2026 files changed, 536043 insertions, 0 deletions
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild
new file mode 100644
index 0000000000..22cd0d55a8
--- /dev/null
+++ b/arch/powerpc/Kbuild
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y += kernel/
+obj-y += mm/
+obj-y += lib/
+obj-y += sysdev/
+obj-y += platforms/
+obj-y += math-emu/
+obj-y += crypto/
+obj-y += net/
+
+obj-$(CONFIG_XMON) += xmon/
+obj-$(CONFIG_KVM)  += kvm/
+
+obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_CORE)  += kexec/
+obj-$(CONFIG_KEXEC_FILE)  += purgatory/
+
+# for cleaning
+subdir- += boot
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
new file mode 100644
index 0000000000..2fe51e0ad6
--- /dev/null
+++ b/arch/powerpc/Kconfig
@@ -0,0 +1,1311 @@
+# SPDX-License-Identifier: GPL-2.0
+source "arch/powerpc/platforms/Kconfig.cputype"
+
+config CC_HAS_ELFV2
+	def_bool PPC64 && $(cc-option, -mabi=elfv2)
+
+config CC_HAS_PREFIXED
+	def_bool PPC64 && $(cc-option, -mcpu=power10 -mprefixed)
+
+config CC_HAS_PCREL
+	# Clang has a bug (https://github.com/llvm/llvm-project/issues/62372)
+	# where pcrel code is not generated if -msoft-float, -mno-altivec, or
+	# -mno-vsx options are also given. Without these options, fp/vec
+	# instructions are generated from regular kernel code. So Clang can't
+	# do pcrel yet.
+	def_bool PPC64 && CC_IS_GCC && $(cc-option, -mcpu=power10 -mpcrel)
+
+config 32BIT
+	bool
+	default y if PPC32
+
+config 64BIT
+	bool
+	default y if PPC64
+
+config LIVEPATCH_64
+	def_bool PPC64
+	depends on LIVEPATCH
+
+config MMU
+	bool
+	default y
+
+config ARCH_MMAP_RND_BITS_MAX
+	# On Book3S 64, the default virtual address space for 64-bit processes
+	# is 2^47 (128TB). As a maximum, allow randomisation to consume up to
+	# 32T of address space (2^45), which should ensure a reasonable gap
+	# between bottom-up and top-down allocations for applications that
+	# consume "normal" amounts of address space. Book3S 64 only supports 64K
+	# and 4K page sizes.
+	default 29 if PPC_BOOK3S_64 && PPC_64K_PAGES # 29 = 45 (32T) - 16 (64K)
+	default 33 if PPC_BOOK3S_64		     # 33 = 45 (32T) - 12 (4K)
+	#
+	# On all other 64-bit platforms (currently only Book3E), the virtual
+	# address space is 2^46 (64TB). Allow randomisation to consume up to 16T
+	# of address space (2^44). Only 4K page sizes are supported.
+	default 32 if 64BIT	# 32 = 44 (16T) - 12 (4K)
+	#
+	# For 32-bit, use the compat values, as they're the same.
+	default ARCH_MMAP_RND_COMPAT_BITS_MAX
+
+config ARCH_MMAP_RND_BITS_MIN
+	# Allow randomisation to consume up to 1GB of address space (2^30).
+	default 14 if 64BIT && PPC_64K_PAGES	# 14 = 30 (1GB) - 16 (64K)
+	default 18 if 64BIT			# 18 = 30 (1GB) - 12 (4K)
+	#
+	# For 32-bit, use the compat values, as they're the same.
+	default ARCH_MMAP_RND_COMPAT_BITS_MIN
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+	# Total virtual address space for 32-bit processes is 2^31 (2GB).
+	# Allow randomisation to consume up to 512MB of address space (2^29).
+	default 11 if PPC_256K_PAGES	# 11 = 29 (512MB) - 18 (256K)
+	default 13 if PPC_64K_PAGES	# 13 = 29 (512MB) - 16 (64K)
+	default 15 if PPC_16K_PAGES	# 15 = 29 (512MB) - 14 (16K)
+	default 17			# 17 = 29 (512MB) - 12 (4K)
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+	# Total virtual address space for 32-bit processes is 2^31 (2GB).
+	# Allow randomisation to consume up to 8MB of address space (2^23).
+	default 5 if PPC_256K_PAGES	#  5 = 23 (8MB) - 18 (256K)
+	default 7 if PPC_64K_PAGES	#  7 = 23 (8MB) - 16 (64K)
+	default 9 if PPC_16K_PAGES	#  9 = 23 (8MB) - 14 (16K)
+	default 11			# 11 = 23 (8MB) - 12 (4K)
+
+config NR_IRQS
+	int "Number of virtual interrupt numbers"
+	range 32 1048576
+	default "512"
+	help
+	  This defines the number of virtual interrupt numbers the kernel
+	  can manage. Virtual interrupt numbers are what you see in
+	  /proc/interrupts. If you configure your system to have too few,
+	  drivers will fail to load or worse - handle with care.
+
+config NMI_IPI
+	bool
+	depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
+	default y
+
+config PPC_WATCHDOG
+	bool
+	depends on HARDLOCKUP_DETECTOR_ARCH
+	default y
+	help
+	  This is a placeholder when the powerpc hardlockup detector
+	  watchdog is selected (arch/powerpc/kernel/watchdog.c). It is
+	  selected via the generic lockup detector menu which is why we
+	  have no standalone config option for it here.
+
+config STACKTRACE_SUPPORT
+	bool
+	default y
+
+config LOCKDEP_SUPPORT
+	bool
+	default y
+
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS
+
+config GENERIC_HWEIGHT
+	bool
+	default y
+
+config PPC
+	bool
+	default y
+	#
+	# Please keep this list sorted alphabetically.
+	#
+	select ARCH_32BIT_OFF_T if PPC32
+	select ARCH_DISABLE_KASAN_INLINE	if PPC_RADIX_MMU
+	select ARCH_DMA_DEFAULT_COHERENT	if !NOT_COHERENT_CACHE
+	select ARCH_ENABLE_MEMORY_HOTPLUG
+	select ARCH_ENABLE_MEMORY_HOTREMOVE
+	select ARCH_HAS_COPY_MC			if PPC64
+	select ARCH_HAS_CURRENT_STACK_POINTER
+	select ARCH_HAS_DEBUG_VIRTUAL
+	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DEBUG_WX		if STRICT_KERNEL_RWX
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_MAP_DIRECT 		if PPC_PSERIES
+	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_HUGEPD			if HUGETLB_PAGE
+	select ARCH_HAS_KCOV
+	select ARCH_HAS_MEMBARRIER_CALLBACKS
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_MEMREMAP_COMPAT_ALIGN	if PPC_64S_HASH_MMU
+	select ARCH_HAS_MMIOWB			if PPC64
+	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	select ARCH_HAS_PHYS_TO_DMA
+	select ARCH_HAS_PMEM_API
+	select ARCH_HAS_PTE_DEVMAP		if PPC_BOOK3S_64
+	select ARCH_HAS_PTE_SPECIAL
+	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+	select ARCH_HAS_SET_MEMORY
+	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
+	select ARCH_HAS_STRICT_KERNEL_RWX	if PPC_85xx && !HIBERNATION && !RANDOMIZE_BASE
+	select ARCH_HAS_STRICT_MODULE_RWX	if ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_HAS_SYSCALL_WRAPPER		if !SPU_BASE && !COMPAT
+	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
+	select ARCH_HAS_UACCESS_FLUSHCACHE
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_KEEP_MEMBLOCK
+	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE	if PPC_RADIX_MMU
+	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_OPTIONAL_KERNEL_RWX		if ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+	select ARCH_SPLIT_ARG64			if PPC32
+	select ARCH_STACKWALK
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
+	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
+	select ARCH_USE_MEMTEST
+	select ARCH_USE_QUEUED_RWLOCKS		if PPC_QUEUED_SPINLOCKS
+	select ARCH_WANT_DEFAULT_BPF_JIT
+	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+	select ARCH_WANT_LD_ORPHAN_WARN
+	select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP	if PPC_RADIX_MMU
+	select ARCH_WANTS_MODULES_DATA_IN_VMALLOC	if PPC_BOOK3S_32 || PPC_8xx
+	select ARCH_WEAK_RELEASE_ACQUIRE
+	select BINFMT_ELF
+	select BUILDTIME_TABLE_SORT
+	select CLONE_BACKWARDS
+	select CPUMASK_OFFSTACK			if NR_CPUS >= 8192
+	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
+	select DMA_OPS				if PPC64
+	select DMA_OPS_BYPASS			if PPC64
+	select DYNAMIC_FTRACE			if FUNCTION_TRACER
+	select EDAC_ATOMIC_SCRUB
+	select EDAC_SUPPORT
+	select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+	select GENERIC_ATOMIC64			if PPC32
+	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP
+	select GENERIC_CMOS_UPDATE
+	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_VULNERABILITIES	if PPC_BARRIER_NOSPEC
+	select GENERIC_EARLY_IOREMAP
+	select GENERIC_GETTIMEOFDAY
+	select GENERIC_IDLE_POLL_SETUP
+	select GENERIC_IOREMAP
+	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
+	select GENERIC_PCI_IOMAP		if PCI
+	select GENERIC_PTDUMP
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_TIME_VSYSCALL
+	select GENERIC_VDSO_TIME_NS
+	select HAS_IOPORT			if PCI
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP
+	select HAVE_ARCH_HUGE_VMAP		if PPC_RADIX_MMU || PPC_8xx
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_JUMP_LABEL_RELATIVE
+	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
+	select HAVE_ARCH_KASAN			if PPC_RADIX_MMU
+	select HAVE_ARCH_KASAN			if PPC_BOOK3E_64
+	select HAVE_ARCH_KASAN_VMALLOC		if HAVE_ARCH_KASAN
+	select HAVE_ARCH_KCSAN
+	select HAVE_ARCH_KFENCE			if ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_MMAP_RND_BITS
+	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
+	select HAVE_ARCH_NVRAM_OPS
+	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_WITHIN_STACK_FRAMES
+	select HAVE_ASM_MODVERSIONS
+	select HAVE_CONTEXT_TRACKING_USER
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_ARGS	if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+	select HAVE_EBPF_JIT
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_FAST_GUP
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_DESCRIPTORS	if PPC64_ELF_ABI_V1
+	select HAVE_FUNCTION_ERROR_INJECTION
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER		if PPC64 || (PPC32 && CC_IS_GCC)
+	select HAVE_GCC_PLUGINS			if GCC_VERSION >= 50200   # plugin support on gcc <= 5.1 is buggy on PPC
+	select HAVE_GENERIC_VDSO
+	select HAVE_HARDLOCKUP_DETECTOR_ARCH	if PPC_BOOK3S_64 && SMP
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
+	select HAVE_IOREMAP_PROT
+	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZMA			if DEFAULT_UIMAGE
+	select HAVE_KERNEL_LZO			if DEFAULT_UIMAGE
+	select HAVE_KERNEL_XZ			if PPC_BOOK3S || 44x
+	select HAVE_KPROBES
+	select HAVE_KPROBES_ON_FTRACE
+	select HAVE_KRETPROBES
+	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100))
+	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+	select HAVE_OBJTOOL			if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
+	select HAVE_OPTPROBES
+	select HAVE_PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI		if PPC64
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
+	select HAVE_RSEQ
+	select HAVE_SETUP_PER_CPU_AREA		if PPC64
+	select HAVE_SOFTIRQ_ON_OWN_STACK
+	select HAVE_STACKPROTECTOR		if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
+	select HAVE_STACKPROTECTOR		if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+	select HAVE_STATIC_CALL			if PPC32
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_VIRT_CPU_ACCOUNTING
+	select HAVE_VIRT_CPU_ACCOUNTING_GEN
+	select HOTPLUG_SMT			if HOTPLUG_CPU
+	select HUGETLB_PAGE_SIZE_VARIABLE	if PPC_BOOK3S_64 && HUGETLB_PAGE
+	select IOMMU_HELPER			if PPC64
+	select IRQ_DOMAIN
+	select IRQ_FORCED_THREADING
+	select KASAN_VMALLOC			if KASAN && MODULES
+	select LOCK_MM_AND_FIND_VMA
+	select MMU_GATHER_MERGE_VMAS
+	select MMU_GATHER_PAGE_SIZE
+	select MMU_GATHER_RCU_TABLE_FREE
+	select MMU_LAZY_TLB_SHOOTDOWN		if PPC_BOOK3S_64
+	select MODULES_USE_ELF_RELA
+	select NEED_DMA_MAP_STATE		if PPC64 || NOT_COHERENT_CACHE
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK	if PPC64
+	select NEED_PER_CPU_PAGE_FIRST_CHUNK	if PPC64
+	select NEED_SG_DMA_LENGTH
+	select OF
+	select OF_EARLY_FLATTREE
+	select OLD_SIGACTION			if PPC32
+	select OLD_SIGSUSPEND
+	select PCI_DOMAINS			if PCI
+	select PCI_MSI_ARCH_FALLBACKS		if PCI_MSI
+	select PCI_SYSCALL			if PCI
+	select PPC_DAWR				if PPC64
+	select RTC_LIB
+	select SMT_NUM_THREADS_DYNAMIC
+	select SPARSE_IRQ
+	select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
+	select SYSCTL_EXCEPTION_TRACE
+	select THREAD_INFO_IN_TASK
+	select TRACE_IRQFLAGS_SUPPORT
+	#
+	# Please keep this list sorted alphabetically.
+	#
+
+config PPC_BARRIER_NOSPEC
+	bool
+	default y
+	depends on PPC_BOOK3S_64 || PPC_E500
+
+config PPC_HAS_LBARX_LHARX
+	bool
+
+config EARLY_PRINTK
+	bool
+	default y
+
+config PANIC_TIMEOUT
+	int
+	default 180
+
+config COMPAT
+	bool "Enable support for 32bit binaries"
+	depends on PPC64
+	depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
+	default y if !CPU_LITTLE_ENDIAN
+	select ARCH_WANT_OLD_COMPAT_IPC
+	select COMPAT_OLD_SIGACTION
+
+config SCHED_OMIT_FRAME_POINTER
+	bool
+	default y
+
+config ARCH_MAY_HAVE_PC_FDC
+	bool
+	default PCI
+
+config PPC_UDBG_16550
+	bool
+
+config GENERIC_TBSYNC
+	bool
+	default y if PPC32 && SMP
+
+config AUDIT_ARCH
+	bool
+	default y
+
+config GENERIC_BUG
+	bool
+	default y
+	depends on BUG
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	def_bool y
+	depends on GENERIC_BUG
+
+config SYS_SUPPORTS_APM_EMULATION
+	bool
+	default y if PMAC_APM_EMU
+
+config EPAPR_BOOT
+	bool
+	help
+	  Used to allow a board to specify it wants an ePAPR compliant wrapper.
+
+config DEFAULT_UIMAGE
+	bool
+	help
+	  Used to allow a board to specify it wants a uImage built by default.
+
+config ARCH_HIBERNATION_POSSIBLE
+	bool
+	default y
+
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+	depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
+		   (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
+		   || 44x || 40x
+
+config ARCH_SUSPEND_NONZERO_CPU
+	def_bool y
+	depends on PPC_POWERNV || PPC_PSERIES
+
+config ARCH_HAS_ADD_PAGES
+	def_bool y
+	depends on ARCH_ENABLE_MEMORY_HOTPLUG
+
+config PPC_DCR_NATIVE
+	bool
+
+config PPC_DCR_MMIO
+	bool
+
+config PPC_DCR
+	bool
+	depends on PPC_DCR_NATIVE || PPC_DCR_MMIO
+	default y
+
+config PPC_PCI_OF_BUS_MAP
+	bool "Use pci_to_OF_bus_map (deprecated)"
+	depends on PPC32
+	depends on PPC_PMAC || PPC_CHRP
+	help
+	  This option uses pci_to_OF_bus_map to map OF nodes to PCI devices, which
+	  restricts the system to only having 256 PCI buses. On CHRP it also causes
+	  the "pci-OF-bus-map" property to be created in the device tree.
+
+	  If unsure, say "N".
+
+config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+	bool "Assign PCI bus numbers from zero individually for each PCI domain"
+	depends on PPC32
+	depends on !PPC_PCI_OF_BUS_MAP
+	default y
+	help
+	  By default on PPC32, PCI bus numbers were unique across all PCI
+	  domains, so a system could have only 256 PCI buses regardless of the
+	  number of PCI domains. When this option is enabled, PCI bus numbers
+	  are assigned per PCI domain, so each PCI controller in its own domain
+	  can have 256 PCI buses, as on other Linux architectures.
+
+config PPC_OF_PLATFORM_PCI
+	bool
+	depends on PCI
+	depends on PPC64 # not supported on 32 bits yet
+
+config ARCH_SUPPORTS_UPROBES
+	def_bool y
+
+config PPC_ADV_DEBUG_REGS
+	bool
+	depends on 40x || BOOKE
+	default y
+
+config PPC_ADV_DEBUG_IACS
+	int
+	depends on PPC_ADV_DEBUG_REGS
+	default 4 if 44x
+	default 2
+
+config PPC_ADV_DEBUG_DACS
+	int
+	depends on PPC_ADV_DEBUG_REGS
+	default 2
+
+config PPC_ADV_DEBUG_DVCS
+	int
+	depends on PPC_ADV_DEBUG_REGS
+	default 2 if 44x
+	default 0
+
+config PPC_ADV_DEBUG_DAC_RANGE
+	bool
+	depends on PPC_ADV_DEBUG_REGS && 44x
+	default y
+
+config PPC_DAWR
+	bool
+
+config PGTABLE_LEVELS
+	int
+	default 2 if !PPC64
+	default 4
+
+source "arch/powerpc/sysdev/Kconfig"
+source "arch/powerpc/platforms/Kconfig"
+
+menu "Kernel options"
+
+config HIGHMEM
+	bool "High memory support"
+	depends on PPC32
+	select KMAP_LOCAL
+
+source "kernel/Kconfig.hz"
+
+config MATH_EMULATION
+	bool "Math emulation"
+	depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT
+	select PPC_FPU_REGS
+	help
+	  Some PowerPC chips designed for embedded applications do not have
+	  a floating-point unit and therefore do not implement the
+	  floating-point instructions in the PowerPC instruction set.  If you
+	  say Y here, the kernel will include code to emulate a floating-point
+	  unit, which will allow programs that use floating-point
+	  instructions to run.
+
+	  This is also useful to emulate missing (optional) instructions
+	  such as fsqrt on cores that do have an FPU but do not implement
+	  them (such as Freescale BookE).
+
+choice
+	prompt "Math emulation options"
+	default MATH_EMULATION_FULL
+	depends on MATH_EMULATION
+
+config MATH_EMULATION_FULL
+	bool "Emulate all the floating point instructions"
+	help
+	  Selecting this option enables the kernel to emulate all the
+	  floating point instructions. If your SoC doesn't have an FPU,
+	  you should select this.
+
+config MATH_EMULATION_HW_UNIMPLEMENTED
+	bool "Just emulate the FPU unimplemented instructions"
+	help
+	  Select this if you know your SoC has a hardware FPU, but some
+	  floating point instructions are not implemented by it.
+
+endchoice
+
+config PPC_TRANSACTIONAL_MEM
+	bool "Transactional Memory support for POWERPC"
+	depends on PPC_BOOK3S_64
+	depends on SMP
+	select ALTIVEC
+	select VSX
+	help
+	  Support user-mode Transactional Memory on POWERPC.
+
+config PPC_UV
+	bool "Ultravisor support"
+	depends on KVM_BOOK3S_HV_POSSIBLE
+	depends on DEVICE_PRIVATE
+	default n
+	help
+	  This option paravirtualizes the kernel to run on POWER platforms that
+	  support the Protected Execution Facility (PEF). On such platforms,
+	  the ultravisor firmware runs at a privilege level above the
+	  hypervisor.
+
+	  If unsure, say "N".
+
+config LD_HEAD_STUB_CATCH
+	bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
+	depends on PPC64
+	help
+	  Very large kernels can cause linker branch stubs to be generated by
+	  code in head_64.S, which moves the head text sections out of their
+	  specified location. This option can work around the problem.
+
+	  If unsure, say "N".
+
+config MPROFILE_KERNEL
+	depends on PPC64_ELF_ABI_V2 && FUNCTION_TRACER
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mlittle-endian) if CPU_LITTLE_ENDIAN
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mbig-endian) if CPU_BIG_ENDIAN
+
+config ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+	depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2)
+	depends on $(cc-option,-fpatchable-function-entry=2)
+	def_bool y if PPC32
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN
+	def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN
+
+config HOTPLUG_CPU
+	bool "Support for enabling/disabling CPUs"
+	depends on SMP && (PPC_PSERIES || \
+		PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE)
+	help
+	  Say Y here to be able to disable and re-enable individual
+	  CPUs at runtime on SMP machines.
+
+	  Say N if you are unsure.
+
+config INTERRUPT_SANITIZE_REGISTERS
+	bool "Clear gprs on interrupt arrival"
+	depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+	default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV
+	help
+	  Reduce the influence of user register state on interrupt handlers and
+	  syscalls through clearing user state from registers before handling
+	  the exception.
+
+config PPC_QUEUED_SPINLOCKS
+	bool "Queued spinlocks" if EXPERT
+	depends on SMP
+	default PPC_BOOK3S_64
+	help
+	  Say Y here to use queued spinlocks which give better scalability and
+	  fairness on large SMP and NUMA systems without harming single threaded
+	  performance.
+
+config ARCH_CPU_PROBE_RELEASE
+	def_bool y
+	depends on HOTPLUG_CPU
+
+config PPC64_SUPPORTS_MEMORY_FAILURE
+	bool "Add support for memory hwpoison"
+	depends on PPC_BOOK3S_64
+	default y if PPC_POWERNV
+	select ARCH_SUPPORTS_MEMORY_FAILURE
+
+config ARCH_SUPPORTS_KEXEC
+	def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
+
+config ARCH_SUPPORTS_KEXEC_FILE
+	def_bool PPC64
+
+config ARCH_SUPPORTS_KEXEC_PURGATORY
+	def_bool y
+
+config ARCH_SELECTS_KEXEC_FILE
+	def_bool y
+	depends on KEXEC_FILE
+	select KEXEC_ELF
+	select HAVE_IMA_KEXEC if IMA
+
+config PPC64_BIG_ENDIAN_ELF_ABI_V2
+	# Option is available to BFD, but LLD does not support ELFv1 so this is
+	# always true there.
+	prompt "Build big-endian kernel using ELF ABI V2" if LD_IS_BFD && EXPERT
+	def_bool y
+	depends on PPC64 && CPU_BIG_ENDIAN
+	depends on CC_HAS_ELFV2
+	help
+	  This builds the kernel image using the "Power Architecture 64-Bit ELF
+	  V2 ABI Specification", which has a reduced stack overhead and faster
+	  function calls. This internal kernel ABI option does not affect
+	  userspace compatibility.
+
+	  The V2 ABI is standard for 64-bit little-endian, but for big-endian
+	  it is less well tested by kernel and toolchain. However some distros
+	  build userspace this way, and it can produce a functioning kernel.
+
+config RELOCATABLE
+	bool "Build a relocatable kernel"
+	depends on PPC64 || (FLATMEM && (44x || PPC_85xx))
+	select NONSTATIC_KERNEL
+	help
+	  This builds a kernel image that is capable of running at the
+	  location the kernel is loaded at. For ppc32, there are no
+	  alignment restrictions, and this feature is a superset of
+	  DYNAMIC_MEMSTART and hence overrides it. For ppc64, we should use
+	  16k-aligned base address. The kernel is linked as a
+	  position-independent executable (PIE) and contains dynamic relocations
+	  which are processed early in the bootup process.
+
+	  One use is for the kexec on panic case where the recovery kernel
+	  must live at a different physical address than the primary
+	  kernel.
+
+	  Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
+	  it has been loaded at and the compile time physical address
+	  CONFIG_PHYSICAL_START is ignored.  However CONFIG_PHYSICAL_START
+	  setting can still be useful to bootwrappers that need to know the
+	  load address of the kernel (eg. u-boot/mkimage).
+
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	depends on PPC_85xx && FLATMEM
+	depends on RELOCATABLE
+	help
+	  Randomizes the virtual address at which the kernel image is
+	  loaded, as a security feature that deters exploit attempts
+	  relying on knowledge of the location of kernel internals.
+
+	  If unsure, say Y.
+
+config RELOCATABLE_TEST
+	bool "Test relocatable kernel"
+	depends on (PPC64 && RELOCATABLE)
+	help
+	  This runs the relocatable kernel at the address it was initially
+	  loaded at, which tends to be non-zero and therefore tests the
+	  relocation code.
+
+config ARCH_SUPPORTS_CRASH_DUMP
+	def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
+
+config ARCH_SELECTS_CRASH_DUMP
+	def_bool y
+	depends on CRASH_DUMP
+	select RELOCATABLE if PPC64 || 44x || PPC_85xx
+
+config FA_DUMP
+	bool "Firmware-assisted dump"
+	depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
+	select CRASH_CORE
+	select CRASH_DUMP
+	help
+	  A robust mechanism to get reliable kernel crash dump with
+	  assistance from firmware. This approach does not use kexec,
+	  instead firmware assists in booting the capture kernel
+	  while preserving memory contents. Firmware-assisted dump
+	  is meant to be a kdump replacement offering robustness and
+	  speed not possible without system firmware assistance.
+
+	  If unsure, say "y". Only special kernels like petitboot may
+	  need to say "N" here.
+
+config PRESERVE_FA_DUMP
+	bool "Preserve Firmware-assisted dump"
+	depends on PPC64 && PPC_POWERNV && !FA_DUMP
+	help
+	  On a kernel with FA_DUMP disabled, this option helps to preserve
+	  crash data from a previously crashed kernel. Useful when the next
+	  memory preserving kernel boot would process this crash data.
+	  Petitboot kernel is the typical usecase for this option.
+
+config OPAL_CORE
+	bool "Export OPAL memory as /sys/firmware/opal/core"
+	depends on PPC64 && PPC_POWERNV
+	help
+	  This option uses the MPIPL support in firmware to provide an
+	  ELF core of OPAL memory after a crash. The ELF core is exported
+	  as /sys/firmware/opal/core file which is helpful in debugging
+	  OPAL crashes using GDB.
+
+config IRQ_ALL_CPUS
+	bool "Distribute interrupts on all CPUs by default"
+	depends on SMP
+	help
+	  This option gives the kernel permission to distribute IRQs across
+	  multiple CPUs.  Saying N here will route all IRQs to the first
+	  CPU.  Generally saying Y is safe, although some problems have been
+	  reported with SMP Power Macintoshes with this option enabled.
+
+config NUMA
+	bool "NUMA Memory Allocation and Scheduler Support"
+	depends on PPC64 && SMP
+	default y if PPC_PSERIES || PPC_POWERNV
+	select USE_PERCPU_NUMA_NODE_ID
+	help
+	  Enable NUMA (Non-Uniform Memory Access) support.
+
+	  The kernel will try to allocate memory used by a CPU on the
+	  local memory controller of the CPU and add some more
+	  NUMA awareness to the kernel.
+
+config NODES_SHIFT
+	int
+	default "8" if PPC64
+	default "4"
+	depends on NUMA
+
+config HAVE_MEMORYLESS_NODES
+	def_bool y
+	depends on NUMA
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y
+	depends on PPC64
+
+config ARCH_FLATMEM_ENABLE
+	def_bool y
+	depends on (PPC64 && !NUMA) || PPC32
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	depends on PPC64
+	select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on PPC_BOOK3S_64
+
+config ILLEGAL_POINTER_VALUE
+	hex
+	# This is roughly half way between the top of user space and the bottom
+	# of kernel space, which seems about as good as we can get.
+	default 0x5deadbeef0000000 if PPC64
+	default 0
+
+config ARCH_MEMORY_PROBE
+	def_bool y
+	depends on MEMORY_HOTPLUG
+
+choice
+	prompt "Page size"
+	default PPC_64K_PAGES if PPC_BOOK3S_64
+	default PPC_4K_PAGES
+	help
+	  Select the kernel logical page size. Increasing the page size
+	  will reduce software overhead at each page boundary, allow
+	  hardware prefetch mechanisms to be more effective, and allow
+	  larger dma transfers increasing IO efficiency and reducing
+	  overhead. However the utilization of memory will increase.
+	  For example, each cached file will use a multiple of the
+	  page size to hold its contents and the difference between the
+	  end of file and the end of page is wasted.
+
+	  Some dedicated systems, such as software raid serving with
+	  accelerated calculations, have shown significant increases.
+
+	  If you configure a 64 bit kernel for 64k pages but the
+	  processor does not support them, then the kernel will simulate
+	  them with 4k pages, loading them on demand, but with the
+	  reduced software overhead and larger internal fragmentation.
+	  For the 32 bit kernel, a large page option will not be offered
+	  unless it is supported by the configured processor.
+
+	  If unsure, choose 4K_PAGES.
+
+config PPC_4K_PAGES
+	bool "4k page size"
+	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
+
+config PPC_16K_PAGES
+	bool "16k page size"
+	depends on 44x || PPC_8xx
+
+config PPC_64K_PAGES
+	bool "64k page size"
+	depends on 44x || PPC_BOOK3S_64
+	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
+
+config PPC_256K_PAGES
+	bool "256k page size (Requires non-standard binutils settings)"
+	depends on 44x && !PPC_47x
+	help
+	  Make the page size 256k.
+
+	  The kernel will only be able to run applications that have been
+	  compiled with '-zmax-page-size' set to 256K (the default is 64K) using
+	  binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
+	  definition from 0x10000 to 0x40000 in older versions.
+
+endchoice
+
+config PAGE_SIZE_4KB
+	def_bool y
+	depends on PPC_4K_PAGES
+
+config PAGE_SIZE_16KB
+	def_bool y
+	depends on PPC_16K_PAGES
+
+config PAGE_SIZE_64KB
+	def_bool y
+	depends on PPC_64K_PAGES
+
+config PAGE_SIZE_256KB
+	def_bool y
+	depends on PPC_256K_PAGES
+
+config PPC_PAGE_SHIFT
+	int
+	default 18 if PPC_256K_PAGES
+	default 16 if PPC_64K_PAGES
+	default 14 if PPC_16K_PAGES
+	default 12
+
+config THREAD_SHIFT
+	int "Thread shift" if EXPERT
+	range 13 15
+	default "15" if PPC_256K_PAGES
+	default "15" if PPC_PSERIES || PPC_POWERNV
+	default "14" if PPC64
+	default "13"
+	help
+	  Used to define the stack size. The default is almost always what you
+	  want. Only change this if you know what you are doing.
+
+config DATA_SHIFT_BOOL
+	bool "Set custom data alignment"
+	depends on ADVANCED_OPTIONS
+	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
+	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) || \
+		   PPC_85xx
+	help
+	  This option allows you to set the kernel data alignment. When
+	  RAM is mapped by blocks, the alignment needs to fit the size and
+	  number of possible blocks. The default should be OK for most configs.
+
+	  Say N here unless you know what you are doing.
+
+config DATA_SHIFT
+	int "Data shift" if DATA_SHIFT_BOOL
+	default 24 if STRICT_KERNEL_RWX && PPC64
+	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+	range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx
+	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+	default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+	default 23 if STRICT_KERNEL_RWX && PPC_8xx
+	default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
+	default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+	default 24 if STRICT_KERNEL_RWX && PPC_85xx
+	default PPC_PAGE_SHIFT
+	help
+	  On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
+	  The smaller the alignment, the greater the number of necessary DBATs.
+
+	  On 8xx, large pages (512kb or 8M) are used to map kernel linear
+	  memory. Aligning to 8M reduces TLB misses as only 8M pages are used
+	  in that case. If PIN_TLB is selected, it must be aligned to 8M as
+	  8M pages will be pinned.
+
+config ARCH_FORCE_MAX_ORDER
+	int "Order of maximal physically contiguous allocations"
+	range 7 8 if PPC64 && PPC_64K_PAGES
+	default "8" if PPC64 && PPC_64K_PAGES
+	range 12 12 if PPC64 && !PPC_64K_PAGES
+	default "12" if PPC64 && !PPC_64K_PAGES
+	range 8 10 if PPC32 && PPC_16K_PAGES
+	default "8" if PPC32 && PPC_16K_PAGES
+	range 6 10 if PPC32 && PPC_64K_PAGES
+	default "6" if PPC32 && PPC_64K_PAGES
+	range 4 10 if PPC32 && PPC_256K_PAGES
+	default "4" if PPC32 && PPC_256K_PAGES
+	range 10 12
+	default "10"
+	help
+	  The kernel page allocator limits the size of maximal physically
+	  contiguous allocations. The limit is called MAX_ORDER and it
+	  defines the maximal power of two of number of pages that can be
+	  allocated as a single contiguous block. This option allows
+	  overriding the default setting when ability to allocate very
+	  large blocks of physically contiguous memory is required.
+
+	  The page size is not necessarily 4KB.  For example, on 64-bit
+	  systems, 64KB pages can be enabled via CONFIG_PPC_64K_PAGES.  Keep
+	  this in mind when choosing a value for this option.
+
+	  Don't change if unsure.
+
+config PPC_SUBPAGE_PROT
+	bool "Support setting protections for 4k subpages (subpage_prot syscall)"
+	default n
+	depends on PPC_64S_HASH_MMU && PPC_64K_PAGES
+	help
+	  This option adds support for a system call that allows user programs
+	  to set access permissions (read/write, readonly, or no access)
+	  on the 4k subpages of each 64k page.
+
+	  If unsure, say N here.
+
+config PPC_PROT_SAO_LPAR
+	bool "Support PROT_SAO mappings in LPARs"
+	depends on PPC_BOOK3S_64
+	help
+	  This option adds support for PROT_SAO mappings from userspace
+	  inside LPARs on supported CPUs.
+
+	  This may cause issues when performing guest migration from
+	  a CPU that supports SAO to one that does not.
+
+	  If unsure, say N here.
+
+config PPC_COPRO_BASE
+	bool
+
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on PPC64 && SMP
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with POWER5 cpus at a cost of slightly increased
+	  overhead in some places. If unsure say N here.
+
+config PPC_DENORMALISATION
+	bool "PowerPC denormalisation exception handling"
+	depends on PPC_BOOK3S_64
+	default "y" if PPC_POWERNV
+	help
+	  Add support for handling denormalisation of single precision
+	  values.  Useful for bare metal only.  If unsure say Y here.
+
+config CMDLINE
+	string "Initial kernel command string"
+	default ""
+	help
+	  On some platforms, there is currently no way for the boot loader to
+	  pass arguments to the kernel. For these platforms, you can supply
+	  some command-line options at build time by entering them here.  In
+	  most cases you will need to specify the root device here.
+
+choice
+	prompt "Kernel command line type" if CMDLINE != ""
+	default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+	bool "Use bootloader kernel arguments if available"
+	help
+	  Uses the command-line options passed by the boot loader. If
+	  the boot loader doesn't provide any, the default kernel command
+	  string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+	bool "Extend bootloader kernel arguments"
+	help
+	  The command-line arguments provided by the boot loader will be
+	  appended to the default kernel command string.
+
+config CMDLINE_FORCE
+	bool "Always use the default kernel command string"
+	help
+	  Always use the default kernel command string, even if the boot
+	  loader passes other arguments to the kernel.
+	  This is useful if you cannot or don't want to change the
+	  command-line options your boot loader passes to the kernel.
+
+endchoice
+
+config EXTRA_TARGETS
+	string "Additional default image types"
+	help
+	  List additional targets to be built by the bootwrapper here (separated
+	  by spaces).  This is useful for targets that depend on device tree
+	  files in the .dts directory.
+
+	  Targets in this list will be built as part of the default build
+	  target, or when the user does a 'make zImage' or a
+	  'make zImage.initrd'.
+
+	  If unsure, leave blank
+
+config ARCH_WANTS_FREEZER_CONTROL
+	def_bool y
+	depends on ADB_PMU
+
+source "kernel/power/Kconfig"
+
+config PPC_MEM_KEYS
+	prompt "PowerPC Memory Protection Keys"
+	def_bool y
+	depends on PPC_BOOK3S_64
+	depends on PPC_64S_HASH_MMU
+	select ARCH_USES_HIGH_VMA_FLAGS
+	select ARCH_HAS_PKEYS
+	help
+	  Memory Protection Keys provides a mechanism for enforcing
+	  page-based protections, but without requiring modification of the
+	  page tables when an application changes protection domains.
+
+	  For details, see Documentation/core-api/protection-keys.rst
+
+	  If unsure, say y.
+
+config PPC_SECURE_BOOT
+	prompt "Enable secure boot support"
+	bool
+	depends on PPC_POWERNV || PPC_PSERIES
+	depends on IMA_ARCH_POLICY
+	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
+	select PSERIES_PLPKS if PPC_PSERIES
+	help
+	  Systems with firmware secure boot enabled need to define security
+	  policies to extend secure boot to the OS. This config allows a user
+	  to enable OS secure boot on systems that have firmware support for
+	  it. If in doubt say N.
+
+config PPC_SECVAR_SYSFS
+	bool "Enable sysfs interface for POWER secure variables"
+	default y
+	depends on PPC_SECURE_BOOT
+	depends on SYSFS
+	help
+	  POWER secure variables are managed and controlled by firmware.
+	  These variables are exposed to userspace via sysfs to enable
+	  read/write operations on these variables. Say Y if you have
+	  secure boot enabled and want to expose variables to userspace.
+
+endmenu
+
+config ISA_DMA_API
+	bool
+	default PCI
+
+menu "Bus options"
+
+config ISA
+	bool "Support for ISA-bus hardware"
+	depends on PPC_CHRP
+	select PPC_I8259
+	help
+	  Find out whether you have ISA slots on your motherboard.  ISA is the
+	  name of a bus system, i.e. the way the CPU talks to the other stuff
+	  inside your box.  If you have an Apple machine, say N here; if you
+	  have an IBM RS/6000 or pSeries machine, say Y.  If you have an
+	  embedded board, consult your board documentation.
+
+config GENERIC_ISA_DMA
+	bool
+	depends on ISA_DMA_API
+	default y
+
+config PPC_INDIRECT_PCI
+	bool
+	depends on PCI
+	default y if 40x || 44x
+
+config SBUS
+	bool
+
+config FSL_SOC
+	bool
+
+config FSL_PCI
+	bool
+	select ARCH_HAS_DMA_SET_MASK
+	select PPC_INDIRECT_PCI
+	select PCI_QUIRKS
+
+config FSL_PMC
+	bool
+	default y
+	depends on SUSPEND && (PPC_85xx || PPC_86xx)
+	help
+	  Freescale MPC85xx/MPC86xx power management controller support
+	  (suspend/resume). For MPC83xx see platforms/83xx/suspend.c
+
+config PPC4xx_CPM
+	bool
+	default y
+	depends on SUSPEND && (44x || 40x)
+	help
+	  PPC4xx Clock Power Management (CPM) support (suspend/resume).
+	  It also enables support for two different idle states (idle-wait
+	  and idle-doze).
+
+config 4xx_SOC
+	bool
+
+config FSL_LBC
+	bool "Freescale Local Bus support"
+	help
+	  Enables reporting of errors from the Freescale local bus
+	  controller.  Also contains some common code used by
+	  drivers for specific local bus peripherals.
+
+config FSL_GTM
+	bool
+	depends on PPC_83xx || QUICC_ENGINE || CPM2
+	help
+	  Freescale General-purpose Timers support
+
+config FSL_RIO
+	bool "Freescale Embedded SRIO Controller support"
+	depends on RAPIDIO = y && HAVE_RAPIDIO
+	default "n"
+	help
+	  Include support for RapidIO controller on Freescale embedded
+	  processors (MPC8548, MPC8641, etc).
+
+endmenu
+
+config NONSTATIC_KERNEL
+	bool
+
+menu "Advanced setup"
+	depends on PPC32
+
+config ADVANCED_OPTIONS
+	bool "Prompt for advanced kernel configuration options"
+	help
+	  This option will enable prompting for a variety of advanced kernel
+	  configuration options.  These options can cause the kernel to not
+	  work if they are set incorrectly, but can be used to optimize certain
+	  aspects of kernel memory management.
+
+	  Unless you know what you are doing, say N here.
+
+comment "Default settings for advanced configuration options are used"
+	depends on !ADVANCED_OPTIONS
+
+config LOWMEM_SIZE_BOOL
+	bool "Set maximum low memory"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the maximum amount of memory which
+	  will be used as "low memory", that is, memory which the kernel can
+	  access directly, without having to set up a kernel virtual mapping.
+	  This can be useful in optimizing the layout of kernel virtual
+	  memory.
+
+	  Say N here unless you know what you are doing.
+
+config LOWMEM_SIZE
+	hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
+	default "0x30000000"
+
+config LOWMEM_CAM_NUM_BOOL
+	bool "Set number of CAMs to use to map low memory"
+	depends on ADVANCED_OPTIONS && PPC_85xx
+	help
+	  This option allows you to set the maximum number of CAM slots that
+	  will be used to map low memory.  There are a limited number of slots
+	  available and even more limited number that will fit in the L1 MMU.
+	  However, using more entries will allow mapping more low memory.  This
+	  can be useful in optimizing the layout of kernel virtual memory.
+
+	  Say N here unless you know what you are doing.
+
+config LOWMEM_CAM_NUM
+	depends on PPC_85xx
+	int "Number of CAMs to use to map low memory" if LOWMEM_CAM_NUM_BOOL
+	default 3 if !STRICT_KERNEL_RWX
+	default 9 if DATA_SHIFT >= 24
+	default 12 if DATA_SHIFT >= 22
+	default 15
+
+config DYNAMIC_MEMSTART
+	bool "Enable page aligned dynamic load address for kernel"
+	depends on ADVANCED_OPTIONS && FLATMEM && (PPC_85xx || 44x)
+	select NONSTATIC_KERNEL
+	help
+	  This option enables the kernel to be loaded at any page aligned
+	  physical address. The kernel creates a mapping from KERNELBASE to
+	  the address where the kernel is loaded. The page size here implies
+	  the TLB page size of the mapping for kernel on the particular platform.
+	  Please refer to the init code for finding the TLB page size.
+
+	  DYNAMIC_MEMSTART is an easy way of implementing pseudo-RELOCATABLE
+	  kernel image, where the only restriction is the page aligned kernel
+	  load address. When this option is enabled, the compile time physical
+	  address CONFIG_PHYSICAL_START is ignored.
+
+	  This option is overridden by CONFIG_RELOCATABLE.
+
+config PAGE_OFFSET_BOOL
+	bool "Set custom page offset address"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the kernel virtual address at which
+	  the kernel will map low memory.  This can be useful in optimizing
+	  the virtual memory layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config PAGE_OFFSET
+	hex "Virtual address of memory base" if PAGE_OFFSET_BOOL
+	default "0xc0000000"
+
+config KERNEL_START_BOOL
+	bool "Set custom kernel base address"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the kernel virtual address at which
+	  the kernel will be loaded.  Normally this should match PAGE_OFFSET
+	  however there are times (like kdump) that one might not want them
+	  to be the same.
+
+	  Say N here unless you know what you are doing.
+
+config KERNEL_START
+	hex "Virtual address of kernel base" if KERNEL_START_BOOL
+	default PAGE_OFFSET if PAGE_OFFSET_BOOL
+	default "0xc2000000" if CRASH_DUMP && !NONSTATIC_KERNEL
+	default "0xc0000000"
+
+config PHYSICAL_START_BOOL
+	bool "Set physical address where the kernel is loaded"
+	depends on ADVANCED_OPTIONS && FLATMEM && PPC_85xx
+	help
+	  This gives the physical address where the kernel is loaded.
+
+	  Say N here unless you know what you are doing.
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded" if PHYSICAL_START_BOOL
+	default "0x02000000" if PPC_BOOK3S && CRASH_DUMP && !NONSTATIC_KERNEL
+	default "0x00000000"
+
+config PHYSICAL_ALIGN
+	hex
+	default "0x04000000" if PPC_85xx
+	help
+	  This value puts the alignment restrictions on physical address
+	  where kernel is loaded and run from. Kernel is compiled for an
+	  address which meets above alignment restriction.
+
+config TASK_SIZE_BOOL
+	bool "Set custom user task size"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the amount of virtual address space
+	  allocated to user tasks.  This can be useful in optimizing the
+	  virtual memory layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config TASK_SIZE
+	hex "Size of user task space" if TASK_SIZE_BOOL
+	default "0x80000000" if PPC_8xx
+	default "0xb0000000" if PPC_BOOK3S_32
+	default "0xc0000000"
+endmenu
+
+if PPC64
+# This value must have zeroes in the bottom 60 bits otherwise lots will break
+config PAGE_OFFSET
+	hex
+	default "0xc000000000000000"
+config KERNEL_START
+	hex
+	default "0xc000000000000000"
+config PHYSICAL_START
+	hex
+	default "0x00000000"
+endif
+
+config PPC_LIB_RHEAP
+	bool
+
+source "arch/powerpc/kvm/Kconfig"
+
+source "kernel/livepatch/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
new file mode 100644
index 0000000000..2a54fadbea
--- /dev/null
+++ b/arch/powerpc/Kconfig.debug
@@ -0,0 +1,393 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config PPC_DISABLE_WERROR
+	bool "Don't build arch/powerpc code with -Werror"
+	help
+	  This option tells the compiler NOT to build the code under
+	  arch/powerpc with the -Werror flag (which means warnings
+	  are treated as errors).
+
+	  Only enable this if you are hitting a build failure in the
+	  arch/powerpc code caused by a warning, and you don't feel
+	  inclined to fix it.
+
+config PPC_WERROR
+	bool
+	depends on !PPC_DISABLE_WERROR
+	default y
+
+config PRINT_STACK_DEPTH
+	int "Stack depth to print" if DEBUG_KERNEL
+	default 64
+	help
+	  This option allows you to set the stack depth that the kernel
+	  prints in stack traces. This can be useful if your display is
+	  too small and stack traces cause important information to
+	  scroll off the screen.
+
+config HCALL_STATS
+	bool "Hypervisor call instrumentation"
+	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
+	help
+	  Adds code to keep track of the number of hypervisor calls made and
+	  the amount of time spent in hypervisor calls.  Wall time spent in
+	  each call is always calculated, and if available CPU cycles spent
+	  are also calculated.  A directory named hcall_inst is added at the
+	  root of the debugfs filesystem.  Within the hcall_inst directory
+	  are files that contain CPU specific call statistics.
+
+	  This option will add a small amount of overhead to all hypervisor
+	  calls.
+
+config PPC_EMULATED_STATS
+	bool "Emulated instructions tracking"
+	depends on DEBUG_FS
+	help
+	  Adds code to keep track of the number of instructions that are
+	  emulated by the in-kernel emulator. Counters for the various classes
+	  of emulated instructions are available under
+	  powerpc/emulated_instructions/ in the root of the debugfs file
+	  system. Optionally (controlled by
+	  powerpc/emulated_instructions/do_warn in debugfs), rate-limited
+	  warnings can be printed to the console when instructions are
+	  emulated.
+
+config CODE_PATCHING_SELFTEST
+	bool "Run self-tests of the code-patching code"
+	depends on DEBUG_KERNEL
+
+config JUMP_LABEL_FEATURE_CHECKS
+	bool "Enable use of jump label for cpu/mmu_has_feature()"
+	depends on JUMP_LABEL
+	default y
+	help
+	  Selecting this option enables use of jump labels for some internal
+	  feature checks. This should generate more optimal code for those
+	  checks.
+
+config JUMP_LABEL_FEATURE_CHECK_DEBUG
+	bool "Do extra check on feature fixup calls"
+	depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
+	help
+	  This tries to catch incorrect usage of cpu_has_feature() and
+	  mmu_has_feature() in the code.
+
+	  If you don't know what this means, say N.
+
+config FTR_FIXUP_SELFTEST
+	bool "Run self-tests of the feature-fixup code"
+	depends on DEBUG_KERNEL
+
+config MSI_BITMAP_SELFTEST
+	bool "Run self-tests of the MSI bitmap code"
+	depends on DEBUG_KERNEL
+
+config PPC_IRQ_SOFT_MASK_DEBUG
+	bool "Include extra checks for powerpc irq soft masking"
+	depends on PPC64
+
+config PPC_RFI_SRR_DEBUG
+	bool "Include extra checks for RFI SRR register validity"
+	depends on PPC_BOOK3S_64
+
+config XMON
+	bool "Include xmon kernel debugger"
+	depends on DEBUG_KERNEL
+	select CONSOLE_POLL if SERIAL_CPM_CONSOLE
+	help
+	  Include in-kernel hooks for the xmon kernel monitor/debugger.
+	  Unless you are intending to debug the kernel, say N here.
+	  Make sure to also enable CONFIG_BOOTX_TEXT on Macs. Otherwise
+	  nothing will appear on the screen (xmon writes directly to the
+	  framebuffer memory).
+	  The cmdline option 'xmon' or 'xmon=early' will drop into xmon
+	  very early during boot. 'xmon=on' will just enable the xmon
+	  debugger hooks.  'xmon=off' will disable the debugger hooks
+	  if CONFIG_XMON_DEFAULT is set.
+	  xmon will print a backtrace on the very first invocation.
+	  'xmon=nobt' will disable this autobacktrace.
+
+config XMON_DEFAULT
+	bool "Enable xmon by default"
+	depends on XMON
+	help
+	  xmon is normally disabled unless booted with 'xmon=on'.
+	  Use 'xmon=off' to disable xmon init during runtime.
+
+config XMON_DISASSEMBLY
+	bool "Include disassembly support in xmon"
+	depends on XMON
+	default y
+	help
+	  Include support for disassembling in xmon. You probably want
+	  to say Y here, unless you're building for a memory-constrained
+	  system.
+
+config XMON_DEFAULT_RO_MODE
+	bool "Restrict xmon to read-only operations by default"
+	depends on XMON
+	default y
+	help
+	  Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+	  'xmon=ro' override this default.
+
+config DEBUGGER
+	bool
+	depends on KGDB || XMON
+	default y
+
+config BDI_SWITCH
+	bool "Include BDI-2000 user context switcher"
+	depends on DEBUG_KERNEL && PPC32
+	help
+	  Include in-kernel support for the Abatron BDI2000 debugger.
+	  Unless you are intending to debug the kernel with one of these
+	  machines, say N here.
+
+config BOOTX_TEXT
+	bool "Support for early boot text console (BootX or OpenFirmware only)"
+	depends on PPC_BOOK3S
+	help
+	  Say Y here to see progress messages from the boot firmware in text
+	  mode. Requires either BootX or Open Firmware.
+
+config PPC_EARLY_DEBUG
+	bool "Early debugging (dangerous)"
+	help
+	  Say Y to enable some early debugging facilities that may be available
+	  for your processor/board combination. Those facilities are hacks
+	  intended to debug problems early during boot, this should not be
+	  enabled in a production kernel.
+	  Note that enabling this will also cause the kernel default log level
+	  to be pushed to max automatically very early during boot.
+
+choice
+	prompt "Early debugging console"
+	depends on PPC_EARLY_DEBUG
+	help
+	  Use the selected console for early debugging. Careful, if you
+	  enable debugging for the wrong type of machine your kernel
+	  _will not boot_.
+
+config PPC_EARLY_DEBUG_BOOTX
+	bool "BootX or OpenFirmware"
+	depends on BOOTX_TEXT
+	help
+	  Select this to enable early debugging for a machine using BootX
+	  or OpenFirmware.
+
+config PPC_EARLY_DEBUG_LPAR
+	bool "LPAR HV Console"
+	depends on PPC_PSERIES && HVC_CONSOLE
+	help
+	  Select this to enable early debugging for a machine with a HVC
+	  console on vterm 0.
+
+config PPC_EARLY_DEBUG_LPAR_HVSI
+	bool "LPAR HVSI Console"
+	depends on PPC_PSERIES && HVC_CONSOLE
+	help
+	  Select this to enable early debugging for a machine with a HVSI
+	  console on a specified vterm.
+
+config PPC_EARLY_DEBUG_G5
+	bool "Apple G5"
+	depends on PPC_PMAC64
+	help
+	  Select this to enable early debugging for Apple G5 machines.
+
+config PPC_EARLY_DEBUG_RTAS_PANEL
+	bool "RTAS Panel"
+	depends on PPC_RTAS
+	help
+	  Select this to enable early debugging via the RTAS panel.
+
+config PPC_EARLY_DEBUG_RTAS_CONSOLE
+	bool "RTAS Console"
+	depends on PPC_RTAS
+	select UDBG_RTAS_CONSOLE
+	help
+	  Select this to enable early debugging via the RTAS console.
+
+config PPC_EARLY_DEBUG_MAPLE
+	bool "Maple real mode"
+	depends on PPC_MAPLE
+	help
+	  Select this to enable early debugging for Maple.
+
+config PPC_EARLY_DEBUG_PAS_REALMODE
+	bool "PA Semi real mode"
+	depends on PPC_PASEMI
+	help
+	  Select this to enable early debugging for PA Semi.
+	  Output will be on UART0.
+
+config PPC_EARLY_DEBUG_44x
+	bool "Early serial debugging for IBM/AMCC 44x CPUs"
+	depends on 44x
+	help
+	  Select this to enable early debugging for IBM 44x chips via the
+	  inbuilt serial port.  If you enable this, ensure you set
+	  PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
+
+config PPC_EARLY_DEBUG_40x
+	bool "Early serial debugging for IBM/AMCC 40x CPUs"
+	depends on 40x
+	help
+	  Select this to enable early debugging for IBM 40x chips via the
+	  inbuilt serial port. This works on chips with a 16550 compatible
+	  UART.
+
+config PPC_EARLY_DEBUG_CPM
+	bool "Early serial debugging for Freescale CPM-based serial ports"
+	depends on SERIAL_CPM=y
+	help
+	  Select this to enable early debugging for Freescale chips
+	  using a CPM-based serial port.  This assumes that the bootwrapper
+	  has run, and set up the CPM in a particular way.
+
+config PPC_EARLY_DEBUG_USBGECKO
+	bool "Early debugging through the USB Gecko adapter"
+	depends on GAMECUBE_COMMON
+	select USBGECKO_UDBG
+	help
+	  Select this to enable early debugging for Nintendo GameCube/Wii
+	  consoles via an external USB Gecko adapter.
+
+config PPC_EARLY_DEBUG_PS3GELIC
+	bool "Early debugging through the PS3 Ethernet port"
+	depends on PPC_PS3
+	select PS3GELIC_UDBG
+	help
+	  Select this to enable early debugging for the PlayStation3 via
+	  UDP broadcasts sent out through the Ethernet port.
+
+config PPC_EARLY_DEBUG_OPAL_RAW
+	bool "OPAL raw console"
+	depends on HVC_OPAL
+	help
+	  Select this to enable early debugging for the PowerNV platform
+	  using a "raw" console
+
+config PPC_EARLY_DEBUG_OPAL_HVSI
+	bool "OPAL hvsi console"
+	depends on HVC_OPAL
+	help
+	  Select this to enable early debugging for the PowerNV platform
+	  using an "hvsi" console
+
+config PPC_EARLY_DEBUG_MEMCONS
+	bool "In memory console"
+	help
+	  Select this to enable early debugging using an in memory console.
+	  This console provides input and output buffers stored within the
+	  kernel BSS and should be safe to select on any system. A debugger
+	  can then be used to read kernel output or send input to the console.
+
+config PPC_EARLY_DEBUG_16550
+	bool "Serial 16550"
+	depends on PPC_UDBG_16550
+	help
+	  Select this to enable early debugging via Serial 16550 console
+endchoice
+
+config PPC_MEMCONS_OUTPUT_SIZE
+	int "In memory console output buffer size"
+	depends on PPC_EARLY_DEBUG_MEMCONS
+	default 4096
+	help
+	  Selects the size of the output buffer (in bytes) of the in memory
+	  console.
+
+config PPC_MEMCONS_INPUT_SIZE
+	int "In memory console input buffer size"
+	depends on PPC_EARLY_DEBUG_MEMCONS
+	default 128
+	help
+	  Selects the size of the input buffer (in bytes) of the in memory
+	  console.
+
+config PPC_EARLY_DEBUG_OPAL
+	def_bool y
+	depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI
+
+config PPC_EARLY_DEBUG_HVSI_VTERMNO
+	hex "vterm number to use with early debug HVSI"
+	depends on PPC_EARLY_DEBUG_LPAR_HVSI
+	default "0x30000000"
+	help
+	  You probably want 0x30000000 for your first serial port and
+	  0x30000001 for your second one
+
+config PPC_EARLY_DEBUG_OPAL_VTERMNO
+	hex "vterm number to use with OPAL early debug"
+	depends on PPC_EARLY_DEBUG_OPAL
+	default "0"
+	help
+	  This corresponds to which /dev/hvcN you want to use for early
+	  debug.
+
+	  On OPAL v2, this will be 0 for network console and 1 or 2 for
+	  the machine built-in serial ports.
+
+config PPC_EARLY_DEBUG_44x_PHYSLOW
+	hex "Low 32 bits of early debug UART physical address"
+	depends on PPC_EARLY_DEBUG_44x
+	default "0x40000200"
+	help
+	  You probably want 0x40000200 for ebony boards and
+	  0x40000300 for taishan
+
+config PPC_EARLY_DEBUG_44x_PHYSHIGH
+	hex "EPRN of early debug UART physical address"
+	depends on PPC_EARLY_DEBUG_44x
+	default "0x1"
+
+config PPC_EARLY_DEBUG_40x_PHYSADDR
+	hex "Early debug UART physical address"
+	depends on PPC_EARLY_DEBUG_40x
+	default "0xef600300"
+
+config PPC_EARLY_DEBUG_CPM_ADDR
+	hex "CPM UART early debug transmit descriptor address"
+	depends on PPC_EARLY_DEBUG_CPM
+	default "0xfa202008" if PPC_EP88XC
+	default "0xf0001ff8" if CPM2
+	default "0xff002008" if CPM1
+	help
+	  This specifies the address of the transmit descriptor
+	  used for early debug output.  Because it is needed before
+	  platform probing is done, all platforms selected must
+	  share the same address.
+
+config PPC_EARLY_DEBUG_16550_PHYSADDR
+	hex "Early debug Serial 16550 physical address"
+	depends on PPC_EARLY_DEBUG_16550
+
+config PPC_EARLY_DEBUG_16550_STRIDE
+	int "Early debug Serial 16550 stride"
+	depends on PPC_EARLY_DEBUG_16550
+	default 1
+
+config FAIL_IOMMU
+	bool "Fault-injection capability for IOMMU"
+	depends on FAULT_INJECTION
+	depends on PCI || IBMVIO
+	help
+	  Provide fault-injection capability for IOMMU. Each device can
+	  be selectively enabled via the fail_iommu property.
+
+	  If you are unsure, say N.
+
+config PPC_FAST_ENDIAN_SWITCH
+	bool "Deprecated fast endian-switch syscall"
+	depends on DEBUG_KERNEL && PPC_BOOK3S_64
+	help
+	  If you're unsure what this is, say N.
+
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0xe0000000 if PPC32
+	default 0xa80e000000000000 if PPC_BOOK3S_64
+	default 0xa8001c0000000000 if PPC_BOOK3E_64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
new file mode 100644
index 0000000000..f19dbaa1d5
--- /dev/null
+++ b/arch/powerpc/Makefile
@@ -0,0 +1,407 @@
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+# Changes for PPC by Gary Thomas
+# Rewritten by Cort Dougan and Paul Mackerras
+#
+
+HAS_BIARCH	:= $(call cc-option-yn, -m32)
+
+# Set default 32 bits cross compilers for vdso and boot wrapper
+CROSS32_COMPILE ?=
+
+# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+# ppc64_defconfig because we have nothing better to go on.
+uname := $(shell uname -m)
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
+
+new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
+
+ifeq ($(new_nm),y)
+NM		:= $(NM) --synthetic
+endif
+
+# BITS is used as extension for files which are available in a 32 bit
+# and a 64 bit version to simplify shared Makefiles.
+# e.g.: obj-y += foo_$(BITS).o
+export BITS
+
+ifdef CONFIG_PPC64
+        BITS := 64
+else
+        BITS := 32
+endif
+
+machine-y = ppc
+machine-$(CONFIG_PPC64) += 64
+machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
+UTS_MACHINE := $(subst $(space),,$(machine-y))
+
+ifeq ($(CONFIG_PPC64)$(CONFIG_LD_IS_BFD),yy)
+# Have the linker provide sfpr if possible.
+# There is a corresponding test in arch/powerpc/lib/Makefile
+KBUILD_LDFLAGS_MODULE += --save-restore-funcs
+else
+KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+endif
+
+ifdef CONFIG_CPU_LITTLE_ENDIAN
+KBUILD_CFLAGS	+= -mlittle-endian
+KBUILD_LDFLAGS	+= -EL
+LDEMULATION	:= lppc
+GNUTARGET	:= powerpcle
+MULTIPLEWORD	:= -mno-multiple
+KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
+else
+KBUILD_CFLAGS += $(call cc-option,-mbig-endian)
+KBUILD_LDFLAGS	+= -EB
+LDEMULATION	:= ppc
+GNUTARGET	:= powerpc
+MULTIPLEWORD	:= -mmultiple
+endif
+
+ifdef CONFIG_PPC64
+ifndef CONFIG_CC_IS_CLANG
+cflags-$(CONFIG_PPC64_ELF_ABI_V1)	+= $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_PPC64_ELF_ABI_V1)	+= $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_PPC64_ELF_ABI_V1)	+= $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_PPC64_ELF_ABI_V2)	+= -mabi=elfv2
+endif
+endif
+
+ifndef CONFIG_CC_IS_CLANG
+  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
+endif
+
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+
+ifeq ($(HAS_BIARCH),y)
+KBUILD_CFLAGS	+= -m$(BITS)
+KBUILD_AFLAGS	+= -m$(BITS)
+KBUILD_LDFLAGS	+= -m elf$(BITS)$(LDEMULATION)
+endif
+
+cflags-$(CONFIG_STACKPROTECTOR)	+= -mstack-protector-guard=tls
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_STACKPROTECTOR)	+= -mstack-protector-guard-reg=r13
+else
+cflags-$(CONFIG_STACKPROTECTOR)	+= -mstack-protector-guard-reg=r2
+endif
+
+LDFLAGS_vmlinux-y := -Bstatic
+LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
+LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext
+LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-y)
+
+ifdef CONFIG_PPC64
+ifndef CONFIG_PPC_KERNEL_PCREL
+ifeq ($(call cc-option-yn,-mcmodel=medium),y)
+	# -mcmodel=medium breaks modules because it uses 32bit offsets from
+	# the TOC pointer to create pointers where possible. Pointers into the
+	# percpu data area are created by this method.
+	#
+	# The kernel module loader relocates the percpu data section from the
+	# original location (starting with 0xd...) to somewhere in the base
+	# kernel percpu data space (starting with 0xc...). We need a full
+	# 64bit relocation for this to work, hence -mcmodel=large.
+	KBUILD_CFLAGS_MODULE += -mcmodel=large
+else
+	export NO_MINIMAL_TOC := -mno-minimal-toc
+endif
+endif
+endif
+
+CFLAGS-$(CONFIG_PPC64)	:= $(call cc-option,-mtraceback=no)
+ifdef CONFIG_PPC64_ELF_ABI_V2
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
+else
+ifndef CONFIG_CC_IS_CLANG
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
+endif
+endif
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mlong-double-128)
+
+# Clang unconditionally reserves r2 on ppc32 and does not support the flag
+# https://bugs.llvm.org/show_bug.cgi?id=39555
+CFLAGS-$(CONFIG_PPC32)	:= $(call cc-option, -ffixed-r2)
+
+# Clang doesn't support -mmultiple / -mno-multiple
+# https://bugs.llvm.org/show_bug.cgi?id=39556
+CFLAGS-$(CONFIG_PPC32)	+= $(call cc-option, $(MULTIPLEWORD))
+
+CFLAGS-$(CONFIG_PPC32)	+= $(call cc-option,-mno-readonly-in-sdata)
+
+ifdef CONFIG_FUNCTION_TRACER
+ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+KBUILD_CPPFLAGS	+= -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+else
+CC_FLAGS_FTRACE := -pg
+ifdef CONFIG_MPROFILE_KERNEL
+CC_FLAGS_FTRACE += -mprofile-kernel
+endif
+endif
+endif
+
+CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
+AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
+
+CFLAGS-y += $(CONFIG_TUNE_CPU)
+
+asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
+
+KBUILD_CPPFLAGS	+= -I $(srctree)/arch/$(ARCH) $(asinstr)
+KBUILD_AFLAGS	+= $(AFLAGS-y)
+KBUILD_CFLAGS	+= $(call cc-option,-msoft-float)
+KBUILD_CFLAGS	+= $(CFLAGS-y)
+CPP		= $(CC) -E $(KBUILD_CFLAGS)
+
+CHECKFLAGS	+= -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__
+ifdef CONFIG_CPU_BIG_ENDIAN
+CHECKFLAGS	+= -D__BIG_ENDIAN__
+else
+CHECKFLAGS	+= -D__LITTLE_ENDIAN__
+endif
+
+ifdef CONFIG_476FPE_ERR46
+	KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
+		-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
+endif
+
+# Use prefixed / pcrel instructions only when explicitly configured
+ifdef CONFIG_PPC_KERNEL_PREFIXED
+KBUILD_CFLAGS += $(call cc-option,-mprefixed)
+else
+KBUILD_CFLAGS += $(call cc-option,-mno-prefixed)
+endif
+ifdef CONFIG_PPC_KERNEL_PCREL
+KBUILD_CFLAGS += $(call cc-option,-mpcrel)
+else
+KBUILD_CFLAGS += $(call cc-option,-mno-pcrel)
+endif
+
+# No AltiVec or VSX or MMA instructions when building kernel
+KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
+KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
+KBUILD_CFLAGS += $(call cc-option,-mno-mma)
+
+# No SPE instruction when building kernel
+# (We use all available options to help semi-broken compilers)
+KBUILD_CFLAGS += $(call cc-option,-mno-spe)
+KBUILD_CFLAGS += $(call cc-option,-mspe=no)
+
+# Don't emit .eh_frame since we have no use for it
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+
+# Never use string load/store instructions as they are
+# often slow when they are implemented at all
+KBUILD_CFLAGS		+= $(call cc-option,-mno-string)
+
+cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
+
+# When using '-many -mpower4' gas will first try and find a matching power4
+# mnemonic and failing that it will allow any valid mnemonic that GAS knows
+# about. GCC will pass -many to GAS when assembling, clang does not.
+# LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675
+# but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway...
+cpu-as-$(CONFIG_PPC_BOOK3S_64)	+= $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many)
+
+KBUILD_AFLAGS += $(cpu-as-y)
+KBUILD_CFLAGS += $(cpu-as-y)
+
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
+# Default to zImage, override when needed
+all: zImage
+
+# With make 3.82 we cannot mix normal and wildcard targets
+BOOT_TARGETS1 := zImage zImage.initrd uImage
+BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.% uImage.%
+
+PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2)
+
+boot := arch/$(ARCH)/boot
+
+$(BOOT_TARGETS1): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+$(BOOT_TARGETS2): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+
+PHONY += bootwrapper_install
+bootwrapper_install:
+	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+include $(srctree)/scripts/Makefile.defconf
+
+generated_configs += ppc64le_defconfig
+ppc64le_defconfig:
+	$(call merge_into_defconfig,ppc64_defconfig,le)
+
+generated_configs += ppc64le_guest_defconfig
+ppc64le_guest_defconfig:
+	$(call merge_into_defconfig,ppc64_defconfig,le guest kvm_guest)
+
+generated_configs += ppc64_guest_defconfig
+ppc64_guest_defconfig:
+	$(call merge_into_defconfig,ppc64_defconfig,be guest kvm_guest)
+
+generated_configs += pseries_le_defconfig
+pseries_le_defconfig: ppc64le_guest_defconfig
+
+generated_configs += pseries_defconfig
+pseries_defconfig: ppc64le_guest_defconfig
+
+generated_configs += powernv_be_defconfig
+powernv_be_defconfig:
+	$(call merge_into_defconfig,powernv_defconfig,be)
+
+generated_configs += mpc85xx_defconfig
+mpc85xx_defconfig:
+	$(call merge_into_defconfig,mpc85xx_base.config,\
+		85xx-32bit 85xx-hw fsl-emb-nonhw)
+
+generated_configs += mpc85xx_smp_defconfig
+mpc85xx_smp_defconfig:
+	$(call merge_into_defconfig,mpc85xx_base.config,\
+		85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
+
+generated_configs += corenet32_smp_defconfig
+corenet32_smp_defconfig:
+	$(call merge_into_defconfig,corenet_base.config,\
+		85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
+
+generated_configs += corenet64_smp_defconfig
+corenet64_smp_defconfig:
+	$(call merge_into_defconfig,corenet_base.config,\
+		85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
+
+generated_configs += mpc86xx_defconfig
+mpc86xx_defconfig:
+	$(call merge_into_defconfig,mpc86xx_base.config,\
+		86xx-hw fsl-emb-nonhw)
+
+generated_configs += mpc86xx_smp_defconfig
+mpc86xx_smp_defconfig:
+	$(call merge_into_defconfig,mpc86xx_base.config,\
+		86xx-smp 86xx-hw fsl-emb-nonhw)
+
+generated_configs += ppc32_allmodconfig
+ppc32_allmodconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
+		-f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc_defconfig
+ppc_defconfig:
+	$(call merge_into_defconfig,book3s_32.config,)
+
+generated_configs += ppc64le_allmodconfig
+ppc64le_allmodconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \
+		-f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc64le_allnoconfig
+ppc64le_allnoconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/ppc64le.config \
+		-f $(srctree)/Makefile allnoconfig
+
+generated_configs += ppc64_book3e_allmodconfig
+ppc64_book3e_allmodconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \
+		-f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc32_randconfig
+ppc32_randconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \
+		-f $(srctree)/Makefile randconfig
+
+generated_configs += ppc64_randconfig
+ppc64_randconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \
+		-f $(srctree)/Makefile randconfig
+
+PHONY += $(generated_configs)
+
+define archhelp
+  echo '* zImage          - Build default images selected by kernel config'
+  echo '  zImage.*        - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
+  echo '  uImage          - U-Boot native image format'
+  echo '  cuImage.<dt>    - Backwards compatible U-Boot image for older'
+  echo '                    versions which do not support device trees'
+  echo '  dtbImage.<dt>   - zImage with an embedded device tree blob'
+  echo '  simpleImage.<dt> - Firmware independent image.'
+  echo '  treeImage.<dt>  - Support for older IBM 4xx firmware (not U-Boot)'
+  echo '  install         - Install kernel using'
+  echo '                    (your) ~/bin/$(INSTALLKERNEL) or'
+  echo '                    (distribution) /sbin/$(INSTALLKERNEL) or'
+  echo '                    install to $$(INSTALL_PATH) and run lilo'
+  echo '  *_defconfig     - Select default config from arch/$(ARCH)/configs'
+  echo ''
+  echo '  Targets with <dt> embed a device tree blob inside the image'
+  echo '  These targets support board with firmware that does not'
+  echo '  support passing a device tree directly.  Replace <dt> with the'
+  echo '  name of a dts file from the arch/$(ARCH)/boot/dts/ directory'
+  echo '  (minus the .dts extension).'
+  echo
+  $(foreach cfg,$(generated_configs),
+    printf "  %-27s - Build for %s\\n" $(cfg) $(subst _defconfig,,$(cfg));)
+endef
+
+PHONY += install
+install:
+	$(call cmd,install)
+
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+	$(if $(CONFIG_VDSO32),$(Q)$(MAKE) \
+		$(build)=arch/powerpc/kernel/vdso include/generated/vdso32-offsets.h)
+	$(if $(CONFIG_PPC64),$(Q)$(MAKE) \
+		$(build)=arch/powerpc/kernel/vdso include/generated/vdso64-offsets.h)
+endif
+
+archprepare: checkbin
+
+archheaders:
+	$(Q)$(MAKE) $(build)=arch/powerpc/kernel/syscalls all
+
+ifdef CONFIG_STACKPROTECTOR
+prepare: stack_protector_prepare
+
+PHONY += stack_protector_prepare
+stack_protector_prepare: prepare0
+ifdef CONFIG_PPC64
+	$(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h))
+else
+	$(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h))
+endif
+endif
+
+PHONY += checkbin
+checkbin:
+	@if test "x${CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT}" = "xy" -a \
+		"x${CONFIG_LD_IS_BFD}" = "xy" -a \
+		"${CONFIG_LD_VERSION}" = "23700" ; then \
+		echo -n '*** binutils 2.37 drops unused section symbols, which recordmcount ' ; \
+		echo 'is unable to handle.' ; \
+		echo '*** Please use a different binutils version.' ; \
+		false ; \
+	fi
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
new file mode 100644
index 0000000000..1f860b3c9b
--- /dev/null
+++ b/arch/powerpc/Makefile.postlink
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link powerpc pass
+# ===========================================================================
+#
+# 1. Check that vmlinux relocations look sane
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+quiet_cmd_head_check = CHKHEAD $@
+      cmd_head_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/head_check.sh "$(NM)" "$@"
+
+quiet_cmd_relocs_check = CHKREL  $@
+ifdef CONFIG_PPC_BOOK3S_64
+      cmd_relocs_check =						\
+	$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \
+	$(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@"
+else
+      cmd_relocs_check =						\
+	$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
+endif
+
+# `@true` prevents complaint when there is nothing to be done
+
+vmlinux: FORCE
+	@true
+ifdef CONFIG_PPC64
+	$(call cmd,head_check)
+endif
+ifdef CONFIG_RELOCATABLE
+	$(call if_changed,relocs_check)
+endif
+
+%.ko: FORCE
+	@true
+
+clean:
+	rm -f .tmp_symbols.txt
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
new file mode 100644
index 0000000000..a4716d138c
--- /dev/null
+++ b/arch/powerpc/boot/.gitignore
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-only
+addnote
+decompress_inflate.c
+empty.c
+hack-coff
+inffast.c
+inffast.h
+inffixed.h
+inflate.c
+inflate.h
+inftrees.c
+inftrees.h
+infutil.c
+infutil.h
+kernel-vmlinux.strip.c
+kernel-vmlinux.strip.gz
+mktree
+otheros.bld
+otheros-too-big.bld
+uImage
+cuImage.*
+dtbImage.*
+treeImage.*
+vmlinux.strip
+zImage
+zImage.initrd
+zImage.bin.*
+zImage.chrp
+zImage.coff
+zImage.epapr
+zImage.holly
+zImage.*lds
+zImage.maple
+zImage.miboot
+zImage.pmac
+zImage.pseries
+zconf.h
+zlib.h
+zutil.h
+fdt.c
+fdt.h
+fdt_ro.c
+fdt_rw.c
+fdt_strerror.c
+fdt_sw.c
+fdt_wip.c
+libfdt.h
+libfdt_internal.h
diff --git a/arch/powerpc/boot/44x.h b/arch/powerpc/boot/44x.h
new file mode 100644
index 0000000000..9b15e59522
--- /dev/null
+++ b/arch/powerpc/boot/44x.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC 44x related functions
+ *
+ * Copyright 2007 David Gibson, IBM Corporation.
+ */
+#ifndef _PPC_BOOT_44X_H_
+#define _PPC_BOOT_44X_H_
+
+void ebony_init(void *mac0, void *mac1);
+void bamboo_init(void *mac0, void *mac1);
+
+#endif /* _PPC_BOOT_44X_H_ */
diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
new file mode 100644
index 0000000000..00c4d843a0
--- /dev/null
+++ b/arch/powerpc/boot/4xx.c
@@ -0,0 +1,799 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *
+ * Based on earlier code:
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ * Copyright (C) 2009 Wind River Systems, Inc.
+ *   Updated for supporting PPC405EX on Kilauea.
+ *   Tiejun Chen <tiejun.chen@windriver.com>
+ */
+#include <stddef.h>
+#include "types.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "reg.h"
+#include "dcr.h"
+
+/*
+ * Apply the chip_11 erratum adjustment to a detected memory size.
+ *
+ * When the masked PVR matches one of the three values tested below, 4096
+ * bytes are taken off @memsize; any other processor gets it back unchanged.
+ */
+static unsigned long chip_11_errata(unsigned long memsize)
+{
+	/* Only the PVR bits kept by this mask take part in the comparison. */
+	const unsigned long id = mfpvr() & 0xf0000ff0;
+
+	if (id == 0x40000850 ||
+	    id == 0x400008d0 ||
+	    id == 0x200008d0)
+		return memsize - 4096;
+
+	return memsize;
+}
+
+/* Sum the enabled 4xx SDRAM banks; pass the adjusted total to dt_fixup_memory() */
+void ibm4xx_sdram_fixup_memsize(void)
+{
+	unsigned long total = 0;
+	int bank;
+
+	for (bank = 0; bank < ARRAY_SIZE(sdram_bxcr); bank++) {
+		unsigned long cfg = SDRAM0_READ(sdram_bxcr[bank]);
+
+		if (!(cfg & SDRAM_CONFIG_BANK_ENABLE))
+			continue;	/* enable bit clear: bank adds nothing */
+		total += SDRAM_CONFIG_BANK_SIZE(cfg);
+	}
+
+	dt_fixup_memory(0, chip_11_errata(total));
+}
+
+/* Read the 440SPe MQ controller to get size of system memory. */
+#define DCRN_MQ0_B0BAS		0x40
+#define DCRN_MQ0_B1BAS		0x41
+#define DCRN_MQ0_B2BAS		0x42
+#define DCRN_MQ0_B3BAS		0x43
+
+static u64 ibm440spe_decode_bas(u32 bas)
+{
+	u64 base = ((u64)(bas & 0xFFE00000u)) << 2;	/* widened first so the shift keeps every bit */
+
+	/* open coded because I'm paranoid about invalid values */
+	switch ((bas >> 4) & 0xFFF) {	/* 12-bit field: bits 4..15 of bas */
+	case 0:
+		return 0;
+	case 0xffc:
+		return base + 0x000800000ull;	/* + 8 MiB */
+	case 0xff8:
+		return base + 0x001000000ull;	/* + 16 MiB */
+	case 0xff0:
+		return base + 0x002000000ull;	/* + 32 MiB */
+	case 0xfe0:
+		return base + 0x004000000ull;	/* + 64 MiB */
+	case 0xfc0:
+		return base + 0x008000000ull;	/* + 128 MiB */
+	case 0xf80:
+		return base + 0x010000000ull;	/* + 256 MiB */
+	case 0xf00:
+		return base + 0x020000000ull;	/* + 512 MiB */
+	case 0xe00:
+		return base + 0x040000000ull;	/* + 1 GiB */
+	case 0xc00:
+		return base + 0x080000000ull;	/* + 2 GiB */
+	case 0x800:
+		return base + 0x100000000ull;	/* + 4 GiB */
+	}
+	printf("Memory BAS value 0x%08x unsupported !\n", bas);
+	return 0;	/* unrecognised encoding: reported above, decoded as 0 */
+}
+
+void ibm440spe_fixup_memsize(void)
+{
+	u64 banktop, memsize = 0;
+
+	/* Ultimately, we should directly construct the memory node
+	 * so we are able to handle holes in the memory address space.
+	 * For now only the highest decoded bank top is passed on. */
+	banktop = ibm440spe_decode_bas(mfdcr(DCRN_MQ0_B0BAS));
+	if (banktop > memsize)
+		memsize = banktop;	/* keep the highest value seen so far */
+	banktop = ibm440spe_decode_bas(mfdcr(DCRN_MQ0_B1BAS));
+	if (banktop > memsize)
+		memsize = banktop;
+	banktop = ibm440spe_decode_bas(mfdcr(DCRN_MQ0_B2BAS));
+	if (banktop > memsize)
+		memsize = banktop;
+	banktop = ibm440spe_decode_bas(mfdcr(DCRN_MQ0_B3BAS));
+	if (banktop > memsize)
+		memsize = banktop;
+
+	dt_fixup_memory(0, memsize);
+}
+
+
+/* 4xx DDR1/2 Denali memory controller support */
+/* DDR0 registers */
+#define DDR0_02			2
+#define DDR0_08			8
+#define DDR0_10			10
+#define DDR0_14			14
+#define DDR0_42			42
+#define DDR0_43			43
+
+/* DDR0_02 */
+#define DDR_START		0x1
+#define DDR_START_SHIFT		0
+#define DDR_MAX_CS_REG		0x3
+#define DDR_MAX_CS_REG_SHIFT	24
+#define DDR_MAX_COL_REG		0xf
+#define DDR_MAX_COL_REG_SHIFT	16
+#define DDR_MAX_ROW_REG		0xf
+#define DDR_MAX_ROW_REG_SHIFT	8
+/* DDR0_08 */
+#define DDR_DDR2_MODE		0x1
+#define DDR_DDR2_MODE_SHIFT	0
+/* DDR0_10 */
+#define DDR_CS_MAP		0x3
+#define DDR_CS_MAP_SHIFT	8
+/* DDR0_14 */
+#define DDR_REDUC		0x1
+#define DDR_REDUC_SHIFT		16
+/* DDR0_42 */
+#define DDR_APIN		0x7
+#define DDR_APIN_SHIFT		24
+/* DDR0_43 */
+#define DDR_COL_SZ		0x7
+#define DDR_COL_SZ_SHIFT	8
+#define DDR_BANK8		0x1
+#define DDR_BANK8_SHIFT		0
+
+#define DDR_GET_VAL(val, mask, shift)	(((val) >> (shift)) & (mask))
+
+/*
+ * Return the number of chipselects in use.
+ *
+ * Some U-Boot versions set the number of chipselects to two
+ * for Sequoia/Rainier boards while they only have one chipselect
+ * hardwired. Hardcode the number of chipselects to one
+ * for Sequoia/Rainier board models or read the actual value
+ * from the memory controller register DDR0_10 otherwise.
+ *
+ * The board is recognised by the "model" property of the root node.
+ * If that cannot be read, or for any other model, the result is the
+ * number of bits set in the CS map field of DDR0_10.
+ */
+static inline u32 ibm4xx_denali_get_cs(void)
+{
+	char model[64];
+	void *root;
+	u32 map, count;
+
+	root = finddevice("/");
+	if (root && getprop(root, "model", model, sizeof(model)) > 0) {
+		/* force termination before comparing */
+		model[sizeof(model)-1] = 0;
+
+		if (!strcmp(model, "amcc,sequoia") ||
+		    !strcmp(model, "amcc,rainier"))
+			return 1;
+	}
+
+	/* get CS value */
+	map = SDRAM0_READ(DDR0_10);
+	map = DDR_GET_VAL(map, DDR_CS_MAP, DDR_CS_MAP_SHIFT);
+
+	/* one chipselect per bit set in the map */
+	for (count = 0; map; map >>= 1)
+		count += map & 0x1;
+
+	return count;
+}
+
+void ibm4xx_denali_fixup_memsize(void)
+{
+	u32 val, max_cs, max_col, max_row;
+	u32 cs, col, row, bank, dpath;
+	unsigned long memsize;
+
+	val = SDRAM0_READ(DDR0_02);
+	if (!DDR_GET_VAL(val, DDR_START, DDR_START_SHIFT))
+		fatal("DDR controller is not initialized\n");
+
+	/* get maximum cs col and row values */
+	max_cs  = DDR_GET_VAL(val, DDR_MAX_CS_REG, DDR_MAX_CS_REG_SHIFT);
+	max_col = DDR_GET_VAL(val, DDR_MAX_COL_REG, DDR_MAX_COL_REG_SHIFT);
+	max_row = DDR_GET_VAL(val, DDR_MAX_ROW_REG, DDR_MAX_ROW_REG_SHIFT);
+
+	cs = ibm4xx_denali_get_cs();
+	if (!cs)
+		fatal("No memory installed\n");
+	if (cs > max_cs)
+		fatal("DDR wrong CS configuration\n");
+
+	/* get data path bytes */
+	val = SDRAM0_READ(DDR0_14);
+
+	if (DDR_GET_VAL(val, DDR_REDUC, DDR_REDUC_SHIFT))
+		dpath = 4; /* 32 bits */
+	else
+		dpath = 8; /* 64 bits */
+
+	/* get address pins (rows) */
+	val = SDRAM0_READ(DDR0_42);
+
+	row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT);
+	if (row > max_row)
+		fatal("DDR wrong APIN configuration\n");
+	row = max_row - row;	/* the field counts down from the maximum */
+
+	/* get column size and banks */
+	val = SDRAM0_READ(DDR0_43);
+
+	col = DDR_GET_VAL(val, DDR_COL_SZ, DDR_COL_SZ_SHIFT);
+	if (col > max_col)
+		fatal("DDR wrong COL configuration\n");
+	col = max_col - col;	/* the field counts down from the maximum */
+
+	if (DDR_GET_VAL(val, DDR_BANK8, DDR_BANK8_SHIFT))
+		bank = 8; /* 8 banks */
+	else
+		bank = 4; /* 4 banks */
+	/* bytes = chipselects * 2^(col+row) * banks * data path bytes */
+	memsize = cs * (1 << (col+row)) * bank * dpath;
+	memsize = chip_11_errata(memsize);
+	dt_fixup_memory(0, memsize);
+}
+
+#define SPRN_DBCR0_40X 0x3F2
+#define SPRN_DBCR0_44X 0x134
+#define DBCR0_RST_SYSTEM 0x30000000
+
+void ibm44x_dbcr_reset(void)
+{
+	unsigned long tmp;
+
+	/* Read the SPR, OR in the upper half of DBCR0_RST_SYSTEM, write it back */
+	asm volatile (
+		"mfspr	%0,%1\n"
+		"oris	%0,%0,%2@h\n"
+		"mtspr	%1,%0"
+		: "=&r"(tmp) : "i"(SPRN_DBCR0_44X), "i"(DBCR0_RST_SYSTEM)
+		);
+}
+
+void ibm40x_dbcr_reset(void)
+{
+	unsigned long tmp;	/* holds the SPR value during the read-modify-write */
+
+	asm volatile (
+		"mfspr	%0,%1\n"
+		"oris	%0,%0,%2@h\n"
+		"mtspr	%1,%0"
+		: "=&r"(tmp) : "i"(SPRN_DBCR0_40X), "i"(DBCR0_RST_SYSTEM)
+		);
+}
+
+#define EMAC_RESET 0x20000000
+void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1)
+{
+	/* Quiesce the MAL and EMAC(s) since PIBS/OpenBIOS don't
+	 * do this for us.  A NULL pointer skips that EMAC.
+	 */
+	if (emac0)
+		*emac0 = EMAC_RESET;
+	if (emac1)
+		*emac1 = EMAC_RESET;
+
+	mtdcr(DCRN_MAL0_CFG, MAL_RESET);
+	while (mfdcr(DCRN_MAL0_CFG) & MAL_RESET)
+		; /* busy-wait until the MAL_RESET bit reads back as clear */
+}
+
+/* Read 4xx EBC bus bridge registers to get mappings of the peripheral
+ * banks into the OPB address space; banks with BU off are left out */
+void ibm4xx_fixup_ebc_ranges(const char *ebc)
+{
+	u32 ranges[EBC_NUM_BANKS*4];
+	int bank, n = 0;
+	void *node;
+
+	for (bank = 0; bank < EBC_NUM_BANKS; bank++) {
+		u32 cfg;
+
+		mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(bank));
+		cfg = mfdcr(DCRN_EBC0_CFGDATA);
+		if ((cfg & EBC_BXCR_BU) == EBC_BXCR_BU_OFF)
+			continue;
+
+		ranges[n++] = bank;
+		ranges[n++] = 0;
+		ranges[n++] = cfg & EBC_BXCR_BAS;
+		ranges[n++] = EBC_BXCR_BANK_SIZE(cfg);
+	}
+
+	node = finddevice(ebc);
+	if (!node)
+		fatal("Couldn't locate EBC node %s\n\r", ebc);
+
+	setprop(node, "ranges", ranges, n * sizeof(u32));
+}
+
+/* Calculate 440GP clocks from CPC0_SYS0/CPC0_CR0 and pass them to dt_fixup_*() */
+void ibm440gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk)
+{
+	u32 sys0 = mfdcr(DCRN_CPC0_SYS0);
+	u32 cr0 = mfdcr(DCRN_CPC0_CR0);
+	u32 cpu, plb, opb, ebc, tb, uart0, uart1, m;
+	u32 opdv = CPC0_SYS0_OPDV(sys0);
+	u32 epdv = CPC0_SYS0_EPDV(sys0);
+
+	if (sys0 & CPC0_SYS0_BYPASS) {
+		/* Bypass system PLL */
+		cpu = plb = sys_clk;
+	} else {
+		if (sys0 & CPC0_SYS0_EXTSL)
+			/* PerClk */
+			m = CPC0_SYS0_FWDVB(sys0) * opdv * epdv;
+		else
+			/* CPU clock */
+			m = CPC0_SYS0_FBDV(sys0) * CPC0_SYS0_FWDVA(sys0);
+		cpu = sys_clk * m / CPC0_SYS0_FWDVA(sys0);
+		plb = sys_clk * m / CPC0_SYS0_FWDVB(sys0);
+	}
+
+	opb = plb / opdv;
+	ebc = opb / epdv;
+
+	/* FIXME: Check if this is for all 440GP, or just Ebony */
+	if ((mfpvr() & 0xf0000fff) == 0x40000440)
+		/* Rev. B 440GP, use external system clock */
+		tb = sys_clk;
+	else
+		/* Rev. C 440GP, errata force us to use internal clock */
+		tb = cpu;
+
+	if (cr0 & CPC0_CR0_U0EC)
+		/* External UART clock */
+		uart0 = ser_clk;
+	else
+		/* Internal UART clock */
+		uart0 = plb / CPC0_CR0_UDIV(cr0);
+
+	if (cr0 & CPC0_CR0_U1EC)
+		/* External UART clock */
+		uart1 = ser_clk;
+	else
+		/* Internal UART clock */
+		uart1 = plb / CPC0_CR0_UDIV(cr0);
+
+	printf("PPC440GP: SysClk = %dMHz (%x)\n\r",
+	       (sys_clk + 500000) / 1000000, sys_clk);	/* rounded to nearest MHz, then raw hex */
+
+	dt_fixup_cpu_clocks(cpu, tb, 0);
+
+	dt_fixup_clock("/plb", plb);
+	dt_fixup_clock("/plb/opb", opb);
+	dt_fixup_clock("/plb/opb/ebc", ebc);
+	dt_fixup_clock("/plb/opb/serial@40000200", uart0);
+	dt_fixup_clock("/plb/opb/serial@40000300", uart1);
+}
+
+#define SPRN_CCR1 0x378
+
+static inline u32 __fix_zero(u32 v, u32 def)
+{
+	return v ? v : def;	/* a field that reads 0 is replaced by def */
+}
+
+static unsigned int __ibm440eplike_fixup_clocks(unsigned int sys_clk,
+						unsigned int tmr_clk,
+						int per_clk_from_opb)
+{
+	/* PLL config */
+	u32 pllc  = CPR0_READ(DCRN_CPR0_PLLC);
+	u32 plld  = CPR0_READ(DCRN_CPR0_PLLD);
+
+	/* Dividers: a field that reads 0 stands for the second argument */
+	u32 fbdv   = __fix_zero((plld >> 24) & 0x1f, 32);
+	u32 fwdva  = __fix_zero((plld >> 16) & 0xf, 16);
+	u32 fwdvb  = __fix_zero((plld >> 8) & 7, 8);
+	u32 lfbdv  = __fix_zero(plld & 0x3f, 64);
+	u32 pradv0 = __fix_zero((CPR0_READ(DCRN_CPR0_PRIMAD) >> 24) & 7, 8);
+	u32 prbdv0 = __fix_zero((CPR0_READ(DCRN_CPR0_PRIMBD) >> 24) & 7, 8);
+	u32 opbdv0 = __fix_zero((CPR0_READ(DCRN_CPR0_OPBD) >> 24) & 3, 4);
+	u32 perdv0 = __fix_zero((CPR0_READ(DCRN_CPR0_PERD) >> 24) & 3, 4);
+
+	/* Input clocks for primary dividers */
+	u32 clk_a, clk_b;
+
+	/* Resulting clocks */
+	u32 cpu, plb, opb, ebc, vco;
+
+	/* Timebase */
+	u32 ccr1, tb = tmr_clk;
+
+	if (pllc & 0x40000000) {
+		u32 m;
+
+		/* Feedback path */
+		switch ((pllc >> 24) & 7) {
+		case 0:
+			/* PLLOUTx */
+			m = ((pllc & 0x20000000) ? fwdvb : fwdva) * lfbdv;
+			break;
+		case 1:
+			/* CPU */
+			m = fwdva * pradv0;
+			break;
+		case 5:
+			/* PERClk */
+			m = fwdvb * prbdv0 * opbdv0 * perdv0;
+			break;
+		default:
+			printf("WARNING ! Invalid PLL feedback source !\n");
+			goto bypass;	/* fall back to the bypass settings below */
+		}
+		m *= fbdv;
+		vco = sys_clk * m;
+		clk_a = vco / fwdva;
+		clk_b = vco / fwdvb;
+	} else {
+bypass:
+		/* Bypass system PLL */
+		vco = 0;
+		clk_a = clk_b = sys_clk;
+	}
+
+	cpu = clk_a / pradv0;
+	plb = clk_b / prbdv0;
+	opb = plb / opbdv0;
+	ebc = (per_clk_from_opb ? opb : plb) / perdv0;
+
+	/* Figure out timebase.  Either CPU or default TmrClk */
+	ccr1 = mfspr(SPRN_CCR1);
+
+	/* If passed a 0 tmr_clk, force CPU clock */
+	if (tb == 0) {
+		ccr1 &= ~0x80u;
+		mtspr(SPRN_CCR1, ccr1);
+	}
+	if ((ccr1 & 0x0080) == 0)	/* bit 0x80 clear: the timebase is the CPU clock */
+		tb = cpu;
+
+	dt_fixup_cpu_clocks(cpu, tb, 0);
+	dt_fixup_clock("/plb", plb);
+	dt_fixup_clock("/plb/opb", opb);
+	dt_fixup_clock("/plb/opb/ebc", ebc);
+
+	return plb;	/* the callers feed this into the UART clock fixups */
+}
+
+static void eplike_fixup_uart_clk(int index, const char *path,
+				  unsigned int ser_clk,
+				  unsigned int plb_clk)
+{
+	unsigned int sdr;
+	unsigned int clock;
+
+	switch (index) {	/* pick the SDR0 register of UART 0..3 */
+	case 0:
+		sdr = SDR0_READ(DCRN_SDR0_UART0);
+		break;
+	case 1:
+		sdr = SDR0_READ(DCRN_SDR0_UART1);
+		break;
+	case 2:
+		sdr = SDR0_READ(DCRN_SDR0_UART2);
+		break;
+	case 3:
+		sdr = SDR0_READ(DCRN_SDR0_UART3);
+		break;
+	default:
+		return;	/* unknown index: @path is left untouched */
+	}
+
+	if (sdr & 0x00800000u)	/* bit set: ser_clk is used as-is */
+		clock = ser_clk;
+	else	/* otherwise plb_clk divided by the low byte, where 0 means 256 */
+		clock = plb_clk / __fix_zero(sdr & 0xff, 256);
+
+	dt_fixup_clock(path, clock);
+}
+
+void ibm440ep_fixup_clocks(unsigned int sys_clk,
+			   unsigned int ser_clk,
+			   unsigned int tmr_clk)
+{
+	unsigned int plb_clk = __ibm440eplike_fixup_clocks(sys_clk, tmr_clk, 0);
+
+	/* each of the four UARTs gets either ser_clk or a clock divided down from PLB */
+	eplike_fixup_uart_clk(0, "/plb/opb/serial@ef600300", ser_clk, plb_clk);
+	eplike_fixup_uart_clk(1, "/plb/opb/serial@ef600400", ser_clk, plb_clk);
+	eplike_fixup_uart_clk(2, "/plb/opb/serial@ef600500", ser_clk, plb_clk);
+	eplike_fixup_uart_clk(3, "/plb/opb/serial@ef600600", ser_clk, plb_clk);
+}
+
+void ibm440gx_fixup_clocks(unsigned int sys_clk,
+			   unsigned int ser_clk,
+			   unsigned int tmr_clk)
+{
+	unsigned int plb_clk = __ibm440eplike_fixup_clocks(sys_clk, tmr_clk, 1);
+
+	/* each of the two UARTs gets either ser_clk or a clock divided down from PLB */
+	eplike_fixup_uart_clk(0, "/plb/opb/serial@40000200", ser_clk, plb_clk);
+	eplike_fixup_uart_clk(1, "/plb/opb/serial@40000300", ser_clk, plb_clk);
+}
+
+void ibm440spe_fixup_clocks(unsigned int sys_clk,
+			    unsigned int ser_clk,
+			    unsigned int tmr_clk)
+{
+	unsigned int plb_clk = __ibm440eplike_fixup_clocks(sys_clk, tmr_clk, 1);
+
+	/* each of the three UARTs gets either ser_clk or a clock divided down from PLB */
+	eplike_fixup_uart_clk(0, "/plb/opb/serial@f0000200", ser_clk, plb_clk);
+	eplike_fixup_uart_clk(1, "/plb/opb/serial@f0000300", ser_clk, plb_clk);
+	eplike_fixup_uart_clk(2, "/plb/opb/serial@f0000600", ser_clk, plb_clk);
+}
+
+void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk)
+{
+	u32 pllmr = mfdcr(DCRN_CPC0_PLLMR);
+	u32 cpc0_cr0 = mfdcr(DCRN_405_CPC0_CR0);
+	u32 cpc0_cr1 = mfdcr(DCRN_405_CPC0_CR1);
+	u32 psr = mfdcr(DCRN_405_CPC0_PSR);
+	u32 cpu, plb, opb, ebc, tb, uart0, uart1, m;
+	u32 fwdv, fwdvb, fbdv, cbdv, opdv, epdv, ppdv, udiv;
+
+	fwdv = (8 - ((pllmr & 0xe0000000) >> 29));
+	fbdv = (pllmr & 0x1e000000) >> 25;
+	if (fbdv == 0)	/* a field value of 0 stands for 16 */
+		fbdv = 16;
+	cbdv = ((pllmr & 0x00060000) >> 17) + 1; /* CPU:PLB */
+	opdv = ((pllmr & 0x00018000) >> 15) + 1; /* PLB:OPB */
+	ppdv = ((pllmr & 0x00006000) >> 13) + 1; /* PLB:PCI */
+	epdv = ((pllmr & 0x00001800) >> 11) + 2; /* PLB:EBC */
+	udiv = ((cpc0_cr0 & 0x3e) >> 1) + 1;
+
+	/* check for 405GPr */
+	if ((mfpvr() & 0xfffffff0) == (0x50910951 & 0xfffffff0)) {
+		fwdvb = 8 - (pllmr & 0x00000007);
+		if (!(psr & 0x00001000)) /* PCI async mode enable == 0 */
+			if (psr & 0x00000020) /* New mode enable */
+				m = fwdvb * 2 * ppdv;
+			else	/* pairs with the New mode test just above */
+				m = fwdvb * cbdv * ppdv;
+		else if (psr & 0x00000020) /* PCI async mode enabled; New mode enable */
+			if (psr & 0x00000800) /* PerClk synch mode */
+				m = fwdvb * 2 * epdv;
+			else	/* pairs with the PerClk synch test just above */
+				m = fbdv * fwdv;
+		else if (epdv == fbdv)
+			m = fbdv * cbdv * epdv;
+		else
+			m = fbdv * fwdvb * cbdv;
+
+		cpu = sys_clk * m / fwdv;
+		plb = sys_clk * m / (fwdvb * cbdv);
+	} else {
+		m = fwdv * fbdv * cbdv;
+		cpu = sys_clk * m / fwdv;
+		plb = cpu / cbdv;
+	}
+	opb = plb / opdv;
+	ebc = plb / epdv;
+
+	if (cpc0_cr0 & 0x80)
+		/* uart0 uses the external clock */
+		uart0 = ser_clk;
+	else
+		uart0 = cpu / udiv;
+
+	if (cpc0_cr0 & 0x40)
+		/* uart1 uses the external clock */
+		uart1 = ser_clk;
+	else
+		uart1 = cpu / udiv;
+
+	/* setup the timebase clock to tick at the cpu frequency */
+	cpc0_cr1 = cpc0_cr1 & ~0x00800000;
+	mtdcr(DCRN_405_CPC0_CR1, cpc0_cr1);
+	tb = cpu;
+
+	dt_fixup_cpu_clocks(cpu, tb, 0);
+	dt_fixup_clock("/plb", plb);
+	dt_fixup_clock("/plb/opb", opb);
+	dt_fixup_clock("/plb/ebc", ebc);
+	dt_fixup_clock("/plb/opb/serial@ef600300", uart0);
+	dt_fixup_clock("/plb/opb/serial@ef600400", uart1);
+}
+
+
+void ibm405ep_fixup_clocks(unsigned int sys_clk)
+{
+	u32 pllmr0 = mfdcr(DCRN_CPC0_PLLMR0);
+	u32 pllmr1 = mfdcr(DCRN_CPC0_PLLMR1);
+	u32 cpc0_ucr = mfdcr(DCRN_CPC0_UCR);
+	u32 cpu, plb, opb, ebc, uart0, uart1;
+	u32 fwdva, fwdvb, fbdv, cbdv, opdv, epdv;
+	u32 pllmr0_ccdv, tb, m;
+
+	fwdva = 8 - ((pllmr1 & 0x00070000) >> 16);
+	fwdvb = 8 - ((pllmr1 & 0x00007000) >> 12);
+	fbdv = (pllmr1 & 0x00f00000) >> 20;
+	if (fbdv == 0)	/* a field value of 0 stands for 16 */
+		fbdv = 16;
+
+	cbdv = ((pllmr0 & 0x00030000) >> 16) + 1; /* CPU:PLB */
+	epdv = ((pllmr0 & 0x00000300) >> 8) + 2;  /* PLB:EBC */
+	opdv = ((pllmr0 & 0x00003000) >> 12) + 1; /* PLB:OPB */
+
+	m = fbdv * fwdvb;
+
+	pllmr0_ccdv = ((pllmr0 & 0x00300000) >> 20) + 1;
+	if (pllmr1 & 0x80000000)	/* top bit set: scale sys_clk by m / fwdva */
+		cpu = sys_clk * m / (fwdva * pllmr0_ccdv);
+	else
+		cpu = sys_clk / pllmr0_ccdv;
+
+	plb = cpu / cbdv;
+	opb = plb / opdv;
+	ebc = plb / epdv;
+	tb = cpu;
+	uart0 = cpu / (cpc0_ucr & 0x0000007f);	/* NOTE(review): divisor is 0 if this field reads 0 - confirm it cannot */
+	uart1 = cpu / ((cpc0_ucr & 0x00007f00) >> 8);	/* NOTE(review): same concern for the second field */
+
+	dt_fixup_cpu_clocks(cpu, tb, 0);
+	dt_fixup_clock("/plb", plb);
+	dt_fixup_clock("/plb/opb", opb);
+	dt_fixup_clock("/plb/ebc", ebc);
+	dt_fixup_clock("/plb/opb/serial@ef600300", uart0);
+	dt_fixup_clock("/plb/opb/serial@ef600400", uart1);
+}
+
+static const u8 ibm405ex_fwdv_multi_bits[] = {
+	/* register encodings of divider values 1 - 16, in that order */
+	0x01, 0x02, 0x0e, 0x09, 0x04, 0x0b, 0x10, 0x0d, 0x0c, 0x05,
+	0x06, 0x0f, 0x0a, 0x07, 0x08, 0x03
+};
+
+/* Map a raw FWDV field to its divider (1-16); 0 if the encoding is unknown. */
+u32 ibm405ex_get_fwdva(unsigned long cpr_fwdv)
+{
+	u32 index;
+
+	for (index = 0; index < ARRAY_SIZE(ibm405ex_fwdv_multi_bits); index++)
+		if (cpr_fwdv == (u32)ibm405ex_fwdv_multi_bits[index])
+			return index + 1;
+	return 0;
+}
+
+static const u8 ibm405ex_fbdv_multi_bits[] = {
+	/* values for:  1 - 100 */
+	0x00, 0xff, 0x7e, 0xfd, 0x7a, 0xf5, 0x6a, 0xd5, 0x2a, 0xd4,
+	0x29, 0xd3, 0x26, 0xcc, 0x19, 0xb3, 0x67, 0xce, 0x1d, 0xbb,
+	0x77, 0xee, 0x5d, 0xba, 0x74, 0xe9, 0x52, 0xa5, 0x4b, 0x96,
+	0x2c, 0xd8, 0x31, 0xe3, 0x46, 0x8d, 0x1b, 0xb7, 0x6f, 0xde,
+	0x3d, 0xfb, 0x76, 0xed, 0x5a, 0xb5, 0x6b, 0xd6, 0x2d, 0xdb,
+	0x36, 0xec, 0x59, 0xb2, 0x64, 0xc9, 0x12, 0xa4, 0x48, 0x91,
+	0x23, 0xc7, 0x0e, 0x9c, 0x38, 0xf0, 0x61, 0xc2, 0x05, 0x8b,
+	0x17, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0x72, 0xe5, 0x4a, 0x95,
+	0x2b, 0xd7, 0x2e, 0xdc, 0x39, 0xf3, 0x66, 0xcd, 0x1a, 0xb4,
+	0x68, 0xd1, 0x22, 0xc4, 0x09, 0x93, 0x27, 0xcf, 0x1e, 0xbc,
+	/* values for:  101 - 200 */
+	0x78, 0xf1, 0x62, 0xc5, 0x0a, 0x94, 0x28, 0xd0, 0x21, 0xc3,
+	0x06, 0x8c, 0x18, 0xb0, 0x60, 0xc1, 0x02, 0x84, 0x08, 0x90,
+	0x20, 0xc0, 0x01, 0x83, 0x07, 0x8f, 0x1f, 0xbf, 0x7f, 0xfe,
+	0x7d, 0xfa, 0x75, 0xea, 0x55, 0xaa, 0x54, 0xa9, 0x53, 0xa6,
+	0x4c, 0x99, 0x33, 0xe7, 0x4e, 0x9d, 0x3b, 0xf7, 0x6e, 0xdd,
+	0x3a, 0xf4, 0x69, 0xd2, 0x25, 0xcb, 0x16, 0xac, 0x58, 0xb1,
+	0x63, 0xc6, 0x0d, 0x9b, 0x37, 0xef, 0x5e, 0xbd, 0x7b, 0xf6,
+	0x6d, 0xda, 0x35, 0xeb, 0x56, 0xad, 0x5b, 0xb6, 0x6c, 0xd9,
+	0x32, 0xe4, 0x49, 0x92, 0x24, 0xc8, 0x11, 0xa3, 0x47, 0x8e,
+	0x1c, 0xb8, 0x70, 0xe1, 0x42, 0x85, 0x0b, 0x97, 0x2f, 0xdf,
+	/* values for:  201 - 255 */
+	0x3e, 0xfc, 0x79, 0xf2, 0x65, 0xca, 0x15, 0xab, 0x57, 0xae,
+	0x5c, 0xb9, 0x73, 0xe6, 0x4d, 0x9a, 0x34, 0xe8, 0x51, 0xa2,
+	0x44, 0x89, 0x13, 0xa7, 0x4f, 0x9e, 0x3c, 0xf8, 0x71, 0xe2,
+	0x45, 0x8a, 0x14, 0xa8, 0x50, 0xa1, 0x43, 0x86, 0x0c, 0x98,
+	0x30, 0xe0, 0x41, 0x82, 0x04, 0x88, 0x10, 0xa0, 0x40, 0x81,
+	0x03, 0x87, 0x0f, 0x9f, 0x3f  /* END */
+};
+
+/* Map a raw FBDV field to its divider (1-255); 0 if the encoding is unknown. */
+u32 ibm405ex_get_fbdv(unsigned long cpr_fbdv)
+{
+	u32 index;
+
+	for (index = 0; index < ARRAY_SIZE(ibm405ex_fbdv_multi_bits); index++)
+		if (cpr_fbdv == (u32)ibm405ex_fbdv_multi_bits[index])
+			return index + 1;
+	return 0;
+}
+
+void ibm405ex_fixup_clocks(unsigned int sys_clk, unsigned int uart_clk)
+{
+	/* PLL config */
+	u32 pllc  = CPR0_READ(DCRN_CPR0_PLLC);
+	u32 plld  = CPR0_READ(DCRN_CPR0_PLLD);
+	u32 cpud  = CPR0_READ(DCRN_CPR0_PRIMAD);
+	u32 plbd  = CPR0_READ(DCRN_CPR0_PRIMBD);
+	u32 opbd  = CPR0_READ(DCRN_CPR0_OPBD);
+	u32 perd  = CPR0_READ(DCRN_CPR0_PERD);
+
+	/* Dividers */
+	u32 fbdv   = ibm405ex_get_fbdv(__fix_zero((plld >> 24) & 0xff, 1));
+	/* NOTE(review): get_fwdva() returns 0 for an unknown encoding, and fwdva is a divisor below */
+	u32 fwdva  = ibm405ex_get_fwdva(__fix_zero((plld >> 16) & 0x0f, 1));
+
+	u32 cpudv0 = __fix_zero((cpud >> 24) & 7, 8);
+
+	/* PLBDV0 is hardwired to 010. */
+	u32 plbdv0 = 2;
+	u32 plb2xdv0 = __fix_zero((plbd >> 16) & 7, 8);
+
+	u32 opbdv0 = __fix_zero((opbd >> 24) & 3, 4);
+
+	u32 perdv0 = __fix_zero((perd >> 24) & 3, 4);
+
+	/* Resulting clocks */
+	u32 cpu, plb, opb, ebc, vco, tb, uart0, uart1;
+
+	/* PLL's VCO is the source for primary forward ? */
+	if (pllc & 0x40000000) {
+		u32 m;
+
+		/* Feedback path */
+		switch ((pllc >> 24) & 7) {
+		case 0:
+			/* PLLOUTx */
+			m = fbdv;
+			break;
+		case 1:
+			/* CPU */
+			m = fbdv * fwdva * cpudv0;
+			break;
+		case 5:
+			/* PERClk */
+			m = fbdv * fwdva * plb2xdv0 * plbdv0 * opbdv0 * perdv0;
+			break;
+		default:
+			printf("WARNING ! Invalid PLL feedback source !\n");
+			goto bypass;
+		}
+
+		vco = (unsigned int)(sys_clk * m);
+	} else {
+bypass:
+		/* Bypass system PLL */
+		vco = 0;	/* so cpu, plb, opb and ebc below all come out as 0 */
+	}
+
+	/* CPU = VCO / ( FWDVA x CPUDV0) */
+	cpu = vco / (fwdva * cpudv0);
+	/* PLB = VCO / ( FWDVA x PLB2XDV0 x PLBDV0) */
+	plb = vco / (fwdva * plb2xdv0 * plbdv0);
+	/* OPB = PLB / OPBDV0 */
+	opb = plb / opbdv0;
+	/* EBC = OPB / PERDV0 */
+	ebc = opb / perdv0;
+
+	tb = cpu;
+	uart0 = uart1 = uart_clk;
+
+	dt_fixup_cpu_clocks(cpu, tb, 0);
+	dt_fixup_clock("/plb", plb);
+	dt_fixup_clock("/plb/opb", opb);
+	dt_fixup_clock("/plb/opb/ebc", ebc);
+	dt_fixup_clock("/plb/opb/serial@ef600200", uart0);
+	dt_fixup_clock("/plb/opb/serial@ef600300", uart1);
+}
diff --git a/arch/powerpc/boot/4xx.h b/arch/powerpc/boot/4xx.h
new file mode 100644
index 0000000000..77f15d124c
--- /dev/null
+++ b/arch/powerpc/boot/4xx.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC 4xx related functions
+ *
+ * Copyright 2007 IBM Corporation.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ */
+#ifndef _POWERPC_BOOT_4XX_H_
+#define _POWERPC_BOOT_4XX_H_
+
+void ibm4xx_sdram_fixup_memsize(void);
+void ibm440spe_fixup_memsize(void);
+void ibm4xx_denali_fixup_memsize(void);
+void ibm44x_dbcr_reset(void);
+void ibm40x_dbcr_reset(void);
+void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1);
+void ibm4xx_fixup_ebc_ranges(const char *ebc);
+
+void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk);
+void ibm405ep_fixup_clocks(unsigned int sys_clk);
+void ibm405ex_fixup_clocks(unsigned int sys_clk, unsigned int uart_clk);
+void ibm440gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk);
+void ibm440ep_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk,
+			   unsigned int tmr_clk);
+void ibm440gx_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk,
+			   unsigned int tmr_clk);
+void ibm440spe_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk,
+			    unsigned int tmr_clk);
+
+#endif /* _POWERPC_BOOT_4XX_H_ */
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
new file mode 100644
index 0000000000..968aee2025
--- /dev/null
+++ b/arch/powerpc/boot/Makefile
@@ -0,0 +1,512 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for making ELF bootable images for booting on CHRP
+# using Open Firmware.
+#
+# Geert Uytterhoeven	September 1997
+#
+# Based on coffboot by Paul Mackerras
+# Simplified for ppc64 by Todd Inglett
+#
+# NOTE:	this code may be built for 32 bit in ELF32 format even though
+#	it packages a 64 bit kernel.  We do this to simplify the
+#	bootloader and increase compatibility with OpenFirmware.
+#
+#	To this end we need to define BOOTCC, etc, as the tools
+#	needed to build the 32 bit image.  That's normally the same
+#	compiler as for the rest of the kernel, with the -m32 flag added.
+#	To make it easier to set up a cross compiler,
+#	CROSS32_COMPILE is set up as a prefix just like CROSS_COMPILE
+#	in the toplevel makefile.
+
+all: $(obj)/zImage
+
+ifdef CROSS32_COMPILE
+ifdef CONFIG_CC_IS_CLANG
+    BOOTCC := $(CROSS32_COMPILE)clang
+else # !CONFIG_CC_IS_CLANG
+    BOOTCC := $(CROSS32_COMPILE)gcc
+endif # CONFIG_CC_IS_CLANG
+    BOOTAR := $(CROSS32_COMPILE)ar
+else # !CROSS32_COMPILE: use the kernel's own $(CC) and $(AR)
+    BOOTCC := $(CC)
+    BOOTAR := $(AR)
+endif # CROSS32_COMPILE
+
+ifdef CONFIG_PPC64_BOOT_WRAPPER
+BOOTTARGETFLAGS	+= -m64
+BOOTTARGETFLAGS	+= -mabi=elfv2
+ifdef CONFIG_PPC64_ELF_ABI_V2
+BOOTTARGETFLAGS	+= $(call cc-option,-mabi=elfv2)
+endif # CONFIG_PPC64_ELF_ABI_V2 -- NOTE(review): -mabi=elfv2 is already added unconditionally above; confirm this probe is still needed
+else # !CONFIG_PPC64_BOOT_WRAPPER: the wrapper is built 32-bit
+BOOTTARGETFLAGS	:= -m32
+endif # CONFIG_PPC64_BOOT_WRAPPER
+
+ifdef CONFIG_TARGET_CPU_BOOL
+BOOTTARGETFLAGS	+= -mcpu=$(CONFIG_TARGET_CPU)
+else ifdef CONFIG_PPC64_BOOT_WRAPPER
+ifdef CONFIG_CPU_LITTLE_ENDIAN
+BOOTTARGETFLAGS	+= -mcpu=powerpc64le
+else # !CONFIG_CPU_LITTLE_ENDIAN
+BOOTTARGETFLAGS	+= -mcpu=powerpc64
+endif # CONFIG_CPU_LITTLE_ENDIAN
+endif # CONFIG_TARGET_CPU_BOOL / CONFIG_PPC64_BOOT_WRAPPER
+# Target-specific additions: each object below gets an explicit -mcpu appended to its flags.
+$(obj)/4xx.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/ebony.o: BOOTTARGETFLAGS += -mcpu=440
+$(obj)/cuboot-hotfoot.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/cuboot-taishan.o: BOOTTARGETFLAGS += -mcpu=440
+$(obj)/cuboot-katmai.o: BOOTTARGETFLAGS += -mcpu=440
+$(obj)/cuboot-acadia.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/treeboot-iss4xx.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/treeboot-currituck.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/treeboot-akebono.o: BOOTTARGETFLAGS += -mcpu=405
+
+ifdef CONFIG_CPU_BIG_ENDIAN
+BOOTTARGETFLAGS	+= -mbig-endian
+else # !CONFIG_CPU_BIG_ENDIAN
+BOOTTARGETFLAGS	+= -mlittle-endian
+endif # CONFIG_CPU_BIG_ENDIAN
+
+BOOTCPPFLAGS	:= -nostdinc $(LINUXINCLUDE)
+BOOTCPPFLAGS	+= -isystem $(shell $(BOOTCC) -print-file-name=include)
+
+BOOTCFLAGS	:= $(BOOTTARGETFLAGS) \
+		   -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+		   -fno-strict-aliasing -O2 \
+		   -msoft-float -mno-altivec -mno-vsx \
+		   $(call cc-option,-mno-prefixed) \
+		   $(call cc-option,-mno-pcrel) \
+		   $(call cc-option,-mno-mma) \
+		   $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \
+		   -fomit-frame-pointer -fno-builtin -fPIC
+
+BOOTAFLAGS	:= $(BOOTTARGETFLAGS) -D__ASSEMBLY__
+
+BOOTARFLAGS	:= -crD
+
+ifdef CONFIG_CC_IS_CLANG
+BOOTCFLAGS += $(CLANG_FLAGS)
+BOOTAFLAGS += $(CLANG_FLAGS)
+endif # CONFIG_CC_IS_CLANG
+
+ifdef CONFIG_DEBUG_INFO
+BOOTCFLAGS	+= -g
+endif # CONFIG_DEBUG_INFO
+
+ifeq ($(call cc-option-yn, -fstack-protector),y)
+BOOTCFLAGS	+= -fno-stack-protector
+endif # compiler accepts -fstack-protector, so turn it off explicitly for the wrapper
+
+BOOTCFLAGS	+= -include $(srctree)/include/linux/compiler_attributes.h
+BOOTCFLAGS	+= -I$(objtree)/$(obj) -I$(srctree)/$(obj)
+
+DTC_FLAGS	?= -p 1024
+
+# The pre-boot decompressors pull in a lot of kernel headers and other source
+# files. This creates a bit of a dependency headache since we need to copy
+# these files into the build dir, fix up any includes and ensure that dependent
+# files are copied in the right order.
+
+# These need to be separate variables because they are copied out of different
+# directories in the kernel tree. Sure you COULD merge them, but it's a
+# cure-is-worse-than-disease situation.
+zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c
+zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c
+zlibheader-$(CONFIG_KERNEL_GZIP) := inffast.h inffixed.h inflate.h inftrees.h infutil.h
+zliblinuxheader-$(CONFIG_KERNEL_GZIP) := zlib.h zconf.h zutil.h
+
+$(addprefix $(obj)/, decompress.o): \
+	$(addprefix $(obj)/,$(zlib-decomp-y))
+
+$(addprefix $(obj)/, $(zlib-decomp-y)): \
+	$(addprefix $(obj)/,$(zliblinuxheader-y)) \
+	$(addprefix $(obj)/,$(zlibheader-y)) \
+	$(addprefix $(obj)/,$(zlib-y))
+
+$(addprefix $(obj)/,$(zlib-y)): \
+	$(addprefix $(obj)/,$(zliblinuxheader-y)) \
+	$(addprefix $(obj)/,$(zlibheader-y))
+
+libfdt       := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
+libfdtheader := fdt.h libfdt.h libfdt_internal.h
+
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+	treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
+	$(addprefix $(obj)/,$(libfdtheader))
+
+src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
+		$(libfdt) libfdt-wrapper.c \
+		ns16550.c serial.c simple_alloc.c div64.S util.S \
+		elf_util.c $(zlib-y) devtree.c stdlib.c \
+		oflib.c ofconsole.c cuboot.c
+
+src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c
+src-wlib-$(CONFIG_PPC_POWERNV) += opal-calls.S opal.c
+ifndef CONFIG_PPC64_BOOT_WRAPPER
+src-wlib-y += crtsavres.S
+endif # !CONFIG_PPC64_BOOT_WRAPPER: crtsavres.S is only added for the 32-bit wrapper
+src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
+src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
+src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
+src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_CPM) += cpm-serial.c
+
+src-plat-y := of.c epapr.c
+src-plat-$(CONFIG_40x) += fixed-head.S cuboot-hotfoot.c \
+				cuboot-acadia.c \
+				cuboot-kilauea.c simpleboot.c
+src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \
+				cuboot-bamboo.c cuboot-sam440ep.c \
+				cuboot-sequoia.c cuboot-rainier.c \
+				cuboot-taishan.c cuboot-katmai.c \
+				cuboot-warp.c cuboot-yosemite.c \
+				treeboot-iss4xx.c treeboot-currituck.c \
+				treeboot-akebono.c \
+				simpleboot.c fixed-head.S
+src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
+src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c
+src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c
+src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c
+src-plat-$(CONFIG_FSL_SOC_BOOKE) += cuboot-85xx.c cuboot-85xx-cpm2.c
+src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c \
+					gamecube-head.S gamecube.c \
+					wii-head.S wii.c holly.c \
+					fixed-head.S mvme5100.c
+src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
+src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
+src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
+src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
+src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
+src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
+src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c
+
+src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c
+
+src-wlib := $(sort $(src-wlib-y))
+src-plat := $(sort $(src-plat-y))
+src-boot := $(src-wlib) $(src-plat) empty.c
+
+src-boot := $(addprefix $(obj)/, $(src-boot))
+obj-boot := $(addsuffix .o, $(basename $(src-boot)))
+obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib))))
+obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat))))
+obj-plat: $(libfdt)
+
+quiet_cmd_copy_kern_src = COPY    $@
+      cmd_copy_kern_src = sed -f $(srctree)/arch/powerpc/boot/fixup-headers.sed $< > $@
+
+$(addprefix $(obj)/,$(zlib-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
+	$(call cmd,copy_kern_src)
+
+$(addprefix $(obj)/,$(zlibheader-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
+	$(call cmd,copy_kern_src)
+
+$(addprefix $(obj)/,$(zliblinuxheader-y)): $(obj)/%: $(srctree)/include/linux/%
+	$(call cmd,copy_kern_src)
+
+$(addprefix $(obj)/,$(zlib-decomp-y)): $(obj)/%: $(srctree)/lib/%
+	$(call cmd,copy_kern_src)
+
+quiet_cmd_copy_libfdt = COPY    $@
+      cmd_copy_libfdt = cp $< $@
+
+$(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/%
+	$(call cmd,copy_libfdt)
+
+$(obj)/empty.c:
+	$(Q)touch $@
+
+$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
+	$(Q)cp $< $@
+
+clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
+		$(zlib-decomp-) $(libfdt) $(libfdtheader) \
+		empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
+
+quiet_cmd_bootcc = BOOTCC  $@
+      cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCPPFLAGS) $(BOOTCFLAGS) -c -o $@ $<
+
+quiet_cmd_bootas = BOOTAS  $@
+      cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCPPFLAGS) $(BOOTAFLAGS) -c -o $@ $<
+
+quiet_cmd_bootar = BOOTAR  $@
+      cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(real-prereqs); mv $@.$$$$ $@
+
+$(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
+	$(call if_changed_dep,bootcc)
+$(patsubst %.c,%.o, $(filter %.c, $(src-boot))): %.o: %.c FORCE
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,bootcc)
+$(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S FORCE
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,bootas)
+
+$(obj)/wrapper.a: $(obj-wlib) FORCE
+	$(call if_changed,bootar)
+
+hostprogs	:= addnote hack-coff mktree
+
+targets		+= $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds
+extra-y		:= $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
+		   $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds
+
+dtstree		:= $(srctree)/$(src)/dts
+
+wrapper		:=$(srctree)/$(src)/wrapper
+wrapperbits	:= $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \
+			$(wrapper) FORCE
+
+#############
+# Bits for building various flavours of zImage
+
+ifneq ($(CROSS32_COMPILE),)
+CROSSWRAP := -C "$(CROSS32_COMPILE)"
+else # CROSS32_COMPILE is empty: fall back to CROSS_COMPILE when that is set
+ifneq ($(CROSS_COMPILE),)
+CROSSWRAP := -C "$(CROSS_COMPILE)"
+endif # CROSS_COMPILE non-empty
+endif # CROSS32_COMPILE non-empty; CROSSWRAP is passed to the wrapper script by cmd_wrap
+
+compressor-$(CONFIG_KERNEL_GZIP) := gz
+compressor-$(CONFIG_KERNEL_XZ)   := xz
+compressor-$(CONFIG_KERNEL_LZMA)   := lzma
+compressor-$(CONFIG_KERNEL_LZO) := lzo
+
+# args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts, 4 = dtb, 5 = initrd
+quiet_cmd_wrap	= WRAP    $@
+      cmd_wrap	=$(CONFIG_SHELL) $(wrapper) -Z $(compressor-y) -c -o $@ -p $2 \
+		$(CROSSWRAP) $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) \
+		vmlinux
+
+image-$(CONFIG_PPC_PSERIES)		+= zImage.pseries
+image-$(CONFIG_PPC_POWERNV)		+= zImage.pseries
+image-$(CONFIG_PPC_MAPLE)		+= zImage.maple
+image-$(CONFIG_PPC_IBM_CELL_BLADE)	+= zImage.pseries
+image-$(CONFIG_PPC_PS3)			+= dtbImage.ps3
+image-$(CONFIG_PPC_CHRP)		+= zImage.chrp
+image-$(CONFIG_PPC_EFIKA)		+= zImage.chrp
+image-$(CONFIG_PPC_PMAC)		+= zImage.pmac
+image-$(CONFIG_PPC_HOLLY)		+= dtbImage.holly
+image-$(CONFIG_DEFAULT_UIMAGE)		+= uImage
+image-$(CONFIG_EPAPR_BOOT)		+= zImage.epapr
+
+#
+# Targets which embed a device tree blob
+#
+# These are default targets to build images which embed device tree blobs.
+# They are only required on boards which do not have FDT support in firmware.
+# Boards with newish u-boot firmware can use the uImage target above.
+#
+
+# Board ports in arch/powerpc/platforms/40x/Kconfig
+image-$(CONFIG_HOTFOOT)			+= cuImage.hotfoot
+image-$(CONFIG_ACADIA)			+= cuImage.acadia
+image-$(CONFIG_OBS600)			+= uImage.obs600
+
+# Board ports in arch/powerpc/platforms/44x/Kconfig
+image-$(CONFIG_EBONY)			+= treeImage.ebony cuImage.ebony
+image-$(CONFIG_BAMBOO)			+= treeImage.bamboo cuImage.bamboo
+image-$(CONFIG_SAM440EP)		+= cuImage.sam440ep
+image-$(CONFIG_SEQUOIA)			+= cuImage.sequoia
+image-$(CONFIG_RAINIER)			+= cuImage.rainier
+image-$(CONFIG_TAISHAN)			+= cuImage.taishan
+image-$(CONFIG_KATMAI)			+= cuImage.katmai
+image-$(CONFIG_WARP)			+= cuImage.warp
+image-$(CONFIG_YOSEMITE)		+= cuImage.yosemite
+image-$(CONFIG_ISS4xx)			+= treeImage.iss4xx \
+					   treeImage.iss4xx-mpic
+image-$(CONFIG_CURRITUCK)			+= treeImage.currituck
+image-$(CONFIG_AKEBONO)			+= treeImage.akebono
+
+# Board ports in arch/powerpc/platforms/8xx/Kconfig
+image-$(CONFIG_MPC86XADS)		+= cuImage.mpc866ads
+image-$(CONFIG_MPC885ADS)		+= cuImage.mpc885ads
+image-$(CONFIG_PPC_EP88XC)		+= dtbImage.ep88xc
+image-$(CONFIG_PPC_ADDER875)		+= cuImage.adder875-uboot \
+					   dtbImage.adder875-redboot
+
+# Board ports in arch/powerpc/platforms/52xx/Kconfig
+image-$(CONFIG_PPC_LITE5200)		+= cuImage.lite5200
+image-$(CONFIG_PPC_LITE5200)		+= cuImage.lite5200b
+image-$(CONFIG_PPC_MEDIA5200)		+= cuImage.media5200
+
+# Board ports in arch/powerpc/platforms/82xx/Kconfig
+image-$(CONFIG_EP8248E)			+= dtbImage.ep8248e
+
+# Board ports in arch/powerpc/platforms/83xx/Kconfig
+image-$(CONFIG_MPC832x_RDB)		+= cuImage.mpc832x_rdb
+image-$(CONFIG_MPC834x_ITX)		+= cuImage.mpc8349emitx \
+					   cuImage.mpc8349emitxgp
+image-$(CONFIG_ASP834x)			+= dtbImage.asp834x-redboot
+
+# Board ports in arch/powerpc/platforms/85xx/Kconfig
+image-$(CONFIG_MPC85xx_MDS)		+= cuImage.mpc8568mds
+image-$(CONFIG_MPC85xx_DS)		+= cuImage.mpc8544ds \
+					   cuImage.mpc8572ds
+image-$(CONFIG_TQM8540)			+= cuImage.tqm8540
+image-$(CONFIG_TQM8541)			+= cuImage.tqm8541
+image-$(CONFIG_TQM8548)			+= cuImage.tqm8548
+image-$(CONFIG_TQM8555)			+= cuImage.tqm8555
+image-$(CONFIG_TQM8560)			+= cuImage.tqm8560
+image-$(CONFIG_KSI8560)			+= cuImage.ksi8560
+
+# Board ports in arch/powerpc/platforms/86xx/Kconfig
+image-$(CONFIG_MVME7100)                += dtbImage.mvme7100
+
+# Board ports in arch/powerpc/platforms/embedded6xx/Kconfig
+image-$(CONFIG_STORCENTER)		+= cuImage.storcenter
+image-$(CONFIG_GAMECUBE)		+= dtbImage.gamecube
+image-$(CONFIG_WII)			+= dtbImage.wii
+image-$(CONFIG_MVME5100)		+= dtbImage.mvme5100
+
+# Board port in arch/powerpc/platforms/amigaone/Kconfig
+image-$(CONFIG_AMIGAONE)		+= cuImage.amigaone
+
+image-$(CONFIG_PPC_MICROWATT)		+= dtbImage.microwatt
+
+# For 32-bit powermacs, build the COFF and miboot images
+# as well as the ELF images.
+ifdef CONFIG_PPC32
+image-$(CONFIG_PPC_PMAC)	+= zImage.coff zImage.miboot
+endif # CONFIG_PPC32
+
+# Allow extra targets to be added to the defconfig
+image-y	+= $(CONFIG_EXTRA_TARGETS)
+
+initrd-  := $(patsubst zImage%, zImage.initrd%, $(image-))
+initrd-y := $(patsubst zImage%, zImage.initrd%, \
+		$(patsubst dtbImage%, dtbImage.initrd%, \
+		$(patsubst simpleImage%, simpleImage.initrd%, \
+		$(patsubst treeImage%, treeImage.initrd%, $(image-y)))))
+initrd-y := $(filter-out $(image-y), $(initrd-y))
+targets	+= $(image-y) $(initrd-y)
+targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \
+		$(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y))))
+targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \
+		$(patsubst $(x).%, dts/fsl/%.dtb, $(filter $(x).%, $(image-y))))
+
+$(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz
+
+# Don't put the ramdisk on the pattern rule; when it's missing make will try
+# the pattern rule with fewer dependencies that also matches (even with the
+# hard dependency listed).
+$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) FORCE
+	$(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz)
+
+$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) FORCE
+	$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
+
+# dtbImage% - a dtbImage is a zImage with an embedded device tree blob
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+	$(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
+
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+	$(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)
+
+# This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
+# prefix
+$(obj)/vmlinux.strip: vmlinux
+	$(STRIP) -s -R .comment $< -o $@
+
+$(obj)/uImage: vmlinux $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot)
+
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
+
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)
+
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
+
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)
+
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
+
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)
+
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
+
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)
+
+# Needed for the above targets to work with dts/fsl/ files
+$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb
+	@cp $< $@
+
+# If there isn't a platform selected then just strip the vmlinux.
+ifeq (,$(image-y))
+image-y := vmlinux.strip
+endif # image-y was empty; zImage then links to the $(obj)/vmlinux.strip target above
+
+$(obj)/zImage:		$(addprefix $(obj)/, $(image-y))
+	$(Q)rm -f $@; ln $< $@
+$(obj)/zImage.initrd:	$(addprefix $(obj)/, $(initrd-y))
+	$(Q)rm -f $@; ln $< $@
+
+# anything not in $(targets)
+clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
+	zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \
+	zImage.miboot zImage.pmac zImage.pseries \
+	zImage.maple simpleImage.* otheros.bld
+
+# clean up files cached by wrapper
+clean-kernel-base := vmlinux.strip vmlinux.bin
+clean-kernel := $(addsuffix .gz,$(clean-kernel-base))
+clean-kernel += $(addsuffix .xz,$(clean-kernel-base))
+# clean-files are relative to $(obj).
+clean-files += $(addprefix ../../../, $(clean-kernel))
+
+WRAPPER_OBJDIR := /usr/lib/kernel-wrapper
+WRAPPER_DTSDIR := /usr/lib/kernel-wrapper/dts
+WRAPPER_BINDIR := /usr/sbin
+INSTALL := install
+
+extra-installed		:= $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y))
+hostprogs-installed	:= $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs))
+wrapper-installed	:= $(DESTDIR)$(WRAPPER_BINDIR)/wrapper
+dts-installed		:= $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts))
+
+all-installed		:= $(extra-installed) $(hostprogs-installed) $(wrapper-installed) $(dts-installed)
+
+quiet_cmd_mkdir           = MKDIR   $(patsubst $(INSTALL_HDR_PATH)/%,%,$@)
+      cmd_mkdir           = mkdir -p $@
+
+quiet_cmd_install	  = INSTALL $(patsubst $(DESTDIR)$(WRAPPER_OBJDIR)/%,%,$@)
+      cmd_install	  = $(INSTALL)  -m0644 $(patsubst $(DESTDIR)$(WRAPPER_OBJDIR)/%,$(obj)/%,$@) $@
+
+quiet_cmd_install_dts	  = INSTALL $(patsubst $(DESTDIR)$(WRAPPER_DTSDIR)/%,dts/%,$@)
+      cmd_install_dts	  = $(INSTALL)  -m0644 $(patsubst $(DESTDIR)$(WRAPPER_DTSDIR)/%,$(srctree)/$(obj)/dts/%,$@) $@
+
+quiet_cmd_install_exe	  = INSTALL $(patsubst $(DESTDIR)$(WRAPPER_BINDIR)/%,%,$@)
+      cmd_install_exe	  = $(INSTALL)  -m0755 $(patsubst $(DESTDIR)$(WRAPPER_BINDIR)/%,$(obj)/%,$@) $@
+
+quiet_cmd_install_wrapper = INSTALL $(patsubst $(DESTDIR)$(WRAPPER_BINDIR)/%,%,$@)
+      cmd_install_wrapper = $(INSTALL)  -m0755 $(patsubst $(DESTDIR)$(WRAPPER_BINDIR)/%,$(srctree)/$(obj)/%,$@) $@ ;\
+				sed -i $@ -e 's%^object=.*%object=$(WRAPPER_OBJDIR)%' \
+					  -e 's%^objbin=.*%objbin=$(WRAPPER_BINDIR)%' \
+
+
+$(DESTDIR)$(WRAPPER_OBJDIR) $(DESTDIR)$(WRAPPER_DTSDIR) $(DESTDIR)$(WRAPPER_BINDIR):
+	$(call cmd,mkdir)
+
+$(extra-installed)	: $(DESTDIR)$(WRAPPER_OBJDIR)/% : $(obj)/% | $(DESTDIR)$(WRAPPER_OBJDIR)
+	$(call cmd,install)
+
+$(hostprogs-installed)  : $(DESTDIR)$(WRAPPER_BINDIR)/% : $(obj)/% | $(DESTDIR)$(WRAPPER_BINDIR)
+	$(call cmd,install_exe)
+
+$(dts-installed)	: $(DESTDIR)$(WRAPPER_DTSDIR)/% : $(srctree)/$(obj)/dts/% | $(DESTDIR)$(WRAPPER_DTSDIR)
+	$(call cmd,install_dts)
+
+$(wrapper-installed): $(DESTDIR)$(WRAPPER_BINDIR) $(srctree)/$(obj)/wrapper | $(DESTDIR)$(WRAPPER_BINDIR)
+	$(call cmd,install_wrapper)
+
+$(obj)/bootwrapper_install: $(all-installed)
diff --git a/arch/powerpc/boot/README b/arch/powerpc/boot/README
new file mode 100644
index 0000000000..3e11058760
--- /dev/null
+++ b/arch/powerpc/boot/README
@@ -0,0 +1,11 @@
+
+To extract the kernel vmlinux, System.map, .config or initrd from the zImage binary:
+
+objcopy -j .kernel:vmlinux -O binary zImage vmlinux.gz
+objcopy -j .kernel:System.map -O binary zImage System.map.gz
+objcopy -j .kernel:.config -O binary zImage config.gz
+objcopy -j .kernel:initrd -O binary zImage.initrd initrd.gz
+
+
+	Peter
+
diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
new file mode 100644
index 0000000000..53b3b26214
--- /dev/null
+++ b/arch/powerpc/boot/addnote.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Program to hack in a PT_NOTE program header entry in an ELF file.
+ * This is needed for OF on RS/6000s to load an image correctly.
+ * Note that OF needs a program header entry for the note, not an
+ * ELF section.
+ *
+ * Copyright 2000 Paul Mackerras.
+ *
+ * Adapted for 64 bit little endian images by Andrew Tauferner.
+ *
+ * Usage: addnote zImage
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+/* CHRP note section: note name; main() writes it with note type 0x1275 */
+static const char arch[] = "PowerPC";
+
+#define N_DESCR	6
+unsigned int descr[N_DESCR] = {	/* CHRP descriptor words; main() stores each one big-endian */
+	0xffffffff,		/* real-mode = true */
+	0x02000000,		/* real-base, i.e. where we expect OF to be */
+	0xffffffff,		/* real-size */
+	0xffffffff,		/* virt-base */
+	0xffffffff,		/* virt-size */
+	0x4000,			/* load-base */
+};
+
+/* RPA note section: note name; main() writes it with note type 0x12759999 */
+static const char rpaname[] = "IBM,RPA-Client-Config";
+
+/*
+ * Note: setting ignore_my_client_config *should* mean that OF ignores
+ * all the other fields, but there is a firmware bug which means that
+ * it looks at the splpar field at least.  So these values need to be
+ * reasonable.
+ */
+#define N_RPA_DESCR	8
+unsigned int rpanote[N_RPA_DESCR] = {	/* RPA descriptor words; main() stores each one big-endian */
+	0,			/* lparaffinity */
+	64,			/* min_rmo_size */
+	0,			/* min_rmo_percent */
+	40,			/* max_pft_size */
+	1,			/* splpar */
+	-1,			/* min_load */
+	0,			/* new_mem_def */
+	1,			/* ignore_my_client_config */
+};
+
+#define ROUNDUP(len)	(((len) + 3) & ~3)	/* round len up to a multiple of 4 */
+
+unsigned char buf[1024];	/* start of the ELF file; main() reads, patches and writes it back */
+#define ELFDATA2LSB     1
+#define ELFDATA2MSB     2
+static int e_data = ELFDATA2MSB;	/* byte order used by GET_n()/PUT_n(); main() sets it from e_ident */
+#define ELFCLASS32      1
+#define ELFCLASS64      2
+static int e_class = ELFCLASS32;	/* selects the 32/64-bit field offsets; main() sets it from e_ident */
+
+/*
+ * Accessors for multi-byte fields of buf[] at byte offset 'off'; the forms
+ * without a BE/LE suffix pick the byte order from e_data.  Reads are built on
+ * unsigned int so that a 32-bit value with bit 31 set neither overflows a
+ * signed shift nor gets sign-extended when GET_64*() widens it.
+ */
+#define GET_16BE(off)	(((unsigned int)buf[off] << 8) + (buf[(off)+1]))
+#define GET_32BE(off)	((GET_16BE(off) << 16U) + GET_16BE((off)+2U))
+#define GET_64BE(off)	((((unsigned long long)GET_32BE(off)) << 32ULL) + \
+			((unsigned long long)GET_32BE((off)+4ULL)))
+#define PUT_16BE(off, v)(buf[off] = ((v) >> 8) & 0xff, buf[(off) + 1] = (v) & 0xff)
+#define PUT_32BE(off, v)(PUT_16BE((off), (v) >> 16L), PUT_16BE((off) + 2, (v)))
+/* PUT_64*() widen v before shifting: a 32-bit v shifted right by 32 is undefined. */
+#define PUT_64BE(off, v)(PUT_32BE((off), (unsigned long long)(v) >> 32), PUT_32BE((off) + 4, (v)))
+
+#define GET_16LE(off)	((buf[off]) + ((unsigned int)buf[(off)+1] << 8))
+#define GET_32LE(off)	(GET_16LE(off) + (GET_16LE((off)+2U) << 16U))
+#define GET_64LE(off)	((unsigned long long)GET_32LE(off) + \
+			(((unsigned long long)GET_32LE((off)+4ULL)) << 32ULL))
+#define PUT_16LE(off, v) (buf[off] = (v) & 0xff, buf[(off) + 1] = ((v) >> 8) & 0xff)
+#define PUT_32LE(off, v) (PUT_16LE((off), (v)), PUT_16LE((off) + 2, (v) >> 16L))
+#define PUT_64LE(off, v) (PUT_32LE((off), (v)), PUT_32LE((off) + 4, (unsigned long long)(v) >> 32))
+
+#define GET_16(off)	(e_data == ELFDATA2MSB ? GET_16BE(off) : GET_16LE(off))
+#define GET_32(off)	(e_data == ELFDATA2MSB ? GET_32BE(off) : GET_32LE(off))
+#define GET_64(off)	(e_data == ELFDATA2MSB ? GET_64BE(off) : GET_64LE(off))
+#define PUT_16(off, v)	(e_data == ELFDATA2MSB ? PUT_16BE(off, v) : PUT_16LE(off, v))
+#define PUT_32(off, v)  (e_data == ELFDATA2MSB ? PUT_32BE(off, v) : PUT_32LE(off, v))
+#define PUT_64(off, v)  (e_data == ELFDATA2MSB ? PUT_64BE(off, v) : PUT_64LE(off, v))
+
+/* Structure of an ELF file */
+#define E_IDENT		0	/* ELF header */
+#define	E_PHOFF		(e_class == ELFCLASS32 ? 28 : 32)	/* e_phoff; main() reads it as 32 or 64 bits by class */
+#define E_PHENTSIZE	(e_class == ELFCLASS32 ? 42 : 54)	/* e_phentsize, read as 16 bits */
+#define E_PHNUM		(e_class == ELFCLASS32 ? 44 : 56)	/* e_phnum, read and updated as 16 bits */
+#define E_HSIZE		(e_class == ELFCLASS32 ? 52 : 64)	/* ELF header size; main() rejects shorter reads */
+
+#define EI_MAGIC	0	/* offsets in E_IDENT area */
+#define EI_CLASS	4
+#define EI_DATA		5
+
+#define PH_TYPE		0	/* ELF program header */
+#define PH_OFFSET	(e_class == ELFCLASS32 ? 4 : 8)	/* p_offset; main() stores the note's offset here */
+#define PH_FILESZ	(e_class == ELFCLASS32 ? 16 : 32)	/* p_filesz; main() stores the note's size here */
+#define PH_HSIZE	(e_class == ELFCLASS32 ? 32 : 56)	/* smallest e_phentsize main() accepts */
+
+#define PT_NOTE		4	/* Program header type = note */
+
+
+unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' };	/* compared with the first four bytes of the file */
+
+/* Add two PT_NOTE program headers (CHRP, then RPA) and their note data to the ELF file av[1]. */
+int main(int ac, char **av)
+{
+	int fd, n, i;
+	unsigned long ph, ps, np;
+	long nnote, nnote2, ns;
+
+	if (ac != 2) {
+		fprintf(stderr, "Usage: %s elf-file\n", av[0]);
+		exit(1);
+	}
+	fd = open(av[1], O_RDWR);
+	if (fd < 0) {
+		perror(av[1]);
+		exit(1);
+	}
+	/* each note is a 12-byte header, the name padded to 4 bytes, then the descriptor words */
+	nnote = 12 + ROUNDUP(strlen(arch) + 1) + sizeof(descr);
+	nnote2 = 12 + ROUNDUP(strlen(rpaname) + 1) + sizeof(rpanote);
+
+	n = read(fd, buf, sizeof(buf));	/* only the first sizeof(buf) bytes are examined and rewritten */
+	if (n < 0) {
+		perror("read");
+		exit(1);
+	}
+
+	if (memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+		goto notelf;
+	e_class = buf[E_IDENT+EI_CLASS];
+	if (e_class != ELFCLASS32 && e_class != ELFCLASS64)
+		goto notelf;
+	e_data = buf[E_IDENT+EI_DATA];
+	if (e_data != ELFDATA2MSB && e_data != ELFDATA2LSB)
+		goto notelf;
+	if (n < E_HSIZE)
+		goto notelf;
+
+	ph = (e_class == ELFCLASS32 ? GET_32(E_PHOFF) : GET_64(E_PHOFF));
+	ps = GET_16(E_PHENTSIZE);
+	np = GET_16(E_PHNUM);
+	if (ph < E_HSIZE || ps < PH_HSIZE || np < 1)
+		goto notelf;
+	if (ph + (np + 2) * ps + nnote + nnote2 > n)	/* two new headers + both notes must fit in what was read */
+		goto nospace;
+
+	for (i = 0; i < np; ++i) {
+		if (GET_32(ph + PH_TYPE) == PT_NOTE) {
+			fprintf(stderr, "%s already has a note entry\n",
+				av[1]);
+			exit(0);
+		}
+		ph += ps;
+	}
+
+	/* ph is now just past the last header; the area we want to use must be all zeroes */
+	for (i = 0; i < 2 * ps + nnote + nnote2; ++i)
+		if (buf[ph + i] != 0)
+			goto nospace;
+
+	/* fill in the program header entry */
+	ns = ph + 2 * ps;
+	PUT_32(ph + PH_TYPE, PT_NOTE);
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_OFFSET, ns);
+	else
+		PUT_64(ph + PH_OFFSET, ns);
+
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_FILESZ, nnote);
+	else
+		PUT_64(ph + PH_FILESZ, nnote);
+
+	/* fill in the note area we point to */
+	/* XXX we should probably make this a proper section */
+	PUT_32(ns, strlen(arch) + 1);
+	PUT_32(ns + 4, N_DESCR * 4);
+	PUT_32(ns + 8, 0x1275);
+	strcpy((char *) &buf[ns + 12], arch);
+	ns += 12 + ROUNDUP(strlen(arch) + 1);	/* same padding rule as the nnote computation */
+	for (i = 0; i < N_DESCR; ++i, ns += 4)
+		PUT_32BE(ns, descr[i]);
+
+	/* fill in the second program header entry and the RPA note area */
+	ph += ps;
+	PUT_32(ph + PH_TYPE, PT_NOTE);
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_OFFSET, ns);
+	else
+		PUT_64(ph + PH_OFFSET, ns);
+	/* the RPA note is nnote2 bytes long for both ELF classes */
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_FILESZ, nnote2);
+	else
+		PUT_64(ph + PH_FILESZ, nnote2);
+
+	/* fill in the note area we point to */
+	PUT_32(ns, strlen(rpaname) + 1);
+	PUT_32(ns + 4, sizeof(rpanote));
+	PUT_32(ns + 8, 0x12759999);
+	strcpy((char *) &buf[ns + 12], rpaname);
+	ns += 12 + ROUNDUP(strlen(rpaname) + 1);
+	for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
+		PUT_32BE(ns, rpanote[i]);
+
+	/* Update the number of program headers */
+	PUT_16(E_PHNUM, np + 2);
+
+	/* write back */
+	i = lseek(fd, (long) 0, SEEK_SET);
+	if (i < 0) {
+		perror("lseek");
+		exit(1);
+	}
+	i = write(fd, buf, n);
+	if (i < 0) {
+		perror("write");
+		exit(1);
+	}
+	if (i < n) {
+		fprintf(stderr, "%s: write truncated\n", av[1]);
+		exit(1);
+	}
+
+	exit(0);
+
+ notelf:
+	fprintf(stderr, "%s does not appear to be an ELF file\n", av[1]);
+	exit(1);
+
+ nospace:
+	fprintf(stderr, "sorry, I can't find space in %s to put the note\n",
+		av[1]);
+	exit(1);
+}
diff --git a/arch/powerpc/boot/bamboo.c b/arch/powerpc/boot/bamboo.c
new file mode 100644
index 0000000000..dcdfa586ad
--- /dev/null
+++ b/arch/powerpc/boot/bamboo.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corporation, 2007
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Based on ebony wrapper:
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *
+ * Clocking code based on code by:
+ * Stefan Roese <sr@denx.de>
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+
+static u8 *bamboo_mac0, *bamboo_mac1;
+
+static void bamboo_fixups(void)	/* bamboo_init() installs this as platform_ops.fixups */
+{
+	u32 *const emac0 = (u32 *)0xef600e00, *const emac1 = (u32 *)0xef600f00;
+
+	ibm440ep_fixup_clocks(33333333UL, 11059200, 25000000);
+	ibm4xx_sdram_fixup_memsize();
+	ibm4xx_quiesce_eth(emac0, emac1);
+	dt_fixup_mac_address_by_alias("ethernet0", bamboo_mac0);	/* MAC pointers saved by bamboo_init() */
+	dt_fixup_mac_address_by_alias("ethernet1", bamboo_mac1);
+}
+
+void bamboo_init(void *mac0, void *mac1)	/* mac0/mac1 are kept for bamboo_fixups() */
+{
+	bamboo_mac1 = mac1;
+	bamboo_mac0 = mac0;
+	platform_ops.exit = ibm44x_dbcr_reset;
+	platform_ops.fixups = bamboo_fixups;
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cpm-serial.c b/arch/powerpc/boot/cpm-serial.c
new file mode 100644
index 0000000000..dfb56829ca
--- /dev/null
+++ b/arch/powerpc/boot/cpm-serial.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CPM serial console support.
+ *
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * It is assumed that the firmware (or the platform file) has already set
+ * up the port.
+ */
+
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+#include "page.h"
+
+struct cpm_scc {	/* SCC register layout; this file only accesses gsmrl (bits 0x30) */
+	u32 gsmrl;
+	u32 gsmrh;
+	u16 psmr;
+	u8 res1[2];
+	u16 todr;
+	u16 dsr;
+	u16 scce;
+	u8 res2[2];
+	u16 sccm;
+	u8 res3;
+	u8 sccs;
+	u8 res4[8];
+};
+
+struct cpm_smc {	/* SMC register layout; this file only accesses smcmr (low two bits) */
+	u8 res1[2];
+	u16 smcmr;
+	u8 res2[2];
+	u8 smce;
+	u8 res3[3];
+	u8 smcm;
+	u8 res4[5];
+};
+
+struct cpm_param {	/* parameter RAM layout; cpm_serial_open() writes the fields marked below */
+	u16 rbase;	/* set to cbd_offset */
+	u16 tbase;	/* set to cbd_offset + sizeof(struct cpm_bd) */
+	u8 rfcr;	/* set to 0x10 */
+	u8 tfcr;	/* set to 0x10 */
+	u16 mrblr;	/* set to 1 */
+	u32 rstate;
+	u8 res1[4];
+	u16 rbptr;
+	u8 res2[6];
+	u32 tstate;
+	u8 res3[4];
+	u16 tbptr;
+	u8 res4[6];
+	u16 maxidl;	/* set to 0 */
+	u16 idlc;
+	u16 brkln;	/* set to 0 */
+	u16 brkec;	/* set to 0 */
+	u16 brkcr;	/* set to 0 */
+	u16 rmask;
+	u8 res5[4];
+};
+
+struct cpm_bd {	/* buffer descriptor; one RX and one TX descriptor are used by this file */
+	u16 sc;   /* Status and Control; this file polls and sets bit 0x8000 */
+	u16 len;  /* Data length in buffer */
+	u8 *addr; /* Buffer address in host memory */
+};
+
+static void *cpcr;	/* command register; mapped from the parent node's first reg entry */
+static struct cpm_param *param;	/* vreg[1] of the port node, or the relocated pram for CPM2 SMC */
+static struct cpm_smc *smc;	/* vreg[0] when the port is an SMC */
+static struct cpm_scc *scc;	/* vreg[0] when the port is an SCC */
+static struct cpm_bd *tbdf, *rbdf;	/* TX/RX descriptors, set up by cpm_serial_open() */
+static u32 cpm_cmd;	/* value of the "fsl,cpm-command" property, OR'd into every command */
+static void *cbd_addr;	/* CPU address of the two buffer descriptors */
+static u32 cbd_offset;	/* the same location as an offset, written to rbase/tbase */
+
+static void (*do_cmd)(int op);	/* cpm1_cmd or cpm2_cmd, chosen in cpm_console_init() */
+static void (*enable_port)(void);	/* smc_ or scc_ variant, chosen in cpm_console_init() */
+static void (*disable_port)(void);
+
+#define CPM_CMD_STOP_TX     4	/* op codes passed to do_cmd() */
+#define CPM_CMD_RESTART_TX  6
+#define CPM_CMD_INIT_RX_TX  0
+
+static void cpm1_cmd(int op)	/* CPM1 variant of do_cmd(): 16-bit accesses to cpcr */
+{
+	while (in_be16(cpcr) & 1)	/* spin until bit 0 reads clear before writing */
+		;
+
+	out_be16(cpcr, (op << 8) | cpm_cmd | 1);	/* op shifted left by 8, plus cpm_cmd and bit 0 */
+
+	while (in_be16(cpcr) & 1)	/* spin until bit 0 reads clear again */
+		;
+}
+
+static void cpm2_cmd(int op)	/* CPM2 variant of do_cmd(): 32-bit accesses to cpcr */
+{
+	while (in_be32(cpcr) & 0x10000)	/* spin until bit 0x10000 reads clear before writing */
+		;
+
+	out_be32(cpcr, op | cpm_cmd | 0x10000);	/* op unshifted, plus cpm_cmd and bit 0x10000 */
+
+	while (in_be32(cpcr) & 0x10000)	/* spin until bit 0x10000 reads clear again */
+		;
+}
+
+static void smc_disable_port(void)	/* SMC: issue STOP_TX, then clear the low two smcmr bits */
+{
+	do_cmd(CPM_CMD_STOP_TX);
+	out_be16(&smc->smcmr, in_be16(&smc->smcmr) & ~3);
+}
+
+static void scc_disable_port(void)	/* SCC: issue STOP_TX, then clear gsmrl bits 0x30 */
+{
+	do_cmd(CPM_CMD_STOP_TX);
+	out_be32(&scc->gsmrl, in_be32(&scc->gsmrl) & ~0x30);
+}
+
+static void smc_enable_port(void)	/* SMC: set the low two smcmr bits, then issue RESTART_TX */
+{
+	out_be16(&smc->smcmr, in_be16(&smc->smcmr) | 3);
+	do_cmd(CPM_CMD_RESTART_TX);
+}
+
+static void scc_enable_port(void)	/* SCC: set gsmrl bits 0x30, then issue RESTART_TX */
+{
+	out_be32(&scc->gsmrl, in_be32(&scc->gsmrl) | 0x30);
+	do_cmd(CPM_CMD_RESTART_TX);
+}
+
+static int cpm_serial_open(void)	/* scdp->open hook: reprogram the port for one-byte RX/TX buffers; always returns 0 */
+{
+	disable_port();
+
+	out_8(&param->rfcr, 0x10);
+	out_8(&param->tfcr, 0x10);
+	out_be16(&param->mrblr, 1);	/* matches the len = 1 descriptors below */
+	out_be16(&param->maxidl, 0);
+	out_be16(&param->brkec, 0);
+	out_be16(&param->brkln, 0);
+	out_be16(&param->brkcr, 0);
+
+	rbdf = cbd_addr;	/* RX descriptor is the first of the two at cbd_addr */
+	rbdf->addr = (u8 *)rbdf - 1;	/* RX data byte sits immediately below the descriptors */
+	rbdf->sc = 0xa000;
+	rbdf->len = 1;
+
+	tbdf = rbdf + 1;	/* TX descriptor follows the RX descriptor */
+	tbdf->addr = (u8 *)rbdf - 2;	/* TX data byte sits just below the RX data byte */
+	tbdf->sc = 0x2000;
+	tbdf->len = 1;
+
+	sync();	/* descriptors are written before rbase/tbase point at them */
+	out_be16(&param->rbase, cbd_offset);
+	out_be16(&param->tbase, cbd_offset + sizeof(struct cpm_bd));
+
+	do_cmd(CPM_CMD_INIT_RX_TX);
+
+	enable_port();
+	return 0;
+}
+
+static void cpm_serial_putc(unsigned char c)	/* scdp->putc hook: queue one byte through the TX descriptor */
+{
+	while (tbdf->sc & 0x8000)	/* wait until bit 0x8000 of the TX descriptor is clear */
+		barrier();
+
+	sync();
+
+	tbdf->addr[0] = c;
+	eieio();	/* data byte is stored before the status bit is set */
+	tbdf->sc |= 0x8000;	/* presumably hands the descriptor to the controller -- confirm */
+}
+
+static unsigned char cpm_serial_tstc(void)	/* scdp->tstc hook: nonzero when RX descriptor bit 0x8000 is clear */
+{
+	barrier();
+	return !(rbdf->sc & 0x8000);
+}
+
+static unsigned char cpm_serial_getc(void)	/* scdp->getc hook: busy-wait for a byte and return it */
+{
+	unsigned char c;
+
+	while (!cpm_serial_tstc())	/* spin until cpm_serial_tstc() reports a byte */
+		;
+
+	sync();
+	c = rbdf->addr[0];
+	eieio();	/* byte is read before the status bit is set again */
+	rbdf->sc |= 0x8000;	/* presumably re-arms the RX descriptor -- confirm */
+
+	return c;
+}
+
+int cpm_console_init(void *devp, struct serial_console_data *scdp)	/* returns 0, or -1 if a device-tree lookup fails */
+{
+	void *vreg[2];
+	u32 reg[2];
+	int is_smc = 0, is_cpm2 = 0;
+	void *parent, *muram;
+	void *muram_addr;
+	unsigned long muram_offset, muram_size;
+
+	if (dt_is_compatible(devp, "fsl,cpm1-smc-uart")) {
+		is_smc = 1;
+	} else if (dt_is_compatible(devp, "fsl,cpm2-scc-uart")) {
+		is_cpm2 = 1;
+	} else if (dt_is_compatible(devp, "fsl,cpm2-smc-uart")) {
+		is_cpm2 = 1;
+		is_smc = 1;
+	}	/* no match leaves both flags 0, i.e. the CPM1 SCC handlers are used */
+
+	if (is_smc) {
+		enable_port = smc_enable_port;
+		disable_port = smc_disable_port;
+	} else {
+		enable_port = scc_enable_port;
+		disable_port = scc_disable_port;
+	}
+
+	if (is_cpm2)
+		do_cmd = cpm2_cmd;
+	else
+		do_cmd = cpm1_cmd;
+
+	if (getprop(devp, "fsl,cpm-command", &cpm_cmd, 4) < 4)
+		return -1;
+
+	if (dt_get_virtual_reg(devp, vreg, 2) < 2)	/* needs two entries: port registers and parameter RAM */
+		return -1;
+
+	if (is_smc)
+		smc = vreg[0];
+	else
+		scc = vreg[0];
+
+	param = vreg[1];
+
+	parent = get_parent(devp);
+	if (!parent)
+		return -1;
+
+	if (dt_get_virtual_reg(parent, &cpcr, 1) < 1)	/* the parent's first reg entry is used as cpcr */
+		return -1;
+
+	muram = finddevice("/soc/cpm/muram/data");
+	if (!muram)
+		return -1;
+
+	/* For bootwrapper-compatible device trees, we assume that the first
+	 * entry has at least 128 bytes, and that #address-cells/#size-cells
+	 * is one for both parent and child.
+	 */
+
+	if (dt_get_virtual_reg(muram, &muram_addr, 1) < 1)
+		return -1;
+
+	if (getprop(muram, "reg", reg, 8) < 8)	/* one (offset, size) pair of 32-bit cells */
+		return -1;
+
+	muram_offset = reg[0];
+	muram_size = reg[1];
+
+	/* Store the buffer descriptors at the end of the first muram chunk.
+	 * For SMC ports on CPM2-based platforms, relocate the parameter RAM
+	 * just before the buffer descriptors.
+	 */
+
+	cbd_offset = muram_offset + muram_size - 2 * sizeof(struct cpm_bd);
+
+	if (is_cpm2 && is_smc) {
+		u16 *smc_base = (u16 *)param;	/* the original param location receives the new pram offset */
+		u16 pram_offset;
+
+		pram_offset = cbd_offset - 64;
+		pram_offset = _ALIGN_DOWN(pram_offset, 64);
+
+		disable_port();
+		out_be16(smc_base, pram_offset);
+		param = muram_addr - muram_offset + pram_offset;	/* CPU address of the relocated pram */
+	}
+
+	cbd_addr = muram_addr - muram_offset + cbd_offset;	/* CPU address matching cbd_offset */
+
+	scdp->open = cpm_serial_open;
+	scdp->putc = cpm_serial_putc;
+	scdp->getc = cpm_serial_getc;
+	scdp->tstc = cpm_serial_tstc;
+
+	return 0;
+}
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
new file mode 100644
index 0000000000..121cab9d57
--- /dev/null
+++ b/arch/powerpc/boot/crt0.S
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * Adapted for 64 bit LE PowerPC by Andrew Tauferner
+ */
+
+#include "ppc_asm.h"
+
+RELA = 7		/* dynamic-section tag values matched by the startup scan */
+RELASZ = 8
+RELAENT = 9
+
+	.data
+	/* A procedure descriptor used when booting this as a COFF file.
+	 * When making COFF, this comes first in the link and we're
+	 * linked at 0x500000.
+	 */
+	.globl	_zimage_start_opd
+_zimage_start_opd:
+	.long	0x500000, 0, 0, 0
+	.text
+	b	_zimage_start
+
+#ifdef __powerpc64__
+.balign 8
+p_start:	.8byte	_start
+p_etext:	.8byte	_etext
+p_bss_start:	.8byte	__bss_start
+p_end:		.8byte	_end
+	/* p_toc, p_dyn and p_rela hold offsets from p_base, not addresses. */
+p_toc:		.8byte	.TOC. - p_base
+p_dyn:		.8byte	__dynamic_start - p_base
+p_rela:		.8byte	__rela_dyn_start - p_base
+p_prom:		.8byte	0	/* the startup code stores r5 here */
+	.weak	_platform_stack_top
+p_pstack:	.8byte	_platform_stack_top
+#else
+p_start:	.long	_start
+p_etext:	.long	_etext
+p_bss_start:	.long	__bss_start
+p_end:		.long	_end
+
+	.weak	_platform_stack_top
+p_pstack:	.long	_platform_stack_top
+#endif
+
+	.weak	_zimage_start		/* weak: another object may define its own */
+_zimage_start:
+	.globl	_zimage_start_lib
+_zimage_start_lib:
+	/* Work out the offset between the address we were linked at
+	   and the address where we're running. */
+	bcl	20,31,.+4		/* link to the next insn: LR = &p_base */
+p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
+#ifndef __powerpc64__
+	/* 32-bit: grab the link address of the dynamic section in r11 */
+	addis	r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
+	lwz	r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
+	cmpwi	r11,0
+	beq	3f		/* if not linked -pie */
+	/* get the runtime address of the dynamic section in r12 */
+	.weak	__dynamic_start
+	addis	r12,r10,(__dynamic_start-p_base)@ha
+	addi	r12,r12,(__dynamic_start-p_base)@l
+	subf	r11,r11,r12	/* runtime - linktime offset */
+
+	/* Scan the tagged dynamic entries for RELA, RELASZ and RELAENT. */
+	li	r9,0
+	li	r0,0
+	li	r14,0		/* so a missing RELAENT is detected at 10: below */
+9:	lwz	r8,0(r12)	/* get tag */
+	cmpwi	r8,0
+	beq	10f		/* end of list */
+	cmpwi	r8,RELA
+	bne	11f
+	lwz	r9,4(r12)	/* get RELA pointer in r9 */
+	b	12f
+11:	cmpwi	r8,RELASZ
+	bne	.Lcheck_for_relaent
+	lwz	r0,4(r12)	/* get RELASZ value in r0 */
+	b	12f
+.Lcheck_for_relaent:
+	cmpwi	r8,RELAENT
+	bne	12f
+	lwz	r14,4(r12)	/* get RELAENT value in r14 */
+12:	addi	r12,r12,8	/* each dynamic entry is 8 bytes: tag, value */
+	b	9b
+
+	/* The relocation section contains a list of relocations.
+	 * We now do the R_PPC_RELATIVE ones, which point to words
+	 * which need to be initialized with addend + offset */
+10:	/* skip relocation unless RELA, RELASZ and RELAENT were all found */
+	cmpwi	r0,0
+	beq	3f
+	cmpwi	r9,0
+	beq	3f
+	cmpwi	r14,0
+	beq	3f
+
+	add	r9,r9,r11	/* Relocate RELA pointer */
+	divwu	r0,r0,r14	/* RELASZ / RELAENT = number of entries */
+	mtctr	r0
+2:	lbz	r0,4+3(r9)	/* ELF32_R_TYPE(reloc->r_info) */
+	cmpwi	r0,22		/* R_PPC_RELATIVE */
+	bne	.Lnext
+	lwz	r12,0(r9)	/* reloc->r_offset */
+	lwz	r0,8(r9)	/* reloc->r_addend */
+	add	r0,r0,r11
+	stwx	r0,r11,r12	/* store addend + offset at r_offset + offset */
+.Lnext:	add	r9,r9,r14
+	bdnz	2b
+
+	/* Do a cache flush for our text, in case the loader didn't */
+3:	lwz	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	lwz	r8,p_etext-p_base(r10)
+4:	dcbf	r0,r9		/* RA field 0 means no base, not r0's value */
+	icbi	r0,r9
+	addi	r9,r9,0x20	/* step through the text 0x20 bytes at a time */
+	cmplw	cr0,r9,r8
+	blt	4b
+	sync
+	isync
+
+	/* Clear the BSS */
+	lwz	r9,p_bss_start-p_base(r10)
+	lwz	r8,p_end-p_base(r10)
+	li	r0,0
+5:	stw	r0,0(r9)
+	addi	r9,r9,4
+	cmplw	cr0,r9,r8
+	blt	5b
+
+	/* Possibly set up a custom stack */
+	lwz	r8,p_pstack-p_base(r10)
+	cmpwi	r8,0
+	beq	6f		/* weak _platform_stack_top not provided */
+	lwz	r1,0(r8)
+	li	r0,0
+	stwu	r0,-16(r1)	/* establish a stack frame */
+6:
+#else /* __powerpc64__ */
+	/* Save the prom pointer at p_prom. */
+	std	r5,(p_prom-p_base)(r10)
+
+	/* Set r2 to the TOC. */
+	ld	r2,(p_toc-p_base)(r10)
+	add	r2,r2,r10
+
+	/* Grab the link address of the dynamic section in r11. */
+	ld	r11,-32768(r2)
+	cmpwi	r11,0
+	beq	3f		/* if not linked -pie then no dynamic section */
+
+	ld	r11,(p_dyn-p_base)(r10)
+	add	r11,r11,r10	/* r11 = runtime address of __dynamic_start */
+	ld	r9,(p_rela-p_base)(r10)
+	add	r9,r9,r10	/* r9 = runtime address of __rela_dyn_start */
+
+	li	r13,0
+	li	r8,0
+	li	r14,0		/* so a missing RELAENT is detected at 12: below */
+9:	ld	r12,0(r11)	/* get tag */
+	cmpdi	r12,0
+	beq	12f		/* end of list */
+	cmpdi	r12,RELA
+	bne	10f
+	ld	r13,8(r11)	/* get RELA pointer in r13 */
+	b	11f
+10:	cmpwi	r12,RELASZ
+	bne	.Lcheck_for_relaent
+	ld	r8,8(r11)	/* get RELASZ value (64-bit field) in r8 */
+	b	11f
+.Lcheck_for_relaent:
+	cmpwi	r12,RELAENT
+	bne	11f
+	ld	r14,8(r11)	/* get RELAENT value (64-bit field) in r14 */
+11:	addi	r11,r11,16	/* each dynamic entry is 16 bytes: tag, value */
+	b	9b
+12:
+	cmpdi	r13,0		/* check we have all of RELA, RELASZ, RELAENT */
+	cmpdi	cr1,r8,0
+	beq	3f
+	beq	cr1,3f
+	cmpdi	r14,0
+	beq	3f
+
+	/* Calculate the runtime offset. */
+	subf	r13,r13,r9
+
+	/* Process the R_PPC64_RELATIVE entries of the relocation list. */
+	divdu	r8,r8,r14	/* RELASZ / RELAENT = number of entries */
+	mtctr	r8
+13:	ld	r0,8(r9)	/* reloc->r_info: equals the type when sym is 0 */
+	cmpdi	r0,22		/* R_PPC64_RELATIVE */
+	bne	.Lnext
+	ld	r12,0(r9)	/* reloc->r_offset */
+	ld	r0,16(r9)	/* reloc->r_addend */
+	add	r0,r0,r13
+	stdx	r0,r13,r12	/* store addend + offset at r_offset + offset */
+.Lnext:	add	r9,r9,r14
+	bdnz	13b
+
+	/* Do a cache flush for our text, in case the loader didn't */
+3:	ld	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	ld	r8,p_etext-p_base(r10)
+4:	dcbf	r0,r9		/* RA field 0 means no base, not r0's value */
+	icbi	r0,r9
+	addi	r9,r9,0x20	/* step through the text 0x20 bytes at a time */
+	cmpld	cr0,r9,r8
+	blt	4b
+	sync
+	isync
+
+	/* Clear the BSS */
+	ld	r9,p_bss_start-p_base(r10)
+	ld	r8,p_end-p_base(r10)
+	li	r0,0
+5:	std	r0,0(r9)
+	addi	r9,r9,8
+	cmpld	cr0,r9,r8
+	blt	5b
+
+	/* Possibly set up a custom stack */
+	ld	r8,p_pstack-p_base(r10)
+	cmpdi	r8,0
+	beq	6f		/* weak _platform_stack_top not provided */
+	ld	r1,0(r8)
+	li	r0,0
+	stdu	r0,-112(r1)	/* establish a stack frame */
+6:
+#endif  /* __powerpc64__ */
+	/* Call platform_init(); r3-r7 are not written by the code above */
+	bl	platform_init
+
+	/* Jump (not call) to start: LR is not updated by this branch */
+	b	start
+
+#ifdef __powerpc64__
+
+#define PROM_FRAME_SIZE 512	/* bytes of stack frame allocated by prom */
+/* OP_REGS: emit "\op N,\offset+\width*N(\base)" for each N in start..end. */
+.macro OP_REGS op, width, start, end, base, offset
+	.Lreg=\start
+	.rept (\end - \start + 1)
+	\op	.Lreg,\offset+\width*.Lreg(\base)
+	.Lreg=.Lreg+1
+	.endr
+.endm
+/* GPR N is saved to / restored from the doubleword at 8*N(base). */
+#define SAVE_GPRS(start, end, base)	OP_REGS std, 8, start, end, base, 0
+#define REST_GPRS(start, end, base)	OP_REGS ld, 8, start, end, base, 0
+#define SAVE_GPR(n, base)		SAVE_GPRS(n, n, base)
+#define REST_GPR(n, base)		REST_GPRS(n, n, base)
+
+/* prom handles the jump into and return from firmware.  The prom args pointer
+   is loaded in r3.  r2, r13-r31, CR and MSR are saved in a 512-byte frame. */
+.globl prom
+prom:
+	mflr	r0
+	std	r0,16(r1)		/* LR goes into the caller's frame */
+	stdu	r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+	SAVE_GPR(2, r1)
+	SAVE_GPRS(13, 31, r1)
+	mfcr    r10
+	std     r10,8*32(r1)		/* CR slot */
+	mfmsr   r10
+	std     r10,8*33(r1)		/* MSR slot, reloaded after the return */
+
+	/* remove MSR_LE from msr but keep MSR_SF */
+	mfmsr	r10
+	rldicr	r10,r10,0,62		/* keep bits 0-62, i.e. clear bit 63 */
+	mtsrr1	r10
+
+	/* Load FW address, set LR to label 1, and jump to FW */
+	bcl	20,31,0f
+0:	mflr	r10			/* r10 = runtime address of label 0 */
+	addi	r11,r10,(1f-0b)
+	mtlr	r11
+
+	ld	r10,(p_prom-0b)(r10)	/* firmware entry stored at startup */
+	mtsrr0	r10
+
+	rfid				/* go to SRR0 with the MSR from SRR1 */
+
+1:	/* Return from OF */
+	FIXUP_ENDIAN	/* NOTE(review): macro not defined in this file; presumably from ppc_asm.h */
+
+	/* Restore registers and return; first keep only the low 32 bits of r1 */
+	rldicl  r1,r1,0,32
+
+	/* Restore the MSR (back to 64 bits) */
+	ld      r10,8*(33)(r1)
+	mtmsr	r10
+	isync
+
+	/* Restore other registers */
+	REST_GPR(2, r1)
+	REST_GPRS(13, 31, r1)
+	ld      r10,8*32(r1)
+	mtcr	r10
+
+	addi    r1,r1,PROM_FRAME_SIZE	/* pop the frame */
+	ld      r0,16(r1)		/* LR saved on entry */
+	mtlr    r0
+	blr
+#endif
diff --git a/arch/powerpc/boot/crtsavres.S b/arch/powerpc/boot/crtsavres.S
new file mode 100644
index 0000000000..085fb2b9a8
--- /dev/null
+++ b/arch/powerpc/boot/crtsavres.S
@@ -0,0 +1,233 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ *   Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
+ *   Copyright 2008 Freescale Semiconductor, Inc.
+ *   Written By Michael Meissner
+ *
+ * Based on gcc/config/rs6000/crtsavres.asm from gcc
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file.  (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ *    As a special exception, if you link this library with files
+ *    compiled with GCC to produce an executable, this does not cause
+ *    the resulting executable to be covered by the GNU General Public License.
+ *    This exception does not however invalidate any other reasons why
+ *    the executable file might be covered by the GNU General Public License.
+ */
+
+#ifdef __powerpc64__
+#error "On PPC64, FPR save/restore functions are provided by the linker."
+#endif
+
+	.file	"crtsavres.S"
+	.section ".text"
+
+#define _GLOBAL(name) /* mark name as a global function and define its label */ \
+	.type name,@function; \
+	.globl name; \
+name:
+
+/* Routines for saving integer registers, called by the compiler.  Entering */
+/* at _savegpr_N falls through the labels below, storing rN..r31.  r11 points */
+/* at the caller's stack header word, just beyond the integer save area.  */
+
+_GLOBAL(_savegpr_14)
+_GLOBAL(_save32gpr_14)
+	stw	14,-72(11)	/* save gp registers */
+_GLOBAL(_savegpr_15)
+_GLOBAL(_save32gpr_15)
+	stw	15,-68(11)
+_GLOBAL(_savegpr_16)
+_GLOBAL(_save32gpr_16)
+	stw	16,-64(11)
+_GLOBAL(_savegpr_17)
+_GLOBAL(_save32gpr_17)
+	stw	17,-60(11)
+_GLOBAL(_savegpr_18)
+_GLOBAL(_save32gpr_18)
+	stw	18,-56(11)
+_GLOBAL(_savegpr_19)
+_GLOBAL(_save32gpr_19)
+	stw	19,-52(11)
+_GLOBAL(_savegpr_20)
+_GLOBAL(_save32gpr_20)
+	stw	20,-48(11)
+_GLOBAL(_savegpr_21)
+_GLOBAL(_save32gpr_21)
+	stw	21,-44(11)
+_GLOBAL(_savegpr_22)
+_GLOBAL(_save32gpr_22)
+	stw	22,-40(11)
+_GLOBAL(_savegpr_23)
+_GLOBAL(_save32gpr_23)
+	stw	23,-36(11)
+_GLOBAL(_savegpr_24)
+_GLOBAL(_save32gpr_24)
+	stw	24,-32(11)
+_GLOBAL(_savegpr_25)
+_GLOBAL(_save32gpr_25)
+	stw	25,-28(11)
+_GLOBAL(_savegpr_26)
+_GLOBAL(_save32gpr_26)
+	stw	26,-24(11)
+_GLOBAL(_savegpr_27)
+_GLOBAL(_save32gpr_27)
+	stw	27,-20(11)
+_GLOBAL(_savegpr_28)
+_GLOBAL(_save32gpr_28)
+	stw	28,-16(11)
+_GLOBAL(_savegpr_29)
+_GLOBAL(_save32gpr_29)
+	stw	29,-12(11)
+_GLOBAL(_savegpr_30)
+_GLOBAL(_save32gpr_30)
+	stw	30,-8(11)
+_GLOBAL(_savegpr_31)
+_GLOBAL(_save32gpr_31)
+	stw	31,-4(11)	/* rN is stored at -4*(32-N)(r11) */
+	blr
+
+/* Routines for restoring integer registers, called by the compiler.  Entering */
+/* at _restgpr_N falls through the labels below, loading rN..r31.  r11 points */
+/* at the caller's stack header word, just beyond the integer restore area.  */
+
+_GLOBAL(_restgpr_14)
+_GLOBAL(_rest32gpr_14)
+	lwz	14,-72(11)	/* restore gp registers */
+_GLOBAL(_restgpr_15)
+_GLOBAL(_rest32gpr_15)
+	lwz	15,-68(11)
+_GLOBAL(_restgpr_16)
+_GLOBAL(_rest32gpr_16)
+	lwz	16,-64(11)
+_GLOBAL(_restgpr_17)
+_GLOBAL(_rest32gpr_17)
+	lwz	17,-60(11)
+_GLOBAL(_restgpr_18)
+_GLOBAL(_rest32gpr_18)
+	lwz	18,-56(11)
+_GLOBAL(_restgpr_19)
+_GLOBAL(_rest32gpr_19)
+	lwz	19,-52(11)
+_GLOBAL(_restgpr_20)
+_GLOBAL(_rest32gpr_20)
+	lwz	20,-48(11)
+_GLOBAL(_restgpr_21)
+_GLOBAL(_rest32gpr_21)
+	lwz	21,-44(11)
+_GLOBAL(_restgpr_22)
+_GLOBAL(_rest32gpr_22)
+	lwz	22,-40(11)
+_GLOBAL(_restgpr_23)
+_GLOBAL(_rest32gpr_23)
+	lwz	23,-36(11)
+_GLOBAL(_restgpr_24)
+_GLOBAL(_rest32gpr_24)
+	lwz	24,-32(11)
+_GLOBAL(_restgpr_25)
+_GLOBAL(_rest32gpr_25)
+	lwz	25,-28(11)
+_GLOBAL(_restgpr_26)
+_GLOBAL(_rest32gpr_26)
+	lwz	26,-24(11)
+_GLOBAL(_restgpr_27)
+_GLOBAL(_rest32gpr_27)
+	lwz	27,-20(11)
+_GLOBAL(_restgpr_28)
+_GLOBAL(_rest32gpr_28)
+	lwz	28,-16(11)
+_GLOBAL(_restgpr_29)
+_GLOBAL(_rest32gpr_29)
+	lwz	29,-12(11)
+_GLOBAL(_restgpr_30)
+_GLOBAL(_rest32gpr_30)
+	lwz	30,-8(11)
+_GLOBAL(_restgpr_31)
+_GLOBAL(_rest32gpr_31)
+	lwz	31,-4(11)	/* rN is loaded from -4*(32-N)(r11) */
+	blr
+
+/* Routines for restoring integer registers and returning, called by the */
+/* compiler.  r11 points just beyond the integer restore area; the tail also */
+/* reloads LR from 4(r11), sets r1 to r11 and returns through that LR.  */
+
+_GLOBAL(_restgpr_14_x)
+_GLOBAL(_rest32gpr_14_x)
+	lwz	14,-72(11)	/* restore gp registers */
+_GLOBAL(_restgpr_15_x)
+_GLOBAL(_rest32gpr_15_x)
+	lwz	15,-68(11)
+_GLOBAL(_restgpr_16_x)
+_GLOBAL(_rest32gpr_16_x)
+	lwz	16,-64(11)
+_GLOBAL(_restgpr_17_x)
+_GLOBAL(_rest32gpr_17_x)
+	lwz	17,-60(11)
+_GLOBAL(_restgpr_18_x)
+_GLOBAL(_rest32gpr_18_x)
+	lwz	18,-56(11)
+_GLOBAL(_restgpr_19_x)
+_GLOBAL(_rest32gpr_19_x)
+	lwz	19,-52(11)
+_GLOBAL(_restgpr_20_x)
+_GLOBAL(_rest32gpr_20_x)
+	lwz	20,-48(11)
+_GLOBAL(_restgpr_21_x)
+_GLOBAL(_rest32gpr_21_x)
+	lwz	21,-44(11)
+_GLOBAL(_restgpr_22_x)
+_GLOBAL(_rest32gpr_22_x)
+	lwz	22,-40(11)
+_GLOBAL(_restgpr_23_x)
+_GLOBAL(_rest32gpr_23_x)
+	lwz	23,-36(11)
+_GLOBAL(_restgpr_24_x)
+_GLOBAL(_rest32gpr_24_x)
+	lwz	24,-32(11)
+_GLOBAL(_restgpr_25_x)
+_GLOBAL(_rest32gpr_25_x)
+	lwz	25,-28(11)
+_GLOBAL(_restgpr_26_x)
+_GLOBAL(_rest32gpr_26_x)
+	lwz	26,-24(11)
+_GLOBAL(_restgpr_27_x)
+_GLOBAL(_rest32gpr_27_x)
+	lwz	27,-20(11)
+_GLOBAL(_restgpr_28_x)
+_GLOBAL(_rest32gpr_28_x)
+	lwz	28,-16(11)
+_GLOBAL(_restgpr_29_x)
+_GLOBAL(_rest32gpr_29_x)
+	lwz	29,-12(11)
+_GLOBAL(_restgpr_30_x)
+_GLOBAL(_rest32gpr_30_x)
+	lwz	30,-8(11)
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+	lwz	0,4(11)		/* word at 4(r11) becomes the return address */
+	lwz	31,-4(11)
+	mtlr	0
+	mr	1,11		/* stack pointer = r11 */
+	blr
diff --git a/arch/powerpc/boot/cuboot-52xx.c b/arch/powerpc/boot/cuboot-52xx.c
new file mode 100644
index 0000000000..b332056f24
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-52xx.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for MPC5200
+ *
+ * Author: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (c) 2007 Secret Lab Technologies Ltd.
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include "cuboot.h"
+
+#define TARGET_PPC_MPC52xx
+#include "ppcboot.h"
+
+static bd_t bd;
+
+/* Patch the values U-Boot left in bd into the device tree. */
+static void platform_fixups(void)
+{
+	void *immr, *regs;
+	u32 sysfreq;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_addresses(bd.bi_enetaddr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 4, bd.bi_busfreq);
+
+	/* Existing dts files encode the model number in the soc node name,
+	 * so find the node by device_type/compatible instead of by path. */
+	immr = find_node_by_devtype(NULL, "soc");
+	if (!immr)
+		immr = find_node_by_compatible(NULL, "fsl,mpc5200-immr");
+	if (!immr)
+		immr = find_node_by_compatible(NULL, "fsl,mpc5200b-immr");
+	if (!immr)
+		return;
+
+	setprop(immr, "bus-frequency", &bd.bi_ipbfreq, sizeof(bd.bi_ipbfreq));
+	if (!dt_xlate_reg(immr, 0, (void*)&regs, NULL))
+		return;
+
+	/* Bit 0x20 of the byte at regs + 0x204 selects a factor of 8, else 4. */
+	if (in_8(regs + 0x204) & 0x0020)
+		sysfreq = bd.bi_busfreq * 8;
+	else
+		sysfreq = bd.bi_busfreq * 4;
+	setprop(immr, "system-frequency", &sysfreq, sizeof(sysfreq));
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-824x.c b/arch/powerpc/boot/cuboot-824x.c
new file mode 100644
index 0000000000..15818cb97c
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-824x.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for 824x
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_824x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+
+/* Patch the values U-Boot left in bd into the device tree. */
+static void platform_fixups(void)
+{
+	void *bus, *uart;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_addresses(bd.bi_enetaddr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 4, bd.bi_busfreq);
+
+	bus = find_node_by_devtype(NULL, "soc");
+	if (!bus)
+		return;
+
+	setprop(bus, "bus-frequency", &bd.bi_busfreq, sizeof(bd.bi_busfreq));
+
+	/* Only "serial" nodes that are direct children of the soc node. */
+	for (uart = find_node_by_devtype(NULL, "serial"); uart;
+	     uart = find_node_by_devtype(uart, "serial")) {
+		if (get_parent(uart) != bus)
+			continue;
+		setprop(uart, "clock-frequency", &bd.bi_busfreq,
+			sizeof(bd.bi_busfreq));
+	}
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-83xx.c b/arch/powerpc/boot/cuboot-83xx.c
new file mode 100644
index 0000000000..4063c6263c
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-83xx.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for 83xx
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_83xx
+#include "ppcboot.h"
+
+static bd_t bd;
+
+/* Patch the values U-Boot left in bd into the device tree. */
+static void platform_fixups(void)
+{
+	void *bus, *uart;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 4, bd.bi_busfreq);
+
+	/* The soc node name in existing dts files embeds the specific
+	 * model number, so the node is found by device_type; a plain path
+	 * lookup will do once those names are fixed.
+	 */
+	bus = find_node_by_devtype(NULL, "soc");
+	if (!bus)
+		return;
+
+	setprop(bus, "bus-frequency", &bd.bi_busfreq, sizeof(bd.bi_busfreq));
+
+	/* Only "serial" nodes that are direct children of the soc node. */
+	for (uart = find_node_by_devtype(NULL, "serial"); uart;
+	     uart = find_node_by_devtype(uart, "serial")) {
+		if (get_parent(uart) != bus)
+			continue;
+		setprop(uart, "clock-frequency", &bd.bi_busfreq,
+			sizeof(bd.bi_busfreq));
+	}
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-85xx-cpm2.c b/arch/powerpc/boot/cuboot-85xx-cpm2.c
new file mode 100644
index 0000000000..ac5115beb3
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-85xx-cpm2.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for 85xx
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_85xx
+#define TARGET_CPM2
+#include "ppcboot.h"
+
+static bd_t bd;
+
+/* Patch the values U-Boot left in bd into the device tree. */
+static void platform_fixups(void)
+{
+	void *soc, *brg, *uart;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+	dt_fixup_mac_address_by_alias("ethernet2", bd.bi_enet2addr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 8, bd.bi_busfreq);
+
+	/* The soc node name in existing dts files embeds the specific
+	 * model number, so the node is found by device_type; a plain path
+	 * lookup will do once those names are fixed.
+	 */
+	soc = find_node_by_devtype(NULL, "soc");
+	if (soc) {
+		setprop(soc, "bus-frequency", &bd.bi_busfreq,
+			sizeof(bd.bi_busfreq));
+
+		for (uart = find_node_by_devtype(NULL, "serial"); uart;
+		     uart = find_node_by_devtype(uart, "serial")) {
+			if (get_parent(uart) != soc)
+				continue;
+			setprop(uart, "clock-frequency", &bd.bi_busfreq,
+				sizeof(bd.bi_busfreq));
+		}
+	}
+
+	/* The CPM2 baud-rate generator node gets bd.bi_brgfreq. */
+	brg = find_node_by_compatible(NULL, "fsl,cpm2-brg");
+	if (brg)
+		setprop(brg, "clock-frequency", &bd.bi_brgfreq,
+			sizeof(bd.bi_brgfreq));
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-85xx.c b/arch/powerpc/boot/cuboot-85xx.c
new file mode 100644
index 0000000000..1466cc63d6
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-85xx.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for 85xx
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_85xx
+#define TARGET_HAS_ETH3
+#include "ppcboot.h"
+
+static bd_t bd;
+
+/* Patch the values U-Boot left in bd into the device tree. */
+static void platform_fixups(void)
+{
+	void *bus, *uart;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+	dt_fixup_mac_address_by_alias("ethernet2", bd.bi_enet2addr);
+	dt_fixup_mac_address_by_alias("ethernet3", bd.bi_enet3addr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 8, bd.bi_busfreq);
+
+	/* The soc node name in existing dts files embeds the specific
+	 * model number, so the node is found by device_type; a plain path
+	 * lookup will do once those names are fixed.
+	 */
+	bus = find_node_by_devtype(NULL, "soc");
+	if (!bus)
+		return;
+
+	setprop(bus, "bus-frequency", &bd.bi_busfreq, sizeof(bd.bi_busfreq));
+
+	/* Only "serial" nodes that are direct children of the soc node. */
+	for (uart = find_node_by_devtype(NULL, "serial"); uart;
+	     uart = find_node_by_devtype(uart, "serial")) {
+		if (get_parent(uart) != bus)
+			continue;
+		setprop(uart, "clock-frequency", &bd.bi_busfreq,
+			sizeof(bd.bi_busfreq));
+	}
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-8xx.c b/arch/powerpc/boot/cuboot-8xx.c
new file mode 100644
index 0000000000..e4499fba5d
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-8xx.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for 8xx
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_8xx
+#define TARGET_HAS_ETH1
+#include "ppcboot.h"
+
+static bd_t bd;
+
+/* Patch bd values into the tree; /soc/cpm and its brg get the bus clock. */
+static void platform_fixups(void)
+{
+	void *cpm, *brg;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_addresses(bd.bi_enetaddr, bd.bi_enet1addr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 16, bd.bi_busfreq);
+
+	cpm = finddevice("/soc/cpm");
+	if (cpm)
+		setprop(cpm, "clock-frequency", &bd.bi_busfreq, 4);
+	brg = finddevice("/soc/cpm/brg");
+	if (brg)
+		setprop(brg, "clock-frequency", &bd.bi_busfreq, 4);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-acadia.c b/arch/powerpc/boot/cuboot-acadia.c
new file mode 100644
index 0000000000..46e96756cf
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-acadia.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Acadia
+ *
+ * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Copyright 2008 IBM Corporation
+ */
+
+#include "ops.h"
+#include "io.h"
+#include "dcr.h"
+#include "stdio.h"
+#include "4xx.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#include "ppcboot.h"
+
+static bd_t bd;
+
+#define CPR_PERD0_SPIDV_MASK   0x000F0000     /* SPI Clock Divider */
+
+#define PLLC_SRC_MASK	       0x20000000     /* PLL feedback source */
+
+#define PLLD_FBDV_MASK	       0x1F000000     /* PLL feedback divider value */
+#define PLLD_FWDVA_MASK        0x000F0000     /* PLL forward divider A value */
+#define PLLD_FWDVB_MASK        0x00000700     /* PLL forward divider B value */
+
+#define PRIMAD_CPUDV_MASK      0x0F000000     /* CPU Clock Divisor Mask */
+#define PRIMAD_PLBDV_MASK      0x000F0000     /* PLB Clock Divisor Mask */
+#define PRIMAD_OPBDV_MASK      0x00000F00     /* OPB Clock Divisor Mask */
+#define PRIMAD_EBCDV_MASK      0x0000000F     /* EBC Clock Divisor Mask */
+
+#define PERD0_PWMDV_MASK       0xFF000000     /* PWM Divider Mask */
+#define PERD0_SPIDV_MASK       0x000F0000     /* SPI Divider Mask */
+#define PERD0_U0DV_MASK        0x0000FF00     /* UART 0 Divider Mask */
+#define PERD0_U1DV_MASK        0x000000FF     /* UART 1 Divider Mask */
+
+/* Work out the EBC, OPB and UART clocks from the CPLD sysclk setting and the
+ * CPR0 PLL/divider registers, then write them into the device tree.
+ */
+static void get_clocks(void)
+{
+	unsigned long sysclk, cpr_plld, cpr_pllc, cpr_primad, plloutb, i;
+	unsigned long pllFwdDiv, pllFwdDivB, pllFbkDiv, pllPlbDiv, pllExtBusDiv;
+	unsigned long pllOpbDiv, freqEBC, freqUART, freqOPB;
+	unsigned long div;		/* total divisor udiv * bdiv */
+	unsigned long umin;		/* minimum udiv (exclusive loop bound) */
+	unsigned short diff;		/* smallest diff */
+	unsigned long udiv;		/* best udiv */
+	unsigned short idiff;		/* current diff */
+	unsigned short ibdiv;		/* current bdiv */
+	unsigned long est;		/* current estimate */
+	unsigned long baud;
+	void *np;
+
+	/* read the sysclk value from the CPLD: 0xc selects 66666666, else 33333000 */
+	sysclk = (in_8((unsigned char *)0x80000000) == 0xc) ? 66666666 : 33333000;
+
+	/* Read PLL Mode registers */
+	cpr_plld = CPR0_READ(DCRN_CPR0_PLLD);
+	cpr_pllc = CPR0_READ(DCRN_CPR0_PLLC);
+
+	/* Determine forward divider A */
+	pllFwdDiv = ((cpr_plld & PLLD_FWDVA_MASK) >> 16);
+
+	/*
+	 * Determine forward divider B (a field value of 0 is taken as 8)
+	 */
+	pllFwdDivB = ((cpr_plld & PLLD_FWDVB_MASK) >> 8);
+	if (pllFwdDivB == 0)
+		pllFwdDivB = 8;
+
+	/*
+	 * Determine FBK_DIV (a field value of 0 is taken as 256).
+	 */
+	pllFbkDiv = ((cpr_plld & PLLD_FBDV_MASK) >> 24);
+	if (pllFbkDiv == 0)
+		pllFbkDiv = 256;
+
+	/*
+	 * Read CPR_PRIMAD register
+	 */
+	cpr_primad = CPR0_READ(DCRN_CPR0_PRIMAD);
+
+	/*
+	 * Determine PLB_DIV (0 is taken as 16); not used further below.
+	 */
+	pllPlbDiv = ((cpr_primad & PRIMAD_PLBDV_MASK) >> 16);
+	if (pllPlbDiv == 0)
+		pllPlbDiv = 16;
+
+	/*
+	 * Determine EXTBUS_DIV (a field value of 0 is taken as 16).
+	 */
+	pllExtBusDiv = (cpr_primad & PRIMAD_EBCDV_MASK);
+	if (pllExtBusDiv == 0)
+		pllExtBusDiv = 16;
+
+	/*
+	 * Determine OPB_DIV (a field value of 0 is taken as 16).
+	 */
+	pllOpbDiv = ((cpr_primad & PRIMAD_OPBDV_MASK) >> 8);
+	if (pllOpbDiv == 0)
+		pllOpbDiv = 16;
+
+	/* There is a bug in U-Boot that prevents us from using
+	 * bd.bi_opbfreq because U-Boot doesn't populate it for
+	 * 405EZ.  We get to calculate it, yay!
+	 */
+	freqOPB = (sysclk *pllFbkDiv) /pllOpbDiv;
+
+	freqEBC = (sysclk * pllFbkDiv) / pllExtBusDiv;
+
+	plloutb = ((sysclk * ((cpr_pllc & PLLC_SRC_MASK) ?
+					   pllFwdDivB : pllFwdDiv) *
+		    pllFbkDiv) / pllFwdDivB);
+
+	/* NOTE(review): np is passed to getprop() without a NULL check */
+	np = find_node_by_alias("serial0");
+	if (getprop(np, "current-speed", &baud, sizeof(baud)) != sizeof(baud))
+		fatal("no current-speed property\n\r");
+
+	udiv = 256;			/* Assume lowest possible serial clk */
+	div = plloutb / (16 * baud); /* total divisor */
+	umin = (plloutb / freqOPB) << 1;	/* 2 x OPB divisor */
+	diff = 256;			/* highest possible */
+
+	/* i is the test udiv value -- start with the largest
+	 * possible (256) to minimize serial clock and constrain
+	 * search to umin.
+	 */
+	for (i = 256; i > umin; i--) {
+		ibdiv = div / i;
+		est = i * ibdiv;
+		idiff = (est > div) ? (est-div) : (div-est);
+		if (idiff == 0) {
+			udiv = i;
+			break;      /* can't do better */
+		} else if (idiff < diff) {
+			udiv = i;       /* best so far */
+			diff = idiff;   /* update lowest diff */
+		}
+	}
+	freqUART = plloutb / udiv;
+
+	dt_fixup_cpu_clocks(bd.bi_procfreq, bd.bi_intfreq, bd.bi_plb_busfreq);
+	dt_fixup_clock("/plb/ebc", freqEBC);
+	dt_fixup_clock("/plb/opb", freqOPB);
+	dt_fixup_clock("/plb/opb/serial@ef600300", freqUART);
+	dt_fixup_clock("/plb/opb/serial@ef600400", freqUART);
+}
+
+static void acadia_fixups(void)		/* installed as platform_ops.fixups */
+{
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	get_clocks();		/* computes the clocks and writes them to the tree */
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+}
+	
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	platform_ops.fixups = acadia_fixups;	/* register the fixups hook */
+	platform_ops.exit = ibm40x_dbcr_reset;	/* register the exit hook */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-amigaone.c b/arch/powerpc/boot/cuboot-amigaone.c
new file mode 100644
index 0000000000..f3b6d6236c
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-amigaone.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for AmigaOne
+ *
+ * Author: Gerhard Pircher (gerhard_pircher@gmx.net)
+ *
+ *   Based on cuboot-83xx.c
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#include "ppcboot.h"
+
+static bd_t bd;
+
+static void platform_fixups(void)	/* installed as platform_ops.fixups */
+{
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);	/* memory start and size from bd */
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 4, bd.bi_busfreq);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	fdt_init(_dtb_start);			/* device tree blob at _dtb_start */
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;	/* register the fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-bamboo.c b/arch/powerpc/boot/cuboot-bamboo.c
new file mode 100644
index 0000000000..a5dcf3091d
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-bamboo.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Bamboo
+ *
+ * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Copyright 2007 IBM Corporation
+ *
+ * Based on cuboot-ebony.c
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	bamboo_init(&bd.bi_enetaddr, &bd.bi_enet1addr);	/* passes both MAC addresses from bd */
+}
diff --git a/arch/powerpc/boot/cuboot-ebony.c b/arch/powerpc/boot/cuboot-ebony.c
new file mode 100644
index 0000000000..3e602ee0e1
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-ebony.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Ebony
+ *
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *   Based on cuboot-83xx.c, which is:
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();	/* NOTE(review): presumably consumes r3-r7 and fills bd; see cuboot.h */
+	ebony_init(&bd.bi_enetaddr, &bd.bi_enet1addr);	/* passes both MAC addresses from bd */
+}
diff --git a/arch/powerpc/boot/cuboot-hotfoot.c b/arch/powerpc/boot/cuboot-hotfoot.c
new file mode 100644
index 0000000000..0e5532f855
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-hotfoot.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Esteem 195E Hotfoot CPU Board
+ *
+ * Author: Solomon Peachy <solomon@linux-wlan.com>
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "reg.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_HOTFOOT
+
+#include "ppcboot-hotfoot.h"
+
+static bd_t bd;
+
+#define NUM_REGS 3
+
+static void hotfoot_fixups(void)
+{	/* Patch the device tree from the board info in bd and the CPC0_UCR DCR */
+	u32 uart = mfdcr(DCRN_CPC0_UCR) & 0x7f;	/* low 7 bits of CPC0_UCR; used below as the UART clock divisor */
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize); /* memory base/size as recorded in bd */
+
+	dt_fixup_cpu_clocks(bd.bi_procfreq, bd.bi_procfreq, 0);
+	dt_fixup_clock("/plb", bd.bi_plb_busfreq);
+	dt_fixup_clock("/plb/opb", bd.bi_opbfreq);
+	dt_fixup_clock("/plb/ebc", bd.bi_pci_busfreq);
+	dt_fixup_clock("/plb/opb/serial@ef600300", bd.bi_procfreq / uart); /* NOTE(review): divides by zero if the field reads 0 -- confirm it cannot */
+	dt_fixup_clock("/plb/opb/serial@ef600400", bd.bi_procfreq / uart); /* NOTE(review): same divisor as the first UART -- confirm intended */
+	
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+
+	/* Is this a single eth/serial board? (second MAC address all zero) */
+	if ((bd.bi_enet1addr[0] == 0) && 
+	    (bd.bi_enet1addr[1] == 0) &&
+	    (bd.bi_enet1addr[2] == 0) &&
+	    (bd.bi_enet1addr[3] == 0) &&
+	    (bd.bi_enet1addr[4] == 0) &&
+	    (bd.bi_enet1addr[5] == 0)) {
+		void *devp;
+
+		printf("Trimming devtree for single serial/eth board\n");
+
+		devp = finddevice("/plb/opb/serial@ef600300");
+		if (!devp)
+			fatal("Can't find node for /plb/opb/serial@ef600300");
+		del_node(devp);	/* drop the serial port at ef600300 */
+
+		devp = finddevice("/plb/opb/ethernet@ef600900");
+		if (!devp)
+			fatal("Can't find node for /plb/opb/ethernet@ef600900");
+		del_node(devp);	/* drop the ethernet node at ef600900 */
+	}
+
+	ibm4xx_quiesce_eth((u32 *)0xef600800, (u32 *)0xef600900);	/* presumably stops both EMACs before the kernel starts -- confirm */
+
+	/* Fix up flash size in fdt for 4M boards. */
+	if (bd.bi_flashsize < 0x800000) {	/* i.e. less than 8 MiB of flash reported */
+		u32 regs[NUM_REGS];
+		void *devp = finddevice("/plb/ebc/nor_flash@0");
+		if (!devp)
+			fatal("Can't find FDT node for nor_flash!??");
+
+		printf("Fixing devtree for 4M Flash\n");
+		
+		/* First fix up the base address */
+		getprop(devp, "reg", regs, sizeof(regs));	/* NOTE(review): return value unchecked; all three cells are overwritten below anyway */
+		regs[0] = 0;
+		regs[1] = 0xffc00000;
+		regs[2] = 0x00400000;	/* 4 MiB */
+		setprop(devp, "reg", regs, sizeof(regs));
+		
+		/* Then the offsets: each remaining partition moves down by 4 MiB */
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@0");
+		if (!devp)
+			fatal("Can't find FDT node for partition@0");
+		getprop(devp, "reg", regs, 2*sizeof(u32));	/* NOTE(review): unchecked here and below; a failed read would leave regs[] stale */
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@1");
+		if (!devp)
+			fatal("Can't find FDT node for partition@1");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@2");
+		if (!devp)
+			fatal("Can't find FDT node for partition@2");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@3");
+		if (!devp)
+			fatal("Can't find FDT node for partition@3");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@4");
+		if (!devp)
+			fatal("Can't find FDT node for partition@4");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@6");	/* partition@5 is skipped here; it is deleted below */
+		if (!devp)
+			fatal("Can't find FDT node for partition@6");
+		getprop(devp, "reg", regs, 2*sizeof(u32));
+		regs[0] -= 0x400000;
+		setprop(devp, "reg", regs,  2*sizeof(u32));
+
+		/* Delete the FeatFS node */
+		devp = finddevice("/plb/ebc/nor_flash@0/partition@5");
+		if (!devp)
+			fatal("Can't find FDT node for partition@5");
+		del_node(devp);
+	}
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	platform_ops.fixups = hotfoot_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm40x_dbcr_reset;	/* exit hook (was space-indented; now a tab like its neighbours) */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-katmai.c b/arch/powerpc/boot/cuboot-katmai.c
new file mode 100644
index 0000000000..034a748fde
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-katmai.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Katmai
+ *
+ * Author: Hugh Blemings <hugh@au.ibm.com>
+ *
+ * Copyright 2007 Hugh Blemings, IBM Corporation.
+ *   Based on cuboot-ebony.c which is:
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *   Based on cuboot-83xx.c, which is:
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "reg.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+BSS_STACK(4096);
+
+static void katmai_fixups(void)
+{	/* Board-specific device-tree fixups for Katmai */
+	unsigned long sysclk = 33333000;	/* hard-coded system clock (presumably Hz) */
+
+	/* 440SP Clock logic is all but identical to 440GX
+	 * so we just use that code for now at least
+	 */
+	ibm440spe_fixup_clocks(sysclk, 6 * 1843200, 0);	/* NOTE(review): the comment above says 440GX code, the call is the 440SPe variant */
+
+	ibm440spe_fixup_memsize();
+
+	dt_fixup_mac_address(0, bd.bi_enetaddr);	/* only the first MAC address from bd is patched */
+
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+
+	platform_ops.fixups = katmai_fixups;	/* device-tree fixups hook; no exit hook is set for this board */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-kilauea.c b/arch/powerpc/boot/cuboot-kilauea.c
new file mode 100644
index 0000000000..fda182f518
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-kilauea.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for PPC405EX. This image already includes
+ * a dtb.
+ *
+ * Author: Tiejun Chen <tiejun.chen@windriver.com>
+ *
+ * Copyright (C) 2009 Wind River Systems, Inc.
+ */
+
+#include "ops.h"
+#include "io.h"
+#include "dcr.h"
+#include "stdio.h"
+#include "4xx.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+#define KILAUEA_SYS_EXT_SERIAL_CLOCK     11059200        /* ext. 11.059MHz clk */
+
+static bd_t bd;
+
+static void kilauea_fixups(void)
+{	/* Board-specific device-tree fixups for the 405EX (Kilauea) image */
+	unsigned long sysclk = 33333333;	/* hard-coded system clock (presumably Hz) */
+
+	ibm405ex_fixup_clocks(sysclk, KILAUEA_SYS_EXT_SERIAL_CLOCK);	/* second argument: the 11.0592 MHz external serial clock */
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);	/* memory base/size as recorded in bd */
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	platform_ops.fixups = kilauea_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm40x_dbcr_reset;	/* exit hook */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-pq2.c b/arch/powerpc/boot/cuboot-pq2.c
new file mode 100644
index 0000000000..d32765c03e
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-pq2.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for PowerQUICC II
+ * (a.k.a. 82xx with CPM, not the 8240 family of chips)
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+#include "io.h"
+#include "fsl-soc.h"
+
+#define TARGET_CPM2
+#define TARGET_HAS_ETH1
+#include "ppcboot.h"
+
+static bd_t bd;
+
+struct cs_range {	/* one entry of the /localbus "ranges" property, as read by update_cs_ranges() */
+	u32 csnum;	/* chip-select number; indexes the register pairs at ctrl_addr[cs * 2] */
+	u32 base; /* must be zero */
+	u32 addr;	/* ORed into the first register of the pair */
+	u32 size;	/* window size; ~(size - 1) is ORed into the second register of the pair */
+};
+
+struct pci_range {	/* one entry of the /pci "ranges" property, as read by fixup_pci() */
+	u32 flags;	/* masked with 0x43000000 to classify the entry as prefetchable mem, mem or I/O */
+	u32 pci_addr[2];	/* only pci_addr[1] is used by fixup_pci() */
+	u32 phys_addr;	/* address on the parent (host) bus */
+	u32 size[2];	/* only size[1] is used by fixup_pci() */
+};
+
+struct cs_range cs_ranges_buf[MAX_PROP_LEN / sizeof(struct cs_range)];	/* receives /localbus "ranges"; holds at most MAX_PROP_LEN bytes */
+struct pci_range pci_ranges_buf[MAX_PROP_LEN / sizeof(struct pci_range)];	/* receives /pci "ranges"; holds at most MAX_PROP_LEN bytes */
+
+/* Different versions of u-boot put the BCSR in different places, and
+ * some don't set up the PCI PIC at all, so we assume the device tree is
+ * sane and update the BRx registers appropriately.
+ *
+ * For a node compatible with fsl,pq2-localbus, #address/#size must be 2/1
+ * for the localbus and 1/1 for the parent bus, and ranges must be for whole
+ * chip selects; otherwise "Bad /localbus node" is printed and we stop.
+ */
+static void update_cs_ranges(void)
+{
+	void *bus_node, *parent_node;
+	u32 *ctrl_addr;
+	unsigned long ctrl_size;
+	u32 naddr, nsize;
+	int len, i;
+
+	bus_node = finddevice("/localbus");
+	if (!bus_node || !dt_is_compatible(bus_node, "fsl,pq2-localbus"))
+		return;	/* no such bus in this tree: nothing to do, not an error */
+
+	dt_get_reg_format(bus_node, &naddr, &nsize);
+	if (naddr != 2 || nsize != 1)
+		goto err;
+
+	parent_node = get_parent(bus_node);
+	if (!parent_node)
+		goto err;
+
+	dt_get_reg_format(parent_node, &naddr, &nsize);
+	if (naddr != 1 || nsize != 1)
+		goto err;
+
+	if (!dt_xlate_reg(bus_node, 0, (unsigned long *)&ctrl_addr,
+	                  &ctrl_size))
+		goto err;
+
+	len = getprop(bus_node, "ranges", cs_ranges_buf, sizeof(cs_ranges_buf));
+	/* len is divided by a size_t below: a negative value would become huge. */
+	if (len < 0 || len > (int)sizeof(cs_ranges_buf))
+		goto err;
+
+	for (i = 0; i < len / sizeof(struct cs_range); i++) {
+		u32 base, option;
+		int cs = cs_ranges_buf[i].csnum;
+
+		if (cs >= ctrl_size / 8 || cs_ranges_buf[i].base != 0)
+			goto err;	/* register pair outside the block, or non-zero base */
+
+		base = in_be32(&ctrl_addr[cs * 2]);
+
+		/* If CS is already valid, use the existing flags.
+		 * Otherwise, guess a sane default.
+		 */
+		if (base & 1) {
+			base &= 0x7fff;
+			option = in_be32(&ctrl_addr[cs * 2 + 1]) & 0x7fff;
+		} else {
+			base = 0x1801;
+			option = 0x10;
+		}
+
+		out_be32(&ctrl_addr[cs * 2], 0);	/* clear the first register while the second is rewritten */
+		out_be32(&ctrl_addr[cs * 2 + 1],
+		         option | ~(cs_ranges_buf[i].size - 1));
+		out_be32(&ctrl_addr[cs * 2], base | cs_ranges_buf[i].addr);
+	}
+
+	return;
+
+err:
+	printf("Bad /localbus node\r\n");
+}
+
+/* Older u-boots don't set PCI up properly.  Update the hardware to match
+ * the device tree.  The prefetch mem region and non-prefetch mem region
+ * must be contiguous in the host bus.  As required by the PCI binding,
+ * PCI #addr/#size must be 3/2.  The parent bus must be 1/1.  Only
+ * 32-bit PCI is supported.  All three region types (prefetchable mem,
+ * non-prefetchable mem, and I/O) must be present.
+ */
+static void fixup_pci(void)
+{	/* Program the PCI bridge windows from the /pci node; see the comment above for the constraints */
+	struct pci_range *mem = NULL, *mmio = NULL,
+	                 *io = NULL, *mem_base = NULL;
+	u32 *pci_regs[3];
+	u8 *soc_regs;
+	int i, len;
+	void *node, *parent_node;
+	u32 naddr, nsize, mem_pow2, mem_mask;
+
+	node = finddevice("/pci");
+	if (!node || !dt_is_compatible(node, "fsl,pq2-pci"))
+		return;	/* no such node in this tree: nothing to do, not an error */
+
+	for (i = 0; i < 3; i++)
+		if (!dt_xlate_reg(node, i,
+		                  (unsigned long *)&pci_regs[i], NULL))
+			goto err;
+
+	soc_regs = (u8 *)fsl_get_immr();
+	if (!soc_regs)
+		goto unhandled;
+
+	dt_get_reg_format(node, &naddr, &nsize);
+	if (naddr != 3 || nsize != 2)
+		goto err;
+
+	parent_node = get_parent(node);
+	if (!parent_node)
+		goto err;
+
+	dt_get_reg_format(parent_node, &naddr, &nsize);
+	if (naddr != 1 || nsize != 1)
+		goto unhandled;
+
+	len = getprop(node, "ranges", pci_ranges_buf, sizeof(pci_ranges_buf));
+	if (len < 0 || len > (int)sizeof(pci_ranges_buf))
+		goto err;	/* len is divided by a size_t below: negative would become huge */
+
+	for (i = 0; i < len / sizeof(struct pci_range); i++) {
+		u32 flags = pci_ranges_buf[i].flags & 0x43000000;
+
+		if (flags == 0x42000000)
+			mem = &pci_ranges_buf[i];
+		else if (flags == 0x02000000)
+			mmio = &pci_ranges_buf[i];
+		else if (flags == 0x01000000)
+			io = &pci_ranges_buf[i];
+	}
+
+	if (!mem || !mmio || !io)
+		goto unhandled;
+	if (mem->size[1] != mmio->size[1])
+		goto unhandled;
+	if ((mem->size[1] & (mem->size[1] - 1)) ||
+	    (io->size[1] & (io->size[1] - 1)))
+		goto unhandled;	/* sizes must be powers of two */
+
+	if (mem->phys_addr + mem->size[1] == mmio->phys_addr)
+		mem_base = mem;
+	else if (mmio->phys_addr + mmio->size[1] == mem->phys_addr)
+		mem_base = mmio;
+	else
+		goto unhandled;	/* the two memory windows are not contiguous */
+
+	out_be32(&pci_regs[1][0], mem_base->phys_addr | 1);
+	out_be32(&pci_regs[2][0], ~(mem->size[1] + mmio->size[1] - 1));
+
+	out_be32(&pci_regs[1][1], io->phys_addr | 1);
+	out_be32(&pci_regs[2][1], ~(io->size[1] - 1));
+
+	out_le32(&pci_regs[0][0], mem->pci_addr[1] >> 12);
+	out_le32(&pci_regs[0][2], mem->phys_addr >> 12);
+	out_le32(&pci_regs[0][4], (~(mem->size[1] - 1) >> 12) | 0xa0000000);
+
+	out_le32(&pci_regs[0][6], mmio->pci_addr[1] >> 12);
+	out_le32(&pci_regs[0][8], mmio->phys_addr >> 12);
+	out_le32(&pci_regs[0][10], (~(mmio->size[1] - 1) >> 12) | 0x80000000);
+
+	out_le32(&pci_regs[0][12], io->pci_addr[1] >> 12);
+	out_le32(&pci_regs[0][14], io->phys_addr >> 12);
+	out_le32(&pci_regs[0][16], (~(io->size[1] - 1) >> 12) | 0xc0000000);
+
+	/* Inbound translation */
+	out_le32(&pci_regs[0][58], 0);
+	out_le32(&pci_regs[0][60], 0);
+
+	mem_pow2 = 1U << (__ilog2_u32(bd.bi_memsize - 1) + 1);	/* RAM size rounded up to a power of two; 1U keeps bit 31 unsigned */
+	mem_mask = ~(mem_pow2 - 1) >> 12;
+	out_le32(&pci_regs[0][62], 0xa0000000 | mem_mask);
+
+	/* If PCI is disabled, drive RST high to enable. */
+	if (!(in_le32(&pci_regs[0][32]) & 1)) {
+		/* Tpvrh (Power valid to RST# high) 100 ms */
+		udelay(100000);
+
+		out_le32(&pci_regs[0][32], 1);
+
+		/* Trhfa (RST# high to first cfg access) 2^25 clocks */
+		udelay(1020000);
+	}
+
+	/* Enable bus master and memory access */
+	out_le32(&pci_regs[0][64], 0x80000004);
+	out_le32(&pci_regs[0][65], in_le32(&pci_regs[0][65]) | 6);
+
+	/* Park the bus on PCI, and elevate PCI's arbitration priority,
+	 * as required by section 9.6 of the user's manual.
+	 */
+	out_8(&soc_regs[0x10028], 3);
+	out_be32((u32 *)&soc_regs[0x1002c], 0x01236745);
+
+	return;
+
+err:
+	printf("Bad PCI node -- using existing firmware setup.\r\n");
+	return;
+
+unhandled:
+	printf("Unsupported PCI node -- using existing firmware setup.\r\n");
+}
+
+static void pq2_platform_fixups(void)
+{	/* Patch the device tree from bd, then reprogram the chip selects and PCI to match it */
+	void *node;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);	/* memory base/size as recorded in bd */
+	dt_fixup_mac_addresses(bd.bi_enetaddr, bd.bi_enet1addr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 4, bd.bi_busfreq);
+
+	node = finddevice("/soc/cpm");
+	if (node)	/* node is optional: silently skipped when absent */
+		setprop(node, "clock-frequency", &bd.bi_cpmfreq, 4);	/* 4-byte value taken straight from bd */
+
+	node = finddevice("/soc/cpm/brg");
+	if (node)
+		setprop(node, "clock-frequency",  &bd.bi_brgfreq, 4);
+
+	update_cs_ranges();
+	fixup_pci();
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	fdt_init(_dtb_start);
+	serial_console_init();
+	platform_ops.fixups = pq2_platform_fixups;	/* device-tree and hardware fixups hook */
+}
diff --git a/arch/powerpc/boot/cuboot-rainier.c b/arch/powerpc/boot/cuboot-rainier.c
new file mode 100644
index 0000000000..046478544a
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-rainier.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Rainier
+ *
+ * Valentine Barshak <vbarshak@ru.mvista.com>
+ * Copyright 2007 MontaVista Software, Inc
+ *
+ * Based on Ebony code by David Gibson <david@gibson.dropbear.id.au>
+ * Copyright IBM Corporation, 2007
+ *
+ * Based on Bamboo code by Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ * Copyright IBM Corporation, 2007
+ */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+
+static void rainier_fixups(void)
+{	/* Board-specific device-tree fixups for Rainier */
+	unsigned long sysclk = 33333333;	/* hard-coded system clock (presumably Hz) */
+
+	ibm440ep_fixup_clocks(sysclk, 11059200, 50000000);	/* NOTE(review): meaning of the two constants is defined by the helper -- confirm there */
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+	ibm4xx_denali_fixup_memsize();
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	platform_ops.fixups = rainier_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm44x_dbcr_reset;	/* exit hook */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-sam440ep.c b/arch/powerpc/boot/cuboot-sam440ep.c
new file mode 100644
index 0000000000..d875119e3c
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-sam440ep.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Sam440ep based off bamboo.c code
+ * original copyrights below
+ *
+ * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Copyright 2007 IBM Corporation
+ *
+ * Based on cuboot-ebony.c
+ *
+ * Modified from cuboot-bamboo.c for sam440ep:
+ * Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "44x.h"
+#include "4xx.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+static void sam440ep_fixups(void)
+{	/* Board-specific device-tree fixups for Sam440ep */
+	unsigned long sysclk = 66666666;	/* hard-coded system clock (presumably Hz) */
+
+	ibm440ep_fixup_clocks(sysclk, 11059200, 25000000);
+	ibm4xx_sdram_fixup_memsize();
+	ibm4xx_quiesce_eth((u32 *)0xef600e00, (u32 *)0xef600f00);
+	dt_fixup_mac_addresses(bd.bi_enetaddr, bd.bi_enet1addr);	/* pass the MAC byte arrays themselves, not pointers to the arrays */
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	platform_ops.fixups = sam440ep_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm44x_dbcr_reset;	/* exit hook */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-sequoia.c b/arch/powerpc/boot/cuboot-sequoia.c
new file mode 100644
index 0000000000..e0285c20e3
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-sequoia.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Sequoia
+ *
+ * Valentine Barshak <vbarshak@ru.mvista.com>
+ * Copyright 2007 MontaVista Software, Inc
+ *
+ * Based on Ebony code by David Gibson <david@gibson.dropbear.id.au>
+ * Copyright IBM Corporation, 2007
+ *
+ * Based on Bamboo code by Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ * Copyright IBM Corporation, 2007
+ */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+
+static void sequoia_fixups(void)
+{	/* Board-specific device-tree fixups for Sequoia */
+	unsigned long sysclk = 33333333;	/* hard-coded system clock (presumably Hz) */
+
+	ibm440ep_fixup_clocks(sysclk, 11059200, 50000000);	/* NOTE(review): meaning of the two constants is defined by the helper -- confirm there */
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+	ibm4xx_denali_fixup_memsize();
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	platform_ops.fixups = sequoia_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm44x_dbcr_reset;	/* exit hook */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-taishan.c b/arch/powerpc/boot/cuboot-taishan.c
new file mode 100644
index 0000000000..3d40670b24
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-taishan.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Taishan
+ *
+ * Author: Hugh Blemings <hugh@au.ibm.com>
+ *
+ * Copyright 2007 Hugh Blemings, IBM Corporation.
+ *   Based on cuboot-ebony.c which is:
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *   Based on cuboot-83xx.c, which is:
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+#include "reg.h"
+#include "dcr.h"
+#include "4xx.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#define TARGET_440GX
+#include "ppcboot.h"
+
+static bd_t bd;
+
+BSS_STACK(4096);
+
+static void taishan_fixups(void)
+{	/* Board-specific device-tree fixups for Taishan */
+	/* FIXME: sysclk should be derived by reading the FPGA
+	   registers */
+	unsigned long sysclk = 33000000;	/* hard-coded until the FIXME above is addressed */
+
+	ibm440gx_fixup_clocks(sysclk, 6 * 1843200, 25000000);
+
+	ibm4xx_sdram_fixup_memsize();
+
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+
+	platform_ops.fixups = taishan_fixups;	/* device-tree fixups hook; no exit hook is set for this board */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-warp.c b/arch/powerpc/boot/cuboot-warp.c
new file mode 100644
index 0000000000..1ec0fa2848
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-warp.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2008 PIKA Technologies
+ *   Sean MacLennan <smaclennan@pikatech.com>
+ */
+
+#include "ops.h"
+#include "4xx.h"
+#include "cuboot.h"
+#include "stdio.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+static void warp_fixups(void)
+{	/* Board-specific device-tree fixups for the PIKA Warp */
+	ibm440ep_fixup_clocks(66000000, 11059200, 50000000);	/* clock constants are hard-coded for this board */
+	ibm4xx_sdram_fixup_memsize();
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);	/* only ethernet0 is patched */
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+
+	platform_ops.fixups = warp_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm44x_dbcr_reset;	/* exit hook */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot-yosemite.c b/arch/powerpc/boot/cuboot-yosemite.c
new file mode 100644
index 0000000000..ce3fdb7379
--- /dev/null
+++ b/arch/powerpc/boot/cuboot-yosemite.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Yosemite
+ *
+ * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Copyright 2008 IBM Corporation
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "4xx.h"
+#include "44x.h"
+#include "cuboot.h"
+
+#define TARGET_4xx
+#define TARGET_44x
+#include "ppcboot.h"
+
+static bd_t bd;
+
+static void yosemite_fixups(void)
+{	/* Board-specific device-tree fixups for Yosemite */
+	unsigned long sysclk = 66666666;	/* hard-coded system clock (presumably Hz) */
+
+	ibm440ep_fixup_clocks(sysclk, 11059200, 50000000);
+	ibm4xx_sdram_fixup_memsize();
+	ibm4xx_quiesce_eth((u32 *)0xef600e00, (u32 *)0xef600f00);	/* presumably stops both EMACs before the kernel starts -- confirm */
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		unsigned long r6, unsigned long r7)
+{	/* r3 -> U-Boot bd_t; r4..r7: initrd start/end, cmdline start/end */
+	CUBOOT_INIT();	/* copies *r3 into bd and calls cuboot_init(r4..r7, end of RAM) */
+	platform_ops.fixups = yosemite_fixups;	/* device-tree fixups hook */
+	platform_ops.exit = ibm44x_dbcr_reset;	/* exit hook */
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/cuboot.c b/arch/powerpc/boot/cuboot.c
new file mode 100644
index 0000000000..7f186658ff
--- /dev/null
+++ b/arch/powerpc/boot/cuboot.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Compatibility for old (not device tree aware) U-Boot versions
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ * Consolidated using macros by David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Copyright 2007 David Gibson, IBM Corporation.
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+
+#include "ppcboot.h"
+
+void cuboot_init(unsigned long r4, unsigned long r5,
+		 unsigned long r6, unsigned long r7,
+		 unsigned long end_of_ram)
+{	/* Record initrd/cmdline from the entry registers and set up the heap after _end */
+	unsigned long avail_ram = end_of_ram - (unsigned long)_end;	/* bytes between the end of this image and end_of_ram */
+
+	loader_info.initrd_addr = r4;	/* r4 == 0 means no initrd */
+	loader_info.initrd_size = r4 ? r5 - r4 : 0;	/* r5 is the initrd end address */
+	loader_info.cmdline = (char *)r6;
+	loader_info.cmdline_len = r7 - r6;	/* r7 is the command-line end address */
+
+	simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);	/* heap is avail_ram minus 1 MiB; NOTE(review): wraps if avail_ram < 1 MiB; 32/64 are allocator parameters -- confirm meaning */
+}
diff --git a/arch/powerpc/boot/cuboot.h b/arch/powerpc/boot/cuboot.h
new file mode 100644
index 0000000000..c2b2c58eaa
--- /dev/null
+++ b/arch/powerpc/boot/cuboot.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_CUBOOT_H_
+#define _PPC_BOOT_CUBOOT_H_
+
+void cuboot_init(unsigned long r4, unsigned long r5,
+		 unsigned long r6, unsigned long r7,
+		 unsigned long end_of_ram);
+
+#define CUBOOT_INIT() /* expects a bd_t variable named bd and the names r3..r7 in the caller's scope */ \
+	do { \
+		memcpy(&bd, (bd_t *)r3, sizeof(bd)); /* snapshot the board info that r3 points at */ \
+		cuboot_init(r4, r5, r6, r7, bd.bi_memstart + bd.bi_memsize); /* end_of_ram = memstart + memsize */ \
+	} while (0)
+
+#endif /* _PPC_BOOT_CUBOOT_H_ */
diff --git a/arch/powerpc/boot/dcr.h b/arch/powerpc/boot/dcr.h
new file mode 100644
index 0000000000..334ab8b5a6
--- /dev/null
+++ b/arch/powerpc/boot/dcr.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_DCR_H_
+#define _PPC_BOOT_DCR_H_
+
+#define mfdcr(rn) \
+	({	\
+		unsigned long rval; \
+		asm volatile("mfdcr %0,%1" : "=r"(rval) : "i"(rn)); /* "i": rn must be a compile-time constant */ \
+		rval; /* value of the statement expression: the DCR contents */ \
+	})
+#define mtdcr(rn, val) \
+	asm volatile("mtdcr %0,%1" : : "i"(rn), "r"(val))	/* write val to DCR rn; rn must be a constant */
+#define mfdcrx(rn) \
+	({	\
+		unsigned long rval; \
+		asm volatile("mfdcrx %0,%1" : "=r"(rval) : "r"(rn)); /* "r": rn may be a run-time value */ \
+		rval; \
+	})
+#define mtdcrx(rn, val) \
+	({	\
+		asm volatile("mtdcrx %0,%1" : : "r"(rn), "r" (val)); /* write val to DCR rn; rn may be a run-time value */ \
+	})
+
+/* 440GP/440GX SDRAM controller DCRs (indirect: write the offset to CFGADDR, then access CFGDATA) */
+#define DCRN_SDRAM0_CFGADDR				0x010
+#define DCRN_SDRAM0_CFGDATA				0x011
+
+#define SDRAM0_READ(offset) ({\
+	mtdcr(DCRN_SDRAM0_CFGADDR, offset); \
+	mfdcr(DCRN_SDRAM0_CFGDATA); })
+#define SDRAM0_WRITE(offset, data) ({\
+	mtdcr(DCRN_SDRAM0_CFGADDR, offset); \
+	mtdcr(DCRN_SDRAM0_CFGDATA, data); })
+
+#define 	SDRAM0_B0CR				0x40
+#define 	SDRAM0_B1CR				0x44
+#define 	SDRAM0_B2CR				0x48
+#define 	SDRAM0_B3CR				0x4c
+
+static const unsigned long sdram_bxcr[] = { SDRAM0_B0CR, SDRAM0_B1CR,
+					    SDRAM0_B2CR, SDRAM0_B3CR };	/* bank configuration register offsets, indexed by bank */
+
+#define			SDRAM_CONFIG_BANK_ENABLE        0x00000001
+#define			SDRAM_CONFIG_SIZE_MASK          0x000e0000	/* 3-bit size field, decoded below */
+#define			SDRAM_CONFIG_BANK_SIZE(reg)	/* 4 MiB << size field; reg is parenthesized so any expression may be passed */ \
+	(0x00400000 << (((reg) & SDRAM_CONFIG_SIZE_MASK) >> 17))
+
+/* 440GP External Bus Controller (EBC): register offsets and BnCR field masks */
+#define DCRN_EBC0_CFGADDR				0x012
+#define DCRN_EBC0_CFGDATA				0x013
+#define   EBC_NUM_BANKS					  8
+#define   EBC_B0CR					  0x00
+#define   EBC_B1CR					  0x01
+#define   EBC_B2CR					  0x02
+#define   EBC_B3CR					  0x03
+#define   EBC_B4CR					  0x04
+#define   EBC_B5CR					  0x05
+#define   EBC_B6CR					  0x06
+#define   EBC_B7CR					  0x07
+#define   EBC_BXCR(n)					  (n)	/* BnCR offset equals the bank number */
+#define	    EBC_BXCR_BAS				    0xfff00000
+#define	    EBC_BXCR_BS				  	    0x000e0000
+#define	    EBC_BXCR_BANK_SIZE(reg) /* 1 MiB << BS field */ \
+	(0x100000 << (((reg) & EBC_BXCR_BS) >> 17))
+#define	    EBC_BXCR_BU				  	    0x00018000
+#define	      EBC_BXCR_BU_OFF			  	      0x00000000
+#define	      EBC_BXCR_BU_RO			  	      0x00008000
+#define	      EBC_BXCR_BU_WO			  	      0x00010000
+#define	      EBC_BXCR_BU_RW			  	      0x00018000
+#define	    EBC_BXCR_BW				  	    0x00006000
+#define   EBC_B0AP					  0x10
+#define   EBC_B1AP					  0x11
+#define   EBC_B2AP					  0x12
+#define   EBC_B3AP					  0x13
+#define   EBC_B4AP					  0x14
+#define   EBC_B5AP					  0x15
+#define   EBC_B6AP					  0x16
+#define   EBC_B7AP					  0x17
+#define   EBC_BXAP(n)					  (0x10+(n))	/* BnAP offsets start at 0x10 */
+#define   EBC_BEAR					  0x20
+#define   EBC_BESR					  0x21
+#define   EBC_CFG					  0x23
+#define   EBC_CID					  0x24
+
+/* 440GP Clock, PM, chip control: DCR numbers, CPC0_SYS0/CPC0_CR0 field masks and decode helpers */
+#define DCRN_CPC0_SR					0x0b0
+#define DCRN_CPC0_ER					0x0b1
+#define DCRN_CPC0_FR					0x0b2
+#define DCRN_CPC0_SYS0					0x0e0
+#define	  CPC0_SYS0_TUNE				  0xffc00000
+#define	  CPC0_SYS0_FBDV_MASK				  0x003c0000
+#define	  CPC0_SYS0_FWDVA_MASK				  0x00038000
+#define	  CPC0_SYS0_FWDVB_MASK				  0x00007000
+#define	  CPC0_SYS0_OPDV_MASK				  0x00000c00
+#define	  CPC0_SYS0_EPDV_MASK				  0x00000300
+/* Helper macros to compute the actual clock divider values from the
+ * encodings in the CPC0 register */
+#define	  CPC0_SYS0_FBDV(reg) /* field 1..15 maps to itself; field 0 maps to 16 */ \
+		((((((reg) & CPC0_SYS0_FBDV_MASK) >> 18) - 1) & 0xf) + 1)
+#define	  CPC0_SYS0_FWDVA(reg) /* divider = 8 - field */ \
+		(8 - (((reg) & CPC0_SYS0_FWDVA_MASK) >> 15))
+#define	  CPC0_SYS0_FWDVB(reg) /* divider = 8 - field */ \
+		(8 - (((reg) & CPC0_SYS0_FWDVB_MASK) >> 12))
+#define	  CPC0_SYS0_OPDV(reg) /* divider = field + 1 */ \
+		((((reg) & CPC0_SYS0_OPDV_MASK) >> 10) + 1)
+#define	  CPC0_SYS0_EPDV(reg) /* divider = field + 1 */ \
+		((((reg) & CPC0_SYS0_EPDV_MASK) >> 8) + 1)
+#define	  CPC0_SYS0_EXTSL				  0x00000080
+#define	  CPC0_SYS0_RW_MASK				  0x00000060
+#define	  CPC0_SYS0_RL					  0x00000010
+#define	  CPC0_SYS0_ZMIISL_MASK				  0x0000000c
+#define	  CPC0_SYS0_BYPASS				  0x00000002
+#define	  CPC0_SYS0_NTO1				  0x00000001
+#define DCRN_CPC0_SYS1					0x0e1
+#define DCRN_CPC0_CUST0					0x0e2
+#define DCRN_CPC0_CUST1					0x0e3
+#define DCRN_CPC0_STRP0					0x0e4
+#define DCRN_CPC0_STRP1					0x0e5
+#define DCRN_CPC0_STRP2					0x0e6
+#define DCRN_CPC0_STRP3					0x0e7
+#define DCRN_CPC0_GPIO					0x0e8
+#define DCRN_CPC0_PLB					0x0e9
+#define DCRN_CPC0_CR1					0x0ea
+#define DCRN_CPC0_CR0					0x0eb
+#define	  CPC0_CR0_SWE					  0x80000000
+#define	  CPC0_CR0_CETE					  0x40000000
+#define	  CPC0_CR0_U1FCS				  0x20000000
+#define	  CPC0_CR0_U0DTE				  0x10000000
+#define	  CPC0_CR0_U0DRE				  0x08000000
+#define	  CPC0_CR0_U0DC					  0x04000000
+#define	  CPC0_CR0_U1DTE				  0x02000000
+#define	  CPC0_CR0_U1DRE				  0x01000000
+#define	  CPC0_CR0_U1DC					  0x00800000
+#define	  CPC0_CR0_U0EC					  0x00400000
+#define	  CPC0_CR0_U1EC					  0x00200000
+#define	  CPC0_CR0_UDIV_MASK				  0x001f0000
+#define	  CPC0_CR0_UDIV(reg) /* divider = field + 1 */ \
+		((((reg) & CPC0_CR0_UDIV_MASK) >> 16) + 1)
+#define DCRN_CPC0_MIRQ0					0x0ec
+#define DCRN_CPC0_MIRQ1					0x0ed
+#define DCRN_CPC0_JTAGID				0x0ef
+
+#define DCRN_MAL0_CFG					0x180
+#define MAL_RESET 0x80000000
+
+/* 440EP Clock/Power-on Reset regs */
+#define DCRN_CPR0_ADDR	0xc	/* same DCR number as DCRN_CPR0_CFGADDR later in this header */
+#define DCRN_CPR0_DATA	0xd	/* same DCR number as DCRN_CPR0_CFGDATA later in this header */
+#define CPR0_PLLD0	0x60
+#define CPR0_OPBD0	0xc0
+#define CPR0_PERD0	0xe0
+#define CPR0_PRIMBD0	0xa0
+#define CPR0_SCPID	0x120
+#define CPR0_PLLC0	0x40
+
+/* 405GP Clocking/Power Management/Chip Control regs */
+#define DCRN_CPC0_PLLMR 0xb0	/* numerically equal to the 440GP DCRN_CPC0_SR (0x0b0) */
+#define DCRN_405_CPC0_CR0 0xb1
+#define DCRN_405_CPC0_CR1 0xb2
+#define DCRN_405_CPC0_PSR 0xb4
+
+/* 405EP Clocking/Power Management/Chip Control regs */
+#define DCRN_CPC0_PLLMR0  0xf0
+#define DCRN_CPC0_PLLMR1  0xf4
+#define DCRN_CPC0_UCR     0xf5
+
+/* 440GX/405EX Clock Control reg */
+#define DCRN_CPR0_CLKUPD				0x020
+#define DCRN_CPR0_PLLC					0x040
+#define DCRN_CPR0_PLLD					0x060
+#define DCRN_CPR0_PRIMAD				0x080
+#define DCRN_CPR0_PRIMBD				0x0a0
+#define DCRN_CPR0_OPBD					0x0c0
+#define DCRN_CPR0_PERD					0x0e0
+#define DCRN_CPR0_MALD					0x100
+
+#define DCRN_SDR0_CONFIG_ADDR 	0xe
+#define DCRN_SDR0_CONFIG_DATA	0xf
+
+/* SDR read/write helper macros: write the offset to the CONFIG_ADDR DCR, then access CONFIG_DATA */
+#define SDR0_READ(offset) ({\
+	mtdcr(DCRN_SDR0_CONFIG_ADDR, offset); \
+	mfdcr(DCRN_SDR0_CONFIG_DATA); })
+#define SDR0_WRITE(offset, data) ({\
+	mtdcr(DCRN_SDR0_CONFIG_ADDR, offset); \
+	mtdcr(DCRN_SDR0_CONFIG_DATA, data); })
+
+#define DCRN_SDR0_UART0		0x0120
+#define DCRN_SDR0_UART1		0x0121
+#define DCRN_SDR0_UART2		0x0122
+#define DCRN_SDR0_UART3		0x0123
+
+
+/* CPRs read/write helper macros - based off include/asm-ppc/ibm44x.h; write the offset to CFGADDR, then access CFGDATA */
+
+#define DCRN_CPR0_CFGADDR				0xc
+#define DCRN_CPR0_CFGDATA				0xd
+
+#define CPR0_READ(offset) ({\
+	mtdcr(DCRN_CPR0_CFGADDR, offset); \
+	mfdcr(DCRN_CPR0_CFGDATA); })
+#define CPR0_WRITE(offset, data) ({\
+	mtdcr(DCRN_CPR0_CFGADDR, offset); \
+	mtdcr(DCRN_CPR0_CFGDATA, data); })
+
+
+
+#endif	/* _PPC_BOOT_DCR_H_ */
diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c
new file mode 100644
index 0000000000..977eb15a6d
--- /dev/null
+++ b/arch/powerpc/boot/decompress.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Wrapper around the kernel's pre-boot decompression library.
+ *
+ * Copyright (C) IBM Corporation 2016.
+ */
+
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "reg.h"
+#include "types.h"
+
+/*
+ * The decompressor_*.c files play #ifdef games so they can be used in both
+ * pre-boot and regular kernel code. We need these definitions to make the
+ * includes work.
+ */
+
+#define STATIC static
+#define INIT
+
+/*
+ * The build process will copy the required zlib source files and headers
+ * out of lib/ and "fix" the includes so they do not pull in other kernel
+ * headers.
+ */
+
+#ifdef CONFIG_KERNEL_GZIP
+#	include "decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#	include "xz_config.h"
+#	include "../../../lib/decompress_unxz.c"
+#endif
+
+/* globals for tracking the state of the decompression */
+static unsigned long decompressed_bytes;
+static unsigned long limit;
+static unsigned long skip;
+static char *output_buffer;
+
+/*
+ * flush() is the output callback handed to __decompress(); it is called when
+ * the decompressor's scratch buffer is full and stores the part we want.
+ */
+static long flush(void *v, unsigned long buffer_size)
+{
+	unsigned long chunk_end = decompressed_bytes + buffer_size;
+	unsigned long remaining = buffer_size;
+	unsigned long copy_len;
+	char *src = v;
+	char *dst;
+
+	/*
+	 * Once the limit has been reached, report a fake error so that the
+	 * decompressor abandons the rest of the stream.
+	 */
+	if (decompressed_bytes >= limit)
+		return -1;
+
+	/* the whole chunk lies inside the skipped region: only count it */
+	if (chunk_end <= skip) {
+		decompressed_bytes = chunk_end;
+		return buffer_size;
+	}
+
+	/* the chunk straddles the skip boundary: drop its leading part */
+	if (decompressed_bytes < skip) {
+		unsigned long dropped = skip - decompressed_bytes;
+
+		src += dropped;
+		remaining -= dropped;
+		decompressed_bytes = skip;
+	}
+
+	dst = output_buffer + (decompressed_bytes - skip);
+	copy_len = min(decompressed_bytes + remaining, limit) - decompressed_bytes;
+
+	memcpy(dst, src, copy_len);
+	decompressed_bytes += copy_len;
+
+	return buffer_size;
+}
+
+static void print_err(char *s)
+{
+	/* only genuine failures are reported; once the limit has been hit
+	 * the "error" is the fake one used to terminate the decompressor
+	 */
+	if (decompressed_bytes < limit)
+		printf("Decompression error: '%s'\n\r", s);
+}
+
+/**
+ * partial_decompress - decompress part or all of a compressed buffer
+ * @inbuf:       compressed input buffer
+ * @input_size:  length of the input buffer
+ * @outbuf:      buffer receiving the decompressed data
+ * @output_size: length of the output buffer
+ * @_skip:       number of leading output bytes to discard
+ *
+ * Decompresses the data in inbuf and writes it to outbuf, leaving out the
+ * first _skip bytes of the output. When output_size bytes have been written
+ * the decompressor is stopped and the number of bytes stored in outbuf is
+ * returned. If decompression ends before that point, the decompressor's own
+ * return value is passed back (NB: This is specific to each decompressor type).
+ *
+ * The skip functionality is mainly there so the program can discover
+ * the size of the compressed image so that it can ask firmware (if present)
+ * for an appropriately sized buffer.
+ */
+long partial_decompress(void *inbuf, unsigned long input_size,
+	void *outbuf, unsigned long output_size, unsigned long _skip)
+{
+	int ret;
+
+	/* reset the state shared with flush() and print_err() */
+	decompressed_bytes = 0;
+	output_buffer = outbuf;
+	skip = _skip;
+
+	/*
+	 * The skipped bytes still have to be decompressed, so they count
+	 * towards the amount of data we ask the decompressor for.
+	 */
+	limit = output_size + _skip;
+
+	ret = __decompress(inbuf, input_size, NULL, flush, outbuf,
+		limit, NULL, print_err);
+
+	/*
+	 * Reaching the limit means we aborted on purpose (or finished), so
+	 * report the stored byte count; otherwise pass on the real error.
+	 */
+	if (decompressed_bytes >= limit)
+		return decompressed_bytes - skip;
+
+	return ret;
+}
diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c
new file mode 100644
index 0000000000..58fbcfcc98
--- /dev/null
+++ b/arch/powerpc/boot/devtree.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * devtree.c - convenience functions for device tree manipulation
+ * Copyright 2007 David Gibson, IBM Corporation.
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ *
+ * Authors: David Gibson <david@gibson.dropbear.id.au>
+ *	    Scott Wood <scottwood@freescale.com>
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "of.h"
+
+void dt_fixup_memory(u64 start, u64 size)
+{
+	void *rootp = finddevice("/");
+	void *memp;
+	int naddr, nsize, ncells = 0, i;
+	u32 memreg[4];
+
+	if (getprop(rootp, "#address-cells", &naddr, sizeof(naddr)) >= 0)
+		naddr = be32_to_cpu(naddr);
+	else
+		naddr = 2;
+	if (naddr != 1 && naddr != 2)
+		fatal("Can't cope with #address-cells == %d in /\n\r", naddr);
+
+	if (getprop(rootp, "#size-cells", &nsize, sizeof(nsize)) >= 0)
+		nsize = be32_to_cpu(nsize);
+	else
+		nsize = 1;
+	if (nsize != 1 && nsize != 2)
+		fatal("Can't cope with #size-cells == %d in /\n\r", nsize);
+
+	/* build the big-endian <start size> cells, upper word first */
+	if (naddr == 2)
+		memreg[ncells++] = cpu_to_be32(start >> 32);
+	memreg[ncells++] = cpu_to_be32(start & 0xffffffff);
+	if (nsize == 2)
+		memreg[ncells++] = cpu_to_be32(size >> 32);
+	memreg[ncells++] = cpu_to_be32(size & 0xffffffff);
+
+	memp = finddevice("/memory");
+	if (!memp) {
+		memp = create_node(NULL, "memory");
+		setprop_str(memp, "device_type", "memory");
+	}
+
+	printf("Memory <- <0x%x", be32_to_cpu(memreg[0]));
+	for (i = 1; i < ncells; i++)
+		printf(" 0x%x", be32_to_cpu(memreg[i]));
+	printf("> (%ldMB)\n\r", (unsigned long)(size >> 20));
+
+	setprop(memp, "reg", memreg, ncells * sizeof(u32));
+}
+
+#define MHZ(x)	(((x) + 500000) / 1000000)	/* Hz rounded to the nearest MHz */
+
+/* Print the CPU clocks and store them in every "cpu" node of the tree. */
+void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus)
+{
+	void *devp = NULL;
+
+	printf("CPU clock-frequency <- 0x%x (%dMHz)\n\r", cpu, MHZ(cpu));
+	printf("CPU timebase-frequency <- 0x%x (%dMHz)\n\r", tb, MHZ(tb));
+	if (bus > 0)
+		printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus));
+	while ((devp = find_node_by_devtype(devp, "cpu"))) {
+		setprop_val(devp, "clock-frequency", cpu_to_be32(cpu));
+		setprop_val(devp, "timebase-frequency", cpu_to_be32(tb));
+		if (bus > 0)
+			setprop_val(devp, "bus-frequency", cpu_to_be32(bus));
+	}
+	if (tb > 0)	/* tb == 0 would divide by zero; keep the old period */
+		timebase_period_ns = 1000000000 / tb;
+}
+
+void dt_fixup_clock(const char *path, u32 freq)
+{
+	void *dev = finddevice(path);
+
+	if (!dev)	/* finddevice() found nothing: leave the tree alone */
+		return;
+	printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq));
+	setprop_val(dev, "clock-frequency", cpu_to_be32(freq));
+}
+
+void dt_fixup_mac_address_by_alias(const char *alias, const u8 *addr)
+{
+	void *dev = find_node_by_alias(alias);
+
+	/* an alias that yields no node is silently ignored */
+	if (!dev)
+		return;
+
+	printf("%s: local-mac-address <-"
+	       " %02x:%02x:%02x:%02x:%02x:%02x\n\r", alias,
+	       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+	setprop(dev, "local-mac-address", addr, 6);
+}
+
+void dt_fixup_mac_address(u32 index, const u8 *addr)
+{
+	void *dev;
+
+	/* pick the node whose "linux,network-index" value matches index */
+	dev = find_node_by_prop_value(NULL, "linux,network-index",
+	                              (void*)&index, sizeof(index));
+	if (!dev)
+		return;
+	printf("ENET%d: local-mac-address <-"
+	       " %02x:%02x:%02x:%02x:%02x:%02x\n\r", index,
+	       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+	setprop(dev, "local-mac-address", addr, 6);
+}
+
+void __dt_fixup_mac_addresses(u32 startindex, ...)
+{
+	const u8 *mac;
+	u32 idx;
+	va_list args;
+
+	va_start(args, startindex);
+	/* the variable arguments are MAC pointers, ended by a NULL pointer */
+	for (idx = startindex; (mac = va_arg(args, const u8 *)); idx++)
+		dt_fixup_mac_address(idx, mac);
+
+	va_end(args);
+}
+
+#define MAX_ADDR_CELLS 4
+
+/* Fetch #address-cells/#size-cells of node, defaulting to 2 and 1. */
+void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize)
+{
+	int ok;
+
+	ok = getprop(node, "#address-cells", naddr, 4) == 4;
+	*naddr = ok ? be32_to_cpu(*naddr) : 2;
+
+	ok = getprop(node, "#size-cells", nsize, 4) == 4;
+	*nsize = ok ? be32_to_cpu(*nsize) : 1;
+}
+
+static void copy_val(u32 *dest, u32 *src, int naddr)
+{
+	/* right-align the naddr cells of src in dest, zero-filling the front */
+	u32 *tail = dest + (MAX_ADDR_CELLS - naddr);
+
+	memset(dest, 0, (MAX_ADDR_CELLS - naddr) * 4);
+	memcpy(tail, src, naddr * 4);
+}
+
+/* reg -= sub on big-endian cell arrays; returns 0 if the result underflowed */
+static int sub_reg(u32 *reg, u32 *sub)
+{
+	int i, borrow = 0;
+
+	for (i = MAX_ADDR_CELLS - 1; i >= 0; i--) {
+		u64 tmp = (u64)be32_to_cpu(reg[i]) - be32_to_cpu(sub[i]) - borrow;
+		borrow = tmp >> 63;	/* set iff the 64-bit difference wrapped */
+		reg[i] = cpu_to_be32((u32)tmp);
+	}
+	return !borrow;
+}
+
+static int add_reg(u32 *reg, u32 *add, int naddr)
+{
+	int cell = MAX_ADDR_CELLS, carry = 0;
+
+	/* add the last naddr cells, starting from the final (lowest) one */
+	while (--cell >= MAX_ADDR_CELLS - naddr) {
+		u64 sum = (u64)be32_to_cpu(reg[cell]) + be32_to_cpu(add[cell]) + carry;
+		reg[cell] = cpu_to_be32((u32)sum);
+		carry = sum >> 32;
+	}
+	return !carry;
+}
+
+/* Returns 1 if reg lies in [range, range + rangesize), else 0.  It is assumed
+ * that if the first byte of reg fits in a range, then the whole reg block fits.
+ */
+static int compare_reg(u32 *reg, u32 *range, u32 *rangesize)
+{
+	int i;
+	u32 end;
+
+	/* reject a reg that starts below the range */
+	for (i = 0; i < MAX_ADDR_CELLS; i++) {
+		if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i]))
+			return 0;
+		if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i]))
+			break;
+	}
+	/* NOTE: each cell's end is summed on its own, carries are not propagated */
+	for (i = 0; i < MAX_ADDR_CELLS; i++) {
+		end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]);
+		if (be32_to_cpu(reg[i]) < end)
+			return 1;
+		if (be32_to_cpu(reg[i]) > end)
+			return 0;
+	}
+	/* reg equals the exclusive end; never index reg[] past its last cell */
+	return 0;
+}
+
+/* reg must be MAX_ADDR_CELLS; returns the cell offset of the match or -1 */
+static int find_range(u32 *reg, u32 *ranges, int nregaddr,
+                      int naddr, int nsize, int buflen)
+{
+	u32 child_base[MAX_ADDR_CELLS];
+	u32 span[MAX_ADDR_CELLS];
+	int entry_cells = nregaddr + naddr + nsize;
+	int pos = 0;
+
+	while (pos + entry_cells <= buflen) {
+		u32 *entry = ranges + pos;
+
+		copy_val(child_base, entry, nregaddr);
+		copy_val(span, entry + nregaddr + naddr, nsize);
+		if (compare_reg(reg, child_base, span))
+			return pos;
+		pos += entry_cells;
+	}
+	return -1;
+}
+
+static u32 prop_buf[MAX_PROP_LEN / 4];
+
+/* Translate entry res of the reg data in prop_buf (reglen cells) through the
+ * "ranges" of every ancestor; returns 1 and fills addr and size, or 0 on error.
+ * Currently only generic buses without special encodings are supported.
+ * In particular, PCI is not supported.  Also, only the beginning of the
+ * reg block is tracked; size is ignored except in ranges.
+ */
+static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
+		unsigned long *size)
+{
+	u32 last_addr[MAX_ADDR_CELLS];
+	u32 this_addr[MAX_ADDR_CELLS];
+	void *parent;
+	u64 ret_addr, ret_size;
+	u32 naddr, nsize, prev_naddr, prev_nsize;
+	int buflen, offset;
+
+	parent = get_parent(node);
+	if (!parent)
+		return 0;
+
+	dt_get_reg_format(parent, &naddr, &nsize);
+	/* copy_val() cannot hold more than MAX_ADDR_CELLS cells */
+	if (naddr > MAX_ADDR_CELLS || nsize > 2)
+		return 0;
+
+	offset = (naddr + nsize) * res;
+	if (reglen < offset + naddr + nsize ||
+	    MAX_PROP_LEN < (offset + naddr + nsize) * 4)
+		return 0;
+
+	copy_val(last_addr, prop_buf + offset, naddr);
+	ret_size = be32_to_cpu(prop_buf[offset + naddr]);
+	if (nsize == 2) {
+		ret_size <<= 32;
+		ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]);
+	}
+
+	for (;;) {
+		prev_naddr = naddr;
+		prev_nsize = nsize;
+		node = parent;
+
+		parent = get_parent(node);
+		if (!parent)
+			break;
+
+		dt_get_reg_format(parent, &naddr, &nsize);
+
+		buflen = getprop(node, "ranges", prop_buf, sizeof(prop_buf));
+		if (buflen == 0)
+			continue;
+		if (buflen < 0 || buflen > sizeof(prop_buf) ||
+		    prev_naddr > MAX_ADDR_CELLS || naddr > MAX_ADDR_CELLS ||
+		    prev_nsize > MAX_ADDR_CELLS)
+			return 0;
+
+		offset = find_range(last_addr, prop_buf, prev_naddr,
+		                    naddr, prev_nsize, buflen / 4);
+		if (offset < 0)
+			return 0;
+
+		copy_val(this_addr, prop_buf + offset, prev_naddr);
+		if (!sub_reg(last_addr, this_addr))
+			return 0;
+
+		copy_val(this_addr, prop_buf + offset + prev_naddr, naddr);
+		if (!add_reg(last_addr, this_addr, naddr))
+			return 0;
+	}
+
+	if (naddr > 2)
+		return 0;
+
+	ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]);
+	if (sizeof(void *) == 4 &&
+	    (ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL ||
+	     ret_addr + ret_size > 0x100000000ULL))
+		return 0;
+
+	*addr = ret_addr;
+	if (size)
+		*size = ret_size;
+
+	return 1;
+}
+
+int dt_xlate_reg(void *node, int res, unsigned long *addr, unsigned long *size)
+{
+	/* "reg" is left in prop_buf for dt_xlate(); its length is passed in cells */
+	int reg_bytes = getprop(node, "reg", prop_buf, sizeof(prop_buf));
+
+	return dt_xlate(node, res, reg_bytes / 4, addr, size);
+}
+
+int dt_xlate_addr(void *node, u32 *buf, int buflen, unsigned long *xlated_addr)
+{
+	/* the address is staged in prop_buf, so it has to fit in there */
+	if (buflen <= sizeof(prop_buf)) {
+		memcpy(prop_buf, buf, buflen);
+		return dt_xlate(node, 0, buflen / 4, xlated_addr, NULL);
+	}
+	return 0;
+}
+
+int dt_is_compatible(void *node, const char *compat)
+{
+	char *list = (char *)prop_buf;
+	int len, pos = 0;
+
+	len = getprop(node, "compatible", list, MAX_PROP_LEN);
+	if (len < 0)
+		return 0;
+
+	/* walk the NUL-separated strings of the property, one per pass */
+	while (pos < len) {
+		if (strcmp(list + pos, compat) == 0)
+			return 1;
+		pos += strnlen(list + pos, len - pos) + 1;
+	}
+
+	return 0;
+}
+
+int dt_get_virtual_reg(void *node, void **addr, int nres)
+{
+	u32 *cells = (u32 *)addr;
+	unsigned long xaddr;
+	int n, i;
+
+	/* an explicit "virtual-reg" property wins; byte-swap it in place */
+	n = getprop(node, "virtual-reg", addr, nres * 4);
+	if (n > 0) {
+		n /= 4;
+		for (i = 0; i < n; i++)
+			cells[i] = be32_to_cpu(cells[i]);
+		return n;
+	}
+
+	/* otherwise translate the "reg" resources one at a time */
+	for (n = 0; n < nres && dt_xlate_reg(node, n, &xaddr, NULL); n++)
+		addr[n] = (void *)xaddr;
+
+	return n;
+}
+
diff --git a/arch/powerpc/boot/div64.S b/arch/powerpc/boot/div64.S
new file mode 100644
index 0000000000..4354928ed6
--- /dev/null
+++ b/arch/powerpc/boot/div64.S
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which gets overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#include "ppc_asm.h"
+
+	.globl __div64_32
+__div64_32:
+	lwz	r5,0(r3)	# get the dividend into r5/r6
+	lwz	r6,4(r3)
+	cmplw	r5,r4
+	li	r7,0		# r7 = quotient.hi, stored to 0(r3) on exit
+	li	r8,0		# r8 = quotient.lo accumulator, stored to 4(r3)
+	blt	1f
+	divwu	r7,r5,r4	# if dividend.hi >= divisor,
+	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
+	subf.	r5,r0,r5	# dividend.hi %= divisor
+	beq	3f
+1:	mr	r11,r5		# here dividend.hi != 0
+	andis.	r0,r5,0xc000
+	bne	2f
+	cntlzw	r0,r5		# we are shifting the dividend right
+	li	r10,-1		# to make it < 2^32, and shifting
+	srw	r10,r10,r0	# the divisor right the same amount,
+	addc	r9,r4,r10	# rounding up (so the estimate cannot
+	andc	r11,r6,r10	# ever be too large, only too small)
+	andc	r9,r9,r10
+	addze	r9,r9
+	or	r11,r5,r11
+	rotlw	r9,r9,r0
+	rotlw	r11,r11,r0
+	divwu	r11,r11,r9	# then we divide the shifted quantities
+2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
+	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
+	subfc	r6,r10,r6	# take the product from the dividend,
+	add	r8,r8,r11	# and add the estimate to the accumulated
+	subfe.	r5,r9,r5	# quotient
+	bne	1b
+3:	cmplw	r6,r4
+	blt	4f
+	divwu	r0,r6,r4	# perform the remaining 32-bit division
+	mullw	r10,r0,r4	# and get the remainder
+	add	r8,r8,r0
+	subf	r6,r10,r6
+4:	stw	r7,0(r3)	# return the quotient in *r3
+	stw	r8,4(r3)
+	mr	r3,r6		# return the remainder in r3
+	blr
+
+/*
+ * Extended precision shifts.
+ *
+ * Updated to be valid for shift counts from 0 to 63 inclusive.
+ * -- Gabriel
+ *
+ * R3/R4 has 64 bit value
+ * R5    has shift count
+ * result in R3/R4
+ *
+ *  ashrdi3: arithmetic right shift (sign propagation)	
+ *  lshrdi3: logical right shift
+ *  ashldi3: left shift
+ */
+	.globl __ashrdi3
+__ashrdi3:
+	subfic	r6,r5,32	# r6 = 32 - count
+	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
+	addi	r7,r5,32	# could be xori, or addi with -32
+	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
+	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
+	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
+	or	r4,r4,r6	# LSW |= t1
+	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
+	sraw	r3,r3,r5	# MSW = MSW >> count
+	or	r4,r4,r7	# LSW |= t2
+	blr			# result returned in r3/r4
+
+	.globl __ashldi3
+__ashldi3:
+	subfic	r6,r5,32	# r6 = 32 - count
+	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
+	addi	r7,r5,32	# could be xori, or addi with -32
+	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
+	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
+	or	r3,r3,r6	# MSW |= t1
+	slw	r4,r4,r5	# LSW = LSW << count
+	or	r3,r3,r7	# MSW |= t2
+	blr			# result returned in r3/r4
+
+	.globl __lshrdi3
+__lshrdi3:
+	subfic	r6,r5,32	# r6 = 32 - count
+	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
+	addi	r7,r5,32	# could be xori, or addi with -32
+	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
+	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
+	or	r4,r4,r6	# LSW |= t1
+	srw	r3,r3,r5	# MSW = MSW >> count
+	or	r4,r4,r7	# LSW |= t2
+	blr			# result returned in r3/r4
diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile
new file mode 100644
index 0000000000..fb335d05aa
--- /dev/null
+++ b/arch/powerpc/boot/dts/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtstree		:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
new file mode 100644
index 0000000000..034cfd8aa9
--- /dev/null
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * a3m071 board Device Tree Source
+ *
+ * Copyright 2012 Stefan Roese <sr@denx.de>
+ *
+ * Copyright (C) 2011 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };
+
+/ {
+	model = "anonymous,a3m071";
+	compatible = "anonymous,a3m071";
+
+	soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200b-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>; /* From boot loader */
+		system-frequency = <0>; /* From boot loader */
+
+		spi@f00 {
+			status = "disabled";
+		};
+
+		usb: usb@1000 {
+			status = "disabled";
+		};
+
+		psc@2000 {
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		psc@2200 {
+			status = "disabled";
+		};
+
+		psc@2400 {
+			status = "disabled";
+		};
+
+		psc@2600 {
+			status = "disabled";
+		};
+
+		psc@2800 {
+			status = "disabled";
+		};
+
+		psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2c00 0x100>;
+			interrupts = <2 4 0>;
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@3 {
+				reg = <0x03>;
+			};
+		};
+
+		ata@3a00 {
+			status = "disabled";
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			status = "disabled";
+		};
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200b-lpb","simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0xfc000000 0x02000000
+			  3 0 0xe9000000 0x00080000
+			  5 0 0xe8000000 0x00010000>;
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0 0x0 0x02000000>;
+			compatible = "cfi-flash";
+			bank-width = <2>;
+			partition@0 {
+				label = "u-boot";
+				reg = <0x00000000 0x00040000>;
+				read-only;
+			};
+			partition@40000 {
+				label = "env";
+				reg = <0x00040000 0x00020000>;
+			};
+			partition@60000 {
+				label = "dtb";
+				reg = <0x00060000 0x00020000>;
+			};
+			partition@80000 {
+				label = "kernel";
+				reg = <0x00080000 0x00500000>;
+			};
+			partition@580000 {
+				label = "root";
+				reg = <0x00580000 0x00A80000>;
+			};
+		};
+
+		fpga@3,0 {
+			compatible = "anonymous,a3m071-fpga";
+			reg = <3 0x0 0x00080000
+			       5 0x0 0x00010000>;
+			interrupts = <0 0 3>;  /* level low */
+		};
+	};
+
+	pci@f0000d00 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/a4m072.dts b/arch/powerpc/boot/dts/a4m072.dts
new file mode 100644
index 0000000000..d4270a2ec6
--- /dev/null
+++ b/arch/powerpc/boot/dts/a4m072.dts
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * a4m072 board Device Tree Source
+ *
+ * Copyright (C) 2011 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };
+&gpt3 { gpio-controller; };
+&gpt4 { gpio-controller; };
+&gpt5 { gpio-controller; };
+
+/ {
+	model = "anonymous,a4m072";
+	compatible = "anonymous,a4m072";
+
+	soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200b-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>; /* From boot loader */
+		system-frequency = <0>; /* From boot loader */
+
+		cdm@200 {
+			fsl,init-ext-48mhz-en = <0x0>;
+			fsl,init-fd-enable = <0x01>;
+			fsl,init-fd-counters = <0x3333>;
+		};
+
+		spi@f00 {
+			status = "disabled";
+		};
+
+		psc@2000 {
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		psc@2200 {
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2200 0x100>;
+			interrupts = <2 2 0>;
+		};
+
+		psc@2400 {
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2400 0x100>;
+			interrupts = <2 3 0>;
+		};
+
+		psc@2600 {
+			status = "disabled";
+		};
+
+		psc@2800 {
+			status = "disabled";
+		};
+
+		psc@2c00 {
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			reg = <0x2c00 0x100>;
+			interrupts = <2 4 0>;
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@1f {
+				reg = <0x1f>;
+				interrupts = <1 2 0>; /* IRQ 2 active low */
+			};
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			hwmon@2e {
+				compatible = "nsc,lm87";
+				reg = <0x2e>;
+			};
+			rtc@51 {
+				compatible = "nxp,rtc8564";
+				reg = <0x51>;
+			};
+		};
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200b-lpb","simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0xfe000000 0x02000000
+			  1 0 0x62000000 0x00400000
+			  2 0 0x64000000 0x00200000
+			  3 0 0x66000000 0x01000000
+			  6 0 0x68000000 0x01000000
+			  7 0 0x6a000000 0x00000004>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;
+			bank-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+		};
+		sram0@1,0 {
+			compatible = "mtd-ram";
+			reg = <1 0x00000 0x00400000>;
+			bank-width = <2>;
+		};
+	};
+
+	pci@f0000d00 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		compatible = "fsl,mpc5200-pci";
+		reg = <0xf0000d00 0x100>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+				 /* IDSEL 0x16 */
+				 0xc000 0 0 1 &mpc5200_pic 1 3 3
+				 0xc000 0 0 2 &mpc5200_pic 1 3 3
+				 0xc000 0 0 3 &mpc5200_pic 1 3 3
+				 0xc000 0 0 4 &mpc5200_pic 1 3 3>;
+		clock-frequency = <0>; /* From boot loader */
+		interrupts = <2 8 0 2 9 0 2 10 0>;
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+			 <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/ac14xx.dts b/arch/powerpc/boot/dts/ac14xx.dts
new file mode 100644
index 0000000000..5d8877e1f4
--- /dev/null
+++ b/arch/powerpc/boot/dts/ac14xx.dts
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree Source for the MPC5121e based ac14xx board
+ *
+ * Copyright 2012 Anatolij Gustschin <agust@denx.de>
+ */
+
+
+#include "mpc5121.dtsi"
+
+/ {
+	model = "ac14xx";
+	compatible = "ifm,ac14xx", "fsl,mpc5121";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial7;
+		spi4 = &spi4;
+		spi5 = &spi5;
+	};
+
+	cpus {
+		PowerPC,5121@0 {
+			timebase-frequency = <40000000>;	/*  40 MHz (csb/4) */
+			bus-frequency = <160000000>;		/* 160 MHz csb bus */
+			clock-frequency = <400000000>;		/* 400 MHz ppc core */
+		};
+	};
+
+	memory {
+		reg = <0x00000000 0x10000000>;			/* 256MB at 0 */
+	};
+
+	nfc@40000000 {
+		status = "disabled";
+	};
+
+	localbus@80000020 {
+		ranges = <0x0 0x0 0xfc000000 0x04000000	/* CS0: NOR flash */
+			  0x1 0x0 0xe0000000 0x00010000 /* CS1: FRAM */
+			  0x2 0x0 0xe0100000 0x00080000 /* CS2: asi1 */
+			  0x3 0x0 0xe0300000 0x00020000 /* CS3: comm */
+			  0x5 0x0 0xe0400000 0x00010000 /* CS5: safety */
+			  0x6 0x0 0xe0200000 0x00080000>; /* CS6: asi2 */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0x00000000 0x04000000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <2>;
+			device-width = <2>;
+
+			partition@0 {
+				label = "dtb-kernel-production";
+				reg = <0x00000000 0x00400000>;
+			};
+			partition@1 {
+				label = "filesystem-production";
+				reg = <0x00400000 0x03400000>;
+			};
+
+			partition@2 {
+				label = "recovery";
+				reg = <0x03800000 0x00700000>;
+			};
+
+			partition@3 {
+				label = "uboot-code";
+				reg = <0x03f00000 0x00040000>;
+			};
+			partition@4 {
+				label = "uboot-env1";
+				reg = <0x03f40000 0x00020000>;
+			};
+			partition@5 {
+				label = "uboot-env2";
+				reg = <0x03f60000 0x00020000>;
+			};
+		};
+
+		fram@1,0 {
+			compatible = "ifm,ac14xx-fram", "linux,uio-pdrv-genirq";
+			reg = <1 0x00000000 0x00010000>;
+		};
+
+		asi@2,0 {
+			/* masters mapping: CS, CS offset, size */
+			reg = <2 0x00000000 0x00080000
+			       6 0x00000000 0x00080000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "ifm,ac14xx-asi-fpga";
+			gpios = <
+				&gpio_pic 26 0	/* prog */
+				&gpio_pic 27 0	/* done */
+				&gpio_pic 10 0	/* reset */
+				>;
+
+			master@1 {
+				interrupts = <20 0x2>;
+				interrupt-parent = <&gpio_pic>;
+				chipselect = <2 0x00009000 0x00009100>;
+				label = "AS-i master 1";
+			};
+
+			master@2 {
+				interrupts = <21 0x2>;
+				interrupt-parent = <&gpio_pic>;
+				chipselect = <6 0x00009000 0x00009100>;
+				label = "AS-i master 2";
+			};
+		};
+
+		netx@3,0 {
+			compatible = "ifm,netx";
+			reg = <0x3 0x00000000 0x00020000>;
+			chipselect = <3 0x00101140 0x00203100>;
+			interrupts = <17 0x8>;
+			gpios = <&gpio_pic 15 0>;
+		};
+
+		safety@5,0 {
+			compatible = "ifm,safety";
+			reg = <0x5 0x00000000 0x00010000>;
+			chipselect = <5 0x00009000 0x00009100>;
+			interrupts = <22 0x2>;
+			interrupt-parent = <&gpio_pic>;
+			gpios = <
+				&gpio_pic 12 0	/* prog */
+				&gpio_pic 11 0	/* done */
+				>;
+		};
+	};
+
+	clocks {
+		osc {
+			clock-frequency = <25000000>;
+		};
+	};
+
+	soc@80000000 {
+		bus-frequency = <80000000>;	/* 80 MHz ips bus */
+
+		clock@f00 {
+			compatible = "fsl,mpc5121rev2-clock", "fsl,mpc5121-clock";
+		};
+
+		/*
+		 * GPIO PIC:
+		 * interrupts cell = <pin nr, sense>
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		gpio_pic: gpio@1100 {
+			gpio-controller;
+			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		sdhc@1500 {
+			cd-gpios = <&gpio_pic 23 0>;	/* card detect */
+			wp-gpios = <&gpio_pic 24 0>;	/* write protect */
+			wp-inverted;			/* WP active high */
+		};
+
+		i2c@1700 {
+			/* use Fast-mode */
+			clock-frequency = <400000>;
+
+			at24@30 {
+				compatible = "atmel,24c01";
+				reg = <0x30>;
+			};
+
+			at24@31 {
+				compatible = "atmel,24c01";
+				reg = <0x31>;
+			};
+
+			temp@48 {
+				compatible = "ad,ad7414";
+				reg = <0x48>;
+			};
+
+			at24@50 {
+				compatible = "atmel,24c01";
+				reg = <0x50>;
+			};
+
+			at24@51 {
+				compatible = "atmel,24c01";
+				reg = <0x51>;
+			};
+
+			at24@52 {
+				compatible = "atmel,24c01";
+				reg = <0x52>;
+			};
+
+			at24@53 {
+				compatible = "atmel,24c01";
+				reg = <0x53>;
+			};
+
+			at24@54 {
+				compatible = "atmel,24c01";
+				reg = <0x54>;
+			};
+
+			at24@55 {
+				compatible = "atmel,24c01";
+				reg = <0x55>;
+			};
+
+			at24@56 {
+				compatible = "atmel,24c01";
+				reg = <0x56>;
+			};
+
+			at24@57 {
+				compatible = "atmel,24c01";
+				reg = <0x57>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00";
+				reg = <0x68>;
+			};
+		};
+
+		axe_pic: axe-base@2000 {
+			compatible = "fsl,mpc5121-axe-base";
+			reg = <0x2000 0x100>;
+			interrupts = <42 0x8>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+		};
+
+		axe-app {
+			compatible = "fsl,mpc5121-axe-app";
+			interrupt-parent = <&axe_pic>;
+			interrupts = <
+					/* soft interrupts */
+					0 0x0	1 0x0	2 0x0	3 0x0
+					4 0x0	5 0x0	6 0x0	7 0x0
+					/* fifo interrupts */
+					8 0x0	9 0x0	10 0x0	11 0x0
+				>;
+		};
+
+		display@2100 {
+			edid = [00 FF FF FF FF FF FF 00 14 94 00 00 00 00 00 00
+				0A 12 01 03 80 1C 23 78 CA 88 FF 94 52 54 8E 27
+				1E 4C 50 00 00 00 01 01 01 01 01 01 01 01 01 01
+				01 01 01 01 01 01 FB 00 B0 14 00 DC 05 00 08 04
+				21 00 1C 23 00 00 00 18 00 00 00 FD 00 38 3C 1F
+				3C 01 0A 20 20 20 20 20 20 20 00 00 00 FC 00 45
+				54 30 31 38 30 30 33 44 4D 55 0A 0A 00 00 00 10
+				00 41 30 30 30 30 30 30 30 30 30 30 30 31 00 D5];
+		};
+
+		can@2300 {
+			status = "disabled";
+		};
+
+		can@2380 {
+			status = "disabled";
+		};
+
+		viu@2400 {
+			status = "disabled";
+		};
+
+		mdio@2800 {
+			phy0: ethernet-phy@1f {
+				compatible = "smsc,lan8700";
+				reg = <0x1f>;
+			};
+		};
+
+		enet: ethernet@2800 {
+			phy-handle = <&phy0>;
+		};
+
+		usb@3000 {
+			status = "disabled";
+		};
+
+		usb@4000 {
+			status = "disabled";
+		};
+
+		/* PSC3 serial port A, aka ttyPSC0 */
+		serial0: psc@11300 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+			fsl,rx-fifo-size = <512>;
+			fsl,tx-fifo-size = <512>;
+		};
+
+		/* PSC4 in SPI mode */
+		spi4: psc@11400 {
+			compatible = "fsl,mpc5121-psc-spi", "fsl,mpc5121-psc";
+			fsl,rx-fifo-size = <768>;
+			fsl,tx-fifo-size = <768>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			num-cs = <1>;
+			cs-gpios = <&gpio_pic 25 0>;
+
+			flash: m25p128@0 {
+				compatible = "st,m25p128";
+				spi-max-frequency = <20000000>;
+				reg = <0>;
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				partition@0 {
+					label = "spi-flash0";
+					reg = <0x00000000 0x01000000>;
+				};
+			};
+		};
+
+		/* PSC5 in SPI mode */
+		spi5: psc@11500 {
+			compatible = "fsl,mpc5121-psc-spi", "fsl,mpc5121-psc";
+			fsl,mode = "spi-master";
+			fsl,rx-fifo-size = <128>;
+			fsl,tx-fifo-size = <128>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			lcd@0 {
+				compatible = "ilitek,ili922x";
+				reg = <0>;
+				spi-max-frequency = <100000>;
+				spi-cpol;
+				spi-cpha;
+			};
+		};
+
+		/* PSC7 serial port C, aka ttyPSC2 */
+		serial7: psc@11700 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+			fsl,rx-fifo-size = <512>;
+			fsl,tx-fifo-size = <512>;
+		};
+
+		matrix_keypad@0 {
+			compatible = "gpio-matrix-keypad";
+			debounce-delay-ms = <5>;
+			col-scan-delay-us = <1>;
+			gpio-activelow;
+			col-gpios-binary;
+			col-switch-delay-ms = <200>;
+
+			col-gpios = <&gpio_pic 1 0>;	/* pin1 */
+
+			row-gpios = <&gpio_pic 2 0	/* pin2 */
+				     &gpio_pic 3 0	/* pin3 */
+				     &gpio_pic 4 0>;	/* pin4 */
+
+			linux,keymap = <0x0000006e	/* FN LEFT */
+					0x01000067	/* UP */
+					0x02000066	/* FN RIGHT */
+					0x00010069	/* LEFT */
+					0x0101006a	/* DOWN */
+					0x0201006c>;	/* RIGHT */
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		backlight {
+			label = "backlight";
+			gpios = <&gpio_pic 0 0>;
+			default-state = "keep";
+		};
+		green {
+			label = "green";
+			gpios = <&gpio_pic 18 0>;
+			default-state = "keep";
+		};
+		red {
+			label = "red";
+			gpios = <&gpio_pic 19 0>;
+			default-state = "keep";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
new file mode 100644
index 0000000000..deb52e41ab
--- /dev/null
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -0,0 +1,224 @@
+/*
+ * Device Tree Source for AMCC Acadia (405EZ)
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "amcc,acadia";
+	compatible = "amcc,acadia";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,405EZ";
+			reg = <0x0>;
+			clock-frequency = <0>; /* Filled in by wrapper */
+			timebase-frequency = <0>; /* Filled in by wrapper */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>;
+			d-cache-size = <16384>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0>; /* Filled in by wrapper */
+	};
+
+	UIC0: interrupt-controller {
+		compatible = "ibm,uic-405ez", "ibm,uic";
+		interrupt-controller;
+		dcr-reg = <0x0c0 0x009>;
+		cell-index = <0>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	plb {
+		compatible = "ibm,plb-405ez", "ibm,plb3";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by wrapper */
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-405ez", "ibm,mcmal";
+			dcr-reg = <0x380 0x62>;
+			num-tx-chans = <1>;
+			num-rx-chans = <1>;
+			interrupt-parent = <&UIC0>;
+			/* 405EZ has only 3 interrupts to the UIC, as
+			 * SERR, TXDE, and RXDE are or'd together into
+			 * one UIC bit
+			 */
+			interrupts = <
+				0x13 0x4 /* TXEOB */
+				0x15 0x4 /* RXEOB */
+				0x12 0x4 /* SERR, TXDE, RXDE */>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405ez", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges;
+			dcr-reg = <0x0a 0x05>;
+			clock-frequency = <0>; /* Filled in by wrapper */
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x8>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by wrapper */
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x5 0x4>;
+			};
+
+			UART1: serial@ef600400 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x8>;
+				clock-frequency = <0>; /* Filled in by wrapper */
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x6 0x4>;
+			};
+
+			IIC: i2c@ef600500 {
+				compatible = "ibm,iic-405ez", "ibm,iic";
+				reg = <0xef600500 0x11>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0xa 0x4>;
+			};
+
+			GPIO0: gpio@ef600700 {
+				compatible = "ibm,gpio-405ez";
+				reg = <0xef600700 0x20>;
+			};
+
+			GPIO1: gpio@ef600800 {
+				compatible = "ibm,gpio-405ez";
+				reg = <0xef600800 0x20>;
+			};
+
+			EMAC0: ethernet@ef600900 {
+				device_type = "network";
+				compatible = "ibm,emac-405ez", "ibm,emac";
+				interrupt-parent = <&UIC0>;
+				interrupts = <
+					0x10 0x4 /* Ethernet */
+					0x11 0x4 /* Ethernet Wake up */>;
+				local-mac-address = [000000000000]; /* Filled in by wrapper */
+				reg = <0xef600900 0x70>;
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "mii";
+				phy-map = <0x0>;
+			};
+
+			CAN0: can@ef601000 {
+				compatible = "amcc,can-405ez";
+				reg = <0xef601000 0x620>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			CAN1: can@ef601800 {
+				compatible = "amcc,can-405ez";
+				reg = <0xef601800 0x620>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x8 0x4>;
+			};
+
+			cameleon@ef602000 {
+				compatible = "amcc,cameleon-405ez";
+				reg = <0xef602000 0x800>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0xb 0x4 0xc 0x4>;
+			};
+
+			ieee1588@ef602800 {
+				compatible = "amcc,ieee1588-405ez";
+				reg = <0xef602800 0x60>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x4 0x4>;
+				/* This thing is a bit weird.  It has its own UIC
+				 * that it uses to generate snapshot triggers.  We
+				 * don't really support this device yet, and it needs
+				 * work to figure this out.
+				 */
+				dcr-reg = <0xe0 0x9>;
+			};
+
+			usb@ef603000 {
+				compatible = "ohci-be";
+				reg = <0xef603000 0x80>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0xd 0x4 0xe 0x4>;
+			};
+
+			dac@ef603300 {
+				compatible = "amcc,dac-405ez";
+				reg = <0xef603300 0x40>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x18 0x4>;
+			};
+
+			adc@ef603400 {
+				compatible = "amcc,adc-405ez";
+				reg = <0xef603400 0x40>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x17 0x4>;
+			};
+
+			spi@ef603500 {
+				compatible = "amcc,spi-405ez";
+				reg = <0xef603500 0x100>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x9 0x4>;
+			};
+		};
+
+		EBC0: ebc {
+			compatible = "ibm,ebc-405ez", "ibm,ebc";
+			dcr-reg = <0x12 0x2>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+			clock-frequency = <0>; /* Filled in by wrapper */
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+	};
+};
diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts
new file mode 100644
index 0000000000..b51c97abfa
--- /dev/null
+++ b/arch/powerpc/boot/dts/adder875-redboot.dts
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree Source for Analogue & Micro Adder MPC875 running RedBoot
+ *
+ * Copyright 2006 MontaVista Software, Inc.
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ */
+
+/dts-v1/;
+/ {
+	model = "Analogue & Micro Adder MPC875";
+	compatible = "analogue-and-micro,adder875";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		console = &console;
+		ethernet0 = &eth0;
+		ethernet1 = &eth1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,875@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <16>;
+			i-cache-line-size = <16>;
+			d-cache-size = <8192>;
+			i-cache-size = <8192>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			interrupts = <15 2>;	// decrementer interrupt
+			interrupt-parent = <&PIC>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0x01000000>;
+	};
+
+	localbus@fa200100 {
+		compatible = "fsl,mpc885-localbus", "fsl,pq1-localbus",
+		             "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xfa200100 0x40>;
+
+		ranges = <
+			0 0 0xfe000000 0x00800000
+			2 0 0xfa100000 0x00008000
+		>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x800000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+	};
+
+	soc@fa200000 {
+		compatible = "fsl,mpc875-immr", "fsl,pq1-soc", "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0xfa200000 0x00004000>;
+
+		// Temporary until code stops depending on it.
+		device_type = "soc";
+
+		// Temporary until get_immrbase() is fixed.
+		reg = <0xfa200000 0x4000>;
+
+		mdio@e00 {
+			compatible = "fsl,mpc875-fec-mdio", "fsl,pq1-fec-mdio";
+			reg = <0xe00 0x188>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			PHY0: ethernet-phy@0 {
+				reg = <0>;
+			};
+
+			PHY1: ethernet-phy@1 {
+				reg = <1>;
+			};
+		};
+
+		eth0: ethernet@e00 {
+			device_type = "network";
+			compatible = "fsl,mpc875-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0xe00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <3 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY0>;
+			linux,network-index = <0>;
+		};
+
+		eth1: ethernet@1e00 {
+			device_type = "network";
+			compatible = "fsl,mpc875-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0x1e00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <7 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY1>;
+			linux,network-index = <1>;
+		};
+
+		PIC: interrupt-controller@0 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0 0x24>;
+			compatible = "fsl,mpc875-pic", "fsl,pq1-pic";
+		};
+
+		cpm@9c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc875-cpm", "fsl,cpm1", "simple-bus";
+			interrupts = <0>;	// cpm error interrupt
+			interrupt-parent = <&CPM_PIC>;
+			reg = <0x9c0 0x40>;
+			ranges;
+
+			muram {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x2000 0x2000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x1c00>;
+				};
+			};
+
+			brg@9f0 {
+				compatible = "fsl,mpc875-brg",
+				             "fsl,cpm1-brg",
+				             "fsl,cpm-brg";
+				clock-frequency = <50000000>;
+				reg = <0x9f0 0x10>;
+			};
+
+			CPM_PIC: interrupt-controller@930 {
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				interrupts = <5 2 0 2>;
+				interrupt-parent = <&PIC>;
+				reg = <0x930 0x20>;
+				compatible = "fsl,mpc875-cpm-pic",
+				             "fsl,cpm1-pic";
+			};
+
+			console: serial@a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc875-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa80 0x10 0x3e80 0x40>;
+				interrupts = <4>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x0090>;
+				current-speed = <115200>;
+			};
+		};
+	};
+
+	chosen {
+		stdout-path = &console;
+	};
+};
diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts b/arch/powerpc/boot/dts/adder875-uboot.dts
new file mode 100644
index 0000000000..ec776103f5
--- /dev/null
+++ b/arch/powerpc/boot/dts/adder875-uboot.dts
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree Source for Analogue & Micro Adder MPC875 running U-Boot
+ *
+ * Copyright 2006 MontaVista Software, Inc.
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ */
+
+/dts-v1/;
+/ {
+	model = "Analogue & Micro Adder MPC875";
+	compatible = "analogue-and-micro,adder875";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		console = &console;
+		ethernet0 = &eth0;
+		ethernet1 = &eth1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,875@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <16>;
+			i-cache-line-size = <16>;
+			d-cache-size = <8192>;
+			i-cache-size = <8192>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			interrupts = <15 2>;	// decrementer interrupt
+			interrupt-parent = <&PIC>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0x01000000>;
+	};
+
+	localbus@ff000100 {
+		compatible = "fsl,mpc885-localbus", "fsl,pq1-localbus",
+		             "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xff000100 0x40>;
+
+		ranges = <
+			0 0 0xfe000000 0x01000000
+		>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x800000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+	};
+
+	soc@ff000000 {
+		compatible = "fsl,mpc875-immr", "fsl,pq1-soc", "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0xff000000 0x00004000>;
+
+		// Temporary until code stops depending on it.
+		device_type = "soc";
+
+		// Temporary until get_immrbase() is fixed.
+		reg = <0xff000000 0x4000>;
+
+		mdio@e00 {
+			compatible = "fsl,mpc875-fec-mdio", "fsl,pq1-fec-mdio";
+			reg = <0xe00 0x188>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			PHY0: ethernet-phy@0 {
+				reg = <0>;
+			};
+
+			PHY1: ethernet-phy@1 {
+				reg = <1>;
+			};
+		};
+
+		eth0: ethernet@e00 {
+			device_type = "network";
+			compatible = "fsl,mpc875-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0xe00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <3 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY0>;
+			linux,network-index = <0>;
+		};
+
+		eth1: ethernet@1e00 {
+			device_type = "network";
+			compatible = "fsl,mpc875-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0x1e00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <7 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY1>;
+			linux,network-index = <1>;
+		};
+
+		PIC: interrupt-controller@0 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0 0x24>;
+			compatible = "fsl,mpc875-pic", "fsl,pq1-pic";
+		};
+
+		cpm@9c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc875-cpm", "fsl,cpm1", "simple-bus";
+			interrupts = <0>;	// cpm error interrupt
+			interrupt-parent = <&CPM_PIC>;
+			reg = <0x9c0 0x40>;
+			ranges;
+
+			muram {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x2000 0x2000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x1c00>;
+				};
+			};
+
+			brg@9f0 {
+				compatible = "fsl,mpc875-brg",
+				             "fsl,cpm1-brg",
+				             "fsl,cpm-brg";
+				clock-frequency = <50000000>;
+				reg = <0x9f0 0x10>;
+			};
+
+			CPM_PIC: interrupt-controller@930 {
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				interrupts = <5 2 0 2>;
+				interrupt-parent = <&PIC>;
+				reg = <0x930 0x20>;
+				compatible = "fsl,mpc875-cpm-pic",
+				             "fsl,cpm1-pic";
+			};
+
+			console: serial@a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc875-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa80 0x10 0x3e80 0x40>;
+				interrupts = <4>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x0090>;
+				current-speed = <115200>;
+			};
+		};
+	};
+
+	chosen {
+		stdout-path = &console;
+	};
+};
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
new file mode 100644
index 0000000000..df18f8dc46
--- /dev/null
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -0,0 +1,415 @@
+/*
+ * Device Tree Source for IBM Embedded PPC 476 Platform
+ *
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x01f00000 0x00100000;	// spin table
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	model = "ibm,akebono";
+	compatible = "ibm,akebono", "ibm,476gtr";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,476";
+			reg = <0>;
+			clock-frequency = <1600000000>; // 1.6 GHz
+			timebase-frequency = <100000000>; // 100 MHz
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "okay";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			model = "PowerPC,476";
+			reg = <1>;
+			clock-frequency = <1600000000>; // 1.6 GHz
+			timebase-frequency = <100000000>; // 100 MHz
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "disabled";
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x01f00000>; // base of the /memreserve/ spin table region
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>; // filled in by zImage
+	};
+
+	MPIC: interrupt-controller {
+		compatible = "chrp,open-pic";
+		interrupt-controller;
+		dcr-reg = <0xffc00000 0x00040000>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		single-cpu-affinity;
+	};
+
+	plb {
+		compatible = "ibm,plb6";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		clock-frequency = <200000000>; // 200Mhz
+
+		HSTA0: hsta@310000e0000 {
+			compatible = "ibm,476gtr-hsta-msi", "ibm,hsta-msi";
+			reg = <0x310 0x000e0000 0x0 0xf0>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <108 0
+				      109 0
+				      110 0
+				      111 0
+				      112 0
+				      113 0
+				      114 0
+				      115 0
+				      116 0
+				      117 0
+				      118 0
+				      119 0
+				      120 0
+				      121 0
+				      122 0
+				      123 0>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-476gtr", "ibm,mcmal2";
+			dcr-reg = <0xc0000000 0x062>;
+			num-tx-chans = <1>;
+			num-rx-chans = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <	/*TXEOB*/ 77 0x4
+					/*RXEOB*/ 78 0x4
+					/*SERR*/  76 0x4
+					/*TXDE*/  79 0x4
+					/*RXDE*/  80 0x4>;
+		};
+
+		SATA0: sata@30000010000 {
+			compatible = "ibm,476gtr-ahci";
+			reg = <0x300 0x00010000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <93 2>;
+		};
+
+		EHCI0: ehci@30010000000 {
+			compatible = "ibm,476gtr-ehci", "generic-ehci";
+			reg = <0x300 0x10000000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <85 2>;
+		};
+
+		SD0: sd@30000000000 {
+			compatible = "ibm,476gtr-sdhci", "generic-sdhci";
+			reg = <0x300 0x00000000 0x0 0x10000>;
+			interrupts = <91 2>;
+			interrupt-parent = <&MPIC>;
+		};
+
+		OHCI0: ohci@30010010000 {
+			compatible = "ibm,476gtr-ohci", "generic-ohci";
+			reg = <0x300 0x10010000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <89 1>;
+		};
+
+		OHCI1: ohci@30010020000 {
+			compatible = "ibm,476gtr-ohci", "generic-ohci";
+			reg = <0x300 0x10020000 0x0 0x10000>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <88 1>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-4xx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full
+			 * 32-bit range
+			 */
+			ranges = <0x00000000 0x0000033f 0x00000000 0x80000000
+				  0x80000000 0x0000033f 0x80000000 0x80000000>;
+			clock-frequency = <100000000>;
+
+			RGMII0: emac-rgmii-wol@50004 {
+				compatible = "ibm,rgmii-wol-476gtr", "ibm,rgmii-wol";
+				reg = <0x50004 0x00000008>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@30000 {
+				device_type = "network";
+				compatible = "ibm,emac-476gtr", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &MPIC 81 0x4
+						 /*Wake*/   0x1 &MPIC 82 0x4>;
+				reg = <0x30000 0x78>;
+
+				/* local-mac-address will normally be added by
+				 * the wrapper. If your device doesn't support
+				 * passing data to the wrapper (in the form
+				 * local-mac-addr=<hwaddr>) then you will need
+				 * to set it manually here. */
+				//local-mac-address = [000000000000];
+
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-wol-device = <&RGMII0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			UART0: serial@10000 {
+				device_type = "serial";
+				compatible = "ns16750", "ns16550";
+				reg = <0x10000 0x00000008>;
+				virtual-reg = <0xe8010000>;
+				clock-frequency = <1851851>;
+				current-speed = <38400>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <39 2>;
+			};
+
+			IIC0: i2c@0 {
+				compatible = "ibm,iic-476gtr", "ibm,iic";
+				reg = <0x0 0x00000020>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <37 2>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				rtc@68 {
+					compatible = "st,m41t80", "m41st85";
+					reg = <0x68>;
+				};
+			};
+
+			IIC1: i2c@100 {
+				compatible = "ibm,iic-476gtr", "ibm,iic";
+				reg = <0x100 0x00000020>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <38 2>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				avr@58 {
+					compatible = "ibm,akebono-avr";
+					reg = <0x58>;
+				};
+			};
+
+			FPGA0: fpga@ebc00000 {
+				compatible = "ibm,akebono-fpga";
+				reg = <0xebc00000 0x8>;
+			};
+		};
+
+		PCIE0: pcie@10100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x00000101 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000100 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0xc0 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000110 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x00000140 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 45 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 46 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 47 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>;
+		};
+
+		PCIE1: pcie@20100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x00000201 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000200 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0x100 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000210 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x00000240 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 53 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 54 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 55 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>;
+		};
+
+		PCIE2: pcie@18100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x2>; /* port number */
+			reg = <0x00000181 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000180 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0xe0 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000190 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x000001c0 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 61 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 62 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 63 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>;
+		};
+
+		PCIE3: pcie@28100000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x3>; /* port number */
+			reg = <0x00000281 0x00000000 0x0 0x10000000	       /* Config space access */
+			       0x00000280 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0x120 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000290 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x000002c0 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0x0 to 0x40000000000. In order to use MSI
+			 * PCI devices must be able to write to the HSTA module.
+			 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x400 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 69 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 70 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 71 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 72 0x2 /* int D */>;
+		};
+	};
+
+	chosen {
+		stdout-path = &UART0;
+	};
+};
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
new file mode 100644
index 0000000000..5c68db36d8
--- /dev/null
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AmigaOne Device Tree Source
+ *
+ * Copyright 2008 Gerhard Pircher (gerhard_pircher@gmx.net)
+ */
+
+/dts-v1/;
+
+/ {
+	model = "AmigaOne";
+	compatible = "eyetech,amigaone";
+	coherency-off;
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <32768>;		// L1, 32K
+			i-cache-size = <32768>;		// L1, 32K
+			timebase-frequency = <0>;	// 33.3 MHz, from U-boot
+			clock-frequency = <0>;		// From U-boot
+			bus-frequency = <0>;		// From U-boot
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0>;				// From U-boot
+	};
+
+	pci@80000000 {
+		device_type = "pci";
+		compatible = "mai-logic,articia-s";
+		bus-frequency = <33333333>;
+		bus-range = <0 0xff>;
+		ranges = <0x01000000 0 0x00000000 0xfe000000 0 0x00c00000	// PCI I/O
+		          0x02000000 0 0x80000000 0x80000000 0 0x7d000000	// PCI memory
+		          0x02000000 0 0x00000000 0xfd000000 0 0x01000000>;	// PCI alias memory (ISA)
+		// Configuration address and data register.
+		reg = <0xfec00cf8 4
+		       0xfee00cfc 4>;
+		8259-interrupt-acknowledge = <0xfef00000>;
+		// Do not define an interrupt-parent here if there is no
+		// interrupt-map property.
+		#address-cells = <3>;
+		#size-cells = <2>;
+
+		isa@7 {
+			device_type = "isa";
+			compatible = "pciclass,0601";
+			vendor-id = <0x00001106>;
+			device-id = <0x00000686>;
+			revision-id = <0x00000010>;
+			class-code = <0x00060100>;
+			subsystem-id = <0>;
+			subsystem-vendor-id = <0>;
+			devsel-speed = <0x00000001>;
+			min-grant = <0>;
+			max-latency = <0>;
+			/* First 4k for I/O at 0x0 on PCI mapped to 0x0 on ISA. */
+			ranges = <0x00000001 0 0x01000000 0 0x00000000 0x00001000>;
+			interrupt-parent = <&i8259>;
+			#interrupt-cells = <2>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+
+			dma-controller@0 {
+				compatible = "pnpPNP,200";
+				reg = <1 0x00000000 0x00000020
+				       1 0x00000080 0x00000010
+				       1 0x000000c0 0x00000020>;
+			};
+
+			i8259: interrupt-controller@20 {
+				device_type = "interrupt-controller";
+				compatible = "pnpPNP,000";
+				interrupt-controller;
+				reg = <1 0x00000020 0x00000002
+				       1 0x000000a0 0x00000002
+				       1 0x000004d0 0x00000002>;
+				reserved-interrupts = <2>;
+				#interrupt-cells = <2>;
+			};
+
+			timer@40 {
+				// Also adds pcspkr to platform devices.
+				compatible = "pnpPNP,100";
+				reg = <1 0x00000040 0x00000020>;
+			};
+
+			8042@60 {
+				device_type = "8042";
+				reg = <1 0x00000060 0x00000001
+				       1 0x00000064 0x00000001>;
+				interrupts = <1 3 12 3>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				keyboard@0 {
+					compatible = "pnpPNP,303";
+					reg = <0>;
+				};
+
+				mouse@1 {
+					compatible = "pnpPNP,f03";
+					reg = <1>;
+				};
+			};
+
+			rtc@70 {
+				compatible = "pnpPNP,b00";
+				reg = <1 0x00000070 0x00000002>;
+				interrupts = <8 3>;
+			};
+
+			serial@3f8 {
+				device_type = "serial";
+				compatible = "pnpPNP,501","pnpPNP,500";
+				reg = <1 0x000003f8 0x00000008>;
+				interrupts = <4 3>;
+				clock-frequency = <1843200>;
+				current-speed = <115200>;
+			};
+
+			serial@2f8 {
+				device_type = "serial";
+				compatible = "pnpPNP,501","pnpPNP,500";
+				reg = <1 0x000002f8 0x00000008>;
+				interrupts = <3 3>;
+				clock-frequency = <1843200>;
+				current-speed = <115200>;
+			};
+
+			parallel@378 {
+				device_type = "parallel";
+				// No ECP support for now, otherwise add "pnpPNP,401".
+				compatible = "pnpPNP,400";
+				reg = <1 0x00000378 0x00000003
+				       1 0x00000778 0x00000003>;
+			};
+
+			fdc@3f0 {
+				device_type = "fdc";
+				compatible = "pnpPNP,700";
+				reg = <1 0x000003f0 0x00000008>;
+				interrupts = <6 3>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				disk@0 {
+					reg = <0>;
+				};
+			};
+		};
+	};
+
+	chosen {
+		stdout-path = "/pci@80000000/isa@7/serial@3f8";
+	};
+};
diff --git a/arch/powerpc/boot/dts/arches.dts b/arch/powerpc/boot/dts/arches.dts
new file mode 100644
index 0000000000..75a376a998
--- /dev/null
+++ b/arch/powerpc/boot/dts/arches.dts
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree Source for AMCC Arches (dual 460GT board)
+ *
+ * (C) Copyright 2008 Applied Micro Circuits Corporation
+ * Victor Gallardo <vgallardo@amcc.com>
+ * Adam Graham <agraham@amcc.com>
+ *
+ * Based on the glacier.dts file
+ *   Stefan Roese <sr@denx.de>
+ *   Copyright 2008 DENX Software Engineering
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,arches";
+	compatible = "amcc,arches";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		ethernet2 = &EMAC2;
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,460GT";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			next-level-cache = <&L2C0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-460gt";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-460gt";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	L2C0: l2c {
+		compatible = "ibm,l2-cache-460gt", "ibm,l2-cache";
+		dcr-reg = <0x020 0x008		/* Internal SRAM DCR's */
+			   0x030 0x008>;	/* L2 cache DCR's */
+		cache-line-size = <32>;		/* 32 bytes */
+		cache-size = <262144>;		/* L2, 256K */
+		interrupt-parent = <&UIC1>;
+		interrupts = <11 1>;
+	};
+
+	plb {
+		compatible = "ibm,plb-460gt", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-460gt", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		CRYPTO: crypto@180000 {
+			compatible = "amcc,ppc460gt-crypto", "amcc,ppc4xx-crypto";
+			reg = <4 0x00180000 0x80400>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x1d 0x4>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-460gt", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <3>;
+			num-rx-chans = <24>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <	/*TXEOB*/ 0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x3 0x4
+					/*TXDE*/  0x4 0x4
+					/*RXDE*/  0x5 0x4>;
+			desc-base-addr-high = <0x8>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-460gt", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-460gt", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x6 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl256n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x02000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "root";
+						reg = <0x00200000 0x00200000>;
+					};
+					partition@400000 {
+						label = "user";
+						reg = <0x00400000 0x01b60000>;
+					};
+					partition@1f60000 {
+						label = "env";
+						reg = <0x01f60000 0x00040000>;
+					};
+					partition@1fa0000 {
+						label = "u-boot";
+						reg = <0x01fa0000 0x00060000>;
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-460gt", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				sttm@4a {
+					compatible = "ad,ad7414";
+					reg = <0x4a>;
+					interrupt-parent = <&UIC1>;
+					interrupts = <0x0 0x8>;
+				};
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-460gt", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			TAH0: emac-tah@ef601350 {
+				compatible = "ibm,tah-460gt", "ibm,tah";
+				reg = <0xef601350 0x00000030>;
+			};
+
+			TAH1: emac-tah@ef601450 {
+				compatible = "ibm,tah-460gt", "ibm,tah";
+				reg = <0xef601450 0x00000030>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x10 0x4
+						 /*Wake*/   0x1 &UIC2 0x14 0x4>;
+				reg = <0xef600e00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "sgmii";
+				phy-map = <0xffffffff>;
+				gpcs-address = <0x0000000a>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC1>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x11 0x4
+						 /*Wake*/   0x1 &UIC2 0x15 0x4>;
+				reg = <0xef600f00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <8>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "sgmii";
+				phy-map = <0x00000000>;
+				gpcs-address = <0x0000000b>;
+				tah-device = <&TAH1>;
+				tah-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;
+			};
+
+			EMAC2: ethernet@ef601100 {
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC2>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x12 0x4
+						 /*Wake*/   0x1 &UIC2 0x16 0x4>;
+				reg = <0xef601100 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2>;
+				mal-rx-channel = <16>;
+				cell-index = <2>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
+				phy-mode = "sgmii";
+				phy-map = <0x00000001>;
+				gpcs-address = <0x0000000C>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
new file mode 100644
index 0000000000..52a84561c4
--- /dev/null
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Analogue & Micro ASP8347 Device Tree Source
+ *
+ * Copyright 2008 Codehermit
+ */
+
+/dts-v1/;
+
+/ {
+	model = "Analogue & Micro ASP8347E";
+	compatible = "analogue-and-micro,asp8347e";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8347@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x8000000>;	// 128MB at 0
+	};
+
+	localbus@ff005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8347e-localbus",
+			     "fsl,pq2pro-localbus",
+			     "simple-bus";
+		reg = <0xff005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		ranges = <
+			0 0 0xf0000000 0x02000000
+		>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+	};
+
+	soc8349@ff000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xff000000 0x00100000>;
+		reg = <0xff000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+
+			rtc@68 {
+				compatible = "dallas,ds1374";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8347-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8347-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8347-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8347-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8347-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		/* phy types ULPI and SERIAL are the only types supported for MPH */
+		/* port = 0 or 1 */
+		usb@22000 {
+			compatible = "fsl-usb2-mph";
+			reg = <0x22000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <39 0x8>;
+			phy_type = "ulpi";
+			port0;
+		};
+		/* phy type (ULPI, UTMI, UTMI_WIDE, SERIAL) */
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			dr_mode = "otg";
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 08 e5 11 32 33 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			linux,network-index = <0>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@0 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x1>;
+				};
+
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&ipic>;
+					interrupts = <18 0x8>;
+					reg = <0x2>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 08 e5 11 32 34 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+			linux,network-index = <1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <400000000>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <400000000>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		/* May need to remove if on a part without crypto engine */
+		crypto@30000 {
+			device_type = "crypto";
+			model = "SEC2";
+			compatible = "talitos";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			num-channels = <4>;
+			channel-fifo-len = <24>;
+			exec-units-mask = <0x0000007e>;
+			/* desc mask is for rev2.0,
+			 * we need runtime fixup for >2.0 */
+			descriptor-types-mask = <0x01010ebf>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: pic@700 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
+		stdout-path = &serial0;
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts
new file mode 100644
index 0000000000..b5861fa383
--- /dev/null
+++ b/arch/powerpc/boot/dts/bamboo.dts
@@ -0,0 +1,302 @@
+/*
+ * Device Tree Source for AMCC Bamboo
+ *
+ * Copyright (c) 2006, 2007 IBM Corp.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * FIXME: Draft only!
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,bamboo";
+	compatible = "amcc,bamboo";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+		serial3 = &UART3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440EP";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440ep";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440ep";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440ep", "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440ep", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		DMA0: dma {
+			compatible = "ibm,dma-440ep", "ibm,dma-440gp";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440ep", "ibm,mcmal-440gp", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+		};
+
+		POB0: opb {
+		  	compatible = "ibm,opb-440ep", "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Bamboo is oddball in the 44x world and doesn't use the ERPN
+			 * bits.
+			 */
+		  	ranges = <0x00000000 0x00000000 0x00000000 0x80000000
+			          0x80000000 0x00000000 0x80000000 0x80000000>;
+		  	interrupt-parent = <&UIC1>;
+		  	interrupts = <0x7 0x4>;
+		  	clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440ep", "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+			};
+
+			UART0: serial@ef600300 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600300 0x00000008>;
+		   		virtual-reg = <0xef600300>;
+		   		clock-frequency = <0>; /* Filled in by zImage */
+		   		current-speed = <115200>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x0 0x4>;
+	   		};
+
+			UART1: serial@ef600400 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600400 0x00000008>;
+		   		virtual-reg = <0xef600400>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x1 0x4>;
+	   		};
+
+			UART2: serial@ef600500 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600500 0x00000008>;
+		   		virtual-reg = <0xef600500>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x3 0x4>;
+	   		};
+
+			UART3: serial@ef600600 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600600 0x00000008>;
+		   		virtual-reg = <0xef600600>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x4 0x4>;
+	   		};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-440ep", "ibm,zmii-440gp", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0xef600e00 0x00000070>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0 1>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1e 0x4 0x1f 0x4>;
+				reg = <0xef600f00 0x00000070>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2 3>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+			};
+
+			usb@ef601000 {
+				compatible = "ohci-be";
+				reg = <0xef601000 0x00000080>;
+				interrupts = <0x8 0x1 0x9 0x1>;
+				interrupt-parent = < &UIC1 >;
+			};
+		};
+
+		PCI0: pci@ec000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440ep-pci", "ibm,plb-pci";
+			primary;
+			reg = <0x00000000 0xeec00000 0x00000008	/* Config space access */
+			       0x00000000 0xeed00000 0x00000004	/* IACK */
+			       0x00000000 0xeed00000 0x00000004	/* Special cycle */
+			       0x00000000 0xef400000 0x00000040>;	/* Internal registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed. Chip supports a second
+			 * IO range but we don't use it for now
+			 * The chip also supports a larger memory range but
+			 * it's not naturally aligned, so our code will break
+			 */
+			ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x20000000
+				  0x02000000 0x00000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00100000
+				  0x01000000 0x00000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* Bamboo has all 4 IRQ pins tied together per slot */
+			interrupt-map-mask = <0xf800 0x0 0x0 0x0>;
+			interrupt-map = <
+				/* IDSEL 1 */
+				0x800 0x0 0x0 0x0 &UIC0 0x1c 0x8
+
+				/* IDSEL 2 */
+				0x1000 0x0 0x0 0x0 &UIC0 0x1b 0x8
+
+				/* IDSEL 3 */
+				0x1800 0x0 0x0 0x0 &UIC0 0x1a 0x8
+
+				/* IDSEL 4 */
+				0x2000 0x0 0x0 0x0 &UIC0 0x19 0x8
+			>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+	};
+};
diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
new file mode 100644
index 0000000000..6971595319
--- /dev/null
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree for Bluestone (APM821xx) board.
+ *
+ * Copyright (c) 2010, Applied Micro Circuits Corporation
+ * Author: Tirumala R Marri <tmarri@apm.com>
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "apm,bluestone";
+	compatible = "apm,bluestone";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,apm821xx";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			next-level-cache = <&L2C0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	OCM: ocm@400040000 {
+		compatible = "ibm,ocm";
+		status = "okay";
+		cell-index = <1>;
+		/* configured in U-Boot */
+		reg = <4 0x00040000 0x8000>; /* 32K */
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-apm821xx";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-apm821xx";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	L2C0: l2c {
+		compatible = "ibm,l2-cache-apm82181", "ibm,l2-cache";
+		dcr-reg = <0x020 0x008
+			   0x030 0x008>;
+		cache-line-size = <32>;
+		cache-size = <262144>;
+		interrupt-parent = <&UIC1>;
+		interrupts = <11 1>;
+	};
+
+	plb {
+		compatible = "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-apm821xx";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal2";
+			descriptor-memory = "ocm";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <1>;
+			num-rx-chans = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <	/*TXEOB*/ 0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x3 0x4
+					/*TXDE*/  0x4 0x4
+					/*RXDE*/  0x5 0x4>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot (NOTE(review): a static ranges follows - confirm which one wins) */
+				ranges = < 0x00000003 0x00000000 0xe0000000 0x8000000>;
+				interrupts = <0x6 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x00400000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x00180000>;
+					};
+					partition@180000 {
+						label = "env";
+						reg = <0x00180000 0x00020000>;
+					};
+					partition@1a0000 {
+						label = "u-boot";
+						reg = <0x001a0000 0x00060000>;
+					};
+				};
+
+				ndfc@1,0 {
+					compatible = "ibm,ndfc";
+					reg = <0x00000003 0x00000000 0x00002000>;
+					ccr = <0x00001000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					/* 2Gb Nand Flash */
+					nand {
+						#address-cells = <1>;
+						#size-cells = <1>;
+
+						partition@0 {
+							label = "firmware";
+							reg   = <0x00000000 0x00C00000>;
+						};
+						partition@c00000 {
+							label = "environment";
+							reg   = <0x00C00000 0x00B00000>;
+						};
+						partition@1700000 {
+							label = "kernel";
+							reg   = <0x01700000 0x00E00000>;
+						};
+						partition@2500000 {
+							label = "root";
+							reg   = <0x02500000 0x08200000>;
+						};
+						partition@a700000 {
+							label = "device-tree";
+							reg   = <0x0A700000 0x00B00000>;
+						};
+						partition@b200000 {
+							label = "config";
+							reg   = <0x0B200000 0x00D00000>;
+						};
+						partition@bf00000 {
+							label = "diag";
+							reg   = <0x0BF00000 0x00C00000>;
+						};
+						partition@cb00000 {
+							label = "vendor";
+							reg   = <0x0CB00000 0x3500000>;
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART1: serial@ef600400 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x00000008>;
+				virtual-reg = <0xef600400>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				rtc@68 {
+					compatible = "st,m41t80";
+					reg = <0x68>;
+					interrupt-parent = <&UIC0>;
+					interrupts = <0x9 0x8>;
+				};
+				sttm@4C {
+					compatible = "adm,adm1032";
+					reg = <0x4C>;
+					interrupt-parent = <&UIC1>;
+					interrupts = <0x1E 0x8>; /* CPU_THERMAL_L */
+				};
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			RGMII0: emac-rgmii@ef601500 {
+				compatible = "ibm,rgmii";
+				reg = <0xef601500 0x00000008>;
+				has-mdio;
+			};
+
+			TAH0: emac-tah@ef601350 {
+				compatible = "ibm,tah";
+				reg = <0xef601350 0x00000030>;
+			};
+
+			EMAC0: ethernet@ef600c00 {
+				device_type = "network";
+				compatible = "ibm,emac-apm821xx", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x10 0x4
+						 /*Wake*/   0x1 &UIC2 0x14 0x4>;
+				reg = <0xef600c00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <16384>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCIE0: pcie@d00000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-apm821xx", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x0000000d 0x00000000 0x20000000	/* Config space access */
+			       0x0000000c 0x08010000 0x00001000>;	/* Registers */
+			dcr-reg = <0x100 0x020>;
+			sdr-base = <0x300>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000
+				  0x02000000 0x00000000 0x00000000 0x0000000f 0x00000000 0x00000000 0x00100000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x40 to 0x7f */
+			bus-range = <0x40 0x7f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * the port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0xc 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0xd 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0xe 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
new file mode 100644
index 0000000000..5db1bff6b2
--- /dev/null
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -0,0 +1,548 @@
+/*
+ * Device Tree Source for AMCC Canyonlands (460EX)
+ *
+ * Copyright 2008-2009 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,canyonlands";
+	compatible = "amcc,canyonlands";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,460EX";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			next-level-cache = <&L2C0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-460ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-460ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-460ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-460ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-460ex";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-460ex";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	CPM0: cpm {
+		compatible = "ibm,cpm";
+		dcr-access-method = "native";
+		dcr-reg = <0x160 0x003>;
+		unused-units = <0x00000100>;
+		idle-doze = <0x02000000>;
+		standby = <0xfeff791d>;
+	};
+
+	L2C0: l2c {
+		compatible = "ibm,l2-cache-460ex", "ibm,l2-cache";
+		dcr-reg = <0x020 0x008		/* Internal SRAM DCR's */
+			   0x030 0x008>;	/* L2 cache DCR's */
+		cache-line-size = <32>;		/* 32 bytes */
+		cache-size = <262144>;		/* L2, 256K */
+		interrupt-parent = <&UIC1>;
+		interrupts = <11 1>;
+	};
+
+	plb {
+		compatible = "ibm,plb-460ex", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-460ex", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		CRYPTO: crypto@180000 {
+			compatible = "amcc,ppc460ex-crypto", "amcc,ppc4xx-crypto";
+			reg = <4 0x00180000 0x80400>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x1d 0x4>;
+		};
+
+		HWRNG: hwrng@110000 {
+			compatible = "amcc,ppc460ex-rng", "ppc4xx-rng";
+			reg = <4 0x00110000 0x50>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-460ex", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <16>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <	/*TXEOB*/ 0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x3 0x4
+					/*TXDE*/  0x4 0x4
+					/*RXDE*/  0x5 0x4>;
+		};
+
+		USB0: ehci@bffd0400 {
+			compatible = "ibm,usb-ehci-460ex", "usb-ehci";
+			interrupt-parent = <&UIC2>;
+			interrupts = <0x1d 4>;
+			reg = <4 0xbffd0400 0x90 4 0xbffd0490 0x70>;
+		};
+
+		USB1: usb@bffd0000 {
+			compatible = "ohci-le";
+			reg = <4 0xbffd0000 0x60>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <0x1e 4>;
+		};
+
+		USBOTG0: usbotg@bff80000 {
+			compatible = "amcc,dwc-otg";
+			reg = <0x4 0xbff80000 0x10000>;
+			interrupt-parent = <&USBOTG0>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupts = <0x0 0x1 0x2>;
+			interrupt-map = </* USB-OTG */ 0x0 &UIC2 0x1c 0x4
+					 /* HIGH-POWER */ 0x1 &UIC1 0x1a 0x8
+					 /* DMA */ 0x2 &UIC0 0xc 0x4>;
+		};
+
+		AHBDMA: dma@bffd0800 {
+			compatible = "snps,dma-spear1340";
+			reg = <4 0xbffd0800 0x400>;
+			interrupt-parent = <&UIC3>;
+			interrupts = <0x5 0x4>;
+			#dma-cells = <3>;
+		};
+
+		SATA0: sata@bffd1000 {
+			compatible = "amcc,sata-460ex";
+			reg = <4 0xbffd1000 0x800>;
+			interrupt-parent = <&UIC3>;
+			interrupts = <0x0 0x4>;
+			dmas = <&AHBDMA 0 1 0>;
+			dma-names = "sata-dma";
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-460ex", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-460ex", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x6 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <0x00200000 0x01400000>;
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <0x01600000 0x00400000>;
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <0x01a00000 0x02560000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+
+				cpld@2,0 {
+					compatible = "amcc,ppc460ex-bcsr";
+					reg = <2 0x0 0x9>;
+				};
+
+				ndfc@3,0 {
+					compatible = "ibm,ndfc";
+					reg = <0x00000003 0x00000000 0x00002000>;
+					ccr = <0x00001000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					nand {
+						#address-cells = <1>;	/* partition offset: one cell */
+						#size-cells = <1>;	/* partition length: one cell */
+
+						partition@0 {
+							label = "u-boot";
+							reg = <0x00000000 0x00100000>;	/* offset 0, 1 MiB */
+						};
+						partition@100000 {
+							label = "user";
+							reg = <0x00100000 0x03f00000>;	/* starts right after "u-boot", as the unit address says */
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART1: serial@ef600400 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x00000008>;
+				virtual-reg = <0xef600400>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-460ex", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;	/* children are addressed by one cell */
+				#size-cells = <0>;
+				rtc@68 {	/* M41T80 at bus address 0x68 */
+					compatible = "st,m41t80";
+					reg = <0x68>;
+					interrupt-parent = <&UIC2>;
+					interrupts = <0x19 0x8>;
+				};
+				sttm@48 {	/* AD7414 at bus address 0x48 */
+					compatible = "ad,ad7414";
+					reg = <0x48>;
+					interrupt-parent = <&UIC1>;
+					interrupts = <0x14 0x8>;
+				};
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-460ex", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			GPIO0: gpio@ef600b00 {
+				compatible = "ibm,ppc4xx-gpio";
+				reg = <0xef600b00 0x00000048>;
+				gpio-controller;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-460ex", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			RGMII0: emac-rgmii@ef601500 {
+				compatible = "ibm,rgmii-460ex", "ibm,rgmii";
+				reg = <0xef601500 0x00000008>;
+				has-mdio;
+			};
+
+			TAH0: emac-tah@ef601350 {
+				compatible = "ibm,tah-460ex", "ibm,tah";
+				reg = <0xef601350 0x00000030>;
+			};
+
+			TAH1: emac-tah@ef601450 {
+				compatible = "ibm,tah-460ex", "ibm,tah";
+				reg = <0xef601450 0x00000030>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-460ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x10 0x4
+						 /*Wake*/   0x1 &UIC2 0x14 0x4>;
+				reg = <0xef600e00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				device_type = "network";
+				compatible = "ibm,emac-460ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC1>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x11 0x4
+						 /*Wake*/   0x1 &UIC2 0x15 0x4>;
+				reg = <0xef600f00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <8>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				tah-device = <&TAH1>;
+				tah-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;
+			};
+		};
+
+		PCIX0: pci@c0ec00000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pcix-460ex", "ibm,plb-pcix";
+			primary;
+			large-inbound-windows;
+			enable-msi-hole;
+			reg = <0x0000000c 0x0ec00000 0x00000008>,	/* Config space access */
+			      <0x00000000 0x00000000 0x00000000>,	/* no IACK cycles */
+			      <0x0000000c 0x0ed00000 0x00000004>,	/* Special cycles */
+			      <0x0000000c 0x0ec80000 0x00000100>,	/* Internal registers */
+			      <0x0000000c 0x0ec80100 0x000000fc>;	/* Internal messaging registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000d 0x80000000 0x00000000 0x80000000>,
+				 <0x02000000 0x00000000 0x00000000 0x0000000c 0x0ee00000 0x00000000 0x00100000>,
+				 <0x01000000 0x00000000 0x00000000 0x0000000c 0x08000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This bridge owns bus numbers 0x0 through 0x3f */
+			bus-range = <0x0 0x3f>;
+
+			/* All PCI interrupts are routed to ext IRQ 2 -> UIC1-0 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+			interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 0x0 0x8>;
+		};
+
+		PCIE0: pcie@d00000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460ex", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x0000000d 0x00000000 0x20000000	/* Config space access */
+			       0x0000000c 0x08010000 0x00001000>;	/* Registers */
+			dcr-reg = <0x100 0x020>;
+			sdr-base = <0x300>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000	/* memory, 2 GiB */
+				  0x02000000 0x00000000 0x00000000 0x0000000f 0x00000000 0x00000000 0x00100000	/* memory, 1 MiB at PCI address 0 */
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;	/* I/O, 64 KiB */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x40 to 0x7f */
+			bus-range = <0x40 0x7f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * the port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;	/* only the interrupt pin cell is compared */
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0xc 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0xd 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0xe 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@d20000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460ex", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x0000000d 0x20000000 0x20000000	/* Config space access */
+			       0x0000000c 0x08011000 0x00001000>;	/* Registers */
+			dcr-reg = <0x120 0x020>;
+			sdr-base = <0x340>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x80000000 0x00000000 0x80000000	/* memory, 2 GiB */
+				  0x02000000 0x00000000 0x00000000 0x0000000f 0x00100000 0x00000000 0x00100000	/* memory, 1 MiB at PCI address 0 */
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80010000 0x00000000 0x00010000>;	/* I/O, 64 KiB */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x80 to 0xbf */
+			bus-range = <0x80 0xbf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * the port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;	/* only the interrupt pin cell is compared */
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x10 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x11 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x12 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x13 0x4 /* swizzled int D */>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts
new file mode 100644
index 0000000000..ea6e76ae25
--- /dev/null
+++ b/arch/powerpc/boot/dts/charon.dts
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * charon board Device Tree Source
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ *
+ * Copyright (C) 2010 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "anon,charon";
+	compatible = "anon,charon";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&mpc5200_pic>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,5200@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <0x4000>;	// L1, 16K
+			i-cache-size = <0x4000>;	// L1, 16K
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;	// 128MB
+	};
+
+	soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>;		// from bootloader
+		system-frequency = <0>;		// from bootloader
+
+		cdm@200 {
+			compatible = "fsl,mpc5200-cdm";
+			reg = <0x200 0x38>;
+		};
+
+		mpc5200_pic: interrupt-controller@500 {
+			// 5200 interrupts are encoded into two levels;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			compatible = "fsl,mpc5200-pic";
+			reg = <0x500 0x80>;
+		};
+
+		timer@600 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x600 0x10>;
+			interrupts = <1 9 0>;
+			fsl,has-wdt;
+		};
+
+		can@900 {
+			compatible = "fsl,mpc5200-mscan";
+			interrupts = <2 17 0>;
+			reg = <0x900 0x80>;
+		};
+
+		can@980 {
+			compatible = "fsl,mpc5200-mscan";
+			interrupts = <2 18 0>;
+			reg = <0x980 0x80>;
+		};
+
+		gpio_simple: gpio@b00 {
+			compatible = "fsl,mpc5200-gpio";
+			reg = <0xb00 0x40>;
+			interrupts = <1 7 0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		usb@1000 {
+			compatible = "fsl,mpc5200-ohci","ohci-be";
+			reg = <0x1000 0xff>;
+			interrupts = <2 6 0>;
+		};
+
+		dma-controller@1200 {
+			device_type = "dma-controller";
+			compatible = "fsl,mpc5200-bestcomm";
+			reg = <0x1200 0x80>;
+			interrupts = <3 0 0  3 1 0  3 2 0  3 3 0
+			              3 4 0  3 5 0  3 6 0  3 7 0
+			              3 8 0  3 9 0  3 10 0  3 11 0
+			              3 12 0  3 13 0  3 14 0  3 15 0>;
+		};
+
+		xlb@1f00 {
+			compatible = "fsl,mpc5200-xlb";
+			reg = <0x1f00 0x100>;
+		};
+
+		serial@2000 {		// PSC1
+			compatible = "fsl,mpc5200-psc-uart";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		serial@2400 {		// PSC3
+			compatible = "fsl,mpc5200-psc-uart";
+			reg = <0x2400 0x100>;
+			interrupts = <2 3 0>;
+		};
+
+		ethernet@3000 {
+			compatible = "fsl,mpc5200-fec";
+			reg = <0x3000 0x400>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <2 5 0>;
+			fixed-link = <1 1 100 0 0>;
+		};
+
+		mdio@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-mdio";
+			reg = <0x3000 0x400>;       // fec range, since we need to setup fec interrupts
+			interrupts = <2 5 0>;   // these are for "mii command finished", not link changes & co.
+		};
+
+		ata@3a00 {
+			compatible = "fsl,mpc5200-ata";
+			reg = <0x3a00 0x100>;
+			interrupts = <2 7 0>;
+		};
+
+		i2c@3d00 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d00 0x40>;
+			interrupts = <2 15 0>;
+		};
+
+
+		i2c@3d40 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d40 0x40>;
+			interrupts = <2 16 0>;
+
+			dtt@28 {
+				compatible = "national,lm80";
+				reg = <0x28>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1374";
+				reg = <0x68>;
+			};
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;
+		};
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200-lpb","simple-bus";
+		#address-cells = <2>;	// <chip-select offset>, judging by the "CS1" note below
+		#size-cells = <1>;
+		ranges = <	0 0 0xfc000000 0x02000000 // CS0 range, CFI flash
+				1 0 0xe0000000 0x04000000 // CS1 range, SM501
+				3 0 0xe8000000 0x00080000>; // CS3 range, mram0 (mtd-ram)
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;	// whole CS0 window
+			bank-width = <4>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+		};
+
+		display@1,0 {
+			compatible = "smi,sm501";
+			reg = <1 0x00000000 0x00800000
+			       1 0x03e00000 0x00200000>;	// two regions inside the CS1 window
+			mode = "640x480-32@60";
+			interrupts = <1 1 3>;
+			little-endian;
+		};
+
+		mram0@3,0 {
+			compatible = "mtd-ram";
+			reg = <3 0x00000 0x80000>;	// whole CS3 window
+			bank-width = <1>;
+		};
+	};
+
+	pci@f0000d00 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		compatible = "fsl,mpc5200-pci";
+		reg = <0xf0000d00 0x100>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3
+				 0xc000 0 0 2 &mpc5200_pic 0 0 3
+				 0xc000 0 0 3 &mpc5200_pic 0 0 3
+				 0xc000 0 0 4 &mpc5200_pic 0 0 3>;
+		clock-frequency = <0>; // From boot loader
+		interrupts = <2 8 0 2 9 0 2 10 0>;
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+			 <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/cm5200.dts b/arch/powerpc/boot/dts/cm5200.dts
new file mode 100644
index 0000000000..66cae7be60
--- /dev/null
+++ b/arch/powerpc/boot/dts/cm5200.dts
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CM5200 board Device Tree Source
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };
+
+/ {
+	model = "schindler,cm5200";
+	compatible = "schindler,cm5200";
+
+	soc5200@f0000000 {
+		can@900 {
+			status = "disabled";
+		};
+
+		can@980 {
+			status = "disabled";
+		};
+
+		psc@2000 {		// PSC1
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2200 {		// PSC2
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2400 {		// PSC3
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2600 {		// PSC4
+			status = "disabled";
+		};
+
+		psc@2800 {		// PSC5
+			status = "disabled";
+		};
+
+		psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		ata@3a00 {
+			status = "disabled";
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+	};
+
+	pci@f0000d00 {
+		status = "disabled";
+	};
+
+	localbus {
+		// 16-bit flash device at LocalPlus Bus CS0
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x2000000>;
+			bank-width = <2>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
new file mode 100644
index 0000000000..aea8af8101
--- /dev/null
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -0,0 +1,242 @@
+/*
+ * Device Tree Source for IBM Embedded PPC 476 Platform
+ *
+ * Copyright © 2011 Tony Breeds IBM Corporation
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x01f00000 0x00100000;	// spin table
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	model = "ibm,currituck";
+	compatible = "ibm,currituck";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,476";
+			reg = <0>;
+			clock-frequency = <1600000000>; // 1.6 GHz
+			timebase-frequency = <100000000>; // 100Mhz
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "okay";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			model = "PowerPC,476";
+			reg = <1>;
+			clock-frequency = <1600000000>; // 1.6 GHz
+			timebase-frequency = <100000000>; // 100Mhz
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "disabled";
+			enable-method = "spin-table";
+			cpu-release-addr = <0x0 0x01f00000>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>; // filled in by zImage
+	};
+
+	MPIC: interrupt-controller {
+		compatible = "chrp,open-pic";
+		interrupt-controller;
+		dcr-reg = <0xffc00000 0x00040000>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+	};
+
+	plb {
+		compatible = "ibm,plb6";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		clock-frequency = <200000000>; // 200Mhz
+
+		POB0: opb {
+			compatible = "ibm,opb-4xx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full
+			 * 32-bit range
+			 */
+			ranges = <0x00000000 0x00000200 0x00000000 0x80000000
+				  0x80000000 0x00000200 0x80000000 0x80000000>;
+			clock-frequency = <100000000>;
+
+			UART0: serial@10000000 {
+				device_type = "serial";
+				compatible = "ns16750", "ns16550";
+				reg = <0x10000000 0x00000008>;
+				virtual-reg = <0xe1000000>;
+				clock-frequency = <1851851>; // PCIe refclk/MCGC0_CTL[UART]
+				current-speed = <115200>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <34 2>;
+			};
+
+			FPGA0: fpga@50000000 {
+				compatible = "ibm,currituck-fpga";
+				reg = <0x50000000 0x4>;
+			};
+
+			IIC0: i2c@0 {
+				compatible = "ibm,iic-currituck", "ibm,iic";
+				reg = <0x0 0x00000014>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <79 2>;
+				#address-cells = <1>;	/* children are addressed by one cell */
+				#size-cells = <0>;
+				rtc@68 {	/* RTC at bus address 0x68 */
+					compatible = "st,m41t80", "m41st85";
+					reg = <0x68>;
+				};
+			};
+		};
+
+		PCIE0: pcie@10100000000 {		// 4xGBIF1
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x00000101 0x00000000 0x0 0x10000000		/* Config space access */
+			       0x00000100 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0x80 0x20>;
+
+//                                pci_space  < pci_addr          > < cpu_addr          > < size       >
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000110 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x00000140 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0 to memsize filled in by zImage */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 46 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 47 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 48 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>;
+		};
+
+		PCIE1: pcie@30100000000 {		// 4xGBIF0
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x00000301 0x00000000 0x0 0x10000000		/* Config space access */
+			       0x00000300 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0x60 0x20>;
+
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000310 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x00000340 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0 to memsize filled in by zImage */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 38 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 39 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 40 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 41 0x2 /* int D */>;
+		};
+
+		PCIE2: pcie@38100000000 {		// 2xGBIF0
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-476fpe", "ibm,plb-pciex";
+			primary;
+			port = <0x2>; /* port number */
+			reg = <0x00000381 0x00000000 0x0 0x10000000		/* Config space access */
+			       0x00000380 0x00000000 0x0 0x00001000>;	/* UTL Registers space access */
+			dcr-reg = <0xA0 0x20>;
+
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000390 0x80000000 0x0 0x80000000
+			          0x01000000 0x0        0x0        0x000003C0 0x0        0x0 0x00010000>;
+
+			/* Inbound starting at 0 to memsize filled in by zImage */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x0>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &MPIC 54 0x2 /* int A */
+				0x0 0x0 0x0 0x2 &MPIC 55 0x2 /* int B */
+				0x0 0x0 0x0 0x3 &MPIC 56 0x2 /* int C */
+				0x0 0x0 0x0 0x4 &MPIC 57 0x2 /* int D */>;
+		};
+
+	};
+
+	chosen {
+		stdout-path = &UART0;
+	};
+};
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
new file mode 100644
index 0000000000..dfaf974c0c
--- /dev/null
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Digsy MTC board Device Tree Source
+ *
+ * Copyright (C) 2009 Semihalf
+ *
+ * Based on the CM5200 by M. Balakowicz
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { gpio-controller; fsl,has-wdt; };
+&gpt1 { gpio-controller; };
+
+/ {
+	model = "intercontrol,digsy-mtc";
+	compatible = "intercontrol,digsy-mtc";
+
+	memory@0 {
+		reg = <0x00000000 0x02000000>;	// 32MB
+	};
+
+	soc5200@f0000000 {
+		rtc@800 {
+			status = "disabled";
+		};
+
+		psc@2000 {		// PSC1
+			status = "disabled";
+		};
+
+		psc@2200 {		// PSC2
+			status = "disabled";
+		};
+
+		psc@2400 {		// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {		// PSC4
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2800 {		// PSC5
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2c00 {		// PSC6
+			status = "disabled";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		i2c@3d00 {
+			eeprom@50 {
+				compatible = "atmel,24c08";
+				reg = <0x50>;
+			};
+
+			rtc@56 {
+				compatible = "microcrystal,rv3029";
+				reg = <0x56>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@3d40 {
+			status = "disabled";
+		};
+	};
+
+	pci@f0000d00 {
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3
+				 0xc000 0 0 2 &mpc5200_pic 0 0 3
+				 0xc000 0 0 3 &mpc5200_pic 0 0 3
+				 0xc000 0 0 4 &mpc5200_pic 0 0 3>;
+		clock-frequency = <0>; // From boot loader
+		interrupts = <2 8 0 2 9 0 2 10 0>;
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+			 <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+	};
+
+	localbus {
+		ranges = <0 0 0xff000000 0x1000000
+			  4 0 0x60000000 0x0001000>;
+
+		// 16-bit flash device at LocalPlus Bus CS0
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x1000000>;
+			bank-width = <2>;
+			device-width = <2>;
+			#size-cells = <1>;	// partition reg below is <offset size>
+			#address-cells = <1>;
+
+			partition@0 {
+				label = "kernel";
+				reg = <0x0 0x00200000>;	// 2MB
+			};
+			partition@200000 {
+				label = "root";
+				reg = <0x00200000 0x00300000>;	// 3MB
+			};
+			partition@500000 {
+				label = "user";
+				reg = <0x00500000 0x00a00000>;	// 10MB
+			};
+			partition@f00000 {
+				label = "u-boot";
+				reg = <0x00f00000 0x100000>;	// 1MB, ends at 0x1000000
+			};
+		};
+
+		can@4,0 {
+			compatible = "nxp,sja1000";
+			reg = <4 0x000 0x80>;
+			nxp,external-clock-frequency = <24000000>;
+			interrupts = <1 2 3>; // Level-low
+		};
+
+		can@4,100 {
+			compatible = "nxp,sja1000";
+			reg = <4 0x100 0x80>;
+			nxp,external-clock-frequency = <24000000>;
+			interrupts = <1 2 3>;  // Level-low
+		};
+
+		serial@4,200 {
+			compatible = "nxp,sc28l92";
+			reg = <4 0x200 0x10>;
+			interrupts = <1 3 3>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts
new file mode 100644
index 0000000000..5d11e6ea74
--- /dev/null
+++ b/arch/powerpc/boot/dts/ebony.dts
@@ -0,0 +1,337 @@
+/*
+ * Device Tree Source for IBM Ebony
+ *
+ * Copyright (c) 2006, 2007 IBM Corp.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com>
+ *
+ * FIXME: Draft only!
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "ibm,ebony";
+	compatible = "ibm,ebony";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440GP";
+			reg = <0x00000000>;
+			clock-frequency = <0>; // Filled in by zImage
+			timebase-frequency = <0>; // Filled in by zImage
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; // Filled in by zImage
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440gp", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440gp", "ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	CPC0: cpc {
+		compatible = "ibm,cpc-440gp";
+		dcr-reg = <0x0b0 0x003 0x0e0 0x010>;
+		// FIXME: anything else?
+	};
+
+	plb {
+		compatible = "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; // Filled in by zImage
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-440gp";
+			dcr-reg = <0x010 0x002>;
+			// FIXME: anything else?
+		};
+
+		SRAM0: sram {
+			compatible = "ibm,sram-440gp";
+			dcr-reg = <0x020 0x008 0x00a 0x001>;
+		};
+
+		DMA0: dma {
+			// FIXME: ???
+			compatible = "ibm,dma-440gp";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440gp", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <4>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					 /*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					 /*SERR*/  0x2 &UIC1 0x0 0x4
+					 /*TXDE*/  0x3 &UIC1 0x1 0x4
+					 /*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full 32-bit
+			   range */
+			ranges = <0x00000000 0x00000001 0x00000000 0x80000000
+				  0x80000000 0x00000001 0x80000000 0x80000000>;
+			dcr-reg = <0x090 0x00b>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <0x7 0x4>;
+			clock-frequency = <0>; // Filled in by zImage
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; // Filled in by zImage
+				// ranges property is supplied by zImage
+				// based on firmware's configuration of the
+				// EBC bridge
+				interrupts = <0x5 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				small-flash@0,80000 {
+					compatible = "jedec-flash";
+					bank-width = <1>;
+					reg = <0x00000000 0x00080000 0x00080000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "OpenBIOS";
+						reg = <0x00000000 0x00080000>;
+						read-only;
+					};
+				};
+
+				nvram@1,0 {
+					/* NVRAM & RTC */
+					compatible = "ds1743-nvram";
+					#bytes = <0x2000>;
+					reg = <0x00000001 0x00000000 0x00002000>;
+				};
+
+				large-flash@2,0 {
+					compatible = "jedec-flash";
+					bank-width = <1>;
+					reg = <0x00000002 0x00000000 0x00400000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "fs";
+						reg = <0x00000000 0x00380000>;
+					};
+					partition@380000 {
+						label = "firmware";
+						reg = <0x00380000 0x00080000>;
+					};
+				};
+
+				ir@3,0 {
+					reg = <0x00000003 0x00000000 0x00000010>;
+				};
+
+				fpga@7,0 {
+					compatible = "Ebony-FPGA";
+					reg = <0x00000007 0x00000000 0x00000010>;
+					virtual-reg = <0xe8300000>;
+				};
+			};
+
+			UART0: serial@40000200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x40000200 0x00000008>;
+				virtual-reg = <0xe0000200>;
+				clock-frequency = <11059200>;
+				current-speed = <9600>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			UART1: serial@40000300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x40000300 0x00000008>;
+				virtual-reg = <0xe0000300>;
+				clock-frequency = <11059200>;
+				current-speed = <9600>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@40000400 {
+				/* FIXME */
+				compatible = "ibm,iic-440gp", "ibm,iic";
+				reg = <0x40000400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+			IIC1: i2c@40000500 {
+				/* FIXME */
+				compatible = "ibm,iic-440gp", "ibm,iic";
+				reg = <0x40000500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			GPIO0: gpio@40000700 {
+				/* FIXME */
+				compatible = "ibm,gpio-440gp";
+				reg = <0x40000700 0x00000020>;
+			};
+
+			ZMII0: emac-zmii@40000780 {
+				compatible = "ibm,zmii-440gp", "ibm,zmii";
+				reg = <0x40000780 0x0000000c>;
+			};
+
+			EMAC0: ethernet@40000800 {
+				device_type = "network";
+				compatible = "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0x40000800 0x00000070>;
+				local-mac-address = [000000000000]; // Filled in by zImage
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0 1>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000001>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+			};
+			EMAC1: ethernet@40000900 {
+				device_type = "network";
+				compatible = "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1e 0x4 0x1f 0x4>;
+				reg = <0x40000900 0x00000070>;
+				local-mac-address = [000000000000]; // Filled in by zImage
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2 3>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000001>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+			};
+
+
+			GPT0: gpt@40000a00 {
+				/* FIXME */
+				reg = <0x40000a00 0x000000d4>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x12 0x4 0x13 0x4 0x14 0x4 0x15 0x4 0x16 0x4>;
+			};
+
+		};
+
+		PCIX0: pci@20ec00000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440gp-pcix", "ibm,plb-pcix";
+			primary;
+			reg = <0x00000002 0x0ec00000 0x00000008	/* Config space access */
+			       0x00000000 0x00000000 0x00000000		/* no IACK cycles */
+			       0x00000002 0x0ed00000 0x00000004     /* Special cycles */
+			       0x00000002 0x0ec80000 0x000000f0	/* Internal registers */
+			       0x00000002 0x0ec80100 0x000000fc>;	/* Internal messaging registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000003 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x00000002 0x08000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* Ebony has all 4 IRQ pins tied together per slot */
+			interrupt-map-mask = <0xf800 0x0 0x0 0x0>;
+			interrupt-map = <
+				/* IDSEL 1 */
+				0x800 0x0 0x0 0x0 &UIC0 0x17 0x8
+
+				/* IDSEL 2 */
+				0x1000 0x0 0x0 0x0 &UIC0 0x18 0x8
+
+				/* IDSEL 3 */
+				0x1800 0x0 0x0 0x0 &UIC0 0x19 0x8
+
+				/* IDSEL 4 */
+				0x2000 0x0 0x0 0x0 &UIC0 0x1a 0x8
+			>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@40000200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
new file mode 100644
index 0000000000..7a1231d9d6
--- /dev/null
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -0,0 +1,427 @@
+/*
+ * Device Tree Source for AMCC (AppliedMicro) Eiger(460SX)
+ *
+ * Copyright 2009 AMCC (AppliedMicro) <ttnguyen@amcc.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,eiger";
+	compatible = "amcc,eiger";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		ethernet2 = &EMAC2;
+		ethernet3 = &EMAC3;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,460SX";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-460sx";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-460sx";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-460sx", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-460sx", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-460sx", "ibm,mcmal2";
+			dcr-reg = <0x180 0x62>;
+			num-tx-chans = <4>;
+			num-rx-chans = <32>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <	/*TXEOB*/ 0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x1 0x4
+					/*TXDE*/  0x2 0x4
+					/*RXDE*/  0x3 0x4
+					/*COAL TX0*/ 0x18 0x2
+					/*COAL TX1*/ 0x19 0x2
+					/*COAL TX2*/ 0x1a 0x2
+					/*COAL TX3*/ 0x1b 0x2
+					/*COAL RX0*/ 0x1c 0x2
+					/*COAL RX1*/ 0x1d 0x2
+					/*COAL RX2*/ 0x1e 0x2
+					/*COAL RX3*/ 0x1f 0x2>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-460sx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-460sx", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x6 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					/* reg property is supplied by U-Boot */
+					#address-cells = <1>;
+					#size-cells = <1>; /* partition reg below is <offset size> */
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>; /* 1920 kB */
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>; /* 128 kB */
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <0x00200000 0x01400000>; /* 20 MB */
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <0x01600000 0x00400000>; /* 4 MB */
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <0x01a00000 0x02560000>; /* 38272 kB */
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>; /* 256 kB */
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>; /* 384 kB, ends at 0x04000000 */
+					};
+				};
+
+				ndfc@1,0 {
+					compatible = "ibm,ndfc";
+					/* reg property is supplied by U-Boot */
+					ccr = <0x00003000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					nand {
+						#address-cells = <1>;
+						#size-cells = <1>; /* partition reg below is <offset size> */
+						partition@0 {
+							label = "uboot";
+							reg = <0x00000000 0x00200000>; /* 2 MB */
+						};
+						partition@200000 {
+							label = "uboot-environment";
+							reg = <0x00200000 0x00100000>; /* 1 MB */
+						};
+						partition@300000 {
+							label = "linux";
+							reg = <0x00300000 0x00300000>; /* 3 MB */
+						};
+						partition@600000 {
+							label = "root-file-system";
+							reg = <0x00600000 0x01900000>; /* 25 MB */
+						};
+						partition@1f00000 {
+							label = "device-tree";
+							reg = <0x01f00000 0x00020000>; /* 128 kB */
+						};
+						partition@1f20000 {
+							label = "data";
+							reg = <0x01f20000 0x060E0000>; /* 99200 kB, ends at 0x08000000 */
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600200 0x00000008>;
+				virtual-reg = <0xef600200>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			UART1: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600400 {
+				compatible = "ibm,iic-460sx", "ibm,iic";
+				reg = <0xef600400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				index = <0>;
+			};
+
+			IIC1: i2c@ef600500 {
+				compatible = "ibm,iic-460sx", "ibm,iic";
+				reg = <0xef600500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				index = <1>;
+			};
+
+			RGMII0: emac-rgmii@ef600900 {
+				compatible = "ibm,rgmii-460sx", "ibm,rgmii";
+				reg = <0xef600900 0x00000008>;
+				has-mdio;
+			};
+
+			RGMII1: emac-rgmii@ef600920 {
+				compatible = "ibm,rgmii-460sx", "ibm,rgmii";
+				reg = <0xef600920 0x00000008>;
+				has-mdio;
+			};
+
+			TAH0: emac-tah@ef600e50 {
+				compatible = "ibm,tah-460sx", "ibm,tah";
+				reg = <0xef600e50 0x00000030>;
+			};
+
+			TAH1: emac-tah@ef600f50 {
+				compatible = "ibm,tah-460sx", "ibm,tah";
+				reg = <0xef600f50 0x00000030>;
+			};
+
+			EMAC0: ethernet@ef600a00 {
+				device_type = "network";
+				compatible = "ibm,emac-460sx", "ibm,emac4";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x13 0x4
+						 /*Wake*/   0x1 &UIC2 0x1d 0x4>;
+				reg = <0xef600a00 0x00000070>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600b00 {
+				device_type = "network";
+				compatible = "ibm,emac-460sx", "ibm,emac4";
+				interrupt-parent = <&EMAC1>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x14 0x4
+						 /*Wake*/   0x1 &UIC2 0x1d 0x4>;
+				reg = <0xef600b00 0x00000070>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <8>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				tah-device = <&TAH1>;
+				tah-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;
+			};
+
+			EMAC2: ethernet@ef600c00 {
+				device_type = "network";
+				compatible = "ibm,emac-460sx", "ibm,emac4";
+				interrupt-parent = <&EMAC2>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x15 0x4
+						 /*Wake*/   0x1 &UIC2 0x1d 0x4>;
+				reg = <0xef600c00 0x00000070>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2>;
+				mal-rx-channel = <16>;
+				cell-index = <2>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII1>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;
+			};
+
+			EMAC3: ethernet@ef600d00 {
+				device_type = "network";
+				compatible = "ibm,emac-460sx", "ibm,emac4";
+				interrupt-parent = <&EMAC3>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x16 0x4
+						 /*Wake*/   0x1 &UIC2 0x1d 0x4>;
+				reg = <0xef600d00 0x00000070>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <3>;
+				mal-rx-channel = <24>;
+				cell-index = <3>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII1>;
+				rgmii-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;
+			};
+		};
+
+	};
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600200";
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/ep8248e.dts b/arch/powerpc/boot/dts/ep8248e.dts
new file mode 100644
index 0000000000..9ae2d92f54
--- /dev/null
+++ b/arch/powerpc/boot/dts/ep8248e.dts
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree for the Embedded Planet EP8248E board running PlanetCore.
+ *
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+/ {
+	model = "EP8248E";
+	compatible = "fsl,ep8248e";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		planetcore-SMC1 = &smc1;
+		planetcore-SCC1 = &scc1;
+		ethernet0 = &eth0;
+		ethernet1 = &eth1;
+		serial0 = &smc1;
+		serial1 = &scc1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8248@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <16384>;
+			i-cache-size = <16384>;
+			timebase-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	localbus@f0010100 {
+		compatible = "fsl,mpc8248-localbus",
+		             "fsl,pq2-localbus",
+		             "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xf0010100 0x40>;
+
+		ranges = <0 0 0xfc000000 0x04000000
+		          1 0 0xfa000000 0x00008000>;
+
+		flash@0,3800000 {
+			compatible = "cfi-flash";
+			reg = <0 0x3800000 0x800000>;
+			bank-width = <4>;
+			device-width = <2>;
+		};
+
+		bcsr@1,0 {
+			#address-cells = <2>;
+			#size-cells = <1>;
+			reg = <1 0 0x10>;
+			compatible = "fsl,ep8248e-bcsr";
+			ranges;
+
+			mdio {
+				compatible = "fsl,ep8248e-mdio-bitbang";
+				#address-cells = <1>;
+				#size-cells = <0>;
+				reg = <1 8 1>;
+
+				PHY0: ethernet-phy@0 {
+					interrupt-parent = <&PIC>;
+					reg = <0>;
+				};
+
+				PHY1: ethernet-phy@1 {
+					interrupt-parent = <&PIC>;
+					reg = <1>;
+				};
+			};
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0>;
+	};
+
+	soc@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8248-immr", "fsl,pq2-soc", "simple-bus";
+		ranges = <0x00000000 0xf0000000 0x00053000>;
+
+		// Temporary until code stops depending on it.
+		device_type = "soc";
+
+		// Temporary -- will go away once kernel uses ranges for get_immrbase().
+		reg = <0xf0000000 0x00053000>;
+
+		cpm@119c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			#interrupt-cells = <2>;
+			compatible = "fsl,mpc8248-cpm", "fsl,cpm2",
+			             "simple-bus";
+			reg = <0x119c0 0x30>;
+			ranges;
+
+			muram {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x2000 0x9800 0x800>;
+				};
+			};
+
+			brg@119f0 {
+				compatible = "fsl,mpc8248-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x119f0 0x10 0x115f0 0x10>;
+			};
+
+			/* Monitor port/SMC1 */
+			smc1: serial@11a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc8248-smc-uart",
+				             "fsl,cpm2-smc-uart";
+				reg = <0x11a80 0x20 0x87fc 2>;
+				interrupts = <4 8>;
+				interrupt-parent = <&PIC>;
+				fsl,cpm-brg = <7>;
+				fsl,cpm-command = <0x1d000000>;
+				linux,planetcore-label = "SMC1";
+			};
+
+			/* "Serial" port/SCC1 */
+			scc1: serial@11a00 {
+				device_type = "serial";
+				compatible = "fsl,mpc8248-scc-uart",
+				             "fsl,cpm2-scc-uart";
+				reg = <0x11a00 0x20 0x8000 0x100>;
+				interrupts = <40 8>;
+				interrupt-parent = <&PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x00800000>;
+				linux,planetcore-label = "SCC1";
+			};
+
+			eth0: ethernet@11300 {
+				device_type = "network";
+				compatible = "fsl,mpc8248-fcc-enet",
+				             "fsl,cpm2-fcc-enet";
+				reg = <0x11300 0x20 0x8400 0x100 0x11390 1>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				interrupts = <32 8>;
+				interrupt-parent = <&PIC>;
+				phy-handle = <&PHY0>;
+				linux,network-index = <0>;
+				fsl,cpm-command = <0x12000300>;
+			};
+
+			eth1: ethernet@11320 {
+				device_type = "network";
+				compatible = "fsl,mpc8248-fcc-enet",
+				             "fsl,cpm2-fcc-enet";
+				reg = <0x11320 0x20 0x8500 0x100 0x113b0 1>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				interrupts = <33 8>;
+				interrupt-parent = <&PIC>;
+				phy-handle = <&PHY1>;
+				linux,network-index = <1>;
+				fsl,cpm-command = <0x16200300>;
+			};
+
+			usb@11b60 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,mpc8248-usb",
+				             "fsl,cpm2-usb";
+				reg = <0x11b60 0x18 0x8b00 0x100>;
+				interrupt-parent = <&PIC>;
+				interrupts = <11 8>;
+				fsl,cpm-command = <0x2e600000>;
+			};
+		};
+
+		PIC: interrupt-controller@10c00 {
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			reg = <0x10c00 0x80>;
+			compatible = "fsl,mpc8248-pic", "fsl,pq2-pic";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/ep88xc.dts b/arch/powerpc/boot/dts/ep88xc.dts
new file mode 100644
index 0000000000..b6b7e97876
--- /dev/null
+++ b/arch/powerpc/boot/dts/ep88xc.dts
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * EP88xC Device Tree Source
+ *
+ * Copyright 2006 MontaVista Software, Inc.
+ * Copyright 2007,2008 Freescale Semiconductor, Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "EP88xC";
+	compatible = "fsl,ep88xc";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,885@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <16>;
+			i-cache-line-size = <16>;
+			d-cache-size = <8192>;
+			i-cache-size = <8192>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			interrupts = <15 2>;	// decrementer interrupt
+			interrupt-parent = <&PIC>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0>;
+	};
+
+	localbus@fa200100 {
+		compatible = "fsl,mpc885-localbus", "fsl,pq1-localbus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xfa200100 0x40>;
+
+		ranges = <
+			0x0 0x0 0xfc000000 0x4000000
+			0x3 0x0 0xfa000000 0x1000000
+		>;
+
+		flash@0,2000000 {
+			compatible = "cfi-flash";
+			reg = <0x0 0x2000000 0x2000000>;
+			bank-width = <4>;
+			device-width = <2>;
+		};
+
+		board-control@3,400000 {
+			reg = <0x3 0x400000 0x10>;
+			compatible = "fsl,ep88xc-bcsr";
+		};
+	};
+
+	soc@fa200000 {
+		compatible = "fsl,mpc885", "fsl,pq1-soc";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xfa200000 0x4000>;
+		bus-frequency = <0>;
+
+		// Temporary -- will go away once kernel uses ranges for get_immrbase().
+		reg = <0xfa200000 0x4000>;
+
+		mdio@e00 {
+			compatible = "fsl,mpc885-fec-mdio", "fsl,pq1-fec-mdio";
+			reg = <0xe00 0x188>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			PHY0: ethernet-phy@0 {
+				reg = <0x0>;
+			};
+
+			PHY1: ethernet-phy@1 {
+				reg = <0x1>;
+			};
+		};
+
+		ethernet@e00 {
+			device_type = "network";
+			compatible = "fsl,mpc885-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0xe00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <3 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY0>;
+			linux,network-index = <0>;
+		};
+
+		ethernet@1e00 {
+			device_type = "network";
+			compatible = "fsl,mpc885-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0x1e00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <7 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY1>;
+			linux,network-index = <1>;
+		};
+
+		PIC: interrupt-controller@0 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x0 0x24>;
+			compatible = "fsl,mpc885-pic", "fsl,pq1-pic";
+		};
+
+		pcmcia@80 {
+			#address-cells = <3>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			compatible = "fsl,pq-pcmcia";
+			device_type = "pcmcia";
+			reg = <0x80 0x80>;
+			interrupt-parent = <&PIC>;
+			interrupts = <13 1>;
+		};
+
+		cpm@9c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc885-cpm", "fsl,cpm1";
+			command-proc = <0x9c0>;
+			interrupts = <0>;	// cpm error interrupt
+			interrupt-parent = <&CPM_PIC>;
+			reg = <0x9c0 0x40>;
+			ranges;
+
+			muram@2000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x2000 0x2000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x0 0x1c00>;
+				};
+			};
+
+			brg@9f0 {
+				compatible = "fsl,mpc885-brg",
+				             "fsl,cpm1-brg",
+				             "fsl,cpm-brg";
+				reg = <0x9f0 0x10>;
+			};
+
+			CPM_PIC: interrupt-controller@930 {
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				interrupts = <5 2 0 2>;
+				interrupt-parent = <&PIC>;
+				reg = <0x930 0x20>;
+				compatible = "fsl,mpc885-cpm-pic",
+				             "fsl,cpm1-pic";
+			};
+
+			// MON-1
+			serial@a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc885-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa80 0x10 0x3e80 0x40>;
+				interrupts = <4>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x90>;
+				linux,planetcore-label = "SMC1";
+			};
+
+			// SER-1
+			serial@a20 {
+				device_type = "serial";
+				compatible = "fsl,mpc885-scc-uart",
+				             "fsl,cpm1-scc-uart";
+				reg = <0xa20 0x20 0x3d00 0x80>;
+				interrupts = <29>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0x40>;
+				linux,planetcore-label = "SCC2";
+			};
+
+			usb@a00 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,mpc885-usb",
+				             "fsl,cpm1-usb";
+				reg = <0xa00 0x18 0x1c00 0x80>;
+				interrupt-parent = <&CPM_PIC>;
+				interrupts = <30>;
+				fsl,cpm-command = <0000>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile
new file mode 100644
index 0000000000..3bae982641
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+dtstree		:= $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/fsl/b4420qds.dts b/arch/powerpc/boot/dts/fsl/b4420qds.dts
new file mode 100644
index 0000000000..cd9203ceed
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4420qds.dts
@@ -0,0 +1,50 @@
+/*
+ * B4420QDS Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of
+ * this software, even if advised of the possibility of such damage.
+ */
+
+/include/ "b4420si-pre.dtsi"
+/include/ "b4qds.dtsi"
+
+/ {
+	model = "fsl,B4420QDS";	/* overrides the "fsl,B4QDS" value set in b4qds.dtsi */
+	compatible = "fsl,B4420QDS";	/* overrides the "fsl,B4QDS" value set in b4qds.dtsi */
+
+	ifc: localbus@ffe124000 {
+		board-control@3,0 {
+			compatible = "fsl,b4420qds-fpga", "fsl,fpga-qixis";	/* board-specific string in place of "fsl,b4qds-fpga" from b4qds.dtsi */
+		};
+	};
+
+};
+
+/include/ "b4420si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
new file mode 100644
index 0000000000..f996cced45
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
@@ -0,0 +1,97 @@
+/*
+ * B4420 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of
+ * this software, even if advised of the possibility of such damage.
+ */
+
+/include/ "b4si-post.dtsi"
+
+/* controller at 0x200000 */
+&pci0 {
+	compatible = "fsl,b4420-pcie", "fsl,qoriq-pcie-v2.4";
+};
+
+&dcsr {
+	dcsr-epu@0 {
+		compatible = "fsl,b4420-dcsr-epu", "fsl,dcsr-epu";
+	};
+	dcsr-npc {
+		compatible = "fsl,b4420-dcsr-cnpc", "fsl,dcsr-cnpc";
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,b4420-dcsr-dpaa", "fsl,dcsr-dpaa";
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,b4420-dcsr-ocn", "fsl,dcsr-ocn";
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,b4420-dcsr-nal", "fsl,dcsr-nal";
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,b4420-dcsr-rcpm", "fsl,dcsr-rcpm";
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,b4420-dcsr-snpc", "fsl,dcsr-snpc";
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,b4420-dcsr-snpc", "fsl,dcsr-snpc";
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+};
+
+&soc {
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,b4420-l3-cache-controller", "cache";
+	};
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0";
+	};
+
+	global-utilities@e1000 {
+		compatible = "fsl,b4420-clockgen", "fsl,b4-clockgen",
+			      "fsl,qoriq-clockgen-2.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,b4420-rcpm", "fsl,qoriq-rcpm-2.0";
+	};
+
+	L2_1: l2-cache-controller@c20000 {
+		compatible = "fsl,b4420-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
new file mode 100644
index 0000000000..bb7b9b9f3f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
@@ -0,0 +1,85 @@
+/*
+ * B4420 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of
+ * this software, even if advised of the possibility of such damage.
+ */
+
+/dts-v1/;
+
+/include/ "e6500_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,B4420";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		usb0 = &usb0;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/b4860qds.dts b/arch/powerpc/boot/dts/fsl/b4860qds.dts
new file mode 100644
index 0000000000..a8bc419959
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4860qds.dts
@@ -0,0 +1,117 @@
+/*
+ * B4860QDS Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "b4860si-pre.dtsi"
+/include/ "b4qds.dtsi"
+
+/ {
+	model = "fsl,B4860QDS";
+	compatible = "fsl,B4860QDS";
+
+	aliases {
+		phy_sgmii_1e = &phy_sgmii_1e;
+		phy_sgmii_1f = &phy_sgmii_1f;
+		phy_xaui_slot1 = &phy_xaui_slot1;
+		phy_xaui_slot2 = &phy_xaui_slot2;
+	};
+
+	ifc: localbus@ffe124000 {
+		board-control@3,0 {
+			compatible = "fsl,b4860qds-fpga", "fsl,fpga-qixis";
+		};
+	};
+
+	soc@ffe000000 {
+		fman@400000 {
+			ethernet@e8000 {
+				phy-handle = <&phy_sgmii_1e>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@ea000 {
+				phy-handle = <&phy_sgmii_1f>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy_xaui_slot1>;
+				phy-connection-type = "xgmii";
+			};
+
+			ethernet@f2000 {
+				phy-handle = <&phy_xaui_slot2>;
+				phy-connection-type = "xgmii";
+			};
+
+			mdio@fc000 {
+				phy_sgmii_1e: ethernet-phy@1e {
+					reg = <0x1e>;
+					status = "disabled";
+				};
+
+				phy_sgmii_1f: ethernet-phy@1f {
+					reg = <0x1f>;
+					status = "disabled";
+				};
+			};
+
+			mdio@fd000 {
+				phy_xaui_slot1: xaui-phy@slot1 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x7>;
+					status = "disabled";
+				};
+
+				phy_xaui_slot2: xaui-phy@slot2 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x6>;
+					status = "disabled";
+				};
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;	/* base 0xf_fe0c_0000, length 0x11000 (2 address cells + 2 size cells) */
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;	/* child 0x0 -> parent 0xc_2000_0000, length 0x1000_0000 (256 MiB) */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;	/* child 0x0 -> parent 0xc_3000_0000, length 0x1000_0000 (256 MiB) */
+		};
+	};
+};
+
+/include/ "b4860si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
new file mode 100644
index 0000000000..8687198211
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
@@ -0,0 +1,284 @@
+/*
+ * B4860 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "b4si-post.dtsi"
+
+/* controller at 0x200000 */
+&pci0 {
+	compatible = "fsl,b4860-pcie", "fsl,qoriq-pcie-v2.4";
+};
+
+&rio {
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 20>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,iommu-parent = <&pamu0>;
+	ranges;
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+	};
+};
+
+&dcsr {
+	dcsr-epu@0 {
+		compatible = "fsl,b4860-dcsr-epu", "fsl,dcsr-epu";
+	};
+	dcsr-npc {
+		compatible = "fsl,b4860-dcsr-cnpc", "fsl,dcsr-cnpc";
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,b4860-dcsr-dpaa", "fsl,dcsr-dpaa";
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,b4860-dcsr-ocn", "fsl,dcsr-ocn";
+	};
+	dcsr-ddr@13000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr2>;
+		reg = <0x13000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,b4860-dcsr-nal", "fsl,dcsr-nal";
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,b4860-dcsr-rcpm", "fsl,dcsr-rcpm";
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,b4860-dcsr-snpc", "fsl,dcsr-snpc";
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,b4860-dcsr-snpc", "fsl,dcsr-snpc";
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+};
+
+&bportals {
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+	bman-portal@48000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <141 2 0 0>;
+	};
+	bman-portal@4c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <143 2 0 0>;
+	};
+	bman-portal@50000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <145 2 0 0>;
+	};
+	bman-portal@54000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <147 2 0 0>;
+	};
+	bman-portal@58000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <149 2 0 0>;
+	};
+	bman-portal@5c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <151 2 0 0>;
+	};
+	bman-portal@60000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <153 2 0 0>;
+	};
+};
+
+&qportals {
+	qportal14: qman-portal@38000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <132 0x2 0 0>;
+		cell-index = <0xe>;
+	};
+	qportal15: qman-portal@3c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <134 0x2 0 0>;
+		cell-index = <0xf>;
+	};
+	qportal16: qman-portal@40000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <136 0x2 0 0>;
+		cell-index = <0x10>;
+	};
+	qportal17: qman-portal@44000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <138 0x2 0 0>;
+		cell-index = <0x11>;
+	};
+	qportal18: qman-portal@48000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <140 0x2 0 0>;
+		cell-index = <0x12>;
+	};
+	qportal19: qman-portal@4c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <142 0x2 0 0>;
+		cell-index = <0x13>;
+	};
+	qportal20: qman-portal@50000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <144 0x2 0 0>;
+		cell-index = <0x14>;
+	};
+	qportal21: qman-portal@54000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <146 0x2 0 0>;
+		cell-index = <0x15>;
+	};
+	qportal22: qman-portal@58000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <148 0x2 0 0>;
+		cell-index = <0x16>;
+	};
+	qportal23: qman-portal@5c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <150 0x2 0 0>;
+		cell-index = <0x17>;
+	};
+	qportal24: qman-portal@60000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <152 0x2 0 0>;
+		cell-index = <0x18>;
+	};
+};
+
+&soc {
+	ddr2: memory-controller@9000 {
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 9>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,b4860-l3-cache-controller", "cache";
+	};
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0";
+	};
+
+	global-utilities@e1000 {
+		compatible = "fsl,b4860-clockgen", "fsl,b4-clockgen",
+			      "fsl,qoriq-clockgen-2.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,b4860-rcpm", "fsl,qoriq-rcpm-2.0";
+	};
+
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+/include/ "qoriq-fman3-0-1g-5.dtsi"
+/include/ "qoriq-fman3-0-10g-0.dtsi"
+/include/ "qoriq-fman3-0-10g-1.dtsi"
+	fman@400000 {
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@ea000 {
+		};
+
+		enet6: ethernet@f0000 {
+		};
+
+		enet7: ethernet@f2000 {
+		};
+	};
+
+	L2_1: l2-cache-controller@c20000 {
+		compatible = "fsl,b4860-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
new file mode 100644
index 0000000000..388ba1b15f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
@@ -0,0 +1,104 @@
+/*
+ * B4860 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e6500_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,B4860";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		usb0 = &usb0;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+		ethernet6 = &enet6;
+		ethernet7 = &enet7;
+	};
+
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
new file mode 100644
index 0000000000..05be919f35
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -0,0 +1,280 @@
+/*
+ * B4 QDS (B4420QDS/B4860QDS) common board Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of
+ * this software, even if advised of the possibility of such damage.
+ */
+
+/ {
+	model = "fsl,B4QDS";
+	compatible = "fsl,B4QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		crypto = &crypto;
+		phy_sgmii_10 = &phy_sgmii_10;
+		phy_sgmii_11 = &phy_sgmii_11;
+		phy_sgmii_1c = &phy_sgmii_1c;
+		phy_sgmii_1d = &phy_sgmii_1d;
+	};
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+
+			partition@0 {
+				/* This location must not be altered */
+				/* 1MB for U-Boot bootloader image */
+				reg = <0x0 0x00100000>;
+				label = "NAND U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND DTB Image";
+			};
+
+			partition@200000 {
+				/* 10MB for Linux Kernel Image */
+				reg = <0x00200000 0x00A00000>;
+				label = "NAND Linux Kernel Image";
+			};
+
+			partition@c00000 {
+				/* 500MB for Root file System Image */
+				reg = <0x00c00000 0x1F400000>;
+				label = "NAND RFS Image";
+			};
+		};
+
+		board-control@3,0 {
+			compatible = "fsl,b4qds-fpga", "fsl,fpga-qixis";
+			reg = <3 0 0x300>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	reserved-memory {	/* children give only size + alignment; no reg (fixed base) is specified here */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;	/* empty ranges: child addresses map 1:1 onto the parent address space */
+
+		bman_fbpr: bman-fbpr {	/* extended through &bman_fbpr in b4si-post.dtsi (compatible, alloc-ranges) */
+			size = <0 0x1000000>;	/* 0x1000000 = 16 MiB */
+			alignment = <0 0x1000000>;	/* same value as size */
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 0x400000 = 4 MiB */
+			alignment = <0 0x400000>;	/* same value as size */
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 0x2000000 = 32 MiB */
+			alignment = <0 0x2000000>;	/* same value as size */
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01052000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+			};
+		};
+
+		sdhc@114000 {
+			/* Disabled as there is no SDHC connector on the B4420QDS board */
+			status = "disabled";
+		};
+
+		i2c@118000 {
+			mux@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0>;
+
+					eeprom@50 {
+						compatible = "atmel,24c64";
+						reg = <0x50>;
+					};
+					eeprom@51 {
+						compatible = "atmel,24c256";
+						reg = <0x51>;
+					};
+					eeprom@53 {
+						compatible = "atmel,24c256";
+						reg = <0x53>;
+					};
+					eeprom@57 {
+						compatible = "atmel,24c256";
+						reg = <0x57>;
+					};
+					rtc@68 {
+						compatible = "dallas,ds3232";
+						reg = <0x68>;
+					};
+				};
+
+				i2c@2 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x2>;
+
+					ina220@40 {
+						compatible = "ti,ina220";
+						reg = <0x40>;
+						shunt-resistor = <1000>;
+					};
+				};
+
+				i2c@3 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x3>;
+
+					adt7461@4c {
+						compatible = "adi,adt7461";
+						reg = <0x4c>;
+					};
+				};
+			};
+		};
+
+		usb@210000 {
+			dr_mode = "host";
+			phy_type = "ulpi";
+		};
+
+		fman@400000 {
+			ethernet@e0000 {
+				phy-handle = <&phy_sgmii_10>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy_sgmii_11>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_sgmii_1c>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy_sgmii_1d>;
+				phy-connection-type = "sgmii";
+			};
+
+			mdio@fc000 {
+				phy_sgmii_10: ethernet-phy@10 {
+					reg = <0x10>;
+				};
+
+				phy_sgmii_11: ethernet-phy@11 {
+					reg = <0x11>;
+				};
+
+				phy_sgmii_1c: ethernet-phy@1c {
+					reg = <0x1c>;
+					status = "disabled";
+				};
+
+				phy_sgmii_1d: ethernet-phy@1d {
+					reg = <0x1d>;
+					status = "disabled";
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "b4si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
new file mode 100644
index 0000000000..4f044b41a7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -0,0 +1,487 @@
+/*
+ * B4420 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of
+ * this software, even if advised of the possibility of such damage.
+ */
+
+&bman_fbpr {	/* extends the node labelled bman_fbpr; the label itself is defined outside this file */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;	/* same four cells as &qman_fqd and &qman_pfdr below */
+};
+
+&qman_fqd {	/* extends the node labelled qman_fqd; the label itself is defined outside this file */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>;	/* same four cells as &bman_fbpr and &qman_pfdr */
+};
+
+&qman_pfdr {	/* extends the node labelled qman_pfdr; the label itself is defined outside this file */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>;	/* same four cells as &bman_fbpr and &qman_fqd above */
+};
+
+&ifc {	/* SoC-level properties for the node labelled ifc; reg and ranges are not set in this file */
+	#address-cells = <2>;	/* children of this node are addressed with two cells */
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+/* controller at 0x200000 */
+&pci0 {	/* SoC-level properties for the node labelled pci0; reg and ranges are not set in this file */
+	compatible = "fsl,b4-pcie", "fsl,qoriq-pcie-v2.4";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;	/* pamu0 is the pamu@0 node under iommu@20000 in &soc of this file */
+	pcie@0 {	/* nested node, also device_type "pci"; repeats the controller's interrupt */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		reg = <0 0 0 0 0>;
+		interrupts = <20 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <	/* per row: 3 address cells, 1 interrupt cell, &mpic, then 4 parent cells */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+&dcsr {	/* SoC-level children of the node labelled dcsr; the label is defined outside this file */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,b4-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0	/* five specifiers of four cells each */
+			      84 2 0 0
+			      85 2 0 0
+			      94 2 0 0
+			      95 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {	/* NOTE(review): has reg but no unit address, unlike most siblings - confirm this is intended */
+		compatible = "fsl,b4-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;	/* two address/size pairs */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {	/* NOTE(review): has reg but no unit address, unlike most siblings - confirm this is intended */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;	/* two address/size pairs */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,b4-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,b4-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;	/* ddr1 labels memory-controller@8000 in &soc of this file */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,b4-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,b4-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,b4-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;	/* two address/size pairs */
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,b4-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;	/* two address/size pairs */
+	};
+	dcsr-cpu-sb-proxy@100000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;	/* the cpu0 label is defined outside this file */
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+};
+
+&bportals {	/* 14 bman-portal children; portal n sits at n * 0x4000 */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;	/* 0x4000-byte region, then a 0x1000-byte region at 0x1000000 + n * 0x1000 */
+		interrupts = <105 2 0 0>;	/* portals here use the odd numbers 105..131 */
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+};
+
+&qportals {	/* 14 qman-portal children labelled qportal0..qportal13; portal n sits at n * 0x4000 */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;	/* 0x4000-byte region, then a 0x1000-byte region at 0x1000000 + n * 0x1000 */
+		interrupts = <104 0x2 0 0>;	/* portals here use the even numbers 104..130 */
+		cell-index = <0x0>;	/* equals the portal number n in every child */
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <106 0x2 0 0>;
+		cell-index = <0x1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <108 0x2 0 0>;
+		cell-index = <0x2>;
+	};
+	qportal3: qman-portal@c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <110 0x2 0 0>;
+		cell-index = <0x3>;
+	};
+	qportal4: qman-portal@10000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <112 0x2 0 0>;
+		cell-index = <0x4>;
+	};
+	qportal5: qman-portal@14000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <114 0x2 0 0>;
+		cell-index = <0x5>;
+	};
+	qportal6: qman-portal@18000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <116 0x2 0 0>;
+		cell-index = <0x6>;
+	};
+	qportal7: qman-portal@1c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <118 0x2 0 0>;
+		cell-index = <0x7>;
+	};
+	qportal8: qman-portal@20000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <120 0x2 0 0>;
+		cell-index = <0x8>;
+	};
+	qportal9: qman-portal@24000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <122 0x2 0 0>;
+		cell-index = <0x9>;
+	};
+	qportal10: qman-portal@28000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <124 0x2 0 0>;
+		cell-index = <0xa>;
+	};
+	qportal11: qman-portal@2c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <126 0x2 0 0>;
+		cell-index = <0xb>;
+	};
+	qportal12: qman-portal@30000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <128 0x2 0 0>;
+		cell-index = <0xc>;
+	};
+	qportal13: qman-portal@34000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <130 0x2 0 0>;
+		cell-index = <0xd>;
+	};
+};
+
+&soc {	/* SoC-level children of the node labelled soc; several are created by the /include/ fragments and only extended here */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 2>;	/* several nodes below also use <16 2 1 N>, differing only in the last cell */
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {	/* referenced by dcsr-ddr@12000 in &dcsr via dev-handle */
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 8>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,b4-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000>;
+		interrupts = <16 2 1 4>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 0>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {	/* parent of the four pamu nodes, each covering 0x1000 of the 0x4000 region */
+		compatible =  "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x4000>;
+		fsl,portid-mapping = <0x8000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <	/* two specifiers of four cells each */
+			24 2 0 0
+			16 2 1 1>;
+
+
+		/* PCIe, DMA, SRIO */
+		pamu0: pamu@0 {	/* used as fsl,iommu-parent by &pci0 and both dma nodes in this file */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <8 1>;	/* the other three pamu nodes use <32 1> */
+			fsl,secondary-cache-geometry = <32 2>;
+		};
+
+		/* AXI2, Maple */
+		pamu1: pamu@1000 {	/* used as fsl,iommu-parent by usb0 and sdhc@114000 below */
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <32 2>;
+		};
+
+		/* Q/BMan */
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <32 2>;
+		};
+
+		/* AXI1, FMAN */
+		pamu3: pamu@3000 {
+			reg = <0x3000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <32 2>;
+		};
+	};
+
+/include/ "qoriq-mpic4.3.dtsi"
+
+	guts: global-utilities@e0000 {	/* target of every fsl,liodn-reg = <&guts ...> reference below */
+		compatible = "fsl,b4-device-config";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+/include/ "qoriq-clockgen2.dtsi"
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,b4-rcpm", "fsl,qoriq-rcpm-2.0";
+		reg = <0xe2000 0x1000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+	dma@100300 {	/* presumably created by the include above; only IOMMU/LIODN wiring is added here */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "elo3-dma-1.dtsi"
+	dma@101300 {	/* presumably created by the include above; only IOMMU/LIODN wiring is added here */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "qonverge-usb2-dr-0.dtsi"
+	usb0: usb@210000 {
+		compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";	/* versioned string first, generic string second */
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		sdhci,auto-cmd12;
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+	};
+
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-sec5.3-0.dtsi"
+
+/include/ "qoriq-qman3.dtsi"
+	qman: qman@318000 {
+		interrupts = <16 2 1 28>;
+	};
+
+/include/ "qoriq-bman1.dtsi"
+	bman: bman@31a000 {
+		interrupts = <16 2 1 29>;
+	};
+
+/include/ "qoriq-fman3-0.dtsi"
+/include/ "qoriq-fman3-0-1g-0.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+	fman@400000 {
+		interrupts = <96 2 0 0>, <16 2 1 30>;
+
+		muram@0 {
+			compatible = "fsl,fman-muram";
+			reg = <0x0 0x80000>;
+		};
+
+		enet0: ethernet@e0000 {	/* empty body: only attaches the label enet0 */
+		};
+
+		enet1: ethernet@e2000 {	/* empty body: only attaches the label enet1 */
+		};
+
+		enet2: ethernet@e4000 {	/* empty body: only attaches the label enet2 */
+		};
+
+		enet3: ethernet@e6000 {	/* empty body: only attaches the label enet3 */
+		};
+
+		mdio@fc000 {
+			interrupts = <100 1 0 0>;
+		};
+
+		mdio@fd000 {
+			interrupts = <101 1 0 0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
new file mode 100644
index 0000000000..8da984251a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC9131 RDB Device Tree Source
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ */
+
+/include/ "bsc9131si-pre.dtsi"
+
+/ {	/* board-level root: supplies the addresses and ranges that the included .dtsi files leave out */
+	model = "fsl,bsc9131rdb";
+	compatible = "fsl,bsc9131rdb";
+
+	memory {	/* no reg here; NOTE(review): presumably filled in by the boot loader - confirm */
+		device_type = "memory";
+	};
+
+	board_ifc: ifc: ifc@ff71e000 {	/* two labels: bsc9131rdb.dtsi uses board_ifc, bsc9131si-post.dtsi uses ifc */
+		/* NAND Flash on board */
+		ranges = <0x0 0x0 0x0 0xff800000 0x00004000>;	/* one window of length 0x4000; matches nand@0,0 in bsc9131rdb.dtsi */
+		reg = <0x0 0xff71e000 0x0 0x2000>;
+	};
+
+	board_soc: soc: soc@ff700000 {	/* two labels: bsc9131rdb.dtsi uses board_soc, bsc9131si-post.dtsi uses soc */
+		ranges = <0x0 0x0 0xff700000 0x100000>;	/* child offset 0 maps to 0xff700000, length 0x100000 */
+	};
+};
+
+/include/ "bsc9131rdb.dtsi"
+/include/ "bsc9131si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dtsi
new file mode 100644
index 0000000000..53f8b95634
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dtsi
@@ -0,0 +1,104 @@
+/*
+ * BSC9131 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_ifc {	/* board devices on the node labelled board_ifc in bsc9131rdb.dts */
+
+	nand@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,ifc-nand";
+		reg = <0x0 0x0 0x4000>;	/* same 0x4000 length as the ifc ranges window in bsc9131rdb.dts */
+
+	};
+};
+
+&board_soc {	/* board-level settings for children of the node labelled board_soc in bsc9131rdb.dts */
+	/* BSC9131RDB does not have any device on i2c@3100 */
+	i2c@3100 {
+		status = "disabled";
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <50000000>;	/* 50,000,000; the bsc9132qds board file uses 30000000 for the same part */
+
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {	/* referenced by enet0 below */
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 {	/* referenced by enet1 below */
+			interrupts = <2 1 0 0>;
+			reg = <0x3>;	/* NOTE(review): unit address is @1 but reg is 0x3 - confirm which one is correct */
+		};
+	};
+
+	sdhc@2e000 {
+		status = "disabled";
+	};
+
+	ptp_clock@b0e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0xb0e00 0xb0>;
+		interrupts = <68 2 0 0 69 2 0 0>;	/* two specifiers of four cells each */
+		fsl,tclk-period	= <5>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0xcccccccd>;
+		fsl,tmr-fiper1	= <999999995>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <249999999>;
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
new file mode 100644
index 0000000000..2a677fd323
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
@@ -0,0 +1,189 @@
+/*
+ * BSC9131 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {	/* SoC-level properties for the node labelled ifc; reg and ranges come from the board .dts */
+	#address-cells = <2>;	/* children such as nand@0,0 are addressed with two cells */
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <16 2 0 0 20 2 0 0>;	/* two specifiers of four cells each */
+};
+
+&soc {	/* SoC-level children of the node labelled soc; the board .dts supplies its ranges */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,bsc9131-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,bsc9131-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,bsc9131-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+	i2c@3000 {
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-i2c-1.dtsi"
+	i2c@3100 {
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-duart-0.dtsi"
+	serial0: serial@4500 {
+		interrupts = <18 2 0 0>;
+	};
+
+	serial1: serial@4600 {
+		interrupts = <18 2 0 0>;
+	};
+/include/ "pq3-espi-0.dtsi"
+	spi0: spi@7000 {
+		fsl,espi-num-chipselects = <1>;
+		interrupts = <22 0x2 0 0>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+	gpio-controller@f000 {
+		interrupts = <19 0x2 0 0>;
+	};
+
+	L2: l2-cache-controller@20000 {	/* referenced as next-level-cache by the CPU node in bsc9131si-pre.dtsi */
+		compatible = "fsl,bsc9131-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2, 256 KiB
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+
+	dma@21300 {	/* only the four channel interrupts are set here */
+
+		dma-channel@0 {
+			interrupts = <62 2 0 0>;
+		};
+
+		dma-channel@80 {
+			interrupts = <63 2 0 0>;
+		};
+
+		dma-channel@100 {
+			interrupts = <64 2 0 0>;
+		};
+
+		dma-channel@180 {
+			interrupts = <65 2 0 0>;
+		};
+	};
+
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v2.2", "fsl-usb2-dr";	/* most specific string first, generic fallback last */
+		interrupts = <40 0x2 0 0>;
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		sdhci,auto-cmd12;
+		interrupts = <41 0x2 0 0>;
+	};
+
+/include/ "pq3-sec4.4-0.dtsi"
+	crypto@30000 {
+		interrupts = <57 2 0 0>;
+
+		sec_jr0: jr@1000 {
+			interrupts = <58 2 0 0>;
+		};
+
+		sec_jr1: jr@2000 {
+			interrupts = <59 2 0 0>;
+		};
+
+		sec_jr2: jr@3000 {
+			interrupts = <60 2 0 0>;
+		};
+
+		sec_jr3: jr@4000 {
+			interrupts = <61 2 0 0>;
+		};
+	};
+
+/include/ "pq3-mpic.dtsi"
+
+	timer@41100 {	/* NOTE(review): named timer@41100, yet compatible and reg below describe a message block at 0x41400 */
+		compatible = "fsl,mpic-v1.2-msgr", "fsl,mpic-msg";
+		reg = <0x41400 0x200>;	/* NOTE(review): does not match the @41100 unit address - confirm against pq3-mpic.dtsi */
+		interrupts = <	/* NOTE(review): two cells per row here; every other specifier in this file has four - confirm */
+			0xb0 2
+			0xb1 2
+			0xb2 2
+			0xb3 2>;
+	};
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: ethernet@b0000 {
+		queue-group@b0000 {
+			interrupts = <26 2 0 0 27 2 0 0 28 2 0 0>;	/* three specifiers of four cells each */
+		};
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: ethernet@b1000 {
+		queue-group@b1000 {
+			interrupts = <33 2 0 0 34 2 0 0 35 2 0 0>;	/* three specifiers of four cells each */
+		};
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,bsc9131-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-pre.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-pre.dtsi
new file mode 100644
index 0000000000..f6ec4a6756
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9131si-pre.dtsi
@@ -0,0 +1,62 @@
+/*
+ * BSC9131 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {	/* SoC-level root: cell sizes, interrupt parent, aliases and the single CPU node */
+	compatible = "fsl,BSC9131";
+	#address-cells = <2>;	/* top-level reg and ranges therefore use two address cells and two size cells */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* the mpic label is not defined in this file */
+
+	aliases {	/* serial1 from bsc9131si-post.dtsi has no alias here */
+		serial0 = &serial0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,BSC9131@0 {
+			device_type = "cpu";
+			compatible = "fsl,e500v2";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* L2 labels l2-cache-controller@20000 in bsc9131si-post.dtsi */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
new file mode 100644
index 0000000000..7cb2158dfe
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC9132 QDS Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+/include/ "bsc9132si-pre.dtsi"
+
+/ {	/* board-level root: supplies the addresses and ranges that the included .dtsi files leave out */
+	model = "fsl,bsc9132qds";
+	compatible = "fsl,bsc9132qds";
+
+	memory {	/* no reg here; NOTE(review): presumably filled in by the boot loader - confirm */
+		device_type = "memory";
+	};
+
+	ifc: ifc@ff71e000 {
+		/* NOR, NAND Flash on board */
+		ranges = <0x0 0x0 0x0 0x88000000 0x08000000	/* first cell 0x0: matches nor@0,0 in bsc9132qds.dtsi */
+			  0x1 0x0 0x0 0xff800000 0x00010000>;	/* first cell 0x1: matches nand@1,0 in bsc9132qds.dtsi */
+		reg = <0x0 0xff71e000 0x0 0x2000>;
+	};
+
+	soc: soc@ff700000 {
+		ranges = <0x0 0x0 0xff700000 0x100000>;	/* child offset 0 maps to 0xff700000, length 0x100000 */
+	};
+
+	pci0: pcie@ff70a000 {
+		reg = <0 0xff70a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x90000000 0 0x90000000 0x0 0x20000000	/* first window, size 0x20000000 */
+			  0x1000000 0x0 0x00000000 0 0xc0010000 0x0 0x10000>;	/* second window, size 0x10000 */
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x90000000
+				  0x2000000 0x0 0x90000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;	/* NOTE(review): 0x100000 here, but the parent's second window is 0x10000 - confirm */
+		};
+	};
+};
+
+/include/ "bsc9132qds.dtsi"
+/include/ "bsc9132si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132qds.dtsi
new file mode 100644
index 0000000000..fead484a81
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dtsi
@@ -0,0 +1,113 @@
+/*
+ * BSC9132 QDS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {	/* board flash devices on the node labelled ifc in bsc9132qds.dts */
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x8000000>;	/* same 0x8000000 length as the first ifc ranges entry in bsc9132qds.dts */
+		bank-width = <2>;
+		device-width = <1>;
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,ifc-nand";
+		reg = <0x1 0x0 0x4000>;	/* length 0x4000; the matching ifc ranges entry in bsc9132qds.dts is 0x10000 long */
+	};
+};
+
+&soc {	/* board-level settings for children of the node labelled soc in bsc9132qds.dts */
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <30000000>;	/* 30,000,000; the bsc9131rdb board file uses 50000000 for the same part */
+		};
+	};
+
+	i2c@3000 {
+		fpga: fpga@66 {
+			compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
+			reg = <0x66>;
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {	/* referenced by enet0 below */
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 {	/* referenced by enet1 below */
+			reg = <0x1>;
+		};
+
+		tbi0: tbi-phy@11 {	/* shared: both enet0 and enet1 below point their tbi-handle here */
+			reg = <0x1f>;	/* NOTE(review): unit address is @11 but reg is 0x1f - confirm which one is correct */
+			device_type = "tbi-phy";
+		};
+	};
+
+	ptp_clock@b0e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0xb0e00 0xb0>;
+		interrupts = <68 2 0 0 69 2 0 0>;	/* two specifiers of four cells each */
+		fsl,tclk-period	= <5>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0xcccccccd>;
+		fsl,tmr-fiper1	= <999999995>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <249999999>;
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy1>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
new file mode 100644
index 0000000000..b8e0edd1ac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
@@ -0,0 +1,209 @@
+/*
+ * BSC9132 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {	/* SoC-level properties of the node labelled "ifc" */
+	#address-cells = <2>;	/* children use two address cells ... */
+	#size-cells = <1>;	/* ... and one size cell in reg */
+	compatible = "fsl,ifc", "simple-bus";
+	/* FIXME: Test whether interrupts are split */
+	interrupts = <16 2 0 0 20 2 0 0>;	/* presumably two 4-cell specifiers (16 and 20) - confirm */
+};
+
+/* controller at 0xa000 */
+&pci0 {	/* SoC-level properties of the node labelled "pci0" */
+	compatible = "fsl,bsc9132-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address cells + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* keeps bits 0xf800 of the first address cell and the low 3 pin bits */
+
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x2 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x2 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x2 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x2 0x0 0x0
+			>;	/* pins 1..4 are routed to &mpic sources 0..3 */
+	};
+};
+
+&soc {	// children are indented one level; /include/ lines stay at column 0
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,bsc9132-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,bsc9132-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,bsc9132-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 1 8>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+	i2c@3000 {	// presumably extends the node from the include above - confirm
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-i2c-1.dtsi"
+	i2c@3100 {
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-duart-0.dtsi"
+	serial0: serial@4500 {
+		interrupts = <18 2 0 0>;
+	};
+
+	serial1: serial@4600 {
+		interrupts = <18 2 0 0>;
+	};
+/include/ "pq3-espi-0.dtsi"
+	spi0: spi@7000 {
+		fsl,espi-num-chipselects = <1>;
+		interrupts = <22 0x2 0 0>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+	gpio-controller@f000 {
+		interrupts = <19 0x2 0 0>;
+	};
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,bsc9132-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2,256K
+		interrupts = <16 2 1 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+
+	dma@21300 {
+
+		dma-channel@0 {
+			interrupts = <62 2 0 0>;
+		};
+
+		dma-channel@80 {
+			interrupts = <63 2 0 0>;
+		};
+
+		dma-channel@100 {
+			interrupts = <64 2 0 0>;
+		};
+
+		dma-channel@180 {
+			interrupts = <65 2 0 0>;
+		};
+	};
+
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr","fsl-usb2-dr-v2.2";
+		interrupts = <40 0x2 0 0>;
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		fsl,sdhci-auto-cmd12;
+		interrupts = <41 0x2 0 0>;
+	};
+
+/include/ "pq3-sec4.4-0.dtsi"
+	crypto@30000 {
+		interrupts = <57 2 0 0>;
+
+		sec_jr0: jr@1000 {
+			interrupts = <58 2 0 0>;
+		};
+
+		sec_jr1: jr@2000 {
+			interrupts = <59 2 0 0>;
+		};
+
+		sec_jr2: jr@3000 {
+			interrupts = <60 2 0 0>;
+		};
+
+		sec_jr3: jr@4000 {
+			interrupts = <61 2 0 0>;
+		};
+	};
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: ethernet@b0000 {
+		queue-group@b0000 {
+			interrupts = <26 2 0 0 27 2 0 0 28 2 0 0>;
+		};
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: ethernet@b1000 {
+		queue-group@b1000 {
+			interrupts = <33 2 0 0 34 2 0 0 35 2 0 0>;
+		};
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,bsc9132-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
new file mode 100644
index 0000000000..90f7949fe3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
@@ -0,0 +1,67 @@
+/*
+ * BSC9132 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {	/* root skeleton: cell sizes, aliases and the two e500v2 cpu nodes */
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {	/* NOTE(review): bsc9132si-post.dtsi also labels serial1, but no alias is defined here - confirm intended */
+		serial0 = &serial0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e500v2@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* both cpus reference the same L2 label */
+		};
+
+		cpu1: PowerPC,e500v2@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts
new file mode 100644
index 0000000000..5e905e0857
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts
@@ -0,0 +1,224 @@
+/*
+ * C293 PCIE Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "c293si-pre.dtsi"
+
+/ {	/* board root: model strings plus address windows for ifc, soc and pci0 */
+	model = "fsl,C293PCIE";
+	compatible = "fsl,C293PCIE";
+
+	memory {
+		device_type = "memory";	/* no reg given in this file */
+	};
+
+	ifc: ifc@fffe1e000 {
+		reg = <0xf 0xffe1e000 0 0x2000>;
+		ranges = <0x0 0x0 0xf 0xec000000 0x04000000	/* child 0,0: 64 MiB at 0xf_ec000000 */
+			  0x1 0x0 0xf 0xff800000 0x00010000	/* child 1,0: 64 KiB at 0xf_ff800000 */
+			  0x2 0x0 0xf 0xffdf0000 0x00010000>;	/* child 2,0: 64 KiB at 0xf_ffdf0000 */
+
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;	/* 1 MiB window at 0xf_ffe00000 */
+	};
+
+	pci0: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000	/* 512 MiB at 0xc_00000000 */
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;	/* 64 KiB at 0xf_ffc00000 */
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+&ifc {	/* NOR, NAND and CPLD devices on the IFC, with their flash partition maps */
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x4000000>;	/* 64 MiB; the partitions below add up to exactly this */
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* 1MB for DTB Image */
+			reg = <0x0 0x00100000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@100000 {
+			/* 8 MB for Linux Kernel Image */
+			reg = <0x00100000 0x00800000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@900000 {
+			/* 53MB for rootfs */
+			reg = <0x00900000 0x03500000>;
+			label = "NOR Rootfs Image";
+		};
+
+		partition@3e00000 {
+			/* 1MB for blob encrypted key */
+			reg = <0x03e00000 0x00100000>;
+			label = "NOR blob encrypted key";
+		};
+
+		partition@3f00000 {
+			/* 1MB for u-boot Bootloader Image and env */
+			reg = <0x03f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,ifc-nand";
+		reg = <0x1 0x0 0x10000>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND DTB Image";
+		};
+
+		partition@200000 {
+			/* 16MB for Linux Kernel Image */
+			reg = <0x00200000 0x01000000>;
+			label = "NAND Linux Kernel Image";
+		};
+
+		partition@1200000 {
+			/* 4078MB for Root file System Image */
+			reg = <0x01200000 0xfee00000>;	/* starts where the kernel partition ends; ends at 4 GiB */
+			label = "NAND RFS Image";
+		};
+	};
+
+	cpld@2,0 {
+		compatible = "fsl,c293pcie-cpld";
+		reg = <0x2 0x0 0x20>;
+	};
+};
+
+&soc {	/* board-level devices: I2C parts, SPI flash layout, PHYs and MAC wiring */
+	i2c@3000 {
+		eeprom@50 {
+			compatible = "st,24c1024", "atmel,24c1024";
+			reg = <0x50>;
+		};
+
+		adt7461@4c {
+			compatible = "adi,adt7461";
+			reg = <0x4c>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <50000000>;	/* 50 MHz */
+
+			partition@0 {
+				/* 1MB for u-boot Bootloader Image */
+				/* 1MB for Environment */
+				reg = <0x0 0x00100000>;	/* NOTE(review): 1MB in total, not 1MB each - confirm the comments above */
+				label = "SPI Flash U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 512KB for DTB Image */
+				reg = <0x00100000 0x00080000>;
+				label = "SPI Flash DTB Image";
+			};
+
+			partition@180000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00180000 0x00400000>;
+				label = "SPI Flash Linux Kernel Image";
+			};
+
+			partition@580000 {
+				/* 10.5MB for RFS Image */
+				reg = <0x00580000 0x00a80000>;	/* ends at 0x01000000 (16 MiB) */
+				label = "SPI Flash RFS Image";
+			};
+		};
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			interrupts = <2 1 0 0>;
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@2 {	/* unit address must equal reg below */
+			interrupts = <2 1 0 0>;
+			reg = <0x2>;
+		};
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy1>;	/* bound through the label, so the node rename is transparent */
+		phy-connection-type = "rgmii-id";
+	};
+};
+/include/ "c293si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
new file mode 100644
index 0000000000..f208fb8f64
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
@@ -0,0 +1,189 @@
+/*
+ * C293 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {	/* SoC-level properties of the node labelled "ifc" */
+	#address-cells = <2>;	/* children use two address cells ... */
+	#size-cells = <1>;	/* ... and one size cell in reg */
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+/* controller at 0xa000 */
+&pci0 {	/* SoC-level properties of the node labelled "pci0" */
+	compatible = "fsl,qoriq-pcie-v2.2", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address cells + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* keeps bits 0xf800 of the first address cell and the low 3 pin bits */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;	/* pins 1..4 are routed to &mpic sources 0..3 */
+	};
+};
+
+&soc {	// SoC devices; /include/ lines pull in shared fragments at this nesting level
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,c293-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,c293-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+/include/ "pq3-espi-0.dtsi"
+	spi0: spi@7000 {
+		fsl,espi-num-chipselects = <1>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,c293-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x80000>; // L2,512K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		compatible = "fsl,c293-esdhc", "fsl,esdhc";
+		sdhci,auto-cmd12;
+	};
+
+	crypto@80000 {	// the include below is expanded inside this node
+/include/ "qoriq-sec6.0-0.dtsi"
+	};
+
+	crypto@80000 {	// same node name as above, so dtc merges these properties into it
+		reg = <0x80000 0x20000>;
+		ranges = <0x0 0x80000 0x20000>;
+
+		jr@1000 {
+			interrupts = <45 2 0 0>;
+		};
+		jr@2000 {
+			interrupts = <57 2 0 0>;
+		};
+	};
+
+	crypto@a0000 {	// second instance: same include, different base address
+/include/ "qoriq-sec6.0-0.dtsi"
+	};
+
+	crypto@a0000 {
+		reg = <0xa0000 0x20000>;
+		ranges = <0x0 0xa0000 0x20000>;
+
+		jr@1000 {
+			interrupts = <49 2 0 0>;
+		};
+		jr@2000 {
+			interrupts = <50 2 0 0>;
+		};
+	};
+
+	crypto@c0000 {	// third instance: same include, different base address
+/include/ "qoriq-sec6.0-0.dtsi"
+	};
+
+	crypto@c0000 {
+		reg = <0xc0000 0x20000>;
+		ranges = <0x0 0xc0000 0x20000>;
+
+		jr@1000 {
+			interrupts = <55 2 0 0>;
+		};
+		jr@2000 {
+			interrupts = <56 2 0 0>;
+		};
+	};
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: ethernet@b0000 {
+		queue-group@b0000 {
+			reg = <0x10000 0x1000>;
+		};
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: ethernet@b1000 {
+		queue-group@b1000 {
+			reg = <0x11000 0x1000>;
+		};
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,c293-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/c293si-pre.dtsi b/arch/powerpc/boot/dts/fsl/c293si-pre.dtsi
new file mode 100644
index 0000000000..065049d762
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/c293si-pre.dtsi
@@ -0,0 +1,63 @@
+/*
+ * C293 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {	/* root skeleton: cell sizes, aliases and the single e500v2 cpu node */
+	compatible = "fsl,C293";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {	/* the labels referenced here are not defined in this file */
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,e500v2@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/cyrus_p5020.dts b/arch/powerpc/boot/dts/fsl/cyrus_p5020.dts
new file mode 100644
index 0000000000..40ba0606ec
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/cyrus_p5020.dts
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cyrus 5020 Device Tree Source, based on p5020ds.dts
+ *
+ * Copyright 2015 Andy Fleming
+ *
+ * p5020ds.dts copyright:
+ * Copyright 2010 - 2014 Freescale Semiconductor Inc.
+ */
+
+/include/ "p5020si-pre.dtsi"
+
+/ {	/* board root: reserved memory pools and address windows for the SoC buses */
+	model = "varisys,CYRUS";
+	compatible = "varisys,CYRUS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	memory {
+		device_type = "memory";	/* no reg given in this file */
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;	/* 16 MiB */
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 4 MiB */
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 32 MiB */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;	/* 2 MiB window */
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>;	/* 2 MiB window */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;	/* 16 MiB window */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+		};
+
+		i2c@118100 {
+		};
+
+		i2c@119100 {
+			rtc@6f {
+				compatible = "microchip,mcp7941x";
+				reg = <0x6f>;
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;	/* 256 MiB at 0xc_20000000 */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;	/* 256 MiB at 0xc_30000000 */
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000	/* child 0: 128 MiB */
+			  2 0 0xf 0xffa00000 0x00040000	/* child 2: 256 KiB */
+			  3 0 0xf 0xffdf0000 0x00008000>;	/* child 3: 32 KiB */
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000	/* 512 MiB at 0xc_00000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;	/* 64 KiB at 0xf_f8000000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		reg = <0xf 0xfe201000 0 0x1000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000	/* 512 MiB at 0xc_20000000 */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;	/* 64 KiB at 0xf_f8010000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe202000 {
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000	/* 512 MiB at 0xc_40000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* 64 KiB at 0xf_f8020000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe203000 {
+		reg = <0xf 0xfe203000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000	/* 512 MiB at 0xc_60000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;	/* 64 KiB at 0xf_f8030000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "p5020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/e500mc_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500mc_power_isa.dtsi
new file mode 100644
index 0000000000..ea145c91cf
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/e500mc_power_isa.dtsi
@@ -0,0 +1,59 @@
+/*
+ * e500mc Power ISA Device Tree Source (include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	cpus {	// properties set on the cpus node itself, not on individual cpu nodes
+		power-isa-version = "2.06";
+		power-isa-b;		// Base
+		power-isa-e;		// Embedded
+		power-isa-atb;		// Alternate Time Base
+		power-isa-cs;		// Cache Specification
+		power-isa-ds;		// Decorated Storage
+		power-isa-e.ed;		// Embedded.Enhanced Debug
+		power-isa-e.pd;		// Embedded.External PID
+		power-isa-e.hv;		// Embedded.Hypervisor
+		power-isa-e.le;		// Embedded.Little-Endian
+		power-isa-e.pm;		// Embedded.Performance Monitor
+		power-isa-e.pc;		// Embedded.Processor Control
+		power-isa-ecl;		// Embedded Cache Locking
+		power-isa-exp;		// External Proxy
+		power-isa-fp;		// Floating Point
+		power-isa-fp.r;		// Floating Point.Record
+		power-isa-mmc;		// Memory Coherence
+		power-isa-scpm;		// Store Conditional Page Mobility
+		power-isa-wt;		// Wait
+		fsl,eref-deo;		// Data Cache Extended Operations
+		mmu-type = "power-embedded";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
new file mode 100644
index 0000000000..7e2a90cde7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
@@ -0,0 +1,51 @@
+/*
+ * e500v1 Power ISA Device Tree Source (include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	cpus {	// properties set on the cpus node itself, not on individual cpu nodes
+		power-isa-version = "2.03";
+		power-isa-b;		// Base
+		power-isa-e;		// Embedded
+		power-isa-atb;		// Alternate Time Base
+		power-isa-cs;		// Cache Specification
+		power-isa-e.le;		// Embedded.Little-Endian
+		power-isa-e.pm;		// Embedded.Performance Monitor
+		power-isa-ecl;		// Embedded Cache Locking
+		power-isa-mmc;		// Memory Coherence
+		power-isa-sp;		// Signal Processing Engine
+		power-isa-sp.fs;	// SPE.Embedded Float Scalar Single
+		power-isa-sp.fv;	// SPE.Embedded Float Vector
+		mmu-type = "power-embedded";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/e500v2_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v2_power_isa.dtsi
new file mode 100644
index 0000000000..f4928144d2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/e500v2_power_isa.dtsi
@@ -0,0 +1,52 @@
+/*
+ * e500v2 Power ISA Device Tree Source (include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	cpus {	// a valueless power-isa-<cat> property marks ISA category <cat> as supported (per ePAPR)
+		power-isa-version = "2.03";	// Power ISA revision the categories below belong to
+		power-isa-b;		// Base
+		power-isa-e;		// Embedded
+		power-isa-atb;		// Alternate Time Base
+		power-isa-cs;		// Cache Specification
+		power-isa-e.le;		// Embedded.Little-Endian
+		power-isa-e.pm;		// Embedded.Performance Monitor
+		power-isa-ecl;		// Embedded Cache Locking
+		power-isa-mmc;		// Memory Coherence
+		power-isa-sp;		// Signal Processing Engine
+		power-isa-sp.fd;	// SPE.Embedded Float Scalar Double
+		power-isa-sp.fs;	// SPE.Embedded Float Scalar Single
+		power-isa-sp.fv;	// SPE.Embedded Float Vector
+		mmu-type = "power-embedded";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/e5500_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e5500_power_isa.dtsi
new file mode 100644
index 0000000000..c254c981ae
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/e5500_power_isa.dtsi
@@ -0,0 +1,60 @@
+/*
+ * e5500 Power ISA Device Tree Source (include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	cpus {	// a valueless power-isa-<cat> property marks ISA category <cat> as supported (per ePAPR)
+		power-isa-version = "2.06";	// Power ISA revision the categories below belong to
+		power-isa-b;		// Base
+		power-isa-e;		// Embedded
+		power-isa-atb;		// Alternate Time Base
+		power-isa-cs;		// Cache Specification
+		power-isa-ds;		// Decorated Storage
+		power-isa-e.ed;		// Embedded.Enhanced Debug
+		power-isa-e.pd;		// Embedded.External PID
+		power-isa-e.hv;		// Embedded.Hypervisor
+		power-isa-e.le;		// Embedded.Little-Endian
+		power-isa-e.pm;		// Embedded.Performance Monitor
+		power-isa-e.pc;		// Embedded.Processor Control
+		power-isa-ecl;		// Embedded Cache Locking
+		power-isa-exp;		// External Proxy
+		power-isa-fp;		// Floating Point
+		power-isa-fp.r;		// Floating Point.Record
+		power-isa-mmc;		// Memory Coherence
+		power-isa-scpm;		// Store Conditional Page Mobility
+		power-isa-wt;		// Wait
+		power-isa-64;		// 64-bit
+		fsl,eref-deo;		// Data Cache Extended Operations
+		mmu-type = "power-embedded";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/e6500_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e6500_power_isa.dtsi
new file mode 100644
index 0000000000..a912dbeff3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/e6500_power_isa.dtsi
@@ -0,0 +1,65 @@
+/*
+ * e6500 Power ISA Device Tree Source (include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	cpus {	// a valueless power-isa-<cat> property marks ISA category <cat> as supported (per ePAPR)
+		power-isa-version = "2.06";	// Power ISA revision the categories below belong to
+		power-isa-b;		// Base
+		power-isa-e;		// Embedded
+		power-isa-atb;		// Alternate Time Base
+		power-isa-cs;		// Cache Specification
+		power-isa-ds;		// Decorated Storage
+		power-isa-e.ed;		// Embedded.Enhanced Debug
+		power-isa-e.pd;		// Embedded.External PID
+		power-isa-e.hv;		// Embedded.Hypervisor
+		power-isa-e.le;		// Embedded.Little-Endian
+		power-isa-e.pm;		// Embedded.Performance Monitor
+		power-isa-e.pc;		// Embedded.Processor Control
+		power-isa-ecl;		// Embedded Cache Locking
+		power-isa-exp;		// External Proxy
+		power-isa-fp;		// Floating Point
+		power-isa-fp.r;		// Floating Point.Record
+		power-isa-mmc;		// Memory Coherence
+		power-isa-scpm;		// Store Conditional Page Mobility
+		power-isa-wt;		// Wait
+		power-isa-64;		// 64-bit
+		power-isa-e.pt;		// Embedded.Page Table
+		power-isa-e.hv.lrat;	// Embedded.Hypervisor.LRAT
+		power-isa-e.em;		// Embedded Multi-Threading
+		power-isa-v;		// Vector (AltiVec)
+		fsl,eref-er;		// Enhanced Reservations (Load and Reserve and Store Cond.)
+		fsl,eref-deo;		// Data Cache Extended Operations
+		mmu-type = "power-embedded";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
new file mode 100644
index 0000000000..3c210e0d52
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x100000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma0: dma@100300 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,elo3-dma";
+	reg = <0x100300 0x4>,	// DGSR0: general status for the first four channels (per the fsl,elo3-dma binding)
+	      <0x100600 0x4>;	// DGSR1: general status for the last four channels
+	ranges = <0x0 0x100100 0x500>;	// channel offsets below are relative to 0x100100
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		interrupts = <28 2 0 0>;
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		interrupts = <29 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		interrupts = <30 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		interrupts = <31 2 0 0>;
+	};
+	dma-channel@300 {	// 0x200-0x2ff holds no channel; the first reg entry (0x100300) falls in that gap
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x300 0x80>;
+		interrupts = <76 2 0 0>;
+	};
+	dma-channel@380 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x380 0x80>;
+		interrupts = <77 2 0 0>;
+	};
+	dma-channel@400 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x400 0x80>;
+		interrupts = <78 2 0 0>;
+	};
+	dma-channel@480 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x480 0x80>;
+		interrupts = <79 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
new file mode 100644
index 0000000000..cccf3bb382
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x101000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma1: dma@101300 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,elo3-dma";
+	reg = <0x101300 0x4>,	// DGSR0: general status for the first four channels (per the fsl,elo3-dma binding)
+	      <0x101600 0x4>;	// DGSR1: general status for the last four channels
+	ranges = <0x0 0x101100 0x500>;	// channel offsets below are relative to 0x101100
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		interrupts = <32 2 0 0>;
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		interrupts = <33 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		interrupts = <34 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		interrupts = <35 2 0 0>;
+	};
+	dma-channel@300 {	// 0x200-0x2ff holds no channel; the first reg entry (0x101300) falls in that gap
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x300 0x80>;
+		interrupts = <80 2 0 0>;
+	};
+	dma-channel@380 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x380 0x80>;
+		interrupts = <81 2 0 0>;
+	};
+	dma-channel@400 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x400 0x80>;
+		interrupts = <82 2 0 0>;
+	};
+	dma-channel@480 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x480 0x80>;
+		interrupts = <83 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-2.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-2.dtsi
new file mode 100644
index 0000000000..d3cc8d0f7c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-2.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x102000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma2: dma@102300 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,elo3-dma";
+	reg = <0x102300 0x4>,	// DGSR0: general status for the first four channels (per the fsl,elo3-dma binding)
+	      <0x102600 0x4>;	// DGSR1: general status for the last four channels
+	ranges = <0x0 0x102100 0x500>;	// channel offsets below are relative to 0x102100
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		interrupts = <464 2 0 0>;
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		interrupts = <465 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		interrupts = <466 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		interrupts = <467 2 0 0>;
+	};
+	dma-channel@300 {	// 0x200-0x2ff holds no channel; the first reg entry (0x102300) falls in that gap
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x300 0x80>;
+		interrupts = <468 2 0 0>;
+	};
+	dma-channel@380 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x380 0x80>;
+		interrupts = <469 2 0 0>;
+	};
+	dma-channel@400 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x400 0x80>;
+		interrupts = <470 2 0 0>;
+	};
+	dma-channel@480 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x480 0x80>;
+		interrupts = <471 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/ge_imp3a.dts b/arch/powerpc/boot/dts/fsl/ge_imp3a.dts
new file mode 100644
index 0000000000..da3de8e2b7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/ge_imp3a.dts
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE IMP3A Device Tree Source
+ *
+ * Copyright 2010-2011 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: P2020 DS Device Tree Source
+ * Copyright 2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {
+	model = "GE_IMP3A";
+	compatible = "ge,imp3a";
+
+	memory {
+		device_type = "memory";	// NOTE(review): no reg given here — presumably filled in by the bootloader; confirm
+	};
+
+	lbc: localbus@fef05000 {
+		reg = <0 0xfef05000 0 0x1000>;
+
+		ranges = <0x0 0x0 0x0 0xff000000 0x01000000	// CS0: 16 MB, nor@0,0 mirror (commented out below)
+			  0x1 0x0 0x0 0xe0000000 0x08000000	// CS1: 128 MB, nor@1,0
+			  0x2 0x0 0x0 0xe8000000 0x08000000	// CS2: 128 MB, no child node described here
+			  0x3 0x0 0x0 0xfc100000 0x00020000	// CS3: 128 KB, nvram@3,0
+			  0x4 0x0 0x0 0xfc000000 0x00008000	// CS4: 32 KB, FPGA regs/pic/gpio/wdt
+			  0x5 0x0 0x0 0xfc008000 0x00008000	// CS5: 32 KB, no child node described here
+			  0x6 0x0 0x0 0xfee00000 0x00040000	// CS6: 256 KB, nand@6,0
+			  0x7 0x0 0x0 0xfee80000 0x00040000>;	// CS7: 256 KB, nand@7,0
+
+		/* nor@0,0 is a mirror of part of the memory in nor@1,0
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "ge,imp3a-firmware-mirror", "cfi-flash";
+			reg = <0x0 0x0 0x1000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				label = "firmware";
+				reg = <0x0 0x1000000>;
+				read-only;
+			};
+		};
+		*/
+
+		nor@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "ge,imp3a-paged-flash", "cfi-flash";
+			reg = <0x1 0x0 0x8000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				label = "user";
+				reg = <0x0 0x7800000>;	// first 120 MB
+			};
+
+			partition@7800000 {
+				label = "firmware";
+				reg = <0x7800000 0x800000>;	// last 8 MB
+				read-only;
+			};
+		};
+
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
+		fpga@4,0 {
+			compatible = "ge,imp3a-fpga-regs";
+			reg = <0x4 0x0 0x20>;
+		};
+
+		gef_pic: pic@4,20 {
+			#interrupt-cells = <1>;
+			interrupt-controller;
+			device_type = "interrupt-controller";
+			compatible = "ge,imp3a-fpga-pic", "gef,fpga-pic-1.00";
+			reg = <0x4 0x20 0x20>;
+			interrupts = <6 7 0 0>;
+		};
+
+		gef_gpio: gpio@4,400 {
+			#gpio-cells = <2>;
+			compatible = "ge,imp3a-gpio";
+			reg = <0x4 0x400 0x24>;
+			gpio-controller;
+		};
+
+		wdt@4,800 {
+			compatible = "ge,imp3a-fpga-wdt", "gef,fpga-wdt-1.00",
+				"gef,fpga-wdt";
+			reg = <0x4 0x800 0x8>;
+			interrupts = <10 4>;	// NOTE(review): two cells, but gef_pic declares #interrupt-cells = <1> — confirm intent
+			interrupt-parent = <&gef_pic>;
+		};
+
+		/* Second watchdog available, driver currently supports one.
+		wdt@4,808 {
+			compatible = "ge,imp3a-fpga-wdt", "gef,fpga-wdt-1.00",
+				"gef,fpga-wdt";
+			reg = <0x4 0x808 0x8>;
+			interrupts = <9 4>;
+			interrupt-parent = <&gef_pic>;
+		};
+		*/
+
+		nand@6,0 {
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x6 0x0 0x40000>;
+		};
+
+		nand@7,0 {
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x7 0x0 0x40000>;
+		};
+	};
+
+	soc: soc@fef00000 {
+		ranges = <0x0 0 0xfef00000 0x100000>;	// SoC register offsets map to 0xfef00000, 1 MB window
+
+		i2c@3000 {
+			hwmon@48 {
+				compatible = "national,lm92";
+				reg = <0x48>;
+			};
+
+			hwmon@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			rtc@51 {
+				compatible = "epson,rx8581";
+				reg = <0x51>;
+			};
+
+			eti@6b {
+				compatible = "dallas,ds1682";
+				reg = <0x6b>;
+			};
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+			dr_mode = "host";
+		};
+
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0xc 0x4>;
+				reg = <0x1>;	// NOTE(review): unit address is @0 but reg is 0x1 — confirm before renaming
+			};
+			phy1: ethernet-phy@1 {
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0xb 0x4>;
+				reg = <0x2>;	// NOTE(review): unit address is @1 but reg is 0x2 — confirm before renaming
+			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@24000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "gmii";
+		};
+
+		enet1: ethernet@25000 {
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;	// phy1 sits on the mdio@24520 bus above, not on mdio@25520
+			phy-connection-type = "gmii";
+		};
+
+		enet2: ethernet@26000 {
+			status = "disabled";
+		};
+	};
+
+	pci0: pcie@fef08000 {
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000	// 32-bit MEM: PCI 0xc0000000 -> CPU 0xc0000000, 512 MB
+			  0x1000000 0x0 0x00000000 0 0xfe020000 0x0 0x10000>;	// I/O: PCI 0x0 -> CPU 0xfe020000, 64 KB
+		reg = <0 0xfef08000 0 0x1000>;
+
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xc0000000	// MEM: child bus address
+				  0x2000000 0x0 0xc0000000	//      same address on the parent bus (1:1)
+				  0x0 0x20000000		//      size 512 MB
+
+				  0x1000000 0x0 0x0		// I/O: child bus address
+				  0x1000000 0x0 0x0		//      same address on the parent bus (1:1)
+				  0x0 0x10000>;			//      size 64 KB
+		};
+	};
+
+	pci1: pcie@fef09000 {
+		reg = <0 0xfef09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000	// 32-bit MEM: PCI 0xa0000000 -> CPU 0xa0000000, 512 MB
+			  0x1000000 0x0 0x00000000 0 0xfe010000 0x0 0x10000>;	// I/O: PCI 0x0 -> CPU 0xfe010000, 64 KB
+
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000	// MEM: child bus address
+				  0x2000000 0x0 0xa0000000	//      same address on the parent bus (1:1)
+				  0x0 0x20000000		//      size 512 MB
+
+				  0x1000000 0x0 0x0		// I/O: child bus address
+				  0x1000000 0x0 0x0		//      same address on the parent bus (1:1)
+				  0x0 0x10000>;			//      size 64 KB
+		};
+
+	};
+
+	pci2: pcie@fef0a000 {
+		reg = <0 0xfef0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000	// 32-bit MEM: PCI 0x80000000 -> CPU 0x80000000, 512 MB
+			  0x1000000 0x0 0x00000000 0 0xfe000000 0x0 0x10000>;	// I/O: PCI 0x0 -> CPU 0xfe000000, 64 KB
+
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000	// MEM: child bus address
+				  0x2000000 0x0 0x80000000	//      same address on the parent bus (1:1)
+				  0x0 0x20000000		//      size 512 MB
+
+				  0x1000000 0x0 0x0		// I/O: child bus address
+				  0x1000000 0x0 0x0		//      same address on the parent bus (1:1)
+				  0x0 0x10000>;			//      size 64 KB
+		};
+	};
+};
+
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
new file mode 100644
index 0000000000..fc92bb032c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE PPC9A Device Tree Source
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: SBS CM6 Device Tree Source
+ * Copyright 2007 SBS Technologies GmbH & Co. KG
+ * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/*
+ * Compiled with dtc -I dts -O dtb -o gef_ppc9a.dtb gef_ppc9a.dts
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+	model = "GEF_PPC9A";
+	compatible = "gef,ppc9a";
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x40000000>;	// set by uboot
+	};
+
+	lbc: localbus@fef05000 {
+		reg = <0xfef05000 0x1000>;
+
+		ranges = <0 0 0xff000000 0x01000000	// 16MB Boot flash
+			  1 0 0xe8000000 0x08000000	// Paged Flash 0
+			  2 0 0xe0000000 0x08000000	// Paged Flash 1
+			  3 0 0xfc100000 0x00020000	// NVRAM
+			  4 0 0xfc000000 0x00008000	// FPGA
+			  5 0 0xfc008000 0x00008000	// AFIX FPGA
+			  6 0 0xfd000000 0x00800000	// IO FPGA (8-bit)
+			  7 0 0xfd800000 0x00800000>;	// IO FPGA (32-bit)
+
+		/* flash@0,0 is a mirror of part of the memory in flash@1,0
+		flash@0,0 {
+			compatible = "gef,ppc9a-firmware-mirror", "cfi-flash";
+			reg = <0x0 0x0 0x1000000>;
+			bank-width = <4>;
+			device-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "firmware";
+				reg = <0x0 0x1000000>;
+				read-only;
+			};
+		};
+		*/
+
+		flash@1,0 {
+			compatible = "gef,ppc9a-paged-flash", "cfi-flash";
+			reg = <0x1 0x0 0x8000000>;	// whole 128 MB "Paged Flash 0" window
+			bank-width = <4>;
+			device-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "user";
+				reg = <0x0 0x7800000>;	// first 120 MB
+			};
+			partition@7800000 {
+				label = "firmware";
+				reg = <0x7800000 0x800000>;	// last 8 MB
+				read-only;
+			};
+		};
+
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
+		fpga@4,0 {
+			compatible = "gef,ppc9a-fpga-regs";
+			reg = <0x4 0x0 0x40>;
+		};
+
+		wdt@4,2000 {
+			compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
+				"gef,fpga-wdt";
+			reg = <0x4 0x2000 0x8>;
+			interrupts = <0x1a 0x4>;	// NOTE(review): two cells, but gef_pic declares #interrupt-cells = <1> — confirm intent
+			interrupt-parent = <&gef_pic>;
+		};
+		/* Second watchdog available, driver currently supports one.
+		wdt@4,2010 {
+			compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
+				"gef,fpga-wdt";
+			reg = <0x4 0x2010 0x8>;
+			interrupts = <0x1b 0x4>;
+			interrupt-parent = <&gef_pic>;
+		};
+		*/
+		gef_pic: pic@4,4000 {
+			#interrupt-cells = <1>;
+			interrupt-controller;
+			compatible = "gef,ppc9a-fpga-pic", "gef,fpga-pic-1.00";
+			reg = <0x4 0x4000 0x20>;
+			interrupts = <0x8 0x9 0 0>;
+
+		};
+		gef_gpio: gpio@7,14000 {
+			#gpio-cells = <2>;
+			compatible = "gef,ppc9a-gpio", "gef,sbc610-gpio";
+			reg = <0x7 0x14000 0x24>;	// inside the "IO FPGA (32-bit)" window, unlike the other FPGA nodes
+			gpio-controller;
+		};
+	};
+
+	soc: soc@fef00000 {
+		ranges = <0x0 0xfef00000 0x00100000>;	// SoC register offsets map to 0xfef00000, 1 MB window
+
+		i2c@3000 {
+			hwmon@48 {
+				compatible = "national,lm92";
+				reg = <0x48>;
+			};
+
+			hwmon@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			rtc@51 {
+				compatible = "epson,rx8581";
+				reg = <0x00000051>;
+			};
+
+			eti@6b {
+				compatible = "dallas,ds1682";
+				reg = <0x6b>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "gmii";
+		};
+
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0x9 0x4>;
+				reg = <1>;	// NOTE(review): unit address is @0 but reg is 1 — confirm before renaming
+			};
+			phy2: ethernet-phy@2 {
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0x8 0x4>;
+				reg = <3>;	// NOTE(review): unit address is @2 but reg is 3 — confirm before renaming
+			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ethernet@26000 {	// labels do not follow address order: enet1 is @26000, enet2 (@25000) is disabled below
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy2>;	// phy2 sits on the mdio@24520 bus above
+			phy-connection-type = "gmii";
+		};
+
+		mdio@26520 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet2: ethernet@25000 {
+			status = "disabled";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		enet3: ethernet@27000 {
+			status = "disabled";
+		};
+
+		mdio@27520 {
+			status = "disabled";
+		};
+	};
+
+	pci0: pcie@fef08000 {
+		reg = <0xfef08000 0x1000>;
+		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000	// 32-bit MEM: PCI 0x80000000 -> CPU 0x80000000, 1 GB
+			  0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;	// I/O: PCI 0x0 -> CPU 0xfe000000, 4 MB
+
+		pcie@0 {
+			ranges = <0x02000000 0x0 0x80000000	// MEM: child bus address
+				  0x02000000 0x0 0x80000000	//      same address on the parent bus (1:1)
+				  0x0 0x40000000		//      size 1 GB
+
+				  0x01000000 0x0 0x00000000	// I/O: child bus address
+				  0x01000000 0x0 0x00000000	//      same address on the parent bus (1:1)
+				  0x0 0x00400000>;		//      size 4 MB
+		};
+	};
+
+	pci1: pcie@fef09000 {
+		status = "disabled";	// only pci0 is enabled in this board file
+	};
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/gef_sbc310.dts b/arch/powerpc/boot/dts/fsl/gef_sbc310.dts
new file mode 100644
index 0000000000..47ae85c346
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/gef_sbc310.dts
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC310 Device Tree Source
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: SBS CM6 Device Tree Source
+ * Copyright 2007 SBS Technologies GmbH & Co. KG
+ * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/*
+ * Compiled with dtc -I dts -O dtb -o gef_sbc310.dtb gef_sbc310.dts
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+	model = "GEF_SBC310";
+	compatible = "gef,sbc310";
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x40000000>;	// set by uboot
+	};
+
+	lbc: localbus@fef05000 {
+		reg = <0xfef05000 0x1000>;
+
+		ranges = <0 0 0xff000000 0x01000000	// 16MB Boot flash
+			  1 0 0xe0000000 0x08000000	// Paged Flash 0
+			  2 0 0xe8000000 0x08000000	// Paged Flash 1
+			  3 0 0xfc100000 0x00020000	// NVRAM
+			  4 0 0xfc000000 0x00010000>;	// FPGA
+
+		/* flash@0,0 is a mirror of part of the memory in flash@1,0
+		flash@0,0 {
+			compatible = "gef,sbc310-firmware-mirror", "cfi-flash";
+			reg = <0x0 0x0 0x01000000>;
+			bank-width = <2>;
+			device-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "firmware";
+				reg = <0x0 0x01000000>;
+				read-only;
+			};
+		};
+		*/
+
+		flash@1,0 {
+			compatible = "gef,sbc310-paged-flash", "cfi-flash";
+			reg = <0x1 0x0 0x8000000>;	// whole 128 MB "Paged Flash 0" window
+			bank-width = <2>;
+			device-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "user";
+				reg = <0x0 0x7800000>;	// first 120 MB
+			};
+			partition@7800000 {
+				label = "firmware";
+				reg = <0x7800000 0x800000>;	// last 8 MB
+				read-only;
+			};
+		};
+
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
+		fpga@4,0 {
+			compatible = "gef,fpga-regs";
+			reg = <0x4 0x0 0x40>;
+		};
+
+		wdt@4,2000 {
+			compatible = "gef,sbc310-fpga-wdt", "gef,fpga-wdt-1.00",
+				"gef,fpga-wdt";
+			reg = <0x4 0x2000 0x8>;
+			interrupts = <0x1a 0x4>;	// NOTE(review): two cells, but gef_pic declares #interrupt-cells = <1> — confirm intent
+			interrupt-parent = <&gef_pic>;
+		};
+/* Second watchdog available, driver currently supports one.
+		wdt@4,2010 {
+			compatible = "gef,sbc310-fpga-wdt", "gef,fpga-wdt-1.00",
+				"gef,fpga-wdt";
+			reg = <0x4 0x2010 0x8>;
+			interrupts = <0x1b 0x4>;
+			interrupt-parent = <&gef_pic>;
+		};
+*/
+		gef_pic: pic@4,4000 {
+			#interrupt-cells = <1>;
+			interrupt-controller;
+			compatible = "gef,sbc310-fpga-pic", "gef,fpga-pic";
+			reg = <0x4 0x4000 0x20>;
+			interrupts = <0x8 0x9 0 0>;
+
+		};
+		gef_gpio: gpio@4,8000 {
+			#gpio-cells = <2>;
+			compatible = "gef,sbc310-gpio";
+			reg = <0x4 0x8000 0x24>;
+			gpio-controller;
+		};
+	};
+
+	soc: soc@fef00000 {
+		ranges = <0x0 0xfef00000 0x00100000>;	// SoC register offsets map to 0xfef00000, 1 MB window
+
+		i2c@3000 {
+			rtc@51 {
+				compatible = "epson,rx8581";
+				reg = <0x00000051>;
+			};
+		};
+
+		i2c@3100 {
+			hwmon@48 {
+				compatible = "national,lm92";
+				reg = <0x48>;
+			};
+
+			hwmon@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			eti@6b {
+				compatible = "dallas,ds1682";
+				reg = <0x6b>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "gmii";
+		};
+
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0x9 0x4>;
+				reg = <1>;	// NOTE(review): unit address is @0 but reg is 1 — confirm before renaming
+			};
+			phy2: ethernet-phy@2 {
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0x8 0x4>;
+				reg = <3>;	// NOTE(review): unit address is @2 but reg is 3 — confirm before renaming
+			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ethernet@26000 {	// labels do not follow address order: enet1 is @26000, enet2 (@25000) is disabled below
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy2>;	// phy2 sits on the mdio@24520 bus above
+			phy-connection-type = "gmii";
+		};
+
+		mdio@26520 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet2: ethernet@25000 {
+			status = "disabled";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		enet3: ethernet@27000 {
+			status = "disabled";
+		};
+
+		mdio@27520 {
+			status = "disabled";
+		};
+	};
+
+	pci0: pcie@fef08000 {
+		reg = <0xfef08000 0x1000>;
+		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000	// 32-bit MEM: PCI 0x80000000 -> CPU 0x80000000, 1 GB
+			  0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;	// I/O: PCI 0x0 -> CPU 0xfe000000, 4 MB
+		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;	// match on device/function bits and the interrupt pin
+		interrupt-map = <
+			0x0000 0x0 0x0 0x1 &mpic 0x0 0x2	// device 0, INTA -> mpic source 0x0
+			0x0000 0x0 0x0 0x2 &mpic 0x1 0x2	// device 0, INTB -> mpic source 0x1
+			0x0000 0x0 0x0 0x3 &mpic 0x2 0x2	// device 0, INTC -> mpic source 0x2
+			0x0000 0x0 0x0 0x4 &mpic 0x3 0x2	// device 0, INTD -> mpic source 0x3
+		>;
+
+		pcie@0 {
+			ranges = <0x02000000 0x0 0x80000000	// MEM: child bus address
+				  0x02000000 0x0 0x80000000	//      same address on the parent bus (1:1)
+				  0x0 0x40000000		//      size 1 GB
+
+				  0x01000000 0x0 0x00000000	// I/O: child bus address
+				  0x01000000 0x0 0x00000000	//      same address on the parent bus (1:1)
+				  0x0 0x00400000>;		//      size 4 MB
+		};
+	};
+
+	pci1: pcie@fef09000 {
+		reg = <0xfef09000 0x1000>;
+		ranges = <0x02000000 0x0 0xc0000000 0xc0000000 0x0 0x20000000	// 32-bit MEM: PCI 0xc0000000 -> CPU 0xc0000000, 512 MB
+			  0x01000000 0x0 0x00000000 0xfe400000 0x0 0x00400000>;	// I/O: PCI 0x0 -> CPU 0xfe400000, 4 MB
+
+		pcie@0 {
+			ranges = <0x02000000 0x0 0xc0000000	// MEM: child bus address
+				  0x02000000 0x0 0xc0000000	//      same address on the parent bus (1:1)
+				  0x0 0x20000000		//      size 512 MB
+
+				  0x01000000 0x0 0x00000000	// I/O: child bus address
+				  0x01000000 0x0 0x00000000	//      same address on the parent bus (1:1)
+				  0x0 0x00400000>;		//      size 4 MB
+		};
+	};
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/gef_sbc610.dts b/arch/powerpc/boot/dts/fsl/gef_sbc610.dts
new file mode 100644
index 0000000000..5322be44b6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/gef_sbc610.dts
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC610 Device Tree Source
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: SBS CM6 Device Tree Source
+ * Copyright 2007 SBS Technologies GmbH & Co. KG
+ * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/*
+ * Compiled with dtc -I dts -O dtb -o gef_sbc610.dtb gef_sbc610.dts
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+	model = "GEF_SBC610";
+	compatible = "gef,sbc610";
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x40000000>;	// set by uboot
+	};
+
+	lbc: localbus@fef05000 {
+		reg = <0xfef05000 0x1000>;
+
+		ranges = <0 0 0xff000000 0x01000000	// 16MB Boot flash
+			  1 0 0xe8000000 0x08000000	// Paged Flash 0
+			  2 0 0xe0000000 0x08000000	// Paged Flash 1
+			  3 0 0xfc100000 0x00020000	// NVRAM
+			  4 0 0xfc000000 0x00008000	// FPGA
+			  5 0 0xfc008000 0x00008000	// AFIX FPGA
+			  6 0 0xfd000000 0x00800000	// IO FPGA (8-bit)
+			  7 0 0xfd800000 0x00800000>;	// IO FPGA (32-bit)
+
+		/* flash@0,0 is a mirror of part of the memory in flash@1,0
+		flash@0,0 {
+			compatible = "gef,sbc610-firmware-mirror", "cfi-flash";
+			reg = <0x0 0x0 0x1000000>;
+			bank-width = <4>;
+			device-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "firmware";
+				reg = <0x0 0x1000000>;
+				read-only;
+			};
+		};
+		*/
+
+		flash@1,0 {
+			compatible = "gef,sbc610-paged-flash", "cfi-flash";
+			reg = <0x1 0x0 0x8000000>;
+			bank-width = <4>;
+			device-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "user";
+				reg = <0x0 0x7800000>;
+			};
+			partition@7800000 {
+				label = "firmware";
+				reg = <0x7800000 0x800000>;
+				read-only;
+			};
+		};
+
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
+		fpga@4,0 {
+			compatible = "gef,fpga-regs";
+			reg = <0x4 0x0 0x40>;
+		};
+
+		wdt@4,2000 {
+			compatible = "gef,fpga-wdt";
+			reg = <0x4 0x2000 0x8>;
+			interrupts = <0x1a 0x4>;
+			interrupt-parent = <&gef_pic>;
+		};
+		/* Second watchdog available, driver currently supports one.
+		wdt@4,2010 {
+			compatible = "gef,fpga-wdt";
+			reg = <0x4 0x2010 0x8>;
+			interrupts = <0x1b 0x4>;
+			interrupt-parent = <&gef_pic>;
+		};
+		*/
+		gef_pic: pic@4,4000 {
+			#interrupt-cells = <1>;	// NOTE(review): users in this file pass two cells (e.g. <0x1a 0x4>) - confirm
+			interrupt-controller;
+			compatible = "gef,fpga-pic";
+			reg = <0x4 0x4000 0x20>;
+			interrupts = <0x8 0x9 0 0>;
+
+		};
+		gef_gpio: gpio@7,14000 {
+			#gpio-cells = <2>;
+			compatible = "gef,sbc610-gpio";
+			reg = <0x7 0x14000 0x24>;
+			gpio-controller;
+		};
+	};
+
+	soc: soc@fef00000 {
+		ranges = <0x0 0xfef00000 0x00100000>;
+
+		i2c@3000 {
+			hwmon@48 {
+				compatible = "national,lm92";
+				reg = <0x48>;
+			};
+
+			hwmon@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			rtc@51 {
+				compatible = "epson,rx8581";
+				reg = <0x00000051>;
+			};
+
+			eti@6b {
+				compatible = "dallas,ds1682";
+				reg = <0x6b>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "gmii";
+		};
+
+		mdio@24520 {
+			phy0: ethernet-phy@0 {	// NOTE(review): unit address != reg (1) - confirm
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0x9 0x4>;
+				reg = <1>;
+			};
+			phy2: ethernet-phy@2 {	// NOTE(review): unit address != reg (3) - confirm
+				interrupt-parent = <&gef_pic>;
+				interrupts = <0x8 0x4>;
+				reg = <3>;
+			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ethernet@26000 {
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy2>;
+			phy-connection-type = "gmii";
+		};
+
+		mdio@26520 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet2: ethernet@25000 {
+			status = "disabled";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		enet3: ethernet@27000 {
+			status = "disabled";
+		};
+
+		mdio@27520 {
+			status = "disabled";
+		};
+	};
+
+	pci0: pcie@fef08000 {
+		reg = <0xfef08000 0x1000>;
+		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
+			  0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
+
+		pcie@0 {
+			ranges = <0x02000000 0x0 0x80000000
+				  0x02000000 0x0 0x80000000
+				  0x0 0x40000000
+
+				  0x01000000 0x0 0x00000000
+				  0x01000000 0x0 0x00000000
+				  0x0 0x00400000>;
+		};
+	};
+
+	pci1: pcie@fef09000 {
+		status = "disabled";
+	};
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi
new file mode 100644
index 0000000000..9cffccf4e0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/interlaken-lac-portals.dtsi
@@ -0,0 +1,156 @@
+/* T4240 Interlaken LAC Portal device tree stub with 24 portals.
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#address-cells = <0x1>;
+#size-cells = <0x1>;
+compatible = "fsl,interlaken-lac-portals";
+
+lportal0: lac-portal@0 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x0 0x1000>;
+};
+
+lportal1: lac-portal@1000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x1000 0x1000>;
+};
+
+lportal2: lac-portal@2000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x2000 0x1000>;
+};
+
+lportal3: lac-portal@3000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x3000 0x1000>;
+};
+
+lportal4: lac-portal@4000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x4000 0x1000>;
+};
+
+lportal5: lac-portal@5000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x5000 0x1000>;
+};
+
+lportal6: lac-portal@6000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x6000 0x1000>;
+};
+
+lportal7: lac-portal@7000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x7000 0x1000>;
+};
+
+lportal8: lac-portal@8000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x8000 0x1000>;
+};
+
+lportal9: lac-portal@9000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x9000 0x1000>;
+};
+
+lportal10: lac-portal@a000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xa000 0x1000>;
+};
+
+lportal11: lac-portal@b000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xb000 0x1000>;
+};
+
+lportal12: lac-portal@c000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xc000 0x1000>;
+};
+
+lportal13: lac-portal@d000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xd000 0x1000>;
+};
+
+lportal14: lac-portal@e000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xe000 0x1000>;
+};
+
+lportal15: lac-portal@f000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0xf000 0x1000>;
+};
+
+lportal16: lac-portal@10000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x10000 0x1000>;
+};
+
+lportal17: lac-portal@11000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x11000 0x1000>;
+};
+
+lportal18: lac-portal@12000 {	/* unit address matches reg base 0x12000 */
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x12000 0x1000>;
+};
+
+lportal19: lac-portal@13000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x13000 0x1000>;
+};
+
+lportal20: lac-portal@14000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x14000 0x1000>;
+};
+
+lportal21: lac-portal@15000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x15000 0x1000>;
+};
+
+lportal22: lac-portal@16000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x16000 0x1000>;
+};
+
+lportal23: lac-portal@17000 {
+	compatible = "fsl,interlaken-lac-portal-v1.0";
+	reg = <0x17000 0x1000>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi
new file mode 100644
index 0000000000..e8208720ac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/interlaken-lac.dtsi
@@ -0,0 +1,45 @@
+/*
+ * T4 Interlaken Look-aside Controller (LAC) device tree stub
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+lac: lac@229000 {
+	compatible = "fsl,interlaken-lac";
+	reg = <0x229000 0x1000>;
+	interrupts = <16 2 1 18>;
+};
+
+lac-hv@228000 {
+	compatible = "fsl,interlaken-lac-hv";
+	reg = <0x228000 0x1000>;
+	fsl,non-hv-node = <&lac>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
new file mode 100644
index 0000000000..8e7f0828af
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Keymile kmcent2 Device Tree Source, based on T1040RDB DTS
+ *
+ * (C) Copyright 2016
+ * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+
+/ {
+	model = "keymile,kmcent2";
+	compatible = "keymile,kmcent2";
+
+	aliases {
+		front_phy = &front_phy;
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x04000000
+			  1 0 0xf 0xfa000000 0x00010000
+			  2 0 0xf 0xfb000000 0x00010000
+			  4 0 0xf 0xc0000000 0x08000000
+			  6 0 0xf 0xd0000000 0x08000000
+			  7 0 0xf 0xd8000000 0x08000000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x04000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x1 0x0 0x10000>;
+		};
+
+		board-control@2,0 {
+			compatible = "keymile,qriox";
+			reg = <0x2 0x0 0x80>;
+		};
+
+		chassis-mgmt@6,0 {
+			compatible = "keymile,bfticu";
+			reg = <6 0 0x100>;
+			interrupt-controller;
+			interrupt-parent = <&mpic>;
+			interrupts = <11 1 0 0>;
+			#interrupt-cells = <1>;
+		};
+
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			network-clock@1 {
+				compatible = "zarlink,zl30364";
+				reg = <1>;
+				spi-max-frequency = <1000000>;
+			};
+		};
+
+		sdhc@114000 {
+			status = "disabled";
+		};
+
+		i2c@118000 {
+			clock-frequency = <100000>;
+
+			mux@70 {
+				compatible = "nxp,pca9547";
+				reg = <0x70>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				i2c-mux-idle-disconnect;
+
+				i2c@0 {
+					reg = <0>;
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					eeprom@54 {
+						compatible = "atmel,24c02";
+						reg = <0x54>;
+						pagesize = <2>;
+						read-only;
+						label = "ddr3-spd";
+					};
+				};
+
+				i2c@7 {
+					reg = <7>;
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					temp-sensor@48 {
+						compatible = "national,lm75";
+						reg = <0x48>;
+						label = "SENSOR_0";
+					};
+					temp-sensor@4a {
+						compatible = "national,lm75";
+						reg = <0x4a>;
+						label = "SENSOR_2";
+					};
+					temp-sensor@4b {
+						compatible = "national,lm75";
+						reg = <0x4b>;
+						label = "SENSOR_3";
+					};
+				};
+			};
+		};
+
+		i2c@118100 {
+			clock-frequency = <100000>;
+
+			eeprom@50 {
+				compatible = "atmel,24c08";
+				reg = <0x50>;
+				pagesize = <16>;
+			};
+
+			eeprom@54 {
+				compatible = "atmel,24c08";
+				reg = <0x54>;
+				pagesize = <16>;
+			};
+		};
+
+		i2c@119000 {
+			status = "disabled";
+		};
+
+		i2c@119100 {
+			status = "disabled";
+		};
+
+		serial2: serial@11d500 {
+			status = "disabled";
+		};
+
+		serial3: serial@11d600 {
+			status = "disabled";
+		};
+
+		usb0: usb@210000 {
+			status = "disabled";
+		};
+		usb1: usb@211000 {
+			status = "disabled";
+		};
+
+		display@180000 {
+			status = "disabled";
+		};
+
+		sata@220000 {
+			status = "disabled";
+		};
+		sata@221000 {
+			status = "disabled";
+		};
+
+		fman@400000 {
+			ethernet@e0000 {
+				phy-mode = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
+			};
+
+			ethernet@e2000 {
+				phy-mode = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
+			};
+
+			ethernet@e4000 {
+				status = "disabled";
+			};
+
+			ethernet@e6000 {
+				status = "disabled";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&front_phy>;
+				phy-mode = "rgmii-id";
+			};
+
+			mdio0: mdio@fc000 {
+				front_phy: ethernet-phy@11 {
+					reg = <0x11>;
+				};
+			};
+		};
+	};
+
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		status = "disabled";
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+			  0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		status = "disabled";
+		reg = <0xf 0xfe260000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		status = "disabled";
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	qe: qe@ffe140000 {
+		ranges = <0x0 0xf 0xfe140000 0x40000>;
+		reg = <0xf 0xfe140000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+
+		si1: si@700 {
+			compatible = "fsl,t1040-qe-si";
+			reg = <0x700 0x80>;
+		};
+
+		siram1: siram@1000 {
+			compatible = "fsl,t1040-qe-siram";
+			reg = <0x1000 0x800>;
+		};
+
+		ucc_hdlc: ucc@2000 {
+			device_type = "hdlc";
+			compatible = "fsl,ucc-hdlc";
+			rx-clock-name = "clk9";
+			tx-clock-name = "clk9";
+			fsl,hdlc-bus;
+		};
+	};
+};
+
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/kmcoge4.dts b/arch/powerpc/boot/dts/fsl/kmcoge4.dts
new file mode 100644
index 0000000000..1c5f942311
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/kmcoge4.dts
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Keymile kmcoge4 Device Tree Source, based on the P2041RDB DTS
+ *
+ * (C) Copyright 2014
+ * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p2041si-pre.dtsi"
+
+/ {
+	model = "keymile,kmcoge4";
+	compatible = "keymile,kmcoge4", "keymile,kmp204x";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	memory {
+		device_type = "memory";
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25fl256s1", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <20000000>; /* input clock */
+			};
+
+			network-clock@1 {
+				compatible = "zarlink,zl30343";
+				reg = <1>;
+				spi-max-frequency = <8000000>;
+			};
+
+			flash@2 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,m25p32", "jedec,spi-nor";
+				reg = <2>;
+				spi-max-frequency = <15000000>;
+			};
+		};
+
+		sdhc@114000 {
+			status = "disabled";
+		};
+
+		i2c@119000 {
+			status = "disabled";
+		};
+
+		i2c@119100 {
+			status = "disabled";
+		};
+
+		usb0: usb@210000 {
+			status = "disabled";
+		};
+
+		usb1: usb@211000 {
+			status = "disabled";
+		};
+
+		sata@220000 {
+			status = "disabled";
+		};
+
+		sata@221000 {
+			status = "disabled";
+		};
+
+		fman0: fman@400000 {
+			enet0: ethernet@e0000 {
+				phy-connection-type = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
+			};
+			mdio0: mdio@e1120 {
+				front_phy: ethernet-phy@11 {
+					reg = <0x11>;
+				};
+			};
+
+			enet1: ethernet@e2000 {
+				phy-connection-type = "sgmii";
+				fixed-link {
+					speed = <1000>;
+					full-duplex;
+				};
+			};
+			enet2: ethernet@e4000 {
+				status = "disabled";
+			};
+
+			enet3: ethernet@e6000 {
+				status = "disabled";
+			};
+			enet4: ethernet@e8000 {
+				phy-handle = <&front_phy>;
+				phy-connection-type = "rgmii";
+			};
+			enet5: ethernet@f0000 {
+				status = "disabled";
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		status = "disabled";
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xffa00000 0x00040000		/* LB 0 */
+			  1 0 0xf 0xfb000000 0x00010000		/* LB 1 */
+			  2 0 0xf 0xd0000000 0x10000000		/* LB 2 */
+			  3 0 0xf 0xe0000000 0x10000000>;	/* LB 3 */
+
+		nand@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0 0 0x40000>;
+		};
+
+		board-control@1,0 {
+			compatible = "keymile,qriox";
+			reg = <1 0 0x80>;
+		};
+
+		chassis-mgmt@3,0 {
+			compatible = "keymile,bfticu";
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <3 0 0x100>;
+			interrupt-parent = <&mpic>;
+			interrupts = <6 1 0 0>;
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe202000 {
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "p2041si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536ds.dts b/arch/powerpc/boot/dts/fsl/mpc8536ds.dts
new file mode 100644
index 0000000000..ab6997a0fd
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8536ds.dts
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8536 DS Device Tree Source
+ *
+ * Copyright 2008, 2011 Freescale Semiconductor, Inc.
+ */
+
+/include/ "mpc8536si-pre.dtsi"
+
+/ {
+	model = "fsl,mpc8536ds";
+	compatible = "fsl,mpc8536ds";
+
+	cpus {
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8536@0 {
+			device_type = "cpu";
+			reg = <0>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0 0 0>;	// Filled by U-Boot
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
+			  0x2 0x0 0x0 0xffa00000 0x00040000
+			  0x3 0x0 0x0 0xffdf0000 0x00008000>;
+	};
+
+	board_soc: soc: soc@ffe00000 {
+		ranges = <0x0 0 0xffe00000 0x100000>;
+	};
+
+	pci0: pci@ffe08000 {
+		reg = <0 0xffe08000 0 0x1000>;
+		ranges = <0x02000000 0 0x80000000 0 0x80000000 0 0x10000000
+			  0x01000000 0 0x00000000 0 0xffc00000 0 0x00010000>;
+		clock-frequency = <66666666>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+
+			/* IDSEL 0x11 J17 Slot 1 */
+			0x8800 0 0 1 &mpic 1 1 0 0
+			0x8800 0 0 2 &mpic 2 1 0 0
+			0x8800 0 0 3 &mpic 3 1 0 0
+			0x8800 0 0 4 &mpic 4 1 0 0>;
+	};
+
+	pci1: pcie@ffe09000 {
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x02000000 0 0x98000000 0 0x98000000 0 0x08000000
+			  0x01000000 0 0x00000000 0 0xffc20000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0x98000000
+				  0x02000000 0 0x98000000
+				  0 0x08000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x02000000 0 0x90000000 0 0x90000000 0 0x08000000
+			  0x01000000 0 0x00000000 0 0xffc10000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0x90000000
+				  0x02000000 0 0x90000000
+				  0 0x08000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe0b000 {
+		reg = <0 0xffe0b000 0 0x1000>;
+		ranges = <0x02000000 0 0xa0000000 0 0xa0000000 0 0x20000000
+			  0x01000000 0 0x00000000 0 0xffc30000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xa0000000
+				  0x02000000 0 0xa0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00100000>;	/* NOTE(review): parent I/O window above is 0x00010000 - confirm size */
+		};
+	};
+};
+
+/include/ "mpc8536si-post.dtsi"
+/include/ "mpc8536ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536ds.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536ds.dtsi
new file mode 100644
index 0000000000..a925fe49a7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8536ds.dtsi
@@ -0,0 +1,244 @@
+/*
+ * MPC8536DS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x8000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			reg = <0x0 0x03000000>;
+			label = "ramdisk-nor";
+		};
+
+		partition@3000000 {
+			reg = <0x03000000 0x00e00000>;
+			label = "diagnostic-nor";
+			read-only;
+		};
+
+		partition@3e00000 {
+			reg = <0x03e00000 0x00200000>;
+			label = "dink-nor";
+			read-only;
+		};
+
+		partition@4000000 {
+			reg = <0x04000000 0x00400000>;
+			label = "kernel-nor";
+		};
+
+		partition@4400000 {
+			reg = <0x04400000 0x03b00000>;
+			label = "fs-nor";
+		};
+
+		partition@7f00000 {
+			reg = <0x07f00000 0x00080000>;
+			label = "dtb-nor";
+		};
+
+		partition@7f80000 {
+			reg = <0x07f80000 0x00080000>;
+			label = "u-boot-nor";
+			read-only;
+		};
+	};
+
+	nand@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8536-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x2 0x0 0x40000>;
+
+		partition@0 {
+			reg = <0x0 0x02000000>;
+			label = "u-boot-nand";
+			read-only;
+		};
+
+		partition@2000000 {
+			reg = <0x02000000 0x10000000>;
+			label = "fs-nand";
+		};
+
+		partition@12000000 {
+			reg = <0x12000000 0x08000000>;
+			label = "ramdisk-nand";
+		};
+
+		partition@1a000000 {
+			reg = <0x1a000000 0x04000000>;
+			label = "kernel-nand";
+		};
+
+		partition@1e000000 {
+			reg = <0x1e000000 0x01000000>;
+			label = "dtb-nand";
+		};
+
+		partition@1f000000 {
+			reg = <0x1f000000 0x21000000>;
+			label = "empty-nand";
+		};
+	};
+
+	board-control@3,0 {
+		compatible = "fsl,mpc8536ds-fpga-pixis";
+		reg = <0x3 0x0 0x8000>;
+	};
+};
+
+&board_soc {
+	i2c@3100 {
+		rtc@68 {
+			compatible = "dallas,ds3232";
+			reg = <0x68>;
+			interrupts = <0 0x1 0 0>;
+		};
+		adt7461@4c {
+			compatible = "adi,adt7461";
+			reg = <0x4c>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>;
+			partition@0 {	/* unit addresses are the reg base offsets */
+				label = "u-boot";
+				reg = <0x00000000 0x00100000>;
+				read-only;
+			};
+			partition@100000 {
+				label = "kernel";
+				reg = <0x00100000 0x00500000>;
+				read-only;
+			};
+			partition@600000 {
+				label = "dtb";
+				reg = <0x00600000 0x00100000>;
+				read-only;
+			};
+			partition@700000 {
+				label = "file system";
+				reg = <0x00700000 0x00900000>;
+			};
+		};
+		flash@1 {
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <1>;
+			spi-max-frequency = <40000000>;
+		};
+		flash@2 {
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <2>;
+			spi-max-frequency = <40000000>;
+		};
+		flash@3 {
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <3>;
+			spi-max-frequency = <40000000>;
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	usb@23000 {
+		phy_type = "ulpi";
+	};
+
+	enet0: ethernet@24000 {
+		tbi-handle = <&tbi0>;
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	mdio@24520 {
+		phy0: ethernet-phy@0 {
+			interrupts = <10 0x1 0 0>;
+			reg = <0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <10 0x1 0 0>;
+			reg = <1>;
+		};
+		sgmii_phy0: sgmii-phy@0 {	/* NOTE(review): unit address != reg (0x1d); confirm intent */
+			interrupts = <6 1 0 0>;
+			reg = <0x1d>;
+		};
+		sgmii_phy1: sgmii-phy@1 {	/* NOTE(review): unit address != reg (0x1c); confirm intent */
+			interrupts = <6 1 0 0>;
+			reg = <0x1c>;
+		};
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet2: ethernet@26000 {
+		tbi-handle = <&tbi1>;
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	mdio@26520 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,gianfar-tbi";
+		reg = <0x26520 0x20>;
+
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	usb@2b000 {
+		dr_mode = "peripheral";
+		phy_type = "ulpi";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536ds_36b.dts b/arch/powerpc/boot/dts/fsl/mpc8536ds_36b.dts
new file mode 100644
index 0000000000..1b799741cd
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8536ds_36b.dts
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8536DS Device Tree Source (36-bit address map)
+ *
+ * Copyright 2008-2009, 2011 Freescale Semiconductor, Inc.
+ */
+
+/include/ "mpc8536si-pre.dtsi"
+
+/ {
+	model = "fsl,mpc8536ds";
+	compatible = "fsl,mpc8536ds";
+
+	cpus {	/* same cpu node as in mpc8536si-pre.dtsi; only #cpus is added here */
+		#cpus = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8536@0 {
+			device_type = "cpu";
+			reg = <0>;
+			next-level-cache = <&L2>;	/* L2 label: l2-cache-controller@20000 in mpc8536si-post.dtsi */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0 0 0>;	// Placeholder (2 address + 2 size cells); filled by U-Boot
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;	/* 0xf_ffe05000, 4 KiB */
+
+		ranges = <0x0 0x0 0xf 0xe8000000 0x08000000	/* child <0 0> -> 0xf_e8000000, 128 MiB */
+			  0x2 0x0 0xf 0xffa00000 0x00040000	/* child <2 0> -> 0xf_ffa00000, 256 KiB */
+			  0x3 0x0 0xf 0xffdf0000 0x00008000>;	/* child <3 0> -> 0xf_ffdf0000, 32 KiB */
+	};
+
+	board_soc: soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;	/* soc offset 0 -> 0xf_ffe00000, 1 MiB */
+	};
+
+	pci0: pci@fffe08000 {
+		reg = <0xf 0xffe08000 0 0x1000>;	/* 0xf_ffe08000, 4 KiB */
+		ranges = <0x02000000 0 0xf0000000 0xc 0x00000000 0 0x10000000	/* PCI 0xf0000000 -> 0xc_00000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xffc00000 0 0x00010000>;	/* PCI 0x0 -> 0xf_ffc00000, 64 KiB */
+		clock-frequency = <66666666>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+		interrupt-map = <
+
+			/* IDSEL 0x11 J17 Slot 1 */
+			0x8800 0 0 1 &mpic 1 1 0 0	/* 0x8800 >> 11 = 0x11 */
+			0x8800 0 0 2 &mpic 2 1 0 0
+			0x8800 0 0 3 &mpic 3 1 0 0
+			0x8800 0 0 4 &mpic 4 1 0 0>;
+	};
+
+	pci1: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0 0x1000>;	/* 0xf_ffe09000, 4 KiB */
+		ranges = <0x02000000 0 0xf8000000 0xc 0x18000000 0 0x08000000	/* PCI 0xf8000000 -> 0xc_18000000, 128 MiB */
+			  0x01000000 0 0x00000000 0xf 0xffc20000 0 0x00010000>;	/* PCI 0x0 -> 0xf_ffc20000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xf8000000	/* child address */
+				  0x02000000 0 0xf8000000	/* parent address (identical) */
+				  0 0x08000000	/* 128 MiB */
+
+				  0x01000000 0 0x00000000	/* child address */
+				  0x01000000 0 0x00000000	/* parent address (identical) */
+				  0 0x00010000>;	/* 64 KiB */
+		};
+	};
+
+	pci2: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;	/* 0xf_ffe0a000, 4 KiB */
+		ranges = <0x02000000 0 0xf8000000 0xc 0x10000000 0 0x08000000	/* PCI 0xf8000000 -> 0xc_10000000, 128 MiB */
+			  0x01000000 0 0x00000000 0xf 0xffc10000 0 0x00010000>;	/* PCI 0x0 -> 0xf_ffc10000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xf8000000	/* child address */
+				  0x02000000 0 0xf8000000	/* parent address (identical) */
+				  0 0x08000000	/* 128 MiB */
+
+				  0x01000000 0 0x00000000	/* child address */
+				  0x01000000 0 0x00000000	/* parent address (identical) */
+				  0 0x00010000>;	/* 64 KiB */
+		};
+	};
+
+	pci3: pcie@fffe0b000 {
+		reg = <0xf 0xffe0b000 0 0x1000>;	/* 0xf_ffe0b000, 4 KiB */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x20000000	/* PCI 0xe0000000 -> 0xc_20000000, 512 MiB */
+			  0x01000000 0 0x00000000 0xf 0xffc30000 0 0x00010000>;	/* PCI 0x0 -> 0xf_ffc30000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* child address */
+				  0x02000000 0 0xe0000000	/* parent address (identical) */
+				  0 0x20000000	/* 512 MiB */
+
+				  0x01000000 0 0x00000000	/* child address */
+				  0x01000000 0 0x00000000	/* parent address (identical) */
+				  0 0x00100000>;	/* 1 MiB. NOTE(review): parent window above is 64 KiB; pci1/pci2 use 0x00010000 here - confirm */
+		};
+	};
+};
+
+/include/ "mpc8536si-post.dtsi"
+/include/ "mpc8536ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
new file mode 100644
index 0000000000..41935709eb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
@@ -0,0 +1,252 @@
+/*
+ * MPC8536 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {	/* SoC-level local bus properties; reg and ranges come from the board .dts (e.g. mpc8536ds_36b.dts) */
+	#address-cells = <2>;	/* child addresses are two cells, as in the board's ranges entries */
+	#size-cells = <1>;
+	compatible = "fsl,mpc8536-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+/* controller at 0x8000 */
+&pci0 {	/* SoC-level properties; reg, ranges and interrupt-map come from the board .dts */
+	compatible = "fsl,mpc8540-pci";
+	device_type = "pci";
+	interrupts = <24 0x2 0 0>;
+	bus-range = <0 0xff>;	/* buses 0..255 */
+	#interrupt-cells = <1>;	/* one interrupt cell per interrupt-map entry in the board .dts */
+	#size-cells = <2>;
+	#address-cells = <3>;	/* 3 address + 2 size cells, the layout of the board's ranges entries */
+};
+
+/* controller at 0x9000 */
+&pci1 {	/* SoC-level PCIe controller properties; reg and ranges come from the board .dts */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <25 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <25 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x4 */
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x5 */
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0x6 */
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0x7 */
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci2 {	/* SoC-level PCIe controller properties; reg and ranges come from the board .dts */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x0 */
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x1 */
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0x2 */
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0x3 */
+			>;
+	};
+};
+
+/* controller at 0xb000 */
+&pci3 {	/* SoC-level PCIe controller properties; reg and ranges come from the board .dts */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <27 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <27 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x8 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x8 */
+			0000 0x0 0x0 0x2 &mpic 0x9 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x9 */
+			0000 0x0 0x0 0x3 &mpic 0xa 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0xa */
+			0000 0x0 0x0 0x4 &mpic 0xb 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0xb */
+			>;
+	};
+};
+&soc {	/* SoC register block; child reg values are offsets mapped by the board's soc ranges */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8536-immr", "simple-bus";
+	bus-frequency = <0>;		// Placeholder; filled in by U-Boot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;	/* offset 0x0, 4 KiB */
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,mpc8536-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;	/* offset 0x1000, 4 KiB */
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,mpc8536-memory-controller";
+		reg = <0x2000 0x1000>;	/* offset 0x2000, 4 KiB */
+		interrupts = <18 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+
+/include/ "pq3-espi-0.dtsi"
+	spi@7000 {	/* presumably extends the node from pq3-espi-0.dtsi - confirm */
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+
+	/* Mark the GPIO block compatible with the 8572 one to get some erratum treatment */
+	gpio-controller@f000 {
+		compatible = "fsl,mpc8572-gpio";
+	};
+
+	sata@18000 {
+		compatible = "fsl,mpc8536-sata", "fsl,pq-sata";
+		reg = <0x18000 0x1000>;	/* offset 0x18000, 4 KiB */
+		cell-index = <1>;
+		interrupts = <74 0x2 0 0>;
+	};
+
+	sata@19000 {
+		compatible = "fsl,mpc8536-sata", "fsl,pq-sata";
+		reg = <0x19000 0x1000>;	/* offset 0x19000, 4 KiB */
+		cell-index = <2>;
+		interrupts = <41 0x2 0 0>;
+	};
+
+	L2: l2-cache-controller@20000 {	/* L2 label is the next-level-cache target of the cpu node */
+		compatible = "fsl,mpc8536-l2-cache-controller";
+		reg = <0x20000 0x1000>;	/* offset 0x20000, 4 KiB */
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x80000>; // L2, 512K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-etsec1-0.dtsi"
+/include/ "pq3-etsec1-timer-0.dtsi"
+
+	usb@22000 {
+		compatible = "fsl-usb2-mph-v1.2", "fsl,mpc8536-usb2-mph", "fsl-usb2-mph";
+		reg = <0x22000 0x1000>;	/* offset 0x22000, 4 KiB */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		interrupts = <28 0x2 0 0>;
+	};
+
+	usb@23000 {
+		compatible = "fsl-usb2-mph-v1.2", "fsl,mpc8536-usb2-mph", "fsl-usb2-mph";
+		reg = <0x23000 0x1000>;	/* offset 0x23000, 4 KiB */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		interrupts = <46 0x2 0 0>;
+	};
+
+	ptp_clock@24e00 {	/* presumably extends a node from an included pq3-etsec1 file - confirm */
+		interrupts = <68 2 0 0 69 2 0 0 70 2 0 0 71 2 0 0>;	/* four 4-cell specifiers: 68, 69, 70, 71 */
+	};
+
+/include/ "pq3-etsec1-2.dtsi"
+
+	ethernet@26000 {
+		cell-index = <1>;
+	};
+
+	usb@2b000 {
+		compatible = "fsl,mpc8536-usb2-dr", "fsl-usb2-dr";
+		reg = <0x2b000 0x1000>;	/* offset 0x2b000, 4 KiB */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		interrupts = <60 0x2 0 0>;
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {	/* presumably extends the node from pq3-esdhc-0.dtsi - confirm */
+		compatible = "fsl,mpc8536-esdhc", "fsl,esdhc";
+	};
+
+/include/ "pq3-sec3.0-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+	global-utilities@e0000 {
+		compatible = "fsl,mpc8536-guts";
+		reg = <0xe0000 0x1000>;	/* offset 0xe0000, 4 KiB */
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi
new file mode 100644
index 0000000000..152906f98a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi
@@ -0,0 +1,66 @@
+/*
+ * MPC8536 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,MPC8536";
+	#address-cells = <2>;	/* two cells per address at the root, as used by the board's reg/ranges */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* mpic label is not defined in this file; presumably from pq3-mpic.dtsi */
+
+	aliases {	/* every label below is defined outside this file */
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet2;	/* alias index 1 points at the enet2 label */
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8536@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* L2 label: l2-cache-controller@20000 in mpc8536si-post.dtsi */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544ds.dts b/arch/powerpc/boot/dts/fsl/mpc8544ds.dts
new file mode 100644
index 0000000000..f4a8b71396
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8544ds.dts
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8544 DS Device Tree Source
+ *
+ * Copyright 2007, 2008 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8544si-pre.dtsi"
+
+/ {
+	model = "MPC8544DS";
+	compatible = "MPC8544DS", "MPC85xxDS";
+
+	memory {
+		device_type = "memory";
+		reg = <0 0 0 0>;	// Placeholder (2 address + 2 size cells); filled by U-Boot
+	};
+
+	board_lbc: lbc: localbus@e0005000 {
+		reg = <0 0xe0005000 0 0x1000>;	/* 0xe0005000, 4 KiB */
+
+		ranges = <0x0 0x0 0x0 0xff800000 0x800000>;	/* child <0 0> -> 0xff800000, 8 MiB */
+	};
+
+	board_soc: soc: soc8544@e0000000 {
+		ranges = <0x0 0x0 0xe0000000 0x100000>;	/* soc offset 0 -> 0xe0000000, 1 MiB */
+	};
+
+	pci0: pci@e0008000 {
+		reg = <0 0xe0008000 0 0x1000>;	/* 0xe0008000, 4 KiB */
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000	/* PCI 0xc0000000 -> 0xc0000000, 512 MiB */
+			  0x1000000 0x0 0x00000000 0 0xe1000000 0x0 0x10000>;	/* PCI 0x0 -> 0xe1000000, 64 KiB */
+		clock-frequency = <66666666>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+		interrupt-map = <
+
+			/* IDSEL 0x11 J17 Slot 1 */
+			0x8800 0x0 0x0 0x1 &mpic 0x2 0x1 0 0	/* 0x8800 >> 11 = 0x11 */
+			0x8800 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8800 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8800 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x12 J16 Slot 2 */
+
+			0x9000 0x0 0x0 0x1 &mpic 0x3 0x1 0 0	/* 0x9000 >> 11 = 0x12 */
+			0x9000 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
+			0x9000 0x0 0x0 0x4 &mpic 0x1 0x1 0 0>;
+	};
+
+	pci1: pcie@e0009000 {
+		reg = <0x0 0xe0009000 0x0 0x1000>;	/* 0xe0009000, 4 KiB */
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000	/* PCI 0x80000000 -> 0x80000000, 512 MiB */
+			  0x1000000 0x0 0x00000000 0 0xe1010000 0x0 0x10000>;	/* PCI 0x0 -> 0xe1010000, 64 KiB */
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000	/* child address */
+				  0x2000000 0x0 0x80000000	/* parent address (identical) */
+				  0x0 0x20000000	/* 512 MiB */
+
+				  0x1000000 0x0 0x0	/* child address */
+				  0x1000000 0x0 0x0	/* parent address (identical) */
+				  0x0 0x10000>;	/* 64 KiB */
+		};
+	};
+
+	pci2: pcie@e000a000 {
+		reg = <0x0 0xe000a000 0x0 0x1000>;	/* 0xe000a000, 4 KiB */
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x10000000	/* PCI 0xa0000000 -> 0xa0000000, 256 MiB */
+			  0x1000000 0x0 0x00000000 0 0xe1020000 0x0 0x10000>;	/* PCI 0x0 -> 0xe1020000, 64 KiB */
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000	/* child address */
+				  0x2000000 0x0 0xa0000000	/* parent address (identical) */
+				  0x0 0x10000000	/* 256 MiB */
+
+				  0x1000000 0x0 0x0	/* child address */
+				  0x1000000 0x0 0x0	/* parent address (identical) */
+				  0x0 0x10000>;	/* 64 KiB */
+		};
+	};
+
+	board_pci3: pci3: pcie@e000b000 {	/* two labels: pci3 is used by mpc8544si-post.dtsi, board_pci3 by mpc8544ds.dtsi */
+		reg = <0x0 0xe000b000 0x0 0x1000>;	/* 0xe000b000, 4 KiB */
+		ranges = <0x2000000 0x0 0xb0000000 0 0xb0000000 0x0 0x100000	/* PCI 0xb0000000 -> 0xb0000000, 1 MiB */
+			  0x1000000 0x0 0x00000000 0 0xb0100000 0x0 0x100000>;	/* PCI 0x0 -> 0xb0100000, 1 MiB */
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xb0000000	/* child address */
+				  0x2000000 0x0 0xb0000000	/* parent address (identical) */
+				  0x0 0x100000	/* 1 MiB */
+
+				  0x1000000 0x0 0x0	/* child address */
+				  0x1000000 0x0 0x0	/* parent address (identical) */
+				  0x0 0x100000>;	/* 1 MiB */
+		};
+	};
+};
+
+/*
+ * mpc8544ds.dtsi must be last to ensure board_pci3 overrides pci3 settings
+ * for interrupt-map & interrupt-map-mask
+ */
+
+/include/ "mpc8544si-post.dtsi"
+/include/ "mpc8544ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544ds.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544ds.dtsi
new file mode 100644
index 0000000000..47d986b041
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8544ds.dtsi
@@ -0,0 +1,207 @@
+/*
+ * MPC8544DS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_lbc {	/* board devices on the local bus: one CFI NOR flash with five partitions */
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x800000>;	/* 8 MiB, the same size as the lbc ranges window in mpc8544ds.dts */
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			reg = <0x0 0x10000>;	/* 0x000000-0x00ffff, 64 KiB */
+			label = "dtb-nor";
+		};
+
+		partition@20000 {	/* 0x10000-0x1ffff is not assigned to any partition */
+			reg = <0x20000 0x30000>;	/* 0x020000-0x04ffff, 192 KiB */
+			label = "diagnostic-nor";
+			read-only;
+		};
+
+		partition@200000 {	/* 0x50000-0x1fffff is not assigned to any partition */
+			reg = <0x200000 0x200000>;	/* 0x200000-0x3fffff, 2 MiB */
+			label = "dink-nor";
+			read-only;
+		};
+
+		partition@400000 {
+			reg = <0x400000 0x380000>;	/* 0x400000-0x77ffff, 3.5 MiB */
+			label = "kernel-nor";
+		};
+
+		partition@780000 {
+			reg = <0x780000 0x80000>;	/* 0x780000-0x7fffff, 512 KiB; ends at the 8 MiB flash size */
+			label = "u-boot-nor";
+			read-only;
+		};
+	};
+};
+
+&board_soc {	/* board-level Ethernet wiring: which PHY and TBI node each MAC uses */
+	enet0: ethernet@24000 {
+		phy-handle = <&phy0>;	/* ethernet-phy@0 (reg 0) under mdio@24520 */
+		tbi-handle = <&tbi0>;	/* tbi-phy@11 under mdio@24520 */
+		phy-connection-type = "rgmii-id";
+	};
+
+	mdio@24520 {
+		phy0: ethernet-phy@0 {
+			interrupts = <10 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <10 1 0 0>;	/* same interrupt specifier as phy0 */
+			reg = <0x1>;
+		};
+
+		sgmii_phy0: sgmii-phy@0 {
+			interrupts = <6 1 0 0>;
+			reg = <0x1c>;	/* NOTE(review): unit-address @0 does not match reg 0x1c */
+		};
+		sgmii_phy1: sgmii-phy@1 {
+			interrupts = <6 1 0 0>;	/* same interrupt specifier as sgmii_phy0 */
+			reg = <0x1d>;	/* NOTE(review): unit-address @1 does not match reg 0x1d */
+		};
+
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet2: ethernet@26000 {
+		phy-handle = <&phy1>;	/* ethernet-phy@1 (reg 1) under mdio@24520 */
+		tbi-handle = <&tbi1>;	/* tbi-phy@11 under mdio@26520 */
+		phy-connection-type = "rgmii-id";
+	};
+
+	mdio@26520 {	/* only the TBI PHY is added here; no other properties are set in this fragment */
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+};
+
+&board_pci3 {	/* board-level contents of the fourth PCI(e) controller: ULI1575 bridge and its ISA devices */
+	pcie@0 {
+		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;	/* match on address bits 15:8 and interrupt bits 2:0 */
+		interrupt-map = <	/* each entry: 3 address cells, 1 interrupt cell, &i8259, 2 parent cells */
+			// IDSEL 0x1c  USB
+			0xe000 0x0 0x0 0x1 &i8259 0xc 0x2	/* 0xe000 >> 11 = 0x1c */
+			0xe100 0x0 0x0 0x2 &i8259 0x9 0x2
+			0xe200 0x0 0x0 0x3 &i8259 0xa 0x2
+			0xe300 0x0 0x0 0x4 &i8259 0xb 0x2
+
+			// IDSEL 0x1d  Audio
+			0xe800 0x0 0x0 0x1 &i8259 0x6 0x2	/* 0xe800 >> 11 = 0x1d */
+
+			// IDSEL 0x1e Legacy
+			0xf000 0x0 0x0 0x1 &i8259 0x7 0x2	/* 0xf000 >> 11 = 0x1e */
+			0xf100 0x0 0x0 0x1 &i8259 0x7 0x2
+
+			// IDSEL 0x1f IDE/SATA
+			0xf800 0x0 0x0 0x1 &i8259 0xe 0x2	/* 0xf800 >> 11 = 0x1f */
+			0xf900 0x0 0x0 0x1 &i8259 0x5 0x2
+			>;
+
+
+		uli1575@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;	/* 3 address + 2 size cells, all zero */
+			#size-cells = <2>;
+			#address-cells = <3>;
+			ranges = <0x2000000 0x0 0xb0000000	/* child address */
+				  0x2000000 0x0 0xb0000000	/* parent address (identical) */
+				  0x0 0x100000	/* 1 MiB */
+
+				  0x1000000 0x0 0x0	/* child address */
+				  0x1000000 0x0 0x0	/* parent address (identical) */
+				  0x0 0x100000>;	/* 1 MiB */
+			isa@1e {
+				device_type = "isa";
+				#interrupt-cells = <2>;
+				#size-cells = <1>;
+				#address-cells = <2>;
+				reg = <0xf000 0x0 0x0 0x0 0x0>;	/* 0xf000 >> 11 = 0x1e, matching the unit-address */
+				ranges = <0x1 0x0 0x1000000 0x0 0x0	/* child <0x1 0x0> -> parent <0x1000000 0x0 0x0> */
+					  0x1000>;	/* 4 KiB */
+				interrupt-parent = <&i8259>;
+
+				i8259: interrupt-controller@20 {
+					reg = <0x1 0x20 0x2	/* three (2 address + 1 size) entries, 2 bytes each */
+					       0x1 0xa0 0x2
+					       0x1 0x4d0 0x2>;
+					interrupt-controller;
+					device_type = "interrupt-controller";
+					#address-cells = <0>;
+					#interrupt-cells = <2>;	/* hence the 2-cell parent specifiers in the interrupt-map above */
+					compatible = "chrp,iic";
+					interrupts = <9 2 0 0>;	/* 4-cell specifier, since the parent here is &mpic */
+					interrupt-parent = <&mpic>;
+				};
+
+				i8042@60 {
+					#size-cells = <0>;
+					#address-cells = <1>;
+					reg = <0x1 0x60 0x1 0x1 0x64 0x1>;	/* two entries: 0x60 and 0x64, 1 byte each */
+					interrupts = <1 3 12 3>;	/* two 2-cell specifiers: <1 3> and <12 3> */
+					interrupt-parent =
+						<&i8259>;
+
+					keyboard@0 {
+						reg = <0x0>;
+						compatible = "pnpPNP,303";
+					};
+
+					mouse@1 {
+						reg = <0x1>;
+						compatible = "pnpPNP,f03";
+					};
+				};
+
+				rtc@70 {
+					compatible = "pnpPNP,b00";
+					reg = <0x1 0x70 0x2>;	/* address 0x70, 2 bytes */
+				};
+
+				gpio@400 {
+					reg = <0x1 0x400 0x80>;	/* address 0x400, 128 bytes */
+				};
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
new file mode 100644
index 0000000000..b68eb119fa
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
@@ -0,0 +1,191 @@
+/*
+ * MPC8544 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {	/* SoC-level local bus properties; reg and ranges come from the board .dts (e.g. mpc8544ds.dts) */
+	#address-cells = <2>;	/* child addresses are two cells, as in the board's ranges entry */
+	#size-cells = <1>;
+	compatible = "fsl,mpc8544-lbc", "fsl,pq3-localbus", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+/* controller at 0x8000 */
+&pci0 {	/* SoC-level properties; reg, ranges and interrupt-map come from the board .dts */
+	compatible = "fsl,mpc8540-pci";
+	device_type = "pci";
+	interrupts = <24 0x2 0 0>;
+	bus-range = <0 0xff>;	/* buses 0..255 */
+	#interrupt-cells = <1>;	/* one interrupt cell per interrupt-map entry in the board .dts */
+	#size-cells = <2>;
+	#address-cells = <3>;	/* 3 address + 2 size cells, the layout of the board's ranges entries */
+};
+
+/* controller at 0x9000 */
+&pci1 {	/* SoC-level PCIe controller properties; reg and ranges come from the board .dts */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <25 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <25 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x4 */
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x5 */
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0x6 */
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0x7 */
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci2 {	/* SoC-level PCIe controller properties; reg and ranges come from the board .dts */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x0 */
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x1 */
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0x2 */
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0x3 */
+			>;
+	};
+};
+
+/* controller at 0xb000 */
+&pci3 {	/* SoC-level PCIe controller properties; reg and ranges come from the board .dts */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <27 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <27 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* on MPC8544DS, mpc8544ds.dtsi (included later) overrides mask and map */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x8 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x8 */
+			0000 0x0 0x0 0x2 &mpic 0x9 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x9 */
+			0000 0x0 0x0 0x3 &mpic 0xa 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0xa */
+			0000 0x0 0x0 0x4 &mpic 0xb 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0xb */
+			>;
+	};
+};
+
+&soc {	/* SoC register block; child reg values are offsets mapped by the board's soc ranges */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8544-immr", "simple-bus";
+	bus-frequency = <0>;		// Placeholder; filled in by U-Boot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;	/* offset 0x0, 4 KiB */
+		fsl,num-laws = <10>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,mpc8544-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;	/* offset 0x1000, 4 KiB */
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,mpc8544-memory-controller";
+		reg = <0x2000 0x1000>;	/* offset 0x2000, 4 KiB */
+		interrupts = <18 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+
+	L2: l2-cache-controller@20000 {	/* L2 label is the next-level-cache target of the cpu node */
+		compatible = "fsl,mpc8544-l2-cache-controller";
+		reg = <0x20000 0x1000>;	/* offset 0x20000, 4 KiB */
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2, 256K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-etsec1-0.dtsi"
+/include/ "pq3-etsec1-2.dtsi"
+
+	ethernet@26000 {	/* presumably extends the node from pq3-etsec1-2.dtsi - confirm */
+		cell-index = <1>;
+	};
+
+/include/ "pq3-sec2.1-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+
+	global-utilities@e0000 {
+		compatible = "fsl,mpc8544-guts";
+		reg = <0xe0000 0x1000>;	/* offset 0xe0000, 4 KiB */
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi
new file mode 100644
index 0000000000..5a69bafb65
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi
@@ -0,0 +1,66 @@
+/*
+ * MPC8544 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,MPC8544";
+	#address-cells = <2>;	/* two cells per address at the root, as used by the board's reg/ranges */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* mpic label is not defined in this file; presumably from pq3-mpic.dtsi */
+
+	aliases {	/* every label below is defined outside this file */
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;	/* enet0: ethernet@24000 in mpc8544ds.dtsi */
+		ethernet1 = &enet2;	/* enet2: ethernet@26000 in mpc8544ds.dtsi; alias index 1 */
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8544@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* L2 label: l2-cache-controller@20000 in mpc8544si-post.dtsi */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
new file mode 100644
index 0000000000..579d76cb8e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
@@ -0,0 +1,159 @@
+/*
+ * MPC8548 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {	/* SoC-level local bus properties; reg and ranges are not set here (presumably by the board .dts) */
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,mpc8548-lbc", "fsl,pq3-localbus", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+/* controller at 0x8000 */
+&pci0 {	/* SoC-level properties; reg, ranges and interrupt-map are not set here (presumably by the board .dts) */
+	compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+	device_type = "pci";
+	interrupts = <24 0x2 0 0>;
+	bus-range = <0 0xff>;	/* buses 0..255 */
+	#interrupt-cells = <1>;
+	#size-cells = <2>;
+	#address-cells = <3>;
+};
+
+/* controller at 0x9000 */
+&pci1 {	/* in this SoC file pci1 uses the PCI (not PCIe) compatible; reg/ranges are not set here */
+	compatible = "fsl,mpc8540-pci";
+	device_type = "pci";
+	interrupts = <25 0x2 0 0>;
+	bus-range = <0 0xff>;	/* buses 0..255 */
+	#interrupt-cells = <1>;
+	#size-cells = <2>;
+	#address-cells = <3>;
+};
+
+/* controller at 0xa000 */
+&pci2 {	/* SoC-level PCIe controller properties; reg and ranges are not set here (presumably by the board .dts) */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* buses 0..255 */
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;	/* 3 address + 2 size cells, all zero */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on address bits 15:11 and interrupt bits 2:0 */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0	/* child interrupt 1 -> mpic 0x0 */
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0	/* child interrupt 2 -> mpic 0x1 */
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0	/* child interrupt 3 -> mpic 0x2 */
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0	/* child interrupt 4 -> mpic 0x3 */
+			>;
+	};
+};
+
+&rio {	/* SoC-level properties for the node labelled rio; the label itself is defined outside this file */
+	compatible = "fsl,srio";
+	interrupts = <48 2 0 0>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,srio-rmu-handle = <&rmu>;	/* rmu label presumably comes from pq3-rmu-0.dtsi, included under &soc - confirm */
+	ranges;	/* empty ranges: child addresses map 1:1 onto the parent */
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+};
+
+&soc {	/* SoC register block; child reg values are offsets within the soc address space */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8548-immr", "simple-bus";
+	bus-frequency = <0>;		// Placeholder; filled in by U-Boot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;	/* offset 0x0, 4 KiB */
+		fsl,num-laws = <10>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,mpc8548-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;	/* offset 0x1000, 4 KiB */
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,mpc8548-memory-controller";
+		reg = <0x2000 0x1000>;	/* offset 0x2000, 4 KiB */
+		interrupts = <18 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,mpc8548-l2-cache-controller";
+		reg = <0x20000 0x1000>;	/* offset 0x20000, 4 KiB */
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x80000>; // L2, 512K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-etsec1-0.dtsi"
+/include/ "pq3-etsec1-1.dtsi"
+/include/ "pq3-etsec1-2.dtsi"
+/include/ "pq3-etsec1-3.dtsi"
+
+/include/ "pq3-sec2.1-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-rmu-0.dtsi"
+
+	global-utilities@e0000 {
+		compatible = "fsl,mpc8548-guts";
+		reg = <0xe0000 0x1000>;	/* offset 0xe0000, 4 KiB */
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi
new file mode 100644
index 0000000000..fc1ce97742
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi
@@ -0,0 +1,67 @@
+/*
+ * MPC8548 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,MPC8548";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8548@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
new file mode 100644
index 0000000000..3603b5ae12
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8568E MDS Device Tree Source
+ *
+ * Copyright 2007, 2008 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8568si-pre.dtsi"
+
+/ {
+	model = "MPC8568EMDS";
+	compatible = "MPC8568EMDS", "MPC85xxMDS";
+
+	aliases {
+		pci0 = &pci0;
+		pci1 = &pci1;
+		rapidio0 = &rio;
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>;
+	};
+
+	lbc: localbus@e0005000 {
+		reg = <0x0 0xe0005000 0x0 0x1000>;
+		ranges = <0x0 0x0 0xfe000000 0x02000000
+			  0x1 0x0 0xf8000000 0x00008000
+			  0x2 0x0 0xf0000000 0x04000000
+			  0x4 0x0 0xf8008000 0x00008000
+			  0x5 0x0 0xf8010000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x02000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		bcsr@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8568mds-bcsr";
+			reg = <1 0 0x8000>;
+			ranges = <0 1 0 0x8000>;
+
+			bcsr5: gpio-controller@5 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8568mds-bcsr-gpio";
+				reg = <0x5 0x1>;	/* offset 0x5 in the BCSR window; unit address must match */
+				gpio-controller;
+			};
+		};
+
+		pib@4,0 {
+			compatible = "fsl,mpc8568mds-pib";
+			reg = <4 0 0x8000>;
+		};
+
+		pib@5,0 {
+			compatible = "fsl,mpc8568mds-pib";
+			reg = <5 0 0x8000>;
+		};
+	};
+
+	soc: soc8568@e0000000 {
+		ranges = <0x0 0x0 0xe0000000 0x100000>;
+
+		i2c-sleep-nexus {
+			i2c@3000 {
+				rtc@68 {
+					compatible = "dallas,ds1374";
+					reg = <0x68>;
+					interrupts = <3 1 0 0>;
+				};
+			};
+		};
+
+		enet0: ethernet@24000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+		};
+
+		mdio@24520 {
+			phy0: ethernet-phy@7 {
+				interrupts = <1 1 0 0>;
+				reg = <0x7>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupts = <2 1 0 0>;
+				reg = <0x1>;
+			};
+			phy2: ethernet-phy@2 {
+				interrupts = <1 1 0 0>;
+				reg = <0x2>;
+			};
+			phy3: ethernet-phy@3 {
+				interrupts = <2 1 0 0>;
+				reg = <0x3>;
+			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ethernet@25000 {
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy3>;
+			sleep = <&pmc 0x00000040>;
+		};
+
+		mdio@25520 {
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		par_io@e0100 {
+			num-ports = <7>;
+
+			pio1: ucc_pin@1 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x4  0xa  0x1  0x0  0x2  0x0 	/* TxD0 */
+					0x4  0x9  0x1  0x0  0x2  0x0 	/* TxD1 */
+					0x4  0x8  0x1  0x0  0x2  0x0 	/* TxD2 */
+					0x4  0x7  0x1  0x0  0x2  0x0 	/* TxD3 */
+					0x4  0x17  0x1  0x0  0x2  0x0 	/* TxD4 */
+					0x4  0x16  0x1  0x0  0x2  0x0 	/* TxD5 */
+					0x4  0x15  0x1  0x0  0x2  0x0 	/* TxD6 */
+					0x4  0x14  0x1  0x0  0x2  0x0 	/* TxD7 */
+					0x4  0xf  0x2  0x0  0x2  0x0 	/* RxD0 */
+					0x4  0xe  0x2  0x0  0x2  0x0 	/* RxD1 */
+					0x4  0xd  0x2  0x0  0x2  0x0 	/* RxD2 */
+					0x4  0xc  0x2  0x0  0x2  0x0 	/* RxD3 */
+					0x4  0x1d  0x2  0x0  0x2  0x0 	/* RxD4 */
+					0x4  0x1c  0x2  0x0  0x2  0x0 	/* RxD5 */
+					0x4  0x1b  0x2  0x0  0x2  0x0 	/* RxD6 */
+					0x4  0x1a  0x2  0x0  0x2  0x0 	/* RxD7 */
+					0x4  0xb  0x1  0x0  0x2  0x0 	/* TX_EN */
+					0x4  0x18  0x1  0x0  0x2  0x0 	/* TX_ER */
+					0x4  0x10  0x2  0x0  0x2  0x0 	/* RX_DV */
+					0x4  0x1e  0x2  0x0  0x2  0x0 	/* RX_ER */
+					0x4  0x11  0x2  0x0  0x2  0x0 	/* RX_CLK */
+					0x4  0x13  0x1  0x0  0x2  0x0 	/* GTX_CLK */
+					0x1  0x1f  0x2  0x0  0x3  0x0>;	/* GTX125 */
+			};
+
+			pio2: ucc_pin@2 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x5  0xa 0x1  0x0  0x2  0x0   /* TxD0 */
+					0x5  0x9 0x1  0x0  0x2  0x0   /* TxD1 */
+					0x5  0x8 0x1  0x0  0x2  0x0   /* TxD2 */
+					0x5  0x7 0x1  0x0  0x2  0x0   /* TxD3 */
+					0x5  0x17 0x1  0x0  0x2  0x0   /* TxD4 */
+					0x5  0x16 0x1  0x0  0x2  0x0   /* TxD5 */
+					0x5  0x15 0x1  0x0  0x2  0x0   /* TxD6 */
+					0x5  0x14 0x1  0x0  0x2  0x0   /* TxD7 */
+					0x5  0xf 0x2  0x0  0x2  0x0   /* RxD0 */
+					0x5  0xe 0x2  0x0  0x2  0x0   /* RxD1 */
+					0x5  0xd 0x2  0x0  0x2  0x0   /* RxD2 */
+					0x5  0xc 0x2  0x0  0x2  0x0   /* RxD3 */
+					0x5  0x1d 0x2  0x0  0x2  0x0   /* RxD4 */
+					0x5  0x1c 0x2  0x0  0x2  0x0   /* RxD5 */
+					0x5  0x1b 0x2  0x0  0x2  0x0   /* RxD6 */
+					0x5  0x1a 0x2  0x0  0x2  0x0   /* RxD7 */
+					0x5  0xb 0x1  0x0  0x2  0x0   /* TX_EN */
+					0x5  0x18 0x1  0x0  0x2  0x0   /* TX_ER */
+					0x5  0x10 0x2  0x0  0x2  0x0   /* RX_DV */
+					0x5  0x1e 0x2  0x0  0x2  0x0   /* RX_ER */
+					0x5  0x11 0x2  0x0  0x2  0x0   /* RX_CLK */
+					0x5  0x13 0x1  0x0  0x2  0x0   /* GTX_CLK */
+					0x1  0x1f 0x2  0x0  0x3  0x0   /* GTX125 */
+					0x4  0x6 0x3  0x0  0x2  0x0   /* MDIO */
+					0x4  0x5 0x1  0x0  0x2  0x0>; /* MDC */
+			};
+		};
+	};
+
+	qe: qe@e0080000 {
+		ranges = <0x0 0x0 0xe0080000 0x40000>;
+		reg = <0x0 0xe0080000 0x0 0x480>;
+
+		spi@4c0 {
+			mode = "cpu";
+		};
+
+		spi@500 {
+			mode = "cpu";
+		};
+
+		enet2: ucc@2000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk16";
+			pio-handle = <&pio1>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		enet3: ucc@3000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk16";
+			pio-handle = <&pio2>;
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@2120 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x2120 0x18>;
+			compatible = "fsl,ucc-mdio";
+
+			/* These are the same PHYs as on
+			 * gianfar's MDIO bus */
+			qe_phy0: ethernet-phy@7 {
+				interrupt-parent = <&mpic>;
+				interrupts = <1 1 0 0>;
+				reg = <0x7>;
+			};
+			qe_phy1: ethernet-phy@1 {
+				interrupt-parent = <&mpic>;
+				interrupts = <2 1 0 0>;
+				reg = <0x1>;
+			};
+			qe_phy2: ethernet-phy@2 {
+				interrupt-parent = <&mpic>;
+				interrupts = <1 1 0 0>;
+				reg = <0x2>;
+			};
+			qe_phy3: ethernet-phy@3 {
+				interrupt-parent = <&mpic>;
+				interrupts = <2 1 0 0>;
+				reg = <0x3>;
+			};
+		};
+	};
+
+	pci0: pci@e0008000 {
+		reg = <0x0 0xe0008000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0x0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xe2000000 0x0 0x800000>;
+		clock-frequency = <66666666>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x12 AD18 */
+			0x9000 0x0 0x0 0x1 &mpic 0x5 0x1 0 0
+			0x9000 0x0 0x0 0x2 &mpic 0x6 0x1 0 0
+			0x9000 0x0 0x0 0x3 &mpic 0x7 0x1 0 0
+			0x9000 0x0 0x0 0x4 &mpic 0x4 0x1 0 0
+
+			/* IDSEL 0x13 AD19 */
+			0x9800 0x0 0x0 0x1 &mpic 0x6 0x1 0 0
+			0x9800 0x0 0x0 0x2 &mpic 0x7 0x1 0 0
+			0x9800 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x9800 0x0 0x0 0x4 &mpic 0x5 0x1 0 0>;
+	};
+
+	/* PCI Express */
+	pci1: pcie@e000a000 {
+		ranges = <0x2000000 0x0 0xa0000000 0x0 0xa0000000 0x0 0x10000000
+			  0x1000000 0x0 0x00000000 0x0 0xe2800000 0x0 0x800000>;
+		reg = <0x0 0xe000a000 0x0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x10000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x800000>;
+		};
+	};
+
+	rio: rapidio@e00c0000 {
+		reg = <0x0 0xe00c0000 0x0 0x20000>;	/* unit address above mirrors this base */
+		port1 {
+			ranges = <0x0 0x0 0x0 0xc0000000 0x0 0x20000000>;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		green {
+			gpios = <&bcsr5 1 0>;
+		};
+
+		amber {
+			gpios = <&bcsr5 2 0>;
+		};
+
+		red {
+			gpios = <&bcsr5 3 0>;
+		};
+	};
+};
+
+/include/ "mpc8568si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8568si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8568si-post.dtsi
new file mode 100644
index 0000000000..64e7075a9c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8568si-post.dtsi
@@ -0,0 +1,270 @@
+/*
+ * MPC8568 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,mpc8568-localbus", "fsl,pq3-localbus", "simple-bus";
+	interrupts = <19 2 0 0>;
+	sleep = <&pmc 0x08000000>;
+};
+
+/* controller at 0x8000 */
+&pci0 {
+	compatible = "fsl,mpc8540-pci";
+	device_type = "pci";
+	interrupts = <24 0x2 0 0>;
+	bus-range = <0 0xff>;
+	#interrupt-cells = <1>;
+	#size-cells = <2>;
+	#address-cells = <3>;
+	sleep = <&pmc 0x80000000>;
+};
+
+/* controller at 0xa000 */
+&pci1 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;
+	sleep = <&pmc 0x20000000>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0: interrupt pins 1..4 -> &mpic interrupts 0..3 */
+			0x0 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0x0 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0x0 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0x0 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&rio {
+	compatible = "fsl,srio";
+	interrupts = <48 2 0 0>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,srio-rmu-handle = <&rmu>;
+	sleep = <&pmc 0x00080000>;
+	ranges;
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8568-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <10>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,mpc8568-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,mpc8568-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <18 2 0 0>;
+	};
+
+	i2c-sleep-nexus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		sleep = <&pmc 0x00000004>;
+		ranges;
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+
+	};
+
+	duart-sleep-nexus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		sleep = <&pmc 0x00000002>;
+		ranges;
+
+/include/ "pq3-duart-0.dtsi"
+
+	};
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,mpc8568-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x80000>; // L2, 512K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+	dma@21300 {
+		sleep = <&pmc 0x00000400>;
+	};
+
+/include/ "pq3-etsec1-0.dtsi"
+	ethernet@24000 {
+		sleep = <&pmc 0x00000080>;
+	};
+
+/include/ "pq3-etsec1-1.dtsi"
+	ethernet@25000 {
+		sleep = <&pmc 0x00000040>;
+	};
+
+	par_io@e0100 {
+		reg = <0xe0100 0x100>;
+		device_type = "par_io";
+	};
+
+/include/ "pq3-sec2.1-0.dtsi"
+	crypto@30000 {
+		sleep = <&pmc 0x01000000>;
+	};
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-rmu-0.dtsi"
+	rmu@d3000 {
+		sleep = <&pmc 0x00040000>;
+	};
+
+	global-utilities@e0000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8568-guts", "fsl,mpc8548-guts";
+		reg = <0xe0000 0x1000>;
+		ranges = <0 0xe0000 0x1000>;
+		fsl,has-rstcr;
+
+		pmc: power@70 {
+			compatible = "fsl,mpc8568-pmc",
+				     "fsl,mpc8548-pmc";
+			reg = <0x70 0x20>;
+		};
+	};
+};
+
+&qe {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "qe";
+	compatible = "fsl,qe";
+	sleep = <&pmc 0x00000800>;
+	brg-frequency = <0>;
+	bus-frequency = <396000000>;
+	fsl,qe-num-riscs = <2>;
+	fsl,qe-num-snums = <28>;
+
+	qeic: interrupt-controller@80 {
+		interrupt-controller;
+		compatible = "fsl,qe-ic";
+		#address-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x80 0x80>;
+		interrupts = <46 2 0 0 46 2 0 0>; //high:30 low:30
+		interrupt-parent = <&mpic>;
+	};
+
+	spi@4c0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,spi";
+		reg = <0x4c0 0x40>;
+		cell-index = <0>;
+		interrupts = <2>;
+		interrupt-parent = <&qeic>;
+	};
+
+	spi@500 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		cell-index = <1>;
+		compatible = "fsl,spi";
+		reg = <0x500 0x40>;
+		interrupts = <1>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@2000 {
+		cell-index = <1>;
+		reg = <0x2000 0x200>;
+		interrupts = <32>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@3000 {
+		cell-index = <2>;
+		reg = <0x3000 0x200>;
+		interrupts = <33>;
+		interrupt-parent = <&qeic>;
+	};
+
+	muram@10000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,qe-muram", "fsl,cpm-muram";
+		ranges = <0x0 0x10000 0x10000>;	/* child 0x0 -> parent offset 0x10000, 64 KiB */
+
+		data-only@0 {
+			compatible = "fsl,qe-muram-data",
+				     "fsl,cpm-muram-data";
+			reg = <0x0 0x10000>;	/* covers the whole 64 KiB window */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi
new file mode 100644
index 0000000000..122ca3bd0b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi
@@ -0,0 +1,68 @@
+/*
+ * MPC8568 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,MPC8568";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8568@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+			sleep = <&pmc 0x00008000	// core
+				 &pmc 0x00004000>;	// timebase
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
new file mode 100644
index 0000000000..206614ea22
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8569E MDS Device Tree Source
+ *
+ * Copyright (C) 2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8569si-pre.dtsi"
+
+/ {
+	model = "MPC8569EMDS";
+	compatible = "fsl,MPC8569EMDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet5 = &enet5;
+		ethernet7 = &enet7;
+		rapidio0 = &rio;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@e0005000 {
+		reg = <0x0 0xe0005000 0x0 0x1000>;
+
+		ranges = <0x0 0x0 0x0 0xfe000000 0x02000000
+			  0x1 0x0 0x0 0xf8000000 0x00008000
+			  0x2 0x0 0x0 0xf0000000 0x04000000
+			  0x3 0x0 0x0 0xfc000000 0x00008000
+			  0x4 0x0 0x0 0xf8008000 0x00008000
+			  0x5 0x0 0x0 0xf8010000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x02000000>;
+			bank-width = <1>;
+			device-width = <1>;
+			partition@0 {
+				label = "ramdisk";
+				reg = <0x00000000 0x01c00000>;
+			};
+			partition@1c00000 {
+				label = "kernel";
+				reg = <0x01c00000 0x002e0000>;
+			};
+			partition@1ee0000 {
+				label = "dtb";
+				reg = <0x01ee0000 0x00020000>;
+			};
+			partition@1f00000 {
+				label = "firmware";
+				reg = <0x01f00000 0x00080000>;
+				read-only;
+			};
+			partition@1f80000 {
+				label = "u-boot";
+				reg = <0x01f80000 0x00080000>;
+				read-only;
+			};
+		};
+
+		bcsr@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8569mds-bcsr";
+			reg = <1 0 0x8000>;
+			ranges = <0 1 0 0x8000>;
+
+			bcsr17: gpio-controller@11 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8569mds-bcsr-gpio";
+				reg = <0x11 0x1>;
+				gpio-controller;
+			};
+		};
+
+		nand@3,0 {
+			compatible = "fsl,mpc8569-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <3 0 0x8000>;
+		};
+
+		pib@4,0 {
+			compatible = "fsl,mpc8569mds-pib";
+			reg = <4 0 0x8000>;
+		};
+
+		pib@5,0 {
+			compatible = "fsl,mpc8569mds-pib";
+			reg = <5 0 0x8000>;
+		};
+	};
+
+	soc: soc@e0000000 {
+		ranges = <0x0 0x0 0xe0000000 0x100000>;
+
+		i2c-sleep-nexus {
+			i2c@3000 {
+				rtc@68 {
+					compatible = "dallas,ds1374";
+					reg = <0x68>;
+					interrupts = <3 1 0 0>;
+				};
+			};
+		};
+
+		sdhc@2e000 {
+			status = "disabled";
+			sdhci,1-bit-only;
+			bus-width = <1>;
+		};
+
+		par_io@e0100 {
+			num-ports = <7>;
+
+			qe_pio_e: gpio-controller@80 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8569-qe-pario-bank",
+					     "fsl,mpc8323-qe-pario-bank";
+				reg = <0x80 0x18>;
+				gpio-controller;
+			};
+
+			qe_pio_f: gpio-controller@a0 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8569-qe-pario-bank",
+					     "fsl,mpc8323-qe-pario-bank";
+				reg = <0xa0 0x18>;
+				gpio-controller;
+			};
+
+			pio1: ucc_pin@1 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x2  0x1f 0x1  0x0  0x1  0x0	/* QE_MUX_MDC */
+					0x2  0x1e 0x3  0x0  0x2  0x0	/* QE_MUX_MDIO */
+					0x2  0x0b 0x2  0x0  0x1  0x0	/* CLK12*/
+					0x0  0x0  0x1  0x0  0x3  0x0	/* ENET1_TXD0_SER1_TXD0 */
+					0x0  0x1  0x1  0x0  0x3  0x0	/* ENET1_TXD1_SER1_TXD1 */
+					0x0  0x2  0x1  0x0  0x1  0x0	/* ENET1_TXD2_SER1_TXD2 */
+					0x0  0x3  0x1  0x0  0x2  0x0	/* ENET1_TXD3_SER1_TXD3 */
+					0x0  0x6  0x2  0x0  0x3  0x0	/* ENET1_RXD0_SER1_RXD0	*/
+					0x0  0x7  0x2  0x0  0x1  0x0	/* ENET1_RXD1_SER1_RXD1	*/
+					0x0  0x8  0x2  0x0  0x2  0x0	/* ENET1_RXD2_SER1_RXD2	*/
+					0x0  0x9  0x2  0x0  0x2  0x0	/* ENET1_RXD3_SER1_RXD3	*/
+					0x0  0x4  0x1  0x0  0x2  0x0	/* ENET1_TX_EN_SER1_RTS_B */
+					0x0  0xc  0x2  0x0  0x3  0x0	/* ENET1_RX_DV_SER1_CTS_B */
+					0x2  0x8  0x2  0x0  0x1  0x0	/* ENET1_GRXCLK	*/
+					0x2  0x14 0x1  0x0  0x2  0x0>;	/* ENET1_GTXCLK	*/
+			};
+
+			pio2: ucc_pin@2 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x2  0x1f 0x1  0x0  0x1  0x0	/* QE_MUX_MDC */
+					0x2  0x1e 0x3  0x0  0x2  0x0	/* QE_MUX_MDIO */
+					0x2  0x10 0x2  0x0  0x3  0x0	/* CLK17 */
+					0x0  0xe  0x1  0x0  0x2  0x0	/* ENET2_TXD0_SER2_TXD0 */
+					0x0  0xf  0x1  0x0  0x2  0x0	/* ENET2_TXD1_SER2_TXD1 */
+					0x0  0x10 0x1  0x0  0x1  0x0	/* ENET2_TXD2_SER2_TXD2 */
+					0x0  0x11 0x1  0x0  0x1  0x0	/* ENET2_TXD3_SER2_TXD3 */
+					0x0  0x14 0x2  0x0  0x2  0x0	/* ENET2_RXD0_SER2_RXD0	*/
+					0x0  0x15 0x2  0x0  0x1  0x0	/* ENET2_RXD1_SER2_RXD1	*/
+					0x0  0x16 0x2  0x0  0x1  0x0	/* ENET2_RXD2_SER2_RXD2	*/
+					0x0  0x17 0x2  0x0  0x1  0x0	/* ENET2_RXD3_SER2_RXD3	*/
+					0x0  0x12 0x1  0x0  0x2  0x0	/* ENET2_TX_EN_SER2_RTS_B */
+					0x0  0x1a 0x2  0x0  0x3  0x0	/* ENET2_RX_DV_SER2_CTS_B */
+					0x2  0x3  0x2  0x0  0x1  0x0	/* ENET2_GRXCLK	*/
+					0x2  0x2 0x1  0x0  0x2  0x0>;	/* ENET2_GTXCLK	*/
+			};
+
+			pio3: ucc_pin@3 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x2  0x1f 0x1  0x0  0x1  0x0	/* QE_MUX_MDC */
+					0x2  0x1e 0x3  0x0  0x2  0x0	/* QE_MUX_MDIO */
+					0x2  0x0b 0x2  0x0  0x1  0x0	/* CLK12*/
+					0x0  0x1d 0x1  0x0  0x2  0x0	/* ENET3_TXD0_SER3_TXD0 */
+					0x0  0x1e 0x1  0x0  0x3  0x0	/* ENET3_TXD1_SER3_TXD1 */
+					0x0  0x1f 0x1  0x0  0x2  0x0	/* ENET3_TXD2_SER3_TXD2 */
+					0x1  0x0  0x1  0x0  0x3  0x0	/* ENET3_TXD3_SER3_TXD3 */
+					0x1  0x3  0x2  0x0  0x3  0x0	/* ENET3_RXD0_SER3_RXD0	*/
+					0x1  0x4  0x2  0x0  0x1  0x0	/* ENET3_RXD1_SER3_RXD1	*/
+					0x1  0x5  0x2  0x0  0x2  0x0	/* ENET3_RXD2_SER3_RXD2	*/
+					0x1  0x6  0x2  0x0  0x3  0x0	/* ENET3_RXD3_SER3_RXD3	*/
+					0x1  0x1  0x1  0x0  0x1  0x0	/* ENET3_TX_EN_SER3_RTS_B */
+					0x1  0x9  0x2  0x0  0x3  0x0	/* ENET3_RX_DV_SER3_CTS_B */
+					0x2  0x9  0x2  0x0  0x2  0x0	/* ENET3_GRXCLK	*/
+					0x2  0x19 0x1  0x0  0x2  0x0>;	/* ENET3_GTXCLK	*/
+			};
+
+			pio4: ucc_pin@4 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x2  0x1f 0x1  0x0  0x1  0x0	/* QE_MUX_MDC */
+					0x2  0x1e 0x3  0x0  0x2  0x0	/* QE_MUX_MDIO */
+					0x2  0x10 0x2  0x0  0x3  0x0	/* CLK17 */
+					0x1  0xc  0x1  0x0  0x2  0x0	/* ENET4_TXD0_SER4_TXD0 */
+					0x1  0xd  0x1  0x0  0x2  0x0	/* ENET4_TXD1_SER4_TXD1 */
+					0x1  0xe  0x1  0x0  0x1  0x0	/* ENET4_TXD2_SER4_TXD2 */
+					0x1  0xf  0x1  0x0  0x2  0x0	/* ENET4_TXD3_SER4_TXD3 */
+					0x1  0x12 0x2  0x0  0x2  0x0	/* ENET4_RXD0_SER4_RXD0	*/
+					0x1  0x13 0x2  0x0  0x1  0x0	/* ENET4_RXD1_SER4_RXD1	*/
+					0x1  0x14 0x2  0x0  0x1  0x0	/* ENET4_RXD2_SER4_RXD2	*/
+					0x1  0x15 0x2  0x0  0x2  0x0	/* ENET4_RXD3_SER4_RXD3	*/
+					0x1  0x10 0x1  0x0  0x2  0x0	/* ENET4_TX_EN_SER4_RTS_B */
+					0x1  0x18 0x2  0x0  0x3  0x0	/* ENET4_RX_DV_SER4_CTS_B */
+					0x2  0x11 0x2  0x0  0x2  0x0	/* ENET4_GRXCLK	*/
+					0x2  0x18 0x1  0x0  0x2  0x0>;	/* ENET4_GTXCLK	*/
+			};
+		};
+	};
+
+	qe: qe@e0080000 {
+		ranges = <0x0 0x0 0xe0080000 0x40000>;
+		reg = <0x0 0xe0080000 0x0 0x480>;
+
+		spi@4c0 {
+			gpios = <&qe_pio_e 30 0>;
+			mode = "cpu-qe";
+
+			serial-flash@0 {
+				compatible = "st,m25p40";
+				reg = <0>;
+				spi-max-frequency = <25000000>;
+			};
+		};
+
+		spi@500 {
+			mode = "cpu";
+		};
+
+		usb@6c0 {
+			fsl,fullspeed-clock = "clk5";
+			fsl,lowspeed-clock = "brg10";
+			gpios = <&qe_pio_f 3 0   /* USBOE */
+				 &qe_pio_f 4 0   /* USBTP */
+				 &qe_pio_f 5 0   /* USBTN */
+				 &qe_pio_f 6 0   /* USBRP */
+				 &qe_pio_f 8 0   /* USBRN */
+				 &bcsr17   1 0   /* SPEED */
+				 &bcsr17   2 0>; /* POWER */
+		};
+
+		enet0: ucc@2000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk12";
+			pio-handle = <&pio1>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&qe_phy0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@2120 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x2120 0x18>;
+			compatible = "fsl,ucc-mdio";
+
+			qe_phy0: ethernet-phy@7 {
+				interrupt-parent = <&mpic>;
+				interrupts = <1 1 0 0>;
+				reg = <0x7>;
+			};
+			qe_phy1: ethernet-phy@1 {
+				interrupt-parent = <&mpic>;
+				interrupts = <2 1 0 0>;
+				reg = <0x1>;
+			};
+			qe_phy2: ethernet-phy@2 {
+				interrupt-parent = <&mpic>;
+				interrupts = <3 1 0 0>;
+				reg = <0x2>;
+			};
+			qe_phy3: ethernet-phy@3 {
+				interrupt-parent = <&mpic>;
+				interrupts = <4 1 0 0>;
+				reg = <0x3>;
+			};
+			qe_phy5: ethernet-phy@4 {
+				reg = <0x04>;
+			};
+			qe_phy7: ethernet-phy@6 {
+				reg = <0x6>;
+			};
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+		mdio@3520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3520 0x18>;
+			compatible = "fsl,ucc-mdio";
+
+			tbi6: tbi-phy@15 {	/* referenced by enet5's tbi-handle */
+				reg = <0x15>;
+				device_type = "tbi-phy";
+			};
+		};
+		mdio@3720 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3720 0x38>;
+			compatible = "fsl,ucc-mdio";
+			tbi8: tbi-phy@17 {
+				reg = <0x17>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet2: ucc@2200 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk12";
+			pio-handle = <&pio3>;
+			tbi-handle = <&tbi3>;
+			phy-handle = <&qe_phy2>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@2320 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x2320 0x18>;
+			compatible = "fsl,ucc-mdio";
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ucc@3000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk17";
+			pio-handle = <&pio2>;
+			tbi-handle = <&tbi2>;
+			phy-handle = <&qe_phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@3120 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3120 0x18>;
+			compatible = "fsl,ucc-mdio";
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet3: ucc@3200 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk17";
+			pio-handle = <&pio4>;
+			tbi-handle = <&tbi4>;
+			phy-handle = <&qe_phy3>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@3320 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3320 0x18>;
+			compatible = "fsl,ucc-mdio";
+			tbi4: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet5: ucc@3400 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "none";
+			tbi-handle = <&tbi6>;
+			phy-handle = <&qe_phy5>;
+			phy-connection-type = "sgmii";
+		};
+
+		enet7: ucc@3600 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "none";
+			tbi-handle = <&tbi8>;
+			phy-handle = <&qe_phy7>;
+			phy-connection-type = "sgmii";
+		};
+	};
+
+	/* PCI Express */
+	pci1: pcie@e000a000 {
+		reg = <0x0 0xe000a000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x10000000
+			  0x1000000 0x0 0x00000000 0 0xe2800000 0x0 0x00800000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x10000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x800000>;
+		};
+	};
+
+	rio: rapidio@e00c0000 {
+		reg = <0x0 0xe00c0000 0x0 0x20000>;	/* unit address above mirrors this base */
+		port1 {
+			ranges = <0x0 0x0 0x0 0xc0000000 0x0 0x20000000>;
+		};
+		port2 {
+			status = "disabled";
+		};
+	};
+};
+
+/include/ "mpc8569si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8569si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8569si-post.dtsi
new file mode 100644
index 0000000000..3e6346a4a1
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8569si-post.dtsi
@@ -0,0 +1,304 @@
+/*
+ * MPC8569 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,mpc8569-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>;
+	sleep = <&pmc 0x08000000>;
+};
+
+/* controller at 0xa000 */
+&pci1 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;
+	sleep = <&pmc 0x20000000>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&rio {
+	compatible = "fsl,srio";
+	interrupts = <48 2 0 0>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,srio-rmu-handle = <&rmu>;
+	sleep = <&pmc 0x00080000>;
+	ranges;
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8569-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled in by U-Boot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <10>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,mpc8569-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,mpc8569-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <18 2 0 0>;
+	};
+
+	i2c-sleep-nexus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		sleep = <&pmc 0x00000004>;
+		ranges;
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+
+	};
+
+	duart-sleep-nexus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		sleep = <&pmc 0x00000002>;
+		ranges;
+
+/include/ "pq3-duart-0.dtsi"
+
+	};
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,mpc8569-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x80000>; // L2, 512K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		sleep = <&pmc 0x00200000>;
+	};
+
+	par_io@e0100 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xe0100 0x100>;
+		ranges = <0x0 0xe0100 0x100>;
+		device_type = "par_io";
+	};
+
+/include/ "pq3-sec3.1-0.dtsi"
+	crypto@30000 {
+		sleep = <&pmc 0x01000000>;
+	};
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-rmu-0.dtsi"
+	rmu@d3000 {
+		sleep = <&pmc 0x00040000>;
+	};
+
+	global-utilities@e0000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8569-guts", "fsl,mpc8548-guts";
+		reg = <0xe0000 0x1000>;
+		ranges = <0 0xe0000 0x1000>;
+		fsl,has-rstcr;
+
+		pmc: power@70 {
+			compatible = "fsl,mpc8569-pmc",
+				     "fsl,mpc8548-pmc";
+			reg = <0x70 0x20>;
+		};
+	};
+};
+
+&qe {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "qe";
+	compatible = "fsl,qe";
+	sleep = <&pmc 0x00000800>;
+	brg-frequency = <0>;
+	bus-frequency = <0>;
+	fsl,qe-num-riscs = <4>;
+	fsl,qe-num-snums = <46>;
+
+	qeic: interrupt-controller@80 {
+		interrupt-controller;
+		compatible = "fsl,qe-ic";
+		#address-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x80 0x80>;
+		interrupts = <46 2 0 0 46 2 0 0>; //high:30 low:30
+		interrupt-parent = <&mpic>;
+	};
+
+	timer@440 {
+		compatible = "fsl,mpc8569-qe-gtm",
+			     "fsl,qe-gtm", "fsl,gtm";
+		reg = <0x440 0x40>;
+		interrupts = <12 13 14 15>;
+		interrupt-parent = <&qeic>;
+		/* Filled in by U-Boot */
+		clock-frequency = <0>;
+	};
+
+	spi@4c0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,mpc8569-qe-spi", "fsl,spi";
+		reg = <0x4c0 0x40>;
+		cell-index = <0>;
+		interrupts = <2>;
+		interrupt-parent = <&qeic>;
+	};
+
+	spi@500 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		cell-index = <1>;
+		compatible = "fsl,spi";
+		reg = <0x500 0x40>;
+		interrupts = <1>;
+		interrupt-parent = <&qeic>;
+	};
+
+	usb@6c0 {
+		compatible = "fsl,mpc8569-qe-usb",
+			     "fsl,mpc8323-qe-usb";
+		reg = <0x6c0 0x40 0x8b00 0x100>;
+		interrupts = <11>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@2000 {
+		cell-index = <1>;
+		reg = <0x2000 0x200>;
+		interrupts = <32>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@2200 {
+		cell-index = <3>;
+		reg = <0x2200 0x200>;
+		interrupts = <34>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@3000 {
+		cell-index = <2>;
+		reg = <0x3000 0x200>;
+		interrupts = <33>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@3200 {
+		cell-index = <4>;
+		reg = <0x3200 0x200>;
+		interrupts = <35>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@3400 {
+		cell-index = <6>;
+		reg = <0x3400 0x200>;
+		interrupts = <41>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@3600 {
+		cell-index = <8>;
+		reg = <0x3600 0x200>;
+		interrupts = <43>;
+		interrupt-parent = <&qeic>;
+	};
+
+	muram@10000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,qe-muram", "fsl,cpm-muram";
+		ranges = <0x0 0x10000 0x20000>;
+
+		data-only@0 {
+			compatible = "fsl,qe-muram-data",
+				     "fsl,cpm-muram-data";
+			reg = <0x0 0x20000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi
new file mode 100644
index 0000000000..2cd15a2a04
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi
@@ -0,0 +1,67 @@
+/*
+ * MPC8569 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,MPC8569";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8569@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+			sleep = <&pmc 0x00008000	// core
+				 &pmc 0x00004000>;	// timebase
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572ds.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds.dts
new file mode 100644
index 0000000000..679d53c4a9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds.dts
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8572 DS Device Tree Source
+ *
+ * Copyright 2007-2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8572si-pre.dtsi"
+
+/ {
+	model = "fsl,MPC8572DS";
+	compatible = "fsl,MPC8572DS";
+
+	memory {
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
+			  0x1 0x0 0x0 0xe0000000 0x08000000
+			  0x2 0x0 0x0 0xffa00000 0x00040000
+			  0x3 0x0 0x0 0xffdf0000 0x00008000
+			  0x4 0x0 0x0 0xffa40000 0x00040000
+			  0x5 0x0 0x0 0xffa80000 0x00040000
+			  0x6 0x0 0x0 0xffac0000 0x00040000>;
+	};
+
+	board_soc: soc: soc8572@ffe00000 {
+		ranges = <0x0 0 0xffe00000 0x100000>;
+	};
+
+	board_pci0: pci0: pcie@ffe08000 {
+		reg = <0 0xffe08000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci1: pcie@ffe09000 {
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci2: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+};
+
+/*
+ * mpc8572ds.dtsi must be last to ensure board_pci0 overrides pci0 settings
+ * for interrupt-map & interrupt-map-mask
+ */
+
+/include/ "mpc8572si-post.dtsi"
+/include/ "mpc8572ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572ds.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572ds.dtsi
new file mode 100644
index 0000000000..357490bb84
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds.dtsi
@@ -0,0 +1,428 @@
+/*
+ * MPC8572DS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x8000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			reg = <0x0 0x03000000>;
+			label = "ramdisk-nor";
+		};
+
+		partition@3000000 {
+			reg = <0x03000000 0x00e00000>;
+			label = "diagnostic-nor";
+			read-only;
+		};
+
+		partition@3e00000 {
+			reg = <0x03e00000 0x00200000>;
+			label = "dink-nor";
+			read-only;
+		};
+
+		partition@4000000 {
+			reg = <0x04000000 0x00400000>;
+			label = "kernel-nor";
+		};
+
+		partition@4400000 {
+			reg = <0x04400000 0x03b00000>;
+			label = "fs-nor";
+		};
+
+		partition@7f00000 {
+			reg = <0x07f00000 0x00060000>;
+			label = "dtb-nor";
+		};
+
+		partition@7f60000 {
+			reg = <0x07f60000 0x00020000>;
+			label = "env-nor";
+			read-only;
+		};
+
+		partition@7f80000 {
+			reg = <0x07f80000 0x00080000>;
+			label = "u-boot-nor";
+			read-only;
+		};
+	};
+
+	nand@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8572-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x2 0x0 0x40000>;
+
+		partition@0 {
+			reg = <0x0 0x02000000>;
+			label = "u-boot-nand";
+			read-only;
+		};
+
+		partition@2000000 {
+			reg = <0x02000000 0x10000000>;
+			label = "fs-nand";
+		};
+
+		partition@12000000 {
+			reg = <0x12000000 0x08000000>;
+			label = "ramdisk-nand";
+		};
+
+		partition@1a000000 {
+			reg = <0x1a000000 0x04000000>;
+			label = "kernel-nand";
+		};
+
+		partition@1e000000 {
+			reg = <0x1e000000 0x01000000>;
+			label = "dtb-nand";
+		};
+
+		partition@1f000000 {
+			reg = <0x1f000000 0x21000000>;
+			label = "empty-nand";
+		};
+	};
+
+	nand@4,0 {
+		compatible = "fsl,mpc8572-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x4 0x0 0x40000>;
+	};
+
+	nand@5,0 {
+		compatible = "fsl,mpc8572-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x5 0x0 0x40000>;
+	};
+
+	nand@6,0 {
+		compatible = "fsl,mpc8572-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x6 0x0 0x40000>;
+	};
+};
+
+&board_soc {
+	enet0: ethernet@24000 {
+		tbi-handle = <&tbi0>;
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	mdio@24520 {
+		phy0: ethernet-phy@0 {
+			interrupts = <10 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <10 1 0 0>;
+			reg = <0x1>;
+		};
+		phy2: ethernet-phy@2 {
+			interrupts = <10 1 0 0>;
+			reg = <0x2>;
+		};
+		phy3: ethernet-phy@3 {
+			interrupts = <10 1 0 0>;
+			reg = <0x3>;
+		};
+		/* NOTE(review): sgmii-phy@0-3 are indices; reg holds 0x1c-0x1f. Confirm users before renaming. */
+		sgmii_phy0: sgmii-phy@0 {
+			interrupts = <6 1 0 0>;
+			reg = <0x1c>;
+		};
+		sgmii_phy1: sgmii-phy@1 {
+			interrupts = <6 1 0 0>;
+			reg = <0x1d>;
+		};
+		sgmii_phy2: sgmii-phy@2 {
+			interrupts = <7 1 0 0>;
+			reg = <0x1e>;
+		};
+		sgmii_phy3: sgmii-phy@3 {
+			interrupts = <7 1 0 0>;
+			reg = <0x1f>;
+		};
+
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	ptp_clock@24e00 {
+		fsl,tclk-period = <5>;
+		fsl,tmr-prsc = <200>;
+		fsl,tmr-add = <0xAAAAAAAB>;
+		fsl,tmr-fiper1 = <0x3B9AC9FB>;
+		fsl,tmr-fiper2 = <0x3B9AC9FB>;
+		fsl,max-adj = <499999999>;
+	};
+
+	enet1: ethernet@25000 {
+		tbi-handle = <&tbi1>;
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	mdio@25520 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet2: ethernet@26000 {
+		tbi-handle = <&tbi2>;
+		phy-handle = <&phy2>;
+		phy-connection-type = "rgmii-id";
+
+	};
+	mdio@26520 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet3: ethernet@27000 {
+		tbi-handle = <&tbi3>;
+		phy-handle = <&phy3>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	mdio@27520 {
+		tbi3: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+};
+
+&board_pci0 {
+	pcie@0 {
+		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x11 func 0 - PCI slot 1 */
+			0x8800 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8800 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8800 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8800 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 1 - PCI slot 1 */
+			0x8900 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8900 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8900 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8900 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 2 - PCI slot 1 */
+			0x8a00 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8a00 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8a00 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8a00 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 3 - PCI slot 1 */
+			0x8b00 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8b00 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8b00 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8b00 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 4 - PCI slot 1 */
+			0x8c00 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8c00 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8c00 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8c00 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 5 - PCI slot 1 */
+			0x8d00 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8d00 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8d00 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8d00 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 6 - PCI slot 1 */
+			0x8e00 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8e00 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8e00 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8e00 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x11 func 7 - PCI slot 1 */
+			0x8f00 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
+			0x8f00 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
+			0x8f00 0x0 0x0 0x3 &mpic 0x4 0x1 0 0
+			0x8f00 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
+
+			/* IDSEL 0x12 func 0 - PCI slot 2 */
+			0x9000 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9000 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9000 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9000 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 1 - PCI slot 2 */
+			0x9100 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9100 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9100 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9100 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 2 - PCI slot 2 */
+			0x9200 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9200 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9200 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9200 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 3 - PCI slot 2 */
+			0x9300 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9300 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9300 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9300 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 4 - PCI slot 2 */
+			0x9400 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9400 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9400 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9400 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 5 - PCI slot 2 */
+			0x9500 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9500 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9500 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9500 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 6 - PCI slot 2 */
+			0x9600 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9600 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9600 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9600 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			/* IDSEL 0x12 func 7 - PCI slot 2 */
+			0x9700 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
+			0x9700 0x0 0x0 0x2 &mpic 0x4 0x1 0 0
+			0x9700 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
+			0x9700 0x0 0x0 0x4 &mpic 0x2 0x1 0 0
+
+			// IDSEL 0x1c  USB
+			0xe000 0x0 0x0 0x1 &i8259 0xc 0x2
+			0xe100 0x0 0x0 0x2 &i8259 0x9 0x2
+			0xe200 0x0 0x0 0x3 &i8259 0xa 0x2
+			0xe300 0x0 0x0 0x4 &i8259 0xb 0x2
+
+			// IDSEL 0x1d  Audio
+			0xe800 0x0 0x0 0x1 &i8259 0x6 0x2
+
+			// IDSEL 0x1e Legacy
+			0xf000 0x0 0x0 0x1 &i8259 0x7 0x2
+			0xf100 0x0 0x0 0x1 &i8259 0x7 0x2
+
+			// IDSEL 0x1f IDE/SATA
+			0xf800 0x0 0x0 0x1 &i8259 0xe 0x2
+			0xf900 0x0 0x0 0x1 &i8259 0x5 0x2
+			>;
+
+
+		uli1575@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+			isa@1e {
+				device_type = "isa";
+				#interrupt-cells = <2>;
+				#size-cells = <1>;
+				#address-cells = <2>;
+				reg = <0xf000 0x0 0x0 0x0 0x0>;
+				ranges = <0x1 0x0 0x1000000 0x0 0x0
+					  0x1000>;
+				interrupt-parent = <&i8259>;
+
+				i8259: interrupt-controller@20 {
+					reg = <0x1 0x20 0x2
+					       0x1 0xa0 0x2
+					       0x1 0x4d0 0x2>;
+					interrupt-controller;
+					device_type = "interrupt-controller";
+					#address-cells = <0>;
+					#interrupt-cells = <2>;
+					compatible = "chrp,iic";
+					interrupts = <9 2 0 0>;
+					interrupt-parent = <&mpic>;
+				};
+
+				i8042@60 {
+					#size-cells = <0>;
+					#address-cells = <1>;
+					reg = <0x1 0x60 0x1 0x1 0x64 0x1>;
+					interrupts = <1 3 12 3>;
+					interrupt-parent =
+						<&i8259>;
+
+					keyboard@0 {
+						reg = <0x0>;
+						compatible = "pnpPNP,303";
+					};
+
+					mouse@1 {
+						reg = <0x1>;
+						compatible = "pnpPNP,f03";
+					};
+				};
+
+				rtc@70 {
+					compatible = "pnpPNP,b00";
+					reg = <0x1 0x70 0x2>;
+				};
+
+				gpio@400 {
+					reg = <0x1 0x400 0x80>;
+				};
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572ds_36b.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds_36b.dts
new file mode 100644
index 0000000000..f2abce2bb2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds_36b.dts
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8572DS Device Tree Source (36-bit address map)
+ *
+ * Copyright 2007-2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8572si-pre.dtsi"
+
+/ {
+	model = "fsl,MPC8572DS";
+	compatible = "fsl,MPC8572DS";
+
+	memory {
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+
+		ranges = <0x0 0x0 0xf 0xe8000000 0x08000000
+			  0x1 0x0 0xf 0xe0000000 0x08000000
+			  0x2 0x0 0xf 0xffa00000 0x00040000
+			  0x3 0x0 0xf 0xffdf0000 0x00008000
+			  0x4 0x0 0xf 0xffa40000 0x00040000
+			  0x5 0x0 0xf 0xffa80000 0x00040000
+			  0x6 0x0 0xf 0xffac0000 0x00040000>;
+	};
+
+	board_soc: soc: soc8572@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;
+	};
+
+	board_pci0: pci0: pcie@fffe08000 {
+		reg = <0xf 0xffe08000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci1: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci2: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x40000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+};
+
+/*
+ * mpc8572ds.dtsi must be last to ensure board_pci0 overrides pci0 settings
+ * for interrupt-map & interrupt-map-mask
+ */
+
+/include/ "mpc8572si-post.dtsi"
+/include/ "mpc8572ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core0.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core0.dts
new file mode 100644
index 0000000000..d1a4993caf
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core0.dts
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8572 DS Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, dma1, global-util, eth0,
+ * eth1, crypto, pci0, pci1.
+ *
+ * Copyright 2007-2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8572ds.dts"
+
+/ {
+	model = "fsl,MPC8572DS";
+	compatible = "fsl,MPC8572DS", "fsl,MPC8572DS-CAMP";
+
+	cpus {
+		PowerPC,8572@0 {
+		};
+		PowerPC,8572@1 {
+			status = "disabled";
+		};
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc8572@ffe00000 {
+		serial@4600 {
+			status = "disabled";
+		};
+		dma@c300 {
+			status = "disabled";
+		};
+		gpio-controller@f000 {
+		};
+		l2-cache-controller@20000 {
+			cache-size = <0x80000>;	// L2, 512K
+		};
+		ethernet@26000 {
+			status = "disabled";
+		};
+		mdio@26520 {
+			status = "disabled";
+		};
+		ethernet@27000 {
+			status = "disabled";
+		};
+		mdio@27520 {
+			status = "disabled";
+		};
+		pic@40000 {
+			protected-sources = <
+			31 32 33 37 38 39       /* enet2 enet3 */
+			76 77 78 79 26 42	/* dma2 pci2 serial */
+			0xe4 0xe5 0xe6 0xe7	/* msi */
+			>;
+		};
+
+		msi@41600 {
+			msi-available-ranges = <0 0x80>;
+			interrupts = <
+				0xe0 0 0 0
+				0xe1 0 0 0
+				0xe2 0 0 0
+				0xe3 0 0 0>;
+		};
+		timer@42100 {
+			status = "disabled";
+		};
+	};
+	pcie@ffe0a000 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core1.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core1.dts
new file mode 100644
index 0000000000..63e8243ff3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core1.dts
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8572 DS Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, dma2, eth2, eth3, pci2, msi.
+ *
+ * Note: add "-b 1" when compiling core1's dts.
+ *
+ * Copyright 2007-2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8572ds.dts"
+
+/ {
+	model = "fsl,MPC8572DS";
+	compatible = "fsl,MPC8572DS", "fsl,MPC8572DS-CAMP";
+
+	cpus {
+		PowerPC,8572@0 {
+			status = "disabled";
+		};
+		PowerPC,8572@1 {
+		};
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc8572@ffe00000 {
+		ecm-law@0 {
+			status = "disabled";
+		};
+		ecm@1000 {
+			status = "disabled";
+		};
+		memory-controller@2000 {
+			status = "disabled";
+		};
+		memory-controller@6000 {
+			status = "disabled";
+		};
+		i2c@3000 {
+			status = "disabled";
+		};
+		i2c@3100 {
+			status = "disabled";
+		};
+		serial@4500 {
+			status = "disabled";
+		};
+		gpio-controller@f000 {
+			status = "disabled";
+		};
+		l2-cache-controller@20000 {
+			cache-size = <0x80000>;	// L2, 512K
+		};
+		dma@21300 {
+			status = "disabled";
+		};
+		ethernet@24000 {
+			status = "disabled";
+		};
+		ptp_clock@24e00 {
+			status = "disabled";
+		};
+		ethernet@25000 {
+			status = "disabled";
+		};
+		mdio@25520 {
+			status = "disabled";
+		};
+		crypto@30000 {
+			status = "disabled";
+		};
+		pic@40000 {
+			protected-sources = <
+			18 16 10 42 45 58	/* MEM L2 mdio serial crypto */
+			29 30 34 35 36 40	/* enet0 enet1 */
+			24 25 20 21 22 23	/* pci0 pci1 dma1 */
+			43			/* i2c */
+			0x1 0x2 0x3 0x4         /* pci slot */
+			0x9 0xa 0xb 0xc         /* usb */
+			0x6 0x7 0xe 0x5         /* Audio legacy SATA */
+			0xe0 0xe1 0xe2 0xe3	/* msi */
+			>;
+		};
+		timer@41100 {
+			status = "disabled";
+		};
+		msi@41600 {
+			msi-available-ranges = <0x80 0x80>;
+			interrupts = <
+				0xe4 0 0 0
+				0xe5 0 0 0
+				0xe6 0 0 0
+				0xe7 0 0 0>;
+		};
+		global-utilities@e0000 {
+			status = "disabled";
+		};
+	};
+	pcie@ffe08000 {
+		status = "disabled";
+	};
+	pcie@ffe09000 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
new file mode 100644
index 0000000000..49294cf36b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
@@ -0,0 +1,196 @@
+/*
+ * MPC8572 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {	/* SoC-level properties for the node labelled "lbc"; the label is not defined in this file */
+	#address-cells = <2>;	/* child addresses take two cells */
+	#size-cells = <1>;	/* child sizes take one cell */
+	compatible = "fsl,mpc8572-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+/* controller at 0x8000 */
+&pci0 {	/* SoC-level properties for the node labelled "pci0"; the label is not defined in this file */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <24 2 0 0>;
+
+	pcie@0 {	/* child node at address 0; carries the interrupt routing table */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <24 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare bits 0xf800 of the first address cell and the low 3 bits of the pin cell */
+
+		interrupt-map = <	/* pins 1-4 route to &mpic sources 0x8-0xb */
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x8 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x9 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0xa 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0xb 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0x9000 */
+&pci1 {	/* SoC-level properties for the node labelled "pci1"; the label is not defined in this file */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <25 2 0 0>;
+
+	pcie@0 {	/* child node at address 0; carries the interrupt routing table */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <25 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare bits 0xf800 of the first address cell and the low 3 bits of the pin cell */
+
+		interrupt-map = <	/* pins 1-4 route to &mpic sources 0x4-0x7 */
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci2 {	/* SoC-level properties for the node labelled "pci2"; the label is not defined in this file */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;
+
+	pcie@0 {	/* child node at address 0; carries the interrupt routing table */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare bits 0xf800 of the first address cell and the low 3 bits of the pin cell */
+		interrupt-map = <	/* pins 1-4 route to &mpic sources 0x0-0x3 */
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&soc {	/* SoC-level properties and on-chip devices for the node labelled "soc"; the label is not defined in this file */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8572-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,mpc8572-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,mpc8572-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <18 2 0 0>;
+	};
+
+	memory-controller@6000 {
+		compatible = "fsl,mpc8572-memory-controller";
+		reg = <0x6000 0x1000>;
+		interrupts = <18 2 0 0>;	/* same interrupt specifier as memory-controller@2000 */
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+/include/ "pq3-dma-1.dtsi"
+/include/ "pq3-gpio-0.dtsi"
+	gpio-controller@f000 {	/* sets only compatible; presumably merges into the node from pq3-gpio-0.dtsi - confirm */
+		compatible = "fsl,mpc8572-gpio";
+	};
+
+	L2: l2-cache-controller@20000 {	/* the L2 label is referenced by next-level-cache in mpc8572si-pre.dtsi */
+		compatible = "fsl,mpc8572-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x100000>; // L2,1M
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-etsec1-0.dtsi"
+/include/ "pq3-etsec1-timer-0.dtsi"
+
+	ptp_clock@24e00 {	/* four interrupt specifiers of four cells each: sources 68, 69, 70 and 71 */
+		interrupts = <68 2 0 0 69 2 0 0 70 2 0 0 71 2 0 0>;
+	};
+
+/include/ "pq3-etsec1-1.dtsi"
+/include/ "pq3-etsec1-2.dtsi"
+/include/ "pq3-etsec1-3.dtsi"
+/include/ "pq3-sec3.0-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+	global-utilities@e0000 {
+		compatible = "fsl,mpc8572-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi
new file mode 100644
index 0000000000..28c2a862be
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi
@@ -0,0 +1,73 @@
+/*
+ * MPC8572 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {	/* Root-level defaults for MPC8572: cell sizes, interrupt parent, aliases and the two CPU nodes */
+	compatible = "fsl,MPC8572";
+	#address-cells = <2>;	/* root-level addresses take two cells */
+	#size-cells = <2>;	/* root-level sizes take two cells */
+	interrupt-parent = <&mpic>;	/* the mpic label is not defined in this file */
+
+	aliases {	/* every label referenced here is defined outside this file */
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8572@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* L2 is the label on l2-cache-controller@20000 in mpc8572si-post.dtsi */
+		};
+
+		PowerPC,8572@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;	/* both CPUs reference the same L2 node */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi
new file mode 100644
index 0000000000..77900b9241
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8641 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ */
+
+&lbc {	/* SoC-level properties for the node labelled "lbc"; the label is not defined in this file */
+	#address-cells = <2>;	/* child addresses take two cells */
+	#size-cells = <1>;	/* child sizes take one cell */
+	compatible = "fsl,mpc8641-localbus", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+&soc {	/* SoC-level properties and on-chip device overrides for the node labelled "soc"; the label is not defined in this file */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,mpc8641-soc", "simple-bus";
+	bus-frequency = <0>;
+
+	mcm-law@0 {
+		compatible = "fsl,mcm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <10>;
+	};
+
+	mcm@1000 {
+		compatible = "fsl,mpc8641-mcm", "fsl,mcm";
+		reg = <0x1000 0x1000>;
+		interrupts = <17 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+	serial@4600 {	/* sets only the interrupt specifier for this serial node */
+		interrupts = <28 2 0 0>;
+	};
+/include/ "pq3-dma-0.dtsi"
+	dma@21300 {	/* channel overrides are nested here so they merge with the controller's channel children instead of creating reg-less nodes on the SoC bus */
+		compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
+		dma-channel@0 {
+			compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+		};
+		dma-channel@80 {
+			compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+		};
+		dma-channel@100 {
+			compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+		};
+		dma-channel@180 {
+			compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+		};
+	};
+
+/include/ "pq3-etsec1-0.dtsi"
+	ethernet@24000 {	/* each ethernet override below sets only the model string */
+		model = "TSEC";
+	};
+/include/ "pq3-etsec1-1.dtsi"
+	ethernet@25000 {
+		model = "TSEC";
+	};
+/include/ "pq3-etsec1-2.dtsi"
+	ethernet@26000 {
+		model = "TSEC";
+	};
+/include/ "pq3-etsec1-3.dtsi"
+	ethernet@27000 {
+		model = "TSEC";
+	};
+
+/include/ "qoriq-mpic.dtsi"
+	msi@41600 {	/* each msi override below sets only the compatible list */
+		compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
+	};
+	msi@41800 {
+		compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
+	};
+	msi@41a00 {
+		compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,mpc8641-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
+
+&pci0 {	/* SoC-level properties for the node labelled "pci0"; the label is not defined in this file */
+	compatible = "fsl,mpc8641-pcie";
+	device_type = "pci";
+	#interrupt-cells = <1>;
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <100000000>;
+	interrupts = <24 2 0 0>;
+
+	pcie@0 {	/* child node at address 0; carries the interrupt routing table */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <24 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* compare bits 0xf800 of the first address cell and the low 3 bits of the pin cell */
+		interrupt-map = <	/* pins 1-4 route to &mpic sources 0x0-0x3 */
+			0x0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0x0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0x0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0x0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&pci1 {	/* SoC-level properties for the node labelled "pci1"; the label is not defined in this file */
+	compatible = "fsl,mpc8641-pcie";
+	device_type = "pci";
+	#interrupt-cells = <1>;
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <100000000>;
+	interrupts = <25 2 0 0>;
+
+	pcie@0 {	/* child node at address 0; carries the interrupt routing table */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <25 2 0 0>;	/* same specifier as the parent controller */
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* compare bits 0xf800 of the first address cell and the low 3 bits of the pin cell */
+		interrupt-map = <	/* pins 1-4 route to &mpic sources 0x4-0x7 */
+			0x0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0x0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0x0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0x0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi
new file mode 100644
index 0000000000..a9f7e79d33
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8641 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ */
+
+/dts-v1/;
+
+/ {	/* Root-level defaults for MPC8641: cell sizes, interrupt parent, aliases and the two CPU nodes */
+	#address-cells = <1>;	/* root-level addresses take one cell */
+	#size-cells = <1>;	/* root-level sizes take one cell */
+	interrupt-parent = <&mpic>;	/* the mpic label is not defined in this file */
+
+	aliases {	/* every label referenced here is defined outside this file */
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8641@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;	/* 32768 = 32 * 1024 */
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;	/* NOTE(review): the three zero frequencies are presumably placeholders filled in before boot - confirm */
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+
+		PowerPC,8641@1 {	/* identical to CPU 0 apart from reg */
+			device_type = "cpu";
+			reg = <1>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mvme2500.dts b/arch/powerpc/boot/dts/fsl/mvme2500.dts
new file mode 100644
index 0000000000..e0f048a039
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mvme2500.dts
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device tree source for the Emerson/Artesyn MVME2500
+ *
+ * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Based on: P2020 DS Device Tree Source
+ * Copyright 2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {	/* Board description for the MVME2500: placed between the p2020si-pre.dtsi and p2020si-post.dtsi includes */
+	model = "MVME2500";
+	compatible = "artesyn,MVME2500";
+
+	aliases {	/* serial2-serial5 are the four local-bus UARTs defined under the lbc node below */
+		serial2 = &serial2;
+		serial3 = &serial3;
+		serial4 = &serial4;
+		serial5 = &serial5;
+	};
+
+	memory {	/* no reg property is given here */
+		device_type = "memory";
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0 0xffe00000 0x100000>;	/* child address 0x0 maps to parent address <0 0xffe00000>, length 0x100000 */
+
+		i2c@3000 {
+			hwmon@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+				interrupts = <8 1 0 0>;
+			};
+
+			eeprom@54 {
+				compatible = "atmel,24c64";
+				reg = <0x54>;
+			};
+
+			eeprom@52 {
+				compatible = "atmel,24c512";
+				reg = <0x52>;
+			};
+
+			eeprom@53 {
+				compatible = "atmel,24c512";
+				reg = <0x53>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,24c02";
+				reg = <0x50>;
+			};
+
+		};
+
+		spi0: spi@7000 {
+			fsl,espi-num-chipselects = <2>;	/* two chip selects, one per flash node below */
+
+			flash@0 {
+				compatible = "atmel,at25df641", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <10000000>;
+			};
+			flash@1 {
+				compatible = "atmel,at25df641", "jedec,spi-nor";
+				reg = <1>;
+				spi-max-frequency = <10000000>;
+			};
+		};
+
+		usb@22000 {
+			dr_mode = "host";
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@24520 {	/* all four external PHYs plus tbi0 sit on this MDIO node */
+			phy1: ethernet-phy@1 {
+				compatible = "brcm,bcm54616S";
+				interrupts = <6 1 0 0>;
+				reg = <0x1>;
+			};
+
+			phy2: ethernet-phy@2 {	/* not referenced by any phy-handle in this file */
+				compatible = "brcm,bcm54616S";
+				interrupts = <6 1 0 0>;	/* same interrupt specifier as phy1 */
+				reg = <0x2>;
+			};
+
+			phy3: ethernet-phy@3 {
+				compatible = "brcm,bcm54616S";
+				interrupts = <5 1 0 0>;
+				reg = <0x3>;
+			};
+
+			phy7: ethernet-phy@7 {
+				compatible = "brcm,bcm54616S";
+				interrupts = <7 1 0 0>;
+				reg = <0x7>;
+			};
+
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ethernet@25000 {
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy7>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@25520 {	/* holds only the TBI PHY for enet1 */
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet2: ethernet@26000 {
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy3>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@26520 {	/* holds only the TBI PHY for enet2 */
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		ranges = <0x0 0x0 0x0 0xfff00000 0x00080000
+			  0x1 0x0 0x0 0xffc40000 0x00010000
+			  0x2 0x0 0x0 0xffc50000 0x00010000
+			  0x3 0x0 0x0 0xffc60000 0x00010000
+			  0x4 0x0 0x0 0xffc70000 0x00010000
+			  0x6 0x0 0x0 0xffc80000 0x00010000
+			  0x5 0x0 0x0 0xffdf0000 0x00008000>;
+
+		serial2: serial@1,0 {	/* the first reg cell of each child matches the first cell of one ranges entry above */
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x1 0x0 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <11 2 0 0>;
+		};
+
+		serial3: serial@2,0 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x2 0x0 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <1 2 0 0>;
+		};
+
+		serial4: serial@3,0 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x3 0x0 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <2 2 0 0>;
+		};
+
+		serial5: serial@4,0 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4 0x0 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <3 2 0 0>;
+		};
+
+		mram@0,0 {
+			compatible = "everspin,mram", "mtd-ram";
+			reg = <0x0 0x0 0x80000>;	/* 0x80000 equals the length of the first ranges entry */
+			bank-width = <2>;
+		};
+
+		board-control@5,0 {
+			compatible = "artesyn,mvme2500-fpga";
+			reg = <0x5 0x0 0x01000>;
+		};
+
+		cpld@6,0 {
+			compatible = "artesyn,mvme2500-cpld";
+			reg = <0x6 0x0 0x10000>;
+			interrupts = <9 1 0 0>;
+		};
+	};
+
+	pci0: pcie@ffe08000 {
+		reg = <0 0xffe08000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 {	/* child ranges repeat the 0x80000000 window and the 0x10000 window with identical child and parent addresses */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci1: pcie@ffe09000 {
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		pcie@0 {	/* same layout as pci0, with the first window at 0xa0000000 */
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+
+	};
+
+	pci2: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		pcie@0 {	/* same layout as pci0, with the first window at 0xc0000000 */
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+};
+
+/include/ "p2020si-post.dtsi"
+
+/ {	/* Board overrides placed after the p2020si-post.dtsi include */
+	soc@ffe00000 {
+		serial@4600 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		sdhc@2e000 {
+			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+			non-removable;	/* boolean property: present with no value */
+		};
+
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts
new file mode 100644
index 0000000000..bcc9dedd63
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device tree source for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {	/* Board description for the MVME7100: placed between the mpc8641si-pre.dtsi and mpc8641si-post.dtsi includes */
+	model = "MVME7100";
+	compatible = "artesyn,MVME7100";
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x80000000>;	/* one address cell and one size cell: base 0x0, length 0x80000000 */
+	};
+
+	soc: soc@f1000000 {
+		ranges = <0x00000000 0xf1000000 0x00100000>;	/* child address 0x0 maps to 0xf1000000, length 0x100000 */
+
+		i2c@3000 {
+			hwmon@4c {
+				compatible = "dallas,max6649";
+				reg = <0x4c>;
+			};
+
+			rtc@68 {
+				status = "disabled";
+			};
+		};
+
+
+		enet0: ethernet@24000 {
+			phy-handle = <&phy0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@24520 {	/* all four PHYs sit on this MDIO node; the other three mdio nodes below are disabled */
+			phy0: ethernet-phy@1 {
+				reg = <1>;
+			};
+			phy1: ethernet-phy@2 {
+				reg = <2>;
+			};
+			phy2: ethernet-phy@3 {
+				reg = <3>;
+			};
+			phy3: ethernet-phy@4 {
+				reg = <4>;
+			};
+		};
+
+		enet1: ethernet@25000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@26000 {
+			phy-handle = <&phy2>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		enet3: ethernet@27000 {
+			phy-handle = <&phy3>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@27520 {
+			status = "disabled";
+		};
+
+		serial1: serial@4600 {
+			status = "disabled";
+		};
+	};
+
+	lbc: localbus@f1005000 {
+		reg = <0xf1005000 0x1000>;
+
+		ranges = <0 0 0xf8000000 0x08000000	// NOR Flash (128MB)
+			  2 0 0xf2030000 0x00010000	// NAND Flash (8GB)
+			  3 0 0xf2400000 0x00080000	// MRAM (512KB)
+			  4 0 0xf2000000 0x00010000	// BCSR
+			  5 0 0xf2010000 0x00010000>;	// QUART
+
+		bcsr@4,0 {
+			compatible = "artesyn,mvme7100-bcsr";
+			reg = <4 0 0x10000>;
+		};
+
+		serial@5,1000 {	/* the four serial nodes differ only in their reg offset (0x1000-0x4000) */
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <5 0x1000 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <11 1 0 0>;	/* the same interrupt specifier is used by all four serial nodes */
+		};
+
+		serial@5,2000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <5 0x2000 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <11 1 0 0>;
+		};
+
+		serial@5,3000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <5 0x3000 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <11 1 0 0>;
+		};
+
+		serial@5,4000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <5 0x4000 0x100>;
+			clock-frequency = <1843200>;
+			interrupts = <11 1 0 0>;
+		};
+	};
+
+	pci0: pcie@f1008000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@f1009000 {
+		status = "disabled";
+	};
+
+	chosen {
+		stdout-path = &serial0;	/* the serial0 label is not defined in this file */
+	};
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/oca4080.dts b/arch/powerpc/boot/dts/fsl/oca4080.dts
new file mode 100644
index 0000000000..17bc6f3912
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/oca4080.dts
@@ -0,0 +1,145 @@
+/*
+ * OCA4080 Device Tree Source
+ *
+ * Copyright 2014 Prodrive Technologies B.V.
+ *
+ * Based on:
+ * P4080DS Device Tree Source
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p4080si-pre.dtsi"
+
+/ {	/* Board description for the OCA4080: placed between the p4080si-pre.dtsi and p4080si-post.dtsi includes */
+	model = "fsl,OCA4080";
+	compatible = "fsl,OCA4080";
+	#address-cells = <2>;	/* root-level addresses take two cells */
+	#size-cells = <2>;	/* root-level sizes take two cells */
+	interrupt-parent = <&mpic>;	/* the mpic label is not defined in this file */
+
+	memory {	/* no reg property is given here */
+		device_type = "memory";
+	};
+
+	reserved-memory {	/* the three regions give only size and alignment, no fixed address */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;	/* 0x1000000 = 16 MiB */
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 0x400000 = 4 MiB */
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 0x2000000 = 32 MiB */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;	/* child address 0x0 maps to parent address <0xf 0xfe000000> */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		i2c@118000 {	/* all four i2c nodes and both usb nodes are disabled on this board */
+			status = "disabled";
+		};
+
+		i2c@118100 {
+			status = "disabled";
+		};
+
+		i2c@119000 {
+			status = "disabled";
+		};
+
+		i2c@119100 {
+			status = "disabled";
+		};
+
+		usb0: usb@210000 {
+			status = "disabled";
+		};
+
+		usb1: usb@211000 {
+			status = "disabled";
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xef800000 0x800000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x00800000>;	/* 0x800000 equals the length of the single ranges entry above */
+			bank-width = <2>;
+			device-width = <2>;
+		};
+	};
+
+	pci0: pcie@ffe200000 {	/* all three PCIe controllers are disabled on this board */
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe201000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe202000 {
+		status = "disabled";
+	};
+};
+
+/include/ "p4080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts
new file mode 100644
index 0000000000..1e33d78d8c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1010 RDB Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1010si-pre.dtsi"
+
+/ {	/* Root node for this P1010RDB variant; its body comes from the include below */
+	model = "fsl,P1010RDB";
+	compatible = "fsl,P1010RDB";
+
+	/include/ "p1010rdb_32b.dtsi"	/* included inside the root node; presumably the 32-bit address map (cf. the _36b variant) - confirm */
+};
+
+/include/ "p1010rdb.dtsi"
+/include/ "p1010rdb-pa.dtsi"
+/include/ "p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dtsi
new file mode 100644
index 0000000000..434fb2d585
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dtsi
@@ -0,0 +1,85 @@
+/*
+ * P1010 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc_nand {	/* Partition table for the node labelled "ifc_nand"; the partitions are contiguous from 0x0 to 0x2000000 */
+	partition@0 {
+		/* This location must not be altered  */
+		/* 1MB for u-boot Bootloader Image */
+		reg = <0x0 0x00100000>;
+		label = "NAND U-Boot Image";
+		read-only;	/* the only partition marked read-only */
+	};
+
+	partition@100000 {
+		/* 1MB for DTB Image */
+		reg = <0x00100000 0x00100000>;
+		label = "NAND DTB Image";
+	};
+
+	partition@200000 {
+		/* 4MB for Linux Kernel Image */
+		reg = <0x00200000 0x00400000>;
+		label = "NAND Linux Kernel Image";
+	};
+
+	partition@600000 {
+		/* 4MB for Compressed Root file System Image */
+		reg = <0x00600000 0x00400000>;
+		label = "NAND Compressed RFS Image";
+	};
+
+	partition@a00000 {
+		/* 15MB for JFFS2 based Root file System */
+		reg = <0x00a00000 0x00f00000>;	/* ends at 0x1900000, where the next partition starts */
+		label = "NAND JFFS2 Root File System";
+	};
+
+	partition@1900000 {
+		/* 7MB for User Area */
+		reg = <0x01900000 0x00700000>;	/* ends at 0x2000000 */
+		label = "NAND User area";
+	};
+};
+
+&phy0 {	/* interrupt specifiers for the three PHY labels; the labels are not defined in this file */
+	interrupts = <1 1 0 0>;
+};
+
+&phy1 {
+	interrupts = <2 1 0 0>;
+};
+
+&phy2 {
+	interrupts = <4 1 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pa_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pa_36b.dts
new file mode 100644
index 0000000000..03bd76ca84
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pa_36b.dts
@@ -0,0 +1,46 @@
+/*
+ * P1010 RDB Device Tree Source (36-bit address map)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1010si-pre.dtsi"
+
+/ {	/* Root node for the 36-bit address map variant; its body comes from the include below */
+	model = "fsl,P1010RDB";
+	compatible = "fsl,P1010RDB";
+
+	/include/ "p1010rdb_36b.dtsi"	/* included inside the root node */
+};
+
+/include/ "p1010rdb.dtsi"
+/include/ "p1010rdb-pa.dtsi"
+/include/ "p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
new file mode 100644
index 0000000000..3a94acbb3c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1010 RDB Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1010si-pre.dtsi"
+
+/ {
+	model = "fsl,P1010RDB-PB";
+	compatible = "fsl,P1010RDB-PB";
+
+	/include/ "p1010rdb_32b.dtsi"
+};
+
+/include/ "p1010rdb.dtsi"
+
+&phy0 {
+	interrupts = <0 1 0 0>;
+};
+
+&phy1 {
+	interrupts = <2 1 0 0>;
+};
+
+&phy2 {
+	interrupts = <1 1 0 0>;
+};
+
+/include/ "p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
new file mode 100644
index 0000000000..4cf255fedc
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
@@ -0,0 +1,58 @@
+/*
+ * P1010 RDB Device Tree Source (36-bit address map)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1010si-pre.dtsi"
+
+/ {
+	model = "fsl,P1010RDB-PB";
+	compatible = "fsl,P1010RDB-PB";
+
+	/include/ "p1010rdb_36b.dtsi"
+};
+
+/include/ "p1010rdb.dtsi"
+
+&phy0 {
+	interrupts = <0 1 0 0>;
+};
+
+&phy1 {
+	interrupts = <2 1 0 0>;
+};
+
+&phy2 {
+	interrupts = <1 1 0 0>;
+};
+
+/include/ "p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
new file mode 100644
index 0000000000..2ca9cee2dd
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
@@ -0,0 +1,233 @@
+/*
+ * P1010 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_ifc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x2000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 7 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00700000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@800000 {
+			/* 20MB for JFFS2 based Root file System */
+			reg = <0x00800000 0x01400000>;
+			label = "NOR JFFS2 Root File System";
+		};
+
+		partition@1f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x01f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	ifc_nand: nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,ifc-nand";
+		reg = <0x1 0x0 0x10000>;
+	};
+
+	cpld@3,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p1010rdb-cpld";
+		reg = <0x3 0x0 0x0000020>;
+		bank-width = <1>;
+		device-width = <1>;
+	};
+};
+
+&board_soc {
+	i2c@3000 {
+		eeprom@50 {
+			compatible = "st,24c256", "atmel,24c256";
+			reg = <0x50>;
+		};
+
+		rtc@68 {
+			compatible = "pericom,pt7c4338";
+			reg = <0x68>;
+		};
+	};
+
+	i2c@3100 {
+		eeprom@52 {
+			compatible = "atmel,24c01";
+			reg = <0x52>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>;
+
+			partition@0 {
+				/* 1MB in total (see reg) for the u-boot */
+				/* Bootloader Image and Environment */
+				reg = <0x0 0x00100000>;
+				label = "SPI Flash U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 512KB for DTB Image */
+				reg = <0x00100000 0x00080000>;
+				label = "SPI Flash DTB Image";
+			};
+
+			partition@180000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00180000 0x00400000>;
+				label = "SPI Flash Linux Kernel Image";
+			};
+
+			partition@580000 {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00580000 0x00400000>;
+				label = "SPI Flash Compressed RFSImage";
+			};
+
+			partition@980000 {
+				/* 6.5MB for JFFS2 based RFS */
+				reg = <0x00980000 0x00680000>;
+				label = "SPI Flash JFFS2 RFS";
+			};
+		};
+	};
+
+	usb@22000 {
+		phy_type = "utmi";
+		dr_mode = "host";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			reg = <0x1>;
+		};
+
+		phy1: ethernet-phy@1 {
+			reg = <0x0>;
+		};
+
+		phy2: ethernet-phy@2 {
+			reg = <0x2>;
+		};
+
+		tbi-phy@3 {
+			device_type = "tbi-phy";
+			reg = <0x3>;
+		};
+	};
+
+	mdio@25000 {
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	ptp_clock@b0e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0xb0e00 0xb0>;
+		interrupts = <68 2 0 0 69 2 0 0>;
+		fsl,tclk-period	= <10>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0x80000016>;
+		fsl,tmr-fiper1	= <999999990>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <199999999>;
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy1>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy2>;
+		tbi-handle = <&tbi1>;
+		phy-connection-type = "sgmii";
+	};
+};
+
+&pci0 {
+	pcie@0 {
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			/*
+			 *irq[4:5] are active-high
+			 *irq[6:7] are active-low
+			 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
new file mode 100644
index 0000000000..fdc19aab2f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
@@ -0,0 +1,79 @@
+/*
+ * P1010 RDB Device Tree Source stub (32-bit address map)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+memory {
+	device_type = "memory";
+};
+
+board_ifc: ifc: ifc@ffe1e000 {
+	/* NOR, NAND Flashes and CPLD on board */
+	ranges = <0x0 0x0 0x0 0xee000000 0x02000000
+		  0x1 0x0 0x0 0xff800000 0x00010000
+		  0x3 0x0 0x0 0xffb00000 0x00000020>;
+	reg = <0x0 0xffe1e000 0 0x2000>;
+};
+
+board_soc: soc: soc@ffe00000 {
+	ranges = <0x0 0x0 0xffe00000 0x100000>;
+};
+
+pci0: pcie@ffe09000 {
+	reg = <0 0xffe09000 0 0x1000>;
+	ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+		  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+	pcie@0 {
+		ranges = <0x2000000 0x0 0xa0000000
+			  0x2000000 0x0 0xa0000000
+			  0x0 0x20000000
+
+			  0x1000000 0x0 0x0
+			  0x1000000 0x0 0x0
+			  0x0 0x100000>;
+	};
+};
+
+pci1: pcie@ffe0a000 {
+	reg = <0 0xffe0a000 0 0x1000>;
+	ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+		  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+	pcie@0 {
+		ranges = <0x2000000 0x0 0x80000000
+			  0x2000000 0x0 0x80000000
+			  0x0 0x20000000
+
+			  0x1000000 0x0 0x0
+			  0x1000000 0x0 0x0
+			  0x0 0x100000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
new file mode 100644
index 0000000000..de2fceed4f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
@@ -0,0 +1,79 @@
+/*
+ * P1010 RDB Device Tree Source stub (36-bit address map)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+memory {
+	device_type = "memory";
+};
+
+board_ifc: ifc: ifc@fffe1e000 {
+	/* NOR, NAND Flashes and CPLD on board */
+	ranges = <0x0 0x0 0xf 0xee000000 0x02000000
+		  0x1 0x0 0xf 0xff800000 0x00010000
+		  0x3 0x0 0xf 0xffb00000 0x00000020>;
+	reg = <0xf 0xffe1e000 0 0x2000>;
+};
+
+board_soc: soc: soc@fffe00000 {
+	ranges = <0x0 0xf 0xffe00000 0x100000>;
+};
+
+pci0: pcie@fffe09000 {
+	reg = <0xf 0xffe09000 0 0x1000>;
+	ranges = <0x2000000 0x0 0xc0000000 0xc 0x20000000 0x0 0x20000000
+		  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+	pcie@0 {
+		ranges = <0x2000000 0x0 0xc0000000
+			  0x2000000 0x0 0xc0000000
+			  0x0 0x20000000
+
+			  0x1000000 0x0 0x0
+			  0x1000000 0x0 0x0
+			  0x0 0x100000>;
+	};
+};
+
+pci1: pcie@fffe0a000 {
+	reg = <0xf 0xffe0a000 0 0x1000>;
+	ranges = <0x2000000 0x0 0xc0000000 0xc 0x20000000 0x0 0x20000000
+		  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+	pcie@0 {
+		ranges = <0x2000000 0x0 0xc0000000
+			  0x2000000 0x0 0xc0000000
+			  0x0 0x20000000
+
+			  0x1000000 0x0 0x0
+			  0x1000000 0x0 0x0
+			  0x0 0x100000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
new file mode 100644
index 0000000000..ccda0a91ab
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -0,0 +1,191 @@
+/*
+ * P1010/P1014 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <16 2 0 0 19 2 0 0>;
+};
+
+/* controller at 0x9000 */
+&pci0 {
+	compatible = "fsl,p1010-pcie", "fsl,qoriq-pcie-v2.3";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci1 {
+	compatible = "fsl,p1010-pcie", "fsl,qoriq-pcie-v2.3";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,p1010-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,p1010-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,p1010-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+	i2c@3000 {
+		fsl,i2c-erratum-a004447;
+	};
+
+/include/ "pq3-i2c-1.dtsi"
+	i2c@3100 {
+		fsl,i2c-erratum-a004447;
+	};
+
+/include/ "pq3-duart-0.dtsi"
+/include/ "pq3-espi-0.dtsi"
+	spi0: spi@7000 {
+		fsl,espi-num-chipselects = <1>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+/include/ "pq3-sata2-0.dtsi"
+/include/ "pq3-sata2-1.dtsi"
+
+	can0: can@1c000 {
+		compatible = "fsl,p1010-flexcan";
+		reg = <0x1c000 0x1000>;
+		interrupts = <48 0x2 0 0>;
+		big-endian;
+	};
+
+	can1: can@1d000 {
+		compatible = "fsl,p1010-flexcan";
+		reg = <0x1d000 0x1000>;
+		interrupts = <61 0x2 0 0>;
+		big-endian;
+	};
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,p1010-l2-cache-controller",
+				"fsl,p1014-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2,256K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		compatible = "fsl,p1010-esdhc", "fsl,esdhc";
+		sdhci,auto-cmd12;
+	};
+
+/include/ "pq3-sec4.4-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+/include/ "pq3-etsec2-1.dtsi"
+/include/ "pq3-etsec2-2.dtsi"
+
+	global-utilities@e0000 {
+		compatible = "fsl,p1010-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi
new file mode 100644
index 0000000000..6e76f9b282
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi
@@ -0,0 +1,67 @@
+/*
+ * P1010/P1014 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P1010";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		can0 = &can0;
+		can1 = &can1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1010@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020mbg-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1020mbg-pc.dtsi
new file mode 100644
index 0000000000..a24699cfea
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020mbg-pc.dtsi
@@ -0,0 +1,151 @@
+/*
+ * P1020 MBG-PC Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x4000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* 128KB for DTB Image */
+			reg = <0x0 0x00020000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@20000 {
+			/* 3.875 MB for Linux Kernel Image */
+			reg = <0x00020000 0x003e0000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 58MB for Root file System */
+			reg = <0x00400000 0x03a00000>;
+			label = "NOR Root File System";
+		};
+
+		partition@3e00000 {
+			/* This location must not be altered  */
+			/* 1M for Vitesse 7385 Switch firmware */
+			reg = <0x3e00000 0x00100000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@3f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x03f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	L2switch@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "vitesse-7385";
+		reg = <0x2 0x0 0x20000>;
+	};
+};
+
+&soc {
+	i2c@3000 {
+		rtc@68 {
+			compatible = "dallas,ds1339";
+			reg = <0x68>;
+		};
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+	};
+
+	mdio@25000 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@b0000 {
+		fixed-link = <1 1 1000 0 0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi1>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	/* USB2 is shared with localbus, so it must be disabled
+	   by default; hence 'status = "disabled";' below.
+	   NOTE(review): this comment used to say U-Boot does
+	   not clear the status property when it enables USB2,
+	   so the node was to be commented out instead - confirm.
+	*/
+	usb@23000 {
+		status = "disabled";
+		phy_type = "ulpi";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020mbg-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_32b.dts
new file mode 100644
index 0000000000..b29d1fcb5e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_32b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 MBG-PC Device Tree Source (32-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1020MBG-PC";
+	compatible = "fsl,P1020MBG-PC";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0x0 0xffe05000 0x0 0x1000>;
+
+		/* NOR and L2 switch */
+		ranges = <0x0 0x0 0x0 0xec000000 0x04000000
+			  0x1 0x0 0x0 0xffa00000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@ffe09000 {
+		reg = <0x0 0xffe09000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0x0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0x0 0xffe0a000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0x0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020mbg-pc.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020mbg-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_36b.dts
new file mode 100644
index 0000000000..678d0eec24
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_36b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 MBG-PC Device Tree Source (36-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ { /* board root for the P1020 MBG-PC, 36-bit map: upper address cell is 0xf or 0xc */
+	model = "fsl,P1020MBG-PC";
+	compatible = "fsl,P1020MBG-PC";
+
+	memory { /* no reg here; presumably filled in before boot - TODO confirm */
+		device_type = "memory";
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0x0 0x1000>;
+
+		/* NOR and L2 switch: windows of 64 MB, 256 KB and 128 KB */
+		ranges = <0x0 0x0 0xf 0xec000000 0x04000000
+			  0x1 0x0 0xf 0xffa00000 0x00040000
+			  0x2 0x0 0xf 0xffb00000 0x00020000>;
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>; /* 1 MB at 0xfffe00000 */
+	};
+
+	pci0: pcie@fffe09000 { /* mem: 512 MB at CPU 0xc20000000; I/O: 64 KB at CPU 0xfffc10000 */
+		reg = <0xf 0xffe09000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 { /* mem: 512 MB at CPU 0xc00000000; I/O: 64 KB at CPU 0xfffc00000 */
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020mbg-pc.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1020rdb-pc.dtsi
new file mode 100644
index 0000000000..a13876c05c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc.dtsi
@@ -0,0 +1,247 @@
+/*
+ * P1020 RDB-PC Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc { /* local-bus children; the first reg cell equals the N in name@N,0 */
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x1000000>; /* 16 MB; the partitions below tile all of it */
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 3.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00380000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 11MB for JFFS2 based Root file System */
+			reg = <0x00400000 0x00b00000>;
+			label = "NOR JFFS2 Root File System";
+		};
+
+		partition@f00000 {
+			/* This location must not be altered */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x00f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p1020-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x1 0x0 0x40000>; /* 256 KB window; the partitions below span 32 MB */
+
+		partition@0 {
+			/* This location must not be altered */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND DTB Image";
+		};
+
+		partition@200000 {
+			/* 4MB for Linux Kernel Image */
+			reg = <0x00200000 0x00400000>;
+			label = "NAND Linux Kernel Image";
+		};
+
+		partition@600000 {
+			/* 4MB for Compressed Root file System Image */
+			reg = <0x00600000 0x00400000>;
+			label = "NAND Compressed RFS Image";
+		};
+
+		partition@a00000 {
+			/* 7MB for JFFS2 based Root file System */
+			reg = <0x00a00000 0x00700000>;
+			label = "NAND JFFS2 Root File System";
+		};
+
+		partition@1100000 {
+			/* 15MB for writable user area */
+			reg = <0x01100000 0x00f00000>;
+			label = "NAND Writable User area";
+		};
+	};
+
+	L2switch@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "vitesse-7385";
+		reg = <0x2 0x0 0x20000>; /* 128 KB */
+	};
+
+	cpld@3,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cpld";
+		reg = <0x3 0x0 0x20000>; /* 128 KB */
+		read-only;
+	};
+};
+
+&soc { /* board-level additions to the SoC node: RTC, SPI flash, USB, PHYs, eTSECs */
+	i2c@3000 {
+		rtc@68 {
+			compatible = "pericom,pt7c4338";
+			reg = <0x68>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 { /* 16 MB of partitions; each unit address equals its reg offset */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>; /* input clock */
+
+			partition@0 {
+				/* 512KB for u-boot Bootloader Image */
+				reg = <0x0 0x00080000>;
+				label = "u-boot";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 512KB for DTB Image */
+				reg = <0x00080000 0x00080000>;
+				label = "dtb";
+			};
+
+			partition@100000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00100000 0x00400000>;
+				label = "kernel";
+			};
+
+			partition@500000 {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00500000 0x00400000>;
+				label = "file system";
+			};
+
+			partition@900000 {
+				/* 7MB for JFFS2 based RFS */
+				reg = <0x00900000 0x00700000>;
+				label = "file system jffs2";
+			};
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	/* USB2 is shared with localbus, so it must be disabled
+	   by default. We can't put 'status = "disabled";' here
+	   since U-Boot doesn't clear the status property when
+	   it enables USB2. OTOH, U-Boot does create a new node
+	   when there isn't any. So, just comment it out.
+	usb@23000 {
+		phy_type = "ulpi";
+	};
+	*/
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 { /* referenced by enet1 below */
+			interrupt-parent = <&mpic>;
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 { /* referenced by enet2 below */
+			interrupt-parent = <&mpic>;
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+
+		tbi0: tbi-phy@11 {
+			device_type = "tbi-phy";
+			reg = <0x11>;
+		};
+	};
+
+	mdio@25000 {
+		tbi1: tbi-phy@11 { /* referenced by enet1's tbi-handle */
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@b0000 { /* no phy-handle: uses a fixed link instead */
+		fixed-link = <1 1 1000 0 0>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi1>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_32b.dts
new file mode 100644
index 0000000000..8175bf6f3e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_32b.dts
@@ -0,0 +1,90 @@
+/*
+ * P1020 RDB-PC Device Tree Source (32-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ { /* board root for the P1020 RDB-PC, 32-bit map: upper address cell is 0 */
+	model = "fsl,P1020RDB-PC";
+	compatible = "fsl,P1020RDB-PC";
+
+	memory { /* no reg here; presumably filled in before boot - TODO confirm */
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes, Vitesse 5 port L2 switch and CPLD (entries 0-3) */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xff800000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000
+			  0x3 0x0 0x0 0xffa00000 0x00020000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>; /* 1 MB at 0xffe00000 */
+	};
+
+	pci0: pcie@ffe09000 { /* mem: 512 MB at CPU 0xa0000000; I/O: 64 KB at CPU 0xffc10000 */
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0 0xffe09000 0 0x1000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 { /* mem: 512 MB at CPU 0x80000000; I/O: 64 KB at CPU 0xffc00000 */
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020rdb-pc.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_36b.dts
new file mode 100644
index 0000000000..01c3057951
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_36b.dts
@@ -0,0 +1,90 @@
+/*
+ * P1020 RDB-PC Device Tree Source (36-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ { /* board root for the P1020 RDB-PC, 36-bit map: upper address cell is 0xf or 0xc */
+	model = "fsl,P1020RDB-PC";
+	compatible = "fsl,P1020RDB-PC";
+
+	memory { /* no reg here; presumably filled in before boot - TODO confirm */
+		device_type = "memory";
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes, Vitesse 5 port L2 switch and CPLD (entries 0-3) */
+		ranges = <0x0 0x0 0xf 0xef000000 0x01000000
+			  0x1 0x0 0xf 0xff800000 0x00040000
+			  0x2 0x0 0xf 0xffb00000 0x00040000 /* NOTE(review): 32-bit map has 0x00020000 - confirm */
+			  0x3 0x0 0xf 0xffa00000 0x00020000>;
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>; /* 1 MB at 0xfffe00000 */
+	};
+
+	pci0: pcie@fffe09000 { /* mem: 512 MB at CPU 0xc20000000; I/O: 64 KB at CPU 0xfffc10000 */
+		reg = <0xf 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xc0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 { /* mem: 512 MB at CPU 0xc00000000; I/O: 64 KB at CPU 0xfffc00000 */
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020rdb-pc.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core0.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core0.dts
new file mode 100644
index 0000000000..42e1e2fc08
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core0.dts
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1020 RDB-PC Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb,
+ * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi.
+ *
+ * Note: add "-b 0" when compiling core0's dts.
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1020rdb-pc_32b.dts"
+
+/ { /* core0 overrides, applied on top of the included p1020rdb-pc_32b.dts */
+	model = "fsl,P1020RDB-PC";
+	compatible = "fsl,P1020RDB-PC";
+
+	aliases { /* no ethernet0 alias: enet0 is disabled below */
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		PowerPC,P1020@1 { /* the other core is not used by this tree */
+			status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {
+		serial1: serial@4600 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@b0000 {
+			status = "disabled";
+		};
+
+		mpic: pic@40000 {
+			/* sources of the devices disabled above, per the inline notes */
+			protected-sources = <
+			42 29 30 34	/* serial1, enet0-queue-group0 */
+			17 18 24 45	/* enet0-queue-group1, crypto */
+			>;
+			pic-no-reset; /* presumably keeps the shared PIC from being reset - confirm in binding */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core1.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core1.dts
new file mode 100644
index 0000000000..da9a8e73b3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core1.dts
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1020 RDB-PC Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, eth0, crypto.
+ *
+ * Note: add "-b 1" when compiling core1's dts.
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1020rdb-pc_32b.dts"
+
+/ { /* core1 overrides, applied on top of the included p1020rdb-pc_32b.dts */
+	model = "fsl,P1020RDB-PC";
+	compatible = "fsl,P1020RDB-PC";
+
+	aliases {
+		ethernet0 = &enet0;
+		serial0 = &serial1; /* serial1 is exposed to this core as serial0 */
+		};
+
+	cpus {
+		PowerPC,P1020@0 { /* the other core is not used by this tree */
+			status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 { /* every child below except the mpic is switched off for core1 */
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
+		};
+
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
+		dma@21300 {
+			status = "disabled";
+		};
+
+		mdio@24000 {
+			status = "disabled";
+		};
+
+		mdio@25000 {
+			status = "disabled";
+		};
+
+		enet1: ethernet@b1000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@b2000 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		mpic: pic@40000 {
+			/* interrupt sources of other devices, grouped per the inline notes */
+			protected-sources = <
+			16 		/* ecm, mem, L2, pci0, pci1 */
+			43 42 59	/* i2c, serial0, spi */
+			47 63 62 	/* gpio, tdm */
+			20 21 22 23	/* dma */
+			03 02 		/* mdio */
+			35 36 40	/* enet1-queue-group0 */
+			51 52 67	/* enet1-queue-group1 */
+			31 32 33	/* enet2-queue-group0 */
+			25 26 27	/* enet2-queue-group1 */
+			28 72 58 	/* usb, sdhci, crypto */
+			0xb0 0xb1 0xb2	/* message */
+			0xb3 0xb4 0xb5
+			0xb6 0xb7
+			0xe0 0xe1 0xe2	/* msi */
+			0xe3 0xe4 0xe5
+			0xe6 0xe7		/* sdhci, crypto , pci */
+			>;
+			pic-no-reset; /* presumably keeps the shared PIC from being reset - confirm in binding */
+		};
+
+		msi@41600 {
+			status = "disabled";
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe0a000 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb-pd.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pd.dts
new file mode 100644
index 0000000000..f2dc6c09be
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pd.dts
@@ -0,0 +1,292 @@
+/*
+ * P1020 RDB-PD Device Tree Source (32-bit address map)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1020RDB-PD";
+	compatible = "fsl,P1020RDB-PD";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 { /* children: the first reg cell equals the N in name@N,0 */
+		reg = <0x0 0xffe05000 0x0 0x1000>;
+
+		/* NOR, NAND flash, CPLD and L2 switch (entries 0-3) */
+		ranges = <0x0 0x0 0x0 0xec000000 0x04000000
+			  0x1 0x0 0x0 0xff800000 0x00040000
+			  0x2 0x0 0x0 0xffa00000 0x00020000
+			  0x3 0x0 0x0 0xffb00000 0x00020000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x4000000>; /* 64 MB; the partitions below tile all of it */
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				/* 128KB for DTB Image */
+				reg = <0x0 0x00020000>;
+				label = "NOR DTB Image";
+			};
+
+			partition@20000 {
+				/* 3.875 MB for Linux Kernel Image */
+				reg = <0x00020000 0x003e0000>;
+				label = "NOR Linux Kernel Image";
+			};
+
+			partition@400000 {
+				/* 58MB for Root file System */
+				reg = <0x00400000 0x03a00000>;
+				label = "NOR Root File System";
+			};
+
+			partition@3e00000 {
+				/* This location must not be altered */
+				/* 1M for Vitesse 7385 Switch firmware */
+				reg = <0x3e00000 0x00100000>;
+				label = "NOR Vitesse-7385 Firmware";
+				read-only;
+			};
+
+			partition@3f00000 {
+				/* This location must not be altered */
+				/* 512KB for u-boot Bootloader Image */
+				/* 512KB for u-boot Environment Variables */
+				reg = <0x03f00000 0x00100000>;
+				label = "NOR U-Boot Image";
+				read-only;
+			};
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p1020-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x40000>; /* 256 KB window; the partitions below span 128 MB */
+
+			partition@0 {
+				/* This location must not be altered */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND DTB Image";
+			};
+
+			partition@200000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00200000 0x00400000>;
+				label = "NAND Linux Kernel Image";
+			};
+
+			partition@600000 {
+				/* 122MB for File System Image */
+				reg = <0x00600000 0x07a00000>;
+				label = "NAND File System Image";
+			};
+		};
+
+		cpld@2,0 {
+			compatible = "fsl,p1020rdb-pd-cpld";
+			reg = <0x2 0x0 0x20000>; /* 128 KB */
+		};
+
+		L2switch@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "vitesse-7385";
+			reg = <0x3 0x0 0x20000>; /* 128 KB */
+		};
+	};
+
+	soc: soc@ffe00000 { /* SoC window plus board devices: RTC, SPI, PHYs, PTP, eTSECs, USB */
+		ranges = <0x0 0x0 0xffe00000 0x100000>; /* 1 MB at 0xffe00000 */
+
+		i2c@3000 {
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		spi@7000 { /* each child's unit address equals its reg value: 0, 1 and 2 */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				/* input clock */
+				spi-max-frequency = <40000000>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI DTB Image";
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI Linux Kernel Image";
+				};
+
+				partition@500000 {
+					/* 11MB for FS System Image */
+					reg = <0x00500000 0x00b00000>;
+					label = "SPI File System Image";
+				};
+			};
+
+			slic@1 {
+				compatible = "zarlink,le88266";
+				reg = <1>;
+				spi-max-frequency = <8000000>;
+			};
+
+			slic@2 {
+				compatible = "zarlink,le88266";
+				reg = <2>;
+				spi-max-frequency = <8000000>;
+			};
+		};
+
+		mdio@24000 {
+			phy0: ethernet-phy@0 { /* referenced by enet1 below */
+				interrupts = <3 1 0 0>;
+				reg = <0x0>;
+			};
+
+			phy1: ethernet-phy@1 { /* referenced by enet2 below */
+				interrupts = <2 1 0 0>;
+				reg = <0x1>;
+			};
+		};
+
+		mdio@25000 {
+			tbi1: tbi-phy@11 { /* referenced by enet1's tbi-handle */
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26000 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		ptp_clock@b0e00 {
+			compatible = "fsl,etsec-ptp";
+			reg = <0xb0e00 0xb0>;
+			interrupts = <68 2 0 0 69 2 0 0>;
+			fsl,tclk-period	= <10>;
+			fsl,tmr-prsc	= <2>;
+			fsl,tmr-add	= <0x80000016>;
+			fsl,tmr-fiper1	= <999999990>;
+			fsl,tmr-fiper2	= <99990>;
+			fsl,max-adj	= <199999999>;
+		};
+
+		enet0: ethernet@b0000 { /* no phy-handle: uses a fixed link instead */
+			fixed-link = <1 1 1000 0 0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		enet1: ethernet@b1000 {
+			phy-handle = <&phy0>;
+			tbi-handle = <&tbi1>;
+			phy-connection-type = "sgmii";
+		};
+
+		enet2: ethernet@b2000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+		};
+	};
+
+	pci0: pcie@ffe09000 { /* mem: 512 MB at CPU 0xa0000000; I/O: 64 KB at CPU 0xffc10000 */
+		reg = <0x0 0xffe09000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0x0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc10000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 { /* mem: 512 MB at CPU 0x80000000; I/O: 64 KB at CPU 0xffc00000 */
+		reg = <0x0 0xffe0a000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0x0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc00000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb.dts b/arch/powerpc/boot/dts/fsl/p1020rdb.dts
new file mode 100644
index 0000000000..1a8d81ee41
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb.dts
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1020 RDB Device Tree Source
+ *
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ { /* board root for the P1020 RDB: supplies addresses and top-level ranges */
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB";
+
+	memory { /* no reg here; presumably filled in before boot - TODO confirm */
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@ffe05000 { /* two labels; p1020rdb.dtsi refers to &board_lbc */
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xffa00000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000>;
+	};
+
+	board_soc: soc: soc@ffe00000 { /* two labels; board_soc presumably used by p1020rdb.dtsi - verify */
+		ranges = <0x0 0x0 0xffe00000 0x100000>; /* 1 MB at 0xffe00000 */
+	};
+
+	pci0: pcie@ffe09000 { /* mem: 512 MB at CPU 0xa0000000; I/O: 64 KB at CPU 0xffc10000 */
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0 0xffe09000 0 0x1000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 { /* mem: 512 MB at CPU 0x80000000; I/O: 64 KB at CPU 0xffc00000 */
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 { /* child and parent addresses are identical in both entries */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020rdb.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1020rdb.dtsi
new file mode 100644
index 0000000000..703142ee66
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb.dtsi
@@ -0,0 +1,246 @@
+/*
+ * P1020 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x1000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR (RO) Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR (RO) DTB Image";
+			read-only;
+		};
+
+		partition@80000 {
+			/* 3.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00380000>;
+			label = "NOR (RO) Linux Kernel Image";
+			read-only;
+		};
+
+		partition@400000 {
+			/* 11MB for JFFS2 based Root file System */
+			reg = <0x00400000 0x00b00000>;
+			label = "NOR (RW) JFFS2 Root File System";
+		};
+
+		partition@f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x00f00000 0x00100000>;
+			label = "NOR (RO) U-Boot Image";
+			read-only;
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p1020-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x1 0x0 0x40000>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND (RO) U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND (RO) DTB Image";
+			read-only;
+		};
+
+		partition@200000 {
+			/* 4MB for Linux Kernel Image */
+			reg = <0x00200000 0x00400000>;
+			label = "NAND (RO) Linux Kernel Image";
+			read-only;
+		};
+
+		partition@600000 {
+			/* 4MB for Compressed Root file System Image */
+			reg = <0x00600000 0x00400000>;
+			label = "NAND (RO) Compressed RFS Image";
+			read-only;
+		};
+
+		partition@a00000 {
+			/* 7MB for JFFS2 based Root file System */
+			reg = <0x00a00000 0x00700000>;
+			label = "NAND (RW) JFFS2 Root File System";
+		};
+
+		partition@1100000 {
+			/* 15MB for Writable User area */
+			reg = <0x01100000 0x00f00000>;
+			label = "NAND (RW) Writable User area";
+		};
+	};
+
+	L2switch@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "vitesse-7385";
+		reg = <0x2 0x0 0x20000>;
+	};
+};
+
+&board_soc {
+	i2c@3000 {
+		rtc@68 {
+			compatible = "dallas,ds1339";
+			reg = <0x68>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>; /* input clock */
+
+			partition@u-boot {
+				/* 512KB for u-boot Bootloader Image */
+				reg = <0x0 0x00080000>;
+				label = "u-boot";
+				read-only;
+			};
+
+			partition@dtb {
+				/* 512KB for DTB Image */
+				reg = <0x00080000 0x00080000>;
+				label = "dtb";
+				read-only;
+			};
+
+			partition@kernel {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00100000 0x00400000>;
+				label = "kernel";
+				read-only;
+			};
+
+			partition@fs {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00500000 0x00400000>;
+				label = "file system";
+				read-only;
+			};
+
+			partition@jffs-fs {
+				/* 7MB for JFFS2 based RFS */
+				reg = <0x00900000 0x00700000>;
+				label = "file system jffs2";
+			};
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+		dr_mode = "host";
+	};
+
+	/* USB2 is shared with localbus. It is used
+	   only in case of SPI and SD boot after
+	   appropriate device-tree fixup done by uboot */
+	usb@23000 {
+		phy_type = "ulpi";
+		dr_mode = "host";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			interrupt-parent = <&mpic>;
+			interrupts = <3 1>;
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 {
+			interrupt-parent = <&mpic>;
+			interrupts = <2 1>;
+			reg = <0x1>;
+		};
+
+		tbi-phy@2 {
+			device_type = "tbi-phy";
+			reg = <0x2>;
+		};
+	};
+
+	mdio@25000 {
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@b0000 {
+		fixed-link = <1 1 1000 0 0>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020rdb_36b.dts b/arch/powerpc/boot/dts/fsl/p1020rdb_36b.dts
new file mode 100644
index 0000000000..fd09a19789
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb_36b.dts
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1020 RDB Device Tree Source (36-bit address map)
+ *
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB";
+
+	memory {
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
+		ranges = <0x0 0x0 0xf 0xef000000 0x01000000
+			  0x1 0x0 0xf 0xffa00000 0x00040000
+			  0x2 0x0 0xf 0xffb00000 0x00020000>;
+	};
+
+	board_soc: soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xc0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020rdb.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
new file mode 100644
index 0000000000..642dc3a83d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
@@ -0,0 +1,185 @@
+/*
+ * P1020/P1011 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>,
+		     <16 2 0 0>;
+};
+
+/* controller at 0x9000 */
+&pci0 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci1 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,p1020-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,p1020-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,p1020-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+
+/include/ "pq3-espi-0.dtsi"
+	spi@7000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,p1020-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2,256K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+/include/ "pq3-usb2-dr-1.dtsi"
+	usb@23000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		compatible = "fsl,p1020-esdhc", "fsl,esdhc";
+		sdhci,auto-cmd12;
+	};
+/include/ "pq3-sec3.3-0.dtsi"
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: enet0_grp2: ethernet@b0000 {
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: enet1_grp2: ethernet@b1000 {
+	};
+
+/include/ "pq3-etsec2-2.dtsi"
+	enet2: enet2_grp2: ethernet@b2000 {
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,p1020-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
+
+/include/ "pq3-etsec2-grp2-0.dtsi"
+/include/ "pq3-etsec2-grp2-1.dtsi"
+/include/ "pq3-etsec2-grp2-2.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi
new file mode 100644
index 0000000000..fed9c4c8d9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi
@@ -0,0 +1,71 @@
+/*
+ * P1020/P1011 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P1020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P1020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020utm-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1020utm-pc.dtsi
new file mode 100644
index 0000000000..7ea85eabcc
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020utm-pc.dtsi
@@ -0,0 +1,140 @@
+/*
+ * P1020 UTM-PC Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x2000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* 256KB for DTB Image */
+			reg = <0x0 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@40000 {
+			/* 3.75 MB for Linux Kernel Image */
+			reg = <0x00040000 0x003c0000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 27MB for Root file System */
+			reg = <0x00400000 0x01b00000>;
+			label = "NOR Root File System";
+		};
+
+		partition@1f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x01f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+};
+
+&soc {
+	i2c@3000 {
+		rtc@68 {
+			compatible = "dallas,ds1339";
+			reg = <0x68>;
+		};
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+		phy2: ethernet-phy@2 {
+			interrupts = <1 1 0 0>;
+			reg = <0x2>;
+		};
+	};
+
+	mdio@25000 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy2>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi1>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	/* USB2 is shared with localbus, so it must be disabled
+	   by default. We can't put 'status = "disabled";' here
+	   since U-Boot doesn't clear the status property when
+	   it enables USB2. OTOH, U-Boot does create a new node
+	   when there isn't any. So, just comment it out.
+	   NOTE(review): the node below is present and sets status; confirm. */
+	usb@23000 {
+		status = "disabled";
+		phy_type = "ulpi";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1020utm-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1020utm-pc_32b.dts
new file mode 100644
index 0000000000..bc03ef611f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020utm-pc_32b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 UTM-PC Device Tree Source (32-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1020UTM-PC";
+	compatible = "fsl,P1020UTM-PC";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0x0 0xffe05000 0x0 0x1000>;
+
+		/* NOR */
+		ranges = <0x0 0x0 0x0 0xec000000 0x02000000
+			  0x1 0x0 0x0 0xffa00000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@ffe09000 {
+		reg = <0x0 0xffe09000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0x0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0x0 0xffe0a000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0x0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020utm-pc.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020utm-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1020utm-pc_36b.dts
new file mode 100644
index 0000000000..32766f6a47
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1020utm-pc_36b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 UTM-PC Device Tree Source (36-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1020UTM-PC";
+	compatible = "fsl,P1020UTM-PC";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0x0 0x1000>;
+
+		/* NOR */
+		ranges = <0x0 0x0 0xf 0xec000000 0x02000000
+			  0x1 0x0 0xf 0xffa00000 0x00040000
+			  0x2 0x0 0xf 0xffb00000 0x00020000>;
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0x0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1020utm-pc.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021mds.dts b/arch/powerpc/boot/dts/fsl/p1021mds.dts
new file mode 100644
index 0000000000..54af8de533
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1021mds.dts
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1021 MDS Device Tree Source
+ *
+ * Copyright 2010,2012 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1021si-pre.dtsi"
+/ {
+	model = "fsl,P1021";
+	compatible = "fsl,P1021MDS";
+
+	aliases {
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0x0 0xffe05000 0x0 0x1000>;
+
+		/* NAND Flash, BCSR, PMC0/1*/
+		ranges = <0x0 0x0 0x0 0xfc000000 0x02000000
+			  0x1 0x0 0x0 0xf8000000 0x00008000
+			  0x2 0x0 0x0 0xf8010000 0x00020000
+			  0x3 0x0 0x0 0xf8020000 0x00020000>;
+
+		nand@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p1021-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <0x0 0x0 0x40000>;
+
+			partition@0 {
+				/* This location must not be altered  */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND (RO) U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND (RO) DTB Image";
+				read-only;
+			};
+
+			partition@200000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00200000 0x00400000>;
+				label = "NAND (RO) Linux Kernel Image";
+				read-only;
+			};
+
+			partition@600000 {
+				/* 5MB for Compressed Root file System Image */
+				reg = <0x00600000 0x00500000>;
+				label = "NAND (RO) Compressed RFS Image";
+				read-only;
+			};
+
+			partition@b00000 {
+				/* 6MB for JFFS2 based Root file System (0xb00000-0x10fffff) */
+				reg = <0x00b00000 0x00600000>;
+				label = "NAND (RW) JFFS2 Root File System";
+			};
+
+			partition@1100000 {
+				/* 14MB for Writable User area */
+				reg = <0x01100000 0x00e00000>;
+				label = "NAND (RW) Writable User area";
+			};
+
+			partition@1f00000 {
+				/* 1MB for microcode */
+				reg = <0x01f00000 0x00100000>;
+				label = "NAND (RO) QE Ucode";
+				read-only;
+			};
+		};
+
+		bcsr@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p1021mds-bcsr";
+			reg = <1 0 0x8000>;
+			ranges = <0 1 0 0x8000>;
+		};
+
+		pib@2,0 {
+			compatible = "fsl,p1021mds-pib";
+			reg = <2 0 0x10000>;
+		};
+
+		pib@3,0 {
+			compatible = "fsl,p1021mds-pib";
+			reg = <3 0 0x10000>;
+		};
+	};
+
+	soc: soc@ffe00000 {
+		compatible = "fsl,p1021-immr", "simple-bus";
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+
+		i2c@3000 {
+			rtc@68 {
+				compatible = "dallas,ds1374";
+				reg = <0x68>;
+			};
+		};
+
+		spi@7000 {
+
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+
+				partition@u-boot {
+					label = "u-boot-spi";
+					reg = <0x00000000 0x00100000>;
+					read-only;
+				};
+				partition@kernel {
+					label = "kernel-spi";
+					reg = <0x00100000 0x00500000>;
+					read-only;
+				};
+				partition@dtb {
+					label = "dtb-spi";
+					reg = <0x00600000 0x00100000>;
+					read-only;
+				};
+				partition@fs {
+					label = "file system-spi";
+					reg = <0x00700000 0x00900000>;
+				};
+			};
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+			dr_mode = "host";
+		};
+
+		mdio@24000 {
+			phy0: ethernet-phy@0 {
+				interrupts = <1 1 0 0>;
+				reg = <0x0>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupts = <2 1 0 0>;
+				reg = <0x1>;
+			};
+			phy4: ethernet-phy@4 {
+				reg = <0x4>;
+			};
+			tbi-phy@5 {
+				device_type = "tbi-phy";
+				reg = <0x5>;
+			};
+		};
+
+		mdio@25000 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		ethernet@b0000 {
+			phy-handle = <&phy0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		ethernet@b1000 {
+			phy-handle = <&phy4>;
+			tbi-handle = <&tbi0>;
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@b2000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		par_io@e0100 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0xe0100 0x60>;
+			ranges = <0x0 0xe0100 0x60>;
+			device_type = "par_io";
+			num-ports = <3>;
+			pio1: ucc_pin@1 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
+					0x1  0x14 0x3  0x0  0x1  0x0    /* QE_MUX_MDIO */
+					0x0  0x17 0x2  0x0  0x2  0x0    /* CLK12 */
+					0x0  0x18 0x2  0x0  0x1  0x0    /* CLK9 */
+					0x0  0x7  0x1  0x0  0x2  0x0    /* ENET1_TXD0_SER1_TXD0 */
+					0x0  0x9  0x1  0x0  0x2  0x0    /* ENET1_TXD1_SER1_TXD1 */
+					0x0  0xb  0x1  0x0  0x2  0x0    /* ENET1_TXD2_SER1_TXD2 */
+					0x0  0xc  0x1  0x0  0x2  0x0    /* ENET1_TXD3_SER1_TXD3 */
+					0x0  0x6  0x2  0x0  0x2  0x0    /* ENET1_RXD0_SER1_RXD0 */
+					0x0  0xa  0x2  0x0  0x2  0x0    /* ENET1_RXD1_SER1_RXD1 */
+					0x0  0xe  0x2  0x0  0x2  0x0    /* ENET1_RXD2_SER1_RXD2 */
+					0x0  0xf  0x2  0x0  0x2  0x0    /* ENET1_RXD3_SER1_RXD3 */
+					0x0  0x5  0x1  0x0  0x2  0x0    /* ENET1_TX_EN_SER1_RTS_B */
+					0x0  0xd  0x1  0x0  0x2  0x0    /* ENET1_TX_ER */
+					0x0  0x4  0x2  0x0  0x2  0x0    /* ENET1_RX_DV_SER1_CTS_B */
+					0x0  0x8  0x2  0x0  0x2  0x0    /* ENET1_RX_ER_SER1_CD_B */
+					0x0  0x11 0x2  0x0  0x2  0x0    /* ENET1_CRS */
+					0x0  0x10 0x2  0x0  0x2  0x0>;    /* ENET1_COL */
+			};
+
+			pio2: ucc_pin@2 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
+					0x1  0x14 0x3  0x0  0x1  0x0    /* QE_MUX_MDIO */
+					0x1  0xb  0x2  0x0  0x1  0x0    /* CLK13 */
+					0x1  0x7  0x1  0x0  0x2  0x0    /* ENET5_TXD0_SER5_TXD0 */
+					0x1  0xa  0x1  0x0  0x2  0x0    /* ENET5_TXD1_SER5_TXD1 */
+					0x1  0x6  0x2  0x0  0x2  0x0    /* ENET5_RXD0_SER5_RXD0 */
+					0x1  0x9  0x2  0x0  0x2  0x0    /* ENET5_RXD1_SER5_RXD1 */
+					0x1  0x5  0x1  0x0  0x2  0x0    /* ENET5_TX_EN_SER5_RTS_B */
+					0x1  0x4  0x2  0x0  0x2  0x0    /* ENET5_RX_DV_SER5_CTS_B */
+					0x1  0x8  0x2  0x0  0x2  0x0>;    /* ENET5_RX_ER_SER5_CD_B */
+			};
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	qe: qe@ffe80000 {
+		ranges = <0x0 0x0 0xffe80000 0x40000>;
+		reg = <0 0xffe80000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+		status = "disabled"; /* no firmware loaded */
+
+		enet3: ucc@2000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "clk12";
+			tx-clock-name = "clk9";
+			pio-handle = <&pio1>;
+			phy-handle = <&qe_phy0>;
+			phy-connection-type = "mii";
+		};
+
+		mdio@2120 {
+			qe_phy0: ethernet-phy@0 {
+				interrupt-parent = <&mpic>;
+				interrupts = <4 1 0 0>;
+				reg = <0x0>;
+			};
+			qe_phy1: ethernet-phy@3 {
+				interrupt-parent = <&mpic>;
+				interrupts = <5 1 0 0>;
+				reg = <0x3>;
+			};
+			tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet4: ucc@2400 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "none";
+			tx-clock-name = "clk13";
+			pio-handle = <&pio2>;
+			phy-handle = <&qe_phy1>;
+			phy-connection-type = "rmii";
+		};
+	};
+};
+
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021rdb-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1021rdb-pc.dtsi
new file mode 100644
index 0000000000..18f9b31602
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1021rdb-pc.dtsi
@@ -0,0 +1,256 @@
+/*
+ * P1021 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {	/* Board devices on the local bus: NOR flash, NAND flash and the L2 switch */
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x1000000>;	/* 16MB; the partitions below add up to exactly this size */
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 3.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00380000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 10.75MB for JFFS2 based Root file System */
+			reg = <0x00400000 0x00ac0000>;
+			label = "NOR JFFS2 Root File System";
+		};
+
+		partition@ec0000 {
+			/* This location must not be altered */
+			/* 256KB for QE ucode firmware */
+			reg = <0x00ec0000 0x00040000>;
+			label = "NOR QE microcode firmware";
+			read-only;
+		};
+
+		partition@f00000 {
+			/* This location must not be altered */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x00f00000 0x00100000>;	/* a single 1MB partition holds both */
+			label = "NOR U-Boot Image";
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p1021-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x1 0x0 0x40000>;	/* 256KB window; partition offsets below go beyond it, presumably device offsets */
+
+		partition@0 {
+			/* This location must not be altered */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND DTB Image";
+		};
+
+		partition@200000 {
+			/* 4MB for Linux Kernel Image */
+			reg = <0x00200000 0x00400000>;
+			label = "NAND Linux Kernel Image";
+		};
+
+		partition@600000 {
+			/* 4MB for Compressed Root file System Image */
+			reg = <0x00600000 0x00400000>;
+			label = "NAND Compressed RFS Image";
+		};
+
+		partition@a00000 {
+			/* 7MB for JFFS2 based Root file System */
+			reg = <0x00a00000 0x00700000>;
+			label = "NAND JFFS2 Root File System";
+		};
+
+		partition@1100000 {
+			/* 15MB for User Writable Area; ends at 0x2000000 (32MB) */
+			reg = <0x01100000 0x00f00000>;
+			label = "NAND Writable User area";
+		};
+	};
+
+	L2switch@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "vitesse-7385";
+		reg = <0x2 0x0 0x20000>;	/* 128KB register window */
+	};
+};
+
+&soc {	/* Board-level additions to SoC peripherals: RTC, SPI flash, USB, PHYs, eTSEC links */
+	i2c@3000 {
+		rtc@68 {
+			compatible = "pericom,pt7c4338";
+			reg = <0x68>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>; /* input clock */
+
+			partition@0 {	/* unit address = first reg cell (partition offset) */
+				/* 512KB for u-boot Bootloader Image */
+				reg = <0x0 0x00080000>;
+				label = "SPI Flash U-Boot Image";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 512KB for DTB Image */
+				reg = <0x00080000 0x00080000>;
+				label = "SPI Flash DTB Image";
+			};
+
+			partition@100000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00100000 0x00400000>;
+				label = "SPI Flash Linux Kernel Image";
+			};
+
+			partition@500000 {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00500000 0x00400000>;
+				label = "SPI Flash Compressed RFSImage";
+			};
+
+			partition@900000 {
+				/* 7MB for JFFS2 based RFS; ends at 0x1000000 (16MB) */
+				reg = <0x00900000 0x00700000>;
+				label = "SPI Flash JFFS2 RFS";
+			};
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	mdio@24000 {	/* external PHYs used by enet1/enet2 below, plus a TBI PHY */
+		phy0: ethernet-phy@0 {
+			interrupt-parent = <&mpic>;
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 {
+			interrupt-parent = <&mpic>;
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@25000 {
+		tbi1: tbi-phy@11 {	/* referenced by enet1's tbi-handle */
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi2: tbi-phy@11 {	/* referenced by enet2's tbi-handle */
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	ptp_clock@b0e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0xb0e00 0xb0>;
+		interrupts = <68 2 0 0 69 2 0 0>;	/* two 4-cell interrupt specifiers */
+		fsl,tclk-period	= <10>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0x80000016>;
+		fsl,tmr-fiper1	= <999999990>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <199999999>;
+	};
+
+	enet0: ethernet@b0000 {	/* no phy-handle: uses a fixed link instead */
+		fixed-link = <1 1 1000 0 0>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi1>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		tbi-handle = <&tbi2>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1021rdb-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_32b.dts
new file mode 100644
index 0000000000..d2b4710357
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_32b.dts
@@ -0,0 +1,96 @@
+/*
+ * P1021 RDB Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1021si-pre.dtsi"
+/ {	/* P1021RDB-PC board, 32-bit physical address map (high address cell is 0) */
+	model = "fsl,P1021RDB";
+	compatible = "fsl,P1021RDB-PC";
+
+	memory {
+		device_type = "memory";	/* no reg given here */
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xff800000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;	/* SoC offset 0 -> 0xffe00000, 1MB */
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0 0xffe09000 0 0x1000>;
+		pcie@0 {
+			/* identity map onto the controller's PCI addresses above */
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			/* identity map onto the controller's PCI addresses above */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	qe: qe@ffe80000 {
+		ranges = <0x0 0x0 0xffe80000 0x40000>;	/* QE offset 0 -> 0xffe80000, 256KB */
+		reg = <0 0xffe80000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+	};
+};
+
+/include/ "p1021rdb-pc.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021rdb-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_36b.dts
new file mode 100644
index 0000000000..e298c29e56
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_36b.dts
@@ -0,0 +1,96 @@
+/*
+ * P1021 RDB Device Tree Source (36-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1021si-pre.dtsi"
+/ {	/* P1021RDB-PC board, 36-bit physical address map (high address cell is 0xf or 0xc) */
+	model = "fsl,P1021RDB";
+	compatible = "fsl,P1021RDB-PC";
+
+	memory {
+		device_type = "memory";	/* no reg given here */
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
+		ranges = <0x0 0x0 0xf 0xef000000 0x01000000
+			  0x1 0x0 0xf 0xff800000 0x00040000
+			  0x2 0x0 0xf 0xffb00000 0x00020000>;
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;	/* SoC offset 0 -> 0xf_ffe00000, 1MB */
+	};
+
+	pci0: pcie@fffe09000 {
+		ranges = <0x2000000 0x0 0xc0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		reg = <0xf 0xffe09000 0 0x1000>;
+		pcie@0 {
+			/* must lie inside the controller's PCI window above (0xc0000000) */
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			/* must lie inside the controller's PCI window above (0x80000000) */
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	qe: qe@fffe80000 {
+		ranges = <0x0 0xf 0xffe80000 0x40000>;	/* QE offset 0 -> 0xf_ffe80000, 256KB */
+		reg = <0xf 0xffe80000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+	};
+};
+
+/include/ "p1021rdb-pc.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
new file mode 100644
index 0000000000..407cb5fd0f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
@@ -0,0 +1,247 @@
+/*
+ * P1021/P1012 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {	/* SoC-level properties of the local bus controller; the board file supplies reg/ranges */
+	#address-cells = <2>;	/* child reg starts with two address cells, e.g. nor@0,0 */
+	#size-cells = <1>;
+	compatible = "fsl,p1021-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>,
+		     <16 2 0 0>;
+};
+
+/* controller at 0x9000 */
+&pci0 {	/* SoC-level properties of the PCIe controller; the board file supplies reg/ranges */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			/* each row: 3-cell child address, 1-cell interrupt, &mpic, 4-cell MPIC specifier */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci1 {	/* SoC-level properties of the PCIe controller; the board file supplies reg/ranges */
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+
+		interrupt-map = <
+			/* IDSEL 0x0; interrupts 1-4 map to MPIC sources 0-3 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+&soc {	/* SoC-level peripherals; most nodes come from shared pq3-*.dtsi fragments included below */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,p1021-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,p1021-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,p1021-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+
+/include/ "pq3-espi-0.dtsi"
+	spi@7000 {	/* adds a property to the node from the include above */
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+
+	L2: l2-cache-controller@20000 {	/* label L2 is the cpus' next-level-cache target */
+		compatible = "fsl,p1021-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2,256K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {	/* sets compatible on the node from the include above */
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		sdhci,auto-cmd12;
+	};
+
+/include/ "pq3-sec3.3-0.dtsi"
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: enet0_grp2: ethernet@b0000 {	/* empty body: only attaches the two labels */
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: enet1_grp2: ethernet@b1000 {	/* empty body: only attaches the two labels */
+	};
+
+/include/ "pq3-etsec2-2.dtsi"
+	enet2: enet2_grp2: ethernet@b2000 {	/* empty body: only attaches the two labels */
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,p1021-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
+
+&qe {	/* SoC-level QUICC Engine description; the board file supplies reg/ranges */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "qe";
+	compatible = "fsl,qe";
+	fsl,qe-num-riscs = <1>;
+	fsl,qe-num-snums = <28>;
+
+	qeic: interrupt-controller@80 {	/* interrupt parent of the ucc nodes below */
+		interrupt-controller;
+		compatible = "fsl,qe-ic";
+		#address-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x80 0x80>;
+		interrupts = <63 2 0 0 60 2 0 0>; //high:47 low:44
+	};
+
+	ucc@2000 {
+		cell-index = <1>;
+		reg = <0x2000 0x200>;
+		interrupts = <32>;	/* single cell, per qeic #interrupt-cells = <1> */
+		interrupt-parent = <&qeic>;
+	};
+
+	mdio@2120 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x2120 0x18>;
+		compatible = "fsl,ucc-mdio";
+	};
+
+	ucc@2400 {
+		cell-index = <5>;
+		reg = <0x2400 0x200>;
+		interrupts = <40>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@2600 {
+		cell-index = <7>;
+		reg = <0x2600 0x200>;
+		interrupts = <42>;
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@2200 {
+		cell-index = <3>;
+		reg = <0x2200 0x200>;
+		interrupts = <34>;
+		interrupt-parent = <&qeic>;
+	};
+
+	muram@10000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,qe-muram", "fsl,cpm-muram";
+		ranges = <0x0 0x10000 0x6000>;	/* muram offset 0 -> QE offset 0x10000, 0x6000 bytes */
+
+		data-only@0 {
+			compatible = "fsl,qe-muram-data",
+			"fsl,cpm-muram-data";
+			reg = <0x0 0x6000>;	/* covers the whole muram range above */
+		};
+	};
+};
+
+/include/ "pq3-etsec2-grp2-0.dtsi"
+/include/ "pq3-etsec2-grp2-1.dtsi"
+/include/ "pq3-etsec2-grp2-2.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi
new file mode 100644
index 0000000000..36161b5001
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi
@@ -0,0 +1,71 @@
+/*
+ * P1021/P1012 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {	/* Root-level skeleton for P1021: cell sizes, aliases and the two CPU nodes */
+	compatible = "fsl,P1021";
+	#address-cells = <2>;	/* top-level addresses use two cells (high, low) */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		/* the labels referenced here are not defined in this file */
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1021@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* both CPUs point at the same L2 node */
+		};
+
+		PowerPC,P1021@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1022ds.dtsi b/arch/powerpc/boot/dts/fsl/p1022ds.dtsi
new file mode 100644
index 0000000000..ddefbf64f7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1022ds.dtsi
@@ -0,0 +1,239 @@
+/*
+ * P1022 DS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_lbc {	/* Board devices on the local bus: NOR flash, NAND flash and the board-control FPGA */
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x8000000>;	/* 128MB; the partitions below end exactly at 0x8000000 */
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			reg = <0x0 0x03000000>;	/* 48MB */
+			label = "ramdisk-nor";
+			read-only;
+		};
+
+		partition@3000000 {
+			reg = <0x03000000 0x00e00000>;	/* 14MB */
+			label = "diagnostic-nor";
+			read-only;
+		};
+
+		partition@3e00000 {
+			reg = <0x03e00000 0x00200000>;	/* 2MB */
+			label = "dink-nor";
+			read-only;
+		};
+
+		partition@4000000 {
+			reg = <0x04000000 0x00400000>;	/* 4MB */
+			label = "kernel-nor";
+			read-only;
+		};
+
+		partition@4400000 {
+			reg = <0x04400000 0x03b00000>;	/* 59MB */
+			label = "jffs2-nor";
+		};
+
+		partition@7f00000 {
+			reg = <0x07f00000 0x00080000>;	/* 512KB */
+			label = "dtb-nor";
+			read-only;
+		};
+
+		partition@7f80000 {
+			reg = <0x07f80000 0x00080000>;	/* 512KB */
+			label = "u-boot-nor";
+			read-only;
+		};
+	};
+
+	nand@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,elbc-fcm-nand";
+		reg = <0x2 0x0 0x40000>;	/* 256KB window; partition offsets below go beyond it, presumably device offsets */
+
+		partition@0 {
+			reg = <0x0 0x02000000>;	/* 32MB */
+			label = "u-boot-nand";
+			read-only;
+		};
+
+		partition@2000000 {
+			reg = <0x02000000 0x10000000>;	/* 256MB */
+			label = "jffs2-nand";
+		};
+
+		partition@12000000 {
+			reg = <0x12000000 0x10000000>;	/* 256MB */
+			label = "ramdisk-nand";
+			read-only;
+		};
+
+		partition@22000000 {
+			reg = <0x22000000 0x04000000>;	/* 64MB */
+			label = "kernel-nand";
+		};
+
+		partition@26000000 {
+			reg = <0x26000000 0x01000000>;	/* 16MB */
+			label = "dtb-nand";
+			read-only;
+		};
+
+		partition@27000000 {
+			reg = <0x27000000 0x19000000>;	/* 400MB; ends at 0x40000000 (1GB) */
+			label = "reserved-nand";
+		};
+	};
+
+	board-control@3,0 {
+		compatible = "fsl,p1022ds-fpga", "fsl,fpga-ngpixis";
+		reg = <3 0 0x30>;
+		interrupt-parent = <&mpic>;
+		/*
+		 * IRQ8 is generated if the "EVENT" switch is pressed
+		 * and PX_CTL[EVESEL] is set to 00.
+		 */
+		interrupts = <8 0 0 0>;
+	};
+};
+
+&board_soc {	/* Board-level additions to SoC peripherals: I2C devices, SPI flash, audio, USB, PHYs */
+	i2c@3100 {
+		wm8776:codec@1a {	/* referenced by ssi@15000's codec-handle */
+			compatible = "wlf,wm8776";
+			reg = <0x1a>;
+			/*
+			 * clock-frequency will be set by U-Boot if
+			 * the clock is enabled.
+			 */
+		};
+		rtc@68 {
+			compatible = "dallas,ds3232";
+			reg = <0x68>;
+			interrupts = <0x1 0x1 0 0>;
+		};
+		adt7461@4c {
+			compatible = "adi,adt7461";
+			reg = <0x4c>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>; /* input clock */
+
+			partition@0 {
+				label = "u-boot-spi";
+				reg = <0x00000000 0x00100000>;	/* 1MB */
+				read-only;
+			};
+			partition@100000 {
+				label = "kernel-spi";
+				reg = <0x00100000 0x00500000>;	/* 5MB */
+				read-only;
+			};
+			partition@600000 {
+				label = "dtb-spi";
+				reg = <0x00600000 0x00100000>;	/* 1MB */
+				read-only;
+			};
+			partition@700000 {
+				label = "file system-spi";
+				reg = <0x00700000 0x00900000>;	/* 9MB; ends at 0x1000000 (16MB) */
+			};
+		};
+	};
+
+	ssi@15000 {
+		fsl,mode = "i2s-slave";
+		codec-handle = <&wm8776>;
+		fsl,ssi-asynchronous;
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	usb@23000 {
+		status = "disabled";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {	/* NOTE(review): unit address @0 but reg is 0x1 - confirm which is intended */
+			interrupts = <3 1 0 0>;
+			reg = <0x1>;
+		};
+		phy1: ethernet-phy@1 {	/* NOTE(review): unit address @1 but reg is 0x2 - confirm which is intended */
+			interrupts = <9 1 0 0>;
+			reg = <0x2>;	/* same reg value as tbi-phy@2 below */
+		};
+		tbi-phy@2 {
+			device_type = "tbi-phy";
+			reg = <0x2>;
+		};
+	};
+
+	ptp_clock@b0e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0xb0e00 0xb0>;
+		interrupts = <68 2 0 0 69 2 0 0>;	/* two 4-cell interrupt specifiers */
+		fsl,tclk-period	= <5>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0xc01ebd3d>;
+		fsl,tmr-fiper1	= <999999995>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <266499999>;
+	};
+
+	ethernet@b0000 {
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	ethernet@b1000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1022ds_32b.dts b/arch/powerpc/boot/dts/fsl/p1022ds_32b.dts
new file mode 100644
index 0000000000..5a7eaceb9e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1022ds_32b.dts
@@ -0,0 +1,103 @@
+/*
+ * P1022 DS 32-bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1022si-pre.dtsi"
+/ {	/* P1022DS board, 32-bit physical address map (high address cell is 0) */
+	model = "fsl,P1022DS";
+	compatible = "fsl,P1022DS";
+
+	memory {
+		device_type = "memory";	/* no reg given here */
+	};
+
+	board_lbc: lbc: localbus@ffe05000 {	/* two labels on the same node */
+		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
+			  0x1 0x0 0x0 0xe0000000 0x08000000
+			  0x2 0x0 0x0 0xff800000 0x00040000
+			  0x3 0x0 0x0 0xffdf0000 0x00008000>;
+		reg = <0x0 0xffe05000 0 0x1000>;
+	};
+
+	board_soc: soc: soc@ffe00000 {	/* two labels on the same node */
+		ranges = <0x0 0x0 0xffe00000 0x100000>;	/* SoC offset 0 -> 0xffe00000, 1MB */
+	};
+
+	pci0: pcie@ffe09000 {
+		/* PCI address 0xe0000000 -> CPU 0xa0000000; the other two controllers map the same PCI address elsewhere */
+		ranges = <0x2000000 0x0 0xe0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0x0 0xffe09000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		/* PCI address 0xe0000000 -> CPU 0xc0000000 */
+		ranges = <0x2000000 0x0 0xe0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci2: pcie@ffe0b000 {
+		/* PCI address 0xe0000000 -> CPU 0x80000000 */
+		ranges = <0x2000000 0x0 0xe0000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		reg = <0 0xffe0b000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1022si-post.dtsi"
+/include/ "p1022ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1022ds_36b.dts b/arch/powerpc/boot/dts/fsl/p1022ds_36b.dts
new file mode 100644
index 0000000000..88063cd9e2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1022ds_36b.dts
@@ -0,0 +1,103 @@
+/*
+ * P1022 DS 36-bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1022si-pre.dtsi"
+/ {
+	model = "fsl,P1022DS";
+	compatible = "fsl,P1022DS";
+
+	memory {
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@fffe05000 {
+		ranges = <0x0 0x0 0xf 0xe8000000 0x08000000	/* child <0 0> -> 0xf_e8000000, 128 MiB */
+			  0x1 0x0 0xf 0xe0000000 0x08000000	/* child <1 0> -> 0xf_e0000000, 128 MiB */
+			  0x2 0x0 0xf 0xff800000 0x00040000	/* child <2 0> -> 0xf_ff800000, 256 KiB */
+			  0x3 0x0 0xf 0xffdf0000 0x00008000>;	/* child <3 0> -> 0xf_ffdf0000, 32 KiB */
+		reg = <0xf 0xffe05000 0 0x1000>;	/* 4 KiB of registers at 0xf_ffe05000 */
+	};
+
+	board_soc: soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;	/* SoC offset 0x0 -> 0xf_ffe00000, 1 MiB */
+	};
+
+	pci0: pcie@fffe09000 {
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000	/* MEM: PCI 0xe0000000 -> CPU 0xc_20000000, 512 MiB */
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;	/* I/O: PCI 0x0 -> CPU 0xf_ffc10000, 64 KiB */
+		reg = <0xf 0xffe09000 0 0x1000>;	/* 4 KiB of controller registers at 0xf_ffe09000 */
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000	/* child: MEM space, PCI address 0xe0000000 */
+				  0x2000000 0x0 0xe0000000	/* parent: same PCI address (1:1) */
+				  0x0 0x20000000	/* size: 512 MiB */
+
+				  0x1000000 0x0 0x0	/* child: I/O space, PCI address 0x0 */
+				  0x1000000 0x0 0x0	/* parent: same PCI address (1:1) */
+				  0x0 0x100000>;	/* size: 1 MiB */
+		};
+	};
+
+	pci1: pcie@fffe0a000 {
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x40000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>;
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci2: pcie@fffe0b000 {
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		reg = <0xf 0xffe0b000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1022si-post.dtsi"
+/include/ "p1022ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1022rdk.dts b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
new file mode 100644
index 0000000000..4261c2f7e4
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
@@ -0,0 +1,188 @@
+/*
+ * P1022 RDK 32-bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1022si-pre.dtsi"
+/ {
+	model = "fsl,P1022RDK";
+	compatible = "fsl,P1022RDK";
+
+	memory {
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@ffe05000 {
+		/* The P1022 RDK does not have any localbus devices */
+		status = "disabled";
+	};
+
+	board_soc: soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+
+		i2c@3100 {
+			wm8960:codec@1a {
+				compatible = "wlf,wm8960";
+				reg = <0x1a>;
+				/* MCLK source is a stand-alone oscillator */
+				clock-frequency = <12288000>;
+			};
+			rtc@68 {
+				compatible = "st,m41t62";
+				reg = <0x68>;
+			};
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+			zl6100@21 {
+				compatible = "isil,zl6100";
+				reg = <0x21>;
+			};
+			zl6100@24 {
+				compatible = "isil,zl6100";
+				reg = <0x24>;
+			};
+			zl6100@26 {
+				compatible = "isil,zl6100";
+				reg = <0x26>;
+			};
+			zl6100@29 {
+				compatible = "isil,zl6100";
+				reg = <0x29>;
+			};
+		};
+
+		spi@7000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,m25p80", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <1000000>;
+				partition@0 {
+					label = "full-spi-flash";
+					reg = <0x00000000 0x00100000>;
+				};
+			};
+		};
+
+		ssi@15000 {
+			fsl,mode = "i2s-slave";
+			codec-handle = <&wm8960>;
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+		};
+
+		usb@23000 {
+			phy_type = "ulpi";
+		};
+
+		mdio@24000 {
+			phy0: ethernet-phy@1 {	/* unit address follows the MDIO address in reg */
+				interrupts = <3 1 0 0>;
+				reg = <0x1>;
+			};
+			phy1: ethernet-phy@2 {	/* unit address follows the MDIO address in reg */
+				interrupts = <9 1 0 0>;
+				reg = <0x2>;
+			};
+		};
+
+		mdio@25000 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		ethernet@b0000 {
+			phy-handle = <&phy0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		ethernet@b1000 {
+			phy-handle = <&phy1>;
+			tbi-handle = <&tbi0>;
+			phy-connection-type = "sgmii";
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xe0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0x0 0xffe09000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		ranges = <0x2000000 0x0 0xe0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci2: pcie@ffe0b000 {
+		ranges = <0x2000000 0x0 0xe0000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		reg = <0 0xffe0b000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1022si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
new file mode 100644
index 0000000000..093e4e3ed3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
@@ -0,0 +1,249 @@
+/*
+ * P1022/P1013 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	#address-cells = <2>;	/* children use two address cells (the <N 0> prefixes in the board ranges) */
+	#size-cells = <1>;	/* and a single size cell */
+	/*
+	 * The localbus on the P1022 is not a simple-bus because of the eLBC
+	 * pin muxing when the DIU is enabled.
+	 */
+	compatible = "fsl,p1022-elbc", "fsl,elbc";
+	interrupts = <19 2 0 0>,
+		     <16 2 0 0>;	/* 16 is also listed by the PCIe, ECM, memory-controller and L2 nodes in this file */
+};
+
+/* controller at 0x9000 */
+&pci0 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare only address bits 15:11 and the interrupt pin */
+		interrupt-map = <
+			/* IDSEL 0x0: interrupt pins 1-4 map to MPIC interrupts 4-7 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0xa000 */
+&pci1 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare only address bits 15:11 and the interrupt pin */
+
+		interrupt-map = <
+			/* IDSEL 0x0: interrupt pins 1-4 map to MPIC interrupts 0-3 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0xb000 */
+&pci2 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare only address bits 15:11 and the interrupt pin */
+
+		interrupt-map = <
+			/* IDSEL 0x0: interrupt pins 1-4 map to MPIC interrupts 8-11 */
+			0000 0x0 0x0 0x1 &mpic 0x8 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x9 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0xa 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0xb 0x1 0x0 0x0
+			>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,p1022-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,p1022-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,p1022-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+/include/ "pq3-espi-0.dtsi"
+	spi@7000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "pq3-dma-1.dtsi"
+	dma@c300 {
+		dma00: dma-channel@0 {
+			compatible = "fsl,ssi-dma-channel";
+		};
+		dma01: dma-channel@80 {
+			compatible = "fsl,ssi-dma-channel";
+		};
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+
+	display: display@10000 {
+		compatible = "fsl,diu", "fsl,p1022-diu";	/* NOTE(review): generic string precedes the SoC-specific one -- confirm intended order */
+		reg = <0x10000 1000>;	/* NOTE(review): size is decimal 1000 (0x3e8); sibling nodes use 0x1000 -- confirm */
+		interrupts = <64 2 0 0>;
+	};
+
+	ssi@15000 {
+		compatible = "fsl,mpc8610-ssi";
+		cell-index = <0>;
+		reg = <0x15000 0x100>;
+		interrupts = <75 2 0 0>;
+		fsl,playback-dma = <&dma00>;
+		fsl,capture-dma = <&dma01>;
+		fsl,fifo-depth = <15>;
+	};
+
+/include/ "pq3-sata2-0.dtsi"
+/include/ "pq3-sata2-1.dtsi"
+
+	L2: l2-cache-controller@20000 {	/* target of next-level-cache in both CPU nodes of p1022si-pre.dtsi */
+		compatible = "fsl,p1022-l2-cache-controller";
+		reg = <0x20000 0x1000>;	/* 4 KiB of registers at SoC offset 0x20000 */
+		cache-line-size = <32>;	// in bytes
+		cache-size = <0x40000>; // 256 KiB
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+/include/ "pq3-usb2-dr-1.dtsi"
+	usb@23000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		compatible = "fsl,p1022-esdhc", "fsl,esdhc";
+		sdhci,auto-cmd12;
+	};
+
+/include/ "pq3-sec3.3-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+/include/ "pq3-etsec2-0.dtsi"
+	enet0: enet0_grp2: ethernet@b0000 {
+		fsl,wake-on-filer;
+	};
+
+/include/ "pq3-etsec2-1.dtsi"
+	enet1: enet1_grp2: ethernet@b1000 {
+		fsl,wake-on-filer;
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,p1022-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+
+	power@e0070 {
+		compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
+		reg = <0xe0070 0x20>;
+	};
+
+};
+
+/include/ "pq3-etsec2-grp2-0.dtsi"
+/include/ "pq3-etsec2-grp2-1.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi
new file mode 100644
index 0000000000..de76ae8992
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi
@@ -0,0 +1,73 @@
+/*
+ * P1022/P1013 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P1022";
+	#address-cells = <2>;	/* top-level addresses take two cells */
+	#size-cells = <2>;	/* top-level sizes take two cells */
+	interrupt-parent = <&mpic>;	/* default interrupt controller for nodes below */
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		vga = &display;	/* same target as the display alias below */
+		display = &display;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;	/* CPU reg values carry no size */
+
+		PowerPC,P1022@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* both cores reference the same L2 node */
+		};
+
+		PowerPC,P1022@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1023rdb.dts b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
new file mode 100644
index 0000000000..ead928364b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
@@ -0,0 +1,260 @@
+/*
+ * P1023 RDB Device Tree Source
+ *
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
+ *
+ * Author: Chunhe Lan <Chunhe.Lan@freescale.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1023si-pre.dtsi"
+
+/ {
+	model = "fsl,P1023";
+	compatible = "fsl,P1023RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	memory {
+		device_type = "memory";
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {	/* no reg here; alloc-ranges is added in p1023si-post.dtsi */
+			size = <0 0x1000000>;	/* 16 MiB */
+			alignment = <0 0x1000000>;	/* aligned to its own size */
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 4 MiB */
+			alignment = <0 0x400000>;	/* aligned to its own size */
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 32 MiB */
+			alignment = <0 0x2000000>;	/* aligned to its own size */
+		};
+	};
+
+	qportals: qman-portals@ff000000 {
+		ranges = <0x0 0xf 0xff000000 0x200000>;	/* 2 MiB window. NOTE(review): high cell is 0xf, but the unit address and the soc/lbc/pcie nodes here use 0x0 -- confirm */
+	};
+
+	bportals: bman-portals@ff200000 {
+		ranges = <0x0 0xf 0xff200000 0x200000>;	/* 2 MiB window. NOTE(review): high cell is 0xf, but the unit address and the soc/lbc/pcie nodes here use 0x0 -- confirm */
+	};
+
+	soc: soc@ff600000 {
+		ranges = <0x0 0x0 0xff600000 0x200000>;
+
+		i2c@3000 {
+			eeprom@53 {
+				compatible = "atmel,24c04";
+				reg = <0x53>;
+			};
+
+			rtc@6f {
+				compatible = "microchip,mcp7941x";
+				reg = <0x6f>;
+			};
+		};
+
+		usb@22000 {
+			dr_mode = "host";
+			phy_type = "ulpi";
+		};
+	};
+
+	lbc: localbus@ff605000 {
+		reg = <0 0xff605000 0 0x1000>;	/* 4 KiB of registers at 0xff605000 */
+
+		/* NOR and NAND flash: <child address (2 cells)> <parent address (2 cells)> <size> */
+		ranges = <0x0 0x0 0x0 0xec000000 0x04000000	/* nor@0,0 -> 0xec000000, 64 MiB */
+			  0x1 0x0 0x0 0xffa00000 0x08000000>;	/* nand@1,0 -> 0xffa00000. NOTE(review): 128 MiB from here passes 4 GiB -- confirm size */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x04000000>;	/* 64 MiB; the four partitions below fill it exactly */
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				/* 48MB for Root File System */
+				reg = <0x00000000 0x03000000>;
+				label = "NOR Root File System";
+			};
+
+			partition@3000000 {
+				/* 1MB for DTB Image */
+				reg = <0x03000000 0x00100000>;
+				label = "NOR DTB Image";
+			};
+
+			partition@3100000 {
+				/* 14MB for Linux Kernel Image */
+				reg = <0x03100000 0x00e00000>;
+				label = "NOR Linux Kernel Image";
+			};
+
+			partition@3f00000 {
+				/* This location must not be altered */
+				/* 512KB for u-boot Bootloader Image */
+				/* 512KB for u-boot Environment Variables */
+				reg = <0x03f00000 0x00100000>;
+				label = "NOR U-Boot Image";
+				read-only;
+			};
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x40000>;	/* 256 KiB; the partitions below cover 128 MiB of offsets */
+
+			partition@0 {
+				/* This location must not be altered */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND DTB Image";
+			};
+
+			partition@200000 {
+				/* 14MB for Linux Kernel Image */
+				reg = <0x00200000 0x00e00000>;
+				label = "NAND Linux Kernel Image";
+			};
+
+			partition@1000000 {
+				/* 96MB for Root File System Image */
+				reg = <0x01000000 0x06000000>;
+				label = "NAND Root File System";
+			};
+
+			partition@7000000 {
+				/* 16MB for User Writable Area */
+				reg = <0x07000000 0x01000000>;
+				label = "NAND Writable User area";
+			};
+		};
+	};
+
+	pci0: pcie@ff60a000 {
+		reg = <0 0xff60a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		pcie@0 {
+			/* IRQ[0:3] are pulled up on board, set to active-low */
+			interrupt-map-mask = <0xf800 0 0 7>;
+			interrupt-map = <
+				/* IDSEL 0x0 */
+				0000 0 0 1 &mpic 0 1 0 0
+				0000 0 0 2 &mpic 1 1 0 0
+				0000 0 0 3 &mpic 2 1 0 0
+				0000 0 0 4 &mpic 3 1 0 0
+				>;
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	board_pci1: pci1: pcie@ff609000 {
+		reg = <0 0xff609000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			/*
+			 * IRQ[4:6] only for PCIe, set to active-high,
+			 * IRQ[7] is pulled up on board, set to active-low
+			 */
+			interrupt-map-mask = <0xf800 0 0 7>;
+			interrupt-map = <
+				/* IDSEL 0x0 */
+				0000 0 0 1 &mpic 4 2 0 0
+				0000 0 0 2 &mpic 5 2 0 0
+				0000 0 0 3 &mpic 6 2 0 0
+				0000 0 0 4 &mpic 7 1 0 0
+				>;
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci2: pcie@ff60b000 {
+		reg = <0 0xff60b000 0 0x1000>;	/* 4 KiB of controller registers at 0xff60b000 */
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000	/* MEM: PCI 0x80000000 -> CPU 0x80000000, 512 MiB */
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;	/* I/O: PCI 0x0 -> CPU 0xffc00000, 64 KiB */
+		pcie@0 {
+			/*
+			 * IRQ[8:10] are pulled up on board, set to active-low;
+			 * IRQ[11] is only for PCIe, set to active-high.
+			 */
+			interrupt-map-mask = <0xf800 0 0 7>;
+			interrupt-map = <
+				/* IDSEL 0x0 */
+				0000 0 0 1 &mpic 8 1 0 0
+				0000 0 0 2 &mpic 9 1 0 0
+				0000 0 0 3 &mpic 10 1 0 0
+				0000 0 0 4 &mpic 11 2 0 0	/* second MPIC cell is 2 here (active-high per the comment above) */
+				>;
+			ranges = <0x2000000 0x0 0x80000000	/* child: MEM space, PCI address 0x80000000 */
+				  0x2000000 0x0 0x80000000	/* parent: same PCI address (1:1) */
+				  0x0 0x20000000	/* size: 512 MiB */
+
+				  0x1000000 0x0 0x0	/* child: I/O space, PCI address 0x0 */
+				  0x1000000 0x0 0x0	/* parent: same PCI address (1:1) */
+				  0x0 0x100000>;	/* size: 1 MiB */
+		};
+	};
+};
+
+/include/ "p1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
new file mode 100644
index 0000000000..da6d3fc6ba
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
@@ -0,0 +1,307 @@
+/*
+ * P1023/P1017 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>;	/* start 0x0, length 0x10_00000000 (64 GiB) */
+};
+
+&qman_fqd {
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10 0>;	/* start 0x0, length 0x10_00000000 (64 GiB) */
+};
+
+&qman_pfdr {
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10 0>;	/* start 0x0, length 0x10_00000000 (64 GiB) */
+};
+
+&lbc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,p1023-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>,
+		     <16 2 0 0>;
+};
+
+/* controller at 0xa000 */
+&pci0 {
+	compatible = "fsl,p1023-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+	};
+};
+
+/* controller at 0x9000 */
+&pci1 {
+	compatible = "fsl,p1023-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+	};
+};
+
+/* controller at 0xb000 */
+&pci2 {
+	compatible = "fsl,p1023-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+	};
+};
+
+&qportals {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x100000 0x1000>;
+		interrupts = <29 2 0 0>;
+		cell-index = <0>;
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x101000 0x1000>;
+		interrupts = <31 2 0 0>;
+		cell-index = <1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x102000 0x1000>;
+		interrupts = <33 2 0 0>;
+		cell-index = <2>;
+	};
+};
+
+&bportals {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x100000 0x1000>;
+		interrupts = <30 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x101000 0x1000>;
+		interrupts = <32 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x102000 0x1000>;
+		interrupts = <34 2 0 0>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,p1023-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,p1023-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,p1023-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+
+/include/ "pq3-espi-0.dtsi"
+	spi@7000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "pq3-gpio-0.dtsi"
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,p1023-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x40000>; // L2,256K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+
+	crypto: crypto@30000 {	/* unit address matches the first reg cell */
+		compatible = "fsl,sec-v4.2", "fsl,sec-v4.0";
+		fsl,sec-era = <3>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x30000 0x10000>;	/* 64 KiB at SoC offset 0x30000 */
+		ranges = <0 0x30000 0x10000>;	/* child addresses are offsets into that block */
+		interrupts = <58 2 0 0>;
+
+		sec_jr0: jr@1000 {
+			compatible = "fsl,sec-v4.2-job-ring",
+				     "fsl,sec-v4.0-job-ring";
+			reg = <0x1000 0x1000>;
+			interrupts = <45 2 0 0>;
+		};
+
+		sec_jr1: jr@2000 {
+			compatible = "fsl,sec-v4.2-job-ring",
+				     "fsl,sec-v4.0-job-ring";
+			reg = <0x2000 0x1000>;
+			interrupts = <45 2 0 0>;
+		};
+
+		sec_jr2: jr@3000 {
+			compatible = "fsl,sec-v4.2-job-ring",
+				     "fsl,sec-v4.0-job-ring";
+			reg = <0x3000 0x1000>;
+			interrupts = <57 2 0 0>;
+		};
+
+		sec_jr3: jr@4000 {
+			compatible = "fsl,sec-v4.2-job-ring",
+				     "fsl,sec-v4.0-job-ring";
+			reg = <0x4000 0x1000>;
+			interrupts = <57 2 0 0>;
+		};
+
+		rtic@6000 {
+			compatible = "fsl,sec-v4.2-rtic",
+				     "fsl,sec-v4.0-rtic";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x6000 0x100>;
+			ranges = <0x0 0x6100 0xe00>;	/* child offset 0 starts at 0x6100, just past reg */
+
+			rtic_a: rtic-a@0 {
+				compatible = "fsl,sec-v4.2-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
+				reg = <0x00 0x20 0x100 0x80>;	/* two regions: <0x00 0x20> and <0x100 0x80> */
+			};
+
+			rtic_b: rtic-b@20 {
+				compatible = "fsl,sec-v4.2-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
+				reg = <0x20 0x20 0x200 0x80>;
+			};
+
+			rtic_c: rtic-c@40 {
+				compatible = "fsl,sec-v4.2-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
+				reg = <0x40 0x20 0x300 0x80>;
+			};
+
+			rtic_d: rtic-d@60 {
+				compatible = "fsl,sec-v4.2-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
+				reg = <0x60 0x20 0x500 0x80>;
+			};
+		};
+	};
+
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+	qman: qman@88000 {
+		compatible = "fsl,qman";
+		reg = <0x88000 0x1000>;
+		interrupts = <16 2 0 0>;
+		fsl,qman-portals = <&qportals>;
+		memory-region = <&qman_fqd &qman_pfdr>;
+	};
+
+	bman: bman@8a000 {
+		compatible = "fsl,bman";
+		reg = <0x8a000 0x1000>;
+		interrupts = <16 2 0 0>;
+		fsl,bman-portals = <&bportals>;
+		memory-region = <&bman_fbpr>;
+	};
+
+	global-utilities@e0000 {
+		compatible = "fsl,p1023-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi
new file mode 100644
index 0000000000..132a152192
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi
@@ -0,0 +1,79 @@
+/*
+ * P1023/P1017 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P1023";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1023@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P1023@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1024rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1024rdb.dtsi
new file mode 100644
index 0000000000..b4d05867f7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1024rdb.dtsi
@@ -0,0 +1,228 @@
+/*
+ * P1024 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x1000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 3.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00380000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 11MB for JFFS2 based Root file System */
+			reg = <0x00400000 0x00b00000>;
+			label = "NOR JFFS2 Root File System";
+		};
+
+		partition@f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x00f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p1020-fcm-nand",
+				 "fsl,elbc-fcm-nand";
+		reg = <0x1 0x0 0x40000>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND DTB Image";
+		};
+
+		partition@200000 {
+			/* 4MB for Linux Kernel Image */
+			reg = <0x00200000 0x00400000>;
+			label = "NAND Linux Kernel Image";
+		};
+
+		partition@600000 {
+			/* 4MB for Compressed Root file System Image */
+			reg = <0x00600000 0x00400000>;
+			label = "NAND Compressed RFS Image";
+		};
+
+		partition@a00000 {
+			/* 15MB for JFFS2 based Root file System */
+			reg = <0x00a00000 0x00f00000>;
+			label = "NAND JFFS2 Root File System";
+		};
+
+		partition@1900000 {
+			/* 7MB for User Writable Area */
+			reg = <0x01900000 0x00700000>;
+			label = "NAND Writable User area";
+		};
+	};
+};
+
+&soc {
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,m25p80", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>;
+
+			partition@0 {
+				/* 512KB for u-boot Bootloader Image */
+				reg = <0x0 0x00080000>;
+				label = "SPI U-Boot Image";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 512KB for DTB Image */
+				reg = <0x00080000 0x00080000>;
+				label = "SPI DTB Image";
+			};
+
+			partition@100000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00100000 0x00400000>;
+				label = "SPI Linux Kernel Image";
+			};
+
+			partition@500000 {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00500000 0x00400000>;
+				label = "SPI Compressed RFS Image";
+			};
+
+			partition@900000 {
+				/* 7MB for JFFS2 based RFS */
+				reg = <0x00900000 0x00700000>;
+				label = "SPI JFFS2 RFS";
+			};
+		};
+	};
+
+	i2c@3000 {
+		rtc@68 {
+			compatible = "dallas,ds1339";
+			reg = <0x68>;
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	usb@23000 {
+		status = "disabled";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+		phy2: ethernet-phy@2 {
+			interrupts = <1 1 0 0>;
+			reg = <0x2>;
+		};
+	};
+
+	mdio@25000 {
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	ethernet@b0000 {
+		phy-handle = <&phy2>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi0>;
+		phy-connection-type = "sgmii";
+	};
+
+	ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1024rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1024rdb_32b.dts
new file mode 100644
index 0000000000..8b09b9d56a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1024rdb_32b.dts
@@ -0,0 +1,87 @@
+/*
+ * P1024 RDB 32Bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1024RDB";
+	compatible = "fsl,P1024RDB";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0x0 0xffe05000 0 0x1000>;
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000	/* maps nor@0,0 (16MB) */
+			  0x1 0x0 0x0 0xff800000 0x00040000>;	/* maps nand@1,0 (256KB) */
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@ffe09000 {
+		reg = <0x0 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0x0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0x0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0x0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0x0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1024rdb.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1024rdb_36b.dts b/arch/powerpc/boot/dts/fsl/p1024rdb_36b.dts
new file mode 100644
index 0000000000..e7093aef28
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1024rdb_36b.dts
@@ -0,0 +1,87 @@
+/*
+ * P1024 RDB 36Bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1020si-pre.dtsi"
+/ {
+	model = "fsl,P1024RDB";
+	compatible = "fsl,P1024RDB";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+		ranges = <0x0 0x0 0xf 0xef000000 0x01000000	/* maps nor@0,0 (16MB) */
+			  0x1 0x0 0xf 0xff800000 0x00040000>;	/* maps nand@1,0 (256KB) */
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p1024rdb.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
new file mode 100644
index 0000000000..0a5434a631
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
@@ -0,0 +1,326 @@
+/*
+ * P1025 RDB Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x1000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 3.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00380000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 11MB for JFFS2 based Root file System */
+			reg = <0x00400000 0x00b00000>;
+			label = "NOR JFFS2 Root File System";
+		};
+
+		partition@f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x00f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p1025-fcm-nand",
+			     "fsl,elbc-fcm-nand";
+		reg = <0x1 0x0 0x40000>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND DTB Image";
+		};
+
+		partition@200000 {
+			/* 4MB for Linux Kernel Image */
+			reg = <0x00200000 0x00400000>;
+			label = "NAND Linux Kernel Image";
+		};
+
+		partition@600000 {
+			/* 4MB for Compressed Root file System Image */
+			reg = <0x00600000 0x00400000>;
+			label = "NAND Compressed RFS Image";
+		};
+
+		partition@a00000 {
+			/* 7MB for JFFS2 based Root file System */
+			reg = <0x00a00000 0x00700000>;
+			label = "NAND JFFS2 Root File System";
+		};
+
+		partition@1100000 {
+			/* 15MB for User Writable Area */
+			reg = <0x01100000 0x00f00000>;
+			label = "NAND Writable User area";
+		};
+	};
+
+};
+
+&soc {
+	i2c@3000 {
+		rtc@68 {
+			compatible = "dallas,ds1339";
+			reg = <0x68>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;	/* each partition reg is <offset size> */
+			compatible = "spansion,s25sl12801", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>; /* input clock */
+
+			partition@0 {	/* unit addresses equal the reg offsets */
+				/* 512KB for u-boot Bootloader Image */
+				reg = <0x0 0x00080000>;
+				label = "u-boot";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 512KB for DTB Image */
+				reg = <0x00080000 0x00080000>;
+				label = "dtb";
+			};
+
+			partition@100000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00100000 0x00400000>;
+				label = "kernel";
+			};
+
+			partition@500000 {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00500000 0x00400000>;
+				label = "file system";
+			};
+
+			partition@900000 {
+				/* 7MB for JFFS2 based RFS */
+				reg = <0x00900000 0x00700000>;
+				label = "file system jffs2";
+			};
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	/* USB2 is shared with localbus, so it must be disabled
+	   by default. We can't put 'status = "disabled";' here
+	   since U-Boot doesn't clear the status property when
+	   it enables USB2. OTOH, U-Boot does create a new node
+	   when there isn't any. So, just comment it out.
+	usb@23000 {
+		phy_type = "ulpi";
+	};
+	*/
+
+	mdio@24000 {
+		phy0: ethernet-phy@0 {
+			interrupt-parent = <&mpic>;
+			interrupts = <3 1 0 0>;	/* 4 cells, like the other &mpic users; NOTE(review): confirm mpic #interrupt-cells */
+			reg = <0x0>;
+		};
+
+		phy1: ethernet-phy@1 {
+			interrupt-parent = <&mpic>;
+			interrupts = <2 1 0 0>;	/* 4 cells, like the other &mpic users; NOTE(review): confirm mpic #interrupt-cells */
+			reg = <0x1>;
+		};
+
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@25000 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@b0000 {
+		fixed-link = <1 1 1000 0 0>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	enet1: ethernet@b1000 {
+		phy-handle = <&phy0>;
+		tbi-handle = <&tbi1>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	par_io@e0100 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xe0100 0x60>;
+		ranges = <0x0 0xe0100 0x60>;
+		device_type = "par_io";
+		num-ports = <3>;
+		pio1: ucc_pin@1 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
+				0x1  0x14 0x3  0x0  0x1  0x0    /* QE_MUX_MDIO */
+				0x0  0x17 0x2  0x0  0x2  0x0    /* CLK12 */
+				0x0  0x18 0x2  0x0  0x1  0x0    /* CLK9 */
+				0x0  0x7  0x1  0x0  0x2  0x0    /* ENET1_TXD0_SER1_TXD0 */
+				0x0  0x9  0x1  0x0  0x2  0x0    /* ENET1_TXD1_SER1_TXD1 */
+				0x0  0xb  0x1  0x0  0x2  0x0    /* ENET1_TXD2_SER1_TXD2 */
+				0x0  0xc  0x1  0x0  0x2  0x0    /* ENET1_TXD3_SER1_TXD3 */
+				0x0  0x6  0x2  0x0  0x2  0x0    /* ENET1_RXD0_SER1_RXD0 */
+				0x0  0xa  0x2  0x0  0x2  0x0    /* ENET1_RXD1_SER1_RXD1 */
+				0x0  0xe  0x2  0x0  0x2  0x0    /* ENET1_RXD2_SER1_RXD2 */
+				0x0  0xf  0x2  0x0  0x2  0x0    /* ENET1_RXD3_SER1_RXD3 */
+				0x0  0x5  0x1  0x0  0x2  0x0    /* ENET1_TX_EN_SER1_RTS_B */
+				0x0  0xd  0x1  0x0  0x2  0x0    /* ENET1_TX_ER */
+				0x0  0x4  0x2  0x0  0x2  0x0    /* ENET1_RX_DV_SER1_CTS_B */
+				0x0  0x8  0x2  0x0  0x2  0x0    /* ENET1_RX_ER_SER1_CD_B */
+				0x0  0x11 0x2  0x0  0x2  0x0    /* ENET1_CRS */
+				0x0  0x10 0x2  0x0  0x2  0x0>;    /* ENET1_COL */
+		};
+
+		pio2: ucc_pin@2 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
+				0x1  0x14 0x3  0x0  0x1  0x0    /* QE_MUX_MDIO */
+				0x1  0xb  0x2  0x0  0x1  0x0    /* CLK13 */
+				0x1  0x7  0x1  0x0  0x2  0x0    /* ENET5_TXD0_SER5_TXD0 */
+				0x1  0xa  0x1  0x0  0x2  0x0    /* ENET5_TXD1_SER5_TXD1 */
+				0x1  0x6  0x2  0x0  0x2  0x0    /* ENET5_RXD0_SER5_RXD0 */
+				0x1  0x9  0x2  0x0  0x2  0x0    /* ENET5_RXD1_SER5_RXD1 */
+				0x1  0x5  0x1  0x0  0x2  0x0    /* ENET5_TX_EN_SER5_RTS_B */
+				0x1  0x4  0x2  0x0  0x2  0x0    /* ENET5_RX_DV_SER5_CTS_B */
+				0x1  0x8  0x2  0x0  0x2  0x0>;    /* ENET5_RX_ER_SER5_CD_B */
+		};
+
+		pio3: ucc_pin@3 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x0  0x16 0x2  0x0  0x2  0x0    /* SER7_CD_B*/
+				0x0  0x12 0x2  0x0  0x2  0x0    /* SER7_CTS_B*/
+				0x0  0x13 0x1  0x0  0x2  0x0    /* SER7_RTS_B*/
+				0x0  0x14 0x2  0x0  0x2  0x0    /* SER7_RXD0*/
+				0x0  0x15 0x1  0x0  0x2  0x0>;    /* SER7_TXD0*/
+		};
+
+		pio4: ucc_pin@4 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x1  0x0  0x2  0x0  0x2  0x0    /* SER3_CD_B*/
+				0x0  0x1c 0x2  0x0  0x2  0x0    /* SER3_CTS_B*/
+				0x0  0x1d 0x1  0x0  0x2  0x0    /* SER3_RTS_B*/
+				0x0  0x1e 0x2  0x0  0x2  0x0    /* SER3_RXD0*/
+				0x0  0x1f 0x1  0x0  0x2  0x0>;    /* SER3_TXD0*/
+		};
+	};
+};
+
+&qe {
+	serial2: ucc@2600 {
+		device_type = "serial";
+		compatible = "ucc_uart";
+		port-number = <0>;
+		rx-clock-name = "brg6";
+		tx-clock-name = "brg6";
+		pio-handle = <&pio3>;
+	};
+
+	serial3: ucc@2200 {
+		device_type = "serial";
+		compatible = "ucc_uart";
+		port-number = <1>;
+		rx-clock-name = "brg2";
+		tx-clock-name = "brg2";
+		pio-handle = <&pio4>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
new file mode 100644
index 0000000000..ea33b57f87
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
@@ -0,0 +1,133 @@
+/*
+ * P1025 RDB Device Tree Source (32-bit address map)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1021si-pre.dtsi"
+/ {
+	model = "fsl,P1025RDB";
+	compatible = "fsl,P1025RDB";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xff800000 0x00040000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xe0000000 0 0xe0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0 0xffe09000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0 0xe0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	qe: qe@ffe80000 {
+		ranges = <0x0 0x0 0xffe80000 0x40000>;
+		reg = <0 0xffe80000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+		status = "disabled"; /* no firmware loaded */
+
+		enet3: ucc@2000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			rx-clock-name = "clk12";
+			tx-clock-name = "clk9";
+			pio-handle = <&pio1>;
+			phy-handle = <&qe_phy0>;
+			phy-connection-type = "mii";
+		};
+
+		mdio@2120 {
+			qe_phy0: ethernet-phy@0 {	/* referenced by enet3 (ucc@2000) phy-handle */
+				interrupt-parent = <&mpic>;
+				interrupts = <4 1 0 0>;
+				reg = <0x6>;	/* NOTE(review): unit address @0 does not match reg 0x6 - confirm the PHY address */
+			};
+			qe_phy1: ethernet-phy@3 {	/* referenced by enet4 (ucc@2400) phy-handle */
+				interrupt-parent = <&mpic>;
+				interrupts = <5 1 0 0>;
+				reg = <0x3>;
+			};
+			tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet4: ucc@2400 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			rx-clock-name = "none";
+			tx-clock-name = "clk13";
+			pio-handle = <&pio2>;
+			phy-handle = <&qe_phy1>;
+			phy-connection-type = "rmii";
+		};
+	};
+};
+
+/include/ "p1025rdb.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb_36b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_36b.dts
new file mode 100644
index 0000000000..b0ded5e8bd
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb_36b.dts
@@ -0,0 +1,93 @@
+/*
+ * P1025 RDB Device Tree Source (36-bit address map)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1021si-pre.dtsi"
+/ {
+	model = "fsl,P1025RDB";
+	compatible = "fsl,P1025RDB";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+
+		/* NOR, NAND Flashes */
+		ranges = <0x0 0x0 0xf 0xef000000 0x01000000
+			  0x1 0x0 0xf 0xff800000 0x00040000>;
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xe 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	qe: qe@fffe80000 {
+		status = "disabled"; /* no firmware loaded */
+	};
+
+};
+
+/include/ "p1025rdb.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1025twr.dts b/arch/powerpc/boot/dts/fsl/p1025twr.dts
new file mode 100644
index 0000000000..9b8863b74b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1025twr.dts
@@ -0,0 +1,95 @@
+/*
+ * P1025 TWR Device Tree Source (32-bit address map)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p1021si-pre.dtsi"
+/ {
+	model = "fsl,P1025";
+	compatible = "fsl,TWR-P1025";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* NOR Flash and SSD1289 */
+		ranges = <0x0 0x0 0x0 0xec000000 0x04000000
+			  0x2 0x0 0x0 0xe0000000 0x00020000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0 0xffe09000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	qe: qe@ffe80000 {
+		ranges = <0x0 0x0 0xffe80000 0x40000>;
+		reg = <0 0xffe80000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+	};
+};
+
+/include/ "p1025twr.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
new file mode 100644
index 0000000000..ab75b8f29a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
@@ -0,0 +1,292 @@
+/*
+ * P1025 TWR Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	aliases {
+		ethernet3 = &enet3;	/* QE ucc@2000 */
+		ethernet4 = &enet4;	/* QE ucc@2400 */
+	};
+};
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x4000000>;	/* chip select 0, offset 0, 64MB */
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 5.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00580000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@600000 {
+			/* 56.75MB for Root file System */
+			reg = <0x00600000 0x038c0000>;
+			label = "NOR Root File System";
+		};
+
+		partition@3ec0000 {
+			/* This location must not be altered  */
+			/* 256KB for QE ucode firmware*/
+			reg = <0x03ec0000 0x00040000>;
+			label = "NOR QE microcode firmware";
+			read-only;
+		};
+
+		partition@3f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x03f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	/* CS2 for Display */
+	display@2,0 {
+		compatible = "solomon,ssd1289fb";
+		reg = <0x2 0x0000 0x0004>;
+	};
+
+};
+
+&soc {
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	mdio@24000 {
+		phy0: ethernet-phy@2 {
+			interrupt-parent = <&mpic>;
+			interrupts = <1 1 0 0>;
+			reg = <0x2>;
+		};
+
+		phy1: ethernet-phy@1 {
+			interrupt-parent = <&mpic>;
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@25000 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26000 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	ptp_clock@b0e00 {
+		compatible = "fsl,etsec-ptp";
+		reg = <0xb0e00 0xb0>;
+		interrupts = <68 2 0 0 69 2 0 0>;
+		fsl,tclk-period	= <10>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0xc0000021>;
+		fsl,tmr-fiper1	= <999999990>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <133333332>;
+	};
+
+	enet0: ethernet@b0000 {
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	enet1: ethernet@b1000 {
+		status = "disabled";
+	};
+
+	enet2: ethernet@b2000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	par_io@e0100 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xe0100 0x60>;
+		ranges = <0x0 0xe0100 0x60>;
+		device_type = "par_io";
+		num-ports = <3>;
+		pio1: ucc_pin@1 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
+				0x1  0x14 0x3  0x0  0x1  0x0    /* QE_MUX_MDIO */
+				0x0  0x17 0x2  0x0  0x2  0x0    /* CLK12 */
+				0x0  0x18 0x2  0x0  0x1  0x0    /* CLK9 */
+				0x0  0x7  0x1  0x0  0x2  0x0    /* ENET1_TXD0_SER1_TXD0 */
+				0x0  0x9  0x1  0x0  0x2  0x0    /* ENET1_TXD1_SER1_TXD1 */
+				0x0  0xb  0x1  0x0  0x2  0x0    /* ENET1_TXD2_SER1_TXD2 */
+				0x0  0xc  0x1  0x0  0x2  0x0    /* ENET1_TXD3_SER1_TXD3 */
+				0x0  0x6  0x2  0x0  0x2  0x0    /* ENET1_RXD0_SER1_RXD0 */
+				0x0  0xa  0x2  0x0  0x2  0x0    /* ENET1_RXD1_SER1_RXD1 */
+				0x0  0xe  0x2  0x0  0x2  0x0    /* ENET1_RXD2_SER1_RXD2 */
+				0x0  0xf  0x2  0x0  0x2  0x0    /* ENET1_RXD3_SER1_RXD3 */
+				0x0  0x5  0x1  0x0  0x2  0x0    /* ENET1_TX_EN_SER1_RTS_B */
+				0x0  0xd  0x1  0x0  0x2  0x0    /* ENET1_TX_ER */
+				0x0  0x4  0x2  0x0  0x2  0x0    /* ENET1_RX_DV_SER1_CTS_B */
+				0x0  0x8  0x2  0x0  0x2  0x0    /* ENET1_RX_ER_SER1_CD_B */
+				0x0  0x11 0x2  0x0  0x2  0x0    /* ENET1_CRS */
+				0x0  0x10 0x2  0x0  0x2  0x0>;    /* ENET1_COL */
+		};
+
+		pio2: ucc_pin@2 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x1  0x13 0x1  0x0  0x1  0x0    /* QE_MUX_MDC */
+				0x1  0x14 0x3  0x0  0x1  0x0    /* QE_MUX_MDIO */
+				0x1  0xb  0x2  0x0  0x1  0x0    /* CLK13 */
+				0x1  0x7  0x1  0x0  0x2  0x0    /* ENET5_TXD0_SER5_TXD0 */
+				0x1  0xa  0x1  0x0  0x2  0x0    /* ENET5_TXD1_SER5_TXD1 */
+				0x1  0x6  0x2  0x0  0x2  0x0    /* ENET5_RXD0_SER5_RXD0 */
+				0x1  0x9  0x2  0x0  0x2  0x0    /* ENET5_RXD1_SER5_RXD1 */
+				0x1  0x5  0x1  0x0  0x2  0x0    /* ENET5_TX_EN_SER5_RTS_B */
+				0x1  0x4  0x2  0x0  0x2  0x0    /* ENET5_RX_DV_SER5_CTS_B */
+				0x1  0x8  0x2  0x0  0x2  0x0>;    /* ENET5_RX_ER_SER5_CD_B */
+		};
+
+		pio3: ucc_pin@3 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x0  0x16 0x2  0x0  0x2  0x0    /* SER7_CD_B*/
+				0x0  0x12 0x2  0x0  0x2  0x0    /* SER7_CTS_B*/
+				0x0  0x13 0x1  0x0  0x2  0x0    /* SER7_RTS_B*/
+				0x0  0x14 0x2  0x0  0x2  0x0    /* SER7_RXD0*/
+				0x0  0x15 0x1  0x0  0x2  0x0>;    /* SER7_TXD0*/
+		};
+
+		pio4: ucc_pin@4 {
+			pio-map = <
+		/* port  pin  dir  open_drain  assignment  has_irq */
+				0x1  0x0  0x2  0x0  0x2  0x0    /* SER3_CD_B*/
+				0x0  0x1c 0x2  0x0  0x2  0x0    /* SER3_CTS_B*/
+				0x0  0x1d 0x1  0x0  0x2  0x0    /* SER3_RTS_B*/
+				0x0  0x1e 0x2  0x0  0x2  0x0    /* SER3_RXD0*/
+				0x0  0x1f 0x1  0x0  0x2  0x0>;    /* SER3_TXD0*/
+		};
+	};
+};
+
+&qe {
+	enet3: ucc@2000 {
+		device_type = "network";
+		compatible = "ucc_geth";
+		rx-clock-name = "clk12";
+		tx-clock-name = "clk9";
+		pio-handle = <&pio1>;
+		phy-handle = <&qe_phy0>;
+		phy-connection-type = "mii";
+	};
+
+	mdio@2120 {
+		qe_phy0: ethernet-phy@18 {
+			interrupt-parent = <&mpic>;
+			interrupts = <4 1 0 0>;
+			reg = <0x18>;
+			device_type = "ethernet-phy";
+		};
+		qe_phy1: ethernet-phy@19 {
+			interrupt-parent = <&mpic>;
+			interrupts = <5 1 0 0>;
+			reg = <0x19>;
+			device_type = "ethernet-phy";
+		};
+		tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet4: ucc@2400 {
+		device_type = "network";
+		compatible = "ucc_geth";
+		rx-clock-name = "none";
+		tx-clock-name = "clk13";
+		pio-handle = <&pio2>;
+		phy-handle = <&qe_phy1>;
+		phy-connection-type = "rmii";
+	};
+
+	serial2: ucc@2600 {
+		device_type = "serial";
+		compatible = "ucc_uart";
+		port-number = <0>;
+		rx-clock-name = "brg6";
+		tx-clock-name = "brg6";
+		pio-handle = <&pio3>;
+	};
+
+	serial3: ucc@2200 {
+		device_type = "serial";
+		compatible = "ucc_uart";
+		port-number = <1>;
+		rx-clock-name = "brg2";
+		tx-clock-name = "brg2";
+		pio-handle = <&pio4>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p2020ds.dts b/arch/powerpc/boot/dts/fsl/p2020ds.dts
new file mode 100644
index 0000000000..ae380ebe55
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020ds.dts
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P2020 DS Device Tree Source
+ *
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {
+	model = "fsl,P2020DS";
+	compatible = "fsl,P2020DS";
+
+	memory {
+		device_type = "memory";
+	};
+
+	board_lbc: lbc: localbus@ffe05000 {
+		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000	/* CS0: nor@0,0 */
+			  0x1 0x0 0x0 0xe0000000 0x08000000	/* CS1: no child node in p2020ds.dtsi */
+			  0x2 0x0 0x0 0xffa00000 0x00040000	/* CS2: nand@2,0 */
+			  0x3 0x0 0x0 0xffdf0000 0x00008000	/* CS3: board-control@3,0 (FPGA) */
+			  0x4 0x0 0x0 0xffa40000 0x00040000	/* CS4: nand@4,0 */
+			  0x5 0x0 0x0 0xffa80000 0x00040000	/* CS5: nand@5,0 */
+			  0x6 0x0 0x0 0xffac0000 0x00040000>;	/* CS6: nand@6,0 */
+		reg = <0 0xffe05000 0 0x1000>;
+	};
+
+	board_soc: soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci2: pcie@ffe08000 {
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		reg = <0 0xffe08000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	board_pci1: pci1: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		reg = <0 0xffe09000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci0: pcie@ffe0a000 {
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+};
+
+/*
+ * p2020ds.dtsi must be last to ensure board_pci1 overrides pci1 settings
+ * for interrupt-map & interrupt-map-mask
+ */
+
+/include/ "p2020si-post.dtsi"
+/include/ "p2020ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2020ds.dtsi b/arch/powerpc/boot/dts/fsl/p2020ds.dtsi
new file mode 100644
index 0000000000..e699cf95b0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020ds.dtsi
@@ -0,0 +1,327 @@
+/*
+ * P2020DS Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&board_lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x8000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		ramdisk@0 {
+			reg = <0x0 0x03000000>;
+			read-only;
+		};
+
+		diagnostic@3000000 {
+			reg = <0x03000000 0x00e00000>;
+			read-only;
+		};
+
+		dink@3e00000 {
+			reg = <0x03e00000 0x00200000>;
+			read-only;
+		};
+
+		kernel@4000000 {
+			reg = <0x04000000 0x00400000>;
+			read-only;
+		};
+
+		jffs2@4400000 {
+			reg = <0x04400000 0x03b00000>;
+		};
+
+		dtb@7f00000 {
+			reg = <0x07f00000 0x00080000>;
+			read-only;
+		};
+
+		u-boot@7f80000 {
+			reg = <0x07f80000 0x00080000>;
+			read-only;
+		};
+	};
+
+	nand@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,elbc-fcm-nand";
+		reg = <0x2 0x0 0x40000>;
+
+		u-boot@0 {
+			reg = <0x0 0x02000000>;
+			read-only;
+		};
+
+		jffs2@2000000 {
+			reg = <0x02000000 0x10000000>;
+		};
+
+		ramdisk@12000000 {
+			reg = <0x12000000 0x08000000>;
+			read-only;
+		};
+
+		kernel@1a000000 {
+			reg = <0x1a000000 0x04000000>;
+		};
+
+		dtb@1e000000 {
+			reg = <0x1e000000 0x01000000>;
+			read-only;
+		};
+
+		empty@1f000000 {
+			reg = <0x1f000000 0x21000000>;
+		};
+	};
+
+	board-control@3,0 {
+		compatible = "fsl,p2020ds-fpga", "fsl,fpga-ngpixis";
+		reg = <0x3 0x0 0x30>;
+	};
+
+	nand@4,0 {
+		compatible = "fsl,elbc-fcm-nand";
+		reg = <0x4 0x0 0x40000>;
+	};
+
+	nand@5,0 {
+		compatible = "fsl,elbc-fcm-nand";
+		reg = <0x5 0x0 0x40000>;
+	};
+
+	nand@6,0 {
+		compatible = "fsl,elbc-fcm-nand";
+		reg = <0x6 0x0 0x40000>;
+	};
+};
+
+&board_soc {
+	usb@22000 {
+		phy_type = "ulpi";
+		dr_mode = "host";
+	};
+
+	mdio@24520 {
+		phy0: ethernet-phy@0 {
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {
+			interrupts = <3 1 0 0>;
+			reg = <0x1>;
+		};
+		phy2: ethernet-phy@2 {
+			interrupts = <3 1 0 0>;
+			reg = <0x2>;
+		};
+		/* SGMII PHYs: node unit address equals the MDIO address in reg */
+		sgmii_phy1: sgmii-phy@1c {
+			interrupts = <5 1 0 0>;
+			reg = <0x1c>;
+		};
+		sgmii_phy2: sgmii-phy@1d {
+			interrupts = <5 1 0 0>;
+			reg = <0x1d>;
+		};
+
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+
+	};
+
+	mdio@25520 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26520 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+
+	};
+
+	ptp_clock@24e00 {
+		fsl,tclk-period = <5>;
+		fsl,tmr-prsc = <200>;
+		fsl,tmr-add = <0xCCCCCCCD>;
+		fsl,tmr-fiper1 = <0x3B9AC9FB>;
+		fsl,tmr-fiper2 = <0x0001869B>;
+		fsl,max-adj = <249999999>;
+	};
+
+	enet0: ethernet@24000 {
+		tbi-handle = <&tbi0>;
+		phy-handle = <&phy0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@25000 {
+		tbi-handle = <&tbi1>;
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+
+	};
+
+	enet2: ethernet@26000 {
+		tbi-handle = <&tbi2>;
+		phy-handle = <&phy2>;
+		phy-connection-type = "rgmii-id";
+	};
+};
+
+&board_pci1 {
+	pcie@0 {
+		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
+		interrupt-map = <
+
+			// IDSEL 0x11 func 0 - PCI slot 1
+			0x8800 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8800 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 1 - PCI slot 1
+			0x8900 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8900 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 2 - PCI slot 1
+			0x8a00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8a00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 3 - PCI slot 1
+			0x8b00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8b00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 4 - PCI slot 1
+			0x8c00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8c00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 5 - PCI slot 1
+			0x8d00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8d00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 6 - PCI slot 1
+			0x8e00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8e00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 7 - PCI slot 1
+			0x8f00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8f00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x1d  Audio
+			0xe800 0x0 0x0 0x1 &i8259 0x6 0x2
+
+			// IDSEL 0x1e Legacy
+			0xf000 0x0 0x0 0x1 &i8259 0x7 0x2
+			0xf100 0x0 0x0 0x1 &i8259 0x7 0x2
+
+			// IDSEL 0x1f IDE/SATA
+			0xf800 0x0 0x0 0x1 &i8259 0xe 0x2
+			0xf900 0x0 0x0 0x1 &i8259 0x5 0x2
+			>;
+
+		uli1575@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+			isa@1e {
+				device_type = "isa";
+				#interrupt-cells = <2>;
+				#size-cells = <1>;
+				#address-cells = <2>;
+				reg = <0xf000 0x0 0x0 0x0 0x0>;
+				ranges = <0x1 0x0 0x1000000 0x0 0x0
+					  0x1000>;
+				interrupt-parent = <&i8259>;
+
+				i8259: interrupt-controller@20 {
+					reg = <0x1 0x20 0x2
+					       0x1 0xa0 0x2
+					       0x1 0x4d0 0x2>;
+					interrupt-controller;
+					device_type = "interrupt-controller";
+					#address-cells = <0>;
+					#interrupt-cells = <2>;
+					compatible = "chrp,iic";
+					interrupts = <4 1 0 0>;
+					interrupt-parent = <&mpic>;
+				};
+
+				i8042@60 {
+					#size-cells = <0>;
+					#address-cells = <1>;
+					reg = <0x1 0x60 0x1 0x1 0x64 0x1>;
+					interrupts = <1 3 12 3>;
+					interrupt-parent =
+						<&i8259>;
+
+					keyboard@0 {
+						reg = <0x0>;
+						compatible = "pnpPNP,303";
+					};
+
+					mouse@1 {
+						reg = <0x1>;
+						compatible = "pnpPNP,f03";
+					};
+				};
+
+				rtc@70 {
+					compatible = "pnpPNP,b00";
+					reg = <0x1 0x70 0x2>;
+				};
+
+				gpio@400 {
+					reg = <0x1 0x400 0x80>;
+				};
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p2020rdb-pc.dtsi b/arch/powerpc/boot/dts/fsl/p2020rdb-pc.dtsi
new file mode 100644
index 0000000000..03c9afc824
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb-pc.dtsi
@@ -0,0 +1,241 @@
+/*
+ * P2020 RDB-PC Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	nor@0,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x1000000>;
+		bank-width = <2>;
+		device-width = <1>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 256KB for Vitesse 7385 Switch firmware */
+			reg = <0x0 0x00040000>;
+			label = "NOR Vitesse-7385 Firmware";
+			read-only;
+		};
+
+		partition@40000 {
+			/* 256KB for DTB Image */
+			reg = <0x00040000 0x00040000>;
+			label = "NOR DTB Image";
+		};
+
+		partition@80000 {
+			/* 3.5 MB for Linux Kernel Image */
+			reg = <0x00080000 0x00380000>;
+			label = "NOR Linux Kernel Image";
+		};
+
+		partition@400000 {
+			/* 11MB for JFFS2 based Root file System */
+			reg = <0x00400000 0x00b00000>;
+			label = "NOR JFFS2 Root File System";
+		};
+
+		partition@f00000 {
+			/* This location must not be altered  */
+			/* 512KB for u-boot Bootloader Image */
+			/* 512KB for u-boot Environment Variables */
+			reg = <0x00f00000 0x00100000>;
+			label = "NOR U-Boot Image";
+			read-only;
+		};
+	};
+
+	nand@1,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,p2020-fcm-nand",
+				 "fsl,elbc-fcm-nand";
+		reg = <0x1 0x0 0x40000>;
+
+		partition@0 {
+			/* This location must not be altered  */
+			/* 1MB for u-boot Bootloader Image */
+			reg = <0x0 0x00100000>;
+			label = "NAND U-Boot Image";
+			read-only;
+		};
+
+		partition@100000 {
+			/* 1MB for DTB Image */
+			reg = <0x00100000 0x00100000>;
+			label = "NAND DTB Image";
+		};
+
+		partition@200000 {
+			/* 4MB for Linux Kernel Image */
+			reg = <0x00200000 0x00400000>;
+			label = "NAND Linux Kernel Image";
+		};
+
+		partition@600000 {
+			/* 4MB for Compressed Root file System Image */
+			reg = <0x00600000 0x00400000>;
+			label = "NAND Compressed RFS Image";
+		};
+
+		partition@a00000 {
+			/* 7MB for JFFS2 based Root file System */
+			reg = <0x00a00000 0x00700000>;
+			label = "NAND JFFS2 Root File System";
+		};
+
+		partition@1100000 {
+			/* 15MB writable user area */
+			reg = <0x01100000 0x00f00000>;
+			label = "NAND Writable User area";
+		};
+	};
+
+	L2switch@2,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "vitesse-7385";
+		reg = <0x2 0x0 0x20000>;
+	};
+
+	cpld@3,0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "cpld";
+		reg = <0x3 0x0 0x20000>;
+		read-only;
+	};
+};
+
+&soc {
+	i2c@3000 {
+		rtc@68 {
+			compatible = "pericom,pt7c4338";
+			reg = <0x68>;
+		};
+	};
+
+	spi@7000 {
+		flash@0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "spansion,m25p80", "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <40000000>;
+
+			partition@0 {
+				/* 512KB for u-boot Bootloader Image */
+				reg = <0x0 0x00080000>;
+				label = "SPI U-Boot Image";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 512KB for DTB Image */
+				reg = <0x00080000 0x00080000>;
+				label = "SPI DTB Image";
+			};
+
+			partition@100000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00100000 0x00400000>;
+				label = "SPI Linux Kernel Image";
+			};
+
+			partition@500000 {
+				/* 4MB for Compressed RFS Image */
+				reg = <0x00500000 0x00400000>;
+				label = "SPI Compressed RFS Image";
+			};
+
+			partition@900000 {
+				/* 7MB for JFFS2 based RFS */
+				reg = <0x00900000 0x00700000>;
+				label = "SPI JFFS2 RFS";
+			};
+		};
+	};
+
+	usb@22000 {
+		phy_type = "ulpi";
+	};
+
+	mdio@24520 {
+		phy0: ethernet-phy@0 {	/* phy-handle of enet1 (ethernet@25000) */
+			interrupts = <3 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 {	/* phy-handle of enet2 (ethernet@26000) */
+			interrupts = <2 1 0 0>;
+			reg = <0x1>;
+		};
+	};
+
+	mdio@25520 {
+		tbi0: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	mdio@26520 {
+		status = "disabled";
+	};
+
+	ptp_clock@24e00 {
+		fsl,tclk-period	= <5>;
+		fsl,tmr-prsc	= <2>;
+		fsl,tmr-add	= <0xaaaaaaab>;
+		fsl,tmr-fiper1	= <999999995>;
+		fsl,tmr-fiper2	= <99990>;
+		fsl,max-adj	= <299999999>;
+	};
+
+	enet0: ethernet@24000 {
+		fixed-link = <1 1 1000 0 0>;
+		phy-connection-type = "rgmii-id";
+	};
+
+	enet1: ethernet@25000 {
+		tbi-handle = <&tbi0>;
+		phy-handle = <&phy0>;
+		phy-connection-type = "sgmii";
+	};
+
+	enet2: ethernet@26000 {
+		phy-handle = <&phy1>;
+		phy-connection-type = "rgmii-id";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p2020rdb-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_32b.dts
new file mode 100644
index 0000000000..d3295c204b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_32b.dts
@@ -0,0 +1,96 @@
+/*
+ * P2020 RDB-PC 32Bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {
+	model = "fsl,P2020RDB";
+	compatible = "fsl,P2020RDB-PC";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* CS0 NOR, CS1 NAND, CS2 L2 switch, CS3 CPLD (see p2020rdb-pc.dtsi) */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xff800000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000
+			  0x3 0x0 0x0 0xffa00000 0x00020000>;
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+	};
+
+	pci2: pcie@ffe08000 {
+		reg = <0 0xffe08000 0 0x1000>;
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci0: pcie@ffe0a000 {
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p2020rdb-pc.dtsi"
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2020rdb-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_36b.dts
new file mode 100644
index 0000000000..9307a8f41d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_36b.dts
@@ -0,0 +1,96 @@
+/*
+ * P2020 RDB-PC 36Bit Physical Address Map Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {
+	model = "fsl,P2020RDB";
+	compatible = "fsl,P2020RDB-PC";
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@fffe05000 {
+		reg = <0xf 0xffe05000 0 0x1000>;
+
+		/* NOR and NAND Flashes; row layout: <child addr (2 cells) | parent addr hi lo | size>, parent hi cell 0xf = 36-bit map */
+		ranges = <0x0 0x0 0xf 0xef000000 0x01000000
+			  0x1 0x0 0xf 0xff800000 0x00040000
+			  0x2 0x0 0xf 0xffb00000 0x00020000
+			  0x3 0x0 0xf 0xffa00000 0x00020000>;
+	};
+
+	soc: soc@fffe00000 {
+		ranges = <0x0 0xf 0xffe00000 0x100000>;
+	};
+
+	pci2: pcie@fffe08000 {
+		reg = <0xf 0xffe08000 0 0x1000>;
+		status = "disabled";
+	};
+
+	pci1: pcie@fffe09000 {
+		reg = <0xf 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci0: pcie@fffe0a000 {
+		reg = <0xf 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xe0000000
+				  0x2000000 0x0 0xe0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p2020rdb-pc.dtsi"
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2020rdb.dts b/arch/powerpc/boot/dts/fsl/p2020rdb.dts
new file mode 100644
index 0000000000..3acd3890b3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb.dts
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P2020 RDB Device Tree Source
+ *
+ * Copyright 2009-2012 Freescale Semiconductor Inc.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {
+	model = "fsl,P2020RDB";
+	compatible = "fsl,P2020RDB";
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		/* NOR and NAND Flashes */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xffa00000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x1000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				/* This location must not be altered  */
+				/* 256KB for Vitesse 7385 Switch firmware */
+				reg = <0x0 0x00040000>;
+				label = "NOR (RO) Vitesse-7385 Firmware";
+				read-only;
+			};
+
+			partition@40000 {
+				/* 256KB for DTB Image */
+				reg = <0x00040000 0x00040000>;
+				label = "NOR (RO) DTB Image";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 3.5 MB for Linux Kernel Image */
+				reg = <0x00080000 0x00380000>;
+				label = "NOR (RO) Linux Kernel Image";
+				read-only;
+			};
+
+			partition@400000 {
+				/* 11MB for JFFS2 based Root file System */
+				reg = <0x00400000 0x00b00000>;
+				label = "NOR (RW) JFFS2 Root File System";
+			};
+
+			partition@f00000 {
+				/* This location must not be altered  */
+				/* 512KB for u-boot Bootloader Image */
+				/* 512KB for u-boot Environment Variables */
+				reg = <0x00f00000 0x00100000>;
+				label = "NOR (RO) U-Boot Image";
+				read-only;
+			};
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p2020-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x40000>;
+
+			partition@0 {
+				/* This location must not be altered  */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND (RO) U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND (RO) DTB Image";
+				read-only;
+			};
+
+			partition@200000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00200000 0x00400000>;
+				label = "NAND (RO) Linux Kernel Image";
+				read-only;
+			};
+
+			partition@600000 {
+				/* 4MB for Compressed Root file System Image */
+				reg = <0x00600000 0x00400000>;
+				label = "NAND (RO) Compressed RFS Image";
+				read-only;
+			};
+
+			partition@a00000 {
+				/* 7MB for JFFS2 based Root file System */
+				reg = <0x00a00000 0x00700000>;
+				label = "NAND (RW) JFFS2 Root File System";
+			};
+
+			partition@1100000 {
+				/* 15MB for Writable User area */
+				reg = <0x01100000 0x00f00000>;
+				label = "NAND (RW) Writable User area";
+			};
+		};
+
+		L2switch@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "vitesse-7385";
+			reg = <0x2 0x0 0x20000>;
+		};
+
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x100000>;
+
+		i2c@3000 {
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		spi@7000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI (RO) U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI (RO) DTB Image";
+					read-only;
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI (RO) Linux Kernel Image";
+					read-only;
+				};
+
+				partition@500000 {
+					/* 4MB for Compressed RFS Image */
+					reg = <0x00500000 0x00400000>;
+					label = "SPI (RO) Compressed RFS Image";
+					read-only;
+				};
+
+				partition@900000 {
+					/* 7MB for JFFS2 based RFS */
+					reg = <0x00900000 0x00700000>;
+					label = "SPI (RW) JFFS2 RFS";
+				};
+			};
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+			dr_mode = "host";
+		};
+
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
+				interrupts = <3 1 0 0>;
+				reg = <0x0>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupts = <3 1 0 0>;
+				reg = <0x1>;
+			};
+			tbi-phy@2 {
+				device_type = "tbi-phy";
+				reg = <0x2>;
+			};
+		};
+
+		mdio@25520 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		ptp_clock@24e00 {
+			fsl,tclk-period = <5>;
+			fsl,tmr-prsc = <200>;
+			fsl,tmr-add = <0xCCCCCCCD>;
+			fsl,tmr-fiper1 = <0x3B9AC9FB>;
+			fsl,tmr-fiper2 = <0x0001869B>;
+			fsl,max-adj = <249999999>;
+		};
+
+		enet0: ethernet@24000 {
+			fixed-link = <1 1 1000 0 0>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		enet1: ethernet@25000 {
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "sgmii";
+		};
+
+		enet2: ethernet@26000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+	};
+
+	pci0: pcie@ffe08000 {	/* NOTE(review): p2020si-post.dtsi documents &pci0 as the controller at 0xa000 (interrupt 26), not 0x8000 - confirm label */
+		reg = <0 0xffe08000 0 0x1000>;
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci2: pcie@ffe0a000 {	/* NOTE(review): p2020si-post.dtsi documents &pci2 as the controller at 0x8000 (interrupt 24), not 0xa000 - confirm label */
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
+
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
new file mode 100644
index 0000000000..81b9ab2119
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
@@ -0,0 +1,209 @@
+/*
+ * P2020/P2010 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+	#address-cells = <2>;	/* children of the local bus use two-cell addresses (e.g. nor@0,0 in p2020rdb.dts) */
+	#size-cells = <1>;	/* hence each board-level lbc "ranges" row ends in a single size cell */
+	compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <19 2 0 0>;
+};
+
+/* controller at 0xa000 */
+&pci0 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <26 2 0 0>;	/* same specifier is repeated on the pcie@0 child below */
+	law_trgt_if = <2>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <26 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* lookup uses only bits 0xf800 of the first address cell and the low 3 bits of the interrupt cell */
+		interrupt-map = <
+			/* IDSEL 0x0: child interrupt specifiers 1-4 map to &mpic 0x0-0x3 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0x9000 */
+&pci1 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <25 2 0 0>;	/* same specifier is repeated on the pcie@0 child below */
+	law_trgt_if = <1>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <25 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* lookup uses only bits 0xf800 of the first address cell and the low 3 bits of the interrupt cell */
+
+		interrupt-map = <
+			/* IDSEL 0x0: child interrupt specifiers 1-4 map to &mpic 0x4-0x7 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
+
+/* controller at 0x8000 */
+&pci2 {
+	compatible = "fsl,mpc8548-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	clock-frequency = <33333333>;
+	interrupts = <24 2 0 0>;	/* same specifier is repeated on the pcie@0 child below */
+	law_trgt_if = <0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <24 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* lookup uses only bits 0xf800 of the first address cell and the low 3 bits of the interrupt cell */
+
+		interrupt-map = <
+			/* IDSEL 0x0: child interrupt specifiers 1-4 map to &mpic 0x8-0xb */
+			0000 0x0 0x0 0x1 &mpic 0x8 0x1 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x9 0x1 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0xa 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0xb 0x1 0x0 0x0
+			>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "fsl,p2020-immr", "simple-bus";
+	bus-frequency = <0>;		// Filled out by uboot.
+
+	ecm-law@0 {
+		compatible = "fsl,ecm-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <12>;
+	};
+
+	ecm@1000 {
+		compatible = "fsl,p2020-ecm", "fsl,ecm";
+		reg = <0x1000 0x1000>;
+		interrupts = <17 2 0 0>;
+	};
+
+	memory-controller@2000 {
+		compatible = "fsl,p2020-memory-controller";
+		reg = <0x2000 0x1000>;
+		interrupts = <18 2 0 0>;
+	};
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+/include/ "pq3-espi-0.dtsi"
+	spi0: spi@7000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "pq3-dma-1.dtsi"
+/include/ "pq3-gpio-0.dtsi"
+
+	L2: l2-cache-controller@20000 {
+		compatible = "fsl,p2020-l2-cache-controller";
+		reg = <0x20000 0x1000>;
+		cache-line-size = <32>;	// 32 bytes
+		cache-size = <0x80000>; // L2,512K
+		interrupts = <16 2 0 0>;
+	};
+
+/include/ "pq3-dma-0.dtsi"
+/include/ "pq3-usb2-dr-0.dtsi"
+	usb@22000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
+	};
+/include/ "pq3-etsec1-0.dtsi"
+/include/ "pq3-etsec1-timer-0.dtsi"
+
+	ptp_clock@24e00 {
+		interrupts = <68 2 0 0 69 2 0 0 70 2 0 0>;
+	};
+
+
+/include/ "pq3-etsec1-1.dtsi"
+/include/ "pq3-etsec1-2.dtsi"
+/include/ "pq3-esdhc-0.dtsi"
+	sdhc@2e000 {
+		compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+	};
+
+/include/ "pq3-sec3.1-0.dtsi"
+/include/ "pq3-mpic.dtsi"
+/include/ "pq3-mpic-timer-B.dtsi"
+
+	global-utilities@e0000 {
+		compatible = "fsl,p2020-guts";
+		reg = <0xe0000 0x1000>;
+		fsl,has-rstcr;
+	};
+
+	pmc: power@e0070 {
+		compatible = "fsl,mpc8548-pmc";
+		reg = <0xe0070 0x20>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi
new file mode 100644
index 0000000000..42bf3c6d25
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi
@@ -0,0 +1,72 @@
+/*
+ * P2020/P2010 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500v2_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P2020";
+	#address-cells = <2>;	/* root addresses are two cells (hi, lo), as used by the board files' reg/ranges */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* the mpic label is not defined in this file */
+
+	aliases {	/* every label referenced here is defined outside this file */
+		serial0 = &serial0;
+		serial1 = &serial1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;	/* L2 label is declared in p2020si-post.dtsi */
+		};
+
+		PowerPC,P2020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;	/* both cores reference the same L2 node */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p2041rdb.dts b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
new file mode 100644
index 0000000000..950816b9d6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
@@ -0,0 +1,340 @@
+/*
+ * P2041RDB Device Tree Source
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p2041si-pre.dtsi"
+
+/ {
+	model = "fsl,P2041RDB";
+	compatible = "fsl,P2041RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		phy_rgmii_0 = &phy_rgmii_0;
+		phy_rgmii_1 = &phy_rgmii_1;
+		phy_sgmii_2 = &phy_sgmii_2;
+		phy_sgmii_3 = &phy_sgmii_3;
+		phy_sgmii_4 = &phy_sgmii_4;
+		phy_sgmii_1c = &phy_sgmii_1c;
+		phy_sgmii_1d = &phy_sgmii_1d;
+		phy_sgmii_1e = &phy_sgmii_1e;
+		phy_sgmii_1f = &phy_sgmii_1f;
+		phy_xgmii_2 = &phy_xgmii_2;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;		/* 16 MiB */
+			alignment = <0 0x1000000>;	/* aligned to its own size */
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;		/* 4 MiB */
+			alignment = <0 0x400000>;	/* aligned to its own size */
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;		/* 32 MiB */
+			alignment = <0 0x2000000>;	/* aligned to its own size */
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {	/* SPI NOR; each partition's unit-address equals the first cell of its reg */
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+				partition@0 {
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>;	/* 1 MiB */
+					read-only;
+				};
+				partition@100000 {
+					label = "kernel";
+					reg = <0x00100000 0x00500000>;	/* 5 MiB */
+					read-only;
+				};
+				partition@600000 {
+					label = "dtb";
+					reg = <0x00600000 0x00100000>;	/* 1 MiB */
+					read-only;
+				};
+				partition@700000 {
+					label = "file system";
+					reg = <0x00700000 0x00900000>;	/* 9 MiB */
+				};
+			};
+		};
+
+		i2c@118000 {
+			lm75b@48 {
+				compatible = "nxp,lm75a";
+				reg = <0x48>;
+			};
+			eeprom@50 {
+				compatible = "atmel,24c256";
+				reg = <0x50>;
+			};
+			rtc@68 {
+				compatible = "pericom,pt7c4338";
+				reg = <0x68>;
+			};
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+		};
+
+		i2c@118100 {
+			eeprom@50 {
+				compatible = "atmel,24c256";
+				reg = <0x50>;
+			};
+		};
+
+		usb1: usb@211000 {
+			dr_mode = "host";
+		};
+
+		fman@400000 {
+			ethernet@e0000 {
+				phy-handle = <&phy_sgmii_2>;
+				phy-connection-type = "sgmii";
+			};
+
+			mdio@e1120 {
+				phy_rgmii_0: ethernet-phy@0 {
+					reg = <0x0>;
+				};
+
+				phy_rgmii_1: ethernet-phy@1 {
+					reg = <0x1>;
+				};
+
+				phy_sgmii_2: ethernet-phy@2 {
+					reg = <0x2>;
+				};
+
+				phy_sgmii_3: ethernet-phy@3 {
+					reg = <0x3>;
+				};
+
+				phy_sgmii_4: ethernet-phy@4 {
+					reg = <0x4>;
+				};
+
+				phy_sgmii_1c: ethernet-phy@1c {
+					reg = <0x1c>;
+				};
+
+				phy_sgmii_1d: ethernet-phy@1d {
+					reg = <0x1d>;
+				};
+
+				phy_sgmii_1e: ethernet-phy@1e {
+					reg = <0x1e>;
+				};
+
+				phy_sgmii_1f: ethernet-phy@1f {
+					reg = <0x1f>;
+				};
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy_sgmii_3>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_sgmii_4>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy_rgmii_1>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phy_rgmii_0>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy_xgmii_2>;
+				phy-connection-type = "xgmii";
+			};
+
+			mdio@f1000 {
+				phy_xgmii_2: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  1 0 0xf 0xffa00000 0x00040000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x08000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x40000>;
+
+			partition@0 {
+				label = "NAND U-Boot Image";
+				reg = <0x0 0x02000000>;
+				read-only;
+			};
+
+			partition@2000000 {
+				label = "NAND Root File System";
+				reg = <0x02000000 0x10000000>;
+			};
+
+			partition@12000000 {
+				label = "NAND Compressed RFS Image";
+				reg = <0x12000000 0x08000000>;
+			};
+
+			partition@1a000000 {
+				label = "NAND Linux Kernel Image";
+				reg = <0x1a000000 0x04000000>;
+			};
+
+			partition@1e000000 {
+				label = "NAND DTB Image";
+				reg = <0x1e000000 0x01000000>;
+			};
+
+			partition@1f000000 {
+				label = "NAND Writable User area";
+				reg = <0x1f000000 0x01000000>;
+			};
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		reg = <0xf 0xfe201000 0 0x1000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe202000 {
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "p2041si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
new file mode 100644
index 0000000000..ddc018d422
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -0,0 +1,458 @@
+/*
+ * P2041/P2040 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr { /* extends the node labelled bman_fbpr, which the including file must define */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>; /* base 0, size 0x10_00000000 if the parent uses 2 address + 2 size cells - TODO confirm against the board file */
+};
+
+&qman_fqd { /* extends the node labelled qman_fqd, which the including file must define */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10 0>; /* base 0, size 0x10_00000000 if the parent uses 2 address + 2 size cells - TODO confirm against the board file */
+};
+
+&qman_pfdr { /* extends the node labelled qman_pfdr, which the including file must define */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10 0>; /* base 0, size 0x10_00000000 if the parent uses 2 address + 2 size cells - TODO confirm against the board file */
+};
+
+&lbc { /* SoC-level properties for the node labelled lbc; reg, ranges and child devices are left to the including file */
+	compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <25 2 0 0>;
+	#address-cells = <2>; /* children are addressed with two cells ... */
+	#size-cells = <1>; /* ... and sized with one */
+};
+
+/* controller at 0x200000 */
+&pci0 { /* SoC-side properties of PCIe controller 0; reg and ranges are supplied by the board file's pci0 node */
+	compatible = "fsl,p2041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 15>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x500>; /* PEX1LIODNR */
+	pcie@0 { /* child pci node that carries the interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one cell per child interrupt in the map below */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 15>; /* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>; /* match on bits 15:11 of the first address cell and the low 3 bits of the interrupt cell */
+		interrupt-map = < /* each row: 3 address cells, 1 interrupt cell, &mpic, 4-cell mpic specifier */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x201000 */
+&pci1 { /* SoC-side properties of PCIe controller 1; reg and ranges are supplied by the board file's pci1 node */
+	compatible = "fsl,p2041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 14>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x504>; /* PEX2LIODNR */
+	pcie@0 { /* child pci node that carries the interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one cell per child interrupt in the map below */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 14>; /* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>; /* match on bits 15:11 of the first address cell and the low 3 bits of the interrupt cell */
+		interrupt-map = < /* each row: 3 address cells, 1 interrupt cell, &mpic, 4-cell mpic specifier */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x202000 */
+&pci2 { /* SoC-side properties of PCIe controller 2; reg and ranges are supplied by the board file's pci2 node */
+	compatible = "fsl,p2041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 13>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x508>; /* PEX3LIODNR */
+	pcie@0 { /* child pci node that carries the interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one cell per child interrupt in the map below */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 13>; /* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>; /* match on bits 15:11 of the first address cell and the low 3 bits of the interrupt cell */
+		interrupt-map = < /* each row: 3 address cells, 1 interrupt cell, &mpic, 4-cell mpic specifier */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+&rio { /* SoC-level properties for the node labelled rio and its two ports; the including file must define the node */
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,iommu-parent = <&pamu0>;
+	ranges; /* empty ranges: child addresses pass through to the parent unchanged */
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+		fsl,liodn-reg = <&guts 0x510>; /* RIO1LIODNR */
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+		fsl,liodn-reg = <&guts 0x514>; /* RIO2LIODNR */
+	};
+};
+
+&dcsr { /* register blocks inside the node labelled dcsr; offsets below are relative to that node's ranges */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,p2041-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc { /* NOTE(review): has reg but no unit-address, unlike its siblings */
+		compatible = "fsl,dcsr-npc";
+		reg = <0x1000 0x1000 0x1000000 0x8000>; /* two regions: 0x1000 (size 0x1000) and 0x1000000 (size 0x8000) */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet { /* NOTE(review): has reg but no unit-address, unlike its siblings */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0xB0000 0x1000>; /* two regions: 0x8000 and 0xB0000, 0x1000 bytes each */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,p2041-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,p2041-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>; /* ddr1 labels memory-controller@8000 under &soc in this file */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,p2041-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,p2041-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@40000 { /* one proxy node per core, 0x1000 apart, each pointing at its cpuN label */
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x40000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@41000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x41000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@42000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x42000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@43000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x43000 0x1000>;
+	};
+};
+
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,p2041-l3-cache-controller", "fsl,p4080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000>;
+		interrupts = <16 2 1 27>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 { /* container for four pamu child nodes, each with a 0x1000-byte reg */
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x4000>; /* for compatibility with older PAMU drivers */
+		ranges = <0 0x20000 0x4000>; /* child offset 0 maps to 0x20000 in the parent, size 0x4000 */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+		fsl,portid-mapping = <0x0f000000>;
+
+		pamu0: pamu@0 { /* fsl,iommu-parent of the PCIe, SRIO and DMA nodes in this file */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 { /* fsl,iommu-parent of the sdhc, usb, sata and crypto nodes in this file */
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu3: pamu@3000 {
+			reg = <0x3000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 { /* target of the <&guts offset> phandles used by the fsl,liodn-reg properties in this file */
+		compatible = "fsl,qoriq-device-config-1.0";
+		reg = <0xe0000 0xe00>; /* ends at 0xe0e00, where the pins node's reg begins */
+		fsl,has-rstcr;
+		#sleep-cells = <1>;
+		fsl,liodn-bits = <12>;
+	};
+
+	pins: global-utilities@e0e00 {
+		compatible = "fsl,qoriq-pin-control-1.0";
+		reg = <0xe0e00 0x200>;
+		#sleep-cells = <2>;
+	};
+
+/include/ "qoriq-clockgen1.dtsi"
+	global-utilities@e1000 {
+		compatible = "fsl,p2041-clockgen", "fsl,qoriq-clockgen-1.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,qoriq-rcpm-1.0";
+		reg = <0xe2000 0x1000>;
+		#sleep-cells = <1>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,p2041-sfp", "fsl,qoriq-sfp-1.0";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,p2041-serdes";
+		reg	   = <0xea000 0x1000>;
+	};
+
+/include/ "qoriq-dma-0.dtsi"
+	dma@100300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "qoriq-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 { /* P2041-specific additions; presumably merged into the node from qoriq-esdhc-0.dtsi included just above - TODO confirm */
+		compatible = "fsl,p2041-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+	};
+
+/include/ "qoriq-i2c-0.dtsi"
+	i2c@118000 {
+		fsl,i2c-erratum-a004447;
+	};
+
+	i2c@118100 {
+		fsl,i2c-erratum-a004447;
+	};
+
+/include/ "qoriq-i2c-1.dtsi"
+	i2c@119000 {
+		fsl,i2c-erratum-a004447;
+	};
+
+	i2c@119100 {
+		fsl,i2c-erratum-a004447;
+	};
+
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 { /* usb0 is the target of the usb0 alias in p2041si-pre.dtsi */
+		compatible = "fsl-usb2-mph-v1.6", "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph";
+		phy_type = "utmi";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		port0;
+	};
+
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 { /* usb1 is the target of the usb1 alias in p2041si-pre.dtsi */
+		compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+
+/include/ "qoriq-sec4.2-0.dtsi"
+	crypto: crypto@300000 { /* crypto is the target of the crypto alias in p2041si-pre.dtsi; only the PAMU parent is set here */
+		fsl,iommu-parent = <&pamu1>;
+	};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+	fman@400000 { /* attaches the enet0..enet5 labels used by the ethernetN aliases in p2041si-pre.dtsi; node contents presumably come from the qoriq-fman-0-* includes above */
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@f0000 {
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
new file mode 100644
index 0000000000..6318962e8d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
@@ -0,0 +1,130 @@
+/*
+ * P2041 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500mc_power_isa.dtsi"
+
+/ { /* P2041 root skeleton: cell sizes, aliases and the four CPU nodes */
+	compatible = "fsl,P2041";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* none of the labels referenced here is defined in this file; they must come from the board .dts, p2041si-post.dtsi or its includes */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+		msi0 = &msi0;
+		msi1 = &msi1;
+		msi2 = &msi2;
+
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+	};
+
+	cpus { /* four e500mc cores; per core only reg, the clockgen index, the L2 label and the portid bit differ */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e500mc@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>; /* one bit per core, starting at bit 31 and moving down */
+			L2_0: l2-cache {
+				next-level-cache = <&cpc>; /* cpc labels l3-cache-controller@10000 in p2041si-post.dtsi */
+			};
+		};
+		cpu1: PowerPC,e500mc@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu2: PowerPC,e500mc@2 {
+			device_type = "cpu";
+			reg = <2>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu3: PowerPC,e500mc@3 {
+			device_type = "cpu";
+			reg = <3>;
+			clocks = <&clockgen 1 3>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
+			L2_3: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts
new file mode 100644
index 0000000000..ca0e0272ac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts
@@ -0,0 +1,394 @@
+/*
+ * P3041DS Device Tree Source
+ *
+ * Copyright 2010 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p3041si-pre.dtsi"
+
+/ {
+	model = "fsl,P3041DS";
+	compatible = "fsl,P3041DS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* board aliases; every label referenced here is defined further down in this file */
+		phy_rgmii_0 = &phy_rgmii_0;
+		phy_rgmii_1 = &phy_rgmii_1;
+		phy_sgmii_1c = &phy_sgmii_1c;
+		phy_sgmii_1d = &phy_sgmii_1d;
+		phy_sgmii_1e = &phy_sgmii_1e;
+		phy_sgmii_1f = &phy_sgmii_1f;
+		phy_xgmii_1 = &phy_xgmii_1;
+		phy_xgmii_2 = &phy_xgmii_2;
+		emi1_rgmii = &hydra_mdio_rgmii; /* rgmii-mdio@8 under mdio-mux-emi1 */
+		emi1_sgmii = &hydra_mdio_sgmii; /* sgmii-mdio@28 under mdio-mux-emi1 */
+		emi2_xgmii = &hydra_mdio_xgmii; /* mdio@f1000 under fman@400000 */
+	};
+
+	memory { /* no reg property here - presumably filled in by the bootloader; TODO confirm */
+		device_type = "memory";
+	};
+
+	reserved-memory { /* three regions given by size and alignment only (no reg); &bman_fbpr in p3041si-post.dtsi adds compatible and alloc-ranges, and qman_fqd/qman_pfdr presumably get the same there */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>; /* 0x1000000 bytes, aligned to its own size */
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>; /* 0x400000 bytes, aligned to its own size */
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>; /* 0x2000000 bytes, aligned to its own size */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>; /* child 0x0 -> parent 0xf_00000000, size 0x01008000 (1 child cell, 2 parent cells, 1 size cell) */
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>; /* child 0x0 -> parent 0xf_f4000000, size 0x200000 */
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>; /* child 0x0 -> parent 0xf_f4200000, size 0x200000; starts where the bman window ends */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 { /* SPI NOR flash with four back-to-back partitions covering 0x0 - 0x01000000 */
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <35000000>; /* input clock */
+				partition@0 { /* unit-address is the reg offset; the partition name comes from label */
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>;
+					read-only;
+				};
+				partition@100000 {
+					label = "kernel";
+					reg = <0x00100000 0x00500000>;
+					read-only;
+				};
+				partition@600000 {
+					label = "dtb";
+					reg = <0x00600000 0x00100000>;
+					read-only;
+				};
+				partition@700000 {
+					label = "file system";
+					reg = <0x00700000 0x00900000>;
+				};
+			};
+		};
+
+		i2c@118100 { /* two atmel,24c256 EEPROMs at addresses 0x51 and 0x52 */
+			eeprom@51 {
+				compatible = "atmel,24c256";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "atmel,24c256";
+				reg = <0x52>;
+			};
+		};
+
+		i2c@119100 { /* a dallas,ds3232 RTC, four ti,ina220 devices and an adi,adt7461 */
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>; /* the only device on this bus with an interrupt */
+			};
+			ina220@40 { /* the four ina220 nodes differ only in address; all use shunt-resistor = <1000> */
+				compatible = "ti,ina220";
+				reg = <0x40>;
+				shunt-resistor = <1000>;
+			};
+			ina220@41 {
+				compatible = "ti,ina220";
+				reg = <0x41>;
+				shunt-resistor = <1000>;
+			};
+			ina220@44 {
+				compatible = "ti,ina220";
+				reg = <0x44>;
+				shunt-resistor = <1000>;
+			};
+			ina220@45 {
+				compatible = "ti,ina220";
+				reg = <0x45>;
+				shunt-resistor = <1000>;
+			};
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+		};
+
+		fman@400000 { /* board wiring: gives each ethernet node a PHY handle and connection type */
+			ethernet@e0000 { /* e0000..e6000: SGMII, PHYs phy_sgmii_1c..1f in order */
+				phy-handle = <&phy_sgmii_1c>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy_sgmii_1d>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_sgmii_1e>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy_sgmii_1f>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 { /* RGMII; uses phy_rgmii_1 (phy_rgmii_0 is not referenced by any ethernet node here) */
+				phy-handle = <&phy_rgmii_1>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@f0000 { /* XGMII; uses phy_xgmii_1 (phy_xgmii_2 is not referenced by any ethernet node here) */
+				phy-handle = <&phy_xgmii_1>;
+				phy-connection-type = "xgmii";
+			};
+
+			hydra_mdio_xgmii: mdio@f1000 { /* target of the emi2_xgmii alias */
+				status = "disabled"; /* disabled in this file; presumably enabled by boot firmware when in use - TODO confirm */
+
+				phy_xgmii_1: ethernet-phy@4 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x4>;
+				};
+
+				phy_xgmii_2: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 { /* board address windows for the two rapidio ports */
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* parent 0xc_20000000, size 0x10000000. NOTE(review): lies inside pci1's memory window (0xc_20000000, size 0x20000000) - confirm both are never active together */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>; /* parent 0xc_30000000, size 0x10000000; also inside pci1's memory window */
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xffa00000 0x00040000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		flash@0,0 { /* CFI flash on localbus address <0 0> */
+			compatible = "cfi-flash";
+			reg = <0 0 0x08000000>; /* size 0x08000000 equals the first lbc ranges entry */
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		nand@2,0 { /* NAND on localbus address <2 0>; six back-to-back partitions from 0x0 to 0x40000000 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x2 0x0 0x40000>; /* size 0x40000 equals the lbc ranges entry for <2 0> */
+
+			partition@0 {
+				label = "NAND U-Boot Image";
+				reg = <0x0 0x02000000>;
+				read-only; /* the only read-only partition on this device */
+			};
+
+			partition@2000000 {
+				label = "NAND Root File System";
+				reg = <0x02000000 0x10000000>;
+			};
+
+			partition@12000000 {
+				label = "NAND Compressed RFS Image";
+				reg = <0x12000000 0x08000000>;
+			};
+
+			partition@1a000000 {
+				label = "NAND Linux Kernel Image";
+				reg = <0x1a000000 0x04000000>;
+			};
+
+			partition@1e000000 {
+				label = "NAND DTB Image";
+				reg = <0x1e000000 0x01000000>;
+			};
+
+			partition@1f000000 {
+				label = "NAND Writable User area";
+				reg = <0x1f000000 0x21000000>; /* ends at 0x40000000. NOTE(review): p2041rdb.dts sizes its same-named partition 0x01000000 - confirm each against the fitted NAND */
+			};
+		};
+
+		board-control@3,0 { /* FPGA register block on localbus address <3 0>; hosts the MDIO mux for the RGMII and SGMII PHYs */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p3041ds-fpga", "fsl,fpga-ngpixis";
+			reg = <3 0 0x30>;
+			ranges = <0 3 0 0x30>; /* child offset 0 -> localbus <3 0>, 0x30 bytes */
+
+			mdio-mux-emi1 { /* NOTE(review): has reg but no unit-address */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "mdio-mux-mmioreg", "mdio-mux";
+				mdio-parent-bus = <&mdio0>; /* mdio0 is not defined in this file */
+				reg = <9 1>; /* one byte at offset 9 of the board-control block */
+				mux-mask = <0x78>; /* both child reg values (0x8, 0x28) fit inside this mask */
+
+				hydra_mdio_rgmii: rgmii-mdio@8 { /* target of the emi1_rgmii alias */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <8>; /* presumably the mux register value that selects this bus - TODO confirm against the mdio-mux-mmioreg binding */
+					status = "disabled";
+
+					phy_rgmii_0: ethernet-phy@0 {
+						reg = <0x0>;
+					};
+
+					phy_rgmii_1: ethernet-phy@1 {
+						reg = <0x1>;
+					};
+				};
+
+				hydra_mdio_sgmii: sgmii-mdio@28 { /* target of the emi1_sgmii alias */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x28>; /* presumably the mux register value that selects this bus - TODO confirm */
+					status = "disabled";
+
+					phy_sgmii_1c: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_sgmii_1d: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_sgmii_1e: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_sgmii_1f: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe200000 { /* board address map for PCIe controller 0 */
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 /* 3 PCI cells, 2 parent cells, 2 size cells: PCI 0xe0000000 -> parent 0xc_00000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; /* PCI 0x0 -> parent 0xf_f8000000, size 0x10000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child and parent addresses are identical in both entries: 1:1 mapping */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 { /* board address map for PCIe controller 1 */
+		reg = <0xf 0xfe201000 0 0x1000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000 /* PCI 0xe0000000 -> parent 0xc_20000000, size 0x20000000. NOTE(review): the rio port1/port2 windows in this file lie inside this range */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; /* PCI 0x0 -> parent 0xf_f8010000, size 0x10000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child and parent addresses are identical in both entries: 1:1 mapping */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe202000 { /* board address map for PCIe controller 2 */
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000 /* 3 PCI cells, 2 parent cells, 2 size cells: PCI 0xe0000000 -> parent 0xc_40000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; /* PCI 0x0 -> parent 0xf_f8020000, size 0x10000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child and parent addresses are identical in both entries: 1:1 mapping */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe203000 { /* board address map for PCIe controller 3 */
+		reg = <0xf 0xfe203000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000 /* 3 PCI cells, 2 parent cells, 2 size cells: PCI 0xe0000000 -> parent 0xc_60000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; /* PCI 0x0 -> parent 0xf_f8030000, size 0x10000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child and parent addresses are identical in both entries: 1:1 mapping */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "p3041si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
new file mode 100644
index 0000000000..81bc75aca2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
@@ -0,0 +1,469 @@
+/*
+ * P3041 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr { /* extends the board file's node labelled bman_fbpr */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>; /* presumably 2 address + 2 size cells: base 0, size 0x10_0000_0000 (64 GiB) - confirm against the board's reserved-memory cells */
+};
+
+&qman_fqd { /* extends the board file's node labelled qman_fqd */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10 0>; /* same range as &bman_fbpr and &qman_pfdr in this file */
+};
+
+&qman_pfdr { /* extends the board file's node labelled qman_pfdr */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10 0>; /* same range as &bman_fbpr and &qman_fqd in this file */
+};
+
+&lbc { /* SoC-level properties for the board file's node labelled lbc */
+	compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <25 2 0 0>;
+	#address-cells = <2>; /* child addresses take two cells */
+	#size-cells = <1>; /* child sizes take one cell */
+};
+
+/* controller at 0x200000: SoC-level properties for the node labelled pci0 */
+&pci0 {
+	compatible = "fsl,p3041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>; /* sizes in this node's ranges use two cells */
+	#address-cells = <3>; /* child (PCI) addresses use three cells */
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 15>;
+	fsl,iommu-parent = <&pamu0>; /* pamu0 is defined under iommu@20000 in &soc */
+	fsl,liodn-reg = <&guts 0x500>; /* PEX1LIODNR */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one-cell child interrupt specifier (values 1-4 in the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 15>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* applied to the 3 address cells + 1 interrupt cell before matching */
+		interrupt-map = <
+			/* IDSEL 0x0; each row: 3-cell unit address, 1-cell interrupt, &mpic, 4-cell &mpic specifier */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x201000: SoC-level properties for the node labelled pci1 */
+&pci1 {
+	compatible = "fsl,p3041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>; /* sizes in this node's ranges use two cells */
+	#address-cells = <3>; /* child (PCI) addresses use three cells */
+	bus-range = <0 0xff>; /* same value as the other controllers' <0x0 0xff> */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 14>;
+	fsl,iommu-parent = <&pamu0>; /* pamu0 is defined under iommu@20000 in &soc */
+	fsl,liodn-reg = <&guts 0x504>; /* PEX2LIODNR */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one-cell child interrupt specifier (values 1-4 in the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 14>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* applied to the 3 address cells + 1 interrupt cell before matching */
+		interrupt-map = <
+			/* IDSEL 0x0; each row: 3-cell unit address, 1-cell interrupt, &mpic, 4-cell &mpic specifier */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x202000: SoC-level properties for the node labelled pci2 */
+&pci2 {
+	compatible = "fsl,p3041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>; /* sizes in this node's ranges use two cells */
+	#address-cells = <3>; /* child (PCI) addresses use three cells */
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 13>;
+	fsl,iommu-parent = <&pamu0>; /* pamu0 is defined under iommu@20000 in &soc */
+	fsl,liodn-reg = <&guts 0x508>; /* PEX3LIODNR */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one-cell child interrupt specifier (values 1-4 in the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 13>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* applied to the 3 address cells + 1 interrupt cell before matching */
+		interrupt-map = <
+			/* IDSEL 0x0; each row: 3-cell unit address, 1-cell interrupt, &mpic, 4-cell &mpic specifier */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x203000: SoC-level properties for the node labelled pci3 */
+&pci3 {
+	compatible = "fsl,p3041-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>; /* sizes in this node's ranges use two cells */
+	#address-cells = <3>; /* child (PCI) addresses use three cells */
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 12>; /* NOTE(review): pci0-pci2 also set fsl,iommu-parent and fsl,liodn-reg; pci3 sets neither - confirm intentional */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>; /* one-cell child interrupt specifier (values 1-4 in the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 12>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* applied to the 3 address cells + 1 interrupt cell before matching */
+		interrupt-map = <
+			/* IDSEL 0x0; each row: 3-cell unit address, 1-cell interrupt, &mpic, 4-cell &mpic specifier */
+			0000 0 0 1 &mpic 43 1 0 0
+			0000 0 0 2 &mpic 0 1 0 0
+			0000 0 0 3 &mpic 4 1 0 0
+			0000 0 0 4 &mpic 8 1 0 0
+			>;
+	};
+};
+
+&rio { /* SoC-level properties for the board file's node labelled rio */
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,iommu-parent = <&pamu0>; /* pamu0 is defined under iommu@20000 in &soc */
+	ranges; /* empty ranges: child addresses pass through to the parent unchanged */
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+		fsl,liodn-reg = <&guts 0x510>; /* RIO1LIODNR */
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+		fsl,liodn-reg = <&guts 0x514>; /* RIO2LIODNR */
+	};
+};
+
+&dcsr { /* SoC-level children of the board file's node labelled dcsr */
+	#address-cells = <1>; /* child reg entries below are <offset size> pairs */
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,p3041-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0 /* three 4-cell interrupt specifiers */
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc { /* NOTE(review): has reg but no unit address, unlike most siblings - confirm intentional */
+		compatible = "fsl,dcsr-npc";
+		reg = <0x1000 0x1000 0x1000000 0x8000>; /* two regions: 0x1000 (+0x1000) and 0x1000000 (+0x8000) */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet { /* NOTE(review): has reg but no unit address, unlike most siblings - confirm intentional */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0xB0000 0x1000>; /* two regions: 0x8000 (+0x1000) and 0xB0000 (+0x1000) */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,p3041-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,p3041-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>; /* ddr1 labels memory-controller@8000 in &soc */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,p3041-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,p3041-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@40000 { /* one proxy node per core, 0x1000 apart; cpu0-cpu3 are defined in p3041si-pre.dtsi */
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x40000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@41000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x41000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@42000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x42000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@43000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x43000 0x1000>;
+	};
+};
+
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
+&soc { /* SoC-level devices of the node labelled soc (aliased as "ccsr" in p3041si-pre.dtsi) */
+	#address-cells = <1>; /* child reg entries below are <offset size> pairs */
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 { /* label referenced by dcsr-ddr@12000 (dev-handle) */
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 { /* label referenced as next-level-cache by the L2 nodes in p3041si-pre.dtsi */
+		compatible = "fsl,p3041-l3-cache-controller", "fsl,p4080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000>;
+		interrupts = <16 2 1 27>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x4000>; /* for compatibility with older PAMU drivers */
+		ranges = <0 0x20000 0x4000>; /* child 0x0 -> parent 0x20000, 0x4000 bytes: the four 0x1000 pamu@ nodes below */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+		fsl,portid-mapping = <0x0f000000>;
+
+		pamu0: pamu@0 { /* fsl,iommu-parent of the pci0-pci2, rio and dma nodes in this file */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 { /* fsl,iommu-parent of the sdhc, usb, sata and crypto nodes below */
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu3: pamu@3000 {
+			reg = <0x3000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 { /* label used as the base of every fsl,liodn-reg = <&guts offset> in this file */
+		compatible = "fsl,qoriq-device-config-1.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		#sleep-cells = <1>;
+		fsl,liodn-bits = <12>;
+	};
+
+	pins: global-utilities@e0e00 { /* starts where guts ends: 0xe0000 + 0xe00 */
+		compatible = "fsl,qoriq-pin-control-1.0";
+		reg = <0xe0e00 0x200>;
+		#sleep-cells = <2>;
+	};
+
+/include/ "qoriq-clockgen1.dtsi"
+	global-utilities@e1000 { /* presumably extends a node of the same name from the include above - confirm */
+		compatible = "fsl,p3041-clockgen", "fsl,qoriq-clockgen-1.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,qoriq-rcpm-1.0";
+		reg = <0xe2000 0x1000>;
+		#sleep-cells = <1>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,p3041-sfp", "fsl,qoriq-sfp-1.0";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,p3041-serdes";
+		reg	   = <0xea000 0x1000>;
+	};
+
+/include/ "qoriq-dma-0.dtsi"
+	dma@100300 { /* adds IOMMU/LIODN properties; the rest presumably comes from the include above */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "qoriq-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,p3041-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+	};
+
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 { /* label consumed by the "usb0" alias in p3041si-pre.dtsi */
+		compatible = "fsl-usb2-mph-v1.6", "fsl-usb2-mph";
+		phy_type = "utmi";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		port0;
+	};
+
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 { /* label consumed by the "usb1" alias in p3041si-pre.dtsi */
+		compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+
+/include/ "qoriq-sec4.2-0.dtsi"
+	crypto: crypto@300000 { /* label consumed by the "crypto" alias in p3041si-pre.dtsi */
+		fsl,iommu-parent = <&pamu1>;
+	};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+	fman@400000 { /* empty bodies: these entries only attach the enet0-enet5 labels used by the ethernetN aliases */
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@f0000 {
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
new file mode 100644
index 0000000000..db92f1151a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
@@ -0,0 +1,131 @@
+/*
+ * P3041 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500mc_power_isa.dtsi"
+
+/ { /* P3041 root: cell sizes, aliases and the four CPU nodes; board files include this first */
+	compatible = "fsl,P3041";
+	#address-cells = <2>; /* top-level addresses take two cells */
+	#size-cells = <2>; /* top-level sizes take two cells */
+	interrupt-parent = <&mpic>; /* mpic is not defined in this file; presumably from qoriq-mpic.dtsi, included by p3041si-post.dtsi */
+
+	aliases { /* every &label here must be defined by the board file or by p3041si-post.dtsi and its includes */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+		msi0 = &msi0;
+		msi1 = &msi1;
+		msi2 = &msi2;
+
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0; /* enet0-enet5 label the fman@400000 ethernet nodes in p3041si-post.dtsi */
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+	};
+
+	cpus {
+		#address-cells = <1>; /* cpu reg is a single cell with no size */
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e500mc@0 { /* cache chain: cpu0 -> L2_0 -> cpc */
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>; /* last cell equals the core number on every cpu node */
+			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>; /* a distinct single bit per core: 0x80000000 >> core number */
+			L2_0: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu1: PowerPC,e500mc@1 { /* cache chain: cpu1 -> L2_1 -> cpc */
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu2: PowerPC,e500mc@2 { /* cache chain: cpu2 -> L2_2 -> cpc */
+			device_type = "cpu";
+			reg = <2>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu3: PowerPC,e500mc@3 { /* cache chain: cpu3 -> L2_3 -> cpc */
+			device_type = "cpu";
+			reg = <3>;
+			clocks = <&clockgen 1 3>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
+			L2_3: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts
new file mode 100644
index 0000000000..969b32c4f2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts
@@ -0,0 +1,439 @@
+/*
+ * P4080DS Device Tree Source
+ *
+ * Copyright 2009 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p4080si-pre.dtsi"
+
+/ {
+	model = "fsl,P4080DS";
+	compatible = "fsl,P4080DS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* board-specific aliases; all targets are labels in the mdio-mux nodes of this file */
+		phy_rgmii = &phyrgmii; /* ethernet-phy@0 under p4080mdio0 */
+		phy5_slot3 = &phy5slot3; /* phy5slot3-phy8slot3 sit under p4080mdio2, which is status = "disabled" */
+		phy6_slot3 = &phy6slot3;
+		phy7_slot3 = &phy7slot3;
+		phy8_slot3 = &phy8slot3;
+		emi1_slot3 = &p4080mdio2; /* mdio@2 of mdio-mux-emi1 */
+		emi1_slot4 = &p4080mdio1; /* mdio@1 of mdio-mux-emi1 */
+		emi1_slot5 = &p4080mdio3; /* mdio@3 of mdio-mux-emi1 */
+		emi1_rgmii = &p4080mdio0; /* mdio@0 of mdio-mux-emi1 */
+		emi2_slot4 = &p4080xmdio1; /* mdio@1 of mdio-mux-emi2 */
+		emi2_slot5 = &p4080xmdio3; /* mdio@3 of mdio-mux-emi2 */
+	};
+
+	memory { /* no reg here; presumably filled in before the kernel boots - confirm */
+		device_type = "memory";
+	};
+
+	reserved-memory { /* size/alignment requests only, no fixed addresses */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges; /* empty ranges: child addresses equal parent addresses */
+
+		bman_fbpr: bman-fbpr { /* compatible and alloc-ranges are added via &bman_fbpr in p4080si-post.dtsi */
+			size = <0 0x1000000>; /* 16 MiB */
+			alignment = <0 0x1000000>; /* aligned to its own size */
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>; /* 4 MiB */
+			alignment = <0 0x400000>; /* aligned to its own size */
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>; /* 32 MiB */
+			alignment = <0 0x2000000>; /* aligned to its own size */
+		};
+	};
+
+	dcsr: dcsr@f00000000 { /* board-level window; children presumably come from &dcsr in p4080si-post.dtsi */
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>; /* presumably 1 child cell: 0x0 -> 0xf_0000_0000, size 0x01008000 - confirm cell counts */
+	};
+
+	bportals: bman-portals@ff4000000 { /* board-level window for the BMan portals */
+		ranges = <0x0 0xf 0xf4000000 0x200000>; /* presumably 0x0 -> 0xf_f400_0000, size 0x200000 (2 MiB) - confirm cell counts */
+	};
+
+	qportals: qman-portals@ff4200000 { /* board-level window for the QMan portals, directly after the BMan window */
+		ranges = <0x0 0xf 0xf4200000 0x200000>; /* presumably 0x0 -> 0xf_f420_0000, size 0x200000 (2 MiB) - confirm cell counts */
+	};
+
+	soc: soc@ffe000000 { /* board-level additions to SoC devices: SPI flash, I2C devices, USB PHY types, FMan PHY links */
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>; /* presumably 1 child cell: 0x0 -> 0xf_fe00_0000, size 0x1000000 (16 MiB) - confirm cell counts */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>; /* partition reg entries are <offset size> pairs */
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+				partition@u-boot { /* NOTE(review): unit address is a name, not the reg offset - confirm intentional */
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>; /* offset 0x0, 1 MiB */
+					read-only;
+				};
+				partition@kernel {
+					label = "kernel";
+					reg = <0x00100000 0x00500000>; /* offset 0x100000, 5 MiB */
+					read-only;
+				};
+				partition@dtb {
+					label = "dtb";
+					reg = <0x00600000 0x00100000>; /* offset 0x600000, 1 MiB */
+					read-only;
+				};
+				partition@fs { /* the only partition without read-only */
+					label = "file system";
+					reg = <0x00700000 0x00900000>; /* offset 0x700000, 9 MiB; ends at 0x1000000 */
+				};
+			};
+		};
+
+		i2c@118100 {
+			eeprom@51 {
+				compatible = "atmel,spd";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "atmel,spd";
+				reg = <0x52>;
+			};
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+		};
+
+		i2c@118000 {
+			zl2006@21 {
+				compatible = "zl2006";
+				reg = <0x21>;
+			};
+			zl2006@22 {
+				compatible = "zl2006";
+				reg = <0x22>;
+			};
+			zl2006@23 {
+				compatible = "zl2006";
+				reg = <0x23>;
+			};
+			zl2006@24 {
+				compatible = "zl2006";
+				reg = <0x24>;
+			};
+			eeprom@50 {
+				compatible = "atmel,24c64";
+				reg = <0x50>;
+			};
+			eeprom@55 {
+				compatible = "atmel,24c64";
+				reg = <0x55>;
+			};
+			eeprom@56 {
+				compatible = "atmel,24c64";
+				reg = <0x56>;
+			};
+			eeprom@57 { /* 24c02 here; the other three EEPROMs on this bus are 24c64 */
+				compatible = "atmel,24c02";
+				reg = <0x57>;
+			};
+		};
+
+		i2c@119100 {
+			/* 0x6E: ICS9FG108 */
+		};
+
+		usb0: usb@210000 {
+			phy_type = "ulpi";
+		};
+
+		usb1: usb@211000 {
+			dr_mode = "host";
+			phy_type = "ulpi";
+		};
+
+		fman@400000 { /* phy0-phy3 are under p4080mdio3 (mdio-mux-emi1); phy10 is under p4080xmdio3 (mdio-mux-emi2) */
+			ethernet@e0000 {
+				phy-handle = <&phy0>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy1>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy2>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy3>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy10>;
+				phy-connection-type = "xgmii";
+			};
+		};
+
+		fman@500000 { /* phy5-phy8 are under p4080mdio1 (mdio-mux-emi1); phy11 is under p4080xmdio1 (mdio-mux-emi2) */
+			ethernet@e0000 {
+				phy-handle = <&phy5>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy6>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy7>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy8>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy11>;
+				phy-connection-type = "xgmii";
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 { /* board-level register block and per-port windows for the node labelled rio */
+		reg = <0xf 0xfe0c0000 0 0x11000>; /* 0x11000 bytes at 0xf_fe0c_0000 */
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* presumably 2+2+2 cells: 0x0 -> 0xc_2000_0000, 256 MiB - confirm cell counts */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>; /* presumably 2+2+2 cells: 0x0 -> 0xc_3000_0000, 256 MiB - confirm cell counts */
+		};
+	};
+
+	lbc: localbus@ffe124000 { /* board-level local-bus windows plus the devices behind them */
+		reg = <0xf 0xfe124000 0 0x1000>; /* 0x1000 bytes (4 KiB) at 0xf_fe12_4000 */
+		ranges = <0 0 0xf 0xe8000000 0x08000000 /* presumably 2 child cells: <0 0> -> 0xf_e800_0000, 128 MiB - confirm cell counts */
+			  3 0 0xf 0xffdf0000 0x00008000>; /* <3 0> -> 0xf_ffdf_0000, 0x8000 bytes (32 KiB) */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x08000000>; /* fills the whole <0 0> window (128 MiB) */
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		board-control@3,0 {
+			compatible = "fsl,p4080ds-fpga", "fsl,fpga-ngpixis";
+			reg = <3 0 0x30>; /* 0x30 bytes at the start of the <3 0> window */
+		};
+	};
+
+	pci0: pcie@ffe200000 { /* board-level address windows for the controller labelled pci0 */
+		reg = <0xf 0xfe200000 0 0x1000>; /* controller registers: 0x1000 bytes (4 KiB) at 0xf_fe20_0000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 /* PCI 0xe0000000 -> parent 0xc_0000_0000, 512 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; /* PCI 0x0 -> parent 0xf_f800_0000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child address */
+				  0x02000000 0 0xe0000000 /* parent address, same value: 1:1 mapping */
+				  0 0x20000000 /* size: 512 MiB */
+
+				  0x01000000 0 0x00000000 /* second window, also mapped 1:1 */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* size: 64 KiB */
+		};
+	};
+
+	pci1: pcie@ffe201000 { /* board-level address windows for the controller labelled pci1 */
+		reg = <0xf 0xfe201000 0 0x1000>; /* controller registers: 0x1000 bytes (4 KiB) at 0xf_fe20_1000 */
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000 /* PCI 0xe0000000 -> parent 0xc_2000_0000, 512 MiB */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; /* PCI 0x0 -> parent 0xf_f801_0000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child address */
+				  0x02000000 0 0xe0000000 /* parent address, same value: 1:1 mapping */
+				  0 0x20000000 /* size: 512 MiB */
+
+				  0x01000000 0 0x00000000 /* second window, also mapped 1:1 */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* size: 64 KiB */
+		};
+	};
+
+	pci2: pcie@ffe202000 { /* board-level address windows for the controller labelled pci2 */
+		reg = <0xf 0xfe202000 0 0x1000>; /* controller registers: 0x1000 bytes (4 KiB) at 0xf_fe20_2000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000 /* PCI 0xe0000000 -> parent 0xc_4000_0000, 512 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; /* PCI 0x0 -> parent 0xf_f802_0000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000 /* child address */
+				  0x02000000 0 0xe0000000 /* parent address, same value: 1:1 mapping */
+				  0 0x20000000 /* size: 512 MiB */
+
+				  0x01000000 0 0x00000000 /* second window, also mapped 1:1 */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* size: 64 KiB */
+		};
+	};
+
+	mdio-mux-emi1 { /* GPIO-controlled MDIO mux on parent bus mdio0 with four child buses (reg 0-3) */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "mdio-mux-gpio", "mdio-mux";
+		mdio-parent-bus = <&mdio0>;
+		gpios = <&gpio0 1 0>, <&gpio0 0 0>; /* two select lines; presumably each child's reg is the GPIO pattern that selects it - confirm against the binding */
+
+		p4080mdio0: mdio@0 { /* aliased as emi1_rgmii */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			phyrgmii: ethernet-phy@0 {
+				reg = <0x0>;
+			};
+		};
+
+		p4080mdio1: mdio@1 { /* aliased as emi1_slot4; phy5-phy8 are referenced by fman@500000 */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <1>;
+
+			phy5: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy6: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy7: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy8: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		p4080mdio2: mdio@2 { /* aliased as emi1_slot3; the only child bus that is disabled */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <2>;
+			status = "disabled";
+
+			phy5slot3: ethernet-phy@1c { /* same PHY addresses 0x1c-0x1f as on mdio@1 and mdio@3 */
+				reg = <0x1c>;
+			};
+
+			phy6slot3: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy7slot3: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy8slot3: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		p4080mdio3: mdio@3 { /* aliased as emi1_slot5; phy0-phy3 are referenced by fman@400000 */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <3>;
+
+			phy0: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy1: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy2: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy3: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+	};
+
+	mdio-mux-emi2 { /* GPIO-controlled MDIO mux on parent bus xmdio0; only child buses 1 and 3 are described */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "mdio-mux-gpio", "mdio-mux";
+		mdio-parent-bus = <&xmdio0>;
+		gpios = <&gpio0 3 0>, <&gpio0 2 0>; /* uses gpio0 lines 3 and 2; mdio-mux-emi1 uses lines 1 and 0 */
+
+		p4080xmdio1: mdio@1 { /* aliased as emi2_slot4 */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <1>;
+
+			phy11: ethernet-phy@0 { /* referenced by fman@500000 ethernet@f0000 (xgmii) */
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0x0>;
+			};
+		};
+
+		p4080xmdio3: mdio@3 { /* aliased as emi2_slot5 */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <3>;
+
+			phy10: ethernet-phy@4 { /* referenced by fman@400000 ethernet@f0000 (xgmii) */
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0x4>;
+			};
+		};
+	};
+};
+
+/include/ "p4080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
new file mode 100644
index 0000000000..4da49b6dd3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
@@ -0,0 +1,492 @@
+/*
+ * P4080/P4040 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr { /* SoC-level properties merged into the node labelled bman_fbpr */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10 0>; /* NOTE(review): presumably <addr-hi addr-lo size-hi size-lo>, i.e. the low 64 GiB - confirm against the parent's cell sizes */
+};
+
+&qman_fqd { /* SoC-level properties merged into the node labelled qman_fqd */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10 0>; /* NOTE(review): presumably <addr-hi addr-lo size-hi size-lo>, i.e. the low 64 GiB - confirm against the parent's cell sizes */
+};
+
+&qman_pfdr { /* SoC-level properties merged into the node labelled qman_pfdr */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10 0>; /* NOTE(review): presumably <addr-hi addr-lo size-hi size-lo>, i.e. the low 64 GiB - confirm against the parent's cell sizes */
+};
+
+&lbc { /* SoC-level properties merged into the node labelled lbc */
+	compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <25 2 0 0>;
+	#address-cells = <2>; /* children use two address cells; NOTE(review): presumably <chip-select offset> - confirm */
+	#size-cells = <1>;
+};
+
+/* controller at 0x200000 */
+&pci0 { /* SoC-level properties merged into the node labelled pci0 */
+	compatible = "fsl,p4080-pcie", "fsl,qoriq-pcie-v2.1";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 15>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x500>; /* PEX1LIODNR */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 15>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* last cell masks the interrupt pin matched in the map below */
+		interrupt-map = < /* each row: 3 address cells, 1 pin cell, &mpic, then 4 specifier cells */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0 /* pins 1-4 use first specifier cells 40, 1, 2, 3 */
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x201000 */
+&pci1 { /* SoC-level properties merged into the node labelled pci1 */
+	compatible = "fsl,p4080-pcie", "fsl,qoriq-pcie-v2.1";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 14>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x504>; /* PEX2LIODNR */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 14>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* last cell masks the interrupt pin matched in the map below */
+		interrupt-map = < /* each row: 3 address cells, 1 pin cell, &mpic, then 4 specifier cells */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0 /* pins 1-4 use first specifier cells 41, 5, 6, 7 */
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x202000 */
+&pci2 { /* SoC-level properties merged into the node labelled pci2 */
+	compatible = "fsl,p4080-pcie", "fsl,qoriq-pcie-v2.1";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 13>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x508>; /* PEX3LIODNR */
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 13>; /* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>; /* last cell masks the interrupt pin matched in the map below */
+		interrupt-map = < /* each row: 3 address cells, 1 pin cell, &mpic, then 4 specifier cells */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0 /* pins 1-4 use first specifier cells 42, 9, 10, 11 */
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+&rio { /* SoC-level properties merged into the node labelled rio */
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,srio-rmu-handle = <&rmu>;
+	fsl,iommu-parent = <&pamu0>;
+	ranges; /* empty ranges: child addresses are passed to the parent untranslated */
+
+	port1 { /* the two ports differ only in cell-index and in the guts offset of their LIODN register */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+		fsl,liodn-reg = <&guts 0x510>; /* RIO1LIODNR */
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+		fsl,liodn-reg = <&guts 0x514>; /* RIO2LIODNR */
+	};
+};
+
+&dcsr { /* SoC-level children merged into the node labelled dcsr */
+	#address-cells = <1>; /* every reg below is a list of <offset size> pairs */
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,p4080-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0>; /* three interrupts, written one four-cell group per line */
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc { /* node name has no unit address although reg is present */
+		compatible = "fsl,dcsr-npc";
+		reg = <0x1000 0x1000 0x1000000 0x8000>; /* two regions: 0x1000 (+0x1000) and 0x1000000 (+0x8000) */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet { /* node name has no unit address although reg is present */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0xB0000 0x1000>; /* two regions: 0x8000 (+0x1000) and 0xB0000 (+0x1000) */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,p4080-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,p4080-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 { /* one dcsr-ddr node per memory controller: ddr1 here, ddr2 at 0x13000 */
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-ddr@13000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr2>;
+		reg = <0x13000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,p4080-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,p4080-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@40000 { /* one proxy per core: cpuN is at 0x40000 + N * 0x1000, N = 0..7 */
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x40000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@41000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x41000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@42000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x42000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@43000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x43000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@44000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu4>;
+		reg = <0x44000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@45000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu5>;
+		reg = <0x45000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@46000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu6>;
+		reg = <0x46000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@47000 {
+		compatible = "fsl,dcsr-e500mc-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu7>;
+		reg = <0x47000 0x1000>;
+	};
+
+};
+
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.4", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	ddr2: memory-controller@9000 { /* second memory controller; same compatible list as ddr1 at 0x8000 */
+		compatible = "fsl,qoriq-memory-controller-v4.4", "fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 22>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,p4080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 { /* container for five pamu child nodes, each 0x1000 long */
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x5000>; /* for compatibility with older PAMU drivers */
+		ranges = <0 0x20000 0x5000>; /* child offset 0 maps to 0x20000 in the parent, length 0x5000 */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>; /* two interrupts, four cells each */
+		fsl,portid-mapping = <0x00f80000>;
+
+		pamu0: pamu@0 { /* pamu0..pamu4 differ only in their reg offset; cache geometry values are identical */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 {
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu3: pamu@3000 {
+			reg = <0x3000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu4: pamu@4000 {
+			reg = <0x4000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-rmu-0.dtsi"
+	rmu@d3000 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x540>; /* RMULIODNR */
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,qoriq-device-config-1.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		#sleep-cells = <1>;
+		fsl,liodn-bits = <12>;
+	};
+
+	pins: global-utilities@e0e00 {
+		compatible = "fsl,qoriq-pin-control-1.0";
+		reg = <0xe0e00 0x200>;
+		#sleep-cells = <2>;
+	};
+
+/include/ "qoriq-clockgen1.dtsi"
+	global-utilities@e1000 {
+		compatible = "fsl,p4080-clockgen", "fsl,qoriq-clockgen-1.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,qoriq-rcpm-1.0";
+		reg = <0xe2000 0x1000>;
+		#sleep-cells = <1>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,p4080-sfp", "fsl,qoriq-sfp-1.0";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,p4080-serdes";
+		reg	   = <0xea000 0x1000>;
+	};
+
+/include/ "qoriq-dma-0.dtsi"
+	dma@100300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "qoriq-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,p4080-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		voltage-ranges = <3300 3300>;
+		sdhci,auto-cmd12;
+	};
+
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb@210000 {
+		compatible = "fsl-usb2-mph-v1.6", "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb@211000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+	};
+/include/ "qoriq-sec4.0-0.dtsi"
+	crypto: crypto@300000 { /* only adds the IOMMU parent; NOTE(review): the rest of the node presumably comes from qoriq-sec4.0-0.dtsi included just above - confirm */
+		fsl,iommu-parent = <&pamu1>;
+	};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+	fman@400000 {
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@f0000 {
+		};
+	};
+
+/include/ "qoriq-fman-1.dtsi"
+/include/ "qoriq-fman-1-1g-0.dtsi"
+/include/ "qoriq-fman-1-1g-1.dtsi"
+/include/ "qoriq-fman-1-1g-2.dtsi"
+/include/ "qoriq-fman-1-1g-3.dtsi"
+/include/ "qoriq-fman-1-10g-0.dtsi"
+	fman@500000 {
+		enet5: ethernet@e0000 {
+		};
+
+		enet6: ethernet@e2000 {
+		};
+
+		enet7: ethernet@e4000 {
+		};
+
+		enet8: ethernet@e6000 {
+		};
+
+		enet9: ethernet@f0000 {
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
new file mode 100644
index 0000000000..0a7c65a00e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
@@ -0,0 +1,175 @@
+/*
+ * P4080/P4040 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e500mc_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P4080";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+		msi0 = &msi0;
+		msi1 = &msi1;
+		msi2 = &msi2;
+
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
+		fman0 = &fman0;
+		fman1 = &fman1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+		ethernet6 = &enet6;
+		ethernet7 = &enet7;
+		ethernet8 = &enet8;
+		ethernet9 = &enet9;
+	};
+
+	cpus { /* eight PowerPC,e500mc nodes (reg 0-7); each has its own l2-cache child whose next level is &cpc */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e500mc@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>; /* last cell equals the core's reg value on every cpu node below */
+			next-level-cache = <&L2_0>;
+			fsl,portid-mapping = <0x80000000>; /* a single bit: most significant bit for cpu0, shifted right by one per core */
+			L2_0: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu1: PowerPC,e500mc@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu2: PowerPC,e500mc@2 {
+			device_type = "cpu";
+			reg = <2>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu3: PowerPC,e500mc@3 {
+			device_type = "cpu";
+			reg = <3>;
+			clocks = <&clockgen 1 3>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
+			L2_3: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu4: PowerPC,e500mc@4 {
+			device_type = "cpu";
+			reg = <4>;
+			clocks = <&clockgen 1 4>;
+			next-level-cache = <&L2_4>;
+			fsl,portid-mapping = <0x08000000>;
+			L2_4: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu5: PowerPC,e500mc@5 {
+			device_type = "cpu";
+			reg = <5>;
+			clocks = <&clockgen 1 5>;
+			next-level-cache = <&L2_5>;
+			fsl,portid-mapping = <0x04000000>;
+			L2_5: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu6: PowerPC,e500mc@6 {
+			device_type = "cpu";
+			reg = <6>;
+			clocks = <&clockgen 1 6>;
+			next-level-cache = <&L2_6>;
+			fsl,portid-mapping = <0x02000000>;
+			L2_6: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu7: PowerPC,e500mc@7 {
+			device_type = "cpu";
+			reg = <7>;
+			clocks = <&clockgen 1 7>;
+			next-level-cache = <&L2_7>;
+			fsl,portid-mapping = <0x01000000>;
+			L2_7: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p5020ds.dts b/arch/powerpc/boot/dts/fsl/p5020ds.dts
new file mode 100644
index 0000000000..b24adf902d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5020ds.dts
@@ -0,0 +1,394 @@
+/*
+ * P5020DS Device Tree Source
+ *
+ * Copyright 2010 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "p5020si-pre.dtsi"
+
+/ {
+	model = "fsl,P5020DS";
+	compatible = "fsl,P5020DS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* board-level aliases; every label referenced here is defined further down in this file */
+		phy_rgmii_0 = &phy_rgmii_0; /* PHYs under hydra_mdio_rgmii */
+		phy_rgmii_1 = &phy_rgmii_1;
+		phy_sgmii_1c = &phy_sgmii_1c; /* PHYs under hydra_mdio_sgmii; suffix is the PHY's reg value */
+		phy_sgmii_1d = &phy_sgmii_1d;
+		phy_sgmii_1e = &phy_sgmii_1e;
+		phy_sgmii_1f = &phy_sgmii_1f;
+		phy_xgmii_1 = &phy_xgmii_1; /* PHYs under hydra_mdio_xgmii */
+		phy_xgmii_2 = &phy_xgmii_2;
+		emi1_rgmii = &hydra_mdio_rgmii; /* the three MDIO bus nodes themselves */
+		emi1_sgmii = &hydra_mdio_sgmii;
+		emi2_xgmii = &hydra_mdio_xgmii;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	reserved-memory { /* three regions given by size and alignment only; no fixed address is listed here */
+		#address-cells = <2>;
+		#size-cells = <2>; /* sizes below are <high low> cell pairs */
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>; /* 16 MiB */
+			alignment = <0 0x1000000>; /* alignment equals size for all three regions */
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>; /* 4 MiB */
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>; /* 32 MiB */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 { /* board-level addition: one SPI NOR flash with four partitions */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>; /* partition reg values are <offset size> */
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+				partition@u-boot { /* partitions are contiguous and together span 0x0 - 0x1000000; names after '@' are labels, not offsets */
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>; /* 1 MiB at 0x0 */
+					read-only;
+				};
+				partition@kernel {
+					label = "kernel";
+					reg = <0x00100000 0x00500000>; /* 5 MiB at 0x100000 */
+					read-only;
+				};
+				partition@dtb {
+					label = "dtb";
+					reg = <0x00600000 0x00100000>; /* 1 MiB at 0x600000 */
+					read-only;
+				};
+				partition@fs { /* the only partition not marked read-only */
+					label = "file system";
+					reg = <0x00700000 0x00900000>; /* 9 MiB at 0x700000 */
+				};
+			};
+		};
+
+		i2c@118100 {
+			eeprom@51 {
+				compatible = "atmel,24c256";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "atmel,24c256";
+				reg = <0x52>;
+			};
+		};
+
+		i2c@119100 {
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+			ina220@40 {
+				compatible = "ti,ina220";
+				reg = <0x40>;
+				shunt-resistor = <1000>;
+			};
+			ina220@41 {
+				compatible = "ti,ina220";
+				reg = <0x41>;
+				shunt-resistor = <1000>;
+			};
+			ina220@44 {
+				compatible = "ti,ina220";
+				reg = <0x44>;
+				shunt-resistor = <1000>;
+			};
+			ina220@45 {
+				compatible = "ti,ina220";
+				reg = <0x45>;
+				shunt-resistor = <1000>;
+			};
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+		};
+
+		fman@400000 {
+			ethernet@e0000 {
+				phy-handle = <&phy_sgmii_1c>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy_sgmii_1d>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_sgmii_1e>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy_sgmii_1f>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phy_rgmii_1>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy_xgmii_1>;
+				phy-connection-type = "xgmii";
+			};
+
+			hydra_mdio_xgmii: mdio@f1000 {
+				status = "disabled";
+
+				phy_xgmii_1: ethernet-phy@4 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x4>;
+				};
+
+				phy_xgmii_2: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xffa00000 0x00040000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x08000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		nand@2,0 { /* NAND on the local bus; six contiguous partitions spanning 0x0 - 0x40000000 */
+			#address-cells = <1>;
+			#size-cells = <1>; /* partition reg values are <offset size> */
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x2 0x0 0x40000>; /* same 0x40000 length as the parent's ranges entry whose first cell is 2 */
+
+			partition@0 {
+				label = "NAND U-Boot Image";
+				reg = <0x0 0x02000000>; /* 32 MiB */
+				read-only;
+			};
+
+			partition@2000000 {
+				label = "NAND Root File System";
+				reg = <0x02000000 0x10000000>; /* 256 MiB */
+			};
+
+			partition@12000000 {
+				label = "NAND Compressed RFS Image";
+				reg = <0x12000000 0x08000000>; /* 128 MiB */
+			};
+
+			partition@1a000000 {
+				label = "NAND Linux Kernel Image";
+				reg = <0x1a000000 0x04000000>; /* 64 MiB */
+			};
+
+			partition@1e000000 {
+				label = "NAND DTB Image";
+				reg = <0x1e000000 0x01000000>; /* 16 MiB */
+			};
+
+			partition@1f000000 {
+				label = "NAND Writable User area";
+				reg = <0x1f000000 0x21000000>; /* 528 MiB, ends at 0x40000000 */
+			};
+		};
+
+		board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p5020ds-fpga", "fsl,fpga-ngpixis";
+			reg = <3 0 0x30>;
+			ranges = <0 3 0 0x30>;
+
+			mdio-mux-emi1 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "mdio-mux-mmioreg", "mdio-mux";
+				mdio-parent-bus = <&mdio0>;
+				reg = <9 1>;
+				mux-mask = <0x78>;
+
+				hydra_mdio_rgmii: rgmii-mdio@8 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <8>;
+					status = "disabled";
+
+					phy_rgmii_0: ethernet-phy@0 {
+						reg = <0x0>;
+					};
+
+					phy_rgmii_1: ethernet-phy@1 {
+						reg = <0x1>;
+					};
+				};
+
+				hydra_mdio_sgmii: sgmii-mdio@28 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x28>;
+					status = "disabled";
+
+					phy_sgmii_1c: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_sgmii_1d: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_sgmii_1e: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_sgmii_1f: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x1000>; /* two address cells + two size cells: 0x1000 bytes at 0xf_fe200000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; /* NOTE(review): presumably <pci-addr(3) cpu-addr(2) size(2)>: 0x20000000 bytes at 0xc_00000000 and 0x10000 bytes at 0xf_f8000000 - confirm cell counts in the SoC include */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* two entries; in each the first and second address triples are identical */
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		reg = <0xf 0xfe201000 0 0x1000>; /* two address cells + two size cells: 0x1000 bytes at 0xf_fe201000 */
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; /* NOTE(review): presumably <pci-addr(3) cpu-addr(2) size(2)>: 0x20000000 bytes at 0xc_20000000 and 0x10000 bytes at 0xf_f8010000 - confirm cell counts in the SoC include */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* two entries; in each the first and second address triples are identical */
+		};
+	};
+
+	pci2: pcie@ffe202000 {
+		reg = <0xf 0xfe202000 0 0x1000>; /* two address cells + two size cells: 0x1000 bytes at 0xf_fe202000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; /* NOTE(review): presumably <pci-addr(3) cpu-addr(2) size(2)>: 0x20000000 bytes at 0xc_40000000 and 0x10000 bytes at 0xf_f8020000 - confirm cell counts in the SoC include */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* two entries; in each the first and second address triples are identical */
+		};
+	};
+
+	pci3: pcie@ffe203000 {
+		reg = <0xf 0xfe203000 0 0x1000>; /* two address cells + two size cells: 0x1000 bytes at 0xf_fe203000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; /* NOTE(review): presumably <pci-addr(3) cpu-addr(2) size(2)>: 0x20000000 bytes at 0xc_60000000 and 0x10000 bytes at 0xf_f8030000 - confirm cell counts in the SoC include */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* two entries; in each the first and second address triples are identical */
+		};
+	};
+};
+
+/include/ "p5020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
new file mode 100644
index 0000000000..cd008cdd28
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
@@ -0,0 +1,478 @@
+/*
+ * P5020/P5010 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr {
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;	/* NOTE(review): assuming 2 address + 2 size cells, this is base 0, size 2^48 - confirm against the board's reserved-memory node */
+};
+
+&qman_fqd {
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>;	/* NOTE(review): assuming 2 address + 2 size cells, this is base 0, size 2^48 - confirm against the board's reserved-memory node */
+};
+
+&qman_pfdr {
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>;	/* NOTE(review): assuming 2 address + 2 size cells, this is base 0, size 2^48 - confirm against the board's reserved-memory node */
+};
+
+&lbc {
+	compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus";
+	interrupts = <25 2 0 0>;
+	#address-cells = <2>;
+	#size-cells = <1>;
+};
+
+/* controller at 0x200000 */
+&pci0 {
+	compatible = "fsl,p5020-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 15>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x500>; /* PEX1LIODNR */
+	pcie@0 {	/* all-zero reg: bus 0, device 0, function 0 (presumably the root port) */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 15>;	/* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on bits 15:11 of the first address cell and the low 3 bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0	/* pin 1 (INTA) -> &mpic <40 1 0 0> */
+			0000 0 0 2 &mpic 1 1 0 0	/* pin 2 (INTB) -> &mpic <1 1 0 0> */
+			0000 0 0 3 &mpic 2 1 0 0	/* pin 3 (INTC) -> &mpic <2 1 0 0> */
+			0000 0 0 4 &mpic 3 1 0 0	/* pin 4 (INTD) -> &mpic <3 1 0 0> */
+			>;
+	};
+};
+
+/* controller at 0x201000 */
+&pci1 {
+	compatible = "fsl,p5020-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 14>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x504>; /* PEX2LIODNR */
+	pcie@0 {	/* all-zero reg: bus 0, device 0, function 0 (presumably the root port) */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 14>;	/* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on bits 15:11 of the first address cell and the low 3 bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0	/* pin 1 (INTA) -> &mpic <41 1 0 0> */
+			0000 0 0 2 &mpic 5 1 0 0	/* pin 2 (INTB) -> &mpic <5 1 0 0> */
+			0000 0 0 3 &mpic 6 1 0 0	/* pin 3 (INTC) -> &mpic <6 1 0 0> */
+			0000 0 0 4 &mpic 7 1 0 0	/* pin 4 (INTD) -> &mpic <7 1 0 0> */
+			>;
+	};
+};
+
+/* controller at 0x202000 */
+&pci2 {
+	compatible = "fsl,p5020-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 13>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x508>; /* PEX3LIODNR */
+	pcie@0 {	/* all-zero reg: bus 0, device 0, function 0 (presumably the root port) */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 13>;	/* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on bits 15:11 of the first address cell and the low 3 bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0	/* pin 1 (INTA) -> &mpic <42 1 0 0> */
+			0000 0 0 2 &mpic 9 1 0 0	/* pin 2 (INTB) -> &mpic <9 1 0 0> */
+			0000 0 0 3 &mpic 10 1 0 0	/* pin 3 (INTC) -> &mpic <10 1 0 0> */
+			0000 0 0 4 &mpic 11 1 0 0	/* pin 4 (INTD) -> &mpic <11 1 0 0> */
+			>;
+	};
+};
+
+/* controller at 0x203000 */
+&pci3 {
+	compatible = "fsl,p5020-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;	/* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 12>;
+	fsl,iommu-parent = <&pamu0>;
+	fsl,liodn-reg = <&guts 0x50c>; /* PEX4LIODNR */
+	pcie@0 {	/* all-zero reg: bus 0, device 0, function 0 (presumably the root port) */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 12>;	/* same specifier as the enclosing controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on bits 15:11 of the first address cell and the low 3 bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0	/* pin 1 (INTA) -> &mpic <43 1 0 0> */
+			0000 0 0 2 &mpic 0 1 0 0	/* pin 2 (INTB) -> &mpic <0 1 0 0> */
+			0000 0 0 3 &mpic 4 1 0 0	/* pin 3 (INTC) -> &mpic <4 1 0 0> */
+			0000 0 0 4 &mpic 8 1 0 0	/* pin 4 (INTD) -> &mpic <8 1 0 0> */
+			>;
+	};
+};
+
+&rio {
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	fsl,iommu-parent = <&pamu0>;
+	ranges;	/* empty ranges: child addresses map 1:1 onto the parent bus */
+
+	port1 {	/* LIODN register for this port lives in &guts at offset 0x510 */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+		fsl,liodn-reg = <&guts 0x510>; /* RIO1LIODNR */
+	};
+
+	port2 {	/* LIODN register for this port lives in &guts at offset 0x514 */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+		fsl,liodn-reg = <&guts 0x514>; /* RIO2LIODNR */
+	};
+};
+
+&dcsr {
+	#address-cells = <1>;	/* children use one-cell <offset size> reg entries */
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,p5020-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc@1000 {	/* unit-address matches the first reg entry, like the sibling nodes */
+		compatible = "fsl,dcsr-npc";
+		reg = <0x1000 0x1000 0x1000000 0x8000>;	/* two regions: 4 KiB at 0x1000 and 32 KiB at 0x1000000 */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet@8000 {	/* unit-address matches the first reg entry, like the sibling nodes */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0xB0000 0x1000>;	/* two 4 KiB regions: at 0x8000 and at 0xB0000 */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,p5020-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,p5020-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;	/* points at memory-controller@8000 in &soc */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-ddr@13000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr2>;	/* points at memory-controller@9000 in &soc */
+		reg = <0x13000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,p5020-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,p5020-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@40000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x40000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@41000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x41000 0x1000>;
+	};
+};
+
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	ddr2: memory-controller@9000 {
+		compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 22>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,p5020-l3-cache-controller", "fsl,p4080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x4000>; /* for compatibility with older PAMU drivers */
+		ranges = <0 0x20000 0x4000>;	/* child offset 0 maps to parent 0x20000, 16 KiB window */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+		fsl,portid-mapping = <0x3c000000>;
+
+		pamu0: pamu@0 {	/* first of four 4 KiB sub-blocks inside the window above */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 {
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu3: pamu@3000 {	/* last sub-block: 0x3000 + 0x1000 ends exactly at the 0x4000 window size */
+			reg = <0x3000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,qoriq-device-config-1.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		#sleep-cells = <1>;
+		fsl,liodn-bits = <12>;
+	};
+
+	pins: global-utilities@e0e00 {
+		compatible = "fsl,qoriq-pin-control-1.0";
+		reg = <0xe0e00 0x200>;
+		#sleep-cells = <2>;
+	};
+
+/include/ "qoriq-clockgen1.dtsi"
+	global-utilities@e1000 {
+		compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,qoriq-rcpm-1.0";
+		reg = <0xe2000 0x1000>;
+		#sleep-cells = <1>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,p5020-sfp", "fsl,qoriq-sfp-1.0";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,p5020-serdes";
+		reg	   = <0xea000 0x1000>;
+	};
+
+/include/ "qoriq-dma-0.dtsi"
+	dma@100300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "qoriq-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,p5020-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+	};
+
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {
+		compatible = "fsl-usb2-mph-v1.6", "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		phy_type = "utmi";
+		port0;
+	};
+
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {
+		compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+/include/ "qoriq-sec4.2-0.dtsi"
+	crypto@300000 {
+		fsl,iommu-parent = <&pamu1>;
+	};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-raid1.0-0.dtsi"
+	raideng@320000 {
+		fsl,iommu-parent = <&pamu1>;
+	};
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+	fman@400000 {
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@f0000 {
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
new file mode 100644
index 0000000000..2d74ea85e5
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
@@ -0,0 +1,117 @@
+/*
+ * P5020/P5010 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,P5020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+		msi0 = &msi0;
+		msi1 = &msi1;
+		msi2 = &msi2;
+
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
+		raideng = &raideng;
+		raideng_jr0 = &raideng_jr0;
+		raideng_jr1 = &raideng_jr1;
+		raideng_jr2 = &raideng_jr2;
+		raideng_jr3 = &raideng_jr3;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+	};
+
+	cpus {
+		#address-cells = <1>;	/* each cpu node is addressed by a single index cell */
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e5500@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_0>;	/* the l2-cache child node below */
+			fsl,portid-mapping = <0x80000000>;	/* single bit set: bit 31 */
+			L2_0: l2-cache {
+				next-level-cache = <&cpc>;	/* both L2 nodes chain to the same &cpc node */
+			};
+		};
+		cpu1: PowerPC,e5500@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_1>;	/* the l2-cache child node below */
+			fsl,portid-mapping = <0x40000000>;	/* single bit set: bit 30 */
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;	/* both L2 nodes chain to the same &cpc node */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts
new file mode 100644
index 0000000000..5cfc689ee4
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts
@@ -0,0 +1,486 @@
+/*
+ * P5040DS Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of this
+ * software, even if advised of the possibility of such damage.
+ */
+
+/include/ "p5040si-pre.dtsi"
+
+/ {
+	model = "fsl,P5040DS";
+	compatible = "fsl,P5040DS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		phy_sgmii_slot2_1c = &phy_sgmii_slot2_1c;
+		phy_sgmii_slot2_1d = &phy_sgmii_slot2_1d;
+		phy_sgmii_slot2_1e = &phy_sgmii_slot2_1e;
+		phy_sgmii_slot2_1f = &phy_sgmii_slot2_1f;
+		phy_sgmii_slot3_1c = &phy_sgmii_slot3_1c;
+		phy_sgmii_slot3_1d = &phy_sgmii_slot3_1d;
+		phy_sgmii_slot3_1e = &phy_sgmii_slot3_1e;
+		phy_sgmii_slot3_1f = &phy_sgmii_slot3_1f;
+		phy_sgmii_slot5_1c = &phy_sgmii_slot5_1c;
+		phy_sgmii_slot5_1d = &phy_sgmii_slot5_1d;
+		phy_sgmii_slot5_1e = &phy_sgmii_slot5_1e;
+		phy_sgmii_slot5_1f = &phy_sgmii_slot5_1f;
+		phy_sgmii_slot6_1c = &phy_sgmii_slot6_1c;
+		phy_sgmii_slot6_1d = &phy_sgmii_slot6_1d;
+		phy_sgmii_slot6_1e = &phy_sgmii_slot6_1e;
+		phy_sgmii_slot6_1f = &phy_sgmii_slot6_1f;
+		hydra_rg = &hydra_rg;
+		hydra_sg_slot2 = &hydra_sg_slot2;
+		hydra_sg_slot3 = &hydra_sg_slot3;
+		hydra_sg_slot5 = &hydra_sg_slot5;
+		hydra_sg_slot6 = &hydra_sg_slot6;
+		hydra_xg_slot1 = &hydra_xg_slot1;
+		hydra_xg_slot2 = &hydra_xg_slot2;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x200000>;
+	};
+
+	qportals: qman-portals@ff4200000 {
+		ranges = <0x0 0xf 0xf4200000 0x200000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {	/* SPI NOR flash; reg = <0> is its address on this SPI bus */
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25sl12801", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+				partition@0 {	/* 1 MiB; unit-address is the reg offset */
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>;
+				};
+				partition@100000 {	/* 5 MiB, starts where "u-boot" ends */
+					label = "kernel";
+					reg = <0x00100000 0x00500000>;
+				};
+				partition@600000 {	/* 1 MiB, starts where "kernel" ends */
+					label = "dtb";
+					reg = <0x00600000 0x00100000>;
+				};
+				partition@700000 {	/* 9 MiB, ends at 0x1000000 (16 MiB) */
+					label = "file system";
+					reg = <0x00700000 0x00900000>;
+				};
+			};
+		};
+
+		i2c@118100 {
+			eeprom@51 {
+				compatible = "atmel,24c256";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "atmel,24c256";
+				reg = <0x52>;
+			};
+		};
+
+		i2c@119100 {
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+			ina220@40 {
+				compatible = "ti,ina220";
+				reg = <0x40>;
+				shunt-resistor = <1000>;
+			};
+			ina220@41 {
+				compatible = "ti,ina220";
+				reg = <0x41>;
+				shunt-resistor = <1000>;
+			};
+			ina220@44 {
+				compatible = "ti,ina220";
+				reg = <0x44>;
+				shunt-resistor = <1000>;
+			};
+			ina220@45 {
+				compatible = "ti,ina220";
+				reg = <0x45>;
+				shunt-resistor = <1000>;
+			};
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+		};
+
+		fman@400000 {
+			ethernet@e0000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phy_rgmii_0>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy_xgmii_slot_2>;
+				phy-connection-type = "xgmii";
+			};
+		};
+
+		fman@500000 {
+			ethernet@e0000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phy_rgmii_1>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&phy_xgmii_slot_1>;
+				phy-connection-type = "xgmii";
+			};
+		};
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xffa00000 0x00040000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x08000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x2 0x0 0x40000>;
+
+			partition@0 {
+				label = "NAND U-Boot Image";
+				reg = <0x0 0x02000000>;
+			};
+
+			partition@2000000 {
+				label = "NAND Root File System";
+				reg = <0x02000000 0x10000000>;
+			};
+
+			partition@12000000 {
+				label = "NAND Compressed RFS Image";
+				reg = <0x12000000 0x08000000>;
+			};
+
+			partition@1a000000 {
+				label = "NAND Linux Kernel Image";
+				reg = <0x1a000000 0x04000000>;
+			};
+
+			partition@1e000000 {
+				label = "NAND DTB Image";
+				reg = <0x1e000000 0x01000000>;
+			};
+
+			partition@1f000000 {
+				label = "NAND Writable User area";
+				reg = <0x1f000000 0x01000000>;
+			};
+		};
+
+		board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p5040ds-fpga", "fsl,fpga-ngpixis";
+			reg = <3 0 0x40>;
+			ranges = <0 3 0 0x40>;
+
+			mdio-mux-emi1 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "mdio-mux-mmioreg", "mdio-mux";
+				mdio-parent-bus = <&mdio0>;
+				reg = <9 1>;
+				mux-mask = <0x78>;
+
+				hydra_rg:rgmii-mdio@8 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <8>;
+					status = "disabled";
+
+					phy_rgmii_0: ethernet-phy@0 {
+						reg = <0x0>;
+					};
+
+					phy_rgmii_1: ethernet-phy@1 {
+						reg = <0x1>;
+					};
+				};
+
+				hydra_sg_slot2: sgmii-mdio@28 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x28>;
+					status = "disabled";
+
+					phy_sgmii_slot2_1c: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_sgmii_slot2_1d: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_sgmii_slot2_1e: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_sgmii_slot2_1f: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+
+				hydra_sg_slot3: sgmii-mdio@68 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x68>;
+					status = "disabled";
+
+					phy_sgmii_slot3_1c: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_sgmii_slot3_1d: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_sgmii_slot3_1e: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_sgmii_slot3_1f: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+
+				hydra_sg_slot5: sgmii-mdio@38 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x38>;
+					status = "disabled";
+
+					phy_sgmii_slot5_1c: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_sgmii_slot5_1d: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_sgmii_slot5_1e: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_sgmii_slot5_1f: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+				hydra_sg_slot6: sgmii-mdio@48 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x48>;
+					status = "disabled";
+
+					phy_sgmii_slot6_1c: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_sgmii_slot6_1d: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_sgmii_slot6_1e: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_sgmii_slot6_1f: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+			};
+
+			mdio-mux-emi2 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "mdio-mux-mmioreg", "mdio-mux";
+				mdio-parent-bus = <&xmdio0>;
+				reg = <9 1>;	/* one-byte register at offset 9 of the enclosing board-control block */
+				mux-mask = <0x06>;	/* bits 2:1; each child bus reg below is a value under this mask */
+
+				hydra_xg_slot1: hydra-xg-slot1@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0>;
+					status = "disabled";
+
+					phy_xgmii_slot_1: ethernet-phy@4 {	/* node name follows reg; reg value itself is unchanged */
+						compatible = "ethernet-phy-ieee802.3-c45";
+						reg = <4>;
+					};
+				};
+
+				hydra_xg_slot2: hydra-xg-slot2@2 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <2>;
+
+					phy_xgmii_slot_2: ethernet-phy@0 {	/* node name follows reg; reg value itself is unchanged */
+						compatible = "ethernet-phy-ieee802.3-c45";
+						reg = <0>;
+					};
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000	/* 32-bit MEM: PCI 0xe0000000 -> CPU 0xc_0000_0000, 512 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;	/* I/O: PCI 0x0 -> CPU 0xf_f800_0000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* MEM window: child and parent addresses are identical */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000	/* I/O window: child and parent addresses are identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		reg = <0xf 0xfe201000 0 0x1000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000	/* 32-bit MEM: PCI 0xe0000000 -> CPU 0xc_2000_0000, 512 MiB */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;	/* I/O: PCI 0x0 -> CPU 0xf_f801_0000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* MEM window: child and parent addresses are identical */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000	/* I/O window: child and parent addresses are identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe202000 {
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000	/* 32-bit MEM: PCI 0xe0000000 -> CPU 0xc_4000_0000, 512 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* I/O: PCI 0x0 -> CPU 0xf_f802_0000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* MEM window: child and parent addresses are identical */
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000	/* I/O window: child and parent addresses are identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "p5040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
new file mode 100644
index 0000000000..16b454b504
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
@@ -0,0 +1,462 @@
+/*
+ * P5040 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of this
+ * software, even if advised of the possibility of such damage.
+ */
+
+&bman_fbpr { /* adds properties to the node labelled bman_fbpr; the label itself is not defined in this file */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>; /* presumably <base-hi base-lo size-hi size-lo>, i.e. base 0, size 0x10000_00000000 -- confirm parent cell counts */
+};
+
+&qman_fqd { /* adds properties to the node labelled qman_fqd; the label itself is not defined in this file */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>; /* presumably <base-hi base-lo size-hi size-lo>, i.e. base 0, size 0x10000_00000000 -- confirm parent cell counts */
+};
+
+&qman_pfdr { /* adds properties to the node labelled qman_pfdr; the label itself is not defined in this file */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>; /* presumably <base-hi base-lo size-hi size-lo>, i.e. base 0, size 0x10000_00000000 -- confirm parent cell counts */
+};
+
+&lbc { /* SoC-level properties for the node labelled lbc; the label is defined outside this file */
+	compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus"; /* most specific string first, generic fallbacks after */
+	interrupts = <25 2 0 0>; /* one four-cell specifier for the interrupt parent */
+	#address-cells = <2>; /* children use two address cells -- presumably <chip-select offset>, confirm against the eLBC binding */
+	#size-cells = <1>; /* children use one size cell */
+};
+
+/* controller at 0x200000: SoC-level properties for pci0; reg and ranges come from the file that defines the label */
+&pci0 {
+	compatible = "fsl,p5040-pcie", "fsl,qoriq-pcie-v2.4";
+	device_type = "pci";
+	#size-cells = <2>; /* child sizes are two cells (64 bits) */
+	#address-cells = <3>; /* child addresses are three-cell PCI addresses */
+	bus-range = <0x0 0xff>; /* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 15>; /* same specifier is repeated on the pcie@0 child below */
+	fsl,iommu-parent = <&pamu0>; /* pamu0 labels pamu@0 under iommu@20000 in &soc */
+	pcie@0 { /* single child at PCI address 0; holds the interrupt routing table */
+		reg = <0 0 0 0 0>; /* three address cells plus two size cells, all zero */
+		#interrupt-cells = <1>; /* child interrupt specifier is one cell (the pin number used in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 15>;
+		interrupt-map-mask = <0xf800 0 0 7>; /* match on bits 0xf800 of the first address cell and the low three bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0 /* pin 1 -> mpic specifier <40 1 0 0> */
+			0000 0 0 2 &mpic 1 1 0 0 /* pin 2 -> mpic specifier <1 1 0 0> */
+			0000 0 0 3 &mpic 2 1 0 0 /* pin 3 -> mpic specifier <2 1 0 0> */
+			0000 0 0 4 &mpic 3 1 0 0 /* pin 4 -> mpic specifier <3 1 0 0> */
+			>;
+	};
+};
+
+/* controller at 0x201000: SoC-level properties for pci1; reg and ranges come from the file that defines the label */
+&pci1 {
+	compatible = "fsl,p5040-pcie", "fsl,qoriq-pcie-v2.4";
+	device_type = "pci";
+	#size-cells = <2>; /* child sizes are two cells (64 bits) */
+	#address-cells = <3>; /* child addresses are three-cell PCI addresses */
+	bus-range = <0 0xff>; /* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 14>; /* same specifier is repeated on the pcie@0 child below */
+	fsl,iommu-parent = <&pamu0>; /* pamu0 labels pamu@0 under iommu@20000 in &soc */
+	pcie@0 { /* single child at PCI address 0; holds the interrupt routing table */
+		reg = <0 0 0 0 0>; /* three address cells plus two size cells, all zero */
+		#interrupt-cells = <1>; /* child interrupt specifier is one cell (the pin number used in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 14>;
+		interrupt-map-mask = <0xf800 0 0 7>; /* match on bits 0xf800 of the first address cell and the low three bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0 /* pin 1 -> mpic specifier <41 1 0 0> */
+			0000 0 0 2 &mpic 5 1 0 0 /* pin 2 -> mpic specifier <5 1 0 0> */
+			0000 0 0 3 &mpic 6 1 0 0 /* pin 3 -> mpic specifier <6 1 0 0> */
+			0000 0 0 4 &mpic 7 1 0 0 /* pin 4 -> mpic specifier <7 1 0 0> */
+			>;
+	};
+};
+
+/* controller at 0x202000: SoC-level properties for pci2; reg and ranges come from the file that defines the label */
+&pci2 {
+	compatible = "fsl,p5040-pcie", "fsl,qoriq-pcie-v2.4";
+	device_type = "pci";
+	#size-cells = <2>; /* child sizes are two cells (64 bits) */
+	#address-cells = <3>; /* child addresses are three-cell PCI addresses */
+	bus-range = <0x0 0xff>; /* bus numbers 0 through 255 */
+	clock-frequency = <33333333>;
+	interrupts = <16 2 1 13>; /* same specifier is repeated on the pcie@0 child below */
+	fsl,iommu-parent = <&pamu0>; /* pamu0 labels pamu@0 under iommu@20000 in &soc */
+	pcie@0 { /* single child at PCI address 0; holds the interrupt routing table */
+		reg = <0 0 0 0 0>; /* three address cells plus two size cells, all zero */
+		#interrupt-cells = <1>; /* child interrupt specifier is one cell (the pin number used in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 1 13>;
+		interrupt-map-mask = <0xf800 0 0 7>; /* match on bits 0xf800 of the first address cell and the low three bits of the pin */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0 /* pin 1 -> mpic specifier <42 1 0 0> */
+			0000 0 0 2 &mpic 9 1 0 0 /* pin 2 -> mpic specifier <9 1 0 0> */
+			0000 0 0 3 &mpic 10 1 0 0 /* pin 3 -> mpic specifier <10 1 0 0> */
+			0000 0 0 4 &mpic 11 1 0 0 /* pin 4 -> mpic specifier <11 1 0 0> */
+			>;
+	};
+};
+
+&dcsr { /* children of the node labelled dcsr; every reg entry below is <offset size>, one cell each */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,p5040-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0 /* three interrupt specifiers of four cells each */
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc { /* NOTE(review): has reg but no @unit-address, unlike most siblings */
+		compatible = "fsl,dcsr-npc";
+		reg = <0x1000 0x1000 0x1000000 0x8000>; /* two windows: 0x1000 (+0x1000) and 0x1000000 (+0x8000) */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet { /* NOTE(review): has reg but no @unit-address, unlike most siblings */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0xB0000 0x1000>; /* two windows: 0x8000 (+0x1000) and 0xB0000 (+0x1000) */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,p5040-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,p5040-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>; /* ddr1 labels memory-controller@8000 in &soc */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-ddr@13000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr2>; /* ddr2 labels memory-controller@9000 in &soc */
+		reg = <0x13000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,p5040-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,p5040-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@40000 { /* one proxy node per core, 0x1000 apart, each tied to a cpuN label */
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>; /* cpu0..cpu3 are labelled in p5040si-pre.dtsi */
+		reg = <0x40000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@41000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x41000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@42000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x42000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@43000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x43000 0x1000>;
+	};
+};
+
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
+&soc { /* on-chip peripherals of the P5040; child reg values are <offset size>, one cell each */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error { /* no reg property: only an interrupt is described */
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 { /* referenced by dcsr-ddr@12000 through dev-handle */
+		compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	ddr2: memory-controller@9000 { /* referenced by dcsr-ddr@13000 through dev-handle */
+		compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 22>;
+	};
+
+	cpc: l3-cache-controller@10000 { /* target of next-level-cache from the l2-cache nodes in p5040si-pre.dtsi */
+		compatible = "fsl,p5040-l3-cache-controller", "fsl,p4080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000 /* two register windows of 0x1000 bytes each */
+		       0x11000 0x1000>;
+		interrupts = <16 2 1 27 /* two interrupt specifiers */
+			      16 2 1 26>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet1-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 { /* container for the five pamu@N children, one per 0x1000 bytes */
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x5000>; /* for compatibility with older PAMU drivers */
+		ranges = <0 0x20000 0x5000>; /* child offset 0 maps to 0x20000 in the parent, 0x5000 bytes */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <24 2 0 0
+			      16 2 1 30>;
+		fsl,portid-mapping = <0x0f800000>;
+
+		pamu0: pamu@0 { /* used as fsl,iommu-parent by the PCIe controllers and both DMA engines */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 {
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 { /* used as fsl,iommu-parent by sdhc@114000 */
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu3: pamu@3000 {
+			reg = <0x3000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu4: pamu@4000 { /* used as fsl,iommu-parent by the USB, SATA and crypto nodes */
+			reg = <0x4000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 { /* the fsl,liodn-reg properties below pair this label with a register offset */
+		compatible = "fsl,p5040-device-config", "fsl,qoriq-device-config-1.0";
+		reg = <0xe0000 0xe00>; /* ends at 0xe0e00, where the pins node begins */
+		fsl,has-rstcr;
+		#sleep-cells = <1>;
+		fsl,liodn-bits = <12>;
+	};
+
+	pins: global-utilities@e0e00 {
+		compatible = "fsl,p5040-pin-control", "fsl,qoriq-pin-control-1.0";
+		reg = <0xe0e00 0x200>;
+		#sleep-cells = <2>;
+	};
+
+/include/ "qoriq-clockgen1.dtsi"
+	global-utilities@e1000 { /* presumably extends the node created by the include above -- only compatible is set here */
+		compatible = "fsl,p5040-clockgen", "fsl,qoriq-clockgen-1.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,p5040-rcpm", "fsl,qoriq-rcpm-1.0";
+		reg = <0xe2000 0x1000>;
+		#sleep-cells = <1>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,p5040-sfp", "fsl,qoriq-sfp-1.0";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,p5040-serdes";
+		reg	   = <0xea000 0x1000>;
+	};
+
+/include/ "qoriq-dma-0.dtsi"
+	dma@100300 { /* presumably extends the node created by the include above with IOMMU wiring */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+
+/include/ "qoriq-dma-1.dtsi"
+	dma@101300 { /* presumably extends the node created by the include above with IOMMU wiring */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,p5040-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu2>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+	};
+
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 { /* usb0 is the target of the usb0 alias in p5040si-pre.dtsi */
+		compatible = "fsl-usb2-mph-v1.6", "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu4>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		phy_type = "utmi";
+		port0;
+	};
+
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 { /* usb1 is the target of the usb1 alias in p5040si-pre.dtsi */
+		compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu4>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {
+		fsl,iommu-parent = <&pamu4>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu4>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+
+/include/ "qoriq-sec5.2-0.dtsi"
+	crypto@300000 {
+		fsl,iommu-parent = <&pamu4>;
+	};
+
+/include/ "qoriq-raid1.0-0.dtsi"
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+	fman@400000 { /* empty bodies: these entries only attach the enet0..enet5 labels used by the ethernetN aliases */
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@f0000 {
+		};
+	};
+
+/include/ "qoriq-fman-1.dtsi"
+/include/ "qoriq-fman-1-1g-0.dtsi"
+/include/ "qoriq-fman-1-1g-1.dtsi"
+/include/ "qoriq-fman-1-1g-2.dtsi"
+/include/ "qoriq-fman-1-1g-3.dtsi"
+/include/ "qoriq-fman-1-1g-4.dtsi"
+/include/ "qoriq-fman-1-10g-0.dtsi"
+	fman@500000 { /* empty bodies: these entries only attach the enet6..enet11 labels used by the ethernetN aliases */
+		enet6: ethernet@e0000 {
+		};
+
+		enet7: ethernet@e2000 {
+		};
+
+		enet8: ethernet@e4000 {
+		};
+
+		enet9: ethernet@e6000 {
+		};
+
+		enet10: ethernet@e8000 {
+		};
+
+		enet11: ethernet@f0000 {
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
new file mode 100644
index 0000000000..ed89dbbdac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
@@ -0,0 +1,143 @@
+/*
+ * P5040 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of this
+ * software, even if advised of the possibility of such damage.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ { /* root of the P5040 tree: cell sizes, aliases and the four e5500 cpu nodes */
+	compatible = "fsl,P5040";
+	#address-cells = <2>; /* top-level addresses take two cells */
+	#size-cells = <2>; /* top-level sizes take two cells */
+	interrupt-parent = <&mpic>; /* interrupt controller inherited by nodes that do not name their own */
+
+	aliases { /* none of the labels on the right-hand side is defined in this file */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+		msi0 = &msi0;
+		msi1 = &msi1;
+		msi2 = &msi2;
+
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
+		raideng = &raideng;
+		raideng_jr0 = &raideng_jr0;
+		raideng_jr1 = &raideng_jr1;
+		raideng_jr2 = &raideng_jr2;
+		raideng_jr3 = &raideng_jr3;
+
+		fman0 = &fman0;
+		fman1 = &fman1;
+		ethernet0 = &enet0; /* enet0..enet11 are attached to the fman ethernet nodes in p5040si-post.dtsi */
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+		ethernet6 = &enet6;
+		ethernet7 = &enet7;
+		ethernet8 = &enet8;
+		ethernet9 = &enet9;
+		ethernet10 = &enet10;
+		ethernet11 = &enet11;
+	};
+
+	cpus {
+		#address-cells = <1>; /* a cpu's reg is a single cell */
+		#size-cells = <0>; /* cpu nodes carry no size */
+
+		cpu0: PowerPC,e5500@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>; /* last cell equals the cpu number on every core */
+			next-level-cache = <&L2_0>; /* points at this core's own l2-cache child */
+			fsl,portid-mapping = <0x80000000>; /* one bit per core, moving down from the most significant bit */
+			L2_0: l2-cache {
+				next-level-cache = <&cpc>; /* cpc labels l3-cache-controller@10000 in p5040si-post.dtsi */
+			};
+		};
+		cpu1: PowerPC,e5500@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x40000000>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu2: PowerPC,e5500@2 {
+			device_type = "cpu";
+			reg = <2>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x20000000>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu3: PowerPC,e5500@3 {
+			device_type = "cpu";
+			reg = <3>;
+			clocks = <&clockgen 1 3>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x10000000>;
+			L2_3: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/ppa8548.dts b/arch/powerpc/boot/dts/fsl/ppa8548.dts
new file mode 100644
index 0000000000..f39838d939
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/ppa8548.dts
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PPA8548 Device Tree Source (36-bit address map)
+ * Copyright 2013 Prodrive B.V.
+ *
+ * Based on:
+ * MPC8548 CDS Device Tree Source (36-bit address map)
+ * Copyright 2012 Freescale Semiconductor Inc.
+ */
+
+/include/ "mpc8548si-pre.dtsi"
+
+/ { /* board-level root for the ppa8548: memory size, bus address windows, disabled PCI */
+	model = "ppa8548";
+	compatible = "ppa8548";
+	#address-cells = <2>; /* top-level addresses take two cells */
+	#size-cells = <2>; /* top-level sizes take two cells */
+	interrupt-parent = <&mpic>;
+
+	memory {
+		device_type = "memory";
+		reg = <0 0 0x0 0x40000000>; /* base 0, size 0x40000000 (1 GiB) */
+	};
+
+	lbc: localbus@fe0005000 {
+		reg = <0xf 0xe0005000 0 0x1000>; /* 0x1000 bytes of registers at 0xf_e0005000 */
+		ranges = <0x0 0x0 0xf 0xff800000 0x00800000>; /* child <0 0> -> CPU 0xf_ff800000, 0x800000 bytes (8 MiB) */
+	};
+
+	soc: soc8548@fe0000000 {
+		ranges = <0 0xf 0xe0000000 0x100000>; /* child offset 0 -> CPU 0xf_e0000000, 0x100000 bytes (1 MiB) */
+	};
+
+	pci0: pci@fe0008000 {
+		/* ppa8548 board doesn't support PCI */
+		status = "disabled";
+	};
+
+	pci1: pci@fe0009000 {
+		/* ppa8548 board doesn't support PCI */
+		status = "disabled";
+	};
+
+	pci2: pcie@fe000a000 {
+		/* ppa8548 board doesn't support PCI */
+		status = "disabled";
+	};
+
+	rio: rapidio@fe00c0000 {
+		reg = <0xf 0xe00c0000 0x0 0x11000>; /* 0x11000 bytes of registers at 0xf_e00c0000 */
+		port1 {
+			ranges = <0x0 0x0 0x0 0x80000000 0x0 0x40000000>;
+		};
+	};
+};
+
+&lbc { /* devices on the local bus: a single NOR flash split into three partitions */
+	nor@0 {
+		#address-cells = <1>; /* partition reg values are <offset size>, one cell each */
+		#size-cells = <1>;
+		compatible = "cfi-flash";
+		reg = <0x0 0x0 0x00800000>; /* 0x800000 bytes (8 MiB), matching the lbc ranges window in the root node */
+		bank-width = <2>;
+		device-width = <2>;
+
+		partition@0 {
+			reg = <0x0 0x7A0000>; /* 0x000000 - 0x79ffff */
+			label = "user";
+		};
+
+		partition@7A0000 {
+			reg = <0x7A0000 0x20000>; /* 0x7a0000 - 0x7bffff */
+			label = "env";
+			read-only;
+		};
+
+		partition@7C0000 {
+			reg = <0x7C0000 0x40000>; /* 0x7c0000 - 0x7fffff, i.e. up to the end of the flash */
+			label = "u-boot";
+			read-only;
+		};
+	};
+};
+
+&soc { /* board-level additions to the SoC peripherals: RTC, Ethernet PHY wiring, disabled blocks */
+	i2c@3000 {
+		rtc@6f {
+			compatible = "intersil,isl1208";
+			reg = <0x6f>; /* matches the @6f unit address */
+		};
+	};
+
+	i2c@3100 {
+	};
+
+	/*
+	 * Only ethernet controller @25000 and @26000 are used.
+	 * Use alias enet2 and enet3 for the remaining controllers,
+	 * to stay compatible with mpc8548si-pre.dtsi.
+	 */
+	enet2: ethernet@24000 {
+		status = "disabled";
+	};
+
+	mdio@24520 { /* holds both external PHYs even though ethernet@24000 itself is disabled */
+		phy0: ethernet-phy@0 { /* phy-handle target of enet0 (ethernet@25000) */
+			interrupts = <7 1 0 0>;
+			reg = <0x0>;
+		};
+		phy1: ethernet-phy@1 { /* phy-handle target of enet1 (ethernet@26000) */
+			interrupts = <8 1 0 0>;
+			reg = <0x1>;
+		};
+		tbi0: tbi-phy@11 { /* label tbi0 is not referenced within this block */
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet0: ethernet@25000 {
+		tbi-handle = <&tbi1>; /* tbi1 sits on mdio@25520 below */
+		phy-handle = <&phy0>; /* phy0 sits on mdio@24520 above */
+	};
+
+	mdio@25520 {
+		tbi1: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet1: ethernet@26000 {
+		tbi-handle = <&tbi2>; /* tbi2 sits on mdio@26520 below */
+		phy-handle = <&phy1>; /* phy1 sits on mdio@24520 above */
+	};
+
+	mdio@26520 {
+		tbi2: tbi-phy@11 {
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	enet3: ethernet@27000 {
+		status = "disabled";
+	};
+
+	mdio@27520 {
+		tbi3: tbi-phy@11 { /* label tbi3 is not referenced within this block */
+			reg = <0x11>;
+			device_type = "tbi-phy";
+		};
+	};
+
+	crypto@30000 {
+		status = "disabled";
+	};
+};
+
+/include/ "mpc8548si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/pq3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-dma-0.dtsi
new file mode 100644
index 0000000000..b5b37ad30e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-dma-0.dtsi
@@ -0,0 +1,66 @@
+/*
+ * PQ3 DMA device tree stub [ controller @ offset 0x21000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma@21300 { /* DMA controller stub with four channels, each owning 0x80 bytes of the child address space */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,eloplus-dma";
+	reg = <0x21300 0x4>; /* 4 bytes at offset 0x21300 */
+	ranges = <0x0 0x21100 0x200>; /* child offset 0 -> parent offset 0x21100, 0x200 bytes (4 channels x 0x80) */
+	cell-index = <0>; /* 0 here, 1 in the sibling stub dma@c300 */
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		cell-index = <0>; /* channel numbers run 0..3 */
+		interrupts = <20 2 0 0>; /* channels use consecutive first cells 20..23 */
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		cell-index = <1>;
+		interrupts = <21 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		cell-index = <2>;
+		interrupts = <22 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		cell-index = <3>;
+		interrupts = <23 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-dma-1.dtsi
new file mode 100644
index 0000000000..28cb8a55d8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-dma-1.dtsi
@@ -0,0 +1,66 @@
+/*
+ * PQ3 DMA device tree stub [ controller @ offset 0xc300 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma@c300 { /* DMA controller stub with four channels, each owning 0x80 bytes of the child address space */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,eloplus-dma";
+	reg = <0xc300 0x4>; /* 4 bytes at offset 0xc300 */
+	ranges = <0x0 0xc100 0x200>; /* child offset 0 -> parent offset 0xc100, 0x200 bytes (4 channels x 0x80) */
+	cell-index = <1>; /* 1 here, 0 in the sibling stub dma@21300 */
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		cell-index = <0>; /* channel numbers run 0..3 */
+		interrupts = <76 2 0 0>; /* channels use consecutive first cells 76..79 */
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		cell-index = <1>;
+		interrupts = <77 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		cell-index = <2>;
+		interrupts = <78 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		cell-index = <3>;
+		interrupts = <79 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-duart-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-duart-0.dtsi
new file mode 100644
index 0000000000..5e268fdb9d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-duart-0.dtsi
@@ -0,0 +1,51 @@
+/*
+ * PQ3 DUART device tree stub [ controller @ offset 0x4000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+serial0: serial@4500 { /* first port of the DUART stub */
+	cell-index = <0>;
+	device_type = "serial";
+	compatible = "fsl,ns16550", "ns16550";
+	reg = <0x4500 0x100>; /* 0x100 bytes at offset 0x4500 */
+	clock-frequency = <0>; /* placeholder value; presumably filled in by firmware before boot -- confirm */
+	interrupts = <42 2 0 0>; /* identical to serial1: both ports use the same interrupt specifier */
+};
+
+serial1: serial@4600 { /* second port of the DUART stub */
+	cell-index = <1>;
+	device_type = "serial";
+	compatible = "fsl,ns16550", "ns16550";
+	reg = <0x4600 0x100>; /* 0x100 bytes at offset 0x4600 */
+	clock-frequency = <0>; /* placeholder value; presumably filled in by firmware before boot -- confirm */
+	interrupts = <42 2 0 0>; /* identical to serial0: both ports use the same interrupt specifier */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-esdhc-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-esdhc-0.dtsi
new file mode 100644
index 0000000000..5743433e27
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-esdhc-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * PQ3 eSDHC device tree stub [ controller @ offset 0x2e000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+sdhc@2e000 { /* eSDHC controller stub; only the generic compatible string is set here */
+	compatible = "fsl,esdhc";
+	reg = <0x2e000 0x1000>; /* 0x1000 bytes at offset 0x2e000 */
+	interrupts = <72 0x2 0 0>;
+	/* Filled in by U-Boot */
+	clock-frequency = <0>; /* zero is the placeholder that the comment above refers to */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-espi-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-espi-0.dtsi
new file mode 100644
index 0000000000..75854b2e03
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-espi-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * PQ3 eSPI device tree stub [ controller @ offset 0x7000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+spi@7000 { /* eSPI controller stub; no child devices are declared here */
+	#address-cells = <1>; /* children are addressed by one cell -- presumably the chip-select number, confirm */
+	#size-cells = <0>; /* children carry no size */
+	compatible = "fsl,mpc8536-espi";
+	reg = <0x7000 0x1000>; /* 0x1000 bytes at offset 0x7000 */
+	interrupts = <59 0x2 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-0.dtsi
new file mode 100644
index 0000000000..3b0650a984
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-0.dtsi
@@ -0,0 +1,54 @@
+/*
+ * PQ3 eTSEC device tree stub [ @ offsets 0x24000 ]
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ethernet@24000 {	/* eTSEC network controller stub; unit address matches the reg base below */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	cell-index = <0>;
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "gianfar";
+	reg = <0x24000 0x1000>;
+	ranges = <0x0 0x24000 0x1000>;	/* child address 0x0 maps to 0x24000, length 0x1000: the same window as reg (assuming a one-cell parent address) */
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+};
+
+mdio@24520 {	/* MDIO stub that accompanies ethernet@24000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,gianfar-mdio";
+	reg = <0x24520 0x20>;	/* 0x20 bytes at 0x24520, which lies inside ethernet@24000's 0x24000/0x1000 reg window */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi
new file mode 100644
index 0000000000..96693b41f0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi
@@ -0,0 +1,54 @@
+/*
+ * PQ3 eTSEC device tree stub [ @ offsets 0x25000 ]
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ethernet@25000 {	/* eTSEC network controller stub; unit address matches the reg base below */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	cell-index = <1>;
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "gianfar";
+	reg = <0x25000 0x1000>;
+	ranges = <0x0 0x25000 0x1000>;	/* child address 0x0 maps to 0x25000, length 0x1000: the same window as reg (assuming a one-cell parent address) */
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+};
+
+mdio@25520 {	/* MDIO-style stub that accompanies ethernet@25000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,gianfar-tbi";	/* note: "-tbi", whereas mdio@24520 in pq3-etsec1-0.dtsi uses "fsl,gianfar-mdio" */
+	reg = <0x25520 0x20>;	/* 0x20 bytes at 0x25520, which lies inside ethernet@25000's 0x25000/0x1000 reg window */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi
new file mode 100644
index 0000000000..6b3fab19da
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi
@@ -0,0 +1,54 @@
+/*
+ * PQ3 eTSEC device tree stub [ @ offsets 0x26000 ]
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ethernet@26000 {	/* eTSEC network controller stub; unit address matches the reg base below */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	cell-index = <2>;
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "gianfar";
+	reg = <0x26000 0x1000>;
+	ranges = <0x0 0x26000 0x1000>;	/* child address 0x0 maps to 0x26000, length 0x1000: the same window as reg (assuming a one-cell parent address) */
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	interrupts = <31 2 0 0 32 2 0 0 33 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+};
+
+mdio@26520 {	/* MDIO-style stub that accompanies ethernet@26000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,gianfar-tbi";
+	reg = <0x26520 0x20>;	/* 0x20 bytes at 0x26520, which lies inside ethernet@26000's 0x26000/0x1000 reg window */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi
new file mode 100644
index 0000000000..0da592d93d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi
@@ -0,0 +1,54 @@
+/*
+ * PQ3 eTSEC device tree stub [ @ offsets 0x27000 ]
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ethernet@27000 {	/* eTSEC network controller stub; unit address matches the reg base below */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	cell-index = <3>;
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "gianfar";
+	reg = <0x27000 0x1000>;
+	ranges = <0x0 0x27000 0x1000>;	/* child address 0x0 maps to 0x27000, length 0x1000: the same window as reg (assuming a one-cell parent address) */
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	interrupts = <37 2 0 0 38 2 0 0 39 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+};
+
+mdio@27520 {	/* MDIO-style stub that accompanies ethernet@27000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,gianfar-tbi";
+	reg = <0x27520 0x20>;	/* 0x20 bytes at 0x27520, which lies inside ethernet@27000's 0x27000/0x1000 reg window */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-timer-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-timer-0.dtsi
new file mode 100644
index 0000000000..efe2ca04bc
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-timer-0.dtsi
@@ -0,0 +1,39 @@
+/*
+ * PQ3 eTSEC Timer (IEEE 1588) device tree stub [ @ offsets 0x24e00 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ptp_clock@24e00 {	/* eTSEC timer (IEEE 1588) node, as named in this file's header comment */
+	compatible = "fsl,etsec-ptp";
+	reg = <0x24e00 0xb0>;	/* 0xb0 bytes at 0x24e00 (assuming a one-cell address / one-cell size parent) */
+	interrupts = <68 2 0 0 69 2 0 0>;	/* eight cells; presumably two four-cell specifiers - confirm against the interrupt parent */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi
new file mode 100644
index 0000000000..7fcb1ac0f2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi
@@ -0,0 +1,61 @@
+/*
+ * PQ3 eTSEC2 device tree stub [ @ offsets 0x24000/0xb0000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+mdio@24000 {	/* MDIO stub that accompanies ethernet@b0000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,etsec2-mdio";
+	reg = <0x24000 0x1000 0xb0030 0x4>;	/* two regions: 0x24000/0x1000, plus 4 bytes at 0xb0030 inside queue-group@b0000's window (presumably the TBIPA register - confirm against the binding) */
+};
+
+ethernet@b0000 {	/* eTSEC2 network controller stub; has no reg of its own, the register window is given by the queue-group child */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "fsl,etsec2";
+	fsl,num_rx_queues = <0x8>;
+	fsl,num_tx_queues = <0x8>;
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	ranges;	/* empty ranges: child addresses are passed to the parent bus unchanged */
+
+	queue-group@b0000 {	/* the only queue group defined in this file */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xb0000 0x1000>;	/* 0x1000-byte window at 0xb0000 */
+		interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi
new file mode 100644
index 0000000000..9f25427c15
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi
@@ -0,0 +1,61 @@
+/*
+ * PQ3 eTSEC2 device tree stub [ @ offsets 0x25000/0xb1000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+mdio@25000 {	/* MDIO-style stub that accompanies ethernet@b1000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,etsec2-tbi";
+	reg = <0x25000 0x1000 0xb1030 0x4>;	/* two regions: 0x25000/0x1000, plus 4 bytes at 0xb1030 inside queue-group@b1000's window (presumably the TBIPA register - confirm against the binding) */
+};
+
+ethernet@b1000 {	/* eTSEC2 network controller stub; has no reg of its own, the register window is given by the queue-group child */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "fsl,etsec2";
+	fsl,num_rx_queues = <0x8>;
+	fsl,num_tx_queues = <0x8>;
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	ranges;	/* empty ranges: child addresses are passed to the parent bus unchanged */
+
+	queue-group@b1000 {	/* the only queue group defined in this file */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xb1000 0x1000>;	/* 0x1000-byte window at 0xb1000 */
+		interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi
new file mode 100644
index 0000000000..cd7c318ab1
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi
@@ -0,0 +1,60 @@
+/*
+ * PQ3 eTSEC2 device tree stub [ @ offsets 0x26000/0xb2000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+mdio@26000 {	/* MDIO-style stub that accompanies ethernet@b2000 in this file */
+	#address-cells = <1>;	/* child nodes (none are defined here) take a one-cell address */
+	#size-cells = <0>;	/* and no size cell */
+	compatible = "fsl,etsec2-tbi";
+	reg = <0x26000 0x1000 0xb2030 0x4>;	/* second region lies in this controller's own 0xb2000 block (queue-group@b2000), mirroring 0xb0030/0xb1030 in the etsec2-0/-1 stubs; NOTE(review): confirm against the SoC reference manual */
+};
+
+ethernet@b2000 {	/* eTSEC2 network controller stub; has no reg of its own, the register window is given by the queue-group child */
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	device_type = "network";
+	model = "eTSEC";
+	compatible = "fsl,etsec2";
+	fsl,num_rx_queues = <0x8>;
+	fsl,num_tx_queues = <0x8>;
+	fsl,magic-packet;	/* boolean property: presence alone is the value */
+	local-mac-address = [ 00 00 00 00 00 00 ];	/* six zero bytes; presumably a placeholder filled in before boot - TODO confirm */
+	ranges;	/* empty ranges: child addresses are passed to the parent bus unchanged */
+
+	queue-group@b2000 {	/* the only queue group defined in this file */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xb2000 0x1000>;	/* 0x1000-byte window at 0xb2000 */
+		interrupts = <31 2 0 0 32 2 0 0 33 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-0.dtsi
new file mode 100644
index 0000000000..034ab8fac2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-0.dtsi
@@ -0,0 +1,42 @@
+/*
+ * PQ3 eTSEC2 Group 2 device tree stub [ @ offsets 0xb4000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&enet0_grp2 {	/* label reference: adds to the node labelled enet0_grp2, which must be defined by the including file */
+	queue-group@b4000 {	/* the "Group 2" queue group named in this file's header comment */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xb4000 0x1000>;	/* 0x1000-byte window at 0xb4000 */
+		interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-1.dtsi
new file mode 100644
index 0000000000..3be9ba3b37
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-1.dtsi
@@ -0,0 +1,42 @@
+/*
+ * PQ3 eTSEC2 Group 2 device tree stub [ @ offsets 0xb5000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&enet1_grp2 {	/* label reference: adds to the node labelled enet1_grp2, which must be defined by the including file */
+	queue-group@b5000 {	/* the "Group 2" queue group named in this file's header comment */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xb5000 0x1000>;	/* 0x1000-byte window at 0xb5000 */
+		interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-2.dtsi
new file mode 100644
index 0000000000..02a3345704
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-grp2-2.dtsi
@@ -0,0 +1,42 @@
+/*
+ * PQ3 eTSEC2 Group 2 device tree stub [ @ offsets 0xb6000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&enet2_grp2 {	/* label reference: adds to the node labelled enet2_grp2, which must be defined by the including file */
+	queue-group@b6000 {	/* the "Group 2" queue group named in this file's header comment */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0xb6000 0x1000>;	/* 0x1000-byte window at 0xb6000 */
+		interrupts = <25 2 0 0 26 2 0 0 27 2 0 0>;	/* twelve cells; presumably three four-cell specifiers - confirm against the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi
new file mode 100644
index 0000000000..a1b48546b0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * PQ3 GPIO device tree stub [ controller @ offset 0xfc00 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+gpio-controller@fc00 {	/* GPIO controller stub; unit address matches the reg base below */
+	#gpio-cells = <2>;	/* a GPIO specifier that references this node is two cells long */
+	compatible = "fsl,pq3-gpio";
+	reg = <0xfc00 0x100>;	/* 0x100-byte register window at 0xfc00 (assuming a one-cell address / one-cell size parent) */
+	interrupts = <47 0x2 0 0>;	/* NOTE(review): cell meanings are set by the interrupt parent, which is not part of this stub */
+	gpio-controller;	/* boolean property that marks this node as a GPIO provider */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-i2c-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-i2c-0.dtsi
new file mode 100644
index 0000000000..d1dd6fb82a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-i2c-0.dtsi
@@ -0,0 +1,43 @@
+/*
+ * PQ3 I2C device tree stub [ controller @ offset 0x3000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+i2c@3000 {	/* I2C controller stub; unit address matches the reg base below */
+	#address-cells = <1>;	/* child nodes are addressed with a single cell */
+	#size-cells = <0>;	/* child nodes carry no size cell */
+	cell-index = <0>;
+	compatible = "fsl-i2c";
+	reg = <0x3000 0x100>;	/* 0x100-byte register window at 0x3000 (assuming a one-cell address / one-cell size parent) */
+	interrupts = <43 2 0 0>;	/* NOTE(review): cell meanings are set by the interrupt parent, which is not part of this stub */
+	dfsrr;	/* boolean property; its meaning is defined by the fsl-i2c binding, not by this file */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-i2c-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-i2c-1.dtsi
new file mode 100644
index 0000000000..a9bd803e20
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-i2c-1.dtsi
@@ -0,0 +1,43 @@
+/*
+ * PQ3 I2C device tree stub [ controller @ offset 0x3100 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+i2c@3100 {	/* I2C controller stub; unit address matches the first reg cell (0x3100) */
+	#address-cells = <1>;	/* child nodes are addressed with a single cell... */
+	#size-cells = <0>;	/* ...and carry no size cell */
+	cell-index = <1>;	/* pq3-i2c-0.dtsi uses <0> */
+	compatible = "fsl-i2c";
+	reg = <0x3100 0x100>;	/* presumably (offset, length); cell layout comes from the including parent */
+	interrupts = <43 2 0 0>;	/* same specifier as i2c@3000 in pq3-i2c-0.dtsi */
+	dfsrr;	/* valueless (boolean) property */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi
new file mode 100644
index 0000000000..1cf0b77b1e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-mpic-message-B.dtsi
@@ -0,0 +1,43 @@
+/*
+ * PQ3 MPIC Message (Group B) device tree stub [ controller @ offset 0x42400 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+message@42400 {	/* MPIC message unit, group B */
+	compatible = "fsl,mpic-v3.1-msgr";
+	reg = <0x42400 0x200>;	/* presumably (offset, length); layout set by the including parent */
+	interrupts = <	/* one 4-cell group per line; message@41400 in pq3-mpic.dtsi uses 0xb0..0xb3 */
+		0xb4 2 0 0
+		0xb5 2 0 0
+		0xb6 2 0 0
+		0xb7 2 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic-timer-B.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic-timer-B.dtsi
new file mode 100644
index 0000000000..8734cffae1
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-mpic-timer-B.dtsi
@@ -0,0 +1,42 @@
+/*
+ * PQ3 MPIC Timer (Group B) device tree stub [ controller @ offset 0x42100 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+timer@42100 {	/* MPIC global timer, group B */
+	compatible = "fsl,mpic-global-timer";
+	reg = <0x42100 0x100 0x42300 4>;	/* presumably two (offset, length) pairs - confirm against parent cell sizes */
+	interrupts = <4 0 3 0	/* one 4-cell group per line; timer@41100 in pq3-mpic.dtsi uses first cells 0..3 */
+		      5 0 3 0
+		      6 0 3 0
+		      7 0 3 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi b/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi
new file mode 100644
index 0000000000..71c30eb100
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-mpic.dtsi
@@ -0,0 +1,79 @@
+/*
+ * PQ3 MPIC device tree stub [ controller @ offset 0x40000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+mpic: pic@40000 {	/* MPIC interrupt controller node; "mpic" label allows &mpic references */
+	interrupt-controller;	/* marks this node as an interrupt controller */
+	#address-cells = <0>;
+	#interrupt-cells = <4>;	/* interrupt specifiers aimed at this controller are four cells long */
+	reg = <0x40000 0x40000>;	/* presumably (offset, length); layout set by the including parent */
+	compatible = "fsl,mpic";
+	device_type = "open-pic";
+	big-endian;	/* valueless (boolean) property */
+	single-cpu-affinity;	/* valueless (boolean) property */
+	last-interrupt-source = <255>;
+};
+
+timer@41100 {	/* MPIC global timer; the group B stub (pq3-mpic-timer-B.dtsi) sits at 0x42100 */
+	compatible = "fsl,mpic-global-timer";
+	reg = <0x41100 0x100 0x41300 4>;	/* presumably two (offset, length) pairs - confirm against parent cell sizes */
+	interrupts = <0 0 3 0	/* one 4-cell group per line; group B continues with first cells 4..7 */
+		      1 0 3 0
+		      2 0 3 0
+		      3 0 3 0>;
+};
+
+message@41400 {	/* MPIC message unit; the group B stub (pq3-mpic-message-B.dtsi) sits at 0x42400 */
+	compatible = "fsl,mpic-v3.1-msgr";
+	reg = <0x41400 0x200>;	/* presumably (offset, length); layout set by the including parent */
+	interrupts = <	/* one 4-cell group per line; group B continues with 0xb4..0xb7 */
+		0xb0 2 0 0
+		0xb1 2 0 0
+		0xb2 2 0 0
+		0xb3 2 0 0>;
+};
+
+msi@41600 {	/* MPIC MSI block */
+	compatible = "fsl,mpic-msi";
+	reg = <0x41600 0x80>;	/* presumably (offset, length); layout set by the including parent */
+	msi-available-ranges = <0 0x100>;	/* presumably (first, count) - confirm against the fsl,mpic-msi binding */
+	interrupts = <	/* eight 4-cell groups, first cells 0xe0..0xe7 */
+		0xe0 0 0 0
+		0xe1 0 0 0
+		0xe2 0 0 0
+		0xe3 0 0 0
+		0xe4 0 0 0
+		0xe5 0 0 0
+		0xe6 0 0 0
+		0xe7 0 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-rmu-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-rmu-0.dtsi
new file mode 100644
index 0000000000..587ca9ffad
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-rmu-0.dtsi
@@ -0,0 +1,68 @@
+/*
+ * PQ3 RIO Message Unit device tree stub [ controller @ offset 0xd3000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+rmu: rmu@d3000 {	/* RIO message unit container; sub-units below are addressed relative to it */
+	#address-cells = <1>;	/* children use one address cell... */
+	#size-cells = <1>;	/* ...and one size cell */
+	compatible = "fsl,srio-rmu";
+	reg = <0xd3000 0x500>;
+	ranges = <0x0 0xd3000 0x500>;	/* child address 0x0 maps to 0xd3000 for 0x500; every child reg below fits in this window */
+
+	message-unit@0 {
+		compatible = "fsl,srio-msg-unit";
+		reg = <0x0 0x100>;
+		interrupts = <
+			53 2 0 0 /* msg1_tx_irq */
+			54 2 0 0>;/* msg1_rx_irq */
+	};
+	message-unit@100 {
+		compatible = "fsl,srio-msg-unit";
+		reg = <0x100 0x100>;
+		interrupts = <
+			55 2 0 0  /* msg2_tx_irq */
+			56 2 0 0>;/* msg2_rx_irq */
+	};
+	doorbell-unit@400 {
+		compatible = "fsl,srio-dbell-unit";
+		reg = <0x400 0x80>;
+		interrupts = <
+			49 2 0 0  /* bell_outb_irq */
+			50 2 0 0>;/* bell_inb_irq */
+	};
+	port-write-unit@4e0 {
+		compatible = "fsl,srio-port-write-unit";
+		reg = <0x4e0 0x20>;	/* ends at 0x500, the top of the ranges window */
+		interrupts = <48 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sata2-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sata2-0.dtsi
new file mode 100644
index 0000000000..3c28dd08d3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sata2-0.dtsi
@@ -0,0 +1,40 @@
+/*
+ * PQ3 SATAv2 device tree stub [ controller @ offset 0x18000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+sata@18000 {	/* SATAv2 controller stub; unit address matches the first reg cell (0x18000) */
+	compatible = "fsl,pq-sata-v2";
+	reg = <0x18000 0x1000>;	/* presumably (offset, length); layout set by the including parent */
+	cell-index = <1>;	/* SATA stubs count from 1 (pq3-sata2-1.dtsi uses <2>), unlike the I2C stubs which start at 0 */
+	interrupts = <74 0x2 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sata2-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sata2-1.dtsi
new file mode 100644
index 0000000000..eefaf2855e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sata2-1.dtsi
@@ -0,0 +1,40 @@
+/*
+ * PQ3 SATAv2 device tree stub [ controller @ offset 0x19000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+sata@19000 {	/* SATAv2 controller stub; unit address matches the first reg cell (0x19000) */
+	compatible = "fsl,pq-sata-v2";
+	reg = <0x19000 0x1000>;	/* presumably (offset, length); layout set by the including parent */
+	cell-index = <2>;	/* SATA stubs count from 1 (pq3-sata2-0.dtsi uses <1>), unlike the I2C stubs which start at 0 */
+	interrupts = <41 0x2 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec2.1-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec2.1-0.dtsi
new file mode 100644
index 0000000000..02a5c7ae72
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec2.1-0.dtsi
@@ -0,0 +1,43 @@
+/*
+ * PQ3 Sec/Crypto 2.1 device tree stub [ controller @ offset 0x30000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto@30000 {	/* SEC 2.1 crypto engine stub */
+	compatible = "fsl,sec2.1", "fsl,sec2.0";	/* version strings in descending order */
+	reg = <0x30000 0x10000>;	/* presumably (offset, length); layout set by the including parent */
+	interrupts = <45 2 0 0>;	/* single 4-cell group; the SEC 3.x stubs add a second group starting with 58 */
+	fsl,num-channels = <4>;
+	fsl,channel-fifo-len = <24>;
+	fsl,exec-units-mask = <0xfe>;	/* bitmask; bit meanings are not defined in this file */
+	fsl,descriptor-types-mask = <0x12b0ebf>;	/* bitmask; bit meanings are not defined in this file */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec3.0-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec3.0-0.dtsi
new file mode 100644
index 0000000000..bba1ba44cc
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec3.0-0.dtsi
@@ -0,0 +1,45 @@
+/*
+ * PQ3 Sec/Crypto 3.0 device tree stub [ controller @ offset 0x30000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto@30000 {	/* SEC 3.0 crypto engine stub */
+	compatible = "fsl,sec3.0",	/* version strings in descending order */
+		     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
+		     "fsl,sec2.0";
+	reg = <0x30000 0x10000>;	/* presumably (offset, length); layout set by the including parent */
+	interrupts = <45 2 0 0 58 2 0 0>;	/* two 4-cell groups on one line */
+	fsl,num-channels = <4>;
+	fsl,channel-fifo-len = <24>;
+	fsl,exec-units-mask = <0x9fe>;	/* bitmask; bit meanings are not defined in this file */
+	fsl,descriptor-types-mask = <0x3ab0ebf>;	/* same value as the SEC 3.1 stub */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec3.1-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec3.1-0.dtsi
new file mode 100644
index 0000000000..8f0a5669be
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec3.1-0.dtsi
@@ -0,0 +1,45 @@
+/*
+ * PQ3 Sec/Crypto 3.1 device tree stub [ controller @ offset 0x30000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto@30000 {	/* SEC 3.1 crypto engine stub */
+	compatible = "fsl,sec3.1", "fsl,sec3.0",	/* version strings in descending order */
+		     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
+		     "fsl,sec2.0";
+	reg = <0x30000 0x10000>;	/* presumably (offset, length); layout set by the including parent */
+	interrupts = <45 2 0 0 58 2 0 0>;	/* two 4-cell groups on one line */
+	fsl,num-channels = <4>;
+	fsl,channel-fifo-len = <24>;
+	fsl,exec-units-mask = <0xbfe>;	/* the SEC 3.0 value (0x9fe) plus bit 0x200 */
+	fsl,descriptor-types-mask = <0x3ab0ebf>;	/* same value as the SEC 3.0 stub */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec3.3-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec3.3-0.dtsi
new file mode 100644
index 0000000000..c227f2748a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec3.3-0.dtsi
@@ -0,0 +1,45 @@
+/*
+ * PQ3 Sec/Crypto 3.3 device tree stub [ controller @ offset 0x30000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto@30000 {	/* SEC 3.3 crypto engine stub */
+	compatible = "fsl,sec3.3", "fsl,sec3.1", "fsl,sec3.0",	/* version strings in descending order */
+		     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
+		     "fsl,sec2.0";
+	reg = <0x30000 0x10000>;	/* presumably (offset, length); layout set by the including parent */
+	interrupts = <45 2 0 0 58 2 0 0>;	/* two 4-cell groups on one line */
+	fsl,num-channels = <4>;
+	fsl,channel-fifo-len = <24>;
+	fsl,exec-units-mask = <0x97c>;	/* differs from the SEC 3.0/3.1 stubs (0x9fe / 0xbfe) */
+	fsl,descriptor-types-mask = <0x3a30abf>;	/* differs from the SEC 3.0/3.1 stubs (0x3ab0ebf) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
new file mode 100644
index 0000000000..bb3d8266b5
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
@@ -0,0 +1,67 @@
+/*
+ * PQ3 Sec/Crypto 4.4 device tree stub [ controller @ offset 0x30000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto@30000 {	/* SEC 4.4 crypto block; contains four job-ring child nodes */
+	compatible = "fsl,sec-v4.4", "fsl,sec-v4.0";
+	fsl,sec-era = <3>;
+	#address-cells = <1>;	/* children use one address cell... */
+	#size-cells = <1>;	/* ...and one size cell */
+	ranges		 = <0x0 0x30000 0x10000>;	/* child address 0x0 maps to 0x30000 for 0x10000 */
+	reg		 = <0x30000 0x10000>;
+	interrupts	 = <58 2 0 0>;	/* the job rings below use 45 as their first cell instead */
+
+	sec_jr0: jr@1000 {
+		compatible = "fsl,sec-v4.4-job-ring", "fsl,sec-v4.0-job-ring";
+		reg	   = <0x1000 0x1000>;	/* relative to the crypto block via ranges */
+		interrupts	 = <45 2 0 0>;	/* all four job rings carry this same specifier */
+	};
+
+	sec_jr1: jr@2000 {
+		compatible = "fsl,sec-v4.4-job-ring", "fsl,sec-v4.0-job-ring";
+		reg	   = <0x2000 0x1000>;
+		interrupts	 = <45 2 0 0>;
+	};
+
+	sec_jr2: jr@3000 {
+		compatible = "fsl,sec-v4.4-job-ring", "fsl,sec-v4.0-job-ring";
+		reg	   = <0x3000 0x1000>;
+		interrupts	 = <45 2 0 0>;
+	};
+
+	sec_jr3: jr@4000 {
+		compatible = "fsl,sec-v4.4-job-ring", "fsl,sec-v4.0-job-ring";
+		reg	   = <0x4000 0x1000>;
+		interrupts	 = <45 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-usb2-dr-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-usb2-dr-0.dtsi
new file mode 100644
index 0000000000..185ab9dc3e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-usb2-dr-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * PQ3 USB DR device tree stub [ controller @ offset 0x22000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+usb@22000 {	/* USB DR controller stub; unit address matches the first reg cell (0x22000) */
+	compatible = "fsl-usb2-dr";
+	reg = <0x22000 0x1000>;	/* presumably (offset, length); layout set by the including parent */
+	#address-cells = <1>;	/* child nodes are addressed with a single cell... */
+	#size-cells = <0>;	/* ...and carry no size cell */
+	interrupts = <28 0x2 0 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-usb2-dr-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-usb2-dr-1.dtsi
new file mode 100644
index 0000000000..fe24cd612f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-usb2-dr-1.dtsi
@@ -0,0 +1,41 @@
+/*
+ * PQ3 USB DR device tree stub [ controller @ offset 0x23000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+usb@23000 { /* PQ3 USB DR controller stub; unit address matches the reg base below */
+	compatible = "fsl-usb2-dr";
+	reg = <0x23000 0x1000>; /* base 0x23000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	#address-cells = <1>; /* child nodes are addressed with one cell... */
+	#size-cells = <0>; /* ...and carry no size cell */
+	interrupts = <46 0x2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible in this stub */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi b/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi
new file mode 100644
index 0000000000..fcc7e5b7fd
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ Qonverge USB Host device tree stub [ controller @ offset 0x210000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+usb0: usb@210000 { /* Qonverge USB controller stub; label usb0 lets other sources reference it as &usb0 */
+	compatible = "fsl-usb2-dr";
+	reg = <0x210000 0x1000>; /* base 0x210000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	#address-cells = <1>; /* child nodes are addressed with one cell... */
+	#size-cells = <0>; /* ...and carry no size cell */
+	interrupts = <44 0x2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible in this stub */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
new file mode 100644
index 0000000000..5022432eba
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
@@ -0,0 +1,90 @@
+/*
+ * QorIQ BMan Portal device tree stub for 10 portals
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bportals { /* extends the node labelled bportals, which is defined outside this stub */
+	#address-cells = <1>; /* each child reg entry is <offset size>: one address cell... */
+	#size-cells = <1>; /* ...followed by one size cell */
+	compatible = "simple-bus";
+
+	bman-portal@0 { /* first of the 10 portals; unit address is the first reg offset */
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x100000 0x1000>; /* two windows: 0x4000 bytes at 0x0 and 0x1000 bytes at 0x100000; NOTE(review): roles of the two windows come from the fsl,bman-portal binding - confirm */
+		interrupts = <105 2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible here */
+	};
+	bman-portal@4000 { /* each following portal steps the first window by 0x4000 and the second by 0x1000 */
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x101000 0x1000>;
+		interrupts = <107 2 0 0>; /* first specifier cell advances by 2 from one portal to the next */
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x102000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x103000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x104000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x105000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x106000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x107000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x108000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 { /* tenth and last portal: first windows end at 0x28000, second windows at 0x10a000 */
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x109000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
new file mode 100644
index 0000000000..3b5e3504ac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ BMan device tree stub [ controller @ offset 0x31a000 ]
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+bman: bman@31a000 { /* QorIQ BMan controller stub; unit address matches the reg base below */
+	compatible = "fsl,bman";
+	reg = <0x31a000 0x1000>; /* base 0x31a000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	interrupts = <16 2 1 2>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible in this stub */
+	fsl,bman-portals = <&bportals>; /* phandle to the node labelled bportals, defined outside this stub */
+	memory-region = <&bman_fbpr>; /* phandle to the node labelled bman_fbpr, defined outside this stub */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi
new file mode 100644
index 0000000000..463c1ed9ff
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi
@@ -0,0 +1,39 @@
+/*
+ * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ]
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+clockgen: global-utilities@e1000 { /* QorIQ clock control stub; label clockgen lets other sources reference it as &clockgen */
+	compatible = "fsl,qoriq-clockgen-1.0";
+	reg = <0xe1000 0x1000>; /* base 0xe1000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	#clock-cells = <2>; /* consumers select a clock with a 2-cell specifier after the phandle */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi
new file mode 100644
index 0000000000..0361050bb5
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi
@@ -0,0 +1,39 @@
+/*
+ * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ]
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+clockgen: global-utilities@e1000 { /* QorIQ clock control stub; label clockgen lets other sources reference it as &clockgen */
+	compatible = "fsl,qoriq-clockgen-2.0";
+	reg = <0xe1000 0x1000>; /* base 0xe1000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	#clock-cells = <2>; /* consumers select a clock with a 2-cell specifier after the phandle */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-dma-0.dtsi
new file mode 100644
index 0000000000..1aebf3ea4c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-dma-0.dtsi
@@ -0,0 +1,66 @@
+/*
+ * QorIQ DMA device tree stub [ controller @ offset 0x100000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma0: dma@100300 { /* QorIQ DMA controller 0 stub with four channel sub-nodes */
+	#address-cells = <1>; /* each channel reg is <offset size>: one address cell... */
+	#size-cells = <1>; /* ...followed by one size cell */
+	compatible = "fsl,eloplus-dma";
+	reg = <0x100300 0x4>; /* presumably a 4-byte register at 0x100300 (parent cell counts not shown here) */
+	ranges = <0x0 0x100100 0x200>; /* channel offsets 0x0-0x1ff map to parent 0x100100-0x1002ff (3 cells, so one parent address cell) */
+	cell-index = <0>;
+	dma-channel@0 { /* channel offsets are relative to the ranges window above */
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>; /* 0x80 bytes at window offset 0x0, i.e. parent address 0x100100 */
+		cell-index = <0>;
+		interrupts = <28 2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible here */
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>; /* parent address 0x100180 */
+		cell-index = <1>;
+		interrupts = <29 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>; /* parent address 0x100200 */
+		cell-index = <2>;
+		interrupts = <30 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>; /* parent address 0x100280; the four 0x80 channels exactly fill the 0x200 window */
+		cell-index = <3>;
+		interrupts = <31 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-dma-1.dtsi
new file mode 100644
index 0000000000..ecf5e180fe
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-dma-1.dtsi
@@ -0,0 +1,66 @@
+/*
+ * QorIQ DMA device tree stub [ controller @ offset 0x101000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma1: dma@101300 { /* QorIQ DMA controller 1 stub with four channel sub-nodes */
+	#address-cells = <1>; /* each channel reg is <offset size>: one address cell... */
+	#size-cells = <1>; /* ...followed by one size cell */
+	compatible = "fsl,eloplus-dma";
+	reg = <0x101300 0x4>; /* presumably a 4-byte register at 0x101300 (parent cell counts not shown here) */
+	ranges = <0x0 0x101100 0x200>; /* channel offsets 0x0-0x1ff map to parent 0x101100-0x1012ff (3 cells, so one parent address cell) */
+	cell-index = <1>;
+	dma-channel@0 { /* channel offsets are relative to the ranges window above */
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>; /* 0x80 bytes at window offset 0x0, i.e. parent address 0x101100 */
+		cell-index = <0>;
+		interrupts = <32 2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible here */
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>; /* parent address 0x101180 */
+		cell-index = <1>;
+		interrupts = <33 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>; /* parent address 0x101200 */
+		cell-index = <2>;
+		interrupts = <34 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>; /* parent address 0x101280; the four 0x80 channels exactly fill the 0x200 window */
+		cell-index = <3>;
+		interrupts = <35 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-duart-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-duart-0.dtsi
new file mode 100644
index 0000000000..225c07b4e8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-duart-0.dtsi
@@ -0,0 +1,51 @@
+/*
+ * QorIQ DUART device tree stub [ controller @ offset 0x11c000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+serial0: serial@11c500 { /* first port of the DUART at 0x11c000; unit address matches the reg base below */
+	cell-index = <0>;
+	device_type = "serial";
+	compatible = "fsl,ns16550", "ns16550"; /* vendor-specific string first, generic ns16550 as fallback */
+	reg = <0x11c500 0x100>; /* base 0x11c500; 0x100 is presumably the length (parent cell counts not shown here) */
+	clock-frequency = <0>; /* NOTE(review): zero looks like a placeholder filled in before boot - confirm */
+	interrupts = <36 2 0 0>; /* identical specifier to serial@11c600 in this file, so both ports presumably share one interrupt */
+};
+
+serial1: serial@11c600 { /* second port of the DUART at 0x11c000; unit address matches the reg base below */
+	cell-index = <1>;
+	device_type = "serial";
+	compatible = "fsl,ns16550", "ns16550"; /* vendor-specific string first, generic ns16550 as fallback */
+	reg = <0x11c600 0x100>; /* base 0x11c600; 0x100 is presumably the length (parent cell counts not shown here) */
+	clock-frequency = <0>; /* NOTE(review): zero looks like a placeholder filled in before boot - confirm */
+	interrupts = <36 2 0 0>; /* identical specifier to serial@11c500 in this file, so both ports presumably share one interrupt */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-duart-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-duart-1.dtsi
new file mode 100644
index 0000000000..d23233a56b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-duart-1.dtsi
@@ -0,0 +1,51 @@
+/*
+ * QorIQ DUART device tree stub [ controller @ offset 0x11d000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+serial2: serial@11d500 { /* first port of the DUART at 0x11d000; unit address matches the reg base below */
+	cell-index = <2>;
+	device_type = "serial";
+	compatible = "fsl,ns16550", "ns16550"; /* vendor-specific string first, generic ns16550 as fallback */
+	reg = <0x11d500 0x100>; /* base 0x11d500; 0x100 is presumably the length (parent cell counts not shown here) */
+	clock-frequency = <0>; /* NOTE(review): zero looks like a placeholder filled in before boot - confirm */
+	interrupts = <37 2 0 0>; /* identical specifier to serial@11d600 in this file, so both ports presumably share one interrupt */
+};
+
+serial3: serial@11d600 { /* second port of the DUART at 0x11d000; unit address matches the reg base below */
+	cell-index = <3>;
+	device_type = "serial";
+	compatible = "fsl,ns16550", "ns16550"; /* vendor-specific string first, generic ns16550 as fallback */
+	reg = <0x11d600 0x100>; /* base 0x11d600; 0x100 is presumably the length (parent cell counts not shown here) */
+	clock-frequency = <0>; /* NOTE(review): zero looks like a placeholder filled in before boot - confirm */
+	interrupts = <37 2 0 0>; /* identical specifier to serial@11d500 in this file, so both ports presumably share one interrupt */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-esdhc-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-esdhc-0.dtsi
new file mode 100644
index 0000000000..20835ae216
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-esdhc-0.dtsi
@@ -0,0 +1,40 @@
+/*
+ * QorIQ eSDHC device tree stub [ controller @ offset 0x114000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+sdhc: sdhc@114000 { /* QorIQ eSDHC controller stub; unit address matches the reg base below */
+	compatible = "fsl,esdhc";
+	reg = <0x114000 0x1000>; /* base 0x114000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	interrupts = <48 2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible in this stub */
+	clock-frequency = <0>; /* NOTE(review): zero looks like a placeholder filled in before boot - confirm */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-espi-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-espi-0.dtsi
new file mode 100644
index 0000000000..6db06975e0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-espi-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ eSPI device tree stub [ controller @ offset 0x110000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+spi@110000 { /* QorIQ eSPI controller stub; unit address matches the reg base below */
+	#address-cells = <1>; /* child nodes are addressed with one cell... */
+	#size-cells = <0>; /* ...and carry no size cell */
+	compatible = "fsl,mpc8536-espi";
+	reg = <0x110000 0x1000>; /* base 0x110000; 0x1000 is presumably the length (parent cell counts not shown here) */
+	interrupts = <53 0x2 0 0>; /* 4-cell specifier; cell meanings are set by the interrupt parent, not visible in this stub */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi
new file mode 100644
index 0000000000..eb77675c25
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi
@@ -0,0 +1,62 @@
+/*
+ * QorIQ FMan 10g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {	/* 10G port #0 additions for the FMan node at 0x400000 (see file header) */
+	fman0_rx_0x10: port@90000 {	/* Rx port; label suffix mirrors cell-index 0x10 */
+		cell-index = <0x10>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x90000 0x1000>;	/* 0x1000 bytes at offset 0x90000 */
+	};
+
+	fman0_tx_0x30: port@b0000 {	/* Tx port; label suffix mirrors cell-index 0x30 */
+		cell-index = <0x30>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xb0000 0x1000>;	/* 0x1000 bytes at offset 0xb0000 */
+	};
+
+	ethernet@f0000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-xgec";
+		reg = <0xf0000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x10 &fman0_tx_0x30>;	/* phandles: Rx port, then Tx port */
+	};
+
+	xmdio0: mdio@f1000 {	/* MDIO bus node; window starts where ethernet@f0000's ends (0xf0000 + 0x1000) */
+		#address-cells = <1>;	/* children would use a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-xmdio";
+		reg = <0xf1000 0x1000>;
+		interrupts = <101 2 0 0>;	/* four-cell specifier; NOTE(review): meaning depends on the interrupt parent */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi
new file mode 100644
index 0000000000..b965bc219b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan 1g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {	/* 1G port #0 additions for the FMan node at 0x400000 (see file header) */
+	fman0_rx_0x08: port@88000 {	/* Rx port; label suffix mirrors cell-index 0x8 */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x88000 0x1000>;
+	};
+
+	fman0_tx_0x28: port@a8000 {	/* Tx port; label suffix mirrors cell-index 0x28 */
+		cell-index = <0x28>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xa8000 0x1000>;
+	};
+
+	ethernet@e0000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <0>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe0000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi0>;	/* phandle to the tbi-phy@8 child of mdio@e1120 below */
+		ptp-timer = <&ptp_timer0>;	/* label is not defined in this file */
+	};
+
+	mdio0: mdio@e1120 {	/* MDIO bus node; reg covers 0xe1120..0xe1fff (0xe1120 + 0xee0 = 0xe2000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe1120 0xee0>;
+		interrupts = <100 2 0 0>;	/* four-cell specifier; NOTE(review): meaning depends on the interrupt parent */
+
+		tbi0: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi
new file mode 100644
index 0000000000..9eb6e6dd7c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {	/* 1G port #1 additions for the FMan node at 0x400000 (see file header) */
+	fman0_rx_0x09: port@89000 {	/* Rx port; label suffix mirrors cell-index 0x9 */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x89000 0x1000>;
+	};
+
+	fman0_tx_0x29: port@a9000 {	/* Tx port; label suffix mirrors cell-index 0x29 */
+		cell-index = <0x29>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xa9000 0x1000>;
+	};
+
+	ethernet@e2000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <1>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe2000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi1>;	/* phandle to the tbi-phy@8 child of mdio@e3120 below */
+		ptp-timer = <&ptp_timer0>;	/* label is not defined in this file */
+	};
+
+	mdio@e3120 {	/* MDIO bus node; reg covers 0xe3120..0xe3fff (0xe3120 + 0xee0 = 0xe4000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe3120 0xee0>;
+
+		tbi1: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi
new file mode 100644
index 0000000000..092b899367
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #2 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {	/* 1G port #2 additions for the FMan node at 0x400000 (see file header) */
+	fman0_rx_0x0a: port@8a000 {	/* Rx port; label suffix mirrors cell-index 0xa */
+		cell-index = <0xa>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x8a000 0x1000>;
+	};
+
+	fman0_tx_0x2a: port@aa000 {	/* Tx port; label suffix mirrors cell-index 0x2a */
+		cell-index = <0x2a>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xaa000 0x1000>;
+	};
+
+	ethernet@e4000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <2>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe4000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0a &fman0_tx_0x2a>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi2>;	/* phandle to the tbi-phy@8 child of mdio@e5120 below */
+		ptp-timer = <&ptp_timer0>;	/* label is not defined in this file */
+	};
+
+	mdio@e5120 {	/* MDIO bus node; reg covers 0xe5120..0xe5fff (0xe5120 + 0xee0 = 0xe6000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe5120 0xee0>;
+
+		tbi2: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi
new file mode 100644
index 0000000000..2df0dc8760
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {	/* 1G port #3 additions for the FMan node at 0x400000 (see file header) */
+	fman0_rx_0x0b: port@8b000 {	/* Rx port; label suffix mirrors cell-index 0xb */
+		cell-index = <0xb>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x8b000 0x1000>;
+	};
+
+	fman0_tx_0x2b: port@ab000 {	/* Tx port; label suffix mirrors cell-index 0x2b */
+		cell-index = <0x2b>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xab000 0x1000>;
+	};
+
+	ethernet@e6000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <3>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe6000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0b &fman0_tx_0x2b>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi3>;	/* phandle to the tbi-phy@8 child of mdio@e7120 below */
+		ptp-timer = <&ptp_timer0>;	/* label is not defined in this file */
+	};
+
+	mdio@e7120 {	/* MDIO bus node; reg covers 0xe7120..0xe7fff (0xe7120 + 0xee0 = 0xe8000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe7120 0xee0>;
+
+		tbi3: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi
new file mode 100644
index 0000000000..5fceb2438f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #4 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {	/* 1G port #4 additions for the FMan node at 0x400000 (see file header) */
+	fman0_rx_0x0c: port@8c000 {	/* Rx port; label suffix mirrors cell-index 0xc */
+		cell-index = <0xc>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x8c000 0x1000>;
+	};
+
+	fman0_tx_0x2c: port@ac000 {	/* Tx port; label suffix mirrors cell-index 0x2c */
+		cell-index = <0x2c>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xac000 0x1000>;
+	};
+
+	ethernet@e8000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <4>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe8000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0c &fman0_tx_0x2c>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi4>;	/* phandle to the tbi-phy@8 child of mdio@e9120 below */
+		ptp-timer = <&ptp_timer0>;	/* label is not defined in this file */
+	};
+
+	mdio@e9120 {	/* MDIO bus node; reg covers 0xe9120..0xe9fff (0xe9120 + 0xee0 = 0xea000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe9120 0xee0>;
+
+		tbi4: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi
new file mode 100644
index 0000000000..9b6cf91499
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi
@@ -0,0 +1,104 @@
+/*
+ * QorIQ FMan device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman0: fman@400000 {	/* FMan controller node for the block at offset 0x400000 (see file header) */
+	#address-cells = <1>;	/* child reg entries: one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	cell-index = <0>;
+	compatible = "fsl,fman";
+	ranges = <0 0x400000 0xfe000>;	/* child address 0 maps to parent address 0x400000, length 0xfe000 */
+	reg = <0x400000 0xfe000>;	/* same base and length as the ranges window */
+	interrupts = <96 2 0 0>, <16 2 1 1>;	/* two four-cell specifiers; NOTE(review): meaning depends on the interrupt parent */
+	clocks = <&clockgen 3 0>;	/* clockgen label is not defined in this file */
+	clock-names = "fmanclk";	/* name for the single clocks entry above */
+	fsl,qman-channel-range = <0x40 0xc>;	/* NOTE(review): presumably <first channel, count> - confirm against the FMan binding */
+	ptimer-handle = <&ptp_timer0>;	/* phandle to the ptp-timer@4fe000 node after this one */
+
+	muram@0 {	/* 0x28000-byte region at child offset 0 */
+		compatible = "fsl,fman-muram";
+		reg = <0x0 0x28000>;
+	};
+
+	fman0_oh_0x1: port@81000 {	/* "port-oh" ports 0x1..0x7 follow, one 0x1000-byte window each */
+		cell-index = <0x1>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x81000 0x1000>;
+	};
+
+	fman0_oh_0x2: port@82000 {
+		cell-index = <0x2>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x82000 0x1000>;
+	};
+
+	fman0_oh_0x3: port@83000 {
+		cell-index = <0x3>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x83000 0x1000>;
+	};
+
+	fman0_oh_0x4: port@84000 {
+		cell-index = <0x4>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x84000 0x1000>;
+	};
+
+	fman0_oh_0x5: port@85000 {	/* ports 0x5..0x7 are declared but marked disabled */
+		cell-index = <0x5>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x85000 0x1000>;
+		status = "disabled";
+	};
+
+	fman0_oh_0x6: port@86000 {
+		cell-index = <0x6>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x86000 0x1000>;
+		status = "disabled";
+	};
+
+	fman0_oh_0x7: port@87000 {
+		cell-index = <0x7>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x87000 0x1000>;
+		status = "disabled";
+	};
+};
+
+ptp_timer0: ptp-timer@4fe000 {	/* sibling of fman@400000; that node points here via ptimer-handle */
+	compatible = "fsl,fman-ptp-timer";
+	reg = <0x4fe000 0x1000>;	/* starts where the fman0 window ends: 0x400000 + 0xfe000 = 0x4fe000 */
+	interrupts = <96 2 0 0>;	/* same specifier as the first entry of fman0's interrupts */
+	clocks = <&clockgen 3 0>;	/* same clock specifier as fman0 */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi
new file mode 100644
index 0000000000..83ae87b69d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi
@@ -0,0 +1,61 @@
+/*
+ * QorIQ FMan 10g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {	/* 10G port #0 additions for the FMan node at 0x500000 (see file header) */
+	fman1_rx_0x10: port@90000 {	/* Rx port; label suffix mirrors cell-index 0x10 */
+		cell-index = <0x10>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x90000 0x1000>;	/* 0x1000 bytes at offset 0x90000 */
+	};
+
+	fman1_tx_0x30: port@b0000 {	/* Tx port; label suffix mirrors cell-index 0x30 */
+		cell-index = <0x30>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xb0000 0x1000>;	/* 0x1000 bytes at offset 0xb0000 */
+	};
+
+	ethernet@f0000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-xgec";
+		reg = <0xf0000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>;	/* phandles: Rx port, then Tx port */
+	};
+
+	mdio@f1000 {	/* MDIO bus node; no label and no interrupts property in this stub */
+		#address-cells = <1>;	/* children would use a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-xmdio";
+		reg = <0xf1000 0x1000>;	/* starts where ethernet@f0000's window ends (0xf0000 + 0x1000) */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi
new file mode 100644
index 0000000000..b0f0e36a4e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {	/* 1G port #0 additions for the FMan node at 0x500000 (see file header) */
+	fman1_rx_0x08: port@88000 {	/* Rx port; label suffix mirrors cell-index 0x8 */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x88000 0x1000>;
+	};
+
+	fman1_tx_0x28: port@a8000 {	/* Tx port; label suffix mirrors cell-index 0x28 */
+		cell-index = <0x28>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xa8000 0x1000>;
+	};
+
+	ethernet@e0000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <0>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe0000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x08 &fman1_tx_0x28>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi5>;	/* phandle to the tbi-phy@8 child of mdio@e1120 below */
+		ptp-timer = <&ptp_timer1>;	/* label is not defined in this file */
+	};
+
+	mdio@e1120 {	/* MDIO bus node; reg covers 0xe1120..0xe1fff (0xe1120 + 0xee0 = 0xe2000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe1120 0xee0>;
+
+		tbi5: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi
new file mode 100644
index 0000000000..a3a79f8552
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #1 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {	/* 1G port #1 additions for the FMan node at 0x500000 (see file header) */
+	fman1_rx_0x09: port@89000 {	/* Rx port; label suffix mirrors cell-index 0x9 */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x89000 0x1000>;
+	};
+
+	fman1_tx_0x29: port@a9000 {	/* Tx port; label suffix mirrors cell-index 0x29 */
+		cell-index = <0x29>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xa9000 0x1000>;
+	};
+
+	ethernet@e2000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <1>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe2000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x09 &fman1_tx_0x29>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi6>;	/* phandle to the tbi-phy@8 child of mdio@e3120 below */
+		ptp-timer = <&ptp_timer1>;	/* label is not defined in this file */
+	};
+
+	mdio@e3120 {	/* MDIO bus node; reg covers 0xe3120..0xe3fff (0xe3120 + 0xee0 = 0xe4000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe3120 0xee0>;
+
+		tbi6: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi
new file mode 100644
index 0000000000..96a69a84b8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #2 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {	/* 1G port #2 additions for the FMan node at 0x500000 (see file header) */
+	fman1_rx_0x0a: port@8a000 {	/* Rx port; label suffix mirrors cell-index 0xa */
+		cell-index = <0xa>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x8a000 0x1000>;
+	};
+
+	fman1_tx_0x2a: port@aa000 {	/* Tx port; label suffix mirrors cell-index 0x2a */
+		cell-index = <0x2a>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xaa000 0x1000>;
+	};
+
+	ethernet@e4000 {	/* Ethernet node referencing the Rx/Tx pair above */
+		cell-index = <2>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe4000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0a &fman1_tx_0x2a>;	/* phandles: Rx port, then Tx port */
+		tbi-handle = <&tbi7>;	/* phandle to the tbi-phy@8 child of mdio@e5120 below */
+		ptp-timer = <&ptp_timer1>;	/* label is not defined in this file */
+	};
+
+	mdio@e5120 {	/* MDIO bus node; reg covers 0xe5120..0xe5fff (0xe5120 + 0xee0 = 0xe6000) */
+		#address-cells = <1>;	/* children have a one-cell reg ... */
+		#size-cells = <0>;	/* ... with no size cell */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe5120 0xee0>;
+
+		tbi7: tbi-phy@8 {	/* target of tbi-handle above */
+			reg = <0x8>;	/* single address cell, per the parent's #address-cells/#size-cells */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi
new file mode 100644
index 0000000000..7405d19401
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #3 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 { /* 1g port #3: adds one RX/TX port pair, its Ethernet controller and its MDIO bus */
+	fman1_rx_0x0b: port@8b000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0xb>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x8b000 0x1000>;
+	};
+
+	fman1_tx_0x2b: port@ab000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x2b>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xab000 0x1000>;
+	};
+
+	ethernet@e6000 {
+		cell-index = <3>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe6000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0b &fman1_tx_0x2b>; /* the RX and TX ports declared above */
+		tbi-handle = <&tbi8>; /* TBI PHY on the mdio@e7120 bus below */
+		ptp-timer = <&ptp_timer1>; /* label is not defined in this stub */
+	};
+
+	mdio@e7120 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe7120 0xee0>;
+
+		tbi8: tbi-phy@8 {
+			reg = <0x8>; /* bus address 8, matching the unit address */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi
new file mode 100644
index 0000000000..f49ad69e52
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #4 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 { /* 1g port #4: adds one RX/TX port pair, its Ethernet controller and its MDIO bus */
+	fman1_rx_0x0c: port@8c000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0xc>;
+		compatible = "fsl,fman-v2-port-rx";
+		reg = <0x8c000 0x1000>;
+	};
+
+	fman1_tx_0x2c: port@ac000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x2c>;
+		compatible = "fsl,fman-v2-port-tx";
+		reg = <0xac000 0x1000>;
+	};
+
+	ethernet@e8000 {
+		cell-index = <4>;
+		compatible = "fsl,fman-dtsec";
+		reg = <0xe8000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0c &fman1_tx_0x2c>; /* the RX and TX ports declared above */
+		tbi-handle = <&tbi9>; /* TBI PHY on the mdio@e9120 bus below */
+		ptp-timer = <&ptp_timer1>; /* label is not defined in this stub */
+	};
+
+	mdio@e9120 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-mdio";
+		reg = <0xe9120 0xee0>;
+
+		tbi9: tbi-phy@8 {
+			reg = <0x8>; /* bus address 8, matching the unit address */
+			device_type = "tbi-phy";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi
new file mode 100644
index 0000000000..e95c11ff04
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi
@@ -0,0 +1,104 @@
+/*
+ * QorIQ FMan device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman1: fman@500000 { /* base FMan node: register window, MURAM and the "oh" ports 0x1-0x7 */
+	#address-cells = <1>;
+	#size-cells = <1>; /* children use <offset size> reg pairs */
+	cell-index = <1>;
+	compatible = "fsl,fman";
+	ranges = <0 0x500000 0xfe000>; /* child offset 0 -> parent 0x500000, length 0xfe000 */
+	reg = <0x500000 0xfe000>;
+	interrupts = <97 2 0 0>, <16 2 1 0>; /* the first specifier also appears in ptp_timer1 */
+	clocks = <&clockgen 3 1>;
+	clock-names = "fmanclk";
+	fsl,qman-channel-range = <0x60 0xc>;
+	ptimer-handle = <&ptp_timer1>; /* timer node declared right after this node */
+
+	muram@0 {
+		compatible = "fsl,fman-muram";
+		reg = <0x0 0x28000>;
+	};
+
+	fman1_oh_0x1: port@81000 { /* label suffix mirrors cell-index, as for every oh port below */
+		cell-index = <0x1>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x81000 0x1000>;
+	};
+
+	fman1_oh_0x2: port@82000 {
+		cell-index = <0x2>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x82000 0x1000>;
+	};
+
+	fman1_oh_0x3: port@83000 {
+		cell-index = <0x3>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x83000 0x1000>;
+	};
+
+	fman1_oh_0x4: port@84000 {
+		cell-index = <0x4>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x84000 0x1000>;
+	};
+
+	fman1_oh_0x5: port@85000 {
+		cell-index = <0x5>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x85000 0x1000>;
+		status = "disabled"; /* oh ports 0x5-0x7 are declared but left disabled here */
+	};
+
+	fman1_oh_0x6: port@86000 {
+		cell-index = <0x6>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x86000 0x1000>;
+		status = "disabled";
+	};
+
+	fman1_oh_0x7: port@87000 {
+		cell-index = <0x7>;
+		compatible = "fsl,fman-v2-port-oh";
+		reg = <0x87000 0x1000>;
+		status = "disabled";
+	};
+};
+
+ptp_timer1: ptp-timer@5fe000 { /* declared beside fman1, not inside it; fman1 points here via ptimer-handle */
+	compatible = "fsl,fman-ptp-timer";
+	reg = <0x5fe000 0x1000>; /* starts where fman1's 0xfe000-byte window at 0x500000 ends */
+	interrupts = <97 2 0 0>; /* same specifier as fman1's first interrupt */
+	clocks = <&clockgen 3 1>; /* same clock specifier as fman1 */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
new file mode 100644
index 0000000000..7e70977f28
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
@@ -0,0 +1,73 @@
+/*
+ * QorIQ FMan v3 10g best-effort port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 { /* adds one 10g best-effort RX/TX port pair, its Ethernet controller and its MDIO bus */
+	fman0_rx_0x08: port@88000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x88000 0x1000>;
+		fsl,fman-10g-port; /* valueless flag property */
+		fsl,fman-best-effort-port; /* valueless flag property */
+	};
+
+	fman0_tx_0x28: port@a8000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x28>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa8000 0x1000>;
+		fsl,fman-10g-port;
+		fsl,fman-best-effort-port;
+	};
+
+	ethernet@e0000 {
+		cell-index = <0>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe0000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>; /* the RX and TX ports declared above */
+		ptp-timer = <&ptp_timer0>; /* label is not defined in this stub */
+		pcsphy-handle = <&pcsphy0>, <&pcsphy0>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy0: ethernet-phy@0 { /* target of both pcsphy-handle entries above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
new file mode 100644
index 0000000000..5f89f7c176
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 { /* 10g port #0: adds one RX/TX port pair, its Ethernet controller, a PCS and an MDIO bus */
+	fman0_rx_0x10: port@90000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x10>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x90000 0x1000>;
+		fsl,fman-10g-port; /* valueless flag property */
+	};
+
+	fman0_tx_0x30: port@b0000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x30>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xb0000 0x1000>;
+		fsl,fman-10g-port;
+	};
+
+	ethernet@f0000 {
+		cell-index = <0x8>;
+		compatible = "fsl,fman-memac";
+		reg = <0xf0000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x10 &fman0_tx_0x30>; /* the RX and TX ports declared above */
+		pcsphy-handle = <&pcsphy6>, <&qsgmiib_pcs2>, <&pcsphy6>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "qsgmii", "xfi";
+	};
+
+	mdio@e9000 { /* only a child is added here; this stub sets no bus properties for mdio@e9000 */
+		qsgmiib_pcs2: ethernet-pcs@2 {
+			compatible = "fsl,lynx-pcs";
+			reg = <2>; /* matches the unit address */
+		};
+	};
+
+	mdio@f1000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xf1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy6: ethernet-phy@0 { /* referenced twice by pcsphy-handle above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
new file mode 100644
index 0000000000..71eb75e82c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
@@ -0,0 +1,80 @@
+/*
+ * QorIQ FMan v3 10g best-effort port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 { /* adds one 10g best-effort RX/TX port pair, its Ethernet controller, a PCS and an MDIO bus */
+	fman0_rx_0x09: port@89000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x89000 0x1000>;
+		fsl,fman-10g-port; /* valueless flag property */
+		fsl,fman-best-effort-port; /* valueless flag property */
+	};
+
+	fman0_tx_0x29: port@a9000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x29>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa9000 0x1000>;
+		fsl,fman-10g-port;
+		fsl,fman-best-effort-port;
+	};
+
+	ethernet@e2000 {
+		cell-index = <1>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe2000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>; /* the RX and TX ports declared above */
+		ptp-timer = <&ptp_timer0>; /* label is not defined in this stub */
+		pcsphy-handle = <&pcsphy1>, <&qsgmiia_pcs1>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* only a child is added here; this stub sets no bus properties for mdio@e1000 */
+		qsgmiia_pcs1: ethernet-pcs@1 {
+			compatible = "fsl,lynx-pcs";
+			reg = <1>; /* matches the unit address */
+		};
+	};
+
+	mdio@e3000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy1: ethernet-phy@0 { /* first pcsphy-handle target above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
new file mode 100644
index 0000000000..fb7032ddb7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 { /* 10g port #1: adds one RX/TX port pair, its Ethernet controller, a PCS and an MDIO bus */
+	fman0_rx_0x11: port@91000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x11>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x91000 0x1000>;
+		fsl,fman-10g-port; /* valueless flag property */
+	};
+
+	fman0_tx_0x31: port@b1000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x31>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xb1000 0x1000>;
+		fsl,fman-10g-port;
+	};
+
+	ethernet@f2000 {
+		cell-index = <0x9>;
+		compatible = "fsl,fman-memac";
+		reg = <0xf2000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x11 &fman0_tx_0x31>; /* the RX and TX ports declared above */
+		pcsphy-handle = <&pcsphy7>, <&qsgmiib_pcs3>, <&pcsphy7>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "qsgmii", "xfi";
+	};
+
+	mdio@e9000 { /* only a child is added here; this stub sets no bus properties for mdio@e9000 */
+		qsgmiib_pcs3: ethernet-pcs@3 {
+			compatible = "fsl,lynx-pcs";
+			reg = <3>; /* matches the unit address */
+		};
+	};
+
+	mdio@f3000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xf3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy7: ethernet-phy@0 { /* referenced twice by pcsphy-handle above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi
new file mode 100644
index 0000000000..6b3609574b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
+/*
+ * QorIQ FMan v3 10g port #2 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2022 Sean Anderson <sean.anderson@seco.com>
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ */
+
+fman@400000 { /* 10g port #2: adds one RX/TX port pair, its Ethernet controller and its MDIO bus */
+	fman0_rx_0x08: port@88000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x88000 0x1000>;
+		fsl,fman-10g-port; /* valueless flag property */
+	};
+
+	fman0_tx_0x28: port@a8000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x28>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa8000 0x1000>;
+		fsl,fman-10g-port;
+	};
+
+	ethernet@e0000 {
+		cell-index = <0>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe0000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>; /* the RX and TX ports declared above */
+		ptp-timer = <&ptp_timer0>; /* label is not defined in this stub */
+		pcsphy-handle = <&pcsphy0>, <&pcsphy0>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "xfi";
+	};
+
+	mdio@e1000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy0: ethernet-phy@0 { /* target of both pcsphy-handle entries above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi
new file mode 100644
index 0000000000..28ed1a85a4
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
+/*
+ * QorIQ FMan v3 10g port #3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2022 Sean Anderson <sean.anderson@seco.com>
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ */
+
+fman@400000 { /* 10g port #3: adds one RX/TX port pair, its Ethernet controller and its MDIO bus */
+	fman0_rx_0x09: port@89000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x89000 0x1000>;
+		fsl,fman-10g-port; /* valueless flag property */
+	};
+
+	fman0_tx_0x29: port@a9000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x29>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa9000 0x1000>;
+		fsl,fman-10g-port;
+	};
+
+	ethernet@e2000 {
+		cell-index = <1>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe2000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>; /* the RX and TX ports declared above */
+		ptp-timer = <&ptp_timer0>; /* label is not defined in this stub */
+		pcsphy-handle = <&pcsphy1>, <&pcsphy1>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "xfi";
+	};
+
+	mdio@e3000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy1: ethernet-phy@0 { /* target of both pcsphy-handle entries above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
new file mode 100644
index 0000000000..1089d6861b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 { /* 1g port #0: adds one RX/TX port pair, its Ethernet controller and its MDIO bus */
+	fman0_rx_0x08: port@88000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x88000 0x1000>;
+	};
+
+	fman0_tx_0x28: port@a8000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x28>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa8000 0x1000>;
+	};
+
+	ethernet@e0000 {
+		cell-index = <0>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe0000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>; /* the RX and TX ports declared above */
+		ptp-timer = <&ptp_timer0>; /* label is not defined in this stub */
+		pcsphy-handle = <&pcsphy0>, <&pcsphy0>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy0: ethernet-phy@0 { /* target of both pcsphy-handle entries above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
new file mode 100644
index 0000000000..a95bbb4fc8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 { /* 1g port #1: adds one RX/TX port pair, its Ethernet controller, a PCS and an MDIO bus */
+	fman0_rx_0x09: port@89000 { /* RX port; label suffix mirrors cell-index */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x89000 0x1000>;
+	};
+
+	fman0_tx_0x29: port@a9000 { /* TX port; label suffix mirrors cell-index */
+		cell-index = <0x29>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa9000 0x1000>;
+	};
+
+	ethernet@e2000 {
+		cell-index = <1>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe2000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>; /* the RX and TX ports declared above */
+		ptp-timer = <&ptp_timer0>; /* label is not defined in this stub */
+		pcsphy-handle = <&pcsphy1>, <&qsgmiia_pcs1>; /* presumably paired by index with pcs-handle-names - confirm against the binding */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* only a child is added here; this stub sets no bus properties for mdio@e1000 */
+		qsgmiia_pcs1: ethernet-pcs@1 {
+			compatible = "fsl,lynx-pcs";
+			reg = <1>; /* matches the unit address */
+		};
+	};
+
+	mdio@e3000 {
+		#address-cells = <1>; /* children are addressed by a single cell ... */
+		#size-cells = <0>; /* ... and carry no size */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy1: ethernet-phy@0 { /* first pcsphy-handle target above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
new file mode 100644
index 0000000000..7d5af0147a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #2 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+	fman0_rx_0x0a: port@8a000 { /* RX port; referenced by ethernet@e4000 through fsl,fman-ports */
+		cell-index = <0xa>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8a000 0x1000>;
+	};
+
+	fman0_tx_0x2a: port@aa000 { /* TX port; referenced by ethernet@e4000 through fsl,fman-ports */
+		cell-index = <0x2a>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xaa000 0x1000>;
+	};
+
+	ethernet@e4000 { /* MAC node for 1g port #2: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <2>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe4000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0a &fman0_tx_0x2a>;
+		ptp-timer = <&ptp_timer0>;
+		pcsphy-handle = <&pcsphy2>, <&qsgmiia_pcs2>; /* two handles, listed in the same order as pcs-handle-names */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* only adds a PCS child at address 2; the bus properties are not set here (presumably by the 1g port #0 stub - confirm) */
+		qsgmiia_pcs2: ethernet-pcs@2 {
+			compatible = "fsl,lynx-pcs";
+			reg = <2>;
+		};
+	};
+
+	mdio@e5000 { /* MDIO bus in the 0x1000 window right after ethernet@e4000; holds pcsphy2 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe5000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy2: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
new file mode 100644
index 0000000000..61e5466ec8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+	fman0_rx_0x0b: port@8b000 { /* RX port; referenced by ethernet@e6000 through fsl,fman-ports */
+		cell-index = <0xb>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8b000 0x1000>;
+	};
+
+	fman0_tx_0x2b: port@ab000 { /* TX port; referenced by ethernet@e6000 through fsl,fman-ports */
+		cell-index = <0x2b>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xab000 0x1000>;
+	};
+
+	ethernet@e6000 { /* MAC node for 1g port #3: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <3>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe6000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0b &fman0_tx_0x2b>;
+		ptp-timer = <&ptp_timer0>;
+		pcsphy-handle = <&pcsphy3>, <&qsgmiia_pcs3>; /* two handles, listed in the same order as pcs-handle-names */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* only adds a PCS child at address 3; the bus properties are not set here (presumably by the 1g port #0 stub - confirm) */
+		qsgmiia_pcs3: ethernet-pcs@3 {
+			compatible = "fsl,lynx-pcs";
+			reg = <3>;
+		};
+	};
+
+	mdio@e7000 { /* MDIO bus in the 0x1000 window right after ethernet@e6000; holds pcsphy3 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe7000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy3: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
new file mode 100644
index 0000000000..3ba0cdafc0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #4 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+	fman0_rx_0x0c: port@8c000 { /* RX port; referenced by ethernet@e8000 through fsl,fman-ports */
+		cell-index = <0xc>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8c000 0x1000>;
+	};
+
+	fman0_tx_0x2c: port@ac000 { /* TX port; referenced by ethernet@e8000 through fsl,fman-ports */
+		cell-index = <0x2c>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xac000 0x1000>;
+	};
+
+	ethernet@e8000 { /* MAC node for 1g port #4: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <4>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe8000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0c &fman0_tx_0x2c>;
+		ptp-timer = <&ptp_timer0>;
+		pcsphy-handle = <&pcsphy4>, <&pcsphy4>; /* both the "sgmii" and "qsgmii" entries point at pcsphy4 */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e9000 { /* MDIO bus in the 0x1000 window right after ethernet@e8000; holds pcsphy4 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe9000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy4: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
new file mode 100644
index 0000000000..51748de0a2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #5 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+	fman0_rx_0x0d: port@8d000 { /* RX port; referenced by ethernet@ea000 through fsl,fman-ports */
+		cell-index = <0xd>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8d000 0x1000>;
+	};
+
+	fman0_tx_0x2d: port@ad000 { /* TX port; referenced by ethernet@ea000 through fsl,fman-ports */
+		cell-index = <0x2d>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xad000 0x1000>;
+	};
+
+	ethernet@ea000 { /* MAC node for 1g port #5: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <5>;
+		compatible = "fsl,fman-memac";
+		reg = <0xea000 0x1000>;
+		fsl,fman-ports = <&fman0_rx_0x0d &fman0_tx_0x2d>;
+		ptp-timer = <&ptp_timer0>;
+		pcsphy-handle = <&pcsphy5>, <&qsgmiib_pcs1>; /* two handles, listed in the same order as pcs-handle-names */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e9000 { /* only adds a PCS child at address 1; reg/compatible of this bus are set in qoriq-fman3-0-1g-4.dtsi */
+		qsgmiib_pcs1: ethernet-pcs@1 {
+			compatible = "fsl,lynx-pcs";
+			reg = <1>;
+		};
+	};
+
+	mdio@eb000 { /* MDIO bus in the 0x1000 window right after ethernet@ea000; holds pcsphy5 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xeb000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy5: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi
new file mode 100644
index 0000000000..d62b36c5a3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi
@@ -0,0 +1,109 @@
+/*
+ * QorIQ FMan v3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman0: fman@400000 { /* base FMan controller node; children below use addresses relative to 0x400000 */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	cell-index = <0>;
+	compatible = "fsl,fman";
+	ranges = <0 0x400000 0xfe000>; /* child address 0 maps to parent 0x400000, window length 0xfe000 */
+	reg = <0x400000 0xfe000>;
+	interrupts = <96 2 0 0>, <16 2 1 1>; /* two interrupt specifiers; the first is identical to the one in ptp_timer0 */
+	clocks = <&clockgen 3 0>;
+	clock-names = "fmanclk";
+	fsl,qman-channel-range = <0x800 0x10>;
+	ptimer-handle = <&ptp_timer0>; /* ptp_timer0 is declared as a separate node after this one */
+
+	muram@0 { /* region at child offset 0, length 0x60000 */
+		compatible = "fsl,fman-muram";
+		reg = <0x0 0x60000>;
+	};
+
+	fman0_oh_0x2: port@82000 { /* first of six "port-oh" nodes, cell-index 0x2..0x7, one 0x1000 window each from 0x82000 */
+		cell-index = <0x2>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x82000 0x1000>;
+	};
+
+	fman0_oh_0x3: port@83000 {
+		cell-index = <0x3>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x83000 0x1000>;
+	};
+
+	fman0_oh_0x4: port@84000 {
+		cell-index = <0x4>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x84000 0x1000>;
+	};
+
+	fman0_oh_0x5: port@85000 {
+		cell-index = <0x5>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x85000 0x1000>;
+	};
+
+	fman0_oh_0x6: port@86000 {
+		cell-index = <0x6>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x86000 0x1000>;
+	};
+
+	fman0_oh_0x7: port@87000 {
+		cell-index = <0x7>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x87000 0x1000>;
+	};
+
+	mdio0: mdio@fc000 { /* MDIO bus with no child devices declared here */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xfc000 0x1000>;
+	};
+
+	xmdio0: mdio@fd000 { /* second MDIO bus, same compatible strings as mdio0; no child devices declared here */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xfd000 0x1000>;
+	};
+};
+
+ptp_timer0: ptp-timer@4fe000 { /* PTP timer node; target of the ptimer-handle / ptp-timer phandles in the fman0 stubs */
+	compatible = "fsl,fman-ptp-timer";
+	reg = <0x4fe000 0x1000>; /* starts at 0x400000 + 0xfe000, i.e. right after the fman0 register window */
+	interrupts = <96 2 0 0>; /* same specifier as the first fman0 interrupt */
+	clocks = <&clockgen 3 0>; /* same clock specifier as fman0 */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
new file mode 100644
index 0000000000..ee4f5170f6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+	fman1_rx_0x10: port@90000 { /* RX port; referenced by ethernet@f0000 through fsl,fman-ports */
+		cell-index = <0x10>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x90000 0x1000>;
+		fsl,fman-10g-port; /* boolean flag, present only on the 10g port stubs */
+	};
+
+	fman1_tx_0x30: port@b0000 { /* TX port; referenced by ethernet@f0000 through fsl,fman-ports */
+		cell-index = <0x30>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xb0000 0x1000>;
+		fsl,fman-10g-port;
+	};
+
+	ethernet@f0000 { /* MAC node for 10g port #0; unlike the 1g stubs it sets no ptp-timer property */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-memac";
+		reg = <0xf0000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>;
+		pcsphy-handle = <&pcsphy14>, <&qsgmiid_pcs2>, <&pcsphy14>; /* three handles in pcs-handle-names order; "sgmii" and "xfi" share pcsphy14 */
+		pcs-handle-names = "sgmii", "qsgmii", "xfi";
+	};
+
+	mdio@e9000 { /* only adds a PCS child at address 2; the bus properties are not set here (presumably by the fman1 1g port #4 stub - confirm) */
+		qsgmiid_pcs2: ethernet-pcs@2 {
+			compatible = "fsl,lynx-pcs";
+			reg = <2>;
+		};
+	};
+
+	mdio@f1000 { /* MDIO bus in the 0x1000 window right after ethernet@f0000; holds pcsphy14 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xf1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy14: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
new file mode 100644
index 0000000000..83d2e0ce8f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #1 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+	fman1_rx_0x11: port@91000 { /* RX port; referenced by ethernet@f2000 through fsl,fman-ports */
+		cell-index = <0x11>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x91000 0x1000>;
+		fsl,fman-10g-port; /* boolean flag, present only on the 10g port stubs */
+	};
+
+	fman1_tx_0x31: port@b1000 { /* TX port; referenced by ethernet@f2000 through fsl,fman-ports */
+		cell-index = <0x31>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xb1000 0x1000>;
+		fsl,fman-10g-port;
+	};
+
+	ethernet@f2000 { /* MAC node for 10g port #1; unlike the 1g stubs it sets no ptp-timer property */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-memac";
+		reg = <0xf2000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x11 &fman1_tx_0x31>;
+		pcsphy-handle = <&pcsphy15>, <&qsgmiid_pcs3>, <&pcsphy15>; /* three handles in pcs-handle-names order; "sgmii" and "xfi" share pcsphy15 */
+		pcs-handle-names = "sgmii", "qsgmii", "xfi";
+	};
+
+	mdio@e9000 { /* only adds a PCS child at address 3; the bus properties are not set here (presumably by the fman1 1g port #4 stub - confirm) */
+		qsgmiid_pcs3: ethernet-pcs@3 {
+			compatible = "fsl,lynx-pcs";
+			reg = <3>;
+		};
+	};
+
+	mdio@f3000 { /* MDIO bus in the 0x1000 window right after ethernet@f2000; holds pcsphy15 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xf3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy15: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
new file mode 100644
index 0000000000..3132fc73f1
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+	fman1_rx_0x08: port@88000 { /* RX port; referenced by ethernet@e0000 through fsl,fman-ports */
+		cell-index = <0x8>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x88000 0x1000>;
+	};
+
+	fman1_tx_0x28: port@a8000 { /* TX port; referenced by ethernet@e0000 through fsl,fman-ports */
+		cell-index = <0x28>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa8000 0x1000>;
+	};
+
+	ethernet@e0000 { /* MAC node for 1g port #0: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <0>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe0000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x08 &fman1_tx_0x28>;
+		ptp-timer = <&ptp_timer1>;
+		pcsphy-handle = <&pcsphy8>, <&pcsphy8>; /* both the "sgmii" and "qsgmii" entries point at pcsphy8 */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* MDIO bus holding pcsphy8 at address 0; the fman1 1g #1 and #2 stubs add ethernet-pcs children to this node */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe1000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy8: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
new file mode 100644
index 0000000000..75e904d966
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+	fman1_rx_0x09: port@89000 { /* RX port; referenced by ethernet@e2000 through fsl,fman-ports */
+		cell-index = <0x9>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x89000 0x1000>;
+	};
+
+	fman1_tx_0x29: port@a9000 { /* TX port; referenced by ethernet@e2000 through fsl,fman-ports */
+		cell-index = <0x29>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xa9000 0x1000>;
+	};
+
+	ethernet@e2000 { /* MAC node for 1g port #1: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <1>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe2000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x09 &fman1_tx_0x29>;
+		ptp-timer = <&ptp_timer1>;
+		pcsphy-handle = <&pcsphy9>, <&qsgmiic_pcs1>; /* two handles, listed in the same order as pcs-handle-names */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* only adds a PCS child at address 1; reg/compatible of this bus are set in qoriq-fman3-1-1g-0.dtsi */
+		qsgmiic_pcs1: ethernet-pcs@1 {
+			compatible = "fsl,lynx-pcs";
+			reg = <1>;
+		};
+	};
+
+	mdio@e3000 { /* MDIO bus in the 0x1000 window right after ethernet@e2000; holds pcsphy9 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe3000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy9: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
new file mode 100644
index 0000000000..69f2cc7b8f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #2 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+	fman1_rx_0x0a: port@8a000 { /* RX port; referenced by ethernet@e4000 through fsl,fman-ports */
+		cell-index = <0xa>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8a000 0x1000>;
+	};
+
+	fman1_tx_0x2a: port@aa000 { /* TX port; referenced by ethernet@e4000 through fsl,fman-ports */
+		cell-index = <0x2a>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xaa000 0x1000>;
+	};
+
+	ethernet@e4000 { /* MAC node for 1g port #2: RX/TX port pair above, PTP timer and PCS handles */
+		cell-index = <2>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe4000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0a &fman1_tx_0x2a>;
+		ptp-timer = <&ptp_timer1>;
+		pcsphy-handle = <&pcsphy10>, <&qsgmiic_pcs2>; /* two handles, listed in the same order as pcs-handle-names */
+		pcs-handle-names = "sgmii", "qsgmii";
+	};
+
+	mdio@e1000 { /* only adds a PCS child at address 2; reg/compatible of this bus are set in qoriq-fman3-1-1g-0.dtsi */
+		qsgmiic_pcs2: ethernet-pcs@2 {
+			compatible = "fsl,lynx-pcs";
+			reg = <2>;
+		};
+	};
+
+	mdio@e5000 { /* MDIO bus in the 0x1000 window right after ethernet@e4000; holds pcsphy10 at address 0 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe5000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy10: ethernet-phy@0 {
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
new file mode 100644
index 0000000000..b3aaf01d7d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #3 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 { /* fragment: adds one RX/TX port pair, one MAC and its MDIO/PCS nodes */
+	fman1_rx_0x0b: port@8b000 { /* RX port; label is used by fsl,fman-ports below */
+		cell-index = <0xb>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8b000 0x1000>;
+	};
+
+	fman1_tx_0x2b: port@ab000 { /* TX port; label is used by fsl,fman-ports below */
+		cell-index = <0x2b>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xab000 0x1000>;
+	};
+
+	ethernet@e6000 {
+		cell-index = <3>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe6000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0b &fman1_tx_0x2b>; /* RX port, then TX port */
+		ptp-timer = <&ptp_timer1>; /* label is not defined in this fragment */
+		pcsphy-handle = <&pcsphy11>, <&qsgmiic_pcs3>; /* both labels are defined below */
+		pcs-handle-names = "sgmii", "qsgmii"; /* presumably names the handles in order - confirm */
+	};
+
+	mdio@e1000 { /* no compatible/reg here: only a PCS child is added to this bus */
+		qsgmiic_pcs3: ethernet-pcs@3 {
+			compatible = "fsl,lynx-pcs";
+			reg = <3>; /* matches the @3 unit address */
+		};
+	};
+
+	mdio@e7000 {
+		#address-cells = <1>; /* children have a one-cell address ... */
+		#size-cells = <0>; /* ... and no size cell */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe7000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy11: ethernet-phy@0 { /* first pcsphy-handle entry of ethernet@e6000 */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
new file mode 100644
index 0000000000..18e0204328
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #4 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 { /* fragment: adds one RX/TX port pair, one MAC and its MDIO node */
+	fman1_rx_0x0c: port@8c000 { /* RX port; label is used by fsl,fman-ports below */
+		cell-index = <0xc>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8c000 0x1000>;
+	};
+
+	fman1_tx_0x2c: port@ac000 { /* TX port; label is used by fsl,fman-ports below */
+		cell-index = <0x2c>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xac000 0x1000>;
+	};
+
+	ethernet@e8000 {
+		cell-index = <4>;
+		compatible = "fsl,fman-memac";
+		reg = <0xe8000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0c &fman1_tx_0x2c>; /* RX port, then TX port */
+		ptp-timer = <&ptp_timer1>; /* label is not defined in this fragment */
+		pcsphy-handle = <&pcsphy12>, <&pcsphy12>; /* both entries point at pcsphy12 (below) */
+		pcs-handle-names = "sgmii", "qsgmii"; /* presumably names the handles in order - confirm */
+	};
+
+	mdio@e9000 {
+		#address-cells = <1>; /* children have a one-cell address ... */
+		#size-cells = <0>; /* ... and no size cell */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xe9000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy12: ethernet-phy@0 { /* referenced twice by ethernet@e8000 above */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
new file mode 100644
index 0000000000..55f329d13f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #5 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 { /* fragment: adds one RX/TX port pair, one MAC and its MDIO/PCS nodes */
+	fman1_rx_0x0d: port@8d000 { /* RX port; label is used by fsl,fman-ports below */
+		cell-index = <0xd>;
+		compatible = "fsl,fman-v3-port-rx";
+		reg = <0x8d000 0x1000>;
+	};
+
+	fman1_tx_0x2d: port@ad000 { /* TX port; label is used by fsl,fman-ports below */
+		cell-index = <0x2d>;
+		compatible = "fsl,fman-v3-port-tx";
+		reg = <0xad000 0x1000>;
+	};
+
+	ethernet@ea000 {
+		cell-index = <5>;
+		compatible = "fsl,fman-memac";
+		reg = <0xea000 0x1000>;
+		fsl,fman-ports = <&fman1_rx_0x0d &fman1_tx_0x2d>; /* RX port, then TX port */
+		ptp-timer = <&ptp_timer1>; /* label is not defined in this fragment */
+		pcsphy-handle = <&pcsphy13>, <&qsgmiid_pcs1>; /* both labels are defined below */
+		pcs-handle-names = "sgmii", "qsgmii"; /* presumably names the handles in order - confirm */
+	};
+
+	mdio@e9000 { /* no compatible/reg here: only a PCS child is added to this bus */
+		qsgmiid_pcs1: ethernet-pcs@1 {
+			compatible = "fsl,lynx-pcs";
+			reg = <1>; /* matches the @1 unit address */
+		};
+	};
+
+	mdio@eb000 {
+		#address-cells = <1>; /* children have a one-cell address ... */
+		#size-cells = <0>; /* ... and no size cell */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xeb000 0x1000>;
+		fsl,erratum-a011043; /* must ignore read errors */
+
+		pcsphy13: ethernet-phy@0 { /* first pcsphy-handle entry of ethernet@ea000 */
+			reg = <0x0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi
new file mode 100644
index 0000000000..3102324605
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi
@@ -0,0 +1,109 @@
+/*
+ * QorIQ FMan v3 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman1: fman@500000 { /* base FMan node: muram, six OH ports and two MDIO buses */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	cell-index = <1>;
+	compatible = "fsl,fman";
+	ranges = <0 0x500000 0xfe000>; /* child address 0 maps to parent 0x500000, length 0xfe000 */
+	reg = <0x500000 0xfe000>; /* same window as ranges */
+	interrupts = <97 2 0 0>, <16 2 1 0>; /* two 4-cell specifiers; cell meaning is set by the interrupt parent */
+	clocks = <&clockgen 3 1>; /* same clock specifier as ptp_timer1 */
+	clock-names = "fmanclk";
+	fsl,qman-channel-range = <0x820 0x10>;
+	ptimer-handle = <&ptp_timer1>; /* ptp_timer1 is defined right after this node */
+
+	muram@0 {
+		compatible = "fsl,fman-muram";
+		reg = <0x0 0x60000>; /* offset 0 of the window above, length 0x60000 */
+	};
+
+	fman1_oh_0x2: port@82000 { /* first of six "fsl,fman-v3-port-oh" ports, spaced 0x1000 apart */
+		cell-index = <0x2>; /* for each OH port: (unit address - 0x80000) / 0x1000 */
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x82000 0x1000>;
+	};
+
+	fman1_oh_0x3: port@83000 {
+		cell-index = <0x3>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x83000 0x1000>;
+	};
+
+	fman1_oh_0x4: port@84000 {
+		cell-index = <0x4>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x84000 0x1000>;
+	};
+
+	fman1_oh_0x5: port@85000 {
+		cell-index = <0x5>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x85000 0x1000>;
+	};
+
+	fman1_oh_0x6: port@86000 {
+		cell-index = <0x6>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x86000 0x1000>;
+	};
+
+	fman1_oh_0x7: port@87000 {
+		cell-index = <0x7>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x87000 0x1000>;
+	};
+
+	mdio1: mdio@fc000 { /* labelled MDIO bus; no child devices are listed here */
+		#address-cells = <1>; /* children have a one-cell address ... */
+		#size-cells = <0>; /* ... and no size cell */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xfc000 0x1000>;
+	};
+
+	mdio@fd000 { /* second MDIO bus; carries no label in this file */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xfd000 0x1000>;
+	};
+};
+
+ptp_timer1: ptp-timer@5fe000 { /* sibling of fman@500000; target of its ptimer-handle */
+	compatible = "fsl,fman-ptp-timer";
+	reg = <0x5fe000 0x1000>; /* starts where the fman1 window ends (0x500000 + 0xfe000) */
+	interrupts = <97 2 0 0>; /* identical to the first fman1 interrupt specifier */
+	clocks = <&clockgen 3 1>; /* identical to the fman1 clock specifier */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
new file mode 100644
index 0000000000..48e5cd6159
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
@@ -0,0 +1,99 @@
+/*
+ * QorIQ FMan v3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman0: fman@400000 { /* base FMan node: muram, four OH ports and two MDIO buses */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	cell-index = <0>;
+	compatible = "fsl,fman";
+	ranges = <0 0x400000 0xfe000>; /* child address 0 maps to parent 0x400000, length 0xfe000 */
+	reg = <0x400000 0xfe000>; /* same window as ranges */
+	interrupts = <96 2 0 0>, <16 2 1 1>; /* two 4-cell specifiers; cell meaning is set by the interrupt parent */
+	clocks = <&clockgen 3 0>; /* same clock specifier as ptp_timer0 */
+	clock-names = "fmanclk";
+	fsl,qman-channel-range = <0x800 0x10>;
+	ptimer-handle = <&ptp_timer0>; /* ptp_timer0 is defined right after this node */
+
+	muram@0 {
+		compatible = "fsl,fman-muram";
+		reg = <0x0 0x30000>; /* offset 0 of the window above, length 0x30000 */
+	};
+
+	fman0_oh_0x2: port@82000 { /* first of four "fsl,fman-v3-port-oh" ports, spaced 0x1000 apart */
+		cell-index = <0x2>; /* for each OH port: (unit address - 0x80000) / 0x1000 */
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x82000 0x1000>;
+	};
+
+	fman0_oh_0x3: port@83000 {
+		cell-index = <0x3>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x83000 0x1000>;
+	};
+
+	fman0_oh_0x4: port@84000 {
+		cell-index = <0x4>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x84000 0x1000>;
+	};
+
+	fman0_oh_0x5: port@85000 {
+		cell-index = <0x5>;
+		compatible = "fsl,fman-v3-port-oh";
+		reg = <0x85000 0x1000>;
+	};
+
+	mdio0: mdio@fc000 { /* labelled MDIO bus; no child devices are listed here */
+		#address-cells = <1>; /* children have a one-cell address ... */
+		#size-cells = <0>; /* ... and no size cell */
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xfc000 0x1000>;
+		fsl,erratum-a009885; /* boolean flag; presumably read by the MDIO driver - confirm */
+	};
+
+	xmdio0: mdio@fd000 { /* second labelled MDIO bus, same properties as mdio0 apart from reg */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+		reg = <0xfd000 0x1000>;
+		fsl,erratum-a009885; /* boolean flag; presumably read by the MDIO driver - confirm */
+	};
+};
+
+ptp_timer0: ptp-timer@4fe000 { /* sibling of fman@400000; target of its ptimer-handle */
+	compatible = "fsl,fman-ptp-timer";
+	reg = <0x4fe000 0x1000>; /* starts where the fman0 window ends (0x400000 + 0xfe000) */
+	interrupts = <96 2 0 0>; /* identical to the first fman0 interrupt specifier */
+	clocks = <&clockgen 3 0>; /* identical to the fman0 clock specifier */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-gpio-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-gpio-0.dtsi
new file mode 100644
index 0000000000..cf714f5f68
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-gpio-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ GPIO device tree stub [ controller @ offset 0x130000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+gpio0: gpio@130000 { /* GPIO controller stub; boards refer to it through the gpio0 label */
+	compatible = "fsl,qoriq-gpio";
+	reg = <0x130000 0x1000>; /* matches the @130000 unit address */
+	interrupts = <55 2 0 0>; /* one 4-cell specifier; cell meaning is set by the interrupt parent */
+	#gpio-cells = <2>; /* a GPIO specifier for this controller takes two cells */
+	gpio-controller; /* marks this node as a GPIO provider */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-gpio-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-gpio-1.dtsi
new file mode 100644
index 0000000000..c2f9cdadb6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-gpio-1.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ GPIO device tree stub [ controller @ offset 0x131000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+gpio1: gpio@131000 { /* GPIO controller stub; boards refer to it through the gpio1 label */
+	compatible = "fsl,qoriq-gpio";
+	reg = <0x131000 0x1000>; /* matches the @131000 unit address */
+	interrupts = <54 2 0 0>; /* one 4-cell specifier; cell meaning is set by the interrupt parent */
+	#gpio-cells = <2>; /* a GPIO specifier for this controller takes two cells */
+	gpio-controller; /* marks this node as a GPIO provider */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-gpio-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-gpio-2.dtsi
new file mode 100644
index 0000000000..33f3ccbac8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-gpio-2.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ GPIO device tree stub [ controller @ offset 0x132000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+gpio2: gpio@132000 { /* GPIO controller stub; boards refer to it through the gpio2 label */
+	compatible = "fsl,qoriq-gpio";
+	reg = <0x132000 0x1000>; /* matches the @132000 unit address */
+	interrupts = <86 2 0 0>; /* one 4-cell specifier; cell meaning is set by the interrupt parent */
+	#gpio-cells = <2>; /* a GPIO specifier for this controller takes two cells */
+	gpio-controller; /* marks this node as a GPIO provider */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-gpio-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-gpio-3.dtsi
new file mode 100644
index 0000000000..86954e95ea
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-gpio-3.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ GPIO device tree stub [ controller @ offset 0x133000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+gpio3: gpio@133000 { /* GPIO controller stub; boards refer to it through the gpio3 label */
+	compatible = "fsl,qoriq-gpio";
+	reg = <0x133000 0x1000>; /* matches the @133000 unit address */
+	interrupts = <87 2 0 0>; /* one 4-cell specifier; cell meaning is set by the interrupt parent */
+	#gpio-cells = <2>; /* a GPIO specifier for this controller takes two cells */
+	gpio-controller; /* marks this node as a GPIO provider */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-i2c-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-i2c-0.dtsi
new file mode 100644
index 0000000000..5f9bf7debe
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-i2c-0.dtsi
@@ -0,0 +1,53 @@
+/*
+ * QorIQ I2C device tree stub [ controller @ offset 0x118000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+i2c@118000 { /* first of the two I2C controllers described in this file */
+	#address-cells = <1>; /* children have a one-cell address ... */
+	#size-cells = <0>; /* ... and no size cell */
+	cell-index = <0>;
+	compatible = "fsl-i2c";
+	reg = <0x118000 0x100>;
+	interrupts = <38 2 0 0>; /* i2c@118100 uses the identical specifier */
+	dfsrr; /* boolean flag; meaning comes from the fsl-i2c binding (not shown here) */
+};
+
+i2c@118100 { /* second controller, 0x100 above i2c@118000 */
+	#address-cells = <1>; /* children have a one-cell address ... */
+	#size-cells = <0>; /* ... and no size cell */
+	cell-index = <1>;
+	compatible = "fsl-i2c";
+	reg = <0x118100 0x100>;
+	interrupts = <38 2 0 0>; /* i2c@118000 uses the identical specifier */
+	dfsrr; /* boolean flag; meaning comes from the fsl-i2c binding (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-i2c-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-i2c-1.dtsi
new file mode 100644
index 0000000000..7989bf5eeb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-i2c-1.dtsi
@@ -0,0 +1,53 @@
+/*
+ * QorIQ I2C device tree stub [ controller @ offset 0x119000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+i2c@119000 { /* first of the two I2C controllers described in this file */
+	#address-cells = <1>; /* children have a one-cell address ... */
+	#size-cells = <0>; /* ... and no size cell */
+	cell-index = <2>;
+	compatible = "fsl-i2c";
+	reg = <0x119000 0x100>;
+	interrupts = <39 2 0 0>; /* i2c@119100 uses the identical specifier */
+	dfsrr; /* boolean flag; meaning comes from the fsl-i2c binding (not shown here) */
+};
+
+i2c@119100 { /* second controller, 0x100 above i2c@119000 */
+	#address-cells = <1>; /* children have a one-cell address ... */
+	#size-cells = <0>; /* ... and no size cell */
+	cell-index = <3>;
+	compatible = "fsl-i2c";
+	reg = <0x119100 0x100>;
+	interrupts = <39 2 0 0>; /* i2c@119000 uses the identical specifier */
+	dfsrr; /* boolean flag; meaning comes from the fsl-i2c binding (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-mpic.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-mpic.dtsi
new file mode 100644
index 0000000000..08f42271f8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-mpic.dtsi
@@ -0,0 +1,106 @@
+/*
+ * QorIQ MPIC device tree stub [ controller @ offset 0x40000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+mpic: pic@40000 {	/* MPIC node; the file header places the controller at offset 0x40000 */
+	interrupt-controller;	/* marks this node as an interrupt controller */
+	#address-cells = <0>;
+	#interrupt-cells = <4>;	/* interrupt specifiers routed to this node are four cells wide */
+	reg = <0x40000 0x40000>;
+	compatible = "fsl,mpic", "chrp,open-pic";
+	device_type = "open-pic";
+	clock-frequency = <0x0>;	/* NOTE(review): zero here; presumably filled in elsewhere - confirm */
+};
+
+timer@41100 {	/* first of two "fsl,mpic-global-timer" nodes in this file */
+	compatible = "fsl,mpic-global-timer";
+	reg = <0x41100 0x100 0x41300 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <0 0 3 0	/* four specifiers of four cells each; first cell runs 0..3 */
+		      1 0 3 0
+		      2 0 3 0
+		      3 0 3 0>;
+};
+
+msi0: msi@41600 {	/* MSI node labelled msi0; eight specifiers whose first cells are 0xe0-0xe7 */
+	compatible = "fsl,mpic-msi";
+	reg = <0x41600 0x200 0x44140 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	msi-available-ranges = <0 0x100>;	/* NOTE(review): presumably (start, count) of usable MSIs - confirm against the binding */
+	interrupts = <
+		0xe0 0 0 0
+		0xe1 0 0 0
+		0xe2 0 0 0
+		0xe3 0 0 0
+		0xe4 0 0 0
+		0xe5 0 0 0
+		0xe6 0 0 0
+		0xe7 0 0 0>;
+};
+
+msi1: msi@41800 {	/* MSI node labelled msi1; eight specifiers whose first cells are 0xe8-0xef */
+	compatible = "fsl,mpic-msi";
+	reg = <0x41800 0x200 0x45140 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	msi-available-ranges = <0 0x100>;	/* NOTE(review): presumably (start, count) of usable MSIs - confirm against the binding */
+	interrupts = <
+		0xe8 0 0 0
+		0xe9 0 0 0
+		0xea 0 0 0
+		0xeb 0 0 0
+		0xec 0 0 0
+		0xed 0 0 0
+		0xee 0 0 0
+		0xef 0 0 0>;
+};
+
+msi2: msi@41a00 {	/* MSI node labelled msi2; eight specifiers whose first cells are 0xf0-0xf7 */
+	compatible = "fsl,mpic-msi";
+	reg = <0x41a00 0x200 0x46140 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	msi-available-ranges = <0 0x100>;	/* NOTE(review): presumably (start, count) of usable MSIs - confirm against the binding */
+	interrupts = <
+		0xf0 0 0 0
+		0xf1 0 0 0
+		0xf2 0 0 0
+		0xf3 0 0 0
+		0xf4 0 0 0
+		0xf5 0 0 0
+		0xf6 0 0 0
+		0xf7 0 0 0>;
+};
+
+timer@42100 {	/* second of two "fsl,mpic-global-timer" nodes in this file */
+	compatible = "fsl,mpic-global-timer";
+	reg = <0x42100 0x100 0x42300 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <4 0 3 0	/* four specifiers of four cells each; first cell runs 4..7 */
+		      5 0 3 0
+		      6 0 3 0
+		      7 0 3 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-mpic4.3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-mpic4.3.dtsi
new file mode 100644
index 0000000000..64f713c248
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-mpic4.3.dtsi
@@ -0,0 +1,149 @@
+/*
+ * QorIQ MPIC device tree stub [ controller @ offset 0x40000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+mpic: pic@40000 {	/* MPIC node; the file header places the controller at offset 0x40000 */
+	interrupt-controller;	/* marks this node as an interrupt controller */
+	#address-cells = <0>;
+	#interrupt-cells = <4>;	/* interrupt specifiers routed to this node are four cells wide */
+	reg = <0x40000 0x40000>;
+	compatible = "fsl,mpic";	/* single compatible string; no "chrp,open-pic" fallback is listed here */
+	device_type = "open-pic";
+	clock-frequency = <0x0>;	/* NOTE(review): zero here; presumably filled in elsewhere - confirm */
+};
+
+timer@41100 {	/* first of two "fsl,mpic-global-timer" nodes in this file */
+	compatible = "fsl,mpic-global-timer";
+	reg = <0x41100 0x100 0x41300 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <0 0 3 0	/* four specifiers of four cells each; first cell runs 0..3 */
+		      1 0 3 0
+		      2 0 3 0
+		      3 0 3 0>;
+};
+
+msi0: msi@41600 {	/* v4.3 MSI node labelled msi0; sixteen specifiers: 0xe0-0xe7 then 0x100-0x107 */
+	compatible = "fsl,mpic-msi-v4.3";
+	reg = <0x41600 0x200 0x44148 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <
+		0xe0 0 0 0
+		0xe1 0 0 0
+		0xe2 0 0 0
+		0xe3 0 0 0
+		0xe4 0 0 0
+		0xe5 0 0 0
+		0xe6 0 0 0
+		0xe7 0 0 0
+		0x100 0 0 0
+		0x101 0 0 0
+		0x102 0 0 0
+		0x103 0 0 0
+		0x104 0 0 0
+		0x105 0 0 0
+		0x106 0 0 0
+		0x107 0 0 0>;
+};
+
+msi1: msi@41800 {	/* v4.3 MSI node labelled msi1; sixteen specifiers: 0xe8-0xef then 0x108-0x10f */
+	compatible = "fsl,mpic-msi-v4.3";
+	reg = <0x41800 0x200 0x45148 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <
+		0xe8 0 0 0
+		0xe9 0 0 0
+		0xea 0 0 0
+		0xeb 0 0 0
+		0xec 0 0 0
+		0xed 0 0 0
+		0xee 0 0 0
+		0xef 0 0 0
+		0x108 0 0 0
+		0x109 0 0 0
+		0x10a 0 0 0
+		0x10b 0 0 0
+		0x10c 0 0 0
+		0x10d 0 0 0
+		0x10e 0 0 0
+		0x10f 0 0 0>;
+};
+
+msi2: msi@41a00 {	/* v4.3 MSI node labelled msi2; sixteen specifiers: 0xf0-0xf7 then 0x110-0x117 */
+	compatible = "fsl,mpic-msi-v4.3";
+	reg = <0x41a00 0x200 0x46148 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <
+		0xf0 0 0 0
+		0xf1 0 0 0
+		0xf2 0 0 0
+		0xf3 0 0 0
+		0xf4 0 0 0
+		0xf5 0 0 0
+		0xf6 0 0 0
+		0xf7 0 0 0
+		0x110 0 0 0
+		0x111 0 0 0
+		0x112 0 0 0
+		0x113 0 0 0
+		0x114 0 0 0
+		0x115 0 0 0
+		0x116 0 0 0
+		0x117 0 0 0>;
+};
+
+msi3: msi@41c00 {	/* v4.3 MSI node labelled msi3; sixteen specifiers: 0xf8-0xff then 0x118-0x11f */
+	compatible = "fsl,mpic-msi-v4.3";
+	reg = <0x41c00 0x200 0x47148 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <
+		0xf8 0 0 0
+		0xf9 0 0 0
+		0xfa 0 0 0
+		0xfb 0 0 0
+		0xfc 0 0 0
+		0xfd 0 0 0
+		0xfe 0 0 0
+		0xff 0 0 0
+		0x118 0 0 0
+		0x119 0 0 0
+		0x11a 0 0 0
+		0x11b 0 0 0
+		0x11c 0 0 0
+		0x11d 0 0 0
+		0x11e 0 0 0
+		0x11f 0 0 0>;
+};
+
+timer@42100 {	/* second of two "fsl,mpic-global-timer" nodes in this file */
+	compatible = "fsl,mpic-global-timer";
+	reg = <0x42100 0x100 0x42300 4>;	/* NOTE(review): presumably two (offset, length) pairs - confirm parent cell sizes */
+	interrupts = <4 0 3 0	/* four specifiers of four cells each; first cell runs 4..7 */
+		      5 0 3 0
+		      6 0 3 0
+		      7 0 3 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
new file mode 100644
index 0000000000..e77e4b4ed5
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
@@ -0,0 +1,101 @@
+/*
+ * QorIQ QMan Portal device tree stub for 10 portals & 15 pool channels
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&qportals {	/* extends the node labelled qportals (defined outside this file) with ten QMan portal children */
+	#address-cells = <1>;	/* child reg entries use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell, so each child reg below holds two (address, size) pairs */
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {	/* portal 0: first region at 0x0, second at 0x100000 */
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x100000 0x1000>;	/* successive portals step the first region by 0x4000 and the second by 0x1000 */
+		interrupts = <104 2 0 0>;	/* first cell increases by 2 per portal (104..122) */
+		cell-index = <0x0>;	/* written in hex here; the other portals use decimal */
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x101000 0x1000>;
+		interrupts = <106 2 0 0>;
+		cell-index = <1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x102000 0x1000>;
+		interrupts = <108 2 0 0>;
+		cell-index = <2>;
+	};
+	qportal3: qman-portal@c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc000 0x4000>, <0x103000 0x1000>;
+		interrupts = <110 2 0 0>;
+		cell-index = <3>;
+	};
+	qportal4: qman-portal@10000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x10000 0x4000>, <0x104000 0x1000>;
+		interrupts = <112 2 0 0>;
+		cell-index = <4>;
+	};
+	qportal5: qman-portal@14000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x14000 0x4000>, <0x105000 0x1000>;
+		interrupts = <114 2 0 0>;
+		cell-index = <5>;
+	};
+	qportal6: qman-portal@18000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x18000 0x4000>, <0x106000 0x1000>;
+		interrupts = <116 2 0 0>;
+		cell-index = <6>;
+	};
+
+	qportal7: qman-portal@1c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x1c000 0x4000>, <0x107000 0x1000>;
+		interrupts = <118 2 0 0>;
+		cell-index = <7>;
+	};
+	qportal8: qman-portal@20000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x20000 0x4000>, <0x108000 0x1000>;
+		interrupts = <120 2 0 0>;
+		cell-index = <8>;
+	};
+	qportal9: qman-portal@24000 {	/* last of the ten portals announced in the file header */
+		compatible = "fsl,qman-portal";
+		reg = <0x24000 0x4000>, <0x109000 0x1000>;
+		interrupts = <122 2 0 0>;
+		cell-index = <9>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi
new file mode 100644
index 0000000000..0695778c43
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ QMan device tree stub [ controller @ offset 0x318000 ]
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+qman: qman@318000 {	/* QMan controller node; the file header places it at offset 0x318000 */
+	compatible = "fsl,qman";
+	reg = <0x318000 0x1000>;	/* second cell is 0x1000 here; the rev3 stub (qoriq-qman3.dtsi) uses 0x2000 */
+	interrupts = <16 2 1 3>;
+	fsl,qman-portals = <&qportals>;	/* phandle to the node labelled qportals (defined outside this file) */
+	memory-region = <&qman_fqd &qman_pfdr>;	/* phandles to two labels defined outside this file */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi
new file mode 100644
index 0000000000..b379abd143
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ QMan rev3 device tree stub [ controller @ offset 0x318000 ]
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+qman: qman@318000 {	/* QMan rev3 controller node; the file header places it at offset 0x318000 */
+	compatible = "fsl,qman";
+	reg = <0x318000 0x2000>;	/* second cell is 0x2000 here; the rev1 stub (qoriq-qman1.dtsi) uses 0x1000 */
+	interrupts = <16 2 1 3>;
+	fsl,qman-portals = <&qportals>;	/* phandle to the node labelled qportals (defined outside this file) */
+	memory-region = <&qman_fqd &qman_pfdr>;	/* phandles to two labels defined outside this file */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi
new file mode 100644
index 0000000000..8d2e8aa6cf
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi
@@ -0,0 +1,85 @@
+/*
+ * QorIQ RAID 1.0 device tree stub [ controller @ offset 0x320000 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+raideng: raideng@320000 {	/* RAID engine v1.0 node with two job queues, each holding two job rings */
+	compatible = "fsl,raideng-v1.0";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	reg = <0x320000 0x10000>;
+	ranges = <0 0x320000 0x10000>;	/* child address 0 maps to parent 0x320000 for 0x10000 bytes */
+
+	raideng_jq0@1000 {	/* job queue 0 */
+		compatible = "fsl,raideng-v1.0-job-queue";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x1000 0x1000>;
+		ranges = <0x0 0x1000 0x1000>;	/* ring addresses below are relative to 0x1000 in the raideng space */
+
+		raideng_jr0: jr@0 {	/* ring tagged "hp-ring" */
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+			reg = <0x0 0x400>;
+			interrupts = <139 2 0 0>;
+			interrupt-parent = <&mpic>;	/* interrupt routed to the node labelled mpic */
+		};
+
+		raideng_jr1: jr@400 {	/* ring tagged "lp-ring" */
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-lp-ring";
+			reg = <0x400 0x400>;
+			interrupts = <140 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	raideng_jq1@2000 {	/* job queue 1 */
+		compatible = "fsl,raideng-v1.0-job-queue";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x2000 0x1000>;
+		ranges = <0x0 0x2000 0x1000>;	/* ring addresses below are relative to 0x2000 in the raideng space */
+
+		raideng_jr2: jr@0 {	/* ring tagged "hp-ring" */
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+			reg = <0x0 0x400>;
+			interrupts = <141 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		raideng_jr3: jr@400 {	/* ring tagged "lp-ring" */
+			compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-lp-ring";
+			reg = <0x400 0x400>;
+			interrupts = <142 2 0 0>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-rmu-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-rmu-0.dtsi
new file mode 100644
index 0000000000..ca7fec792e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-rmu-0.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ RIO Message Unit device tree stub [ controller @ offset 0xd3000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+rmu: rmu@d3000 {	/* RIO message unit node: two message units, one doorbell unit, one port-write unit */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,srio-rmu";
+	reg = <0xd3000 0x500>;
+	ranges = <0x0 0xd3000 0x500>;	/* child address 0 maps to parent 0xd3000 for 0x500 bytes */
+
+	message-unit@0 {	/* two interrupt specifiers: tx then rx */
+		compatible = "fsl,srio-msg-unit";
+		reg = <0x0 0x100>;
+		interrupts = <
+			60 2 0 0  /* msg1_tx_irq */
+			61 2 0 0>;/* msg1_rx_irq */
+	};
+	message-unit@100 {	/* two interrupt specifiers: tx then rx */
+		compatible = "fsl,srio-msg-unit";
+		reg = <0x100 0x100>;
+		interrupts = <
+			62 2 0 0  /* msg2_tx_irq */
+			63 2 0 0>;/* msg2_rx_irq */
+	};
+	doorbell-unit@400 {	/* two interrupt specifiers: outbound then inbound */
+		compatible = "fsl,srio-dbell-unit";
+		reg = <0x400 0x80>;
+		interrupts = <
+			56 2 0 0  /* bell_outb_irq */
+			57 2 0 0>;/* bell_inb_irq */
+	};
+	port-write-unit@4e0 {	/* occupies the last 0x20 bytes of the 0x500-byte rmu range */
+		compatible = "fsl,srio-port-write-unit";
+		reg = <0x4e0 0x20>;
+		interrupts = <16 2 1 11>;	/* NOTE(review): only specifier in this node set with non-zero 3rd/4th cells - confirm against the MPIC binding */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sata2-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sata2-0.dtsi
new file mode 100644
index 0000000000..b642047fde
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sata2-0.dtsi
@@ -0,0 +1,39 @@
+/*
+ * QorIQ SATAv2 device tree stub [ controller @ offset 0x220000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+sata@220000 {	/* SATAv2 controller node; the file header places it at offset 0x220000 */
+	compatible = "fsl,pq-sata-v2";
+	reg = <0x220000 0x1000>;
+	interrupts = <68 0x2 0 0>;	/* the sibling stub sata@221000 uses 69 as its first cell */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sata2-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sata2-1.dtsi
new file mode 100644
index 0000000000..c573702597
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sata2-1.dtsi
@@ -0,0 +1,39 @@
+/*
+ * QorIQ SATAv2 device tree stub [ controller @ offset 0x221000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+sata@221000 {	/* SATAv2 controller node; the file header places it at offset 0x221000 */
+	compatible = "fsl,pq-sata-v2";
+	reg = <0x221000 0x1000>;
+	interrupts = <69 0x2 0 0>;	/* the sibling stub sata@220000 uses 68 as its first cell */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec4.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec4.0-0.dtsi
new file mode 100644
index 0000000000..02bee5fcbb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sec4.0-0.dtsi
@@ -0,0 +1,101 @@
+/*
+ * QorIQ Sec/Crypto 4.0 device tree stub [ controller @ offset 0x300000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto: crypto@300000 {	/* SEC 4.0 crypto node: four job rings plus an rtic child with four memory entries */
+	compatible = "fsl,sec-v4.0";
+	fsl,sec-era = <1>;
+	#address-cells = <1>;
+	#size-cells = <1>;
+	reg = <0x300000 0x10000>;
+	ranges = <0 0x300000 0x10000>;	/* child address 0 maps to parent 0x300000 for 0x10000 bytes */
+	interrupts = <92 2 0 0>;
+
+	sec_jr0: jr@1000 {	/* job rings sit 0x1000 apart; first interrupt cells run 88..91 */
+		compatible = "fsl,sec-v4.0-job-ring";
+		reg = <0x1000 0x1000>;
+		interrupts = <88 2 0 0>;
+	};
+
+	sec_jr1: jr@2000 {
+		compatible = "fsl,sec-v4.0-job-ring";
+		reg = <0x2000 0x1000>;
+		interrupts = <89 2 0 0>;
+	};
+
+	sec_jr2: jr@3000 {
+		compatible = "fsl,sec-v4.0-job-ring";
+		reg = <0x3000 0x1000>;
+		interrupts = <90 2 0 0>;
+	};
+
+	sec_jr3: jr@4000 {
+		compatible = "fsl,sec-v4.0-job-ring";
+		reg = <0x4000 0x1000>;
+		interrupts = <91 2 0 0>;
+	};
+
+	rtic@6000 {
+		compatible = "fsl,sec-v4.0-rtic";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x6000 0x100>;
+		ranges = <0x0 0x6100 0xe00>;	/* child address 0 maps to 0x6100 in the crypto space for 0xe00 bytes */
+
+		rtic_a: rtic-a@0 {
+			compatible = "fsl,sec-v4.0-rtic-memory";
+			reg = <0x00 0x20 0x100 0x80>;	/* two (address, size) pairs: <0x00 0x20> and <0x100 0x80> */
+		};
+
+		rtic_b: rtic-b@20 {
+			compatible = "fsl,sec-v4.0-rtic-memory";
+			reg = <0x20 0x20 0x200 0x80>;
+		};
+
+		rtic_c: rtic-c@40 {
+			compatible = "fsl,sec-v4.0-rtic-memory";
+			reg = <0x40 0x20 0x300 0x80>;
+		};
+
+		rtic_d: rtic-d@60 {
+			compatible = "fsl,sec-v4.0-rtic-memory";
+			reg = <0x60 0x20 0x500 0x80>;	/* NOTE(review): 0x500 breaks the 0x100/0x200/0x300 progression of rtic_a..c - confirm against the SEC reference manual */
+		};
+	};
+};
+
+sec_mon: sec_mon@314000 {	/* separate top-level node, sibling of crypto@300000 rather than a child of it */
+	compatible = "fsl,sec-v4.0-mon";
+	reg = <0x314000 0x1000>;
+	interrupts = <93 2 0 0>;	/* first cell follows the crypto node's 92 */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec4.2-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec4.2-0.dtsi
new file mode 100644
index 0000000000..7f7574e533
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sec4.2-0.dtsi
@@ -0,0 +1,110 @@
+/*
+ * QorIQ Sec/Crypto 4.2 device tree stub [ controller @ offset 0x300000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto: crypto@300000 {	/* SEC v4.2 block: four job rings (jr@1000..jr@4000) plus an rtic sub-node */
+	compatible = "fsl,sec-v4.2", "fsl,sec-v4.0";	/* v4.2 string first, v4.0 string kept as the last entry */
+	fsl,sec-era = <3>;
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	reg		 = <0x300000 0x10000>;
+	ranges		 = <0 0x300000 0x10000>;	/* child offset 0 -> 0x300000 in the parent, length 0x10000 */
+	interrupts	 = <92 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+
+	sec_jr0: jr@1000 {	/* each job ring: a 0x1000 window inside crypto plus its own interrupt (88..91) */
+		compatible = "fsl,sec-v4.2-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x1000 0x1000>;
+		interrupts = <88 2 0 0>;
+	};
+
+	sec_jr1: jr@2000 {
+		compatible = "fsl,sec-v4.2-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x2000 0x1000>;
+		interrupts = <89 2 0 0>;
+	};
+
+	sec_jr2: jr@3000 {
+		compatible = "fsl,sec-v4.2-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x3000 0x1000>;
+		interrupts = <90 2 0 0>;
+	};
+
+	sec_jr3: jr@4000 {
+		compatible = "fsl,sec-v4.2-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x4000 0x1000>;
+		interrupts = <91 2 0 0>;
+	};
+
+	rtic@6000 {
+		compatible = "fsl,sec-v4.2-rtic",
+			     "fsl,sec-v4.0-rtic";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x6000 0x100>;
+		ranges = <0x0 0x6100 0xe00>;	/* child 0x0 -> crypto offset 0x6100, length 0xe00 */
+
+		rtic_a: rtic-a@0 {
+			compatible = "fsl,sec-v4.2-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x00 0x20 0x100 0x80>;	/* two (address size) pairs: 0x00/0x20 and 0x100/0x80 */
+		};
+
+		rtic_b: rtic-b@20 {
+			compatible = "fsl,sec-v4.2-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x20 0x20 0x200 0x80>;
+		};
+
+		rtic_c: rtic-c@40 {
+			compatible = "fsl,sec-v4.2-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x40 0x20 0x300 0x80>;
+		};
+
+		rtic_d: rtic-d@60 {
+			compatible = "fsl,sec-v4.2-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x60 0x20 0x500 0x80>;	/* NOTE(review): second region is 0x500, not 0x400 as the a/b/c step suggests - confirm against the SEC manual */
+		};
+	};
+};
+
+sec_mon: sec_mon@314000 {	/* separate node: 0x314000 lies outside crypto's 0x300000 + 0x10000 window */
+	compatible = "fsl,sec-v4.2-mon", "fsl,sec-v4.0-mon";
+	reg = <0x314000 0x1000>;
+	interrupts = <93 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec5.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec5.0-0.dtsi
new file mode 100644
index 0000000000..e298efbb0f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sec5.0-0.dtsi
@@ -0,0 +1,110 @@
+/*
+ * QorIQ Sec/Crypto 5.0 device tree stub [ controller @ offset 0x300000 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto: crypto@300000 {	/* SEC v5.0 block: four job rings (jr@1000..jr@4000) plus an rtic sub-node */
+	compatible = "fsl,sec-v5.0", "fsl,sec-v4.0";	/* v5.0 string first, v4.0 string kept as the last entry */
+	fsl,sec-era = <5>;
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	reg		 = <0x300000 0x10000>;
+	ranges		 = <0 0x300000 0x10000>;	/* child offset 0 -> 0x300000 in the parent, length 0x10000 */
+	interrupts	 = <92 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+
+	sec_jr0: jr@1000 {	/* each job ring: a 0x1000 window inside crypto plus its own interrupt (88..91) */
+		compatible = "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x1000 0x1000>;
+		interrupts = <88 2 0 0>;
+	};
+
+	sec_jr1: jr@2000 {
+		compatible = "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x2000 0x1000>;
+		interrupts = <89 2 0 0>;
+	};
+
+	sec_jr2: jr@3000 {
+		compatible = "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x3000 0x1000>;
+		interrupts = <90 2 0 0>;
+	};
+
+	sec_jr3: jr@4000 {
+		compatible = "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x4000 0x1000>;
+		interrupts = <91 2 0 0>;
+	};
+
+	rtic@6000 {
+		compatible = "fsl,sec-v5.0-rtic",
+			     "fsl,sec-v4.0-rtic";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x6000 0x100>;
+		ranges = <0x0 0x6100 0xe00>;	/* child 0x0 -> crypto offset 0x6100, length 0xe00 */
+
+		rtic_a: rtic-a@0 {
+			compatible = "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x00 0x20 0x100 0x80>;	/* two (address size) pairs: 0x00/0x20 and 0x100/0x80 */
+		};
+
+		rtic_b: rtic-b@20 {
+			compatible = "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x20 0x20 0x200 0x80>;
+		};
+
+		rtic_c: rtic-c@40 {
+			compatible = "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x40 0x20 0x300 0x80>;
+		};
+
+		rtic_d: rtic-d@60 {
+			compatible = "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x60 0x20 0x500 0x80>;	/* NOTE(review): second region is 0x500, not 0x400 as the a/b/c step suggests - confirm against the SEC manual */
+		};
+	};
+};
+
+sec_mon: sec_mon@314000 {	/* separate node: 0x314000 lies outside crypto's 0x300000 + 0x10000 window */
+	compatible = "fsl,sec-v5.0-mon", "fsl,sec-v4.0-mon";
+	reg = <0x314000 0x1000>;
+	interrupts = <93 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec5.2-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec5.2-0.dtsi
new file mode 100644
index 0000000000..33ff09d52e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sec5.2-0.dtsi
@@ -0,0 +1,119 @@
+/*
+ * QorIQ Sec/Crypto 5.2 device tree stub [ controller @ offset 0x300000 ]
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto: crypto@300000 {	/* SEC v5.2 block: four job rings (jr@1000..jr@4000) plus an rtic sub-node */
+	compatible = "fsl,sec-v5.2", "fsl,sec-v5.0", "fsl,sec-v4.0";	/* newest string first, v4.0 string kept as the last entry */
+	fsl,sec-era = <5>;
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	reg		 = <0x300000 0x10000>;
+	ranges		 = <0 0x300000 0x10000>;	/* child offset 0 -> 0x300000 in the parent, length 0x10000 */
+	interrupts	 = <92 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+
+	sec_jr0: jr@1000 {	/* each job ring: a 0x1000 window inside crypto plus its own interrupt (88..91) */
+		compatible = "fsl,sec-v5.2-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x1000 0x1000>;
+		interrupts = <88 2 0 0>;
+	};
+
+	sec_jr1: jr@2000 {
+		compatible = "fsl,sec-v5.2-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x2000 0x1000>;
+		interrupts = <89 2 0 0>;
+	};
+
+	sec_jr2: jr@3000 {
+		compatible = "fsl,sec-v5.2-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x3000 0x1000>;
+		interrupts = <90 2 0 0>;
+	};
+
+	sec_jr3: jr@4000 {
+		compatible = "fsl,sec-v5.2-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x4000 0x1000>;
+		interrupts = <91 2 0 0>;
+	};
+
+	rtic@6000 {
+		compatible = "fsl,sec-v5.2-rtic",
+			     "fsl,sec-v5.0-rtic",
+			     "fsl,sec-v4.0-rtic";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x6000 0x100>;
+		ranges = <0x0 0x6100 0xe00>;	/* child 0x0 -> crypto offset 0x6100, length 0xe00 */
+
+		rtic_a: rtic-a@0 {
+			compatible = "fsl,sec-v5.2-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x00 0x20 0x100 0x80>;	/* two (address size) pairs: 0x00/0x20 and 0x100/0x80 */
+		};
+
+		rtic_b: rtic-b@20 {
+			compatible = "fsl,sec-v5.2-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x20 0x20 0x200 0x80>;
+		};
+
+		rtic_c: rtic-c@40 {
+			compatible = "fsl,sec-v5.2-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x40 0x20 0x300 0x80>;
+		};
+
+		rtic_d: rtic-d@60 {
+			compatible = "fsl,sec-v5.2-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x60 0x20 0x500 0x80>;	/* NOTE(review): second region is 0x500, not 0x400 as the a/b/c step suggests - confirm against the SEC manual */
+		};
+	};
+};
+
+sec_mon: sec_mon@314000 {	/* separate node: 0x314000 lies outside crypto's 0x300000 + 0x10000 window */
+	compatible = "fsl,sec-v5.2-mon", "fsl,sec-v5.0-mon", "fsl,sec-v4.0-mon";
+	reg = <0x314000 0x1000>;
+	interrupts = <93 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec5.3-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec5.3-0.dtsi
new file mode 100644
index 0000000000..08778221c1
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sec5.3-0.dtsi
@@ -0,0 +1,119 @@
+/*
+ * QorIQ Sec/Crypto 5.3 device tree stub [ controller @ offset 0x300000 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+crypto: crypto@300000 {	/* SEC v5.3 block: four job rings (jr@1000..jr@4000) plus an rtic sub-node */
+	compatible = "fsl,sec-v5.3", "fsl,sec-v5.0", "fsl,sec-v4.0";	/* newest string first, v4.0 string kept as the last entry */
+	fsl,sec-era = <4>;
+	#address-cells = <1>;	/* children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+	reg		 = <0x300000 0x10000>;
+	ranges		 = <0 0x300000 0x10000>;	/* child offset 0 -> 0x300000 in the parent, length 0x10000 */
+	interrupts	 = <92 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+
+	sec_jr0: jr@1000 {	/* each job ring: a 0x1000 window inside crypto plus its own interrupt (88..91) */
+		compatible = "fsl,sec-v5.3-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x1000 0x1000>;
+		interrupts = <88 2 0 0>;
+	};
+
+	sec_jr1: jr@2000 {
+		compatible = "fsl,sec-v5.3-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x2000 0x1000>;
+		interrupts = <89 2 0 0>;
+	};
+
+	sec_jr2: jr@3000 {
+		compatible = "fsl,sec-v5.3-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x3000 0x1000>;
+		interrupts = <90 2 0 0>;
+	};
+
+	sec_jr3: jr@4000 {
+		compatible = "fsl,sec-v5.3-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg = <0x4000 0x1000>;
+		interrupts = <91 2 0 0>;
+	};
+
+	rtic@6000 {
+		compatible = "fsl,sec-v5.3-rtic",
+			     "fsl,sec-v5.0-rtic",
+			     "fsl,sec-v4.0-rtic";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x6000 0x100>;
+		ranges = <0x0 0x6100 0xe00>;	/* child 0x0 -> crypto offset 0x6100, length 0xe00 */
+
+		rtic_a: rtic-a@0 {
+			compatible = "fsl,sec-v5.3-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x00 0x20 0x100 0x80>;	/* two (address size) pairs: 0x00/0x20 and 0x100/0x80 */
+		};
+
+		rtic_b: rtic-b@20 {
+			compatible = "fsl,sec-v5.3-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x20 0x20 0x200 0x80>;
+		};
+
+		rtic_c: rtic-c@40 {
+			compatible = "fsl,sec-v5.3-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x40 0x20 0x300 0x80>;
+		};
+
+		rtic_d: rtic-d@60 {
+			compatible = "fsl,sec-v5.3-rtic-memory",
+				     "fsl,sec-v5.0-rtic-memory",
+				     "fsl,sec-v4.0-rtic-memory";
+			reg = <0x60 0x20 0x500 0x80>;	/* NOTE(review): second region is 0x500, not 0x400 as the a/b/c step suggests - confirm against the SEC manual */
+		};
+	};
+};
+
+sec_mon: sec_mon@314000 {	/* separate node: 0x314000 lies outside crypto's 0x300000 + 0x10000 window */
+	compatible = "fsl,sec-v5.3-mon", "fsl,sec-v5.0-mon", "fsl,sec-v4.0-mon";
+	reg = <0x314000 0x1000>;
+	interrupts = <93 2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi
new file mode 100644
index 0000000000..7d4a6a2354
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi
@@ -0,0 +1,57 @@
+/*
+ * QorIQ Sec/Crypto 6.0 device tree stub
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+	compatible = "fsl,sec-v6.0", "fsl,sec-v5.0",	/* bare properties, no enclosing node: presumably included from inside a crypto node defined elsewhere */
+		     "fsl,sec-v4.0";
+	fsl,sec-era = <6>;
+	#address-cells = <1>;	/* job-ring children use one address cell ... */
+	#size-cells = <1>;	/* ... and one size cell */
+
+	jr@1000 {	/* only two job rings here, with no reg/ranges/interrupts at this level and no interrupts per ring */
+		compatible = "fsl,sec-v6.0-job-ring",
+			     "fsl,sec-v5.2-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.4-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg	   = <0x1000 0x1000>;	/* offset 0x1000, size 0x1000 */
+	};
+
+	jr@2000 {
+		compatible = "fsl,sec-v6.0-job-ring",
+			     "fsl,sec-v5.2-job-ring",
+			     "fsl,sec-v5.0-job-ring",
+			     "fsl,sec-v4.4-job-ring",
+			     "fsl,sec-v4.0-job-ring";
+		reg	   = <0x2000 0x1000>;	/* offset 0x2000, size 0x1000 */
+	};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-usb2-dr-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-usb2-dr-0.dtsi
new file mode 100644
index 0000000000..4dd6f84c23
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-usb2-dr-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ USB DR device tree stub [ controller @ offset 0x211000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+usb@211000 {	/* USB DR controller stub: 0x1000 register window at offset 0x211000 */
+	compatible = "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+	reg = <0x211000 0x1000>;
+	#address-cells = <1>;	/* any child node is addressed by one cell ... */
+	#size-cells = <0>;	/* ... and carries no size */
+	interrupts = <45 0x2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-usb2-mph-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-usb2-mph-0.dtsi
new file mode 100644
index 0000000000..f053835aa1
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-usb2-mph-0.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ USB Host device tree stub [ controller @ offset 0x210000 ]
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+usb@210000 {	/* USB host (mph) controller stub: 0x1000 register window at offset 0x210000 */
+	compatible = "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph";
+	reg = <0x210000 0x1000>;
+	#address-cells = <1>;	/* any child node is addressed by one cell ... */
+	#size-cells = <0>;	/* ... and carries no size */
+	interrupts = <44 0x2 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent (not shown here) */
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
new file mode 100644
index 0000000000..f82f85c659
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
@@ -0,0 +1,232 @@
+/*
+ * T1023 RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t102xsi-pre.dtsi"
+
+/ {	/* T1023 RDB board tree; several nodes below only add properties and are presumably completed by the included .dtsi files */
+	model = "fsl,T1023RDB";
+	compatible = "fsl,T1023RDB";
+	#address-cells = <2>;	/* top-level addresses are two cells (high, low) */
+	#size-cells = <2>;	/* top-level sizes are two cells as well */
+	interrupt-parent = <&mpic>;	/* &mpic is not defined in this file */
+
+	reserved-memory {	/* children give only size and alignment, no reg */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;	/* empty ranges: same address space as the parent */
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;	/* 16 MiB */
+			alignment = <0 0x1000000>;	/* aligned to its own size */
+		};
+
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 4 MiB */
+			alignment = <0 0x400000>;
+		};
+
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 32 MiB */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {	/* ifc cell counts are set elsewhere; the 3-cell child reg values suggest 2 address cells + 1 size cell */
+		reg = <0xf 0xfe124000 0 0x2000>;	/* registers at 0xf_fe124000, size 0x2000 */
+		ranges = <0 0 0xf 0xe8000000 0x08000000	/* child <0 0> -> 0xf_e8000000, size 0x08000000 (nor@0,0) */
+			  1 0 0xf 0xff800000 0x00010000>;	/* child <1 0> -> 0xf_ff800000, size 0x00010000 (nand@1,0) */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			status = "disabled";	/* NOR flash is described but left disabled */
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;	/* covers the whole 0x08000000 window mapped above */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x1 0x0 0x10000>;
+		};
+	};
+
+	memory {	/* no reg here - NOTE(review): presumably filled in by the bootloader, confirm */
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;	/* child 0 -> 0xf_00000000, size 0x01072000 */
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;	/* child 0 -> 0xf_f4000000, size 0x2000000 */
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;	/* child 0 -> 0xf_f6000000, size 0x2000000 */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;	/* child 0 -> 0xf_fe000000, size 0x1000000 */
+		reg = <0xf 0xfe000000 0 0x00001000>;	/* first 0x1000 bytes of that window */
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25fl512s", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clk */
+			};
+		};
+
+		i2c@118000 {
+			eeprom@50 {
+				compatible = "st,m24256";
+				reg = <0x50>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+				interrupts = <0x5 0x1 0 0>;	/* 4-cell specifier; cell meaning comes from the interrupt parent */
+			};
+		};
+
+		i2c@118100 {
+			current-sensor@40 {
+				compatible = "ti,ina220";
+				reg = <0x40>;
+				shunt-resistor = <1000>;
+			};
+
+			current-sensor@41 {
+				compatible = "ti,ina220";
+				reg = <0x41>;
+				shunt-resistor = <1000>;
+			};
+		};
+
+		fman@400000 {	/* four MACs; each sleep entry names &rcpm (defined elsewhere) with a distinct single-bit mask */
+			fm1mac1: ethernet@e0000 {
+				phy-handle = <&sgmii_rtk_phy2>;	/* PHY at address 3 on mdio0 below */
+				phy-connection-type = "sgmii";
+				sleep = <&rcpm 0x80000000>;
+			};
+
+			fm1mac2: ethernet@e2000 {	/* no phy-handle or connection type given for this MAC */
+				sleep = <&rcpm 0x40000000>;
+			};
+
+			fm1mac3: ethernet@e4000 {
+				phy-handle = <&sgmii_aqr_phy3>;	/* PHY at address 2 on xmdio0 below */
+				phy-connection-type = "2500base-x";
+				sleep = <&rcpm 0x20000000>;
+			};
+
+			fm1mac4: ethernet@e6000 {
+				phy-handle = <&rgmii_rtk_phy1>;	/* PHY at address 1 on mdio0 below */
+				phy-connection-type = "rgmii";
+				sleep = <&rcpm 0x10000000>;
+			};
+
+
+			mdio0: mdio@fc000 {
+				rgmii_rtk_phy1: ethernet-phy@1 {
+					reg = <0x1>;
+				};
+				sgmii_rtk_phy2: ethernet-phy@3 {	/* label says phy2, but the bus address is 3 */
+					reg = <0x3>;
+				};
+			};
+
+			xmdio0: mdio@fd000 {
+				sgmii_aqr_phy3: ethernet-phy@2 {	/* label says phy3, but the bus address is 2 */
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x2>;
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 {	/* ranges rows read as 3 PCI cells, 2 parent cells, 2 size cells; the pcie cell counts are set elsewhere */
+		reg = <0xf 0xfe240000 0 0x10000>;	/* controller registers at 0xf_fe240000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000	/* PCI 0xe0000000 -> 0xc_00000000, size 0x10000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;	/* PCI 0x0 -> 0xf_f8000000, size 0x10000 */
+		pcie@0 {	/* both rows map each PCI address to the same PCI address */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {	/* same layout as pci0 with different parent-side addresses */
+		reg = <0xf 0xfe250000 0 0x10000>;	/* controller registers at 0xf_fe250000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000	/* PCI 0xe0000000 -> 0xc_10000000, size 0x10000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;	/* PCI 0x0 -> 0xf_f8010000, size 0x10000 */
+		pcie@0 {	/* both rows map each PCI address to the same PCI address */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {	/* same layout as pci0 with different parent-side addresses */
+		reg = <0xf 0xfe260000 0 0x10000>;	/* controller registers at 0xf_fe260000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000	/* PCI 0xe0000000 -> 0xc_20000000, size 0x10000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* PCI 0x0 -> 0xf_f8020000, size 0x10000 */
+		pcie@0 {	/* both rows map each PCI address to the same PCI address */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+#include "t1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
new file mode 100644
index 0000000000..d552044c5a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -0,0 +1,522 @@
+/*
+ * T1023 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+&bman_fbpr {	/* extends the bman-fbpr node that the board file declares under reserved-memory */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;	/* read as 2 address + 2 size cells: start 0, size 0x10000_00000000 - assumes the board reserved-memory node uses 2/2 cells */
+};
+
+&qman_fqd {	/* extends the qman-fqd node that the board file declares under reserved-memory */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>;	/* read as 2 address + 2 size cells: start 0, size 0x10000_00000000 - assumes the board reserved-memory node uses 2/2 cells */
+};
+
+&qman_pfdr {	/* extends the qman-pfdr node that the board file declares under reserved-memory */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>;	/* read as 2 address + 2 size cells: start 0, size 0x10000_00000000 - assumes the board reserved-memory node uses 2/2 cells */
+};
+
+&ifc {	/* SoC-level properties for the localbus node labelled ifc in the board file */
+	#address-cells = <2>;	/* children use two address cells, e.g. reg = <0x0 0x0 0x8000000> on nor@0,0 in the board files */
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;	/* 4-cell specifier; the board root sets interrupt-parent = <&mpic> */
+};
+
+&pci0 {	/* SoC-level properties for the PCIe controller the board file labels pci0 */
+	compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;	/* three-cell PCI addresses, matching the 3+2+2 cell ranges entries in the board files */
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;	/* pamu0 is the pamu@0 node under iommu@20000 in &soc */
+	pcie@0 {	/* child node that carries the interrupt map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell per child interrupt (column 4 of the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <20 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* lookup compares address bits 0xf800 and the low three bits of the interrupt cell */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0	/* child interrupt 1 -> &mpic <40 1 0 0> */
+			0000 0 0 2 &mpic 1 1 0 0	/* child interrupt 2 -> &mpic <1 1 0 0> */
+			0000 0 0 3 &mpic 2 1 0 0	/* child interrupt 3 -> &mpic <2 1 0 0> */
+			0000 0 0 4 &mpic 3 1 0 0	/* child interrupt 4 -> &mpic <3 1 0 0> */
+			>;
+	};
+};
+
+&pci1 {	/* SoC-level properties for the PCIe controller the board file labels pci1 */
+	compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;	/* three-cell PCI addresses, matching the 3+2+2 cell ranges entries in the board files */
+	bus-range = <0 0xff>;
+	interrupts = <21 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;	/* pamu0 is the pamu@0 node under iommu@20000 in &soc */
+	pcie@0 {	/* child node that carries the interrupt map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell per child interrupt (column 4 of the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <21 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* lookup compares address bits 0xf800 and the low three bits of the interrupt cell */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0	/* child interrupt 1 -> &mpic <41 1 0 0> */
+			0000 0 0 2 &mpic 5 1 0 0	/* child interrupt 2 -> &mpic <5 1 0 0> */
+			0000 0 0 3 &mpic 6 1 0 0	/* child interrupt 3 -> &mpic <6 1 0 0> */
+			0000 0 0 4 &mpic 7 1 0 0	/* child interrupt 4 -> &mpic <7 1 0 0> */
+			>;
+	};
+};
+
+&pci2 {	/* SoC-level properties for the PCIe controller the board file labels pci2 */
+	compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;	/* three-cell PCI addresses, matching the 3+2+2 cell ranges entries in the board files */
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;	/* pamu0 is the pamu@0 node under iommu@20000 in &soc */
+	pcie@0 {	/* child node that carries the interrupt map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one cell per child interrupt (column 4 of the map below) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <22 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* lookup compares address bits 0xf800 and the low three bits of the interrupt cell */
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0	/* child interrupt 1 -> &mpic <42 1 0 0> */
+			0000 0 0 2 &mpic 9 1 0 0	/* child interrupt 2 -> &mpic <9 1 0 0> */
+			0000 0 0 3 &mpic 10 1 0 0	/* child interrupt 3 -> &mpic <10 1 0 0> */
+			0000 0 0 4 &mpic 11 1 0 0	/* child interrupt 4 -> &mpic <11 1 0 0> */
+			>;
+	};
+};
+
+&dcsr {	/* children of the dcsr node; the board file supplies the ranges window */
+	#address-cells = <1>;	/* child reg entries below are (address, size) pairs of one cell each */
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,t1023-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0	/* three 4-cell interrupt specifiers */
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {	/* NOTE(review): node has reg (first address 0x1000) but no unit address in its name */
+		compatible = "fsl,t1023-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;	/* two regions: 0x1000 (size 0x1000) and 0x1002000 (size 0x10000) */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {	/* NOTE(review): node has reg (first address 0x8000) but no unit address in its name */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;	/* two regions: 0x8000 and 0x1A000, each of size 0x1000 */
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,t1023-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;	/* ddr1 is memory-controller@8000 in &soc */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,t1023-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,t1023-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;	/* two regions: 0x30000 (size 0x1000) and 0x1022000 (size 0x10000) */
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;	/* two regions: 0x31000 (size 0x1000) and 0x1042000 (size 0x10000) */
+	};
+	dcsr-cpu-sb-proxy@100000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;	/* cpu0 label is defined outside this file */
+		reg = <0x100000 0x1000 0x101000 0x1000>;	/* two adjacent regions of size 0x1000 */
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;	/* cpu1 label is defined outside this file */
+		reg = <0x108000 0x1000 0x109000 0x1000>;	/* two adjacent regions of size 0x1000 */
+	};
+};
+
+&bportals {	/* six bman-portal nodes; portal n has regions at n*0x4000 and 0x1000000 + n*0x1000 and interrupt 105 + 2n */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		cell-index = <0x0>;
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;	/* two regions: size 0x4000 and size 0x1000 */
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		cell-index = <0x1>;
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		cell-index = <2>;	/* decimal here, hex in the sibling portals; same cell value */
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		cell-index = <0x3>;
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		cell-index = <0x4>;
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		cell-index = <0x5>;
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+};
+
+&qportals {	/* six qman-portal nodes; portal n has regions at n*0x4000 and 0x1000000 + n*0x1000 and interrupt 104 + 2n */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;	/* two regions: size 0x4000 and size 0x1000 */
+		interrupts = <104 0x2 0 0>;
+		cell-index = <0x0>;
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <106 0x2 0 0>;
+		cell-index = <0x1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <108 0x2 0 0>;
+		cell-index = <0x2>;
+	};
+	qportal3: qman-portal@c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <110 0x2 0 0>;
+		cell-index = <0x3>;
+	};
+	qportal4: qman-portal@10000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <112 0x2 0 0>;
+		cell-index = <0x4>;
+	};
+	qportal5: qman-portal@14000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <114 0x2 0 0>;
+		cell-index = <0x5>;
+	};
+};
+
+&soc {	/* on-chip peripherals; the board file supplies the ranges window and reg for this node */
+	#address-cells = <1>;	/* child reg entries are (offset, size) pairs of one cell each */
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;	/* several nodes below share <16 2 1 n> and differ only in the fourth cell */
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <16>;
+	};
+
+	ddr1: memory-controller@8000 {	/* label is referenced by dcsr-ddr@12000 in &dcsr */
+		compatible = "fsl,qoriq-memory-controller-v5.0",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,t1023-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000>;
+		interrupts = <16 2 1 27>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x1000>;
+		ranges = <0 0x20000 0x1000>;	/* child address 0 maps to offset 0x20000, size 0x1000 */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+		pamu0: pamu@0 {	/* target of the fsl,iommu-parent properties in this file */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <128 1>;
+			fsl,secondary-cache-geometry = <32 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {	/* label is the first cell of the fsl,liodn-reg properties below */
+		compatible = "fsl,t1023-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+/include/ "qoriq-clockgen2.dtsi"
+	global-utilities@e1000 {	/* presumably extends a node from the include above; only compatible is set here */
+		compatible = "fsl,t1023-clockgen", "fsl,qoriq-clockgen-2.0";
+	};
+
+	rcpm: global-utilities@e2000 {	/* label is the target of the sleep properties in the board files */
+		compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.1";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t1023-sfp";
+		reg = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t1023-serdes";
+		reg = <0xea000 0x4000>;
+	};
+
+	tmu: tmu@f0000 {	/* sensor provider for the cpu-thermal zone below */
+		compatible = "fsl,qoriq-tmu";
+		reg = <0xf0000 0x1000>;
+		interrupts = <18 2 0 0>;
+		fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>;	/* upper half-word of each cell (0xb, 0xa, 0x8, 0x3) equals the last index of the matching group below */
+		fsl,tmu-calibration = <0x00000000 0x0000000f	/* group 0: indices 0x0-0xb; cells come in pairs */
+				       0x00000001 0x00000017
+				       0x00000002 0x0000001e
+				       0x00000003 0x00000026
+				       0x00000004 0x0000002e
+				       0x00000005 0x00000035
+				       0x00000006 0x0000003d
+				       0x00000007 0x00000044
+				       0x00000008 0x0000004c
+				       0x00000009 0x00000053
+				       0x0000000a 0x0000005b
+				       0x0000000b 0x00000064
+
+				       0x00010000 0x00000011	/* group 1: indices 0x0-0xa */
+				       0x00010001 0x0000001c
+				       0x00010002 0x00000024
+				       0x00010003 0x0000002b
+				       0x00010004 0x00000034
+				       0x00010005 0x00000039
+				       0x00010006 0x00000042
+				       0x00010007 0x0000004c
+				       0x00010008 0x00000051
+				       0x00010009 0x0000005a
+				       0x0001000a 0x00000063
+
+				       0x00020000 0x00000013	/* group 2: indices 0x0-0x8 */
+				       0x00020001 0x00000019
+				       0x00020002 0x00000024
+				       0x00020003 0x0000002c
+				       0x00020004 0x00000035
+				       0x00020005 0x0000003d
+				       0x00020006 0x00000046
+				       0x00020007 0x00000050
+				       0x00020008 0x00000059
+
+				       0x00030000 0x00000002	/* group 3: indices 0x0-0x3 */
+				       0x00030001 0x0000000d
+				       0x00030002 0x00000019
+				       0x00030003 0x00000024>;
+		#thermal-sensor-cells = <1>;	/* consumers pass one cell, e.g. <&tmu 0> below */
+	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <1000>;	/* presumably milliseconds, per the thermal-zones binding - confirm */
+			polling-delay = <5000>;
+
+			thermal-sensors = <&tmu 0>;
+
+			trips {
+				cpu_alert: cpu-alert {	/* trip used by both cooling maps below */
+					temperature = <85000>;	/* presumably millidegrees Celsius, per the thermal-zones binding - confirm */
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu_crit: cpu-crit {	/* not referenced by any cooling map in this file */
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu0 THERMAL_NO_LIMIT	/* macro comes from dt-bindings/thermal/thermal.h included at the top of this file */
+							THERMAL_NO_LIMIT>;
+				};
+				map1 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu1 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+
+	scfg: global-utilities@fc000 {
+		compatible = "fsl,t1023-scfg";
+		reg = <0xfc000 0x1000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {	/* presumably extends the node from the include above; board files add the SPI devices */
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {	/* presumably extends the node from the include above */
+		compatible = "fsl,t1023-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+		no-1-8-v;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {	/* presumably extends the node from the include above */
+		compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		phy_type = "utmi";
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {	/* presumably extends the node from the include above */
+		compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {	/* presumably extends the node from the include above */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3l-0.dtsi"
+/include/ "qoriq-fman3-0-10g-0-best-effort.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+	fman@400000 {	/* empty bodies: these entries only attach the enetN labels to the four MAC nodes */
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts
new file mode 100644
index 0000000000..9ea7942f91
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts
@@ -0,0 +1,280 @@
+/*
+ * T1024 QDS Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t102xsi-pre.dtsi"
+
+/ {	/* T1024 QDS board: address windows and board-level devices; SoC details come from t1024si-post.dtsi included at the end */
+	model = "fsl,T1024QDS";
+	compatible = "fsl,T1024QDS";
+	#address-cells = <2>;	/* top-level addresses and sizes are two cells each */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {	/* size and alignment only; no fixed address is given here */
+			size = <0 0x1000000>;	/* 16 MiB */
+			alignment = <0 0x1000000>;
+		};
+
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 4 MiB */
+			alignment = <0 0x400000>;
+		};
+
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 32 MiB */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000	/* child <0 0> -> 0xf_e8000000, size 0x08000000 (used by nor@0,0) */
+			  2 0 0xf 0xff800000 0x00010000	/* child <2 0> -> 0xf_ff800000, size 0x10000 (used by nand@2,0) */
+			  3 0 0xf 0xffdf0000 0x00008000>;	/* child <3 0> -> 0xf_ffdf0000, size 0x8000 (used by board-control@3,0) */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;	/* size 0x8000000 = 128 MiB, the full first ranges window */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,tetra-fpga", "fsl,fpga-qixis";
+			reg = <3 0 0x300>;
+			ranges = <0 3 0 0x300>;	/* child address 0 -> parent <3 0>, size 0x300 */
+		};
+	};
+
+	memory {
+		device_type = "memory";	/* no reg here; presumably filled in by the bootloader - confirm */
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;	/* child 0 -> 0xf_00000000, size 0x01072000 */
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;	/* child 0 -> 0xf_f4000000, size 32 MiB */
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;	/* child 0 -> 0xf_f6000000, size 32 MiB */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;	/* child 0 -> 0xf_fe000000, size 16 MiB */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {	/* board-level devices; reg is the only per-device address cell */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q128a11", "jedec,spi-nor";  /* 16MB */
+				reg = <0>;
+				spi-max-frequency = <10000000>;
+			};
+
+			flash@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040", "jedec,spi-nor";  /* 512KB */
+				reg = <1>;
+				spi-max-frequency = <10000000>;
+			};
+
+			flash@2 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "eon,en25s64", "jedec,spi-nor";   /* 8MB */
+				reg = <2>;
+				spi-max-frequency = <10000000>;
+			};
+
+			slic@2 {	/* NOTE(review): same reg value (2) as flash@2 above - confirm the sharing is intended */
+				compatible = "maxim,ds26522";
+				reg = <2>;
+				spi-max-frequency = <2000000>;
+			};
+
+			slic@3 {
+				compatible = "maxim,ds26522";
+				reg = <3>;
+				spi-max-frequency = <2000000>;
+			};
+		};
+
+		i2c@118000 {
+			i2c-mux@77 {	/* mux at address 0x77; only channels 0, 2 and 3 are populated below */
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x0>;	/* mux channel 0 */
+
+					eeprom@50 {
+						compatible = "atmel,24c512";
+						reg = <0x50>;
+					};
+
+					eeprom@51 {
+						compatible = "atmel,24c02";
+						reg = <0x51>;
+					};
+
+					eeprom@57 {
+						compatible = "atmel,24c02";
+						reg = <0x57>;
+					};
+				};
+
+				i2c@2 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x2>;	/* mux channel 2 */
+
+					ina220@40 {
+						compatible = "ti,ina220";
+						reg = <0x40>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@41 {
+						compatible = "ti,ina220";
+						reg = <0x41>;
+						shunt-resistor = <1000>;
+					};
+				};
+
+				i2c@3 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x3>;	/* mux channel 3 */
+
+					adt7461@4c {
+						/* Thermal Monitor */
+						compatible = "adi,adt7461";
+						reg = <0x4c>;
+					};
+
+					eeprom@55 {
+						compatible = "atmel,24c02";
+						reg = <0x55>;
+					};
+
+					eeprom@56 {
+						compatible = "atmel,24c512";
+						reg = <0x56>;
+					};
+
+					eeprom@57 {	/* same address as the eeprom@57 on channel 0, but behind a different mux channel */
+						compatible = "atmel,24c512";
+						reg = <0x57>;
+					};
+				};
+			};
+			rtc@68 {	/* sits directly on i2c@118000, not behind the mux */
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x5 0x1 0 0>;
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 {	/* SoC-level properties are added by &pci0 via t1024si-post.dtsi */
+		reg = <0xf 0xfe240000 0 0x10000>;	/* size 0x10000 at 0xf_fe240000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000	/* memory space: bus 0xe0000000 -> CPU 0xc_00000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;	/* I/O space: bus 0 -> CPU 0xf_f8000000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* memory window, child and parent addresses identical */
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000	/* I/O window, child and parent addresses identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {	/* SoC-level properties are added by &pci1 via t1024si-post.dtsi */
+		reg = <0xf 0xfe250000 0 0x10000>;	/* size 0x10000 at 0xf_fe250000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000	/* memory space: bus 0xe0000000 -> CPU 0xc_10000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;	/* I/O space: bus 0 -> CPU 0xf_f8010000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* memory window, child and parent addresses identical */
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000	/* I/O window, child and parent addresses identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {	/* SoC-level properties are added by &pci2 via t1024si-post.dtsi */
+		reg = <0xf 0xfe260000 0 0x10000>;	/* size 0x10000 at 0xf_fe260000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000	/* memory space: bus 0xe0000000 -> CPU 0xc_20000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* I/O space: bus 0 -> CPU 0xf_f8020000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* memory window, child and parent addresses identical */
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000	/* I/O window, child and parent addresses identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+#include "t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
new file mode 100644
index 0000000000..270aaf631f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -0,0 +1,268 @@
+/*
+ * T1024 RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t102xsi-pre.dtsi"
+
+/ {	/* T1024 RDB board: address windows and board-level devices; SoC details come from t1024si-post.dtsi included at the end */
+	model = "fsl,T1024RDB";
+	compatible = "fsl,T1024RDB";
+	#address-cells = <2>;	/* top-level addresses and sizes are two cells each */
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		sg_2500_aqr105_phy4 = &sg_2500_aqr105_phy4;	/* ethernet-phy@2 under xmdio0; no MAC in this file references it by phy-handle */
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {	/* size and alignment only; no fixed address is given here */
+			size = <0 0x1000000>;	/* 16 MiB */
+			alignment = <0 0x1000000>;
+		};
+
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;	/* 4 MiB */
+			alignment = <0 0x400000>;
+		};
+
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;	/* 32 MiB */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000	/* child <0 0> -> 0xf_e8000000, size 0x08000000 */
+			  2 0 0xf 0xff800000 0x00010000	/* child <2 0> -> 0xf_ff800000, size 0x10000 */
+			  3 0 0xf 0xffdf0000 0x00008000>;	/* child <3 0> -> 0xf_ffdf0000, size 0x8000 */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;	/* size 0x8000000 = 128 MiB, the full first ranges window */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {	/* NOTE(review): unit address says 1 but reg and the ifc ranges use 2 - confirm which is intended */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		board-control@2,0 {	/* NOTE(review): unit address says 2 but reg and the ifc ranges use 3 - confirm which is intended */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,t1024-cpld";
+			reg = <3 0 0x300>;
+			ranges = <0 3 0 0x300>;	/* child address 0 -> parent <3 0>, size 0x300 */
+			bank-width = <1>;
+			device-width = <1>;
+		};
+	};
+
+	memory {
+		device_type = "memory";	/* no reg here; presumably filled in by the bootloader - confirm */
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;	/* child 0 -> 0xf_00000000, size 0x01072000 */
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;	/* child 0 -> 0xf_f4000000, size 32 MiB */
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;	/* child 0 -> 0xf_f6000000, size 32 MiB */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;	/* child 0 -> 0xf_fe000000, size 16 MiB */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {	/* board-level devices; reg is the only per-device address cell */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q512ax3", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clk */
+			};
+
+			slic@1 {
+				compatible = "maxim,ds26522";
+				reg = <1>;
+				spi-max-frequency = <2000000>;
+			};
+
+			slic@2 {
+				compatible = "maxim,ds26522";
+				reg = <2>;
+				spi-max-frequency = <2000000>;
+			};
+		};
+
+		i2c@118000 {	/* devices sit directly on this bus; the mux is on i2c@118100 below */
+			adt7461@4c {
+				/* Thermal Monitor */
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			current-sensor@40 {
+				compatible = "ti,ina220";
+				reg = <0x40>;
+				shunt-resistor = <1000>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,24c256";
+				reg = <0x50>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@118100 {
+			i2c-mux@77 {	/* mux at address 0x77; no channel sub-nodes are described here */
+				compatible = "nxp,pca9546";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		fman@400000 {	/* per-MAC PHY wiring; each sleep mask below is a different single bit */
+			fm1mac1: ethernet@e0000 {
+				phy-handle = <&xg_aqr105_phy3>;	/* ethernet-phy@1 under xmdio0 */
+				phy-connection-type = "xgmii";
+				sleep = <&rcpm 0x80000000>;
+			};
+
+			fm1mac2: ethernet@e2000 {	/* no phy-handle or phy-connection-type is set for this MAC */
+				sleep = <&rcpm 0x40000000>;
+			};
+
+			fm1mac3: ethernet@e4000 {
+				phy-handle = <&rgmii_phy2>;	/* ethernet-phy@6 under mdio0 */
+				phy-connection-type = "rgmii";
+				sleep = <&rcpm 0x20000000>;
+			};
+
+			fm1mac4: ethernet@e6000 {
+				phy-handle = <&rgmii_phy1>;	/* ethernet-phy@2 under mdio0 */
+				phy-connection-type = "rgmii";
+				sleep = <&rcpm 0x10000000>;
+			};
+
+
+			mdio0: mdio@fc000 {	/* PHYs on this bus carry no compatible; each unit address equals its reg */
+				rgmii_phy1: ethernet-phy@2 {
+					reg = <0x2>;
+				};
+				rgmii_phy2: ethernet-phy@6 {
+					reg = <0x6>;
+				};
+			};
+
+			xmdio0: mdio@fd000 {	/* both PHYs here declare the clause 45 compatible */
+				xg_aqr105_phy3: ethernet-phy@1 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x1>;
+				};
+				sg_2500_aqr105_phy4: ethernet-phy@2 {	/* exposed through the aliases node at the top of this file */
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x2>;
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 {	/* SoC-level properties are added by &pci0 via t1024si-post.dtsi */
+		reg = <0xf 0xfe240000 0 0x10000>;	/* size 0x10000 at 0xf_fe240000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000	/* memory space: bus 0xe0000000 -> CPU 0xc_00000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;	/* I/O space: bus 0 -> CPU 0xf_f8000000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* memory window, child and parent addresses identical */
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000	/* I/O window, child and parent addresses identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {	/* SoC-level properties are added by &pci1 via t1024si-post.dtsi */
+		reg = <0xf 0xfe250000 0 0x10000>;	/* size 0x10000 at 0xf_fe250000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000	/* memory space: bus 0xe0000000 -> CPU 0xc_10000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;	/* I/O space: bus 0 -> CPU 0xf_f8010000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* memory window, child and parent addresses identical */
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000	/* I/O window, child and parent addresses identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {	/* SoC-level properties are added by &pci2 via t1024si-post.dtsi */
+		reg = <0xf 0xfe260000 0 0x10000>;	/* size 0x10000 at 0xf_fe260000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000	/* memory space: bus 0xe0000000 -> CPU 0xc_20000000, 256 MiB */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* I/O space: bus 0 -> CPU 0xf_f8020000, 64 KiB */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000	/* memory window, child and parent addresses identical */
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000	/* I/O window, child and parent addresses identical */
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+#include "t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
new file mode 100644
index 0000000000..bb480346a5
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
@@ -0,0 +1,100 @@
+/*
+ * T1024 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "t1023si-post.dtsi"
+
+/ {	/* T1024 additions on top of t1023si-post.dtsi: display aliases and the qe node */
+	aliases {
+		vga = &display;	/* both aliases point at display@180000, added under &soc later in this file */
+		display = &display;
+	};
+
+	qe:qe@ffe140000 {	/* child nodes are added through &qe later in this file */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "qe";
+		compatible = "fsl,qe";
+		ranges = <0x0 0xf 0xfe140000 0x40000>;	/* child 0 -> 0xf_fe140000, size 0x40000 */
+		reg = <0xf 0xfe140000 0 0x480>;
+		fsl,qe-num-riscs = <1>;
+		fsl,qe-num-snums = <28>;
+		brg-frequency = <0>;	/* zero placeholder; presumably filled in at boot - confirm */
+		bus-frequency = <0>;	/* zero placeholder; presumably filled in at boot - confirm */
+	};
+};
+
+&soc {	/* adds the display node to the soc node; target of the vga and display aliases in this file */
+	display:display@180000 {
+		compatible = "fsl,t1024-diu", "fsl,diu";
+		reg = <0x180000 1000>;	/* NOTE(review): size is decimal 1000 (0x3e8), not 0x1000 - confirm this is intended */
+		interrupts = <74 2 0 0>;
+	};
+};
+
+&qe {	/* children of the qe node defined in the root node of this file; addresses are offsets inside its ranges window */
+	qeic: interrupt-controller@80 {	/* interrupt parent of the ucc nodes below */
+		interrupt-controller;
+		compatible = "fsl,qe-ic";
+		#address-cells = <0>;
+		#interrupt-cells = <1>;	/* consumers pass a single cell, e.g. interrupts = <32> */
+		reg = <0x80 0x80>;
+		interrupts = <95 2 0 0  94 2 0 0>; //high:79 low:78
+	};
+
+	ucc@2000 {
+		cell-index = <1>;
+		reg = <0x2000 0x200>;
+		interrupts = <32>;	/* one cell, routed through qeic */
+		interrupt-parent = <&qeic>;
+	};
+
+	ucc@2200 {
+		cell-index = <3>;
+		reg = <0x2200 0x200>;
+		interrupts = <34>;	/* one cell, routed through qeic */
+		interrupt-parent = <&qeic>;
+	};
+
+	muram@10000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,qe-muram", "fsl,cpm-muram";
+		ranges = <0x0 0x10000 0x6000>;	/* child 0 -> qe offset 0x10000, size 0x6000 */
+
+		data-only@0 {
+			compatible = "fsl,qe-muram-data", "fsl,cpm-muram-data";
+			reg = <0x0 0x6000>;	/* covers the whole muram window */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
new file mode 100644
index 0000000000..d87ea13164
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
@@ -0,0 +1,95 @@
+/*
+ * T1024/T1023 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ {	/* root skeleton: cell sizes, default interrupt parent, aliases and the two CPU nodes */
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* default for every node that does not override it */
+
+	aliases {	/* every label referenced here is defined outside this block */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		dma0 = &dma0;
+		dma1 = &dma1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		sdhc = &sdhc;
+
+		crypto = &crypto;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e5500@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;	/* cache chain: core -> L2_1 -> cpc */
+			#cooling-cells = <2>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu1: PowerPC,e5500@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_2>;	/* cache chain: core -> L2_2 -> cpc */
+			#cooling-cells = <2>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
new file mode 100644
index 0000000000..fb6bc02ebb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
@@ -0,0 +1,46 @@
+/*
+ * T1040D4RDB Device Tree Source
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xd4rdb.dtsi"
+
+/ {	/* board identity for T1040D4RDB; the rest comes from the includes around this node */
+	model = "fsl,T1040D4RDB";
+	compatible = "fsl,T1040D4RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* default interrupt parent for the whole tree */
+};
+
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1040qds.dts b/arch/powerpc/boot/dts/fsl/t1040qds.dts
new file mode 100644
index 0000000000..5f76edc783
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040qds.dts
@@ -0,0 +1,46 @@
+/*
+ * T1040QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xqds.dtsi"
+
+/ {	/* board identity for T1040QDS; the rest comes from the includes around this node */
+	model = "fsl,T1040QDS";
+	compatible = "fsl,T1040QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;	/* default interrupt parent for the whole tree */
+};
+
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
new file mode 100644
index 0000000000..d4f5f159d6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * T1040RDB-REV-A Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ */
+
+#include "t1040rdb.dts"
+
+/ {	/* overrides only the model string of the included t1040rdb.dts */
+	model = "fsl,T1040RDB-REV-A";
+};
+
+&seville_port0 {	/* the four even-numbered ports get new labels; t1040rdb.dts uses ETH3/5/7/9 for them */
+	label = "ETH5";
+};
+
+&seville_port2 {
+	label = "ETH7";
+};
+
+&seville_port4 {
+	label = "ETH9";
+};
+
+&seville_port6 {
+	label = "ETH11";
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
new file mode 100644
index 0000000000..dd3aab81e9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -0,0 +1,188 @@
+/*
+ * T1040RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {	/* T1040RDB board: identity, FMan MAC/PHY wiring and the CPLD on the local bus */
+	model = "fsl,T1040RDB";
+	compatible = "fsl,T1040RDB";
+
+	aliases {
+		phy_sgmii_2 = &phy_sgmii_2;
+	};
+
+	soc@ffe000000 {
+		fman@400000 {
+			ethernet@e0000 {
+				fixed-link = <0 1 1000 0 0>;	/* NOTE(review): looks like the legacy 5-cell form <id duplex speed pause asym> -- confirm */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				fixed-link = <1 1 1000 0 0>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_sgmii_2>;	/* the only MAC here with a real PHY, at MDIO address 3 */
+				phy-connection-type = "sgmii";
+			};
+
+			mdio@fc000 {
+				phy_sgmii_2: ethernet-phy@3 {
+					reg = <0x03>;
+				};
+
+				/* VSC8514 QSGMII PHY */
+				phy_qsgmii_0: ethernet-phy@4 {	/* addresses 4-7: referenced by seville_port0..3 */
+					reg = <0x4>;
+				};
+
+				phy_qsgmii_1: ethernet-phy@5 {
+					reg = <0x5>;
+				};
+
+				phy_qsgmii_2: ethernet-phy@6 {
+					reg = <0x6>;
+				};
+
+				phy_qsgmii_3: ethernet-phy@7 {
+					reg = <0x7>;
+				};
+
+				/* VSC8514 QSGMII PHY */
+				phy_qsgmii_4: ethernet-phy@8 {	/* addresses 8-0xb: referenced by seville_port4..7 */
+					reg = <0x8>;
+				};
+
+				phy_qsgmii_5: ethernet-phy@9 {
+					reg = <0x9>;
+				};
+
+				phy_qsgmii_6: ethernet-phy@a {
+					reg = <0xa>;
+				};
+
+				phy_qsgmii_7: ethernet-phy@b {
+					reg = <0xb>;
+				};
+			};
+		};
+	};
+
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {	/* only the compatible is set here; other properties come from elsewhere */
+			compatible = "fsl,t1040rdb-cpld";
+		};
+	};
+};
+
+#include "t1040si-post.dtsi"
+
+&seville_switch {	/* enable the switch and its ports, which t1040si-post.dtsi leaves disabled */
+	status = "okay";
+};
+
+&seville_port0 {	/* ports 0-7: in-band QSGMII to phy_qsgmii_0..7, labelled ETH3..ETH10 */
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_0>;
+	phy-mode = "qsgmii";
+	label = "ETH3";
+	status = "okay";
+};
+
+&seville_port1 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_1>;
+	phy-mode = "qsgmii";
+	label = "ETH4";
+	status = "okay";
+};
+
+&seville_port2 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_2>;
+	phy-mode = "qsgmii";
+	label = "ETH5";
+	status = "okay";
+};
+
+&seville_port3 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_3>;
+	phy-mode = "qsgmii";
+	label = "ETH6";
+	status = "okay";
+};
+
+&seville_port4 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_4>;
+	phy-mode = "qsgmii";
+	label = "ETH7";
+	status = "okay";
+};
+
+&seville_port5 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_5>;
+	phy-mode = "qsgmii";
+	label = "ETH8";
+	status = "okay";
+};
+
+&seville_port6 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_6>;
+	phy-mode = "qsgmii";
+	label = "ETH9";
+	status = "okay";
+};
+
+&seville_port7 {
+	managed = "in-band-status";
+	phy-handle = <&phy_qsgmii_7>;
+	phy-mode = "qsgmii";
+	label = "ETH10";
+	status = "okay";
+};
+
+&seville_port8 {	/* ports 8 and 9 are only enabled; their link setup lives in t1040si-post.dtsi */
+	status = "okay";
+};
+
+&seville_port9 {
+	status = "okay";
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
new file mode 100644
index 0000000000..ad0ab33336
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -0,0 +1,756 @@
+/*
+ * T1040 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+&bman_fbpr {	/* the three nodes below get a compatible and the same alloc-ranges value */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;	/* NOTE(review): presumably <addr-hi addr-lo size-hi size-lo>; cell sizes are set by the parent -- confirm */
+};
+
+&qman_fqd {
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>;
+};
+
+&ifc {	/* SoC-level properties of the local bus node that board files label "ifc" */
+	#address-cells = <2>;	/* two address cells for children, matching unit addresses like cpld@3,0 */
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+&pci0 {	/* PCIe controller 0: controller properties plus its pcie@0 child */
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <20 2 0 0>;	/* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <	/* child interrupt 1..4 -> MPIC sources 40, 1, 2, 3 */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+&pci1 {	/* PCIe controller 1: controller properties plus its pcie@0 child */
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	interrupts = <21 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <21 2 0 0>;	/* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <	/* child interrupt 1..4 -> MPIC sources 41, 5, 6, 7 */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+&pci2 {	/* PCIe controller 2: controller properties plus its pcie@0 child */
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <22 2 0 0>;	/* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <	/* child interrupt 1..4 -> MPIC sources 42, 9, 10, 11 */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+&pci3 {	/* PCIe controller 3: controller properties plus its pcie@0 child */
+	compatible = "fsl,t1040-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <23 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <23 2 0 0>;	/* same specifier as the controller node above */
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <	/* child interrupt 1..4 -> MPIC sources 43, 0, 4, 8 */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0
+			0000 0 0 2 &mpic 0 1 0 0
+			0000 0 0 3 &mpic 4 1 0 0
+			0000 0 0 4 &mpic 8 1 0 0
+			>;
+	};
+};
+
+&dcsr {	/* DCSR bus and its child blocks; each child reg is <offset size> pairs (1+1 cells) */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,t1040-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {	/* NOTE(review): has reg but no unit address in its name, unlike most siblings */
+		compatible = "fsl,t1040-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;	/* two regions: 0x1000+0x1000 and 0x1002000+0x10000 */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {	/* NOTE(review): has reg but no unit address in its name, unlike most siblings */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,t1040-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,t1040-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;	/* points at the memory-controller node labelled ddr1 */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,t1040-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,t1040-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,t1040-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,t1040-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;
+	};
+	dcsr-cpu-sb-proxy@100000 {	/* one proxy per CPU label cpu0..cpu3, spaced 0x8000 apart */
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+};
+
+&bportals {	/* ten bman-portal nodes; portal N has regions at N*0x4000 and 0x1000000 + N*0x1000 */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;	/* odd sources 105..123, one per portal */
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+};
+
+&qportals {	/* ten qman-portal nodes; portal N has regions at N*0x4000 and 0x1000000 + N*0x1000 */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <104 0x2 0 0>;	/* even sources 104..122, one per portal */
+		cell-index = <0x0>;	/* cell-index equals the portal number N */
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <106 0x2 0 0>;
+		cell-index = <0x1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <108 0x2 0 0>;
+		cell-index = <0x2>;
+	};
+	qportal3: qman-portal@c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <110 0x2 0 0>;
+		cell-index = <0x3>;
+	};
+	qportal4: qman-portal@10000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <112 0x2 0 0>;
+		cell-index = <0x4>;
+	};
+	qportal5: qman-portal@14000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <114 0x2 0 0>;
+		cell-index = <0x5>;
+	};
+	qportal6: qman-portal@18000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <116 0x2 0 0>;
+		cell-index = <0x6>;
+	};
+	qportal7: qman-portal@1c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <118 0x2 0 0>;
+		cell-index = <0x7>;
+	};
+	qportal8: qman-portal@20000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <120 0x2 0 0>;
+		cell-index = <0x8>;
+	};
+	qportal9: qman-portal@24000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <122 0x2 0 0>;
+		cell-index = <0x9>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {	/* this and the ddr, cpc and corenet-cf nodes below share MPIC source 16, differing in the 4th cell */
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <16>;
+	};
+
+	ddr1: memory-controller@8000 {	/* label is referenced by dcsr-ddr's dev-handle */
+		compatible = "fsl,qoriq-memory-controller-v5.0",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,t1040-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000>;
+		interrupts = <16 2 1 27>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {	/* PAMU container; the pamu0 child is what fsl,iommu-parent properties point at */
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x1000>;
+		ranges = <0 0x20000 0x1000>;	/* child 0 -> soc offset 0x20000, length 0x1000 */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+		pamu0: pamu@0 {
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <128 1>;
+			fsl,secondary-cache-geometry = <16 2>;
+		};
+	};
+
+/include/ "qoriq-mpic.dtsi"
+
+	guts: global-utilities@e0000 {	/* label is the target of the fsl,liodn-reg properties further down */
+		compatible = "fsl,t1040-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+/include/ "qoriq-clockgen2.dtsi"
+	global-utilities@e1000 {	/* sets only the compatible; the node body comes from the include above */
+		compatible = "fsl,t1040-clockgen", "fsl,qoriq-clockgen-2.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,t1040-rcpm", "fsl,qoriq-rcpm-2.1";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t1040-sfp";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t1040-serdes";
+		reg	   = <0xea000 0x4000>;
+	};
+
+	tmu: tmu@f0000 {	/* thermal monitoring unit; provides the sensor used by thermal-zones */
+		compatible = "fsl,qoriq-tmu";
+		reg = <0xf0000 0x1000>;
+		interrupts = <18 2 0 0>;
+		fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>;	/* four entries */
+		fsl,tmu-calibration = <0x00000000 0x00000025	/* value pairs in four groups; NOTE(review): presumably one group per tmu-range entry -- confirm */
+				       0x00000001 0x00000028
+				       0x00000002 0x0000002d
+				       0x00000003 0x00000031
+				       0x00000004 0x00000036
+				       0x00000005 0x0000003a
+				       0x00000006 0x00000040
+				       0x00000007 0x00000044
+				       0x00000008 0x0000004a
+				       0x00000009 0x0000004f
+				       0x0000000a 0x00000054
+
+				       0x00010000 0x0000000d
+				       0x00010001 0x00000013
+				       0x00010002 0x00000019
+				       0x00010003 0x0000001f
+				       0x00010004 0x00000025
+				       0x00010005 0x0000002d
+				       0x00010006 0x00000033
+				       0x00010007 0x00000043
+				       0x00010008 0x0000004b
+				       0x00010009 0x00000053
+
+				       0x00020000 0x00000010
+				       0x00020001 0x00000017
+				       0x00020002 0x0000001f
+				       0x00020003 0x00000029
+				       0x00020004 0x00000031
+				       0x00020005 0x0000003c
+				       0x00020006 0x00000042
+				       0x00020007 0x0000004d
+				       0x00020008 0x00000056
+
+				       0x00030000 0x00000012
+				       0x00030001 0x0000001d>;
+		#thermal-sensor-cells = <1>;	/* consumers pass one cell, e.g. <&tmu 2> */
+	};
+
+	thermal-zones {	/* a single CPU zone: two trip points, with the passive trip mapped onto cpu0..cpu3 */
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <1000>;	/* NOTE(review): presumably milliseconds per the thermal-zones binding -- confirm */
+			polling-delay = <5000>;
+
+			thermal-sensors = <&tmu 2>;	/* sensor index 2 of the tmu node */
+
+			trips {
+				cpu_alert: cpu-alert {
+					temperature = <85000>;	/* NOTE(review): presumably millidegrees Celsius -- confirm */
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu_crit: cpu-crit {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {	/* all four maps bind the cpu_alert trip; none reference cpu_crit */
+				map0 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu0 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map1 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu1 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map2 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu2 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map3 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu3 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+
+	scfg: global-utilities@fc000 {	/* fourth global-utilities block, at soc offset 0xfc000 */
+		compatible = "fsl,t1040-scfg";
+		reg = <0xfc000 0x1000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {	/* each node after an include adds T1040 properties to the node that include defines */
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,t1040-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+		sdhci,auto-cmd12;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+		usb0: usb@210000 {	/* extra indent level is cosmetic; still a direct child of &soc */
+			compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
+			fsl,iommu-parent = <&pamu0>;
+			fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+			phy_type = "utmi";
+			port0;
+		};
+/include/ "qoriq-usb2-dr-0.dtsi"
+		usb1: usb@211000 {
+			compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
+			fsl,iommu-parent = <&pamu0>;
+			fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+			dr_mode = "host";
+			phy_type = "utmi";
+		};
+
+	display@180000 {	/* DIU node at soc offset 0x180000 */
+		compatible = "fsl,t1040-diu", "fsl,diu";
+		reg = <0x180000 0x1000>;	/* 4 KiB window; size in hex like every other reg in this file */
+		interrupts = <74 2 0 0>;
+	};
+
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {	/* adds the PAMU parent and LIODN register reference to the included node */
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+/include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3l-0.dtsi"
+/include/ "qoriq-fman3-0-1g-0.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+	fman@400000 {	/* the empty bodies below exist only to attach the enet0..enet4 labels */
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		mdio@fc000 {
+			interrupts = <100 1 0 0>;
+		};
+
+		mdio@fd000 {
+			status = "disabled";
+		};
+	};
+
+	seville_switch: ethernet-switch@800000 {	/* switch and all ten ports start disabled; board files enable them */
+		compatible = "mscc,vsc9953-switch";
+		reg = <0x800000 0x290000>;
+		interrupts = <26 2 0 0>;
+		interrupt-names = "xtr";
+		little-endian;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			seville_port0: port@0 {	/* ports 0-7 carry only reg and status here */
+				reg = <0>;
+				status = "disabled";
+			};
+
+			seville_port1: port@1 {
+				reg = <1>;
+				status = "disabled";
+			};
+
+			seville_port2: port@2 {
+				reg = <2>;
+				status = "disabled";
+			};
+
+			seville_port3: port@3 {
+				reg = <3>;
+				status = "disabled";
+			};
+
+			seville_port4: port@4 {
+				reg = <4>;
+				status = "disabled";
+			};
+
+			seville_port5: port@5 {
+				reg = <5>;
+				status = "disabled";
+			};
+
+			seville_port6: port@6 {
+				reg = <6>;
+				status = "disabled";
+			};
+
+			seville_port7: port@7 {
+				reg = <7>;
+				status = "disabled";
+			};
+
+			seville_port8: port@8 {	/* internal fixed 2500 full-duplex link tied to enet0 */
+				reg = <8>;
+				phy-mode = "internal";
+				ethernet = <&enet0>;
+				status = "disabled";
+
+				fixed-link {
+					speed = <2500>;
+					full-duplex;
+				};
+			};
+
+			seville_port9: port@9 {	/* internal fixed 2500 full-duplex link tied to enet1 */
+				reg = <9>;
+				phy-mode = "internal";
+				ethernet = <&enet1>;
+				status = "disabled";
+
+				fixed-link {
+					speed = <2500>;
+					full-duplex;
+				};
+			};
+		};
+	};
+};
+
+&qe {	/* QE node properties plus children: interrupt controller, two UCCs and the MURAM window */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "qe";
+	compatible = "fsl,qe";
+	fsl,qe-num-riscs = <1>;
+	fsl,qe-num-snums = <28>;
+
+	qeic: interrupt-controller@80 {
+		interrupt-controller;
+		compatible = "fsl,qe-ic";
+		#address-cells = <0>;
+		#interrupt-cells = <1>;	/* consumers below pass one cell, e.g. <32> */
+		reg = <0x80 0x80>;
+		interrupts = <95 2 0 0  94 2 0 0>; //high:79 low:78
+	};
+
+	ucc@2000 {
+		cell-index = <1>;
+		reg = <0x2000 0x200>;
+		interrupts = <32>;
+		interrupt-parent = <&qeic>;	/* routed through the QE interrupt controller above */
+	};
+
+	ucc@2200 {
+		cell-index = <3>;
+		reg = <0x2200 0x200>;
+		interrupts = <34>;
+		interrupt-parent = <&qeic>;
+	};
+
+	muram@10000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,qe-muram", "fsl,cpm-muram";
+		ranges = <0x0 0x10000 0x6000>;	/* child 0x0 -> QE offset 0x10000, length 0x6000 */
+
+		data-only@0 {
+			compatible = "fsl,qe-muram-data",
+			"fsl,cpm-muram-data";
+			reg = <0x0 0x6000>;	/* covers the whole 0x6000 window mapped by ranges */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
new file mode 100644
index 0000000000..4fa15f48a4
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
@@ -0,0 +1,105 @@
+/*
+ * T1042D4RDB Device Tree Source
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xd4rdb.dtsi"
+
+/ {	/* T1042D4RDB board overrides, merged over the two /include/d files above */
+	model = "fsl,T1042D4RDB";
+	compatible = "fsl,T1042D4RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {
+			compatible = "fsl,t1040d4rdb-cpld",	/* NOTE(review): "t1040" on a T1042 board; t104xd4rdb.dtsi uses the same string -- confirm intended */
+					"fsl,deepsleep-cpld";	/* second entry is added by this file only */
+		};
+	};
+
+	soc: soc@ffe000000 {
+		fman0: fman@400000 {
+			ethernet@e0000 {
+				phy-handle = <&phy_sgmii_0>;	/* PHY nodes are defined under mdio0 below */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy_sgmii_1>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_sgmii_2>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy_rgmii_0>;
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phy_rgmii_1>;
+				phy-connection-type = "rgmii";
+			};
+
+			mdio0: mdio@fc000 {	/* one bus carries all five PHYs, addresses 1-5 */
+				phy_sgmii_0: ethernet-phy@2 {
+					reg = <0x02>;
+				};
+
+				phy_sgmii_1: ethernet-phy@3 {
+					reg = <0x03>;
+				};
+
+				phy_sgmii_2: ethernet-phy@1 {	/* address 1: not in sequence with sgmii_0/1 (2, 3) */
+					reg = <0x01>;
+				};
+
+				phy_rgmii_0: ethernet-phy@4 {
+					reg = <0x04>;
+				};
+
+				phy_rgmii_1: ethernet-phy@5 {
+					reg = <0x05>;
+				};
+			};
+		};
+	};
+
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042qds.dts b/arch/powerpc/boot/dts/fsl/t1042qds.dts
new file mode 100644
index 0000000000..90a4a73bb9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042qds.dts
@@ -0,0 +1,46 @@
+/*
+ * T1042QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xqds.dtsi"
+
+/ {	/* T1042QDS: only the identity strings differ from the included t104xqds.dtsi */
+	model = "fsl,T1042QDS";	/* replaces "fsl,T1040QDS" set by t104xqds.dtsi */
+	compatible = "fsl,T1042QDS";
+	#address-cells = <2>;	/* same value t104xqds.dtsi already sets */
+	#size-cells = <2>;	/* same value t104xqds.dtsi already sets */
+	interrupt-parent = <&mpic>;	/* same value t104xqds.dtsi already sets */
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
new file mode 100644
index 0000000000..3ebb712224
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
@@ -0,0 +1,76 @@
+/*
+ * T1042RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {	/* T1042RDB board overrides, merged over the two /include/d files above */
+	model = "fsl,T1042RDB";
+	compatible = "fsl,T1042RDB";
+
+	aliases {
+		phy_sgmii_2 = &phy_sgmii_2;	/* t104xrdb.dtsi declares the same alias */
+	};
+
+	soc@ffe000000 {
+		fman@400000 {
+			ethernet@e0000 {
+				status = "disabled";
+			};
+
+			ethernet@e2000 {
+				status = "disabled";
+			};
+
+			ethernet@e4000 {	/* the only one of the three MACs listed here left enabled */
+				phy-handle = <&phy_sgmii_2>;
+				phy-connection-type = "sgmii";
+			};
+
+			mdio@fc000 {
+				phy_sgmii_2: ethernet-phy@3 {
+					reg = <0x03>;
+				};
+			};
+		};
+	};
+
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {
+			compatible = "fsl,t1042rdb-cpld";
+		};
+	};
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
new file mode 100644
index 0000000000..8ec3ff45e6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
@@ -0,0 +1,73 @@
+/*
+ * T1042RDB_PI Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {	/* T1042RDB_PI board overrides, merged over the two /include/d files above */
+	model = "fsl,T1042RDB_PI";
+	compatible = "fsl,T1042RDB_PI";
+
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {
+			compatible = "fsl,t1042rdb_pi-cpld";
+		};
+	};
+
+	soc: soc@ffe000000 {
+		i2c@118000 {
+			rtc@68 {	/* RTC at I2C address 0x68 */
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+				interrupts = <0x2 0x1 0 0>;
+			};
+		};
+
+		fman@400000 {	/* e0000, e2000 and e4000 are all disabled here; t1042rdb.dts keeps e4000 enabled */
+			ethernet@e0000 {
+				status = "disabled";
+			};
+
+			ethernet@e2000 {
+				status = "disabled";
+			};
+
+			ethernet@e4000 {
+				status = "disabled";
+			};
+		};
+	};
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
new file mode 100644
index 0000000000..a5544f9368
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
@@ -0,0 +1,37 @@
+/*
+ * T1042 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "t1040si-post.dtsi"
+
+/* Placeholder for ethernet-related device tree nodes */
diff --git a/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi
new file mode 100644
index 0000000000..863f943128
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi
@@ -0,0 +1,253 @@
+/*
+ * T1040D4RDB/T1042D4RDB Device Tree Source
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {	/* NOTE(review): ranges rows read as <cs offset  parent-hi parent-lo  size>; cell counts are set outside this file -- confirm */
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000	/* first cell 0: matches nor@0,0 */
+			  2 0 0xf 0xff800000 0x00010000	/* first cell 2: matches nand@2,0 */
+			  3 0 0xf 0xffdf0000 0x00008000>;	/* first cell 3: matches cpld@3,0 */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;	/* same size as ranges row 0 */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;	/* same size as ranges row 2 */
+		};
+
+		cpld@3,0 {
+			compatible = "fsl,t1040d4rdb-cpld";
+			reg = <3 0 0x300>;	/* 0x300 of the 0x8000 window in ranges row 3 */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q512ax3", "jedec,spi-nor";
+				reg = <0>;
+				/* input clock */
+				spi-max-frequency = <10000000>;
+			};
+			slic@1 {
+				compatible = "maxim,ds26522";
+				reg = <1>;
+				spi-max-frequency = <2000000>; /* input clock */
+			};
+			slic@2 {
+				compatible = "maxim,ds26522";
+				reg = <2>;
+				spi-max-frequency = <2000000>; /* input clock */
+			};
+		};
+		i2c@118000 {
+			hwmon@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+				interrupts = <0x2 0x1 0 0>;
+			};
+		};
+
+		i2c@118100 {
+			mux@77 {
+				/*
+				 * Child nodes of mux depend on which i2c
+				 * devices are connected via the mini PCI
+				 * connector slot1, the mini PCI connector
+				 * slot2, the HDMI connector, and the PEX
+				 * slot. Systems with such devices attached
+				 * should provide a wrapper .dts file that
+				 * includes this one and adds those nodes.
+				 */
+				compatible = "nxp,pca9546";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x0 0x0 0x10000000
+			  0x01000000 0 0x0 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+			  0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		reg = <0xf 0xfe260000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	qe: qe@ffe140000 {
+		ranges = <0x0 0xf 0xfe140000 0x40000>;
+		reg = <0xf 0xfe140000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+
+		si1: si@700 {
+			compatible = "fsl,t1040-qe-si";
+			reg = <0x700 0x80>;
+		};
+
+		siram1: siram@1000 {
+			compatible = "fsl,t1040-qe-siram";
+			reg = <0x1000 0x800>;
+		};
+
+		ucc_hdlc: ucc@2000 {
+			compatible = "fsl,ucc-hdlc";
+			rx-clock-name = "clk8";
+			tx-clock-name = "clk9";
+			fsl,rx-sync-clock = "rsync_pin";
+			fsl,tx-sync-clock = "tsync_pin";
+			fsl,tx-timeslot-mask = <0xfffffffe>;
+			fsl,rx-timeslot-mask = <0xfffffffe>;
+			fsl,tdm-framer-type = "e1";
+			fsl,tdm-id = <0>;
+			fsl,siram-entry-id = <0>;
+			fsl,tdm-interface;
+		};
+
+		ucc_serial: ucc@2200 {
+			compatible = "fsl,t1040-ucc-uart";
+			port-number = <0>;
+			rx-clock-name = "brg2";
+			tx-clock-name = "brg2";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
new file mode 100644
index 0000000000..1c329f076f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
@@ -0,0 +1,407 @@
+/*
+ * T104xQDS Device Tree Source
+ *
+ * Copyright 2013 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	model = "fsl,T1040QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		emi1_rgmii0 = &t1040mdio0;
+		emi1_rgmii1 = &t1040mdio1;
+		emi1_slot3 = &t1040mdio3;
+		emi1_slot5 = &t1040mdio5;
+		emi1_slot6 = &t1040mdio6;
+		emi1_slot7 = &t1040mdio7;
+		rgmii_phy1 = &rgmii_phy1;
+		rgmii_phy2 = &rgmii_phy2;
+		phy_s3_01 = &phy_s3_01;
+		phy_s3_02 = &phy_s3_02;
+		phy_s3_03 = &phy_s3_03;
+		phy_s3_04 = &phy_s3_04;
+		phy_s5_01 = &phy_s5_01;
+		phy_s5_02 = &phy_s5_02;
+		phy_s5_03 = &phy_s5_03;
+		phy_s5_04 = &phy_s5_04;
+		phy_s6_01 = &phy_s6_01;
+		phy_s6_02 = &phy_s6_02;
+		phy_s6_03 = &phy_s6_03;
+		phy_s6_04 = &phy_s6_04;
+		phy_s7_01 = &phy_s7_01;
+		phy_s7_02 = &phy_s7_02;
+		phy_s7_03 = &phy_s7_03;
+		phy_s7_04 = &phy_s7_04;
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,fpga-qixis";
+			reg = <3 0 0x300>;
+			ranges = <0 3 0 0x300>;
+
+			mdio-mux-emi1 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "mdio-mux-mmioreg", "mdio-mux";
+				mdio-parent-bus = <&mdio0>;
+				reg = <0x54 1>;
+				mux-mask = <0xe0>;
+
+				t1040mdio0: mdio@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x00>;
+					status = "disabled";
+
+					rgmii_phy1: ethernet-phy@1 {
+						reg = <0x1>;
+					};
+				};
+
+				t1040mdio1: mdio@20 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x20>;
+					status = "disabled";
+
+					rgmii_phy2: ethernet-phy@2 {
+						reg = <0x2>;
+					};
+				};
+
+				t1040mdio3: mdio@60 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x60>;
+					status = "disabled";
+
+					phy_s3_01: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_s3_02: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_s3_03: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_s3_04: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+
+				t1040mdio5: mdio@a0 {	/* child bus of mdio-mux-emi1; aliased as emi1_slot5 */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0xa0>;
+
+					phy_s5_01: ethernet-phy@14 {	/* unit address now equals reg (was @1c) */
+						reg = <0x14>;
+					};
+
+					phy_s5_02: ethernet-phy@15 {	/* was @1d */
+						reg = <0x15>;
+					};
+
+					phy_s5_03: ethernet-phy@16 {	/* was @1e */
+						reg = <0x16>;
+					};
+
+					phy_s5_04: ethernet-phy@17 {	/* was @1f */
+						reg = <0x17>;
+					};
+				};
+
+				t1040mdio6: mdio@c0 {	/* child bus of mdio-mux-emi1; aliased as emi1_slot6 */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0xc0>;
+
+					phy_s6_01: ethernet-phy@18 {	/* unit address now equals reg (was @1c) */
+						reg = <0x18>;
+					};
+
+					phy_s6_02: ethernet-phy@19 {	/* was @1d */
+						reg = <0x19>;
+					};
+
+					phy_s6_03: ethernet-phy@1a {	/* was @1e */
+						reg = <0x1a>;
+					};
+
+					phy_s6_04: ethernet-phy@1b {	/* was @1f */
+						reg = <0x1b>;
+					};
+				};
+
+				t1040mdio7: mdio@e0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0xe0>;
+					status = "disabled";
+
+					phy_s7_01: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					phy_s7_02: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					phy_s7_03: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					phy_s7_04: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+			};
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q128a11", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 {
+			i2c-mux@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+			};
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+		};
+
+		fman@400000 {	/* per-MAC link setup for the QDS board */
+			ethernet@e0000 {
+				fixed-link = <0 1 1000 0 0>;	/* legacy array form; NOTE(review): cells presumably <id duplex speed pause asym-pause> -- confirm against the binding */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				fixed-link = <1 1 1000 0 0>;	/* same form as e0000, first cell 1 */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy_s7_03>;	/* lives under t1040mdio7, which this file marks "disabled" */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&rgmii_phy1>;	/* lives under t1040mdio0, which this file marks "disabled" */
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&rgmii_phy2>;	/* lives under t1040mdio1, which this file marks "disabled" */
+				phy-connection-type = "rgmii";
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		reg = <0xf 0xfe260000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	qe: qe@ffe140000 {
+		ranges = <0x0 0xf 0xfe140000 0x40000>;
+		reg = <0xf 0xfe140000 0 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <0>;
+
+		si1: si@700 {
+			compatible = "fsl,t1040-qe-si";
+			reg = <0x700 0x80>;
+		};
+
+		siram1: siram@1000 {
+			compatible = "fsl,t1040-qe-siram";
+			reg = <0x1000 0x800>;
+		};
+
+		ucc_hdlc: ucc@2000 {
+			compatible = "fsl,ucc-hdlc";
+			rx-clock-name = "clk8";
+			tx-clock-name = "clk9";
+			fsl,rx-sync-clock = "rsync_pin";
+			fsl,tx-sync-clock = "tsync_pin";
+			fsl,tx-timeslot-mask = <0xfffffffe>;
+			fsl,rx-timeslot-mask = <0xfffffffe>;
+			fsl,tdm-framer-type = "e1";
+			fsl,tdm-id = <0>;
+			fsl,siram-entry-id = <0>;
+			fsl,tdm-interface;
+		};
+
+		ucc_serial: ucc@2200 {
+			compatible = "fsl,t1040-ucc-uart";
+			port-number = <0>;
+			rx-clock-name = "brg2";
+			tx-clock-name = "brg2";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
new file mode 100644
index 0000000000..fc7bec5dcb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -0,0 +1,263 @@
+/*
+ * T1040RDB/T1042RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	aliases { /* aliases for the three PHY labels defined under mdio0 later in this file */
+		phy_rgmii_0 = &phy_rgmii_0;
+		phy_rgmii_1 = &phy_rgmii_1;
+		phy_sgmii_2 = &phy_sgmii_2;
+	};
+
+	reserved-memory { /* three reserved regions given by size and alignment only; none has a reg (fixed address) */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>; /* 0x1000000 bytes (16 MiB) */
+			alignment = <0 0x1000000>; /* same value as size */
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>; /* 0x400000 bytes (4 MiB) */
+			alignment = <0 0x400000>; /* same value as size */
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>; /* 0x2000000 bytes (32 MiB) */
+			alignment = <0 0x2000000>; /* same value as size */
+		};
+	};
+
+	ifc: localbus@ffe124000 { /* local bus with nor, nand and cpld children keyed by first reg cell 0, 2 and 3 (presumably the chip select) */
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000 /* first cell 0 (nor@0,0) -> 0xf_e8000000, length 0x08000000 */
+			  2 0 0xf 0xff800000 0x00010000 /* first cell 2 (nand@2,0) -> 0xf_ff800000, length 0x10000 */
+			  3 0 0xf 0xffdf0000 0x00008000>; /* first cell 3 (cpld@3,0) -> 0xf_ffdf0000, length 0x8000 */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>; /* length equals the full window mapped for first cell 0 */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>; /* length equals the full window mapped for first cell 2 */
+		};
+
+		cpld@3,0 { /* no compatible string is set in this block */
+			reg = <3 0 0x300>; /* 0x300 bytes, smaller than the 0x8000 window mapped for first cell 3 */
+		};
+	};
+
+	memory { /* no reg here; presumably the memory layout is supplied before boot (verify) */
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 { /* only the address window is set here */
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>; /* child offset 0 -> parent 0xf_00000000, length 0x01072000 (assumes 1 child address cell and 1 size cell; TODO confirm) */
+	};
+
+	bportals: bman-portals@ff4000000 { /* only the address window is set here */
+		ranges = <0x0 0xf 0xf4000000 0x2000000>; /* child offset 0 -> parent 0xf_f4000000, length 0x2000000 (assumes 1 child address cell and 1 size cell; TODO confirm) */
+	};
+
+	qportals: qman-portals@ff6000000 { /* only the address window is set here */
+		ranges = <0x0 0xf 0xf6000000 0x2000000>; /* child offset 0 -> parent 0xf_f6000000, length 0x2000000 (assumes 1 child address cell and 1 size cell; TODO confirm) */
+	};
+
+	soc: soc@ffe000000 { /* SoC register block: address window plus board devices on spi, i2c and the fman MDIO bus */
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>; /* child offset 0 -> parent 0xf_fe000000, length 0x1000000 (assumes 1 child address cell and 1 size cell; TODO confirm) */
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q512ax3", "jedec,spi-nor";
+				reg = <0>; /* presumably the SPI chip select number (verify) */
+				spi-max-frequency = <10000000>; /* input clock */
+			};
+			slic@3 {
+				compatible = "maxim,ds26522";
+				reg = <3>; /* presumably the SPI chip select number (verify) */
+				spi-max-frequency = <2000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 {
+			adt7461@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>; /* matches the unit address 4c */
+			};
+		};
+
+		i2c@118100 {
+			i2c-mux@77 { /* no child buses are declared for this mux in this block */
+				compatible = "nxp,pca9546";
+				reg = <0x77>; /* matches the unit address 77 */
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		fman@400000 {
+			ethernet@e6000 {
+				phy-handle = <&phy_rgmii_0>; /* PHY at reg 0x01 on mdio0 below */
+				phy-connection-type = "rgmii-id";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phy_rgmii_1>; /* PHY at reg 0x02 on mdio0 below */
+				phy-connection-type = "rgmii-id";
+			};
+
+			mdio0: mdio@fc000 {
+				phy_sgmii_2: ethernet-phy@3 { /* not referenced by any phy-handle in this file */
+					reg = <0x03>;
+				};
+
+				phy_rgmii_0: ethernet-phy@1 {
+					reg = <0x01>;
+				};
+
+				phy_rgmii_1: ethernet-phy@2 {
+					reg = <0x02>;
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 { /* PCIe controller node; this block sets only reg and the address windows */
+		reg = <0xf 0xfe240000 0 0x10000>; /* cells 0xf, 0xfe240000 spell the unit address ffe240000; length 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000 /* presumably 32-bit PCI memory space: PCI 0xe0000000 -> CPU 0xc_00000000, length 0x10000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; /* presumably PCI I/O space: PCI 0 -> CPU 0xf_f8000000, length 0x10000 */
+		pcie@0 { /* child node: each entry below repeats the same address on the child and parent side */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000 /* length matches the 0x10000000 memory window above */
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* length matches the 0x10000 I/O window above */
+		};
+	};
+
+	pci1: pcie@ffe250000 { /* PCIe controller node; this block sets only reg and the address windows */
+		reg = <0xf 0xfe250000 0 0x10000>; /* cells 0xf, 0xfe250000 spell the unit address ffe250000; length 0x10000 */
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000 /* presumably 32-bit PCI memory space: PCI 0xe0000000 -> CPU 0xc_10000000, length 0x10000000 */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; /* presumably PCI I/O space: PCI 0 -> CPU 0xf_f8010000, length 0x10000 */
+		pcie@0 { /* child node: each entry below repeats the same address on the child and parent side */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000 /* length matches the 0x10000000 memory window above */
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* length matches the 0x10000 I/O window above */
+		};
+	};
+
+	pci2: pcie@ffe260000 { /* PCIe controller node; this block sets only reg and the address windows */
+		reg = <0xf 0xfe260000 0 0x10000>; /* cells 0xf, 0xfe260000 spell the unit address ffe260000; length 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 /* presumably 32-bit PCI memory space: PCI 0xe0000000 -> CPU 0xc_20000000, length 0x10000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; /* presumably PCI I/O space: PCI 0 -> CPU 0xf_f8020000, length 0x10000 */
+		pcie@0 { /* child node: each entry below repeats the same address on the child and parent side */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000 /* length matches the 0x10000000 memory window above */
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* length matches the 0x10000 I/O window above */
+		};
+	};
+
+	pci3: pcie@ffe270000 { /* PCIe controller node; this block sets only reg and the address windows */
+		reg = <0xf 0xfe270000 0 0x10000>; /* cells 0xf, 0xfe270000 spell the unit address ffe270000; length 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000 /* presumably 32-bit PCI memory space: PCI 0xe0000000 -> CPU 0xc_30000000, length 0x10000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; /* presumably PCI I/O space: PCI 0 -> CPU 0xf_f8030000, length 0x10000 */
+		pcie@0 { /* child node: each entry below repeats the same address on the child and parent side */
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000 /* length matches the 0x10000000 memory window above */
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>; /* length matches the 0x10000 I/O window above */
+		};
+	};
+
+	qe: qe@ffe140000 { /* QE node: address window plus si, siram and two ucc child nodes */
+		ranges = <0x0 0xf 0xfe140000 0x40000>; /* child offset 0 -> parent 0xf_fe140000, length 0x40000 (assumes 1 child address cell and 1 size cell; TODO confirm) */
+		reg = <0xf 0xfe140000 0 0x480>;
+		brg-frequency = <0>; /* left at 0 in this file; presumably filled in before boot (verify) */
+		bus-frequency = <0>; /* left at 0 in this file; presumably filled in before boot (verify) */
+
+		si1: si@700 {
+			compatible = "fsl,t1040-qe-si";
+			reg = <0x700 0x80>; /* offset 0x700, length 0x80 inside the qe window */
+		};
+
+		siram1: siram@1000 {
+			compatible = "fsl,t1040-qe-siram";
+			reg = <0x1000 0x800>; /* offset 0x1000, length 0x800 inside the qe window */
+		};
+
+		ucc_hdlc: ucc@2000 { /* no reg property is set for this node in this block */
+			compatible = "fsl,ucc-hdlc";
+			rx-clock-name = "clk8";
+			tx-clock-name = "clk9"; /* receive and transmit use different clock names here */
+			fsl,rx-sync-clock = "rsync_pin";
+			fsl,tx-sync-clock = "tsync_pin";
+			fsl,tx-timeslot-mask = <0xfffffffe>; /* every bit set except bit 0 */
+			fsl,rx-timeslot-mask = <0xfffffffe>; /* same mask as the transmit side */
+			fsl,tdm-framer-type = "e1";
+			fsl,tdm-id = <0>;
+			fsl,siram-entry-id = <0>;
+			fsl,tdm-interface;
+		};
+
+		ucc_serial: ucc@2200 { /* no reg property is set for this node in this block */
+			compatible = "fsl,t1040-ucc-uart";
+			port-number = <0>;
+			rx-clock-name = "brg2";
+			tx-clock-name = "brg2"; /* same clock name for both directions */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
new file mode 100644
index 0000000000..dd59e4b694
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
@@ -0,0 +1,115 @@
+/*
+ * T1040/T1042 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2013-2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ { /* root node of the pre-include: cell sizes, interrupt parent, aliases and the four CPU nodes */
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>; /* mpic label is not defined in this file */
+
+	aliases { /* every label referenced here is defined outside this file */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		sdhc = &sdhc;
+
+		crypto = &crypto;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+	};
+
+	cpus { /* four cpu nodes with reg 0..3; each has its own l2-cache child pointing at &cpc */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e5500@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clocks = <&clockgen 1 0>; /* last cell equals this cpu's reg value */
+			next-level-cache = <&L2_1>; /* L2 labels are numbered from 1 while cpu labels start at 0 */
+			#cooling-cells = <2>;
+			L2_1: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu1: PowerPC,e5500@1 {
+			device_type = "cpu";
+			reg = <1>;
+			clocks = <&clockgen 1 1>; /* last cell equals this cpu's reg value */
+			next-level-cache = <&L2_2>;
+			#cooling-cells = <2>;
+			L2_2: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu2: PowerPC,e5500@2 {
+			device_type = "cpu";
+			reg = <2>;
+			clocks = <&clockgen 1 2>; /* last cell equals this cpu's reg value */
+			next-level-cache = <&L2_3>;
+			#cooling-cells = <2>;
+			L2_3: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+		cpu3: PowerPC,e5500@3 {
+			device_type = "cpu";
+			reg = <3>;
+			clocks = <&clockgen 1 3>; /* last cell equals this cpu's reg value */
+			next-level-cache = <&L2_4>;
+			#cooling-cells = <2>;
+			L2_4: l2-cache {
+				next-level-cache = <&cpc>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t2080qds.dts b/arch/powerpc/boot/dts/fsl/t2080qds.dts
new file mode 100644
index 0000000000..8d190e8c62
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080qds.dts
@@ -0,0 +1,213 @@
+/*
+ * T2080QDS Device Tree Source
+ *
+ * Copyright 2013 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t208xsi-pre.dtsi"
+/include/ "t208xqds.dtsi"
+
+/ { /* T2080QDS root node: board identity, MDIO slot aliases and RapidIO port windows */
+	model = "fsl,T2080QDS";
+	compatible = "fsl,T2080QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* targets are mux children declared under &boardctrl later in this file */
+		emi1_slot1 = &t2080mdio2; /* mdio@40 */
+		emi1_slot2 = &t2080mdio3; /* mdio@c0 */
+		emi1_slot3 = &t2080mdio4; /* mdio@60 */
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>; /* cells 0xf, 0xfe0c0000 spell the unit address ffe0c0000; length 0x11000 */
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* child 0 -> parent 0xc_20000000, length 0x10000000 (two cells each, per the &rio cell sizes in t2080si-post.dtsi) */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>; /* child 0 -> parent 0xc_30000000, length 0x10000000 */
+		};
+	};
+};
+
+&soc { /* board wiring: sets phy-handle and phy-connection-type for eight fman ethernet nodes */
+	fman@400000 {
+		ethernet@e0000 {
+			phy-handle = <&phy_sgmii_s3_1e>; /* defined under t2080mdio4, which is status = "disabled" in &boardctrl below */
+			phy-connection-type = "xgmii";
+		};
+
+		ethernet@e2000 {
+			phy-handle = <&phy_sgmii_s3_1f>; /* NOTE(review): ethernet@f2000 uses the same phy-handle; confirm this is intentional */
+			phy-connection-type = "xgmii";
+		};
+
+		ethernet@e4000 {
+			phy-handle = <&rgmii_phy1>; /* defined under t2080mdio0 (mdio@0) */
+			phy-connection-type = "rgmii";
+		};
+
+		ethernet@e6000 {
+			phy-handle = <&rgmii_phy2>; /* defined under t2080mdio1 (mdio@20) */
+			phy-connection-type = "rgmii";
+		};
+
+		ethernet@e8000 {
+			phy-handle = <&phy_sgmii_s2_1e>; /* defined under t2080mdio3 (mdio@c0) */
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@ea000 {
+			phy-handle = <&phy_sgmii_s2_1d>; /* defined under t2080mdio3 (mdio@c0) */
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@f0000 {
+			phy-handle = <&phy_xaui_slot3>; /* defined under mdio@fd000 at the end of this block */
+			phy-connection-type = "xgmii";
+		};
+
+		ethernet@f2000 {
+			phy-handle = <&phy_sgmii_s3_1f>; /* NOTE(review): same phy-handle as ethernet@e2000; confirm this is intentional */
+			phy-connection-type = "xgmii";
+		};
+
+		mdio@fd000 {
+			phy_xaui_slot3: ethernet-phy@3 {
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0x3>; /* matches the unit address 3 */
+			};
+		};
+	};
+};
+
+&boardctrl { /* adds an MDIO mux with five child buses; mdio@40 and mdio@60 are disabled */
+	mdio-mux-emi1 {
+		compatible = "mdio-mux-mmioreg", "mdio-mux";
+		mdio-parent-bus = <&mdio0>; /* mdio0 label is not defined in this file */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x54 1>; /* presumably a 1-byte select register at offset 0x54 (verify against the mdio-mux-mmioreg binding) */
+		mux-mask = <0xe0>; /* every child reg below (0, 0x20, 0x40, 0xc0, 0x60) fits inside this mask */
+
+		t2080mdio0: mdio@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			rgmii_phy1: ethernet-phy@1 {
+				reg = <0x1>;
+			};
+		};
+
+		t2080mdio1: mdio@20 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x20>;
+
+			rgmii_phy2: ethernet-phy@2 {
+				reg = <0x2>;
+			};
+		};
+
+		t2080mdio2: mdio@40 { /* aliased as emi1_slot1 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x40>;
+			status = "disabled";
+
+			phy_sgmii_s1_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s1_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s1_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s1_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2080mdio3: mdio@c0 { /* aliased as emi1_slot2; note the select value 0xc0 comes before 0x60 in label order */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0xc0>;
+
+			phy_sgmii_s2_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s2_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s2_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s2_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2080mdio4: mdio@60 { /* aliased as emi1_slot3 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x60>;
+			status = "disabled"; /* NOTE(review): &soc above points three phy-handles at PHYs on this disabled bus; confirm */
+
+			phy_sgmii_s3_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s3_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s3_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s3_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+	};
+};
+
+/include/ "t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
new file mode 100644
index 0000000000..092a400740
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
@@ -0,0 +1,122 @@
+/*
+ * T2080PCIe-RDB Board Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t208xsi-pre.dtsi"
+/include/ "t208xrdb.dtsi"
+
+/ { /* T2080RDB root node: board identity and RapidIO port windows */
+	model = "fsl,T2080RDB";
+	compatible = "fsl,T2080RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>; /* cells 0xf, 0xfe0c0000 spell the unit address ffe0c0000; length 0x11000 */
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* child 0 -> parent 0xc_20000000, length 0x10000000 (two cells each, per the &rio cell sizes in t2080si-post.dtsi) */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>; /* child 0 -> parent 0xc_30000000, length 0x10000000 */
+		};
+	};
+};
+
+&soc { /* board wiring: six fman ethernet nodes plus the PHYs they reference on mdio@fc000 and mdio@fd000 */
+	fman@400000 {
+		ethernet@e0000 {
+			phy-handle = <&xg_aq1202_phy3>; /* reg 0x0 on mdio@fd000 */
+			phy-connection-type = "xgmii";
+		};
+
+		ethernet@e2000 {
+			phy-handle = <&xg_aq1202_phy4>; /* reg 0x1 on mdio@fd000 */
+			phy-connection-type = "xgmii";
+		};
+
+		ethernet@e4000 {
+			phy-handle = <&rgmii_phy1>; /* reg 0x1 on mdio@fc000 */
+			phy-connection-type = "rgmii";
+		};
+
+		ethernet@e6000 {
+			phy-handle = <&rgmii_phy2>; /* reg 0x2 on mdio@fc000 */
+			phy-connection-type = "rgmii";
+		};
+
+		ethernet@f0000 {
+			phy-handle = <&xg_cs4315_phy2>; /* reg 0xd on mdio@fd000; NOTE(review): f0000 -> phy2 and f2000 -> phy1 are crossed, confirm against the board */
+			phy-connection-type = "xgmii";
+		};
+
+		ethernet@f2000 {
+			phy-handle = <&xg_cs4315_phy1>; /* reg 0xc on mdio@fd000 */
+			phy-connection-type = "xgmii";
+		};
+
+		mdio@fc000 {
+			rgmii_phy1: ethernet-phy@1 {
+				reg = <0x1>;
+			};
+			rgmii_phy2: ethernet-phy@2 {
+				reg = <0x2>;
+			};
+		};
+
+		mdio@fd000 { /* four PHYs at addresses 0xc, 0xd, 0x0 and 0x1 */
+			xg_cs4315_phy1: ethernet-phy@c {
+				compatible = "ethernet-phy-id13e5.1002";
+				reg = <0xc>;
+			};
+
+			xg_cs4315_phy2: ethernet-phy@d {
+				compatible = "ethernet-phy-id13e5.1002"; /* same compatible string as xg_cs4315_phy1 */
+				reg = <0xd>;
+			};
+
+			xg_aq1202_phy3: ethernet-phy@0 {
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0x0>;
+			};
+
+			xg_aq1202_phy4: ethernet-phy@1 {
+				compatible = "ethernet-phy-ieee802.3-c45";
+				reg = <0x1>;
+			};
+		};
+	};
+};
+
+/include/ "t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
new file mode 100644
index 0000000000..082ec20440
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
@@ -0,0 +1,69 @@
+/*
+ * T2080 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t2081si-post.dtsi"
+
+&soc { /* adds two SATA nodes on top of the included t2081si-post.dtsi */
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 { /* presumably extends the node provided by the include above (verify) */
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 { /* presumably extends the node provided by the include above (verify) */
+		fsl,iommu-parent = <&pamu1>; /* same iommu parent as sata@220000 */
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+};
+
+&rio { /* SoC-level properties for the rio label; board files in this patch define the node and its port ranges */
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	ranges; /* empty ranges: no address translation values are given at this level */
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>; /* matches the port number in the node name */
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>; /* matches the port number in the node name */
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t2081qds.dts b/arch/powerpc/boot/dts/fsl/t2081qds.dts
new file mode 100644
index 0000000000..fc5c4a30f7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2081qds.dts
@@ -0,0 +1,265 @@
+/*
+ * T2081QDS Device Tree Source
+ *
+ * Copyright 2013 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t208xsi-pre.dtsi"
+/include/ "t208xqds.dtsi"
+
+/ { /* T2081QDS root node: board identity and MDIO slot aliases */
+	model = "fsl,T2081QDS";
+	compatible = "fsl,T2081QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* targets are mux children declared under &boardctrl later in this file; there is no emi1_slot4 alias */
+		emi1_slot1 = &t2081mdio2; /* mdio@40 */
+		emi1_slot2 = &t2081mdio3; /* mdio@60 */
+		emi1_slot3 = &t2081mdio4; /* mdio@80 */
+		emi1_slot5 = &t2081mdio5; /* mdio@a0 */
+		emi1_slot6 = &t2081mdio6; /* mdio@c0 */
+		emi1_slot7 = &t2081mdio7; /* mdio@e0 */
+	};
+};
+
+&soc { /* board wiring: sets phy-handle and phy-connection-type for eight fman ethernet nodes */
+	fman@400000 {
+		ethernet@e0000 {
+			phy-handle = <&phy_sgmii_s7_1c>; /* defined under t2081mdio7 (mdio@e0) */
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@e2000 {
+			phy-handle = <&phy_sgmii_s7_1d>; /* defined under t2081mdio7 (mdio@e0) */
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@e4000 {
+			phy-handle = <&rgmii_phy1>; /* defined under t2081mdio0 (mdio@0) */
+			phy-connection-type = "rgmii";
+		};
+
+		ethernet@e6000 {
+			phy-handle = <&rgmii_phy2>; /* defined under t2081mdio1 (mdio@20) */
+			phy-connection-type = "rgmii";
+		};
+
+		ethernet@e8000 {
+			phy-handle = <&phy_sgmii_s3_1c>; /* NOTE(review): defined under t2081mdio4, which is status = "disabled" below; confirm */
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@ea000 {
+			phy-handle = <&phy_sgmii_s7_1f>; /* defined under t2081mdio7 (mdio@e0) */
+			phy-connection-type = "sgmii";
+		};
+
+		ethernet@f0000 {
+			phy-handle = <&phy_sgmii_s2_1c>; /* defined under t2081mdio3 (mdio@60) */
+			phy-connection-type = "xgmii"; /* NOTE(review): "xgmii" paired with a label named sgmii; confirm intended */
+		};
+
+		ethernet@f2000 {
+			phy-handle = <&phy_sgmii_s7_1e>; /* defined under t2081mdio7 (mdio@e0) */
+			phy-connection-type = "xgmii"; /* NOTE(review): "xgmii" paired with a label named sgmii; confirm intended */
+		};
+	};
+};
+
+&boardctrl { /* adds an MDIO mux with eight child buses; mdio@80, mdio@a0 and mdio@c0 are disabled */
+	mdio-mux-emi1 {
+		compatible = "mdio-mux-mmioreg", "mdio-mux";
+		mdio-parent-bus = <&mdio0>; /* mdio0 label is not defined in this file */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x54 1>; /* presumably a 1-byte select register at offset 0x54 (verify against the mdio-mux-mmioreg binding) */
+		mux-mask = <0xe0>; /* child reg values below run 0x00..0xe0 in steps of 0x20, all inside this mask */
+
+		t2081mdio0: mdio@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			rgmii_phy1: ethernet-phy@1 {
+				reg = <0x1>;
+			};
+		};
+
+		t2081mdio1: mdio@20 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x20>;
+
+			rgmii_phy2: ethernet-phy@2 {
+				reg = <0x2>;
+			};
+		};
+
+		t2081mdio2: mdio@40 { /* aliased as emi1_slot1 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x40>;
+
+			phy_sgmii_s1_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s1_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s1_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s1_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2081mdio3: mdio@60 { /* aliased as emi1_slot2 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x60>;
+
+			phy_sgmii_s2_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s2_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s2_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s2_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2081mdio4: mdio@80 { /* aliased as emi1_slot3 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x80>;
+			status = "disabled"; /* NOTE(review): ethernet@e8000 in &soc above points at phy_sgmii_s3_1c on this disabled bus; confirm */
+
+			phy_sgmii_s3_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s3_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s3_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s3_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2081mdio5: mdio@a0 { /* aliased as emi1_slot5 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0xa0>;
+			status = "disabled";
+
+			phy_sgmii_s5_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s5_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s5_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s5_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2081mdio6: mdio@c0 { /* aliased as emi1_slot6 in the root node */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0xc0>;
+			status = "disabled";
+
+			phy_sgmii_s6_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s6_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s6_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s6_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+
+		t2081mdio7: mdio@e0 { /* aliased as emi1_slot7; &soc above takes four phy-handles from this bus */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0xe0>;
+
+			phy_sgmii_s7_1c: ethernet-phy@1c {
+				reg = <0x1c>;
+			};
+
+			phy_sgmii_s7_1d: ethernet-phy@1d {
+				reg = <0x1d>;
+			};
+
+			phy_sgmii_s7_1e: ethernet-phy@1e {
+				reg = <0x1e>;
+			};
+
+			phy_sgmii_s7_1f: ethernet-phy@1f {
+				reg = <0x1f>;
+			};
+		};
+	};
+};
+
+/include/ "t2081si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
new file mode 100644
index 0000000000..27714dc2f0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -0,0 +1,677 @@
+/*
+ * T2081 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr {	/* adds SoC-level properties to the node labelled bman_fbpr (label is defined outside this file) */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;	/* presumably <base size> with 2-cell address and 2-cell size, i.e. base 0, size 2^48 - confirm against the parent's cell counts */
+};
+
+&qman_fqd {	/* adds SoC-level properties to the node labelled qman_fqd (label is defined outside this file) */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>;	/* presumably <base size> with 2-cell address and 2-cell size, i.e. base 0, size 2^48 - confirm against the parent's cell counts */
+};
+
+&qman_pfdr {	/* adds SoC-level properties to the node labelled qman_pfdr (label is defined outside this file) */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>;	/* presumably <base size> with 2-cell address and 2-cell size, i.e. base 0, size 2^48 - confirm against the parent's cell counts */
+};
+
+&ifc {	/* SoC-level properties for the node labelled ifc; reg/ranges and children are not set here */
+	#address-cells = <2>;	/* child addresses use two cells */
+	#size-cells = <1>;	/* child sizes use one cell */
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+/* controller at 0x240000 */
+&pci0 {	/* SoC-level properties for the PCIe controller labelled pci0; reg/ranges are not set here */
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {	/* child node: repeats interrupt 20 and carries the INTx interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one-cell interrupt specifier: the 4th cell of each map entry is the pin */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <20 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0	/* pin 1 -> mpic 40 */
+			0000 0 0 2 &mpic 1 1 0 0	/* pin 2 -> mpic 1 */
+			0000 0 0 3 &mpic 2 1 0 0	/* pin 3 -> mpic 2 */
+			0000 0 0 4 &mpic 3 1 0 0	/* pin 4 -> mpic 3 */
+		>;
+	};
+};
+
+/* controller at 0x250000 */
+&pci1 {	/* SoC-level properties for the PCIe controller labelled pci1; reg/ranges are not set here */
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	interrupts = <21 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {	/* child node: repeats interrupt 21 and carries the INTx interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one-cell interrupt specifier: the 4th cell of each map entry is the pin */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <21 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0	/* pin 1 -> mpic 41 */
+			0000 0 0 2 &mpic 5 1 0 0	/* pin 2 -> mpic 5 */
+			0000 0 0 3 &mpic 6 1 0 0	/* pin 3 -> mpic 6 */
+			0000 0 0 4 &mpic 7 1 0 0	/* pin 4 -> mpic 7 */
+		>;
+	};
+};
+
+/* controller at 0x260000 */
+&pci2 {	/* SoC-level properties for the PCIe controller labelled pci2; reg/ranges are not set here */
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {	/* child node: repeats interrupt 22 and carries the INTx interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one-cell interrupt specifier: the 4th cell of each map entry is the pin */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <22 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0	/* pin 1 -> mpic 42 */
+			0000 0 0 2 &mpic 9 1 0 0	/* pin 2 -> mpic 9 */
+			0000 0 0 3 &mpic 10 1 0 0	/* pin 3 -> mpic 10 */
+			0000 0 0 4 &mpic 11 1 0 0	/* pin 4 -> mpic 11 */
+		>;
+	};
+};
+
+/* controller at 0x270000 */
+&pci3 {	/* SoC-level properties for the PCIe controller labelled pci3; reg/ranges are not set here */
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <23 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {	/* child node: repeats interrupt 23 and carries the INTx interrupt-map */
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;	/* one-cell interrupt specifier: the 4th cell of each map entry is the pin */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <23 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0	/* pin 1 -> mpic 43 */
+			0000 0 0 2 &mpic 0 1 0 0	/* pin 2 -> mpic 0 */
+			0000 0 0 3 &mpic 4 1 0 0	/* pin 3 -> mpic 4 */
+			0000 0 0 4 &mpic 8 1 0 0	/* pin 4 -> mpic 8 */
+		>;
+	};
+};
+
+&dcsr {	/* fills in the node labelled dcsr: a simple-bus with one child per debug register block */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,t2080-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0
+			      94 2 0 0
+			      95 2 0 0>;	/* five 4-cell interrupt specifiers */
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {	/* NOTE(review): has reg (first region at 0x1000) but no unit address, unlike most siblings - confirm intended */
+		compatible = "fsl,t2080-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;	/* two <address size> regions */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {	/* NOTE(review): has reg (first region at 0x8000) but no unit address - confirm intended */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;	/* two <address size> regions */
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,t2080-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;	/* phandle to the node labelled ddr1 */
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,t2080-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,t2080-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {	/* first of three snpc nodes at 0x30000/0x31000/0x32000 */
+		compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;
+	};
+	dcsr-snpc@32000 {
+		compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x32000 0x1000 0x1062000 0x10000>;
+	};
+	dcsr-cpu-sb-proxy@100000 {	/* one proxy node per cpu0..cpu3, spaced 0x8000 apart */
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+};
+
+&bportals {	/* fills in the node labelled bportals with 18 bman-portal children (offsets 0x0..0x44000) */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;	/* two regions per portal: 0x4000 stride from 0x0, 0x1000 stride from 0x1000000 */
+		interrupts = <105 2 0 0>;	/* portals use the odd interrupt numbers 105..139 */
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+};
+
+&qportals {	/* fills in the node labelled qportals with 18 qman-portal children, labelled qportal0..qportal17 */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;	/* two regions per portal: 0x4000 stride from 0x0, 0x1000 stride from 0x1000000 */
+		interrupts = <104 0x2 0 0>;	/* portals use the even interrupt numbers 104..138 */
+		cell-index = <0x0>;	/* matches the portal number in the label (0x0..0x11) */
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <106 0x2 0 0>;
+		cell-index = <0x1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <108 0x2 0 0>;
+		cell-index = <0x2>;
+	};
+	qportal3: qman-portal@c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <110 0x2 0 0>;
+		cell-index = <0x3>;
+	};
+	qportal4: qman-portal@10000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <112 0x2 0 0>;
+		cell-index = <0x4>;
+	};
+	qportal5: qman-portal@14000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <114 0x2 0 0>;
+		cell-index = <0x5>;
+	};
+	qportal6: qman-portal@18000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <116 0x2 0 0>;
+		cell-index = <0x6>;
+	};
+	qportal7: qman-portal@1c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <118 0x2 0 0>;
+		cell-index = <0x7>;
+	};
+	qportal8: qman-portal@20000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <120 0x2 0 0>;
+		cell-index = <0x8>;
+	};
+	qportal9: qman-portal@24000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <122 0x2 0 0>;
+		cell-index = <0x9>;
+	};
+	qportal10: qman-portal@28000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <124 0x2 0 0>;
+		cell-index = <0xa>;
+	};
+	qportal11: qman-portal@2c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <126 0x2 0 0>;
+		cell-index = <0xb>;
+	};
+	qportal12: qman-portal@30000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <128 0x2 0 0>;
+		cell-index = <0xc>;
+	};
+	qportal13: qman-portal@34000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <130 0x2 0 0>;
+		cell-index = <0xd>;
+	};
+	qportal14: qman-portal@38000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <132 0x2 0 0>;
+		cell-index = <0xe>;
+	};
+	qportal15: qman-portal@3c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <134 0x2 0 0>;
+		cell-index = <0xf>;
+	};
+	qportal16: qman-portal@40000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <136 0x2 0 0>;
+		cell-index = <0x10>;
+	};
+	qportal17: qman-portal@44000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <138 0x2 0 0>;
+		cell-index = <0x11>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,t2080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000
+		       0x12000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26
+			      16 2 1 25>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {	/* PAMU container node with three pamu children, one per 0x1000 of its 0x3000 window */
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x3000>;
+		fsl,portid-mapping = <0x8000>;
+		ranges = <0 0x20000 0x3000>;	/* child address 0 maps to 0x20000 in the parent, size 0x3000 */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+
+		pamu0: pamu@0 {	/* label used by fsl,iommu-parent on the pci and dma nodes in this file */
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 {	/* label used by fsl,iommu-parent on the sdhc and usb nodes in this file */
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-mpic4.3.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,t2080-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+/include/ "qoriq-clockgen2.dtsi"
+	global-utilities@e1000 {
+		compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,t2080-rcpm", "fsl,qoriq-rcpm-2.0";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t2080-sfp";
+		reg = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t2080-serdes";
+		reg = <0xea000 0x4000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+	dma@100300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+/include/ "elo3-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+/include/ "elo3-dma-2.dtsi"
+	dma@102300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x588>; /* DMA3LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,t2080-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* SDMMCLIODNR */
+		sdhci,auto-cmd12;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {
+		compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		phy_type = "utmi";
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {	/* overrides for the usb@211000 node pulled in by the /include/ directly above */
+		compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+/include/ "qoriq-sec5.2-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3-0.dtsi"
+/include/ "qoriq-fman3-0-10g-2.dtsi"
+/include/ "qoriq-fman3-0-10g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+/include/ "qoriq-fman3-0-1g-5.dtsi"
+/include/ "qoriq-fman3-0-10g-0.dtsi"
+/include/ "qoriq-fman3-0-10g-1.dtsi"
+	fman@400000 {
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@ea000 {
+		};
+
+		enet6: ethernet@f0000 {
+		};
+
+		enet7: ethernet@f2000 {
+		};
+
+		mdio@fc000 {
+			interrupts = <100 1 0 0>;
+		};
+
+		mdio@fd000 {
+			interrupts = <101 1 0 0>;
+		};
+	};
+
+	L2_1: l2-cache-controller@c20000 {	/* the only L2 cache controller node declared in this file */
+		/* Cluster 0 L2 cache */
+		compatible = "fsl,t2080-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;	/* cpc labels the l3-cache-controller@10000 node in this same &soc block */
+		interrupts = <16 2 1 9>;
+	};
+};
+
+&fman0_rx_0x08 {	/* label is not defined in this file's own text; presumably it comes from an included fman dtsi */
+	/delete-property/ fsl,fman-10g-port;	/* removes the fsl,fman-10g-port property from this port node */
+};
+
+&fman0_tx_0x28 {	/* label is not defined in this file's own text; presumably it comes from an included fman dtsi */
+	/delete-property/ fsl,fman-10g-port;	/* removes the fsl,fman-10g-port property from this port node */
+};
+
+&fman0_rx_0x09 {	/* label is not defined in this file's own text; presumably it comes from an included fman dtsi */
+	/delete-property/ fsl,fman-10g-port;	/* removes the fsl,fman-10g-port property from this port node */
+};
+
+&fman0_tx_0x29 {	/* label is not defined in this file's own text; presumably it comes from an included fman dtsi */
+	/delete-property/ fsl,fman-10g-port;	/* removes the fsl,fman-10g-port property from this port node */
+};
diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
new file mode 100644
index 0000000000..962c999416
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
@@ -0,0 +1,277 @@
+/*
+ * T2080/T2081 QDS Device Tree Source
+ *
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	model = "fsl,T2080QDS";
+	compatible = "fsl,T2080QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {	/* board-level IFC node: register window, address map and the three attached devices */
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;	/* three entries whose first cells are 0, 2 and 3, matching the children below; presumably <chip-select offset parent-addr size> - confirm against the ifc cell counts */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;	/* same first cell (0) and size (0x08000000) as the first ranges entry */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;	/* same first cell (2) and size (0x10000) as the second ranges entry */
+		};
+
+		boardctrl: board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,fpga-qixis";
+			reg = <3 0 0x300>;	/* first cell 3 matches the third ranges entry; only 0x300 of its 0x8000 is claimed */
+			ranges = <0 3 0 0x300>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q128a11", "jedec,spi-nor"; /* 16MB */
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+			};
+
+			flash@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040", "jedec,spi-nor";
+				reg = <1>;
+				spi-max-frequency = <35000000>;
+			};
+
+			flash@2 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "eon,en25s64", "jedec,spi-nor";
+				reg = <2>;
+				spi-max-frequency = <35000000>;
+			};
+		};
+
+		i2c@118000 {
+			i2c-mux@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x0>;
+
+					eeprom@50 {
+						compatible = "atmel,24c512";
+						reg = <0x50>;
+					};
+
+					eeprom@51 {
+						compatible = "atmel,24c02";
+						reg = <0x51>;
+					};
+
+					eeprom@57 {
+						compatible = "atmel,24c02";
+						reg = <0x57>;
+					};
+
+					rtc@68 {
+						compatible = "dallas,ds3232";
+						reg = <0x68>;
+						interrupts = <0xb 0x1 0 0>;
+					};
+				};
+
+				i2c@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x1>;
+
+					eeprom@55 {
+						compatible = "atmel,24c02";
+						reg = <0x55>;
+					};
+				};
+
+				i2c@2 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x2>;
+
+					ina220@40 {
+						compatible = "ti,ina220";
+						reg = <0x40>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@41 {
+						compatible = "ti,ina220";
+						reg = <0x41>;
+						shunt-resistor = <1000>;
+					};
+				};
+
+				i2c@3 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x3>;
+
+					adt7461@4c {
+						compatible = "adi,adt7461";
+						reg = <0x4c>;
+					};
+				};
+			};
+		};
+
+		sdhc@114000 {
+			voltage-ranges = <1800 1800 3300 3300>;
+		};
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {	/* board-level address map for the controller labelled pci2 */
+		reg = <0xf 0xfe260000 0 0x10000>;	/* 0x10000, the same size as pci0/pci1/pci3 in this file (was 0x1000) */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* two windows: type 0x02000000 at 0xc_30000000 and type 0x01000000 at 0xf_f8020000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000	/* NOTE(review): 0x20000000 is larger than the 0x10000000 window in the parent ranges - confirm */
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
new file mode 100644
index 0000000000..ecc3e8c739
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
@@ -0,0 +1,211 @@
+/*
+ * T2080PCIe-RDB Board Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	model = "fsl,T2080RDB";
+	compatible = "fsl,T2080RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>;
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>;
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>;
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	ifc: localbus@ffe124000 {	/* board-level IFC node: register window, address map and the three attached devices */
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;	/* three entries whose first cells are 0, 2 and 3; presumably <chip-select offset parent-addr size> - confirm against the ifc cell counts */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;	/* same first cell (0) and size (0x08000000) as the first ranges entry */
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {	/* NOTE(review): unit address says 1,0 but reg below starts with 0x2 and ranges has no entry for 1 - confirm which is right */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		boardctrl: board-control@2,0 {	/* NOTE(review): unit address says 2,0 but reg and ranges below use 3 - confirm which is right */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,t2080-cpld";
+			reg = <3 0 0x300>;
+			ranges = <0 3 0 0x300>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>;
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q512ax3", "jedec,spi-nor";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 {
+			adt7481@4c {
+				compatible = "adi,adt7481";
+				reg = <0x4c>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,24c256";
+				reg = <0x50>;
+			};
+		};
+
+		i2c@118100 {
+			i2c-mux@77 {
+				compatible = "nxp,pca9546";
+				reg = <0x77>;
+			};
+		};
+
+		sdhc@114000 {
+			voltage-ranges = <1800 1800 3300 3300>;
+		};
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {	/* board-level address map for the controller labelled pci2 */
+		reg = <0xf 0xfe260000 0 0x10000>;	/* 0x10000, the same size as pci0/pci1/pci3 in this file (was 0x1000) */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;	/* two windows: type 0x02000000 at 0xc_30000000 and type 0x01000000 at 0xf_f8020000 */
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000	/* NOTE(review): 0x20000000 is larger than the 0x10000000 window in the parent ranges - confirm */
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
new file mode 100644
index 0000000000..3f745de442
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
@@ -0,0 +1,110 @@
+/*
+ * T2080/T2081 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e6500_power_isa.dtsi"
+
+/ { /* root node: cell sizes, default interrupt parent, aliases and the four CPU nodes */
+	#address-cells = <2>; /* addresses directly under the root take two cells */
+	#size-cells = <2>; /* sizes directly under the root take two cells */
+	interrupt-parent = <&mpic>; /* the mpic label is not defined in this file's own text */
+
+	aliases { /* none of the labels referenced below are defined in this file's own text */
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+
+		crypto = &crypto;
+
+		fman0 = &fman0;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+		ethernet6 = &enet6;
+		ethernet7 = &enet7;
+
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		dma2 = &dma2;
+		sdhc = &sdhc;
+	};
+
+	cpus { /* four CPU nodes, each carrying two IDs in reg */
+		#address-cells = <1>;
+		#size-cells = <0>; /* no size cells, so every cell in a child's reg is an ID */
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>; /* two IDs - presumably the two hardware threads of this core; confirm */
+			clocks = <&clockgen 1 0>; /* same clock specifier on all four CPU nodes */
+			next-level-cache = <&L2_1>; /* all four CPU nodes reference the same L2_1 label */
+			fsl,portid-mapping = <0x80000000>; /* same value on all four CPU nodes */
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>; /* unit address matches the first ID */
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>; /* unit address matches the first ID */
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>; /* unit address matches the first ID */
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts
new file mode 100644
index 0000000000..128b5798bb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts
@@ -0,0 +1,708 @@
+/*
+ * T4240QDS Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t4240si-pre.dtsi"
+
+/ {
+	model = "fsl,T4240QDS";
+	compatible = "fsl,T4240QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* alias names for PHY and MDIO nodes that are labelled further down in this file */
+		phy_rgmii1 = &phyrgmii1; /* RGMII PHYs sit on the mux child labelled t4240mdio0 */
+		phy_rgmii2 = &phyrgmii2;
+		phy_sgmii3 = &phy3; /* only phy3, phy4, phy11 and phy12 of phy1..phy16 get an alias here */
+		phy_sgmii4 = &phy4;
+		phy_sgmii11 = &phy11;
+		phy_sgmii12 = &phy12;
+		sgmii_phy11 = &sgmiiphy11; /* sgmiiphyXY: X matches the t4240mdioX mux child, Y is 1..4 for addresses 0x1c..0x1f */
+		sgmii_phy12 = &sgmiiphy12;
+		sgmii_phy13 = &sgmiiphy13;
+		sgmii_phy14 = &sgmiiphy14;
+		sgmii_phy21 = &sgmiiphy21;
+		sgmii_phy22 = &sgmiiphy22;
+		sgmii_phy23 = &sgmiiphy23;
+		sgmii_phy24 = &sgmiiphy24;
+		sgmii_phy31 = &sgmiiphy31;
+		sgmii_phy32 = &sgmiiphy32;
+		sgmii_phy33 = &sgmiiphy33;
+		sgmii_phy34 = &sgmiiphy34;
+		sgmii_phy41 = &sgmiiphy41;
+		sgmii_phy42 = &sgmiiphy42;
+		sgmii_phy43 = &sgmiiphy43;
+		sgmii_phy44 = &sgmiiphy44;
+		phy_xfi1 = &xfiphy1;
+		phy_xfi2 = &xfiphy2;
+		phy_xfi3 = &xfiphy3;
+		phy_xfi4 = &xfiphy4;
+		xfi_pcs_mdio1 = &xfimdio0; /* alias numbers are 1-based, the xfimdio labels are 0-based */
+		xfi_pcs_mdio2 = &xfimdio1;
+		xfi_pcs_mdio3 = &xfimdio2;
+		xfi_pcs_mdio4 = &xfimdio3;
+		emi1_rgmii = &t4240mdio0; /* the five children of the mdio-mux-emi1 node */
+		emi1_slot1 = &t4240mdio1;
+		emi1_slot2 = &t4240mdio2;
+		emi1_slot3 = &t4240mdio3;
+		emi1_slot4 = &t4240mdio4;
+	};
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 { /* CFI NOR flash; first reg cell 0 selects the parent ranges entry that starts with 0 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>; /* offset 0, size 0x8000000 - the same size as the matching parent ranges entry */
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 { /* IFC NAND; first reg cell 2 selects the parent ranges entry that starts with 2 */
+			#address-cells = <1>;
+			#size-cells = <1>; /* partition reg below is <offset size> */
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+
+			partition@0 {
+				/* This location must not be altered */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND DTB Image";
+			};
+
+			partition@200000 {
+				/* 10MB for Linux Kernel Image */
+				reg = <0x00200000 0x00A00000>;
+				label = "NAND Linux Kernel Image";
+			};
+
+			partition@C00000 { /* NOTE(review): unit address is upper-case here, lower-case in the sibling nodes - confirm before renaming */
+				/* 500MB for Root file System Image */
+				reg = <0x00c00000 0x1F400000>; /* offset + size = 0x20000000 */
+				label = "NAND RFS Image";
+			};
+		};
+
+		board-control@3,0 { /* board FPGA register block that also hosts the EMI1 MDIO mux */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,t4240qds-fpga", "fsl,fpga-qixis";
+			reg = <3 0 0x300>; /* first cell 3 selects the parent ranges entry that starts with 3; size 0x300 */
+			ranges = <0 3 0 0x300>; /* child offset 0 maps to <3 0> on the parent bus, size 0x300 */
+
+			mdio-mux-emi1 { /* MMIO-register-controlled MDIO mux with five child buses */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "mdio-mux-mmioreg", "mdio-mux";
+				mdio-parent-bus = <&mdio1>; /* the mdio1 label is not defined in this file's own text */
+				reg = <0x54 1>; /* one-byte select register at offset 0x54 of this block */
+				mux-mask = <0xe0>; /* every child reg below (0x00..0x80) fits inside this mask */
+
+				t4240mdio0: mdio@0 { /* mux value 0x00; the only child without status = "disabled" */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0>;
+
+					phyrgmii1: ethernet-phy@1 {
+						reg = <0x1>;
+					};
+
+					phyrgmii2: ethernet-phy@2 {
+						reg = <0x2>;
+					};
+				};
+
+				t4240mdio1: mdio@20 { /* mux value 0x20; PHY addresses 0x0-0x3 and 0x1c-0x1f */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x20>;
+					status = "disabled"; /* NOTE(review): presumably enabled elsewhere when the slot is used - confirm */
+
+					phy1: ethernet-phy@0 {
+						reg = <0x0>;
+					};
+
+					phy2: ethernet-phy@1 {
+						reg = <0x1>;
+					};
+
+					phy3: ethernet-phy@2 {
+						reg = <0x2>;
+					};
+
+					phy4: ethernet-phy@3 {
+						reg = <0x3>;
+					};
+
+					sgmiiphy11: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					sgmiiphy12: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					sgmiiphy13: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					sgmiiphy14: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+
+				t4240mdio2: mdio@40 { /* mux value 0x40; PHY addresses 0x4-0x7 and 0x1c-0x1f */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x40>;
+					status = "disabled";
+
+					phy5: ethernet-phy@4 {
+						reg = <0x4>;
+					};
+
+					phy6: ethernet-phy@5 {
+						reg = <0x5>;
+					};
+
+					phy7: ethernet-phy@6 {
+						reg = <0x6>;
+					};
+
+					phy8: ethernet-phy@7 {
+						reg = <0x7>;
+					};
+
+					sgmiiphy21: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					sgmiiphy22: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					sgmiiphy23: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					sgmiiphy24: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+
+				t4240mdio3: mdio@60 { /* mux value 0x60; PHY addresses 0x8-0xb and 0x1c-0x1f */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x60>;
+					status = "disabled";
+
+					phy9: ethernet-phy@8 {
+						reg = <0x8>;
+					};
+
+					phy10: ethernet-phy@9 {
+						reg = <0x9>;
+					};
+
+					phy11: ethernet-phy@a {
+						reg = <0xa>;
+					};
+
+					phy12: ethernet-phy@b {
+						reg = <0xb>;
+					};
+
+					sgmiiphy31: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					sgmiiphy32: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					sgmiiphy33: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					sgmiiphy34: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+
+				t4240mdio4: mdio@80 { /* mux value 0x80; PHY addresses 0xc-0xf and 0x1c-0x1f */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x80>;
+					status = "disabled";
+
+					phy13: ethernet-phy@c {
+						reg = <0xc>;
+					};
+
+					phy14: ethernet-phy@d {
+						reg = <0xd>;
+					};
+
+					phy15: ethernet-phy@e {
+						reg = <0xe>;
+					};
+
+					phy16: ethernet-phy@f {
+						reg = <0xf>;
+					};
+
+					sgmiiphy41: ethernet-phy@1c {
+						reg = <0x1c>;
+					};
+
+					sgmiiphy42: ethernet-phy@1d {
+						reg = <0x1d>;
+					};
+
+					sgmiiphy43: ethernet-phy@1e {
+						reg = <0x1e>;
+					};
+
+					sgmiiphy44: ethernet-phy@1f {
+						reg = <0x1f>;
+					};
+				};
+			};
+		};
+	};
+
+	memory { /* no reg given here; NOTE(review): presumably filled in before boot - confirm */
+		device_type = "memory";
+	};
+
+	reserved-memory { /* three size/alignment-only regions; no fixed addresses are given */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges; /* empty ranges: child addresses equal parent addresses */
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>; /* alignment equals size */
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>; /* alignment equals size */
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>; /* alignment equals size */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>; /* child 0x0 -> CPU 0xf_00000000, size 0x01072000 (assumes 1 child address cell and 1 size cell) */
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>; /* child 0x0 -> CPU 0xf_f4000000, size 0x2000000 (same cell-count assumption) */
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>; /* child 0x0 -> CPU 0xf_f6000000, size 0x2000000 (same cell-count assumption) */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 { /* adds one SPI NOR flash child to the controller node */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040", "jedec,spi-nor";
+				reg = <0>; /* matches the unit address @0 */
+				spi-max-frequency = <40000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 { /* I2C bus with a PCA9547 mux at 0x77; devices sit behind mux channels 0 and 2 */
+			mux@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 { /* mux channel 0: six EEPROMs (0x51-0x56) and an RTC (0x68) */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0>;
+
+					eeprom@51 {
+						compatible = "atmel,24c256";
+						reg = <0x51>;
+					};
+					eeprom@52 {
+						compatible = "atmel,24c256";
+						reg = <0x52>;
+					};
+					eeprom@53 {
+						compatible = "atmel,24c256";
+						reg = <0x53>;
+					};
+					eeprom@54 {
+						compatible = "atmel,24c256";
+						reg = <0x54>;
+					};
+					eeprom@55 {
+						compatible = "atmel,24c256";
+						reg = <0x55>;
+					};
+					eeprom@56 {
+						compatible = "atmel,24c256";
+						reg = <0x56>;
+					};
+					rtc@68 {
+						compatible = "dallas,ds3232";
+						reg = <0x68>;
+						interrupts = <0x1 0x1 0 0>; /* four-cell specifier; cell meaning is defined by the interrupt parent, not shown here */
+					};
+				};
+
+				i2c@2 { /* mux channel 2: six INA220 monitors, all with the same shunt-resistor value */
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x2>;
+
+					ina220@40 {
+						compatible = "ti,ina220";
+						reg = <0x40>;
+						shunt-resistor = <1000>; /* NOTE(review): unit is presumably micro-ohms per the ina2xx binding - confirm */
+					};
+
+					ina220@41 {
+						compatible = "ti,ina220";
+						reg = <0x41>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@44 {
+						compatible = "ti,ina220";
+						reg = <0x44>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@45 {
+						compatible = "ti,ina220";
+						reg = <0x45>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@46 {
+						compatible = "ti,ina220";
+						reg = <0x46>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@47 {
+						compatible = "ti,ina220";
+						reg = <0x47>;
+						shunt-resistor = <1000>;
+					};
+				};
+			};
+		};
+
+		sdhc@114000 { /* board-specific slot voltage setting */
+			voltage-ranges = <1800 1800 3300 3300>; /* two <min max> pairs; NOTE(review): presumably millivolts (1.8 V and 3.3 V) - confirm against the binding */
+		};
+
+		fman@400000 { /* board wiring for this FMan: five ports disabled, PHY handles for eight MACs, two XFI MDIO buses */
+			port@83000 {
+				status = "disabled";
+			};
+
+			port@84000 {
+				status = "disabled";
+			};
+
+			port@85000 {
+				status = "disabled";
+			};
+
+			port@86000 {
+				status = "disabled";
+			};
+
+			port@87000 {
+				status = "disabled";
+			};
+
+			ethernet@e0000 {
+				phy-handle = <&phy5>; /* phy5..phy8 are children of the mux bus labelled t4240mdio2 */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy6>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy7>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy8>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phyrgmii2>; /* child of the mux bus labelled t4240mdio0 */
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@ea000 {
+				phy-handle = <&phy2>; /* child of the mux bus labelled t4240mdio1 */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&xauiphy1>; /* xauiphy labels are defined under fman@500000's mdio@fd000 */
+				phy-connection-type = "xgmii";
+			};
+
+			ethernet@f2000 {
+				phy-handle = <&xauiphy2>;
+				phy-connection-type = "xgmii";
+			};
+
+			xfimdio0: mdio@f1000 { /* disabled here; its PHY is not referenced by any phy-handle in this file */
+				status = "disabled";
+
+				xfiphy1: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+
+			xfimdio1: mdio@f3000 { /* disabled here; its PHY is not referenced by any phy-handle in this file */
+				status = "disabled";
+
+				xfiphy2: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+		};
+
+		fman@500000 { /* board wiring for this FMan: four ports disabled, PHY handles for eight MACs, XFI and XAUI MDIO buses */
+			port@84000 {
+				status = "disabled";
+			};
+
+			port@85000 {
+				status = "disabled";
+			};
+
+			port@86000 {
+				status = "disabled";
+			};
+
+			port@87000 {
+				status = "disabled";
+			};
+
+			ethernet@e0000 {
+				phy-handle = <&phy13>; /* phy13..phy16 are children of the mux bus labelled t4240mdio4 */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&phy14>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&phy15>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&phy16>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				phy-handle = <&phyrgmii1>; /* child of the mux bus labelled t4240mdio0 */
+				phy-connection-type = "rgmii";
+			};
+
+			ethernet@ea000 {
+				phy-handle = <&phy10>; /* child of the mux bus labelled t4240mdio3 */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&xauiphy3>; /* defined in mdio@fd000 below */
+				phy-connection-type = "xgmii";
+			};
+
+			ethernet@f2000 {
+				phy-handle = <&xauiphy4>; /* defined in mdio@fd000 below */
+				phy-connection-type = "xgmii";
+			};
+
+			xfimdio2: mdio@f1000 { /* disabled here; its PHY is not referenced by any phy-handle in this file */
+				status = "disabled";
+
+				xfiphy3: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+
+			xfimdio3: mdio@f3000 { /* disabled here; its PHY is not referenced by any phy-handle in this file */
+				status = "disabled";
+
+				xfiphy4: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+			};
+
+			mdio@fd000 { /* holds all four XAUI PHYs, including the two used by fman@400000 */
+				xauiphy1: ethernet-phy@0 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x0>;
+				};
+
+				xauiphy2: ethernet-phy@1 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x1>;
+				};
+
+				xauiphy3: ethernet-phy@2 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x2>;
+				};
+
+				xauiphy4: ethernet-phy@3 {
+					compatible = "ethernet-phy-ieee802.3-c45";
+					reg = <0x3>;
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 { /* board-level address windows for the controller labelled pci0 */
+		reg = <0xf 0xfe240000 0 0x10000>; /* register block at 0xf_fe240000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_00000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8000000, size 0x10000 */
+		pcie@0 { /* child node: both windows are passed through with identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	pci1: pcie@ffe250000 { /* board-level address windows for the controller labelled pci1 */
+		reg = <0xf 0xfe250000 0 0x10000>; /* register block at 0xf_fe250000, size 0x10000 */
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_20000000, size 0x20000000 */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8010000, size 0x10000 */
+		pcie@0 { /* child node: identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	pci2: pcie@ffe260000 { /* board-level address windows for the controller labelled pci2 */
+		reg = <0xf 0xfe260000 0 0x1000>; /* NOTE(review): size 0x1000 here, while pci0/pci1/pci3 use 0x10000 - confirm which is intended */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_40000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8020000, size 0x10000 */
+		pcie@0 { /* child node: identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	pci3: pcie@ffe270000 { /* board-level address windows for the controller labelled pci3 */
+		reg = <0xf 0xfe270000 0 0x10000>; /* register block at 0xf_fe270000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_60000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8030000, size 0x10000 */
+		pcie@0 { /* child node: identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+	rio: rapidio@ffe0c0000 { /* RapidIO register block plus one address window per port */
+		reg = <0xf 0xfe0c0000 0 0x11000>; /* register block at 0xf_fe0c0000, size 0x11000 */
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* CPU 0xc_20000000, size 0x10000000; NOTE(review): lies inside the pci1 memory window (0xc_20000000 + 0x20000000) - confirm */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>; /* CPU 0xc_30000000, size 0x10000000; NOTE(review): also inside the pci1 memory window - confirm */
+		};
+	};
+};
+
+/include/ "t4240si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
new file mode 100644
index 0000000000..145896f2ee
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
@@ -0,0 +1,363 @@
+/*
+ * T4240RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t4240si-pre.dtsi"
+
+/ {
+	model = "fsl,T4240RDB";
+	compatible = "fsl,T4240RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases { /* aliases for the eight SGMII PHYs labelled under fman@500000's mdio@fc000 in this file */
+		sgmii_phy21 = &sgmiiphy21;
+		sgmii_phy22 = &sgmiiphy22;
+		sgmii_phy23 = &sgmiiphy23;
+		sgmii_phy24 = &sgmiiphy24;
+		sgmii_phy41 = &sgmiiphy41;
+		sgmii_phy42 = &sgmiiphy42;
+		sgmii_phy43 = &sgmiiphy43;
+		sgmii_phy44 = &sgmiiphy44;
+	};
+
+	ifc: localbus@ffe124000 { /* local bus: register block plus three address windows */
+		reg = <0xf 0xfe124000 0 0x2000>; /* register block at 0xf_fe124000, size 0x2000 */
+		ranges = <0 0 0xf 0xe8000000 0x08000000 /* child <0 0> -> CPU 0xf_e8000000, size 0x08000000 (used by nor@0,0) */
+			  2 0 0xf 0xff800000 0x00010000 /* child <2 0> -> CPU 0xf_ff800000, size 0x10000 (used by nand@2,0) */
+			  3 0 0xf 0xffdf0000 0x00008000>; /* child <3 0> -> CPU 0xf_ffdf0000, size 0x8000; no child node here uses it */
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>; /* covers the whole first window */
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 { /* no partition children are declared in this file */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>; /* covers the whole second window */
+		};
+	};
+
+	memory { /* no reg given here; NOTE(review): presumably filled in before boot - confirm */
+		device_type = "memory";
+	};
+
+	reserved-memory { /* three size/alignment-only regions; no fixed addresses are given */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges; /* empty ranges: child addresses equal parent addresses */
+
+		bman_fbpr: bman-fbpr {
+			size = <0 0x1000000>; /* alignment equals size */
+			alignment = <0 0x1000000>;
+		};
+		qman_fqd: qman-fqd {
+			size = <0 0x400000>; /* alignment equals size */
+			alignment = <0 0x400000>;
+		};
+		qman_pfdr: qman-pfdr {
+			size = <0 0x2000000>; /* alignment equals size */
+			alignment = <0 0x2000000>;
+		};
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>; /* child 0x0 -> CPU 0xf_00000000, size 0x01072000 (assumes 1 child address cell and 1 size cell) */
+	};
+
+	bportals: bman-portals@ff4000000 {
+		ranges = <0x0 0xf 0xf4000000 0x2000000>; /* child 0x0 -> CPU 0xf_f4000000, size 0x2000000 (same cell-count assumption) */
+	};
+
+	qportals: qman-portals@ff6000000 {
+		ranges = <0x0 0xf 0xf6000000 0x2000000>; /* child 0x0 -> CPU 0xf_f6000000, size 0x2000000 (same cell-count assumption) */
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 { /* adds one SPI NOR flash child to the controller node */
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040", "jedec,spi-nor";
+				reg = <0>; /* matches the unit address @0 */
+				spi-max-frequency = <40000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 { /* devices attached directly to this bus (no mux node in this file) */
+			hwmon@2f { /* hardware monitor at address 0x2f */
+				compatible = "winbond,w83793";
+				reg = <0x2f>;
+			};
+			eeprom@52 { /* three EEPROMs at 0x52, 0x54 and 0x56 */
+				compatible = "atmel,24c256";
+				reg = <0x52>;
+			};
+			eeprom@54 {
+				compatible = "atmel,24c256";
+				reg = <0x54>;
+			};
+			eeprom@56 {
+				compatible = "atmel,24c256";
+				reg = <0x56>;
+			};
+			rtc@68 { /* no interrupts property is given for this RTC */
+				compatible = "dallas,ds1374";
+				reg = <0x68>;
+			};
+		};
+
+		sdhc@114000 { /* board-specific slot voltage setting */
+			voltage-ranges = <1800 1800 3300 3300>; /* two <min max> pairs; NOTE(review): presumably millivolts (1.8 V and 3.3 V) - confirm against the binding */
+		};
+
+		fman@400000 { /* board wiring for this FMan: six MACs get PHY handles, two are disabled */
+			ethernet@e0000 {
+				phy-handle = <&sgmiiphy21>; /* sgmiiphy21..24 are labelled under fman@500000's mdio@fc000 */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&sgmiiphy22>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&sgmiiphy23>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&sgmiiphy24>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				status = "disabled";
+			};
+
+			ethernet@ea000 {
+				status = "disabled";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&xfiphy1>; /* xfiphy1/xfiphy2 are labelled under fman@500000's mdio@fd000 */
+				phy-connection-type = "xgmii";
+			};
+
+			ethernet@f2000 {
+				phy-handle = <&xfiphy2>;
+				phy-connection-type = "xgmii";
+			};
+		};
+
+		fman@500000 { /* board wiring for this FMan, plus the two MDIO buses that hold every PHY used in this file */
+			ethernet@e0000 {
+				phy-handle = <&sgmiiphy41>; /* labelled in mdio@fc000 below */
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e2000 {
+				phy-handle = <&sgmiiphy42>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e4000 {
+				phy-handle = <&sgmiiphy43>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e6000 {
+				phy-handle = <&sgmiiphy44>;
+				phy-connection-type = "sgmii";
+			};
+
+			ethernet@e8000 {
+				status = "disabled";
+			};
+
+			ethernet@ea000 {
+				status = "disabled";
+			};
+
+			ethernet@f0000 {
+				phy-handle = <&xfiphy3>; /* labelled in mdio@fd000 below */
+				phy-connection-type = "xgmii";
+			};
+
+			ethernet@f2000 {
+				phy-handle = <&xfiphy4>; /* labelled in mdio@fd000 below */
+				phy-connection-type = "xgmii";
+			};
+
+			mdio@fc000 { /* eight SGMII PHYs at addresses 0-7; 0-3 are used by fman@400000, 4-7 by this FMan */
+				sgmiiphy21: ethernet-phy@0 {
+					reg = <0x0>;
+				};
+
+				sgmiiphy22: ethernet-phy@1 {
+					reg = <0x1>;
+				};
+
+				sgmiiphy23: ethernet-phy@2 {
+					reg = <0x2>;
+				};
+
+				sgmiiphy24: ethernet-phy@3 {
+					reg = <0x3>;
+				};
+
+				sgmiiphy41: ethernet-phy@4 {
+					reg = <0x4>;
+				};
+
+				sgmiiphy42: ethernet-phy@5 {
+					reg = <0x5>;
+				};
+
+				sgmiiphy43: ethernet-phy@6 {
+					reg = <0x6>;
+				};
+
+				sgmiiphy44: ethernet-phy@7 {
+					reg = <0x7>;
+				};
+			};
+
+			mdio@fd000 { /* four PHYs at addresses 0x10-0x13, all with the same PHY-ID compatible */
+				xfiphy1: ethernet-phy@10 {
+					compatible = "ethernet-phy-id13e5.1002";
+					reg = <0x10>;
+				};
+
+				xfiphy2: ethernet-phy@11 {
+					compatible = "ethernet-phy-id13e5.1002";
+					reg = <0x11>;
+				};
+
+				xfiphy3: ethernet-phy@13 { /* NOTE(review): xfiphy3 is at 0x13 and xfiphy4 at 0x12, so label order does not follow address order - confirm against board wiring */
+					compatible = "ethernet-phy-id13e5.1002";
+					reg = <0x13>;
+				};
+
+				xfiphy4: ethernet-phy@12 {
+					compatible = "ethernet-phy-id13e5.1002";
+					reg = <0x12>;
+				};
+			};
+		};
+	};
+
+	pci0: pcie@ffe240000 { /* board-level address windows for the controller labelled pci0 */
+		reg = <0xf 0xfe240000 0 0x10000>; /* register block at 0xf_fe240000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_00000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8000000, size 0x10000 */
+		pcie@0 { /* child node: both windows are passed through with identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	pci1: pcie@ffe250000 { /* board-level address windows for the controller labelled pci1 */
+		reg = <0xf 0xfe250000 0 0x10000>; /* register block at 0xf_fe250000, size 0x10000 */
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_20000000, size 0x20000000 */
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8010000, size 0x10000 */
+		pcie@0 { /* child node: identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	pci2: pcie@ffe260000 { /* board-level address windows for the controller labelled pci2 */
+		reg = <0xf 0xfe260000 0 0x1000>; /* NOTE(review): size 0x1000 here, while pci0/pci1/pci3 use 0x10000 - confirm which is intended */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_40000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8020000, size 0x10000 */
+		pcie@0 { /* child node: identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	pci3: pcie@ffe270000 { /* board-level address windows for the controller labelled pci3 */
+		reg = <0xf 0xfe270000 0 0x10000>; /* register block at 0xf_fe270000, size 0x10000 */
+		ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000 /* PCI memory space: bus 0xe0000000 -> CPU 0xc_60000000, size 0x20000000 */
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>; /* PCI I/O space: bus 0x0 -> CPU 0xf_f8030000, size 0x10000 */
+		pcie@0 { /* child node: identical child and parent addresses */
+			ranges = <0x02000000 0 0xe0000000 /* child address (memory space) */
+				  0x02000000 0 0xe0000000 /* parent address, same value */
+				  0 0x20000000 /* size, equal to the memory window above */
+
+				  0x01000000 0 0x00000000 /* child address (I/O space) */
+				  0x01000000 0 0x00000000 /* parent address, same value */
+				  0 0x00010000>; /* size */
+		};
+	};
+
+	rio: rapidio@ffe0c0000 { /* RapidIO register block plus one address window per port */
+		reg = <0xf 0xfe0c0000 0 0x11000>; /* register block at 0xf_fe0c0000, size 0x11000 */
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>; /* CPU 0xc_20000000, size 0x10000000; NOTE(review): lies inside the pci1 memory window (0xc_20000000 + 0x20000000) - confirm */
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>; /* CPU 0xc_30000000, size 0x10000000; NOTE(review): also inside the pci1 memory window - confirm */
+		};
+	};
+};
+
+/include/ "t4240si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
new file mode 100644
index 0000000000..fcac73486d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -0,0 +1,1111 @@
+/*
+ * T4240 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bman_fbpr {	/* adds properties to the node labelled bman_fbpr (label is declared outside this file) */
+	compatible = "fsl,bman-fbpr";
+	alloc-ranges = <0 0 0x10000 0>;	/* presumably <addr-hi addr-lo size-hi size-lo> — TODO confirm the parent's cell counts */
+};
+
+&qman_fqd {	/* adds properties to the node labelled qman_fqd (label is declared outside this file) */
+	compatible = "fsl,qman-fqd";
+	alloc-ranges = <0 0 0x10000 0>;	/* presumably <addr-hi addr-lo size-hi size-lo> — TODO confirm the parent's cell counts */
+};
+
+&qman_pfdr {	/* adds properties to the node labelled qman_pfdr (label is declared outside this file) */
+	compatible = "fsl,qman-pfdr";
+	alloc-ranges = <0 0 0x10000 0>;	/* presumably <addr-hi addr-lo size-hi size-lo> — TODO confirm the parent's cell counts */
+};
+
+&ifc {	/* SoC-level properties for the node labelled ifc */
+	#address-cells = <2>;	/* children use 2 address cells ... */
+	#size-cells = <1>;	/* ... and 1 size cell */
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+/* controller at 0x240000 */
+&pci0 {	/* SoC-level properties for pci0; reg/ranges are not set here */
+	compatible = "fsl,t4240-pcie", "fsl,qoriq-pcie-v3.0";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	pcie@0 {	/* child bus node carrying the INTx routing table */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		reg = <0 0 0 0 0>;
+		interrupts = <20 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare device-number bits and the pin cell only */
+		interrupt-map = <	/* row: <addr(3) pin(1) &mpic spec(4)> */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x250000 */
+&pci1 {	/* SoC-level properties for pci1; reg/ranges are not set here */
+	compatible = "fsl,t4240-pcie", "fsl,qoriq-pcie-v3.0";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <21 2 0 0>;
+	pcie@0 {	/* child bus node carrying the INTx routing table */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		reg = <0 0 0 0 0>;
+		interrupts = <21 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare device-number bits and the pin cell only */
+		interrupt-map = <	/* row: <addr(3) pin(1) &mpic spec(4)> */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x260000 */
+&pci2 {	/* SoC-level properties for pci2; reg/ranges are not set here */
+	compatible = "fsl,t4240-pcie", "fsl,qoriq-pcie-v3.0";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	pcie@0 {	/* child bus node carrying the INTx routing table */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		reg = <0 0 0 0 0>;
+		interrupts = <22 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare device-number bits and the pin cell only */
+		interrupt-map = <	/* row: <addr(3) pin(1) &mpic spec(4)> */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x270000 */
+&pci3 {	/* SoC-level properties for pci3; reg/ranges are not set here */
+	compatible = "fsl,t4240-pcie", "fsl,qoriq-pcie-v3.0";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <23 2 0 0>;
+	pcie@0 {	/* child bus node carrying the INTx routing table */
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		reg = <0 0 0 0 0>;
+		interrupts = <23 2 0 0>;	/* same specifier as the parent controller node */
+		interrupt-map-mask = <0xf800 0 0 7>;	/* compare device-number bits and the pin cell only */
+		interrupt-map = <	/* row: <addr(3) pin(1) &mpic spec(4)> */
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0
+			0000 0 0 2 &mpic 0 1 0 0
+			0000 0 0 3 &mpic 4 1 0 0
+			0000 0 0 4 &mpic 8 1 0 0
+			>;
+	};
+};
+
+&rio {	/* SoC-level properties for the node labelled rio and its two ports */
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	ranges;	/* empty ranges: child addresses map 1:1 onto the parent bus */
+
+	port1 {	/* port nodes only set cell counts and cell-index here */
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+	};
+};
+
+&dcsr {	/* DCSR register blocks; each child reg is <offset size> pairs (1 address cell, 1 size cell) */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,t4240-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0
+			      94 2 0 0
+			      95 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {	/* NOTE(review): has reg (first region at 0x1000) but no unit address; renaming changes the node path — confirm before touching */
+		compatible = "fsl,t4240-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;	/* two regions */
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {	/* NOTE(review): has reg (first region at 0x8000) but no unit address; renaming changes the node path — confirm before touching */
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;	/* two regions */
+	};
+	dcsr-dpaa@9000 {
+		compatible = "fsl,t4240-dcsr-dpaa", "fsl,dcsr-dpaa";
+		reg = <0x9000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,t4240-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {	/* the three dcsr-ddr nodes reference labels ddr1..ddr3 */
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-ddr@13000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr2>;
+		reg = <0x13000 0x1000>;
+	};
+	dcsr-ddr@14000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr3>;
+		reg = <0x14000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,t4240-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,t4240-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {	/* three snpc nodes: first region steps by 0x1000, second by 0x20000 */
+		compatible = "fsl,t4240-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,t4240-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;
+	};
+	dcsr-snpc@32000 {
+		compatible = "fsl,t4240-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x32000 0x1000 0x1062000 0x10000>;
+	};
+	dcsr-cpu-sb-proxy@100000 {	/* twelve proxies, 0x8000 apart, one per label cpu0..cpu11; each has two adjacent 0x1000 regions */
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@120000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu4>;
+		reg = <0x120000 0x1000 0x121000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@128000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu5>;
+		reg = <0x128000 0x1000 0x129000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@130000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu6>;
+		reg = <0x130000 0x1000 0x131000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@138000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu7>;
+		reg = <0x138000 0x1000 0x139000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@140000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu8>;
+		reg = <0x140000 0x1000 0x141000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@148000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu9>;
+		reg = <0x148000 0x1000 0x149000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@150000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu10>;
+		reg = <0x150000 0x1000 0x151000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@158000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu11>;
+		reg = <0x158000 0x1000 0x159000 0x1000>;
+	};
+};
+
+&bportals {	/* 50 bman-portal nodes; portal k has reg <k*0x4000 0x4000>, <0x1000000+k*0x1000 0x1000> */
+	#address-cells = <0x1>;
+	#size-cells = <0x1>;
+	compatible = "simple-bus";
+
+	bman-portal@0 {	/* interrupts are odd numbers: 105..175 for the first 36 portals */
+		compatible = "fsl,bman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <105 2 0 0>;
+	};
+	bman-portal@4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <107 2 0 0>;
+	};
+	bman-portal@8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <109 2 0 0>;
+	};
+	bman-portal@c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <111 2 0 0>;
+	};
+	bman-portal@10000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <113 2 0 0>;
+	};
+	bman-portal@14000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <115 2 0 0>;
+	};
+	bman-portal@18000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <117 2 0 0>;
+	};
+	bman-portal@1c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <119 2 0 0>;
+	};
+	bman-portal@20000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <121 2 0 0>;
+	};
+	bman-portal@24000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <123 2 0 0>;
+	};
+	bman-portal@28000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <125 2 0 0>;
+	};
+	bman-portal@2c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <127 2 0 0>;
+	};
+	bman-portal@30000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <129 2 0 0>;
+	};
+	bman-portal@34000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <131 2 0 0>;
+	};
+	bman-portal@38000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <133 2 0 0>;
+	};
+	bman-portal@3c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <135 2 0 0>;
+	};
+	bman-portal@40000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <137 2 0 0>;
+	};
+	bman-portal@44000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <139 2 0 0>;
+	};
+	bman-portal@48000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <141 2 0 0>;
+	};
+	bman-portal@4c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <143 2 0 0>;
+	};
+	bman-portal@50000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <145 2 0 0>;
+	};
+	bman-portal@54000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <147 2 0 0>;
+	};
+	bman-portal@58000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <149 2 0 0>;
+	};
+	bman-portal@5c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <151 2 0 0>;
+	};
+	bman-portal@60000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <153 2 0 0>;
+	};
+	bman-portal@64000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+		interrupts = <155 2 0 0>;
+	};
+	bman-portal@68000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+		interrupts = <157 2 0 0>;
+	};
+	bman-portal@6c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+		interrupts = <159 2 0 0>;
+	};
+	bman-portal@70000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+		interrupts = <161 2 0 0>;
+	};
+	bman-portal@74000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+		interrupts = <163 2 0 0>;
+	};
+	bman-portal@78000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+		interrupts = <165 2 0 0>;
+	};
+	bman-portal@7c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+		interrupts = <167 2 0 0>;
+	};
+	bman-portal@80000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+		interrupts = <169 2 0 0>;
+	};
+	bman-portal@84000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+		interrupts = <171 2 0 0>;
+	};
+	bman-portal@88000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+		interrupts = <173 2 0 0>;
+	};
+	bman-portal@8c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+		interrupts = <175 2 0 0>;
+	};
+	bman-portal@90000 {	/* interrupt numbering jumps here: 385..411 for the last 14 portals */
+		compatible = "fsl,bman-portal";
+		reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+		interrupts = <385 2 0 0>;
+	};
+	bman-portal@94000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+		interrupts = <387 2 0 0>;
+	};
+	bman-portal@98000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+		interrupts = <389 2 0 0>;
+	};
+	bman-portal@9c000 {
+		compatible = "fsl,bman-portal";
+		reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+		interrupts = <391 2 0 0>;
+	};
+	bman-portal@a0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+		interrupts = <393 2 0 0>;
+	};
+	bman-portal@a4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+		interrupts = <395 2 0 0>;
+	};
+	bman-portal@a8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+		interrupts = <397 2 0 0>;
+	};
+	bman-portal@ac000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+		interrupts = <399 2 0 0>;
+	};
+	bman-portal@b0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+		interrupts = <401 2 0 0>;
+	};
+	bman-portal@b4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+		interrupts = <403 2 0 0>;
+	};
+	bman-portal@b8000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+		interrupts = <405 2 0 0>;
+	};
+	bman-portal@bc000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+		interrupts = <407 2 0 0>;
+	};
+	bman-portal@c0000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+		interrupts = <409 2 0 0>;
+	};
+	bman-portal@c4000 {
+		compatible = "fsl,bman-portal";
+		reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+		interrupts = <411 2 0 0>;
+	};
+};
+
+&qportals {	/* 50 qman-portal nodes; portal k has reg <k*0x4000 0x4000>, <0x1000000+k*0x1000 0x1000> */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "simple-bus";
+
+	qportal0: qman-portal@0 {	/* cell-index matches the qportalN label; interrupts are even: 104..174 for the first 36 */
+		compatible = "fsl,qman-portal";
+		reg = <0x0 0x4000>, <0x1000000 0x1000>;
+		interrupts = <104 2 0 0>;
+		cell-index = <0>;
+	};
+	qportal1: qman-portal@4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+		interrupts = <106 2 0 0>;
+		cell-index = <1>;
+	};
+	qportal2: qman-portal@8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+		interrupts = <108 2 0 0>;
+		cell-index = <2>;
+	};
+	qportal3: qman-portal@c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+		interrupts = <110 2 0 0>;
+		cell-index = <3>;
+	};
+	qportal4: qman-portal@10000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+		interrupts = <112 2 0 0>;
+		cell-index = <4>;
+	};
+	qportal5: qman-portal@14000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+		interrupts = <114 2 0 0>;
+		cell-index = <5>;
+	};
+	qportal6: qman-portal@18000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+		interrupts = <116 2 0 0>;
+		cell-index = <6>;
+	};
+	qportal7: qman-portal@1c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+		interrupts = <118 2 0 0>;
+		cell-index = <7>;
+	};
+	qportal8: qman-portal@20000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+		interrupts = <120 2 0 0>;
+		cell-index = <8>;
+	};
+	qportal9: qman-portal@24000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+		interrupts = <122 2 0 0>;
+		cell-index = <9>;
+	};
+	qportal10: qman-portal@28000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+		interrupts = <124 2 0 0>;
+		cell-index = <10>;
+	};
+	qportal11: qman-portal@2c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+		interrupts = <126 2 0 0>;
+		cell-index = <11>;
+	};
+	qportal12: qman-portal@30000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+		interrupts = <128 2 0 0>;
+		cell-index = <12>;
+	};
+	qportal13: qman-portal@34000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+		interrupts = <130 2 0 0>;
+		cell-index = <13>;
+	};
+	qportal14: qman-portal@38000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+		interrupts = <132 2 0 0>;
+		cell-index = <14>;
+	};
+	qportal15: qman-portal@3c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+		interrupts = <134 2 0 0>;
+		cell-index = <15>;
+	};
+	qportal16: qman-portal@40000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+		interrupts = <136 2 0 0>;
+		cell-index = <16>;
+	};
+	qportal17: qman-portal@44000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+		interrupts = <138 2 0 0>;
+		cell-index = <17>;
+	};
+	qportal18: qman-portal@48000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+		interrupts = <140 2 0 0>;
+		cell-index = <18>;
+	};
+	qportal19: qman-portal@4c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+		interrupts = <142 2 0 0>;
+		cell-index = <19>;
+	};
+	qportal20: qman-portal@50000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+		interrupts = <144 2 0 0>;
+		cell-index = <20>;
+	};
+	qportal21: qman-portal@54000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+		interrupts = <146 2 0 0>;
+		cell-index = <21>;
+	};
+	qportal22: qman-portal@58000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+		interrupts = <148 2 0 0>;
+		cell-index = <22>;
+	};
+	qportal23: qman-portal@5c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+		interrupts = <150 2 0 0>;
+		cell-index = <23>;
+	};
+	qportal24: qman-portal@60000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+		interrupts = <152 2 0 0>;
+		cell-index = <24>;
+	};
+	qportal25: qman-portal@64000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+		interrupts = <154 2 0 0>;
+		cell-index = <25>;
+	};
+	qportal26: qman-portal@68000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+		interrupts = <156 2 0 0>;
+		cell-index = <26>;
+	};
+	qportal27: qman-portal@6c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+		interrupts = <158 2 0 0>;
+		cell-index = <27>;
+	};
+	qportal28: qman-portal@70000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+		interrupts = <160 2 0 0>;
+		cell-index = <28>;
+	};
+	qportal29: qman-portal@74000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+		interrupts = <162 2 0 0>;
+		cell-index = <29>;
+	};
+	qportal30: qman-portal@78000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+		interrupts = <164 2 0 0>;
+		cell-index = <30>;
+	};
+	qportal31: qman-portal@7c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+		interrupts = <166 2 0 0>;
+		cell-index = <31>;
+	};
+	qportal32: qman-portal@80000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+		interrupts = <168 2 0 0>;
+		cell-index = <32>;
+	};
+	qportal33: qman-portal@84000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+		interrupts = <170 2 0 0>;
+		cell-index = <33>;
+	};
+	qportal34: qman-portal@88000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+		interrupts = <172 2 0 0>;
+		cell-index = <34>;
+	};
+	qportal35: qman-portal@8c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+		interrupts = <174 2 0 0>;
+		cell-index = <35>;
+	};
+	qportal36: qman-portal@90000 {	/* interrupt numbering jumps here: 384..410 for the last 14 portals */
+		compatible = "fsl,qman-portal";
+		reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+		interrupts = <384 2 0 0>;
+		cell-index = <36>;
+	};
+	qportal37: qman-portal@94000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+		interrupts = <386 2 0 0>;
+		cell-index = <37>;
+	};
+	qportal38: qman-portal@98000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+		interrupts = <388 2 0 0>;
+		cell-index = <38>;
+	};
+	qportal39: qman-portal@9c000 {
+		compatible = "fsl,qman-portal";
+		reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+		interrupts = <390 2 0 0>;
+		cell-index = <39>;
+	};
+	qportal40: qman-portal@a0000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+		interrupts = <392 2 0 0>;
+		cell-index = <40>;
+	};
+	qportal41: qman-portal@a4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+		interrupts = <394 2 0 0>;
+		cell-index = <41>;
+	};
+	qportal42: qman-portal@a8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+		interrupts = <396 2 0 0>;
+		cell-index = <42>;
+	};
+	qportal43: qman-portal@ac000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+		interrupts = <398 2 0 0>;
+		cell-index = <43>;
+	};
+	qportal44: qman-portal@b0000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+		interrupts = <400 2 0 0>;
+		cell-index = <44>;
+	};
+	qportal45: qman-portal@b4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+		interrupts = <402 2 0 0>;
+		cell-index = <45>;
+	};
+	qportal46: qman-portal@b8000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+		interrupts = <404 2 0 0>;
+		cell-index = <46>;
+	};
+	qportal47: qman-portal@bc000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+		interrupts = <406 2 0 0>;
+		cell-index = <47>;
+	};
+	qportal48: qman-portal@c0000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+		interrupts = <408 2 0 0>;
+		cell-index = <48>;
+	};
+	qportal49: qman-portal@c4000 {
+		compatible = "fsl,qman-portal";
+		reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+		interrupts = <410 2 0 0>;
+		cell-index = <49>;
+	};
+};
+
+&soc {	/* SoC peripheral nodes; each child reg is <offset size> (1 address cell, 1 size cell) */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {	/* three memory controllers, 0x1000 apart */
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	ddr2: memory-controller@9000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x9000 0x1000>;
+		interrupts = <16 2 1 22>;
+	};
+
+	ddr3: memory-controller@a000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0xa000 0x1000>;
+		interrupts = <16 2 1 21>;
+	};
+
+	cpc: l3-cache-controller@10000 {	/* three reg regions and three interrupt specifiers */
+		compatible = "fsl,t4240-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000
+		       0x12000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26
+			      16 2 1 25>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x6000>;
+		fsl,portid-mapping = <0x8000>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+	};
+
+/include/ "qoriq-mpic4.3.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,t4240-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+/include/ "qoriq-clockgen2.dtsi"
+	global-utilities@e1000 {	/* sets only compatible here; presumably extends a node from the include above */
+		compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0";
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,t4240-rcpm", "fsl,qoriq-rcpm-2.0";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t4240-sfp";
+		reg	   = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t4240-serdes";
+		reg	   = <0xea000 0x4000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+/include/ "elo3-dma-2.dtsi"
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,t4240-esdhc", "fsl,esdhc";
+		sdhci,auto-cmd12;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {
+		compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
+		phy_type = "utmi";
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {
+		compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+/include/ "qoriq-sata2-0.dtsi"
+/include/ "qoriq-sata2-1.dtsi"
+/include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3-0.dtsi"
+/include/ "qoriq-fman3-0-1g-0.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+/include/ "qoriq-fman3-0-1g-5.dtsi"
+/include/ "qoriq-fman3-0-10g-0.dtsi"
+/include/ "qoriq-fman3-0-10g-1.dtsi"
+	fman@400000 {	/* attaches labels enet0..enet7 and disables both mdio nodes */
+		enet0: ethernet@e0000 {
+		};
+
+		enet1: ethernet@e2000 {
+		};
+
+		enet2: ethernet@e4000 {
+		};
+
+		enet3: ethernet@e6000 {
+		};
+
+		enet4: ethernet@e8000 {
+		};
+
+		enet5: ethernet@ea000 {
+		};
+
+		enet6: ethernet@f0000 {
+		};
+
+		enet7: ethernet@f2000 {
+		};
+
+		mdio@fc000 {
+			status = "disabled";
+		};
+
+		mdio@fd000 {
+			status = "disabled";
+		};
+	};
+
+/include/ "qoriq-fman3-1.dtsi"
+/include/ "qoriq-fman3-1-1g-0.dtsi"
+/include/ "qoriq-fman3-1-1g-1.dtsi"
+/include/ "qoriq-fman3-1-1g-2.dtsi"
+/include/ "qoriq-fman3-1-1g-3.dtsi"
+/include/ "qoriq-fman3-1-1g-4.dtsi"
+/include/ "qoriq-fman3-1-1g-5.dtsi"
+/include/ "qoriq-fman3-1-10g-0.dtsi"
+/include/ "qoriq-fman3-1-10g-1.dtsi"
+	fman@500000 {	/* attaches labels enet8..enet15 and gives both mdio nodes an interrupt */
+		enet8: ethernet@e0000 {
+		};
+
+		enet9: ethernet@e2000 {
+		};
+
+		enet10: ethernet@e4000 {
+		};
+
+		enet11: ethernet@e6000 {
+		};
+
+		enet12: ethernet@e8000 {
+		};
+
+		enet13: ethernet@ea000 {
+		};
+
+		enet14: ethernet@f0000 {
+		};
+
+		enet15: ethernet@f2000 {
+		};
+
+		mdio@fc000 {
+			interrupts = <100 1 0 0>;
+		};
+
+		mdio@fd000 {
+			interrupts = <101 1 0 0>;
+		};
+	};
+
+	L2_1: l2-cache-controller@c20000 {	/* three L2 controllers, 0x40000 apart, all chained to &cpc */
+		compatible = "fsl,t4240-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+	L2_2: l2-cache-controller@c60000 {
+		compatible = "fsl,t4240-l2-cache-controller";
+		reg = <0xc60000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+	L2_3: l2-cache-controller@ca0000 {
+		compatible = "fsl,t4240-l2-cache-controller";
+		reg = <0xca0000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
new file mode 100644
index 0000000000..632314c6fa
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
@@ -0,0 +1,175 @@
+/*
+ * T4240 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e6500_power_isa.dtsi"
+
+/ {
+	compatible = "fsl,T4240";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		crypto = &crypto;
+
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		dma2 = &dma2;
+		sdhc = &sdhc;
+
+		fman0 = &fman0;
+		fman1 = &fman1;
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		ethernet4 = &enet4;
+		ethernet5 = &enet5;
+		ethernet6 = &enet6;
+		ethernet7 = &enet7;
+		ethernet8 = &enet8;
+		ethernet9 = &enet9;
+		ethernet10 = &enet10;
+		ethernet11 = &enet11;
+		ethernet12 = &enet12;
+		ethernet13 = &enet13;
+		ethernet14 = &enet14;
+		ethernet15 = &enet15;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>;
+			clocks = <&clockgen 1 0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu4: PowerPC,e6500@8 {
+			device_type = "cpu";
+			reg = <8 9>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
+		};
+		cpu5: PowerPC,e6500@10 {
+			device_type = "cpu";
+			reg = <10 11>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
+		};
+		cpu6: PowerPC,e6500@12 {
+			device_type = "cpu";
+			reg = <12 13>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
+		};
+		cpu7: PowerPC,e6500@14 {
+			device_type = "cpu";
+			reg = <14 15>;
+			clocks = <&clockgen 1 1>;
+			next-level-cache = <&L2_2>;
+			fsl,portid-mapping = <0x40000000>;
+		};
+		cpu8: PowerPC,e6500@16 {
+			device_type = "cpu";
+			reg = <16 17>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
+		};
+		cpu9: PowerPC,e6500@18 {
+			device_type = "cpu";
+			reg = <18 19>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
+		};
+		cpu10: PowerPC,e6500@20 {
+			device_type = "cpu";
+			reg = <20 21>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
+		};
+		cpu11: PowerPC,e6500@22 {
+			device_type = "cpu";
+			reg = <22 23>;
+			clocks = <&clockgen 1 2>;
+			next-level-cache = <&L2_3>;
+			fsl,portid-mapping = <0x20000000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
new file mode 100644
index 0000000000..9311b86b1b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -0,0 +1,613 @@
+/*
+ * Device Tree Source for FSP2
+ *
+ * Copyright 2010,2012 IBM Corp.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "ibm,fsp2";
+	compatible = "ibm,fsp2";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC, 476FSP2";
+			reg = <0x0>;
+			clock-frequency = <0>;    /* Filled in by cuboot */
+			timebase-frequency = <0>; /* Filled in by cuboot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by
+							     cuboot */
+	};
+
+	clocks {
+		mmc_clk: mmc_clk {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <50000000>;
+			clock-output-names = "mmc_clk";
+		};
+	};
+
+	UIC0: uic0 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x2c0 0x8>;
+	};
+
+	/* "interrupts" field is <bit level bit level>
+	   first pair is non-critical, second is critical */
+	UIC1_0: uic1_0 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x2c8 0x8>;
+		interrupt-parent = <&UIC0>;
+		interrupts = <21 0x4 4 0x84>;
+	};
+
+	/* PSI and DMA */
+	UIC1_1: uic1_1 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x350 0x8>;
+		interrupt-parent = <&UIC0>;
+		interrupts = <22 0x4 5 0x84>;
+	};
+
+	/* Ethernet and USB */
+	UIC1_2: uic1_2 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x358 0x8>;
+		interrupt-parent = <&UIC0>;
+		interrupts = <23 0x4 6 0x84>;
+	};
+
+	/* PLB Errors */
+	UIC1_3: uic1_3 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <4>;
+		dcr-reg = <0x360 0x8>;
+		interrupt-parent = <&UIC0>;
+		interrupts = <24 0x4 7 0x84>;
+	};
+
+	UIC1_4: uic1_4 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <5>;
+		dcr-reg = <0x368 0x8>;
+		interrupt-parent = <&UIC0>;
+		interrupts = <25 0x4 8 0x84>;
+	};
+
+	UIC1_5: uic1_5 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <6>;
+		dcr-reg = <0x370 0x8>;
+		interrupt-parent = <&UIC0>;
+		interrupts = <26 0x4 9 0x84>;
+	};
+
+	/* 2nd level UICs for FSI */
+	UIC2_0: uic2_0 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <7>;
+		dcr-reg = <0x2d0 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <16 0x4 0 0x84>;
+	};
+
+	UIC2_1: uic2_1 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <8>;
+		dcr-reg = <0x2d8 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <17 0x4 1 0x84>;
+	};
+
+	UIC2_2: uic2_2 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <9>;
+		dcr-reg = <0x2e0 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <18 0x4 2 0x84>;
+	};
+
+	UIC2_3: uic2_3 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <10>;
+		dcr-reg = <0x2e8 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <19 0x4 3 0x84>;
+	};
+
+	UIC2_4: uic2_4 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <11>;
+		dcr-reg = <0x2f0 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <20 0x4 4 0x84>;
+	};
+
+	UIC2_5: uic2_5 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <12>;
+		dcr-reg = <0x2f8 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <21 0x4 5 0x84>;
+	};
+
+	UIC2_6: uic2_6 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <13>;
+		dcr-reg = <0x300 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <22 0x4 6 0x84>;
+	};
+
+	UIC2_7: uic2_7 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <14>;
+		dcr-reg = <0x308 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <23 0x4 7 0x84>;
+	};
+
+	UIC2_8: uic2_8 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <15>;
+		dcr-reg = <0x310 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <24 0x4 8 0x84>;
+	};
+
+	UIC2_9: uic2_9 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <16>;
+		dcr-reg = <0x318 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <25 0x4 9 0x84>;
+	};
+
+	UIC2_10: uic2_10 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <17>;
+		dcr-reg = <0x320 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <26 0x4 10 0x84>;
+	};
+
+	UIC2_11: uic2_11 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <18>;
+		dcr-reg = <0x328 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <27 0x4 11 0x84>;
+	};
+
+	UIC2_12: uic2_12 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <19>;
+		dcr-reg = <0x330 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <28 0x4 12 0x84>;
+	};
+
+	UIC2_13: uic2_13 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <20>;
+		dcr-reg = <0x338 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <29 0x4 13 0x84>;
+	};
+
+	UIC2_14: uic2_14 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <21>;
+		dcr-reg = <0x340 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <30 0x4 14 0x84>;
+	};
+
+	UIC2_15: uic2_15 {
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <22>;
+		dcr-reg = <0x348 0x8>;
+		interrupt-parent = <&UIC1_0>;
+		interrupts = <31 0x4 15 0x84>;
+	};
+
+	plb6 {
+		compatible = "ibm,plb6";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+
+		MCW0: memory-controller-wrapper {
+			compatible = "ibm,cw-476fsp2";
+			dcr-reg = <0x11111800 0x40>;
+		};
+
+		MCIF0: memory-controller {
+			compatible = "ibm,sdram-476fsp2", "ibm,sdram-4xx-ddr3";
+			dcr-reg = <0x11120000 0x10000>;
+			mcer-device = <&MCW0>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <10 0x84   /* ECC UE */
+				      11 0x84>; /* ECC CE */
+		};
+	};
+
+	plb4 {
+		compatible = "ibm,plb4";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x00000000 0x00000010 0x00000000 0x80000000
+			  0x80000000 0x00000010 0x80000000 0x80000000>;
+		clock-frequency = <333333334>;
+
+		plb6-system-hung-irq {
+			compatible = "ibm,bus-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0 0x84>;
+		};
+
+		l2-error-irq {
+			compatible = "ibm,bus-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <20 0x84>;
+		};
+
+		plb6-plb4-irq {
+			compatible = "ibm,bus-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <1 0x84>;
+		};
+
+		plb4-ahb-irq {
+			compatible = "ibm,bus-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC1_3>;
+			interrupts = <20 0x84>;
+		};
+
+		opbd-error-irq {
+			compatible = "ibm,opbd-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC1_4>;
+			interrupts = <5 0x84>;
+		};
+
+		cmu-error-irq {
+			compatible = "ibm,cmu-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <28 0x84>;
+		};
+
+		conf-error-irq {
+			compatible = "ibm,conf-error-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC1_4>;
+			interrupts = <11 0x84>;
+		};
+
+		mc-ue-irq {
+			compatible = "ibm,mc-ue-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <10 0x84>;
+		};
+
+		reset-warning-irq {
+			compatible = "ibm,reset-warning-irq";
+			#interrupt-cells = <2>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <17 0x84>;
+		};
+
+		MAL0: mcmal0 {
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			compatible = "ibm,mcmal";
+			dcr-reg = <0x80 0x80>;
+			num-tx-chans = <1>;
+			num-rx-chans = <1>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0 1 2 3 4>;
+			/* index interrupt-parent interrupt# type */
+			interrupt-map = </*TXEOB*/ 0 &UIC1_2 4 0x4
+					 /*RXEOB*/ 1 &UIC1_2 3 0x4
+					 /*SERR*/  2 &UIC1_2 7 0x4
+					 /*TXDE*/  3 &UIC1_2 6 0x4
+					 /*RXDE*/  4 &UIC1_2 5 0x4>;
+		};
+
+		MAL1: mcmal1 {
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			compatible = "ibm,mcmal";
+			dcr-reg = <0x100 0x80>;
+			num-tx-chans = <1>;
+			num-rx-chans = <1>;
+			interrupt-parent = <&MAL1>;
+			interrupts = <0 1 2 3 4>;
+			/* index interrupt-parent interrupt# type */
+			interrupt-map = </*TXEOB*/ 0 &UIC1_2 12 0x4
+					 /*RXEOB*/ 1 &UIC1_2 11 0x4
+					 /*SERR*/  2 &UIC1_2 15 0x4
+					 /*TXDE*/  3 &UIC1_2 14 0x4
+					 /*RXDE*/  4 &UIC1_2 13 0x4>;
+		};
+
+		mmc0: mmc@20c0000 {
+			compatible	= "st,sdhci-stih407", "st,sdhci";
+			reg		= <0x020c0000 0x20000>;
+			reg-names	= "mmc";
+			interrupts	= <21 0x4>;
+			interrupt-parent = <&UIC1_3>;
+			interrupt-names	= "mmcirq";
+			pinctrl-names	= "default";
+			pinctrl-0	= <>;
+			clock-names	= "mmc";
+			clocks		= <&mmc_clk>;
+			bus-width	= <4>;
+			non-removable;
+			sd-uhs-sdr50;
+			sd-uhs-sdr104;
+			sd-uhs-ddr50;
+		};
+
+		opb {
+			compatible = "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges; // pass-thru to parent bus
+			clock-frequency = <83333334>;
+
+			EMAC0: ethernet@b0000000 {
+				linux,network-index = <0>;
+				device_type = "network";
+				compatible = "ibm,emac4sync";
+				has-inverted-stacr-oc;
+				interrupt-parent = <&UIC1_2>;
+				interrupts = <1 0x4 0 0x4>;
+				reg = <0xb0000000 0x100>;
+				local-mac-address = [000000000000]; /* Filled in by
+							       cuboot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <4096>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <8192>;
+				phy-address = <1>;
+				phy-mode = "rgmii";
+				phy-map = <00000003>;
+				rgmii-device = <&RGMII>;
+				rgmii-channel = <0>;
+			};
+
+			EMAC1: ethernet@b0000100 {
+				linux,network-index = <1>;
+				device_type = "network";
+				compatible = "ibm,emac4sync";
+				has-inverted-stacr-oc;
+				interrupt-parent = <&UIC1_2>;
+				interrupts = <9 0x4 8 0x4>;
+				reg = <0xb0000100 0x100>;
+				local-mac-address = [000000000000]; /* Filled in by
+							       cuboot */
+				mal-device = <&MAL1>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <1>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <4096>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <8192>;
+				phy-address = <2>;
+				phy-mode = "rgmii";
+				phy-map = <00000003>;
+				rgmii-device = <&RGMII>;
+				rgmii-channel = <1>;
+			};
+
+			RGMII: rgmii@b0000600 {
+				compatible = "ibm,rgmii";
+				has-mdio;
+				reg = <0xb0000600 0x8>;
+			};
+
+			UART0: serial@b0020000 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xb0020000 0x8>;
+				virtual-reg = <0xb0020000>;
+				clock-frequency = <20833333>;
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <31 0x4>;
+			};
+		};
+
+		OHCI1: ohci@2040000 {
+			compatible = "ohci-le";
+			reg = <0x02040000 0xa0>;
+			interrupt-parent = <&UIC1_3>;
+			interrupts = <28 0x8 29 0x8>;
+		};
+
+		OHCI2: ohci@2080000 {
+			compatible = "ohci-le";
+			reg = <0x02080000 0xa0>;
+			interrupt-parent = <&UIC1_3>;
+			interrupts = <30 0x8 31 0x8>;
+		};
+
+		EHCI: ehci@2000000 {
+			compatible = "usb-ehci";
+			reg = <0x02000000 0xa4>;
+			interrupt-parent = <&UIC1_3>;
+			interrupts = <23 0x4>;
+		};
+
+	};
+
+	chosen {
+		stdout-path = "/plb4/opb/serial@b0020000"; /* UART0; must spell the bus node as "plb4", no "plb" node exists */
+		bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
+	};
+};
diff --git a/arch/powerpc/boot/dts/gamecube.dts b/arch/powerpc/boot/dts/gamecube.dts
new file mode 100644
index 0000000000..a564cb7cb1
--- /dev/null
+++ b/arch/powerpc/boot/dts/gamecube.dts
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/boot/dts/gamecube.dts
+ *
+ * Nintendo GameCube platform device tree source
+ * Copyright (C) 2007-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+
+/dts-v1/;
+
+/ {
+	model = "nintendo,gamecube";
+	compatible = "nintendo,gamecube";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	chosen {
+		bootargs = "root=/dev/gcnsda2 rootwait udbg-immortal";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x01800000>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,gekko@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clock-frequency = <486000000>; /* 486MHz */
+			bus-frequency = <162000000>; /* 162MHz core-to-bus 3x */
+			timebase-frequency = <40500000>; /* 162MHz / 4 */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+		};
+	};
+
+	/* devices contained in the flipper chipset */
+	flipper {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "nintendo,flipper";
+		ranges = <0x0c000000 0x0c000000 0x00010000>;
+		interrupt-parent = <&PIC>;
+
+		video@c002000 {
+			compatible = "nintendo,flipper-vi";
+			reg = <0x0c002000 0x100>;
+			interrupts = <8>;
+		};
+
+		processor-interface@c003000 {
+			compatible = "nintendo,flipper-pi";
+			reg = <0x0c003000 0x100>;
+
+			PIC: pic {
+				#interrupt-cells = <1>;
+				compatible = "nintendo,flipper-pic";
+				interrupt-controller;
+			};
+		};
+
+		dsp@c005000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "nintendo,flipper-dsp";
+			reg = <0x0c005000 0x200>;
+			interrupts = <6>;
+
+			memory@0 {
+				compatible = "nintendo,flipper-aram";
+				reg = <0 0x1000000>;	/* 16MB */
+			};
+		};
+
+		disk@c006000 {
+			compatible = "nintendo,flipper-di";
+			reg = <0x0c006000 0x40>;
+			interrupts = <2>;
+		};
+
+		audio@c006c00 {
+			compatible = "nintendo,flipper-ai";
+			reg = <0x0c006c00 0x20>;
+			interrupts = <6>; /* NOTE(review): same number as dsp@c005000 above; confirm the sharing is intended */
+		};
+
+		gamepad-controller@c006400 {
+			compatible = "nintendo,flipper-si";
+			reg = <0x0c006400 0x100>;
+			interrupts = <3>;
+		};
+
+		/* External Interface bus */
+		exi@c006800 {
+			compatible = "nintendo,flipper-exi";
+			reg = <0x0c006800 0x40>;
+			virtual-reg = <0x0c006800>;
+			interrupts = <4>;
+		};
+        };
+};
+
diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts
new file mode 100644
index 0000000000..e84ff1afb5
--- /dev/null
+++ b/arch/powerpc/boot/dts/glacier.dts
@@ -0,0 +1,576 @@
+/*
+ * Device Tree Source for AMCC Glacier (460GT)
+ *
+ * Copyright 2008-2010 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,glacier";
+	compatible = "amcc,glacier";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		ethernet2 = &EMAC2;
+		ethernet3 = &EMAC3;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,460GT";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			next-level-cache = <&L2C0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-460gt","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-460gt";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-460gt";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	L2C0: l2c {
+		compatible = "ibm,l2-cache-460gt", "ibm,l2-cache";
+		dcr-reg = <0x020 0x008		/* Internal SRAM DCR's */
+			   0x030 0x008>;	/* L2 cache DCR's */
+		cache-line-size = <32>;		/* 32 bytes */
+		cache-size = <262144>;		/* L2, 256K */
+		interrupt-parent = <&UIC1>;
+		interrupts = <11 1>;
+	};
+
+	plb {
+		compatible = "ibm,plb-460gt", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-460gt", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		CRYPTO: crypto@180000 {
+			compatible = "amcc,ppc460gt-crypto", "amcc,ppc460ex-crypto",
+				"amcc,ppc4xx-crypto";
+			reg = <4 0x00180000 0x80400>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x1d 0x4>;
+		};
+
+		HWRNG: hwrng@110000 {
+			compatible = "amcc,ppc460ex-rng", "ppc4xx-rng";
+			reg = <4 0x00110000 0x50>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-460gt", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <32>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <	/*TXEOB*/ 0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x3 0x4
+					/*TXDE*/  0x4 0x4
+					/*RXDE*/  0x5 0x4>;
+			desc-base-addr-high = <0x8>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-460gt", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-460gt", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x6 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <0x00200000 0x01400000>;
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <0x01600000 0x00400000>;
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <0x01a00000 0x02560000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+
+				ndfc@3,0 {
+					compatible = "ibm,ndfc";
+					reg = <0x00000003 0x00000000 0x00002000>;
+					ccr = <0x00001000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					nand { /* partition layout; each reg is <offset size> */
+						#address-cells = <1>;
+						#size-cells = <1>;
+
+						partition@0 {
+							label = "u-boot";
+							reg = <0x00000000 0x00100000>;
+						};
+						partition@100000 {
+							label = "user";
+							reg = <0x00100000 0x03f00000>; /* offset matches unit address; starts right after "u-boot" */
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART1: serial@ef600400 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x00000008>;
+				virtual-reg = <0xef600400>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART2: serial@ef600500 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600500 0x00000008>;
+				virtual-reg = <0xef600500>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC1>;
+				interrupts = <28 0x4>;
+			};
+
+			UART3: serial@ef600600 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600600 0x00000008>;
+				virtual-reg = <0xef600600>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC1>;
+				interrupts = <29 0x4>;
+			};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-460gt", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+				rtc@68 {
+					compatible = "st,m41t80";
+					reg = <0x68>;
+					interrupt-parent = <&UIC2>;
+					interrupts = <0x19 0x8>;
+				};
+				sttm@48 {
+					compatible = "ad,ad7414";
+					reg = <0x48>;
+					interrupt-parent = <&UIC1>;
+					interrupts = <0x14 0x8>;
+				};
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-460gt", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-460gt", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			RGMII0: emac-rgmii@ef601500 {
+				compatible = "ibm,rgmii-460gt", "ibm,rgmii";
+				reg = <0xef601500 0x00000008>;
+				has-mdio;
+			};
+
+			RGMII1: emac-rgmii@ef601600 {
+				compatible = "ibm,rgmii-460gt", "ibm,rgmii";
+				reg = <0xef601600 0x00000008>;
+				has-mdio;
+			};
+
+			TAH0: emac-tah@ef601350 {
+				compatible = "ibm,tah-460gt", "ibm,tah";
+				reg = <0xef601350 0x00000030>;
+			};
+
+			TAH1: emac-tah@ef601450 {
+				compatible = "ibm,tah-460gt", "ibm,tah";
+				reg = <0xef601450 0x00000030>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x10 0x4
+						 /*Wake*/   0x1 &UIC2 0x14 0x4>;
+				reg = <0xef600e00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC1>;	/* self-reference: resolved by the interrupt-map below */
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x11 0x4
+						 /*Wake*/   0x1 &UIC2 0x15 0x4>;
+				reg = <0xef600f00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <8>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;	/* second channel of RGMII0; EMAC0 uses channel 0 */
+				tah-device = <&TAH1>;
+				tah-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;	/* NOTE(review): presumably PHY management goes through EMAC0 - confirm against the EMAC binding */
+			};
+
+			EMAC2: ethernet@ef601100 {	/* unlike EMAC0/EMAC1, this node has no tah-device/tah-channel */
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC2>;	/* self-reference: resolved by the interrupt-map below */
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x12 0x4
+						 /*Wake*/   0x1 &UIC2 0x16 0x4>;
+				reg = <0xef601100 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2>;
+				mal-rx-channel = <16>;
+				cell-index = <2>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII1>;
+				rgmii-channel = <0>;	/* first channel of RGMII1; EMAC3 uses channel 1 */
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;	/* same mdio-device as EMAC1 and EMAC3 */
+			};
+
+			EMAC3: ethernet@ef601200 {	/* unlike EMAC0/EMAC1, this node has no tah-device/tah-channel */
+				device_type = "network";
+				compatible = "ibm,emac-460gt", "ibm,emac4sync";
+				interrupt-parent = <&EMAC3>;	/* self-reference: resolved by the interrupt-map below */
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC2 0x13 0x4
+						 /*Wake*/   0x1 &UIC2 0x17 0x4>;
+				reg = <0xef601200 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <3>;
+				mal-rx-channel = <24>;
+				cell-index = <3>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII1>;
+				rgmii-channel = <1>;	/* second channel of RGMII1; EMAC2 uses channel 0 */
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>;	/* same mdio-device as EMAC1 and EMAC2 */
+			};
+		};
+
+		PCIX0: pci@c0ec00000 {	/* unit address = first reg entry (0xc 0x0ec00000) */
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pcix-460gt", "ibm,plb-pcix";
+			primary;
+			large-inbound-windows;
+			enable-msi-hole;
+			reg = <0x0000000c 0x0ec00000   0x00000008	/* Config space access */
+			       0x00000000 0x00000000 0x00000000		/* no IACK cycles */
+			       0x0000000c 0x0ed00000   0x00000004   /* Special cycles */
+			       0x0000000c 0x0ec80000 0x00000100	/* Internal registers */
+			       0x0000000c 0x0ec80100  0x000000fc>;	/* Internal messaging registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000d 0x80000000 0x00000000 0x80000000	/* 7 cells per row: 3 PCI address, 2 parent address, 2 size */
+				  0x02000000 0x00000000 0x00000000 0x0000000c 0x0ee00000 0x00000000 0x00100000	/* second 0x02000000 (memory) row, at PCI address 0 */
+				  0x01000000 0x00000000 0x00000000 0x0000000c 0x08000000 0x00000000 0x00010000>;	/* 0x01000000 = I/O space row */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0 to 0x3f */
+			bus-range = <0x0 0x3f>;
+
+			/* All PCI interrupts are routed to ext IRQ 2 -> UIC1-0 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x0>;	/* all-zero mask: every device and pin matches the one entry below */
+			interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >;
+		};
+
+		PCIE0: pcie@d00000000 {	/* PCIe port 0; PCIE1 below is the same layout with port = <0x1> */
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460ex", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x0000000d 0x00000000 0x20000000	/* Config space access */
+			       0x0000000c 0x08010000 0x00001000>;	/* Registers */
+			dcr-reg = <0x100 0x020>;
+			sdr-base = <0x300>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000	/* 7 cells per row: 3 PCI address, 2 parent address, 2 size */
+				  0x02000000 0x00000000 0x00000000 0x0000000f 0x00000000 0x00000000 0x00100000	/* second 0x02000000 (memory) row, at PCI address 0 */
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;	/* 0x01000000 = I/O space row */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x40 to 0x7f */
+			bus-range = <0x40 0x7f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;	/* address cells masked out: only the pin number is matched */
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0xc 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0xd 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0xe 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@d20000000 {	/* PCIe port 1; same layout as PCIE0 with shifted windows and IRQs */
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460ex", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x0000000d 0x20000000 0x20000000	/* Config space access */
+			       0x0000000c 0x08011000 0x00001000>;	/* Registers */
+			dcr-reg = <0x120 0x020>;
+			sdr-base = <0x340>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x80000000 0x00000000 0x80000000	/* 7 cells per row: 3 PCI address, 2 parent address, 2 size */
+				  0x02000000 0x00000000 0x00000000 0x0000000f 0x00100000 0x00000000 0x00100000	/* second 0x02000000 (memory) row, at PCI address 0 */
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80010000 0x00000000 0x00010000>;	/* 0x01000000 = I/O space row */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x80 to 0xbf */
+			bus-range = <0x80 0xbf>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;	/* address cells masked out: only the pin number is matched */
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x10 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x11 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x12 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x13 0x4 /* swizzled int D */>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts
new file mode 100644
index 0000000000..f81ce8786d
--- /dev/null
+++ b/arch/powerpc/boot/dts/haleakala.dts
@@ -0,0 +1,281 @@
+/*
+ * Device Tree Source for AMCC Haleakala (405EXr)
+ *
+ * Copyright 2008 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "amcc,haleakala";
+	compatible = "amcc,haleakala", "amcc,kilauea";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {	/* short names for nodes defined under plb/opb below */
+		ethernet0 = &EMAC0;	/* ethernet@ef600900 */
+		serial0 = &UART0;	/* serial@ef600200 */
+		serial1 = &UART1;	/* serial@ef600300 */
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {	/* target of the root node's dcr-parent reference */
+			device_type = "cpu";
+			model = "PowerPC,405EXr";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>; /* 16 kB */
+			d-cache-size = <16384>; /* 16 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {	/* one address cell + one size cell, per the root #address-cells/#size-cells */
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller {	/* no interrupt-parent here; UIC1 and UIC2 name UIC0 as theirs */
+		compatible = "ibm,uic-405exr", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;	/* consumers in this file write <number 0x4> or <number 0x1> pairs */
+	};
+
+	UIC1: interrupt-controller1 {	/* second UIC, chained into UIC0 */
+		compatible = "ibm,uic-405exr","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;	/* the two cascade specifiers above are UIC0 inputs 0x1e and 0x1f */
+	};
+
+	UIC2: interrupt-controller2 {	/* third UIC, chained into UIC0 */
+		compatible = "ibm,uic-405exr","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;	/* the two cascade specifiers above are UIC0 inputs 0x1c and 0x1d */
+	};
+
+	plb {
+		compatible = "ibm,plb-405exr", "ibm,plb4";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: memory-controller {	/* DCR-addressed (dcr-reg); it has no reg property */
+			compatible = "ibm,sdram-405exr", "ibm,sdram-4xx-ddr2";
+			dcr-reg = <0x010 0x002>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <0x5 0x4	/* ECC DED Error */
+				      0x6 0x4>;	/* ECC SEC Error */
+		};
+
+		MAL0: mcmal {	/* referenced by EMAC0 through mal-device */
+			compatible = "ibm,mcmal-405exr", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;	/* self-reference: resolved by the interrupt-map below */
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;	/* five 1-cell specifiers, the keys of the map below */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;	/* the whole specifier cell takes part in the match */
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405exr", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x80000000 0x80000000 0x10000000
+				  0xef600000 0xef600000 0x00a00000
+				  0xf0000000 0xf0000000 0x10000000>;
+			dcr-reg = <0x0a0 0x005>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {	/* child addresses use two cells, e.g. nor_flash@0,0 below */
+				compatible = "ibm,ebc-405exr", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;	/* <0 0> address pair, size 0x04000000 */
+					#address-cells = <1>;
+					#size-cells = <1>;
+					/* The five partitions below are contiguous and end at 0x04000000, the flash size */
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x00200000>;
+					};
+					partition@200000 {
+						label = "root";
+						reg = <0x00200000 0x00200000>;
+					};
+					partition@400000 {
+						label = "user";
+						reg = <0x00400000 0x03b60000>;	/* ends at 0x03f60000 */
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;	/* ends at 0x03fa0000 */
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;	/* ends at 0x04000000 */
+					};
+				};
+			};
+
+			UART0: serial@ef600200 {	/* aliased as serial0 */
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600200 0x00000008>;
+				virtual-reg = <0xef600200>;	/* same value as the reg base address */
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1a 0x4>;
+			};
+
+			UART1: serial@ef600300 {	/* aliased as serial1 */
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;	/* same value as the reg base address */
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600400 {	/* no child devices are described on this bus */
+				compatible = "ibm,iic-405exr", "ibm,iic";
+				reg = <0xef600400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@ef600500 {	/* no child devices are described on this bus */
+				compatible = "ibm,iic-405exr", "ibm,iic";
+				reg = <0xef600500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+
+			RGMII0: emac-rgmii@ef600b00 {	/* referenced by EMAC0 through rgmii-device */
+				compatible = "ibm,rgmii-405exr", "ibm,rgmii";
+				reg = <0xef600b00 0x00000104>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@ef600900 {	/* the only EMAC in this file; aliased as ethernet0 */
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-405exr", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;	/* self-reference: resolved by the interrupt-map below */
+				interrupts = <0x0 0x1>;	/* two 1-cell specifiers, the keys of the map below */
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
+						/*Wake*/  0x1 &UIC1 0x1d 0x4>;
+				reg = <0xef600900 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCIE0: pcie@a0000000 {	/* unit address = first reg entry (config space) */
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0xa0000000 0x20000000	/* Config space access */
+			       0xef000000 0x00001000>;	/* Registers */
+			dcr-reg = <0x040 0x020>;
+			sdr-base = <0x400>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x90000000 0x00000000 0x08000000	/* 6 cells per row: 3 PCI address, 1 plb address, 2 size */
+				  0x01000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00010000>;	/* 0x01000000 = I/O space row */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x00 to 0x3f */
+			bus-range = <0x0 0x3f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;	/* address cells masked out: only the pin number is matched */
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC2 0x0 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC2 0x1 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC2 0x2 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
new file mode 100644
index 0000000000..02bd304c7d
--- /dev/null
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -0,0 +1,196 @@
+/*
+ * Device Tree Source for IBM Holly (PPC 750CL with TSI controller)
+ * Copyright 2007, IBM Corporation
+ *
+ * Stephen Winiecki <stevewin@us.ibm.com>
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "41K7339";
+	compatible = "ibm,holly";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {	/* unlike the U-Boot-filled boards in this patch, all frequencies here are fixed values */
+		#address-cells = <1>;
+		#size-cells =<0>;
+		PowerPC,750CL@0 {
+			device_type = "cpu";
+			reg = <0x00000000>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;	/* 0x8000 */
+			i-cache-size = <32768>;	/* 0x8000 */
+			d-cache-sets = <128>;
+			i-cache-sets = <128>;
+			timebase-frequency = <50000000>;
+			clock-frequency = <600000000>;
+			bus-frequency = <200000000>;
+		};
+	};
+
+	memory@0 {	/* fixed size in the source; no "filled in" placeholder here */
+		device_type = "memory";
+		reg = <0x00000000 0x20000000>;	/* base 0, size 0x20000000 (one address cell + one size cell) */
+	};
+
+  	tsi109@c0000000 {
+		device_type = "tsi-bridge";
+		compatible = "tsi109-bridge", "tsi108-bridge";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x00000000 0xc0000000 0x00010000>;
+		reg = <0xc0000000 0x00010000>;
+
+		i2c@7000 {	/* offset within the tsi109 bridge window, see the parent's ranges */
+			device_type = "i2c";
+			compatible  = "tsi109-i2c", "tsi108-i2c";
+			interrupt-parent = <&MPIC>;	/* MPIC is the pic@7400 sibling defined further down */
+			interrupts = <0xe 0x2>;
+			reg = <0x00007000 0x00000400>;
+		};
+
+		MDIO: mdio@6000 {	/* both ethernet nodes point here through mdio-handle */
+			compatible = "tsi109-mdio", "tsi108-mdio";
+			reg = <0x00006000 0x00000050>;	/* NOTE(review): lies inside the reg range of ethernet@6200 (0x6000, size 0x200) - confirm intended */
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			PHY1: ethernet-phy@1 {	/* phy-handle target of ethernet@6200 */
+				compatible = "bcm5461a";
+				reg = <0x00000001>;
+				txc-rxc-delay-disable;
+			};
+
+			PHY2: ethernet-phy@2 {	/* phy-handle target of ethernet@6600 */
+				compatible = "bcm5461a";
+				reg = <0x00000002>;
+				txc-rxc-delay-disable;
+			};
+		};
+
+		ethernet@6200 {	/* NOTE(review): unit address 6200 differs from the reg base 0x6000 below - confirm which is intended */
+			device_type = "network";
+			compatible = "tsi109-ethernet", "tsi108-ethernet";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x00006000 0x00000200>;
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero in the source */
+			interrupt-parent = <&MPIC>;
+			interrupts = <0x10 0x2>;
+			mdio-handle = <&MDIO>;
+			phy-handle = <&PHY1>;	/* ethernet-phy@1 under MDIO */
+		};
+
+		ethernet@6600 {	/* NOTE(review): unit address 6600 differs from the reg base 0x6400 below - confirm which is intended */
+			device_type = "network";
+			compatible = "tsi109-ethernet", "tsi108-ethernet";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x00006400 0x00000200>;
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero in the source */
+			interrupt-parent = <&MPIC>;
+			interrupts = <0x11 0x2>;
+			mdio-handle = <&MDIO>;
+			phy-handle = <&PHY2>;	/* ethernet-phy@2 under MDIO */
+		};
+
+		serial@7808 {	/* the node named by /chosen stdout-path */
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x00007808 0x00000200>;
+			virtual-reg = <0xc0007808>;	/* bridge base 0xc0000000 (parent ranges) + 0x7808 */
+			clock-frequency = <1067212800>;
+			current-speed = <115200>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <0xc 0x2>;
+		};
+
+		serial@7c08 {	/* second UART; same settings as serial@7808 apart from address and interrupt */
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x00007c08 0x00000200>;
+			virtual-reg = <0xc0007c08>;	/* bridge base 0xc0000000 (parent ranges) + 0x7c08 */
+			clock-frequency = <1067212800>;
+			current-speed = <115200>;
+			interrupt-parent = <&MPIC>;
+			interrupts = <0xd 0x2>;
+		};
+
+		MPIC: pic@7400 {	/* interrupt-parent of the tsi109 children and of the pci node below */
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+			interrupt-controller;
+			#interrupt-cells = <2>;	/* consumers in this file write <number 0x2> pairs */
+			reg = <0x00007400 0x00000400>;
+			big-endian;
+		};
+	};
+
+	pci@c0001000 {	/* sits at root level, next to the tsi109 bridge node rather than inside it */
+		device_type = "pci";
+		compatible = "tsi109-pci", "tsi108-pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xc0001000 0x00001000>;
+		bus-range = <0x0 0x0>;
+		/*----------------------------------------------------+
+		| PCI memory range.
+		| 01 denotes I/O space
+		| 02 denotes 32-bit memory space
+		+----------------------------------------------------*/
+		ranges = <0x02000000 0x00000000 0x40000000 0x40000000 0x00000000 0x10000000	/* 6 cells per row: 3 PCI address, 1 parent address, 2 size */
+			  0x01000000 0x00000000 0x00000000 0x7e000000 0x00000000 0x00010000>;
+		clock-frequency = <133333332>;
+		interrupt-parent = <&MPIC>;
+		interrupts = <0x17 0x2>;	/* the RT0 child below repeats this same specifier */
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* match on bits 15..11 of the first address cell plus the pin */
+		/*----------------------------------------------------+
+		| The INTA, INTB, INTC, INTD are shared.
+		+----------------------------------------------------*/
+		interrupt-map = <
+			0x800 0x0 0x0 0x1 &RT0 0x24 0x0	/* 0x800 >> 11 = 1 */
+			0x800 0x0 0x0 0x2 &RT0 0x25 0x0
+			0x800 0x0 0x0 0x3 &RT0 0x26 0x0
+			0x800 0x0 0x0 0x4 &RT0 0x27 0x0
+
+			0x1000 0x0 0x0 0x1 &RT0 0x25 0x0	/* 0x1000 >> 11 = 2; same four lines, rotated by one */
+			0x1000 0x0 0x0 0x2 &RT0 0x26 0x0
+			0x1000 0x0 0x0 0x3 &RT0 0x27 0x0
+			0x1000 0x0 0x0 0x4 &RT0 0x24 0x0
+
+			0x1800 0x0 0x0 0x1 &RT0 0x26 0x0	/* 0x1800 >> 11 = 3; rotated by two */
+			0x1800 0x0 0x0 0x2 &RT0 0x27 0x0
+			0x1800 0x0 0x0 0x3 &RT0 0x24 0x0
+			0x1800 0x0 0x0 0x4 &RT0 0x25 0x0
+
+			0x2000 0x0 0x0 0x1 &RT0 0x27 0x0	/* 0x2000 >> 11 = 4; rotated by three */
+			0x2000 0x0 0x0 0x2 &RT0 0x24 0x0
+			0x2000 0x0 0x0 0x3 &RT0 0x25 0x0
+			0x2000 0x0 0x0 0x4 &RT0 0x26 0x0
+			>;
+
+		RT0: router@1180 {	/* NOTE(review): has a unit address but no reg property - confirm intended */
+			device_type = "pic-router";
+			interrupt-controller;
+			big-endian;
+			clock-frequency = <0>;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	/* matches the two cells after &RT0 in the map above */
+			interrupts = <0x17 0x2>;
+			interrupt-parent = <&MPIC>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/tsi109@c0000000/serial@7808";	/* full path of the first serial node under the bridge */
+	};
+};
diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts
new file mode 100644
index 0000000000..b93bf2d9dd
--- /dev/null
+++ b/arch/powerpc/boot/dts/hotfoot.dts
@@ -0,0 +1,296 @@
+/*
+ * Device Tree Source for ESTeem 195E Hotfoot
+ *
+ * Copyright 2009 AbsoluteValue Systems <solomon@linux-wlan.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "est,hotfoot";
+	compatible = "est,hotfoot";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {	/* short names for nodes defined under plb/opb below */
+		ethernet0 = &EMAC0;	/* ethernet@ef600800 */
+		ethernet1 = &EMAC1;	/* ethernet@ef600900 */
+		serial0 = &UART0;	/* serial@ef600400 */
+		serial1 = &UART1;	/* serial@ef600300 */
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {	/* target of the root node's dcr-parent reference */
+			device_type = "cpu";
+			model = "PowerPC,405EP";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <0x20>;	/* 32 */
+			d-cache-line-size = <0x20>;	/* 32 */
+			i-cache-size = <0x4000>;	/* 16384 */
+			d-cache-size = <0x4000>;	/* 16384 */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {	/* one address cell + one size cell, per the root #address-cells/#size-cells */
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller {	/* the only interrupt controller in this file; no cascade */
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;	/* consumers in this file write <number 0x4> or <number 0x8> pairs */
+	};
+
+	plb {
+		compatible = "ibm,plb3";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: memory-controller {	/* DCR-addressed only; no interrupts are described */
+			compatible = "ibm,sdram-405ep";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL: mcmal {	/* referenced by EMAC0 and EMAC1 through mal-device */
+			compatible = "ibm,mcmal-405ep", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;	/* EMAC0 uses tx channel 0, EMAC1 uses tx channel 2 */
+			num-rx-chans = <2>;	/* EMAC0 uses rx channel 0, EMAC1 uses rx channel 1 */
+			interrupt-parent = <&UIC0>;	/* interrupts go straight to UIC0; no interrupt-map here */
+			interrupts = <
+				0xb 0x4 /* TXEOB */
+				0xc 0x4 /* RXEOB */
+				0xa 0x4 /* SERR */
+				0xd 0x4 /* TXDE */
+				0xe 0x4 /* RXDE */>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405ep", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xef600000 0xef600000 0x00a00000>;
+			dcr-reg = <0x0a0 0x005>;
+			clock-frequency = <0>; /* Filled in by zImage */
+
+			/* Hotfoot has UART0/UART1 swapped */
+
+			UART0: serial@ef600400 {	/* aliased as serial0 and used as /chosen stdout-path */
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x00000008>;
+				virtual-reg = <0xef600400>;	/* same value as the reg base address */
+				clock-frequency = <0>; /* Filled in by zImage */
+				current-speed = <0x9600>;	/* 0x9600 = 38400 decimal */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART1: serial@ef600300 {	/* aliased as serial1; lower address than UART0 */
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;	/* same value as the reg base address */
+				clock-frequency = <0>; /* Filled in by zImage */
+				current-speed = <0x9600>;	/* 0x9600 = 38400 decimal */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			IIC: i2c@ef600500 {	/* children are addressed by a single cell with no size */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "ibm,iic-405ep", "ibm,iic";
+				reg = <0xef600500 0x00000011>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+
+				rtc@68 {
+					/* Actually a DS1339 */
+					compatible = "dallas,ds1307";
+					reg = <0x68>;	/* matches the unit address */
+				};
+
+				temp@4a {
+					/* Not present on all boards */
+					compatible = "national,lm75";
+					reg = <0x4a>;	/* matches the unit address */
+				};
+			};
+
+			GPIO: gpio@ef600700 {	/* referenced by the gpio-leds node below */
+				#gpio-cells = <2>;	/* consumers pass two cells after &GPIO */
+				compatible = "ibm,ppc4xx-gpio";
+				reg = <0xef600700 0x00000020>;
+				gpio-controller;
+			};
+
+			gpio-leds {	/* two LEDs, both driven from the GPIO controller above */
+				compatible = "gpio-leds";
+				status {
+					label = "Status";
+					gpios = <&GPIO 1 0>;
+				};
+				radiorx {
+					label = "Rx";
+					gpios = <&GPIO 0xe 0>;	/* 0xe = 14 */
+				};
+			};
+
+			EMAC0: ethernet@ef600800 {	/* aliased as ethernet0; EMAC1 names this node as its mdio-device */
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-405ep", "ibm,emac";
+				interrupt-parent = <&UIC0>;	/* interrupts go straight to UIC0; no interrupt-map here */
+				interrupts = <
+					0xf 0x4 /* Ethernet */
+					0x9 0x4 /* Ethernet Wake Up */>;
+				local-mac-address = [000000000000]; /* Filled in by zImage */
+				reg = <0xef600800 0x00000070>;
+				mal-device = <&MAL>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <0x5dc>;	/* 1500 */
+				rx-fifo-size = <0x1000>;	/* 4096 */
+				tx-fifo-size = <0x800>;	/* 2048 */
+				phy-mode = "mii";
+				phy-map = <0x00000000>;
+			};
+
+			EMAC1: ethernet@ef600900 {	/* aliased as ethernet1 */
+				linux,network-index = <0x1>;
+				device_type = "network";
+				compatible = "ibm,emac-405ep", "ibm,emac";
+				interrupt-parent = <&UIC0>;	/* interrupts go straight to UIC0; no interrupt-map here */
+				interrupts = <
+					0x11 0x4 /* Ethernet */
+					0x9 0x4 /* Ethernet Wake Up */>;	/* same wake-up specifier as EMAC0 */
+				local-mac-address = [000000000000]; /* Filled in by zImage */
+				reg = <0xef600900 0x00000070>;
+				mal-device = <&MAL>;
+				mal-tx-channel = <2>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <0x5dc>;	/* 1500 */
+				rx-fifo-size = <0x1000>;	/* 4096 */
+				tx-fifo-size = <0x800>;	/* 2048 */
+				mdio-device = <&EMAC0>;	/* NOTE(review): presumably PHY management goes through EMAC0 - confirm against the EMAC binding */
+				phy-mode = "mii";
+				phy-map = <0x0000001>;	/* value 1, written with seven hex digits */
+			};
+		};
+
+		EBC0: ebc {	/* here the EBC sits directly under plb, after the opb node */
+			compatible = "ibm,ebc-405ep", "ibm,ebc";
+			dcr-reg = <0x012 0x002>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+
+			/* The ranges property is supplied by the bootwrapper
+			 * and is based on the firmware's configuration of the
+			 * EBC bridge
+			 */
+			clock-frequency = <0>; /* Filled in by zImage */
+
+			nor_flash@0 {
+				compatible = "cfi-flash";
+				bank-width = <2>;
+				reg = <0x0 0xff800000 0x00800000>;	/* two address cells, then size 0x00800000 */
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				/* This mapping is for the 8M flash.
+				   For the 4M flash all offsets are 4M lower,
+				   and the FeatFS partition is not present. */
+				partition@0 {	/* partition@N here is an index, not the reg offset */
+					label = "Bootloader";
+					reg = <0x7c0000 0x40000>;	/* 0x7c0000..0x800000, the end of the 8M flash */
+					/* read-only; */
+				};
+				partition@1 {
+					label = "Env_and_Config_Primary";
+					reg = <0x400000 0x10000>;
+				};
+				partition@2 {
+					label = "Kernel";
+					reg = <0x420000 0x100000>;	/* ends at 0x520000 */
+				};
+				partition@3 {
+					label = "Filesystem";
+					reg = <0x520000 0x2a0000>;	/* ends at 0x7c0000 */
+				};
+				partition@4 {
+					label = "Env_and_Config_Secondary";
+					reg = <0x410000 0x10000>;	/* directly after the primary copy */
+				};
+				partition@5 {
+					label = "FeatFS";
+					reg = <0x000000 0x400000>;	/* the first 4M of the flash */
+				};
+				partition@6 {
+					label = "Bootloader_Env";
+					reg = <0x7d0000 0x10000>;	/* NOTE(review): lies inside the Bootloader range above - confirm the overlap is intended */
+				};
+			};
+		};
+
+		PCI0: pci@ec000000 {	/* NOTE(review): unit address ec000000 differs from the first reg entry 0xeec00000 - confirm */
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb405ep-pci", "ibm,plb-pci";
+			primary;
+			reg = <0xeec00000 0x00000008    /* Config space access */
+				0xeed80000 0x00000004    /* IACK */
+				0xeed80000 0x00000004    /* Special cycle */
+				0xef480000 0x00000040>;  /* Internal registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed. Chip supports a second
+			 * IO range but we don't use it for now
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000	/* 6 cells per row: 3 PCI address, 1 plb address, 2 size */
+				0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;	/* 0x01000000 = I/O space row */
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			interrupt-parent = <&UIC0>;
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* match on bits 15..11 of the first address cell plus the pin */
+			interrupt-map = <
+				/* IDSEL 3 -- slot1 (optional) 27/29 A/B IRQ2/4 */
+				0x1800 0x0 0x0 0x1 &UIC0 0x1b 0x8	/* 0x1b = 27 */
+				0x1800 0x0 0x0 0x2 &UIC0 0x1d 0x8	/* 0x1d = 29 */
+
+				/* IDSEL 4 -- slot0, 26/28 A/B IRQ1/3 */
+				0x2000 0x0 0x0 0x1 &UIC0 0x1a 0x8	/* 0x1a = 26 */
+				0x2000 0x0 0x0 0x2 &UIC0 0x1c 0x8	/* 0x1c = 28 */
+				>;
+		};
+	};
+
+	chosen {
+		stdout-path = &UART0;	/* label reference to serial@ef600400 */
+	};
+};
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
new file mode 100644
index 0000000000..4fd7a4fbb4
--- /dev/null
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -0,0 +1,440 @@
+/*
+ * Device Tree Source for Mosaix Technologies, Inc. ICON board
+ *
+ * Copyright 2010 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	model = "mosaixtech,icon";
+	compatible = "mosaixtech,icon";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440SPe";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			reset-type = <2>;	/* Use chip-reset */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x00000000 0x0 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440spe";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440spe";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	MQ0: mq {
+		compatible = "ibm,mq-440spe";
+		dcr-reg = <0x040 0x020>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440spe", "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		/*        addr-child     addr-parent    size */
+		ranges = <0x4 0x00100000 0x4 0x00100000 0x00001000
+			  0x4 0x00200000 0x4 0x00200000 0x00000400
+			  0x4 0xe0000000 0x4 0xe0000000 0x20000000
+			  0xc 0x00000000 0xc 0x00000000 0x20000000
+			  0xd 0x00000000 0xd 0x00000000 0x80000000
+			  0xd 0x80000000 0xd 0x80000000 0x80000000
+			  0xe 0x00000000 0xe 0x00000000 0x80000000
+			  0xe 0x80000000 0xe 0x80000000 0x80000000
+			  0xf 0x00000000 0xf 0x00000000 0x80000000
+			  0xf 0x80000000 0xf 0x80000000 0x80000000>;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440spe", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440spe", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <1>;
+			interrupt-parent = <&MAL0>;	/* self: sources are routed by interrupt-map below */
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;	/* five one-cell sources, keys into interrupt-map */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC1 0x6 0x4
+					 /*RXEOB*/ 0x1 &UIC1 0x7 0x4
+					 /*SERR*/  0x2 &UIC1 0x1 0x4
+					 /*TXDE*/  0x3 &UIC1 0x2 0x4
+					 /*RXDE*/  0x4 &UIC1 0x3 0x4>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-440spe", "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xe0000000 0x00000004 0xe0000000 0x20000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440spe", "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x01000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "root";
+						reg = <0x00200000 0x00200000>;
+					};
+					partition@400000 {
+						label = "user";
+						reg = <0x00400000 0x00b60000>;
+					};
+					partition@f60000 {
+						label = "env";
+						reg = <0x00f60000 0x00040000>;
+					};
+					partition@fa0000 {
+						label = "u-boot";
+						reg = <0x00fa0000 0x00060000>;
+					};
+				};
+			};
+
+			UART0: serial@f0000200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xf0000200 0x00000008>;
+				virtual-reg = <0xa0000200>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			UART1: serial@f0000300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xf0000300 0x00000008>;
+				virtual-reg = <0xa0000300>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+
+			UART2: serial@f0000600 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xf0000600 0x00000008>;
+				virtual-reg = <0xa0000600>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x5 0x4>;
+			};
+
+			IIC0: i2c@f0000400 {
+				compatible = "ibm,iic-440spe", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xf0000400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@f0000500 {
+				compatible = "ibm,iic-440spe", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xf0000500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+				#address-cells = <1>;	/* children are addressed by one cell, no size */
+				#size-cells = <0>;
+
+				rtc@68 {	/* ST M41T00 RTC at bus address 0x68 */
+					compatible = "st,m41t00";
+					reg = <0x68>;
+				};
+			};
+
+			EMAC0: ethernet@f0000800 {
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-440spe", "ibm,emac4";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0xf0000800 0x00000074>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "gmii";
+				phy-map = <0x00000000>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCIX0: pci@c0ec00000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pcix-440spe", "ibm,plb-pcix";
+			primary;
+			large-inbound-windows;
+			enable-msi-hole;
+			reg = <0x0000000c 0x0ec00000 0x00000008   /* Config space access */
+			       0x00000000 0x00000000 0x00000000   /* no IACK cycles */
+			       0x0000000c 0x0ed00000 0x00000004   /* Special cycles */
+			       0x0000000c 0x0ec80000 0x00000100   /* Internal registers */
+			       0x0000000c 0x0ec80100 0x000000fc>; /* Internal messaging registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000d 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000c 0x08000000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/* PCI-X interrupt (SM502) is routed to extIRQ10 (UIC1, 19) */
+			interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+			interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 19 0x8>;
+		};
+
+		PCIE0: pcie@d00000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-440spe", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x0000000d 0x00000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10000000 0x00001000>;	/* Registers */
+			dcr-reg = <0x100 0x020>;
+			sdr-base = <0x300>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0x10 to 0x1f */
+			bus-range = <0x10 0x1f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x0 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x1 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x2 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@d20000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-440spe", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x0000000d 0x20000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10001000 0x00001000>;	/* Registers */
+			dcr-reg = <0x120 0x020>;
+			sdr-base = <0x340>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80010000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0x20 to 0x2f */
+			bus-range = <0x20 0x2f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x4 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x5 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x6 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
+		};
+
+		I2O: i2o@400100000 {
+			compatible = "ibm,i2o-440spe";
+			reg = <0x00000004 0x00100000 0x100>;
+			dcr-reg = <0x060 0x020>;
+		};
+
+		DMA0: dma0@400100100 {
+			compatible = "ibm,dma-440spe";
+			cell-index = <0>;
+			reg = <0x00000004 0x00100100 0x100>;
+			dcr-reg = <0x060 0x020>;
+			interrupt-parent = <&DMA0>;	/* self: sources are routed by interrupt-map below */
+			interrupts = <0 1>;	/* two one-cell sources, keys into interrupt-map */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = <
+				0 &UIC0 0x14 4	/* source 0 -> UIC0 input 0x14 */
+				1 &UIC1 0x16 4>;	/* source 1 -> UIC1 input 0x16 */
+		};
+
+		DMA1: dma1@400100200 {
+			compatible = "ibm,dma-440spe";
+			cell-index = <1>;
+			reg = <0x00000004 0x00100200 0x100>;
+			dcr-reg = <0x060 0x020>;
+			interrupt-parent = <&DMA1>;	/* self: sources are routed by interrupt-map below */
+			interrupts = <0 1>;	/* two one-cell sources, keys into interrupt-map */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = <
+				0 &UIC0 0x16 4	/* source 0 -> UIC0 input 0x16 */
+				1 &UIC1 0x16 4>;	/* source 1 -> UIC1 input 0x16, same entry as DMA0 source 1; NOTE(review): confirm it is shared */
+		};
+
+		xor-accel@400200000 {
+			compatible = "amcc,xor-accelerator";
+			reg = <0x00000004 0x00200000 0x400>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <0x1f 4>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@f0000200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
new file mode 100644
index 0000000000..c9f90f1a9c
--- /dev/null
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -0,0 +1,155 @@
+/*
+ * Device Tree Source for IBM Embedded PPC 476 Platform
+ *
+ * Copyright 2010 Torez Smith, IBM Corporation.
+ *
+ * Based on earlier code:
+ *     Copyright (c) 2006, 2007 IBM Corp.
+ *     Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x01f00000 0x00100000;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "ibm,iss-4xx";
+	compatible = "ibm,iss-4xx";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		serial0 = &UART0;
+	};
+
+	cpus {	// cpu@0 is "okay"; cpu@1..cpu@3 are "disabled" and carry spin-table release addresses
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,4xx"; // real CPU changed in sim
+			reg = <0>;
+			clock-frequency = <100000000>; // 100 MHz :-)
+			timebase-frequency = <100000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "okay";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			model = "PowerPC,4xx"; // real CPU changed in sim
+			reg = <1>;
+			clock-frequency = <100000000>; // 100 MHz :-)
+			timebase-frequency = <100000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "disabled";
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0x01f00100>; // inside the /memreserve/ range at the top of this file
+		};
+		cpu@2 {
+			device_type = "cpu";
+			model = "PowerPC,4xx"; // real CPU changed in sim
+			reg = <2>;
+			clock-frequency = <100000000>; // 100 MHz :-)
+			timebase-frequency = <100000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "disabled";
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0x01f00200>; // inside the /memreserve/ range at the top of this file
+		};
+		cpu@3 {
+			device_type = "cpu";
+			model = "PowerPC,4xx"; // real CPU changed in sim
+			reg = <3>;
+			clock-frequency = <100000000>; // 100 MHz :-)
+			timebase-frequency = <100000000>;
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			status = "disabled";
+			enable-method = "spin-table";
+			cpu-release-addr = <0 0x01f00300>; // inside the /memreserve/ range at the top of this file
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg =  <0x00000000 0x00000000 0x00000000>; // Filled in by zImage
+
+	};
+
+	MPIC: interrupt-controller {
+		compatible = "chrp,open-pic";
+		interrupt-controller;
+		dcr-reg = <0xffc00000 0x00030000>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+	};
+
+	plb {
+		compatible = "ibm,plb-4xx", "ibm,plb4"; /* Could be PLB6, doesn't matter */
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; // Filled in by zImage
+
+		POB0: opb {
+			compatible = "ibm,opb-4xx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full 32-bit
+			   range */
+			ranges = <0x00000000 0x00000001 0x00000000 0x80000000
+				  0x80000000 0x00000001 0x80000000 0x80000000>;
+			clock-frequency = <0>; // Filled in by zImage
+			UART0: serial@40000200 {
+				device_type = "serial";
+				compatible = "ns16550a";
+				reg = <0x40000200 0x00000008>;
+				virtual-reg = <0xe0000200>;
+				clock-frequency = <11059200>;
+				current-speed = <115200>;
+				interrupt-parent = <&MPIC>;
+				interrupts = <0x0 0x2>;
+			};
+		};
+	};
+
+	nvrtc {
+		compatible = "ds1743-nvram", "ds1743", "rtc-ds1743";
+		reg = <0 0xEF703000 0x2000>;
+	};
+	iss-block {
+		compatible = "ibm,iss-sim-block-device";
+		reg = <0 0xEF701000 0x1000>;
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@40000200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts
new file mode 100644
index 0000000000..5533aff25e
--- /dev/null
+++ b/arch/powerpc/boot/dts/iss4xx.dts
@@ -0,0 +1,116 @@
+/*
+ * Device Tree Source for IBM Embedded PPC 476 Platform
+ *
+ * Copyright 2010 Torez Smith, IBM Corporation.
+ *
+ * Based on earlier code:
+ *    Copyright (c) 2006, 2007 IBM Corp.
+ *    Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "ibm,iss-4xx";
+	compatible = "ibm,iss-4xx";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		serial0 = &UART0;
+	};
+
+	cpus {	// single CPU node; cache geometry values are placeholders the simulator may override
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,4xx"; // real CPU changed in sim
+			reg = <0x00000000>;
+			clock-frequency = <100000000>; // 100 MHz :-)
+			timebase-frequency = <100000000>;
+			i-cache-line-size = <32>; // may need fixup in sim
+			d-cache-line-size = <32>; // may need fixup in sim
+			i-cache-size = <32768>; // may need fixup in sim
+			d-cache-size = <32768>; // may need fixup in sim
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; // Filled in by zImage
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-4xx", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-4xx", "ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	plb {
+		compatible = "ibm,plb-4xx", "ibm,plb4"; /* Could be PLB6, doesn't matter */
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; // Filled in by zImage
+
+		POB0: opb {
+			compatible = "ibm,opb-4xx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full 32-bit
+			   range */
+			ranges = <0x00000000 0x00000001 0x00000000 0x80000000
+				  0x80000000 0x00000001 0x80000000 0x80000000>;
+			clock-frequency = <0>; // Filled in by zImage
+			UART0: serial@40000200 {
+				device_type = "serial";
+				compatible = "ns16550a";
+				reg = <0x40000200 0x00000008>;
+				virtual-reg = <0xe0000200>;
+				clock-frequency = <11059200>;
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+		};
+	};
+
+	nvrtc {
+		compatible = "ds1743-nvram", "ds1743", "rtc-ds1743";
+		reg = <0 0xEF703000 0x2000>;
+	};
+	iss-block {
+		compatible = "ibm,iss-sim-block-device";
+		reg = <0 0xEF701000 0x1000>;
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@40000200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
new file mode 100644
index 0000000000..4262b2bbd6
--- /dev/null
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -0,0 +1,492 @@
+/*
+ * Device Tree Source for AMCC Katmai eval board
+ *
+ * Copyright (c) 2006, 2007 IBM Corp.
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * Copyright (c) 2006, 2007 IBM Corp.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	model = "amcc,katmai";
+	compatible = "amcc,katmai";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440SPe";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+			reset-type = <2>;	/* Use chip-reset */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x00000000 0x0 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-440spe","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440spe";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440spe";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	MQ0: mq {
+		compatible = "ibm,mq-440spe";
+		dcr-reg = <0x040 0x020>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440spe", "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		/*        addr-child     addr-parent    size */
+		ranges = <0x4 0x00100000 0x4 0x00100000 0x00001000
+			  0x4 0x00200000 0x4 0x00200000 0x00000400
+			  0x4 0xe0000000 0x4 0xe0000000 0x20000000
+			  0xc 0x00000000 0xc 0x00000000 0x20000000
+			  0xd 0x00000000 0xd 0x00000000 0x80000000
+			  0xd 0x80000000 0xd 0x80000000 0x80000000
+			  0xe 0x00000000 0xe 0x00000000 0x80000000
+			  0xe 0x80000000 0xe 0x80000000 0x80000000
+			  0xf 0x00000000 0xf 0x00000000 0x80000000
+			  0xf 0x80000000 0xf 0x80000000 0x80000000>;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440spe", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440spe", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <1>;
+			interrupt-parent = <&MAL0>;	/* self: sources are routed by interrupt-map below */
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;	/* five one-cell sources, keys into interrupt-map */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC1 0x6 0x4
+					 /*RXEOB*/ 0x1 &UIC1 0x7 0x4
+					 /*SERR*/  0x2 &UIC1 0x1 0x4
+					 /*TXDE*/  0x3 &UIC1 0x2 0x4
+					 /*RXDE*/  0x4 &UIC1 0x3 0x4>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-440spe", "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xe0000000 0x00000004 0xe0000000 0x20000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440spe", "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x01000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "root";
+						reg = <0x00200000 0x00200000>;
+					};
+					partition@400000 {
+						label = "user";
+						reg = <0x00400000 0x00b60000>;
+					};
+					partition@f60000 {
+						label = "env";
+						reg = <0x00f60000 0x00040000>;
+					};
+					partition@fa0000 {
+						label = "u-boot";
+						reg = <0x00fa0000 0x00060000>;
+					};
+				};
+			};
+
+			UART0: serial@f0000200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xf0000200 0x00000008>;
+				virtual-reg = <0xa0000200>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			UART1: serial@f0000300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xf0000300 0x00000008>;
+				virtual-reg = <0xa0000300>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+
+			UART2: serial@f0000600 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xf0000600 0x00000008>;
+				virtual-reg = <0xa0000600>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x5 0x4>;
+			};
+
+			IIC0: i2c@f0000400 {
+				compatible = "ibm,iic-440spe", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xf0000400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@f0000500 {
+				compatible = "ibm,iic-440spe", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xf0000500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			EMAC0: ethernet@f0000800 {
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-440spe", "ibm,emac4";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0xf0000800 0x00000074>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "gmii";
+				phy-map = <0x00000000>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCIX0: pci@c0ec00000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pcix-440spe", "ibm,plb-pcix";
+			primary;
+			large-inbound-windows;
+			enable-msi-hole;
+			reg = <0x0000000c 0x0ec00000 0x00000008   /* Config space access */
+			       0x00000000 0x00000000 0x00000000   /* no IACK cycles */
+			       0x0000000c 0x0ed00000 0x00000004   /* Special cycles */
+			       0x0000000c 0x0ec80000 0x00000100   /* Internal registers */
+			       0x0000000c 0x0ec80100 0x000000fc>; /* Internal messaging registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000d 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000c 0x08000000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0 to 0xf */
+			bus-range = <0x0 0xf>;
+
+			/*
+			 * On Katmai, the following PCI-X interrupt signals
+			 * have to be enabled via jumpers (only INTA is
+			 * enabled by default):
+			 *
+			 * INTB: J3: 1-2
+			 * INTC: J2: 1-2
+			 * INTD: J1: 1-2
+			 */
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+			interrupt-map = <
+				/* IDSEL 1 */
+				0x800 0x0 0x0 0x1 &UIC1 0x14 0x8
+				0x800 0x0 0x0 0x2 &UIC1 0x13 0x8
+				0x800 0x0 0x0 0x3 &UIC1 0x12 0x8
+				0x800 0x0 0x0 0x4 &UIC1 0x11 0x8
+			>;
+		};
+
+		PCIE0: pcie@d00000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-440spe", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x0000000d 0x00000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10000000 0x00001000>;	/* Registers */
+			dcr-reg = <0x100 0x020>;
+			sdr-base = <0x300>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0x10 to 0x1f */
+			bus-range = <0x10 0x1f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x0 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x1 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x2 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@d20000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-440spe", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x0000000d 0x20000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10001000 0x00001000>;	/* Registers */
+			dcr-reg = <0x120 0x020>;
+			sdr-base = <0x340>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80010000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0x20 to 0x2f */
+			bus-range = <0x20 0x2f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x4 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x5 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x6 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
+		};
+
+		PCIE2: pcie@d40000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-440spe", "ibm,plb-pciex";
+			primary;
+			port = <0x2>; /* port number */
+			reg = <0x0000000d 0x40000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10002000 0x00001000>;	/* Registers */
+			dcr-reg = <0x140 0x020>;
+			sdr-base = <0x370>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000f 0x00000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80020000 0x00000000 0x00010000>;
+
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
+
+			/* This drives busses 0x30 to 0x3f */
+			bus-range = <0x30 0x3f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x8 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x9 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0xa 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>;
+		};
+
+		I2O: i2o@400100000 {
+			compatible = "ibm,i2o-440spe";
+			reg = <0x00000004 0x00100000 0x100>;
+			dcr-reg = <0x060 0x020>;
+		};
+
+		DMA0: dma0@400100100 {
+			compatible = "ibm,dma-440spe";
+			cell-index = <0>;
+			reg = <0x00000004 0x00100100 0x100>;
+			dcr-reg = <0x060 0x020>;
+			interrupt-parent = <&DMA0>;	/* self: sources are routed by interrupt-map below */
+			interrupts = <0 1>;	/* two one-cell sources, keys into interrupt-map */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = <
+				0 &UIC0 0x14 4	/* source 0 -> UIC0 input 0x14 */
+				1 &UIC1 0x16 4>;	/* source 1 -> UIC1 input 0x16 */
+		};
+
+		DMA1: dma1@400100200 {
+			compatible = "ibm,dma-440spe";
+			cell-index = <1>;
+			reg = <0x00000004 0x00100200 0x100>;
+			dcr-reg = <0x060 0x020>;
+			interrupt-parent = <&DMA1>;	/* self: sources are routed by interrupt-map below */
+			interrupts = <0 1>;	/* two one-cell sources, keys into interrupt-map */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = <
+				0 &UIC0 0x16 4	/* source 0 -> UIC0 input 0x16 */
+				1 &UIC1 0x16 4>;	/* source 1 -> UIC1 input 0x16, same entry as DMA0 source 1; NOTE(review): confirm it is shared */
+		};
+
+		xor-accel@400200000 {
+			compatible = "amcc,xor-accelerator";
+			reg = <0x00000004 0x00200000 0x400>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <0x1f 4>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@f0000200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
new file mode 100644
index 0000000000..c07a7525a7
--- /dev/null
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -0,0 +1,407 @@
+/*
+ * Device Tree Source for AMCC Kilauea (405EX)
+ *
+ * Copyright 2007-2009 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "amcc,kilauea";
+	compatible = "amcc,kilauea";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,405EX";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>; /* 16 kB */
+			d-cache-size = <16384>; /* 16 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller {
+		compatible = "ibm,uic-405ex", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-405ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-405ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	CPM0: cpm {
+		compatible = "ibm,cpm";
+		dcr-access-method = "native";
+		dcr-reg = <0x0b0 0x003>;
+		unused-units = <0x00000000>;
+		idle-doze = <0x02000000>;
+		standby = <0xe3e74800>;
+	};
+
+	plb {
+		compatible = "ibm,plb-405ex", "ibm,plb4";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-405ex", "ibm,sdram-4xx-ddr2";
+			dcr-reg = <0x010 0x002>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <0x5 0x4	/* ECC DED Error */
+				      0x6 0x4>;	/* ECC SEC Error */
+		};
+
+		CRYPTO: crypto@ef700000 {
+			compatible = "amcc,ppc405ex-crypto", "amcc,ppc4xx-crypto";
+			reg = <0xef700000 0x80400>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x17 0x2>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-405ex", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405ex", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x80000000 0x80000000 0x10000000
+				  0xef600000 0xef600000 0x00a00000
+				  0xf0000000 0xf0000000 0x10000000>;
+			dcr-reg = <0x0a0 0x005>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-405ex", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "root";
+						reg = <0x00200000 0x00200000>;
+					};
+					partition@400000 {
+						label = "user";
+						reg = <0x00400000 0x03b60000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+
+				ndfc@1,0 {
+					compatible = "ibm,ndfc";
+					reg = <0x00000001 0x00000000 0x00002000>;
+					ccr = <0x00001000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					nand {
+						#address-cells = <1>;
+						#size-cells = <1>;
+
+						partition@0 {
+							label = "u-boot";
+							reg = <0x00000000 0x00100000>;
+						};
+						partition@100000 {
+							label = "user";
+							reg = <0x00100000 0x03f00000>; /* follows the 1 MiB "u-boot" partition */
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600200 0x00000008>;
+				virtual-reg = <0xef600200>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1a 0x4>;
+			};
+
+			UART1: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600400 {
+				compatible = "ibm,iic-405ex", "ibm,iic";
+				reg = <0xef600400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				rtc@68 {
+					compatible = "dallas,ds1338";
+					reg = <0x68>;
+				};
+
+				dtt@48 {
+					compatible = "dallas,ds1775";
+					reg = <0x48>;
+				};
+			};
+
+			IIC1: i2c@ef600500 {
+				compatible = "ibm,iic-405ex", "ibm,iic";
+				reg = <0xef600500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			RGMII0: emac-rgmii@ef600b00 {
+				compatible = "ibm,rgmii-405ex", "ibm,rgmii";
+				reg = <0xef600b00 0x00000104>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@ef600900 {
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-405ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
+						/*Wake*/  0x1 &UIC1 0x1d 0x4>;
+				reg = <0xef600900 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600a00 {
+				linux,network-index = <0x1>;
+				device_type = "network";
+				compatible = "ibm,emac-405ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC1>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
+						/*Wake*/  0x1 &UIC1 0x1f 0x4>;
+				reg = <0xef600a00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCIE0: pcie@a0000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0xa0000000 0x20000000	/* Config space access */
+			       0xef000000 0x00001000>;	/* Registers */
+			dcr-reg = <0x040 0x020>;
+			sdr-base = <0x400>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x90000000 0x00000000 0x08000000
+				  0x01000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x00 to 0x3f */
+			bus-range = <0x0 0x3f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC2 0x0 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC2 0x1 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC2 0x2 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@c0000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0xc0000000 0x20000000	/* Config space access */
+			       0xef001000 0x00001000>;	/* Registers */
+			dcr-reg = <0x060 0x020>;
+			sdr-base = <0x440>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x98000000 0x00000000 0x08000000
+				  0x01000000 0x00000000 0x00000000 0xe0010000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x40 to 0x7f */
+			bus-range = <0x40 0x7f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC2 0xb 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC2 0xc 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC2 0xd 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC2 0xe 0x4 /* swizzled int D */>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts
new file mode 100644
index 0000000000..9743217789
--- /dev/null
+++ b/arch/powerpc/boot/dts/klondike.dts
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree for Klondike (APM8018X) board.
+ *
+ * Copyright (c) 2010, Applied Micro Circuits Corporation
+ * Author: Tanmay Inamdar <tinamdar@apm.com>
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "apm,klondike";
+	compatible = "apm,klondike";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,apm8018x";
+			reg = <0x00000000>;
+			clock-frequency = <300000000>; /* Filled in by U-Boot */
+			timebase-frequency = <300000000>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>; /* 16 kB */
+			d-cache-size = <16384>; /* 16 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x20000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x010>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x010>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x010>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x0a 0x4 0x0b 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x010>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	plb {
+		compatible = "ibm,plb4";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-apm8018x";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <16>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-parent = <&UIC1>;
+			interrupts = </*TXEOB*/   0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x1 0x4
+					/*TXDE*/  0x2 0x4
+					/*RXDE*/  0x3 0x4>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x20000000 0x20000000 0x30000000
+				  0x50000000 0x50000000 0x10000000
+				  0x60000000 0x60000000 0x10000000
+				  0xFE000000 0xFE000000 0x00010000>;
+			dcr-reg = <0x100 0x020>;
+			clock-frequency = <300000000>; /* Filled in by U-Boot */
+
+			RGMII0: emac-rgmii@400a2000 {
+				compatible = "ibm,rgmii";
+				reg = <0x400a2000 0x00000010>;
+				has-mdio;
+			};
+
+			TAH0: emac-tah@400a3000 {
+				compatible = "ibm,tah";
+				reg = <0x400a3000 0x100>;
+			};
+
+			TAH1: emac-tah@400a4000 {
+				compatible = "ibm,tah";
+				reg = <0x400a4000 0x100>;
+			};
+
+			EMAC0: ethernet@400a0000 {
+				compatible = "ibm,emac4", "ibm-emac4sync"; /* NOTE(review): "ibm-emac4sync" (hyphen) looks like a typo for "ibm,emac4sync"; fixing it changes compatible matching - confirm against hardware */
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x13 0x4>;
+				reg = <0x400a0000 0x00000100>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0x0>;
+				mal-rx-channel = <0x0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-address = <0x2>;
+				turbo = "no";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@400a1000 {
+				compatible = "ibm,emac4", "ibm-emac4sync"; /* NOTE(review): "ibm-emac4sync" (hyphen) looks like a typo for "ibm,emac4sync"; fixing it changes compatible matching - confirm against hardware */
+				status = "disabled";
+				interrupt-parent = <&EMAC1>;
+				interrupts = <0x0>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x14 0x4>;
+				reg = <0x400a1000 0x00000100>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <8>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-address = <0x3>;
+				turbo = "no";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				tah-device = <&TAH1>;
+				tah-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+				mdio-device = <&EMAC0>; /* MDIO accesses go through the EMAC0 node */
+			};
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@50001000"; /* NOTE(review): no serial@50001000 node is defined under /plb/opb in this file - confirm */
+	};
+};
diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts
new file mode 100644
index 0000000000..154f5d293f
--- /dev/null
+++ b/arch/powerpc/boot/dts/kmeter1.dts
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Keymile KMETER1 Device Tree Source
+ *
+ * 2008-2011 DENX Software Engineering GmbH
+ */
+
+/dts-v1/;
+
+/ {
+	model = "KMETER1";
+	compatible = "keymile,KMETER1";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet_piggy2;
+		ethernet1 = &enet_estar1;
+		ethernet2 = &enet_estar2;
+		ethernet3 = &enet_eth1;
+		ethernet4 = &enet_eth2;
+		ethernet5 = &enet_eth3;
+		ethernet6 = &enet_eth4;
+		serial0 = &serial0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8360@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <32768>;		// L1, 32K
+			i-cache-size = <32768>;		// L1, 32K
+			timebase-frequency = <0>;	/* Filled in by U-Boot */
+			bus-frequency = <0>;	/* Filled in by U-Boot */
+			clock-frequency = <0>;	/* Filled in by U-Boot */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0>;	/* Filled in by U-Boot */
+	};
+
+	soc8360@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8360-immr", "simple-bus";
+		ranges = <0x0 0xe0000000 0x00200000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;	/* Filled in by U-Boot */
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8360-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl,mpc8313-i2c","fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			clock-frequency = <400000>;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <264000000>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8360-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		ipic: pic@700 {
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			compatible = "fsl,pq2pro-pic", "fsl,ipic";
+			interrupt-controller;
+			reg = <0x700 0x100>;
+		};
+
+		par_io@1400 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x1400 0x100>;
+			compatible = "fsl,mpc8360-par_io";
+			num-ports = <7>;
+
+			qe_pio_c: gpio-controller@30 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8360-qe-pario-bank",
+					     "fsl,mpc8323-qe-pario-bank";
+				reg = <0x1430 0x18>;
+				gpio-controller;
+			};
+			pio_ucc1: ucc_pin@0 {
+				reg = <0>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO   */
+					0   2  1  0  1  0	/* MDC    */
+
+					0   3  1  0  1  0	/* TxD0   */
+					0   4  1  0  1  0	/* TxD1   */
+					0   5  1  0  1  0	/* TxD2   */
+					0   6  1  0  1  0	/* TxD3   */
+					0   9  2  0  1  0	/* RxD0   */
+					0  10  2  0  1  0	/* RxD1   */
+					0  11  2  0  1  0	/* RxD2   */
+					0  12  2  0  1  0	/* RxD3   */
+					0   7  1  0  1  0	/* TX_EN  */
+					0   8  1  0  1  0	/* TX_ER  */
+					0  15  2  0  1  0	/* RX_DV  */
+					0  16  2  0  1  0	/* RX_ER  */
+					0   0  2  0  1  0	/* RX_CLK */
+					2   9  1  0  3  0	/* GTX_CLK - CLK10 */
+					2   8  2  0  1  0	/* GTX125  - CLK9  */
+				>;
+			};
+
+			pio_ucc2: ucc_pin@1 {
+				reg = <1>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO   */
+					0   2  1  0  1  0	/* MDC    */
+
+					0  17  1  0  1  0	/* TxD0   */
+					0  18  1  0  1  0	/* TxD1   */
+					0  19  1  0  1  0	/* TxD2   */
+					0  20  1  0  1  0	/* TxD3   */
+					0  23  2  0  1  0	/* RxD0   */
+					0  24  2  0  1  0	/* RxD1   */
+					0  25  2  0  1  0	/* RxD2   */
+					0  26  2  0  1  0	/* RxD3   */
+					0  21  1  0  1  0	/* TX_EN  */
+					0  22  1  0  1  0	/* TX_ER  */
+					0  29  2  0  1  0	/* RX_DV  */
+					0  30  2  0  1  0	/* RX_ER  */
+					0  31  2  0  1  0	/* RX_CLK */
+					2  2   1  0  2  0	/* GTX_CLK - CLK3  */
+					2  3   2  0  1  0	/* GTX125  - CLK4  */
+				>;
+			};
+
+			pio_ucc4: ucc_pin@3 {
+				reg = <3>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO */
+					0   2  1  0  1  0	/* MDC  */
+
+					1  14  1  0  1  0	/* TxD0   (PB14, out, f1) */
+					1  15  1  0  1  0	/* TxD1   (PB15, out, f1) */
+					1  20  2  0  1  0	/* RxD0   (PB20, in,  f1) */
+					1  21  2  0  1  0	/* RxD1   (PB21, in,  f1) */
+					1  18  1  0  1  0	/* TX_EN  (PB18, out, f1) */
+					1  26  2  0  1  0	/* RX_DV  (PB26, in,  f1) */
+					1  27  2  0  1  0	/* RX_ER  (PB27, in,  f1) */
+
+					2  16  2  0  1  0	/* UCC4_RMII_CLK (CLK17) */
+				>;
+			};
+
+			pio_ucc5: ucc_pin@4 {
+				reg = <4>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO */
+					0   2  1  0  1  0	/* MDC  */
+
+					3   0  1  0  1  0	/* TxD0  (PD0,  out, f1) */
+					3   1  1  0  1  0	/* TxD1  (PD1,  out, f1) */
+					3   6  2  0  1  0	/* RxD0  (PD6,   in, f1) */
+					3   7  2  0  1  0	/* RxD1  (PD7,   in, f1) */
+					3   4  1  0  1  0	/* TX_EN (PD4,  out, f1) */
+					3  12  2  0  1  0	/* RX_DV (PD12,  in, f1) */
+					3  13  2  0  1  0	/* RX_ER (PD13,  in, f1) */
+				>;
+			};
+
+			pio_ucc6: ucc_pin@5 {
+				reg = <5>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO */
+					0   2  1  0  1  0	/* MDC  */
+
+					3  14  1  0  1  0	/* TxD0   (PD14, out, f1) */
+					3  15  1  0  1  0	/* TxD1   (PD15, out, f1) */
+					3  20  2  0  1  0	/* RxD0   (PD20, in,  f1) */
+					3  21  2  0  1  0	/* RxD1   (PD21, in,  f1) */
+					3  18  1  0  1  0	/* TX_EN  (PD18, out, f1) */
+					3  26  2  0  1  0	/* RX_DV  (PD26, in,  f1) */
+					3  27  2  0  1  0	/* RX_ER  (PD27, in,  f1) */
+				>;
+			};
+
+			pio_ucc7: ucc_pin@6 {
+				reg = <6>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO */
+					0   2  1  0  1  0	/* MDC  */
+
+					4   0  1  0  1  0	/* TxD0   (PE0,  out, f1) */
+					4   1  1  0  1  0	/* TxD1   (PE1,  out, f1) */
+					4   6  2  0  1  0	/* RxD0   (PE6,   in, f1) */
+					4   7  2  0  1  0	/* RxD1   (PE7,   in, f1) */
+					4   4  1  0  1  0	/* TX_EN  (PE4,  out, f1) */
+					4  12  2  0  1  0	/* RX_DV  (PE12,  in, f1) */
+					4  13  2  0  1  0	/* RX_ER  (PE13,  in, f1) */
+				>;
+			};
+
+			pio_ucc8: ucc_pin@7 {
+				reg = <7>;
+
+				pio-map = <
+					/* port pin dir open_drain assignment has_irq */
+					0   1  3  0  2  0	/* MDIO */
+					0   2  1  0  1  0	/* MDC  */
+
+					4  14  1  0  2  0	/* TxD0   (PE14, out, f2) */
+					4  15  1  0  1  0	/* TxD1   (PE15, out, f1) */
+					4  20  2  0  1  0	/* RxD0   (PE20, in,  f1) */
+					4  21  2  0  1  0	/* RxD1   (PE21, in,  f1) */
+					4  18  1  0  1  0	/* TX_EN  (PE18, out, f1) */
+					4  26  2  0  1  0	/* RX_DV  (PE26, in,  f1) */
+					4  27  2  0  1  0	/* RX_ER  (PE27, in,  f1) */
+
+					2  15  2  0  1  0	/* UCCx_RMII_CLK (CLK16) */
+				>;
+			};
+
+		};
+
+		qe@100000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,qe";
+			ranges = <0x0 0x100000 0x100000>;
+			reg = <0x100000 0x480>;
+			clock-frequency = <0>;	/* Filled in by U-Boot */
+			brg-frequency = <0>;	/* Filled in by U-Boot */
+			bus-frequency = <0>;	/* Filled in by U-Boot */
+
+			muram@10000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,qe-muram", "fsl,cpm-muram";
+				ranges = <0x0 0x00010000 0x0000c000>;
+
+				data-only@0 {
+					compatible = "fsl,qe-muram-data",
+						     "fsl,cpm-muram-data";
+					reg = <0x0 0xc000>;
+				};
+			};
+
+			/* ESTAR-1 (UCC1, MDIO 0x10, RGMII) */
+			enet_estar1: ucc@2000 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <1>;
+				reg = <0x2000 0x200>;
+				interrupts = <32>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk9";
+				phy-handle = <&phy_estar1>;
+				phy-connection-type = "rgmii-id";
+				pio-handle = <&pio_ucc1>;
+			};
+
+			/* ESTAR-2 (UCC2, MDIO 0x11, RGMII) */
+			enet_estar2: ucc@3000 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <2>;
+				reg = <0x3000 0x200>;
+				interrupts = <33>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk4";
+				phy-handle = <&phy_estar2>;
+				phy-connection-type = "rgmii-id";
+				pio-handle = <&pio_ucc2>;
+			};
+
+			/* Piggy2 (UCC4, MDIO 0x00, RMII) */
+			enet_piggy2: ucc@3200 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <4>;
+				reg = <0x3200 0x200>;
+				interrupts = <35>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk17";
+				phy-handle = <&phy_piggy2>;
+				phy-connection-type = "rmii";
+				pio-handle = <&pio_ucc4>;
+			};
+
+			/* Eth-1 (UCC5, MDIO 0x08, RMII) */
+			enet_eth1: ucc@2400 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <5>;
+				reg = <0x2400 0x200>;
+				interrupts = <40>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk16";
+				phy-handle = <&phy_eth1>;
+				phy-connection-type = "rmii";
+				pio-handle = <&pio_ucc5>;
+			};
+
+			/* Eth-2 (UCC6, MDIO 0x09, RMII) */
+			enet_eth2: ucc@3400 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <6>;
+				reg = <0x3400 0x200>;
+				interrupts = <41>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk16";
+				phy-handle = <&phy_eth2>;
+				phy-connection-type = "rmii";
+				pio-handle = <&pio_ucc6>;
+			};
+
+			/* Eth-3 (UCC7, MDIO 0x0a, RMII) */
+			enet_eth3: ucc@2600 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <7>;
+				reg = <0x2600 0x200>;
+				interrupts = <42>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk16";
+				phy-handle = <&phy_eth3>;
+				phy-connection-type = "rmii";
+				pio-handle = <&pio_ucc7>;
+			};
+
+			/* Eth-4 (UCC8, MDIO 0x0b, RMII) */
+			enet_eth4: ucc@3600 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <8>;
+				reg = <0x3600 0x200>;
+				interrupts = <43>;
+				interrupt-parent = <&qeic>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				rx-clock-name = "none";
+				tx-clock-name = "clk16";
+				phy-handle = <&phy_eth4>;
+				phy-connection-type = "rmii";
+				pio-handle = <&pio_ucc8>;
+			};
+
+			mdio@3320 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				reg = <0x3320 0x18>;
+				compatible = "fsl,ucc-mdio";
+
+				/* Piggy2 (UCC4, MDIO 0x00, RMII) */
+				phy_piggy2: ethernet-phy@0 {
+					reg = <0x0>;
+				};
+
+				/* Eth-1 (UCC5, MDIO 0x08, RMII) */
+				phy_eth1: ethernet-phy@8 {
+					reg = <0x08>;
+				};
+
+				/* Eth-2 (UCC6, MDIO 0x09, RMII) */
+				phy_eth2: ethernet-phy@9 {
+					reg = <0x09>;
+				};
+
+				/* Eth-3 (UCC7, MDIO 0x0a, RMII) */
+				phy_eth3: ethernet-phy@a {
+					reg = <0x0a>;
+				};
+
+				/* Eth-4 (UCC8, MDIO 0x0b, RMII) */
+				phy_eth4: ethernet-phy@b {
+					reg = <0x0b>;
+				};
+
+				/* ESTAR-1 (UCC1, MDIO 0x10, RGMII) */
+				phy_estar1: ethernet-phy@10 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x10>;
+				};
+
+				/* ESTAR-2 (UCC2, MDIO 0x11, RGMII) */
+				phy_estar2: ethernet-phy@11 {
+					interrupt-parent = <&ipic>;
+					interrupts = <18 0x8>;
+					reg = <0x11>;
+				};
+			};
+
+			qeic: interrupt-controller@80 {
+				interrupt-controller;
+				compatible = "fsl,qe-ic";
+				#address-cells = <0>;
+				#interrupt-cells = <1>;
+				reg = <0x80 0x80>;
+				big-endian;
+				interrupts = <
+					32 0x8
+					33 0x8
+					34 0x8
+					35 0x8
+					40 0x8
+					41 0x8
+					42 0x8
+					43 0x8
+				>;
+				interrupt-parent = <&ipic>;
+			};
+		};
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8360-localbus", "fsl,pq2pro-localbus",
+			     "simple-bus";
+		reg = <0xe0005000 0xd8>;
+		ranges = <0 0 0xf0000000 0x04000000	/* LB 0 */
+			  1 0 0xe8000000 0x01000000	/* LB 1 */
+			  3 0 0xa0000000 0x10000000>;	/* LB 3 */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x04000000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <2>;
+			partition@0 { /* 768KB */
+				label = "u-boot";
+				reg = <0 0xC0000>;
+			};
+			partition@c0000 { /* 128KB */
+				label = "env";
+				reg = <0xC0000 0x20000>;
+			};
+			partition@e0000 { /* 128KB */
+				label = "envred";
+				reg = <0xE0000 0x20000>;
+			};
+			partition@100000 { /* 64512KB */
+				label = "ubi0";
+				reg = <0x100000 0x3F00000>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
new file mode 100644
index 0000000000..37a7eb576d
--- /dev/null
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -0,0 +1,346 @@
+/*
+ * Device Tree Source for Emerson KSI8560
+ *
+ * Author: Alexandr Smirnov <asmirnov@ru.mvista.com>
+ *
+ * Based on mpc8560ads.dts
+ *
+ * 2008 (c) MontaVista, Software, Inc.  This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "KSI8560";
+	compatible = "emerson,KSI8560";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8560@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <0x8000>;		/* L1, 32K */
+			i-cache-size = <0x8000>;		/* L1, 32K */
+			timebase-frequency = <0>;		/* From U-boot */
+			bus-frequency = <0>;			/* From U-boot */
+			clock-frequency = <0>;			/* From U-boot */
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;			/* Fixed by bootwrapper */
+	};
+
+	soc@fdf00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x00000000 0xfdf00000 0x00100000>;
+		bus-frequency = <0>;				/* Fixed by bootwrapper */
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8560-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8540-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <0x12 0x2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8540-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <0x20>;		/* 32 bytes */
+			cache-size = <0x40000>;			/* L2, 256K */
+			interrupt-parent = <&mpic>;
+			interrupts = <0x10 0x2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <0x2b 0x2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8560-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			/* Mac address filled in by bootwrapper */
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&PHY1>;
+
+			mdio@520 {					/* For TSECs */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				PHY1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					reg = <0x1>;
+				};
+
+				PHY2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					reg = <0x2>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			/* Mac address filled in by bootwrapper */
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&PHY2>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		mpic: pic@40000 {
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			reg = <0x40000 0x40000>;
+			device_type = "open-pic";
+		};
+
+		cpm@919c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8560-cpm", "fsl,cpm2";
+			reg = <0x919c0 0x30>;
+			ranges;
+
+			muram@80000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x80000 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x0 0x4000 0x9000 0x2000>;
+				};
+			};
+
+			brg@919f0 {
+				compatible = "fsl,mpc8560-brg",
+					     "fsl,cpm2-brg",
+					     "fsl,cpm-brg";
+				reg = <0x919f0 0x10 0x915f0 0x10>;
+				clock-frequency = <165000000>;	/* 165MHz as encoded; NOTE(review): previously labelled 166MHz - confirm actual BRG input clock */
+			};
+
+			CPMPIC: pic@90c00 {
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				interrupt-controller;
+				interrupts = <0x2e 0x2>;
+				interrupt-parent = <&mpic>;
+				reg = <0x90c00 0x80>;
+				compatible = "fsl,mpc8560-cpm-pic", "fsl,cpm2-pic";
+			};
+
+			serial@91a00 {
+				device_type = "serial";
+				compatible = "fsl,mpc8560-scc-uart",
+					     "fsl,cpm2-scc-uart";
+				reg = <0x91a00 0x20 0x88000 0x100>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x800000>;
+				current-speed = <0x1c200>;
+				interrupts = <0x28 0x8>;
+				interrupt-parent = <&CPMPIC>;
+			};
+
+			serial@91a20 {
+				device_type = "serial";
+				compatible = "fsl,mpc8560-scc-uart",
+					     "fsl,cpm2-scc-uart";
+				reg = <0x91a20 0x20 0x88100 0x100>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0x4a00000>;
+				current-speed = <0x1c200>;
+				interrupts = <0x29 0x8>;
+				interrupt-parent = <&CPMPIC>;
+			};
+
+			mdio@90d00 {				/* For FCCs */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,cpm2-mdio-bitbang";
+				reg = <0x90d00 0x14>;
+				fsl,mdio-pin = <24>;
+				fsl,mdc-pin = <25>;
+
+				PHY0: ethernet-phy@0 {
+					interrupt-parent = <&mpic>;
+					reg = <0x0>;
+				};
+			};
+
+			enet2: ethernet@91300 {
+				device_type = "network";
+				compatible = "fsl,mpc8560-fcc-enet",
+					     "fsl,cpm2-fcc-enet";
+				reg = <0x91300 0x20 0x88400 0x100 0x91390 0x1>;
+				/* Mac address filled in by bootwrapper */
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				fsl,cpm-command = <0x12000300>;
+				interrupts = <0x20 0x8>;
+				interrupt-parent = <&CPMPIC>;
+				phy-handle = <&PHY0>;
+			};
+		};
+	};
+
+	localbus@fdf05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8560-localbus", "simple-bus";
+		reg = <0xfdf05000 0x68>;
+
+		ranges = <0x0 0x0 0xe0000000 0x00800000
+			  0x4 0x0 0xe8080000 0x00080000>;
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "jedec-flash";
+			reg = <0x0 0x0 0x800000>;
+			bank-width = <0x2>;
+
+			partition@0 {
+				label = "Primary Kernel";
+				reg = <0x0 0x180000>;
+			};
+			partition@180000 {
+				label = "Primary Filesystem";
+				reg = <0x180000 0x580000>;
+			};
+			partition@700000 {
+				label = "Monitor";
+				reg = <0x700000 0x100000>;	/* last 1MB of the 8MB flash; must match the unit-address */
+				read-only;
+			};
+		};
+
+		cpld@4,0 {
+			compatible = "emerson,KSI8560-cpld";
+			reg = <0x4 0x0 0x80000>;
+		};
+	};
+
+
+	chosen {
+		stdout-path = "/soc@fdf00000/cpm@919c0/serial@91a00";	/* full path, unit addresses included */
+	};
+};
diff --git a/arch/powerpc/boot/dts/kuroboxHD.dts b/arch/powerpc/boot/dts/kuroboxHD.dts
new file mode 100644
index 0000000000..0a4545159e
--- /dev/null
+++ b/arch/powerpc/boot/dts/kuroboxHD.dts
@@ -0,0 +1,147 @@
+/*
+ * Device Tree Source for Buffalo KuroboxHD
+ *
+ * Choose CONFIG_LINKSTATION to build a kernel for KuroboxHD, or use
+ * the default configuration linkstation_defconfig.
+ *
+ * Based on sandpoint.dts
+ *
+ * 2006 (c) G. Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+
+XXXX add flash parts, rtc, ??
+
+ */
+
+/dts-v1/;
+
+/ {
+	model = "KuroboxHD";
+	compatible = "linkstation";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,603e { /* Really 8241 */
+			device_type = "cpu";
+			reg = <0x0>;
+			clock-frequency = <200000000>;	/* Fixed by bootloader */
+			timebase-frequency = <24391680>; /* Fixed by bootloader */
+			bus-frequency = <0>;		/* Fixed by bootloader */
+			/* Following required by dtc but not used */
+			i-cache-size = <0x4000>;
+			d-cache-size = <0x4000>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x4000000>;	/* 64MB starting at address 0 */
+	};
+
+	soc10x { /* AFAICT need to make soc for 8245's uarts to be defined */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "mpc10x";
+		store-gathering = <0>; /* 0 == off, !0 == on */
+		reg = <0x80000000 0x100000>;
+		ranges = <0x80000000 0x80000000 0x70000000	/* pci mem space */
+			  0xfc000000 0xfc000000 0x100000	/* EUMB */
+			  0xfe000000 0xfe000000 0xc00000	/* pci i/o space */
+			  0xfec00000 0xfec00000 0x300000	/* pci cfg regs */
+			  0xfef00000 0xfef00000 0x100000>;	/* pci iack */
+
+		i2c@80003000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x80003000 0x1000>;
+			interrupts = <5 2>;
+			interrupt-parent = <&mpic>;
+
+			rtc@32 {
+				compatible = "ricoh,rs5c372a";
+				reg = <0x32>;
+			};
+		};
+
+		serial0: serial@80004500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x80004500 0x8>;
+			clock-frequency = <97553800>;
+			current-speed = <9600>;
+			interrupts = <9 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@80004600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x80004600 0x8>;
+			clock-frequency = <97553800>;
+			current-speed = <57600>;
+			interrupts = <10 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		mpic: interrupt-controller@80040000 {
+			#interrupt-cells = <2>;
+			#address-cells = <0>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+			interrupt-controller;
+			reg = <0x80040000 0x40000>;
+		};
+
+		pci0: pci@fec00000 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			device_type = "pci";
+			compatible = "mpc10x-pci";
+			reg = <0xfec00000 0x400000>;
+			ranges = <0x1000000 0x0        0x0 0xfe000000 0x0 0xc00000
+				  0x2000000 0x0 0x80000000 0x80000000 0x0 0x70000000>;
+			bus-range = <0 255>;
+			clock-frequency = <133333333>;
+			interrupt-parent = <&mpic>;
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+			interrupt-map = <
+				/* IDSEL 11 - IRQ0 ETH */
+				0x5800 0x0 0x0 0x1 &mpic 0x0 0x1
+				0x5800 0x0 0x0 0x2 &mpic 0x1 0x1
+				0x5800 0x0 0x0 0x3 &mpic 0x2 0x1
+				0x5800 0x0 0x0 0x4 &mpic 0x3 0x1
+				/* IDSEL 12 - IRQ1 IDE0 */
+				0x6000 0x0 0x0 0x1 &mpic 0x1 0x1
+				0x6000 0x0 0x0 0x2 &mpic 0x2 0x1
+				0x6000 0x0 0x0 0x3 &mpic 0x3 0x1
+				0x6000 0x0 0x0 0x4 &mpic 0x0 0x1
+				/* IDSEL 14 - IRQ3 USB2.0 */
+				0x7000 0x0 0x0 0x1 &mpic 0x3 0x1
+				0x7000 0x0 0x0 0x2 &mpic 0x3 0x1
+				0x7000 0x0 0x0 0x3 &mpic 0x3 0x1
+				0x7000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/kuroboxHG.dts b/arch/powerpc/boot/dts/kuroboxHG.dts
new file mode 100644
index 0000000000..0e758b347c
--- /dev/null
+++ b/arch/powerpc/boot/dts/kuroboxHG.dts
@@ -0,0 +1,147 @@
+/*
+ * Device Tree Source for Buffalo KuroboxHG
+ *
+ * Choose CONFIG_LINKSTATION to build a kernel for KuroboxHG, or use
+ * the default configuration linkstation_defconfig.
+ *
+ * Based on sandpoint.dts
+ *
+ * 2006 (c) G. Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+
+XXXX add flash parts, rtc, ??
+
+ */
+
+/dts-v1/;
+
+/ {
+	model = "KuroboxHG";
+	compatible = "linkstation";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,603e { /* Really 8241 */
+			device_type = "cpu";
+			reg = <0x0>;
+			clock-frequency = <266000000>;	/* Fixed by bootloader */
+			timebase-frequency = <32522240>; /* Fixed by bootloader */
+			bus-frequency = <0>;		/* Fixed by bootloader */
+			/* Following required by dtc but not used */
+			i-cache-size = <0x4000>;
+			d-cache-size = <0x4000>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x8000000>;	/* 128MB starting at address 0 */
+	};
+
+	soc10x { /* AFAICT need to make soc for 8245's uarts to be defined */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "mpc10x";
+		store-gathering = <0>; /* 0 == off, !0 == on */
+		reg = <0x80000000 0x100000>;
+		ranges = <0x80000000 0x80000000 0x70000000	/* pci mem space */
+			  0xfc000000 0xfc000000 0x100000	/* EUMB */
+			  0xfe000000 0xfe000000 0xc00000	/* pci i/o space */
+			  0xfec00000 0xfec00000 0x300000	/* pci cfg regs */
+			  0xfef00000 0xfef00000 0x100000>;	/* pci iack */
+
+		i2c@80003000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x80003000 0x1000>;
+			interrupts = <5 2>;
+			interrupt-parent = <&mpic>;
+
+			rtc@32 {
+				compatible = "ricoh,rs5c372a";
+				reg = <0x32>;
+			};
+		};
+
+		serial0: serial@80004500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x80004500 0x8>;
+			clock-frequency = <130041000>;
+			current-speed = <9600>;
+			interrupts = <9 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@80004600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x80004600 0x8>;
+			clock-frequency = <130041000>;
+			current-speed = <57600>;
+			interrupts = <10 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		mpic: interrupt-controller@80040000 {
+			#interrupt-cells = <2>;
+			#address-cells = <0>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+			interrupt-controller;
+			reg = <0x80040000 0x40000>;
+		};
+
+		pci0: pci@fec00000 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			#interrupt-cells = <1>;
+			device_type = "pci";
+			compatible = "mpc10x-pci";
+			reg = <0xfec00000 0x400000>;
+			ranges = <0x1000000 0x0        0x0 0xfe000000 0x0 0xc00000
+				  0x2000000 0x0 0x80000000 0x80000000 0x0 0x70000000>;
+			bus-range = <0 255>;
+			clock-frequency = <133333333>;
+			interrupt-parent = <&mpic>;
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+			interrupt-map = <
+				/* IDSEL 11 - IRQ0 ETH */
+				0x5800 0x0 0x0 0x1 &mpic 0x0 0x1
+				0x5800 0x0 0x0 0x2 &mpic 0x1 0x1
+				0x5800 0x0 0x0 0x3 &mpic 0x2 0x1
+				0x5800 0x0 0x0 0x4 &mpic 0x3 0x1
+				/* IDSEL 12 - IRQ1 IDE0 */
+				0x6000 0x0 0x0 0x1 &mpic 0x1 0x1
+				0x6000 0x0 0x0 0x2 &mpic 0x2 0x1
+				0x6000 0x0 0x0 0x3 &mpic 0x3 0x1
+				0x6000 0x0 0x0 0x4 &mpic 0x0 0x1
+				/* IDSEL 14 - IRQ3 USB2.0 */
+				0x7000 0x0 0x0 0x1 &mpic 0x3 0x1
+				0x7000 0x0 0x0 0x2 &mpic 0x3 0x1
+				0x7000 0x0 0x0 0x3 &mpic 0x3 0x1
+				0x7000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts
new file mode 100644
index 0000000000..b9d8487813
--- /dev/null
+++ b/arch/powerpc/boot/dts/lite5200.dts
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Lite5200 board Device Tree Source
+ *
+ * Copyright 2006-2007 Secret Lab Technologies Ltd.
+ * Grant Likely <grant.likely@secretlab.ca>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "fsl,lite5200";
+	compatible = "fsl,lite5200";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&mpc5200_pic>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,5200@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <0x4000>;	// L1, 16K
+			i-cache-size = <0x4000>;	// L1, 16K
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;	// 64MB
+	};
+
+	soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>;		// from bootloader
+		system-frequency = <0>;		// from bootloader
+
+		cdm@200 {
+			compatible = "fsl,mpc5200-cdm";
+			reg = <0x200 0x38>;
+		};
+
+		mpc5200_pic: interrupt-controller@500 {
+			// 5200 interrupts are encoded into two levels;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			compatible = "fsl,mpc5200-pic";
+			reg = <0x500 0x80>;
+		};
+
+		timer@600 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x600 0x10>;
+			interrupts = <1 9 0>;
+			fsl,has-wdt;
+		};
+
+		timer@610 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x610 0x10>;
+			interrupts = <1 10 0>;
+		};
+
+		timer@620 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x620 0x10>;
+			interrupts = <1 11 0>;
+		};
+
+		timer@630 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x630 0x10>;
+			interrupts = <1 12 0>;
+		};
+
+		timer@640 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x640 0x10>;
+			interrupts = <1 13 0>;
+		};
+
+		timer@650 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x650 0x10>;
+			interrupts = <1 14 0>;
+		};
+
+		timer@660 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x660 0x10>;
+			interrupts = <1 15 0>;
+		};
+
+		timer@670 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x670 0x10>;
+			interrupts = <1 16 0>;
+		};
+
+		rtc@800 {	// Real time clock
+			compatible = "fsl,mpc5200-rtc";
+			reg = <0x800 0x100>;
+			interrupts = <1 5 0 1 6 0>;
+		};
+
+		can@900 {
+			compatible = "fsl,mpc5200-mscan";
+			interrupts = <2 17 0>;
+			reg = <0x900 0x80>;
+		};
+
+		can@980 {
+			compatible = "fsl,mpc5200-mscan";
+			interrupts = <2 18 0>;
+			reg = <0x980 0x80>;
+		};
+
+		gpio@b00 {
+			compatible = "fsl,mpc5200-gpio";
+			reg = <0xb00 0x40>;
+			interrupts = <1 7 0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		gpio@c00 {
+			compatible = "fsl,mpc5200-gpio-wkup";
+			reg = <0xc00 0x40>;
+			interrupts = <1 8 0 0 3 0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		spi@f00 {
+			compatible = "fsl,mpc5200-spi";
+			reg = <0xf00 0x20>;
+			interrupts = <2 13 0 2 14 0>;
+		};
+
+		usb@1000 {
+			compatible = "fsl,mpc5200-ohci","ohci-be";
+			reg = <0x1000 0xff>;
+			interrupts = <2 6 0>;
+		};
+
+		dma-controller@1200 {
+			compatible = "fsl,mpc5200-bestcomm";
+			reg = <0x1200 0x80>;
+			interrupts = <3 0 0  3 1 0  3 2 0  3 3 0
+			              3 4 0  3 5 0  3 6 0  3 7 0
+			              3 8 0  3 9 0  3 10 0  3 11 0
+			              3 12 0  3 13 0  3 14 0  3 15 0>;
+		};
+
+		xlb@1f00 {
+			compatible = "fsl,mpc5200-xlb";
+			reg = <0x1f00 0x100>;
+		};
+
+		serial@2000 {		// PSC1
+			compatible = "fsl,mpc5200-psc-uart";
+			cell-index = <0>;
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		// PSC2 in ac97 mode example
+		//ac97@2200 {		// PSC2
+		//	compatible = "fsl,mpc5200-psc-ac97";
+		//	cell-index = <1>;
+		//	reg = <0x2200 0x100>;
+		//	interrupts = <2 2 0>;
+		//};
+
+		// PSC3 in CODEC mode example
+		//i2s@2400 {		// PSC3
+		//	compatible = "fsl,mpc5200-psc-i2s";
+		//	cell-index = <2>;
+		//	reg = <0x2400 0x100>;
+		//	interrupts = <2 3 0>;
+		//};
+
+		// PSC4 in uart mode example
+		//serial@2600 {		// PSC4
+		//	compatible = "fsl,mpc5200-psc-uart";
+		//	cell-index = <3>;
+		//	reg = <0x2600 0x100>;
+		//	interrupts = <2 11 0>;
+		//};
+
+		// PSC5 in uart mode example
+		//serial@2800 {		// PSC5
+		//	compatible = "fsl,mpc5200-psc-uart";
+		//	cell-index = <4>;
+		//	reg = <0x2800 0x100>;
+		//	interrupts = <2 12 0>;
+		//};
+
+		// PSC6 in spi mode example
+		//spi@2c00 {		// PSC6
+		//	compatible = "fsl,mpc5200-psc-spi";
+		//	cell-index = <5>;
+		//	reg = <0x2c00 0x100>;
+		//	interrupts = <2 4 0>;
+		//};
+
+		ethernet@3000 {
+			compatible = "fsl,mpc5200-fec";
+			reg = <0x3000 0x400>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <2 5 0>;
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-mdio";
+			reg = <0x3000 0x400>;	// fec range, since we need to setup fec interrupts
+			interrupts = <2 5 0>;	// these are for "mii command finished", not link changes & co.
+
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		ata@3a00 {
+			compatible = "fsl,mpc5200-ata";
+			reg = <0x3a00 0x100>;
+			interrupts = <2 7 0>;
+		};
+
+		i2c@3d00 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d00 0x40>;
+			interrupts = <2 15 0>;
+		};
+
+		i2c@3d40 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d40 0x40>;
+			interrupts = <2 16 0>;
+
+			eeprom@50 {
+				compatible = "atmel,24c02";
+				reg = <0x50>;
+			};
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;
+		};
+	};
+
+	pci@f0000d00 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		compatible = "fsl,mpc5200-pci";
+		reg = <0xf0000d00 0x100>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3
+				 0xc000 0 0 2 &mpc5200_pic 0 0 3
+				 0xc000 0 0 3 &mpc5200_pic 0 0 3
+				 0xc000 0 0 4 &mpc5200_pic 0 0 3>;
+		clock-frequency = <0>; // From boot loader
+		interrupts = <2 8 0 2 9 0 2 10 0>;
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+			 <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200-lpb","simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+
+		ranges = <0 0 0xff000000 0x01000000>;
+
+		flash@0,0 {
+			compatible = "amd,am29lv652d", "cfi-flash";
+			reg = <0 0 0x01000000>;
+			bank-width = <1>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts
new file mode 100644
index 0000000000..7e2d91c7cb
--- /dev/null
+++ b/arch/powerpc/boot/dts/lite5200b.dts
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Lite5200B board Device Tree Source
+ *
+ * Copyright 2006-2007 Secret Lab Technologies Ltd.
+ * Grant Likely <grant.likely@secretlab.ca>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };
+&gpt2 { gpio-controller; };
+&gpt3 { gpio-controller; };
+
+/ {
+	model = "fsl,lite5200b";
+	compatible = "fsl,lite5200b";
+
+	leds {
+		compatible = "gpio-leds";
+		tmr2 {
+			gpios = <&gpt2 0 1>;
+		};
+		tmr3 {
+			gpios = <&gpt3 0 1>;
+			linux,default-trigger = "heartbeat";
+		};
+		led1 { gpios = <&gpio_wkup 2 1>; };
+		led2 { gpios = <&gpio_simple 3 1>; };
+		led3 { gpios = <&gpio_wkup 3 1>; };
+		led4 { gpios = <&gpio_simple 2 1>; };
+	};
+
+	memory@0 {
+		reg = <0x00000000 0x10000000>;	// 256MB
+	};
+
+	soc5200@f0000000 {
+		psc@2000 {		// PSC1
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+			cell-index = <0>;
+		};
+
+		psc@2200 {		// PSC2
+			status = "disabled";
+		};
+
+		psc@2400 {		// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {		// PSC4
+			status = "disabled";
+		};
+
+		psc@2800 {		// PSC5
+			status = "disabled";
+		};
+
+		psc@2c00 {		// PSC6
+			status = "disabled";
+		};
+
+		// PSC2 in ac97 mode example
+		//ac97@2200 {		// PSC2
+		//	compatible = "fsl,mpc5200b-psc-ac97","fsl,mpc5200-psc-ac97";
+		//	cell-index = <1>;
+		//};
+
+		// PSC3 in CODEC mode example
+		//i2s@2400 {		// PSC3
+		//	compatible = "fsl,mpc5200b-psc-i2s"; //not 5200 compatible
+		//	cell-index = <2>;
+		//};
+
+		// PSC6 in spi mode example
+		//spi@2c00 {		// PSC6
+		//	compatible = "fsl,mpc5200b-psc-spi","fsl,mpc5200-psc-spi";
+		//	cell-index = <5>;
+		//};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		i2c@3d40 {
+			eeprom@50 {
+				compatible = "atmel,24c02";
+				reg = <0x50>;
+			};
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200b-sram","fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;
+		};
+	};
+
+	pci@f0000d00 {
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3 // 1st slot
+				 0xc000 0 0 2 &mpc5200_pic 1 1 3
+				 0xc000 0 0 3 &mpc5200_pic 1 2 3
+				 0xc000 0 0 4 &mpc5200_pic 1 3 3
+
+				 0xc800 0 0 1 &mpc5200_pic 1 1 3 // 2nd slot
+				 0xc800 0 0 2 &mpc5200_pic 1 2 3
+				 0xc800 0 0 3 &mpc5200_pic 1 3 3
+				 0xc800 0 0 4 &mpc5200_pic 0 0 3>;
+		clock-frequency = <0>; // From boot loader
+		interrupts = <2 8 0 2 9 0 2 10 0>;
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+			 <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+	};
+
+	localbus {
+		ranges = <0 0 0xfe000000 0x02000000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;
+			bank-width = <1>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@0 {
+				label = "kernel";
+				reg = <0x00000000 0x00200000>;
+			};
+			partition@200000 {
+				label = "rootfs";
+				reg = <0x00200000 0x01d00000>;
+			};
+			partition@1f00000 {
+				label = "u-boot";
+				reg = <0x01f00000 0x00060000>;
+			};
+			partition@1f60000 {
+				label = "u-boot-env";
+				reg = <0x01f60000 0x00020000>;
+			};
+			partition@1f80000 {
+				label = "dtb";
+				reg = <0x01f80000 0x00080000>;
+			};
+		};
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts
new file mode 100644
index 0000000000..c473cd911b
--- /dev/null
+++ b/arch/powerpc/boot/dts/makalu.dts
@@ -0,0 +1,353 @@
+/*
+ * Device Tree Source for AMCC Makalu (405EX)
+ *
+ * Copyright 2007 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "amcc,makalu";
+	compatible = "amcc,makalu";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,405EX";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>; /* 16 kB */
+			d-cache-size = <16384>; /* 16 kB */
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller {
+		compatible = "ibm,uic-405ex", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-405ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-405ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	plb {
+		compatible = "ibm,plb-405ex", "ibm,plb4";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-405ex", "ibm,sdram-4xx-ddr2";
+			dcr-reg = <0x010 0x002>;
+			interrupt-parent = <&UIC2>;
+			interrupts = <0x5 0x4 /* ECC DED Error */
+			              0x6 0x4 /* ECC SEC Error */ >;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-405ex", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405ex", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x80000000 0x80000000 0x10000000
+				  0xef600000 0xef600000 0x00a00000
+				  0xf0000000 0xf0000000 0x10000000>;
+			dcr-reg = <0x0a0 0x005>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-405ex", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x00200000>;
+					};
+					partition@200000 {
+						label = "root";
+						reg = <0x00200000 0x00200000>;
+					};
+					partition@400000 {
+						label = "user";
+						reg = <0x00400000 0x03b60000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+			};
+
+			UART0: serial@ef600200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600200 0x00000008>;
+				virtual-reg = <0xef600200>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1a 0x4>;
+			};
+
+			UART1: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600400 {
+				compatible = "ibm,iic-405ex", "ibm,iic";
+				reg = <0xef600400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@ef600500 {
+				compatible = "ibm,iic-405ex", "ibm,iic";
+				reg = <0xef600500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+
+			RGMII0: emac-rgmii@ef600b00 {
+				compatible = "ibm,rgmii-405ex", "ibm,rgmii";
+				reg = <0xef600b00 0x00000104>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@ef600900 {
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-405ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
+						/*Wake*/  0x1 &UIC1 0x1d 0x4>;
+				reg = <0xef600900 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x0000003f>;	/* Start at 6 */
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600a00 {
+				linux,network-index = <0x1>;
+				device_type = "network";
+				compatible = "ibm,emac-405ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC1>;	/* self: routed via interrupt-map below */
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
+						/*Wake*/  0x1 &UIC1 0x1f 0x4>;
+				reg = <0xef600a00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;	/* presumably no PHY addresses skipped (cf. EMAC0's 0x3f mask) */
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCIE0: pcie@a0000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0xa0000000 0x20000000	/* Config space access */
+			       0xef000000 0x00001000>;	/* Registers */
+			dcr-reg = <0x040 0x020>;
+			sdr-base = <0x400>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x90000000 0x00000000 0x08000000
+				  0x01000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x00 to 0x3f */
+			bus-range = <0x0 0x3f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC2 0x0 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC2 0x1 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC2 0x2 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@c0000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0xc0000000 0x20000000	/* Config space access */
+			       0xef001000 0x00001000>;	/* Registers */
+			dcr-reg = <0x060 0x020>;
+			sdr-base = <0x440>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x98000000 0x00000000 0x08000000
+				  0x01000000 0x00000000 0x00000000 0xe0010000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x40 to 0x7f */
+			bus-range = <0x40 0x7f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC2 0xb 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC2 0xc 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC2 0xd 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC2 0xe 0x4 /* swizzled int D */>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
new file mode 100644
index 0000000000..96524ede16
--- /dev/null
+++ b/arch/powerpc/boot/dts/media5200.dts
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale Media5200 board Device Tree Source
+ *
+ * Copyright 2009 Secret Lab Technologies Ltd.
+ * Grant Likely <grant.likely@secretlab.ca>
+ * Steven Cavanagh <scavanagh@secretlab.ca>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };
+
+/ {
+	model = "fsl,media5200";
+	compatible = "fsl,media5200";
+
+	aliases {
+		console = &console;
+		ethernet0 = &eth0;
+	};
+
+	chosen {
+		stdout-path = &console;
+	};
+
+	cpus {
+		PowerPC,5200@0 {
+			timebase-frequency = <33000000>;	// 33 MHz, these were configured by U-Boot
+			bus-frequency = <132000000>;		// 132 MHz
+			clock-frequency = <396000000>;		// 396 MHz
+		};
+	};
+
+	memory@0 {
+		reg = <0x00000000 0x08000000>;	// 128MB RAM
+	};
+
+	soc5200@f0000000 {
+		bus-frequency = <132000000>;// 132 MHz
+
+		psc@2000 {	// PSC1
+			status = "disabled";
+		};
+
+		psc@2200 {	// PSC2
+			status = "disabled";
+		};
+
+		psc@2400 {	// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {	// PSC4
+			status = "disabled";
+		};
+
+		psc@2800 {	// PSC5
+			status = "disabled";
+		};
+
+		// PSC6 in uart mode
+		console: psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		usb@1000 {
+			reg = <0x1000 0x100>;
+		};
+	};
+
+	pci@f0000d00 {
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0xc000 0 0 1 &media5200_fpga 0 2 // 1st slot
+				 0xc000 0 0 2 &media5200_fpga 0 3
+				 0xc000 0 0 3 &media5200_fpga 0 4
+				 0xc000 0 0 4 &media5200_fpga 0 5
+
+				 0xc800 0 0 1 &media5200_fpga 0 3 // 2nd slot
+				 0xc800 0 0 2 &media5200_fpga 0 4
+				 0xc800 0 0 3 &media5200_fpga 0 5
+				 0xc800 0 0 4 &media5200_fpga 0 2
+
+				 0xd000 0 0 1 &media5200_fpga 0 4 // miniPCI
+				 0xd000 0 0 2 &media5200_fpga 0 5
+
+				 0xe000 0 0 1 &media5200_fpga 0 5 // CoralIP
+				>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+			 <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+		interrupt-parent = <&mpc5200_pic>;
+	};
+
+	localbus {
+		ranges = < 0 0 0xfc000000 0x02000000
+			   1 0 0xfe000000 0x02000000
+			   2 0 0xf0010000 0x00010000
+			   3 0 0xf0020000 0x00010000 >;
+		flash@0,0 {
+			compatible = "amd,am29lv28ml", "cfi-flash";
+			reg = <0 0x0 0x2000000>;                // 32 MB
+			bank-width = <4>;                       // Width in bytes of the flash bank
+			device-width = <2>;                     // Two devices on each bank
+		};
+
+		flash@1,0 {
+			compatible = "amd,am29lv28ml", "cfi-flash";
+			reg = <1 0 0x2000000>;                  // 32 MB
+			bank-width = <4>;                       // Width in bytes of the flash bank
+			device-width = <2>;                     // Two devices on each bank
+		};
+
+		media5200_fpga: fpga@2,0 {
+			compatible = "fsl,media5200-fpga";
+			interrupt-controller;
+			#interrupt-cells = <2>;	// 0:bank 1:id; no type field
+			reg = <2 0 0x10000>;
+
+			interrupt-parent = <&mpc5200_pic>;
+			interrupts = <0 0 3	// IRQ bank 0
+			              1 1 3>;	// IRQ bank 1
+		};
+
+		uart@3,0 {
+			compatible = "ti,tl16c752bpt";
+			reg = <3 0 0x10000>;
+			interrupt-parent = <&media5200_fpga>;
+			interrupts = <0 0  0 1>; // 2 irqs
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mgcoge.dts b/arch/powerpc/boot/dts/mgcoge.dts
new file mode 100644
index 0000000000..9cefed2072
--- /dev/null
+++ b/arch/powerpc/boot/dts/mgcoge.dts
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree for the MGCOGE platform from keymile
+ *
+ * Copyright 2008 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ */
+
+/dts-v1/;
+/ {
+	model = "MGCOGE";
+	compatible = "keymile,km82xx";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &eth0;
+		serial0 = &smc2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8247@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <16384>;
+			i-cache-size = <16384>;
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			bus-frequency = <0>; /* Filled in by U-Boot */
+		};
+	};
+
+	localbus@f0010100 {
+		compatible = "fsl,mpc8247-localbus",
+		             "fsl,pq2-localbus",
+		             "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xf0010100 0x40>;
+
+		ranges = <0 0 0xfe000000 0x00400000
+			  1 0 0x30000000 0x00010000
+			  2 0 0x40000000 0x00010000
+			  5 0 0x50000000 0x04000000
+			>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0x0 0x400000>;	/* localbus range 0, offset 0, 4 MiB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <1>;
+			device-width = <1>;
+			partition@0 {
+				label = "u-boot";
+				reg = <0x00000 0xC0000>;	/* 768 KiB */
+			};
+			partition@1 {	/* NOTE(review): unit address is an index, not the reg offset 0xC0000 */
+				label = "env";
+				reg = <0xC0000 0x20000>;	/* 128 KiB */
+			};
+			partition@2 {	/* NOTE(review): unit address is an index, not the reg offset 0xE0000 */
+				label = "envred";
+				reg = <0xE0000 0x20000>;	/* 128 KiB */
+			};
+			partition@3 {	/* NOTE(review): unit address is an index, not the reg offset 0x100000 */
+				label = "free";
+				reg = <0x100000 0x300000>;	/* 3 MiB, up to the end of the 4 MiB flash */
+			};
+		};
+
+		flash@5,0 {
+			compatible = "cfi-flash";
+			reg = <5 0x00000000 0x02000000
+			       5 0x02000000 0x02000000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <2>;
+			partition@app { /* 64 MBytes */
+				label = "ubi0";
+				reg = <0x00000000 0x04000000>;
+			};
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0>; /* Filled in by U-Boot */
+	};
+
+	soc@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8247-immr", "fsl,pq2-soc", "simple-bus";
+		ranges = <0x00000000 0xf0000000 0x00053000>;
+
+		// Temporary until code stops depending on it.
+		device_type = "soc";
+
+		cpm@119c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			#interrupt-cells = <2>;
+			compatible = "fsl,mpc8247-cpm", "fsl,cpm2",
+					"simple-bus";
+			reg = <0x119c0 0x30>;
+			ranges;
+
+			muram {
+				compatible = "fsl,cpm-muram";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x80 0x1f80 0x9800 0x800>;
+				};
+			};
+
+			brg@119f0 {
+				compatible = "fsl,mpc8247-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x119f0 0x10 0x115f0 0x10>;
+			};
+
+			/* Monitor port/SMC2 */
+			smc2: serial@11a90 {
+				device_type = "serial";
+				compatible = "fsl,mpc8247-smc-uart",
+				             "fsl,cpm2-smc-uart";
+				reg = <0x11a90 0x20 0x88fc 0x02>;
+				interrupts = <5 8>;
+				interrupt-parent = <&PIC>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0x21200000>;
+				current-speed = <0>; /* Filled in by U-Boot */
+			};
+
+			eth0: ethernet@11a60 {
+				device_type = "network";
+				compatible = "fsl,mpc8247-scc-enet",
+				             "fsl,cpm2-scc-enet";
+				reg = <0x11a60 0x20 0x8300 0x100 0x11390 1>;
+				local-mac-address = [ 00 00 00 00 00 00 ]; /* Filled in by U-Boot */
+				interrupts = <43 8>;
+				interrupt-parent = <&PIC>;
+				linux,network-index = <0>;
+				fsl,cpm-command = <0xce00000>;
+				fixed-link = <0 0 10 0 0>;
+			};
+
+			i2c@11860 {
+				compatible = "fsl,mpc8272-i2c",
+					     "fsl,cpm2-i2c";
+				reg = <0x11860 0x20 0x8afc 0x2>;
+				interrupts = <1 8>;
+				interrupt-parent = <&PIC>;
+				fsl,cpm-command = <0x29600000>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+
+			mdio@10d40 {
+				compatible = "fsl,cpm2-mdio-bitbang";
+				reg = <0x10d00 0x14>;	/* NOTE(review): base 0x10d00 differs from unit address 10d40 - confirm which is intended */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				fsl,mdio-pin = <12>;
+				fsl,mdc-pin = <13>;
+
+				phy0: ethernet-phy@0 {	/* referenced by ethernet@11300 (phy-handle) */
+					reg = <0x0>;
+				};
+
+				phy1: ethernet-phy@1 {	/* referenced by ethernet@11320 (phy-handle) */
+					reg = <0x1>;
+				};
+			};
+
+			/* FCC1 management to switch */
+			ethernet@11300 {
+				device_type = "network";
+				compatible = "fsl,cpm2-fcc-enet";
+				reg = <0x11300 0x20 0x8400 0x100 0x11390 0x1>;
+				local-mac-address = [ 00 01 02 03 04 07 ];
+				interrupts = <32 8>;
+				interrupt-parent = <&PIC>;
+				phy-handle = <&phy0>;
+				linux,network-index = <1>;
+				fsl,cpm-command = <0x12000300>;
+			};
+
+			/* FCC2 to redundant core unit over backplane */
+			ethernet@11320 {
+				device_type = "network";
+				compatible = "fsl,cpm2-fcc-enet";
+				reg = <0x11320 0x20 0x8500 0x100 0x113b0 0x1>;
+				local-mac-address = [ 00 01 02 03 04 08 ];
+				interrupts = <33 8>;
+				interrupt-parent = <&PIC>;
+				phy-handle = <&phy1>;
+				linux,network-index = <2>;
+				fsl,cpm-command = <0x16200300>;
+			};
+
+			usb@11b60 {
+				compatible = "fsl,mpc8272-cpm-usb";
+				mode = "peripheral";
+				reg = <0x11b60 0x40 0x8b00 0x100>;
+				interrupts = <11 8>;
+				interrupt-parent = <&PIC>;
+				usb-clock = <5>;
+			};
+			spi@11aa0 {
+				cell-index = <0>;
+				compatible = "fsl,spi", "fsl,cpm2-spi";
+				reg = <0x11a80 0x40 0x89fc 0x2>;
+				interrupts = <2 8>;
+				interrupt-parent = <&PIC>;
+				cs-gpios = < &cpm2_pio_d 19 0>;
+			};
+
+		};
+
+		cpm2_pio_d: gpio-controller@10d60 {
+			#gpio-cells = <2>;
+			compatible = "fsl,cpm2-pario-bank";
+			reg = <0x10d60 0x14>;
+			gpio-controller;
+		};
+
+		cpm2_pio_c: gpio-controller@10d40 {
+			#gpio-cells = <2>;
+			compatible = "fsl,cpm2-pario-bank";
+			reg = <0x10d40 0x14>;
+			gpio-controller;
+		};
+
+		PIC: interrupt-controller@10c00 {
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			reg = <0x10c00 0x80>;
+			compatible = "fsl,mpc8247-pic", "fsl,pq2-pic";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
new file mode 100644
index 0000000000..269e930b3b
--- /dev/null
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -0,0 +1,174 @@
+/dts-v1/;
+
+/ {
+	#size-cells = <0x02>;
+	#address-cells = <0x02>;
+	model-name = "microwatt";
+	compatible = "microwatt-soc";
+
+	aliases {
+		serial0 = &UART0;
+	};
+
+	reserved-memory {
+		#size-cells = <0x02>;
+		#address-cells = <0x02>;
+		ranges;
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000 0x10000000>;	/* 2-cell base 0, 2-cell size: 256 MiB */
+	};
+
+	clocks {
+		sys_clk: litex_sys_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <100000000>;
+		};
+	};
+
+	cpus {
+		#size-cells = <0x00>;
+		#address-cells = <0x01>;
+
+		ibm,powerpc-cpu-features {
+			display-name = "Microwatt";
+			isa = <3000>;
+			device_type = "cpu-features";
+			compatible = "ibm,powerpc-cpu-features";
+
+			mmu-radix {
+				isa = <3000>;
+				usable-privilege = <2>;
+			};
+
+			little-endian {
+				isa = <2050>;
+				usable-privilege = <3>;
+				hwcap-bit-nr = <1>;
+			};
+
+			cache-inhibited-large-page {
+				isa = <2040>;
+				usable-privilege = <2>;
+			};
+
+			fixed-point-v3 {
+				isa = <3000>;
+				usable-privilege = <3>;
+			};
+
+			no-execute {
+				isa = <2010>;
+				usable-privilege = <2>;
+			};
+
+			floating-point {
+				hwcap-bit-nr = <27>;
+				isa = <0>;
+				usable-privilege = <3>;
+			};
+		};
+
+		PowerPC,Microwatt@0 {
+			i-cache-sets = <2>;
+			ibm,dec-bits = <64>;
+			reservation-granule-size = <64>;
+			clock-frequency = <100000000>;
+			timebase-frequency = <100000000>;
+			i-tlb-sets = <1>;
+			ibm,ppc-interrupt-server#s = <0>;
+			i-cache-block-size = <64>;
+			d-cache-block-size = <64>;
+			d-cache-sets = <2>;
+			i-tlb-size = <64>;
+			cpu-version = <0x990000>;
+			status = "okay";
+			i-cache-size = <0x1000>;
+			ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>;
+			tlb-size = <0>;
+			tlb-sets = <0>;
+			device_type = "cpu";
+			d-tlb-size = <128>;
+			d-tlb-sets = <2>;
+			reg = <0>;
+			general-purpose;
+			64-bit;
+			d-cache-size = <0x1000>;
+			ibm,chip-id = <0>;
+			ibm,mmu-lpid-bits = <12>;
+			ibm,mmu-pid-bits = <20>;
+		};
+	};
+
+	soc@c0000000 {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupt-parent = <&ICS>;
+
+		ranges = <0 0 0xc0000000 0x40000000>;
+
+		interrupt-controller@4000 {
+			compatible = "openpower,xics-presentation", "ibm,ppc-xicp";
+			ibm,interrupt-server-ranges = <0x0 0x1>;
+			reg = <0x4000 0x100>;
+		};
+
+		ICS: interrupt-controller@5000 {
+			compatible = "openpower,xics-sources";
+			interrupt-controller;
+			interrupt-ranges = <0x10 0x10>;
+			reg = <0x5000 0x100>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			#interrupt-cells = <2>;
+		};
+
+		UART0: serial@2000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x2000 0x8>;
+			clock-frequency = <100000000>;
+			current-speed = <115200>;
+			reg-shift = <2>;
+			fifo-size = <16>;
+			interrupts = <0x10 0x1>;
+		};
+
+		ethernet@8020000 {
+			compatible = "litex,liteeth";
+			reg = <0x8021000 0x100	/* "mac" */
+				0x8020800 0x100	/* "mdio" */
+				0x8030000 0x2000>;	/* "buffer": (2 rx + 2 tx) slots x 0x800 */
+			reg-names = "mac", "mdio", "buffer";
+			litex,rx-slots = <2>;
+			litex,tx-slots = <2>;
+			litex,slot-size = <0x800>;
+			interrupts = <0x11 0x1>;
+		};
+
+		mmc@8040000 {
+			compatible = "litex,mmc";
+			reg = <0x8042800 0x800
+				0x8041000 0x800
+				0x8040800 0x800
+				0x8042000 0x800
+				0x8041800 0x800>;
+			reg-names = "phy", "core", "reader", "writer", "irq";
+			bus-width = <4>;
+			interrupts = <0x13 1>;
+			cap-sd-highspeed;
+			clocks = <&sys_clk>;
+		};
+	};
+
+	chosen {
+		bootargs = "";
+		ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00
+					  00 00 00 00 00 00 00 00 40 00 40];
+		stdout-path = &UART0;
+	};
+};
diff --git a/arch/powerpc/boot/dts/motionpro.dts b/arch/powerpc/boot/dts/motionpro.dts
new file mode 100644
index 0000000000..c23676093d
--- /dev/null
+++ b/arch/powerpc/boot/dts/motionpro.dts
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Motion-PRO board Device Tree Source
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };
+&gpt6 { // Motion-PRO status LED
+	compatible = "promess,motionpro-led";
+	label = "motionpro-statusled";
+	blink-delay = <100>; // 100 msec
+};
+&gpt7 { // Motion-PRO ready LED
+	compatible = "promess,motionpro-led";
+	label = "motionpro-readyled";
+};
+
+/ {
+	model = "promess,motionpro";
+	compatible = "promess,motionpro";
+
+	soc5200@f0000000 {
+		can@900 {
+			status = "disabled";
+		};
+
+		psc@2000 {		// PSC1
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		// PSC2 in SPI master mode
+		psc@2200 {		// PSC2
+			compatible = "fsl,mpc5200b-psc-spi","fsl,mpc5200-psc-spi";
+			cell-index = <1>;
+		};
+
+		psc@2400 {		// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {		// PSC4
+			status = "disabled";
+		};
+
+		psc@2800 {		// PSC5
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2c00 {		// PSC6
+			status = "disabled";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@2 {
+				reg = <2>;
+			};
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200b-sram","fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;
+		};
+	};
+
+	pci@f0000d00 {
+		status = "disabled";
+	};
+
+	localbus {
+		ranges = <0 0 0xff000000 0x01000000
+			  1 0 0x50000000 0x00010000
+			  2 0 0x50010000 0x00010000
+			  3 0 0x50020000 0x00010000>;
+
+		// 8-bit DualPort SRAM on LocalPlus Bus CS1
+		kollmorgen@1,0 {
+			compatible = "promess,motionpro-kollmorgen";
+			reg = <1 0 0x10000>;
+			interrupts = <1 1 0>;
+		};
+
+		// 8-bit board CPLD on LocalPlus Bus CS2
+		cpld@2,0 {
+			compatible = "promess,motionpro-cpld";
+			reg = <2 0 0x10000>;
+		};
+
+		// 8-bit custom Anybus Module on LocalPlus Bus CS3
+		anybus@3,0 {
+			compatible = "promess,motionpro-anybus";
+			reg = <3 0 0x10000>;
+		};
+		pro_module_general@3,0 {
+			compatible = "promess,pro_module_general";
+			reg = <3 0 3>;	/* NOTE(review): same unit address as anybus@3,0 and inside its reg <3 0 0x10000> - confirm the overlap is intended */
+		};
+		pro_module_dio@3,800 {
+			compatible = "promess,pro_module_dio";
+			reg = <3 0x800 2>;
+		};
+
+		// 16-bit flash device at LocalPlus Bus CS0
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x01000000>;
+			bank-width = <2>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+		};
+
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
new file mode 100644
index 0000000000..d3fc8062fb
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -0,0 +1,526 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * base MPC5121 Device Tree Source
+ *
+ * Copyright 2007-2008 Freescale Semiconductor Inc.
+ */
+
+#include <dt-bindings/clock/mpc512x-clock.h>
+
+/dts-v1/;
+
+/ {
+	model = "mpc5121";
+	compatible = "fsl,mpc5121";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&ipic>;	/* inherited by nodes without their own interrupt-parent */
+
+	aliases {
+		ethernet0 = &eth0;
+		pci = &pci;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,5121@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <0x20>;	/* 32 bytes */
+			i-cache-line-size = <0x20>;	/* 32 bytes */
+			d-cache-size = <0x8000>;	/* L1, 32K */
+			i-cache-size = <0x8000>;	/* L1, 32K */
+			timebase-frequency = <49500000>;/* 49.5 MHz (csb/4) */
+			bus-frequency = <198000000>;	/* 198 MHz csb bus */
+			clock-frequency = <396000000>;	/* 396 MHz ppc core */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	/* 256MB at 0 */
+	};
+
+	mbx@20000000 {
+		compatible = "fsl,mpc5121-mbx";
+		reg = <0x20000000 0x4000>;
+		interrupts = <66 0x8>;
+		clocks = <&clks MPC512x_CLK_MBX_BUS>,
+			 <&clks MPC512x_CLK_MBX_3D>,
+			 <&clks MPC512x_CLK_MBX>;
+		clock-names = "mbx-bus", "mbx-3d", "mbx";
+	};
+
+	sram@30000000 {
+		compatible = "fsl,mpc5121-sram";
+		reg = <0x30000000 0x20000>;	/* 128K at 0x30000000 */
+	};
+
+	nfc@40000000 {
+		compatible = "fsl,mpc5121-nfc";
+		reg = <0x40000000 0x100000>;	/* 1M at 0x40000000 */
+		interrupts = <6 8>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		clocks = <&clks MPC512x_CLK_NFC>;
+		clock-names = "ipg";
+	};
+
+	localbus@80000020 {
+		compatible = "fsl,mpc5121-localbus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0x80000020 0x40>;
+		ranges = <0x0 0x0 0xfc000000 0x04000000>;
+	};
+
+	clocks {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		osc: osc {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <33000000>;
+		};
+	};
+
+	soc@80000000 {
+		compatible = "fsl,mpc5121-immr";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x0 0x80000000 0x400000>;
+		reg = <0x80000000 0x400000>;
+		bus-frequency = <66000000>;	/* 66 MHz ips bus */
+
+
+		/*
+		 * IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@c00 {
+			compatible = "fsl,mpc5121-ipic", "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0xc00 0x100>;
+		};
+
+		/* Watchdog timer */
+		wdt@900 {
+			compatible = "fsl,mpc5121-wdt";
+			reg = <0x900 0x100>;
+		};
+
+		/* Real time clock */
+		rtc@a00 {
+			compatible = "fsl,mpc5121-rtc";
+			reg = <0xa00 0x100>;
+			interrupts = <79 0x8 80 0x8>;
+		};
+
+		/* Reset module */
+		reset@e00 {
+			compatible = "fsl,mpc5121-reset";
+			reg = <0xe00 0x100>;
+		};
+
+		/* Clock control */
+		clks: clock@f00 {
+			compatible = "fsl,mpc5121-clock";
+			reg = <0xf00 0x100>;
+			#clock-cells = <1>;
+			clocks = <&osc>;
+			clock-names = "osc";
+		};
+
+		/* Power Management Controller */
+		pmc@1000 {
+			compatible = "fsl,mpc5121-pmc";
+			reg = <0x1000 0x100>;
+			interrupts = <83 0x8>;
+		};
+
+		gpio@1100 {
+			compatible = "fsl,mpc5121-gpio";
+			reg = <0x1100 0x100>;
+			interrupts = <78 0x8>;
+		};
+
+		can@1300 {
+			compatible = "fsl,mpc5121-mscan";
+			reg = <0x1300 0x80>;
+			interrupts = <12 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN0_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
+		};
+
+		can@1380 {
+			compatible = "fsl,mpc5121-mscan";
+			reg = <0x1380 0x80>;
+			interrupts = <13 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN1_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
+		};
+
+		sdhc@1500 {
+			compatible = "fsl,mpc5121-sdhc";
+			reg = <0x1500 0x100>;
+			interrupts = <8 0x8>;
+			dmas = <&dma0 30>;
+			dma-names = "rx-tx";
+			clocks = <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SDHC>;
+			clock-names = "ipg", "per";
+		};
+
+		i2c@1700 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+			reg = <0x1700 0x20>;
+			interrupts = <9 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
+		};
+
+		i2c@1720 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+			reg = <0x1720 0x20>;
+			interrupts = <10 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
+		};
+
+		i2c@1740 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+			reg = <0x1740 0x20>;
+			interrupts = <11 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
+		};
+
+		i2ccontrol@1760 {
+			compatible = "fsl,mpc5121-i2c-ctrl";
+			reg = <0x1760 0x8>;
+		};
+
+		axe@2000 {
+			compatible = "fsl,mpc5121-axe";
+			reg = <0x2000 0x100>;
+			interrupts = <42 0x8>;
+			clocks = <&clks MPC512x_CLK_AXE>;
+			clock-names = "ipg";
+		};
+
+		display@2100 {
+			compatible = "fsl,mpc5121-diu";
+			reg = <0x2100 0x100>;
+			interrupts = <64 0x8>;
+			clocks = <&clks MPC512x_CLK_DIU>;
+			clock-names = "ipg";
+		};
+
+		can@2300 {
+			compatible = "fsl,mpc5121-mscan";
+			reg = <0x2300 0x80>;
+			interrupts = <90 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN2_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
+		};
+
+		can@2380 {
+			compatible = "fsl,mpc5121-mscan";
+			reg = <0x2380 0x80>;
+			interrupts = <91 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN3_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
+		};
+
+		viu@2400 {
+			compatible = "fsl,mpc5121-viu";
+			reg = <0x2400 0x400>;
+			interrupts = <67 0x8>;
+			clocks = <&clks MPC512x_CLK_VIU>;
+			clock-names = "ipg";
+		};
+
+		mdio@2800 {
+			compatible = "fsl,mpc5121-fec-mdio";
+			reg = <0x2800 0x800>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			clocks = <&clks MPC512x_CLK_FEC>;
+			clock-names = "per";
+		};
+
+		eth0: ethernet@2800 {
+			device_type = "network";
+			compatible = "fsl,mpc5121-fec";
+			reg = <0x2800 0x800>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <4 0x8>;
+			clocks = <&clks MPC512x_CLK_FEC>;
+			clock-names = "per";
+		};
+
+		/* USB1 using external ULPI PHY */
+		usb@3000 {
+			compatible = "fsl,mpc5121-usb2-dr";
+			reg = <0x3000 0x600>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <43 0x8>;
+			dr_mode = "otg";
+			phy_type = "ulpi";
+			clocks = <&clks MPC512x_CLK_USB1>;
+			clock-names = "ipg";
+		};
+
+		/* USB0 using internal UTMI PHY */
+		usb@4000 {
+			compatible = "fsl,mpc5121-usb2-dr";
+			reg = <0x4000 0x600>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <44 0x8>;
+			dr_mode = "otg";
+			phy_type = "utmi_wide";
+			clocks = <&clks MPC512x_CLK_USB2>;
+			clock-names = "ipg";
+		};
+
+		/* IO control */
+		ioctl@a000 {
+			compatible = "fsl,mpc5121-ioctl";
+			reg = <0xA000 0x1000>;
+		};
+
+		/* LocalPlus controller */
+		lpc@10000 {
+			compatible = "fsl,mpc5121-lpc";
+			reg = <0x10000 0x100>;
+		};
+
+		sclpc@10100 {
+			compatible = "fsl,mpc512x-lpbfifo";
+			reg = <0x10100 0x50>;
+			interrupts = <7 0x8>;
+			dmas = <&dma0 26>;
+			dma-names = "rx-tx";
+		};
+
+		pata@10200 {
+			compatible = "fsl,mpc5121-pata";
+			reg = <0x10200 0x100>;
+			interrupts = <5 0x8>;
+			clocks = <&clks MPC512x_CLK_PATA>;
+			clock-names = "ipg";
+		};
+
+		/* 512x PSCs are not 52xx PSC compatible */
+
+		/* PSC0 */
+		psc@11000 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11000 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC0>,
+				 <&clks MPC512x_CLK_PSC0_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC1 */
+		psc@11100 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11100 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC1>,
+				 <&clks MPC512x_CLK_PSC1_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC2 */
+		psc@11200 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11200 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC2>,
+				 <&clks MPC512x_CLK_PSC2_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC3 */
+		psc@11300 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+			reg = <0x11300 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC3>,
+				 <&clks MPC512x_CLK_PSC3_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC4 */
+		psc@11400 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+			reg = <0x11400 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC4>,
+				 <&clks MPC512x_CLK_PSC4_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC5 */
+		psc@11500 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11500 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC5>,
+				 <&clks MPC512x_CLK_PSC5_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC6 */
+		psc@11600 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11600 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC6>,
+				 <&clks MPC512x_CLK_PSC6_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC7 */
+		psc@11700 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11700 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC7>,
+				 <&clks MPC512x_CLK_PSC7_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC8 */
+		psc@11800 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11800 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC8>,
+				 <&clks MPC512x_CLK_PSC8_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC9 */
+		psc@11900 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11900 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC9>,
+				 <&clks MPC512x_CLK_PSC9_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC10 */
+		psc@11a00 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11a00 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC10>,
+				 <&clks MPC512x_CLK_PSC10_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		/* PSC11 */
+		psc@11b00 {
+			compatible = "fsl,mpc5121-psc";
+			reg = <0x11b00 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC11>,
+				 <&clks MPC512x_CLK_PSC11_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		pscfifo@11f00 {
+			compatible = "fsl,mpc5121-psc-fifo";
+			reg = <0x11f00 0x100>;
+			interrupts = <40 0x8>;
+			clocks = <&clks MPC512x_CLK_PSC_FIFO>;
+			clock-names = "ipg";
+		};
+
+		dma0: dma@14000 {
+			compatible = "fsl,mpc5121-dma";
+			reg = <0x14000 0x1800>;
+			interrupts = <65 0x8>;
+			#dma-cells = <1>;
+		};
+	};
+
+	pci: pci@80008500 {
+		compatible = "fsl,mpc5121-pci";
+		device_type = "pci";
+		interrupts = <1 0x8>;
+		clock-frequency = <0>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		clocks = <&clks MPC512x_CLK_PCI>;
+		clock-names = "ipg";
+
+		reg = <0x80008500 0x100	/* internal registers */
+		       0x80008300 0x8>;	/* config space access registers */
+		bus-range = <0x0 0x0>;
+		ranges = <0x42000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
+			  0x02000000 0x0 0xb0000000 0xb0000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0x84000000 0x0 0x01000000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc5121ads.dts b/arch/powerpc/boot/dts/mpc5121ads.dts
new file mode 100644
index 0000000000..b407a50ee6
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc5121ads.dts
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC5121E ADS Device Tree Source
+ *
+ * Copyright 2007-2008 Freescale Semiconductor Inc.
+ */
+
+#include "mpc5121.dtsi"
+
+/ {
+	model = "mpc5121ads";
+	compatible = "fsl,mpc5121ads", "fsl,mpc5121";
+
+	nfc@40000000 {
+		/*
+		 * ADS has two Hynix 512MB Nand flash chips in a single
+		 * stacked package.
+		 */
+		chips = <2>;
+
+		nand@0 {
+			label = "nand";
+			reg = <0x00000000 0x40000000>;	/* 512MB + 512MB */
+		};
+	};
+
+	localbus@80000020 {
+		ranges = <0x0 0x0 0xfc000000 0x04000000
+			  0x2 0x0 0x82000000 0x00008000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0x0 0x4000000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <4>;
+			device-width = <2>;
+
+			protected@0 {
+				label = "protected";
+				reg = <0x00000000 0x00040000>;  // first sector is protected
+				read-only;
+			};
+			filesystem@40000 {
+				label = "filesystem";
+				reg = <0x00040000 0x03c00000>;  // 60M for filesystem
+			};
+			kernel@3c40000 {
+				label = "kernel";
+				reg = <0x03c40000 0x00280000>;  // 2.5M for kernel
+			};
+			device-tree@3ec0000 {
+				label = "device-tree";
+				reg = <0x03ec0000 0x00040000>;  // one sector for device tree
+			};
+			u-boot@3f00000 {
+				label = "u-boot";
+				reg = <0x03f00000 0x00100000>;  // 1M for u-boot
+				read-only;
+			};
+		};
+
+		board-control@2,0 {
+			compatible = "fsl,mpc5121ads-cpld";
+			reg = <0x2 0x0 0x8000>;
+		};
+
+		cpld_pic: pic@2,a {
+			compatible = "fsl,mpc5121ads-cpld-pic";
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x2 0xa 0x5>;
+			/* irq routing:
+			 * all irqs but touch screen are routed to irq0 (ipic 48)
+			 * touch screen is statically routed to irq1 (ipic 17)
+			 * so don't use it here
+			 */
+			interrupts = <48 0x8>;
+		};
+	};
+
+	soc@80000000 {
+
+		i2c@1700 {
+			fsl,preserve-clocking;
+
+			hwmon@4a {
+				compatible = "adi,ad7414";
+				reg = <0x4a>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,24c32";
+				reg = <0x50>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t62";
+				reg = <0x68>;
+			};
+		};
+
+		eth0: ethernet@2800 {
+			phy-handle = <&phy0>;
+		};
+
+		can@2300 {
+			status = "disabled";
+		};
+
+		can@2380 {
+			status = "disabled";
+		};
+
+		viu@2400 {
+			status = "disabled";
+		};
+
+		mdio@2800 {
+			phy0: ethernet-phy@0 {	/* target of the phy-handle in ethernet@2800 */
+				reg = <1>;	/* NOTE(review): unit address says @0 but reg is 1 - confirm which is intended */
+			};
+		};
+
+		/* mpc5121ads only uses USB0 */
+		usb@3000 {
+			status = "disabled";
+		};
+
+		/* USB0 using internal UTMI PHY */
+		usb@4000 {
+			dr_mode = "host";
+			fsl,invert-drvvbus;
+			fsl,invert-pwr-fault;
+		};
+
+		/* PSC3 serial port A aka ttyPSC0 */
+		psc@11300 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		/* PSC4 serial port B aka ttyPSC1 */
+		psc@11400 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		/* PSC5 in ac97 mode */
+		ac97: psc@11500 {
+			compatible = "fsl,mpc5121-psc-ac97", "fsl,mpc5121-psc";
+			fsl,mode = "ac97-slave";
+			fsl,rx-fifo-size = <384>;
+			fsl,tx-fifo-size = <384>;
+		};
+	};
+
+	pci: pci@80008500 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 0x15 - Slot 1 PCI */
+				 0xa800 0x0 0x0 0x1 &cpld_pic 0x0 0x8
+				 0xa800 0x0 0x0 0x2 &cpld_pic 0x1 0x8
+				 0xa800 0x0 0x0 0x3 &cpld_pic 0x2 0x8
+				 0xa800 0x0 0x0 0x4 &cpld_pic 0x3 0x8
+
+				/* IDSEL 0x16 - Slot 2 MiniPCI */
+				 0xb000 0x0 0x0 0x1 &cpld_pic 0x4 0x8
+				 0xb000 0x0 0x0 0x2 &cpld_pic 0x5 0x8
+
+				/* IDSEL 0x17 - Slot 3 MiniPCI */
+				 0xb800 0x0 0x0 0x1 &cpld_pic 0x6 0x8
+				 0xb800 0x0 0x0 0x2 &cpld_pic 0x7 0x8
+				>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
new file mode 100644
index 0000000000..ee090709aa
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * STx/Freescale ADS5125 MPC5125 silicon
+ *
+ * Copyright (C) 2009 Freescale Semiconductor Inc. All rights reserved.
+ *
+ * Reworked by Matteo Facchinetti (engineering@sirius-es.it)
+ * Copyright (C) 2013 Sirius Electronic Systems
+ */
+
+#include <dt-bindings/clock/mpc512x-clock.h>
+
+/dts-v1/;
+
+/ {
+	model = "mpc5125twr"; // In BSP "mpc5125ads"
+	compatible = "fsl,mpc5125ads", "fsl,mpc5125";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&ipic>;
+
+	aliases {
+		gpio0 = &gpio0;
+		gpio1 = &gpio1;
+		ethernet0 = &eth0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,5125@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <0x20>;	// 32 bytes
+			i-cache-line-size = <0x20>;	// 32 bytes
+			d-cache-size = <0x8000>;	// L1, 32K
+			i-cache-size = <0x8000>;	// L1, 32K
+			timebase-frequency = <49500000>;// 49.5 MHz (csb/4)
+			bus-frequency = <198000000>;	// 198 MHz csb bus
+			clock-frequency = <396000000>;	// 396 MHz ppc core
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// 256MB at 0
+	};
+
+	sram@30000000 {
+		compatible = "fsl,mpc5121-sram";
+		reg = <0x30000000 0x08000>;		// 32K at 0x30000000
+	};
+
+	clocks {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		osc: osc {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <33000000>;
+		};
+	};
+
+	soc@80000000 {
+		compatible = "fsl,mpc5121-immr";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0x0 0x80000000 0x400000>;
+		reg = <0x80000000 0x400000>;
+		bus-frequency = <66000000>;	// 66 MHz ips bus
+
+		// IPIC
+		// interrupts cell = <intr #, sense>
+		// sense values match linux IORESOURCE_IRQ_* defines:
+		// sense == 8: Level, low assertion
+		// sense == 2: Edge, high-to-low change
+		//
+		ipic: interrupt-controller@c00 {
+			compatible = "fsl,mpc5121-ipic", "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0xc00 0x100>;
+		};
+
+		rtc@a00 {	// Real time clock
+			compatible = "fsl,mpc5121-rtc";
+			reg = <0xa00 0x100>;
+			interrupts = <79 0x8 80 0x8>;
+		};
+
+		reset@e00 {	// Reset module
+			compatible = "fsl,mpc5125-reset";
+			reg = <0xe00 0x100>;
+		};
+
+		clks: clock@f00 {	// Clock control
+			compatible = "fsl,mpc5121-clock";
+			reg = <0xf00 0x100>;
+			#clock-cells = <1>;
+			clocks = <&osc>;
+			clock-names = "osc";
+		};
+
+		pmc@1000 {  // Power Management Controller
+			compatible = "fsl,mpc5121-pmc";
+			reg = <0x1000 0x100>;
+			interrupts = <83 0x2>;
+		};
+
+		gpio0: gpio@1100 {
+			compatible = "fsl,mpc5125-gpio";
+			reg = <0x1100 0x080>;
+			interrupts = <78 0x8>;
+		};
+
+		gpio1: gpio@1180 {
+			compatible = "fsl,mpc5125-gpio";
+			reg = <0x1180 0x080>;
+			interrupts = <86 0x8>;
+		};
+
+		can@1300 { // CAN rev.2
+			compatible = "fsl,mpc5121-mscan";
+			interrupts = <12 0x8>;
+			reg = <0x1300 0x80>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN0_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
+		};
+
+		can@1380 {
+			compatible = "fsl,mpc5121-mscan";
+			interrupts = <13 0x8>;
+			reg = <0x1380 0x80>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN1_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
+		};
+
+		sdhc@1500 {
+			compatible = "fsl,mpc5121-sdhc";
+			interrupts = <8 0x8>;
+			reg = <0x1500 0x100>;
+			clocks = <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SDHC>;
+			clock-names = "ipg", "per";
+		};
+
+		i2c@1700 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+			reg = <0x1700 0x20>;
+			interrupts = <0x9 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
+		};
+
+		i2c@1720 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+			reg = <0x1720 0x20>;
+			interrupts = <0xa 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
+		};
+
+		i2c@1740 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+			reg = <0x1740 0x20>;
+			interrupts = <0xb 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
+		};
+
+		i2ccontrol@1760 {
+			compatible = "fsl,mpc5121-i2c-ctrl";
+			reg = <0x1760 0x8>;
+		};
+
+		diu@2100 {
+			compatible = "fsl,mpc5121-diu";
+			reg = <0x2100 0x100>;
+			interrupts = <64 0x8>;
+			clocks = <&clks MPC512x_CLK_DIU>;
+			clock-names = "ipg";
+		};
+
+		mdio@2800 {
+			compatible = "fsl,mpc5121-fec-mdio";
+			reg = <0x2800 0x800>;	// same register window as ethernet@2800
+			#address-cells = <1>;	// children carry a one-cell reg and no size
+			#size-cells = <0>;
+			phy0: ethernet-phy@0 {	// target of the phy-handle in ethernet@2800
+				reg = <1>;	// NOTE(review): unit address says @0 but reg is 1 - confirm which is intended
+			};
+		};
+
+		eth0: ethernet@2800 {
+			compatible = "fsl,mpc5125-fec";
+			reg = <0x2800 0x800>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <4 0x8>;
+			phy-handle = < &phy0 >;
+			phy-connection-type = "rmii";
+			clocks = <&clks MPC512x_CLK_FEC>;
+			clock-names = "per";
+		};
+
+		// IO control
+		ioctl@a000 {
+			compatible = "fsl,mpc5125-ioctl";
+			reg = <0xA000 0x1000>;
+		};
+
+		// disable USB1 port
+		// TODO:
+		// correct pinmux config and fix USB3320 ulpi dependency
+		// before re-enabling it
+		usb@3000 {
+			compatible = "fsl,mpc5121-usb2-dr";
+			reg = <0x3000 0x400>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupts = <43 0x8>;
+			dr_mode = "host";
+			phy_type = "ulpi";
+			clocks = <&clks MPC512x_CLK_USB1>;
+			clock-names = "ipg";
+			status = "disabled";
+		};
+
+		sclpc@10100 {
+			compatible = "fsl,mpc512x-lpbfifo";
+			reg = <0x10100 0x50>;
+			interrupts = <7 0x8>;
+			dmas = <&dma0 26>;
+			dma-names = "rx-tx";
+		};
+
+		// 5125 PSCs are not 52xx or 5121 PSC compatible
+		// PSC1 uart0 aka ttyPSC0
+		serial@11100 {
+			compatible = "fsl,mpc5125-psc-uart", "fsl,mpc5125-psc";
+			reg = <0x11100 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC1>,
+				 <&clks MPC512x_CLK_PSC1_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		// PSC9 uart1 aka ttyPSC1
+		serial@11900 {
+			compatible = "fsl,mpc5125-psc-uart", "fsl,mpc5125-psc";
+			reg = <0x11900 0x100>;
+			interrupts = <40 0x8>;
+			fsl,rx-fifo-size = <16>;
+			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC9>,
+				 <&clks MPC512x_CLK_PSC9_MCLK>;
+			clock-names = "ipg", "mclk";
+		};
+
+		pscfifo@11f00 {
+			compatible = "fsl,mpc5121-psc-fifo";
+			reg = <0x11f00 0x100>;
+			interrupts = <40 0x8>;
+			clocks = <&clks MPC512x_CLK_PSC_FIFO>;
+			clock-names = "ipg";
+		};
+
+		dma0: dma@14000 {
+			compatible = "fsl,mpc5121-dma"; // BSP name: "mpc512x-dma2"
+			reg = <0x14000 0x1800>;
+			interrupts = <65 0x8>;
+			#dma-cells = <1>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi
new file mode 100644
index 0000000000..ffa82c7e10
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc5200b.dtsi
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * base MPC5200b Device Tree Source
+ *
+ * Copyright (C) 2010 SecretLab
+ * Grant Likely <grant@secretlab.ca>
+ * John Bonesio <bones@secretlab.ca>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "fsl,mpc5200b";
+	compatible = "fsl,mpc5200b";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&mpc5200_pic>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		powerpc: PowerPC,5200@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <0x4000>;	// L1, 16K
+			i-cache-size = <0x4000>;	// L1, 16K
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory: memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;	// 64MB
+	};
+
+	soc: soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200b-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>;		// from bootloader
+		system-frequency = <0>;		// from bootloader
+
+		cdm@200 {
+			compatible = "fsl,mpc5200b-cdm","fsl,mpc5200-cdm";
+			reg = <0x200 0x38>;
+		};
+
+		mpc5200_pic: interrupt-controller@500 {
+			// 5200 interrupts are encoded into two levels;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			compatible = "fsl,mpc5200b-pic","fsl,mpc5200-pic";
+			reg = <0x500 0x80>;
+		};
+
+		gpt0: timer@600 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x600 0x10>;
+			interrupts = <1 9 0>;
+			// add 'fsl,has-wdt' to enable watchdog
+		};
+
+		gpt1: timer@610 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x610 0x10>;
+			interrupts = <1 10 0>;
+		};
+
+		gpt2: timer@620 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x620 0x10>;
+			interrupts = <1 11 0>;
+		};
+
+		gpt3: timer@630 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x630 0x10>;
+			interrupts = <1 12 0>;
+		};
+
+		gpt4: timer@640 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x640 0x10>;
+			interrupts = <1 13 0>;
+		};
+
+		gpt5: timer@650 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x650 0x10>;
+			interrupts = <1 14 0>;
+		};
+
+		gpt6: timer@660 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x660 0x10>;
+			interrupts = <1 15 0>;
+		};
+
+		gpt7: timer@670 {	// General Purpose Timer
+			compatible = "fsl,mpc5200b-gpt","fsl,mpc5200-gpt";
+			#gpio-cells = <2>;  // Add 'gpio-controller;' to enable gpio mode
+			reg = <0x670 0x10>;
+			interrupts = <1 16 0>;
+		};
+
+		rtc@800 {	// Real time clock
+			compatible = "fsl,mpc5200b-rtc","fsl,mpc5200-rtc";
+			reg = <0x800 0x100>;
+			interrupts = <1 5 0 1 6 0>;
+		};
+
+		can@900 {
+			compatible = "fsl,mpc5200b-mscan","fsl,mpc5200-mscan";
+			interrupts = <2 17 0>;
+			reg = <0x900 0x80>;
+		};
+
+		can@980 {
+			compatible = "fsl,mpc5200b-mscan","fsl,mpc5200-mscan";
+			interrupts = <2 18 0>;
+			reg = <0x980 0x80>;
+		};
+
+		gpio_simple: gpio@b00 {
+			compatible = "fsl,mpc5200b-gpio","fsl,mpc5200-gpio";
+			reg = <0xb00 0x40>;
+			interrupts = <1 7 0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		gpio_wkup: gpio@c00 {
+			compatible = "fsl,mpc5200b-gpio-wkup","fsl,mpc5200-gpio-wkup";
+			reg = <0xc00 0x40>;
+			interrupts = <1 8 0 0 3 0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		spi@f00 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
+			reg = <0xf00 0x20>;
+			interrupts = <2 13 0 2 14 0>;
+		};
+
+		usb: usb@1000 {
+			compatible = "fsl,mpc5200b-ohci","fsl,mpc5200-ohci","ohci-be";
+			reg = <0x1000 0xff>;
+			interrupts = <2 6 0>;
+		};
+
+		dma-controller@1200 {
+			compatible = "fsl,mpc5200b-bestcomm","fsl,mpc5200-bestcomm";
+			reg = <0x1200 0x80>;
+			interrupts = <3 0 0  3 1 0  3 2 0  3 3 0
+			              3 4 0  3 5 0  3 6 0  3 7 0
+			              3 8 0  3 9 0  3 10 0  3 11 0
+			              3 12 0  3 13 0  3 14 0  3 15 0>;
+		};
+
+		xlb@1f00 {
+			compatible = "fsl,mpc5200b-xlb","fsl,mpc5200-xlb";
+			reg = <0x1f00 0x100>;
+		};
+
+		psc1: psc@2000 {		// PSC1
+			compatible = "fsl,mpc5200b-psc","fsl,mpc5200-psc";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		psc2: psc@2200 {		// PSC2
+			compatible = "fsl,mpc5200b-psc","fsl,mpc5200-psc";
+			reg = <0x2200 0x100>;
+			interrupts = <2 2 0>;
+		};
+
+		psc3: psc@2400 {		// PSC3
+			compatible = "fsl,mpc5200b-psc","fsl,mpc5200-psc";
+			reg = <0x2400 0x100>;
+			interrupts = <2 3 0>;
+		};
+
+		psc4: psc@2600 {		// PSC4
+			compatible = "fsl,mpc5200b-psc","fsl,mpc5200-psc";
+			reg = <0x2600 0x100>;
+			interrupts = <2 11 0>;
+		};
+
+		psc5: psc@2800 {		// PSC5
+			compatible = "fsl,mpc5200b-psc","fsl,mpc5200-psc";
+			reg = <0x2800 0x100>;
+			interrupts = <2 12 0>;
+		};
+
+		psc6: psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc","fsl,mpc5200-psc";
+			reg = <0x2c00 0x100>;
+			interrupts = <2 4 0>;
+		};
+
+		eth0: ethernet@3000 {
+			compatible = "fsl,mpc5200b-fec","fsl,mpc5200-fec";
+			reg = <0x3000 0x400>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <2 5 0>;
+		};
+
+		mdio@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200b-mdio","fsl,mpc5200-mdio";
+			reg = <0x3000 0x400>;	// fec range, since we need to setup fec interrupts
+			interrupts = <2 5 0>;	// these are for "mii command finished", not link changes & co.
+		};
+
+		ata@3a00 {
+			compatible = "fsl,mpc5200b-ata","fsl,mpc5200-ata";
+			reg = <0x3a00 0x100>;
+			interrupts = <2 7 0>;
+		};
+
+		sclpc@3c00 {
+			compatible = "fsl,mpc5200-lpbfifo";
+			reg = <0x3c00 0x60>;
+			interrupts = <2 23 0>;
+		};
+
+		i2c@3d00 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d00 0x40>;
+			interrupts = <2 15 0>;
+		};
+
+		i2c@3d40 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d40 0x40>;
+			interrupts = <2 16 0>;
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200b-sram","fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;
+		};
+	};
+
+	pci: pci@f0000d00 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		compatible = "fsl,mpc5200b-pci","fsl,mpc5200-pci";
+		reg = <0xf0000d00 0x100>;
+		// interrupt-map-mask = need to add
+		// interrupt-map = need to add
+		clock-frequency = <0>; // From boot loader
+		interrupts = <2 8 0 2 9 0 2 10 0>;	// three 3-cell specifiers (mpc5200_pic has #interrupt-cells = <3>)
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,	// prefetchable 32-bit memory, 256 MiB at CPU 0x80000000
+			 <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,	// non-prefetchable 32-bit memory, 256 MiB at CPU 0x90000000
+			 <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;	// I/O space, 16 MiB at CPU 0xa0000000
+	};
+
+	localbus: localbus {
+		compatible = "fsl,mpc5200b-lpb","fsl,mpc5200-lpb","simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0xfc000000 0x2000000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
new file mode 100644
index 0000000000..2638555afc
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * mpc8308_p1m Device Tree Source
+ *
+ * Copyright 2010 Ilya Yanok, Emcraft Systems, yanok@emcraft.com
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "denx,mpc8308_p1m";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8308@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <16384>;
+			i-cache-size = <16384>;
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;	// 128MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		ranges = <0x0 0x0 0xfc000000 0x04000000
+		          0x1 0x0 0xfbff0000 0x00008000
+		          0x2 0x0 0xfbff8000 0x00008000>;
+
+		flash@0,0 {
+			#address-cells = <1>;	// child regs are <offset size>, one cell each
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x4000000>;	// 64 MiB; the parent's ranges place it at 0xfc000000
+			bank-width = <2>;
+			device-width = <1>;
+
+			u-boot@0 {
+				reg = <0x0 0x60000>;	// ends at 0x60000
+				read-only;
+			};
+			env@60000 {
+				reg = <0x60000 0x20000>;	// ends at 0x80000
+			};
+			env1@80000 {
+				reg = <0x80000 0x20000>;	// ends at 0xa0000
+			};
+			kernel@a0000 {
+				reg = <0xa0000 0x200000>;	// ends at 0x2a0000
+			};
+			dtb@2a0000 {
+				reg = <0x2a0000 0x20000>;	// ends at 0x2c0000
+			};
+			ramdisk@2c0000 {
+				reg = <0x2c0000 0x640000>;	// ends at 0x900000
+			};
+			user@700000 {
+				reg = <0x700000 0x3900000>;	// ends at 0x4000000; NOTE(review): overlaps ramdisk over 0x700000-0x8fffff - confirm the intended layout
+			};
+		};
+
+		can@1,0 {	// the parent's ranges entry <0x1 0x0 ...> places this at 0xfbff0000
+			compatible = "nxp,sja1000";
+			reg = <0x1 0x0 0x80>;
+			interrupts = <18 0x8>;	// IPIC source 18, sense 8 = level, low assertion
+			interrupt-parent = <&ipic>;	// same controller the enclosing localbus node names
+		};
+
+		cpld@2,0 {	// the parent's ranges entry <0x2 0x0 ...> places this at 0xfbff8000
+			compatible = "denx,mpc8308_p1m-cpld";
+			reg = <0x2 0x0 0x8>;
+			interrupts = <48 0x8>;	// IPIC source 48, sense 8 = level, low assertion
+			interrupt-parent = <&ipic>;	// same controller the enclosing localbus node names
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8308-immr", "simple-bus";
+		ranges = <0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+			fram@50 {
+				compatible = "ramtron,24c64", "atmel,24c64";
+				reg = <0x50>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+			pwm@28 {
+				compatible = "maxim,ds1050";
+				reg = <0x28>;
+			};
+			sensor@48 {
+				compatible = "maxim,max6625";
+				reg = <0x48>;
+			};
+			sensor@49 {
+				compatible = "maxim,max6625";
+				reg = <0x49>;
+			};
+			sensor@4b {
+				compatible = "maxim,max6625";
+				reg = <0x4b>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			dr_mode = "peripheral";
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x0 0x24000 0x1000>;
+
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			interrupt-parent = <&ipic>;
+			phy-handle = < &phy1 >;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x1>;
+				};
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <19 0x8>;
+					reg = <0x2>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			interrupt-parent = <&ipic>;
+			phy-handle = < &phy2 >;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <133333333>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <133333333>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		gpio@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8308-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x18>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		timer@500 {
+			compatible = "fsl,mpc8308-gtm", "fsl,gtm";
+			reg = <0x500 0x100>;
+			interrupts = <90 8 78 8 84 8 72 8>;
+			interrupt-parent = <&ipic>;
+			clock-frequency = <133333333>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+
+		ipic-msi@7c0 {
+			compatible = "fsl,ipic-msi";
+			reg = <0x7c0 0x40>;
+			msi-available-ranges = <0x0 0x100>;
+			interrupts = < 0x43 0x8
+					0x4  0x8
+					0x51 0x8
+					0x52 0x8
+					0x56 0x8
+					0x57 0x8
+					0x58 0x8
+					0x59 0x8 >;
+			interrupt-parent = < &ipic >;
+		};
+
+		dma@2c000 {
+			compatible = "fsl,mpc8308-dma";
+			reg = <0x2c000 0x1800>;
+			interrupts = <3 0x8
+					94 0x8>;
+			interrupt-parent = < &ipic >;
+		};
+
+	};
+
+	pci0: pcie@e0009000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8308-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000
+			0xb0000000 0x01000000>;
+		ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000	// non-prefetchable 32-bit memory, 256 MiB at CPU 0xa0000000
+		          0x01000000 0 0x00000000 0xb1000000 0 0x00800000>;	// I/O space, 8 MiB at CPU 0xb1000000
+		bus-range = <0 0>;
+		interrupt-map-mask = <0 0 0 0>;	// all-zero mask: every child interrupt matches the single entry below
+		interrupt-map = <0 0 0 0 &ipic 1 8>;	// routed to IPIC source 1, sense 8 (level, low assertion)
+		interrupts = <0x1 0x8>;
+		interrupt-parent = <&ipic>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa0000000	// memory window: address on the child bus
+				  0x02000000 0 0xa0000000	// same address on the parent bus
+				  0 0x10000000	// 256 MiB
+				  0x01000000 0 0x00000000	// I/O window: address on the child bus
+				  0x01000000 0 0x00000000	// same address on the parent bus
+				  0 0x00800000>;	// 8 MiB
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts
new file mode 100644
index 0000000000..af2ed8380a
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8308rdb.dts
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8308RDB Device Tree Source
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2010 Ilya Yanok, Emcraft Systems, yanok@emcraft.com
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8308rdb";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8308@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <16384>;
+			i-cache-size = <16384>;
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;	// 128MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		// CS0 and CS1 are swapped when
+		// booting from nand, but the
+		// addresses are the same.
+		ranges = <0x0 0x0 0xfe000000 0x00800000
+		          0x1 0x0 0xe0600000 0x00002000
+		          0x2 0x0 0xf0000000 0x00020000
+		          0x3 0x0 0xfa000000 0x00008000>;
+
+		flash@0,0 {
+			#address-cells = <1>;	// child regs are <offset size>, one cell each
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x800000>;	// 8 MiB; the parent's ranges place it at 0xfe000000
+			bank-width = <2>;
+			device-width = <1>;
+
+			u-boot@0 {
+				reg = <0x0 0x60000>;	// ends at 0x60000
+				read-only;
+			};
+			env@60000 {
+				reg = <0x60000 0x10000>;	// ends at 0x70000
+			};
+			env1@70000 {
+				reg = <0x70000 0x10000>;	// ends at 0x80000
+			};
+			kernel@80000 {
+				reg = <0x80000 0x200000>;	// ends at 0x280000
+			};
+			dtb@280000 {
+				reg = <0x280000 0x10000>;	// ends at 0x290000
+			};
+			ramdisk@290000 {
+				reg = <0x290000 0x570000>;	// ends at 0x800000, the end of the device
+			};
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8315-fcm-nand",
+			             "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x2000>;
+
+			jffs2@0 {
+				reg = <0x0 0x2000000>;
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8308-immr", "simple-bus";
+		ranges = <0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			dr_mode = "peripheral";
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x0 0x24000 0x1000>;
+
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = < &tbi0 >;
+			phy-handle = < &phy2 >;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x2>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = < &tbi1 >;
+			/* Vitesse 7385 isn't on the MDIO bus */
+			fixed-link = <1 1 1000 0 0>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <133333333>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <133333333>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		gpio@c00 {
+			#gpio-cells = <2>;
+			device_type = "gpio";
+			compatible = "fsl,mpc8308-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x18>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+
+		ipic-msi@7c0 {
+			compatible = "fsl,ipic-msi";
+			reg = <0x7c0 0x40>;
+			msi-available-ranges = <0x0 0x100>;
+			interrupts = < 0x43 0x8
+					0x4  0x8
+					0x51 0x8
+					0x52 0x8
+					0x56 0x8
+					0x57 0x8
+					0x58 0x8
+					0x59 0x8 >;
+			interrupt-parent = < &ipic >;
+		};
+
+		dma@2c000 {
+			compatible = "fsl,mpc8308-dma";
+			reg = <0x2c000 0x1800>;
+			interrupts = <3 0x8
+					94 0x8>;
+			interrupt-parent = < &ipic >;
+		};
+
+	};
+
+	pci0: pcie@e0009000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8308-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000
+			0xb0000000 0x01000000>;
+		ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000	// non-prefetchable 32-bit memory, 256 MiB at CPU 0xa0000000
+		          0x01000000 0 0x00000000 0xb1000000 0 0x00800000>;	// I/O space, 8 MiB at CPU 0xb1000000
+		bus-range = <0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;	// compare only the device-number bits and the interrupt pin
+		interrupt-map = <0 0 0 1 &ipic 1 8	// device 0, pins 1-4: every entry names the same parent interrupt,
+				 0 0 0 2 &ipic 1 8	// IPIC source 1 with sense 8 (level, low assertion)
+				 0 0 0 3 &ipic 1 8
+				 0 0 0 4 &ipic 1 8>;
+		interrupts = <0x1 0x8>;
+		interrupt-parent = <&ipic>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa0000000	// memory window: address on the child bus
+				  0x02000000 0 0xa0000000	// same address on the parent bus
+				  0 0x10000000	// 256 MiB
+				  0x01000000 0 0x00000000	// I/O window: address on the child bus
+				  0x01000000 0 0x00000000	// same address on the parent bus
+				  0 0x00800000>;	// 8 MiB
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts
new file mode 100644
index 0000000000..a8315795b2
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8313erdb.dts
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8313E RDB Device Tree Source
+ *
+ * Copyright 2005, 2006, 2007 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MPC8313ERDB";
+	compatible = "MPC8313ERDB", "MPC831xRDB", "MPC83xxRDB";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8313@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <16384>;	// 16K
+			i-cache-size = <16384>;	// 16K
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;	// 128MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;	// <chip select, offset>
+		#size-cells = <1>;
+		compatible = "fsl,mpc8313-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		// CS0 and CS1 are swapped when
+		// booting from NAND, but the
+		// addresses are the same.
+		ranges = <0x0 0x0 0xfe000000 0x00800000	/* CS0: 8MB at 0xfe000000 */
+		          0x1 0x0 0xe2800000 0x00008000	/* CS1: 32KB at 0xe2800000 */
+		          0x2 0x0 0xf0000000 0x00020000	/* CS2: 128KB at 0xf0000000 */
+		          0x3 0x0 0xfa000000 0x00008000>;	/* CS3: 32KB at 0xfa000000 */
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x800000>;	// CS0, offset 0, 8MB
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8313-fcm-nand",
+			             "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x2000>;	// CS1, offset 0, 8KB
+
+			u-boot@0 {
+				reg = <0x0 0x100000>;	// 1MB at offset 0
+				read-only;
+			};
+
+			kernel@100000 {
+				reg = <0x100000 0x300000>;	// 3MB at offset 1MB
+			};
+
+			fs@400000 {
+				reg = <0x400000 0x1c00000>;	// 28MB at offset 4MB
+			};
+		};
+	};
+
+	soc8313@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x03000000>;
+			ranges;
+
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <14 0x8>;
+				interrupt-parent = <&ipic>;
+				dfsrr;
+				rtc@68 {
+					compatible = "dallas,ds1339";
+					reg = <0x68>;
+				};
+			};
+
+			crypto@30000 {
+				compatible = "fsl,sec2.2", "fsl,sec2.1",
+				             "fsl,sec2.0";
+				reg = <0x30000 0x10000>;
+				interrupts = <11 0x8>;
+				interrupt-parent = <&ipic>;
+				fsl,num-channels = <1>;
+				fsl,channel-fifo-len = <24>;
+				fsl,exec-units-mask = <0x4c>;
+				fsl,descriptor-types-mask = <0x0122003f>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		/* phy type (ULPI, UTMI, UTMI_WIDE, SERIAL) */
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "utmi_wide";
+			sleep = <&pmc 0x00300000>;
+		};
+
+		ptp_clock@24E00 {
+			compatible = "fsl,etsec-ptp";
+			reg = <0x24E00 0xB0>;
+			interrupts = <12 0x8 13 0x8>;
+			interrupt-parent = < &ipic >;
+			fsl,tclk-period = <10>;
+			fsl,tmr-prsc    = <100>;
+			fsl,tmr-add     = <0x999999A4>;
+			fsl,tmr-fiper1  = <0x3B9AC9F6>;
+			fsl,tmr-fiper2  = <0x00018696>;
+			fsl,max-adj     = <659999998>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			sleep = <&pmc 0x20000000>;
+			ranges = <0x0 0x24000 0x1000>;
+
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <37 0x8 36 0x8 35 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = < &tbi0 >;
+			/* Vitesse 7385 isn't on the MDIO bus */
+			fixed-link = <1 1 1000 0 0>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+				phy4: ethernet-phy@4 {
+					interrupt-parent = <&ipic>;
+					interrupts = <20 0x8>;
+					reg = <0x4>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <34 0x8 33 0x8 32 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = < &tbi1 >;
+			phy-handle = < &phy4 >;
+			sleep = <&pmc 0x10000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+
+
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * Sense values match the Linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: pic@700 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	// <intr #, sense>
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8313-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 8>;
+			interrupt-parent = <&ipic>;
+			fsl,mpc8313-wakeup-timer = <&gtm1>;
+
+			/* Remove this (or change to "okay") if you have
+			 * a REVA3 or later board, if you apply one of the
+			 * workarounds listed in section 8.5 of the board
+			 * manual, or if you are adapting this device tree
+			 * to a different board.
+			 */
+			status = "fail";
+		};
+
+		gtm1: timer@500 {
+			compatible = "fsl,mpc8313-gtm", "fsl,gtm";
+			reg = <0x500 0x100>;
+			interrupts = <90 8 78 8 84 8 72 8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		timer@600 {
+			compatible = "fsl,mpc8313-gtm", "fsl,gtm";
+			reg = <0x600 0x100>;
+			interrupts = <91 8 79 8 85 8 73 8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	sleep-nexus {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		sleep = <&pmc 0x00010000>;
+		ranges;
+
+		pci0: pci@e0008500 {
+			cell-index = <1>;
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+			interrupt-map = <
+					/* IDSEL 0x0E - mini PCI */
+					 0x7000 0x0 0x0 0x1 &ipic 18 0x8
+					 0x7000 0x0 0x0 0x2 &ipic 18 0x8
+					 0x7000 0x0 0x0 0x3 &ipic 18 0x8
+					 0x7000 0x0 0x0 0x4 &ipic 18 0x8
+
+					/* IDSEL 0x0F - PCI slot */
+					 0x7800 0x0 0x0 0x1 &ipic 17 0x8
+					 0x7800 0x0 0x0 0x2 &ipic 18 0x8
+					 0x7800 0x0 0x0 0x3 &ipic 17 0x8
+					 0x7800 0x0 0x0 0x4 &ipic 18 0x8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <66 0x8>;
+			bus-range = <0x0 0x0>;
+			ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
+				  0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
+				  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
+			clock-frequency = <66666666>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			reg = <0xe0008500 0x100		/* internal registers */
+			       0xe0008300 0x8>;		/* config space access registers */
+			compatible = "fsl,mpc8349-pci";
+			device_type = "pci";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8313-dma", "fsl,elo-dma";
+			reg = <0xe00082a8 4>;
+			ranges = <0 0xe0008100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+
+			dma-channel@0 {
+				compatible = "fsl,mpc8313-dma-channel",
+				             "fsl,elo-dma-channel";
+				reg = <0 0x28>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+				cell-index = <0>;
+			};
+
+			dma-channel@80 {
+				compatible = "fsl,mpc8313-dma-channel",
+				             "fsl,elo-dma-channel";
+				reg = <0x80 0x28>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+				cell-index = <1>;
+			};
+
+			dma-channel@100 {
+				compatible = "fsl,mpc8313-dma-channel",
+				             "fsl,elo-dma-channel";
+				reg = <0x100 0x28>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+				cell-index = <2>;
+			};
+
+			dma-channel@180 {
+				compatible = "fsl,mpc8313-dma-channel",
+				             "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+				cell-index = <3>;
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts
new file mode 100644
index 0000000000..e09b37d748
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8315erdb.dts
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8315E RDB Device Tree Source
+ *
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8315erdb";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8315@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <16384>;
+			i-cache-size = <16384>;
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x08000000>;	// 128MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;	// <chip select, offset>
+		#size-cells = <1>;
+		compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		// CS0 and CS1 are swapped when
+		// booting from NAND, but the
+		// addresses are the same.
+		ranges = <0x0 0x0 0xfe000000 0x00800000	/* CS0: 8MB at 0xfe000000 */
+		          0x1 0x0 0xe0600000 0x00002000	/* CS1: 8KB at 0xe0600000 */
+		          0x2 0x0 0xf0000000 0x00020000	/* CS2: 128KB at 0xf0000000 */
+		          0x3 0x0 0xfa000000 0x00008000>;	/* CS3: 32KB at 0xfa000000 */
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x800000>;	// CS0, offset 0, 8MB
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8315-fcm-nand",
+			             "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x2000>;	// CS1, offset 0, 8KB
+
+			u-boot@0 {
+				reg = <0x0 0x100000>;	// 1MB at offset 0
+				read-only;
+			};
+
+			kernel@100000 {
+				reg = <0x100000 0x300000>;	// 3MB at offset 1MB
+			};
+			fs@400000 {
+				reg = <0x400000 0x1c00000>;	// 28MB at offset 4MB
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8315-immr", "simple-bus";
+		ranges = <0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+
+			mcu_pio: mcu@a {
+				#gpio-cells = <2>;
+				compatible = "fsl,mc9s08qg8-mpc8315erdb",
+					     "fsl,mcu-mpc8349emitx";
+				reg = <0x0a>;
+				gpio-controller;
+			};
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8315-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8315-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "utmi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = < &phy0 >;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@0 {
+					interrupt-parent = <&ipic>;
+					interrupts = <20 0x8>;
+					reg = <0x0>;
+				};
+
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&ipic>;
+					interrupts = <19 0x8>;
+					reg = <0x1>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = < &phy1 >;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <133333333>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <133333333>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.3", "fsl,sec3.1", "fsl,sec3.0",
+				     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
+				     "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x97c>;
+			fsl,descriptor-types-mask = <0x3a30abf>;
+		};
+
+		sata@18000 {
+			compatible = "fsl,mpc8315-sata", "fsl,pq-sata";
+			reg = <0x18000 0x1000>;
+			cell-index = <1>;
+			interrupts = <44 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		sata@19000 {
+			compatible = "fsl,mpc8315-sata", "fsl,pq-sata";
+			reg = <0x19000 0x1000>;
+			cell-index = <2>;
+			interrupts = <45 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		gtm1: timer@500 {
+			compatible = "fsl,mpc8315-gtm", "fsl,gtm";
+			reg = <0x500 0x100>;
+			interrupts = <90 8 78 8 84 8 72 8>;
+			interrupt-parent = <&ipic>;
+			clock-frequency = <133333333>;
+		};
+
+		timer@600 {
+			compatible = "fsl,mpc8315-gtm", "fsl,gtm";
+			reg = <0x600 0x100>;
+			interrupts = <91 8 79 8 85 8 73 8>;
+			interrupt-parent = <&ipic>;
+			clock-frequency = <133333333>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * Sense values match the Linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	// <intr #, sense>
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+
+		ipic-msi@7c0 {
+			compatible = "fsl,ipic-msi";
+			reg = <0x7c0 0x40>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <0x43 0x8
+				      0x4  0x8
+				      0x51 0x8
+				      0x52 0x8
+				      0x56 0x8
+				      0x57 0x8
+				      0x58 0x8
+				      0x59 0x8>;
+			interrupt-parent = < &ipic >;
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8315-pmc", "fsl,mpc8313-pmc",
+				     "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 8>;
+			interrupt-parent = <&ipic>;
+			fsl,mpc8313-wakeup-timer = <&gtm1>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 0x0E - mini PCI */
+				 0x7000 0x0 0x0 0x1 &ipic 18 0x8
+				 0x7000 0x0 0x0 0x2 &ipic 18 0x8
+				 0x7000 0x0 0x0 0x3 &ipic 18 0x8
+				 0x7000 0x0 0x0 0x4 &ipic 18 0x8
+
+				/* IDSEL 0x0F - mini PCI */
+				 0x7800 0x0 0x0 0x1 &ipic 17 0x8
+				 0x7800 0x0 0x0 0x2 &ipic 17 0x8
+				 0x7800 0x0 0x0 0x3 &ipic 17 0x8
+				 0x7800 0x0 0x0 0x4 &ipic 17 0x8
+
+				/* IDSEL 0x10 - PCI slot */
+				 0x8000 0x0 0x0 0x1 &ipic 48 0x8
+				 0x8000 0x0 0x0 0x2 &ipic 17 0x8
+				 0x8000 0x0 0x0 0x3 &ipic 48 0x8
+				 0x8000 0x0 0x0 0x4 &ipic 17 0x8>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0x0 0x0>;
+		ranges = <0x02000000 0 0x90000000 0x90000000 0 0x10000000
+			  0x42000000 0 0x80000000 0x80000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xe0300000 0 0x00100000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	pci1: pcie@e0009000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8315-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000>;
+		ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000
+		          0x01000000 0 0x00000000 0xb1000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 1 8
+				 0 0 0 2 &ipic 1 8
+				 0 0 0 3 &ipic 1 8
+				 0 0 0 4 &ipic 1 8>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa0000000
+				  0x02000000 0 0xa0000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	pci2: pcie@e000a000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8315-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe000a000 0x00001000>;
+		ranges = <0x02000000 0 0xc0000000 0xc0000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xd1000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 2 8
+				 0 0 0 2 &ipic 2 8
+				 0 0 0 3 &ipic 2 8
+				 0 0 0 4 &ipic 2 8>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xc0000000
+				  0x02000000 0 0xc0000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		pwr {
+			gpios = <&mcu_pio 0 0>;
+			default-state = "on";
+		};
+
+		hdd {
+			gpios = <&mcu_pio 1 0>;
+			linux,default-trigger = "disk-activity";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
new file mode 100644
index 0000000000..ecebc27a28
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC832x RDB Device Tree Source
+ *
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MPC8323ERDB";
+	compatible = "MPC8323ERDB", "MPC832xRDB", "MPC83xxRDB";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet1;
+		ethernet1 = &enet0;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8323@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <0x20>;	// 32 bytes
+			i-cache-line-size = <0x20>;	// 32 bytes
+			d-cache-size = <16384>;	// L1, 16K
+			i-cache-size = <16384>;	// L1, 16K
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;	// 64MB at 0
+	};
+
+	soc8323@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8323-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8323-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <1>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x4c>;
+			fsl,descriptor-types-mask = <0x0122003f>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		ipic:pic@700 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	// <intr #, sense>, e.g. <80 0x8> in power@b00
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+
+		par_io@1400 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x1400 0x100>;
+			ranges = <3 0x1448 0x18>;
+			compatible = "fsl,mpc8323-qe-pario";
+			device_type = "par_io";
+			num-ports = <7>;
+
+			qe_pio_d: gpio-controller@1448 {
+				#gpio-cells = <2>;
+				compatible = "fsl,mpc8323-qe-pario-bank";
+				reg = <3 0x18>;
+				gpio-controller;
+			};
+
+			ucc2pio:ucc_pin@2 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					3  4  3  0  2  0 	/* MDIO */
+					3  5  1  0  2  0 	/* MDC */
+					3 21  2  0  1  0 	/* RX_CLK (CLK16) */
+					3 23  2  0  1  0 	/* TX_CLK (CLK3) */
+					0 18  1  0  1  0 	/* TxD0 */
+					0 19  1  0  1  0 	/* TxD1 */
+					0 20  1  0  1  0 	/* TxD2 */
+					0 21  1  0  1  0 	/* TxD3 */
+					0 22  2  0  1  0 	/* RxD0 */
+					0 23  2  0  1  0 	/* RxD1 */
+					0 24  2  0  1  0 	/* RxD2 */
+					0 25  2  0  1  0 	/* RxD3 */
+					0 26  2  0  1  0 	/* RX_ER */
+					0 27  1  0  1  0 	/* TX_ER */
+					0 28  2  0  1  0 	/* RX_DV */
+					0 29  2  0  1  0 	/* COL */
+					0 30  1  0  1  0 	/* TX_EN */
+					0 31  2  0  1  0>;      /* CRS */
+			};
+			ucc3pio:ucc_pin@3 {
+				pio-map = <
+			/* port  pin  dir  open_drain  assignment  has_irq */
+					0 13  2  0  1  0 	/* RX_CLK (CLK9) */
+					3 24  2  0  1  0 	/* TX_CLK (CLK10) */
+					1  0  1  0  1  0 	/* TxD0 */
+					1  1  1  0  1  0 	/* TxD1 */
+					1  2  1  0  1  0 	/* TxD2 */
+					1  3  1  0  1  0 	/* TxD3 */
+					1  4  2  0  1  0 	/* RxD0 */
+					1  5  2  0  1  0 	/* RxD1 */
+					1  6  2  0  1  0 	/* RxD2 */
+					1  7  2  0  1  0 	/* RxD3 */
+					1  8  2  0  1  0 	/* RX_ER */
+					1  9  1  0  1  0 	/* TX_ER */
+					1 10  2  0  1  0 	/* RX_DV */
+					1 11  2  0  1  0 	/* COL */
+					1 12  1  0  1  0 	/* TX_EN */
+					1 13  2  0  1  0>;      /* CRS */
+			};
+		};
+	};
+
+	qe@e0100000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "qe";
+		compatible = "fsl,qe";
+		ranges = <0x0 0xe0100000 0x00100000>;
+		reg = <0xe0100000 0x480>;
+		brg-frequency = <0>;
+		bus-frequency = <198000000>;
+		fsl,qe-num-riscs = <1>;
+		fsl,qe-num-snums = <28>;
+
+		muram@10000 {
+ 			#address-cells = <1>;
+ 			#size-cells = <1>;
+			compatible = "fsl,qe-muram", "fsl,cpm-muram";
+			ranges = <0x0 0x00010000 0x00004000>;
+
+			data-only@0 {
+				compatible = "fsl,qe-muram-data",
+					     "fsl,cpm-muram-data";
+				reg = <0x0 0x4000>;
+			};
+		};
+
+		spi@4c0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x4c0 0x40>;
+			interrupts = <2>;
+			interrupt-parent = <&qeic>;
+			cs-gpios = <&qe_pio_d 13 0>;
+			mode = "cpu-qe";
+
+			mmc-slot@0 {
+				compatible = "fsl,mpc8323rdb-mmc-slot",
+					     "mmc-spi-slot";
+				reg = <0>;
+				gpios = <&qe_pio_d 14 1
+					 &qe_pio_d 15 0>;
+				voltage-ranges = <3300 3300>;
+				spi-max-frequency = <50000000>;
+			};
+		};
+
+		spi@500 {
+			cell-index = <1>;
+			compatible = "fsl,spi";
+			reg = <0x500 0x40>;
+			interrupts = <1>;
+			interrupt-parent = <&qeic>;
+			mode = "cpu";
+		};
+
+		enet0: ucc@3000 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			cell-index = <2>;
+			reg = <0x3000 0x200>;
+			interrupts = <33>;
+			interrupt-parent = <&qeic>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "clk16";
+			tx-clock-name = "clk3";
+			phy-handle = <&phy00>;
+			pio-handle = <&ucc2pio>;
+		};
+
+		enet1: ucc@2200 {
+			device_type = "network";
+			compatible = "ucc_geth";
+			cell-index = <3>;
+			reg = <0x2200 0x200>;
+			interrupts = <34>;
+			interrupt-parent = <&qeic>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			rx-clock-name = "clk9";
+			tx-clock-name = "clk10";
+			phy-handle = <&phy04>;
+			pio-handle = <&ucc3pio>;
+		};
+
+		mdio@3120 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3120 0x18>;
+			compatible = "fsl,ucc-mdio";
+
+			phy00:ethernet-phy@0 {
+				reg = <0x0>;
+			};
+			phy04:ethernet-phy@4 {
+				reg = <0x4>;
+			};
+		};
+
+		qeic:interrupt-controller@80 {
+			interrupt-controller;
+			compatible = "fsl,qe-ic";
+			#address-cells = <0>;
+			#interrupt-cells = <1>;	// <intr #> only; clients use e.g. interrupts = <33>
+			reg = <0x80 0x80>;
+			big-endian;
+			interrupts = <32 0x8 33 0x8>; // high: 32, low: 33 (routed to the IPIC)
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 0x10 AD16 (USB) */
+				 0x8000 0x0 0x0 0x1 &ipic 17 0x8
+
+				/* IDSEL 0x11 AD17 (Mini1) */
+				 0x8800 0x0 0x0 0x1 &ipic 18 0x8
+				 0x8800 0x0 0x0 0x2 &ipic 19 0x8
+				 0x8800 0x0 0x0 0x3 &ipic 20 0x8
+				 0x8800 0x0 0x0 0x4 &ipic 48 0x8
+
+				/* IDSEL 0x12 AD18 (PCI/Mini2) */
+				 0x9000 0x0 0x0 0x1 &ipic 19 0x8
+				 0x9000 0x0 0x0 0x2 &ipic 20 0x8
+				 0x9000 0x0 0x0 0x3 &ipic 48 0x8
+				 0x9000 0x0 0x0 0x4 &ipic 17 0x8>;
+
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0x0 0x0>;
+		ranges = <0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
+			  0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
+			  0x01000000 0x0 0xd0000000 0xd0000000 0x0 0x04000000>;
+		clock-frequency = <0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+		sleep = <&pmc 0x00010000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts
new file mode 100644
index 0000000000..d4ebbb93de
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8349emitx.dts
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8349E-mITX Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MPC8349EMITX";
+	compatible = "MPC8349EMITX", "MPC834xMITX", "MPC83xxMITX";	// exact board name first, then the wider mITX family names
+	#address-cells = <1>;	// children of / use one cell for an address ...
+	#size-cells = <1>;	// ... and one cell for a size, e.g. memory reg = <base size>
+
+	aliases {	// short names for labelled nodes that are defined later in this file
+		ethernet0 = &enet0;	// ethernet@24000
+		ethernet1 = &enet1;	// ethernet@25000
+		serial0 = &serial0;	// serial@4500
+		serial1 = &serial1;	// serial@4600
+		pci0 = &pci0;		// pci@e0008500
+		pci1 = &pci1;		// pci@e0008600
+	};
+
+	cpus {	// one CPU node; the zeroed frequencies are placeholders, see the per-line notes
+		#address-cells = <1>;
+		#size-cells = <0>;	// a CPU's reg is a bare index with no size
+
+		PowerPC,8349@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;		// 32 * 1024
+			i-cache-size = <32768>;		// 32 * 1024
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// 256MB at 0
+	};
+
+	soc8349@e0000000 {	// on-chip peripherals; child reg values are offsets into the window mapped by ranges
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;	// child 0x0 -> CPU 0xe0000000, 1MB window
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;                    // from bootloader
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;	// devices on the bus are identified by reg alone
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+
+			eeprom: at24@50 {
+				compatible = "st,24c256", "atmel,24c256";
+				reg = <0x50>;
+			};
+
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;	// devices on the bus are identified by reg alone
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+				interrupts = <18 0x8>;	// NOTE(review): IPIC 18 is also used by phy1c under enet0 - presumably a shared line; confirm
+				interrupt-parent = <&ipic>;
+			};
+
+			pcf1: iexp@38 {	// GPIO provider for the gpio-leds node at the end of this soc node
+				#gpio-cells = <2>;
+				compatible = "ti,pcf8574a";
+				reg = <0x38>;
+				gpio-controller;
+			};
+
+			pcf2: iexp@39 {
+				#gpio-cells = <2>;
+				compatible = "ti,pcf8574a";
+				reg = <0x39>;
+				gpio-controller;
+			};
+
+			spd: at24@51 {
+				compatible = "atmel,spd";
+				reg = <0x51>;
+			};
+
+			mcu_pio: mcu@a {
+				#gpio-cells = <2>;
+				compatible = "fsl,mc9s08qg8-mpc8349emitx",
+					     "fsl,mcu-mpc8349emitx";
+				reg = <0x0a>;
+				gpio-controller;
+			};
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {	// controller node plus four channel children, all on IPIC line 71
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;	// channel offset 0 -> soc offset 0x8100, 0x1a8 bytes in total
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;	// shorter than the others: 0x180 + 0x28 = 0x1a8, the end of ranges
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@22000 {
+			compatible = "fsl-usb2-mph";
+			reg = <0x22000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <39 0x8>;
+			phy_type = "ulpi";
+			port0;
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			dr_mode = "peripheral";
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;	// child offsets (mdio@520) are relative to 0x24000
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;	// three IPIC specifiers: lines 32, 33 and 34
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;	// tbi-phy@11 in the mdio child below
+			phy-handle = <&phy1c>;	// ethernet-phy@1c in the mdio child below
+			linux,network-index = <0>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				/* Vitesse 8201 */
+				phy1c: ethernet-phy@1c {
+					interrupt-parent = <&ipic>;
+					interrupts = <18 0x8>;	// same IPIC line as rtc@68 on i2c@3100
+					reg = <0x1c>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;	// child offsets (mdio@520) are relative to 0x25000
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;	// three IPIC specifiers: lines 35, 36 and 37
+			interrupt-parent = <&ipic>;
+			/* Vitesse 7385 isn't on the MDIO bus */
+			fixed-link = <1 1 1000 0 0>;
+			linux,network-index = <1>;
+			tbi-handle = <&tbi1>;
+
+			mdio@520 {	// holds only the TBI PHY; there is no phy-handle on this MAC (fixed-link above)
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// differs from enet0's "fsl,gianfar-mdio"
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;		// from bootloader
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;		// from bootloader
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x7e>;
+			fsl,descriptor-types-mask = <0x01010ebf>;
+		};
+
+		ipic: pic@700 {	// interrupt-parent of every interrupts property in this file
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	// hence the two-cell specifiers such as <9 0x8>
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+
+		gpio-leds {
+			compatible = "gpio-leds";
+
+			green {
+				label = "Green";
+				gpios = <&pcf1 0 1>;	// pcf1 line 0; two cells per pcf1's #gpio-cells
+				linux,default-trigger = "heartbeat";
+			};
+
+			yellow {
+				label = "Yellow";
+				gpios = <&pcf1 1 1>;	// pcf1 line 1; two cells per pcf1's #gpio-cells
+				/* linux,default-trigger = "heartbeat"; */
+				default-state = "on";
+			};
+		};
+
+	};
+
+	pci0: pci@e0008500 {	// first PCI host bridge; only the on-board SATA device is routed
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// keep address bits 15:11 (IDSEL << 11) and the INTx pin
+		interrupt-map = <
+				/* IDSEL 0x10 - SATA */
+				0x8000 0x0 0x0 0x1 &ipic 22 0x8 /* SATA_INTA */
+				>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0x0 0x0>;
+		ranges = <0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000	// prefetchable memory, 256MB, PCI address == CPU address
+			  0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000	// non-prefetchable memory, 256MB, PCI address == CPU address
+			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x01000000>;	// I/O: PCI 0x0 -> CPU 0xe2000000, 16MB
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	pci1: pci@e0008600 {	// second PCI host bridge; serves the MiniPCI slot and the PCI slot
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// keep address bits 15:11 (IDSEL << 11) and the INTx pin
+		interrupt-map = <
+				/* IDSEL 0x0E - MiniPCI Slot */
+				0x7000 0x0 0x0 0x1 &ipic 21 0x8 /* PCI_INTA */
+
+				/* IDSEL 0x0F - PCI Slot */
+				0x7800 0x0 0x0 0x1 &ipic 20 0x8 /* PCI_INTA */
+				0x7800 0x0 0x0 0x2 &ipic 21 0x8 /* PCI_INTB */
+				>;
+		interrupt-parent = <&ipic>;
+		interrupts = <67 0x8>;
+		bus-range = <0x0 0x0>;
+		ranges = <0x42000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000	// prefetchable memory, 256MB, PCI address == CPU address
+			  0x02000000 0x0 0xb0000000 0xb0000000 0x0 0x10000000	// non-prefetchable memory, 256MB, PCI address == CPU address
+			  0x01000000 0x0 0x00000000 0xe3000000 0x0 0x01000000>;	// I/O: PCI 0x0 -> CPU 0xe3000000, 16MB
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008600 0x100		/* internal registers */
+		       0xe0008380 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	localbus@e0005000 {	// external local bus; each ranges row maps one bank to a CPU address window
+		#address-cells = <2>;	// <bank offset>, as in the unit addresses flash@0,800000 and pata@3,0
+		#size-cells = <1>;
+		compatible = "fsl,mpc8349e-localbus",
+			     "fsl,pq2pro-localbus",
+			     "simple-bus";
+		reg = <0xe0005000 0xd8>;
+		ranges = <0x0 0x0 0xfe000000 0x1000000	/* flash */
+			  0x1 0x0 0xf8000000 0x20000	/* VSC 7385 */
+			  0x2 0x0 0xf9000000 0x200000	/* exp slot */
+			  0x3 0x0 0xf0000000 0x210>;	/* CF slot */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0x0      0x0 0x800000>;	// first 8MB of bank 0
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		flash@0,800000 {
+			#address-cells = <1>;	// NOTE(review): cell sizes are declared but this node has no children; flash@0,0 omits them
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x800000 0x800000>;	// second 8MB of bank 0; 0x800000 + 0x800000 fills the 0x1000000 window
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		pata@3,0 {
+			compatible = "fsl,mpc8349emitx-pata", "ata-generic";
+			reg = <0x3 0x0 0x10 0x3 0x20c 0x4>;	// two regions in bank 3: 0x0..0xf and 0x20c..0x20f (bank size is 0x210)
+			reg-shift = <1>;
+			pio-mode = <6>;
+			interrupts = <23 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
new file mode 100644
index 0000000000..bcf68a0a7b
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8349E-mITX-GP Device Tree Source
+ *
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MPC8349EMITXGP";
+	compatible = "MPC8349EMITXGP", "MPC834xMITX", "MPC83xxMITX";	// exact board name first, then the wider mITX family names
+	#address-cells = <1>;	// children of / use one cell for an address ...
+	#size-cells = <1>;	// ... and one cell for a size, e.g. memory reg = <base size>
+
+	aliases {	// short names for labelled nodes that are defined later in this file
+		ethernet0 = &enet0;	// ethernet@24000
+		serial0 = &serial0;	// serial@4500
+		serial1 = &serial1;	// serial@4600
+		pci0 = &pci0;		// pci@e0008600, the only PCI node in this file
+	};
+
+	cpus {	// one CPU node; the zeroed frequencies are placeholders, see the per-line notes
+		#address-cells = <1>;
+		#size-cells = <0>;	// a CPU's reg is a bare index with no size
+
+		PowerPC,8349@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;		// 32 * 1024
+			i-cache-size = <32768>;		// 32 * 1024
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// 256MB at 0
+	};
+
+	soc8349@e0000000 {	// on-chip peripherals; child reg values are offsets into the window mapped by ranges
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;	// child 0x0 -> CPU 0xe0000000, 1MB window
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;                    // from bootloader
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		i2c@3000 {	// no client devices are described on this bus
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;	// devices on the bus are identified by reg alone
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+				interrupts = <18 0x8>;	// NOTE(review): IPIC 18 is also used by phy1c under enet0 - presumably a shared line; confirm
+				interrupt-parent = <&ipic>;
+			};
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {	// controller node plus four channel children, all on IPIC line 71
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;	// channel offset 0 -> soc offset 0x8100, 0x1a8 bytes in total
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;	// shorter than the others: 0x180 + 0x28 = 0x1a8, the end of ranges
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			dr_mode = "otg";
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;	// child offsets (mdio@520) are relative to 0x24000
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;	// three IPIC specifiers: lines 32, 33 and 34
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;	// tbi-phy@11 in the mdio child below
+			phy-handle = <&phy1c>;	// ethernet-phy@1c in the mdio child below
+			linux,network-index = <0>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				/* Vitesse 8201 */
+				phy1c: ethernet-phy@1c {
+					interrupt-parent = <&ipic>;
+					interrupts = <18 0x8>;	// same IPIC line as rtc@68 on i2c@3100
+					reg = <0x1c>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;		// from bootloader
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;		// from bootloader
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x7e>;
+			fsl,descriptor-types-mask = <0x01010ebf>;
+		};
+
+		ipic: pic@700 {	// interrupt-parent of every interrupts property in this file
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	// hence the two-cell specifiers such as <9 0x8>
+			reg = <0x700 0x100>;
+			device_type = "ipic";
+		};
+	};
+
+	pci0: pci@e0008600 {	// the only PCI host bridge described for this board; serves the PCI slot
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// keep address bits 15:11 (IDSEL << 11) and the INTx pin
+		interrupt-map = <
+				/* IDSEL 0x0F - PCI Slot */
+				0x7800 0x0 0x0 0x1 &ipic 20 0x8 /* PCI_INTA */
+				0x7800 0x0 0x0 0x2 &ipic 21 0x8 /* PCI_INTB */
+				 >;
+		interrupt-parent = <&ipic>;
+		interrupts = <67 0x8>;
+		bus-range = <0x1 0x1>;	// NOTE(review): the pci@e0008600 node in mpc8349emitx.dts uses <0x0 0x0> - confirm bus 1 is intended
+		ranges = <0x42000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000	// prefetchable memory, 256MB, PCI address == CPU address
+			  0x02000000 0x0 0xb0000000 0xb0000000 0x0 0x10000000	// non-prefetchable memory, 256MB, PCI address == CPU address
+			  0x01000000 0x0 0x00000000 0xe3000000 0x0 0x01000000>;	// I/O: PCI 0x0 -> CPU 0xe3000000, 16MB
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008600 0x100		/* internal registers */
+		       0xe0008380 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc836x_rdk.dts b/arch/powerpc/boot/dts/mpc836x_rdk.dts
new file mode 100644
index 0000000000..a0cc195348
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc836x_rdk.dts
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8360E RDK Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ * Copyright 2007-2008 MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;	/* children of / use one cell for an address ... */
+	#size-cells = <1>;	/* ... and one cell for a size */
+	compatible = "fsl,mpc8360rdk";
+
+	aliases {	/* short names for labelled nodes that are defined later in this file */
+		serial0 = &serial0;	/* serial@4500 (ns16550) */
+		serial1 = &serial1;	/* serial@4600 (ns16550) */
+		serial2 = &serial2;	/* ucc@2400 (ucc_uart) */
+		serial3 = &serial3;	/* ucc@3400 (ucc_uart) */
+		ethernet0 = &enet0;	/* ucc@2000 */
+		ethernet1 = &enet1;	/* ucc@3000 */
+		ethernet2 = &enet2;	/* ucc@2600 */
+		ethernet3 = &enet3;	/* ucc@3200 */
+		pci0 = &pci0;		/* pci@e0008500 */
+	};
+
+	cpus {	/* one CPU node */
+		#address-cells = <1>;
+		#size-cells = <0>;	/* a CPU's reg is a bare index with no size */
+
+		PowerPC,8360@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;	/* 32 * 1024 */
+			i-cache-size = <32768>;	/* 32 * 1024 */
+			/* filled by u-boot */
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		/* filled by u-boot */
+		reg = <0 0>;	/* <base size> placeholder, one cell each per the root node */
+	};
+
+	soc@e0000000 {	/* on-chip peripherals; child reg values are offsets into the window mapped by ranges */
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8360-immr", "fsl,immr", "fsl,soc",
+			     "simple-bus";
+		ranges = <0 0xe0000000 0x200000>;	/* child 0 -> CPU 0xe0000000, 2MB; covers qe@100000 below */
+		reg = <0xe0000000 0x200>;
+		/* filled by u-boot */
+		bus-frequency = <0>;
+
+		wdt@200 {
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		pmc: power@b00 {	/* target of the sleep properties on crypto@30000 and pci0 */
+			compatible = "fsl,mpc8360-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;	/* two regions: 0xb00 and 0xa00, 0x100 bytes each */
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <14 8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <16 8>;	/* NOTE(review): the MPC8349E-mITX trees use IPIC 15 for i2c@3100 and 16 for spi@7000 - confirm 16 is right for the MPC8360 */
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			interrupts = <9 8>;
+			interrupt-parent = <&ipic>;
+			/* filled by u-boot */
+			clock-frequency = <0>;
+		};
+
+		serial1: serial@4600 {
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			interrupts = <10 8>;
+			interrupt-parent = <&ipic>;
+			/* filled by u-boot */
+			clock-frequency = <0>;
+		};
+
+		dma@82a8 {	/* controller node plus four channel children, all on IPIC line 71 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8360-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;	/* channel offset 0 -> soc offset 0x8100, 0x1a8 bytes in total */
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;	/* shorter than the others: 0x180 + 0x28 = 0x1a8, the end of ranges */
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x7e>;
+			fsl,descriptor-types-mask = <0x01010ebf>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		ipic: interrupt-controller@700 {	/* top-level interrupt controller; qeic below cascades into it */
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	/* hence the two-cell specifiers such as <9 8> */
+			compatible = "fsl,pq2pro-pic", "fsl,ipic";
+			interrupt-controller;
+			reg = <0x700 0x100>;
+		};
+
+		qe_pio_b: gpio-controller@1418 {	/* supplies five of the usb@6c0 gpios */
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8360-qe-pario-bank",
+				     "fsl,mpc8323-qe-pario-bank";
+			reg = <0x1418 0x18>;
+			gpio-controller;
+		};
+
+		qe_pio_e: gpio-controller@1460 {	/* supplies two usb@6c0 gpios and the upm@1,0 gpio on the local bus */
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8360-qe-pario-bank",
+				     "fsl,mpc8323-qe-pario-bank";
+			reg = <0x1460 0x18>;
+			gpio-controller;
+		};
+
+		qe@100000 {	/* QUICC Engine block; its children use offsets from soc offset 0x100000 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "qe";
+			compatible = "fsl,qe", "simple-bus";
+			ranges = <0 0x100000 0x100000>;	/* child 0 -> soc offset 0x100000, 1MB */
+			reg = <0x100000 0x480>;
+			/* filled by u-boot */
+			clock-frequency = <0>;
+			bus-frequency = <0>;
+			brg-frequency = <0>;
+			fsl,qe-num-riscs = <2>;
+			fsl,qe-num-snums = <28>;
+
+			muram@10000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,qe-muram", "fsl,cpm-muram";
+				ranges = <0 0x10000 0xc000>;	/* child 0 -> qe offset 0x10000, 0xc000 bytes */
+
+				data-only@0 {
+					compatible = "fsl,qe-muram-data",
+						     "fsl,cpm-muram-data";
+					reg = <0 0xc000>;	/* spans the whole muram window */
+				};
+			};
+
+			timer@440 {
+				compatible = "fsl,mpc8360-qe-gtm",
+					     "fsl,qe-gtm", "fsl,gtm";
+				reg = <0x440 0x40>;
+				interrupts = <12 13 14 15>;	/* four one-cell qeic specifiers */
+				interrupt-parent = <&qeic>;
+				clock-frequency = <166666666>;
+			};
+
+			usb@6c0 {
+				compatible = "fsl,mpc8360-qe-usb",
+					     "fsl,mpc8323-qe-usb";
+				reg = <0x6c0 0x40 0x8b00 0x100>;	/* two regions: 0x6c0 (0x40 bytes) and 0x8b00 (0x100 bytes) */
+				interrupts = <11>;
+				interrupt-parent = <&qeic>;
+				fsl,fullspeed-clock = "clk21";
+				gpios = <&qe_pio_b  2 0 /* USBOE */
+					 &qe_pio_b  3 0 /* USBTP */
+					 &qe_pio_b  8 0 /* USBTN */
+					 &qe_pio_b  9 0 /* USBRP */
+					 &qe_pio_b 11 0 /* USBRN */
+					 &qe_pio_e 20 0 /* SPEED */
+					 &qe_pio_e 21 1 /* POWER */>;
+			};
+
+			spi@4c0 {
+				cell-index = <0>;
+				compatible = "fsl,spi";
+				reg = <0x4c0 0x40>;
+				interrupts = <2>;
+				interrupt-parent = <&qeic>;
+				mode = "cpu-qe";
+			};
+
+			spi@500 {
+				cell-index = <1>;
+				compatible = "fsl,spi";
+				reg = <0x500 0x40>;
+				interrupts = <1>;
+				interrupt-parent = <&qeic>;
+				mode = "cpu-qe";
+			};
+
+			enet0: ucc@2000 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <1>;	/* presumably the UCC number - confirm against the ucc_geth binding */
+				reg = <0x2000 0x200>;
+				interrupts = <32>;
+				interrupt-parent = <&qeic>;
+				rx-clock-name = "none";
+				tx-clock-name = "clk9";
+				phy-handle = <&phy2>;	/* ethernet-phy@2 under mdio@2120 */
+				phy-connection-type = "rgmii-rxid";
+				/* filled by u-boot */
+				local-mac-address = [ 00 00 00 00 00 00 ];
+			};
+
+			enet1: ucc@3000 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <2>;
+				reg = <0x3000 0x200>;
+				interrupts = <33>;
+				interrupt-parent = <&qeic>;
+				rx-clock-name = "none";
+				tx-clock-name = "clk4";
+				phy-handle = <&phy4>;	/* ethernet-phy@4 under mdio@2120 */
+				phy-connection-type = "rgmii-rxid";
+				/* filled by u-boot */
+				local-mac-address = [ 00 00 00 00 00 00 ];
+			};
+
+			enet2: ucc@2600 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <7>;
+				reg = <0x2600 0x200>;
+				interrupts = <42>;
+				interrupt-parent = <&qeic>;
+				rx-clock-name = "clk20";
+				tx-clock-name = "clk19";
+				phy-handle = <&phy1>;	/* ethernet-phy@1 under mdio@2120 */
+				phy-connection-type = "mii";
+				/* filled by u-boot */
+				local-mac-address = [ 00 00 00 00 00 00 ];
+			};
+
+			enet3: ucc@3200 {
+				device_type = "network";
+				compatible = "ucc_geth";
+				cell-index = <4>;
+				reg = <0x3200 0x200>;
+				interrupts = <35>;
+				interrupt-parent = <&qeic>;
+				rx-clock-name = "clk8";
+				tx-clock-name = "clk7";
+				phy-handle = <&phy3>;	/* ethernet-phy@3 under mdio@2120 */
+				phy-connection-type = "mii";
+				/* filled by u-boot */
+				local-mac-address = [ 00 00 00 00 00 00 ];
+			};
+
+			mdio@2120 {	/* one MDIO bus carrying the PHYs of all four MACs */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,ucc-mdio";
+				reg = <0x2120 0x18>;	/* lies inside enet0's register window 0x2000..0x21ff */
+
+				phy1: ethernet-phy@1 {
+					compatible = "national,DP83848VV";
+					reg = <1>;
+				};
+
+				phy2: ethernet-phy@2 {
+					compatible = "broadcom,BCM5481UA2KMLG";
+					reg = <2>;
+				};
+
+				phy3: ethernet-phy@3 {
+					compatible = "national,DP83848VV";
+					reg = <3>;
+				};
+
+				phy4: ethernet-phy@4 {
+					compatible = "broadcom,BCM5481UA2KMLG";
+					reg = <4>;
+				};
+			};
+
+			serial2: ucc@2400 {
+				device_type = "serial";
+				compatible = "ucc_uart";
+				reg = <0x2400 0x200>;
+				cell-index = <5>;
+				port-number = <0>;
+				rx-clock-name = "brg7";
+				tx-clock-name = "brg8";
+				interrupts = <40>;
+				interrupt-parent = <&qeic>;
+				soft-uart;
+			};
+
+			serial3: ucc@3400 {
+				device_type = "serial";
+				compatible = "ucc_uart";
+				reg = <0x3400 0x200>;
+				cell-index = <6>;
+				port-number = <1>;
+				rx-clock-name = "brg13";
+				tx-clock-name = "brg14";
+				interrupts = <41>;
+				interrupt-parent = <&qeic>;
+				soft-uart;
+			};
+
+			qeic: interrupt-controller@80 {	/* interrupt-parent of the QE devices above */
+				#address-cells = <0>;
+				#interrupt-cells = <1>;	/* hence the one-cell specifiers such as <32> */
+				compatible = "fsl,qe-ic";
+				interrupt-controller;
+				reg = <0x80 0x80>;
+				big-endian;
+				interrupts = <32 8 33 8>;	/* cascades into two IPIC lines, 32 and 33 */
+				interrupt-parent = <&ipic>;
+			};
+		};
+	};
+
+	localbus@e0005000 {	/* external local bus; each ranges row maps one bank to a CPU address window */
+		#address-cells = <2>;	/* <bank offset>, as in the unit addresses flash@0,0 and upm@1,0 */
+		#size-cells = <1>;
+		compatible = "fsl,mpc8360-localbus", "fsl,pq2pro-localbus",
+			     "simple-bus";
+		reg = <0xe0005000 0xd8>;
+		ranges = <0 0 0xff800000 0x0800000	/* bank 0: flash@0,0 */
+			  1 0 0x60000000 0x0001000	/* bank 1: upm@1,0 (NAND) */
+			  2 0 0x70000000 0x4000000>;	/* bank 2: display@2,0 */
+
+		flash@0,0 {
+			compatible = "intel,PC28F640P30T85", "cfi-flash";
+			reg = <0 0 0x800000>;	/* fills the whole bank 0 window */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		upm@1,0 {
+			compatible = "fsl,upm-nand";
+			reg = <1 0 1>;	/* a single byte at the start of bank 1 */
+			fsl,upm-addr-offset = <16>;
+			fsl,upm-cmd-offset = <8>;
+			gpios = <&qe_pio_e 18 0>;
+
+			flash {
+				compatible = "st,nand512-a";
+			};
+		};
+
+		display@2,0 {
+			device_type = "display";
+			compatible = "fujitsu,MB86277", "fujitsu,mint";
+			reg = <2 0 0x4000000>;	/* fills the whole bank 2 window */
+			fujitsu,sh3;
+			little-endian;
+			/* filled by u-boot */
+			address = <0>;
+			depth = <0>;
+			width = <0>;
+			height = <0>;
+			linebytes = <0>;
+			/* linux,opened; - added by uboot */
+		};
+	};
+
+	pci0: pci@e0008500 {	/* PCI host bridge; serves the miniPCI0 and PCI1 slots */
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8360-pci", "fsl,mpc8349-pci";
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		ranges = <0x02000000 0 0x90000000 0x90000000 0 0x10000000	/* non-prefetchable memory, 256MB, PCI address == CPU address */
+			  0x42000000 0 0x80000000 0x80000000 0 0x10000000	/* prefetchable memory, 256MB, PCI address == CPU address */
+			  0x01000000 0 0xe0300000 0xe0300000 0 0x00100000>;	/* I/O, 1MB, PCI address == CPU address */
+		interrupts = <66 8>;
+		interrupt-parent = <&ipic>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* keep address bits 15:11 (IDSEL << 11) and the INTx pin */
+		interrupt-map = </* miniPCI0 IDSEL 0x14 AD20 */
+				 0xa000 0 0 1 &ipic 18 8
+				 0xa000 0 0 2 &ipic 19 8
+
+				 /* PCI1 IDSEL 0x15 AD21 */
+				 0xa800 0 0 1 &ipic 19 8
+				 0xa800 0 0 2 &ipic 20 8
+				 0xa800 0 0 3 &ipic 21 8
+				 0xa800 0 0 4 &ipic 18 8>;
+		sleep = <&pmc 0x00010000>;
+		/* filled by u-boot */
+		bus-range = <0 0>;
+		clock-frequency = <0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
new file mode 100644
index 0000000000..7df452efa9
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8377E RDB Device Tree Source
+ *
+ * Copyright 2007, 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8377rdb";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8377@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// 256MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;	/* child addresses are <chip-select offset> */
+		#size-cells = <1>;
+		compatible = "fsl,mpc8377-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		// CS0 and CS1 are swapped when
+		// booting from nand, but the
+		// addresses are the same.
+		ranges = <0x0 0x0 0xfe000000 0x00800000	/* CS0: 8 MiB, used by flash@0,0 */
+		          0x1 0x0 0xe0600000 0x00008000	/* CS1: 32 KiB, used by nand@1,0 */
+		          0x2 0x0 0xf0000000 0x00020000	/* CS2: 128 KiB, no child node below */
+		          0x3 0x0 0xfa000000 0x00008000>;	/* CS3: 32 KiB, no child node below */
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x800000>;	/* the whole 8 MiB CS0 window; no partitions are defined */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {	/* NOTE(review): partition offsets below exceed the 32 KiB window, so they presumably address the NAND device itself - confirm */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8377-fcm-nand",
+			             "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x8000>;	/* the 32 KiB CS1 window */
+
+			u-boot@0 {
+				reg = <0x0 0x100000>;	/* first 1 MiB */
+				read-only;
+			};
+
+			kernel@100000 {
+				reg = <0x100000 0x300000>;	/* 3 MiB, ends at 0x400000 */
+			};
+			fs@400000 {
+				reg = <0x400000 0x1c00000>;	/* 28 MiB, ends at 0x2000000 (32 MiB) */
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8377-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8377-gpio", "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x0c000000>;
+			ranges;
+
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <14 0x8>;
+				interrupt-parent = <&ipic>;
+				dfsrr;
+
+				dtt@48 {
+					compatible = "national,lm75";
+					reg = <0x48>;
+				};
+
+				at24@50 {
+					compatible = "atmel,24c256";
+					reg = <0x50>;
+				};
+
+				rtc@68 {
+					compatible = "dallas,ds1339";
+					reg = <0x68>;
+				};
+
+				mcu_pio: mcu@a {
+					#gpio-cells = <2>;
+					compatible = "fsl,mc9s08qg8-mpc8377erdb",
+						     "fsl,mcu-mpc8349emitx";
+					reg = <0x0a>;
+					gpio-controller;
+				};
+			};
+
+			sdhci@2e000 {
+				compatible = "fsl,mpc8377-esdhc", "fsl,esdhc";
+				reg = <0x2e000 0x1000>;
+				interrupts = <42 0x8>;
+				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
+				/* Filled in by U-Boot */
+				clock-frequency = <111111111>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8377-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;	/* 4 bytes at 0x82a8 = 0x8100 + 0x1a8, i.e. right after the channel window */
+			ranges = <0 0x8100 0x1a8>;	/* channel reg offsets are relative to 0x8100 in the parent */
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;	/* the same specifier is repeated in every channel below */
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;	/* shorter than the others: 0x180 + 0x28 = 0x1a8, the end of the ranges window */
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "ulpi";
+			sleep = <&pmc 0x00c00000>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+			sleep = <&pmc 0xc0000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x2>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {	/* has no phy-handle; the link is described by fixed-link below */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;	/* child offsets are relative to this controller's base */
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder */
+			interrupts = <35 0x8 36 0x8 37 0x8>;	/* three <intr #, sense> pairs */
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			fixed-link = <1 1 1000 0 0>;	/* NOTE(review): looks like the legacy 5-cell form <id duplex speed pause asym-pause> - confirm against the fixed-link binding */
+			tbi-handle = <&tbi1>;
+			sleep = <&pmc 0x30000000>;
+			fsl,magic-packet;
+
+			mdio@520 {	/* carries only the TBI PHY; ethernet-phy@2 sits under enet0's mdio node */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		sata@18000 {
+			compatible = "fsl,mpc8377-sata", "fsl,pq-sata";
+			reg = <0x18000 0x1000>;
+			interrupts = <44 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x000000c0>;
+		};
+
+		sata@19000 {
+			compatible = "fsl,mpc8377-sata", "fsl,pq-sata";
+			reg = <0x19000 0x1000>;
+			interrupts = <45 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x00000030>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {	/* target of every interrupt-parent and interrupt-map entry in this file */
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;	/* <intr #, sense> */
+			reg = <0x700 0x100>;
+		};
+
+		pmc: power@b00 {	/* referenced by the "sleep = <&pmc ...>" properties in this file */
+			compatible = "fsl,mpc8377-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;	/* two 0x100-byte blocks, at 0xb00 and 0xa00 */
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on device number (phys.hi bits 15:11) and INTx pin */
+		interrupt-map = <
+				/* IRQ5 = 21 = 0x15, IRQ6 = 22 = 0x16, IRQ7 = 23 = 0x17 */
+
+				/* IDSEL AD14 IRQ6 inta */
+				 0x7000 0x0 0x0 0x1 &ipic 22 0x8
+
+				/* IDSEL AD15 IRQ5 inta, IRQ6 intb, IRQ7 intd */
+				 0x7800 0x0 0x0 0x1 &ipic 21 0x8
+				 0x7800 0x0 0x0 0x2 &ipic 22 0x8
+				 0x7800 0x0 0x0 0x4 &ipic 23 0x8
+
+				/* IDSEL AD28 IRQ7 inta, IRQ5 intb, IRQ6 intc */
+				 0xE000 0x0 0x0 0x1 &ipic 23 0x8
+				 0xE000 0x0 0x0 0x2 &ipic 21 0x8
+				 0xE000 0x0 0x0 0x3 &ipic 22 0x8>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000	/* 32-bit memory space, 256 MiB */
+		          0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000	/* prefetchable memory space, 256 MiB */
+		          0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;	/* I/O space: PCI 0 -> CPU 0xe0300000, 1 MiB */
+		sleep = <&pmc 0x00010000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number (1..4) */
+		#size-cells = <2>;
+		#address-cells = <3>;	/* PCI addresses: <phys.hi phys.mid phys.lo> */
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	pci1: pcie@e0009000 {
+		#address-cells = <3>;	/* PCI addresses: <phys.hi phys.mid phys.lo> */
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000>;
+		ranges = <0x02000000 0 0xa8000000 0xa8000000 0 0x10000000	/* 32-bit memory space, 256 MiB */
+		          0x01000000 0 0x00000000 0xb8000000 0 0x00800000>;	/* I/O space: PCI 0 -> CPU 0xb8000000, 8 MiB */
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 1 8	/* all four INTx pins of device 0 map to ipic interrupt 1 */
+				 0 0 0 2 &ipic 1 8
+				 0 0 0 3 &ipic 1 8
+				 0 0 0 4 &ipic 1 8>;
+		sleep = <&pmc 0x00300000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa8000000	/* memory: child PCI address */
+				  0x02000000 0 0xa8000000	/* parent PCI address (identical) */
+				  0 0x10000000	/* size: 256 MiB */
+				  0x01000000 0 0x00000000	/* I/O: child PCI address */
+				  0x01000000 0 0x00000000	/* parent PCI address (identical) */
+				  0 0x00800000>;	/* size: 8 MiB */
+		};
+	};
+
+	pci2: pcie@e000a000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe000a000 0x00001000>;
+		ranges = <0x02000000 0 0xc8000000 0xc8000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xd8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 2 8
+				 0 0 0 2 &ipic 2 8
+				 0 0 0 3 &ipic 2 8
+				 0 0 0 4 &ipic 2 8>;
+		sleep = <&pmc 0x000c0000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xc8000000
+				  0x02000000 0 0xc8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	leds {	/* both LEDs hang off mcu_pio, the mcu@a node under i2c@3000 */
+		compatible = "gpio-leds";
+
+		pwr {
+			gpios = <&mcu_pio 0 0>;	/* two cells after the phandle, matching #gpio-cells = <2> in mcu@a */
+			default-state = "on";
+		};
+
+		hdd {
+			gpios = <&mcu_pio 1 0>;
+			linux,default-trigger = "disk-activity";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts
new file mode 100644
index 0000000000..d8e7d40aea
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8377E WLAN Device Tree Source
+ *
+ * Copyright 2007-2009 Freescale Semiconductor Inc.
+ * Copyright 2009 MontaVista Software, Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8377wlan";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8377@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x20000000>;	// 512MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;	/* child addresses are two cells, e.g. flash@0,0 */
+		#size-cells = <1>;
+		compatible = "fsl,mpc8377-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+		ranges = <0x0 0x0 0xfc000000 0x04000000>;	/* single window: 64 MiB at 0xfc000000, used by flash@0,0 */
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x4000000>;	/* the whole 64 MiB window */
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				reg = <0 0x80000>;	/* 512 KiB, ends at 0x80000 */
+				label = "u-boot";
+				read-only;
+			};
+
+			partition@a0000 {
+				reg = <0xa0000 0x300000>;	/* 3 MiB, ends at 0x3a0000; 0x80000-0xa0000 is not assigned to any partition */
+				label = "kernel";
+			};
+
+			partition@3a0000 {
+				reg = <0x3a0000 0x3c60000>;	/* runs to 0x4000000, the end of the flash */
+				label = "rootfs";
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8377-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8377-gpio", "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x0c000000>;
+			ranges;
+
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <14 0x8>;
+				interrupt-parent = <&ipic>;
+				dfsrr;
+
+				at24@50 {
+					compatible = "atmel,24c256";
+					reg = <0x50>;
+				};
+
+				rtc@68 {
+					compatible = "dallas,ds1339";
+					reg = <0x68>;
+				};
+			};
+
+			sdhci@2e000 {
+				compatible = "fsl,mpc8377-esdhc", "fsl,esdhc";
+				reg = <0x2e000 0x1000>;
+				interrupts = <42 0x8>;
+				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
+				clock-frequency = <133333333>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8377-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "ulpi";
+			sleep = <&pmc 0x00c00000>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+			sleep = <&pmc 0xc0000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x2>;
+				};
+
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&ipic>;
+					interrupts = <18 0x8>;
+					reg = <0x3>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;	/* child offsets are relative to this controller's base */
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder */
+			interrupts = <35 0x8 36 0x8 37 0x8>;	/* three <intr #, sense> pairs */
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			phy-handle = <&phy3>;	/* phy3 is ethernet-phy@3, defined under enet0's mdio node */
+			tbi-handle = <&tbi1>;
+			sleep = <&pmc 0x30000000>;
+			fsl,magic-packet;
+
+			mdio@520 {	/* carries only the TBI PHY */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		sata@18000 {
+			compatible = "fsl,mpc8377-sata", "fsl,pq-sata";
+			reg = <0x18000 0x1000>;
+			interrupts = <44 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x000000c0>;
+		};
+
+		sata@19000 {
+			compatible = "fsl,mpc8377-sata", "fsl,pq-sata";
+			reg = <0x19000 0x1000>;
+			interrupts = <45 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x00000030>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8377-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on device number (phys.hi bits 15:11) and INTx pin */
+		interrupt-map = <
+				/* IRQ5 = 21 = 0x15, IRQ6 = 22 = 0x16, IRQ7 = 23 = 0x17 */
+
+				/* IDSEL AD14 IRQ6 inta */
+				 0x7000 0x0 0x0 0x1 &ipic 22 0x8
+
+				/* IDSEL AD15 IRQ5 inta */
+				 0x7800 0x0 0x0 0x1 &ipic 21 0x8>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000	/* 32-bit memory space, 256 MiB */
+		          0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000	/* prefetchable memory space, 256 MiB */
+		          0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;	/* I/O space: PCI 0 -> CPU 0xe0300000, 1 MiB */
+		sleep = <&pmc 0x00010000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number (1..4) */
+		#size-cells = <2>;
+		#address-cells = <3>;	/* PCI addresses: <phys.hi phys.mid phys.lo> */
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	pci1: pcie@e0009000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000>;
+		ranges = <0x02000000 0 0xa8000000 0xa8000000 0 0x10000000
+		          0x01000000 0 0x00000000 0xb8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 1 8
+				 0 0 0 2 &ipic 1 8
+				 0 0 0 3 &ipic 1 8
+				 0 0 0 4 &ipic 1 8>;
+		sleep = <&pmc 0x00300000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa8000000
+				  0x02000000 0 0xa8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	pci2: pcie@e000a000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe000a000 0x00001000>;
+		ranges = <0x02000000 0 0xc8000000 0xc8000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xd8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 2 8
+				 0 0 0 2 &ipic 2 8
+				 0 0 0 3 &ipic 2 8
+				 0 0 0 4 &ipic 2 8>;
+		sleep = <&pmc 0x000c0000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xc8000000
+				  0x02000000 0 0xc8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
new file mode 100644
index 0000000000..bdcfe83a56
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8378E RDB Device Tree Source
+ *
+ * Copyright 2007, 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8378rdb";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8378@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// 256MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;	/* child addresses are <chip-select offset> */
+		#size-cells = <1>;
+		compatible = "fsl,mpc8378-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		// CS0 and CS1 are swapped when
+		// booting from nand, but the
+		// addresses are the same.
+		ranges = <0x0 0x0 0xfe000000 0x00800000	/* CS0: 8 MiB, used by flash@0,0 */
+		          0x1 0x0 0xe0600000 0x00008000	/* CS1: 32 KiB, used by nand@1,0 */
+		          0x2 0x0 0xf0000000 0x00020000	/* CS2: 128 KiB, no child node below */
+		          0x3 0x0 0xfa000000 0x00008000>;	/* CS3: 32 KiB, no child node below */
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x800000>;	/* the whole 8 MiB CS0 window; no partitions are defined */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {	/* NOTE(review): partition offsets below exceed the 32 KiB window, so they presumably address the NAND device itself - confirm */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8378-fcm-nand",
+			             "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x8000>;	/* the 32 KiB CS1 window */
+
+			u-boot@0 {
+				reg = <0x0 0x100000>;	/* first 1 MiB */
+				read-only;
+			};
+
+			kernel@100000 {
+				reg = <0x100000 0x300000>;	/* 3 MiB, ends at 0x400000 */
+			};
+			fs@400000 {
+				reg = <0x400000 0x1c00000>;	/* 28 MiB, ends at 0x2000000 (32 MiB) */
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8378-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8378-gpio", "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x0c000000>;
+			ranges;
+
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <14 0x8>;
+				interrupt-parent = <&ipic>;
+				dfsrr;
+
+				dtt@48 {
+					compatible = "national,lm75";
+					reg = <0x48>;
+				};
+
+				at24@50 {
+					compatible = "atmel,24c256";
+					reg = <0x50>;
+				};
+
+				rtc@68 {
+					compatible = "dallas,ds1339";
+					reg = <0x68>;
+				};
+
+				mcu_pio: mcu@a {
+					#gpio-cells = <2>;
+					compatible = "fsl,mc9s08qg8-mpc8378erdb",
+						     "fsl,mcu-mpc8349emitx";
+					reg = <0x0a>;
+					gpio-controller;
+				};
+			};
+
+			sdhci@2e000 {
+				compatible = "fsl,mpc8378-esdhc", "fsl,esdhc";
+				reg = <0x2e000 0x1000>;
+				interrupts = <42 0x8>;
+				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
+				/* Filled in by U-Boot */
+				clock-frequency = <111111111>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8378-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;
+			ranges = <0 0x8100 0x1a8>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "ulpi";
+			sleep = <&pmc 0x00c00000>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+			sleep = <&pmc 0xc0000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x2>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			fixed-link = <1 1 1000 0 0>;
+			tbi-handle = <&tbi1>;
+			sleep = <&pmc 0x30000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8378-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on device number (phys.hi bits 15:11) and INTx pin */
+		interrupt-map = <
+				/* IRQ5 = 21 = 0x15, IRQ6 = 22 = 0x16, IRQ7 = 23 = 0x17 */
+
+				/* IDSEL AD14 IRQ6 inta */
+				 0x7000 0x0 0x0 0x1 &ipic 22 0x8
+
+				/* IDSEL AD15 IRQ5 inta, IRQ6 intb, IRQ7 intd */
+				 0x7800 0x0 0x0 0x1 &ipic 21 0x8
+				 0x7800 0x0 0x0 0x2 &ipic 22 0x8
+				 0x7800 0x0 0x0 0x4 &ipic 23 0x8
+
+				/* IDSEL AD28 IRQ7 inta, IRQ5 intb, IRQ6 intc */
+				 0xE000 0x0 0x0 0x1 &ipic 23 0x8
+				 0xE000 0x0 0x0 0x2 &ipic 21 0x8
+				 0xE000 0x0 0x0 0x3 &ipic 22 0x8>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000	/* 32-bit memory space, 256 MiB */
+		          0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000	/* prefetchable memory space, 256 MiB */
+		          0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;	/* I/O space: PCI 0 -> CPU 0xe0300000, 1 MiB */
+		sleep = <&pmc 0x00010000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;	/* one cell: the INTx pin number (1..4) */
+		#size-cells = <2>;
+		#address-cells = <3>;	/* PCI addresses: <phys.hi phys.mid phys.lo> */
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	pci1: pcie@e0009000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8378-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe0009000 0x00001000>;
+		ranges = <0x02000000 0 0xa8000000 0xa8000000 0 0x10000000
+		          0x01000000 0 0x00000000 0xb8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 1 8
+				 0 0 0 2 &ipic 1 8
+				 0 0 0 3 &ipic 1 8
+				 0 0 0 4 &ipic 1 8>;
+		sleep = <&pmc 0x00300000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xa8000000
+				  0x02000000 0 0xa8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	pci2: pcie@e000a000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "fsl,mpc8378-pcie", "fsl,mpc8314-pcie";
+		reg = <0xe000a000 0x00001000>;
+		ranges = <0x02000000 0 0xc8000000 0xc8000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xd8000000 0 0x00800000>;
+		bus-range = <0 255>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <0 0 0 1 &ipic 2 8
+				 0 0 0 2 &ipic 2 8
+				 0 0 0 3 &ipic 2 8
+				 0 0 0 4 &ipic 2 8>;
+		sleep = <&pmc 0x000c0000>;
+		clock-frequency = <0>;
+
+		pcie@0 {
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			reg = <0 0 0 0 0>;
+			ranges = <0x02000000 0 0xc8000000
+				  0x02000000 0 0xc8000000
+				  0 0x10000000
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00800000>;
+		};
+	};
+
+	leds {	/* both LEDs hang off mcu_pio, the mcu@a node under i2c@3000 */
+		compatible = "gpio-leds";
+
+		pwr {
+			gpios = <&mcu_pio 0 0>;	/* two cells after the phandle, matching #gpio-cells = <2> in mcu@a */
+			default-state = "on";
+		};
+
+		hdd {
+			gpios = <&mcu_pio 1 0>;
+			linux,default-trigger = "disk-activity";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
new file mode 100644
index 0000000000..a5f702304a
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8379E RDB Device Tree Source
+ *
+ * Copyright 2007, 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	compatible = "fsl,mpc8379rdb";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8379@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// 256MB at 0
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;	/* <chip-select offset> */
+		#size-cells = <1>;
+		compatible = "fsl,mpc8379-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupts = <77 0x8>;
+		interrupt-parent = <&ipic>;
+
+		// CS0 and CS1 are swapped when
+		// booting from nand, but the
+		// addresses are the same.
+		ranges = <0x0 0x0 0xfe000000 0x00800000	/* CS0: 8MB at 0xfe000000 */
+		          0x1 0x0 0xe0600000 0x00008000	/* CS1: 32KB at 0xe0600000 */
+		          0x2 0x0 0xf0000000 0x00020000	/* CS2: 128KB at 0xf0000000 */
+		          0x3 0x0 0xfa000000 0x00008000>;	/* CS3: 32KB at 0xfa000000 */
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x800000>;	/* all 8MB of the CS0 window */
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8379-fcm-nand",
+			             "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x8000>;	/* all 32KB of the CS1 window */
+
+			u-boot@0 {
+				reg = <0x0 0x100000>;	/* 1MB */
+				read-only;
+			};
+
+			kernel@100000 {
+				reg = <0x100000 0x300000>;	/* 3MB */
+			};
+			fs@400000 {
+				reg = <0x400000 0x1c00000>;	/* 28MB, ends at 0x2000000 */
+			};
+		};
+	};
+
+	immr@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x00100000>;
+		reg = <0xe0000000 0x00000200>;
+		bus-frequency = <0>;
+
+		wdt@200 {
+			device_type = "watchdog";
+			compatible = "mpc83xx_wdt";
+			reg = <0x200 0x100>;
+		};
+
+		gpio1: gpio-controller@c00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8379-gpio", "fsl,mpc8349-gpio";
+			reg = <0xc00 0x100>;
+			interrupts = <74 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		gpio2: gpio-controller@d00 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8379-gpio", "fsl,mpc8349-gpio";
+			reg = <0xd00 0x100>;
+			interrupts = <75 0x8>;
+			interrupt-parent = <&ipic>;
+			gpio-controller;
+		};
+
+		sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x0c000000>;
+			ranges;
+
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <14 0x8>;
+				interrupt-parent = <&ipic>;
+				dfsrr;
+
+				dtt@48 {
+					compatible = "national,lm75";
+					reg = <0x48>;
+				};
+
+				at24@50 {
+					compatible = "atmel,24c256";
+					reg = <0x50>;
+				};
+
+				rtc@68 {
+					compatible = "dallas,ds1339";
+					reg = <0x68>;
+				};
+
+				mcu_pio: mcu@a {
+					#gpio-cells = <2>;
+					compatible = "fsl,mc9s08qg8-mpc8379erdb",
+						     "fsl,mcu-mpc8349emitx";
+					reg = <0x0a>;
+					gpio-controller;
+				};
+			};
+
+			sdhci@2e000 {
+				compatible = "fsl,mpc8379-esdhc", "fsl,esdhc";
+				reg = <0x2e000 0x1000>;
+				interrupts = <42 0x8>;
+				interrupt-parent = <&ipic>;
+				sdhci,wp-inverted;
+				/* Filled in by U-Boot */
+				clock-frequency = <111111111>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <15 0x8>;
+			interrupt-parent = <&ipic>;
+			dfsrr;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			compatible = "fsl,spi";
+			reg = <0x7000 0x1000>;
+			interrupts = <16 0x8>;
+			interrupt-parent = <&ipic>;
+			mode = "cpu";
+		};
+
+		dma@82a8 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8379-dma", "fsl,elo-dma";
+			reg = <0x82a8 4>;	/* 4-byte register at 0x82a8 */
+			ranges = <0 0x8100 0x1a8>;	/* child offset 0 -> parent 0x8100, 0x1a8 bytes */
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;	/* same interrupt specifier as every channel below */
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
+				reg = <0 0x80>;	/* window offsets 0x000-0x07f */
+				cell-index = <0>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x80 0x80>;	/* window offsets 0x080-0x0ff */
+				cell-index = <1>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x100 0x80>;	/* window offsets 0x100-0x17f */
+				cell-index = <2>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
+				reg = <0x180 0x28>;	/* shorter: 0x180-0x1a7, the end of the ranges window */
+				cell-index = <3>;
+				interrupt-parent = <&ipic>;
+				interrupts = <71 8>;
+			};
+		};
+
+		usb@23000 {
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&ipic>;
+			interrupts = <38 0x8>;
+			phy_type = "ulpi";
+			sleep = <&pmc 0x00c00000>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <32 0x8 33 0x8 34 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+			sleep = <&pmc 0xc0000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&ipic>;
+					interrupts = <17 0x8>;
+					reg = <0x2>;
+				};
+
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 0x8 36 0x8 37 0x8>;
+			phy-connection-type = "mii";
+			interrupt-parent = <&ipic>;
+			fixed-link = <1 1 1000 0 0>;
+			tbi-handle = <&tbi1>;
+			sleep = <&pmc 0x30000000>;
+			fsl,magic-packet;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <9 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <10 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <11 0x8>;
+			interrupt-parent = <&ipic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+			sleep = <&pmc 0x03000000>;
+		};
+
+		sata@18000 {
+			compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
+			reg = <0x18000 0x1000>;
+			interrupts = <44 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x000000c0>;
+		};
+
+		sata@19000 {
+			compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
+			reg = <0x19000 0x1000>;
+			interrupts = <45 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x00000030>;
+		};
+
+		sata@1a000 {
+			compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
+			reg = <0x1a000 0x1000>;
+			interrupts = <46 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x0000000c>;
+		};
+
+		sata@1b000 {
+			compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
+			reg = <0x1b000 0x1000>;
+			interrupts = <47 0x8>;
+			interrupt-parent = <&ipic>;
+			sleep = <&pmc 0x00000003>;
+		};
+
+		/* IPIC
+		 * interrupts cell = <intr #, sense>
+		 * sense values match linux IORESOURCE_IRQ_* defines:
+		 * sense == 8: Level, low assertion
+		 * sense == 2: Edge, high-to-low change
+		 */
+		ipic: interrupt-controller@700 {
+			compatible = "fsl,ipic";
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x700 0x100>;
+		};
+
+		pmc: power@b00 {
+			compatible = "fsl,mpc8379-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+	};
+
+	pci0: pci@e0008500 {
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+				/* IRQ5 = 21 = 0x15, IRQ6 = 22 = 0x16, IRQ7 = 23 = 0x17 */
+
+				/* IDSEL AD14 IRQ6 inta */
+				 0x7000 0x0 0x0 0x1 &ipic 22 0x8
+
+				/* IDSEL AD15 IRQ5 inta, IRQ6 intb, IRQ7 intd */
+				 0x7800 0x0 0x0 0x1 &ipic 21 0x8
+				 0x7800 0x0 0x0 0x2 &ipic 22 0x8
+				 0x7800 0x0 0x0 0x4 &ipic 23 0x8
+
+				/* IDSEL AD28 IRQ7 inta, IRQ5 intb, IRQ6 intc */
+				 0xE000 0x0 0x0 0x1 &ipic 23 0x8
+				 0xE000 0x0 0x0 0x2 &ipic 21 0x8
+				 0xE000 0x0 0x0 0x3 &ipic 22 0x8>;
+		interrupt-parent = <&ipic>;
+		interrupts = <66 0x8>;
+		bus-range = <0x0 0x0>;
+		ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000	/* 256MB at parent 0x90000000 */
+		          0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000	/* 256MB at parent 0x80000000 */
+		          0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;	/* 1MB at parent 0xe0300000 */
+		sleep = <&pmc 0x00010000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008500 0x100		/* internal registers */
+		       0xe0008300 0x8>;		/* config space access registers */
+		compatible = "fsl,mpc8349-pci";
+		device_type = "pci";
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		pwr {
+			gpios = <&mcu_pio 0 0>;
+			default-state = "on";
+		};
+
+		hdd {
+			gpios = <&mcu_pio 1 0>;
+			linux,default-trigger = "disk-activity";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts
new file mode 100644
index 0000000000..ff60d678c6
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc866ads.dts
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC866 ADS Device Tree Source
+ *
+ * Copyright 2006 MontaVista Software, Inc.
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MPC866ADS";
+	compatible = "fsl,mpc866ads";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,866@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <16>;	// 16 bytes
+			i-cache-line-size = <16>;	// 16 bytes
+			d-cache-size = <0x2000>;		// L1, 8K
+			i-cache-size = <0x4000>;		// L1, 16K
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			interrupts = <15 2>;	// decrementer interrupt
+			interrupt-parent = <&PIC>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x800000>;	// 8MB at 0
+	};
+
+	localbus@ff000100 {
+		compatible = "fsl,mpc866-localbus", "fsl,pq1-localbus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xff000100 0x40>;
+
+		ranges = <
+			0x1 0x0 0xff080000 0x8000
+			0x5 0x0 0xff0a0000 0x8000
+		>;
+
+		board-control@1,0 {
+			reg = <0x1 0x0 0x20 0x5 0x300 0x4>;
+			compatible = "fsl,mpc866ads-bcsr";
+		};
+	};
+
+	soc@ff000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xff000000 0x100000>;
+		reg = <0xff000000 0x200>;
+		bus-frequency = <0>;
+
+		mdio@e00 {
+			compatible = "fsl,mpc866-fec-mdio", "fsl,pq1-fec-mdio";
+			reg = <0xe00 0x188>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			PHY: ethernet-phy@f {
+				reg = <0xf>;
+			};
+		};
+
+		ethernet@e00 {
+			device_type = "network";
+			compatible = "fsl,mpc866-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0xe00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <3 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY>;
+			linux,network-index = <0>;
+		};
+
+		PIC: pic@0 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x0 0x24>;
+			compatible = "fsl,mpc866-pic", "fsl,pq1-pic";
+		};
+
+		cpm@9c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc866-cpm", "fsl,cpm1";
+			ranges;
+			reg = <0x9c0 0x40>;
+			brg-frequency = <0>;
+			interrupts = <0>;	// cpm error interrupt; one cell, as CPM_PIC has #interrupt-cells = <1>
+			interrupt-parent = <&CPM_PIC>;
+
+			muram@2000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x2000 0x2000>;	// 8KB window at 0x2000
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x0 0x1c00>;	// first 7KB of the window
+				};
+			};
+
+			brg@9f0 {
+				compatible = "fsl,mpc866-brg",
+					     "fsl,cpm1-brg",
+					     "fsl,cpm-brg";
+				reg = <0x9f0 0x10>;
+				clock-frequency = <0>;
+			};
+
+			CPM_PIC: pic@930 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <1>;	// children use single-cell specifiers
+				interrupts = <5 2 0 2>;	// two 2-cell specifiers for &PIC
+				interrupt-parent = <&PIC>;
+				reg = <0x930 0x20>;
+				compatible = "fsl,mpc866-cpm-pic",
+				             "fsl,cpm1-pic";
+			};
+
+
+			serial@a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc866-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa80 0x10 0x3e80 0x40>;
+				interrupts = <4>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x90>;
+			};
+
+			serial@a90 {
+				device_type = "serial";
+				compatible = "fsl,mpc866-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa90 0x10 0x3f80 0x40>;
+				interrupts = <3>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0xd0>;
+			};
+
+			ethernet@a00 {
+				device_type = "network";
+				compatible = "fsl,mpc866-scc-enet",
+				             "fsl,cpm1-scc-enet";
+				reg = <0xa00 0x18 0x3c00 0x100>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				interrupts = <30>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-command = <0x0>;	// same value as before, now hex like the siblings
+				linux,network-index = <1>;
+			};
+
+			i2c@860 {
+				compatible = "fsl,mpc866-i2c",
+					     "fsl,cpm1-i2c";
+				reg = <0x860 0x20 0x3c80 0x30>;
+				interrupts = <16>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-command = <0x10>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+	};
+
+	chosen {
+		stdout-path = "/soc/cpm/serial@a80";
+	};
+};
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
new file mode 100644
index 0000000000..be58e7f29c
--- /dev/null
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC885 ADS Device Tree Source
+ *
+ * Copyright 2006 MontaVista Software, Inc.
+ * Copyright 2007,2008 Freescale Semiconductor, Inc.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MPC885ADS";
+	compatible = "fsl,mpc885ads";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,885@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <16>;
+			i-cache-line-size = <16>;
+			d-cache-size = <8192>;
+			i-cache-size = <8192>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			interrupts = <15 2>;	// decrementer interrupt
+			interrupt-parent = <&PIC>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0>;
+	};
+
+	localbus@ff000100 {
+		compatible = "fsl,mpc885-localbus", "fsl,pq1-localbus";
+		#address-cells = <2>;	// <chip-select offset>
+		#size-cells = <1>;
+		reg = <0xff000100 0x40>;
+
+		ranges = <
+			0x0 0x0 0xfe000000 0x800000	// CS0: 8MB at 0xfe000000
+			0x1 0x0 0xff080000 0x8000	// CS1: 32KB at 0xff080000
+			0x5 0x0 0xff0a0000 0x8000	// CS5: 32KB at 0xff0a0000
+		>;
+
+		flash@0,0 {
+			compatible = "jedec-flash";
+			reg = <0x0 0x0 0x800000>;	// all 8MB of the CS0 window
+			bank-width = <4>;
+			device-width = <1>;
+		};
+
+		board-control@1,0 {
+			reg = <0x1 0x0 0x20 0x5 0x300 0x4>;	// two regions: CS1+0x0 (0x20 bytes), CS5+0x300 (4 bytes)
+			compatible = "fsl,mpc885ads-bcsr";
+		};
+	};
+
+	soc@ff000000 {
+		compatible = "fsl,mpc885", "fsl,pq1-soc";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xff000000 0x28000>;
+		bus-frequency = <0>;
+
+		// Temporary -- will go away once kernel uses ranges for get_immrbase().
+		reg = <0xff000000 0x4000>;
+
+		mdio@e00 {
+			compatible = "fsl,mpc885-fec-mdio", "fsl,pq1-fec-mdio";
+			reg = <0xe00 0x188>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			PHY0: ethernet-phy@0 {
+				reg = <0x0>;
+			};
+
+			PHY1: ethernet-phy@1 {
+				reg = <0x1>;
+			};
+
+			PHY2: ethernet-phy@2 {
+				reg = <0x2>;
+			};
+		};
+
+		ethernet@e00 {
+			device_type = "network";
+			compatible = "fsl,mpc885-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0xe00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <3 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY0>;
+			linux,network-index = <0>;
+		};
+
+		ethernet@1e00 {
+			device_type = "network";
+			compatible = "fsl,mpc885-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0x1e00 0x188>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <7 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY1>;
+			linux,network-index = <1>;
+		};
+
+		PIC: interrupt-controller@0 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x0 0x24>;
+			compatible = "fsl,mpc885-pic", "fsl,pq1-pic";
+		};
+
+		pcmcia@80 {
+			#address-cells = <3>;
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			compatible = "fsl,pq-pcmcia";
+			device_type = "pcmcia";
+			reg = <0x80 0x80>;
+			interrupt-parent = <&PIC>;
+			interrupts = <13 1>;
+		};
+
+		cpm@9c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc885-cpm", "fsl,cpm1";
+			command-proc = <0x9c0>;
+			interrupts = <0>;	// cpm error interrupt
+			interrupt-parent = <&CPM_PIC>;
+			reg = <0x9c0 0x40>;
+			ranges;
+
+			muram@2000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x2000 0x2000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x0 0x1c00>;
+				};
+			};
+
+			brg@9f0 {
+				compatible = "fsl,mpc885-brg",
+				             "fsl,cpm1-brg",
+				             "fsl,cpm-brg";
+				clock-frequency = <0>;
+				reg = <0x9f0 0x10>;
+			};
+
+			CPM_PIC: interrupt-controller@930 {
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				interrupts = <5 2 0 2>;
+				interrupt-parent = <&PIC>;
+				reg = <0x930 0x20>;
+				compatible = "fsl,mpc885-cpm-pic",
+				             "fsl,cpm1-pic";
+			};
+
+			serial@a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc885-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa80 0x10 0x3e80 0x40>;
+				interrupts = <4>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x90>;
+			};
+
+			serial@a90 {
+				device_type = "serial";
+				compatible = "fsl,mpc885-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa90 0x10 0x3f80 0x40>;
+				interrupts = <3>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0xd0>;
+			};
+
+			ethernet@a40 {
+				device_type = "network";
+				compatible = "fsl,mpc885-scc-enet",
+				             "fsl,cpm1-scc-enet";
+				reg = <0xa40 0x18 0x3e00 0x100>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				interrupts = <28>;
+				interrupt-parent = <&CPM_PIC>;
+				phy-handle = <&PHY2>;
+				fsl,cpm-command = <0x80>;
+				linux,network-index = <2>;
+			};
+
+			i2c@860 {
+				compatible = "fsl,mpc885-i2c",
+					     "fsl,cpm1-i2c";
+				reg = <0x860 0x20 0x3c80 0x30>;
+				interrupts = <16>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-command = <0x10>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+		crypto@20000 {
+			compatible = "fsl,sec1.2", "fsl,sec1.0";
+			reg = <0x20000 0x8000>;
+			interrupts = <1 1>;
+			interrupt-parent = <&PIC>;
+			fsl,num-channels = <1>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x4c>;
+			fsl,descriptor-types-mask = <0x05000154>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/soc/cpm/serial@a80";
+	};
+};
diff --git a/arch/powerpc/boot/dts/mucmc52.dts b/arch/powerpc/boot/dts/mucmc52.dts
new file mode 100644
index 0000000000..e88a7bd403
--- /dev/null
+++ b/arch/powerpc/boot/dts/mucmc52.dts
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Manroland mucmc52 board Device Tree Source
+ *
+ * Copyright (C) 2009 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ * Copyright 2006-2007 Secret Lab Technologies Ltd.
+ */
+
+/include/ "mpc5200b.dtsi"
+
+/* Timer pins that need to be in GPIO mode */
+&gpt0 { gpio-controller; };
+&gpt1 { gpio-controller; };
+&gpt2 { gpio-controller; };
+&gpt3 { gpio-controller; };
+
+/* Disabled timers */
+&gpt4 { status = "disabled"; };
+&gpt5 { status = "disabled"; };
+&gpt6 { status = "disabled"; };
+&gpt7 { status = "disabled"; };
+
+/ {
+	model = "manroland,mucmc52";
+	compatible = "manroland,mucmc52";
+
+	soc5200@f0000000 {
+		rtc@800 {
+			status = "disabled";
+		};
+
+		can@900 {
+			status = "disabled";
+		};
+
+		can@980 {
+			status = "disabled";
+		};
+
+		spi@f00 {
+			status = "disabled";
+		};
+
+		usb@1000 {
+			status = "disabled";
+		};
+
+		psc@2000 {		// PSC1
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2200 {		// PSC2
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2400 {		// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {		// PSC4
+			status = "disabled";
+		};
+
+		psc@2800 {		// PSC5
+			status = "disabled";
+		};
+
+		psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				compatible = "intel,lxt971";
+				reg = <0>;
+			};
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			hwmon@2c {
+				compatible = "ad,adm9240";
+				reg = <0x2c>;
+			};
+			rtc@51 {
+				compatible = "nxp,pcf8563";
+				reg = <0x51>;
+			};
+		};
+	};
+
+	pci@f0000d00 {
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+				/* IDSEL 0x10 */
+				0x8000 0 0 1 &mpc5200_pic 0 3 3
+				0x8000 0 0 2 &mpc5200_pic 0 3 3
+				0x8000 0 0 3 &mpc5200_pic 0 2 3
+				0x8000 0 0 4 &mpc5200_pic 0 1 3
+				>;
+		ranges = <0x42000000 0 0x60000000 0x60000000 0 0x10000000>,
+			 <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+			 <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+	};
+
+	localbus {
+		ranges = <0 0 0xff800000 0x00800000
+			  1 0 0x80000000 0x00800000
+			  3 0 0x80000000 0x00800000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x00800000>;	// 8MB; the partitions below cover it with no gaps
+			bank-width = <4>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+			partition@0 {
+				label = "DTS";
+				reg = <0x0 0x00100000>;	// 1MB
+			};
+			partition@100000 {
+				label = "Kernel";
+				reg = <0x100000 0x00200000>;	// 2MB
+			};
+			partition@300000 {
+				label = "RootFS";
+				reg = <0x00300000 0x00200000>;	// 2MB
+			};
+			partition@500000 {
+				label = "user";
+				reg = <0x00500000 0x00200000>;	// 2MB
+			};
+			partition@700000 {
+				label = "U-Boot";
+				reg = <0x00700000 0x00040000>;	// 256KB
+			};
+			partition@740000 {
+				label = "Env";
+				reg = <0x00740000 0x00020000>;	// 128KB
+			};
+			partition@760000 {
+				label = "red. Env";
+				reg = <0x00760000 0x00020000>;	// 128KB
+			};
+			partition@780000 {
+				label = "reserve";
+				reg = <0x00780000 0x00080000>;	// 512KB, ends at 0x800000
+			};
+		};
+
+		simple100: gpio-controller-100@3,600100 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600100 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple104: gpio-controller-104@3,600104 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600104 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple200: gpio-controller-200@3,600200 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600200 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple201: gpio-controller-201@3,600201 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600201 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple202: gpio-controller-202@3,600202 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600202 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple203: gpio-controller-203@3,600203 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600203 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple204: gpio-controller-204@3,600204 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600204 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple206: gpio-controller-206@3,600206 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600206 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple207: gpio-controller-207@3,600207 {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x00600207 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+		simple20f: gpio-controller-20f@3,60020f {
+			compatible = "manroland,mucmc52-aux-gpio";
+			reg = <3 0x0060020f 0x1>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+	};
+};
diff --git a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts
new file mode 100644
index 0000000000..a7eb6d2590
--- /dev/null
+++ b/arch/powerpc/boot/dts/mvme5100.dts
@@ -0,0 +1,185 @@
+/*
+ * Device Tree Source for Motorola/Emerson MVME5100.
+ *
+ * Copyright 2013 CSC Australia Pty. Ltd.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "MVME5100";
+	compatible = "MVME5100";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		serial0 = &serial0;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,7410 {
+			device_type = "cpu";
+			reg = <0x0>;
+			/* Following required by dtc but not used */
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			timebase-frequency = <25000000>;
+			clock-frequency = <500000000>;
+			bus-frequency = <100000000>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x20000000>;	/* 512MB at 0 */
+	};
+
+	hawk@fef80000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "hawk-bridge", "simple-bus";
+		ranges = <0x0 0xfef80000 0x10000>;
+		reg = <0xfef80000 0x10000>;
+
+		serial0: serial@8000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x8000 0x80>;
+			reg-shift = <4>;
+			clock-frequency = <1843200>;
+			current-speed = <9600>;
+			interrupts = <1 1>; // IRQ1 Level Active Low.
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@8200 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x8200 0x80>;
+			reg-shift = <4>;
+			clock-frequency = <1843200>;
+			current-speed = <9600>;
+			interrupts = <1 1>; // IRQ1 Level Active Low.
+			interrupt-parent = <&mpic>;
+		};
+
+		mpic: interrupt-controller@f3f80000 {
+			#interrupt-cells = <2>;
+			#address-cells = <0>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+			interrupt-controller;
+			reg = <0xf3f80000 0x40000>;
+		};
+	};
+
+	pci0: pci@feff0000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "hawk-pci";
+		reg = <0xfec00000 0x400000>;
+		8259-interrupt-acknowledge = <0xfeff0030>;
+		ranges = <0x1000000 0x0        0x0 0xfe000000 0x0 0x800000
+			  0x2000000 0x0 0x80000000 0x80000000 0x0 0x74000000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+
+			/*
+			 * This definition (IDSEL 11) duplicates the
+			 * interrupts definition in the i8259
+			 * interrupt controller below.
+			 *
+			 * Do not change the interrupt sense/polarity from
+			 * 0x2 to anything else, doing so will cause endless
+			 * "spurious" i8259 interrupts to be fielded.
+			 */
+			// IDSEL 11 - iPMC712 PCI/ISA Bridge
+			0x5800 0x0 0x0 0x1 &mpic 0x0 0x2
+			0x5800 0x0 0x0 0x2 &mpic 0x0 0x2
+			0x5800 0x0 0x0 0x3 &mpic 0x0 0x2
+			0x5800 0x0 0x0 0x4 &mpic 0x0 0x2
+
+			/* IDSEL 12 - Not Used */
+
+			/* IDSEL 13 - Universe VME Bridge */
+			0x6800 0x0 0x0 0x1 &mpic 0x5 0x1
+			0x6800 0x0 0x0 0x2 &mpic 0x6 0x1
+			0x6800 0x0 0x0 0x3 &mpic 0x7 0x1
+			0x6800 0x0 0x0 0x4 &mpic 0x8 0x1
+
+			/* IDSEL 14 - ENET 1 */
+			0x7000 0x0 0x0 0x1 &mpic 0x2 0x1
+
+			/* IDSEL 15 - Not Used */
+
+			/* IDSEL 16 - PMC Slot 1 */
+			0x8000 0x0 0x0 0x1 &mpic 0x9 0x1
+			0x8000 0x0 0x0 0x2 &mpic 0xa 0x1
+			0x8000 0x0 0x0 0x3 &mpic 0xb 0x1
+			0x8000 0x0 0x0 0x4 &mpic 0xc 0x1
+
+			/* IDSEL 17 - PMC Slot 2 */
+			0x8800 0x0 0x0 0x1 &mpic 0xc 0x1
+			0x8800 0x0 0x0 0x2 &mpic 0x9 0x1
+			0x8800 0x0 0x0 0x3 &mpic 0xa 0x1
+			0x8800 0x0 0x0 0x4 &mpic 0xb 0x1
+
+			/* IDSEL 18 - Not Used */
+
+			/* IDSEL 19 - ENET 2 */
+			0x9800 0x0 0x0 0x1 &mpic 0xd 0x1
+
+			/* IDSEL 20 - PMCSPAN (PCI-X) */
+			0xa000 0x0 0x0 0x1 &mpic 0x9 0x1
+			0xa000 0x0 0x0 0x2 &mpic 0xa 0x1
+			0xa000 0x0 0x0 0x3 &mpic 0xb 0x1
+			0xa000 0x0 0x0 0x4 &mpic 0xc 0x1
+
+		>;
+
+		isa {
+			#address-cells = <2>;
+			#size-cells = <1>;
+			#interrupt-cells = <2>;
+			device_type = "isa";
+			compatible = "isa";
+			ranges = <0x00000001 0 0x01000000 0 0x00000000 0x00001000>;	/* 4KB window */
+			interrupt-parent = <&i8259>;
+
+			i8259: interrupt-controller@20 {
+				#interrupt-cells = <2>;
+				#address-cells = <0>;
+				interrupts = <0 2>;	/* same <0x0 0x2> specifier as the IDSEL 11 interrupt-map entries */
+				device_type = "interrupt-controller";
+				compatible = "chrp,iic";
+				interrupt-controller;
+				reg = <1 0x00000020 0x00000002	/* three 2-byte register regions */
+				       1 0x000000a0 0x00000002
+				       1 0x000004d0 0x00000002>;
+				interrupt-parent = <&mpic>;
+			};
+
+		};
+
+	};
+
+	chosen {
+		stdout-path = &serial0;	/* path reference to the serial0 label */
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts
new file mode 100644
index 0000000000..e0a8d30344
--- /dev/null
+++ b/arch/powerpc/boot/dts/o2d.dts
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O2D Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "o2d.dtsi"
+
+/ {
+	model = "ifm,o2d";
+	compatible = "ifm,o2d";
+
+	memory@0 {
+		reg = <0x00000000 0x08000000>;  // 128MB
+	};
+
+	localbus {
+		ranges = <0 0 0xfc000000 0x02000000
+			  3 0 0xe3000000 0x00100000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;	/* 32MB, the whole CS0 range above */
+			bank-width = <2>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@60000 {
+				label = "kernel";
+				reg = <0x00060000 0x00260000>;	/* 0x060000-0x2bffff */
+				read-only;
+			};
+			/* o2d specific partitions */
+			partition@2c0000 {
+				label = "o2d user defined";
+				reg = <0x002c0000 0x01d40000>;	/* 0x2c0000 up to the end of flash (0x2000000) */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi
new file mode 100644
index 0000000000..7e52509fa5
--- /dev/null
+++ b/arch/powerpc/boot/dts/o2d.dtsi
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O2D base Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 {	/* extends the node labelled gpt0 (label presumably comes from mpc5200b.dtsi) */
+	gpio-controller;
+	fsl,has-wdt;
+	fsl,wdt-on-boot = <0>;	/* NOTE(review): 0 presumably leaves the watchdog stopped at boot - confirm against the GPT binding */
+};
+&gpt1 { gpio-controller; };
+
+/ {	/* common base that the o2d/o2d300/o2dnt2/o2i/o2mnt/o3dnt board files include and override */
+	model = "ifm,o2d";
+	compatible = "ifm,o2d";
+
+	memory@0 {
+		reg = <0x00000000 0x04000000>;	// 64MB
+	};
+
+	soc5200@f0000000 {
+
+		rtc@800 {
+			status = "disabled";
+		};
+
+		psc@2000 {		// PSC1, psc-spi compatible
+			compatible = "fsl,mpc5200b-psc-spi","fsl,mpc5200-psc-spi";
+			#address-cells = <1>;	/* child devices: one address cell, no size */
+			#size-cells = <0>;
+			cell-index = <0>;
+		};
+
+		psc@2200 {		// PSC2
+			status = "disabled";
+		};
+
+		psc@2400 {		// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {		// PSC4, psc-uart compatible
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2800 {		// PSC5, psc-uart compatible
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2c00 {		// PSC6
+			status = "disabled";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;	/* phy0 is defined under mdio@3000 below */
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+	};
+
+	localbus {
+		ranges = <0 0 0xff000000 0x01000000	/* flash@0,0: 16MB window */
+			  3 0 0xe3000000 0x00100000>;	/* csi@3,0: 1MB window */
+
+		// flash device at LocalPlus Bus CS0
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x01000000>;	/* 16MB */
+			bank-width = <1>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+			no-unaligned-direct-access;
+
+			/* common layout for all machines */
+			partition@0 {
+				label = "u-boot";
+				reg = <0x00000000 0x00040000>;	/* 256KB */
+				read-only;
+			};
+			partition@40000 {
+				label = "env";
+				reg = <0x00040000 0x00020000>;	/* 128KB, ends at 0x00060000 */
+				read-only;
+			};
+		};
+
+		csi@3,0 {
+			compatible = "ifm,o2d-csi";
+			reg = <3 0 0x00100000>;	/* whole 1MB window of localbus entry 3 */
+			ifm,csi-clk-handle = <&gpt7>;
+			gpios = <&gpio_simple 23 0	/* imag_capture */
+				 &gpio_simple 26 0	/* imag_reset */
+				 &gpio_simple 29 0>;	/* imag_master_en */
+
+			interrupts = <1 1 2>;		/* IRQ1, edge falling */
+
+			ifm,csi-addr-bus-width = <24>;
+			ifm,csi-data-bus-width = <8>;
+			ifm,csi-wait-cycles = <0>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/o2d300.dts b/arch/powerpc/boot/dts/o2d300.dts
new file mode 100644
index 0000000000..55a25b700b
--- /dev/null
+++ b/arch/powerpc/boot/dts/o2d300.dts
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O2D300 Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "o2d.dtsi"
+
+/ {	/* merged with the root node from o2d.dtsi included above */
+	model = "ifm,o2d300";
+	compatible = "ifm,o2d";
+
+	localbus {
+		ranges = <0 0 0xfc000000 0x02000000	/* flash@0,0: 32MB window */
+			  3 0 0xe3000000 0x00100000>;	/* same entry as in o2d.dtsi */
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;	/* 32MB, same size as the window above */
+			bank-width = <2>;	/* o2d.dtsi sets 1 */
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@40000 {	/* same node name as the "env" partition in o2d.dtsi, so this relabels it */
+				label = "env_1";
+				reg = <0x00040000 0x00020000>;
+				read-only;
+			};
+			partition@60000 {
+				label = "env_2";
+				reg = <0x00060000 0x00020000>;	/* ends at 0x00080000 */
+				read-only;
+			};
+			partition@80000 {
+				label = "kernel";
+				reg = <0x00080000 0x00260000>;	/* ends at 0x002e0000 */
+				read-only;
+			};
+			/* o2d300 specific partitions */
+			partition@2e0000 {
+				label = "o2d300 user defined";
+				reg = <0x002e0000 0x01d20000>;	/* runs to 0x02000000, the end of the flash */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts
new file mode 100644
index 0000000000..c2eedbd1f5
--- /dev/null
+++ b/arch/powerpc/boot/dts/o2dnt2.dts
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O2DNT2 Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "o2d.dtsi"
+
+/ {	/* merged with the root node from o2d.dtsi included above */
+	model = "ifm,o2dnt2";
+	compatible = "ifm,o2d";
+
+	memory@0 {
+		reg = <0x00000000 0x08000000>;  // 128MB (o2d.dtsi declares 64MB)
+	};
+
+	localbus {
+		ranges = <0 0 0xfc000000 0x02000000	/* flash@0,0: 32MB window */
+			  3 0 0xe3000000 0x00100000>;	/* same entry as in o2d.dtsi */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;	/* 32MB, same size as the window above */
+			bank-width = <2>;	/* o2d.dtsi sets 1 */
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@60000 {
+				label = "kernel";
+				reg = <0x00060000 0x00260000>;	/* ends at 0x002c0000 */
+				read-only;
+			};
+
+			/* o2dnt2 specific partitions */
+			partition@2c0000 {
+				label = "o2dnt2 user defined";
+				reg = <0x002c0000 0x01d40000>;	/* runs to 0x02000000, the end of the flash */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/o2i.dts b/arch/powerpc/boot/dts/o2i.dts
new file mode 100644
index 0000000000..3fb2e0ad73
--- /dev/null
+++ b/arch/powerpc/boot/dts/o2i.dts
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O2I Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "o2d.dtsi"
+
+/ {	/* merged with the root node from o2d.dtsi included above */
+	model = "ifm,o2i";
+	compatible = "ifm,o2d";
+
+	localbus {
+		flash@0,0 {	/* no reg/ranges here, so the 16MB geometry from o2d.dtsi applies */
+			partition@60000 {
+				label = "kernel";
+				reg = <0x00060000 0x00260000>;	/* ends at 0x002c0000 */
+				read-only;
+			};
+			/* o2i specific partitions */
+			partition@2c0000 {
+				label = "o2i user defined";
+				reg = <0x002c0000 0x00d40000>;	/* runs to 0x01000000, the o2d.dtsi flash size */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/o2mnt.dts b/arch/powerpc/boot/dts/o2mnt.dts
new file mode 100644
index 0000000000..c5e0ba6e8f
--- /dev/null
+++ b/arch/powerpc/boot/dts/o2mnt.dts
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O2MNT Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "o2d.dtsi"
+
+/ {	/* merged with the root node from o2d.dtsi included above */
+	model = "ifm,o2mnt";
+	compatible = "ifm,o2d";
+
+	localbus {
+		flash@0,0 {	/* no reg/ranges here, so the 16MB geometry from o2d.dtsi applies */
+			partition@60000 {
+				label = "kernel";
+				reg = <0x00060000 0x00260000>;	/* ends at 0x002c0000 */
+				read-only;
+			};
+			/* o2mnt specific partitions */
+			partition@2c0000 {
+				label = "o2mnt user defined";
+				reg = <0x002c0000 0x00d40000>;	/* runs to 0x01000000, the o2d.dtsi flash size */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts
new file mode 100644
index 0000000000..e4c1bdd412
--- /dev/null
+++ b/arch/powerpc/boot/dts/o3dnt.dts
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * O3DNT Device Tree Source
+ *
+ * Copyright (C) 2012 DENX Software Engineering
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/include/ "o2d.dtsi"
+
+/ {	/* merged with the root node from o2d.dtsi included above */
+	model = "ifm,o3dnt";
+	compatible = "ifm,o2d";
+
+	memory@0 {
+		reg = <0x00000000 0x04000000>;  // 64MB (same value as o2d.dtsi)
+	};
+
+	localbus {
+		ranges = <0 0 0xfc000000 0x01000000	/* flash@0,0: 16MB window; o2d.dtsi places it at 0xff000000 */
+			  3 0 0xe3000000 0x00100000>;	/* same entry as in o2d.dtsi */
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x01000000>;	/* 16MB, same size as the window above */
+			bank-width = <2>;	/* o2d.dtsi sets 1 */
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@60000 {
+				label = "kernel";
+				reg = <0x00060000 0x00260000>;	/* ends at 0x002c0000 */
+				read-only;
+			};
+
+			/* o3dnt specific partitions */
+			partition@2c0000 {
+				label = "o3dnt user defined";
+				reg = <0x002c0000 0x00d40000>;	/* runs to 0x01000000, the end of the flash */
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts
new file mode 100644
index 0000000000..d10b041180
--- /dev/null
+++ b/arch/powerpc/boot/dts/obs600.dts
@@ -0,0 +1,314 @@
+/*
+ * Device Tree Source for PlatHome OpenBlockS 600 (405EX)
+ *
+ * Copyright 2011 Ben Herrenschmidt, IBM Corp.
+ *
+ * Based on Kilauea by:
+ *
+ * Copyright 2007-2009 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	model = "PlatHome,OpenBlockS 600";
+	compatible = "plathome,obs600";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {	/* bare &label values are path references; all four labels are defined under /plb/opb */
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {	/* children are addressed by one cell and carry no size */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,405EX";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>; /* 16 kB */
+			d-cache-size = <16384>; /* 16 kB */
+			dcr-controller;	/* the root node's dcr-parent points at this cpu node */
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>; /* <base size> placeholder, filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller {	/* head of the cascade: has no interrupt-parent; UIC1 and UIC2 feed into it */
+		compatible = "ibm,uic-405ex", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;	/* every specifier targeting a UIC in this file is a 2-cell pair */
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-405ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade: two specifiers, UIC0 inputs 0x1e and 0x1f */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-405ex","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade: two specifiers, UIC0 inputs 0x1c and 0x1d */
+		interrupt-parent = <&UIC0>;
+	};
+
+	CPM0: cpm {
+		compatible = "ibm,cpm";
+		dcr-access-method = "native";
+		dcr-reg = <0x0b0 0x003>;	/* NOTE(review): presumably <first DCR, count> - confirm against the 4xx DCR binding */
+		unused-units = <0x00000000>;
+		idle-doze = <0x02000000>;
+		standby = <0xe3e74800>;
+	};
+
+	plb {
+		compatible = "ibm,plb-405ex", "ibm,plb4";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-405ex", "ibm,sdram-4xx-ddr2";
+			dcr-reg = <0x010 0x002>;
+			interrupt-parent = <&UIC2>;	/* both ECC interrupts below are UIC2 inputs */
+			interrupts = <0x5 0x4	/* ECC DED Error */
+				      0x6 0x4>;	/* ECC SEC Error */
+		};
+
+		CRYPTO: crypto@ef700000 {	/* direct child of plb, whose empty "ranges" maps addresses 1:1 to the parent */
+			compatible = "amcc,ppc405ex-crypto", "amcc,ppc4xx-crypto";
+			reg = <0xef700000 0x80400>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x17 0x2>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-405ex", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;	/* self-reference: the specifiers below go through this node's own interrupt-map */
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;	/* five 1-cell specifiers, one per interrupt-map row */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;	/* compare the whole 1-cell specifier */
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-405ex", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x80000000 0x80000000 0x10000000
+				  0xef600000 0xef600000 0x00a00000
+				  0xf0000000 0xf0000000 0x10000000>;
+			dcr-reg = <0x0a0 0x005>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-405ex", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;	/* child reg: two address cells plus one size cell */
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {	/* the seven partitions below tile the device with no gaps */
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x08000000>;	/* size 0x08000000 = 128MB */
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel + initrd";
+						reg = <0x00000000 0x03de0000>;
+					};
+					partition@3de0000 {
+						label = "user config area";
+						reg = <0x03de0000 0x00080000>;	/* 512KB */
+					};
+					partition@3e60000 {
+						label = "user program area";
+						reg = <0x03e60000 0x04000000>;	/* 64MB */
+					};
+					partition@7e60000 {
+						label = "flat device tree";
+						reg = <0x07e60000 0x00080000>;	/* 512KB */
+					};
+					partition@7ee0000 {
+						label = "test program";
+						reg = <0x07ee0000 0x00080000>;	/* 512KB */
+					};
+					partition@7f60000 {
+						label = "u-boot env";
+						reg = <0x07f60000 0x00040000>;	/* 256KB */
+					};
+					partition@7fa0000 {
+						label = "u-boot";
+						reg = <0x07fa0000 0x00060000>;	/* ends at 0x08000000, the end of the flash */
+					};
+				};
+			};
+
+			UART0: serial@ef600200 {	/* aliased as serial0; /chosen stdout-path names this node */
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600200 0x00000008>;
+				virtual-reg = <0xef600200>;	/* same address as reg */
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1a 0x4>;
+			};
+
+			UART1: serial@ef600300 {	/* aliased as serial1 */
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;	/* same address as reg */
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@ef600400 {
+				compatible = "ibm,iic-405ex", "ibm,iic";
+				reg = <0xef600400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;	/* child devices: one address cell, no size */
+				#size-cells = <0>;
+
+				rtc@68 {
+					compatible = "dallas,ds1340";
+					reg = <0x68>;	/* matches the unit address @68 */
+				};
+			};
+
+			IIC1: i2c@ef600500 {	/* second IIC controller; no child devices are described */
+				compatible = "ibm,iic-405ex", "ibm,iic";
+				reg = <0xef600500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			RGMII0: emac-rgmii@ef600b00 {	/* referenced by EMAC0 and EMAC1 through rgmii-device */
+				compatible = "ibm,rgmii-405ex", "ibm,rgmii";
+				reg = <0xef600b00 0x00000104>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@ef600900 {	/* aliased as ethernet0 */
+				linux,network-index = <0x0>;
+				device_type = "network";
+				compatible = "ibm,emac-405ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC0>;	/* self-reference: specifiers go through this node's own interrupt-map */
+				interrupts = <0x0 0x1>;	/* two 1-cell specifiers: Status and Wake */
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
+						/*Wake*/  0x1 &UIC1 0x1d 0x4>;
+				reg = <0xef600900 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;	/* EMAC0 uses MAL channel 0 for TX and RX */
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;	/* EMAC1 uses channel 1 of the same bridge */
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600a00 {	/* aliased as ethernet1 */
+				linux,network-index = <0x1>;
+				device_type = "network";
+				compatible = "ibm,emac-405ex", "ibm,emac4sync";
+				interrupt-parent = <&EMAC1>;	/* self-reference: specifiers go through this node's own interrupt-map */
+				interrupts = <0x0 0x1>;	/* two 1-cell specifiers: Status and Wake */
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
+						/*Wake*/  0x1 &UIC1 0x1f 0x4>;
+				reg = <0xef600a00 0x000000c4>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;	/* EMAC1 uses MAL channel 1 for TX and RX */
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;	/* EMAC0 uses channel 0 of the same bridge */
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			GPIO: gpio@ef600800 {	/* no interrupt properties are given for this node */
+				device_type = "gpio";
+				compatible = "ibm,gpio-405ex", "ibm,ppc4xx-gpio";
+				reg = <0xef600800 0x50>;
+			};
+		};
+	};
+        chosen {
+                stdout-path = "/plb/opb/serial@ef600200";	/* full path of the UART0 node (alias serial0) */
+        };
+};
diff --git a/arch/powerpc/boot/dts/pcm030.dts b/arch/powerpc/boot/dts/pcm030.dts
new file mode 100644
index 0000000000..5cee474dcc
--- /dev/null
+++ b/arch/powerpc/boot/dts/pcm030.dts
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * phyCORE-MPC5200B-tiny (pcm030) board Device Tree Source
+ *
+ * Copyright 2006 Pengutronix
+ * Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright 2007 Pengutronix
+ * Juergen Beisert <j.beisert@pengutronix.de>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };	/* only gpt0 gets fsl,has-wdt; gpt1 is not touched by this file */
+&gpt2 { gpio-controller; };	/* gpt2..gpt7 are each marked as a gpio-controller */
+&gpt3 { gpio-controller; };
+&gpt4 { gpio-controller; };
+&gpt5 { gpio-controller; };
+&gpt6 { gpio-controller; };
+&gpt7 { gpio-controller; };
+
+/ {	/* merged with the root node from mpc5200b.dtsi included above */
+	model = "phytec,pcm030";
+	compatible = "phytec,pcm030";
+
+	soc5200@f0000000 {
+		audioplatform: psc@2000 { /* PSC1 in ac97 mode; the label is referenced by /sound below */
+			compatible = "mpc5200b-psc-ac97","fsl,mpc5200b-psc-ac97";	/* NOTE(review): first string has no "fsl," prefix, unlike pcm032.dts - confirm intended */
+			cell-index = <0>;
+		};
+
+		/* PSC2 port is used by CAN1/2 */
+		psc@2200 {
+			status = "disabled";
+		};
+
+		psc@2400 { /* PSC3 in UART mode */
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		/* PSC4: function not documented here (original note: "???"); node is disabled */
+		psc@2600 {
+			status = "disabled";
+		};
+
+		/* PSC5: function not documented here (original note: "???"); node is disabled */
+		psc@2800 {
+			status = "disabled";
+		};
+
+		psc@2c00 { /* PSC6 in UART mode */
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;	/* phy0 is defined under mdio@3000 below */
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		i2c@3d40 {
+			rtc@51 {
+				compatible = "nxp,pcf8563";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "catalyst,24c32", "atmel,24c32";
+				reg = <0x52>;
+				pagesize = <32>;
+			};
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200b-sram","fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;	/* 16KB */
+		};
+	};
+
+	pci@f0000d00 {
+		interrupt-map-mask = <0xf800 0 0 7>;	/* per the PCI bus binding: match on device number and INTx pin */
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3 // 1st slot
+				 0xc000 0 0 2 &mpc5200_pic 1 1 3
+				 0xc000 0 0 3 &mpc5200_pic 1 2 3
+				 0xc000 0 0 4 &mpc5200_pic 1 3 3
+
+				 0xc800 0 0 1 &mpc5200_pic 1 1 3 // 2nd slot: same four sources, rotated by one pin
+				 0xc800 0 0 2 &mpc5200_pic 1 2 3
+				 0xc800 0 0 3 &mpc5200_pic 1 3 3
+				 0xc800 0 0 4 &mpc5200_pic 0 0 3>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,	/* prefetchable memory, 512MB */
+			 <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,	/* non-prefetchable memory, 256MB */
+			 <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;	/* I/O space, 16MB, CPU address 0xb0000000 */
+	};
+
+	localbus {
+		status = "disabled";
+	};
+
+	sound {
+		compatible = "phytec,pcm030-audio-fabric";
+		asoc-platform = <&audioplatform>;	/* phandle to psc@2000 (PSC1) above */
+	};
+};
diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts
new file mode 100644
index 0000000000..d00f13b625
--- /dev/null
+++ b/arch/powerpc/boot/dts/pcm032.dts
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * phyCORE-MPC5200B-IO (pcm032) board Device Tree Source
+ *
+ * Copyright (C) 2006-2009 Pengutronix
+ * Sascha Hauer, Juergen Beisert, Wolfram Sang <kernel@pengutronix.de>
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { fsl,has-wdt; };	/* only gpt0 gets fsl,has-wdt; gpt1 is not touched by this file */
+&gpt2 { gpio-controller; };	/* gpt2..gpt7 are each marked as a gpio-controller */
+&gpt3 { gpio-controller; };
+&gpt4 { gpio-controller; };
+&gpt5 { gpio-controller; };
+&gpt6 { gpio-controller; };
+&gpt7 { gpio-controller; };
+
+/ {
+	model = "phytec,pcm032";
+	compatible = "phytec,pcm032";
+
+	memory@0 {
+		reg = <0x00000000 0x08000000>;	// 128MB
+	};
+
+	soc5200@f0000000 {
+		psc@2000 {	/* PSC1 is ac97 */
+			compatible = "fsl,mpc5200b-psc-ac97","fsl,mpc5200-psc-ac97";
+			cell-index = <0>;
+		};
+
+		/* PSC2 port is used by CAN1/2 */
+		psc@2200 {
+			status = "disabled";
+		};
+
+		psc@2400 { /* PSC3 in UART mode */
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		/* PSC4: function not documented here (original note: "???"); node is disabled */
+		psc@2600 {
+			status = "disabled";
+		};
+
+		/* PSC5: function not documented here (original note: "???"); node is disabled */
+		psc@2800 {
+			status = "disabled";
+		};
+
+		psc@2c00 { /* PSC6 in UART mode */
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;	/* phy0 is defined under mdio@3000 below */
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		i2c@3d40 {
+			rtc@51 {
+				compatible = "nxp,pcf8563";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "catalyst,24c32", "atmel,24c32";
+				reg = <0x52>;
+				pagesize = <32>;
+			};
+		};
+	};
+
+	pci@f0000d00 {
+		interrupt-map-mask = <0xf800 0 0 7>;	/* per the PCI bus binding: match on device number and INTx pin */
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3 // 1st slot
+				 0xc000 0 0 2 &mpc5200_pic 1 1 3
+				 0xc000 0 0 3 &mpc5200_pic 1 2 3
+				 0xc000 0 0 4 &mpc5200_pic 1 3 3
+
+				 0xc800 0 0 1 &mpc5200_pic 1 1 3 // 2nd slot: same four sources, rotated by one pin
+				 0xc800 0 0 2 &mpc5200_pic 1 2 3
+				 0xc800 0 0 3 &mpc5200_pic 1 3 3
+				 0xc800 0 0 4 &mpc5200_pic 0 0 3>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,	/* prefetchable memory, 512MB */
+			 <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,	/* non-prefetchable memory, 256MB */
+			 <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;	/* I/O space, 16MB, CPU address 0xb0000000 */
+	};
+
+	localbus {
+		ranges = <0 0 0xfe000000 0x02000000	/* entry 0: flash@0,0, 32MB */
+			  1 0 0xfc000000 0x02000000
+			  2 0 0xfbe00000 0x00200000	/* entry 2: sram@2,0, 2MB */
+			  3 0 0xf9e00000 0x02000000	/* entries 3-7 have no enabled child node in this file */
+			  4 0 0xf7e00000 0x02000000
+			  5 0 0xe6000000 0x02000000
+			  6 0 0xe8000000 0x02000000
+			  7 0 0xea000000 0x02000000>;
+
+		flash@0,0 {	/* the seven partitions below tile the 32MB device with no gaps */
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;
+			bank-width = <4>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@0 {
+				label = "ubootl";
+				reg = <0x00000000 0x00040000>;	/* 256KB */
+			};
+			partition@40000 {
+				label = "kernel";
+				reg = <0x00040000 0x001c0000>;	/* ends at 0x00200000 */
+			};
+			partition@200000 {
+				label = "jffs2";
+				reg = <0x00200000 0x01d00000>;	/* 29MB, ends at 0x01f00000 */
+			};
+			partition@1f00000 {
+				label = "uboot";
+				reg = <0x01f00000 0x00040000>;	/* 256KB */
+			};
+			partition@1f40000 {
+				label = "env";
+				reg = <0x01f40000 0x00040000>;	/* 256KB */
+			};
+			partition@1f80000 {
+				label = "oftree";
+				reg = <0x01f80000 0x00040000>;	/* 256KB */
+			};
+			partition@1fc0000 {
+				label = "space";
+				reg = <0x01fc0000 0x00040000>;	/* ends at 0x02000000, the end of the flash */
+			};
+		};
+
+		sram@2,0 {
+			compatible = "mtd-ram";
+			reg = <2 0 0x00200000>;	/* fills the 2MB window of ranges entry 2 */
+			bank-width = <2>;
+		};
+
+		/*
+		 * example snippets for FPGA (commented out, not part of the tree)
+		 *
+		 * fpga@3,0 {
+		 *	 compatible = "fpga_driver";
+		 *	 reg = <3 0 0x02000000>;
+		 *	 bank-width = <4>;
+		 * };
+		 *
+		 * fpga@4,0 {
+		 *	 compatible = "fpga_driver";
+		 *	 reg = <4 0 0x02000000>;
+		 *	 bank-width = <4>;
+		 * };
+		 */
+
+		/*
+		 * example snippets for free chipselects (commented out)
+		 *
+		 * device@5,0 {
+		 *	 compatible = "custom_driver";
+		 *	 reg = <5 0 0x02000000>;
+		 * };
+		 *
+		 * device@6,0 {
+		 *	 compatible = "custom_driver";
+		 *	 reg = <6 0 0x02000000>;
+		 * };
+		 *
+		 * device@7,0 {
+		 *	 compatible = "custom_driver";
+		 *	 reg = <7 0 0x02000000>;
+		 * };
+		 */
+	};
+};
diff --git a/arch/powerpc/boot/dts/pdm360ng.dts b/arch/powerpc/boot/dts/pdm360ng.dts
new file mode 100644
index 0000000000..67c3b9db75
--- /dev/null
+++ b/arch/powerpc/boot/dts/pdm360ng.dts
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree Source for IFM PDM360NG.
+ *
+ * Copyright 2009 - 2010 DENX Software Engineering.
+ * Anatolij Gustschin <agust@denx.de>
+ *
+ * Based on MPC5121E ADS dts.
+ * Copyright 2008 Freescale Semiconductor Inc.
+ */
+
+#include "mpc5121.dtsi"
+
+/ {
+	model = "pdm360ng";
+	compatible = "ifm,pdm360ng", "fsl,mpc5121";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&ipic>;
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x20000000>;	// 512MB at 0
+	};
+
+	nfc@40000000 {
+		bank-width = <0x1>;
+		chips = <0x1>;
+
+		partition@0 {	/* a single partition is declared for the NAND */
+			label = "nand0";
+			reg = <0x0 0x40000000>;	/* offset 0, length 0x40000000 (1GB) */
+		};
+	};
+
+	localbus@80000020 {
+		ranges = <0x0 0x0 0xf0000000 0x10000000   /* Flash: 256MB window */
+			  0x2 0x0 0x50040000 0x00020000>; /* CS2: MRAM, 128KB window */
+
+		flash@0,0 {
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			reg = <0 0x00000000 0x08000000	/* two 128MB regions that together fill the 256MB window */
+			       0 0x08000000 0x08000000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <4>;
+			device-width = <2>;
+
+			partition@0 {
+				label = "u-boot";
+				reg = <0x00000000 0x00080000>;	/* 512KB */
+				read-only;
+			};
+			partition@80000 {
+				label = "environment";
+				reg = <0x00080000 0x00080000>;	/* 512KB */
+				read-only;
+			};
+			partition@100000 {
+				label = "splash-image";
+				reg = <0x00100000 0x00080000>;	/* 512KB */
+				read-only;
+			};
+			partition@180000 {
+				label = "device-tree";
+				reg = <0x00180000 0x00040000>;	/* 256KB */
+			};
+			partition@1c0000 {
+				label = "kernel";
+				reg = <0x001c0000 0x00500000>;	/* 5MB, ends at 0x006c0000 */
+			};
+			partition@6c0000 {
+				label = "filesystem";
+				reg = <0x006c0000 0x07940000>;	/* ends at 0x08000000; offsets above that have no partition */
+			};
+		};
+
+		mram0@2,0 {
+			compatible = "mtd-ram";
+			reg = <2 0x00000 0x10000>;	/* first 64KB of the CS2 window */
+			bank-width = <2>;
+		};
+
+		mram1@2,10000 {
+			compatible = "mtd-ram";
+			reg = <2 0x010000 0x10000>;	/* second 64KB of the CS2 window */
+			bank-width = <2>;
+		};
+	};
+
+	soc@80000000 {	/* only overrides are given here; the other node properties are not set in this file */
+
+		i2c@1700 {
+			fsl,preserve-clocking;
+
+			eeprom@50 {
+				compatible = "atmel,24c01";
+				reg = <0x50>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@1720 {
+			status = "disabled";
+		};
+
+		i2c@1740 {
+			fsl,preserve-clocking;
+		};
+
+		ethernet@2800 {
+			phy-handle = <&phy0>;	/* phy0 is defined under mdio@2800 below */
+		};
+
+		mdio@2800 {
+			phy0: ethernet-phy@1f {
+				compatible = "smsc,lan8700";
+				reg = <0x1f>;	/* matches the unit address @1f */
+			};
+		};
+
+		/* USB1 using external ULPI PHY */
+		usb@3000 {
+			dr_mode = "host";
+		};
+
+		/* USB0 using internal UTMI PHY */
+		usb@4000 {
+			fsl,invert-pwr-fault;
+		};
+
+		/* PSCs at 11500, 11700 and 11a00 are disabled; 11900 is psc-spi; the rest are psc-uart */
+		psc@11000 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11100 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11200 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11300 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11400 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11500 {
+			status = "disabled";
+		};
+
+		psc@11600 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11700 {
+			status = "disabled";
+		};
+
+		psc@11800 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+
+		psc@11900 {
+			compatible = "fsl,mpc5121-psc-spi", "fsl,mpc5121-psc";
+			#address-cells = <1>;	/* child devices: one address cell, no size */
+			#size-cells = <0>;
+
+			/* ADS7845 touch screen controller */
+			ts@0 {
+				compatible = "ti,ads7846";	/* NOTE(review): comment above names ADS7845 but this string is ti,ads7846 - confirm */
+				reg = <0x0>;
+				spi-max-frequency = <3000000>;	/* 3 MHz */
+				/* pen irq is GPIO25 */
+				interrupts = <78 0x8>;
+			};
+		};
+
+		psc@11a00 {
+			status = "disabled";
+		};
+
+		psc@11b00 {
+			compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/ps3.dts b/arch/powerpc/boot/dts/ps3.dts
new file mode 100644
index 0000000000..6bdfba6cbb
--- /dev/null
+++ b/arch/powerpc/boot/dts/ps3.dts
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 Game Console device tree.
+ *
+ *  Copyright (C) 2007 Sony Computer Entertainment Inc.
+ *  Copyright 2007 Sony Corp.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "SonyPS3";
+	compatible = "sony,ps3";
+	#size-cells = <2>;	/* addresses and sizes are two cells each in the root */
+	#address-cells = <2>;
+
+	chosen {	/* intentionally empty in the source */
+	};
+
+	/*
+	 * We'll get the size of the bootmem block from lv1 after startup,
+	 * so we'll put a null entry here.
+	 */
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000 0x00000000>;	/* 2-cell base, 2-cell size, all zero */
+	};
+
+	/*
+	 * The boot cpu is always zero for PS3.
+	 *
+	 * dtc expects clock-frequency and timebase-frequency entries, so
+	 * we'll put null entries here.  These will be initialized after
+	 * startup with data from lv1.
+	 *
+	 * Seems the only way currently to indicate a processor has multiple
+	 * threads is with an ibm,ppc-interrupt-server#s entry.  We'll put one
+	 * here so we can bring up both of ours.  See smp_setup_cpu_maps().
+	 */
+
+	cpus {
+		#size-cells = <0>;
+		#address-cells = <1>;
+
+		cpu@0 {
+			device_type = "cpu";
+			reg = <0x00000000>;
+			ibm,ppc-interrupt-server#s = <0x0 0x1>;	/* two entries, one per thread (see comment above) */
+			clock-frequency = <0>;	/* null placeholder */
+			timebase-frequency = <0>;	/* null placeholder */
+			i-cache-size = <32768>;	/* 32 kB */
+			d-cache-size = <32768>;	/* 32 kB */
+			i-cache-line-size = <128>;
+			d-cache-line-size = <128>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts
new file mode 100644
index 0000000000..e59829cff5
--- /dev/null
+++ b/arch/powerpc/boot/dts/rainier.dts
@@ -0,0 +1,350 @@
+/*
+ * Device Tree Source for AMCC Rainier
+ *
+ * Based on Sequoia code
+ * Copyright (c) 2007 MontaVista Software, Inc.
+ *
+ * FIXME: Draft only!
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ *
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,rainier";
+	compatible = "amcc,rainier";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+		serial3 = &UART3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440GRx";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440grx","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440grx","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-440grx","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440grx", "ibm,sdr-440ep";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440grx", "ibm,cpr-440ep";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440grx", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440grx", "ibm,sdram-44x-ddr2denali";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		DMA0: dma {
+			compatible = "ibm,dma-440grx", "ibm,dma-4xx";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440grx", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;
+		};
+
+		POB0: opb {
+		  	compatible = "ibm,opb-440grx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+		  	ranges = <0x00000000 0x00000001 0x00000000 0x80000000
+			          0x80000000 0x00000001 0x80000000 0x80000000>;
+		  	interrupt-parent = <&UIC1>;
+		  	interrupts = <0x7 0x4>;
+		  	clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440grx", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl256n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "Kernel";
+						reg = <0x00000000 0x00180000>;
+					};
+					partition@180000 {
+						label = "ramdisk";
+						reg = <0x00180000 0x00200000>;
+					};
+					partition@380000 {
+						label = "file system";
+						reg = <0x00380000 0x03aa0000>;
+					};
+					partition@3e20000 {
+						label = "kozio";
+						reg = <0x03e20000 0x00140000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+
+			};
+
+			UART0: serial@ef600300 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600300 0x00000008>;
+		   		virtual-reg = <0xef600300>;
+		   		clock-frequency = <0>; /* Filled in by zImage */
+		   		current-speed = <115200>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x0 0x4>;
+	   		};
+
+			UART1: serial@ef600400 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600400 0x00000008>;
+		   		virtual-reg = <0xef600400>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x1 0x4>;
+	   		};
+
+			UART2: serial@ef600500 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600500 0x00000008>;
+		   		virtual-reg = <0xef600500>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC1>;
+		   		interrupts = <0x3 0x4>;
+	   		};
+
+			UART3: serial@ef600600 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600600 0x00000008>;
+		   		virtual-reg = <0xef600600>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC1>;
+		   		interrupts = <0x4 0x4>;
+	   		};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-440grx", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-440grx", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-440grx", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			RGMII0: emac-rgmii@ef601000 {
+				compatible = "ibm,rgmii-440grx", "ibm,rgmii";
+				reg = <0xef601000 0x00000008>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-440grx", "ibm,emac-440epx", "ibm,emac4";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
+						/*Wake*/  0x1 &UIC1 0x1d 0x4>;
+				reg = <0xef600e00 0x00000074>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				device_type = "network";
+				compatible = "ibm,emac-440grx", "ibm,emac-440epx", "ibm,emac4";
+				interrupt-parent = <&EMAC1>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
+						/*Wake*/  0x1 &UIC1 0x1f 0x4>;
+				reg = <0xef600f00 0x00000074>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCI0: pci@1ec000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440grx-pci", "ibm,plb-pci";
+			primary;
+			reg = <0x00000001 0xeec00000 0x00000008	/* Config space access */
+			       0x00000001 0xeed00000 0x00000004	/* IACK */
+			       0x00000001 0xeed00000 0x00000004	/* Special cycle */
+			       0x00000001 0xef400000 0x00000040>;	/* Internal registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed. Chip supports a second
+			 * IO range but we don't use it for now
+			 */
+			ranges = <0x02000000 0x0 0x80000000 0x1 0x80000000 0x0 0x40000000
+				0x01000000 0x0 0x00000000 0x1 0xe8000000 0x0 0x00010000
+				0x01000000 0x0 0x00000000 0x1 0xe8800000 0x0 0x03800000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* All PCI interrupts are routed to IRQ 67 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+			interrupt-map = < 0x0 0x0 0x0 0x0 &UIC2 0x3 0x8 >;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+		bootargs = "console=ttyS0,115200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
new file mode 100644
index 0000000000..3c849e23e5
--- /dev/null
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -0,0 +1,368 @@
+/*
+ * Device Tree Source for AMCC Redwood(460SX)
+ *
+ * Copyright 2008 AMCC <tmarri@amcc.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,redwood";
+	compatible = "amcc,redwood";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,460SX";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+			timebase-frequency = <0>; /* Filled in by U-Boot */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0xa 0x4 0xb 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC3: interrupt-controller3 {
+		compatible = "ibm,uic-460sx","ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x0f0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-460sx";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-460sx";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-460sx", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by U-Boot */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-460sx", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-460sx", "ibm,mcmal2";
+			dcr-reg = <0x180 0x62>;
+			num-tx-chans = <4>;
+			num-rx-chans = <32>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <	/*TXEOB*/ 0x6 0x4
+					/*RXEOB*/ 0x7 0x4
+					/*SERR*/  0x1 0x4
+					/*TXDE*/  0x2 0x4
+					/*RXDE*/  0x3 0x4
+					/*COAL TX0*/ 0x18 0x2
+					/*COAL TX1*/ 0x19 0x2
+					/*COAL TX2*/ 0x1a 0x2
+					/*COAL TX3*/ 0x1b 0x2
+					/*COAL RX0*/ 0x1c 0x2
+					/*COAL RX1*/ 0x1d 0x2
+					/*COAL RX2*/ 0x1e 0x2
+					/*COAL RX3*/ 0x1f 0x2>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-460sx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>;
+			clock-frequency = <0>; /* Filled in by U-Boot */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-460sx", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
+				interrupts = <0x6 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x0000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <0x00200000 0x01400000>;
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <0x01600000 0x00400000>;
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <0x01a00000 0x02560000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+			};
+
+			UART0: serial@ef600200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600200 0x00000008>;
+				virtual-reg = <0xef600200>;
+				clock-frequency = <0>; /* Filled in by U-Boot */
+				current-speed = <0>; /* Filled in by U-Boot */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			RGMII0: emac-rgmii@ef600900 {
+				compatible = "ibm,rgmii-460sx", "ibm,rgmii";
+				reg = <0xef600900 0x00000008>;
+			};
+
+			EMAC0: ethernet@ef600a00 {
+				device_type = "network";
+				compatible = "ibm,emac-460sx", "ibm,emac4";
+				interrupt-parent = <&EMAC0>;
+				interrupts = <0x0 0x1>;
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x13 0x4
+						 /*Wake*/   0x1 &UIC2 0x1d 0x4>;
+				reg = <0xef600a00 0x00000070>;
+				local-mac-address = [000000000000]; /* Filled in by U-Boot */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+		PCIE0: pcie@d00000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460sx", "ibm,plb-pciex";
+			primary;
+			port = <0x0>; /* port number */
+			reg = <0x0000000d 0x00000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10000000 0x00001000>;	/* Registers */
+			dcr-reg = <0x100 0x020>;
+			sdr-base = <0x300>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x10 to 0x1f */
+			bus-range = <0x10 0x1f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * the port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x0 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x1 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x2 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
+		};
+
+		PCIE1: pcie@d20000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460sx", "ibm,plb-pciex";
+			primary;
+			port = <0x1>; /* port number */
+			reg = <0x0000000d 0x20000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10001000 0x00001000>;	/* Registers */
+			dcr-reg = <0x120 0x020>;
+			sdr-base = <0x340>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80010000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x20 to 0x2f */
+			bus-range = <0x20 0x2f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * the port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x4 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x5 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0x6 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
+		};
+
+		PCIE2: pcie@d40000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb-pciex-460sx", "ibm,plb-pciex";
+			primary;
+			port = <0x2>; /* port number */
+			reg = <0x0000000d 0x40000000 0x20000000	/* Config space access */
+			       0x0000000c 0x10002000 0x00001000>;	/* Registers */
+			dcr-reg = <0x140 0x020>;
+			sdr-base = <0x370>;
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x0000000f 0x00000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80020000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* This drives busses 0x30 to 0x3f */
+			bus-range = <0x30 0x3f>;
+
+			/* Legacy interrupts (note the weird polarity, the bridge seems
+			 * to invert PCIe legacy interrupts).
+			 * We are de-swizzling here because the numbers are actually for
+			 * the port of the root complex virtual P2P bridge. But I want
+			 * to avoid putting a node for it in the tree, so the numbers
+			 * below are basically de-swizzled numbers.
+			 * The real slot is on idsel 0, so the swizzling is 1:1
+			 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+			interrupt-map = <
+				0x0 0x0 0x0 0x1 &UIC3 0x8 0x4 /* swizzled int A */
+				0x0 0x0 0x0 0x2 &UIC3 0x9 0x4 /* swizzled int B */
+				0x0 0x0 0x0 0x3 &UIC3 0xa 0x4 /* swizzled int C */
+				0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>;
+		};
+
+	};
+
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600200";
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts
new file mode 100644
index 0000000000..7d15f18e11
--- /dev/null
+++ b/arch/powerpc/boot/dts/sam440ep.dts
@@ -0,0 +1,293 @@
+/*
+ * Device Tree Source for ACube Sam440ep, based on bamboo.dts code;
+ * original copyrights below.
+ *
+ * Copyright (c) 2006, 2007 IBM Corp.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Modified from bamboo.dts for sam440ep:
+ * Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "acube,sam440ep";
+	compatible = "acube,sam440ep";
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+		serial3 = &UART3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440EP";
+			reg = <0>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0 0 0>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 9>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 9>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 4 0x1f 4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440ep";
+		dcr-reg = <0x00e 2>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440ep";
+		dcr-reg = <0x00c 2>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440ep", "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440ep", "ibm,sdram-405gp";
+			dcr-reg = <0x010 2>;
+		};
+
+		DMA0: dma {
+			compatible = "ibm,dma-440ep", "ibm,dma-440gp";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440ep", "ibm,mcmal-440gp", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0 1 2 3 4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0 &UIC0 10 4
+					/*RXEOB*/ 1 &UIC0 11 4
+					/*SERR*/  2 &UIC1 0 4
+					/*TXDE*/  3 &UIC1 1 4
+					/*RXDE*/  4 &UIC1 2 4>;
+		};
+
+		POB0: opb {
+		  	compatible = "ibm,opb-440ep", "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Like Bamboo (which this file is derived from), this board is an
+			 * oddball in the 44x world and doesn't use the ERPN bits.
+			 */
+		  	ranges = <0x00000000 0 0x00000000 0x80000000
+			          0x80000000 0 0x80000000 0x80000000>;
+		  	interrupt-parent = <&UIC1>;
+		  	interrupts = <7 4>;
+		  	clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440ep", "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 2>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				interrupts = <5 1>;
+				interrupt-parent = <&UIC1>;
+			};
+
+			UART0: serial@ef600300 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600300 8>;
+		   		virtual-reg = <0xef600300>;
+		   		clock-frequency = <0>; /* Filled in by zImage */
+		   		current-speed = <0x1c200>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0 4>;
+	   		};
+
+			UART1: serial@ef600400 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600400 8>;
+		   		virtual-reg = <0xef600400>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <1 4>;
+	   		};
+
+			UART2: serial@ef600500 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600500 8>;
+		   		virtual-reg = <0xef600500>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <3 4>;
+	   		};
+
+			UART3: serial@ef600600 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600600 8>;
+		   		virtual-reg = <0xef600600>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <4 4>;
+	   		};
+
+			IIC0: i2c@ef600700 {
+                                #address-cells = <1>;
+                                #size-cells = <0>;
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				index = <0>;
+				reg = <0xef600700 0x14>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <2 4>;
+				rtc@68 {
+					compatible = "st,m41t80";
+					reg = <0x68>;
+				};
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				index = <5>;
+				reg = <0xef600800 0x14>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <7 4>;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-440ep", "ibm,zmii-440gp", "ibm,zmii";
+				reg = <0xef600d00 0xc>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				linux,network-index = <0>;
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 4 0x1d 4>;
+				reg = <0xef600e00 0x70>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0 1>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <0x5dc>;
+				rx-fifo-size = <0x1000>;
+				tx-fifo-size = <0x800>;
+				phy-mode = "rmii";
+				phy-map = <00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				linux,network-index = <1>;
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1e 4 0x1f 4>;
+				reg = <0xef600f00 0x70>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2 3>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <0x5dc>;
+				rx-fifo-size = <0x1000>;
+				tx-fifo-size = <0x800>;
+				phy-mode = "rmii";
+				phy-map = <00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+			};
+			usb@ef601000 {
+				compatible = "ohci-be";
+				reg = <0xef601000 0x80>;
+				interrupts = <8 4 9 4>;
+				interrupt-parent = <&UIC1>;
+			};	
+		};
+
+		PCI0: pci@ec000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440ep-pci", "ibm,plb-pci";
+			primary;
+			reg = <0 0xeec00000 8	   /* Config space access */
+			       0 0xeed00000 4	   /* IACK */
+			       0 0xeed00000 4	   /* Special cycle */
+			       0 0xef400000 0x40>; /* Internal registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * the latter cannot be changed. Chip supports a second
+			 * IO range but we don't use it for now
+			 */
+			ranges = <0x02000000 0 0xa0000000 0 0xa0000000 0 0x20000000
+				  0x01000000 0 0x00000000 0 0xe8000000 0 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0 0 0 0 0 0x80000000>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+	};
+};
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
new file mode 100644
index 0000000000..60d211da95
--- /dev/null
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -0,0 +1,412 @@
+/*
+ * Device Tree Source for AMCC Sequoia
+ *
+ * Based on Bamboo code by Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ * Copyright (c) 2006, 2007 IBM Corp.
+ *
+ * FIXME: Draft only!
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ *
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,sequoia";
+	compatible = "amcc,sequoia";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+		serial3 = &UART3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440EPx";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440epx","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440epx","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-440epx","ibm,uic";
+		interrupt-controller;
+		cell-index = <2>;
+		dcr-reg = <0x0e0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440epx", "ibm,sdr-440ep";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440epx", "ibm,cpr-440ep";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440epx", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440epx", "ibm,sdram-44x-ddr2denali";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		CRYPTO: crypto@e0100000 {
+			compatible = "amcc,ppc440epx-crypto","amcc,ppc4xx-crypto";
+			reg = <0 0xE0100000 0x80400>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x17 0x4>;
+		};
+
+		rng@e0120000 {
+			compatible = "amcc,ppc440epx-rng","amcc,ppc4xx-rng";
+			reg = <0 0xE0120000 0x150>;
+		};
+
+		DMA0: dma {
+			compatible = "ibm,dma-440epx", "ibm,dma-4xx";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal { /* referenced by EMAC0 and EMAC1 through their mal-device property */
+			compatible = "ibm,mcmal-440epx", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <2>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>; /* self-reference: specifiers are looked up in this node's interrupt-map */
+			interrupts = <0x0 0x1 0x2 0x3 0x4>; /* map keys: TXEOB, RXEOB, SERR, TXDE, RXDE */
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>; /* compare the whole one-cell specifier */
+		};
+
+		USB1: usb@e0000400 {
+			compatible = "ibm,usb-ohci-440epx", "ohci-be";
+			reg = <0x00000000 0xe0000400 0x00000060>;
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x15 0x8>;
+		};
+
+		USB0: ehci@e0000300 {
+			compatible = "ibm,usb-ehci-440epx", "usb-ehci";
+			interrupt-parent = <&UIC0>;
+			interrupts = <0x1a 0x4>;
+			reg = <0x00000000 0xe0000300 0x00000090 0x00000000 0xe0000390 0x00000070>;
+			big-endian;
+		};
+
+		POB0: opb {
+		  	compatible = "ibm,opb-440epx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+		  	ranges = <0x00000000 0x00000001 0x00000000 0x80000000
+			          0x80000000 0x00000001 0x80000000 0x80000000>;
+		  	interrupt-parent = <&UIC1>;
+		  	interrupts = <0x7 0x4>;
+		  	clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440epx", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl256n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "Kernel";
+						reg = <0x00000000 0x00180000>;
+					};
+					partition@180000 {
+						label = "ramdisk";
+						reg = <0x00180000 0x00200000>;
+					};
+					partition@380000 {
+						label = "file system";
+						reg = <0x00380000 0x03aa0000>;
+					};
+					partition@3e20000 {
+						label = "kozio";
+						reg = <0x03e20000 0x00140000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <0x03f60000 0x00040000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <0x03fa0000 0x00060000>;
+					};
+				};
+
+				ndfc@3,0 {
+					compatible = "ibm,ndfc";
+					reg = <0x00000003 0x00000000 0x00002000>;
+					ccr = <0x00001000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					nand {
+						#address-cells = <1>;
+						#size-cells = <1>;
+
+						partition@0 {
+							label = "u-boot";
+							reg = <0x00000000 0x00084000>;
+						};
+						partition@84000 {
+							label = "user";
+							reg = <0x00084000 0x01f7c000>;
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600300 0x00000008>;
+		   		virtual-reg = <0xef600300>;
+		   		clock-frequency = <0>; /* Filled in by zImage */
+		   		current-speed = <115200>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x0 0x4>;
+	   		};
+
+			UART1: serial@ef600400 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600400 0x00000008>;
+		   		virtual-reg = <0xef600400>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC0>;
+		   		interrupts = <0x1 0x4>;
+	   		};
+
+			UART2: serial@ef600500 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600500 0x00000008>;
+		   		virtual-reg = <0xef600500>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC1>;
+		   		interrupts = <0x3 0x4>;
+	   		};
+
+			UART3: serial@ef600600 {
+		   		device_type = "serial";
+		   		compatible = "ns16550";
+		   		reg = <0xef600600 0x00000008>;
+		   		virtual-reg = <0xef600600>;
+		   		clock-frequency = <0>;
+		   		current-speed = <0>;
+		   		interrupt-parent = <&UIC1>;
+		   		interrupts = <0x4 0x4>;
+	   		};
+
+			IIC0: i2c@ef600700 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "ibm,iic-440epx", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+
+				hwmon@48 {
+					compatible = "adi,ad7414";
+					reg = <0x48>;
+				};
+			};
+
+			IIC1: i2c@ef600800 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "ibm,iic-440epx", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-440epx", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			RGMII0: emac-rgmii@ef601000 {
+				compatible = "ibm,rgmii-440epx", "ibm,rgmii";
+				reg = <0xef601000 0x00000008>;
+				has-mdio;
+			};
+
+			EMAC0: ethernet@ef600e00 { /* network port 0: channel 0 on MAL0, ZMII0 and RGMII0 */
+				device_type = "network";
+				compatible = "ibm,emac-440epx", "ibm,emac4";
+				interrupt-parent = <&EMAC0>; /* self-reference: resolved via interrupt-map below */
+				interrupts = <0x0 0x1>; /* map keys: Status, Wake */
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
+						/*Wake*/  0x1 &UIC1 0x1d 0x4>;
+				reg = <0xef600e00 0x00000074>;
+				local-mac-address = [000000000000]; /* all-zero placeholder; presumably set at boot - TODO confirm */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+
+			EMAC1: ethernet@ef600f00 { /* network port 1: channel 1 on MAL0, ZMII0 and RGMII0 */
+				device_type = "network";
+				compatible = "ibm,emac-440epx", "ibm,emac4";
+				interrupt-parent = <&EMAC1>; /* self-reference: resolved via interrupt-map below */
+				interrupts = <0x0 0x1>; /* map keys: Status, Wake */
+				#interrupt-cells = <1>;
+				#address-cells = <0>;
+				#size-cells = <0>;
+				interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
+						/*Wake*/  0x1 &UIC1 0x1f 0x4>;
+				reg = <0xef600f00 0x00000074>;
+				local-mac-address = [000000000000]; /* all-zero placeholder; presumably set at boot - TODO confirm */
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				has-inverted-stacr-oc;
+				has-new-stacr-staopc;
+			};
+		};
+
+		PCI0: pci@1ec000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440epx-pci", "ibm,plb-pci";
+			primary;
+			reg = <0x00000001 0xeec00000 0x00000008	/* Config space access */
+			       0x00000001 0xeed00000 0x00000004	/* IACK */
+			       0x00000001 0xeed00000 0x00000004	/* Special cycle */
+			       0x00000001 0xef400000 0x00000040>;	/* Internal registers */
+
+			/* Outbound ranges, one memory and one IO; the
+			 * latter cannot be changed. The chip supports a second
+			 * IO range, but we don't use it for now.
+			 * From the 440EPx user manual:
+			 * PCI 1 Memory     1 8000 0000     1 BFFF FFFF     1GB
+			 * I/O              1 E800 0000     1 E800 FFFF     64KB
+			 * I/O              1 E880 0000     1 EBFF FFFF     56MB
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000001 0x80000000 0x00000000 0x40000000
+				0x01000000 0x00000000 0x00000000 0x00000001 0xe8000000 0x00000000 0x00010000
+				0x01000000 0x00000000 0x00000000 0x00000001 0xe8800000 0x00000000 0x03800000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			/* All PCI interrupts are routed to IRQ 67 */
+			interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+			interrupt-map = < 0x0 0x0 0x0 0x0 &UIC2 0x3 0x8 >;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+		bootargs = "console=ttyS0,115200";
+	};
+};
diff --git a/arch/powerpc/boot/dts/socrates.dts b/arch/powerpc/boot/dts/socrates.dts
new file mode 100644
index 0000000000..00a56e8e36
--- /dev/null
+++ b/arch/powerpc/boot/dts/socrates.dts
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device Tree Source for the Socrates board (MPC8544).
+ *
+ * Copyright (c) 2008 Emcraft Systems.
+ * Sergei Poselenov, <sposelenov@emcraft.com>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "abb,socrates";
+	compatible = "abb,socrates";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8544@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <0x8000>;	// L1, 32K
+			i-cache-size = <0x8000>;	// L1, 32K
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>;	// Filled in by U-Boot
+	};
+
+	soc8544@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+
+		ranges = <0x00000000 0xe0000000 0x00100000>;
+		bus-frequency = <0>;		// Filled in by U-Boot
+		compatible = "fsl,mpc8544-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <10>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8544-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8544-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8544-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl,mpc8544-i2c", "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			fsl,preserve-clocking;
+
+			dtt@28 {
+				compatible = "winbond,w83782d";
+				reg = <0x28>;
+			};
+			rtc@32 {
+				compatible = "epson,rx8025";
+				reg = <0x32>;
+				interrupts = <7 1>;
+				interrupt-parent = <&mpic>;
+			};
+			dtt@4c {
+				compatible = "dallas,ds75";
+				reg = <0x4c>;
+			};
+			ts@4a {
+				compatible = "ti,tsc2003";
+				reg = <0x4a>;
+				interrupt-parent = <&mpic>;
+				interrupts = <8 1>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl,mpc8544-i2c", "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			fsl,preserve-clocking;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			phy-handle = <&phy0>;
+			tbi-handle = <&tbi0>;
+			phy-connection-type = "rgmii-id";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@0 {
+					interrupt-parent = <&mpic>;
+					interrupts = <0 1>;
+					reg = <0>;
+				};
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <0 1>;
+					reg = <1>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+				};
+			};
+		};
+
+		enet1: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+			phy-handle = <&phy1>;
+			tbi-handle = <&tbi1>;
+			phy-connection-type = "rgmii-id";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,mpc8548-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+	};
+
+
+	localbus {
+		compatible = "fsl,mpc8544-localbus",
+		             "fsl,pq3-localbus",
+			     "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xe0005000 0x40>;
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <0 0 0xfc000000 0x04000000
+			  2 0 0xc8000000 0x04000000
+			  3 0 0xc0000000 0x00100000
+			>; /* Overwritten by U-Boot */
+
+		nor_flash@0,0 {
+			compatible = "amd,s29gl256n", "cfi-flash";
+			bank-width = <2>;
+			reg = <0x0 0x000000 0x4000000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "kernel";
+				reg = <0x0 0x1e0000>;
+				read-only;
+			};
+			partition@1e0000 {
+				label = "dtb";
+				reg = <0x1e0000 0x20000>;
+			};
+			partition@200000 {
+				label = "root";
+				reg = <0x200000 0x200000>;
+			};
+			partition@400000 {
+				label = "user";
+				reg = <0x400000 0x3b80000>;
+			};
+			partition@3f80000 {
+				label = "env";
+				reg = <0x3f80000 0x40000>;
+				read-only;
+			};
+			partition@3fc0000 {
+				label = "u-boot";
+				reg = <0x3fc0000 0x40000>;
+				read-only;
+			};
+		};
+
+		display@2,0 {
+			compatible = "fujitsu,lime";
+			reg = <2 0x0 0x4000000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <6 1>;
+		};
+
+		fpga_pic: fpga-pic@3,10 {	// interrupt parent of spi@3,60 and can@3,100
+			compatible = "abb,socrates-fpga-pic";
+			reg = <3 0x10 0x10>;	// window 3 of the localbus ranges, offset 0x10, length 0x10
+			interrupt-controller;
+			/* MPIC IRQs 2, 10, 11, active low, level-sensitive */
+			interrupts = <2 1 10 1 11 1>;
+			interrupt-parent = <&mpic>;
+			#interrupt-cells = <3>;	// number, type, routing
+		};
+
+		spi@3,60 {
+			compatible = "abb,socrates-spi";
+			reg = <3 0x60 0x10>;
+			interrupts = <8 4 0>;	// number, type, routing
+			interrupt-parent = <&fpga_pic>;
+		};
+
+		nand@3,70 {
+			compatible = "abb,socrates-nand";
+			reg = <3 0x70 0x04>;
+			bank-width = <1>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			data@0 {
+				label = "data";
+				reg = <0x0 0x40000000>;
+			};
+		};
+
+		can@3,100 {
+			compatible = "philips,sja1000";
+			reg = <3 0x100 0x80>;
+			interrupts = <2 8 1>;	// number, type, routing
+			interrupt-parent = <&fpga_pic>;
+		};
+	};
+
+	pci0: pci@e0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xe0008000 0x1000>;
+		clock-frequency = <66666666>;
+
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 0x11 */
+				 0x8800 0x0 0x0 1 &mpic 5 1
+				/* IDSEL 0x12 */
+				 0x9000 0x0 0x0 1 &mpic 4 1>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0x0 0x0>;
+		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
+			  0x01000000 0x0 0x00000000 0xe2000000 0x0 0x01000000>;
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts
new file mode 100644
index 0000000000..99f6f544dc
--- /dev/null
+++ b/arch/powerpc/boot/dts/storcenter.dts
@@ -0,0 +1,142 @@
+/*
+ * Device Tree Source for IOMEGA StorCenter
+ *
+ * Copyright 2007 Oyvind Repvik
+ * Copyright 2007 Jon Loeliger
+ *
+ * Based on the Kurobox DTS by G. Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "StorCenter";
+	compatible = "iomega,storcenter";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8241@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clock-frequency = <200000000>;
+			timebase-frequency = <25000000>;
+			bus-frequency = <0>;	/* from bootwrapper */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <16384>;
+			d-cache-size = <16384>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;	/* 64MB @ 0x0 */
+	};
+
+	soc@fc000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8241", "mpc10x";
+		store-gathering = <0>; /* 0 == off, !0 == on */
+		ranges = <0x0 0xfc000000 0x100000>;
+		reg = <0xfc000000 0x100000>;	/* EUMB */
+		bus-frequency = <0>;		/* fixed by loader */
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x20>;
+			clock-frequency = <97553800>; /* Hz */
+			current-speed = <115200>;
+			interrupts = <25 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x20>;
+			clock-frequency = <97553800>; /* Hz */
+			current-speed = <9600>;
+			interrupts = <26 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		mpic: interrupt-controller@40000 {
+			#interrupt-cells = <2>;
+			#address-cells = <0>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+			interrupt-controller;
+			reg = <0x40000 0x40000>;
+		};
+
+	};
+
+	pci0: pci@fe800000 {
+		#address-cells = <3>;
+		#size-cells = <2>;
+		#interrupt-cells = <1>;
+		device_type = "pci";
+		compatible = "mpc10x-pci";
+		reg = <0xfe800000 0x1000>;
+		ranges = <0x01000000 0x0        0x0 0xfe000000 0x0 0x00c00000	/* I/O space */
+			  0x02000000 0x0 0x80000000 0x80000000 0x0 0x70000000>;	/* 32-bit memory space */
+		bus-range = <0 0xff>;
+		clock-frequency = <97553800>;
+		interrupt-parent = <&mpic>;
+		interrupt-map-mask = <0xf800 0 0 7>;	/* match on device number (IDSEL << 11) and INTx pin */
+		interrupt-map = <	/* every device/pin below resolves to <&mpic 0 1> */
+			/* IDSEL 13 - IDE */
+			0x6800 0 0 1 &mpic 0 1
+			0x6800 0 0 2 &mpic 0 1
+			0x6800 0 0 3 &mpic 0 1
+			0x6800 0 0 4 &mpic 0 1
+			/* IDSEL 14 - USB */
+			0x7000 0 0 1 &mpic 0 1
+			0x7000 0 0 2 &mpic 0 1
+			0x7000 0 0 3 &mpic 0 1
+			0x7000 0 0 4 &mpic 0 1
+			/* IDSEL 15 - ETH */
+			0x7800 0 0 1 &mpic 0 1
+			0x7800 0 0 2 &mpic 0 1
+			0x7800 0 0 3 &mpic 0 1
+			0x7800 0 0 4 &mpic 0 1
+		>;
+	};
+
+	chosen {
+		stdout-path = &serial0;
+	};
+};
diff --git a/arch/powerpc/boot/dts/stx_gp3_8560.dts b/arch/powerpc/boot/dts/stx_gp3_8560.dts
new file mode 100644
index 0000000000..e73f7e75b0
--- /dev/null
+++ b/arch/powerpc/boot/dts/stx_gp3_8560.dts
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * STX GP3 - 8560 ADS Device Tree Source
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "stx,gp3";
+	compatible = "stx,gp3-8560", "stx,gp3";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8560@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	soc@fdf00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0 0xfdf00000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8560-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8560-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8540-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8540-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8560-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <5 4>;
+					reg = <2>;
+				};
+				phy4: ethernet-phy@4 {
+					interrupt-parent = <&mpic>;
+					interrupts = <5 4>;
+					reg = <4>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy4>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		cpm@919c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8560-cpm", "fsl,cpm2", "simple-bus";
+			reg = <0x919c0 0x30>;
+			ranges;
+
+			muram@80000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x80000 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x4000 0x9000 0x2000>;
+				};
+			};
+
+			brg@919f0 {
+				compatible = "fsl,mpc8560-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x919f0 0x10 0x915f0 0x10>;
+				clock-frequency = <0>;
+			};
+
+			cpmpic: pic@90c00 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				interrupts = <46 2>;
+				interrupt-parent = <&mpic>;
+				reg = <0x90c00 0x80>;
+				compatible = "fsl,mpc8560-cpm-pic", "fsl,cpm2-pic";
+			};
+
+			serial0: serial@91a20 {
+				device_type = "serial";
+				compatible = "fsl,mpc8560-scc-uart",
+				             "fsl,cpm2-scc-uart";
+				reg = <0x91a20 0x20 0x88100 0x100>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0x4a00000>;
+				interrupts = <41 8>;
+				interrupt-parent = <&cpmpic>;
+			};
+		};
+	};
+
+	pci0: pci@fdf08000 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+
+			/* IDSEL 0x0c */
+			0x6000 0 0 1 &mpic 1 1
+			0x6000 0 0 2 &mpic 2 1
+			0x6000 0 0 3 &mpic 3 1
+			0x6000 0 0 4 &mpic 4 1
+
+			/* IDSEL 0x0d */
+			0x6800 0 0 1 &mpic 4 1
+			0x6800 0 0 2 &mpic 1 1
+			0x6800 0 0 3 &mpic 2 1
+			0x6800 0 0 4 &mpic 3 1
+
+			/* IDSEL 0x0e */
+			0x7000 0 0 1 &mpic 3 1
+			0x7000 0 0 2 &mpic 4 1
+			0x7000 0 0 3 &mpic 1 1
+			0x7000 0 0 4 &mpic 2 1
+
+			/* IDSEL 0x0f */
+			0x7800 0 0 1 &mpic 2 1
+			0x7800 0 0 2 &mpic 3 1
+			0x7800 0 0 3 &mpic 4 1
+			0x7800 0 0 4 &mpic 1 1>;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xe2000000 0 0x00100000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xfdf08000 0x1000>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+	};
+};
diff --git a/arch/powerpc/boot/dts/stxssa8555.dts b/arch/powerpc/boot/dts/stxssa8555.dts
new file mode 100644
index 0000000000..96add25c90
--- /dev/null
+++ b/arch/powerpc/boot/dts/stxssa8555.dts
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8555-based STx GP3 Device Tree Source
+ *
+ * Copyright 2006, 2008 Freescale Semiconductor Inc.
+ *
+ * Copyright 2010 Silicon Turnkey Express LLC.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "stx,gp3";
+	compatible = "stx,gp3-8560", "stx,gp3";	// NOTE(review): "-8560" on an MPC8555 board; presumably what the platform code matches - confirm before changing
+	#address-cells = <1>;	// root-bus addresses are one 32-bit cell
+	#size-cells = <1>;	// root-bus sizes are one 32-bit cell
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8555@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	//  33 MHz, from uboot
+			bus-frequency = <0>;	// 166 MHz
+			clock-frequency = <0>;	// 825 MHz, from uboot
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	soc8555@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x0 0xe0000000 0x100000>;
+		bus-frequency = <0>;
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8555-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8555-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8555-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8555-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <5 1>;
+					reg = <0x2>;
+				};
+				phy1: ethernet-phy@4 {
+					interrupt-parent = <&mpic>;
+					interrupts = <5 1>;
+					reg = <0x4>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x7e>;
+			fsl,descriptor-types-mask = <0x01010ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		cpm@919c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8555-cpm", "fsl,cpm2";
+			reg = <0x919c0 0x30>;
+			ranges;
+
+			muram@80000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x80000 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x0 0x2000 0x9000 0x1000>;
+				};
+			};
+
+			brg@919f0 {
+				compatible = "fsl,mpc8555-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x919f0 0x10 0x915f0 0x10>;
+			};
+
+			cpmpic: pic@90c00 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				interrupts = <46 2>;
+				interrupt-parent = <&mpic>;
+				reg = <0x90c00 0x80>;
+				compatible = "fsl,mpc8555-cpm-pic", "fsl,cpm2-pic";
+			};
+		};
+	};
+
+	pci0: pci@e0008000 {
+		interrupt-map-mask = <0x1f800 0x0 0x0 0x7>;
+		interrupt-map = <
+
+			/* IDSEL 0x10 */
+			0x8000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x8000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x8000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x8000 0x0 0x0 0x4 &mpic 0x3 0x1
+
+			/* IDSEL 0x11 */
+			0x8800 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x8800 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x8800 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x8800 0x0 0x0 0x4 &mpic 0x3 0x1
+
+			/* IDSEL 0x12 (Slot 1) */
+			0x9000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x9000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x9000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x9000 0x0 0x0 0x4 &mpic 0x3 0x1
+
+			/* IDSEL 0x13 (Slot 2) */
+			0x9800 0x0 0x0 0x1 &mpic 0x1 0x1
+			0x9800 0x0 0x0 0x2 &mpic 0x2 0x1
+			0x9800 0x0 0x0 0x3 &mpic 0x3 0x1
+			0x9800 0x0 0x0 0x4 &mpic 0x0 0x1
+
+			/* IDSEL 0x14 (Slot 3) */
+			0xa000 0x0 0x0 0x1 &mpic 0x2 0x1
+			0xa000 0x0 0x0 0x2 &mpic 0x3 0x1
+			0xa000 0x0 0x0 0x3 &mpic 0x0 0x1
+			0xa000 0x0 0x0 0x4 &mpic 0x1 0x1
+
+			/* IDSEL 0x15 (Slot 4) */
+			0xa800 0x0 0x0 0x1 &mpic 0x3 0x1
+			0xa800 0x0 0x0 0x2 &mpic 0x0 0x1
+			0xa800 0x0 0x0 0x3 &mpic 0x1 0x1
+			0xa800 0x0 0x0 0x4 &mpic 0x2 0x1
+
+			/* Bus 1 (Tundra Bridge) */
+			/* IDSEL 0x12 (ISA bridge) */
+			0x19000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x19000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x19000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x19000 0x0 0x0 0x4 &mpic 0x3 0x1>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x2000000 0x0 0x80000000 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x0 0xe2000000 0x0 0x100000>;
+		clock-frequency = <66666666>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0008000 0x1000>;
+		compatible = "fsl,mpc8540-pci";
+		device_type = "pci";
+
+		i8259@19000 {
+			interrupt-controller;
+			device_type = "interrupt-controller";
+			reg = <0x19000 0x0 0x0 0x0 0x1>;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			compatible = "chrp,iic";
+			interrupts = <1>;
+			interrupt-parent = <&pci0>;
+		};
+	};
+
+	pci1: pci@e0009000 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// match on device number and INTx pin only
+		interrupt-map = <
+
+			/* IDSEL 0x15: all four INTx pins are routed to MPIC input 0xb */
+			0xa800 0x0 0x0 0x1 &mpic 0xb 0x1
+			0xa800 0x0 0x0 0x2 &mpic 0xb 0x1
+			0xa800 0x0 0x0 0x3 &mpic 0xb 0x1
+			0xa800 0x0 0x0 0x4 &mpic 0xb 0x1>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;	// the host bridge's own interrupt
+		bus-range = <0 0>;
+		ranges = <0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000	/* 512 MiB memory space, mapped 1:1 at 0xa0000000 */
+			  0x1000000 0x0 0x0 0xe3000000 0x0 0x100000>;	/* 1 MiB I/O space at CPU address 0xe3000000 */
+		clock-frequency = <66666666>;	// 66.67 MHz
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe0009000 0x1000>;	// bridge registers, 4 KiB
+		compatible = "fsl,mpc8540-pci";
+		device_type = "pci";
+	};
+};
diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts
new file mode 100644
index 0000000000..803f1bff7f
--- /dev/null
+++ b/arch/powerpc/boot/dts/taishan.dts
@@ -0,0 +1,427 @@
+/*
+ * Device Tree Source for IBM/AMCC Taishan
+ *
+ * Copyright 2007 IBM Corp.
+ * Hugh Blemings <hugh@au.ibm.com> based off code by
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,taishan";
+	compatible = "amcc,taishan";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC2;
+		ethernet1 = &EMAC3;
+		serial0 = &UART0;
+		serial1 = &UART1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440GX";
+			reg = <0x00000000>;
+			clock-frequency = <800000000>; // 800MHz
+			timebase-frequency = <0>; // Filled in by zImage
+			i-cache-line-size = <32>; /* 32 bytes, same line size as the L2C0 node */
+			d-cache-line-size = <32>; /* 32 bytes, same line size as the L2C0 node */
+			i-cache-size = <32768>; /* 32 kB */
+			d-cache-size = <32768>; /* 32 kB */
+			dcr-controller;
+			dcr-access-method = "native"; /* root dcr-parent points at this node */
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; // Filled in by zImage
+	};
+
+
+	UICB0: interrupt-controller-base {
+		compatible = "ibm,uic-440gx", "ibm,uic";
+		interrupt-controller;
+		cell-index = <3>;
+		dcr-reg = <0x200 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440gx", "ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1 0x4 0x0 0x4>; /* cascade - first non-critical */
+		interrupt-parent = <&UICB0>;
+
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440gx", "ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x3 0x4 0x2 0x4>; /* cascade */
+		interrupt-parent = <&UICB0>;
+	};
+
+	UIC2: interrupt-controller2 {
+		compatible = "ibm,uic-440gx", "ibm,uic";
+		interrupt-controller;
+		cell-index = <2>; /* was 1 */
+		dcr-reg = <0x210 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x5 0x4 0x4 0x4>; /* cascade */
+		interrupt-parent = <&UICB0>;
+	};
+
+
+	CPC0: cpc {
+		compatible = "ibm,cpc-440gp";
+		dcr-reg = <0x0b0 0x003 0x0e0 0x010>;
+		// FIXME: anything else?
+	};
+
+	L2C0: l2c {
+		compatible = "ibm,l2-cache-440gx", "ibm,l2-cache";
+		dcr-reg = <0x020 0x008			/* Internal SRAM DCR's */
+			   0x030 0x008>;		/* L2 cache DCR's */
+		cache-line-size = <32>;		/* 32 bytes */
+		cache-size = <262144>;		/* L2, 256K */
+		interrupt-parent = <&UIC2>;
+		interrupts = <0x17 0x1>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440gx", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <160000000>; // 160MHz
+
+		SDRAM0: memory-controller {
+			compatible = "ibm,sdram-440gp";
+			dcr-reg = <0x010 0x002>;
+			// FIXME: anything else?
+		};
+
+		SRAM0: sram {
+			compatible = "ibm,sram-440gp";
+			dcr-reg = <0x020 0x008 0x00a 0x001>;
+		};
+
+		DMA0: dma {
+			// FIXME: ???
+			compatible = "ibm,dma-440gp";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440gx", "ibm,mcmal2";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <4>;
+			interrupt-parent = <&MAL0>;	// self-reference: specifiers are resolved via the interrupt-map below
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;	// five 1-cell specifiers, one per interrupt-map row
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					 /*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					 /*SERR*/  0x2 &UIC1 0x0 0x4
+					 /*TXDE*/  0x3 &UIC1 0x1 0x4
+					 /*RXDE*/  0x4 &UIC1 0x2 0x4>;
+			interrupt-map-mask = <0xffffffff>;	// compare the full specifier cell
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-440gx", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Wish there was a nicer way of specifying a full 32-bit
+			   range */
+			ranges = <0x00000000 0x00000001 0x00000000 0x80000000
+				  0x80000000 0x00000001 0x80000000 0x80000000>;
+			dcr-reg = <0x090 0x00b>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <0x7 0x4>;
+			clock-frequency = <80000000>; // 80MHz
+
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440gx", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <80000000>; // 80MHz
+
+				/* ranges property is supplied by zImage
+				 * based on firmware's configuration of the
+				 * EBC bridge */
+
+				interrupts = <0x5 0x4>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "cfi-flash";
+					bank-width = <4>;
+					device-width = <2>;
+					reg = <0x0 0x0 0x4000000>;	/* 64 MiB; the partitions below tile it exactly */
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x0 0x180000>;	/* 1.5 MiB */
+					};
+					partition@180000 {
+						label = "root";
+						reg = <0x180000 0x200000>;	/* 2 MiB */
+					};
+					partition@380000 {
+						label = "user";
+						reg = <0x380000 0x3bc0000>;	/* 59.75 MiB */
+					};
+					partition@3f40000 {
+						label = "env";
+						reg = <0x3f40000 0x80000>;	/* 512 KiB */
+					};
+					partition@3fc0000 {
+						label = "u-boot";
+						reg = <0x3fc0000 0x40000>;	/* 256 KiB, ends at the top of the flash */
+					};
+				};
+			};
+
+
+
+			UART0: serial@40000200 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x40000200 0x00000008>; /* 8 bytes of registers on the OPB */
+				virtual-reg = <0xe0000200>;
+				clock-frequency = <11059200>; /* 11.0592 MHz */
+				current-speed = <115200>; /* baud rate */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			UART1: serial@40000300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x40000300 0x00000008>;
+				virtual-reg = <0xe0000300>;
+				clock-frequency = <11059200>;
+				current-speed = <115200>; /* 115200 */
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			IIC0: i2c@40000400 {
+				/* FIXME */
+				compatible = "ibm,iic-440gp", "ibm,iic";
+				reg = <0x40000400 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+			IIC1: i2c@40000500 {
+				/* FIXME */
+				compatible = "ibm,iic-440gp", "ibm,iic";
+				reg = <0x40000500 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+			};
+
+			GPIO0: gpio@40000700 {
+				/* FIXME */
+				compatible = "ibm,gpio-440gp";
+				reg = <0x40000700 0x00000020>;
+			};
+
+			ZMII0: emac-zmii@40000780 {
+				compatible = "ibm,zmii-440gx", "ibm,zmii";
+				reg = <0x40000780 0x0000000c>;
+			};
+
+			RGMII0: emac-rgmii@40000790 {
+				compatible = "ibm,rgmii";
+				reg = <0x40000790 0x00000008>;
+			};
+
+			TAH0: emac-tah@40000b50 {
+				compatible = "ibm,tah-440gx", "ibm,tah";
+				reg = <0x40000b50 0x00000030>;
+			};
+
+			TAH1: emac-tah@40000d50 {
+				compatible = "ibm,tah-440gx", "ibm,tah";
+				reg = <0x40000d50 0x00000030>;
+			};
+
+			EMAC0: ethernet@40000800 {
+				unused = <0x1>;
+				device_type = "network";
+				compatible = "ibm,emac-440gx", "ibm,emac4";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0x40000800 0x00000074>;
+				local-mac-address = [000000000000]; // Filled in by zImage
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000001>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+			};
+			EMAC1: ethernet@40000900 {
+				unused = <0x1>; // NOTE(review): presumably tells the driver to skip this MAC - confirm
+				device_type = "network";
+				compatible = "ibm,emac-440gx", "ibm,emac4";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1e 0x4 0x1f 0x4>;
+				reg = <0x40000900 0x00000074>;
+				local-mac-address = [000000000000]; // Filled in by zImage
+				mal-device = <&MAL0>;
+				mal-tx-channel = <1>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000001>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+			};
+
+			EMAC2: ethernet@40000c00 { // target of the ethernet0 alias
+				device_type = "network";
+				compatible = "ibm,emac-440gx", "ibm,emac4";
+				interrupt-parent = <&UIC2>;
+				interrupts = <0x0 0x4 0x1 0x4>;
+				reg = <0x40000c00 0x00000074>;
+				local-mac-address = [000000000000]; // Filled in by zImage
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2>;
+				mal-rx-channel = <2>;
+				cell-index = <2>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-address = <1>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <0>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <2>;
+				tah-device = <&TAH0>;
+				tah-channel = <0>;
+			};
+
+			EMAC3: ethernet@40000e00 { // target of the ethernet1 alias
+				device_type = "network";
+				compatible = "ibm,emac-440gx", "ibm,emac4";
+				interrupt-parent = <&UIC2>;
+				interrupts = <0x2 0x4 0x3 0x4>;
+				reg = <0x40000e00 0x00000074>;
+				local-mac-address = [000000000000]; // Filled in by zImage
+				mal-device = <&MAL0>;
+				mal-tx-channel = <3>;
+				mal-rx-channel = <3>;
+				cell-index = <3>;
+				max-frame-size = <9000>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rgmii";
+				phy-address = <3>;
+				rgmii-device = <&RGMII0>;
+				rgmii-channel = <1>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <3>;
+				tah-device = <&TAH1>;
+				tah-channel = <0>;
+			};
+
+
+			GPT0: gpt@40000a00 {
+				/* FIXME */
+				reg = <0x40000a00 0x000000d4>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x12 0x4 0x13 0x4 0x14 0x4 0x15 0x4 0x16 0x4>;
+			};
+
+		};
+
+		PCIX0: pci@20ec00000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440gp-pcix", "ibm,plb-pcix";
+			primary;
+			large-inbound-windows;
+			enable-msi-hole;
+			reg = <0x00000002 0x0ec00000   0x00000008	/* Config space access */
+			       0x00000000 0x00000000 0x00000000		/* no IACK cycles */
+			       0x00000002 0x0ed00000   0x00000004   /* Special cycles */
+			       0x00000002 0x0ec80000 0x00000100	/* Internal registers */
+			       0x00000002 0x0ec80100  0x000000fc>;	/* Internal messaging registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed
+			 */
+			ranges = <0x02000000 0x00000000 0x80000000 0x00000003 0x80000000 0x00000000 0x80000000
+				  0x01000000 0x00000000 0x00000000 0x00000002 0x08000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+			interrupt-map = <
+				/* IDSEL 1 */
+				0x800 0x0 0x0 0x1 &UIC0 0x17 0x8
+				0x800 0x0 0x0 0x2 &UIC0 0x18 0x8
+				0x800 0x0 0x0 0x3 &UIC0 0x19 0x8
+				0x800 0x0 0x0 0x4 &UIC0 0x1a 0x8
+
+				/* IDSEL 2 */
+				0x1000 0x0 0x0 0x1 &UIC0 0x18 0x8
+				0x1000 0x0 0x0 0x2 &UIC0 0x19 0x8
+				0x1000 0x0 0x0 0x3 &UIC0 0x1a 0x8
+				0x1000 0x0 0x0 0x4 &UIC0 0x17 0x8
+			>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@40000300";
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts
new file mode 100644
index 0000000000..372177b19e
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm5200.dts
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM5200 board Device Tree Source
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "tqc,tqm5200";
+	compatible = "tqc,tqm5200";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&mpc5200_pic>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,5200@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <0x4000>;	// L1, 16K
+			i-cache-size = <0x4000>;	// L1, 16K
+			timebase-frequency = <0>;	// from bootloader
+			bus-frequency = <0>;		// from bootloader
+			clock-frequency = <0>;		// from bootloader
+		};
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x04000000>;	// 64MB
+	};
+
+	soc5200@f0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200-immr";
+		ranges = <0 0xf0000000 0x0000c000>;
+		reg = <0xf0000000 0x00000100>;
+		bus-frequency = <0>;		// from bootloader
+		system-frequency = <0>;		// from bootloader
+
+		cdm@200 {
+			compatible = "fsl,mpc5200-cdm";
+			reg = <0x200 0x38>;
+		};
+
+		mpc5200_pic: interrupt-controller@500 {
+			// 5200 interrupts are encoded into two levels;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			compatible = "fsl,mpc5200-pic";
+			reg = <0x500 0x80>;
+		};
+
+		timer@600 {	// General Purpose Timer
+			compatible = "fsl,mpc5200-gpt";
+			reg = <0x600 0x10>;
+			interrupts = <1 9 0>;
+			fsl,has-wdt;
+		};
+
+		can@900 {
+			compatible = "fsl,mpc5200-mscan";
+			interrupts = <2 17 0>;
+			reg = <0x900 0x80>;
+		};
+
+		can@980 {
+			compatible = "fsl,mpc5200-mscan";
+			interrupts = <2 18 0>;
+			reg = <0x980 0x80>;
+		};
+
+		gpio_simple: gpio@b00 {
+			compatible = "fsl,mpc5200-gpio";
+			reg = <0xb00 0x40>;
+			interrupts = <1 7 0>;
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		usb@1000 {
+			compatible = "fsl,mpc5200-ohci","ohci-be";
+			reg = <0x1000 0xff>;
+			interrupts = <2 6 0>;
+		};
+
+		dma-controller@1200 {
+			compatible = "fsl,mpc5200-bestcomm";
+			reg = <0x1200 0x80>;
+			interrupts = <3 0 0  3 1 0  3 2 0  3 3 0
+			              3 4 0  3 5 0  3 6 0  3 7 0
+			              3 8 0  3 9 0  3 10 0  3 11 0
+			              3 12 0  3 13 0  3 14 0  3 15 0>;
+		};
+
+		xlb@1f00 {
+			compatible = "fsl,mpc5200-xlb";
+			reg = <0x1f00 0x100>;
+		};
+
+		serial@2000 {		// PSC1
+			compatible = "fsl,mpc5200-psc-uart";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		serial@2200 {		// PSC2
+			compatible = "fsl,mpc5200-psc-uart";
+			reg = <0x2200 0x100>;
+			interrupts = <2 2 0>;
+		};
+
+		serial@2400 {		// PSC3
+			compatible = "fsl,mpc5200-psc-uart";
+			reg = <0x2400 0x100>;
+			interrupts = <2 3 0>;
+		};
+
+		ethernet@3000 {
+			compatible = "fsl,mpc5200-fec";
+			reg = <0x3000 0x400>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <2 5 0>;
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-mdio";
+			reg = <0x3000 0x400>;       // fec range, since we need to setup fec interrupts
+			interrupts = <2 5 0>;   // these are for "mii command finished", not link changes & co.
+
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+		};
+
+		ata@3a00 {
+			compatible = "fsl,mpc5200-ata";
+			reg = <0x3a00 0x100>;
+			interrupts = <2 7 0>;
+		};
+
+		i2c@3d40 {
+			#address-cells = <1>;	// children are addressed by one cell
+			#size-cells = <0>;
+			compatible = "fsl,mpc5200-i2c","fsl-i2c";
+			reg = <0x3d40 0x40>;
+			interrupts = <2 16 0>;
+
+			rtc@68 {
+				compatible = "dallas,ds1307";
+				reg = <0x68>;	// address of the RTC on this I2C bus
+			};
+		};
+
+		sram@8000 {
+			compatible = "fsl,mpc5200-sram";
+			reg = <0x8000 0x4000>;
+		};
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200-lpb","simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0xfc000000 0x02000000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x02000000>;
+			bank-width = <4>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+		};
+	};
+
+	pci@f0000d00 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		compatible = "fsl,mpc5200-pci";
+		reg = <0xf0000d00 0x100>;
+		interrupt-map-mask = <0xf800 0 0 7>;	// match on device number and INTx pin only
+		interrupt-map = <0xc000 0 0 1 &mpc5200_pic 0 0 3	// device 0x18: INTA..INTD all map to the same <0 0 3> specifier
+				 0xc000 0 0 2 &mpc5200_pic 0 0 3
+				 0xc000 0 0 3 &mpc5200_pic 0 0 3
+				 0xc000 0 0 4 &mpc5200_pic 0 0 3>;
+		clock-frequency = <0>; // From boot loader
+		interrupts = <2 8 0 2 9 0 2 10 0>;	// three 3-cell specifiers for the bridge itself
+		bus-range = <0 0>;
+		ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,	// 256 MiB prefetchable memory, 1:1
+			 <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,	// 256 MiB non-prefetchable memory, 1:1
+			 <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;	// 16 MiB I/O space at CPU address 0xa0000000
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts
new file mode 100644
index 0000000000..eb4d8fd3f7
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8540.dts
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM 8540 Device Tree Source
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "tqc,tqm8540";
+	compatible = "tqc,tqm8540";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8540@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;
+			i-cache-line-size = <32>;
+			d-cache-size = <32768>;
+			i-cache-size = <32768>;
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	soc@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xe0000000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8540-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8540-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8540-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8540-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			dtt@48 {
+				compatible = "national,lm75";
+				reg = <0x48>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8540-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8540-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8540-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8540-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <1>;
+				};
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <2>;
+				};
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <3>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "FEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <41 2>;
+			interrupt-parent = <&mpic>;
+			phy-handle = <&phy3>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+		};
+	};
+
+	localbus@e0005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8540-localbus", "fsl,pq3-localbus",
+			     "simple-bus";
+		reg = <0xe0005000 0x1000>;
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <0x0 0x0 0xfe000000 0x02000000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x02000000>;	// 32 MiB; the partitions below tile it exactly
+			bank-width = <4>;
+			device-width = <2>;
+			partition@0 {
+				label = "kernel";
+				reg = <0x00000000 0x00180000>;	// 1.5 MiB
+			};
+			partition@180000 {
+				label = "root";
+				reg = <0x00180000 0x01dc0000>;	// 29.75 MiB
+			};
+			partition@1f40000 {
+				label = "env1";
+				reg = <0x01f40000 0x00040000>;	// 256 KiB
+			};
+			partition@1f80000 {
+				label = "env2";
+				reg = <0x01f80000 0x00040000>;	// 256 KiB
+			};
+			partition@1fc0000 {
+				label = "u-boot";
+				reg = <0x01fc0000 0x00040000>;	// 256 KiB, ends at the top of the flash
+				read-only;
+			};
+		};
+	};
+
+	pci0: pci@e0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xe0008000 0x1000>;	// bridge registers, 4 KiB
+		clock-frequency = <66666666>;	// 66.67 MHz
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// match on device number and INTx pin only
+		interrupt-map = <
+				/* IDSEL 28 (28 << 11 = 0xe000) */
+				 0xe000 0 0 1 &mpic 2 1
+				 0xe000 0 0 2 &mpic 3 1
+				 0xe000 0 0 3 &mpic 6 1
+				 0xe000 0 0 4 &mpic 5 1
+
+				/* IDSEL 11 (11 << 11 = 0x5800); only INTA and INTB are mapped */
+				 0x5800 0 0 1 &mpic 6 1
+				 0x5800 0 0 2 &mpic 5 1
+				 >;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;	// the host bridge's own interrupt
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000	/* 512 MiB memory space, mapped 1:1 at 0x80000000 */
+			  0x01000000 0 0x00000000 0xe2000000 0 0x01000000>;	/* 16 MiB I/O space at CPU address 0xe2000000 */
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts
new file mode 100644
index 0000000000..fe5d3d873e
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8541.dts
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM 8541 Device Tree Source
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "tqc,tqm8541";
+	compatible = "tqc,tqm8541";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {	// single CPU node, addressed by index only
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8541@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// bytes
+			i-cache-line-size = <32>;	// bytes
+			d-cache-size = <32768>;	// 32 KiB
+			i-cache-size = <32768>;	// 32 KiB
+			timebase-frequency = <0>;	// placeholder; presumably filled in by the bootloader - TODO confirm
+			bus-frequency = <0>;	// placeholder, same assumption
+			clock-frequency = <0>;	// placeholder, same assumption
+			next-level-cache = <&L2>;	// L2 labels l2-cache-controller@20000 in the soc node
+		};
+	};
+
+	memory {	// main memory, described as <base size> with one cell each
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;	// base 0, size 256 MiB
+	};
+
+	soc@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xe0000000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8541-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8541-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8540-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8540-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			dtt@48 {
+				compatible = "national,lm75";
+				reg = <0x48>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		dma@21300 {	// DMA controller with four channels, one MPIC interrupt each
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8541-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;	// channel offset 0 -> soc offset 0x21100; 0x200 bytes = 4 x 0x80
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8541-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	// soc offset 0x21100
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8541-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	// soc offset 0x21180
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8541-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	// soc offset 0x21200
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8541-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	// soc offset 0x21280
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <1>;
+				};
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <2>;
+				};
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <3>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>; 	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// reg base, size
+			clock-frequency = <0>; 	// should we fill in in uboot?
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x7e>;
+			fsl,descriptor-types-mask = <0x01010ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+		};
+
+		cpm@919c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8541-cpm", "fsl,cpm2", "simple-bus";
+			reg = <0x919c0 0x30>;
+			ranges;
+
+			muram@80000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x80000 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x2000 0x9000 0x1000>;
+				};
+			};
+
+			brg@919f0 {
+				compatible = "fsl,mpc8541-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x919f0 0x10 0x915f0 0x10>;
+				clock-frequency = <0>;
+			};
+
+			cpmpic: pic@90c00 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				interrupts = <46 2>;
+				interrupt-parent = <&mpic>;
+				reg = <0x90c00 0x80>;
+				compatible = "fsl,mpc8541-cpm-pic", "fsl,cpm2-pic";
+			};
+		};
+	};
+
+	pci0: pci@e0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xe0008000 0x1000>;
+		clock-frequency = <66666666>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 28 */
+				 0xe000 0 0 1 &mpic 2 1
+				 0xe000 0 0 2 &mpic 3 1
+				 0xe000 0 0 3 &mpic 6 1
+				 0xe000 0 0 4 &mpic 5 1
+
+				/* IDSEL 11 */
+				 0x5800 0 0 1 &mpic 6 1
+				 0x5800 0 0 2 &mpic 5 1
+				 >;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xe2000000 0 0x01000000>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8548-bigflash.dts b/arch/powerpc/boot/dts/tqm8548-bigflash.dts
new file mode 100644
index 0000000000..caa36c5ef1
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8548-bigflash.dts
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM8548 Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ * Copyright 2008 Wolfgang Grandegger <wg@denx.de>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "tqc,tqm8548";
+	compatible = "tqc,tqm8548";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8548@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;	// L1, 32K
+			i-cache-size = <0x8000>;	// L1, 32K
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>;	// Filled in by U-Boot
+	};
+
+	soc@a0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xa0000000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8548-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <10>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8548-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8548-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8548-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>;	// L2, 512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			dtt@48 {
+				compatible = "national,lm75";
+				reg = <0x48>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {	// MDIO bus of enet0; carries every external PHY plus enet0's TBI PHY
+				#address-cells = <1>;	// one cell: the device's address on the MDIO bus
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy1: ethernet-phy@1 {	// unit address equals reg, as the devicetree spec requires
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	// all five PHYs share MPIC interrupt 8
+					reg = <1>;
+				};
+				phy2: ethernet-phy@2 {	// unit address equals reg
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <2>;
+				};
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <3>;
+				};
+				phy4: ethernet-phy@4 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <4>;
+				};
+				phy5: ethernet-phy@5 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <5>;
+				};
+				tbi0: tbi-phy@11 {	// unit address is hex: 0x11
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy4>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet3: ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <3>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x27000 0x1000>;
+			ranges = <0x0 0x27000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <37 2 38 2 39 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
+			phy-handle = <&phy5>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi3: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;	// reg base, size
+			clock-frequency = <0>;	// should we fill in in uboot?
+			current-speed = <115200>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// reg base, size
+			clock-frequency = <0>;	// should we fill in in uboot?
+			current-speed = <115200>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	// global utilities reg
+			compatible = "fsl,mpc8548-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+	};
+
+	localbus@a0005000 {
+		compatible = "fsl,mpc8548-localbus", "fsl,pq3-localbus",
+			     "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xa0005000 0x100>;	// BRx, ORx, etc.
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <
+			0 0x0 0xfc000000 0x04000000	// NOR FLASH bank 1
+			1 0x0 0xf8000000 0x08000000	// NOR FLASH bank 0
+			2 0x0 0xa3000000 0x00008000	// CAN (2 x CC770)
+			3 0x0 0xa3010000 0x00008000	// NAND FLASH
+
+		>;
+
+		flash@1,0 {	// CFI NOR flash; partitions below are contiguous and fill the device
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <1 0x0 0x8000000>;	// 128 MiB at local-bus child address <1 0x0>
+			bank-width = <4>;
+			device-width = <1>;
+
+			partition@0 {
+				label = "kernel";
+				reg = <0x00000000 0x00200000>;	// 2 MiB
+			};
+			partition@200000 {
+				label = "root";
+				reg = <0x00200000 0x00300000>;	// 3 MiB
+			};
+			partition@500000 {
+				label = "user";
+				reg = <0x00500000 0x07a00000>;	// 122 MiB, ends at 0x07f00000
+			};
+			partition@7f00000 {
+				label = "env1";
+				reg = <0x07f00000 0x00040000>;	// 256 KiB
+			};
+			partition@7f40000 {
+				label = "env2";
+				reg = <0x07f40000 0x00040000>;	// 256 KiB
+			};
+			partition@7f80000 {
+				label = "u-boot";
+				reg = <0x07f80000 0x00080000>;	// 512 KiB, last block of the flash
+				read-only;
+			};
+		};
+
+		/* Note: CAN support needs be enabled in U-Boot */
+		can@2,0 {
+			compatible = "bosch,cc770"; // Bosch CC770
+			reg = <2 0x0 0x100>;
+			interrupts = <4 1>;
+			interrupt-parent = <&mpic>;
+			bosch,external-clock-frequency = <16000000>;
+			bosch,disconnect-rx1-input;
+			bosch,disconnect-tx1-output;
+			bosch,iso-low-speed-mux;
+			bosch,clock-out-frequency = <16000000>;
+		};
+
+		can@2,100 {
+			compatible = "bosch,cc770"; // Bosch CC770
+			reg = <2 0x100 0x100>;
+			interrupts = <4 1>;
+			interrupt-parent = <&mpic>;
+			bosch,external-clock-frequency = <16000000>;
+			bosch,disconnect-rx1-input;
+			bosch,disconnect-tx1-output;
+			bosch,iso-low-speed-mux;
+		};
+
+		/* Note: NAND support needs to be enabled in U-Boot */
+		upm@3,0 {
+			#address-cells = <0>;
+			#size-cells = <0>;
+			compatible = "tqc,tqm8548-upm-nand", "fsl,upm-nand";
+			reg = <3 0x0 0x800>;
+			fsl,upm-addr-offset = <0x10>;
+			fsl,upm-cmd-offset = <0x08>;
+			/* Micron MT29F8G08FAB multi-chip device */
+			fsl,upm-addr-line-cs-offsets = <0x0 0x200>;
+			fsl,upm-wait-flags = <0x5>;
+			chip-delay = <25>; // in micro-seconds
+
+			nand@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				partition@0 {
+					    label = "fs";
+					    reg = <0x00000000 0x10000000>;
+				};
+			};
+		};
+	};
+
+	pci0: pci@a0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xa0008000 0x1000>;
+		clock-frequency = <33333333>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 28 */
+				 0xe000 0 0 1 &mpic 2 1
+				 0xe000 0 0 2 &mpic 3 1
+				 0xe000 0 0 3 &mpic 6 1
+				 0xe000 0 0 4 &mpic 5 1
+
+				/* IDSEL 11 */
+				 0x5800 0 0 1 &mpic 6 1
+				 0x5800 0 0 2 &mpic 5 1
+				 >;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xa2000000 0 0x01000000>;
+	};
+
+	pci1: pcie@a000a000 {	// PCI Express host bridge with one root-port child (pcie@0)
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// match on device number (bits 15:11) and INTx pin only
+		interrupt-map = <
+			/* IDSEL 0x0 (PEX) */
+			0x00000 0 0 1 &mpic 0 1
+			0x00000 0 0 2 &mpic 1 1
+			0x00000 0 0 3 &mpic 2 1
+			0x00000 0 0 4 &mpic 3 1>;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		bus-range = <0 0xff>;
+		ranges = <0x02000000 0 0xb0000000 0xb0000000 0 0x10000000	// 32-bit memory: PCI 0xb0000000 -> CPU 0xb0000000, 256 MiB
+			  0x01000000 0 0x00000000 0xaf000000 0 0x08000000>;	// I/O: PCI 0x0 -> CPU 0xaf000000, 128 MiB. NOTE(review): ends at 0xb6ffffff, overlapping the memory window above - confirm
+		clock-frequency = <33333333>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xa000a000 0x1000>;
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		pcie@0 {
+			reg = <0 0 0 0 0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0 0xb0000000 0x02000000 0	// memory space passed through 1:1
+			          0xb0000000 0 0x10000000	// ... PCI 0xb0000000, 256 MiB
+				  0x01000000 0 0x00000000 0x01000000 0	// I/O space passed through 1:1
+				  0x00000000 0 0x08000000>;	// ... PCI 0x0, 128 MiB
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8548.dts b/arch/powerpc/boot/dts/tqm8548.dts
new file mode 100644
index 0000000000..12a64410f3
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8548.dts
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM8548 Device Tree Source
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ * Copyright 2008 Wolfgang Grandegger <wg@denx.de>
+ */
+
+/dts-v1/;
+
+/ {
+	model = "tqc,tqm8548";
+	compatible = "tqc,tqm8548";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8548@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;	// L1, 32K
+			i-cache-size = <0x8000>;	// L1, 32K
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000>;	// Filled in by U-Boot
+	};
+
+	soc@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xe0000000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8548-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <10>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8548-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8548-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8548-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>;	// L2, 512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			dtt@48 {
+				compatible = "national,lm75";
+				reg = <0x48>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {	// MDIO bus of enet0; carries every external PHY plus enet0's TBI PHY
+				#address-cells = <1>;	// one cell: the device's address on the MDIO bus
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy1: ethernet-phy@1 {	// unit address equals reg, as the devicetree spec requires
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	// all five PHYs share MPIC interrupt 8
+					reg = <1>;
+				};
+				phy2: ethernet-phy@2 {	// unit address equals reg
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <2>;
+				};
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <3>;
+				};
+				phy4: ethernet-phy@4 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <4>;
+				};
+				phy5: ethernet-phy@5 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <5>;
+				};
+				tbi0: tbi-phy@11 {	// unit address is hex: 0x11
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy4>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet3: ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <3>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x27000 0x1000>;
+			ranges = <0x0 0x27000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <37 2 38 2 39 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
+			phy-handle = <&phy5>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi3: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;	// reg base, size
+			clock-frequency = <0>;	// should we fill in in uboot?
+			current-speed = <115200>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// reg base, size
+			clock-frequency = <0>;	// should we fill in in uboot?
+			current-speed = <115200>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	// global utilities reg
+			compatible = "fsl,mpc8548-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+	};
+
+	localbus@e0005000 {
+		compatible = "fsl,mpc8548-localbus", "fsl,pq3-localbus",
+			     "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xe0005000 0x100>;	// BRx, ORx, etc.
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <
+			0 0x0 0xfc000000 0x04000000	// NOR FLASH bank 1
+			1 0x0 0xf8000000 0x08000000	// NOR FLASH bank 0
+			2 0x0 0xe3000000 0x00008000	// CAN (2 x CC770)
+			3 0x0 0xe3010000 0x00008000	// NAND FLASH
+
+		>;
+
+		flash@1,0 {	// CFI NOR flash; partitions below are contiguous and fill the device
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <1 0x0 0x8000000>;	// 128 MiB at local-bus child address <1 0x0>
+			bank-width = <4>;
+			device-width = <1>;
+
+			partition@0 {
+				label = "kernel";
+				reg = <0x00000000 0x00200000>;	// 2 MiB
+			};
+			partition@200000 {
+				label = "root";
+				reg = <0x00200000 0x00300000>;	// 3 MiB
+			};
+			partition@500000 {
+				label = "user";
+				reg = <0x00500000 0x07a00000>;	// 122 MiB, ends at 0x07f00000
+			};
+			partition@7f00000 {
+				label = "env1";
+				reg = <0x07f00000 0x00040000>;	// 256 KiB
+			};
+			partition@7f40000 {
+				label = "env2";
+				reg = <0x07f40000 0x00040000>;	// 256 KiB
+			};
+			partition@7f80000 {
+				label = "u-boot";
+				reg = <0x07f80000 0x00080000>;	// 512 KiB, last block of the flash
+				read-only;
+			};
+		};
+
+		/* Note: CAN support needs be enabled in U-Boot */
+		can@2,0 {
+			compatible = "bosch,cc770"; // Bosch CC770
+			reg = <2 0x0 0x100>;
+			interrupts = <4 1>;
+			interrupt-parent = <&mpic>;
+			bosch,external-clock-frequency = <16000000>;
+			bosch,disconnect-rx1-input;
+			bosch,disconnect-tx1-output;
+			bosch,iso-low-speed-mux;
+			bosch,clock-out-frequency = <16000000>;
+		};
+
+		can@2,100 {
+			compatible = "bosch,cc770"; // Bosch CC770
+			reg = <2 0x100 0x100>;
+			interrupts = <4 1>;
+			interrupt-parent = <&mpic>;
+			bosch,external-clock-frequency = <16000000>;
+			bosch,disconnect-rx1-input;
+			bosch,disconnect-tx1-output;
+			bosch,iso-low-speed-mux;
+		};
+
+		/* Note: NAND support needs to be enabled in U-Boot */
+		upm@3,0 {
+			#address-cells = <0>;
+			#size-cells = <0>;
+			compatible = "tqc,tqm8548-upm-nand", "fsl,upm-nand";
+			reg = <3 0x0 0x800>;
+			fsl,upm-addr-offset = <0x10>;
+			fsl,upm-cmd-offset = <0x08>;
+			/* Micron MT29F8G08FAB multi-chip device */
+			fsl,upm-addr-line-cs-offsets = <0x0 0x200>;
+			fsl,upm-wait-flags = <0x5>;
+			chip-delay = <25>; // in micro-seconds
+
+			nand@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				partition@0 {
+					    label = "fs";
+					    reg = <0x00000000 0x10000000>;
+				};
+			};
+		};
+	};
+
+	pci0: pci@e0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xe0008000 0x1000>;
+		clock-frequency = <33333333>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL 28 */
+				 0xe000 0 0 1 &mpic 2 1
+				 0xe000 0 0 2 &mpic 3 1
+				 0xe000 0 0 3 &mpic 6 1
+				 0xe000 0 0 4 &mpic 5 1
+
+				/* IDSEL 11 */
+				 0x5800 0 0 1 &mpic 6 1
+				 0x5800 0 0 2 &mpic 5 1
+				 >;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xe2000000 0 0x01000000>;
+	};
+
+	pci1: pcie@e000a000 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 (PEX) */
+			0x00000 0 0 1 &mpic 0 1
+			0x00000 0 0 2 &mpic 1 1
+			0x00000 0 0 3 &mpic 2 1
+			0x00000 0 0 4 &mpic 3 1>;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		bus-range = <0 0xff>;
+		ranges = <0x02000000 0 0xc0000000 0xc0000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xef000000 0 0x08000000>;
+		clock-frequency = <33333333>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xe000a000 0x1000>;
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		pcie@0 {
+			reg = <0 0 0 0 0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0 0xc0000000 0x02000000 0
+			          0xc0000000 0 0x20000000
+				  0x01000000 0 0x00000000 0x01000000 0
+				  0x00000000 0 0x08000000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts
new file mode 100644
index 0000000000..4be05b7d22
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8555.dts
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM 8555 Device Tree Source
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "tqc,tqm8555";
+	compatible = "tqc,tqm8555";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;	// CPU nodes carry an index in reg, no size
+
+		PowerPC,8555@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <32768>;		// L1, 32K
+			i-cache-size = <32768>;		// L1, 32K
+			timebase-frequency = <0>;	// zero in this source; presumably set by the bootloader -- TODO confirm
+			bus-frequency = <0>;		// zero in this source, see above
+			clock-frequency = <0>;		// zero in this source, see above
+			next-level-cache = <&L2>;	// l2-cache-controller@20000 in the soc node
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	soc@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xe0000000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8555-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8555-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8540-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8540-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			dtt@48 {
+				compatible = "national,lm75";
+				reg = <0x48>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8555-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8555-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <1>;
+				};
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <2>;
+				};
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <3>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>; 	// offset within soc@e0000000, size
+			clock-frequency = <0>; 	// zero here; should U-Boot fill this in?
+			interrupts = <42 2>;	// same interrupt specifier as serial@4600
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// offset within soc@e0000000, size
+			clock-frequency = <0>; 	// zero here; should U-Boot fill this in?
+			interrupts = <42 2>;	// same interrupt specifier as serial@4500
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x7e>;
+			fsl,descriptor-types-mask = <0x01010ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+		};
+
+		cpm@919c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8555-cpm", "fsl,cpm2", "simple-bus";
+			reg = <0x919c0 0x30>;
+			ranges;
+
+			muram@80000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x80000 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x2000 0x9000 0x1000>;
+				};
+			};
+
+			brg@919f0 {
+				compatible = "fsl,mpc8555-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x919f0 0x10 0x915f0 0x10>;
+				clock-frequency = <0>;
+			};
+
+			cpmpic: pic@90c00 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				interrupts = <46 2>;
+				interrupt-parent = <&mpic>;
+				reg = <0x90c00 0x80>;
+				compatible = "fsl,mpc8555-cpm-pic", "fsl,cpm2-pic";
+			};
+		};
+	};
+
+	pci0: pci@e0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xe0008000 0x1000>;	// 4 KiB register block
+		clock-frequency = <66666666>;	// 66.67 MHz
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// compare only bits 15:11 of the first address cell and the pin cell
+		interrupt-map = <
+				/* IDSEL 28 (0xe000 >> 11); each row: 3 address cells, pin, &mpic, 2 mpic cells */
+				 0xe000 0 0 1 &mpic 2 1
+				 0xe000 0 0 2 &mpic 3 1
+				 0xe000 0 0 3 &mpic 6 1
+				 0xe000 0 0 4 &mpic 5 1
+
+				/* IDSEL 11 (0x5800 >> 11); only pins 1 and 2 are mapped */
+				 0x5800 0 0 1 &mpic 6 1
+				 0x5800 0 0 2 &mpic 5 1
+				 >;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000	/* MEM, 512 MiB at 0x80000000 */
+			  0x01000000 0 0x00000000 0xe2000000 0 0x01000000>;	/* IO, 16 MiB at 0xe2000000 */
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts
new file mode 100644
index 0000000000..8ea4850242
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8560.dts
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM 8560 Device Tree Source
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ * Copyright 2008 Wolfgang Grandegger <wg@grandegger.com>
+ */
+
+/dts-v1/;
+
+/include/ "fsl/e500v1_power_isa.dtsi"
+
+/ {
+	model = "tqc,tqm8560";
+	compatible = "tqc,tqm8560";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;	// CPU nodes carry an index in reg, no size
+
+		PowerPC,8560@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <32768>;		// L1, 32K
+			i-cache-size = <32768>;		// L1, 32K
+			timebase-frequency = <0>;	// zero in this source; presumably set by the bootloader -- TODO confirm
+			bus-frequency = <0>;		// zero in this source, see above
+			clock-frequency = <0>;		// zero in this source, see above
+			next-level-cache = <&L2>;	// l2-cache-controller@20000 in the soc node
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x10000000>;
+	};
+
+	soc@e0000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xe0000000 0x100000>;
+		bus-frequency = <0>;
+		compatible = "fsl,mpc8560-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <8>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8560-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8540-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8540-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;
+			cache-size = <0x40000>;	// L2, 256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			dtt@48 {
+				compatible = "national,lm75";
+				reg = <0x48>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+			};
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8560-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8560-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy2>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <1>;
+				};
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <2>;
+				};
+				phy3: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <3>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "TSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			device_type = "open-pic";
+			compatible = "chrp,open-pic";
+		};
+
+		cpm@919c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8560-cpm", "fsl,cpm2", "simple-bus";
+			reg = <0x919c0 0x30>;
+			ranges;
+
+			muram@80000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0 0x80000 0x10000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0 0x4000 0x9000 0x2000>;
+				};
+			};
+
+			brg@919f0 {
+				compatible = "fsl,mpc8560-brg",
+				             "fsl,cpm2-brg",
+				             "fsl,cpm-brg";
+				reg = <0x919f0 0x10 0x915f0 0x10>;
+				clock-frequency = <0>;
+			};
+
+			cpmpic: pic@90c00 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <2>;
+				interrupts = <46 2>;
+				interrupt-parent = <&mpic>;
+				reg = <0x90c00 0x80>;
+				compatible = "fsl,mpc8560-cpm-pic", "fsl,cpm2-pic";
+			};
+
+			serial0: serial@91a00 {
+				device_type = "serial";
+				compatible = "fsl,mpc8560-scc-uart",
+				             "fsl,cpm2-scc-uart";
+				reg = <0x91a00 0x20 0x88000 0x100>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x800000>;
+				current-speed = <115200>;
+				interrupts = <40 8>;
+				interrupt-parent = <&cpmpic>;
+			};
+
+			serial1: serial@91a20 {
+				device_type = "serial";
+				compatible = "fsl,mpc8560-scc-uart",
+				             "fsl,cpm2-scc-uart";
+				reg = <0x91a20 0x20 0x88100 0x100>;
+				fsl,cpm-brg = <2>;
+				fsl,cpm-command = <0x4a00000>;
+				current-speed = <115200>;
+				interrupts = <41 8>;
+				interrupt-parent = <&cpmpic>;
+			};
+
+			enet2: ethernet@91340 {
+				device_type = "network";
+				compatible = "fsl,mpc8560-fcc-enet",
+				             "fsl,cpm2-fcc-enet";
+				reg = <0x91340 0x20 0x88600 0x100 0x913d0 0x1>;
+				local-mac-address = [ 00 00 00 00 00 00 ];
+				fsl,cpm-command = <0x1a400300>;
+				interrupts = <34 8>;
+				interrupt-parent = <&cpmpic>;
+				phy-handle = <&phy3>;
+			};
+		};
+	};
+
+	localbus@e0005000 {
+		compatible = "fsl,mpc8560-localbus", "fsl,pq3-localbus",
+			     "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xe0005000 0x100>;	// BRx, ORx, etc.
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <
+			0 0x0 0xfc000000 0x04000000	// NOR FLASH bank 1
+			1 0x0 0xf8000000 0x08000000	// NOR FLASH bank 0
+			2 0x0 0xe3000000 0x00008000	// CAN (2 x i82527)
+		>;
+
+		flash@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <1 0x0 0x8000000>;	// window 1 of the localbus ranges (0xf8000000), 128 MiB
+			bank-width = <4>;
+			device-width = <1>;
+
+			partition@0 {
+				label = "kernel";
+				reg = <0x00000000 0x00200000>;	// 2 MiB
+			};
+			partition@200000 {
+				label = "root";
+				reg = <0x00200000 0x00300000>;	// 3 MiB
+			};
+			partition@500000 {
+				label = "user";
+				reg = <0x00500000 0x07a00000>;	// 122 MiB, ends at 0x07f00000
+			};
+			partition@7f00000 {
+				label = "env1";
+				reg = <0x07f00000 0x00040000>;	// 256 KiB
+			};
+			partition@7f40000 {
+				label = "env2";
+				reg = <0x07f40000 0x00040000>;	// 256 KiB
+			};
+			partition@7f80000 {
+				label = "u-boot";
+				reg = <0x07f80000 0x00080000>;	// 512 KiB, ends at the 128 MiB device boundary
+				read-only;
+			};
+		};
+
+		/* Note: CAN support needs to be enabled in U-Boot */
+		can0@2,0 {
+			compatible = "intc,82527"; // Bosch CC770; "intc" is the vendor prefix used for the 82527 (cf. tqm8xx.dts)
+			reg = <2 0x0 0x100>;	// window 2 of the localbus ranges, offset 0x0
+			interrupts = <4 1>;	// same interrupt specifier as can1
+			interrupt-parent = <&mpic>;
+		};
+
+		can1@2,100 {
+			compatible = "intc,82527"; // Bosch CC770; "intc" is the vendor prefix used for the 82527 (cf. tqm8xx.dts)
+			reg = <2 0x100 0x100>;	// window 2 of the localbus ranges, offset 0x100
+			interrupts = <4 1>;	// same interrupt specifier as can0
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	pci0: pci@e0008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xe0008000 0x1000>;	// 4 KiB register block
+		clock-frequency = <66666666>;	// 66.67 MHz
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	// compare only bits 15:11 of the first address cell and the pin cell
+		interrupt-map = <
+				/* IDSEL 28 (0xe000 >> 11); each row: 3 address cells, pin, &mpic, 2 mpic cells */
+				 0xe000 0 0 1 &mpic 2 1
+				 0xe000 0 0 2 &mpic 3 1
+				 0xe000 0 0 3 &mpic 6 1
+				 0xe000 0 0 4 &mpic 5 1
+
+				/* IDSEL 11 (0x5800 >> 11); only pins 1 and 2 are mapped */
+				 0x5800 0 0 1 &mpic 6 1
+				 0x5800 0 0 2 &mpic 5 1
+				 >;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000	/* MEM, 512 MiB at 0x80000000 */
+			  0x01000000 0 0x00000000 0xe2000000 0 0x01000000>;	/* IO, 16 MiB at 0xe2000000 */
+	};
+};
diff --git a/arch/powerpc/boot/dts/tqm8xx.dts b/arch/powerpc/boot/dts/tqm8xx.dts
new file mode 100644
index 0000000000..d16cdfd812
--- /dev/null
+++ b/arch/powerpc/boot/dts/tqm8xx.dts
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TQM8XX Device Tree Source
+ *
+ * Heiko Schocher <hs@denx.de>
+ * 2010 DENX Software Engineering GmbH
+ */
+
+/dts-v1/;
+
+/ {
+	model = "TQM8xx";
+	compatible = "tqc,tqm8xx";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	aliases {
+		ethernet0 = &eth0;
+		ethernet1 = &eth1;
+		mdio1 = &phy1;
+		serial0 = &smc1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,860@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <16>;	// 16 bytes
+			i-cache-line-size = <16>;	// 16 bytes
+			d-cache-size = <0x1000>;		// L1, 4K
+			i-cache-size = <0x1000>;		// L1, 4K
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			interrupts = <15 2>;	// decrementer interrupt
+			interrupt-parent = <&PIC>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x2000000>;
+	};
+
+	localbus@fff00100 {
+		compatible = "fsl,mpc860-localbus", "fsl,pq1-localbus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		reg = <0xfff00100 0x40>;
+
+		ranges = <
+			0x0 0x0 0x40000000 0x800000
+			0x3 0x0 0xc0000000 0x200
+		>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x800000>;	// window 0 of the localbus ranges (0x40000000), 8 MiB
+			#address-cells = <1>;	// for child nodes; none are defined in this file
+			#size-cells = <1>;
+			bank-width = <4>;
+			device-width = <2>;
+		};
+
+		/* Note: CAN support needs to be enabled in U-Boot */
+		can@3,0 {
+			compatible = "intc,82527";
+			reg = <3 0x0 0x80>;
+			interrupts = <8 1>;
+			interrupt-parent = <&PIC>;
+			bosch,external-clock-frequency = <16000000>;
+			bosch,disconnect-rx1-input;
+			bosch,disconnect-tx1-output;
+			bosch,iso-low-speed-mux;
+			bosch,clock-out-frequency = <16000000>;
+		};
+
+		can@3,100 {
+			compatible = "intc,82527";
+			reg = <3 0x100 0x80>;
+			interrupts = <8 1>;
+			interrupt-parent = <&PIC>;
+			bosch,external-clock-frequency = <16000000>;
+			bosch,disconnect-rx1-input;
+			bosch,disconnect-tx1-output;
+			bosch,iso-low-speed-mux;
+		};
+	};
+
+	soc@fff00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xfff00000 0x00004000>;
+
+		phy1: mdio@e00 {
+			compatible = "fsl,mpc866-fec-mdio", "fsl,pq1-fec-mdio";
+			reg = <0xe00 0x188>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			PHY: ethernet-phy@f {
+				reg = <0xf>;
+			};
+		};
+
+		eth1: ethernet@e00 {
+			device_type = "network";
+			compatible = "fsl,mpc866-fec-enet",
+			             "fsl,pq1-fec-enet";
+			reg = <0xe00 0x188>;
+			interrupts = <3 1>;
+			interrupt-parent = <&PIC>;
+			phy-handle = <&PHY>;
+			linux,network-index = <1>;
+		};
+
+		PIC: pic@0 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x0 0x24>;
+			compatible = "fsl,mpc860-pic", "fsl,pq1-pic";
+		};
+
+		cpm@9c0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc860-cpm", "fsl,cpm1";
+			ranges;
+			reg = <0x9c0 0x40>;
+			brg-frequency = <0>;
+			interrupts = <0 2>;	// cpm error interrupt
+			interrupt-parent = <&CPM_PIC>;
+
+			muram@2000 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				ranges = <0x0 0x2000 0x2000>;
+
+				data@0 {
+					compatible = "fsl,cpm-muram-data";
+					reg = <0x0 0x2000>;
+				};
+			};
+
+			brg@9f0 {
+				compatible = "fsl,mpc860-brg",
+					     "fsl,cpm1-brg",
+					     "fsl,cpm-brg";
+				reg = <0x9f0 0x10>;
+				clock-frequency = <0>;
+			};
+
+			CPM_PIC: pic@930 {
+				interrupt-controller;
+				#address-cells = <0>;
+				#interrupt-cells = <1>;
+				interrupts = <5 2 0 2>;
+				interrupt-parent = <&PIC>;
+				reg = <0x930 0x20>;
+				compatible = "fsl,mpc860-cpm-pic",
+				             "fsl,cpm1-pic";
+			};
+
+
+			smc1: serial@a80 {
+				device_type = "serial";
+				compatible = "fsl,mpc860-smc-uart",
+				             "fsl,cpm1-smc-uart";
+				reg = <0xa80 0x10 0x3e80 0x40>;
+				interrupts = <4>;
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-brg = <1>;
+				fsl,cpm-command = <0x90>;
+			};
+
+			eth0: ethernet@a00 {
+				device_type = "network";
+				compatible = "fsl,mpc860-scc-enet",
+				             "fsl,cpm1-scc-enet";
+				reg = <0xa00 0x18 0x3c00 0x100>;	// two regions: 0xa00 (0x18 bytes) and 0x3c00 (0x100 bytes)
+				interrupts = <30>;	// single cell: CPM_PIC declares #interrupt-cells = <1>
+				interrupt-parent = <&CPM_PIC>;
+				fsl,cpm-command = <0000>;	// numerically zero
+				linux,network-index = <0>;
+				fixed-link = <0 0 10 0 0>;	// legacy array form; presumably <id duplex speed pause asym-pause>, i.e. 10 Mbit/s half duplex -- TODO confirm
+			};
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts
new file mode 100644
index 0000000000..dff1ea074d
--- /dev/null
+++ b/arch/powerpc/boot/dts/turris1x.dts
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Turris 1.x Device Tree Source
+ *
+ * Copyright 2013 - 2022 CZ.NIC z.s.p.o. (http://www.nic.cz/)
+ *
+ * Pinout, Schematics and Altium hardware design files are open source
+ * and available at: https://docs.turris.cz/hw/turris-1x/turris-1x/
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/leds/common.h>
+/include/ "fsl/p2020si-pre.dtsi"
+
+/ {
+	model = "Turris 1.x";
+	compatible = "cznic,turris1x";
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		spi0 = &spi0;
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	soc: soc@ffe00000 {
+		ranges = <0x0 0x0 0xffe00000 0x00100000>;
+
+		i2c@3000 {
+			/* PCA9557PW GPIO controller for boot config */
+			gpio-controller@18 {
+				compatible = "nxp,pca9557";
+				label = "bootcfg";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* STM32F030R8T6 MCU for power control */
+			power-control@2a {
+				/*
+				 * Turris Power Control firmware runs on STM32F0 MCU.
+				 * This firmware is open source and available at:
+				 * https://gitlab.nic.cz/turris/hw/turris_power_control
+				 */
+				reg = <0x2a>;
+			};
+
+			/* DDR3 SPD/EEPROM PSWP instruction */
+			eeprom@32 {
+				reg = <0x32>;
+			};
+
+			/* SA56004ED temperature control */
+			temperature-sensor@4c {
+				compatible = "nxp,sa56004";
+				reg = <0x4c>;
+				interrupt-parent = <&gpio>;
+				interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */
+					     <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				/* Local temperature sensor (SA56004ED internal) */
+				channel@0 {
+					reg = <0>;
+					label = "board";
+				};
+
+				/* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */
+				channel@1 {
+					reg = <1>;
+					label = "cpu";
+				};
+			};
+
+			/* DDR3 SPD/EEPROM */
+			eeprom@52 {
+				compatible = "atmel,spd";
+				reg = <0x52>;
+			};
+
+			/* MCP79402-I/ST Protected EEPROM */
+			eeprom@57 {
+				reg = <0x57>;
+			};
+
+			/* ATSHA204-TH-DA-T crypto module */
+			crypto@64 {
+				compatible = "atmel,atsha204";
+				reg = <0x64>;
+			};
+
+			/* IDT6V49205BNLGI clock generator */
+			clock-generator@69 {
+				compatible = "idt,6v49205b";
+				reg = <0x69>;
+			};
+
+			/* MCP79402-I/ST RTC */
+			rtc@6f {
+				compatible = "microchip,mcp7940x";
+				reg = <0x6f>;
+				interrupt-parent = <&gpio>;
+				interrupts = <14 0>; /* GPIO14 - MFP pin */
+			};
+		};
+
+		/* SPI on connector P1 */
+		spi0: spi@7000 {
+		};
+
+		gpio: gpio-controller@fc00 {
+			#interrupt-cells = <2>;
+			interrupt-controller;
+		};
+
+		/* Connected to SMSC USB2412-DZK 2-Port USB 2.0 Hub Controller */
+		usb@22000 {
+			phy_type = "ulpi";
+			dr_mode = "host";
+		};
+
+		enet0: ethernet@24000 {
+			/* Connected to port 6 of QCA8337N-AL3C switch */
+			phy-connection-type = "rgmii-id";
+
+			fixed-link {
+				speed = <1000>;
+				full-duplex;
+			};
+		};
+
+		mdio@24520 {
+			/* KSZ9031RNXCA ethernet phy for WAN port */
+			phy: ethernet-phy@7 {
+				interrupts = <3 1 0 0>;
+				reg = <0x7>;
+			};
+
+			/* QCA8337N-AL3C switch with integrated ethernet PHYs for LAN ports */
+			switch@10 {
+				compatible = "qca,qca8337";
+				interrupts = <2 1 0 0>;
+				reg = <0x10>;
+
+				ports {
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					port@0 {
+						reg = <0>;
+						label = "cpu";
+						ethernet = <&enet1>;
+						phy-mode = "rgmii-id";
+
+						fixed-link {
+							speed = <1000>;
+							full-duplex;
+						};
+					};
+
+					port@1 {
+						reg = <1>;
+						label = "lan5";
+					};
+
+					port@2 {
+						reg = <2>;
+						label = "lan4";
+					};
+
+					port@3 {
+						reg = <3>;
+						label = "lan3";
+					};
+
+					port@4 {
+						reg = <4>;
+						label = "lan2";
+					};
+
+					port@5 {
+						reg = <5>;
+						label = "lan1";
+					};
+
+					port@6 {
+						reg = <6>;
+						label = "cpu";
+						ethernet = <&enet0>;
+						phy-mode = "rgmii-id";
+
+						fixed-link {
+							speed = <1000>;
+							full-duplex;
+						};
+					};
+				};
+			};
+		};
+
+		ptp_clock@24e00 {
+			fsl,tclk-period = <5>;
+			fsl,tmr-prsc = <200>;
+			fsl,tmr-add = <0xcccccccd>;
+			fsl,tmr-fiper1 = <0x3b9ac9fb>;
+			fsl,tmr-fiper2 = <0x0001869b>;
+			fsl,max-adj = <249999999>;
+		};
+
+		enet1: ethernet@25000 {
+			/* Connected to port 0 of QCA8337N-AL3C switch */
+			phy-connection-type = "rgmii-id";
+
+			fixed-link {
+				speed = <1000>;
+				full-duplex;
+			};
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@26000 {
+			/* Connected to KSZ9031RNXCA ethernet phy (WAN port) */
+			label = "wan";
+			phy-handle = <&phy>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		sdhc@2e000 {
+			bus-width = <4>;
+			cd-gpios = <&gpio 8 GPIO_ACTIVE_LOW>;
+		};
+	};
+
+	lbc: localbus@ffe05000 {
+		reg = <0 0xffe05000 0 0x1000>;
+
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000>, /* NOR */
+			 <0x1 0x0 0x0 0xff800000 0x00040000>, /* NAND */
+			 <0x3 0x0 0x0 0xffa00000 0x00020000>; /* CPLD */
+
+		/* S29GL128P90TFIR10 NOR */
+		nor@0,0 {
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x01000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partitions {
+				compatible = "fixed-partitions";
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				partition@0 {
+					/* 128 kB for Device Tree Blob */
+					reg = <0x00000000 0x00020000>;
+					label = "dtb";
+				};
+
+				partition@20000 {
+					/* 1.7 MB for Linux Kernel Image */
+					reg = <0x00020000 0x001a0000>;
+					label = "kernel";
+				};
+
+				partition@1c0000 {
+					/* 1.5 MB for Rescue JFFS2 Root File System */
+					reg = <0x001c0000 0x00180000>;
+					label = "rescue";
+				};
+
+				partition@340000 {
+					/* 11 MB for TAR.XZ Archive with Factory content of NAND Root File System */
+					reg = <0x00340000 0x00b00000>;
+					label = "factory";
+				};
+
+				partition@e40000 {
+					/* 768 kB for Certificates JFFS2 File System */
+					reg = <0x00e40000 0x000c0000>;
+					label = "certificates";
+				};
+
+				/* free unused space 0x00f00000-0x00f20000 */
+
+				partition@f20000 {
+					/* 128 kB for U-Boot Environment Variables */
+					reg = <0x00f20000 0x00020000>;
+					label = "u-boot-env";
+				};
+
+				partition@f40000 {
+					/* 768 kB for U-Boot Bootloader Image */
+					reg = <0x00f40000 0x000c0000>;
+					label = "u-boot";
+				};
+			};
+		};
+
+		/* MT29F2G08ABAEAWP:E NAND */
+		nand@1,0 {
+			compatible = "fsl,p2020-fcm-nand", "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x00040000>;
+			nand-ecc-mode = "soft";
+			nand-ecc-algo = "bch";
+
+			partitions {
+				compatible = "fixed-partitions";
+				#address-cells = <1>;
+				#size-cells = <1>;
+
+				partition@0 {
+					/* 256 MB for UBI with one volume: UBIFS Root File System */
+					reg = <0x00000000 0x10000000>;
+					label = "rootfs";
+				};
+			};
+		};
+
+		/* LCMXO1200C-3FTN256C FPGA */
+		cpld@3,0 {
+			/*
+			 * The Turris CPLD firmware, which runs on this Lattice FPGA,
+			 * is an extended version of the P1021RDB-PC CPLD v4.1 firmware.
+			 * It is backward compatible with its original version
+			 * and the only extension is support for Turris LEDs.
+			 * Turris CPLD firmware is open source and available at:
+			 * https://gitlab.nic.cz/turris/hw/turris_cpld/-/blob/master/CZ_NIC_Router_CPLD.v
+			 */
+			compatible = "cznic,turris1x-cpld", "fsl,p1021rdb-pc-cpld", "simple-bus", "syscon";
+			reg = <0x3 0x0 0x30>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0x0 0x3 0x0 0x00020000>;
+
+			/* MAX6370KA+T watchdog */
+			watchdog@2 {
+				/*
+				 * CPLD firmware maps SET0, SET1 and SET2
+				 * input logic of MAX6370KA+T chip to CPLD
+				 * memory space at byte offset 0x2. WDI
+				 * input logic is outside of the CPLD and
+				 * connected via external GPIO.
+				 */
+				compatible = "maxim,max6370";
+				reg = <0x02 0x01>;
+				gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+			};
+
+			reboot@d {
+				/*
+				 * The CPLD firmware which manages the system reset
+				 * and watchdog registers has bugs. It does not
+				 * autoclear the system reset register after a change
+				 * and the watchdog ignores the reset line on the
+				 * immediately succeeding reset cycle triggered by it.
+				 * These bugs have to be worked around in the U-Boot
+				 * bootloader. So use system reset via syscon as
+				 * a last resort because older U-Boot versions
+				 * do not have the workaround for the watchdog.
+				 *
+				 * Reset method via rstcr's global-utilities
+				 * (the preferred one) has priority level 128,
+				 * watchdog has priority level 0 and default
+				 * syscon-reboot priority level is 192.
+				 *
+				 * So define syscon-reboot with custom priority
+				 * level 64 (between rstcr and watchdog) because
+				 * rstcr should stay as default preferred reset
+				 * method and reset via watchdog is more broken
+				 * than system reset via syscon.
+				 */
+				compatible = "syscon-reboot";
+				reg = <0x0d 0x01>;
+				offset = <0x0d>;
+				mask = <0x01>;
+				value = <0x01>;
+				priority = <64>;
+			};
+
+			led-controller@13 {
+				/*
+				 * LEDs are controlled by CPLD firmware.
+				 * All five LAN LEDs share common RGB settings
+				 * and so it is not possible to set different
+				 * colors on different LAN ports.
+				 */
+				compatible = "cznic,turris1x-leds";
+				reg = <0x13 0x1d>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				multi-led@0 {
+					reg = <0x0>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_WAN;
+				};
+
+				multi-led@1 {
+					reg = <0x1>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_LAN;
+					function-enumerator = <5>;
+				};
+
+				multi-led@2 {
+					reg = <0x2>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_LAN;
+					function-enumerator = <4>;
+				};
+
+				multi-led@3 {
+					reg = <0x3>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_LAN;
+					function-enumerator = <3>;
+				};
+
+				multi-led@4 {
+					reg = <0x4>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_LAN;
+					function-enumerator = <2>;
+				};
+
+				multi-led@5 {
+					reg = <0x5>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_LAN;
+					function-enumerator = <1>;
+				};
+
+				multi-led@6 {
+					reg = <0x6>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_WLAN;
+				};
+
+				multi-led@7 {
+					reg = <0x7>;
+					color = <LED_COLOR_ID_RGB>;
+					function = LED_FUNCTION_POWER;
+				};
+			};
+		};
+	};
+
+	pci2: pcie@ffe08000 {
+		/*
+		 * PCIe bus for on-board TUSB7340RKM USB 3.0 xHCI controller.
+		 * This xHCI controller is available only on Turris 1.1 boards.
+		 * Turris 1.0 boards have nothing connected to this PCIe bus,
+		 * so system would see only PCIe Root Port of this PCIe Root
+		 * Complex. TUSB7340RKM xHCI controller has four SuperSpeed
+		 * channels. Channel 0 is connected to the front USB 3.0 port,
+		 * channel 1 (but only USB 2.0 subset) to USB 2.0 pins on mPCIe
+		 * slot 1 (CN5), channels 2 and 3 to connector P600.
+		 *
+		 * P2020 PCIe Root Port does not use PCIe MEM and xHCI controller
+		 * uses 64kB + 8kB of PCIe MEM. No PCIe IO is used or required.
+		 * So allocate 128kB of PCIe MEM for this PCIe bus.
+		 */
+		reg = <0 0xffe08000 0 0x1000>;
+		ranges = <0x02000000 0x0 0xc0000000 0 0xc0000000 0x0 0x00020000>, /* MEM */
+			 <0x01000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>; /* IO */
+
+		pcie@0 {
+			ranges;
+		};
+	};
+
+	pci1: pcie@ffe09000 {
+		/* PCIe bus on mPCIe slot 2 (CN6) for expansion mPCIe card */
+		reg = <0 0xffe09000 0 0x1000>;
+		ranges = <0x02000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000>, /* MEM */
+			 <0x01000000 0x0 0x00000000 0 0xffc10000 0x0 0x00010000>; /* IO */
+
+		pcie@0 {
+			ranges;
+		};
+	};
+
+	pci0: pcie@ffe0a000 {
+		/*
+		 * PCIe bus on mPCIe slot 1 (CN5) for expansion mPCIe card.
+		 * Turris 1.1 boards have in this mPCIe slot additional USB 2.0
+		 * pins via channel 1 of TUSB7340RKM xHCI controller and also
+		 * additional SIM card slot, both for USB-based WWAN cards.
+		 */
+		reg = <0 0xffe0a000 0 0x1000>;
+		ranges = <0x02000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000>, /* MEM */
+			 <0x01000000 0x0 0x00000000 0 0xffc00000 0x0 0x00010000>; /* IO */
+
+		pcie@0 {
+			ranges;
+		};
+	};
+};
+
+/include/ "fsl/p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/uc101.dts b/arch/powerpc/boot/dts/uc101.dts
new file mode 100644
index 0000000000..2e34d01917
--- /dev/null
+++ b/arch/powerpc/boot/dts/uc101.dts
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Manroland uc101 board Device Tree Source
+ *
+ * Copyright (C) 2009 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ * Copyright 2006-2007 Secret Lab Technologies Ltd.
+ */
+
+/include/ "mpc5200b.dtsi"
+
+&gpt0 { gpio-controller; };
+&gpt1 { gpio-controller; };
+&gpt2 { gpio-controller; };
+&gpt3 { gpio-controller; };
+&gpt4 { gpio-controller; };
+&gpt5 { gpio-controller; };
+&gpt6 { gpio-controller; };
+&gpt7 { gpio-controller; };
+
+/ {
+	model = "manroland,uc101";
+	compatible = "manroland,uc101";
+
+	soc5200@f0000000 {
+		rtc@800 {
+			status = "disabled";
+		};
+
+		can@900 {
+			status = "disabled";
+		};
+
+		can@980 {
+			status = "disabled";
+		};
+
+		spi@f00 {
+			status = "disabled";
+		};
+
+		usb@1000 {
+			status = "disabled";
+		};
+
+		psc@2000 {	// PSC1
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2200 {	// PSC2
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		psc@2400 {	// PSC3
+			status = "disabled";
+		};
+
+		psc@2600 {	// PSC4
+			status = "disabled";
+		};
+
+		psc@2800 {	// PSC5
+			status = "disabled";
+		};
+
+		psc@2c00 {	// PSC6
+			compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@0 {
+				compatible = "intel,lxt971";
+				reg = <0>;
+			};
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			fsl,preserve-clocking;
+			clock-frequency = <400000>;
+
+			hwmon@2c {
+				compatible = "ad,adm9240";
+				reg = <0x2c>;
+			};
+			rtc@51 {
+				compatible = "nxp,pcf8563";
+				reg = <0x51>;
+			};
+		};
+	};
+
+	pci@f0000d00 {
+		status = "disabled";
+	};
+
+	localbus {
+		ranges = <0 0 0xff800000 0x00800000
+			  1 0 0x80000000 0x00800000
+			  3 0 0x80000000 0x00800000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x00800000>;
+			bank-width = <2>;
+			device-width = <2>;
+			#size-cells = <1>;
+			#address-cells = <1>;
+
+			partition@0 {
+				label = "DTS";
+				reg = <0x0 0x00100000>;
+			};
+			partition@100000 {
+				label = "Kernel";
+				reg = <0x100000 0x00200000>;
+			};
+			partition@300000 {
+				label = "RootFS";
+				reg = <0x00300000 0x00200000>;
+			};
+			partition@500000 {
+				label = "user";
+				reg = <0x00500000 0x00200000>;
+			};
+			partition@700000 {
+				label = "U-Boot";
+				reg = <0x00700000 0x00040000>;
+			};
+			partition@740000 {
+				label = "Env";
+				reg = <0x00740000 0x00010000>;
+			};
+			partition@750000 {
+				label = "red. Env";
+				reg = <0x00750000 0x00010000>;
+			};
+			partition@760000 {
+				label = "reserve";
+				reg = <0x00760000 0x000a0000>;
+			};
+		};
+
+	};
+};
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
new file mode 100644
index 0000000000..aa62d08e97
--- /dev/null
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -0,0 +1,307 @@
+/*
+ * Device Tree Source for PIKA Warp
+ *
+ * Copyright (c) 2008-2009 PIKA Technologies
+ *   Sean MacLennan <smaclennan@pikatech.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "pika,warp";
+	compatible = "pika,warp";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		serial0 = &UART0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440EP";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440ep";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440ep";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440ep", "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440ep", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		DMA0: dma {
+			compatible = "ibm,dma-440ep", "ibm,dma-440gp";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440ep", "ibm,mcmal-440gp", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+		};
+
+		POB0: opb {
+		  	compatible = "ibm,opb-440ep", "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+		  	ranges = <0x00000000 0x00000000 0x00000000 0x80000000
+			          0x80000000 0x00000000 0x80000000 0x80000000>;
+		  	interrupt-parent = <&UIC1>;
+		  	interrupts = <0x7 0x4>;
+		  	clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440ep", "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				fpga@2,0 {
+					compatible = "pika,fpga";
+					reg = <0x00000002 0x00000000 0x00001000>;	/* <cs offset size> */
+					interrupts = <0x18 0x8>;
+					interrupt-parent = <&UIC0>;
+				};
+
+				fpga@2,2000 {
+					compatible = "pika,fpga-sgl";
+					reg = <0x00000002 0x00002000 0x00000200>;	/* <cs offset size> */
+				};
+
+				fpga@2,4000 {
+					compatible = "pika,fpga-sd";
+					reg = <0x00000002 0x00004000 0x00004000>;
+				};
+
+				nor@0,0 {
+					compatible = "amd,s29gl032a", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x00400000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					partition@0 {
+						label = "splash";
+						reg = <0x00000000 0x00010000>;
+					};
+					partition@300000 {
+						label = "fpga";
+						reg = <0x0300000 0x00040000>;
+					};
+					partition@340000 {
+						label = "env";
+						reg = <0x0340000 0x00040000>;
+					};
+					partition@380000 {
+						label = "u-boot";
+						reg = <0x0380000 0x00080000>;
+					};
+				};
+
+				ndfc@1,0 {
+					compatible = "ibm,ndfc";
+					reg = <0x00000001 0x00000000 0x00002000>;
+					ccr = <0x00001000>;
+					bank-settings = <0x80002222>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+
+					nand {	/* partition unit addresses equal the reg offset */
+						#address-cells = <1>;
+						#size-cells = <1>;
+
+						partition@0 {
+							label = "kernel";
+							reg = <0x00000000 0x00200000>;	/* 2 MiB */
+						};
+						partition@200000 {
+							label = "root";
+							reg = <0x00200000 0x03E00000>;	/* 62 MiB */
+						};
+						partition@4000000 {
+							label = "persistent";
+							reg = <0x04000000 0x04000000>;	/* 64 MiB */
+						};
+						partition@8000000 {
+							label = "persistent1";
+							reg = <0x08000000 0x04000000>;	/* 64 MiB */
+						};
+						partition@c000000 {
+							label = "persistent2";
+							reg = <0x0C000000 0x04000000>;	/* 64 MiB */
+						};
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>;	/* Filled in by zImage */
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				ad7414@4a {
+					compatible = "adi,ad7414";
+					reg = <0x4a>;
+					interrupts = <0x19 0x8>;
+					interrupt-parent = <&UIC0>;
+				};
+
+				/* This will create 52 and 53 */
+				at24@52 {
+					compatible = "atmel,24c04";
+					reg = <0x52>;
+				};
+			};
+
+			GPIO0: gpio@ef600b00 {
+				compatible = "ibm,ppc4xx-gpio";
+				reg = <0xef600b00 0x00000048>;
+				#gpio-cells = <2>;
+				gpio-controller;
+			};
+
+			GPIO1: gpio@ef600c00 {
+				compatible = "ibm,ppc4xx-gpio";
+				reg = <0xef600c00 0x00000048>;
+				#gpio-cells = <2>;
+				gpio-controller;
+			};
+
+			power-leds {
+				compatible = "warp-power-leds";
+				green {
+					gpios = <&GPIO1 0 0>;
+				};
+				red {
+					gpios = <&GPIO1 1 0>;
+				};
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-440ep", "ibm,zmii-440gp", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0xef600e00 0x00000070>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0 1>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+			};
+
+			usb@ef601000 {
+				compatible = "ohci-be";
+				reg = <0xef601000 0x00000080>;
+				interrupts = <0x8 0x1 0x9 0x1>;
+				interrupt-parent = < &UIC1 >;
+			};
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+	};
+};
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
new file mode 100644
index 0000000000..e46143c323
--- /dev/null
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/boot/dts/wii.dts
+ *
+ * Nintendo Wii platform device tree source
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/*
+ * This is commented-out for now.
+ * Until a later patch is merged, the kernel can use only the first
+ * contiguous RAM range and will BUG() if the memreserve is outside
+ * that range.
+ */
+/*/memreserve/ 0x10000000 0x0004000;*/	/* DSP RAM */
+
+/ {
+	model = "nintendo,wii";
+	compatible = "nintendo,wii";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	chosen {
+		bootargs = "root=/dev/mmcblk0p2 rootwait udbg-immortal";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x01800000	/* MEM1 24MB 1T-SRAM */
+		       0x10000000 0x04000000>;	/* MEM2 64MB GDDR3 */
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,broadway@0 {
+			device_type = "cpu";
+			reg = <0>;
+			clock-frequency = <729000000>; /* 729MHz */
+			bus-frequency = <243000000>; /* 243MHz core-to-bus 3x */
+			timebase-frequency = <60750000>; /* 243MHz / 4 */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+		};
+	};
+
+	/* devices contained in the hollywood chipset */
+	hollywood {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "nintendo,hollywood";
+                ranges = <0x0c000000 0x0c000000 0x01000000
+			  0x0d000000 0x0d000000 0x00800000
+			  0x0d800000 0x0d800000 0x00800000>;
+		interrupt-parent = <&PIC0>;
+
+		video@c002000 {
+			compatible = "nintendo,hollywood-vi",
+					"nintendo,flipper-vi";
+			reg = <0x0c002000 0x100>;
+			interrupts = <8>;
+		};
+
+		processor-interface@c003000 {
+			compatible = "nintendo,hollywood-pi",
+					"nintendo,flipper-pi";
+			reg = <0x0c003000 0x100>;
+
+			PIC0: pic0 {
+				#interrupt-cells = <1>;
+				compatible = "nintendo,flipper-pic";
+				interrupt-controller;
+			};
+		};
+
+		dsp@c005000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "nintendo,hollywood-dsp",
+					"nintendo,flipper-dsp";
+			reg = <0x0c005000 0x200>;
+			interrupts = <6>;
+		};
+
+		gamepad-controller@d006400 {
+			compatible = "nintendo,hollywood-si",
+					"nintendo,flipper-si";
+			reg = <0x0d006400 0x100>;
+			interrupts = <3>;
+		};
+
+		audio@d006c00 {	/* unit address follows the reg base below */
+			compatible = "nintendo,hollywood-ai",
+					"nintendo,flipper-ai";
+			reg = <0x0d006c00 0x20>;
+			interrupts = <6>;
+		};
+
+		/* External Interface bus */
+		exi@d006800 {
+			compatible = "nintendo,hollywood-exi",
+					"nintendo,flipper-exi";
+			reg = <0x0d006800 0x40>;
+			virtual-reg = <0x0d006800>;
+			interrupts = <4>;
+		};
+
+		usb@d040000 {
+			compatible = "nintendo,hollywood-usb-ehci",
+					"usb-ehci";
+			reg = <0x0d040000 0x100>;
+			interrupts = <4>;
+			interrupt-parent = <&PIC1>;
+		};
+
+		usb@d050000 {
+			compatible = "nintendo,hollywood-usb-ohci",
+					"usb-ohci";
+			reg = <0x0d050000 0x100>;
+			interrupts = <5>;
+			interrupt-parent = <&PIC1>;
+		};
+
+		usb@d060000 {
+			compatible = "nintendo,hollywood-usb-ohci",
+					"usb-ohci";
+			reg = <0x0d060000 0x100>;
+			interrupts = <6>;
+			interrupt-parent = <&PIC1>;
+		};
+
+		sd@d070000 {
+			compatible = "nintendo,hollywood-sdhci",
+					"sdhci";
+			reg = <0x0d070000 0x200>;
+			interrupts = <7>;
+			interrupt-parent = <&PIC1>;
+		};
+
+		sdio@d080000 {
+			compatible = "nintendo,hollywood-sdhci",
+					"sdhci";
+			reg = <0x0d080000 0x200>;
+			interrupts = <8>;
+			interrupt-parent = <&PIC1>;
+		};
+
+		ipc@d000000 {
+			compatible = "nintendo,hollywood-ipc";
+			reg = <0x0d000000 0x10>;
+			interrupts = <30>;
+			interrupt-parent = <&PIC1>;
+		};
+
+		PIC1: pic1@d800030 {
+			#interrupt-cells = <1>;
+			compatible = "nintendo,hollywood-pic";
+			reg = <0x0d800030 0x10>;
+			interrupt-controller;
+			interrupts = <14>;
+		};
+
+		srnprot@d800060 {
+			compatible = "nintendo,hollywood-srnprot";
+			reg = <0x0d800060 0x4>;
+		};
+
+		GPIO: gpio@d8000c0 {
+			#gpio-cells = <2>;
+			compatible = "nintendo,hollywood-gpio";
+			reg = <0x0d8000c0 0x40>;
+			gpio-controller;
+			ngpios = <24>;
+
+			gpio-line-names =
+				"POWER", "SHUTDOWN", "FAN", "DC_DC",
+				"DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN",
+				"SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK",
+				"EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA",
+				"DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
+				"DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
+
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupts = <10>;
+			interrupt-parent = <&PIC1>;
+
+			/*
+			 * This is commented out while a standard binding
+			 * for i2c over gpio is defined.
+			 */
+			/*
+			i2c-video {
+				#address-cells = <1>;
+				#size-cells = <0>;
+			        compatible = "i2c-gpio";
+
+			        gpios = <&GPIO 15 0
+			                 &GPIO 14 0>;
+			        clock-frequency = <250000>;
+				no-clock-stretching;
+			        scl-is-open-drain;
+			        sda-is-open-drain;
+			        sda-enforce-dir;
+
+			        AVE: audio-video-encoder@70 {
+			                compatible = "nintendo,wii-audio-video-encoder";
+			                reg = <0x70>;
+			        };
+			};
+			*/
+		};
+
+		control@d800100 {
+			compatible = "nintendo,hollywood-control";
+			/*
+			 * Both the address and length are wrong, according to
+			 * Wiibrew this should be <0x0d800000 0x400>, but it
+			 * requires refactoring the PIC1, GPIO and OTP nodes
+			 * before changing that.
+			 */
+			reg = <0x0d800100 0xa0>;
+		};
+
+		otp@d8001ec {
+			compatible = "nintendo,hollywood-otp";
+			reg = <0x0d8001ec 0x8>;
+		};
+
+		disk@d806000 {
+			compatible = "nintendo,hollywood-di";
+			reg = <0x0d806000 0x40>;
+			interrupts = <2>;
+		};
+	};
+
+	gpio-leds {
+		compatible = "gpio-leds";
+
+		/* This is the blue LED in the disk drive slot */
+		drive-slot {
+			label = "wii:blue:drive_slot";
+			gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>;
+			panic-indicator;
+		};
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		power {
+			label = "Power Button";
+			gpios = <&GPIO 0 GPIO_ACTIVE_HIGH>;
+			linux,code = <KEY_POWER>;
+		};
+
+		eject {
+			label = "Eject Button";
+			gpios = <&GPIO 6 GPIO_ACTIVE_HIGH>;
+			linux,code = <KEY_EJECTCD>;
+		};
+	};
+};
+
diff --git a/arch/powerpc/boot/dts/xcalibur1501.dts b/arch/powerpc/boot/dts/xcalibur1501.dts
new file mode 100644
index 0000000000..46c25bda95
--- /dev/null
+++ b/arch/powerpc/boot/dts/xcalibur1501.dts
@@ -0,0 +1,693 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008 Extreme Engineering Solutions, Inc.
+ * Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
+ *
+ * XCalibur1501 6U CompactPCI single-board computer based on MPC8572E
+ */
+
+/dts-v1/;
+/ {
+	model = "xes,xcalibur1501";
+	compatible = "xes,xcalibur1501", "xes,MPC8572";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		ethernet3 = &enet3;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8572@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,8572@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;
+			bus-frequency = <0>;
+			clock-frequency = <0>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>;	// Filled in by U-Boot
+	};
+
+	localbus@ef005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8572-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xef005000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+		/* Local bus region mappings */
+		ranges = <0 0 0 0xf8000000 0x8000000  /* CS0: Flash 1 */
+			  1 0 0 0xf0000000 0x8000000  /* CS1: Flash 2 */
+			  2 0 0 0xef800000 0x40000    /* CS2: NAND CE1 */
+			  3 0 0 0xef840000 0x40000    /* CS3: NAND CE2 */
+			  4 0 0 0xe9000000 0x100000>; /* CS4: USB */
+
+		nor-boot@0,0 {
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			reg = <0 0 0x8000000>; /* 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "Primary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Primary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Primary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Primary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Primary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB */
+				read-only;
+			};
+		};
+
+		nor-alternate@1,0 {
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			//reg = <0xf0000000 0x08000000>; /* 128MB */
+			reg = <1 0 0x8000000>; /* 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "Secondary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Secondary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Secondary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Secondary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Secondary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB */
+				read-only;
+			};
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/*
+			 * Actual part could be ST Micro NAND08GW3B2A (1 GB),
+			 * Micron MT29F8G08DAA (2x 512 MB), or Micron
+			 * MT29F16G08FAA (2x 1 GB), depending on the build
+			 * configuration
+			 */
+			compatible = "fsl,mpc8572-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <2 0 0x40000>;
+			/* U-Boot should fix this up if chip size > 1 GB */
+			partition@0 {
+				label = "NAND Filesystem";
+				reg = <0 0x40000000>;
+			};
+		};
+
+		usb@4,0 {
+			compatible = "nxp,usb-isp1761";
+			reg = <4 0 0x100000>;
+			bus-width = <32>;
+			interrupt-parent = <&mpic>;
+			interrupts = <10 1>;
+		};
+	};
+
+	soc8572@ef000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8572-immr", "simple-bus";
+		ranges = <0x0 0 0xef000000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8572-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		memory-controller@6000 {
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x6000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,mpc8572-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x100000>; // L2, 1M
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+
+			temp-sensor@48 {
+				compatible = "dallas,ds1631", "dallas,ds1621";
+				reg = <0x48>;
+			};
+
+			temp-sensor@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			cpu-supervisor@51 {
+				compatible = "dallas,ds4510";
+				reg = <0x51>;
+			};
+
+			eeprom@54 {
+				compatible = "atmel,at24c128b";
+				reg = <0x54>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00",
+				             "dallas,ds1338";
+				reg = <0x68>;
+			};
+
+			pcie-switch@6a {
+				compatible = "plx,pex8648";
+				reg = <0x6a>;
+			};
+
+			/* On-board signals for VID, flash, serial */
+			gpio1: gpio@18 {
+				compatible = "nxp,pca9557";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* PMC0/XMC0 signals */
+			gpio2: gpio@1c {
+				compatible = "nxp,pca9557";
+				reg = <0x1c>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* PMC1/XMC1 signals */
+			gpio3: gpio@1d {
+				compatible = "nxp,pca9557";
+				reg = <0x1d>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* CompactPCI signals (sysen, GA[4:0]) */
+			gpio4: gpio@1e {
+				compatible = "nxp,pca9557";
+				reg = <0x1e>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* CompactPCI J5 GPIO and FAL/DEG/PRST */
+			gpio5: gpio@1f {
+				compatible = "nxp,pca9557";
+				reg = <0x1f>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@c300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;
+			ranges = <0x0 0xc100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		/* eTSEC 1 front panel 0 */
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <4 1>;
+					reg = <0x1>;
+				};
+				phy1: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <4 1>;
+					reg = <0x2>;
+				};
+				phy2: ethernet-phy@3 {
+					interrupt-parent = <&mpic>;
+					interrupts = <5 1>;
+					reg = <0x3>;
+				};
+				phy3: ethernet-phy@4 {
+					interrupt-parent = <&mpic>;
+					interrupts = <5 1>;
+					reg = <0x4>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC 2 front panel 1 */
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC 3 PICMG2.16 backplane port 0 */
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy2>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC 4 PICMG2.16 backplane port 1 */
+		enet3: ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <3>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x27000 0x1000>;
+			ranges = <0x0 0x27000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <37 2 38 2 39 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
+			phy-handle = <&phy3>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi3: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* UART0 */
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		/* UART1 */
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,mpc8572-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+
+		msi@41600 {
+			compatible = "fsl,mpc8572-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		gpio0: gpio@f000 {
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x1000>;
+			interrupts = <47 2>;
+			interrupt-parent = <&mpic>;
+			#gpio-cells = <2>;
+			gpio-controller;
+		};
+
+		gpio-leds {
+			compatible = "gpio-leds";
+
+			heartbeat {
+				label = "Heartbeat";
+				gpios = <&gpio0 4 1>;
+				linux,default-trigger = "heartbeat";
+			};
+
+			yellow {
+				label = "Yellow";
+				gpios = <&gpio0 5 1>;
+			};
+
+			red {
+				label = "Red";
+				gpios = <&gpio0 6 1>;
+			};
+
+			green {
+				label = "Green";
+				gpios = <&gpio0 7 1>;
+			};
+		};
+
+		/* PME (pattern-matcher) */
+		pme@10000 {
+			compatible = "fsl,mpc8572-pme", "pme8572";
+			reg = <0x10000 0x5000>;
+			interrupts = <57 2 64 2 65 2 66 2 67 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@2f000 {
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x2f000 0x1000>;
+			interrupts = <61 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@15000 {
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x15000 0x1000>;
+			interrupts = <75 2>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	/*
+	 * PCI Express controller 3 @ ef008000 is not used.
+	 * This would have been pci0 on other mpc85xx platforms.
+	 *
+	 * PCI Express controller 2 @ ef009000 is not used.
+	 * This would have been pci1 on other mpc85xx platforms.
+	 */
+
+	/* PCI Express controller 1, wired to PEX8648 PCIe switch */
+	pci2: pcie@ef00a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef00a000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x40000000
+			  0x1000000 0x0 0x00000000 0 0xe8000000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x40000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/xpedite5200.dts b/arch/powerpc/boot/dts/xpedite5200.dts
new file mode 100644
index 0000000000..74b346f2d4
--- /dev/null
+++ b/arch/powerpc/boot/dts/xpedite5200.dts
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009 Extreme Engineering Solutions, Inc.
+ * Based on TQM8548 device tree
+ *
+ * XPedite5200 PrPMC/XMC module based on MPC8548E
+ */
+
+/dts-v1/;
+
+/ {
+	model = "xes,xpedite5200";
+	compatible = "xes,xpedite5200", "xes,MPC8548";
+	#address-cells = <1>;	// top-level reg/ranges parent addresses use one cell
+	#size-cells = <1>;	// top-level sizes use one cell
+
+	aliases {	// short names for labelled nodes defined further down in this file
+		ethernet0 = &enet0;	// ethernet@24000
+		ethernet1 = &enet1;	// ethernet@25000
+		ethernet2 = &enet2;	// ethernet@26000
+		ethernet3 = &enet3;	// ethernet@27000
+
+		serial0 = &serial0;	// serial@4500
+		serial1 = &serial1;	// serial@4600
+		pci0 = &pci0;	// pci@ef008000
+	};
+
+	cpus {	// single CPU node, PowerPC,8548@0
+		#address-cells = <1>;
+		#size-cells = <0>;	// CPU reg carries an index only, no size
+
+		PowerPC,8548@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;	// L1, 32K
+			i-cache-size = <0x8000>;	// L1, 32K
+			next-level-cache = <&L2>;	// L2 labels l2-cache-controller@20000 in the soc node
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0>;	// base 0, size 0 placeholder; filled in by U-Boot
+	};
+
+	soc@ef000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xef000000 0x100000>;	// child offset 0x0 maps to 0xef000000; window is 0x100000 = 1 MiB
+		bus-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+		compatible = "fsl,mpc8548-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;	// soc offset 0x0 (0xef000000 via the soc ranges), 4 KiB
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8548-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;	// soc offset 0x1000, 4 KiB
+			interrupts = <17 2>;	// one 2-cell specifier (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8548-memory-controller";
+			reg = <0x2000 0x1000>;	// soc offset 0x2000, 4 KiB
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	// one 2-cell specifier (mpic #interrupt-cells = <2>)
+		};
+
+		L2: l2-cache-controller@20000 {	// referenced by next-level-cache in the cpus node
+			compatible = "fsl,mpc8548-l2-cache-controller";
+			reg = <0x20000 0x1000>;	// soc offset 0x20000, 4 KiB
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>;	// L2, 512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		/* On-card I2C */
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;	// child reg is a single address cell with no size
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;	// soc offset 0x3000, 256 bytes
+			interrupts = <43 2>;	// same specifier as i2c@3100
+			interrupt-parent = <&mpic>;
+			dfsrr;	// valueless (boolean) property
+
+			/*
+			 * Board GPIO:
+			 * 	0: BRD_CFG0 (1: P14 IO present)
+			 * 	1: BRD_CFG1 (1: FP ethernet present)
+			 * 	2: BRD_CFG2 (1: XMC IO present)
+			 * 	3: XMC root complex indicator
+			 * 	4: Flash boot device indicator
+			 * 	5: Flash write protect enable
+			 * 	6: PMC monarch indicator
+			 * 	7: PMC EREADY
+			 */
+			gpio1: gpio@18 {
+				compatible = "nxp,pca9556";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* P14 GPIO */
+			gpio2: gpio@19 {
+				compatible = "nxp,pca9556";
+				reg = <0x19>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,at24c16";
+				reg = <0x50>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00",
+					     "dallas,ds1338";
+				reg = <0x68>;
+			};
+
+			dtt@34 {	// listed after rtc@68 although its address 0x34 is lower
+				compatible = "maxim,max1237";
+				reg = <0x34>;
+			};
+		};
+
+		/* Off-card I2C */
+		i2c@3100 {	// no child devices are described on this bus
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;	// soc offset 0x3100, 256 bytes
+			interrupts = <43 2>;	// same specifier as i2c@3000
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {	// DMA controller with four channel child nodes
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;	// 4 bytes at soc offset 0x21300
+			ranges = <0x0 0x21100 0x200>;	// channel offsets 0x0-0x1ff map to soc offsets 0x21100-0x212ff
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	// soc offset 0x21100, 128 bytes
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	// soc offset 0x21180, 128 bytes
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	// soc offset 0x21200, 128 bytes
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	// soc offset 0x21280, 128 bytes
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		/* eTSEC1: Front panel port 0 */
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;	// soc offset 0x24000, 4 KiB
+			ranges = <0x0 0x24000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <29 2 30 2 34 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;	// tbi-phy@11 below
+			phy-handle = <&phy0>;	// ethernet-phy@1 below
+
+			mdio@520 {	// this mdio node holds the PHYs referenced by all four enet nodes
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;	// soc offset 0x24520 via the ranges above
+
+				phy0: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	// all four PHY nodes use this same specifier
+					reg = <0x1>;
+				};
+				phy1: ethernet-phy@2 {	// referenced by enet1
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x2>;
+				};
+				phy2: ethernet-phy@3 {	// referenced by enet2
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x3>;
+				};
+				phy3: ethernet-phy@4 {	// referenced by enet3
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x4>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC2: Front panel port 1 */
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;	// soc offset 0x25000, 4 KiB
+			ranges = <0x0 0x25000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <35 2 36 2 40 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;	// tbi-phy@11 below
+			phy-handle = <&phy1>;	// phy1 is defined under enet0's mdio@520, not in this node
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// unlike enet0's "fsl,gianfar-mdio"; only the TBI PHY is listed here
+				reg = <0x520 0x20>;	// soc offset 0x25520 via the ranges above
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC3: Rear panel port 2 */
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;	// soc offset 0x26000, 4 KiB
+			ranges = <0x0 0x26000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <31 2 32 2 33 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;	// tbi-phy@11 below
+			phy-handle = <&phy2>;	// phy2 is defined under enet0's mdio@520, not in this node
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// unlike enet0's "fsl,gianfar-mdio"; only the TBI PHY is listed here
+				reg = <0x520 0x20>;	// soc offset 0x26520 via the ranges above
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC4: Rear panel port 3 */
+		enet3: ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <3>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x27000 0x1000>;	// soc offset 0x27000, 4 KiB
+			ranges = <0x0 0x27000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <37 2 38 2 39 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;	// tbi-phy@11 below
+			phy-handle = <&phy3>;	// phy3 is defined under enet0's mdio@520, not in this node
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// unlike enet0's "fsl,gianfar-mdio"; only the TBI PHY is listed here
+				reg = <0x520 0x20>;	// soc offset 0x27520 via the ranges above
+
+				tbi3: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {	// target of the serial0 alias
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;	// soc offset 0x4500, 256 bytes
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+			current-speed = <115200>;
+			interrupts = <42 2>;	// same specifier as serial1
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {	// target of the serial1 alias
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// soc offset 0x4600, 256 bytes
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+			current-speed = <115200>;
+			interrupts = <42 2>;	// same specifier as serial0
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	// global utilities reg
+			compatible = "fsl,mpc8548-guts";
+			reg = <0xe0000 0x1000>;	// soc offset 0xe0000, 4 KiB
+			fsl,has-rstcr;	// valueless (boolean) property
+		};
+
+		mpic: pic@40000 {	// interrupt-parent of every interrupt-generating node in this file
+			interrupt-controller;
+			#address-cells = <0>;	// interrupt-map entries that reference &mpic carry no address cells for it
+			#interrupt-cells = <2>;	// each interrupt specifier in this file is a pair of cells
+			reg = <0x40000 0x40000>;	// soc offset 0x40000, 256 KiB
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+	};
+
+	localbus@ef005000 {
+		compatible = "fsl,mpc8548-localbus", "fsl,pq3-localbus",
+			     "simple-bus";
+		#address-cells = <2>;	// child reg: <bank offset>, matching the "@bank,offset" unit addresses below
+		#size-cells = <1>;
+		reg = <0xef005000 0x100>;	// BRx, ORx, etc.
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <
+			0 0x0 0xfc000000 0x04000000	// NOR boot flash: 64 MiB window
+			1 0x0 0xf8000000 0x04000000	// NOR expansion flash: 64 MiB window
+			2 0x0 0xef800000 0x00010000	// NAND CE1: 64 KiB window
+			3 0x0 0xef840000 0x00010000	// NAND CE2: 64 KiB window; no child node uses bank 3
+		>;
+
+		nor-boot@0,0 {	// four partitions that together cover the full 0x4000000 bytes
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0 0x0 0x4000000>;	// bank 0, offset 0, 64 MiB
+			bank-width = <2>;
+
+			partition@0 {
+				label = "Primary OS";
+				reg = <0x00000000 0x180000>;	// 1.5 MiB
+			};
+			partition@180000 {
+				label = "Secondary OS";
+				reg = <0x00180000 0x180000>;	// 1.5 MiB
+			};
+			partition@300000 {
+				label = "User";
+				reg = <0x00300000 0x3c80000>;	// 60.5 MiB, ends at 0x3f80000
+			};
+			partition@3f80000 {
+				label = "Boot firmware";
+				reg = <0x03f80000 0x80000>;	// 512 KiB, ends at 0x4000000
+			};
+		};
+
+		nor-alternate@1,0 {	// two partitions that together cover the full 0x4000000 bytes
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <1 0x0 0x4000000>;	// bank 1, offset 0, 64 MiB
+			bank-width = <2>;
+
+			partition@0 {
+				label = "Filesystem";
+				reg = <0x00000000 0x3f80000>;	// 63.5 MiB
+			};
+			partition@3f80000 {
+				label = "Alternate boot firmware";
+				reg = <0x03f80000 0x80000>;	// 512 KiB, ends at 0x4000000
+			};
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "xes,address-ctl-nand";
+			reg = <2 0x0 0x10000>;	// bank 2, offset 0, 64 KiB
+			cle-line = <0x8>;	/* CLE tied to A3 */
+			ale-line = <0x10>;	/* ALE tied to A4 */
+
+			/* U-Boot should fix this up */
+			partition@0 {
+				label = "NAND Filesystem";
+				reg = <0 0x40000000>;	// 1 GiB; far larger than the 64 KiB reg window above
+			};
+		};
+	};
+
+	/* PMC interface */
+	pci0: pci@ef008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xef008000 0x1000>;	// 4 KiB of controller registers
+		clock-frequency = <33333333>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL */
+				 0xe000 0 0 1 &mpic 2 1	// 7 cells: 3 child address, 1 child interrupt, &mpic, 2 mpic interrupt cells
+				 0xe000 0 0 2 &mpic 3 1>;	// only child interrupts 1 and 2 are mapped
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x40000000	// 6 cells: 3 child address, 1 parent address, 2 size; 1 GiB at 0x80000000
+			  0x01000000 0 0x00000000 0xe8000000 0 0x00800000>;	// 8 MiB at parent address 0xe8000000
+	};
+
+	/* XMC PCIe is not yet enabled in U-Boot on XPedite5200 */
+};
diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
new file mode 100644
index 0000000000..d491c7a8f9
--- /dev/null
+++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
@@ -0,0 +1,505 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009 Extreme Engineering Solutions, Inc.
+ * Based on TQM8548 device tree
+ *
+ * XPedite5200 PrPMC/XMC module based on MPC8548E.  This dts is for the
+ * xMon boot loader memory map which differs from U-Boot's.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "xes,xpedite5200";
+	compatible = "xes,xpedite5200", "xes,MPC8548";
+	#address-cells = <1>;	// top-level reg/ranges parent addresses use one cell
+	#size-cells = <1>;	// top-level sizes use one cell
+	form-factor = "PMC/XMC";
+	boot-bank = <0x0>;
+
+	aliases {	// short names for labelled nodes defined further down in this file
+		ethernet0 = &enet0;	// ethernet@24000
+		ethernet1 = &enet1;	// ethernet@25000
+		ethernet2 = &enet2;	// ethernet@26000
+		ethernet3 = &enet3;	// ethernet@27000
+
+		serial0 = &serial0;	// serial@4500
+		serial1 = &serial1;	// serial@4600
+		pci0 = &pci0;	// pci@ef008000
+		pci1 = &pci1;	// pcie@ef00a000
+	};
+
+	cpus {	// single CPU node, PowerPC,8548@0
+		#address-cells = <1>;
+		#size-cells = <0>;	// CPU reg carries an index only, no size
+
+		PowerPC,8548@0 {
+			device_type = "cpu";
+			reg = <0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;	// L1, 32K
+			i-cache-size = <0x8000>;	// L1, 32K
+			next-level-cache = <&L2>;	// L2 labels l2-cache-controller@20000 in the soc node
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0>;	// base 0, size 0 placeholder; filled in by boot loader
+	};
+
+	soc@ef000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		ranges = <0x0 0xef000000 0x100000>;	// child offset 0x0 maps to 0xef000000; window is 0x100000 = 1 MiB
+		bus-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+		compatible = "fsl,mpc8548-immr", "simple-bus";
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;	// soc offset 0x0 (0xef000000 via the soc ranges), 4 KiB
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8548-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;	// soc offset 0x1000, 4 KiB
+			interrupts = <17 2>;	// one 2-cell specifier (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,mpc8548-memory-controller";
+			reg = <0x2000 0x1000>;	// soc offset 0x2000, 4 KiB
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	// one 2-cell specifier (mpic #interrupt-cells = <2>)
+		};
+
+		L2: l2-cache-controller@20000 {	// referenced by next-level-cache in the cpus node
+			compatible = "fsl,mpc8548-l2-cache-controller";
+			reg = <0x20000 0x1000>;	// soc offset 0x20000, 4 KiB
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>;	// L2, 512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		/* On-card I2C */
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;	// child reg is a single address cell with no size
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;	// soc offset 0x3000, 256 bytes
+			interrupts = <43 2>;	// same specifier as i2c@3100
+			interrupt-parent = <&mpic>;
+			dfsrr;	// valueless (boolean) property
+
+			/*
+			 * Board GPIO:
+			 * 	0: BRD_CFG0 (1: P14 IO present)
+			 * 	1: BRD_CFG1 (1: FP ethernet present)
+			 * 	2: BRD_CFG2 (1: XMC IO present)
+			 * 	3: XMC root complex indicator
+			 * 	4: Flash boot device indicator
+			 * 	5: Flash write protect enable
+			 * 	6: PMC monarch indicator
+			 * 	7: PMC EREADY
+			 */
+			gpio1: gpio@18 {
+				compatible = "nxp,pca9556";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			/* P14 GPIO */
+			gpio2: gpio@19 {
+				compatible = "nxp,pca9556";
+				reg = <0x19>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,at24c16";
+				reg = <0x50>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00",
+					     "dallas,ds1338";
+				reg = <0x68>;
+			};
+
+			dtt@34 {	// listed after rtc@68 although its address 0x34 is lower
+				compatible = "maxim,max1237";
+				reg = <0x34>;
+			};
+		};
+
+		/* Off-card I2C */
+		i2c@3100 {	// no child devices are described on this bus
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;	// soc offset 0x3100, 256 bytes
+			interrupts = <43 2>;	// same specifier as i2c@3000
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@21300 {	// DMA controller with four channel child nodes
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;	// 4 bytes at soc offset 0x21300
+			ranges = <0x0 0x21100 0x200>;	// channel offsets 0x0-0x1ff map to soc offsets 0x21100-0x212ff
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	// soc offset 0x21100, 128 bytes
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	// soc offset 0x21180, 128 bytes
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	// soc offset 0x21200, 128 bytes
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8548-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	// soc offset 0x21280, 128 bytes
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		/* eTSEC1: Front panel port 0 */
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;	// soc offset 0x24000, 4 KiB
+			ranges = <0x0 0x24000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <29 2 30 2 34 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;	// tbi-phy@11 below
+			phy-handle = <&phy0>;	// ethernet-phy@1 below
+
+			mdio@520 {	// this mdio node holds the PHYs referenced by all four enet nodes
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;	// soc offset 0x24520 via the ranges above
+
+				phy0: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	// all four PHY nodes use this same specifier
+					reg = <0x1>;
+				};
+				phy1: ethernet-phy@2 {	// referenced by enet1
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x2>;
+				};
+				phy2: ethernet-phy@3 {	// referenced by enet2
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x3>;
+				};
+				phy3: ethernet-phy@4 {	// referenced by enet3
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x4>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC2: Front panel port 1 */
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;	// soc offset 0x25000, 4 KiB
+			ranges = <0x0 0x25000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <35 2 36 2 40 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;	// tbi-phy@11 below
+			phy-handle = <&phy1>;	// phy1 is defined under enet0's mdio@520, not in this node
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// unlike enet0's "fsl,gianfar-mdio"; only the TBI PHY is listed here
+				reg = <0x520 0x20>;	// soc offset 0x25520 via the ranges above
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC3: Rear panel port 2 */
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;	// soc offset 0x26000, 4 KiB
+			ranges = <0x0 0x26000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <31 2 32 2 33 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;	// tbi-phy@11 below
+			phy-handle = <&phy2>;	// phy2 is defined under enet0's mdio@520, not in this node
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// unlike enet0's "fsl,gianfar-mdio"; only the TBI PHY is listed here
+				reg = <0x520 0x20>;	// soc offset 0x26520 via the ranges above
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC4: Rear panel port 3 */
+		enet3: ethernet@27000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <3>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x27000 0x1000>;	// soc offset 0x27000, 4 KiB
+			ranges = <0x0 0x27000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <37 2 38 2 39 2>;	// three 2-cell specifiers (mpic #interrupt-cells = <2>)
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;	// tbi-phy@11 below
+			phy-handle = <&phy3>;	// phy3 is defined under enet0's mdio@520, not in this node
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";	// unlike enet0's "fsl,gianfar-mdio"; only the TBI PHY is listed here
+				reg = <0x520 0x20>;	// soc offset 0x27520 via the ranges above
+
+				tbi3: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		serial0: serial@4500 {	// target of the serial0 alias and of chosen/stdout-path
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;	// soc offset 0x4500, 256 bytes
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+			current-speed = <9600>;
+			interrupts = <42 2>;	// same specifier as serial1
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {	// target of the serial1 alias
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;	// soc offset 0x4600, 256 bytes
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+			current-speed = <9600>;
+			interrupts = <42 2>;	// same specifier as serial0
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	// global utilities reg
+			compatible = "fsl,mpc8548-guts";
+			reg = <0xe0000 0x1000>;	// soc offset 0xe0000, 4 KiB
+			fsl,has-rstcr;	// valueless (boolean) property
+		};
+
+		mpic: pic@40000 {	// interrupt-parent of every interrupt-generating node in this file
+			interrupt-controller;
+			#address-cells = <0>;	// interrupt-map entries that reference &mpic carry no address cells for it
+			#interrupt-cells = <2>;	// each interrupt specifier in this file is a pair of cells
+			reg = <0x40000 0x40000>;	// soc offset 0x40000, 256 KiB
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+	};
+
+	localbus@ef005000 {
+		compatible = "fsl,mpc8548-localbus", "fsl,pq3-localbus",
+			     "simple-bus";
+		#address-cells = <2>;	// child reg: <bank offset>, matching the "@bank,offset" unit addresses below
+		#size-cells = <1>;
+		reg = <0xef005000 0x100>;	// BRx, ORx, etc.
+		interrupt-parent = <&mpic>;
+		interrupts = <19 2>;
+
+		ranges = <
+			0 0x0 0xf8000000 0x08000000	// NOR boot flash: 128 MiB window
+			1 0x0 0xf0000000 0x08000000	// NOR expansion flash: 128 MiB window
+			2 0x0 0xe8000000 0x00010000	// NAND CE1: 64 KiB window
+			3 0x0 0xe8010000 0x00010000	// NAND CE2: 64 KiB window; no child node uses bank 3
+		>;
+
+		nor-boot@0,0 {	// four partitions that together cover the full 0x4000000 bytes
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0 0x0 0x4000000>;	// bank 0, 64 MiB. NOTE(review): the localbus ranges window for bank 0 is 0x08000000 (128 MiB) -- confirm which is intended
+			bank-width = <2>;
+
+			partition@0 {
+				label = "Primary OS";
+				reg = <0x00000000 0x180000>;	// 1.5 MiB
+			};
+			partition@180000 {
+				label = "Secondary OS";
+				reg = <0x00180000 0x180000>;	// 1.5 MiB
+			};
+			partition@300000 {
+				label = "User";
+				reg = <0x00300000 0x3c80000>;	// 60.5 MiB, ends at 0x3f80000
+			};
+			partition@3f80000 {
+				label = "Boot firmware";
+				reg = <0x03f80000 0x80000>;	// 512 KiB, ends at 0x4000000
+			};
+		};
+
+		nor-alternate@1,0 {	// two partitions that together cover the full 0x4000000 bytes
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <1 0x0 0x4000000>;	// bank 1, 64 MiB. NOTE(review): the localbus ranges window for bank 1 is 0x08000000 (128 MiB) -- confirm which is intended
+			bank-width = <2>;
+
+			partition@0 {
+				label = "Filesystem";
+				reg = <0x00000000 0x3f80000>;	// 63.5 MiB
+			};
+			partition@3f80000 {
+				label = "Alternate boot firmware";
+				reg = <0x03f80000 0x80000>;	// 512 KiB, ends at 0x4000000
+			};
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "xes,address-ctl-nand";
+			reg = <2 0x0 0x10000>;	// bank 2, offset 0, 64 KiB
+			cle-line = <0x8>;	/* CLE tied to A3 */
+			ale-line = <0x10>;	/* ALE tied to A4 */
+
+			partition@0 {
+				label = "NAND Filesystem";
+				reg = <0 0x40000000>;	// 1 GiB; far larger than the 64 KiB reg window above
+			};
+		};
+	};
+
+	/* PMC interface */
+	pci0: pci@ef008000 {
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		reg = <0xef008000 0x1000>;	// 4 KiB of controller registers
+		clock-frequency = <33333333>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+				/* IDSEL */
+				 0xe000 0 0 1 &mpic 2 1	// 7 cells: 3 child address, 1 child interrupt, &mpic, 2 mpic interrupt cells
+				 0xe000 0 0 2 &mpic 3 1>;	// only child interrupts 1 and 2 are mapped
+
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		bus-range = <0 0>;
+		ranges = <0x02000000 0 0x80000000 0x80000000 0 0x20000000	// 6 cells: 3 child address, 1 parent address, 2 size; 512 MiB at 0x80000000
+			  0x01000000 0 0x00000000 0xd0000000 0 0x01000000>;	// 16 MiB at parent address 0xd0000000
+	};
+
+	/* XMC PCIe */
+	pci1: pcie@ef00a000 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0x00000 0 0 1 &mpic 0 1	// child interrupts 1..4 are routed to mpic specifiers <0 1>..<3 1>
+			0x00000 0 0 2 &mpic 1 1
+			0x00000 0 0 3 &mpic 2 1
+			0x00000 0 0 4 &mpic 3 1>;
+
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		bus-range = <0 0xff>;
+		ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x20000000	// 6 cells: 3 child address, 1 parent address, 2 size; 512 MiB at 0xa0000000
+			  0x01000000 0 0x00000000 0xd1000000 0 0x01000000>;	// 16 MiB at parent address 0xd1000000
+		clock-frequency = <33333333>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xef00a000 0x1000>;	// 4 KiB of controller registers
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		pcie@0 {
+			reg = <0 0 0 0 0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0 0xc0000000 0x02000000 0	// 8 cells per entry: 3 child address, 3 parent address, 2 size
+			          0xc0000000 0 0x20000000	// NOTE(review): uses 0xc0000000 while the enclosing pci1 ranges start at 0xa0000000 -- confirm
+				  0x01000000 0 0x00000000 0x01000000 0
+				  0x00000000 0 0x08000000>;	// NOTE(review): 0x08000000 (128 MiB) exceeds the 0x01000000 second window of pci1 -- confirm
+		};
+	};
+
+	/* Needed for dtbImage boot wrapper compatibility */
+	chosen {
+		stdout-path = &serial0;	// serial0 labels serial@4500 in the soc node
+	};
+};
diff --git a/arch/powerpc/boot/dts/xpedite5301.dts b/arch/powerpc/boot/dts/xpedite5301.dts
new file mode 100644
index 0000000000..12184e1796
--- /dev/null
+++ b/arch/powerpc/boot/dts/xpedite5301.dts
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008 Extreme Engineering Solutions, Inc.
+ * Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
+ *
+ * XPedite5301 PMC/XMC module based on MPC8572E
+ */
+
+/dts-v1/;
+/ {
+	model = "xes,xpedite5301";
+	compatible = "xes,xpedite5301", "xes,MPC8572";
+	#address-cells = <2>;	// top-level reg/ranges parent addresses use two cells
+	#size-cells = <2>;	// top-level sizes use two cells
+	form-factor = "PMC/XMC";
+	boot-bank = <0x0>;	/* 0: Primary flash, 1: Secondary flash */
+
+	aliases {	// short names for labelled nodes in this file
+		ethernet0 = &enet0;	// ethernet@24000
+		ethernet1 = &enet1;	// ethernet@25000
+		serial0 = &serial0;	// serial@4500
+		serial1 = &serial1;	// serial@4600
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {	// two CPU nodes with identical properties apart from reg
+		#address-cells = <1>;
+		#size-cells = <0>;	// CPU reg carries an index only, no size
+
+		PowerPC,8572@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+			bus-frequency = <0>;	// zero placeholder, as above
+			clock-frequency = <0>;	// zero placeholder, as above
+			next-level-cache = <&L2>;	// L2 labels l2-cache-controller@20000; both CPUs reference it
+		};
+
+		PowerPC,8572@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	// zero placeholder; presumably fixed up by the boot loader -- TODO confirm
+			bus-frequency = <0>;	// zero placeholder, as above
+			clock-frequency = <0>;	// zero placeholder, as above
+			next-level-cache = <&L2>;	// L2 labels l2-cache-controller@20000; both CPUs reference it
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>;	// 2 address cells + 2 size cells, all zero; filled in by U-Boot
+	};
+
+	localbus@ef005000 {
+		#address-cells = <2>;	// child reg: <chip-select offset>, matching the "@cs,offset" unit addresses below
+		#size-cells = <1>;
+		compatible = "fsl,mpc8572-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xef005000 0 0x1000>;	// 2-cell address, 2-cell size (root #address-cells/#size-cells = <2>)
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+		/* Local bus region mappings: 5 cells each (2 child address, 2 parent address, 1 size) */
+		ranges = <0 0 0 0xf8000000 0x8000000 /* CS0: Boot flash, 128 MiB */
+			  1 0 0 0xf0000000 0x8000000 /* CS1: Alternate flash, 128 MiB */
+			  2 0 0 0xef800000 0x40000   /* CS2: NAND CE1, 256 KiB */
+			  3 0 0 0xef840000 0x40000>; /* CS3: NAND CE2, 256 KiB */
+
+		nor-boot@0,0 {	// five partitions that together cover the full 0x8000000 bytes
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			reg = <0 0 0x8000000>; /* 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "Primary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Primary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Primary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Primary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Primary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB, ends at 0x8000000 */
+				read-only;	// the only partition of this flash marked read-only
+			};
+		};
+
+		nor-alternate@1,0 {	// same partition layout as nor-boot@0,0, with "Secondary" labels
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			//reg = <0xf0000000 0x08000000>; /* 128MB */
+			reg = <1 0 0x8000000>; /* 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "Secondary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Secondary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Secondary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Secondary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Secondary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB, ends at 0x8000000 */
+				read-only;	// the only partition of this flash marked read-only
+			};
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/*
+			 * Actual part could be ST Micro NAND08GW3B2A (1 GB),
+			 * Micron MT29F8G08DAA (2x 512 MB), or Micron
+			 * MT29F16G08FAA (2x 1 GB), depending on the build
+			 * configuration
+			 */
+			compatible = "fsl,mpc8572-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <2 0 0x40000>;	// chip-select 2, offset 0, 256 KiB (matches the CS2 ranges entry)
+			/* U-Boot should fix this up if chip size > 1 GB */
+			partition@0 {
+				label = "NAND Filesystem";
+				reg = <0 0x40000000>;	// 1 GiB
+			};
+		};
+
+	};
+
+	soc8572@ef000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8572-immr", "simple-bus";
+		ranges = <0x0 0 0xef000000 0x100000>;	// 1 child cell, 2 parent cells, 1 size cell: offset 0x0 -> 0xef000000, 1 MiB
+		bus-frequency = <0>;		// Filled in by U-Boot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;	// soc offset 0x0 (0xef000000 via the soc ranges), 4 KiB
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8572-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;	// soc offset 0x1000, 4 KiB
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {	// one of two memory-controller nodes; the other is @6000
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x2000 0x1000>;	// soc offset 0x2000, 4 KiB
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	// same specifier as memory-controller@6000
+		};
+
+		memory-controller@6000 {	// one of two memory-controller nodes; the other is @2000
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x6000 0x1000>;	// soc offset 0x6000, 4 KiB
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	// same specifier as memory-controller@2000
+		};
+
+		L2: l2-cache-controller@20000 {	// referenced by next-level-cache of both CPU nodes
+			compatible = "fsl,mpc8572-l2-cache-controller";
+			reg = <0x20000 0x1000>;	// soc offset 0x20000, 4 KiB
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x100000>; // L2, 1M
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {	// I2C bus carrying sensors, supervisor, EEPROM, RTC, PCIe switch and four GPIO expanders
+			#address-cells = <1>;
+			#size-cells = <0>;	// child reg is a single address cell with no size
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;	// soc offset 0x3000, 256 bytes
+			interrupts = <43 2>;	// same specifier as i2c@3100
+			interrupt-parent = <&mpic>;
+			dfsrr;	// valueless (boolean) property
+
+			temp-sensor@48 {
+				compatible = "dallas,ds1631", "dallas,ds1621";
+				reg = <0x48>;
+			};
+
+			temp-sensor@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			cpu-supervisor@51 {
+				compatible = "dallas,ds4510";
+				reg = <0x51>;
+			};
+
+			eeprom@54 {
+				compatible = "atmel,at24c128b";
+				reg = <0x54>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00",
+				             "dallas,ds1338";
+				reg = <0x68>;
+			};
+
+			pcie-switch@70 {
+				compatible = "plx,pex8518";
+				reg = <0x70>;
+			};
+
+			gpio1: gpio@18 {	// gpio1..gpio4 are four "nxp,pca9557" nodes differing only in address
+				compatible = "nxp,pca9557";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio2: gpio@1c {
+				compatible = "nxp,pca9557";
+				reg = <0x1c>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio3: gpio@1e {
+				compatible = "nxp,pca9557";
+				reg = <0x1e>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio4: gpio@1f {
+				compatible = "nxp,pca9557";
+				reg = <0x1f>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+		};
+
+		i2c@3100 {	// second I2C bus; no child devices are described on it
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;	// soc offset 0x3100, 256 bytes
+			interrupts = <43 2>;	// same specifier as i2c@3000
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		dma@c300 {	// DMA controller with cell-index 1; dma@21300 carries cell-index 0
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;	// 4 bytes at soc offset 0xc300
+			ranges = <0x0 0xc100 0x200>;	// channel offsets 0x0-0x1ff map to soc offsets 0xc100-0xc2ff
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	// soc offset 0xc100, 128 bytes
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	// soc offset 0xc180, 128 bytes
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	// soc offset 0xc200, 128 bytes
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	// soc offset 0xc280, 128 bytes
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		dma@21300 {	// DMA controller with cell-index 0; dma@c300 carries cell-index 1
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;	// 4 bytes at soc offset 0x21300
+			ranges = <0x0 0x21100 0x200>;	// channel offsets 0x0-0x1ff map to soc offsets 0x21100-0x212ff
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	// soc offset 0x21100, 128 bytes
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	// soc offset 0x21180, 128 bytes
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	// soc offset 0x21200, 128 bytes
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	// soc offset 0x21280, 128 bytes
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		/* eTSEC 1 */
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;	// soc offset 0x24000, 4 KiB
+			ranges = <0x0 0x24000 0x1000>;	// child offset 0 maps onto this node's own register window
+			local-mac-address = [ 00 00 00 00 00 00 ];	// all-zero placeholder; presumably set by the boot loader -- TODO confirm
+			interrupts = <29 2 30 2 34 2>;	// six cells; presumably three 2-cell specifiers -- TODO confirm against mpic #interrupt-cells
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;	// tbi-phy@11 below
+			phy-handle = <&phy0>;	// ethernet-phy@1 below
+			phy-connection-type = "sgmii";
+
+			mdio@520 {	// this mdio node holds the PHYs referenced by both enet nodes
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;	// soc offset 0x24520 via the ranges above
+
+				phy0: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	// same specifier as phy1
+					reg = <0x1>;
+				};
+				phy1: ethernet-phy@2 {	// referenced by enet1
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;
+					reg = <0x2>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC 2 */
+		enet1: ethernet@25000 {	/* aliased as ethernet1 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;	/* children are addressed relative to 0x25000 */
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the bootloader -- confirm */
+			interrupts = <35 2 36 2 40 2>;	/* three two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;	/* phy1 is defined under enet0's mdio node */
+			phy-connection-type = "sgmii";
+
+			mdio@520 {	/* 0x25520 in the parent's address space; holds only the TBI PHY */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {	/* target of enet1's tbi-handle */
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* UART0 */
+		serial0: serial@4500 {	/* aliased as serial0 */
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;	/* zero placeholder; presumably fixed up at boot -- confirm */
+			interrupts = <42 2>;	/* same interrupt as serial1 */
+			interrupt-parent = <&mpic>;
+		};
+
+		/* UART1 */
+		serial1: serial@4600 {	/* aliased as serial1 */
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;	/* zero placeholder; presumably fixed up at boot -- confirm */
+			interrupts = <42 2>;	/* same interrupt as serial0 */
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	// Global utilities block
+			compatible = "fsl,mpc8572-guts";
+			reg = <0xe0000 0x1000>;	/* 4 KiB */
+			fsl,has-rstcr;	/* boolean property: present means true */
+		};
+
+		msi@41600 {	/* lies inside the pic@40000 register range (0x40000-0x7ffff) */
+			compatible = "fsl,mpc8572-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;	/* presumably <start count> -- confirm against the binding */
+			interrupts = <	/* eight two-cell specifiers: 0xe0-0xe7, second cell 0 */
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",	/* most specific first, then older fallbacks */
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;	/* 64 KiB */
+			interrupts = <45 2 58 2>;	/* two two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;	/* bitmask; bit meanings are defined by the binding */
+			fsl,descriptor-types-mask = <0x3ab0ebf>;	/* bitmask; bit meanings are defined by the binding */
+		};
+
+		mpic: pic@40000 {	/* interrupt-parent of every device node in this file */
+			interrupt-controller;
+			#address-cells = <0>;	/* so interrupt-map rows carry no parent unit address */
+			#interrupt-cells = <2>;	/* every interrupts entry targeting &mpic is two cells */
+			reg = <0x40000 0x40000>;	/* 256 KiB: 0x40000-0x7ffff */
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		gpio0: gpio@f000 {	/* referenced by the gpio-leds children */
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x1000>;	/* 4 KiB */
+			interrupts = <47 2>;
+			interrupt-parent = <&mpic>;
+			#gpio-cells = <2>;	/* consumers pass two cells after the phandle */
+			gpio-controller;
+		};
+
+		gpio-leds {	/* four LEDs on gpio0 specifiers 4-7 */
+			compatible = "gpio-leds";
+
+			heartbeat {
+				label = "Heartbeat";
+				gpios = <&gpio0 4 1>;	/* two specifier cells (presumably pin and flags -- confirm) */
+				linux,default-trigger = "heartbeat";	/* only LED with a default trigger */
+			};
+
+			yellow {
+				label = "Yellow";
+				gpios = <&gpio0 5 1>;
+			};
+
+			red {
+				label = "Red";
+				gpios = <&gpio0 6 1>;
+			};
+
+			green {
+				label = "Green";
+				gpios = <&gpio0 7 1>;
+			};
+		};
+
+		/* PME (pattern-matcher) */
+		pme@10000 {
+			compatible = "fsl,mpc8572-pme", "pme8572";
+			reg = <0x10000 0x5000>;	/* 20 KiB */
+			interrupts = <57 2 64 2 65 2 66 2 67 2>;	/* five two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@2f000 {	/* one of two TLU nodes; the other is tlu@15000 */
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x2f000 0x1000>;	/* 4 KiB */
+			interrupts = <61 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@15000 {	/* one of two TLU nodes; the other is tlu@2f000 */
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x15000 0x1000>;	/* 4 KiB */
+			interrupts = <75 2>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	/*
+	 * PCI Express controller 3 @ ef008000 is not used.
+	 * This would have been pci0 on other mpc85xx platforms.
+	 */
+
+	/* PCI Express controller 2, wired to XMC P15 connector */
+	pci1: pcie@ef009000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;	/* child interrupt specifier is one cell (1-4 in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef009000 0 0x1000>;	/* two-cell address, two-cell size: 4 KiB at 0xef009000 */
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x10000000	/* 3-cell PCI addr, 2-cell CPU addr, 2-cell size: 256 MiB at 0xc0000000 */
+			  0x1000000 0x0 0x00000000 0 0xe8800000 0x0 0x00010000>;	/* PCI 0x0 -> CPU 0xe8800000, 64 KiB (I/O space per the PCI bus binding) */
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* applied to <3-cell address, pin> before matching the rows below */
+		interrupt-map = <	/* per row: 3 address cells, pin, &mpic, 2-cell MPIC specifier */
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x4 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x5 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x6 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+		pcie@0 {
+			reg = <0x00000000 0x00000000 0x00000000 0x00000000 0x00000000>;	/* 3 address + 2 size cells, all zero */
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xc0000000	/* child PCI address (3 cells) */
+				  0x2000000 0x0 0xc0000000	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x10000000	/* size (2 cells): 256 MiB */
+
+				  0x1000000 0x0 0x0	/* child PCI address (3 cells) */
+				  0x1000000 0x0 0x0	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x100000>;	/* size: 1 MiB. NOTE(review): host bridge I/O range above is 64 KiB -- confirm intended */
+		};
+	};
+
+	/* PCI Express controller 1, wired to PEX8112 for PMC interface */
+	pci2: pcie@ef00a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;	/* child interrupt specifier is one cell (1-4 in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef00a000 0 0x1000>;	/* two-cell address, two-cell size: 4 KiB at 0xef00a000 */
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x40000000	/* 3-cell PCI addr, 2-cell CPU addr, 2-cell size: 1 GiB at 0x80000000 */
+			  0x1000000 0x0 0x00000000 0 0xe8000000 0x0 0x10000>;	/* PCI 0x0 -> CPU 0xe8000000, 64 KiB (I/O space per the PCI bus binding) */
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* applied to <3-cell address, pin> before matching the rows below */
+		interrupt-map = <	/* per row: 3 address cells, pin, &mpic, 2-cell MPIC specifier */
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;	/* 3 address + 2 size cells, all zero */
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000	/* child PCI address (3 cells) */
+				  0x2000000 0x0 0x80000000	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x40000000	/* size (2 cells): 1 GiB */
+
+				  0x1000000 0x0 0x0	/* child PCI address (3 cells) */
+				  0x1000000 0x0 0x0	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x100000>;	/* size: 1 MiB. NOTE(review): host bridge I/O range above is 64 KiB -- confirm intended */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/xpedite5330.dts b/arch/powerpc/boot/dts/xpedite5330.dts
new file mode 100644
index 0000000000..e8fc90c52a
--- /dev/null
+++ b/arch/powerpc/boot/dts/xpedite5330.dts
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008 Extreme Engineering Solutions, Inc.
+ * Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
+ *
+ * XPedite5330 3U CompactPCI module based on MPC8572E
+ */
+
+/dts-v1/;
+/ {
+	model = "xes,xpedite5330";
+	compatible = "xes,xpedite5330", "xes,MPC8572";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	form-factor = "3U CompactPCI";
+	boot-bank = <0x0>;	/* 0: Primary flash, 1: Secondary flash */
+
+	aliases {	/* each value is a label defined on a node later in this file */
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;	/* pcie@ef008000 */
+		pci1 = &pci1;	/* pcie@ef009000 */
+		pci2 = &pci2;	/* pcie@ef00a000 */
+	};
+
+	pmcslots {	/* one slot described; neither optional boolean below is set */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		pmcslot@0 {
+			cell-index = <0>;
+			/*
+			 * boolean properties (true if defined):
+			 *     monarch;
+			 *     module-present;
+			 */
+		};
+	};
+
+	xmcslots {	/* one slot described; the optional boolean below is not set */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		xmcslot@0 {
+			cell-index = <0>;
+			/*
+			 * boolean properties (true if defined):
+			 *     module-present;
+			 */
+		};
+	};
+
+	cpci {
+		/*
+		 * boolean properties (true if defined):
+		 *     system-controller;
+		 */
+		system-controller;	/* defined here, so true by the rule above */
+	};
+
+	cpus {	/* two identical cores, reg 0 and 1 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8572@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			bus-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			next-level-cache = <&L2>;	// l2-cache-controller@20000, shared with core 1
+		};
+
+		PowerPC,8572@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			bus-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			next-level-cache = <&L2>;	// l2-cache-controller@20000, shared with core 0
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>;	// 2-cell address + 2-cell size, both zero; filled in by U-Boot
+	};
+
+	localbus@ef005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8572-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xef005000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+		/* Local bus region mappings */
+		ranges = <0 0 0 0xf8000000 0x8000000 /* CS0: Boot flash */
+			  1 0 0 0xf0000000 0x8000000 /* CS1: Alternate flash */
+			  2 0 0 0xef800000 0x40000   /* CS2: NAND CE1 */
+			  3 0 0 0xef840000 0x40000>; /* CS3: NAND CE2 */
+
+		nor-boot@0,0 {	/* CS0 in the localbus ranges (boot flash) */
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			reg = <0 0 0x8000000>; /* chip select 0, offset 0, 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {	/* the five partitions tile 0x0-0x7ffffff with no gaps */
+				label = "Primary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Primary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Primary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Primary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Primary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB; ends at 0x8000000, the end of the chip */
+				read-only;	/* the only read-only partition on this chip */
+			};
+		};
+
+		nor-alternate@1,0 {	/* CS1 in the localbus ranges (alternate flash) */
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			// CPU-absolute form of the reg below (CS1 maps to 0xf0000000 in the localbus ranges): <0xf0000000 0x08000000>
+			reg = <1 0 0x8000000>; /* chip select 1, offset 0, 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {	/* same layout as nor-boot: five partitions tiling 0x0-0x7ffffff */
+				label = "Secondary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Secondary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Secondary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Secondary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Secondary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB; ends at 0x8000000, the end of the chip */
+				read-only;	/* the only read-only partition on this chip */
+			};
+		};
+
+		nand@2,0 {	/* CS2 in the localbus ranges (NAND CE1) */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/*
+			 * Actual part could be ST Micro NAND08GW3B2A (1 GB),
+			 * Micron MT29F8G08DAA (2x 512 MB), or Micron
+			 * MT29F16G08FAA (2x 1 GB), depending on the build
+			 * configuration
+			 */
+			compatible = "fsl,mpc8572-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <2 0 0x40000>;	/* chip select 2, offset 0, 256 KiB bus window */
+			/* U-Boot should fix this up if chip size > 1 GB */
+			partition@0 {
+				label = "NAND Filesystem";
+				reg = <0 0x40000000>;	/* 1 GiB; exceeds the 0x40000 bus window, so presumably device offsets -- confirm */
+			};
+		};
+
+	};
+
+	soc8572@ef000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8572-immr", "simple-bus";
+		ranges = <0x0 0 0xef000000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;	/* first 4 KiB of the soc window (0xef000000 via the soc ranges) */
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8572-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;	/* 4 KiB, directly after ecm-law@0 */
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {	/* first of two memory-controller nodes */
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x2000 0x1000>;	/* 4 KiB */
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	/* same interrupt as memory-controller@6000 */
+		};
+
+		memory-controller@6000 {	/* second of two memory-controller nodes */
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x6000 0x1000>;	/* 4 KiB */
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	/* same interrupt as memory-controller@2000 */
+		};
+
+		L2: l2-cache-controller@20000 {	/* next-level-cache target of both cpu nodes */
+			compatible = "fsl,mpc8572-l2-cache-controller";
+			reg = <0x20000 0x1000>;	/* 4 KiB of registers */
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x100000>; // L2, 1M
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {	/* first I2C bus (cell-index 0); all described I2C devices sit here */
+			#address-cells = <1>;
+			#size-cells = <0>;	/* children have a one-cell bus address and no size */
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;	/* same interrupt as i2c@3100 */
+			interrupt-parent = <&mpic>;
+			dfsrr;	/* boolean property: present means true */
+
+			temp-sensor@48 {
+				compatible = "dallas,ds1631", "dallas,ds1621";
+				reg = <0x48>;
+			};
+
+			temp-sensor@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			cpu-supervisor@51 {
+				compatible = "dallas,ds4510";
+				reg = <0x51>;
+			};
+
+			eeprom@54 {
+				compatible = "atmel,at24c128b";
+				reg = <0x54>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00",
+				             "dallas,ds1338";
+				reg = <0x68>;
+			};
+
+			pcie-switch@70 {
+				compatible = "plx,pex8518";
+				reg = <0x70>;
+			};
+
+			gpio1: gpio@18 {	/* first of four nxp,pca9557 GPIO controllers (0x18, 0x1c, 0x1e, 0x1f) */
+				compatible = "nxp,pca9557";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio2: gpio@1c {
+				compatible = "nxp,pca9557";
+				reg = <0x1c>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio3: gpio@1e {
+				compatible = "nxp,pca9557";
+				reg = <0x1e>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio4: gpio@1f {
+				compatible = "nxp,pca9557";
+				reg = <0x1f>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+		};
+
+		i2c@3100 {	/* second I2C bus (cell-index 1); no child devices are described */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;	/* same interrupt as i2c@3000 */
+			interrupt-parent = <&mpic>;
+			dfsrr;	/* boolean property: present means true */
+		};
+
+		dma@c300 {	/* DMA controller node with four 0x80-byte channel children */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;	/* 4 bytes directly above the channel window (0xc100 + 0x200) */
+			ranges = <0x0 0xc100 0x200>;	/* child offset 0x0 -> parent 0xc100, 0x200 bytes */
+			cell-index = <1>;	/* index 1 although listed first; dma@21300 is index 0 */
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	/* parent address 0xc100 via ranges */
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	/* parent address 0xc180 via ranges */
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	/* parent address 0xc200 via ranges */
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	/* parent address 0xc280 via ranges; last slot of the window */
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		dma@21300 {	/* DMA controller node with four 0x80-byte channel children */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;	/* 4 bytes directly above the channel window (0x21100 + 0x200) */
+			ranges = <0x0 0x21100 0x200>;	/* child offset 0x0 -> parent 0x21100, 0x200 bytes */
+			cell-index = <0>;	/* index 0 although listed second; dma@c300 is index 1 */
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	/* parent address 0x21100 via ranges */
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	/* parent address 0x21180 via ranges */
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	/* parent address 0x21200 via ranges */
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	/* parent address 0x21280 via ranges; last slot of the window */
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		/* eTSEC 1 */
+		enet0: ethernet@24000 {	/* aliased as ethernet0; its mdio child hosts both external PHYs */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;	/* children are addressed relative to 0x24000 */
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the bootloader -- confirm */
+			interrupts = <29 2 30 2 34 2>;	/* three two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {	/* 0x24520 in the parent's address space via ranges */
+				#address-cells = <1>;
+				#size-cells = <0>;	/* children have a one-cell address and no size */
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@1 {	/* target of enet0's phy-handle */
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	/* same interrupt as phy1 */
+					reg = <0x1>;
+				};
+				phy1: ethernet-phy@2 {	/* target of enet1's phy-handle, although defined here */
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	/* same interrupt as phy0 */
+					reg = <0x2>;
+				};
+				tbi0: tbi-phy@11 {	/* target of enet0's tbi-handle */
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC 2 */
+		enet1: ethernet@25000 {	/* aliased as ethernet1 */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;	/* children are addressed relative to 0x25000 */
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the bootloader -- confirm */
+			interrupts = <35 2 36 2 40 2>;	/* three two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;	/* phy1 is defined under enet0's mdio node */
+			phy-connection-type = "sgmii";
+
+			mdio@520 {	/* 0x25520 in the parent's address space; holds only the TBI PHY */
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {	/* target of enet1's tbi-handle */
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* UART0 */
+		serial0: serial@4500 {	/* aliased as serial0 */
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;	/* zero placeholder; presumably fixed up at boot -- confirm */
+			interrupts = <42 2>;	/* same interrupt as serial1 */
+			interrupt-parent = <&mpic>;
+		};
+
+		/* UART1 */
+		serial1: serial@4600 {	/* aliased as serial1 */
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;	/* zero placeholder; presumably fixed up at boot -- confirm */
+			interrupts = <42 2>;	/* same interrupt as serial0 */
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	// Global utilities block
+			compatible = "fsl,mpc8572-guts";
+			reg = <0xe0000 0x1000>;	/* 4 KiB */
+			fsl,has-rstcr;	/* boolean property: present means true */
+		};
+
+		msi@41600 {	/* lies inside the pic@40000 register range (0x40000-0x7ffff) */
+			compatible = "fsl,mpc8572-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;	/* presumably <start count> -- confirm against the binding */
+			interrupts = <	/* eight two-cell specifiers: 0xe0-0xe7, second cell 0 */
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",	/* most specific first, then older fallbacks */
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;	/* 64 KiB */
+			interrupts = <45 2 58 2>;	/* two two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;	/* bitmask; bit meanings are defined by the binding */
+			fsl,descriptor-types-mask = <0x3ab0ebf>;	/* bitmask; bit meanings are defined by the binding */
+		};
+
+		mpic: pic@40000 {	/* interrupt-parent of every device node in this file */
+			interrupt-controller;
+			#address-cells = <0>;	/* so interrupt-map rows carry no parent unit address */
+			#interrupt-cells = <2>;	/* every interrupts entry targeting &mpic is two cells */
+			reg = <0x40000 0x40000>;	/* 256 KiB: 0x40000-0x7ffff */
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		gpio0: gpio@f000 {	/* referenced by the gpio-leds children */
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x1000>;	/* 4 KiB */
+			interrupts = <47 2>;
+			interrupt-parent = <&mpic>;
+			#gpio-cells = <2>;	/* consumers pass two cells after the phandle */
+			gpio-controller;
+		};
+
+		gpio-leds {	/* four LEDs on gpio0 specifiers 4-7 */
+			compatible = "gpio-leds";
+
+			heartbeat {
+				label = "Heartbeat";
+				gpios = <&gpio0 4 1>;	/* two specifier cells (presumably pin and flags -- confirm) */
+				linux,default-trigger = "heartbeat";	/* only LED with a default trigger */
+			};
+
+			yellow {
+				label = "Yellow";
+				gpios = <&gpio0 5 1>;
+			};
+
+			red {
+				label = "Red";
+				gpios = <&gpio0 6 1>;
+			};
+
+			green {
+				label = "Green";
+				gpios = <&gpio0 7 1>;
+			};
+		};
+
+		/* PME (pattern-matcher) */
+		pme@10000 {
+			compatible = "fsl,mpc8572-pme", "pme8572";
+			reg = <0x10000 0x5000>;	/* 20 KiB */
+			interrupts = <57 2 64 2 65 2 66 2 67 2>;	/* five two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@2f000 {	/* one of two TLU nodes; the other is tlu@15000 */
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x2f000 0x1000>;	/* 4 KiB */
+			interrupts = <61 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@15000 {	/* one of two TLU nodes; the other is tlu@2f000 */
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x15000 0x1000>;	/* 4 KiB */
+			interrupts = <75 2>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	/* PCI Express controller 3 - CompactPCI bus via PEX8112 bridge */
+	pci0: pcie@ef008000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;	/* child interrupt specifier is one cell (1-4 in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef008000 0 0x1000>;	/* two-cell address, two-cell size: 4 KiB at 0xef008000 */
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xe0000000 0 0xe0000000 0x0 0x10000000	/* 3-cell PCI addr, 2-cell CPU addr, 2-cell size: 256 MiB at 0xe0000000 */
+			  0x1000000 0x0 0x00000000 0 0xe9000000 0x0 0x10000>;	/* PCI 0x0 -> CPU 0xe9000000, 64 KiB (I/O space per the PCI bus binding) */
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;	/* NOTE(review): 0xff00 here, 0xf800 on pci1 and pci2 -- confirm intended */
+		interrupt-map = <	/* per row: 3 address cells, pin, &mpic, 2-cell MPIC specifier */
+			0x0 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;	/* 3 address + 2 size cells, all zero */
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0x0 0xe0000000	/* child PCI address (3 cells) */
+				  0x02000000 0x0 0xe0000000	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x10000000	/* size (2 cells): 256 MiB */
+
+				  0x01000000 0x0 0x0	/* child PCI address (3 cells) */
+				  0x01000000 0x0 0x0	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x100000>;	/* size: 1 MiB. NOTE(review): host bridge I/O range above is 64 KiB -- confirm intended */
+		};
+	};
+
+	/* PCI Express controller 2, PMC module via PEX8112 bridge */
+	pci1: pcie@ef009000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;	/* child interrupt specifier is one cell (1-4 in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef009000 0 0x1000>;	/* two-cell address, two-cell size: 4 KiB at 0xef009000 */
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x10000000	/* 3-cell PCI addr, 2-cell CPU addr, 2-cell size: 256 MiB at 0xc0000000 */
+			  0x1000000 0x0 0x00000000 0 0xe8800000 0x0 0x10000>;	/* PCI 0x0 -> CPU 0xe8800000, 64 KiB (I/O space per the PCI bus binding) */
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* applied to <3-cell address, pin> before matching the rows below */
+		interrupt-map = <	/* per row: 3 address cells, pin, &mpic, 2-cell MPIC specifier */
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x4 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x5 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x6 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;	/* 3 address + 2 size cells, all zero */
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xc0000000	/* child PCI address (3 cells) */
+				  0x2000000 0x0 0xc0000000	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x10000000	/* size (2 cells): 256 MiB */
+
+				  0x1000000 0x0 0x0	/* child PCI address (3 cells) */
+				  0x1000000 0x0 0x0	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x100000>;	/* size: 1 MiB. NOTE(review): host bridge I/O range above is 64 KiB -- confirm intended */
+		};
+	};
+
+	/* PCI Express controller 1, XMC P15 */
+	pci2: pcie@ef00a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;	/* child interrupt specifier is one cell (1-4 in interrupt-map) */
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef00a000 0 0x1000>;	/* two-cell address, two-cell size: 4 KiB at 0xef00a000 */
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x40000000	/* 3-cell PCI addr, 2-cell CPU addr, 2-cell size: 1 GiB at 0x80000000 */
+			  0x1000000 0x0 0x00000000 0 0xe8000000 0x0 0x10000>;	/* PCI 0x0 -> CPU 0xe8000000, 64 KiB (I/O space per the PCI bus binding) */
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;	/* applied to <3-cell address, pin> before matching the rows below */
+		interrupt-map = <	/* per row: 3 address cells, pin, &mpic, 2-cell MPIC specifier */
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;	/* 3 address + 2 size cells, all zero */
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000	/* child PCI address (3 cells) */
+				  0x2000000 0x0 0x80000000	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x40000000	/* size (2 cells): 1 GiB */
+
+				  0x1000000 0x0 0x0	/* child PCI address (3 cells) */
+				  0x1000000 0x0 0x0	/* parent PCI address (3 cells): identity mapping */
+				  0x0 0x100000>;	/* size: 1 MiB. NOTE(review): host bridge I/O range above is 64 KiB -- confirm intended */
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/xpedite5370.dts b/arch/powerpc/boot/dts/xpedite5370.dts
new file mode 100644
index 0000000000..2b5aa2f3a7
--- /dev/null
+++ b/arch/powerpc/boot/dts/xpedite5370.dts
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008 Extreme Engineering Solutions, Inc.
+ * Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
+ *
+ * XPedite5370 3U VPX single-board computer based on MPC8572E
+ */
+
+/dts-v1/;
+/ {
+	model = "xes,xpedite5370";
+	compatible = "xes,xpedite5370", "xes,MPC8572";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {	/* each value is a label on another node; no pci0 alias is defined here */
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {	/* two identical cores, reg 0 and 1 */
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,8572@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			bus-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			next-level-cache = <&L2>;	// l2-cache-controller@20000, shared with core 1
+		};
+
+		PowerPC,8572@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			d-cache-line-size = <32>;	// 32 bytes
+			i-cache-line-size = <32>;	// 32 bytes
+			d-cache-size = <0x8000>;		// L1, 32K
+			i-cache-size = <0x8000>;		// L1, 32K
+			timebase-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			bus-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			clock-frequency = <0>;	// zero placeholder; presumably fixed up by U-Boot -- confirm
+			next-level-cache = <&L2>;	// l2-cache-controller@20000, shared with core 0
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x0 0x0 0x0>;	// 2-cell address + 2-cell size, both zero; filled in by U-Boot
+	};
+
+	localbus@ef005000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc8572-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xef005000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+		/* Local bus region mappings */
+		ranges = <0 0 0 0xf8000000 0x8000000 /* CS0: Boot flash */
+			  1 0 0 0xf0000000 0x8000000 /* CS1: Alternate flash */
+			  2 0 0 0xef800000 0x40000   /* CS2: NAND CE1 */
+			  3 0 0 0xef840000 0x40000>; /* CS3: NAND CE2 */
+
+		nor-boot@0,0 {	/* CS0 in the localbus ranges (boot flash) */
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			reg = <0 0 0x8000000>; /* chip select 0, offset 0, 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {	/* the five partitions tile 0x0-0x7ffffff with no gaps */
+				label = "Primary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Primary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Primary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Primary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Primary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB; ends at 0x8000000, the end of the chip */
+				read-only;	/* the only read-only partition on this chip */
+			};
+		};
+
+		nor-alternate@1,0 {	/* CS1 in the localbus ranges (alternate flash) */
+			compatible = "amd,s29gl01gp", "cfi-flash";
+			bank-width = <2>;
+			// CPU-absolute form of the reg below (CS1 maps to 0xf0000000 in the localbus ranges): <0xf0000000 0x08000000>
+			reg = <1 0 0x8000000>; /* chip select 1, offset 0, 128MB */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {	/* same layout as nor-boot: five partitions tiling 0x0-0x7ffffff */
+				label = "Secondary user space";
+				reg = <0x00000000 0x6f00000>; /* 111 MB */
+			};
+			partition@6f00000 {
+				label = "Secondary kernel";
+				reg = <0x6f00000 0x1000000>; /* 16 MB */
+			};
+			partition@7f00000 {
+				label = "Secondary DTB";
+				reg = <0x7f00000 0x40000>; /* 256 KB */
+			};
+			partition@7f40000 {
+				label = "Secondary U-Boot environment";
+				reg = <0x7f40000 0x40000>; /* 256 KB */
+			};
+			partition@7f80000 {
+				label = "Secondary U-Boot";
+				reg = <0x7f80000 0x80000>; /* 512 KB; ends at 0x8000000, the end of the chip */
+				read-only;	/* the only read-only partition on this chip */
+			};
+		};
+
+		nand@2,0 {	/* CS2 in the localbus ranges (NAND CE1) */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/*
+			 * Actual part could be ST Micro NAND08GW3B2A (1 GB),
+			 * Micron MT29F8G08DAA (2x 512 MB), or Micron
+			 * MT29F16G08FAA (2x 1 GB), depending on the build
+			 * configuration
+			 */
+			compatible = "fsl,mpc8572-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <2 0 0x40000>;	/* chip select 2, offset 0, 256 KiB bus window */
+			/* U-Boot should fix this up if chip size > 1 GB */
+			partition@0 {
+				label = "NAND Filesystem";
+				reg = <0 0x40000000>;	/* 1 GiB; exceeds the 0x40000 bus window, so presumably device offsets -- confirm */
+			};
+		};
+
+	};
+
+	soc8572@ef000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,mpc8572-immr", "simple-bus";
+		ranges = <0x0 0 0xef000000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;	/* first 4 KiB of the soc window (0xef000000 via the soc ranges) */
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,mpc8572-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;	/* 4 KiB, directly after ecm-law@0 */
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {	/* first of two memory-controller nodes */
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x2000 0x1000>;	/* 4 KiB */
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	/* same interrupt as memory-controller@6000 */
+		};
+
+		memory-controller@6000 {	/* second of two memory-controller nodes */
+			compatible = "fsl,mpc8572-memory-controller";
+			reg = <0x6000 0x1000>;	/* 4 KiB */
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;	/* same interrupt as memory-controller@2000 */
+		};
+
+		L2: l2-cache-controller@20000 {	/* next-level-cache target of both cpu nodes */
+			compatible = "fsl,mpc8572-l2-cache-controller";
+			reg = <0x20000 0x1000>;	/* 4 KiB of registers */
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x100000>; // L2, 1M
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {	/* first I2C bus (cell-index 0); all described I2C devices sit here */
+			#address-cells = <1>;
+			#size-cells = <0>;	/* children have a one-cell bus address and no size */
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;	/* same interrupt as i2c@3100 */
+			interrupt-parent = <&mpic>;
+			dfsrr;	/* boolean property: present means true */
+
+			temp-sensor@48 {
+				compatible = "dallas,ds1631", "dallas,ds1621";
+				reg = <0x48>;
+			};
+
+			temp-sensor@4c {
+				compatible = "adi,adt7461";
+				reg = <0x4c>;
+			};
+
+			cpu-supervisor@51 {
+				compatible = "dallas,ds4510";
+				reg = <0x51>;
+			};
+
+			eeprom@54 {
+				compatible = "atmel,at24c128b";
+				reg = <0x54>;
+			};
+
+			rtc@68 {
+				compatible = "st,m41t00",
+				             "dallas,ds1338";
+				reg = <0x68>;
+			};
+
+			pcie-switch@70 {
+				compatible = "plx,pex8518";
+				reg = <0x70>;
+			};
+
+			gpio1: gpio@18 {	/* first of four nxp,pca9557 GPIO controllers (0x18, 0x1c, 0x1e, 0x1f) */
+				compatible = "nxp,pca9557";
+				reg = <0x18>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio2: gpio@1c {
+				compatible = "nxp,pca9557";
+				reg = <0x1c>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio3: gpio@1e {
+				compatible = "nxp,pca9557";
+				reg = <0x1e>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+
+			gpio4: gpio@1f {
+				compatible = "nxp,pca9557";
+				reg = <0x1f>;
+				#gpio-cells = <2>;
+				gpio-controller;
+				polarity = <0x00>;
+			};
+		};
+
+		i2c@3100 {	/* second I2C bus (cell-index 1); no child devices are described */
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;	/* same interrupt as i2c@3000 */
+			interrupt-parent = <&mpic>;
+			dfsrr;	/* boolean property: present means true */
+		};
+
+		dma@c300 {	/* DMA controller node with four 0x80-byte channel children */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;	/* 4 bytes directly above the channel window (0xc100 + 0x200) */
+			ranges = <0x0 0xc100 0x200>;	/* child offset 0x0 -> parent 0xc100, 0x200 bytes */
+			cell-index = <1>;	/* index 1 although listed first; dma@21300 is index 0 */
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	/* parent address 0xc100 via ranges */
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	/* parent address 0xc180 via ranges */
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	/* parent address 0xc200 via ranges */
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	/* parent address 0xc280 via ranges; last slot of the window */
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		dma@21300 {	/* DMA controller node with four 0x80-byte channel children */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8572-dma", "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;	/* 4 bytes directly above the channel window (0x21100 + 0x200) */
+			ranges = <0x0 0x21100 0x200>;	/* child offset 0x0 -> parent 0x21100, 0x200 bytes */
+			cell-index = <0>;	/* index 0 although listed second; dma@c300 is index 1 */
+			dma-channel@0 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;	/* parent address 0x21100 via ranges */
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;	/* parent address 0x21180 via ranges */
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;	/* parent address 0x21200 via ranges */
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,mpc8572-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;	/* parent address 0x21280 via ranges; last slot of the window */
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		/* eTSEC 1 */
+		enet0: ethernet@24000 {	/* aliased as ethernet0; its mdio child hosts both external PHYs */
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;	/* children are addressed relative to 0x24000 */
+			local-mac-address = [ 00 00 00 00 00 00 ];	/* all-zero placeholder; presumably set by the bootloader -- confirm */
+			interrupts = <29 2 30 2 34 2>;	/* three two-cell MPIC specifiers */
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {	/* 0x24520 in the parent's address space via ranges */
+				#address-cells = <1>;
+				#size-cells = <0>;	/* children have a one-cell address and no size */
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@1 {	/* target of enet0's phy-handle */
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	/* same interrupt as phy1 */
+					reg = <0x1>;
+				};
+				phy1: ethernet-phy@2 {	/* target of enet1's phy-handle, although defined here */
+					interrupt-parent = <&mpic>;
+					interrupts = <8 1>;	/* same interrupt as phy0 */
+					reg = <0x2>;
+				};
+				tbi0: tbi-phy@11 {	/* target of enet0's tbi-handle */
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* eTSEC 2 */
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+			phy-connection-type = "sgmii";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		/* UART0 */
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		/* UART1 */
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "fsl,ns16550", "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,mpc8572-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+
+		msi@41600 {
+			compatible = "fsl,mpc8572-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
+				     "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x9fe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		gpio0: gpio@f000 {
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x1000>;
+			interrupts = <47 2>;
+			interrupt-parent = <&mpic>;
+			#gpio-cells = <2>;
+			gpio-controller;
+		};
+
+		gpio-leds {
+			compatible = "gpio-leds";
+
+			heartbeat {
+				label = "Heartbeat";
+				gpios = <&gpio0 4 1>;
+				linux,default-trigger = "heartbeat";
+			};
+
+			yellow {
+				label = "Yellow";
+				gpios = <&gpio0 5 1>;
+			};
+
+			red {
+				label = "Red";
+				gpios = <&gpio0 6 1>;
+			};
+
+			green {
+				label = "Green";
+				gpios = <&gpio0 7 1>;
+			};
+		};
+
+		/* PME (pattern-matcher) */
+		pme@10000 {
+			compatible = "fsl,mpc8572-pme", "pme8572";
+			reg = <0x10000 0x5000>;
+			interrupts = <57 2 64 2 65 2 66 2 67 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@2f000 {
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x2f000 0x1000>;
+			interrupts = <61 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		tlu@15000 {
+			compatible = "fsl,mpc8572-tlu", "fsl_tlu";
+			reg = <0x15000 0x1000>;
+			interrupts = <75 2>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	/*
+	 * PCI Express controller 3 @ ef008000 is not used.
+	 * This would have been pci0 on other mpc85xx platforms.
+	 */
+
+	/* PCI Express controller 2, wired to VPX P1,P2 backplane */
+	pci1: pcie@ef009000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef009000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x10000000
+			  0x1000000 0x0 0x00000000 0 0xe8800000 0x0 0x00010000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x4 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x5 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x6 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+		pcie@0 {
+			reg = <0x00000000 0x00000000 0x00000000 0x00000000 0x00000000>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x10000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	/* PCI Express controller 1, wired to PEX8518 PCIe switch */
+	pci2: pcie@ef00a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xef00a000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x40000000
+			  0x1000000 0x0 0x00000000 0 0xe8000000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0x0 0x0 0x0 0x1 &mpic 0x0 0x1
+			0x0 0x0 0x0 0x2 &mpic 0x1 0x1
+			0x0 0x0 0x0 0x3 &mpic 0x2 0x1
+			0x0 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x40000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
new file mode 100644
index 0000000000..56508785ce
--- /dev/null
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -0,0 +1,332 @@
+/*
+ * Device Tree Source for AMCC Yosemite
+ *
+ * Copyright 2008 IBM Corp.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	model = "amcc,yosemite";
+	compatible = "amcc,yosemite";
+	dcr-parent = <&{/cpus/cpu@0}>;
+
+	aliases {
+		ethernet0 = &EMAC0;
+		ethernet1 = &EMAC1;
+		serial0 = &UART0;
+		serial1 = &UART1;
+		serial2 = &UART2;
+		serial3 = &UART3;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "PowerPC,440EP";
+			reg = <0x00000000>;
+			clock-frequency = <0>; /* Filled in by zImage */
+			timebase-frequency = <0>; /* Filled in by zImage */
+			i-cache-line-size = <32>;
+			d-cache-line-size = <32>;
+			i-cache-size = <32768>;
+			d-cache-size = <32768>;
+			dcr-controller;
+			dcr-access-method = "native";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by zImage */
+	};
+
+	UIC0: interrupt-controller0 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <0>;
+		dcr-reg = <0x0c0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+	};
+
+	UIC1: interrupt-controller1 {
+		compatible = "ibm,uic-440ep","ibm,uic";
+		interrupt-controller;
+		cell-index = <1>;
+		dcr-reg = <0x0d0 0x009>;
+		#address-cells = <0>;
+		#size-cells = <0>;
+		#interrupt-cells = <2>;
+		interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
+		interrupt-parent = <&UIC0>;
+	};
+
+	SDR0: sdr {
+		compatible = "ibm,sdr-440ep";
+		dcr-reg = <0x00e 0x002>;
+	};
+
+	CPR0: cpr {
+		compatible = "ibm,cpr-440ep";
+		dcr-reg = <0x00c 0x002>;
+	};
+
+	plb {
+		compatible = "ibm,plb-440ep", "ibm,plb-440gp", "ibm,plb4";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+		clock-frequency = <0>; /* Filled in by zImage */
+
+		SDRAM0: sdram {
+			compatible = "ibm,sdram-440ep", "ibm,sdram-405gp";
+			dcr-reg = <0x010 0x002>;
+		};
+
+		DMA0: dma {
+			compatible = "ibm,dma-440ep", "ibm,dma-440gp";
+			dcr-reg = <0x100 0x027>;
+		};
+
+		MAL0: mcmal {
+			compatible = "ibm,mcmal-440ep", "ibm,mcmal-440gp", "ibm,mcmal";
+			dcr-reg = <0x180 0x062>;
+			num-tx-chans = <4>;
+			num-rx-chans = <2>;
+			interrupt-parent = <&MAL0>;
+			interrupts = <0x0 0x1 0x2 0x3 0x4>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
+					/*RXEOB*/ 0x1 &UIC0 0xb 0x4
+					/*SERR*/  0x2 &UIC1 0x0 0x4
+					/*TXDE*/  0x3 &UIC1 0x1 0x4
+					/*RXDE*/  0x4 &UIC1 0x2 0x4>;
+		};
+
+		POB0: opb {
+			compatible = "ibm,opb-440ep", "ibm,opb-440gp", "ibm,opb";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			/* Bamboo is oddball in the 44x world and doesn't use the ERPN
+			 * bits.
+			 */
+			ranges = <0x00000000 0x00000000 0x00000000 0x80000000
+			          0x80000000 0x00000000 0x80000000 0x80000000>;
+			interrupt-parent = <&UIC1>;
+			interrupts = <0x7 0x4>;
+			clock-frequency = <0>; /* Filled in by zImage */
+
+			EBC0: ebc {
+				compatible = "ibm,ebc-440ep", "ibm,ebc-440gp", "ibm,ebc";
+				dcr-reg = <0x012 0x002>;
+				#address-cells = <2>;
+				#size-cells = <1>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				interrupts = <0x5 0x1>;
+				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl256n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0x00000000 0x00000000 0x04000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0x00000000 0x001e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <0x001e0000 0x00020000>;
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <0x00200000 0x01400000>;
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <0x01600000 0x00400000>;
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <0x01a00000 0x02540000>;
+					};
+					partition@3f40000 {
+						label = "env";
+						reg = <0x03f40000 0x00040000>;
+					};
+					partition@3f80000 {
+						label = "u-boot";
+						reg = <0x03f80000 0x00080000>;
+					};
+				};
+			};
+
+			UART0: serial@ef600300 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600300 0x00000008>;
+				virtual-reg = <0xef600300>;
+				clock-frequency = <0>; /* Filled in by zImage */
+				current-speed = <115200>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x0 0x4>;
+			};
+
+			UART1: serial@ef600400 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600400 0x00000008>;
+				virtual-reg = <0xef600400>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x1 0x4>;
+			};
+
+			UART2: serial@ef600500 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600500 0x00000008>;
+				virtual-reg = <0xef600500>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x3 0x4>;
+				status = "disabled";
+			};
+
+			UART3: serial@ef600600 {
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0xef600600 0x00000008>;
+				virtual-reg = <0xef600600>;
+				clock-frequency = <0>;
+				current-speed = <0>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x4 0x4>;
+				status = "disabled";
+			};
+
+			IIC0: i2c@ef600700 {
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xef600700 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x2 0x4>;
+			};
+
+			IIC1: i2c@ef600800 {
+				compatible = "ibm,iic-440ep", "ibm,iic-440gp", "ibm,iic";
+				reg = <0xef600800 0x00000014>;
+				interrupt-parent = <&UIC0>;
+				interrupts = <0x7 0x4>;
+			};
+
+			spi@ef600900 {
+				compatible = "amcc,spi-440ep";
+				reg = <0xef600900 0x00000006>;
+				interrupts = <0x8 0x4>;
+				interrupt-parent = <&UIC0>;
+			};
+
+			ZMII0: emac-zmii@ef600d00 {
+				compatible = "ibm,zmii-440ep", "ibm,zmii-440gp", "ibm,zmii";
+				reg = <0xef600d00 0x0000000c>;
+			};
+
+			EMAC0: ethernet@ef600e00 {
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1c 0x4 0x1d 0x4>;
+				reg = <0xef600e00 0x00000070>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <0 1>;
+				mal-rx-channel = <0>;
+				cell-index = <0>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <0>;
+			};
+
+			EMAC1: ethernet@ef600f00 {
+				device_type = "network";
+				compatible = "ibm,emac-440ep", "ibm,emac-440gp", "ibm,emac";
+				interrupt-parent = <&UIC1>;
+				interrupts = <0x1e 0x4 0x1f 0x4>;
+				reg = <0xef600f00 0x00000070>;
+				local-mac-address = [000000000000];
+				mal-device = <&MAL0>;
+				mal-tx-channel = <2 3>;
+				mal-rx-channel = <1>;
+				cell-index = <1>;
+				max-frame-size = <1500>;
+				rx-fifo-size = <4096>;
+				tx-fifo-size = <2048>;
+				phy-mode = "rmii";
+				phy-map = <0x00000000>;
+				zmii-device = <&ZMII0>;
+				zmii-channel = <1>;
+			};
+
+			usb@ef601000 {
+				compatible = "ohci-be";
+				reg = <0xef601000 0x00000080>;
+				interrupts = <0x8 0x4 0x9 0x4>;
+				interrupt-parent = < &UIC1 >;
+			};
+		};
+
+		PCI0: pci@ec000000 {
+			device_type = "pci";
+			#interrupt-cells = <1>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			compatible = "ibm,plb440ep-pci", "ibm,plb-pci";
+			primary;
+			reg = <0x00000000 0xeec00000 0x00000008	/* Config space access */
+			       0x00000000 0xeed00000 0x00000004	/* IACK */
+			       0x00000000 0xeed00000 0x00000004	/* Special cycle */
+			       0x00000000 0xef400000 0x00000040>;	/* Internal registers */
+
+			/* Outbound ranges, one memory and one IO,
+			 * later cannot be changed. Chip supports a second
+			 * IO range but we don't use it for now
+			 */
+			ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x20000000
+				  0x01000000 0x00000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
+
+			/* Inbound 2GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+
+			interrupt-map-mask = <0xf800 0x0 0x0 0x0>;
+			interrupt-map = <
+				/* IDSEL 12 */
+				0x6000 0x0 0x0 0x0 &UIC0 0x19 0x8
+			>;
+		};
+	};
+
+	chosen {
+		stdout-path = "/plb/opb/serial@ef600300";
+	};
+};
diff --git a/arch/powerpc/boot/ebony.c b/arch/powerpc/boot/ebony.c
new file mode 100644
index 0000000000..add2316d34
--- /dev/null
+++ b/arch/powerpc/boot/ebony.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *
+ * Based on earlier code:
+ *   Copyright (C) Paul Mackerras 1997.
+ *
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "reg.h"
+#include "io.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+
+static u8 *ebony_mac0, *ebony_mac1;	/* pointers saved by ebony_init() */
+
+#define EBONY_FPGA_PATH		"/plb/opb/ebc/fpga"
+#define	EBONY_FPGA_FLASH_SEL	0x01	/* mask tested against the FPGA byte */
+#define EBONY_SMALL_FLASH_PATH	"/plb/opb/ebc/small-flash"
+
+/* Toggle the small-flash "reg" address word when FPGA FLASH_SEL is set. */
+static void ebony_flashsel_fixup(void)
+{
+	u32 cells[3] = {0x0, 0x0, 0x80000};
+	void *node = finddevice(EBONY_FPGA_PATH);
+	u8 *fpga_base;
+	u8 sel;
+
+	if (!node)
+		fatal("Couldn't locate FPGA node %s\n\r", EBONY_FPGA_PATH);
+
+	if (getprop(node, "virtual-reg", &fpga_base, sizeof(fpga_base))
+	    != sizeof(fpga_base))
+		fatal("%s has missing or invalid virtual-reg property\n\r",
+		      EBONY_FPGA_PATH);
+	sel = in_8(fpga_base) & EBONY_FPGA_FLASH_SEL;
+
+	node = finddevice(EBONY_SMALL_FLASH_PATH);
+	if (!node)
+		fatal("Couldn't locate small flash node %s\n\r",
+		      EBONY_SMALL_FLASH_PATH);
+
+	if (getprop(node, "reg", cells, sizeof(cells)) != sizeof(cells))
+		fatal("%s has reg property of unexpected size\n\r",
+		      EBONY_SMALL_FLASH_PATH);
+
+	/* FLASH_SEL set: invert address bit 14 (IBM-endian numbering) */
+	if (sel)
+		cells[1] ^= 0x80000;
+
+	setprop(node, "reg", cells, sizeof(cells));
+}
+
+/* Installed as platform_ops.fixups by ebony_init(). */
+static void ebony_fixups(void)
+{
+	/* FIXME: sysclk should be derived by reading the FPGA registers */
+	ibm440gp_fixup_clocks(33000000UL, 6 * 1843200);
+
+	ibm4xx_sdram_fixup_memsize();
+	dt_fixup_mac_address_by_alias("ethernet0", ebony_mac0);
+	dt_fixup_mac_address_by_alias("ethernet1", ebony_mac1);
+	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+	ebony_flashsel_fixup();
+}
+
+void ebony_init(void *mac0, void *mac1)
+{
+	ebony_mac0 = mac0;	/* consumed later by ebony_fixups() */
+	ebony_mac1 = mac1;
+	platform_ops.exit = ibm44x_dbcr_reset;
+	platform_ops.fixups = ebony_fixups;
+	fdt_init(_dtb_start);
+	serial_console_init();	/* runs only after fdt_init() */
+}
diff --git a/arch/powerpc/boot/elf.h b/arch/powerpc/boot/elf.h
new file mode 100644
index 0000000000..f6aa7c20fc
--- /dev/null
+++ b/arch/powerpc/boot/elf.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_ELF_H_
+#define _PPC_BOOT_ELF_H_
+
+/* 32-bit ELF base types. */
+typedef unsigned int Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned int Elf32_Off;
+typedef signed int Elf32_Sword;
+typedef unsigned int Elf32_Word;
+
+/* 64-bit ELF base types. */
+typedef unsigned long long Elf64_Addr;
+typedef unsigned short Elf64_Half;
+typedef signed short Elf64_SHalf;
+typedef unsigned long long Elf64_Off;
+typedef signed int Elf64_Sword;
+typedef unsigned int Elf64_Word;
+typedef unsigned long long Elf64_Xword;
+typedef signed long long Elf64_Sxword;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL    0
+#define PT_LOAD    1
+#define PT_DYNAMIC 2
+#define PT_INTERP  3
+#define PT_NOTE    4
+#define PT_SHLIB   5
+#define PT_PHDR    6
+#define PT_TLS     7		/* Thread local storage segment */
+#define PT_LOOS    0x60000000	/* OS-specific */
+#define PT_HIOS    0x6fffffff	/* OS-specific */
+#define PT_LOPROC  0x70000000
+#define PT_HIPROC  0x7fffffff
+#define PT_GNU_EH_FRAME		0x6474e550
+
+#define PT_GNU_STACK	(PT_LOOS + 0x474e551)
+
+/* These constants define the different elf file types */
+#define ET_NONE   0
+#define ET_REL    1
+#define ET_EXEC   2
+#define ET_DYN    3
+#define ET_CORE   4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* These constants define the various ELF target machines */
+#define EM_NONE  0
+#define EM_PPC	       20	/* PowerPC */
+#define EM_PPC64       21	/* PowerPC64 */
+
+#define EI_NIDENT	16
+
+typedef struct elf32_hdr {
+	unsigned char e_ident[EI_NIDENT];	/* ELF "magic number" */
+	Elf32_Half e_type;	/* ET_* file type */
+	Elf32_Half e_machine;	/* EM_* target machine */
+	Elf32_Word e_version;
+	Elf32_Addr e_entry;	/* Entry point */
+	Elf32_Off e_phoff;	/* Program header table file offset */
+	Elf32_Off e_shoff;	/* Section header table file offset */
+	Elf32_Word e_flags;
+	Elf32_Half e_ehsize;
+	Elf32_Half e_phentsize;
+	Elf32_Half e_phnum;	/* Number of program header entries */
+	Elf32_Half e_shentsize;
+	Elf32_Half e_shnum;
+	Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+	unsigned char e_ident[16];	/* ELF "magic number" */
+	Elf64_Half e_type;	/* ET_* file type */
+	Elf64_Half e_machine;	/* EM_* target machine */
+	Elf64_Word e_version;
+	Elf64_Addr e_entry;	/* Entry point virtual address */
+	Elf64_Off e_phoff;	/* Program header table file offset */
+	Elf64_Off e_shoff;	/* Section header table file offset */
+	Elf64_Word e_flags;
+	Elf64_Half e_ehsize;
+	Elf64_Half e_phentsize;
+	Elf64_Half e_phnum;	/* Number of program header entries */
+	Elf64_Half e_shentsize;
+	Elf64_Half e_shnum;
+	Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+   header, p_flags. */
+#define PF_R		0x4
+#define PF_W		0x2
+#define PF_X		0x1
+
+typedef struct elf32_phdr {
+	Elf32_Word p_type;	/* PT_* segment type */
+	Elf32_Off p_offset;	/* Segment file offset */
+	Elf32_Addr p_vaddr;	/* Segment virtual address */
+	Elf32_Addr p_paddr;	/* Segment physical address */
+	Elf32_Word p_filesz;	/* Segment size in file */
+	Elf32_Word p_memsz;	/* Segment size in memory */
+	Elf32_Word p_flags;	/* PF_* permission bits */
+	Elf32_Word p_align;	/* Segment alignment, file & memory */
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+	Elf64_Word p_type;	/* PT_* segment type */
+	Elf64_Word p_flags;	/* PF_* permission bits */
+	Elf64_Off p_offset;	/* Segment file offset */
+	Elf64_Addr p_vaddr;	/* Segment virtual address */
+	Elf64_Addr p_paddr;	/* Segment physical address */
+	Elf64_Xword p_filesz;	/* Segment size in file */
+	Elf64_Xword p_memsz;	/* Segment size in memory */
+	Elf64_Xword p_align;	/* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+#define	EI_MAG0		0	/* e_ident[] indexes */
+#define	EI_MAG1		1
+#define	EI_MAG2		2
+#define	EI_MAG3		3
+#define	EI_CLASS	4
+#define	EI_DATA		5
+#define	EI_VERSION	6
+#define	EI_OSABI	7
+#define	EI_PAD		8
+
+#define	ELFMAG0		0x7f	/* EI_MAG */
+#define	ELFMAG1		'E'
+#define	ELFMAG2		'L'
+#define	ELFMAG3		'F'
+#define	ELFMAG		"\177ELF"
+#define	SELFMAG		4
+
+#define	ELFCLASSNONE	0	/* EI_CLASS */
+#define	ELFCLASS32	1
+#define	ELFCLASS64	2
+#define	ELFCLASSNUM	3
+
+#define ELFDATANONE	0	/* e_ident[EI_DATA] */
+#define ELFDATA2LSB	1
+#define ELFDATA2MSB	2
+
+#define EV_NONE		0	/* e_version, EI_VERSION */
+#define EV_CURRENT	1
+#define EV_NUM		2
+
+#define ELFOSABI_NONE	0
+#define ELFOSABI_LINUX	3
+
+struct elf_info {
+	unsigned long loadsize;		/* p_filesz of the first PT_LOAD segment */
+	unsigned long memsize;		/* p_memsz of that segment */
+	unsigned long elfoffset;	/* p_offset of that segment */
+};
+int parse_elf64(void *hdr, struct elf_info *info);	/* 1 on success, else 0 */
+int parse_elf32(void *hdr, struct elf_info *info);	/* 1 on success, else 0 */
+
+#endif				/* _PPC_BOOT_ELF_H_ */
diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
new file mode 100644
index 0000000000..9e6cbdfdc1
--- /dev/null
+++ b/arch/powerpc/boot/elf_util.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+
+/* Describe the first PT_LOAD segment of a PPC64 ELF image; 1 if ok, else 0. */
+int parse_elf64(void *hdr, struct elf_info *info)
+{
+	Elf64_Ehdr *eh = hdr;
+	Elf64_Phdr *ph;
+	unsigned int idx, nr;
+#ifdef __LITTLE_ENDIAN__
+	const unsigned char want_data = ELFDATA2LSB;
+#else
+	const unsigned char want_data = ELFDATA2MSB;
+#endif
+
+	if (eh->e_ident[EI_MAG0] != ELFMAG0 ||
+	    eh->e_ident[EI_MAG1] != ELFMAG1 ||
+	    eh->e_ident[EI_MAG2] != ELFMAG2 ||
+	    eh->e_ident[EI_MAG3] != ELFMAG3 ||
+	    eh->e_ident[EI_CLASS] != ELFCLASS64 ||
+	    eh->e_ident[EI_DATA] != want_data)
+		return 0;
+	if ((eh->e_type != ET_EXEC && eh->e_type != ET_DYN) ||
+	    eh->e_machine != EM_PPC64)
+		return 0;
+
+	nr = eh->e_phnum;
+	ph = (Elf64_Phdr *)((unsigned long)eh + (unsigned long)eh->e_phoff);
+	for (idx = 0; idx < nr && ph->p_type != PT_LOAD; idx++)
+		ph++;
+	if (idx >= nr)
+		return 0;
+
+	info->loadsize = (unsigned long)ph->p_filesz;
+	info->memsize = (unsigned long)ph->p_memsz;
+	info->elfoffset = (unsigned long)ph->p_offset;
+	return 1;
+}
+
+/* 32-bit counterpart of parse_elf64(): big-endian EM_PPC images only. */
+int parse_elf32(void *hdr, struct elf_info *info)
+{
+	Elf32_Ehdr *eh = hdr;
+	Elf32_Phdr *ph;
+	unsigned int idx;
+
+	if (eh->e_ident[EI_MAG0] != ELFMAG0 ||
+	    eh->e_ident[EI_MAG1] != ELFMAG1 ||
+	    eh->e_ident[EI_MAG2] != ELFMAG2 ||
+	    eh->e_ident[EI_MAG3] != ELFMAG3 ||
+	    eh->e_ident[EI_CLASS] != ELFCLASS32 ||
+	    eh->e_ident[EI_DATA] != ELFDATA2MSB)
+		return 0;
+	if ((eh->e_type != ET_EXEC && eh->e_type != ET_DYN) ||
+	    eh->e_machine != EM_PPC)
+		return 0;
+
+	ph = (Elf32_Phdr *)((unsigned long)eh + eh->e_phoff);
+	for (idx = 0; idx < eh->e_phnum && ph->p_type != PT_LOAD; idx++)
+		ph++;
+	if (idx >= eh->e_phnum)
+		return 0;
+
+	info->loadsize = ph->p_filesz;
+	info->memsize = ph->p_memsz;
+	info->elfoffset = ph->p_offset;
+	return 1;
+}
diff --git a/arch/powerpc/boot/ep8248e.c b/arch/powerpc/boot/ep8248e.c
new file mode 100644
index 0000000000..2ab9e0d8ca
--- /dev/null
+++ b/arch/powerpc/boot/ep8248e.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Embedded Planet EP8248E with PlanetCore firmware
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "planetcore.h"
+#include "pq2.h"
+
+static char *table;
+static u64 mem_size;
+
+#include <io.h>
+
+/* Fix up memory, MAC addresses and (if the crystal key exists) clocks. */
+static void platform_fixups(void)
+{
+	u64 hz;
+
+	dt_fixup_memory(0, mem_size);
+	planetcore_set_mac_addrs(table);
+
+	/* The clock fixup takes the crystal frequency from the key table. */
+	if (planetcore_get_decimal(table, PLANETCORE_KEY_CRYSTAL_HZ, &hz))
+		pq2_fixup_clocks(hz);
+	else
+		printf("No PlanetCore crystal frequency key.\r\n");
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	/* r3 points at the PlanetCore key table; r4-r7 are unused. */
+	table = (char *)r3;
+	planetcore_prepare_table(table);
+
+	/* Without a RAM size key, bail out before any further setup. */
+	if (!planetcore_get_decimal(table, PLANETCORE_KEY_MB_RAM, &mem_size))
+		return;
+	mem_size <<= 20;	/* MiB -> bytes */
+
+	simple_alloc_init(_end, mem_size - (unsigned long)_end, 32, 64);
+	fdt_init(_dtb_start);
+	planetcore_set_stdout_path(table);
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/ep88xc.c b/arch/powerpc/boot/ep88xc.c
new file mode 100644
index 0000000000..1c277a13b3
--- /dev/null
+++ b/arch/powerpc/boot/ep88xc.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Embedded Planet EP88xC with PlanetCore firmware
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "planetcore.h"
+#include "mpc8xx.h"
+
+static char *table;
+static u64 mem_size;
+
+/* Fix up memory, MAC addresses and (if the crystal key exists) clocks. */
+static void platform_fixups(void)
+{
+	u64 hz;
+
+	dt_fixup_memory(0, mem_size);
+	planetcore_set_mac_addrs(table);
+
+	/* The clock fixup takes the crystal frequency from the key table. */
+	if (planetcore_get_decimal(table, PLANETCORE_KEY_CRYSTAL_HZ, &hz))
+		mpc885_fixup_clocks(hz);
+	else
+		printf("No PlanetCore crystal frequency key.\r\n");
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	/* r3 points at the PlanetCore key table; r4-r7 are unused. */
+	table = (char *)r3;
+	planetcore_prepare_table(table);
+
+	/* Without a RAM size key, bail out before any further setup. */
+	if (!planetcore_get_decimal(table, PLANETCORE_KEY_MB_RAM, &mem_size))
+		return;
+	mem_size <<= 20;	/* MiB -> bytes */
+
+	simple_alloc_init(_end, mem_size - (unsigned long)_end, 32, 64);
+	fdt_init(_dtb_start);
+	planetcore_set_stdout_path(table);
+	serial_console_init();
+
+	platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c
new file mode 100644
index 0000000000..01262f50b7
--- /dev/null
+++ b/arch/powerpc/boot/epapr-wrapper.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+extern void epapr_platform_init(unsigned long r3, unsigned long r4,
+				unsigned long r5, unsigned long r6,
+				unsigned long r7);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	epapr_platform_init(r3, r4, r5, r6, r7);	/* forward r3-r7 unchanged */
+}
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
new file mode 100644
index 0000000000..7c5b26ade6
--- /dev/null
+++ b/arch/powerpc/boot/epapr.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bootwrapper for ePAPR compliant firmwares
+ *
+ * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation.
+ *
+ * Based on earlier bootwrappers by:
+ * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,
+ *   and
+ * Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+
+BSS_STACK(4096);
+
+#define EPAPR_SMAGIC	0x65504150
+#define EPAPR_EMAGIC	0x45504150
+
+static unsigned epapr_magic;
+static unsigned long ima_size;
+static unsigned long fdt_addr;
+
+/* Check ePAPR magic; warn when image or device tree exceed the IMA. */
+static void platform_fixups(void)
+{
+	if (epapr_magic != EPAPR_EMAGIC && epapr_magic != EPAPR_SMAGIC)
+		fatal("r6 contained 0x%08x instead of ePAPR magic number\n",
+		      epapr_magic);
+
+	if (ima_size < (unsigned long)_end)
+		printf("WARNING: Image loaded outside IMA!"
+		       " (_end=%p, ima_size=0x%lx)\n", _end, ima_size);
+	if (ima_size < fdt_addr)
+		printf("WARNING: Device tree address is outside IMA!"
+		       " (fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr,
+		       ima_size);
+	if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr))
+		printf("WARNING: Device tree extends outside IMA!"
+		       " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx)\n",
+		       fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
+}
+
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7)
+{
+	/* r3 = device tree address, r6 = ePAPR magic, r7 = IMA size */
+	fdt_addr = r3;
+	epapr_magic = r6;
+	ima_size = r7;
+
+	/* FIXME: we should process reserve entries */
+	simple_alloc_init(_end, r7 - (unsigned long)_end, 32, 64);
+	fdt_init((void *)r3);
+	serial_console_init();
+
+	/* The saved values are checked later, in platform_fixups(). */
+	platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/fixed-head.S b/arch/powerpc/boot/fixed-head.S
new file mode 100644
index 0000000000..4346c750ca
--- /dev/null
+++ b/arch/powerpc/boot/fixed-head.S
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	.text
+	.global _zimage_start
+_zimage_start:
+	b	_zimage_start_lib
diff --git a/arch/powerpc/boot/fixup-headers.sed b/arch/powerpc/boot/fixup-headers.sed
new file mode 100644
index 0000000000..96362428eb
--- /dev/null
+++ b/arch/powerpc/boot/fixup-headers.sed
@@ -0,0 +1,12 @@
+# Copyright 2016 IBM Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+
+s@#include <linux/decompress/mm\.h>@@;
+s@\"zlib_inflate/\([^\"]*\).*@"\1"@;
+s@<linux/kernel.h>@<stddef.h>@;
+
+s@__used@@;
+s@<linux/\([^>]*\).*@"\1"@;
diff --git a/arch/powerpc/boot/fsl-soc.c b/arch/powerpc/boot/fsl-soc.c
new file mode 100644
index 0000000000..01bad8ea62
--- /dev/null
+++ b/arch/powerpc/boot/fsl-soc.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Freescale SOC support functions
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "types.h"
+#include "fsl-soc.h"
+#include "stdio.h"
+
+static u32 prop_buf[MAX_PROP_LEN / 4];
+
+/*
+ * Locate the "soc" node and translate the parent-bus address found in its
+ * first "ranges" entry.  Returns NULL (after printing a message) on failure.
+ */
+u32 *fsl_get_immr(void)
+{
+	unsigned long immr = 0;
+	u32 naddr = 2;	/* used when #address-cells is not exactly 4 bytes */
+	void *soc;
+
+	soc = find_node_by_devtype(NULL, "soc");
+	if (!soc)
+		goto out;
+
+	if (getprop(soc, "#address-cells", prop_buf, MAX_PROP_LEN) == 4)
+		naddr = prop_buf[0];
+	if (naddr != 1 && naddr != 2)
+		goto out;
+
+	if (getprop(soc, "ranges", prop_buf, MAX_PROP_LEN) < 12)
+		goto out;
+
+	/* The child-bus address of the first range must be zero. */
+	if (prop_buf[0] != 0)
+		goto out;
+	if (naddr == 2 && prop_buf[1] != 0)
+		goto out;
+
+	/* Translate whatever follows the child address cells. */
+	if (!dt_xlate_addr(soc, prop_buf + naddr, 8, &immr))
+		immr = 0;
+
+out:
+	if (!immr)
+		printf("fsl_get_immr: Failed to find immr base\r\n");
+
+	return (u32 *)immr;
+}
diff --git a/arch/powerpc/boot/fsl-soc.h b/arch/powerpc/boot/fsl-soc.h
new file mode 100644
index 0000000000..00b2cb89ff
--- /dev/null
+++ b/arch/powerpc/boot/fsl-soc.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_FSL_SOC_H_
+#define _PPC_BOOT_FSL_SOC_H_
+
+#include "types.h"
+
+u32 *fsl_get_immr(void);
+
+#endif
diff --git a/arch/powerpc/boot/gamecube-head.S b/arch/powerpc/boot/gamecube-head.S
new file mode 100644
index 0000000000..ccf5f1045e
--- /dev/null
+++ b/arch/powerpc/boot/gamecube-head.S
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/boot/gamecube-head.S
+ *
+ * Nintendo GameCube bootwrapper entry.
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include "ppc_asm.h"
+
+/*
+ * The entry code makes no assumptions regarding:
+ * - if the data and instruction caches are enabled or not
+ * - if the MMU is enabled or not
+ *
+ * We enable the caches if not already enabled, enable the MMU with an
+ * identity mapping scheme and jump to the start code.
+ */
+
+	.text
+
+	.globl _zimage_start
+_zimage_start:
+
+	/* turn the MMU off */
+	mfmsr	9
+	rlwinm	9, 9, 0, ~((1<<4)|(1<<5)) /* MSR_DR|MSR_IR */
+	bcl	20, 31, 1f
+1:
+	mflr	8
+	clrlwi	8, 8, 3		/* convert to a real address */
+	addi	8, 8, _mmu_off - 1b
+	mtsrr0	8
+	mtsrr1	9
+	rfi
+_mmu_off:
+	/* MMU disabled */
+
+	/* setup BATs */
+	isync
+	li      8, 0
+	mtspr	0x210, 8	/* IBAT0U */
+	mtspr	0x212, 8	/* IBAT1U */
+	mtspr	0x214, 8	/* IBAT2U */
+	mtspr	0x216, 8	/* IBAT3U */
+	mtspr	0x218, 8	/* DBAT0U */
+	mtspr	0x21a, 8	/* DBAT1U */
+	mtspr	0x21c, 8	/* DBAT2U */
+	mtspr	0x21e, 8	/* DBAT3U */
+
+	li	8, 0x01ff	/* first 16MiB */
+	li	9, 0x0002	/* rw */
+	mtspr	0x211, 9	/* IBAT0L */
+	mtspr	0x210, 8	/* IBAT0U */
+	mtspr	0x219, 9	/* DBAT0L */
+	mtspr	0x218, 8	/* DBAT0U */
+
+	lis	8, 0x0c00	/* I/O mem */
+	ori	8, 8, 0x3ff	/* 32MiB */
+	lis	9, 0x0c00
+	ori	9, 9, 0x002a	/* uncached, guarded, rw */
+	mtspr	0x21b, 9	/* DBAT1L */
+	mtspr	0x21a, 8	/* DBAT1U */
+
+	lis	8, 0x0100	/* next 8MiB */
+	ori	8, 8, 0x00ff	/* 8MiB */
+	lis	9, 0x0100
+	ori	9, 9, 0x0002	/* rw */
+	mtspr	0x215, 9	/* IBAT2L */
+	mtspr	0x214, 8	/* IBAT2U */
+	mtspr	0x21d, 9	/* DBAT2L */
+	mtspr	0x21c, 8	/* DBAT2U */
+
+	/* enable and invalidate the caches if not already enabled */
+	mfspr	8, 0x3f0	/* HID0 */
+	andi.	0, 8, (1<<15)		/* HID0_ICE */
+	bne	1f
+	ori	8, 8, (1<<15)|(1<<11)	/* HID0_ICE|HID0_ICFI*/
+1:
+	andi.	0, 8, (1<<14)		/* HID0_DCE */
+	bne	1f
+	ori	8, 8, (1<<14)|(1<<10)	/* HID0_DCE|HID0_DCFI*/
+1:
+	mtspr	0x3f0, 8	/* HID0 */
+	isync
+
+	/* initialize arguments */
+	li	3, 0
+	li	4, 0
+	li	5, 0
+
+	/* turn the MMU on */
+	bcl	20, 31, 1f
+1:
+	mflr	8
+	addi	8, 8, _mmu_on - 1b
+	mfmsr	9
+	ori	9, 9, (1<<4)|(1<<5) /* MSR_DR|MSR_IR */
+	mtsrr0	8
+	mtsrr1	9
+	sync
+	rfi
+_mmu_on:
+	b _zimage_start_lib
+
diff --git a/arch/powerpc/boot/gamecube.c b/arch/powerpc/boot/gamecube.c
new file mode 100644
index 0000000000..d030612fdd
--- /dev/null
+++ b/arch/powerpc/boot/gamecube.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/boot/gamecube.c
+ *
+ * Nintendo GameCube bootwrapper support
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+#include "ugecon.h"
+
+BSS_STACK(8192);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+	u32 heapsize = 16*1024*1024 - (u32)_end;
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+
+	if (ug_probe())
+		console_ops.write = ug_console_write;
+}
+
diff --git a/arch/powerpc/boot/hack-coff.c b/arch/powerpc/boot/hack-coff.c
new file mode 100644
index 0000000000..a010e124ac
--- /dev/null
+++ b/arch/powerpc/boot/hack-coff.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * hack-coff.c - hack the header of an xcoff file to fill in
+ * a few fields needed by the Open Firmware xcoff loader on
+ * Power Macs but not initialized by objcopy.
+ *
+ * Copyright (C) Paul Mackerras 1997.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include "rs6000.h"
+
+#define AOUT_MAGIC	0x010b
+
+#define get_16be(x)	((((unsigned char *)(x))[0] << 8) \
+			 + ((unsigned char *)(x))[1])
+#define put_16be(x, v)	(((unsigned char *)(x))[0] = (v) >> 8, \
+			 ((unsigned char *)(x))[1] = (v) & 0xff)
+#define get_32be(x)	((((unsigned char *)(x))[0] << 24) \
+			 + (((unsigned char *)(x))[1] << 16) \
+			 + (((unsigned char *)(x))[2] << 8) \
+			 + ((unsigned char *)(x))[3])
+
+int
+main(int ac, char **av)
+{
+    int fd;
+    int i, nsect;
+    int aoutsz;
+    struct external_filehdr fhdr;
+    AOUTHDR aout;
+    struct external_scnhdr shdr;
+
+    if (ac != 2) {
+	fprintf(stderr, "Usage: hack-coff coff-file\n");
+	exit(1);
+    }
+    if ((fd = open(av[1], 2)) == -1) {
+	perror(av[2]);
+	exit(1);
+    }
+    if (read(fd, &fhdr, sizeof(fhdr)) != sizeof(fhdr))
+	goto readerr;
+    i = get_16be(fhdr.f_magic);
+    if (i != U802TOCMAGIC && i != U802WRMAGIC && i != U802ROMAGIC) {
+	fprintf(stderr, "%s: not an xcoff file\n", av[1]);
+	exit(1);
+    }
+    aoutsz = get_16be(fhdr.f_opthdr);
+    if (read(fd, &aout, aoutsz) != aoutsz)
+	goto readerr;
+    nsect = get_16be(fhdr.f_nscns);
+    for (i = 0; i < nsect; ++i) {
+	if (read(fd, &shdr, sizeof(shdr)) != sizeof(shdr))
+	    goto readerr;
+	if (strcmp(shdr.s_name, ".text") == 0) {
+	    put_16be(aout.o_snentry, i+1);
+	    put_16be(aout.o_sntext, i+1);
+	} else if (strcmp(shdr.s_name, ".data") == 0) {
+	    put_16be(aout.o_sndata, i+1);
+	} else if (strcmp(shdr.s_name, ".bss") == 0) {
+	    put_16be(aout.o_snbss, i+1);
+	}
+    }
+    put_16be(aout.magic, AOUT_MAGIC);
+    if (lseek(fd, (long) sizeof(struct external_filehdr), 0) == -1
+	|| write(fd, &aout, aoutsz) != aoutsz) {
+	fprintf(stderr, "%s: write error\n", av[1]);
+	exit(1);
+    }
+    close(fd);
+    exit(0);
+
+readerr:
+    fprintf(stderr, "%s: read error or file too short\n", av[1]);
+    exit(1);
+}
diff --git a/arch/powerpc/boot/holly.c b/arch/powerpc/boot/holly.c
new file mode 100644
index 0000000000..557c7a0ece
--- /dev/null
+++ b/arch/powerpc/boot/holly.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2007 IBM Corporation
+ *
+ * Stephen Winiecki <stevewin@us.ibm.com>
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Based on earlier code:
+ * Copyright (C) Paul Mackerras 1997.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "io.h"
+
+BSS_STACK(4096);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+	u32 heapsize = 0x8000000 - (u32)_end; /* 128M */
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
new file mode 100755
index 0000000000..461902c8a4
--- /dev/null
+++ b/arch/powerpc/boot/install.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Blatantly stolen from arch/i386/boot/install.sh by Dave Hansen
+#
+# "make install" script for ppc64 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+
+set -e
+# All expansions are quoted so paths with spaces or glob characters survive.
+# this should work for both the pSeries zImage and the iSeries vmlinux.sm
+image_name=$(basename "$2")
+
+if [ -f "$4/$image_name" ]; then
+	mv "$4/$image_name" "$4/$image_name.old"
+fi
+
+if [ -f "$4/System.map" ]; then
+	mv "$4/System.map" "$4/System.old"
+fi
+
+cat "$2" > "$4/$image_name"
+cp "$3" "$4/System.map"
diff --git a/arch/powerpc/boot/io.h b/arch/powerpc/boot/io.h
new file mode 100644
index 0000000000..5c6f90c349
--- /dev/null
+++ b/arch/powerpc/boot/io.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _IO_H
+#define _IO_H
+
+#include "types.h"
+
+/*
+ * Low-level I/O routines.
+ *
+ * Copied from <file:arch/powerpc/include/asm/io.h> (which has no copyright)
+ */
+static inline int in_8(const volatile unsigned char *addr)
+{
+	int ret;
+
+	__asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
+			     : "=r" (ret) : "m" (*addr));
+	return ret;
+}
+
+static inline void out_8(volatile unsigned char *addr, int val)
+{
+	__asm__ __volatile__("stb%U0%X0 %1,%0; sync"
+			     : "=m" (*addr) : "r" (val));
+}
+
+static inline unsigned in_le16(const volatile u16 *addr)
+{
+	unsigned ret;
+
+	__asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync"
+			     : "=r" (ret) : "r" (addr), "m" (*addr));
+
+	return ret;
+}
+
+static inline unsigned in_be16(const volatile u16 *addr)
+{
+	unsigned ret;
+
+	__asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
+			     : "=r" (ret) : "m" (*addr));
+	return ret;
+}
+
+static inline void out_le16(volatile u16 *addr, int val)
+{
+	__asm__ __volatile__("sthbrx %1,0,%2; sync" : "=m" (*addr)
+			     : "r" (val), "r" (addr));
+}
+
+static inline void out_be16(volatile u16 *addr, int val)
+{
+	__asm__ __volatile__("sth%U0%X0 %1,%0; sync"
+			     : "=m" (*addr) : "r" (val));
+}
+
+static inline unsigned in_le32(const volatile unsigned *addr)
+{
+	unsigned ret;
+
+	__asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync"
+			     : "=r" (ret) : "r" (addr), "m" (*addr));
+	return ret;
+}
+
+static inline unsigned in_be32(const volatile unsigned *addr)
+{
+	unsigned ret;
+
+	__asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
+			     : "=r" (ret) : "m" (*addr));
+	return ret;
+}
+
+static inline void out_le32(volatile unsigned *addr, int val)
+{
+	__asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr)
+			     : "r" (val), "r" (addr));
+}
+
+static inline void out_be32(volatile unsigned *addr, int val)
+{
+	__asm__ __volatile__("stw%U0%X0 %1,%0; sync"
+			     : "=m" (*addr) : "r" (val));
+}
+
+static inline void sync(void)
+{
+	asm volatile("sync" : : : "memory");
+}
+
+static inline void eieio(void)
+{
+	asm volatile("eieio" : : : "memory");
+}
+
+static inline void barrier(void)
+{
+	asm volatile("" : : : "memory");
+}
+
+#endif /* _IO_H */
diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c
new file mode 100644
index 0000000000..217d0d7a6a
--- /dev/null
+++ b/arch/powerpc/boot/libfdt-wrapper.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file does the necessary interface mapping between the bootwrapper
+ * device tree operations and the interface provided by libfdt
+ * (<libfdt.h>).
+ *
+ * Copyright 2007 David Gibson, IBM Corporation.
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <page.h>
+#include <libfdt.h>
+#include "ops.h"
+
+#define DEBUG	0
+#define BAD_ERROR(err)	(((err) < 0) \
+			 && ((err) != -FDT_ERR_NOTFOUND) \
+			 && ((err) != -FDT_ERR_EXISTS))
+
+#define check_err(err) \
+	({ \
+		if (BAD_ERROR(err) || ((err < 0) && DEBUG)) \
+			printf("%s():%d  %s\n\r", __func__, __LINE__, \
+			       fdt_strerror(err)); \
+		if (BAD_ERROR(err)) \
+			exit(); \
+		(err < 0) ? -1 : 0; \
+	})
+
+#define offset_devp(off)	\
+	({ \
+		unsigned long _offset = (off); \
+		check_err(_offset) ? NULL : (void *)(_offset+1); \
+	})
+
+#define devp_offset_find(devp)	(((unsigned long)(devp))-1)
+#define devp_offset(devp)	(devp ? ((unsigned long)(devp))-1 : 0)
+
+static void *fdt;
+static void *buf; /* = NULL */
+
+#define EXPAND_GRANULARITY	1024
+
+static void expand_buf(int minexpand)
+{
+	int size = fdt_totalsize(fdt);
+	int rc;
+
+	size = _ALIGN(size + minexpand, EXPAND_GRANULARITY);
+	buf = platform_ops.realloc(buf, size);
+	if (!buf)
+		fatal("Couldn't find %d bytes to expand device tree\n\r", size);
+	rc = fdt_open_into(fdt, buf, size);
+	if (rc != 0)
+		fatal("Couldn't expand fdt into new buffer: %s\n\r",
+		      fdt_strerror(rc));
+
+	fdt = buf;
+}
+
+static void *fdt_wrapper_finddevice(const char *path)
+{
+	return offset_devp(fdt_path_offset(fdt, path));
+}
+
+static int fdt_wrapper_getprop(const void *devp, const char *name,
+			       void *buf, const int buflen)
+{
+	const void *p;
+	int len;
+
+	p = fdt_getprop(fdt, devp_offset(devp), name, &len);
+	if (!p)
+		return check_err(len);
+	memcpy(buf, p, min(len, buflen));
+	return len;
+}
+
+static int fdt_wrapper_setprop(const void *devp, const char *name,
+			       const void *buf, const int len)
+{
+	int rc;
+
+	rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len);
+	if (rc == -FDT_ERR_NOSPACE) {
+		expand_buf(len + 16);
+		rc = fdt_setprop(fdt, devp_offset(devp), name, buf, len);
+	}
+
+	return check_err(rc);
+}
+
+static int fdt_wrapper_del_node(const void *devp)
+{
+	return fdt_del_node(fdt, devp_offset(devp));
+}
+
+static void *fdt_wrapper_get_parent(const void *devp)
+{
+	return offset_devp(fdt_parent_offset(fdt, devp_offset(devp)));
+}
+
+static void *fdt_wrapper_create_node(const void *devp, const char *name)
+{
+	int offset;
+
+	offset = fdt_add_subnode(fdt, devp_offset(devp), name);
+	if (offset == -FDT_ERR_NOSPACE) {
+		expand_buf(strlen(name) + 16);
+		offset = fdt_add_subnode(fdt, devp_offset(devp), name);
+	}
+
+	return offset_devp(offset);
+}
+
+static void *fdt_wrapper_find_node_by_prop_value(const void *prev,
+						 const char *name,
+						 const char *val,
+						 int len)
+{
+	int offset = fdt_node_offset_by_prop_value(fdt, devp_offset_find(prev),
+	                                           name, val, len);
+	return offset_devp(offset);
+}
+
+static void *fdt_wrapper_find_node_by_compatible(const void *prev,
+						 const char *val)
+{
+	int offset = fdt_node_offset_by_compatible(fdt, devp_offset_find(prev),
+	                                           val);
+	return offset_devp(offset);
+}
+
+static char *fdt_wrapper_get_path(const void *devp, char *buf, int len)
+{
+	int rc;
+
+	rc = fdt_get_path(fdt, devp_offset(devp), buf, len);
+	if (check_err(rc))
+		return NULL;
+	return buf;
+}
+
+static unsigned long fdt_wrapper_finalize(void)
+{
+	int rc;
+
+	rc = fdt_pack(fdt);
+	if (rc != 0)
+		fatal("Couldn't pack flat tree: %s\n\r",
+		      fdt_strerror(rc));
+	return (unsigned long)fdt;
+}
+
+void fdt_init(void *blob)
+{
+	int err;
+	int bufsize;
+
+	dt_ops.finddevice = fdt_wrapper_finddevice;
+	dt_ops.getprop = fdt_wrapper_getprop;
+	dt_ops.setprop = fdt_wrapper_setprop;
+	dt_ops.get_parent = fdt_wrapper_get_parent;
+	dt_ops.create_node = fdt_wrapper_create_node;
+	dt_ops.find_node_by_prop_value = fdt_wrapper_find_node_by_prop_value;
+	dt_ops.find_node_by_compatible = fdt_wrapper_find_node_by_compatible;
+	dt_ops.del_node = fdt_wrapper_del_node;
+	dt_ops.get_path = fdt_wrapper_get_path;
+	dt_ops.finalize = fdt_wrapper_finalize;
+
+	/* Make sure the dt blob is the right version and so forth */
+	fdt = blob;
+	bufsize = fdt_totalsize(fdt) + EXPAND_GRANULARITY;
+	buf = malloc(bufsize);
+	if(!buf)
+		fatal("malloc failed. can't relocate the device tree\n\r");
+
+	err = fdt_open_into(fdt, buf, bufsize);
+
+	if (err != 0)
+		fatal("fdt_init(): %s\n\r", fdt_strerror(err));
+
+	fdt = buf;
+}
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
new file mode 100644
index 0000000000..9757d4f633
--- /dev/null
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_POWERPC_BOOT_LIBFDT_ENV_H
+#define _ARCH_POWERPC_BOOT_LIBFDT_ENV_H
+
+#include <types.h>
+#include <string.h>
+
+#define INT_MAX			((int)(~0U>>1))
+#define UINT32_MAX		((u32)~0U)
+#define INT32_MAX		((s32)(UINT32_MAX >> 1))
+
+#include "of.h"
+
+typedef unsigned long uintptr_t;
+
+typedef __be16 fdt16_t;
+typedef __be32 fdt32_t;
+typedef __be64 fdt64_t;
+
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
+
+#endif /* _ARCH_POWERPC_BOOT_LIBFDT_ENV_H */
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
new file mode 100644
index 0000000000..cae31a6e8f
--- /dev/null
+++ b/arch/powerpc/boot/main.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "reg.h"
+
+struct addr_range {
+	void *addr;
+	unsigned long size;
+};
+
+#undef DEBUG
+
+static struct addr_range prep_kernel(void)
+{
+	char elfheader[256];
+	unsigned char *vmlinuz_addr = (unsigned char *)_vmlinux_start;
+	unsigned long vmlinuz_size = _vmlinux_end - _vmlinux_start;
+	void *addr = 0;
+	struct elf_info ei;
+	long len;
+	int uncompressed_image = 0;
+
+	len = partial_decompress(vmlinuz_addr, vmlinuz_size,
+		elfheader, sizeof(elfheader), 0);
+	/* assume uncompressed data if -1 is returned */
+	if (len == -1) {
+		uncompressed_image = 1;
+		memcpy(elfheader, vmlinuz_addr, sizeof(elfheader));
+		printf("No valid compressed data found, assume uncompressed data\n\r");
+	}
+
+	if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
+		fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
+
+	if (platform_ops.image_hdr)
+		platform_ops.image_hdr(elfheader);
+
+	/* We need to alloc the memsize: gzip will expand the kernel
+	 * text/data, then possible rubbish we don't care about. But
+	 * the kernel bss must be claimed (it will be zero'd by the
+	 * kernel itself)
+	 */
+	printf("Allocating 0x%lx bytes for kernel...\n\r", ei.memsize);
+
+	if (platform_ops.vmlinux_alloc) {
+		addr = platform_ops.vmlinux_alloc(ei.memsize);
+	} else {
+		/*
+		 * Check if the kernel image (without bss) would overwrite the
+		 * bootwrapper. The device tree has been moved in fdt_init()
+		 * to an area allocated with malloc() (somewhere past _end).
+		 */
+		if ((unsigned long)_start < ei.loadsize)
+			fatal("Insufficient memory for kernel at address 0!"
+			       " (_start=%p, uncompressed size=%08lx)\n\r",
+			       _start, ei.loadsize);
+
+		if ((unsigned long)_end < ei.memsize)
+			fatal("The final kernel image would overwrite the "
+					"device tree\n\r");
+	}
+
+	if (uncompressed_image) {
+		memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize);
+		printf("0x%lx bytes of uncompressed data copied\n\r",
+		       ei.loadsize);
+		goto out;
+	}
+
+	/* Finally, decompress the kernel */
+	printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
+	       vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
+
+	len = partial_decompress(vmlinuz_addr, vmlinuz_size,
+		addr, ei.loadsize, ei.elfoffset);
+
+	if (len < 0)
+		fatal("Decompression failed with error code %ld\n\r", len);
+
+	if (len != ei.loadsize)
+		 fatal("Decompression error: got 0x%lx bytes, expected 0x%lx.\n\r",
+			 len, ei.loadsize);
+
+	printf("Done! Decompressed 0x%lx bytes\n\r", len);
+out:
+	flush_cache(addr, ei.loadsize);
+
+	return (struct addr_range){addr, ei.memsize};
+}
+
+static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
+				     unsigned long initrd_addr,
+				     unsigned long initrd_size)
+{
+	/* If we have an image attached to us, it overrides anything
+	 * supplied by the loader. */
+	if (&_initrd_end > &_initrd_start) {
+		printf("Attached initrd image at 0x%p-0x%p\n\r",
+		       _initrd_start, _initrd_end);
+		initrd_addr = (unsigned long)_initrd_start;
+		initrd_size = _initrd_end - _initrd_start;
+	} else if (initrd_size > 0) {
+		printf("Using loader supplied ramdisk at 0x%lx-0x%lx\n\r",
+		       initrd_addr, initrd_addr + initrd_size);
+	}
+
+	/* If there's no initrd at all, we're done */
+	if (! initrd_size)
+		return (struct addr_range){0, 0};
+
+	/*
+	 * If the initrd is too low it will be clobbered when the
+	 * kernel relocates to its final location.  In this case,
+	 * allocate a safer place and move it.
+	 */
+	if (initrd_addr < vmlinux.size) {
+		void *old_addr = (void *)initrd_addr;
+
+		printf("Allocating 0x%lx bytes for initrd ...\n\r",
+		       initrd_size);
+		initrd_addr = (unsigned long)malloc(initrd_size);
+		if (! initrd_addr)
+			fatal("Can't allocate memory for initial "
+			       "ramdisk !\n\r");
+		printf("Relocating initrd 0x%lx <- 0x%p (0x%lx bytes)\n\r",
+		       initrd_addr, old_addr, initrd_size);
+		memmove((void *)initrd_addr, old_addr, initrd_size);
+	}
+
+	printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd_addr));
+
+	/* Tell the kernel initrd address via device tree */
+	setprop_val(chosen, "linux,initrd-start", (u32)(initrd_addr));
+	setprop_val(chosen, "linux,initrd-end", (u32)(initrd_addr+initrd_size));
+
+	return (struct addr_range){(void *)initrd_addr, initrd_size};
+}
+
+#ifdef __powerpc64__
+static void prep_esm_blob(struct addr_range vmlinux, void *chosen)
+{
+	unsigned long esm_blob_addr, esm_blob_size;
+
+	/* Do we have an ESM (Enter Secure Mode) blob? */
+	if (&_esm_blob_end <= &_esm_blob_start)
+		return;
+
+	printf("Attached ESM blob at 0x%p-0x%p\n\r",
+	       _esm_blob_start, _esm_blob_end);
+	esm_blob_addr = (unsigned long)_esm_blob_start;
+	esm_blob_size = _esm_blob_end - _esm_blob_start;
+
+	/*
+	 * If the ESM blob is too low it will be clobbered when the
+	 * kernel relocates to its final location.  In this case,
+	 * allocate a safer place and move it.
+	 */
+	if (esm_blob_addr < vmlinux.size) {
+		void *old_addr = (void *)esm_blob_addr;
+
+		printf("Allocating 0x%lx bytes for esm_blob ...\n\r",
+		       esm_blob_size);
+		esm_blob_addr = (unsigned long)malloc(esm_blob_size);
+		if (!esm_blob_addr)
+			fatal("Can't allocate memory for ESM blob !\n\r");
+		printf("Relocating ESM blob 0x%lx <- 0x%p (0x%lx bytes)\n\r",
+		       esm_blob_addr, old_addr, esm_blob_size);
+		memmove((void *)esm_blob_addr, old_addr, esm_blob_size);
+	}
+
+	/* Tell the kernel ESM blob address via device tree. */
+	setprop_val(chosen, "linux,esm-blob-start", (u32)(esm_blob_addr));
+	setprop_val(chosen, "linux,esm-blob-end", (u32)(esm_blob_addr + esm_blob_size));
+}
+#else
+static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { }
+#endif
+
+/* A buffer that may be edited by tools operating on a zImage binary so as to
+ * edit the command line passed to vmlinux (by setting /chosen/bootargs).
+ * The buffer is put in its own section so that tools may locate it easier.
+ */
+static char cmdline[BOOT_COMMAND_LINE_SIZE]
+	__attribute__((__section__("__builtin_cmdline")));
+
+static void prep_cmdline(void *chosen)
+{
+	unsigned int getline_timeout = 5000;
+	int v;
+	int n;
+
+	/* Wait-for-input time */
+	n = getprop(chosen, "linux,cmdline-timeout", &v, sizeof(v));
+	if (n == sizeof(v))
+		getline_timeout = v;
+
+	if (cmdline[0] == '\0')
+		getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1);
+
+	printf("\n\rLinux/PowerPC load: %s", cmdline);
+
+	/* If possible, edit the command line */
+	if (console_ops.edit_cmdline && getline_timeout)
+		console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE, getline_timeout);
+
+	printf("\n\r");
+
+	/* Put the command line back into the devtree for the kernel */
+	setprop_str(chosen, "bootargs", cmdline);
+}
+
+struct platform_ops platform_ops;
+struct dt_ops dt_ops;
+struct console_ops console_ops;
+struct loader_info loader_info;
+
+void start(void)
+{
+	struct addr_range vmlinux, initrd;
+	kernel_entry_t kentry;
+	unsigned long ft_addr = 0;
+	void *chosen;
+
+	/* Do this first, because malloc() could clobber the loader's
+	 * command line.  Only use the loader command line if a
+	 * built-in command line wasn't set by an external tool */
+	if ((loader_info.cmdline_len > 0) && (cmdline[0] == '\0'))
+		memmove(cmdline, loader_info.cmdline,
+			min(loader_info.cmdline_len, BOOT_COMMAND_LINE_SIZE-1));
+
+	if (console_ops.open && (console_ops.open() < 0))
+		exit();
+	if (platform_ops.fixups)
+		platform_ops.fixups();
+
+	printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r",
+	       _start, get_sp());
+
+	/* Ensure that the device tree has a /chosen node */
+	chosen = finddevice("/chosen");
+	if (!chosen)
+		chosen = create_node(NULL, "chosen");
+
+	vmlinux = prep_kernel();
+	initrd = prep_initrd(vmlinux, chosen,
+			     loader_info.initrd_addr, loader_info.initrd_size);
+	prep_esm_blob(vmlinux, chosen);
+	prep_cmdline(chosen);
+
+	printf("Finalizing device tree...");
+	if (dt_ops.finalize)
+		ft_addr = dt_ops.finalize();
+	if (ft_addr)
+		printf(" flat tree at 0x%lx\n\r", ft_addr);
+	else
+		printf(" using OF tree (promptr=%p)\n\r", loader_info.promptr);
+
+	if (console_ops.close)
+		console_ops.close();
+
+	kentry = (kernel_entry_t) vmlinux.addr;
+	if (ft_addr) {
+		if(platform_ops.kentry)
+			platform_ops.kentry(ft_addr, vmlinux.addr);
+		else
+			kentry(ft_addr, 0, NULL);
+	}
+	else
+		kentry((unsigned long)initrd.addr, initrd.size,
+		       loader_info.promptr);
+
+	/* console closed so printf in fatal below may not work */
+	fatal("Error: Linux kernel returned to zImage boot wrapper!\n\r");
+}
diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c
new file mode 100644
index 0000000000..ca9d83617f
--- /dev/null
+++ b/arch/powerpc/boot/microwatt.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+BSS_STACK(8192);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+	unsigned long heapsize = 16*1024*1024 - (unsigned long)_end;
+
+	/*
+	 * Disable interrupts and turn off MSR_RI, since we'll
+	 * shortly be overwriting the interrupt vectors.
+	 */
+	__asm__ volatile("mtmsrd %0,1" : : "r" (0));
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/mktree.c b/arch/powerpc/boot/mktree.c
new file mode 100644
index 0000000000..dc603f3c15
--- /dev/null
+++ b/arch/powerpc/boot/mktree.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Makes a tree bootable image for IBM Evaluation boards.
+ * Basically, just take a zImage, skip the ELF header, and stuff
+ * a 32 byte header on the front.
+ *
+ * We use htonl, which is a network macro, to make sure we're doing
+ * The Right Thing on an LE machine.  It's non-obvious, but it should
+ * work on anything BSD'ish.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <netinet/in.h>
+#ifdef __sun__
+#include <inttypes.h>
+#else
+#include <stdint.h>
+#endif
+
+/* This gets tacked on the front of the image.  There are also a few
+ * bytes allocated after the _start label used by the boot rom (see
+ * head.S for details).
+ */
+typedef struct boot_block {
+	uint32_t bb_magic;		/* 0x0052504F */
+	uint32_t bb_dest;		/* Target address of the image */
+	uint32_t bb_num_512blocks;	/* Size, rounded-up, in 512 byte blks */
+	uint32_t bb_debug_flag;	/* Run debugger or image after load */
+	uint32_t bb_entry_point;	/* The image address to start */
+	uint32_t bb_checksum;	/* 32 bit checksum including header */
+	uint32_t reserved[2];
+} boot_block_t;
+
+#define IMGBLK	512
+unsigned int	tmpbuf[IMGBLK / sizeof(unsigned int)];
+
+int main(int argc, char *argv[])
+{
+	int	in_fd, out_fd;
+	int	nblks, i;
+	unsigned int	cksum, *cp;
+	struct	stat	st;
+	boot_block_t	bt;
+
+	if (argc < 5) {
+		fprintf(stderr, "usage: %s <zImage-file> <boot-image> <load address> <entry point>\n",argv[0]);
+		exit(1);
+	}
+
+	if (stat(argv[1], &st) < 0) {
+		perror("stat");
+		exit(2);
+	}
+
+	nblks = (st.st_size + IMGBLK) / IMGBLK;
+
+	bt.bb_magic = htonl(0x0052504F);
+
+	/* Load address and entry point are both required arguments */
+	bt.bb_dest = htonl(strtoul(argv[3], NULL, 0));
+	bt.bb_entry_point = htonl(strtoul(argv[4], NULL, 0));
+
+	/* We know these from the linker command.
+	 * ...and then move it up into memory a little more so the
+	 * relocation can happen.
+	 */
+	bt.bb_num_512blocks = htonl(nblks);
+	bt.bb_debug_flag = 0;
+
+	bt.bb_checksum = 0;
+
+	/* To be neat and tidy :-).
+	*/
+	bt.reserved[0] = 0;
+	bt.reserved[1] = 0;
+
+	if ((in_fd = open(argv[1], O_RDONLY)) < 0) {
+		perror("zImage open");
+		exit(3);
+	}
+
+	if ((out_fd = open(argv[2], (O_RDWR | O_CREAT | O_TRUNC), 0666)) < 0) {
+		perror("bootfile open");
+		exit(3);
+	}
+
+	cksum = 0;
+	cp = (void *)&bt;
+	for (i = 0; i < sizeof(bt) / sizeof(unsigned int); i++)
+		cksum += *cp++;
+
+	/* Assume zImage is an ELF file, and skip the 64K header.
+	*/
+	if (read(in_fd, tmpbuf, sizeof(tmpbuf)) != sizeof(tmpbuf)) {
+		fprintf(stderr, "%s is too small to be an ELF image\n",
+				argv[1]);
+		exit(4);
+	}
+
+	if (tmpbuf[0] != htonl(0x7f454c46)) {
+		fprintf(stderr, "%s is not an ELF image\n", argv[1]);
+		exit(4);
+	}
+
+	if (lseek(in_fd, (64 * 1024), SEEK_SET) < 0) {
+		fprintf(stderr, "%s failed to seek in ELF image\n", argv[1]);
+		exit(4);
+	}
+
+	nblks -= (64 * 1024) / IMGBLK;
+
+	/* And away we go......
+	*/
+	if (write(out_fd, &bt, sizeof(bt)) != sizeof(bt)) {
+		perror("boot-image write");
+		exit(5);
+	}
+
+	while (nblks-- > 0) {
+		if (read(in_fd, tmpbuf, sizeof(tmpbuf)) < 0) {
+			perror("zImage read");
+			exit(5);
+		}
+		cp = tmpbuf;
+		for (i = 0; i < sizeof(tmpbuf) / sizeof(unsigned int); i++)
+			cksum += *cp++;
+		if (write(out_fd, tmpbuf, sizeof(tmpbuf)) != sizeof(tmpbuf)) {
+			perror("boot-image write");
+			exit(5);
+		}
+	}
+
+	/* rewrite the header with the computed checksum.
+	*/
+	bt.bb_checksum = htonl(cksum);
+	if (lseek(out_fd, 0, SEEK_SET) < 0) {
+		perror("rewrite seek");
+		exit(1);
+	}
+	if (write(out_fd, &bt, sizeof(bt)) != sizeof(bt)) {
+		perror("boot-image rewrite");
+		exit(1);
+	}
+
+	exit(0);
+}
diff --git a/arch/powerpc/boot/motload-head.S b/arch/powerpc/boot/motload-head.S
new file mode 100644
index 0000000000..826dad0c19
--- /dev/null
+++ b/arch/powerpc/boot/motload-head.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "ppc_asm.h"
+
+	.text
+	.globl _zimage_start
+_zimage_start:
+	mfmsr   r10
+	rlwinm  r10,r10,0,~(1<<15)        /* Clear MSR_EE */
+	sync
+	mtmsr   r10
+	isync
+	b	_zimage_start_lib
diff --git a/arch/powerpc/boot/mpc52xx-psc.c b/arch/powerpc/boot/mpc52xx-psc.c
new file mode 100644
index 0000000000..c2c08633ee
--- /dev/null
+++ b/arch/powerpc/boot/mpc52xx-psc.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MPC5200 PSC serial console support.
+ *
+ * Author: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (c) 2007 Secret Lab Technologies Ltd.
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ *
+ * It is assumed that the firmware (or the platform file) has already set
+ * up the port.
+ */
+
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+/* Programmable Serial Controller (PSC) status register bits */
+#define MPC52xx_PSC_SR		0x04
+#define MPC52xx_PSC_SR_RXRDY		0x0100
+#define MPC52xx_PSC_SR_RXFULL		0x0200
+#define MPC52xx_PSC_SR_TXRDY		0x0400
+#define MPC52xx_PSC_SR_TXEMP		0x0800
+
+#define MPC52xx_PSC_BUFFER	0x0C
+
+static void *psc;
+
+static int psc_open(void)
+{
+	/* Assume the firmware has already configured the PSC into
+	 * uart mode */
+	return 0;
+}
+
+static void psc_putc(unsigned char c)
+{
+	while (!(in_be16(psc + MPC52xx_PSC_SR) & MPC52xx_PSC_SR_TXRDY)) ;
+	out_8(psc + MPC52xx_PSC_BUFFER, c);
+}
+
+static unsigned char psc_tstc(void)
+{
+	return (in_be16(psc + MPC52xx_PSC_SR) & MPC52xx_PSC_SR_RXRDY) != 0;
+}
+
+static unsigned char psc_getc(void)
+{
+	while (!(in_be16(psc + MPC52xx_PSC_SR) & MPC52xx_PSC_SR_RXRDY)) ;
+	return in_8(psc + MPC52xx_PSC_BUFFER);
+}
+
+int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp)
+{
+	/* Get the base address of the psc registers */
+	if (dt_get_virtual_reg(devp, &psc, 1) < 1)
+		return -1;
+
+	scdp->open = psc_open;
+	scdp->putc = psc_putc;
+	scdp->getc = psc_getc;
+	scdp->tstc = psc_tstc;
+
+	return 0;
+}
diff --git a/arch/powerpc/boot/mpc8xx.c b/arch/powerpc/boot/mpc8xx.c
new file mode 100644
index 0000000000..e19ef64df4
--- /dev/null
+++ b/arch/powerpc/boot/mpc8xx.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MPC8xx support functions
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "types.h"
+#include "fsl-soc.h"
+#include "mpc8xx.h"
+#include "stdio.h"
+#include "io.h"
+
+#define MPC8XX_PLPRCR (0x284/4) /* PLL and Reset Control Register */
+
+/*
+ * Return the system clock derived from the crystal frequency.
+ *
+ * The multiplier and divider fields are read from PLPRCR and combined as
+ *   (crystal * MFI + crystal * MFN / (MFD + 1)) / (PDF + 1)
+ * using 32-bit integer arithmetic.  Returns 0 if the IMMR base is unknown.
+ */
+u32 mpc885_get_clock(u32 crystal)
+{
+	u32 *immr = fsl_get_immr();
+	u32 plprcr;
+	u32 sysclk;
+	int mfi, mfn, mfd, pdf;
+
+	if (!immr) {
+		printf("mpc885_get_clock: Couldn't get IMMR base.\r\n");
+		return 0;
+	}
+
+	plprcr = in_be32(&immr[MPC8XX_PLPRCR]);
+
+	/* Pull the individual fields out of the register value */
+	mfn = (plprcr >> 27) & 0x1f;
+	mfd = (plprcr >> 22) & 0x1f;
+	mfi = (plprcr >> 16) & 15;
+	pdf = (plprcr >> 1) & 0xf;
+
+	/* An integer multiplier below 5 is reported and clamped to 5 */
+	if (mfi < 5) {
+		printf("Warning: PLPRCR[MFI] value of %d out-of-bounds\r\n",
+		       mfi);
+		mfi = 5;
+	}
+
+	sysclk = crystal * mfi;
+
+	/* Add the fractional part only when a numerator is programmed */
+	if (mfn != 0)
+		sysclk += crystal * mfn / (mfd + 1);
+
+	return sysclk / (pdf + 1);
+}
+
+/*
+ * Propagate the system clock into the device tree: the CPU clocks
+ * (timebase is sysclk / 16) and, when the nodes exist, the
+ * "clock-frequency" property of /soc/cpm and /soc/cpm/brg.
+ */
+void mpc8xx_set_clocks(u32 sysclk)
+{
+	void *cpm;
+	void *brg;
+
+	dt_fixup_cpu_clocks(sysclk, sysclk / 16, sysclk);
+
+	cpm = finddevice("/soc/cpm");
+	if (cpm)
+		setprop(cpm, "clock-frequency", &sysclk, sizeof(sysclk));
+
+	brg = finddevice("/soc/cpm/brg");
+	if (brg)
+		setprop(brg, "clock-frequency", &sysclk, sizeof(sysclk));
+}
+
+/*
+ * Compute the MPC885 system clock from the crystal frequency and, if it
+ * could be determined, write it into the device tree.
+ *
+ * Returns 1 when the clocks were set, 0 when the clock is unknown.
+ */
+int mpc885_fixup_clocks(u32 crystal)
+{
+	u32 sysclk = mpc885_get_clock(crystal);
+
+	if (sysclk != 0) {
+		mpc8xx_set_clocks(sysclk);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/boot/mpc8xx.h b/arch/powerpc/boot/mpc8xx.h
new file mode 100644
index 0000000000..3852ed9004
--- /dev/null
+++ b/arch/powerpc/boot/mpc8xx.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_MPC8xx_H_
+#define _PPC_BOOT_MPC8xx_H_
+
+#include "types.h"
+
+/*
+ * Pass sysclk to dt_fixup_cpu_clocks() (timebase = sysclk / 16) and store
+ * it as "clock-frequency" in /soc/cpm and /soc/cpm/brg when those exist.
+ */
+void mpc8xx_set_clocks(u32 sysclk);
+
+/*
+ * Derive the system clock from the crystal frequency and the PLPRCR
+ * register.  Returns 0 if the IMMR base cannot be found.
+ */
+u32 mpc885_get_clock(u32 crystal);
+/*
+ * mpc885_get_clock() followed by mpc8xx_set_clocks().
+ * Returns 1 on success, 0 if the clock could not be determined.
+ */
+int mpc885_fixup_clocks(u32 crystal);
+
+#endif
diff --git a/arch/powerpc/boot/mvme5100.c b/arch/powerpc/boot/mvme5100.c
new file mode 100644
index 0000000000..51453d0ec9
--- /dev/null
+++ b/arch/powerpc/boot/mvme5100.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Motorola/Emerson MVME5100 with PPCBug firmware.
+ *
+ * Author: Stephen Chivers <schivers@csc.com>
+ *
+ * Copyright 2013 CSC Australia Pty. Ltd.
+ */
+#include "types.h"
+#include "ops.h"
+#include "io.h"
+
+BSS_STACK(4096);
+
+/*
+ * MVME5100 entry hook: set up a simple heap right after the wrapper image,
+ * attach the built-in device tree blob and bring up the serial console.
+ * The register arguments are not used on this platform.
+ */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+	/* Heap runs from the end of the image up to the 128M mark */
+	u32 heapsize = 0x8000000 - (u32)_end;
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/mvme7100.c b/arch/powerpc/boot/mvme7100.c
new file mode 100644
index 0000000000..1e218454ab
--- /dev/null
+++ b/arch/powerpc/boot/mvme7100.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Motload compatibility for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_86xx
+#define TARGET_HAS_ETH1
+#define TARGET_HAS_ETH2
+#define TARGET_HAS_ETH3
+#include "ppcboot.h"
+
+static bd_t bd;
+
+BSS_STACK(16384);
+
+/*
+ * Device tree fixups using the board info block (bd): clocks, memory
+ * range and the four Ethernet MAC addresses.  The bd frequency fields are
+ * multiplied by 1000000 before being written.
+ */
+static void mvme7100_fixups(void)
+{
+	unsigned long busfreq = bd.bi_busfreq * 1000000;
+	void *soc;
+	void *uart;
+
+	dt_fixup_cpu_clocks(bd.bi_intfreq * 1000000, busfreq / 4, busfreq);
+
+	soc = finddevice("/soc@f1000000");
+	if (soc != NULL)
+		setprop(soc, "bus-frequency", &busfreq, sizeof(busfreq));
+
+	uart = finddevice("/soc/serial@4500");
+	if (uart != NULL)
+		setprop(uart, "clock-frequency", &busfreq, sizeof(busfreq));
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+
+	/* One MAC address per "ethernetN" alias */
+	dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+	dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+	dt_fixup_mac_address_by_alias("ethernet2", bd.bi_enet2addr);
+	dt_fixup_mac_address_by_alias("ethernet3", bd.bi_enet3addr);
+}
+
+/*
+ * MVME7100 entry hook.  Runs the common cuboot initialisation, attaches
+ * the built-in device tree blob, brings up the serial console and
+ * registers mvme7100_fixups() as the platform fixup callback.
+ *
+ * NOTE(review): CUBOOT_INIT() is a macro defined outside this file; it
+ * presumably consumes r3..r7 and fills in the file-scope `bd` -- confirm
+ * against cuboot.h before renaming any of these.
+ */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	CUBOOT_INIT();
+	fdt_init(_dtb_start);
+	serial_console_init();
+	platform_ops.fixups = mvme7100_fixups;
+}
diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c
new file mode 100644
index 0000000000..f16d2be1d0
--- /dev/null
+++ b/arch/powerpc/boot/ns16550.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * 16550 serial console support.
+ *
+ * Original copied from <file:arch/ppc/boot/common/ns16550.c>
+ * (which had no copyright)
+ * Modifications: 2006 (c) MontaVista Software, Inc.
+ *
+ * Modified by: Mark A. Greer <mgreer@mvista.com>
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "string.h"
+#include "stdio.h"
+#include "io.h"
+#include "ops.h"
+#include "of.h"
+
+#define UART_DLL	0	/* Out: Divisor Latch Low */
+#define UART_DLM	1	/* Out: Divisor Latch High */
+#define UART_FCR	2	/* Out: FIFO Control Register */
+#define UART_LCR	3	/* Out: Line Control Register */
+#define UART_MCR	4	/* Out: Modem Control Register */
+#define UART_LSR	5	/* In:  Line Status Register */
+#define UART_LSR_THRE	0x20	/* Transmit-hold-register empty */
+#define UART_LSR_DR	0x01	/* Receiver data ready */
+#define UART_MSR	6	/* In:  Modem Status Register */
+#define UART_SCR	7	/* I/O: Scratch Register */
+
+static unsigned char *reg_base;
+static u32 reg_shift;
+
+/*
+ * Console "open" hook: writes 0x06 to the FIFO control register.
+ * NOTE(review): on a standard 16550 that value resets both FIFOs --
+ * confirm against the UART datasheet.  Always returns 0.
+ */
+static int ns16550_open(void)
+{
+	unsigned char *fcr = reg_base + (UART_FCR << reg_shift);
+
+	out_8(fcr, 0x06);
+	return 0;
+}
+
+/* Wait for the transmit holding register to empty, then send one byte. */
+static void ns16550_putc(unsigned char c)
+{
+	unsigned char *lsr = reg_base + (UART_LSR << reg_shift);
+
+	while (!(in_8(lsr) & UART_LSR_THRE))
+		;
+	out_8(reg_base, c);
+}
+
+/* Wait until the line status register reports data ready, then read it. */
+static unsigned char ns16550_getc(void)
+{
+	unsigned char *lsr = reg_base + (UART_LSR << reg_shift);
+
+	while (!(in_8(lsr) & UART_LSR_DR))
+		;
+	return in_8(reg_base);
+}
+
+/* Non-blocking poll: 1 if a received byte is waiting, 0 otherwise. */
+static u8 ns16550_tstc(void)
+{
+	unsigned char *lsr = reg_base + (UART_LSR << reg_shift);
+
+	if (in_8(lsr) & UART_LSR_DR)
+		return 1;
+	return 0;
+}
+
+/*
+ * Set up the 16550 console from a device tree node.
+ *
+ * The register base comes from dt_get_virtual_reg().  An optional
+ * "reg-offset" property is added to the base, and an optional "reg-shift"
+ * property gives the left shift applied to register indices (0 when
+ * absent).  Both properties are stored big-endian.
+ *
+ * Returns 0 on success, -1 if the register base cannot be determined.
+ */
+int ns16550_console_init(void *devp, struct serial_console_data *scdp)
+{
+	u32 prop;
+	int len;
+
+	if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1) {
+		printf("virt reg parse fail...\r\n");
+		return -1;
+	}
+
+	len = getprop(devp, "reg-offset", &prop, sizeof(prop));
+	if (len == sizeof(prop))
+		reg_base += be32_to_cpu(prop);
+
+	len = getprop(devp, "reg-shift", &prop, sizeof(prop));
+	if (len == sizeof(prop))
+		reg_shift = be32_to_cpu(prop);
+	else
+		reg_shift = 0;
+
+	scdp->open = ns16550_open;
+	scdp->putc = ns16550_putc;
+	scdp->getc = ns16550_getc;
+	scdp->tstc = ns16550_tstc;
+	scdp->close = NULL;
+
+	return 0;
+}
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
new file mode 100644
index 0000000000..2fbd4ae60e
--- /dev/null
+++ b/arch/powerpc/boot/of.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+
+#include "of.h"
+
+/* Value picked to match that used by yaboot */
+#define PROG_START	0x01400000	/* only used on 64-bit systems */
+#define RAM_END		(512<<20)	/* Fixme: use OF */
+#define	ONE_MB		0x100000
+
+
+
+static unsigned long claim_base;
+
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7);
+
+/*
+ * Allocator hook backed by Open Firmware "claim".
+ *
+ * Starting at claim_base (initialised to the first 1MB boundary after the
+ * wrapper image), try to claim `size` bytes at successive 1MB steps until
+ * a claim succeeds or RAM_END is reached.  On success claim_base is moved
+ * past the new region (page aligned) so the next call continues from there.
+ *
+ * Returns the claimed address, or NULL when no claim succeeded.
+ */
+static void *of_try_claim(unsigned long size)
+{
+	unsigned long addr = 0;
+
+	if (claim_base == 0)
+		claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB);
+
+	for(; claim_base < RAM_END; claim_base += ONE_MB) {
+#ifdef DEBUG
+		printf("    trying: 0x%08lx\n\r", claim_base);
+#endif
+		addr = (unsigned long) of_claim(claim_base, size, 0);
+		if (addr != PROM_ERROR)
+			break;
+	}
+	/*
+	 * addr is 0 when the loop never ran (claim_base already at RAM_END)
+	 * and PROM_ERROR when every attempt failed; neither is a usable
+	 * allocation, so do not hand PROM_ERROR back as a pointer.
+	 */
+	if (addr == 0 || addr == PROM_ERROR)
+		return NULL;
+	claim_base = PAGE_ALIGN(claim_base + size);
+	return (void *)addr;
+}
+
+/*
+ * Inspect the kernel image header.  For a 64-bit ELF image, raise
+ * claim_base to at least PROG_START: this "magic" minimum address keeps
+ * some older firmware platforms running.
+ */
+static void of_image_hdr(const void *hdr)
+{
+	const Elf64_Ehdr *ehdr = hdr;
+
+	if (ehdr->e_ident[EI_CLASS] != ELFCLASS64)
+		return;
+
+	if (claim_base < PROG_START)
+		claim_base = PROG_START;
+}
+
+/*
+ * Open Firmware flavour of platform initialisation.
+ *
+ * a1, a2:  initrd address and size passed by the loader; ignored when
+ *          either is zero or when a2 is 0xdeadbeef
+ * promptr: Open Firmware client interface entry point
+ */
+static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr)
+{
+	int have_initrd = a1 && a2 && a2 != 0xdeadbeef;
+
+	/* Platform hooks implemented on top of OF services */
+	platform_ops.image_hdr = of_image_hdr;
+	platform_ops.malloc = of_try_claim;
+	platform_ops.exit = of_exit;
+	platform_ops.vmlinux_alloc = of_vmlinux_alloc;
+
+	/* Device tree accessors go straight to the firmware */
+	dt_ops.finddevice = of_finddevice;
+	dt_ops.getprop = of_getprop;
+	dt_ops.setprop = of_setprop;
+
+	of_console_init();
+
+	of_init(promptr);
+	loader_info.promptr = promptr;
+
+	if (have_initrd) {
+		loader_info.initrd_addr = a1;
+		loader_info.initrd_size = a2;
+	}
+}
+
+/*
+ * Wrapper entry hook.  A non-zero r5 is taken to be the Open Firmware
+ * client interface pointer; a zero r5 means an ePAPR-style boot.
+ */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	if (!r5) {
+		epapr_platform_init(r3, r4, r5, r6, r7);
+		return;
+	}
+
+	of_platform_init(r3, r4, (void *)r5);
+}
+
diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
new file mode 100644
index 0000000000..31b2f5dfd5
--- /dev/null
+++ b/arch/powerpc/boot/of.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_OF_H_
+#define _PPC_BOOT_OF_H_
+
+#include "swab.h"
+
+/* Handles returned by the firmware for device nodes and open instances */
+typedef void *phandle;
+typedef u32 ihandle;
+
+/* Record the firmware entry point (only stored on 32-bit builds) */
+void of_init(void *promptr);
+/*
+ * Call firmware service `service` with nargs input cells and nret return
+ * cells; returns the first return cell, or PROM_ERROR if the call fails.
+ */
+int of_call_prom(const char *service, int nargs, int nret, ...);
+/* Claim memory at `virt`; returns the claimed address or PROM_ERROR */
+unsigned int of_claim(unsigned long virt, unsigned long size,
+	unsigned long align);
+void *of_vmlinux_alloc(unsigned long size);
+void of_exit(void);
+/* Thin wrappers around the "finddevice", "getprop" and "setprop" services */
+void *of_finddevice(const char *name);
+int of_getprop(const void *phandle, const char *name, void *buf,
+	       const int buflen);
+int of_setprop(const void *phandle, const char *name, const void *buf,
+	       const int buflen);
+
+/* Console functions */
+void of_console_init(void);
+
+/* Annotated types for values stored in big-endian byte order */
+typedef u16			__be16;
+typedef u32			__be32;
+typedef u64			__be64;
+
+/*
+ * CPU <-> big-endian conversions: byte swaps on little-endian builds,
+ * identity otherwise.
+ */
+#ifdef __LITTLE_ENDIAN__
+#define cpu_to_be16(x) swab16(x)
+#define be16_to_cpu(x) swab16(x)
+#define cpu_to_be32(x) swab32(x)
+#define be32_to_cpu(x) swab32(x)
+#define cpu_to_be64(x) swab64(x)
+#define be64_to_cpu(x) swab64(x)
+#else
+#define cpu_to_be16(x) (x)
+#define be16_to_cpu(x) (x)
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#define cpu_to_be64(x) (x)
+#define be64_to_cpu(x) (x)
+#endif
+
+/* Failure value of the firmware call helpers (all ones, unsigned int) */
+#define PROM_ERROR (-1u)
+
+#endif /* _PPC_BOOT_OF_H_ */
diff --git a/arch/powerpc/boot/ofconsole.c b/arch/powerpc/boot/ofconsole.c
new file mode 100644
index 0000000000..8eb0f1c452
--- /dev/null
+++ b/arch/powerpc/boot/ofconsole.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OF console routines
+ *
+ * Copyright (C) Paul Mackerras 1997.
+ */
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+
+#include "of.h"
+
+static unsigned int of_stdout_handle;
+
+/*
+ * Look up the firmware's stdout instance handle from the "stdout"
+ * property of /chosen and cache it (converted from big-endian).
+ *
+ * Returns 0 on success, -1 if the node or the property is unavailable.
+ */
+static int of_console_open(void)
+{
+	void *chosen = of_finddevice("/chosen");
+
+	if (chosen == NULL)
+		return -1;
+
+	if (of_getprop(chosen, "stdout", &of_stdout_handle,
+		       sizeof(of_stdout_handle)) != sizeof(of_stdout_handle))
+		return -1;
+
+	of_stdout_handle = be32_to_cpu(of_stdout_handle);
+	return 0;
+}
+
+/* Send len bytes from buf to the cached stdout handle via "write". */
+static void of_console_write(const char *buf, int len)
+{
+	(void)of_call_prom("write", 3, 1, of_stdout_handle, buf, len);
+}
+
+/* Register the Open Firmware console open/write hooks in console_ops. */
+void of_console_init(void)
+{
+	console_ops.write = of_console_write;
+	console_ops.open = of_console_open;
+}
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
new file mode 100644
index 0000000000..8759c985ef
--- /dev/null
+++ b/arch/powerpc/boot/oflib.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ */
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+
+#include "of.h"
+
+typedef u32 prom_arg_t;
+
+/* The following structure is used to communicate with open firmware.
+ * All arguments in and out are in big endian format.
+ *
+ * args[] holds the nargs input cells first, immediately followed by the
+ * nret return cells (see of_call_prom()), so nargs + nret must not exceed
+ * the array size. */
+struct prom_args {
+	__be32 service;	/* Address of service name string. */
+	__be32 nargs;	/* Number of input arguments. */
+	__be32 nret;	/* Number of output arguments. */
+	__be32 args[10];	/* Input/output arguments. */
+};
+
+#ifdef __powerpc64__
+extern int prom(void *);
+#else
+static int (*prom) (void *);
+#endif
+
+/*
+ * Record the firmware entry point.  On 32-bit builds `prom` is a function
+ * pointer set here; on 64-bit builds `prom` is an external function and
+ * promptr is not stored by this routine.
+ */
+void of_init(void *promptr)
+{
+#ifndef __powerpc64__
+	prom = (int (*)(void *))promptr;
+#endif
+}
+
+#define ADDR(x)		(u32)(unsigned long)(x)
+
+/*
+ * Call an Open Firmware client service.
+ *
+ * service: name of the service
+ * nargs:   number of variadic input arguments that follow nret
+ * nret:    number of return cells expected
+ *
+ * Every variadic argument is fetched as a 32-bit prom_arg_t and stored
+ * big-endian.  Returns the first return cell (converted to CPU order),
+ * 0 when nret is 0, or PROM_ERROR if the firmware entry returns < 0.
+ *
+ * NOTE(review): there is no check that nargs + nret fits in args[10];
+ * callers are expected to stay within that bound.
+ */
+int of_call_prom(const char *service, int nargs, int nret, ...)
+{
+	int i;
+	struct prom_args args;
+	va_list list;
+
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
+
+	va_start(list, nret);
+	for (i = 0; i < nargs; i++)
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
+	va_end(list);
+
+	/* Clear the return cells, which follow the input cells */
+	for (i = 0; i < nret; i++)
+		args.args[nargs+i] = 0;
+
+	if (prom(&args) < 0)
+		return PROM_ERROR;
+
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
+}
+
+/*
+ * Like of_call_prom(), but additionally hands back the extra return cells.
+ *
+ * rets: if non-NULL, receives return cells 1..nret-1 in rets[0..nret-2]
+ *       (converted to CPU order); the first return cell is the function's
+ *       return value.
+ *
+ * Returns the first return cell, 0 when nret is 0, or PROM_ERROR if the
+ * firmware entry returns < 0 (rets is left untouched in that case).
+ */
+static int of_call_prom_ret(const char *service, int nargs, int nret,
+			    prom_arg_t *rets, ...)
+{
+	int i;
+	struct prom_args args;
+	va_list list;
+
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
+
+	va_start(list, rets);
+	for (i = 0; i < nargs; i++)
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
+	va_end(list);
+
+	/* Clear the return cells, which follow the input cells */
+	for (i = 0; i < nret; i++)
+		args.args[nargs+i] = 0;
+
+	if (prom(&args) < 0)
+		return PROM_ERROR;
+
+	if (rets != NULL)
+		for (i = 1; i < nret; ++i)
+			rets[i-1] = be32_to_cpu(args.args[nargs+i]);
+
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
+}
+
+/*
+ * Prefix test: returns 1 when s1 starts with s2 (an empty s2 always
+ * matches), 0 otherwise.
+ */
+static int string_match(const char *s1, const char *s2)
+{
+	while (*s2 != '\0') {
+		if (*s1 != *s2)
+			return 0;
+		s1++;
+		s2++;
+	}
+	return 1;
+}
+
+/*
+ * Older OF's require that when claiming a specific range of addresses,
+ * we claim the physical space in the /memory node and the virtual
+ * space in the chosen mmu node, and then do a map operation to
+ * map virtual to physical.
+ */
+static int need_map = -1;
+static ihandle chosen_mmu;
+static ihandle memory;
+
+/*
+ * Decide whether the firmware is one of the old implementations that need
+ * the explicit claim-physical / claim-virtual / map sequence in of_claim().
+ *
+ * Returns 1 (and fills in chosen_mmu and memory) when the "model" property
+ * of /openprom starts with "Open Firmware, 1." or "FirmWorks,3." and both
+ * the mmu handle and a memory instance could be obtained; 0 otherwise.
+ */
+static int check_of_version(void)
+{
+	phandle oprom, chosen;
+	char version[64];
+
+	oprom = of_finddevice("/openprom");
+	if (oprom == (phandle) -1)
+		return 0;
+	if (of_getprop(oprom, "model", version, sizeof(version)) <= 0)
+		return 0;
+	/* Force termination in case the property filled the whole buffer */
+	version[sizeof(version)-1] = 0;
+	printf("OF version = '%s'\r\n", version);
+	if (!string_match(version, "Open Firmware, 1.")
+	    && !string_match(version, "FirmWorks,3."))
+		return 0;
+	/* Fall back to the "@0" spelling of the node name if needed */
+	chosen = of_finddevice("/chosen");
+	if (chosen == (phandle) -1) {
+		chosen = of_finddevice("/chosen@0");
+		if (chosen == (phandle) -1) {
+			printf("no chosen\n");
+			return 0;
+		}
+	}
+	/*
+	 * NOTE(review): chosen_mmu is used exactly as read, without a
+	 * be32_to_cpu() conversion -- presumably fine because these old
+	 * firmwares only exist on big-endian systems; confirm.
+	 */
+	if (of_getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
+		printf("no mmu\n");
+		return 0;
+	}
+	memory = of_call_prom("open", 1, 1, "/memory");
+	if (memory == PROM_ERROR) {
+		memory = of_call_prom("open", 1, 1, "/memory@0");
+		if (memory == PROM_ERROR) {
+			printf("no memory node\n");
+			return 0;
+		}
+	}
+	printf("old OF detected\r\n");
+	return 1;
+}
+
+/*
+ * Claim `size` bytes at address `virt` from the firmware.
+ *
+ * With a non-zero `align`, or on firmware that does not need the old-style
+ * sequence, this is a plain "claim" call and its result is returned.
+ *
+ * On old firmware (see check_of_version(), evaluated once and cached in
+ * need_map) with align == 0, the physical range is claimed through the
+ * memory instance, then the virtual range through the mmu instance, and
+ * finally the two are mapped 1:1.  Only the physical claim is checked:
+ * the results of the virtual claim and of the map call are ignored, and
+ * `virt` is returned.
+ *
+ * Returns the claimed address, or -1 (PROM_ERROR) on failure.
+ */
+unsigned int of_claim(unsigned long virt, unsigned long size,
+		      unsigned long align)
+{
+	int ret;
+	prom_arg_t result;
+
+	if (need_map < 0)
+		need_map = check_of_version();
+	if (align || !need_map)
+		return of_call_prom("claim", 3, 1, virt, size, align);
+
+	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory,
+			       align, size, virt);
+	if (ret != 0 || result == -1)
+		return  -1;
+	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
+			       align, size, virt);
+	/* 0x12 == coherent + read/write */
+	ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu,
+			   0x12, size, virt, virt);
+	return virt;
+}
+
+/*
+ * Allocate `size` bytes for the kernel image.
+ *
+ * Returns the buffer; does not return if the allocation fails (fatal()).
+ */
+void *of_vmlinux_alloc(unsigned long size)
+{
+	unsigned long img_start = (unsigned long)_start;
+	unsigned long img_end = (unsigned long)_end;
+	unsigned long claimed;
+	void *buf;
+
+	/*
+	 * Some older POWER4 firmware requires the area the kernel will
+	 * reside in to be claimed.  Newer firmware does not, so the result
+	 * is only printed and otherwise ignored.
+	 */
+	claimed = (unsigned long) of_claim(img_start, img_end - img_start, 0);
+	printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %lx\r\n",
+	       img_start, img_end, img_end - img_start, claimed);
+
+	buf = malloc(size);
+	if (!buf)
+		fatal("Can't allocate memory for kernel image!\n\r");
+
+	return buf;
+}
+
+/* Invoke the firmware "exit" service (no arguments, no return cells). */
+void of_exit(void)
+{
+	(void)of_call_prom("exit", 0, 0);
+}
+
+/*
+ * OF device tree routines
+ */
+
+/* Look up a node by path; the firmware's result is returned as a pointer. */
+void *of_finddevice(const char *name)
+{
+	int handle = of_call_prom("finddevice", 1, 1, name);
+
+	return (void *) (unsigned long) handle;
+}
+
+/* Read property `name` of node `phandle` into buf (at most buflen bytes). */
+int of_getprop(const void *phandle, const char *name, void *buf,
+	       const int buflen)
+{
+	int result = of_call_prom("getprop", 4, 1, phandle, name, buf, buflen);
+
+	return result;
+}
+
+/* Write buflen bytes from buf as property `name` of node `phandle`. */
+int of_setprop(const void *phandle, const char *name, const void *buf,
+	       const int buflen)
+{
+	int result = of_call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+
+	return result;
+}
diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S
new file mode 100644
index 0000000000..1f2f330a45
--- /dev/null
+++ b/arch/powerpc/boot/opal-calls.S
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2016 IBM Corporation.
+ */
+
+#include "ppc_asm.h"
+#include "../include/asm/opal-api.h"
+
+	.text
+
+	/*
+	 * opal_kentry(fdt_addr, vmlinux_addr): jump into the kernel with the
+	 * OPAL base and entry addresses restored in r8/r9.
+	 */
+	.globl opal_kentry
+opal_kentry:
+	/* r3 is the fdt ptr */
+	/* r4 (kernel entry address) becomes the branch target */
+	mtctr r4
+	li	r4, 0
+	li	r5, 0
+	li	r6, 0
+	li	r7, 0
+	/* r8 = opal.base (offset 0), r9 = opal.entry (offset 8) */
+	LOAD_REG_ADDR(r11, opal)
+	ld	r8,0(r11)
+	ld	r9,8(r11)
+	bctr
+
+/*
+ * Emit a global entry point `name` that loads the OPAL call token into r0
+ * and branches to the common opal_call trampoline.
+ */
+#define OPAL_CALL(name, token)				\
+	.globl name;					\
+name:							\
+	li	r0, token;				\
+	b	opal_call;
+
+/*
+ * Common trampoline into OPAL.  Expects the call token in r0 (set by the
+ * OPAL_CALL stubs).  Saves LR and CR in the caller's stack frame and r2 in
+ * r13, then enters OPAL via hrfid with MSR_LE cleared; OPAL returns to
+ * opal_return.
+ */
+opal_call:
+	mflr	r11
+	std	r11,16(r1)
+	mfcr	r12
+	stw	r12,8(r1)
+	mr	r13,r2
+
+	/* Set opal return address */
+	LOAD_REG_ADDR(r11, opal_return)
+	mtlr	r11
+	mfmsr	r12
+
+	/* switch to BE when we enter OPAL */
+	li	r11,MSR_LE
+	andc	r12,r12,r11
+	mtspr	SPRN_HSRR1,r12
+
+	/* load the opal call entry point and base */
+	/* r12 = opal.entry (offset 8) -> HSRR0, r2 = opal.base (offset 0) */
+	LOAD_REG_ADDR(r11, opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+/*
+ * Return path from OPAL: undo what opal_call saved (r2 from r13, CR from
+ * 8(r1), LR from 16(r1)) and return to the original caller.
+ * NOTE(review): FIXUP_ENDIAN is defined outside this file; presumably it
+ * switches back to the wrapper's endianness -- confirm in ppc_asm.h.
+ */
+opal_return:
+	FIXUP_ENDIAN
+	mr	r2,r13;
+	lwz	r11,8(r1);
+	ld	r12,16(r1)
+	mtcr	r11;
+	mtlr	r12
+	blr
+
+/* Entry stubs for the OPAL calls used by the boot wrapper console */
+OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
+OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
+OPAL_CALL(opal_console_write_buffer_space,	OPAL_CONSOLE_WRITE_BUFFER_SPACE);
+OPAL_CALL(opal_poll_events,			OPAL_POLL_EVENTS);
+OPAL_CALL(opal_console_flush,			OPAL_CONSOLE_FLUSH);
diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c
new file mode 100644
index 0000000000..b69818ce59
--- /dev/null
+++ b/arch/powerpc/boot/opal.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 IBM Corporation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+#include "../include/asm/opal-api.h"
+
+/*
+ * Global OPAL struct used by opal-calls.S.  The assembly reads the fields
+ * by offset (base at 0, entry at 8), so the layout must not change.
+ */
+struct opal {
+	u64 base;
+	u64 entry;
+} opal;
+
+static u32 opal_con_id;
+
+/* see opal-calls.S */
+int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer);
+int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer);
+int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length);
+int64_t opal_console_flush(uint64_t term_number);
+int64_t opal_poll_events(uint64_t *outstanding_event_mask);
+
+void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr);
+
+/*
+ * Console "open" hook.  Always returns 0.
+ *
+ * OPAL passes its base and entry addresses to the boot kernel in r8 and
+ * r9 so the kernel can use the OPAL console before the device tree is
+ * unflattened.  Running the wrapper will most likely clobber those
+ * registers, so install a kentry hook that reloads them right before the
+ * decompressed kernel is entered.
+ */
+static int opal_con_open(void)
+{
+	platform_ops.kentry = opal_kentry;
+
+	return 0;
+}
+
+/*
+ * Write one character to the OPAL console.
+ *
+ * Polls the console for free buffer space, letting OPAL process events
+ * between polls, until at least one byte can be written.  The character is
+ * silently dropped if OPAL reports an error while querying buffer space.
+ * Lengths are exchanged with OPAL in big-endian byte order.
+ */
+static void opal_con_putc(unsigned char c)
+{
+	int64_t rc;
+	uint64_t olen, len;
+
+	do {
+		rc = opal_console_write_buffer_space(opal_con_id, &olen);
+		/* olen is only meaningful when the call succeeded */
+		if (rc)
+			return;
+		len = be64_to_cpu(olen);
+		opal_poll_events(NULL);
+	} while (len < 1);
+
+
+	olen = cpu_to_be64(1);
+	opal_console_write(opal_con_id, &olen, &c);
+}
+
+/* Console "close" hook: ask OPAL to flush the console's pending output. */
+static void opal_con_close(void)
+{
+	(void)opal_console_flush(opal_con_id);
+}
+
+/*
+ * Read the OPAL base and entry addresses from the /ibm,opal node into the
+ * global `opal` struct, converting them from big-endian.  Stops quietly at
+ * the first lookup that fails.
+ */
+static void opal_init(void)
+{
+	void *node = finddevice("/ibm,opal");
+
+	if (!node)
+		return;
+
+	if (getprop(node, "opal-base-address", &opal.base, sizeof(u64)) >= 0) {
+		opal.base = be64_to_cpu(opal.base);
+
+		if (getprop(node, "opal-entry-address", &opal.entry,
+			    sizeof(u64)) >= 0)
+			opal.entry = be64_to_cpu(opal.entry);
+	}
+}
+
+/*
+ * Set up the OPAL console.
+ *
+ * devp: console device node, or NULL to use console id 0
+ * scdp: serial console descriptor to fill in
+ *
+ * With a node, the console id is the big-endian "reg" property.
+ * Returns 0 on success, -1 if "reg" is not exactly one 32-bit cell.
+ */
+int opal_console_init(void *devp, struct serial_console_data *scdp)
+{
+	opal_init();
+
+	if (!devp) {
+		opal_con_id = 0;
+	} else {
+		int len = getprop(devp, "reg", &opal_con_id, sizeof(u32));
+
+		if (len != sizeof(u32))
+			return -1;
+		opal_con_id = be32_to_cpu(opal_con_id);
+	}
+
+	scdp->open = opal_con_open;
+	scdp->putc = opal_con_putc;
+	scdp->close = opal_con_close;
+
+	return 0;
+}
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
new file mode 100644
index 0000000000..a40c2162a4
--- /dev/null
+++ b/arch/powerpc/boot/ops.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Global definition of all the bootwrapper operations.
+ *
+ * Author: Mark A. Greer <mgreer@mvista.com>
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ */
+#ifndef _PPC_BOOT_OPS_H_
+#define _PPC_BOOT_OPS_H_
+
+#include <stddef.h>
+#include "types.h"
+#include "string.h"
+
+#define	BOOT_COMMAND_LINE_SIZE	2048
+#define	MAX_PATH_LEN		256
+#define	MAX_PROP_LEN		256 /* What should this be? */
+
+typedef void (*kernel_entry_t)(unsigned long r3, unsigned long r4, void *r5);
+
+/*
+ * Platform specific operations.
+ *
+ * Every hook is optional; the inline wrappers in this header (malloc(),
+ * free(), exit(), ...) check for NULL before calling through.
+ */
+struct platform_ops {
+	void	(*fixups)(void);		/* platform device tree fixups */
+	void	(*image_hdr)(const void *);	/* inspect the kernel image header */
+	void *	(*malloc)(unsigned long size);
+	void	(*free)(void *ptr);
+	void *	(*realloc)(void *ptr, unsigned long size);
+	void	(*exit)(void);
+	void *	(*vmlinux_alloc)(unsigned long size);	/* buffer for the kernel image */
+	void  	(*kentry)(unsigned long fdt_addr, void *vmlinux_addr);
+};
+extern struct platform_ops platform_ops;
+extern struct platform_ops platform_ops;
+
+/*
+ * Device Tree operations.
+ *
+ * Back-end for the inline accessors below (finddevice(), getprop(), ...).
+ * A NULL hook makes the matching accessor return its failure value
+ * (NULL or -1).
+ */
+struct dt_ops {
+	void *	(*finddevice)(const char *name);
+	int	(*getprop)(const void *phandle, const char *name, void *buf,
+			const int buflen);
+	int	(*setprop)(const void *phandle, const char *name,
+			const void *buf, const int buflen);
+	int (*del_node)(const void *phandle);
+	void *(*get_parent)(const void *phandle);
+	/* The node must not already exist. */
+	void *(*create_node)(const void *parent, const char *name);
+	void *(*find_node_by_prop_value)(const void *prev,
+	                                 const char *propname,
+	                                 const char *propval, int proplen);
+	void *(*find_node_by_compatible)(const void *prev,
+	                                 const char *compat);
+	unsigned long (*finalize)(void);
+	/* Write the node's path into buf (len bytes available) */
+	char *(*get_path)(const void *phandle, char *buf, int len);
+};
+extern struct dt_ops dt_ops;
+extern struct dt_ops dt_ops;
+
+/* Console operations: hooks for the active boot console */
+struct console_ops {
+	int	(*open)(void);			/* 0 on success, negative on failure */
+	void	(*write)(const char *buf, int len);	/* output len bytes */
+	void	(*edit_cmdline)(char *buf, int len, unsigned int getline_timeout);
+	void	(*close)(void);
+	void	*data;				/* private data of the console driver */
+};
+extern struct console_ops console_ops;
+extern struct console_ops console_ops;
+
+/*
+ * Serial console operations: character-level hooks filled in by the
+ * *_console_init() routine of each serial driver.  Drivers may leave
+ * hooks they do not implement unset.
+ */
+struct serial_console_data {
+	int		(*open)(void);
+	void		(*putc)(unsigned char c);	/* blocking write of one byte */
+	unsigned char	(*getc)(void);			/* blocking read of one byte */
+	u8		(*tstc)(void);			/* non-zero if input is pending */
+	void		(*close)(void);
+};
+
+/* Information handed to the wrapper by whatever loaded it */
+struct loader_info {
+	void *promptr;		/* Open Firmware entry point, when booted via OF */
+	unsigned long initrd_addr, initrd_size;	/* initrd supplied by the loader */
+	char *cmdline;
+	int cmdline_len;
+};
+extern struct loader_info loader_info;
+extern struct loader_info loader_info;
+
+void start(void);
+void fdt_init(void *blob);
+int serial_console_init(void);
+int ns16550_console_init(void *devp, struct serial_console_data *scdp);
+int cpm_console_init(void *devp, struct serial_console_data *scdp);
+int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp);
+int opal_console_init(void *devp, struct serial_console_data *scdp);
+void *simple_alloc_init(char *base, unsigned long heap_size,
+			unsigned long granularity, unsigned long max_allocs);
+extern void flush_cache(void *, unsigned long);
+int dt_xlate_reg(void *node, int res, unsigned long *addr, unsigned long *size);
+int dt_xlate_addr(void *node, u32 *buf, int buflen, unsigned long *xlated_addr);
+int dt_is_compatible(void *node, const char *compat);
+void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize);
+int dt_get_virtual_reg(void *node, void **addr, int nres);
+
+/* Look up a node by path; NULL when no finddevice back-end is installed. */
+static inline void *finddevice(const char *name)
+{
+	if (!dt_ops.finddevice)
+		return NULL;
+
+	return dt_ops.finddevice(name);
+}
+
+/* Read a property into buf; -1 when no getprop back-end is installed. */
+static inline int getprop(void *devp, const char *name, void *buf, int buflen)
+{
+	if (!dt_ops.getprop)
+		return -1;
+
+	return dt_ops.getprop(devp, name, buf, buflen);
+}
+
+/* Write a property from buf; -1 when no setprop back-end is installed. */
+static inline int setprop(void *devp, const char *name,
+                          const void *buf, int buflen)
+{
+	if (!dt_ops.setprop)
+		return -1;
+
+	return dt_ops.setprop(devp, name, buf, buflen);
+}
+/*
+ * Set property `name` to the value of the expression `val`, using the
+ * expression's own type and size.  `val` is evaluated once into a
+ * temporary, so rvalues are fine.  The setprop() result is discarded.
+ */
+#define setprop_val(devp, name, val) \
+	do { \
+		typeof(val) x = (val); \
+		setprop((devp), (name), &x, sizeof(x)); \
+	} while (0)
+
+/*
+ * Set a string property, including its terminating NUL.
+ * Returns -1 when no setprop back-end is installed.
+ */
+static inline int setprop_str(void *devp, const char *name, const char *buf)
+{
+	if (!dt_ops.setprop)
+		return -1;
+
+	return dt_ops.setprop(devp, name, buf, strlen(buf) + 1);
+}
+
+/* Delete a node; -1 when no del_node back-end is installed. */
+static inline int del_node(const void *devp)
+{
+	if (!dt_ops.del_node)
+		return -1;
+
+	return dt_ops.del_node(devp);
+}
+
+/* Return a node's parent; NULL when no get_parent back-end is installed. */
+static inline void *get_parent(const char *devp)
+{
+	if (!dt_ops.get_parent)
+		return NULL;
+
+	return dt_ops.get_parent(devp);
+}
+
+/*
+ * Create child `name` under `parent` (the node must not already exist).
+ * Returns NULL when no create_node back-end is installed.
+ */
+static inline void *create_node(const void *parent, const char *name)
+{
+	if (!dt_ops.create_node)
+		return NULL;
+
+	return dt_ops.create_node(parent, name);
+}
+
+
+/*
+ * Find the next node after `prev` whose property `propname` equals the
+ * proplen bytes at `propval`.  Returns NULL when the back-end hook is
+ * not installed.
+ */
+static inline void *find_node_by_prop_value(const void *prev,
+                                            const char *propname,
+                                            const char *propval, int proplen)
+{
+	return dt_ops.find_node_by_prop_value
+		? dt_ops.find_node_by_prop_value(prev, propname,
+		                                 propval, proplen)
+		: NULL;
+}
+
+/*
+ * String flavour of find_node_by_prop_value(): the compared value is
+ * `propval` including its terminating NUL.
+ */
+static inline void *find_node_by_prop_value_str(const void *prev,
+                                                const char *propname,
+                                                const char *propval)
+{
+	int proplen = strlen(propval) + 1;
+
+	return find_node_by_prop_value(prev, propname, propval, proplen);
+}
+
+/* Find the next node after `prev` whose "device_type" equals `type`. */
+static inline void *find_node_by_devtype(const void *prev,
+                                         const char *type)
+{
+	void *node = find_node_by_prop_value_str(prev, "device_type", type);
+
+	return node;
+}
+
+/*
+ * Resolve an alias: read property `alias` of /aliases as a path and look
+ * that path up.  Returns NULL if /aliases or the property is missing.
+ */
+static inline void *find_node_by_alias(const char *alias)
+{
+	char path[MAX_PATH_LEN];
+	void *aliases = finddevice("/aliases");
+
+	if (!aliases)
+		return NULL;
+
+	if (getprop(aliases, alias, path, MAX_PATH_LEN) <= 0)
+		return NULL;
+
+	return finddevice(path);
+}
+
+/*
+ * Find the next node after `prev` that is compatible with `compat`.
+ * Returns NULL when the back-end hook is not installed.
+ */
+static inline void *find_node_by_compatible(const void *prev,
+                                            const char *compat)
+{
+	return dt_ops.find_node_by_compatible
+		? dt_ops.find_node_by_compatible(prev, compat)
+		: NULL;
+}
+
+void dt_fixup_memory(u64 start, u64 size);
+void dt_fixup_cpu_clocks(u32 cpufreq, u32 tbfreq, u32 busfreq);
+void dt_fixup_clock(const char *path, u32 freq);
+void dt_fixup_mac_address_by_alias(const char *alias, const u8 *addr);
+void dt_fixup_mac_address(u32 index, const u8 *addr);
+void __dt_fixup_mac_addresses(u32 startindex, ...);
+#define dt_fixup_mac_addresses(...) \
+	__dt_fixup_mac_addresses(0, __VA_ARGS__, NULL)
+
+
+/*
+ * Write the path of node `phandle` into buf (len bytes available).
+ * Returns NULL when no get_path back-end is installed.
+ */
+static inline char *get_path(const void *phandle, char *buf, int len)
+{
+	return dt_ops.get_path ? dt_ops.get_path(phandle, buf, len) : NULL;
+}
+
+/* Allocate via the platform hook; NULL when the platform has no allocator. */
+static inline void *malloc(unsigned long size)
+{
+	if (!platform_ops.malloc)
+		return NULL;
+
+	return platform_ops.malloc(size);
+}
+
+/* Release via the platform hook; a no-op when the platform provides none. */
+static inline void free(void *ptr)
+{
+	void (*hook)(void *) = platform_ops.free;
+
+	if (hook)
+		hook(ptr);
+}
+
+/*
+ * Leave the wrapper through the platform exit hook, if any.  Never
+ * returns: if the hook is missing or comes back, spin forever.
+ */
+static inline void exit(void)
+{
+	void (*hook)(void) = platform_ops.exit;
+
+	if (hook)
+		hook();
+
+	while (1)
+		;
+}
+/*
+ * Print a printf-style message and terminate via exit().
+ * NOTE(review): expands to a bare { } block rather than do { } while (0),
+ * so "if (x) fatal(...); else ..." will not compile.
+ */
+#define fatal(args...) { printf(args); exit(); }
+
+
+/*
+ * Define a private stack of `size` bytes and publish the address just past
+ * its end as _platform_stack_top.  Use once, at file scope, in the
+ * platform file.
+ */
+#define BSS_STACK(size) \
+	static char _bss_stack[size]; \
+	void *_platform_stack_top = _bss_stack + sizeof(_bss_stack);
+
+extern unsigned long timebase_period_ns;
+void udelay(long delay);
+
+extern char _start[];
+extern char __bss_start[];
+extern char _end[];
+extern char _vmlinux_start[];
+extern char _vmlinux_end[];
+extern char _initrd_start[];
+extern char _initrd_end[];
+extern char _dtb_start[];
+extern char _dtb_end[];
+extern char _esm_blob_start[];
+extern char _esm_blob_end[];
+
+/*
+ * Integer log2 of a 32-bit value: 31 minus the result of the cntlzw
+ * instruction, i.e. the index of the highest set bit.
+ * NOTE(review): for n == 0 this presumably yields -1 (cntlzw of zero
+ * being 32) -- confirm against the Power ISA.
+ */
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+	int bit;
+	asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
+	return 31 - bit;
+}
+
+long partial_decompress(void *inbuf, unsigned long input_size, void *outbuf,
+	unsigned long output_size, unsigned long skip);
+
+#endif /* _PPC_BOOT_OPS_H_ */
diff --git a/arch/powerpc/boot/page.h b/arch/powerpc/boot/page.h
new file mode 100644
index 0000000000..c3d55fc8f3
--- /dev/null
+++ b/arch/powerpc/boot/page.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PPC_BOOT_PAGE_H
+#define _PPC_BOOT_PAGE_H
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ */
+
+/*
+ * ASM_CONST(x): a constant usable from both C and assembly.  In C it gets
+ * a UL suffix; in assembly it is left untouched.
+ */
+#ifdef __ASSEMBLY__
+#define ASM_CONST(x) x
+#else
+#define __ASM_CONST(x) x##UL
+#define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(ASM_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+/*
+ * align addr on a size boundary - adjust address up/down if needed.
+ * The mask arithmetic only rounds correctly when size is a power of two.
+ * addr is expanded more than once, so avoid arguments with side effects.
+ */
+#define _ALIGN_UP(addr, size)	(((addr)+((size)-1))&(~((typeof(addr))(size)-1)))
+#define _ALIGN_DOWN(addr, size)	((addr)&(~((typeof(addr))(size)-1)))
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
+
+#endif				/* _PPC_BOOT_PAGE_H */
diff --git a/arch/powerpc/boot/planetcore.c b/arch/powerpc/boot/planetcore.c
new file mode 100644
index 0000000000..d5f391e342
--- /dev/null
+++ b/arch/powerpc/boot/planetcore.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PlanetCore configuration data support functions
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "stdio.h"
+#include "stdlib.h"
+#include "ops.h"
+#include "planetcore.h"
+#include "io.h"
+
+/* PlanetCore passes information to the OS in the form of
+ * a table of key=value strings, separated by newlines.
+ *
+ * The list is terminated by an empty string (i.e. two
+ * consecutive newlines).
+ *
+ * To make it easier to parse, we first convert all the
+ * newlines into null bytes.
+ */
+
+/*
+ * Convert the newline-separated table in place into a sequence of
+ * NUL-terminated strings ending with an empty string.
+ *
+ * The loop stops once the previous byte is NUL and the current byte is a
+ * newline, i.e. at the second of two consecutive newlines; that byte is
+ * then cleared as well.  There is no length bound: the table must contain
+ * such a terminator.
+ */
+void planetcore_prepare_table(char *table)
+{
+	do {
+		if (*table == '\n')
+			*table = 0;
+
+		table++;
+	} while (*(table - 1) || *table != '\n');
+
+	*table = 0;
+}
+
+/*
+ * Look up `key` in a table prepared by planetcore_prepare_table().
+ *
+ * The table is a sequence of NUL-terminated "key=value" strings ended by
+ * an empty string.  Returns a pointer to the value text (just after the
+ * '=') of the first entry whose key matches exactly, or NULL if there is
+ * none.  An empty table yields NULL without reading past its terminator.
+ */
+const char *planetcore_get_key(const char *table, const char *key)
+{
+	size_t keylen = strlen(key);
+
+	/* An empty string marks the end of the table */
+	while (*table) {
+		if (!strncmp(table, key, keylen) && table[keylen] == '=')
+			return table + keylen + 1;
+
+		/* Skip this entry and its terminating NUL */
+		table += strlen(table) + 1;
+	}
+
+	return NULL;
+}
+
+/*
+ * Parse the value of `key` as a base-10 number into *val.
+ * Returns 1 if the key exists (and *val was written), 0 otherwise.
+ */
+int planetcore_get_decimal(const char *table, const char *key, u64 *val)
+{
+	const char *text = planetcore_get_key(table, key);
+
+	if (text) {
+		*val = strtoull(text, NULL, 10);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the value of `key` as a base-16 number into *val.
+ * Returns 1 if the key exists (and *val was written), 0 otherwise.
+ */
+int planetcore_get_hex(const char *table, const char *key, u64 *val)
+{
+	const char *text = planetcore_get_key(table, key);
+
+	if (text) {
+		*val = strtoull(text, NULL, 16);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Per-device values OR-ed into the base MAC address by
+ * planetcore_set_mac_addrs() after bits 0x000000c00000 are cleared.
+ * Indexed by device number 0..3.
+ */
+static u64 mac_table[4] = {
+	0x000000000000,
+	0x000000800000,
+	0x000000400000,
+	0x000000c00000,
+};
+
+/*
+ * Derive four MAC addresses from the table's EA entry and store them via
+ * dt_fixup_mac_address() for device indices 0..3.  Device N's address is
+ * the base address with bits 0x000000c00000 replaced by mac_table[N].
+ * Does nothing if the EA key is absent.
+ */
+void planetcore_set_mac_addrs(const char *table)
+{
+	u8 addr[4][6];
+	u64 base_addr;
+	u32 dev;
+
+	if (!planetcore_get_hex(table, PLANETCORE_KEY_MAC_ADDR, &base_addr))
+		return;
+
+	for (dev = 0; dev < 4; dev++) {
+		u64 bits = (base_addr & ~0x000000c00000) | mac_table[dev];
+		int byte = 6;
+
+		/* Most significant byte first: fill from the last byte back */
+		while (byte-- > 0) {
+			addr[dev][byte] = bits & 0xff;
+			bits >>= 8;
+		}
+
+		dt_fixup_mac_address(dev, addr[dev]);
+	}
+}
+
+static char prop_buf[MAX_PROP_LEN];
+
+/*
+ * Point /chosen's "linux,stdout-path" at the serial node whose
+ * "linux,planetcore-label" property matches the table's SP entry.
+ * /chosen is created if missing.  Any failed step leaves the tree
+ * unchanged.
+ */
+void planetcore_set_stdout_path(const char *table)
+{
+	const char *label = planetcore_get_key(table, PLANETCORE_KEY_SERIAL_PORT);
+	void *serial;
+	void *chosen;
+	char *path;
+
+	if (!label)
+		return;
+
+	serial = find_node_by_prop_value_str(NULL, "linux,planetcore-label",
+	                                     label);
+	if (!serial)
+		return;
+
+	path = get_path(serial, prop_buf, MAX_PROP_LEN);
+	if (!path)
+		return;
+
+	chosen = finddevice("/chosen");
+	if (!chosen) {
+		chosen = create_node(NULL, "chosen");
+		if (!chosen)
+			return;
+	}
+
+	setprop_str(chosen, "linux,stdout-path", path);
+}
diff --git a/arch/powerpc/boot/planetcore.h b/arch/powerpc/boot/planetcore.h
new file mode 100644
index 0000000000..5311db06c6
--- /dev/null
+++ b/arch/powerpc/boot/planetcore.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_PLANETCORE_H_
+#define _PPC_BOOT_PLANETCORE_H_
+
+#include "types.h"
+
+#define PLANETCORE_KEY_BOARD_TYPE   "BO"
+#define PLANETCORE_KEY_BOARD_REV    "BR"
+#define PLANETCORE_KEY_MB_RAM       "D1"
+#define PLANETCORE_KEY_MAC_ADDR     "EA"
+#define PLANETCORE_KEY_FLASH_SPEED  "FS"
+#define PLANETCORE_KEY_IP_ADDR      "IP"
+#define PLANETCORE_KEY_KB_NVRAM     "NV"
+#define PLANETCORE_KEY_PROCESSOR    "PR"
+#define PLANETCORE_KEY_PROC_VARIANT "PV"
+#define PLANETCORE_KEY_SERIAL_BAUD  "SB"
+#define PLANETCORE_KEY_SERIAL_PORT  "SP"
+#define PLANETCORE_KEY_SWITCH       "SW"
+#define PLANETCORE_KEY_TEMP_OFFSET  "TC"
+#define PLANETCORE_KEY_TARGET_IP    "TIP"
+#define PLANETCORE_KEY_CRYSTAL_HZ   "XT"
+
+/* Prepare the table for processing, by turning all newlines
+ * into NUL bytes.
+ */
+void planetcore_prepare_table(char *table);
+
+/* Return the value associated with a given key in text,
+ * decimal, or hex format.
+ *
+ * Returns zero/NULL on failure, non-zero on success.
+ */
+const char *planetcore_get_key(const char *table, const char *key);
+int planetcore_get_decimal(const char *table, const char *key, u64 *val);
+int planetcore_get_hex(const char *table, const char *key, u64 *val);
+
+/* Updates the device tree local-mac-address properties based
+ * on the EA tag.
+ */
+void planetcore_set_mac_addrs(const char *table);
+
+/* Sets the linux,stdout-path in the /chosen node.  This requires the
+ * linux,planetcore-label property in each serial node.
+ */
+void planetcore_set_stdout_path(const char *table);
+
+#endif
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
new file mode 100644
index 0000000000..a66cfd76fa
--- /dev/null
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PPC64_PPC_ASM_H
+#define _PPC64_PPC_ASM_H
+/*
+ *
+ * Definitions used by various bits of low-level assembly code on PowerPC.
+ *
+ * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
+ */
+
+/* Condition Register Bit Fields */
+
+#define	cr0	0
+#define	cr1	1
+#define	cr2	2
+#define	cr3	3
+#define	cr4	4
+#define	cr5	5
+#define	cr6	6
+#define	cr7	7
+
+
+/* General Purpose Registers (GPRs) */
+
+#define	r0	0
+#define	r1	1
+#define	r2	2
+#define	r3	3
+#define	r4	4
+#define	r5	5
+#define	r6	6
+#define	r7	7
+#define	r8	8
+#define	r9	9
+#define	r10	10
+#define	r11	11
+#define	r12	12
+#define	r13	13
+#define	r14	14
+#define	r15	15
+#define	r16	16
+#define	r17	17
+#define	r18	18
+#define	r19	19
+#define	r20	20
+#define	r21	21
+#define	r22	22
+#define	r23	23
+#define	r24	24
+#define	r25	25
+#define	r26	26
+#define	r27	27
+#define	r28	28
+#define	r29	29
+#define	r30	30
+#define	r31	31
+
+#define SPRN_TBRL	268
+#define SPRN_TBRU	269
+#define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
+#define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
+
+#define MSR_LE		0x0000000000000001
+
+#define FIXUP_ENDIAN						   \
+	tdi   0,0,0x48;	  /* Reverse endian of b . + 8		*/ \
+	b     $+44;	  /* Skip trampoline if endian is good	*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0x00004039; /* li r10,0				*/ \
+	.long 0x6401417d; /* mtmsrd r10,1			*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x14004a39; /* addi r10,r10,20			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c  /* rfid				*/
+
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest)			mftb dest
+#define MFTBU(dest)			mftbu dest
+#else
+#define MFTBL(dest)			mfspr dest, SPRN_TBRL
+#define MFTBU(dest)			mfspr dest, SPRN_TBRU
+#endif
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+#define LOAD_REG_ADDR(reg,name)			\
+	addis	reg,r2,name@toc@ha;		\
+	addi	reg,reg,name@toc@l
+#else
+#define LOAD_REG_ADDR(reg,name)			\
+	lis	reg,name@ha;			\
+	addi	reg,reg,name@l
+#endif
+
+#endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/ppcboot-hotfoot.h b/arch/powerpc/boot/ppcboot-hotfoot.h
new file mode 100644
index 0000000000..4728db95f5
--- /dev/null
+++ b/arch/powerpc/boot/ppcboot-hotfoot.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This interface is used for compatibility with old U-boots *ONLY*.
+ * Please do not imitate or extend this.
+ */
+
+/* 
+ * Unfortunately, the ESTeem Hotfoot board uses a mangled version of 
+ * ppcboot.h for historical reasons, and in the interest of having a 
+ * mainline kernel boot on the production board+bootloader, this was the 
+ * least-offensive solution.  Please direct all flames to:
+ *
+ *  Solomon Peachy <solomon@linux-wlan.com>
+ *
+ * (This header is identical to ppcboot.h except for the 
+ *  TARGET_HOTFOOT bits)
+ */
+
+/*
+ * (C) Copyright 2000, 2001
+ * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+ */
+
+#ifndef __PPCBOOT_H__
+#define __PPCBOOT_H__
+
+/*
+ * Board information passed to kernel from PPCBoot
+ *
+ * include/asm-ppc/ppcboot.h
+ */
+
+#include "types.h"
+
+typedef struct bd_info {
+	unsigned long	bi_memstart;	/* start of DRAM memory */
+	unsigned long	bi_memsize;	/* size	 of DRAM memory in bytes */
+	unsigned long	bi_flashstart;	/* start of FLASH memory */
+	unsigned long	bi_flashsize;	/* size	 of FLASH memory */
+	unsigned long	bi_flashoffset; /* reserved area for startup monitor */
+	unsigned long	bi_sramstart;	/* start of SRAM memory */
+	unsigned long	bi_sramsize;	/* size	 of SRAM memory */
+#if defined(TARGET_8xx) || defined(TARGET_CPM2) || defined(TARGET_85xx) ||\
+	defined(TARGET_83xx)
+	unsigned long	bi_immr_base;	/* base of IMMR register */
+#endif
+#if defined(TARGET_PPC_MPC52xx)
+	unsigned long   bi_mbar_base;   /* base of internal registers */
+#endif
+	unsigned long	bi_bootflags;	/* boot / reboot flag (for LynxOS) */
+	unsigned long	bi_ip_addr;	/* IP Address */
+	unsigned char	bi_enetaddr[6];	/* Ethernet address */
+#if defined(TARGET_HOTFOOT)
+	/* second onboard ethernet port */
+	unsigned char	bi_enet1addr[6];
+#define HAVE_ENET1ADDR
+#endif /* TARGET_HOTFOOT */
+	unsigned short	bi_ethspeed;	/* Ethernet speed in Mbps */
+	unsigned long	bi_intfreq;	/* Internal Freq, in MHz */
+	unsigned long	bi_busfreq;	/* Bus Freq, in MHz */
+#if defined(TARGET_CPM2)
+	unsigned long	bi_cpmfreq;	/* CPM_CLK Freq, in MHz */
+	unsigned long	bi_brgfreq;	/* BRG_CLK Freq, in MHz */
+	unsigned long	bi_sccfreq;	/* SCC_CLK Freq, in MHz */
+	unsigned long	bi_vco;		/* VCO Out from PLL, in MHz */
+#endif
+#if defined(TARGET_PPC_MPC52xx)
+	unsigned long   bi_ipbfreq;     /* IPB Bus Freq, in MHz */
+	unsigned long   bi_pcifreq;     /* PCI Bus Freq, in MHz */
+#endif
+	unsigned long	bi_baudrate;	/* Console Baudrate */
+#if defined(TARGET_4xx)
+	unsigned char	bi_s_version[4];	/* Version of this structure */
+	unsigned char	bi_r_version[32];	/* Version of the ROM (IBM) */
+	unsigned int	bi_procfreq;	/* CPU (Internal) Freq, in Hz */
+	unsigned int	bi_plb_busfreq;	/* PLB Bus speed, in Hz */
+	unsigned int	bi_pci_busfreq;	/* PCI Bus speed, in Hz */
+	unsigned char	bi_pci_enetaddr[6];	/* PCI Ethernet MAC address */
+#endif
+#if defined(TARGET_HOTFOOT)
+	unsigned int     bi_pllouta_freq;       /* PLL OUTA speed, in Hz */
+#endif
+#if defined(TARGET_HYMOD)
+	hymod_conf_t	bi_hymod_conf;	/* hymod configuration information */
+#endif
+#if defined(TARGET_EVB64260) || defined(TARGET_405EP) || defined(TARGET_44x) || \
+	defined(TARGET_85xx) ||	defined(TARGET_83xx) || defined(TARGET_HAS_ETH1)
+	/* second onboard ethernet port */
+	unsigned char	bi_enet1addr[6];
+#define HAVE_ENET1ADDR
+#endif
+#if defined(TARGET_EVB64260) || defined(TARGET_440GX) || \
+    defined(TARGET_85xx) || defined(TARGET_HAS_ETH2)
+	/* third onboard ethernet ports */
+	unsigned char	bi_enet2addr[6];
+#define HAVE_ENET2ADDR
+#endif
+#if defined(TARGET_440GX) || defined(TARGET_HAS_ETH3)
+	/* fourth onboard ethernet ports */
+	unsigned char	bi_enet3addr[6];
+#define HAVE_ENET3ADDR
+#endif
+#if defined(TARGET_HOTFOOT)
+        int             bi_phynum[2];           /* Determines phy mapping */
+        int             bi_phymode[2];          /* Determines phy mode */
+#endif
+#if defined(TARGET_4xx)
+	unsigned int	bi_opbfreq;		/* OB clock in Hz */
+	int		bi_iic_fast[2];		/* Use fast i2c mode */
+#endif
+#if defined(TARGET_440GX)
+	int		bi_phynum[4];		/* phy mapping */
+	int		bi_phymode[4];		/* phy mode */
+#endif
+} bd_t;
+
+#define bi_tbfreq	bi_intfreq
+
+#endif	/* __PPCBOOT_H__ */
diff --git a/arch/powerpc/boot/ppcboot.h b/arch/powerpc/boot/ppcboot.h
new file mode 100644
index 0000000000..a78b0b2576
--- /dev/null
+++ b/arch/powerpc/boot/ppcboot.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This interface is used for compatibility with old U-boots *ONLY*.
+ * Please do not imitate or extend this.
+ */
+
+/*
+ * (C) Copyright 2000, 2001
+ * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+ */
+
+#ifndef __PPCBOOT_H__
+#define __PPCBOOT_H__
+
+/*
+ * Board information passed to kernel from PPCBoot
+ *
+ * include/asm-ppc/ppcboot.h
+ */
+
+#include "types.h"
+
+typedef struct bd_info {
+	unsigned long	bi_memstart;	/* start of DRAM memory */
+	unsigned long	bi_memsize;	/* size	 of DRAM memory in bytes */
+	unsigned long	bi_flashstart;	/* start of FLASH memory */
+	unsigned long	bi_flashsize;	/* size	 of FLASH memory */
+	unsigned long	bi_flashoffset; /* reserved area for startup monitor */
+	unsigned long	bi_sramstart;	/* start of SRAM memory */
+	unsigned long	bi_sramsize;	/* size	 of SRAM memory */
+#if defined(TARGET_8xx) || defined(TARGET_CPM2) || defined(TARGET_85xx) ||\
+	defined(TARGET_83xx) || defined(TARGET_86xx)
+	unsigned long	bi_immr_base;	/* base of IMMR register */
+#endif
+#if defined(TARGET_PPC_MPC52xx)
+	unsigned long   bi_mbar_base;   /* base of internal registers */
+#endif
+	unsigned long	bi_bootflags;	/* boot / reboot flag (for LynxOS) */
+	unsigned long	bi_ip_addr;	/* IP Address */
+	unsigned char	bi_enetaddr[6];	/* Ethernet address */
+	unsigned short	bi_ethspeed;	/* Ethernet speed in Mbps */
+	unsigned long	bi_intfreq;	/* Internal Freq, in MHz */
+	unsigned long	bi_busfreq;	/* Bus Freq, in MHz */
+#if defined(TARGET_CPM2)
+	unsigned long	bi_cpmfreq;	/* CPM_CLK Freq, in MHz */
+	unsigned long	bi_brgfreq;	/* BRG_CLK Freq, in MHz */
+	unsigned long	bi_sccfreq;	/* SCC_CLK Freq, in MHz */
+	unsigned long	bi_vco;		/* VCO Out from PLL, in MHz */
+#endif
+#if defined(TARGET_PPC_MPC52xx)
+	unsigned long   bi_ipbfreq;     /* IPB Bus Freq, in MHz */
+	unsigned long   bi_pcifreq;     /* PCI Bus Freq, in MHz */
+#endif
+	unsigned long	bi_baudrate;	/* Console Baudrate */
+#if defined(TARGET_4xx)
+	unsigned char	bi_s_version[4];	/* Version of this structure */
+	unsigned char	bi_r_version[32];	/* Version of the ROM (IBM) */
+	unsigned int	bi_procfreq;	/* CPU (Internal) Freq, in Hz */
+	unsigned int	bi_plb_busfreq;	/* PLB Bus speed, in Hz */
+	unsigned int	bi_pci_busfreq;	/* PCI Bus speed, in Hz */
+	unsigned char	bi_pci_enetaddr[6];	/* PCI Ethernet MAC address */
+#endif
+#if defined(TARGET_HYMOD)
+	hymod_conf_t	bi_hymod_conf;	/* hymod configuration information */
+#endif
+#if defined(TARGET_EVB64260) || defined(TARGET_405EP) || defined(TARGET_44x) || \
+	defined(TARGET_85xx) ||	defined(TARGET_83xx) || defined(TARGET_HAS_ETH1)
+	/* second onboard ethernet port */
+	unsigned char	bi_enet1addr[6];
+#define HAVE_ENET1ADDR
+#endif
+#if defined(TARGET_EVB64260) || defined(TARGET_440GX) || \
+    defined(TARGET_85xx) || defined(TARGET_HAS_ETH2)
+	/* third onboard ethernet ports */
+	unsigned char	bi_enet2addr[6];
+#define HAVE_ENET2ADDR
+#endif
+#if defined(TARGET_440GX) || defined(TARGET_HAS_ETH3)
+	/* fourth onboard ethernet ports */
+	unsigned char	bi_enet3addr[6];
+#define HAVE_ENET3ADDR
+#endif
+#if defined(TARGET_4xx)
+	unsigned int	bi_opbfreq;		/* OB clock in Hz */
+	int		bi_iic_fast[2];		/* Use fast i2c mode */
+#endif
+#if defined(TARGET_440GX)
+	int		bi_phynum[4];		/* phy mapping */
+	int		bi_phymode[4];		/* phy mode */
+#endif
+} bd_t;
+
+#define bi_tbfreq	bi_intfreq
+
+#endif	/* __PPCBOOT_H__ */
diff --git a/arch/powerpc/boot/pq2.c b/arch/powerpc/boot/pq2.c
new file mode 100644
index 0000000000..de27f1c072
--- /dev/null
+++ b/arch/powerpc/boot/pq2.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PowerQUICC II support functions
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "types.h"
+#include "fsl-soc.h"
+#include "pq2.h"
+#include "stdio.h"
+#include "io.h"
+
+#define PQ2_SCCR (0x10c80/4) /* System Clock Configuration Register */
+#define PQ2_SCMR (0x10c88/4) /* System Clock Mode Register */
+
+/* Indexed by the 5-bit CORECNF field: core clock = bus clock * entry / 2.
+ * A negative entry selects mainclk / 2; a zero entry is reported as failure.
+ */
+static int pq2_corecnf_map[] = {
+	3, 2, 2, 2, 4, 4, 5, 9, 6, 11, 8, 10, 3, 12, 7, -1,
+	6, 5, 13, 2, 14, 4, 15, 9, 0, 11, 8, 10, 16, 12, 7, -1
+};
+
+/* Derive the system, core, timebase and BRG frequencies from the crystal
+ * frequency and the SCCR/SCMR registers.  Any output pointer may be NULL.
+ * Returns zero on failure and non-zero on success.
+ */
+int pq2_get_clocks(u32 crystal, u32 *sysfreq, u32 *corefreq,
+                   u32 *timebase, u32 *brgfreq)
+{
+	u32 *regs = fsl_get_immr();
+	u32 sccr, scmr, mainclk, busclk;
+	u32 pll_mul, pll_div, bus_div;
+	int mult;
+
+	if (!regs) {
+		printf("pq2_get_clocks: Couldn't get IMMR base.\r\n");
+		return 0;
+	}
+
+	sccr = in_be32(&regs[PQ2_SCCR]);
+	scmr = in_be32(&regs[PQ2_SCMR]);
+
+	/* SCMR fields, counting the least significant bit as bit 0. */
+	pll_mul = (scmr & 0xfff) + 1;		/* PLLMF: bits 0-11 */
+	pll_div = ((scmr >> 12) & 1) + 1;	/* PLLDF: bit 12 */
+	bus_div = ((scmr >> 20) & 0xf) + 1;	/* BUSDF: bits 20-23 */
+
+	mainclk = crystal * pll_mul / pll_div;
+	busclk = mainclk / bus_div;
+
+	if (sysfreq)
+		*sysfreq = mainclk / 2;
+	if (timebase)
+		*timebase = busclk / 4;
+	if (brgfreq)	/* divide by 4^(DFBRG + 1); DFBRG is SCCR bits 0-1 */
+		*brgfreq = mainclk >> (((sccr & 3) + 1) * 2);
+	if (!corefreq)
+		return 1;
+
+	/* CORECNF: SCMR bits 24-28. */
+	mult = pq2_corecnf_map[(scmr >> 24) & 0x1f];
+	if (mult == 0)
+		return 0;
+
+	*corefreq = mult < 0 ? mainclk / 2 : busclk * mult / 2;
+	return 1;
+}
+
+/* Write the given clock frequencies into the device tree. */
+void pq2_set_clocks(u32 sysfreq, u32 corefreq, u32 timebase, u32 brgfreq)
+{
+	void *cpm, *brg;
+
+	dt_fixup_cpu_clocks(corefreq, timebase, sysfreq);
+
+	cpm = finddevice("/soc/cpm");
+	if (cpm)	/* the CPM node, when present, gets sysfreq */
+		setprop(cpm, "clock-frequency", &sysfreq, sizeof(sysfreq));
+
+	brg = finddevice("/soc/cpm/brg");
+	if (brg)	/* the BRG node, when present, gets brgfreq */
+		setprop(brg, "clock-frequency", &brgfreq, sizeof(brgfreq));
+}
+
+/* Compute the clocks for crystal and, on success, fix up the device tree. */
+int pq2_fixup_clocks(u32 crystal)
+{
+	u32 sys, core, tb, brg;
+	int ok = pq2_get_clocks(crystal, &sys, &core, &tb, &brg);
+
+	if (ok)
+		pq2_set_clocks(sys, core, tb, brg);
+	return ok != 0;
+}
diff --git a/arch/powerpc/boot/pq2.h b/arch/powerpc/boot/pq2.h
new file mode 100644
index 0000000000..f577b3bec6
--- /dev/null
+++ b/arch/powerpc/boot/pq2.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_PQ2_H_
+#define _PPC_BOOT_PQ2_H_
+
+#include "types.h"
+
+int pq2_get_clocks(u32 crystal, u32 *sysfreq, u32 *corefreq,
+                   u32 *timebase, u32 *brgfreq);
+void pq2_set_clocks(u32 sysfreq, u32 corefreq, u32 timebase, u32 brgfreq);
+int pq2_fixup_clocks(u32 crystal);
+
+#endif
diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S
new file mode 100644
index 0000000000..0a4ebfcc39
--- /dev/null
+++ b/arch/powerpc/boot/ps3-head.S
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 bootwrapper entry.
+ *
+ *  Copyright (C) 2007 Sony Computer Entertainment Inc.
+ *  Copyright 2007 Sony Corp.
+ */
+
+#include "ppc_asm.h"
+
+	.machine "ppc64"
+
+	.text
+
+/*
+ * __system_reset_overlay - The PS3 first stage entry.
+ *
+ * The bootwrapper build script copies the 512 bytes at symbol
+ * __system_reset_overlay to offset 0x100 of the rom image.  This symbol
+ * must occupy 512 bytes or fewer.
+ *
+ * The PS3 has a single processor with two threads.
+ */
+
+	.globl __system_reset_overlay
+__system_reset_overlay:
+
+	/* Switch to 32-bit mode. */
+
+	mfmsr	r9
+	clrldi	r9,r9,1		/* clear the most significant MSR bit */
+	mtmsrd	r9
+	nop
+
+	/* Get thread number in r3 and branch. */
+
+	mfspr	r3, 0x88
+	cntlzw.	r3, r3		/* record form: sets cr0 for the beq below */
+	beq	1f
+
+	/* Secondary goes to __secondary_hold in kernel. */
+
+	li	r4, 0x60
+	mtctr	r4
+	bctr
+
+1:
+	/* Primary delays then goes to _zimage_start in wrapper. */
+
+	or	31, 31, 31 /* db16cyc */
+	or	31, 31, 31 /* db16cyc */
+
+	lis	r4, _zimage_start@ha
+	addi	r4, r4, _zimage_start@l
+	mtctr	r4
+	bctr
+
+	. = __system_reset_overlay + 512	/* pad the overlay to 512 bytes */
+
+/*
+ * __system_reset_kernel - Place holder for the kernel reset vector.
+ *
+ * The bootwrapper build script copies 512 bytes from offset 0x100
+ * of the rom image to the symbol __system_reset_kernel.  At runtime
+ * the bootwrapper program copies the 512 bytes at __system_reset_kernel
+ * to ram address 0x100.  This symbol must occupy 512 bytes.
+ */
+
+	.globl __system_reset_kernel
+__system_reset_kernel:
+
+	. = __system_reset_kernel + 512
diff --git a/arch/powerpc/boot/ps3-hvcall.S b/arch/powerpc/boot/ps3-hvcall.S
new file mode 100644
index 0000000000..ff74102e8a
--- /dev/null
+++ b/arch/powerpc/boot/ps3-hvcall.S
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 bootwrapper hvcalls.
+ *
+ *  Copyright (C) 2007 Sony Computer Entertainment Inc.
+ *  Copyright 2007 Sony Corp.
+ */
+
+#include "ppc_asm.h"
+
+	.machine "ppc64"
+
+/*
+ * The PS3 hypervisor uses a 64 bit "C" language calling convention.
+ * The routines here marshal arguments between the 32 bit wrapper
+ * program and the 64 bit hvcalls.
+ *
+ *  wrapper           lv1
+ *  32-bit (h,l)      64-bit
+ *
+ *  1: r3,r4          <-> r3
+ *  2: r5,r6          <-> r4
+ *  3: r7,r8          <-> r5
+ *  4: r9,r10         <-> r6
+ *  5: 8(r1),12(r1)   <-> r7
+ *  6: 16(r1),20(r1)  <-> r8
+ *  7: 24(r1),28(r1)  <-> r9
+ *  8: 32(r1),36(r1)  <-> r10
+ *
+ */
+
+.macro GLOBAL name
+	.section ".text"
+	.balign 4
+	.globl \name
+\name:
+.endm
+
+.macro NO_SUPPORT name
+	GLOBAL \name
+	b ps3_no_support
+.endm
+
+.macro HVCALL num
+	li r11, \num		/* call number is passed in r11 */
+	.long 0x44000022	/* presumably the encoding of "sc 1" - TODO confirm */
+	extsw r3, r3		/* sign-extend the low 32 bits of r3 */
+.endm
+
+.macro SAVE_LR offset=4
+	mflr r0
+	stw r0, \offset(r1)
+.endm
+
+.macro LOAD_LR offset=4
+	lwz r0, \offset(r1)
+	mtlr r0
+.endm
+
+.macro LOAD_64_REG target,high,low
+	sldi r11, \high, 32
+	or \target, r11, \low
+.endm
+
+.macro LOAD_64_STACK target,offset
+	ld \target, \offset(r1)
+.endm
+
+.macro LOAD_R3
+	LOAD_64_REG r3,r3,r4
+.endm
+
+.macro LOAD_R4
+	LOAD_64_REG r4,r5,r6
+.endm
+
+.macro LOAD_R5
+	LOAD_64_REG r5,r7,r8
+.endm
+
+.macro LOAD_R6
+	LOAD_64_REG r6,r9,r10
+.endm
+
+.macro LOAD_R7
+	LOAD_64_STACK r7,8
+.endm
+
+.macro LOAD_R8
+	LOAD_64_STACK r8,16
+.endm
+
+.macro LOAD_R9
+	LOAD_64_STACK r9,24
+.endm
+
+.macro LOAD_R10
+	LOAD_64_STACK r10,32
+.endm
+
+.macro LOAD_REGS_0
+	stwu 1,-16(1)
+	stw 3, 8(1)
+.endm
+
+.macro LOAD_REGS_5
+	LOAD_R3
+	LOAD_R4
+	LOAD_R5
+	LOAD_R6
+	LOAD_R7
+.endm
+
+.macro LOAD_REGS_6
+	LOAD_REGS_5
+	LOAD_R8
+.endm
+
+.macro LOAD_REGS_8
+	LOAD_REGS_6
+	LOAD_R9
+	LOAD_R10
+.endm
+
+.macro STORE_REGS_0_1
+	lwz r11, 8(r1)
+	std r4, 0(r11)
+	mr r4, r3
+	li r3, 0
+	addi r1,r1,16
+.endm
+
+.macro STORE_REGS_5_2
+	lwz r11, 16(r1)
+	std r4, 0(r11)
+	lwz r11, 20(r1)
+	std r5, 0(r11)
+.endm
+
+.macro STORE_REGS_6_1
+	lwz r11, 24(r1)
+	std r4, 0(r11)
+.endm
+
+GLOBAL lv1_get_logical_ppe_id
+	SAVE_LR
+	LOAD_REGS_0
+	HVCALL 69
+	STORE_REGS_0_1
+	LOAD_LR
+	blr
+
+GLOBAL lv1_get_logical_partition_id
+	SAVE_LR
+	LOAD_REGS_0
+	HVCALL 74
+	STORE_REGS_0_1
+	LOAD_LR
+	blr
+
+GLOBAL lv1_get_repository_node_value
+	SAVE_LR
+	LOAD_REGS_5
+	HVCALL 91
+	STORE_REGS_5_2
+	LOAD_LR
+	blr
+
+GLOBAL lv1_panic
+	SAVE_LR
+	LOAD_REGS_8
+	HVCALL 255
+	LOAD_LR
+	blr
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
new file mode 100644
index 0000000000..f157717ae8
--- /dev/null
+++ b/arch/powerpc/boot/ps3.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 bootwrapper support.
+ *
+ *  Copyright (C) 2007 Sony Computer Entertainment Inc.
+ *  Copyright 2007 Sony Corp.
+ */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+
+extern int lv1_panic(u64 in_1);
+extern int lv1_get_logical_partition_id(u64 *out_1);
+extern int lv1_get_logical_ppe_id(u64 *out_1);
+extern int lv1_get_repository_node_value(u64 in_1, u64 in_2, u64 in_3,
+	u64 in_4, u64 in_5, u64 *out_1, u64 *out_2);
+
+BSS_STACK(4096);
+
+/* A buffer that may be edited by tools operating on a zImage binary so as to
+ * edit the command line passed to vmlinux (by setting /chosen/bootargs).
+ * The buffer is put in its own section so that tools can find it more easily.
+ */
+
+static char cmdline[BOOT_COMMAND_LINE_SIZE]
+	__attribute__((__section__("__builtin_cmdline")));
+
+/* A non-empty built-in cmdline replaces bootargs; else bootargs is read. */
+static void prep_cmdline(void *chosen)
+{
+	if (cmdline[0] != '\0')
+		setprop_str(chosen, "bootargs", cmdline);
+	else
+		getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1);
+	printf("cmdline: '%s'\n", cmdline);
+}
+
+static void ps3_console_write(const char *buf, int len)
+{	/* no-op: buf and len are ignored */
+}
+
+/* Exit hook: ask lv1 to shut down the lpar, then spin forever. */
+static void ps3_exit(void)
+{
+	printf("ps3_exit\n");
+
+	lv1_panic(0);	/* shuts down the lpar; zero = do not reboot */
+	for (;;)
+		;
+}
+
+/* Ask the lv1 repository for the rm_size value of this lpar/ppe.
+ * Returns 0 on success, -1 if any hypervisor call fails.
+ */
+static int ps3_repository_read_rm_size(u64 *rm_size)
+{
+	u64 lpar_id, ppe_id, v2;
+	int err;
+
+	if (lv1_get_logical_partition_id(&lpar_id))
+		return -1;
+	if (lv1_get_logical_ppe_id(&ppe_id))
+		return -1;
+
+	/*
+	 * Repository node name words (ASCII shown on the right):
+	 * n1: 0000000062690000 : ....bi..
+	 * n2: 7075000000000000 : pu......
+	 * n3: ppe_id
+	 * n4: 726d5f73697a6500 : rm_size.
+	 */
+	err = lv1_get_repository_node_value(lpar_id,
+					    0x0000000062690000ULL,
+					    0x7075000000000000ULL,
+					    ppe_id,
+					    0x726d5f73697a6500ULL,
+					    rm_size, &v2);
+
+	/* Logged whether or not the lookup succeeded. */
+	printf("%s:%d: ppe_id  %lu \n", __func__, __LINE__,
+		(unsigned long)ppe_id);
+	printf("%s:%d: lpar_id %lu \n", __func__, __LINE__,
+		(unsigned long)lpar_id);
+	printf("%s:%d: rm_size %llxh \n", __func__, __LINE__, *rm_size);
+
+	return err ? -1 : 0;
+}
+
+void ps3_copy_vectors(void)
+{
+	extern char __system_reset_kernel[];
+	/* Copy 512 bytes from __system_reset_kernel to 0x100, then flush. */
+	memcpy((void *)0x100, __system_reset_kernel, 512);
+	flush_cache((void *)0x100, 512);
+}
+
+/* PS3 entry from the wrapper: set up the heap and device tree, record the
+ * memory size, initrd and command line, then start the kernel at address 0.
+ */
+void platform_init(void)
+{
+	const u32 heapsize = 0x1000000 - (u32)_end; /* heap ends at 16MiB */
+	unsigned long dtb;
+	void *chosen;
+	u64 rm_size;
+
+	console_ops.write = ps3_console_write;
+	platform_ops.exit = ps3_exit;
+	printf("\n-- PS3 bootwrapper --\n");
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+	chosen = finddevice("/chosen");
+
+	/* NOTE(review): the result is ignored, so a failed read still
+	 * feeds whatever is in rm_size to dt_fixup_memory().
+	 */
+	ps3_repository_read_rm_size(&rm_size);
+	dt_fixup_memory(0, rm_size);
+
+	if (&_initrd_end > &_initrd_start) {
+		setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start));
+		setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end));
+	}
+	prep_cmdline(chosen);
+
+	dtb = dt_ops.finalize();
+	ps3_copy_vectors();
+	printf(" flat tree at 0x%lx\n\r", dtb);
+
+	((kernel_entry_t)0)(dtb, 0, NULL);
+	ps3_exit();	/* only reached if the kernel returns */
+}
diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S
new file mode 100644
index 0000000000..1b1a638ce6
--- /dev/null
+++ b/arch/powerpc/boot/pseries-head.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "ppc_asm.h"
+
+	.text
+
+	.globl _zimage_start
+_zimage_start:
+	FIXUP_ENDIAN		/* ppc_asm.h trampoline; toggles MSR_LE if needed */
+	b _zimage_start_lib
diff --git a/arch/powerpc/boot/redboot-83xx.c b/arch/powerpc/boot/redboot-83xx.c
new file mode 100644
index 0000000000..b610e78b43
--- /dev/null
+++ b/arch/powerpc/boot/redboot-83xx.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RedBoot firmware support
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ * Copyright (c) 2008 Codehermit
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "redboot.h"
+#include "fsl-soc.h"
+#include "io.h"
+
+static bd_t bd;
+BSS_STACK(4096);
+
+#define MHZ(x)	(((x) + 500000) / 1000000)	/* Hz to MHz, rounded to nearest */
+
+/* Fix up the device tree from the RedBoot board info copied into bd. */
+static void platform_fixups(void)
+{
+	void *brg;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_addresses(bd.bi_enetaddr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 16, bd.bi_busfreq);
+
+	brg = finddevice("/soc/cpm/brg");
+	if (!brg)
+		return;
+	printf("BRG clock-frequency <- 0x%x (%dMHz)\r\n",
+	       bd.bi_busfreq, MHZ(bd.bi_busfreq));
+	setprop(brg, "clock-frequency", &bd.bi_busfreq, 4);
+}
+
+/* r3 points at RedBoot's bd_t, which must carry the "BDID" tag. */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	unsigned long heap_len;
+
+	memcpy(&bd, (char *)r3, sizeof(bd));
+	if (bd.bi_tag != 0x42444944)
+		return;
+
+	/* The heap runs from _end up to bi_memstart + bi_memsize. */
+	heap_len = bd.bi_memstart + bd.bi_memsize - (unsigned long)_end;
+	simple_alloc_init(_end, heap_len, 32, 64);
+	fdt_init(_dtb_start);
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;
+	loader_info.cmdline = (char *)bd.bi_cmdline;
+	loader_info.cmdline_len = strlen((char *)bd.bi_cmdline);
+}
diff --git a/arch/powerpc/boot/redboot-8xx.c b/arch/powerpc/boot/redboot-8xx.c
new file mode 100644
index 0000000000..d7006eeaf5
--- /dev/null
+++ b/arch/powerpc/boot/redboot-8xx.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RedBoot firmware support
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "redboot.h"
+#include "fsl-soc.h"
+#include "io.h"
+
+static bd_t bd;
+BSS_STACK(4096);
+
+#define MHZ(x)	(((x) + 500000) / 1000000)	/* Hz to MHz, rounded to nearest */
+
+static void platform_fixups(void)
+{
+	void *brg;
+
+	dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+	dt_fixup_mac_addresses(bd.bi_enetaddr);
+	dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 16, bd.bi_busfreq);
+	/* Set the BRG clock only if the tree has a /soc/cpm/brg node. */
+	brg = finddevice("/soc/cpm/brg");
+	if (!brg)
+		return;
+	printf("BRG clock-frequency <- 0x%x (%dMHz)\r\n",
+	       bd.bi_busfreq, MHZ(bd.bi_busfreq));
+	setprop(brg, "clock-frequency", &bd.bi_busfreq, 4);
+}
+
+/* r3 points at RedBoot's bd_t, which must carry the "BDID" tag. */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                   unsigned long r6, unsigned long r7)
+{
+	unsigned long heap_len;
+
+	memcpy(&bd, (char *)r3, sizeof(bd));
+	if (bd.bi_tag != 0x42444944)
+		return;
+
+	/* The heap runs from _end up to bi_memstart + bi_memsize. */
+	heap_len = bd.bi_memstart + bd.bi_memsize - (unsigned long)_end;
+	simple_alloc_init(_end, heap_len, 32, 64);
+	fdt_init(_dtb_start);
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;
+	loader_info.cmdline = (char *)bd.bi_cmdline;
+	loader_info.cmdline_len = strlen((char *)bd.bi_cmdline);
+}
diff --git a/arch/powerpc/boot/redboot.h b/arch/powerpc/boot/redboot.h
new file mode 100644
index 0000000000..8f319b1add
--- /dev/null
+++ b/arch/powerpc/boot/redboot.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_REDBOOT_H
+#define _PPC_REDBOOT_H
+
+//=========================================================================
+// include/asm-ppc/redboot.h
+//   Copyright (c) 2002, 2003 Gary Thomas (<gary@mlbassoc.com>
+//   Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+
+//
+// Board specific details, as provided by RedBoot
+//
+
+/* A Board Information structure that is given to a program when
+ * RedBoot starts it up.  Note: not all fields make sense for all
+ * architectures and it's up to the platform specific code to fill
+ * in the details.
+ */
+typedef struct bd_info {
+    unsigned int   bi_tag;        /* Should be 0x42444944 "BDID" */
+    unsigned int   bi_size;       /* Size of this structure */
+    unsigned int   bi_revision;   /* revision of this structure */
+    unsigned int   bi_bdate;      /* bootstrap date, i.e. 0x19971106 */
+    unsigned int   bi_memstart;   /* Memory start address */
+    unsigned int   bi_memsize;    /* Memory (end) size in bytes */
+    unsigned int   bi_intfreq;    /* Internal Freq, in Hz */
+    unsigned int   bi_busfreq;    /* Bus Freq, in Hz */
+    unsigned int   bi_cpmfreq;    /* CPM Freq, in Hz */
+    unsigned int   bi_brgfreq;    /* BRG Freq, in Hz */
+    unsigned int   bi_vco;        /* VCO Out from PLL */
+    unsigned int   bi_pci_freq;   /* PCI Freq, in Hz */
+    unsigned int   bi_baudrate;   /* Default console baud rate */
+    unsigned int   bi_immr;       /* IMMR when called from boot rom */
+    unsigned char  bi_enetaddr[6];
+    unsigned int   bi_flashbase;  /* Physical address of FLASH memory */
+    unsigned int   bi_flashsize;  /* Length of FLASH memory */
+    int            bi_flashwidth; /* Width (8,16,32,64) */
+    unsigned char *bi_cmdline;    /* Pointer to command line */
+    unsigned char  bi_esa[3][6];  /* Ethernet station addresses */
+    unsigned int   bi_ramdisk_begin, bi_ramdisk_end;
+    struct {                      /* Information about [main] video screen */
+        short x_res;              /*   Horizontal resolution in pixels */
+        short y_res;              /*   Vertical resolution in pixels */
+        short bpp;                /*   Bits/pixel */
+        short mode;               /*   Type of pixels (packed, indexed) */
+        unsigned long fb;         /*   Pointer to frame buffer (pixel) memory */
+    } bi_video;
+    void         (*bi_cputc)(char);   /* Write a character to the RedBoot console */
+    char         (*bi_cgetc)(void);   /* Read a character from the RedBoot console */
+    int          (*bi_ctstc)(void);   /* Test for input on the RedBoot console */
+} bd_t;
+
+#define BI_REV 0x0102    /* Version 1.02 */
+
+#define bi_pci_busfreq bi_pci_freq
+#define bi_immr_base   bi_immr
+#endif
diff --git a/arch/powerpc/boot/reg.h b/arch/powerpc/boot/reg.h
new file mode 100644
index 0000000000..fd8f4fcbfc
--- /dev/null
+++ b/arch/powerpc/boot/reg.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PPC_BOOT_REG_H
+#define _PPC_BOOT_REG_H
+/*
+ * Copyright 2007 David Gibson, IBM Corporation.
+ */
+
+static inline u32 mfpvr(void)	/* returns the result of the mfpvr instruction */
+{
+	u32 value;
+	asm volatile ("mfpvr	%0" : "=r"(value));
+	return value;
+}
+
+#define __stringify_1(x)	#x
+#define __stringify(x)		__stringify_1(x)	/* expands x, then stringifies */
+/* rn is pasted into the asm text, so it must expand to the SPR number. */
+#define mfspr(rn)	({unsigned long rval; \
+			asm volatile("mfspr %0," __stringify(rn) \
+				: "=r" (rval)); rval; })
+#define mtspr(rn, v)	asm volatile("mtspr " __stringify(rn) ",%0" : : "r" (v))
+
+register void *__stack_pointer asm("r1");
+#define get_sp()	(__stack_pointer)
+
+#endif	/* _PPC_BOOT_REG_H */
diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h
new file mode 100644
index 0000000000..a9d879155e
--- /dev/null
+++ b/arch/powerpc/boot/rs6000.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* IBM RS/6000 "XCOFF" file definitions for BFD.
+   Copyright (C) 1990, 1991 Free Software Foundation, Inc.
+   FIXME: Can someone provide a transliteration of this name into ASCII?
+   Using the following chars caused a compiler warning on HIUX (so I replaced
+   them with octal escapes), and isn't useful without an understanding of what
+   character set it is.
+   Written by Mimi Ph\373\364ng-Th\345o V\365 of IBM
+   and John Gilmore of Cygnus Support.  */
+
+/********************** FILE HEADER **********************/
+
+struct external_filehdr {	/* file header; every field is a raw byte array, 20 bytes in total */
+	char f_magic[2];	/* magic number			*/
+	char f_nscns[2];	/* number of sections		*/
+	char f_timdat[4];	/* time & date stamp		*/
+	char f_symptr[4];	/* file pointer to symtab	*/
+	char f_nsyms[4];	/* number of symtab entries	*/
+	char f_opthdr[2];	/* sizeof(optional hdr)		*/
+	char f_flags[2];	/* flags			*/
+};
+
+        /* IBM RS/6000 magic numbers (octal literals) */
+#define U802WRMAGIC     0730    /* writeable text segments **chh**      */
+#define U802ROMAGIC     0735    /* readonly sharable text segments      */
+#define U802TOCMAGIC    0737    /* readonly text segments and TOC       */
+
+#define BADMAG(x)	\
+	((x).f_magic != U802ROMAGIC && (x).f_magic != U802WRMAGIC && \
+	 (x).f_magic != U802TOCMAGIC)	/* NOTE(review): f_magic in struct external_filehdr is char[2], so this compares an array with an integer -- confirm before use */
+
+#define	FILHDR	struct external_filehdr
+#define	FILHSZ	20	/* sum of the field sizes of struct external_filehdr */
+
+
+/********************** AOUT "OPTIONAL HEADER" **********************/
+
+
+typedef struct	/* a.out-style "optional header"; every field is a raw byte array */
+{
+  unsigned char	magic[2];	/* type of file			*/
+  unsigned char	vstamp[2];	/* version stamp		*/
+  unsigned char	tsize[4];	/* text size in bytes, padded to FW bdry */
+  unsigned char	dsize[4];	/* initialized data "  "	*/
+  unsigned char	bsize[4];	/* uninitialized data "   "	*/
+  unsigned char	entry[4];	/* entry pt.			*/
+  unsigned char	text_start[4];	/* base of text used for this file */
+  unsigned char	data_start[4];	/* base of data used for this file */
+  unsigned char	o_toc[4];	/* address of TOC */
+  unsigned char	o_snentry[2];	/* section number of entry point */
+  unsigned char	o_sntext[2];	/* section number of .text section */
+  unsigned char	o_sndata[2];	/* section number of .data section */
+  unsigned char	o_sntoc[2];	/* section number of TOC */
+  unsigned char	o_snloader[2];	/* section number of .loader section */
+  unsigned char	o_snbss[2];	/* section number of .bss section */
+  unsigned char	o_algntext[2];	/* .text alignment */
+  unsigned char	o_algndata[2];	/* .data alignment */
+  unsigned char	o_modtype[2];	/* module type (??) */
+  unsigned char o_cputype[2];	/* cpu type */
+  unsigned char	o_maxstack[4];	/* max stack size (??) */
+  unsigned char o_maxdata[4];	/* max data size (??) */
+  unsigned char	o_resv2[12];	/* reserved */
+}
+AOUTHDR;
+
+#define AOUTSZ 72	/* sum of all AOUTHDR field sizes */
+#define SMALL_AOUTSZ (28)	/* size of the fields magic through data_start */
+#define AOUTHDRSZ 72	/* same value as AOUTSZ */
+
+#define	RS6K_AOUTHDR_OMAGIC	0x0107	/* old: text & data writeable */
+#define	RS6K_AOUTHDR_NMAGIC	0x0108	/* new: text r/o, data r/w */
+#define	RS6K_AOUTHDR_ZMAGIC	0x010B	/* paged: text r/o, both page-aligned */
+
+
+/********************** SECTION HEADER **********************/
+
+
+struct external_scnhdr {	/* section header; every field is a raw byte array, 40 bytes in total */
+	char		s_name[8];	/* section name			*/
+	char		s_paddr[4];	/* physical address, aliased s_nlib */
+	char		s_vaddr[4];	/* virtual address		*/
+	char		s_size[4];	/* section size			*/
+	char		s_scnptr[4];	/* file ptr to raw data for section */
+	char		s_relptr[4];	/* file ptr to relocation	*/
+	char		s_lnnoptr[4];	/* file ptr to line numbers	*/
+	char		s_nreloc[2];	/* number of relocation entries	*/
+	char		s_nlnno[2];	/* number of line number entries*/
+	char		s_flags[4];	/* flags			*/
+};
+
+/*
+ * names of "special" sections
+ */
+#define _TEXT	".text"
+#define _DATA	".data"
+#define _BSS	".bss"
+#define _PAD	".pad"
+#define _LOADER	".loader"
+
+#define	SCNHDR	struct external_scnhdr
+#define	SCNHSZ	40	/* sum of the field sizes of struct external_scnhdr */
+
+/* XCOFF uses a special .loader section with type STYP_LOADER.  */
+#define STYP_LOADER 0x1000
+
+/* XCOFF uses a special .debug section with type STYP_DEBUG.  */
+#define STYP_DEBUG 0x2000
+
+/* XCOFF handles line number or relocation overflow by creating
+   another section header with STYP_OVRFLO set.  */
+#define STYP_OVRFLO 0x8000
+
+/********************** LINE NUMBERS **********************/
+
+/* 1 line number entry for every "breakpointable" source line in a section.
+ * Line numbers are grouped on a per function basis; first entry in a function
+ * grouping will have l_lnno = 0 and in place of physical address will be the
+ * symbol table index of the function name.
+ */
+struct external_lineno {	/* line number entry; raw byte arrays, 6 bytes in total */
+	union {
+		char l_symndx[4];	/* function name symbol index, iff l_lnno == 0*/
+		char l_paddr[4];	/* (physical) address of line number	*/
+	} l_addr;
+	char l_lnno[2];	/* line number		*/
+};
+
+
+#define	LINENO	struct external_lineno
+#define	LINESZ	6	/* sum of the field sizes of struct external_lineno */
+
+
+/********************** SYMBOLS **********************/
+
+#define E_SYMNMLEN	8	/* # characters in a symbol name	*/
+#define E_FILNMLEN	14	/* # characters in a file name		*/
+#define E_DIMNUM	4	/* # array dimensions in auxiliary entry */
+
+struct external_syment	/* symbol table entry; raw byte arrays, 18 bytes in total */
+{
+  union {
+    char e_name[E_SYMNMLEN];	/* name stored inline */
+    struct {
+      char e_zeroes[4];	/* overlays e_name[0..3]; presumably all-zero selects e_offset -- confirm */
+      char e_offset[4];	/* overlays e_name[4..7]; presumably a string-table offset -- confirm */
+    } e;
+  } e;
+  char e_value[4];
+  char e_scnum[2];
+  char e_type[2];
+  char e_sclass[1];
+  char e_numaux[1];
+};
+
+
+
+#define N_BTMASK	(017)	/* octal: bits 0-3 */
+#define N_TMASK		(060)	/* octal: bits 4-5 */
+#define N_BTSHFT	(4)	/* width of the N_BTMASK field */
+#define N_TSHIFT	(2)	/* width of the N_TMASK field */
+
+
+union external_auxent {	/* auxiliary symbol entry; alternative raw-byte layouts, the largest being 18 bytes */
+	struct {
+		char x_tagndx[4];	/* str, un, or enum tag indx */
+		union {
+			struct {
+			    char  x_lnno[2]; /* declaration line number */
+			    char  x_size[2]; /* str/union/array size */
+			} x_lnsz;
+			char x_fsize[4];	/* size of function */
+		} x_misc;
+		union {
+			struct {		/* if ISFCN, tag, or .bb */
+			    char x_lnnoptr[4];	/* ptr to fcn line # */
+			    char x_endndx[4];	/* entry ndx past block end */
+			} x_fcn;
+			struct {		/* if ISARY, up to 4 dimen. */
+			    char x_dimen[E_DIMNUM][2];
+			} x_ary;
+		} x_fcnary;
+		char x_tvndx[2];		/* tv index */
+	} x_sym;
+
+	union {
+		char x_fname[E_FILNMLEN];	/* file name stored inline */
+		struct {
+			char x_zeroes[4];	/* overlays x_fname[0..3] */
+			char x_offset[4];	/* overlays x_fname[4..7] */
+		} x_n;
+	} x_file;
+
+	struct {
+		char x_scnlen[4];			/* section length */
+		char x_nreloc[2];	/* # relocation entries */
+		char x_nlinno[2];	/* # line numbers */
+	} x_scn;
+
+        struct {
+		char x_tvfill[4];	/* tv fill value */
+		char x_tvlen[2];	/* length of .tv */
+		char x_tvran[2][2];	/* tv range */
+	} x_tv;		/* info about .tv section (in auxent of symbol .tv)) */
+
+	struct {
+		unsigned char x_scnlen[4];
+		unsigned char x_parmhash[4];
+		unsigned char x_snhash[2];
+		unsigned char x_smtyp[1];
+		unsigned char x_smclas[1];
+		unsigned char x_stab[4];
+		unsigned char x_snstab[2];
+	} x_csect;
+
+};
+
+#define	SYMENT	struct external_syment
+#define	SYMESZ	18	/* sum of the field sizes of struct external_syment */
+#define	AUXENT	union external_auxent
+#define	AUXESZ	18	/* size of the largest member (x_sym) of union external_auxent */
+#define DBXMASK 0x80		/* for dbx storage mask */
+#define SYMNAME_IN_DEBUG(symptr) ((symptr)->n_sclass & DBXMASK)	/* NOTE(review): no struct in this file declares n_sclass (struct external_syment has e_sclass) -- confirm before use */
+
+
+
+/********************** RELOCATION DIRECTIVES **********************/
+
+
+struct external_reloc {	/* relocation entry; raw byte arrays, 10 bytes in total */
+  char r_vaddr[4];
+  char r_symndx[4];
+  char r_size[1];
+  char r_type[1];
+};
+
+
+#define RELOC struct external_reloc
+#define RELSZ 10	/* sum of the field sizes of struct external_reloc */
+
+#define DEFAULT_DATA_SECTION_ALIGNMENT 4
+#define DEFAULT_BSS_SECTION_ALIGNMENT 4
+#define DEFAULT_TEXT_SECTION_ALIGNMENT 4
+/* For new sections we haven't heard of before */
+#define DEFAULT_SECTION_ALIGNMENT 4
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
new file mode 100644
index 0000000000..c6d32a8c36
--- /dev/null
+++ b/arch/powerpc/boot/serial.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic serial console support
+ *
+ * Author: Mark A. Greer <mgreer@mvista.com>
+ *
+ * Code in serial_edit_cmdline() copied from <file:arch/ppc/boot/simple/misc.c>
+ * and was written by Matt Porter <mporter@kernel.crashing.org>.
+ *
+ * 2001,2006 (c) MontaVista Software, Inc.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "string.h"
+#include "stdio.h"
+#include "io.h"
+#include "ops.h"
+
+/* Console "open" hook: delegate to the open() callback of the serial driver. */
+static int serial_open(void)
+{
+	struct serial_console_data *drv = console_ops.data;
+
+	return drv->open();
+}
+
+/*
+ * Console "write" hook: send buf to the serial driver one character at a
+ * time through its putc() callback.
+ *
+ * Output stops after len characters or at the first NUL byte, whichever
+ * comes first, so a buffer that is not NUL-terminated is never read past
+ * the length the caller supplied.
+ */
+static void serial_write(const char *buf, int len)
+{
+	struct serial_console_data *scdp = console_ops.data;
+
+	while (len-- > 0 && *buf != '\0')
+		scdp->putc(*buf++);
+}
+
+/*
+ * Let the user edit the NUL-terminated command line in buf (capacity len
+ * bytes) over the serial console.
+ *
+ * The console is polled about once per millisecond, for roughly timeout
+ * milliseconds, waiting for a first character.  Once input is pending the
+ * line is edited until CR or LF arrives: backspace/delete remove one
+ * character, ^X/^U wipe the whole line, anything else is appended and echoed
+ * while room remains.  The buffer is always NUL-terminated on return.
+ */
+static void serial_edit_cmdline(char *buf, int len, unsigned int timeout)
+{
+	struct serial_console_data *scdp = console_ops.data;
+	int used = strlen(buf);
+	char *end = &buf[used];
+	int elapsed = 0;
+	char ch;
+
+	used++;		/* account for the terminating NUL */
+
+	for (;;) {
+		if (scdp->tstc()) {
+			for (;;) {
+				ch = scdp->getc();
+				if (ch == '\n' || ch == '\r')
+					break;
+
+				if (ch == '\b' || ch == '\177') {
+					/* backspace/delete: drop one character */
+					if (end != buf) {
+						end--;
+						used--;
+						printf("\b \b");
+					}
+				} else if (ch == '\030' || ch == '\025') {
+					/* ^x/^u: erase everything typed so far */
+					while (end != buf) {
+						end--;
+						used--;
+						printf("\b \b");
+					}
+				} else if (used < len) {
+					*end++ = ch;
+					used++;
+					scdp->putc(ch);
+				}
+			}
+			break;	/* line finished, stop waiting */
+		}
+
+		udelay(1000);	/* 1 msec */
+		if (!(elapsed++ < timeout))
+			break;
+	}
+	*end = 0;
+}
+
+/* Console "close" hook: call the driver's close() callback if it has one. */
+static void serial_close(void)
+{
+	struct serial_console_data *drv = console_ops.data;
+
+	if (!drv->close)
+		return;
+
+	drv->close();
+}
+
+/*
+ * Locate the device node used for console output.
+ *
+ * The path is taken from /chosen, preferring "linux,stdout-path" over
+ * "stdout-path".  The node is returned only if its "device_type" property is
+ * "serial"; in every other case the result is NULL.
+ */
+static void *serial_get_stdout_devp(void)
+{
+	char path[MAX_PATH_LEN];
+	char devtype[MAX_PROP_LEN];
+	void *node;
+	int have_path;
+
+	node = finddevice("/chosen");
+	if (node == NULL)
+		return NULL;
+
+	have_path = getprop(node, "linux,stdout-path", path, MAX_PATH_LEN) > 0;
+	if (!have_path)
+		have_path = getprop(node, "stdout-path", path, MAX_PATH_LEN) > 0;
+	if (!have_path)
+		return NULL;
+
+	node = finddevice(path);
+	if (node == NULL)
+		return NULL;
+
+	if (getprop(node, "device_type", devtype, sizeof(devtype)) <= 0)
+		return NULL;
+	if (strcmp(devtype, "serial") != 0)
+		return NULL;
+
+	return node;
+}
+
+static struct serial_console_data serial_cd;
+
+/*
+ * Choose a serial driver from the stdout node's "compatible" property and,
+ * if that driver initialises successfully, install the serial_* hooks in
+ * console_ops.  Returns 0 on success and -1 on any failure.
+ */
+int serial_console_init(void)
+{
+	void *devp = serial_get_stdout_devp();
+	int rc = -1;
+
+	if (devp == NULL)
+		return -1;
+
+	if (dt_is_compatible(devp, "ns16550") ||
+	    dt_is_compatible(devp, "pnpPNP,501"))
+		rc = ns16550_console_init(devp, &serial_cd);
+#ifdef CONFIG_CPM
+	else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
+	         dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
+	         dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
+	         dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
+		rc = cpm_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC_MPC52xx
+	else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
+		rc = mpc5200_psc_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC_POWERNV
+	else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
+		rc = opal_console_init(devp, &serial_cd);
+#endif
+
+	/* Add other serial console driver calls here */
+
+	if (rc != 0)
+		return -1;
+
+	console_ops.open = serial_open;
+	console_ops.write = serial_write;
+	console_ops.close = serial_close;
+	console_ops.data = &serial_cd;
+
+	/* Command-line editing needs a driver that can read characters. */
+	if (serial_cd.getc)
+		console_ops.edit_cmdline = serial_edit_cmdline;
+
+	return 0;
+}
diff --git a/arch/powerpc/boot/simple_alloc.c b/arch/powerpc/boot/simple_alloc.c
new file mode 100644
index 0000000000..267d6524ca
--- /dev/null
+++ b/arch/powerpc/boot/simple_alloc.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement primitive realloc(3) functionality.
+ *
+ * Author: Mark A. Greer <mgreer@mvista.com>
+ *
+ * 2006 (c) MontaVista, Software, Inc.
+ */
+
+#include <stddef.h>
+#include "types.h"
+#include "page.h"
+#include "string.h"
+#include "ops.h"
+
+#define	ENTRY_BEEN_USED	0x01	/* slot has been given a base and size at least once */
+#define	ENTRY_IN_USE	0x02	/* slot is currently handed out to a caller */
+
+static struct alloc_info {	/* one slot of the allocation table */
+	unsigned long	flags;	/* ENTRY_* bits */
+	unsigned long	base;	/* start address of the slot's region */
+	unsigned long	size;	/* size of the region, already rounded up to alloc_min */
+} *alloc_tbl;	/* table of tbl_entries slots, set up by simple_alloc_init() */
+
+static unsigned long tbl_entries;	/* number of slots in alloc_tbl */
+static unsigned long alloc_min;	/* allocation granularity used for rounding */
+static unsigned long next_base;	/* address the next never-used slot will receive */
+static unsigned long space_left;	/* heap bytes not yet assigned to any slot */
+
+/*
+ * Allocate size bytes (rounded up to alloc_min) from the table-driven heap.
+ *
+ * Slots are scanned in order.  A slot that has been used before keeps its
+ * base and size for ever and can be handed out again when it is free and big
+ * enough.  The first never-used slot is carved out of the remaining heap, or
+ * the request fails if the heap cannot hold it.  Returns NULL for a zero
+ * size, when the heap is exhausted, or when no slot is available.
+ */
+static void *simple_malloc(unsigned long size)
+{
+	struct alloc_info *slot;
+	unsigned long idx;
+
+	if (size == 0)
+		return NULL;
+
+	size = _ALIGN_UP(size, alloc_min);
+
+	for (idx = 0, slot = alloc_tbl; idx < tbl_entries; idx++, slot++) {
+		if (!(slot->flags & ENTRY_BEEN_USED)) {
+			/* first never-used slot: take fresh heap or give up */
+			if (size > space_left)
+				return NULL;
+
+			slot->base = next_base;
+			slot->size = size;
+			slot->flags = ENTRY_BEEN_USED | ENTRY_IN_USE;
+			next_base += size;
+			space_left -= size;
+			return (void *)slot->base;
+		}
+
+		/* previously used slot: recycle it if free and large enough */
+		if (!(slot->flags & ENTRY_IN_USE) && size <= slot->size) {
+			slot->flags |= ENTRY_IN_USE;
+			return (void *)slot->base;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the in-use slot whose base address equals ptr.  The scan ends at the
+ * first never-used slot.  Returns NULL when no such slot exists.
+ */
+static struct alloc_info *simple_find_entry(void *ptr)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx < tbl_entries; idx++) {
+		struct alloc_info *slot = &alloc_tbl[idx];
+
+		if (!(slot->flags & ENTRY_BEEN_USED))
+			return NULL;
+		if (!(slot->flags & ENTRY_IN_USE))
+			continue;
+		if (slot->base == (unsigned long)ptr)
+			return slot;
+	}
+
+	return NULL;
+}
+
+/*
+ * Mark the slot that owns ptr as free.  Pointers that do not match an in-use
+ * slot are ignored.
+ */
+static void simple_free(void *ptr)
+{
+	struct alloc_info *slot = simple_find_entry(ptr);
+
+	if (slot == NULL)
+		return;
+
+	slot->flags &= ~ENTRY_IN_USE;
+}
+
+/*
+ * Change size of area pointed to by 'ptr' to 'size'.
+ * If 'ptr' is NULL, then it's a malloc().  If 'size' is 0, then it's a free().
+ * 'ptr' must be NULL or a pointer to a non-freed area previously returned by
+ * simple_realloc() or simple_malloc().
+ *
+ * Returns the (possibly moved) area, or NULL when 'size' is 0, when 'ptr' is
+ * not a live allocation, or when a larger area cannot be allocated.  In the
+ * last case the original area is left untouched and still allocated, so the
+ * caller does not lose its data.
+ */
+static void *simple_realloc(void *ptr, unsigned long size)
+{
+	struct alloc_info *p;
+	void *new;
+
+	if (size == 0) {
+		simple_free(ptr);
+		return NULL;
+	}
+
+	if (ptr == NULL)
+		return simple_malloc(size);
+
+	p = simple_find_entry(ptr);
+	if (p == NULL) /* ptr not from simple_malloc/simple_realloc */
+		return NULL;
+	if (size <= p->size) /* fits in current block */
+		return ptr;
+
+	new = simple_malloc(size);
+	if (new == NULL) /* out of space: keep the old block intact */
+		return NULL;
+
+	memcpy(new, ptr, p->size);
+	simple_free(ptr);
+	return new;
+}
+
+/*
+ * Set up the allocator: the slot table is placed at base (rounded up to 8
+ * bytes) and the heap follows it, aligned to granularity.  The allocator is
+ * then installed in platform_ops.
+ *
+ * Returns addr of first byte after heap so caller can see if it took
+ * too much space.  If so, change args & try again.
+ */
+void *simple_alloc_init(char *base, unsigned long heap_size,
+			unsigned long granularity, unsigned long max_allocs)
+{
+	unsigned long table_bytes = max_allocs * sizeof(struct alloc_info);
+	unsigned long heap_start;
+
+	alloc_min = granularity;
+	tbl_entries = max_allocs;
+
+	/* slot table first, cleared so that every slot reads as never used */
+	alloc_tbl = (struct alloc_info *)_ALIGN_UP((unsigned long)base, 8);
+	memset(alloc_tbl, 0, table_bytes);
+
+	/* the heap proper starts right behind the table */
+	heap_start = _ALIGN_UP((unsigned long)alloc_tbl + table_bytes, alloc_min);
+	heap_size = _ALIGN_UP(heap_size, granularity);
+
+	next_base = heap_start;
+	space_left = heap_size;
+
+	platform_ops.malloc = simple_malloc;
+	platform_ops.free = simple_free;
+	platform_ops.realloc = simple_realloc;
+
+	return (void *)(heap_start + heap_size);
+}
diff --git a/arch/powerpc/boot/simpleboot.c b/arch/powerpc/boot/simpleboot.c
new file mode 100644
index 0000000000..c80691d838
--- /dev/null
+++ b/arch/powerpc/boot/simpleboot.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The simple platform -- for booting when firmware doesn't supply a device
+ *                        tree or any platform configuration information.
+ *                        All data is extracted from an embedded device tree
+ *                        blob.
+ *
+ * Authors: Scott Wood <scottwood@freescale.com>
+ *          Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ * Copyright (c) 2008 Secret Lab Technologies Ltd.
+ */
+
+#include "ops.h"
+#include "types.h"
+#include "io.h"
+#include "stdio.h"
+#include <libfdt.h>
+
+BSS_STACK(4*1024);
+
+extern int platform_specific_init(void) __attribute__((weak));
+
+/*
+ * Platform entry point for the "simple" boot wrapper.  Everything is read
+ * from the device tree blob embedded at _dtb_start; the register arguments
+ * r3..r7 are not used.
+ *
+ * Steps: validate the blob, read #address-cells/#size-cells from the root,
+ * take the size of the memory node based at address 0, pick up the cpu
+ * node's timebase frequency, initialise the heap behind _end, then set up
+ * the device tree and the serial console.  Any unusable blob content ends in
+ * fatal().
+ */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	const u32 *na, *ns, *reg, *timebase;
+	u64 memsize64;
+	int node, size, i;
+
+	/* Make sure FDT blob is sane */
+	if (fdt_check_header(_dtb_start) != 0)
+		fatal("Invalid device tree blob\n");
+
+	/* Find the #address-cells and #size-cells properties */
+	node = fdt_path_offset(_dtb_start, "/");
+	if (node < 0)
+		fatal("Cannot find root node\n");
+	na = fdt_getprop(_dtb_start, node, "#address-cells", &size);
+	if (!na || (size != 4))
+		fatal("Cannot find #address-cells property");
+	ns = fdt_getprop(_dtb_start, node, "#size-cells", &size);
+	if (!ns || (size != 4))
+		fatal("Cannot find #size-cells property");
+
+	/* Find the memory range */
+	node = fdt_node_offset_by_prop_value(_dtb_start, -1, "device_type",
+					     "memory", sizeof("memory"));
+	if (node < 0)
+		fatal("Cannot find memory node\n");
+	reg = fdt_getprop(_dtb_start, node, "reg", &size);
+	/*
+	 * A missing property yields NULL and a negative size; test the
+	 * pointer first because the size comparison below is unsigned.
+	 */
+	if (!reg || size < (*na+*ns) * sizeof(u32))
+		fatal("cannot get memory range\n");
+
+	/* Only interested in memory based at 0 */
+	for (i = 0; i < *na; i++)
+		if (*reg++ != 0)
+			fatal("Memory range is not based at address 0\n");
+
+	/* get the memsize and truncate it to under 4G on 32 bit machines */
+	memsize64 = 0;
+	for (i = 0; i < *ns; i++)
+		memsize64 = (memsize64 << 32) | *reg++;
+	if (sizeof(void *) == 4 && memsize64 >= 0x100000000ULL)
+		memsize64 = 0xffffffff;
+
+	/* finally, setup the timebase */
+	node = fdt_node_offset_by_prop_value(_dtb_start, -1, "device_type",
+					     "cpu", sizeof("cpu"));
+	/* libfdt reports "not found" as a negative value, never as 0 */
+	if (node < 0)
+		fatal("Cannot find cpu node\n");
+	timebase = fdt_getprop(_dtb_start, node, "timebase-frequency", &size);
+	/* ignore a zero frequency instead of dividing by it */
+	if (timebase && (size == 4) && *timebase != 0)
+		timebase_period_ns = 1000000000 / *timebase;
+
+	/* Now we have the memory size; initialize the heap */
+	simple_alloc_init(_end, memsize64 - (unsigned long)_end, 32, 64);
+
+	/* prepare the device tree and find the console */
+	fdt_init(_dtb_start);
+
+	/* weak symbol: only present when the platform provides it */
+	if (platform_specific_init)
+		platform_specific_init();
+
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/stdbool.h b/arch/powerpc/boot/stdbool.h
new file mode 100644
index 0000000000..2dfe247ede
--- /dev/null
+++ b/arch/powerpc/boot/stdbool.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This file is only necessary because some of the pre-boot decompressors
+ * expect stdbool.h to be available.
+ */
+
+#include "types.h"
diff --git a/arch/powerpc/boot/stdint.h b/arch/powerpc/boot/stdint.h
new file mode 100644
index 0000000000..5cc5e87b00
--- /dev/null
+++ b/arch/powerpc/boot/stdint.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This file is only necessary because some of the pre-boot decompressors
+ * expect stdint.h to be available.
+ */
+
+#include "types.h"
diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
new file mode 100644
index 0000000000..31eece29f5
--- /dev/null
+++ b/arch/powerpc/boot/stdio.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+
+/*
+ * Length of the string s, looking at no more than count bytes: the result is
+ * the index of the first NUL, or count if none occurs before that.
+ */
+size_t strnlen(const char * s, size_t count)
+{
+	size_t n = 0;
+
+	while (n < count && s[n] != '\0')
+		n++;
+
+	return n;
+}
+
+/*
+ * Return a pointer to the last occurrence of (char)c in s, or NULL if there
+ * is none.  The terminating NUL takes part in the search, so looking for
+ * '\0' yields the end of the string.
+ */
+char *strrchr(const char *s, int c)
+{
+	const char *hit = NULL;
+
+	for (;; s++) {
+		if (*s == (char)c)
+			hit = s;
+		if (*s == '\0')
+			break;
+	}
+
+	return (char *)hit;
+}
+
+#ifdef __powerpc64__	/* 64-bit build: plain C division on unsigned long long */
+
+# define do_div(n, base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	__rem = ((unsigned long long)(n)) % __base;			\
+	(n) = ((unsigned long long)(n)) / __base;			\
+	__rem;								\
+})
+
+#else	/* otherwise: large dividends go through the external __div64_32() */
+
+extern unsigned int __div64_32(unsigned long long *dividend,
+			       unsigned int divisor);
+
+/* do_div(n, base) divides n in place and evaluates to the remainder.  The
+ * otherwise unnecessary pointer compare checks type safety (n must be 64bit).
+ */
+# define do_div(n,base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	(void)(((typeof((n)) *)0) == ((unsigned long long *)0));	\
+	if (((n) >> 32) == 0) {						\
+		__rem = (unsigned int)(n) % __base;			\
+		(n) = (unsigned int)(n) / __base;			\
+	} else								\
+		__rem = __div64_32(&(n), __base);			\
+	__rem;								\
+ })
+
+#endif /* __powerpc64__ */
+
+/*
+ * Parse the run of decimal digits at *s, leave *s pointing at the first
+ * character that is not a digit and return the value (0 if there were no
+ * digits).
+ */
+static int skip_atoi(const char **s)
+{
+	const char *p = *s;
+	int value = 0;
+
+	while (*p >= '0' && *p <= '9') {
+		value = value*10 + *p - '0';
+		p++;
+	}
+
+	*s = p;
+	return value;
+}
+
+#define ZEROPAD	1		/* pad with zeros instead of spaces (dropped when LEFT is set) */
+#define SIGN	2		/* treat the value as signed */
+#define PLUS	4		/* show '+' for non-negative signed values */
+#define SPACE	8		/* show ' ' for non-negative signed values when PLUS is not set */
+#define LEFT	16		/* left justified */
+#define SPECIAL	32		/* prefix "0x" for base 16, "0" for base 8 */
+#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+
+/*
+ * Write num in the given base at str and return the position just past the
+ * last character written; no terminating NUL is added.  Returns 0 (a null
+ * pointer) when base is outside 2..36.
+ *
+ * size is the minimum field width, precision the minimum number of digits,
+ * and type a mask of the ZEROPAD/SIGN/PLUS/SPACE/LEFT/SPECIAL/LARGE flags.
+ */
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+	const char *digits = (type & LARGE)
+		? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		: "0123456789abcdefghijklmnopqrstuvwxyz";
+	char rev[66];		/* digits, least significant first */
+	char pad;
+	char sign = 0;
+	int ndigits = 0;
+
+	if (type & LEFT)
+		type &= ~ZEROPAD;
+	if (base < 2 || base > 36)
+		return 0;
+	pad = (type & ZEROPAD) ? '0' : ' ';
+
+	/* work out the sign character; it uses up one column of the field */
+	if (type & SIGN) {
+		if ((signed long long)num < 0) {
+			sign = '-';
+			num = - (signed long long)num;
+		} else if (type & PLUS) {
+			sign = '+';
+		} else if (type & SPACE) {
+			sign = ' ';
+		}
+		if (sign)
+			size--;
+	}
+
+	/* the "0x" / "0" prefix uses up columns as well */
+	if (type & SPECIAL) {
+		switch (base) {
+		case 16:
+			size -= 2;
+			break;
+		case 8:
+			size--;
+			break;
+		}
+	}
+
+	/* produce the digits in reverse order; zero still yields one digit */
+	do {
+		rev[ndigits++] = digits[do_div(num, base)];
+	} while (num != 0);
+
+	if (precision < ndigits)
+		precision = ndigits;
+	size -= precision;
+
+	if ((type & (ZEROPAD + LEFT)) == 0)
+		while (size-- > 0)
+			*str++ = ' ';
+	if (sign)
+		*str++ = sign;
+	if (type & SPECIAL) {
+		if (base == 8) {
+			*str++ = '0';
+		} else if (base == 16) {
+			*str++ = '0';
+			*str++ = digits[33];	/* 'x' or 'X' */
+		}
+	}
+	if (!(type & LEFT))
+		while (size-- > 0)
+			*str++ = pad;
+	while (ndigits < precision--)
+		*str++ = '0';
+	while (ndigits-- > 0)
+		*str++ = rev[ndigits];
+	while (size-- > 0)
+		*str++ = ' ';
+
+	return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)	/* formats into buf with no bounds check; returns the number of characters written, not counting the NUL */
+{
+	int len;
+	unsigned long long num;
+	int i, base;
+	char * str;	/* next write position in buf */
+	const char *s;
+
+	int flags;		/* flags to number() */
+
+	int field_width;	/* width of output field */
+	int precision;		/* min. # of digits for integers; max
+				   number of chars from string */
+	int qualifier;		/* 'h', 'l', or 'L' for integer fields; 'q' is used internally for "ll" */
+	                        /* 'z' support added 23/7/1999 S.H.    */
+				/* 'z' changed to 'Z' --davidm 1/25/99 */
+
+	
+	for (str=buf ; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			*str++ = *fmt;	/* ordinary character: copy through */
+			continue;
+		}
+			
+		/* process flags */
+		flags = 0;
+		repeat:
+			++fmt;		/* this also skips first '%' */
+			switch (*fmt) {
+				case '-': flags |= LEFT; goto repeat;
+				case '+': flags |= PLUS; goto repeat;
+				case ' ': flags |= SPACE; goto repeat;
+				case '#': flags |= SPECIAL; goto repeat;
+				case '0': flags |= ZEROPAD; goto repeat;
+				}
+		
+		/* get field width */
+		field_width = -1;	/* -1 means "not given" */
+		if ('0' <= *fmt && *fmt <= '9')
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = va_arg(args, int);
+			if (field_width < 0) {	/* negative width means left-justify */
+				field_width = -field_width;
+				flags |= LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;	/* -1 means "not given" */
+		if (*fmt == '.') {
+			++fmt;	
+			if ('0' <= *fmt && *fmt <= '9')
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = va_arg(args, int);
+			}
+			if (precision < 0)	/* a bare '.' or a negative value counts as 0 */
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'l' && *(fmt + 1) == 'l') {
+			qualifier = 'q';	/* "ll": unsigned long long argument */
+			fmt += 2;
+		} else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+			|| *fmt == 'Z') {
+			qualifier = *fmt;
+			++fmt;
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+		case 'c':
+			if (!(flags & LEFT))
+				while (--field_width > 0)
+					*str++ = ' ';
+			*str++ = (unsigned char) va_arg(args, int);
+			while (--field_width > 0)
+				*str++ = ' ';
+			continue;
+
+		case 's':
+			s = va_arg(args, char *);
+			if (!s)
+				s = "<NULL>";
+
+			len = strnlen(s, precision);	/* precision -1 converts to the largest size_t, i.e. no limit */
+
+			if (!(flags & LEFT))
+				while (len < field_width--)
+					*str++ = ' ';
+			for (i = 0; i < len; ++i)
+				*str++ = *s++;
+			while (len < field_width--)
+				*str++ = ' ';
+			continue;
+
+		case 'p':
+			if (field_width == -1) {	/* default: full pointer width, zero padded */
+				field_width = 2*sizeof(void *);
+				flags |= ZEROPAD;
+			}
+			str = number(str,
+				(unsigned long) va_arg(args, void *), 16,
+				field_width, precision, flags);
+			continue;
+
+
+		case 'n':	/* store the number of characters written so far */
+			if (qualifier == 'l') {
+				long * ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z') {
+				size_t * ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int * ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			continue;
+
+		case '%':
+			*str++ = '%';
+			continue;
+
+		/* integer number formats - set up the flags and "break" */
+		case 'o':
+			base = 8;
+			break;
+
+		case 'X':
+			flags |= LARGE;	/* falls through to 'x' */
+		case 'x':
+			base = 16;
+			break;
+
+		case 'd':
+		case 'i':
+			flags |= SIGN;	/* falls through to 'u' */
+		case 'u':
+			break;
+
+		default:	/* unknown conversion: emit it verbatim */
+			*str++ = '%';
+			if (*fmt)
+				*str++ = *fmt;
+			else
+				--fmt;	/* end of format: step back so the for loop sees the NUL */
+			continue;
+		}
+		if (qualifier == 'l') {	/* fetch the integer argument according to the qualifier */
+			num = va_arg(args, unsigned long);
+			if (flags & SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'q') {
+			num = va_arg(args, unsigned long long);
+			if (flags & SIGN)
+				num = (signed long long) num;
+		} else if (qualifier == 'Z') {
+			num = va_arg(args, size_t);	/* no sign extension is applied for 'Z' */
+		} else if (qualifier == 'h') {
+			num = (unsigned short) va_arg(args, int);
+			if (flags & SIGN)
+				num = (signed short) num;
+		} else {
+			num = va_arg(args, unsigned int);
+			if (flags & SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, num, base, field_width, precision, flags);
+	}
+	*str = '\0';
+	return str-buf;
+}
+
+/*
+ * Format into buf according to fmt; a thin variadic wrapper around
+ * vsprintf().  Returns whatever vsprintf() returns.
+ */
+int sprintf(char * buf, const char *fmt, ...)
+{
+	va_list ap;
+	int written;
+
+	va_start(ap, fmt);
+	written = vsprintf(buf, fmt, ap);
+	va_end(ap);
+
+	return written;
+}
+
+/* Scratch buffer that printf() formats into before writing to the console. */
+static char sprint_buf[1024];
+
+/*
+ * Format the message into sprint_buf and, if a console write hook is
+ * installed, pass it on.  Returns the length vsprintf() reported.
+ */
+int
+printf(const char *fmt, ...)
+{
+	va_list ap;
+	int written;
+
+	va_start(ap, fmt);
+	written = vsprintf(sprint_buf, fmt, ap);
+	va_end(ap);
+
+	if (!console_ops.write)
+		return written;
+
+	console_ops.write(sprint_buf, written);
+	return written;
+}
diff --git a/arch/powerpc/boot/stdio.h b/arch/powerpc/boot/stdio.h
new file mode 100644
index 0000000000..884d5959a9
--- /dev/null
+++ b/arch/powerpc/boot/stdio.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_STDIO_H_
+#define _PPC_BOOT_STDIO_H_
+
+#include <stdarg.h>
+
+#define	ENOMEM		12	/* Out of Memory */
+#define	EINVAL		22	/* Invalid argument */
+#define ENOSPC		28	/* No space left on device */
+
+extern int printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));	/* compiler checks the arguments against fmt */
+
+#define fprintf(fmt, args...)	printf(args)	/* the first argument is dropped; the rest are passed to printf() */
+
+extern int sprintf(char *buf, const char *fmt, ...)
+	__attribute__((format(printf, 2, 3)));
+
+extern int vsprintf(char *buf, const char *fmt, va_list args);	/* takes no buffer size */
+
+#endif				/* _PPC_BOOT_STDIO_H_ */
diff --git a/arch/powerpc/boot/stdlib.c b/arch/powerpc/boot/stdlib.c
new file mode 100644
index 0000000000..868b019d63
--- /dev/null
+++ b/arch/powerpc/boot/stdlib.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * stdlib functions
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "stdlib.h"
+
+/*
+ * Convert the leading digits of ptr, in the given base, to an unsigned long
+ * long.  If end is not NULL, *end receives the position of the first
+ * character that was not consumed.  A base above 36 consumes nothing and
+ * yields 0.
+ *
+ * Not currently supported: leading whitespace, sign, 0x prefix, zero base
+ */
+unsigned long long int strtoull(const char *ptr, char **end, int base)
+{
+	unsigned long long acc = 0;
+
+	if (base <= 36) {
+		for (;; ptr++) {
+			const char ch = *ptr;
+			int val;
+
+			if (ch == '\0')
+				break;
+
+			if (ch >= '0' && ch <= '9' && ch < '0' + base)
+				val = ch - '0';
+			else if (ch >= 'A' && ch < 'A' + base - 10)
+				val = ch - 'A' + 10;
+			else if (ch >= 'a' && ch < 'a' + base - 10)
+				val = ch - 'a' + 10;
+			else
+				break;
+
+			acc = acc * base + val;
+		}
+	}
+
+	if (end)
+		*end = (char *)ptr;
+
+	return acc;
+}
diff --git a/arch/powerpc/boot/stdlib.h b/arch/powerpc/boot/stdlib.h
new file mode 100644
index 0000000000..0a61fcd10f
--- /dev/null
+++ b/arch/powerpc/boot/stdlib.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_STDLIB_H_
+#define _PPC_BOOT_STDLIB_H_
+
+unsigned long long int strtoull(const char *ptr, char **end, int base);	/* parses digits only; end may be NULL */
+
+#endif
diff --git a/arch/powerpc/boot/string.S b/arch/powerpc/boot/string.S
new file mode 100644
index 0000000000..d2a2dbf1ee
--- /dev/null
+++ b/arch/powerpc/boot/string.S
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
+ */
+
+#include "ppc_asm.h"
+
+	.text
+	.globl	strcpy		/* r3 = dest, r4 = src; r3 is left untouched */
+strcpy:
+	addi	r5,r3,-1	/* r5 = dest - 1, for the update-form store */
+	addi	r4,r4,-1	/* r4 = src - 1, for the update-form load */
+1:	lbzu	r0,1(r4)	/* load next source byte, advance r4 */
+	cmpwi	0,r0,0		/* was it the terminating NUL? */
+	stbu	r0,1(r5)	/* store it (NUL included), advance r5 */
+	bne	1b		/* loop until the NUL has been copied */
+	blr
+
+	.globl	strncpy		/* r3 = dest, r4 = src, r5 = n; r3 is left untouched */
+strncpy:
+	cmpwi	0,r5,0
+	beqlr			/* n == 0: nothing to do */
+	mtctr	r5		/* CTR = maximum number of bytes to copy */
+	addi	r6,r3,-1	/* r6 = dest - 1 */
+	addi	r4,r4,-1	/* r4 = src - 1 */
+1:	lbzu	r0,1(r4)	/* load next source byte */
+	cmpwi	0,r0,0		/* NUL? */
+	stbu	r0,1(r6)	/* store it (NUL included) */
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	blr			/* no padding is written after the NUL */
+
+	.globl	strcat		/* r3 = dest, r4 = src; r3 is left untouched */
+strcat:
+	addi	r5,r3,-1	/* r5 = dest - 1 */
+	addi	r4,r4,-1	/* r4 = src - 1 */
+1:	lbzu	r0,1(r5)	/* scan dest ... */
+	cmpwi	0,r0,0
+	bne	1b		/* ... until r5 points at its NUL */
+	addi	r5,r5,-1	/* back up one so the next store overwrites that NUL */
+1:	lbzu	r0,1(r4)	/* load next source byte */
+	cmpwi	0,r0,0		/* NUL? */
+	stbu	r0,1(r5)	/* append it (NUL included) */
+	bne	1b		/* loop until the NUL has been copied */
+	blr
+
+	.globl	strchr		/* r3 = s, r4 = c; returns pointer to the match or 0 */
+strchr:
+	addi	r3,r3,-1	/* r3 = s - 1 */
+1:	lbzu	r0,1(r3)	/* load next byte (zero-extended), r3 points at it */
+	cmpw	0,r0,r4		/* compare with the whole of r4 */
+	beqlr			/* match: r3 is the result (tested before the NUL check) */
+	cmpwi	0,r0,0
+	bne	1b		/* not at the end yet */
+	li	r3,0		/* reached the NUL without a match */
+	blr
+
+	.globl	strcmp		/* r3 = s1, r4 = s2; returns (s1 byte - s2 byte) at the stopping point */
+strcmp:
+	addi	r5,r3,-1	/* r5 = s1 - 1 (r3 becomes the result register) */
+	addi	r4,r4,-1	/* r4 = s2 - 1 */
+1:	lbzu	r3,1(r5)	/* next byte of s1, zero-extended */
+	cmpwi	1,r3,0		/* cr1: is the s1 byte NUL? */
+	lbzu	r0,1(r4)	/* next byte of s2, zero-extended */
+	subf.	r3,r0,r3	/* r3 = s1 byte - s2 byte, cr0 records the result */
+	beqlr	1		/* s1 ended: return the difference */
+	beq	1b		/* bytes equal: keep going */
+	blr			/* bytes differ: return the difference */
+
+/*
+ * strncmp(s1 = r3, s2 = r4, n = r5)
+ *
+ * Compares at most n bytes and returns in r3 the difference (s1 byte minus
+ * s2 byte, both zero-extended) at the first position where the strings
+ * differ or s1 ends, or 0 when the first n bytes are equal.
+ *
+ * A count of zero returns 0 without reading either string.  Without that
+ * check CTR would be loaded with 0 and the bdnzt below would decrement it
+ * past zero instead of ending the loop.
+ */
+	.globl	strncmp
+strncmp:
+	cmplwi	0,r5,0
+	beq	2f		/* n == 0: strings compare equal */
+	mtctr	r5		/* CTR = maximum number of bytes to compare */
+	addi	r5,r3,-1	/* r5 = s1 - 1 (r3 becomes the result register) */
+	addi	r4,r4,-1	/* r4 = s2 - 1 */
+1:	lbzu	r3,1(r5)	/* next byte of s1 */
+	cmpwi	1,r3,0		/* cr1: is the s1 byte NUL? */
+	lbzu	r0,1(r4)	/* next byte of s2 */
+	subf.	r3,r0,r3	/* r3 = s1 byte - s2 byte, cr0 records the result */
+	beqlr	1		/* s1 ended: return the difference */
+	bdnzt	eq,1b		/* dec ctr, loop while ctr != 0 and bytes equal */
+	blr
+2:	li	r3,0
+	blr
+
+	.globl	strlen		/* r3 = s; returns the number of bytes before the NUL */
+strlen:
+	addi	r4,r3,-1	/* r4 = s - 1 */
+1:	lbzu	r0,1(r4)	/* load next byte, r4 points at it */
+	cmpwi	0,r0,0
+	bne	1b		/* loop until r4 points at the NUL */
+	subf	r3,r3,r4	/* r3 = r4 - s */
+	blr
+
+	.globl	memset		/* r3 = s, r4 = c, r5 = n; r3 is left untouched */
+memset:
+	rlwimi	r4,r4,8,16,23	/* copy the low byte of r4 into the next byte up */
+	rlwimi	r4,r4,16,0,15	/* copy the low halfword into the high halfword */
+	addi	r6,r3,-4	/* r6 = s - 4, for the update-form word stores */
+	cmplwi	0,r5,4
+	blt	7f		/* fewer than 4 bytes: byte loop only */
+	stwu	r4,4(r6)	/* store the first word at s; r6 = s */
+	beqlr			/* n == 4 (cr0 still from the cmplwi): done */
+	andi.	r0,r6,3		/* r0 = misalignment of s */
+	add	r5,r0,r5	/* count from the word boundary below s */
+	subf	r6,r0,r6	/* r6 = s rounded down to a word boundary */
+	rlwinm	r0,r5,32-2,2,31	/* r0 = r5 >> 2, number of words */
+	mtctr	r0
+	bdz	6f		/* one word is already covered by the store above */
+1:	stwu	r4,4(r6)	/* store the remaining whole words */
+	bdnz	1b
+6:	andi.	r5,r5,3		/* bytes left after the last whole word */
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3		/* point one byte before the first byte still to write */
+8:	stbu	r4,1(r6)	/* store the tail one byte at a time */
+	bdnz	8b
+	blr
+
+	.globl	memmove		/* r3 = dest, r4 = src, r5 = n */
+memmove:
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy	/* dest above src (unsigned): copy from the end */
+	/* fall through */
+
+	.globl	memcpy		/* r3 = dest, r4 = src, r5 = n; r3 is left untouched */
+memcpy:
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
+	addi	r6,r3,-4		/* r6 = dest - 4, for the update-form stores */
+	addi	r4,r4,-4		/* r4 = src - 4, for the update-form loads */
+	beq	3f			/* if less than 8 bytes to do */
+	andi.	r0,r6,3			/* get dest word aligned */
+	mtctr	r7			/* CTR = number of 8-byte chunks */
+	bne	5f			/* dest not word aligned */
+	andi.	r0,r4,3			/* check src word aligned too */
+	bne	3f			/* src not aligned: copy everything bytewise */
+1:	lwz	r7,4(r4)		/* copy 8 bytes per iteration */
+	lwzu	r8,8(r4)
+	stw	r7,4(r6)
+	stwu	r8,8(r6)
+	bdnz	1b
+	andi.	r5,r5,7			/* bytes left over after the 8-byte chunks */
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,4(r4)		/* one more whole word */
+	addi	r5,r5,-4
+	stwu	r0,4(r6)
+3:	cmpwi	0,r5,0			/* byte tail (or the whole copy when unaligned) */
+	beqlr
+	mtctr	r5
+	addi	r4,r4,3			/* turn the word pointers into byte pointers */
+	addi	r6,r6,3
+4:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	4b
+	blr
+5:	subfic	r0,r0,4			/* r0 = bytes needed to word-align dest */
+	cmpw	cr1,r0,r5		/* NOTE(review): r5 >= 8 on every path here, so the ble below looks always taken -- confirm */
+	add	r7,r0,r4
+	andi.	r7,r7,3			/* will source be word-aligned too? */
+	ble	cr1,3b
+	bne	3b			/* do byte-by-byte if not */
+	mtctr	r0
+6:	lbz	r7,4(r4)		/* copy single bytes until dest is aligned */
+	addi	r4,r4,1
+	stb	r7,4(r6)
+	addi	r6,r6,1
+	bdnz	6b
+	subf	r5,r0,r5		/* account for the bytes just copied */
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = remaining count >> 3 */
+	beq	2b
+	mtctr	r7
+	b	1b
+
+	.globl	backwards_memcpy	/* r3 = dest, r4 = src, r5 = n; copies from the highest address down, r3 untouched */
+backwards_memcpy:
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
+	add	r6,r3,r5		/* r6 = dest + n (one past the end) */
+	add	r4,r4,r5		/* r4 = src + n (one past the end) */
+	beq	3f			/* fewer than 8 bytes: byte loop only */
+	andi.	r0,r6,3			/* is the end of dest word aligned? */
+	mtctr	r7			/* CTR = number of 8-byte chunks */
+	bne	5f
+	andi.	r0,r4,3			/* is the end of src word aligned too? */
+	bne	3f			/* no: copy everything bytewise */
+1:	lwz	r7,-4(r4)		/* copy 8 bytes per iteration, moving down */
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7			/* bytes left over after the 8-byte chunks */
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)		/* one more whole word */
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0			/* byte tail (or the whole copy when unaligned) */
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	cmpw	cr1,r0,r5		/* NOTE(review): r0 is 1..3 and r5 >= 8 here, so the ble below looks always taken -- confirm */
+	subf	r7,r0,r4
+	andi.	r7,r7,3			/* would src be word aligned after r0 bytes? */
+	ble	cr1,3b
+	bne	3b			/* byte-by-byte if not */
+	mtctr	r0
+6:	lbzu	r7,-1(r4)		/* copy single bytes until dest is aligned */
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5		/* account for the bytes just copied */
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = remaining count >> 3 */
+	beq	2b
+	mtctr	r7
+	b	1b
+
+/*
+ * memchr(s = r3, c = r4, n = r5)
+ *
+ * Scans the first n bytes of s for a byte equal to r4 and returns a pointer
+ * to it in r3, or 0 when there is none.  A count that is zero (or negative
+ * when read as signed) also returns 0; returning early with r3 unchanged
+ * would hand the caller s itself, which reads as "found at the first byte".
+ */
+	.globl	memchr
+memchr:
+	cmpwi	0,r5,0
+	ble	2f		/* nothing to scan: not found */
+	mtctr	r5		/* CTR = number of bytes to examine */
+	addi	r3,r3,-1	/* r3 = s - 1, for the update-form load */
+1:	lbzu	r0,1(r3)	/* load next byte, r3 points at it */
+	cmpw	r0,r4
+	beqlr			/* match: r3 is the result */
+	bdnz	1b
+2:	li	r3,0		/* no match */
+	blr
+
+	.globl	memcmp		/* r3 = s1, r4 = s2, r5 = n; returns the difference of the first differing bytes, or 0 */
+memcmp:
+	cmpwi	0,r5,0
+	ble	2f		/* n <= 0 (signed): equal */
+	mtctr	r5		/* CTR = number of bytes to compare */
+	addi	r6,r3,-1	/* r6 = s1 - 1 (r3 becomes the result register) */
+	addi	r4,r4,-1	/* r4 = s2 - 1 */
+1:	lbzu	r3,1(r6)	/* next byte of s1, zero-extended */
+	lbzu	r0,1(r4)	/* next byte of s2, zero-extended */
+	subf.	r3,r0,r3	/* r3 = s1 byte - s2 byte, cr0 records the result */
+	bdnzt	2,1b		/* dec ctr, loop while ctr != 0 and cr0.eq */
+	blr
+2:	li	r3,0
+	blr
+
+
+/*
+ * Flush the dcache and invalidate the icache for a range of addresses.
+ * The range is walked in steps of 0x20 bytes.
+ *
+ * flush_cache(addr, len)
+ */
+	.global	flush_cache
+flush_cache:
+	addi	4,4,0x1f	/* len = (len + 0x1f) / 0x20 */
+	rlwinm.	4,4,27,5,31
+	mtctr	4		/* CTR = number of 0x20-byte steps */
+	beqlr			/* zero steps: nothing to do */
+1:	dcbf	0,3		/* flush the data cache block containing r3 */
+	icbi	0,3		/* invalidate the instruction cache block containing r3 */
+	addi	3,3,0x20	/* advance to the next step */
+	bdnz	1b
+	sync
+	isync
+	blr
+
diff --git a/arch/powerpc/boot/string.h b/arch/powerpc/boot/string.h
new file mode 100644
index 0000000000..8c2ec0c05e
--- /dev/null
+++ b/arch/powerpc/boot/string.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_STRING_H_
+#define _PPC_BOOT_STRING_H_
+#include <stddef.h>
+
+extern char *strcpy(char *dest, const char *src);
+extern char *strncpy(char *dest, const char *src, size_t n);
+extern char *strcat(char *dest, const char *src);
+extern char *strchr(const char *s, int c);
+extern char *strrchr(const char *s, int c);
+extern int strcmp(const char *s1, const char *s2);
+extern int strncmp(const char *s1, const char *s2, size_t n);
+extern size_t strlen(const char *s);
+extern size_t strnlen(const char *s, size_t count);
+
+extern void *memset(void *s, int c, size_t n);
+extern void *memmove(void *dest, const void *src, unsigned long n);
+extern void *memcpy(void *dest, const void *src, unsigned long n);
+extern void *memchr(const void *s, int c, size_t n);
+extern int memcmp(const void *s1, const void *s2, size_t n);
+
+#endif	/* _PPC_BOOT_STRING_H_ */
diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h
new file mode 100644
index 0000000000..11d2069fbb
--- /dev/null
+++ b/arch/powerpc/boot/swab.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_BOOT_SWAB_H_
+#define _PPC_BOOT_SWAB_H_
+
+/* Return @x with its two bytes exchanged. */
+static inline u16 swab16(u16 x)
+{
+	u16 low_byte = x & (u16)0x00ffU;
+	u16 high_byte = x & (u16)0xff00U;
+
+	return (low_byte << 8) | (high_byte >> 8);
+}
+
+/* Return @x with the order of its four bytes reversed. */
+static inline u32 swab32(u32 x)
+{
+	/* Shift first, then keep only the byte that landed in place. */
+	u32 outer = (x << 24) | (x >> 24);
+	u32 inner = ((x << 8) & (u32)0x00ff0000UL) |
+		    ((x >> 8) & (u32)0x0000ff00UL);
+
+	return outer | inner;
+}
+
+/* Return @x with the order of its eight bytes reversed. */
+static inline u64 swab64(u64 x)
+{
+	/* Shift first, then keep only the byte that landed in place. */
+	u64 swapped = (x << 56) | (x >> 56);
+
+	swapped |= (x << 40) & (u64)0x00ff000000000000ULL;
+	swapped |= (x << 24) & (u64)0x0000ff0000000000ULL;
+	swapped |= (x <<  8) & (u64)0x000000ff00000000ULL;
+	swapped |= (x >>  8) & (u64)0x00000000ff000000ULL;
+	swapped |= (x >> 24) & (u64)0x0000000000ff0000ULL;
+	swapped |= (x >> 40) & (u64)0x000000000000ff00ULL;
+
+	return swapped;
+}
+#endif /* _PPC_BOOT_SWAB_H_ */
diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c
new file mode 100644
index 0000000000..e3cc259986
--- /dev/null
+++ b/arch/powerpc/boot/treeboot-akebono.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * Based on earlier code:
+ *   Copyright (C) Paul Mackerras 1997.
+ *
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ *    Copyright 2007 David Gibson, IBM Corporation.
+ *    Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
+ *    Copyright © 2011 David Kleikamp IBM Corporation
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "reg.h"
+#include "io.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "libfdt.h"
+
+BSS_STACK(4096);
+
+#define SPRN_PIR	0x11E	/* Processor Identification Register */
+#define USERDATA_LEN	256	/* Length of userdata passed in by PIBS */
+#define MAX_RANKS	0x4
+#define DDR3_MR0CF	0x80010011U
+#define CCTL0_MCO2	0x8000080FU
+#define CCTL0_MCO3	0x80000810U
+#define CCTL0_MCO4	0x80000811U
+#define CCTL0_MCO5	0x80000812U
+#define CCTL0_MCO6	0x80000813U
+
+static unsigned long long ibm_akebono_memsize;
+static long long unsigned mac_addr;
+
+/*
+ * Add up the memory behind the MAX_RANKS DCRs starting at DDR3_MR0CF.
+ *
+ * A register contributes only when its bit 0 is set; its share is
+ * 0x800000 bytes shifted left by the field under mask 0x0000f000.
+ *
+ * Returns the total in bytes.
+ */
+static unsigned long long ibm_akebono_detect_memsize(void)
+{
+	unsigned long long total = 0;
+	unsigned rank;
+
+	for (rank = 0; rank < MAX_RANKS; rank++) {
+		u32 mr0cf = mfdcrx(DDR3_MR0CF + rank);
+
+		if (mr0cf & 1) {
+			u32 shift = (mr0cf & 0x0000f000) >> 12;
+
+			total += 0x800000ULL << shift;
+		}
+	}
+
+	return total;
+}
+
+/*
+ * Fixups installed as platform_ops.fixups by platform_init().
+ *
+ * Records the detected memory size in the device tree, adjusts two SD
+ * related CCTL0 DCRs and, when a MAC address was parsed from the command
+ * line, stores it in the "/plb/opb/ethernet" node.
+ */
+static void ibm_akebono_fixups(void)
+{
+	void *emac;
+	u32 reg;
+
+	dt_fixup_memory(0x0ULL,  ibm_akebono_memsize);
+
+	/* Fixup the SD timeout frequency */
+	mtdcrx(CCTL0_MCO4, 0x1);
+
+	/* Disable SD high-speed mode (which seems to be broken) */
+	reg = mfdcrx(CCTL0_MCO2) & ~0x2;
+	mtdcrx(CCTL0_MCO2, reg);
+
+	/*
+	 * Set the MAC address.  finddevice() yields a pointer, so test it
+	 * for NULL; ordering a pointer against the integer 0 is not valid C.
+	 * The six bytes are taken from offset 2 of the 64-bit mac_addr, i.e.
+	 * its low 48 bits assuming big-endian storage.
+	 */
+	emac = finddevice("/plb/opb/ethernet");
+	if (emac && mac_addr)
+		setprop(emac, "local-mac-address",
+			((u8 *) &mac_addr) + 2, 6);
+}
+
+/*
+ * Bootwrapper entry point for Akebono.
+ *
+ * @userdata: USERDATA_LEN-byte buffer passed in by PIBS.  Its last byte is
+ *            forced to NUL and the buffer is then used in place as the
+ *            kernel command line.
+ *
+ * A "local-mac-addr=<hex>" token found at the start of the buffer or after
+ * a space is parsed into mac_addr and cut out of the command line.  The
+ * function then sizes memory, starts the heap at _end, installs the
+ * platform hooks, derives timebase_period_ns from the cpu node of the
+ * device tree at _dtb_start and initialises the serial console.
+ */
+void platform_init(char *userdata)
+{
+	unsigned long end_of_ram, avail_ram;
+	u32 pir_reg;
+	int node, size;
+	const u32 *timebase;
+	int len, i, userdata_len;
+	char *end;
+
+	userdata[USERDATA_LEN - 1] = '\0';
+	userdata_len = strlen(userdata);
+	for (i = 0; i < userdata_len - 15; i++) {
+		if (strncmp(&userdata[i], "local-mac-addr=", 15) == 0) {
+			if (i > 0 && userdata[i - 1] != ' ') {
+				/* We've only found a substring ending
+				 * with local-mac-addr so this isn't
+				 * our mac address. */
+				continue;
+			}
+
+			mac_addr = strtoull(&userdata[i + 15], &end, 16);
+
+			/* Remove the "local-mac-addr=<...>" from the kernel
+			 * command line, including the trailing space if
+			 * present. */
+			if (*end == ' ')
+				end++;
+
+			/* Length of the removed token, as a pointer
+			 * difference rather than via pointer-to-int casts. */
+			len = end - &userdata[i];
+			/* The "+ 1" moves the terminating NUL as well. */
+			memmove(&userdata[i], end,
+				userdata_len - (len + i) + 1);
+			break;
+		}
+	}
+
+	loader_info.cmdline = userdata;
+	loader_info.cmdline_len = USERDATA_LEN;
+
+	ibm_akebono_memsize = ibm_akebono_detect_memsize();
+	/* Sizes that do not fit in 32 bits are clamped to ~0UL. */
+	if (ibm_akebono_memsize >> 32)
+		end_of_ram = ~0UL;
+	else
+		end_of_ram = ibm_akebono_memsize;
+	avail_ram = end_of_ram - (unsigned long)_end;
+
+	simple_alloc_init(_end, avail_ram, 128, 64);
+	platform_ops.fixups = ibm_akebono_fixups;
+	platform_ops.exit = ibm44x_dbcr_reset;
+	pir_reg = mfspr(SPRN_PIR);
+
+	/* Make sure FDT blob is sane */
+	if (fdt_check_header(_dtb_start) != 0)
+		fatal("Invalid device tree blob\n");
+
+	/*
+	 * libfdt reports "not found" as a negative error code; offset 0 is
+	 * a valid node offset, so only negative values mean failure.
+	 */
+	node = fdt_node_offset_by_prop_value(_dtb_start, -1, "device_type",
+					     "cpu", sizeof("cpu"));
+	if (node < 0)
+		fatal("Cannot find cpu node\n");
+	timebase = fdt_getprop(_dtb_start, node, "timebase-frequency", &size);
+	/* Skip a zero frequency rather than divide by it. */
+	if (timebase && (size == 4) && *timebase)
+		timebase_period_ns = 1000000000 / *timebase;
+
+	fdt_set_boot_cpuid_phys(_dtb_start, pir_reg);
+	fdt_init(_dtb_start);
+
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/treeboot-bamboo.c b/arch/powerpc/boot/treeboot-bamboo.c
new file mode 100644
index 0000000000..97b5b161db
--- /dev/null
+++ b/arch/powerpc/boot/treeboot-bamboo.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corporation, 2007
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Based on ebony wrapper:
+ * Copyright 2007 David Gibson, IBM Corporation.
+ */
+#include "ops.h"
+#include "stdio.h"
+#include "44x.h"
+#include "stdlib.h"
+
+BSS_STACK(4096);
+
+#define PIBS_MAC0 0xfffc0400
+#define PIBS_MAC1 0xfffc0500
+char pibs_mac0[6];
+char pibs_mac1[6];
+
+/*
+ * Parse the hexadecimal strings at PIBS_MAC0 and PIBS_MAC1 and store six
+ * bytes of each result, taken from offset 2 of the 64-bit value, in
+ * pibs_mac0 and pibs_mac1.
+ */
+static void read_pibs_mac(void)
+{
+	unsigned long long parsed;
+	const char *bytes = (const char *)&parsed;
+
+	parsed = strtoull((char *)PIBS_MAC0, 0, 16);
+	memcpy(pibs_mac0, bytes + 2, sizeof(pibs_mac0));
+
+	parsed = strtoull((char *)PIBS_MAC1, 0, 16);
+	memcpy(pibs_mac1, bytes + 2, sizeof(pibs_mac1));
+}
+
+/*
+ * Bootwrapper entry point for Bamboo: start the heap at _end, running up
+ * to address 0x8000000, read both MAC addresses left by PIBS and hand
+ * them to bamboo_init().
+ */
+void platform_init(void)
+{
+	const unsigned long ram_top = 0x8000000;
+	unsigned long heap_size = ram_top - (unsigned long)_end;
+
+	simple_alloc_init(_end, heap_size, 32, 64);
+	read_pibs_mac();
+	bamboo_init((u8 *)pibs_mac0, (u8 *)pibs_mac1);
+}
diff --git a/arch/powerpc/boot/treeboot-currituck.c b/arch/powerpc/boot/treeboot-currituck.c
new file mode 100644
index 0000000000..d53e8a592f
--- /dev/null
+++ b/arch/powerpc/boot/treeboot-currituck.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright © 2011 Tony Breeds IBM Corporation
+ *
+ * Based on earlier code:
+ *   Copyright (C) Paul Mackerras 1997.
+ *
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ *    Copyright 2007 David Gibson, IBM Corporation.
+ *    Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
+ *    Copyright © 2011 David Kleikamp IBM Corporation
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "reg.h"
+#include "io.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "libfdt.h"
+
+BSS_STACK(4096);
+
+#define MAX_RANKS	0x4
+#define DDR3_MR0CF	0x80010011U
+
+static unsigned long long ibm_currituck_memsize;
+/*
+ * Return the total memory size in bytes, summed over the MAX_RANKS DCRs
+ * starting at DDR3_MR0CF.  Registers with bit 0 clear are skipped; each
+ * remaining one adds 0x800000 << (field under mask 0x0000f000).
+ */
+static unsigned long long ibm_currituck_detect_memsize(void)
+{
+	unsigned long long bytes = 0;
+	unsigned rank;
+
+	for (rank = 0; rank < MAX_RANKS; rank++) {
+		const u32 mr0cf = mfdcrx(DDR3_MR0CF + rank);
+		const u32 size_code = (mr0cf & 0x0000f000) >> 12;
+
+		if (mr0cf & 1)
+			bytes += 0x800000ULL << size_code;
+	}
+
+	return bytes;
+}
+
+/*
+ * Fixups installed as platform_ops.fixups by platform_init().
+ *
+ * Records the detected memory size in the device tree and rewrites the
+ * last two cells of "dma-ranges" in every node of device type "pci" with
+ * the high and low 32 bits of that size.
+ */
+static void ibm_currituck_fixups(void)
+{
+	void *devp = finddevice("/");
+	u32 dma_ranges[7];
+
+	dt_fixup_memory(0x0ULL,  ibm_currituck_memsize);
+
+	while ((devp = find_node_by_devtype(devp, "pci"))) {
+		/*
+		 * Require all seven cells.  With a shorter property part of
+		 * dma_ranges[] would stay uninitialised and be written back
+		 * to the tree by setprop() below.
+		 */
+		if (getprop(devp, "dma-ranges", dma_ranges, sizeof(dma_ranges)) <
+		    (int)sizeof(dma_ranges)) {
+			printf("%s: Failed to get dma-ranges\r\n", __func__);
+			continue;
+		}
+
+		dma_ranges[5] = ibm_currituck_memsize >> 32;
+		dma_ranges[6] = ibm_currituck_memsize & 0xffffffffUL;
+
+		setprop(devp, "dma-ranges", dma_ranges, sizeof(dma_ranges));
+	}
+}
+
+#define SPRN_PIR	0x11E	/* Processor Identification Register */
+/*
+ * Bootwrapper entry point for Currituck.
+ *
+ * Sizes memory, starts the heap at _end, installs the platform hooks,
+ * derives timebase_period_ns from the cpu node of the device tree at
+ * _dtb_start and initialises the serial console.
+ */
+void platform_init(void)
+{
+	unsigned long end_of_ram, avail_ram;
+	u32 pir_reg;
+	int node, size;
+	const u32 *timebase;
+
+	ibm_currituck_memsize = ibm_currituck_detect_memsize();
+	/* Sizes that do not fit in 32 bits are clamped to ~0UL. */
+	if (ibm_currituck_memsize >> 32)
+		end_of_ram = ~0UL;
+	else
+		end_of_ram = ibm_currituck_memsize;
+	avail_ram = end_of_ram - (unsigned long)_end;
+
+	simple_alloc_init(_end, avail_ram, 128, 64);
+	platform_ops.fixups = ibm_currituck_fixups;
+	platform_ops.exit = ibm44x_dbcr_reset;
+	pir_reg = mfspr(SPRN_PIR);
+
+	/* Make sure FDT blob is sane */
+	if (fdt_check_header(_dtb_start) != 0)
+		fatal("Invalid device tree blob\n");
+
+	/*
+	 * libfdt reports "not found" as a negative error code; offset 0 is
+	 * a valid node offset, so only negative values mean failure.
+	 */
+	node = fdt_node_offset_by_prop_value(_dtb_start, -1, "device_type",
+	                                     "cpu", sizeof("cpu"));
+	if (node < 0)
+		fatal("Cannot find cpu node\n");
+	timebase = fdt_getprop(_dtb_start, node, "timebase-frequency", &size);
+	/* Skip a zero frequency rather than divide by it. */
+	if (timebase && (size == 4) && *timebase)
+		timebase_period_ns = 1000000000 / *timebase;
+
+	fdt_set_boot_cpuid_phys(_dtb_start, pir_reg);
+	fdt_init(_dtb_start);
+
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/treeboot-ebony.c b/arch/powerpc/boot/treeboot-ebony.c
new file mode 100644
index 0000000000..332e286591
--- /dev/null
+++ b/arch/powerpc/boot/treeboot-ebony.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Old U-boot compatibility for Ebony
+ *
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ *
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *   Based on cuboot-83xx.c, which is:
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "44x.h"
+
+BSS_STACK(4096);
+
+#define OPENBIOS_MAC_BASE	0xfffffe0c
+#define OPENBIOS_MAC_OFFSET	0xc
+
+/*
+ * Bootwrapper entry point for Ebony: start the heap at _end, running up
+ * to address 0x8000000, then call ebony_init() with the two MAC address
+ * locations at OPENBIOS_MAC_BASE and OPENBIOS_MAC_BASE + OPENBIOS_MAC_OFFSET.
+ */
+void platform_init(void)
+{
+	const unsigned long ram_top = 0x8000000;
+	u8 *mac0 = (u8 *)OPENBIOS_MAC_BASE;
+	u8 *mac1 = (u8 *)(OPENBIOS_MAC_BASE + OPENBIOS_MAC_OFFSET);
+
+	simple_alloc_init(_end, ram_top - (unsigned long)_end, 32, 64);
+	ebony_init(mac0, mac1);
+}
diff --git a/arch/powerpc/boot/treeboot-iss4xx.c b/arch/powerpc/boot/treeboot-iss4xx.c
new file mode 100644
index 0000000000..9ab556093c
--- /dev/null
+++ b/arch/powerpc/boot/treeboot-iss4xx.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
+ *
+ * Based on earlier code:
+ *   Copyright (C) Paul Mackerras 1997.
+ *
+ *   Matt Porter <mporter@kernel.crashing.org>
+ *   Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ *    Copyright 2007 David Gibson, IBM Corporation.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+#include "reg.h"
+#include "io.h"
+#include "dcr.h"
+#include "4xx.h"
+#include "44x.h"
+#include "libfdt.h"
+
+BSS_STACK(4096);
+
+static u32 ibm4xx_memstart;
+
+/*
+ * Fixups installed as platform_ops.fixups by platform_init().
+ *
+ * If the "/memory" node carries a complete "reg" property with a non-zero
+ * size cell, remember its low address cell in ibm4xx_memstart; otherwise
+ * let ibm4xx_sdram_fixup_memsize() work the memory size out.
+ */
+static void iss_4xx_fixups(void)
+{
+	void *memory;
+	u32 reg[3];
+
+	memory = finddevice("/memory");
+	if (!memory)
+		fatal("Can't find memory node\n");
+	/*
+	 * This assumes #address-cells = 2 and #size-cells = 1, i.e. "reg"
+	 * is <address-high address-low size>.  A missing or shorter
+	 * property is treated like a zero size so that reg[] is never read
+	 * uninitialised.
+	 */
+	if (getprop(memory, "reg", reg, sizeof(reg)) >= (int)sizeof(reg) &&
+	    reg[2])
+		/* If the device tree specifies the memory range, use it */
+		ibm4xx_memstart = reg[1];
+	else
+		/* otherwise, read it from the SDRAM controller */
+		ibm4xx_sdram_fixup_memsize();
+}
+
+/*
+ * platform_ops.vmlinux_alloc hook: always answers with the address held
+ * in ibm4xx_memstart; @size is not looked at.
+ */
+static void *iss_4xx_vmlinux_alloc(unsigned long size)
+{
+	void *load_addr = (void *)ibm4xx_memstart;
+
+	return load_addr;
+}
+
+#define SPRN_PIR	0x11E	/* Processor Identification Register */
+/*
+ * Bootwrapper entry point for the ISS 4xx platform: start the heap at
+ * _end, running up to address 0x08000000, install the platform hooks,
+ * record the PIR value as the boot CPU id in the device tree at
+ * _dtb_start and initialise the serial console.
+ */
+void platform_init(void)
+{
+	const unsigned long ram_top = 0x08000000;
+	unsigned long heap_size = ram_top - (unsigned long)_end;
+	u32 boot_cpu;
+
+	simple_alloc_init(_end, heap_size, 128, 64);
+
+	platform_ops.fixups = iss_4xx_fixups;
+	platform_ops.vmlinux_alloc = iss_4xx_vmlinux_alloc;
+	platform_ops.exit = ibm44x_dbcr_reset;
+
+	boot_cpu = mfspr(SPRN_PIR);
+	fdt_set_boot_cpuid_phys(_dtb_start, boot_cpu);
+	fdt_init(_dtb_start);
+
+	serial_console_init();
+}
diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h
new file mode 100644
index 0000000000..8a4c418b72
--- /dev/null
+++ b/arch/powerpc/boot/types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+#include <stdbool.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef unsigned char		u8;
+typedef unsigned short		u16;
+typedef unsigned int		u32;
+typedef unsigned long long	u64;
+typedef signed char		s8;
+typedef short			s16;
+typedef int			s32;
+typedef long long		s64;
+
+/* required for opal-api.h */
+typedef u8  uint8_t;
+typedef u16 uint16_t;
+typedef u32 uint32_t;
+typedef u64 uint64_t;
+typedef s8  int8_t;
+typedef s16 int16_t;
+typedef s32 int32_t;
+typedef s64 int64_t;
+
+/*
+ * min()/max(): smaller/larger of two values.
+ *
+ * Each argument is evaluated exactly once, into a temporary of its own
+ * type.  The otherwise unused pointer comparison (&_x == &_y) is there so
+ * the compiler complains when the two arguments have different types.
+ * Both macros rely on the GNU typeof and statement-expression extensions.
+ */
+#define min(x,y) ({ \
+	typeof(x) _x = (x);	\
+	typeof(y) _y = (y);	\
+	(void) (&_x == &_y);	\
+	_x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+	typeof(x) _x = (x);	\
+	typeof(y) _y = (y);	\
+	(void) (&_x == &_y);	\
+	_x > _y ? _x : _y; })
+
+/*
+ * min_t()/max_t(): min()/max() after casting both operands to @type.
+ * The operands are parenthesised so the cast applies to the whole
+ * expression, e.g. min_t(u32, a + b, c) casts (a + b) and not just a.
+ */
+#define min_t(type, a, b) min(((type)(a)), ((type)(b)))
+#define max_t(type, a, b) max(((type)(a)), ((type)(b)))
+
+/*
+ * bool, true and false come from <stdbool.h>, which this header includes.
+ * A local "typedef int bool;" clashes with that definition and does not
+ * compile, so no fallback is declared here.
+ */
+#endif /* _TYPES_H_ */
diff --git a/arch/powerpc/boot/ugecon.c b/arch/powerpc/boot/ugecon.c
new file mode 100644
index 0000000000..938a38bd40
--- /dev/null
+++ b/arch/powerpc/boot/ugecon.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/boot/ugecon.c
+ *
+ * USB Gecko bootwrapper console.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+
+#define EXI_CLK_32MHZ           5
+
+#define EXI_CSR                 0x00
+#define   EXI_CSR_CLKMASK       (0x7<<4)
+#define     EXI_CSR_CLK_32MHZ   (EXI_CLK_32MHZ<<4)
+#define   EXI_CSR_CSMASK        (0x7<<7)
+#define     EXI_CSR_CS_0        (0x1<<7)  /* Chip Select 001 */
+
+#define EXI_CR                  0x0c
+#define   EXI_CR_TSTART         (1<<0)
+#define   EXI_CR_WRITE		(1<<2)
+#define   EXI_CR_READ_WRITE     (2<<2)
+#define   EXI_CR_TLEN(len)      (((len)-1)<<4)
+
+#define EXI_DATA                0x10
+
+
+/* virtual address base for input/output, retrieved from device tree */
+static void *ug_io_base;
+
+
+/*
+ * Run one EXI exchange on the channel at ug_io_base.
+ *
+ * @in: word written to the data register before the transfer is started.
+ *
+ * Returns the data register contents read back once the transfer has
+ * completed and the device has been deselected.
+ */
+static u32 ug_io_transaction(u32 in)
+{
+	u32 *const csr = ug_io_base + EXI_CSR;
+	u32 *const cr = ug_io_base + EXI_CR;
+	u32 *const data = ug_io_base + EXI_DATA;
+
+	/* select: 32MHz clock, chip select 0 */
+	out_be32(csr, EXI_CSR_CLK_32MHZ | EXI_CSR_CS_0);
+
+	/* load the outgoing word, then start a read/write transfer */
+	out_be32(data, in);
+	out_be32(cr, EXI_CR_TLEN(2) | EXI_CR_READ_WRITE | EXI_CR_TSTART);
+
+	/* spin until TSTART reads back as clear */
+	while (in_be32(cr) & EXI_CR_TSTART)
+		barrier();
+
+	/* deselect */
+	out_be32(csr, 0);
+
+	return in_be32(data);
+}
+
+/*
+ * Send command 0xc0000000 and return bit 0x04000000 of the reply:
+ * non-zero when the adapter reports its transmit FIFO as ready.
+ */
+static int ug_is_txfifo_ready(void)
+{
+	const u32 reply = ug_io_transaction(0xc0000000);
+
+	return reply & 0x04000000;
+}
+
+/* Send @ch via command 0xb0000000, with the character placed at bit 20. */
+static void ug_raw_putc(char ch)
+{
+	const u32 command = 0xb0000000 | (ch << 20);
+
+	ug_io_transaction(command);
+}
+
+/*
+ * Write @ch to the USB Gecko, if one was found.
+ *
+ * The transmit FIFO is polled up to 17 times (one initial check plus 16
+ * retries); the character is dropped if it never becomes ready.
+ */
+static void ug_putc(char ch)
+{
+	int retries = 16;
+
+	if (ug_io_base == NULL)
+		return;
+
+	while (!ug_is_txfifo_ready()) {
+		/* Out of retries: leave with retries == -1. */
+		if (retries-- == 0)
+			break;
+		barrier();
+	}
+
+	if (retries >= 0)
+		ug_raw_putc(ch);
+}
+
+/*
+ * Console write hook: send @len characters from @buf, emitting a '\r'
+ * in front of every '\n'.
+ */
+void ug_console_write(const char *buf, int len)
+{
+	const char *p = buf;
+
+	while (len--) {
+		char c = *p++;
+
+		if (c == '\n')
+			ug_putc('\r');
+		ug_putc(c);
+	}
+}
+
+/*
+ * Return non-zero when ug_io_base is set and command 0x90000000 is
+ * answered with 0x04700000.
+ */
+static int ug_is_adapter_present(void)
+{
+	return ug_io_base ? ug_io_transaction(0x90000000) == 0x04700000 : 0;
+}
+
+/*
+ * Look up the "nintendo,flipper-exi" node and return its "virtual-reg"
+ * value as a pointer, or NULL when the node is missing or the property
+ * does not have the size of one u32.
+ */
+static void *ug_grab_exi_io_base(void)
+{
+	void *devp = find_node_by_compatible(NULL, "nintendo,flipper-exi");
+	u32 v;
+
+	if (devp == NULL)
+		return NULL;
+	if (getprop(devp, "virtual-reg", &v, sizeof(v)) != sizeof(v))
+		return NULL;
+
+	return (void *)v;
+}
+
+/*
+ * Probe both memcard slots for a USB Gecko.
+ *
+ * Returns the I/O base of the first channel that answers, which is also
+ * left in ug_io_base, or NULL (with ug_io_base cleared) when neither
+ * does. NULL is also returned when the EXI base cannot be looked up.
+ */
+void *ug_probe(void)
+{
+	void *exi_io_base = ug_grab_exi_io_base();
+	int slot;
+
+	if (!exi_io_base)
+		return NULL;
+
+	/* look for a usbgecko on memcard slots A and B */
+	for (slot = 0; slot < 2; slot++) {
+		ug_io_base = exi_io_base + 0x14 * slot;
+		if (ug_is_adapter_present())
+			return ug_io_base;
+	}
+
+	ug_io_base = NULL;
+	return NULL;
+}
+
diff --git a/arch/powerpc/boot/ugecon.h b/arch/powerpc/boot/ugecon.h
new file mode 100644
index 0000000000..291f33f776
--- /dev/null
+++ b/arch/powerpc/boot/ugecon.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/boot/ugecon.h
+ *
+ * USB Gecko early bootwrapper console.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#ifndef __UGECON_H
+#define __UGECON_H
+
+extern void *ug_probe(void);
+
+extern void ug_putc(char ch);
+extern void ug_console_write(const char *buf, int len);
+
+#endif /* __UGECON_H */
+
diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
new file mode 100644
index 0000000000..6a92376daf
--- /dev/null
+++ b/arch/powerpc/boot/util.S
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copied from <file:arch/powerpc/kernel/misc_32.S>
+ *
+ * This file contains miscellaneous low-level functions.
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * kexec bits:
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ */
+#include "ppc_asm.h"
+
+#define SPRN_PVR        0x11F   /* Processor Version Register */
+
+	.text
+
+/* udelay needs to know the period of the
+ * timebase in nanoseconds.  This used to be hardcoded to be 60ns
+ * (period of 66MHz/4).  Now a variable is used that is initialized to
+ * 60 for backward compatibility, but it can be overridden as necessary
+ * with code something like this:
+ *    extern unsigned long timebase_period_ns;
+ *    timebase_period_ns = 1000000000 / bd->bi_tbfreq;
+ */
+	.data
+	.globl timebase_period_ns
+timebase_period_ns:
+	.long	60
+
+	.text
+/*
+ * Delay for a number of microseconds
+ */
+	.globl	udelay
+udelay:
+	/*
+	 * In: r3 = delay in microseconds.  Scratch: r4-r9, cr0.
+	 * The multiply and divide below work on 32-bit signed words, so
+	 * large delays can overflow.
+	 */
+	mulli	r4,r3,1000	/* nanoseconds */
+	/*  Change r4 to be the number of ticks using:
+	 *	(nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns
+	 *  timebase_period_ns defaults to 60 (16.6MHz) */
+	mflr	r5		/* save LR around the bcl below */
+	bcl	20,31,0f	/* LR = address of 0: */
+0:	mflr	r6
+	mtlr	r5		/* restore the caller's LR */
+	addis	r5,r6,(timebase_period_ns-0b)@ha
+	lwz	r5,(timebase_period_ns-0b)@l(r5)	/* r5 = timebase_period_ns */
+	add	r4,r4,r5
+	addi	r4,r4,-1
+	divw	r4,r4,r5	/* BUS ticks */
+1:	MFTBU(r5)
+	MFTBL(r6)
+	MFTBU(r7)
+	cmpw	0,r5,r7		/* retry if the upper half changed meanwhile */
+	bne	1b		/* Get [synced] base time */
+	addc	r9,r6,r4	/* Compute end time */
+	addze	r8,r5		/* r8:r9 = end time (upper:lower) */
+2:	MFTBU(r5)
+	cmpw	0,r5,r8
+	blt	2b		/* upper half still below the end time */
+	bgt	3f		/* upper half past the end time: done */
+	MFTBL(r6)
+	cmpw	0,r6,r9
+	blt	2b		/* upper halves equal: compare lower halves */
+3:	blr
diff --git a/arch/powerpc/boot/wii-head.S b/arch/powerpc/boot/wii-head.S
new file mode 100644
index 0000000000..7b1e5a019f
--- /dev/null
+++ b/arch/powerpc/boot/wii-head.S
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/boot/wii-head.S
+ *
+ * Nintendo Wii bootwrapper entry.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include "ppc_asm.h"
+
+/*
+ * The entry code makes no assumptions regarding:
+ * - if the data and instruction caches are enabled or not
+ * - if the MMU is enabled or not
+ * - if the high BATs are enabled or not
+ *
+ * We enable the high BATs, enable the caches if not already enabled,
+ * enable the MMU with an identity mapping scheme and jump to the start code.
+ */
+
+	.text
+
+/*
+ * _zimage_start: first code run in the wrapper.
+ *
+ * Uses r8 and r9 as scratch (r0 only as a discarded andi. target), zeroes
+ * r3-r5 as the arguments for the next stage and finally branches to
+ * _zimage_start_lib.
+ */
+	.globl _zimage_start
+_zimage_start:
+
+	/* turn the MMU off */
+	mfmsr	9
+	rlwinm	9, 9, 0, ~((1<<4)|(1<<5)) /* MSR_DR|MSR_IR */
+	bcl	20, 31, 1f	/* LR = address of 1: (position independent) */
+1:
+	mflr	8
+	clrlwi	8, 8, 3		/* convert to a real address */
+	addi	8, 8, _mmu_off - 1b
+	mtsrr0	8
+	mtsrr1	9
+	rfi			/* continue at _mmu_off with the new MSR */
+_mmu_off:
+	/* MMU disabled */
+
+	/* setup BATs */
+	isync
+	/* first write zero to every upper BAT register */
+	li      8, 0
+	mtspr	0x210, 8	/* IBAT0U */
+	mtspr	0x212, 8	/* IBAT1U */
+	mtspr	0x214, 8	/* IBAT2U */
+	mtspr	0x216, 8	/* IBAT3U */
+	mtspr	0x218, 8	/* DBAT0U */
+	mtspr	0x21a, 8	/* DBAT1U */
+	mtspr	0x21c, 8	/* DBAT2U */
+	mtspr	0x21e, 8	/* DBAT3U */
+
+	mtspr	0x230, 8	/* IBAT4U */
+	mtspr	0x232, 8	/* IBAT5U */
+	mtspr	0x234, 8	/* IBAT6U */
+	mtspr	0x236, 8	/* IBAT7U */
+	mtspr	0x238, 8	/* DBAT4U */
+	mtspr	0x23a, 8	/* DBAT5U */
+	mtspr	0x23c, 8	/* DBAT6U */
+	mtspr	0x23e, 8	/* DBAT7U */
+
+	/* BAT0: identity map from address 0, instructions and data */
+	li	8, 0x01ff	/* first 16MiB */
+	li	9, 0x0002	/* rw */
+	mtspr	0x211, 9	/* IBAT0L */
+	mtspr	0x210, 8	/* IBAT0U */
+	mtspr	0x219, 9	/* DBAT0L */
+	mtspr	0x218, 8	/* DBAT0U */
+
+	/* DBAT1: data-only mapping of the I/O area at 0x0c000000 */
+	lis	8, 0x0c00	/* I/O mem */
+	ori	8, 8, 0x3ff	/* 32MiB */
+	lis	9, 0x0c00
+	ori	9, 9, 0x002a	/* uncached, guarded, rw */
+	mtspr	0x21b, 9	/* DBAT1L */
+	mtspr	0x21a, 8	/* DBAT1U */
+
+	/* BAT2: identity map from 0x01000000, instructions and data */
+	lis	8, 0x0100	/* next 8MiB */
+	ori	8, 8, 0x00ff	/* 8MiB */
+	lis	9, 0x0100
+	ori	9, 9, 0x0002	/* rw */
+	mtspr	0x215, 9	/* IBAT2L */
+	mtspr	0x214, 8	/* IBAT2U */
+	mtspr	0x21d, 9	/* DBAT2L */
+	mtspr	0x21c, 8	/* DBAT2U */
+
+	/* BAT3: identity map from 0x10000000, instructions and data */
+	lis	8, 0x1000	/* MEM2 */
+	ori	8, 8, 0x07ff	/* 64MiB */
+	lis	9, 0x1000
+	ori	9, 9, 0x0002	/* rw */
+	mtspr	0x216, 8	/* IBAT3U */
+	mtspr	0x217, 9	/* IBAT3L */
+	mtspr	0x21e, 8	/* DBAT3U */
+	mtspr	0x21f, 9	/* DBAT3L */
+
+	/* enable the high BATs */
+	mfspr	8, 0x3f3	/* HID4 */
+	oris	8, 8, 0x0200	/* set bit 0x02000000 */
+	mtspr	0x3f3, 8	/* HID4 */
+
+	/* enable and invalidate the caches if not already enabled */
+	mfspr	8, 0x3f0	/* HID0 */
+	andi.	0, 8, (1<<15)		/* HID0_ICE */
+	bne	1f			/* icache already on: leave it alone */
+	ori	8, 8, (1<<15)|(1<<11)	/* HID0_ICE|HID0_ICFI*/
+1:
+	andi.	0, 8, (1<<14)		/* HID0_DCE */
+	bne	1f			/* dcache already on: leave it alone */
+	ori	8, 8, (1<<14)|(1<<10)	/* HID0_DCE|HID0_DCFI*/
+1:
+	mtspr	0x3f0, 8	/* HID0 */
+	isync
+
+	/* initialize arguments */
+	li	3, 0
+	li	4, 0
+	li	5, 0
+
+	/* turn the MMU on */
+	bcl	20, 31, 1f	/* LR = address of 1: */
+1:
+	mflr	8
+	addi	8, 8, _mmu_on - 1b
+	mfmsr	9
+	ori	9, 9, (1<<4)|(1<<5) /* MSR_DR|MSR_IR */
+	mtsrr0	8
+	mtsrr1	9
+	sync
+	rfi			/* continue at _mmu_on with translation on */
+_mmu_on:
+	/* turn on the front blue led (aka: yay! we got here!) */
+	lis	8, 0x0d00
+	ori	8, 8, 0x00c0
+	lwz	9, 0(8)		/* read-modify-write of the word at 0x0d0000c0 */
+	ori	9, 9, 0x20
+	stw	9, 0(8)
+
+	b _zimage_start_lib
+
diff --git a/arch/powerpc/boot/wii.c b/arch/powerpc/boot/wii.c
new file mode 100644
index 0000000000..59406ad046
--- /dev/null
+++ b/arch/powerpc/boot/wii.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/boot/wii.c
+ *
+ * Nintendo Wii bootwrapper support
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+#include "ugecon.h"
+
+BSS_STACK(8192);
+
+#define HW_REG(x)		((void *)(x))
+
+#define EXI_CTRL		HW_REG(0x0d800070)
+#define EXI_CTRL_ENABLE		(1<<0)
+
+#define MEM2_TOP		(0x10000000 + 64*1024*1024)
+#define FIRMWARE_DEFAULT_SIZE	(12*1024*1024)
+
+
+/*
+ * Information header published by 'mini'.  A pointer to it is read from
+ * the last word of MEM2 (see mipc_get_infohdr()).
+ */
+struct mipc_infohdr {
+	char magic[3];		/* must be "IPC" for the header to be used */
+	u8 version;		/* not read in this file */
+	u32 mem2_boundary;	/* MEM2 address used as the top of usable MEM2 */
+	/* The IPC fields below are not read in this file. */
+	u32 ipc_in;
+	size_t ipc_in_size;
+	u32 ipc_out;
+	size_t ipc_out_size;
+};
+
+/*
+ * Return 0 when @pa lies in 0x10000000..0x14000000 (both ends accepted),
+ * -EINVAL otherwise.
+ */
+static int mipc_check_address(u32 pa)
+{
+	/* only MEM2 addresses */
+	return (pa < 0x10000000 || pa > 0x14000000) ? -EINVAL : 0;
+}
+
+/*
+ * Locate the 'mini' information header.
+ *
+ * Returns the header, or NULL (after printing a diagnostic) when one of
+ * the pointers fails mipc_check_address() or the magic is not "IPC".
+ */
+static struct mipc_infohdr *mipc_get_infohdr(void)
+{
+	/* 'mini' header pointer is the last word of MEM2 memory */
+	struct mipc_infohdr **hdrp = (struct mipc_infohdr **)0x13fffffc;
+	struct mipc_infohdr *hdr;
+
+	if (mipc_check_address((u32)hdrp)) {
+		printf("mini: invalid hdrp %08X\n", (u32)hdrp);
+		return NULL;
+	}
+
+	hdr = *hdrp;
+	if (mipc_check_address((u32)hdr)) {
+		printf("mini: invalid hdr %08X\n", (u32)hdr);
+		return NULL;
+	}
+
+	if (memcmp(hdr->magic, "IPC", 3)) {
+		printf("mini: invalid magic\n");
+		return NULL;
+	}
+
+	return hdr;
+}
+
+/*
+ * Fetch the MEM2 boundary advertised by 'mini'.
+ *
+ * @mem2_boundary: written only on success.
+ *
+ * Returns 0 on success, -1 when no valid header is found and -EINVAL
+ * when the advertised boundary fails mipc_check_address().
+ */
+static int mipc_get_mem2_boundary(u32 *mem2_boundary)
+{
+	struct mipc_infohdr *hdr = mipc_get_infohdr();
+	u32 boundary;
+
+	if (!hdr)
+		return -1;
+
+	boundary = hdr->mem2_boundary;
+	if (mipc_check_address(boundary)) {
+		printf("mini: invalid mem2_boundary %08X\n",
+		       boundary);
+		return -EINVAL;
+	}
+
+	*mem2_boundary = boundary;
+	return 0;
+}
+
+/*
+ * Fixups installed as platform_ops.fixups by platform_init().
+ *
+ * Shrinks the second range of the "/memory" node's "reg" so that it ends
+ * at the MEM2 boundary: the one reported by 'mini', or
+ * MEM2_TOP - FIRMWARE_DEFAULT_SIZE when that cannot be read.  Nothing is
+ * changed unless "reg" holds exactly four cells and the boundary falls
+ * strictly inside that second range.
+ */
+static void platform_fixups(void)
+{
+	u32 reg[4];
+	u32 mem2_boundary;
+	int len;
+	void *mem = finddevice("/memory");
+
+	if (!mem)
+		fatal("Can't find memory node\n");
+
+	/* two ranges of (address, size) words */
+	len = getprop(mem, "reg", reg, sizeof(reg));
+	if (len != sizeof(reg))
+		return;		/* nothing to do */
+
+	/* retrieve MEM2 boundary from 'mini', or fall back to a sane value */
+	if (mipc_get_mem2_boundary(&mem2_boundary))
+		mem2_boundary = MEM2_TOP - FIRMWARE_DEFAULT_SIZE;
+
+	if (mem2_boundary <= reg[2] || mem2_boundary >= reg[2] + reg[3])
+		return;
+
+	reg[3] = mem2_boundary - reg[2];
+	printf("top of MEM2 @ %08X\n", reg[2] + reg[3]);
+	setprop(mem, "reg", reg, sizeof(reg));
+}
+
+/*
+ * Bootwrapper entry point for the Wii.  @r3, @r4 and @r5 are not used.
+ *
+ * Starts the heap at _end, running up to the 24 MiB mark, initialises the
+ * device tree at _dtb_start, enables EXI, switches console output to a
+ * USB Gecko when one is found and installs the fixups hook.
+ */
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+	u32 heapsize = 24*1024*1024 - (u32)_end;
+	u32 exi_ctrl;
+
+	simple_alloc_init(_end, heapsize, 32, 64);
+	fdt_init(_dtb_start);
+
+	/*
+	 * 'mini' boots the Broadway processor with EXI disabled.
+	 * We need it enabled before probing for the USB Gecko.
+	 */
+	exi_ctrl = in_be32(EXI_CTRL);
+	out_be32(EXI_CTRL, exi_ctrl | EXI_CTRL_ENABLE);
+
+	if (ug_probe())
+		console_ops.write = ug_console_write;
+
+	platform_ops.fixups = platform_fixups;
+}
+
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
new file mode 100755
index 0000000000..352d7de240
--- /dev/null
+++ b/arch/powerpc/boot/wrapper
@@ -0,0 +1,608 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+# Copyright (C) 2006 Paul Mackerras, IBM Corporation <paulus@samba.org>
+
+# This script takes a kernel binary and optionally an initrd image
+# and/or a device-tree blob, and creates a bootable zImage for a
+# given platform.
+
+# Options:
+# -o zImage	specify output file
+# -p platform	specify platform (links in $platform.o)
+# -i initrd	specify initrd file
+# -d devtree	specify device-tree blob
+# -s tree.dts	specify device-tree source file (needs dtc installed)
+# -e esm_blob   specify ESM blob for secure images
+# -c		cache $kernel.strip.gz (use if present & newer, else make)
+# -C prefix	specify command prefix for cross-building tools
+#		(strip, objcopy, ld)
+# -D dir	specify directory containing data files used by script
+#		(default ./arch/powerpc/boot)
+# -W dir	specify working directory for temporary files (default .)
+# -z		use gzip (legacy)
+# -Z zsuffix    compression to use (gz, xz, lzma, lzo or none)
+
+# Stop execution if any command fails
+set -e
+
+export LC_ALL=C
+
+# Allow for verbose output
+if [ "$V" = 1 ]; then
+    set -x
+    map="-Map wrapper.map"
+fi
+
+# defaults
+kernel=
+ofile=zImage
+platform=of
+initrd=
+dtb=
+dts=
+esm_blob=
+cacheit=
+binary=
+compression=.gz
+uboot_comp=gzip
+pie=
+format=
+notext=
+rodynamic=
+
+# cross-compilation prefix
+CROSS=
+
+# mkimage wrapper script
+MKIMAGE=$srctree/scripts/mkuboot.sh
+
+# directory for object and other files used by this script
+object=arch/powerpc/boot
+objbin=$object
+dtc=scripts/dtc/dtc
+
+# directory for working files
+tmpdir=.
+
+usage() {	# print the option summary on stderr and exit with status 1
+    echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2
+    echo '       [-d devtree] [-s tree.dts] [-e esm_blob]' >&2
+    echo '       [-c] [-C cross-prefix] [-D datadir] [-W workingdir]' >&2
+    echo '       [-z] [-Z (gz|xz|lzma|lzo|none)] [--no-gzip] [vmlinux]' >&2
+    exit 1
+}
+
+run_cmd() {
+    if [ "$V" = 1 ]; then
+        $* 2>&1
+    else
+        local msg
+
+        set +e
+        msg=$($* 2>&1)
+
+        if [ $? -ne "0" ]; then
+                echo $msg
+                exit 1
+        fi
+        set -e
+    fi
+}
+
+# Parse the command line; the one non-option argument is the kernel image.
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+    -o)
+	shift
+	[ "$#" -gt 0 ] || usage
+	ofile="$1"
+	;;
+    -p)
+	shift
+	[ "$#" -gt 0 ] || usage
+	platform="$1"
+	;;
+    -i)
+	shift
+	[ "$#" -gt 0 ] || usage
+	initrd="$1"
+	;;
+    -d)
+	shift
+	[ "$#" -gt 0 ] || usage
+	dtb="$1"
+	;;
+    -e)
+	shift
+	[ "$#" -gt 0 ] || usage
+	esm_blob="$1"
+	;;
+    -s)
+	shift
+	[ "$#" -gt 0 ] || usage
+	dts="$1"
+	;;
+    -c)
+	cacheit=y
+	;;
+    -C)
+	shift
+	[ "$#" -gt 0 ] || usage
+	CROSS="$1"
+	;;
+    -D)
+	shift
+	[ "$#" -gt 0 ] || usage
+	object="$1"
+	objbin="$1"
+	;;
+    -W)
+	shift
+	[ "$#" -gt 0 ] || usage
+	tmpdir="$1"
+	;;
+    -z)
+	compression=.gz
+	uboot_comp=gzip
+	;;
+    -Z)
+	shift
+	[ "$#" -gt 0 ] || usage
+	# Map the suffix to the extension of the compressed vmlinux and
+	# to the compression name that mkimage expects for -C.  Unknown
+	# suffixes are rejected; the previous "!= a -o != b" test was
+	# always true, so they used to slip through.
+	case "$1" in
+	gz)		compression=.gz;	uboot_comp=gzip	;;
+	xz|lzma|lzo)	compression=".$1";	uboot_comp="$1"	;;
+	none)		compression=;		uboot_comp=none	;;
+	*)		usage	;;
+	esac
+	;;
+    --no-gzip)
+        # a "feature" of the wrapper script is that it can be used outside
+        # the kernel tree. So keeping this around for backwards compatibility.
+        compression=
+	uboot_comp=none
+        ;;
+    -?)
+	usage
+	;;
+    *)
+	# first non-option argument is the kernel; a second one is an error
+	[ -z "$kernel" ] || usage
+	kernel="$1"
+	;;
+    esac
+    shift
+done
+
+
+if [ -n "$dts" ]; then
+    if [ ! -r "$dts" -a -r "$object/dts/$dts" ]; then
+	dts="$object/dts/$dts"
+    fi
+    if [ -z "$dtb" ]; then
+	dtb="$platform.dtb"
+    fi
+    $dtc -O dtb -o "$dtb" -b 0 "$dts"
+fi
+
+if [ -z "$kernel" ]; then
+    kernel=vmlinux
+fi
+
+LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+case "$elfformat" in
+    elf64-powerpcle)	format=elf64lppc	;;
+    elf64-powerpc)	format=elf32ppc	;;
+    elf32-powerpc)	format=elf32ppc	;;
+esac
+
+ld_version()
+{
+    # Turn the first line of "ld --version" output (stdin) into one integer,
+    # major*100000000 + minor*1000000 + patch*10000, for numeric comparison.
+    # Poached from scripts/ld-version.sh: wrapper is distributed standalone.
+    awk '{
+	gsub(".*\\)", "");
+	gsub(".*version ", "");
+	gsub("-.*", "");
+	split($1,a, ".");
+	if( length(a[3]) == "8" )
+		# a[3] is probably a date of format yyyymmdd used for release snapshots. We
+		# can assume it to be zero as it does not signify a new version as such.
+		a[3] = 0;
+	print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
+	exit
+    }'
+}
+
+ld_is_lld()
+{
+	${CROSS}ld -V 2>&1 | grep -q LLD
+}
+
+# Do not include PT_INTERP segment when linking pie. Non-pie linking
+# just ignores this option.
+LD_VERSION=$(${CROSS}ld --version | ld_version)
+LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version)
+if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then
+	nodl="--no-dynamic-linker"
+fi
+
+# suppress some warnings in recent ld versions
+nowarn="-z noexecstack"
+if ! ld_is_lld; then
+	if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then
+		nowarn="$nowarn --no-warn-rwx-segments"
+	fi
+fi
+
+platformo=$object/"$platform".o
+lds=$object/zImage.lds
+ext=strip
+objflags=-S
+tmp=$tmpdir/zImage.$$.o
+ksection=.kernel:vmlinux.strip
+isection=.kernel:initrd
+esection=.kernel:esm_blob
+link_address='0x400000'
+make_space=y
+
+
+if [ -n "$esm_blob" -a "$platform" != "pseries" ]; then
+    echo "ESM blob not support on non-pseries platforms" >&2
+    exit 1
+fi
+
+case "$platform" in
+of)
+    platformo="$object/of.o $object/epapr.o"
+    make_space=n
+    ;;
+pseries)
+    platformo="$object/pseries-head.o $object/of.o $object/epapr.o"
+    link_address='0x4000000'
+    if [ "$format" != "elf32ppc" ]; then
+	link_address=
+	pie=-pie
+    fi
+    make_space=n
+    ;;
+maple)
+    platformo="$object/of.o $object/epapr.o"
+    link_address='0x400000'
+    make_space=n
+    ;;
+pmac|chrp)
+    platformo="$object/of.o $object/epapr.o"
+    make_space=n
+    ;;
+coff)
+    platformo="$object/crt0.o $object/of.o $object/epapr.o"
+    lds=$object/zImage.coff.lds
+    link_address='0x500000'
+    make_space=n
+    pie=
+    ;;
+miboot|uboot*)
+    # miboot and U-boot want just the bare bits, not an ELF binary
+    ext=bin
+    objflags="-O binary"
+    tmp="$ofile"
+    ksection=image
+    isection=initrd
+    ;;
+cuboot*)
+    binary=y
+    compression=
+    case "$platform" in
+    *-mpc866ads|*-mpc885ads|*-adder875*|*-ep88xc)
+        platformo=$object/cuboot-8xx.o
+        ;;
+    *5200*|*-motionpro)
+        platformo=$object/cuboot-52xx.o
+        ;;
+    *-pq2fads|*-ep8248e|*-mpc8272*|*-storcenter)
+        platformo=$object/cuboot-pq2.o
+        ;;
+    *-mpc824*)
+        platformo=$object/cuboot-824x.o
+        ;;
+    *-mpc83*|*-asp834x*)
+        platformo=$object/cuboot-83xx.o
+        ;;
+    *-tqm8541|*-mpc8560*|*-tqm8560|*-tqm8555|*-ksi8560*)
+        platformo=$object/cuboot-85xx-cpm2.o
+        ;;
+    *-mpc85*|*-tqm85*)
+        platformo=$object/cuboot-85xx.o
+        ;;
+    *-amigaone)
+        link_address='0x800000'
+        ;;
+    esac
+    ;;
+ps3)
+    platformo="$object/ps3-head.o $object/ps3-hvcall.o $object/ps3.o"
+    lds=$object/zImage.ps3.lds
+    compression=
+    ext=bin
+    objflags="-O binary --set-section-flags=.bss=contents,alloc,load,data"
+    ksection=.kernel:vmlinux.bin
+    isection=.kernel:initrd
+    link_address=''
+    make_space=n
+    pie=
+    ;;
+ep88xc|ep405|ep8248e)
+    platformo="$object/fixed-head.o $object/$platform.o"
+    binary=y
+    ;;
+adder875-redboot)
+    platformo="$object/fixed-head.o $object/redboot-8xx.o"
+    binary=y
+    ;;
+simpleboot-*)
+    platformo="$object/fixed-head.o $object/simpleboot.o"
+    binary=y
+    ;;
+asp834x-redboot)
+    platformo="$object/fixed-head.o $object/redboot-83xx.o"
+    binary=y
+    ;;
+xpedite52*)
+    link_address='0x1400000'
+    platformo=$object/cuboot-85xx.o
+    ;;
+gamecube|wii)
+    link_address='0x600000'
+    platformo="$object/$platform-head.o $object/$platform.o"
+    ;;
+microwatt)
+    link_address='0x500000'
+    platformo="$object/fixed-head.o $object/$platform.o"
+    binary=y
+    ;;
+treeboot-currituck)
+    link_address='0x1000000'
+    ;;
+treeboot-akebono)
+    link_address='0x1000000'
+    ;;
+treeboot-iss4xx-mpic)
+    platformo="$object/treeboot-iss4xx.o"
+    ;;
+epapr)
+    platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
+    link_address='0x20000000'
+    pie=-pie
+    notext='-z notext'
+    rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi)
+    ;;
+mvme5100)
+    platformo="$object/fixed-head.o $object/mvme5100.o"
+    binary=y
+    ;;
+mvme7100)
+    platformo="$object/motload-head.o $object/mvme7100.o"
+    link_address='0x4000000'
+    binary=y
+    ;;
+esac
+
+vmz="$tmpdir/`basename \"$kernel\"`.$ext"
+
+# Calculate the vmlinux.strip size
+${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
+strip_size=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" "$vmz.$$")
+
+if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel" ]; then
+    # (re)compress the stripped image unless an up-to-date cached copy exists
+    case $compression in
+    .xz)
+        xz --check=crc32 -f -6 "$vmz.$$"
+        ;;
+    .gz)
+        gzip -n -f -9 "$vmz.$$"
+        ;;
+    .lzma)
+        xz --format=lzma -f -6 "$vmz.$$"
+	;;
+    .lzo)
+        lzop -f -9 "$vmz.$$"
+	;;
+    *)
+        # drop the compression suffix so the stripped vmlinux is used
+        compression=
+	uboot_comp=none
+	;;
+    esac
+    # with -c keep the result under its cache name, else use the temp file
+    if [ -n "$cacheit" ]; then
+	mv -f "$vmz.$$$compression" "$vmz$compression"
+    else
+	vmz="$vmz.$$"
+    fi
+else
+    rm -f "$vmz.$$"	# cached copy is current; discard the temp file
+fi
+
+vmz="$vmz$compression"
+
+if [ "$make_space" = "y" ]; then
+	# Round the size to next higher MB limit
+	round_size=$(((strip_size + 0xfffff) & 0xfff00000))
+
+	round_size=0x$(printf "%x" $round_size)
+	link_addr=$(printf "%d" $link_address)
+
+	if [ $link_addr -lt $strip_size ]; then
+	    echo "INFO: Uncompressed kernel (size 0x$(printf "%x\n" $strip_size))" \
+			"overlaps the address of the wrapper($link_address)"
+	    echo "INFO: Fixing the link_address of wrapper to ($round_size)"
+	    link_address=$round_size
+	fi
+fi
+
+# Extract kernel version information, some platforms want to include
+# it in the image header
+version=`${CROSS}strings "$kernel" | grep '^Linux version [-0-9.]' | \
+    head -n1 | cut -d' ' -f3`
+if [ -n "$version" ]; then
+    uboot_version="-n Linux-$version"
+fi
+
+# physical offset of kernel image
+membase=`${CROSS}objdump -p "$kernel" | grep -m 1 LOAD | awk '{print $7}'`
+
+case "$platform" in
+uboot)
+    rm -f "$ofile"
+    ${MKIMAGE} -A ppc -O linux -T kernel -C $uboot_comp -a $membase -e $membase \
+	$uboot_version -d "$vmz" "$ofile"
+    if [ -z "$cacheit" ]; then
+	rm -f "$vmz"
+    fi
+    exit 0
+    ;;
+uboot-obs600)
+    rm -f "$ofile"
+    # obs600 wants a multi image with an initrd, so we need to put a fake
+    # one in even when building a "normal" image.
+    if [ -n "$initrd" ]; then
+	real_rd="$initrd"
+    else
+	real_rd=`mktemp`
+	echo "\0" >>"$real_rd"
+    fi
+    ${MKIMAGE} -A ppc -O linux -T multi -C gzip -a $membase -e $membase \
+	$uboot_version -d "$vmz":"$real_rd":"$dtb" "$ofile"
+    if [ -z "$initrd" ]; then
+	rm -f "$real_rd"
+    fi
+    if [ -z "$cacheit" ]; then
+	rm -f "$vmz"
+    fi
+    exit 0
+    ;;
+esac
+
+addsec() {
+    ${CROSS}objcopy $4 $1 \
+	--add-section=$3="$2" \
+	--set-section-flags=$3=contents,alloc,load,readonly,data
+}
+
+addsec $tmp "$vmz" $ksection $object/empty.o
+if [ -z "$cacheit" ]; then
+    rm -f "$vmz"
+fi
+
+if [ -n "$initrd" ]; then
+    addsec $tmp "$initrd" $isection
+fi
+
+if [ -n "$dtb" ]; then
+    addsec $tmp "$dtb" .kernel:dtb
+    if [ -n "$dts" ]; then
+	rm $dtb
+    fi
+fi
+
+if [ -n "$esm_blob" ]; then
+    addsec $tmp "$esm_blob" $esection
+fi
+
+if [ "$platform" != "miboot" ]; then
+    if [ -n "$link_address" ] ; then
+        text_start="-Ttext $link_address"
+    fi
+#link everything
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \
+	$platformo $tmp $object/wrapper.a
+    rm $tmp
+fi
+
+# Some platforms need the zImage's entry point and base address
+base=0x`${CROSS}nm "$ofile" | grep ' _start$' | cut -d' ' -f1`
+entry=`${CROSS}objdump -f "$ofile" | grep '^start address ' | cut -d' ' -f3`
+
+if [ -n "$binary" ]; then
+    mv "$ofile" "$ofile".elf
+    ${CROSS}objcopy -O binary "$ofile".elf "$ofile"
+fi
+
+# post-processing needed for some platforms
+case "$platform" in
+pseries|chrp|maple)
+    $objbin/addnote "$ofile"
+    ;;
+coff)
+    ${CROSS}objcopy -O aixcoff-rs6000 --set-start "$entry" "$ofile"
+    $objbin/hack-coff "$ofile"
+    ;;
+cuboot*)
+    gzip -n -f -9 "$ofile"
+    ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \
+            $uboot_version -d "$ofile".gz "$ofile"
+    ;;
+treeboot*)
+    mv "$ofile" "$ofile.elf"
+    $objbin/mktree "$ofile.elf" "$ofile" "$base" "$entry"
+    if [ -z "$cacheit" ]; then
+	rm -f "$ofile.elf"
+    fi
+    exit 0
+    ;;
+ps3)
+    # The ps3's loader supports loading a gzipped binary image from flash
+    # rom to ram addr zero. The loader then enters the system reset
+    # vector at addr 0x100.  A bootwrapper overlay is used to arrange for
+    # a binary image of the kernel to be at addr zero, and yet have a
+    # suitable bootwrapper entry at 0x100.  To construct the final rom
+    # image 512 bytes from offset 0x100 is copied to the bootwrapper
+    # place holder at symbol __system_reset_kernel.  The 512 bytes of the
+    # bootwrapper entry code at symbol __system_reset_overlay is then
+    # copied to offset 0x100.  At runtime the bootwrapper program copies
+    # the data at __system_reset_kernel back to addr 0x100.
+
+    system_reset_overlay=0x`${CROSS}nm "$ofile" \
+        | grep ' __system_reset_overlay$'       \
+        | cut -d' ' -f1`
+    system_reset_overlay=`printf "%d" $system_reset_overlay`
+    system_reset_kernel=0x`${CROSS}nm "$ofile" \
+        | grep ' __system_reset_kernel$'       \
+        | cut -d' ' -f1`
+    system_reset_kernel=`printf "%d" $system_reset_kernel`
+    overlay_dest="256"
+    overlay_size="512"
+
+    ${CROSS}objcopy -O binary "$ofile" "$ofile.bin"
+
+    run_cmd dd if="$ofile.bin" of="$ofile.bin" conv=notrunc   \
+        skip=$overlay_dest seek=$system_reset_kernel          \
+        count=$overlay_size bs=1
+
+    run_cmd dd if="$ofile.bin" of="$ofile.bin" conv=notrunc   \
+        skip=$system_reset_overlay seek=$overlay_dest         \
+        count=$overlay_size bs=1
+
+    odir="$(dirname "$ofile.bin")"
+
+    # The ps3's flash loader has a size limit of 16 MiB for the uncompressed
+    # image.  If a compressed image that exceeded this limit is written to
+    # flash the loader will decompress that image until the 16 MiB limit is
+    # reached, then enter the system reset vector of the partially decompressed
+    # image.  No warning is issued, so oversized images get a distinct name.
+    rm -f "$odir/otheros.bld" "$odir/otheros-too-big.bld"
+    size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1)
+    bld="otheros.bld"
+    if [ $size -gt $((0x1000000)) ]; then
+        bld="otheros-too-big.bld"
+    fi
+    gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld"
+    ;;
+esac
diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h
new file mode 100644
index 0000000000..ebfadd39e1
--- /dev/null
+++ b/arch/powerpc/boot/xz_config.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __XZ_CONFIG_H__
+#define __XZ_CONFIG_H__
+
+/*
+ * most of this is copied from lib/xz/xz_private.h, we can't use their defines
+ * since the boot wrapper is not built in the same environment as the rest of
+ * the kernel.
+ */
+
+#include "types.h"
+#include "swab.h"
+
+/* Return swab32() of the 32-bit word stored at @p. */
+static inline uint32_t swab32p(void *p)
+{
+	const uint32_t *word = p;
+	return swab32(*word);
+}
+
+#ifdef __LITTLE_ENDIAN__
+#define get_le32(p) (*((uint32_t *) (p)))
+#define cpu_to_be32(x) swab32(x)
+static inline u32 be32_to_cpup(const u32 *p)
+{
+	return swab32p((u32 *)p);
+}
+#else
+#define get_le32(p) swab32p(p)
+#define cpu_to_be32(x) (x)
+static inline u32 be32_to_cpup(const u32 *p)
+{
+	return *p;
+}
+#endif
+
+static inline uint32_t get_unaligned_be32(const void *p)
+{
+	return be32_to_cpup(p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+	*((u32 *)p) = cpu_to_be32(val);
+}
+
+#define memeq(a, b, size) (memcmp(a, b, size) == 0)
+#define memzero(buf, size) memset(buf, 0, size)
+
+/* prevent the inclusion of the xz-preboot MM headers */
+#define DECOMPR_MM_H
+#define memmove memmove
+#define XZ_EXTERN static
+
+/* xz.h needs to be included directly since we need enum xz_mode */
+#include "../../../include/linux/xz.h"
+
+#undef XZ_EXTERN
+
+#endif
diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S
new file mode 100644
index 0000000000..1179512951
--- /dev/null
+++ b/arch/powerpc/boot/zImage.coff.lds.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_zimage_start_opd)
+EXTERN(_zimage_start_opd)
+SECTIONS
+{
+  .text      :
+  {
+    _start = .;
+    *(.text)
+    *(.fixup)
+    _etext = .;
+  }
+  . = ALIGN(4096);
+  .data    :
+  {
+    *(.rodata*)
+    *(.data*)
+    *(__builtin_*)
+    *(.sdata*)
+    *(.got2)
+
+    _dtb_start = .;
+    *(.kernel:dtb)
+    _dtb_end = .;
+
+    _vmlinux_start =  .;
+    *(.kernel:vmlinux.strip)
+    _vmlinux_end =  .;
+
+    _initrd_start =  .;
+    *(.kernel:initrd)
+    _initrd_end =  .;
+  }
+
+  . = ALIGN(4096);
+  _edata  =  .;
+  __bss_start = .;
+  .bss       :
+  {
+   *(.sbss)
+   *(.bss)
+  }
+  _end = . ;
+
+  /DISCARD/ :
+  {
+    *(.comment)
+  }
+}
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
new file mode 100644
index 0000000000..d65cd55a6f
--- /dev/null
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+OUTPUT_ARCH(powerpc:common64)
+#else
+OUTPUT_ARCH(powerpc:common)
+#endif
+ENTRY(_zimage_start)
+EXTERN(_zimage_start)
+SECTIONS
+{
+  .text      :
+  {
+    _start = .;
+    *(.text)
+    *(.fixup)
+    _etext = .;
+  }
+  . = ALIGN(4096);
+  .data    :
+  {
+    *(.rodata*)
+    *(.data*)
+    *(.sdata*)
+#ifndef CONFIG_PPC64_BOOT_WRAPPER
+    *(.got2)
+#endif
+  }
+  .dynsym : { *(.dynsym) }
+  .dynstr : { *(.dynstr) }
+  .dynamic :
+  {
+    __dynamic_start = .;
+    *(.dynamic)
+  }
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+  .got : ALIGN(256)
+  {
+    *(.got .toc)
+  }
+#endif
+
+  .hash : { *(.hash) }
+  .interp : { *(.interp) }
+  .rela.dyn :
+  {
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+    __rela_dyn_start = .;
+#endif
+    *(.rela*)
+  }
+
+  . = ALIGN(8);
+  .kernel:dtb :
+  {
+    _dtb_start = .;
+    *(.kernel:dtb)
+    _dtb_end = .;
+  }
+
+  . = ALIGN(4096);
+  .kernel:vmlinux.strip :
+  {
+    _vmlinux_start =  .;
+    *(.kernel:vmlinux.strip)
+    _vmlinux_end =  .;
+  }
+
+  . = ALIGN(4096);
+  .kernel:initrd :
+  {
+    _initrd_start =  .;
+    *(.kernel:initrd)
+    _initrd_end =  .;
+  }
+
+  . = ALIGN(4096);
+  .kernel:esm_blob :
+  {
+    _esm_blob_start =  .;
+    *(.kernel:esm_blob)
+    _esm_blob_end =  .;
+  }
+
+  . = ALIGN(4096);
+  .bss       :
+  {
+    _edata  =  .;
+    __bss_start = .;
+    *(.sbss)
+    *(.bss)
+    *(COMMON)
+    _end = . ;
+  }
+}
diff --git a/arch/powerpc/boot/zImage.ps3.lds.S b/arch/powerpc/boot/zImage.ps3.lds.S
new file mode 100644
index 0000000000..d0ffb49361
--- /dev/null
+++ b/arch/powerpc/boot/zImage.ps3.lds.S
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_zimage_start)
+EXTERN(_zimage_start)
+SECTIONS
+{
+  _vmlinux_start =  .;
+  .kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) }
+  _vmlinux_end =  .;
+
+  . = ALIGN(8);
+  _dtb_start = .;
+  .kernel:dtb : { *(.kernel:dtb) }
+  _dtb_end = .;
+
+  . = ALIGN(4096);
+  _initrd_start =  .;
+  .kernel:initrd : { *(.kernel:initrd) }
+  _initrd_end =  .;
+
+  _start = .;
+  .text      :
+  {
+    *(.text)
+    *(.fixup)
+  }
+  _etext = .;
+  . = ALIGN(4096);
+  .data    :
+  {
+    *(.rodata*)
+    *(.data*)
+    *(.sdata*)
+    __got2_start = .;
+    *(.got2)
+    __got2_end = .;
+  }
+
+  . = ALIGN(4096);
+  _edata  =  .;
+
+  . = ALIGN(4096);
+  __bss_start = .;
+  .bss       :
+  {
+   *(.sbss)
+   *(.bss)
+  }
+  . = ALIGN(4096);
+  _end = . ;
+}
diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config
new file mode 100644
index 0000000000..ad6546850c
--- /dev/null
+++ b/arch/powerpc/configs/32-bit.config
@@ -0,0 +1 @@
+# CONFIG_PPC64 is not set
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
new file mode 100644
index 0000000000..25eed86ec5
--- /dev/null
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -0,0 +1,61 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_ACADIA=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=m
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+CONFIG_IBM_EMAC_DEBUG=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
new file mode 100644
index 0000000000..3549c9e950
--- /dev/null
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -0,0 +1,69 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_KILAUEA=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_NDFC=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_SENSORS_LM75=y
+CONFIG_THERMAL=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
new file mode 100644
index 0000000000..a974d1e945
--- /dev/null
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -0,0 +1,43 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_APM8018X=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_MATH_EMULATION=y
+# CONFIG_SUSPEND is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_SAS_ATTRS=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_FTRACE is not set
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
new file mode 100644
index 0000000000..4563f88acf
--- /dev/null
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -0,0 +1,59 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_MAKALU=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=m
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
new file mode 100644
index 0000000000..2a2bb3f468
--- /dev/null
+++ b/arch/powerpc/configs/40x/obs600_defconfig
@@ -0,0 +1,69 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_OBS600=y
+CONFIG_MATH_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_NDFC=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_SENSORS_LM75=y
+CONFIG_THERMAL=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
new file mode 100644
index 0000000000..9eaaf1a1d2
--- /dev/null
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -0,0 +1,55 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=m
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
new file mode 100644
index 0000000000..fde4824f23
--- /dev/null
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -0,0 +1,133 @@
+CONFIG_44x=y
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_SLUB_CPU_PARTIAL is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_AKEBONO=y
+CONFIG_HIGHMEM=y
+CONFIG_HZ_100=y
+CONFIG_IRQ_ALL_CPUS=y
+# CONFIG_COMPACTION is not set
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+CONFIG_SATA_AHCI_PLATFORM=y
+# CONFIG_ATA_SFF is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+CONFIG_IBM_EMAC=y
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_USB_DEFAULT_PERSIST is not set
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_HCD_PCI is not set
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="n"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0x00010000
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH=0x33f
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1_PPC=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
new file mode 100644
index 0000000000..41d04e70d4
--- /dev/null
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -0,0 +1,60 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_ARCHES=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_SENSORS_AD7414=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
new file mode 100644
index 0000000000..acbce718ea
--- /dev/null
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -0,0 +1,51 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_BAMBOO=y
+# CONFIG_EBONY is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig
new file mode 100644
index 0000000000..37088f250c
--- /dev/null
+++ b/arch/powerpc/configs/44x/bluestone_defconfig
@@ -0,0 +1,55 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_PCI_QUIRKS is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_BLUESTONE=y
+# CONFIG_EBONY is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_SENSORS_AD7414=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
new file mode 100644
index 0000000000..61776ade57
--- /dev/null
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -0,0 +1,69 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_CANYONLANDS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_NDFC=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_SENSORS_AD7414=y
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
new file mode 100644
index 0000000000..7283b7d4a1
--- /dev/null
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -0,0 +1,89 @@
+CONFIG_44x=y
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_CURRITUCK=y
+CONFIG_HIGHMEM=y
+CONFIG_HZ_100=y
+CONFIG_MATH_EMULATION=y
+CONFIG_IRQ_ALL_CPUS=y
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+CONFIG_SATA_SIL24=y
+# CONFIG_ATA_SFF is not set
+CONFIG_NETDEVICES=y
+CONFIG_E1000E=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_IBM_IIC=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NLS_DEFAULT="n"
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0x10000000
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH=0x200
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
new file mode 100644
index 0000000000..93d2a4e64a
--- /dev/null
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -0,0 +1,58 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_MATH_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
new file mode 100644
index 0000000000..509300f400
--- /dev/null
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -0,0 +1,91 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_EBONY is not set
+CONFIG_EIGER=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_NDFC=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+CONFIG_E1000E=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_I2C_DEBUG_CORE=y
+CONFIG_I2C_DEBUG_ALGO=y
+CONFIG_I2C_DEBUG_BUS=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_DMADEVICES=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CRYPTD=y
+CONFIG_CRYPTO_AUTHENC=y
+CONFIG_CRYPTO_CCM=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTS=y
+CONFIG_CRYPTO_LRW=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_XTS=y
+CONFIG_CRYPTO_XCBC=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_BLOWFISH=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
new file mode 100644
index 0000000000..5492537f4c
--- /dev/null
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -0,0 +1,121 @@
+CONFIG_44x=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+# CONFIG_FHANDLE is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_EXPERT=y
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_FSP2=y
+CONFIG_476FPE_ERR46=y
+CONFIG_SWIOTLB=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CMDLINE="ip=on rw"
+# CONFIG_SUSPEND is not set
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_VLAN_8021Q=m
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+# CONFIG_ATA_SFF is not set
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_IBM_EMAC=m
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_BOOKE_WDT=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_MMC=y
+CONFIG_MMC_DEBUG=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ARASAN=y
+CONFIG_MMC_SDHCI_ST=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_RESET_CONTROLLER=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_DEFAULT="n"
+CONFIG_XZ_DEC=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
new file mode 100644
index 0000000000..fb9a155735
--- /dev/null
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -0,0 +1,87 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_EBONY is not set
+CONFIG_ICON=y
+CONFIG_HIGHMEM=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
+CONFIG_FUSION_CTL=y
+CONFIG_FUSION_LOGGING=y
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_WLAN is not set
+# CONFIG_MOUSE_PS2_ALPS is not set
+# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
+# CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_TRACKPOINT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+# CONFIG_HWMON is not set
+CONFIG_MFD_SM501=y
+CONFIG_FB=y
+CONFIG_FB_SM501=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
new file mode 100644
index 0000000000..0f6380e1e6
--- /dev/null
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -0,0 +1,68 @@
+CONFIG_44x=y
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_ISS4xx=y
+CONFIG_HZ_100=y
+CONFIG_MATH_EMULATION=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_CMDLINE="root=/dev/issblk0"
+# CONFIG_PCI is not set
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_DYNAMIC_MEMSTART=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
new file mode 100644
index 0000000000..1a0f1c3e0e
--- /dev/null
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -0,0 +1,56 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_KATMAI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_MACINTOSH_DRIVERS=y
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
new file mode 100644
index 0000000000..6dd67de06a
--- /dev/null
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -0,0 +1,62 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_RAINIER=y
+CONFIG_MATH_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_MACINTOSH_DRIVERS=y
+CONFIG_NETDEVICES=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0xef600300
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
new file mode 100644
index 0000000000..e28d764165
--- /dev/null
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -0,0 +1,90 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_EBONY is not set
+CONFIG_REDWOOD=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+CONFIG_IBM_EMAC_DEBUG=y
+CONFIG_E1000E=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_I2C_DEBUG_CORE=y
+CONFIG_I2C_DEBUG_ALGO=y
+CONFIG_I2C_DEBUG_BUS=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_DMADEVICES=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CRYPTD=y
+CONFIG_CRYPTO_AUTHENC=y
+CONFIG_CRYPTO_CCM=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTS=y
+CONFIG_CRYPTO_LRW=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_XTS=y
+CONFIG_CRYPTO_XCBC=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_BLOWFISH=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
new file mode 100644
index 0000000000..51499ee636
--- /dev/null
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -0,0 +1,96 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_AMIGA_PARTITION=y
+# CONFIG_EBONY is not set
+CONFIG_SAM440EP=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+CONFIG_SATA_SIL=y
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+CONFIG_INPUT_FF_MEMLESS=m
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C_IBM_IIC=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FB_RADEON=y
+CONFIG_LCD_CLASS_DEVICE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_LOGO=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_STORAGE=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_RTC_DRV_M41T80_WDT=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_AFFS_FS=m
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
new file mode 100644
index 0000000000..b4984eab43
--- /dev/null
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -0,0 +1,63 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_SEQUOIA=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_NDFC=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
new file mode 100644
index 0000000000..3ea5932ab8
--- /dev/null
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -0,0 +1,57 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_TAISHAN=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_MACINTOSH_DRIVERS=y
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_EXT2_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
new file mode 100644
index 0000000000..20891c4131
--- /dev/null
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -0,0 +1,96 @@
+CONFIG_44x=y
+CONFIG_LOCALVERSION="-pika"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_EBONY is not set
+CONFIG_WARP=y
+CONFIG_PPC4xx_GPIO=y
+CONFIG_HZ_1000=y
+CONFIG_CMDLINE="ip=on"
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_VLAN_8021Q=y
+# CONFIG_STANDALONE is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_NDFC=y
+CONFIG_MTD_UBI=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_EEPROM_AT24=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_SENSORS_AD7414=y
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_CCITT=y
+CONFIG_CRC_T10DIF=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig
new file mode 100644
index 0000000000..2412a6bf7e
--- /dev/null
+++ b/arch/powerpc/configs/52xx/cm5200_defconfig
@@ -0,0 +1,78 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_MPC5200_SIMPLE=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_PM=y
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+CONFIG_FEC_MPC52xx=y
+CONFIG_LXT_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=57600
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=y
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig
new file mode 100644
index 0000000000..7db479dcbc
--- /dev/null
+++ b/arch/powerpc/configs/52xx/lite5200b_defconfig
@@ -0,0 +1,63 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_MPC5200_SIMPLE=y
+CONFIG_PPC_LITE5200=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+CONFIG_ATA=y
+CONFIG_PATA_MPC52xx=y
+CONFIG_NETDEVICES=y
+CONFIG_FEC_MPC52xx=y
+CONFIG_LXT_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_HWMON is not set
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig
new file mode 100644
index 0000000000..6186ead1e1
--- /dev/null
+++ b/arch/powerpc/configs/52xx/motionpro_defconfig
@@ -0,0 +1,91 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_MPC5200_SIMPLE=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_PM=y
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_ROM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_EEPROM_LEGACY=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_PATA_MPC52xx=y
+CONFIG_NETDEVICES=y
+CONFIG_FEC_MPC52xx=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
+CONFIG_DAVICOM_PHY=y
+CONFIG_ICPLUS_PHY=y
+CONFIG_LXT_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_QSEMI_PHY=y
+CONFIG_SMSC_PHY=y
+CONFIG_VITESSE_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_WATCHDOG=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig
new file mode 100644
index 0000000000..88fbe0d42e
--- /dev/null
+++ b/arch/powerpc/configs/52xx/pcm030_defconfig
@@ -0,0 +1,78 @@
+CONFIG_LOCALVERSION="trunk"
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_MPC5200_SIMPLE=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_HZ_100=y
+CONFIG_PREEMPT=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_BLK_DEV is not set
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=m
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=m
+CONFIG_PATA_MPC52xx=m
+CONFIG_NETDEVICES=y
+CONFIG_FEC_MPC52xx=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_HWMON is not set
+CONFIG_USB=y
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+# CONFIG_USB_OHCI_HCD_PCI is not set
+CONFIG_USB_STORAGE=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PCF8563=m
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_EXT2_FS=m
+CONFIG_EXT4_FS=m
+# CONFIG_DNOTIFY is not set
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=850
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
new file mode 100644
index 0000000000..688f703d8e
--- /dev/null
+++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
@@ -0,0 +1,92 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_MPC5200_SIMPLE=y
+CONFIG_PPC_MPC5200_BUGFIX=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_PM=y
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_PLATRAM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_PATA_MPC52xx=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_FEC_MPC52xx=y
+CONFIG_LXT_PHY=y
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SENSORS_LM80=y
+CONFIG_WATCHDOG=y
+CONFIG_MFD_SM501=y
+CONFIG_FB=y
+CONFIG_FB_FOREIGN_ENDIAN=y
+CONFIG_FB_SM501=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config
new file mode 100644
index 0000000000..0fe6406929
--- /dev/null
+++ b/arch/powerpc/configs/64-bit.config
@@ -0,0 +1 @@
+CONFIG_PPC64=y
diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig
new file mode 100644
index 0000000000..10192410b3
--- /dev/null
+++ b/arch/powerpc/configs/83xx/asp8347_defconfig
@@ -0,0 +1,71 @@
+CONFIG_FSL_EMB_PERFMON=y
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_ASP834x=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig
new file mode 100644
index 0000000000..487e5e1bbf
--- /dev/null
+++ b/arch/powerpc/configs/83xx/kmeter1_defconfig
@@ -0,0 +1,66 @@
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_KMETER1=y
+CONFIG_PREEMPT=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+# CONFIG_IPV6 is not set
+CONFIG_TIPC=y
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_PHRAM=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_TUN=y
+CONFIG_UCC_GETH=y
+CONFIG_MARVELL_PHY=y
+CONFIG_PPP=y
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=y
+CONFIG_WAN=y
+CONFIG_HDLC=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_UIO=y
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
new file mode 100644
index 0000000000..16a42e2267
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -0,0 +1,86 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC831x_RDB=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_SCSI=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_E100=y
+CONFIG_CICADA_PHY=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_WATCHDOG=y
+# CONFIG_USB_HID is not set
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_ETH=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_INTF_DEV_UIE_EMUL=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
new file mode 100644
index 0000000000..80d40ae668
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -0,0 +1,85 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC831x_RDB=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_ATA=y
+CONFIG_SATA_FSL=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_E100=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_WATCHDOG=y
+# CONFIG_USB_HID is not set
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_ETH=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_INTF_DEV_UIE_EMUL=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
new file mode 100644
index 0000000000..1715ff5474
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
@@ -0,0 +1,78 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_LDM_PARTITION=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC832x_RDB=y
+CONFIG_GEN_RTC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+CONFIG_UCC_GETH=y
+CONFIG_E1000=y
+CONFIG_ICPLUS_PHY=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_WATCHDOG=y
+# CONFIG_USB_HID is not set
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SPI=y
+CONFIG_QUICC_ENGINE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_932=y
+CONFIG_NLS_ISO8859_8=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
new file mode 100644
index 0000000000..e65c005714
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
@@ -0,0 +1,84 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC834x_ITX=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_ATA=y
+CONFIG_SATA_SIL=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_PATA_OF_PLATFORM=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_CICADA_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_INTF_DEV_UIE_EMUL=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
new file mode 100644
index 0000000000..17714bf0ed
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
@@ -0,0 +1,76 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC834x_ITX=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_CICADA_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_INTF_DEV_UIE_EMUL=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
new file mode 100644
index 0000000000..093df33f94
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
@@ -0,0 +1,71 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC836x_RDK=y
+CONFIG_QE_GPIO=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_NETDEVICES=y
+CONFIG_UCC_GETH=y
+CONFIG_BROADCOM_PHY=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_QE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_SPIDEV=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_FB=y
+CONFIG_FB_OF=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_QUICC_ENGINE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_PPC_EARLY_DEBUG=y
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
new file mode 100644
index 0000000000..58fae5131f
--- /dev/null
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -0,0 +1,80 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC837x_RDB=y
+CONFIG_GEN_RTC=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_SATA_FSL=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID456=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_MARVELL_PHY=y
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_WATCHDOG=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx-32bit.config b/arch/powerpc/configs/85xx-32bit.config
new file mode 100644
index 0000000000..6b8894d727
--- /dev/null
+++ b/arch/powerpc/configs/85xx-32bit.config
@@ -0,0 +1,5 @@
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PPC_85xx=y
+CONFIG_PROC_KCORE=y
+CONFIG_PHYS_64BIT=y
diff --git a/arch/powerpc/configs/85xx-64bit.config b/arch/powerpc/configs/85xx-64bit.config
new file mode 100644
index 0000000000..4aba812228
--- /dev/null
+++ b/arch/powerpc/configs/85xx-64bit.config
@@ -0,0 +1,4 @@
+CONFIG_MATH_EMULATION=y
+CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED=y
+CONFIG_PPC64=y
+CONFIG_PPC_BOOK3E_64=y
diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config
new file mode 100644
index 0000000000..524db76f47
--- /dev/null
+++ b/arch/powerpc/configs/85xx-hw.config
@@ -0,0 +1,139 @@
+CONFIG_AQUANTIA_PHY=y
+CONFIG_AT803X_PHY=y
+CONFIG_ATA=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_C293_PCIE=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_CICADA_PHY=y
+CONFIG_CLK_QORIQ=y
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
+CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_DAVICOM_PHY=y
+CONFIG_DMADEVICES=y
+CONFIG_E1000E=y
+CONFIG_E1000=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MPC85XX=y
+CONFIG_EEPROM_AT24=y
+CONFIG_EEPROM_LEGACY=y
+CONFIG_FB_FSL_DIU=y
+CONFIG_FS_ENET=y
+CONFIG_FSL_CORENET_CF=y
+CONFIG_FSL_DMA=y
+CONFIG_FSL_HV_MANAGER=y
+CONFIG_FSL_PQ_MDIO=y
+CONFIG_FSL_RIO=y
+CONFIG_FSL_XGMAC_MDIO=y
+CONFIG_GIANFAR=y
+CONFIG_GPIO_MPC8XXX=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_CPM=m
+CONFIG_I2C_MPC=y
+CONFIG_I2C_MUX_PCA954x=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C=y
+CONFIG_IGB=y
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_MARVELL_PHY=y
+CONFIG_MDIO_BUS_MUX_GPIO=y
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
+CONFIG_MMC_SDHCI_OF_ESDHC=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_NAND_FSL_IFC=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PLATRAM=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_NETDEVICES=y
+CONFIG_NVRAM=y
+CONFIG_PATA_ALI=y
+CONFIG_PATA_SIL680=y
+CONFIG_PATA_VIA=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI=y
+CONFIG_PPC_EPAPR_HV_BYTECHAN=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_QE_GPIO=y
+CONFIG_QUICC_ENGINE=y
+CONFIG_RAPIDIO=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_RTC_DRV_DS3232=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_FSL=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_SIL=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SENSORS_INA2XX=y
+CONFIG_SENSORS_LM90=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_RUNTIME_UARTS=6
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_QE=m
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SND_DRIVERS is not set
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_POWERPC_SOC=y
+# CONFIG_SND_PPC is not set
+CONFIG_SND_SOC=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_USB is not set
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_SPI_FSL_ESPI=y
+CONFIG_SPI_FSL_SPI=y
+CONFIG_SPI_GPIO=y
+CONFIG_SPI=y
+CONFIG_TERANETICS_PHY=y
+CONFIG_UCC_GETH=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_HID=m
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VITESSE_PHY=y
diff --git a/arch/powerpc/configs/85xx-smp.config b/arch/powerpc/configs/85xx-smp.config
new file mode 100644
index 0000000000..3b4d1e5463
--- /dev/null
+++ b/arch/powerpc/configs/85xx-smp.config
@@ -0,0 +1,2 @@
+CONFIG_NR_CPUS=24
+CONFIG_SMP=y
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
new file mode 100644
index 0000000000..da6fc203e2
--- /dev/null
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -0,0 +1,232 @@
+CONFIG_PPC_85xx=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_UTS_NS is not set
+# CONFIG_IPC_NS is not set
+# CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_PERF_EVENTS=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_GE_IMP3A=y
+CONFIG_QE_GPIO=y
+CONFIG_CPM2=y
+CONFIG_HIGHMEM=y
+CONFIG_HZ_1000=y
+CONFIG_PREEMPT=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_MATH_EMULATION=y
+CONFIG_ARCH_FORCE_MAX_ORDER=16
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_MSI=y
+CONFIG_PCCARD=y
+# CONFIG_PCMCIA_LOAD_CIS is not set
+CONFIG_YENTA=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET6_AH=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_NET_PKTGEN=m
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_DS1682=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_SIL24=y
+# CONFIG_ATA_SFF is not set
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=m
+# CONFIG_NET_VENDOR_3COM is not set
+CONFIG_FS_ENET=y
+CONFIG_UCC_GETH=y
+CONFIG_GIANFAR=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+CONFIG_SLIP_MODE_SLIP6=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_QE=m
+CONFIG_NVRAM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_CPM=m
+CONFIG_I2C_MPC=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_GE_FPGA=y
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_LM92=y
+CONFIG_WATCHDOG=y
+CONFIG_GEF_WDT=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_STORAGE=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MPC85XX=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_INTF_PROC is not set
+CONFIG_RTC_DRV_RX8581=y
+CONFIG_DMADEVICES=y
+CONFIG_FSL_DMA=y
+CONFIG_QUICC_ENGINE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_FUSE_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=850
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_NTFS_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_CCITT=y
+CONFIG_CRC_T10DIF=y
+CONFIG_LIBCRC32C=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig
new file mode 100644
index 0000000000..9cb211fb6d
--- /dev/null
+++ b/arch/powerpc/configs/85xx/ksi8560_defconfig
@@ -0,0 +1,57 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+CONFIG_KSI8560=y
+CONFIG_CPM2=y
+CONFIG_GEN_RTC=y
+CONFIG_HIGHMEM=y
+CONFIG_BINFMT_MISC=y
+CONFIG_MATH_EMULATION=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+# CONFIG_FS_ENET_HAS_SCC is not set
+CONFIG_FS_ENET_MDIO_FCC=y
+CONFIG_GIANFAR=y
+CONFIG_MARVELL_PHY=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/powerpc/configs/85xx/ppa8548_defconfig b/arch/powerpc/configs/85xx/ppa8548_defconfig
new file mode 100644
index 0000000000..4bd5f993d2
--- /dev/null
+++ b/arch/powerpc/configs/85xx/ppa8548_defconfig
@@ -0,0 +1,44 @@
+CONFIG_PPC_85xx=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_PPA8548=y
+CONFIG_FSL_LBC=y
+CONFIG_RAPIDIO=y
+CONFIG_FSL_RIO=y
+CONFIG_RAPIDIO_DMA_ENGINE=y
+CONFIG_RAPIDIO_ENUM_BASIC=y
+CONFIG_RAPIDIO_CPS_XX=y
+CONFIG_RAPIDIO_CPS_GEN2=y
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_LOWMEM_SIZE_BOOL=y
+CONFIG_LOWMEM_SIZE=0x40000000
+CONFIG_LOWMEM_CAM_NUM_BOOL=y
+CONFIG_LOWMEM_CAM_NUM=4
+CONFIG_PAGE_OFFSET_BOOL=y
+CONFIG_PAGE_OFFSET=0xb0000000
+CONFIG_KERNEL_START_BOOL=y
+CONFIG_TASK_SIZE_BOOL=y
+CONFIG_TASK_SIZE=0xb0000000
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_MARVELL_PHY=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_MPC=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_ISL1208=y
+CONFIG_FSL_DMA=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig
new file mode 100644
index 0000000000..7037a6d801
--- /dev/null
+++ b/arch/powerpc/configs/85xx/socrates_defconfig
@@ -0,0 +1,86 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SOCRATES=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_CAN=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_SOCRATES=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_MARVELL_PHY=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_HWMON_DEBUG_CHIP=y
+CONFIG_SENSORS_LM75=y
+CONFIG_SENSORS_W83781D=y
+CONFIG_FB=y
+CONFIG_FB_MB862XX=y
+CONFIG_FB_MB862XX_LIME=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_FONTS=y
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
new file mode 100644
index 0000000000..e708049704
--- /dev/null
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -0,0 +1,67 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_STX_GP3=y
+CONFIG_HIGHMEM=y
+CONFIG_BINFMT_MISC=m
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_NET_PKTGEN=y
+# CONFIG_FW_LOADER is not set
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_SCSI=m
+CONFIG_BLK_DEV_SD=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_MARVELL_PHY=y
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=m
+# CONFIG_VT is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_PRINTER=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_AGP=m
+CONFIG_DRM=m
+CONFIG_SOUND=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_ISO9660_FS=m
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=m
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS=y
+CONFIG_CRC_CCITT=y
+CONFIG_CRC_T10DIF=m
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_BDI_SWITCH=y
diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig
new file mode 100644
index 0000000000..bbf040aa1f
--- /dev/null
+++ b/arch/powerpc/configs/85xx/tqm8540_defconfig
@@ -0,0 +1,57 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+CONFIG_TQM8540=y
+CONFIG_GEN_RTC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_E100=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_HWMON_DEBUG_CHIP=y
+CONFIG_SENSORS_LM75=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig
new file mode 100644
index 0000000000..523ad8dcfd
--- /dev/null
+++ b/arch/powerpc/configs/85xx/tqm8541_defconfig
@@ -0,0 +1,59 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+CONFIG_TQM8541=y
+CONFIG_GEN_RTC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_E100=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_HWMON_DEBUG_CHIP=y
+CONFIG_SENSORS_LM75=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig
new file mode 100644
index 0000000000..afa1b9b633
--- /dev/null
+++ b/arch/powerpc/configs/85xx/tqm8548_defconfig
@@ -0,0 +1,66 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+CONFIG_TQM8548=y
+CONFIG_HIGHMEM=y
+CONFIG_BINFMT_MISC=y
+CONFIG_MATH_EMULATION=y
+# CONFIG_SECCOMP is not set
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_FSL_UPM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SENSORS_LM75=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig
new file mode 100644
index 0000000000..0032ce1e8c
--- /dev/null
+++ b/arch/powerpc/configs/85xx/tqm8555_defconfig
@@ -0,0 +1,59 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+CONFIG_TQM8555=y
+CONFIG_GEN_RTC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_E100=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_HWMON_DEBUG_CHIP=y
+CONFIG_SENSORS_LM75=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig
new file mode 100644
index 0000000000..a80b971f7d
--- /dev/null
+++ b/arch/powerpc/configs/85xx/tqm8560_defconfig
@@ -0,0 +1,59 @@
+CONFIG_PPC_85xx=y
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+CONFIG_TQM8560=y
+CONFIG_GEN_RTC=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_E100=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_HWMON_DEBUG_CHIP=y
+CONFIG_SENSORS_LM75=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
new file mode 100644
index 0000000000..3a6381aa9f
--- /dev/null
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -0,0 +1,139 @@
+CONFIG_PPC_85xx=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_XES_MPC85xx=y
+CONFIG_HIGHMEM=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCI_MSI=y
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_LOWMEM_SIZE_BOOL=y
+CONFIG_LOWMEM_SIZE=0x40000000
+CONFIG_PAGE_OFFSET_BOOL=y
+CONFIG_PAGE_OFFSET=0x80000000
+CONFIG_KERNEL_START_BOOL=y
+CONFIG_TASK_SIZE_BOOL=y
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_MTD=y
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_NAND_FSL_UPM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_PATA_ALI=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_GIANFAR=y
+CONFIG_E1000=y
+CONFIG_BROADCOM_PHY=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_NVRAM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SENSORS_DS1621=y
+CONFIG_SENSORS_LM90=y
+CONFIG_WATCHDOG=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_PCA955X=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_EDAC=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_DMADEVICES=y
+CONFIG_FSL_DMA=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRC_T10DIF=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MD5=y
diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config
new file mode 100644
index 0000000000..0cb24b33c8
--- /dev/null
+++ b/arch/powerpc/configs/86xx-hw.config
@@ -0,0 +1,102 @@
+CONFIG_ATA=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BROADCOM_PHY=y
+# CONFIG_CARDBUS is not set
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_DS1682=y
+CONFIG_EEPROM_LEGACY=y
+CONFIG_GEF_WDT=y
+CONFIG_GIANFAR=y
+CONFIG_GPIO_GE_FPGA=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_1000=y
+CONFIG_I2C_MPC=y
+CONFIG_I2C=y
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_LE_BYTE_SWAP=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_TULIP=y
+CONFIG_NVRAM=y
+CONFIG_PATA_ALI=y
+CONFIG_PCCARD=y
+CONFIG_PCI_DEBUG=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI=y
+# CONFIG_PCMCIA_LOAD_CIS is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_RX8581=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_SIL=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_LM92=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=5
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_RUNTIME_UARTS=5
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_ULI526X=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB=y
+CONFIG_VITESSE_PHY=y
+CONFIG_VME_BUS=y
+CONFIG_VME_TSI148=y
+CONFIG_WATCHDOG=y
+# CONFIG_YENTA_O2 is not set
+# CONFIG_YENTA_RICOH is not set
+# CONFIG_YENTA_TOSHIBA is not set
+CONFIG_YENTA=y
diff --git a/arch/powerpc/configs/86xx-smp.config b/arch/powerpc/configs/86xx-smp.config
new file mode 100644
index 0000000000..40ac38d303
--- /dev/null
+++ b/arch/powerpc/configs/86xx-smp.config
@@ -0,0 +1,2 @@
+CONFIG_NR_CPUS=2
+CONFIG_SMP=y
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
new file mode 100644
index 0000000000..7f35d5bc12
--- /dev/null
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -0,0 +1,51 @@
+CONFIG_PPC_8xx=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PPC_ADDER875=y
+CONFIG_GEN_RTC=y
+CONFIG_HZ_1000=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+# CONFIG_FS_ENET_HAS_SCC is not set
+CONFIG_DAVICOM_PHY=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC32_SLICEBY4=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/altivec.config b/arch/powerpc/configs/altivec.config
new file mode 100644
index 0000000000..58a697cb5a
--- /dev/null
+++ b/arch/powerpc/configs/altivec.config
@@ -0,0 +1 @@
+CONFIG_ALTIVEC=y
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
new file mode 100644
index 0000000000..200bb1ecb5
--- /dev/null
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -0,0 +1,117 @@
+CONFIG_ALTIVEC=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_AMIGA_PARTITION=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_AMIGAONE=y
+CONFIG_HIGHMEM=y
+CONFIG_BINFMT_MISC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set
+# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
+# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
+# CONFIG_NETFILTER_XT_MATCH_STATE is not set
+# CONFIG_IP_NF_MANGLE is not set
+# CONFIG_STANDALONE is not set
+CONFIG_PARPORT=y
+CONFIG_PARPORT_PC=y
+CONFIG_PARPORT_PC_FIFO=y
+CONFIG_BLK_DEV_FD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+# CONFIG_SCSI_SYM53C8XX_MMIO is not set
+CONFIG_ATA=y
+CONFIG_PATA_SIL680=y
+CONFIG_PATA_VIA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_NETDEVICES=y
+CONFIG_VORTEX=y
+CONFIG_8139CP=y
+CONFIG_8139TOO=y
+CONFIG_PHYLIB=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_3DFX=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_AFFS_FS=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_CRYPTO_CBC=m
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/be.config b/arch/powerpc/configs/be.config
new file mode 100644
index 0000000000..c5cdc99a65
--- /dev/null
+++ b/arch/powerpc/configs/be.config
@@ -0,0 +1 @@
+CONFIG_CPU_BIG_ENDIAN=y
diff --git a/arch/powerpc/configs/book3s_32.config b/arch/powerpc/configs/book3s_32.config
new file mode 100644
index 0000000000..8721eb7b12
--- /dev/null
+++ b/arch/powerpc/configs/book3s_32.config
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_PPC_BOOK3S_32=y
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
new file mode 100644
index 0000000000..53f43a34e1
--- /dev/null
+++ b/arch/powerpc/configs/cell_defconfig
@@ -0,0 +1,205 @@
+CONFIG_PPC64=y
+CONFIG_CELL_CPU=y
+CONFIG_ALTIVEC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_PS3=y
+CONFIG_PS3_DISK=y
+CONFIG_PS3_ROM=m
+CONFIG_PS3_FLASH=m
+CONFIG_PS3_LPM=m
+CONFIG_PPC_IBM_CELL_BLADE=y
+CONFIG_RTAS_FLASH=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_GEN_RTC=y
+CONFIG_BINFMT_MISC=m
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+CONFIG_SCHED_SMT=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+# CONFIG_IPV6_SIT is not set
+CONFIG_IPV6_TUNNEL=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_SATA_PROMISE=y
+CONFIG_PATA_ARTOP=y
+CONFIG_PATA_PDC2027X=m
+CONFIG_PATA_SIL680=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_MACVLAN=m
+CONFIG_TUN=y
+CONFIG_TIGON3=y
+CONFIG_E1000=m
+CONFIG_SKGE=m
+CONFIG_SKY2=m
+CONFIG_GELIC_NET=m
+CONFIG_GELIC_WIRELESS=y
+CONFIG_SPIDER_NET=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_HVC_RTAS=y
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
+CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_WATCHDOG=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_HID=m
+# CONFIG_USB_HID is not set
+CONFIG_USB=m
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
+CONFIG_EDAC=y
+CONFIG_EDAC_CELL=y
+CONFIG_UIO=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
new file mode 100644
index 0000000000..fb314f75ad
--- /dev/null
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -0,0 +1,121 @@
+CONFIG_SMP=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
+CONFIG_HIGHMEM=y
+CONFIG_BINFMT_MISC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_ISA=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set
+# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
+# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
+# CONFIG_NETFILTER_XT_MATCH_STATE is not set
+# CONFIG_IP_NF_MANGLE is not set
+# CONFIG_STANDALONE is not set
+CONFIG_BLK_DEV_FD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_ATA=y
+CONFIG_PATA_VIA=y
+CONFIG_PATA_WINBOND=y
+CONFIG_ATA_GENERIC=y
+CONFIG_NETDEVICES=y
+CONFIG_PCNET32=y
+CONFIG_NET_TULIP=y
+CONFIG_MV643XX_ETH=y
+CONFIG_8139CP=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+CONFIG_VIA_RHINE=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_NVRAM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_ATY=y
+CONFIG_FB_ATY_CT=y
+CONFIG_FB_ATY_GX=y
+CONFIG_FB_3DFX=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_CRYPTO_CBC=m
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/corenet_base.config b/arch/powerpc/configs/corenet_base.config
new file mode 100644
index 0000000000..1c40de1e76
--- /dev/null
+++ b/arch/powerpc/configs/corenet_base.config
@@ -0,0 +1,2 @@
+CONFIG_CORENET_GENERIC=y
+CONFIG_PPC_QEMU_E500=y
diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config
new file mode 100644
index 0000000000..a14ae1f20d
--- /dev/null
+++ b/arch/powerpc/configs/debug.config
@@ -0,0 +1 @@
+CONFIG_SCOM_DEBUGFS=y
diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config
new file mode 100644
index 0000000000..7776b91da3
--- /dev/null
+++ b/arch/powerpc/configs/disable-werror.config
@@ -0,0 +1,2 @@
+# Help: Disable -Werror
+CONFIG_PPC_DISABLE_WERROR=y
diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config
new file mode 100644
index 0000000000..4ffacafe40
--- /dev/null
+++ b/arch/powerpc/configs/dpaa.config
@@ -0,0 +1,5 @@
+CONFIG_FSL_DPAA=y
+CONFIG_FSL_PAMU=y
+CONFIG_FSL_FMAN=y
+CONFIG_FSL_DPAA_ETH=y
+CONFIG_CORTINA_PHY=y
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
new file mode 100644
index 0000000000..0d8d3f41f1
--- /dev/null
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -0,0 +1,70 @@
+CONFIG_SYSVIPC=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_82xx=y
+CONFIG_EP8248E=y
+CONFIG_BINFMT_MISC=y
+# CONFIG_SECCOMP is not set
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NETFILTER=y
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_2 is not set
+# CONFIG_MTD_CFI_I1 is not set
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+# CONFIG_FS_ENET_HAS_SCC is not set
+CONFIG_DAVICOM_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_BDI_SWITCH=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
new file mode 100644
index 0000000000..a98ef6a4ab
--- /dev/null
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -0,0 +1,53 @@
+CONFIG_PPC_8xx=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PPC_EP88XC=y
+CONFIG_GEN_RTC=y
+CONFIG_HZ_100=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+# CONFIG_FS_ENET_HAS_SCC is not set
+CONFIG_LXT_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC32_SLICEBY4=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
new file mode 100644
index 0000000000..3009b0efaf
--- /dev/null
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -0,0 +1,128 @@
+CONFIG_ADFS_FS=m
+CONFIG_AFFS_FS=m
+CONFIG_AUDIT=y
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_BINFMT_MISC=m
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_CRC_T10DIF=y
+CONFIG_CPUSETS=y
+CONFIG_CRAMFS=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_NULL=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DUMMY=y
+CONFIG_EFS_FS=m
+CONFIG_EXPERT=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_FB=y
+CONFIG_FHANDLE=y
+CONFIG_FIXED_PHY=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_8x8=y
+CONFIG_FONTS=y
+CONFIG_ARCH_FORCE_MAX_ORDER=12
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAME_WARN=1024
+CONFIG_FTL=y
+CONFIG_GPIO_GENERIC_PLATFORM=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPFS_FS=m
+CONFIG_HUGETLBFS=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IKCONFIG=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_IPCOMP=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_PNP=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
+CONFIG_ISO9660_FS=m
+CONFIG_JFFS2_FS_DEBUG=1
+CONFIG_JFFS2_FS=y
+CONFIG_JOLIET=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MSDOS_FS=m
+CONFIG_MTD_UBI=y
+CONFIG_MTD=y
+CONFIG_NET_IPIP=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NET_KEY=y
+CONFIG_NET=y
+CONFIG_NFSD=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=m
+CONFIG_NO_HZ=y
+CONFIG_NTFS_FS=y
+CONFIG_PACKET=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PERF_EVENTS=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
+CONFIG_QNX4FS_FS=m
+CONFIG_RCU_TRACE=y
+CONFIG_RESET_CONTROLLER=y
+CONFIG_ROOT_NFS=y
+CONFIG_SYSV_FS=m
+CONFIG_SYSVIPC=y
+CONFIG_TMPFS=y
+CONFIG_UBIFS_FS=y
+CONFIG_UDF_FS=m
+CONFIG_UFS_FS=m
+CONFIG_UIO=y
+CONFIG_UNIX=y
+CONFIG_VFAT_FS=y
+CONFIG_VXFS_FS=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZISOFS=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
new file mode 100644
index 0000000000..71d9d112c0
--- /dev/null
+++ b/arch/powerpc/configs/g5_defconfig
@@ -0,0 +1,258 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_PMAC64=y
+CONFIG_GEN_RTC=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_PPC_4K_PAGES=y
+CONFIG_PCI_MSI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_NETLINK_LOG=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_ATA=y
+CONFIG_SATA_SVW=y
+CONFIG_PATA_MACIO=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_ADB_PMU=y
+CONFIG_PMAC_SMU=y
+CONFIG_MAC_EMUMOUSEBTN=y
+CONFIG_WINDFARM=y
+CONFIG_WINDFARM_PM81=y
+CONFIG_WINDFARM_PM91=y
+CONFIG_WINDFARM_PM112=y
+CONFIG_WINDFARM_PM121=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_TUN=m
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_TIGON3=y
+CONFIG_E1000=y
+CONFIG_SUNGEM=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_USB_CATC=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_USBNET=m
+# CONFIG_USB_NET_AX8817X is not set
+# CONFIG_USB_NET_NET1080 is not set
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C_CHARDEV=y
+CONFIG_AGP=m
+CONFIG_AGP_UNINORTH=m
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_OF=y
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+CONFIG_FB_RADEON=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_AOA=m
+CONFIG_SND_AOA_FABRIC_LAYOUT=m
+CONFIG_SND_AOA_ONYX=m
+CONFIG_SND_AOA_TAS=m
+CONFIG_SND_AOA_TOONIE=m
+CONFIG_SND_USB_AUDIO=m
+CONFIG_HID_GYRATION=y
+CONFIG_LOGITECH_FF=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_SDDR09=y
+CONFIG_USB_STORAGE_SDDR55=y
+CONFIG_USB_STORAGE_JUMPSHOT=y
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_CYPRESS_M8=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_GARMIN=m
+CONFIG_USB_SERIAL_IPW=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_SAFE=m
+CONFIG_USB_SERIAL_SAFE_PADDED=y
+CONFIG_USB_SERIAL_TI=m
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_OMNINET=m
+CONFIG_USB_APPLEDISPLAY=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_FS_DAX=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_1250=y
+CONFIG_NLS_CODEPAGE_1251=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_BOOTX_TEXT=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+# CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
new file mode 100644
index 0000000000..d77eeb5253
--- /dev/null
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -0,0 +1,91 @@
+CONFIG_LOCALVERSION="-gcn"
+CONFIG_SYSVIPC=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_ELF_CORE is not set
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_EMBEDDED6xx=y
+CONFIG_GAMECUBE=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_PREEMPT=y
+CONFIG_BINFMT_MISC=m
+CONFIG_KEXEC=y
+# CONFIG_SECCOMP is not set
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_STANDALONE is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_NETDEVICES=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_FF_MEMLESS=m
+CONFIG_INPUT_JOYDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_LEGACY_PTY_COUNT=64
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GAMECUBE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CIFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRC_CCITT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_SCHED_TRACER=y
+CONFIG_DMA_API_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG=y
diff --git a/arch/powerpc/configs/guest.config b/arch/powerpc/configs/guest.config
new file mode 100644
index 0000000000..fece834872
--- /dev/null
+++ b/arch/powerpc/configs/guest.config
@@ -0,0 +1,14 @@
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_VIRTIO_NET=y
+CONFIG_NET_FAILOVER=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_KVM_GUEST=y
+CONFIG_EPAPR_PARAVIRT=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VHOST_NET=y
+CONFIG_VHOST=y
+CONFIG_IBMVETH=y
+CONFIG_IBMVNIC=y
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
new file mode 100644
index 0000000000..271daff47d
--- /dev/null
+++ b/arch/powerpc/configs/holly_defconfig
@@ -0,0 +1,60 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_EMBEDDED6xx=y
+CONFIG_PPC_HOLLY=y
+CONFIG_GEN_RTC=y
+CONFIG_BINFMT_MISC=y
+CONFIG_CMDLINE="console=ttyS0,115200"
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_BLK_DEV_SD=y
+CONFIG_ATA=y
+CONFIG_NETDEVICES=y
+CONFIG_VORTEX=y
+CONFIG_TSI108_ETH=y
+CONFIG_PHYLIB=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
diff --git a/arch/powerpc/configs/kvm_guest.config b/arch/powerpc/configs/kvm_guest.config
new file mode 120000
index 0000000000..a5f7a2fa74
--- /dev/null
+++ b/arch/powerpc/configs/kvm_guest.config
@@ -0,0 +1 @@
+../../../kernel/configs/kvm_guest.config
+\ No newline at end of file
diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config
new file mode 100644
index 0000000000..ee43fdb3b8
--- /dev/null
+++ b/arch/powerpc/configs/le.config
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
new file mode 100644
index 0000000000..fa707de761
--- /dev/null
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -0,0 +1,139 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_EMBEDDED6xx=y
+CONFIG_LINKSTATION=y
+CONFIG_HZ_100=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_2 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
+# CONFIG_MTD_CFI_I2 is not set
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_EEPROM_LEGACY=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_PATA_IT821X=y
+CONFIG_PATA_SIL680=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=m
+CONFIG_NET_TULIP=y
+CONFIG_TULIP=y
+CONFIG_TULIP_MMIO=y
+CONFIG_R8169=y
+CONFIG_INPUT_EVDEV=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=m
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_HID=m
+# CONFIG_USB_HID is not set
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_PRINTER=m
+CONFIG_USB_STORAGE=m
+CONFIG_USB_SERIAL=y
+CONFIG_USB_SERIAL_CONSOLE=y
+CONFIG_USB_SERIAL_FTDI_SIO=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_RS5C372=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_XFS_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=m
+CONFIG_CIFS=m
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_UTF8=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_DEFLATE=m
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
new file mode 100644
index 0000000000..c821a97f4a
--- /dev/null
+++ b/arch/powerpc/configs/maple_defconfig
@@ -0,0 +1,111 @@
+CONFIG_PPC64=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_MAPLE=y
+CONFIG_UDBG_RTAS_CONSOLE=y
+CONFIG_GEN_RTC=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_PPC_4K_PAGES=y
+CONFIG_PCI_MSI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IPV6 is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_IPR=y
+CONFIG_ATA=y
+CONFIG_PATA_AMD=y
+CONFIG_ATA_GENERIC=y
+CONFIG_NETDEVICES=y
+CONFIG_AMD8111_ETH=y
+CONFIG_TIGON3=y
+CONFIG_E1000=y
+CONFIG_USB_PEGASUS=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HVC_RTAS=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_AMD8111=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_SERIAL=y
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_CYPRESS_M8=m
+CONFIG_USB_SERIAL_GARMIN=m
+CONFIG_USB_SERIAL_IPW=m
+CONFIG_USB_SERIAL_KEYSPAN=y
+CONFIG_USB_SERIAL_TI=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_DEFAULT="utf-8"
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_CCITT=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_BOOTX_TEXT=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+# CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
new file mode 100644
index 0000000000..f65001e787
--- /dev/null
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -0,0 +1,82 @@
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_PCSPKR_PLATFORM is not set
+CONFIG_EXPERT=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_82xx=y
+CONFIG_MGCOGE=y
+CONFIG_BINFMT_MISC=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_TIPC=y
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+CONFIG_FS_ENET_MDIO_FCC=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_CPM=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
+# CONFIG_HWMON is not set
+CONFIG_USB_GADGET=y
+CONFIG_USB_FSL_USB2=y
+CONFIG_USB_G_SERIAL=y
+CONFIG_UIO=y
+CONFIG_EXT2_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_CRAMFS=y
+CONFIG_SQUASHFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_BDI_SWITCH=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
new file mode 100644
index 0000000000..a64fb1ef8c
--- /dev/null
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -0,0 +1,108 @@
+# CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TICK_CPU_ACCOUNTING=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_CGROUPS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EXPERT=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_PPC64=y
+CONFIG_POWER9_CPU=y
+# CONFIG_PPC_64S_HASH_MMU is not set
+# CONFIG_PPC_KUEP is not set
+# CONFIG_PPC_KUAP is not set
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_NR_IRQS=64
+CONFIG_PANIC_TIMEOUT=10
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_PPC_MICROWATT=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ=y
+CONFIG_HZ_100=y
+CONFIG_PPC_4K_PAGES=y
+# CONFIG_SECCOMP is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_INET=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_RAW_DIAG=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_PARTITIONED_MASTER=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_NETDEVICES=y
+CONFIG_LITEX_LITEETH=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_NVRAM is not set
+CONFIG_SPI=y
+CONFIG_SPI_DEBUG=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_SPIDEV=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_LITEX=y
+# CONFIG_VIRTIO_MENU is not set
+CONFIG_COMMON_CLK=y
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_NVMEM is not set
+CONFIG_EXT4_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRYPTO_SHA256=y
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MISC is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_PPC_DISABLE_WERROR=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+# CONFIG_XMON_DEFAULT_RO_MODE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
new file mode 100644
index 0000000000..d24457bc57
--- /dev/null
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -0,0 +1,116 @@
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC512x=y
+CONFIG_MPC512x_LPBFIFO=y
+CONFIG_MPC5121_ADS=y
+CONFIG_MPC512x_GENERIC=y
+CONFIG_PDM360NG=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_HZ_1000=y
+# CONFIG_SECCOMP is not set
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+CONFIG_CAN=y
+CONFIG_CAN_VCAN=y
+CONFIG_CAN_MSCAN=y
+CONFIG_CAN_DEBUG_DEVICES=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_MPC5121_NFC=y
+CONFIG_MTD_UBI=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_EEPROM_AT24=y
+CONFIG_EEPROM_AT25=y
+CONFIG_SCSI=y
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
+CONFIG_DAVICOM_PHY=y
+CONFIG_ICPLUS_PHY=y
+CONFIG_LSI_ET1011C_PHY=y
+CONFIG_LXT_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_NATIONAL_PHY=y
+CONFIG_QSEMI_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_SMSC_PHY=y
+CONFIG_STE10XP=y
+CONFIG_VITESSE_PHY=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_MPC512x_PSC=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_MPC8XXX=y
+# CONFIG_HWMON is not set
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_VIDEO_ADV_DEBUG=y
+CONFIG_FB=y
+CONFIG_FB_FSL_DIU=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_STORAGE=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_FSL_USB2=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_RTC_DRV_MPC5121=y
+CONFIG_DMADEVICES=y
+CONFIG_MPC512X_DMA=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
+# CONFIG_DNOTIFY is not set
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
new file mode 100644
index 0000000000..c0fe5e7660
--- /dev/null
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -0,0 +1,127 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_CHRP is not set
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_MPC5200_SIMPLE=y
+CONFIG_PPC_EFIKA=y
+CONFIG_PPC_LITE5200=y
+CONFIG_PPC_MEDIA5200=y
+CONFIG_PPC_MPC5200_BUGFIX=y
+CONFIG_PPC_MPC5200_LPBFIFO=m
+# CONFIG_PPC_PMAC is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_PLATRAM=y
+CONFIG_MTD_UBI=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_EEPROM_AT24=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_PATA_MPC52xx=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_FEC_MPC52xx=y
+CONFIG_AMD_PHY=y
+CONFIG_LXT_PHY=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_GPIO=m
+CONFIG_SPI_MPC52xx=m
+CONFIG_SPI_MPC52xx_PSC=m
+CONFIG_SPI_SPIDEV=m
+CONFIG_GPIO_SYSFS=y
+CONFIG_SENSORS_LM80=y
+CONFIG_SENSORS_LM87=m
+CONFIG_WATCHDOG=y
+CONFIG_MFD_SM501=m
+CONFIG_DRM=y
+CONFIG_FB_FOREIGN_ENDIAN=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_SM501=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_DRIVERS is not set
+# CONFIG_SND_PCI is not set
+# CONFIG_SND_PPC is not set
+# CONFIG_SND_SPI is not set
+# CONFIG_SND_USB is not set
+CONFIG_SND_SOC=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_STORAGE=y
+CONFIG_NEW_LEDS=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_RTC_DRV_PCF8563=m
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=m
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
new file mode 100644
index 0000000000..83c4710017
--- /dev/null
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -0,0 +1,104 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_83xx=y
+CONFIG_MPC831x_RDB=y
+CONFIG_MPC832x_RDB=y
+CONFIG_MPC834x_ITX=y
+CONFIG_MPC836x_RDK=y
+CONFIG_MPC837x_RDB=y
+CONFIG_ASP834x=y
+CONFIG_QE_GPIO=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_ESP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_ATA=y
+CONFIG_SATA_FSL=y
+CONFIG_SATA_SIL=y
+CONFIG_NETDEVICES=y
+CONFIG_UCC_GETH=y
+CONFIG_GIANFAR=y
+CONFIG_DAVICOM_PHY=y
+CONFIG_ICPLUS_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_VITESSE_PHY=y
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+CONFIG_WATCHDOG=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc85xx_base.config b/arch/powerpc/configs/mpc85xx_base.config
new file mode 100644
index 0000000000..a1e4d72ed3
--- /dev/null
+++ b/arch/powerpc/configs/mpc85xx_base.config
@@ -0,0 +1,20 @@
+CONFIG_MATH_EMULATION=y
+CONFIG_MPC8536_DS=y
+CONFIG_MPC85xx_DS=y
+CONFIG_MPC85xx_MDS=y
+CONFIG_MPC85xx_RDB=y
+CONFIG_KSI8560=y
+CONFIG_MVME2500=y
+CONFIG_P1010_RDB=y
+CONFIG_P1022_DS=y
+CONFIG_P1022_RDK=y
+CONFIG_P1023_RDB=y
+CONFIG_TWR_P102x=y
+CONFIG_SOCRATES=y
+CONFIG_STX_GP3=y
+CONFIG_TQM8540=y
+CONFIG_TQM8541=y
+CONFIG_TQM8548=y
+CONFIG_TQM8555=y
+CONFIG_TQM8560=y
+CONFIG_XES_MPC85xx=y
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
new file mode 100644
index 0000000000..5c56d36cdf
--- /dev/null
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -0,0 +1,42 @@
+CONFIG_PPC_8xx=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_BUG is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_EPOLL is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MPC86XADS=y
+CONFIG_GEN_RTC=y
+CONFIG_HZ_1000=y
+CONFIG_MATH_EMULATION=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT4_FS=y
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC_CCITT=y
+CONFIG_CRC32_SLICEBY4=y
diff --git a/arch/powerpc/configs/mpc86xx_base.config b/arch/powerpc/configs/mpc86xx_base.config
new file mode 100644
index 0000000000..632c014b12
--- /dev/null
+++ b/arch/powerpc/configs/mpc86xx_base.config
@@ -0,0 +1,8 @@
+CONFIG_PPC_86xx=y
+CONFIG_GEF_PPC9A=y
+CONFIG_GEF_SBC310=y
+CONFIG_GEF_SBC610=y
+CONFIG_MVME7100=y
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
new file mode 100644
index 0000000000..56b876e418
--- /dev/null
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -0,0 +1,81 @@
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_JIT=y
+CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_PPC_8xx=y
+CONFIG_8xx_GPIO=y
+CONFIG_SMC_UCODE_PATCH=y
+CONFIG_PIN_TLB=y
+CONFIG_GEN_RTC=y
+CONFIG_HZ_100=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PPC_16K_PAGES=y
+CONFIG_ADVANCED_OPTIONS=y
+# CONFIG_SECCOMP is not set
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_2 is not set
+# CONFIG_MTD_CFI_I1 is not set
+# CONFIG_MTD_CFI_I2 is not set
+CONFIG_MTD_CFI_I4=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+# CONFIG_FS_ENET_HAS_SCC is not set
+CONFIG_DAVICOM_PHY=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_8xxx_WDT=y
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_CRC32_SLICEBY4=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_VM_PGTABLE=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_BDI_SWITCH=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_GENERIC_PTDUMP=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
new file mode 100644
index 0000000000..d1c7fd5bf3
--- /dev/null
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -0,0 +1,126 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_UTS_NS is not set
+# CONFIG_IPC_NS is not set
+# CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_EMBEDDED6xx=y
+CONFIG_MVME5100=y
+CONFIG_KVM_GUEST=y
+CONFIG_HZ_100=y
+CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_LAPB=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_EEPROM_LEGACY=m
+CONFIG_NETDEVICES=y
+CONFIG_TUN=m
+# CONFIG_NET_VENDOR_3COM is not set
+CONFIG_E100=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=10
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=m
+CONFIG_EXT4_FS=m
+CONFIG_XFS_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=m
+CONFIG_CIFS=m
+CONFIG_NLS=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_UTF8=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC_T10DIF=y
+CONFIG_XZ_DEC=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=20
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_DEFLATE=m
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
new file mode 100644
index 0000000000..61993944db
--- /dev/null
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -0,0 +1,172 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_PASEMI=y
+CONFIG_PPC_PASEMI_IOMMU=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_HZ_1000=y
+# CONFIG_SECCOMP is not set
+CONFIG_PCI_MSI=y
+CONFIG_PCCARD=y
+CONFIG_ELECTRA_CF=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_SLRAM=y
+CONFIG_MTD_PHRAM=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_PASEMI=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_EEPROM_LEGACY=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_ATA=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_MV=y
+CONFIG_PATA_PCMCIA=y
+CONFIG_PATA_PLATFORM=y
+CONFIG_PATA_OF_PLATFORM=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=y
+CONFIG_MD_RAID456=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_TIGON3=y
+CONFIG_E1000=y
+CONFIG_PASEMI_MAC=y
+CONFIG_MARVELL_PHY=y
+CONFIG_INPUT_JOYDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+CONFIG_LEGACY_PTY_COUNT=4
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_PASEMI=y
+CONFIG_SENSORS_LM85=y
+CONFIG_SENSORS_LM90=y
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VGA16=y
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+CONFIG_FB_RADEON=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_LOGO=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_USB_AUDIO=y
+CONFIG_SND_USB_USX2Y=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_SL811_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_EDAC=y
+CONFIG_EDAC_PASEMI=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RAS=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_JFFS2_FS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V4=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRC_CCITT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_BLOWFISH=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
new file mode 100644
index 0000000000..a205da9ee5
--- /dev/null
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -0,0 +1,297 @@
+CONFIG_ALTIVEC=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_PMAC=y
+CONFIG_GEN_RTC=y
+CONFIG_HIGHMEM=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
+CONFIG_APM_EMULATION=y
+CONFIG_PCCARD=m
+CONFIG_YENTA=m
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP_DCCP=m
+CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_CFG80211=m
+CONFIG_MAC80211=m
+CONFIG_MAC80211_LEDS=y
+# CONFIG_STANDALONE is not set
+CONFIG_CONNECTOR=y
+CONFIG_MAC_FLOPPY=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_MESH=y
+CONFIG_SCSI_MAC53C94=y
+CONFIG_ATA=y
+CONFIG_PATA_MACIO=y
+CONFIG_PATA_PDC2027X=y
+CONFIG_PATA_WINBOND=y
+CONFIG_PATA_PCMCIA=m
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_ADB=y
+CONFIG_ADB_CUDA=y
+CONFIG_ADB_PMU=y
+CONFIG_ADB_PMU_LED=y
+CONFIG_ADB_PMU_LED_DISK=y
+CONFIG_PMAC_APM_EMU=m
+CONFIG_PMAC_MEDIABAY=y
+CONFIG_PMAC_BACKLIGHT=y
+CONFIG_PMAC_BACKLIGHT_LEGACY=y
+CONFIG_INPUT_ADBHID=y
+CONFIG_MAC_EMUMOUSEBTN=y
+CONFIG_THERM_WINDTUNNEL=m
+CONFIG_THERM_ADT746X=m
+CONFIG_PMAC_RACKMETER=m
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_TUN=m
+CONFIG_PCNET32=y
+CONFIG_MACE=y
+CONFIG_BMAC=y
+CONFIG_SUNGEM=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_ASYNC=y
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_USB_USBNET=m
+# CONFIG_USB_NET_CDC_SUBSET is not set
+CONFIG_B43=m
+CONFIG_B43LEGACY=m
+CONFIG_P54_COMMON=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_MOUSE_PS2 is not set
+CONFIG_MOUSE_APPLETOUCH=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=m
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+CONFIG_NVRAM=y
+CONFIG_I2C_CHARDEV=m
+CONFIG_APM_POWER=y
+CONFIG_BATTERY_PMU=y
+CONFIG_HWMON=m
+CONFIG_AGP=m
+CONFIG_AGP_UNINORTH=m
+CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
+CONFIG_DRM_R128=m
+CONFIG_FB=y
+CONFIG_FB_OF=y
+CONFIG_FB_CONTROL=y
+CONFIG_FB_PLATINUM=y
+CONFIG_FB_VALKYRIE=y
+CONFIG_FB_CT65550=y
+CONFIG_FB_IMSTT=y
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_ATY128=y
+CONFIG_FB_ATY=y
+CONFIG_FB_ATY_CT=y
+CONFIG_FB_ATY_GX=y
+CONFIG_FB_3DFX=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_AOA=m
+CONFIG_SND_AOA_FABRIC_LAYOUT=m
+CONFIG_SND_AOA_ONYX=m
+CONFIG_SND_AOA_TAS=m
+CONFIG_SND_AOA_TOONIE=m
+CONFIG_SND_USB_AUDIO=m
+CONFIG_HID_GYRATION=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_USB_DYNAMIC_MINORS=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE_ONETOUCH=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_APPLEDISPLAY=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_BOOTX_TEXT=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_DEFLATE=m
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
new file mode 100644
index 0000000000..ee84ade7a0
--- /dev/null
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -0,0 +1,345 @@
+CONFIG_PPC64=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
+CONFIG_NUMA_BALANCING=y
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_SCOM_DEBUGFS is not set
+CONFIG_OPAL_PRD=y
+CONFIG_PPC_MEMTRACE=y
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_PPC_UV=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
+CONFIG_SCHED_SMT=y
+CONFIG_PM=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_SIT=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_POWERNV_FLASH=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=m
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_BLK_DEV_NVME=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_NETCONSOLE=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VORTEX=m
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=m
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+# CONFIG_CAVIUM_PTP is not set
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IXGBE=m
+CONFIG_I40E=m
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_FPGA=y
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_CORE_IPOIB=y
+CONFIG_MYRI10GE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_USB_NET_DRIVERS=m
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=m
+CONFIG_SERIAL_JSM=m
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+# CONFIG_DEVPORT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_AST=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=m
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=m
+CONFIG_FB_IBM_GXT4500=m
+CONFIG_LCD_PLATFORM=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+CONFIG_HID_A4TECH=m
+CONFIG_HID_APPLE=m
+CONFIG_HID_BELKIN=m
+CONFIG_HID_CHERRY=m
+CONFIG_HID_CHICONY=m
+CONFIG_HID_CYPRESS=m
+CONFIG_HID_EZKEY=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_ITE=m
+CONFIG_HID_KENSINGTON=m
+CONFIG_HID_LOGITECH=m
+CONFIG_HID_MICROSOFT=m
+CONFIG_HID_MONTEREY=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_USB_HID=m
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PCI=m
+CONFIG_USB_STORAGE=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
+# CONFIG_VIRTIO_MENU is not set
+CONFIG_LIBNVDIMM=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_AUTOFS_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_OVERLAY_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PPC_EMULATED_STATS=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
+CONFIG_CRYPTO_MD5_PPC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_VMX=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=m
+CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
+CONFIG_PRINTK_CALLER=y
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
new file mode 100644
index 0000000000..7e48693775
--- /dev/null
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -0,0 +1,74 @@
+CONFIG_40x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC4xx_GPIO=y
+CONFIG_ACADIA=y
+CONFIG_HOTFOOT=y
+CONFIG_KILAUEA=y
+CONFIG_MAKALU=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=m
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_GLUEBI=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_NETDEVICES=y
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+CONFIG_SERIO=m
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_GPIO=m
+CONFIG_I2C_IBM_IIC=m
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_FB=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
new file mode 100644
index 0000000000..8b595f6706
--- /dev/null
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -0,0 +1,99 @@
+CONFIG_44x=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_BAMBOO=y
+CONFIG_BLUESTONE=y
+CONFIG_SAM440EP=y
+CONFIG_SEQUOIA=y
+CONFIG_TAISHAN=y
+CONFIG_KATMAI=y
+CONFIG_RAINIER=y
+CONFIG_WARP=y
+CONFIG_ARCHES=y
+CONFIG_CANYONLANDS=y
+CONFIG_GLACIER=y
+CONFIG_REDWOOD=y
+CONFIG_EIGER=y
+CONFIG_YOSEMITE=y
+CONFIG_PPC4xx_GPIO=y
+CONFIG_MATH_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_BRIDGE=m
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_RAW_NAND=m
+CONFIG_MTD_NAND_NDFC=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_GLUEBI=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+CONFIG_SCSI=m
+CONFIG_BLK_DEV_SD=m
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+CONFIG_TUN=m
+CONFIG_IBM_EMAC=y
+# CONFIG_INPUT is not set
+CONFIG_SERIO=m
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_GPIO=m
+CONFIG_I2C_IBM_IIC=m
+# CONFIG_HWMON is not set
+CONFIG_FB=m
+CONFIG_USB=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+# CONFIG_USB_OHCI_HCD_PCI is not set
+CONFIG_USB_STORAGE=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=m
+CONFIG_CRAMFS=y
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_CRC_T10DIF=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+# CONFIG_CRYPTO_HW is not set
+CONFIG_VIRTUALIZATION=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
new file mode 100644
index 0000000000..6e7b9e8fd2
--- /dev/null
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -0,0 +1,473 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_FULL=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_LSM=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_PSI=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
+CONFIG_NUMA_BALANCING=y
+CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_PROFILING=y
+CONFIG_PPC64=y
+CONFIG_NR_CPUS=2048
+CONFIG_DTL=y
+CONFIG_PPC_SMLPAR=y
+CONFIG_IBMEBUS=y
+CONFIG_PAPR_SCM=m
+CONFIG_PPC_SVM=y
+CONFIG_PPC_MAPLE=y
+CONFIG_PPC_PASEMI=y
+CONFIG_PPC_PASEMI_IOMMU=y
+CONFIG_PPC_PS3=y
+CONFIG_PS3_DISK=m
+CONFIG_PS3_ROM=m
+CONFIG_PS3_FLASH=m
+CONFIG_PS3_LPM=m
+CONFIG_PPC_IBM_CELL_BLADE=y
+CONFIG_RTAS_FLASH=m
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_PMAC64=y
+CONFIG_HZ_100=y
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_CRASH_DUMP=y
+CONFIG_FA_DUMP=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_SCHED_SMT=y
+CONFIG_PPC_SECURE_BOOT=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_Z3FOLD=y
+CONFIG_ZSMALLOC=y
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLAB_FREELIST_RANDOM=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_RPA=m
+CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PCCARD=y
+CONFIG_ELECTRA_CF=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_FD=y
+CONFIG_ZRAM=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IBMVSCSI=y
+CONFIG_SCSI_IBMVFC=m
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_MV=y
+CONFIG_SATA_SVW=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_MACIO=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_UNSTRIPED=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_CACHE=m
+CONFIG_DM_WRITECACHE=m
+CONFIG_DM_EBS=m
+CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_DUST=m
+CONFIG_DM_INIT=y
+CONFIG_DM_UEVENT=y
+CONFIG_ADB_PMU=y
+CONFIG_PMAC_SMU=y
+CONFIG_WINDFARM=y
+CONFIG_WINDFARM_PM81=y
+CONFIG_WINDFARM_PM91=y
+CONFIG_WINDFARM_PM112=y
+CONFIG_WINDFARM_PM121=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VORTEX=m
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=m
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_IBMVETH=m
+CONFIG_EHEA=m
+CONFIG_IBMVNIC=m
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IXGBE=m
+CONFIG_I40E=m
+CONFIG_MLX4_EN=m
+CONFIG_MYRI10GE=m
+CONFIG_S2IO=m
+CONFIG_PASEMI_MAC=y
+CONFIG_NETXEN_NIC=m
+CONFIG_SUNGEM=y
+CONFIG_GELIC_NET=m
+CONFIG_GELIC_WIRELESS=y
+CONFIG_SPIDER_NET=m
+CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=m
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_ICOM=m
+CONFIG_SERIAL_JSM=m
+CONFIG_HVC_CONSOLE=y
+CONFIG_HVC_RTAS=y
+CONFIG_HVCS=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IBM_BSR=m
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_AMD8111=y
+CONFIG_I2C_PASEMI=y
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_MATROX_I2C=m
+CONFIG_FB_MATROX_MAVEN=m
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_FB_PS3=m
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_AOA=m
+CONFIG_SND_AOA_FABRIC_LAYOUT=m
+CONFIG_SND_AOA_ONYX=m
+CONFIG_SND_AOA_TAS=m
+CONFIG_SND_AOA_TOONIE=m
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_USB_APPLEDISPLAY=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_EDAC=y
+CONFIG_EDAC_PASEMI=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VHOST_NET=m
+CONFIG_RAS=y
+CONFIG_LIBNVDIMM=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_AUTOFS_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_OVERLAY_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_YAMA=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
+CONFIG_IMA=y
+CONFIG_IMA_KEXEC=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_IMA_ARCH_POLICY=y
+CONFIG_IMA_APPRAISE_MODSIG=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
+CONFIG_CRYPTO_VPMSUM_TESTER=m
+CONFIG_CRYPTO_MD5_PPC=m
+CONFIG_CRYPTO_SHA1_PPC=m
+CONFIG_CRYPTO_AES_GCM_P10=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_CRYPTO_DEV_VMX=y
+CONFIG_SYSTEM_TRUSTED_KEYRING=y
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PRINTK_CALLER=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_PAGE_OWNER=y
+CONFIG_PAGE_POISONING=y
+CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_WX=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+# CONFIG_DEBUG_VM_PGTABLE is not set
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_SCHEDSTATS=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_LOCK_TORTURE_TEST=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_STACK_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_IO_STRICT_DEVMEM=y
+CONFIG_PPC_EMULATED_STATS=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_BOOTX_TEXT=y
+CONFIG_KUNIT=m
+CONFIG_KUNIT_ALL_TESTS=m
+CONFIG_LKDTM=m
+CONFIG_TEST_MIN_HEAP=m
+CONFIG_TEST_DIV64=m
+CONFIG_BACKTRACE_SELF_TEST=m
+CONFIG_TEST_REF_TRACKER=m
+CONFIG_RBTREE_TEST=m
+CONFIG_REED_SOLOMON_TEST=m
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=m
+CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
+CONFIG_STRING_SELFTEST=m
+CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
+CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
+CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
+CONFIG_TEST_VMALLOC=m
+CONFIG_TEST_USER_COPY=m
+CONFIG_TEST_BPF=m
+CONFIG_TEST_BLACKHOLE_DEV=m
+CONFIG_FIND_BIT_BENCHMARK=m
+CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
+CONFIG_LINEAR_RANGES_TEST=m
+CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
+CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_MEMINIT=m
+CONFIG_TEST_FREE_PAGES=m
+CONFIG_MEMTEST=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
new file mode 100644
index 0000000000..624c371ffc
--- /dev/null
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -0,0 +1,244 @@
+CONFIG_PPC64=y
+CONFIG_PPC_BOOK3E_64=y
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+CONFIG_CORENET_GENERIC=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_BINFMT_MISC=m
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_PCI_MSI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_PCCARD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+CONFIG_BRIDGE=m
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_BLK_DEV_FD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ATA=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_SVW=y
+CONFIG_PATA_AMD=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=y
+CONFIG_MD_RAID456=y
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_MACINTOSH_DRIVERS=y
+CONFIG_WINDFARM=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=m
+CONFIG_VORTEX=y
+CONFIG_ACENIC=y
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=y
+CONFIG_TIGON3=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_SUNGEM=y
+CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_AMD8111=y
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_MATROX_I2C=m
+CONFIG_FB_MATROX_MAVEN=m
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_USB_APPLEDISPLAY=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=y
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_FS_DAX=y
+CONFIG_AUTOFS_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ppc64le.config b/arch/powerpc/configs/ppc64le.config
new file mode 100644
index 0000000000..14dca1062c
--- /dev/null
+++ b/arch/powerpc/configs/ppc64le.config
@@ -0,0 +1,2 @@
+CONFIG_PPC64=y
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
new file mode 100644
index 0000000000..eaf3273372
--- /dev/null
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -0,0 +1,1111 @@
+CONFIG_FSL_EMB_PERFMON=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_PPC_MPC52xx=y
+CONFIG_PPC_EFIKA=y
+CONFIG_PPC_MPC5200_BUGFIX=y
+CONFIG_PPC_82xx=y
+CONFIG_EP8248E=y
+CONFIG_MGCOGE=y
+CONFIG_PPC_83xx=y
+CONFIG_MPC831x_RDB=y
+CONFIG_MPC832x_RDB=y
+CONFIG_MPC834x_ITX=y
+CONFIG_MPC836x_RDK=y
+CONFIG_MPC837x_RDB=y
+CONFIG_ASP834x=y
+CONFIG_PPC_86xx=y
+CONFIG_GEF_SBC610=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=m
+CONFIG_CPU_FREQ_GOV_ONDEMAND=m
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
+CONFIG_CPU_FREQ_PMAC=y
+CONFIG_TAU=y
+CONFIG_TAU_AVERAGE=y
+CONFIG_QE_GPIO=y
+CONFIG_MCU_MPC8349EMITX=y
+CONFIG_HIGHMEM=y
+CONFIG_HZ_1000=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_BINFMT_MISC=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
+CONFIG_ISA=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_MSI=y
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
+CONFIG_PD6729=m
+CONFIG_I82092=m
+CONFIG_I82365=m
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_IP_DCCP=m
+CONFIG_TIPC=m
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+CONFIG_ATM_LANE=m
+CONFIG_ATM_BR2684=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBPA10X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_CFG80211=m
+CONFIG_MAC80211=m
+CONFIG_MAC80211_MESH=y
+CONFIG_MAC80211_LEDS=y
+CONFIG_MAC80211_DEBUGFS=y
+CONFIG_NET_9P=m
+CONFIG_NET_9P_VIRTIO=m
+CONFIG_DEBUG_DEVRES=y
+CONFIG_CONNECTOR=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_PARPORT_SERIAL=m
+CONFIG_PARPORT_1284=y
+CONFIG_PNP=y
+CONFIG_ISAPNP=y
+CONFIG_MAC_FLOPPY=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_VIRTIO_BLK=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_SENSORS_TSL2550=m
+CONFIG_EEPROM_AT24=m
+CONFIG_EEPROM_LEGACY=m
+CONFIG_EEPROM_MAX6875=m
+CONFIG_EEPROM_93CX6=m
+CONFIG_RAID_ATTRS=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_MESH=m
+CONFIG_SCSI_MAC53C94=m
+CONFIG_SCSI_LOWLEVEL_PCMCIA=y
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+CONFIG_SATA_FSL=m
+CONFIG_PDC_ADMA=m
+CONFIG_ATA_PIIX=m
+CONFIG_PATA_MACIO=y
+CONFIG_PATA_MPC52xx=m
+CONFIG_PATA_OPTIDMA=m
+CONFIG_PATA_SCH=m
+CONFIG_PATA_VIA=m
+CONFIG_PATA_PLATFORM=m
+CONFIG_PATA_OF_PLATFORM=m
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_UEVENT=y
+CONFIG_FIREWIRE=m
+CONFIG_FIREWIRE_OHCI=m
+CONFIG_FIREWIRE_SBP2=m
+CONFIG_ADB=y
+CONFIG_ADB_CUDA=y
+CONFIG_ADB_PMU=y
+CONFIG_ADB_PMU_LED=y
+CONFIG_ADB_PMU_LED_DISK=y
+CONFIG_PMAC_APM_EMU=y
+CONFIG_PMAC_MEDIABAY=y
+CONFIG_PMAC_BACKLIGHT=y
+CONFIG_ADB_MACIO=y
+CONFIG_INPUT_ADBHID=y
+CONFIG_MAC_EMUMOUSEBTN=y
+CONFIG_THERM_WINDTUNNEL=m
+CONFIG_THERM_ADT746X=m
+CONFIG_WINDFARM=y
+CONFIG_PMAC_RACKMETER=m
+CONFIG_SENSORS_AMS=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_NET_FC=y
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_NETCONSOLE=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_ATM_TCP=m
+CONFIG_ATM_LANAI=m
+CONFIG_ATM_ENI=m
+CONFIG_ATM_NICSTAR=m
+CONFIG_ATM_IDT77252=m
+CONFIG_ATM_HE=m
+CONFIG_EL3=m
+CONFIG_PCMCIA_3C574=m
+CONFIG_PCMCIA_3C589=m
+CONFIG_VORTEX=m
+CONFIG_TYPHOON=m
+CONFIG_ADAPTEC_STARFIRE=m
+CONFIG_ACENIC=m
+CONFIG_AMD8111_ETH=m
+CONFIG_PCNET32=m
+CONFIG_PCMCIA_NMCLAN=m
+CONFIG_MACE=m
+CONFIG_BMAC=m
+CONFIG_ATL1=m
+CONFIG_B44=m
+CONFIG_BNX2=m
+CONFIG_TIGON3=m
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=m
+CONFIG_CHELSIO_T1_1G=y
+CONFIG_CHELSIO_T3=m
+CONFIG_NET_TULIP=y
+CONFIG_DE2104X=m
+CONFIG_TULIP=m
+CONFIG_TULIP_MMIO=y
+CONFIG_WINBOND_840=m
+CONFIG_DM9102=m
+CONFIG_ULI526X=m
+CONFIG_PCMCIA_XIRCOM=m
+CONFIG_DL2K=m
+CONFIG_SUNDANCE=m
+CONFIG_S2IO=m
+CONFIG_FEC_MPC52xx=m
+CONFIG_GIANFAR=m
+CONFIG_PCMCIA_FMVJ18X=m
+CONFIG_E100=m
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IGB=m
+CONFIG_IXGBE=m
+CONFIG_MV643XX_ETH=m
+CONFIG_SKGE=m
+CONFIG_SKY2=m
+CONFIG_MYRI10GE=m
+CONFIG_FEALNX=m
+CONFIG_NATSEMI=m
+CONFIG_NS83820=m
+CONFIG_PCMCIA_AXNET=m
+CONFIG_NE2000=m
+CONFIG_NE2K_PCI=m
+CONFIG_PCMCIA_PCNET=m
+CONFIG_ULTRA=m
+CONFIG_FORCEDETH=m
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_QLA3XXX=m
+CONFIG_NETXEN_NIC=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+# CONFIG_8139TOO_PIO is not set
+CONFIG_8139TOO_8129=y
+CONFIG_R8169=m
+CONFIG_R6040=m
+CONFIG_SC92031=m
+CONFIG_SIS900=m
+CONFIG_SIS190=m
+CONFIG_SFC=m
+CONFIG_PCMCIA_SMC91C92=m
+CONFIG_EPIC100=m
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNGEM=m
+CONFIG_CASSINI=m
+CONFIG_NIU=m
+CONFIG_TEHUTI=m
+CONFIG_TLAN=m
+CONFIG_VIA_RHINE=m
+CONFIG_VIA_RHINE_MMIO=y
+CONFIG_VIA_VELOCITY=m
+CONFIG_PCMCIA_XIRC2PS=m
+CONFIG_FDDI=y
+CONFIG_SKFP=m
+CONFIG_NET_SB1000=m
+CONFIG_BROADCOM_PHY=m
+CONFIG_CICADA_PHY=m
+CONFIG_DAVICOM_PHY=m
+CONFIG_ICPLUS_PHY=m
+CONFIG_LXT_PHY=m
+CONFIG_MARVELL_PHY=m
+CONFIG_QSEMI_PHY=m
+CONFIG_REALTEK_PHY=m
+CONFIG_SMSC_PHY=m
+CONFIG_VITESSE_PHY=m
+CONFIG_PLIP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOATM=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+CONFIG_USB_CATC=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_USBNET=m
+CONFIG_USB_NET_DM9601=m
+CONFIG_USB_NET_SMSC95XX=m
+CONFIG_USB_NET_GL620A=m
+CONFIG_USB_NET_PLUSB=m
+CONFIG_USB_NET_MCS7830=m
+CONFIG_USB_NET_RNDIS_HOST=m
+CONFIG_USB_ALI_M5632=y
+CONFIG_USB_AN2720=y
+CONFIG_USB_EPSON2888=y
+CONFIG_USB_KC2190=y
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_SERIAL=m
+CONFIG_MOUSE_APPLETOUCH=m
+CONFIG_MOUSE_VSXXXAA=m
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_ANALOG=m
+CONFIG_JOYSTICK_A3D=m
+CONFIG_JOYSTICK_ADI=m
+CONFIG_JOYSTICK_COBRA=m
+CONFIG_JOYSTICK_GF2K=m
+CONFIG_JOYSTICK_GRIP=m
+CONFIG_JOYSTICK_GRIP_MP=m
+CONFIG_JOYSTICK_GUILLEMOT=m
+CONFIG_JOYSTICK_INTERACT=m
+CONFIG_JOYSTICK_SIDEWINDER=m
+CONFIG_JOYSTICK_TMDC=m
+CONFIG_JOYSTICK_IFORCE=m
+CONFIG_JOYSTICK_IFORCE_USB=y
+CONFIG_JOYSTICK_IFORCE_232=y
+CONFIG_JOYSTICK_WARRIOR=m
+CONFIG_JOYSTICK_MAGELLAN=m
+CONFIG_JOYSTICK_SPACEORB=m
+CONFIG_JOYSTICK_SPACEBALL=m
+CONFIG_JOYSTICK_STINGER=m
+CONFIG_JOYSTICK_TWIDJOY=m
+CONFIG_JOYSTICK_ZHENHUA=m
+CONFIG_JOYSTICK_DB9=m
+CONFIG_JOYSTICK_GAMECON=m
+CONFIG_JOYSTICK_TURBOGRAFX=m
+CONFIG_JOYSTICK_JOYDUMP=m
+CONFIG_JOYSTICK_XPAD=m
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=m
+CONFIG_TABLET_USB_AIPTEK=m
+CONFIG_TABLET_USB_KBTAB=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=m
+CONFIG_INPUT_ATI_REMOTE2=m
+CONFIG_INPUT_KEYSPAN_REMOTE=m
+CONFIG_INPUT_POWERMATE=m
+CONFIG_INPUT_YEALINK=m
+CONFIG_INPUT_CM109=m
+CONFIG_INPUT_UINPUT=m
+CONFIG_SERIO_RAW=m
+CONFIG_GAMEPORT_NS558=m
+CONFIG_GAMEPORT_L4=m
+CONFIG_GAMEPORT_EMU10K1=m
+CONFIG_GAMEPORT_FM801=m
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SYNCLINK_GT=m
+CONFIG_NOZOMI=m
+CONFIG_N_HDLC=m
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_CS=m
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_UARTLITE=m
+CONFIG_SERIAL_PMACZILOG=m
+CONFIG_SERIAL_MPC52xx=y
+CONFIG_SERIAL_MPC52xx_CONSOLE=y
+CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
+CONFIG_SERIAL_JSM=m
+CONFIG_PRINTER=m
+CONFIG_LP_CONSOLE=y
+CONFIG_PPDEV=m
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_NVRAM=y
+CONFIG_DTLK=m
+CONFIG_IPWIRELESS=m
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_HYDRA=m
+CONFIG_I2C_MPC=m
+CONFIG_I2C_PCA_PLATFORM=m
+CONFIG_I2C_SIMTEC=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_TINY_USB=m
+CONFIG_I2C_PCA_ISA=m
+CONFIG_I2C_STUB=m
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_MPC8XXX=y
+CONFIG_W1=m
+CONFIG_W1_MASTER_DS2490=m
+CONFIG_W1_MASTER_DS2482=m
+CONFIG_W1_SLAVE_THERM=m
+CONFIG_W1_SLAVE_SMEM=m
+CONFIG_W1_SLAVE_DS2433=m
+CONFIG_W1_SLAVE_DS2433_CRC=y
+CONFIG_APM_POWER=m
+CONFIG_BATTERY_PMU=m
+CONFIG_HWMON=m
+CONFIG_SENSORS_AD7418=m
+CONFIG_SENSORS_ADM1021=m
+CONFIG_SENSORS_ADM1025=m
+CONFIG_SENSORS_ADM1026=m
+CONFIG_SENSORS_ADM1029=m
+CONFIG_SENSORS_ADM1031=m
+CONFIG_SENSORS_ADM9240=m
+CONFIG_SENSORS_ADT7470=m
+CONFIG_SENSORS_ATXP1=m
+CONFIG_SENSORS_DS1621=m
+CONFIG_SENSORS_F75375S=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_GL520SM=m
+CONFIG_SENSORS_MAX1619=m
+CONFIG_SENSORS_MAX6650=m
+CONFIG_SENSORS_LM63=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM77=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM87=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_LM92=m
+CONFIG_SENSORS_LM93=m
+CONFIG_SENSORS_PCF8591=m
+CONFIG_SENSORS_SIS5595=m
+CONFIG_SENSORS_SMSC47M192=m
+CONFIG_SENSORS_ADS7828=m
+CONFIG_SENSORS_THMC50=m
+CONFIG_SENSORS_VIA686A=m
+CONFIG_SENSORS_VT8231=m
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83791D=m
+CONFIG_SENSORS_W83792D=m
+CONFIG_SENSORS_W83793=m
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83L786NG=m
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_WATCHDOG_RTAS=m
+CONFIG_USBPCWATCHDOG=m
+CONFIG_SSB_PCMCIAHOST=y
+CONFIG_MFD_SM501=m
+CONFIG_MFD_SM501_GPIO=y
+CONFIG_AGP=y
+CONFIG_AGP_UNINORTH=y
+CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+CONFIG_DRM_VIA=m
+CONFIG_DRM_SAVAGE=m
+CONFIG_FB=y
+CONFIG_FB_CIRRUS=m
+CONFIG_FB_OF=y
+CONFIG_FB_PLATINUM=y
+CONFIG_FB_VALKYRIE=y
+CONFIG_FB_CT65550=y
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+CONFIG_FB_RIVA=m
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_MATROX_I2C=m
+CONFIG_FB_MATROX_MAVEN=m
+CONFIG_FB_RADEON=y
+CONFIG_FB_ATY128=y
+CONFIG_FB_ATY=y
+CONFIG_FB_ATY_CT=y
+CONFIG_FB_ATY_GENERIC_LCD=y
+CONFIG_FB_ATY_GX=y
+CONFIG_FB_S3=m
+CONFIG_FB_SAVAGE=m
+CONFIG_FB_SAVAGE_I2C=y
+CONFIG_FB_SAVAGE_ACCEL=y
+CONFIG_FB_NEOMAGIC=m
+CONFIG_FB_KYRO=m
+CONFIG_FB_3DFX=m
+CONFIG_FB_3DFX_ACCEL=y
+CONFIG_FB_VOODOO1=m
+CONFIG_FB_TRIDENT=m
+CONFIG_FB_SM501=m
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_PLATFORM=m
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_DYNAMIC_MINORS=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DEBUG=y
+CONFIG_SND_DEBUG_VERBOSE=y
+CONFIG_SND_PCM_XRUN_DEBUG=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_MTS64=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+CONFIG_SND_PORTMAN2X4=m
+CONFIG_SND_AC97_POWER_SAVE=y
+CONFIG_SND_AC97_POWER_SAVE_DEFAULT=5
+CONFIG_SND_AD1889=m
+CONFIG_SND_ALS300=m
+CONFIG_SND_ALS4000=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_ATIIXP=m
+CONFIG_SND_ATIIXP_MODEM=m
+CONFIG_SND_AU8810=m
+CONFIG_SND_AU8820=m
+CONFIG_SND_AU8830=m
+CONFIG_SND_AW2=m
+CONFIG_SND_AZT3328=m
+CONFIG_SND_BT87X=m
+CONFIG_SND_CA0106=m
+CONFIG_SND_CMIPCI=m
+CONFIG_SND_OXYGEN=m
+CONFIG_SND_CS4281=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_DARLA20=m
+CONFIG_SND_GINA20=m
+CONFIG_SND_LAYLA20=m
+CONFIG_SND_DARLA24=m
+CONFIG_SND_GINA24=m
+CONFIG_SND_LAYLA24=m
+CONFIG_SND_MONA=m
+CONFIG_SND_MIA=m
+CONFIG_SND_ECHO3G=m
+CONFIG_SND_INDIGO=m
+CONFIG_SND_INDIGOIO=m
+CONFIG_SND_INDIGODJ=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_EMU10K1X=m
+CONFIG_SND_ENS1370=m
+CONFIG_SND_ENS1371=m
+CONFIG_SND_ES1938=m
+CONFIG_SND_ES1968=m
+CONFIG_SND_FM801=m
+CONFIG_SND_HDSP=m
+CONFIG_SND_HDSPM=m
+CONFIG_SND_ICE1712=m
+CONFIG_SND_ICE1724=m
+CONFIG_SND_KORG1212=m
+CONFIG_SND_MAESTRO3=m
+CONFIG_SND_MIXART=m
+CONFIG_SND_NM256=m
+CONFIG_SND_PCXHR=m
+CONFIG_SND_RIPTIDE=m
+CONFIG_SND_RME32=m
+CONFIG_SND_RME96=m
+CONFIG_SND_RME9652=m
+CONFIG_SND_SONICVIBES=m
+CONFIG_SND_TRIDENT=m
+CONFIG_SND_VIA82XX=m
+CONFIG_SND_VIA82XX_MODEM=m
+CONFIG_SND_VIRTUOSO=m
+CONFIG_SND_VX222=m
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_AOA=m
+CONFIG_SND_AOA_FABRIC_LAYOUT=m
+CONFIG_SND_AOA_ONYX=m
+CONFIG_SND_AOA_TAS=m
+CONFIG_SND_AOA_TOONIE=m
+CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_USB_USX2Y=m
+CONFIG_SND_USB_CAIAQ=m
+CONFIG_SND_USB_CAIAQ_INPUT=y
+# CONFIG_SND_PCMCIA is not set
+CONFIG_HIDRAW=y
+CONFIG_HID_GYRATION=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_EHCI_FSL=m
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_UHCI_HCD=m
+CONFIG_USB_SL811_HCD=m
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_WDM=m
+CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE_DATAFAB=m
+CONFIG_USB_STORAGE_FREECOM=m
+CONFIG_USB_STORAGE_USBAT=m
+CONFIG_USB_STORAGE_SDDR09=m
+CONFIG_USB_STORAGE_SDDR55=m
+CONFIG_USB_STORAGE_JUMPSHOT=m
+CONFIG_USB_STORAGE_ALAUDA=m
+CONFIG_USB_STORAGE_ONETOUCH=m
+CONFIG_USB_STORAGE_KARMA=m
+CONFIG_USB_STORAGE_CYPRESS_ATACB=m
+CONFIG_USB_MDC800=m
+CONFIG_USB_MICROTEK=m
+CONFIG_USB_USS720=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_AIRCABLE=m
+CONFIG_USB_SERIAL_ARK3116=m
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_CH341=m
+CONFIG_USB_SERIAL_WHITEHEAT=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_CYPRESS_M8=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_IPW=m
+CONFIG_USB_SERIAL_IUU=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_MOS7720=m
+CONFIG_USB_SERIAL_MOS7840=m
+CONFIG_USB_SERIAL_NAVMAN=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_OTI6858=m
+CONFIG_USB_SERIAL_SPCP8X5=m
+CONFIG_USB_SERIAL_SAFE=m
+CONFIG_USB_SERIAL_SAFE_PADDED=y
+CONFIG_USB_SERIAL_SIERRAWIRELESS=m
+CONFIG_USB_SERIAL_TI=m
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_OPTION=m
+CONFIG_USB_SERIAL_OMNINET=m
+CONFIG_USB_SERIAL_DEBUG=m
+CONFIG_USB_EMI62=m
+CONFIG_USB_EMI26=m
+CONFIG_USB_ADUTUX=m
+CONFIG_USB_SEVSEG=m
+CONFIG_USB_LEGOTOWER=m
+CONFIG_USB_LCD=m
+CONFIG_USB_IDMOUSE=m
+CONFIG_USB_APPLEDISPLAY=m
+CONFIG_USB_SISUSBVGA=m
+CONFIG_USB_LD=m
+CONFIG_USB_TRANCEVIBRATOR=m
+CONFIG_USB_IOWARRIOR=m
+CONFIG_USB_ISIGHTFW=m
+CONFIG_USB_ATM=m
+CONFIG_USB_SPEEDTOUCH=m
+CONFIG_USB_CXACRU=m
+CONFIG_USB_UEAGLEATM=m
+CONFIG_USB_XUSBATM=m
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=m
+CONFIG_LEDS_TRIGGER_BACKLIGHT=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
+CONFIG_ACCESSIBILITY=y
+CONFIG_A11Y_BRAILLE_CONSOLE=y
+CONFIG_EDAC=m
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+CONFIG_RTC_DRV_DS1307=m
+CONFIG_RTC_DRV_DS1374=m
+CONFIG_RTC_DRV_DS1672=m
+CONFIG_RTC_DRV_MAX6900=m
+CONFIG_RTC_DRV_RS5C372=m
+CONFIG_RTC_DRV_ISL1208=m
+CONFIG_RTC_DRV_X1205=m
+CONFIG_RTC_DRV_PCF8563=m
+CONFIG_RTC_DRV_PCF8583=m
+CONFIG_RTC_DRV_M41T80=m
+CONFIG_RTC_DRV_M41T80_WDT=y
+CONFIG_RTC_DRV_FM3130=m
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_DS1511=m
+CONFIG_RTC_DRV_DS1553=m
+CONFIG_RTC_DRV_DS1742=m
+CONFIG_RTC_DRV_STK17TA8=m
+CONFIG_RTC_DRV_M48T35=m
+CONFIG_RTC_DRV_M48T59=m
+CONFIG_RTC_DRV_V3020=m
+CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
+CONFIG_AUXDISPLAY=y
+CONFIG_KS0108=m
+CONFIG_UIO=m
+CONFIG_UIO_CIF=m
+CONFIG_UIO_PDRV_GENIRQ=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_QUICC_ENGINE=y
+CONFIG_EXT2_FS=m
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_REISERFS_FS=m
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_AUTOFS_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_AFFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_VXFS_FS=m
+CONFIG_MINIX_FS=m
+CONFIG_OMFS_FS=m
+CONFIG_QNX4FS_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CODA_FS=m
+CONFIG_9P_FS=m
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_HIGHMEM=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_XMON=y
+CONFIG_BOOTX_TEXT=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_NETWORK_XFRM=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_DEV_HIFN_795X=m
+CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y
+CONFIG_CRYPTO_DEV_TALITOS=m
+CONFIG_VIRTUALIZATION=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
new file mode 100644
index 0000000000..aa8bb0208b
--- /dev/null
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -0,0 +1,164 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_PERF_EVENTS is not set
+CONFIG_PROFILING=y
+CONFIG_PPC64=y
+CONFIG_CELL_CPU=y
+CONFIG_ALTIVEC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_PPC_PS3=y
+CONFIG_PS3_ADVANCED=y
+CONFIG_PS3_REPOSITORY_WRITE=y
+CONFIG_PS3_DISK=y
+CONFIG_PS3_ROM=y
+CONFIG_PS3_FLASH=y
+CONFIG_PS3_VRAM=m
+CONFIG_PS3_LPM=m
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_KEXEC=y
+# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set
+CONFIG_PPC_4K_PAGES=y
+CONFIG_SCHED_SMT=y
+CONFIG_PM=y
+CONFIG_PM_DEBUG=y
+# CONFIG_SECCOMP is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SPARSEMEM_VMEMMAP is not set
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_INET_DIAG is not set
+CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_HCIBTUSB=m
+CONFIG_CFG80211=m
+CONFIG_CFG80211_WEXT=y
+CONFIG_MAC80211=m
+# CONFIG_MAC80211_RC_MINSTREL is not set
+CONFIG_UEVENT_HELPER=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65535
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=m
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=m
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+CONFIG_GELIC_NET=y
+CONFIG_GELIC_WIRELESS=y
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_USB_USBNET=m
+# CONFIG_USB_NET_CDCETHER is not set
+# CONFIG_USB_NET_CDC_NCM is not set
+# CONFIG_USB_NET_NET1080 is not set
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FB_PS3=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=m
+CONFIG_SND=m
+# CONFIG_SND_DRIVERS is not set
+CONFIG_SND_USB_AUDIO=m
+CONFIG_HIDRAW=y
+CONFIG_HID_BELKIN=m
+CONFIG_HID_CHERRY=m
+CONFIG_HID_EZKEY=m
+CONFIG_HID_TWINHAN=m
+CONFIG_HID_MICROSOFT=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_HID_SMARTJOYPLUS=m
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=m
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PS3=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=m
+CONFIG_EXT4_FS=y
+CONFIG_QUOTA=y
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_UDF_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_SWAP=y
+CONFIG_ROOT_NFS=y
+CONFIG_CIFS=m
+CONFIG_NLS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC_T10DIF=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_FTRACE is not set
diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config
new file mode 100644
index 0000000000..0d54e29e2c
--- /dev/null
+++ b/arch/powerpc/configs/security.config
@@ -0,0 +1,17 @@
+# Help: Common security options for PowerPC builds
+
+# This is the equivalent of booting with lockdown=integrity
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
+
+# These are some general, reasonably inexpensive hardening options
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+
+# UBSAN bounds checking is very cheap and good for hardening
+CONFIG_UBSAN=y
+# CONFIG_UBSAN_MISC is not set
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
new file mode 100644
index 0000000000..9d44e66309
--- /dev/null
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -0,0 +1,306 @@
+CONFIG_KERNEL_XZ=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SGETMASK_SYSCALL is not set
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_SHMEM is not set
+# CONFIG_AIO is not set
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLAB_FREELIST_RANDOM=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_PANIC_TIMEOUT=30
+# CONFIG_PPC_VAS is not set
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_PRESERVE_FA_DUMP=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+CONFIG_SCHED_SMT=y
+CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
+# CONFIG_SECCOMP is not set
+# CONFIG_PPC_MEM_KEYS is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SIG_FORCE=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_DNS_RESOLVER=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=m
+CONFIG_MTD_POWERNV_FLASH=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_NVME_MULTIPATH=y
+CONFIG_EEPROM_AT24=m
+# CONFIG_CXL is not set
+# CONFIG_OCXL is not set
+CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IPR=m
+# CONFIG_SCSI_IPR_TRACE is not set
+# CONFIG_SCSI_IPR_DUMP is not set
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=m
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+CONFIG_TIGON3=m
+CONFIG_BNX2X=m
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+CONFIG_CHELSIO_T1=m
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+CONFIG_BE2NET=m
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IGB=m
+CONFIG_IXGBE=m
+CONFIG_I40E=m
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_MLX4_CORE_GEN2 is not set
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_MLX5_EN_RXNFC is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
+CONFIG_MYRI10GE=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_S2IO=m
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+CONFIG_NETXEN_NIC=m
+CONFIG_QED=m
+CONFIG_QEDE=m
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_SFC=m
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PHYLIB=y
+# CONFIG_USB_NET_DRIVERS is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_IPMI_WATCHDOG=y
+CONFIG_HW_RANDOM=y
+CONFIG_TCG_TPM=y
+CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_DEVPORT is not set
+CONFIG_I2C=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_OPAL=m
+CONFIG_PPS=y
+CONFIG_SENSORS_IBMPOWERNV=m
+CONFIG_DRM=m
+CONFIG_DRM_AST=m
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_HID_GENERIC=m
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_ITE=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=m
+CONFIG_USB_XHCI_HCD=m
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_OPAL=m
+CONFIG_RTC_DRV_GENERIC=m
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_PCI=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_ISO9660_FS=m
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_ENCRYPTED_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
+# CONFIG_INTEGRITY is not set
+CONFIG_LSM="yama,loadpin,safesetid,integrity"
+# CONFIG_CRYPTO_HW is not set
+CONFIG_CRC16=y
+CONFIG_CRC_ITU_T=y
+CONFIG_LIBCRC32C=y
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_WQ_WATCHDOG=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+# CONFIG_FTRACE is not set
+CONFIG_XMON=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
new file mode 100644
index 0000000000..7a978d3969
--- /dev/null
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -0,0 +1,78 @@
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_EMBEDDED6xx=y
+CONFIG_STORCENTER=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=y
+CONFIG_CMDLINE="console=ttyS0,115200"
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=m
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IPV6 is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_FTL=y
+CONFIG_NFTL=y
+CONFIG_NFTL_RW=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_ATA=y
+CONFIG_PATA_VIA=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID456=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_R8169=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_NVRAM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
+# CONFIG_HWMON is not set
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_XFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_T10DIF=y
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
new file mode 100644
index 0000000000..083c2e5752
--- /dev/null
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -0,0 +1,60 @@
+CONFIG_PPC_8xx=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EXPERT=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_TQM8XX=y
+# CONFIG_8xx_CPU15 is not set
+CONFIG_GEN_RTC=y
+CONFIG_HZ_100=y
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+CONFIG_FS_ENET=y
+CONFIG_DAVICOM_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_CPM=y
+CONFIG_SERIAL_CPM_CONSOLE=y
+CONFIG_HW_RANDOM=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRC32_SLICEBY4=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
new file mode 100644
index 0000000000..5017a697b6
--- /dev/null
+++ b/arch/powerpc/configs/wii_defconfig
@@ -0,0 +1,126 @@
+CONFIG_LOCALVERSION="-wii"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_ELF_CORE is not set
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_EMBEDDED6xx=y
+CONFIG_WII=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_PREEMPT=y
+CONFIG_BINFMT_MISC=m
+CONFIG_KEXEC=y
+# CONFIG_SECCOMP is not set
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_RARP=y
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+CONFIG_BT=y
+CONFIG_BT_RFCOMM=y
+CONFIG_BT_BNEP=y
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_HIDP=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+# CONFIG_STANDALONE is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+# CONFIG_ETHERNET is not set
+CONFIG_B43=y
+CONFIG_B43_BUSES_SSB=y
+CONFIG_B43_SDIO=y
+# CONFIG_B43_PHY_LP is not set
+CONFIG_B43_DEBUG=y
+CONFIG_INPUT_FF_MEMLESS=m
+CONFIG_INPUT_JOYDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=y
+# CONFIG_MOUSE_PS2 is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_LEGACY_PTY_COUNT=64
+# CONFIG_HW_RANDOM is not set
+CONFIG_NVRAM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_GPIO=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_HLWD=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_HID_APPLE=m
+CONFIG_HID_WACOM=m
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_HLWD=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_PANIC=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GAMECUBE=y
+CONFIG_NVMEM_NINTENDO_OTP=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CIFS=m
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_CRC_CCITT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_DMA_API_DEBUG=y
+CONFIG_PPC_EARLY_DEBUG=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
new file mode 100644
index 0000000000..e1094f08f7
--- /dev/null
+++ b/arch/powerpc/crypto/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+aesp10-ppc.S
+ghashp10-ppc.S
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
new file mode 100644
index 0000000000..6fc2248ca5
--- /dev/null
+++ b/arch/powerpc/crypto/Kconfig
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (powerpc)"
+
+config CRYPTO_CRC32C_VPMSUM
+	tristate "CRC32c"
+	depends on PPC64 && ALTIVEC
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
+
+	  Architecture: powerpc64 using
+	  - AltiVec extensions
+
+	  Enable on POWER8 and newer processors for improved performance.
+
+config CRYPTO_CRCT10DIF_VPMSUM
+	tristate "CRC32T10DIF"
+	depends on PPC64 && ALTIVEC && CRC_T10DIF
+	select CRYPTO_HASH
+	help
+	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
+
+	  Architecture: powerpc64 using
+	  - AltiVec extensions
+
+	  Enable on POWER8 and newer processors for improved performance.
+
+config CRYPTO_VPMSUM_TESTER
+	tristate "CRC32c and CRC32T10DIF hardware acceleration tester"
+	depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM
+	help
+	  Stress test for CRC32c and CRCT10DIF algorithms implemented with
+	  powerpc64 AltiVec extensions (POWER8 vpmsum instructions).
+	  Unless you are testing these algorithms, you don't need this.
+
+config CRYPTO_MD5_PPC
+	tristate "Digests: MD5"
+	depends on PPC
+	select CRYPTO_HASH
+	help
+	  MD5 message digest algorithm (RFC1321)
+
+	  Architecture: powerpc
+
+config CRYPTO_SHA1_PPC
+	tristate "Hash functions: SHA-1"
+	depends on PPC
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: powerpc
+
+config CRYPTO_SHA1_PPC_SPE
+	tristate "Hash functions: SHA-1 (SPE)"
+	depends on PPC && SPE
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: powerpc using
+	  - SPE (Signal Processing Engine) extensions
+
+config CRYPTO_SHA256_PPC_SPE
+	tristate "Hash functions: SHA-224 and SHA-256 (SPE)"
+	depends on PPC && SPE
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: powerpc using
+	  - SPE (Signal Processing Engine) extensions
+
+config CRYPTO_AES_PPC_SPE
+	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
+	depends on PPC && SPE
+	select CRYPTO_SKCIPHER
+	help
+	  Block ciphers: AES cipher algorithms (FIPS-197)
+	  Length-preserving ciphers: AES with ECB, CBC, CTR, and XTS modes
+
+	  Architecture: powerpc using:
+	  - SPE (Signal Processing Engine) extensions
+
+	  SPE is available for:
+	  - Processor Type: Freescale 8500
+	  - CPU selection: e500 (8540)
+
+	  This module should only be used for low power (router) devices
+	  without hardware AES acceleration (e.g. caam crypto). It reduces the
+	  size of the AES tables from 16KB to 8KB + 256 bytes and mitigates
+	  timing attacks. Nevertheless it might not be as secure as other
+	  architecture specific assembler implementations that work on 1KB
+	  tables or 256 bytes S-boxes.
+
+config CRYPTO_AES_GCM_P10
+	tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)"
+	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+	select CRYPTO_LIB_AES
+	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
+	select CRYPTO_SKCIPHER
+	help
+	  AEAD cipher: AES cipher algorithms (FIPS-197)
+	  GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
+	  Architecture: powerpc64 using:
+	    - little-endian
+	    - Power10 or later features
+
+	  Support for cryptographic acceleration instructions on Power10 or
+	  later CPU. This module supports stitched acceleration for AES/GCM.
+
+config CRYPTO_CHACHA20_P10
+	tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)"
+	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+	  stream cipher algorithms
+
+	  Architecture: PowerPC64
+	  - Power10 or later
+	  - Little-endian
+
+config CRYPTO_POLY1305_P10
+	tristate "Hash functions: Poly1305 (P10 or later)"
+	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+	select CRYPTO_HASH
+	select CRYPTO_LIB_POLY1305_GENERIC
+	help
+	  Poly1305 authenticator algorithm (RFC7539)
+
+	  Architecture: PowerPC64
+	  - Power10 or later
+	  - Little-endian
+
+endmenu
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
new file mode 100644
index 0000000000..ebdac1b9eb
--- /dev/null
+++ b/arch/powerpc/crypto/Makefile
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# powerpc/crypto/Makefile
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
+obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
+obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
+obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
+obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
+obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
+obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
+obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
+obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
+obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
+
+aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
+md5-ppc-y := md5-asm.o md5-glue.o
+sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
+sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
+sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
+crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
+crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
+aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
+chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
+poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
+
+quiet_cmd_perl = PERL    $@
+      cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
+
+targets += aesp10-ppc.S ghashp10-ppc.S
+
+$(obj)/aesp10-ppc.S $(obj)/ghashp10-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
+	$(call if_changed,perl)
+
+OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y
+OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y
diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c
new file mode 100644
index 0000000000..4b6e899895
--- /dev/null
+++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for accelerated AES-GCM stitched implementation for ppc64le.
+ *
+ * Copyright 2022- IBM Inc. All rights reserved
+ */
+
+#include <asm/unaligned.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define	PPC_ALIGN		16
+#define GCM_IV_SIZE		12
+
+MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");	/* NOTE(review): alg name is "gcm(aes)" */
+
+asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits,
+				      void *key);
+asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key);
+asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
+				    void *rkey, u8 *iv, void *Xi);
+asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
+				    void *rkey, u8 *iv, void *Xi);
+asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
+asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable,
+		unsigned char *aad, unsigned int alen);
+
+struct aes_key {
+	u8 key[AES_MAX_KEYLENGTH]; /* area given to aes_p10_set_encrypt_key() */
+	u64 rounds;		/* presumably the AES round count - TODO confirm */
+};
+
+struct gcm_ctx {
+	u8 iv[16];	/* counter block: 12-byte IV + 32-bit BE counter */
+	u8 ivtag[16];	/* encrypted first counter block, XORed into the tag */
+	u8 aad_hash[16];	/* GHASH state after the associated data */
+	u64 aadLen;	/* associated data length in bytes */
+	u64 Plen;	/* offset 56 - used in aes_p10_gcm_{en/de}crypt */
+};
+struct Hash_ctx {
+	u8 H[16];	/* subkey: zero block encrypted, then set_subkey() */
+	u8 Htable[256];	/* [0..15] = Xi, [32..] = gcm_init_htable() output */
+};
+
+struct p10_aes_gcm_ctx {
+	struct aes_key enc_key;	/* per-tfm key, set in p10_aes_gcm_setkey() */
+};
+
+static void vsx_begin(void)
+{
+	preempt_disable();	/* re-enabled by vsx_end() */
+	enable_kernel_vsx();	/* kernel VSX use allowed until vsx_end() */
+}
+
+static void vsx_end(void)
+{
+	disable_kernel_vsx();	/* undo vsx_begin() in reverse order */
+	preempt_enable();
+}
+
+static void set_subkey(unsigned char *hash)
+{
+	*(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]); /* BE -> CPU */
+	*(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]); /* in place */
+}
+
+/*
+ * Hash the associated data (if any) into the GHASH state Xi.
+ *   - Result lands in gctx->aad_hash and the first 16 bytes of hash->Htable.
+ */
+static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
+		    unsigned char *aad, int alen)
+{
+	u8 xi[16] = {0, };
+	int full = alen & ~0xf;	/* byte length of the whole 16-byte blocks */
+	int k;
+
+	gctx->aadLen = alen;
+	if (full) {
+		gcm_ghash_p10(xi, hash->Htable+32, aad, full);
+		aad += full;
+		alen -= full;
+	}
+	if (!alen) {
+		memcpy(gctx->aad_hash, xi, 16);
+	} else {
+		/* XOR the short tail into Xi, then hash that single block. */
+		for (k = 0; k < alen; k++)
+			xi[k] ^= aad[k];
+		memset(gctx->aad_hash, 0, 16);
+		gcm_ghash_p10(gctx->aad_hash, hash->Htable+32, xi, 16);
+	}
+
+	memcpy(hash->Htable, gctx->aad_hash, 16);
+}
+
+/* Per-request setup: hash subkey/table, tag mask, counter block, then AAD. */
+static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
+			struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
+{
+	__be32 counter = cpu_to_be32(1);
+
+	/* hash->H is encrypted in place; the caller passes it in zeroed. */
+	aes_p10_encrypt(hash->H, hash->H, rdkey);
+	set_subkey(hash->H);
+	gcm_init_htable(hash->Htable+32, hash->H);
+
+	*((__be32 *)(iv+12)) = counter;
+
+	gctx->Plen = 0;
+
+	/* Encrypt counter vector as iv tag and increment counter. */
+	aes_p10_encrypt(iv, gctx->ivtag, rdkey);
+
+	counter = cpu_to_be32(2);
+	*((__be32 *)(iv+12)) = counter;
+	memcpy(gctx->iv, iv, 16);
+
+	gctx->aadLen = assoclen;
+	memset(gctx->aad_hash, 0, 16);
+	if (assoclen)
+		set_aad(gctx, hash, assoc, assoclen);
+}
+
+/* Finish GHASH with the AAD/text bit lengths and mask it with ivtag. */
+static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
+{
+	int i;
+	unsigned char len_ac[16 + PPC_ALIGN];
+	unsigned char *aclen = PTR_ALIGN((void *)len_ac, PPC_ALIGN);
+	/* Widen before shifting: a 32-bit "len << 3" overflows at 256 MiB. */
+	__be64 clen = cpu_to_be64((u64)(u32)len << 3);
+	__be64 alen = cpu_to_be64(gctx->aadLen << 3);
+
+	/* Nothing hashed: the tag is the encrypted initial counter block. */
+	if (len == 0 && gctx->aadLen == 0) {
+		memcpy(hash->Htable, gctx->ivtag, 16);
+		return;
+	}
+
+	/* Length block: 64-bit AAD bits || 64-bit text bits, big-endian. */
+	*((__be64 *)(aclen)) = alen;
+	*((__be64 *)(aclen+8)) = clen;
+
+	/* Absorb the length block into Xi (first 16 bytes of Htable). */
+	gcm_ghash_p10(hash->Htable, hash->Htable+32, aclen, 16);
+
+	/* Tag = Xi ^ ivtag, left in hash->Htable[0..15] for the caller. */
+	for (i = 0; i < 16; i++)
+		hash->Htable[i] ^= gctx->ivtag[i];
+}
+
+/*
+ * Validate the requested authentication tag length.
+ *
+ * Accepts 4, 8 and 12..16 bytes and rejects everything else with
+ * -EINVAL. Nothing is stored here: the crypt path reads the length
+ * back through crypto_aead_authsize().
+ */
+static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	/* The two short lengths first, then the contiguous 12..16 range. */
+	if (authsize == 4 || authsize == 8)
+		return 0;
+	if (authsize >= 12 && authsize <= 16)
+		return 0;
+
+	return -EINVAL;
+}
+
+static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
+			     unsigned int keylen)
+{
+	struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(crypto_aead_tfm(aead));
+	int err;
+
+	/* The key-expansion helper takes the key size in bits, not bytes. */
+	vsx_begin();
+	err = aes_p10_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
+	vsx_end();
+
+	return err ? -EINVAL : 0;
+}
+
+static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
+{
+	struct crypto_tfm *tfm = req->base.tfm;
+	struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
+	u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
+	struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
+	u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
+	struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
+	struct scatter_walk assoc_sg_walk;
+	struct skcipher_walk walk;
+	u8 *assocmem = NULL;	/* non-NULL only when assoc was copied */
+	u8 *assoc;
+	unsigned int assoclen = req->assoclen;
+	unsigned int cryptlen = req->cryptlen;
+	unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
+	unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
+	int ret;
+	unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
+	u8 otag[16];	/* decrypt only: tag read from the end of src */
+	int total_processed = 0;
+
+	memset(databuf, 0, sizeof(databuf));
+	memset(hashbuf, 0, sizeof(hashbuf));
+	memset(ivbuf, 0, sizeof(ivbuf));
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+
+	/* Use assoc in place if the first sg entry covers it, else copy it. */
+	if (req->src->length >= assoclen && req->src->length) {
+		scatterwalk_start(&assoc_sg_walk, req->src);
+		assoc = scatterwalk_map(&assoc_sg_walk);
+	} else {
+		gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+			      GFP_KERNEL : GFP_ATOMIC;
+
+		/* assoc can be any length, so must be on heap */
+		assocmem = kmalloc(assoclen, flags);
+		if (unlikely(!assocmem))
+			return -ENOMEM;
+		assoc = assocmem;
+
+		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
+	}
+
+	vsx_begin();
+	gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
+	vsx_end();
+
+	if (!assocmem)
+		scatterwalk_unmap(assoc);
+	else
+		kfree(assocmem);
+
+	if (enc)
+		ret = skcipher_walk_aead_encrypt(&walk, req, false);
+	else
+		ret = skcipher_walk_aead_decrypt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	while (walk.nbytes > 0 && ret == 0) {
+		/* VSX is held only for the duration of one walk step. */
+		vsx_begin();
+		if (enc)
+			aes_p10_gcm_encrypt(walk.src.virt.addr,
+					    walk.dst.virt.addr,
+					    walk.nbytes,
+					    &ctx->enc_key, gctx->iv, hash->Htable);
+		else
+			aes_p10_gcm_decrypt(walk.src.virt.addr,
+					    walk.dst.virt.addr,
+					    walk.nbytes,
+					    &ctx->enc_key, gctx->iv, hash->Htable);
+		vsx_end();
+
+		total_processed += walk.nbytes;
+		ret = skcipher_walk_done(&walk, 0);
+	}
+
+	if (ret)
+		return ret;
+
+	/* Finalize hash: the tag ends up in hash->Htable[0..15] */
+	vsx_begin();
+	finish_tag(gctx, hash, total_processed);
+	vsx_end();
+
+	/* enc: append tag to dst; dec: compare with the tag at end of src */
+	if (enc)
+		scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
+					 auth_tag_len, 1);
+	else {
+		scatterwalk_map_and_copy(otag, req->src,
+					 req->assoclen + cryptlen - auth_tag_len,
+					 auth_tag_len, 0);
+
+		if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
+			memzero_explicit(hash->Htable, 16);
+			return -EBADMSG;
+		}
+	}
+
+	return 0;
+}
+
+static int p10_aes_gcm_encrypt(struct aead_request *req)
+{
+	return p10_aes_gcm_crypt(req, 1); /* enc != 0: encrypt, append tag */
+}
+
+static int p10_aes_gcm_decrypt(struct aead_request *req)
+{
+	return p10_aes_gcm_crypt(req, 0); /* enc == 0: decrypt, verify tag */
+}
+
+static struct aead_alg gcm_aes_alg = {
+	.ivsize			= GCM_IV_SIZE,	/* 12 bytes; counter is added */
+	.maxauthsize		= 16,		/* largest set_authsize() value */
+
+	.setauthsize		= set_authsize,
+	.setkey			= p10_aes_gcm_setkey,
+	.encrypt		= p10_aes_gcm_encrypt,
+	.decrypt		= p10_aes_gcm_decrypt,
+
+	.base.cra_name		= "gcm(aes)",
+	.base.cra_driver_name	= "aes_gcm_p10",
+	.base.cra_priority	= 2100,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct p10_aes_gcm_ctx),
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init p10_init(void)
+{
+	return crypto_register_aead(&gcm_aes_alg); /* becomes init status */
+}
+
+static void __exit p10_exit(void)
+{
+	crypto_unregister_aead(&gcm_aes_alg);	/* mirrors p10_init() */
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
+module_exit(p10_exit);
diff --git a/arch/powerpc/crypto/aes-gcm-p10.S b/arch/powerpc/crypto/aes-gcm-p10.S
new file mode 100644
index 0000000000..a51f4b2653
--- /dev/null
+++ b/arch/powerpc/crypto/aes-gcm-p10.S
@@ -0,0 +1,1521 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+ #
+ # Accelerated AES-GCM stitched implementation for ppc64le.
+ #
+ # Copyright 2022- IBM Inc. All rights reserved
+ #
+ #===================================================================================
+ # Written by Danny Tsen <dtsen@linux.ibm.com>
+ #
+ # GHASH is based on the Karatsuba multiplication method.
+ #
+ #    Xi xor X1
+ #
+ #    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
+ #      (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
+ #      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
+ #      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
+ #      (X4.h * H.h + X4.l * H.l + X4 * H)
+ #
+ # Xi = v0
+ # H Poly = v2
+ # Hash keys = v3 - v14
+ #     ( H.l, H, H.h)
+ #     ( H^2.l, H^2, H^2.h)
+ #     ( H^3.l, H^3, H^3.h)
+ #     ( H^4.l, H^4, H^4.h)
+ #
+ # v30 is IV
+ # v31 - counter 1
+ #
+ # AES used,
+ #     vs0 - vs14 for round keys
+ #     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
+ #
+ # This implementation uses stitched AES-GCM approach to improve overall performance.
+ # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
+ #
+ # ===================================================================================
+ #
+
+#include <asm/ppc_asm.h>
+#include <linux/linkage.h>
+
+.machine        "any"
+.text
+
+ # 4x loops
+ # v15 - v18 - input states
+ # vs1 - vs9 - round keys
+ # Applies nine vcipher rounds (keys vs1-vs9) to four states. Never invoked in this file.
+.macro Loop_aes_middle4x
+	xxlor	19+32, 1, 1		# v19 = vs1
+	xxlor	20+32, 2, 2		# v20 = vs2
+	xxlor	21+32, 3, 3		# v21 = vs3
+	xxlor	22+32, 4, 4		# v22 = vs4
+
+	vcipher	15, 15, 19
+	vcipher	16, 16, 19
+	vcipher	17, 17, 19
+	vcipher	18, 18, 19
+
+	vcipher	15, 15, 20
+	vcipher	16, 16, 20
+	vcipher	17, 17, 20
+	vcipher	18, 18, 20
+
+	vcipher	15, 15, 21
+	vcipher	16, 16, 21
+	vcipher	17, 17, 21
+	vcipher	18, 18, 21
+
+	vcipher	15, 15, 22
+	vcipher	16, 16, 22
+	vcipher	17, 17, 22
+	vcipher	18, 18, 22
+
+	xxlor	19+32, 5, 5		# v19 = vs5
+	xxlor	20+32, 6, 6		# v20 = vs6
+	xxlor	21+32, 7, 7		# v21 = vs7
+	xxlor	22+32, 8, 8		# v22 = vs8
+
+	vcipher	15, 15, 19
+	vcipher	16, 16, 19
+	vcipher	17, 17, 19
+	vcipher	18, 18, 19
+
+	vcipher	15, 15, 20
+	vcipher	16, 16, 20
+	vcipher	17, 17, 20
+	vcipher	18, 18, 20
+
+	vcipher	15, 15, 21
+	vcipher	16, 16, 21
+	vcipher	17, 17, 21
+	vcipher	18, 18, 21
+
+	vcipher	15, 15, 22
+	vcipher	16, 16, 22
+	vcipher	17, 17, 22
+	vcipher	18, 18, 22
+
+	xxlor	23+32, 9, 9		# v23 = vs9
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+.endm
+
+ # 8x loops
+ # v15 - v22 - input states
+ # vs1 - vs9 - round keys
+ # Applies nine vcipher rounds (keys vs1-vs9) to eight states. Uses v23-v26 as key scratch.
+.macro Loop_aes_middle8x
+	xxlor	23+32, 1, 1		# v23 = vs1
+	xxlor	24+32, 2, 2		# v24 = vs2
+	xxlor	25+32, 3, 3		# v25 = vs3
+	xxlor	26+32, 4, 4		# v26 = vs4
+
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+
+	vcipher	15, 15, 24
+	vcipher	16, 16, 24
+	vcipher	17, 17, 24
+	vcipher	18, 18, 24
+	vcipher	19, 19, 24
+	vcipher	20, 20, 24
+	vcipher	21, 21, 24
+	vcipher	22, 22, 24
+
+	vcipher	15, 15, 25
+	vcipher	16, 16, 25
+	vcipher	17, 17, 25
+	vcipher	18, 18, 25
+	vcipher	19, 19, 25
+	vcipher	20, 20, 25
+	vcipher	21, 21, 25
+	vcipher	22, 22, 25
+
+	vcipher	15, 15, 26
+	vcipher	16, 16, 26
+	vcipher	17, 17, 26
+	vcipher	18, 18, 26
+	vcipher	19, 19, 26
+	vcipher	20, 20, 26
+	vcipher	21, 21, 26
+	vcipher	22, 22, 26
+
+	xxlor	23+32, 5, 5		# v23 = vs5
+	xxlor	24+32, 6, 6		# v24 = vs6
+	xxlor	25+32, 7, 7		# v25 = vs7
+	xxlor	26+32, 8, 8		# v26 = vs8
+
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+
+	vcipher	15, 15, 24
+	vcipher	16, 16, 24
+	vcipher	17, 17, 24
+	vcipher	18, 18, 24
+	vcipher	19, 19, 24
+	vcipher	20, 20, 24
+	vcipher	21, 21, 24
+	vcipher	22, 22, 24
+
+	vcipher	15, 15, 25
+	vcipher	16, 16, 25
+	vcipher	17, 17, 25
+	vcipher	18, 18, 25
+	vcipher	19, 19, 25
+	vcipher	20, 20, 25
+	vcipher	21, 21, 25
+	vcipher	22, 22, 25
+
+	vcipher	15, 15, 26
+	vcipher	16, 16, 26
+	vcipher	17, 17, 26
+	vcipher	18, 18, 26
+	vcipher	19, 19, 26
+	vcipher	20, 20, 26
+	vcipher	21, 21, 26
+	vcipher	22, 22, 26
+
+	xxlor	23+32, 9, 9		# v23 = vs9
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+.endm
+
+.macro Loop_aes_middle_1x		# nine vcipher rounds (keys vs1-vs9) on the single state in v15
+	xxlor	19+32, 1, 1		# v19 = vs1
+	xxlor	20+32, 2, 2		# v20 = vs2
+	xxlor	21+32, 3, 3		# v21 = vs3
+	xxlor	22+32, 4, 4		# v22 = vs4
+
+	vcipher 15, 15, 19
+	vcipher 15, 15, 20
+	vcipher 15, 15, 21
+	vcipher 15, 15, 22
+
+	xxlor	19+32, 5, 5		# v19 = vs5
+	xxlor	20+32, 6, 6		# v20 = vs6
+	xxlor	21+32, 7, 7		# v21 = vs7
+	xxlor	22+32, 8, 8		# v22 = vs8
+
+	vcipher 15, 15, 19
+	vcipher 15, 15, 20
+	vcipher 15, 15, 21
+	vcipher 15, 15, 22
+
+	xxlor	19+32, 9, 9		# v19 = vs9
+	vcipher 15, 15, 19
+.endm
+
+ #
+ # Compute 4x hash values based on Karatsuba method.
+ # In: v15-v18 blocks, v0 Xi. Out: v0. Scratch: v23-v29. Never invoked in this file.
+.macro ppc_aes_gcm_ghash
+	vxor		15, 15, 0		# Xi + X
+
+	vpmsumd		23, 12, 15		# H4.L * X.L
+	vpmsumd		24, 9, 16
+	vpmsumd		25, 6, 17
+	vpmsumd		26, 3, 18
+
+	vxor		23, 23, 24
+	vxor		23, 23, 25
+	vxor		23, 23, 26		# L
+
+	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
+	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
+	vpmsumd		26, 7, 17
+	vpmsumd		27, 4, 18
+
+	vxor		24, 24, 25
+	vxor		24, 24, 26
+	vxor		24, 24, 27		# M
+
+	# sum hash and reduction with H Poly
+	vpmsumd		28, 23, 2		# reduction
+
+	vxor		29, 29, 29		# v29 = 0, shifted in by the two vsldoi below
+	vsldoi		26, 24, 29, 8		# mL
+	vsldoi		29, 29, 24, 8		# mH
+	vxor		23, 23, 26		# mL + L
+
+	vsldoi		23, 23, 23, 8		# swap
+	vxor		23, 23, 28
+
+	vpmsumd		24, 14, 15		# H4.H * X.H
+	vpmsumd		25, 11, 16
+	vpmsumd		26, 8, 17
+	vpmsumd		27, 5, 18
+
+	vxor		24, 24, 25
+	vxor		24, 24, 26
+	vxor		24, 24, 27		# H
+
+	vxor		24, 24, 29		# H + mH
+
+	# sum hash and reduction with H Poly
+	vsldoi		27, 23, 23, 8		# swap
+	vpmsumd		23, 23, 2
+	vxor		27, 27, 24
+	vxor		23, 23, 27
+
+	xxlor		32, 23+32, 23+32		# update hash
+
+.endm
+
+ #
+ # Combine two 4x ghash
+ # v15 - v22 - input blocks
+ # v0 - Xi in and out, v23 - v29 scratch, v15 and v19 are modified
+.macro ppc_aes_gcm_ghash2_4x
+	# first 4x hash
+	vxor		15, 15, 0		# Xi + X
+
+	vpmsumd		23, 12, 15		# H4.L * X.L
+	vpmsumd		24, 9, 16
+	vpmsumd		25, 6, 17
+	vpmsumd		26, 3, 18
+
+	vxor		23, 23, 24
+	vxor		23, 23, 25
+	vxor		23, 23, 26		# L
+
+	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
+	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
+	vpmsumd		26, 7, 17
+	vpmsumd		27, 4, 18
+
+	vxor		24, 24, 25
+	vxor		24, 24, 26
+
+	# sum hash and reduction with H Poly
+	vpmsumd		28, 23, 2		# reduction
+
+	vxor		29, 29, 29		# v29 = 0, shifted in by the two vsldoi below
+
+	vxor		24, 24, 27		# M
+	vsldoi		26, 24, 29, 8		# mL
+	vsldoi		29, 29, 24, 8		# mH
+	vxor		23, 23, 26		# mL + L
+
+	vsldoi		23, 23, 23, 8		# swap
+	vxor		23, 23, 28
+
+	vpmsumd		24, 14, 15		# H4.H * X.H
+	vpmsumd		25, 11, 16
+	vpmsumd		26, 8, 17
+	vpmsumd		27, 5, 18
+
+	vxor		24, 24, 25
+	vxor		24, 24, 26
+	vxor		24, 24, 27		# H
+
+	vxor		24, 24, 29		# H + mH
+
+	# sum hash and reduction with H Poly
+	vsldoi		27, 23, 23, 8		# swap
+	vpmsumd		23, 23, 2
+	vxor		27, 27, 24
+	vxor		27, 23, 27		# 1st Xi
+
+	# 2nd 4x hash
+	vpmsumd		24, 9, 20		# these three products do not read the 1st Xi
+	vpmsumd		25, 6, 21
+	vpmsumd		26, 3, 22
+	vxor		19, 19, 27		# Xi + X
+	vpmsumd		23, 12, 19		# H4.L * X.L
+
+	vxor		23, 23, 24
+	vxor		23, 23, 25
+	vxor		23, 23, 26		# L
+
+	vpmsumd		24, 13, 19		# H4.L * X.H + H4.H * X.L
+	vpmsumd		25, 10, 20		# H3.L * X1.H + H3.H * X1.L
+	vpmsumd		26, 7, 21
+	vpmsumd		27, 4, 22
+
+	vxor		24, 24, 25
+	vxor		24, 24, 26
+
+	# sum hash and reduction with H Poly
+	vpmsumd		28, 23, 2		# reduction
+
+	vxor		29, 29, 29		# v29 = 0 again (it held mH from the first half)
+
+	vxor		24, 24, 27		# M
+	vsldoi		26, 24, 29, 8		# mL
+	vsldoi		29, 29, 24, 8		# mH
+	vxor		23, 23, 26		# mL + L
+
+	vsldoi		23, 23, 23, 8		# swap
+	vxor		23, 23, 28
+
+	vpmsumd		24, 14, 19		# H4.H * X.H
+	vpmsumd		25, 11, 20
+	vpmsumd		26, 8, 21
+	vpmsumd		27, 5, 22
+
+	vxor		24, 24, 25
+	vxor		24, 24, 26
+	vxor		24, 24, 27		# H
+
+	vxor		24, 24, 29		# H + mH
+
+	# sum hash and reduction with H Poly
+	vsldoi		27, 23, 23, 8		# swap
+	vpmsumd		23, 23, 2
+	vxor		27, 27, 24
+	vxor		23, 23, 27
+
+	xxlor		32, 23+32, 23+32		# update hash
+
+.endm
+
+ #
+ # Fold the single block in v28 into Xi (v0) using v3, v4, v5 and the poly in v2.
+ # Result is left in v0. Clobbers v19, v20 and v22 - v28.
+.macro ppc_update_hash_1x
+	vxor		28, 28, 0		# Xi + X
+
+	vxor		19, 19, 19		# v19 = 0, shifted in by the two vsldoi below
+
+	vpmsumd		22, 3, 28		# L
+	vpmsumd		23, 4, 28		# M
+	vpmsumd		24, 5, 28		# H
+
+	vpmsumd		27, 22, 2		# reduction
+
+	vsldoi		25, 23, 19, 8		# mL
+	vsldoi		26, 19, 23, 8		# mH
+	vxor		22, 22, 25		# LL + LL
+	vxor		24, 24, 26		# HH + HH
+
+	vsldoi		22, 22, 22, 8		# swap
+	vxor		22, 22, 27
+
+	vsldoi		20, 22, 22, 8		# swap
+	vpmsumd		22, 22, 2		# reduction
+	vxor		20, 20, 24
+	vxor		22, 22, 20
+
+	vmr		0, 22			# update hash
+
+.endm
+
+.macro SAVE_REGS			# prologue, must be paired with RESTORE_REGS (same offsets)
+	stdu 1,-640(1)			# allocate a 640-byte frame and store the back chain
+	mflr 0				# r0 = LR, stored at the end of this macro
+
+	std	14,112(1)		# r14 - r21 saved at 112 - 168
+	std	15,120(1)
+	std	16,128(1)
+	std	17,136(1)
+	std	18,144(1)
+	std	19,152(1)
+	std	20,160(1)
+	std	21,168(1)
+	li	9, 256			# v20 - v31 saved at 256 - 432, 16 bytes apart
+	stvx	20, 9, 1
+	addi	9, 9, 16
+	stvx	21, 9, 1
+	addi	9, 9, 16
+	stvx	22, 9, 1
+	addi	9, 9, 16
+	stvx	23, 9, 1
+	addi	9, 9, 16
+	stvx	24, 9, 1
+	addi	9, 9, 16
+	stvx	25, 9, 1
+	addi	9, 9, 16
+	stvx	26, 9, 1
+	addi	9, 9, 16
+	stvx	27, 9, 1
+	addi	9, 9, 16
+	stvx	28, 9, 1
+	addi	9, 9, 16
+	stvx	29, 9, 1
+	addi	9, 9, 16
+	stvx	30, 9, 1
+	addi	9, 9, 16
+	stvx	31, 9, 1
+	stxv	14, 464(1)		# vs14 - vs22 saved at 464 - 592
+	stxv	15, 480(1)
+	stxv	16, 496(1)
+	stxv	17, 512(1)
+	stxv	18, 528(1)
+	stxv	19, 544(1)
+	stxv	20, 560(1)
+	stxv	21, 576(1)
+	stxv	22, 592(1)
+	std	0, 656(1)		# LR at 640+16, i.e. offset 16 of the frame above this one
+.endm
+
+.macro RESTORE_REGS			# epilogue, reloads everything SAVE_REGS stored and pops the frame
+	lxv	14, 464(1)		# vs14 - vs22 from 464 - 592
+	lxv	15, 480(1)
+	lxv	16, 496(1)
+	lxv	17, 512(1)
+	lxv	18, 528(1)
+	lxv	19, 544(1)
+	lxv	20, 560(1)
+	lxv	21, 576(1)
+	lxv	22, 592(1)
+	li	9, 256			# v20 - v31 from 256 - 432, 16 bytes apart
+	lvx	20, 9, 1
+	addi	9, 9, 16
+	lvx	21, 9, 1
+	addi	9, 9, 16
+	lvx	22, 9, 1
+	addi	9, 9, 16
+	lvx	23, 9, 1
+	addi	9, 9, 16
+	lvx	24, 9, 1
+	addi	9, 9, 16
+	lvx	25, 9, 1
+	addi	9, 9, 16
+	lvx	26, 9, 1
+	addi	9, 9, 16
+	lvx	27, 9, 1
+	addi	9, 9, 16
+	lvx	28, 9, 1
+	addi	9, 9, 16
+	lvx	29, 9, 1
+	addi	9, 9, 16
+	lvx	30, 9, 1
+	addi	9, 9, 16
+	lvx	31, 9, 1
+
+	ld	0, 656(1)		# saved LR (640+16)
+	ld      14,112(1)		# r14 - r21 from 112 - 168
+	ld      15,120(1)
+	ld      16,128(1)
+	ld      17,136(1)
+	ld      18,144(1)
+	ld      19,152(1)
+	ld      20,160(1)
+	ld	21,168(1)
+
+	mtlr	0
+	addi	1, 1, 640		# pop the 640-byte frame
+.endm
+
+.macro LOAD_HASH_TABLE
+	# Load Xi into v0 (vs32) from offset 0 of the table at r8, offset 16 is not read here
+	lxvb16x	32, 0, 8	# load Xi
+
+	# load Hash - h^4, h^3, h^2, h (v2 - v14 from offsets 32 - 224, r10 is the offset scratch)
+	li	10, 32
+	lxvd2x	2+32, 10, 8	# H Poly
+	li	10, 48
+	lxvd2x	3+32, 10, 8	# Hl
+	li	10, 64
+	lxvd2x	4+32, 10, 8	# H
+	li	10, 80
+	lxvd2x	5+32, 10, 8	# Hh
+
+	li	10, 96
+	lxvd2x	6+32, 10, 8	# H^2l
+	li	10, 112
+	lxvd2x	7+32, 10, 8	# H^2
+	li	10, 128
+	lxvd2x	8+32, 10, 8	# H^2h
+
+	li	10, 144
+	lxvd2x	9+32, 10, 8	# H^3l
+	li	10, 160
+	lxvd2x	10+32, 10, 8	# H^3
+	li	10, 176
+	lxvd2x	11+32, 10, 8	# H^3h
+
+	li	10, 192
+	lxvd2x	12+32, 10, 8	# H^4l
+	li	10, 208
+	lxvd2x	13+32, 10, 8	# H^4
+	li	10, 224
+	lxvd2x	14+32, 10, 8	# H^4h
+.endm
+
+ #
+ # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
+ #               const char *rk, unsigned char iv[16], void *Xip);
+ #
+ #    r3 - inp
+ #    r4 - out
+ #    r5 - len
+ #    r6 - AES round keys
+ #    r7 - iv at offset 0, pending partial-block byte count at offset 56
+ #    r8 - Xi, HPoly, hash keys
+ #
+ #    rounds is at offset 240 in rk
+ #    Xi is at 0 in gcm_table (Xip).
+ #    Returns r3 = r11 + r12 (bytes consumed + bytes left), set at aes_gcm_out.
+_GLOBAL(aes_p10_gcm_encrypt)
+.align 5
+
+	SAVE_REGS
+
+	LOAD_HASH_TABLE
+
+	# initialize ICB: GHASH( IV ), IV - r7
+	lxvb16x	30+32, 0, 7	# load IV  - v30
+
+	mr	12, 5		# length
+	li	11, 0		# block index
+
+	# counter 1
+	vxor	31, 31, 31
+	vspltisb 22, 1
+	vsldoi	31, 31, 22,1	# counter 1
+
+	# load round key to VSR
+	lxv	0, 0(6)
+	lxv	1, 0x10(6)
+	lxv	2, 0x20(6)
+	lxv	3, 0x30(6)
+	lxv	4, 0x40(6)
+	lxv	5, 0x50(6)
+	lxv	6, 0x60(6)
+	lxv	7, 0x70(6)
+	lxv	8, 0x80(6)
+	lxv	9, 0x90(6)
+	lxv	10, 0xa0(6)
+
+	# load rounds - 10 (128), 12 (192), 14 (256)
+	lwz	9,240(6)
+
+	#
+	# vxor	state, state, w # addroundkey
+	xxlor	32+29, 0, 0	# v29 = vs0 (round key 0)
+	vxor	15, 30, 29	# IV + round key - add round key 0
+
+	cmpdi	9, 10
+	beq	Loop_aes_gcm_8x
+
+	# load 2 more round keys (vs11, vs12)
+	lxv	11, 0xb0(6)
+	lxv	12, 0xc0(6)
+
+	cmpdi	9, 12
+	beq	Loop_aes_gcm_8x
+
+	# load 2 more round keys (vs13, vs14)
+	lxv	13, 0xd0(6)
+	lxv	14, 0xe0(6)
+	cmpdi	9, 14
+	beq	Loop_aes_gcm_8x
+
+	b	aes_gcm_out	# rounds is not 10, 12 or 14: leave without processing any data
+
+.align 5
+Loop_aes_gcm_8x:
+	mr	14, 3		# r14 = input cursor
+	mr	9, 4		# r9 = output cursor (rounds are reloaded into r10 later)
+
+	#
+	# check partial block
+	#
+Continue_partial_check:
+	ld	15, 56(7)	# r15 = pending partial-block byte count
+	cmpdi	15, 0
+	beq	Continue	# nothing pending: bulk path
+	bgt	Final_block	# any positive count goes to the single-block path
+	cmpdi	15, 16		# NOTE(review): only reached when r15 is negative
+	blt	Final_block	# so this branch is always taken when reached
+
+Continue:
+	# n blocks
+	li	10, 128
+	divdu	10, 12, 10	# n 128 bytes-blocks
+	cmpdi	10, 0
+	beq	Loop_last_block	# fewer than 128 bytes left: skip the 8x loop
+
+	vaddudm	30, 30, 31	# IV + counter
+	vxor	16, 30, 29	# v16 - v22 = the next seven counters, each xored with v29
+	vaddudm	30, 30, 31
+	vxor	17, 30, 29
+	vaddudm	30, 30, 31
+	vxor	18, 30, 29
+	vaddudm	30, 30, 31
+	vxor	19, 30, 29
+	vaddudm	30, 30, 31
+	vxor	20, 30, 29
+	vaddudm	30, 30, 31
+	vxor	21, 30, 29
+	vaddudm	30, 30, 31
+	vxor	22, 30, 29
+
+	mtctr	10		# CTR = number of 128-byte iterations
+
+	li	15, 16		# r15 - r21 = byte offsets 16 - 112 used as load/store indexes
+	li	16, 32
+	li	17, 48
+	li	18, 64
+	li	19, 80
+	li	20, 96
+	li	21, 112
+
+	lwz	10, 240(6)	# r10 = rounds, compared inside the loop
+
+Loop_8x_block:
+
+	lxvb16x		15, 0, 14	# load block
+	lxvb16x		16, 15, 14	# load block
+	lxvb16x		17, 16, 14	# load block
+	lxvb16x		18, 17, 14	# load block
+	lxvb16x		19, 18, 14	# load block
+	lxvb16x		20, 19, 14	# load block
+	lxvb16x		21, 20, 14	# load block
+	lxvb16x		22, 21, 14	# load block
+	addi		14, 14, 128
+
+	Loop_aes_middle8x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_ghash
+
+	# 192 bits
+	xxlor	24+32, 11, 11
+
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+
+	vcipher	15, 15, 24
+	vcipher	16, 16, 24
+	vcipher	17, 17, 24
+	vcipher	18, 18, 24
+	vcipher	19, 19, 24
+	vcipher	20, 20, 24
+	vcipher	21, 21, 24
+	vcipher	22, 22, 24
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_ghash
+
+	# 256 bits
+	xxlor	24+32, 13, 13
+
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+
+	vcipher	15, 15, 24
+	vcipher	16, 16, 24
+	vcipher	17, 17, 24
+	vcipher	18, 18, 24
+	vcipher	19, 19, 24
+	vcipher	20, 20, 24
+	vcipher	21, 21, 24
+	vcipher	22, 22, 24
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_ghash
+	b	aes_gcm_out
+
+Do_next_ghash:
+
+	#
+	# last round
+	vcipherlast     15, 15, 23
+	vcipherlast     16, 16, 23
+
+	xxlxor		47, 47, 15
+	stxvb16x        47, 0, 9	# store output
+	xxlxor		48, 48, 16
+	stxvb16x        48, 15, 9	# store output
+
+	vcipherlast     17, 17, 23
+	vcipherlast     18, 18, 23
+
+	xxlxor		49, 49, 17
+	stxvb16x        49, 16, 9	# store output
+	xxlxor		50, 50, 18
+	stxvb16x        50, 17, 9	# store output
+
+	vcipherlast     19, 19, 23
+	vcipherlast     20, 20, 23
+
+	xxlxor		51, 51, 19
+	stxvb16x        51, 18, 9	# store output
+	xxlxor		52, 52, 20
+	stxvb16x        52, 19, 9	# store output
+
+	vcipherlast     21, 21, 23
+	vcipherlast     22, 22, 23
+
+	xxlxor		53, 53, 21
+	stxvb16x        53, 20, 9	# store output
+	xxlxor		54, 54, 22
+	stxvb16x        54, 21, 9	# store output
+
+	addi		9, 9, 128
+
+	# ghash here
+	ppc_aes_gcm_ghash2_4x
+
+	xxlor	27+32, 0, 0
+	vaddudm 30, 30, 31		# IV + counter
+	vmr	29, 30
+	vxor    15, 30, 27		# add round key
+	vaddudm 30, 30, 31
+	vxor    16, 30, 27
+	vaddudm 30, 30, 31
+	vxor    17, 30, 27
+	vaddudm 30, 30, 31
+	vxor    18, 30, 27
+	vaddudm 30, 30, 31
+	vxor    19, 30, 27
+	vaddudm 30, 30, 31
+	vxor    20, 30, 27
+	vaddudm 30, 30, 31
+	vxor    21, 30, 27
+	vaddudm 30, 30, 31
+	vxor    22, 30, 27
+
+	addi    12, 12, -128
+	addi    11, 11, 128
+
+	bdnz	Loop_8x_block
+
+	vmr	30, 29
+	stxvb16x 30+32, 0, 7		# update IV
+
+Loop_last_block:
+	cmpdi   12, 0
+	beq     aes_gcm_out
+
+	# loop last few blocks
+	li      10, 16
+	divdu   10, 12, 10
+
+	mtctr   10
+
+	lwz	10, 240(6)
+
+	cmpdi   12, 16
+	blt     Final_block
+
+Next_rem_block:
+	lxvb16x 15, 0, 14		# load block
+
+	Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_1x
+
+	# 192 bits
+	xxlor	24+32, 11, 11
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_1x
+
+	# 256 bits
+	xxlor	24+32, 13, 13
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_1x
+
+Do_next_1x:
+	vcipherlast     15, 15, 23
+
+	xxlxor		47, 47, 15
+	stxvb16x	47, 0, 9	# store output
+	addi		14, 14, 16
+	addi		9, 9, 16
+
+	vmr		28, 15
+	ppc_update_hash_1x
+
+	addi		12, 12, -16
+	addi		11, 11, 16
+	xxlor		19+32, 0, 0
+	vaddudm		30, 30, 31		# IV + counter
+	vxor		15, 30, 19		# add round key
+
+	bdnz	Next_rem_block
+
+	li	15, 0
+	std	15, 56(7)		# clear partial?
+	stxvb16x 30+32, 0, 7		# update IV
+	cmpdi	12, 0
+	beq	aes_gcm_out
+
+Final_block:
+	lwz	10, 240(6)
+	Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_final_1x
+
+	# 192 bits
+	xxlor	24+32, 11, 11
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_final_1x
+
+	# 256 bits
+	xxlor	24+32, 13, 13
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_final_1x
+
+Do_final_1x:
+	vcipherlast     15, 15, 23
+
+	# check partial block
+	li	21, 0			# encrypt
+	ld	15, 56(7)		# partial?
+	cmpdi	15, 0
+	beq	Normal_block
+	bl	Do_partial_block
+
+	cmpdi	12, 0
+	ble aes_gcm_out
+
+	b Continue_partial_check
+
+Normal_block:
+	lxvb16x	15, 0, 14		# load last block (NOTE(review): reads 16 bytes although only r12 remain)
+	xxlxor	47, 47, 15		# v15 (encrypted counter) ^= vs15 (input)
+
+	# create partial block mask
+	li	15, 16
+	sub	15, 15, 12		# index to the mask
+
+	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
+	vspltisb	17, 0		# second 16 bytes - 0x0000...00
+	li	10, 192
+	stvx	16, 10, 1		# ones at 192(r1)
+	addi	10, 10, 16
+	stvx	17, 10, 1		# zeros at 208(r1)
+
+	addi	10, 1, 192
+	lxvb16x	16, 15, 10		# load partial block mask
+	xxland	47, 47, 16		# keep the first r12 output bytes, zero the rest
+
+	vmr	28, 15			# hash input = masked output block
+	ppc_update_hash_1x
+
+	# * should store only the remaining bytes.
+	bl	Write_partial_block
+
+	stxvb16x 30+32, 0, 7		# update IV
+	std	12, 56(7)		# update partial?
+	li	16, 16			# r16 = offset of the second Xi copy
+
+	stxvb16x	32, 0, 8		# write out Xi
+	stxvb16x	32, 16, 8		# write out Xi
+	b aes_gcm_out
+
+ #
+ # Compute data mask: _start zero bytes, then up to _end 0xff bytes, then zeros.
+ # _mask is a VSR number, _start and _end are GPR numbers. Clobbers r10, v16, v17.
+.macro GEN_MASK _mask _start _end
+	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
+	vspltisb	17, 0		# second 16 bytes - 0x0000...00
+	li	10, 192
+	stxvb16x	17+32, 10, 1	# zeros at 192(r1)
+	add	10, 10, \_start
+	stxvb16x	16+32, 10, 1	# ones starting at 192 + start
+	add	10, 10, \_end
+	stxvb16x	17+32, 10, 1	# zeros starting at 192 + start + end
+
+	addi	10, 1, 192
+	lxvb16x	\_mask, 0, 10		# load partial block mask
+.endm
+
+ #
+ # Handle multiple partial blocks for encrypt and decrypt
+ #   operations.
+ # In: r15 pending byte count, r5 len, r21 0=encrypt/1=decrypt, v15 encrypted counter.
+SYM_FUNC_START_LOCAL(Do_partial_block)
+	add	17, 15, 5		# r17 = pending + len
+	cmpdi	17, 16
+	bgt	Big_block		# more than fills the block: mask runs to byte 16
+	GEN_MASK 18, 15, 5
+	b	_Partial		# NOTE(review): target lies after the SYM_FUNC_END marker below
+SYM_FUNC_END(Do_partial_block)
+Big_block:
+	li	16, 16
+	GEN_MASK 18, 15, 16
+
+_Partial:
+	lxvb16x	17+32, 0, 14		# load last block (NOTE(review): always a 16-byte read)
+	sldi	16, 15, 3		# r16 = pending count in bits
+	mtvsrdd	32+16, 0, 16
+	vsro	17, 17, 16		# shift input right by the pending byte count
+	xxlxor	47, 47, 17+32		# v15 ^= shifted input
+	xxland	47, 47, 18		# apply the mask in vs18
+
+	vxor	0, 0, 0			# clear Xi
+	vmr	28, 15			# encrypt: hash the masked output
+
+	cmpdi	21, 0			# encrypt/decrypt ops?
+	beq	Skip_decrypt
+	xxland	32+28, 32+17, 18	# decrypt: hash the masked input instead
+
+Skip_decrypt:
+
+	ppc_update_hash_1x
+
+	li	16, 16
+	lxvb16x 32+29, 16, 8		# v29 = Xi copy kept at offset 16 of the table
+	vxor	0, 0, 29
+	stxvb16x 32, 0, 8		# save Xi
+	stxvb16x 32, 16, 8		# save Xi
+
+	# store partial block
+	# loop the rest of the stream if any
+	sldi	16, 15, 3
+	mtvsrdd	32+16, 0, 16
+	vslo	15, 15, 16		# shift output left by the pending byte count
+	#stxvb16x 15+32, 0, 9		# last block
+
+	li	16, 16
+	sub	17, 16, 15		# 16 - partial
+
+	add	16, 15, 5
+	cmpdi	16, 16
+	bgt	Larger_16
+	mr	17, 5			# pending + len <= 16: write len bytes
+Larger_16:
+
+	# write partial
+	li		10, 192
+	stxvb16x	15+32, 10, 1	# save current block
+
+	addi		10, 9, -1	# r10 = out - 1, r16 = buffer - 1 for the update-form accesses
+	addi		16, 1, 191
+	mtctr		17		# move partial byte count (NOTE(review): a zero count would wrap CTR)
+
+Write_last_partial:
+        lbzu		18, 1(16)
+	stbu		18, 1(10)
+        bdnz		Write_last_partial
+	# Complete loop partial
+
+	add	14, 14, 17		# advance input and output cursors
+	add	9, 9, 17
+	sub	12, 12, 17		# fewer bytes left
+	add	11, 11, 17		# more bytes consumed
+
+	add	15, 15, 5
+	cmpdi	15, 16
+	blt	Save_partial		# block still incomplete: only record the new count
+
+	vaddudm	30, 30, 31		# block complete: advance the counter
+	stxvb16x 30+32, 0, 7		# update IV
+	xxlor	32+29, 0, 0		# v29 = vs0 (round key 0)
+	vxor	15, 30, 29		# IV + round key - add round key 0
+	li	15, 0
+	std	15, 56(7)		# partial done - clear
+	b	Partial_done
+Save_partial:
+	std	15, 56(7)		# partial
+
+Partial_done:
+	blr
+
+ #
+ # Write partial block (staged through the 16 bytes at 192(r1), then copied bytewise)
+ # r9 - output
+ # r12 - remaining bytes
+ # v15 - partial input data
+ # Clobbers r10, r14, r15, r16 and CTR.
+SYM_FUNC_START_LOCAL(Write_partial_block)
+	li		10, 192
+	stxvb16x	15+32, 10, 1		# last block
+
+	addi		10, 9, -1		# r10 = out - 1, r16 = buffer - 1 for the update-form accesses
+	addi		16, 1, 191
+
+        mtctr		12			# remaining bytes
+	li		15, 0			# value is not used inside this routine
+
+Write_last_byte:
+        lbzu		14, 1(16)
+	stbu		14, 1(10)
+        bdnz		Write_last_byte
+	blr
+SYM_FUNC_END(Write_partial_block)
+
+aes_gcm_out:				# common exit, branched to by both encrypt and decrypt
+	# out = state
+	stxvb16x	32, 0, 8		# write out Xi
+	add	3, 11, 12		# return count
+
+	RESTORE_REGS
+	blr
+
+ #
+ # 8x Decrypt
+ # Register arguments (r3 - r8) are used the same way as in aes_p10_gcm_encrypt.
+_GLOBAL(aes_p10_gcm_decrypt)
+.align 5
+
+	SAVE_REGS
+
+	LOAD_HASH_TABLE
+
+	# initialize ICB: GHASH( IV ), IV - r7
+	lxvb16x	30+32, 0, 7	# load IV  - v30
+
+	mr	12, 5		# length
+	li	11, 0		# block index
+
+	# counter 1
+	vxor	31, 31, 31
+	vspltisb 22, 1
+	vsldoi	31, 31, 22,1	# counter 1
+
+	# load round key to VSR
+	lxv	0, 0(6)
+	lxv	1, 0x10(6)
+	lxv	2, 0x20(6)
+	lxv	3, 0x30(6)
+	lxv	4, 0x40(6)
+	lxv	5, 0x50(6)
+	lxv	6, 0x60(6)
+	lxv	7, 0x70(6)
+	lxv	8, 0x80(6)
+	lxv	9, 0x90(6)
+	lxv	10, 0xa0(6)
+
+	# load rounds - 10 (128), 12 (192), 14 (256)
+	lwz	9,240(6)
+
+	#
+	# vxor	state, state, w # addroundkey
+	xxlor	32+29, 0, 0	# v29 = vs0 (round key 0)
+	vxor	15, 30, 29	# IV + round key - add round key 0
+
+	cmpdi	9, 10
+	beq	Loop_aes_gcm_8x_dec
+
+	# load 2 more round keys (vs11, vs12)
+	lxv	11, 0xb0(6)
+	lxv	12, 0xc0(6)
+
+	cmpdi	9, 12
+	beq	Loop_aes_gcm_8x_dec
+
+	# load 2 more round keys (vs13, vs14)
+	lxv	13, 0xd0(6)
+	lxv	14, 0xe0(6)
+	cmpdi	9, 14
+	beq	Loop_aes_gcm_8x_dec
+
+	b	aes_gcm_out	# rounds is not 10, 12 or 14: leave without processing any data
+
+.align 5
+Loop_aes_gcm_8x_dec:
+	mr	14, 3		# r14 = input cursor
+	mr	9, 4		# r9 = output cursor (rounds are reloaded into r10 later)
+
+	#
+	# check partial block
+	#
+Continue_partial_check_dec:
+	ld	15, 56(7)	# r15 = pending partial-block byte count
+	cmpdi	15, 0
+	beq	Continue_dec	# nothing pending: bulk path
+	bgt	Final_block_dec	# any positive count goes to the single-block path
+	cmpdi	15, 16		# NOTE(review): only reached when r15 is negative
+	blt	Final_block_dec	# so this branch is always taken when reached
+
+Continue_dec:
+	# n blocks
+	li	10, 128
+	divdu	10, 12, 10	# n 128 bytes-blocks
+	cmpdi	10, 0
+	beq	Loop_last_block_dec	# fewer than 128 bytes left: skip the 8x loop
+
+	vaddudm	30, 30, 31	# IV + counter
+	vxor	16, 30, 29	# v16 - v22 = the next seven counters, each xored with v29
+	vaddudm	30, 30, 31
+	vxor	17, 30, 29
+	vaddudm	30, 30, 31
+	vxor	18, 30, 29
+	vaddudm	30, 30, 31
+	vxor	19, 30, 29
+	vaddudm	30, 30, 31
+	vxor	20, 30, 29
+	vaddudm	30, 30, 31
+	vxor	21, 30, 29
+	vaddudm	30, 30, 31
+	vxor	22, 30, 29
+
+	mtctr	10		# CTR = number of 128-byte iterations
+
+	li	15, 16		# r15 - r21 = byte offsets 16 - 112 used as load/store indexes
+	li	16, 32
+	li	17, 48
+	li	18, 64
+	li	19, 80
+	li	20, 96
+	li	21, 112
+
+	lwz	10, 240(6)	# r10 = rounds, compared inside the loop
+
+Loop_8x_block_dec:
+
+	lxvb16x		15, 0, 14	# load block
+	lxvb16x		16, 15, 14	# load block
+	lxvb16x		17, 16, 14	# load block
+	lxvb16x		18, 17, 14	# load block
+	lxvb16x		19, 18, 14	# load block
+	lxvb16x		20, 19, 14	# load block
+	lxvb16x		21, 20, 14	# load block
+	lxvb16x		22, 21, 14	# load block
+	addi		14, 14, 128
+
+	Loop_aes_middle8x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_ghash_dec
+
+	# 192 bits
+	xxlor	24+32, 11, 11
+
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+
+	vcipher	15, 15, 24
+	vcipher	16, 16, 24
+	vcipher	17, 17, 24
+	vcipher	18, 18, 24
+	vcipher	19, 19, 24
+	vcipher	20, 20, 24
+	vcipher	21, 21, 24
+	vcipher	22, 22, 24
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_ghash_dec
+
+	# 256 bits
+	xxlor	24+32, 13, 13
+
+	vcipher	15, 15, 23
+	vcipher	16, 16, 23
+	vcipher	17, 17, 23
+	vcipher	18, 18, 23
+	vcipher	19, 19, 23
+	vcipher	20, 20, 23
+	vcipher	21, 21, 23
+	vcipher	22, 22, 23
+
+	vcipher	15, 15, 24
+	vcipher	16, 16, 24
+	vcipher	17, 17, 24
+	vcipher	18, 18, 24
+	vcipher	19, 19, 24
+	vcipher	20, 20, 24
+	vcipher	21, 21, 24
+	vcipher	22, 22, 24
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_ghash_dec
+	b	aes_gcm_out
+
+Do_next_ghash_dec:
+
+	#
+	# last round
+	vcipherlast     15, 15, 23
+	vcipherlast     16, 16, 23
+
+	xxlxor		47, 47, 15
+	stxvb16x        47, 0, 9	# store output
+	xxlxor		48, 48, 16
+	stxvb16x        48, 15, 9	# store output
+
+	vcipherlast     17, 17, 23
+	vcipherlast     18, 18, 23
+
+	xxlxor		49, 49, 17
+	stxvb16x        49, 16, 9	# store output
+	xxlxor		50, 50, 18
+	stxvb16x        50, 17, 9	# store output
+
+	vcipherlast     19, 19, 23
+	vcipherlast     20, 20, 23
+
+	xxlxor		51, 51, 19
+	stxvb16x        51, 18, 9	# store output
+	xxlxor		52, 52, 20
+	stxvb16x        52, 19, 9	# store output
+
+	vcipherlast     21, 21, 23
+	vcipherlast     22, 22, 23
+
+	xxlxor		53, 53, 21
+	stxvb16x        53, 20, 9	# store output
+	xxlxor		54, 54, 22
+	stxvb16x        54, 21, 9	# store output
+
+	addi		9, 9, 128
+
+	xxlor           15+32, 15, 15
+	xxlor           16+32, 16, 16
+	xxlor           17+32, 17, 17
+	xxlor           18+32, 18, 18
+	xxlor           19+32, 19, 19
+	xxlor           20+32, 20, 20
+	xxlor           21+32, 21, 21
+	xxlor           22+32, 22, 22
+
+	# ghash here
+	ppc_aes_gcm_ghash2_4x
+
+	xxlor	27+32, 0, 0
+	vaddudm 30, 30, 31		# IV + counter
+	vmr	29, 30
+	vxor    15, 30, 27		# add round key
+	vaddudm 30, 30, 31
+	vxor    16, 30, 27
+	vaddudm 30, 30, 31
+	vxor    17, 30, 27
+	vaddudm 30, 30, 31
+	vxor    18, 30, 27
+	vaddudm 30, 30, 31
+	vxor    19, 30, 27
+	vaddudm 30, 30, 31
+	vxor    20, 30, 27
+	vaddudm 30, 30, 31
+	vxor    21, 30, 27
+	vaddudm 30, 30, 31
+	vxor    22, 30, 27
+
+	addi    12, 12, -128
+	addi    11, 11, 128
+
+	bdnz	Loop_8x_block_dec
+
+	vmr	30, 29
+	stxvb16x 30+32, 0, 7		# update IV
+
+Loop_last_block_dec:
+	cmpdi   12, 0
+	beq     aes_gcm_out
+
+	# loop last few blocks
+	li      10, 16
+	divdu   10, 12, 10
+
+	mtctr   10
+
+	lwz	10, 240(6)
+
+	cmpdi   12, 16
+	blt     Final_block_dec
+
+Next_rem_block_dec:
+	lxvb16x 15, 0, 14		# load block
+
+	Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_1x_dec
+
+	# 192 bits
+	xxlor	24+32, 11, 11
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_1x_dec
+
+	# 256 bits
+	xxlor	24+32, 13, 13
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_1x_dec
+
+Do_next_1x_dec:
+	vcipherlast     15, 15, 23
+
+	xxlxor		47, 47, 15
+	stxvb16x	47, 0, 9	# store output
+	addi		14, 14, 16
+	addi		9, 9, 16
+
+	xxlor           28+32, 15, 15
+	#vmr		28, 15
+	ppc_update_hash_1x
+
+	addi		12, 12, -16
+	addi		11, 11, 16
+	xxlor		19+32, 0, 0
+	vaddudm		30, 30, 31		# IV + counter
+	vxor		15, 30, 19		# add round key
+
+	bdnz	Next_rem_block_dec
+
+	li	15, 0
+	std	15, 56(7)		# clear partial?
+	stxvb16x 30+32, 0, 7		# update IV
+	cmpdi	12, 0
+	beq	aes_gcm_out
+
+Final_block_dec:
+	lwz	10, 240(6)
+	Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_final_1x_dec
+
+	# 192 bits
+	xxlor	24+32, 11, 11
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_final_1x_dec
+
+	# 256 bits
+	xxlor	24+32, 13, 13
+
+	vcipher	15, 15, 23
+	vcipher	15, 15, 24
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_final_1x_dec
+
+Do_final_1x_dec:
+	vcipherlast     15, 15, 23
+
+	# check partial block
+	li	21, 1			# decrypt
+	ld	15, 56(7)		# partial?
+	cmpdi	15, 0
+	beq	Normal_block_dec
+	bl	Do_partial_block
+	cmpdi	12, 0
+	ble aes_gcm_out
+
+	b Continue_partial_check_dec
+
+Normal_block_dec:
+	lxvb16x	15, 0, 14		# load last block (NOTE(review): reads 16 bytes although only r12 remain)
+	xxlxor	47, 47, 15		# v15 (encrypted counter) ^= vs15 (input)
+
+	# create partial block mask
+	li	15, 16
+	sub	15, 15, 12		# index to the mask
+
+	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
+	vspltisb	17, 0		# second 16 bytes - 0x0000...00
+	li	10, 192
+	stvx	16, 10, 1		# ones at 192(r1)
+	addi	10, 10, 16
+	stvx	17, 10, 1		# zeros at 208(r1)
+
+	addi	10, 1, 192
+	lxvb16x	16, 15, 10		# load partial block mask
+	xxland	47, 47, 16		# keep the first r12 output bytes, zero the rest
+
+	xxland	32+28, 15, 16		# hash input = masked input block (vs15), not the output
+	#vmr	28, 15
+	ppc_update_hash_1x
+
+	# * should store only the remaining bytes.
+	bl	Write_partial_block
+
+	stxvb16x 30+32, 0, 7		# update IV
+	std	12, 56(7)		# update partial?
+	li	16, 16			# r16 = offset of the second Xi copy
+
+	stxvb16x	32, 0, 8		# write out Xi
+	stxvb16x	32, 16, 8		# write out Xi
+	b aes_gcm_out
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
new file mode 100644
index 0000000000..8e00eccc35
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Fast AES implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#define	EAD(in, bpos) \
+	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;
+
+#define DAD(in, bpos) \
+	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;
+
+#define LWH(out, off) \
+	evlwwsplat	out,off(rT0);	/* load word high		*/
+
+#define LWL(out, off) \
+	lwz		out,off(rT0);	/* load word low		*/
+
+#define LBZ(out, tab, off) \
+	lbz		out,off(tab);	/* load byte			*/
+
+#define LAH(out, in, bpos, off) \
+	EAD(in, bpos)			/* calc addr + load word high	*/ \
+	LWH(out, off)
+
+#define LAL(out, in, bpos, off) \
+	EAD(in, bpos)			/* calc addr + load word low	*/ \
+	LWL(out, off)
+
+#define LAE(out, in, bpos) \
+	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
+	LBZ(out, rT0, 8)
+
+#define LBE(out) \
+	LBZ(out, rT0, 8)		/* load enc byte		*/
+
+#define LAD(out, in, bpos) \
+	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
+	LBZ(out, rT1, 0)
+
+#define LBD(out) \
+	LBZ(out, rT1, 0)
+
+/*
+ * ppc_encrypt_block: The central encryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_encrypt_block)
+	LAH(rW4, rD1, 2, 4)
+	LAH(rW6, rD0, 3, 0)
+	LAH(rW3, rD0, 1, 8)
+ppc_encrypt_block_loop:
+	LAH(rW0, rD3, 0, 12)
+	LAL(rW0, rD0, 0, 12)
+	LAH(rW1, rD1, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAL(rW3, rD1, 1, 8)
+	LAL(rW4, rD2, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD3, 2, 4)
+	LAL(rW5, rD0, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	evldw		rD1,16(rKP)
+	EAD(rD3, 3)
+	evxor		rW2,rW2,rW4
+	LWL(rW7, 0)
+	evxor		rW2,rW2,rW6
+	EAD(rD2, 0)
+	evxor		rD1,rD1,rW2
+	LWL(rW1, 12)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 2)
+	evxor		rW3,rW3,rW5
+	LWH(rW4, 4)
+	evxor		rW3,rW3,rW7
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW1
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	LAH(rW0, rD3, 0, 12)
+	LAL(rW0, rD0, 0, 12)
+	LAH(rW1, rD1, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAL(rW3, rD1, 1, 8)
+	LAL(rW4, rD2, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD3, 2, 4)
+	LAL(rW5, rD0, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	evldw		rD1,32(rKP)
+	EAD(rD3, 3)
+	evxor		rW2,rW2,rW4
+	LWL(rW7, 0)
+	evxor		rW2,rW2,rW6
+	EAD(rD2, 0)
+	evxor		rD1,rD1,rW2
+	LWL(rW1, 12)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,40(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 2)
+	evxor		rW3,rW3,rW5
+	LWH(rW4, 4)
+	evxor		rW3,rW3,rW7
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW1
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	addi		rKP,rKP,32
+	bdnz		ppc_encrypt_block_loop
+	LAH(rW0, rD3, 0, 12)
+	LAL(rW0, rD0, 0, 12)
+	LAH(rW1, rD1, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAL(rW3, rD1, 1, 8)
+	LAL(rW4, rD2, 2, 4)
+	LAH(rW5, rD3, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAL(rW5, rD0, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	evldw		rD1,16(rKP)
+	EAD(rD3, 3)
+	evxor		rW2,rW2,rW4
+	LWL(rW7, 0)
+	evxor		rW2,rW2,rW6
+	EAD(rD2, 0)
+	evxor		rD1,rD1,rW2
+	LWL(rW1, 12)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 0)
+	evxor		rW3,rW3,rW5
+	LBE(rW2)
+	evxor		rW3,rW3,rW7
+	EAD(rD0, 1)
+	evxor		rD3,rD3,rW3
+	LBE(rW6)
+	evxor		rD3,rD3,rW1
+	EAD(rD0, 0)
+	evmergehi	rD2,rD2,rD3
+	LBE(rW1)
+	LAE(rW0, rD3, 0)
+	LAE(rW1, rD0, 0)
+	LAE(rW4, rD2, 1)
+	LAE(rW5, rD3, 1)
+	LAE(rW3, rD2, 0)
+	LAE(rW7, rD1, 1)
+	rlwimi		rW0,rW4,8,16,23
+	rlwimi		rW1,rW5,8,16,23
+	LAE(rW4, rD1, 2)
+	LAE(rW5, rD2, 2)
+	rlwimi		rW2,rW6,8,16,23
+	rlwimi		rW3,rW7,8,16,23
+	LAE(rW6, rD3, 2)
+	LAE(rW7, rD0, 2)
+	rlwimi		rW0,rW4,16,8,15
+	rlwimi		rW1,rW5,16,8,15
+	LAE(rW4, rD0, 3)
+	LAE(rW5, rD1, 3)
+	rlwimi		rW2,rW6,16,8,15
+	lwz		rD0,32(rKP)
+	rlwimi		rW3,rW7,16,8,15
+	lwz		rD1,36(rKP)
+	LAE(rW6, rD2, 3)
+	LAE(rW7, rD3, 3)
+	rlwimi		rW0,rW4,24,0,7
+	lwz		rD2,40(rKP)
+	rlwimi		rW1,rW5,24,0,7
+	lwz		rD3,44(rKP)
+	rlwimi		rW2,rW6,24,0,7
+	rlwimi		rW3,rW7,24,0,7
+	blr
+
+/*
+ * ppc_decrypt_block: The central decryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_decrypt_block)
+	LAH(rW0, rD1, 0, 12)
+	LAH(rW6, rD0, 3, 0)
+	LAH(rW3, rD0, 1, 8)
+ppc_decrypt_block_loop:
+	LAH(rW1, rD3, 0, 12)
+	LAL(rW0, rD2, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAH(rW4, rD3, 2, 4)
+	LAL(rW4, rD0, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD1, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	LAL(rW7, rD3, 3, 0)
+	LAL(rW3, rD1, 1, 8)
+	evldw		rD1,16(rKP)
+	EAD(rD0, 0)
+	evxor		rW4,rW4,rW6
+	LWL(rW1, 12)
+	evxor		rW0,rW0,rW4
+	EAD(rD2, 2)
+	evxor		rW0,rW0,rW2
+	LWL(rW5, 4)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 0)
+	evxor		rW3,rW3,rW7
+	LWH(rW0, 12)
+	evxor		rW3,rW3,rW1
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW5
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	LAH(rW1, rD3, 0, 12)
+	LAL(rW0, rD2, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAH(rW4, rD3, 2, 4)
+	LAL(rW4, rD0, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD1, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	LAL(rW7, rD3, 3, 0)
+	LAL(rW3, rD1, 1, 8)
+	evldw		 rD1,32(rKP)
+	EAD(rD0, 0)
+	evxor		rW4,rW4,rW6
+	LWL(rW1, 12)
+	evxor		rW0,rW0,rW4
+	EAD(rD2, 2)
+	evxor		rW0,rW0,rW2
+	LWL(rW5, 4)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,40(rKP)
+	evmergehi	rD0,rD0,rD1
+	EAD(rD1, 0)
+	evxor		rW3,rW3,rW7
+	LWH(rW0, 12)
+	evxor		rW3,rW3,rW1
+	EAD(rD0, 3)
+	evxor		rD3,rD3,rW3
+	LWH(rW6, 0)
+	evxor		rD3,rD3,rW5
+	EAD(rD0, 1)
+	evmergehi	rD2,rD2,rD3
+	LWH(rW3, 8)
+	addi		rKP,rKP,32
+	bdnz		ppc_decrypt_block_loop
+	LAH(rW1, rD3, 0, 12)
+	LAL(rW0, rD2, 0, 12)
+	LAH(rW2, rD2, 1, 8)
+	LAL(rW2, rD3, 1, 8)
+	LAH(rW4, rD3, 2, 4)
+	LAL(rW4, rD0, 2, 4)
+	LAL(rW6, rD1, 3, 0)
+	LAH(rW5, rD1, 2, 4)
+	LAH(rW7, rD2, 3, 0)
+	LAL(rW7, rD3, 3, 0)
+	LAL(rW3, rD1, 1, 8)
+	evldw		 rD1,16(rKP)
+	EAD(rD0, 0)
+	evxor		rW4,rW4,rW6
+	LWL(rW1, 12)
+	evxor		rW0,rW0,rW4
+	EAD(rD2, 2)
+	evxor		rW0,rW0,rW2
+	LWL(rW5, 4)
+	evxor		rD1,rD1,rW0
+	evldw		rD3,24(rKP)
+	evmergehi	rD0,rD0,rD1
+	DAD(rD1, 0)
+	evxor		rW3,rW3,rW7
+	LBD(rW0)
+	evxor		rW3,rW3,rW1
+	DAD(rD0, 1)
+	evxor		rD3,rD3,rW3
+	LBD(rW6)
+	evxor		rD3,rD3,rW5
+	DAD(rD0, 0)
+	evmergehi	rD2,rD2,rD3
+	LBD(rW3)
+	LAD(rW2, rD3, 0)
+	LAD(rW1, rD2, 0)
+	LAD(rW4, rD2, 1)
+	LAD(rW5, rD3, 1)
+	LAD(rW7, rD1, 1)
+	rlwimi		rW0,rW4,8,16,23
+	rlwimi		rW1,rW5,8,16,23
+	LAD(rW4, rD3, 2)
+	LAD(rW5, rD0, 2)
+	rlwimi		rW2,rW6,8,16,23
+	rlwimi		rW3,rW7,8,16,23
+	LAD(rW6, rD1, 2)
+	LAD(rW7, rD2, 2)
+	rlwimi		rW0,rW4,16,8,15
+	rlwimi		rW1,rW5,16,8,15
+	LAD(rW4, rD0, 3)
+	LAD(rW5, rD1, 3)
+	rlwimi		rW2,rW6,16,8,15
+	lwz		rD0,32(rKP)
+	rlwimi		rW3,rW7,16,8,15
+	lwz		rD1,36(rKP)
+	LAD(rW6, rD2, 3)
+	LAD(rW7, rD3, 3)
+	rlwimi		rW0,rW4,24,0,7
+	lwz		rD2,40(rKP)
+	rlwimi		rW1,rW5,24,0,7
+	lwz		rD3,44(rKP)
+	rlwimi		rW2,rW6,24,0,7
+	rlwimi		rW3,rW7,24,0,7
+	blr
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644
index 0000000000..efab78a3a8
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for AES implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <crypto/aes.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). e500 cores can issue two
+ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
+ * bit unit (SU2). One of these can be a memory access that is executed via
+ * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
+ * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
+ * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
+ * is included. Even with the low end model clocked at 667 MHz this equals a
+ * critical time window of less than 30us. The value has been chosen to
+ * process a 512 byte disk block in one run, or a large 1400 byte IPsec
+ * network packet in two runs.
+ * Every skcipher walk loop in this file clamps its per-step size to it.
+ */
+#define MAX_BYTES 768
+
+struct ppc_aes_ctx {
+	u32 key_enc[AES_MAX_KEYLENGTH_U32];	/* expanded key, written by ppc_expand_key_*() */
+	u32 key_dec[AES_MAX_KEYLENGTH_U32];	/* derived from key_enc by ppc_generate_decrypt_key() */
+	u32 rounds;	/* 4/5/6 for 128/192/256 bit keys, set in ppc_aes_setkey() */
+};
+
+struct ppc_xts_ctx {
+	u32 key_enc[AES_MAX_KEYLENGTH_U32];	/* expanded first key half (data key) */
+	u32 key_dec[AES_MAX_KEYLENGTH_U32];	/* derived from key_enc by ppc_generate_decrypt_key() */
+	u32 key_twk[AES_MAX_KEYLENGTH_U32];	/* expanded second key half (tweak key) */
+	u32 rounds;	/* 4/5/6 for 128/192/256 bit key halves, set in ppc_xts_setkey() */
+};
+
+extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);	/* one 16 byte block */
+extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);	/* one 16 byte block */
+extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes);
+extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+			    u32 bytes);
+extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes, u8 *iv);
+extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+			    u32 bytes, u8 *iv);
+extern void ppc_crypt_ctr  (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes, u8 *iv);	/* same routine for both directions */
+extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+			    u32 bytes, u8 *iv, u32 *key_twk);
+extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+			    u32 bytes, u8 *iv, u32 *key_twk);	/* callers here pass key_twk or NULL */
+/* key schedule helpers from aes-spe-keys.S; each writes the expanded key to key_enc */
+extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
+/* key_len is the AES key size in bytes (16/24/32) */
+extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
+				     unsigned int key_len);
+
+static void spe_begin(void)
+{
+	/* disable preemption and save the user's SPE registers if required */
+	preempt_disable();
+	enable_kernel_spe();	/* every caller in this file pairs this with spe_end() */
+}
+
+static void spe_end(void)
+{
+	disable_kernel_spe();	/* counterpart of enable_kernel_spe() in spe_begin() */
+	/* reenable preemption */
+	preempt_enable();
+}
+
+static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		unsigned int key_len)
+{
+	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	/* Expand in_key into ctx->key_enc; rounds is the 4/5/6 value the assembler takes */
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->rounds = 4;
+		ppc_expand_key_128(ctx->key_enc, in_key);
+		break;
+	case AES_KEYSIZE_192:
+		ctx->rounds = 5;
+		ppc_expand_key_192(ctx->key_enc, in_key);
+		break;
+	case AES_KEYSIZE_256:
+		ctx->rounds = 6;
+		ppc_expand_key_256(ctx->key_enc, in_key);
+		break;
+	default:
+		return -EINVAL;	/* any other key length is rejected, ctx is left untouched */
+	}
+	/* the decryption key is always derived from the freshly expanded key_enc */
+	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+	return 0;
+}
+
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *in_key, unsigned int key_len)
+{
+	return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);	/* skcipher adapter around ppc_aes_setkey() */
+}
+
+static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+		   unsigned int key_len)
+{
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+	/* in_key holds two keys of key_len / 2 bytes: data key first, tweak key second */
+	err = xts_verify_key(tfm, in_key, key_len);
+	if (err)
+		return err;
+
+	key_len >>= 1;	/* from here on: size of one of the two keys */
+
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->rounds = 4;
+		ppc_expand_key_128(ctx->key_enc, in_key);
+		ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
+		break;
+	case AES_KEYSIZE_192:
+		ctx->rounds = 5;
+		ppc_expand_key_192(ctx->key_enc, in_key);
+		ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
+		break;
+	case AES_KEYSIZE_256:
+		ctx->rounds = 6;
+		ppc_expand_key_256(ctx->key_enc, in_key);
+		ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
+		break;
+	default:
+		return -EINVAL;	/* key half is not 16, 24 or 32 bytes */
+	}
+	/* only the data key gets a decryption schedule; the tweak key has none in this context */
+	ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+	return 0;
+}
+
+static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	/* encrypt a single 16 byte block from in to out between spe_begin()/spe_end() */
+	spe_begin();
+	ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
+	spe_end();
+}
+
+static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	/* decrypt a single 16 byte block from in to out using the derived key_dec */
+	spe_begin();
+	ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
+	spe_end();
+}
+
+static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+	/* ECB over the request in steps of at most MAX_BYTES; enc selects the direction */
+	err = skcipher_walk_virt(&walk, req, false);
+	/* NOTE(review): the assembler expects nbytes >= 16; relies on the walk never yielding less - confirm */
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);	/* bound the preempt-off window */
+		nbytes = round_down(nbytes, AES_BLOCK_SIZE);	/* whole blocks only */
+
+		spe_begin();
+		if (enc)
+			ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, ctx->rounds, nbytes);
+		else
+			ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, ctx->rounds, nbytes);
+		spe_end();
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);	/* pass the count left unprocessed in this step */
+	}
+
+	return err;	/* result of the last walk call */
+}
+
+static int ppc_ecb_encrypt(struct skcipher_request *req)
+{
+	return ppc_ecb_crypt(req, true);	/* enc = true: encrypt direction */
+}
+
+static int ppc_ecb_decrypt(struct skcipher_request *req)
+{
+	return ppc_ecb_crypt(req, false);	/* enc = false: decrypt direction */
+}
+
+static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+	/* CBC over the request in steps of at most MAX_BYTES; walk.iv carries the chaining value */
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);	/* bound the preempt-off window */
+		nbytes = round_down(nbytes, AES_BLOCK_SIZE);	/* whole blocks only */
+
+		spe_begin();
+		if (enc)
+			ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, ctx->rounds, nbytes,
+					walk.iv);
+		else
+			ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, ctx->rounds, nbytes,
+					walk.iv);
+		spe_end();
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);	/* pass the count left unprocessed in this step */
+	}
+
+	return err;	/* result of the last walk call */
+}
+
+static int ppc_cbc_encrypt(struct skcipher_request *req)
+{
+	return ppc_cbc_crypt(req, true);	/* enc = true: encrypt direction */
+}
+
+static int ppc_cbc_decrypt(struct skcipher_request *req)
+{
+	return ppc_cbc_crypt(req, false);	/* enc = false: decrypt direction */
+}
+
+static int ppc_ctr_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+	/* CTR mode; registered below as both .encrypt and .decrypt, always with key_enc */
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);	/* bound the preempt-off window */
+		if (nbytes < walk.total)	/* more data follows: stay on a block boundary */
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+		spe_begin();
+		ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
+			      ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+		spe_end();
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);	/* pass the count left unprocessed in this step */
+	}
+
+	return err;	/* result of the last walk call */
+}
+
+static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+	u32 *twk;
+	/* XTS over whole blocks of the request; partial tails are handled by the callers */
+	err = skcipher_walk_virt(&walk, req, false);
+	twk = ctx->key_twk;	/* tweak key is handed to the assembler on the first step only */
+
+	while ((nbytes = walk.nbytes) != 0) {
+		nbytes = min_t(unsigned int, nbytes, MAX_BYTES);	/* bound the preempt-off window */
+		nbytes = round_down(nbytes, AES_BLOCK_SIZE);	/* whole blocks only */
+
+		spe_begin();
+		if (enc)
+			ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, ctx->rounds, nbytes,
+					walk.iv, twk);
+		else
+			ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, ctx->rounds, nbytes,
+					walk.iv, twk);
+		spe_end();
+
+		twk = NULL;	/* later steps continue from the value left in walk.iv */
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);	/* pass the count left unprocessed in this step */
+	}
+
+	return err;	/* result of the last walk call */
+}
+
+static int ppc_xts_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;	/* bytes beyond the last whole block */
+	int offset = req->cryptlen - tail - AES_BLOCK_SIZE;	/* start of the last whole block */
+	struct skcipher_request subreq;
+	u8 b[2][AES_BLOCK_SIZE];	/* b[0]: block to encrypt, b[1]: bytes moved to the tail */
+	int err;
+	/* XTS encryption; a partial final block is handled by ciphertext stealing below */
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (tail) {	/* bulk pass covers the whole blocks only */
+		subreq = *req;
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail, req->iv);
+		req = &subreq;
+	}
+
+	err = ppc_xts_crypt(req, true);
+	if (err || !tail)
+		return err;
+	/* fetch the last ciphertext block, keep its first tail bytes for the output tail */
+	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
+	memcpy(b[1], b[0], tail);
+	scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);	/* overlay the trailing plaintext */
+
+	spe_begin();
+	ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
+			req->iv, NULL);	/* NULL key_twk: req->iv is used as left by the bulk pass */
+	spe_end();
+	/* b[0] and b[1] are contiguous: write the new last block followed by the tail bytes */
+	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+	return 0;
+}
+
+static int ppc_xts_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;	/* bytes beyond the last whole block */
+	int offset = req->cryptlen - tail - AES_BLOCK_SIZE;	/* start of the last whole block */
+	struct skcipher_request subreq;
+	u8 b[3][AES_BLOCK_SIZE];	/* b[0]: output block, b[1]: last whole block, b[2]: tail */
+	le128 twk;
+	int err;
+	/* XTS decryption; with a partial tail the last whole block is excluded from the bulk pass */
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (tail) {	/* bulk pass stops before the last whole block */
+		subreq = *req;
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   offset, req->iv);
+		req = &subreq;
+	}
+
+	err = ppc_xts_crypt(req, false);
+	if (err || !tail)
+		return err;
+	/* read the last whole block into b[1] and the tail bytes into b[2] */
+	scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
+
+	spe_begin();
+	if (!offset)	/* bulk pass had no data, so req->iv is encrypted with the tweak key here */
+		ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
+				AES_BLOCK_SIZE);
+
+	gf128mul_x_ble(&twk, (le128 *)req->iv);	/* twk = req->iv multiplied by x; req->iv itself is kept */
+
+	ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+			(u8 *)&twk, NULL);	/* last whole block is decrypted with the later tweak */
+	memcpy(b[0], b[2], tail);	/* rebuild a full block: tail bytes first ... */
+	memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);	/* ... then the rest of decrypted b[1] */
+	ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+			req->iv, NULL);	/* rebuilt block uses the earlier tweak */
+	spe_end();
+	/* b[0] and b[1] are contiguous: write the full block followed by tail bytes of b[1] */
+	scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+	return 0;
+}
+
+/*
+ * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
+ * because the e500 platform can handle unaligned reads/writes very efficiently.
+ * This improves IPsec throughput by another few percent. Additionally we assume
+ * that AES context is always aligned to at least 8 bytes because it is created
+ * with kmalloc() in the crypto infrastructure.
+ */
+
+static struct crypto_alg aes_cipher_alg = {	/* single block "aes" cipher */
+	.cra_name		=	"aes",
+	.cra_driver_name	=	"aes-ppc-spe",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct ppc_aes_ctx),
+	.cra_alignmask		=	0,	/* no alignment requirement, see the comment above */
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
+			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
+			.cia_setkey		=	ppc_aes_setkey,
+			.cia_encrypt		=	ppc_aes_encrypt,
+			.cia_decrypt		=	ppc_aes_decrypt
+		}
+	}
+};
+
+static struct skcipher_alg aes_skcipher_algs[] = {	/* ECB, CBC, CTR and XTS modes */
+	{
+		.base.cra_name		=	"ecb(aes)",
+		.base.cra_driver_name	=	"ecb-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	AES_BLOCK_SIZE,
+		.base.cra_ctxsize	=	sizeof(struct ppc_aes_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE,
+		.max_keysize		=	AES_MAX_KEY_SIZE,
+		.setkey			=	ppc_aes_setkey_skcipher,
+		.encrypt		=	ppc_ecb_encrypt,
+		.decrypt		=	ppc_ecb_decrypt,
+	}, {	/* no .ivsize above: ECB takes no IV */
+		.base.cra_name		=	"cbc(aes)",
+		.base.cra_driver_name	=	"cbc-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	AES_BLOCK_SIZE,
+		.base.cra_ctxsize	=	sizeof(struct ppc_aes_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE,
+		.max_keysize		=	AES_MAX_KEY_SIZE,
+		.ivsize			=	AES_BLOCK_SIZE,
+		.setkey			=	ppc_aes_setkey_skcipher,
+		.encrypt		=	ppc_cbc_encrypt,
+		.decrypt		=	ppc_cbc_decrypt,
+	}, {
+		.base.cra_name		=	"ctr(aes)",
+		.base.cra_driver_name	=	"ctr-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	1,	/* ppc_ctr_crypt() accepts a partial final block */
+		.base.cra_ctxsize	=	sizeof(struct ppc_aes_ctx),
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE,
+		.max_keysize		=	AES_MAX_KEY_SIZE,
+		.ivsize			=	AES_BLOCK_SIZE,
+		.setkey			=	ppc_aes_setkey_skcipher,
+		.encrypt		=	ppc_ctr_crypt,	/* same routine for both directions */
+		.decrypt		=	ppc_ctr_crypt,
+		.chunksize		=	AES_BLOCK_SIZE,
+	}, {
+		.base.cra_name		=	"xts(aes)",
+		.base.cra_driver_name	=	"xts-ppc-spe",
+		.base.cra_priority	=	300,
+		.base.cra_blocksize	=	AES_BLOCK_SIZE,
+		.base.cra_ctxsize	=	sizeof(struct ppc_xts_ctx),	/* XTS has its own context with key_twk */
+		.base.cra_module	=	THIS_MODULE,
+		.min_keysize		=	AES_MIN_KEY_SIZE * 2,	/* data key plus tweak key */
+		.max_keysize		=	AES_MAX_KEY_SIZE * 2,
+		.ivsize			=	AES_BLOCK_SIZE,
+		.setkey			=	ppc_xts_setkey,
+		.encrypt		=	ppc_xts_encrypt,
+		.decrypt		=	ppc_xts_decrypt,
+	}
+};
+
+static int __init ppc_aes_mod_init(void)
+{
+	int err;
+	/* register the single block cipher first, then the four skcipher modes */
+	err = crypto_register_alg(&aes_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(aes_skcipher_algs,
+					ARRAY_SIZE(aes_skcipher_algs));
+	if (err)	/* roll back the first registration so nothing stays registered on failure */
+		crypto_unregister_alg(&aes_cipher_alg);
+	return err;
+}
+
+static void __exit ppc_aes_mod_fini(void)
+{
+	crypto_unregister_alg(&aes_cipher_alg);	/* drop everything ppc_aes_mod_init() registered */
+	crypto_unregister_skciphers(aes_skcipher_algs,
+				    ARRAY_SIZE(aes_skcipher_algs));
+}
+
+module_init(ppc_aes_mod_init);
+module_exit(ppc_aes_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
+/* aliases: the five cra_name values plus "aes-ppc-spe"; the per-mode driver names have none */
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644
index 0000000000..2e1bc0d099
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-keys.S
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Key handling functions for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__	/* LOAD_KEY(d, s, off): load the key word at s + off into d */
+#define LOAD_KEY(d, s, off) \
+	lwz		d,off(s);	/* plain word load		*/
+#else
+#define LOAD_KEY(d, s, off) \
+	li		r0,off;		/* r0 is overwritten here	*/ \
+	lwbrx		d,s,r0;		/* byte reversed word load	*/
+#endif
+
+#define INITIALIZE_KEY \
+	stwu		r1,-32(r1);	/* create stack frame		*/ \
+	stw		r14,8(r1);	/* save registers		*/ \
+	stw		r15,12(r1);	/* r14-r16 are used as scratch	*/ \
+	stw		r16,16(r1);	/* undone by FINALIZE_KEY	*/
+
+#define FINALIZE_KEY \
+	lwz		r14,8(r1);	/* restore registers		*/ \
+	lwz		r15,12(r1);					   \
+	lwz		r16,16(r1);					   \
+	xor		r5,r5,r5;	/* clear sensitive data		*/ \
+	xor		r6,r6,r6;	/* r5-r12 held key words	*/ \
+	xor		r7,r7,r7;					   \
+	xor		r8,r8,r8;					   \
+	xor		r9,r9,r9;					   \
+	xor		r10,r10,r10;					   \
+	xor		r11,r11,r11;					   \
+	xor		r12,r12,r12;					   \
+	addi		r1,r1,32;	/* cleanup stack		*/
+
+#define LS_BOX(r, t1, t2) \
+	lis		t2,PPC_AES_4K_ENCTAB@h;	/* t2 = table address	*/ \
+	ori		t2,t2,PPC_AES_4K_ENCTAB@l;			   \
+	rlwimi		t2,r,4,20,27;	/* lowest byte of r * 16 is	*/ \
+	lbz		t1,8(t2);	/* merged into the address; the	*/ \
+	rlwimi		r,t1,0,24,31;	/* byte at +8 replaces it in r	*/ \
+	rlwimi		t2,r,28,20,27;	/* same for the second byte	*/ \
+	lbz		t1,8(t2);					   \
+	rlwimi		r,t1,8,16,23;					   \
+	rlwimi		t2,r,20,20,27;	/* same for the third byte	*/ \
+	lbz		t1,8(t2);					   \
+	rlwimi		r,t1,16,8,15;					   \
+	rlwimi		t2,r,12,20,27;	/* same for the highest byte	*/ \
+	lbz		t1,8(t2);	/* NOTE(review): the index merge assumes	*/ \
+	rlwimi		r,t1,24,0,7;	/* a 4K aligned table - confirm	*/
+
+#define GF8_MUL(out, in, t1, t2) \
+	lis t1,0x8080;			/* multiplication in GF8	*/ \
+	ori t1,t1,0x8080; 						   \
+	and t1,t1,in; 			/* top bit of each byte of in	*/ \
+	srwi t1,t1,7; 							   \
+	mulli t1,t1,0x1b; 		/* 0x1b for each byte with it	*/ \
+	lis t2,0x7f7f; 							   \
+	ori t2,t2,0x7f7f; 						   \
+	and t2,t2,in; 			/* low 7 bits of each byte	*/ \
+	slwi t2,t2,1; 			/* doubled within the byte	*/ \
+	xor out,t1,t2;			/* out = in * 2 in every byte	*/
+
+/*
+ * ppc_expand_key_128(u32 *key_enc, const u8 *key)
+ *
+ * Expand 128 bit key into 176 bytes encryption key. It consists of
+ * key itself plus 10 rounds with 16 bytes each
+ * r3 = key_enc (advanced per round), r4 = key (reused as scratch once loaded)
+ */
+_GLOBAL(ppc_expand_key_128)
+	INITIALIZE_KEY
+	LOAD_KEY(r5,r4,0)
+	LOAD_KEY(r6,r4,4)
+	LOAD_KEY(r7,r4,8)
+	LOAD_KEY(r8,r4,12)
+	stw		r5,0(r3)	/* key[0..3] = input data	*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	li		r16,10		/* 10 expansion rounds		*/
+	lis		r0,0x0100	/* RCO(1)			*/
+ppc_expand_128_loop:
+	addi		r3,r3,16	/* next 16 byte output slot	*/
+	mr		r14,r8		/* apply LS_BOX to 4th temp	*/
+	rotlwi		r14,r14,8	/* rotate word left by one byte	*/
+	LS_BOX(r14, r15, r4)
+	xor		r14,r14,r0	/* add the round constant	*/
+	xor		r5,r5,r14	/* xor next 4 keys		*/
+	xor		r6,r6,r5
+	xor		r7,r7,r6
+	xor		r8,r8,r7
+	stw		r5,0(r3)	/* store next 4 keys		*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO by 2 in GF	*/
+	subi		r16,r16,1	/* one round done		*/
+	cmpwi		r16,0
+	bt		eq,ppc_expand_128_end
+	b		ppc_expand_128_loop
+ppc_expand_128_end:
+	FINALIZE_KEY
+	blr
+
+/*
+ * ppc_expand_key_192(u32 *key_enc, const u8 *key)
+ *
+ * Expand 192 bit key into 208 bytes encryption key. It consists of key
+ * itself plus 12 rounds with 16 bytes each
+ * Built in 8 passes of 6 words; the last pass stores only its first 4 words
+ */
+_GLOBAL(ppc_expand_key_192)
+	INITIALIZE_KEY
+	LOAD_KEY(r5,r4,0)
+	LOAD_KEY(r6,r4,4)
+	LOAD_KEY(r7,r4,8)
+	LOAD_KEY(r8,r4,12)
+	LOAD_KEY(r9,r4,16)
+	LOAD_KEY(r10,r4,20)
+	stw		r5,0(r3)	/* key[0..5] = input data	*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	stw		r9,16(r3)
+	stw		r10,20(r3)
+	li		r16,8		/* 8 expansion rounds		*/
+	lis		r0,0x0100	/* RCO(1)			*/
+ppc_expand_192_loop:
+	addi		r3,r3,24	/* next 24 byte output slot	*/
+	mr		r14,r10		/* apply LS_BOX to 6th temp	*/
+	rotlwi		r14,r14,8	/* rotate word left by one byte	*/
+	LS_BOX(r14, r15, r4)
+	xor		r14,r14,r0	/* add the round constant	*/
+	xor		r5,r5,r14	/* xor next 6 keys		*/
+	xor		r6,r6,r5
+	xor		r7,r7,r6
+	xor		r8,r8,r7
+	xor		r9,r9,r8
+	xor		r10,r10,r9
+	stw		r5,0(r3)	/* first 4 words always stored	*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	subi		r16,r16,1
+	cmpwi		r16,0		/* last round early kick out	*/
+	bt		eq,ppc_expand_192_end
+	stw		r9,16(r3)	/* words 5 and 6, not in last	*/
+	stw		r10,20(r3)
+	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO GF8		*/
+	b		ppc_expand_192_loop
+ppc_expand_192_end:
+	FINALIZE_KEY
+	blr
+
+/*
+ * ppc_expand_key_256(u32 *key_enc, const u8 *key)
+ *
+ * Expand 256 bit key into 240 bytes encryption key. It consists of key
+ * itself plus 14 rounds with 16 bytes each
+ * Built in 7 passes of 8 words; the last pass stores only its first 4 words
+ */
+_GLOBAL(ppc_expand_key_256)
+	INITIALIZE_KEY
+	LOAD_KEY(r5,r4,0)
+	LOAD_KEY(r6,r4,4)
+	LOAD_KEY(r7,r4,8)
+	LOAD_KEY(r8,r4,12)
+	LOAD_KEY(r9,r4,16)
+	LOAD_KEY(r10,r4,20)
+	LOAD_KEY(r11,r4,24)
+	LOAD_KEY(r12,r4,28)
+	stw		r5,0(r3)	/* key[0..7] = input data	*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	stw		r9,16(r3)
+	stw		r10,20(r3)
+	stw		r11,24(r3)
+	stw		r12,28(r3)
+	li		r16,7		/* 7 expansion rounds		*/
+	lis		r0,0x0100	/* RCO(1)			*/
+ppc_expand_256_loop:
+	addi		r3,r3,32	/* next 32 byte output slot	*/
+	mr		r14,r12		/* apply LS_BOX to 8th temp	*/
+	rotlwi		r14,r14,8	/* rotate word left by one byte	*/
+	LS_BOX(r14, r15, r4)
+	xor		r14,r14,r0	/* add the round constant	*/
+	xor		r5,r5,r14	/* xor 4 keys			*/
+	xor		r6,r6,r5
+	xor		r7,r7,r6
+	xor		r8,r8,r7
+	mr		r14,r8		/* no rotate, no RCO this time	*/
+	LS_BOX(r14, r15, r4)		/* apply LS_BOX to 4th temp	*/
+	xor		r9,r9,r14	/* xor 4 keys			*/
+	xor		r10,r10,r9
+	xor		r11,r11,r10
+	xor		r12,r12,r11
+	stw		r5,0(r3)	/* first 4 words always stored	*/
+	stw		r6,4(r3)
+	stw		r7,8(r3)
+	stw		r8,12(r3)
+	subi		r16,r16,1
+	cmpwi		r16,0		/* last round early kick out	*/
+	bt		eq,ppc_expand_256_end
+	stw		r9,16(r3)	/* words 5 to 8, not in last	*/
+	stw		r10,20(r3)
+	stw		r11,24(r3)
+	stw		r12,28(r3)
+	GF8_MUL(r0, r0, r4, r14)	/* multiply RCO by 2 in GF	*/
+	b		ppc_expand_256_loop
+ppc_expand_256_end:
+	FINALIZE_KEY
+	blr
+
+/*
+ * ppc_generate_decrypt_key: derive decryption key from encryption key
+ * number of bytes to handle are calculated from length of key (16/24/32)
+ * r3 = key_dec, r4 = key_enc, r5 = key length in bytes
+ */
+_GLOBAL(ppc_generate_decrypt_key)
+	addi		r6,r5,24
+	slwi		r6,r6,2		/* (len + 24) * 4 = offset of last 16 bytes	*/
+	lwzx		r7,r4,r6	/* first/last 4 words are same	*/
+	stw		r7,0(r3)
+	lwz		r7,0(r4)
+	stwx		r7,r3,r6
+	addi		r6,r6,4
+	lwzx		r7,r4,r6
+	stw		r7,4(r3)
+	lwz		r7,4(r4)
+	stwx		r7,r3,r6
+	addi		r6,r6,4
+	lwzx		r7,r4,r6
+	stw		r7,8(r3)
+	lwz		r7,8(r4)
+	stwx		r7,r3,r6
+	addi		r6,r6,4
+	lwzx		r7,r4,r6
+	stw		r7,12(r3)
+	lwz		r7,12(r4)
+	stwx		r7,r3,r6
+	addi		r3,r3,16	/* output continues after the first 4 words	*/
+	add		r4,r4,r6
+	subi		r4,r4,28	/* input: 16 bytes before the last block	*/
+	addi		r5,r5,20
+	srwi		r5,r5,2		/* (len + 20) / 4 = 9/11/13 blocks remain	*/
+ppc_generate_decrypt_block:
+	li	r6,4
+	mtctr	r6			/* 4 words per block		*/
+ppc_generate_decrypt_word:
+	lwz		r6,0(r4)	/* r6 = x			*/
+	GF8_MUL(r7, r6, r0, r7)		/* r7 = 2 * x per byte		*/
+	GF8_MUL(r8, r7, r0, r8)		/* r8 = 4 * x			*/
+	GF8_MUL(r9, r8, r0, r9)		/* r9 = 8 * x			*/
+	xor		r10,r9,r6	/* r10 = 9 * x			*/
+	xor		r11,r7,r8
+	xor		r11,r11,r9	/* r11 = 14 * x			*/
+	xor		r12,r7,r10	/* 11 * x			*/
+	rotrwi		r12,r12,24
+	xor		r11,r11,r12
+	xor		r12,r8,r10	/* 13 * x			*/
+	rotrwi		r12,r12,16
+	xor		r11,r11,r12
+	rotrwi		r12,r10,8	/* 9 * x			*/
+	xor		r11,r11,r12	/* 14/11/13/9 mix: inverse MixColumns	*/
+	stw		r11,0(r3)
+	addi		r3,r3,4
+	addi		r4,r4,4
+	bdnz		ppc_generate_decrypt_word
+	subi		r4,r4,32	/* net 16 bytes back: input is read backwards	*/
+	subi		r5,r5,1
+	cmpwi		r5,0
+	bt		gt,ppc_generate_decrypt_block
+	blr
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S
new file mode 100644
index 0000000000..3f92a6a857
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-modes.S
@@ -0,0 +1,625 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/
+
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rSP);	/* load with offset		*/
+#define SAVE_DATA(reg, off) \
+	stw		reg,off(rDP);	/* save with offset		*/
+#define NEXT_BLOCK \
+	addi		rSP,rSP,16;	/* increment pointers per block	*/ \
+	addi		rDP,rDP,16;
+#define LOAD_IV(reg, off) \
+	lwz		reg,off(rIP);	/* IV loading with offset	*/
+#define SAVE_IV(reg, off) \
+	stw		reg,off(rIP);	/* IV saving with offset	*/
+#define START_IV			/* nothing to reset		*/
+#define CBC_DEC 16			/* CBC decrement per block	*/
+#define CTR_DEC 1			/* CTR decrement one byte	*/
+
+#else					/* Macros for little endian	*/
+
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rSP;	/* load reversed		*/ \
+	addi		rSP,rSP,4;	/* and increment pointer	*/
+#define SAVE_DATA(reg, off) \
+	stwbrx		reg,0,rDP;	/* save reversed		*/ \
+	addi		rDP,rDP,4;	/* and increment pointer	*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#define LOAD_IV(reg, off) \
+	lwbrx		reg,0,rIP;	/* load reversed		*/ \
+	addi		rIP,rIP,4;	/* and increment pointer	*/
+#define SAVE_IV(reg, off) \
+	stwbrx		reg,0,rIP;	/* save reversed		*/ \
+	addi		rIP,rIP,4;	/* and increment pointer	*/
+#define START_IV \
+	subi		rIP,rIP,16;	/* must reset pointer		*/
+#define CBC_DEC 32			/* 2 blocks because of incs	*/
+#define CTR_DEC 17			/* 1 block because of incs	*/
+
+#endif
+
+#define SAVE_0_REGS	/* nothing extra to save; selected by INITIALIZE_CRYPT(tab, 0) */
+#define LOAD_0_REGS	/* nothing extra to restore; selected by FINALIZE_CRYPT(0) */
+
+#define SAVE_4_REGS \
+	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
+	stw		rI1,100(r1);					   \
+	stw		rI2,104(r1);					   \
+	stw		rI3,108(r1);
+
+#define LOAD_4_REGS \
+	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
+	lwz		rI1,100(r1);					   \
+	lwz		rI2,104(r1);					   \
+	lwz		rI3,108(r1);
+
+#define SAVE_8_REGS \
+	SAVE_4_REGS							   \
+	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
+	stw		rG1,116(r1);					   \
+	stw		rG2,120(r1);					   \
+	stw		rG3,124(r1);
+
+#define LOAD_8_REGS \
+	LOAD_4_REGS							   \
+	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
+	lwz		rG1,116(r1);					   \
+	lwz		rG2,120(r1);					   \
+	lwz		rG3,124(r1);
+
+#define INITIALIZE_CRYPT(tab,nr32bitregs) \
+	mflr		r0;						   \
+	stwu		r1,-160(r1);	/* create stack frame		*/ \
+	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
+	stw		r0,8(r1);	/* save link register		*/ \
+	ori		rT0,rT0,tab@l;					   \
+	evstdw		r14,16(r1);					   \
+	mr		rKS,rKP;	/* keep the key start in rKS	*/ \
+	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
+	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
+	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
+	evstdw		r18,48(r1);					   \
+	evstdw		r19,56(r1);					   \
+	evstdw		r20,64(r1);					   \
+	evstdw		r21,72(r1);					   \
+	evstdw		r22,80(r1);					   \
+	evstdw		r23,88(r1);					   \
+	SAVE_##nr32bitregs##_REGS	/* expands to SAVE_0_REGS, SAVE_4_REGS or SAVE_8_REGS */
+
+#define FINALIZE_CRYPT(nr32bitregs) \
+	lwz		r0,8(r1);	/* saved link register		*/ \
+	evldw		r14,16(r1);	/* restore SPE registers	*/ \
+	evldw		r15,24(r1);					   \
+	evldw		r16,32(r1);					   \
+	evldw		r17,40(r1);					   \
+	evldw		r18,48(r1);					   \
+	evldw		r19,56(r1);					   \
+	evldw		r20,64(r1);					   \
+	evldw		r21,72(r1);					   \
+	evldw		r22,80(r1);					   \
+	evldw		r23,88(r1);					   \
+	LOAD_##nr32bitregs##_REGS					   \
+	mtlr		r0;		/* restore link register	*/ \
+	xor		r0,r0,r0;					   \
+	stw		r0,16(r1);	/* delete sensitive data	*/ \
+	stw		r0,24(r1);	/* that we might have pushed	*/ \
+	stw		r0,32(r1);	/* from other context that runs	*/ \
+	stw		r0,40(r1);	/* the same code		*/ \
+	stw		r0,48(r1);	/* only the first word of each	*/ \
+	stw		r0,56(r1);	/* 8 byte slot is overwritten	*/ \
+	stw		r0,64(r1);					   \
+	stw		r0,72(r1);					   \
+	stw		r0,80(r1);					   \
+	stw		r0,88(r1);					   \
+	addi		r1,r1,160;	/* cleanup stack frame		*/
+
+#define ENDIAN_SWAP(t0, t1, s0, s1) \
+	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
+	rotrwi		t1,s1,8;	/* t0/t1 = swapped s0/s1	*/ \
+	rlwimi		t0,s0,8,8,15;					   \
+	rlwimi		t1,s1,8,8,15;					   \
+	rlwimi		t0,s0,8,24,31;					   \
+	rlwimi		t1,s1,8,24,31;
+
+#define GF128_MUL(d0, d1, d2, d3, t0) \
+	li		t0,0x87;	/* multiplication in GF128	*/ \
+	cmpwi		d3,-1;		/* is the top bit of d3 clear?	*/ \
+	iselgt		t0,0,t0;	/* then t0 = 0, else keep 0x87	*/ \
+	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
+	rotlwi		d3,d3,1;					   \
+	rlwimi		d2,d1,0,0,0;					   \
+	rotlwi		d2,d2,1;					   \
+	rlwimi		d1,d0,0,0,0;					   \
+	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
+	rotlwi		d1,d1,1;					   \
+	xor		d0,d0,t0;	/* fold t0 into the low word	*/
+
+#define START_KEY(d0, d1, d2, d3) \
+	lwz		rW0,0(rKP);	/* first 4 key words at rKP	*/ \
+	mtctr		rRR;		/* CTR = rRR for the callee	*/ \
+	lwz		rW1,4(rKP);					   \
+	lwz		rW2,8(rKP);					   \
+	lwz		rW3,12(rKP);					   \
+	xor		rD0,d0,rW0;	/* rD0-rD3 = d0-d3 xor key	*/ \
+	xor		rD1,d1,rW1;					   \
+	xor		rD2,d2,rW2;					   \
+	xor		rD3,d3,rW3;
+
+/*
+ * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds)
+ *
+ * called from glue layer to encrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ * (register aliases such as rSP/rDP/rKP come from aes-spe-regs.h)
+ */
+_GLOBAL(ppc_encrypt_aes)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+	LOAD_DATA(rD0, 0)		/* read the input block	*/
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)	/* xor in the first key words	*/
+	bl		ppc_encrypt_block
+	xor		rD0,rD0,rW0	/* combine the rD/rW pairs left by the callee	*/
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds)
+ *
+ * called from glue layer to decrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ * (register aliases such as rSP/rDP/rKP come from aes-spe-regs.h)
+ */
+_GLOBAL(ppc_decrypt_aes)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
+	LOAD_DATA(rD0, 0)		/* read the input block	*/
+	addi		rT1,rT0,4096	/* rT1 = table address + 4096	*/
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)	/* xor in the first key words	*/
+	bl		ppc_decrypt_block
+	xor		rD0,rD0,rW0	/* rD0-3: words loaded from rKP, rW0-3: table bytes	*/
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds, u32 bytes);
+ *
+ * called from glue layer to encrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ * The loop tests after the first block, so one block is always processed
+ */
+_GLOBAL(ppc_encrypt_ecb)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ppc_encrypt_ecb_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS		/* rewind to the start of the key	*/
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16	/* one block less to do		*/
+	LOAD_DATA(rD2, 8)
+	cmpwi		rLN,15		/* tested by the bt at the loop end	*/
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	NEXT_BLOCK
+	bt		gt,ppc_encrypt_ecb_loop	/* at least 16 bytes remain	*/
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds, u32 bytes);
+ *
+ * called from glue layer to decrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ * The loop tests after the first block, so one block is always processed
+ */
+_GLOBAL(ppc_decrypt_ecb)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
+	addi		rT1,rT0,4096	/* rT1 = table address + 4096	*/
+ppc_decrypt_ecb_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS		/* rewind to the start of the key	*/
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16	/* one block less to do		*/
+	LOAD_DATA(rD2, 8)
+	cmpwi		rLN,15		/* tested by the bt at the loop end	*/
+	LOAD_DATA(rD3, 12)
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rD0,rD0,rW0
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	NEXT_BLOCK
+	bt		gt,ppc_decrypt_ecb_loop	/* at least 16 bytes remain	*/
+	FINALIZE_CRYPT(0)
+	blr
+
+/*
+ * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt multiple blocks via CBC
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ * The last output block is written back to iv before returning
+ */
+_GLOBAL(ppc_encrypt_cbc)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+	LOAD_IV(rI0, 0)			/* rI0-rI3 = chaining value	*/
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	LOAD_IV(rI3, 12)
+ppc_encrypt_cbc_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS		/* rewind to the start of the key	*/
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16	/* one block less to do		*/
+	LOAD_DATA(rD2, 8)
+	cmpwi		rLN,15		/* tested by the bt at the loop end	*/
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rI0	/* input xor chaining value	*/
+	xor		rD1,rD1,rI1
+	xor		rD2,rD2,rI2
+	xor		rD3,rD3,rI3
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rI0,rD0,rW0	/* output becomes the next chaining value	*/
+	SAVE_DATA(rI0, 0)
+	xor		rI1,rD1,rW1
+	SAVE_DATA(rI1, 4)
+	xor		rI2,rD2,rW2
+	SAVE_DATA(rI2, 8)
+	xor		rI3,rD3,rW3
+	SAVE_DATA(rI3, 12)
+	NEXT_BLOCK
+	bt		gt,ppc_encrypt_cbc_loop	/* at least 16 bytes remain	*/
+	START_IV			/* rIP back to the IV start	*/
+	SAVE_IV(rI0, 0)
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(4)
+	blr
+
+/*
+ * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to decrypt multiple blocks via CBC
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ * Blocks are processed from the last one back to the first one
+ */
+_GLOBAL(ppc_decrypt_cbc)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
+	li		rT1,15
+	LOAD_IV(rI0, 0)			/* rI0-rI3 = initial IV	*/
+	andc		rLN,rLN,rT1	/* clear the low 4 bits of rLN	*/
+	LOAD_IV(rI1, 4)
+	subi		rLN,rLN,16	/* offset of the last block	*/
+	LOAD_IV(rI2, 8)
+	add		rSP,rSP,rLN	/* reverse processing		*/
+	LOAD_IV(rI3, 12)
+	add		rDP,rDP,rLN
+	LOAD_DATA(rD0, 0)		/* last input block		*/
+	addi		rT1,rT0,4096	/* rT1 = table address + 4096	*/
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	START_IV
+	SAVE_IV(rD0, 0)			/* last input block is stored as the new IV	*/
+	SAVE_IV(rD1, 4)
+	SAVE_IV(rD2, 8)
+	cmpwi		rLN,16
+	SAVE_IV(rD3, 12)
+	bt		lt,ppc_decrypt_cbc_end	/* only one block to handle	*/
+ppc_decrypt_cbc_loop:
+	mr		rKP,rKS		/* rewind to the start of the key	*/
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	subi		rLN,rLN,16
+	subi		rSP,rSP,CBC_DEC	/* step back to the previous input block	*/
+	xor		rW0,rD0,rW0	/* decrypted block ...		*/
+	LOAD_DATA(rD0, 0)
+	xor		rW1,rD1,rW1
+	LOAD_DATA(rD1, 4)
+	xor		rW2,rD2,rW2
+	LOAD_DATA(rD2, 8)
+	xor		rW3,rD3,rW3
+	LOAD_DATA(rD3, 12)
+	xor		rW0,rW0,rD0	/* ... xor previous input block	*/
+	SAVE_DATA(rW0, 0)
+	xor		rW1,rW1,rD1
+	SAVE_DATA(rW1, 4)
+	xor		rW2,rW2,rD2
+	SAVE_DATA(rW2, 8)
+	xor		rW3,rW3,rD3
+	SAVE_DATA(rW3, 12)
+	cmpwi		rLN,15
+	subi		rDP,rDP,CBC_DEC	/* step back to the previous output block	*/
+	bt		gt,ppc_decrypt_cbc_loop
+ppc_decrypt_cbc_end:
+	mr		rKP,rKS		/* first block of the buffer	*/
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rW0,rW0,rD0
+	xor		rW1,rW1,rD1
+	xor		rW2,rW2,rD2
+	xor		rW3,rW3,rD3
+	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
+	SAVE_DATA(rW0, 0)
+	xor		rW1,rW1,rI1
+	SAVE_DATA(rW1, 4)
+	xor		rW2,rW2,rI2
+	SAVE_DATA(rW2, 8)
+	xor		rW3,rW3,rI3
+	SAVE_DATA(rW3, 12)
+	FINALIZE_CRYPT(4)
+	blr
+
+/*
+ * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
+ *		 u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt/decrypt multiple blocks
+ * via CTR. Number of bytes does not need to be a multiple of
+ * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_crypt_ctr)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+	LOAD_IV(rI0, 0)
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	cmpwi		rLN,16
+	LOAD_IV(rI3, 12)
+	START_IV
+	bt		lt,ppc_crypt_ctr_partial	/* fewer than 16 bytes in total */
+ppc_crypt_ctr_loop:
+	mr		rKP,rKS	/* restart from the saved key pointer */
+	START_KEY(rI0, rI1, rI2, rI3)	/* the counter rI0-rI3 is the cipher input */
+	bl		ppc_encrypt_block
+	xor		rW0,rD0,rW0
+	xor		rW1,rD1,rW1
+	xor		rW2,rD2,rW2
+	xor		rW3,rD3,rW3
+	LOAD_DATA(rD0, 0)
+	subi		rLN,rLN,16	/* 16 bytes consumed */
+	LOAD_DATA(rD1, 4)
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rW0	/* data xor rW0-rW3 from above */
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rW1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rW2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rW3
+	SAVE_DATA(rD3, 12)
+	addic		rI3,rI3,1	/* increase counter			*/
+	addze		rI2,rI2	/* carry ripples from rI3 up to rI0 */
+	addze		rI1,rI1
+	addze		rI0,rI0
+	NEXT_BLOCK
+	cmpwi		rLN,15
+	bt		gt,ppc_crypt_ctr_loop	/* repeat while rLN >= 16 */
+ppc_crypt_ctr_partial:
+	cmpwi		rLN,0
+	bt		eq,ppc_crypt_ctr_end	/* no trailing bytes left */
+	mr		rKP,rKS
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block
+	xor		rW0,rD0,rW0
+	SAVE_IV(rW0, 0)	/* NOTE(review): rW0-rW3 seem to be parked in the IV */
+	xor		rW1,rD1,rW1
+	SAVE_IV(rW1, 4)	/* buffer so the byte loop below can read them back */
+	xor		rW2,rD2,rW2
+	SAVE_IV(rW2, 8)	/* through rIP - confirm against SAVE_IV definition */
+	xor		rW3,rD3,rW3
+	SAVE_IV(rW3, 12)
+	mtctr		rLN	/* one loop iteration per remaining byte */
+	subi		rIP,rIP,CTR_DEC
+	subi		rSP,rSP,1	/* pre-decrement: lbzu/stbu use offset 1 */
+	subi		rDP,rDP,1
+ppc_crypt_ctr_xorbyte:
+	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
+	lbzu		rW5,1(rSP)
+	xor		rW4,rW4,rW5
+	stbu		rW4,1(rDP)
+	bdnz		ppc_crypt_ctr_xorbyte
+	subf		rIP,rLN,rIP	/* undo the rLN single-byte updates of rIP */
+	addi		rIP,rIP,1
+	addic		rI3,rI3,1	/* count the partial block as well */
+	addze		rI2,rI2
+	addze		rI1,rI1
+	addze		rI0,rI0
+ppc_crypt_ctr_end:
+	SAVE_IV(rI0, 0)	/* counter words rI0-rI3 go out through SAVE_IV */
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(4)
+	blr
+
+/*
+ * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
+ *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to encrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_xts)
+	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
+	LOAD_IV(rI0, 0)
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	cmpwi		rKT,0	/* was a tweak key passed? */
+	LOAD_IV(rI3, 12)
+	bt		eq,ppc_encrypt_xts_notweak	/* no: use the IV as it is */
+	mr		rKP,rKT	/* rKT aliases r9 like rD0, so it is read first */
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block
+	xor		rI0,rD0,rW0	/* cipher output replaces rI0-rI3 */
+	xor		rI1,rD1,rW1
+	xor		rI2,rD2,rW2
+	xor		rI3,rD3,rW3
+ppc_encrypt_xts_notweak:
+	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_encrypt_xts_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS	/* restart from the saved key pointer */
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16	/* rLN counts bytes, 16 per block */
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rI0	/* xor input with rI0-rI3 before the cipher */
+	xor		rD1,rD1,rI1
+	xor		rD2,rD2,rI2
+	xor		rD3,rD3,rI3
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_encrypt_block
+	xor		rD0,rD0,rW0
+	xor		rD1,rD1,rW1
+	xor		rD2,rD2,rW2
+	xor		rD3,rD3,rW3
+	xor		rD0,rD0,rI0	/* xor with the same rI0-rI3 after the cipher */
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rI1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rI2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rI3
+	SAVE_DATA(rD3, 12)
+	GF128_MUL(rG0, rG1, rG2, rG3, rW0)	/* presumably steps the tweak; macro not shown here */
+	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+	cmpwi		rLN,0
+	NEXT_BLOCK
+	bt		gt,ppc_encrypt_xts_loop	/* repeat while bytes remain */
+	START_IV
+	SAVE_IV(rI0, 0)	/* rI0-rI3 as left by the last iteration */
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(8)
+	blr
+
+/*
+ * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
+ *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to decrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_xts)
+	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
+	LOAD_IV(rI0, 0)
+	addi		rT1,rT0,4096	/* rT1 = rT0 + 4K */
+	LOAD_IV(rI1, 4)
+	LOAD_IV(rI2, 8)
+	cmpwi		rKT,0	/* was a tweak key passed? */
+	LOAD_IV(rI3, 12)
+	bt		eq,ppc_decrypt_xts_notweak	/* no: use the IV as it is */
+	subi		rT0,rT0,4096	/* presumably selects the table 4K below for the encrypt call */
+	mr		rKP,rKT	/* rKT aliases r9 like rD0, so it is read first */
+	START_KEY(rI0, rI1, rI2, rI3)
+	bl		ppc_encrypt_block	/* the IV is encrypted even when decrypting */
+	xor		rI0,rD0,rW0	/* cipher output replaces rI0-rI3 */
+	xor		rI1,rD1,rW1
+	xor		rI2,rD2,rW2
+	xor		rI3,rD3,rW3
+	addi		rT0,rT0,4096	/* undo the table pointer adjustment */
+ppc_decrypt_xts_notweak:
+	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_decrypt_xts_loop:
+	LOAD_DATA(rD0, 0)
+	mr		rKP,rKS	/* restart from the saved key pointer */
+	LOAD_DATA(rD1, 4)
+	subi		rLN,rLN,16	/* rLN counts bytes, 16 per block */
+	LOAD_DATA(rD2, 8)
+	LOAD_DATA(rD3, 12)
+	xor		rD0,rD0,rI0	/* xor input with rI0-rI3 before the cipher */
+	xor		rD1,rD1,rI1
+	xor		rD2,rD2,rI2
+	xor		rD3,rD3,rI3
+	START_KEY(rD0, rD1, rD2, rD3)
+	bl		ppc_decrypt_block
+	xor		rD0,rD0,rW0
+	xor		rD1,rD1,rW1
+	xor		rD2,rD2,rW2
+	xor		rD3,rD3,rW3
+	xor		rD0,rD0,rI0	/* xor with the same rI0-rI3 after the cipher */
+	SAVE_DATA(rD0, 0)
+	xor		rD1,rD1,rI1
+	SAVE_DATA(rD1, 4)
+	xor		rD2,rD2,rI2
+	SAVE_DATA(rD2, 8)
+	xor		rD3,rD3,rI3
+	SAVE_DATA(rD3, 12)
+	GF128_MUL(rG0, rG1, rG2, rG3, rW0)	/* presumably steps the tweak; macro not shown here */
+	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+	cmpwi		rLN,0
+	NEXT_BLOCK
+	bt		gt,ppc_decrypt_xts_loop	/* repeat while bytes remain */
+	START_IV
+	SAVE_IV(rI0, 0)	/* rI0-rI3 as left by the last iteration */
+	SAVE_IV(rI1, 4)
+	SAVE_IV(rI2, 8)
+	SAVE_IV(rI3, 12)
+	FINALIZE_CRYPT(8)
+	blr
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h
new file mode 100644
index 0000000000..2eb4c9b941
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-regs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common registers for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#define rKS r0	/* copy of en-/decryption key pointer			*/
+#define rDP r3	/* destination pointer					*/
+#define rSP r4	/* source pointer					*/
+#define rKP r5	/* pointer to en-/decryption key pointer		*/
+#define rRR r6	/* en-/decryption rounds				*/
+#define rLN r7	/* length of data to be processed			*/
+#define rIP r8	/* pointer to IV (CBC/CTR/XTS modes)			*/
+#define rKT r9	/* pointer to tweak key (XTS mode)			*/
+#define rT0 r11	/* pointers to en-/decryption tables			*/
+#define rT1 r10
+#define rD0 r9	/* data (r9 is shared with rKT)				*/
+#define rD1 r14
+#define rD2 r12
+#define rD3 r15
+#define rW0 r16	/* working registers					*/
+#define rW1 r17
+#define rW2 r18
+#define rW3 r19
+#define rW4 r20
+#define rW5 r21
+#define rW6 r22
+#define rW7 r23
+#define rI0 r24	/* IV							*/
+#define rI1 r25
+#define rI2 r26
+#define rI3 r27
+#define rG0 r28	/* endian reversed tweak (XTS mode)			*/
+#define rG1 r29
+#define rG2 r30
+#define rG3 r31
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S
new file mode 100644
index 0000000000..ceb604bc6f
--- /dev/null
+++ b/arch/powerpc/crypto/aes-tab-4k.S
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * 4K AES tables for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+/*
+ * These big endian AES encryption/decryption tables have been taken from
+ * crypto/aes_generic.c and are designed to be simply accessed by a combination
+ * of rlwimi/lwz instructions with a minimum of table registers (usually only
+ * one required). Thus they are aligned to 4K. The locality of rotated values
+ * is derived from the reduced offsets that are available in the SPE load
+ * instructions. E.g. evldw, evlwwsplat, ...
+ *
+ * For the safety-conscious it has to be noted that they might be vulnerable
+ * to cache timing attacks because of their size. Nevertheless in contrast to
+ * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
+ * This is a quite good tradeoff for low power devices (e.g. routers) without
+ * dedicated encryption hardware where we usually have no multiuser
+ * environment.
+ *
+ */
+
+#define R(a, b, c, d) \
+	0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
+
+.data
+.align 12
+.globl PPC_AES_4K_ENCTAB
+PPC_AES_4K_ENCTAB:
+/* encryption table, same as crypto_ft_tab in crypto/aes_generic.c */
+	.long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
+	.long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
+	.long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
+	.long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
+	.long R(60, 30, 30, 50), R(02, 01, 01, 03)
+	.long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
+	.long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
+	.long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
+	.long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
+	.long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
+	.long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
+	.long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
+	.long R(41, ad, ad, ec), R(b3, d4, d4, 67)
+	.long R(5f, a2, a2, fd), R(45, af, af, ea)
+	.long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
+	.long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
+	.long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
+	.long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
+	.long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
+	.long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
+	.long R(68, 34, 34, 5c), R(51, a5, a5, f4)
+	.long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
+	.long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
+	.long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
+	.long R(08, 04, 04, 0c), R(95, c7, c7, 52)
+	.long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
+	.long R(30, 18, 18, 28), R(37, 96, 96, a1)
+	.long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
+	.long R(0e, 07, 07, 09), R(24, 12, 12, 36)
+	.long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
+	.long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
+	.long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
+	.long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
+	.long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
+	.long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
+	.long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
+	.long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
+	.long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
+	.long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
+	.long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
+	.long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
+	.long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
+	.long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
+	.long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
+	.long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
+	.long R(67, be, be, d9), R(72, 39, 39, 4b)
+	.long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
+	.long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
+	.long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
+	.long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
+	.long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
+	.long R(66, 33, 33, 55), R(11, 85, 85, 94)
+	.long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
+	.long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
+	.long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
+	.long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
+	.long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
+	.long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
+	.long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
+	.long R(70, 38, 38, 48), R(f1, f5, f5, 04)
+	.long R(63, bc, bc, df), R(77, b6, b6, c1)
+	.long R(af, da, da, 75), R(42, 21, 21, 63)
+	.long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
+	.long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
+	.long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
+	.long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
+	.long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
+	.long R(88, 44, 44, cc), R(2e, 17, 17, 39)
+	.long R(93, c4, c4, 57), R(55, a7, a7, f2)
+	.long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
+	.long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
+	.long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
+	.long R(c0, 60, 60, a0), R(19, 81, 81, 98)
+	.long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
+	.long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
+	.long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
+	.long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
+	.long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
+	.long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
+	.long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
+	.long R(db, e0, e0, 3b), R(64, 32, 32, 56)
+	.long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
+	.long R(92, 49, 49, db), R(0c, 06, 06, 0a)
+	.long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
+	.long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
+	.long R(43, ac, ac, ef), R(c4, 62, 62, a6)
+	.long R(39, 91, 91, a8), R(31, 95, 95, a4)
+	.long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
+	.long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
+	.long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
+	.long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
+	.long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
+	.long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
+	.long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
+	.long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
+	.long R(47, ae, ae, e9), R(10, 08, 08, 18)
+	.long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
+	.long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
+	.long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
+	.long R(73, b4, b4, c7), R(97, c6, c6, 51)
+	.long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
+	.long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
+	.long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
+	.long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
+	.long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
+	.long R(71, b5, b5, c4), R(cc, 66, 66, aa)
+	.long R(90, 48, 48, d8), R(06, 03, 03, 05)
+	.long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
+	.long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
+	.long R(ae, 57, 57, f9), R(69, b9, b9, d0)
+	.long R(17, 86, 86, 91), R(99, c1, c1, 58)
+	.long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
+	.long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
+	.long R(2b, 98, 98, b3), R(22, 11, 11, 33)
+	.long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
+	.long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
+	.long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
+	.long R(15, 87, 87, 92), R(c9, e9, e9, 20)
+	.long R(87, ce, ce, 49), R(aa, 55, 55, ff)
+	.long R(50, 28, 28, 78), R(a5, df, df, 7a)
+	.long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
+	.long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
+	.long R(65, bf, bf, da), R(d7, e6, e6, 31)
+	.long R(84, 42, 42, c6), R(d0, 68, 68, b8)
+	.long R(82, 41, 41, c3), R(29, 99, 99, b0)
+	.long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
+	.long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
+	.long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
+.globl PPC_AES_4K_DECTAB
+PPC_AES_4K_DECTAB:
+/* decryption table, same as crypto_it_tab in crypto/aes_generic.c */
+	.long R(51, f4, a7, 50), R(7e, 41, 65, 53)
+	.long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
+	.long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
+	.long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
+	.long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
+	.long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
+	.long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
+	.long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
+	.long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
+	.long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
+	.long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
+	.long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
+	.long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
+	.long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
+	.long R(d4, be, 83, 2d), R(58, 74, 21, d3)
+	.long R(49, e0, 69, 29), R(8e, c9, c8, 44)
+	.long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
+	.long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
+	.long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
+	.long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
+	.long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
+	.long R(97, 51, 33, 60), R(62, 53, 7f, 45)
+	.long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
+	.long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
+	.long R(70, 48, 68, 58), R(8f, 45, fd, 19)
+	.long R(94, de, 6c, 87), R(52, 7b, f8, b7)
+	.long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
+	.long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
+	.long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
+	.long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
+	.long R(30, 28, 87, f2), R(23, bf, a5, b2)
+	.long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
+	.long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
+	.long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
+	.long R(65, da, f4, cd), R(06, 05, be, d5)
+	.long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
+	.long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
+	.long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
+	.long R(0b, 83, ec, 39), R(40, 60, ef, aa)
+	.long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
+	.long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
+	.long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
+	.long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
+	.long R(04, 06, d4, 6f), R(60, 50, 15, ff)
+	.long R(19, 98, fb, 24), R(d6, bd, e9, 97)
+	.long R(89, 40, 43, cc), R(67, d9, 9e, 77)
+	.long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
+	.long R(e7, 19, 5b, 38), R(79, c8, ee, db)
+	.long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
+	.long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
+	.long R(09, 80, 86, 83), R(32, 2b, ed, 48)
+	.long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
+	.long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
+	.long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
+	.long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
+	.long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
+	.long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
+	.long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
+	.long R(80, c0, c5, 4f), R(61, dc, 20, a2)
+	.long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
+	.long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
+	.long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
+	.long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
+	.long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
+	.long R(57, f1, 19, 85), R(af, 75, 07, 4c)
+	.long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
+	.long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
+	.long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
+	.long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
+	.long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
+	.long R(d7, 31, dc, ca), R(42, 63, 85, 10)
+	.long R(13, 97, 22, 40), R(84, c6, 11, 20)
+	.long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
+	.long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
+	.long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
+	.long R(0d, 86, 52, ec), R(77, c1, e3, d0)
+	.long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
+	.long R(11, 94, 48, fa), R(47, e9, 64, 22)
+	.long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
+	.long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
+	.long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
+	.long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
+	.long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
+	.long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
+	.long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
+	.long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
+	.long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
+	.long R(2e, 39, f7, 5e), R(82, c3, af, f5)
+	.long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
+	.long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
+	.long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
+	.long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
+	.long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
+	.long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
+	.long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
+	.long R(21, bc, cf, 08), R(ef, 15, e8, e6)
+	.long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
+	.long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
+	.long R(31, a4, b2, af), R(2a, 3f, 23, 31)
+	.long R(c6, a5, 94, 30), R(35, a2, 66, c0)
+	.long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
+	.long R(e0, 90, d0, b0), R(33, a7, d8, 15)
+	.long R(f1, 04, 98, 4a), R(41, ec, da, f7)
+	.long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
+	.long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
+	.long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
+	.long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
+	.long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
+	.long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
+	.long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
+	.long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
+	.long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
+	.long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
+	.long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
+	.long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
+	.long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
+	.long R(9c, d2, df, 59), R(55, f2, 73, 3f)
+	.long R(18, 14, ce, 79), R(73, c7, 37, bf)
+	.long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
+	.long R(df, 3d, 6f, 14), R(78, 44, db, 86)
+	.long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
+	.long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
+	.long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
+	.long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
+	.long R(39, a8, 01, 71), R(08, 0c, b3, de)
+	.long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
+	.long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
+	.long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
+.globl PPC_AES_4K_DECTAB2
+PPC_AES_4K_DECTAB2:
+/* decryption table, same as crypto_il_tab in crypto/aes_generic.c */
+	.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/powerpc/crypto/aesp10-ppc.pl b/arch/powerpc/crypto/aesp10-ppc.pl
new file mode 100644
index 0000000000..2c06ce2a2c
--- /dev/null
+++ b/arch/powerpc/crypto/aesp10-ppc.pl
@@ -0,0 +1,585 @@
+#! /usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from CRYPTOGAMs[1] and is included here using the option
+# in the license to distribute the code under the GPL. Therefore this program
+# is free software; you can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2 as published by the Free Software
+# Foundation.
+#
+# [1] https://www.openssl.org/~appro/cryptogams/
+
+# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#       * Redistributions of source code must retain copyright notices,
+#         this list of conditions and the following disclaimer.
+#
+#       * Redistributions in binary form must reproduce the above
+#         copyright notice, this list of conditions and the following
+#         disclaimer in the documentation and/or other materials
+#         provided with the distribution.
+#
+#       * Neither the name of the CRYPTOGAMS nor the names of its
+#         copyright holder and contributors may be used to endorse or
+#         promote products derived from this software without specific
+#         prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are in average lower
+# than additional overhead in pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+#		CBC en-/decrypt	CTR	XTS
+# POWER8[le]	3.96/0.72	0.74	1.1
+# POWER8[be]	3.75/0.65	0.66	1.0
+
+$flavour = shift;
+
+if ($flavour =~ /64/) {
+	$SIZE_T	=8;
+	$LRSAVE	=2*$SIZE_T;
+	$STU	="stdu";
+	$POP	="ld";
+	$PUSH	="std";
+	$UCMP	="cmpld";
+	$SHL	="sldi";
+} elsif ($flavour =~ /32/) {
+	$SIZE_T	=4;
+	$LRSAVE	=$SIZE_T;
+	$STU	="stwu";
+	$POP	="lwz";
+	$PUSH	="stw";
+	$UCMP	="cmplw";
+	$SHL	="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's path
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+# Pipe all output through ppc-xlate.pl; "or" (not "||") so a failed open really dies.
+open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_p10";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{	# Key setup procedures						#
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine	"any"
+
+.text
+
+.align	7
+rcon:
+.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
+.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
+.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
+.long	0,0,0,0						?asis
+Lconsts:
+	mflr	r0
+	bcl	20,31,\$+4
+	mflr	$ptr	 #vvvvv "distance between . and rcon
+	addi	$ptr,$ptr,-0x48
+	mtlr	r0
+	blr
+	.long	0
+	.byte	0,12,0x14,0,0,0,0,0
+.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl	.${prefix}_set_encrypt_key
+Lset_encrypt_key:
+	mflr		r11
+	$PUSH		r11,$LRSAVE($sp)
+
+	li		$ptr,-1
+	${UCMP}i	$inp,0
+	beq-		Lenc_key_abort		# if ($inp==0) return -1;
+	${UCMP}i	$out,0
+	beq-		Lenc_key_abort		# if ($out==0) return -1;
+	li		$ptr,-2
+	cmpwi		$bits,128
+	blt-		Lenc_key_abort
+	cmpwi		$bits,256
+	bgt-		Lenc_key_abort
+	andi.		r0,$bits,0x3f
+	bne-		Lenc_key_abort
+
+	lis		r0,0xfff0
+	mfspr		$vrsave,256
+	mtspr		256,r0
+
+	bl		Lconsts
+	mtlr		r11
+
+	neg		r9,$inp
+	lvx		$in0,0,$inp
+	addi		$inp,$inp,15		# 15 is not typo
+	lvsr		$key,0,r9		# borrow $key
+	li		r8,0x20
+	cmpwi		$bits,192
+	lvx		$in1,0,$inp
+	le?vspltisb	$mask,0x0f		# borrow $mask
+	lvx		$rcon,0,$ptr
+	le?vxor		$key,$key,$mask		# adjust for byte swap
+	lvx		$mask,r8,$ptr
+	addi		$ptr,$ptr,0x10
+	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
+	li		$cnt,8
+	vxor		$zero,$zero,$zero
+	mtctr		$cnt
+
+	?lvsr		$outperm,0,$out
+	vspltisb	$outmask,-1
+	lvx		$outhead,0,$out
+	?vperm		$outmask,$zero,$outmask,$outperm
+
+	blt		Loop128
+	addi		$inp,$inp,8
+	beq		L192
+	addi		$inp,$inp,8
+	b		L256
+
+.align	4
+Loop128:
+	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in0,$in0,$key
+	bdnz		Loop128
+
+	lvx		$rcon,0,$ptr		# last two round keys
+
+	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in0,$in0,$key
+
+	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vxor		$in0,$in0,$key
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+
+	addi		$inp,$out,15		# 15 is not typo
+	addi		$out,$out,0x50
+
+	li		$rounds,10
+	b		Ldone
+
+.align	4
+L192:
+	lvx		$tmp,0,$inp
+	li		$cnt,4
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
+	vspltisb	$key,8			# borrow $key
+	mtctr		$cnt
+	vsububm		$mask,$mask,$key	# adjust the mask
+
+Loop192:
+	vperm		$key,$in1,$in1,$mask	# roate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	vcipherlast	$key,$key,$rcon
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+
+	 vsldoi		$stage,$zero,$in1,8
+	vspltw		$tmp,$in0,3
+	vxor		$tmp,$tmp,$in1
+	vsldoi		$in1,$zero,$in1,12	# >>32
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in1,$in1,$tmp
+	vxor		$in0,$in0,$key
+	vxor		$in1,$in1,$key
+	 vsldoi		$stage,$stage,$in0,8
+
+	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$stage,$stage,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	 vsldoi		$stage,$in0,$in1,8
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	 vperm		$outtail,$stage,$stage,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vspltw		$tmp,$in0,3
+	vxor		$tmp,$tmp,$in1
+	vsldoi		$in1,$zero,$in1,12	# >>32
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in1,$in1,$tmp
+	vxor		$in0,$in0,$key
+	vxor		$in1,$in1,$key
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$inp,$out,15		# 15 is not typo
+	 addi		$out,$out,16
+	bdnz		Loop192
+
+	li		$rounds,12
+	addi		$out,$out,0x20
+	b		Ldone
+
+.align	4
+L256:
+	lvx		$tmp,0,$inp
+	li		$cnt,7
+	li		$rounds,14
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
+	mtctr		$cnt
+
+Loop256:
+	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in1,$in1,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in0,$in0,$key
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$inp,$out,15		# 15 is not typo
+	 addi		$out,$out,16
+	bdz		Ldone
+
+	vspltw		$key,$in0,3		# just splat
+	vsldoi		$tmp,$zero,$in1,12	# >>32
+	vsbox		$key,$key
+
+	vxor		$in1,$in1,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in1,$in1,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in1,$in1,$tmp
+
+	vxor		$in1,$in1,$key
+	b		Loop256
+
+.align	4
+Ldone:
+	lvx		$in1,0,$inp		# redundant in aligned case
+	vsel		$in1,$outhead,$in1,$outmask
+	stvx		$in1,0,$inp
+	li		$ptr,0
+	mtspr		256,$vrsave
+	stw		$rounds,0($out)
+
+Lenc_key_abort:
+	mr		r3,$ptr
+	blr
+	.long		0
+	.byte		0,12,0x14,1,0,0,3,0
+	.long		0
+.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl	.${prefix}_set_decrypt_key
+	$STU		$sp,-$FRAME($sp)
+	mflr		r10
+	$PUSH		r10,$FRAME+$LRSAVE($sp)
+	bl		Lset_encrypt_key
+	mtlr		r10
+
+	cmpwi		r3,0
+	bne-		Ldec_key_abort
+
+	slwi		$cnt,$rounds,4
+	subi		$inp,$out,240		# first round key
+	srwi		$rounds,$rounds,1
+	add		$out,$inp,$cnt		# last round key
+	mtctr		$rounds
+
+Ldeckey:
+	lwz		r0, 0($inp)
+	lwz		r6, 4($inp)
+	lwz		r7, 8($inp)
+	lwz		r8, 12($inp)
+	addi		$inp,$inp,16
+	lwz		r9, 0($out)
+	lwz		r10,4($out)
+	lwz		r11,8($out)
+	lwz		r12,12($out)
+	stw		r0, 0($out)
+	stw		r6, 4($out)
+	stw		r7, 8($out)
+	stw		r8, 12($out)
+	subi		$out,$out,16
+	stw		r9, -16($inp)
+	stw		r10,-12($inp)
+	stw		r11,-8($inp)
+	stw		r12,-4($inp)
+	bdnz		Ldeckey
+
+	xor		r3,r3,r3		# return value
+Ldec_key_abort:
+	addi		$sp,$sp,$FRAME
+	blr
+	.long		0
+	.byte		0,12,4,1,0x80,0,3,0
+	.long		0
+.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{	# Single block en- and decrypt procedures			#
+# Append the single-block routine .${prefix}_${dir}crypt to $code.
+#
+# Argument: the direction prefix, "en" or "de" (see the two calls below).
+# For "de", $n becomes "n", which turns the vcipher/vcipherlast mnemonics of
+# the template into vncipher/vncipherlast; for "en" the infix is empty.
+#
+# $prefix and $vrsave are taken from the enclosing script scope.
+sub gen_block () {
+my $dir = shift;
+my $n   = $dir eq "de" ? "n" : "";
+# Register names r3..r7, bound in this order to the template variables.
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+# Everything up to the closing ___ is emitted text, not Perl.  Mnemonics
+# carrying a "?" or "le?" marker are rewritten per target endianness by the
+# post-processing loop at the end of this script.
+$code.=<<___;
+.globl	.${prefix}_${dir}crypt
+	lwz		$rounds,240($key)
+	lis		r0,0xfc00
+	mfspr		$vrsave,256
+	li		$idx,15			# 15 is not typo
+	mtspr		256,r0
+
+	lvx		v0,0,$inp
+	neg		r11,$out
+	lvx		v1,$idx,$inp
+	lvsl		v2,0,$inp		# inpperm
+	le?vspltisb	v4,0x0f
+	?lvsl		v3,0,r11		# outperm
+	le?vxor		v2,v2,v4
+	li		$idx,16
+	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
+	lvx		v1,0,$key
+	?lvsl		v5,0,$key		# keyperm
+	srwi		$rounds,$rounds,1
+	lvx		v2,$idx,$key
+	addi		$idx,$idx,16
+	subi		$rounds,$rounds,1
+	?vperm		v1,v1,v2,v5		# align round key
+
+	vxor		v0,v0,v1
+	lvx		v1,$idx,$key
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+Loop_${dir}c:
+	?vperm		v2,v2,v1,v5
+	v${n}cipher	v0,v0,v2
+	lvx		v2,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		v1,v1,v2,v5
+	v${n}cipher	v0,v0,v1
+	lvx		v1,$idx,$key
+	addi		$idx,$idx,16
+	bdnz		Loop_${dir}c
+
+	?vperm		v2,v2,v1,v5
+	v${n}cipher	v0,v0,v2
+	lvx		v2,$idx,$key
+	?vperm		v1,v1,v2,v5
+	v${n}cipherlast	v0,v0,v1
+
+	vspltisb	v2,-1
+	vxor		v1,v1,v1
+	li		$idx,15			# 15 is not typo
+	?vperm		v2,v1,v2,v3		# outmask
+	le?vxor		v3,v3,v4
+	lvx		v1,0,$out		# outhead
+	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
+	vsel		v1,v1,v0,v2
+	lvx		v4,$idx,$out
+	stvx		v1,0,$out
+	vsel		v0,v0,v4,v2
+	stvx		v0,$idx,$out
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,3,0
+	.long		0
+.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+
+# Final pass: walk the accumulated $code line by line, apply the
+# endian-specific rewrites and print the result to STDOUT.
+#
+# $consts stays true until a line containing "Lconsts:" has been seen; only
+# while it is true are tagged .long/.byte lines treated as constant-table
+# entries.
+my $consts=1;
+foreach(split("\n",$code)) {
+	# replace every `...` span with the result of evaluating it as Perl
+        s/\`([^\`]*)\`/eval($1)/geo;
+
+	# constants table endian-specific conversion
+	# (matches ".long"/".byte" data followed by a trailing "?tag")
+	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+	    my $conv=$3;
+	    my @bytes=();
+
+	    # convert to endian-agnostic format
+	    # (.long values are split into four bytes, most significant first)
+	    if ($1 eq "long") {
+	      foreach (split(/,\s*/,$2)) {
+		my $l = /^0/?oct:int;
+		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+	      }
+	    } else {
+		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+	    }
+
+	    # little-endian conversion
+	    # ?inv XORs every byte with 0xf, ?rev reverses the byte order;
+	    # any other tag leaves the bytes unchanged
+	    if ($flavour =~ /le$/o) {
+		SWITCH: for($conv)  {
+		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
+		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
+		}
+	    }
+
+	    #emit
+	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+	    next;
+	}
+	$consts=0 if (m/Lconsts:/o);	# end of table
+
+	# instructions prefixed with '?' are endian-specific and need
+	# to be adjusted accordingly...
+	# The substitutions are chained with "or", so at most one of them is
+	# applied to any given line.
+	if ($flavour =~ /le$/o) {	# little-endian
+	    # le? is dropped, be? is turned into a #be# comment marker,
+	    # ?lvsr/?lvsl are exchanged, ?vperm and ?vsldoi get their two
+	    # source operands swapped (?vsldoi also uses 16-n as the shift),
+	    # and ?vspltw selects word 3-n.
+	    s/le\?//o		or
+	    s/be\?/#be#/o	or
+	    s/\?lvsr/lvsl/o	or
+	    s/\?lvsl/lvsr/o	or
+	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+	} else {			# big-endian
+	    # le? becomes a #le# comment marker, be? is dropped and a bare
+	    # '?' prefix is simply removed.
+	    s/le\?/#le#/o	or
+	    s/be\?//o		or
+	    s/\?([a-z]+)/$1/o;
+	}
+
+        print $_,"\n";
+}
+
+close STDOUT;
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
new file mode 100644
index 0000000000..74fb86b0d2
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/sizes.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
+				unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
+/*
+ * Open a kernel VSX section: preemption is disabled first, then VSX use is
+ * enabled for the kernel.  Must be balanced by vsx_end().
+ */
+static void vsx_begin(void)
+{
+	preempt_disable();
+	enable_kernel_vsx();
+}
+
+/*
+ * Close a section opened by vsx_begin(), undoing its two steps in reverse
+ * order: kernel VSX use is disabled, then preemption is re-enabled.
+ */
+static void vsx_end(void)
+{
+	disable_kernel_vsx();
+	preempt_enable();
+}
+
+/*
+ * Process @bytes bytes from @src into @dst.
+ *
+ * The leading part of the request that is a whole multiple of 256 bytes is
+ * handed to chacha_p10le_8x(); afterwards the block counter in state[12] is
+ * advanced here by the number of CHACHA_BLOCK_SIZE blocks that call covered.
+ * Whatever is left over is passed to chacha_crypt_generic().
+ */
+static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
+			     unsigned int bytes, int nrounds)
+{
+	const unsigned int vec_bytes = bytes & ~0x0FF;
+
+	if (vec_bytes) {
+		chacha_p10le_8x(state, dst, src, vec_bytes, nrounds);
+		state[12] += vec_bytes / CHACHA_BLOCK_SIZE;
+		dst += vec_bytes;
+		src += vec_bytes;
+		bytes -= vec_bytes;
+	}
+
+	if (!bytes)
+		return;
+
+	chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+/*
+ * Arch hook for HChaCha: this file provides no accelerated version, so the
+ * call is forwarded unchanged to hchacha_block_generic().
+ */
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+/*
+ * Arch hook for state setup: forwards unchanged to chacha_init_generic().
+ */
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+/*
+ * Arch hook for ChaCha encryption/decryption of @bytes bytes.
+ *
+ * The generic implementation is used when the have_p10 key is not enabled,
+ * when the request is no longer than one ChaCha block, or when SIMD is not
+ * usable in the current context.  Otherwise the data is processed in pieces
+ * of at most SZ_4K, each inside its own vsx_begin()/vsx_end() section
+ * (presumably to bound the time spent with preemption disabled).
+ */
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
+	    !crypto_simd_usable()) {
+		chacha_crypt_generic(state, dst, src, bytes, nrounds);
+		return;
+	}
+
+	/* bytes > CHACHA_BLOCK_SIZE here, so the body runs at least once. */
+	while (bytes) {
+		const unsigned int chunk = min_t(unsigned int, bytes, SZ_4K);
+
+		vsx_begin();
+		chacha_p10_do_8x(state, dst, src, chunk, nrounds);
+		vsx_end();
+
+		dst += chunk;
+		src += chunk;
+		bytes -= chunk;
+	}
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+/*
+ * Walk the request and run ChaCha over each mapped span, using the state
+ * built from @ctx->key and @iv.
+ *
+ * Every step except the final one is rounded down to a multiple of the walk
+ * stride; the bytes not consumed are handed back to skcipher_walk_done().
+ * Spans are processed with the VSX path when SIMD is usable and with
+ * chacha_crypt_generic() otherwise.
+ *
+ * Returns 0 on success or the first error reported by the skcipher walk.
+ */
+static int chacha_p10_stream_xor(struct skcipher_request *req,
+				 const struct chacha_ctx *ctx, const u8 *iv)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (!err && walk.nbytes > 0) {
+		unsigned int chunk = walk.nbytes;
+
+		/* Not the last step yet: keep to whole strides. */
+		if (chunk < walk.total)
+			chunk = rounddown(chunk, walk.stride);
+
+		if (crypto_simd_usable()) {
+			vsx_begin();
+			chacha_p10_do_8x(state, walk.dst.virt.addr,
+					 walk.src.virt.addr, chunk,
+					 ctx->nrounds);
+			vsx_end();
+		} else {
+			chacha_crypt_generic(state, walk.dst.virt.addr,
+					     walk.src.virt.addr, chunk,
+					     ctx->nrounds);
+		}
+
+		err = skcipher_walk_done(&walk, walk.nbytes - chunk);
+	}
+
+	return err;
+}
+
+/*
+ * skcipher encrypt/decrypt handler for the plain ChaCha entry in algs[]:
+ * runs the stream cipher with the transform context and the request IV.
+ */
+static int chacha_p10(struct skcipher_request *req)
+{
+	const struct chacha_ctx *ctx =
+		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+
+	return chacha_p10_stream_xor(req, ctx, req->iv);
+}
+
+/*
+ * skcipher encrypt/decrypt handler for the XChaCha entries in algs[].
+ *
+ * HChaCha is run over a state built from the transform key and req->iv; its
+ * output becomes the key of a temporary sub-context.  The stream cipher is
+ * then run with that sub-context and a 16-byte IV assembled from request IV
+ * bytes 24..31 followed by bytes 16..23.
+ */
+static int xchacha_p10(struct skcipher_request *req)
+{
+	const struct chacha_ctx *ctx =
+		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct chacha_ctx subctx;
+	u8 real_iv[16];
+	u32 state[16];
+
+	subctx.nrounds = ctx->nrounds;
+	chacha_init_generic(state, ctx->key, req->iv);
+	hchacha_block_arch(state, subctx.key, ctx->nrounds);
+
+	memcpy(real_iv, req->iv + 24, 8);
+	memcpy(real_iv + 8, req->iv + 16, 8);
+
+	return chacha_p10_stream_xor(req, &subctx, real_iv);
+}
+
+/*
+ * skcipher algorithms provided by this driver: chacha20, xchacha20 and
+ * xchacha12.  All three share priority 300, a block size of 1, a chunk size
+ * of CHACHA_BLOCK_SIZE and a fixed key size of CHACHA_KEY_SIZE; they differ
+ * in IV size, setkey helper and handler.  Encryption and decryption use the
+ * same handler in every entry.
+ */
+static struct skcipher_alg algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-p10",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_p10,
+		.decrypt		= chacha_p10,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-p10",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_p10,
+		.decrypt		= xchacha_p10,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-p10",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_p10,
+		.decrypt		= xchacha_p10,
+	}
+};
+
+/*
+ * Module init.
+ *
+ * This module exports chacha_crypt_arch() and friends, and
+ * chacha_crypt_arch() already falls back to the generic code while the
+ * have_p10 key is disabled.  Loading must therefore succeed on every CPU:
+ * refusing to load without the P10 feature would leave those exported
+ * symbols unavailable to their users.  On CPUs lacking CPU_FTR_ARCH_31 the
+ * module loads as a pure pass-through and registers no skciphers.
+ *
+ * Returns 0, or the error from crypto_register_skciphers().
+ */
+static int __init chacha_p10_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_31))
+		return 0;
+
+	static_branch_enable(&have_p10);
+
+	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+/*
+ * Module exit: unregister the skciphers, but only if init registered them
+ * (have_p10 is enabled exactly on that path).
+ */
+static void __exit chacha_p10_exit(void)
+{
+	if (!static_branch_likely(&have_p10))
+		return;
+
+	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_p10_init);
+module_exit(chacha_p10_exit);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-p10");
diff --git a/arch/powerpc/crypto/chacha-p10le-8x.S b/arch/powerpc/crypto/chacha-p10le-8x.S
new file mode 100644
index 0000000000..17bedb66b8
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10le-8x.S
@@ -0,0 +1,842 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated chacha20 implementation for ppc64le.
+#
+# Copyright 2023- IBM Corp. All rights reserved
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
+#				 size_t len, int nrounds);
+#
+# do rounds,  8 quarter rounds
+# 1.  a += b; d ^= a; d <<<= 16;
+# 2.  c += d; b ^= c; b <<<= 12;
+# 3.  a += b; d ^= a; d <<<= 8;
+# 4.  c += d; b ^= c; b <<<= 7
+#
+# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 16
+# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 12
+# row1 = (row1 + row2), row4 = row1 xor row4,  row4 rotate each word by 8
+# row3 = (row3 + row4), row2 = row3 xor row2,  row2 rotate each word by 7
+#
+# 4 blocks (a b c d)
+#
+# a0 b0 c0 d0
+# a1 b1 c1 d1
+# ...
+# a4 b4 c4 d4
+# ...
+# a8 b8 c8 d8
+# ...
+# a12 b12 c12 d12
+# a13 ...
+# a14 ...
+# a15 b15 c15 d15
+#
+# Column round (v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+# Diagonal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
+#
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/linkage.h>
+
+.machine	"any"
+.text
+
+# Store general purpose register GPR as a doubleword at OFFSET(FRAME).
+.macro	SAVE_GPR GPR OFFSET FRAME
+	std	\GPR,\OFFSET(\FRAME)
+.endm
+
+# Store vector register VRS at FRAME + OFFSET using the indexed form.
+# Clobbers r16, which is loaded with OFFSET.
+.macro	SAVE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	stvx	\VRS, 16, \FRAME
+.endm
+
+# Store vector-scalar register VSX at FRAME + OFFSET using the indexed form.
+# Clobbers r16, which is loaded with OFFSET.
+.macro	SAVE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	stxvx	\VSX, 16, \FRAME
+.endm
+
+# Reload general purpose register GPR from the doubleword at OFFSET(FRAME).
+.macro	RESTORE_GPR GPR OFFSET FRAME
+	ld	\GPR,\OFFSET(\FRAME)
+.endm
+
+# Reload vector register VRS from FRAME + OFFSET using the indexed form.
+# Clobbers r16, which is loaded with OFFSET.
+.macro	RESTORE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	lvx	\VRS, 16, \FRAME
+.endm
+
+# Reload vector-scalar register VSX from FRAME + OFFSET using the indexed form.
+# Clobbers r16, which is loaded with OFFSET.
+.macro	RESTORE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	lxvx	\VSX, 16, \FRAME
+.endm
+
+# Function prologue: store LR at 16(r1), allocate a 752-byte stack frame and
+# save r14-r31, v20-v31 and VSR 14-31 into it.
+#
+# Frame layout relative to the new r1:
+#   112..248        r14-r31
+#   256 + 0..176    v20-v31
+#   256 + 192..464  VSR 14-31
+#
+# Clobbers r0 and r9; r16 is also overwritten by the vector helpers, after
+# its own value has been saved.
+.macro SAVE_REGS
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-752(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+	SAVE_GPR 25, 200, 1
+	SAVE_GPR 26, 208, 1
+	SAVE_GPR 27, 216, 1
+	SAVE_GPR 28, 224, 1
+	SAVE_GPR 29, 232, 1
+	SAVE_GPR 30, 240, 1
+	SAVE_GPR 31, 248, 1
+
+	# r9 = base of the vector save area
+	addi	9, 1, 256
+	SAVE_VRS 20, 0, 9
+	SAVE_VRS 21, 16, 9
+	SAVE_VRS 22, 32, 9
+	SAVE_VRS 23, 48, 9
+	SAVE_VRS 24, 64, 9
+	SAVE_VRS 25, 80, 9
+	SAVE_VRS 26, 96, 9
+	SAVE_VRS 27, 112, 9
+	SAVE_VRS 28, 128, 9
+	SAVE_VRS 29, 144, 9
+	SAVE_VRS 30, 160, 9
+	SAVE_VRS 31, 176, 9
+
+	SAVE_VSX 14, 192, 9
+	SAVE_VSX 15, 208, 9
+	SAVE_VSX 16, 224, 9
+	SAVE_VSX 17, 240, 9
+	SAVE_VSX 18, 256, 9
+	SAVE_VSX 19, 272, 9
+	SAVE_VSX 20, 288, 9
+	SAVE_VSX 21, 304, 9
+	SAVE_VSX 22, 320, 9
+	SAVE_VSX 23, 336, 9
+	SAVE_VSX 24, 352, 9
+	SAVE_VSX 25, 368, 9
+	SAVE_VSX 26, 384, 9
+	SAVE_VSX 27, 400, 9
+	SAVE_VSX 28, 416, 9
+	SAVE_VSX 29, 432, 9
+	SAVE_VSX 30, 448, 9
+	SAVE_VSX 31, 464, 9
+.endm # SAVE_REGS
+
+# Function epilogue, mirror of SAVE_REGS: reload v20-v31, VSR 14-31 and
+# r14-r31 from the frame, release the 752-byte frame and restore LR from
+# 16(r1).
+#
+# The vector registers are reloaded first because the vector helpers use r16
+# as scratch; r16 itself gets its saved value back afterwards.  Clobbers r0
+# and r9.
+.macro RESTORE_REGS
+	# r9 = base of the vector save area
+	addi	9, 1, 256
+	RESTORE_VRS 20, 0, 9
+	RESTORE_VRS 21, 16, 9
+	RESTORE_VRS 22, 32, 9
+	RESTORE_VRS 23, 48, 9
+	RESTORE_VRS 24, 64, 9
+	RESTORE_VRS 25, 80, 9
+	RESTORE_VRS 26, 96, 9
+	RESTORE_VRS 27, 112, 9
+	RESTORE_VRS 28, 128, 9
+	RESTORE_VRS 29, 144, 9
+	RESTORE_VRS 30, 160, 9
+	RESTORE_VRS 31, 176, 9
+
+	RESTORE_VSX 14, 192, 9
+	RESTORE_VSX 15, 208, 9
+	RESTORE_VSX 16, 224, 9
+	RESTORE_VSX 17, 240, 9
+	RESTORE_VSX 18, 256, 9
+	RESTORE_VSX 19, 272, 9
+	RESTORE_VSX 20, 288, 9
+	RESTORE_VSX 21, 304, 9
+	RESTORE_VSX 22, 320, 9
+	RESTORE_VSX 23, 336, 9
+	RESTORE_VSX 24, 352, 9
+	RESTORE_VSX 25, 368, 9
+	RESTORE_VSX 26, 384, 9
+	RESTORE_VSX 27, 400, 9
+	RESTORE_VSX 28, 416, 9
+	RESTORE_VSX 29, 432, 9
+	RESTORE_VSX 30, 448, 9
+	RESTORE_VSX 31, 464, 9
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+	RESTORE_GPR 25, 200, 1
+	RESTORE_GPR 26, 208, 1
+	RESTORE_GPR 27, 216, 1
+	RESTORE_GPR 28, 224, 1
+	RESTORE_GPR 29, 232, 1
+	RESTORE_GPR 30, 240, 1
+	RESTORE_GPR 31, 248, 1
+
+	addi    1, 1, 752
+	ld 0, 16(1)
+	mtlr 0
+.endm # RESTORE_REGS
+
+# One double round (column round, then diagonal round) over eight blocks.
+# v0-v15 hold the working state of the first group of four blocks and v16-v31
+# that of the second group.
+#
+# All 32 vector registers carry state, so the operand of every vpermxor/vrlw
+# step is borrowed: v25 (or v28) is parked in VSR 0, loaded from one of
+# VSR 20-23, used, and then restored from VSR 0.  chacha_p10le_8x fills
+# VSR 20 and 22 from the two permx rows and VSR 21 and 23 with the splatted
+# values 12 and 7 before invoking this macro.
+.macro QT_loop_8x
+	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+	# park v25 in VSR 0 and borrow it for the VSR 20 permute mask
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 20, 20
+	# a += b
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	  vadduwm 16, 16, 20
+	  vadduwm 17, 17, 21
+	  vadduwm 18, 18, 22
+	  vadduwm 19, 19, 23
+
+	# d ^= a, combined with the word rotate through the permute mask
+	  vpermxor 12, 12, 0, 25
+	  vpermxor 13, 13, 1, 25
+	  vpermxor 14, 14, 2, 25
+	  vpermxor 15, 15, 3, 25
+	  vpermxor 28, 28, 16, 25
+	  vpermxor 29, 29, 17, 25
+	  vpermxor 30, 30, 18, 25
+	  vpermxor 31, 31, 19, 25
+	# give v25 its state value back
+	xxlor	32+25, 0, 0
+	# c += d
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	  vadduwm 24, 24, 28
+	  vadduwm 25, 25, 29
+	  vadduwm 26, 26, 30
+	  vadduwm 27, 27, 31
+	# b ^= c
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	  vxor 20, 20, 24
+	  vxor 21, 21, 25
+	  vxor 22, 22, 26
+	  vxor 23, 23, 27
+
+	# rotate b by the per-word amounts held in VSR 21
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 21, 21
+	vrlw 4, 4, 25  #
+	vrlw 5, 5, 25
+	vrlw 6, 6, 25
+	vrlw 7, 7, 25
+	  vrlw 20, 20, 25  #
+	  vrlw 21, 21, 25
+	  vrlw 22, 22, 25
+	  vrlw 23, 23, 25
+	xxlor	32+25, 0, 0
+	# a += b
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	  vadduwm 16, 16, 20
+	  vadduwm 17, 17, 21
+	  vadduwm 18, 18, 22
+	  vadduwm 19, 19, 23
+
+	# d ^= a, rotate through the VSR 22 permute mask
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 22, 22
+	  vpermxor 12, 12, 0, 25
+	  vpermxor 13, 13, 1, 25
+	  vpermxor 14, 14, 2, 25
+	  vpermxor 15, 15, 3, 25
+	  vpermxor 28, 28, 16, 25
+	  vpermxor 29, 29, 17, 25
+	  vpermxor 30, 30, 18, 25
+	  vpermxor 31, 31, 19, 25
+	xxlor	32+25, 0, 0
+	# c += d
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	  vadduwm 24, 24, 28
+	  vadduwm 25, 25, 29
+	  vadduwm 26, 26, 30
+	  vadduwm 27, 27, 31
+	# this time v28 is the borrowed register; it takes VSR 23
+	xxlor	0, 32+28, 32+28
+	xxlor	32+28, 23, 23
+	# b ^= c, then rotate b by the amounts in v28
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	  vxor 20, 20, 24
+	  vxor 21, 21, 25
+	  vxor 22, 22, 26
+	  vxor 23, 23, 27
+	vrlw 4, 4, 28  #
+	vrlw 5, 5, 28
+	vrlw 6, 6, 28
+	vrlw 7, 7, 28
+	  vrlw 20, 20, 28  #
+	  vrlw 21, 21, 28
+	  vrlw 22, 22, 28
+	  vrlw 23, 23, 28
+	xxlor	32+28, 0, 0
+
+	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
+	# same four steps as above, applied to the diagonals
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 20, 20
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vadduwm 16, 16, 21
+	  vadduwm 17, 17, 22
+	  vadduwm 18, 18, 23
+	  vadduwm 19, 19, 20
+
+	  vpermxor 15, 15, 0, 25
+	  vpermxor 12, 12, 1, 25
+	  vpermxor 13, 13, 2, 25
+	  vpermxor 14, 14, 3, 25
+	  vpermxor 31, 31, 16, 25
+	  vpermxor 28, 28, 17, 25
+	  vpermxor 29, 29, 18, 25
+	  vpermxor 30, 30, 19, 25
+
+	xxlor	32+25, 0, 0
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	  vadduwm 26, 26, 31
+	  vadduwm 27, 27, 28
+	  vadduwm 24, 24, 29
+	  vadduwm 25, 25, 30
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	  vxor 21, 21, 26
+	  vxor 22, 22, 27
+	  vxor 23, 23, 24
+	  vxor 20, 20, 25
+
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 21, 21
+	vrlw 5, 5, 25
+	vrlw 6, 6, 25
+	vrlw 7, 7, 25
+	vrlw 4, 4, 25
+	  vrlw 21, 21, 25
+	  vrlw 22, 22, 25
+	  vrlw 23, 23, 25
+	  vrlw 20, 20, 25
+	xxlor	32+25, 0, 0
+
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vadduwm 16, 16, 21
+	  vadduwm 17, 17, 22
+	  vadduwm 18, 18, 23
+	  vadduwm 19, 19, 20
+
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 22, 22
+	  vpermxor 15, 15, 0, 25
+	  vpermxor 12, 12, 1, 25
+	  vpermxor 13, 13, 2, 25
+	  vpermxor 14, 14, 3, 25
+	  vpermxor 31, 31, 16, 25
+	  vpermxor 28, 28, 17, 25
+	  vpermxor 29, 29, 18, 25
+	  vpermxor 30, 30, 19, 25
+	xxlor	32+25, 0, 0
+
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	  vadduwm 26, 26, 31
+	  vadduwm 27, 27, 28
+	  vadduwm 24, 24, 29
+	  vadduwm 25, 25, 30
+
+	xxlor	0, 32+28, 32+28
+	xxlor	32+28, 23, 23
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	  vxor 21, 21, 26
+	  vxor 22, 22, 27
+	  vxor 23, 23, 24
+	  vxor 20, 20, 25
+	vrlw 5, 5, 28
+	vrlw 6, 6, 28
+	vrlw 7, 7, 28
+	vrlw 4, 4, 28
+	  vrlw 21, 21, 28
+	  vrlw 22, 22, 28
+	  vrlw 23, 23, 28
+	  vrlw 20, 20, 28
+	xxlor	32+28, 0, 0
+.endm
+
+# One double round (column round, then diagonal round) over four blocks held
+# in v0-v15.  Unlike QT_loop_8x, the step operands stay resident in vector
+# registers: v20 and v22 are the vpermxor masks, v21 and v23 the vrlw rotate
+# amounts.  No other register is written.
+.macro QT_loop_4x
+	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+	# a += b
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	# d ^= a, rotate through the v20 permute mask
+	  vpermxor 12, 12, 0, 20
+	  vpermxor 13, 13, 1, 20
+	  vpermxor 14, 14, 2, 20
+	  vpermxor 15, 15, 3, 20
+	# c += d
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	# b ^= c, then rotate b by the amounts in v21
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	vrlw 4, 4, 21
+	vrlw 5, 5, 21
+	vrlw 6, 6, 21
+	vrlw 7, 7, 21
+	# a += b
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	# d ^= a, rotate through the v22 permute mask
+	  vpermxor 12, 12, 0, 22
+	  vpermxor 13, 13, 1, 22
+	  vpermxor 14, 14, 2, 22
+	  vpermxor 15, 15, 3, 22
+	# c += d
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	# b ^= c, then rotate b by the amounts in v23
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	vrlw 4, 4, 23
+	vrlw 5, 5, 23
+	vrlw 6, 6, 23
+	vrlw 7, 7, 23
+
+	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
+	# same four steps as above, applied to the diagonals
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vpermxor 15, 15, 0, 20
+	  vpermxor 12, 12, 1, 20
+	  vpermxor 13, 13, 2, 20
+	  vpermxor 14, 14, 3, 20
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	vrlw 5, 5, 21
+	vrlw 6, 6, 21
+	vrlw 7, 7, 21
+	vrlw 4, 4, 21
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vpermxor 15, 15, 0, 22
+	  vpermxor 12, 12, 1, 22
+	  vpermxor 13, 13, 2, 22
+	  vpermxor 14, 14, 3, 22
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	vrlw 5, 5, 23
+	vrlw 6, 6, 23
+	vrlw 7, 7, 23
+	vrlw 4, 4, 23
+.endm
+
+# Transpose
+#
+# Treats vector registers a0..a3 as the rows of a 4x4 matrix of words and
+# replaces them with its transpose, so that each register ends up holding the
+# four words that previously sat in the same lane of a0..a3.
+# Clobbers VSR 10-13, which serve as scratch.
+.macro TP_4x a0 a1 a2 a3
+	xxmrghw  10, 32+\a0, 32+\a1	# a0, a1, b0, b1
+	xxmrghw  11, 32+\a2, 32+\a3	# a2, a3, b2, b3
+	xxmrglw  12, 32+\a0, 32+\a1	# c0, c1, d0, d1
+	xxmrglw  13, 32+\a2, 32+\a3	# c2, c3, d2, d3
+	xxpermdi	32+\a0, 10, 11, 0	# a0, a1, a2, a3
+	xxpermdi	32+\a1, 10, 11, 3	# b0, b1, b2, b3
+	xxpermdi	32+\a2, 12, 13, 0	# c0, c1, c2, c3
+	xxpermdi	32+\a3, 12, 13, 3	# d0, d1, d2, d3
+.endm
+
+# key stream = working state + state
+#
+# S selects the group of sixteen vector registers holding the (already
+# transposed) working state: v(S+0)..v(S+15).  The four rows of the original
+# state are taken from v(16-S)..v(19-S), that is v16-v19 for S = 0 and v0-v3
+# for S = 16, so the caller has to place the state rows there beforehand.
+.macro Add_state S
+	vadduwm \S+0, \S+0, 16-\S
+	vadduwm \S+4, \S+4, 17-\S
+	vadduwm \S+8, \S+8, 18-\S
+	vadduwm \S+12, \S+12, 19-\S
+
+	vadduwm \S+1, \S+1, 16-\S
+	vadduwm \S+5, \S+5, 17-\S
+	vadduwm \S+9, \S+9, 18-\S
+	vadduwm \S+13, \S+13, 19-\S
+
+	vadduwm \S+2, \S+2, 16-\S
+	vadduwm \S+6, \S+6, 17-\S
+	vadduwm \S+10, \S+10, 18-\S
+	vadduwm \S+14, \S+14, 19-\S
+
+	vadduwm	\S+3, \S+3, 16-\S
+	vadduwm	\S+7, \S+7, 17-\S
+	vadduwm	\S+11, \S+11, 18-\S
+	vadduwm	\S+15, \S+15, 19-\S
+.endm
+
+#
+# write 256 bytes
+#
+# XORs 256 bytes of input with the key stream held in v(S+0)..v(S+15) and
+# stores the result.  Input is read from r5 + r14 and output written to
+# r4 + r14; r17-r31 must hold the offsets 16, 32, ..., 240, as set up at the
+# top of chacha_p10le_8x.
+#
+# The key stream registers are consumed in the order S+0, S+4, S+8, S+12,
+# S+1, S+5, ... to match the register layout left by TP_4x.
+# Clobbers r9, r16 and VSR 0-15.
+#
+.macro Write_256 S
+	add 9, 14, 5
+	add 16, 14, 4
+	lxvw4x 0, 0, 9
+	lxvw4x 1, 17, 9
+	lxvw4x 2, 18, 9
+	lxvw4x 3, 19, 9
+	lxvw4x 4, 20, 9
+	lxvw4x 5, 21, 9
+	lxvw4x 6, 22, 9
+	lxvw4x 7, 23, 9
+	lxvw4x 8, 24, 9
+	lxvw4x 9, 25, 9
+	lxvw4x 10, 26, 9
+	lxvw4x 11, 27, 9
+	lxvw4x 12, 28, 9
+	lxvw4x 13, 29, 9
+	lxvw4x 14, 30, 9
+	lxvw4x 15, 31, 9
+
+	xxlxor \S+32, \S+32, 0
+	xxlxor \S+36, \S+36, 1
+	xxlxor \S+40, \S+40, 2
+	xxlxor \S+44, \S+44, 3
+	xxlxor \S+33, \S+33, 4
+	xxlxor \S+37, \S+37, 5
+	xxlxor \S+41, \S+41, 6
+	xxlxor \S+45, \S+45, 7
+	xxlxor \S+34, \S+34, 8
+	xxlxor \S+38, \S+38, 9
+	xxlxor \S+42, \S+42, 10
+	xxlxor \S+46, \S+46, 11
+	xxlxor \S+35, \S+35, 12
+	xxlxor \S+39, \S+39, 13
+	xxlxor \S+43, \S+43, 14
+	xxlxor \S+47, \S+47, 15
+
+	stxvw4x \S+32, 0, 16
+	stxvw4x \S+36, 17, 16
+	stxvw4x \S+40, 18, 16
+	stxvw4x \S+44, 19, 16
+
+	stxvw4x \S+33, 20, 16
+	stxvw4x \S+37, 21, 16
+	stxvw4x \S+41, 22, 16
+	stxvw4x \S+45, 23, 16
+
+	stxvw4x \S+34, 24, 16
+	stxvw4x \S+38, 25, 16
+	stxvw4x \S+42, 26, 16
+	stxvw4x \S+46, 27, 16
+
+	stxvw4x \S+35, 28, 16
+	stxvw4x \S+39, 29, 16
+	stxvw4x \S+43, 30, 16
+	stxvw4x \S+47, 31, 16
+
+.endm
+
+#
+# chacha_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
+#
+# Arguments as used below: r3 = state, r4 = dst, r5 = src, r6 = len,
+# r7 = nrounds.  Returns straight away when len <= 0.
+#
+# While at least 512 bytes remain, Loop_8x produces 512 bytes per pass (two
+# groups of four blocks); after that Loop_4x produces 256 bytes per pass and
+# stops once fewer than 256 bytes remain.  NOTE(review): the first pass
+# through either loop does not check that a full pass worth of data is
+# available, so len is presumably expected to be a non-zero multiple of 256 -
+# confirm against the callers.
+#
+# The state array is only read: no store in this routine targets r3, so
+# advancing the block counter in memory is left to the caller.
+#
+# Register roles: r14 = running byte offset into src/dst, r15 = remaining
+# length, r8 = CTR reload value (nrounds / 2, one QT_loop_* pass per count),
+# r17-r31 = constant offsets 16..240.
+#
+SYM_FUNC_START(chacha_p10le_8x)
+.align 5
+	cmpdi	6, 0
+	ble	Out_no_chacha
+
+	SAVE_REGS
+
+	# r17 - r31 mainly for Write_256 macro.
+	li	17, 16
+	li	18, 32
+	li	19, 48
+	li	20, 64
+	li	21, 80
+	li	22, 96
+	li	23, 112
+	li	24, 128
+	li	25, 144
+	li	26, 160
+	li	27, 176
+	li	28, 192
+	li	29, 208
+	li	30, 224
+	li	31, 240
+
+	mr 15, 6			# len
+	li 14, 0			# offset to inp and outp
+
+        lxvw4x	48, 0, 3		#  vr16, constants
+	lxvw4x	49, 17, 3		#  vr17, key 1
+	lxvw4x	50, 18, 3		#  vr18, key 2
+	lxvw4x	51, 19, 3		#  vr19, counter, nonce
+
+	# create (0, 1, 2, 3) counters
+	vspltisw 0, 0
+	vspltisw 1, 1
+	vspltisw 2, 2
+	vspltisw 3, 3
+	vmrghw	4, 0, 1
+	vmrglw	5, 2, 3
+	vsldoi	30, 4, 5, 8		# vr30 counter, 4 (0, 1, 2, 3)
+
+	# rotate amounts for the vrlw steps
+	vspltisw 21, 12
+	vspltisw 23, 7
+
+	# permute masks for the vpermxor steps: permx rows 0 and 1
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxvw4x	32+20, 0, 11
+	lxvw4x	32+22, 17, 11
+
+	# r8 = nrounds / 2
+	sradi	8, 7, 1
+
+	mtctr 8
+
+	# save constants to vsx
+	xxlor	16, 48, 48
+	xxlor	17, 49, 49
+	xxlor	18, 50, 50
+	xxlor	19, 51, 51
+
+	vspltisw 25, 4
+	vspltisw 26, 8
+
+	# VSR 25 = splat(8), VSR 24 = splat(4)
+	xxlor	25, 32+26, 32+26
+	xxlor	24, 32+25, 32+25
+
+	vadduwm	31, 30, 25		# counter = (0, 1, 2, 3) + (4, 4, 4, 4)
+	xxlor	30, 32+30, 32+30
+	xxlor	31, 32+31, 32+31
+
+	# keep the v20-v23 step operands in VSR 20-23 for QT_loop_8x
+	xxlor	20, 32+20, 32+20
+	xxlor	21, 32+21, 32+21
+	xxlor	22, 32+22, 32+22
+	xxlor	23, 32+23, 32+23
+
+	cmpdi	6, 512
+	blt	Loop_last
+
+Loop_8x:
+	# splat each state word across a vector: first group into v0-v15
+	xxspltw  32+0, 16, 0
+	xxspltw  32+1, 16, 1
+	xxspltw  32+2, 16, 2
+	xxspltw  32+3, 16, 3
+
+	xxspltw  32+4, 17, 0
+	xxspltw  32+5, 17, 1
+	xxspltw  32+6, 17, 2
+	xxspltw  32+7, 17, 3
+	xxspltw  32+8, 18, 0
+	xxspltw  32+9, 18, 1
+	xxspltw  32+10, 18, 2
+	xxspltw  32+11, 18, 3
+	xxspltw  32+12, 19, 0
+	xxspltw  32+13, 19, 1
+	xxspltw  32+14, 19, 2
+	xxspltw  32+15, 19, 3
+	vadduwm	12, 12, 30	# increase counter
+
+	# second group into v16-v31
+	xxspltw  32+16, 16, 0
+	xxspltw  32+17, 16, 1
+	xxspltw  32+18, 16, 2
+	xxspltw  32+19, 16, 3
+
+	xxspltw  32+20, 17, 0
+	xxspltw  32+21, 17, 1
+	xxspltw  32+22, 17, 2
+	xxspltw  32+23, 17, 3
+	xxspltw  32+24, 18, 0
+	xxspltw  32+25, 18, 1
+	xxspltw  32+26, 18, 2
+	xxspltw  32+27, 18, 3
+	xxspltw  32+28, 19, 0
+	xxspltw  32+29, 19, 1
+	vadduwm	28, 28, 31	# increase counter
+	xxspltw  32+30, 19, 2
+	xxspltw  32+31, 19, 3
+
+.align 5
+quarter_loop_8x:
+	QT_loop_8x
+
+	bdnz	quarter_loop_8x
+
+	# add the per-block counter offsets kept in VSR 30 to v12, borrowing
+	# v30 for the operand
+	xxlor	0, 32+30, 32+30
+	xxlor	32+30, 30, 30
+	vadduwm	12, 12, 30
+	xxlor	32+30, 0, 0
+	TP_4x 0, 1, 2, 3
+	TP_4x 4, 5, 6, 7
+	TP_4x 8, 9, 10, 11
+	TP_4x 12, 13, 14, 15
+
+	# park v16-v19 in VSR 0-3, load the state rows for Add_state 0, then
+	# put the parked values back
+	xxlor	0, 48, 48
+	xxlor	1, 49, 49
+	xxlor	2, 50, 50
+	xxlor	3, 51, 51
+	xxlor	48, 16, 16
+	xxlor	49, 17, 17
+	xxlor	50, 18, 18
+	xxlor	51, 19, 19
+	Add_state 0
+	xxlor	48, 0, 0
+	xxlor	49, 1, 1
+	xxlor	50, 2, 2
+	xxlor	51, 3, 3
+	Write_256 0
+	addi	14, 14, 256	# offset +=256
+	addi	15, 15, -256	# len -=256
+
+	# same counter adjustment for the second group, via VSR 31 and v31
+	xxlor	5, 32+31, 32+31
+	xxlor	32+31, 31, 31
+	vadduwm	28, 28, 31
+	xxlor	32+31, 5, 5
+	TP_4x 16+0, 16+1, 16+2, 16+3
+	TP_4x 16+4, 16+5, 16+6, 16+7
+	TP_4x 16+8, 16+9, 16+10, 16+11
+	TP_4x 16+12, 16+13, 16+14, 16+15
+
+	# Add_state 16 takes the state rows from v0-v3
+	xxlor	32, 16, 16
+	xxlor	33, 17, 17
+	xxlor	34, 18, 18
+	xxlor	35, 19, 19
+	Add_state 16
+	Write_256 16
+	addi	14, 14, 256	# offset +=256
+	addi	15, 15, -256	# len -=256
+
+	# advance both counter vectors by 8 blocks: VSR 30 += 8,
+	# VSR 31 = new VSR 30 + 4
+	xxlor	32+24, 24, 24
+	xxlor	32+25, 25, 25
+	xxlor	32+30, 30, 30
+	vadduwm	30, 30, 25
+	vadduwm	31, 30, 24
+	xxlor	30, 32+30, 32+30
+	xxlor	31, 32+31, 32+31
+
+	cmpdi	15, 0
+	beq	Out_loop
+
+	cmpdi	15, 512
+	blt	Loop_last
+
+	mtctr 8
+	b Loop_8x
+
+Loop_last:
+	# reload state rows and step operands into v16-v23, which Loop_8x
+	# used as working registers
+        lxvw4x	48, 0, 3		#  vr16, constants
+	lxvw4x	49, 17, 3		#  vr17, key 1
+	lxvw4x	50, 18, 3		#  vr18, key 2
+	lxvw4x	51, 19, 3		#  vr19, counter, nonce
+
+	vspltisw 21, 12
+	vspltisw 23, 7
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxvw4x	32+20, 0, 11
+	lxvw4x	32+22, 17, 11
+
+	sradi	8, 7, 1
+	mtctr 8
+
+Loop_4x:
+	vspltw  0, 16, 0
+	vspltw  1, 16, 1
+	vspltw  2, 16, 2
+	vspltw  3, 16, 3
+
+	vspltw  4, 17, 0
+	vspltw  5, 17, 1
+	vspltw  6, 17, 2
+	vspltw  7, 17, 3
+	vspltw  8, 18, 0
+	vspltw  9, 18, 1
+	vspltw  10, 18, 2
+	vspltw  11, 18, 3
+	vspltw  12, 19, 0
+	vadduwm	12, 12, 30	# increase counter
+	vspltw  13, 19, 1
+	vspltw  14, 19, 2
+	vspltw  15, 19, 3
+
+.align 5
+quarter_loop:
+	QT_loop_4x
+
+	bdnz	quarter_loop
+
+	vadduwm	12, 12, 30
+	TP_4x 0, 1, 2, 3
+	TP_4x 4, 5, 6, 7
+	TP_4x 8, 9, 10, 11
+	TP_4x 12, 13, 14, 15
+
+	Add_state 0
+	Write_256 0
+	addi	14, 14, 256	# offset += 256
+	addi	15, 15, -256	# len -= 256
+
+	# Update state counter
+	vspltisw 25, 4
+	vadduwm	30, 30, 25
+
+	cmpdi	15, 0
+	beq	Out_loop
+	cmpdi	15, 256
+	blt	Out_loop
+
+	mtctr 8
+	b Loop_4x
+
+Out_loop:
+	RESTORE_REGS
+	blr
+
+Out_no_chacha:
+	li	3, 0
+	blr
+SYM_FUNC_END(chacha_p10le_8x)
+
+# Two 16-byte permute-control vectors.  chacha_p10le_8x loads the first row
+# into v20 and the second into v22, where they serve as the last operand of
+# the vpermxor steps in the quarter-round macros.
+SYM_DATA_START_LOCAL(PERMX)
+.align 5
+permx:
+.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd
+.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc
+SYM_DATA_END(PERMX)
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
new file mode 100644
index 0000000000..c61a874a3a
--- /dev/null
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CRC vpmsum tester
+ * Copyright 2017 Daniel Axtens, IBM Corporation.
+ */
+
+#include <linux/crc-t10dif.h>
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/switch_to.h>
+
+static unsigned long iterations = 10000;
+
+#define MAX_CRC_LENGTH 65535
+
+
+/*
+ * Module init: cross-check the "crct10dif" and "crc32c" shash transforms
+ * against crc_t10dif_generic() and __crc32c_le() on random data.
+ *
+ * Each iteration picks a random offset (below 16) and length (below
+ * MAX_CRC_LENGTH), fills the buffer with random bytes and feeds the same
+ * span to both the shash transform and the reference function.  Neither
+ * side is re-initialised between iterations; the reference value is carried
+ * forward as the seed for the next round to match.
+ *
+ * Returns -ENODEV without CPU_FTR_ARCH_207S, -ENOMEM if the buffer cannot
+ * be allocated, or the error from allocating or initialising a transform.
+ * A checksum mismatch is reported with pr_err() and ends the run, but the
+ * function still returns 0 in that case.
+ */
+static int __init crc_test_init(void)
+{
+	u16 crc16 = 0, verify16 = 0;
+	__le32 verify32le = 0;
+	unsigned char *data;
+	u32 verify32 = 0;
+	unsigned long i;
+	__le32 crc32;
+	int ret;
+
+	struct crypto_shash *crct10dif_tfm;
+	struct crypto_shash *crc32c_tfm;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	data = kmalloc(MAX_CRC_LENGTH, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
+
+	if (IS_ERR(crct10dif_tfm)) {
+		pr_err("Error allocating crc-t10dif\n");
+		ret = PTR_ERR(crct10dif_tfm);
+		goto free_buf;
+	}
+
+	crc32c_tfm = crypto_alloc_shash("crc32c", 0, 0);
+
+	if (IS_ERR(crc32c_tfm)) {
+		pr_err("Error allocating crc32c\n");
+		ret = PTR_ERR(crc32c_tfm);
+		goto free_16;
+	}
+
+	/* The extra scope bounds the lifetime of the on-stack descriptors. */
+	do {
+		SHASH_DESC_ON_STACK(crct10dif_shash, crct10dif_tfm);
+		SHASH_DESC_ON_STACK(crc32c_shash, crc32c_tfm);
+
+		crct10dif_shash->tfm = crct10dif_tfm;
+		ret = crypto_shash_init(crct10dif_shash);
+
+		if (ret) {
+			pr_err("Error initing crc-t10dif\n");
+			goto free_32;
+		}
+
+
+		crc32c_shash->tfm = crc32c_tfm;
+		ret = crypto_shash_init(crc32c_shash);
+
+		if (ret) {
+			pr_err("Error initing crc32c\n");
+			goto free_32;
+		}
+
+		pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations);
+		for (i = 0; i < iterations; i++) {
+			size_t offset = get_random_u32_below(16);
+			size_t len = get_random_u32_below(MAX_CRC_LENGTH);
+
+			if (len <= offset)
+				continue;
+			get_random_bytes(data, len);
+			len -= offset;
+
+			crypto_shash_update(crct10dif_shash, data+offset, len);
+			crypto_shash_final(crct10dif_shash, (u8 *)(&crc16));
+			verify16 = crc_t10dif_generic(verify16, data+offset, len);
+
+
+			if (crc16 != verify16) {
+				pr_err("FAILURE in CRC16: got 0x%04x expected 0x%04x (len %zu)\n",
+				       crc16, verify16, len);
+				break;
+			}
+
+			crypto_shash_update(crc32c_shash, data+offset, len);
+			crypto_shash_final(crc32c_shash, (u8 *)(&crc32));
+			/* previous reference result seeds this round */
+			verify32 = le32_to_cpu(verify32le);
+			verify32le = ~cpu_to_le32(__crc32c_le(~verify32, data+offset, len));
+			if (crc32 != verify32le) {
+				/* report the two values that were compared */
+				pr_err("FAILURE in CRC32: got 0x%08x expected 0x%08x (len %zu)\n",
+				       le32_to_cpu(crc32),
+				       le32_to_cpu(verify32le), len);
+				break;
+			}
+			cond_resched();
+		}
+		pr_info("crc-vpmsum_test done, completed %lu iterations\n", i);
+	} while (0);
+
+free_32:
+	crypto_free_shash(crc32c_tfm);
+
+free_16:
+	crypto_free_shash(crct10dif_tfm);
+
+free_buf:
+	kfree(data);
+
+	return ret;
+}
+
+/* Nothing to undo on unload: crc_test_init() releases everything it acquires. */
+static void __exit crc_test_exit(void) {}
+
+module_init(crc_test_init);
+module_exit(crc_test_exit);
+/* "iterations" is an unsigned long, so register it with the matching type. */
+module_param(iterations, ulong, 0400);
+
+MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
+MODULE_DESCRIPTION("Vector polynomial multiply-sum CRC tester");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S
new file mode 100644
index 0000000000..b0f87f595b
--- /dev/null
+++ b/arch/powerpc/crypto/crc32-vpmsum_core.S
@@ -0,0 +1,746 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Core of the accelerated CRC algorithm.
+ * In your file, define the constants and CRC_FUNCTION_NAME
+ * Then include this file.
+ *
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
+ * 16 bytes.
+ *
+ * The first step is to reduce it to 1024 bits. We do this in 8 parallel
+ * chunks in order to mask the latency of the vpmsum instructions. If we
+ * have more than 32 kB of data to checksum we repeat this step multiple
+ * times, passing in the previous 1024 bits.
+ *
+ * The next step is to reduce the 1024 bits to 64 bits. This step adds
+ * 32 bits of 0s to the end - this matches what a CRC does. We just
+ * calculate constants that land the data in this 32 bits.
+ *
+ * We then use fixed point Barrett reduction to compute a mod n over GF(2)
+ * for n = CRC using POWER8 instructions. We use x = 32.
+ *
+ * https://en.wikipedia.org/wiki/Barrett_reduction
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+*/
+
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+#define MAX_SIZE	32768
+
+	.text
+
+#if defined(__BIG_ENDIAN__) && defined(REFLECT)
+#define BYTESWAP_DATA
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
+#define BYTESWAP_DATA
+#else
+#undef BYTESWAP_DATA
+#endif
+
+#define off16		r25
+#define off32		r26
+#define off48		r27
+#define off64		r28
+#define off80		r29
+#define off96		r30
+#define off112		r31
+
+#define const1		v24
+#define const2		v25
+
+#define byteswap	v26
+#define	mask_32bit	v27
+#define	mask_64bit	v28
+#define zeroes		v29
+
+#ifdef BYTESWAP_DATA
+#define VPERM(A, B, C, D) vperm	A, B, C, D
+#else
+#define VPERM(A, B, C, D)
+#endif
+
+/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */
+FUNC_START(CRC_FUNCTION_NAME)
+	std	r31,-8(r1)	/* save r25-r31 below r1; r1 itself is never adjusted */
+	std	r30,-16(r1)
+	std	r29,-24(r1)
+	std	r28,-32(r1)
+	std	r27,-40(r1)
+	std	r26,-48(r1)
+	std	r25,-56(r1)
+
+	li	off16,16	/* byte offsets used as index registers for lvx/stvx */
+	li	off32,32
+	li	off48,48
+	li	off64,64
+	li	off80,80
+	li	off96,96
+	li	off112,112
+	li	r0,0		/* r0 = 0: first block; it is set to 1 before looping back to 1: */
+
+	/* Enough room for saving 10 non volatile VMX registers */
+	subi	r6,r1,56+10*16	/* r6: base of the v20-v27 save slots */
+	subi	r7,r1,56+2*16	/* r7: base of the v28-v29 save slots */
+
+	stvx	v20,0,r6
+	stvx	v21,off16,r6
+	stvx	v22,off32,r6
+	stvx	v23,off48,r6
+	stvx	v24,off64,r6
+	stvx	v25,off80,r6
+	stvx	v26,off96,r6
+	stvx	v27,off112,r6
+	stvx	v28,0,r7
+	stvx	v29,off16,r7
+
+	mr	r10,r3		/* keep the incoming crc for the len == 0 return in .Lzero */
+
+	vxor	zeroes,zeroes,zeroes
+	vspltisw v0,-1		/* v0 = all ones */
+
+	vsldoi	mask_32bit,zeroes,v0,4	/* ones in the low 4 bytes only */
+	vsldoi	mask_64bit,zeroes,v0,8	/* ones in the low 8 bytes only */
+
+	/* Get the initial value into v8 */
+	vxor	v8,v8,v8
+	MTVRD(v8, R3)		/* crc argument (r3) into v8 */
+#ifdef REFLECT
+	vsldoi	v8,zeroes,v8,8	/* shift into bottom 32 bits */
+#else
+	vsldoi	v8,v8,zeroes,4	/* shift into top 32 bits */
+#endif
+
+#ifdef BYTESWAP_DATA
+	LOAD_REG_ADDR(r3, .byteswap_constant)
+	lvx	byteswap,0,r3	/* permute control used by every VPERM() below */
+	addi	r3,r3,16
+#endif
+
+	cmpdi	r5,256		/* len < 256: take the unrolled .Lshort path */
+	blt	.Lshort
+
+	rldicr	r6,r5,0,56	/* r6 = len with the low 7 bits cleared (multiple of 128) */
+
+	/* Checksum in blocks of MAX_SIZE */
+1:	lis	r7,MAX_SIZE@h
+	ori	r7,r7,MAX_SIZE@l
+	mr	r9,r7		/* r9 = MAX_SIZE, used for the table offset below */
+	cmpd	r6,r7
+	bgt	2f
+	mr	r7,r6		/* r7 = min(r6, MAX_SIZE): bytes handled in this block */
+2:	subf	r6,r7,r6	/* r6 = bytes left over for later blocks */
+
+	/* our main loop does 128 bytes at a time */
+	srdi	r7,r7,7
+
+	/*
+	 * Work out the offset into the constants table to start at. Each
+	 * constant is 16 bytes, and it is used against 128 bytes of input
+	 * data - 128 / 16 = 8
+	 */
+	sldi	r8,r7,4		/* r8 = 16 bytes of constants per 128 byte group */
+	srdi	r9,r9,3		/* r9 = MAX_SIZE / 8: table bytes covering a full block */
+	subf	r8,r8,r9	/* r8 = r9 - r8: table bytes this block does not need */
+
+	/* We reduce our final 128 bytes in a separate step */
+	addi	r7,r7,-1
+	mtctr	r7		/* ctr = number of 128 byte groups minus one */
+
+	LOAD_REG_ADDR(r3, .constants)
+
+	/* Find the start of our constants */
+	add	r3,r3,r8
+
+	/* zero v0-v7 which will contain our checksums */
+	vxor	v0,v0,v0
+	vxor	v1,v1,v1
+	vxor	v2,v2,v2
+	vxor	v3,v3,v3
+	vxor	v4,v4,v4
+	vxor	v5,v5,v5
+	vxor	v6,v6,v6
+	vxor	v7,v7,v7
+
+	lvx	const1,0,r3
+
+	/*
+	 * If we are looping back to consume more data we use the values
+	 * already in v16-v23.
+	 */
+	cmpdi	r0,1		/* r0 == 1 only after a previous block has completed */
+	beq	2f
+
+	/* First warm up pass */
+	lvx	v16,0,r4
+	lvx	v17,off16,r4
+	VPERM(v16,v16,v16,byteswap)
+	VPERM(v17,v17,v17,byteswap)
+	lvx	v18,off32,r4
+	lvx	v19,off48,r4
+	VPERM(v18,v18,v18,byteswap)
+	VPERM(v19,v19,v19,byteswap)
+	lvx	v20,off64,r4
+	lvx	v21,off80,r4
+	VPERM(v20,v20,v20,byteswap)
+	VPERM(v21,v21,v21,byteswap)
+	lvx	v22,off96,r4
+	lvx	v23,off112,r4
+	VPERM(v22,v22,v22,byteswap)
+	VPERM(v23,v23,v23,byteswap)
+	addi	r4,r4,8*16	/* advance the data pointer by 128 bytes */
+
+	/* xor in initial value */
+	vxor	v16,v16,v8
+
+2:	bdz	.Lfirst_warm_up_done	/* decrement ctr; if it hits zero skip the pipelined passes */
+
+	addi	r3,r3,16
+	lvx	const2,0,r3	/* preload the constant for the following pass */
+
+	/* Second warm up pass */
+	VPMSUMD(v8,v16,const1)
+	lvx	v16,0,r4
+	VPERM(v16,v16,v16,byteswap)
+	ori	r2,r2,0		/* no-op (r2 | 0); presumably scheduling padding - TODO confirm */
+
+	VPMSUMD(v9,v17,const1)
+	lvx	v17,off16,r4
+	VPERM(v17,v17,v17,byteswap)
+	ori	r2,r2,0
+
+	VPMSUMD(v10,v18,const1)
+	lvx	v18,off32,r4
+	VPERM(v18,v18,v18,byteswap)
+	ori	r2,r2,0
+
+	VPMSUMD(v11,v19,const1)
+	lvx	v19,off48,r4
+	VPERM(v19,v19,v19,byteswap)
+	ori	r2,r2,0
+
+	VPMSUMD(v12,v20,const1)
+	lvx	v20,off64,r4
+	VPERM(v20,v20,v20,byteswap)
+	ori	r2,r2,0
+
+	VPMSUMD(v13,v21,const1)
+	lvx	v21,off80,r4
+	VPERM(v21,v21,v21,byteswap)
+	ori	r2,r2,0
+
+	VPMSUMD(v14,v22,const1)
+	lvx	v22,off96,r4
+	VPERM(v22,v22,v22,byteswap)
+	ori	r2,r2,0
+
+	VPMSUMD(v15,v23,const1)
+	lvx	v23,off112,r4
+	VPERM(v23,v23,v23,byteswap)
+
+	addi	r4,r4,8*16
+
+	bdz	.Lfirst_cool_down	/* ctr reached zero: no main loop iterations needed */
+
+	/*
+	 * main loop. We modulo schedule it such that it takes three iterations
+	 * to complete - first iteration load, second iteration vpmsum, third
+	 * iteration xor.
+	 */
+	.balign	16
+4:	lvx	const1,0,r3	/* same table entry that const2 was preloaded from */
+	addi	r3,r3,16
+	ori	r2,r2,0
+
+	vxor	v0,v0,v8	/* fold in the product computed one iteration ago */
+	VPMSUMD(v8,v16,const2)
+	lvx	v16,0,r4
+	VPERM(v16,v16,v16,byteswap)
+	ori	r2,r2,0
+
+	vxor	v1,v1,v9
+	VPMSUMD(v9,v17,const2)
+	lvx	v17,off16,r4
+	VPERM(v17,v17,v17,byteswap)
+	ori	r2,r2,0
+
+	vxor	v2,v2,v10
+	VPMSUMD(v10,v18,const2)
+	lvx	v18,off32,r4
+	VPERM(v18,v18,v18,byteswap)
+	ori	r2,r2,0
+
+	vxor	v3,v3,v11
+	VPMSUMD(v11,v19,const2)
+	lvx	v19,off48,r4
+	VPERM(v19,v19,v19,byteswap)
+	lvx	const2,0,r3	/* preload the next iteration's constant (r3 was bumped above) */
+	ori	r2,r2,0
+
+	vxor	v4,v4,v12
+	VPMSUMD(v12,v20,const1)
+	lvx	v20,off64,r4
+	VPERM(v20,v20,v20,byteswap)
+	ori	r2,r2,0
+
+	vxor	v5,v5,v13
+	VPMSUMD(v13,v21,const1)
+	lvx	v21,off80,r4
+	VPERM(v21,v21,v21,byteswap)
+	ori	r2,r2,0
+
+	vxor	v6,v6,v14
+	VPMSUMD(v14,v22,const1)
+	lvx	v22,off96,r4
+	VPERM(v22,v22,v22,byteswap)
+	ori	r2,r2,0
+
+	vxor	v7,v7,v15
+	VPMSUMD(v15,v23,const1)
+	lvx	v23,off112,r4
+	VPERM(v23,v23,v23,byteswap)
+
+	addi	r4,r4,8*16
+
+	bdnz	4b		/* decrement ctr and loop while it is non-zero */
+
+.Lfirst_cool_down:
+	/* First cool down pass */
+	lvx	const1,0,r3
+	addi	r3,r3,16
+
+	vxor	v0,v0,v8	/* fold in the products from the previous pass */
+	VPMSUMD(v8,v16,const1)	/* multiply the last data loaded; no further loads here */
+	ori	r2,r2,0		/* no-op (r2 | 0); presumably scheduling padding - TODO confirm */
+
+	vxor	v1,v1,v9
+	VPMSUMD(v9,v17,const1)
+	ori	r2,r2,0
+
+	vxor	v2,v2,v10
+	VPMSUMD(v10,v18,const1)
+	ori	r2,r2,0
+
+	vxor	v3,v3,v11
+	VPMSUMD(v11,v19,const1)
+	ori	r2,r2,0
+
+	vxor	v4,v4,v12
+	VPMSUMD(v12,v20,const1)
+	ori	r2,r2,0
+
+	vxor	v5,v5,v13
+	VPMSUMD(v13,v21,const1)
+	ori	r2,r2,0
+
+	vxor	v6,v6,v14
+	VPMSUMD(v14,v22,const1)
+	ori	r2,r2,0
+
+	vxor	v7,v7,v15
+	VPMSUMD(v15,v23,const1)
+	ori	r2,r2,0
+
+.Lsecond_cool_down:
+	/* Second cool down pass */
+	vxor	v0,v0,v8	/* fold the final products into the accumulators v0-v7 */
+	vxor	v1,v1,v9
+	vxor	v2,v2,v10
+	vxor	v3,v3,v11
+	vxor	v4,v4,v12
+	vxor	v5,v5,v13
+	vxor	v6,v6,v14
+	vxor	v7,v7,v15
+
+#ifdef REFLECT
+	/*
+	 * vpmsumd produces a 96 bit result in the least significant bits
+	 * of the register. Since we are bit reflected we have to shift it
+	 * left 32 bits so it occupies the least significant bits in the
+	 * bit reflected domain.
+	 */
+	vsldoi	v0,v0,zeroes,4
+	vsldoi	v1,v1,zeroes,4
+	vsldoi	v2,v2,zeroes,4
+	vsldoi	v3,v3,zeroes,4
+	vsldoi	v4,v4,zeroes,4
+	vsldoi	v5,v5,zeroes,4
+	vsldoi	v6,v6,zeroes,4
+	vsldoi	v7,v7,zeroes,4
+#endif
+
+	/* xor with last 1024 bits */
+	lvx	v8,0,r4
+	lvx	v9,off16,r4
+	VPERM(v8,v8,v8,byteswap)
+	VPERM(v9,v9,v9,byteswap)
+	lvx	v10,off32,r4
+	lvx	v11,off48,r4
+	VPERM(v10,v10,v10,byteswap)
+	VPERM(v11,v11,v11,byteswap)
+	lvx	v12,off64,r4
+	lvx	v13,off80,r4
+	VPERM(v12,v12,v12,byteswap)
+	VPERM(v13,v13,v13,byteswap)
+	lvx	v14,off96,r4
+	lvx	v15,off112,r4
+	VPERM(v14,v14,v14,byteswap)
+	VPERM(v15,v15,v15,byteswap)
+
+	addi	r4,r4,8*16
+
+	vxor	v16,v0,v8	/* v16-v23 now hold the 1024 bit state of this block */
+	vxor	v17,v1,v9
+	vxor	v18,v2,v10
+	vxor	v19,v3,v11
+	vxor	v20,v4,v12
+	vxor	v21,v5,v13
+	vxor	v22,v6,v14
+	vxor	v23,v7,v15
+
+	li	r0,1		/* tell the next block to reuse v16-v23 instead of loading */
+	cmpdi	r6,0		/* any bytes left for another block? (tested by bne below) */
+	addi	r6,r6,128	/* NOTE(review): presumably re-counts the 128 bytes carried in v16-v23 - confirm */
+	bne	1b
+
+	/* Work out how many bytes we have left */
+	andi.	r5,r5,127	/* r5 = len % 128 */
+
+	/* Calculate where in the constant table we need to start */
+	subfic	r6,r5,128	/* r6 = 128 - r5 */
+	add	r3,r3,r6
+
+	/* How many 16 byte chunks are in the tail */
+	srdi	r7,r5,4
+	mtctr	r7
+
+	/*
+	 * Reduce the previously calculated 1024 bits to 64 bits, shifting
+	 * 32 bits to include the trailing 32 bits of zeros
+	 */
+	lvx	v0,0,r3
+	lvx	v1,off16,r3
+	lvx	v2,off32,r3
+	lvx	v3,off48,r3
+	lvx	v4,off64,r3
+	lvx	v5,off80,r3
+	lvx	v6,off96,r3
+	lvx	v7,off112,r3
+	addi	r3,r3,8*16
+
+	VPMSUMW(v0,v16,v0)
+	VPMSUMW(v1,v17,v1)
+	VPMSUMW(v2,v18,v2)
+	VPMSUMW(v3,v19,v3)
+	VPMSUMW(v4,v20,v4)
+	VPMSUMW(v5,v21,v5)
+	VPMSUMW(v6,v22,v6)
+	VPMSUMW(v7,v23,v7)
+
+	/* Now reduce the tail (0 - 112 bytes) */
+	cmpdi	r7,0		/* no 16 byte chunks left: go straight to the final xor */
+	beq	1f
+
+	lvx	v16,0,r4	/* tail data chunk */
+	lvx	v17,0,r3	/* matching constant */
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16	/* every tail chunk is folded into v0 */
+	bdz	1f		/* stop once ctr chunks have been folded */
+
+	lvx	v16,off16,r4
+	lvx	v17,off16,r3
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16
+	bdz	1f
+
+	lvx	v16,off32,r4
+	lvx	v17,off32,r3
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16
+	bdz	1f
+
+	lvx	v16,off48,r4
+	lvx	v17,off48,r3
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16
+	bdz	1f
+
+	lvx	v16,off64,r4
+	lvx	v17,off64,r3
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16
+	bdz	1f
+
+	lvx	v16,off80,r4
+	lvx	v17,off80,r3
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16
+	bdz	1f
+
+	lvx	v16,off96,r4	/* seventh and last possible chunk (tail is at most 112 bytes) */
+	lvx	v17,off96,r3
+	VPERM(v16,v16,v16,byteswap)
+	VPMSUMW(v16,v16,v17)
+	vxor	v0,v0,v16
+
+	/* Now xor all the parallel chunks together */
+1:	vxor	v0,v0,v1
+	vxor	v2,v2,v3
+	vxor	v4,v4,v5
+	vxor	v6,v6,v7
+
+	vxor	v0,v0,v2
+	vxor	v4,v4,v6
+
+	vxor	v0,v0,v4
+
+.Lbarrett_reduction:
+	/* Barrett constants */
+	LOAD_REG_ADDR(r3, .barrett_constants)
+
+	lvx	const1,0,r3
+	lvx	const2,off16,r3
+
+	vsldoi	v1,v0,v0,8	/* v1 = v0 with its two 64 bit halves swapped */
+	vxor	v0,v0,v1		/* xor two 64 bit results together */
+
+#ifdef REFLECT
+	/* shift left one bit */
+	vspltisb v1,1
+	vsl	v0,v0,v1
+#endif
+
+	vand	v0,v0,mask_64bit	/* keep only the low 64 bits */
+#ifndef REFLECT
+	/*
+	 * Now for the Barrett reduction algorithm. The idea is to calculate q,
+	 * the multiple of our polynomial that we need to subtract. By
+	 * doing the computation 2x bits higher (ie 64 bits) and shifting the
+	 * result back down 2x bits, we round down to the nearest multiple.
+	 */
+	VPMSUMD(v1,v0,const1)	/* ma */
+	vsldoi	v1,zeroes,v1,8	/* q = floor(ma/(2^64)) */
+	VPMSUMD(v1,v1,const2)	/* qn */
+	vxor	v0,v0,v1	/* a - qn, subtraction is xor in GF(2) */
+
+	/*
+	 * Get the result into r3. We need to shift it left 8 bytes:
+	 * V0 [ 0 1 2 X ]
+	 * V0 [ 0 X 2 3 ]
+	 */
+	vsldoi	v0,v0,zeroes,8	/* shift result into top 64 bits */
+#else
+	/*
+	 * The reflected version of Barrett reduction. Instead of bit
+	 * reflecting our data (which is expensive to do), we bit reflect our
+	 * constants and our algorithm, which means the intermediate data in
+	 * our vector registers goes from 0-63 instead of 63-0. We can reflect
+	 * the algorithm because we don't carry in mod 2 arithmetic.
+	 */
+	vand	v1,v0,mask_32bit	/* bottom 32 bits of a */
+	VPMSUMD(v1,v1,const1)		/* ma */
+	vand	v1,v1,mask_32bit	/* bottom 32bits of ma */
+	VPMSUMD(v1,v1,const2)		/* qn */
+	vxor	v0,v0,v1		/* a - qn, subtraction is xor in GF(2) */
+
+	/*
+	 * Since we are bit reflected, the result (ie the low 32 bits) is in
+	 * the high 32 bits. We just need to shift it left 4 bytes
+	 * V0 [ 0 1 X 3 ]
+	 * V0 [ 0 X 2 3 ]
+	 */
+	vsldoi	v0,v0,zeroes,4		/* shift result into top 64 bits */
+#endif
+
+	/* Get it into r3 */
+	MFVRD(R3, v0)
+
+.Lout:
+	subi	r6,r1,56+10*16	/* same save slot addresses as computed on entry */
+	subi	r7,r1,56+2*16
+
+	lvx	v20,0,r6	/* restore v20-v29 */
+	lvx	v21,off16,r6
+	lvx	v22,off32,r6
+	lvx	v23,off48,r6
+	lvx	v24,off64,r6
+	lvx	v25,off80,r6
+	lvx	v26,off96,r6
+	lvx	v27,off112,r6
+	lvx	v28,0,r7
+	lvx	v29,off16,r7
+
+	ld	r31,-8(r1)	/* restore r25-r31; the offN aliases are dead from here on */
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+	ld	r26,-48(r1)
+	ld	r25,-56(r1)
+
+	blr			/* r3 carries the return value */
+
+.Lfirst_warm_up_done:
+	lvx	const1,0,r3	/* reached from label 2: when ctr hit zero before any pipelined pass */
+	addi	r3,r3,16
+
+	VPMSUMD(v8,v16,const1)	/* multiply the 128 bytes held in v16-v23 */
+	VPMSUMD(v9,v17,const1)
+	VPMSUMD(v10,v18,const1)
+	VPMSUMD(v11,v19,const1)
+	VPMSUMD(v12,v20,const1)
+	VPMSUMD(v13,v21,const1)
+	VPMSUMD(v14,v22,const1)
+	VPMSUMD(v15,v23,const1)
+
+	b	.Lsecond_cool_down	/* v8-v15 are xored into v0-v7 there */
+
+.Lshort:
+	cmpdi	r5,0		/* len == 0: return the initial crc unchanged */
+	beq	.Lzero
+
+	LOAD_REG_ADDR(r3, .short_constants)
+
+	/* Calculate where in the constant table we need to start */
+	subfic	r6,r5,256	/* r6 = 256 - len */
+	add	r3,r3,r6
+
+	/* How many 16 byte chunks? */
+	srdi	r7,r5,4
+	mtctr	r7		/* NOTE(review): relies on len >= 16; ctr == 0 would wrap in the bdz chain - confirm callers */
+
+	vxor	v19,v19,v19	/* v19/v20: two accumulators filled alternately at .Lv15-.Lv0 */
+	vxor	v20,v20,v20
+
+	lvx	v0,0,r4		/* data chunk */
+	lvx	v16,0,r3	/* matching constant */
+	VPERM(v0,v0,v16,byteswap)	/* NOTE(review): 2nd source is the constant reg, unlike VPERM(x,x,x) elsewhere - confirm it is never selected */
+	vxor	v0,v0,v8	/* xor in initial value */
+	VPMSUMW(v0,v0,v16)
+	bdz	.Lv0		/* last chunk done: enter the xor chain at the matching label */
+
+	lvx	v1,off16,r4
+	lvx	v17,off16,r3
+	VPERM(v1,v1,v17,byteswap)
+	VPMSUMW(v1,v1,v17)
+	bdz	.Lv1
+
+	lvx	v2,off32,r4
+	lvx	v16,off32,r3
+	VPERM(v2,v2,v16,byteswap)
+	VPMSUMW(v2,v2,v16)
+	bdz	.Lv2
+
+	lvx	v3,off48,r4
+	lvx	v17,off48,r3
+	VPERM(v3,v3,v17,byteswap)
+	VPMSUMW(v3,v3,v17)
+	bdz	.Lv3
+
+	lvx	v4,off64,r4
+	lvx	v16,off64,r3
+	VPERM(v4,v4,v16,byteswap)
+	VPMSUMW(v4,v4,v16)
+	bdz	.Lv4
+
+	lvx	v5,off80,r4
+	lvx	v17,off80,r3
+	VPERM(v5,v5,v17,byteswap)
+	VPMSUMW(v5,v5,v17)
+	bdz	.Lv5
+
+	lvx	v6,off96,r4
+	lvx	v16,off96,r3
+	VPERM(v6,v6,v16,byteswap)
+	VPMSUMW(v6,v6,v16)
+	bdz	.Lv6
+
+	lvx	v7,off112,r4
+	lvx	v17,off112,r3
+	VPERM(v7,v7,v17,byteswap)
+	VPMSUMW(v7,v7,v17)
+	bdz	.Lv7
+
+	addi	r3,r3,128	/* second group of eight chunks: rebase both pointers */
+	addi	r4,r4,128
+
+	lvx	v8,0,r4		/* v8 is free again: the initial value was consumed above */
+	lvx	v16,0,r3
+	VPERM(v8,v8,v16,byteswap)
+	VPMSUMW(v8,v8,v16)
+	bdz	.Lv8
+
+	lvx	v9,off16,r4
+	lvx	v17,off16,r3
+	VPERM(v9,v9,v17,byteswap)
+	VPMSUMW(v9,v9,v17)
+	bdz	.Lv9
+
+	lvx	v10,off32,r4
+	lvx	v16,off32,r3
+	VPERM(v10,v10,v16,byteswap)
+	VPMSUMW(v10,v10,v16)
+	bdz	.Lv10
+
+	lvx	v11,off48,r4
+	lvx	v17,off48,r3
+	VPERM(v11,v11,v17,byteswap)
+	VPMSUMW(v11,v11,v17)
+	bdz	.Lv11
+
+	lvx	v12,off64,r4
+	lvx	v16,off64,r3
+	VPERM(v12,v12,v16,byteswap)
+	VPMSUMW(v12,v12,v16)
+	bdz	.Lv12
+
+	lvx	v13,off80,r4
+	lvx	v17,off80,r3
+	VPERM(v13,v13,v17,byteswap)
+	VPMSUMW(v13,v13,v17)
+	bdz	.Lv13
+
+	lvx	v14,off96,r4
+	lvx	v16,off96,r3
+	VPERM(v14,v14,v16,byteswap)
+	VPMSUMW(v14,v14,v16)
+	bdz	.Lv14
+
+	lvx	v15,off112,r4
+	lvx	v17,off112,r3
+	VPERM(v15,v15,v17,byteswap)
+	VPMSUMW(v15,v15,v17)
+
+.Lv15:	vxor	v19,v19,v15	/* fall-through chain: entering at .LvN folds vN down to v0 */
+.Lv14:	vxor	v20,v20,v14
+.Lv13:	vxor	v19,v19,v13
+.Lv12:	vxor	v20,v20,v12
+.Lv11:	vxor	v19,v19,v11
+.Lv10:	vxor	v20,v20,v10
+.Lv9:	vxor	v19,v19,v9
+.Lv8:	vxor	v20,v20,v8
+.Lv7:	vxor	v19,v19,v7
+.Lv6:	vxor	v20,v20,v6
+.Lv5:	vxor	v19,v19,v5
+.Lv4:	vxor	v20,v20,v4
+.Lv3:	vxor	v19,v19,v3
+.Lv2:	vxor	v20,v20,v2
+.Lv1:	vxor	v19,v19,v1
+.Lv0:	vxor	v20,v20,v0
+
+	vxor	v0,v19,v20	/* combine both accumulators into v0 */
+
+	b	.Lbarrett_reduction
+
+.Lzero:
+	mr	r3,r10		/* len == 0: return the crc that was passed in (saved in r10 on entry) */
+	b	.Lout		/* still run the register restores */
+
+FUNC_END(CRC_FUNCTION_NAME)
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/crypto/crc32c-vpmsum_asm.S
new file mode 100644
index 0000000000..bf442004ea
--- /dev/null
+++ b/arch/powerpc/crypto/crc32c-vpmsum_asm.S
@@ -0,0 +1,842 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Calculate a crc32c with vpmsum acceleration
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ */
+	.section	.rodata
+.balign 16
+
+.byteswap_constant:
+	/* byte reverse permute constant */
+	.octa 0x0F0E0D0C0B0A09080706050403020100
+
+.constants:
+
+	/* Reduce 262144 bits to 1024 bits */
+	/* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
+	.octa 0x00000000b6ca9e20000000009c37c408
+
+	/* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
+	.octa 0x00000000350249a800000001b51df26c
+
+	/* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
+	.octa 0x00000001862dac54000000000724b9d0
+
+	/* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
+	.octa 0x00000001d87fb48c00000001c00532fe
+
+	/* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
+	.octa 0x00000001f39b699e00000000f05a9362
+
+	/* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
+	.octa 0x0000000101da11b400000001e1007970
+
+	/* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
+	.octa 0x00000001cab571e000000000a57366ee
+
+	/* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
+	.octa 0x00000000c7020cfe0000000192011284
+
+	/* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
+	.octa 0x00000000cdaed1ae0000000162716d9a
+
+	/* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
+	.octa 0x00000001e804effc00000000cd97ecde
+
+	/* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
+	.octa 0x0000000077c3ea3a0000000058812bc0
+
+	/* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
+	.octa 0x0000000068df31b40000000088b8c12e
+
+	/* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
+	.octa 0x00000000b059b6c200000001230b234c
+
+	/* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
+	.octa 0x0000000145fb8ed800000001120b416e
+
+	/* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
+	.octa 0x00000000cbc0916800000001974aecb0
+
+	/* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
+	.octa 0x000000005ceeedc2000000008ee3f226
+
+	/* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
+	.octa 0x0000000047d74e8600000001089aba9a
+
+	/* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
+	.octa 0x00000001407e9e220000000065113872
+
+	/* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
+	.octa 0x00000001da967bda000000005c07ec10
+
+	/* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
+	.octa 0x000000006c8983680000000187590924
+
+	/* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
+	.octa 0x00000000f2d14c9800000000e35da7c6
+
+	/* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
+	.octa 0x00000001993c6ad4000000000415855a
+
+	/* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
+	.octa 0x000000014683d1ac0000000073617758
+
+	/* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
+	.octa 0x00000001a7c93e6c0000000176021d28
+
+	/* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
+	.octa 0x000000010211e90a00000001c358fd0a
+
+	/* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
+	.octa 0x000000001119403e00000001ff7a2c18
+
+	/* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
+	.octa 0x000000001c3261aa00000000f2d9f7e4
+
+	/* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
+	.octa 0x000000014e37a634000000016cf1f9c8
+
+	/* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
+	.octa 0x0000000073786c0c000000010af9279a
+
+	/* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
+	.octa 0x000000011dc037f80000000004f101e8
+
+	/* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
+	.octa 0x0000000031433dfc0000000070bcf184
+
+	/* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
+	.octa 0x000000009cde8348000000000a8de642
+
+	/* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
+	.octa 0x0000000038d3c2a60000000062ea130c
+
+	/* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
+	.octa 0x000000011b25f26000000001eb31cbb2
+
+	/* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
+	.octa 0x000000001629e6f00000000170783448
+
+	/* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
+	.octa 0x0000000160838b4c00000001a684b4c6
+
+	/* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
+	.octa 0x000000007a44011c00000000253ca5b4
+
+	/* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
+	.octa 0x00000000226f417a0000000057b4b1e2
+
+	/* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
+	.octa 0x0000000045eb2eb400000000b6bd084c
+
+	/* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
+	.octa 0x000000014459d70c0000000123c2d592
+
+	/* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
+	.octa 0x00000001d406ed8200000000159dafce
+
+	/* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
+	.octa 0x0000000160c8e1a80000000127e1a64e
+
+	/* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
+	.octa 0x0000000027ba80980000000056860754
+
+	/* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
+	.octa 0x000000006d92d01800000001e661aae8
+
+	/* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
+	.octa 0x000000012ed7e3f200000000f82c6166
+
+	/* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
+	.octa 0x000000002dc8778800000000c4f9c7ae
+
+	/* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
+	.octa 0x0000000018240bb80000000074203d20
+
+	/* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
+	.octa 0x000000001ad381580000000198173052
+
+	/* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
+	.octa 0x00000001396b78f200000001ce8aba54
+
+	/* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
+	.octa 0x000000011a68133400000001850d5d94
+
+	/* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
+	.octa 0x000000012104732e00000001d609239c
+
+	/* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
+	.octa 0x00000000a140d90c000000001595f048
+
+	/* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
+	.octa 0x00000001b7215eda0000000042ccee08
+
+	/* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
+	.octa 0x00000001aaf1df3c000000010a389d74
+
+	/* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
+	.octa 0x0000000029d15b8a000000012a840da6
+
+	/* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
+	.octa 0x00000000f1a96922000000001d181c0c
+
+	/* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
+	.octa 0x00000001ac80d03c0000000068b7d1f6
+
+	/* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
+	.octa 0x000000000f11d56a000000005b0f14fc
+
+	/* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
+	.octa 0x00000001f1c022a20000000179e9e730
+
+	/* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
+	.octa 0x0000000173d00ae200000001ce1368d6
+
+	/* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
+	.octa 0x00000001d4ffe4ac0000000112c3a84c
+
+	/* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
+	.octa 0x000000016edc5ae400000000de940fee
+
+	/* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
+	.octa 0x00000001f1a0214000000000fe896b7e
+
+	/* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
+	.octa 0x00000000ca0b28a000000001f797431c
+
+	/* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
+	.octa 0x00000001928e30a20000000053e989ba
+
+	/* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
+	.octa 0x0000000097b1b002000000003920cd16
+
+	/* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
+	.octa 0x00000000b15bf90600000001e6f579b8
+
+	/* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
+	.octa 0x00000000411c5d52000000007493cb0a
+
+	/* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
+	.octa 0x00000001c36f330000000001bdd376d8
+
+	/* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
+	.octa 0x00000001119227e0000000016badfee6
+
+	/* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
+	.octa 0x00000000114d47020000000071de5c58
+
+	/* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
+	.octa 0x00000000458b5b9800000000453f317c
+
+	/* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
+	.octa 0x000000012e31fb8e0000000121675cce
+
+	/* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
+	.octa 0x000000005cf619d800000001f409ee92
+
+	/* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
+	.octa 0x0000000063f4d8b200000000f36b9c88
+
+	/* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
+	.octa 0x000000004138dc8a0000000036b398f4
+
+	/* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
+	.octa 0x00000001d29ee8e000000001748f9adc
+
+	/* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
+	.octa 0x000000006a08ace800000001be94ec00
+
+	/* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
+	.octa 0x0000000127d4201000000000b74370d6
+
+	/* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
+	.octa 0x0000000019d76b6200000001174d0b98
+
+	/* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
+	.octa 0x00000001b1471f6e00000000befc06a4
+
+	/* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
+	.octa 0x00000001f64c19cc00000001ae125288
+
+	/* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
+	.octa 0x00000000003c0ea00000000095c19b34
+
+	/* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
+	.octa 0x000000014d73abf600000001a78496f2
+
+	/* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
+	.octa 0x00000001620eb84400000001ac5390a0
+
+	/* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
+	.octa 0x0000000147655048000000002a80ed6e
+
+	/* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
+	.octa 0x0000000067b5077e00000001fa9b0128
+
+	/* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
+	.octa 0x0000000010ffe20600000001ea94929e
+
+	/* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
+	.octa 0x000000000fee8f1e0000000125f4305c
+
+	/* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
+	.octa 0x00000001da26fbae00000001471e2002
+
+	/* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
+	.octa 0x00000001b3a8bd880000000132d2253a
+
+	/* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
+	.octa 0x00000000e8f3898e00000000f26b3592
+
+	/* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
+	.octa 0x00000000b0d0d28c00000000bc8b67b0
+
+	/* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
+	.octa 0x0000000030f2a798000000013a826ef2
+
+	/* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
+	.octa 0x000000000fba10020000000081482c84
+
+	/* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
+	.octa 0x00000000bdb9bd7200000000e77307c2
+
+	/* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
+	.octa 0x0000000075d3bf5a00000000d4a07ec8
+
+	/* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
+	.octa 0x00000000ef1f98a00000000017102100
+
+	/* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
+	.octa 0x00000000689c760200000000db406486
+
+	/* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
+	.octa 0x000000016d5fa5fe0000000192db7f88
+
+	/* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
+	.octa 0x00000001d0d2b9ca000000018bf67b1e
+
+	/* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
+	.octa 0x0000000041e7b470000000007c09163e
+
+	/* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
+	.octa 0x00000001cbb6495e000000000adac060
+
+	/* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
+	.octa 0x000000010052a0b000000000bd8316ae
+
+	/* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
+	.octa 0x00000001d8effb5c000000019f09ab54
+
+	/* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
+	.octa 0x00000001d969853c0000000125155542
+
+	/* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
+	.octa 0x00000000523ccce2000000018fdb5882
+
+	/* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
+	.octa 0x000000001e2436bc00000000e794b3f4
+
+	/* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
+	.octa 0x00000000ddd1c3a2000000016f9bb022
+
+	/* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
+	.octa 0x0000000019fcfe3800000000290c9978
+
+	/* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
+	.octa 0x00000001ce95db640000000083c0f350
+
+	/* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
+	.octa 0x00000000af5828060000000173ea6628
+
+	/* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
+	.octa 0x00000001006388f600000001c8b4e00a
+
+	/* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
+	.octa 0x0000000179eca00a00000000de95d6aa
+
+	/* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
+	.octa 0x0000000122410a6a000000010b7f7248
+
+	/* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
+	.octa 0x000000004288e87c00000001326e3a06
+
+	/* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
+	.octa 0x000000016c5490da00000000bb62c2e6
+
+	/* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
+	.octa 0x00000000d1c71f6e0000000156a4b2c2
+
+	/* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
+	.octa 0x00000001b4ce08a6000000011dfe763a
+
+	/* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
+	.octa 0x00000001466ba60c000000007bcca8e2
+
+	/* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
+	.octa 0x00000001f6c488a40000000186118faa
+
+	/* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
+	.octa 0x000000013bfb06820000000111a65a88
+
+	/* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
+	.octa 0x00000000690e9e54000000003565e1c4
+
+	/* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
+	.octa 0x00000000281346b6000000012ed02a82
+
+	/* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
+	.octa 0x000000015646402400000000c486ecfc
+
+	/* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
+	.octa 0x000000016063a8dc0000000001b951b2
+
+	/* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
+	.octa 0x0000000116a663620000000048143916
+
+	/* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
+	.octa 0x000000017e8aa4d200000001dc2ae124
+
+	/* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
+	.octa 0x00000001728eb10c00000001416c58d6
+
+	/* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
+	.octa 0x00000001b08fd7fa00000000a479744a
+
+	/* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
+	.octa 0x00000001092a16e80000000096ca3a26
+
+	/* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
+	.octa 0x00000000a505637c00000000ff223d4e
+
+	/* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
+	.octa 0x00000000d94869b2000000010e84da42
+
+	/* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
+	.octa 0x00000001c8b203ae00000001b61ba3d0
+
+	/* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
+	.octa 0x000000005704aea000000000680f2de8
+
+	/* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
+	.octa 0x000000012e295fa2000000008772a9a8
+
+	/* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
+	.octa 0x000000011d0908bc0000000155f295bc
+
+	/* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
+	.octa 0x0000000193ed97ea00000000595f9282
+
+	/* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
+	.octa 0x000000013a0f1c520000000164b1c25a
+
+	/* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
+	.octa 0x000000010c2c40c000000000fbd67c50
+
+	/* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
+	.octa 0x00000000ff6fac3e0000000096076268
+
+	/* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
+	.octa 0x000000017b3609c000000001d288e4cc
+
+	/* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
+	.octa 0x0000000088c8c92200000001eaac1bdc
+
+	/* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
+	.octa 0x00000001751baae600000001f1ea39e2
+
+	/* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
+	.octa 0x000000010795297200000001eb6506fc
+
+	/* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
+	.octa 0x0000000162b00abe000000010f806ffe
+
+	/* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
+	.octa 0x000000000d7b404c000000010408481e
+
+	/* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
+	.octa 0x00000000763b13d40000000188260534
+
+	/* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
+	.octa 0x00000000f6dc22d80000000058fc73e0
+
+	/* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
+	.octa 0x000000007daae06000000000391c59b8
+
+	/* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
+	.octa 0x000000013359ab7c000000018b638400
+
+	/* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
+	.octa 0x000000008add438a000000011738f5c4
+
+	/* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
+	.octa 0x00000001edbefdea000000008cf7c6da
+
+	/* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
+	.octa 0x000000004104e0f800000001ef97fb16
+
+	/* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
+	.octa 0x00000000b48a82220000000102130e20
+
+	/* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
+	.octa 0x00000001bcb4684400000000db968898
+
+	/* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
+	.octa 0x000000013293ce0a00000000b5047b5e
+
+	/* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
+	.octa 0x00000001710d0844000000010b90fdb2
+
+	/* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
+	.octa 0x0000000117907f6e000000004834a32e
+
+	/* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
+	.octa 0x0000000087ddf93e0000000059c8f2b0
+
+	/* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
+	.octa 0x000000005970e9b00000000122cec508
+
+	/* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
+	.octa 0x0000000185b2b7d0000000000a330cda
+
+	/* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
+	.octa 0x00000001dcee0efc000000014a47148c
+
+	/* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
+	.octa 0x0000000030da27220000000042c61cb8
+
+	/* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
+	.octa 0x000000012f925a180000000012fe6960
+
+	/* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
+	.octa 0x00000000dd2e357c00000000dbda2c20
+
+	/* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
+	.octa 0x00000000071c80de000000011122410c
+
+	/* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
+	.octa 0x000000011513140a00000000977b2070
+
+	/* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
+	.octa 0x00000001df876e8e000000014050438e
+
+	/* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
+	.octa 0x000000015f81d6ce0000000147c840e8
+
+	/* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
+	.octa 0x000000019dd94dbe00000001cc7c88ce
+
+	/* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
+	.octa 0x00000001373d206e00000001476b35a4
+
+	/* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
+	.octa 0x00000000668ccade000000013d52d508
+
+	/* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
+	.octa 0x00000001b192d268000000008e4be32e
+
+	/* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
+	.octa 0x00000000e30f3a7800000000024120fe
+
+	/* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
+	.octa 0x000000010ef1f7bc00000000ddecddb4
+
+	/* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
+	.octa 0x00000001f5ac738000000000d4d403bc
+
+	/* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
+	.octa 0x000000011822ea7000000001734b89aa
+
+	/* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
+	.octa 0x00000000c3a33848000000010e7a58d6
+
+	/* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
+	.octa 0x00000001bd151c2400000001f9f04e9c
+
+	/* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
+	.octa 0x0000000056002d7600000000b692225e
+
+	/* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
+	.octa 0x000000014657c4f4000000019b8d3f3e
+
+	/* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
+	.octa 0x0000000113742d7c00000001a874f11e
+
+	/* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
+	.octa 0x000000019c5920ba000000010d5a4254
+
+	/* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
+	.octa 0x000000005216d2d600000000bbb2f5d6
+
+	/* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
+	.octa 0x0000000136f5ad8a0000000179cc0e36
+
+	/* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
+	.octa 0x000000018b07beb600000001dca1da4a
+
+	/* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
+	.octa 0x00000000db1e93b000000000feb1a192
+
+	/* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
+	.octa 0x000000000b96fa3a00000000d1eeedd6
+
+	/* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
+	.octa 0x00000001d9968af0000000008fad9bb4
+
+	/* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
+	.octa 0x000000000e4a77a200000001884938e4
+
+	/* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
+	.octa 0x00000000508c2ac800000001bc2e9bc0
+
+	/* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
+	.octa 0x0000000021572a8000000001f9658a68
+
+	/* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
+	.octa 0x00000001b859daf2000000001b9224fc
+
+	/* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
+	.octa 0x000000016f7884740000000055b2fb84
+
+	/* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
+	.octa 0x00000001b438810e000000018b090348
+
+	/* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
+	.octa 0x0000000095ddc6f2000000011ccbd5ea
+
+	/* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
+	.octa 0x00000001d977c20c0000000007ae47f8
+
+	/* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
+	.octa 0x00000000ebedb99a0000000172acbec0
+
+	/* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
+	.octa 0x00000001df9e9e9200000001c6e3ff20
+
+	/* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
+	.octa 0x00000001a4a3f95200000000e1b38744
+
+	/* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
+	.octa 0x00000000e2f5122000000000791585b2
+
+	/* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
+	.octa 0x000000004aa01f3e00000000ac53b894
+
+	/* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
+	.octa 0x00000000b3e90a5800000001ed5f2cf4
+
+	/* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
+	.octa 0x000000000c9ca2aa00000001df48b2e0
+
+	/* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
+	.octa 0x000000015168231600000000049c1c62
+
+	/* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
+	.octa 0x0000000036fce78c000000017c460c12
+
+	/* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
+	.octa 0x000000009037dc10000000015be4da7e
+
+	/* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
+	.octa 0x00000000d3298582000000010f38f668
+
+	/* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
+	.octa 0x00000001b42e8ad60000000039f40a00
+
+	/* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
+	.octa 0x00000000142a983800000000bd4c10c4
+
+	/* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
+	.octa 0x0000000109c7f1900000000042db1d98
+
+	/* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
+	.octa 0x0000000056ff931000000001c905bae6
+
+	/* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
+	.octa 0x00000001594513aa00000000069d40ea
+
+	/* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
+	.octa 0x00000001e3b5b1e8000000008e4fbad0
+
+	/* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
+	.octa 0x000000011dd5fc080000000047bedd46
+
+	/* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
+	.octa 0x00000001675f0cc20000000026396bf8
+
+	/* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
+	.octa 0x00000000d1c8dd4400000000379beb92
+
+	/* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
+	.octa 0x0000000115ebd3d8000000000abae54a
+
+	/* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
+	.octa 0x00000001ecbd0dac0000000007e6a128
+
+	/* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
+	.octa 0x00000000cdf67af2000000000ade29d2
+
+	/* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
+	.octa 0x000000004c01ff4c00000000f974c45c
+
+	/* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
+	.octa 0x00000000f2d8657e00000000e77ac60a
+
+	/* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
+	.octa 0x000000006bae74c40000000145895816
+
+	/* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
+	.octa 0x0000000152af8aa00000000038e362be
+
+	/* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
+	.octa 0x0000000004663802000000007f991a64
+
+	/* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
+	.octa 0x00000001ab2f5afc00000000fa366d3a
+
+	/* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
+	.octa 0x0000000074a4ebd400000001a2bb34f0
+
+	/* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
+	.octa 0x00000001d7ab3a4c0000000028a9981e
+
+	/* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
+	.octa 0x00000001a8da60c600000001dbc672be
+
+	/* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
+	.octa 0x000000013cf6382000000000b04d77f6
+
+	/* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
+	.octa 0x00000000bec12e1e0000000124400d96
+
+	/* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
+	.octa 0x00000001c6368010000000014ca4b414
+
+	/* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
+	.octa 0x00000001e6e78758000000012fe2c938
+
+	/* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
+	.octa 0x000000008d7f2b3c00000001faed01e6
+
+	/* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
+	.octa 0x000000016b4a156e000000007e80ecfe
+
+	/* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
+	.octa 0x00000001c63cfeb60000000098daee94
+
+	/* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
+	.octa 0x000000015f902670000000010a04edea
+
+	/* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
+	.octa 0x00000001cd5de11e00000001c00b4524
+
+	/* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
+	.octa 0x000000001acaec540000000170296550
+
+	/* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
+	.octa 0x000000002bd0ca780000000181afaa48
+
+	/* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
+	.octa 0x0000000032d63d5c0000000185a31ffa
+
+	/* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
+	.octa 0x000000001c6d4e4c000000002469f608
+
+	/* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
+	.octa 0x0000000106a60b92000000006980102a
+
+	/* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
+	.octa 0x00000000d3855e120000000111ea9ca8
+
+	/* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
+	.octa 0x00000000e312563600000001bd1d29ce
+
+	/* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
+	.octa 0x000000009e8f7ea400000001b34b9580
+
+	/* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
+	.octa 0x00000001c82e562c000000003076054e
+
+	/* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
+	.octa 0x00000000ca9f09ce000000012a608ea4
+
+	/* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
+	.octa 0x00000000c63764e600000000784d05fe
+
+	/* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
+	.octa 0x0000000168d2e49e000000016ef0d82a
+
+	/* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
+	.octa 0x00000000e986c1480000000075bda454
+
+	/* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
+	.octa 0x00000000cfb65894000000003dc0a1c4
+
+	/* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
+	.octa 0x0000000111cadee400000000e9a5d8be
+
+	/* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
+	.octa 0x0000000171fb63ce00000001609bc4b4
+
+.short_constants:
+	/* Each .octa below packs the four 32-bit remainders named in its comment. */
+	/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
+	/* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
+	.octa 0x7fec2963e5bf80485cf015c388e56f72
+
+	/* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
+	.octa 0x38e888d4844752a9963a18920246e2e6
+
+	/* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
+	.octa 0x42316c00730206ad419a441956993a31
+
+	/* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
+	.octa 0x543d5c543e65ddf9924752ba2b830011
+
+	/* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
+	.octa 0x78e87aaf56767c9255bd7f9518e4a304
+
+	/* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
+	.octa 0x8f68fcec1903da7f6d76739fe0553f1e
+
+	/* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
+	.octa 0x3f4840246791d588c133722b1fe0b5c3
+
+	/* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
+	.octa 0x34c96751b04de25a64b67ee0e55ef1f3
+
+	/* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
+	.octa 0x156c8e180b4a395b069db049b8fdb1e7
+
+	/* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
+	.octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
+
+	/* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
+	.octa 0x041d37768cd75659817cdc5119b29a35
+
+	/* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
+	.octa 0x3a0777818cfaa9651ce9d94b36c41f1c
+
+	/* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
+	.octa 0x0e148e8252377a554f256efcb82be955
+
+	/* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
+	.octa 0x9c25531d19e65ddeec1631edb2dea967
+
+	/* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
+	.octa 0x790606ff9957c0a65d27e147510ac59a
+
+	/* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
+	.octa 0x82f63b786ea2d55ca66805eb18b8ea18
+
+
+.barrett_constants:
+	/* 33 bit reflected Barrett constant m = (4^32)/n, i.e. x^64 div p(x)` */
+	.octa 0x000000000000000000000000dea713f1	/* x^64 div p(x)` */
+	/* 33 bit reflected Barrett constant n, the divisor used for m above */
+	.octa 0x00000000000000000000000105ec76f1
+
+#define CRC_FUNCTION_NAME __crc32c_vpmsum	/* same symbol the C glue declares */
+#define REFLECT	/* NOTE(review): presumably selects the reflected variant in the core - confirm */
+#include "crc32-vpmsum_core.S"
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
new file mode 100644
index 0000000000..63760b7dbb
--- /dev/null
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+#define CHKSUM_BLOCK_SIZE	1	/* reported as .cra_blocksize */
+#define CHKSUM_DIGEST_SIZE	4	/* reported as .digestsize */
+
+#define VMX_ALIGN		16	/* buffer is aligned to this before the vector call */
+#define VMX_ALIGN_MASK		(VMX_ALIGN-1)
+
+#define VECTOR_BREAKPOINT	512	/* plus VMX_ALIGN: shortest len sent to the vector call */
+/* Vector CRC routine; only declared in this file (defined elsewhere). */
+u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len);
+
+static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len)
+{
+	unsigned long misalign = (unsigned long)p & VMX_ALIGN_MASK;
+	size_t bulk, rest;
+	unsigned int head;
+
+	/* Below the size threshold or SIMD unusable: scalar code only. */
+	if (!(len >= (VECTOR_BREAKPOINT + VMX_ALIGN) && crypto_simd_usable()))
+		return __crc32c_le(crc, p, len);
+
+	/* Scalar code consumes the bytes in front of a VMX_ALIGN boundary. */
+	if (misalign) {
+		head = VMX_ALIGN - misalign;
+		crc = __crc32c_le(crc, p, head);
+		p += head;
+		len -= head;
+	}
+
+	bulk = len & ~VMX_ALIGN_MASK;	/* whole VMX_ALIGN-byte chunks */
+	rest = len & VMX_ALIGN_MASK;	/* bytes left after those chunks */
+	if (bulk) {
+		/* AltiVec is enabled only with preemption and page faults off. */
+		preempt_disable();
+		pagefault_disable();
+		enable_kernel_altivec();
+		crc = __crc32c_vpmsum(crc, p, bulk);
+		disable_kernel_altivec();
+		pagefault_enable();
+		preempt_enable();
+	}
+
+	return rest ? __crc32c_le(crc, p + bulk, rest) : crc;
+}
+
+static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *seed = crypto_tfm_ctx(tfm);
+
+	/* A new transform starts with an all-ones value in its context. */
+	seed[0] = ~0;
+	return 0;
+}
+
+/*
+ * Seed override, for arbitrary accumulators and XOR conventions (when your
+ * algorithm begins with ~0, XOR with ~0 first). Little-endian u32 keys only.
+ */
+static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key,
+			       unsigned int keylen)
+{
+	u32 *seed = crypto_shash_ctx(hash);
+
+	if (keylen == sizeof(u32)) {
+		*seed = le32_to_cpup((__le32 *)key);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int crc32c_vpmsum_init(struct shash_desc *desc)
+{
+	const u32 *seed = crypto_shash_ctx(desc->tfm);
+	u32 *state = shash_desc_ctx(desc);
+
+	/* Each request begins from the value held in the transform context. */
+	state[0] = seed[0];
+	return 0;
+}
+
+static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	u32 *state = shash_desc_ctx(desc);
+	u32 folded = crc32c_vpmsum(state[0], data, len);
+
+	state[0] = folded;	/* running CRC is kept in the descriptor context */
+	return 0;
+}
+
+static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int len,
+				u8 *out)
+{
+	/* Fold in the last chunk; the digest is the inverted CRC, stored LE. */
+	((__le32 *)out)[0] = ~cpu_to_le32(crc32c_vpmsum(crcp[0], data, len));
+	return 0;
+}
+
+static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{	/* Final chunk plus digest, continuing from the CRC in shash_desc_ctx(). */
+	return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out)
+{
+	const u32 *state = shash_desc_ctx(desc);
+	__le32 *digest = (__le32 *)out;
+
+	digest[0] = ~cpu_to_le32p(state);	/* inverted CRC, little-endian */
+	return 0;
+}
+
+static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, u8 *out)
+{	/* One-shot digest: starts from the transform's seed, not the desc state. */
+	return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len,
+				     out);
+}
+
+static struct shash_alg alg = {	/* passed to crypto_register_shash() at module init */
+	.setkey		= crc32c_vpmsum_setkey,
+	.init		= crc32c_vpmsum_init,
+	.update		= crc32c_vpmsum_update,
+	.final		= crc32c_vpmsum_final,
+	.finup		= crc32c_vpmsum_finup,
+	.digest		= crc32c_vpmsum_digest,
+	.descsize	= sizeof(u32),	/* per-request state: the running CRC */
+	.digestsize	= CHKSUM_DIGEST_SIZE,
+	.base		= {
+		.cra_name		= "crc32c",
+		.cra_driver_name	= "crc32c-vpmsum",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
+		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(u32),	/* per-transform state: the seed */
+		.cra_module		= THIS_MODULE,
+		.cra_init		= crc32c_vpmsum_cra_init,	/* sets the seed to ~0 */
+	}
+};
+
+static int __init crc32c_vpmsum_mod_init(void)
+{
+	/* Register only when the CPU reports CPU_FTR_ARCH_207S. */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return crypto_register_shash(&alg);
+	return -ENODEV;
+}
+
+static void __exit crc32c_vpmsum_mod_fini(void)
+{	/* Module exit: drop the shash registration for alg. */
+	crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
+module_exit(crc32c_vpmsum_mod_fini);
+
+MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
+MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("crc32c");	/* same string as .cra_name */
+MODULE_ALIAS_CRYPTO("crc32c-vpmsum");	/* same string as .cra_driver_name */
diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
new file mode 100644
index 0000000000..f0b93a0fe1
--- /dev/null
+++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
@@ -0,0 +1,845 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Calculate a CRC T10DIF with vpmsum acceleration
+ *
+ * Constants generated by crc32-vpmsum, available at
+ * https://github.com/antonblanchard/crc32-vpmsum
+ *
+ * crc32-vpmsum is
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ */
+	.section	.rodata
+.balign 16
+
+.byteswap_constant:
+	/* byte reverse permute constant: bytes 0x0F (top) down to 0x00 */
+	.octa 0x0F0E0D0C0B0A09080706050403020100
+
+.constants:
+
+	/* Reduce 262144 bits to 1024 bits */
+	/* x^261184 mod p(x), x^261120 mod p(x) */
+	.octa 0x0000000056d300000000000052550000
+
+	/* x^260160 mod p(x), x^260096 mod p(x) */
+	.octa 0x00000000ee67000000000000a1e40000
+
+	/* x^259136 mod p(x), x^259072 mod p(x) */
+	.octa 0x0000000060830000000000004ad10000
+
+	/* x^258112 mod p(x), x^258048 mod p(x) */
+	.octa 0x000000008cfe0000000000009ab40000
+
+	/* x^257088 mod p(x), x^257024 mod p(x) */
+	.octa 0x000000003e93000000000000fdb50000
+
+	/* x^256064 mod p(x), x^256000 mod p(x) */
+	.octa 0x000000003c2000000000000045480000
+
+	/* x^255040 mod p(x), x^254976 mod p(x) */
+	.octa 0x00000000b1fc0000000000008d690000
+
+	/* x^254016 mod p(x), x^253952 mod p(x) */
+	.octa 0x00000000f82b00000000000024ad0000
+
+	/* x^252992 mod p(x), x^252928 mod p(x) */
+	.octa 0x0000000044420000000000009f1a0000
+
+	/* x^251968 mod p(x), x^251904 mod p(x) */
+	.octa 0x00000000e88c00000000000066ec0000
+
+	/* x^250944 mod p(x), x^250880 mod p(x) */
+	.octa 0x00000000385c000000000000c87d0000
+
+	/* x^249920 mod p(x), x^249856 mod p(x) */
+	.octa 0x000000003227000000000000c8ff0000
+
+	/* x^248896 mod p(x), x^248832 mod p(x) */
+	.octa 0x00000000a9a900000000000033440000
+
+	/* x^247872 mod p(x), x^247808 mod p(x) */
+	.octa 0x00000000abaa00000000000066eb0000
+
+	/* x^246848 mod p(x), x^246784 mod p(x) */
+	.octa 0x000000001ac3000000000000c4ef0000
+
+	/* x^245824 mod p(x), x^245760 mod p(x) */
+	.octa 0x0000000063f000000000000056f30000
+
+	/* x^244800 mod p(x), x^244736 mod p(x) */
+	.octa 0x0000000032cc00000000000002050000
+
+	/* x^243776 mod p(x), x^243712 mod p(x) */
+	.octa 0x00000000f8b5000000000000568e0000
+
+	/* x^242752 mod p(x), x^242688 mod p(x) */
+	.octa 0x000000008db100000000000064290000
+
+	/* x^241728 mod p(x), x^241664 mod p(x) */
+	.octa 0x0000000059ca0000000000006b660000
+
+	/* x^240704 mod p(x), x^240640 mod p(x) */
+	.octa 0x000000005f5c00000000000018f80000
+
+	/* x^239680 mod p(x), x^239616 mod p(x) */
+	.octa 0x0000000061af000000000000b6090000
+
+	/* x^238656 mod p(x), x^238592 mod p(x) */
+	.octa 0x00000000e29e000000000000099a0000
+
+	/* x^237632 mod p(x), x^237568 mod p(x) */
+	.octa 0x000000000975000000000000a8360000
+
+	/* x^236608 mod p(x), x^236544 mod p(x) */
+	.octa 0x0000000043900000000000004f570000
+
+	/* x^235584 mod p(x), x^235520 mod p(x) */
+	.octa 0x00000000f9cd000000000000134c0000
+
+	/* x^234560 mod p(x), x^234496 mod p(x) */
+	.octa 0x000000007c29000000000000ec380000
+
+	/* x^233536 mod p(x), x^233472 mod p(x) */
+	.octa 0x000000004c6a000000000000b0d10000
+
+	/* x^232512 mod p(x), x^232448 mod p(x) */
+	.octa 0x00000000e7290000000000007d3e0000
+
+	/* x^231488 mod p(x), x^231424 mod p(x) */
+	.octa 0x00000000f1ab000000000000f0b20000
+
+	/* x^230464 mod p(x), x^230400 mod p(x) */
+	.octa 0x0000000039db0000000000009c270000
+
+	/* x^229440 mod p(x), x^229376 mod p(x) */
+	.octa 0x000000005e2800000000000092890000
+
+	/* x^228416 mod p(x), x^228352 mod p(x) */
+	.octa 0x00000000d44e000000000000d5ee0000
+
+	/* x^227392 mod p(x), x^227328 mod p(x) */
+	.octa 0x00000000cd0a00000000000041f50000
+
+	/* x^226368 mod p(x), x^226304 mod p(x) */
+	.octa 0x00000000c5b400000000000010520000
+
+	/* x^225344 mod p(x), x^225280 mod p(x) */
+	.octa 0x00000000fd2100000000000042170000
+
+	/* x^224320 mod p(x), x^224256 mod p(x) */
+	.octa 0x000000002f2500000000000095c20000
+
+	/* x^223296 mod p(x), x^223232 mod p(x) */
+	.octa 0x000000001b0100000000000001ce0000
+
+	/* x^222272 mod p(x), x^222208 mod p(x) */
+	.octa 0x000000000d430000000000002aca0000
+
+	/* x^221248 mod p(x), x^221184 mod p(x) */
+	.octa 0x0000000030a6000000000000385e0000
+
+	/* x^220224 mod p(x), x^220160 mod p(x) */
+	.octa 0x00000000e37b0000000000006f7a0000
+
+	/* x^219200 mod p(x), x^219136 mod p(x) */
+	.octa 0x00000000873600000000000024320000
+
+	/* x^218176 mod p(x), x^218112 mod p(x) */
+	.octa 0x00000000e9fb000000000000bd9c0000
+
+	/* x^217152 mod p(x), x^217088 mod p(x) */
+	.octa 0x000000003b9500000000000054bc0000
+
+	/* x^216128 mod p(x), x^216064 mod p(x) */
+	.octa 0x00000000133e000000000000a4660000
+
+	/* x^215104 mod p(x), x^215040 mod p(x) */
+	.octa 0x00000000784500000000000079930000
+
+	/* x^214080 mod p(x), x^214016 mod p(x) */
+	.octa 0x00000000b9800000000000001bb80000
+
+	/* x^213056 mod p(x), x^212992 mod p(x) */
+	.octa 0x00000000687600000000000024400000
+
+	/* x^212032 mod p(x), x^211968 mod p(x) */
+	.octa 0x00000000aff300000000000029e10000
+
+	/* x^211008 mod p(x), x^210944 mod p(x) */
+	.octa 0x0000000024b50000000000005ded0000
+
+	/* x^209984 mod p(x), x^209920 mod p(x) */
+	.octa 0x0000000017e8000000000000b12e0000
+
+	/* x^208960 mod p(x), x^208896 mod p(x) */
+	.octa 0x00000000128400000000000026d20000
+
+	/* x^207936 mod p(x), x^207872 mod p(x) */
+	.octa 0x000000002115000000000000a32a0000
+
+	/* x^206912 mod p(x), x^206848 mod p(x) */
+	.octa 0x000000009595000000000000a1210000
+
+	/* x^205888 mod p(x), x^205824 mod p(x) */
+	.octa 0x00000000281e000000000000ee8b0000
+
+	/* x^204864 mod p(x), x^204800 mod p(x) */
+	.octa 0x0000000006010000000000003d0d0000
+
+	/* x^203840 mod p(x), x^203776 mod p(x) */
+	.octa 0x00000000e2b600000000000034e90000
+
+	/* x^202816 mod p(x), x^202752 mod p(x) */
+	.octa 0x000000001bd40000000000004cdb0000
+
+	/* x^201792 mod p(x), x^201728 mod p(x) */
+	.octa 0x00000000df2800000000000030e90000
+
+	/* x^200768 mod p(x), x^200704 mod p(x) */
+	.octa 0x0000000049c200000000000042590000
+
+	/* x^199744 mod p(x), x^199680 mod p(x) */
+	.octa 0x000000009b97000000000000df950000
+
+	/* x^198720 mod p(x), x^198656 mod p(x) */
+	.octa 0x000000006184000000000000da7b0000
+
+	/* x^197696 mod p(x), x^197632 mod p(x) */
+	.octa 0x00000000461700000000000012510000
+
+	/* x^196672 mod p(x), x^196608 mod p(x) */
+	.octa 0x000000009b40000000000000f37e0000
+
+	/* x^195648 mod p(x), x^195584 mod p(x) */
+	.octa 0x00000000eeb2000000000000ecf10000
+
+	/* x^194624 mod p(x), x^194560 mod p(x) */
+	.octa 0x00000000b2e800000000000050f20000
+
+	/* x^193600 mod p(x), x^193536 mod p(x) */
+	.octa 0x00000000f59a000000000000e0b30000
+
+	/* x^192576 mod p(x), x^192512 mod p(x) */
+	.octa 0x00000000467f0000000000004d5a0000
+
+	/* x^191552 mod p(x), x^191488 mod p(x) */
+	.octa 0x00000000da92000000000000bb010000
+
+	/* x^190528 mod p(x), x^190464 mod p(x) */
+	.octa 0x000000001e1000000000000022a40000
+
+	/* x^189504 mod p(x), x^189440 mod p(x) */
+	.octa 0x0000000058fe000000000000836f0000
+
+	/* x^188480 mod p(x), x^188416 mod p(x) */
+	.octa 0x00000000b9ce000000000000d78d0000
+
+	/* x^187456 mod p(x), x^187392 mod p(x) */
+	.octa 0x0000000022210000000000004f8d0000
+
+	/* x^186432 mod p(x), x^186368 mod p(x) */
+	.octa 0x00000000744600000000000033760000
+
+	/* x^185408 mod p(x), x^185344 mod p(x) */
+	.octa 0x000000001c2e000000000000a1e50000
+
+	/* x^184384 mod p(x), x^184320 mod p(x) */
+	.octa 0x00000000dcc8000000000000a1a40000
+
+	/* x^183360 mod p(x), x^183296 mod p(x) */
+	.octa 0x00000000910f00000000000019a20000
+
+	/* x^182336 mod p(x), x^182272 mod p(x) */
+	.octa 0x0000000055d5000000000000f6ae0000
+
+	/* x^181312 mod p(x), x^181248 mod p(x) */
+	.octa 0x00000000c8ba000000000000a7ac0000
+
+	/* x^180288 mod p(x), x^180224 mod p(x) */
+	.octa 0x0000000031f8000000000000eea20000
+
+	/* x^179264 mod p(x), x^179200 mod p(x) */
+	.octa 0x000000001966000000000000c4d90000
+
+	/* x^178240 mod p(x), x^178176 mod p(x) */
+	.octa 0x00000000b9810000000000002b470000
+
+	/* x^177216 mod p(x), x^177152 mod p(x) */
+	.octa 0x000000008303000000000000f7cf0000
+
+	/* x^176192 mod p(x), x^176128 mod p(x) */
+	.octa 0x000000002ce500000000000035b30000
+
+	/* x^175168 mod p(x), x^175104 mod p(x) */
+	.octa 0x000000002fae0000000000000c7c0000
+
+	/* x^174144 mod p(x), x^174080 mod p(x) */
+	.octa 0x00000000f50c0000000000009edf0000
+
+	/* x^173120 mod p(x), x^173056 mod p(x) */
+	.octa 0x00000000714f00000000000004cd0000
+
+	/* x^172096 mod p(x), x^172032 mod p(x) */
+	.octa 0x00000000c161000000000000541b0000
+
+	/* x^171072 mod p(x), x^171008 mod p(x) */
+	.octa 0x0000000021c8000000000000e2700000
+
+	/* x^170048 mod p(x), x^169984 mod p(x) */
+	.octa 0x00000000b93d00000000000009a60000
+
+	/* x^169024 mod p(x), x^168960 mod p(x) */
+	.octa 0x00000000fbcf000000000000761c0000
+
+	/* x^168000 mod p(x), x^167936 mod p(x) */
+	.octa 0x0000000026350000000000009db30000
+
+	/* x^166976 mod p(x), x^166912 mod p(x) */
+	.octa 0x00000000b64f0000000000003e9f0000
+
+	/* x^165952 mod p(x), x^165888 mod p(x) */
+	.octa 0x00000000bd0e00000000000078590000
+
+	/* x^164928 mod p(x), x^164864 mod p(x) */
+	.octa 0x00000000d9360000000000008bc80000
+
+	/* x^163904 mod p(x), x^163840 mod p(x) */
+	.octa 0x000000002f140000000000008c9f0000
+
+	/* x^162880 mod p(x), x^162816 mod p(x) */
+	.octa 0x000000006a270000000000006af70000
+
+	/* x^161856 mod p(x), x^161792 mod p(x) */
+	.octa 0x000000006685000000000000e5210000
+
+	/* x^160832 mod p(x), x^160768 mod p(x) */
+	.octa 0x0000000062da00000000000008290000
+
+	/* x^159808 mod p(x), x^159744 mod p(x) */
+	.octa 0x00000000bb4b000000000000e4d00000
+
+	/* x^158784 mod p(x), x^158720 mod p(x) */
+	.octa 0x00000000d2490000000000004ae10000
+
+	/* x^157760 mod p(x), x^157696 mod p(x) */
+	.octa 0x00000000c85b00000000000000e70000
+
+	/* x^156736 mod p(x), x^156672 mod p(x) */
+	.octa 0x00000000c37a00000000000015650000
+
+	/* x^155712 mod p(x), x^155648 mod p(x) */
+	.octa 0x0000000018530000000000001c2f0000
+
+	/* x^154688 mod p(x), x^154624 mod p(x) */
+	.octa 0x00000000b46600000000000037bd0000
+
+	/* x^153664 mod p(x), x^153600 mod p(x) */
+	.octa 0x00000000439b00000000000012190000
+
+	/* x^152640 mod p(x), x^152576 mod p(x) */
+	.octa 0x00000000b1260000000000005ece0000
+
+	/* x^151616 mod p(x), x^151552 mod p(x) */
+	.octa 0x00000000d8110000000000002a5e0000
+
+	/* x^150592 mod p(x), x^150528 mod p(x) */
+	.octa 0x00000000099f00000000000052330000
+
+	/* x^149568 mod p(x), x^149504 mod p(x) */
+	.octa 0x00000000f9f9000000000000f9120000
+
+	/* x^148544 mod p(x), x^148480 mod p(x) */
+	.octa 0x000000005cc00000000000000ddc0000
+
+	/* x^147520 mod p(x), x^147456 mod p(x) */
+	.octa 0x00000000343b00000000000012200000
+
+	/* x^146496 mod p(x), x^146432 mod p(x) */
+	.octa 0x000000009222000000000000d12b0000
+
+	/* x^145472 mod p(x), x^145408 mod p(x) */
+	.octa 0x00000000d781000000000000eb2d0000
+
+	/* x^144448 mod p(x), x^144384 mod p(x) */
+	.octa 0x000000000bf400000000000058970000
+
+	/* x^143424 mod p(x), x^143360 mod p(x) */
+	.octa 0x00000000094200000000000013690000
+
+	/* x^142400 mod p(x), x^142336 mod p(x) */
+	.octa 0x00000000d55100000000000051950000
+
+	/* x^141376 mod p(x), x^141312 mod p(x) */
+	.octa 0x000000008f11000000000000954b0000
+
+	/* x^140352 mod p(x), x^140288 mod p(x) */
+	.octa 0x00000000140f000000000000b29e0000
+
+	/* x^139328 mod p(x), x^139264 mod p(x) */
+	.octa 0x00000000c6db000000000000db5d0000
+
+	/* x^138304 mod p(x), x^138240 mod p(x) */
+	.octa 0x00000000715b000000000000dfaf0000
+
+	/* x^137280 mod p(x), x^137216 mod p(x) */
+	.octa 0x000000000dea000000000000e3b60000
+
+	/* x^136256 mod p(x), x^136192 mod p(x) */
+	.octa 0x000000006f94000000000000ddaf0000
+
+	/* x^135232 mod p(x), x^135168 mod p(x) */
+	.octa 0x0000000024e1000000000000e4f70000
+
+	/* x^134208 mod p(x), x^134144 mod p(x) */
+	.octa 0x000000008810000000000000aa110000
+
+	/* x^133184 mod p(x), x^133120 mod p(x) */
+	.octa 0x0000000030c2000000000000a8e60000
+
+	/* x^132160 mod p(x), x^132096 mod p(x) */
+	.octa 0x00000000e6d0000000000000ccf30000
+
+	/* x^131136 mod p(x), x^131072 mod p(x) */
+	.octa 0x000000004da000000000000079bf0000
+
+	/* x^130112 mod p(x), x^130048 mod p(x) */
+	.octa 0x000000007759000000000000b3a30000
+
+	/* x^129088 mod p(x), x^129024 mod p(x) */
+	.octa 0x00000000597400000000000028790000
+
+	/* x^128064 mod p(x), x^128000 mod p(x) */
+	.octa 0x000000007acd000000000000b5820000
+
+	/* x^127040 mod p(x), x^126976 mod p(x) */
+	.octa 0x00000000e6e400000000000026ad0000
+
+	/* x^126016 mod p(x), x^125952 mod p(x) */
+	.octa 0x000000006d49000000000000985b0000
+
+	/* x^124992 mod p(x), x^124928 mod p(x) */
+	.octa 0x000000000f0800000000000011520000
+
+	/* x^123968 mod p(x), x^123904 mod p(x) */
+	.octa 0x000000002c7f000000000000846c0000
+
+	/* x^122944 mod p(x), x^122880 mod p(x) */
+	.octa 0x000000005ce7000000000000ae1d0000
+
+	/* x^121920 mod p(x), x^121856 mod p(x) */
+	.octa 0x00000000d4cb000000000000e21d0000
+
+	/* x^120896 mod p(x), x^120832 mod p(x) */
+	.octa 0x000000003a2300000000000019bb0000
+
+	/* x^119872 mod p(x), x^119808 mod p(x) */
+	.octa 0x000000000e1700000000000095290000
+
+	/* x^118848 mod p(x), x^118784 mod p(x) */
+	.octa 0x000000006e6400000000000050d20000
+
+	/* x^117824 mod p(x), x^117760 mod p(x) */
+	.octa 0x000000008d5c0000000000000cd10000
+
+	/* x^116800 mod p(x), x^116736 mod p(x) */
+	.octa 0x00000000ef310000000000007b570000
+
+	/* x^115776 mod p(x), x^115712 mod p(x) */
+	.octa 0x00000000645d00000000000053d60000
+
+	/* x^114752 mod p(x), x^114688 mod p(x) */
+	.octa 0x0000000018fc00000000000077510000
+
+	/* x^113728 mod p(x), x^113664 mod p(x) */
+	.octa 0x000000000cb3000000000000a7b70000
+
+	/* x^112704 mod p(x), x^112640 mod p(x) */
+	.octa 0x00000000991b000000000000d0780000
+
+	/* x^111680 mod p(x), x^111616 mod p(x) */
+	.octa 0x00000000845a000000000000be3c0000
+
+	/* x^110656 mod p(x), x^110592 mod p(x) */
+	.octa 0x00000000d3a9000000000000df020000
+
+	/* x^109632 mod p(x), x^109568 mod p(x) */
+	.octa 0x0000000017d7000000000000063e0000
+
+	/* x^108608 mod p(x), x^108544 mod p(x) */
+	.octa 0x000000007a860000000000008ab40000
+
+	/* x^107584 mod p(x), x^107520 mod p(x) */
+	.octa 0x00000000fd7c000000000000c7bd0000
+
+	/* x^106560 mod p(x), x^106496 mod p(x) */
+	.octa 0x00000000a56b000000000000efd60000
+
+	/* x^105536 mod p(x), x^105472 mod p(x) */
+	.octa 0x0000000010e400000000000071380000
+
+	/* x^104512 mod p(x), x^104448 mod p(x) */
+	.octa 0x00000000994500000000000004d30000
+
+	/* x^103488 mod p(x), x^103424 mod p(x) */
+	.octa 0x00000000b83c0000000000003b0e0000
+
+	/* x^102464 mod p(x), x^102400 mod p(x) */
+	.octa 0x00000000d6c10000000000008b020000
+
+	/* x^101440 mod p(x), x^101376 mod p(x) */
+	.octa 0x000000009efc000000000000da940000
+
+	/* x^100416 mod p(x), x^100352 mod p(x) */
+	.octa 0x000000005e87000000000000f9f70000
+
+	/* x^99392 mod p(x), x^99328 mod p(x) */
+	.octa 0x000000006c9b00000000000045e40000
+
+	/* x^98368 mod p(x), x^98304 mod p(x) */
+	.octa 0x00000000178a00000000000083940000
+
+	/* x^97344 mod p(x), x^97280 mod p(x) */
+	.octa 0x00000000f0c8000000000000f0a00000
+
+	/* x^96320 mod p(x), x^96256 mod p(x) */
+	.octa 0x00000000f699000000000000b74b0000
+
+	/* x^95296 mod p(x), x^95232 mod p(x) */
+	.octa 0x00000000316d000000000000c1cf0000
+
+	/* x^94272 mod p(x), x^94208 mod p(x) */
+	.octa 0x00000000987e00000000000072680000
+
+	/* x^93248 mod p(x), x^93184 mod p(x) */
+	.octa 0x00000000acff000000000000e0ab0000
+
+	/* x^92224 mod p(x), x^92160 mod p(x) */
+	.octa 0x00000000a1f6000000000000c5a80000
+
+	/* x^91200 mod p(x), x^91136 mod p(x) */
+	.octa 0x0000000061bd000000000000cf690000
+
+	/* x^90176 mod p(x), x^90112 mod p(x) */
+	.octa 0x00000000c9f2000000000000cbcc0000
+
+	/* x^89152 mod p(x), x^89088 mod p(x) */
+	.octa 0x000000005a33000000000000de050000
+
+	/* x^88128 mod p(x), x^88064 mod p(x) */
+	.octa 0x00000000e416000000000000ccd70000
+
+	/* x^87104 mod p(x), x^87040 mod p(x) */
+	.octa 0x0000000058930000000000002f670000
+
+	/* x^86080 mod p(x), x^86016 mod p(x) */
+	.octa 0x00000000a9d3000000000000152f0000
+
+	/* x^85056 mod p(x), x^84992 mod p(x) */
+	.octa 0x00000000c114000000000000ecc20000
+
+	/* x^84032 mod p(x), x^83968 mod p(x) */
+	.octa 0x00000000b9270000000000007c890000
+
+	/* x^83008 mod p(x), x^82944 mod p(x) */
+	.octa 0x000000002e6000000000000006ee0000
+
+	/* x^81984 mod p(x), x^81920 mod p(x) */
+	.octa 0x00000000dfc600000000000009100000
+
+	/* x^80960 mod p(x), x^80896 mod p(x) */
+	.octa 0x000000004911000000000000ad4e0000
+
+	/* x^79936 mod p(x), x^79872 mod p(x) */
+	.octa 0x00000000ae1b000000000000b04d0000
+
+	/* x^78912 mod p(x), x^78848 mod p(x) */
+	.octa 0x0000000005fa000000000000e9900000
+
+	/* x^77888 mod p(x), x^77824 mod p(x) */
+	.octa 0x0000000004a1000000000000cc6f0000
+
+	/* x^76864 mod p(x), x^76800 mod p(x) */
+	.octa 0x00000000af73000000000000ed110000
+
+	/* x^75840 mod p(x), x^75776 mod p(x) */
+	.octa 0x0000000082530000000000008f7e0000
+
+	/* x^74816 mod p(x), x^74752 mod p(x) */
+	.octa 0x00000000cfdc000000000000594f0000
+
+	/* x^73792 mod p(x), x^73728 mod p(x) */
+	.octa 0x00000000a6b6000000000000a8750000
+
+	/* x^72768 mod p(x), x^72704 mod p(x) */
+	.octa 0x00000000fd76000000000000aa0c0000
+
+	/* x^71744 mod p(x), x^71680 mod p(x) */
+	.octa 0x0000000006f500000000000071db0000
+
+	/* x^70720 mod p(x), x^70656 mod p(x) */
+	.octa 0x0000000037ca000000000000ab0c0000
+
+	/* x^69696 mod p(x), x^69632 mod p(x) */
+	.octa 0x00000000d7ab000000000000b7a00000
+
+	/* x^68672 mod p(x), x^68608 mod p(x) */
+	.octa 0x00000000440800000000000090d30000
+
+	/* x^67648 mod p(x), x^67584 mod p(x) */
+	.octa 0x00000000186100000000000054730000
+
+	/* x^66624 mod p(x), x^66560 mod p(x) */
+	.octa 0x000000007368000000000000a3a20000
+
+	/* x^65600 mod p(x), x^65536 mod p(x) */
+	.octa 0x0000000026d0000000000000f9040000
+
+	/* x^64576 mod p(x), x^64512 mod p(x) */
+	.octa 0x00000000fe770000000000009c0a0000
+
+	/* x^63552 mod p(x), x^63488 mod p(x) */
+	.octa 0x000000002cba000000000000d1e70000
+
+	/* x^62528 mod p(x), x^62464 mod p(x) */
+	.octa 0x00000000f8bd0000000000005ac10000
+
+	/* x^61504 mod p(x), x^61440 mod p(x) */
+	.octa 0x000000007372000000000000d68d0000
+
+	/* x^60480 mod p(x), x^60416 mod p(x) */
+	.octa 0x00000000f37f00000000000089f60000
+
+	/* x^59456 mod p(x), x^59392 mod p(x) */
+	.octa 0x00000000078400000000000008a90000
+
+	/* x^58432 mod p(x), x^58368 mod p(x) */
+	.octa 0x00000000d3e400000000000042360000
+
+	/* x^57408 mod p(x), x^57344 mod p(x) */
+	.octa 0x00000000eba800000000000092d50000
+
+	/* x^56384 mod p(x), x^56320 mod p(x) */
+	.octa 0x00000000afbe000000000000b4d50000
+
+	/* x^55360 mod p(x), x^55296 mod p(x) */
+	.octa 0x00000000d8ca000000000000c9060000
+
+	/* x^54336 mod p(x), x^54272 mod p(x) */
+	.octa 0x00000000c2d00000000000008f4f0000
+
+	/* x^53312 mod p(x), x^53248 mod p(x) */
+	.octa 0x00000000373200000000000028690000
+
+	/* x^52288 mod p(x), x^52224 mod p(x) */
+	.octa 0x0000000046ae000000000000c3b30000
+
+	/* x^51264 mod p(x), x^51200 mod p(x) */
+	.octa 0x00000000b243000000000000f8700000
+
+	/* x^50240 mod p(x), x^50176 mod p(x) */
+	.octa 0x00000000f7f500000000000029eb0000
+
+	/* x^49216 mod p(x), x^49152 mod p(x) */
+	.octa 0x000000000c7e000000000000fe730000
+
+	/* x^48192 mod p(x), x^48128 mod p(x) */
+	.octa 0x00000000c38200000000000096000000
+
+	/* x^47168 mod p(x), x^47104 mod p(x) */
+	.octa 0x000000008956000000000000683c0000
+
+	/* x^46144 mod p(x), x^46080 mod p(x) */
+	.octa 0x00000000422d0000000000005f1e0000
+
+	/* x^45120 mod p(x), x^45056 mod p(x) */
+	.octa 0x00000000ac0f0000000000006f810000
+
+	/* x^44096 mod p(x), x^44032 mod p(x) */
+	.octa 0x00000000ce30000000000000031f0000
+
+	/* x^43072 mod p(x), x^43008 mod p(x) */
+	.octa 0x000000003d43000000000000455a0000
+
+	/* x^42048 mod p(x), x^41984 mod p(x) */
+	.octa 0x000000007ebe000000000000a6050000
+
+	/* x^41024 mod p(x), x^40960 mod p(x) */
+	.octa 0x00000000976e00000000000077eb0000
+
+	/* x^40000 mod p(x), x^39936 mod p(x) */
+	.octa 0x000000000872000000000000389c0000
+
+	/* x^38976 mod p(x), x^38912 mod p(x) */
+	.octa 0x000000008979000000000000c7b20000
+
+	/* x^37952 mod p(x), x^37888 mod p(x) */
+	.octa 0x000000005c1e0000000000001d870000
+
+	/* x^36928 mod p(x), x^36864 mod p(x) */
+	.octa 0x00000000aebb00000000000045810000
+
+	/* x^35904 mod p(x), x^35840 mod p(x) */
+	.octa 0x000000004f7e0000000000006d4a0000
+
+	/* x^34880 mod p(x), x^34816 mod p(x) */
+	.octa 0x00000000ea98000000000000b9200000
+
+	/* x^33856 mod p(x), x^33792 mod p(x) */
+	.octa 0x00000000f39600000000000022f20000
+
+	/* x^32832 mod p(x), x^32768 mod p(x) */
+	.octa 0x000000000bc500000000000041ca0000
+
+	/* x^31808 mod p(x), x^31744 mod p(x) */
+	.octa 0x00000000786400000000000078500000
+
+	/* x^30784 mod p(x), x^30720 mod p(x) */
+	.octa 0x00000000be970000000000009e7e0000
+
+	/* x^29760 mod p(x), x^29696 mod p(x) */
+	.octa 0x00000000dd6d000000000000a53c0000
+
+	/* x^28736 mod p(x), x^28672 mod p(x) */
+	.octa 0x000000004c3f00000000000039340000
+
+	/* x^27712 mod p(x), x^27648 mod p(x) */
+	.octa 0x0000000093a4000000000000b58e0000
+
+	/* x^26688 mod p(x), x^26624 mod p(x) */
+	.octa 0x0000000050fb00000000000062d40000
+
+	/* x^25664 mod p(x), x^25600 mod p(x) */
+	.octa 0x00000000f505000000000000a26f0000
+
+	/* x^24640 mod p(x), x^24576 mod p(x) */
+	.octa 0x0000000064f900000000000065e60000
+
+	/* x^23616 mod p(x), x^23552 mod p(x) */
+	.octa 0x00000000e8c2000000000000aad90000
+
+	/* x^22592 mod p(x), x^22528 mod p(x) */
+	.octa 0x00000000720b000000000000a3b00000
+
+	/* x^21568 mod p(x), x^21504 mod p(x) */
+	.octa 0x00000000e992000000000000d2680000
+
+	/* x^20544 mod p(x), x^20480 mod p(x) */
+	.octa 0x000000009132000000000000cf4c0000
+
+	/* x^19520 mod p(x), x^19456 mod p(x) */
+	.octa 0x00000000608a00000000000076610000
+
+	/* x^18496 mod p(x), x^18432 mod p(x) */
+	.octa 0x000000009948000000000000fb9f0000
+
+	/* x^17472 mod p(x), x^17408 mod p(x) */
+	.octa 0x00000000173000000000000003770000
+
+	/* x^16448 mod p(x), x^16384 mod p(x) */
+	.octa 0x000000006fe300000000000004880000
+
+	/* x^15424 mod p(x), x^15360 mod p(x) */
+	.octa 0x00000000e15300000000000056a70000
+
+	/* x^14400 mod p(x), x^14336 mod p(x) */
+	.octa 0x0000000092d60000000000009dfd0000
+
+	/* x^13376 mod p(x), x^13312 mod p(x) */
+	.octa 0x0000000002fd00000000000074c80000
+
+	/* x^12352 mod p(x), x^12288 mod p(x) */
+	.octa 0x00000000c78b000000000000a3ec0000
+
+	/* x^11328 mod p(x), x^11264 mod p(x) */
+	.octa 0x000000009262000000000000b3530000
+
+	/* x^10304 mod p(x), x^10240 mod p(x) */
+	.octa 0x0000000084f200000000000047bf0000
+
+	/* x^9280 mod p(x), x^9216 mod p(x) */
+	.octa 0x0000000067ee000000000000e97c0000
+
+	/* x^8256 mod p(x), x^8192 mod p(x) */
+	.octa 0x00000000535b00000000000091e10000
+
+	/* x^7232 mod p(x), x^7168 mod p(x) */
+	.octa 0x000000007ebb00000000000055060000
+
+	/* x^6208 mod p(x), x^6144 mod p(x) */
+	.octa 0x00000000c6a1000000000000fd360000
+
+	/* x^5184 mod p(x), x^5120 mod p(x) */
+	.octa 0x000000001be500000000000055860000
+
+	/* x^4160 mod p(x), x^4096 mod p(x) */
+	.octa 0x00000000ae0e0000000000005bd00000
+
+	/* x^3136 mod p(x), x^3072 mod p(x) */
+	.octa 0x0000000022040000000000008db20000
+
+	/* x^2112 mod p(x), x^2048 mod p(x) */
+	.octa 0x00000000c9eb000000000000efe20000
+
+	/* x^1088 mod p(x), x^1024 mod p(x) */
+	.octa 0x0000000039b400000000000051d10000
+
+.short_constants:
+
+	/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
+	/* x^2048 mod p(x), x^2016 mod p(x), x^1984 mod p(x), x^1952 mod p(x) */
+	.octa 0xefe20000dccf00009440000033590000
+
+	/* x^1920 mod p(x), x^1888 mod p(x), x^1856 mod p(x), x^1824 mod p(x) */
+	.octa 0xee6300002f3f000062180000e0ed0000
+
+	/* x^1792 mod p(x), x^1760 mod p(x), x^1728 mod p(x), x^1696 mod p(x) */
+	.octa 0xcf5f000017ef0000ccbe000023d30000
+
+	/* x^1664 mod p(x), x^1632 mod p(x), x^1600 mod p(x), x^1568 mod p(x) */
+	.octa 0x6d0c0000a30e00000920000042630000
+
+	/* x^1536 mod p(x), x^1504 mod p(x), x^1472 mod p(x), x^1440 mod p(x) */
+	.octa 0x21d30000932b0000a7a00000efcc0000
+
+	/* x^1408 mod p(x), x^1376 mod p(x), x^1344 mod p(x), x^1312 mod p(x) */
+	.octa 0x10be00000b310000666f00000d1c0000
+
+	/* x^1280 mod p(x), x^1248 mod p(x), x^1216 mod p(x), x^1184 mod p(x) */
+	.octa 0x1f240000ce9e0000caad0000589e0000
+
+	/* x^1152 mod p(x), x^1120 mod p(x), x^1088 mod p(x), x^1056 mod p(x) */
+	.octa 0x29610000d02b000039b400007cf50000
+
+	/* x^1024 mod p(x), x^992 mod p(x), x^960 mod p(x), x^928 mod p(x) */
+	.octa 0x51d100009d9d00003c0e0000bfd60000
+
+	/* x^896 mod p(x), x^864 mod p(x), x^832 mod p(x), x^800 mod p(x) */
+	.octa 0xda390000ceae000013830000713c0000
+
+	/* x^768 mod p(x), x^736 mod p(x), x^704 mod p(x), x^672 mod p(x) */
+	.octa 0xb67800001e16000085c0000080a60000
+
+	/* x^640 mod p(x), x^608 mod p(x), x^576 mod p(x), x^544 mod p(x) */
+	.octa 0x0db40000f7f90000371d0000e6580000
+
+	/* x^512 mod p(x), x^480 mod p(x), x^448 mod p(x), x^416 mod p(x) */
+	.octa 0x87e70000044c0000aadb0000a4970000
+
+	/* x^384 mod p(x), x^352 mod p(x), x^320 mod p(x), x^288 mod p(x) */
+	.octa 0x1f990000ad180000d8b30000e7b50000
+
+	/* x^256 mod p(x), x^224 mod p(x), x^192 mod p(x), x^160 mod p(x) */
+	.octa 0xbe6c00006ee300004c1a000006df0000
+
+	/* x^128 mod p(x), x^96 mod p(x), x^64 mod p(x), x^32 mod p(x) */
+	.octa 0xfb0b00002d560000136800008bb70000
+
+
+.barrett_constants:
+	/* Barrett constant m - (4^32)/n */
+	.octa 0x000000000000000000000001f65a57f8	/* x^64 div p(x) */
+	/* Barrett constant n */
+	.octa 0x0000000000000000000000018bb70000
+
+#define CRC_FUNCTION_NAME __crct10dif_vpmsum
+#include "crc32-vpmsum_core.S"
diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
new file mode 100644
index 0000000000..1dc8b69151
--- /dev/null
+++ b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Calculate a CRC T10-DIF with vpmsum acceleration
+ *
+ * Copyright 2017, Daniel Axtens, IBM Corporation.
+ * [based on crc32c-vpmsum_glue.c]
+ */
+
+#include <linux/crc-t10dif.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+/* Byte alignment crct10dif_vpmsum() establishes before calling the vector routine */
+#define VMX_ALIGN		16
+#define VMX_ALIGN_MASK		(VMX_ALIGN-1)
+
+/* Buffers shorter than VECTOR_BREAKPOINT + VMX_ALIGN bytes are left to the generic code */
+#define VECTOR_BREAKPOINT	64
+
+/*
+ * Vector CRC routine, defined outside this file.  The caller in this file
+ * passes the CRC shifted left by 16 bits and shifts the result back down.
+ */
+u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len);
+
+/*
+ * crct10dif_vpmsum() - fold @len bytes starting at @p into the CRC @crci.
+ *
+ * Buffers shorter than VECTOR_BREAKPOINT + VMX_ALIGN bytes, and any call for
+ * which crypto_simd_usable() returns false, are handled entirely by
+ * crc_t10dif_generic().  Otherwise the buffer is processed in three pieces:
+ * the bytes up to the next VMX_ALIGN boundary (generic), the following
+ * multiple of VMX_ALIGN bytes (__crct10dif_vpmsum), and the remaining
+ * sub-VMX_ALIGN tail (generic).
+ *
+ * Returns the updated 16-bit CRC.
+ */
+static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len)
+{
+	unsigned int misalign, head, rest;
+	size_t bulk;
+	u32 crc = crci;
+
+	/* Too short for the vector routine, or SIMD not usable right now. */
+	if (!(len >= (VECTOR_BREAKPOINT + VMX_ALIGN) && crypto_simd_usable()))
+		return crc_t10dif_generic(crc, p, len);
+
+	/* Consume the unaligned head so that p sits on a VMX_ALIGN boundary. */
+	misalign = (unsigned long)p & VMX_ALIGN_MASK;
+	if (misalign) {
+		head = VMX_ALIGN - misalign;
+		crc = crc_t10dif_generic(crc, p, head);
+		p += head;
+		len -= head;
+	}
+
+	bulk = len & ~VMX_ALIGN_MASK;
+	rest = len & VMX_ALIGN_MASK;
+
+	if (bulk) {
+		/* The vector routine works on the CRC held in the upper 16 bits. */
+		crc <<= 16;
+		preempt_disable();
+		pagefault_disable();
+		enable_kernel_altivec();
+		crc = __crct10dif_vpmsum(crc, p, bulk);
+		disable_kernel_altivec();
+		pagefault_enable();
+		preempt_enable();
+		crc >>= 16;
+	}
+
+	/* Whatever is left is shorter than VMX_ALIGN bytes. */
+	if (rest)
+		crc = crc_t10dif_generic(crc, p + bulk, rest);
+
+	return crc & 0xffff;
+}
+
+/* shash .init callback: reset the u16 CRC kept in the descriptor context to 0. */
+static int crct10dif_vpmsum_init(struct shash_desc *desc)
+{
+	*(u16 *)shash_desc_ctx(desc) = 0;
+
+	return 0;
+}
+
+/*
+ * shash .update callback: run @length bytes of @data through
+ * crct10dif_vpmsum() and store the new CRC back in the descriptor context.
+ * Always returns 0.
+ */
+static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *state = shash_desc_ctx(desc);
+	u16 running = *state;
+
+	*state = crct10dif_vpmsum(running, data, length);
+	return 0;
+}
+
+
+/*
+ * shash .final callback: copy the u16 CRC from the descriptor context to
+ * @out as a native-endian u16 store.  Always returns 0.
+ */
+static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out)
+{
+	const u16 *state = shash_desc_ctx(desc);
+	u16 *digest = (u16 *)out;
+
+	*digest = *state;
+
+	return 0;
+}
+
+/*
+ * shash algorithm descriptor; registered by crct10dif_vpmsum_mod_init() and
+ * unregistered by crct10dif_vpmsum_mod_fini().
+ */
+static struct shash_alg alg = {
+	.init		= crct10dif_vpmsum_init,
+	.update		= crct10dif_vpmsum_update,
+	.final		= crct10dif_vpmsum_final,
+	/* the callbacks above treat the per-descriptor context as one u16 CRC */
+	.descsize	= CRC_T10DIF_DIGEST_SIZE,
+	.digestsize	= CRC_T10DIF_DIGEST_SIZE,
+	.base		= {
+		/* generic algorithm name and the name of this specific driver */
+		.cra_name		= "crct10dif",
+		.cra_driver_name	= "crct10dif-vpmsum",
+		.cra_priority		= 200,
+		.cra_blocksize		= CRC_T10DIF_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+/*
+ * Module init: register the shash algorithm when the CPU reports
+ * CPU_FTR_ARCH_207S; otherwise fail the load with -ENODEV.
+ */
+static int __init crct10dif_vpmsum_mod_init(void)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return crypto_register_shash(&alg);
+
+	return -ENODEV;
+}
+
+/* Module exit: unregister the shash algorithm registered at module init. */
+static void __exit crct10dif_vpmsum_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init);
+module_exit(crct10dif_vpmsum_mod_fini);
+
+MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
+MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-vpmsum");
diff --git a/arch/powerpc/crypto/ghashp10-ppc.pl b/arch/powerpc/crypto/ghashp10-ppc.pl
new file mode 100644
index 0000000000..27a6b0bec6
--- /dev/null
+++ b/arch/powerpc/crypto/ghashp10-ppc.pl
@@ -0,0 +1,370 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for further improvement.
+
+# Command line: <flavour> [<output>]
+$flavour = shift;
+$output  = shift;
+
+# Pick the word size and the matching store-with-update / load / store
+# mnemonics from the flavour string; anything that mentions neither 64 nor
+# 32 is rejected.
+if ($flavour =~ /64/) {
+	($SIZE_T, $STU, $POP, $PUSH) = (8, "stdu", "ld", "std");
+	$LRSAVE = 2*$SIZE_T;
+} elsif ($flavour =~ /32/) {
+	($SIZE_T, $STU, $POP, $PUSH) = (4, "stwu", "lwz", "stw");
+	$LRSAVE = $SIZE_T;
+} else {
+	die "nonsense $flavour";
+}
+
+# Look for ppc-xlate.pl next to this script first, then in ../../perlasm
+# relative to it; give up if neither file exists.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+# Everything printed from here on is piped through the translator.
+# Use low-precedence "or": with "||" the die was bound to the command string
+# (which is always true), so a failed open went unreported.
+open(STDOUT,"| $^X $xlate $flavour $output") or die "can't call $xlate: $!";
+
+# Symbolic register names interpolated into the assembly template below.
+# General-purpose registers r3..r6:
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block
+
+# Vector registers v0..v3, v4..v12 and v13..v19 respectively:
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
+# r12 holds the value read from SPR 256 on entry and written back on exit
+my $vrsave="r12";
+# $t4..$t6 are aliases, not extra registers: writing them overwrites
+# $Hl (v11), $H (v9) and $Hh (v10)
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+
+$code=<<___;
+.machine	"any"
+
+.text
+
+.globl	.gcm_init_p10
+	lis		r0,0xfff0
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$H,0,r4			# load H
+	le?xor		r7,r7,r7
+	le?addi		r7,r7,0x8		# need a vperm start with 08
+	le?lvsr		5,0,r7
+	le?vspltisb	6,0x0f
+	le?vxor		5,5,6			# set a b-endian mask
+	le?vperm	$H,$H,$H,5
+
+	vspltisb	$xC2,-16		# 0xf0
+	vspltisb	$t0,1			# one
+	vaddubm		$xC2,$xC2,$xC2		# 0xe0
+	vxor		$zero,$zero,$zero
+	vor		$xC2,$xC2,$t0		# 0xe1
+	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
+	vsldoi		$t1,$zero,$t0,1		# ...1
+	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
+	vspltisb	$t2,7
+	vor		$xC2,$xC2,$t1		# 0xc2....01
+	vspltb		$t1,$H,0		# most significant byte
+	vsl		$H,$H,$t0		# H<<=1
+	vsrab		$t1,$t1,$t2		# broadcast carry bit
+	vand		$t1,$t1,$xC2
+	vxor		$H,$H,$t1		# twisted H
+
+	vsldoi		$H,$H,$H,8		# twist even more ...
+	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
+	vsldoi		$Hl,$zero,$H,8		# ... and split
+	vsldoi		$Hh,$H,$zero,8
+
+	stvx_u		$xC2,0,r3		# save pre-computed table
+	stvx_u		$Hl,r8,r3
+	stvx_u		$H, r9,r3
+	stvx_u		$Hh,r10,r3
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,2,0
+	.long		0
+.size	.gcm_init_p10,.-.gcm_init_p10
+
+.globl	.gcm_init_htable
+	lis		r0,0xfff0
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$H,0,r4			# load H
+
+	vspltisb	$xC2,-16		# 0xf0
+	vspltisb	$t0,1			# one
+	vaddubm		$xC2,$xC2,$xC2		# 0xe0
+	vxor		$zero,$zero,$zero
+	vor		$xC2,$xC2,$t0		# 0xe1
+	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
+	vsldoi		$t1,$zero,$t0,1		# ...1
+	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
+	vspltisb	$t2,7
+	vor		$xC2,$xC2,$t1		# 0xc2....01
+	vspltb		$t1,$H,0		# most significant byte
+	vsl		$H,$H,$t0		# H<<=1
+	vsrab		$t1,$t1,$t2		# broadcast carry bit
+	vand		$t1,$t1,$xC2
+	vxor		$IN,$H,$t1		# twisted H
+
+	vsldoi		$H,$IN,$IN,8		# twist even more ...
+	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
+	vsldoi		$Hl,$zero,$H,8		# ... and split
+	vsldoi		$Hh,$H,$zero,8
+
+	stvx_u		$xC2,0,r3		# save pre-computed table
+	stvx_u		$Hl,r8,r3
+	li		r8,0x40
+	stvx_u		$H, r9,r3
+	li		r9,0x50
+	stvx_u		$Hh,r10,r3
+	li		r10,0x60
+
+	vpmsumd		$Xl,$IN,$Hl		# H.lo·H.lo
+	vpmsumd		$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
+	vpmsumd		$Xh,$IN,$Hh		# H.hi·H.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	vxor		$t1,$t1,$Xh
+	vxor		$IN1,$Xl,$t1
+
+	vsldoi		$H2,$IN1,$IN1,8
+	vsldoi		$H2l,$zero,$H2,8
+	vsldoi		$H2h,$H2,$zero,8
+
+	stvx_u		$H2l,r8,r3		# save H^2
+	li		r8,0x70
+	stvx_u		$H2,r9,r3
+	li		r9,0x80
+	stvx_u		$H2h,r10,r3
+	li		r10,0x90
+
+	vpmsumd		$Xl,$IN,$H2l		# H.lo·H^2.lo
+	 vpmsumd	$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
+	vpmsumd		$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
+	 vpmsumd	$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
+	vpmsumd		$Xh,$IN,$H2h		# H.hi·H^2.hi
+	 vpmsumd	$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+	 vpmsumd	$t6,$Xl1,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	 vsldoi		$t4,$Xm1,$zero,8
+	 vsldoi		$t5,$zero,$Xm1,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+	 vxor		$Xl1,$Xl1,$t4
+	 vxor		$Xh1,$Xh1,$t5
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	 vsldoi		$Xl1,$Xl1,$Xl1,8
+	vxor		$Xl,$Xl,$t2
+	 vxor		$Xl1,$Xl1,$t6
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	 vsldoi		$t5,$Xl1,$Xl1,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	 vpmsumd	$Xl1,$Xl1,$xC2
+	vxor		$t1,$t1,$Xh
+	 vxor		$t5,$t5,$Xh1
+	vxor		$Xl,$Xl,$t1
+	 vxor		$Xl1,$Xl1,$t5
+
+	vsldoi		$H,$Xl,$Xl,8
+	 vsldoi		$H2,$Xl1,$Xl1,8
+	vsldoi		$Hl,$zero,$H,8
+	vsldoi		$Hh,$H,$zero,8
+	 vsldoi		$H2l,$zero,$H2,8
+	 vsldoi		$H2h,$H2,$zero,8
+
+	stvx_u		$Hl,r8,r3		# save H^3
+	li		r8,0xa0
+	stvx_u		$H,r9,r3
+	li		r9,0xb0
+	stvx_u		$Hh,r10,r3
+	li		r10,0xc0
+	 stvx_u		$H2l,r8,r3		# save H^4
+	 stvx_u		$H2,r9,r3
+	 stvx_u		$H2h,r10,r3
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,2,0
+	.long		0
+.size	.gcm_init_htable,.-.gcm_init_htable
+
+.globl	.gcm_gmult_p10
+	lis		r0,0xfff8
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$IN,0,$Xip		# load Xi
+
+	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
+	 le?lvsl	$lemask,r0,r0
+	lvx_u		$H, r9,$Htbl
+	 le?vspltisb	$t0,0x07
+	lvx_u		$Hh,r10,$Htbl
+	 le?vxor	$lemask,$lemask,$t0
+	lvx_u		$xC2,0,$Htbl
+	 le?vperm	$IN,$IN,$IN,$lemask
+	vxor		$zero,$zero,$zero
+
+	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
+	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
+	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
+	vpmsumd		$Xl,$Xl,$xC2
+	vxor		$t1,$t1,$Xh
+	vxor		$Xl,$Xl,$t1
+
+	le?vperm	$Xl,$Xl,$Xl,$lemask
+	stvx_u		$Xl,0,$Xip		# write out Xi
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,2,0
+	.long		0
+.size	.gcm_gmult_p10,.-.gcm_gmult_p10
+
+.globl	.gcm_ghash_p10
+	lis		r0,0xfff8
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$Xl,0,$Xip		# load Xi
+
+	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
+	 le?lvsl	$lemask,r0,r0
+	lvx_u		$H, r9,$Htbl
+	 le?vspltisb	$t0,0x07
+	lvx_u		$Hh,r10,$Htbl
+	 le?vxor	$lemask,$lemask,$t0
+	lvx_u		$xC2,0,$Htbl
+	 le?vperm	$Xl,$Xl,$Xl,$lemask
+	vxor		$zero,$zero,$zero
+
+	lvx_u		$IN,0,$inp
+	addi		$inp,$inp,16
+	subi		$len,$len,16
+	 le?vperm	$IN,$IN,$IN,$lemask
+	vxor		$IN,$IN,$Xl
+	b		Loop
+
+.align	5
+Loop:
+	 subic		$len,$len,16
+	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
+	 subfe.		r0,r0,r0		# borrow?-1:0
+	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
+	 and		r0,r0,$len
+	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
+	 add		$inp,$inp,r0
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+	 lvx_u		$IN,0,$inp
+	 addi		$inp,$inp,16
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
+	vpmsumd		$Xl,$Xl,$xC2
+	 le?vperm	$IN,$IN,$IN,$lemask
+	vxor		$t1,$t1,$Xh
+	vxor		$IN,$IN,$t1
+	vxor		$IN,$IN,$Xl
+	beq		Loop			# did $len-=16 borrow?
+
+	vxor		$Xl,$Xl,$t1
+	le?vperm	$Xl,$Xl,$Xl,$lemask
+	stvx_u		$Xl,0,$Xip		# write out Xi
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,4,0
+	.long		0
+.size	.gcm_ghash_p10,.-.gcm_ghash_p10
+
+.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align  2
+___
+
+# Emit the template line by line, resolving the endian-conditional prefixes:
+# for a flavour ending in "le" a "le?" prefix is dropped and "be?" is turned
+# into the marker "#be#"; for any other flavour it is the other way round.
+# At most one substitution is applied per line.
+foreach (split("\n",$code)) {
+	if ($flavour =~ /le$/o) {	# little-endian
+	    s/le\?//o		or
+	    s/be\?/#be#/o;
+	} else {
+	    s/le\?/#le#/o	or
+	    s/be\?//o;
+	}
+	print $_,"\n";
+}
+
+# Closing the pipe flushes it and waits for the translator; close returns
+# false if the flush failed or the child exited with a non-zero status, so
+# check it instead of silently leaving truncated output behind.
+close STDOUT or die "error closing pipe to $xlate: $! (status $?)";
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
new file mode 100644
index 0000000000..fa6bc440cf
--- /dev/null
+++ b/arch/powerpc/crypto/md5-asm.S
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Fast MD5 implementation for PPC
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+
+/*
+ * Register aliases used by ppc_md5_transform.
+ *
+ * rHP: base address of the four 32-bit hash words (accessed at offsets 0..12)
+ * rWP: address the input words are loaded from
+ */
+#define rHP	r3
+#define rWP	r4
+
+/*
+ * Working copies of the four hash words.  rH3 lives in r5, which on entry
+ * still holds the block count; the function moves r5 to CTR before loading
+ * rH3.
+ */
+#define rH0	r0
+#define rH1	r6
+#define rH2	r7
+#define rH3	r5
+
+/* The sixteen 32-bit input words of the current block (r13 is not used) */
+#define rW00	r8
+#define rW01	r9
+#define rW02	r10
+#define rW03	r11
+#define rW04	r12
+#define rW05	r14
+#define rW06	r15
+#define rW07	r16
+#define rW08	r17
+#define rW09	r18
+#define rW10	r19
+#define rW11	r20
+#define rW12	r21
+#define rW13	r22
+#define rW14	r23
+#define rW15	r24
+
+/* Scratch registers for the per-step boolean function */
+#define rT0	r25
+#define rT1	r26
+
+/*
+ * Prologue: allocate a stack frame of INT_FRAME_SIZE bytes with a
+ * store-with-update of r1, then save GPRs 14..26 in it.
+ */
+#define INITIALIZE \
+	PPC_STLU r1,-INT_FRAME_SIZE(r1); \
+	SAVE_GPRS(14, 26, r1)		/* push registers onto stack	*/
+
+/* Epilogue: restore GPRs 14..26 and release the frame again. */
+#define FINALIZE \
+	REST_GPRS(14, 26, r1);		/* pop registers from stack	*/ \
+	addi	r1,r1,INT_FRAME_SIZE
+
+/*
+ * Input word access.
+ *
+ * Big-endian build: each word is fetched with a byte-reversed load (lwbrx)
+ * from the address in rWP; the "off" argument is ignored and INC_PTR
+ * advances rWP by 4 after every load, so NEXT_BLOCK has nothing left to do.
+ *
+ * Otherwise: each word is fetched with a plain lwz at displacement "off"
+ * from rWP, INC_PTR is empty, and NEXT_BLOCK advances rWP by 64 once per
+ * block.
+ */
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rWP;	/* load data			*/
+#define INC_PTR \
+	addi		rWP,rWP,4;	/* increment per word		*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#else
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rWP);	/* load data			*/
+#define INC_PTR				/* nothing to do		*/
+#define NEXT_BLOCK \
+	addi		rWP,rWP,64;	/* increment per block		*/
+#endif
+
+/*
+ * R_00_15: two consecutive steps using f = (b and c) or (~b and d), with
+ * the input words loaded on the fly.
+ *
+ * Lines tagged "1:" form the first step, which updates register a from
+ * (b, c, d) and w0.  Lines tagged "2:" form the second step, which updates
+ * register d from (a, b, c) and w1; their comments are written with the
+ * roles already rotated, so "a" in a "2:" comment is the register passed
+ * as d.  The two steps are interleaved.
+ *
+ * w0/w1:   registers receiving the words loaded at off and off+4; the
+ *          constant is added into them in place, so they hold w + k on exit
+ * k?h/k?l: high/low 16 bits of the constant, added with addis/addi (addi
+ *          sign-extends its immediate)
+ * p, q:    rotate-right counts for rotrwi, i.e. a rotate left by 32 - p
+ *          and 32 - q
+ * Clobbers rT0 and rT1.
+ */
+#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
+	LOAD_DATA(w0, off)		/*    W				*/ \
+	and		rT0,b,c;	/* 1: f = b and c		*/ \
+	INC_PTR				/*    ptr++			*/ \
+	andc		rT1,d,b;	/* 1: f' = ~b and d		*/ \
+	LOAD_DATA(w1, off+4)		/*    W				*/ \
+	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
+	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
+	add		a,a,rT0;	/* 1: a = a + f			*/ \
+	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
+	addis		w1,w1,k1h;	/* 2: wk = w + k		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addi		w1,w1,k1l;	/* 2: wk = w + k'		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	and		rT0,a,b;	/* 2: f = b and c		*/ \
+	andc		rT1,c,a;	/* 2: f' = ~b and d		*/ \
+	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
+	add		d,d,rT0;	/* 2: a = a + f			*/ \
+	INC_PTR				/*    ptr++			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+/*
+ * R_16_31: two consecutive steps using f = (c and ~d) or (b and d).
+ *
+ * Lines tagged "1:" form the first step (updates register a from b, c, d
+ * and w0); lines tagged "2:" form the second step (updates register d from
+ * a, b, c and w1), with their comments written in rotated roles.
+ *
+ * w0/w1 are registers that already hold input words; nothing is loaded
+ * here.  The constant (k?h:k?l, added with addis/addi, addi sign-extending
+ * its immediate) is added into them in place.  p and q are rotate-right
+ * counts for rotrwi, i.e. a rotate left by 32 - p and 32 - q.
+ * Clobbers rT0 and rT1.
+ */
+#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+	andc		rT0,c,d;	/* 1: f = c and ~d		*/ \
+	and		rT1,b,d;	/* 1: f' = b and d		*/ \
+	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
+	or		rT0,rT0,rT1;	/* 1: f = f or f'		*/ \
+	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
+	add		a,a,rT0;	/* 1: a = a + f			*/ \
+	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
+	andc		rT0,b,c;	/* 2: f = c and ~d		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	and		rT1,a,c;	/* 2: f' = b and d		*/ \
+	or		rT0,rT0,rT1;	/* 2: f = f or f'		*/ \
+	add		d,d,rT0;	/* 2: a = a + f			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+/*
+ * R_32_47: two consecutive steps using f = b xor c xor d.
+ *
+ * Lines tagged "1:" form the first step (updates register a); lines tagged
+ * "2:" form the second step (updates register d), with their comments
+ * written in rotated roles.  rT0 = b xor c is computed once and reused:
+ * the second step needs a xor b xor c, which is rT0 xor a.
+ *
+ * w0/w1 are registers that already hold input words; the constant
+ * (k?h:k?l, added with addis/addi, addi sign-extending its immediate) is
+ * added into them in place.  p and q are rotate-right counts for rotrwi,
+ * i.e. a rotate left by 32 - p and 32 - q.
+ * Clobbers rT0 and rT1.
+ */
+#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+	xor		rT0,b,c;	/* 1: f' = b xor c		*/ \
+	addi		w0,w0,k0l;	/* 1: wk = w + k		*/ \
+	xor		rT1,rT0,d;	/* 1: f = f xor f'		*/ \
+	addis		w0,w0,k0h;	/* 1: wk = w + k'		*/ \
+	add		a,a,rT1;	/* 1: a = a + f			*/ \
+	addi		w1,w1,k1l;	/* 2: wk = w + k		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addis		w1,w1,k1h;	/* 2: wk = w + k'		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	xor		rT1,rT0,a;	/* 2: f = b xor f'		*/ \
+	add		d,d,rT1;	/* 2: a = a + f			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+/*
+ * R_48_63: two consecutive steps using f = (b or ~d) xor c.
+ *
+ * Lines tagged "1:" form the first step (updates register a from b, c, d
+ * and w0); lines tagged "2:" form the second step (updates register d from
+ * a, b, c and w1), with their comments written in rotated roles.
+ *
+ * w0/w1 are registers that already hold input words; the constant
+ * (k?h:k?l, added with addis/addi, addi sign-extending its immediate) is
+ * added into them in place.  p and q are rotate-right counts for rotrwi,
+ * i.e. a rotate left by 32 - p and 32 - q.
+ * Clobbers rT0 only.
+ */
+#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+	addi		w0,w0,k0l;	/* 1: w = w + k			*/ \
+	orc		rT0,b,d;	/* 1: f = b or ~d		*/ \
+	addis		w0,w0,k0h;	/* 1: w = w + k'		*/ \
+	xor		rT0,rT0,c;	/* 1: f = f xor c		*/ \
+	add		a,a,w0;		/* 1: a = a + wk		*/ \
+	addi		w1,w1,k1l;	/* 2: w = w + k			*/ \
+	add		a,a,rT0;	/* 1: a = a + f			*/ \
+	addis		w1,w1,k1h;	/* 2: w = w + k'		*/ \
+	rotrwi		a,a,p;		/* 1: a = a rotl x		*/ \
+	add		a,a,b;		/* 1: a = a + b			*/ \
+	orc		rT0,a,c;	/* 2: f = b or ~d		*/ \
+	add		d,d,w1;		/* 2: a = a + wk		*/ \
+	xor		rT0,rT0,b;	/* 2: f = f xor c		*/ \
+	add		d,d,rT0;	/* 2: a = a + f			*/ \
+	rotrwi		d,d,q;		/* 2: a = a rotl x		*/ \
+	add		d,d,a;		/* 2: a = a + b			*/
+
+/*
+ * ppc_md5_transform
+ *
+ * On entry:
+ *   r3 (rHP) - address of four 32-bit hash words; read once before the
+ *              loop and rewritten after every block
+ *   r4 (rWP) - address of the input data, consumed 64 bytes per iteration
+ *   r5       - iteration count, moved to CTR (r5 is then reused as rH3)
+ *
+ * The loop body runs before bdnz decrements and tests CTR, so the count
+ * must be at least 1.
+ *
+ * The R_* macros add each constant into the W register in place, so a W
+ * register that is used again in a later group still contains the
+ * constant from its previous use.  The constants passed to the later
+ * groups look like differences for that reason, e.g. 0x0d566e0c (first
+ * R_16_31, rW01) = 0xf61e2562 - 0xe8c7b756.
+ */
+_GLOBAL(ppc_md5_transform)
+	INITIALIZE
+
+	/* r5 is about to become rH3, so park the count in CTR first */
+	mtctr		r5
+	lwz		rH0,0(rHP)
+	lwz		rH1,4(rHP)
+	lwz		rH2,8(rHP)
+	lwz		rH3,12(rHP)
+
+ppc_md5_main:
+	R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
+		0xd76b, -23432, 0xe8c8, -18602)
+	R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
+		0x2420, 0x70db, 0xc1be, -12562)
+	R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
+		0xf57c, 0x0faf, 0x4788, -14806)
+	R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
+		0xa830, 0x4613, 0xfd47, -27391)
+	R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
+		0x6981, -26408, 0x8b45,  -2129)
+	R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
+		0xffff, 0x5bb1, 0x895d, -10306)
+	R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
+		0x6b90, 0x1122, 0xfd98, 0x7193)
+	R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
+		0xa679, 0x438e, 0x49b4, 0x0821)
+
+	R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
+		0x0d56, 0x6e0c, 0x1810, 0x6d2d)
+	R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
+		0x9d02, -32109, 0x124c, 0x2332)
+	R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
+		0x8ea7, 0x4a33, 0x0245, -18270)
+	R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
+		0x8eee,  -8608, 0xf258,  -5095)
+	R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
+		0x969d, -10697, 0x1cbe, -15288)
+	R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
+		0x3317, 0x3e99, 0xdbd9, 0x7c15)
+	R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
+		0xac4b, 0x7772, 0xd8cf, 0x331d)
+	R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
+		0x6a28, 0x6dd8, 0x219a, 0x3b68)
+
+	R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
+		0x29cb, 0x28e5, 0x4218,  -7788)
+	R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16,  9,
+		0x473f, 0x06d1, 0x3aae, 0x3036)
+	R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
+		0xaea1, -15134, 0x640b, -11295)
+	R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16,  9,
+		0x8f4c, 0x4887, 0xbc7c, -22499)
+	R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
+		0x7eb8, -27199, 0x00ea, 0x6050)
+	R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16,  9,
+		0xe01a, 0x22fe, 0x4447, 0x69c5)
+	R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
+		0xb7f3, 0x0253, 0x59b1, 0x4d5b)
+	R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16,  9,
+		0x4701, -27017, 0xc7bd, -19859)
+
+	R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
+		0x0988,  -1462, 0x4c70, -19401)
+	R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
+		0xadaf,  -5221, 0xfc99, 0x66f7)
+	R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
+		0x7e80, -16418, 0xba1e, -25587)
+	R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
+		0x4130, 0x380d, 0xe0c5, 0x738d)
+	/*
+	 * rW00, rW14, rW12 and rW10 have had their last use as input words,
+	 * so they are refilled with the hash words stored at rHP, interleaved
+	 * with the remaining steps.
+	 */
+	lwz		rW00,0(rHP)
+	R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
+		0xe837, -30770, 0xde8a, 0x69e8)
+	lwz		rW14,4(rHP)
+	R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
+		0x9e79, 0x260f, 0x256d, -27941)
+	lwz		rW12,8(rHP)
+	R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
+		0xab75, -20775, 0x4f9e, -28397)
+	lwz		rW10,12(rHP)
+	R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
+		0x662b, 0x7c56, 0x11b2, 0x0358)
+
+	/* new hash word = stored hash word + working value; write it back */
+	add		rH0,rH0,rW00
+	stw		rH0,0(rHP)
+	add		rH1,rH1,rW14
+	stw		rH1,4(rHP)
+	add		rH2,rH2,rW12
+	stw		rH2,8(rHP)
+	add		rH3,rH3,rW10
+	stw		rH3,12(rHP)
+	NEXT_BLOCK
+
+	/* decrement CTR and loop while it is non-zero */
+	bdnz		ppc_md5_main
+
+	FINALIZE
+	blr
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
new file mode 100644
index 0000000000..c24f605033
--- /dev/null
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for MD5 implementation for PPC assembler
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/md5.h>
+#include <asm/byteorder.h>
+
+extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
+
+/*
+ * Wipe the whole MD5 context by storing zero into it one 32-bit word at
+ * a time.
+ */
+static inline void ppc_md5_clear_context(struct md5_state *sctx)
+{
+	u32 *word = (u32 *)sctx;
+	int remaining = sizeof(struct md5_state) >> 2;
+
+	/* word-wise clearing only works if the size is a multiple of 4 */
+	BUILD_BUG_ON(sizeof(struct md5_state) % 4);
+	do {
+		*word = 0;
+		word++;
+		remaining--;
+	} while (remaining);
+}
+
+/*
+ * Start a new digest: reset the byte counter and load the four chaining
+ * words from MD5_H0..MD5_H3.  Always returns 0.
+ */
+static int ppc_md5_init(struct shash_desc *desc)
+{
+	struct md5_state *state = shash_desc_ctx(desc);
+
+	state->byte_count = 0;
+	state->hash[0] = MD5_H0;
+	state->hash[1] = MD5_H1;
+	state->hash[2] = MD5_H2;
+	state->hash[3] = MD5_H3;
+
+	return 0;
+}
+
+/*
+ * Feed @len bytes from @data into the digest.
+ *
+ * Input is hashed in 64-byte blocks by ppc_md5_transform(); whatever does
+ * not fill a block is kept in sctx->block for the next call.  Always
+ * returns 0.
+ */
+static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct md5_state *state = shash_desc_ctx(desc);
+	u8 *buf = (u8 *)state->block;
+	const unsigned int filled = state->byte_count & 0x3f;
+	const unsigned int room = 64 - filled;
+	unsigned int nblocks;
+
+	state->byte_count += len;
+
+	/* Not enough input to complete the buffered block: just stash it. */
+	if (len < room) {
+		memcpy(buf + filled, data, len);
+		return 0;
+	}
+
+	/* Complete and hash a partially filled block first. */
+	if (filled) {
+		memcpy(buf + filled, data, room);
+		ppc_md5_transform(state->hash, buf, 1);
+		data += room;
+		len -= room;
+	}
+
+	/* Hash all remaining whole blocks straight from the caller's buffer. */
+	nblocks = len >> 6;
+	if (nblocks) {
+		ppc_md5_transform(state->hash, data, nblocks);
+		data += nblocks << 6;
+		len &= 0x3f;
+	}
+
+	/* Keep the tail (possibly empty) for the next update/final. */
+	memcpy(buf, data, len);
+	return 0;
+}
+
+/*
+ * Finish the digest.
+ *
+ * Appends the 0x80 marker, zero padding and the message length in bits
+ * (64-bit little endian, stored in the last 8 bytes of the block), runs
+ * the final block(s) through ppc_md5_transform(), writes the four hash
+ * words to @out as little-endian 32-bit values and then wipes the context.
+ * Always returns 0.
+ */
+static int ppc_md5_final(struct shash_desc *desc, u8 *out)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+	const unsigned int offset = sctx->byte_count & 0x3f;
+	const u8 *src = (const u8 *)sctx->block;
+	u8 *p = (u8 *)src + offset;
+	int padlen = 55 - offset;
+	__le64 *pbits = (__le64 *)((char *)sctx->block + 56);
+	__le32 *dst = (__le32 *)out;
+
+	*p++ = 0x80;
+
+	if (padlen < 0) {
+		/*
+		 * No room left for the length field: zero up to the end of
+		 * this block, hash it, and build the padding in a new block.
+		 */
+		memset(p, 0x00, padlen + sizeof (u64));
+		ppc_md5_transform(sctx->hash, src, 1);
+		/* p is a u8 pointer; cast accordingly (was a char * cast) */
+		p = (u8 *)sctx->block;
+		padlen = 56;
+	}
+
+	memset(p, 0, padlen);
+	*pbits = cpu_to_le64(sctx->byte_count << 3);
+	ppc_md5_transform(sctx->hash, src, 1);
+
+	dst[0] = cpu_to_le32(sctx->hash[0]);
+	dst[1] = cpu_to_le32(sctx->hash[1]);
+	dst[2] = cpu_to_le32(sctx->hash[2]);
+	dst[3] = cpu_to_le32(sctx->hash[3]);
+
+	/* do not leave hash state or buffered message bytes behind */
+	ppc_md5_clear_context(sctx);
+	return 0;
+}
+
+/* Copy the raw md5_state of @desc into the caller's buffer @out. */
+static int ppc_md5_export(struct shash_desc *desc, void *out)
+{
+	const struct md5_state *state = shash_desc_ctx(desc);
+
+	memcpy(out, state, sizeof(struct md5_state));
+
+	return 0;
+}
+
+/* Overwrite the md5_state of @desc with the raw state found at @in. */
+static int ppc_md5_import(struct shash_desc *desc, const void *in)
+{
+	struct md5_state *state = shash_desc_ctx(desc);
+
+	memcpy(state, in, sizeof(struct md5_state));
+
+	return 0;
+}
+
+/*
+ * shash descriptor for the assembler-backed MD5.  It is registered under
+ * the generic name "md5" with driver name "md5-ppc" and priority 200.
+ * Both the per-request context and the exported state are a plain
+ * struct md5_state.
+ */
+static struct shash_alg alg = {
+	.digestsize	=	MD5_DIGEST_SIZE,
+	.init		=	ppc_md5_init,
+	.update		=	ppc_md5_update,
+	.final		=	ppc_md5_final,
+	.export		=	ppc_md5_export,
+	.import		=	ppc_md5_import,
+	.descsize	=	sizeof(struct md5_state),
+	.statesize	=	sizeof(struct md5_state),
+	.base		=	{
+		.cra_name	=	"md5",
+		.cra_driver_name=	"md5-ppc",
+		.cra_priority	=	200,
+		.cra_blocksize	=	MD5_HMAC_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+/* Module load: register the md5-ppc shash; returns the registration result. */
+static int __init ppc_md5_mod_init(void)
+{
+	int err = crypto_register_shash(&alg);
+
+	return err;
+}
+
+/* Module unload: unregister the md5-ppc shash registered at init time. */
+static void __exit ppc_md5_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(ppc_md5_mod_init);
+module_exit(ppc_md5_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
+
+MODULE_ALIAS_CRYPTO("md5");
+MODULE_ALIAS_CRYPTO("md5-ppc");
diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c
new file mode 100644
index 0000000000..95dd708573
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10-glue.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Poly1305 authenticator algorithm, RFC7539.
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/jump_label.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <asm/unaligned.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen);
+asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit);
+asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst);
+
+/*
+ * Open a section that may call the assembler routines: disable preemption,
+ * then enable kernel use of VSX.  Must be paired with vsx_end().
+ */
+static void vsx_begin(void)
+{
+	preempt_disable();
+	enable_kernel_vsx();
+}
+
+/*
+ * Close a section opened by vsx_begin(): give up kernel VSX use, then
+ * re-enable preemption (the reverse order of vsx_begin()).
+ */
+static void vsx_end(void)
+{
+	disable_kernel_vsx();
+	preempt_enable();
+}
+
+/*
+ * Start a new MAC computation: empty buffer, neither key half (r, s)
+ * loaded yet, accumulator reset via poly1305_core_init().  Always
+ * returns 0.
+ */
+static int crypto_poly1305_p10_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->sset = false;
+	ctx->rset = 0;
+	ctx->buflen = 0;
+	poly1305_core_init(&ctx->h);
+
+	return 0;
+}
+
+/*
+ * Take the key from the start of the input stream if it has not been
+ * fully loaded yet.
+ *
+ * The first 16-byte block becomes r (two little-endian 64-bit words in
+ * dctx->core_r), the following 16-byte block becomes s (four little-endian
+ * 32-bit words in dctx->s).  Only whole blocks are taken.
+ *
+ * Returns the number of bytes of @inp that were used as key material
+ * (0, 16 or 32).
+ */
+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
+					       const u8 *inp, unsigned int len)
+{
+	unsigned int consumed = 0;
+
+	/* Both key halves already present: the input is all message. */
+	if (likely(dctx->sset))
+		return 0;
+
+	if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
+		struct poly1305_core_key *rkey = &dctx->core_r;
+
+		rkey->key.r64[0] = get_unaligned_le64(inp);
+		rkey->key.r64[1] = get_unaligned_le64(inp + 8);
+		dctx->rset = 1;
+
+		inp += POLY1305_BLOCK_SIZE;
+		len -= POLY1305_BLOCK_SIZE;
+		consumed = POLY1305_BLOCK_SIZE;
+	}
+
+	if (len >= POLY1305_BLOCK_SIZE) {
+		dctx->s[0] = get_unaligned_le32(inp);
+		dctx->s[1] = get_unaligned_le32(inp + 4);
+		dctx->s[2] = get_unaligned_le32(inp + 8);
+		dctx->s[3] = get_unaligned_le32(inp + 12);
+		dctx->sset = true;
+
+		consumed += POLY1305_BLOCK_SIZE;
+	}
+
+	return consumed;
+}
+
+/*
+ * Absorb @srclen bytes from @src.
+ *
+ * Leading blocks are used as key material until both r and s are set (see
+ * crypto_poly1305_setdctxkey()).  After that, whole 16-byte blocks go to
+ * the assembler: runs of at least four blocks through
+ * poly1305_p10le_4blocks() when SIMD is usable, the rest one block at a
+ * time through poly1305_64s().  A trailing partial block is kept in
+ * dctx->buf.  Always returns 0.
+ */
+static int crypto_poly1305_p10_update(struct shash_desc *desc,
+				      const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int chunk, keybytes, tail;
+
+	/* Top up a partial block left over from an earlier call. */
+	if (unlikely(dctx->buflen)) {
+		chunk = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+		memcpy(dctx->buf + dctx->buflen, src, chunk);
+		dctx->buflen += chunk;
+		src += chunk;
+		srclen -= chunk;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			/* the completed block is either key or message */
+			keybytes = crypto_poly1305_setdctxkey(dctx, dctx->buf,
+							      POLY1305_BLOCK_SIZE);
+			if (likely(!keybytes)) {
+				vsx_begin();
+				poly1305_64s(&dctx->h, dctx->buf,
+					     POLY1305_BLOCK_SIZE, 1);
+				vsx_end();
+			}
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+		/* let the key setup take whole blocks off the front */
+		keybytes = crypto_poly1305_setdctxkey(dctx, src,
+				round_down(srclen, POLY1305_BLOCK_SIZE));
+		src += keybytes;
+		srclen -= keybytes;
+
+		/* bulk path: groups of four blocks */
+		if (crypto_simd_usable() &&
+		    srclen >= POLY1305_BLOCK_SIZE * 4) {
+			tail = srclen % (POLY1305_BLOCK_SIZE * 4);
+
+			vsx_begin();
+			poly1305_p10le_4blocks(&dctx->h, src, srclen);
+			vsx_end();
+
+			src += srclen - tail;
+			srclen = tail;
+		}
+
+		/* remaining whole blocks, one at a time */
+		for (; srclen >= POLY1305_BLOCK_SIZE;
+		     srclen -= POLY1305_BLOCK_SIZE,
+		     src += POLY1305_BLOCK_SIZE) {
+			vsx_begin();
+			poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
+			vsx_end();
+		}
+	}
+
+	/* keep a trailing partial block for the next update/final */
+	if (unlikely(srclen)) {
+		memcpy(dctx->buf, src, srclen);
+		dctx->buflen = srclen;
+	}
+
+	return 0;
+}
+
+/*
+ * Produce the tag in @dst.
+ *
+ * Returns -ENOKEY if the s half of the key was never supplied.  A buffered
+ * partial block is terminated with a 1 byte, zero-padded to 16 bytes and
+ * processed with highbit = 0 before the tag is emitted.
+ */
+static int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int used;
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	used = dctx->buflen;
+	if (used) {
+		/* pad the last partial block: 0x01 followed by zeros */
+		dctx->buf[used++] = 1;
+		memset(dctx->buf + used, 0, POLY1305_BLOCK_SIZE - used);
+
+		vsx_begin();
+		poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+		vsx_end();
+
+		dctx->buflen = 0;
+	}
+
+	poly1305_emit_64(&dctx->h, &dctx->s, dst);
+	return 0;
+}
+
+/*
+ * shash descriptor for the Power10 Poly1305.  It is registered under the
+ * generic name "poly1305" with driver name "poly1305-p10" and priority 300.
+ * No setkey/export/import callbacks are provided here; the key is taken
+ * from the start of the data stream in the update path.
+ */
+static struct shash_alg poly1305_alg = {
+	.digestsize	= POLY1305_DIGEST_SIZE,
+	.init		= crypto_poly1305_p10_init,
+	.update		= crypto_poly1305_p10_update,
+	.final		= crypto_poly1305_p10_final,
+	.descsize	= sizeof(struct poly1305_desc_ctx),
+	.base		= {
+		.cra_name		= "poly1305",
+		.cra_driver_name	= "poly1305-p10",
+		.cra_priority		= 300,
+		.cra_blocksize		= POLY1305_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+/* Module load: register poly1305-p10; returns the registration result. */
+static int __init poly1305_p10_init(void)
+{
+	int err = crypto_register_shash(&poly1305_alg);
+
+	return err;
+}
+
+/* Module unload: unregister the poly1305-p10 shash. */
+static void __exit poly1305_p10_exit(void)
+{
+	crypto_unregister_shash(&poly1305_alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, poly1305_p10_init);
+module_exit(poly1305_p10_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_DESCRIPTION("Optimized Poly1305 for P10");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-p10");
diff --git a/arch/powerpc/crypto/poly1305-p10le_64.S b/arch/powerpc/crypto/poly1305-p10le_64.S
new file mode 100644
index 0000000000..a3c1987f1e
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10le_64.S
@@ -0,0 +1,1075 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated poly1305 implementation for ppc64le.
+#
+# Copyright 2023- IBM Corp. All rights reserved
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# Poly1305 - this version mainly using vector/VSX/Scalar
+#  - 26 bits limbs
+#  - Handle multiple 64 byte blocks.
+#
+# Block size 16 bytes
+# key = (r, s)
+# clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
+# p = 2^130 - 5
+# a += m
+# a = (r * a) % p
+# a += s
+#
+# Improve performance by breaking down the polynomial to the sum of products with
+#     h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
+#
+#  07/22/21 - this revision is based on the above sum of products.  Setup r^4, r^3, r^2, r and s3, s2, s1, s0
+#             to 9 vectors for multiplications.
+#
+# setup r^4, r^3, r^2, r vectors
+#    vs    [r^1, r^3, r^2, r^4]
+#    vs0 = [r0,.....]
+#    vs1 = [r1,.....]
+#    vs2 = [r2,.....]
+#    vs3 = [r3,.....]
+#    vs4 = [r4,.....]
+#    vs5 = [r1*5,...]
+#    vs6 = [r2*5,...]
+#    vs7 = [r3*5,...]
+#    vs8 = [r4*5,...]
+#
+#  Each word in a vector holds one member of "r/s" in [a * r/s].
+#
+# r0, r4*5, r3*5, r2*5, r1*5;
+# r1, r0,   r4*5, r3*5, r2*5;
+# r2, r1,   r0,   r4*5, r3*5;
+# r3, r2,   r1,   r0,   r4*5;
+# r4, r3,   r2,   r1,   r0  ;
+#
+#
+# poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen)
+#  r3 = h: state (accumulator at 0..16(r3), key r at 24(r3) and 32(r3))
+#  r4 = m (message)
+#  r5 = mlen (message length in bytes)
+#
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/linkage.h>
+
+.machine "any"
+
+.text
+
+# SAVE_GPR: store general purpose register GPR at OFFSET(FRAME).
+.macro	SAVE_GPR GPR OFFSET FRAME
+	std	\GPR,\OFFSET(\FRAME)
+.endm
+
+# SAVE_VRS: store vector register VRS at FRAME + OFFSET (stvx).
+# Clobbers r16, which is used to hold OFFSET.
+.macro	SAVE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	stvx	\VRS, 16, \FRAME
+.endm
+
+# SAVE_VSX: store VSX register VSX at FRAME + OFFSET (stxvx).
+# Clobbers r16, which is used to hold OFFSET.
+.macro	SAVE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	stxvx	\VSX, 16, \FRAME
+.endm
+
+# RESTORE_GPR: reload general purpose register GPR from OFFSET(FRAME).
+.macro	RESTORE_GPR GPR OFFSET FRAME
+	ld	\GPR,\OFFSET(\FRAME)
+.endm
+
+# RESTORE_VRS: reload vector register VRS from FRAME + OFFSET (lvx).
+# Clobbers r16, which is used to hold OFFSET.
+.macro	RESTORE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	lvx	\VRS, 16, \FRAME
+.endm
+
+# RESTORE_VSX: reload VSX register VSX from FRAME + OFFSET (lxvx).
+# Clobbers r16, which is used to hold OFFSET.
+.macro	RESTORE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	lxvx	\VSX, 16, \FRAME
+.endm
+
+# SAVE_REGS: function prologue.
+#  - LR is stored at 16(r1) of the caller frame, then a 752 byte frame
+#    is allocated with stdu.
+#  - r14-r31 go to 112..248(r1).
+#  - With r9 = r1 + 256: v20-v31 go to r9 + 0..176 and vs14-vs31 to
+#    r9 + 192..464.
+# r0 and r9 are clobbered.  r16 is used as scratch by SAVE_VRS/SAVE_VSX,
+# which is safe because it has already been saved by then.
+.macro SAVE_REGS
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-752(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+	SAVE_GPR 25, 200, 1
+	SAVE_GPR 26, 208, 1
+	SAVE_GPR 27, 216, 1
+	SAVE_GPR 28, 224, 1
+	SAVE_GPR 29, 232, 1
+	SAVE_GPR 30, 240, 1
+	SAVE_GPR 31, 248, 1
+
+	addi	9, 1, 256
+	SAVE_VRS 20, 0, 9
+	SAVE_VRS 21, 16, 9
+	SAVE_VRS 22, 32, 9
+	SAVE_VRS 23, 48, 9
+	SAVE_VRS 24, 64, 9
+	SAVE_VRS 25, 80, 9
+	SAVE_VRS 26, 96, 9
+	SAVE_VRS 27, 112, 9
+	SAVE_VRS 28, 128, 9
+	SAVE_VRS 29, 144, 9
+	SAVE_VRS 30, 160, 9
+	SAVE_VRS 31, 176, 9
+
+	SAVE_VSX 14, 192, 9
+	SAVE_VSX 15, 208, 9
+	SAVE_VSX 16, 224, 9
+	SAVE_VSX 17, 240, 9
+	SAVE_VSX 18, 256, 9
+	SAVE_VSX 19, 272, 9
+	SAVE_VSX 20, 288, 9
+	SAVE_VSX 21, 304, 9
+	SAVE_VSX 22, 320, 9
+	SAVE_VSX 23, 336, 9
+	SAVE_VSX 24, 352, 9
+	SAVE_VSX 25, 368, 9
+	SAVE_VSX 26, 384, 9
+	SAVE_VSX 27, 400, 9
+	SAVE_VSX 28, 416, 9
+	SAVE_VSX 29, 432, 9
+	SAVE_VSX 30, 448, 9
+	SAVE_VSX 31, 464, 9
+.endm # SAVE_REGS
+
+# RESTORE_REGS: function epilogue, the mirror of SAVE_REGS.
+# Vector and VSX registers are reloaded first (these use r16 as scratch),
+# then r14-r31 (which puts the saved r16 back), then the 752 byte frame is
+# released and LR is reloaded from 16(r1).  r0 and r9 are clobbered.
+.macro RESTORE_REGS
+	addi	9, 1, 256
+	RESTORE_VRS 20, 0, 9
+	RESTORE_VRS 21, 16, 9
+	RESTORE_VRS 22, 32, 9
+	RESTORE_VRS 23, 48, 9
+	RESTORE_VRS 24, 64, 9
+	RESTORE_VRS 25, 80, 9
+	RESTORE_VRS 26, 96, 9
+	RESTORE_VRS 27, 112, 9
+	RESTORE_VRS 28, 128, 9
+	RESTORE_VRS 29, 144, 9
+	RESTORE_VRS 30, 160, 9
+	RESTORE_VRS 31, 176, 9
+
+	RESTORE_VSX 14, 192, 9
+	RESTORE_VSX 15, 208, 9
+	RESTORE_VSX 16, 224, 9
+	RESTORE_VSX 17, 240, 9
+	RESTORE_VSX 18, 256, 9
+	RESTORE_VSX 19, 272, 9
+	RESTORE_VSX 20, 288, 9
+	RESTORE_VSX 21, 304, 9
+	RESTORE_VSX 22, 320, 9
+	RESTORE_VSX 23, 336, 9
+	RESTORE_VSX 24, 352, 9
+	RESTORE_VSX 25, 368, 9
+	RESTORE_VSX 26, 384, 9
+	RESTORE_VSX 27, 400, 9
+	RESTORE_VSX 28, 416, 9
+	RESTORE_VSX 29, 432, 9
+	RESTORE_VSX 30, 448, 9
+	RESTORE_VSX 31, 464, 9
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+	RESTORE_GPR 25, 200, 1
+	RESTORE_GPR 26, 208, 1
+	RESTORE_GPR 27, 216, 1
+	RESTORE_GPR 28, 224, 1
+	RESTORE_GPR 29, 232, 1
+	RESTORE_GPR 30, 240, 1
+	RESTORE_GPR 31, 248, 1
+
+	addi    1, 1, 752
+	ld 0, 16(1)
+	mtlr 0
+.endm # RESTORE_REGS
+
+#
+# p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
+# p[1] = a0*r1 + a1*r0   + a2*r4*5 + a3*r3*5 + a4*r2*5;
+# p[2] = a0*r2 + a1*r1   + a2*r0   + a3*r4*5 + a4*r3*5;
+# p[3] = a0*r3 + a1*r2   + a2*r1   + a3*r0   + a4*r4*5;
+# p[4] = a0*r4 + a1*r3   + a2*r2   + a3*r1   + a4*r0  ;
+#
+#    [r^2, r^3, r^1, r^4]
+#    [m3,  m2,  m4,  m1]
+#
+# multiply odd and even words
+# mul_odd: compute x0..x4 (v14..v18) from the odd words only.
+#  Inputs:  v4..v8   = a0..a4
+#           v26..v30 = r0..r4
+#           v0..v3   = r1*5, r2*5, r3*5, r4*5
+#  Outputs: v14..v18 = x0..x4, following the p[0]..p[4] sums
+#  Scratch: v10..v13
+.macro mul_odd
+	vmulouw	14, 4, 26
+	vmulouw	10, 5, 3
+	vmulouw	11, 6, 2
+	vmulouw	12, 7, 1
+	vmulouw	13, 8, 0
+	vmulouw	15, 4, 27
+	vaddudm	14, 14, 10
+	vaddudm	14, 14, 11
+	vmulouw	10, 5, 26
+	vmulouw	11, 6, 3
+	vaddudm	14, 14, 12
+	vaddudm	14, 14, 13	# x0
+	vaddudm	15, 15, 10
+	vaddudm	15, 15, 11
+	vmulouw	12, 7, 2
+	vmulouw	13, 8, 1
+	vaddudm	15, 15, 12
+	vaddudm	15, 15, 13	# x1
+	vmulouw	16, 4, 28
+	vmulouw	10, 5, 27
+	vmulouw	11, 6, 26
+	vaddudm	16, 16, 10
+	vaddudm	16, 16, 11
+	vmulouw	12, 7, 3
+	vmulouw	13, 8, 2
+	vaddudm	16, 16, 12
+	vaddudm	16, 16, 13	# x2
+	vmulouw	17, 4, 29
+	vmulouw	10, 5, 28
+	vmulouw	11, 6, 27
+	vaddudm	17, 17, 10
+	vaddudm	17, 17, 11
+	vmulouw	12, 7, 26
+	vmulouw	13, 8, 3
+	vaddudm	17, 17, 12
+	vaddudm	17, 17, 13	# x3
+	vmulouw	18, 4, 30
+	vmulouw	10, 5, 29
+	vmulouw	11, 6, 28
+	vaddudm	18, 18, 10
+	vaddudm	18, 18, 11
+	vmulouw	12, 7, 27
+	vmulouw	13, 8, 26
+	vaddudm	18, 18, 12
+	vaddudm	18, 18, 13	# x4
+.endm
+
+# mul_even: same sums as mul_odd but over the even words, ADDED to the
+# values already in v14..v18.  It therefore has to run after mul_odd,
+# which initialises v14..v18.
+#  Inputs:  v4..v8 = a0..a4, v26..v30 = r0..r4, v0..v3 = r1*5..r4*5
+#  Outputs: v14..v18 += even-word products (x0..x4)
+#  Scratch: v9..v13
+.macro mul_even
+	vmuleuw	9, 4, 26
+	vmuleuw	10, 5, 3
+	vmuleuw	11, 6, 2
+	vmuleuw	12, 7, 1
+	vmuleuw	13, 8, 0
+	vaddudm	14, 14, 9
+	vaddudm	14, 14, 10
+	vaddudm	14, 14, 11
+	vaddudm	14, 14, 12
+	vaddudm	14, 14, 13	# x0
+
+	vmuleuw	9, 4, 27
+	vmuleuw	10, 5, 26
+	vmuleuw	11, 6, 3
+	vmuleuw	12, 7, 2
+	vmuleuw	13, 8, 1
+	vaddudm	15, 15, 9
+	vaddudm	15, 15, 10
+	vaddudm	15, 15, 11
+	vaddudm	15, 15, 12
+	vaddudm	15, 15, 13	# x1
+
+	vmuleuw	9, 4, 28
+	vmuleuw	10, 5, 27
+	vmuleuw	11, 6, 26
+	vmuleuw	12, 7, 3
+	vmuleuw	13, 8, 2
+	vaddudm	16, 16, 9
+	vaddudm	16, 16, 10
+	vaddudm	16, 16, 11
+	vaddudm	16, 16, 12
+	vaddudm	16, 16, 13	# x2
+
+	vmuleuw	9, 4, 29
+	vmuleuw	10, 5, 28
+	vmuleuw	11, 6, 27
+	vmuleuw	12, 7, 26
+	vmuleuw	13, 8, 3
+	vaddudm	17, 17, 9
+	vaddudm	17, 17, 10
+	vaddudm	17, 17, 11
+	vaddudm	17, 17, 12
+	vaddudm	17, 17, 13	# x3
+
+	vmuleuw	9, 4, 30
+	vmuleuw	10, 5, 29
+	vmuleuw	11, 6, 28
+	vmuleuw	12, 7, 27
+	vmuleuw	13, 8, 26
+	vaddudm	18, 18, 9
+	vaddudm	18, 18, 10
+	vaddudm	18, 18, 11
+	vaddudm	18, 18, 12
+	vaddudm	18, 18, 13	# x4
+.endm
+
+#
+# poly1305_setup_r
+#
+# setup r^4, r^3, r^2, r vectors
+#    [r, r^3, r^2, r^4]
+#    vs0 = [r0,...]
+#    vs1 = [r1,...]
+#    vs2 = [r2,...]
+#    vs3 = [r3,...]
+#    vs4 = [r4,...]
+#    vs5 = [r1*5,...]
+#    vs6 = [r2*5,...]
+#    vs7 = [r3*5,...]
+#    vs8 = [r4*5,...]
+#
+# r0, r4*5, r3*5, r2*5, r1*5;
+# r1, r0,   r4*5, r3*5, r2*5;
+# r2, r1,   r0,   r4*5, r3*5;
+# r3, r2,   r1,   r0,   r4*5;
+# r4, r3,   r2,   r1,   r0  ;
+#
+.macro poly1305_setup_r
+
+	# save r
+	xxlor	26, 58, 58
+	xxlor	27, 59, 59
+	xxlor	28, 60, 60
+	xxlor	29, 61, 61
+	xxlor	30, 62, 62
+
+	xxlxor	31, 31, 31
+
+#    [r, r^3, r^2, r^4]
+	# compute r^2
+	vmr	4, 26
+	vmr	5, 27
+	vmr	6, 28
+	vmr	7, 29
+	vmr	8, 30
+	bl	do_mul		# r^2 r^1
+	xxpermdi 58, 58, 36, 0x3		# r0
+	xxpermdi 59, 59, 37, 0x3		# r1
+	xxpermdi 60, 60, 38, 0x3		# r2
+	xxpermdi 61, 61, 39, 0x3		# r3
+	xxpermdi 62, 62, 40, 0x3		# r4
+	xxpermdi 36, 36, 36, 0x3
+	xxpermdi 37, 37, 37, 0x3
+	xxpermdi 38, 38, 38, 0x3
+	xxpermdi 39, 39, 39, 0x3
+	xxpermdi 40, 40, 40, 0x3
+	vspltisb 13, 2
+	vsld	9, 27, 13
+	vsld	10, 28, 13
+	vsld	11, 29, 13
+	vsld	12, 30, 13
+	vaddudm	0, 9, 27
+	vaddudm	1, 10, 28
+	vaddudm	2, 11, 29
+	vaddudm	3, 12, 30
+
+	bl	do_mul		# r^4 r^3
+	vmrgow	26, 26, 4
+	vmrgow	27, 27, 5
+	vmrgow	28, 28, 6
+	vmrgow	29, 29, 7
+	vmrgow	30, 30, 8
+	vspltisb 13, 2
+	vsld	9, 27, 13
+	vsld	10, 28, 13
+	vsld	11, 29, 13
+	vsld	12, 30, 13
+	vaddudm	0, 9, 27
+	vaddudm	1, 10, 28
+	vaddudm	2, 11, 29
+	vaddudm	3, 12, 30
+
+	# r^2 r^4
+	xxlor	0, 58, 58
+	xxlor	1, 59, 59
+	xxlor	2, 60, 60
+	xxlor	3, 61, 61
+	xxlor	4, 62, 62
+	xxlor	5, 32, 32
+	xxlor	6, 33, 33
+	xxlor	7, 34, 34
+	xxlor	8, 35, 35
+
+	vspltw	9, 26, 3
+	vspltw	10, 26, 2
+	vmrgow	26, 10, 9
+	vspltw	9, 27, 3
+	vspltw	10, 27, 2
+	vmrgow	27, 10, 9
+	vspltw	9, 28, 3
+	vspltw	10, 28, 2
+	vmrgow	28, 10, 9
+	vspltw	9, 29, 3
+	vspltw	10, 29, 2
+	vmrgow	29, 10, 9
+	vspltw	9, 30, 3
+	vspltw	10, 30, 2
+	vmrgow	30, 10, 9
+
+	vsld	9, 27, 13
+	vsld	10, 28, 13
+	vsld	11, 29, 13
+	vsld	12, 30, 13
+	vaddudm	0, 9, 27
+	vaddudm	1, 10, 28
+	vaddudm	2, 11, 29
+	vaddudm	3, 12, 30
+.endm
+
+# do_mul: multiply using the odd words only (mul_odd), then propagate
+# carries across the five limbs.
+#  Inputs:  as for mul_odd, plus v25 (limb mask) and v31 (shift count),
+#           both loaded from cnum by do_poly1305_init
+#  Outputs: v4..v8 = carry-reduced limbs
+#  Scratch: v9..v18
+SYM_FUNC_START_LOCAL(do_mul)
+	mul_odd
+
+	# do reduction ( h %= p )
+	# carry reduction
+	vspltisb 9, 2
+	vsrd	10, 14, 31
+	vsrd	11, 17, 31
+	vand	7, 17, 25
+	vand	4, 14, 25
+	vaddudm	18, 18, 11
+	vsrd	12, 18, 31
+	vaddudm	15, 15, 10
+
+	vsrd	11, 15, 31
+	vand	8, 18, 25
+	vand	5, 15, 25
+	vaddudm	4, 4, 12	# carry out of the top limb wraps to limb 0 ...
+	vsld	10, 12, 9	# ... times 5: added once here, plus (carry << 2) below
+	vaddudm	6, 16, 11
+
+	vsrd	13, 6, 31
+	vand	6, 6, 25
+	vaddudm	4, 4, 10
+	vsrd	10, 4, 31
+	vaddudm	7, 7, 13
+
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	5, 5, 10
+	vaddudm	8, 8, 11
+	blr
+SYM_FUNC_END(do_mul)
+
+#
+# init key
+#
+# do_poly1305_init: load constants and the key r for the vector code.
+#  - r11, r12 = the two 64-bit clamp masks from rmask
+#  - v25, v31, v19, vs24, vs25 = the five 16-byte rows of cnum
+#  - the key r is read from 24(r3) and 32(r3), clamped, split into five
+#    26-bit limbs and placed in vs58..vs62 (= v26..v30)
+#  - v0..v3 = r1*5 .. r4*5
+# Clobbers r9, r10, r14-r18 and v4.
+.macro do_poly1305_init
+	addis	10, 2, rmask@toc@ha
+	addi	10, 10, rmask@toc@l
+
+	ld	11, 0(10)
+	ld	12, 8(10)
+
+	li	14, 16
+	li	15, 32
+	addis	10, 2, cnum@toc@ha
+	addi	10, 10, cnum@toc@l
+	lvx	25, 0, 10	# v25 - mask
+	lvx	31, 14, 10	# v31 = 1a
+	lvx	19, 15, 10	# v19 = 1 << 24
+	lxv	24, 48(10)	# vs24
+	lxv	25, 64(10)	# vs25
+
+	# initialize
+	# load key from r3 to vectors
+	ld	9, 24(3)
+	ld	10, 32(3)
+	and.	9, 9, 11
+	and.	10, 10, 12
+
+	# break 26 bits
+	extrdi	14, 9, 26, 38
+	extrdi	15, 9, 26, 12
+	extrdi	16, 9, 12, 0
+	mtvsrdd	58, 0, 14
+	insrdi	16, 10, 14, 38
+	mtvsrdd	59, 0, 15
+	extrdi	17, 10, 26, 24
+	mtvsrdd	60, 0, 16
+	extrdi	18, 10, 24, 0
+	mtvsrdd	61, 0, 17
+	mtvsrdd	62, 0, 18
+
+	# r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
+	li	9, 5
+	mtvsrdd	36, 0, 9
+	vmulouw	0, 27, 4		# v0 = rr0
+	vmulouw	1, 28, 4		# v1 = rr1
+	vmulouw	2, 29, 4		# v2 = rr2
+	vmulouw	3, 30, 4		# v3 = rr3
+.endm
+
+#
+# poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen)
+#  r3 = h: state (accumulator at 0..16(r3), key r at 24(r3) and 32(r3))
+#  r4 = m (message)
+#  r5 = mlen (message length in bytes, nothing is done below 64)
+#
+SYM_FUNC_START(poly1305_p10le_4blocks)
+.align 5
+	cmpdi	5, 64
+	blt	Out_no_poly1305
+
+	SAVE_REGS
+
+	do_poly1305_init
+
+	li	21, 0	# counter to message
+
+	poly1305_setup_r
+
+	# load previous H state
+	# break/convert r6 to 26 bits
+	ld	9, 0(3)
+	ld	10, 8(3)
+	ld	19, 16(3)
+	sldi	19, 19, 24
+	mtvsrdd	41, 0, 19
+	extrdi	14, 9, 26, 38
+	extrdi	15, 9, 26, 12
+	extrdi	16, 9, 12, 0
+	mtvsrdd	36, 0, 14
+	insrdi	16, 10, 14, 38
+	mtvsrdd	37, 0, 15
+	extrdi	17, 10, 26, 24
+	mtvsrdd	38, 0, 16
+	extrdi	18, 10, 24, 0
+	mtvsrdd	39, 0, 17
+	mtvsrdd	40, 0, 18
+	vor	8, 8, 9
+
+	# input m1 m2
+	add	20, 4, 21
+	xxlor	49, 24, 24
+	xxlor	50, 25, 25
+	lxvw4x	43, 0, 20
+	addi	17, 20, 16
+	lxvw4x	44, 0, 17
+	vperm	14, 11, 12, 17
+	vperm	15, 11, 12, 18
+	vand	9, 14, 25	# a0
+	vsrd	10, 14, 31	# >> 26
+	vsrd	11, 10, 31	# 12 bits left
+	vand	10, 10, 25	# a1
+	vspltisb 13, 12
+	vand	16, 15, 25
+	vsld	12, 16, 13
+	vor	11, 11, 12
+	vand	11, 11, 25	# a2
+	vspltisb 13, 14
+	vsrd	12, 15, 13	# >> 14
+	vsrd	13, 12, 31	# >> 26, a4
+	vand	12, 12, 25	# a3
+
+	vaddudm	20, 4, 9
+	vaddudm	21, 5, 10
+	vaddudm	22, 6, 11
+	vaddudm	23, 7, 12
+	vaddudm	24, 8, 13
+
+	# m3 m4
+	addi	17, 17, 16
+	lxvw4x	43, 0, 17
+	addi	17, 17, 16
+	lxvw4x	44, 0, 17
+	vperm	14, 11, 12, 17
+	vperm	15, 11, 12, 18
+	vand	9, 14, 25	# a0
+	vsrd	10, 14, 31	# >> 26
+	vsrd	11, 10, 31	# 12 bits left
+	vand	10, 10, 25	# a1
+	vspltisb 13, 12
+	vand	16, 15, 25
+	vsld	12, 16, 13
+	vspltisb 13, 14
+	vor	11, 11, 12
+	vand	11, 11, 25	# a2
+	vsrd	12, 15, 13	# >> 14
+	vsrd	13, 12, 31	# >> 26, a4
+	vand	12, 12, 25	# a3
+
+	# Smash 4 message blocks into 5 vectors of [m4,  m2,  m3,  m1]
+	vmrgow	4, 9, 20
+	vmrgow	5, 10, 21
+	vmrgow	6, 11, 22
+	vmrgow	7, 12, 23
+	vmrgow	8, 13, 24
+	vaddudm	8, 8, 19
+
+	addi	5, 5, -64	# len -= 64
+	addi	21, 21, 64	# offset += 64
+
+	li      9, 64
+	divdu   31, 5, 9
+
+	cmpdi	31, 0
+	ble	Skip_block_loop
+
+	mtctr	31
+
+# h4 =   m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
+# Rewrite the polynomial sum of products as follows,
+# h1 = (h0 + m1) * r^2,	h2 = (h0 + m2) * r^2
+# h3 = (h1 + m3) * r^2,	h4 = (h2 + m4) * r^2  --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h0 + m4) r^2
+#  .... Repeat
+# h5 = (h3 + m5) * r^2,	h6 = (h4 + m6) * r^2  -->
+# h7 = (h5 + m7) * r^2,	h8 = (h6 + m8) * r^1  --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
+#
+loop_4blocks:
+
+	# Multiply odd words and even words
+	mul_odd
+	mul_even
+	# carry reduction
+	vspltisb 9, 2
+	vsrd	10, 14, 31
+	vsrd	11, 17, 31
+	vand	7, 17, 25
+	vand	4, 14, 25
+	vaddudm	18, 18, 11
+	vsrd	12, 18, 31
+	vaddudm	15, 15, 10
+
+	vsrd	11, 15, 31
+	vand	8, 18, 25
+	vand	5, 15, 25
+	vaddudm	4, 4, 12
+	vsld	10, 12, 9
+	vaddudm	6, 16, 11
+
+	vsrd	13, 6, 31
+	vand	6, 6, 25
+	vaddudm	4, 4, 10
+	vsrd	10, 4, 31
+	vaddudm	7, 7, 13
+
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	5, 5, 10
+	vaddudm	8, 8, 11
+
+	# input m1  m2  m3  m4
+	add	20, 4, 21
+	xxlor	49, 24, 24
+	xxlor	50, 25, 25
+	lxvw4x	43, 0, 20
+	addi	17, 20, 16
+	lxvw4x	44, 0, 17
+	vperm	14, 11, 12, 17
+	vperm	15, 11, 12, 18
+	addi	17, 17, 16
+	lxvw4x	43, 0, 17
+	addi	17, 17, 16
+	lxvw4x	44, 0, 17
+	vperm	17, 11, 12, 17
+	vperm	18, 11, 12, 18
+
+	vand	20, 14, 25	# a0
+	vand	9, 17, 25	# a0
+	vsrd	21, 14, 31	# >> 26
+	vsrd	22, 21, 31	# 12 bits left
+	vsrd	10, 17, 31	# >> 26
+	vsrd	11, 10, 31	# 12 bits left
+
+	vand	21, 21, 25	# a1
+	vand	10, 10, 25	# a1
+
+	vspltisb 13, 12
+	vand	16, 15, 25
+	vsld	23, 16, 13
+	vor	22, 22, 23
+	vand	22, 22, 25	# a2
+	vand	16, 18, 25
+	vsld	12, 16, 13
+	vor	11, 11, 12
+	vand	11, 11, 25	# a2
+	vspltisb 13, 14
+	vsrd	23, 15, 13	# >> 14
+	vsrd	24, 23, 31	# >> 26, a4
+	vand	23, 23, 25	# a3
+	vsrd	12, 18, 13	# >> 14
+	vsrd	13, 12, 31	# >> 26, a4
+	vand	12, 12, 25	# a3
+
+	vaddudm	4, 4, 20
+	vaddudm	5, 5, 21
+	vaddudm	6, 6, 22
+	vaddudm	7, 7, 23
+	vaddudm	8, 8, 24
+
+	# Smash 4 message blocks into 5 vectors of [m4,  m2,  m3,  m1]
+	vmrgow	4, 9, 4
+	vmrgow	5, 10, 5
+	vmrgow	6, 11, 6
+	vmrgow	7, 12, 7
+	vmrgow	8, 13, 8
+	vaddudm	8, 8, 19
+
+	addi	5, 5, -64	# len -= 64
+	addi	21, 21, 64	# offset += 64
+
+	bdnz	loop_4blocks
+
+Skip_block_loop:
+	xxlor	58, 0, 0
+	xxlor	59, 1, 1
+	xxlor	60, 2, 2
+	xxlor	61, 3, 3
+	xxlor	62, 4, 4
+	xxlor	32, 5, 5
+	xxlor	33, 6, 6
+	xxlor	34, 7, 7
+	xxlor	35, 8, 8
+
+	# Multiply odd words and even words
+	mul_odd
+	mul_even
+
+	# Sum the products.
+	xxpermdi 41, 31, 46, 0
+	xxpermdi 42, 31, 47, 0
+	vaddudm	4, 14, 9
+	xxpermdi 36, 31, 36, 3
+	vaddudm	5, 15, 10
+	xxpermdi 37, 31, 37, 3
+	xxpermdi 43, 31, 48, 0
+	vaddudm	6, 16, 11
+	xxpermdi 38, 31, 38, 3
+	xxpermdi 44, 31, 49, 0
+	vaddudm	7, 17, 12
+	xxpermdi 39, 31, 39, 3
+	xxpermdi 45, 31, 50, 0
+	vaddudm	8, 18, 13
+	xxpermdi 40, 31, 40, 3
+
+	# carry reduction
+	vspltisb 9, 2
+	vsrd	10, 4, 31
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	8, 8, 11
+	vsrd	12, 8, 31
+	vaddudm	5, 5, 10
+
+	vsrd	11, 5, 31
+	vand	8, 8, 25
+	vand	5, 5, 25
+	vaddudm	4, 4, 12
+	vsld	10, 12, 9
+	vaddudm	6, 6, 11
+
+	vsrd	13, 6, 31
+	vand	6, 6, 25
+	vaddudm	4, 4, 10
+	vsrd	10, 4, 31
+	vaddudm	7, 7, 13
+
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	5, 5, 10
+	vsrd	10, 5, 31
+	vand	5, 5, 25
+	vaddudm	6, 6, 10
+	vaddudm	8, 8, 11
+
+	b	do_final_update
+
+do_final_update:
+	# combine 26 bit limbs
+	# v4, v5, v6, v7 and v8 are 26 bit vectors
+	vsld	5, 5, 31
+	vor	20, 4, 5
+	vspltisb 11, 12
+	vsrd	12, 6, 11
+	vsld	6, 6, 31
+	vsld	6, 6, 31
+	vor	20, 20, 6
+	vspltisb 11, 14
+	vsld	7, 7, 11
+	vor	21, 7, 12
+	mfvsrld	16, 40		# save last 2 bytes
+	vsld	8, 8, 11
+	vsld	8, 8, 31
+	vor	21, 21, 8
+	mfvsrld	17, 52
+	mfvsrld	19, 53
+	srdi	16, 16, 24
+
+	std	17, 0(3)
+	std	19, 8(3)
+	stw	16, 16(3)
+
+Out_loop:
+	li	3, 0
+
+	RESTORE_REGS
+
+	blr
+
+Out_no_poly1305:
+	li	3, 0
+	blr
+SYM_FUNC_END(poly1305_p10le_4blocks)
+
+#
+# =======================================================================
+# The following functions implement 64 x 64 bits multiplication poly1305.
+#
+# Poly1305_init_64: load and clamp the key r for the 64-bit scalar path.
+#  Input:   r3 = state, the key r is read from 24(r3) and 32(r3)
+#  Outputs: r9 = r0, r10 = r1, r19 = s1
+#           v0 = (r0, s1), v1 = (r1, r0), v2 = (s1, 0), v3 = (r0, 0)
+#  Clobbers r11, r12, r21, r25 and CR0.
+SYM_FUNC_START_LOCAL(Poly1305_init_64)
+	#  mask 0x0FFFFFFC0FFFFFFC
+	#  mask 0x0FFFFFFC0FFFFFFF
+	addis	10, 2, rmask@toc@ha
+	addi	10, 10, rmask@toc@l
+	ld	11, 0(10)
+	ld	12, 8(10)
+
+	# initialize
+	# load key from r3
+	ld	9, 24(3)
+	ld	10, 32(3)
+	and.	9, 9, 11	# clamp mask r0
+	and.	10, 10, 12	# clamp mask r1
+
+        srdi    21, 10, 2
+        add     19, 21, 10      # s1: r19 = r1 + (r1 >> 2), i.e. (r1 >> 2) * 5 (low 2 bits of r1 are clamped to 0)
+
+        # setup r and s
+        li      25, 0
+	mtvsrdd 32+0, 9, 19	# r0, s1
+	mtvsrdd 32+1, 10, 9	# r1, r0
+	mtvsrdd 32+2, 19, 25	# s1
+	mtvsrdd 32+3, 9, 25	# r0
+
+	blr
+SYM_FUNC_END(Poly1305_init_64)
+
+# Poly1305_mult
+# v6 = (h0, h1), v8 = h2
+# v0 = (r0, s1), v1 = (r1, r0), v2 = s1, v3 = r0
+#
+# Output: v7, v10, v11
+#
+SYM_FUNC_START_LOCAL(Poly1305_mult)
+	# v9 is the addend of the multiply-sums below; the caller
+	# (poly1305_64s) clears it before every call.
+	#
+	#	d0 = h0 * r0 + h1 * s1
+	vmsumudm	7, 6, 0, 9		# h0 * r0, h1 * s1
+
+	#	d1 = h0 * r1 + h1 * r0 + h2 * s1
+	vmsumudm	11, 6, 1, 9		# h0 * r1, h1 * r0
+	vmsumudm	10, 8, 2, 11		# d1 += h2 * s1
+
+	#       d2 = h2 * r0
+	vmsumudm	11, 8, 3, 9		# d2 = h2 * r0
+	blr
+SYM_FUNC_END(Poly1305_mult)
+
+#
+# carry reduction
+# h %=p
+#
+# Input: v7, v10, v11
+# Output: r27, r28, r29
+#
+SYM_FUNC_START_LOCAL(Carry_reduction)
+	# low doublewords of d0, d1, d2 -> r27, r28, r29
+	mfvsrld	27, 32+7
+	mfvsrld	28, 32+10
+	mfvsrld	29, 32+11
+	# high doublewords of d0 and d1 are carried into the next word up
+	mfvsrd	20, 32+7	# h0.h
+	mfvsrd	21, 32+10	# h1.h
+
+	addc	28, 28, 20
+	adde	29, 29, 21
+	# bits of h2 above the low two are folded back into h0 times 5
+	srdi	22, 29, 0x2
+	sldi	23, 22, 0x2
+	add	23, 23, 22	# (h2 >> 2) * 5
+	addc	27, 27, 23	# h0
+	addze	28, 28		# h1
+	andi.	29, 29, 0x3	# h2
+	blr
+SYM_FUNC_END(Carry_reduction)
+
+#
+# poly1305 multiplication
+# h *= r, h %= p
+#	d0 = h0 * r0 + h1 * s1
+#	d1 = h0 * r1 + h1 * r0 + h2 * s1
+#       d2 = h2 * r0
+#
+#
+# void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit)
+#   - no highbit if final leftover block (highbit = 0)
+#
+# poly1305_64s: process whole 16-byte blocks with 64 x 64 bit multiplies.
+#  r3 = state: h0, h1 at 0(r3), 8(r3), h2 (32 bit) at 16(r3),
+#       key r at 24(r3) and 32(r3)
+#  r4 = message
+#  r5 = length in bytes; r5 / 16 blocks are processed, a trailing partial
+#       block is ignored
+#  r6 = highbit, added into h2 for every block
+# Returns 0 in r3.  Nothing is done when r5 is below 16.
+SYM_FUNC_START(poly1305_64s)
+	# Need at least one whole block.  With fewer than 16 bytes the block
+	# count below would be 0, and bdnz with CTR = 0 wraps around instead
+	# of terminating.
+	cmpdi	5, 16
+	blt	Out_no_poly1305_64
+
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-400(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+	SAVE_GPR 25, 200, 1
+	SAVE_GPR 26, 208, 1
+	SAVE_GPR 27, 216, 1
+	SAVE_GPR 28, 224, 1
+	SAVE_GPR 29, 232, 1
+	SAVE_GPR 30, 240, 1
+	SAVE_GPR 31, 248, 1
+
+	# Init poly1305
+	bl Poly1305_init_64
+
+	li 25, 0			# offset to inp and outp
+
+	add 11, 25, 4
+
+	# load h
+	# h0, h1, h2?
+        ld	27, 0(3)
+        ld	28, 8(3)
+        lwz	29, 16(3)
+
+	# CTR = number of whole 16-byte blocks
+        li      30, 16
+        divdu   31, 5, 30
+
+        mtctr   31
+
+        mr      24, 6		# highbit
+
+Loop_block_64:
+	vxor	9, 9, 9		# zero addend for Poly1305_mult
+
+	ld	20, 0(11)
+	ld	21, 8(11)
+	addi	11, 11, 16
+
+	# h += m (with highbit added into h2)
+	addc	27, 27, 20
+	adde	28, 28, 21
+	adde	29, 29, 24
+
+	li	22, 0
+	mtvsrdd	32+6, 27, 28	# h0, h1
+	mtvsrdd	32+8, 29, 22	# h2
+
+	bl	Poly1305_mult
+
+	bl	Carry_reduction
+
+	bdnz	Loop_block_64
+
+	std	27, 0(3)
+	std	28, 8(3)
+	stw	29, 16(3)
+
+	li	3, 0
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+	RESTORE_GPR 25, 200, 1
+	RESTORE_GPR 26, 208, 1
+	RESTORE_GPR 27, 216, 1
+	RESTORE_GPR 28, 224, 1
+	RESTORE_GPR 29, 232, 1
+	RESTORE_GPR 30, 240, 1
+	RESTORE_GPR 31, 248, 1
+
+	addi    1, 1, 400
+	ld 0, 16(1)
+	mtlr 0
+
+	blr
+
+Out_no_poly1305_64:
+	li	3, 0
+	blr
+SYM_FUNC_END(poly1305_64s)
+
+#
+# Input: r3 = h, r4 = s, r5 = mac
+# mac = h + s
+#
+SYM_FUNC_START(poly1305_emit_64)
+	# h0, h1, h2 (h2 is read as a full doubleword here)
+	ld	10, 0(3)
+	ld	11, 8(3)
+	ld	12, 16(3)
+
+	# compare modulus
+	# h + 5 + (-p)
+	mr	6, 10
+	mr	7, 11
+	mr	8, 12
+	addic.	6, 6, 5
+	addze	7, 7
+	addze	8, 8
+	srdi	9, 8, 2		# overflow?
+	cmpdi	9, 0
+	beq	Skip_h64
+	# h + 5 reached bit 130, so take h + 5 as the reduced value
+	# (only its low 128 bits are used below)
+	mr	10, 6
+	mr	11, 7
+	mr	12, 8
+
+Skip_h64:
+	# mac = low 128 bits of (h + s)
+	ld	6, 0(4)
+	ld	7, 8(4)
+	addc	10, 10, 6
+	adde	11, 11, 7
+	addze	12, 12
+
+	std	10, 0(5)
+	std	11, 8(5)
+	blr
+SYM_FUNC_END(poly1305_emit_64)
+
+# Constant pool.
+#  rmask: 16 bytes, read as two doublewords and ANDed into the key r
+#         (the clamp).
+#  cnum:  five 16-byte rows, loaded by do_poly1305_init:
+#         +0  -> v25   0x03ffffff in each doubleword (26-bit limb mask)
+#         +16 -> v31   0x1a in each doubleword (shift count 26)
+#         +32 -> v19   0x01000000 in each word (1 << 24)
+#         +48 -> vs24  byte selectors, copied to v17 and used with vperm
+#         +64 -> vs25  byte selectors, copied to v18 and used with vperm
+SYM_DATA_START_LOCAL(RMASK)
+.align 5
+rmask:
+.byte	0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f
+cnum:
+.long	0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000
+.long	0x1a, 0x00, 0x1a, 0x00
+.long	0x01000000, 0x01000000, 0x01000000, 0x01000000
+.long	0x00010203, 0x04050607, 0x10111213, 0x14151617
+.long	0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f
+SYM_DATA_END(RMASK)
diff --git a/arch/powerpc/crypto/ppc-xlate.pl b/arch/powerpc/crypto/ppc-xlate.pl
new file mode 100644
index 0000000000..23cca703ce
--- /dev/null
+++ b/arch/powerpc/crypto/ppc-xlate.pl
@@ -0,0 +1,229 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# PowerPC assembler distiller by <appro>.
+# Arguments: <flavour> <output file>; the input is read through <>.
+my $flavour = shift;	# platform selector, regex-matched below (linux, aix, osx, 64, le, ...)
+my $output = shift;	# STDOUT is redirected into this file
+# "or", not "||": "||" binds to the filename string, so die could never run.
+open STDOUT,">$output" or die "can't open $output: $!";
+my %GLOBALS;		# symbol names recorded by the .globl handler
+my $dotinlocallabels=($flavour=~/linux/)?1:0;	# linux flavours get .L-prefixed local labels
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $globl = sub {	# .globl NAME: declare a global symbol in the flavour's spelling
+    my $junk = shift;	# modifier suffix handed over by the dispatcher, unused here
+    my $name = shift;
+    my $global = \$GLOBALS{$name};	# keyed by the name exactly as written in the input
+    my $ret;
+
+    $name =~ s|^[\.\_]||;	# drop one leading "." or "_"
+
+    SWITCH: for ($flavour) {
+	/aix/		&& do { $name = ".$name";
+				last;
+			      };
+	/osx/		&& do { $name = "_$name";
+				last;
+			      };
+	/linux/
+			&& do {	$ret = "_GLOBAL($name)";
+				last;
+			      };
+    }
+    # Fallback for non-linux flavours; ".align" needs its leading dot to be a directive.
+    $ret = ".globl	$name\n.align 5\n$name:" if (!$ret);
+    $$global = $name;
+    $ret;
+};
+my $text = sub {	# directive that opens the code section
+    my $section = ".text";
+    $section = ".csect\t.text[PR],7" if ($flavour =~ /aix/);
+    return ($flavour =~ /linux.*64le/) ? ".abiversion	2\n".$section : $section;
+};
+my $machine = sub {
+    # osx only: strip quotes and turn "any" into ppc970 (ppc970-64 if flavour has 64)
+    my (undef, $cpu) = @_;
+    if ($flavour =~ /osx/) {
+	$cpu =~ s/\"//g;
+	$cpu = (($flavour =~ /64/) ? "ppc970-64" : "ppc970") if ($cpu eq "any");
+    }
+    return ".machine	$cpu";
+};
+my $size = sub {
+    # .size is only produced for linux flavours; everything else gets "".
+    return "" unless ($flavour =~ /linux/);
+    my (undef, $sym) = @_;
+    $sym =~ s|^[\.\_]||;
+    my $is64 = ($flavour =~ /64$/);	# flavour ends in "64": dot-symbols are sized too
+    my $dot = $is64 ? "." : "";
+    my $out = ".size	$sym,.-".$dot.$sym;
+    $out .= "\n.size	.$sym,.-.$sym" if ($is64);
+    return $out;
+};
+my $asciz = sub {
+    # Re-join the comma-split operands; only a fully quoted string is
+    # translated (into .byte values plus a trailing 0), anything else gives "".
+    my (undef, @parts) = @_;
+    my $text = join(",", @parts);
+    return "" unless ($text =~ /^"(.*)"$/);
+    return ".byte	" . join(",", unpack("C*", $1), 0) . "\n.align	2";
+};
+my $quad = sub {
+    # Hex/decimal literals become two .long words (order by endianness), others a .quad.
+    my (undef, @operands) = @_;
+    my $le = ($flavour =~ /le$/);
+    my @out;
+    foreach my $val (@operands) {
+	my ($hi, $lo);
+	if ($val =~ /^0x([0-9a-f]*?)([0-9a-f]{1,8})$/i) {
+	    ($hi, $lo) = ($1 ? "0x$1" : "0", "0x$2");
+	} elsif ($val =~ /^([0-9]+)$/) {
+	    ($hi, $lo) = ($1>>32, $1&0xffffffff);	# error-prone with 32-bit perl
+	} else {
+	    push(@out, ".quad	$val");
+	    next;
+	}
+	push(@out, $le ? ".long\t$lo,$hi" : ".long\t$hi,$lo");
+    }
+    join("\n", @out);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+    my ($f, @ops) = @_;
+    my $crf = (@ops > 2) ? shift(@ops) : 0;	# optional leading CR field
+    if ($flavour =~ /linux.*32/) {	# some old 32-bit GNU as can't handle cmplw: emit the word
+	return "	.long	".sprintf("0x%x", 31<<26|$crf<<23|$ops[0]<<16|$ops[1]<<11|64);
+    }
+    return "	cmplw	".join(',', $crf, @ops);
+};
+my $bdnz; $bdnz = sub {	# declared unconditionally: "my ... if COND" is undefined behaviour (perlsyn)
+    my $f = shift;
+    my $bo = $f=~/[\+\-]/ ? 16+9 : 16;	# optional "to be taken" hint
+    "	bc	$bo,0,".shift;
+} if ($flavour!~/linux/);	# linux flavours leave bdnz to the assembler
+my $bltlr = sub {
+    my ($hint) = @_;
+    my $bo = 12;
+    $bo += 2 if ($hint =~ /\-/);	# optional "not to be taken" hint
+    # GNU as doesn't allow most recent hints, so linux gets the raw word
+    return "	bclr	$bo,0" unless ($flavour =~ /linux/);
+    return "	.long	".sprintf("0x%x", 19<<26|$bo<<21|16<<1);
+};
+my $bnelr = sub {
+    my ($hint) = @_;
+    my $bo = 4;
+    $bo += 2 if ($hint =~ /\-/);	# optional "not to be taken" hint
+    # GNU as doesn't allow most recent hints, so linux gets the raw word
+    return "	bclr	$bo,2" unless ($flavour =~ /linux/);
+    return "	.long	".sprintf("0x%x", 19<<26|$bo<<21|2<<16|16<<1);
+};
+my $beqlr = sub {
+    my ($hint) = @_;
+    my $bo = 12;
+    $bo += 2 if ($hint =~ /-/);	# optional "not to be taken" hint
+    # GNU as doesn't allow most recent hints, so linux gets the raw word
+    return "	bclr	$bo,2" unless ($flavour =~ /linux/);
+    return "	.long	".sprintf("0x%X", 19<<26|$bo<<21|2<<16|16<<1);
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {	# extrdi rA,rS,n,b is emitted as rldicl rA,rS,(b+n)&63,64-n
+    my (undef, $dst, $src, $width, $start) = @_;
+    my ($rot, $mask) = (($start + $width) & 63, 64 - $width);
+    return "	rldicl	$dst,$src,$rot,$mask";
+};
+my $vmr = sub {	# vector register move, spelled as vor vX,vY,vY
+    my (undef, $dst, $src) = @_;
+    return "	vor	$dst,$src,$src";
+};
+
+# vrsave is special-purpose register 256; some ABIs reserve it for system
+# use, so for the linux-ppc64le flavour accesses to it are rewritten.
+my $no_vrsave = ($flavour =~ /linux-ppc64le/);
+my $mtspr = sub {
+    my (undef, $spr, $src) = @_;
+    # a vrsave write becomes "or rA,rA,rA", which leaves rA as it was
+    my $reserved = ($spr == 256 && $no_vrsave);
+    return $reserved
+	? "	or	$src,$src,$src"
+	: "	mtspr	$spr,$src";
+};
+my $mfspr = sub {
+    my (undef, $dst, $spr) = @_;
+    # a vrsave read becomes "li rD,-1"
+    my $reserved = ($spr == 256 && $no_vrsave);
+    return $reserved
+	? "	li	$dst,-1"
+	: "	mfspr	$dst,$spr";
+};
+
+# PowerISA 2.06: hand-encoded indexed loads/stores (primary opcode 31)
+sub vsxmem_op {
+    my (undef, $xt, $base, $index, $xo) = @_;
+    return "	.long	".sprintf("0x%X", (31<<26)|($xt<<21)|($base<<16)|($index<<11)|($xo*2+1));
+}
+# made-up mnemonics for unaligned AltiVec/VMX accesses -> instruction actually encoded
+my $lvx_u	= sub { return vsxmem_op(@_, 844); };	# -> lxvd2x
+my $stvx_u	= sub { return vsxmem_op(@_, 972); };	# -> stxvd2x
+my $lvdx_u	= sub { return vsxmem_op(@_, 588); };	# -> lxsdx
+my $stvdx_u	= sub { return vsxmem_op(@_, 716); };	# -> stxsdx
+my $lvx_4w	= sub { return vsxmem_op(@_, 780); };	# -> lxvw4x
+my $stvx_4w	= sub { return vsxmem_op(@_, 908); };	# -> stxvw4x
+
+# PowerISA 2.07 stuff: vector crypto/arithmetic instructions emitted as raw words
+sub vcrypto_op {	# primary opcode 4; $op is OR-ed into the low bits unchanged
+    my ($f, $vrt, $vra, $vrb, $op) = @_;
+    "	.long	".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher	= sub { vcrypto_op(@_, 1288); };
+my $vcipherlast	= sub { vcrypto_op(@_, 1289); };
+my $vncipher	= sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox	= sub { vcrypto_op(@_, 0, 1480); };	# two-operand form: vrb field is 0
+my $vshasigmad	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb	= sub { vcrypto_op(@_, 1032); };
+my $vpmsumd	= sub { vcrypto_op(@_, 1224); };
+my $vpmsumh	= sub { vcrypto_op(@_, 1096); };	my $vpmsubh = $vpmsumh;	# old misspelling kept as alias
+my $vpmsumw	= sub { vcrypto_op(@_, 1160); };
+my $vaddudm	= sub { vcrypto_op(@_, 192);  };
+my $vadduqm	= sub { vcrypto_op(@_, 256);  };
+
+my $mtsle	= sub {	# raw word: opcode 31, operand shifted to bit 21, low field 147*2
+    my (undef, $flag) = @_;
+    return "	.long	".sprintf("0x%X", (31<<26)|($flag<<21)|(147*2));
+};
+
+print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
+
+while($line=<>) {
+    # Normalise the line before looking at its mnemonic.
+    $line =~ s|[#!;].*$||;	# get rid of asm-style comments...
+    $line =~ s|/\*.*\*/||;	# ... and C-style comments...
+    $line =~ s|^\s+||;		# ... and skip white spaces in beginning...
+    $line =~ s|\s+$||;		# ... and at the end
+
+    {
+	$line =~ s|\b\.L(\w+)|L$1|g;	# common denominator for Locallabel
+	$line =~ s|\bL(\w+)|\.L$1|g	if ($dotinlocallabels);
+    }
+
+    {	# peel off "[.]mnemonic[. + -]"; what is left in $line is the operand list
+	$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+	my $c = $1; $c = "\t" if ($c eq "");
+	my $mnemonic = $2;
+	my $f = $3;
+	my $opcode = eval("\$$mnemonic");	# handler closure named after the mnemonic, if one exists
+	$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);	# r3 -> 3 etc.
+	if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+	elsif ($mnemonic)           { $line = $c.$mnemonic.$f."\t".$line; }
+    }
+
+    print $line if ($line);
+    print "\n";
+}
+# A failed flush (e.g. disk full) must not leave a silently truncated file.
+close STDOUT or die "error closing STDOUT: $!";
diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S
new file mode 100644
index 0000000000..f0d5ed557a
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-powerpc-asm.S
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SHA-1 implementation for PowerPC.
+ *
+ * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+
+#ifdef __BIG_ENDIAN__	/* LWZ(rt, d, ra): load the word at offset d from ra into rt */
+#define LWZ(rt, d, ra)	\
+	lwz	rt,d(ra)
+#else			/* other builds: byte-reversed indexed load instead */
+#define LWZ(rt, d, ra)	\
+	li	rt,d;	/* rt first carries the offset used as index */ \
+	lwbrx	rt,rt,ra
+#endif
+
+/*
+ * We roll the registers for T, A, B, C, D, E around on each
+ * iteration; T on iteration t is A on iteration t+1, and so on.
+ * We use registers 7 - 12 for this.
+ */
+#define RT(t)	((((t)+5)%6)+7)	/* same register as RA((t)+1) */
+#define RA(t)	((((t)+4)%6)+7)	/* same register as RB((t)+1) */
+#define RB(t)	((((t)+3)%6)+7)	/* same register as RC((t)+1) */
+#define RC(t)	((((t)+2)%6)+7)	/* same register as RD((t)+1) */
+#define RD(t)	((((t)+1)%6)+7)	/* same register as RE((t)+1) */
+#define RE(t)	((((t)+0)%6)+7)	/* same register as RT((t)+1) */
+
+/* We use registers 16 - 31 for the W values */
+#define W(t)	(((t)%16)+16)	/* 16-entry ring: W(t) and W((t)+16) share a register */
+/* LOADW(t): load input word t (byte offset t*4 from r4) into W(t) */
+#define LOADW(t)				\
+	LWZ(W(t),(t)*4,r4)
+
+#define STEPD0_LOAD(t)	/* one round with F = (B & C) | (~B & D); also loads W[t+4] */ \
+	andc	r0,RD(t),RB(t);		/* r0 = D & ~B */ \
+	and	r6,RB(t),RC(t);		/* r6 = B & C */ \
+	rotlwi	RT(t),RA(t),5;			/* T = A rotl 5 */ \
+	or	r6,r6,r0;			/* r6 = F */ \
+	add	r0,RE(t),r15;			/* r0 = E + K (K is kept in r15) */ \
+	add	RT(t),RT(t),r6;		/* T += F */ \
+	add	r14,r0,W(t);			/* r14 = E + K + W[t] */ \
+	LWZ(W((t)+4),((t)+4)*4,r4);	/* fetch input word t+4 */ \
+	rotlwi	RB(t),RB(t),30;			/* B = B rotl 30 */ \
+	add	RT(t),RT(t),r14
+
+#define STEPD0_UPDATE(t)	/* same F as STEPD0_LOAD; W[t+4] is computed, not loaded */ \
+	and	r6,RB(t),RC(t);		/* r6 = B & C */ \
+	andc	r0,RD(t),RB(t);		/* r0 = D & ~B */ \
+	rotlwi	RT(t),RA(t),5;			/* T = A rotl 5 */ \
+	rotlwi	RB(t),RB(t),30;			/* B = B rotl 30 */ \
+	or	r6,r6,r0;			/* r6 = F */ \
+	add	r0,RE(t),r15;			/* r0 = E + K */ \
+	xor	r5,W((t)+4-3),W((t)+4-8);		/* schedule for i = t+4: W[i-3] ^ W[i-8] */ \
+	add	RT(t),RT(t),r6;		/* T += F */ \
+	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	/* W[i-16] ^ W[i-14] */ \
+	add	r0,r0,W(t);			/* r0 = E + K + W[t] */ \
+	xor	W((t)+4),W((t)+4),r5;			/* combine the four terms */ \
+	add	RT(t),RT(t),r0;		/* T += E + K + W[t] */ \
+	rotlwi	W((t)+4),W((t)+4),1
+
+#define STEPD1(t)	/* one round with F = B ^ C ^ D, no schedule update */ \
+	xor	r6,RB(t),RC(t);		/* r6 = B ^ C */ \
+	rotlwi	RT(t),RA(t),5;			/* T = A rotl 5 */ \
+	rotlwi	RB(t),RB(t),30;			/* B = B rotl 30 */ \
+	xor	r6,r6,RD(t);			/* r6 = F */ \
+	add	r0,RE(t),r15;			/* r0 = E + K */ \
+	add	RT(t),RT(t),r6;		/* T += F */ \
+	add	r0,r0,W(t);			/* r0 = E + K + W[t] */ \
+	add	RT(t),RT(t),r0
+
+#define STEPD1_UPDATE(t)	/* STEPD1 plus computation of W[t+4] */ \
+	xor	r6,RB(t),RC(t);		/* r6 = B ^ C */ \
+	rotlwi	RT(t),RA(t),5;			/* T = A rotl 5 */ \
+	rotlwi	RB(t),RB(t),30;			/* B = B rotl 30 */ \
+	xor	r6,r6,RD(t);			/* r6 = F = B ^ C ^ D */ \
+	add	r0,RE(t),r15;			/* r0 = E + K */ \
+	xor	r5,W((t)+4-3),W((t)+4-8);		/* schedule for i = t+4: W[i-3] ^ W[i-8] */ \
+	add	RT(t),RT(t),r6;		/* T += F */ \
+	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	/* W[i-16] ^ W[i-14] */ \
+	add	r0,r0,W(t);			/* r0 = E + K + W[t] */ \
+	xor	W((t)+4),W((t)+4),r5;			/* combine the four terms */ \
+	add	RT(t),RT(t),r0;		/* T += E + K + W[t] */ \
+	rotlwi	W((t)+4),W((t)+4),1
+
+#define STEPD2_UPDATE(t)	/* F = (B & C) | (B & D) | (C & D), plus W[t+4] */ \
+	and	r6,RB(t),RC(t);		/* r6 = B & C */ \
+	and	r0,RB(t),RD(t);		/* r0 = B & D */ \
+	rotlwi	RT(t),RA(t),5;			/* T = A rotl 5 */ \
+	or	r6,r6,r0;			/* r6 = (B & C) | (B & D) */ \
+	rotlwi	RB(t),RB(t),30;			/* B = B rotl 30; B is not read again below */ \
+	and	r0,RC(t),RD(t);		/* r0 = C & D */ \
+	xor	r5,W((t)+4-3),W((t)+4-8);	/* schedule for i = t+4: W[i-3] ^ W[i-8] */ \
+	or	r6,r6,r0;			/* r6 = F */ \
+	xor	W((t)+4),W((t)+4-16),W((t)+4-14);	/* W[i-16] ^ W[i-14] */ \
+	add	r0,RE(t),r15;			/* r0 = E + K */ \
+	add	RT(t),RT(t),r6;		/* T += F */ \
+	add	r0,r0,W(t);			/* r0 = E + K + W[t] */ \
+	xor	W((t)+4),W((t)+4),r5;		/* combine the four terms */ \
+	add	RT(t),RT(t),r0;		/* T += E + K + W[t] */ \
+	rotlwi	W((t)+4),W((t)+4),1
+
+#define STEP0LD4(t)	/* four consecutive STEPD0_LOAD rounds: t .. t+3 */ \
+	STEPD0_LOAD(t);				\
+	STEPD0_LOAD((t)+1);			\
+	STEPD0_LOAD((t)+2);			\
+	STEPD0_LOAD((t)+3)
+
+#define STEPUP4(t, fn)	/* four consecutive STEP<fn>_UPDATE rounds: t .. t+3 */ \
+	STEP##fn##_UPDATE(t);			\
+	STEP##fn##_UPDATE((t)+1);		\
+	STEP##fn##_UPDATE((t)+2);		\
+	STEP##fn##_UPDATE((t)+3)
+
+#define STEPUP20(t, fn)	/* five STEPUP4 groups: rounds t .. t+19 */ \
+	STEPUP4(t, fn);				\
+	STEPUP4((t)+4, fn);			\
+	STEPUP4((t)+8, fn);			\
+	STEPUP4((t)+12, fn);			\
+	STEPUP4((t)+16, fn)
+
+_GLOBAL(powerpc_sha_transform)	/* r3 = five-word state, r4 = 64-byte input block */
+	PPC_STLU r1,-INT_FRAME_SIZE(r1)
+	SAVE_GPRS(14, 31, r1)	/* r14-r31 are used as K, scratch and W registers below */
+
+	/* Load up A - E */
+	lwz	RA(0),0(r3)	/* A */
+	lwz	RB(0),4(r3)	/* B */
+	lwz	RC(0),8(r3)	/* C */
+	lwz	RD(0),12(r3)	/* D */
+	lwz	RE(0),16(r3)	/* E */
+
+	LOADW(0)		/* the first four words; STEPD0_LOAD fetches W[t+4] */
+	LOADW(1)
+	LOADW(2)
+	LOADW(3)
+
+	lis	r15,0x5a82	/* K0-19 */
+	ori	r15,r15,0x7999
+	STEP0LD4(0)		/* rounds 0-11 load W[4..15] as they go */
+	STEP0LD4(4)
+	STEP0LD4(8)
+	STEPUP4(12, D0)		/* from round 12 on W[t+4] is computed */
+	STEPUP4(16, D0)
+
+	lis	r15,0x6ed9	/* K20-39 */
+	ori	r15,r15,0xeba1
+	STEPUP20(20, D1)
+
+	lis	r15,0x8f1b	/* K40-59 */
+	ori	r15,r15,0xbcdc
+	STEPUP20(40, D2)
+
+	lis	r15,0xca62	/* K60-79 */
+	ori	r15,r15,0xc1d6
+	STEPUP4(60, D1)
+	STEPUP4(64, D1)
+	STEPUP4(68, D1)
+	STEPUP4(72, D1)
+	lwz	r20,16(r3)	/* old E; rounds 76-79 only read W registers r28-r31 */
+	STEPD1(76)
+	lwz	r19,12(r3)	/* old D */
+	STEPD1(77)
+	lwz	r18,8(r3)	/* old C */
+	STEPD1(78)
+	lwz	r17,4(r3)	/* old B */
+	STEPD1(79)
+
+	lwz	r16,0(r3)	/* old A */
+	add	r20,RE(80),r20	/* E via r20: RE(0) is the register holding A's result, RA(80) */
+	add	RD(0),RD(80),r19
+	add	RC(0),RC(80),r18
+	add	RB(0),RB(80),r17
+	add	RA(0),RA(80),r16
+	mr	RE(0),r20
+	stw	RA(0),0(r3)	/* write the updated state back */
+	stw	RB(0),4(r3)
+	stw	RC(0),8(r3)
+	stw	RD(0),12(r3)
+	stw	RE(0),16(r3)
+
+	REST_GPRS(14, 31, r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	blr
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S
new file mode 100644
index 0000000000..0f447523be
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-asm.S
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Fast SHA-1 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP	r3	/* pointer to hash value			*/
+#define rWP	r4	/* pointer to input				*/
+#define rKP	r5	/* pointer to constants (r5 is moved to CTR first) */
+
+#define rW0	r14	/* 64 bit round words				*/
+#define rW1	r15
+#define rW2	r16
+#define rW3	r17
+#define rW4	r18
+#define rW5	r19
+#define rW6	r20
+#define rW7	r21
+
+#define rH0	r6	/* 32 bit hash values 				*/
+#define rH1	r7
+#define rH2	r8
+#define rH3	r9
+#define rH4	r10
+
+#define rT0	r22	/* 64 bit temporary				*/
+#define rT1	r0	/* 32 bit temporaries				*/
+#define rT2	r11
+#define rT3	r12
+
+#define rK	r23	/* 64 bit constant; r23 is saved by INITIALIZE	*/
+
+#define LOAD_K01	/* k = 0 in the R_* macros: expands to nothing, rK is kept */
+
+#define LOAD_K11	/* k = 1: rK from the table word at offset 0 */ \
+	evlwwsplat	rK,0(rKP);
+
+#define LOAD_K21	/* k = 2: rK from the table word at offset 4 */ \
+	evlwwsplat	rK,4(rKP);
+
+#define LOAD_K31	/* k = 3: rK from the table word at offset 8 */ \
+	evlwwsplat	rK,8(rKP);
+
+#define LOAD_K41	/* k = 4: rK from the table word at offset 12 */ \
+	evlwwsplat	rK,12(rKP);
+
+#define INITIALIZE \
+	stwu		r1,-128(r1);	/* create stack frame		*/ \
+	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
+	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
+	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
+	evstdw		r17,32(r1);					   \
+	evstdw		r18,40(r1);					   \
+	evstdw		r19,48(r1);					   \
+	evstdw		r20,56(r1);					   \
+	evstdw		r21,64(r1);					   \
+	evstdw		r22,72(r1);					   \
+	evstdw		r23,80(r1);
+
+
+#define FINALIZE \
+	evldw		r14,8(r1);	/* restore SPE registers	*/ \
+	evldw		r15,16(r1);					   \
+	evldw		r16,24(r1);					   \
+	evldw		r17,32(r1);					   \
+	evldw		r18,40(r1);					   \
+	evldw		r19,48(r1);					   \
+	evldw		r20,56(r1);					   \
+	evldw		r21,64(r1);					   \
+	evldw		r22,72(r1);					   \
+	evldw		r23,80(r1);					   \
+	xor		r0,r0,r0;					   \
+	stw		r0,8(r1);	/* Delete sensitive data	*/ \
+	stw		r0,16(r1);	/* that we might have pushed	*/ \
+	stw		r0,24(r1);	/* from other context that runs	*/ \
+	stw		r0,32(r1);	/* the same code. Assume that	*/ \
+	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
+	stw		r0,48(r1);	/* were already overwritten on	*/ \
+	stw		r0,56(r1);	/* the way down to here		*/ \
+	stw		r0,64(r1);					   \
+	stw		r0,72(r1);					   \
+	stw		r0,80(r1);					   \
+	addi		r1,r1,128;	/* cleanup stack frame		*/
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rWP);	/* load data			*/
+#define NEXT_BLOCK \
+	addi		rWP,rWP,64;	/* increment per block		*/
+#else
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rWP;	/* load data			*/ \
+	addi		rWP,rWP,4;	/* increment per word		*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#endif
+
+#define	R_00_15(a, b, c, d, e, w0, w1, k, off) \
+	LOAD_DATA(w0, off)		/* 1: W				*/ \
+	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
+	LOAD_K##k##1							   \
+	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
+	rotrwi		rT0,a,27;	/* 1: A' = A rotl 5		*/ \
+	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
+	add		e,e,rT0;	/* 1: E = E + A'		*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	add		e,e,w0;		/* 1: E = E + W			*/ \
+	LOAD_DATA(w1, off+4)		/* 2: W				*/ \
+	add		e,e,rT2;	/* 1: E = E + F			*/ \
+	and		rT1,a,b;	/* 2: F' = B and C 		*/ \
+	add		e,e,rK;		/* 1: E = E + K			*/ \
+	andc		rT2,c,a;	/* 2: F" = ~B and D 		*/ \
+	add		d,d,rK;		/* 2: E = E + K			*/ \
+	or		rT2,rT2,rT1;	/* 2: F = F' or F"		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	add		d,d,w1;		/* 2: E = E + W			*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	add		d,d,rT0;	/* 2: E = E + A'		*/ \
+	evmergelo	w1,w1,w0;	/*    mix W[0]/W[1]		*/ \
+	add		d,d,rT2		/* 2: E = E + F			*/
+
+#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	and		rT2,b,c;	/* 1: F' = B and C 		*/ \
+	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
+	andc		rT1,d,b;	/* 1: F" = ~B and D 		*/ \
+	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
+	or		rT1,rT1,rT2;	/* 1: F = F' or F"		*/ \
+	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
+	add		e,e,rT1;	/* 1: E = E + F			*/ \
+	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
+	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
+	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
+	add		e,e,rT2;	/* 1: E = E + A'		*/ \
+	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	LOAD_K##k##1							   \
+	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
+	add		e,e,rT0;	/* 1: E = E + WK		*/ \
+	add		d,d,rT1;	/* 2: E = E + WK		*/ \
+	and		rT2,a,b;	/* 2: F' = B and C 		*/ \
+	andc		rT1,c,a;	/* 2: F" = ~B and D 		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	or		rT1,rT1,rT2;	/* 2: F = F' or F"		*/ \
+	add		d,d,rT0;	/* 2: E = E + A'		*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	add		d,d,rT1		/* 2: E = E + F			*/
+
+#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
+	xor		rT2,b,c;	/* 1: F' = B xor C		*/ \
+	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
+	xor		rT2,rT2,d;	/* 1: F = F' xor D		*/ \
+	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
+	add		e,e,rT2;	/* 1: E = E + F			*/ \
+	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
+	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
+	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
+	add		e,e,rT2;	/* 1: E = E + A'		*/ \
+	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	LOAD_K##k##1							   \
+	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
+	add		e,e,rT0;	/* 1: E = E + WK		*/ \
+	xor		rT2,a,b;	/* 2: F' = B xor C		*/ \
+	add		d,d,rT1;	/* 2: E = E + WK		*/ \
+	xor		rT2,rT2,c;	/* 2: F = F' xor D		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	add		d,d,rT2;	/* 2: E = E + F			*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	add		d,d,rT0		/* 2: E = E + A'		*/
+
+#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	and		rT2,b,c;	/* 1: F' = B and C		*/ \
+	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
+	or		rT1,b,c;	/* 1: F" = B or C		*/ \
+	evxor		w0,w0,rT0;	/*    W = W[-16] xor W[-3]	*/ \
+	and		rT1,d,rT1;	/* 1: F" = F" and D		*/ \
+	evxor		w0,w0,w4;	/*    W = W xor W[-8]		*/ \
+	or		rT2,rT2,rT1;	/* 1: F = F' or F"		*/ \
+	evxor		w0,w0,w1;	/*    W = W xor W[-14]		*/ \
+	add		e,e,rT2;	/* 1: E = E + F			*/ \
+	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
+	rotrwi		rT2,a,27;	/* 1: A' = A rotl 5		*/ \
+	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
+	add		e,e,rT2;	/* 1: E = E + A'		*/ \
+	LOAD_K##k##1							   \
+	evmergehi	rT1,rT1,rT0;	/*    WK1/WK2			*/ \
+	rotrwi		b,b,2;		/* 1: B = B rotl 30		*/ \
+	add		e,e,rT0;	/* 1: E = E + WK		*/ \
+	and		rT2,a,b;	/* 2: F' = B and C		*/ \
+	or		rT0,a,b;	/* 2: F" = B or C		*/ \
+	add		d,d,rT1;	/* 2: E = E + WK		*/ \
+	and		rT0,c,rT0;	/* 2: F" = F" and D		*/ \
+	rotrwi		a,a,2;		/* 2: B = B rotl 30		*/ \
+	or		rT2,rT2,rT0;	/* 2: F = F' or F"		*/ \
+	rotrwi		rT0,e,27;	/* 2: A' = A rotl 5		*/ \
+	add		d,d,rT2;	/* 2: E = E + F			*/ \
+	add		d,d,rT0		/* 2: E = E + A'		*/
+
+#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+	R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
+
+_GLOBAL(ppc_spe_sha1_transform)	/* r3 = hash state, r4 = input, r5 = number of blocks */
+	INITIALIZE
+
+	lwz		rH0,0(rHP)
+	lwz		rH1,4(rHP)
+	mtctr		r5		/* block count drives the bdnz loop below */
+	lwz		rH2,8(rHP)
+	lis		rKP,PPC_SPE_SHA1_K@h	/* r5 is free now: point it at the K table */
+	lwz		rH3,12(rHP)
+	ori		rKP,rKP,PPC_SPE_SHA1_K@l
+	lwz		rH4,16(rHP)
+
+ppc_spe_sha1_main:	/* one pass per 64-byte block; every R_* line covers two rounds */
+	R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)	/* k = 1: loads the first K */
+	R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
+	R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
+	R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
+	R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
+	R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
+	R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
+	R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
+
+	R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
+	R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)	/* k = 2: second K */
+
+	R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
+	R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
+	R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
+	R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
+	R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
+	R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
+	R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
+	R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
+	R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
+	R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)	/* k = 3: third K */
+
+	R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
+	R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
+	R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
+	R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
+	R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
+	R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
+	R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
+	R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
+	R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
+	R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)	/* k = 4: fourth K */
+
+	R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
+	R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
+	R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
+	R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
+	R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
+	R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
+	R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
+	lwz		rT3,0(rHP)	/* previous hash words are reloaded between the last rounds */
+	R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
+	lwz		rW1,4(rHP)	/* rW1..rW4 now serve as scratch for the old values */
+	R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
+	lwz		rW2,8(rHP)
+	R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
+	lwz		rW3,12(rHP)
+	NEXT_BLOCK
+	lwz		rW4,16(rHP)
+
+	add		rH0,rH0,rT3	/* new hash = old hash + round result, stored back */
+	stw		rH0,0(rHP)
+	add		rH1,rH1,rW1
+	stw		rH1,4(rHP)
+	add		rH2,rH2,rW2
+	stw		rH2,8(rHP)
+	add		rH3,rH3,rW3
+	stw		rH3,12(rHP)
+	add		rH4,rH4,rW4
+	stw		rH4,16(rHP)
+
+	bdnz		ppc_spe_sha1_main	/* next block while CTR is not exhausted */
+
+	FINALIZE
+	blr
+
+.data
+.align 4
+PPC_SPE_SHA1_K:	/* four round constants, read at offsets 0/4/8/12 by LOAD_K11..LOAD_K41 */
+	.long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
new file mode 100644
index 0000000000..9170892a85
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for SHA-1 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/sha1.h>
+#include <crypto/sha1_base.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equals to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 2048
+/* Assembly routine: hashes @blocks 64-byte blocks from @src into @state. */
+extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+	/* We just start SPE operations and will save SPE registers later. */
+	preempt_disable();	/* stays off until spe_end() calls preempt_enable() */
+	enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+	disable_kernel_spe();	/* undo spe_begin() in reverse order */
+	/* reenable preemption */
+	preempt_enable();
+}
+
+static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
+{
+	u32 *word = (u32 *)sctx;
+	u32 *const end = word + sizeof(struct sha1_state) / 4;
+
+	/* the word-wise wipe needs a size that is a multiple of four */
+	BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
+	do { *word++ = 0; } while (word != end);
+}
+
+static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	u8 *const block = (u8 *)sctx->buffer;
+	const unsigned int fill = sctx->count & 0x3f;
+	const unsigned int room = 64 - fill;
+	unsigned int chunk;
+
+	/* Both paths account for the new bytes; fill was sampled above. */
+	sctx->count += len;
+
+	/* Not enough input to complete a block: just buffer it. */
+	if (len < room) {
+		memcpy(block + fill, data, len);
+		return 0;
+	}
+
+	/* Top up and hash a partially filled buffer first. */
+	if (fill != 0) {
+		memcpy(block + fill, data, room);
+		data += room;
+		len -= room;
+		spe_begin();
+		ppc_spe_sha1_transform(sctx->state, block, 1);
+		spe_end();
+	}
+
+	/* Whole blocks straight from the caller, at most MAX_BYTES per SPE section. */
+	for (; len >= 64; data += chunk, len -= chunk) {
+		chunk = len;
+		if (chunk > MAX_BYTES)
+			chunk = MAX_BYTES;
+		chunk &= ~0x3f;
+		spe_begin();
+		ppc_spe_sha1_transform(sctx->state, data, chunk >> 6);
+		spe_end();
+	}
+
+	/* Stash the tail (fewer than 64 bytes) for the next call. */
+	memcpy(block, data, len);
+	return 0;
+}
+
+/*
+ * Pad the buffered tail (0x80, zeros, 64-bit big-endian bit count), hash
+ * the final block(s), store the five state words big-endian in @out and
+ * wipe the context.
+ */
+static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	char *const block = (char *)sctx->buffer;
+	unsigned int used = (sctx->count & 0x3f) + 1;	/* data plus 0x80 marker */
+	__be32 *digest = (__be32 *)out;
+	int i;
+
+	block[used - 1] = 0x80;
+
+	spe_begin();
+
+	/* More than 56 bytes used: the length no longer fits, flush a block. */
+	if (used > 56) {
+		memset(block + used, 0x00, 64 - used);
+		ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+		used = 0;
+	}
+
+	memset(block + used, 0, 56 - used);
+	*(__be64 *)(block + 56) = cpu_to_be64(sctx->count << 3);
+	ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+
+	spe_end();
+
+	for (i = 0; i < 5; i++)
+		digest[i] = cpu_to_be32(sctx->state[i]);
+
+	ppc_sha1_clear_context(sctx);
+	return 0;
+}
+
+/* Export: copy the struct sha1_state kept in @desc's context to @out. */
+static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
+{
+	memcpy(out, shash_desc_ctx(desc), sizeof(struct sha1_state));
+
+	return 0;
+}
+
+/* Import: overwrite the struct sha1_state in @desc's context from @in. */
+static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
+{
+	memcpy(shash_desc_ctx(desc), in, sizeof(struct sha1_state));
+
+	return 0;
+}
+
+static struct shash_alg alg = {	/* registered/unregistered by the module init/exit hooks */
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,	/* not defined in this file */
+	.update		=	ppc_spe_sha1_update,
+	.final		=	ppc_spe_sha1_final,
+	.export		=	ppc_spe_sha1_export,
+	.import		=	ppc_spe_sha1_import,
+	.descsize	=	sizeof(struct sha1_state),	/* context is a bare sha1_state */
+	.statesize	=	sizeof(struct sha1_state),	/* export/import copy that struct */
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-ppc-spe",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init ppc_spe_sha1_mod_init(void)
+{
+	return crypto_register_shash(&alg);	/* result is passed straight back */
+}
+
+static void __exit ppc_spe_sha1_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);	/* counterpart of the registration above */
+}
+
+module_init(ppc_spe_sha1_mod_init);
+module_exit(ppc_spe_sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-ppc-spe");
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c
new file mode 100644
index 0000000000..f283bbd3f1
--- /dev/null
+++ b/arch/powerpc/crypto/sha1.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API.
+ *
+ * powerpc implementation of the SHA1 Secure Hash Algorithm.
+ *
+ * Derived from cryptoapi implementation, adapted for in-place
+ * scatterlist interface.
+ *
+ * Derived from "crypto/sha1.c"
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/sha1.h>
+#include <crypto/sha1_base.h>
+#include <asm/byteorder.h>
+
+void powerpc_sha_transform(u32 *state, const u8 *src);	/* in sha1-powerpc-asm.S */
+
+static int powerpc_sha1_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial, done;
+	const u8 *src;
+
+	partial = sctx->count & 0x3f;	/* bytes already waiting in sctx->buffer */
+	sctx->count += len;
+	done = 0;			/* input bytes consumed so far */
+	src = data;
+
+	if ((partial + len) > 63) {	/* at least one full 64-byte block */
+		/* Complete a partly filled buffer and hash from it first. */
+		if (partial) {
+			done = -partial;	/* unsigned wrap: done + 64 == 64 - partial */
+			memcpy(sctx->buffer + partial, data, done + 64);
+			src = sctx->buffer;
+		}
+
+		do {
+			powerpc_sha_transform(sctx->state, src);
+			done += 64;
+			src = data + done;	/* later blocks come straight from data */
+		} while (done + 63 < len);
+
+		partial = 0;
+	}
+	memcpy(sctx->buffer + partial, src, len - done);	/* keep the remainder */
+
+	return 0;
+}
+
+
+/*
+ * Finish the hash: feed the 0x80 marker with zero padding and then the
+ * big-endian bit count through powerpc_sha1_update(), write the five
+ * state words big-endian to @out and clear the context.
+ */
+static int powerpc_sha1_final(struct shash_desc *desc, u8 *out)
+{
+	static const u8 padding[64] = { 0x80, };
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	const __be64 bitlen = cpu_to_be64(sctx->count << 3);
+	const u32 used = sctx->count & 0x3f;
+	__be32 *digest = (__be32 *)out;
+	u32 word;
+
+	/* Pad to 56 mod 64 so the 8 length bytes end on a block boundary. */
+	powerpc_sha1_update(desc, padding, ((used < 56) ? 56 : 120) - used);
+
+	/* Then the message length in bits. */
+	powerpc_sha1_update(desc, (const u8 *)&bitlen, sizeof(bitlen));
+
+	for (word = 0; word < 5; word++)
+		digest[word] = cpu_to_be32(sctx->state[word]);
+
+	/* Leave nothing of the state behind. */
+	memset(sctx, 0, sizeof *sctx);
+
+	return 0;
+}
+
+/* Export: copy the struct sha1_state kept in @desc's context to @out. */
+static int powerpc_sha1_export(struct shash_desc *desc, void *out)
+{
+	memcpy(out, shash_desc_ctx(desc), sizeof(struct sha1_state));
+
+	return 0;
+}
+
+/* Import: overwrite the struct sha1_state in @desc's context from @in. */
+static int powerpc_sha1_import(struct shash_desc *desc, const void *in)
+{
+	memcpy(shash_desc_ctx(desc), in, sizeof(struct sha1_state));
+
+	return 0;
+}
+
+static struct shash_alg alg = {	/* registered/unregistered by the module init/exit hooks */
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,	/* not defined in this file */
+	.update		=	powerpc_sha1_update,
+	.final		=	powerpc_sha1_final,
+	.export		=	powerpc_sha1_export,
+	.import		=	powerpc_sha1_import,
+	.descsize	=	sizeof(struct sha1_state),	/* context is a bare sha1_state */
+	.statesize	=	sizeof(struct sha1_state),	/* export/import copy that struct */
+	.base		=	{	/* NOTE(review): no .cra_priority is set here - confirm intended */
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-powerpc",
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init sha1_powerpc_mod_init(void)
+{
+	return crypto_register_shash(&alg);	/* result is passed straight back */
+}
+
+static void __exit sha1_powerpc_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);	/* counterpart of the registration above */
+}
+
+module_init(sha1_powerpc_mod_init);
+module_exit(sha1_powerpc_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-powerpc");
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S
new file mode 100644
index 0000000000..cd99d71dae
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-asm.S
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Fast SHA-256 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP	r3	/* pointer to hash values in memory		*/
+#define rKP	r24	/* pointer to round constants			*/
+#define rWP	r4	/* pointer to input data			*/
+
+#define rH0	r5	/* 8 32 bit hash values in 8 registers		*/
+#define rH1	r6
+#define rH2	r7
+#define rH3	r8
+#define rH4	r9
+#define rH5	r10
+#define rH6	r11
+#define rH7	r12
+
+#define rW0	r14	/* 64 bit registers. 16 words in 8 registers	*/
+#define rW1	r15
+#define rW2	r16
+#define rW3	r17
+#define rW4	r18
+#define rW5	r19
+#define rW6	r20
+#define rW7	r21
+
+#define rT0	r22	/* 64 bit temporaries 				*/
+#define rT1	r23
+#define rT2	r0	/* 32 bit temporaries				*/
+#define rT3	r25
+
+#define CMP_KN_LOOP
+#define CMP_KC_LOOP \
+	cmpwi		rT1,0;
+
+/*
+ * Function prologue. Allocates a 128 byte stack frame and spills every
+ * non-volatile register the transform uses: r14-r23 are stored with evstdw
+ * into 8 byte slots at 8..80(r1), r24/r25 are stored as plain 32 bit words
+ * at 88/92(r1). FINALIZE is the matching epilogue.
+ */
+#define INITIALIZE \
+	stwu		r1,-128(r1);	/* create stack frame		*/ \
+	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
+	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
+	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
+	evstdw		r17,32(r1);					   \
+	evstdw		r18,40(r1);					   \
+	evstdw		r19,48(r1);					   \
+	evstdw		r20,56(r1);					   \
+	evstdw		r21,64(r1);					   \
+	evstdw		r22,72(r1);					   \
+	evstdw		r23,80(r1);					   \
+	stw		r24,88(r1);	/* save normal registers	*/ \
+	stw		r25,92(r1);
+
+
+/*
+ * Function epilogue, the inverse of INITIALIZE. Reloads r14-r23 from their
+ * 8 byte save slots and r24/r25 from 88/92(r1), then zeroes r0 and stores
+ * it to the first word of each of the ten save slots (8,16,...,80) so that
+ * register contents spilled there do not linger on the stack, and finally
+ * releases the 128 byte frame. Only one word per slot is cleared; the
+ * inline comments explain why the other half is assumed to be harmless.
+ */
+#define FINALIZE \
+	evldw		r14,8(r1);	/* restore SPE registers	*/ \
+	evldw		r15,16(r1);					   \
+	evldw		r16,24(r1);					   \
+	evldw		r17,32(r1);					   \
+	evldw		r18,40(r1);					   \
+	evldw		r19,48(r1);					   \
+	evldw		r20,56(r1);					   \
+	evldw		r21,64(r1);					   \
+	evldw		r22,72(r1);					   \
+	evldw		r23,80(r1);					   \
+	lwz		r24,88(r1);	/* restore normal registers	*/ \
+	lwz		r25,92(r1);					   \
+	xor		r0,r0,r0;					   \
+	stw		r0,8(r1);	/* Delete sensitive data	*/ \
+	stw		r0,16(r1);	/* that we might have pushed	*/ \
+	stw		r0,24(r1);	/* from other context that runs	*/ \
+	stw		r0,32(r1);	/* the same code. Assume that	*/ \
+	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
+	stw		r0,48(r1);	/* was already overwritten on	*/ \
+	stw		r0,56(r1);	/* the way down to here		*/ \
+	stw		r0,64(r1);					   \
+	stw		r0,72(r1);					   \
+	stw		r0,80(r1);					   \
+	addi		r1,r1,128;	/* cleanup stack frame		*/
+
+/*
+ * Input word loading, selected by endianness.
+ *
+ * __BIG_ENDIAN__: LOAD_DATA reads the word at off(rWP) with lwz and leaves
+ * rWP untouched; NEXT_BLOCK then advances rWP by 64 once per block.
+ *
+ * otherwise: LOAD_DATA reads the word at rWP byte-reversed (lwbrx) and
+ * advances rWP by 4 after every word, so the 'off' argument is unused and
+ * NEXT_BLOCK expands to nothing.
+ */
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+	lwz		reg,off(rWP);	/* load data			*/
+#define NEXT_BLOCK \
+	addi		rWP,rWP,64;	/* increment per block		*/
+#else
+#define LOAD_DATA(reg, off) \
+	lwbrx		reg,0,rWP; 	/* load data			*/ \
+	addi		rWP,rWP,4;	/* increment per word		*/
+#define NEXT_BLOCK			/* nothing to do		*/
+#endif
+
+#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
+	LOAD_DATA(w, off)		/* 1: W				*/ \
+	rotrwi		rT0,e,6;	/* 1: S1 = e rotr 6		*/ \
+	rotrwi		rT1,e,11;	/* 1: S1' = e rotr 11		*/ \
+	rotrwi		rT2,e,25;	/* 1: S1" = e rotr 25		*/ \
+	xor		rT0,rT0,rT1;	/* 1: S1 = S1 xor S1'		*/ \
+	and		rT3,e,f;	/* 1: ch = e and f		*/ \
+	xor		rT0,rT0,rT2;	/* 1: S1 = S1 xor S1"		*/ \
+	andc		rT1,g,e;	/* 1: ch' = ~e and g		*/ \
+	lwz		rT2,off(rKP);	/* 1: K				*/ \
+	xor		rT3,rT3,rT1;	/* 1: ch = ch xor ch'		*/ \
+	add		h,h,rT0;	/* 1: temp1 = h + S1		*/ \
+	add		rT3,rT3,w;	/* 1: temp1' = ch + w		*/ \
+	rotrwi		rT0,a,2;	/* 1: S0 = a rotr 2		*/ \
+	add		h,h,rT3;	/* 1: temp1 = temp1 + temp1'	*/ \
+	rotrwi		rT1,a,13;	/* 1: S0' = a rotr 13		*/ \
+	add		h,h,rT2;	/* 1: temp1 = temp1 + K		*/ \
+	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
+	xor		rT0,rT0,rT1;	/* 1: S0 = S0 xor S0'		*/ \
+	add		d,d,h;		/* 1: d = d + temp1		*/ \
+	xor		rT3,rT0,rT3;	/* 1: S0 = S0 xor S0"		*/ \
+	evmergelo	w,w,w;		/*    shift W			*/ \
+	or		rT2,a,b;	/* 1: maj = a or b		*/ \
+	and		rT1,a,b;	/* 1: maj' = a and b		*/ \
+	and		rT2,rT2,c;	/* 1: maj = maj and c		*/ \
+	LOAD_DATA(w, off+4)		/* 2: W				*/ \
+	or		rT2,rT1,rT2;	/* 1: maj = maj or maj'		*/ \
+	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
+	add		rT3,rT3,rT2;	/* 1: temp2 = S0 + maj		*/ \
+	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
+	add		h,h,rT3;	/* 1: h = temp1 + temp2		*/ \
+	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
+	and		rT3,d,e;	/* 2: ch = e and f		*/ \
+	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
+	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
+	lwz		rT2,off+4(rKP);	/* 2: K				*/ \
+	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
+	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
+	add		rT3,rT3,w;	/* 2: temp1' = ch + w		*/ \
+	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
+	add		g,g,rT3;	/* 2: temp1 = temp1 + temp1'	*/ \
+	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
+	add		g,g,rT2;	/* 2: temp1 = temp1 + K		*/ \
+	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
+	or		rT2,h,a;	/* 2: maj = a or b		*/ \
+	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
+	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
+	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
+	add		c,c,g;		/* 2: d = d + temp1		*/ \
+	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
+	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
+	add		g,g,rT3		/* 2: h = temp1 + temp2		*/
+
+/*
+ * Two SHA-256 rounds with on-the-fly message schedule expansion.
+ *
+ * a..h are the eight working variables as seen by the first of the two
+ * rounds (comments tagged "1:"); the second round (tagged "2:") uses the
+ * same registers with the roles rotated by one, and its comments keep the
+ * logical names a..h rather than the macro argument names. Untagged
+ * comments belong to the 64 bit SPE instructions that compute the next
+ * pair of schedule words into w0 from w0, w1, w4, w5 and w7.
+ *
+ * k selects CMP_KN_LOOP (empty) or CMP_KC_LOOP (compare rT1, which at that
+ * point holds the round constant pair just loaded from off(rKP), against 0).
+ * off is the byte offset of that constant pair relative to rKP.
+ *
+ * In round 1, maj is accumulated as (a and b) + ((a xor b) and c).
+ */
+#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
+	rotrwi		rT2,e,6;	/* 1: S1 = e rotr 6		*/ \
+	evmergelohi	rT0,w0,w1;	/*    w[-15]			*/ \
+	rotrwi		rT3,e,11;	/* 1: S1' = e rotr 11		*/ \
+	evsrwiu		rT1,rT0,3;	/*    s0 = w[-15] >> 3		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
+	evrlwi		rT0,rT0,25;	/*    s0' = w[-15] rotr	7	*/ \
+	rotrwi		rT3,e,25;	/* 1: S1" = e rotr 25		*/ \
+	evxor		rT1,rT1,rT0;	/*    s0 = s0 xor s0'		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1"		*/ \
+	evrlwi		rT0,rT0,21;	/*    s0' = w[-15] rotr 18	*/ \
+	add		h,h,rT2;	/* 1: temp1 = h + S1		*/ \
+	evxor		rT0,rT0,rT1;	/*    s0 = s0 xor s0'		*/ \
+	and		rT2,e,f;	/* 1: ch = e and f		*/ \
+	evaddw		w0,w0,rT0;	/*    w = w[-16] + s0		*/ \
+	andc		rT3,g,e;	/* 1: ch' = ~e and g		*/ \
+	evsrwiu		rT0,w7,10;	/*    s1 = w[-2] >> 10		*/ \
+	xor		rT2,rT2,rT3;	/* 1: ch = ch xor ch'		*/ \
+	evrlwi		rT1,w7,15;	/*    s1' = w[-2] rotr 17	*/ \
+	add		h,h,rT2;	/* 1: temp1 = temp1 + ch	*/ \
+	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
+	rotrwi		rT2,a,2;	/* 1: S0 = a rotr 2		*/ \
+	evrlwi		rT1,w7,13;	/*    s1' = w[-2] rotr 19	*/ \
+	rotrwi		rT3,a,13;	/* 1: S0' = a rotr 13		*/ \
+	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
+	evldw		rT1,off(rKP);	/*    k				*/ \
+	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
+	evaddw		w0,w0,rT0;	/*    w = w + s1		*/ \
+	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0"		*/ \
+	evmergelohi	rT0,w4,w5;	/*    w[-7]			*/ \
+	and		rT3,a,b;	/* 1: maj = a and b		*/ \
+	evaddw		w0,w0,rT0;	/*    w = w + w[-7]		*/ \
+	CMP_K##k##_LOOP							   \
+	add		rT2,rT2,rT3;	/* 1: temp2 = S0 + maj		*/ \
+	evaddw		rT1,rT1,w0;	/*    wk = w + k		*/ \
+	xor		rT3,a,b;	/* 1: maj = a xor b		*/ \
+	evmergehi	rT0,rT1,rT1;	/*    wk1/wk2			*/ \
+	and		rT3,rT3,c;	/* 1: maj = maj and c		*/ \
+	add		h,h,rT0;	/* 1: temp1 = temp1 + wk	*/ \
+	add		rT2,rT2,rT3;	/* 1: temp2 = temp2 + maj	*/ \
+	add		g,g,rT1;	/* 2: temp1 = temp1 + wk	*/ \
+	add		d,d,h;		/* 1: d = d + temp1		*/ \
+	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
+	add		h,h,rT2;	/* 1: h = temp1 + temp2		*/ \
+	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
+	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
+	and		rT3,d,e;	/* 2: ch = e and f		*/ \
+	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
+	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
+	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
+	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
+	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
+	add		g,g,rT3;	/* 2: temp1 = temp1 + ch	*/ \
+	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
+	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
+	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
+	or		rT2,h,a;	/* 2: maj = a or b		*/ \
+	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
+	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
+	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
+	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
+	add		c,c,g;		/* 2: d = d + temp1		*/ \
+	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
+	add		g,g,rT3		/* 2: h = temp1 + temp2		*/
+
+/*
+ * ppc_spe_sha256_transform
+ *
+ * Register arguments as used below: r3 (rHP) points to the eight 32 bit
+ * hash words, r4 (rWP) points to the input data and r5 holds the number of
+ * 64 byte blocks, which becomes the CTR loop count.
+ *
+ * Per block: 16 rounds that load the message words (8 x R_LOAD_W), then
+ * the ppc_spe_sha256_16_rounds loop (8 x R_CALC_W per pass), then the
+ * working variables are added to the hash words in memory.
+ */
+_GLOBAL(ppc_spe_sha256_transform)
+	INITIALIZE
+
+	/* r5 doubles as rH0, so move the block count to CTR before loading */
+	mtctr		r5
+	lwz		rH0,0(rHP)
+	lwz		rH1,4(rHP)
+	lwz		rH2,8(rHP)
+	lwz		rH3,12(rHP)
+	lwz		rH4,16(rHP)
+	lwz		rH5,20(rHP)
+	lwz		rH6,24(rHP)
+	lwz		rH7,28(rHP)
+
+ppc_spe_sha256_main:
+	/* rKP is advanced inside the block, so reset it for every block */
+	lis		rKP,PPC_SPE_SHA256_K@ha
+	addi		rKP,rKP,PPC_SPE_SHA256_K@l
+
+	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
+	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
+	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
+	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
+	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
+	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
+	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
+	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
+ppc_spe_sha256_16_rounds:
+	/* step to the next 16 round constants (16 * 4 bytes) */
+	addi		rKP,rKP,64
+	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
+		 rW0, rW1, rW4, rW5, rW7, N, 0)
+	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
+		 rW1, rW2, rW5, rW6, rW0, N, 8)
+	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
+		 rW2, rW3, rW6, rW7, rW1, N, 16)
+	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
+		 rW3, rW4, rW7, rW0, rW2, N, 24)
+	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
+		 rW4, rW5, rW0, rW1, rW3, N, 32)
+	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
+		 rW5, rW6, rW1, rW2, rW4, N, 40)
+	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
+		 rW6, rW7, rW2, rW3, rW5, N, 48)
+	/*
+	 * The last invocation uses the "C" variant, whose CMP_KC_LOOP compares
+	 * the round constant held in rT1 with 0; the branch below repeats the
+	 * 16 rounds while that comparison yields "greater than".
+	 */
+	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
+		 rW7, rW0, rW3, rW4, rW6, C, 56)
+	bt		gt,ppc_spe_sha256_16_rounds
+
+	/* rW0-rW7 are free now; reuse them for the previous hash words */
+	lwz		rW0,0(rHP)
+	NEXT_BLOCK
+	lwz		rW1,4(rHP)
+	lwz		rW2,8(rHP)
+	lwz		rW3,12(rHP)
+	lwz		rW4,16(rHP)
+	lwz		rW5,20(rHP)
+	lwz		rW6,24(rHP)
+	lwz		rW7,28(rHP)
+
+	/* new hash = old hash + working variables, written back to memory */
+	add		rH0,rH0,rW0
+	stw		rH0,0(rHP)
+	add		rH1,rH1,rW1
+	stw		rH1,4(rHP)
+	add		rH2,rH2,rW2
+	stw		rH2,8(rHP)
+	add		rH3,rH3,rW3
+	stw		rH3,12(rHP)
+	add		rH4,rH4,rW4
+	stw		rH4,16(rHP)
+	add		rH5,rH5,rW5
+	stw		rH5,20(rHP)
+	add		rH6,rH6,rW6
+	stw		rH6,24(rHP)
+	add		rH7,rH7,rW7
+	stw		rH7,28(rHP)
+
+	/* decrement CTR and process the next block while it is non-zero */
+	bdnz		ppc_spe_sha256_main
+
+	FINALIZE
+	blr
+
+/*
+ * The 64 SHA-256 round constants, four per line, in round order.
+ *
+ * Besides being added into the rounds, the constants steer the
+ * ppc_spe_sha256_16_rounds loop: CMP_KC_LOOP tests the constant loaded at
+ * the end of each group of 16 against 0. The 16th entries of the second
+ * and third group (0x14292967, 0x106aa070) are positive as signed words,
+ * the very last one (0xc67178f2) is negative, which ends the loop.
+ * Reordering or changing the table therefore breaks the loop control.
+ */
+.data
+.align 5
+PPC_SPE_SHA256_K:
+	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
new file mode 100644
index 0000000000..2997d13236
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for SHA-256 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * of the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/sha2.h>
+#include <crypto/sha256_base.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equates to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 1024
+
+extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
+
+/*
+ * Open a section in which the SPE unit may be used from kernel code:
+ * preemption is disabled first, then enable_kernel_spe() is called.
+ * spe_end() performs the reverse steps in the opposite order.
+ */
+static void spe_begin(void)
+{
+	/* We just start SPE operations and will save SPE registers later. */
+	preempt_disable();
+	enable_kernel_spe();
+}
+
+/*
+ * Close the section opened by spe_begin(): call disable_kernel_spe() and
+ * only then allow preemption again.
+ */
+static void spe_end(void)
+{
+	disable_kernel_spe();
+	/* reenable preemption */
+	preempt_enable();
+}
+
+/*
+ * Wipe the complete hash context once the digest has been produced, so no
+ * intermediate state or buffered input is left behind in the descriptor.
+ *
+ * memzero_explicit() is used instead of an open-coded word loop: it is the
+ * helper this file already uses for the temporary SHA-224 digest, it cannot
+ * be elided by the compiler, and it puts no constraint on the size of
+ * struct sha256_state.
+ */
+static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
+{
+	memzero_explicit(sctx, sizeof(*sctx));
+}
+
+/*
+ * Feed @len bytes at @data into the hash.
+ *
+ * Input that does not complete a 64 byte block is only buffered. Otherwise
+ * a partially filled buffer is completed and hashed first, then all whole
+ * blocks are hashed straight from @data in pieces of at most MAX_BYTES so
+ * that each preempt-disabled SPE section stays short, and the remaining
+ * tail (0..63 bytes) is buffered for the next call. Always returns 0.
+ */
+static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	const unsigned int used = sctx->count & 0x3f;
+	const unsigned int room = 64 - used;
+	u8 *block = (u8 *)sctx->buf;
+	const u8 *in = data;
+	unsigned int chunk;
+
+	/* the running byte count grows by len on every path */
+	sctx->count += len;
+
+	/* not enough to complete a block: just stash the bytes */
+	if (len < room) {
+		memcpy(block + used, in, len);
+		return 0;
+	}
+
+	/* top up the buffered partial block and hash it */
+	if (used) {
+		memcpy(block + used, in, room);
+
+		spe_begin();
+		ppc_spe_sha256_transform(sctx->state, block, 1);
+		spe_end();
+
+		in += room;
+		len -= room;
+	}
+
+	/* hash whole blocks directly from the caller's data */
+	while (len >= 64) {
+		chunk = len;
+		if (chunk > MAX_BYTES)
+			chunk = MAX_BYTES;
+		chunk &= ~0x3f;
+
+		spe_begin();
+		ppc_spe_sha256_transform(sctx->state, in, chunk >> 6);
+		spe_end();
+
+		in += chunk;
+		len -= chunk;
+	}
+
+	/* keep whatever is left for the next update/final */
+	memcpy(block, in, len);
+	return 0;
+}
+
+/*
+ * Finish the hash: append the 0x80 marker, zero padding and the 64 bit
+ * big-endian message length in bits, hash the final block(s), write the
+ * eight state words to @out in big-endian order and wipe the context.
+ * Always returns 0.
+ */
+static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	char *block = (char *)sctx->buf;
+	const unsigned int used = sctx->count & 0x3f;
+	char *tail = block + used;
+	__be64 *lenfield = (__be64 *)(block + 56);
+	__be32 *digest = (__be32 *)out;
+	int zeros = 55 - used;
+	unsigned int i;
+
+	*tail++ = 0x80;
+
+	spe_begin();
+
+	if (zeros < 0) {
+		/*
+		 * The length field does not fit behind the marker: zero the
+		 * rest of this block (zeros + 8 bytes), hash it and put the
+		 * length into a fresh block.
+		 */
+		memset(tail, 0x00, zeros + sizeof (u64));
+		ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+		tail = block;
+		zeros = 56;
+	}
+
+	memset(tail, 0, zeros);
+	*lenfield = cpu_to_be64(sctx->count << 3);
+	ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+
+	spe_end();
+
+	for (i = 0; i < (SHA256_DIGEST_SIZE >> 2); i++)
+		digest[i] = cpu_to_be32(sctx->state[i]);
+
+	ppc_sha256_clear_context(sctx);
+	return 0;
+}
+
+/*
+ * SHA-224 finalisation: run the SHA-256 finalisation into a temporary
+ * buffer, hand out only the first SHA224_DIGEST_SIZE bytes and scrub the
+ * temporary afterwards. Always returns 0.
+ */
+static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
+{
+	__be32 full[SHA256_DIGEST_SIZE >> 2];
+	__be32 *words = (__be32 *)out;
+	unsigned int i;
+
+	ppc_spe_sha256_final(desc, (u8 *)full);
+
+	/* copy word by word rather than with a bytewise memcpy */
+	for (i = 0; i < (SHA224_DIGEST_SIZE >> 2); i++)
+		words[i] = full[i];
+
+	/* the truncated-away part must not stay on the stack */
+	memzero_explicit(full, sizeof(full));
+	return 0;
+}
+
+/*
+ * Export the partial hash state: copy the whole struct sha256_state held
+ * in the descriptor context into the caller-supplied buffer. Returns 0.
+ */
+static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
+{
+	const struct sha256_state *state = shash_desc_ctx(desc);
+
+	memcpy(out, state, sizeof(struct sha256_state));
+
+	return 0;
+}
+
+/*
+ * Import a previously exported state: overwrite the struct sha256_state in
+ * the descriptor context with the bytes at @in. Returns 0.
+ */
+static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
+{
+	struct sha256_state *state = shash_desc_ctx(desc);
+
+	memcpy(state, in, sizeof(struct sha256_state));
+
+	return 0;
+}
+
+/*
+ * The two shash registrations of this module. algs[0] is SHA-256 and
+ * algs[1] is SHA-224; they share update, export and import and the
+ * struct sha256_state context, and differ only in digest size, init
+ * function, final function and names. Both use priority 300.
+ */
+static struct shash_alg algs[2] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	ppc_spe_sha256_update,
+	.final		=	ppc_spe_sha256_final,
+	.export		=	ppc_spe_sha256_export,
+	.import		=	ppc_spe_sha256_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name=	"sha256-ppc-spe",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	ppc_spe_sha256_update,
+	.final		=	ppc_spe_sha224_final,	/* truncating wrapper */
+	.export		=	ppc_spe_sha256_export,
+	.import		=	ppc_spe_sha256_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name=	"sha224-ppc-spe",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+/*
+ * Module init: register every entry of algs[] in one call and return the
+ * result of crypto_register_shashes() to the module loader.
+ */
+static int __init ppc_spe_sha256_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+/* Module exit: unregister every entry of algs[]. */
+static void __exit ppc_spe_sha256_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(ppc_spe_sha256_mod_init);
+module_exit(ppc_spe_sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h
new file mode 100644
index 0000000000..f9cac46a95
--- /dev/null
+++ b/arch/powerpc/include/asm/8xx_immap.h
@@ -0,0 +1,566 @@
+/*
+ * MPC8xx Internal Memory Map
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ *
+ * The I/O on the MPC860 is comprised of blocks of special registers
+ * and the dual port ram for the Communication Processor Module.
+ * Within this space are functional units such as the SIU, memory
+ * controller, system timers, and other control functions.  It is
+ * a combination that I found difficult to separate into logical
+ * functional files.....but anyone else is welcome to try.  -- Dan
+ */
+#ifdef __KERNEL__
+#ifndef __IMMAP_8XX__
+#define __IMMAP_8XX__
+
+/* System configuration registers.
+ *
+ * Register overlay; field order and the resN filler arrays fix the layout,
+ * so members must not be reordered or resized. The members add up to 0x80
+ * bytes, assuming 4-byte uint, 2-byte ushort and no compiler padding.
+*/
+typedef	struct sys_conf {
+	uint	sc_siumcr;
+	uint	sc_sypcr;
+	uint	sc_swt;
+	char	res1[2];
+	ushort	sc_swsr;
+	uint	sc_sipend;
+	uint	sc_simask;
+	uint	sc_siel;
+	uint	sc_sivec;
+	uint	sc_tesr;
+	char	res2[0xc];
+	uint	sc_sdcr;
+	char	res3[0x4c];	/* fills the block up to 0x80 bytes */
+} sysconf8xx_t;
+
+/* PCMCIA configuration registers.
+*/
+typedef struct pcmcia_conf {
+	uint	pcmc_pbr0;
+	uint	pcmc_por0;
+	uint	pcmc_pbr1;
+	uint	pcmc_por1;
+	uint	pcmc_pbr2;
+	uint	pcmc_por2;
+	uint	pcmc_pbr3;
+	uint	pcmc_por3;
+	uint	pcmc_pbr4;
+	uint	pcmc_por4;
+	uint	pcmc_pbr5;
+	uint	pcmc_por5;
+	uint	pcmc_pbr6;
+	uint	pcmc_por6;
+	uint	pcmc_pbr7;
+	uint	pcmc_por7;
+	char	res1[0x20];
+	uint	pcmc_pgcra;
+	uint	pcmc_pgcrb;
+	uint	pcmc_pscr;
+	char	res2[4];
+	uint	pcmc_pipr;
+	char	res3[4];
+	uint	pcmc_per;
+	char	res4[4];
+} pcmconf8xx_t;
+
+/* Memory controller registers.
+*/
+typedef struct	mem_ctlr {
+	uint	memc_br0;
+	uint	memc_or0;
+	uint	memc_br1;
+	uint	memc_or1;
+	uint	memc_br2;
+	uint	memc_or2;
+	uint	memc_br3;
+	uint	memc_or3;
+	uint	memc_br4;
+	uint	memc_or4;
+	uint	memc_br5;
+	uint	memc_or5;
+	uint	memc_br6;
+	uint	memc_or6;
+	uint	memc_br7;
+	uint	memc_or7;
+	char	res1[0x24];
+	uint	memc_mar;
+	uint	memc_mcr;
+	char	res2[4];
+	uint	memc_mamr;
+	uint	memc_mbmr;
+	ushort	memc_mstat;
+	ushort	memc_mptpr;
+	uint	memc_mdr;
+	char	res3[0x80];
+} memctl8xx_t;
+
+/*-----------------------------------------------------------------------
+ * BR - Memory Controller: Base Register					16-9
+ */
+#define BR_BA_MSK	0xffff8000	/* Base Address Mask			*/
+#define BR_AT_MSK	0x00007000	/* Address Type Mask			*/
+#define BR_PS_MSK	0x00000c00	/* Port Size Mask			*/
+#define BR_PS_32	0x00000000	/* 32 bit port size			*/
+#define BR_PS_16	0x00000800	/* 16 bit port size			*/
+#define BR_PS_8		0x00000400	/*  8 bit port size			*/
+#define BR_PARE		0x00000200	/* Parity Enable			*/
+#define BR_WP		0x00000100	/* Write Protect			*/
+#define BR_MS_MSK	0x000000c0	/* Machine Select Mask			*/
+#define BR_MS_GPCM	0x00000000	/* G.P.C.M. Machine Select		*/
+#define BR_MS_UPMA	0x00000080	/* U.P.M.A Machine Select		*/
+#define BR_MS_UPMB	0x000000c0	/* U.P.M.B Machine Select		*/
+#define BR_V		0x00000001	/* Bank Valid				*/
+
+/*-----------------------------------------------------------------------
+ * OR - Memory Controller: Option Register				16-11
+ */
+#define OR_AM_MSK	0xffff8000	/* Address Mask Mask			*/
+#define OR_ATM_MSK	0x00007000	/* Address Type Mask Mask		*/
+#define OR_CSNT_SAM	0x00000800	/* Chip Select Negation Time/ Start	*/
+					/* Address Multiplex			*/
+#define OR_ACS_MSK	0x00000600	/* Address to Chip Select Setup mask	*/
+#define OR_ACS_DIV1	0x00000000	/* CS is output at the same time	*/
+#define OR_ACS_DIV4	0x00000400	/* CS is output 1/4 a clock later	*/
+#define OR_ACS_DIV2	0x00000600	/* CS is output 1/2 a clock later	*/
+#define OR_G5LA		0x00000400	/* Output #GPL5 on #GPL_A5		*/
+#define OR_G5LS		0x00000200	/* Drive #GPL high on falling edge of...*/
+#define OR_BI		0x00000100	/* Burst inhibit			*/
+#define OR_SCY_MSK	0x000000f0	/* Cycle Length in Clocks		*/
+#define OR_SCY_0_CLK	0x00000000	/* 0 clock cycles wait states		*/
+#define OR_SCY_1_CLK	0x00000010	/* 1 clock cycles wait states		*/
+#define OR_SCY_2_CLK	0x00000020	/* 2 clock cycles wait states		*/
+#define OR_SCY_3_CLK	0x00000030	/* 3 clock cycles wait states		*/
+#define OR_SCY_4_CLK	0x00000040	/* 4 clock cycles wait states		*/
+#define OR_SCY_5_CLK	0x00000050	/* 5 clock cycles wait states		*/
+#define OR_SCY_6_CLK	0x00000060	/* 6 clock cycles wait states		*/
+#define OR_SCY_7_CLK	0x00000070	/* 7 clock cycles wait states		*/
+#define OR_SCY_8_CLK	0x00000080	/* 8 clock cycles wait states		*/
+#define OR_SCY_9_CLK	0x00000090	/* 9 clock cycles wait states		*/
+#define OR_SCY_10_CLK	0x000000a0	/* 10 clock cycles wait states		*/
+#define OR_SCY_11_CLK	0x000000b0	/* 11 clock cycles wait states		*/
+#define OR_SCY_12_CLK	0x000000c0	/* 12 clock cycles wait states		*/
+#define OR_SCY_13_CLK	0x000000d0	/* 13 clock cycles wait states		*/
+#define OR_SCY_14_CLK	0x000000e0	/* 14 clock cycles wait states		*/
+#define OR_SCY_15_CLK	0x000000f0	/* 15 clock cycles wait states		*/
+#define OR_SETA		0x00000008	/* External Transfer Acknowledge	*/
+#define OR_TRLX		0x00000004	/* Timing Relaxed			*/
+#define OR_EHTR		0x00000002	/* Extended Hold Time on Read		*/
+
+/* System Integration Timers.
+*/
+typedef struct	sys_int_timers {
+	ushort	sit_tbscr;
+	char	res0[0x02];
+	uint	sit_tbreff0;
+	uint	sit_tbreff1;
+	char	res1[0x14];
+	ushort	sit_rtcsc;
+	char	res2[0x02];
+	uint	sit_rtc;
+	uint	sit_rtsec;
+	uint	sit_rtcal;
+	char	res3[0x10];
+	ushort	sit_piscr;
+	char	res4[2];
+	uint	sit_pitc;
+	uint	sit_pitr;
+	char	res5[0x34];
+} sit8xx_t;
+
+#define TBSCR_TBIRQ_MASK	((ushort)0xff00)
+#define TBSCR_REFA		((ushort)0x0080)
+#define TBSCR_REFB		((ushort)0x0040)
+#define TBSCR_REFAE		((ushort)0x0008)
+#define TBSCR_REFBE		((ushort)0x0004)
+#define TBSCR_TBF		((ushort)0x0002)
+#define TBSCR_TBE		((ushort)0x0001)
+
+#define RTCSC_RTCIRQ_MASK	((ushort)0xff00)
+#define RTCSC_SEC		((ushort)0x0080)
+#define RTCSC_ALR		((ushort)0x0040)
+#define RTCSC_38K		((ushort)0x0010)
+#define RTCSC_SIE		((ushort)0x0008)
+#define RTCSC_ALE		((ushort)0x0004)
+#define RTCSC_RTF		((ushort)0x0002)
+#define RTCSC_RTE		((ushort)0x0001)
+
+#define PISCR_PIRQ_MASK		((ushort)0xff00)
+#define PISCR_PS		((ushort)0x0080)
+#define PISCR_PIE		((ushort)0x0004)
+#define PISCR_PTF		((ushort)0x0002)
+#define PISCR_PTE		((ushort)0x0001)
+
+/* Clocks and Reset.
+ *
+ * Three 32 bit registers followed by filler; 0x80 bytes in total, assuming
+ * 4-byte uint. cark8xx_t below has one key word per register of this
+ * block, in the same order.
+*/
+typedef struct clk_and_reset {
+	uint	car_sccr;
+	uint	car_plprcr;
+	uint	car_rsr;
+	char	res[0x74];        /* Reserved area                  */
+} car8xx_t;
+
+/* System Integration Timers keys.
+*/
+typedef struct sitk {
+	uint	sitk_tbscrk;
+	uint	sitk_tbreff0k;
+	uint	sitk_tbreff1k;
+	uint	sitk_tbk;
+	char	res1[0x10];
+	uint	sitk_rtcsck;
+	uint	sitk_rtck;
+	uint	sitk_rtseck;
+	uint	sitk_rtcalk;
+	char	res2[0x10];
+	uint	sitk_piscrk;
+	uint	sitk_pitck;
+	char	res3[0x38];
+} sitk8xx_t;
+
+/* Clocks and reset keys.
+ *
+ * One key word per register of car8xx_t (sccr, plprcr, rsr). The large
+ * trailing filler makes the block 0x480 bytes long, assuming 4-byte uint,
+ * which is what positions the members that follow it in immap_t.
+*/
+typedef struct cark {
+	uint	cark_sccrk;
+	uint	cark_plprcrk;
+	uint	cark_rsrk;
+	char	res[0x474];	/* filler up to the next block */
+} cark8xx_t;
+
+/* The key to unlock registers maintained by keep-alive power.
+*/
+#define KAPWR_KEY	((unsigned int)0x55ccaa33)
+
+/* Video interface.  MPC823 Only.
+*/
+typedef struct vid823 {
+	ushort	vid_vccr;
+	ushort	res1;
+	u_char	vid_vsr;
+	u_char	res2;
+	u_char	vid_vcmr;
+	u_char	res3;
+	uint	vid_vbcb;
+	uint	res4;
+	uint	vid_vfcr0;
+	uint	vid_vfaa0;
+	uint	vid_vfba0;
+	uint	vid_vfcr1;
+	uint	vid_vfaa1;
+	uint	vid_vfba1;
+	u_char	res5[0x18];
+} vid823_t;
+
+/* LCD interface.  823 Only.
+*/
+typedef struct lcd {
+	uint	lcd_lccr;
+	uint	lcd_lchcr;
+	uint	lcd_lcvcr;
+	char	res1[4];
+	uint	lcd_lcfaa;
+	uint	lcd_lcfba;
+	char	lcd_lcsr;
+	char	res2[0x7];
+} lcd823_t;
+
+/* I2C
+*/
+typedef struct i2c {
+	u_char	i2c_i2mod;
+	char	res1[3];
+	u_char	i2c_i2add;
+	char	res2[3];
+	u_char	i2c_i2brg;
+	char	res3[3];
+	u_char	i2c_i2com;
+	char	res4[3];
+	u_char	i2c_i2cer;
+	char	res5[3];
+	u_char	i2c_i2cmr;
+	char	res6[0x8b];
+} i2c8xx_t;
+
+/* DMA control/status registers.
+*/
+typedef struct sdma_csr {
+	char	res1[4];
+	uint	sdma_sdar;
+	u_char	sdma_sdsr;
+	char	res3[3];
+	u_char	sdma_sdmr;
+	char	res4[3];
+	u_char	sdma_idsr1;
+	char	res5[3];
+	u_char	sdma_idmr1;
+	char	res6[3];
+	u_char	sdma_idsr2;
+	char	res7[3];
+	u_char	sdma_idmr2;
+	char	res8[0x13];
+} sdma8xx_t;
+
+/* Communication Processor Module Interrupt Controller.
+*/
+typedef struct cpm_ic {
+	ushort	cpic_civr;
+	char	res[0xe];
+	uint	cpic_cicr;
+	uint	cpic_cipr;
+	uint	cpic_cimr;
+	uint	cpic_cisr;
+} cpic8xx_t;
+
+/* Input/Output Port control/status registers.
+*/
+typedef struct io_port {
+	ushort	iop_padir;
+	ushort	iop_papar;
+	ushort	iop_paodr;
+	ushort	iop_padat;
+	char	res1[8];
+	ushort	iop_pcdir;
+	ushort	iop_pcpar;
+	ushort	iop_pcso;
+	ushort	iop_pcdat;
+	ushort	iop_pcint;
+	char	res2[6];
+	ushort	iop_pddir;
+	ushort	iop_pdpar;
+	char	res3[2];
+	ushort	iop_pddat;
+	uint	utmode;
+	char	res4[4];
+} iop8xx_t;
+
+/* Communication Processor Module Timers
+*/
+typedef struct cpm_timers {
+	ushort	cpmt_tgcr;
+	char	res1[0xe];
+	ushort	cpmt_tmr1;
+	ushort	cpmt_tmr2;
+	ushort	cpmt_trr1;
+	ushort	cpmt_trr2;
+	ushort	cpmt_tcr1;
+	ushort	cpmt_tcr2;
+	ushort	cpmt_tcn1;
+	ushort	cpmt_tcn2;
+	ushort	cpmt_tmr3;
+	ushort	cpmt_tmr4;
+	ushort	cpmt_trr3;
+	ushort	cpmt_trr4;
+	ushort	cpmt_tcr3;
+	ushort	cpmt_tcr4;
+	ushort	cpmt_tcn3;
+	ushort	cpmt_tcn4;
+	ushort	cpmt_ter1;
+	ushort	cpmt_ter2;
+	ushort	cpmt_ter3;
+	ushort	cpmt_ter4;
+	char	res2[8];
+} cpmtimer8xx_t;
+
+/* Finally, the Communication Processor stuff.....
+*/
+typedef struct scc {		/* Serial communication channels */
+	uint	scc_gsmrl;
+	uint	scc_gsmrh;
+	ushort	scc_psmr;
+	char	res1[2];
+	ushort	scc_todr;
+	ushort	scc_dsr;
+	ushort	scc_scce;
+	char	res2[2];
+	ushort	scc_sccm;
+	char	res3;
+	u_char	scc_sccs;
+	char	res4[8];
+} scc_t;
+
+typedef struct smc {		/* Serial management channels */
+	char	res1[2];
+	ushort	smc_smcmr;
+	char	res2[2];
+	u_char	smc_smce;
+	char	res3[3];
+	u_char	smc_smcm;
+	char	res4[5];
+} smc_t;
+
+/* MPC860T Fast Ethernet Controller.  It isn't part of the CPM, but
+ * it fits within the address space.
+ */
+
+typedef struct fec {
+	uint	fec_addr_low;		/* lower 32 bits of station address	*/
+	ushort	fec_addr_high;		/* upper 16 bits of station address	*/
+	ushort	res1;			/* reserved				*/
+	uint	fec_grp_hash_table_high;	/* upper 32-bits of hash table		*/
+	uint	fec_grp_hash_table_low;	/* lower 32-bits of hash table		*/
+	uint	fec_r_des_start;	/* beginning of Rx descriptor ring	*/
+	uint	fec_x_des_start;	/* beginning of Tx descriptor ring	*/
+	uint	fec_r_buff_size;	/* Rx buffer size			*/
+	uint	res2[9];		/* reserved				*/
+	uint	fec_ecntrl;		/* ethernet control register		*/
+	uint	fec_ievent;		/* interrupt event register		*/
+	uint	fec_imask;		/* interrupt mask register		*/
+	uint	fec_ivec;		/* interrupt level and vector status	*/
+	uint	fec_r_des_active;	/* Rx ring updated flag			*/
+	uint	fec_x_des_active;	/* Tx ring updated flag			*/
+	uint	res3[10];		/* reserved				*/
+	uint	fec_mii_data;		/* MII data register			*/
+	uint	fec_mii_speed;		/* MII speed control register		*/
+	uint	res4[17];		/* reserved				*/
+	uint	fec_r_bound;		/* end of RAM (read-only)		*/
+	uint	fec_r_fstart;		/* Rx FIFO start address		*/
+	uint	res5[6];		/* reserved				*/
+	uint	fec_x_fstart;		/* Tx FIFO start address		*/
+	uint	res6[17];		/* reserved				*/
+	uint	fec_fun_code;		/* fec SDMA function code		*/
+	uint	res7[3];		/* reserved				*/
+	uint	fec_r_cntrl;		/* Rx control register			*/
+	uint	fec_r_hash;		/* Rx hash register			*/
+	uint	res8[14];		/* reserved				*/
+	uint	fec_x_cntrl;		/* Tx control register			*/
+	uint	res9[0x1e];		/* reserved				*/
+} fec_t;
+
+/* The FEC and LCD color map share the same address space....
+ * I guess we will never see an 823T :-).
+ *
+ * Both views cover 0x200 bytes: fl_un_cmap is declared with that size and
+ * the members of fec_t add up to 0x200 as well, assuming 4-byte uint and
+ * 2-byte ushort. Accessed through the cp_fec / lcd_cmap macros defined
+ * next to the union member in cpm8xx_t.
+ */
+union fec_lcd {
+	fec_t	fl_un_fec;		/* view as FEC register block */
+	u_char	fl_un_cmap[0x200];	/* view as LCD color map bytes */
+};
+
+typedef struct comm_proc {
+	/* General control and status registers.
+	*/
+	ushort	cp_cpcr;
+	u_char	res1[2];
+	ushort	cp_rccr;
+	u_char	res2;
+	u_char	cp_rmds;
+	u_char	res3[4];
+	ushort	cp_cpmcr1;
+	ushort	cp_cpmcr2;
+	ushort	cp_cpmcr3;
+	ushort	cp_cpmcr4;
+	u_char	res4[2];
+	ushort	cp_rter;
+	u_char	res5[2];
+	ushort	cp_rtmr;
+	u_char	res6[0x14];
+
+	/* Baud rate generators.
+	*/
+	uint	cp_brgc1;
+	uint	cp_brgc2;
+	uint	cp_brgc3;
+	uint	cp_brgc4;
+
+	/* Serial Communication Channels.
+	*/
+	scc_t	cp_scc[4];
+
+	/* Serial Management Channels.
+	*/
+	smc_t	cp_smc[2];
+
+	/* Serial Peripheral Interface.
+	*/
+	ushort	cp_spmode;
+	u_char	res7[4];
+	u_char	cp_spie;
+	u_char	res8[3];
+	u_char	cp_spim;
+	u_char	res9[2];
+	u_char	cp_spcom;
+	u_char	res10[2];
+
+	/* Parallel Interface Port.
+	*/
+	u_char	res11[2];
+	ushort	cp_pipc;
+	u_char	res12[2];
+	ushort	cp_ptpr;
+	uint	cp_pbdir;
+	uint	cp_pbpar;
+	u_char	res13[2];
+	ushort	cp_pbodr;
+	uint	cp_pbdat;
+
+	/* Port E - MPC87x/88x only.
+	 */
+	uint	cp_pedir;
+	uint	cp_pepar;
+	uint	cp_peso;
+	uint	cp_peodr;
+	uint	cp_pedat;
+
+	/* Communications Processor Timing Register -
+	   Contains RMII Timing for the FECs on MPC87x/88x only.
+	*/
+	uint	cp_cptr;
+
+	/* Serial Interface and Time Slot Assignment.
+	*/
+	uint	cp_simode;
+	u_char	cp_sigmr;
+	u_char	res15;
+	u_char	cp_sistr;
+	u_char	cp_sicmr;
+	u_char	res16[4];
+	uint	cp_sicr;
+	uint	cp_sirp;
+	u_char	res17[0xc];
+
+	/* 256 bytes of MPC823 video controller RAM array.
+	*/
+	u_char	cp_vcram[0x100];
+	u_char	cp_siram[0x200];
+
+	/* The fast ethernet controller is not really part of the CPM,
+	 * but it resides in the address space.
+	 * The LCD color map is also here.
+	 */
+	union	fec_lcd	fl_un;
+#define cp_fec		fl_un.fl_un_fec
+#define lcd_cmap	fl_un.fl_un_cmap
+	char	res18[0xE00];
+
+	/* The DUET family has a second FEC here */
+	fec_t	cp_fec2;
+#define cp_fec1	cp_fec	/* consistency macro */
+
+	/* Dual Ported RAM follows.
+	 * There are many different formats for this memory area
+	 * depending upon the devices used and options chosen.
+	 * Some processors don't have all of it populated.
+	 */
+	u_char	cp_dpmem[0x1C00];	/* BD / Data / ucode */
+	u_char	cp_dparam[0x400];	/* Parameter RAM */
+} cpm8xx_t;
+
+/* Internal memory map.
+ *
+ * Overlay for the whole internal register space; accessed through the
+ * mpc8xx_immr pointer declared below. The byte offsets in the member
+ * comments are derived from the member types declared in this header,
+ * assuming 4-byte uint, 2-byte ushort and no compiler-inserted padding
+ * -- TODO confirm against the reference manual.
+*/
+typedef struct immap {
+	sysconf8xx_t	im_siu_conf;	/* 0x000: SIU Configuration */
+	pcmconf8xx_t	im_pcmcia;	/* 0x080: PCMCIA Configuration */
+	memctl8xx_t	im_memctl;	/* 0x100: Memory Controller */
+	sit8xx_t	im_sit;		/* 0x200: System integration timers */
+	car8xx_t	im_clkrst;	/* 0x280: Clocks and reset */
+	sitk8xx_t	im_sitk;	/* 0x300: Sys int timer keys */
+	cark8xx_t	im_clkrstk;	/* 0x380: Clocks and reset keys */
+	vid823_t	im_vid;		/* 0x800: Video (823 only) */
+	lcd823_t	im_lcd;		/* 0x840: LCD (823 only) */
+	i2c8xx_t	im_i2c;		/* 0x860: I2C control/status */
+	sdma8xx_t	im_sdma;	/* 0x900: SDMA control/status */
+	cpic8xx_t	im_cpic;	/* 0x930: CPM Interrupt Controller */
+	iop8xx_t	im_ioport;	/* 0x950: IO Port control/status */
+	cpmtimer8xx_t	im_cpmtimer;	/* 0x980: CPM timers */
+	cpm8xx_t	im_cpm;		/* 0x9c0: Communication processor */
+} immap_t;
+
+extern immap_t __iomem *mpc8xx_immr;
+
+#endif /* __IMMAP_8XX__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
new file mode 100644
index 0000000000..61a8d5555c
--- /dev/null
+++ b/arch/powerpc/include/asm/Kbuild
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+generated-y += syscall_table_32.h
+generated-y += syscall_table_64.h
+generated-y += syscall_table_spu.h
+generic-y += agp.h
+generic-y += kvm_types.h
+generic-y += mcs_spinlock.h
+generic-y += qrwlock.h
+generic-y += vtime.h
+generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h
new file mode 100644
index 0000000000..6d79c31700
--- /dev/null
+++ b/arch/powerpc/include/asm/accounting.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common time accounting prototypes and such for all ppc machines.
+ */
+
+#ifndef __POWERPC_ACCOUNTING_H
+#define __POWERPC_ACCOUNTING_H
+
+/* Stuff for accurate time accounting */
+struct cpu_accounting_data {
+	/* Accumulated cputime values to flush on ticks */
+	unsigned long utime;
+	unsigned long stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	unsigned long utime_scaled;
+	unsigned long stime_scaled;
+#endif
+	unsigned long gtime;
+	unsigned long hardirq_time;
+	unsigned long softirq_time;
+	unsigned long steal_time;
+	unsigned long idle_time;
+	/* Internal counters */
+	unsigned long starttime;	/* TB value snapshot */
+	unsigned long starttime_user;	/* TB value on exit to usermode */
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	unsigned long startspurr;	/* SPURR value snapshot */
+	unsigned long utime_sspurr;	/* ->user_time when ->startspurr set */
+#endif
+};
+
+#endif
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
new file mode 100644
index 0000000000..51b093f675
--- /dev/null
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_ARCHRANDOM_H
+#define _ASM_POWERPC_ARCHRANDOM_H
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+	return 0;	/* stub: ignores both arguments and always returns 0 */
+}
+
+size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs);
+
+#ifdef CONFIG_PPC_POWERNV
+int pnv_get_random_long(unsigned long *v);
+#endif
+
+#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
new file mode 100644
index 0000000000..2bc53c646c
--- /dev/null
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -0,0 +1,64 @@
+#ifndef _ASM_POWERPC_ASM_COMPAT_H
+#define _ASM_POWERPC_ASM_COMPAT_H
+
+#include <asm/asm-const.h>
+#include <asm/types.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef __powerpc64__
+
+/* operations for longs and pointers */
+#define PPC_LL		stringify_in_c(ld)
+#define PPC_STL		stringify_in_c(std)
+#define PPC_STLU	stringify_in_c(stdu)
+#define PPC_LCMPI	stringify_in_c(cmpdi)
+#define PPC_LCMPLI	stringify_in_c(cmpldi)
+#define PPC_LCMP	stringify_in_c(cmpd)
+#define PPC_LONG	stringify_in_c(.8byte)
+#define PPC_LONG_ALIGN	stringify_in_c(.balign 8)
+#define PPC_TLNEI	stringify_in_c(tdnei)
+#define PPC_LLARX	stringify_in_c(ldarx)
+#define PPC_STLCX	stringify_in_c(stdcx.)
+#define PPC_CNTLZL	stringify_in_c(cntlzd)
+#define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS)
+#define PPC_SRL		stringify_in_c(srd)
+#define PPC_LR_STKOFF	16
+#define PPC_MIN_STKFRM	112
+
+#ifdef __BIG_ENDIAN__
+#define LHZX_BE	stringify_in_c(lhzx)
+#define LWZX_BE	stringify_in_c(lwzx)
+#define LDX_BE	stringify_in_c(ldx)
+#define STWX_BE	stringify_in_c(stwx)
+#define STDX_BE	stringify_in_c(stdx)
+#else
+#define LHZX_BE	stringify_in_c(lhbrx)
+#define LWZX_BE	stringify_in_c(lwbrx)
+#define LDX_BE	stringify_in_c(ldbrx)
+#define STWX_BE	stringify_in_c(stwbrx)
+#define STDX_BE	stringify_in_c(stdbrx)
+#endif
+
+#else /* 32-bit */
+
+/* operations for longs and pointers */
+#define PPC_LL		stringify_in_c(lwz)
+#define PPC_STL		stringify_in_c(stw)
+#define PPC_STLU	stringify_in_c(stwu)
+#define PPC_LCMPI	stringify_in_c(cmpwi)
+#define PPC_LCMPLI	stringify_in_c(cmplwi)
+#define PPC_LCMP	stringify_in_c(cmpw)
+#define PPC_LONG	stringify_in_c(.long)
+#define PPC_LONG_ALIGN	stringify_in_c(.balign 4)
+#define PPC_TLNEI	stringify_in_c(twnei)
+#define PPC_LLARX	stringify_in_c(lwarx)
+#define PPC_STLCX	stringify_in_c(stwcx.)
+#define PPC_CNTLZL	stringify_in_c(cntlzw)
+#define PPC_MTOCRF	stringify_in_c(mtcrf)
+#define PPC_SRL		stringify_in_c(srw)
+#define PPC_LR_STKOFF	4
+#define PPC_MIN_STKFRM	16
+
+#endif
+
+#endif /* _ASM_POWERPC_ASM_COMPAT_H */
diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h
new file mode 100644
index 0000000000..bfb3c35348
--- /dev/null
+++ b/arch/powerpc/include/asm/asm-const.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_ASM_CONST_H
+#define _ASM_POWERPC_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#  define ASM_CONST(x)		x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)	x##UL
+#  define ASM_CONST(x)		__ASM_CONST(x)
+#endif
+
+#endif /* _ASM_POWERPC_ASM_CONST_H */
diff --git a/arch/powerpc/include/asm/asm-offsets.h b/arch/powerpc/include/asm/asm-offsets.h
new file mode 100644
index 0000000000..d370ee36a1
--- /dev/null
+++ b/arch/powerpc/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
new file mode 100644
index 0000000000..274bce76f5
--- /dev/null
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_ASM_PROTOTYPES_H
+#define _ASM_POWERPC_ASM_PROTOTYPES_H
+/*
+ * This file is for C prototypes of asm symbols that are EXPORTed.
+ * It allows the modversions logic to see their prototype and
+ * generate proper CRCs for them.
+ *
+ * Copyright 2016, Daniel Axtens, IBM Corporation.
+ */
+
+#include <linux/threads.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <linux/uaccess.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/dcr.h>
+#include <asm/mmu_context.h>
+#include <asm/ultravisor-api.h>
+
+#include <uapi/asm/ucontext.h>
+
+/* Ultravisor */
+#if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM)
+long ucall_norets(unsigned long opcode, ...);
+#else
+static inline long ucall_norets(unsigned long opcode, ...)
+{
+	return U_NOT_AVAILABLE;
+}
+#endif
+
+/* OPAL */
+int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
+		    int64_t a4, int64_t a5, int64_t a6, int64_t a7,
+		    int64_t opcode, uint64_t msr);
+
+/* misc runtime */
+void enable_machine_check(void);
+extern u64 __bswapdi2(u64);
+extern s64 __lshrdi3(s64, int);
+extern s64 __ashldi3(s64, int);
+extern s64 __ashrdi3(s64, int);
+extern int __cmpdi2(s64, s64);
+extern int __ucmpdi2(u64, u64);
+
+/* tracing */
+void _mcount(void);
+
+/* Transactional memory related */
+void tm_enable(void);
+void tm_disable(void);
+void tm_abort(uint8_t cause);
+
+struct kvm_vcpu;
+void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+#else
+static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+				     bool preserve_nv) { }
+static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+					bool preserve_nv) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
+
+long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
+long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
+			unsigned long dabrx);
+
+#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/asm.h b/arch/powerpc/include/asm/asm.h
new file mode 100644
index 0000000000..86f46b604e
--- /dev/null
+++ b/arch/powerpc/include/asm/asm.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_ASM_H
+#define _ASM_POWERPC_ASM_H
+
+#define _ASM_PTR	" .long "
+
+#endif /* _ASM_POWERPC_ASM_H */
diff --git a/arch/powerpc/include/asm/async_tx.h b/arch/powerpc/include/asm/async_tx.h
new file mode 100644
index 0000000000..a14758426d
--- /dev/null
+++ b/arch/powerpc/include/asm/async_tx.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2008-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ */
+#ifndef _ASM_POWERPC_ASYNC_TX_H_
+#define _ASM_POWERPC_ASYNC_TX_H_
+
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+extern struct dma_chan *
+ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
+	struct page **dst_lst, int dst_cnt, struct page **src_lst,
+	int src_cnt, size_t src_sz);
+
+#define async_tx_find_channel(dep, cap, dst_lst, dst_cnt, src_lst, \
+			      src_cnt, src_sz) \
+	ppc440spe_async_tx_find_best_channel(cap, dst_lst, dst_cnt, src_lst, \
+					     src_cnt, src_sz)
+#else
+
+#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
+	__async_tx_find_channel(dep, type)
+
+struct dma_chan *
+__async_tx_find_channel(struct async_submit_ctl *submit,
+			enum dma_transaction_type tx_type);
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
new file mode 100644
index 0000000000..5bf6a4d492
--- /dev/null
+++ b/arch/powerpc/include/asm/atomic.h
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_ATOMIC_H_
+#define _ASM_POWERPC_ATOMIC_H_
+
+/*
+ * PowerPC atomic operations
+ */
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+#include <asm/asm-const.h>
+
+/*
+ * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
+ * a "bne-" instruction at the end, an isync is enough as an acquire barrier
+ * on platforms without lwsync.
+ */
+#define __atomic_acquire_fence()					\
+	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
+
+#define __atomic_release_fence()					\
+	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
+
+static __inline__ int arch_atomic_read(const atomic_t *v)
+{
+	int t;
+
+	/* -mprefixed can generate offsets beyond range, fall back hack */
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+		__asm__ __volatile__("lwz %0,0(%1)" : "=r"(t) : "b"(&v->counter));
+	else
+		__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
+
+	return t;
+}
+
+static __inline__ void arch_atomic_set(atomic_t *v, int i)
+{
+	/* -mprefixed can generate offsets beyond range, fall back hack */
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+		__asm__ __volatile__("stw %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter));
+	else
+		__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
+}
+
+#define ATOMIC_OP(op, asm_op, suffix, sign, ...)			\
+static __inline__ void arch_atomic_##op(int a, atomic_t *v)		\
+{									\
+	int t;								\
+									\
+	__asm__ __volatile__(						\
+"1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
+	#asm_op "%I2" suffix " %0,%0,%2\n"				\
+"	stwcx.	%0,0,%3 \n"						\
+"	bne-	1b\n"							\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
+}									\
+
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...)		\
+static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v)	\
+{									\
+	int t;								\
+									\
+	__asm__ __volatile__(						\
+"1:	lwarx	%0,0,%3		# atomic_" #op "_return_relaxed\n"	\
+	#asm_op "%I2" suffix " %0,%0,%2\n"				\
+"	stwcx.	%0,0,%3\n"						\
+"	bne-	1b\n"							\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
+									\
+	return t;							\
+}
+
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...)		\
+static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
+{									\
+	int res, t;							\
+									\
+	__asm__ __volatile__(						\
+"1:	lwarx	%0,0,%4		# atomic_fetch_" #op "_relaxed\n"	\
+	#asm_op "%I3" suffix " %1,%0,%3\n"				\
+"	stwcx.	%1,0,%4\n"						\
+"	bne-	1b\n"							\
+	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
+	: "r"#sign (a), "r" (&v->counter)				\
+	: "cc", ##__VA_ARGS__);						\
+									\
+	return res;							\
+}
+
+#define ATOMIC_OPS(op, asm_op, suffix, sign, ...)			\
+	ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__)		\
+	ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)
+
+ATOMIC_OPS(add, add, "c", I, "xer")
+ATOMIC_OPS(sub, sub, "c", I, "xer")
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op, suffix, sign)				\
+	ATOMIC_OP(op, asm_op, suffix, sign)				\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign)
+
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed  arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP_RELAXED
+#undef ATOMIC_OP_RETURN_RELAXED
+#undef ATOMIC_OP
+
+/**
+ * atomic_fetch_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns the old value of @v.
+ */
+static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+	int t;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
+	cmpw	0,%0,%3 \n\
+	beq	2f \n\
+	add%I2c	%0,%0,%2 \n"
+"	stwcx.	%0,0,%1 \n\
+	bne-	1b \n"
+	PPC_ATOMIC_EXIT_BARRIER
+"	sub%I2c	%0,%0,%2 \n\
+2:"
+	: "=&r" (t)
+	: "r" (&v->counter), "rI" (a), "r" (u)
+	: "cc", "memory", "xer");
+
+	return t;
+}
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+
+/*
+ * Atomically test *v and decrement if it is greater than 0.
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static __inline__ int arch_atomic_dec_if_positive(atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lwarx	%0,0,%1		# atomic_dec_if_positive\n\
+	cmpwi	%0,1\n\
+	addi	%0,%0,-1\n\
+	blt-	2f\n"
+"	stwcx.	%0,0,%1\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"	: "=&b" (t)	/* "b" keeps t out of r0, which addi reads as 0 */
+	: "r" (&v->counter)
+	: "cc", "memory");
+
+	return t;
+}
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
+
+#ifdef __powerpc64__
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
+{
+	s64 t;
+
+	/* -mprefixed can generate offsets beyond range, fall back hack */
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+		__asm__ __volatile__("ld %0,0(%1)" : "=r"(t) : "b"(&v->counter));
+	else
+		__asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
+
+	return t;
+}
+
+static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
+{
+	/* -mprefixed can generate offsets beyond range, fall back hack */
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+		__asm__ __volatile__("std %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter));
+	else
+		__asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
+}
+
+#define ATOMIC64_OP(op, asm_op)						\
+static __inline__ void arch_atomic64_##op(s64 a, atomic64_t *v)		\
+{									\
+	s64 t;								\
+									\
+	__asm__ __volatile__(						\
+"1:	ldarx	%0,0,%3		# atomic64_" #op "\n"			\
+	#asm_op " %0,%2,%0\n"						\
+"	stdcx.	%0,0,%3 \n"						\
+"	bne-	1b\n"							\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "r" (a), "r" (&v->counter)					\
+	: "cc");							\
+}
+
+#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op)				\
+static inline s64							\
+arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)		\
+{									\
+	s64 t;								\
+									\
+	__asm__ __volatile__(						\
+"1:	ldarx	%0,0,%3		# atomic64_" #op "_return_relaxed\n"	\
+	#asm_op " %0,%2,%0\n"						\
+"	stdcx.	%0,0,%3\n"						\
+"	bne-	1b\n"							\
+	: "=&r" (t), "+m" (v->counter)					\
+	: "r" (a), "r" (&v->counter)					\
+	: "cc");							\
+									\
+	return t;							\
+}
+
+#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op)				\
+static inline s64							\
+arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)		\
+{									\
+	s64 res, t;							\
+									\
+	__asm__ __volatile__(						\
+"1:	ldarx	%0,0,%4		# atomic64_fetch_" #op "_relaxed\n"	\
+	#asm_op " %1,%3,%0\n"						\
+"	stdcx.	%1,0,%4\n"						\
+"	bne-	1b\n"							\
+	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
+	: "r" (a), "r" (&v->counter)					\
+	: "cc");							\
+									\
+	return res;							\
+}
+
+#define ATOMIC64_OPS(op, asm_op)					\
+	ATOMIC64_OP(op, asm_op)						\
+	ATOMIC64_OP_RETURN_RELAXED(op, asm_op)				\
+	ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, subf)
+
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op)					\
+	ATOMIC64_OP(op, asm_op)						\
+	ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(or, or)
+ATOMIC64_OPS(xor, xor)
+
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed  arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP_RELAXED
+#undef ATOMIC64_OP_RETURN_RELAXED
+#undef ATOMIC64_OP
+
+static __inline__ void arch_atomic64_inc(atomic64_t *v)
+{
+	s64 t;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2		# atomic64_inc\n\
+	addic	%0,%0,1\n\
+	stdcx.	%0,0,%2 \n\
+	bne-	1b"
+	: "=&r" (t), "+m" (v->counter)
+	: "r" (&v->counter)
+	: "cc", "xer");
+}
+#define arch_atomic64_inc arch_atomic64_inc
+
+static __inline__ s64 arch_atomic64_inc_return_relaxed(atomic64_t *v)
+{
+	s64 t;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2		# atomic64_inc_return_relaxed\n"
+"	addic	%0,%0,1\n"
+"	stdcx.	%0,0,%2\n"
+"	bne-	1b"
+	: "=&r" (t), "+m" (v->counter)
+	: "r" (&v->counter)
+	: "cc", "xer");
+
+	return t;
+}
+
+static __inline__ void arch_atomic64_dec(atomic64_t *v)
+{
+	s64 t;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2		# atomic64_dec\n\
+	addic	%0,%0,-1\n\
+	stdcx.	%0,0,%2\n\
+	bne-	1b"
+	: "=&r" (t), "+m" (v->counter)
+	: "r" (&v->counter)
+	: "cc", "xer");
+}
+#define arch_atomic64_dec arch_atomic64_dec
+
+static __inline__ s64 arch_atomic64_dec_return_relaxed(atomic64_t *v)
+{
+	s64 t;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2		# atomic64_dec_return_relaxed\n"
+"	addic	%0,%0,-1\n"
+"	stdcx.	%0,0,%2\n"
+"	bne-	1b"
+	: "=&r" (t), "+m" (v->counter)
+	: "r" (&v->counter)
+	: "cc", "xer");
+
+	return t;
+}
+
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
+
+/*
+ * Atomically test *v and decrement if it is greater than 0.
+ * The function returns the old value of *v minus 1.
+ */
+static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+	s64 t;
+
+	__asm__ __volatile__(
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	ldarx	%0,0,%1		# atomic64_dec_if_positive\n\
+	addic.	%0,%0,-1\n\
+	blt-	2f\n\
+	stdcx.	%0,0,%1\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"	: "=&r" (t)
+	: "r" (&v->counter)
+	: "cc", "xer", "memory");
+
+	return t;
+}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+
+/**
+ * atomic64_fetch_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns the old value of @v.
+ */
+static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	s64 t;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	ldarx	%0,0,%1		# atomic64_fetch_add_unless\n\
+	cmpd	0,%0,%3 \n\
+	beq	2f \n\
+	add	%0,%2,%0 \n"
+"	stdcx.	%0,0,%1 \n\
+	bne-	1b \n"
+	PPC_ATOMIC_EXIT_BARRIER
+"	subf	%0,%2,%0 \n\
+2:"
+	: "=&r" (t)
+	: "r" (&v->counter), "r" (a), "r" (u)
+	: "cc", "memory");
+
+	return t;
+}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
+
+/**
+ * atomic64_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1, so long as @v is non-zero.
+ * Returns non-zero if @v was non-zero, and zero otherwise.
+ */
+static __inline__ int arch_atomic64_inc_not_zero(atomic64_t *v)
+{
+	s64 t1, t2;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	ldarx	%0,0,%2		# atomic64_inc_not_zero\n\
+	cmpdi	0,%0,0\n\
+	beq-	2f\n\
+	addic	%1,%0,1\n\
+	stdcx.	%1,0,%2\n\
+	bne-	1b\n"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (t1), "=&r" (t2)
+	: "r" (&v->counter)
+	: "cc", "xer", "memory");
+
+	return t1 != 0;
+}
+#define arch_atomic64_inc_not_zero(v) arch_atomic64_inc_not_zero((v))
+
+#endif /* __powerpc64__ */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/arch/powerpc/include/asm/backlight.h b/arch/powerpc/include/asm/backlight.h
new file mode 100644
index 0000000000..1b5eab62ed
--- /dev/null
+++ b/arch/powerpc/include/asm/backlight.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Routines for handling backlight control on PowerBooks
+ *
+ * For now, implementation resides in
+ * arch/powerpc/platforms/powermac/backlight.c
+ *
+ */
+#ifndef __ASM_POWERPC_BACKLIGHT_H
+#define __ASM_POWERPC_BACKLIGHT_H
+#ifdef __KERNEL__
+
+#include <linux/fb.h>
+#include <linux/mutex.h>
+
+/* For locking instructions, see the implementation file */
+extern struct backlight_device *pmac_backlight;
+extern struct mutex pmac_backlight_mutex;
+
+extern int pmac_backlight_curve_lookup(struct fb_info *info, int value);
+
+extern int pmac_has_backlight_type(const char *type);
+
+extern void pmac_backlight_key(int direction);
+static inline void pmac_backlight_key_up(void)
+{
+	pmac_backlight_key(0);
+}
+static inline void pmac_backlight_key_down(void)
+{
+	pmac_backlight_key(1);
+}
+
+extern void pmac_backlight_set_legacy_brightness_pmu(int brightness);
+extern int pmac_backlight_set_legacy_brightness(int brightness);
+extern int pmac_backlight_get_legacy_brightness(void);
+
+extern void pmac_backlight_enable(void);
+extern void pmac_backlight_disable(void);
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
new file mode 100644
index 0000000000..b95b666f03
--- /dev/null
+++ b/arch/powerpc/include/asm/barrier.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_BARRIER_H
+#define _ASM_POWERPC_BARRIER_H
+
+#include <asm/asm-const.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/ppc-opcode.h>
+#endif
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory).  The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ *
+ * For the smp_ barriers, ordering is for cacheable memory operations
+ * only. We have to use the sync instruction for smp_mb(), since lwsync
+ * doesn't order loads with respect to previous stores.  Lwsync can be
+ * used for smp_rmb() and smp_wmb().
+ *
+ * However, on CPUs that don't support lwsync, lwsync actually maps to a
+ * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
+ */
+#define __mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define __rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+
+/* The sub-arch has lwsync */
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
+#    define SMPWMB      LWSYNC
+#elif defined(CONFIG_BOOKE)
+#    define SMPWMB      mbar
+#else
+#    define SMPWMB      eieio
+#endif
+
+/* clang defines this macro for a builtin, which will not work with runtime patching */
+#undef __lwsync
+#define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define __dma_rmb()	__lwsync()
+#define __dma_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+
+#define __smp_lwsync()	__lwsync()
+
+#define __smp_mb()	__mb()
+#define __smp_rmb()	__lwsync()
+#define __smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+
+/*
+ * This is a barrier which prevents following instructions from being
+ * started until the value of the argument x is known.  For example, if
+ * x is a variable loaded from memory, this prevents following
+ * instructions from being executed until the load has been performed.
+ */
+#define data_barrier(x)	\
+	asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory")
+
+#define __smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	__smp_lwsync();							\
+	WRITE_ONCE(*p, v);						\
+} while (0)
+
+#define __smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	__smp_lwsync();							\
+	___p1;								\
+})
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#define NOSPEC_BARRIER_SLOT   nop
+#elif defined(CONFIG_PPC_E500)
+#define NOSPEC_BARRIER_SLOT   nop; nop
+#endif
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+/*
+ * Prevent execution of subsequent instructions until preceding branches have
+ * been fully resolved and are no longer executing speculatively.
+ */
+#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT
+
+// This also acts as a compiler barrier due to the memory clobber.
+#define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
+
+#else /* !CONFIG_PPC_BARRIER_NOSPEC */
+#define barrier_nospec_asm
+#define barrier_nospec()
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+/*
+ * pmem_wmb() ensures that all stores for which the modifications
+ * are written to persistent storage by preceding dcbfps/dcbstps
+ * instructions have updated persistent storage before any data
+ * access or data transfer caused by subsequent instructions is
+ * initiated.
+ */
+#define pmem_wmb() __asm__ __volatile__(PPC_PHWSYNC ::: "memory")
+
+#include <asm-generic/barrier.h>
+
+#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
new file mode 100644
index 0000000000..7e0f032291
--- /dev/null
+++ b/arch/powerpc/include/asm/bitops.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerPC atomic bit operations.
+ *
+ * Merged version by David Gibson <david@gibson.dropbear.id.au>.
+ * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
+ * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
+ * originally took it from the ppc32 code.
+ *
+ * Within a word, bits are numbered LSB first.  Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for a
+ * ppc64 system the bits end up numbered:
+ *   |63..............0|127............64|191...........128|255...........192|
+ * and on ppc32:
+ *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
+ * number field needs to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ */
+
+#ifndef _ASM_POWERPC_BITOPS_H
+#define _ASM_POWERPC_BITOPS_H
+
+#ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <asm/asm-compat.h>
+#include <asm/synch.h>
+
+/* PPC bit number conversion: PPC bit 0 is the most significant bit of a long */
+#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
+#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
+#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit)			\
+	((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
+#define PPC_BITLSHIFT32(be)	(32 - 1 - (be))	/* same scheme within 32 bits */
+#define PPC_BIT32(bit)		(1UL << PPC_BITLSHIFT32(bit))
+#define PPC_BITMASK32(bs, be)	((PPC_BIT32(bs) - PPC_BIT32(be))|PPC_BIT32(bs))
+
+#define PPC_BITLSHIFT8(be)	(8 - 1 - (be))	/* same scheme within 8 bits */
+#define PPC_BIT8(bit)		(1UL << PPC_BITLSHIFT8(bit))
+#define PPC_BITMASK8(bs, be)	((PPC_BIT8(bs) - PPC_BIT8(be))|PPC_BIT8(bs))
+
+#include <asm/barrier.h>
+
+/* Macro for generating the ***_bits() functions: fn(mask, p) applies 'op' to *p */
+#define DEFINE_BITOP(fn, op, prefix)		\
+static inline void fn(unsigned long mask,	\
+		volatile unsigned long *_p)	\
+{						\
+	unsigned long old;			\
+	unsigned long *p = (unsigned long *)_p;	\
+	__asm__ __volatile__ (			\
+	prefix					\
+"1:"	PPC_LLARX "%0,0,%3,0\n"			\
+	#op "%I2 %0,%0,%2\n"			\
+	PPC_STLCX "%0,0,%3\n"			\
+	"bne- 1b\n"	/* loop back to 1: */	\
+	: "=&r" (old), "+m" (*p)		\
+	: "rK" (mask), "r" (p)			\
+	: "cc", "memory");			\
+}
+
+DEFINE_BITOP(set_bits, or, "")		/* set_bits(): 'or' mask into *p */
+DEFINE_BITOP(change_bits, xor, "")	/* change_bits(): 'xor' mask into *p */
+
+static __always_inline bool is_rlwinm_mask_valid(unsigned long x)
+{
+	/* An all-zero mask is rejected outright. */
+	if (x == 0)
+		return false;
+	/* Invert a mask that has bit 0 set, so a wrapping run becomes non-wrapping. */
+	x = (x & 1) ? ~x : x;
+	x += x & -x;	/* add the lowest set bit: a single run leaves at most one bit */
+	return (x & (x - 1)) == 0;
+}
+
+#define DEFINE_CLROP(fn, prefix)	/* fn(mask, p): clear mask bits in *p */	\
+static inline void fn(unsigned long mask, volatile unsigned long *_p)	\
+{									\
+	unsigned long old;						\
+	unsigned long *p = (unsigned long *)_p;				\
+									\
+	if (IS_ENABLED(CONFIG_PPC32) &&					\
+	    __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\
+		asm volatile (	/* PPC32, constant mask: ~mask as rlwinm immediate */ \
+			prefix						\
+		"1:"	"lwarx	%0,0,%3\n"				\
+			"rlwinm	%0,%0,0,%2\n"				\
+			"stwcx.	%0,0,%3\n"				\
+			"bne- 1b\n"					\
+			: "=&r" (old), "+m" (*p)			\
+			: "n" (~mask), "r" (p)				\
+			: "cc", "memory");				\
+	} else {	/* otherwise: andc with the mask held in a register */	\
+		asm volatile (						\
+			prefix						\
+		"1:"	PPC_LLARX "%0,0,%3,0\n"				\
+			"andc %0,%0,%2\n"				\
+			PPC_STLCX "%0,0,%3\n"				\
+			"bne- 1b\n"					\
+			: "=&r" (old), "+m" (*p)			\
+			: "r" (mask), "r" (p)				\
+			: "cc", "memory");				\
+	}								\
+}
+
+DEFINE_CLROP(clear_bits, "")
+DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER)	/* barrier emitted ahead of the loop */
+
+static inline void arch_set_bit(int nr, volatile unsigned long *addr)
+{	/* Set bit nr of the unsigned long array at addr via set_bits(). */
+	set_bits(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
+{	/* Clear bit nr of the unsigned long array at addr via clear_bits(). */
+	clear_bits(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+static inline void arch_clear_bit_unlock(int nr, volatile unsigned long *addr)
+{	/* Clear bit nr via clear_bits_unlock(), the PPC_RELEASE_BARRIER variant. */
+	clear_bits_unlock(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+static inline void arch_change_bit(int nr, volatile unsigned long *addr)
+{	/* Toggle bit nr of the unsigned long array at addr via change_bits(). */
+	change_bits(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+/* Like DEFINE_BITOP(), but 'op' writes its result to a scratch register so the
+ * old word is preserved; fn() returns (old & mask). */
+#define DEFINE_TESTOP(fn, op, prefix, postfix, eh)	\
+static inline unsigned long fn(			\
+		unsigned long mask,			\
+		volatile unsigned long *_p)		\
+{							\
+	unsigned long old, t;				\
+	unsigned long *p = (unsigned long *)_p;		\
+	__asm__ __volatile__ (				\
+	prefix						\
+"1:"	PPC_LLARX "%0,0,%3,%4\n"	/* %4 is 'eh' */	\
+	#op "%I2 %1,%0,%2\n"				\
+	PPC_STLCX "%1,0,%3\n"				\
+	"bne- 1b\n"					\
+	postfix						\
+	: "=&r" (old), "=&r" (t)			\
+	: "rK" (mask), "r" (p), "n" (eh)		\
+	: "cc", "memory");				\
+	return (old & mask);				\
+}
+
+DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
+	      PPC_ATOMIC_EXIT_BARRIER, 0)
+DEFINE_TESTOP(test_and_set_bits_lock, or, "",
+	      PPC_ACQUIRE_BARRIER, IS_ENABLED(CONFIG_PPC64))	/* empty prefix: no entry barrier */
+DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
+	      PPC_ATOMIC_EXIT_BARRIER, 0)
+
+static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p)
+{
+	unsigned long old, t;	/* old: word as loaded; t: word with mask bits cleared */
+	unsigned long *p = (unsigned long *)_p;
+
+	if (IS_ENABLED(CONFIG_PPC32) &&
+	    __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {
+		asm volatile (	/* PPC32, constant mask: ~mask as rlwinm immediate */
+			PPC_ATOMIC_ENTRY_BARRIER
+		"1:"	"lwarx %0,0,%3\n"
+			"rlwinm	%1,%0,0,%2\n"
+			"stwcx. %1,0,%3\n"
+			"bne- 1b\n"
+			PPC_ATOMIC_EXIT_BARRIER
+			: "=&r" (old), "=&r" (t)
+			: "n" (~mask), "r" (p)
+			: "cc", "memory");
+	} else {	/* otherwise: andc with the mask held in a register */
+		asm volatile (
+			PPC_ATOMIC_ENTRY_BARRIER
+		"1:"	PPC_LLARX "%0,0,%3,0\n"
+			"andc	%1,%0,%2\n"
+			PPC_STLCX "%1,0,%3\n"
+			"bne- 1b\n"
+			PPC_ATOMIC_EXIT_BARRIER
+			: "=&r" (old), "=&r" (t)
+			: "r" (mask), "r" (p)
+			: "cc", "memory");
+	}
+
+	return (old & mask);	/* nonzero iff a masked bit was set beforehand */
+}
+
+static inline int arch_test_and_set_bit(unsigned long nr,
+					volatile unsigned long *addr)
+{	/* Returns 1 if bit nr was already set before the operation, else 0. */
+	return !!test_and_set_bits(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+static inline int arch_test_and_set_bit_lock(unsigned long nr,
+					     volatile unsigned long *addr)
+{
+	/* Returns 1 if bit nr was already set before the operation, else 0. */
+	return !!test_and_set_bits_lock(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+static inline int arch_test_and_clear_bit(unsigned long nr,
+					  volatile unsigned long *addr)
+{	/* Returns 1 if bit nr was set before it was cleared, else 0. */
+	return !!test_and_clear_bits(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+static inline int arch_test_and_change_bit(unsigned long nr,
+					   volatile unsigned long *addr)
+{	/* Returns 1 if bit nr was set before it was toggled, else 0. */
+	return !!test_and_change_bits(BIT_MASK(nr), &addr[BIT_WORD(nr)]);
+}
+
+#ifdef CONFIG_PPC64
+static inline unsigned long
+clear_bit_unlock_return_word(int nr, volatile unsigned long *addr)
+{
+	unsigned long old, t;	/* old: word as loaded; t: word with the bit cleared */
+	unsigned long *p = (unsigned long *)addr + BIT_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+
+	__asm__ __volatile__ (
+	PPC_RELEASE_BARRIER
+"1:"	PPC_LLARX "%0,0,%3,0\n"
+	"andc %1,%0,%2\n"
+	PPC_STLCX "%1,0,%3\n"
+	"bne- 1b\n"
+	: "=&r" (old), "=&r" (t)
+	: "r" (mask), "r" (p)
+	: "cc", "memory");
+
+	return old;	/* the whole word as it was before the clear, not just bit nr */
+}
+
+/*
+ * This is a special function for mm/filemap.c: clear bit nr and yield a
+ * nonzero value if bit 7 (PG_waiters) was set in the word beforehand.
+ */
+#define arch_clear_bit_unlock_is_negative_byte(nr, addr)		\
+	(clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7))
+
+#endif /* CONFIG_PPC64 */
+
+#include <asm-generic/bitops/non-atomic.h>
+
+static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
+{
+	__asm__ __volatile__(PPC_RELEASE_BARRIER "" ::: "memory");	/* barrier first */
+	__clear_bit(nr, addr);	/* then the plain __clear_bit() */
+}
+
+/*
+ * Return the zero-based bit position (LE, not IBM bit numbering) of
+ * the most significant 1-bit in a double word.
+ */
+#define __ilog2(x)	ilog2(x)	/* plain alias for ilog2() */
+
+#include <asm-generic/bitops/ffz.h>
+
+#include <asm-generic/bitops/builtin-__ffs.h>
+
+#include <asm-generic/bitops/builtin-ffs.h>
+
+/*
+ * fls: find last (most-significant) bit set.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __always_inline int fls(unsigned int x)
+{
+	int lz;	/* leading-zero count produced by cntlzw */
+
+	if (__builtin_constant_p(x))	/* constant argument: no asm needed */
+		return x ? 32 - __builtin_clz(x) : 0;	/* __builtin_clz(0) is undefined */
+	asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+	return 32 - lz;
+}
+
+#include <asm-generic/bitops/builtin-__fls.h>
+
+/*
+ * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
+ * instruction; for 32-bit we use the generic version, which does two
+ * 32-bit fls calls.
+ */
+#ifdef CONFIG_PPC64
+static __always_inline int fls64(__u64 x)
+{
+	int lz;	/* leading-zero count produced by cntlzd */
+
+	if (__builtin_constant_p(x))	/* constant argument: no asm needed */
+		return x ? 64 - __builtin_clzll(x) : 0;	/* __builtin_clzll(0) is undefined */
+	asm("cntlzd %0,%1" : "=r" (lz) : "r" (x));
+	return 64 - lz;
+}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
+
+#ifdef CONFIG_PPC64	/* only declared here; the definitions live elsewhere */
+unsigned int __arch_hweight8(unsigned int w);
+unsigned int __arch_hweight16(unsigned int w);
+unsigned int __arch_hweight32(unsigned int w);
+unsigned long __arch_hweight64(__u64 w);
+#include <asm-generic/bitops/const_hweight.h>
+#else	/* otherwise the generic hweight header is used */
+#include <asm-generic/bitops/hweight.h>
+#endif
+
+/* wrappers that deal with KASAN instrumentation */
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
+/* Little-endian versions */
+#include <asm-generic/bitops/le.h>
+
+/* Bitmap functions for the ext2 filesystem */
+
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#include <asm-generic/bitops/sched.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_POWERPC_BITOPS_H */
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
new file mode 100644
index 0000000000..4e14a5427a
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H
+#define _ASM_POWERPC_BOOK3S_32_KUP_H
+
+#include <asm/bug.h>
+#include <asm/book3s/32/mmu-hash.h>
+#include <asm/mmu.h>
+#include <asm/synch.h>
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <linux/sched.h>
+
+#define KUAP_NONE	(~0UL)	/* thread.kuap / regs->kuap value when no address is recorded */
+
+static __always_inline void kuap_lock_one(unsigned long addr)
+{
+	mtsr(mfsr(addr) | SR_KS, addr);	/* read-modify-write: set SR_KS for addr */
+	isync();	/* Context sync required after mtsr() */
+}
+
+static __always_inline void kuap_unlock_one(unsigned long addr)
+{
+	mtsr(mfsr(addr) & ~SR_KS, addr);	/* read-modify-write: clear SR_KS for addr */
+	isync();	/* Context sync required after mtsr() */
+}
+
+static __always_inline void uaccess_begin_32s(unsigned long addr)
+{
+	unsigned long tmp;	/* scratch register for the segment register value */
+
+	asm volatile(ASM_MMU_FTR_IFSET(
+		"mfsrin %0, %1;"
+		"rlwinm %0, %0, 0, %2;"
+		"mtsrin %0, %1;"
+		"isync", "", %3)
+		: "=&r"(tmp)
+		: "r"(addr), "i"(~SR_KS), "i"(MMU_FTR_KUAP)	/* %2: mask that drops SR_KS */
+		: "memory");
+}
+
+static __always_inline void uaccess_end_32s(unsigned long addr)
+{
+	unsigned long tmp;	/* scratch register for the segment register value */
+
+	asm volatile(ASM_MMU_FTR_IFSET(
+		"mfsrin %0, %1;"
+		"oris %0, %0, %2;"
+		"mtsrin %0, %1;"
+		"isync", "", %3)
+		: "=&r"(tmp)
+		: "r"(addr), "i"(SR_KS >> 16), "i"(MMU_FTR_KUAP)	/* %2: SR_KS as oris immediate */
+		: "memory");
+}
+
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+	unsigned long kuap = current->thread.kuap;
+
+	regs->kuap = kuap;	/* always saved, even when it is KUAP_NONE */
+	if (unlikely(kuap == KUAP_NONE))
+		return;
+
+	current->thread.kuap = KUAP_NONE;	/* thread state is reset before locking */
+	kuap_lock_one(kuap);
+}
+#define __kuap_save_and_lock __kuap_save_and_lock
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{	/* empty body: this implementation does nothing with regs */
+}
+
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	if (unlikely(kuap != KUAP_NONE)) {	/* lock the address passed in by the caller */
+		current->thread.kuap = KUAP_NONE;
+		kuap_lock_one(kuap);
+	}
+
+	if (likely(regs->kuap == KUAP_NONE))	/* nothing was saved in regs */
+		return;
+
+	current->thread.kuap = regs->kuap;
+
+	kuap_unlock_one(regs->kuap);	/* unlock again for the address saved in regs */
+}
+
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+	unsigned long kuap = current->thread.kuap;
+
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE);	/* debug builds only */
+
+	return kuap;	/* returned whether or not the warning fired */
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      u32 size, unsigned long dir)
+{	/* 'from' and 'size' are not used by this implementation */
+	BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+	if (!(dir & KUAP_WRITE))	/* nothing is done unless write access is requested */
+		return;
+
+	current->thread.kuap = (__force u32)to;	/* record the address being unlocked */
+	uaccess_begin_32s((__force u32)to);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+	u32 kuap = current->thread.kuap;	/* address recorded by allow_user_access() */
+
+	BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+	if (!(dir & KUAP_WRITE))	/* nothing is done unless write access was requested */
+		return;
+
+	current->thread.kuap = KUAP_NONE;
+	uaccess_end_32s(kuap);	/* NOTE(review): kuap is not compared with KUAP_NONE here */
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+	unsigned long flags = current->thread.kuap;	/* recorded address, or KUAP_NONE */
+
+	if (flags != KUAP_NONE) {
+		current->thread.kuap = KUAP_NONE;
+		uaccess_end_32s(flags);
+	}
+
+	return flags;	/* previous state, in the form restore_user_access() takes */
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+	if (flags != KUAP_NONE) {	/* KUAP_NONE means there is nothing to restore */
+		current->thread.kuap = flags;
+		uaccess_begin_32s(flags);
+	}
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	unsigned long kuap = regs->kuap;
+
+	if (!is_write)	/* non-write faults are never reported as bad here */
+		return false;
+	if (kuap == KUAP_NONE)	/* write fault while no address was recorded in regs */
+		return true;
+
+	/*
+	 * If faulting address doesn't match unlocked segment, change segment.
+	 * In case of unaligned store crossing two segments, emulate store.
+	 */
+	if ((kuap ^ address) & 0xf0000000) {	/* top 4 address bits differ */
+		if (!(kuap & 0x0fffffff) && address > kuap - 4 && fix_alignment(regs)) {
+			regs_add_return_ip(regs, 4);	/* advance the return address by 4 bytes */
+			emulate_single_step(regs);
+		} else {
+			regs->kuap = address;
+		}
+	}
+
+	return false;
+}
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
new file mode 100644
index 0000000000..78c6a5fde1
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+
+/*
+ * 32-bit hash table MMU support
+ */
+
+/*
+ * BATs
+ */
+
+/* Block size masks: the block size is (mask + 1) * 128K */
+#define BL_128K	0x000
+#define BL_256K 0x001
+#define BL_512K 0x003
+#define BL_1M   0x007
+#define BL_2M   0x00F
+#define BL_4M   0x01F
+#define BL_8M   0x03F
+#define BL_16M  0x07F
+#define BL_32M  0x0FF
+#define BL_64M  0x1FF
+#define BL_128M 0x3FF
+#define BL_256M 0x7FF
+
+/* BAT Access Protection */
+#define BPP_XX	0x00		/* No access */
+#define BPP_RX	0x01		/* Read only */
+#define BPP_RW	0x02		/* Read/write */
+
+#ifndef __ASSEMBLY__
+/* Contort a phys_addr_t into the right format/bits for a BAT (and back again) */
+#ifdef CONFIG_PHYS_64BIT	/* address bits above bit 31 are moved into low BAT bits */
+#define BAT_PHYS_ADDR(x) ((u32)(((x) & 0x00000000fffe0000ULL) | \
+				(((x) & 0x0000000e00000000ULL) >> 24) | \
+				(((x) & 0x0000000100000000ULL) >> 30)))
+#define PHYS_BAT_ADDR(x) (((u64)(x) & 0x00000000fffe0000ULL) | \
+			  (((u64)(x) << 24) & 0x0000000e00000000ULL) | \
+			  (((u64)(x) << 30) & 0x0000000100000000ULL))
+#else	/* without CONFIG_PHYS_64BIT the address is used unchanged */
+#define BAT_PHYS_ADDR(x) (x)
+#define PHYS_BAT_ADDR(x) ((x) & 0xfffe0000)
+#endif
+
+struct ppc_bat {
+	u32 batu;	/* presumably the upper BAT word - TODO confirm */
+	u32 batl;	/* presumably the lower BAT word - TODO confirm */
+};
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Hash table
+ */
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+#define PP_RWXX	0	/* Supervisor read/write, User none */
+#define PP_RWRX 1	/* Supervisor read/write, User read */
+#define PP_RWRW 2	/* Supervisor read/write, User read/write */
+#define PP_RXRX 3	/* Supervisor read,       User read */
+
+/* Values for Segment Registers */
+#define SR_NX	0x10000000	/* No Execute */
+#define SR_KP	0x20000000	/* User key (Kp) */
+#define SR_KS	0x40000000	/* Supervisor key (Ks) */
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm-offsets.h>
+
+.macro uus_addi sr reg1 reg2 imm	/* addi emitted only if \sr < NUM_USER_SEGMENTS */
+	.if NUM_USER_SEGMENTS > \sr
+	addi	\reg1,\reg2,\imm
+	.endif
+.endm
+
+.macro uus_mtsr sr reg1	/* mtsr emitted only if \sr < NUM_USER_SEGMENTS */
+	.if NUM_USER_SEGMENTS > \sr
+	mtsr	\sr, \reg1
+	.endif
+.endm
+
+/*
+ * This isync() shouldn't be necessary as the kernel is not expected to run
+ * any instruction in userspace soon after the update of segments and 'rfi'
+ * instruction is used to return to userspace, but hash based cores
+ * (at least G3) seem to exhibit a random behaviour when the 'isync' is not
+ * there. 603 cores don't have this behaviour so don't do the 'isync' as it
+ * saves several CPU cycles.
+ */
+.macro uus_isync
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+	isync
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
+.endm
+
+.macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4	/* SR n = \tmp1 + n * 0x111; all four regs are modified */
+	uus_addi	1, \tmp2, \tmp1, 0x111
+	uus_addi	2, \tmp3, \tmp1, 0x222
+	uus_addi	3, \tmp4, \tmp1, 0x333
+
+	uus_mtsr	0, \tmp1
+	uus_mtsr	1, \tmp2
+	uus_mtsr	2, \tmp3
+	uus_mtsr	3, \tmp4
+
+	uus_addi	4, \tmp1, \tmp1, 0x444
+	uus_addi	5, \tmp2, \tmp2, 0x444
+	uus_addi	6, \tmp3, \tmp3, 0x444
+	uus_addi	7, \tmp4, \tmp4, 0x444
+
+	uus_mtsr	4, \tmp1
+	uus_mtsr	5, \tmp2
+	uus_mtsr	6, \tmp3
+	uus_mtsr	7, \tmp4
+
+	uus_addi	8, \tmp1, \tmp1, 0x444
+	uus_addi	9, \tmp2, \tmp2, 0x444
+	uus_addi	10, \tmp3, \tmp3, 0x444
+	uus_addi	11, \tmp4, \tmp4, 0x444
+
+	uus_mtsr	8, \tmp1
+	uus_mtsr	9, \tmp2
+	uus_mtsr	10, \tmp3
+	uus_mtsr	11, \tmp4
+
+	uus_addi	12, \tmp1, \tmp1, 0x444
+	uus_addi	13, \tmp2, \tmp2, 0x444
+	uus_addi	14, \tmp3, \tmp3, 0x444
+	uus_addi	15, \tmp4, \tmp4, 0x444
+
+	uus_mtsr	12, \tmp1
+	uus_mtsr	13, \tmp2
+	uus_mtsr	14, \tmp3
+	uus_mtsr	15, \tmp4
+
+	uus_isync
+.endm
+
+#else
+
+/*
+ * This macro defines the mapping from contexts to VSIDs (virtual
+ * segment IDs).  We use a skew on both the context and the high 4 bits
+ * of the 32-bit virtual address (the "effective segment ID") in order
+ * to spread out the entries in the MMU hash table.  The result is masked
+ * to 24 bits.  Note, if this function is changed then hash functions
+ * will have to be changed to correspond.
+ */
+#define CTX_TO_VSID(c, id)	((((c) * (897 * 16)) + ((id) * 0x111)) & 0xffffff)
+
+/*
+ * Hardware Page Table Entry (the bitfield widths add up to 2 x 32 bits)
+ * Note that the xpn and x bitfields are used only by processors that
+ * support extended addressing; otherwise, those bits are reserved.
+ */
+struct hash_pte {
+	unsigned long v:1;	/* Entry is valid */
+	unsigned long vsid:24;	/* Virtual segment identifier */
+	unsigned long h:1;	/* Hash algorithm indicator */
+	unsigned long api:6;	/* Abbreviated page index */
+	unsigned long rpn:20;	/* Real (physical) page number */
+	unsigned long xpn:3;	/* Real page number bits 0-2, optional */
+	unsigned long r:1;	/* Referenced */
+	unsigned long c:1;	/* Changed */
+	unsigned long w:1;	/* Write-thru cache mode */
+	unsigned long i:1;	/* Cache inhibited */
+	unsigned long m:1;	/* Memory coherence */
+	unsigned long g:1;	/* Guarded */
+	unsigned long x:1;	/* Real page number bit 3, optional */
+	unsigned long pp:2;	/* Page protection */
+};
+
+typedef struct {
+	unsigned long id;	/* presumably the context number fed to CTX_TO_VSID() - TODO confirm */
+	unsigned long sr0;	/* set to SR_NX by INIT_MM_CONTEXT() under CONFIG_PPC_KUEP */
+	void __user *vdso;	/* user-space pointer; presumably the vDSO mapping - TODO confirm */
+} mm_context_t;
+
+#ifdef CONFIG_PPC_KUEP
+#define INIT_MM_CONTEXT(mm) .context.sr0 = SR_NX	/* designated-initializer fragment; 'mm' is unused */
+#endif
+
+void update_bats(void);
+static inline void cleanup_cpu_mmu_context(void) { }	/* empty body in this header */
+
+/* patch sites */
+extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2;
+extern s32 patch__hash_page_B, patch__hash_page_C;
+extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2;
+extern s32 patch__flush_hash_B;
+
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+
+static __always_inline void update_user_segment(u32 n, u32 val)
+{
+	if (n << 28 < TASK_SIZE)	/* only segments that start below TASK_SIZE */
+		mtsr(val + n * 0x111, n << 28);	/* value is skewed by 0x111 per segment */
+}
+
+static __always_inline void update_user_segments(u32 val)
+{
+	val &= 0xf0ffffff;	/* clears bits 0x0f000000 of val before use */
+
+	update_user_segment(0, val);	/* one call per segment number 0..15 */
+	update_user_segment(1, val);
+	update_user_segment(2, val);
+	update_user_segment(3, val);
+	update_user_segment(4, val);
+	update_user_segment(5, val);
+	update_user_segment(6, val);
+	update_user_segment(7, val);
+	update_user_segment(8, val);
+	update_user_segment(9, val);
+	update_user_segment(10, val);
+	update_user_segment(11, val);
+	update_user_segment(12, val);
+	update_user_segment(13, val);
+	update_user_segment(14, val);
+	update_user_segment(15, val);
+}
+
+int __init find_free_bat(void);
+unsigned int bat_block_size(unsigned long base, unsigned long top);
+#endif /* !__ASSEMBLY__ */
+
+/* We happily ignore the smaller BATs on 601, we don't actually use
+ * those definitions on hash32 at the moment anyway
+ */
+#define mmu_virtual_psize	MMU_PAGE_4K
+#define mmu_linear_psize	MMU_PAGE_256M
+
+#endif /* _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
new file mode 100644
index 0000000000..dc5c039eb2
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/slab.h>
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{	/* allocate a PGD from the PGT_CACHE() cache for PGD_INDEX_SIZE */
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+			pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{	/* hand the PGD back to the cache pgd_alloc() uses; 'mm' is unused */
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present.
+ */
+/* #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) 		do { } while (0)
+#define __pmd_free_tlb(tlb,x,a)		do { } while (0)
+/* #define pgd_populate(mm, pmd, pte)      BUG() */
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+				       pte_t *pte)
+{	/* store the physical address of the PTE table in *pmdp; 'mm' is unused */
+	*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pte_page)
+{	/* same store as pmd_populate_kernel(), taking a pgtable_t; 'mm' is unused */
+	*pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+	if (index_size == 0) {	/* index size 0: release through pte_fragment_free() */
+		pte_fragment_free((unsigned long *)table, 0);
+		return;
+	}
+	BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);	/* otherwise a PGT_CACHE() object */
+	kmem_cache_free(PGT_CACHE(index_size), table);
+}
+
+#define get_hugepd_cache_index(x)  (x)	/* identity mapping */
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	unsigned long tagged;	/* table address with shift OR-ed into the low bits */
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	tagged = (unsigned long)table | shift;
+	tlb_remove_table(tlb, (void *)tagged);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	unsigned long tagged = (unsigned long)_table;	/* pointer with index size in low bits */
+	unsigned index_size = tagged & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free((void *)(tagged & ~MAX_PGTABLE_INDEX_SIZE), index_size);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{	/* shift 0 is the value pgtable_free() treats as a PTE fragment; 'address' is unused */
+	pgtable_free_tlb(tlb, table, 0);
+}
+#endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
new file mode 100644
index 0000000000..9b13eb14e2
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -0,0 +1,627 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
+#define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/*
+ * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
+ * table containing PTEs, together with a set of 16 segment registers,
+ * to define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings.  We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code.  Low-level assembler code in hash_low_32.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+#define _PAGE_PRESENT	0x001	/* software: pte contains a translation */
+#define _PAGE_HASHPTE	0x002	/* hash_page has made an HPTE for this pte */
+#define _PAGE_USER	0x004	/* usermode access allowed */
+#define _PAGE_GUARDED	0x008	/* G: prohibit speculative access */
+#define _PAGE_COHERENT	0x010	/* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE	0x020	/* I: cache inhibit */
+#define _PAGE_WRITETHRU	0x040	/* W: cache write-through */
+#define _PAGE_DIRTY	0x080	/* C: page changed */
+#define _PAGE_ACCESSED	0x100	/* R: page referenced */
+#define _PAGE_EXEC	0x200	/* software: exec allowed */
+#define _PAGE_RW	0x400	/* software: user write access allowed */
+#define _PAGE_SPECIAL	0x800	/* software: Special page */
+
+#ifdef CONFIG_PTE_64BIT
+/* We never clear the high word of the pte */
+#define _PTE_NONE_MASK	(0xffffffff00000000ULL | _PAGE_HASHPTE)
+#else
+#define _PTE_NONE_MASK	_PAGE_HASHPTE
+#endif
+
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+
+/* We borrow the _PAGE_USER bit to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE	_PAGE_USER
+
+/* And here we include common definitions */
+
+#define _PAGE_KERNEL_RO		0
+#define _PAGE_KERNEL_ROX	(_PAGE_EXEC)
+#define _PAGE_KERNEL_RW		(_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
+#ifndef __ASSEMBLY__
+
+static inline bool pte_user(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_USER;
+}
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as PAGE_SHIFT here (ie, naturally aligned).
+ * Platforms that don't just pre-define the value so we don't override it here.
+ */
+#define PTE_RPN_SHIFT	(PAGE_SHIFT)
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_RPN_MASK	(~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
+#else
+#define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+
+/*
+ * _PAGE_CHG_MASK is the mask of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+
+/*
+ * Permission masks used to generate the __P and __S table.
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now.
+ */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+#define PTE_INDEX_SIZE	PTE_SHIFT
+#define PMD_INDEX_SIZE	0
+#define PUD_INDEX_SIZE	0
+#define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX	PUD_INDEX_SIZE
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	0
+#define PUD_TABLE_SIZE	0
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		(PTE_TABLE_SIZE - 1)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
+ *
+ * For any >32-bit physical address platform, we can use the following
+ * two level page table layout where the pgdir is 8KB and the MS 13 bits
+ * are an index to the second level table.  The combined pgdir/pmd first
+ * level has 2048 entries and the second level has 512 64-bit PTE entries.
+ * -Matt
+ */
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start laying out the
+ * kernel virtual space that goes below PKMAP and FIXMAP
+ */
+#include <asm/fixmap.h>
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP	PKMAP_BASE
+#else
+#define IOREMAP_TOP	FIXADDR_START
+#endif
+
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START	VMALLOC_START
+#define IOREMAP_END	VMALLOC_END
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 16MB value just means that there will be a hole of up to 16MB after
+ * the physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * We no longer map larger than phys RAM with the BATs so we don't have
+ * to worry about the VMALLOC_OFFSET causing problems.  We do have to worry
+ * about clashes between our early calls to ioremap() that start growing down
+ * from ioremap_base being run into the VM area allocations (growing upwards
+ * from VMALLOC_START).  For this reason we have ioremap_bot to check when
+ * we actually run into our mappings setup in the early boot with the VM
+ * system.  This really does become a problem for machines with good amounts
+ * of RAM.  -- Cort
+ */
+#define VMALLOC_OFFSET (0x1000000) /* 16M */
+
+#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+
+#ifdef CONFIG_KASAN_VMALLOC
+#define VMALLOC_END	ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define VMALLOC_END	ioremap_bot
+#endif
+
+#define MODULES_END	ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
+#define MODULES_VADDR	(MODULES_END - SZ_256M)
+
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
+		(unsigned long long)pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+/*
+ * Bits in a linux-style PTE.  These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible.
+ */
+
+#define pte_clear(mm, addr, ptep) \
+	do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0)
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
+#define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
+
+/*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_pages(unsigned context, unsigned long va,
+			    unsigned long pmdval, int count);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va,
+			  unsigned long pmdval);
+
+/* Flush the TLB/hash table entry for @addr, whose PTE lives at @ptep */
+static inline void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
+{
+	/* Nothing to do when the MMU runs without a hash table. */
+	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return;
+
+	flush_hash_pages(mm->context.id, addr, __pa(ptep) & PAGE_MASK, 1);
+}
+
+/*
+ * PTE updates. This function is called whenever an existing
+ * valid PTE is updated. This does -not- include set_pte_at()
+ * which nowadays only sets a new PTE.
+ *
+ * Clears the @clr bits, then sets the @set bits, and returns the old PTE.
+ * With a hash table the update is a lwarx/stwcx. retry loop; the PTE may
+ * be 32 or 64 bit wide and in the latter case only the low part of the
+ * PTE is accessed atomically. @mm, @addr and @huge are unused here.
+ */
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+				     unsigned long clr, unsigned long set, int huge)
+{
+	pte_basic_t old;
+
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+		unsigned long tmp;
+
+		asm volatile(
+#ifndef CONFIG_PTE_64BIT
+	"1:	lwarx	%0, 0, %3\n"
+	"	andc	%1, %0, %4\n"
+#else
+	"1:	lwarx	%L0, 0, %3\n"
+	"	lwz	%0, -4(%3)\n"
+	"	andc	%1, %L0, %4\n"
+#endif
+	"	or	%1, %1, %5\n"
+	"	stwcx.	%1, 0, %3\n"
+	"	bne-	1b"
+		: "=&r" (old), "=&r" (tmp), "=m" (*p)
+#ifndef CONFIG_PTE_64BIT
+		: "r" (p),
+#else
+		: "b" ((unsigned long)(p) + 4),
+#endif
+		  "r" (clr), "r" (set), "m" (*p)
+		: "cc" );
+	} else {
+		old = pte_val(*p);
+
+		*p = __pte((old & ~(pte_basic_t)clr) | set);
+	}
+
+	return old;
+}
+
+/*
+ * 2.6 calls this without flushing the TLB entry; this is wrong
+ * for our hash-based implementation, we fix that up here.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	unsigned long old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+
+	/* hash_page made an HPTE for this pte, so flush it as well. */
+	if (old & _PAGE_HASHPTE)
+		flush_hash_entry(mm, ptep, addr);
+	return !!(old & _PAGE_ACCESSED);
+}
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep)
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+}
+
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   int psize)
+{
+	/* Only ever add access bits here; nothing is cleared from the PTE. */
+	unsigned long bits = pte_val(entry) &
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+	pte_update(vma->vm_mm, address, ptep, 0, bits, 0);
+	flush_tlb_page(vma, address);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
+
+#define pmd_pfn(pmd)		(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd)		pfn_to_page(pmd_pfn(pmd))
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit PTEs):
+ *
+ *                         1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ *   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *   <----------------- offset --------------------> < type -> E H P
+ *
+ *   E is the exclusive marker that is not stored in swap entries.
+ *   _PAGE_PRESENT (P) and _PAGE_HASHPTE (H) must be 0.
+ *
+ * For 64bit PTEs, the offset is extended by 32bit.
+ */
+#define __swp_type(entry)		((entry).val & 0x1f)
+#define __swp_offset(entry)		((entry).val >> 5)
+#define __swp_entry(type, offset)	((swp_entry_t) { ((type) & 0x1f) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
+/* Generic accessors to PTE bits (pte_read() is unconditionally true) */
+static inline int pte_write(pte_t pte)		{ return (pte_val(pte) & _PAGE_RW) != 0; }
+static inline int pte_read(pte_t pte)		{ return 1; }
+static inline int pte_dirty(pte_t pte)		{ return (pte_val(pte) & _PAGE_DIRTY) != 0; }
+static inline int pte_young(pte_t pte)		{ return (pte_val(pte) & _PAGE_ACCESSED) != 0; }
+static inline int pte_special(pte_t pte)	{ return (pte_val(pte) & _PAGE_SPECIAL) != 0; }
+static inline int pte_none(pte_t pte)		{ return !(pte_val(pte) & ~_PTE_NONE_MASK); }
+static inline bool pte_exec(pte_t pte)		{ return (pte_val(pte) & _PAGE_EXEC) != 0; }
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hw_valid(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hashpte(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_HASHPTE);
+}
+
+static inline bool pte_ci(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_NO_CACHE);
+}
+
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+	/*
+	 * Access needs a present PTE with _PAGE_USER set. pte_read() is
+	 * always true here, so only a write needs a further check.
+	 */
+	if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
+		return false;
+
+	if (write && !pte_write(pte))
+		return false;
+
+	return true;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot));
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_EXEC);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_USER);
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_USER);
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors.
+ *
+ * First case is 32-bit in UP mode with 32-bit PTEs, we need to preserve
+ * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+ * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+ * and so we need to keep track that this PTE needs invalidating.
+ *
+ * Second case is 32-bit with 64-bit PTE.  In this case, we
+ * can just store as long as we do the two halves in the right order
+ * with a barrier in between. This is possible because we take care,
+ * in the hash code, to pre-invalidate if the PTE was already hashed,
+ * which synchronizes us with any concurrent invalidation.
+ * In the percpu case, we fall back to the simple update preserving
+ * the hash bits (ie, same as the non-SMP case).
+ *
+ * Third case is 32-bit in SMP mode with 32-bit PTEs. We use the
+ * helper pte_update() which does an atomic update. We need to do that
+ * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+ * per-CPU PTE such as a kmap_atomic, we also do a simple update preserving
+ * the hash bits instead.
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+	if ((!IS_ENABLED(CONFIG_SMP) && !IS_ENABLED(CONFIG_PTE_64BIT)) || percpu) {
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
+			      (pte_val(pte) & ~_PAGE_HASHPTE));
+	} else if (IS_ENABLED(CONFIG_PTE_64BIT)) {
+		if (pte_val(*ptep) & _PAGE_HASHPTE)
+			flush_hash_entry(mm, ptep, addr);
+
+		asm volatile("stw%X0 %2,%0; eieio; stw%X1 %L2,%1" :
+			     "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) :
+			     "r" (pte) : "memory");
+	} else {
+		pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
+	}
+}
+
+/*
+ * Helpers to change the cacheability attributes of a page protection value.
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE);
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT);
+}
+
+#define pgprot_cached_wthru pgprot_cached_wthru
+static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT | _PAGE_WRITETHRU);
+}
+
+#define pgprot_cached_noncoherent pgprot_cached_noncoherent
+static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return pgprot_noncached_wc(prot);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /*  _ASM_POWERPC_BOOK3S_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
new file mode 100644
index 0000000000..4be5729081
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+
+#include <linux/build_bug.h>
+
+#define MMU_NO_CONTEXT      (0)
+/*
+ * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
+ */
+void hash__flush_tlb_mm(struct mm_struct *mm);
+void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+
+#ifdef CONFIG_SMP
+void _tlbie(unsigned long address);
+#else
+static inline void _tlbie(unsigned long address)
+{
+	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+}
+#endif
+void _tlbia(void);
+
+/*
+ * Called at the end of a mmu_gather operation to make sure the
+ * TLB flush is completely done.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	/* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
+	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		_tlbia();
+}
+
+static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	start &= PAGE_MASK;
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		hash__flush_range(mm, start, end);
+	else if (end - start > PAGE_SIZE)
+		_tlbia();
+	else
+		_tlbie(start);
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		hash__flush_tlb_mm(mm);
+	else
+		_tlbia();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		hash__flush_tlb_page(vma, vmaddr);
+	else
+		_tlbie(vmaddr);
+}
+
+static inline void
+flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	flush_range(vma->vm_mm, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	flush_range(&init_mm, start, end);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+					unsigned long vmaddr)
+{
+	flush_tlb_page(vma, vmaddr);
+}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	BUILD_BUG();
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	flush_tlb_mm(mm);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
new file mode 100644
index 0000000000..6472b08fa1
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
+
+#define H_PTE_INDEX_SIZE  9  // size: 8B << 9 = 4KB, maps: 2^9 x   4KB =   2MB
+#define H_PMD_INDEX_SIZE  7  // size: 8B << 7 = 1KB, maps: 2^7 x   2MB = 256MB
+#define H_PUD_INDEX_SIZE  9  // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE  9  // size: 8B << 9 = 4KB, maps: 2^9 x 128GB =  64TB
+
+/*
+ * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
+ * Hence also limit max EA bits to 64TB.
+ */
+#define MAX_EA_BITS_PER_CONTEXT		46
+
+
+/*
+ * Our page table limits us to 64TB. For 64TB physical memory, we only need 64GB
+ * of vmemmap space. To better support sparse memory layout, we use 61TB
+ * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmemmap.
+ */
+#define REGION_SHIFT		(40)
+#define H_KERN_MAP_SIZE		(ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Limits the linear mapping range
+ */
+#define H_MAX_PHYSMEM_BITS	46
+
+/*
+ * Define the address range of the kernel non-linear virtual area (61TB)
+ */
+#define H_KERN_VIRT_START	ASM_CONST(0xc0003d0000000000)
+
+#ifndef __ASSEMBLY__
+#define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
+#define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << H_PMD_INDEX_SIZE)
+#define H_PUD_TABLE_SIZE	(sizeof(pud_t) << H_PUD_INDEX_SIZE)
+#define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << H_PGD_INDEX_SIZE)
+
+#define H_PAGE_F_GIX_SHIFT	_PAGE_PA_MAX
+#define H_PAGE_F_SECOND		_RPAGE_PKEY_BIT0 /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX		(_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
+#define H_PAGE_BUSY		_RPAGE_RSV1
+#define H_PAGE_HASHPTE		_RPAGE_PKEY_BIT4
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
+			 H_PAGE_F_SECOND | H_PAGE_F_GIX)
+/*
+ * Not supported by 4k linux page size
+ */
+#define H_PAGE_4K_PFN	0x0
+#define H_PAGE_THP_HUGE 0x0
+#define H_PAGE_COMBO	0x0
+
+/* 8 bytes per each pte entry */
+#define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3)
+#define H_PTE_FRAG_NR	(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
+#define H_PMD_FRAG_SIZE_SHIFT  (H_PMD_INDEX_SIZE + 3)
+#define H_PMD_FRAG_NR	(PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
+
+/* memory key bits, only 8 keys supported */
+#define H_PTE_PKEY_BIT4	0
+#define H_PTE_PKEY_BIT3	0
+#define H_PTE_PKEY_BIT2	_RPAGE_PKEY_BIT3
+#define H_PTE_PKEY_BIT1	_RPAGE_PKEY_BIT2
+#define H_PTE_PKEY_BIT0	_RPAGE_PKEY_BIT1
+
+
+/*
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
+ */
+#define remap_4k_pfn(vma, addr, pfn, prot)	\
+	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int hash__hugepd_ok(hugepd_t hpd)
+{
+	unsigned long hpdval = hpd_val(hpd);
+
+	/*
+	 * A hugepd directory pointer is present, is not a pte, and has
+	 * a non-zero hugepd shift mask.
+	 */
+	if ((hpdval & _PAGE_PTE) || !(hpdval & _PAGE_PRESENT))
+		return false;
+	return (hpdval & HUGEPD_SHIFT_MASK) != 0;
+}
+#endif
+
+/*
+ * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
+ * a matter of returning the PTE bits that need to be modified. On 64K PTE,
+ * things are a little more involved and hence needs many more parameters to
+ * accomplish the same. However we want to abstract this out from the caller by
+ * keeping the prototype consistent across the two formats.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+					 unsigned int subpg_index, unsigned long hidx,
+					 int offset)
+{
+	return (hidx << H_PAGE_F_GIX_SHIFT) &
+		(H_PAGE_F_SECOND | H_PAGE_F_GIX);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	BUG();
+	return NULL;
+}
+
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	BUG();
+	return 0;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	BUG();
+	return 0;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	BUG();
+}
+
+static inline int hash__pmd_trans_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
+{
+	BUG();
+	return pmd;
+}
+
+extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm,
+					   unsigned long addr, pmd_t *pmdp,
+					   unsigned long clr, unsigned long set);
+extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp);
+extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					 pgtable_t pgtable);
+extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pmd_t *pmdp);
+extern int hash__has_transparent_hugepage(void);
+#endif
+
+static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd)
+{
+	BUG();
+	return pmd;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
new file mode 100644
index 0000000000..0bf6fd0bf4
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
+
+#define H_PTE_INDEX_SIZE   8  // size: 8B <<  8 = 2KB, maps 2^8  x 64KB = 16MB
+#define H_PMD_INDEX_SIZE  10  // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE  10  // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE   8  // size: 8B <<  8 = 2KB, maps 2^8  x 16TB =  4PB
+
+/*
+ * If we store section details in page->flags we can't increase MAX_PHYSMEM_BITS:
+ * if we increase SECTIONS_WIDTH we will not be able to store node details in
+ * page->flags, and page_to_nid would have to do a page->section->node lookup.
+ * Hence only increase for VMEMMAP. Further, depend on SPARSEMEM_EXTREME to reduce
+ * memory requirements with a large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define H_MAX_PHYSMEM_BITS	51
+#else
+#define H_MAX_PHYSMEM_BITS	46
+#endif
+
+/*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT		49
+#define REGION_SHIFT		MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE		(1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START	ASM_CONST(0xc008000000000000)
+
+/*
+ * 64k aligned addresses free up a few of the lower bits of RPN for us.
+ * We steal those here. For more details look at pte_pfn/pfn_pte()
+ */
+#define H_PAGE_COMBO	_RPAGE_RPN0 /* this is a combo 4k page */
+#define H_PAGE_4K_PFN	_RPAGE_RPN1 /* PFN is for a single 4k page */
+#define H_PAGE_BUSY	_RPAGE_RSV1     /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE	_RPAGE_RPN43	/* PTE has associated HPTE */
+
+/* memory key bits. */
+#define H_PTE_PKEY_BIT4		_RPAGE_PKEY_BIT4
+#define H_PTE_PKEY_BIT3		_RPAGE_PKEY_BIT3
+#define H_PTE_PKEY_BIT2		_RPAGE_PKEY_BIT2
+#define H_PTE_PKEY_BIT1		_RPAGE_PKEY_BIT1
+#define H_PTE_PKEY_BIT0		_RPAGE_PKEY_BIT0
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define H_PAGE_THP_HUGE  H_PAGE_4K_PFN
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
+/*
+ * We use a 2K PTE page fragment and another 2K for storing
+ * real_pte_t hash index
+ * 8 bytes per each pte entry and another 8 bytes for storing
+ * slot details.
+ */
+#define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3 + 1)
+#define H_PTE_FRAG_NR	(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+#define H_PMD_FRAG_SIZE_SHIFT  (H_PMD_INDEX_SIZE + 3 + 1)
+#else
+#define H_PMD_FRAG_SIZE_SHIFT  (H_PMD_INDEX_SIZE + 3)
+#endif
+#define H_PMD_FRAG_NR	(PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
+
+#ifndef __ASSEMBLY__
+#include <asm/errno.h>
+
+/*
+ * With 64K pages on hash table, we have a special PTE format that
+ * uses a second "half" of the page table to encode sub-page information
+ * in order to deal with 64K made of 4K HW pages. Thus we override the
+ * generic accessors and iterators here
+ */
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
+{
+	real_pte_t rpte;
+	unsigned long *hidxp;
+
+	rpte.pte = pte;
+
+	/*
+	 * Ensure that we do not read the hidx before we read the PTE. The
+	 * writer side is expected to finish writing the hidx first, followed
+	 * by the PTE, using smp_wmb(); pte_set_hidx() issues that barrier.
+	 */
+	smp_rmb();
+
+	hidxp = (unsigned long *)(ptep + offset);	/* hidx word sits @offset entries past @ptep */
+	rpte.hidx = *hidxp;
+	return rpte;
+}
+
+/*
+ * Shift the hidx representation by one, modulo 16; i.e. hidx 0 is represented
+ * as 1, 1 as 2, ..., and 0xf as 0. This convention lets us represent an
+ * invalid hidx 0xf with a 0x0 bit value. PTEs are zeroed anyway when
+ * allocated, so we don't have to zero them again; this saves on initialization.
+ */
+#define HIDX_UNSHIFT_BY_ONE(x) (((x) + 0xfUL) & 0xfUL) /* shift backward by one */
+#define HIDX_SHIFT_BY_ONE(x) (((x) + 0x1UL) & 0xfUL)   /* shift forward by one */
+#define HIDX_BITS(x, index)  ((x) << ((index) << 2))
+#define BITS_TO_HIDX(x, index)  (((x) >> ((index) << 2)) & 0xfUL)
+#define INVALID_RPTE_HIDX  0x0UL
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+	return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index));	/* nibble @index, stored bias of one removed */
+}
+
+/*
+ * Commit the hidx and return PTE bits that needs to be modified. The caller is
+ * expected to modify the PTE bits accordingly and commit the PTE to memory.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+					 unsigned int subpg_index,
+					 unsigned long hidx, int offset)
+{
+	unsigned long *hidxp = (unsigned long *)(ptep + offset);	/* hidx word, @offset entries past @ptep */
+
+	rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);	/* clear the nibble of this sub-page */
+	*hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
+
+	/*
+	 * Anyone reading the PTE must ensure the hidx bits are read after
+	 * reading the PTE, by using the read-side barrier smp_rmb().
+	 * __real_pte() can be used for that.
+	 */
+	smp_wmb();
+
+	/* No PTE bits to be modified, return 0x0UL */
+	return 0x0UL;
+}
+
+#define __rpte_to_pte(r)	((r).pte)
+extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
+/*
+ * Trick: we set __end to va + 64k, which happens to work for
+ * a 16M page as well, as we want only one iteration there
+ */
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
+	do {								\
+		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
+		unsigned __split = (psize == MMU_PAGE_4K ||		\
+				    psize == MMU_PAGE_64K_AP);		\
+		shift = mmu_psize_defs[psize].shift;			\
+		for (index = 0; vpn < __end; index++,			\
+			     vpn += (1L << (shift - VPN_SHIFT))) {	\
+		if (!__split || __rpte_sub_valid(rpte, index))
+
+#define pte_iterate_hashed_end()  } } while(0)
+
+#define pte_pagesize_index(mm, addr, pte)	\
+	(((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
+
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+			   unsigned long pfn, unsigned long size, pgprot_t);
+static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+				 unsigned long pfn, pgprot_t prot)
+{
+	/* Remap PAGE_SIZE bytes with H_PAGE_4K_PFN added to the protection bits. */
+	pgprot_t tagged = __pgprot(H_PAGE_4K_PFN | pgprot_val(prot));
+	if (pfn <= (PTE_RPN_MASK >> PAGE_SHIFT))
+		return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, tagged);
+	WARN(1, "remap_4k_pfn called with wrong pfn value\n");
+	return -EINVAL;
+}
+
+#define H_PTE_TABLE_SIZE	PTE_FRAG_SIZE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
+#define H_PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
+				 (sizeof(unsigned long) << PMD_INDEX_SIZE))
+#else
+#define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE	((sizeof(pud_t) << PUD_INDEX_SIZE) +	\
+				 (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
+#define H_PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
+#define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is kept in
+	 * the second half of the PMD table, PTRS_PER_PMD entries past @pmdp.
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+/*
+ * The linux hugepage PMD now includes the pmd entries followed by the address
+ * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
+ * HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The top three bits are intentionally left as zero. These memory locations
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure the
+ * _PAGE_PRESENT bit of each is zero when we look at them.
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return hpte_slot_array[index] & 0x1;	/* bit 0 of the slot byte is the valid bit */
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 1;	/* drop the valid bit; the rest is the stored hidx */
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = (hidx << 1) | 0x1;	/* @hidx above bit 0, valid bit set */
+}
+
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated from the core VM paths and they're differentiated by
+ *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differentiate explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int hash__pmd_trans_huge(pmd_t pmd)
+{
+	const unsigned long thp_leaf = _PAGE_PTE | H_PAGE_THP_HUGE;	/* both set, _PAGE_DEVMAP clear */
+	return (pmd_val(pmd) & (thp_leaf | _PAGE_DEVMAP)) == thp_leaf;
+}
+
+static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));	/* the bits hash__pmd_trans_huge() tests for */
+}
+
+extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm,
+					   unsigned long addr, pmd_t *pmdp,
+					   unsigned long clr, unsigned long set);
+extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp);
+extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					 pgtable_t pgtable);
+extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pmd_t *pmdp);
+extern int hash__has_transparent_hugepage(void);
+#endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP));	/* huge bits plus _PAGE_DEVMAP */
+}
+
+#endif	/* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
new file mode 100644
index 0000000000..6c5564c4fa
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H
+
+/*  We use key 3 for KERNEL */
+#define HASH_DEFAULT_KERNEL_KEY (HPTE_R_KEY_BIT0 | HPTE_R_KEY_BIT1)
+
+static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+	u64 pte_bits = (vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL;	/* VM_PKEY_BITn -> H_PTE_PKEY_BITn */
+	pte_bits |= (vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT1 : 0x0UL;
+	pte_bits |= (vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL;
+	pte_bits |= (vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT3 : 0x0UL;
+	return pte_bits | ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL);
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
+{
+	unsigned long key;
+
+	key = (pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL;
+	key |= (pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL;
+	key |= (pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL;
+	key |= (pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL;
+	key |= (pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL;
+
+	/* The kernel key only replaces key 0, and only with KUAP or KUEP. */
+	if (!mmu_has_feature(MMU_FTR_KUAP) && !mmu_has_feature(MMU_FTR_BOOK3S_KUEP))
+		return key;
+	if (key != 0 || !(flags & HPTE_USE_KERNEL_KEY))
+		return key;
+
+	return HASH_DEFAULT_KERNEL_KEY;
+}
+
+static inline u16 hash__pte_to_pkey_bits(u64 pteflags)
+{
+	u16 pkey = (pteflags & H_PTE_PKEY_BIT0) ? 0x1 : 0x0;	/* H_PTE_PKEY_BITn -> bit n of the key */
+	pkey |= (pteflags & H_PTE_PKEY_BIT1) ? 0x2 : 0x0;
+	pkey |= (pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0;
+	pkey |= (pteflags & H_PTE_PKEY_BIT3) ? 0x8 : 0x0;
+	return pkey | ((pteflags & H_PTE_PKEY_BIT4) ? 0x10 : 0x0);
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
new file mode 100644
index 0000000000..6e70ae5116
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_H
+#ifdef __KERNEL__
+
+#include <asm/asm-const.h>
+
+/*
+ * Common bits between 4K and 64K pages in a linux-style PTE.
+ * Additional bits may be defined in pgtable-hash64-*.h
+ *
+ */
+#define H_PTE_NONE_MASK		_PAGE_HPTEFLAGS
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/hash-64k.h>
+#else
+#include <asm/book3s/64/hash-4k.h>
+#endif
+
+#define H_PTRS_PER_PTE		(1 << H_PTE_INDEX_SIZE)
+#define H_PTRS_PER_PMD		(1 << H_PMD_INDEX_SIZE)
+#define H_PTRS_PER_PUD		(1 << H_PUD_INDEX_SIZE)
+
+/* Bits to set in a PMD/PUD/PGD entry valid bit*/
+#define HASH_PMD_VAL_BITS		(0x8000000000000000UL)
+#define HASH_PUD_VAL_BITS		(0x8000000000000000UL)
+#define HASH_PGD_VAL_BITS		(0x8000000000000000UL)
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define H_PGTABLE_EADDR_SIZE	(H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
+				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK			(~(0xcUL << 60))
+
+/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX	(H_PUD_INDEX_SIZE)
+#endif
+
+/*
+ * +------------------------------+
+ * |                              |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel virtual map end (0xc00e000000000000)
+ * |                              |
+ * |                              |
+ * |      512TB/16TB of vmemmap   |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel vmemmap  start
+ * |                              |
+ * |      512TB/16TB of IO map    |
+ * |                              |
+ * +------------------------------+  Kernel IO map start
+ * |                              |
+ * |      512TB/16TB of vmap      |
+ * |                              |
+ * +------------------------------+  Kernel virt start (0xc008000000000000)
+ * |                              |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel linear (0xc.....)
+ */
+
+#define H_VMALLOC_START		H_KERN_VIRT_START
+#define H_VMALLOC_SIZE		H_KERN_MAP_SIZE
+#define H_VMALLOC_END		(H_VMALLOC_START + H_VMALLOC_SIZE)
+
+#define H_KERN_IO_START		H_VMALLOC_END
+#define H_KERN_IO_SIZE		H_KERN_MAP_SIZE
+#define H_KERN_IO_END		(H_KERN_IO_START + H_KERN_IO_SIZE)
+
+#define H_VMEMMAP_START		H_KERN_IO_END
+#define H_VMEMMAP_SIZE		H_KERN_MAP_SIZE
+#define H_VMEMMAP_END		(H_VMEMMAP_START + H_VMEMMAP_SIZE)
+
+#define NON_LINEAR_REGION_ID(ea)	((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)
+
+/*
+ * Region IDs
+ */
+#define USER_REGION_ID		0
+#define LINEAR_MAP_REGION_ID	1
+#define VMALLOC_REGION_ID	NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID		NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID	NON_LINEAR_REGION_ID(H_VMEMMAP_START)
+#define INVALID_REGION_ID	(VMEMMAP_REGION_ID + 1)
+
+/*
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs.
+ */
+
+/* PTEIDX nibble */
+#define _PTEIDX_SECONDARY	0x8
+#define _PTEIDX_GROUP_IX	0x7
+
+#define H_PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
+#define H_PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
+
+#ifndef __ASSEMBLY__
+static inline int get_region_id(unsigned long ea)
+{
+	int top_nibble = (ea >> 60UL);
+
+	/* Classify @ea by its top four bits, then by where it falls in the kernel map. */
+	BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+	if (top_nibble == 0)
+		return USER_REGION_ID;
+
+	if (top_nibble != (PAGE_OFFSET >> 60))
+		return INVALID_REGION_ID;
+
+	/* At or above H_KERN_VIRT_START the id is computed from the address. */
+	if (ea >= H_KERN_VIRT_START)
+		return NON_LINEAR_REGION_ID(ea);
+
+	return LINEAR_MAP_REGION_ID;
+}
+
+static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);	/* equal once _PAGE_HPTEFLAGS are masked out */
+}
+
+#define	hash__pmd_bad(pmd)		(pmd_val(pmd) & H_PMD_BAD_BITS)
+
+/*
+ * Compare two puds (pte or page table pointer), ignoring _PAGE_HPTEFLAGS.
+ */
+static inline int hash__pud_same(pud_t pud_a, pud_t pud_b)
+{
+	return (((pud_raw(pud_a) ^ pud_raw(pud_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
+#define	hash__pud_bad(pud)		(pud_val(pud) & H_PUD_BAD_BITS)
+
+static inline int hash__p4d_bad(p4d_t p4d)
+{
+	return (p4d_val(p4d) == 0);	/* only an all-zero entry is reported as bad */
+}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void hash__mark_rodata_ro(void);
+extern void hash__mark_initmem_nx(void);
+#endif
+
+extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, unsigned long pte, int huge);
+unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags);
+/* Atomic PTE update: retry while H_PAGE_BUSY is set, clear @clr, set @set, return the old PTE */
+static inline unsigned long hash__pte_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pte_t *ptep, unsigned long clr,
+					 unsigned long set,
+					 int huge)
+{
+	__be64 old_be, tmp_be;	/* big-endian PTE images used inside the asm loop */
+	unsigned long old;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n\
+	and.	%1,%0,%6\n\
+	bne-	1b \n\
+	andc	%1,%0,%4 \n\
+	or	%1,%1,%7\n\
+	stdcx.	%1,0,%3 \n\
+	bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+	: "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep),
+	  "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
+	: "cc" );
+	/* huge pages use the old page table lock */
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+	old = be64_to_cpu(old_be);
+	if (old & H_PAGE_HASHPTE)	/* the old PTE had an associated HPTE */
+		hpte_need_flush(mm, addr, ptep, old, huge);
+
+	return old;
+}
+
+/* Atomically OR the dirty/accessed/permission bits of @entry into *ptep, retrying
+ * while H_PAGE_BUSY is set; this function doesn't need to flush the hash entry
+ */
+static inline void hash__ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	__be64 old, tmp, val, mask;
+
+	mask = cpu_to_be64(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE |
+			   _PAGE_EXEC | _PAGE_SOFT_DIRTY);
+
+	val = pte_raw(entry) & mask;	/* only the masked bits of @entry are applied */
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%4\n\
+		and.	%1,%0,%6\n\
+		bne-	1b \n\
+		or	%0,%3,%0\n\
+		stdcx.	%0,0,%4\n\
+		bne-	1b"
+	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	:"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY))
+	:"cc");
+}
+
+static inline int hash__pte_same(pte_t pte_a, pte_t pte_b)
+{
+	return (((pte_raw(pte_a) ^ pte_raw(pte_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);	/* equal once _PAGE_HPTEFLAGS are masked out */
+}
+
+static inline int hash__pte_none(pte_t pte)
+{
+	return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0;	/* empty if nothing but H_PTE_NONE_MASK bits is set */
+}
+
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+		int ssize, real_pte_t rpte, unsigned int subpg_index);
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void hash__set_pte_at(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep, pte_t pte, int percpu)
+{
+	/*
+	 * Plain store of the new PTE. @mm, @addr and @percpu are not used
+	 * by this implementation.
+	 */
+	*ptep = pte;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+				   pmd_t *pmdp, unsigned long old_pmd);
+#else
+static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
+					  unsigned long addr, pmd_t *pmdp,
+					  unsigned long old_pmd)
+{
+	WARN(1, "%s called with THP disabled\n", __func__);	/* !CONFIG_TRANSPARENT_HUGEPAGE stub: warns, flushes nothing */
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
+extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
+					      unsigned long page_size,
+					      unsigned long phys);
+extern void hash__vmemmap_remove_mapping(unsigned long start,
+				     unsigned long page_size);
+
+int hash__create_section_mapping(unsigned long start, unsigned long end,
+				 int nid, pgprot_t prot);
+int hash__remove_section_mapping(unsigned long start, unsigned long end);
+
+void hash__kernel_map_pages(struct page *page, int numpages, int enable);
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
new file mode 100644
index 0000000000..aa1c67c8bf
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+
+#include <asm/firmware.h>
+
+/*
+ * For radix we want generic code to handle hugetlb. But then if we want
+ * both hash and radix to be enabled together we need to workaround the
+ * limitations.
+ */
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+						unsigned long addr, pte_t *ptep,
+						pte_t old_pte, pte_t pte);
+
+static inline int hstate_get_psize(struct hstate *hstate)
+{
+	unsigned long shift = huge_page_shift(hstate);
+
+	/* Match the hstate's shift against each huge page size in turn. */
+	if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
+		return MMU_PAGE_2M;
+	if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
+		return MMU_PAGE_1G;
+	if (shift == mmu_psize_defs[MMU_PAGE_16M].shift)
+		return MMU_PAGE_16M;
+	if (shift == mmu_psize_defs[MMU_PAGE_16G].shift)
+		return MMU_PAGE_16G;
+
+	/* No match: warn and fall back to mmu_virtual_psize. */
+	WARN(1, "Wrong huge page shift\n");
+	return mmu_virtual_psize;
+}
+
+#define __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED
+static inline bool gigantic_page_runtime_supported(void)
+{
+	/*
+	 * Hash translation mode LPARs: gigantic pages may have been
+	 * reserved with hypervisor assist, so runtime allocation of
+	 * gigantic pages cannot be used on those platforms. Everything
+	 * else supports it.
+	 */
+	bool hash_lpar = firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled();
+
+	return !hash_lpar;
+}
+
+/* hugepd entry valid bit */
+#define HUGEPD_VAL_BITS		(0x8000000000000000UL)
+
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep,
+					 pte_t old_pte, pte_t new_pte);
+/*
+ * This should work for other subarchs too. But right now we use the
+ * new format only for 64bit book3s
+ */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	/*
+	 * Only four bits are available to encode the MMU page size.
+	 */
+	BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
+	BUG_ON(!hugepd_ok(hpd));
+	return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
+}
+
+static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
+{
+	return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;	/* hugepd_populate() stores the psize shifted left by 2 */
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return mmu_psize_to_shift(hugepd_mmu_psize(hpd));	/* page shift for the psize encoded in @hpd */
+}
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+	if (radix_enabled())	/* nothing is done here unless radix is enabled */
+		radix__flush_hugetlb_page(vma, vmaddr);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned int pdshift)
+{
+	unsigned long span_off = addr & ((1UL << pdshift) - 1);	/* low @pdshift bits of @addr */
+	unsigned long idx = span_off >> hugepd_shift(hpd);
+	return hugepd_page(hpd) + idx;
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	*hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2));	/* physical address | valid bit | psize << 2 */
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline int check_and_get_huge_psize(int shift)
+{
+	int mmu_psize;
+
+	if (shift > SLICE_HIGH_SHIFT)
+		return -EINVAL;
+
+	/*
+	 * Firmware may report many page sizes; only accept the ones the
+	 * linux page table layout can back with hugetlb. For now we have
+	 * Radix: 2M and 1G
+	 * Hash: 16M and 16G
+	 */
+	mmu_psize = shift_to_mmu_psize(shift);
+
+	if (radix_enabled()) {
+		if (mmu_psize == MMU_PAGE_2M || mmu_psize == MMU_PAGE_1G)
+			return mmu_psize;
+		return -EINVAL;
+	}
+
+	if (mmu_psize == MMU_PAGE_16M || mmu_psize == MMU_PAGE_16G)
+		return mmu_psize;
+
+	return -EINVAL;
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h
new file mode 100644
index 0000000000..df37a76c1e
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kexec.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_
+#define _ASM_POWERPC_BOOK3S_64_KEXEC_H_
+
+#include <asm/plpar_wrappers.h>
+
+#define reset_sprs reset_sprs
+static inline void reset_sprs(void)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {	/* zero AMR and UAMOR */
+		mtspr(SPRN_AMR, 0);
+		mtspr(SPRN_UAMOR, 0);
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_IAMR, 0);
+		if (cpu_has_feature(CPU_FTR_HVMODE))
+			mtspr(SPRN_CIABR, 0);	/* direct SPR write when CPU_FTR_HVMODE is set */
+		else
+			plpar_set_ciabr(0);	/* otherwise go through the plpar wrapper */
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {	/* zero DEXCR and HASHKEYR */
+		mtspr(SPRN_DEXCR, 0);
+		mtspr(SPRN_HASHKEYR, 0);
+	}
+
+	/*  Do we need isync()? We are going via a kexec reset */
+	isync();
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
new file mode 100644
index 0000000000..497a7bd31e
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_KUP_H
+#define _ASM_POWERPC_BOOK3S_64_KUP_H
+
+#include <linux/const.h>
+#include <asm/reg.h>
+
+#define AMR_KUAP_BLOCK_READ	UL(0x5455555555555555)
+#define AMR_KUAP_BLOCK_WRITE	UL(0xa8aaaaaaaaaaaaaa)
+#define AMR_KUEP_BLOCKED	UL(0x5455555555555555)
+#define AMR_KUAP_BLOCKED	(AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_user_restore gpr1, gpr2
+#if defined(CONFIG_PPC_PKEY)
+	BEGIN_MMU_FTR_SECTION_NESTED(67)
+	b	100f  // skip_restore_amr
+	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67)
+	/*
+	 * AMR and IAMR are going to be different when returning to
+	 * userspace, so reload the values saved in the stack frame.
+	 */
+	ld	\gpr1, STACK_REGS_AMR(r1)
+
+	/*
+	 * If kuap feature is not enabled, do the mtspr
+	 * only if AMR value is different.
+	 */
+	BEGIN_MMU_FTR_SECTION_NESTED(68)
+	mfspr	\gpr2, SPRN_AMR
+	cmpd	\gpr1, \gpr2
+	beq	99f
+	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_KUAP, 68)
+
+	isync
+	mtspr	SPRN_AMR, \gpr1
+99:
+	/*
+	 * Restore IAMR only when returning to userspace
+	 */
+	ld	\gpr1, STACK_REGS_IAMR(r1)
+
+	/*
+	 * If kuep feature is not enabled, do the mtspr
+	 * only if IAMR value is different.
+	 */
+	BEGIN_MMU_FTR_SECTION_NESTED(69)
+	mfspr	\gpr2, SPRN_IAMR
+	cmpd	\gpr1, \gpr2
+	beq	100f
+	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUEP, 69)
+
+	isync
+	mtspr	SPRN_IAMR, \gpr1
+
+100: //skip_restore_amr
+	/* No isync required after the mtspr, see the C kuap_user_restore() */
+#endif
+.endm
+
+.macro kuap_kernel_restore gpr1, gpr2
+#if defined(CONFIG_PPC_PKEY)
+
+	BEGIN_MMU_FTR_SECTION_NESTED(67)
+	/*
+	 * AMR is going to be mostly the same since we are
+	 * returning to the kernel. Compare and do a mtspr.
+	 */
+	ld	\gpr2, STACK_REGS_AMR(r1)
+	mfspr	\gpr1, SPRN_AMR
+	cmpd	\gpr1, \gpr2
+	beq	100f
+	isync
+	mtspr	SPRN_AMR, \gpr2
+	/*
+	 * No isync required, see __kuap_kernel_restore()
+	 * No need to restore IAMR when returning to kernel space.
+	 */
+100:
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
+#endif
+.endm
+
+#ifdef CONFIG_PPC_KUAP
+.macro kuap_check_amr gpr1, gpr2
+#ifdef CONFIG_PPC_KUAP_DEBUG
+	BEGIN_MMU_FTR_SECTION_NESTED(67)
+	mfspr	\gpr1, SPRN_AMR
+	/* Expected value: access to userspace prevented using any key values */
+	LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
+999:	tdne	\gpr1, \gpr2
+	EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
+#endif
+.endm
+#endif
+
+/*
+ *	if (pkey) {
+ *
+ *		save AMR -> stack;
+ *		if (kuap) {
+ *			if (AMR != BLOCKED)
+ *				KUAP_BLOCKED -> AMR;
+ *		}
+ *		if (from_user) {
+ *			save IAMR -> stack;
+ *			if (kuep) {
+ *				KUEP_BLOCKED ->IAMR
+ *			}
+ *		}
+ *		return;
+ *	}
+ *
+ *	if (kuap) {
+ *		if (from_kernel) {
+ *			save AMR -> stack;
+ *			if (AMR != BLOCKED)
+ *				KUAP_BLOCKED -> AMR;
+ *		}
+ *
+ *	}
+ */
+.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
+#if defined(CONFIG_PPC_PKEY)
+
+	/*
+	 * if both pkey and kuap are disabled, nothing to do
+	 */
+	BEGIN_MMU_FTR_SECTION_NESTED(68)
+	b	100f  // skip_save_amr
+	END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_KUAP, 68)
+
+	/*
+	 * if pkey is disabled and we are entering from userspace
+	 * don't do anything.
+	 */
+	BEGIN_MMU_FTR_SECTION_NESTED(67)
+	.ifnb \msr_pr_cr
+	/*
+	 * Without pkey we are not changing AMR outside the kernel
+	 * hence skip this completely.
+	 */
+	bne	\msr_pr_cr, 100f  // from userspace
+	.endif
+        END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67)
+
+	/*
+	 * pkey is enabled or pkey is disabled but entering from kernel
+	 */
+	mfspr	\gpr1, SPRN_AMR
+	std	\gpr1, STACK_REGS_AMR(r1)
+
+	/*
+	 * update kernel AMR with AMR_KUAP_BLOCKED only
+	 * if KUAP feature is enabled
+	 */
+	BEGIN_MMU_FTR_SECTION_NESTED(69)
+	LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
+	cmpd	\use_cr, \gpr1, \gpr2
+	beq	\use_cr, 102f
+	/*
+	 * We don't isync before the mtspr because we very recently entered via an interrupt
+	 */
+	mtspr	SPRN_AMR, \gpr2
+	isync
+102:
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 69)
+
+	/*
+	 * if entering from kernel we don't need to save IAMR
+	 */
+	.ifnb \msr_pr_cr
+	beq	\msr_pr_cr, 100f // from kernel space
+	mfspr	\gpr1, SPRN_IAMR
+	std	\gpr1, STACK_REGS_IAMR(r1)
+
+	/*
+	 * update kernel IAMR with AMR_KUEP_BLOCKED only
+	 * if KUEP feature is enabled
+	 */
+	BEGIN_MMU_FTR_SECTION_NESTED(70)
+	LOAD_REG_IMMEDIATE(\gpr2, AMR_KUEP_BLOCKED)
+	mtspr	SPRN_IAMR, \gpr2
+	isync
+	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUEP, 70)
+	.endif
+
+100: // skip_save_amr
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#include <linux/jump_label.h>
+#include <linux/sched.h>
+
+DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
+
+#ifdef CONFIG_PPC_PKEY
+
+extern u64 __ro_after_init default_uamor;
+extern u64 __ro_after_init default_amr;
+extern u64 __ro_after_init default_iamr;
+
+#include <asm/mmu.h>
+#include <asm/ptrace.h>
+
+/* usage of kthread_use_mm() should inherit the
+ * AMR value of the operating address space. But, the AMR value is
+ * thread-specific and we inherit the address space and not thread
+ * access restrictions. Because of this ignore AMR value when accessing
+ * userspace via kernel thread.
+ */
+static __always_inline u64 current_thread_amr(void)
+{
+	struct pt_regs *regs = current->thread.regs;	/* may be NULL */
+
+	return regs ? regs->amr : default_amr;
+}
+
+static __always_inline u64 current_thread_iamr(void)
+{
+	struct pt_regs *regs = current->thread.regs;	/* may be NULL */
+
+	return regs ? regs->iamr : default_iamr;
+}
+#endif /* CONFIG_PPC_PKEY */
+
+#ifdef CONFIG_PPC_KUAP
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+	bool restore_amr, restore_iamr;
+
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return;
+
+	/*
+	 * With KUAP enabled the AMR is always rewritten; without it, the
+	 * mtspr is only done when the live AMR differs from the value
+	 * saved in @regs.
+	 */
+	restore_amr = mmu_has_feature(MMU_FTR_KUAP) ||
+		      mfspr(SPRN_AMR) != regs->amr;
+
+	/*
+	 * Same policy for the IAMR, keyed on the KUEP feature and the
+	 * value saved in @regs.
+	 */
+	restore_iamr = mmu_has_feature(MMU_FTR_BOOK3S_KUEP) ||
+		       mfspr(SPRN_IAMR) != regs->iamr;
+
+	if (!restore_amr && !restore_iamr)
+		return;
+
+	isync();
+	if (restore_amr)
+		mtspr(SPRN_AMR, regs->amr);
+
+	if (restore_iamr)
+		mtspr(SPRN_IAMR, regs->iamr);
+
+	/*
+	 * No isync required here because we are about to rfi
+	 * back to previous context before any user accesses
+	 * would be made, which is a CSI.
+	 */
+}
+
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+{
+	/*
+	 * Rewrite the AMR only when the saved value differs from @amr.
+	 *
+	 * No trailing isync: we are about to rfi back to the previous
+	 * context before any user access would be made, and that is a CSI.
+	 *
+	 * No need to restore IAMR when returning to kernel space.
+	 */
+	if (unlikely(regs->amr != amr)) {
+		isync();
+		mtspr(SPRN_AMR, regs->amr);
+	}
+}
+
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+	unsigned long amr = mfspr(SPRN_AMR);	/* read once; also the return value */
+
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */
+		WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED);	/* warn once if not fully blocked */
+	return amr;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+
+/* __kuap_lock() not required, book3s/64 does that in ASM */
+
+/*
+ * We support individually allowing read or write, but we don't support nesting
+ * because that would require an expensive read/modify write of the AMR.
+ */
+
+static __always_inline unsigned long get_kuap(void)
+{
+	if (mmu_has_feature(MMU_FTR_KUAP))
+		return mfspr(SPRN_AMR);
+
+	/*
+	 * KUAP is not supported: report AMR_KUAP_BLOCKED anyway, because
+	 * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to
+	 * cause restore_user_access to do a flush.
+	 *
+	 * This has no effect in terms of actually blocking things on hash,
+	 * so it doesn't break anything.
+	 */
+	return AMR_KUAP_BLOCKED;
+}
+
+static __always_inline void set_kuap(unsigned long value)
+{
+	/*
+	 * ISA v3.0B says we need a CSI (Context Synchronising Instruction)
+	 * both before and after the move to AMR, hence the isync pair. See
+	 * table 6 on page 1134. Nothing is written when KUAP is unavailable.
+	 */
+	if (mmu_has_feature(MMU_FTR_KUAP)) {
+		isync();
+		mtspr(SPRN_AMR, value);
+		isync();
+	}
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	unsigned long blocked;
+
+	/*
+	 * For radix this will be a storage protection fault (DSISR_PROTFAULT).
+	 * For hash this will be a key fault (DSISR_KEYFAULT).
+	 *
+	 * An exception table entry exists, yet the userspace access faulted.
+	 * Either the AMR was never unlocked, or userspace denies the access
+	 * itself through a key value. Only the first case is of interest
+	 * here, so check the saved AMR against the BLOCK_WRITE/READ bits
+	 * that correspond to the direction of the access. @address is not
+	 * looked at.
+	 */
+	blocked = is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ;
+
+	return (regs->amr & blocked) == blocked;
+}
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
+{
+	unsigned long thread_amr = 0;	/* stays 0 when MMU_FTR_PKEY is absent */
+
+	// This is written so we can resolve to a single case at build time
+	BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+	if (mmu_has_feature(MMU_FTR_PKEY))
+		thread_amr = current_thread_amr();
+
+	if (dir == KUAP_READ)	/* reads allowed: only the write-block bits are added */
+		set_kuap(thread_amr | AMR_KUAP_BLOCK_WRITE);
+	else if (dir == KUAP_WRITE)	/* writes allowed: only the read-block bits are added */
+		set_kuap(thread_amr | AMR_KUAP_BLOCK_READ);
+	else if (dir == KUAP_READ_WRITE)	/* neither block bit is added */
+		set_kuap(thread_amr);
+	else
+		BUILD_BUG();	/* any other constant dir fails the build */
+}
+
+#else /* CONFIG_PPC_KUAP */
+
+static __always_inline unsigned long get_kuap(void)
+{
+	return AMR_KUAP_BLOCKED;	/* constant when CONFIG_PPC_KUAP is off */
+}
+
+static __always_inline void set_kuap(unsigned long value) { }	/* empty stub */
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
+{ }	/* empty stub: nothing to unlock without CONFIG_PPC_KUAP */
+
+#endif /* !CONFIG_PPC_KUAP */
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+	set_kuap(AMR_KUAP_BLOCKED);	/* dir is not consulted: always block fully */
+	if (static_branch_unlikely(&uaccess_flush_key))
+		do_uaccess_flush();	/* only when the uaccess_flush_key static key is on */
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+	unsigned long saved = get_kuap();	/* state handed back to the caller */
+
+	set_kuap(AMR_KUAP_BLOCKED);
+	if (static_branch_unlikely(&uaccess_flush_key))
+		do_uaccess_flush();
+
+	return saved;
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+	set_kuap(flags);	/* flags: normally what prevent_user_access_return() gave back */
+	if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
+		do_uaccess_flush();	/* flush only when the restored state is "blocked" */
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_KUP_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
new file mode 100644
index 0000000000..1c4eebbc69
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -0,0 +1,885 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+/*
+ * PowerPC64 memory management structures
+ *
+ * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com>
+ *   PPC64 rework.
+ */
+
+#include <asm/page.h>
+#include <asm/bug.h>
+#include <asm/asm-const.h>
+
+/*
+ * This is necessary to get the definition of PGTABLE_RANGE which we
+ * need for various slices related matters. Note that this isn't the
+ * complete pgtable.h but only a portion of it.
+ */
+#include <asm/book3s/64/pgtable.h>
+#include <asm/book3s/64/slice.h>
+#include <asm/task_size_64.h>
+#include <asm/cpu_has_feature.h>
+
+/*
+ * SLB
+ */
+
+#define SLB_NUM_BOLTED		2
+#define SLB_CACHE_ENTRIES	8
+#define SLB_MIN_SIZE		32
+
+/* Bits in the SLB ESID word */
+#define SLB_ESID_V		ASM_CONST(0x0000000008000000) /* valid */
+
+/* Bits in the SLB VSID word */
+#define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_256M	SLB_VSID_SHIFT
+#define SLB_VSID_SHIFT_1T	24
+#define SLB_VSID_SSIZE_SHIFT	62
+#define SLB_VSID_B		ASM_CONST(0xc000000000000000)
+#define SLB_VSID_B_256M		ASM_CONST(0x0000000000000000)
+#define SLB_VSID_B_1T		ASM_CONST(0x4000000000000000)
+#define SLB_VSID_KS		ASM_CONST(0x0000000000000800)
+#define SLB_VSID_KP		ASM_CONST(0x0000000000000400)
+#define SLB_VSID_N		ASM_CONST(0x0000000000000200) /* no-execute */
+#define SLB_VSID_L		ASM_CONST(0x0000000000000100)
+#define SLB_VSID_C		ASM_CONST(0x0000000000000080) /* class */
+#define SLB_VSID_LP		ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LP_00		ASM_CONST(0x0000000000000000)
+#define SLB_VSID_LP_01		ASM_CONST(0x0000000000000010)
+#define SLB_VSID_LP_10		ASM_CONST(0x0000000000000020)
+#define SLB_VSID_LP_11		ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LLP		(SLB_VSID_L|SLB_VSID_LP)
+
+#define SLB_VSID_KERNEL		(SLB_VSID_KP)
+#define SLB_VSID_USER		(SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
+
+#define SLBIE_C			(0x08000000)
+#define SLBIE_SSIZE_SHIFT	25
+
+/*
+ * Hash table
+ */
+
+#define HPTES_PER_GROUP 8
+
+#define HPTE_V_SSIZE_SHIFT	62
+#define HPTE_V_AVPN_SHIFT	7
+#define HPTE_V_COMMON_BITS	ASM_CONST(0x000fffffffffffff)
+#define HPTE_V_AVPN		ASM_CONST(0x3fffffffffffff80)
+#define HPTE_V_AVPN_3_0		ASM_CONST(0x000fffffffffff80)
+#define HPTE_V_AVPN_VAL(x)	(((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
+#define HPTE_V_COMPARE(x,y)	(!(((x) ^ (y)) & 0xffffffffffffff80UL))
+#define HPTE_V_BOLTED		ASM_CONST(0x0000000000000010)
+#define HPTE_V_LOCK		ASM_CONST(0x0000000000000008)
+#define HPTE_V_LARGE		ASM_CONST(0x0000000000000004)
+#define HPTE_V_SECONDARY	ASM_CONST(0x0000000000000002)
+#define HPTE_V_VALID		ASM_CONST(0x0000000000000001)
+
+/*
+ * ISA 3.0 has a different HPTE format.
+ */
+#define HPTE_R_3_0_SSIZE_SHIFT	58
+#define HPTE_R_3_0_SSIZE_MASK	(3ull << HPTE_R_3_0_SSIZE_SHIFT)
+#define HPTE_R_PP0		ASM_CONST(0x8000000000000000)
+#define HPTE_R_TS		ASM_CONST(0x4000000000000000)
+#define HPTE_R_KEY_HI		ASM_CONST(0x3000000000000000)
+#define HPTE_R_KEY_BIT4		ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT3		ASM_CONST(0x1000000000000000)
+#define HPTE_R_RPN_SHIFT	12
+#define HPTE_R_RPN		ASM_CONST(0x0ffffffffffff000)
+#define HPTE_R_RPN_3_0		ASM_CONST(0x01fffffffffff000)
+#define HPTE_R_PP		ASM_CONST(0x0000000000000003)
+#define HPTE_R_PPP		ASM_CONST(0x8000000000000003)
+#define HPTE_R_N		ASM_CONST(0x0000000000000004)
+#define HPTE_R_G		ASM_CONST(0x0000000000000008)
+#define HPTE_R_M		ASM_CONST(0x0000000000000010)
+#define HPTE_R_I		ASM_CONST(0x0000000000000020)
+#define HPTE_R_W		ASM_CONST(0x0000000000000040)
+#define HPTE_R_WIMG		ASM_CONST(0x0000000000000078)
+#define HPTE_R_C		ASM_CONST(0x0000000000000080)
+#define HPTE_R_R		ASM_CONST(0x0000000000000100)
+#define HPTE_R_KEY_LO		ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY_BIT2		ASM_CONST(0x0000000000000800)
+#define HPTE_R_KEY_BIT1		ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT0		ASM_CONST(0x0000000000000200)
+#define HPTE_R_KEY		(HPTE_R_KEY_LO | HPTE_R_KEY_HI)
+
+#define HPTE_V_1TB_SEG		ASM_CONST(0x4000000000000000)
+#define HPTE_V_VRMA_MASK	ASM_CONST(0x4001ffffff000000)
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+#define PP_RWXX	0	/* Supervisor read/write, User none */
+#define PP_RWRX 1	/* Supervisor read/write, User read */
+#define PP_RWRW 2	/* Supervisor read/write, User read/write */
+#define PP_RXRX 3	/* Supervisor read,       User read */
+#define PP_RXXX	(HPTE_R_PP0 | 2)	/* Supervisor read, user none */
+
+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK	0xc00	/* invalidation selector */
+#define  TLBIEL_INVAL_PAGE	0x000	/* invalidate a single page */
+#define  TLBIEL_INVAL_SET_LPID	0x800	/* invalidate a set for current LPID */
+#define  TLBIEL_INVAL_SET	0xc00	/* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK	0xfff000	/* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT	12
+
+#define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
+#define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
+#define POWER9_TLB_SETS_HASH	256	/* # sets in POWER9 TLB Hash mode */
+#define POWER9_TLB_SETS_RADIX	128	/* # sets in POWER9 TLB Radix mode */
+
+#ifndef __ASSEMBLY__
+
+struct mmu_hash_ops {
+	void            (*hpte_invalidate)(unsigned long slot,
+					   unsigned long vpn,
+					   int bpsize, int apsize,
+					   int ssize, int local);
+	long		(*hpte_updatepp)(unsigned long slot,
+					 unsigned long newpp,
+					 unsigned long vpn,
+					 int bpsize, int apsize,
+					 int ssize, unsigned long flags);
+	void            (*hpte_updateboltedpp)(unsigned long newpp,
+					       unsigned long ea,
+					       int psize, int ssize);
+	long		(*hpte_insert)(unsigned long hpte_group,
+				       unsigned long vpn,
+				       unsigned long prpn,
+				       unsigned long rflags,
+				       unsigned long vflags,
+				       int psize, int apsize,
+				       int ssize);
+	long		(*hpte_remove)(unsigned long hpte_group);
+	int             (*hpte_removebolted)(unsigned long ea,
+					     int psize, int ssize);
+	void		(*flush_hash_range)(unsigned long number, int local);
+	void		(*hugepage_invalidate)(unsigned long vsid,
+					       unsigned long addr,
+					       unsigned char *hpte_slot_array,
+					       int psize, int ssize, int local);
+	int		(*resize_hpt)(unsigned long shift);
+	/*
+	 * Special for kexec.
+	 * To be called in real mode with interrupts disabled. No locks are
+	 * taken as such, concurrent access on pre POWER5 hardware could result
+	 * in a deadlock.
+	 * The linear mapping is destroyed as well.
+	 */
+	void		(*hpte_clear_all)(void);
+};
+extern struct mmu_hash_ops mmu_hash_ops;
+
+struct hash_pte {
+	__be64 v;	/* big-endian; hpte_get_old_v() reads it via be64_to_cpu() */
+	__be64 r;	/* big-endian; read the same way in hpte_get_old_v() */
+};
+
+extern struct hash_pte *htab_address;
+extern unsigned long htab_size_bytes;
+extern unsigned long htab_hash_mask;
+
+
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	int idx = 0;
+
+	/* Linear scan of mmu_psize_defs[]; -1 when no entry has this shift. */
+	while (idx < MMU_PAGE_COUNT && mmu_psize_defs[idx].shift != shift)
+		idx++;
+	return idx < MMU_PAGE_COUNT ? idx : -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	if (!mmu_psize_defs[mmu_psize].shift)	/* a zero shift is treated as a bug */
+		BUG();
+	return mmu_psize_defs[mmu_psize].shift;
+}
+
+static inline unsigned int ap_to_shift(unsigned long ap)
+{
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		if (mmu_psize_defs[psize].ap == ap)	/* first matching entry wins */
+			return mmu_psize_defs[psize].shift;
+	}
+
+	return -1;	/* no match; becomes UINT_MAX in the unsigned return type */
+}
+
+static inline unsigned long get_sllp_encoding(int psize)
+{
+	unsigned long raw = mmu_psize_defs[psize].sllp;
+
+	/* L (0x100) moves down to bit 2, LP (0x030) down to bits 1:0. */
+	raw = ((raw & SLB_VSID_L) >> 6) | ((raw & SLB_VSID_LP) >> 4);
+	return raw;
+}
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Segment sizes.
+ * These are the values used by hardware in the B field of
+ * SLB entries and the first dword of MMU hashtable entries.
+ * The B field is 2 bits; the values 2 and 3 are unused and reserved.
+ */
+#define MMU_SEGSIZE_256M	0
+#define MMU_SEGSIZE_1T		1
+
+/*
+ * encode page number shift.
+ * in order to fit the 78 bit va in a 64 bit variable we shift the va by
+ * 12 bits. This enables us to address up to a 76 bit va.
+ * For hpt hash from a va we can ignore the page size bits of va and for
+ * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
+ * we work in all cases including 4k page size.
+ */
+#define VPN_SHIFT	12
+
+/*
+ * HPTE Large Page (LP) details
+ */
+#define LP_SHIFT	12
+#define LP_BITS		8
+#define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+static inline int slb_vsid_shift(int ssize)
+{
+	/* Anything other than MMU_SEGSIZE_256M gets the 1T shift. */
+	return (ssize == MMU_SEGSIZE_256M) ? SLB_VSID_SHIFT
+					   : SLB_VSID_SHIFT_1T;
+}
+
+static inline int segment_shift(int ssize)
+{
+	/* Anything other than MMU_SEGSIZE_256M gets the 1T shift. */
+	return (ssize == MMU_SEGSIZE_256M) ? SID_SHIFT
+					   : SID_SHIFT_1T;
+}
+
+/*
+ * This array is indexed by the LP field of the HPTE second dword.
+ * Since this field may contain some RPN bits, some entries are
+ * replicated so that we get the same value irrespective of RPN.
+ * The top 4 bits are the page size index (MMU_PAGE_*) for the
+ * actual page size, the bottom 4 bits are the base page size.
+ */
+extern u8 hpte_page_sizes[1 << LP_BITS];
+
+static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
+					     bool is_base_size)
+{
+	unsigned int i, lp;
+
+	if (!(h & HPTE_V_LARGE))	/* L bit clear: fixed 4K (1 << 12) */
+		return 1ul << 12;
+
+	/* Look at the 8 bit LP value */
+	lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+	i = hpte_page_sizes[lp];	/* top 4 bits: actual size, bottom 4: base size */
+	if (!i)
+		return 0;	/* a zero table entry is reported as size 0 */
+	if (!is_base_size)
+		i >>= 4;	/* select the actual-size index */
+	return 1ul << mmu_psize_defs[i & 0xf].shift;
+}
+
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+	return __hpte_page_size(h, l, false);	/* actual page size */
+}
+
+static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
+{
+	return __hpte_page_size(h, l, true);	/* base page size */
+}
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_kernel_ssize;
+extern int mmu_highuser_ssize;
+extern u16 mmu_slb_size;
+extern unsigned long tce_alloc_start, tce_alloc_end;
+
+/*
+ * If the processor supports 64k normal pages but not 64k cache
+ * inhibited pages, we have to be prepared to switch processes
+ * to use 4k pages when they create cache-inhibited mappings.
+ * If this is the case, mmu_ci_restrictions will be set to 1.
+ */
+extern int mmu_ci_restrictions;
+
+/*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE.  The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+					     int ssize)
+{
+	unsigned long avpn, b_field;
+	/*
+	 * The AVA field omits the low-order 23 bits of the 78 bit VA.
+	 * The PTE does not need them: the low-order b of these bits are
+	 * the byte offset into the virtual page and, if b < 23, the
+	 * remaining high-order 23-b bits are always used in selecting
+	 * the PTEGs to be searched. vpn is already shifted by VPN_SHIFT,
+	 * so only 23 - VPN_SHIFT more bits are dropped here.
+	 */
+	avpn = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
+	b_field = ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+
+	return (avpn << HPTE_V_AVPN_SHIFT) | b_field;
+}
+
+/*
+ * ISA v3.0 defines a new HPTE format, which differs from the old
+ * format in having smaller AVPN and ARPN fields, and the B field
+ * in the second dword instead of the first.
+ */
+static inline unsigned long hpte_old_to_new_v(unsigned long v)
+{
+	/* trim AVPN, drop B */
+	return v & HPTE_V_COMMON_BITS;	/* keeps bits 51:0 only */
+}
+
+static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
+{
+	unsigned long b = v >> HPTE_V_SSIZE_SHIFT;	/* B field, moved from 1st to 2nd dword */
+
+	return (r & ~HPTE_R_3_0_SSIZE_MASK) | (b << HPTE_R_3_0_SSIZE_SHIFT);
+}
+
+static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
+{
+	/* insert B field */
+	return (v & HPTE_V_COMMON_BITS) |
+		((r & HPTE_R_3_0_SSIZE_MASK) <<	/* B sits at bits 59:58 of r ... */
+		 (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));	/* ... and moves to 63:62 */
+}
+
+static inline unsigned long hpte_new_to_old_r(unsigned long r)
+{
+	/* clear out B field */
+	return r & ~HPTE_R_3_0_SSIZE_MASK;
+}
+
+static inline unsigned long hpte_get_old_v(struct hash_pte *hptep)
+{
+	unsigned long v = be64_to_cpu(hptep->v);
+
+	/* Without CPU_FTR_ARCH_300 the dword is returned unconverted. */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return v;
+	return hpte_new_to_old_v(v, be64_to_cpu(hptep->r));
+}
+
+/*
+ * This function sets the AVPN and L fields of the HPTE  appropriately
+ * using the base page size and actual page size.
+ */
+static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
+					  int actual_psize, int ssize)
+{
+	unsigned long large = 0;
+
+	if (actual_psize != MMU_PAGE_4K)	/* every non-4K actual size sets L */
+		large = HPTE_V_LARGE;
+	return hpte_encode_avpn(vpn, base_psize, ssize) | large;
+}
+
+/*
+ * This function sets the ARPN, and LP fields of the HPTE appropriately
+ * for the page size. We assume the pa is already "clean" that is properly
+ * aligned for the requested page size
+ */
+static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
+					  int actual_psize)
+{
+	unsigned int penc, shift;
+
+	if (actual_psize == MMU_PAGE_4K)	/* a 4K page needs no special encoding */
+		return pa & HPTE_R_RPN;
+
+	penc = mmu_psize_defs[base_psize].penc[actual_psize];
+	shift = mmu_psize_defs[actual_psize].shift;
+	return (pa & ~((1ul << shift) - 1)) | (penc << LP_SHIFT);
+}
+
+/*
+ * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
+ */
+static inline unsigned long hpt_vpn(unsigned long ea,
+				    unsigned long vsid, int ssize)
+{
+	/* Number of vpn bits taken from the offset within the segment. */
+	int seg_bits = segment_shift(ssize) - VPN_SHIFT;
+	unsigned long in_seg = (ea >> VPN_SHIFT) & ((1ul << seg_bits) - 1);
+
+	return (vsid << seg_bits) | in_seg;
+}
+
+/*
+ * This hashes a virtual address
+ */
+static inline unsigned long hpt_hash(unsigned long vpn,
+				     unsigned int shift, int ssize)
+{
+	unsigned long mask;
+	unsigned long hash, vsid;
+
+	/* VPN_SHIFT can be at most 12 */
+	if (ssize == MMU_SEGSIZE_256M) {
+		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;	/* in-segment vpn bits */
+		hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
+			((vpn & mask) >> (shift - VPN_SHIFT));
+	} else {
+		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;	/* same, for a 1T segment */
+		vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
+		hash = vsid ^ (vsid << 25) ^
+			((vpn & mask) >> (shift - VPN_SHIFT)) ;
+	}
+	return hash & 0x7fffffffffUL;	/* keep the low 39 bits */
+}
+
+#define HPTE_LOCAL_UPDATE	0x1
+#define HPTE_NOHPTE_UPDATE	0x2
+#define HPTE_USE_KERNEL_KEY	0x4
+
+long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa,
+			   unsigned long rflags, unsigned long vflags, int psize, int ssize);
+extern int __hash_page_4K(unsigned long ea, unsigned long access,
+			  unsigned long vsid, pte_t *ptep, unsigned long trap,
+			  unsigned long flags, int ssize, int subpage_prot);
+extern int __hash_page_64K(unsigned long ea, unsigned long access,
+			   unsigned long vsid, pte_t *ptep, unsigned long trap,
+			   unsigned long flags, int ssize);
+struct mm_struct;
+unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
+extern int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+			unsigned long access, unsigned long trap,
+			unsigned long flags);
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+		     unsigned long dsisr);
+void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc);
+int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, unsigned long msr);
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+		     pte_t *ptep, unsigned long trap, unsigned long flags,
+		     int ssize, unsigned int shift, unsigned int mmu_psize);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __hash_page_thp(unsigned long ea, unsigned long access,
+			   unsigned long vsid, pmd_t *pmdp, unsigned long trap,
+			   unsigned long flags, int ssize, unsigned int psize);
+#else
+static inline int __hash_page_thp(unsigned long ea, unsigned long access,
+				  unsigned long vsid, pmd_t *pmdp,
+				  unsigned long trap, unsigned long flags,
+				  int ssize, unsigned int psize)
+{
+	BUG();	/* stub for !CONFIG_TRANSPARENT_HUGEPAGE: calling it is a bug */
+	return -1;
+}
+#endif
+extern void hash_failure_debug(unsigned long ea, unsigned long access,
+			       unsigned long vsid, unsigned long trap,
+			       int ssize, int psize, int lpsize,
+			       unsigned long pte);
+extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+			     unsigned long pstart, unsigned long prot,
+			     int psize, int ssize);
+int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+			int psize, int ssize);
+extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
+extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
+
+extern void hash__setup_new_exec(void);
+
+#ifdef CONFIG_PPC_PSERIES
+void hpte_init_pseries(void);
+#else
+static inline void hpte_init_pseries(void) { }
+#endif
+
+extern void hpte_init_native(void);
+
+struct slb_entry {
+	u64	esid;
+	u64	vsid;
+};
+
+extern void slb_initialize(void);
+void slb_flush_and_restore_bolted(void);
+void slb_flush_all_realmode(void);
+void __slb_restore_bolted_realmode(void);
+void slb_restore_bolted_realmode(void);
+void slb_save_contents(struct slb_entry *slb_ptr);
+void slb_dump_contents(struct slb_entry *slb_ptr);
+
+extern void slb_vmalloc_update(void);
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void slb_set_size(u16 size);
+#else
+static inline void slb_set_size(u16 size) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * VSID allocation (256MB segment)
+ *
+ * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
+ * from mmu context id and effective segment id of the address.
+ *
+ * For user processes max context id is limited to MAX_USER_CONTEXT.
+ * more details in get_user_context
+ *
+ * For kernel space get_kernel_context
+ *
+ * The proto-VSIDs are then scrambled into real VSIDs with the
+ * multiplicative hash:
+ *
+ *	VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ * VSID_MULTIPLIER is prime, so in particular it is
+ * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
+ * Because the modulus is 2^n-1 we can compute it efficiently without
+ * a divide or extra multiply (see below). The scramble function gives
+ * robust scattering in the hash table (at least based on some initial
+ * results).
+ *
+ * We use VSID 0 to indicate an invalid VSID. This means we can't use context id
+ * 0, because a context id of 0 and an EA of 0 gives a proto-VSID of 0, which
+ * will produce a VSID of 0.
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
+ */
+
+/*
+ * Max Va bits we support as of now is 68 bits. We want 19 bit
+ * context ID.
+ * Restrictions:
+ * GPU has restrictions of not able to access beyond 128TB
+ * (47 bit effective address). We also cannot do more than 20bit PID.
+ * For p4 and p5 which can only do 65 bit VA, we restrict our CONTEXT_BITS
+ * to 16 bits (ie, we can only have 2^16 pids at the same time).
+ */
+#define VA_BITS			68
+#define CONTEXT_BITS		19
+#define ESID_BITS		(VA_BITS - (SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T		(VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))
+
+#define ESID_BITS_MASK		((1 << ESID_BITS) - 1)
+#define ESID_BITS_1T_MASK	((1 << ESID_BITS_1T) - 1)
+
+/*
+ * Now certain config support MAX_PHYSMEM more than 512TB. Hence we will need
+ * to use more than one context for linear mapping the kernel.
+ * For vmalloc and memmap, we use just one context with 512TB. With 64 byte
+ * struct page size, we need only 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
+ */
+#if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT	(1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#else
+#define MAX_KERNEL_CTX_CNT	1
+#endif
+
+#define MAX_VMALLOC_CTX_CNT	1
+#define MAX_IO_CTX_CNT		1
+#define MAX_VMEMMAP_CTX_CNT	1
+
+/*
+ * 256MB segment
+ * The proto-VSID space has 2^(CONTEXT_BITS + ESID_BITS) - 1 segments
+ * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
+ * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
+ * context maps 2^49 bytes (512TB).
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
+ *
+ */
+#define MAX_USER_CONTEXT	((ASM_CONST(1) << CONTEXT_BITS) - 2)
+
+// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel
+#define MIN_USER_CONTEXT	(MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
+				 MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2)
+
+/*
+ * For platforms that support only 65bit VA we limit the context bits
+ */
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
+
+/*
+ * This should be computed such that protovsid * vsid_multiplier
+ * doesn't overflow 64 bits. The vsid_multiplier should also be
+ * co-prime to vsid_modulus. We also need to make sure that number
+ * of bits in multiplied result (dividend) is less than twice the number of
+ * protovsid bits for our modulus optimization to work.
+ *
+ * The below table shows the current values used.
+ * |-------+------------+----------------------+------------+-------------------|
+ * |       | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2* prot VSID_BITS |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 1T    |         24 |                   25 |         49 |                50 |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 256MB |         24 |                   37 |         61 |                74 |
+ * |-------+------------+----------------------+------------+-------------------|
+ *
+ * |-------+------------+----------------------+------------+--------------------|
+ * |       | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2* proto VSID_BITS |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 1T    |         24 |                   28 |         52 |                 56 |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 256MB |         24 |                   40 |         64 |                 80 |
+ * |-------+------------+----------------------+------------+--------------------|
+ *
+ */
+#define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_256M		(VA_BITS - SID_SHIFT)
+#define VSID_BITS_65_256M	(65 - SID_SHIFT)
+/*
+ * Modular multiplicative inverse of VSID_MULTIPLIER under modulo VSID_MODULUS
+ */
+#define VSID_MULINV_256M	ASM_CONST(665548017062)
+
+#define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_1T		(VA_BITS - SID_SHIFT_1T)
+#define VSID_BITS_65_1T		(65 - SID_SHIFT_1T)
+#define VSID_MULINV_1T		ASM_CONST(209034062)
+
+/* 1TB VSID reserved for VRMA */
+#define VRMA_VSID	0x1ffffffUL
+#define USER_VSID_RANGE	(1UL << (ESID_BITS + SID_SHIFT))
+
+/* 4 bits per slice and we have one slice per 1TB */
+#define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
+#define LOW_SLICE_ARRAY_SZ	(BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x)	((x)->hash_context->slb_addr_limit >> 41)
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * For the sub-page protection option, we extend the PGD with one of
+ * these.  Basically we have a 3-level tree, with the top level being
+ * the protptrs array.  To optimize speed and memory consumption when
+ * only addresses < 4GB are being protected, pointers to the first
+ * four pages of sub-page protection words are stored in the low_prot
+ * array.
+ * Each page of sub-page protection words protects 1GB (4 bytes
+ * protects 64k).  For the 3-level tree, each page of pointers then
+ * protects 8TB.
+ */
+struct subpage_prot_table {
+	unsigned long maxaddr;	/* only addresses < this are protected */
+	unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)];
+	unsigned int *low_prot[4];
+};
+
+#define SBP_L1_BITS		(PAGE_SHIFT - 2)
+#define SBP_L2_BITS		(PAGE_SHIFT - 3)
+#define SBP_L1_COUNT		(1 << SBP_L1_BITS)
+#define SBP_L2_COUNT		(1 << SBP_L2_BITS)
+#define SBP_L2_SHIFT		(PAGE_SHIFT + SBP_L1_BITS)
+#define SBP_L3_SHIFT		(SBP_L2_SHIFT + SBP_L2_BITS)
+
+extern void subpage_prot_free(struct mm_struct *mm);
+#else
+static inline void subpage_prot_free(struct mm_struct *mm) {}
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+	u16 user_psize; /* page size index */
+
+	/* SLB page size encodings*/
+	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+	struct slice_mask mask_64k;
+#endif
+	struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_16m;
+	struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+	struct subpage_prot_table *spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
+#if 0
+/*
+ * The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with.  However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply.
+ */
+#define vsid_scramble(protovsid, size) \
+	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
+
+/* simplified form avoiding mod operation */
+#define vsid_scramble(protovsid, size) \
+	({								 \
+		unsigned long x;					 \
+		x = (protovsid) * VSID_MULTIPLIER_##size;		 \
+		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
+		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
+	})
+
+#else /* 1 */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+				  unsigned long vsid_multiplier, int vsid_bits)
+{
+	unsigned long vsid;
+	unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);	/* 2^vsid_bits - 1 */
+	/*
+	 * We have the same multiplier for both 256M and 1T segments now
+	 */
+	vsid = protovsid * vsid_multiplier;
+	vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);	/* fold high part onto low */
+	return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;	/* final wrap, no divide */
+}
+
+#endif /* 1 */
+
+/* Returns the segment size indicator for a user address */
+static inline int user_segment_size(unsigned long addr)
+{
+	/* Addresses below 1T always use 256M segments. */
+	if (addr < (1UL << SID_SHIFT_1T))
+		return MMU_SEGSIZE_256M;
+	return mmu_highuser_ssize;
+}
+
+static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
+				     int ssize)
+{
+	unsigned long va_bits = VA_BITS;
+	unsigned long vsid_bits;
+	unsigned long protovsid;
+
+	/*
+	 * Bad address: VSID 0 (the invalid VSID) is returned for it.
+	 */
+	if ((ea & EA_MASK)  >= H_PGTABLE_RANGE)
+		return 0;
+
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;	/* fall back to a 65 bit VA */
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		vsid_bits = va_bits - SID_SHIFT;
+		protovsid = (context << ESID_BITS) |	/* context above the ESID bits */
+			((ea >> SID_SHIFT) & ESID_BITS_MASK);
+		return vsid_scramble(protovsid, VSID_MULTIPLIER_256M, vsid_bits);
+	}
+	/* 1T segment */
+	vsid_bits = va_bits - SID_SHIFT_1T;
+	protovsid = (context << ESID_BITS_1T) |	/* same layout with the 1T widths */
+		((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+	return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
+}
+
+/*
+ * For kernel space, we use context ids as
+ * below. Range is 512TB per context.
+ *
+ * 0x00001 -  [ 0xc000000000000000 - 0xc001ffffffffffff]
+ * 0x00002 -  [ 0xc002000000000000 - 0xc003ffffffffffff]
+ * 0x00003 -  [ 0xc004000000000000 - 0xc005ffffffffffff]
+ * 0x00004 -  [ 0xc006000000000000 - 0xc007ffffffffffff]
+ *
+ * vmap, IO, vmemmap
+ *
+ * 0x00005 -  [ 0xc008000000000000 - 0xc009ffffffffffff]
+ * 0x00006 -  [ 0xc00a000000000000 - 0xc00bffffffffffff]
+ * 0x00007 -  [ 0xc00c000000000000 - 0xc00dffffffffffff]
+ *
+ */
+static inline unsigned long get_kernel_context(unsigned long ea)
+{
+	unsigned long region_id = get_region_id(ea);
+
+	/*
+	 * Depending on Kernel config, the linear map region can have one
+	 * context or more; they are numbered from 1. We already verified
+	 * ea to be not beyond the addr limit.
+	 */
+	if (region_id == LINEAR_MAP_REGION_ID)
+		return 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+
+	/*
+	 * Any other region: the context is derived from the region id.
+	 */
+	return region_id + MAX_KERNEL_CTX_CNT - 1;
+}
+
+/*
+ * This is only valid for addresses >= PAGE_OFFSET
+ */
+static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
+{
+	/*
+	 * Non-kernel addresses get VSID 0, which this file uses to
+	 * indicate an invalid VSID.
+	 */
+	if (!is_kernel_addr(ea))
+		return 0;
+	return get_vsid(get_kernel_context(ea), ea, ssize);
+}
+
+unsigned htab_shift_for_mem_size(unsigned long mem_size);
+
+enum slb_index {
+	LINEAR_INDEX	= 0, /* Kernel linear map  (0xc000000000000000) */
+	KSTACK_INDEX	= 1, /* Kernel stack map */
+};
+
+#define slb_esid_mask(ssize)	\
+	(((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
+
+static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
+					 enum slb_index index)
+{
+	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;	/* ESID | valid | index */
+}
+
+static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
+					   unsigned long flags)
+{
+	return (vsid << slb_vsid_shift(ssize)) | flags |
+		((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);	/* ssize lands in bits 63:62 */
+}
+
+static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+					 unsigned long flags)
+{
+	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);	/* VSID is 0 for non-kernel ea */
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
new file mode 100644
index 0000000000..fedbc5d381
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_H_
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * Page size definition
+ *
+ *    shift : is the "PAGE_SHIFT" value for that page size
+ *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
+ *            directly to a slbmte "vsid" value
+ *    penc  : is the HPTE encoding mask for the "LP" field
+ *
+ */
+struct mmu_psize_def {
+	unsigned int	shift;	/* number of bits */
+	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
+	unsigned int	tlbiel;	/* tlbiel supported for that page size */
+	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
+	unsigned long   h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
+	union {
+		unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
+		unsigned long ap;	/* Ap encoding used by PowerISA 3.0 */
+	};
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLY__ */
+
+/* 64-bit classic hash table MMU */
+#include <asm/book3s/64/mmu-hash.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * ISA 3.0 partition and process table entry format
+ */
+struct prtb_entry {
+	__be64 prtb0;
+	__be64 prtb1;
+};
+extern struct prtb_entry *process_tb;
+
+struct patb_entry {
+	__be64 patb0;
+	__be64 patb1;
+};
+extern struct patb_entry *partition_tb;
+
+/* Bits in patb0 field */
+#define PATB_HR		(1UL << 63)
+#define RPDB_MASK	0x0fffffffffffff00UL
+#define RPDB_SHIFT	(1UL << 8)
+#define RTS1_SHIFT	61		/* top 2 bits of radix tree size */
+#define RTS1_MASK	(3UL << RTS1_SHIFT)
+#define RTS2_SHIFT	5		/* bottom 3 bits of radix tree size */
+#define RTS2_MASK	(7UL << RTS2_SHIFT)
+#define RPDS_MASK	0x1f		/* root page dir. size field */
+
+/* Bits in patb1 field */
+#define PATB_GR		(1UL << 63)	/* guest uses radix; must match HR */
+#define PRTS_MASK	0x1f		/* process table size field */
+#define PRTB_MASK	0x0ffffffffffff000UL
+
+/* Number of supported LPID bits */
+extern unsigned int mmu_lpid_bits;
+
+/* Number of supported PID bits */
+extern unsigned int mmu_pid_bits;
+
+/* Base PID to allocate from */
+extern unsigned int mmu_base_pid;
+
+extern unsigned long __ro_after_init memory_block_size;
+
+#define PRTB_SIZE_SHIFT	(mmu_pid_bits + 4)
+#define PRTB_ENTRIES	(1ul << mmu_pid_bits)
+
+#define PATB_SIZE_SHIFT	(mmu_lpid_bits + 4)
+#define PATB_ENTRIES	(1ul << mmu_lpid_bits)
+
+typedef unsigned long mm_context_id_t;
+struct spinlock;
+
+/* Maximum possible number of NPUs in a system. */
+#define NV_MAX_NPUS 8
+
+typedef struct {
+	union {
+		/*
+		 * We use id as the PIDR content for radix. On hash we can use
+		 * more than one id. The extended ids are used when we start
+		 * having address above 512TB. We allocate one extended id
+		 * for each 512TB. The new id is then used with the 49 bit
+		 * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+		 * from EA and new context ids to build the new VAs.
+		 */
+		mm_context_id_t id;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+		mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+#endif
+	};
+
+	/* Number of bits in the mm_cpumask */
+	atomic_t active_cpus;
+
+	/* Number of users of the external (Nest) MMU */
+	atomic_t copros;
+
+	/* Number of user space windows opened in process mm_context */
+	atomic_t vas_windows;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	struct hash_mm_context *hash_context;
+#endif
+
+	void __user *vdso;
+	/*
+	 * pagetable fragment support
+	 */
+	void *pte_frag;
+	void *pmd_frag;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	struct list_head iommu_group_mem_list;
+#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	/*
+	 * Each bit represents one protection key.
+	 * bit set   -> key allocated
+	 * bit unset -> key available for allocation
+	 */
+	u32 pkey_allocation_map;
+	s16 execute_only_pkey; /* key holding execute-only protection */
+#endif
+} mm_context_t;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+	return ctx->hash_context->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+	ctx->hash_context->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+	return ctx->hash_context->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+	return ctx->hash_context->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+	return ctx->hash_context->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+	ctx->hash_context->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	if (psize == MMU_PAGE_64K)
+		return &ctx->hash_context->mask_64k;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+	if (psize == MMU_PAGE_16M)
+		return &ctx->hash_context->mask_16m;
+	if (psize == MMU_PAGE_16G)
+		return &ctx->hash_context->mask_16g;
+#endif
+	BUG_ON(psize != MMU_PAGE_4K);
+
+	return &ctx->hash_context->mask_4k;
+}
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+	return ctx->hash_context->spt;
+}
+#endif
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_virtual_psize;
+extern int mmu_vmalloc_psize;
+extern int mmu_io_psize;
+#else /* CONFIG_PPC_64S_HASH_MMU */
+#ifdef CONFIG_PPC_64K_PAGES
+#define mmu_virtual_psize MMU_PAGE_64K
+#else
+#define mmu_virtual_psize MMU_PAGE_4K
+#endif
+#endif
+extern int mmu_linear_psize;
+extern int mmu_vmemmap_psize;
+
+/* MMU initialization */
+void mmu_early_init_devtree(void);
+void hash__early_init_devtree(void);
+void radix__early_init_devtree(void);
+#ifdef CONFIG_PPC_PKEY
+void pkey_early_init_devtree(void);
+#else
+static inline void pkey_early_init_devtree(void) {}
+#endif
+
+extern void hash__early_init_mmu(void);
+extern void radix__early_init_mmu(void);
+static inline void __init early_init_mmu(void)
+{
+	if (radix_enabled())
+		return radix__early_init_mmu();
+	return hash__early_init_mmu();
+}
+extern void hash__early_init_mmu_secondary(void);
+extern void radix__early_init_mmu_secondary(void);
+static inline void early_init_mmu_secondary(void)
+{
+	if (radix_enabled())
+		return radix__early_init_mmu_secondary();
+	return hash__early_init_mmu_secondary();
+}
+
+extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+					 phys_addr_t first_memblock_size);
+static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+					      phys_addr_t first_memblock_size)
+{
+	if (early_radix_enabled())
+		return;
+
+	/*
+	 * Hash has stricter restrictions. At this point we don't know which
+	 * translation we will pick, hence go with the hash restrictions.
+	 */
+	hash__setup_initial_memory_limit(first_memblock_base, first_memblock_size);
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void __init radix_init_pseries(void);
+#else
+static inline void radix_init_pseries(void) { }
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define arch_clear_mm_cpumask_cpu(cpu, mm)				\
+	do {								\
+		if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {		\
+			dec_mm_active_cpus(mm);				\
+			cpumask_clear_cpu(cpu, mm_cpumask(mm));		\
+		}							\
+	} while (0)
+
+void cleanup_cpu_mmu_context(void);
+#endif
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
+{
+	int slot = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+	/* A slot past the end of extended_id[] should never happen. */
+	if (unlikely(slot >= ARRAY_SIZE(ctx->extended_id))) {
+		WARN_ON(1);
+		return 0;
+	}
+	return ctx->extended_id[slot];
+}
+
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+					  unsigned long ea, int ssize)
+{
+	unsigned long context = get_user_context(ctx, ea);
+
+	return get_vsid(context, ea, ssize);
+}
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
new file mode 100644
index 0000000000..dd2cff53a1
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+/*
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/kmemleak.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+	struct vmemmap_backing *list;
+	unsigned long phys;
+	unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
+extern void pmd_fragment_free(unsigned long *);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+extern void __tlb_remove_table(void *_table);
+void pte_frag_destroy(void *pte_frag);
+
+static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
+#else
+	gfp_t gfp = pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL);
+	struct page *pgd_pages = alloc_pages(gfp, 4);
+
+	if (!pgd_pages)
+		return NULL;
+	return (pgd_t *)page_address(pgd_pages);
+#endif
+}
+
+static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	free_page((unsigned long)pgd);
+#else
+	free_pages((unsigned long)pgd, 4);
+#endif
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *table;
+
+	if (radix_enabled())
+		return radix__pgd_alloc(mm);
+
+	table = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+				 pgtable_gfp_flags(mm, GFP_KERNEL));
+	if (unlikely(!table))
+		return NULL;
+
+	/*
+	 * The PGD holds references to PUDs, but they are not full pointers
+	 * and so kmemleak can't recognise them. Don't have it scan the PGD
+	 * for pointers.
+	 */
+	kmemleak_no_scan(table);
+
+	/*
+	 * With hugetlb, we don't clear the second half of the page table.
+	 * If the same slab cache is shared with the pmd or pud level table,
+	 * the full table has to be zeroed out on alloc.
+	 * With 4K we don't store slot in the second half, so this is not
+	 * needed for 4k.
+	 */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
+	(H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX)
+	memset(table, 0, PGD_TABLE_SIZE);
+#endif
+	return table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	if (radix_enabled())
+		return radix__pgd_free(mm, pgd);
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud)
+{
+	*pgd =  __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	gfp_t gfp = pgtable_gfp_flags(mm, GFP_KERNEL);
+	pud_t *new_pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), gfp);
+
+	/*
+	 * PUDs are typically only referred to by their PGD, but kmemleak is
+	 * not able to recognise those references as pointers, which leads
+	 * to false leak reports.
+	 * So tell kmemleak to ignore the PUD: don't scan it for pointers
+	 * and don't consider it a leak.
+	 */
+	kmemleak_ignore(new_pud);
+
+	return new_pud;
+}
+
+static inline void __pud_free(pud_t *pud)
+{
+	struct page *pud_page = virt_to_page(pud);
+
+	/*
+	 * Early pud pages allocated via the memblock allocator can't be
+	 * freed directly to slab. KFENCE pages have both the reserved and
+	 * slab flags set, so they still need to be freed with
+	 * kmem_cache_free().
+	 */
+	if (!PageReserved(pud_page) || PageSlab(pud_page))
+		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
+	else
+		free_reserved_page(pud_page);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	return __pud_free(pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	*pud = __pud(__pgtable_ptr_val(pmd) | PUD_VAL_BITS);
+}
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				  unsigned long address)
+{
+	pgtable_free_tlb(tlb, pud, PUD_INDEX);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return pmd_fragment_alloc(mm, addr);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	pmd_fragment_free((unsigned long *)pmd);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				  unsigned long address)
+{
+	return pgtable_free_tlb(tlb, pmd, PMD_INDEX);
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte)
+{
+	*pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t pte_page)
+{
+	*pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	pgtable_free_tlb(tlb, table, PTE_INDEX);
+}
+
+extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
+static inline void update_page_count(int psize, long count)
+{
+	if (IS_ENABLED(CONFIG_PROC_FS))
+		atomic_long_add(count, &direct_pages_count[psize]);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
new file mode 100644
index 0000000000..48f21820af
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H
+/*
+ * hash 4k can't share hugetlb and also doesn't support THP
+ */
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int pmd_huge(pmd_t pmd)
+{
+	/* Without radix a pmd is never reported as huge here. */
+	if (!radix_enabled())
+		return 0;
+
+	/* leaf pte for huge page */
+	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	/* Without radix a pud is never reported as huge here. */
+	if (!radix_enabled())
+		return 0;
+
+	/* leaf pte for huge page */
+	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
+/*
+ * With radix, we have hugepage ptes in the pud and pmd entries. We don't
+ * need to setup hugepage directory for them. Our pte and page directory format
+ * enable us to have this enabled.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	if (radix_enabled())
+		return 0;
+	return hash__hugepd_ok(hpd);
+}
+#define is_hugepd(hpd)		(hugepd_ok(hpd))
+
+/*
+ * 16M and 16G huge page directory tables are allocated from slab cache
+ *
+ */
+#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24)
+#define H_16G_CACHE_INDEX                                                      \
+	(PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34)
+
+static inline int get_hugepd_cache_index(int index)
+{
+	switch (index) {
+	case H_16M_CACHE_INDEX:
+		return HTLB_16M_INDEX;
+	case H_16G_CACHE_INDEX:
+		return HTLB_16G_INDEX;
+	default:
+		BUG();
+	}
+	/* should not reach */
+}
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
new file mode 100644
index 0000000000..2fce3498b0
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
+ * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
+ *
+ * returns true for pmd migration entries, THP, devmap, hugetlb
+ * But compile time dependent on CONFIG_HUGETLB_PAGE
+ */
+static inline int pmd_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	/*
+	 * leaf pte for huge page
+	 */
+	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
+/*
+ * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
+ * need to setup hugepage directory for them. Our pte and page directory format
+ * enable us to have this enabled.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	return 0;
+}
+
+#define is_hugepd(pdep)			0
+
+/*
+ * This should never get called
+ */
+static inline int get_hugepd_cache_index(int index)
+{
+	BUG();
+}
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+			       unsigned long pfn, pgprot_t prot)
+{
+	if (radix_enabled())
+		BUG();
+	return hash__remap_4k_pfn(vma, addr, pfn, prot);
+}
+#endif	/* __ASSEMBLY__ */
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
new file mode 100644
index 0000000000..5c497c862d
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -0,0 +1,1500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+
+#include <asm-generic/pgtable-nop4d.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/mmdebug.h>
+#include <linux/bug.h>
+#include <linux/sizes.h>
+#endif
+
+/*
+ * Common bits between hash and Radix page table
+ */
+
+#define _PAGE_EXEC		0x00001 /* execute permission */
+#define _PAGE_WRITE		0x00002 /* write access allowed */
+#define _PAGE_READ		0x00004	/* read access allowed */
+#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
+#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
+#define _PAGE_SAO		0x00010 /* Strong access order */
+#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
+#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
+#define _PAGE_DIRTY		0x00080 /* C: page changed */
+#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
+/*
+ * Software bits
+ */
+#define _RPAGE_SW0		0x2000000000000000UL
+#define _RPAGE_SW1		0x00800
+#define _RPAGE_SW2		0x00400
+#define _RPAGE_SW3		0x00200
+#define _RPAGE_RSV1		0x00040UL
+
+#define _RPAGE_PKEY_BIT4	0x1000000000000000UL
+#define _RPAGE_PKEY_BIT3	0x0800000000000000UL
+#define _RPAGE_PKEY_BIT2	0x0400000000000000UL
+#define _RPAGE_PKEY_BIT1	0x0200000000000000UL
+#define _RPAGE_PKEY_BIT0	0x0100000000000000UL
+
+#define _PAGE_PTE		0x4000000000000000UL	/* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT		0x8000000000000000UL	/* pte contains a translation */
+/*
+ * We need to mark a pmd pte invalid while splitting. We can do that by clearing
+ * the _PAGE_PRESENT bit. But then that will be taken as a swap pte. In order to
+ * differentiate between the two, use a SW field when invalidating.
+ *
+ * We do that temporary invalidate for regular pte entry in ptep_set_access_flags
+ *
+ * This is used only when _PAGE_PRESENT is cleared.
+ */
+#define _PAGE_INVALID		_RPAGE_SW0
+
+/*
+ * Top and bottom bits of RPN which can be used by hash
+ * translation mode, because we expect them to be zero
+ * otherwise.
+ */
+#define _RPAGE_RPN0		0x01000
+#define _RPAGE_RPN1		0x02000
+#define _RPAGE_RPN43		0x0080000000000000UL
+#define _RPAGE_RPN42		0x0040000000000000UL
+#define _RPAGE_RPN41		0x0020000000000000UL
+
+/* Max physical address bit as per radix table */
+#define _RPAGE_PA_MAX		56
+
+/*
+ * Max physical address bit we will use for now.
+ *
+ * This is mostly a hardware limitation and for now Power9 has
+ * a 51 bit limit.
+ *
+ * This is different from the number of physical bits required to address
+ * the last byte of memory. That is defined by MAX_PHYSMEM_BITS.
+ * MAX_PHYSMEM_BITS is a linux limitation imposed by the maximum
+ * number of sections we can support (SECTIONS_SHIFT).
+ *
+ * This is different from Radix page table limitation above and
+ * should always be less than that. The limit is done such that
+ * we can overload the bits between _RPAGE_PA_MAX and _PAGE_PA_MAX
+ * for hash linux page table specific bits.
+ *
+ * In order to be compatible with future hardware generations we keep
+ * some offsets and limit this for now to 53
+ */
+#define _PAGE_PA_MAX		53
+
+#define _PAGE_SOFT_DIRTY	_RPAGE_SW3 /* software: software dirty tracking */
+#define _PAGE_SPECIAL		_RPAGE_SW2 /* software: special page */
+#define _PAGE_DEVMAP		_RPAGE_SW1 /* software: ZONE_DEVICE page */
+
+/*
+ * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
+ * Instead of fixing all of them, add an alternate define which
+ * maps CI pte mapping.
+ */
+#define _PAGE_NO_CACHE		_PAGE_TOLERANT
+/*
+ * We support _RPAGE_PA_MAX bit real address in pte. On the linux side
+ * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
+ * and everything below PAGE_SHIFT.
+ */
+#define PTE_RPN_MASK	(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
+#define PTE_RPN_SHIFT	PAGE_SHIFT
+/*
+ * set of bits not changed in pmd_modify. Even though we have hash specific bits
+ * in here, on radix we expect them to be zero.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+/*
+ * user access blocked by key
+ */
+#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_ROX	 (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |	\
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table,
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now (we could make write-only
+ * pages on BookE but we don't bother for now). Execute permission control is
+ * possible on platforms that define _PAGE_EXEC
+ */
+#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */
+#define PAGE_EXECONLY	__pgprot(_PAGE_BASE | _PAGE_EXEC)
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NON_IDEMPOTENT)
+#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+#ifndef __ASSEMBLY__
+/*
+ * page table defines
+ */
+extern unsigned long __pte_index_size;
+extern unsigned long __pmd_index_size;
+extern unsigned long __pud_index_size;
+extern unsigned long __pgd_index_size;
+extern unsigned long __pud_cache_index;
+#define PTE_INDEX_SIZE  __pte_index_size
+#define PMD_INDEX_SIZE  __pmd_index_size
+#define PUD_INDEX_SIZE  __pud_index_size
+#define PGD_INDEX_SIZE  __pgd_index_size
+/* pmd table use page table fragments */
+#define PMD_CACHE_INDEX  0
+#define PUD_CACHE_INDEX __pud_cache_index
+/*
+ * Because of use of pte fragments and THP, size of page table
+ * are not always derived out of index size above.
+ */
+extern unsigned long __pte_table_size;
+extern unsigned long __pmd_table_size;
+extern unsigned long __pud_table_size;
+extern unsigned long __pgd_table_size;
+#define PTE_TABLE_SIZE	__pte_table_size
+#define PMD_TABLE_SIZE	__pmd_table_size
+#define PUD_TABLE_SIZE	__pud_table_size
+#define PGD_TABLE_SIZE	__pgd_table_size
+
+extern unsigned long __pmd_val_bits;
+extern unsigned long __pud_val_bits;
+extern unsigned long __pgd_val_bits;
+#define PMD_VAL_BITS	__pmd_val_bits
+#define PUD_VAL_BITS	__pud_val_bits
+#define PGD_VAL_BITS	__pgd_val_bits
+
+extern unsigned long __pte_frag_nr;
+#define PTE_FRAG_NR __pte_frag_nr
+extern unsigned long __pte_frag_size_shift;
+#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+
+extern unsigned long __pmd_frag_nr;
+#define PMD_FRAG_NR __pmd_frag_nr
+extern unsigned long __pmd_frag_size_shift;
+#define PMD_FRAG_SIZE_SHIFT __pmd_frag_size_shift
+#define PMD_FRAG_SIZE (1UL << PMD_FRAG_SIZE_SHIFT)
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+#define MAX_PTRS_PER_PTE ((H_PTRS_PER_PTE > R_PTRS_PER_PTE) ? H_PTRS_PER_PTE : R_PTRS_PER_PTE)
+#define MAX_PTRS_PER_PMD ((H_PTRS_PER_PMD > R_PTRS_PER_PMD) ? H_PTRS_PER_PMD : R_PTRS_PER_PMD)
+#define MAX_PTRS_PER_PUD ((H_PTRS_PER_PUD > R_PTRS_PER_PUD) ? H_PTRS_PER_PUD : R_PTRS_PER_PUD)
+#define MAX_PTRS_PER_PGD	(1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
+				       H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define P4D_MASKED_BITS		0xc0000000000000ffUL
+
+/*
+ * Used as an indicator for rcu callback functions
+ */
+enum pgtable_index {
+	PTE_INDEX = 0,
+	PMD_INDEX,
+	PUD_INDEX,
+	PGD_INDEX,
+	/*
+	 * Below are used with 4k page size and hugetlb
+	 */
+	HTLB_16M_INDEX,
+	HTLB_16G_INDEX,
+};
+
+extern unsigned long __vmalloc_start;
+extern unsigned long __vmalloc_end;
+#define VMALLOC_START	__vmalloc_start
+#define VMALLOC_END	__vmalloc_end
+
+static inline unsigned int ioremap_max_order(void)
+{
+	if (!radix_enabled())
+		return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+	return PUD_SHIFT;
+}
+#define IOREMAP_MAX_ORDER ioremap_max_order()
+
+extern unsigned long __kernel_virt_start;
+extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
+#define KERN_VIRT_START __kernel_virt_start
+#define KERN_IO_START  __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
+extern struct page *vmemmap;
+extern unsigned long pci_io_base;
+#endif /* __ASSEMBLY__ */
+
+#include <asm/book3s/64/hash.h>
+#include <asm/book3s/64/radix.h>
+
+#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
+#define  MAX_PHYSMEM_BITS	H_MAX_PHYSMEM_BITS
+#else
+#define  MAX_PHYSMEM_BITS	R_MAX_PHYSMEM_BITS
+#endif
+
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/pgtable-64k.h>
+#else
+#include <asm/book3s/64/pgtable-4k.h>
+#endif
+
+#include <asm/barrier.h>
+/*
+ * IO space itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
+ *
+ *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
+ *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
+ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
+ */
+#define FULL_IO_SIZE	0x80000000ul
+#define  ISA_IO_BASE	(KERN_IO_START)
+#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
+#define  PHB_IO_BASE	(ISA_IO_END)
+#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
+#define IOREMAP_BASE	(PHB_IO_END)
+#define IOREMAP_START	(ioremap_bot)
+#define IOREMAP_END	(KERN_IO_END - FIXADDR_SIZE)
+#define FIXADDR_SIZE	SZ_32M
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This is the default implementation of various PTE accessors, it's
+ * used in all cases except Book3S with 64K pages where we have a
+ * concept of sub-pages
+ */
+#ifndef __real_pte
+
+#define __real_pte(e, p, o)		((real_pte_t){(e)})
+#define __rpte_to_pte(r)	((r).pte)
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
+	do {							         \
+		index = 0;					         \
+		shift = mmu_psize_defs[psize].shift;		         \
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * We expect this to be called only for user addresses or kernel virtual
+ * addresses other than the linear mapping.
+ */
+#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
+
+#endif /* __real_pte */
+
+static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep, unsigned long clr,
+				       unsigned long set, int huge)
+{
+	if (radix_enabled())
+		return radix__pte_update(mm, addr, ptep, clr, set, huge);
+	return hash__pte_update(mm, addr, ptep, clr, set, huge);
+}
+/*
+ * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally.
+ * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
+ * function for both hash and radix.
+ */
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	/*
+	 * Nothing to do unless _PAGE_ACCESSED or H_PAGE_HASHPTE is set.
+	 */
+	if (!(pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)))
+		return 0;
+	return !!(pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0) & _PAGE_ACCESSED);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)	\
+({								\
+	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+})
+
+/*
+ * On Book3S CPUs, clearing the accessed bit without a TLB flush
+ * doesn't cause data corruption. [ It could cause incorrect
+ * page aging and the (mistaken) reclaim of hot pages, but the
+ * chance of that should be relatively low. ]
+ *
+ * So as a performance optimization don't flush the TLB when
+ * clearing the accessed bit, it will eventually be flushed by
+ * a context switch or a VM operation anyway. [ In the rare
+ * event of it not getting flushed for a long time the delay
+ * shouldn't really matter because there's no real memory
+ * pressure for swapout to react to. ]
+ *
+ * Note: this optimisation also exists in pte_needs_flush() and
+ * huge_pmd_needs_flush().
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young ptep_test_and_clear_young
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+#define pmdp_clear_flush_young pmdp_test_and_clear_young
+
+static inline int pte_write(pte_t pte)
+{
+	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
+}
+
+static inline int pte_read(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_READ)) != 0;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	/* Nothing to do unless the entry is currently writable. */
+	if (pte_write(*ptep))
+		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	/* Same as ptep_set_wrprotect(), but passes 1 as the last pte_update() argument. */
+	if (pte_write(*ptep))
+		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	/* Clear every bit of the entry and hand back its previous value. */
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long addr,
+					    pte_t *ptep, int full)
+{
+	/* Not a full-mm clear, or not radix: use the regular clear. */
+	if (!full || !radix_enabled())
+		return ptep_get_and_clear(mm, addr, ptep);
+	/*
+	 * This is a full mm pte clear, so we can be sure there is no
+	 * parallel set_pte and may use the radix-specific helper.
+	 */
+	return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+}
+
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	/* Same update as ptep_get_and_clear(), with the old value discarded. */
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)) != 0;
+}
+
+static inline int pte_young(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_ACCESSED)) != 0;
+}
+
+static inline int pte_special(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL)) != 0;
+}
+
+static inline bool pte_exec(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_EXEC)) != 0;
+}
+
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_SOFT_DIRTY)) != 0;
+}
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_SOFT_DIRTY) | pte_raw(pte));
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_SOFT_DIRTY) & pte_raw(pte));
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+	const __be64 bits = pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX);
+	return bits == cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline bool pte_hw_valid(pte_t pte)
+{
+	const __be64 valid = cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
+	return (pte_raw(pte) & valid) == valid;
+}
+
+static inline int pte_present(pte_t pte)
+{
+	const __be64 invalid = cpu_to_be64(_PAGE_INVALID | _PAGE_PTE);
+
+	/*
+	 * A pte is considered present if _PAGE_PRESENT is set. A pte that
+	 * was marked invalid during ptep_set_access_flags must also count
+	 * as present, so when the hardware-valid check fails we fall back
+	 * to looking for _PAGE_INVALID together with _PAGE_PTE.
+	 */
+	if (pte_hw_valid(pte))
+		return true;
+	return (pte_raw(pte) & invalid) == invalid;
+}
+
+#ifdef CONFIG_PPC_MEM_KEYS
+extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
+#else
+static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+	return true;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static inline bool pte_user(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED)) == 0;
+}
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+	/*
+	 * Any access needs a present, user-accessible pte with _PAGE_READ
+	 * set; _PAGE_READ will be cleared for PROT_NONE.
+	 */
+	if (!(pte_present(pte) && pte_user(pte) && pte_read(pte)))
+		return false;
+
+	/* A write additionally needs write permission in the pte. */
+	if (write && !pte_write(pte))
+		return false;
+	return arch_pte_access_permitted(pte_val(pte), write, 0);
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	/* The pfn must not lose bits when shifted and must fit PTE_RPN_MASK. */
+	VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
+	VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
+	return __pte(_PAGE_PTE | pgprot_val(pgprot) | ((pte_basic_t)pfn << PAGE_SHIFT));
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_WRITE) & pte_raw(pte));
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_EXEC) & pte_raw(pte));
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_DIRTY) & pte_raw(pte));
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_ACCESSED) & pte_raw(pte));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_EXEC) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	/*
+	 * Write permission implies read permission, so _PAGE_RW sets both.
+	 */
+	return __pte_raw(cpu_to_be64(_PAGE_RW) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_SOFT_DIRTY | _PAGE_DIRTY) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_ACCESSED) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_SPECIAL) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_mkdevmap(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_DEVMAP | _PAGE_SPECIAL) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_PRIVILEGED) | pte_raw(pte));
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_PRIVILEGED) & pte_raw(pte));
+}
+
+/*
+ * This is potentially called with a pmd as the argument, in which case it's not
+ * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set.
+ * That's because the bit we use for _PAGE_DEVMAP is not reserved for software
+ * use in page directory entries (ie. non-ptes).
+ */
+static inline int pte_devmap(pte_t pte)
+{
+	const __be64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE);
+
+	return (pte_raw(pte) & mask) == mask;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	const __be64 kept = pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK);
+	/* FIXME!! check whether this needs to be a conditional */
+	return __pte_raw(kept | cpu_to_be64(pgprot_val(newprot)));
+}
+
+/* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() do { \
+	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+	/*							\
+	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
+	 * We filter HPTEFLAGS on set_pte.			\
+	 */							\
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & SWP_TYPE_MASK); \
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_EXCLUSIVE);	\
+	} while (0)
+
+#define SWP_TYPE_BITS 5
+#define SWP_TYPE_MASK		((1UL << SWP_TYPE_BITS) - 1)
+#define __swp_type(x)		((x).val & SWP_TYPE_MASK)
+#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+				(type) | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd)	(__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x)	(pte_pmd(__swp_entry_to_pte(x)))
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY	_PAGE_SOFT_DIRTY
+#else
+#define _PAGE_SWP_SOFT_DIRTY	0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
+#define _PAGE_SWP_EXCLUSIVE	_PAGE_NON_IDEMPOTENT
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_SWP_SOFT_DIRTY) | pte_raw(pte));
+}
+
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_SWP_SOFT_DIRTY)) != 0;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY) & pte_raw(pte));
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(_PAGE_SWP_EXCLUSIVE) | pte_raw(pte));
+}
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+	return (pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE)) != 0;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	return __pte_raw(cpu_to_be64(~_PAGE_SWP_EXCLUSIVE) & pte_raw(pte));
+}
+
+static inline bool check_pte_access(unsigned long access, unsigned long ptev)
+{
+	/*
+	 * Every bit requested in @access (the _PAGE_RWX and _PAGE_PRESENT
+	 * bits) has to be set in @ptev as well.
+	 */
+	if ((access & ptev) != access)
+		return false;
+
+	/*
+	 * Access to privilege space: _PAGE_PRIVILEGED has to be the same
+	 * in the request and in the pte.
+	 */
+	return !((access ^ ptev) & _PAGE_PRIVILEGED);
+}
+/*
+ * Generic functions with hash/radix callbacks
+ */
+
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address, int psize)
+{
+	/* The hash helper takes only ptep and entry; radix takes everything. */
+	if (!radix_enabled())
+		hash__ptep_set_access_flags(ptep, entry);
+	else
+		radix__ptep_set_access_flags(vma, ptep, entry, address, psize);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+	/* Compare using the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pte_same(pte_a, pte_b) :
+				 hash__pte_same(pte_a, pte_b);
+}
+
+static inline int pte_none(pte_t pte)
+{
+	/* Ask the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pte_none(pte) :
+				 hash__pte_none(pte);
+}
+
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+	const __be64 leaf = cpu_to_be64(_PAGE_PTE);
+
+	/* Warn (debug builds) about a caller that did not set _PAGE_PTE. */
+	VM_WARN_ON(!(pte_raw(pte) & leaf));
+	/* Keep adding _PAGE_PTE until we are sure all callers handle it. */
+	pte = __pte_raw(pte_raw(pte) | leaf);
+
+	if (radix_enabled())
+		radix__set_pte_at(mm, addr, ptep, pte, percpu);
+	else
+		hash__set_pte_at(mm, addr, ptep, pte, percpu);
+}
+
+#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	/* Drop the current cache-control bits, then set _PAGE_NON_IDEMPOTENT. */
+	return __pgprot(_PAGE_NON_IDEMPOTENT | (pgprot_val(prot) & ~_PAGE_CACHE_CTL));
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	/* Drop the current cache-control bits, then set _PAGE_TOLERANT. */
+	return __pgprot(_PAGE_TOLERANT | (pgprot_val(prot) & ~_PAGE_CACHE_CTL));
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return pgprot_noncached_wc(prot);
+}
+/*
+ * Check whether a pte mapping has the cache-inhibited property.
+ */
+static inline bool pte_ci(pte_t pte)
+{
+	const __be64 ctl = pte_raw(pte) & cpu_to_be64(_PAGE_CACHE_CTL);
+
+	/* True when the cache-control field equals either encoding below. */
+	if (ctl == cpu_to_be64(_PAGE_TOLERANT))
+		return true;
+	return ctl == cpu_to_be64(_PAGE_NON_IDEMPOTENT);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	const unsigned long hashed_leaf = H_PAGE_HASHPTE | _PAGE_PTE;
+
+	/*
+	 * Hash + DEBUG_VM only: warn if a hash pte may still map this entry.
+	 */
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled())
+		WARN_ON((pmd_val(*pmdp) & hashed_leaf) == hashed_leaf);
+	*pmdp = __pmd(0);
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+	return pmd_raw(pmd) == 0;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	const __be64 mask = cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID);
+
+	/*
+	 * A pmd is considered present if _PAGE_PRESENT is set.
+	 *
+	 * A pmd that has been marked invalid during a split must be
+	 * treated as present too, which is why _PAGE_INVALID is part of
+	 * the mask: finding either of the two bits set is enough.
+	 */
+	return (pmd_raw(pmd) & mask) != 0;
+}
+
+static inline int pmd_is_serializing(pmd_t pmd)
+{
+	const __be64 mask = cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID);
+	const __be64 bits = pmd_raw(pmd) & mask;
+
+	/*
+	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
+	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
+	 *
+	 * The same bit pattern may also be seen while the entry is being
+	 * flushed with its present bit temporarily cleared (see
+	 * ptep_modify_prot_start), so callers must ensure this case is
+	 * fine as well.
+	 */
+	return bits == cpu_to_be64(_PAGE_INVALID);
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	/* Use the radix or hash check, whichever MMU mode is active. */
+	return radix_enabled() ? radix__pmd_bad(pmd) :
+				 hash__pmd_bad(pmd);
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	const unsigned long hashed_leaf = H_PAGE_HASHPTE | _PAGE_PTE;
+
+	/*
+	 * Hash + DEBUG_VM only: warn if a hash pte may still map this entry.
+	 */
+	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled())
+		WARN_ON((pud_val(*pudp) & hashed_leaf) == hashed_leaf);
+	*pudp = __pud(0);
+}
+
+static inline int pud_none(pud_t pud)
+{
+	return pud_raw(pud) == 0;
+}
+
+static inline int pud_present(pud_t pud)
+{
+	return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)) != 0;
+}
+
+extern struct page *pud_page(pud_t pud);
+extern struct page *pmd_page(pmd_t pmd);
+static inline pte_t pud_pte(pud_t pud)
+{
+	return __pte_raw(pud_raw(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+	return __pud_raw(pte_raw(pte));
+}
+
+static inline pte_t *pudp_ptep(pud_t *pud)
+{
+	return (pte_t *)pud;
+}
+
+#define pud_pfn(pud)		pte_pfn(pud_pte(pud))
+#define pud_dirty(pud)		pte_dirty(pud_pte(pud))
+#define pud_young(pud)		pte_young(pud_pte(pud))
+#define pud_mkold(pud)		pte_pud(pte_mkold(pud_pte(pud)))
+#define pud_wrprotect(pud)	pte_pud(pte_wrprotect(pud_pte(pud)))
+#define pud_mkdirty(pud)	pte_pud(pte_mkdirty(pud_pte(pud)))
+#define pud_mkclean(pud)	pte_pud(pte_mkclean(pud_pte(pud)))
+#define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_mkwrite(pud)	pte_pud(pte_mkwrite_novma(pud_pte(pud)))
+#define pud_write(pud)		pte_write(pud_pte(pud))
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pud_soft_dirty(pud)    pte_soft_dirty(pud_pte(pud))
+#define pud_mksoft_dirty(pud)  pte_pud(pte_mksoft_dirty(pud_pte(pud)))
+#define pud_clear_soft_dirty(pud) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+static inline int pud_bad(pud_t pud)
+{
+	/* Use the radix or hash check, whichever MMU mode is active. */
+	return radix_enabled() ? radix__pud_bad(pud) :
+				 hash__pud_bad(pud);
+}
+
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+	return pte_access_permitted(pud_pte(pud), write);
+}
+
+#define __p4d_raw(x)	((p4d_t) { __pgd_raw(x) })
+static inline __be64 p4d_raw(p4d_t x)
+{
+	return pgd_raw(x.pgd);
+}
+
+#define p4d_write(p4d)		pte_write(p4d_pte(p4d))
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+	*p4dp = __p4d(0);
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+	return p4d_raw(p4d) == 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+	return (p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)) != 0;
+}
+
+static inline pte_t p4d_pte(p4d_t p4d)
+{
+	return __pte_raw(p4d_raw(p4d));
+}
+
+static inline p4d_t pte_p4d(pte_t pte)
+{
+	return __p4d_raw(pte_raw(pte));
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+	/* Use the radix or hash check, whichever MMU mode is active. */
+	return radix_enabled() ? radix__p4d_bad(p4d) :
+				 hash__p4d_bad(p4d);
+}
+
+#define p4d_access_permitted p4d_access_permitted
+static inline bool p4d_access_permitted(p4d_t p4d, bool write)
+{
+	return pte_access_permitted(p4d_pte(p4d), write);
+}
+
+extern struct page *p4d_page(p4d_t p4d);
+
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr)	__pa(ptr)
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+	return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS);
+}
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+	return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
+}
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+	if (radix_enabled()) {
+#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM) /* NOTE(review): is CONFIG_DEBUG_VM meant? */
+		unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
+		WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
+#endif
+		return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
+	}
+	return hash__map_kernel_page(ea, pa, prot);
+}
+
+void unmap_kernel_page(unsigned long va);
+
+static inline int __meminit vmemmap_create_mapping(unsigned long start,
+						   unsigned long page_size,
+						   unsigned long phys)
+{
+	/* Forward to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__vmemmap_create_mapping(start, page_size, phys) :
+				 hash__vmemmap_create_mapping(start, page_size, phys);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+{
+	if (radix_enabled())
+		radix__vmemmap_remove_mapping(start, page_size);
+	else
+		hash__vmemmap_remove_mapping(start, page_size);
+}
+#endif
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+static inline void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (!radix_enabled())
+		hash__kernel_map_pages(page, numpages, enable);
+	else
+		radix__kernel_map_pages(page, numpages, enable);
+}
+#endif
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte_raw(pmd_raw(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd_raw(pte_raw(pte));
+}
+
+static inline pte_t *pmdp_ptep(pmd_t *pmd)
+{
+	return (pte_t *)pmd;
+}
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
+#define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd)	pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd)		pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd)	pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+	/*
+	 * pmdp_invalidate leaves the pmd in the serializing state, which the
+	 * !pte_present() check in pte_access_permitted does not catch. It
+	 * must be refused here to prevent lock-free lookups, as part of the
+	 * serialize_against_pte_lookup() synchronisation.
+	 *
+	 * This also rejects a pmd whose hardware PRESENT bit is cleared
+	 * while the TLB is flushed, which is suboptimal but should not be
+	 * frequent.
+	 */
+	if (!pmd_is_serializing(pmd))
+		return pte_access_permitted(pmd_pte(pmd), write);
+
+	return false;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
+		       pud_t *pudp, pud_t pud);
+
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+					unsigned long addr, pmd_t *pmd)
+{
+}
+
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+					unsigned long addr, pud_t *pud)
+{
+}
+
+extern int hash__has_transparent_hugepage(void);
+static inline int has_transparent_hugepage(void)
+{
+	/* The answer comes from the helper of the active MMU mode. */
+	return radix_enabled() ? radix__has_transparent_hugepage() :
+				 hash__has_transparent_hugepage();
+}
+#define has_transparent_hugepage has_transparent_hugepage
+
+static inline int has_transparent_pud_hugepage(void)
+{
+	if (!radix_enabled())
+		return 0;
+	return radix__has_transparent_pud_hugepage();
+}
+#define has_transparent_pud_hugepage has_transparent_pud_hugepage
+
+static inline unsigned long
+pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
+		    unsigned long clr, unsigned long set)
+{
+	/* Forward the update to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pmd_hugepage_update(mm, addr, pmdp, clr, set) :
+				 hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
+}
+
+static inline unsigned long pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+						pud_t *pudp, unsigned long clr, unsigned long set)
+{
+	if (!radix_enabled()) {
+		BUG();
+		return pud_val(*pudp);
+	}
+	return radix__pud_hugepage_update(mm, addr, pudp, clr, set);
+}
+
+/*
+ * returns true for pmd migration entries, THP, devmap, hugetlb
+ * But compile time dependent on THP config
+ */
+static inline int pmd_large(pmd_t pmd)
+{
+	return (pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) != 0;
+}
+
+static inline int pud_large(pud_t pud)
+{
+	return (pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) != 0;
+}
+
+/*
+ * For radix we should always find H_PAGE_HASHPTE zero. Hence
+ * the below will work for radix too
+ */
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	const __be64 mask = cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE);
+
+	/* Neither bit is set: nothing to clear, so skip the update. */
+	if (!(pmd_raw(*pmdp) & mask))
+		return 0;
+	return !!(pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED);
+}
+
+static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pud_t *pudp)
+{
+	const __be64 mask = cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE);
+
+	/* Neither bit is set: nothing to clear, so skip the update. */
+	if (!(pud_raw(*pudp) & mask))
+		return 0;
+	return !!(pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp)
+{
+	/* Nothing to do unless the entry is currently writable. */
+	if (pmd_write(*pmdp))
+		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+}
+
+#define __HAVE_ARCH_PUDP_SET_WRPROTECT
+static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pud_t *pudp)
+{
+	/* Nothing to do unless the entry is currently writable. */
+	if (pud_write(*pudp))
+		pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0);
+}
+
+/*
+ * Only returns true for a THP. False for pmd migration entry.
+ * We also need to return true when we come across a pte that
+ * in between a thp split. While splitting THP, we mark the pmd
+ * invalid (pmdp_invalidate()) before we set it with pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that time
+ * should return true.
+ * We should not call this on a hugetlb entry. We should check for HugeTLB
+ * entry using vma->vm_flags
+ * The page table walk rule is explained in Documentation/mm/transhuge.rst
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/* Only a present pmd is handed to the MMU-specific check. */
+	if (!pmd_present(pmd))
+		return false;
+
+	return radix_enabled() ? radix__pmd_trans_huge(pmd) :
+				 hash__pmd_trans_huge(pmd);
+}
+
+static inline int pud_trans_huge(pud_t pud)
+{
+	/* Needs a present pud and radix; every other case reports 0. */
+	if (!pud_present(pud))
+		return false;
+	if (!radix_enabled())
+		return 0;
+	return radix__pud_trans_huge(pud);
+}
+
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	/* Compare using the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pmd_same(pmd_a, pmd_b) :
+				 hash__pmd_same(pmd_a, pmd_b);
+}
+
+#define pud_same pud_same
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+	/* Compare using the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pud_same(pud_a, pud_b) :
+				 hash__pud_same(pud_a, pud_b);
+}
+
+
+static inline pmd_t __pmd_mkhuge(pmd_t pmd)
+{
+	/* Forward to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pmd_mkhuge(pmd) :
+				 hash__pmd_mkhuge(pmd);
+}
+
+static inline pud_t __pud_mkhuge(pud_t pud)
+{
+	if (radix_enabled())
+		return radix__pud_mkhuge(pud);
+	BUG();
+	return pud;
+}
+
+/*
+ * pfn_pmd return a pmd_t that can be used as pmd pte entry.
+ */
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+	if (!radix_enabled())
+		WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) !=
+			cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE));
+	else
+		WARN_ON(!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)));
+#endif
+	return pmd;
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+	if (!radix_enabled())
+		WARN_ON(1);
+	else
+		WARN_ON(!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)));
+#endif
+	return pud;
+}
+
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+#define __HAVE_ARCH_PUDP_SET_ACCESS_FLAGS
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pud_t *pudp,
+				 pud_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long address, pud_t *pudp);
+
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pmd_t *pmdp)
+{
+	/* Forward to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pmdp_huge_get_and_clear(mm, addr, pmdp) :
+				 hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
+}
+
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp)
+{
+	if (!radix_enabled()) {
+		BUG();
+		return *pudp;
+	}
+	return radix__pudp_huge_get_and_clear(mm, addr, pudp);
+}
+
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+					unsigned long address, pmd_t *pmdp)
+{
+	/* Forward to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pmdp_collapse_flush(vma, address, pmdp) :
+				 hash__pmdp_collapse_flush(vma, address, pmdp);
+}
+#define pmdp_collapse_flush pmdp_collapse_flush
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+				   unsigned long addr,
+				   pmd_t *pmdp, int full);
+
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+				   unsigned long addr,
+				   pud_t *pudp, int full);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
+					      pmd_t *pmdp, pgtable_t pgtable)
+{
+	if (!radix_enabled())
+		return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+	return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+}
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
+						    pmd_t *pmdp)
+{
+	/* Forward to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pgtable_trans_huge_withdraw(mm, pmdp) :
+				 hash__pgtable_trans_huge_withdraw(mm, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			     pmd_t *pmdp);
+
+#define pmd_move_must_withdraw pmd_move_must_withdraw
+struct spinlock;
+extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+				  struct spinlock *old_pmd_ptl,
+				  struct vm_area_struct *vma);
+/*
+ * Hash translation mode use the deposited table to store hash pte
+ * slot information.
+ */
+#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
+static inline bool arch_needs_pgtable_deposit(void)
+{
+	const bool radix = radix_enabled();
+
+	return !radix;
+}
+extern void serialize_against_pte_lookup(struct mm_struct *mm);
+
+
+static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+{
+	/* Forward to the helper of the active MMU mode. */
+	return radix_enabled() ? radix__pmd_mkdevmap(pmd) :
+				 hash__pmd_mkdevmap(pmd);
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+	if (radix_enabled())
+		return radix__pud_mkdevmap(pud);
+	BUG();
+	return pud;
+}
+
+static inline int pmd_devmap(pmd_t pmd)
+{
+	return pte_devmap(pmd_pte(pmd));
+}
+
+static inline int pud_devmap(pud_t pud)
+{
+	return pte_devmap(pud_pte(pud));
+}
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+			     pte_t *, pte_t, pte_t);
+
+/*
+ * Returns true for a R -> RW upgrade of pte
+ */
+static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
+{
+	const bool was_writable = old_val & _PAGE_WRITE;
+	const bool now_writable = new_val & _PAGE_WRITE;
+
+	/* Not readable before: this cannot be an R -> RW upgrade. */
+	if (!(old_val & _PAGE_READ))
+		return false;
+	return !was_writable && now_writable;
+}
+
+/*
+ * Like pmd_huge() and pmd_large(), but works regardless of config options
+ */
+#define pmd_is_leaf pmd_is_leaf
+#define pmd_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+	return (pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) != 0;
+}
+
+#define pud_is_leaf pud_is_leaf
+#define pud_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+	return (pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) != 0;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h
new file mode 100644
index 0000000000..5b178139f3
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pkeys.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASM_POWERPC_BOOK3S_64_PKEYS_H
+#define _ASM_POWERPC_BOOK3S_64_PKEYS_H
+
+#include <asm/book3s/64/hash-pkey.h>
+
+static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+	/* Without the MMU_FTR_PKEY feature there are no key bits to return. */
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return 0;
+	if (radix_enabled())
+		BUG();
+	return hash__vmflag_to_pte_pkey_bits(vm_flags);
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+	if (radix_enabled())
+		BUG();
+	return hash__pte_to_pkey_bits(pteflags);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_64_PKEYS_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
new file mode 100644
index 0000000000..035ceecd6d
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_4K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_4K_H
+
+/*
+ * For 4K page size the supported index sizes are 13/9/9/9 (PGD/PUD/PMD/PTE)
+ */
+#define RADIX_PTE_INDEX_SIZE   9  // size: 8B <<  9 =  4KB, maps 2^9  x    4K =   2MB
+#define RADIX_PMD_INDEX_SIZE   9  // size: 8B <<  9 =  4KB, maps 2^9  x   2MB =   1GB
+#define RADIX_PUD_INDEX_SIZE   9  // size: 8B <<  9 =  4KB, maps 2^9  x   1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE  13  // size: 8B << 13 = 64KB, maps 2^13 x 512GB =   4PB
+
+/*
+ * One fragment per page
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
+
+#define RADIX_PMD_FRAG_SIZE_SHIFT  (RADIX_PMD_INDEX_SIZE + 3)
+#define RADIX_PMD_FRAG_NR	(PAGE_SIZE >> RADIX_PMD_FRAG_SIZE_SHIFT)
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
new file mode 100644
index 0000000000..54e33828b0
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_64K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_64K_H
+
+/*
+ * For 64K page size supported index is 13/9/9/5
+ */
+#define RADIX_PTE_INDEX_SIZE   5  // size: 8B <<  5 = 256B, maps 2^5  x   64K =   2MB
+#define RADIX_PMD_INDEX_SIZE   9  // size: 8B <<  9 =  4KB, maps 2^9  x   2MB =   1GB
+#define RADIX_PUD_INDEX_SIZE   9  // size: 8B <<  9 =  4KB, maps 2^9  x   1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE  13  // size: 8B << 13 = 64KB, maps 2^13 x 512GB =   4PB
+
+/*
+ * We use a 256 byte PTE page fragment in radix
+ * 8 bytes per each PTE entry.
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
+
+#define RADIX_PMD_FRAG_SIZE_SHIFT  (RADIX_PMD_INDEX_SIZE + 3)
+#define RADIX_PMD_FRAG_NR	(PAGE_SIZE >> RADIX_PMD_FRAG_SIZE_SHIFT)
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
new file mode 100644
index 0000000000..357e23a403
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_H
+#define _ASM_POWERPC_PGTABLE_RADIX_H
+
+#include <asm/asm-const.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
+#endif
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/radix-64k.h>
+#else
+#include <asm/book3s/64/radix-4k.h>
+#endif
+
+#ifndef __ASSEMBLY__
+#include <asm/book3s/64/tlbflush-radix.h>
+#include <asm/cpu_has_feature.h>
+#endif
+
+/* An empty PTE can still have a R or C writeback */
+#define RADIX_PTE_NONE_MASK		(_PAGE_DIRTY | _PAGE_ACCESSED)
+
+/* Bits to set in a RPMD/RPUD/RPGD */
+#define RADIX_PMD_VAL_BITS		(0x8000000000000000UL | RADIX_PTE_INDEX_SIZE)
+#define RADIX_PUD_VAL_BITS		(0x8000000000000000UL | RADIX_PMD_INDEX_SIZE)
+#define RADIX_PGD_VAL_BITS		(0x8000000000000000UL | RADIX_PUD_INDEX_SIZE)
+
+/* Don't have anything in the reserved bits and leaf bits */
+#define RADIX_PMD_BAD_BITS		0x60000000000000e0UL
+#define RADIX_PUD_BAD_BITS		0x60000000000000e0UL
+#define RADIX_P4D_BAD_BITS		0x60000000000000e0UL
+
+#define RADIX_PMD_SHIFT		(PAGE_SHIFT + RADIX_PTE_INDEX_SIZE)
+#define RADIX_PUD_SHIFT		(RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE)
+#define RADIX_PGD_SHIFT		(RADIX_PUD_SHIFT + RADIX_PUD_INDEX_SIZE)
+
+#define R_PTRS_PER_PTE		(1 << RADIX_PTE_INDEX_SIZE)
+#define R_PTRS_PER_PMD		(1 << RADIX_PMD_INDEX_SIZE)
+#define R_PTRS_PER_PUD		(1 << RADIX_PUD_INDEX_SIZE)
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define RADIX_PGTABLE_EADDR_SIZE (RADIX_PTE_INDEX_SIZE + RADIX_PMD_INDEX_SIZE +	\
+			      RADIX_PUD_INDEX_SIZE + RADIX_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define RADIX_PGTABLE_RANGE (ASM_CONST(1) << RADIX_PGTABLE_EADDR_SIZE)
+
+/*
+ * We support a 52-bit address space. Use the top bit for the kernel
+ * virtual mapping. Also make sure the kernel fits in the top
+ * quadrant.
+ *
+ *           +------------------+
+ *           +------------------+  Kernel virtual map (0xc008000000000000)
+ *           |                  |
+ *           |                  |
+ *           |                  |
+ * 0b11......+------------------+  Kernel linear map (0xc....)
+ *           |                  |
+ *           |     2 quadrant   |
+ *           |                  |
+ * 0b10......+------------------+
+ *           |                  |
+ *           |    1 quadrant    |
+ *           |                  |
+ * 0b01......+------------------+
+ *           |                  |
+ *           |    0 quadrant    |
+ *           |                  |
+ * 0b00......+------------------+
+ *
+ *
+ * 3rd quadrant expanded:
+ * +------------------------------+  Highest address (0xc010000000000000)
+ * +------------------------------+  KASAN shadow end (0xc00fc00000000000)
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel vmemmap end/shadow start (0xc00e000000000000)
+ * |                              |
+ * |           512TB		  |
+ * |                              |
+ * +------------------------------+  Kernel IO map end/vmemmap start
+ * |                              |
+ * |           512TB		  |
+ * |                              |
+ * +------------------------------+  Kernel vmap end/ IO map start
+ * |                              |
+ * |           512TB		  |
+ * |                              |
+ * +------------------------------+  Kernel virt start (0xc008000000000000)
+ * |                              |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel linear (0xc.....)
+ */
+
+/* For the sizes of the shadow area, see kasan.h */
+
+/*
+ * If we store section details in page->flags we can't increase MAX_PHYSMEM_BITS:
+ * increasing SECTIONS_WIDTH leaves no room for node details in page->flags, and
+ * page_to_nid then has to do a page->section->node lookup.
+ * Hence only increase for VMEMMAP. Further, depend on SPARSEMEM_EXTREME to reduce
+ * memory requirements with a large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define R_MAX_PHYSMEM_BITS	51
+#else
+#define R_MAX_PHYSMEM_BITS	46
+#endif
+
+#define RADIX_KERN_VIRT_START	ASM_CONST(0xc008000000000000)
+/*
+ * 49 =  MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
+ * the same value as hash.
+ */
+#define RADIX_KERN_MAP_SIZE	(1UL << 49)
+
+#define RADIX_VMALLOC_START	RADIX_KERN_VIRT_START
+#define RADIX_VMALLOC_SIZE	RADIX_KERN_MAP_SIZE
+#define RADIX_VMALLOC_END	(RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
+
+#define RADIX_KERN_IO_START	RADIX_VMALLOC_END
+#define RADIX_KERN_IO_SIZE	RADIX_KERN_MAP_SIZE
+#define RADIX_KERN_IO_END	(RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
+
+#define RADIX_VMEMMAP_START	RADIX_KERN_IO_END
+#define RADIX_VMEMMAP_SIZE	RADIX_KERN_MAP_SIZE
+#define RADIX_VMEMMAP_END	(RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
+
+#ifndef __ASSEMBLY__
+#define RADIX_PTE_TABLE_SIZE	(sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
+#define RADIX_PMD_TABLE_SIZE	(sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE)
+#define RADIX_PUD_TABLE_SIZE	(sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
+#define RADIX_PGD_TABLE_SIZE	(sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void radix__mark_rodata_ro(void);
+extern void radix__mark_initmem_nx(void);
+#endif
+
+extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+					 pte_t entry, unsigned long address,
+					 int psize);
+
+extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+					   unsigned long addr, pte_t *ptep,
+					   pte_t old_pte, pte_t pte);
+
+static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
+					       unsigned long set)
+{
+	__be64 old_be, tmp_be;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n"
+	"	andc	%1,%0,%5	\n"
+	"	or	%1,%1,%4	\n"
+	"	stdcx.	%1,0,%3		\n"
+	"	bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+	: "cc" );
+
+	return be64_to_cpu(old_be);
+}
+
+static inline unsigned long radix__pte_update(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, unsigned long clr,
+					unsigned long set,
+					int huge)
+{
+	unsigned long old_pte;
+
+	old_pte = __radix_pte_update(ptep, clr, set);
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+	return old_pte;
+}
+
+static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
+						   unsigned long addr,
+						   pte_t *ptep, int full)
+{
+	unsigned long old;
+
+	/* Not a full clear: clear every bit through radix__pte_update(). */
+	if (!full)
+		return __pte(radix__pte_update(mm, addr, ptep, ~0ul, 0, 0));
+
+	old = pte_val(*ptep);
+	*ptep = __pte(0);
+	return __pte(old);
+}
+
+static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
+{
+	return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0);
+}
+
+static inline int radix__pte_none(pte_t pte)
+{
+	return (pte_val(pte) & ~RADIX_PTE_NONE_MASK) == 0;
+}
+
+static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
+				 pte_t *ptep, pte_t pte, int percpu)
+{
+	*ptep = pte;
+
+	/*
+	 * The architecture suggests a ptesync after setting the pte, which
+	 * orders the store that updates the pte with subsequent page table
+	 * walk accesses which may load the pte. Without this it may be
+	 * possible for a subsequent access to result in spurious fault.
+	 *
+	 * This is not necessary for correctness, because a spurious fault
+	 * is tolerated by the page fault handler, and this store will
+	 * eventually be seen. In testing, there was no noticeable increase
+	 * in user faults on POWER9. Avoiding ptesync here is a significant
+	 * win for things like fork. If a future microarchitecture benefits
+	 * from ptesync, it should probably go into update_mmu_cache, rather
+	 * than set_pte_at (which is used to set ptes unrelated to faults).
+	 *
+	 * Spurious faults from the kernel memory are not tolerated, so there
+	 * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows
+	 * the pte update sequence from ISA Book III 6.10 Translation Table
+	 * Update Synchronization Requirements.
+	 */
+}
+
+static inline int radix__pmd_bad(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & RADIX_PMD_BAD_BITS);
+}
+
+static inline int radix__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return ((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) == 0);
+}
+
+static inline int radix__pud_bad(pud_t pud)
+{
+	return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
+}
+
+static inline int radix__pud_same(pud_t pud_a, pud_t pud_b)
+{
+	return ((pud_raw(pud_a) ^ pud_raw(pud_b)) == 0);
+}
+
+static inline int radix__p4d_bad(p4d_t p4d)
+{
+	return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline int radix__pmd_trans_huge(pmd_t pmd)
+{
+	return (pmd_val(pmd) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
+}
+
+static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+
+static inline int radix__pud_trans_huge(pud_t pud)
+{
+	return (pud_val(pud) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
+}
+
+static inline pud_t radix__pud_mkhuge(pud_t pud)
+{
+	return __pud(pud_val(pud) | _PAGE_PTE);
+}
+
+extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+					  pmd_t *pmdp, unsigned long clr,
+					  unsigned long set);
+extern unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+						pud_t *pudp, unsigned long clr,
+						unsigned long set);
+extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+					pgtable_t pgtable);
+extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+				      unsigned long addr, pmd_t *pmdp);
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pud_t *pudp);
+
+static inline int radix__has_transparent_hugepage(void)
+{
+	/*
+	 * On radix, THP is available when the 2M page size sits at PMD level.
+	 */
+	return mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT;
+}
+
+static inline int radix__has_transparent_pud_hugepage(void)
+{
+	/*
+	 * On radix, PUD hugepages need the 1G page size to sit at PUD level.
+	 */
+	return mmu_psize_defs[MMU_PAGE_1G].shift == PUD_SHIFT;
+}
+#endif
+
+static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP));
+}
+
+static inline pud_t radix__pud_mkdevmap(pud_t pud)
+{
+	return __pud(pud_val(pud) | (_PAGE_PTE | _PAGE_DEVMAP));
+}
+
+struct vmem_altmap;
+struct dev_pagemap;
+extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
+					     unsigned long page_size,
+					     unsigned long phys);
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
+				      int node, struct vmem_altmap *altmap);
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+			       struct vmem_altmap *altmap);
+extern void radix__vmemmap_remove_mapping(unsigned long start,
+				    unsigned long page_size);
+
+extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+				 pgprot_t flags, unsigned int psz);
+
+static inline unsigned long radix__get_tree_size(void)
+{
+	unsigned long rts_field;
+	/*
+	 * We support 52 bits, hence:
+	 * bits 52 - 31 = 21, 0b10101
+	 * RTS encoding details (bit positions counted from 1 at the LSB)
+	 * low 3 bits of rts (0b101) -> bits 6 - 8 of unsigned long
+	 * high 2 bits of rts (0b10) -> bits 62 - 63 of unsigned long
+	 */
+	rts_field = (0x5UL << 5); /* 6 - 8 bits */
+	rts_field |= (0x2UL << 61);
+
+	return rts_field;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int radix__create_section_mapping(unsigned long start, unsigned long end,
+				  int nid, pgprot_t prot);
+int radix__remove_section_mapping(unsigned long start, unsigned long end);
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+void radix__kernel_map_pages(struct page *page, int numpages, int enable);
+
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+#define vmemmap_can_optimize vmemmap_can_optimize
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
+#endif
+
+#define vmemmap_populate_compound_pages vmemmap_populate_compound_pages
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+					      unsigned long start,
+					      unsigned long end, int node,
+					      struct dev_pagemap *pgmap);
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_H */
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644
index 0000000000..5fbe18544c
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#endif
+
+#define SLICE_LOW_SHIFT		28
+#define SLICE_LOW_TOP		(0x100000000ul)
+#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT	40
+#define SLICE_NUM_HIGH		(H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
+
+#define SLB_ADDR_LIMIT_DEFAULT	DEFAULT_MAP_WINDOW_USER64
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+				      unsigned long flags, unsigned int psize,
+				      int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+			   unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
new file mode 100644
index 0000000000..146287d958
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+
+/*
+ * TLB flushing for 64-bit hash-MMU CPUs
+ */
+
+#include <linux/percpu.h>
+#include <asm/page.h>
+
+#define PPC64_TLB_BATCH_NR 192
+
+struct ppc64_tlb_batch {
+	int			active;
+	unsigned long		index;
+	struct mm_struct	*mm;
+	real_pte_t		pte[PPC64_TLB_BATCH_NR];
+	unsigned long		vpn[PPC64_TLB_BATCH_NR];
+	unsigned int		psize;
+	int			ssize;
+};
+DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
+
+extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+	struct ppc64_tlb_batch *batch;
+
+	if (radix_enabled())
+		return;
+	/*
+	 * apply_to_page_range can call us with preemption enabled when
+	 * operating on kernel page tables.
+	 */
+	preempt_disable();
+	batch = this_cpu_ptr(&ppc64_tlb_batch);
+	batch->active = 1;
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+	struct ppc64_tlb_batch *batch;
+
+	if (radix_enabled())
+		return;
+	batch = this_cpu_ptr(&ppc64_tlb_batch);
+
+	if (batch->index)
+		__flush_tlb_pending(batch);
+	batch->active = 0;
+	preempt_enable();
+}
+
+#define arch_flush_lazy_mmu_mode()      do {} while (0)
+
+extern void hash__tlbiel_all(unsigned int action);
+
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
+			    int ssize, unsigned long flags);
+extern void flush_hash_range(unsigned long number, int local);
+extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+				pmd_t *pmdp, unsigned int psize, int ssize,
+				unsigned long flags);
+
+struct mmu_gather;
+extern void hash__tlb_flush(struct mmu_gather *tlb);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/* Private function for use by PCI IO mapping code */
+extern void __flush_hash_table_range(unsigned long start, unsigned long end);
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+#else
+static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { }
+#endif
+#endif /*  _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
new file mode 100644
index 0000000000..a38542259f
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
+#define _ASM_POWERPC_TLBFLUSH_RADIX_H
+
+#include <asm/hvcall.h>
+
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+struct vm_area_struct;
+struct mm_struct;
+struct mmu_gather;
+
+static inline u64 psize_to_rpti_pgsize(unsigned long psize)
+{
+	if (psize == MMU_PAGE_4K)
+		return H_RPTI_PAGE_4K;
+	if (psize == MMU_PAGE_64K)
+		return H_RPTI_PAGE_64K;
+	if (psize == MMU_PAGE_2M)
+		return H_RPTI_PAGE_2M;
+	if (psize == MMU_PAGE_1G)
+		return H_RPTI_PAGE_1G;
+	return H_RPTI_PAGE_ALL;
+}
+
+static inline int mmu_get_ap(int psize)
+{
+	return mmu_psize_defs[psize].ap;
+}
+
+#ifdef CONFIG_PPC_RADIX_MMU
+extern void radix__tlbiel_all(unsigned int action);
+extern void radix__flush_tlb_lpid_page(unsigned int lpid,
+					unsigned long addr,
+					unsigned long page_size);
+extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid_guest(unsigned int lpid);
+#else
+static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }
+static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
+					unsigned long addr,
+					unsigned long page_size)
+{
+	WARN_ON(1);
+}
+static inline void radix__flush_pwc_lpid(unsigned int lpid)
+{
+	WARN_ON(1);
+}
+static inline void radix__flush_all_lpid(unsigned int lpid)
+{
+	WARN_ON(1);
+}
+static inline void radix__flush_all_lpid_guest(unsigned int lpid)
+{
+	WARN_ON(1);
+}
+#endif
+
+extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+					   unsigned long start, unsigned long end);
+extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+					 unsigned long end, int psize);
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+				      unsigned long end, int psize);
+extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+				       unsigned long start, unsigned long end);
+extern void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+				       unsigned long start, unsigned long end);
+extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end);
+extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_all_mm(struct mm_struct *mm);
+extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+					      int psize);
+extern void radix__tlb_flush(struct mmu_gather *tlb);
+#ifdef CONFIG_SMP
+extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_all_mm(struct mm_struct *mm);
+extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+					int psize);
+#else
+#define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
+#define radix__flush_all_mm(mm)		radix__local_flush_all_mm(mm)
+#define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
+#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
+#endif
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
+extern void radix__flush_tlb_all(void);
+
+#endif /* _ASM_POWERPC_TLBFLUSH_RADIX_H */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
new file mode 100644
index 0000000000..1950c1b825
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+
+#define MMU_NO_CONTEXT	~0UL
+
+#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
+#include <asm/book3s/64/tlbflush-hash.h>
+#include <asm/book3s/64/tlbflush-radix.h>
+
+/* TLB flush actions. Used as argument to tlbiel_all() */
+enum {
+	TLB_INVAL_SCOPE_GLOBAL = 0,	/* invalidate all TLBs */
+	TLB_INVAL_SCOPE_LPID = 1,	/* invalidate TLBs for current LPID */
+};
+
+static inline void tlbiel_all(void)
+{
+	/*
+	 * This is used for host machine check and bootup.
+	 *
+	 * This uses early_radix_enabled and implementations use
+	 * early_cpu_has_feature etc because that works early in boot
+	 * and this is the machine check path which is not performance
+	 * critical.
+	 */
+	if (early_radix_enabled())
+		radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+	else
+		hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+}
+
+static inline void tlbiel_all_lpid(bool radix)
+{
+	/*
+	 * This is used for guest machine check.
+	 */
+	if (radix)
+		radix__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+	else
+		hash__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+}
+
+
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
+				       unsigned long start, unsigned long end)
+{
+	if (radix_enabled())
+		radix__flush_pmd_tlb_range(vma, start, end);
+}
+
+#define __HAVE_ARCH_FLUSH_PUD_TLB_RANGE
+static inline void flush_pud_tlb_range(struct vm_area_struct *vma,
+				       unsigned long start, unsigned long end)
+{
+	if (radix_enabled())
+		radix__flush_pud_tlb_range(vma, start, end);
+}
+
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+					   unsigned long start,
+					   unsigned long end)
+{
+	if (radix_enabled())
+		radix__flush_hugetlb_tlb_range(vma, start, end);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	if (radix_enabled())
+		radix__flush_tlb_range(vma, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	if (radix_enabled())
+		radix__flush_tlb_kernel_range(start, end);
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	if (radix_enabled())
+		radix__local_flush_tlb_mm(mm);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+					unsigned long vmaddr)
+{
+	if (radix_enabled())
+		radix__local_flush_tlb_page(vma, vmaddr);
+}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	if (radix_enabled())
+		radix__local_flush_tlb_page_psize(mm, vmaddr, psize);
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	if (radix_enabled())
+		radix__tlb_flush(tlb);
+	else
+		hash__tlb_flush(tlb);
+}
+
+#ifdef CONFIG_SMP
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	if (radix_enabled())
+		radix__flush_tlb_mm(mm);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long vmaddr)
+{
+	if (radix_enabled())
+		radix__flush_tlb_page(vma, vmaddr);
+}
+#else
+#define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
+#endif /* CONFIG_SMP */
+
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+						unsigned long address,
+						pte_t *ptep)
+{
+	/*
+	 * Book3S 64 does not require spurious fault flushes because the PTE
+	 * must be re-fetched in case of an access permission problem. So the
+	 * only reason for a spurious fault should be concurrent modification
+	 * to the PTE, in which case the PTE will eventually be re-fetched by
+	 * the MMU when it attempts the access again.
+	 *
+	 * See: Power ISA Version 3.1B, 6.10.1.2 Modifying a Translation Table
+	 * Entry, Setting a Reference or Change Bit or Upgrading Access
+	 * Authority (PTE Subject to Atomic Hardware Updates):
+	 *
+	 * "If the only change being made to a valid PTE that is subject to
+	 *  atomic hardware updates is to set the Reference or Change bit to
+	 *  1 or to upgrade access authority, a simpler sequence suffices
+	 *  because the translation hardware will refetch the PTE if an
+	 *  access is attempted for which the only problems were reference
+	 *  and/or change bits needing to be set or insufficient access
+	 *  authority."
+	 *
+	 * The nest MMU in POWER9 does not perform this PTE re-fetch, but
+	 * it avoids the spurious fault problem by flushing the TLB before
+	 * upgrading PTE permissions, see radix__ptep_set_access_flags.
+	 */
+}
+
+static inline bool __pte_protnone(unsigned long pte)
+{
+	return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE);
+}
+
+static inline bool __pte_flags_need_flush(unsigned long oldval,
+					  unsigned long newval)
+{
+	unsigned long delta = oldval ^ newval;
+
+	/*
+	 * The return value of this function doesn't matter for hash,
+	 * ptep_modify_prot_start() does a pte_update() which does or schedules
+	 * any necessary hash table update and flush.
+	 */
+	if (!radix_enabled())
+		return true;
+
+	/*
+	 * We do not expect kernel mappings or non-PTEs or not-present PTEs.
+	 */
+	VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED);
+	VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED);
+	VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
+	VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
+	VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));
+	VM_WARN_ON_ONCE(!(newval & _PAGE_PRESENT));
+
+	/*
+	 * Must flush on any change except READ, WRITE, EXEC, DIRTY, ACCESSED.
+	 *
+	 * In theory, some changed software bits could be tolerated, in
+	 * practice those should rarely if ever matter.
+	 */
+
+	if (delta & ~(_PAGE_RWX | _PAGE_DIRTY | _PAGE_ACCESSED))
+		return true;
+
+	/*
+	 * If any of the above was present in old but cleared in new, flush.
+	 * With the exception of _PAGE_ACCESSED, don't worry about flushing
+	 * if that was cleared (see the comment in ptep_clear_flush_young()).
+	 */
+	if ((delta & ~_PAGE_ACCESSED) & oldval)
+		return true;
+
+	return false;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+	return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+	return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
+extern bool tlbie_capable;
+extern bool tlbie_enabled;
+
+static inline bool cputlb_use_tlbie(void)
+{
+	return tlbie_enabled;
+}
+
+#endif /*  _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h
new file mode 100644
index 0000000000..6b178ca143
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgalloc.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_PGALLOC_H
+
+#include <linux/mm.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgalloc.h>
+#else
+#include <asm/book3s/32/pgalloc.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
new file mode 100644
index 0000000000..3b7bd36a23
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_PGTABLE_H
+#define _ASM_POWERPC_BOOK3S_PGTABLE_H
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgtable.h>
+#else
+#include <asm/book3s/32/pgtable.h>
+#endif
+
+#ifndef __ASSEMBLY__
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+				 pte_t *ptep, pte_t entry, int dirty);
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr)
+{
+	if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return;
+	if (radix_enabled())
+		return;
+	__update_mmu_cache(vma, address, ptep);
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/book3s/tlbflush.h b/arch/powerpc/include/asm/book3s/tlbflush.h
new file mode 100644
index 0000000000..dec11de410
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/tlbflush.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_TLBFLUSH_H
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/tlbflush.h>
+#else
+#include <asm/book3s/32/tlbflush.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/bootx.h b/arch/powerpc/include/asm/bootx.h
new file mode 100644
index 0000000000..1c121f3c52
--- /dev/null
+++ b/arch/powerpc/include/asm/bootx.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file describes the structure passed from the BootX application
+ * (for MacOS) when it is used to boot Linux.
+ *
+ * Written by Benjamin Herrenschmidt.
+ */
+
+#ifndef __ASM_BOOTX_H__
+#define __ASM_BOOTX_H__
+
+#include <uapi/asm/bootx.h>
+
+/* (*) The format of the colormap is 256 * 3 * 2 bytes. Each color index
+ * is represented by 3 short words containing a 16 bits (unsigned) color
+ * component. Later versions may contain the gamma table for direct-color
+ * devices here.
+ */
+#define BOOTX_COLORTABLE_SIZE    (256UL*3UL*2UL)
+
+/* BootX passes the device-tree using a format that comes from earlier
+ * ppc32 kernels. This used to match what is in prom.h, but not anymore
+ * so we now define it here
+ */
+struct bootx_dt_prop {
+	u32	name;
+	int	length;
+	u32	value;
+	u32	next;
+};
+
+struct bootx_dt_node {
+	u32	unused0;
+	u32	unused1;
+	u32	phandle;	/* not really available */
+	u32	unused2;
+	u32	unused3;
+	u32	unused4;
+	u32	unused5;
+	u32	full_name;
+	u32	properties;
+	u32	parent;
+	u32	child;
+	u32	sibling;
+	u32	next;
+	u32	allnext;
+};
+
+extern void bootx_init(unsigned long r4, unsigned long phys);
+
+#endif
diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h
new file mode 100644
index 0000000000..e8a7b4ffb5
--- /dev/null
+++ b/arch/powerpc/include/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H
+#define _ASM_POWERPC_BPF_PERF_EVENT_H
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */
diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h
new file mode 100644
index 0000000000..860f8868f1
--- /dev/null
+++ b/arch/powerpc/include/asm/btext.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definitions for using the procedures in btext.c.
+ *
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+#ifndef __PPC_BTEXT_H
+#define __PPC_BTEXT_H
+#ifdef __KERNEL__
+
+extern int btext_find_display(int allow_nonstdout);
+extern void btext_update_display(unsigned long phys, int width, int height,
+				 int depth, int pitch);
+extern void btext_setup_display(int width, int height, int depth, int pitch,
+				unsigned long address);
+#ifdef CONFIG_PPC32
+extern void btext_prepare_BAT(void);
+#else
+static inline void btext_prepare_BAT(void) { }
+#endif
+extern void btext_map(void);
+extern void btext_unmap(void);
+
+extern void btext_drawchar(char c);
+extern void btext_drawstring(const char *str);
+void __init btext_drawhex(unsigned long v);
+void __init btext_drawtext(const char *c, unsigned int len);
+
+void __init btext_clearscreen(void);
+void __init btext_flushscreen(void);
+void __init btext_flushline(void);
+
+#endif /* __KERNEL__ */
+#endif /* __PPC_BTEXT_H */
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
new file mode 100644
index 0000000000..1db485aacb
--- /dev/null
+++ b/arch/powerpc/include/asm/bug.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BUG_H
+#define _ASM_POWERPC_BUG_H
+#ifdef __KERNEL__
+
+#include <asm/asm-compat.h>
+
+#ifdef CONFIG_BUG
+
+#ifdef __ASSEMBLY__
+#include <asm/asm-offsets.h>
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+.macro EMIT_BUG_ENTRY addr,file,line,flags
+	 .section __bug_table,"aw"
+5001:	 .4byte \addr - .
+	 .4byte 5002f - .
+	 .short \line, \flags
+	 .org 5001b+BUG_ENTRY_SIZE
+	 .previous
+	 .section .rodata,"a"
+5002:	 .asciz "\file"
+	 .previous
+.endm
+#else
+.macro EMIT_BUG_ENTRY addr,file,line,flags
+	 .section __bug_table,"aw"
+5001:	 .4byte \addr - .
+	 .short \flags
+	 .org 5001b+BUG_ENTRY_SIZE
+	 .previous
+.endm
+#endif /* verbose */
+
+#else /* !__ASSEMBLY__ */
+/* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
+   sizeof(struct bug_entry), respectively */
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define _EMIT_BUG_ENTRY				\
+	".section __bug_table,\"aw\"\n"		\
+	"2:	.4byte 1b - .\n"		\
+	"	.4byte %0 - .\n"		\
+	"	.short %1, %2\n"		\
+	".org 2b+%3\n"				\
+	".previous\n"
+#else
+#define _EMIT_BUG_ENTRY				\
+	".section __bug_table,\"aw\"\n"		\
+	"2:	.4byte 1b - .\n"		\
+	"	.short %2\n"			\
+	".org 2b+%3\n"				\
+	".previous\n"
+#endif
+
+#define BUG_ENTRY(insn, flags, ...)			\
+	__asm__ __volatile__(				\
+		"1:	" insn "\n"			\
+		_EMIT_BUG_ENTRY				\
+		: : "i" (__FILE__), "i" (__LINE__),	\
+		  "i" (flags),				\
+		  "i" (sizeof(struct bug_entry)),	\
+		  ##__VA_ARGS__)
+
+/*
+ * BUG_ON() and WARN_ON() do their best to cooperate with compile-time
+ * optimisations. However depending on the complexity of the condition
+ * some compiler versions may not produce optimal results.
+ */
+
+#define BUG() do {						\
+	BUG_ENTRY("twi 31, 0, 0", 0);				\
+	unreachable();						\
+} while (0)
+#define HAVE_ARCH_BUG
+
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
+
+#ifdef CONFIG_PPC64
+#define BUG_ON(x) do {						\
+	if (__builtin_constant_p(x)) {				\
+		if (x)						\
+			BUG();					\
+	} else {						\
+		BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x)));	\
+	}							\
+} while (0)
+
+#define WARN_ON(x) ({						\
+	int __ret_warn_on = !!(x);				\
+	if (__builtin_constant_p(__ret_warn_on)) {		\
+		if (__ret_warn_on)				\
+			__WARN();				\
+	} else {						\
+		BUG_ENTRY(PPC_TLNEI " %4, 0",			\
+			  BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN),	\
+			  "r" (__ret_warn_on));	\
+	}							\
+	unlikely(__ret_warn_on);				\
+})
+
+#define HAVE_ARCH_BUG_ON
+#define HAVE_ARCH_WARN_ON
+#endif
+
+#endif /* __ASSEMBLY__ */
+#else
+#ifdef __ASSEMBLY__
+.macro EMIT_BUG_ENTRY addr,file,line,flags
+.endm
+#else /* !__ASSEMBLY__ */
+#define _EMIT_BUG_ENTRY
+#endif
+#endif /* CONFIG_BUG */
+
+#define EMIT_WARN_ENTRY EMIT_BUG_ENTRY
+
+#include <asm-generic/bug.h>
+
+#ifndef __ASSEMBLY__
+
+struct pt_regs;
+void hash__do_page_fault(struct pt_regs *);
+void bad_page_fault(struct pt_regs *, int);
+void emulate_single_step(struct pt_regs *regs);
+extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void _exception_pkey(struct pt_regs *, unsigned long, int);
+extern void die(const char *, struct pt_regs *, long);
+void die_mce(const char *str, struct pt_regs *regs, long err);
+extern bool die_will_crash(void);
+extern void panic_flush_kmsg_start(void);
+extern void panic_flush_kmsg_end(void);
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_BUG_H */
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
new file mode 100644
index 0000000000..69232231d2
--- /dev/null
+++ b/arch/powerpc/include/asm/cache.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CACHE_H
+#define _ASM_POWERPC_CACHE_H
+
+#ifdef __KERNEL__
+
+
+/* bytes per L1 cache line */
+#if defined(CONFIG_PPC_8xx)
+#define L1_CACHE_SHIFT		4
+#define MAX_COPY_PREFETCH	1
+#define IFETCH_ALIGN_SHIFT	2
+#elif defined(CONFIG_PPC_E500MC)
+#define L1_CACHE_SHIFT		6
+#define MAX_COPY_PREFETCH	4
+#define IFETCH_ALIGN_SHIFT	3
+#elif defined(CONFIG_PPC32)
+#define MAX_COPY_PREFETCH	4
+#define IFETCH_ALIGN_SHIFT	3	/* 603 fetches 2 insn at a time */
+#if defined(CONFIG_PPC_47x)
+#define L1_CACHE_SHIFT		7
+#else
+#define L1_CACHE_SHIFT		5
+#endif
+#else /* CONFIG_PPC64 */
+#define L1_CACHE_SHIFT		7
+#define IFETCH_ALIGN_SHIFT	4 /* POWER8,9 */
+#endif
+
+#define	L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+#define	SMP_CACHE_BYTES		L1_CACHE_BYTES
+
+#define IFETCH_ALIGN_BYTES	(1 << IFETCH_ALIGN_SHIFT)
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+#endif
+
+#if !defined(__ASSEMBLY__)
+#ifdef CONFIG_PPC64
+
+struct ppc_cache_info {
+	u32 size;
+	u32 line_size;
+	u32 block_size;	/* L1 only */
+	u32 log_block_size;
+	u32 blocks_per_page;
+	u32 sets;
+	u32 assoc;
+};
+
+struct ppc64_caches {
+	struct ppc_cache_info l1d;
+	struct ppc_cache_info l1i;
+	struct ppc_cache_info l2;
+	struct ppc_cache_info l3;
+};
+
+extern struct ppc64_caches ppc64_caches;
+
+static inline u32 l1_dcache_shift(void)
+{
+	return ppc64_caches.l1d.log_block_size;
+}
+
+static inline u32 l1_dcache_bytes(void)
+{
+	return ppc64_caches.l1d.block_size;
+}
+
+static inline u32 l1_icache_shift(void)
+{
+	return ppc64_caches.l1i.log_block_size;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+	return ppc64_caches.l1i.block_size;
+}
+#else
+static inline u32 l1_dcache_shift(void)
+{
+	return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_dcache_bytes(void)
+{
+	return L1_CACHE_BYTES;
+}
+
+static inline u32 l1_icache_shift(void)
+{
+	return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+	return L1_CACHE_BYTES;
+}
+
+#endif
+
+#define __read_mostly __section(".data..read_mostly")
+
+#ifdef CONFIG_PPC_BOOK3S_32
+extern long _get_L2CR(void);
+extern long _get_L3CR(void);
+extern void _set_L2CR(unsigned long);
+extern void _set_L3CR(unsigned long);
+#else
+#define _get_L2CR()	0L
+#define _get_L3CR()	0L
+#define _set_L2CR(val)	do { } while(0)
+#define _set_L3CR(val)	do { } while(0)
+#endif
+
+static inline void dcbz(void *addr)
+{
+	__asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void dcbi(void *addr)
+{
+	__asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void dcbf(void *addr)
+{
+	__asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void dcbst(void *addr)
+{
+	__asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void icbi(void *addr)
+{
+	asm volatile ("icbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void iccci(void *addr)
+{
+	asm volatile ("iccci 0, %0" : : "r"(addr) : "memory");
+}
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
new file mode 100644
index 0000000000..ef7d2de33b
--- /dev/null
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ */
+#ifndef _ASM_POWERPC_CACHEFLUSH_H
+#define _ASM_POWERPC_CACHEFLUSH_H
+
+#include <linux/mm.h>
+#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
+
+/*
+ * This flag is used to indicate that the page pointed to by a pte is clean
+ * and does not require cleaning before returning it to the user.
+ */
+#define PG_dcache_clean PG_arch_1
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Book3s has no ptesync after setting a pte, so without this ptesync it's
+ * possible for a kernel virtual mapping access to return a spurious fault
+ * if it's accessed right after the pte is set. The page fault handler does
+ * not expect this type of fault. flush_cache_vmap is not exactly the right
+ * place to put this, but it seems to work well enough.
+ */
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+	asm volatile("ptesync" ::: "memory");
+}
+#define flush_cache_vmap flush_cache_vmap
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+static inline void flush_dcache_folio(struct folio *folio)
+{
+	/* Nothing to do with CPU_FTR_COHERENT_ICACHE.  Otherwise test first */
+	/* so that the atomic clear_bit() is only issued when needed. */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+	    test_bit(PG_dcache_clean, &folio->flags))
+		clear_bit(PG_dcache_clean, &folio->flags);
+}
+#define flush_dcache_folio flush_dcache_folio
+
+static inline void flush_dcache_page(struct page *page)
+{
+	flush_dcache_folio(page_folio(page));
+}
+
+void flush_icache_range(unsigned long start, unsigned long stop);
+#define flush_icache_range flush_icache_range
+
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+		unsigned long addr, int len);
+#define flush_icache_user_page flush_icache_user_page
+
+void flush_dcache_icache_folio(struct folio *folio);
+
+/**
+ * flush_dcache_range(): Write any modified data cache blocks out to memory and
+ * invalidate them. Does not invalidate the corresponding instruction cache
+ * blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static inline void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+	const unsigned long line_shift = l1_dcache_shift();
+	const unsigned long line_bytes = l1_dcache_bytes();
+	/* Begin at start with its low (line_bytes - 1) bits cleared. */
+	unsigned long cur = start & ~(line_bytes - 1);
+	/* Iteration count, fixed before the loop begins. */
+	unsigned long nblocks = (stop - cur + (line_bytes - 1)) >> line_shift;
+
+	if (IS_ENABLED(CONFIG_PPC64))
+		mb();	/* sync */
+
+	for (; nblocks != 0; nblocks--, cur += line_bytes)
+		dcbf((void *)cur);
+	mb();	/* sync */
+}
+
+/*
+ * Write any modified data cache blocks out to memory.
+ * Does not invalidate the corresponding cache lines (especially for
+ * any corresponding instruction cache).
+ */
+static inline void clean_dcache_range(unsigned long start, unsigned long stop)
+{
+	const unsigned long line_shift = l1_dcache_shift();
+	const unsigned long line_bytes = l1_dcache_bytes();
+	unsigned long cur = start & ~(line_bytes - 1);	/* low bits cleared */
+	unsigned long nblocks = (stop - cur + (line_bytes - 1)) >> line_shift;
+
+	/* Issue dcbst once per step of line_bytes, then mb(). */
+	for (; nblocks != 0; nblocks--, cur += line_bytes)
+		dcbst((void *)cur);
+	mb();	/* sync */
+}
+
+/*
+ * Like above, but invalidate the D-cache.  This is used by the 8xx
+ * to invalidate the cache so the PPC core doesn't get stale data
+ * from the CPM (no cache snooping here :-).
+ */
+static inline void invalidate_dcache_range(unsigned long start,
+					   unsigned long stop)
+{
+	const unsigned long line_shift = l1_dcache_shift();
+	const unsigned long line_bytes = l1_dcache_bytes();
+	unsigned long cur = start & ~(line_bytes - 1);	/* low bits cleared */
+	unsigned long nblocks = (stop - cur + (line_bytes - 1)) >> line_shift;
+
+	/* Issue dcbi once per step of line_bytes, then mb(). */
+	for (; nblocks != 0; nblocks--, cur += line_bytes)
+		dcbi((void *)cur);
+	mb();	/* sync */
+}
+
+#ifdef CONFIG_4xx
+static inline void flush_instruction_cache(void)
+{
+	iccci((void *)KERNELBASE);
+	isync();
+}
+#else
+void flush_instruction_cache(void);
+#endif
+
+#include <asm-generic/cacheflush.h>
+
+#endif /* _ASM_POWERPC_CACHEFLUSH_H */
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
new file mode 100644
index 0000000000..6a79b5d1c4
--- /dev/null
+++ b/arch/powerpc/include/asm/cell-pmu.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author:
+ *   David Erb (djerb@us.ibm.com)
+ *   Kevin Corry (kevcorry@us.ibm.com)
+ */
+
+#ifndef __ASM_CELL_PMU_H__
+#define __ASM_CELL_PMU_H__
+
+/* The Cell PMU has four hardware performance counters, which can be
+ * configured as four 32-bit counters or eight 16-bit counters.
+ */
+#define NR_PHYS_CTRS 4
+#define NR_CTRS      (NR_PHYS_CTRS * 2)
+
+/* Macros for the pm_control register. */
+#define CBE_PM_16BIT_CTR(ctr)              (1 << (24 - ((ctr) & (NR_PHYS_CTRS - 1))))
+#define CBE_PM_ENABLE_PERF_MON             0x80000000
+#define CBE_PM_STOP_AT_MAX                 0x40000000
+#define CBE_PM_TRACE_MODE_GET(pm_control)  (((pm_control) >> 28) & 0x3)
+#define CBE_PM_TRACE_MODE_SET(mode)        (((mode)  & 0x3) << 28)
+#define CBE_PM_TRACE_BUF_OVFLW(bit)        (((bit) & 0x1) << 17)
+#define CBE_PM_COUNT_MODE_SET(count)       (((count) & 0x3) << 18)
+#define CBE_PM_FREEZE_ALL_CTRS             0x00100000
+#define CBE_PM_ENABLE_EXT_TRACE            0x00008000
+#define CBE_PM_SPU_ADDR_TRACE_SET(msk)     (((msk) & 0x3) << 9)
+
+/* Macros for the trace_address register. */
+#define CBE_PM_TRACE_BUF_FULL              0x00000800
+#define CBE_PM_TRACE_BUF_EMPTY             0x00000400
+#define CBE_PM_TRACE_BUF_DATA_COUNT(ta)    ((ta) & 0x3ff)
+#define CBE_PM_TRACE_BUF_MAX_COUNT         0x400
+
+/* Macros for the pm07_control registers (mux field is bits 26..31: unsigned). */
+#define CBE_PM_CTR_INPUT_MUX(pm07_control) (((pm07_control) >> 26) & 0x3f)
+#define CBE_PM_CTR_INPUT_CONTROL           0x02000000
+#define CBE_PM_CTR_POLARITY                0x01000000
+#define CBE_PM_CTR_COUNT_CYCLES            0x00800000
+#define CBE_PM_CTR_ENABLE                  0x00400000
+#define PM07_CTR_INPUT_MUX(x)              (((x) & 0x3FU) << 26)
+#define PM07_CTR_INPUT_CONTROL(x)          (((x) & 1) << 25)
+#define PM07_CTR_POLARITY(x)               (((x) & 1) << 24)
+#define PM07_CTR_COUNT_CYCLES(x)           (((x) & 1) << 23)
+#define PM07_CTR_ENABLE(x)                 (((x) & 1) << 22)
+
+/* Macros for the pm_status register (1U: counter 0 maps to bit 31). */
+#define CBE_PM_CTR_OVERFLOW_INTR(ctr)      (1U << (31 - ((ctr) & 7)))
+
+enum pm_reg_name {
+	group_control,
+	debug_bus_control,
+	trace_address,
+	ext_tr_timer,
+	pm_status,
+	pm_control,
+	pm_interval,
+	pm_start_stop,
+};
+
+/* Routines for reading/writing the PMU registers. */
+extern u32  cbe_read_phys_ctr(u32 cpu, u32 phys_ctr);
+extern void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val);
+extern u32  cbe_read_ctr(u32 cpu, u32 ctr);
+extern void cbe_write_ctr(u32 cpu, u32 ctr, u32 val);
+
+extern u32  cbe_read_pm07_control(u32 cpu, u32 ctr);
+extern void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val);
+extern u32  cbe_read_pm(u32 cpu, enum pm_reg_name reg);
+extern void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val);
+
+extern u32  cbe_get_ctr_size(u32 cpu, u32 phys_ctr);
+extern void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size);
+
+extern void cbe_enable_pm(u32 cpu);
+extern void cbe_disable_pm(u32 cpu);
+
+extern void cbe_read_trace_buffer(u32 cpu, u64 *buf);
+
+extern void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask);
+extern void cbe_disable_pm_interrupts(u32 cpu);
+extern u32  cbe_get_and_clear_pm_interrupts(u32 cpu);
+extern void cbe_sync_irq(int node);
+
+#define CBE_COUNT_SUPERVISOR_MODE       0
+#define CBE_COUNT_HYPERVISOR_MODE       1
+#define CBE_COUNT_PROBLEM_MODE          2
+#define CBE_COUNT_ALL_MODES             3
+
+#endif /* __ASM_CELL_PMU_H__ */
diff --git a/arch/powerpc/include/asm/cell-regs.h b/arch/powerpc/include/asm/cell-regs.h
new file mode 100644
index 0000000000..e1c431ef30
--- /dev/null
+++ b/arch/powerpc/include/asm/cell-regs.h
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * cell-regs.h
+ *
+ * This file is intended to hold the various register definitions for CBE
+ * on-chip system devices (memory controller, IO controller, etc...)
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          David J. Erb (djerb@us.ibm.com)
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#ifndef CBE_REGS_H
+#define CBE_REGS_H
+
+#include <asm/cell-pmu.h>
+
+/*
+ *
+ * Some HID register definitions
+ *
+ */
+
+/* CBE specific HID0 bits */
+#define HID0_CBE_THERM_WAKEUP	0x0000020000000000ul
+#define HID0_CBE_SYSERR_WAKEUP	0x0000008000000000ul
+#define HID0_CBE_THERM_INT_EN	0x0000000400000000ul
+#define HID0_CBE_SYSERR_INT_EN	0x0000000200000000ul
+
+#define MAX_CBE		2
+
+/*
+ *
+ * Pervasive unit register definitions
+ *
+ */
+
+union spe_reg {
+	u64 val;
+	u8 spe[8];
+};
+
+union ppe_spe_reg {
+	u64 val;
+	struct {
+		u32 ppe;
+		u32 spe;
+	};
+};
+
+
+struct cbe_pmd_regs {
+	/* Debug Bus Control */
+	u64	pad_0x0000;					/* 0x0000 */
+
+	u64	group_control;					/* 0x0008 */
+
+	u8	pad_0x0010_0x00a8 [0x00a8 - 0x0010];		/* 0x0010 */
+
+	u64	debug_bus_control;				/* 0x00a8 */
+
+	u8	pad_0x00b0_0x0100 [0x0100 - 0x00b0];		/* 0x00b0 */
+
+	u64	trace_aux_data;					/* 0x0100 */
+	u64	trace_buffer_0_63;				/* 0x0108 */
+	u64	trace_buffer_64_127;				/* 0x0110 */
+	u64	trace_address;					/* 0x0118 */
+	u64	ext_tr_timer;					/* 0x0120 */
+
+	u8	pad_0x0128_0x0400 [0x0400 - 0x0128];		/* 0x0128 */
+
+	/* Performance Monitor */
+	u64	pm_status;					/* 0x0400 */
+	u64	pm_control;					/* 0x0408 */
+	u64	pm_interval;					/* 0x0410 */
+	u64	pm_ctr[4];					/* 0x0418 */
+	u64	pm_start_stop;					/* 0x0438 */
+	u64	pm07_control[8];				/* 0x0440 */
+
+	u8	pad_0x0480_0x0800 [0x0800 - 0x0480];		/* 0x0480 */
+
+	/* Thermal Sensor Registers */
+	union	spe_reg	ts_ctsr1;				/* 0x0800 */
+	u64	ts_ctsr2;					/* 0x0808 */
+	union	spe_reg	ts_mtsr1;				/* 0x0810 */
+	u64	ts_mtsr2;					/* 0x0818 */
+	union	spe_reg	ts_itr1;				/* 0x0820 */
+	u64	ts_itr2;					/* 0x0828 */
+	u64	ts_gitr;					/* 0x0830 */
+	u64	ts_isr;						/* 0x0838 */
+	u64	ts_imr;						/* 0x0840 */
+	union	spe_reg	tm_cr1;					/* 0x0848 */
+	u64	tm_cr2;						/* 0x0850 */
+	u64	tm_simr;					/* 0x0858 */
+	union	ppe_spe_reg tm_tpr;				/* 0x0860 */
+	union	spe_reg	tm_str1;				/* 0x0868 */
+	u64	tm_str2;					/* 0x0870 */
+	union	ppe_spe_reg tm_tsr;				/* 0x0878 */
+
+	/* Power Management */
+	u64	pmcr;						/* 0x0880 */
+#define CBE_PMD_PAUSE_ZERO_CONTROL	0x10000
+	u64	pmsr;						/* 0x0888 */
+
+	/* Time Base Register */
+	u64	tbr;						/* 0x0890 */
+
+	u8	pad_0x0898_0x0c00 [0x0c00 - 0x0898];		/* 0x0898 */
+
+	/* Fault Isolation Registers */
+	u64	checkstop_fir;					/* 0x0c00 */
+	u64	recoverable_fir;				/* 0x0c08 */
+	u64	spec_att_mchk_fir;				/* 0x0c10 */
+	u32	fir_mode_reg;					/* 0x0c18 */
+	u8	pad_0x0c1c_0x0c20 [4];				/* 0x0c1c */
+#define CBE_PMD_FIR_MODE_M8		0x00800
+	u64	fir_enable_mask;				/* 0x0c20 */
+
+	u8	pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28];		/* 0x0c28 */
+	u64	ras_esc_0;					/* 0x0ca8 */
+	u8	pad_0x0cb0_0x1000 [0x1000 - 0x0cb0];		/* 0x0cb0 */
+};
+
+extern struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np);
+extern struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu);
+
+/*
+ * PMU shadow registers
+ *
+ * Many of the registers in the performance monitoring unit are write-only,
+ * so we need to save a copy of what we write to those registers.
+ *
+ * The actual data counters are read/write. However, writing to the counters
+ * only takes effect if the PMU is enabled. Otherwise the value is stored in
+ * a hardware latch until the next time the PMU is enabled. So we save a copy
+ * of the counter values if we need to read them back while the PMU is
+ * disabled. The counter_value_in_latch field is a bitmap indicating which
+ * counters currently have a value waiting to be written.
+ */
+
+struct cbe_pmd_shadow_regs {
+	u32 group_control;
+	u32 debug_bus_control;
+	u32 trace_address;
+	u32 ext_tr_timer;
+	u32 pm_status;
+	u32 pm_control;
+	u32 pm_interval;
+	u32 pm_start_stop;
+	u32 pm07_control[NR_CTRS];
+
+	u32 pm_ctr[NR_PHYS_CTRS];
+	u32 counter_value_in_latch;
+};
+
+extern struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np);
+extern struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu);
+
+/*
+ *
+ * IIC unit register definitions
+ *
+ */
+
+struct cbe_iic_pending_bits {
+	u32 data;
+	u8 flags;
+	u8 class;
+	u8 source;
+	u8 prio;
+};
+
+#define CBE_IIC_IRQ_VALID	0x80
+#define CBE_IIC_IRQ_IPI		0x40
+
+struct cbe_iic_thread_regs {
+	struct cbe_iic_pending_bits pending;
+	struct cbe_iic_pending_bits pending_destr;
+	u64 generate;
+	u64 prio;
+};
+
+struct cbe_iic_regs {
+	u8	pad_0x0000_0x0400[0x0400 - 0x0000];		/* 0x0000 */
+
+	/* IIC interrupt registers */
+	struct	cbe_iic_thread_regs thread[2];			/* 0x0400 */
+
+	u64	iic_ir;						/* 0x0440 */
+#define CBE_IIC_IR_PRIO(x)      (((x) & 0xf) << 12)
+#define CBE_IIC_IR_DEST_NODE(x) (((x) & 0xf) << 4)
+#define CBE_IIC_IR_DEST_UNIT(x) ((x) & 0xf)
+#define CBE_IIC_IR_IOC_0        0x0
+#define CBE_IIC_IR_IOC_1S       0xb
+#define CBE_IIC_IR_PT_0         0xe
+#define CBE_IIC_IR_PT_1         0xf
+
+	u64	iic_is;						/* 0x0448 */
+#define CBE_IIC_IS_PMI		0x2
+
+	u8	pad_0x0450_0x0500[0x0500 - 0x0450];		/* 0x0450 */
+
+	/* IOC FIR */
+	u64	ioc_fir_reset;					/* 0x0500 */
+	u64	ioc_fir_set;					/* 0x0508 */
+	u64	ioc_checkstop_enable;				/* 0x0510 */
+	u64	ioc_fir_error_mask;				/* 0x0518 */
+	u64	ioc_syserr_enable;				/* 0x0520 */
+	u64	ioc_fir;					/* 0x0528 */
+
+	u8	pad_0x0530_0x1000[0x1000 - 0x0530];		/* 0x0530 */
+};
+
+extern struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np);
+extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu);
+
+
+struct cbe_mic_tm_regs {
+	u8	pad_0x0000_0x0040[0x0040 - 0x0000];		/* 0x0000 */
+
+	u64	mic_ctl_cnfg2;					/* 0x0040 */
+#define CBE_MIC_ENABLE_AUX_TRC		0x8000000000000000LL
+#define CBE_MIC_DISABLE_PWR_SAV_2	0x0200000000000000LL
+#define CBE_MIC_DISABLE_AUX_TRC_WRAP	0x0100000000000000LL
+#define CBE_MIC_ENABLE_AUX_TRC_INT	0x0080000000000000LL
+
+	u64	pad_0x0048;					/* 0x0048 */
+
+	u64	mic_aux_trc_base;				/* 0x0050 */
+	u64	mic_aux_trc_max_addr;				/* 0x0058 */
+	u64	mic_aux_trc_cur_addr;				/* 0x0060 */
+	u64	mic_aux_trc_grf_addr;				/* 0x0068 */
+	u64	mic_aux_trc_grf_data;				/* 0x0070 */
+
+	u64	pad_0x0078;					/* 0x0078 */
+
+	u64	mic_ctl_cnfg_0;					/* 0x0080 */
+#define CBE_MIC_DISABLE_PWR_SAV_0	0x8000000000000000LL
+
+	u64	pad_0x0088;					/* 0x0088 */
+
+	u64	slow_fast_timer_0;				/* 0x0090 */
+	u64	slow_next_timer_0;				/* 0x0098 */
+
+	u8	pad_0x00a0_0x00f8[0x00f8 - 0x00a0];		/* 0x00a0 */
+	u64    	mic_df_ecc_address_0;				/* 0x00f8 */
+
+	u8	pad_0x0100_0x01b8[0x01b8 - 0x0100];		/* 0x0100 */
+	u64    	mic_df_ecc_address_1;				/* 0x01b8 */
+
+	u64	mic_ctl_cnfg_1;					/* 0x01c0 */
+#define CBE_MIC_DISABLE_PWR_SAV_1	0x8000000000000000LL
+
+	u64	pad_0x01c8;					/* 0x01c8 */
+
+	u64	slow_fast_timer_1;				/* 0x01d0 */
+	u64	slow_next_timer_1;				/* 0x01d8 */
+
+	u8	pad_0x01e0_0x0208[0x0208 - 0x01e0];		/* 0x01e0 */
+	u64	mic_exc;					/* 0x0208 */
+#define CBE_MIC_EXC_BLOCK_SCRUB		0x0800000000000000ULL
+#define CBE_MIC_EXC_FAST_SCRUB		0x0100000000000000ULL
+
+	u64	mic_mnt_cfg;					/* 0x0210 */
+#define CBE_MIC_MNT_CFG_CHAN_0_POP	0x0002000000000000ULL
+#define CBE_MIC_MNT_CFG_CHAN_1_POP	0x0004000000000000ULL
+
+	u64	mic_df_config;					/* 0x0218 */
+#define CBE_MIC_ECC_DISABLE_0		0x4000000000000000ULL
+#define CBE_MIC_ECC_REP_SINGLE_0	0x2000000000000000ULL
+#define CBE_MIC_ECC_DISABLE_1		0x0080000000000000ULL
+#define CBE_MIC_ECC_REP_SINGLE_1	0x0040000000000000ULL
+
+	u8	pad_0x0220_0x0230[0x0230 - 0x0220];		/* 0x0220 */
+	u64	mic_fir;					/* 0x0230 */
+#define CBE_MIC_FIR_ECC_SINGLE_0_ERR	0x0200000000000000ULL
+#define CBE_MIC_FIR_ECC_MULTI_0_ERR	0x0100000000000000ULL
+#define CBE_MIC_FIR_ECC_SINGLE_1_ERR	0x0080000000000000ULL
+#define CBE_MIC_FIR_ECC_MULTI_1_ERR	0x0040000000000000ULL
+#define CBE_MIC_FIR_ECC_ERR_MASK	0xffff000000000000ULL
+#define CBE_MIC_FIR_ECC_SINGLE_0_CTE	0x0000020000000000ULL
+#define CBE_MIC_FIR_ECC_MULTI_0_CTE	0x0000010000000000ULL
+#define CBE_MIC_FIR_ECC_SINGLE_1_CTE	0x0000008000000000ULL
+#define CBE_MIC_FIR_ECC_MULTI_1_CTE	0x0000004000000000ULL
+#define CBE_MIC_FIR_ECC_CTE_MASK	0x0000ffff00000000ULL
+#define CBE_MIC_FIR_ECC_SINGLE_0_RESET	0x0000000002000000ULL
+#define CBE_MIC_FIR_ECC_MULTI_0_RESET	0x0000000001000000ULL
+#define CBE_MIC_FIR_ECC_SINGLE_1_RESET	0x0000000000800000ULL
+#define CBE_MIC_FIR_ECC_MULTI_1_RESET	0x0000000000400000ULL
+#define CBE_MIC_FIR_ECC_RESET_MASK	0x00000000ffff0000ULL
+#define CBE_MIC_FIR_ECC_SINGLE_0_SET	0x0000000000000200ULL
+#define CBE_MIC_FIR_ECC_MULTI_0_SET	0x0000000000000100ULL
+#define CBE_MIC_FIR_ECC_SINGLE_1_SET	0x0000000000000080ULL
+#define CBE_MIC_FIR_ECC_MULTI_1_SET	0x0000000000000040ULL
+#define CBE_MIC_FIR_ECC_SET_MASK	0x000000000000ffffULL
+	u64	mic_fir_debug;					/* 0x0238 */
+
+	u8	pad_0x0240_0x1000[0x1000 - 0x0240];		/* 0x0240 */
+};
+
+extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np);
+extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
+
+
+/* Cell page table entries */
+#define CBE_IOPTE_PP_W		0x8000000000000000ul /* protection: write */
+#define CBE_IOPTE_PP_R		0x4000000000000000ul /* protection: read */
+#define CBE_IOPTE_M		0x2000000000000000ul /* coherency required */
+#define CBE_IOPTE_SO_R		0x1000000000000000ul /* ordering: writes */
+#define CBE_IOPTE_SO_RW		0x1800000000000000ul /* ordering: r & w */
+#define CBE_IOPTE_RPN_Mask	0x07fffffffffff000ul /* RPN */
+#define CBE_IOPTE_H		0x0000000000000800ul /* cache hint */
+#define CBE_IOPTE_IOID_Mask	0x00000000000007fful /* ioid */
+
+/* some utility functions to deal with SMT */
+extern u32 cbe_get_hw_thread_id(int cpu);
+extern u32 cbe_cpu_to_node(int cpu);
+extern u32 cbe_node_to_cpu(int node);
+
+/* Init this module early */
+extern void cbe_regs_init(void);
+
+
+#endif /* CBE_REGS_H */
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
new file mode 100644
index 0000000000..4b573a3b7e
--- /dev/null
+++ b/arch/powerpc/include/asm/checksum.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_CHECKSUM_H
+#define _ASM_POWERPC_CHECKSUM_H
+#ifdef __KERNEL__
+
+/*
+ */
+
+#include <linux/bitops.h>
+#include <linux/in6.h>
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * while copying the block to dst.
+ * NOTE(review): earlier wording described an initial "sum" and -EFAULT
+ * reporting via *src_err / *dst_err; the prototype below has none of
+ * those parameters, so fault behaviour must be checked in the definition.
+ *
+ * Like csum_partial, this must be called with even lengths,
+ * except for the last fragment.
+ */
+extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
+
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
+				      int len);
+#define HAVE_CSUM_COPY_USER
+extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
+				    int len);
+
+#define _HAVE_ARCH_CSUM_AND_COPY
+#define csum_partial_copy_nocheck(src, dst, len)   \
+        csum_partial_copy_generic((src), (dst), (len))
+
+
+/*
+ * turns a 32-bit partial checksum (e.g. from csum_partial) into a
+ * 1's complement 16-bit checksum.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	u32 tmp = (__force u32)sum;
+
+	/*
+	 * swap the two 16-bit halves of sum
+	 * if there is a carry from adding the two 16-bit halves,
+	 * it will carry from the lower half into the upper half,
+	 * giving us the correct sum in the upper half.
+	 */
+	return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16);
+}
+
+static inline u32 from64to32(u64 x)
+{
+	return (x + ror64(x, 32)) >> 32;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
+{
+#ifdef __powerpc64__
+	u64 s = (__force u32)sum;
+
+	s += (__force u32)saddr;
+	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
+	return (__force __wsum) from64to32(s);
+#else
+    __asm__("\n\
+	addc %0,%0,%1 \n\
+	adde %0,%0,%2 \n\
+	adde %0,%0,%3 \n\
+	addze %0,%0 \n\
+	"
+	: "=r" (sum)
+	: "r" (daddr), "r"(saddr), "r"(proto + len), "0"(sum));
+	return sum;
+#endif
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+#define HAVE_ARCH_CSUM_ADD
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+#ifdef __powerpc64__
+	u64 res = (__force u64)csum;
+
+	res += (__force u64)addend;
+	return (__force __wsum)((u32)res + (res >> 32));
+#else
+	if (__builtin_constant_p(csum) && csum == 0)
+		return addend;
+	if (__builtin_constant_p(addend) && addend == 0)
+		return csum;
+
+	asm("addc %0,%0,%1;"
+	    "addze %0,%0;"
+	    : "+r" (csum) : "r" (addend) : "xer");
+	return csum;
+#endif
+}
+
+#define HAVE_ARCH_CSUM_SHIFT
+static __always_inline __wsum csum_shift(__wsum sum, int offset)
+{
+	/* rotate sum to align it with a 16b boundary */
+	return (__force __wsum)rol32((__force u32)sum, (offset & 1) << 3);
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.  ihl is the number
+ * of 32-bit words and is always >= 5.
+ */
+static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
+{
+	const u32 *ptr = (const u32 *)iph + 1;
+#ifdef __powerpc64__
+	unsigned int i;
+	u64 s = *(const u32 *)iph;
+
+	for (i = 0; i < ihl - 1; i++, ptr++)
+		s += *ptr;
+	return (__force __wsum)from64to32(s);
+#else
+	__wsum sum, tmp;
+
+	asm("mtctr %3;"
+	    "addc %0,%4,%5;"
+	    "1: lwzu %1, 4(%2);"
+	    "adde %0,%0,%1;"
+	    "bdnz 1b;"
+	    "addze %0,%0;"
+	    : "=r" (sum), "=r" (tmp), "+b" (ptr)
+	    : "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
+	    : "ctr", "xer", "memory");
+
+	return sum;
+#endif
+}
+
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	return csum_fold(ip_fast_csum_nofold(iph, ihl));
+}
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum __csum_partial(const void *buff, int len, __wsum sum);
+
+static __always_inline __wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) {
+		if (len == 2)
+			sum = csum_add(sum, (__force __wsum)*(const u16 *)buff);
+		if (len >= 4)
+			sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);
+		if (len == 6)
+			sum = csum_add(sum, (__force __wsum)
+					    *(const u16 *)(buff + 4));
+		if (len >= 8)
+			sum = csum_add(sum, (__force __wsum)
+					    *(const u32 *)(buff + 4));
+		if (len == 10)
+			sum = csum_add(sum, (__force __wsum)
+					    *(const u16 *)(buff + 8));
+		if (len >= 12)
+			sum = csum_add(sum, (__force __wsum)
+					    *(const u32 *)(buff + 8));
+		if (len == 14)
+			sum = csum_add(sum, (__force __wsum)
+					    *(const u16 *)(buff + 12));
+		if (len >= 16)
+			sum = csum_add(sum, (__force __wsum)
+					    *(const u32 *)(buff + 12));
+	} else if (__builtin_constant_p(len) && (len & 3) == 0) {
+		sum = csum_add(sum, ip_fast_csum_nofold(buff, len >> 2));
+	} else {
+		sum = __csum_partial(buff, len, sum);
+	}
+	return sum;
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+	return csum_fold(csum_partial(buff, len, 0));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum sum);
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/powerpc/include/asm/clocksource.h b/arch/powerpc/include/asm/clocksource.h
new file mode 100644
index 0000000000..0a26ef13a3
--- /dev/null
+++ b/arch/powerpc/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CLOCKSOURCE_H
+#define _ASM_POWERPC_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif /* _ASM_POWERPC_CLOCKSOURCE_H */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
new file mode 100644
index 0000000000..dbb50c06f0
--- /dev/null
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -0,0 +1,760 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CMPXCHG_H_
+#define _ASM_POWERPC_CMPXCHG_H_
+
+#ifdef __KERNEL__
+#include <linux/compiler.h>
+#include <asm/synch.h>
+#include <linux/bug.h>
+
+#ifdef __BIG_ENDIAN	/* BITOFF_CAL: bit shift of a size-byte field at byte offset off inside a u32 */
+#define BITOFF_CAL(size, off)	((sizeof(u32) - (size) - (off)) * BITS_PER_BYTE)
+#else	/* otherwise the byte offset maps straight to the shift; size is unused */
+#define BITOFF_CAL(size, off)	((off) * BITS_PER_BYTE)
+#endif
+/* Generate __xchg_<type><sfx>(): exchange a sub-word field via lwarx/stwcx. on the u32 containing it. */
+#define XCHG_GEN(type, sfx, cl)				\
+static inline u32 __xchg_##type##sfx(volatile void *p, u32 val)	\
+{								\
+	unsigned int prev, prev_mask, tmp, bitoff, off;		\
+	/* Locate the field: byte offset, shift, then mask. */	\
+	off = (unsigned long)p % sizeof(u32);			\
+	bitoff = BITOFF_CAL(sizeof(type), off);			\
+	p -= off;						\
+	val <<= bitoff;						\
+	prev_mask = (u32)(type)-1 << bitoff;			\
+	/* Clear the field in the loaded word, OR in val, retry until stwcx. succeeds. */ \
+	__asm__ __volatile__(					\
+"1:	lwarx   %0,0,%3\n"					\
+"	andc	%1,%0,%5\n"					\
+"	or	%1,%1,%4\n"					\
+"	stwcx.	%1,0,%3\n"					\
+"	bne-	1b\n"						\
+	: "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p)		\
+	: "r" (p), "r" (val), "r" (prev_mask)			\
+	: "cc", cl);						\
+	/* The old word is shifted down but not masked here. */	\
+	return prev >> bitoff;					\
+}
+/* Generate __cmpxchg_<type><sfx>(): sub-word compare-and-exchange on the containing u32; br/br2 are barrier strings. */
+#define CMPXCHG_GEN(type, sfx, br, br2, cl)			\
+static inline							\
+u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)	\
+{								\
+	unsigned int prev, prev_mask, tmp, bitoff, off;		\
+	/* Locate the field and shift old/new into position. */	\
+	off = (unsigned long)p % sizeof(u32);			\
+	bitoff = BITOFF_CAL(sizeof(type), off);			\
+	p -= off;						\
+	old <<= bitoff;						\
+	new <<= bitoff;						\
+	prev_mask = (u32)(type)-1 << bitoff;			\
+	/* A failed compare branches to 2: and so skips both the store and br2. */ \
+	__asm__ __volatile__(					\
+	br							\
+"1:	lwarx   %0,0,%3\n"					\
+"	and	%1,%0,%6\n"					\
+"	cmpw	0,%1,%4\n"					\
+"	bne-	2f\n"						\
+"	andc	%1,%0,%6\n"					\
+"	or	%1,%1,%5\n"					\
+"	stwcx.  %1,0,%3\n"					\
+"	bne-    1b\n"						\
+	br2							\
+	"\n"							\
+"2:"								\
+	: "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p)		\
+	: "r" (p), "r" (old), "r" (new), "r" (prev_mask)	\
+	: "cc", cl);						\
+	/* The old word is shifted down but not masked here. */	\
+	return prev >> bitoff;					\
+}
+
+/*
+ * Atomic exchange
+ *
+ * Changes the memory location '*p' to be val and returns
+ * the previous value stored there.
+ */
+
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
+XCHG_GEN(u8, _local, "memory");
+XCHG_GEN(u8, _relaxed, "cc");
+XCHG_GEN(u16, _local, "memory");
+XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2		# __xchg_u8_local\n"
+"	stbcx.	%3,0,%2 \n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2		# __xchg_u8_relaxed\n"
+"	stbcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2		# __xchg_u16_local\n"
+"	sthcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2		# __xchg_u16_relaxed\n"
+"	sthcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+#endif
+
+static __always_inline unsigned long
+__xchg_u32_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%2 \n"
+"	stwcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u32_relaxed(u32 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%2\n"
+"	stwcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+
+#ifdef CONFIG_PPC64
+static __always_inline unsigned long
+__xchg_u64_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2 \n"
+"	stdcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u64_relaxed(u64 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2\n"
+"	stdcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+#endif
+
+static __always_inline unsigned long
+__xchg_local(void *ptr, unsigned long x, unsigned int size)
+{
+	/* Dispatch on the operand width in bytes to the matching _local helper. */
+	if (size == 1)
+		return __xchg_u8_local(ptr, x);
+	if (size == 2)
+		return __xchg_u16_local(ptr, x);
+	if (size == 4)
+		return __xchg_u32_local(ptr, x);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __xchg_u64_local(ptr, x);
+#endif
+	/* No helper exists for any other width. */
+	BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
+	return x;
+}
+
+static __always_inline unsigned long
+__xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
+{
+	/* Dispatch on the operand width in bytes to the matching _relaxed helper. */
+	if (size == 1)
+		return __xchg_u8_relaxed(ptr, x);
+	if (size == 2)
+		return __xchg_u16_relaxed(ptr, x);
+	if (size == 4)
+		return __xchg_u32_relaxed(ptr, x);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __xchg_u64_relaxed(ptr, x);
+#endif
+	/* No helper exists for any other width. */
+	BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_relaxed");
+	return x;
+}
+#define arch_xchg_local(ptr,x)						     \
+  ({									     \
+     __typeof__(*(ptr)) _x_ = (x);					     \
+     (__typeof__(*(ptr))) __xchg_local((ptr),				     \
+     		(unsigned long)_x_, sizeof(*(ptr))); 			     \
+  })
+
+#define arch_xchg_relaxed(ptr, x)					\
+({									\
+	__typeof__(*(ptr)) _x_ = (x);					\
+	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
+			(unsigned long)_x_, sizeof(*(ptr)));		\
+})
+
+/*
+ * Compare and exchange - if *p == old, set it to new,
+ * and return the old value of *p.
+ */
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
+CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
+CMPXCHG_GEN(u8, _local, , , "memory");
+CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
+CMPXCHG_GEN(u8, _relaxed, , , "cc");
+CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
+CMPXCHG_GEN(u16, _local, , , "memory");
+CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
+CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_local\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lharx	%0,0,%2		# __cmpxchg_u16\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ATOMIC_EXIT_BARRIER
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+	/* Barrier-free lharx/sthcx. loop: store new only when the loaded value equals old; returns the loaded value. */
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_local\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"	/* the newline keeps label 2 on its own assembler line */
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
+
+static __always_inline unsigned long
+__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stwcx.	%4,0,%2\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+	/* Barrier-free lwarx/stwcx. loop: store new only when the loaded value equals old; returns the loaded value. */
+	__asm__ __volatile__ (
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32_local\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stwcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stwcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+/*
+ * The cmpxchg family has no ordering guarantee if the compare part fails,
+ * so we can avoid superfluous barriers by implementing cmpxchg() and
+ * cmpxchg_acquire() in assembly. We don't do the same for
+ * cmpxchg_release() because that would put a barrier in the middle of
+ * an ll/sc loop, which is probably a bad idea: for example, it might
+ * make the conditional store more likely to fail.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stwcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+	"\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+#ifdef CONFIG_PPC64
+static __always_inline unsigned long
+__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
+	cmpd	0,%0,%3\n\
+	bne-	2f\n\
+	stdcx.	%4,0,%2\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned long prev;
+	/* Barrier-free ldarx/stdcx. loop: store new only when the loaded value equals old; returns the loaded value. */
+	__asm__ __volatile__ (
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64_local\n\
+	cmpd	0,%0,%3\n\
+	bne-	2f\n\
+	stdcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64_relaxed\n"
+"	cmpd	0,%0,%3\n"
+"	bne-	2f\n"
+"	stdcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64_acquire\n"
+"	cmpd	0,%0,%3\n"
+"	bne-	2f\n"
+"	stdcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+	"\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
+
+static __always_inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
+{
+	/* Dispatch on the operand width in bytes to the matching helper. */
+	if (size == 1)
+		return __cmpxchg_u8(ptr, old, new);
+	if (size == 2)
+		return __cmpxchg_u16(ptr, old, new);
+	if (size == 4)
+		return __cmpxchg_u32(ptr, old, new);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __cmpxchg_u64(ptr, old, new);
+#endif
+	/* No helper exists for any other width. */
+	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg");
+	return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_local(void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
+{
+	/* Dispatch on the operand width in bytes to the matching _local helper. */
+	if (size == 1)
+		return __cmpxchg_u8_local(ptr, old, new);
+	if (size == 2)
+		return __cmpxchg_u16_local(ptr, old, new);
+	if (size == 4)
+		return __cmpxchg_u32_local(ptr, old, new);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __cmpxchg_u64_local(ptr, old, new);
+#endif
+	/* No helper exists for any other width. */
+	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_local");
+	return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
+		  unsigned int size)
+{
+	/* Dispatch on the operand width in bytes to the matching _relaxed helper. */
+	if (size == 1)
+		return __cmpxchg_u8_relaxed(ptr, old, new);
+	if (size == 2)
+		return __cmpxchg_u16_relaxed(ptr, old, new);
+	if (size == 4)
+		return __cmpxchg_u32_relaxed(ptr, old, new);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __cmpxchg_u64_relaxed(ptr, old, new);
+#endif
+	/* No helper exists for any other width. */
+	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_relaxed");
+	return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
+		  unsigned int size)
+{
+	/* Dispatch on the operand width in bytes to the matching _acquire helper. */
+	if (size == 1)
+		return __cmpxchg_u8_acquire(ptr, old, new);
+	if (size == 2)
+		return __cmpxchg_u16_acquire(ptr, old, new);
+	if (size == 4)
+		return __cmpxchg_u32_acquire(ptr, old, new);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __cmpxchg_u64_acquire(ptr, old, new);
+#endif
+	/* No helper exists for any other width. */
+	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
+	return old;
+}
+#define arch_cmpxchg(ptr, o, n)						 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+
+#define arch_cmpxchg_local(ptr, o, n)					 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_,	 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+#define arch_cmpxchg_relaxed(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg_relaxed((ptr),			\
+			(unsigned long)_o_, (unsigned long)_n_,		\
+			sizeof(*(ptr)));				\
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg_acquire((ptr),			\
+			(unsigned long)_o_, (unsigned long)_n_,		\
+			sizeof(*(ptr)));				\
+})
+#ifdef CONFIG_PPC64
+#define arch_cmpxchg64(ptr, o, n)					\
+  ({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	arch_cmpxchg((ptr), (o), (n));					\
+  })
+#define arch_cmpxchg64_local(ptr, o, n)					\
+  ({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	arch_cmpxchg_local((ptr), (o), (n));				\
+  })
+#define arch_cmpxchg64_relaxed(ptr, o, n)				\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	arch_cmpxchg_relaxed((ptr), (o), (n));				\
+})
+#define arch_cmpxchg64_acquire(ptr, o, n)				\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	arch_cmpxchg_acquire((ptr), (o), (n));				\
+})
+#else
+#include <asm-generic/cmpxchg-local.h>
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_CMPXCHG_H_ */
diff --git a/arch/powerpc/include/asm/code-patching-asm.h b/arch/powerpc/include/asm/code-patching-asm.h
new file mode 100644
index 0000000000..ed7b144849
--- /dev/null
+++ b/arch/powerpc/include/asm/code-patching-asm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_CODE_PATCHING_ASM_H
+#define _ASM_POWERPC_CODE_PATCHING_ASM_H
+
+/* Define a "site" that can be patched: global symbol 'name' in .rodata holding the 32-bit offset from itself to 'label' */
+.macro patch_site label name
+	.pushsection ".rodata"
+	.balign 4
+	.global \name
+\name:
+	.4byte	\label - .
+	.popsection
+.endm
+
+#endif /* _ASM_POWERPC_CODE_PATCHING_ASM_H */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
new file mode 100644
index 0000000000..3f881548fb
--- /dev/null
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_CODE_PATCHING_H
+#define _ASM_POWERPC_CODE_PATCHING_H
+
+/*
+ * Copyright 2008, Michael Ellerman, IBM Corporation.
+ */
+
+#include <asm/types.h>
+#include <asm/ppc-opcode.h>
+#include <linux/string.h>
+#include <linux/kallsyms.h>
+#include <asm/asm-compat.h>
+#include <asm/inst.h>
+
+/* Flags for create_branch:
+ * "b"   == create_branch(addr, target, 0);
+ * "ba"  == create_branch(addr, target, BRANCH_ABSOLUTE);
+ * "bl"  == create_branch(addr, target, BRANCH_SET_LINK);
+ * "bla" == create_branch(addr, target, BRANCH_ABSOLUTE | BRANCH_SET_LINK);
+ */
+#define BRANCH_SET_LINK	0x1
+#define BRANCH_ABSOLUTE	0x2
+
+/*
+ * Powerpc branch instruction is :
+ *
+ *  0         6                 30   31
+ *  +---------+----------------+---+---+
+ *  | opcode  |     LI         |AA |LK |
+ *  +---------+----------------+---+---+
+ *  Where AA = 0 and LK = 0
+ *
+ * LI is a signed 24 bits integer. The real branch offset is computed
+ * by: imm32 = SignExtend(LI:'0b00', 32);
+ *
+ * So the maximum forward branch should be:
+ *   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
+ * The maximum backward branch should be:
+ *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
+ */
+static inline bool is_offset_in_branch_range(long offset)
+{	/* word-aligned and within [-0x2000000, 0x1fffffc] */
+	return !(offset & 0x3) && offset >= -0x2000000 && offset <= 0x1fffffc;
+}
+
+static inline bool is_offset_in_cond_branch_range(long offset)
+{	/* word-aligned and within [-0x8000, 0x7fff] */
+	return !(offset & 0x3) && offset >= -0x8000 && offset <= 0x7fff;
+}
+
+static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
+				unsigned long target, int flags)
+{
+	unsigned long base = (flags & BRANCH_ABSOLUTE) ? 0 : (unsigned long)addr;
+	long disp = target - base;
+
+	/* The caller gets a zero instruction unless encoding succeeds. */
+	*instr = ppc_inst(0);
+	/* Reject displacements the branch's LI field cannot hold. */
+	if (!is_offset_in_branch_range(disp))
+		return 1;
+
+	/*
+	 * 0x48000000 is primary opcode 18; the two masks keep the AA/LK
+	 * flag bits and the LI displacement from overlapping.
+	 */
+	*instr = ppc_inst(0x48000000 | (flags & 0x3) | (disp & 0x03FFFFFC));
+	return 0;
+}
+
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+		       unsigned long target, int flags);
+int patch_branch(u32 *addr, unsigned long target, int flags);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+
+static inline unsigned long patch_site_addr(s32 *site)
+{
+	return (unsigned long)site + *site;
+}
+
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
+{
+	return patch_instruction((u32 *)patch_site_addr(site), instr);
+}
+
+static inline int patch_branch_site(s32 *site, unsigned long target, int flags)
+{
+	return patch_branch((u32 *)patch_site_addr(site), target, flags);
+}
+
+static inline int modify_instruction(unsigned int *addr, unsigned int clr,
+				     unsigned int set)
+{
+	return patch_instruction(addr, ppc_inst((*addr & ~clr) | set));
+}
+
+static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set)
+{
+	return modify_instruction((unsigned int *)patch_site_addr(site), clr, set);
+}
+
+static inline unsigned int branch_opcode(ppc_inst_t instr)
+{
+	return ppc_inst_primary_opcode(instr) & 0x3F;
+}
+
+static inline int instr_is_branch_iform(ppc_inst_t instr)
+{
+	return branch_opcode(instr) == 18;
+}
+
+static inline int instr_is_branch_bform(ppc_inst_t instr)
+{
+	return branch_opcode(instr) == 16;
+}
+
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
+unsigned long branch_target(const u32 *instr);
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+bool is_conditional_branch(ppc_inst_t instr);
+
+#define OP_RT_RA_MASK	0xffff0000UL
+#define LIS_R2		(PPC_RAW_LIS(_R2, 0))
+#define ADDIS_R2_R12	(PPC_RAW_ADDIS(_R2, _R12, 0))
+#define ADDI_R2_R2	(PPC_RAW_ADDI(_R2, _R2, 0))
+
+/* Map a function pointer to its entry address, accounting for the ELF ABI in use. */
+static inline unsigned long ppc_function_entry(void *func)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+	u32 *insn = func;
+
+	/*
+	 * A PPC64 ABIv2 function may have a local and a global entry
+	 * point. We need to use the local entry point when patching
+	 * functions, so identify and step over the global entry point
+	 * sequence.
+	 *
+	 * The global entry point sequence is always of the form:
+	 *
+	 * addis r2,r12,XXXX
+	 * addi  r2,r2,XXXX
+	 *
+	 * A linker optimisation may convert the addis to lis:
+	 *
+	 * lis   r2,XXXX
+	 * addi  r2,r2,XXXX
+	 */
+	if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+	     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+	    ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+		return (unsigned long)(insn + 2);	/* step over the two-instruction sequence */
+	else
+		return (unsigned long)func;	/* no global entry sequence matched */
+#elif defined(CONFIG_PPC64_ELF_ABI_V1)
+	/*
+	 * On PPC64 ABIv1 the function pointer actually points to the
+	 * function's descriptor. The first entry in the descriptor is the
+	 * address of the function text.
+	 */
+	return ((struct func_desc *)func)->addr;
+#else	/* neither ABI option set: the pointer is returned unchanged */
+	return (unsigned long)func;
+#endif
+}
+
+static inline unsigned long ppc_global_function_entry(void *func)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+	/* On PPC64 ABIv2 the global entry point is the function's own address */
+	return (unsigned long)func;
+#else
+	/* In all other cases there is no difference from ppc_function_entry() */
+	return ppc_function_entry(func);
+#endif
+}
+
+/*
+ * Wrapper around kallsyms_lookup() to return function entry address:
+ * - For ABIv1, we lookup the dot variant.
+ * - For ABIv2, we return the local entry point.
+ */
+static inline unsigned long ppc_kallsyms_lookup_name(const char *name)
+{
+	unsigned long addr;
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	/* Look up the dot-prefixed variant of the name first */
+	char dot_name[1 + KSYM_NAME_LEN];
+	bool dot_appended = false;
+
+	if (strnlen(name, KSYM_NAME_LEN) >= KSYM_NAME_LEN)
+		return 0;	/* name is KSYM_NAME_LEN characters or longer */
+
+	if (name[0] != '.') {
+		dot_name[0] = '.';
+		dot_name[1] = '\0';
+		strlcat(dot_name, name, sizeof(dot_name));
+		dot_appended = true;
+	} else {	/* already starts with a dot: copy the name unchanged */
+		dot_name[0] = '\0';
+		strlcat(dot_name, name, sizeof(dot_name));
+	}
+	addr = kallsyms_lookup_name(dot_name);
+	if (!addr && dot_appended)
+		/* Fall back to the original, non-dot symbol name */
+		addr = kallsyms_lookup_name(name);
+#elif defined(CONFIG_PPC64_ELF_ABI_V2)
+	addr = kallsyms_lookup_name(name);
+	if (addr)
+		addr = ppc_function_entry((void *)addr);	/* convert to the local entry point */
+#else
+	addr = kallsyms_lookup_name(name);
+#endif
+	return addr;
+}
+
+/*
+ * Some instruction encodings commonly used in dynamic ftracing
+ * and function live patching.
+ */
+
+/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define R2_STACK_OFFSET         24
+#else
+#define R2_STACK_OFFSET         40
+#endif
+
+#define PPC_INST_LD_TOC		PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET)
+
+/* usually preceded by a mflr r0 */
+#define PPC_INST_STD_LR		PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)
+
+#endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
new file mode 100644
index 0000000000..dda4091fd0
--- /dev/null
+++ b/arch/powerpc/include/asm/compat.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_COMPAT_H
+#define _ASM_POWERPC_COMPAT_H
+#ifdef __KERNEL__
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16		compat_ipc_pid_t;
+
+#define compat_ipc64_perm compat_ipc64_perm
+
+#include <asm-generic/compat.h>
+
+#ifdef __BIG_ENDIAN__
+#define COMPAT_UTS_MACHINE	"ppc\0\0"
+#else
+#define COMPAT_UTS_MACHINE	"ppcle\0\0"
+#endif
+
+typedef s16		compat_nlink_t;
+
+struct compat_stat {
+	compat_dev_t	st_dev;
+	compat_ino_t	st_ino;
+	compat_mode_t	st_mode;
+	compat_nlink_t	st_nlink;
+	__compat_uid32_t	st_uid;
+	__compat_gid32_t	st_gid;
+	compat_dev_t	st_rdev;
+	compat_off_t	st_size;
+	compat_off_t	st_blksize;
+	compat_off_t	st_blocks;
+	old_time32_t	st_atime;
+	u32		st_atime_nsec;
+	old_time32_t	st_mtime;
+	u32		st_mtime_nsec;
+	old_time32_t	st_ctime;
+	u32		st_ctime_nsec;
+	u32		__unused4[2];
+};
+
+/*
+ * ipc64_perm is actually 32/64bit clean but since the compat layer refers to
+ * it we may as well define it.
+ */
+struct compat_ipc64_perm {
+	compat_key_t key;
+	__compat_uid_t uid;
+	__compat_gid_t gid;
+	__compat_uid_t cuid;
+	__compat_gid_t cgid;
+	compat_mode_t mode;
+	unsigned int seq;
+	unsigned int __pad2;
+	unsigned long __unused1;	/* yes they really are 64bit pads */
+	unsigned long __unused2;
+};
+
+struct compat_semid64_ds {
+	struct compat_ipc64_perm sem_perm;
+	unsigned int sem_otime_high;
+	unsigned int sem_otime;
+	unsigned int sem_ctime_high;
+	unsigned int sem_ctime;
+	compat_ulong_t sem_nsems;
+	compat_ulong_t __unused3;
+	compat_ulong_t __unused4;
+};
+
+struct compat_msqid64_ds {
+	struct compat_ipc64_perm msg_perm;
+	unsigned int msg_stime_high;
+	unsigned int msg_stime;
+	unsigned int msg_rtime_high;
+	unsigned int msg_rtime;
+	unsigned int msg_ctime_high;
+	unsigned int msg_ctime;
+	compat_ulong_t msg_cbytes;
+	compat_ulong_t msg_qnum;
+	compat_ulong_t msg_qbytes;
+	compat_pid_t msg_lspid;
+	compat_pid_t msg_lrpid;
+	compat_ulong_t __unused4;
+	compat_ulong_t __unused5;
+};
+
+struct compat_shmid64_ds {
+	struct compat_ipc64_perm shm_perm;
+	unsigned int shm_atime_high;
+	unsigned int shm_atime;
+	unsigned int shm_dtime_high;
+	unsigned int shm_dtime;
+	unsigned int shm_ctime_high;
+	unsigned int shm_ctime;
+	unsigned int __unused4;
+	compat_size_t shm_segsz;
+	compat_pid_t shm_cpid;
+	compat_pid_t shm_lpid;
+	compat_ulong_t shm_nattch;
+	compat_ulong_t __unused5;
+	compat_ulong_t __unused6;
+};
+
+static inline int is_compat_task(void)
+{
+	return is_32bit_task();
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_COMPAT_H */
diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h
new file mode 100644
index 0000000000..4b63931c49
--- /dev/null
+++ b/arch/powerpc/include/asm/context_tracking.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H
+#define _ASM_POWERPC_CONTEXT_TRACKING_H
+
+#ifdef CONFIG_CONTEXT_TRACKING_USER
+#define SCHEDULE_USER bl	schedule_user
+#else
+#define SCHEDULE_USER bl	schedule
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h
new file mode 100644
index 0000000000..fd2e166ea0
--- /dev/null
+++ b/arch/powerpc/include/asm/copro.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2014 IBM Corp.
+ */
+
+#ifndef _ASM_POWERPC_COPRO_H
+#define _ASM_POWERPC_COPRO_H
+
+#include <linux/mm_types.h>
+
+struct copro_slb
+{
+	u64 esid, vsid;
+};
+
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+			  unsigned long dsisr, vm_fault_t *flt);
+
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb);
+
+
+#ifdef CONFIG_PPC_COPRO_BASE
+void copro_flush_all_slbs(struct mm_struct *mm);
+#else
+static inline void copro_flush_all_slbs(struct mm_struct *mm) {}
+#endif
+#endif /* _ASM_POWERPC_COPRO_H */
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
new file mode 100644
index 0000000000..ce483b0f8a
--- /dev/null
+++ b/arch/powerpc/include/asm/cpm.h
@@ -0,0 +1 @@
+#include <soc/fsl/cpm.h>
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
new file mode 100644
index 0000000000..3bdd74739c
--- /dev/null
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -0,0 +1,612 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MPC8xx Communication Processor Module.
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ *
+ * This file contains structures and information for the communication
+ * processor channels.  Some CPM control and status is available
+ * through the MPC8xx internal memory map.  See immap.h for details.
+ * This file only contains what I need for the moment, not the total
+ * CPM capabilities.  I (or someone else) will add definitions as they
+ * are needed.  -- Dan
+ *
+ * On the MBX board, EPPC-Bug loads CPM microcode into the first 512
+ * bytes of the DP RAM and relocates the I2C parameter area to the
+ * IDMA1 space.  The remaining DP RAM is available for buffer descriptors
+ * or other use.
+ */
+#ifndef __CPM1__
+#define __CPM1__
+
+#include <linux/init.h>
+#include <asm/8xx_immap.h>
+#include <asm/ptrace.h>
+#include <asm/cpm.h>
+
+/* CPM Command register.
+*/
+#define CPM_CR_RST	((ushort)0x8000)
+#define CPM_CR_OPCODE	((ushort)0x0f00)
+#define CPM_CR_CHAN	((ushort)0x00f0)
+#define CPM_CR_FLG	((ushort)0x0001)
+
+/* Channel numbers.
+*/
+#define CPM_CR_CH_SCC1		((ushort)0x0000)
+#define CPM_CR_CH_I2C		((ushort)0x0001)	/* I2C and IDMA1 */
+#define CPM_CR_CH_SCC2		((ushort)0x0004)
+#define CPM_CR_CH_SPI		((ushort)0x0005)	/* SPI / IDMA2 / Timers */
+#define CPM_CR_CH_TIMER		CPM_CR_CH_SPI
+#define CPM_CR_CH_SCC3		((ushort)0x0008)
+#define CPM_CR_CH_SMC1		((ushort)0x0009)	/* SMC1 / DSP1 */
+#define CPM_CR_CH_SCC4		((ushort)0x000c)
+#define CPM_CR_CH_SMC2		((ushort)0x000d)	/* SMC2 / DSP2 */
+
+/*
+ * Build a CPM command register value: opcode CMD goes to bit 8 (the
+ * CPM_CR_OPCODE field) and channel number CH to bit 4 (the CPM_CR_CHAN
+ * field).  Both arguments are parenthesized so that a compound expression
+ * such as "a | b" is shifted as a whole rather than being split up by
+ * operator precedence.
+ */
+#define mk_cr_cmd(CH, CMD)	(((CMD) << 8) | ((CH) << 4))
+
+/* Export the base address of the communication processor registers
+ * and dual port ram.
+ */
+extern cpm8xx_t __iomem *cpmp; /* Pointer to comm processor */
+
+#define cpm_dpalloc cpm_muram_alloc
+#define cpm_dpfree cpm_muram_free
+#define cpm_dpram_addr cpm_muram_addr
+#define cpm_dpram_phys cpm_muram_dma
+
+extern void cpm_setbrg(uint brg, uint rate);
+
+extern void __init cpm_load_patch(cpm8xx_t *cp);
+
+extern void cpm_reset(void);
+
+/* Parameter RAM offsets.
+*/
+#define PROFF_SCC1	((uint)0x0000)
+#define PROFF_IIC	((uint)0x0080)
+#define PROFF_SCC2	((uint)0x0100)
+#define PROFF_SPI	((uint)0x0180)
+#define PROFF_SCC3	((uint)0x0200)
+#define PROFF_SMC1	((uint)0x0280)
+#define PROFF_DSP1	((uint)0x02c0)
+#define PROFF_SCC4	((uint)0x0300)
+#define PROFF_SMC2	((uint)0x0380)
+
+/* Define enough so I can at least use the serial port as a UART.
+ * The MBX uses SMC1 as the host serial port.
+ *
+ * Parameter area of an SMC operated as a UART.
+ * NOTE(review): field order and widths presumably mirror the hardware
+ * parameter RAM and must not be reordered -- confirm against the MPC8xx
+ * manual.  Fields tagged "Internal" are presumably CPM-owned working
+ * state rather than something a driver programs.
+ */
+typedef struct smc_uart {
+	ushort	smc_rbase;	/* Rx Buffer descriptor base address */
+	ushort	smc_tbase;	/* Tx Buffer descriptor base address */
+	u_char	smc_rfcr;	/* Rx function code */
+	u_char	smc_tfcr;	/* Tx function code */
+	ushort	smc_mrblr;	/* Max receive buffer length */
+	uint	smc_rstate;	/* Internal */
+	uint	smc_idp;	/* Internal */
+	ushort	smc_rbptr;	/* Internal */
+	ushort	smc_ibc;	/* Internal */
+	uint	smc_rxtmp;	/* Internal */
+	uint	smc_tstate;	/* Internal */
+	uint	smc_tdp;	/* Internal */
+	ushort	smc_tbptr;	/* Internal */
+	ushort	smc_tbc;	/* Internal */
+	uint	smc_txtmp;	/* Internal */
+	ushort	smc_maxidl;	/* Maximum idle characters */
+	ushort	smc_tmpidl;	/* Temporary idle counter */
+	ushort	smc_brklen;	/* Last received break length */
+	ushort	smc_brkec;	/* rcv'd break condition counter */
+	ushort	smc_brkcr;	/* xmt break count register */
+	ushort	smc_rmask;	/* Temporary bit mask */
+	char	res1[8];	/* Reserved */
+	ushort	smc_rpbase;	/* Relocation pointer */
+} smc_uart_t;
+
+/* Function code bits.
+*/
+#define SMC_EB	((u_char)0x10)	/* Set big endian byte order */
+
+/* SMC uart mode register.
+*/
+#define	SMCMR_REN	((ushort)0x0001)
+#define SMCMR_TEN	((ushort)0x0002)
+#define SMCMR_DM	((ushort)0x000c)
+#define SMCMR_SM_GCI	((ushort)0x0000)
+#define SMCMR_SM_UART	((ushort)0x0020)
+#define SMCMR_SM_TRANS	((ushort)0x0030)
+#define SMCMR_SM_MASK	((ushort)0x0030)
+#define SMCMR_PM_EVEN	((ushort)0x0100)	/* Even parity, else odd */
+#define SMCMR_REVD	SMCMR_PM_EVEN
+#define SMCMR_PEN	((ushort)0x0200)	/* Parity enable */
+#define SMCMR_BS	SMCMR_PEN
+#define SMCMR_SL	((ushort)0x0400)	/* Two stops, else one */
+#define SMCR_CLEN_MASK	((ushort)0x7800)	/* Character length */
+#define smcr_mk_clen(C)	(((C) << 11) & SMCR_CLEN_MASK)
+
+/* SMC2 as Centronics parallel printer.  It is half duplex, in that
+ * it can only receive or transmit.  The parameter ram values for
+ * each direction are either unique or properly overlap, so we can
+ * include them in one structure.
+ */
+/*
+ * NOTE(review): the individual fields carry no documentation in this
+ * header.  Up to and including scent_ttemp the member types line up
+ * position-for-position with smc_uart_t (buffer descriptor bases, two
+ * byte-wide fields, mrblr, then the rx/tx state words); the meaning of
+ * each field should be confirmed against the MPC8xx manual.
+ */
+typedef struct smc_centronics {
+	ushort	scent_rbase;
+	ushort	scent_tbase;
+	u_char	scent_cfcr;
+	u_char	scent_smask;
+	ushort	scent_mrblr;
+	uint	scent_rstate;
+	uint	scent_r_ptr;
+	ushort	scent_rbptr;
+	ushort	scent_r_cnt;
+	uint	scent_rtemp;
+	uint	scent_tstate;
+	uint	scent_t_ptr;
+	ushort	scent_tbptr;
+	ushort	scent_t_cnt;
+	uint	scent_ttemp;
+	ushort	scent_max_sl;
+	ushort	scent_sl_cnt;
+	ushort	scent_character1;
+	ushort	scent_character2;
+	ushort	scent_character3;
+	ushort	scent_character4;
+	ushort	scent_character5;
+	ushort	scent_character6;
+	ushort	scent_character7;
+	ushort	scent_character8;
+	ushort	scent_rccm;
+	ushort	scent_rccr;
+} smc_cent_t;
+
+/* Centronics Status Mask Register.
+*/
+#define SMC_CENT_F	((u_char)0x08)
+#define SMC_CENT_PE	((u_char)0x04)
+#define SMC_CENT_S	((u_char)0x02)
+
+/* SMC Event and Mask register.
+*/
+#define	SMCM_BRKE	((unsigned char)0x40)	/* When in UART Mode */
+#define	SMCM_BRK	((unsigned char)0x10)	/* When in UART Mode */
+#define	SMCM_TXE	((unsigned char)0x10)	/* When in Transparent Mode */
+#define	SMCM_BSY	((unsigned char)0x04)
+#define	SMCM_TX		((unsigned char)0x02)
+#define	SMCM_RX		((unsigned char)0x01)
+
+/* Baud rate generators.
+*/
+#define CPM_BRG_RST		((uint)0x00020000)
+#define CPM_BRG_EN		((uint)0x00010000)
+#define CPM_BRG_EXTC_INT	((uint)0x00000000)
+#define CPM_BRG_EXTC_CLK2	((uint)0x00004000)
+#define CPM_BRG_EXTC_CLK6	((uint)0x00008000)
+#define CPM_BRG_ATB		((uint)0x00002000)
+#define CPM_BRG_CD_MASK		((uint)0x00001ffe)
+#define CPM_BRG_DIV16		((uint)0x00000001)
+
+/* SI Clock Route Register
+*/
+#define SICR_RCLK_SCC1_BRG1	((uint)0x00000000)
+#define SICR_TCLK_SCC1_BRG1	((uint)0x00000000)
+#define SICR_RCLK_SCC2_BRG2	((uint)0x00000800)
+#define SICR_TCLK_SCC2_BRG2	((uint)0x00000100)
+#define SICR_RCLK_SCC3_BRG3	((uint)0x00100000)
+#define SICR_TCLK_SCC3_BRG3	((uint)0x00020000)
+#define SICR_RCLK_SCC4_BRG4	((uint)0x18000000)
+#define SICR_TCLK_SCC4_BRG4	((uint)0x03000000)
+
+/* SCCs.
+*/
+#define SCC_GSMRH_IRP		((uint)0x00040000)
+#define SCC_GSMRH_GDE		((uint)0x00010000)
+#define SCC_GSMRH_TCRC_CCITT	((uint)0x00008000)
+#define SCC_GSMRH_TCRC_BISYNC	((uint)0x00004000)
+#define SCC_GSMRH_TCRC_HDLC	((uint)0x00000000)
+#define SCC_GSMRH_REVD		((uint)0x00002000)
+#define SCC_GSMRH_TRX		((uint)0x00001000)
+#define SCC_GSMRH_TTX		((uint)0x00000800)
+#define SCC_GSMRH_CDP		((uint)0x00000400)
+#define SCC_GSMRH_CTSP		((uint)0x00000200)
+#define SCC_GSMRH_CDS		((uint)0x00000100)
+#define SCC_GSMRH_CTSS		((uint)0x00000080)
+#define SCC_GSMRH_TFL		((uint)0x00000040)
+#define SCC_GSMRH_RFW		((uint)0x00000020)
+#define SCC_GSMRH_TXSY		((uint)0x00000010)
+#define SCC_GSMRH_SYNL16	((uint)0x0000000c)
+#define SCC_GSMRH_SYNL8		((uint)0x00000008)
+#define SCC_GSMRH_SYNL4		((uint)0x00000004)
+#define SCC_GSMRH_RTSM		((uint)0x00000002)
+#define SCC_GSMRH_RSYN		((uint)0x00000001)
+
+#define SCC_GSMRL_SIR		((uint)0x80000000)	/* SCC2 only */
+#define SCC_GSMRL_EDGE_NONE	((uint)0x60000000)
+#define SCC_GSMRL_EDGE_NEG	((uint)0x40000000)
+#define SCC_GSMRL_EDGE_POS	((uint)0x20000000)
+#define SCC_GSMRL_EDGE_BOTH	((uint)0x00000000)
+#define SCC_GSMRL_TCI		((uint)0x10000000)
+#define SCC_GSMRL_TSNC_3	((uint)0x0c000000)
+#define SCC_GSMRL_TSNC_4	((uint)0x08000000)
+#define SCC_GSMRL_TSNC_14	((uint)0x04000000)
+#define SCC_GSMRL_TSNC_INF	((uint)0x00000000)
+#define SCC_GSMRL_RINV		((uint)0x02000000)
+#define SCC_GSMRL_TINV		((uint)0x01000000)
+#define SCC_GSMRL_TPL_128	((uint)0x00c00000)
+#define SCC_GSMRL_TPL_64	((uint)0x00a00000)
+#define SCC_GSMRL_TPL_48	((uint)0x00800000)
+#define SCC_GSMRL_TPL_32	((uint)0x00600000)
+#define SCC_GSMRL_TPL_16	((uint)0x00400000)
+#define SCC_GSMRL_TPL_8		((uint)0x00200000)
+#define SCC_GSMRL_TPL_NONE	((uint)0x00000000)
+#define SCC_GSMRL_TPP_ALL1	((uint)0x00180000)
+#define SCC_GSMRL_TPP_01	((uint)0x00100000)
+#define SCC_GSMRL_TPP_10	((uint)0x00080000)
+#define SCC_GSMRL_TPP_ZEROS	((uint)0x00000000)
+#define SCC_GSMRL_TEND		((uint)0x00040000)
+#define SCC_GSMRL_TDCR_32	((uint)0x00030000)
+#define SCC_GSMRL_TDCR_16	((uint)0x00020000)
+#define SCC_GSMRL_TDCR_8	((uint)0x00010000)
+#define SCC_GSMRL_TDCR_1	((uint)0x00000000)
+#define SCC_GSMRL_RDCR_32	((uint)0x0000c000)
+#define SCC_GSMRL_RDCR_16	((uint)0x00008000)
+#define SCC_GSMRL_RDCR_8	((uint)0x00004000)
+#define SCC_GSMRL_RDCR_1	((uint)0x00000000)
+#define SCC_GSMRL_RENC_DFMAN	((uint)0x00003000)
+#define SCC_GSMRL_RENC_MANCH	((uint)0x00002000)
+#define SCC_GSMRL_RENC_FM0	((uint)0x00001000)
+#define SCC_GSMRL_RENC_NRZI	((uint)0x00000800)
+#define SCC_GSMRL_RENC_NRZ	((uint)0x00000000)
+#define SCC_GSMRL_TENC_DFMAN	((uint)0x00000600)
+#define SCC_GSMRL_TENC_MANCH	((uint)0x00000400)
+#define SCC_GSMRL_TENC_FM0	((uint)0x00000200)
+#define SCC_GSMRL_TENC_NRZI	((uint)0x00000100)
+#define SCC_GSMRL_TENC_NRZ	((uint)0x00000000)
+#define SCC_GSMRL_DIAG_LE	((uint)0x000000c0)	/* Loop and echo */
+#define SCC_GSMRL_DIAG_ECHO	((uint)0x00000080)
+#define SCC_GSMRL_DIAG_LOOP	((uint)0x00000040)
+#define SCC_GSMRL_DIAG_NORM	((uint)0x00000000)
+#define SCC_GSMRL_ENR		((uint)0x00000020)
+#define SCC_GSMRL_ENT		((uint)0x00000010)
+#define SCC_GSMRL_MODE_ENET	((uint)0x0000000c)
+#define SCC_GSMRL_MODE_QMC	((uint)0x0000000a)
+#define SCC_GSMRL_MODE_DDCMP	((uint)0x00000009)
+#define SCC_GSMRL_MODE_BISYNC	((uint)0x00000008)
+#define SCC_GSMRL_MODE_V14	((uint)0x00000007)
+#define SCC_GSMRL_MODE_AHDLC	((uint)0x00000006)
+#define SCC_GSMRL_MODE_PROFIBUS	((uint)0x00000005)
+#define SCC_GSMRL_MODE_UART	((uint)0x00000004)
+#define SCC_GSMRL_MODE_SS7	((uint)0x00000003)
+#define SCC_GSMRL_MODE_ATALK	((uint)0x00000002)
+#define SCC_GSMRL_MODE_HDLC	((uint)0x00000000)
+
+#define SCC_TODR_TOD		((ushort)0x8000)
+
+/* SCC Event and Mask register.
+*/
+#define	SCCM_TXE	((unsigned char)0x10)
+#define	SCCM_BSY	((unsigned char)0x04)
+#define	SCCM_TX		((unsigned char)0x02)
+#define	SCCM_RX		((unsigned char)0x01)
+
+/*
+ * Parameter block common to every SCC protocol.  It is embedded as the
+ * first member of scc_enet_t, scc_uart_t and scc_trans_t, which append
+ * their protocol-specific fields after it.
+ * NOTE(review): fields tagged "Internal" are presumably CPM-owned working
+ * state -- confirm against the MPC8xx manual.
+ */
+typedef struct scc_param {
+	ushort	scc_rbase;	/* Rx Buffer descriptor base address */
+	ushort	scc_tbase;	/* Tx Buffer descriptor base address */
+	u_char	scc_rfcr;	/* Rx function code */
+	u_char	scc_tfcr;	/* Tx function code */
+	ushort	scc_mrblr;	/* Max receive buffer length */
+	uint	scc_rstate;	/* Internal */
+	uint	scc_idp;	/* Internal */
+	ushort	scc_rbptr;	/* Internal */
+	ushort	scc_ibc;	/* Internal */
+	uint	scc_rxtmp;	/* Internal */
+	uint	scc_tstate;	/* Internal */
+	uint	scc_tdp;	/* Internal */
+	ushort	scc_tbptr;	/* Internal */
+	ushort	scc_tbc;	/* Internal */
+	uint	scc_txtmp;	/* Internal */
+	uint	scc_rcrc;	/* Internal */
+	uint	scc_tcrc;	/* Internal */
+} sccp_t;
+
+/* Function code bits.
+*/
+#define SCC_EB	((u_char)0x10)	/* Set big endian byte order */
+
+/* CPM Ethernet through SCCx.
+ *
+ * Starts with the common SCC parameter block (sccp_t) and continues with
+ * the Ethernet-specific fields.
+ * NOTE(review): the member order presumably mirrors the hardware
+ * parameter RAM (see the in-struct note about the manual) and must not
+ * be changed -- confirm against the MPC8xx manual.
+ */
+typedef struct scc_enet {
+	sccp_t	sen_genscc;
+	uint	sen_cpres;	/* Preset CRC */
+	uint	sen_cmask;	/* Constant mask for CRC */
+	uint	sen_crcec;	/* CRC Error counter */
+	uint	sen_alec;	/* alignment error counter */
+	uint	sen_disfc;	/* discard frame counter */
+	ushort	sen_pads;	/* Tx short frame pad character */
+	ushort	sen_retlim;	/* Retry limit threshold */
+	ushort	sen_retcnt;	/* Retry limit counter */
+	ushort	sen_maxflr;	/* maximum frame length register */
+	ushort	sen_minflr;	/* minimum frame length register */
+	ushort	sen_maxd1;	/* maximum DMA1 length */
+	ushort	sen_maxd2;	/* maximum DMA2 length */
+	ushort	sen_maxd;	/* Rx max DMA */
+	ushort	sen_dmacnt;	/* Rx DMA counter */
+	ushort	sen_maxb;	/* Max BD byte count */
+	ushort	sen_gaddr1;	/* Group address filter */
+	ushort	sen_gaddr2;
+	ushort	sen_gaddr3;
+	ushort	sen_gaddr4;
+	uint	sen_tbuf0data0;	/* Save area 0 - current frame */
+	uint	sen_tbuf0data1;	/* Save area 1 - current frame */
+	uint	sen_tbuf0rba;	/* Internal */
+	uint	sen_tbuf0crc;	/* Internal */
+	ushort	sen_tbuf0bcnt;	/* Internal */
+	ushort	sen_paddrh;	/* physical address (MSB) */
+	ushort	sen_paddrm;
+	ushort	sen_paddrl;	/* physical address (LSB) */
+	ushort	sen_pper;	/* persistence */
+	ushort	sen_rfbdptr;	/* Rx first BD pointer */
+	ushort	sen_tfbdptr;	/* Tx first BD pointer */
+	ushort	sen_tlbdptr;	/* Tx last BD pointer */
+	uint	sen_tbuf1data0;	/* Save area 0 - current frame */
+	uint	sen_tbuf1data1;	/* Save area 1 - current frame */
+	uint	sen_tbuf1rba;	/* Internal */
+	uint	sen_tbuf1crc;	/* Internal */
+	ushort	sen_tbuf1bcnt;	/* Internal */
+	ushort	sen_txlen;	/* Tx Frame length counter */
+	ushort	sen_iaddr1;	/* Individual address filter */
+	ushort	sen_iaddr2;
+	ushort	sen_iaddr3;
+	ushort	sen_iaddr4;
+	ushort	sen_boffcnt;	/* Backoff counter */
+
+	/* NOTE: Some versions of the manual have the following items
+	 * incorrectly documented.  Below is the proper order.
+	 */
+	ushort	sen_taddrh;	/* temp address (MSB) */
+	ushort	sen_taddrm;
+	ushort	sen_taddrl;	/* temp address (LSB) */
+} scc_enet_t;
+
+/* SCC Event register as used by Ethernet.
+*/
+#define SCCE_ENET_GRA	((ushort)0x0080)	/* Graceful stop complete */
+#define SCCE_ENET_TXE	((ushort)0x0010)	/* Transmit Error */
+#define SCCE_ENET_RXF	((ushort)0x0008)	/* Full frame received */
+#define SCCE_ENET_BSY	((ushort)0x0004)	/* All incoming buffers full */
+#define SCCE_ENET_TXB	((ushort)0x0002)	/* A buffer was transmitted */
+#define SCCE_ENET_RXB	((ushort)0x0001)	/* A buffer was received */
+
+/* SCC Mode Register (PSMR) as used by Ethernet.
+*/
+#define SCC_PSMR_HBC	((ushort)0x8000)	/* Enable heartbeat */
+#define SCC_PSMR_FC	((ushort)0x4000)	/* Force collision */
+#define SCC_PSMR_RSH	((ushort)0x2000)	/* Receive short frames */
+#define SCC_PSMR_IAM	((ushort)0x1000)	/* Check individual hash */
+#define SCC_PSMR_ENCRC	((ushort)0x0800)	/* Ethernet CRC mode */
+#define SCC_PSMR_PRO	((ushort)0x0200)	/* Promiscuous mode */
+#define SCC_PSMR_BRO	((ushort)0x0100)	/* Catch broadcast pkts */
+#define SCC_PSMR_SBT	((ushort)0x0080)	/* Special backoff timer */
+#define SCC_PSMR_LPB	((ushort)0x0040)	/* Set Loopback mode */
+#define SCC_PSMR_SIP	((ushort)0x0020)	/* Sample Input Pins */
+#define SCC_PSMR_LCW	((ushort)0x0010)	/* Late collision window */
+#define SCC_PSMR_NIB22	((ushort)0x000a)	/* Start frame search */
+#define SCC_PSMR_FDE	((ushort)0x0001)	/* Full duplex enable */
+
+/* SCC as UART
+ *
+ * Starts with the common SCC parameter block (sccp_t), followed by eight
+ * reserved bytes and the UART-specific counters and control characters.
+ * NOTE(review): the member order presumably mirrors the hardware
+ * parameter RAM -- confirm against the MPC8xx manual before changing it.
+*/
+typedef struct scc_uart {
+	sccp_t	scc_genscc;
+	char	res1[8];	/* Reserved */
+	ushort	scc_maxidl;	/* Maximum idle chars */
+	ushort	scc_idlc;	/* temp idle counter */
+	ushort	scc_brkcr;	/* Break count register */
+	ushort	scc_parec;	/* receive parity error counter */
+	ushort	scc_frmec;	/* receive framing error counter */
+	ushort	scc_nosec;	/* receive noise counter */
+	ushort	scc_brkec;	/* receive break condition counter */
+	ushort	scc_brkln;	/* last received break length */
+	ushort	scc_uaddr1;	/* UART address character 1 */
+	ushort	scc_uaddr2;	/* UART address character 2 */
+	ushort	scc_rtemp;	/* Temp storage */
+	ushort	scc_toseq;	/* Transmit out of sequence char */
+	ushort	scc_char1;	/* control character 1 */
+	ushort	scc_char2;	/* control character 2 */
+	ushort	scc_char3;	/* control character 3 */
+	ushort	scc_char4;	/* control character 4 */
+	ushort	scc_char5;	/* control character 5 */
+	ushort	scc_char6;	/* control character 6 */
+	ushort	scc_char7;	/* control character 7 */
+	ushort	scc_char8;	/* control character 8 */
+	ushort	scc_rccm;	/* receive control character mask */
+	ushort	scc_rccr;	/* receive control character register */
+	ushort	scc_rlbc;	/* receive last break character */
+} scc_uart_t;
+
+/* SCC Event and Mask registers when it is used as a UART.
+*/
+#define UART_SCCM_GLR		((ushort)0x1000)
+#define UART_SCCM_GLT		((ushort)0x0800)
+#define UART_SCCM_AB		((ushort)0x0200)
+#define UART_SCCM_IDL		((ushort)0x0100)
+#define UART_SCCM_GRA		((ushort)0x0080)
+#define UART_SCCM_BRKE		((ushort)0x0040)
+#define UART_SCCM_BRKS		((ushort)0x0020)
+#define UART_SCCM_CCR		((ushort)0x0008)
+#define UART_SCCM_BSY		((ushort)0x0004)
+#define UART_SCCM_TX		((ushort)0x0002)
+#define UART_SCCM_RX		((ushort)0x0001)
+
+/* The SCC PSMR when used as a UART.
+*/
+#define SCU_PSMR_FLC		((ushort)0x8000)
+#define SCU_PSMR_SL		((ushort)0x4000)
+#define SCU_PSMR_CL		((ushort)0x3000)
+#define SCU_PSMR_UM		((ushort)0x0c00)
+#define SCU_PSMR_FRZ		((ushort)0x0200)
+#define SCU_PSMR_RZS		((ushort)0x0100)
+#define SCU_PSMR_SYN		((ushort)0x0080)
+#define SCU_PSMR_DRT		((ushort)0x0040)
+#define SCU_PSMR_PEN		((ushort)0x0010)
+#define SCU_PSMR_RPM		((ushort)0x000c)
+#define SCU_PSMR_REVP		((ushort)0x0008)
+#define SCU_PSMR_TPM		((ushort)0x0003)
+#define SCU_PSMR_TEVP		((ushort)0x0002)
+
+/* CPM Transparent mode SCC.
+ *
+ * The common SCC parameter block (sccp_t) followed by the two CRC
+ * settings used in this mode.
+ */
+typedef struct scc_trans {
+	sccp_t	st_genscc;
+	uint	st_cpres;	/* Preset CRC */
+	uint	st_cmask;	/* Constant mask for CRC */
+} scc_trans_t;
+
+/* IIC parameter RAM.
+ *
+ * NOTE(review): fields tagged "Internal" are presumably CPM-owned working
+ * state, and the member order presumably mirrors the hardware layout --
+ * confirm against the MPC8xx manual before changing anything here.
+*/
+typedef struct iic {
+	ushort	iic_rbase;	/* Rx Buffer descriptor base address */
+	ushort	iic_tbase;	/* Tx Buffer descriptor base address */
+	u_char	iic_rfcr;	/* Rx function code */
+	u_char	iic_tfcr;	/* Tx function code */
+	ushort	iic_mrblr;	/* Max receive buffer length */
+	uint	iic_rstate;	/* Internal */
+	uint	iic_rdp;	/* Internal */
+	ushort	iic_rbptr;	/* Internal */
+	ushort	iic_rbc;	/* Internal */
+	uint	iic_rxtmp;	/* Internal */
+	uint	iic_tstate;	/* Internal */
+	uint	iic_tdp;	/* Internal */
+	ushort	iic_tbptr;	/* Internal */
+	ushort	iic_tbc;	/* Internal */
+	uint	iic_txtmp;	/* Internal */
+	char	res1[4];	/* Reserved */
+	ushort	iic_rpbase;	/* Relocation pointer */
+	char	res2[2];	/* Reserved */
+} iic_t;
+
+/*
+ * RISC Controller Configuration Register definitions
+ */
+#define RCCR_TIME	0x8000			/* RISC Timer Enable */
+#define RCCR_TIMEP(t)	(((t) & 0x3F)<<8)	/* RISC Timer Period */
+#define RCCR_TIME_MASK	0x00FF			/* not RISC Timer related bits */
+
+/* RISC Timer Parameter RAM offset */
+#define PROFF_RTMR	((uint)0x01B0)
+
+/*
+ * RISC timer parameter area (its offset is given by PROFF_RTMR).
+ * NOTE(review): tm_cmd and tm_cnt are declared "unsigned long", so their
+ * width follows the target ABI -- presumably 32 bits on the 8xx, which is
+ * what the TM_CMD_* bit definitions assume; confirm before building this
+ * header for a 64-bit target.
+ */
+typedef struct risc_timer_pram {
+	unsigned short	tm_base;	/* RISC Timer Table Base Address */
+	unsigned short	tm_ptr;		/* RISC Timer Table Pointer (internal) */
+	unsigned short	r_tmr;		/* RISC Timer Mode Register */
+	unsigned short	r_tmv;		/* RISC Timer Valid Register */
+	unsigned long	tm_cmd;		/* RISC Timer Command Register */
+	unsigned long	tm_cnt;		/* RISC Timer Internal Count */
+} rt_pram_t;
+
+/* Bits in RISC Timer Command Register */
+#define TM_CMD_VALID	0x80000000	/* Valid - Enables the timer */
+#define TM_CMD_RESTART	0x40000000	/* Restart - for automatic restart */
+#define TM_CMD_PWM	0x20000000	/* Run in Pulse Width Modulation Mode */
+#define TM_CMD_NUM(n)	(((n)&0xF)<<16)	/* Timer Number */
+#define TM_CMD_PERIOD(p) ((p)&0xFFFF)	/* Timer Period */
+
+/* CPM interrupts.  There are nearly 32 interrupts generated by CPM
+ * channels or devices.  All of these are presented to the PPC core
+ * as a single interrupt.  The CPM interrupt handler dispatches its
+ * own handlers, in a similar fashion to the PPC core handler.  We
+ * use the table as defined in the manuals (i.e. no special high
+ * priority and SCC1 == SCCa, etc...).
+ */
+#define CPMVEC_NR		32
+#define	CPMVEC_PIO_PC15		((ushort)0x1f)
+#define	CPMVEC_SCC1		((ushort)0x1e)
+#define	CPMVEC_SCC2		((ushort)0x1d)
+#define	CPMVEC_SCC3		((ushort)0x1c)
+#define	CPMVEC_SCC4		((ushort)0x1b)
+#define	CPMVEC_PIO_PC14		((ushort)0x1a)
+#define	CPMVEC_TIMER1		((ushort)0x19)
+#define	CPMVEC_PIO_PC13		((ushort)0x18)
+#define	CPMVEC_PIO_PC12		((ushort)0x17)
+#define	CPMVEC_SDMA_CB_ERR	((ushort)0x16)
+#define CPMVEC_IDMA1		((ushort)0x15)
+#define CPMVEC_IDMA2		((ushort)0x14)
+#define CPMVEC_TIMER2		((ushort)0x12)
+#define CPMVEC_RISCTIMER	((ushort)0x11)
+#define CPMVEC_I2C		((ushort)0x10)
+#define	CPMVEC_PIO_PC11		((ushort)0x0f)
+#define	CPMVEC_PIO_PC10		((ushort)0x0e)
+#define CPMVEC_TIMER3		((ushort)0x0c)
+#define	CPMVEC_PIO_PC9		((ushort)0x0b)
+#define	CPMVEC_PIO_PC8		((ushort)0x0a)
+#define	CPMVEC_PIO_PC7		((ushort)0x09)
+#define CPMVEC_TIMER4		((ushort)0x07)
+#define	CPMVEC_PIO_PC6		((ushort)0x06)
+#define	CPMVEC_SPI		((ushort)0x05)
+#define	CPMVEC_SMC1		((ushort)0x04)
+#define	CPMVEC_SMC2		((ushort)0x03)
+#define	CPMVEC_PIO_PC5		((ushort)0x02)
+#define	CPMVEC_PIO_PC4		((ushort)0x01)
+#define	CPMVEC_ERROR		((ushort)0x00)
+
+/* CPM interrupt configuration vector.
+*/
+#define	CICR_SCD_SCC4		((uint)0x00c00000)	/* SCC4 @ SCCd */
+#define	CICR_SCC_SCC3		((uint)0x00200000)	/* SCC3 @ SCCc */
+#define	CICR_SCB_SCC2		((uint)0x00040000)	/* SCC2 @ SCCb */
+#define	CICR_SCA_SCC1		((uint)0x00000000)	/* SCC1 @ SCCa */
+#define CICR_IRL_MASK		((uint)0x0000e000)	/* Core interrupt */
+#define CICR_HP_MASK		((uint)0x00001f00)	/* Hi-pri int. */
+#define CICR_IEN		((uint)0x00000080)	/* Int. enable */
+#define CICR_SPS		((uint)0x00000001)	/* SCC Spread */
+
+/*
+ * Pin configuration flags.  The non-zero values are distinct powers of
+ * two; INPUT, PRIMARY and ANYEDGE are all 0, i.e. they name the state a
+ * pin has when the corresponding bit is left clear.
+ * NOTE(review): presumably these are OR-ed together and passed as the
+ * "flags" argument of cpm1_set_pin() -- confirm against its definition.
+ */
+#define CPM_PIN_INPUT     0
+#define CPM_PIN_OUTPUT    1
+#define CPM_PIN_PRIMARY   0
+#define CPM_PIN_SECONDARY 2
+#define CPM_PIN_GPIO      4
+#define CPM_PIN_OPENDRAIN 8
+#define CPM_PIN_FALLEDGE  16
+#define CPM_PIN_ANYEDGE   0
+
+/* Port selector for cpm1_set_pin(); enumerators count up from 0. */
+enum cpm_port {
+	CPM_PORTA,
+	CPM_PORTB,
+	CPM_PORTC,
+	CPM_PORTD,
+	CPM_PORTE,
+};
+
+/* Prototype only; the implementation is not part of this header. */
+void cpm1_set_pin(enum cpm_port port, int pin, int flags);
+
+/*
+ * Direction selector for clock routing.
+ * NOTE(review): presumably the "mode" argument of cpm1_clk_setup(), with
+ * RTX meaning both receive and transmit -- confirm against its definition.
+ */
+enum cpm_clk_dir {
+	CPM_CLK_RX,
+	CPM_CLK_TX,
+	CPM_CLK_RTX
+};
+
+/* Device whose clock is being routed: one of the four SCCs or two SMCs. */
+enum cpm_clk_target {
+	CPM_CLK_SCC1,
+	CPM_CLK_SCC2,
+	CPM_CLK_SCC3,
+	CPM_CLK_SCC4,
+	CPM_CLK_SMC1,
+	CPM_CLK_SMC2,
+};
+
+/*
+ * Clock sources: the four baud rate generators first, then the eight
+ * CLKn inputs.
+ * NOTE(review): presumably the "clock" argument of cpm1_clk_setup(),
+ * although that parameter is declared as plain int -- confirm.
+ */
+enum cpm_clk {
+	CPM_BRG1,	/* Baud Rate Generator  1 */
+	CPM_BRG2,	/* Baud Rate Generator  2 */
+	CPM_BRG3,	/* Baud Rate Generator  3 */
+	CPM_BRG4,	/* Baud Rate Generator  4 */
+	CPM_CLK1,	/* Clock  1 */
+	CPM_CLK2,	/* Clock  2 */
+	CPM_CLK3,	/* Clock  3 */
+	CPM_CLK4,	/* Clock  4 */
+	CPM_CLK5,	/* Clock  5 */
+	CPM_CLK6,	/* Clock  6 */
+	CPM_CLK7,	/* Clock  7 */
+	CPM_CLK8,	/* Clock  8 */
+};
+
+/* Prototypes only; the implementations are not part of this header. */
+int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int cpm1_gpiochip_add16(struct device *dev);
+int cpm1_gpiochip_add32(struct device *dev);
+
+#endif /* __CPM1__ */
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
new file mode 100644
index 0000000000..249d43cc64
--- /dev/null
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -0,0 +1,1152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Communication Processor Module v2.
+ *
+ * This file contains structures and information for the communication
+ * processor channels found in the dual port RAM or parameter RAM.
+ * All CPM control and status is available through the CPM2 internal
+ * memory map.  See immap_cpm2.h for details.
+ */
+#ifdef __KERNEL__
+#ifndef __CPM2__
+#define __CPM2__
+
+#include <asm/immap_cpm2.h>
+#include <asm/cpm.h>
+#include <sysdev/fsl_soc.h>
+
+/* CPM Command register.
+*/
+#define CPM_CR_RST	((uint)0x80000000)
+#define CPM_CR_PAGE	((uint)0x7c000000)
+#define CPM_CR_SBLOCK	((uint)0x03e00000)
+#define CPM_CR_FLG	((uint)0x00010000)
+#define CPM_CR_MCN	((uint)0x00003fc0)
+#define CPM_CR_OPCODE	((uint)0x0000000f)
+
+/* Device sub-block and page codes.
+*/
+#define CPM_CR_SCC1_SBLOCK	(0x04)
+#define CPM_CR_SCC2_SBLOCK	(0x05)
+#define CPM_CR_SCC3_SBLOCK	(0x06)
+#define CPM_CR_SCC4_SBLOCK	(0x07)
+#define CPM_CR_SMC1_SBLOCK	(0x08)
+#define CPM_CR_SMC2_SBLOCK	(0x09)
+#define CPM_CR_SPI_SBLOCK	(0x0a)
+#define CPM_CR_I2C_SBLOCK	(0x0b)
+#define CPM_CR_TIMER_SBLOCK	(0x0f)
+#define CPM_CR_RAND_SBLOCK	(0x0e)
+#define CPM_CR_FCC1_SBLOCK	(0x10)
+#define CPM_CR_FCC2_SBLOCK	(0x11)
+#define CPM_CR_FCC3_SBLOCK	(0x12)
+#define CPM_CR_IDMA1_SBLOCK	(0x14)
+#define CPM_CR_IDMA2_SBLOCK	(0x15)
+#define CPM_CR_IDMA3_SBLOCK	(0x16)
+#define CPM_CR_IDMA4_SBLOCK	(0x17)
+#define CPM_CR_MCC1_SBLOCK	(0x1c)
+
+/*
+ * Sub-block code of FCC number x, counted from zero: x = 0 yields 0x10
+ * (CPM_CR_FCC1_SBLOCK), x = 1 yields 0x11, and so on.  The argument is
+ * parenthesized so that a compound expression is added as a whole.
+ */
+#define CPM_CR_FCC_SBLOCK(x)	((x) + 0x10)
+
+#define CPM_CR_SCC1_PAGE	(0x00)
+#define CPM_CR_SCC2_PAGE	(0x01)
+#define CPM_CR_SCC3_PAGE	(0x02)
+#define CPM_CR_SCC4_PAGE	(0x03)
+#define CPM_CR_SMC1_PAGE	(0x07)
+#define CPM_CR_SMC2_PAGE	(0x08)
+#define CPM_CR_SPI_PAGE		(0x09)
+#define CPM_CR_I2C_PAGE		(0x0a)
+#define CPM_CR_TIMER_PAGE	(0x0a)
+#define CPM_CR_RAND_PAGE	(0x0a)
+#define CPM_CR_FCC1_PAGE	(0x04)
+#define CPM_CR_FCC2_PAGE	(0x05)
+#define CPM_CR_FCC3_PAGE	(0x06)
+#define CPM_CR_IDMA1_PAGE	(0x07)
+#define CPM_CR_IDMA2_PAGE	(0x08)
+#define CPM_CR_IDMA3_PAGE	(0x09)
+#define CPM_CR_IDMA4_PAGE	(0x0a)
+#define CPM_CR_MCC1_PAGE	(0x07)
+#define CPM_CR_MCC2_PAGE	(0x08)
+
+/*
+ * Page code of FCC number x, counted from zero: x = 0 yields 0x04
+ * (CPM_CR_FCC1_PAGE), x = 1 yields 0x05, and so on.  The argument is
+ * parenthesized so that a compound expression is added as a whole.
+ */
+#define CPM_CR_FCC_PAGE(x)	((x) + 0x04)
+
+/* CPM2-specific opcodes (see cpm.h for common opcodes)
+*/
+#define CPM_CR_START_IDMA	((ushort)0x0009)
+
+/*
+ * Build a CPM2 command register value.  PG is shifted to bit 26
+ * (CPM_CR_PAGE field), SBC to bit 21 (CPM_CR_SBLOCK field), MCN to bit 6
+ * (CPM_CR_MCN field) and OP occupies the low bits (CPM_CR_OPCODE field).
+ * Every argument is parenthesized so that a compound expression such as
+ * "a | b" is shifted as a whole rather than being split up by operator
+ * precedence.
+ */
+#define mk_cr_cmd(PG, SBC, MCN, OP) \
+	(((PG) << 26) | ((SBC) << 21) | ((MCN) << 6) | (OP))
+
+/* The number of pages of host memory we allocate for CPM.  This is
+ * done early in kernel initialization to get physically contiguous
+ * pages.
+ */
+#define NUM_CPM_HOST_PAGES	2
+
+/* Export the base address of the communication processor registers
+ * and dual port ram.
+ */
+extern cpm_cpm2_t __iomem *cpmp; /* Pointer to comm processor */
+
+#define cpm_dpalloc cpm_muram_alloc
+#define cpm_dpfree cpm_muram_free
+#define cpm_dpram_addr cpm_muram_addr
+
+extern void cpm2_reset(void);
+
+/* Baud rate generators.
+*/
+#define CPM_BRG_RST		((uint)0x00020000)
+#define CPM_BRG_EN		((uint)0x00010000)
+#define CPM_BRG_EXTC_INT	((uint)0x00000000)
+#define CPM_BRG_EXTC_CLK3_9	((uint)0x00004000)
+#define CPM_BRG_EXTC_CLK5_15	((uint)0x00008000)
+#define CPM_BRG_ATB		((uint)0x00002000)
+#define CPM_BRG_CD_MASK		((uint)0x00001ffe)
+#define CPM_BRG_DIV16		((uint)0x00000001)
+
+#define CPM2_BRG_INT_CLK	(get_brgfreq())
+#define CPM2_BRG_UART_CLK	(CPM2_BRG_INT_CLK/16)
+
+extern void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src);
+
+/* This function is used by UARTS, or anything else that uses a 16x
+ * oversampled clock.
+ *
+ * Forwards brg and rate to __cpm2_setbrg() with CPM2_BRG_UART_CLK
+ * (CPM2_BRG_INT_CLK divided by 16) as the clock, div16 = 0 and
+ * CPM_BRG_EXTC_INT as the source.
+ */
+static inline void cpm_setbrg(uint brg, uint rate)
+{
+	__cpm2_setbrg(brg, rate, CPM2_BRG_UART_CLK, 0, CPM_BRG_EXTC_INT);
+}
+
+/* This function is used to set high speed synchronous baud rate
+ * clocks.
+ *
+ * Forwards brg, rate and div16 to __cpm2_setbrg() with the undivided
+ * CPM2_BRG_INT_CLK as the clock and CPM_BRG_EXTC_INT as the source.
+ */
+static inline void cpm2_fastbrg(uint brg, uint rate, int div16)
+{
+	__cpm2_setbrg(brg, rate, CPM2_BRG_INT_CLK, div16, CPM_BRG_EXTC_INT);
+}
+
+/* Parameter RAM offsets from the base.
+*/
+#define PROFF_SCC1		((uint)0x8000)
+#define PROFF_SCC2		((uint)0x8100)
+#define PROFF_SCC3		((uint)0x8200)
+#define PROFF_SCC4		((uint)0x8300)
+#define PROFF_FCC1		((uint)0x8400)
+#define PROFF_FCC2		((uint)0x8500)
+#define PROFF_FCC3		((uint)0x8600)
+#define PROFF_MCC1		((uint)0x8700)
+#define PROFF_SMC1_BASE		((uint)0x87fc)
+#define PROFF_IDMA1_BASE	((uint)0x87fe)
+#define PROFF_MCC2		((uint)0x8800)
+#define PROFF_SMC2_BASE		((uint)0x88fc)
+#define PROFF_IDMA2_BASE	((uint)0x88fe)
+#define PROFF_SPI_BASE		((uint)0x89fc)
+#define PROFF_IDMA3_BASE	((uint)0x89fe)
+#define PROFF_TIMERS		((uint)0x8ae0)
+#define PROFF_REVNUM		((uint)0x8af0)
+#define PROFF_RAND		((uint)0x8af8)
+#define PROFF_I2C_BASE		((uint)0x8afc)
+#define PROFF_IDMA4_BASE	((uint)0x8afe)
+
+#define PROFF_SCC_SIZE		((uint)0x100)
+#define PROFF_FCC_SIZE		((uint)0x100)
+#define PROFF_SMC_SIZE		((uint)64)
+
+/* The SMCs are relocated to any of the first eight DPRAM pages.
+ * We will fix these at the first locations of DPRAM, until we
+ * get some microcode patches :-).
+ * The parameter ram space for the SMCs is fifty-some bytes, and
+ * they are required to start on a 64 byte boundary.
+ */
+#define PROFF_SMC1	(0)
+#define PROFF_SMC2	(64)
+
+
+/* Define enough so I can at least use the serial port as a UART.
+ *
+ * Parameter area of a CPM2 SMC operated as a UART.
+ * NOTE(review): field order and widths presumably mirror the hardware
+ * parameter RAM and must not be reordered -- confirm against the CPM2
+ * manual.  Fields tagged "Internal" are presumably CPM-owned working
+ * state rather than something a driver programs.
+ */
+typedef struct smc_uart {
+	ushort	smc_rbase;	/* Rx Buffer descriptor base address */
+	ushort	smc_tbase;	/* Tx Buffer descriptor base address */
+	u_char	smc_rfcr;	/* Rx function code */
+	u_char	smc_tfcr;	/* Tx function code */
+	ushort	smc_mrblr;	/* Max receive buffer length */
+	uint	smc_rstate;	/* Internal */
+	uint	smc_idp;	/* Internal */
+	ushort	smc_rbptr;	/* Internal */
+	ushort	smc_ibc;	/* Internal */
+	uint	smc_rxtmp;	/* Internal */
+	uint	smc_tstate;	/* Internal */
+	uint	smc_tdp;	/* Internal */
+	ushort	smc_tbptr;	/* Internal */
+	ushort	smc_tbc;	/* Internal */
+	uint	smc_txtmp;	/* Internal */
+	ushort	smc_maxidl;	/* Maximum idle characters */
+	ushort	smc_tmpidl;	/* Temporary idle counter */
+	ushort	smc_brklen;	/* Last received break length */
+	ushort	smc_brkec;	/* rcv'd break condition counter */
+	ushort	smc_brkcr;	/* xmt break count register */
+	ushort	smc_rmask;	/* Temporary bit mask */
+	uint	smc_stmp;	/* SDMA Temp */
+} smc_uart_t;
+
+/* SMC uart mode register (Internal memory map).
+*/
+#define SMCMR_REN	((ushort)0x0001)
+#define SMCMR_TEN	((ushort)0x0002)
+#define SMCMR_DM	((ushort)0x000c)
+#define SMCMR_SM_GCI	((ushort)0x0000)
+#define SMCMR_SM_UART	((ushort)0x0020)
+#define SMCMR_SM_TRANS	((ushort)0x0030)
+#define SMCMR_SM_MASK	((ushort)0x0030)
+#define SMCMR_PM_EVEN	((ushort)0x0100)	/* Even parity, else odd */
+#define SMCMR_REVD	SMCMR_PM_EVEN
+#define SMCMR_PEN	((ushort)0x0200)	/* Parity enable */
+#define SMCMR_BS	SMCMR_PEN
+#define SMCMR_SL	((ushort)0x0400)	/* Two stops, else one */
+#define SMCR_CLEN_MASK	((ushort)0x7800)	/* Character length */
+#define smcr_mk_clen(C)	(((C) << 11) & SMCR_CLEN_MASK)
+
+/* SMC Event and Mask register.
+*/
+#define SMCM_BRKE       ((unsigned char)0x40)   /* When in UART Mode */
+#define SMCM_BRK        ((unsigned char)0x10)   /* When in UART Mode */
+#define SMCM_TXE	((unsigned char)0x10)
+#define SMCM_BSY	((unsigned char)0x04)
+#define SMCM_TX		((unsigned char)0x02)
+#define SMCM_RX		((unsigned char)0x01)
+
+/* SCCs.
+*/
+#define SCC_GSMRH_IRP		((uint)0x00040000)
+#define SCC_GSMRH_GDE		((uint)0x00010000)
+#define SCC_GSMRH_TCRC_CCITT	((uint)0x00008000)
+#define SCC_GSMRH_TCRC_BISYNC	((uint)0x00004000)
+#define SCC_GSMRH_TCRC_HDLC	((uint)0x00000000)
+#define SCC_GSMRH_REVD		((uint)0x00002000)
+#define SCC_GSMRH_TRX		((uint)0x00001000)
+#define SCC_GSMRH_TTX		((uint)0x00000800)
+#define SCC_GSMRH_CDP		((uint)0x00000400)
+#define SCC_GSMRH_CTSP		((uint)0x00000200)
+#define SCC_GSMRH_CDS		((uint)0x00000100)
+#define SCC_GSMRH_CTSS		((uint)0x00000080)
+#define SCC_GSMRH_TFL		((uint)0x00000040)
+#define SCC_GSMRH_RFW		((uint)0x00000020)
+#define SCC_GSMRH_TXSY		((uint)0x00000010)
+#define SCC_GSMRH_SYNL16	((uint)0x0000000c)
+#define SCC_GSMRH_SYNL8		((uint)0x00000008)
+#define SCC_GSMRH_SYNL4		((uint)0x00000004)
+#define SCC_GSMRH_RTSM		((uint)0x00000002)
+#define SCC_GSMRH_RSYN		((uint)0x00000001)
+
+#define SCC_GSMRL_SIR		((uint)0x80000000)	/* SCC2 only */
+#define SCC_GSMRL_EDGE_NONE	((uint)0x60000000)	/* EDGE_*: alternative values of bits 0x60000000 */
+#define SCC_GSMRL_EDGE_NEG	((uint)0x40000000)
+#define SCC_GSMRL_EDGE_POS	((uint)0x20000000)
+#define SCC_GSMRL_EDGE_BOTH	((uint)0x00000000)
+#define SCC_GSMRL_TCI		((uint)0x10000000)
+#define SCC_GSMRL_TSNC_3	((uint)0x0c000000)	/* TSNC_*: alternative values of bits 0x0c000000 */
+#define SCC_GSMRL_TSNC_4	((uint)0x08000000)
+#define SCC_GSMRL_TSNC_14	((uint)0x04000000)
+#define SCC_GSMRL_TSNC_INF	((uint)0x00000000)
+#define SCC_GSMRL_RINV		((uint)0x02000000)
+#define SCC_GSMRL_TINV		((uint)0x01000000)
+#define SCC_GSMRL_TPL_128	((uint)0x00c00000)	/* TPL_*: alternative values within bits 0x00e00000 */
+#define SCC_GSMRL_TPL_64	((uint)0x00a00000)
+#define SCC_GSMRL_TPL_48	((uint)0x00800000)
+#define SCC_GSMRL_TPL_32	((uint)0x00600000)
+#define SCC_GSMRL_TPL_16	((uint)0x00400000)
+#define SCC_GSMRL_TPL_8		((uint)0x00200000)
+#define SCC_GSMRL_TPL_NONE	((uint)0x00000000)
+#define SCC_GSMRL_TPP_ALL1	((uint)0x00180000)	/* TPP_*: alternative values of bits 0x00180000 */
+#define SCC_GSMRL_TPP_01	((uint)0x00100000)
+#define SCC_GSMRL_TPP_10	((uint)0x00080000)
+#define SCC_GSMRL_TPP_ZEROS	((uint)0x00000000)
+#define SCC_GSMRL_TEND		((uint)0x00040000)
+#define SCC_GSMRL_TDCR_32	((uint)0x00030000)	/* TDCR_*: alternative values of bits 0x00030000 */
+#define SCC_GSMRL_TDCR_16	((uint)0x00020000)
+#define SCC_GSMRL_TDCR_8	((uint)0x00010000)
+#define SCC_GSMRL_TDCR_1	((uint)0x00000000)
+#define SCC_GSMRL_RDCR_32	((uint)0x0000c000)	/* RDCR_*: alternative values of bits 0x0000c000 */
+#define SCC_GSMRL_RDCR_16	((uint)0x00008000)
+#define SCC_GSMRL_RDCR_8	((uint)0x00004000)
+#define SCC_GSMRL_RDCR_1	((uint)0x00000000)
+#define SCC_GSMRL_RENC_DFMAN	((uint)0x00003000)	/* RENC_*: alternative values within bits 0x00003800 */
+#define SCC_GSMRL_RENC_MANCH	((uint)0x00002000)
+#define SCC_GSMRL_RENC_FM0	((uint)0x00001000)
+#define SCC_GSMRL_RENC_NRZI	((uint)0x00000800)
+#define SCC_GSMRL_RENC_NRZ	((uint)0x00000000)
+#define SCC_GSMRL_TENC_DFMAN	((uint)0x00000600)	/* TENC_*: alternative values within bits 0x00000700 */
+#define SCC_GSMRL_TENC_MANCH	((uint)0x00000400)
+#define SCC_GSMRL_TENC_FM0	((uint)0x00000200)
+#define SCC_GSMRL_TENC_NRZI	((uint)0x00000100)
+#define SCC_GSMRL_TENC_NRZ	((uint)0x00000000)
+#define SCC_GSMRL_DIAG_LE	((uint)0x000000c0)	/* Loop and echo; DIAG_*: alternative values of bits 0x000000c0 */
+#define SCC_GSMRL_DIAG_ECHO	((uint)0x00000080)
+#define SCC_GSMRL_DIAG_LOOP	((uint)0x00000040)
+#define SCC_GSMRL_DIAG_NORM	((uint)0x00000000)
+#define SCC_GSMRL_ENR		((uint)0x00000020)
+#define SCC_GSMRL_ENT		((uint)0x00000010)
+#define SCC_GSMRL_MODE_ENET	((uint)0x0000000c)	/* MODE_*: alternative values of the low 4 bits */
+#define SCC_GSMRL_MODE_DDCMP	((uint)0x00000009)
+#define SCC_GSMRL_MODE_BISYNC	((uint)0x00000008)
+#define SCC_GSMRL_MODE_V14	((uint)0x00000007)
+#define SCC_GSMRL_MODE_AHDLC	((uint)0x00000006)
+#define SCC_GSMRL_MODE_PROFIBUS	((uint)0x00000005)
+#define SCC_GSMRL_MODE_UART	((uint)0x00000004)
+#define SCC_GSMRL_MODE_SS7	((uint)0x00000003)
+#define SCC_GSMRL_MODE_ATALK	((uint)0x00000002)
+#define SCC_GSMRL_MODE_HDLC	((uint)0x00000000)
+
+#define SCC_TODR_TOD		((ushort)0x8000)
+
+/* SCC Event and Mask register bits, as unsigned char masks.
+*/
+#define SCCM_TXE	((unsigned char)0x10)	/* same bit position as SCCE_ENET_TXE */
+#define SCCM_BSY	((unsigned char)0x04)	/* same bit position as SCCE_ENET_BSY */
+#define SCCM_TX		((unsigned char)0x02)	/* same bit position as SCCE_ENET_TXB */
+#define SCCM_RX		((unsigned char)0x01)	/* same bit position as SCCE_ENET_RXB */
+
+typedef struct scc_param {	/* leading member of scc_enet_t, scc_uart_t and scc_trans_t */
+	ushort	scc_rbase;	/* Rx Buffer descriptor base address */
+	ushort	scc_tbase;	/* Tx Buffer descriptor base address */
+	u_char	scc_rfcr;	/* Rx function code (see SCC_EB / SCC_GBL) */
+	u_char	scc_tfcr;	/* Tx function code (see SCC_EB / SCC_GBL) */
+	ushort	scc_mrblr;	/* Max receive buffer length */
+	uint	scc_rstate;	/* Internal */
+	uint	scc_idp;	/* Internal */
+	ushort	scc_rbptr;	/* Internal */
+	ushort	scc_ibc;	/* Internal */
+	uint	scc_rxtmp;	/* Internal */
+	uint	scc_tstate;	/* Internal */
+	uint	scc_tdp;	/* Internal */
+	ushort	scc_tbptr;	/* Internal */
+	ushort	scc_tbc;	/* Internal */
+	uint	scc_txtmp;	/* Internal */
+	uint	scc_rcrc;	/* Internal */
+	uint	scc_tcrc;	/* Internal */
+} sccp_t;
+
+/* Function code bits: u_char masks, the same width as the *_rfcr / *_tfcr fields.
+*/
+#define SCC_EB	((u_char) 0x10)	/* Set big endian byte order */
+#define SCC_GBL	((u_char) 0x20) /* Snooping enabled */
+
+/* CPM Ethernet through SCC1: generic sccp_t followed by Ethernet-specific fields.
+ */
+typedef struct scc_enet {
+	sccp_t	sen_genscc;	/* Generic SCC parameters */
+	uint	sen_cpres;	/* Preset CRC */
+	uint	sen_cmask;	/* Constant mask for CRC */
+	uint	sen_crcec;	/* CRC Error counter */
+	uint	sen_alec;	/* alignment error counter */
+	uint	sen_disfc;	/* discard frame counter */
+	ushort	sen_pads;	/* Tx short frame pad character */
+	ushort	sen_retlim;	/* Retry limit threshold */
+	ushort	sen_retcnt;	/* Retry limit counter */
+	ushort	sen_maxflr;	/* maximum frame length register */
+	ushort	sen_minflr;	/* minimum frame length register */
+	ushort	sen_maxd1;	/* maximum DMA1 length */
+	ushort	sen_maxd2;	/* maximum DMA2 length */
+	ushort	sen_maxd;	/* Rx max DMA */
+	ushort	sen_dmacnt;	/* Rx DMA counter */
+	ushort	sen_maxb;	/* Max BD byte count */
+	ushort	sen_gaddr1;	/* Group address filter */
+	ushort	sen_gaddr2;	/* Group address filter (continued) */
+	ushort	sen_gaddr3;	/* Group address filter (continued) */
+	ushort	sen_gaddr4;	/* Group address filter (continued) */
+	uint	sen_tbuf0data0;	/* Save area 0 - current frame */
+	uint	sen_tbuf0data1;	/* Save area 1 - current frame */
+	uint	sen_tbuf0rba;	/* Internal */
+	uint	sen_tbuf0crc;	/* Internal */
+	ushort	sen_tbuf0bcnt;	/* Internal */
+	ushort	sen_paddrh;	/* physical address (MSB) */
+	ushort	sen_paddrm;	/* physical address (middle) */
+	ushort	sen_paddrl;	/* physical address (LSB) */
+	ushort	sen_pper;	/* persistence */
+	ushort	sen_rfbdptr;	/* Rx first BD pointer */
+	ushort	sen_tfbdptr;	/* Tx first BD pointer */
+	ushort	sen_tlbdptr;	/* Tx last BD pointer */
+	uint	sen_tbuf1data0;	/* Save area 0 - current frame (NOTE(review): same text as tbuf0; confirm) */
+	uint	sen_tbuf1data1;	/* Save area 1 - current frame (NOTE(review): same text as tbuf0; confirm) */
+	uint	sen_tbuf1rba;	/* Internal */
+	uint	sen_tbuf1crc;	/* Internal */
+	ushort	sen_tbuf1bcnt;	/* Internal */
+	ushort	sen_txlen;	/* Tx Frame length counter */
+	ushort	sen_iaddr1;	/* Individual address filter */
+	ushort	sen_iaddr2;	/* Individual address filter (continued) */
+	ushort	sen_iaddr3;	/* Individual address filter (continued) */
+	ushort	sen_iaddr4;	/* Individual address filter (continued) */
+	ushort	sen_boffcnt;	/* Backoff counter */
+
+	/* NOTE: Some versions of the manual document the following items
+	 * in the wrong order.  The order below is the correct one.
+	 */
+	ushort	sen_taddrh;	/* temp address (MSB) */
+	ushort	sen_taddrm;	/* temp address (middle) */
+	ushort	sen_taddrl;	/* temp address (LSB) */
+} scc_enet_t;
+
+
+/* SCC Event register bits as used by Ethernet (ushort masks).
+*/
+#define SCCE_ENET_GRA	((ushort)0x0080)	/* Graceful stop complete */
+#define SCCE_ENET_TXE	((ushort)0x0010)	/* Transmit Error */
+#define SCCE_ENET_RXF	((ushort)0x0008)	/* Full frame received */
+#define SCCE_ENET_BSY	((ushort)0x0004)	/* All incoming buffers full */
+#define SCCE_ENET_TXB	((ushort)0x0002)	/* A buffer was transmitted */
+#define SCCE_ENET_RXB	((ushort)0x0001)	/* A buffer was received */
+
+/* SCC Mode Register (PSMR) bits as used by Ethernet (ushort masks).
+*/
+#define SCC_PSMR_HBC	((ushort)0x8000)	/* Enable heartbeat */
+#define SCC_PSMR_FC	((ushort)0x4000)	/* Force collision */
+#define SCC_PSMR_RSH	((ushort)0x2000)	/* Receive short frames */
+#define SCC_PSMR_IAM	((ushort)0x1000)	/* Check individual hash */
+#define SCC_PSMR_ENCRC	((ushort)0x0800)	/* Ethernet CRC mode */
+#define SCC_PSMR_PRO	((ushort)0x0200)	/* Promiscuous mode */
+#define SCC_PSMR_BRO	((ushort)0x0100)	/* Catch broadcast pkts */
+#define SCC_PSMR_SBT	((ushort)0x0080)	/* Special backoff timer */
+#define SCC_PSMR_LPB	((ushort)0x0040)	/* Set Loopback mode */
+#define SCC_PSMR_SIP	((ushort)0x0020)	/* Sample Input Pins */
+#define SCC_PSMR_LCW	((ushort)0x0010)	/* Late collision window */
+#define SCC_PSMR_NIB22	((ushort)0x000a)	/* Start frame search (multi-bit value, not a single flag) */
+#define SCC_PSMR_FDE	((ushort)0x0001)	/* Full duplex enable */
+
+/* SCC as UART: generic sccp_t followed by UART-specific fields.
+*/
+typedef struct scc_uart {
+	sccp_t	scc_genscc;	/* Generic SCC parameters */
+	uint	scc_res1;	/* Reserved */
+	uint	scc_res2;	/* Reserved */
+	ushort	scc_maxidl;	/* Maximum idle chars */
+	ushort	scc_idlc;	/* temp idle counter */
+	ushort	scc_brkcr;	/* Break count register */
+	ushort	scc_parec;	/* receive parity error counter */
+	ushort	scc_frmec;	/* receive framing error counter */
+	ushort	scc_nosec;	/* receive noise counter */
+	ushort	scc_brkec;	/* receive break condition counter */
+	ushort	scc_brkln;	/* last received break length */
+	ushort	scc_uaddr1;	/* UART address character 1 */
+	ushort	scc_uaddr2;	/* UART address character 2 */
+	ushort	scc_rtemp;	/* Temp storage */
+	ushort	scc_toseq;	/* Transmit out of sequence char */
+	ushort	scc_char1;	/* control character 1 */
+	ushort	scc_char2;	/* control character 2 */
+	ushort	scc_char3;	/* control character 3 */
+	ushort	scc_char4;	/* control character 4 */
+	ushort	scc_char5;	/* control character 5 */
+	ushort	scc_char6;	/* control character 6 */
+	ushort	scc_char7;	/* control character 7 */
+	ushort	scc_char8;	/* control character 8 */
+	ushort	scc_rccm;	/* receive control character mask */
+	ushort	scc_rccr;	/* receive control character register */
+	ushort	scc_rlbc;	/* receive last break character */
+} scc_uart_t;
+
+/* SCC Event and Mask register bits when the SCC is used as a UART (ushort masks).
+*/
+#define UART_SCCM_GLR		((ushort)0x1000)
+#define UART_SCCM_GLT		((ushort)0x0800)
+#define UART_SCCM_AB		((ushort)0x0200)
+#define UART_SCCM_IDL		((ushort)0x0100)
+#define UART_SCCM_GRA		((ushort)0x0080)	/* same bit position as SCCE_ENET_GRA */
+#define UART_SCCM_BRKE		((ushort)0x0040)
+#define UART_SCCM_BRKS		((ushort)0x0020)
+#define UART_SCCM_CCR		((ushort)0x0008)
+#define UART_SCCM_BSY		((ushort)0x0004)	/* same bit position as SCCE_ENET_BSY */
+#define UART_SCCM_TX		((ushort)0x0002)	/* same bit position as SCCE_ENET_TXB */
+#define UART_SCCM_RX		((ushort)0x0001)	/* same bit position as SCCE_ENET_RXB */
+
+/* The SCC PSMR bits when used as a UART (ushort masks).
+*/
+#define SCU_PSMR_FLC		((ushort)0x8000)
+#define SCU_PSMR_SL		((ushort)0x4000)
+#define SCU_PSMR_CL		((ushort)0x3000)	/* two-bit mask */
+#define SCU_PSMR_UM		((ushort)0x0c00)	/* two-bit mask */
+#define SCU_PSMR_FRZ		((ushort)0x0200)
+#define SCU_PSMR_RZS		((ushort)0x0100)
+#define SCU_PSMR_SYN		((ushort)0x0080)
+#define SCU_PSMR_DRT		((ushort)0x0040)
+#define SCU_PSMR_PEN		((ushort)0x0010)
+#define SCU_PSMR_RPM		((ushort)0x000c)	/* two-bit mask */
+#define SCU_PSMR_REVP		((ushort)0x0008)	/* a value inside SCU_PSMR_RPM */
+#define SCU_PSMR_TPM		((ushort)0x0003)	/* two-bit mask */
+#define SCU_PSMR_TEVP		((ushort)0x0002)	/* a value inside SCU_PSMR_TPM */
+
+/* CPM Transparent mode SCC: sccp_t plus the two CRC fields that also start scc_enet_t.
+ */
+typedef struct scc_trans {
+	sccp_t	st_genscc;	/* Generic SCC parameters */
+	uint	st_cpres;	/* Preset CRC */
+	uint	st_cmask;	/* Constant mask for CRC */
+} scc_trans_t;
+
+/* FCC definitions start here; first the FCC_GFMR_* bits (uint masks).
+*/
+#define FCC_GFMR_DIAG_NORM	((uint)0x00000000)	/* DIAG_*: alternative values of bits 0xc0000000 */
+#define FCC_GFMR_DIAG_LE	((uint)0x40000000)
+#define FCC_GFMR_DIAG_AE	((uint)0x80000000)
+#define FCC_GFMR_DIAG_ALE	((uint)0xc0000000)
+#define FCC_GFMR_TCI		((uint)0x20000000)
+#define FCC_GFMR_TRX		((uint)0x10000000)
+#define FCC_GFMR_TTX		((uint)0x08000000)
+#define FCC_GFMR_CDP		((uint)0x04000000)
+#define FCC_GFMR_CTSP		((uint)0x02000000)
+#define FCC_GFMR_CDS		((uint)0x01000000)
+#define FCC_GFMR_CTSS		((uint)0x00800000)
+#define FCC_GFMR_SYNL_NONE	((uint)0x00000000)	/* SYNL_*: alternative values of bits 0x0000c000 */
+#define FCC_GFMR_SYNL_AUTO	((uint)0x00004000)
+#define FCC_GFMR_SYNL_8		((uint)0x00008000)
+#define FCC_GFMR_SYNL_16	((uint)0x0000c000)
+#define FCC_GFMR_RTSM		((uint)0x00002000)
+#define FCC_GFMR_RENC_NRZ	((uint)0x00000000)	/* RENC_*: alternative values */
+#define FCC_GFMR_RENC_NRZI	((uint)0x00000800)
+#define FCC_GFMR_REVD		((uint)0x00000400)
+#define FCC_GFMR_TENC_NRZ	((uint)0x00000000)	/* TENC_*: alternative values */
+#define FCC_GFMR_TENC_NRZI	((uint)0x00000100)
+#define FCC_GFMR_TCRC_16	((uint)0x00000000)	/* TCRC_*: alternative values */
+#define FCC_GFMR_TCRC_32	((uint)0x00000080)
+#define FCC_GFMR_ENR		((uint)0x00000020)
+#define FCC_GFMR_ENT		((uint)0x00000010)
+#define FCC_GFMR_MODE_ENET	((uint)0x0000000c)	/* MODE_*: alternative values of the low 4 bits */
+#define FCC_GFMR_MODE_ATM	((uint)0x0000000a)
+#define FCC_GFMR_MODE_HDLC	((uint)0x00000000)
+
+/* Generic FCC parameter RAM; leading member of fcc_enet_t (fen_genfcc).
+*/
+typedef struct fcc_param {
+	ushort	fcc_riptr;	/* Rx Internal temp pointer */
+	ushort	fcc_tiptr;	/* Tx Internal temp pointer */
+	ushort	fcc_res1;	/* Reserved */
+	ushort	fcc_mrblr;	/* Max receive buffer length, mod 32 bytes */
+	uint	fcc_rstate;	/* Upper byte is Func code, must be set */
+	uint	fcc_rbase;	/* Receive BD base */
+	ushort	fcc_rbdstat;	/* RxBD status */
+	ushort	fcc_rbdlen;	/* RxBD down counter */
+	uint	fcc_rdptr;	/* RxBD internal data pointer */
+	uint	fcc_tstate;	/* Upper byte is Func code, must be set */
+	uint	fcc_tbase;	/* Transmit BD base */
+	ushort	fcc_tbdstat;	/* TxBD status */
+	ushort	fcc_tbdlen;	/* TxBD down counter */
+	uint	fcc_tdptr;	/* TxBD internal data pointer */
+	uint	fcc_rbptr;	/* Rx BD Internal buf pointer */
+	uint	fcc_tbptr;	/* Tx BD Internal buf pointer */
+	uint	fcc_rcrc;	/* Rx temp CRC */
+	uint	fcc_res2;	/* Reserved */
+	uint	fcc_tcrc;	/* Tx temp CRC */
+} fccp_t;
+
+
+/* Ethernet controller through FCC: generic fccp_t followed by Ethernet fields.
+*/
+typedef struct fcc_enet {
+	fccp_t	fen_genfcc;	/* Generic FCC parameters */
+	uint	fen_statbuf;	/* Internal status buffer */
+	uint	fen_camptr;	/* CAM address */
+	uint	fen_cmask;	/* Constant mask for CRC */
+	uint	fen_cpres;	/* Preset CRC */
+	uint	fen_crcec;	/* CRC Error counter */
+	uint	fen_alec;	/* alignment error counter */
+	uint	fen_disfc;	/* discard frame counter */
+	ushort	fen_retlim;	/* Retry limit */
+	ushort	fen_retcnt;	/* Retry counter */
+	ushort	fen_pper;	/* Persistence */
+	ushort	fen_boffcnt;	/* backoff counter */
+	uint	fen_gaddrh;	/* Group address filter, high 32-bits */
+	uint	fen_gaddrl;	/* Group address filter, low 32-bits */
+	ushort	fen_tfcstat;	/* out of sequence TxBD */
+	ushort	fen_tfclen;
+	uint	fen_tfcptr;
+	ushort	fen_mflr;	/* Maximum frame length (1518) */
+	ushort	fen_paddrh;	/* MAC address */
+	ushort	fen_paddrm;	/* MAC address (continued) */
+	ushort	fen_paddrl;	/* MAC address (continued) */
+	ushort	fen_ibdcount;	/* Internal BD counter */
+	ushort	fen_ibdstart;	/* Internal BD start pointer */
+	ushort	fen_ibdend;	/* Internal BD end pointer */
+	ushort	fen_txlen;	/* Internal Tx frame length counter */
+	uint	fen_ibdbase[8]; /* Internal use */
+	uint	fen_iaddrh;	/* Individual address filter */
+	uint	fen_iaddrl;	/* Individual address filter (continued) */
+	ushort	fen_minflr;	/* Minimum frame length (64) */
+	ushort	fen_taddrh;	/* Filter transfer MAC address */
+	ushort	fen_taddrm;	/* Filter transfer MAC address (continued) */
+	ushort	fen_taddrl;	/* Filter transfer MAC address (continued) */
+	ushort	fen_padptr;	/* Pointer to pad byte buffer */
+	ushort	fen_cftype;	/* control frame type */
+	ushort	fen_cfrange;	/* control frame range */
+	ushort	fen_maxb;	/* maximum BD count */
+	ushort	fen_maxd1;	/* Max DMA1 length (1520) */
+	ushort	fen_maxd2;	/* Max DMA2 length (1520) */
+	ushort	fen_maxd;	/* internal max DMA count */
+	ushort	fen_dmacnt;	/* internal DMA counter */
+	uint	fen_octc;	/* Total octet counter */
+	uint	fen_colc;	/* Total collision counter */
+	uint	fen_broc;	/* Total broadcast packet counter */
+	uint	fen_mulc;	/* Total multicast packet count */
+	uint	fen_uspc;	/* Total packets < 64 bytes */
+	uint	fen_frgc;	/* Total packets < 64 bytes with errors */
+	uint	fen_ospc;	/* Total packets > 1518 */
+	uint	fen_jbrc;	/* Total packets > 1518 with errors */
+	uint	fen_p64c;	/* Total packets == 64 bytes */
+	uint	fen_p65c;	/* Total packets 64 < bytes <= 127 */
+	uint	fen_p128c;	/* Total packets 127 < bytes <= 255 */
+	uint	fen_p256c;	/* Total packets 255 < bytes <= 511 */
+	uint	fen_p512c;	/* Total packets 511 < bytes <= 1023 */
+	uint	fen_p1024c;	/* Total packets 1023 < bytes <= 1518 */
+	uint	fen_cambuf;	/* Internal CAM buffer pointer */
+	ushort	fen_rfthr;	/* Received frames threshold */
+	ushort	fen_rfcnt;	/* Received frames count */
+} fcc_enet_t;
+
+/* FCC Event/Mask register bits as used by Ethernet (ushort masks).
+*/
+#define FCC_ENET_GRA	((ushort)0x0080)	/* Graceful stop complete */
+#define FCC_ENET_RXC	((ushort)0x0040)	/* Control Frame Received */
+#define FCC_ENET_TXC	((ushort)0x0020)	/* Out of seq. Tx sent */
+#define FCC_ENET_TXE	((ushort)0x0010)	/* Transmit Error */
+#define FCC_ENET_RXF	((ushort)0x0008)	/* Full frame received */
+#define FCC_ENET_BSY	((ushort)0x0004)	/* Busy.  Rx Frame dropped */
+#define FCC_ENET_TXB	((ushort)0x0002)	/* A buffer was transmitted */
+#define FCC_ENET_RXB	((ushort)0x0001)	/* A buffer was received */
+
+/* FCC Mode Register (FPSMR) bits as used by Ethernet (uint masks).
+*/
+#define FCC_PSMR_HBC	((uint)0x80000000)	/* Enable heartbeat */
+#define FCC_PSMR_FC	((uint)0x40000000)	/* Force Collision */
+#define FCC_PSMR_SBT	((uint)0x20000000)	/* Stop backoff timer */
+#define FCC_PSMR_LPB	((uint)0x10000000)	/* Local protect. 1 = FDX */
+#define FCC_PSMR_LCW	((uint)0x08000000)	/* Late collision select */
+#define FCC_PSMR_FDE	((uint)0x04000000)	/* Full Duplex Enable */
+#define FCC_PSMR_MON	((uint)0x02000000)	/* RMON Enable */
+#define FCC_PSMR_PRO	((uint)0x00400000)	/* Promiscuous Enable */
+#define FCC_PSMR_FCE	((uint)0x00200000)	/* Flow Control Enable */
+#define FCC_PSMR_RSH	((uint)0x00100000)	/* Receive Short Frames */
+#define FCC_PSMR_CAM	((uint)0x00000400)	/* CAM enable */
+#define FCC_PSMR_BRO	((uint)0x00000200)	/* Broadcast pkt discard */
+#define FCC_PSMR_ENCRC	((uint)0x00000080)	/* Use 32-bit CRC */
+
+/* IIC parameter RAM.  Field types and order match sccp_t up to its txtmp field.
+*/
+typedef struct iic {
+	ushort	iic_rbase;	/* Rx Buffer descriptor base address */
+	ushort	iic_tbase;	/* Tx Buffer descriptor base address */
+	u_char	iic_rfcr;	/* Rx function code */
+	u_char	iic_tfcr;	/* Tx function code */
+	ushort	iic_mrblr;	/* Max receive buffer length */
+	uint	iic_rstate;	/* Internal */
+	uint	iic_rdp;	/* Internal */
+	ushort	iic_rbptr;	/* Internal */
+	ushort	iic_rbc;	/* Internal */
+	uint	iic_rxtmp;	/* Internal */
+	uint	iic_tstate;	/* Internal */
+	uint	iic_tdp;	/* Internal */
+	ushort	iic_tbptr;	/* Internal */
+	ushort	iic_tbc;	/* Internal */
+	uint	iic_txtmp;	/* Internal */
+} iic_t;
+
+/* IDMA parameter RAM: 14 ushort fields, 4 uint fields, then explicit padding.
+*/
+typedef struct idma {
+	ushort ibase;		/* IDMA buffer descriptor table base address */
+	ushort dcm;		/* DMA channel mode (IDMA_DCM_* bits) */
+	ushort ibdptr;		/* IDMA current buffer descriptor pointer */
+	ushort dpr_buf;		/* IDMA transfer buffer base address */
+	ushort buf_inv;		/* internal buffer inventory */
+	ushort ss_max;		/* steady-state maximum transfer size */
+	ushort dpr_in_ptr;	/* write pointer inside the internal buffer */
+	ushort sts;		/* source transfer size */
+	ushort dpr_out_ptr;	/* read pointer inside the internal buffer */
+	ushort seob;		/* source end of burst */
+	ushort deob;		/* destination end of burst */
+	ushort dts;		/* destination transfer size */
+	ushort ret_add;		/* return address when working in ERM=1 mode */
+	ushort res0;		/* reserved */
+	uint   bd_cnt;		/* internal byte count */
+	uint   s_ptr;		/* source internal data pointer */
+	uint   d_ptr;		/* destination internal data pointer */
+	uint   istate;		/* internal state */
+	u_char res1[20];	/* pad to 64-byte length (assumes 2-byte ushort, 4-byte uint) */
+} idma_t;
+
+/* DMA channel mode (idma_t.dcm) bit fields; DMA_WRAP_* and SD_* are values of their *_MASK.
+*/
+#define IDMA_DCM_FB		((ushort)0x8000) /* fly-by mode */
+#define IDMA_DCM_LP		((ushort)0x4000) /* low priority */
+#define IDMA_DCM_TC2		((ushort)0x0400) /* value driven on TC[2] */
+#define IDMA_DCM_DMA_WRAP_MASK	((ushort)0x01c0) /* mask for DMA wrap */
+#define IDMA_DCM_DMA_WRAP_64	((ushort)0x0000) /* 64-byte DMA xfer buffer */
+#define IDMA_DCM_DMA_WRAP_128	((ushort)0x0040) /* 128-byte DMA xfer buffer */
+#define IDMA_DCM_DMA_WRAP_256	((ushort)0x0080) /* 256-byte DMA xfer buffer */
+#define IDMA_DCM_DMA_WRAP_512	((ushort)0x00c0) /* 512-byte DMA xfer buffer */
+#define IDMA_DCM_DMA_WRAP_1024	((ushort)0x0100) /* 1024-byte DMA xfer buffer */
+#define IDMA_DCM_DMA_WRAP_2048	((ushort)0x0140) /* 2048-byte DMA xfer buffer */
+#define IDMA_DCM_SINC		((ushort)0x0020) /* source inc addr */
+#define IDMA_DCM_DINC		((ushort)0x0010) /* destination inc addr */
+#define IDMA_DCM_ERM		((ushort)0x0008) /* external request mode */
+#define IDMA_DCM_DT		((ushort)0x0004) /* DONE treatment */
+#define IDMA_DCM_SD_MASK	((ushort)0x0003) /* mask for SD bit field */
+#define IDMA_DCM_SD_MEM2MEM	((ushort)0x0000) /* memory-to-memory xfer */
+#define IDMA_DCM_SD_PER2MEM	((ushort)0x0002) /* peripheral-to-memory xfer */
+#define IDMA_DCM_SD_MEM2PER	((ushort)0x0001) /* memory-to-peripheral xfer */
+
+/* IDMA Buffer Descriptor: four uint words (flags, length, source, destination).
+*/
+typedef struct idma_bd {
+	uint flags;	/* IDMA_BD_* flag bits */
+	uint len;	/* data length */
+	uint src;	/* source data buffer pointer */
+	uint dst;	/* destination data buffer pointer */
+} idma_bd_t;
+
+/* IDMA buffer descriptor flag bit fields (uint masks for idma_bd_t.flags)
+*/
+#define IDMA_BD_V	((uint)0x80000000)	/* valid */
+#define IDMA_BD_W	((uint)0x20000000)	/* wrap */
+#define IDMA_BD_I	((uint)0x10000000)	/* interrupt */
+#define IDMA_BD_L	((uint)0x08000000)	/* last */
+#define IDMA_BD_CM	((uint)0x02000000)	/* continuous mode */
+#define IDMA_BD_SDN	((uint)0x00400000)	/* source done */
+#define IDMA_BD_DDN	((uint)0x00200000)	/* destination done */
+#define IDMA_BD_DGBL	((uint)0x00100000)	/* destination global */
+#define IDMA_BD_DBO_LE	((uint)0x00040000)	/* little-end dest byte order */
+#define IDMA_BD_DBO_BE	((uint)0x00080000)	/* big-end dest byte order */
+#define IDMA_BD_DDTB	((uint)0x00010000)	/* destination data bus */
+#define IDMA_BD_SGBL	((uint)0x00002000)	/* source global */
+#define IDMA_BD_SBO_LE	((uint)0x00000800)	/* little-end src byte order */
+#define IDMA_BD_SBO_BE	((uint)0x00001000)	/* big-end src byte order */
+#define IDMA_BD_SDTB	((uint)0x00000200)	/* source data bus */
+
+/* per-channel IDMA registers: two byte-wide registers, each followed by 3 pad bytes
+*/
+typedef struct im_idma {
+	u_char idsr;			/* IDMAn event status register */
+	u_char res0[3];			/* padding after idsr */
+	u_char idmr;			/* IDMAn event mask register */
+	u_char res1[3];			/* padding after idmr */
+} im_idma_t;
+
+/* IDMA event register bit fields (unsigned char, as wide as im_idma_t.idsr / idmr)
+*/
+#define IDMA_EVENT_SC	((unsigned char)0x08)	/* stop completed */
+#define IDMA_EVENT_OB	((unsigned char)0x04)	/* out of buffers */
+#define IDMA_EVENT_EDN	((unsigned char)0x02)	/* external DONE asserted */
+#define IDMA_EVENT_BC	((unsigned char)0x01)	/* buffer descriptor complete */
+
+/* RISC Controller Configuration Register (RCCR) bit fields.  The *_HIGH/_MED/_LOW
+ * and ERAM_*KB macros are values inside the matching *_MASK field. */
+#define RCCR_TIME	((uint)0x80000000) /* timer enable */
+#define RCCR_TIMEP_MASK	((uint)0x3f000000) /* mask for timer period bit field */
+#define RCCR_DR0M	((uint)0x00800000) /* IDMA0 request mode */
+#define RCCR_DR1M	((uint)0x00400000) /* IDMA1 request mode */
+#define RCCR_DR2M	((uint)0x00000080) /* IDMA2 request mode */
+#define RCCR_DR3M	((uint)0x00000040) /* IDMA3 request mode */
+#define RCCR_DR0QP_MASK	((uint)0x00300000) /* mask for IDMA0 req priority */
+#define RCCR_DR0QP_HIGH ((uint)0x00000000) /* IDMA0 has high req priority */
+#define RCCR_DR0QP_MED	((uint)0x00100000) /* IDMA0 has medium req priority */
+#define RCCR_DR0QP_LOW	((uint)0x00200000) /* IDMA0 has low req priority */
+#define RCCR_DR1QP_MASK	((uint)0x00030000) /* mask for IDMA1 req priority */
+#define RCCR_DR1QP_HIGH ((uint)0x00000000) /* IDMA1 has high req priority */
+#define RCCR_DR1QP_MED	((uint)0x00010000) /* IDMA1 has medium req priority */
+#define RCCR_DR1QP_LOW	((uint)0x00020000) /* IDMA1 has low req priority */
+#define RCCR_DR2QP_MASK	((uint)0x00000030) /* mask for IDMA2 req priority */
+#define RCCR_DR2QP_HIGH ((uint)0x00000000) /* IDMA2 has high req priority */
+#define RCCR_DR2QP_MED	((uint)0x00000010) /* IDMA2 has medium req priority */
+#define RCCR_DR2QP_LOW	((uint)0x00000020) /* IDMA2 has low req priority */
+#define RCCR_DR3QP_MASK	((uint)0x00000003) /* mask for IDMA3 req priority */
+#define RCCR_DR3QP_HIGH ((uint)0x00000000) /* IDMA3 has high req priority */
+#define RCCR_DR3QP_MED	((uint)0x00000001) /* IDMA3 has medium req priority */
+#define RCCR_DR3QP_LOW	((uint)0x00000002) /* IDMA3 has low req priority */
+#define RCCR_EIE	((uint)0x00080000) /* external interrupt enable */
+#define RCCR_SCD	((uint)0x00040000) /* scheduler configuration */
+#define RCCR_ERAM_MASK	((uint)0x0000e000) /* mask for enable RAM microcode */
+#define RCCR_ERAM_0KB	((uint)0x00000000) /* use 0KB of dpram for microcode */
+#define RCCR_ERAM_2KB	((uint)0x00002000) /* use 2KB of dpram for microcode */
+#define RCCR_ERAM_4KB	((uint)0x00004000) /* use 4KB of dpram for microcode */
+#define RCCR_ERAM_6KB	((uint)0x00006000) /* use 6KB of dpram for microcode */
+#define RCCR_ERAM_8KB	((uint)0x00008000) /* use 8KB of dpram for microcode */
+#define RCCR_ERAM_10KB	((uint)0x0000a000) /* use 10KB of dpram for microcode */
+#define RCCR_ERAM_12KB	((uint)0x0000c000) /* use 12KB of dpram for microcode */
+#define RCCR_EDM0	((uint)0x00000800) /* DREQ0 edge detect mode */
+#define RCCR_EDM1	((uint)0x00000400) /* DREQ1 edge detect mode */
+#define RCCR_EDM2	((uint)0x00000200) /* DREQ2 edge detect mode */
+#define RCCR_EDM3	((uint)0x00000100) /* DREQ3 edge detect mode */
+#define RCCR_DEM01	((uint)0x00000008) /* DONE0/DONE1 edge detect mode */
+#define RCCR_DEM23	((uint)0x00000004) /* DONE2/DONE3 edge detect mode */
+
+/*-----------------------------------------------------------------------
+ * CMXFCR - CMX FCC Clock Route Register (FCn bit + 3-bit Rx/Tx source per FCC)
+ */
+#define CMXFCR_FC1         0x40000000   /* FCC1 connection              */
+#define CMXFCR_RF1CS_MSK   0x38000000   /* Receive FCC1 Clock Source Mask */
+#define CMXFCR_TF1CS_MSK   0x07000000   /* Transmit FCC1 Clock Source Mask */
+#define CMXFCR_FC2         0x00400000   /* FCC2 connection              */
+#define CMXFCR_RF2CS_MSK   0x00380000   /* Receive FCC2 Clock Source Mask */
+#define CMXFCR_TF2CS_MSK   0x00070000   /* Transmit FCC2 Clock Source Mask */
+#define CMXFCR_FC3         0x00004000   /* FCC3 connection              */
+#define CMXFCR_RF3CS_MSK   0x00003800   /* Receive FCC3 Clock Source Mask */
+#define CMXFCR_TF3CS_MSK   0x00000700   /* Transmit FCC3 Clock Source Mask */
+
+#define CMXFCR_RF1CS_BRG5  0x00000000   /* Receive FCC1 Clock Source is BRG5 */
+#define CMXFCR_RF1CS_BRG6  0x08000000   /* Receive FCC1 Clock Source is BRG6 */
+#define CMXFCR_RF1CS_BRG7  0x10000000   /* Receive FCC1 Clock Source is BRG7 */
+#define CMXFCR_RF1CS_BRG8  0x18000000   /* Receive FCC1 Clock Source is BRG8 */
+#define CMXFCR_RF1CS_CLK9  0x20000000   /* Receive FCC1 Clock Source is CLK9 */
+#define CMXFCR_RF1CS_CLK10 0x28000000   /* Receive FCC1 Clock Source is CLK10 */
+#define CMXFCR_RF1CS_CLK11 0x30000000   /* Receive FCC1 Clock Source is CLK11 */
+#define CMXFCR_RF1CS_CLK12 0x38000000   /* Receive FCC1 Clock Source is CLK12 */
+
+#define CMXFCR_TF1CS_BRG5  0x00000000   /* Transmit FCC1 Clock Source is BRG5 */
+#define CMXFCR_TF1CS_BRG6  0x01000000   /* Transmit FCC1 Clock Source is BRG6 */
+#define CMXFCR_TF1CS_BRG7  0x02000000   /* Transmit FCC1 Clock Source is BRG7 */
+#define CMXFCR_TF1CS_BRG8  0x03000000   /* Transmit FCC1 Clock Source is BRG8 */
+#define CMXFCR_TF1CS_CLK9  0x04000000   /* Transmit FCC1 Clock Source is CLK9 */
+#define CMXFCR_TF1CS_CLK10 0x05000000   /* Transmit FCC1 Clock Source is CLK10 */
+#define CMXFCR_TF1CS_CLK11 0x06000000   /* Transmit FCC1 Clock Source is CLK11 */
+#define CMXFCR_TF1CS_CLK12 0x07000000   /* Transmit FCC1 Clock Source is CLK12 */
+
+#define CMXFCR_RF2CS_BRG5  0x00000000   /* Receive FCC2 Clock Source is BRG5 */
+#define CMXFCR_RF2CS_BRG6  0x00080000   /* Receive FCC2 Clock Source is BRG6 */
+#define CMXFCR_RF2CS_BRG7  0x00100000   /* Receive FCC2 Clock Source is BRG7 */
+#define CMXFCR_RF2CS_BRG8  0x00180000   /* Receive FCC2 Clock Source is BRG8 */
+#define CMXFCR_RF2CS_CLK13 0x00200000   /* Receive FCC2 Clock Source is CLK13 */
+#define CMXFCR_RF2CS_CLK14 0x00280000   /* Receive FCC2 Clock Source is CLK14 */
+#define CMXFCR_RF2CS_CLK15 0x00300000   /* Receive FCC2 Clock Source is CLK15 */
+#define CMXFCR_RF2CS_CLK16 0x00380000   /* Receive FCC2 Clock Source is CLK16 */
+
+#define CMXFCR_TF2CS_BRG5  0x00000000   /* Transmit FCC2 Clock Source is BRG5 */
+#define CMXFCR_TF2CS_BRG6  0x00010000   /* Transmit FCC2 Clock Source is BRG6 */
+#define CMXFCR_TF2CS_BRG7  0x00020000   /* Transmit FCC2 Clock Source is BRG7 */
+#define CMXFCR_TF2CS_BRG8  0x00030000   /* Transmit FCC2 Clock Source is BRG8 */
+#define CMXFCR_TF2CS_CLK13 0x00040000   /* Transmit FCC2 Clock Source is CLK13 */
+#define CMXFCR_TF2CS_CLK14 0x00050000   /* Transmit FCC2 Clock Source is CLK14 */
+#define CMXFCR_TF2CS_CLK15 0x00060000   /* Transmit FCC2 Clock Source is CLK15 */
+#define CMXFCR_TF2CS_CLK16 0x00070000   /* Transmit FCC2 Clock Source is CLK16 */
+
+#define CMXFCR_RF3CS_BRG5  0x00000000   /* Receive FCC3 Clock Source is BRG5 */
+#define CMXFCR_RF3CS_BRG6  0x00000800   /* Receive FCC3 Clock Source is BRG6 */
+#define CMXFCR_RF3CS_BRG7  0x00001000   /* Receive FCC3 Clock Source is BRG7 */
+#define CMXFCR_RF3CS_BRG8  0x00001800   /* Receive FCC3 Clock Source is BRG8 */
+#define CMXFCR_RF3CS_CLK13 0x00002000   /* Receive FCC3 Clock Source is CLK13 */
+#define CMXFCR_RF3CS_CLK14 0x00002800   /* Receive FCC3 Clock Source is CLK14 */
+#define CMXFCR_RF3CS_CLK15 0x00003000   /* Receive FCC3 Clock Source is CLK15 */
+#define CMXFCR_RF3CS_CLK16 0x00003800   /* Receive FCC3 Clock Source is CLK16 */
+
+#define CMXFCR_TF3CS_BRG5  0x00000000   /* Transmit FCC3 Clock Source is BRG5 */
+#define CMXFCR_TF3CS_BRG6  0x00000100   /* Transmit FCC3 Clock Source is BRG6 */
+#define CMXFCR_TF3CS_BRG7  0x00000200   /* Transmit FCC3 Clock Source is BRG7 */
+#define CMXFCR_TF3CS_BRG8  0x00000300   /* Transmit FCC3 Clock Source is BRG8 */
+#define CMXFCR_TF3CS_CLK13 0x00000400   /* Transmit FCC3 Clock Source is CLK13 */
+#define CMXFCR_TF3CS_CLK14 0x00000500   /* Transmit FCC3 Clock Source is CLK14 */
+#define CMXFCR_TF3CS_CLK15 0x00000600   /* Transmit FCC3 Clock Source is CLK15 */
+#define CMXFCR_TF3CS_CLK16 0x00000700   /* Transmit FCC3 Clock Source is CLK16 */
+
+/*-----------------------------------------------------------------------
+ * CMXSCR - CMX SCC Clock Route Register (one byte per SCC: GRn, SCn, Rx, Tx)
+ */
+#define CMXSCR_GR1         0x80000000   /* Grant Support of SCC1        */
+#define CMXSCR_SC1         0x40000000   /* SCC1 connection              */
+#define CMXSCR_RS1CS_MSK   0x38000000   /* Receive SCC1 Clock Source Mask */
+#define CMXSCR_TS1CS_MSK   0x07000000   /* Transmit SCC1 Clock Source Mask */
+#define CMXSCR_GR2         0x00800000   /* Grant Support of SCC2        */
+#define CMXSCR_SC2         0x00400000   /* SCC2 connection              */
+#define CMXSCR_RS2CS_MSK   0x00380000   /* Receive SCC2 Clock Source Mask */
+#define CMXSCR_TS2CS_MSK   0x00070000   /* Transmit SCC2 Clock Source Mask */
+#define CMXSCR_GR3         0x00008000   /* Grant Support of SCC3        */
+#define CMXSCR_SC3         0x00004000   /* SCC3 connection              */
+#define CMXSCR_RS3CS_MSK   0x00003800   /* Receive SCC3 Clock Source Mask */
+#define CMXSCR_TS3CS_MSK   0x00000700   /* Transmit SCC3 Clock Source Mask */
+#define CMXSCR_GR4         0x00000080   /* Grant Support of SCC4        */
+#define CMXSCR_SC4         0x00000040   /* SCC4 connection              */
+#define CMXSCR_RS4CS_MSK   0x00000038   /* Receive SCC4 Clock Source Mask */
+#define CMXSCR_TS4CS_MSK   0x00000007   /* Transmit SCC4 Clock Source Mask */
+
+#define CMXSCR_RS1CS_BRG1  0x00000000   /* SCC1 Rx Clock Source is BRG1 */
+#define CMXSCR_RS1CS_BRG2  0x08000000   /* SCC1 Rx Clock Source is BRG2 */
+#define CMXSCR_RS1CS_BRG3  0x10000000   /* SCC1 Rx Clock Source is BRG3 */
+#define CMXSCR_RS1CS_BRG4  0x18000000   /* SCC1 Rx Clock Source is BRG4 */
+#define CMXSCR_RS1CS_CLK11 0x20000000   /* SCC1 Rx Clock Source is CLK11 */
+#define CMXSCR_RS1CS_CLK12 0x28000000   /* SCC1 Rx Clock Source is CLK12 */
+#define CMXSCR_RS1CS_CLK3  0x30000000   /* SCC1 Rx Clock Source is CLK3 */
+#define CMXSCR_RS1CS_CLK4  0x38000000   /* SCC1 Rx Clock Source is CLK4 */
+
+#define CMXSCR_TS1CS_BRG1  0x00000000   /* SCC1 Tx Clock Source is BRG1 */
+#define CMXSCR_TS1CS_BRG2  0x01000000   /* SCC1 Tx Clock Source is BRG2 */
+#define CMXSCR_TS1CS_BRG3  0x02000000   /* SCC1 Tx Clock Source is BRG3 */
+#define CMXSCR_TS1CS_BRG4  0x03000000   /* SCC1 Tx Clock Source is BRG4 */
+#define CMXSCR_TS1CS_CLK11 0x04000000   /* SCC1 Tx Clock Source is CLK11 */
+#define CMXSCR_TS1CS_CLK12 0x05000000   /* SCC1 Tx Clock Source is CLK12 */
+#define CMXSCR_TS1CS_CLK3  0x06000000   /* SCC1 Tx Clock Source is CLK3 */
+#define CMXSCR_TS1CS_CLK4  0x07000000   /* SCC1 Tx Clock Source is CLK4 */
+
+#define CMXSCR_RS2CS_BRG1  0x00000000   /* SCC2 Rx Clock Source is BRG1 */
+#define CMXSCR_RS2CS_BRG2  0x00080000   /* SCC2 Rx Clock Source is BRG2 */
+#define CMXSCR_RS2CS_BRG3  0x00100000   /* SCC2 Rx Clock Source is BRG3 */
+#define CMXSCR_RS2CS_BRG4  0x00180000   /* SCC2 Rx Clock Source is BRG4 */
+#define CMXSCR_RS2CS_CLK11 0x00200000   /* SCC2 Rx Clock Source is CLK11 */
+#define CMXSCR_RS2CS_CLK12 0x00280000   /* SCC2 Rx Clock Source is CLK12 */
+#define CMXSCR_RS2CS_CLK3  0x00300000   /* SCC2 Rx Clock Source is CLK3 */
+#define CMXSCR_RS2CS_CLK4  0x00380000   /* SCC2 Rx Clock Source is CLK4 */
+
+#define CMXSCR_TS2CS_BRG1  0x00000000   /* SCC2 Tx Clock Source is BRG1 */
+#define CMXSCR_TS2CS_BRG2  0x00010000   /* SCC2 Tx Clock Source is BRG2 */
+#define CMXSCR_TS2CS_BRG3  0x00020000   /* SCC2 Tx Clock Source is BRG3 */
+#define CMXSCR_TS2CS_BRG4  0x00030000   /* SCC2 Tx Clock Source is BRG4 */
+#define CMXSCR_TS2CS_CLK11 0x00040000   /* SCC2 Tx Clock Source is CLK11 */
+#define CMXSCR_TS2CS_CLK12 0x00050000   /* SCC2 Tx Clock Source is CLK12 */
+#define CMXSCR_TS2CS_CLK3  0x00060000   /* SCC2 Tx Clock Source is CLK3 */
+#define CMXSCR_TS2CS_CLK4  0x00070000   /* SCC2 Tx Clock Source is CLK4 */
+
+#define CMXSCR_RS3CS_BRG1  0x00000000   /* SCC3 Rx Clock Source is BRG1 */
+#define CMXSCR_RS3CS_BRG2  0x00000800   /* SCC3 Rx Clock Source is BRG2 */
+#define CMXSCR_RS3CS_BRG3  0x00001000   /* SCC3 Rx Clock Source is BRG3 */
+#define CMXSCR_RS3CS_BRG4  0x00001800   /* SCC3 Rx Clock Source is BRG4 */
+#define CMXSCR_RS3CS_CLK5  0x00002000   /* SCC3 Rx Clock Source is CLK5 */
+#define CMXSCR_RS3CS_CLK6  0x00002800   /* SCC3 Rx Clock Source is CLK6 */
+#define CMXSCR_RS3CS_CLK7  0x00003000   /* SCC3 Rx Clock Source is CLK7 */
+#define CMXSCR_RS3CS_CLK8  0x00003800   /* SCC3 Rx Clock Source is CLK8 */
+
+#define CMXSCR_TS3CS_BRG1  0x00000000   /* SCC3 Tx Clock Source is BRG1 */
+#define CMXSCR_TS3CS_BRG2  0x00000100   /* SCC3 Tx Clock Source is BRG2 */
+#define CMXSCR_TS3CS_BRG3  0x00000200   /* SCC3 Tx Clock Source is BRG3 */
+#define CMXSCR_TS3CS_BRG4  0x00000300   /* SCC3 Tx Clock Source is BRG4 */
+#define CMXSCR_TS3CS_CLK5  0x00000400   /* SCC3 Tx Clock Source is CLK5 */
+#define CMXSCR_TS3CS_CLK6  0x00000500   /* SCC3 Tx Clock Source is CLK6 */
+#define CMXSCR_TS3CS_CLK7  0x00000600   /* SCC3 Tx Clock Source is CLK7 */
+#define CMXSCR_TS3CS_CLK8  0x00000700   /* SCC3 Tx Clock Source is CLK8 */
+
+#define CMXSCR_RS4CS_BRG1  0x00000000   /* SCC4 Rx Clock Source is BRG1 */
+#define CMXSCR_RS4CS_BRG2  0x00000008   /* SCC4 Rx Clock Source is BRG2 */
+#define CMXSCR_RS4CS_BRG3  0x00000010   /* SCC4 Rx Clock Source is BRG3 */
+#define CMXSCR_RS4CS_BRG4  0x00000018   /* SCC4 Rx Clock Source is BRG4 */
+#define CMXSCR_RS4CS_CLK5  0x00000020   /* SCC4 Rx Clock Source is CLK5 */
+#define CMXSCR_RS4CS_CLK6  0x00000028   /* SCC4 Rx Clock Source is CLK6 */
+#define CMXSCR_RS4CS_CLK7  0x00000030   /* SCC4 Rx Clock Source is CLK7 */
+#define CMXSCR_RS4CS_CLK8  0x00000038   /* SCC4 Rx Clock Source is CLK8 */
+
+#define CMXSCR_TS4CS_BRG1  0x00000000   /* SCC4 Tx Clock Source is BRG1 */
+#define CMXSCR_TS4CS_BRG2  0x00000001   /* SCC4 Tx Clock Source is BRG2 */
+#define CMXSCR_TS4CS_BRG3  0x00000002   /* SCC4 Tx Clock Source is BRG3 */
+#define CMXSCR_TS4CS_BRG4  0x00000003   /* SCC4 Tx Clock Source is BRG4 */
+#define CMXSCR_TS4CS_CLK5  0x00000004   /* SCC4 Tx Clock Source is CLK5 */
+#define CMXSCR_TS4CS_CLK6  0x00000005   /* SCC4 Tx Clock Source is CLK6 */
+#define CMXSCR_TS4CS_CLK7  0x00000006   /* SCC4 Tx Clock Source is CLK7 */
+#define CMXSCR_TS4CS_CLK8  0x00000007   /* SCC4 Tx Clock Source is CLK8 */
+
+/*-----------------------------------------------------------------------
+ * SIUMCR - SIU Module Configuration Register				 4-31
+ */
+#define SIUMCR_BBD	0x80000000	/* Bus Busy Disable		*/
+#define SIUMCR_ESE	0x40000000	/* External Snoop Enable	*/
+#define SIUMCR_PBSE	0x20000000	/* Parity Byte Select Enable	*/
+#define SIUMCR_CDIS	0x10000000	/* Core Disable			*/
+#define SIUMCR_DPPC00	0x00000000	/* Data Parity Pins Configuration*/
+#define SIUMCR_DPPC01	0x04000000	/* - " -			*/
+#define SIUMCR_DPPC10	0x08000000	/* - " -			*/
+#define SIUMCR_DPPC11	0x0c000000	/* - " -			*/
+#define SIUMCR_L2CPC00	0x00000000	/* L2 Cache Pins Configuration	*/
+#define SIUMCR_L2CPC01	0x01000000	/* - " -			*/
+#define SIUMCR_L2CPC10	0x02000000	/* - " -			*/
+#define SIUMCR_L2CPC11	0x03000000	/* - " -			*/
+#define SIUMCR_LBPC00	0x00000000	/* Local Bus Pins Configuration	*/
+#define SIUMCR_LBPC01	0x00400000	/* - " -			*/
+#define SIUMCR_LBPC10	0x00800000	/* - " -			*/
+#define SIUMCR_LBPC11	0x00c00000	/* - " -			*/
+#define SIUMCR_APPC00	0x00000000	/* Address Parity Pins Configuration*/
+#define SIUMCR_APPC01	0x00100000	/* - " -			*/
+#define SIUMCR_APPC10	0x00200000	/* - " -			*/
+#define SIUMCR_APPC11	0x00300000	/* - " -			*/
+#define SIUMCR_CS10PC00	0x00000000	/* CS10 Pin Configuration	*/
+#define SIUMCR_CS10PC01	0x00040000	/* - " -			*/
+#define SIUMCR_CS10PC10	0x00080000	/* - " -			*/
+#define SIUMCR_CS10PC11	0x000c0000	/* - " -			*/
+#define SIUMCR_BCTLC00	0x00000000	/* Buffer Control Configuration	*/
+#define SIUMCR_BCTLC01	0x00010000	/* - " -			*/
+#define SIUMCR_BCTLC10	0x00020000	/* - " -			*/
+#define SIUMCR_BCTLC11	0x00030000	/* - " -			*/
+#define SIUMCR_MMR00	0x00000000	/* Mask Masters Requests	*/
+#define SIUMCR_MMR01	0x00004000	/* - " -			*/
+#define SIUMCR_MMR10	0x00008000	/* - " -			*/
+#define SIUMCR_MMR11	0x0000c000	/* - " -			*/
+#define SIUMCR_LPBSE	0x00002000	/* LocalBus Parity Byte Select Enable*/
+
+/*-----------------------------------------------------------------------
+ * SCCR - System Clock Control Register					 9-8
+*/
+#define SCCR_PCI_MODE	0x00000100	/* PCI Mode	*/
+#define SCCR_PCI_MODCK	0x00000080	/* Value of PCI_MODCK pin	*/
+#define SCCR_PCIDF_MSK	0x00000078	/* PCI division factor	*/
+#define SCCR_PCIDF_SHIFT 3
+
+#ifndef CPM_IMMR_OFFSET
+#define CPM_IMMR_OFFSET	0x101a8
+#endif
+
+#define FCC_PSMR_RMII	((uint)0x00020000)	/* Use RMII interface */
+
+/* FCC iop & clock configuration. BSP code is responsible for defining Fx_RXCLK & Fx_TXCLK
+ * in order to use the clock-computing macros below for FCC x
+ */
+
+/* Automatically generates register configurations; macro arguments may be expressions */
+#define PC_CLK(x)	((uint)(1U << ((x) - 1)))	/* FCC CLK I/O ports */
+
+#define CMXFCR_RF1CS(x)	((uint)(((x) - 5) << 27))	/* FCC1 Receive Clock Source */
+#define CMXFCR_TF1CS(x)	((uint)(((x) - 5) << 24))	/* FCC1 Transmit Clock Source */
+#define CMXFCR_RF2CS(x)	((uint)(((x) - 9) << 19))	/* FCC2 Receive Clock Source */
+#define CMXFCR_TF2CS(x) ((uint)(((x) - 9) << 16))	/* FCC2 Transmit Clock Source */
+#define CMXFCR_RF3CS(x)	((uint)(((x) - 9) << 11))	/* FCC3 Receive Clock Source */
+#define CMXFCR_TF3CS(x) ((uint)(((x) - 9) << 8))	/* FCC3 Transmit Clock Source */
+
+#define PC_F1RXCLK	PC_CLK(F1_RXCLK)
+#define PC_F1TXCLK	PC_CLK(F1_TXCLK)
+#define CMX1_CLK_ROUTE	(CMXFCR_RF1CS(F1_RXCLK) | CMXFCR_TF1CS(F1_TXCLK))
+#define CMX1_CLK_MASK	((uint)0xff000000)
+
+#define PC_F2RXCLK	PC_CLK(F2_RXCLK)
+#define PC_F2TXCLK	PC_CLK(F2_TXCLK)
+#define CMX2_CLK_ROUTE	(CMXFCR_RF2CS(F2_RXCLK) | CMXFCR_TF2CS(F2_TXCLK))
+#define CMX2_CLK_MASK	((uint)0x00ff0000)
+
+#define PC_F3RXCLK	PC_CLK(F3_RXCLK)
+#define PC_F3TXCLK	PC_CLK(F3_TXCLK)
+#define CMX3_CLK_ROUTE	(CMXFCR_RF3CS(F3_RXCLK) | CMXFCR_TF3CS(F3_TXCLK))
+#define CMX3_CLK_MASK	((uint)0x0000ff00)
+
+#define CPMUX_CLK_MASK (CMX3_CLK_MASK | CMX2_CLK_MASK)
+#define CPMUX_CLK_ROUTE (CMX3_CLK_ROUTE | CMX2_CLK_ROUTE)
+
+#define CLK_TRX (PC_F3TXCLK | PC_F3RXCLK | PC_F2TXCLK | PC_F2RXCLK)
+
+/* I/O Pin assignment for FCC1.  I don't yet know the best way to do this,
+ * but there is little variation among the choices.
+ */
+#define PA1_COL		0x00000001U
+#define PA1_CRS		0x00000002U
+#define PA1_TXER	0x00000004U
+#define PA1_TXEN	0x00000008U
+#define PA1_RXDV	0x00000010U
+#define PA1_RXER	0x00000020U
+#define PA1_TXDAT	0x00003c00U
+#define PA1_RXDAT	0x0003c000U
+#define PA1_PSORA0	(PA1_RXDAT | PA1_TXDAT)
+#define PA1_PSORA1	(PA1_COL | PA1_CRS | PA1_TXER | PA1_TXEN | \
+		PA1_RXDV | PA1_RXER)
+#define PA1_DIRA0	(PA1_RXDAT | PA1_CRS | PA1_COL | PA1_RXER | PA1_RXDV)
+#define PA1_DIRA1	(PA1_TXDAT | PA1_TXEN | PA1_TXER)
+
+
+/* I/O Pin assignment for FCC2.  I don't yet know the best way to do this,
+ * but there is little variation among the choices.
+ */
+#define PB2_TXER	0x00000001U
+#define PB2_RXDV	0x00000002U
+#define PB2_TXEN	0x00000004U
+#define PB2_RXER	0x00000008U
+#define PB2_COL		0x00000010U
+#define PB2_CRS		0x00000020U
+#define PB2_TXDAT	0x000003c0U
+#define PB2_RXDAT	0x00003c00U
+#define PB2_PSORB0	(PB2_RXDAT | PB2_TXDAT | PB2_CRS | PB2_COL | \
+		PB2_RXER | PB2_RXDV | PB2_TXER)
+#define PB2_PSORB1	(PB2_TXEN)
+#define PB2_DIRB0	(PB2_RXDAT | PB2_CRS | PB2_COL | PB2_RXER | PB2_RXDV)
+#define PB2_DIRB1	(PB2_TXDAT | PB2_TXEN | PB2_TXER)
+
+
+/* I/O Pin assignment for FCC3.  I don't yet know the best way to do this,
+ * but there is little variation among the choices.
+ */
+#define PB3_RXDV	0x00004000U
+#define PB3_RXER	0x00008000U
+#define PB3_TXER	0x00010000U
+#define PB3_TXEN	0x00020000U
+#define PB3_COL		0x00040000U
+#define PB3_CRS		0x00080000U
+#define PB3_TXDAT	0x0f000000U
+#define PC3_TXDAT	0x00000010U
+#define PB3_RXDAT	0x00f00000U
+#define PB3_PSORB0	(PB3_RXDAT | PB3_TXDAT | PB3_CRS | PB3_COL | \
+		PB3_RXER | PB3_RXDV | PB3_TXER | PB3_TXEN)
+#define PB3_PSORB1	0
+#define PB3_DIRB0	(PB3_RXDAT | PB3_CRS | PB3_COL | PB3_RXER | PB3_RXDV)
+#define PB3_DIRB1	(PB3_TXDAT | PB3_TXEN | PB3_TXER)
+#define PC3_DIRC1	(PC3_TXDAT)
+
+/* Handy macro to specify mem for FCCs; x is parenthesized so expressions expand correctly */
+#define FCC_MEM_OFFSET(x) (CPM_FCC_SPECIAL_BASE + ((x) * 128))
+#define FCC1_MEM_OFFSET FCC_MEM_OFFSET(0)
+#define FCC2_MEM_OFFSET FCC_MEM_OFFSET(1)
+#define FCC3_MEM_OFFSET FCC_MEM_OFFSET(2)
+
+/* Pipeline Maximum Depth */
+#define MPC82XX_BCR_PLDP 0x00800000
+
+/* Clocks and BRGs */
+
+enum cpm_clk_dir {
+	CPM_CLK_RX,
+	CPM_CLK_TX,
+	CPM_CLK_RTX
+};
+
+enum cpm_clk_target {
+	CPM_CLK_SCC1,
+	CPM_CLK_SCC2,
+	CPM_CLK_SCC3,
+	CPM_CLK_SCC4,
+	CPM_CLK_FCC1,
+	CPM_CLK_FCC2,
+	CPM_CLK_FCC3,
+	CPM_CLK_SMC1,
+	CPM_CLK_SMC2,
+};
+
+enum cpm_clk {
+	CPM_CLK_NONE = 0,
+	CPM_BRG1,	/* Baud Rate Generator  1 */
+	CPM_BRG2,	/* Baud Rate Generator  2 */
+	CPM_BRG3,	/* Baud Rate Generator  3 */
+	CPM_BRG4,	/* Baud Rate Generator  4 */
+	CPM_BRG5,	/* Baud Rate Generator  5 */
+	CPM_BRG6,	/* Baud Rate Generator  6 */
+	CPM_BRG7,	/* Baud Rate Generator  7 */
+	CPM_BRG8,	/* Baud Rate Generator  8 */
+	CPM_CLK1,	/* Clock  1 */
+	CPM_CLK2,	/* Clock  2 */
+	CPM_CLK3,	/* Clock  3 */
+	CPM_CLK4,	/* Clock  4 */
+	CPM_CLK5,	/* Clock  5 */
+	CPM_CLK6,	/* Clock  6 */
+	CPM_CLK7,	/* Clock  7 */
+	CPM_CLK8,	/* Clock  8 */
+	CPM_CLK9,	/* Clock  9 */
+	CPM_CLK10,	/* Clock 10 */
+	CPM_CLK11,	/* Clock 11 */
+	CPM_CLK12,	/* Clock 12 */
+	CPM_CLK13,	/* Clock 13 */
+	CPM_CLK14,	/* Clock 14 */
+	CPM_CLK15,	/* Clock 15 */
+	CPM_CLK16,	/* Clock 16 */
+	CPM_CLK17,	/* Clock 17 */
+	CPM_CLK18,	/* Clock 18 */
+	CPM_CLK19,	/* Clock 19 */
+	CPM_CLK20,	/* Clock 20 */
+	CPM_CLK_DUMMY
+};
+
+int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
+
+#define CPM_PIN_INPUT     0
+#define CPM_PIN_OUTPUT    1
+#define CPM_PIN_PRIMARY   0
+#define CPM_PIN_SECONDARY 2
+#define CPM_PIN_GPIO      4
+#define CPM_PIN_OPENDRAIN 8
+
+void __init cpm2_set_pin(int port, int pin, int flags);
+
+#endif /* __CPM2__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
new file mode 100644
index 0000000000..727d4b3219
--- /dev/null
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_CPU_HAS_FEATURE_H
+#define __ASM_POWERPC_CPU_HAS_FEATURE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+#include <asm/cputable.h>
+
+static __always_inline bool early_cpu_has_feature(unsigned long feature)
+{
+	return !!((CPU_FTRS_ALWAYS & feature) ||
+		  (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature));
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_CPU_FTR_KEYS	BITS_PER_LONG
+
+extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
+
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+	int i;	/* index into cpu_feature_keys[] */
+
+#ifndef __clang__ /* clang can't cope with this */
+	BUILD_BUG_ON(!__builtin_constant_p(feature));	/* feature must be a compile-time constant */
+#endif
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+	if (!static_key_initialized) {	/* called too early: warn, then answer via early_cpu_has_feature() */
+		printk("Warning! cpu_has_feature() used prior to jump label init!\n");
+		dump_stack();
+		return early_cpu_has_feature(feature);
+	}
+#endif
+
+	if (CPU_FTRS_ALWAYS & feature)	/* answered without consulting a static key */
+		return true;
+
+	if (!(CPU_FTRS_POSSIBLE & feature))	/* answered without consulting a static key */
+		return false;
+
+	i = __builtin_ctzl(feature);	/* trailing-zero count == position of the lowest set bit */
+	return static_branch_likely(&cpu_feature_keys[i]);
+}
+#else
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+	return early_cpu_has_feature(feature);
+}
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_POWERPC_CPU_HAS_FEATURE_H */
diff --git a/arch/powerpc/include/asm/cpu_setup.h b/arch/powerpc/include/asm/cpu_setup.h
new file mode 100644
index 0000000000..30e2fe3895
--- /dev/null
+++ b/arch/powerpc/include/asm/cpu_setup.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_CPU_SETUP_H
+#define _ASM_POWERPC_CPU_SETUP_H
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_power7(void);
+void __restore_cpu_power8(void);
+void __restore_cpu_power9(void);
+void __restore_cpu_power10(void);
+
+void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440ep(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440epx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440gx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440grx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440spe(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440x5(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460ex(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460gt(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_603(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_604(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750cx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750fx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_7400(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_7410(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_745x(unsigned long offset, struct cpu_spec *spec);
+
+void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_pa6t(void);
+void __restore_cpu_ppc970(void);
+
+void __setup_cpu_e5500(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e6500(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_e5500(void);
+void __restore_cpu_e6500(void);
+#endif /* _ASM_POWERPC_CPU_SETUP_H */
diff --git a/arch/powerpc/include/asm/cpufeature.h b/arch/powerpc/include/asm/cpufeature.h
new file mode 100644
index 0000000000..2dcc66225e
--- /dev/null
+++ b/arch/powerpc/include/asm/cpufeature.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CPU feature definitions for module loading, used by
+ * module_cpu_feature_match(), see asm/cputable.h for powerpc CPU features.
+ *
+ * Copyright 2016 Alastair D'Silva, IBM Corporation.
+ */
+
+#ifndef __ASM_POWERPC_CPUFEATURE_H
+#define __ASM_POWERPC_CPUFEATURE_H
+
+#include <asm/cputable.h>
+
+/* Keep these in step with powerpc/include/asm/cputable.h */
+#define MAX_CPU_FEATURES (2 * 32)
+
+/*
+ * Currently we don't have a need for any of the feature bits defined in
+ * cpu_user_features. When we do, they should be defined such as:
+ *
+ * #define PPC_MODULE_FEATURE_32 (ilog2(PPC_FEATURE_32))
+ */
+
+#define PPC_MODULE_FEATURE_VEC_CRYPTO			(32 + ilog2(PPC_FEATURE2_VEC_CRYPTO))
+#define PPC_MODULE_FEATURE_P10				(32 + ilog2(PPC_FEATURE2_ARCH_3_1))
+
+#define cpu_feature(x)		(x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	if (num < 32)
+		return !!(cur_cpu_spec->cpu_user_features & 1UL << num);
+	else
+		return !!(cur_cpu_spec->cpu_user_features2 & 1UL << (num - 32));
+}
+
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 0000000000..0cce5dc7fb
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Thread state used in powernv idle state management */
+#define PNV_THREAD_RUNNING              0
+#define PNV_THREAD_NAP                  1
+#define PNV_THREAD_SLEEP                2
+#define PNV_THREAD_WINKLE               3
+
+/*
+ * Core state used in powernv idle for POWER8.
+ *
+ * The lock bit synchronizes updates to the state, as well as parts of the
+ * sleep/wake code (see kernel/idle_book3s.S).
+ *
+ * Bottom 8 bits track the idle state of each thread. Bit is cleared before
+ * the thread executes an idle instruction (nap/sleep/winkle).
+ *
+ * Then there is winkle tracking. A core does not lose complete state
+ * until every thread is in winkle. So the winkle count field counts the
+ * number of threads in winkle (small window of false positives is okay
+ * around the sleep/wake, so long as there are no false negatives).
+ *
+ * When the winkle count reaches 8 (the COUNT_ALL_BIT becomes set), then
+ * the THREAD_WINKLE_BITS are set, which indicate which threads have not
+ * yet woken from the winkle state.
+ */
+#define NR_PNV_CORE_IDLE_LOCK_BIT		28
+#define PNV_CORE_IDLE_LOCK_BIT			(1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
+
+#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT	16
+#define PNV_CORE_IDLE_WINKLE_COUNT		0x00010000
+#define PNV_CORE_IDLE_WINKLE_COUNT_BITS		0x000F0000
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT	8
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS	0x0000FF00
+
+#define PNV_CORE_IDLE_THREAD_BITS       	0x000000FF
+
+/*
+ * ============================ NOTE =================================
+ * The older firmware populates only the RL field in the psscr_val and
+ * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the
+ * remaining PSSCR fields to default values as follows:
+ *
+ * - ESL and EC bits are set to 1. So wakeup from any stop state will be
+ *   at vector 0x100.
+ *
+ * - MTL and PSLL are set to the maximum allowed value as per the ISA,
+ *    i.e. 15.
+ *
+ * - The Transition Rate, TR is set to the Maximum value 3.
+ */
+#define PSSCR_HV_DEFAULT_VAL    (PSSCR_ESL | PSSCR_EC |		    \
+				PSSCR_PSLL_MASK | PSSCR_TR_MASK |   \
+				PSSCR_MTL_MASK)
+
+#define PSSCR_HV_DEFAULT_MASK   (PSSCR_ESL | PSSCR_EC |		    \
+				PSSCR_PSLL_MASK | PSSCR_TR_MASK |   \
+				PSSCR_MTL_MASK | PSSCR_RL_MASK)
+#define PSSCR_EC_SHIFT    20
+#define PSSCR_ESL_SHIFT   21
+#define GET_PSSCR_EC(x)   (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
+#define GET_PSSCR_ESL(x)  (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)
+#define GET_PSSCR_RL(x)   ((x) & PSSCR_RL_MASK)
+
+#define ERR_EC_ESL_MISMATCH		-1
+#define ERR_DEEP_STATE_ESL_MISMATCH	-2
+
+#ifndef __ASSEMBLY__
+
+#define PNV_IDLE_NAME_LEN    16
+struct pnv_idle_states_t {
+	char name[PNV_IDLE_NAME_LEN];
+	u32 latency_ns;
+	u32 residency_ns;
+	u64 psscr_val;
+	u64 psscr_mask;
+	u32 flags;
+	bool valid;
+};
+
+extern struct pnv_idle_states_t *pnv_idle_states;
+extern int nr_pnv_idle_states;
+
+unsigned long pnv_cpu_offline(unsigned int cpu);
+int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
+static inline void report_invalid_psscr_val(u64 psscr_val, int err)
+{
+	switch (err) {	/* logs one warning per known ERR_* code; other values print nothing */
+	case ERR_EC_ESL_MISMATCH:
+		pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal\n",
+			psscr_val);
+		break;
+	case ERR_DEEP_STATE_ESL_MISMATCH:
+		pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state\n",
+			psscr_val);
+	}
+}
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
new file mode 100644
index 0000000000..8765d51583
--- /dev/null
+++ b/arch/powerpc/include/asm/cputable.h
@@ -0,0 +1,609 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_CPUTABLE_H
+#define __ASM_POWERPC_CPUTABLE_H
+
+
+#include <linux/types.h>
+#include <uapi/asm/cputable.h>
+#include <asm/asm-const.h>
+
+#ifndef __ASSEMBLY__
+
+/* This structure can grow, its real size is used by head.S code
+ * via the mkdefs mechanism.
+ */
+struct cpu_spec;
+
+typedef	void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec);
+typedef	void (*cpu_restore_t)(void);
+
+enum powerpc_pmc_type {
+	PPC_PMC_DEFAULT = 0,
+	PPC_PMC_IBM = 1,
+	PPC_PMC_PA6T = 2,
+	PPC_PMC_G4 = 3,
+};
+
+struct pt_regs;
+
+extern int machine_check_generic(struct pt_regs *regs);
+extern int machine_check_4xx(struct pt_regs *regs);
+extern int machine_check_440A(struct pt_regs *regs);
+extern int machine_check_e500mc(struct pt_regs *regs);
+extern int machine_check_e500(struct pt_regs *regs);
+extern int machine_check_47x(struct pt_regs *regs);
+int machine_check_8xx(struct pt_regs *regs);
+int machine_check_83xx(struct pt_regs *regs);
+
+extern void cpu_down_flush_e500v2(void);
+extern void cpu_down_flush_e500mc(void);
+extern void cpu_down_flush_e5500(void);
+extern void cpu_down_flush_e6500(void);
+
+/* NOTE WELL: Update identify_cpu() if fields are added or removed! */
+struct cpu_spec {
+	/* CPU is matched via (PVR & pvr_mask) == pvr_value */
+	unsigned int	pvr_mask;
+	unsigned int	pvr_value;
+
+	char		*cpu_name;
+	unsigned long	cpu_features;		/* Kernel features */
+	unsigned int	cpu_user_features;	/* Userland features */
+	unsigned int	cpu_user_features2;	/* Userland features v2 */
+	unsigned int	mmu_features;		/* MMU features */
+
+	/* cache line sizes */
+	unsigned int	icache_bsize;
+	unsigned int	dcache_bsize;
+
+	/* flush caches inside the current cpu */
+	void (*cpu_down_flush)(void);
+
+	/* number of performance monitor counters */
+	unsigned int	num_pmcs;
+	enum powerpc_pmc_type pmc_type;
+
+	/* this is called to initialize various CPU bits like L1 cache,
+	 * BHT, SPD, etc... from head.S before branching to identify_machine
+	 */
+	cpu_setup_t	cpu_setup;
+	/* Used to restore cpu setup on secondary processors and at resume */
+	cpu_restore_t	cpu_restore;
+
+	/* Name of processor class, for the ELF AT_PLATFORM entry */
+	char		*platform;
+
+	/* Processor specific machine check handling. Return negative
+	 * if the error is fatal, 1 if it was fully recovered and 0 to
+	 * pass up (not CPU originated) */
+	int		(*machine_check)(struct pt_regs *regs);
+
+	/*
+	 * Processor specific early machine check handler which is
+	 * called in real mode to handle SLB and TLB errors.
+	 */
+	long		(*machine_check_early)(struct pt_regs *regs);
+};
+
+extern struct cpu_spec		*cur_cpu_spec;
+
+extern unsigned int __start___ftr_fixup, __stop___ftr_fixup;
+
+extern void set_cur_cpu_spec(struct cpu_spec *s);
+extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
+extern void identify_cpu_name(unsigned int pvr);
+extern void do_feature_fixups(unsigned long value, void *fixup_start,
+			      void *fixup_end);
+
+extern const char *powerpc_base_platform;
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+extern void cpu_feature_keys_init(void);
+#else
+static inline void cpu_feature_keys_init(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/* CPU kernel features */
+
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
+#define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x00000001)
+#define CPU_FTR_ALTIVEC			ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL			ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP			ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x00000010)
+// ASM_CONST(0x00000020) Free
+#define CPU_FTR_FPU_UNAVAILABLE		ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC			ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE		ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV			ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_L2CR			ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450		ASM_CONST(0x00004000)
+#define CPU_FTR_TAU			ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE		ASM_CONST(0x00010000)
+#define CPU_FTR_L3CR			ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP		ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR	ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX		ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM			ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2			ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT		ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC			ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE			ASM_CONST(0x04000000)
+#define CPU_FTR_SPE			ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX	ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR		ASM_CONST(0x40000000)
+
+#else	/* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_PPC_LE			(0)
+#define CPU_FTR_SPE			(0)
+#endif
+
+/*
+ * Definitions for the 64-bit processor unique features;
+ * on 32-bit, make the names available but defined to be 0.
+ */
+#ifdef __powerpc64__
+#define LONG_ASM_CONST(x)		ASM_CONST(x)
+#else
+#define LONG_ASM_CONST(x)		0
+#endif
+
+#define CPU_FTR_REAL_LE			LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE			LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_206		LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S		LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300		LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT			LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR			LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX			LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0000000200000000)
+/* LONG_ASM_CONST(0x0000000400000000) Free */
+#define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM			LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000002000000000)
+#define	CPU_FTR_HAS_PPR			LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR			LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX			LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST		LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_STQ_BUG	LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TIDR			LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG	LONG_ASM_CONST(0x0001000000000000)
+#define CPU_FTR_P9_RADIX_PREFETCH_BUG	LONG_ASM_CONST(0x0002000000000000)
+#define CPU_FTR_ARCH_31			LONG_ASM_CONST(0x0004000000000000)
+#define CPU_FTR_DAWR1			LONG_ASM_CONST(0x0008000000000000)
+#define CPU_FTR_DEXCR_NPHIE		LONG_ASM_CONST(0x0010000000000000)
+
+#ifndef __ASSEMBLY__
+
+#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_NOEXECUTE)
+
+/* We only set the altivec features if the kernel was compiled with altivec
+ * support
+ */
+#ifdef CONFIG_ALTIVEC
+#define CPU_FTR_ALTIVEC_COMP	CPU_FTR_ALTIVEC
+#define PPC_FEATURE_HAS_ALTIVEC_COMP PPC_FEATURE_HAS_ALTIVEC
+#else
+#define CPU_FTR_ALTIVEC_COMP	0
+#define PPC_FEATURE_HAS_ALTIVEC_COMP    0
+#endif
+
+/* We only set the VSX features if the kernel was compiled with VSX
+ * support
+ */
+#ifdef CONFIG_VSX
+#define CPU_FTR_VSX_COMP	CPU_FTR_VSX
+#define PPC_FEATURE_HAS_VSX_COMP PPC_FEATURE_HAS_VSX
+#else
+#define CPU_FTR_VSX_COMP	0
+#define PPC_FEATURE_HAS_VSX_COMP    0
+#endif
+
+/* We only set the spe features if the kernel was compiled with spe
+ * support
+ */
+#ifdef CONFIG_SPE
+#define CPU_FTR_SPE_COMP	CPU_FTR_SPE
+#define PPC_FEATURE_HAS_SPE_COMP PPC_FEATURE_HAS_SPE
+#define PPC_FEATURE_HAS_EFP_SINGLE_COMP PPC_FEATURE_HAS_EFP_SINGLE
+#define PPC_FEATURE_HAS_EFP_DOUBLE_COMP PPC_FEATURE_HAS_EFP_DOUBLE
+#else
+#define CPU_FTR_SPE_COMP	0
+#define PPC_FEATURE_HAS_SPE_COMP    0
+#define PPC_FEATURE_HAS_EFP_SINGLE_COMP 0
+#define PPC_FEATURE_HAS_EFP_DOUBLE_COMP 0
+#endif
+
+/* We only set the TM feature if the kernel was compiled with TM support */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define CPU_FTR_TM_COMP			CPU_FTR_TM
+#define PPC_FEATURE2_HTM_COMP		PPC_FEATURE2_HTM
+#define PPC_FEATURE2_HTM_NOSC_COMP	PPC_FEATURE2_HTM_NOSC
+#else
+#define CPU_FTR_TM_COMP			0
+#define PPC_FEATURE2_HTM_COMP		0
+#define PPC_FEATURE2_HTM_NOSC_COMP	0
+#endif
+
+/* We need to mark all pages as being coherent if we're SMP or we have a
+ * 74[45]x and an MPC107 host bridge. Also 83xx and PowerQUICC II
+ * require it for PCI "streaming/prefetch" to work properly.
+ * This is also required by 52xx family.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \
+	|| defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_82xx) \
+	|| defined(CONFIG_PPC_MPC52xx)
+#define CPU_FTR_COMMON                  CPU_FTR_NEED_COHERENT
+#else
+#define CPU_FTR_COMMON                  0
+#endif
+
+/* The powersave features NAP & DOZE seem to confuse BDI when
+   debugging. So if a BDI is used, disable these
+ */
+#ifndef CONFIG_BDI_SWITCH
+#define CPU_FTR_MAYBE_CAN_DOZE	CPU_FTR_CAN_DOZE
+#define CPU_FTR_MAYBE_CAN_NAP	CPU_FTR_CAN_NAP
+#else
+#define CPU_FTR_MAYBE_CAN_DOZE	0
+#define CPU_FTR_MAYBE_CAN_NAP	0
+#endif
+
+#define CPU_FTRS_603	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_604	(CPU_FTR_COMMON | CPU_FTR_PPC_LE)
+#define CPU_FTRS_740_NOTAU	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+#define CPU_FTRS_740	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
+	    CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_PPC_LE)
+#define CPU_FTRS_750	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
+	    CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_PPC_LE)
+#define CPU_FTRS_750CL	(CPU_FTRS_750)
+#define CPU_FTRS_750FX1	(CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX | CPU_FTR_NO_DPM)
+#define CPU_FTRS_750FX2	(CPU_FTRS_750 | CPU_FTR_NO_DPM)
+#define CPU_FTRS_750FX	(CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
+#define CPU_FTRS_750GX	(CPU_FTRS_750FX)
+#define CPU_FTRS_7400_NOTAU	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
+	    CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+#define CPU_FTRS_7400	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
+	    CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+#define CPU_FTRS_7450_20	(CPU_FTR_COMMON | \
+	    CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_7450_21	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
+	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_7450_23	(CPU_FTR_COMMON | \
+	    CPU_FTR_NEED_PAIRED_STWCX | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
+	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+#define CPU_FTRS_7455_1	(CPU_FTR_COMMON | \
+	    CPU_FTR_NEED_PAIRED_STWCX | \
+	    CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
+	    CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+#define CPU_FTRS_7455_20	(CPU_FTR_COMMON | \
+	    CPU_FTR_NEED_PAIRED_STWCX | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
+	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+#define CPU_FTRS_7455	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_7447_10	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
+	    CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_7447	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_7447A	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
+	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_7448	(CPU_FTR_COMMON | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
+	    CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
+#define CPU_FTRS_82XX	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_G2_LE	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_E300	(CPU_FTR_MAYBE_CAN_DOZE | \
+	    CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_COMMON  | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_E300C2	(CPU_FTR_MAYBE_CAN_DOZE | \
+	    CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE  | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON)
+#define CPU_FTRS_8XX	(CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_40X	(CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X	(CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6	(CPU_FTR_NOEXECUTE | \
+	    CPU_FTR_INDEXED_DCR)
+#define CPU_FTRS_47X	(CPU_FTRS_440x6)
+#define CPU_FTRS_E500	(CPU_FTR_MAYBE_CAN_DOZE | \
+	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | \
+	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_E500MC	( \
+	    CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+	    CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
+/*
+ * e5500/e6500 erratum A-006958 is a timebase bug that can use the
+ * same workaround as CPU_FTR_CELL_TB_BUG.
+ */
+#define CPU_FTRS_E5500	( \
+	    CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
+#define CPU_FTRS_E6500	( \
+	    CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
+
+/* 64-bit CPUs */
+#define CPU_FTRS_PPC970	(CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
+	    CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
+	    CPU_FTR_HVMODE | CPU_FTR_DABRX)
+#define CPU_FTRS_POWER5	(CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
+	    CPU_FTR_DABRX)
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+	    CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX )
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
+	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP )
+#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_P9_TLBIE_STQ_BUG | \
+	    CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
+#define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG)
+#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \
+			       CPU_FTR_P9_RADIX_PREFETCH_BUG | \
+			       CPU_FTR_POWER9_DD2_1)
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+			       CPU_FTR_P9_TM_HV_ASSIST | \
+			       CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_POWER9_DD2_3 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+			       CPU_FTR_P9_TM_HV_ASSIST | \
+			       CPU_FTR_P9_TM_XER_SO_BUG | \
+			       CPU_FTR_DAWR)
+#define CPU_FTRS_POWER10 (CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_COHERENT_ICACHE | \
+	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+	    CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
+	    CPU_FTR_DAWR | CPU_FTR_DAWR1 | \
+	    CPU_FTR_DEXCR_NPHIE)
+#define CPU_FTRS_CELL	(CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
+	    CPU_FTR_PAUSE_ZERO  | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
+	    CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
+#define CPU_FTRS_COMPATIBLE	(CPU_FTR_PPCAS_ARCH_V2)
+
+#ifdef CONFIG_PPC64	/* CPU_FTRS_POSSIBLE: OR of the feature sets listed per configuration */
+#ifdef CONFIG_PPC_BOOK3E_64
+#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E6500 | CPU_FTRS_E5500)
+#else /* !CONFIG_PPC_BOOK3E_64 */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_POSSIBLE	\
+	    (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
+	     CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \
+	     CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \
+	     CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10)
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+#define CPU_FTRS_POSSIBLE	\
+	    (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+	     CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
+	     CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
+	     CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \
+	     CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \
+	     CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+#endif /* CONFIG_PPC_BOOK3E_64 */
+#else /* !CONFIG_PPC64 */
+enum {
+	CPU_FTRS_POSSIBLE =
+#ifdef CONFIG_PPC_BOOK3S_604
+	    CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
+	    CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
+	    CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
+	    CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 |
+	    CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 |
+	    CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 |
+	    CPU_FTRS_7447 | CPU_FTRS_7447A |
+	    CPU_FTRS_CLASSIC32 |
+#endif
+#ifdef CONFIG_PPC_BOOK3S_603
+	    CPU_FTRS_603 | CPU_FTRS_82XX |
+	    CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 |
+#endif
+#ifdef CONFIG_PPC_8xx
+	    CPU_FTRS_8XX |
+#endif
+#ifdef CONFIG_40x
+	    CPU_FTRS_40X |
+#endif
+#ifdef CONFIG_PPC_47x
+	    CPU_FTRS_47X | CPU_FTR_476_DD2 |
+#elif defined(CONFIG_44x)
+	    CPU_FTRS_44X | CPU_FTRS_440x6 |
+#endif
+#ifdef CONFIG_PPC_E500
+	    CPU_FTRS_E500 | CPU_FTRS_E500_2 |
+#endif
+#ifdef CONFIG_PPC_E500MC
+	    CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 |
+#endif
+	    0,	/* closes the '|' chain left open by each conditional group */
+};
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC64	/* CPU_FTRS_ALWAYS: AND of the feature sets listed per configuration */
+#ifdef CONFIG_PPC_BOOK3E_64
+#define CPU_FTRS_ALWAYS		(CPU_FTRS_E6500 & CPU_FTRS_E5500)
+#else /* !CONFIG_PPC_BOOK3E_64 */
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+#define CPU_FTRS_DT_CPU_BASE			\
+	(CPU_FTR_LWSYNC |			\
+	 CPU_FTR_FPU_UNAVAILABLE |		\
+	 CPU_FTR_NOEXECUTE |			\
+	 CPU_FTR_COHERENT_ICACHE |		\
+	 CPU_FTR_STCX_CHECKS_ADDRESS |		\
+	 CPU_FTR_POPCNTB | CPU_FTR_POPCNTD |	\
+	 CPU_FTR_DAWR |				\
+	 CPU_FTR_ARCH_206 |			\
+	 CPU_FTR_ARCH_207S)
+#else /* !CONFIG_PPC_DT_CPU_FTRS */
+#define CPU_FTRS_DT_CPU_BASE	(~0ul)	/* all bits set: does not narrow the AND below */
+#endif /* CONFIG_PPC_DT_CPU_FTRS */
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_ALWAYS \
+	    (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
+	     CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER9 & \
+	     CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
+	     CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE)
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+#define CPU_FTRS_ALWAYS		\
+	    (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+	     CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
+	     CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
+	     ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & CPU_FTRS_POWER9 & \
+	     CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
+	     CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+#endif /* CONFIG_PPC_BOOK3E_64 */
+#else /* !CONFIG_PPC64 */
+enum {
+	CPU_FTRS_ALWAYS =
+#ifdef CONFIG_PPC_BOOK3S_604
+	    CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
+	    CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
+	    CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
+	    CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &
+	    CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 &
+	    CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 &
+	    CPU_FTRS_7447 & CPU_FTRS_7447A &
+	    CPU_FTRS_CLASSIC32 &
+#endif
+#ifdef CONFIG_PPC_BOOK3S_603
+	    CPU_FTRS_603 & CPU_FTRS_82XX &
+	    CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 &
+#endif
+#ifdef CONFIG_PPC_8xx
+	    CPU_FTRS_8XX &
+#endif
+#ifdef CONFIG_40x
+	    CPU_FTRS_40X &
+#endif
+#ifdef CONFIG_PPC_47x
+	    CPU_FTRS_47X &
+#elif defined(CONFIG_44x)
+	    CPU_FTRS_44X & CPU_FTRS_440x6 &
+#endif
+#ifdef CONFIG_PPC_E500
+	    CPU_FTRS_E500 & CPU_FTRS_E500_2 &
+#endif
+#ifdef CONFIG_PPC_E500MC
+	    CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 &
+#endif
+	    ~CPU_FTR_EMB_HV &	/* can be removed at runtime */
+	    CPU_FTRS_POSSIBLE,	/* closes the '&' chain left open by each conditional group */
+};
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Maximum number of hw breakpoint supported on powerpc. Number of
+ * breakpoints supported by actual hw might be less than this, which
+ * is decided at run time in nr_wp_slots().
+ */
+#define HBP_NUM_MAX	2
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
new file mode 100644
index 0000000000..f26c430f39
--- /dev/null
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CPUTHREADS_H
+#define _ASM_POWERPC_CPUTHREADS_H
+
+#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+#include <asm/cpu_has_feature.h>
+
+/*
+ * Mapping of threads to cores
+ *
+ * Note: This implementation is limited to a power of 2 number of
+ * threads per core and the same number for each core in the system
+ * (though it would work if some processors had less threads as long
+ * as the CPU numbers are still allocated, just not brought online).
+ *
+ * However, the API allows for a different implementation in the future
+ * if needed, as long as you only use the functions and not the variables
+ * directly.
+ */
+
+#ifdef CONFIG_SMP	/* topology values are variables defined elsewhere */
+extern int threads_per_core;
+extern int threads_per_subcore;
+extern int threads_shift;	/* used as a right-shift count by cpu_nr_cores() */
+extern cpumask_t threads_core_mask;
+#else /* !CONFIG_SMP: every topology value is a compile-time constant */
+#define threads_per_core	1
+#define threads_per_subcore	1
+#define threads_shift		0
+#define has_big_cores		0
+#define threads_core_mask	(*get_cpu_mask(0))
+#endif /* CONFIG_SMP */
+
+static inline int cpu_nr_cores(void)
+{
+	return nr_cpu_ids >> threads_shift;	/* nr_cpu_ids divided by 2^threads_shift */
+}
+
+#ifdef CONFIG_SMP	/* out-of-line implementations, defined elsewhere */
+int cpu_core_index_of_thread(int cpu);
+int cpu_first_thread_of_core(int core);
+#else /* !CONFIG_SMP: thread number and core number map to each other unchanged */
+static inline int cpu_core_index_of_thread(int cpu) { return cpu; }
+static inline int cpu_first_thread_of_core(int core) { return core; }
+#endif /* CONFIG_SMP */
+
+static inline int cpu_thread_in_core(int cpu)
+{
+	return cpu & (threads_per_core - 1);	/* keep the low bits; mask needs a power-of-2 count */
+}
+
+static inline int cpu_thread_in_subcore(int cpu)
+{
+	return cpu & (threads_per_subcore - 1);	/* same masking, with the subcore thread count */
+}
+
+static inline int cpu_first_thread_sibling(int cpu)
+{
+	return cpu & ~(threads_per_core - 1);	/* clear the thread-in-core bits */
+}
+
+static inline int cpu_last_thread_sibling(int cpu)
+{
+	return cpu | (threads_per_core - 1);	/* set all thread-in-core bits */
+}
+
+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+	/* Big Core: clear bits 1 and 2 of the CPU number, keep bit 0 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && threads_per_core == 8)
+		return cpu & ~0x6;
+	return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+	/* Big Core: set bits 1 and 2 of the CPU number, keep bit 0 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && threads_per_core == 8)
+		return cpu | 0x6;
+	return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+	/* Big Core: step over every other CPU number */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && threads_per_core == 8)
+		return 2;
+	return 1;
+}
+
+static inline u32 get_tensr(void)
+{
+#ifdef	CONFIG_BOOKE
+	if (cpu_has_feature(CPU_FTR_SMT))
+		return mfspr(SPRN_TENSR);	/* value of the TENSR special-purpose register */
+#endif /* CONFIG_BOOKE */
+	return 1;	/* not BookE, or CPU_FTR_SMT is absent */
+}
+
+void book3e_start_thread(int thread, unsigned long addr);
+void book3e_stop_thread(int thread);
+
+#endif /* __ASSEMBLY__ */
+
+#define INVALID_THREAD_HWID	0x0fff
+
+#endif /* _ASM_POWERPC_CPUTHREADS_H */
+
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
new file mode 100644
index 0000000000..4961fb38e4
--- /dev/null
+++ b/arch/powerpc/include/asm/cputime.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Definitions for measuring cputime on powerpc machines.
+ *
+ * Copyright (C) 2006 Paul Mackerras, IBM Corp.
+ *
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
+ * the same units as the timebase.  Otherwise we measure cpu time
+ * in jiffies using the generic definitions.
+ */
+
+#ifndef __POWERPC_CPUTIME_H
+#define __POWERPC_CPUTIME_H
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <asm/div64.h>
+#include <asm/time.h>
+#include <asm/param.h>
+#include <asm/firmware.h>
+
+#ifdef __KERNEL__
+#define cputime_to_nsecs(cputime) tb_to_ns(cputime)
+
+/*
+ * PPC64 uses PACA which is task independent for storing accounting data while
+ * PPC32 uses struct thread_info, therefore at task switch the accounting data
+ * has to be populated in the new task
+ */
+#ifdef CONFIG_PPC64	/* accounting data sits in the PACA; the tsk argument is ignored */
+#define get_accounting(tsk)	(&get_paca()->accounting)
+#define raw_get_accounting(tsk)	(&local_paca->accounting)
+static inline void arch_vtime_task_switch(struct task_struct *tsk) { }	/* nothing to carry over */
+
+#else /* !CONFIG_PPC64: accounting data sits in the task's thread_info */
+#define get_accounting(tsk)	(&task_thread_info(tsk)->accounting)
+#define raw_get_accounting(tsk)	get_accounting(tsk)
+/*
+ * Context-switch hook, called with interrupts disabled.  Accounting data
+ * is kept per task in this configuration, so the start timestamp of the
+ * outgoing task (prev) is handed over to the task that is now current.
+ */
+static inline void arch_vtime_task_switch(struct task_struct *prev)
+{
+	struct cpu_accounting_data *incoming = get_accounting(current);
+	struct cpu_accounting_data *outgoing = get_accounting(prev);
+
+	incoming->starttime = outgoing->starttime;
+}
+#endif
+
+/*
+ * account_cpu_user_entry/exit runs "unreconciled", so can't trace,
+ * can't use get_paca()
+ */
+static notrace inline void account_cpu_user_entry(void)
+{
+	unsigned long now = mftb();
+	struct cpu_accounting_data *acct_data = raw_get_accounting(current);
+
+	acct_data->utime += now - acct_data->starttime_user;	/* since starttime_user */
+	acct_data->starttime = now;	/* next interval is measured from here */
+}
+
+static notrace inline void account_cpu_user_exit(void)
+{
+	unsigned long now = mftb();
+	struct cpu_accounting_data *acct_data = raw_get_accounting(current);
+
+	acct_data->stime += now - acct_data->starttime;	/* since starttime */
+	acct_data->starttime_user = now;	/* next user interval starts here */
+}
+
+static notrace inline void account_stolen_time(void)
+{
+#ifdef CONFIG_PPC_SPLPAR
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return;
+	/* Our read index is out of step with the lppaca's dtl_idx */
+	if (unlikely(local_paca->dtl_ridx !=
+		     be64_to_cpu(local_paca->lppaca_ptr->dtl_idx)))
+		pseries_accumulate_stolen_time();
+#endif
+}
+
+#endif /* __KERNEL__ */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void account_cpu_user_entry(void)
+{	/* no-op: CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set */
+}
+static inline void account_cpu_user_exit(void)
+{	/* no-op: CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set */
+}
+static notrace inline void account_stolen_time(void)
+{	/* no-op: CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set */
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
new file mode 100644
index 0000000000..68d9717cc5
--- /dev/null
+++ b/arch/powerpc/include/asm/crashdump-ppc64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
+#define _ASM_POWERPC_CRASHDUMP_PPC64_H
+
+/*
+ * Backup region - first 64KB of System RAM
+ *
+ * If ever the below macros are to be changed, please be judicious.
+ * The implicit assumptions are:
+ *     - start, end & size are less than UINT32_MAX.
+ *     - start & size are at least 8 byte aligned.
+ *
+ * For implementation details: arch/powerpc/purgatory/trampoline_64.S
+ */
+#define BACKUP_SRC_START	0
+#define BACKUP_SRC_END		0xffff	/* inclusive last byte, hence the "+ 1" below */
+#define BACKUP_SRC_SIZE		(BACKUP_SRC_END - BACKUP_SRC_START + 1)	/* 0x10000 = 64KB */
+
+#endif /* _ASM_POWERPC_CRASHDUMP_PPC64_H */
diff --git a/arch/powerpc/include/asm/current.h b/arch/powerpc/include/asm/current.h
new file mode 100644
index 0000000000..bbfb948004
--- /dev/null
+++ b/arch/powerpc/include/asm/current.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_CURRENT_H
+#define _ASM_POWERPC_CURRENT_H
+#ifdef __KERNEL__
+
+/*
+ */
+
+struct task_struct;
+
+#ifdef __powerpc64__
+#include <linux/stddef.h>
+#include <asm/paca.h>
+
+static inline struct task_struct *get_current(void)
+{
+	struct task_struct *task;
+
+	/* Deliberately not volatile: the compiler may cache the result of get_current */
+	asm ("ld %0,%1(13)"	/* load from register 13 plus an immediate offset */
+	: "=r" (task)	/* %0: the loaded pointer */
+	: "i" (offsetof(struct paca_struct, __current)));	/* %1: offset of __current in paca_struct */
+
+	return task;
+}
+#define current	get_current()
+
+#else
+
+/*
+ * We keep `current' in r2 for speed.
+ */
+register struct task_struct *current asm ("r2");
+
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_CURRENT_H */
diff --git a/arch/powerpc/include/asm/dbdma.h b/arch/powerpc/include/asm/dbdma.h
new file mode 100644
index 0000000000..4785c1716b
--- /dev/null
+++ b/arch/powerpc/include/asm/dbdma.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definitions for using the Apple Descriptor-Based DMA controller
+ * in Power Macintosh computers.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ */
+
+#ifdef __KERNEL__
+#ifndef _ASM_DBDMA_H_
+#define _ASM_DBDMA_H_
+/*
+ * DBDMA control/status registers.  All little-endian.
+ */
+struct dbdma_regs {
+    unsigned int control;	/* lets you change bits in status */
+    unsigned int status;	/* DMA and device status bits (see below) */
+    unsigned int cmdptr_hi;	/* upper 32 bits of command address */
+    unsigned int cmdptr;	/* (lower 32 bits of) command address (phys) */
+    unsigned int intr_sel;	/* select interrupt condition bit */
+    unsigned int br_sel;	/* select branch condition bit */
+    unsigned int wait_sel;	/* select wait condition bit */
+    unsigned int xfer_mode;
+    unsigned int data2ptr_hi;
+    unsigned int data2ptr;
+    unsigned int res1;		/* NOTE(review): name suggests a reserved word - confirm */
+    unsigned int address_hi;
+    unsigned int br_addr_hi;
+    unsigned int res2[3];	/* NOTE(review): name suggests reserved words - confirm */
+};
+
+/* Bits in control and status registers */
+#define RUN	0x8000
+#define PAUSE	0x4000
+#define FLUSH	0x2000
+#define WAKE	0x1000
+#define DEAD	0x0800
+#define ACTIVE	0x0400
+#define BT	0x0100
+#define DEVSTAT	0x00ff
+
+/*
+ * DBDMA command structure.  These fields are all little-endian!
+ */
+struct dbdma_cmd {
+	__le16 req_count;	/* requested byte transfer count */
+	__le16 command;		/* command word (has bit-fields) */
+	__le32 phy_addr;	/* physical data address */
+	__le32 cmd_dep;		/* command-dependent field */
+	__le16 res_count;	/* residual count after completion */
+	__le16 xfer_status;	/* transfer status */
+};
+
+/* DBDMA command values in command field */
+#define OUTPUT_MORE	0	/* transfer memory data to stream */
+#define OUTPUT_LAST	0x1000	/* ditto followed by end marker */
+#define INPUT_MORE	0x2000	/* transfer stream data to memory */
+#define INPUT_LAST	0x3000	/* ditto, expect end marker */
+#define STORE_WORD	0x4000	/* write word (4 bytes) to device reg */
+#define LOAD_WORD	0x5000	/* read word (4 bytes) from device reg */
+#define DBDMA_NOP	0x6000	/* do nothing */
+#define DBDMA_STOP	0x7000	/* suspend processing */
+
+/* Key values in command field */
+#define KEY_STREAM0	0	/* usual data stream */
+#define KEY_STREAM1	0x100	/* control/status stream */
+#define KEY_STREAM2	0x200	/* device-dependent stream */
+#define KEY_STREAM3	0x300	/* device-dependent stream */
+#define KEY_REGS	0x500	/* device register space */
+#define KEY_SYSTEM	0x600	/* system memory-mapped space */
+#define KEY_DEVICE	0x700	/* device memory-mapped space */
+
+/* Interrupt control values in command field */
+#define INTR_NEVER	0	/* don't interrupt */
+#define INTR_IFSET	0x10	/* intr if condition bit is 1 */
+#define INTR_IFCLR	0x20	/* intr if condition bit is 0 */
+#define INTR_ALWAYS	0x30	/* always interrupt */
+
+/* Branch control values in command field */
+#define BR_NEVER	0	/* don't branch */
+#define BR_IFSET	0x4	/* branch if condition bit is 1 */
+#define BR_IFCLR	0x8	/* branch if condition bit is 0 */
+#define BR_ALWAYS	0xc	/* always branch */
+
+/* Wait control values in command field */
+#define WAIT_NEVER	0	/* don't wait */
+#define WAIT_IFSET	1	/* wait if condition bit is 1 */
+#define WAIT_IFCLR	2	/* wait if condition bit is 0 */
+#define WAIT_ALWAYS	3	/* always wait */
+
+/* Align an address for a DBDMA command structure */
+#define DBDMA_ALIGN(x)	(((unsigned long)(x) + sizeof(struct dbdma_cmd) - 1) \
+			 & -sizeof(struct dbdma_cmd))
+
+/* Useful macros */
+#define DBDMA_DO_STOP(regs) do {					\
+	out_le32(&(regs)->control, (RUN | FLUSH) << 16);		\
+	while (in_le32(&(regs)->status) & (ACTIVE | FLUSH))		\
+		;	/* spin until both bits read back as zero */	\
+} while (0)
+
+#define DBDMA_DO_RESET(regs) do {					\
+	out_le32(&(regs)->control, (ACTIVE | DEAD | WAKE | FLUSH | PAUSE | RUN) << 16); \
+	while (in_le32(&(regs)->status) & RUN)				\
+		;	/* spin until RUN reads back as zero */		\
+} while (0)
+
+#endif /* _ASM_DBDMA_H_ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
new file mode 100644
index 0000000000..3e9da22a27
--- /dev/null
+++ b/arch/powerpc/include/asm/dbell.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * provides masks and opcode images for use by code generation, emulation
+ * and for instructions that older assemblers might not know about
+ */
+#ifndef _ASM_POWERPC_DBELL_H
+#define _ASM_POWERPC_DBELL_H
+
+#include <linux/smp.h>
+#include <linux/threads.h>
+
+#include <asm/cputhreads.h>
+#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
+#include <asm/kvm_ppc.h>
+
+#define PPC_DBELL_MSG_BRDCAST	(0x04000000)
+#define PPC_DBELL_TYPE(x)	(((x) & 0xf) << (63-36))
+#define PPC_DBELL_TYPE_MASK	PPC_DBELL_TYPE(0xf)
+#define PPC_DBELL_LPID(x)	((x) << (63 - 49))
+#define PPC_DBELL_PIR_MASK	0x3fff
+enum ppc_dbell {
+	PPC_DBELL = 0,		/* doorbell */
+	PPC_DBELL_CRIT = 1,	/* critical doorbell */
+	PPC_G_DBELL = 2,	/* guest doorbell */
+	PPC_G_DBELL_CRIT = 3,	/* guest critical doorbell */
+	PPC_G_DBELL_MC = 4,	/* guest mcheck doorbell */
+	PPC_DBELL_SERVER = 5,	/* doorbell on server */
+};
+
+#ifdef CONFIG_PPC_BOOK3S
+
+#define PPC_DBELL_MSGTYPE		PPC_DBELL_SERVER
+
+static inline void _ppc_msgsnd(u32 msg)
+{	/* per the note further down this file: msgsnd in HV, msgsndp in !HV */
+	__asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGSND(%1), PPC_MSGSNDP(%1), %0)
+				: : "i" (CPU_FTR_HVMODE), "r" (msg));	/* %0 = feature bit, %1 = msg */
+}
+
+/* sync before sending message */
+static inline void ppc_msgsnd_sync(void)
+{
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+
+/* sync after taking message interrupt */
+static inline void ppc_msgsync(void)
+{
+	/* sync is not required when taking messages from the same core */
+	__asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGSYNC " ; lwsync", "", %0)
+				: : "i" (CPU_FTR_HVMODE|CPU_FTR_ARCH_300));
+}
+
+static inline void _ppc_msgclr(u32 msg)
+{	/* same shape as _ppc_msgsnd: two alternatives keyed on CPU_FTR_HVMODE */
+	__asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGCLR(%1), PPC_MSGCLRP(%1), %0)
+				: : "i" (CPU_FTR_HVMODE), "r" (msg));	/* %0 = feature bit, %1 = msg */
+}
+
+static inline void ppc_msgclr(enum ppc_dbell type)
+{
+	/* The message carries only the doorbell type field;
+	 * no broadcast flag or tag is encoded. */
+	_ppc_msgclr(PPC_DBELL_TYPE(type));
+}
+
+#else /* CONFIG_PPC_BOOK3S */
+
+#define PPC_DBELL_MSGTYPE		PPC_DBELL
+
+static inline void _ppc_msgsnd(u32 msg)
+{
+	__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+}
+
+/* sync before sending message */
+static inline void ppc_msgsnd_sync(void)
+{
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+
+/* sync after taking message interrupt */
+static inline void ppc_msgsync(void)
+{
+}
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+extern void doorbell_exception(struct pt_regs *regs);
+
+static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
+{
+	/* type field | broadcast bit taken from flags | low 27 bits of tag */
+	_ppc_msgsnd(PPC_DBELL_TYPE(type) |
+		    (flags & PPC_DBELL_MSG_BRDCAST) |
+		    (tag & 0x07ffffff));
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+static inline void doorbell_global_ipi(int cpu)
+{
+	const u32 hw_cpu = get_hard_smp_processor_id(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	/* msgsnd is treated as a store: order earlier accesses before it */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, hw_cpu);
+}
+
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+static inline void doorbell_core_ipi(int cpu)
+{
+	const u32 thread = cpu_thread_in_core(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	/* msgsnd is treated as a store: order earlier accesses before it */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, thread);
+}
+
+/*
+ * Ring a core doorbell only when the destination shares a core with us.
+ *
+ * The sibling test and the send are bracketed by get_cpu()/put_cpu().
+ *
+ * Returns 1 if the doorbell was sent, 0 if @cpu is not a sibling.
+ */
+static inline int doorbell_try_core_ipi(int cpu)
+{
+	const int self = get_cpu();
+	const bool sibling = cpumask_test_cpu(cpu, cpu_sibling_mask(self));
+
+	if (sibling)
+		doorbell_core_ipi(cpu);
+	put_cpu();
+
+	return sibling;
+}
+
+#endif /* CONFIG_SMP */
+
+#endif /* _ASM_POWERPC_DBELL_H */
diff --git a/arch/powerpc/include/asm/dcr-generic.h b/arch/powerpc/include/asm/dcr-generic.h
new file mode 100644
index 0000000000..099c28dd40
--- /dev/null
+++ b/arch/powerpc/include/asm/dcr-generic.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
+ *                    <benh@kernel.crashing.org>
+ */
+
+#ifndef _ASM_POWERPC_DCR_GENERIC_H
+#define _ASM_POWERPC_DCR_GENERIC_H
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+enum host_type_t {DCR_HOST_MMIO, DCR_HOST_NATIVE, DCR_HOST_INVALID};
+
+typedef struct {
+	enum host_type_t type;	/* NOTE(review): presumably says which 'host' member is in use - confirm */
+	union {
+		dcr_host_mmio_t mmio;
+		dcr_host_native_t native;
+	} host;
+} dcr_host_t;
+
+extern bool dcr_map_ok_generic(dcr_host_t host);
+
+extern dcr_host_t dcr_map_generic(struct device_node *dev, unsigned int dcr_n,
+			  unsigned int dcr_c);
+extern void dcr_unmap_generic(dcr_host_t host, unsigned int dcr_c);
+
+extern u32 dcr_read_generic(dcr_host_t host, unsigned int dcr_n);
+
+extern void dcr_write_generic(dcr_host_t host, unsigned int dcr_n, u32 value);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_DCR_GENERIC_H */
+
+
diff --git a/arch/powerpc/include/asm/dcr-mmio.h b/arch/powerpc/include/asm/dcr-mmio.h
new file mode 100644
index 0000000000..fc6d93ef4a
--- /dev/null
+++ b/arch/powerpc/include/asm/dcr-mmio.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
+ *                    <benh@kernel.crashing.org>
+ */
+
+#ifndef _ASM_POWERPC_DCR_MMIO_H
+#define _ASM_POWERPC_DCR_MMIO_H
+#ifdef __KERNEL__
+
+#include <asm/io.h>
+
+typedef struct {
+	void __iomem *token;
+	unsigned int stride;
+	unsigned int base;
+} dcr_host_mmio_t;
+
+static inline bool dcr_map_ok_mmio(dcr_host_mmio_t host)
+{
+	return host.token != NULL;	/* a mapping counts as OK when its token is non-NULL */
+}
+
+extern dcr_host_mmio_t dcr_map_mmio(struct device_node *dev,
+				    unsigned int dcr_n,
+				    unsigned int dcr_c);
+extern void dcr_unmap_mmio(dcr_host_mmio_t host, unsigned int dcr_c);
+
+static inline u32 dcr_read_mmio(dcr_host_mmio_t host, unsigned int dcr_n)
+{	/* big-endian 32-bit read at token + (base + dcr_n) * stride */
+	return in_be32(host.token + ((host.base + dcr_n) * host.stride));
+}
+
+static inline void dcr_write_mmio(dcr_host_mmio_t host, unsigned int dcr_n,
+				  u32 value)
+{
+	/* big-endian 32-bit write at token + (base + dcr_n) * stride */
+	out_be32(host.token + (host.base + dcr_n) * host.stride, value);
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_DCR_MMIO_H */
+
+
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
new file mode 100644
index 0000000000..a920599645
--- /dev/null
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
+ *                    <benh@kernel.crashing.org>
+ */
+
+#ifndef _ASM_POWERPC_DCR_NATIVE_H
+#define _ASM_POWERPC_DCR_NATIVE_H
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/spinlock.h>
+#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
+#include <linux/stringify.h>
+
+typedef struct {
+	unsigned int base;
+} dcr_host_native_t;
+
+static inline bool dcr_map_ok_native(dcr_host_native_t host)
+{
+	return true;	/* unconditionally OK; 'host' is not examined */
+}
+
+#define dcr_map_native(dev, dcr_n, dcr_c) \
+	((dcr_host_native_t){ .base = (dcr_n) })	/* only records the base DCR number; dev and dcr_c are unused */
+#define dcr_unmap_native(host, dcr_c)		do {} while (0)	/* nothing to undo */
+#define dcr_read_native(host, dcr_n)		mfdcr((dcr_n) + (host).base)	/* args parenthesized so any expression is safe */
+#define dcr_write_native(host, dcr_n, value)	mtdcr((dcr_n) + (host).base, value)
+
+/* Table based DCR accessors */
+extern void __mtdcr(unsigned int reg, unsigned int val);
+extern unsigned int __mfdcr(unsigned int reg);
+
+/* mfdcrx/mtdcrx instruction based accessors. We hand code
+ * the opcodes in order not to depend on newer binutils
+ */
+static inline unsigned int mfdcrx(unsigned int reg)
+{
+	unsigned int ret;	/* receives the value read */
+	asm volatile(".long 0x7c000206 | (%0 << 21) | (%1 << 16)"	/* hand-encoded instruction word */
+		     : "=r" (ret) : "r" (reg));	/* %0 = ret (output), %1 = reg (input) */
+	return ret;
+}
+
+static inline void mtdcrx(unsigned int reg, unsigned int val)
+{
+	asm volatile(".long 0x7c000306 | (%0 << 21) | (%1 << 16)"	/* hand-encoded instruction word */
+		     : : "r" (val), "r" (reg));	/* %0 = val, %1 = reg (both inputs) */
+}
+
+#define mfdcr(rn)	/* evaluates to the value of DCR rn */	\
+	({unsigned int rval;					\
+	if (__builtin_constant_p(rn) && (rn) < 1024)		\
+		asm volatile("mfdcr %0, %1" : "=r" (rval)	\
+			      : "n" (rn));			\
+	else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR)))	\
+		rval = mfdcrx(rn);				\
+	else	/* fall back to the table based accessor */	\
+		rval = __mfdcr(rn);				\
+	rval;})
+
+#define mtdcr(rn, v)	/* write v to DCR rn */			\
+do {								\
+	if (__builtin_constant_p(rn) && (rn) < 1024)		\
+		asm volatile("mtdcr %0, %1"			\
+			      : : "n" (rn), "r" (v));		\
+	else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR)))	\
+		mtdcrx(rn, v);					\
+	else	/* fall back to the table based accessor */	\
+		__mtdcr(rn, v);					\
+} while (0)
+
+/* R/W of indirect DCRs make use of standard naming conventions for DCRs */
+extern spinlock_t dcr_ind_lock;
+
+static inline unsigned __mfdcri(int base_addr, int base_data, int reg)
+{
+	unsigned long flags;
+	unsigned int val;
+
+	spin_lock_irqsave(&dcr_ind_lock, flags);	/* keep the address write and data read together */
+	if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
+		mtdcrx(base_addr, reg);		/* write 'reg' to the address DCR */
+		val = mfdcrx(base_data);	/* then read the data DCR */
+	} else {
+		__mtdcr(base_addr, reg);	/* same sequence via the table based accessors */
+		val = __mfdcr(base_data);
+	}
+	spin_unlock_irqrestore(&dcr_ind_lock, flags);
+	return val;
+}
+
+static inline void __mtdcri(int base_addr, int base_data, int reg,
+			    unsigned val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dcr_ind_lock, flags);	/* keep the address write and data write together */
+	if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
+		mtdcrx(base_addr, reg);		/* write 'reg' to the address DCR */
+		mtdcrx(base_data, val);		/* then write 'val' to the data DCR */
+	} else {
+		__mtdcr(base_addr, reg);	/* same sequence via the table based accessors */
+		__mtdcr(base_data, val);
+	}
+	spin_unlock_irqrestore(&dcr_ind_lock, flags);
+}
+
+static inline void __dcri_clrset(int base_addr, int base_data, int reg,
+				 unsigned clr, unsigned set)
+{
+	unsigned long flags;
+	unsigned int val;
+
+	spin_lock_irqsave(&dcr_ind_lock, flags);	/* whole read-modify-write runs under the lock */
+	if (cpu_has_feature(CPU_FTR_INDEXED_DCR)) {
+		mtdcrx(base_addr, reg);		/* write 'reg' to the address DCR */
+		val = (mfdcrx(base_data) & ~clr) | set;	/* clear 'clr' bits first, then OR in 'set' */
+		mtdcrx(base_data, val);
+	} else {
+		__mtdcr(base_addr, reg);	/* same sequence via the table based accessors */
+		val = (__mfdcr(base_data) & ~clr) | set;
+		__mtdcr(base_data, val);
+	}
+	spin_unlock_irqrestore(&dcr_ind_lock, flags);
+}
+
+#define mfdcri(base, reg)	__mfdcri(DCRN_ ## base ## _CONFIG_ADDR,	\
+					 DCRN_ ## base ## _CONFIG_DATA,	\
+					 reg)
+
+#define mtdcri(base, reg, data)	__mtdcri(DCRN_ ## base ## _CONFIG_ADDR,	\
+					 DCRN_ ## base ## _CONFIG_DATA,	\
+					 reg, data)
+
+#define dcri_clrset(base, reg, clr, set)	__dcri_clrset(DCRN_ ## base ## _CONFIG_ADDR,	\
+							      DCRN_ ## base ## _CONFIG_DATA,	\
+							      reg, clr, set)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_DCR_NATIVE_H */
diff --git a/arch/powerpc/include/asm/dcr-regs.h b/arch/powerpc/include/asm/dcr-regs.h
new file mode 100644
index 0000000000..5c1a4973f4
--- /dev/null
+++ b/arch/powerpc/include/asm/dcr-regs.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common DCR / SDR / CPR register definitions used on various IBM/AMCC
+ * 4xx processors
+ *
+ *    Copyright 2007 Benjamin Herrenschmidt, IBM Corp
+ *                   <benh@kernel.crashing.org>
+ *
+ * Mostly lifted from asm-ppc/ibm4xx.h by
+ *
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *
+ */
+
+#ifndef __DCR_REGS_H__
+#define __DCR_REGS_H__
+
+/*
+ * Most DCRs used for controlling devices such as the MAL, DMA engine,
+ * etc... are obtained from the device tree.
+ *
+ * The definitions in this file are fixed DCRs and indirect DCRs that
+ * are commonly used outside of specific drivers or refer to core
+ * common registers that may occasionally have to be tweaked outside
+ * of the driver main register set
+ */
+
+/* CPRs (440GX and 440SP/440SPe) */
+#define DCRN_CPR0_CONFIG_ADDR	0xc
+#define DCRN_CPR0_CONFIG_DATA	0xd
+
+/* SDRs (440GX and 440SP/440SPe) */
+#define DCRN_SDR0_CONFIG_ADDR 	0xe
+#define DCRN_SDR0_CONFIG_DATA	0xf
+
+#define SDR0_PFC0		0x4100
+#define SDR0_PFC1		0x4101
+#define SDR0_PFC1_EPS		0x1c00000
+#define SDR0_PFC1_EPS_SHIFT	22
+#define SDR0_PFC1_RMII		0x02000000
+#define SDR0_MFR		0x4300
+#define SDR0_MFR_TAH0 		0x80000000  	/* TAHOE0 Enable */
+#define SDR0_MFR_TAH1 		0x40000000  	/* TAHOE1 Enable */
+#define SDR0_MFR_PCM  		0x10000000  	/* PPC440GP irq compat mode */
+#define SDR0_MFR_ECS  		0x08000000  	/* EMAC int clk */
+#define SDR0_MFR_T0TXFL		0x00080000
+#define SDR0_MFR_T0TXFH		0x00040000
+#define SDR0_MFR_T1TXFL		0x00020000
+#define SDR0_MFR_T1TXFH		0x00010000
+#define SDR0_MFR_E0TXFL		0x00008000
+#define SDR0_MFR_E0TXFH		0x00004000
+#define SDR0_MFR_E0RXFL		0x00002000
+#define SDR0_MFR_E0RXFH		0x00001000
+#define SDR0_MFR_E1TXFL		0x00000800
+#define SDR0_MFR_E1TXFH		0x00000400
+#define SDR0_MFR_E1RXFL		0x00000200
+#define SDR0_MFR_E1RXFH		0x00000100
+#define SDR0_MFR_E2TXFL		0x00000080
+#define SDR0_MFR_E2TXFH		0x00000040
+#define SDR0_MFR_E2RXFL		0x00000020
+#define SDR0_MFR_E2RXFH		0x00000010
+#define SDR0_MFR_E3TXFL		0x00000008
+#define SDR0_MFR_E3TXFH		0x00000004
+#define SDR0_MFR_E3RXFL		0x00000002
+#define SDR0_MFR_E3RXFH		0x00000001
+#define SDR0_UART0		0x0120
+#define SDR0_UART1		0x0121
+#define SDR0_UART2		0x0122
+#define SDR0_UART3		0x0123
+#define SDR0_CUST0		0x4000
+
+/* SDR for 405EZ */
+#define DCRN_SDR_ICINTSTAT	0x4510
+#define ICINTSTAT_ICRX	0x80000000
+#define ICINTSTAT_ICTX0	0x40000000
+#define ICINTSTAT_ICTX1 0x20000000
+#define ICINTSTAT_ICTX	0x60000000
+
+/* SDRs (460EX/460GT) */
+#define SDR0_ETH_CFG		0x4103
+#define SDR0_ETH_CFG_ECS	0x00000100	/* EMAC int clk source */
+
+/*
+ * All those DCR register addresses are offsets from the base address
+ * for the SRAM0 controller (e.g. 0x20 on 440GX). The base address is
+ * excluded here and configured in the device tree.
+ */
+#define DCRN_SRAM0_SB0CR	0x00
+#define DCRN_SRAM0_SB1CR	0x01
+#define DCRN_SRAM0_SB2CR	0x02
+#define DCRN_SRAM0_SB3CR	0x03
+#define  SRAM_SBCR_BU_MASK	0x00000180
+#define  SRAM_SBCR_BS_64KB	0x00000800
+#define  SRAM_SBCR_BU_RO	0x00000080
+#define  SRAM_SBCR_BU_RW	0x00000180
+#define DCRN_SRAM0_BEAR		0x04
+#define DCRN_SRAM0_BESR0	0x05
+#define DCRN_SRAM0_BESR1	0x06
+#define DCRN_SRAM0_PMEG		0x07
+#define DCRN_SRAM0_CID		0x08
+#define DCRN_SRAM0_REVID	0x09
+#define DCRN_SRAM0_DPC		0x0a
+#define  SRAM_DPC_ENABLE	0x80000000
+
+/*
+ * All those DCR register addresses are offsets from the base address
+ * for the L2C0 controller (e.g. 0x30 on 440GX). The base address is
+ * excluded here and configured in the device tree.
+ */
+#define DCRN_L2C0_CFG		0x00
+#define  L2C_CFG_L2M		0x80000000
+#define  L2C_CFG_ICU		0x40000000
+#define  L2C_CFG_DCU		0x20000000
+#define  L2C_CFG_DCW_MASK	0x1e000000
+#define  L2C_CFG_TPC		0x01000000
+#define  L2C_CFG_CPC		0x00800000
+#define  L2C_CFG_FRAN		0x00200000
+#define  L2C_CFG_SS_MASK	0x00180000
+#define  L2C_CFG_SS_256		0x00000000
+#define  L2C_CFG_CPIM		0x00040000
+#define  L2C_CFG_TPIM		0x00020000
+#define  L2C_CFG_LIM		0x00010000
+#define  L2C_CFG_PMUX_MASK	0x00007000
+#define  L2C_CFG_PMUX_SNP	0x00000000
+#define  L2C_CFG_PMUX_IF	0x00001000
+#define  L2C_CFG_PMUX_DF	0x00002000
+#define  L2C_CFG_PMUX_DS	0x00003000
+#define  L2C_CFG_PMIM		0x00000800
+#define  L2C_CFG_TPEI		0x00000400
+#define  L2C_CFG_CPEI		0x00000200
+#define  L2C_CFG_NAM		0x00000100
+#define  L2C_CFG_SMCM		0x00000080
+#define  L2C_CFG_NBRM		0x00000040
+#define  L2C_CFG_RDBW		0x00000008	/* only 460EX/GT */
+#define DCRN_L2C0_CMD		0x01
+#define  L2C_CMD_CLR		0x80000000
+#define  L2C_CMD_DIAG		0x40000000
+#define  L2C_CMD_INV		0x20000000
+#define  L2C_CMD_CCP		0x10000000
+#define  L2C_CMD_CTE		0x08000000
+#define  L2C_CMD_STRC		0x04000000
+#define  L2C_CMD_STPC		0x02000000
+#define  L2C_CMD_RPMC		0x01000000
+#define  L2C_CMD_HCC		0x00800000
+#define DCRN_L2C0_ADDR		0x02
+#define DCRN_L2C0_DATA		0x03
+#define DCRN_L2C0_SR		0x04
+#define  L2C_SR_CC		0x80000000
+#define  L2C_SR_CPE		0x40000000
+#define  L2C_SR_TPE		0x20000000
+#define  L2C_SR_LRU		0x10000000
+#define  L2C_SR_PCS		0x08000000
+#define DCRN_L2C0_REVID		0x05
+#define DCRN_L2C0_SNP0		0x06
+#define DCRN_L2C0_SNP1		0x07
+#define  L2C_SNP_BA_MASK	0xffff0000
+#define  L2C_SNP_SSR_MASK	0x0000f000
+#define  L2C_SNP_SSR_32G	0x0000f000
+#define  L2C_SNP_ESR		0x00000800
+
+/*
+ * DCR register offsets for 440SP/440SPe I2O/DMA controller.
+ * The base address is configured in the device tree.
+ */
+#define DCRN_I2O0_IBAL		0x006
+#define DCRN_I2O0_IBAH		0x007
+#define I2O_REG_ENABLE		0x00000001	/* Enable I2O/DMA access */
+
+/* 440SP/440SPe Software Reset DCR */
+#define DCRN_SDR0_SRST		0x0200
+#define DCRN_SDR0_SRST_I2ODMA	(0x80000000 >> 15)	/* Reset I2O/DMA */
+
+/* 440SP/440SPe Memory Queue DCR offsets */
+#define DCRN_MQ0_XORBA		0x04
+#define DCRN_MQ0_CF2H		0x06
+#define DCRN_MQ0_CFBHL		0x0f
+#define DCRN_MQ0_BAUH		0x10
+
+/* HB/LL Paths Configuration Register */
+#define MQ0_CFBHL_TPLM		28
+#define MQ0_CFBHL_HBCL		23
+#define MQ0_CFBHL_POLY		15
+
+#endif /* __DCR_REGS_H__ */
diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h
new file mode 100644
index 0000000000..64030e3a1f
--- /dev/null
+++ b/arch/powerpc/include/asm/dcr.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
+ *                    <benh@kernel.crashing.org>
+ */
+
+#ifndef _ASM_POWERPC_DCR_H
+#define _ASM_POWERPC_DCR_H
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_DCR
+
+#ifdef CONFIG_PPC_DCR_NATIVE
+#include <asm/dcr-native.h>
+#endif
+
+#ifdef CONFIG_PPC_DCR_MMIO
+#include <asm/dcr-mmio.h>
+#endif
+
+
+/* Indirection layer for providing both NATIVE and MMIO support. */
+
+#if defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO)
+
+#include <asm/dcr-generic.h>
+
+#define DCR_MAP_OK(host)	dcr_map_ok_generic(host)
+#define dcr_map(dev, dcr_n, dcr_c) dcr_map_generic(dev, dcr_n, dcr_c)
+#define dcr_unmap(host, dcr_c) dcr_unmap_generic(host, dcr_c)
+#define dcr_read(host, dcr_n) dcr_read_generic(host, dcr_n)
+#define dcr_write(host, dcr_n, value) dcr_write_generic(host, dcr_n, value)
+
+#else
+
+#ifdef CONFIG_PPC_DCR_NATIVE
+typedef dcr_host_native_t dcr_host_t;
+#define DCR_MAP_OK(host)	dcr_map_ok_native(host)
+#define dcr_map(dev, dcr_n, dcr_c) dcr_map_native(dev, dcr_n, dcr_c)
+#define dcr_unmap(host, dcr_c) dcr_unmap_native(host, dcr_c)
+#define dcr_read(host, dcr_n) dcr_read_native(host, dcr_n)
+#define dcr_write(host, dcr_n, value) dcr_write_native(host, dcr_n, value)
+#else
+typedef dcr_host_mmio_t dcr_host_t;
+#define DCR_MAP_OK(host)	dcr_map_ok_mmio(host)
+#define dcr_map(dev, dcr_n, dcr_c) dcr_map_mmio(dev, dcr_n, dcr_c)
+#define dcr_unmap(host, dcr_c) dcr_unmap_mmio(host, dcr_c)
+#define dcr_read(host, dcr_n) dcr_read_mmio(host, dcr_n)
+#define dcr_write(host, dcr_n, value) dcr_write_mmio(host, dcr_n, value)
+#endif
+
+#endif /* defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) */
+
+/*
+ * additional helpers to read the DCR base from the device-tree
+ */
+struct device_node;
+extern unsigned int dcr_resource_start(const struct device_node *np,
+				       unsigned int index);
+extern unsigned int dcr_resource_len(const struct device_node *np,
+				     unsigned int index);
+#endif /* CONFIG_PPC_DCR */
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_DCR_H */
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
new file mode 100644
index 0000000000..51c744608f
--- /dev/null
+++ b/arch/powerpc/include/asm/debug.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_DEBUG_H
+#define _ASM_POWERPC_DEBUG_H
+
+#include <asm/hw_breakpoint.h>
+
+struct pt_regs;
+
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
+
+extern int (*__debugger)(struct pt_regs *regs);
+extern int (*__debugger_ipi)(struct pt_regs *regs);
+extern int (*__debugger_bpt)(struct pt_regs *regs);
+extern int (*__debugger_sstep)(struct pt_regs *regs);
+extern int (*__debugger_iabr_match)(struct pt_regs *regs);
+extern int (*__debugger_break_match)(struct pt_regs *regs);
+extern int (*__debugger_fault_handler)(struct pt_regs *regs);
+
+#define DEBUGGER_BOILERPLATE(__NAME) \
+static inline int __NAME(struct pt_regs *regs) \
+{ \
+	if (unlikely(__ ## __NAME)) /* hook pointer __<name> is set? */ \
+		return __ ## __NAME(regs); \
+	return 0; /* no hook set */ \
+}
+
+DEBUGGER_BOILERPLATE(debugger)
+DEBUGGER_BOILERPLATE(debugger_ipi)
+DEBUGGER_BOILERPLATE(debugger_bpt)
+DEBUGGER_BOILERPLATE(debugger_sstep)
+DEBUGGER_BOILERPLATE(debugger_iabr_match)
+DEBUGGER_BOILERPLATE(debugger_break_match)
+DEBUGGER_BOILERPLATE(debugger_fault_handler)
+
+#else
+static inline int debugger(struct pt_regs *regs) { return 0; }
+static inline int debugger_ipi(struct pt_regs *regs) { return 0; }
+static inline int debugger_bpt(struct pt_regs *regs) { return 0; }
+static inline int debugger_sstep(struct pt_regs *regs) { return 0; }
+static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; }
+static inline int debugger_break_match(struct pt_regs *regs) { return 0; }
+static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
+#endif
+
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void suspend_breakpoints(void);
+void restore_breakpoints(void);
+bool ppc_breakpoint_available(void);
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+extern void do_send_trap(struct pt_regs *regs, unsigned long address,
+			 unsigned long error_code, int brkpt);
+#endif
+
+#endif /* _ASM_POWERPC_DEBUG_H */
diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h
new file mode 100644
index 0000000000..51bb8c1476
--- /dev/null
+++ b/arch/powerpc/include/asm/delay.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_DELAY_H
+#define _ASM_POWERPC_DELAY_H
+#ifdef __KERNEL__
+
+#include <linux/processor.h>
+#include <asm/time.h>
+
+/*
+ * Copyright 1996, Paul Mackerras.
+ * Copyright (C) 2009 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * PPC64 Support added by Dave Engebretsen, Todd Inglett, Mike Corrigan,
+ * Anton Blanchard.
+ */
+
+extern void __delay(unsigned long loops);
+extern void udelay(unsigned long usecs);
+
+/*
+ * On shared processor machines the generic implementation of mdelay can
+ * result in large errors. While each iteration of the loop inside mdelay
+ * is supposed to take 1ms, the hypervisor could sleep our partition for
+ * longer (eg 10ms). With the right timing these errors can add up.
+ *
+ * Since there is no 32bit overflow issue on 64bit kernels, just call
+ * udelay directly.
+ */
+#ifdef CONFIG_PPC64
+#define mdelay(n)	udelay((n) * 1000)
+#endif
+
+/**
+ * spin_event_timeout - spin until a condition gets true or a timeout elapses
+ * @condition: a C expression to evaluate
+ * @timeout: timeout, in microseconds
+ * @delay: the number of microseconds to delay between each evaluation of
+ *         @condition
+ *
+ * The process spins until the condition evaluates to true (non-zero) or the
+ * timeout elapses.  The return value of this macro is the value of
+ * @condition when the loop terminates. This allows you to determine why the
+ * loop terminated.  If the return value is zero, then you know a
+ * timeout has occurred.
+ *
+ * The primary purpose of this macro is to poll on a hardware register
+ * until a status bit changes.  The timeout ensures that the loop still
+ * terminates even if the bit never changes.  The delay is for devices that
+ * need a delay in between successive reads.
+ *
+ * gcc will optimize out the if-statement if @delay is a constant.
+ */
+#define spin_event_timeout(condition, timeout, delay)                          \
+({                                                                             \
+	typeof(condition) __ret;                                               \
+	unsigned long __loops = tb_ticks_per_usec * (timeout);                 \
+	unsigned long __start = mftb();                                        \
+                                                                               \
+	if (delay) { /* poll with a pause between evaluations */               \
+		while (!(__ret = (condition)) &&                               \
+				(tb_ticks_since(__start) <= __loops))          \
+			udelay(delay);                                         \
+	} else { /* no pause requested: spin */                                \
+		spin_begin();                                                  \
+		while (!(__ret = (condition)) &&                               \
+				(tb_ticks_since(__start) <= __loops))          \
+			spin_cpu_relax();                                      \
+		spin_end();                                                    \
+	}                                                                      \
+	if (!__ret) /* timed out: sample the condition one final time */       \
+		__ret = (condition);                                           \
+	__ret;		                                                       \
+})
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_DELAY_H */
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
new file mode 100644
index 0000000000..47ed639f3b
--- /dev/null
+++ b/arch/powerpc/include/asm/device.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Arch specific extensions to struct device
+ */
+#ifndef _ASM_POWERPC_DEVICE_H
+#define _ASM_POWERPC_DEVICE_H
+
+struct device_node;
+#ifdef CONFIG_PPC64
+struct pci_dn;
+struct iommu_table;
+#endif
+
+/*
+ * Arch extensions to struct device.
+ *
+ * When adding fields, consider macio_add_one_device in
+ * drivers/macintosh/macio_asic.c
+ */
+struct dev_archdata {
+	/*
+	 * These two used to be a union. However, with the hybrid ops we need
+	 * both so here we store both a DMA offset for direct mappings and
+	 * an iommu_table for remapped DMA.
+	 */
+	dma_addr_t		dma_offset;
+
+#ifdef CONFIG_PPC64
+	struct iommu_table	*iommu_table_base;
+#endif
+
+#ifdef CONFIG_PPC64
+	struct pci_dn		*pci_data;
+#endif
+#ifdef CONFIG_EEH
+	struct eeh_dev		*edev;
+#endif
+#ifdef CONFIG_FAIL_IOMMU
+	int fail_iommu;
+#endif
+#ifdef CONFIG_CXL_BASE
+	struct cxl_context	*cxl_ctx;
+#endif
+#ifdef CONFIG_PCI_IOV
+	void *iov_data;
+#endif
+};
+
+struct pdev_archdata {
+	u64 dma_mask;
+	/*
+	 * Pointer to nvdimm_pmu structure, to handle the unregistering
+	 * of pmu device
+	 */
+	void *priv;
+};
+
+#endif /* _ASM_POWERPC_DEVICE_H */
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 0000000000..8d2ebc36d5
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_PPC_DISASSEMBLE_H__
+#define __ASM_PPC_DISASSEMBLE_H__
+
+#include <linux/types.h>
+
+static inline unsigned int get_op(u32 inst)
+{
+	return inst >> 26;	/* the 6 most significant bits of the word */
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+	return (inst >> 1) & 0x3ff;	/* 10-bit field at inst[10:1] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);	/* result[4:0] = inst[20:16], result[9:5] = inst[15:11] (LSB-0) */
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);	/* result[4:0] = inst[20:16], result[9:5] = inst[15:11] (LSB-0) */
+}
+
+static inline unsigned int get_tmrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);	/* result[4:0] = inst[20:16], result[9:5] = inst[15:11] (LSB-0) */
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+	return (inst >> 21) & 0x1f;	/* 5-bit field at inst[25:21] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+	return (inst >> 21) & 0x1f;	/* 5-bit field at inst[25:21] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+	return (inst >> 16) & 0x1f;	/* 5-bit field at inst[20:16] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+	return (inst >> 11) & 0x1f;	/* 5-bit field at inst[15:11] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+	return inst & 0x1;	/* least significant bit of the word */
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+	return (inst >> 11) & 0x1f;	/* 5-bit field at inst[15:11] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+	return inst & 0xffff;	/* low 16 bits, returned unsigned (no sign extension here) */
+}
+
+static inline unsigned int get_oc(u32 inst)
+{
+	return (inst >> 11) & 0x7fff;	/* 15-bit field at inst[25:11] (LSB-0 numbering) */
+}
+
+static inline unsigned int get_tx_or_sx(u32 inst)
+{
+	return (inst) & 0x1;	/* least significant bit of the word */
+}
+
+#define IS_XFORM(inst)	(get_op(inst)  == 31)
+#define IS_DSFORM(inst)	(get_op(inst) >= 56)
+
+/*
+ * Create a DSISR value from the instruction (bit numbers below are MSB-0: bit 0 is the most significant bit)
+ */
+static inline unsigned make_dsisr(unsigned instr)
+{
+	unsigned dsisr;
+
+
+	/* bits  6:15 --> 22:31 */
+	dsisr = (instr & 0x03ff0000) >> 16;
+
+	if (IS_XFORM(instr)) {	/* primary opcode 31 */
+		/* bits 29:30 --> 15:16 */
+		dsisr |= (instr & 0x00000006) << 14;
+		/* bit     25 -->    17 */
+		dsisr |= (instr & 0x00000040) << 8;
+		/* bits 21:24 --> 18:21 */
+		dsisr |= (instr & 0x00000780) << 3;
+	} else {
+		/* bit      5 -->    17 */
+		dsisr |= (instr & 0x04000000) >> 12;
+		/* bits  1: 4 --> 18:21 */
+		dsisr |= (instr & 0x78000000) >> 17;
+		/* bits 30:31 --> 12:13 */
+		if (IS_DSFORM(instr))	/* primary opcode >= 56 */
+			dsisr |= (instr & 0x00000003) << 18;
+	}
+
+	return dsisr;
+}
+#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
new file mode 100644
index 0000000000..128304cbee
--- /dev/null
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_POWERPC_DMA_DIRECT_H
+#define ASM_POWERPC_DMA_DIRECT_H 1
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return paddr + dev->archdata.dma_offset;	/* add the per-device DMA offset */
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr - dev->archdata.dma_offset;	/* subtract the per-device DMA offset */
+}
+#endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/dma.h b/arch/powerpc/include/asm/dma.h
new file mode 100644
index 0000000000..d97c66d9ae
--- /dev/null
+++ b/arch/powerpc/include/asm/dma.h
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_DMA_H
+#define _ASM_POWERPC_DMA_H
+#ifdef __KERNEL__
+
+/*
+ * Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ * Changes for ppc sound by Christoph Nadig
+ */
+
+/*
+ * Note: Adapted for PowerPC by Gary Thomas
+ * Modified by Cort Dougan <cort@cs.nmt.edu>
+ *
+ * None of this really applies for Power Macintoshes.  There is
+ * basically just enough here to get kernel/dma.c to compile.
+ */
+
+#include <asm/io.h>
+#include <linux/spinlock.h>
+
+#ifndef MAX_DMA_CHANNELS
+#define MAX_DMA_CHANNELS	8
+#endif
+
+/* The maximum address that we can perform a DMA transfer to on this platform */
+/* Doesn't really apply... */
+#define MAX_DMA_ADDRESS		(~0UL)
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb	outb_p
+#else
+#define dma_outb	outb
+#endif
+
+#define dma_inb		inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ *  controller 1: channels 0-3, byte operations, ports 00-1F
+ *  controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ *  - ALL registers are 8 bits only, regardless of transfer size
+ *  - channel 4 is not used - cascades 1 into 2.
+ *  - channels 0-3 are byte - addresses/counts are for physical bytes
+ *  - channels 5-7 are word - addresses/counts are for physical words
+ *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ *  - transfer count loaded to registers is 1 less than actual count
+ *  - controller 2 offsets are all even (2x offsets for controller 1)
+ *  - page registers for 5-7 don't use data bit 0, represent 128K pages
+ *  - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * On CHRP, the W83C553F (and VLSI Tollgate?) support full 32 bit addressing.
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ *  Address mapping for channels 0-3:
+ *
+ *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *   P7  ...  P0  A7 ... A0  A7 ... A0
+ * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
+ *
+ *  Address mapping for channels 5-7:
+ *
+ *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
+ *    |  ...  |   \   \   ... \  \  \  ... \  \
+ *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
+ *    |  ...  |     \   \   ... \  \  \  ... \
+ *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0
+ * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation.
+ *
+ */
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG		0x08	/* command register (w) */
+#define DMA1_STAT_REG		0x08	/* status register (r) */
+#define DMA1_REQ_REG		0x09	/* request register (w) */
+#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
+#define DMA1_MODE_REG		0x0B	/* mode register (w) */
+#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG		0x0D	/* Temporary Register (r) */
+#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
+#define DMA1_CLR_MASK_REG	0x0E	/* Clear Mask */
+#define DMA1_MASK_ALL_REG	0x0F	/* all-channels mask (w) */
+
+#define DMA2_CMD_REG		0xD0	/* command register (w) */
+#define DMA2_STAT_REG		0xD0	/* status register (r) */
+#define DMA2_REQ_REG		0xD2	/* request register (w) */
+#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
+#define DMA2_MODE_REG		0xD6	/* mode register (w) */
+#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG		0xDA	/* Temporary Register (r) */
+#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
+#define DMA2_CLR_MASK_REG	0xDC	/* Clear Mask */
+#define DMA2_MASK_ALL_REG	0xDE	/* all-channels mask (w) */
+
+#define DMA_ADDR_0		0x00	/* DMA address registers */
+#define DMA_ADDR_1		0x02
+#define DMA_ADDR_2		0x04
+#define DMA_ADDR_3		0x06
+#define DMA_ADDR_4		0xC0
+#define DMA_ADDR_5		0xC4
+#define DMA_ADDR_6		0xC8
+#define DMA_ADDR_7		0xCC
+
+#define DMA_CNT_0		0x01	/* DMA count registers */
+#define DMA_CNT_1		0x03
+#define DMA_CNT_2		0x05
+#define DMA_CNT_3		0x07
+#define DMA_CNT_4		0xC2
+#define DMA_CNT_5		0xC6
+#define DMA_CNT_6		0xCA
+#define DMA_CNT_7		0xCE
+
+#define DMA_LO_PAGE_0		0x87	/* DMA page registers */
+#define DMA_LO_PAGE_1		0x83
+#define DMA_LO_PAGE_2		0x81
+#define DMA_LO_PAGE_3		0x82
+#define DMA_LO_PAGE_5		0x8B
+#define DMA_LO_PAGE_6		0x89
+#define DMA_LO_PAGE_7		0x8A
+
+#define DMA_HI_PAGE_0		0x487	/* DMA page registers */
+#define DMA_HI_PAGE_1		0x483
+#define DMA_HI_PAGE_2		0x481
+#define DMA_HI_PAGE_3		0x482
+#define DMA_HI_PAGE_5		0x48B
+#define DMA_HI_PAGE_6		0x489
+#define DMA_HI_PAGE_7		0x48A
+
+#define DMA1_EXT_REG		0x40B
+#define DMA2_EXT_REG		0x4D6
+
+#ifndef __powerpc64__
+    /* in arch/powerpc/kernel/setup_32.c -- Cort */
+    extern unsigned int DMA_MODE_WRITE;
+    extern unsigned int DMA_MODE_READ;
+#else
+    #define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
+    #define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
+#endif
+
+#define DMA_MODE_CASCADE	0xC0	/* pass thru DREQ->HRQ, DACK<-HLDA only */
+
+#define DMA_AUTOINIT		0x10
+
+extern spinlock_t dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dma_spin_lock, flags);	/* takes dma_spin_lock, saving IRQ state in flags */
+	return flags;	/* saved IRQ state, needed again when unlocking */
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+	spin_unlock_irqrestore(&dma_spin_lock, flags);	/* drops dma_spin_lock and restores the IRQ state in flags */
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+	unsigned char ucDmaCmd = 0x00;	/* value written to the command register(s) */
+
+	if (dmanr != 4) {
+		dma_outb(0, DMA2_MASK_REG);	/* This may not be enabled */
+		dma_outb(ucDmaCmd, DMA2_CMD_REG);	/* Enable group */
+	}
+	if (dmanr <= 3) {
+		dma_outb(dmanr, DMA1_MASK_REG);	/* channel number with bit 2 clear */
+		dma_outb(ucDmaCmd, DMA1_CMD_REG);	/* Enable group */
+	} else {
+		dma_outb(dmanr & 3, DMA2_MASK_REG);	/* channels 4-7 are written as 0-3 on the second controller */
+	}
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+	if (dmanr <= 3)
+		dma_outb(dmanr | 4, DMA1_MASK_REG);	/* channel number with bit 2 set */
+	else
+		dma_outb((dmanr & 3) | 4, DMA2_MASK_REG);	/* same, using the channel's 0-3 index on controller 2 */
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while interrupts are disabled! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+	if (dmanr <= 3)
+		dma_outb(0, DMA1_CLEAR_FF_REG);	/* channels 0-3: first controller */
+	else
+		dma_outb(0, DMA2_CLEAR_FF_REG);	/* any other channel: second controller */
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+	if (dmanr <= 3)
+		dma_outb(mode | dmanr, DMA1_MODE_REG);	/* mode bits OR'ed with the channel number */
+	else
+		dma_outb(mode | (dmanr & 3), DMA2_MODE_REG);	/* channel reduced to 0-3 for controller 2 */
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, int pagenr)
+{
+	switch (dmanr) {	/* no case for 4 and no default: other values write nothing */
+	case 0:
+		dma_outb(pagenr, DMA_LO_PAGE_0);	/* low page register */
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_0);	/* high page register gets pagenr >> 8 */
+		break;
+	case 1:
+		dma_outb(pagenr, DMA_LO_PAGE_1);
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_1);
+		break;
+	case 2:
+		dma_outb(pagenr, DMA_LO_PAGE_2);
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_2);
+		break;
+	case 3:
+		dma_outb(pagenr, DMA_LO_PAGE_3);
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_3);
+		break;
+	case 5:
+		dma_outb(pagenr & 0xfe, DMA_LO_PAGE_5);	/* channels 5-7: bit 0 of the low page value is cleared */
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_5);
+		break;
+	case 6:
+		dma_outb(pagenr & 0xfe, DMA_LO_PAGE_6);
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_6);
+		break;
+	case 7:
+		dma_outb(pagenr & 0xfe, DMA_LO_PAGE_7);
+		dma_outb(pagenr >> 8, DMA_HI_PAGE_7);
+		break;
+	}
+}
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys)
+{
+	if (dmanr <= 3) {	/* two writes to the same port: low byte, then next byte */
+		dma_outb(phys & 0xff,
+			 ((dmanr & 3) << 1) + IO_DMA1_BASE);
+		dma_outb((phys >> 8) & 0xff,
+			 ((dmanr & 3) << 1) + IO_DMA1_BASE);
+	} else {	/* address is shifted right by one before being split into bytes */
+		dma_outb((phys >> 1) & 0xff,
+			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
+		dma_outb((phys >> 9) & 0xff,
+			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
+	}
+	set_dma_page(dmanr, phys >> 16);	/* remaining upper bits go to the page registers */
+}
+
+
+/* Set transfer size (max 64k for DMA0..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+	count--;	/* the value written is one less than the requested count */
+	if (dmanr <= 3) {	/* two writes to the same port: low byte, then next byte */
+		dma_outb(count & 0xff,
+			 ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+		dma_outb((count >> 8) & 0xff,
+			 ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+	} else {	/* count is shifted right by one before being split into bytes */
+		dma_outb((count >> 1) & 0xff,
+			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+		dma_outb((count >> 9) & 0xff,
+			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+	}
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+	unsigned int io_port = (dmanr <= 3)	/* same port expressions as used by set_dma_count() */
+	    ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE
+	    : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE;
+
+	/* using short to get 16-bit wrap around */
+	unsigned short count;
+
+	count = 1 + dma_inb(io_port);	/* first read: low byte, plus one */
+	count += dma_inb(io_port) << 8;	/* second read of the same port: high byte */
+
+	return (dmanr <= 3) ? count : (count << 1);	/* doubled for channels above 3 */
+}
+
+/* These are in kernel/dma.c: */
+
+/* reserve a DMA channel */
+extern int request_dma(unsigned int dmanr, const char *device_id);
+/* release it again */
+extern void free_dma(unsigned int dmanr);
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_DMA_H */
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
new file mode 100644
index 0000000000..13bf6dee8e
--- /dev/null
+++ b/arch/powerpc/include/asm/drmem.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * drmem.h: Power specific logical memory block representation
+ *
+ * Copyright 2017 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_LMB_H
+#define _ASM_POWERPC_LMB_H
+
+#include <linux/sched.h>
+
+struct drmem_lmb {
+	u64     base_addr;	/* NOTE(review): presumably the LMB start address */
+	u32     drc_index;	/* NOTE(review): meaning not visible here; confirm */
+	u32     aa_index;	/* associativity index; 0xffffffff once invalidated */
+	u32     flags;		/* holds DRMEM_LMB_RESERVED; rest presumably DRCONF_MEM_* */
+};
+
+struct drmem_lmb_info {
+	struct drmem_lmb        *lmbs;
+	int                     n_lmbs;
+	u64                     lmb_size;
+};
+
+struct device_node;
+struct property;
+
+extern struct drmem_lmb_info *drmem_info;
+
+static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb,
+					       const struct drmem_lmb *start)
+{
+	struct drmem_lmb *next = lmb + 1;
+
+	/*
+	 * Firmware calls on DLPAR paths can cost milliseconds per LMB,
+	 * so offer to reschedule once every 16 elements walked.
+	 */
+	if ((next - start) % 16 == 0)
+		cond_resched();
+	return next;
+}
+
+/* Walk @lmb over [start, end); every step may reschedule (drmem_lmb_next). */
+#define for_each_drmem_lmb_in_range(lmb, start, end)		\
+	for ((lmb) = (start); (lmb) < (end); (lmb) = drmem_lmb_next((lmb), (start)))
+
+/* Walk @lmb over all n_lmbs entries of the global drmem_info->lmbs array. */
+#define for_each_drmem_lmb(lmb)	for_each_drmem_lmb_in_range((lmb),	\
+		&drmem_info->lmbs[0], &drmem_info->lmbs[drmem_info->n_lmbs])
+
+/*
+ * The of_drconf_cell_v1 struct defines the layout of the LMB data
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+	__be64	base_addr;
+	__be32	drc_index;
+	__be32	reserved;
+	__be32	aa_index;
+	__be32	flags;
+};
+
+/*
+ * Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+	u32	seq_lmbs;
+	u64	base_addr;
+	u32	drc_index;
+	u32	aa_index;
+	u32	flags;
+} __packed;
+
+#define DRCONF_MEM_ASSIGNED	0x00000008
+#define DRCONF_MEM_AI_INVALID	0x00000040
+#define DRCONF_MEM_RESERVED	0x00000080
+#define DRCONF_MEM_HOTREMOVABLE	0x00000100
+
+static inline u64 drmem_lmb_size(void)
+{
+	return drmem_info->lmb_size;	/* read from the global drmem_info */
+}
+
+#define DRMEM_LMB_RESERVED	0x80000000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+	lmb->flags = lmb->flags | DRMEM_LMB_RESERVED;	/* set the bit */
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+	lmb->flags = lmb->flags & ~DRMEM_LMB_RESERVED;	/* clear the bit */
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+	return (lmb->flags & DRMEM_LMB_RESERVED) != 0;	/* is the bit set? */
+}
+
+u64 drmem_lmb_memory_max(void);
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+		    int (*func)(struct drmem_lmb *, const __be32 **, void *));
+int drmem_update_dt(void);
+
+#ifdef CONFIG_PPC_PSERIES
+int __init
+walk_drmem_lmbs_early(unsigned long node, void *data,
+		      int (*func)(struct drmem_lmb *, const __be32 **, void *));
+void drmem_update_lmbs(struct property *prop);
+#endif
+
+static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+	lmb->aa_index = 0xffffffff;	/* all-ones marks the index as invalid */
+}
+
+#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/dt_cpu_ftrs.h b/arch/powerpc/include/asm/dt_cpu_ftrs.h
new file mode 100644
index 0000000000..0c729e2d0e
--- /dev/null
+++ b/arch/powerpc/include/asm/dt_cpu_ftrs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_DT_CPU_FTRS_H
+#define __ASM_POWERPC_DT_CPU_FTRS_H
+
+/*
+ *  Copyright 2017, IBM Corporation
+ *  cpufeatures is the new way to discover CPU features with /cpus/features
+ *  devicetree. This supersedes PVR based discovery ("cputable"), and older
+ *  device tree feature advertisement.
+ */
+
+#include <linux/types.h>
+#include <uapi/asm/cputable.h>
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+bool dt_cpu_ftrs_init(void *fdt);
+void dt_cpu_ftrs_scan(void);
+bool dt_cpu_ftrs_in_use(void);
+#else
+static inline bool dt_cpu_ftrs_init(void *fdt) { return false; }
+static inline void dt_cpu_ftrs_scan(void) { }
+static inline bool dt_cpu_ftrs_in_use(void) { return false; }
+#endif
+
+#endif /* __ASM_POWERPC_DT_CPU_FTRS_H */
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
new file mode 100644
index 0000000000..d6f43d149f
--- /dev/null
+++ b/arch/powerpc/include/asm/dtl.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_POWERPC_DTL_H
+#define _ASM_POWERPC_DTL_H
+
+#include <asm/lppaca.h>
+#include <linux/spinlock_types.h>
+
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+	u8	dispatch_reason;
+	u8	preempt_reason;
+	__be16	processor_id;
+	__be32	enqueue_to_dispatch_time;
+	__be32	ready_to_enqueue_time;
+	__be32	waiting_to_ready_time;
+	__be64	timebase;
+	__be64	fault_addr;
+	__be64	srr0;
+	__be64	srr1;
+};
+
+#define DISPATCH_LOG_BYTES	4096	/* bytes per cpu */
+#define N_DISPATCH_LOG		(DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
+/*
+ * Dispatch trace log event enable mask:
+ *   0x1: voluntary virtual processor waits
+ *   0x2: time-slice preempts
+ *   0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE		0x1
+#define DTL_LOG_PREEMPT		0x2
+#define DTL_LOG_FAULT		0x4
+#define DTL_LOG_ALL		(DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
+extern struct kmem_cache *dtl_cache;
+extern rwlock_t dtl_access_lock;
+
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+
+#endif /* _ASM_POWERPC_DTL_H */
diff --git a/arch/powerpc/include/asm/edac.h b/arch/powerpc/include/asm/edac.h
new file mode 100644
index 0000000000..5571e23d25
--- /dev/null
+++ b/arch/powerpc/include/asm/edac.h
@@ -0,0 +1,40 @@
+/*
+ * PPC EDAC common defs
+ *
+ * Author: Dave Jiang <djiang@mvista.com>
+ *
+ * 2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef ASM_EDAC_H
+#define ASM_EDAC_H
+/*
+ * ECC atomic, DMA, SMP and interrupt safe scrub function: the per arch
+ * edac_atomic_scrub() that EDAC uses for software ECC scrubbing.
+ * @va is the start of the region, @size its length in bytes; only whole
+ * unsigned int words are visited (size / sizeof(unsigned int) of them).
+ * Each word is read and written back unchanged so hardware can correct it.
+ */
+static __inline__ void edac_atomic_scrub(void *va, u32 size)
+{
+	unsigned int *virt_addr = va;
+	unsigned int temp;
+	unsigned int i;
+
+	for (i = 0; i < size / sizeof(*virt_addr); i++, virt_addr++) {
+		/* Load-reserve the word and conditionally store the same value
+		 * back, retrying (bne- 1b) until the store succeeds, so we are
+		 * interrupt, DMA and SMP safe. */
+		__asm__ __volatile__ ("\n\
+				1:	lwarx	%0,0,%1\n\
+					stwcx.	%0,0,%1\n\
+					bne-	1b\n\
+					isync"
+					: "=&r"(temp)
+					: "r"(virt_addr)
+					: "cr0", "memory");
+	}
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
new file mode 100644
index 0000000000..514dd056c2
--- /dev/null
+++ b/arch/powerpc/include/asm/eeh.h
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001  Dave Engebretsen & Todd Inglett IBM Corporation.
+ * Copyright 2001-2012 IBM Corporation.
+ */
+
+#ifndef _POWERPC_EEH_H
+#define _POWERPC_EEH_H
+#ifdef __KERNEL__
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/atomic.h>
+
+#include <uapi/asm/eeh.h>
+
+struct pci_dev;
+struct pci_bus;
+struct pci_dn;
+
+#ifdef CONFIG_EEH
+
+/* EEH subsystem flags */
+#define EEH_ENABLED		0x01	/* EEH enabled			     */
+#define EEH_FORCE_DISABLED	0x02	/* EEH disabled			     */
+#define EEH_PROBE_MODE_DEV	0x04	/* From PCI device		     */
+#define EEH_PROBE_MODE_DEVTREE	0x08	/* From device tree		     */
+#define EEH_ENABLE_IO_FOR_LOG	0x20	/* Enable IO for log		     */
+#define EEH_EARLY_DUMP_LOG	0x40	/* Dump log immediately		     */
+
+/*
+ * Delay for PE reset, all in ms
+ *
+ * PCI specification has reset hold time of 100 milliseconds.
+ * We have 250 milliseconds here. The PCI bus settlement time
+ * is specified as 1.5 seconds and we have 1.8 seconds.
+ */
+#define EEH_PE_RST_HOLD_TIME		250
+#define EEH_PE_RST_SETTLE_TIME		1800
+
+/*
+ * The struct is used to trace PE related EEH functionality.
+ * In theory, one instance of the struct is created for each
+ * particular PE. By nature, PEs are related to each other, so
+ * the struct has to reflect that hierarchy in order to easily
+ * pick up the affected PEs when one particular PE has EEH
+ * errors.
+ *
+ * Also, one particular PE might be composed of a PCI device, or a
+ * PCI bus and its subordinate components. The struct also needs to
+ * carry that information. Furthermore, one particular PE is only
+ * meaningful in the corresponding PHB. Therefore, the root PEs should
+ * be created against existing PHBs in one-to-one fashion.
+ */
+#define EEH_PE_INVALID	(1 << 0)	/* Invalid   */
+#define EEH_PE_PHB	(1 << 1)	/* PHB PE    */
+#define EEH_PE_DEVICE 	(1 << 2)	/* Device PE */
+#define EEH_PE_BUS	(1 << 3)	/* Bus PE    */
+#define EEH_PE_VF	(1 << 4)	/* VF PE     */
+
+#define EEH_PE_ISOLATED		(1 << 0)	/* Isolated PE		*/
+#define EEH_PE_RECOVERING	(1 << 1)	/* Recovering PE	*/
+#define EEH_PE_CFG_BLOCKED	(1 << 2)	/* Block config access	*/
+#define EEH_PE_RESET		(1 << 3)	/* PE reset in progress */
+
+#define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
+#define EEH_PE_CFG_RESTRICTED	(1 << 9)	/* Block config on error */
+#define EEH_PE_REMOVED		(1 << 10)	/* Removed permanently	*/
+#define EEH_PE_PRI_BUS		(1 << 11)	/* Cached primary bus   */
+
+struct eeh_pe {
+	int type;			/* PE type: PHB/Bus/Device	*/
+	int state;			/* PE EEH dependent mode	*/
+	int addr;			/* PE configuration address	*/
+	struct pci_controller *phb;	/* Associated PHB		*/
+	struct pci_bus *bus;		/* Top PCI bus for bus PE	*/
+	int check_count;		/* Times of ignored error	*/
+	int freeze_count;		/* Times PE was frozen		*/
+	time64_t tstamp;		/* Time on first-time freeze	*/
+	int false_positives;		/* Times of reported #ff's	*/
+	atomic_t pass_dev_cnt;		/* Count of passed through devs	*/
+	struct eeh_pe *parent;		/* Parent PE			*/
+	void *data;			/* PE auxiliary data		*/
+	struct list_head child_list;	/* List of PEs below this PE	*/
+	struct list_head child;		/* Memb. child_list/eeh_phb_pe	*/
+	struct list_head edevs;		/* List of eeh_dev in this PE	*/
+
+#ifdef CONFIG_STACKTRACE
+	/*
+	 * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
+	 * the stack trace is saved here so we can print it in the recovery
+	 * thread if it turns out to be due to a real problem rather than
+	 * a hot-remove.
+	 *
+	 * A max of 64 entries might be overkill, but it also might not be.
+	 */
+	unsigned long stack_trace[64];
+	int trace_entries;
+#endif /* CONFIG_STACKTRACE */
+};
+
+/* Iterate @edev over @pe's edevs list; @tmp is the "safe" lookahead cursor. */
+#define eeh_pe_for_each_dev(pe, edev, tmp) \
+		list_for_each_entry_safe(edev, tmp, &(pe)->edevs, entry)
+#define eeh_for_each_pe(root, pe) /* visit @root, then follow eeh_pe_next() */ \
+	for ((pe) = (root); (pe); (pe) = eeh_pe_next((pe), (root)))
+
+static inline bool eeh_pe_passed(struct eeh_pe *pe)
+{
+	return pe && atomic_read(&pe->pass_dev_cnt) != 0;	/* NULL-safe */
+}
+
+/*
+ * The struct is used to trace EEH state for the associated
+ * PCI device node or PCI device. In future, it might
+ * represent a PE as well, so that EEH devices form
+ * another tree besides the currently existing tree of PCI
+ * buses and PCI devices.
+ */
+#define EEH_DEV_BRIDGE		(1 << 0)	/* PCI bridge		*/
+#define EEH_DEV_ROOT_PORT	(1 << 1)	/* PCIe root port	*/
+#define EEH_DEV_DS_PORT		(1 << 2)	/* Downstream port	*/
+#define EEH_DEV_IRQ_DISABLED	(1 << 3)	/* Interrupt disabled	*/
+#define EEH_DEV_DISCONNECTED	(1 << 4)	/* Removing from PE	*/
+
+#define EEH_DEV_NO_HANDLER	(1 << 8)	/* No error handler	*/
+#define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
+#define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
+
+struct eeh_dev {
+	int mode;			/* EEH mode			*/
+	int bdfn;			/* bdfn of device (for cfg ops) */
+	struct pci_controller *controller;
+	int pe_config_addr;		/* PE config address		*/
+	u32 config_space[16];		/* Saved PCI config space	*/
+	int pcix_cap;			/* Saved PCIx capability	*/
+	int pcie_cap;			/* Saved PCIe capability	*/
+	int aer_cap;			/* Saved AER capability		*/
+	int af_cap;			/* Saved AF capability		*/
+	struct eeh_pe *pe;		/* Associated PE		*/
+	struct list_head entry;		/* Membership in eeh_pe.edevs	*/
+	struct list_head rmv_entry;	/* Membership in rmv_list	*/
+	struct pci_dn *pdn;		/* Associated PCI device node	*/
+	struct pci_dev *pdev;		/* Associated PCI device	*/
+	bool in_error;			/* Error flag for edev		*/
+
+	/* VF specific properties */
+	struct pci_dev *physfn;		/* Associated SRIOV PF		*/
+	int vf_index;			/* Index of this VF 		*/
+};
+
+/* "fmt" must be a simple literal string */
+#define EEH_EDEV_PRINT(level, edev, fmt, ...) \
+	pr_##level("PCI %04x:%02x:%02x.%x#%04x: EEH: " fmt, \
+	(edev)->controller->global_number, PCI_BUSNO((edev)->bdfn), \
+	PCI_SLOT((edev)->bdfn), PCI_FUNC((edev)->bdfn), \
+	((edev)->pe ? (edev)->pe_config_addr : 0xffff), ##__VA_ARGS__)
+#define eeh_edev_dbg(edev, fmt, ...) EEH_EDEV_PRINT(debug, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_info(edev, fmt, ...) EEH_EDEV_PRINT(info, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_warn(edev, fmt, ...) EEH_EDEV_PRINT(warn, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_err(edev, fmt, ...) EEH_EDEV_PRINT(err, (edev), fmt, ##__VA_ARGS__)
+
+static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
+{
+	return edev ? edev->pdn : NULL;
+}
+
+static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
+{
+	return edev ? edev->pdev : NULL;
+}
+
+static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev)
+{
+	return edev ? edev->pe : NULL;
+}
+
+/* Return values from eeh_ops::next_error */
+enum {
+	EEH_NEXT_ERR_NONE = 0,
+	EEH_NEXT_ERR_INF,
+	EEH_NEXT_ERR_FROZEN_PE,
+	EEH_NEXT_ERR_FENCED_PHB,
+	EEH_NEXT_ERR_DEAD_PHB,
+	EEH_NEXT_ERR_DEAD_IOC
+};
+
+/*
+ * The struct is used to trace the registered EEH operation
+ * callback functions. Actually, those operation callback
+ * functions are heavily platform dependent. That means the
+ * platform should register its own EEH operation callback
+ * functions before any EEH further operations.
+ */
+#define EEH_OPT_DISABLE		0	/* EEH disable	*/
+#define EEH_OPT_ENABLE		1	/* EEH enable	*/
+#define EEH_OPT_THAW_MMIO	2	/* MMIO enable	*/
+#define EEH_OPT_THAW_DMA	3	/* DMA enable	*/
+#define EEH_OPT_FREEZE_PE	4	/* Freeze PE	*/
+#define EEH_STATE_UNAVAILABLE	(1 << 0)	/* State unavailable	*/
+#define EEH_STATE_NOT_SUPPORT	(1 << 1)	/* EEH not supported	*/
+#define EEH_STATE_RESET_ACTIVE	(1 << 2)	/* Active reset		*/
+#define EEH_STATE_MMIO_ACTIVE	(1 << 3)	/* Active MMIO		*/
+#define EEH_STATE_DMA_ACTIVE	(1 << 4)	/* Active DMA		*/
+#define EEH_STATE_MMIO_ENABLED	(1 << 5)	/* MMIO enabled		*/
+#define EEH_STATE_DMA_ENABLED	(1 << 6)	/* DMA enabled		*/
+#define EEH_RESET_DEACTIVATE	0	/* Deactivate the PE reset	*/
+#define EEH_RESET_HOT		1	/* Hot reset			*/
+#define EEH_RESET_FUNDAMENTAL	3	/* Fundamental reset		*/
+#define EEH_LOG_TEMP		1	/* EEH temporary error log	*/
+#define EEH_LOG_PERM		2	/* EEH permanent error log	*/
+
+struct eeh_ops {
+	char *name;
+	struct eeh_dev *(*probe)(struct pci_dev *pdev);
+	int (*set_option)(struct eeh_pe *pe, int option);
+	int (*get_state)(struct eeh_pe *pe, int *delay);
+	int (*reset)(struct eeh_pe *pe, int option);
+	int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
+	int (*configure_bridge)(struct eeh_pe *pe);
+	int (*err_inject)(struct eeh_pe *pe, int type, int func,
+			  unsigned long addr, unsigned long mask);
+	int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
+	int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
+	int (*next_error)(struct eeh_pe **pe);
+	int (*restore_config)(struct eeh_dev *edev);
+	int (*notify_resume)(struct eeh_dev *edev);
+};
+
+extern int eeh_subsystem_flags;
+extern u32 eeh_max_freezes;
+extern bool eeh_debugfs_no_recover;
+extern struct eeh_ops *eeh_ops;
+extern raw_spinlock_t confirm_error_lock;
+
+static inline void eeh_add_flag(int flag)
+{
+	eeh_subsystem_flags |= flag;
+}
+
+static inline void eeh_clear_flag(int flag)
+{
+	eeh_subsystem_flags &= ~flag;
+}
+
+static inline bool eeh_has_flag(int flag)
+{
+	return (eeh_subsystem_flags & flag) != 0;
+}
+
+static inline bool eeh_enabled(void)
+{
+	return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
+}
+
+static inline void eeh_serialize_lock(unsigned long *flags)
+{
+	raw_spin_lock_irqsave(&confirm_error_lock, *flags);
+}
+
+static inline void eeh_serialize_unlock(unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
+}
+
+static inline bool eeh_state_active(int state)
+{
+	/* true only when both the MMIO and the DMA active bits are set */
+	return !(~state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE));
+}
+
+typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
+typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
+void eeh_set_pe_aux_size(int size);
+int eeh_phb_pe_create(struct pci_controller *phb);
+int eeh_wait_state(struct eeh_pe *pe, int max_wait);
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no);
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
+int eeh_pe_tree_remove(struct eeh_dev *edev);
+void eeh_pe_update_time_stamp(struct eeh_pe *pe);
+void *eeh_pe_traverse(struct eeh_pe *root,
+		      eeh_pe_traverse_func fn, void *flag);
+void eeh_pe_dev_traverse(struct eeh_pe *root,
+			 eeh_edev_traverse_func fn, void *flag);
+void eeh_pe_restore_bars(struct eeh_pe *pe);
+const char *eeh_pe_loc_get(struct eeh_pe *pe);
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
+
+void eeh_show_enabled(void);
+int __init eeh_init(struct eeh_ops *ops);
+int eeh_check_failure(const volatile void __iomem *token);
+int eeh_dev_check_failure(struct eeh_dev *edev);
+void eeh_addr_cache_init(void);
+void eeh_probe_device(struct pci_dev *pdev);
+void eeh_remove_device(struct pci_dev *);
+int eeh_unfreeze_pe(struct eeh_pe *pe);
+int eeh_pe_reset_and_recover(struct eeh_pe *pe);
+int eeh_dev_open(struct pci_dev *pdev);
+void eeh_dev_release(struct pci_dev *pdev);
+struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
+int eeh_pe_set_option(struct eeh_pe *pe, int option);
+int eeh_pe_get_state(struct eeh_pe *pe);
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
+int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+		      unsigned long addr, unsigned long mask);
+
+/**
+ * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
+ *
+ * If this macro yields TRUE, the caller relays to eeh_check_failure()
+ * which does further tests out of line.
+ */
+#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0 && eeh_enabled())
+
+/*
+ * Reads from a device which has been isolated by EEH will return
+ * all 1s.  This macro gives an all-1s value of the given size (in
+ * bytes: 1, 2, or 4) for comparing with the result of a read.
+ */
+#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
+
+#else /* !CONFIG_EEH */
+
+static inline bool eeh_enabled(void)
+{
+        return false;
+}
+
+static inline void eeh_show_enabled(void) { }
+
+static inline int eeh_check_failure(const volatile void __iomem *token)
+{
+	return 0;
+}
+
+#define eeh_dev_check_failure(x) (0)
+
+static inline void eeh_addr_cache_init(void) { }
+
+static inline void eeh_probe_device(struct pci_dev *dev) { }
+
+static inline void eeh_remove_device(struct pci_dev *dev) { }
+
+#define EEH_POSSIBLE_ERROR(val, type) (0)
+#define EEH_IO_ERROR_VALUE(size) (-1UL)
+static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; }
+#endif /* CONFIG_EEH */
+
+#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH)
+void pseries_eeh_init_edev_recursive(struct pci_dn *pdn);
+#endif
+
+#ifdef CONFIG_PPC64
+/*
+ * MMIO read/write operations with EEH support.
+ */
+static inline u8 eeh_readb(const volatile void __iomem *addr)
+{
+	const u8 data = in_8(addr);
+	if (EEH_POSSIBLE_ERROR(data, u8))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline u16 eeh_readw(const volatile void __iomem *addr)
+{
+	const u16 data = in_le16(addr);
+	if (EEH_POSSIBLE_ERROR(data, u16))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline u32 eeh_readl(const volatile void __iomem *addr)
+{
+	const u32 data = in_le32(addr);
+	if (EEH_POSSIBLE_ERROR(data, u32))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline u64 eeh_readq(const volatile void __iomem *addr)
+{
+	const u64 data = in_le64(addr);
+	if (EEH_POSSIBLE_ERROR(data, u64))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline u16 eeh_readw_be(const volatile void __iomem *addr)
+{
+	const u16 data = in_be16(addr);
+	if (EEH_POSSIBLE_ERROR(data, u16))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline u32 eeh_readl_be(const volatile void __iomem *addr)
+{
+	const u32 data = in_be32(addr);
+	if (EEH_POSSIBLE_ERROR(data, u32))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline u64 eeh_readq_be(const volatile void __iomem *addr)
+{
+	const u64 data = in_be64(addr);
+	if (EEH_POSSIBLE_ERROR(data, u64))	/* all ones: check out of line */
+		eeh_check_failure(addr);
+	return data;
+}
+
+static inline void eeh_memcpy_fromio(void *dest, const
+				     volatile void __iomem *src,
+				     unsigned long n)
+{
+	_memcpy_fromio(dest, src, n);
+
+	/* Look for an all-ones u32 in the last four bytes copied into dest
+	 * (dest[n-4..n-1]); copies shorter than four bytes are not checked.
+	 */
+	if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32))
+		eeh_check_failure(src);
+}
+
+/* in-string eeh macros */
+/* _insb() @ns bytes into @buf, then EEH-check the last one (only if ns > 0). */
+static inline void eeh_readsb(const volatile void __iomem *addr, void *buf, int ns)
+{
+	_insb(addr, buf, ns);
+	if (ns > 0 && EEH_POSSIBLE_ERROR(((u8 *)buf)[ns - 1], u8))
+		eeh_check_failure(addr);
+}
+
+/* _insw() @ns u16 items into @buf, then EEH-check the last (only if ns > 0). */
+static inline void eeh_readsw(const volatile void __iomem *addr, void *buf, int ns)
+{
+	_insw(addr, buf, ns);
+	if (ns > 0 && EEH_POSSIBLE_ERROR(((u16 *)buf)[ns - 1], u16))
+		eeh_check_failure(addr);
+}
+
+/* _insl() @nl u32 items into @buf, then EEH-check the last (only if nl > 0). */
+static inline void eeh_readsl(const volatile void __iomem *addr, void *buf, int nl)
+{
+	_insl(addr, buf, nl);
+	if (nl > 0 && EEH_POSSIBLE_ERROR(((u32 *)buf)[nl - 1], u32))
+		eeh_check_failure(addr);
+}
+
+
+void __init eeh_cache_debugfs_init(void);
+
+#endif /* CONFIG_PPC64 */
+#endif /* __KERNEL__ */
+#endif /* _POWERPC_EEH_H */
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
new file mode 100644
index 0000000000..dadde7d52f
--- /dev/null
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#ifndef ASM_POWERPC_EEH_EVENT_H
+#define ASM_POWERPC_EEH_EVENT_H
+#ifdef __KERNEL__
+
+/*
+ * structure holding pci controller data that describes a
+ * change in the isolation status of a PCI slot.  A pointer
+ * to this struct is passed as the data pointer in a notify
+ * callback.
+ */
+struct eeh_event {
+	struct list_head	list;	/* to form event queue	*/
+	struct eeh_pe		*pe;	/* EEH PE		*/
+};
+
+int eeh_event_init(void);
+int eeh_send_failure_event(struct eeh_pe *pe);
+int __eeh_send_failure_event(struct eeh_pe *pe);
+void eeh_remove_event(struct eeh_pe *pe, bool force);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
+
+#endif /* __KERNEL__ */
+#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/include/asm/ehv_pic.h b/arch/powerpc/include/asm/ehv_pic.h
new file mode 100644
index 0000000000..dc7d48e3ea
--- /dev/null
+++ b/arch/powerpc/include/asm/ehv_pic.h
@@ -0,0 +1,40 @@
+/*
+ * EHV_PIC private definitions and structure.
+ *
+ * Copyright 2008-2010 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#ifndef __EHV_PIC_H__
+#define __EHV_PIC_H__
+
+#include <linux/irq.h>
+
+#define NR_EHV_PIC_INTS 1024
+
+#define EHV_PIC_INFO(name) EHV_PIC_##name
+
+#define EHV_PIC_VECPRI_POLARITY_NEGATIVE 0
+#define EHV_PIC_VECPRI_POLARITY_POSITIVE 1
+#define EHV_PIC_VECPRI_SENSE_EDGE 0
+#define EHV_PIC_VECPRI_SENSE_LEVEL 0x2
+#define EHV_PIC_VECPRI_POLARITY_MASK 0x1
+#define EHV_PIC_VECPRI_SENSE_MASK 0x2
+
+struct ehv_pic {
+	/* The remapper for this EHV_PIC */
+	struct irq_domain	*irqhost;
+
+	/* The "linux" controller struct */
+	struct irq_chip	hc_irq;
+
+	/* core int flag */
+	int coreint_flag;
+};
+
+void ehv_pic_init(void);
+unsigned int ehv_pic_get_irq(void);
+
+#endif /* __EHV_PIC_H__ */
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
new file mode 100644
index 0000000000..79f1c480b5
--- /dev/null
+++ b/arch/powerpc/include/asm/elf.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ELF register definitions..
+ */
+#ifndef _ASM_POWERPC_ELF_H
+#define _ASM_POWERPC_ELF_H
+
+#include <linux/sched.h>	/* for task_struct */
+#include <asm/page.h>
+#include <asm/string.h>
+#include <uapi/asm/elf.h>
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == ELF_ARCH)
+#define compat_elf_check_arch(x)	((x)->e_machine == EM_PPC)
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(is_32bit_task() ? 0x000400000UL : \
+						   0x100000000UL)
+
+#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
+
+/*
+ * Our registers are always unsigned longs, whether we're a 32 bit
+ * process or 64 bit, on either a 64 bit or 32 bit kernel.
+ *
+ * This macro relies on elf_regs[i] having the right type to truncate to,
+ * either u32 or u64.  It defines the body of the elf_core_copy_regs
+ * function, either the native one with elf_gregset_t elf_regs or
+ * the 32-bit one with elf_gregset_t32 elf_regs.
+ */
+#define PPC_ELF_CORE_COPY_REGS(elf_regs, regs) \
+	int i, nregs = min(sizeof(*regs) / sizeof(unsigned long), \
+			   (size_t)ELF_NGREG);			  \
+	for (i = 0; i < nregs; i++) \
+		elf_regs[i] = ((unsigned long *) regs)[i]; \
+	memset(&elf_regs[i], 0, (ELF_NGREG - i) * sizeof(elf_regs[0]))
+
+/* Common routine for both 32-bit and 64-bit native processes */
+static inline void ppc_elf_core_copy_regs(elf_gregset_t elf_regs,
+					  struct pt_regs *regs)
+{
+	PPC_ELF_CORE_COPY_REGS(elf_regs, regs);
+}
+#define ELF_CORE_COPY_REGS(gregs, regs) ppc_elf_core_copy_regs(gregs, regs);
+
+/* ELF_HWCAP yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  This could be done in userspace,
+   but it's not easy, and we've already done it here.  */
+# define ELF_HWCAP	(cur_cpu_spec->cpu_user_features)
+# define ELF_HWCAP2	(cur_cpu_spec->cpu_user_features2)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM	(cur_cpu_spec->platform)
+
+/* While ELF_PLATFORM indicates the ISA supported by the platform, it
+ * may not accurately reflect the underlying behavior of the hardware
+ * (as in the case of running in Power5+ compatibility mode on a
+ * Power6 machine).  ELF_BASE_PLATFORM allows ld.so to load libraries
+ * that are tuned for the real hardware.
+ */
+#define ELF_BASE_PLATFORM (powerpc_base_platform)
+
+#ifdef __powerpc64__
+# define ELF_PLAT_INIT(_r, load_addr)	do {	\
+	_r->gpr[2] = load_addr; 		\
+} while (0)
+#endif /* __powerpc64__ */
+
+#ifdef __powerpc64__
+# define SET_PERSONALITY(ex)					\
+do {								\
+	if (((ex).e_flags & 0x3) == 2)				\
+		set_thread_flag(TIF_ELF2ABI);			\
+	else							\
+		clear_thread_flag(TIF_ELF2ABI);			\
+	if ((ex).e_ident[EI_CLASS] == ELFCLASS32)		\
+		set_thread_flag(TIF_32BIT);			\
+	else							\
+		clear_thread_flag(TIF_32BIT);			\
+	if (personality(current->personality) != PER_LINUX32)	\
+		set_personality(PER_LINUX |			\
+			(current->personality & (~PER_MASK)));	\
+} while (0)
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically. This
+ * is only required to work around bugs in old 32bit toolchains. Since
+ * the 64bit ABI has never had these issues, don't enable the workaround
+ * even if we have an executable stack.
+ */
+# define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \
+		(exec_stk == EXSTACK_DEFAULT) : 0)
+#else 
+# define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT)
+#endif /* __powerpc64__ */
+
+extern int dcache_bsize;
+extern int icache_bsize;
+extern int ucache_bsize;
+
+/* vDSO has arch_setup_additional_pages */
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int uses_interp);
+#define VDSO_AUX_ENT(a,b) NEW_AUX_ENT(a,b)
+
+/* 1GB for 64bit, 8MB for 32bit */
+#define STACK_RND_MASK (is_32bit_task() ? \
+	(0x7ff >> (PAGE_SHIFT - 12)) : \
+	(0x3ffff >> (PAGE_SHIFT - 12)))
+
+#ifdef CONFIG_SPU_BASE
+/* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */
+#define NT_SPU		1
+
+#define ARCH_HAVE_EXTRA_ELF_NOTES
+
+#endif /* CONFIG_SPU_BASE */
+
+#ifdef CONFIG_PPC64
+
+#define get_cache_geometry(level) \
+	(ppc64_caches.level.assoc << 16 | ppc64_caches.level.line_size)
+
+#define ARCH_DLINFO_CACHE_GEOMETRY					\
+	NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size);		\
+	NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i));	\
+	NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size);		\
+	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d));	\
+	NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size);		\
+	NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2));	\
+	NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size);		\
+	NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, get_cache_geometry(l3))
+
+#else
+#define ARCH_DLINFO_CACHE_GEOMETRY
+#endif
+
+/*
+ * The requirements here are:
+ * - keep the final alignment of sp (sp & 0xf)
+ * - make sure the 32-bit value at the first 16 byte aligned position of
+ *   AUXV is greater than 16 for glibc compatibility.
+ *   AT_IGNOREPPC is used for that.
+ * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
+ *   even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
+ * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
+ */
+#define COMMON_ARCH_DLINFO						\
+do {									\
+	/* Handle glibc compatibility. */				\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	/* Cache size items */						\
+	NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize);			\
+	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);			\
+	NEW_AUX_ENT(AT_UCACHEBSIZE, 0);					\
+	VDSO_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long)current->mm->context.vdso);\
+	ARCH_DLINFO_CACHE_GEOMETRY;					\
+} while (0)
+
+#define ARCH_DLINFO							\
+do {									\
+	COMMON_ARCH_DLINFO;						\
+	NEW_AUX_ENT(AT_MINSIGSTKSZ, get_min_sigframe_size());		\
+} while (0)
+
+#define COMPAT_ARCH_DLINFO						\
+do {									\
+	COMMON_ARCH_DLINFO;						\
+	NEW_AUX_ENT(AT_MINSIGSTKSZ, get_min_sigframe_size_compat());	\
+} while (0)
+
+/* Relocate the kernel image to @final_address */
+void relocate(unsigned long final_address);
+
+struct func_desc {
+	unsigned long addr;
+	unsigned long toc;
+	unsigned long env;
+};
+
+#endif /* _ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/asm/elfnote.h b/arch/powerpc/include/asm/elfnote.h
new file mode 100644
index 0000000000..a201b6e9ae
--- /dev/null
+++ b/arch/powerpc/include/asm/elfnote.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#ifndef __ASM_POWERPC_ELFNOTE_H__
+#define __ASM_POWERPC_ELFNOTE_H__
+
+/*
+ * These note types should live in a SHT_NOTE section and have
+ * "PowerPC" in the name field.
+ */
+
+/*
+ * The capabilities supported/required by this kernel (bitmap).
+ *
+ * This type uses a bitmap as "desc" field. Each bit is described
+ * in arch/powerpc/kernel/note.S
+ */
+#define PPC_ELFNOTE_CAPABILITIES 1
+
+#endif /* __ASM_POWERPC_ELFNOTE_H__ */
diff --git a/arch/powerpc/include/asm/emergency-restart.h b/arch/powerpc/include/asm/emergency-restart.h
new file mode 100644
index 0000000000..3711bd9d50
--- /dev/null
+++ b/arch/powerpc/include/asm/emergency-restart.h
@@ -0,0 +1 @@
+#include <asm-generic/emergency-restart.h>
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
new file mode 100644
index 0000000000..800cb21000
--- /dev/null
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  Copyright 2007 Sony Corporation
+ */
+
+#ifndef _ASM_POWERPC_EMULATED_OPS_H
+#define _ASM_POWERPC_EMULATED_OPS_H
+
+#include <linux/atomic.h>
+#include <linux/perf_event.h>
+
+
+#ifdef CONFIG_PPC_EMULATED_STATS
+
+struct ppc_emulated_entry {
+	const char *name;	/* passed to ppc_warn_emulated_print() */
+	atomic_t val;		/* incremented by __PPC_WARN_EMULATED() */
+};
+
+extern struct ppc_emulated {	/* member names are the 'type' argument of PPC_WARN_*() */
+#ifdef CONFIG_ALTIVEC
+	struct ppc_emulated_entry altivec;
+#endif
+	struct ppc_emulated_entry dcba;
+	struct ppc_emulated_entry dcbz;
+	struct ppc_emulated_entry fp_pair;
+	struct ppc_emulated_entry isel;
+	struct ppc_emulated_entry mcrxr;
+	struct ppc_emulated_entry mfpvr;
+	struct ppc_emulated_entry multiple;
+	struct ppc_emulated_entry popcntb;
+	struct ppc_emulated_entry spe;
+	struct ppc_emulated_entry string;
+	struct ppc_emulated_entry sync;
+	struct ppc_emulated_entry unaligned;
+#ifdef CONFIG_MATH_EMULATION
+	struct ppc_emulated_entry math;
+#endif
+#ifdef CONFIG_VSX
+	struct ppc_emulated_entry vsx;
+#endif
+#ifdef CONFIG_PPC64
+	struct ppc_emulated_entry mfdscr;
+	struct ppc_emulated_entry mtdscr;
+	struct ppc_emulated_entry lq_stq;
+	struct ppc_emulated_entry lxvw4x;
+	struct ppc_emulated_entry lxvh8x;
+	struct ppc_emulated_entry lxvd2x;
+	struct ppc_emulated_entry lxvb16x;
+#endif
+} ppc_emulated;	/* declared only; several members exist only under the config options above */
+
+extern u32 ppc_warn_emulated;
+
+extern void ppc_warn_emulated_print(const char *type);
+
+#define __PPC_WARN_EMULATED(type)	/* type: a member of ppc_emulated */ \
+	do {								 \
+		atomic_inc(&ppc_emulated.type.val);	/* always count */ \
+		if (ppc_warn_emulated)	/* print only when enabled */	 \
+			ppc_warn_emulated_print(ppc_emulated.type.name); \
+	} while (0)
+
+#else /* !CONFIG_PPC_EMULATED_STATS */
+
+#define __PPC_WARN_EMULATED(type)	do { } while (0)
+
+#endif /* !CONFIG_PPC_EMULATED_STATS */
+
+#define PPC_WARN_EMULATED(type, regs)	/* perf event, then the counter */ \
+	do {								\
+		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,		\
+			1, regs, 0);					\
+		__PPC_WARN_EMULATED(type);	/* no-op without CONFIG_PPC_EMULATED_STATS */ \
+	} while (0)
+
+#define PPC_WARN_ALIGNMENT(type, regs)	/* as PPC_WARN_EMULATED, address = DAR */ \
+	do {								\
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\
+			1, regs, (regs)->dar);	/* parenthesised: regs may be an expression */ \
+		__PPC_WARN_EMULATED(type);				\
+	} while (0)
+
+#endif /* _ASM_POWERPC_EMULATED_OPS_H */
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
new file mode 100644
index 0000000000..cdf3c6df51
--- /dev/null
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -0,0 +1,575 @@
+/*
+ * ePAPR hcall interface
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is provided under a dual BSD/GPL license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* A "hypercall" is an "sc 1" instruction.  This header file provides C
+ * wrapper functions for the ePAPR hypervisor interface.  It is intended
+ * for use by Linux device drivers and other operating systems.
+ *
+ * The hypercalls are implemented as inline assembly, rather than assembly
+ * language functions in a .S file, for optimization.  It allows
+ * the caller to issue the hypercall instruction directly, improving both
+ * performance and memory footprint.
+ */
+
+#ifndef _EPAPR_HCALLS_H
+#define _EPAPR_HCALLS_H
+
+#include <uapi/asm/epapr_hcalls.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+
+/*
+ * Hypercall register clobber list
+ *
+ * These macros are used to define the list of clobbered registers during a
+ * hypercall.  Technically, registers r0 and r3-r12 are always clobbered,
+ * but the gcc inline assembly syntax does not allow us to specify registers
+ * on the clobber list that are also on the input/output list.  Therefore,
+ * the lists of clobbered registers depends on the number of register
+ * parameters ("+r" and "=r") passed to the hypercall.
+ *
+ * Each assembly block should use one of the HCALL_CLOBBERSx macros.  As a
+ * general rule, 'x' is the number of parameters passed to the assembly
+ * block *except* for r11.
+ *
+ * If you're not sure, just use the smallest value of 'x' that does not
+ * generate a compilation error.  Because these are static inline functions,
+ * the compiler will only check the clobber list for a function if you
+ * compile code that calls that function.
+ *
+ * r3 and r11 are not included in any clobbers list because they are always
+ * listed as output registers.
+ *
+ * XER, CTR, and LR are currently listed as clobbers because it's uncertain
+ * whether they will be clobbered.
+ *
+ * Note that r11 can be used as an output parameter.
+ *
+ * The "memory" clobber is only necessary for hcalls where the Hypervisor
+ * will read or write guest memory. However, we add it to all hcalls because
+ * the impact is minimal, and we want to ensure that it's present for the
+ * hcalls that need it.
+*/
+
+/* Common clobbers.  Do not use directly; pick an EV_HCALL_CLOBBERSx below. */
+#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory"
+
+#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS	/* r3..r10 are all operands */
+#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10"
+#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9"
+#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8"
+#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7"
+#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6"
+#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
+#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"	/* only r3 is an operand: r4..r10 clobbered */
+
+extern bool epapr_paravirt_enabled;
+extern u32 epapr_hypercall_start[];
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+int __init epapr_paravirt_early_init(void);
+#else
+static inline int epapr_paravirt_early_init(void) { return 0; }
+#endif
+
+/*
+ * We use "uintptr_t" to define a register because it's guaranteed to be a
+ * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit
+ * platform.
+ *
+ * All registers are either input/output or output only.  Registers that are
+ * initialized before making the hypercall are input/output.  All
+ * input/output registers are represented with "+r".  Output-only registers
+ * are represented with "=r".  Do not specify any unused registers.  The
+ * clobber list will tell the compiler that the hypercall modifies those
+ * registers, which is good enough.
+ */
+
+/**
+ * ev_int_set_config - configure the specified interrupt
+ * @interrupt: the interrupt number
+ * @config: configuration for this interrupt
+ * @priority: interrupt priority
+ * @destination: destination CPU number
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_int_set_config(unsigned int interrupt,
+	uint32_t config, unsigned int priority, uint32_t destination)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG);	/* hcall token goes in r11 */
+	r3  = interrupt;				/* arguments go in r3..r6 */
+	r4  = config;
+	r5  = priority;
+	r6  = destination;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
+		: : EV_HCALL_CLOBBERS4	/* 4 operands besides r11: r3..r6 */
+	);
+
+	return r3;	/* truncated from uintptr_t to unsigned int */
+}
+
+/**
+ * ev_int_get_config - return the config of the specified interrupt
+ * @interrupt: the interrupt number
+ * @config: returned configuration for this interrupt
+ * @priority: returned interrupt priority
+ * @destination: returned destination CPU number
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_int_get_config(unsigned int interrupt,
+	uint32_t *config, unsigned int *priority, uint32_t *destination)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);	/* hcall token goes in r11 */
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
+		: : EV_HCALL_CLOBBERS4	/* r4..r6 are output-only here */
+	);
+
+	*config = r4;		/* outputs are stored whatever status r3 holds */
+	*priority = r5;
+	*destination = r6;
+
+	return r3;
+}
+
+/**
+ * ev_int_set_mask - sets the mask for the specified interrupt source
+ * @interrupt: the interrupt number
+ * @mask: 0=enable interrupts, 1=disable interrupts
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_int_set_mask(unsigned int interrupt,
+	unsigned int mask)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK);	/* hcall token goes in r11 */
+	r3 = interrupt;
+	r4 = mask;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4)
+		: : EV_HCALL_CLOBBERS2	/* 2 operands besides r11: r3, r4 */
+	);
+
+	return r3;
+}
+
+/**
+ * ev_int_get_mask - returns the mask for the specified interrupt source
+ * @interrupt: the interrupt number
+ * @mask: returned mask for this interrupt (0=enabled, 1=disabled)
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_int_get_mask(unsigned int interrupt,
+	unsigned int *mask)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);	/* hcall token goes in r11 */
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4)
+		: : EV_HCALL_CLOBBERS2	/* r4 is output-only here */
+	);
+
+	*mask = r4;	/* stored whatever status r3 holds */
+
+	return r3;
+}
+
+/**
+ * ev_int_eoi - signal the end of interrupt processing
+ * @interrupt: the interrupt number
+ *
+ * This function signals the end of processing for the specified
+ * interrupt, which must be the interrupt currently in service. By
+ * definition, this is also the highest-priority interrupt.
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_int_eoi(unsigned int interrupt)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_EOI);	/* hcall token goes in r11 */
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1	/* r3 is the only operand besides r11 */
+	);
+
+	return r3;
+}
+
+/**
+ * ev_byte_channel_send - send characters to a byte stream
+ * @handle: byte stream handle
+ * @count: (input) num of chars to send, (output) num chars sent
+ * @buffer: pointer to a 16-byte buffer
+ *
+ * @buffer must be at least 16 bytes long, because all 16 bytes will be
+ * read from memory into registers, even if count < 16.
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_byte_channel_send(unsigned int handle,
+	unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES])
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+	register uintptr_t r8 __asm__("r8");
+	const uint32_t *p = (const uint32_t *) buffer;	/* buffer is read as four 32-bit words */
+
+	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND);	/* hcall token goes in r11 */
+	r3 = handle;
+	r4 = *count;
+	r5 = be32_to_cpu(p[0]);	/* each word is interpreted as big-endian */
+	r6 = be32_to_cpu(p[1]);
+	r7 = be32_to_cpu(p[2]);
+	r8 = be32_to_cpu(p[3]);
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3),
+		  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
+		: : EV_HCALL_CLOBBERS6	/* 6 operands besides r11: r3..r8 */
+	);
+
+	*count = r4;	/* r4 comes back as the number of chars sent */
+
+	return r3;
+}
+
+/**
+ * ev_byte_channel_receive - fetch characters from a byte channel
+ * @handle: byte channel handle
+ * @count: (input) max num of chars to receive, (output) num chars received
+ * @buffer: pointer to a 16-byte buffer
+ *
+ * The size of @buffer must be at least 16 bytes, even if you request fewer
+ * than 16 characters, because we always write 16 bytes to @buffer.  This is
+ * for performance reasons.
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_byte_channel_receive(unsigned int handle,
+	unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES])
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+	register uintptr_t r8 __asm__("r8");
+	uint32_t *p = (uint32_t *) buffer;	/* buffer is written as four 32-bit words */
+
+	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE);	/* hcall token goes in r11 */
+	r3 = handle;
+	r4 = *count;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4),
+		  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
+		: : EV_HCALL_CLOBBERS6	/* r5..r8 are output-only here */
+	);
+
+	*count = r4;	/* r4 comes back as the number of chars received */
+	p[0] = cpu_to_be32(r5);	/* all four words are stored, whatever r3 and r4 hold */
+	p[1] = cpu_to_be32(r6);
+	p[2] = cpu_to_be32(r7);
+	p[3] = cpu_to_be32(r8);
+
+	return r3;
+}
+
+/**
+ * ev_byte_channel_poll - returns the status of the byte channel buffers
+ * @handle: byte channel handle
+ * @rx_count: returned count of bytes in receive queue
+ * @tx_count: returned count of free space in transmit queue
+ *
+ * This function reports the amount of data in the receive queue (i.e. the
+ * number of bytes you can read), and the amount of free space in the transmit
+ * queue (i.e. the number of bytes you can write).
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_byte_channel_poll(unsigned int handle,
+	unsigned int *rx_count,	unsigned int *tx_count)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+
+	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);	/* hcall token goes in r11 */
+	r3 = handle;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
+		: : EV_HCALL_CLOBBERS3	/* 3 operands besides r11: r3..r5 */
+	);
+
+	*rx_count = r4;	/* both counts are stored whatever status r3 holds */
+	*tx_count = r5;
+
+	return r3;
+}
+
+/**
+ * ev_int_iack - acknowledge an interrupt
+ * @handle: handle to the target interrupt controller
+ * @vector: returned interrupt vector
+ *
+ * If handle is zero, the function returns the next interrupt source
+ * number to be handled irrespective of the hierarchy or cascading
+ * of interrupt controllers. If non-zero, specifies a handle to the
+ * interrupt controller that is the target of the acknowledge.
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_int_iack(unsigned int handle,
+	unsigned int *vector)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_IACK);	/* hcall token goes in r11 */
+	r3 = handle;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4)
+		: : EV_HCALL_CLOBBERS2	/* r4 is output-only here */
+	);
+
+	*vector = r4;	/* stored whatever status r3 holds */
+
+	return r3;
+}
+
+/**
+ * ev_doorbell_send - send a doorbell to another partition
+ * @handle: doorbell send handle
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_doorbell_send(unsigned int handle)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);	/* hcall token goes in r11 */
+	r3 = handle;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1	/* r3 is the only operand besides r11 */
+	);
+
+	return r3;
+}
+
+/**
+ * ev_idle - wait for next interrupt on this core
+ *
+ * Returns 0 for success, or an error code (the value left in r3).
+ */
+static inline unsigned int ev_idle(void)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = EV_HCALL_TOKEN(EV_IDLE);	/* hcall token goes in r11; no arguments */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "=r" (r3)	/* r3 carries no input, so it is output-only */
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+static inline unsigned long epapr_hypercall(unsigned long *in,	/* 8 words, read into r3..r10 */
+			    unsigned long *out,	/* 8 words, written from r4..r11 */
+			    unsigned long nr)	/* loaded into r11 before the call */
+{
+	register unsigned long r0 asm("r0");
+	register unsigned long r3 asm("r3") = in[0];
+	register unsigned long r4 asm("r4") = in[1];
+	register unsigned long r5 asm("r5") = in[2];
+	register unsigned long r6 asm("r6") = in[3];
+	register unsigned long r7 asm("r7") = in[4];
+	register unsigned long r8 asm("r8") = in[5];
+	register unsigned long r9 asm("r9") = in[6];
+	register unsigned long r10 asm("r10") = in[7];
+	register unsigned long r11 asm("r11") = nr;
+	register unsigned long r12 asm("r12");
+
+	/* r0 and r12 are outputs that are never read: presumably only to mark them as modified. */
+	asm volatile("bl	epapr_hypercall_start"
+		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
+		       "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
+		       "=r"(r12)
+		     : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
+		       "r"(r9), "r"(r10), "r"(r11)
+		     : "memory", "cc", "xer", "ctr", "lr");
+	/* out[] starts at r4; r3 is the return value. */
+	out[0] = r4;
+	out[1] = r5;
+	out[2] = r6;
+	out[3] = r7;
+	out[4] = r8;
+	out[5] = r9;
+	out[6] = r10;
+	out[7] = r11;
+
+	return r3;
+}
+#else
+static inline unsigned long epapr_hypercall(unsigned long *in,
+			    unsigned long *out,
+			    unsigned long nr)
+{
+	return EV_UNIMPLEMENTED;	/* !CONFIG_EPAPR_PARAVIRT stub: @in is ignored and @out is never written */
+}
+#endif
+
+static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
+{
+	unsigned long in[8] = {0};	/* no arguments */
+	/* Zeroed: the !CONFIG_EPAPR_PARAVIRT stub never writes out[]. */
+	unsigned long out[8] = {0};
+	unsigned long r;
+
+	r = epapr_hypercall(in, out, nr);
+	*r2 = out[0];	/* first output word; 0 if the hcall produced none */
+	return r;
+}
+
+static inline long epapr_hypercall0(unsigned int nr)
+{
+	unsigned long results[8];	/* outputs are discarded */
+	unsigned long args[8] = { 0 };	/* no arguments: every slot zero */
+
+	return epapr_hypercall(args, results, nr);
+}
+
+static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
+{
+	/* @p1 fills the first input slot; the other seven stay zero. */
+	unsigned long args[8] = { p1 };
+	unsigned long results[8];
+
+	return epapr_hypercall(args, results, nr);
+}
+
+static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
+				    unsigned long p2)
+{
+	/* Slots beyond the listed arguments are zero-initialised. */
+	unsigned long args[8] = { p1, p2 };
+	unsigned long results[8];
+
+	/* Only the status is returned; results[] is dropped. */
+	return epapr_hypercall(args, results, nr);
+}
+
+static inline long epapr_hypercall3(unsigned int nr, unsigned long p1,
+				    unsigned long p2, unsigned long p3)
+{
+	unsigned long results[8];
+	unsigned long args[8] = {
+		p1, p2, p3,	/* slots 3..7 stay zero */
+	};
+
+	/* Only the status is returned; results[] is dropped. */
+	return epapr_hypercall(args, results, nr);
+}
+
+static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
+				    unsigned long p2, unsigned long p3,
+				    unsigned long p4)
+{
+	unsigned long results[8];
+	unsigned long args[8] = {
+		p1, p2,
+		p3, p4,		/* slots 4..7 stay zero */
+	};
+
+	/* Only the status is returned; results[] is dropped. */
+	return epapr_hypercall(args, results, nr);
+}
+#endif /* !__ASSEMBLY__ */
+#endif /* _EPAPR_HCALLS_H */
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
new file mode 100644
index 0000000000..b1ef1e92c3
--- /dev/null
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Definitions for use by exception code on Book3-E
+ *
+ *  Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ */
+#ifndef _ASM_POWERPC_EXCEPTION_64E_H
+#define _ASM_POWERPC_EXCEPTION_64E_H
+
+/*
+ * SPRG usage and other considerations...
+ *
+ * Since TLB miss and other standard exceptions can be interrupted by
+ * critical exceptions which can themselves be interrupted by machine
+ * checks, and since the two latter can themselves cause a TLB miss when
+ * hitting the linear mapping for the kernel stacks, we need to be a bit
+ * creative on how we use SPRGs.
+ *
+ * The base idea is that we have one SPRG reserved for critical and one
+ * for machine check interrupts. Those are used to save a GPR that can
+ * then be used to get the PACA, and store as much context as we need
+ * to save in there. That includes saving the SPRGs used by the TLB miss
+ * handler for linear mapping misses and the associated SRR0/1 due to
+ * the above re-entrancy issue.
+ *
+ * So here's the current usage pattern. It's done regardless of which
+ * SPRGs are user-readable though, thus we might have to change some of
+ * this later. In order to do that more easily, we use special constants
+ * for naming them
+ *
+ * WARNING: Some of these SPRGs are user readable. We need to do something
+ * about it at some point by making sure they can't be used to leak kernel
+ * critical data.
+ */
+
+#define PACA_EXGDBELL PACA_EXGEN
+
+/* We are out of SPRGs so we save some things in the PACA. The normal
+ * exception frame is smaller than the CRIT or MC one though
+ */
+#define EX_R1		(0 * 8)
+#define EX_CR		(1 * 8)
+#define EX_R10		(2 * 8)
+#define EX_R11		(3 * 8)
+#define EX_R14		(4 * 8)
+#define EX_R15		(5 * 8)
+
+/*
+ * The TLB miss exception uses different slots (byte offsets into one frame).
+ *
+ * The bolted variant uses only the first six fields,
+ * which in combination with pgd and kernel_pgd fits in
+ * one 64-byte cache line.
+ */
+
+#define EX_TLB_R10	( 0 * 8)
+#define EX_TLB_R11	( 1 * 8)
+#define EX_TLB_R14	( 2 * 8)
+#define EX_TLB_R15	( 3 * 8)
+#define EX_TLB_R16	( 4 * 8)
+#define EX_TLB_CR	( 5 * 8)
+#define EX_TLB_R12	( 6 * 8)
+#define EX_TLB_R13	( 7 * 8)
+#define EX_TLB_DEAR	( 8 * 8) /* Level 0 and 2 only */
+#define EX_TLB_ESR	( 9 * 8) /* Level 0 and 2 only */
+#define EX_TLB_SRR0	(10 * 8)
+#define EX_TLB_SRR1	(11 * 8)
+#define EX_TLB_R7	(12 * 8)
+#define EX_TLB_SIZE	(13 * 8) /* one frame; TLB_MISS_PROLOG advances EXFRAME by this */
+
+#define	START_EXCEPTION(label)						\
+	.globl exc_##label##_book3e;					\
+exc_##label##_book3e:
+
+/* TLB miss exception prolog
+ *
+ * This prolog handles re-entrancy (up to 3 levels supported in the PACA
+ * though we currently don't test for overflow). It provides you with a
+ * re-entrancy safe working space of r10...r16 and CR with r12 being used
+ * as the exception area pointer in the PACA for that level of re-entrancy
+ * and r13 containing the PACA pointer.
+ *
+ * SRR0 and SRR1 are saved, but DEAR and ESR are not, since they don't apply
+ * as-is for instruction exceptions. It's up to the actual exception code
+ * to save them as well if required.
+ */
+#define TLB_MISS_PROLOG							    \
+	mtspr	SPRN_SPRG_TLB_SCRATCH,r12;	/* park r12 in the scratch SPRG */ \
+	mfspr	r12,SPRN_SPRG_TLB_EXFRAME;	/* r12 = frame for this level */ \
+	std	r10,EX_TLB_R10(r12);					    \
+	mfcr	r10;				/* r10 = CR */		    \
+	std	r11,EX_TLB_R11(r12);					    \
+	mfspr	r11,SPRN_SPRG_TLB_SCRATCH;	/* r11 = original r12 */    \
+	std	r13,EX_TLB_R13(r12);					    \
+	mfspr	r13,SPRN_SPRG_PACA;					    \
+	std	r14,EX_TLB_R14(r12);					    \
+	addi	r14,r12,EX_TLB_SIZE;		/* r14 = next frame */	    \
+	std	r15,EX_TLB_R15(r12);					    \
+	mfspr	r15,SPRN_SRR1;						    \
+	std	r16,EX_TLB_R16(r12);					    \
+	mfspr	r16,SPRN_SRR0;						    \
+	std	r10,EX_TLB_CR(r12);		/* saved CR */		    \
+	std	r11,EX_TLB_R12(r12);		/* saved original r12 */    \
+	mtspr	SPRN_SPRG_TLB_EXFRAME,r14;	/* a nested miss gets the next frame */ \
+	std	r15,EX_TLB_SRR1(r12);					    \
+	std	r16,EX_TLB_SRR0(r12);
+
+/* And these are the matching epilogs that restore things
+ *
+ * There are 3 epilogs:
+ *
+ * - SUCCESS       : Unwinds one level
+ * - ERROR         : restore from level 0 and reset
+ * - ERROR_SPECIAL : restore from current level and reset
+ *
+ * Normal errors use ERROR, that is, they restore the initial fault context
+ * and trigger a fault. However, there is a special case for linear mapping
+ * errors. Those should basically never happen, but if they do happen, we
+ * want the error to point out the context that did that linear mapping
+ * fault, not the initial level 0 (basically, we got a bogus PGF or something
+ * like that). For userland errors on the linear mapping, there is no
+ * difference since those are always level 0 anyway
+ */
+
+#define TLB_MISS_RESTORE(freg)	/* r12 = frame to restore from; freg = new EXFRAME */ \
+	ld	r14,EX_TLB_CR(r12);					    \
+	ld	r10,EX_TLB_R10(r12);					    \
+	ld	r15,EX_TLB_SRR0(r12);					    \
+	ld	r16,EX_TLB_SRR1(r12);					    \
+	mtspr	SPRN_SPRG_TLB_EXFRAME,freg;	/* freg is consumed before r11 is reloaded */ \
+	ld	r11,EX_TLB_R11(r12);					    \
+	mtcr	r14;							    \
+	ld	r13,EX_TLB_R13(r12);					    \
+	ld	r14,EX_TLB_R14(r12);					    \
+	mtspr	SPRN_SRR0,r15;						    \
+	ld	r15,EX_TLB_R15(r12);					    \
+	mtspr	SPRN_SRR1,r16;						    \
+	ld	r16,EX_TLB_R16(r12);					    \
+	ld	r12,EX_TLB_R12(r12);	/* last: r12 was the frame pointer */ \
+
+#define TLB_MISS_EPILOG_SUCCESS	/* EXFRAME = current frame */	    \
+	TLB_MISS_RESTORE(r12)
+
+#define TLB_MISS_EPILOG_ERROR	/* restore from, and reset EXFRAME to, r13 + PACA_EXTLB */ \
+	addi	r12,r13,PACA_EXTLB;					    \
+	TLB_MISS_RESTORE(r12)
+
+#define TLB_MISS_EPILOG_ERROR_SPECIAL	/* restore from current r12; EXFRAME = r13 + PACA_EXTLB */ \
+	addi	r11,r13,PACA_EXTLB;					    \
+	TLB_MISS_RESTORE(r11)
+
+#ifndef __ASSEMBLY__
+extern unsigned int interrupt_base_book3e;
+#endif
+
+#define SET_IVOR(vector_number, vector_offset)	/* clobbers r3 */ \
+	LOAD_REG_ADDR(r3,interrupt_base_book3e);\
+	ori	r3,r3,vector_offset@l;	/* OR in the low half of the offset */ \
+	mtspr	SPRN_IVOR##vector_number,r3;
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+#define RFI_TO_KERNEL							\
+	rfi
+
+#define RFI_TO_USER							\
+	rfi
+
+#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
+
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
new file mode 100644
index 0000000000..bb6f78fcf9
--- /dev/null
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_EXCEPTION_H
+#define _ASM_POWERPC_EXCEPTION_H
+/*
+ * Extracted from head_64.S
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *
+ *  Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
+ *    Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
+ *
+ *  This file contains the low-level support and setup for the
+ *  PowerPC-64 platform, including trap and interrupt dispatch.
+ */
+/*
+ * The following macros define the code that appears as
+ * the prologue to each of the exception handlers.  They
+ * are split into two parts to allow a single kernel binary
+ * to be used for pSeries and iSeries.
+ *
+ * We make as much of the exception code common between native
+ * exception handlers (including pSeries LPAR) and iSeries LPAR
+ * implementations as possible.
+ */
+#include <asm/feature-fixups.h>
+
+/* PACA save area size in u64 units (exgen, exmc, etc): 10 * 8 = 80 bytes */
+#define EX_SIZE		10
+
+/* PACA save area offsets, in bytes; 8 apart except EX_DSISR/EX_CCR */
+#define EX_R9		0
+#define EX_R10		8
+#define EX_R11		16
+#define EX_R12		24
+#define EX_R13		32
+#define EX_DAR		40
+#define EX_DSISR	48	/* 4 bytes before the next offset */
+#define EX_CCR		52	/* 4 bytes before the next offset */
+#define EX_CFAR		56
+#define EX_PPR		64
+#define EX_CTR		72	/* last slot: 72 + 8 = EX_SIZE * 8 */
+
+/*
+ * maximum recursive depth of MCE exceptions
+ */
+#define MAX_MCE_DEPTH	4
+
+#ifdef __ASSEMBLY__
+
+#define STF_ENTRY_BARRIER_SLOT	/* fixup-section marker + 3 nops */	\
+	STF_ENTRY_BARRIER_FIXUP_SECTION;				\
+	nop;								\
+	nop;								\
+	nop
+
+#define STF_EXIT_BARRIER_SLOT	/* fixup-section marker + 6 nops */	\
+	STF_EXIT_BARRIER_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop
+
+#define ENTRY_FLUSH_SLOT	/* 3 nops; expansion ends with ';' */	\
+	ENTRY_FLUSH_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop;
+
+#define SCV_ENTRY_FLUSH_SLOT	/* 3 nops; expansion ends with ';' */	\
+	SCV_ENTRY_FLUSH_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop;
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL						\
+	STF_ENTRY_BARRIER_SLOT;						\
+	ENTRY_FLUSH_SLOT
+
+/*
+ * r10, ctr must be free to use, r13 must be paca
+ */
+#define SCV_INTERRUPT_TO_KERNEL						\
+	STF_ENTRY_BARRIER_SLOT;						\
+	SCV_ENTRY_FLUSH_SLOT
+
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ *
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE
+ * without additional synchronisation instructions.
+ *
+ * soft-masked interrupt replay does not include a context-synchronising rfid,
+ * but those always return to kernel, the sync is only required when returning
+ * to user.
+ */
+#define RFI_FLUSH_SLOT							\
+	RFI_FLUSH_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop
+
+#define RFI_TO_KERNEL	/* bare rfid: no exit barrier or flush slot */	\
+	rfid
+
+#define RFI_TO_USER	/* exit barrier slot, flush slot, then rfid */	\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback	/* NOTE(review): sits after rfid; presumably reached only from a patched flush slot - confirm */
+
+#define RFI_TO_USER_OR_KERNEL	/* same expansion as RFI_TO_USER */	\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define RFI_TO_GUEST	/* same expansion as RFI_TO_USER */		\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	rfid;								\
+	b	rfi_flush_fallback
+
+#define HRFI_TO_KERNEL	/* bare hrfid: no exit barrier or flush slot */	\
+	hrfid
+
+#define HRFI_TO_USER	/* as RFI_TO_USER, with hrfid and hrfi_flush_fallback */ \
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL	/* same expansion as HRFI_TO_USER */	\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_GUEST	/* same expansion as HRFI_TO_USER */		\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN	/* same expansion as HRFI_TO_USER */		\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	hrfid;								\
+	b	hrfi_flush_fallback
+
+#define RFSCV_TO_USER	/* as RFI_TO_USER, with RFSCV and rfscv_flush_fallback */ \
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	RFSCV;								\
+	b	rfscv_flush_fallback
+
+#else /* __ASSEMBLY__ */
+/* Prototype for function defined in exceptions-64s.S */
+void do_uaccess_flush(void);
+#endif /* __ASSEMBLY__ */
+
+#endif	/* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/exec.h b/arch/powerpc/include/asm/exec.h
new file mode 100644
index 0000000000..92cac48512
--- /dev/null
+++ b/arch/powerpc/include/asm/exec.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_EXEC_H
+#define _ASM_POWERPC_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif /* _ASM_POWERPC_EXEC_H */
diff --git a/arch/powerpc/include/asm/extable.h b/arch/powerpc/include/asm/extable.h
new file mode 100644
index 0000000000..26ce2e5c0f
--- /dev/null
+++ b/arch/powerpc/include/asm/extable.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_POWERPC_EXTABLE_H
+#define _ARCH_POWERPC_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative addresses: the first is
+ * the address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out what
+ * to do.
+ *
+ * All the routines below use bits of fixup code that are out of line with the
+ * main instruction path.  This means when everything is well, we don't even
+ * have to jump over them.  Further, they do not intrude on our cache or tlb
+ * entries.
+ */
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#ifndef __ASSEMBLY__
+
+struct exception_table_entry {
+	int insn;	/* offset from this field to the instruction that may fault */
+	int fixup;	/* offset from this field to the continuation address */
+};
+/* Turn the self-relative fixup offset of entry @x into an absolute address. */
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->fixup + x->fixup;
+}
+
+#endif
+
+/*
+ * Emit one __ex_table entry: self-relative offsets of _fault and _target.
+ */
+#define EX_TABLE(_fault, _target)		\
+	stringify_in_c(.section __ex_table,"a";)\
+	stringify_in_c(.balign 4;)		\
+	stringify_in_c(.long (_fault) - . ;)	\
+	stringify_in_c(.long (_target) - . ;)	\
+	stringify_in_c(.previous)
+
+#endif
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
new file mode 100644
index 0000000000..27f9e11eda
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump internal code.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
+#define _ASM_POWERPC_FADUMP_INTERNAL_H
+
+/* Maximum number of memory regions kernel supports */
+#define FADUMP_MAX_MEM_REGS			128
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
+
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO			4
+
+#define memblock_num_regions(memblock_type)	(memblock.memblock_type.cnt)
+
+/* FAD commands */
+#define FADUMP_REGISTER			1
+#define FADUMP_UNREGISTER		2
+#define FADUMP_INVALIDATE		3
+
+/*
+ * Pack the ASCII values of the first 8 characters of @str into a u64, with
+ * the first character in the most significant byte. Once the terminating NUL
+ * is reached the remaining low-order bytes are left as zero.
+ * e.g. "FADMPINF" becomes 0x4641444d50494e46
+ */
+static inline u64 fadump_str_to_u64(const char *str)
+{
+	u64 packed = 0;
+	int left;
+
+	for (left = sizeof(packed); left > 0; left--)
+		packed = (packed << 8) | (*str ? *str++ : 0);
+	return packed;
+}
+
+#define FADUMP_CPU_UNKNOWN		(~((u32)0))
+
+#define FADUMP_CRASH_INFO_MAGIC		fadump_str_to_u64("FADMPINF")
+
+/* fadump crash info structure */
+struct fadump_crash_info_header {
+	u64		magic_number;
+	u64		elfcorehdr_addr;
+	u32		crashing_cpu;
+	struct pt_regs	regs;
+	struct cpumask	cpu_mask;
+};
+
+struct fadump_memory_range {
+	u64	base;
+	u64	size;
+};
+
+/* fadump memory ranges info */
+#define RNG_NAME_SZ			16
+struct fadump_mrange_info {
+	char				name[RNG_NAME_SZ];
+	struct fadump_memory_range	*mem_ranges;
+	u32				mem_ranges_sz;
+	u32				mem_range_cnt;
+	u32				max_mem_ranges;
+	bool				is_static;
+};
+
+/* Platform specific callback functions */
+struct fadump_ops;
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+	unsigned long	reserve_dump_area_start;
+	unsigned long	reserve_dump_area_size;
+	/* cmd line option during boot */
+	unsigned long	reserve_bootvar;
+
+	unsigned long	cpu_state_data_size;
+	u64		cpu_state_dest_vaddr;
+	u32		cpu_state_data_version;
+	u32		cpu_state_entry_size;
+
+	unsigned long	hpte_region_size;
+
+	unsigned long	boot_memory_size;
+	u64		boot_mem_dest_addr;
+	u64		boot_mem_addr[FADUMP_MAX_MEM_REGS];
+	u64		boot_mem_sz[FADUMP_MAX_MEM_REGS];
+	u64		boot_mem_top;
+	u64		boot_mem_regs_cnt;
+
+	unsigned long	fadumphdr_addr;
+	unsigned long	cpu_notes_buf_vaddr;
+	unsigned long	cpu_notes_buf_size;
+
+	/*
+	 * Maximum size supported by firmware to copy from source to
+	 * destination address per entry.
+	 */
+	u64		max_copy_size;
+	u64		kernel_metadata;
+
+	int		ibm_configure_kernel_dump;
+
+	unsigned long	fadump_enabled:1;
+	unsigned long	fadump_supported:1;
+	unsigned long	dump_active:1;
+	unsigned long	dump_registered:1;
+	unsigned long	nocma:1;
+
+	struct fadump_ops	*ops;
+};
+
+struct fadump_ops {
+	u64	(*fadump_init_mem_struct)(struct fw_dump *fadump_conf);
+	u64	(*fadump_get_metadata_size)(void);
+	int	(*fadump_setup_metadata)(struct fw_dump *fadump_conf);
+	u64	(*fadump_get_bootmem_min)(void);
+	int	(*fadump_register)(struct fw_dump *fadump_conf);
+	int	(*fadump_unregister)(struct fw_dump *fadump_conf);
+	int	(*fadump_invalidate)(struct fw_dump *fadump_conf);
+	void	(*fadump_cleanup)(struct fw_dump *fadump_conf);
+	int	(*fadump_process)(struct fw_dump *fadump_conf);
+	void	(*fadump_region_show)(struct fw_dump *fadump_conf,
+				      struct seq_file *m);
+	void	(*fadump_trigger)(struct fadump_crash_info_header *fdh,
+				  const char *msg);
+};
+
+/* Helper functions */
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus);
+void fadump_free_cpu_notes_buf(void);
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
+void __init fadump_update_elfcore_header(char *bufp);
+bool is_fadump_boot_mem_contiguous(void);
+bool is_fadump_reserved_mem_contiguous(void);
+
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+	u64	boot_mem_top;
+	u64	dump_active;
+};
+
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+#ifdef CONFIG_PPC_PSERIES
+extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+extern void opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#endif /* _ASM_POWERPC_FADUMP_INTERNAL_H */
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
new file mode 100644
index 0000000000..526a6a6473
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware Assisted dump header file.
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_POWERPC_FADUMP_H
+#define _ASM_POWERPC_FADUMP_H
+
+#ifdef CONFIG_FA_DUMP
+
+extern int crashing_cpu;
+
+extern int is_fadump_memory_area(u64 addr, ulong size);
+extern int setup_fadump(void);
+extern int is_fadump_active(void);
+extern int should_fadump_crash(void);
+extern void crash_fadump(struct pt_regs *, const char *);
+extern void fadump_cleanup(void);
+
+#else	/* CONFIG_FA_DUMP */
+static inline int is_fadump_active(void) { return 0; }
+static inline int should_fadump_crash(void) { return 0; }
+static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
+static inline void fadump_cleanup(void) { }
+#endif /* !CONFIG_FA_DUMP */
+
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+				      int depth, void *data);
+extern int fadump_reserve_mem(void);
+#endif
+#endif /* _ASM_POWERPC_FADUMP_H */
diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/fb.h
new file mode 100644
index 0000000000..5f1a2e5f76
--- /dev/null
+++ b/arch/powerpc/include/asm/fb.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+
+#include <linux/fs.h>
+
+#include <asm/page.h>
+
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+	unsigned long pfn = off >> PAGE_SHIFT, len = vma->vm_end - vma->vm_start;
+	/* Recompute the VMA protection from the page index of @off and its length. */
+	vma->vm_page_prot = phys_mem_access_prot(file, pfn, len, vma->vm_page_prot);
+}
+#define fb_pgprotect fb_pgprotect
+
+#include <asm-generic/fb.h>
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
new file mode 100644
index 0000000000..77824bd289
--- /dev/null
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_POWERPC_FEATURE_FIXUPS_H
+#define __ASM_POWERPC_FEATURE_FIXUPS_H
+
+#include <asm/asm-const.h>
+
+/*
+ */
+
+/*
+ * Feature section common macros
+ *
+ * Note that the entries now contain offsets between the table entry
+ * and the code rather than absolute code pointers in order to be
+ * useable with the vdso shared library. There is also an assumption
+ * that values will be negative, that is, the fixup table has to be
+ * located after the code it fixes up.
+ */
+#if defined(CONFIG_PPC64) && !defined(__powerpc64__)
+/* 64 bits kernel, 32 bits code (ie. vdso32) */
+#define FTR_ENTRY_LONG		.8byte
+#define FTR_ENTRY_OFFSET	.long 0xffffffff; .long
+#elif defined(CONFIG_PPC64)
+#define FTR_ENTRY_LONG		.8byte
+#define FTR_ENTRY_OFFSET	.8byte
+#else
+#define FTR_ENTRY_LONG		.long
+#define FTR_ENTRY_OFFSET	.long
+#endif
+
+#define START_FTR_SECTION(label)	label##1:
+
+#define FTR_SECTION_ELSE_NESTED(label)			\
+label##2:						\
+	.pushsection __ftr_alt_##label,"a";		\
+	.align 2;					\
+label##3:
+
+
+#ifndef CONFIG_CC_IS_CLANG
+#define CHECK_ALT_SIZE(else_size, body_size)			\
+	.ifgt (else_size) - (body_size);			\
+	.error "Feature section else case larger than body";	\
+	.endif;
+#else
+/*
+ * If we use the ifgt syntax above, clang's assembler complains about the
+ * expression being non-absolute when the code appears in an inline assembly
+ * statement.
+ * As a workaround use an .org directive that has no effect if the else case
+ * instructions are smaller than the body, but fails otherwise.
+ */
+#define CHECK_ALT_SIZE(else_size, body_size)			\
+	.org . + ((else_size) > (body_size));
+#endif
+
+#define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect)		\
+label##4:							\
+	.popsection;						\
+	.pushsection sect,"a";					\
+	.align 3;						\
+label##5:							\
+	FTR_ENTRY_LONG msk;					\
+	FTR_ENTRY_LONG val;					\
+	FTR_ENTRY_OFFSET label##1b-label##5b;			\
+	FTR_ENTRY_OFFSET label##2b-label##5b;			\
+	FTR_ENTRY_OFFSET label##3b-label##5b;			\
+	FTR_ENTRY_OFFSET label##4b-label##5b;			\
+	CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \
+	.popsection;
+
+
+/* CPU feature dependent sections */
+#define BEGIN_FTR_SECTION_NESTED(label)	START_FTR_SECTION(label)
+#define BEGIN_FTR_SECTION		START_FTR_SECTION(97)
+
+#define END_FTR_SECTION_NESTED(msk, val, label) 		\
+	FTR_SECTION_ELSE_NESTED(label)				\
+	MAKE_FTR_SECTION_ENTRY(msk, val, label, __ftr_fixup)
+
+#define END_FTR_SECTION(msk, val)		\
+	END_FTR_SECTION_NESTED(msk, val, 97)
+
+#define END_FTR_SECTION_NESTED_IFSET(msk, label)	\
+	END_FTR_SECTION_NESTED((msk), (msk), label)
+
+#define END_FTR_SECTION_IFSET(msk)	END_FTR_SECTION((msk), (msk))
+#define END_FTR_SECTION_IFCLR(msk)	END_FTR_SECTION((msk), 0)
+
+/* CPU feature sections with alternatives, use BEGIN_FTR_SECTION to start */
+#define FTR_SECTION_ELSE	FTR_SECTION_ELSE_NESTED(97)
+#define ALT_FTR_SECTION_END_NESTED(msk, val, label)	\
+	MAKE_FTR_SECTION_ENTRY(msk, val, label, __ftr_fixup)
+#define ALT_FTR_SECTION_END_NESTED_IFSET(msk, label)	\
+	ALT_FTR_SECTION_END_NESTED(msk, msk, label)
+#define ALT_FTR_SECTION_END_NESTED_IFCLR(msk, label)	\
+	ALT_FTR_SECTION_END_NESTED(msk, 0, label)
+#define ALT_FTR_SECTION_END(msk, val)	\
+	ALT_FTR_SECTION_END_NESTED(msk, val, 97)
+#define ALT_FTR_SECTION_END_IFSET(msk)	\
+	ALT_FTR_SECTION_END_NESTED_IFSET(msk, 97)
+#define ALT_FTR_SECTION_END_IFCLR(msk)	\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(msk, 97)
+
+/* MMU feature dependent sections */
+#define BEGIN_MMU_FTR_SECTION_NESTED(label)	START_FTR_SECTION(label)
+#define BEGIN_MMU_FTR_SECTION			START_FTR_SECTION(97)
+
+#define END_MMU_FTR_SECTION_NESTED(msk, val, label) 		\
+	FTR_SECTION_ELSE_NESTED(label)				\
+	MAKE_FTR_SECTION_ENTRY(msk, val, label, __mmu_ftr_fixup)
+
+#define END_MMU_FTR_SECTION(msk, val)		\
+	END_MMU_FTR_SECTION_NESTED(msk, val, 97)
+
+#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label)	\
+	END_MMU_FTR_SECTION_NESTED((msk), (msk), label)
+
+#define END_MMU_FTR_SECTION_NESTED_IFCLR(msk, label)	\
+	END_MMU_FTR_SECTION_NESTED((msk), 0, label)
+
+#define END_MMU_FTR_SECTION_IFSET(msk)	END_MMU_FTR_SECTION((msk), (msk))
+#define END_MMU_FTR_SECTION_IFCLR(msk)	END_MMU_FTR_SECTION((msk), 0)
+
+/* MMU feature sections with alternatives, use BEGIN_MMU_FTR_SECTION to start */
+#define MMU_FTR_SECTION_ELSE_NESTED(label)	FTR_SECTION_ELSE_NESTED(label)
+#define MMU_FTR_SECTION_ELSE	MMU_FTR_SECTION_ELSE_NESTED(97)
+#define ALT_MMU_FTR_SECTION_END_NESTED(msk, val, label)	\
+	MAKE_FTR_SECTION_ENTRY(msk, val, label, __mmu_ftr_fixup)
+#define ALT_MMU_FTR_SECTION_END_NESTED_IFSET(msk, label)	\
+	ALT_MMU_FTR_SECTION_END_NESTED(msk, msk, label)
+#define ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(msk, label)	\
+	ALT_MMU_FTR_SECTION_END_NESTED(msk, 0, label)
+#define ALT_MMU_FTR_SECTION_END(msk, val)	\
+	ALT_MMU_FTR_SECTION_END_NESTED(msk, val, 97)
+#define ALT_MMU_FTR_SECTION_END_IFSET(msk)	\
+	ALT_MMU_FTR_SECTION_END_NESTED_IFSET(msk, 97)
+#define ALT_MMU_FTR_SECTION_END_IFCLR(msk)	\
+	ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(msk, 97)
+
+/* Firmware feature dependent sections */
+#define BEGIN_FW_FTR_SECTION_NESTED(label)	START_FTR_SECTION(label)
+#define BEGIN_FW_FTR_SECTION			START_FTR_SECTION(97)
+
+#define END_FW_FTR_SECTION_NESTED(msk, val, label) 		\
+	FTR_SECTION_ELSE_NESTED(label)				\
+	MAKE_FTR_SECTION_ENTRY(msk, val, label, __fw_ftr_fixup)
+
+#define END_FW_FTR_SECTION(msk, val)		\
+	END_FW_FTR_SECTION_NESTED(msk, val, 97)
+
+#define END_FW_FTR_SECTION_IFSET(msk)	END_FW_FTR_SECTION((msk), (msk))
+#define END_FW_FTR_SECTION_IFCLR(msk)	END_FW_FTR_SECTION((msk), 0)
+
+/* Firmware feature sections with alternatives */
+#define FW_FTR_SECTION_ELSE_NESTED(label)	FTR_SECTION_ELSE_NESTED(label)
+#define FW_FTR_SECTION_ELSE	FTR_SECTION_ELSE_NESTED(97)
+#define ALT_FW_FTR_SECTION_END_NESTED(msk, val, label)	\
+	MAKE_FTR_SECTION_ENTRY(msk, val, label, __fw_ftr_fixup)
+#define ALT_FW_FTR_SECTION_END_NESTED_IFSET(msk, label)	\
+	ALT_FW_FTR_SECTION_END_NESTED(msk, msk, label)
+#define ALT_FW_FTR_SECTION_END_NESTED_IFCLR(msk, label)	\
+	ALT_FW_FTR_SECTION_END_NESTED(msk, 0, label)
+#define ALT_FW_FTR_SECTION_END(msk, val)	\
+	ALT_FW_FTR_SECTION_END_NESTED(msk, val, 97)
+#define ALT_FW_FTR_SECTION_END_IFSET(msk)	\
+	ALT_FW_FTR_SECTION_END_NESTED_IFSET(msk, 97)
+#define ALT_FW_FTR_SECTION_END_IFCLR(msk)	\
+	ALT_FW_FTR_SECTION_END_NESTED_IFCLR(msk, 97)
+
+#ifndef __ASSEMBLY__
+
+#define ASM_FTR_IF(section_if, section_else, msk, val)	\
+	stringify_in_c(BEGIN_FTR_SECTION)			\
+	section_if "; "						\
+	stringify_in_c(FTR_SECTION_ELSE)			\
+	section_else "; "					\
+	stringify_in_c(ALT_FTR_SECTION_END((msk), (val)))
+
+#define ASM_FTR_IFSET(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), (msk))
+
+#define ASM_FTR_IFCLR(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), 0)
+
+#define ASM_MMU_FTR_IF(section_if, section_else, msk, val)	\
+	stringify_in_c(BEGIN_MMU_FTR_SECTION)			\
+	section_if "; "						\
+	stringify_in_c(MMU_FTR_SECTION_ELSE)			\
+	section_else "; "					\
+	stringify_in_c(ALT_MMU_FTR_SECTION_END((msk), (val)))
+
+#define ASM_MMU_FTR_IFSET(section_if, section_else, msk)	\
+	ASM_MMU_FTR_IF(section_if, section_else, (msk), (msk))
+
+#define ASM_MMU_FTR_IFCLR(section_if, section_else, msk)	\
+	ASM_MMU_FTR_IF(section_if, section_else, (msk), 0)
+
+#endif /* __ASSEMBLY__ */
+
+/* LWSYNC feature sections */
+#define START_LWSYNC_SECTION(label)	label##1:
+#define MAKE_LWSYNC_SECTION_ENTRY(label, sect)		\
+label##2:						\
+	.pushsection sect,"a";				\
+	.align 2;					\
+label##3:					       	\
+	FTR_ENTRY_OFFSET label##1b-label##3b;		\
+	.popsection;
+
+#define STF_ENTRY_BARRIER_FIXUP_SECTION			\
+953:							\
+	.pushsection __stf_entry_barrier_fixup,"a";	\
+	.align 2;					\
+954:							\
+	FTR_ENTRY_OFFSET 953b-954b;			\
+	.popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION			\
+955:							\
+	.pushsection __stf_exit_barrier_fixup,"a";	\
+	.align 2;					\
+956:							\
+	FTR_ENTRY_OFFSET 955b-956b;			\
+	.popsection;
+
+#define UACCESS_FLUSH_FIXUP_SECTION			\
+959:							\
+	.pushsection __uaccess_flush_fixup,"a";		\
+	.align 2;					\
+960:							\
+	FTR_ENTRY_OFFSET 959b-960b;			\
+	.popsection;
+
+#define ENTRY_FLUSH_FIXUP_SECTION			\
+957:							\
+	.pushsection __entry_flush_fixup,"a";		\
+	.align 2;					\
+958:							\
+	FTR_ENTRY_OFFSET 957b-958b;			\
+	.popsection;
+
+#define SCV_ENTRY_FLUSH_FIXUP_SECTION			\
+957:							\
+	.pushsection __scv_entry_flush_fixup,"a";	\
+	.align 2;					\
+958:							\
+	FTR_ENTRY_OFFSET 957b-958b;			\
+	.popsection;
+
+#define RFI_FLUSH_FIXUP_SECTION				\
+951:							\
+	.pushsection __rfi_flush_fixup,"a";		\
+	.align 2;					\
+952:							\
+	FTR_ENTRY_OFFSET 951b-952b;			\
+	.popsection;
+
+#define NOSPEC_BARRIER_FIXUP_SECTION			\
+953:							\
+	.pushsection __barrier_nospec_fixup,"a";	\
+	.align 2;					\
+954:							\
+	FTR_ENTRY_OFFSET 953b-954b;			\
+	.popsection;
+
+#define START_BTB_FLUSH_SECTION			\
+955:							\
+
+#define END_BTB_FLUSH_SECTION			\
+956:							\
+	.pushsection __btb_flush_fixup,"a";	\
+	.align 2;							\
+957:						\
+	FTR_ENTRY_OFFSET 955b-957b;			\
+	FTR_ENTRY_OFFSET 956b-957b;			\
+	.popsection;
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long stf_barrier_fallback;
+extern long entry_flush_fallback;
+extern long scv_entry_flush_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup;
+extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
+extern long __start___scv_entry_flush_fixup, __stop___scv_entry_flush_fixup;
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
+extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
+
+void apply_feature_fixups(void);
+void update_mmu_feature_fixups(unsigned long mask);
+void setup_feature_keys(void);
+#endif
+
+#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
new file mode 100644
index 0000000000..69ae9cf57d
--- /dev/null
+++ b/arch/powerpc/include/asm/firmware.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+#ifndef __ASM_POWERPC_FIRMWARE_H
+#define __ASM_POWERPC_FIRMWARE_H
+
+#ifdef __KERNEL__
+
+#include <asm/asm-const.h>
+
+/* firmware feature bitmask values */
+
+#define FW_FEATURE_PFT		ASM_CONST(0x0000000000000001)
+#define FW_FEATURE_TCE		ASM_CONST(0x0000000000000002)
+#define FW_FEATURE_SPRG0	ASM_CONST(0x0000000000000004)
+#define FW_FEATURE_DABR		ASM_CONST(0x0000000000000008)
+#define FW_FEATURE_COPY		ASM_CONST(0x0000000000000010)
+#define FW_FEATURE_ASR		ASM_CONST(0x0000000000000020)
+#define FW_FEATURE_DEBUG	ASM_CONST(0x0000000000000040)
+#define FW_FEATURE_TERM		ASM_CONST(0x0000000000000080)
+#define FW_FEATURE_PERF		ASM_CONST(0x0000000000000100)
+#define FW_FEATURE_DUMP		ASM_CONST(0x0000000000000200)
+#define FW_FEATURE_INTERRUPT	ASM_CONST(0x0000000000000400)
+#define FW_FEATURE_MIGRATE	ASM_CONST(0x0000000000000800)
+#define FW_FEATURE_PERFMON	ASM_CONST(0x0000000000001000)
+#define FW_FEATURE_CRQ		ASM_CONST(0x0000000000002000)
+#define FW_FEATURE_VIO		ASM_CONST(0x0000000000004000)
+#define FW_FEATURE_RDMA		ASM_CONST(0x0000000000008000)
+#define FW_FEATURE_LLAN		ASM_CONST(0x0000000000010000)
+#define FW_FEATURE_BULK_REMOVE	ASM_CONST(0x0000000000020000)
+#define FW_FEATURE_XDABR	ASM_CONST(0x0000000000040000)
+#define FW_FEATURE_PUT_TCE_IND	ASM_CONST(0x0000000000080000)
+#define FW_FEATURE_SPLPAR	ASM_CONST(0x0000000000100000)
+#define FW_FEATURE_LPAR		ASM_CONST(0x0000000000400000)
+#define FW_FEATURE_PS3_LV1	ASM_CONST(0x0000000000800000)
+#define FW_FEATURE_HPT_RESIZE	ASM_CONST(0x0000000001000000)
+#define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
+#define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
+#define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
+#define FW_FEATURE_OPAL		ASM_CONST(0x0000000010000000)
+#define FW_FEATURE_SET_MODE	ASM_CONST(0x0000000040000000)
+#define FW_FEATURE_BEST_ENERGY	ASM_CONST(0x0000000080000000)
+#define FW_FEATURE_FORM1_AFFINITY ASM_CONST(0x0000000100000000)
+#define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)
+#define FW_FEATURE_DRMEM_V2	ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000800000000)
+#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
+#define FW_FEATURE_PAPR_SCM 	ASM_CONST(0x0000002000000000)
+#define FW_FEATURE_ULTRAVISOR	ASM_CONST(0x0000004000000000)
+#define FW_FEATURE_STUFF_TCE	ASM_CONST(0x0000008000000000)
+#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000)
+#define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000)
+#define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000)
+#define FW_FEATURE_WATCHDOG	ASM_CONST(0x0000080000000000)
+#define FW_FEATURE_PLPKS	ASM_CONST(0x0000100000000000)
+
+#ifndef __ASSEMBLY__
+
+enum {	/* per-platform masks, then the combined masks firmware_has_feature() tests */
+#ifdef CONFIG_PPC64
+	FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE |
+		FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY |
+		FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM |
+		FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT |
+		FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
+		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
+		FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
+		FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE |
+		FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
+		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
+		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
+		FW_FEATURE_FORM1_AFFINITY | FW_FEATURE_PRRN |
+		FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+		FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
+		FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
+		FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY |
+		FW_FEATURE_ENERGY_SCALE_INFO | FW_FEATURE_WATCHDOG |
+		FW_FEATURE_PLPKS,
+	FW_FEATURE_PSERIES_ALWAYS = 0,
+	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
+	FW_FEATURE_POWERNV_ALWAYS = 0,
+	FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
+	FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
+	FW_FEATURE_NATIVE_POSSIBLE = 0,
+	FW_FEATURE_NATIVE_ALWAYS = 0,
+	FW_FEATURE_POSSIBLE =	/* union of the *_POSSIBLE masks of the platforms built in */
+#ifdef CONFIG_PPC_PSERIES
+		FW_FEATURE_PSERIES_POSSIBLE |
+#endif
+#ifdef CONFIG_PPC_POWERNV
+		FW_FEATURE_POWERNV_POSSIBLE |
+#endif
+#ifdef CONFIG_PPC_PS3
+		FW_FEATURE_PS3_POSSIBLE |
+#endif
+#ifdef CONFIG_PPC_HASH_MMU_NATIVE
+		FW_FEATURE_NATIVE_POSSIBLE |
+#endif
+		0,
+	FW_FEATURE_ALWAYS =	/* intersection of the *_ALWAYS masks, limited to POSSIBLE */
+#ifdef CONFIG_PPC_PSERIES
+		FW_FEATURE_PSERIES_ALWAYS &
+#endif
+#ifdef CONFIG_PPC_POWERNV
+		FW_FEATURE_POWERNV_ALWAYS &
+#endif
+#ifdef CONFIG_PPC_PS3
+		FW_FEATURE_PS3_ALWAYS &
+#endif
+#ifdef CONFIG_PPC_HASH_MMU_NATIVE
+		FW_FEATURE_NATIVE_ALWAYS &
+#endif
+		FW_FEATURE_POSSIBLE,
+
+#else /* CONFIG_PPC64 */
+	FW_FEATURE_POSSIBLE = 0,
+	FW_FEATURE_ALWAYS = 0,
+#endif
+};
+
+/* This is used to identify firmware features which are available
+ * to the kernel.
+ */
+extern unsigned long	powerpc_firmware_features;
+/* True if a @feature bit is ALWAYS, or POSSIBLE and set above (uses @feature twice). */
+#define firmware_has_feature(feature)					\
+	((FW_FEATURE_ALWAYS & (feature)) ||				\
+		(FW_FEATURE_POSSIBLE & powerpc_firmware_features & (feature)))
+
+extern void system_reset_fwnmi(void);
+extern void machine_check_fwnmi(void);
+
+/* This is true if we are using the firmware NMI handler (typically LPAR) */
+extern int fwnmi_active;
+extern int ibm_nmi_interlock_token;
+
+extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup;
+
+#ifdef CONFIG_PPC_PSERIES
+void pseries_probe_fw_features(void);
+#else
+static inline void pseries_probe_fw_features(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* __ASM_POWERPC_FIRMWARE_H */
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
new file mode 100644
index 0000000000..a832aeafe5
--- /dev/null
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -0,0 +1,123 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *   Port to powerpc added by Kumar Gala
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/sizes.h>
+#include <linux/pgtable.h>
+#include <asm/page.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_size.h>
+#endif
+
+#ifdef CONFIG_PPC64
+#define FIXADDR_TOP	(IOREMAP_END + FIXADDR_SIZE)
+#else
+#define FIXADDR_SIZE	0
+#ifdef CONFIG_KASAN
+#include <asm/kasan.h>
+#define FIXADDR_TOP	(KASAN_SHADOW_START - PAGE_SIZE)
+#else
+#define FIXADDR_TOP	((unsigned long)(-PAGE_SIZE))
+#endif
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
+	FIX_HOLE,
+#ifdef CONFIG_PPC32
+	/* reserve the top 128K for early debugging purposes */
+	FIX_EARLY_DEBUG_TOP = FIX_HOLE,
+	FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
+#ifdef CONFIG_HIGHMEM
+	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
+#endif
+#ifdef CONFIG_PPC_8xx
+	/* For IMMR we need an aligned 512K area */
+#define FIX_IMMR_SIZE	(512 * 1024 / PAGE_SIZE)
+	FIX_IMMR_START,
+	FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+		       FIX_IMMR_SIZE,
+#endif
+#ifdef CONFIG_PPC_83xx
+	/* For IMMR we need an aligned 2M area */
+#define FIX_IMMR_SIZE	(SZ_2M / PAGE_SIZE)
+	FIX_IMMR_START,
+	FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+		       FIX_IMMR_SIZE,
+#endif
+	/* FIX_PCIE_MCFG, */
+#endif /* CONFIG_PPC32 */
+	__end_of_permanent_fixed_addresses,
+
+#define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS	16
+#define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+	__end_of_fixed_addresses
+};
+
+#define __FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START		(FIXADDR_TOP - __FIXADDR_SIZE)
+
+#define FIXMAP_ALIGNED_SIZE	(ALIGN(FIXADDR_TOP, PGDIR_SIZE) - \
+				 ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE))
+#define FIXMAP_PTE_SIZE	(FIXMAP_ALIGNED_SIZE / PGDIR_SIZE * PTE_TABLE_SIZE)
+
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG
+#define FIXMAP_PAGE_IO	PAGE_KERNEL_NCG
+
+#include <asm-generic/fixmap.h>
+
+static inline void __set_fixmap(enum fixed_addresses idx,
+				phys_addr_t phys, pgprot_t flags)
+{
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE);
+	/* idx past the table: build error if constant, else WARN and return. */
+	if (__builtin_constant_p(idx))
+		BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+	else if (WARN_ON(idx >= __end_of_fixed_addresses))
+		return;
+	if (pgprot_val(flags))
+		map_kernel_page(__fix_to_virt(idx), phys, flags);
+	else	/* pgprot_val(flags) == 0 requests an unmap of the slot */
+		unmap_kernel_page(__fix_to_virt(idx));
+}
+
+#define __early_set_fixmap	__set_fixmap
+
+#endif /* !__ASSEMBLY__ */
+#endif
diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h
new file mode 100644
index 0000000000..f8ce178b43
--- /dev/null
+++ b/arch/powerpc/include/asm/floppy.h
@@ -0,0 +1,214 @@
+/*
+ * Architecture specific parts of the Floppy driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995
+ */
+#ifndef __ASM_POWERPC_FLOPPY_H
+#define __ASM_POWERPC_FLOPPY_H
+#ifdef __KERNEL__
+
+#include <asm/machdep.h>
+
+#define fd_inb(base, reg)		inb_p((base) + (reg))
+#define fd_outb(value, base, reg)	outb_p(value, (base) + (reg))
+/* DMA/IRQ hooks; fd_ops-> entries go to the backend set by fd_request_dma(). */
+#define fd_enable_dma()         enable_dma(FLOPPY_DMA)
+#define fd_disable_dma()        fd_ops->_disable_dma(FLOPPY_DMA)
+#define fd_free_dma()           fd_ops->_free_dma(FLOPPY_DMA)
+#define fd_clear_dma_ff()       clear_dma_ff(FLOPPY_DMA)
+#define fd_set_dma_mode(mode)   set_dma_mode(FLOPPY_DMA, mode)
+#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA, count)
+#define fd_get_dma_residue()    fd_ops->_get_dma_residue(FLOPPY_DMA)
+#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
+#define fd_free_irq()           free_irq(FLOPPY_IRQ, NULL) /* caller adds ';' */
+
+#include <linux/pci.h>
+#include <asm/ppc-pci.h>	/* for isa_bridge_pcidev */
+
+#define fd_dma_setup(addr,size,mode,io) fd_ops->_dma_setup(addr,size,mode,io)
+
+static int fd_request_dma(void);
+
+struct fd_dma_ops {	/* DMA backend hooks, reached through fd_ops */
+	void (*_disable_dma)(unsigned int dmanr);	/* fd_disable_dma() */
+	void (*_free_dma)(unsigned int dmanr);		/* fd_free_dma() */
+	int (*_get_dma_residue)(unsigned int dummy);	/* fd_get_dma_residue() */
+	int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); /* fd_dma_setup() */
+};
+
+static int virtual_dma_count;	/* bytes floppy_hardint() still has to move */
+static int virtual_dma_residue;	/* bytes left unmoved when a transfer stopped */
+static char *virtual_dma_addr;	/* next memory byte to read or write */
+static int virtual_dma_mode;	/* non-zero when mode was DMA_MODE_WRITE */
+static int doing_vdma;	/* 1 from vdma_dma_setup() until the transfer ends */
+static struct fd_dma_ops *fd_ops;	/* backend selected by fd_request_dma() */
+
+static irqreturn_t floppy_hardint(int irq, void *dev_id)
+{
+	unsigned char st;
+	int lcount;
+	char *lptr;
+
+	if (!doing_vdma)	/* no virtual-DMA transfer active: plain handler */
+		return floppy_interrupt(irq, dev_id);
+
+	/* Move bytes by port I/O for as long as status shows DMA and READY. */
+	st = 1;
+	for (lcount=virtual_dma_count, lptr=virtual_dma_addr;
+	     lcount; lcount--, lptr++) {
+		st = inb(virtual_dma_port + FD_STATUS);
+		st &= STATUS_DMA | STATUS_READY;
+		if (st != (STATUS_DMA | STATUS_READY))
+			break;
+		if (virtual_dma_mode)	/* write mode: memory to FD_DATA */
+			outb_p(*lptr, virtual_dma_port + FD_DATA);
+		else			/* read mode: FD_DATA to memory */
+			*lptr = inb_p(virtual_dma_port + FD_DATA);
+	}
+	virtual_dma_count = lcount;	/* store back where the loop stopped */
+	virtual_dma_addr = lptr;
+	st = inb(virtual_dma_port + FD_STATUS);
+
+	if (st == STATUS_DMA)	/* only the DMA bit is set: nothing more to do now */
+		return IRQ_HANDLED;
+	if (!(st & STATUS_DMA)) {	/* DMA bit clear: close the transfer */
+		virtual_dma_residue += virtual_dma_count;
+		virtual_dma_count=0;
+		doing_vdma = 0;
+		floppy_interrupt(irq, dev_id);	/* its return value is not used */
+		return IRQ_HANDLED;
+	}
+	return IRQ_HANDLED;
+}
+
+static void vdma_disable_dma(unsigned int dummy)
+{
+	doing_vdma = 0;	/* floppy_hardint() goes back to calling floppy_interrupt() */
+	virtual_dma_residue += virtual_dma_count;	/* keep the unmoved byte count */
+	virtual_dma_count=0;
+}
+
+static void vdma_nop(unsigned int dummy)
+{	/* deliberately empty; installed as virt_dma_ops._free_dma */
+}
+
+
+static int vdma_get_dma_residue(unsigned int dummy)
+{
+	return virtual_dma_count + virtual_dma_residue;	/* pending + already given up */
+}
+
+
+static int fd_request_irq(void)
+{
+	/* Without virtual DMA the stock handler is registered directly. */
+	if (!can_use_virtual_dma)
+		return request_irq(FLOPPY_IRQ, floppy_interrupt, 0, "floppy", NULL);
+
+	/* Otherwise floppy_hardint() is registered and dispatches from there. */
+	return request_irq(FLOPPY_IRQ, floppy_hardint, 0, "floppy", NULL);
+}
+
+static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+	doing_vdma = 1;	/* floppy_hardint() will now move the data itself */
+	virtual_dma_port = io;
+	virtual_dma_mode = (mode  == DMA_MODE_WRITE);	/* 1 = write, 0 = anything else */
+	virtual_dma_addr = addr;
+	virtual_dma_count = size;
+	virtual_dma_residue = 0;	/* fresh transfer: nothing left over yet */
+	return 0;	/* always succeeds */
+}
+
+static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+	/* Set up FLOPPY_DMA for addr/size; 0 on success, -ENOMEM if mapping fails. */
+	static dma_addr_t bus_addr = 0;	/* cached mapping, 0 = none */
+	static unsigned long prev_size;
+	static char *prev_addr;
+	static int prev_dir;
+	struct device *dev = &isa_bridge_pcidev->dev;
+	int dir = (mode == DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	doing_vdma = 0;
+	if (bus_addr &&
+	    (addr != prev_addr || size != prev_size || dir != prev_dir)) {
+		/* different from last time -- unmap prev */
+		dma_unmap_single(dev, bus_addr, prev_size, prev_dir);
+		bus_addr = 0;
+	}
+	if (!bus_addr) {	/* need to map it */
+		bus_addr = dma_map_single(dev, addr, size, dir);
+		if (dma_mapping_error(dev, bus_addr)) {
+			bus_addr = 0;	/* never reuse or unmap a failed mapping */
+			return -ENOMEM;
+		}
+	}
+	/* remember this one as prev */
+	prev_addr = addr;
+	prev_size = size;
+	prev_dir = dir;
+
+	fd_clear_dma_ff();
+	fd_set_dma_mode(mode);
+	set_dma_addr(FLOPPY_DMA, bus_addr);
+	fd_set_dma_count(size);
+	virtual_dma_port = io;
+	fd_enable_dma();
+	return 0;
+}
+
+static struct fd_dma_ops real_dma_ops =	/* used when can_use_virtual_dma bit 0 is clear */
+{
+	._disable_dma = disable_dma,
+	._free_dma = free_dma,
+	._get_dma_residue = get_dma_residue,
+	._dma_setup = hard_dma_setup
+};
+
+static struct fd_dma_ops virt_dma_ops =	/* used when can_use_virtual_dma bit 0 is set */
+{
+	._disable_dma = vdma_disable_dma,
+	._free_dma = vdma_nop,	/* no DMA channel was requested, nothing to free */
+	._get_dma_residue = vdma_get_dma_residue,
+	._dma_setup = vdma_dma_setup
+};
+
+static int fd_request_dma(void)
+{
+	/* Bit 0 of can_use_virtual_dma picks the virtual (port I/O) backend. */
+	const int want_vdma = can_use_virtual_dma & 1;
+
+	fd_ops = want_vdma ? &virt_dma_ops : &real_dma_ops;
+	if (want_vdma)
+		return 0;	/* no DMA channel to reserve */
+
+	return request_dma(FLOPPY_DMA, "floppy");
+}
+
+static int FDC1 = 0x3f0;
+static int FDC2 = -1;
+
+/*
+ * Again, the CMOS information is not available
+ */
+#define FLOPPY0_TYPE 6
+#define FLOPPY1_TYPE 0
+
+#define N_FDC 2			/* Don't change this! */
+#define N_DRIVE 8
+
+/*
+ * The PowerPC has no problems with floppy DMA crossing 64k borders.
+ */
+#define CROSS_64KB(a,s)	(0)
+
+#define EXTRA_FLOPPY_PARAMS
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_POWERPC_FLOPPY_H */
diff --git a/arch/powerpc/include/asm/fsl_gtm.h b/arch/powerpc/include/asm/fsl_gtm.h
new file mode 100644
index 0000000000..6ff6876509
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_gtm.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Freescale General-purpose Timers Module
+ *
+ * Copyright 2006 Freescale Semiconductor, Inc.
+ *               Shlomi Gridish <gridish@freescale.com>
+ *               Jerry Huang <Chang-Ming.Huang@freescale.com>
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *               Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#ifndef __ASM_FSL_GTM_H
+#define __ASM_FSL_GTM_H
+
+#include <linux/types.h>
+
+struct gtm;
+
+struct gtm_timer {	/* handle returned by gtm_get_timer16() and friends */
+	unsigned int irq;
+
+	struct gtm *gtm;	/* opaque here: struct gtm is only forward-declared */
+	bool requested;
+	u8 __iomem *gtcfr;	/* 8-bit MMIO location */
+	__be16 __iomem *gtmdr;	/* the rest: big-endian 16-bit MMIO locations; */
+	__be16 __iomem *gtpsr;	/* presumably this timer's registers - TODO confirm */
+	__be16 __iomem *gtcnr;
+	__be16 __iomem *gtrfr;
+	__be16 __iomem *gtevr;
+};
+
+extern struct gtm_timer *gtm_get_timer16(void);
+extern struct gtm_timer *gtm_get_specific_timer16(struct gtm *gtm,
+						  unsigned int timer);
+extern void gtm_put_timer16(struct gtm_timer *tmr);
+extern int gtm_set_timer16(struct gtm_timer *tmr, unsigned long usec,
+			     bool reload);
+extern int gtm_set_exact_timer16(struct gtm_timer *tmr, u16 usec,
+				 bool reload);
+extern void gtm_stop_timer16(struct gtm_timer *tmr);
+extern void gtm_ack_timer16(struct gtm_timer *tmr, u16 events);
+
+#endif /* __ASM_FSL_GTM_H */
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h
new file mode 100644
index 0000000000..b889d13547
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -0,0 +1,655 @@
+/*
+ * Freescale hypervisor call interface
+ *
+ * Copyright 2008-2010 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is provided under a dual BSD/GPL license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FSL_HCALLS_H
+#define _FSL_HCALLS_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+#include <asm/epapr_hcalls.h>
+
+#define FH_API_VERSION			1
+
+#define FH_ERR_GET_INFO			1
+#define FH_PARTITION_GET_DTPROP		2
+#define FH_PARTITION_SET_DTPROP		3
+#define FH_PARTITION_RESTART		4
+#define FH_PARTITION_GET_STATUS		5
+#define FH_PARTITION_START		6
+#define FH_PARTITION_STOP		7
+#define FH_PARTITION_MEMCPY		8
+#define FH_DMA_ENABLE			9
+#define FH_DMA_DISABLE			10
+#define FH_SEND_NMI			11
+#define FH_VMPIC_GET_MSIR		12
+#define FH_SYSTEM_RESET			13
+#define FH_GET_CORE_STATE		14
+#define FH_ENTER_NAP			15
+#define FH_EXIT_NAP			16
+#define FH_CLAIM_DEVICE			17
+#define FH_PARTITION_STOP_DMA		18
+
+/* vendor ID: Freescale Semiconductor */
+#define FH_HCALL_TOKEN(num)		_EV_HCALL_TOKEN(EV_FSL_VENDOR_ID, num)
+
+/*
+ * We use "uintptr_t" to define a register because it's guaranteed to be a
+ * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit
+ * platform.
+ *
+ * All registers are either input/output or output only.  Registers that are
+ * initialized before making the hypercall are input/output.  All
+ * input/output registers are represented with "+r".  Output-only registers
+ * are represented with "=r".  Do not specify any unused registers.  The
+ * clobber list will tell the compiler that the hypercall modifies those
+ * registers, which is good enough.
+ */
+
+/**
+ * fh_send_nmi - send NMI to virtual cpu(s).
+ * @vcpu_mask: send NMI to virtual cpu(s) specified by this mask.
+ *
+ * Returns 0 for success, or EINVAL for invalid vcpu_mask.
+ */
+static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_SEND_NMI);	/* hcall token travels in r11 */
+	r3 = vcpu_mask;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+/* Arbitrary limits to avoid excessive memory allocation in hypervisor */
+#define FH_DTPROP_MAX_PATHLEN 4096
+#define FH_DTPROP_MAX_PROPLEN 32768
+
+/**
+ * fh_partition_get_dtprop - get a property from a guest device tree.
+ * @handle: handle of partition whose device tree is to be accessed
+ * @dtpath_addr: physical address of device tree path to access
+ * @propname_addr: physical address of name of property
+ * @propvalue_addr: physical address of property value buffer
+ * @propvalue_len: length of buffer on entry, length of property on return
+ *
+ * Returns zero on success, non-zero on error.
+ */
+static inline unsigned int fh_partition_get_dtprop(int handle,
+						   uint64_t dtpath_addr,
+						   uint64_t propname_addr,
+						   uint64_t propvalue_addr,
+						   uint32_t *propvalue_len)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+	register uintptr_t r8 __asm__("r8");
+	register uintptr_t r9 __asm__("r9");
+	register uintptr_t r10 __asm__("r10");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_DTPROP);
+	r3 = handle;
+
+#ifdef CONFIG_PHYS_64BIT
+	r4 = dtpath_addr >> 32;	/* upper 32 bits go in r4, r6, r8 */
+	r6 = propname_addr >> 32;
+	r8 = propvalue_addr >> 32;
+#else
+	r4 = 0;	/* without CONFIG_PHYS_64BIT the upper halves are zero */
+	r6 = 0;
+	r8 = 0;
+#endif
+	r5 = (uint32_t)dtpath_addr;	/* lower 32 bits go in r5, r7, r9 */
+	r7 = (uint32_t)propname_addr;
+	r9 = (uint32_t)propvalue_addr;
+	r10 = *propvalue_len;	/* buffer length on entry */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11),
+		  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
+		  "+r" (r8), "+r" (r9), "+r" (r10)
+		: : EV_HCALL_CLOBBERS8
+	);
+
+	*propvalue_len = r4;	/* length is read back from r4, whatever r3 says */
+	return r3;
+}
+
+/**
+ * fh_partition_set_dtprop - set a property in a guest device tree.
+ * @handle: handle of partition whose device tree is to be accessed
+ * @dtpath_addr: physical address of device tree path to access
+ * @propname_addr: physical address of name of property
+ * @propvalue_addr: physical address of property value
+ * @propvalue_len: length of property
+ *
+ * Returns zero on success, non-zero on error.
+ */
+static inline unsigned int fh_partition_set_dtprop(int handle,
+						   uint64_t dtpath_addr,
+						   uint64_t propname_addr,
+						   uint64_t propvalue_addr,
+						   uint32_t propvalue_len)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r8 __asm__("r8");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r7 __asm__("r7");
+	register uintptr_t r9 __asm__("r9");
+	register uintptr_t r10 __asm__("r10");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_SET_DTPROP);
+	r3 = handle;
+
+#ifdef CONFIG_PHYS_64BIT
+	r4 = dtpath_addr >> 32;	/* upper 32 bits go in r4, r6, r8 */
+	r6 = propname_addr >> 32;
+	r8 = propvalue_addr >> 32;
+#else
+	r4 = 0;	/* without CONFIG_PHYS_64BIT the upper halves are zero */
+	r6 = 0;
+	r8 = 0;
+#endif
+	r5 = (uint32_t)dtpath_addr;	/* lower 32 bits go in r5, r7, r9 */
+	r7 = (uint32_t)propname_addr;
+	r9 = (uint32_t)propvalue_addr;
+	r10 = propvalue_len;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11),
+		  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
+		  "+r" (r8), "+r" (r9), "+r" (r10)
+		: : EV_HCALL_CLOBBERS8
+	);
+
+	return r3;	/* only the status in r3 is used */
+}
+
+/**
+ * fh_partition_restart - reboot the current partition
+ * @partition: partition ID
+ *
+ * Returns an error code if reboot failed.  Does not return if it succeeds.
+ */
+static inline unsigned int fh_partition_restart(unsigned int partition)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART);	/* hcall token travels in r11 */
+	r3 = partition;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+#define FH_PARTITION_STOPPED	0
+#define FH_PARTITION_RUNNING	1
+#define FH_PARTITION_STARTING	2
+#define FH_PARTITION_STOPPING	3
+#define FH_PARTITION_PAUSING	4
+#define FH_PARTITION_PAUSED	5
+#define FH_PARTITION_RESUMING	6
+
+/**
+ * fh_partition_get_status - gets the status of a partition
+ * @partition: partition ID
+ * @status: returned status code
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_partition_get_status(unsigned int partition,
+	unsigned int *status)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS);
+	r3 = partition;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	*status = r4;	/* r4 is output-only; stored even when r3 is non-zero */
+
+	return r3;
+}
+
+/**
+ * fh_partition_start - boots and starts execution of the specified partition
+ * @partition: partition ID
+ * @entry_point: guest physical address to start execution
+ * @load: passed to the hypervisor in r5 (NOTE(review): meaning not shown here)
+ *
+ * The hypervisor creates a 1-to-1 virtual/physical IMA mapping, so at boot
+ * time, guest physical addresses are the same as guest virtual addresses.
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_partition_start(unsigned int partition,
+	uint32_t entry_point, int load)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_START);
+	r3 = partition;
+	r4 = entry_point;
+	r5 = load;	/* third argument, handed over unmodified */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5)
+		: : EV_HCALL_CLOBBERS3
+	);
+
+	return r3;	/* only the status in r3 is used */
+}
+
+/**
+ * fh_partition_stop - stops another partition
+ * @partition: partition ID
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_partition_stop(unsigned int partition)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP);	/* hcall token travels in r11 */
+	r3 = partition;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+/**
+ * struct fh_sg_list: definition of the fh_partition_memcpy S/G list
+ * @source: guest physical address to copy from
+ * @target: guest physical address to copy to
+ * @size: number of bytes to copy
+ * @reserved: reserved, must be zero
+ *
+ * The scatter/gather list for fh_partition_memcpy() is an array of these
+ * structures.  The array must be guest physically contiguous.
+ *
+ * This structure must be aligned on a 32-byte boundary, so that no single
+ * structure can span two pages.
+ */
+struct fh_sg_list {	/* four 64-bit fields: 32 bytes per entry */
+	uint64_t source;   /**< guest physical address to copy from */
+	uint64_t target;   /**< guest physical address to copy to */
+	uint64_t size;     /**< number of bytes to copy */
+	uint64_t reserved; /**< reserved, must be zero */
+} __attribute__ ((aligned(32)));	/* alignment equals the entry size */
+
+/**
+ * fh_partition_memcpy - copies data from one guest to another
+ * @source: the ID of the partition to copy from
+ * @target: the ID of the partition to copy to
+ * @sg_list: guest physical address of an array of &fh_sg_list structures
+ * @count: the number of entries in @sg_list
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_partition_memcpy(unsigned int source,
+	unsigned int target, phys_addr_t sg_list, unsigned int count)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_MEMCPY);
+	r3 = source;
+	r4 = target;
+	r5 = (uint32_t) sg_list;	/* lower 32 bits of the list address */
+
+#ifdef CONFIG_PHYS_64BIT
+	r6 = sg_list >> 32;	/* upper 32 bits of the list address */
+#else
+	r6 = 0;	/* without CONFIG_PHYS_64BIT the upper half is zero */
+#endif
+	r7 = count;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11),
+		  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7)
+		: : EV_HCALL_CLOBBERS5
+	);
+
+	return r3;
+}
+
+/**
+ * fh_dma_enable - enable DMA for the specified device
+ * @liodn: the LIODN of the I/O device for which to enable DMA
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_dma_enable(unsigned int liodn)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE);	/* hcall token travels in r11 */
+	r3 = liodn;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+/**
+ * fh_dma_disable - disable DMA for the specified device
+ * @liodn: the LIODN of the I/O device for which to disable DMA
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_dma_disable(unsigned int liodn)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE);	/* hcall token travels in r11 */
+	r3 = liodn;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+
+/**
+ * fh_vmpic_get_msir - returns the MPIC-MSI register value
+ * @interrupt: the interrupt number
+ * @msir_val: returned MPIC-MSI register value
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt,
+	unsigned int *msir_val)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR);
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	*msir_val = r4;	/* r4 is output-only; stored even when r3 is non-zero */
+
+	return r3;
+}
+
+/**
+ * fh_system_reset - reset the system
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_system_reset(void)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET);	/* the token is the only input */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "=r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;	/* r3 is output-only ("=r"): it is never initialised here */
+}
+
+
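+/**
+ * fh_err_get_info - get platform error information
+ * @queue: 0 for guest error event queue, 1 for global error event queue
+ * @bufsize: passed to the hypervisor in r4; overwritten with r4 on return
+ * @addr_hi: high 32 bits of the pointer to store the platform error data
+ * @addr_lo: low 32 bits of the pointer to store the platform error data
+ * @peek: passed to the hypervisor in r7 (NOTE(review): semantics not shown)
+ *
+ * Returns 0 for success, or an error code.  NOTE(review): an earlier note
+ * said the error data is returned in r4 - r11; only r4 is read back here.
+ */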
+static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize,
+	uint32_t addr_hi, uint32_t addr_lo, int peek)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+
+	r11 = FH_HCALL_TOKEN(FH_ERR_GET_INFO);
+	r3 = queue;
+	r4 = *bufsize;	/* size in; r4 is read back after the call */
+	r5 = addr_hi;
+	r6 = addr_lo;
+	r7 = peek;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6),
+		  "+r" (r7)
+		: : EV_HCALL_CLOBBERS5
+	);
+
+	*bufsize = r4;	/* stored even when r3 is non-zero */
+
+	return r3;
+}
+
+
+#define FH_VCPU_RUN	0
+#define FH_VCPU_IDLE	1
+#define FH_VCPU_NAP	2
+
+/**
+ * fh_get_core_state - get the state of a vcpu
+ *
+ * @handle: handle of partition containing the vcpu
+ * @vcpu: vcpu number within the partition
+ * @state: the current state of the vcpu, see FH_VCPU_*
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_get_core_state(unsigned int handle,
+	unsigned int vcpu, unsigned int *state)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = FH_HCALL_TOKEN(FH_GET_CORE_STATE);
+	r3 = handle;
+	r4 = vcpu;	/* vcpu number in; r4 is read back as the state */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	*state = r4;	/* stored even when r3 is non-zero */
+	return r3;
+}
+
+/**
+ * fh_enter_nap - enter nap on a vcpu
+ *
+ * Note that though the API supports entering nap on a vcpu other
+ * than the caller, this may not be implemented and may return EINVAL.
+ *
+ * @handle: handle of partition containing the vcpu
+ * @vcpu: vcpu number within the partition
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = FH_HCALL_TOKEN(FH_ENTER_NAP);
+	r3 = handle;
+	r4 = vcpu;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	return r3;	/* whatever comes back in r4 is not used */
+}
+
+/**
+ * fh_exit_nap - exit nap on a vcpu
+ * @handle: handle of partition containing the vcpu
+ * @vcpu: vcpu number within the partition
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = FH_HCALL_TOKEN(FH_EXIT_NAP);
+	r3 = handle;
+	r4 = vcpu;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	return r3;	/* whatever comes back in r4 is not used */
+}
+/**
+ * fh_claim_device - claim a "claimable" shared device
+ * @handle: fsl,hv-device-handle of node to claim
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_claim_device(unsigned int handle)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE);	/* hcall token travels in r11 */
+	r3 = handle;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+/**
+ * fh_partition_stop_dma - run deferred DMA disabling on a partition's devices
+ *
+ * This applies to devices which a partition owns either privately,
+ * or which are claimable and still actively owned by that partition,
+ * and which do not have the no-dma-disable property.
+ *
+ * @handle: partition (must be stopped) whose DMA is to be disabled
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int fh_partition_stop_dma(unsigned int handle)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA);	/* hcall token travels in r11 */
+	r3 = handle;	/* argument in; the same register carries the result */
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+#endif
diff --git a/arch/powerpc/include/asm/fsl_lbc.h b/arch/powerpc/include/asm/fsl_lbc.h
new file mode 100644
index 0000000000..c4af5ee716
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_lbc.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Freescale Local Bus Controller
+ *
+ * Copyright © 2006-2007, 2010 Freescale Semiconductor
+ *
+ * Authors: Nick Spence <nick.spence@freescale.com>,
+ *          Scott Wood <scottwood@freescale.com>
+ *          Jack Lan <jack.lan@freescale.com>
+ */
+
+#ifndef __ASM_FSL_LBC_H
+#define __ASM_FSL_LBC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+
+struct fsl_lbc_bank {
+	__be32 br;             /**< Base Register  */
+#define BR_BA           0xFFFF8000
+#define BR_BA_SHIFT             15
+#define BR_PS           0x00001800
+#define BR_PS_SHIFT             11
+#define BR_PS_8         0x00000800  /* Port Size 8 bit */
+#define BR_PS_16        0x00001000  /* Port Size 16 bit */
+#define BR_PS_32        0x00001800  /* Port Size 32 bit */
+#define BR_DECC         0x00000600
+#define BR_DECC_SHIFT            9
+#define BR_DECC_OFF     0x00000000  /* HW ECC checking and generation off */
+#define BR_DECC_CHK     0x00000200  /* HW ECC checking on, generation off */
+#define BR_DECC_CHK_GEN 0x00000400  /* HW ECC checking and generation on */
+#define BR_WP           0x00000100
+#define BR_WP_SHIFT              8
+#define BR_MSEL         0x000000E0
+#define BR_MSEL_SHIFT            5
+#define BR_MS_GPCM      0x00000000  /* GPCM */
+#define BR_MS_FCM       0x00000020  /* FCM */
+#define BR_MS_SDRAM     0x00000060  /* SDRAM */
+#define BR_MS_UPMA      0x00000080  /* UPMA */
+#define BR_MS_UPMB      0x000000A0  /* UPMB */
+#define BR_MS_UPMC      0x000000C0  /* UPMC */
+#define BR_V            0x00000001
+#define BR_V_SHIFT               0
+#define BR_RES          ~(BR_BA|BR_PS|BR_DECC|BR_WP|BR_MSEL|BR_V) /* bits in no field mask above */
+
+	__be32 or;             /**< Option Register  */
+#define OR0 0x5004	/* ORn values step by 8, the size of one bank entry */
+#define OR1 0x500C
+#define OR2 0x5014
+#define OR3 0x501C
+#define OR4 0x5024
+#define OR5 0x502C
+#define OR6 0x5034
+#define OR7 0x503C
+
+#define OR_FCM_AM               0xFFFF8000
+#define OR_FCM_AM_SHIFT                 15
+#define OR_FCM_BCTLD            0x00001000
+#define OR_FCM_BCTLD_SHIFT              12
+#define OR_FCM_PGS              0x00000400
+#define OR_FCM_PGS_SHIFT                10
+#define OR_FCM_CSCT             0x00000200
+#define OR_FCM_CSCT_SHIFT                9
+#define OR_FCM_CST              0x00000100
+#define OR_FCM_CST_SHIFT                 8
+#define OR_FCM_CHT              0x00000080
+#define OR_FCM_CHT_SHIFT                 7
+#define OR_FCM_SCY              0x00000070
+#define OR_FCM_SCY_SHIFT                 4
+#define OR_FCM_SCY_1            0x00000010
+#define OR_FCM_SCY_2            0x00000020
+#define OR_FCM_SCY_3            0x00000030
+#define OR_FCM_SCY_4            0x00000040
+#define OR_FCM_SCY_5            0x00000050
+#define OR_FCM_SCY_6            0x00000060
+#define OR_FCM_SCY_7            0x00000070
+#define OR_FCM_RST              0x00000008
+#define OR_FCM_RST_SHIFT                 3
+#define OR_FCM_TRLX             0x00000004
+#define OR_FCM_TRLX_SHIFT                2
+#define OR_FCM_EHTR             0x00000002
+#define OR_FCM_EHTR_SHIFT                1
+
+#define OR_GPCM_AM		0xFFFF8000
+#define OR_GPCM_AM_SHIFT		15
+};
+
+struct fsl_lbc_regs {
+	struct fsl_lbc_bank bank[12];
+	u8 res0[0x8];
+	__be32 mar;             /**< UPM Address Register */
+	u8 res1[0x4];
+	__be32 mamr;            /**< UPMA Mode Register */
+#define MxMR_OP_NO	(0 << 28) /**< normal operation */
+#define MxMR_OP_WA	(1 << 28) /**< write array */
+#define MxMR_OP_RA	(2 << 28) /**< read array */
+#define MxMR_OP_RP	(3 << 28) /**< run pattern */
+#define MxMR_MAD	0x3f      /**< machine address */
+	__be32 mbmr;            /**< UPMB Mode Register */
+	__be32 mcmr;            /**< UPMC Mode Register */
+	u8 res2[0x8];
+	__be32 mrtpr;           /**< Memory Refresh Timer Prescaler Register */
+	__be32 mdr;             /**< UPM Data Register */
+	u8 res3[0x4];
+	__be32 lsor;            /**< Special Operation Initiation Register */
+	__be32 lsdmr;           /**< SDRAM Mode Register */
+	u8 res4[0x8];
+	__be32 lurt;            /**< UPM Refresh Timer */
+	__be32 lsrt;            /**< SDRAM Refresh Timer */
+	u8 res5[0x8];
+	__be32 ltesr;           /**< Transfer Error Status Register */
+#define LTESR_BM   0x80000000
+#define LTESR_FCT  0x40000000
+#define LTESR_PAR  0x20000000
+#define LTESR_WP   0x04000000
+#define LTESR_ATMW 0x00800000
+#define LTESR_ATMR 0x00400000
+#define LTESR_CS   0x00080000
+#define LTESR_UPM  0x00000002
+#define LTESR_CC   0x00000001
+#define LTESR_NAND_MASK (LTESR_FCT | LTESR_PAR | LTESR_CC)
+#define LTESR_MASK      (LTESR_BM | LTESR_FCT | LTESR_PAR | LTESR_WP \
+			 | LTESR_ATMW | LTESR_ATMR | LTESR_CS | LTESR_UPM \
+			 | LTESR_CC)
+#define LTESR_CLEAR	0xFFFFFFFF
+#define LTECCR_CLEAR	0xFFFFFFFF
+#define LTESR_STATUS	LTESR_MASK
+#define LTEIR_ENABLE	LTESR_MASK
+#define LTEDR_ENABLE	0x00000000
+	__be32 ltedr;           /**< Transfer Error Disable Register */
+	__be32 lteir;           /**< Transfer Error Interrupt Register */
+	__be32 lteatr;          /**< Transfer Error Attributes Register */
+	__be32 ltear;           /**< Transfer Error Address Register */
+	__be32 lteccr;          /**< Transfer Error ECC Register */
+	u8 res6[0x8];
+	__be32 lbcr;            /**< Configuration Register */
+#define LBCR_LDIS  0x80000000
+#define LBCR_LDIS_SHIFT    31
+#define LBCR_BCTLC 0x00C00000
+#define LBCR_BCTLC_SHIFT   22
+#define LBCR_AHD   0x00200000
+#define LBCR_LPBSE 0x00020000
+#define LBCR_LPBSE_SHIFT   17
+#define LBCR_EPAR  0x00010000
+#define LBCR_EPAR_SHIFT    16
+#define LBCR_BMT   0x0000FF00
+#define LBCR_BMT_SHIFT      8
+#define LBCR_BMTPS 0x0000000F
+#define LBCR_BMTPS_SHIFT    0
+#define LBCR_INIT  0x00040000
+	__be32 lcrr;            /**< Clock Ratio Register */
+#define LCRR_DBYP    0x80000000
+#define LCRR_DBYP_SHIFT      31
+#define LCRR_BUFCMDC 0x30000000
+#define LCRR_BUFCMDC_SHIFT   28
+#define LCRR_ECL     0x03000000
+#define LCRR_ECL_SHIFT       24
+#define LCRR_EADC    0x00030000
+#define LCRR_EADC_SHIFT      16
+#define LCRR_CLKDIV  0x0000000F
+#define LCRR_CLKDIV_SHIFT     0
+	u8 res7[0x8];
+	__be32 fmr;             /**< Flash Mode Register */
+#define FMR_CWTO     0x0000F000
+#define FMR_CWTO_SHIFT       12
+#define FMR_BOOT     0x00000800
+#define FMR_ECCM     0x00000100
+#define FMR_AL       0x00000030
+#define FMR_AL_SHIFT          4
+#define FMR_OP       0x00000003
+#define FMR_OP_SHIFT          0
+	__be32 fir;             /**< Flash Instruction Register */
+#define FIR_OP0      0xF0000000
+#define FIR_OP0_SHIFT        28
+#define FIR_OP1      0x0F000000
+#define FIR_OP1_SHIFT        24
+#define FIR_OP2      0x00F00000
+#define FIR_OP2_SHIFT        20
+#define FIR_OP3      0x000F0000
+#define FIR_OP3_SHIFT        16
+#define FIR_OP4      0x0000F000
+#define FIR_OP4_SHIFT        12
+#define FIR_OP5      0x00000F00
+#define FIR_OP5_SHIFT         8
+#define FIR_OP6      0x000000F0
+#define FIR_OP6_SHIFT         4
+#define FIR_OP7      0x0000000F
+#define FIR_OP7_SHIFT         0
+#define FIR_OP_NOP   0x0	/* No operation and end of sequence */
+#define FIR_OP_CA    0x1        /* Issue current column address */
+#define FIR_OP_PA    0x2        /* Issue current block+page address */
+#define FIR_OP_UA    0x3        /* Issue user defined address */
+#define FIR_OP_CM0   0x4        /* Issue command from FCR[CMD0] */
+#define FIR_OP_CM1   0x5        /* Issue command from FCR[CMD1] */
+#define FIR_OP_CM2   0x6        /* Issue command from FCR[CMD2] */
+#define FIR_OP_CM3   0x7        /* Issue command from FCR[CMD3] */
+#define FIR_OP_WB    0x8        /* Write FBCR bytes from FCM buffer */
+#define FIR_OP_WS    0x9        /* Write 1 or 2 bytes from MDR[AS] */
+#define FIR_OP_RB    0xA        /* Read FBCR bytes to FCM buffer */
+#define FIR_OP_RS    0xB        /* Read 1 or 2 bytes to MDR[AS] */
+#define FIR_OP_CW0   0xC        /* Wait then issue FCR[CMD0] */
+#define FIR_OP_CW1   0xD        /* Wait then issue FCR[CMD1] */
+#define FIR_OP_RBW   0xE        /* Wait then read FBCR bytes */
+#define FIR_OP_RSW   0xF        /* Wait then read 1 or 2 bytes */
+	__be32 fcr;             /**< Flash Command Register */
+#define FCR_CMD0     0xFF000000
+#define FCR_CMD0_SHIFT       24
+#define FCR_CMD1     0x00FF0000
+#define FCR_CMD1_SHIFT       16
+#define FCR_CMD2     0x0000FF00
+#define FCR_CMD2_SHIFT        8
+#define FCR_CMD3     0x000000FF
+#define FCR_CMD3_SHIFT        0
+	__be32 fbar;            /**< Flash Block Address Register */
+#define FBAR_BLK     0x00FFFFFF
+	__be32 fpar;            /**< Flash Page Address Register */
+#define FPAR_SP_PI   0x00007C00
+#define FPAR_SP_PI_SHIFT     10
+#define FPAR_SP_MS   0x00000200
+#define FPAR_SP_CI   0x000001FF
+#define FPAR_SP_CI_SHIFT      0
+#define FPAR_LP_PI   0x0003F000
+#define FPAR_LP_PI_SHIFT     12
+#define FPAR_LP_MS   0x00000800
+#define FPAR_LP_CI   0x000007FF
+#define FPAR_LP_CI_SHIFT      0
+	__be32 fbcr;            /**< Flash Byte Count Register */
+#define FBCR_BC      0x00000FFF
+};
+
+/*
+ * FSL UPM routines
+ */
+struct fsl_upm {
+	__be32 __iomem *mxmr;	/* MxMR register the fsl_upm_*_pattern() helpers read and write */
+	int width;		/* NOTE(review): presumably the port width in bits -- confirm */
+};
+
+extern u32 fsl_lbc_addr(phys_addr_t addr_base);
+extern int fsl_lbc_find(phys_addr_t addr_base);
+extern int fsl_upm_find(phys_addr_t addr_base, struct fsl_upm *upm);
+
+/**
+ * fsl_upm_start_pattern - arm the UPM so the next access hitting it runs a pattern
+ * @upm:	pointer to the fsl_upm structure obtained via fsl_upm_find
+ * @pat_offset:	UPM pattern offset for the command to be executed
+ */
+static inline void fsl_upm_start_pattern(struct fsl_upm *upm, u8 pat_offset)
+{
+	/* The MxMR_MAD bits are cleared, then MxMR_OP_RP and the offset are set. */
+	const u32 set_bits = MxMR_OP_RP | pat_offset;
+
+	clrsetbits_be32(upm->mxmr, MxMR_MAD, set_bits);
+}
+
+/**
+ * fsl_upm_end_pattern - revert the UPM to normal operation mode
+ * @upm:	pointer to the fsl_upm structure obtained via fsl_upm_find
+ */
+static inline void fsl_upm_end_pattern(struct fsl_upm *upm)
+{
+	clrbits32(upm->mxmr, MxMR_OP_RP);
+	for (;;) {	/* spin until MxMR_OP_RP reads back as zero */
+		if (!(in_be32(upm->mxmr) & MxMR_OP_RP))
+			break;
+		cpu_relax();
+	}
+}
+
+/* overview of the fsl lbc controller */
+
+struct fsl_lbc_ctrl {
+	/* device info */
+	struct device			*dev;
+	struct fsl_lbc_regs __iomem	*regs;	/* I/O-mapped register block */
+	int				irq[2];
+	wait_queue_head_t		irq_wait;
+	spinlock_t			lock;
+	void				*nand;	/* NOTE(review): presumably NAND driver private data -- confirm */
+
+	/* status read from LTESR by irq handler */
+	unsigned int			irq_status;
+
+#ifdef CONFIG_SUSPEND
+	/* copy of the registers saved when the system goes to deep sleep */
+	struct fsl_lbc_regs		*saved_regs;
+#endif
+};
+
+extern int fsl_upm_run_pattern(struct fsl_upm *upm, void __iomem *io_base,
+			       u32 mar);
+extern struct fsl_lbc_ctrl *fsl_lbc_ctrl_dev;
+
+#endif /* __ASM_FSL_LBC_H */
diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h
new file mode 100644
index 0000000000..c0fbadb70b
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_pamu_stash.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ */
+
+#ifndef __FSL_PAMU_STASH_H
+#define __FSL_PAMU_STASH_H
+
+struct iommu_domain;
+
+/* cache stash targets; numbering starts at 1 and follows the cache level */
+enum pamu_stash_target {
+	PAMU_ATTR_CACHE_L1 = 1,
+	PAMU_ATTR_CACHE_L2 = 2,
+	PAMU_ATTR_CACHE_L3 = 3,
+};
+
+int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu);
+
+#endif  /* __FSL_PAMU_STASH_H */
diff --git a/arch/powerpc/include/asm/fsl_pm.h b/arch/powerpc/include/asm/fsl_pm.h
new file mode 100644
index 0000000000..61a4c97732
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_pm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Support Power Management
+ *
+ * Copyright 2014-2015 Freescale Semiconductor Inc.
+ */
+#ifndef __PPC_FSL_PM_H
+#define __PPC_FSL_PM_H
+
+#define E500_PM_PH10	1
+#define E500_PM_PH15	2
+#define E500_PM_PH20	3
+#define E500_PM_PH30	4
+#define E500_PM_DOZE	E500_PM_PH10
+#define E500_PM_NAP	E500_PM_PH15
+
+#define PLAT_PM_SLEEP	20
+#define PLAT_PM_LPM20	30
+
+#define FSL_PM_SLEEP		(1 << 0)
+#define FSL_PM_DEEP_SLEEP	(1 << 1)
+
+struct fsl_pm_ops {
+	/* mask pending interrupts to the RCPM from MPIC */
+	void (*irq_mask)(int cpu);
+
+	/* unmask pending interrupts to the RCPM from MPIC */
+	void (*irq_unmask)(int cpu);
+	void (*cpu_enter_state)(int cpu, int state);	/* NOTE(review): state presumably E500_PM_* -- confirm */
+	void (*cpu_exit_state)(int cpu, int state);	/* NOTE(review): state presumably E500_PM_* -- confirm */
+	void (*cpu_up_prepare)(int cpu);
+	void (*cpu_die)(int cpu);
+	int (*plat_enter_sleep)(void);
+	void (*freeze_time_base)(bool freeze);
+
+	/* keep the power of IP blocks during sleep/deep sleep */
+	void (*set_ip_power)(bool enable, u32 mask);
+
+	/* get platform supported power management modes; NOTE(review): presumably FSL_PM_* bits -- confirm */
+	unsigned int (*get_pm_modes)(void);
+};
+
+extern const struct fsl_pm_ops *qoriq_pm_ops;
+
+int __init fsl_rcpm_init(void);
+
+#endif /* __PPC_FSL_PM_H */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
new file mode 100644
index 0000000000..9e5a39b6a3
--- /dev/null
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_FTRACE
+#define _ASM_POWERPC_FTRACE
+
+#include <asm/types.h>
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_ADDR		((unsigned long)(_mcount))
+#define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
+
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
+/* Ignore unused weak functions which will have larger offsets */
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+#define FTRACE_MCOUNT_MAX_OFFSET	16
+#elif defined(CONFIG_PPC32)
+#define FTRACE_MCOUNT_MAX_OFFSET	8
+#endif
+
+#ifndef __ASSEMBLY__
+extern void _mcount(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* Only patchable-function-entry builds advance addr by MCOUNT_INSN_SIZE */
+	if (!IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
+		return addr;
+	return addr + MCOUNT_INSN_SIZE;
+}
+
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+				    unsigned long sp);
+
+struct module;
+struct dyn_ftrace;
+struct dyn_arch_ftrace {
+	struct module *mod;
+};
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define ftrace_need_init_nop()	(true)
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+
+struct ftrace_regs {
+	struct pt_regs regs;	/* arch_ftrace_get_regs() hands this out only when regs.msr != 0 */
+};
+
+static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
+{
+	struct pt_regs *regs = &fregs->regs;	/* regs->msr is cleared in ftrace_call */
+	return regs->msr ? regs : NULL;
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+				    unsigned long ip)
+{
+	regs_set_return_ip(&fregs->regs, ip);
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
+{
+	return instruction_pointer(&fregs->regs);
+}
+
+#define ftrace_regs_get_argument(fregs, n) \
+	regs_get_kernel_argument(&(fregs)->regs, n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+	kernel_stack_pointer(&(fregs)->regs)
+#define ftrace_regs_return_value(fregs) \
+	regs_return_value(&(fregs)->regs)
+#define ftrace_regs_set_return_value(fregs, ret) \
+	regs_set_return_value(&(fregs)->regs, ret)
+#define ftrace_override_function_with_return(fregs) \
+	override_function_with_return(&(fregs)->regs)
+#define ftrace_regs_query_register_offset(name) \
+	regs_query_register_offset(name)
+
+struct ftrace_ops;
+
+#define ftrace_graph_func ftrace_graph_func
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+#endif
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_FTRACE_SYSCALLS
+/*
+ * Some syscall entry functions on powerpc start with "ppc_" (fork and clone,
+ * for instance) or ppc32_/ppc64_. We should also match the sys_ variant with
+ * those.
+ */
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
+{
+	bool ppc_nn = !strncmp(sym, "ppc32_", 6) || !strncmp(sym, "ppc64_", 6);
+
+	return !strcmp(sym, name) || (ppc_nn && !strcmp(sym + 6, name + 4)) ||
+		(!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) ||
+		(!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4));
+}
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER)
+#include <asm/paca.h>
+
+static inline void this_cpu_disable_ftrace(void)
+{
+	get_paca()->ftrace_enabled = 0;
+}
+
+static inline void this_cpu_enable_ftrace(void)
+{
+	get_paca()->ftrace_enabled = 1;
+}
+
+/* Store the given value in this CPU's paca->ftrace_enabled (the stub used in the #else case does nothing) */
+static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled)
+{
+	get_paca()->ftrace_enabled = ftrace_enabled;
+}
+
+static inline u8 this_cpu_get_ftrace_enabled(void)
+{
+	return get_paca()->ftrace_enabled;
+}
+#else /* CONFIG_PPC64 */
+static inline void this_cpu_disable_ftrace(void) { }
+static inline void this_cpu_enable_ftrace(void) { }
+static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { }
+static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_FUNCTION_TRACER
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+void ftrace_free_init_tramp(void);
+#else
+static inline void ftrace_free_init_tramp(void) { }
+#endif
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_FTRACE */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
new file mode 100644
index 0000000000..b3001f8b2c
--- /dev/null
+++ b/arch/powerpc/include/asm/futex.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_FUTEX_H
+#define _ASM_POWERPC_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+#include <asm/synch.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+  __asm__ __volatile ( \
+	PPC_ATOMIC_ENTRY_BARRIER \
+"1:	lwarx	%0,0,%2\n" \
+	insn \
+"2:	stwcx.	%1,0,%2\n" \
+	"bne-	1b\n" \
+	PPC_ATOMIC_EXIT_BARRIER \
+	"li	%1,0\n" \
+"3:	.section .fixup,\"ax\"\n" \
+"4:	li	%1,%3\n" \
+	"b	3b\n" \
+	".previous\n" \
+	EX_TABLE(1b, 4b) \
+	EX_TABLE(2b, 4b) \
+	: "=&r" (oldval), "=&r" (ret) \
+	: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
+	: "cr0", "memory")
+
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
+{
+	int oldval = 0, ret;	/* oldval stays 0 when the default branch is taken */
+
+	if (!user_access_begin(uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	switch (op) {
+	case FUTEX_OP_SET:	/* stores oparg */
+		__futex_atomic_op("mr %1,%4\n", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:	/* stores old + oparg */
+		__futex_atomic_op("add %1,%0,%4\n", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:	/* stores old | oparg */
+		__futex_atomic_op("or %1,%0,%4\n", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:	/* stores old & ~oparg */
+		__futex_atomic_op("andc %1,%0,%4\n", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:	/* stores old ^ oparg */
+		__futex_atomic_op("xor %1,%0,%4\n", ret, oldval, uaddr, oparg);
+		break;
+	default:		/* any other op is rejected */
+		ret = -ENOSYS;
+	}
+	user_access_end();
+
+	*oval = oldval;		/* written unconditionally, whatever ret is */
+
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	int ret = 0;	/* only the fixup code at label 4 changes this, to -EFAULT */
+	u32 prev;	/* receives the word loaded by lwarx */
+
+	if (!user_access_begin(uaddr, sizeof(u32)))
+		return -EFAULT;
+
+        __asm__ __volatile__ (
+        PPC_ATOMIC_ENTRY_BARRIER
+"1:     lwarx   %1,0,%3         # futex_atomic_cmpxchg_inatomic\n\
+        cmpw    0,%1,%4\n\
+        bne-    3f\n"
+"2:     stwcx.  %5,0,%3\n\
+        bne-    1b\n"
+        PPC_ATOMIC_EXIT_BARRIER
+"3:	.section .fixup,\"ax\"\n\
+4:	li	%0,%6\n\
+	b	3b\n\
+	.previous\n"
+	EX_TABLE(1b, 4b)
+	EX_TABLE(2b, 4b)
+        : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
+        : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
+        : "cc", "memory");
+
+	user_access_end();
+
+	*uval = prev;	/* written unconditionally, whatever ret is */
+
+        return ret;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FUTEX_H */
diff --git a/arch/powerpc/include/asm/grackle.h b/arch/powerpc/include/asm/grackle.h
new file mode 100644
index 0000000000..7376e3fa15
--- /dev/null
+++ b/arch/powerpc/include/asm/grackle.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_GRACKLE_H
+#define _ASM_POWERPC_GRACKLE_H
+#ifdef __KERNEL__
+/*
+ * Functions for setting up and using a MPC106 northbridge
+ */
+
+#include <asm/pci-bridge.h>
+
+extern void setup_grackle(struct pci_controller *hose);
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_GRACKLE_H */
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
new file mode 100644
index 0000000000..f133b5930a
--- /dev/null
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_HARDIRQ_H
+#define _ASM_POWERPC_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int timer_irqs_event;
+	unsigned int broadcast_irqs_event;
+	unsigned int timer_irqs_others;
+	unsigned int pmu_irqs;
+	unsigned int mce_exceptions;
+	unsigned int spurious_irqs;
+	unsigned int sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+	unsigned int soft_nmi_irqs;
+#endif
+#ifdef CONFIG_PPC_DOORBELL
+	unsigned int doorbell_irqs;
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+extern u64 arch_irq_stat_cpu(unsigned int cpu);
+#define arch_irq_stat_cpu	arch_irq_stat_cpu
+
+#endif /* _ASM_POWERPC_HARDIRQ_H */
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
new file mode 100644
index 0000000000..d73153b027
--- /dev/null
+++ b/arch/powerpc/include/asm/head-64.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_HEAD_64_H
+#define _ASM_POWERPC_HEAD_64_H
+
+#include <asm/cache.h>
+
+#ifdef __ASSEMBLY__
+/*
+ * We can't do CPP stringification and concatenation directly into the section
+ * name for some reason, so these macros can do it for us.
+ */
+.macro define_ftsec name
+	.section ".head.text.\name\()","ax",@progbits
+.endm
+.macro define_data_ftsec name
+	.section ".head.data.\name\()","a",@progbits
+.endm
+.macro use_ftsec name
+	.section ".head.text.\name\()","ax",@progbits
+.endm
+
+/*
+ * Fixed (location) sections are used by opening fixed sections and emitting
+ * fixed section entries into them before closing them. Multiple fixed sections
+ * can be open at any time.
+ *
+ * Each fixed section created in a .S file must have corresponding linkage
+ * directives including location, added to  arch/powerpc/kernel/vmlinux.lds.S
+ *
+ * For each fixed section, code is generated into it in the order which it
+ * appears in the source.  Fixed section entries can be placed at a fixed
+ * location within the section using _LOCATION postfix variants. These must
+ * be ordered according to their relative placements within the section.
+ *
+ * OPEN_FIXED_SECTION(section_name, start_address, end_address)
+ * FIXED_SECTION_ENTRY_BEGIN(section_name, label1)
+ *
+ * USE_FIXED_SECTION(section_name)
+ * label3:
+ *     li  r10,128
+ *     mv  r11,r10
+
+ * FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address, size)
+ * FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, start_address, size)
+ * CLOSE_FIXED_SECTION(section_name)
+ *
+ * ZERO_FIXED_SECTION can be used to emit zeroed data.
+ *
+ * Troubleshooting:
+ * - If the build dies with "Error: attempt to move .org backwards" at
+ *   CLOSE_FIXED_SECTION() or elsewhere, there may be something
+ *   unexpected being added there. Remove the '. = x_len' line, rebuild, and
+ *   check what is pushing the section down.
+ * - If the build dies in linking, check arch/powerpc/tools/head_check.sh
+ *   comments.
+ * - If the kernel crashes or hangs in very early boot, it could be linker
+ *   stubs at the start of the main text.
+ */
+
+#define OPEN_FIXED_SECTION(sname, start, end)			\
+	sname##_start = (start);				\
+	sname##_end = (end);					\
+	sname##_len = (end) - (start);				\
+	define_ftsec sname;					\
+	. = 0x0;						\
+start_##sname:
+
+/*
+ * .linker_stub_catch section is used to catch linker stubs from being
+ * inserted in our .text section, above the start_text label (which breaks
+ * the ABS_ADDR calculation). See kernel/vmlinux.lds.S and tools/head_check.sh
+ * for more details. We would prefer to just keep a cacheline (0x80), but
+ * 0x100 seems to be how the linker aligns branch stub groups.
+ */
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+#define OPEN_TEXT_SECTION(start)				\
+	.section ".linker_stub_catch","ax",@progbits;		\
+linker_stub_catch:						\
+	. = 0x4;						\
+	text_start = (start) + 0x100;				\
+	.section ".text","ax",@progbits;			\
+	.balign 0x100;						\
+start_text:
+#else
+#define OPEN_TEXT_SECTION(start)				\
+	text_start = (start);					\
+	.section ".text","ax",@progbits;			\
+	. = 0x0;						\
+start_text:
+#endif
+
+#define ZERO_FIXED_SECTION(sname, start, end)			\
+	sname##_start = (start);				\
+	sname##_end = (end);					\
+	sname##_len = (end) - (start);				\
+	define_data_ftsec sname;				\
+	. = 0x0;						\
+	. = sname##_len;
+
+#define USE_FIXED_SECTION(sname)				\
+	use_ftsec sname;
+
+#define USE_TEXT_SECTION()					\
+	.text
+
+#define CLOSE_FIXED_SECTION(sname)				\
+	USE_FIXED_SECTION(sname);				\
+	. = sname##_len;					\
+end_##sname:
+
+
+#define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align)	\
+	USE_FIXED_SECTION(sname);				\
+	.balign __align;					\
+	.global name;						\
+name:
+
+#define FIXED_SECTION_ENTRY_BEGIN(sname, name)			\
+	__FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES)
+
+#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start, size) \
+	USE_FIXED_SECTION(sname);				\
+	name##_start = (start);					\
+	.if ((start) % (size) != 0);				\
+	.error "Fixed section exception vector misalignment";	\
+	.endif;							\
+	.if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && ((size) != 0x1000); \
+	.error "Fixed section exception vector bad size";	\
+	.endif;							\
+	.if (start) < sname##_start;				\
+	.error "Fixed section underflow";			\
+	.abort;							\
+	.endif;							\
+	. = (start) - sname##_start;				\
+	.global name;						\
+name:
+
+#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, start, size) \
+	.if (start) + (size) > sname##_end;			\
+	.error "Fixed section overflow";			\
+	.abort;							\
+	.endif;							\
+	.if (. - name > (start) + (size) - name##_start);	\
+	.error "Fixed entry overflow";				\
+	.abort;							\
+	.endif;							\
+	. = ((start) + (size) - sname##_start);			\
+
+
+/*
+ * These macros are used to change symbols in other fixed sections to be
+ * absolute or related to our current fixed section.
+ *
+ * - DEFINE_FIXED_SYMBOL / FIXED_SYMBOL_ABS_ADDR is used to find the
+ *   absolute address of a symbol within a fixed section, from any section.
+ *
+ * - ABS_ADDR is used to find the absolute address of any symbol, from within
+ *   a fixed section.
+ */
+// define label as being _in_ sname
+#define DEFINE_FIXED_SYMBOL(label, sname) \
+	label##_absolute = (label - start_ ## sname + sname ## _start)
+
+#define FIXED_SYMBOL_ABS_ADDR(label)				\
+	(label##_absolute)
+
+// find label from _within_ sname
+#define ABS_ADDR(label, sname) (label - start_ ## sname + sname ## _start)
+
+#endif /* __ASSEMBLY__ */
+
+#endif	/* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/heathrow.h b/arch/powerpc/include/asm/heathrow.h
new file mode 100644
index 0000000000..8bc5b16876
--- /dev/null
+++ b/arch/powerpc/include/asm/heathrow.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_HEATHROW_H
+#define _ASM_POWERPC_HEATHROW_H
+#ifdef __KERNEL__
+/*
+ * heathrow.h: definitions for using the "Heathrow" I/O controller chip.
+ *
+ * Grabbed from Open Firmware definitions on a PowerBook G3 Series
+ *
+ * Copyright (C) 1997 Paul Mackerras.
+ */
+
+/* Front light color on Yikes/B&W G3. 32 bits */
+#define HEATHROW_FRONT_LIGHT		0x32 /* (set to 0 or 0xffffffff) */
+
+/* Brightness/contrast (gossamer iMac ?). 8 bits */
+#define HEATHROW_BRIGHTNESS_CNTL	0x32
+#define HEATHROW_CONTRAST_CNTL		0x33
+
+/* offset from ohare base for feature control register */
+#define HEATHROW_MBCR			0x34	/* Media bay control */
+#define HEATHROW_FCR			0x38	/* Feature control */
+#define HEATHROW_AUX_CNTL_REG		0x3c	/* Aux control */
+
+/*
+ * Bits in feature control register.
+ * Bits postfixed with a _N are in inverse logic
+ */
+#define HRW_SCC_TRANS_EN_N	0x00000001	/* Also controls modem power */
+#define HRW_BAY_POWER_N		0x00000002
+#define HRW_BAY_PCI_ENABLE	0x00000004
+#define HRW_BAY_IDE_ENABLE	0x00000008
+#define HRW_BAY_FLOPPY_ENABLE	0x00000010
+#define HRW_IDE0_ENABLE		0x00000020
+#define HRW_IDE0_RESET_N	0x00000040
+#define HRW_BAY_DEV_MASK	0x0000001c
+#define HRW_BAY_RESET_N		0x00000080
+#define HRW_IOBUS_ENABLE	0x00000100	/* Internal IDE ? */
+#define HRW_SCC_ENABLE		0x00000200
+#define HRW_MESH_ENABLE		0x00000400
+#define HRW_SWIM_ENABLE		0x00000800
+#define HRW_SOUND_POWER_N	0x00001000
+#define HRW_SOUND_CLK_ENABLE	0x00002000
+#define HRW_SCCA_IO		0x00004000
+#define HRW_SCCB_IO		0x00008000
+#define HRW_PORT_OR_DESK_VIA_N	0x00010000	/* This one is 0 on PowerBook */
+#define HRW_PWM_MON_ID_N	0x00020000	/* ??? (0) */
+#define HRW_HOOK_MB_CNT_N	0x00040000	/* ??? (0) */
+#define HRW_SWIM_CLONE_FLOPPY	0x00080000	/* ??? (0) */
+#define HRW_AUD_RUN22		0x00100000	/* ??? (1) */
+#define HRW_SCSI_LINK_MODE	0x00200000	/* Read ??? (1) */
+#define HRW_ARB_BYPASS		0x00400000	/* Disable internal PCI arbiter */
+#define HRW_IDE1_RESET_N	0x00800000	/* Media bay */
+#define HRW_SLOW_SCC_PCLK	0x01000000	/* ??? (0) */
+#define HRW_RESET_SCC		0x02000000
+#define HRW_MFDC_CELL_ENABLE	0x04000000	/* ??? (0) */
+#define HRW_USE_MFDC		0x08000000	/* ??? (0) */
+#define HRW_BMAC_IO_ENABLE	0x60000000	/* two bits, not documented in OF */
+#define HRW_BMAC_RESET		0x80000000	/* not documented in OF */
+
+/* We OR those features at boot on desktop G3s */
+#define HRW_DEFAULTS		(HRW_SCCA_IO | HRW_SCCB_IO | HRW_SCC_ENABLE)
+
+/* Looks like Heathrow has some sort of GPIOs as well... */
+#define HRW_GPIO_MODEM_RESET	0x6d
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_HEATHROW_H */
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
new file mode 100644
index 0000000000..c0fcd1bbdb
--- /dev/null
+++ b/arch/powerpc/include/asm/highmem.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * PowerPC version, stolen from the i386 version.
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *		      Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * up to 16 Terabyte physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef __KERNEL__
+
+#include <linux/interrupt.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#include <asm/fixmap.h>
+
+extern pte_t *pkmap_page_table;
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+/*
+ * We use one full pte table with 4K pages. And with 16K/64K/256K pages pte
+ * table covers enough memory (32MB/512MB/2GB resp.), so that both FIXMAP
+ * and PKMAP can be placed in a single pte table. We use 512 pages for PKMAP
+ * in case of 16K/64K/256K page sizes.
+ */
+#ifdef CONFIG_PPC_4K_PAGES
+#define PKMAP_ORDER	PTE_SHIFT
+#else
+#define PKMAP_ORDER	9
+#endif
+#define LAST_PKMAP	(1 << PKMAP_ORDER)
+#ifndef CONFIG_PPC_4K_PAGES
+#define PKMAP_BASE	(FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1))
+#else
+#define PKMAP_BASE	((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
+#endif
+#define LAST_PKMAP_MASK	(LAST_PKMAP-1)
+#define PKMAP_NR(virt)  (((virt) - PKMAP_BASE) >> PAGE_SHIFT)	/* page index of virt above PKMAP_BASE */
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define flush_cache_kmaps()	flush_cache_all()
+
+#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev)	\
+	__set_pte_at(mm, vaddr, ptep, ptev, 1)
+#define arch_kmap_local_post_map(vaddr, pteval)	\
+	local_flush_tlb_page(NULL, vaddr)
+#define arch_kmap_local_post_unmap(vaddr)	\
+	local_flush_tlb_page(NULL, vaddr)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_HIGHMEM_H */
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
new file mode 100644
index 0000000000..155748460c
--- /dev/null
+++ b/arch/powerpc/include/asm/hmi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Hypervisor Maintenance Interrupt header file.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_HMI_H__
+#define __ASM_PPC64_HMI_H__
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+#define	CORE_TB_RESYNC_REQ_BIT		63
+#define MAX_SUBCORE_PER_CORE		4
+
+/*
+ * sibling_subcore_state structure is used to co-ordinate all threads
+ * during HMI to avoid TB corruption. This structure is allocated once
+ * per each core and shared by all threads on that core.
+ */
+struct sibling_subcore_state {
+	unsigned long	flags;
+	u8		in_guest[MAX_SUBCORE_PER_CORE];
+};
+
+extern void wait_for_subcore_guest_exit(void);
+extern void wait_for_tb_resync(void);
+#else
+static inline void wait_for_subcore_guest_exit(void) { }
+static inline void wait_for_tb_resync(void) { }
+#endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
+#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
new file mode 100644
index 0000000000..ea71f7245a
--- /dev/null
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_HUGETLB_H
+#define _ASM_POWERPC_HUGETLB_H
+
+#ifdef CONFIG_HUGETLB_PAGE
+#include <asm/page.h>
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/hugetlb.h>
+#elif defined(CONFIG_PPC_E500)
+#include <asm/nohash/hugetlb-e500.h>
+#elif defined(CONFIG_PPC_8xx)
+#include <asm/nohash/32/hugetlb-8xx.h>
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+extern bool hugetlb_disabled;
+
+void __init hugetlbpage_init_defaultsize(void);
+
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+			   unsigned long len);
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len)
+{
+	if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) || radix_enabled())
+		return 0;	/* only the hash MMU path consults the slice code */
+	return slice_is_hugepage_only_range(mm, addr, len);
+}
+#define is_hugepage_only_range is_hugepage_only_range
+
+#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+			    unsigned long end, unsigned long floor,
+			    unsigned long ceiling);
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+					  unsigned long addr, pte_t *ptep)
+{
+	/* Clear the entry first, then flush; the cleared value is returned. */
+	pte_t old = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+
+	flush_hugetlb_page(vma, addr);
+	return old;
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+			       unsigned long addr, pte_t *ptep,
+			       pte_t pte, int dirty);
+
+void gigantic_hugetlb_cma_reserve(void) __init;
+#include <asm-generic/hugetlb.h>
+
+#else /* ! CONFIG_HUGETLB_PAGE */
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+}
+
+#define hugepd_shift(x) 0
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned pdshift)
+{
+	return NULL;
+}
+
+
+static inline void __init gigantic_hugetlb_cma_reserve(void)
+{
+}
+
+static inline void __init hugetlbpage_init_defaultsize(void)
+{
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#endif /* _ASM_POWERPC_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
new file mode 100644
index 0000000000..c099780385
--- /dev/null
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -0,0 +1,673 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_HVCALL_H
+#define _ASM_POWERPC_HVCALL_H
+#ifdef __KERNEL__
+
+#define HVSC			.long 0x44000022
+
+#define H_SUCCESS	0
+#define H_BUSY		1	/* Hardware busy -- retry later */
+#define H_CLOSED	2	/* Resource closed */
+#define H_NOT_AVAILABLE 3
+#define H_CONSTRAINED	4	/* Resource request constrained to max allowed */
+#define H_PARTIAL       5
+#define H_IN_PROGRESS	14	/* Kind of like busy */
+#define H_PAGE_REGISTERED 15
+#define H_PARTIAL_STORE   16
+#define H_PENDING	17	/* returned from H_POLL_PENDING */
+#define H_CONTINUE	18	/* Returned from H_Join on success */
+#define H_LONG_BUSY_START_RANGE		9900  /* Start of long busy range */
+#define H_LONG_BUSY_ORDER_1_MSEC	9900  /* Long busy, hint that 1msec \
+						 is a good time to retry */
+#define H_LONG_BUSY_ORDER_10_MSEC	9901  /* Long busy, hint that 10msec \
+						 is a good time to retry */
+#define H_LONG_BUSY_ORDER_100_MSEC 	9902  /* Long busy, hint that 100msec \
+						 is a good time to retry */
+#define H_LONG_BUSY_ORDER_1_SEC		9903  /* Long busy, hint that 1sec \
+						 is a good time to retry */
+#define H_LONG_BUSY_ORDER_10_SEC	9904  /* Long busy, hint that 10sec \
+						 is a good time to retry */
+#define H_LONG_BUSY_ORDER_100_SEC	9905  /* Long busy, hint that 100sec \
+						 is a good time to retry */
+#define H_LONG_BUSY_END_RANGE		9905  /* End of long busy range */
+
+/* Internal value used in book3s_hv kvm support; not returned to guests */
+#define H_TOO_HARD	9999
+
+#define H_HARDWARE	-1	/* Hardware error */
+#define H_FUNCTION	-2	/* Function not supported */
+#define H_PRIVILEGE	-3	/* Caller not privileged */
+#define H_PARAMETER	-4	/* Parameter invalid, out-of-range or conflicting */
+#define H_BAD_MODE	-5	/* Illegal msr value */
+#define H_PTEG_FULL	-6	/* PTEG is full */
+#define H_NOT_FOUND	-7	/* PTE was not found */
+#define H_RESERVED_DABR	-8	/* DABR address is reserved by the hypervisor on this processor */
+#define H_NO_MEM	-9
+#define H_AUTHORITY	-10
+#define H_PERMISSION	-11
+#define H_DROPPED	-12
+#define H_SOURCE_PARM	-13
+#define H_DEST_PARM	-14
+#define H_REMOTE_PARM	-15
+#define H_RESOURCE	-16
+#define H_ADAPTER_PARM  -17
+#define H_RH_PARM       -18
+#define H_RCQ_PARM      -19
+#define H_SCQ_PARM      -20
+#define H_EQ_PARM       -21
+#define H_RT_PARM       -22
+#define H_ST_PARM       -23
+#define H_SIGT_PARM     -24
+#define H_TOKEN_PARM    -25
+#define H_MLENGTH_PARM  -27
+#define H_MEM_PARM      -28
+#define H_MEM_ACCESS_PARM -29
+#define H_ATTR_PARM     -30
+#define H_PORT_PARM     -31
+#define H_MCG_PARM      -32
+#define H_VL_PARM       -33
+#define H_TSIZE_PARM    -34
+#define H_TRACE_PARM    -35
+
+#define H_MASK_PARM     -37
+#define H_MCG_FULL      -38
+#define H_ALIAS_EXIST   -39
+#define H_P_COUNTER     -40
+#define H_TABLE_FULL    -41
+#define H_ALT_TABLE     -42
+#define H_MR_CONDITION  -43
+#define H_NOT_ENOUGH_RESOURCES -44
+#define H_R_STATE       -45
+#define H_RESCINDED     -46
+#define H_ABORTED	-54
+#define H_P2		-55
+#define H_P3		-56
+#define H_P4		-57
+#define H_P5		-58
+#define H_P6		-59
+#define H_P7		-60
+#define H_P8		-61
+#define H_P9		-62
+#define H_NOOP		-63
+#define H_TOO_BIG	-64
+#define H_UNSUPPORTED	-67
+#define H_OVERLAP	-68
+#define H_INTERRUPT	-69
+#define H_BAD_DATA	-70
+#define H_NOT_ACTIVE	-71
+#define H_SG_LIST	-72
+#define H_OP_MODE	-73
+#define H_COP_HW	-74
+#define H_STATE		-75
+#define H_IN_USE	-77
+#define H_UNSUPPORTED_FLAG_START	-256
+#define H_UNSUPPORTED_FLAG_END		-511
+#define H_MULTI_THREADS_ACTIVE	-9005
+#define H_OUTSTANDING_COP_OPS	-9006
+
+
+/* Long Busy is a condition that can be returned by the firmware
+ * when a call cannot be completed now, but the identical call
+ * should be retried later.  This prevents calls blocking in the
+ * firmware for long periods of time.  Annoyingly the firmware can return
+ * a range of return codes, hinting at how long we should wait before
+ * retrying.  If you don't care for the hint, the macro below is a good
+ * way to check for the long_busy return codes (note: x is evaluated twice).
+ */
+#define H_IS_LONG_BUSY(x)  (((x) >= H_LONG_BUSY_START_RANGE) \
+			     && ((x) <= H_LONG_BUSY_END_RANGE))
+
+/* Flags */
+#define H_LARGE_PAGE		(1UL<<(63-16))
+#define H_EXACT			(1UL<<(63-24))	/* Use exact PTE or return H_PTEG_FULL */
+#define H_R_XLATE		(1UL<<(63-25))	/* include a valid logical page num in the pte if the valid bit is set */
+#define H_READ_4		(1UL<<(63-26))	/* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE	(1UL<<(63-28))
+#define H_PAGE_UNUSED		((1UL<<(63-29)) | (1UL<<(63-30)))
+#define H_PAGE_SET_UNUSED	(H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED	(H_PAGE_SET_UNUSED | (1UL<<(63-31)))
+#define H_PAGE_SET_ACTIVE	H_PAGE_STATE_CHANGE
+#define H_AVPN			(1UL<<(63-32))	/* An avpn is provided as a sanity test */
+#define H_ANDCOND		(1UL<<(63-33))
+#define H_LOCAL			(1UL<<(63-35))
+#define H_ICACHE_INVALIDATE	(1UL<<(63-40))	/* icbi, etc.  (ignored for IO pages) */
+#define H_ICACHE_SYNCHRONIZE	(1UL<<(63-41))	/* dcbst, icbi, etc (ignored for IO pages) */
+#define H_COALESCE_CAND	(1UL<<(63-42))	/* page is a good candidate for coalescing */
+#define H_ZERO_PAGE		(1UL<<(63-48))	/* zero the page before mapping (ignored for IO pages) */
+#define H_COPY_PAGE		(1UL<<(63-49))
+#define H_N			(1UL<<(63-61))
+#define H_PP1			(1UL<<(63-62))
+#define H_PP2			(1UL<<(63-63))
+
+/* Flags for H_REGISTER_VPA subfunction field */
+#define H_VPA_FUNC_SHIFT	(63-18)	/* Bit posn of subfunction code */
+#define H_VPA_FUNC_MASK		7UL
+#define H_VPA_REG_VPA		1UL	/* Register Virtual Processor Area */
+#define H_VPA_REG_DTL		2UL	/* Register Dispatch Trace Log */
+#define H_VPA_REG_SLB		3UL	/* Register SLB shadow buffer */
+#define H_VPA_DEREG_VPA		5UL	/* Deregister Virtual Processor Area */
+#define H_VPA_DEREG_DTL		6UL	/* Deregister Dispatch Trace Log */
+#define H_VPA_DEREG_SLB		7UL	/* Deregister SLB shadow buffer */
+
+/* VASI States */
+#define H_VASI_INVALID          0
+#define H_VASI_ENABLED          1
+#define H_VASI_ABORTED          2
+#define H_VASI_SUSPENDING       3
+#define H_VASI_SUSPENDED        4
+#define H_VASI_RESUMED          5
+#define H_VASI_COMPLETED        6
+
+/* VASI signal codes. Only the Cancel code is valid for H_VASI_SIGNAL. */
+#define H_VASI_SIGNAL_CANCEL    1
+#define H_VASI_SIGNAL_ABORT     2
+#define H_VASI_SIGNAL_SUSPEND   3
+#define H_VASI_SIGNAL_COMPLETE  4
+#define H_VASI_SIGNAL_ENABLE    5
+#define H_VASI_SIGNAL_FAILOVER  6
+
+/* Each control block has to be on a 4K boundary */
+#define H_CB_ALIGNMENT          4096
+
+/* pSeries hypervisor opcodes */
+#define H_REMOVE		0x04
+#define H_ENTER			0x08
+#define H_READ			0x0c
+#define H_CLEAR_MOD		0x10
+#define H_CLEAR_REF		0x14
+#define H_PROTECT		0x18
+#define H_GET_TCE		0x1c
+#define H_PUT_TCE		0x20
+#define H_SET_SPRG0		0x24
+#define H_SET_DABR		0x28
+#define H_PAGE_INIT		0x2c
+#define H_SET_ASR		0x30
+#define H_ASR_ON		0x34
+#define H_ASR_OFF		0x38
+#define H_LOGICAL_CI_LOAD	0x3c
+#define H_LOGICAL_CI_STORE	0x40
+#define H_LOGICAL_CACHE_LOAD	0x44
+#define H_LOGICAL_CACHE_STORE	0x48
+#define H_LOGICAL_ICBI		0x4c
+#define H_LOGICAL_DCBF		0x50
+#define H_GET_TERM_CHAR		0x54
+#define H_PUT_TERM_CHAR		0x58
+#define H_REAL_TO_LOGICAL	0x5c
+#define H_HYPERVISOR_DATA	0x60
+#define H_EOI			0x64
+#define H_CPPR			0x68
+#define H_IPI			0x6c
+#define H_IPOLL			0x70
+#define H_XIRR			0x74
+#define H_PERFMON		0x7c
+#define H_MIGRATE_DMA		0x78
+#define H_REGISTER_VPA		0xDC
+#define H_CEDE			0xE0
+#define H_CONFER		0xE4
+#define H_PROD			0xE8
+#define H_GET_PPP		0xEC
+#define H_SET_PPP		0xF0
+#define H_PURR			0xF4
+#define H_PIC			0xF8
+#define H_REG_CRQ		0xFC
+#define H_FREE_CRQ		0x100
+#define H_VIO_SIGNAL		0x104
+#define H_SEND_CRQ		0x108
+#define H_COPY_RDMA		0x110
+#define H_REGISTER_LOGICAL_LAN	0x114
+#define H_FREE_LOGICAL_LAN	0x118
+#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
+#define H_SEND_LOGICAL_LAN	0x120
+#define H_BULK_REMOVE		0x124
+#define H_MULTICAST_CTRL	0x130
+#define H_SET_XDABR		0x134
+#define H_STUFF_TCE		0x138
+#define H_PUT_TCE_INDIRECT	0x13C
+#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
+#define H_VTERM_PARTNER_INFO	0x150
+#define H_REGISTER_VTERM	0x154
+#define H_FREE_VTERM		0x158
+#define H_RESET_EVENTS          0x15C
+#define H_ALLOC_RESOURCE        0x160
+#define H_FREE_RESOURCE         0x164
+#define H_MODIFY_QP             0x168
+#define H_QUERY_QP              0x16C
+#define H_REREGISTER_PMR        0x170
+#define H_REGISTER_SMR          0x174
+#define H_QUERY_MR              0x178
+#define H_QUERY_MW              0x17C
+#define H_QUERY_HCA             0x180
+#define H_QUERY_PORT            0x184
+#define H_MODIFY_PORT           0x188
+#define H_DEFINE_AQP1           0x18C
+#define H_GET_TRACE_BUFFER      0x190
+#define H_DEFINE_AQP0           0x194
+#define H_RESIZE_MR             0x198
+#define H_ATTACH_MCQP           0x19C
+#define H_DETACH_MCQP           0x1A0
+#define H_CREATE_RPT            0x1A4
+#define H_REMOVE_RPT            0x1A8
+#define H_REGISTER_RPAGES       0x1AC
+#define H_DISABLE_AND_GET       0x1B0
+#define H_ERROR_DATA            0x1B4
+#define H_GET_HCA_INFO          0x1B8
+#define H_GET_PERF_COUNT        0x1BC
+#define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
+#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
+#define H_QUERY_INT_STATE       0x1E4
+#define H_POLL_PENDING		0x1D8
+#define H_ILLAN_ATTRIBUTES	0x244
+#define H_MODIFY_HEA_QP		0x250
+#define H_QUERY_HEA_QP		0x254
+#define H_QUERY_HEA		0x258
+#define H_QUERY_HEA_PORT	0x25C
+#define H_MODIFY_HEA_PORT	0x260
+#define H_REG_BCMC		0x264
+#define H_DEREG_BCMC		0x268
+#define H_REGISTER_HEA_RPAGES	0x26C
+#define H_DISABLE_AND_GET_HEA	0x270
+#define H_GET_HEA_INFO		0x274
+#define H_ALLOC_HEA_RESOURCE	0x278
+#define H_ADD_CONN		0x284
+#define H_DEL_CONN		0x288
+#define H_JOIN			0x298
+#define H_VASI_SIGNAL           0x2A0
+#define H_VASI_STATE            0x2A4
+#define H_VIOCTL		0x2A8
+#define H_ENABLE_CRQ		0x2B0
+#define H_GET_EM_PARMS		0x2B8
+#define H_SET_MPP		0x2D0
+#define H_GET_MPP		0x2D4
+#define H_REG_SUB_CRQ		0x2DC
+#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
+#define H_FREE_SUB_CRQ		0x2E0
+#define H_SEND_SUB_CRQ		0x2E4
+#define H_SEND_SUB_CRQ_INDIRECT	0x2E8
+#define H_BEST_ENERGY		0x2F4
+#define H_XIRR_X		0x2FC
+#define H_RANDOM		0x300
+#define H_COP			0x304
+#define H_GET_MPP_X		0x314
+#define H_SET_MODE		0x31C
+#define H_BLOCK_REMOVE		0x328
+#define H_CLEAR_HPT		0x358
+#define H_REQUEST_VMC		0x360
+#define H_RESIZE_HPT_PREPARE	0x36C
+#define H_RESIZE_HPT_COMMIT	0x370
+#define H_REGISTER_PROC_TBL	0x37C
+#define H_SIGNAL_SYS_RESET	0x380
+#define H_ALLOCATE_VAS_WINDOW	0x388
+#define H_MODIFY_VAS_WINDOW	0x38C
+#define H_DEALLOCATE_VAS_WINDOW	0x390
+#define H_QUERY_VAS_WINDOW	0x394
+#define H_QUERY_VAS_CAPABILITIES	0x398
+#define H_QUERY_NX_CAPABILITIES	0x39C
+#define H_GET_NX_FAULT		0x3A0
+#define H_INT_GET_SOURCE_INFO   0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO    0x3B4
+#define H_INT_SET_QUEUE_CONFIG  0x3B8
+#define H_INT_GET_QUEUE_CONFIG  0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB               0x3C8
+#define H_INT_SYNC              0x3CC
+#define H_INT_RESET             0x3D0
+#define H_SCM_READ_METADATA     0x3E4
+#define H_SCM_WRITE_METADATA    0x3E8
+#define H_SCM_BIND_MEM          0x3EC
+#define H_SCM_UNBIND_MEM        0x3F0
+#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
+#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
+#define H_SCM_UNBIND_ALL        0x3FC
+#define H_SCM_HEALTH            0x400
+#define H_SCM_PERFORMANCE_STATS 0x418
+#define H_PKS_GET_CONFIG	0x41C
+#define H_PKS_SET_PASSWORD	0x420
+#define H_PKS_GEN_PASSWORD	0x424
+#define H_PKS_WRITE_OBJECT	0x42C
+#define H_PKS_GEN_KEY		0x430
+#define H_PKS_READ_OBJECT	0x434
+#define H_PKS_REMOVE_OBJECT	0x438
+#define H_PKS_CONFIRM_OBJECT_FLUSHED	0x43C
+#define H_RPT_INVALIDATE	0x448
+#define H_SCM_FLUSH		0x44C
+#define H_GET_ENERGY_SCALE_INFO	0x450
+#define H_PKS_SIGNED_UPDATE	0x454
+#define H_WATCHDOG		0x45C
+#define MAX_HCALL_OPCODE	H_WATCHDOG
+
+/* Scope args for H_SCM_UNBIND_ALL */
+#define H_UNBIND_SCOPE_ALL (0x1)
+#define H_UNBIND_SCOPE_DRC (0x2)
+
+/* H_VIOCTL functions */
+#define H_GET_VIOA_DUMP_SIZE	0x01
+#define H_GET_VIOA_DUMP		0x02
+#define H_GET_ILLAN_NUM_VLAN_IDS 0x03
+#define H_GET_ILLAN_VLAN_ID_LIST 0x04
+#define H_GET_ILLAN_SWITCH_ID	0x05
+#define H_DISABLE_MIGRATION	0x06
+#define H_ENABLE_MIGRATION	0x07
+#define H_GET_PARTNER_INFO	0x08
+#define H_GET_PARTNER_WWPN_LIST	0x09
+#define H_DISABLE_ALL_VIO_INTS	0x0A
+#define H_DISABLE_VIO_INTERRUPT	0x0B
+#define H_ENABLE_VIO_INTERRUPT	0x0C
+#define H_GET_SESSION_TOKEN	0x19
+#define H_SESSION_ERR_DETECTED	0x1A
+
+
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
+/*
+ * Platform specific hcalls, used by QEMU/SLOF. These are ignored by
+ * KVM and only kept here so we can identify them during tracing.
+ */
+#define H_LOGICAL_MEMOP  0xF001
+#define H_CAS            0XF002
+#define H_UPDATE_DT      0XF003
+
+/* "Platform specific hcalls", provided by PHYP */
+#define H_GET_24X7_CATALOG_PAGE	0xF078
+#define H_GET_24X7_DATA		0xF07C
+#define H_GET_PERF_COUNTER_INFO	0xF080
+
+/* Platform-specific hcalls used for nested HV KVM */
+#define H_SET_PARTITION_TABLE	0xF800
+#define H_ENTER_NESTED		0xF804
+#define H_TLB_INVALIDATE	0xF808
+#define H_COPY_TOFROM_GUEST	0xF80C
+
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED        0x1
+
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN		0xEF00
+#define H_SVM_PAGE_OUT		0xEF04
+#define H_SVM_INIT_START	0xEF08
+#define H_SVM_INIT_DONE		0xEF0C
+#define H_SVM_INIT_ABORT	0xEF14
+
+/* Values for 2nd argument to H_SET_MODE */
+#define H_SET_MODE_RESOURCE_SET_CIABR		1
+#define H_SET_MODE_RESOURCE_SET_DAWR0		2
+#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
+#define H_SET_MODE_RESOURCE_LE			4
+#define H_SET_MODE_RESOURCE_SET_DAWR1		5
+
+/* Values for argument to H_SIGNAL_SYS_RESET */
+#define H_SIGNAL_SYS_RESET_ALL			-1
+#define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2
+/* >= 0 values are CPU number */
+
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31	(1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED	(1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED	(1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL	(1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED	(1ull << 56) // IBM bit 7
+#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST	(1ull << 54) // IBM bit 9
+#define H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST (1ull << 52) // IBM bit 11
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
+#define H_CPU_BEHAV_FAVOUR_SECURITY_H	(1ull << 60) // IBM bit 3
+#define H_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58) // IBM bit 5
+#define H_CPU_BEHAV_FLUSH_LINK_STACK	(1ull << 57) // IBM bit 6
+#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY	(1ull << 56) // IBM bit 7
+#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8
+#define H_CPU_BEHAV_NO_STF_BARRIER	(1ull << 54) // IBM bit 9
+
+/* Flag values used in H_REGISTER_PROC_TBL hcall */
+#define PROC_TABLE_OP_MASK	0x18
+#define PROC_TABLE_DEREG	0x10
+#define PROC_TABLE_NEW		0x18
+#define PROC_TABLE_TYPE_MASK	0x06
+#define PROC_TABLE_HPT_SLB	0x00
+#define PROC_TABLE_HPT_PT	0x02
+#define PROC_TABLE_RADIX	0x04
+#define PROC_TABLE_GTSE		0x01
+
+/*
+ * Defines for
+ * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information.
+ */
+
+/* Type of translation to invalidate (type) */
+#define H_RPTI_TYPE_NESTED	0x0001	/* Invalidate nested guest partition-scope */
+#define H_RPTI_TYPE_TLB		0x0002	/* Invalidate TLB */
+#define H_RPTI_TYPE_PWC		0x0004	/* Invalidate Page Walk Cache */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+#define H_RPTI_TYPE_PRT		0x0008
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+#define H_RPTI_TYPE_PAT		0x0008
+#define H_RPTI_TYPE_ALL		(H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+				 H_RPTI_TYPE_PRT)
+#define H_RPTI_TYPE_NESTED_ALL	(H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+				 H_RPTI_TYPE_PAT)
+
+/* Invalidation targets (target) */
+#define H_RPTI_TARGET_CMMU		0x01 /* All virtual processors in the partition */
+#define H_RPTI_TARGET_CMMU_LOCAL	0x02 /* Current virtual processor */
+/* All nest/accelerator agents in use by the partition */
+#define H_RPTI_TARGET_NMMU		0x04
+
+/* Page size mask (page sizes) */
+#define H_RPTI_PAGE_4K	0x01
+#define H_RPTI_PAGE_64K	0x02
+#define H_RPTI_PAGE_2M	0x04
+#define H_RPTI_PAGE_1G	0x08
+#define H_RPTI_PAGE_ALL (-1UL)
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+/**
+ * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
+ * @opcode: The hypervisor call to make.
+ *
+ * This call supports up to 7 arguments and only returns the status of
+ * the hcall. Use this version where possible, it's slightly faster than
+ * the other plpar_hcalls.
+ */
+long plpar_hcall_norets(unsigned long opcode, ...);
+
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
+/**
+ * plpar_hcall: - Make a pseries hypervisor call
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 4 return arguments in.
+ *
+ * This call supports up to 6 arguments and 4 return arguments. Use
+ * PLPAR_HCALL_BUFSIZE to size the return argument buffer.
+ *
+ * Used for all but the craziest of phyp interfaces (see plpar_hcall9)
+ */
+#define PLPAR_HCALL_BUFSIZE 4
+long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
+
+/**
+ * plpar_hcall_raw: - Make a hypervisor call without calculating hcall stats
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 4 return arguments in.
+ *
+ * This call supports up to 6 arguments and 4 return arguments. Use
+ * PLPAR_HCALL_BUFSIZE to size the return argument buffer.
+ *
+ * Used when phyp interface needs to be called in real mode. Similar to
+ * plpar_hcall, but plpar_hcall_raw works in real mode and does not
+ * calculate hypervisor call statistics.
+ */
+long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
+
+/**
+ * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 9 return arguments in.
+ *
+ * This call supports up to 9 arguments and 9 return arguments. Use
+ * PLPAR_HCALL9_BUFSIZE to size the return argument buffer.
+ */
+#define PLPAR_HCALL9_BUFSIZE 9
+long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...);
+
+/* pseries hcall tracing */
+extern struct static_key hcall_tracepoint_key;
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
+void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
+
+struct hvcall_mpp_data {
+	unsigned long entitled_mem;
+	unsigned long mapped_mem;
+	unsigned short group_num;
+	unsigned short pool_num;
+	unsigned char mem_weight;
+	unsigned char unallocated_mem_weight;
+	unsigned long unallocated_entitlement;  /* value in bytes */
+	unsigned long pool_size;
+	signed long loan_request;
+	unsigned long backing_mem;
+};
+
+int h_get_mpp(struct hvcall_mpp_data *);
+
+struct hvcall_mpp_x_data {
+	unsigned long coalesced_bytes;
+	unsigned long pool_coalesced_bytes;
+	unsigned long pool_purr_cycles;
+	unsigned long pool_spurr_cycles;
+	unsigned long reserved[3];
+};
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
+
+/* Map a long-busy return code to the retry delay it hints at, in msecs. */
+static inline unsigned int get_longbusy_msecs(int longbusy_rc)
+{
+	switch (longbusy_rc) {
+	case H_LONG_BUSY_ORDER_10_MSEC:
+		return 10;
+	case H_LONG_BUSY_ORDER_100_MSEC:
+		return 100;
+	case H_LONG_BUSY_ORDER_1_SEC:
+		return 1000;
+	case H_LONG_BUSY_ORDER_10_SEC:
+		return 10000;
+	case H_LONG_BUSY_ORDER_100_SEC:
+		return 100000;
+	case H_LONG_BUSY_ORDER_1_MSEC:
+	default:	/* any other code gets the shortest delay */
+		return 1;
+	}
+}
+
+struct h_cpu_char_result {
+	u64 character;
+	u64 behaviour;
+};
+
+/*
+ * Register state for entering a nested guest with H_ENTER_NESTED.
+ * New members must be added at the end.
+ */
+struct hv_guest_state {
+	u64 version;		/* version of this structure layout, must be first */
+	u32 lpid;
+	u32 vcpu_token;
+	/* These registers are hypervisor privileged (at least for writing) */
+	u64 lpcr;
+	u64 pcr;
+	u64 amor;
+	u64 dpdes;
+	u64 hfscr;
+	s64 tb_offset;
+	u64 dawr0;
+	u64 dawrx0;
+	u64 ciabr;
+	u64 hdec_expiry;
+	u64 purr;
+	u64 spurr;
+	u64 ic;
+	u64 vtb;
+	u64 hdar;
+	u64 hdsisr;
+	u64 heir;
+	u64 asdr;
+	/* These are OS privileged but need to be set late in guest entry */
+	u64 srr0;
+	u64 srr1;
+	u64 sprg[4];
+	u64 pidr;
+	u64 cfar;
+	u64 ppr;
+	/* Version 1 ends here */
+	u64 dawr1;
+	u64 dawrx1;
+	/* Version 2 ends here */
+};
+
+/* Latest version of hv_guest_state structure */
+#define HV_GUEST_STATE_VERSION	2
+
+/* Size of the hv_guest_state layout for @version, or -1 for other versions. */
+static inline int hv_guest_state_size(unsigned int version)
+{
+	if (version == 1)
+		return offsetofend(struct hv_guest_state, ppr);
+	if (version == 2)
+		return offsetofend(struct hv_guest_state, dawrx1);
+
+	/* Not a layout version listed above. */
+	return -1;
+}
+
+/*
+ * From the document "H_GetPerformanceCounterInfo Interface" v1.07
+ *
+ * H_GET_PERF_COUNTER_INFO argument
+ */
+struct hv_get_perf_counter_info_params {
+	__be32 counter_request; /* I */
+	__be32 starting_index;  /* IO */
+	__be16 secondary_index; /* IO */
+	__be16 returned_values; /* O */
+	__be32 detail_rc; /* O, only needed when called via *_norets() */
+
+	/*
+	 * O, size of each counter_value element in bytes, only set for version
+	 * >= 0x3
+	 */
+	__be16 cv_element_size;
+
+	/* I, 0 (zero) for versions < 0x3 */
+	__u8 counter_info_version_in;
+
+	/* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */
+	__u8 counter_info_version_out;
+	__u8 reserved[0xC];
+	__u8 counter_value[];
+} __packed;
+
+#define HGPCI_REQ_BUFFER_SIZE	4096
+#define HGPCI_MAX_DATA_BYTES \
+	(HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
+
+struct hv_gpci_request_buffer {
+	struct hv_get_perf_counter_info_params params;
+	uint8_t bytes[HGPCI_MAX_DATA_BYTES];
+} __packed;
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
new file mode 100644
index 0000000000..ccb2034506
--- /dev/null
+++ b/arch/powerpc/include/asm/hvconsole.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * hvconsole.h
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * LPAR console support.
+ */
+
+#ifndef _PPC64_HVCONSOLE_H
+#define _PPC64_HVCONSOLE_H
+#ifdef __KERNEL__
+
+/*
+ * PSeries firmware will only send/recv up to 16 bytes of character data per
+ * hcall.
+ */
+#define MAX_VIO_PUT_CHARS	16
+#define SIZE_VIO_GET_CHARS	16
+
+/*
+ * Vio firmware always attempts to fetch SIZE_VIO_GET_CHARS chars.  The 'count'
+ * parm is included to conform to put_chars() function pointer template.
+ */
+extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
+extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
+
+/* Provided by HVC VIO */
+void hvc_vio_init_early(void);
+
+#endif /* __KERNEL__ */
+#endif /* _PPC64_HVCONSOLE_H */
diff --git a/arch/powerpc/include/asm/hvcserver.h b/arch/powerpc/include/asm/hvcserver.h
new file mode 100644
index 0000000000..2b20403e9f
--- /dev/null
+++ b/arch/powerpc/include/asm/hvcserver.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * hvcserver.h
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ */
+
+#ifndef _PPC64_HVCSERVER_H
+#define _PPC64_HVCSERVER_H
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+
+/* Converged Location Code length */
+#define HVCS_CLC_LENGTH	79
+
+/**
+ * struct hvcs_partner_info - an element in a list of partner info
+ * @node: list_head denoting this partner_info struct's position in the list of
+ *	partner info.
+ * @unit_address: The partner unit address of this entry.
+ * @partition_ID: The partner partition ID of this entry.
+ * @location_code: The converged location code of this entry + 1 char for the
+ *	null-term.
+ *
+ * This structure outlines the format that partner info is presented to a caller
+ * of the hvcs partner info fetching functions.  These are strung together into
+ * a list using linux kernel lists.
+ */
+struct hvcs_partner_info {
+	struct list_head node;
+	uint32_t unit_address;
+	uint32_t partition_ID;
+	char location_code[HVCS_CLC_LENGTH + 1]; /* CLC + 1 null-term char */
+};
+
+extern int hvcs_free_partner_info(struct list_head *head);
+extern int hvcs_get_partner_info(uint32_t unit_address,
+		struct list_head *head, unsigned long *pi_buff);
+extern int hvcs_register_connection(uint32_t unit_address,
+		uint32_t p_partition_ID, uint32_t p_unit_address);
+extern int hvcs_free_connection(uint32_t unit_address);
+
+#endif /* __KERNEL__ */
+#endif /* _PPC64_HVCSERVER_H */
diff --git a/arch/powerpc/include/asm/hvsi.h b/arch/powerpc/include/asm/hvsi.h
new file mode 100644
index 0000000000..464a7519ed
--- /dev/null
+++ b/arch/powerpc/include/asm/hvsi.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _HVSI_H
+#define _HVSI_H
+
+#define VS_DATA_PACKET_HEADER           0xff
+#define VS_CONTROL_PACKET_HEADER        0xfe
+#define VS_QUERY_PACKET_HEADER          0xfd
+#define VS_QUERY_RESPONSE_PACKET_HEADER 0xfc
+
+/* control verbs */
+#define VSV_SET_MODEM_CTL    1 /* to service processor only */
+#define VSV_MODEM_CTL_UPDATE 2 /* from service processor only */
+#define VSV_CLOSE_PROTOCOL   3
+
+/* query verbs */
+#define VSV_SEND_VERSION_NUMBER 1
+#define VSV_SEND_MODEM_CTL_STATUS 2
+
+/* yes, these masks are not consecutive. */
+#define HVSI_TSDTR 0x01
+#define HVSI_TSCD  0x20
+
+#define HVSI_MAX_OUTGOING_DATA 12
+#define HVSI_VERSION 1
+
+struct hvsi_header {
+	uint8_t  type;
+	uint8_t  len;
+	__be16 seqno;
+} __attribute__((packed));
+
+struct hvsi_data {
+	struct hvsi_header hdr;
+	uint8_t  data[HVSI_MAX_OUTGOING_DATA];
+} __attribute__((packed));
+
+struct hvsi_control {
+	struct hvsi_header hdr;
+	__be16 verb;
+	/* optional depending on verb: */
+	__be32 word;
+	__be32 mask;
+} __attribute__((packed));
+
+struct hvsi_query {
+	struct hvsi_header hdr;
+	__be16 verb;
+} __attribute__((packed));
+
+struct hvsi_query_response {
+	struct hvsi_header hdr;
+	__be16 verb;
+	__be16 query_seqno;
+	union {
+		uint8_t  version;
+		__be32 mctrl_word;
+	} u;
+} __attribute__((packed));
+
+/* hvsi lib struct definitions */
+#define HVSI_INBUF_SIZE		255
+struct tty_struct;
+struct hvsi_priv {
+	unsigned int	inbuf_len;	/* data in input buffer */
+	unsigned char	inbuf[HVSI_INBUF_SIZE];
+	unsigned int	inbuf_cur;	/* Cursor in input buffer */
+	unsigned int	inbuf_pktlen;	/* packet length from cursor */
+	atomic_t	seqno;		/* packet sequence number */
+	unsigned int	opened:1;	/* driver opened */
+	unsigned int	established:1;	/* protocol established */
+	unsigned int 	is_console:1;	/* used as a kernel console device */
+	unsigned int	mctrl_update:1;	/* modem control updated */
+	unsigned short	mctrl;		/* modem control */
+	struct tty_struct *tty;		/* tty structure */
+	int (*get_chars)(uint32_t termno, char *buf, int count);
+	int (*put_chars)(uint32_t termno, const char *buf, int count);
+	uint32_t	termno;
+};
+
+/* hvsi lib functions */
+struct hvc_struct;
+extern void hvsilib_init(struct hvsi_priv *pv,
+			 int (*get_chars)(uint32_t termno, char *buf, int count),
+			 int (*put_chars)(uint32_t termno, const char *buf,
+					  int count),
+			 int termno, int is_console);
+extern int hvsilib_open(struct hvsi_priv *pv, struct hvc_struct *hp);
+extern void hvsilib_close(struct hvsi_priv *pv, struct hvc_struct *hp);
+extern int hvsilib_read_mctrl(struct hvsi_priv *pv);
+extern int hvsilib_write_mctrl(struct hvsi_priv *pv, int dtr);
+extern void hvsilib_establish(struct hvsi_priv *pv);
+extern int hvsilib_get_chars(struct hvsi_priv *pv, char *buf, int count);
+extern int hvsilib_put_chars(struct hvsi_priv *pv, const char *buf, int count);
+
+#endif /* _HVSI_H */
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
new file mode 100644
index 0000000000..66db0147d5
--- /dev/null
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerPC BookIII S hardware breakpoint definitions
+ *
+ * Copyright 2010, IBM Corporation.
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ */
+
+#ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H
+#define _PPC_BOOK3S_64_HW_BREAKPOINT_H
+
+#include <asm/cpu_has_feature.h>
+
+#ifdef	__KERNEL__
+struct arch_hw_breakpoint {
+	unsigned long	address;
+	u16		type;
+	u16		len; /* length of the target data symbol */
+	u16		hw_len; /* length programmed in hw */
+	u8		flags;
+	bool		perf_single_step; /* temporarily uninstalled for a perf single step */
+};
+
+/* Note: Don't change the first 6 bits below as they are in the same order
+ * as the dabr and dabrx.
+ */
+#define HW_BRK_TYPE_READ		0x01
+#define HW_BRK_TYPE_WRITE		0x02
+#define HW_BRK_TYPE_TRANSLATE		0x04
+#define HW_BRK_TYPE_USER		0x08
+#define HW_BRK_TYPE_KERNEL		0x10
+#define HW_BRK_TYPE_HYP			0x20
+#define HW_BRK_TYPE_EXTRANEOUS_IRQ	0x80
+
+/* bits that overlap with the bottom 3 bits of the dabr */
+#define HW_BRK_TYPE_RDWR	(HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)
+#define HW_BRK_TYPE_DABR	(HW_BRK_TYPE_RDWR | HW_BRK_TYPE_TRANSLATE)
+#define HW_BRK_TYPE_PRIV_ALL	(HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
+				 HW_BRK_TYPE_HYP)
+
+#define HW_BRK_FLAG_DISABLED	0x1
+
+/* Minimum granularity */
+#ifdef CONFIG_PPC_8xx
+#define HW_BREAKPOINT_SIZE  0x4
+#else
+#define HW_BREAKPOINT_SIZE  0x8
+#endif
+#define HW_BREAKPOINT_SIZE_QUADWORD	0x10
+
+#define DABR_MAX_LEN	8
+#define DAWR_MAX_LEN	512
+
+static inline int nr_wp_slots(void)
+{
+	return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
+}
+
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
+			  unsigned long ea, int type, int size,
+			  struct arch_hw_breakpoint *info);
+
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
+			 int *type, int *size, unsigned long *ea);
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <linux/kdebug.h>
+#include <asm/reg.h>
+#include <asm/debug.h>
+
+struct perf_event_attr;
+struct perf_event;
+struct pmu;
+struct perf_sample_data;
+struct task_struct;
+
+extern int hw_breakpoint_slots(int type);
+extern int arch_bp_generic_fields(int type, int *gen_bp_type);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+				    const struct perf_event_attr *attr,
+				    struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+						unsigned long val, void *data);
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+extern struct pmu perf_ops_bp;
+extern void ptrace_triggered(struct perf_event *bp,
+			struct perf_sample_data *data, struct pt_regs *regs);
+/* Program every watchpoint slot with an all-zero breakpoint, if available. */
+static inline void hw_breakpoint_disable(void)
+{
+	struct arch_hw_breakpoint cleared = {0};
+	int slot;
+
+	if (!ppc_breakpoint_available())
+		return;
+	for (slot = 0; slot < nr_wp_slots(); slot++)
+		__set_breakpoint(slot, &cleared);
+}
+extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
+
+#else	/* CONFIG_HAVE_HW_BREAKPOINT */
+static inline void hw_breakpoint_disable(void) { }
+static inline void thread_change_pc(struct task_struct *tsk,
+					struct pt_regs *regs) { }
+
+#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
+
+
+#ifdef CONFIG_PPC_DAWR
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+	return dawr_force_enable;
+}
+int set_dawr(int nr, struct arch_hw_breakpoint *brk);
+#else
+static inline bool dawr_enabled(void) { return false; }
+static inline int set_dawr(int nr, struct arch_hw_breakpoint *brk) { return -1; }
+#endif
+
+#endif	/* __KERNEL__ */
+#endif	/* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
new file mode 100644
index 0000000000..317659fdea
--- /dev/null
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -0,0 +1,521 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_HW_IRQ_H
+#define _ASM_POWERPC_HW_IRQ_H
+
+#ifdef __KERNEL__
+
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_PPC64
+
+/*
+ * PACA flags in paca->irq_happened.
+ *
+ * These bits are set when interrupts occur while soft-disabled
+ * and allow a proper replay.
+ *
+ * The PACA_IRQ_HARD_DIS is set whenever we hard disable. It is almost
+ * always in synch with the MSR[EE] state, except:
+ * - A window in interrupt entry, where hardware disables MSR[EE] and that
+ *   must be "reconciled" with the soft mask state.
+ * - NMI interrupts that hit in awkward places, until they fix the state.
+ * - When local irqs are being enabled and state is being fixed up.
+ * - When returning from an interrupt there are some windows where this
+ *   can become out of synch, but gets fixed before the RFI or before
+ *   executing the next user instruction (see arch/powerpc/kernel/interrupt.c).
+ */
+#define PACA_IRQ_HARD_DIS	0x01
+#define PACA_IRQ_DBELL		0x02
+#define PACA_IRQ_EE		0x04
+#define PACA_IRQ_DEC		0x08 /* Or FIT */
+#define PACA_IRQ_HMI		0x10
+#define PACA_IRQ_PMI		0x20
+#define PACA_IRQ_REPLAYING	0x40
+
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ * Interrupt replay itself must remain hard masked too.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE|PACA_IRQ_PMI|PACA_IRQ_REPLAYING)
+#else
+#define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE|PACA_IRQ_REPLAYING)
+#endif
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * flags for paca->irq_soft_mask
+ */
+#define IRQS_ENABLED		0
+#define IRQS_DISABLED		1 /* local_irq_disable() interrupts */
+#define IRQS_PMI_DISABLED	2
+#define IRQS_ALL_DISABLED	(IRQS_DISABLED | IRQS_PMI_DISABLED)
+
+#ifndef __ASSEMBLY__
+
+static inline void __hard_irq_enable(void)
+{
+	if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+		wrtee(MSR_EE);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EIE);
+	else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		__mtmsrd(MSR_EE | MSR_RI, 1);	/* value written has EE and RI set */
+	else
+		mtmsr(mfmsr() | MSR_EE);	/* generic: read-modify-write, set EE */
+}
+
+static inline void __hard_irq_disable(void)
+{
+	if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+		wrtee(0);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EID);
+	else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		__mtmsrd(MSR_RI, 1);		/* value written has RI set, EE clear */
+	else
+		mtmsr(mfmsr() & ~MSR_EE);	/* generic: read-modify-write, clear EE */
+}
+
+static inline void __hard_EE_RI_disable(void)
+{
+	if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+		wrtee(0);	/* NOTE(review): only EE is written here; no RI handling is visible */
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_NRI);
+	else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		__mtmsrd(0, 1);			/* value written has both EE and RI clear */
+	else
+		mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));	/* generic: clear EE and RI */
+}
+
+static inline void __hard_RI_enable(void)
+{
+	if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+		return;				/* nothing is written for BookE/40x */
+
+	if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EID);	/* same SPR write __hard_irq_disable() uses on 8xx */
+	else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		__mtmsrd(MSR_RI, 1);		/* value written has RI set, EE clear */
+	else
+		mtmsr(mfmsr() | MSR_RI);	/* generic: read-modify-write, set RI */
+}
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
+static inline notrace unsigned long irq_soft_mask_return(void)
+{
+	unsigned long flags;
+
+	asm volatile(
+		"lbz %0,%1(13)"	/* byte load at offset %1 from r13 (presumably the paca pointer) */
+		: "=r" (flags)
+		: "i" (offsetof(struct paca_struct, irq_soft_mask)));	/* %1: offset of irq_soft_mask */
+
+	return flags;
+}
+
+/*
+ * The "memory" clobber acts as both a compiler barrier
+ * for the critical section and as a clobber because
+ * we changed paca->irq_soft_mask
+ */
+static inline notrace void irq_soft_mask_set(unsigned long mask)
+{
+	/*
+	 * The irq mask must always include the STD bit (IRQS_DISABLED)
+	 * if any bits are set, and interrupts don't get replayed until
+	 * the standard interrupt (local_irq_disable()) is unmasked. The
+	 * debug WARN_ON() below checks the STD-bit requirement.
+	 *
+	 * Other masks must only provide additional masking beyond
+	 * the standard, and they are also not replayed until the
+	 * standard interrupt becomes unmasked.
+	 *
+	 * This could be changed, but it will require partial
+	 * unmasks to be replayed, among other things. For now, take
+	 * the simple approach.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		WARN_ON(mask && !(mask & IRQS_DISABLED));
+
+	asm volatile(
+		"stb %0,%1(13)"
+		:
+		: "r" (mask),
+		  "i" (offsetof(struct paca_struct, irq_soft_mask))
+		: "memory");
+}
+
+static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
+{
+	/* Snapshot the old soft mask, then replace it outright with @mask. */
+	const unsigned long prev = irq_soft_mask_return();
+
+	irq_soft_mask_set(mask);
+	return prev;
+}
+
+static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
+{
+	/* Snapshot the old soft mask, then set the bits of @mask on top of it. */
+	const unsigned long prev = irq_soft_mask_return();
+
+	irq_soft_mask_set(prev | mask);
+	return prev;
+}
+
+static inline notrace unsigned long irq_soft_mask_andc_return(unsigned long mask)
+{
+	/* Snapshot the old soft mask, then clear the bits of @mask from it. */
+	const unsigned long prev = irq_soft_mask_return();
+
+	irq_soft_mask_set(prev & ~mask);
+	return prev;
+}
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	return irq_soft_mask_return();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	irq_soft_mask_set(IRQS_DISABLED);
+}
+
+extern void arch_local_irq_restore(unsigned long);
+
+static inline void arch_local_irq_enable(void)
+{
+	arch_local_irq_restore(IRQS_ENABLED);
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	return irq_soft_mask_or_return(IRQS_DISABLED);
+}
+
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & IRQS_DISABLED) != 0;
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+static inline void set_pmi_irq_pending(void)
+{
+	/*
+	 * Invoked from PMU callback functions to set PMI bit in the paca.
+	 * This has to be called with irqs disabled (via hard_irq_disable()).
+	 */
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		WARN_ON_ONCE(mfmsr() & MSR_EE);	/* debug check: MSR[EE] must already be clear */
+
+	get_paca()->irq_happened |= PACA_IRQ_PMI;
+}
+
+static inline void clear_pmi_irq_pending(void)
+{
+	/*
+	 * Invoked from PMU callback functions to clear the pending PMI bit
+	 * in the paca.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+	get_paca()->irq_happened &= ~PACA_IRQ_PMI;
+}
+
+static inline bool pmi_irq_pending(void)
+{
+	/*
+	 * Invoked from PMU callback functions.
+	 *
+	 * Reports whether the PMI bit is currently pending in the paca;
+	 * the masked value is collapsed to a plain true/false.
+	 */
+
+	return (get_paca()->irq_happened & PACA_IRQ_PMI) != 0;
+}
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * To support disabling and enabling of irqs together with PMIs, the
+ * powerpc_local_irq_pmu_save() and powerpc_local_irq_pmu_restore()
+ * macros are added. They are implemented using generic
+ * linux local_irq_* code from include/linux/irqflags.h.
+ */
+#define raw_local_irq_pmu_save(flags)					\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		flags = irq_soft_mask_or_return(IRQS_DISABLED |	\
+				IRQS_PMI_DISABLED);			\
+	} while(0)
+
+#define raw_local_irq_pmu_restore(flags)				\
+	do {								\
+		typecheck(unsigned long, flags);			\
+		arch_local_irq_restore(flags);				\
+	} while(0)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define powerpc_local_irq_pmu_save(flags)			\
+	 do {							\
+		raw_local_irq_pmu_save(flags);			\
+		if (!raw_irqs_disabled_flags(flags))		\
+			trace_hardirqs_off();			\
+	} while(0)
+#define powerpc_local_irq_pmu_restore(flags)			\
+	do {							\
+		if (!raw_irqs_disabled_flags(flags))		\
+			trace_hardirqs_on();			\
+		raw_local_irq_pmu_restore(flags);		\
+	} while(0)
+#else
+#define powerpc_local_irq_pmu_save(flags)			\
+	do {							\
+		raw_local_irq_pmu_save(flags);			\
+	} while(0)
+#define powerpc_local_irq_pmu_restore(flags)			\
+	do {							\
+		raw_local_irq_pmu_restore(flags);		\
+	} while (0)
+#endif  /* CONFIG_TRACE_IRQFLAGS */
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+#define hard_irq_disable()	do {					\
+	unsigned long flags;						\
+	__hard_irq_disable();						\
+	flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED);		\
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;			\
+	if (!arch_irqs_disabled_flags(flags)) {				\
+		asm volatile("std%X0 %1,%0" : "=m" (local_paca->saved_r1) \
+					    : "r" (current_stack_pointer)); \
+		trace_hardirqs_off();					\
+	}								\
+} while(0)
+
+static inline bool __lazy_irq_pending(u8 irq_happened)
+{
+	return (irq_happened & ~PACA_IRQ_HARD_DIS) != 0;
+}
+
+/*
+ * Check if a lazy IRQ is pending. Should be called with IRQs hard disabled.
+ */
+static inline bool lazy_irq_pending(void)
+{
+	return __lazy_irq_pending(get_paca()->irq_happened);
+}
+
+/*
+ * Check if a lazy IRQ is pending, with no debugging checks.
+ * Should be called with IRQs hard disabled.
+ * For use in RI disabled code or other constrained situations.
+ */
+static inline bool lazy_irq_pending_nocheck(void)
+{
+	return __lazy_irq_pending(local_paca->irq_happened);
+}
+
+bool power_pmu_wants_prompt_pmi(void);
+
+/*
+ * This is called by asynchronous interrupts to check whether to
+ * conditionally re-enable hard interrupts after having cleared
+ * the source of the interrupt. They are kept disabled if there
+ * is a different soft-masked interrupt pending that requires hard
+ * masking.
+ */
+static inline bool should_hard_irq_enable(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+		WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+		WARN_ON(!(get_paca()->irq_happened & PACA_IRQ_HARD_DIS));
+		WARN_ON(mfmsr() & MSR_EE);
+	}
+
+	if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+		return false;	/* hard enabling only serves PMI profiling of irq handlers */
+	/*
+	 * If the PMU is not running, there is not much reason to enable
+	 * MSR[EE] in irq handlers because any interrupts would just be
+	 * soft-masked.
+	 *
+	 * TODO: Add test for 64e
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+		if (!power_pmu_wants_prompt_pmi())
+			return false;
+		/*
+		 * If PMIs are disabled then IRQs should be disabled as well,
+		 * so we shouldn't see this condition, check for it just in
+		 * case because we are about to enable PMIs.
+		 */
+		if (WARN_ON_ONCE(regs->softe & IRQS_PMI_DISABLED))
+			return false;
+	}
+
+	if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
+		return false;	/* a pending soft-masked interrupt still requires hard masking */
+
+	return true;
+}
+
+/*
+ * Do the hard enabling, only call this if should_hard_irq_enable is true.
+ * This allows PMI interrupts to profile irq handlers.
+ */
+static inline void do_hard_irq_enable(void)
+{
+	/*
+	 * Asynch interrupts come in with IRQS_ALL_DISABLED,
+	 * PACA_IRQ_HARD_DIS, and MSR[EE]=0.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		irq_soft_mask_andc_return(IRQS_PMI_DISABLED);	/* clear only the PMI soft-mask bit */
+	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;	/* update the bookkeeping first ... */
+	__hard_irq_enable();				/* ... then actually hard enable */
+}
+
+static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+{
+	return (regs->softe & IRQS_DISABLED);
+}
+
+extern bool prep_irq_for_idle(void);
+extern bool prep_irq_for_idle_irqsoff(void);
+extern void irq_set_pending_from_srr1(unsigned long srr1);
+
+#define fini_irq_for_idle_irqsoff() trace_hardirqs_off() /* no trailing ';': the caller supplies it */
+
+extern void force_external_irq_replay(void);
+
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+	regs->softe = val;
+}
+#else /* CONFIG_PPC64 */
+
+static inline notrace unsigned long irq_soft_mask_return(void)
+{
+	return 0;
+}
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	return mfmsr();
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(flags);
+	else
+		mtmsr(flags);
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags = arch_local_save_flags();
+
+	if (IS_ENABLED(CONFIG_BOOKE))
+		wrtee(0);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		wrtspr(SPRN_EID);
+	else
+		mtmsr(flags & ~MSR_EE);
+
+	return flags;
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	__hard_irq_disable();
+}
+
+static inline void arch_local_irq_enable(void)
+{
+	__hard_irq_enable();
+}
+
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & MSR_EE);
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#define hard_irq_disable()		arch_local_irq_disable()
+
+static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+{
+	return !(regs->msr & MSR_EE);
+}
+
+static __always_inline bool should_hard_irq_enable(struct pt_regs *regs)
+{
+	return false;
+}
+
+static inline void do_hard_irq_enable(void)
+{
+	BUILD_BUG();	/* must stay unreachable: should_hard_irq_enable() is always false here */
+}
+
+static inline void clear_pmi_irq_pending(void) { }
+static inline void set_pmi_irq_pending(void) { }
+static inline bool pmi_irq_pending(void) { return false; }
+
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+}
+#endif /* CONFIG_PPC64 */
+
+static inline unsigned long mtmsr_isync_irqsafe(unsigned long msr)
+{
+#ifdef CONFIG_PPC64
+	if (arch_irqs_disabled()) {
+		/*
+		 * With soft-masking, MSR[EE] can change from 1 to 0
+		 * asynchronously when irqs are disabled, and we don't want to
+		 * set MSR[EE] back to 1 here if that has happened. A race-free
+		 * way to do this is ensure EE is already 0. Another way it
+		 * could be done is with a RESTART_TABLE handler, but that's
+		 * probably overkill here.
+		 */
+		msr &= ~MSR_EE;
+		mtmsr_isync(msr);
+		irq_soft_mask_set(IRQS_ALL_DISABLED);	/* record the hard-disabled state ... */
+		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;	/* ... in the soft-mask bookkeeping */
+	} else
+#endif	/* CONFIG_PPC64 */
+		mtmsr_isync(msr);
+
+	return msr;	/* the value actually written (EE may have been cleared above) */
+}
+
+
+#define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
+
+#endif  /* __ASSEMBLY__ */
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h
new file mode 100644
index 0000000000..d024447283
--- /dev/null
+++ b/arch/powerpc/include/asm/hydra.h
@@ -0,0 +1,99 @@
+/*
+ *  include/asm-ppc/hydra.h -- Mac I/O `Hydra' definitions
+ *
+ *  Copyright (C) 1997 Geert Uytterhoeven
+ *
+ *  This file is based on the following documentation:
+ *
+ *	Macintosh Technology in the Common Hardware Reference Platform
+ *	Apple Computer, Inc.
+ *
+ *	© Copyright 1995 Apple Computer, Inc. All rights reserved.
+ *
+ *  It's available online from https://www.cpu.lu/~mlan/ftp/MacTech.pdf
+ *  You can obtain paper copies of this book from computer bookstores or by
+ *  writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San
+ *  Francisco, CA 94104. Reference ISBN 1-55860-393-X.
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive
+ *  for more details.
+ */
+
+#ifndef _ASMPPC_HYDRA_H
+#define _ASMPPC_HYDRA_H
+
+#ifdef __KERNEL__
+
+struct Hydra {
+    /* DBDMA Controller Register Space (offsets below assume 4-byte u_int, no padding) */
+    char Pad1[0x30];
+    u_int CachePD;		/* 0x00030 */
+    u_int IDs;			/* 0x00034 */
+    u_int Feature_Control;	/* 0x00038, see HYDRA_FC_* */
+    char Pad2[0x7fc4];
+    /* DBDMA Channel Register Space */
+    char SCSI_DMA[0x100];	/* 0x08000 */
+    char Pad3[0x300];
+    char SCCA_Tx_DMA[0x100];	/* 0x08400 */
+    char SCCA_Rx_DMA[0x100];	/* 0x08500 */
+    char SCCB_Tx_DMA[0x100];	/* 0x08600 */
+    char SCCB_Rx_DMA[0x100];	/* 0x08700 */
+    char Pad4[0x7800];
+    /* Device Register Space */
+    char SCSI[0x1000];		/* 0x10000 */
+    char ADB[0x1000];		/* 0x11000 */
+    char SCC_Legacy[0x1000];	/* 0x12000 */
+    char SCC[0x1000];		/* 0x13000 */
+    char Pad9[0x2000];
+    char VIA[0x2000];		/* 0x16000 */
+    char Pad10[0x28000];
+    char OpenPIC[0x40000];	/* 0x40000, structure ends at 0x80000 */
+};
+
+extern volatile struct Hydra __iomem *Hydra;
+
+
+    /*
+     *  Feature Control Register
+     */
+
+#define HYDRA_FC_SCC_CELL_EN	0x00000001	/* Enable SCC Clock */
+#define HYDRA_FC_SCSI_CELL_EN	0x00000002	/* Enable SCSI Clock */
+#define HYDRA_FC_SCCA_ENABLE	0x00000004	/* Enable SCC A Lines */
+#define HYDRA_FC_SCCB_ENABLE	0x00000008	/* Enable SCC B Lines */
+#define HYDRA_FC_ARB_BYPASS	0x00000010	/* Bypass Internal Arbiter */
+#define HYDRA_FC_RESET_SCC	0x00000020	/* Reset SCC */
+#define HYDRA_FC_MPIC_ENABLE	0x00000040	/* Enable OpenPIC */
+#define HYDRA_FC_SLOW_SCC_PCLK	0x00000080	/* 1=15.6672, 0=25 MHz */
+#define HYDRA_FC_MPIC_IS_MASTER	0x00000100	/* OpenPIC Master Mode */
+
+
+    /*
+     *  OpenPIC Interrupt Sources
+     */
+
+#define HYDRA_INT_SIO		0
+#define HYDRA_INT_SCSI_DMA	1
+#define HYDRA_INT_SCCA_TX_DMA	2
+#define HYDRA_INT_SCCA_RX_DMA	3
+#define HYDRA_INT_SCCB_TX_DMA	4
+#define HYDRA_INT_SCCB_RX_DMA	5
+#define HYDRA_INT_SCSI		6
+#define HYDRA_INT_SCCA		7
+#define HYDRA_INT_SCCB		8
+#define HYDRA_INT_VIA		9
+#define HYDRA_INT_ADB		10
+#define HYDRA_INT_ADB_NMI	11
+#define HYDRA_INT_EXT1		12	/* PCI IRQW */
+#define HYDRA_INT_EXT2		13	/* PCI IRQX */
+#define HYDRA_INT_EXT3		14	/* PCI IRQY */
+#define HYDRA_INT_EXT4		15	/* PCI IRQZ */
+#define HYDRA_INT_EXT5		16	/* IDE Primary/Secondary */
+#define HYDRA_INT_EXT6		17	/* IDE Secondary */
+#define HYDRA_INT_EXT7		18	/* Power Off Request */
+#define HYDRA_INT_SPARE		19
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASMPPC_HYDRA_H */
diff --git a/arch/powerpc/include/asm/i8259.h b/arch/powerpc/include/asm/i8259.h
new file mode 100644
index 0000000000..75481d363c
--- /dev/null
+++ b/arch/powerpc/include/asm/i8259.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_I8259_H
+#define _ASM_POWERPC_I8259_H
+#ifdef __KERNEL__
+
+#include <linux/irq.h>
+
+extern void i8259_init(struct device_node *node, unsigned long intack_addr);
+extern unsigned int i8259_irq(void);
+struct irq_domain *__init i8259_get_host(void);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_I8259_H */
diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h
new file mode 100644
index 0000000000..6f33253a36
--- /dev/null
+++ b/arch/powerpc/include/asm/ibmebus.h
@@ -0,0 +1,62 @@
+/*
+ * IBM PowerPC eBus Infrastructure Support.
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *  Joachim Fenkes <fenkes@de.ibm.com>
+ *  Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_EBUS_H
+#define _ASM_EBUS_H
+#ifdef __KERNEL__
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+struct platform_driver;
+
+extern struct bus_type ibmebus_bus_type;
+
+int ibmebus_register_driver(struct platform_driver *drv);
+void ibmebus_unregister_driver(struct platform_driver *drv);
+
+int ibmebus_request_irq(u32 ist, irq_handler_t handler,
+			unsigned long irq_flags, const char *devname,
+			void *dev_id);
+void ibmebus_free_irq(u32 ist, void *dev_id);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_EBUS_H */
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
new file mode 100644
index 0000000000..f6599ccb30
--- /dev/null
+++ b/arch/powerpc/include/asm/icswx.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ICSWX api
+ *
+ * Copyright (C) 2015 IBM Corp.
+ *
+ * This provides the Initiate Coprocessor Store Word Indexed (ICSWX)
+ * instruction.  This instruction is used to communicate with PowerPC
+ * coprocessors.  This also provides definitions of the structures used
+ * to communicate with the coprocessor.
+ *
+ * The RFC02130: Coprocessor Architecture document is the reference for
+ * everything in this file unless otherwise noted.
+ */
+#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+
+#include <asm/ppc-opcode.h> /* for PPC_ICSWX */
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE		(0x3fffffffffffffff)
+#define CCB_ADDRESS		(0xfffffffffffffff8)
+#define CCB_CM			(0x0000000000000007)
+#define CCB_CM0			(0x0000000000000004)
+#define CCB_CM12		(0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS	(0x0)
+#define CCB_CM0_LAST_IN_CHAIN	(0x4)
+#define CCB_CM12_STORE		(0x0)
+#define CCB_CM12_INTERRUPT	(0x1)
+
+#define CCB_SIZE		(0x10)
+#define CCB_ALIGN		CCB_SIZE
+
+struct coprocessor_completion_block {
+	__be64 value;
+	__be64 address;
+} __packed __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V			(0x80)
+#define CSB_F			(0x04)
+#define CSB_CH			(0x03)
+#define CSB_CE_INCOMPLETE	(0x80)
+#define CSB_CE_TERMINATION	(0x40)
+#define CSB_CE_TPBC		(0x20)
+
+#define CSB_CC_SUCCESS		(0)
+#define CSB_CC_INVALID_ALIGN	(1)
+#define CSB_CC_OPERAND_OVERLAP	(2)
+#define CSB_CC_DATA_LENGTH	(3)
+#define CSB_CC_TRANSLATION	(5)
+#define CSB_CC_PROTECTION	(6)
+#define CSB_CC_RD_EXTERNAL	(7)
+#define CSB_CC_INVALID_OPERAND	(8)
+#define CSB_CC_PRIVILEGE	(9)
+#define CSB_CC_INTERNAL		(10)
+#define CSB_CC_WR_EXTERNAL	(12)
+#define CSB_CC_NOSPC		(13)
+#define CSB_CC_EXCESSIVE_DDE	(14)
+#define CSB_CC_WR_TRANSLATION	(15)
+#define CSB_CC_WR_PROTECTION	(16)
+#define CSB_CC_UNKNOWN_CODE	(17)
+#define CSB_CC_ABORT		(18)
+#define CSB_CC_EXCEED_BYTE_COUNT	(19)	/* P9 or later */
+#define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_INVALID_CRB	(21)	/* P9 or later */
+#define CSB_CC_INVALID_DDE	(30)	/* P9 or later */
+#define CSB_CC_SEGMENTED_DDL	(31)
+#define CSB_CC_PROGRESS_POINT	(32)
+#define CSB_CC_DDE_OVERFLOW	(33)
+#define CSB_CC_SESSION		(34)
+#define CSB_CC_PROVISION	(36)
+#define CSB_CC_CHAIN		(37)
+#define CSB_CC_SEQUENCE		(38)
+#define CSB_CC_HW		(39)
+/* P9 DD2 NX Workbook 3.2 (Table 4-36): Address translation fault */
+#define	CSB_CC_FAULT_ADDRESS	(250)
+
+#define CSB_SIZE		(0x10)
+#define CSB_ALIGN		CSB_SIZE
+
+struct coprocessor_status_block {
+	u8 flags;
+	u8 cs;
+	u8 cc;
+	u8 ce;
+	__be32 count;
+	__be64 address;
+} __packed __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P			(0x8000)
+
+#define DDE_SIZE		(0x10)
+#define DDE_ALIGN		DDE_SIZE
+
+struct data_descriptor_entry {
+	__be16 flags;
+	u8 count;
+	u8 index;
+	__be32 length;
+	__be64 address;
+} __packed __aligned(DDE_ALIGN);
+
+/* 4.3.2 NX-stamped Fault CRB */
+
+#define NX_STAMP_ALIGN          (0x10)
+
+struct nx_fault_stamp {
+	__be64 fault_storage_addr;
+	__be16 reserved;
+	__u8   flags;
+	__u8   fault_status;
+	__be32 pswid;
+} __packed __aligned(NX_STAMP_ALIGN);
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE		(0x80)
+#define CRB_ALIGN		(0x100) /* Errata: requires 256 alignment */
+
+/* Coprocessor Status Block field
+ *   ADDRESS	address of CSB
+ *   C		CCB is valid
+ *   AT		0 = addrs are virtual, 1 = addrs are phys
+ *   M		enable perf monitor
+ */
+#define CRB_CSB_ADDRESS		(0xfffffffffffffff0)
+#define CRB_CSB_C		(0x0000000000000008)
+#define CRB_CSB_AT		(0x0000000000000002)
+#define CRB_CSB_M		(0x0000000000000001)
+
+struct coprocessor_request_block {
+	__be32 ccw;
+	__be32 flags;
+	__be64 csb_addr;
+
+	struct data_descriptor_entry source;
+	struct data_descriptor_entry target;
+
+	struct coprocessor_completion_block ccb;
+
+	union {
+		struct nx_fault_stamp nx;
+		u8 reserved[16];
+	} stamp;
+
+	u8 reserved[32];
+
+	struct coprocessor_status_block csb;
+} __aligned(128);
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS			(0xff000000)
+#define CCW_CT			(0x00ff0000)
+#define CCW_CD			(0x0000ffff)
+#define CCW_CL			(0x0000c000)
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1 Initiate Coprocessor Store Word Indexed (ICSWX)
+ * Chapter 8.2.4.1 Condition Register 0
+ */
+
+#define ICSWX_INITIATED		(0x8)
+#define ICSWX_BUSY		(0x4)
+#define ICSWX_REJECTED		(0x2)
+#define ICSWX_XERS0		(0x1)	/* undefined or set from XERSO. */
+
+static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb)
+{
+	__be64 ccw_reg = ccw;	/* widened copy of the CCW, handed to the asm in a register */
+	u32 cr;
+
+	/* NB: the same structures are used by VAS-NX */
+	BUILD_BUG_ON(sizeof(*crb) != 128);	/* 128 == CRB_SIZE (0x80) */
+
+	__asm__ __volatile__(
+	PPC_ICSWX(%1,0,%2) "\n"
+	"mfcr %0\n"
+	: "=r" (cr)
+	: "r" (ccw_reg), "r" (crb)
+	: "cr0", "memory");
+
+	return (int)((cr >> 28) & 0xf);	/* top nibble of the mfcr value (presumably CR0); see ICSWX_* */
+}
+
+
+#endif /* _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ */
diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h
new file mode 100644
index 0000000000..00f3606673
--- /dev/null
+++ b/arch/powerpc/include/asm/idle.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_IDLE_H
+#define _ASM_POWERPC_IDLE_H
+#include <asm/runlatch.h>
+#include <asm/paca.h>
+
+#ifdef CONFIG_PPC_PSERIES
+DECLARE_PER_CPU(u64, idle_spurr_cycles);
+DECLARE_PER_CPU(u64, idle_entry_purr_snap);
+DECLARE_PER_CPU(u64, idle_entry_spurr_snap);
+
+static __always_inline void snapshot_purr_idle_entry(void)
+{
+	*this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR);
+}
+
+static __always_inline void snapshot_spurr_idle_entry(void)
+{
+	*this_cpu_ptr(&idle_entry_spurr_snap) = mfspr(SPRN_SPURR);
+}
+
+static __always_inline void update_idle_purr_accounting(void)
+{
+	u64 entry_purr = *this_cpu_ptr(&idle_entry_purr_snap);
+	u64 total = be64_to_cpu(get_lppaca()->wait_state_cycles);
+
+	/* Add the PURR ticks accumulated since the idle-entry snapshot. */
+	total += mfspr(SPRN_PURR) - entry_purr;
+	get_lppaca()->wait_state_cycles = cpu_to_be64(total);
+}
+
+static __always_inline void update_idle_spurr_accounting(void)
+{
+	u64 *total = this_cpu_ptr(&idle_spurr_cycles);
+	u64 entry_spurr = *this_cpu_ptr(&idle_entry_spurr_snap);
+
+	*total = *total + (mfspr(SPRN_SPURR) - entry_spurr);	/* SPURR ticks since idle entry */
+}
+
+static __always_inline void pseries_idle_prolog(void)
+{
+	ppc64_runlatch_off();
+	snapshot_purr_idle_entry();
+	snapshot_spurr_idle_entry();
+	/*
+	 * Indicate to the HV that we are idle. Now would be
+	 * a good time to find other work to dispatch.
+	 */
+	get_lppaca()->idle = 1;
+}
+
+static __always_inline void pseries_idle_epilog(void)
+{
+	update_idle_purr_accounting();
+	update_idle_spurr_accounting();
+	get_lppaca()->idle = 0;
+	ppc64_runlatch_on();
+}
+
+static inline u64 read_this_idle_purr(void)
+{
+	/*
+	 * If we are reading from an idle context, update the
+	 * idle-purr cycles corresponding to the last idle period.
+	 * Since the idle context is not yet over, take a fresh
+	 * snapshot of the idle-purr.
+	 */
+	if (unlikely(get_lppaca()->idle == 1)) {
+		update_idle_purr_accounting();
+		snapshot_purr_idle_entry();
+	}
+
+	return be64_to_cpu(get_lppaca()->wait_state_cycles);
+}
+
+static inline u64 read_this_idle_spurr(void)
+{
+	/*
+	 * If we are reading from an idle context, update the
+	 * idle-spurr cycles corresponding to the last idle period.
+	 * Since the idle context is not yet over, take a fresh
+	 * snapshot of the idle-spurr.
+	 */
+	if (get_lppaca()->idle == 1) {
+		update_idle_spurr_accounting();
+		snapshot_spurr_idle_entry();
+	}
+
+	return *this_cpu_ptr(&idle_spurr_cycles);
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+#endif
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index 0000000000..699a88584a
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_POWERPC_IMC_PMU_H
+#define __ASM_POWERPC_IMC_PMU_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *           (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *           (C) 2017 Hemant K Shaw, IBM Corporation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/io.h>
+#include <asm/opal.h>
+
+/*
+ * Compatibility macros for IMC devices
+ */
+#define IMC_DTB_COMPAT			"ibm,opal-in-memory-counters"
+#define IMC_DTB_UNIT_COMPAT		"ibm,imc-counters"
+
+
+/*
+ * LDBAR: Counter address and Enable/Disable macro.
+ * perf/imc-pmu.c has the LDBAR layout information.
+ */
+#define THREAD_IMC_LDBAR_MASK           0x0003ffffffffe000ULL
+#define THREAD_IMC_ENABLE               0x8000000000000000ULL
+#define TRACE_IMC_ENABLE		0x4000000000000000ULL
+
+/*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET		0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET		8
+#define IMC_CNTL_BLK_MODE_OFFSET	32
+
+/*
+ * Structure to hold memory address information for imc units.
+ */
+struct imc_mem_info {
+	u64 *vbase;
+	u32 id;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct imc_events {
+	u32 value;
+	char *name;
+	char *unit;
+	char *scale;
+};
+
+/*
+ * Trace IMC hardware updates a 64-byte record on
+ * Core Performance Monitoring Counter (CPMC)
+ * overflow. Here is the layout of the trace imc record:
+ *
+ * DW 0 : Timebase
+ * DW 1 : Program Counter
+ * DW 2 : PIDR information
+ * DW 3 : CPMC1
+ * DW 4 : CPMC2
+ * DW 5 : CPMC3
+ * DW 6 : CPMC4
+ * DW 7 : Timebase
+ * .....
+ *
+ * The following is the data structure to hold trace imc data.
+ */
+struct trace_imc_data {
+	u64 tb1;
+	u64 ip;
+	u64 val;
+	u64 cpmc1;
+	u64 cpmc2;
+	u64 cpmc3;
+	u64 cpmc4;
+	u64 tb2;
+};
+
+/* Event attribute array index */
+#define IMC_FORMAT_ATTR		0
+#define IMC_EVENT_ATTR		1
+#define IMC_CPUMASK_ATTR	2
+#define IMC_NULL_ATTR		3
+
+/* PMU Format attribute macros */
+#define IMC_EVENT_OFFSET_MASK	0xffffffffULL
+
+/*
+ * Macro to mask bits 0:21 of first double word(which is the timebase) to
+ * compare with 8th double word (timebase) of trace imc record data.
+ */
+#define IMC_TRACE_RECORD_TB1_MASK      0x3ffffffffffULL
+
+/*
+ * Bit 0:1 in third DW of IMC trace record
+ * specifies the MSR[HV PR] values.
+ */
+#define IMC_TRACE_RECORD_VAL_HVPR(x)	((x) >> 62)
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will hold the
+ * pmu functions, events, counter memory information
+ * and attrs for each imc pmu and will be referenced at
+ * the time of pmu registration.
+ */
+struct imc_pmu {
+	struct pmu pmu;
+	struct imc_mem_info *mem_info;
+	struct imc_events *events;
+	/*
+	 * Attribute groups for the PMU, indexed by IMC_*_ATTR: slot 0
+	 * used for format attribute, slot 1 used for event attribute,
+	 * slot 2 used for cpumask attribute. Slot 3 kept as
+	 * NULL.
+	 */
+	const struct attribute_group *attr_groups[4];
+	u32 counter_mem_size;
+	int domain;
+	/*
+	 * flag to notify whether the memory is mmaped
+	 * or allocated by kernel.
+	 */
+	bool imc_counter_mmaped;
+};
+
+/*
+ * Structure to hold id, lock and reference count for the imc events which
+ * are inited.
+ */
+struct imc_pmu_ref {
+	spinlock_t lock;
+	unsigned int id;
+	int refc;
+};
+
+/*
+ * In-Memory Collection Counters type.
+ * Data comes from Device tree.
+ * The following device types are supported.
+ */
+
+enum {
+	IMC_TYPE_THREAD		= 0x1,
+	IMC_TYPE_TRACE		= 0x2,
+	IMC_TYPE_CORE		= 0x4,
+	IMC_TYPE_CHIP           = 0x10,
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST		1
+#define IMC_DOMAIN_CORE		2
+#define IMC_DOMAIN_THREAD	3
+/* For trace-imc the domain is still thread but it operates in trace-mode */
+#define IMC_DOMAIN_TRACE	4
+
+extern int init_imc_pmu(struct device_node *parent,
+				struct imc_pmu *pmu_ptr, int pmu_id);
+extern void thread_imc_disable(void);
+extern int get_max_nest_dev(void);
+extern void unregister_thread_imc(void);
+#endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/include/asm/immap_cpm2.h b/arch/powerpc/include/asm/immap_cpm2.h
new file mode 100644
index 0000000000..845d5b3fb2
--- /dev/null
+++ b/arch/powerpc/include/asm/immap_cpm2.h
@@ -0,0 +1,648 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPM2 Internal Memory Map
+ * Copyright (c) 1999 Dan Malek (dmalek@jlc.net)
+ *
+ * The Internal Memory Map for devices with CPM2 on them.  This
+ * is the superset of all CPM2 devices (8260, 8266, 8280, 8272,
+ * 8560).
+ */
+#ifdef __KERNEL__
+#ifndef __IMMAP_CPM2__
+#define __IMMAP_CPM2__
+
+#include <linux/types.h>
+
+/* System configuration registers.
+ * The 82xx layout below sums to 256 bytes; res6 pads the tail. */
+typedef	struct sys_82xx_conf {
+	u32	sc_siumcr;
+	u32	sc_sypcr;
+	u8	res1[6];
+	u16	sc_swsr;
+	u8	res2[20];
+	u32	sc_bcr;
+	u8	sc_ppc_acr;
+	u8	res3[3];
+	u32	sc_ppc_alrh;
+	u32	sc_ppc_alrl;
+	u8	sc_lcl_acr;
+	u8	res4[3];
+	u32	sc_lcl_alrh;
+	u32	sc_lcl_alrl;
+	u32	sc_tescr1;
+	u32	sc_tescr2;
+	u32	sc_ltescr1;
+	u32	sc_ltescr2;
+	u32	sc_pdtea;
+	u8	sc_pdtem;
+	u8	res5[3];
+	u32	sc_ldtea;
+	u8	sc_ldtem;
+	u8	res6[163];
+} sysconf_82xx_cpm2_t;
+
+typedef	struct sys_85xx_conf {
+	u32	sc_cear;
+	u16	sc_ceer;
+	u16	sc_cemr;
+	u8	res1[70];	/* NOTE(review): ends at offset 78, so 2 pad bytes presumably precede sc_smaer - confirm */
+	u32	sc_smaer;
+	u8	res2[4];
+	u32	sc_smevr;
+	u32	sc_smctr;
+	u32	sc_lmaer;
+	u8	res3[4];
+	u32	sc_lmevr;
+	u32	sc_lmctr;
+	u8	res4[144];
+} sysconf_85xx_cpm2_t;	/* fields sum to 254 bytes; 256 if that padding is present */
+
+typedef union sys_conf {	/* overlays the 82xx and 85xx layouts on the same storage */
+	sysconf_82xx_cpm2_t	siu_82xx;
+	sysconf_85xx_cpm2_t	siu_85xx;
+} sysconf_cpm2_t;
+
+
+
+/* Memory controller registers.
+ * Twelve memc_brN/memc_orN pairs lead the block; the fields sum to 256 bytes. */
+typedef struct	mem_ctlr {
+	u32	memc_br0;
+	u32	memc_or0;
+	u32	memc_br1;
+	u32	memc_or1;
+	u32	memc_br2;
+	u32	memc_or2;
+	u32	memc_br3;
+	u32	memc_or3;
+	u32	memc_br4;
+	u32	memc_or4;
+	u32	memc_br5;
+	u32	memc_or5;
+	u32	memc_br6;
+	u32	memc_or6;
+	u32	memc_br7;
+	u32	memc_or7;
+	u32	memc_br8;
+	u32	memc_or8;
+	u32	memc_br9;
+	u32	memc_or9;
+	u32	memc_br10;
+	u32	memc_or10;
+	u32	memc_br11;
+	u32	memc_or11;
+	u8	res1[8];
+	u32	memc_mar;
+	u8	res2[4];
+	u32	memc_mamr;
+	u32	memc_mbmr;
+	u32	memc_mcmr;
+	u8	res3[8];
+	u16	memc_mptpr;
+	u8	res4[2];
+	u32	memc_mdr;
+	u8	res5[4];
+	u32	memc_psdmr;
+	u32	memc_lsdmr;
+	u8	memc_purt;
+	u8	res6[3];
+	u8	memc_psrt;
+	u8	res7[3];
+	u8	memc_lurt;
+	u8	res8[3];
+	u8	memc_lsrt;
+	u8	res9[3];
+	u32	memc_immr;
+	u32	memc_pcibr0;
+	u32	memc_pcibr1;
+	u8	res10[16];
+	u32	memc_pcimsk0;
+	u32	memc_pcimsk1;
+	u8	res11[52];
+} memctl_cpm2_t;
+
+/* System Integration Timers.
+ * The fields sum to 560 bytes; res6 and res7 are both trailing reserved space. */
+typedef struct	sys_int_timers {
+	u8	res1[32];
+	u16	sit_tmcntsc;
+	u8	res2[2];
+	u32	sit_tmcnt;
+	u8	res3[4];
+	u32	sit_tmcntal;
+	u8	res4[16];
+	u16	sit_piscr;
+	u8	res5[2];
+	u32	sit_pitc;
+	u32	sit_pitr;
+	u8      res6[94];
+	u8	res7[390];
+} sit_cpm2_t;
+
+#define PISCR_PIRQ_MASK		((u16)0xff00)
+#define PISCR_PS		((u16)0x0080)
+#define PISCR_PIE		((u16)0x0004)
+#define PISCR_PTF		((u16)0x0002)
+#define PISCR_PTE		((u16)0x0001)
+
+/* PCI Controller.
+ * Two reserved arrays (res19, res40) have sizes that are not multiples of 4. */
+typedef struct pci_ctlr {
+	u32	pci_omisr;
+	u32	pci_omimr;
+	u8	res1[8];
+	u32	pci_ifqpr;
+	u32	pci_ofqpr;
+	u8	res2[8];
+	u32	pci_imr0;
+	u32	pci_imr1;
+	u32	pci_omr0;
+	u32	pci_omr1;
+	u32	pci_odr;
+	u8	res3[4];
+	u32	pci_idr;
+	u8	res4[20];
+	u32	pci_imisr;
+	u32	pci_imimr;
+	u8	res5[24];
+	u32	pci_ifhpr;
+	u8	res6[4];
+	u32	pci_iftpr;
+	u8	res7[4];
+	u32	pci_iphpr;
+	u8	res8[4];
+	u32	pci_iptpr;
+	u8	res9[4];
+	u32	pci_ofhpr;
+	u8	res10[4];
+	u32	pci_oftpr;
+	u8	res11[4];
+	u32	pci_ophpr;
+	u8	res12[4];
+	u32	pci_optpr;
+	u8	res13[8];
+	u32	pci_mucr;
+	u8	res14[8];
+	u32	pci_qbar;
+	u8	res15[12];
+	u32	pci_dmamr0;
+	u32	pci_dmasr0;
+	u32	pci_dmacdar0;
+	u8	res16[4];
+	u32	pci_dmasar0;
+	u8	res17[4];
+	u32	pci_dmadar0;
+	u8	res18[4];
+	u32	pci_dmabcr0;
+	u32	pci_dmandar0;
+	u8	res19[86];	/* NOTE(review): 86 here vs 88 for res23/res27; 2 alignment pad bytes presumably make up the gap - confirm */
+	u32	pci_dmamr1;
+	u32	pci_dmasr1;
+	u32	pci_dmacdar1;
+	u8	res20[4];
+	u32	pci_dmasar1;
+	u8	res21[4];
+	u32	pci_dmadar1;
+	u8	res22[4];
+	u32	pci_dmabcr1;
+	u32	pci_dmandar1;
+	u8	res23[88];
+	u32	pci_dmamr2;
+	u32	pci_dmasr2;
+	u32	pci_dmacdar2;
+	u8	res24[4];
+	u32	pci_dmasar2;
+	u8	res25[4];
+	u32	pci_dmadar2;
+	u8	res26[4];
+	u32	pci_dmabcr2;
+	u32	pci_dmandar2;
+	u8	res27[88];
+	u32	pci_dmamr3;
+	u32	pci_dmasr3;
+	u32	pci_dmacdar3;
+	u8	res28[4];
+	u32	pci_dmasar3;
+	u8	res29[4];
+	u32	pci_dmadar3;
+	u8	res30[4];
+	u32	pci_dmabcr3;
+	u32	pci_dmandar3;
+	u8	res31[344];
+	u32	pci_potar0;
+	u8	res32[4];
+	u32	pci_pobar0;
+	u8	res33[4];
+	u32	pci_pocmr0;
+	u8	res34[4];
+	u32	pci_potar1;
+	u8	res35[4];
+	u32	pci_pobar1;
+	u8	res36[4];
+	u32	pci_pocmr1;
+	u8	res37[4];
+	u32	pci_potar2;
+	u8	res38[4];
+	u32	pci_pobar2;
+	u8	res39[4];
+	u32	pci_pocmr2;
+	u8	res40[50];	/* NOTE(review): 50 is not a multiple of 4; 2 pad bytes presumably precede pci_ptcr - confirm */
+	u32	pci_ptcr;
+	u32	pci_gpcr;
+	u32	pci_gcr;
+	u32	pci_esr;
+	u32	pci_emr;
+	u32	pci_ecr;
+	u32	pci_eacr;
+	u8	res41[4];
+	u32	pci_edcr;
+	u8	res42[4];
+	u32	pci_eccr;
+	u8	res43[44];
+	u32	pci_pitar1;
+	u8	res44[4];
+	u32	pci_pibar1;
+	u8	res45[4];
+	u32	pci_picmr1;
+	u8	res46[4];
+	u32	pci_pitar0;
+	u8	res47[4];
+	u32	pci_pibar0;
+	u8	res48[4];
+	u32	pci_picmr0;
+	u8	res49[4];
+	u32	pci_cfg_addr;
+	u32	pci_cfg_data;
+	u32	pci_int_ack;
+	u8	res50[756];
+} pci_cpm2_t;
+
+/* Interrupt Controller.
+ * The fields sum to 128 bytes, of which res2 accounts for 88. */
+typedef struct interrupt_controller {
+	u16	ic_sicr;
+	u8	res1[2];
+	u32	ic_sivec;
+	u32	ic_sipnrh;
+	u32	ic_sipnrl;
+	u32	ic_siprr;
+	u32	ic_scprrh;
+	u32	ic_scprrl;
+	u32	ic_simrh;
+	u32	ic_simrl;
+	u32	ic_siexr;
+	u8	res2[88];
+} intctl_cpm2_t;
+
+/* Clocks and Reset.
+ * The fields sum to 128 bytes, of which the trailing res accounts for 104. */
+typedef struct clk_and_reset {
+	u32	car_sccr;
+	u8	res1[4];
+	u32	car_scmr;
+	u8	res2[4];
+	u32	car_rsr;
+	u32	car_rmr;
+	u8	res[104];
+} car_cpm2_t;
+
+/* Input/Output Port control/status registers.
+ * Names consistent with processor manual, although they are different
+ * from the original 8xx names.  Each of the four register groups (suffix
+ * a-d) has five u32 registers plus 12 reserved bytes, i.e. 32 bytes. */
+typedef struct io_port {
+	u32	iop_pdira;
+	u32	iop_ppara;
+	u32	iop_psora;
+	u32	iop_podra;
+	u32	iop_pdata;
+	u8	res1[12];
+	u32	iop_pdirb;
+	u32	iop_pparb;
+	u32	iop_psorb;
+	u32	iop_podrb;
+	u32	iop_pdatb;
+	u8	res2[12];
+	u32	iop_pdirc;
+	u32	iop_pparc;
+	u32	iop_psorc;
+	u32	iop_podrc;
+	u32	iop_pdatc;
+	u8	res3[12];
+	u32	iop_pdird;
+	u32	iop_ppard;
+	u32	iop_psord;
+	u32	iop_podrd;
+	u32	iop_pdatd;
+	u8	res4[12];
+} iop_cpm2_t;
+
+/* Communication Processor Module Timers
+ * The fields sum to 640 bytes, of which the trailing res3 accounts for 584. */
+typedef struct cpm_timers {
+	u8	cpmt_tgcr1;
+	u8	res1[3];
+	u8	cpmt_tgcr2;
+	u8	res2[11];
+	u16	cpmt_tmr1;
+	u16	cpmt_tmr2;
+	u16	cpmt_trr1;
+	u16	cpmt_trr2;
+	u16	cpmt_tcr1;
+	u16	cpmt_tcr2;
+	u16	cpmt_tcn1;
+	u16	cpmt_tcn2;
+	u16	cpmt_tmr3;
+	u16	cpmt_tmr4;
+	u16	cpmt_trr3;
+	u16	cpmt_trr4;
+	u16	cpmt_tcr3;
+	u16	cpmt_tcr4;
+	u16	cpmt_tcn3;
+	u16	cpmt_tcn4;
+	u16	cpmt_ter1;
+	u16	cpmt_ter2;
+	u16	cpmt_ter3;
+	u16	cpmt_ter4;
+	u8	res3[584];
+} cpmtimer_cpm2_t;
+
+/* DMA control/status registers.
+ * Each one-byte register sits at a 4-byte stride; the fields sum to 768 bytes. */
+typedef struct sdma_csr {
+	u8	res0[24];
+	u8	sdma_sdsr;
+	u8	res1[3];
+	u8	sdma_sdmr;
+	u8	res2[3];
+	u8	sdma_idsr1;
+	u8	res3[3];
+	u8	sdma_idmr1;
+	u8	res4[3];
+	u8	sdma_idsr2;
+	u8	res5[3];
+	u8	sdma_idmr2;
+	u8	res6[3];
+	u8	sdma_idsr3;
+	u8	res7[3];
+	u8	sdma_idmr3;
+	u8	res8[3];
+	u8	sdma_idsr4;
+	u8	res9[3];
+	u8	sdma_idmr4;
+	u8	res10[707];
+} sdma_cpm2_t;
+
+/* Fast controllers
+ * The fields sum to 32 bytes per controller. */
+typedef struct fcc {
+	u32	fcc_gfmr;
+	u32	fcc_fpsmr;
+	u16	fcc_ftodr;
+	u8	res1[2];
+	u16	fcc_fdsr;
+	u8	res2[2];
+	u16	fcc_fcce;
+	u8	res3[2];
+	u16	fcc_fccm;
+	u8	res4[2];
+	u8	fcc_fccs;
+	u8	res5[3];
+	u8	fcc_ftirr_phy[4];
+} fcc_t;
+
+/* Fast controllers continued
+ * (the fields sum to 32 bytes per controller). */
+typedef struct fcc_c {
+	u32	fcc_firper;
+	u32	fcc_firer;
+	u32	fcc_firsr_hi;
+	u32	fcc_firsr_lo;
+	u8	fcc_gfemr;
+	u8	res1[15];
+} fcc_c_t;
+
+/* TC Layer
+ * Ten 16-bit registers plus 12 reserved bytes: 32 bytes per layer. */
+typedef struct tclayer {
+	u16	tc_tcmode;
+	u16	tc_cdsmr;
+	u16	tc_tcer;
+	u16	tc_rcc;
+	u16	tc_tcmr;
+	u16	tc_fcc;
+	u16	tc_ccc;
+	u16	tc_icc;
+	u16	tc_tcc;
+	u16	tc_ecc;
+	u8	res1[12];
+} tclayer_t;
+
+
+/* I2C
+ * One-byte registers at a 4-byte stride; the fields sum to 352 bytes. */
+typedef struct i2c {
+	u8	i2c_i2mod;
+	u8	res1[3];
+	u8	i2c_i2add;
+	u8	res2[3];
+	u8	i2c_i2brg;
+	u8	res3[3];
+	u8	i2c_i2com;
+	u8	res4[3];
+	u8	i2c_i2cer;
+	u8	res5[3];
+	u8	i2c_i2cmr;
+	u8	res6[331];
+} i2c_cpm2_t;
+
+typedef struct scc {		/* Serial communication channels; fields sum to 32 bytes */
+	u32	scc_gsmrl;
+	u32	scc_gsmrh;
+	u16	scc_psmr;
+	u8	res1[2];
+	u16	scc_todr;
+	u16	scc_dsr;
+	u16	scc_scce;
+	u8	res2[2];
+	u16	scc_sccm;
+	u8	res3;
+	u8	scc_sccs;
+	u8	res4[8];
+} scc_t;
+
+typedef struct smc {		/* Serial management channels; fields sum to 16 bytes */
+	u8	res1[2];
+	u16	smc_smcmr;
+	u8	res2[2];
+	u8	smc_smce;
+	u8	res3[3];
+	u8	smc_smcm;
+	u8	res4[5];
+} smc_t;
+
+/* Serial Peripheral Interface.
+ * The fields sum to 96 bytes, of which the trailing res4 accounts for 82. */
+typedef struct spi_ctrl {
+	u16	spi_spmode;
+	u8	res1[4];
+	u8	spi_spie;
+	u8	res2[3];
+	u8	spi_spim;
+	u8	res3[2];
+	u8	spi_spcom;
+	u8	res4[82];
+} spictl_cpm2_t;
+
+/* CPM Mux.
+ * The fields sum to 32 bytes, of which the trailing res4 accounts for 16. */
+typedef struct cpmux {
+	u8	cmx_si1cr;
+	u8	res1;
+	u8	cmx_si2cr;
+	u8	res2;
+	u32	cmx_fcr;
+	u32	cmx_scr;
+	u8	cmx_smr;
+	u8	res3;
+	u16	cmx_uar;
+	u8	res4[16];
+} cpmux_t;
+
+/* SIRAM control
+ * The fields sum to 16 bytes with no trailing reserved area. */
+typedef struct siram {
+	u16	si_amr;
+	u16	si_bmr;
+	u16	si_cmr;
+	u16	si_dmr;
+	u8	si_gmr;
+	u8	res1;
+	u8	si_cmdr;
+	u8	res2;
+	u8	si_str;
+	u8	res3;
+	u16	si_rsr;
+} siramctl_t;
+
+typedef struct mcc {	/* the fields sum to 16 bytes */
+	u16	mcc_mcce;
+	u8	res1[2];
+	u16	mcc_mccm;
+	u8	res2[2];
+	u8	mcc_mccf;
+	u8	res3[7];
+} mcc_t;
+
+typedef struct comm_proc {	/* the fields sum to 48 bytes */
+	u32	cp_cpcr;
+	u32	cp_rccr;
+	u8	res1[14];
+	u16	cp_rter;
+	u8	res2[2];
+	u16	cp_rtmr;
+	u16	cp_rtscr;
+	u8	res3[2];
+	u32	cp_rtsr;
+	u8	res4[12];
+} cpm_cpm2_t;
+
+/* USB Controller.
+ * 16-bit registers are typed __be16 here; the fields sum to 30 bytes. */
+typedef struct cpm_usb_ctlr {
+	u8	usb_usmod;
+	u8	usb_usadr;
+	u8	usb_uscom;
+	u8	res1[1];
+	__be16  usb_usep[4];
+	u8	res2[4];
+	__be16  usb_usber;
+	u8	res3[2];
+	__be16  usb_usbmr;
+	u8	usb_usbs;
+	u8	res4[7];
+} usb_cpm2_t;
+
+/* ...and the whole thing wrapped up....
+ * Member order defines the register offsets, so do not reorder or resize. */
+
+typedef struct immap {
+	/* Some references are into the unique and known dpram spaces,
+	 * others are from the generic base.
+	 */
+#define im_dprambase	im_dpram1
+	u8		im_dpram1[16*1024];
+	u8		res1[16*1024];
+	u8		im_dpram2[4*1024];
+	u8		res2[8*1024];
+	u8		im_dpram3[4*1024];
+	u8		res3[16*1024];
+
+	sysconf_cpm2_t	im_siu_conf;	/* SIU Configuration */
+	memctl_cpm2_t	im_memctl;	/* Memory Controller */
+	sit_cpm2_t	im_sit;		/* System Integration Timers */
+	pci_cpm2_t	im_pci;		/* PCI Controller */
+	intctl_cpm2_t	im_intctl;	/* Interrupt Controller */
+	car_cpm2_t	im_clkrst;	/* Clocks and reset */
+	iop_cpm2_t	im_ioport;	/* IO Port control/status */
+	cpmtimer_cpm2_t	im_cpmtimer;	/* CPM timers */
+	sdma_cpm2_t	im_sdma;	/* SDMA control/status */
+
+	fcc_t		im_fcc[3];	/* Three FCCs */
+	u8		res4z[32];
+	fcc_c_t		im_fcc_c[3];	/* Continued FCCs */
+
+	u8		res4[32];
+
+	tclayer_t	im_tclayer[8];	/* Eight TCLayers */
+	u16		tc_tcgsr;
+	u16		tc_tcger;
+
+	/* First set of baud rate generators.
+	*/
+	u8		res[236];
+	u32		im_brgc5;
+	u32		im_brgc6;
+	u32		im_brgc7;
+	u32		im_brgc8;
+
+	u8		res5[608];
+
+	i2c_cpm2_t	im_i2c;		/* I2C control/status */
+	cpm_cpm2_t	im_cpm;		/* Communication processor */
+
+	/* Second set of baud rate generators.
+	*/
+	u32		im_brgc1;
+	u32		im_brgc2;
+	u32		im_brgc3;
+	u32		im_brgc4;
+
+	scc_t		im_scc[4];	/* Four SCCs */
+	smc_t		im_smc[2];	/* Couple of SMCs */
+	spictl_cpm2_t	im_spi;		/* A SPI */
+	cpmux_t		im_cpmux;	/* CPM clock route mux */
+	siramctl_t	im_siramctl1;	/* First SI RAM Control */
+	mcc_t		im_mcc1;	/* First MCC */
+	siramctl_t	im_siramctl2;	/* Second SI RAM Control */
+	mcc_t		im_mcc2;	/* Second MCC */
+	usb_cpm2_t	im_usb;		/* USB Controller */
+
+	u8		res6[1153];	/* NOTE(review): odd size before a u16 array; 1 pad byte presumably follows - confirm */
+
+	u16		im_si1txram[256];
+	u8		res7[512];
+	u16		im_si1rxram[256];
+	u8		res8[512];
+	u16		im_si2txram[256];
+	u8		res9[512];
+	u16		im_si2rxram[256];
+	u8		res10[512];
+	u8		res11[4096];
+} cpm2_map_t;
+
+extern cpm2_map_t __iomem *cpm2_immr;
+
+#endif /* __IMMAP_CPM2__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
new file mode 100644
index 0000000000..684d3f4532
--- /dev/null
+++ b/arch/powerpc/include/asm/inst.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INST_H
+#define _ASM_POWERPC_INST_H
+
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/disassemble.h>
+#include <asm/uaccess.h>
+/* Read one user-space instruction (and its suffix if prefixed); 0 on success. */
+#define ___get_user_instr(gu_op, dest, ptr)				\
+({									\
+	long __gui_ret;							\
+	u32 __user *__gui_ptr = (u32 __user *)(ptr);			\
+	ppc_inst_t __gui_inst;						\
+	unsigned int __prefix, __suffix;				\
+									\
+	__chk_user_ptr(ptr);						\
+	__gui_ret = gu_op(__prefix, __gui_ptr);				\
+	if (__gui_ret == 0) {						\
+		if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \
+			__gui_ret = gu_op(__suffix, __gui_ptr + 1);	\
+			__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
+		} else {						\
+			__gui_inst = ppc_inst(__prefix);		\
+		}							\
+		if (__gui_ret == 0)					\
+			(dest) = __gui_inst;				\
+	}								\
+	__gui_ret;							\
+})
+
+#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
+
+#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr)
+
+/*
+ * Instruction data type for POWER
+ */
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+	return x.val;	/* the word set as .val by ppc_inst()/ppc_inst_prefix() */
+}
+
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
+
+#else
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+	return x;	/* in this configuration ppc_inst(x) is just (x) */
+}
+#define ppc_inst(x) (x)
+#endif
+
+static inline int ppc_inst_primary_opcode(ppc_inst_t x)
+{
+	return ppc_inst_val(x) >> 26;	/* top 6 bits of the 32-bit word */
+}
+
+#ifdef CONFIG_PPC64
+#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
+
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
+{
+	return x.suffix;	/* the word set as .suffix by ppc_inst_prefix() */
+}
+
+#else
+#define ppc_inst_prefix(x, y) ((void)(y), ppc_inst(x))	/* suffix is evaluated, then discarded */
+
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
+{
+	return 0;	/* !CONFIG_PPC64: ppc_inst_prefix() drops the suffix, so there is none */
+}
+
+#endif /* CONFIG_PPC64 */
+
+static inline ppc_inst_t ppc_inst_read(const u32 *ptr)
+{
+	if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
+		return ppc_inst_prefix(*ptr, *(ptr + 1));	/* prefixed: the next word is read as the suffix */
+	else
+		return ppc_inst(*ptr);
+}
+
+static inline bool ppc_inst_prefixed(ppc_inst_t x)
+{
+	return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;	/* always false without CONFIG_PPC64 */
+}
+
+static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x)
+{
+	return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));	/* each 32-bit word is swapped on its own */
+}
+
+static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y)
+{
+	if (ppc_inst_val(x) != ppc_inst_val(y))
+		return false;
+	if (!ppc_inst_prefixed(x))	/* first words match, so y is not prefixed either */
+		return true;
+	return ppc_inst_suffix(x) == ppc_inst_suffix(y);	/* suffix only matters for prefixed instructions */
+}
+
+static inline int ppc_inst_len(ppc_inst_t x)
+{
+	return ppc_inst_prefixed(x) ? 8 : 4;	/* length in bytes: two words if prefixed, else one */
+}
+
+/*
+ * Return the address of the next instruction, if the instruction at @value
+ * was located at @location: @value is read only to get the length in bytes.
+ */
+static inline u32 *ppc_inst_next(u32 *location, u32 *value)
+{
+	ppc_inst_t tmp;
+
+	tmp = ppc_inst_read(value);
+
+	return (void *)location + ppc_inst_len(tmp);
+}
+
+static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x)
+{
+	if (IS_ENABLED(CONFIG_PPC32))
+		return ppc_inst_val(x);
+	else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+		return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);	/* first word in the low half */
+	else
+		return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);	/* first word in the high half */
+}
+
+static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x)
+{
+	if (!ppc_inst_prefixed(x))
+		*ptr = ppc_inst_val(x);
+	else
+		*(u64 *)ptr = ppc_inst_as_ulong(x);	/* prefixed: both words go out in one 64-bit store */
+}
+
+static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+	unsigned int val, suffix;
+
+/* See https://github.com/ClangBuiltLinux/linux/issues/1521 */
+#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000
+	val = suffix = 0;
+#endif
+	__get_kernel_nofault(&val, src, u32, Efault);	/* presumably branches to Efault if the read faults */
+	if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+		__get_kernel_nofault(&suffix, src + 1, u32, Efault);
+		*inst = ppc_inst_prefix(val, suffix);
+	} else {
+		*inst = ppc_inst(val);
+	}
+	return 0;	/* *inst has been written */
+Efault:
+	return -EFAULT;	/* *inst is left untouched on this path */
+}
+
+static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+	if (unlikely(!is_kernel_addr((unsigned long)src)))	/* reject non-kernel addresses before reading */
+		return -ERANGE;
+
+	return __copy_inst_from_kernel_nofault(inst, src);	/* 0 or -EFAULT */
+}
+
+#endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
new file mode 100644
index 0000000000..a4196ab1d0
--- /dev/null
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -0,0 +1,670 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INTERRUPT_H
+#define _ASM_POWERPC_INTERRUPT_H
+
+/* BookE/4xx */
+#define INTERRUPT_CRITICAL_INPUT  0x100
+
+/* BookE */
+#define INTERRUPT_DEBUG           0xd00
+#ifdef CONFIG_BOOKE
+#define INTERRUPT_PERFMON         0x260
+#define INTERRUPT_DOORBELL        0x280
+#endif
+
+/* BookS/4xx/8xx */
+#define INTERRUPT_MACHINE_CHECK   0x200
+
+/* BookS/8xx */
+#define INTERRUPT_SYSTEM_RESET    0x100
+
+/* BookS */
+#define INTERRUPT_DATA_SEGMENT    0x380
+#define INTERRUPT_INST_SEGMENT    0x480
+#define INTERRUPT_TRACE           0xd00
+#define INTERRUPT_H_DATA_STORAGE  0xe00
+#define INTERRUPT_HMI			0xe60
+#define INTERRUPT_H_FAC_UNAVAIL   0xf80
+#ifdef CONFIG_PPC_BOOK3S
+#define INTERRUPT_DOORBELL        0xa00
+#define INTERRUPT_PERFMON         0xf00
+#define INTERRUPT_ALTIVEC_UNAVAIL	0xf20
+#endif
+
+/* BookE/BookS/4xx/8xx */
+#define INTERRUPT_DATA_STORAGE    0x300
+#define INTERRUPT_INST_STORAGE    0x400
+#define INTERRUPT_EXTERNAL		0x500
+#define INTERRUPT_ALIGNMENT       0x600
+#define INTERRUPT_PROGRAM         0x700
+#define INTERRUPT_SYSCALL         0xc00
+#define INTERRUPT_TRACE			0xd00
+
+/* BookE/BookS/44x */
+#define INTERRUPT_FP_UNAVAIL      0x800
+
+/* BookE/BookS/44x/8xx */
+#define INTERRUPT_DECREMENTER     0x900
+
+#ifndef INTERRUPT_PERFMON
+#define INTERRUPT_PERFMON         0x0
+#endif
+
+/* 8xx */
+#define INTERRUPT_SOFT_EMU_8xx		0x1000
+#define INTERRUPT_INST_TLB_MISS_8xx	0x1100
+#define INTERRUPT_DATA_TLB_MISS_8xx	0x1200
+#define INTERRUPT_INST_TLB_ERROR_8xx	0x1300
+#define INTERRUPT_DATA_TLB_ERROR_8xx	0x1400
+#define INTERRUPT_DATA_BREAKPOINT_8xx	0x1c00
+#define INTERRUPT_INST_BREAKPOINT_8xx	0x1d00
+
+/* 603 */
+#define INTERRUPT_INST_TLB_MISS_603		0x1000
+#define INTERRUPT_DATA_LOAD_TLB_MISS_603	0x1100
+#define INTERRUPT_DATA_STORE_TLB_MISS_603	0x1200
+
+#ifndef __ASSEMBLY__
+
+#include <linux/context_tracking.h>
+#include <linux/hardirq.h>
+#include <asm/cputime.h>
+#include <asm/firmware.h>
+#include <asm/ftrace.h>
+#include <asm/kprobes.h>
+#include <asm/runlatch.h>
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+/*
+ * WARN/BUG is handled with a program interrupt so minimise checks here to
+ * avoid recursion and maximise the chance of getting the first oops handled.
+ */
+#define INT_SOFT_MASK_BUG_ON(regs, cond)				\
+do {									\
+	if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM)))	\
+		BUG_ON(cond);						\
+} while (0)
+#else
+#define INT_SOFT_MASK_BUG_ON(regs, cond)
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+extern char __end_soft_masked[];
+bool search_kernel_soft_mask_table(unsigned long addr);
+unsigned long search_kernel_restart_table(unsigned long addr);
+
+DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+	if (regs->msr & MSR_PR)	/* MSR[PR] was set in the interrupted context */
+		return false;
+
+	if (regs->nip >= (unsigned long)__end_soft_masked)	/* only addresses below this symbol are looked up */
+		return false;
+
+	return search_kernel_soft_mask_table(regs->nip);
+}
+
+static inline void srr_regs_clobbered(void)
+{
+	local_paca->srr_valid = 0;	/* clear both validity flags in the paca */
+	local_paca->hsrr_valid = 0;
+}
+#else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+	return 0;	/* stub for !CONFIG_PPC_BOOK3S_64: never finds an entry */
+}
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+	return false;	/* stub for !CONFIG_PPC_BOOK3S_64 */
+}
+
+static inline void srr_regs_clobbered(void)
+{	/* stub for !CONFIG_PPC_BOOK3S_64: nothing to do */
+}
+#endif
+
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP	/* without this option the function body is empty */
+	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+		/* Can avoid a test-and-clear because NMIs do not call this */
+		clear_thread_local_flags(_TLF_NAPPING);
+		regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);	/* return there instead of regs' NIP */
+	}
+#endif
+}
+
+static inline void booke_restore_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS	/* without this option the function body is empty */
+	unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+	if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
+		mtspr(SPRN_DBSR, -1);	/* write all-ones to DBSR */
+		mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);	/* reload DBCR0 from this CPU's global_dbcr0 slot */
+	}
+#endif
+}
+
+static inline void interrupt_enter_prepare(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+	irq_soft_mask_set(IRQS_ALL_DISABLED);
+
+	/*
+	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+	 * Asynchronous interrupts get here with HARD_DIS set (see below), so
+	 * this enables MSR[EE] for synchronous interrupts. IRQs remain
+	 * soft-masked. The interrupt handler may later call
+	 * interrupt_cond_local_irq_enable() to achieve a regular process
+	 * context.
+	 */
+	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+		INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE));
+		__hard_irq_enable();
+	} else {
+		__hard_RI_enable();	/* HARD_DIS was set: only the RI enable is done here */
+	}
+	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
+#endif
+
+	if (!arch_irq_disabled_regs(regs))
+		trace_hardirqs_off();
+
+	if (user_mode(regs)) {
+		kuap_lock();
+		CT_WARN_ON(ct_state() != CONTEXT_USER);
+		user_exit_irqoff();
+
+		account_cpu_user_entry();
+		account_stolen_time();
+	} else {
+		kuap_save_and_lock(regs);
+		/*
+		 * CT_WARN_ON comes here via program_check_exception,
+		 * so avoid recursion.
+		 */
+		if (TRAP(regs) != INTERRUPT_PROGRAM)
+			CT_WARN_ON(ct_state() != CONTEXT_KERNEL &&
+				   ct_state() != CONTEXT_IDLE);
+		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
+		INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) &&
+					   search_kernel_restart_table(regs->nip));
+	}
+	INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
+				   !(regs->msr & MSR_EE));	/* soft-enabled regs must also have MSR[EE] set */
+
+	booke_restore_dbcr0();
+}
+
+/*
+ * Care should be taken to note that interrupt_exit_prepare and
+ * interrupt_async_exit_prepare do not necessarily return immediately to
+ * regs context (e.g., if regs is usermode, we don't necessarily return to
+ * user mode). Other interrupts might be taken between here and return,
+ * context switch / preemption may occur in the exit path after this, or a
+ * signal may be delivered, etc.
+ *
+ * The real interrupt exit code is platform specific, e.g.,
+ * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
+ *
+ * However interrupt_nmi_exit_prepare does return directly to regs, because
+ * NMIs do not do "exit work" or replay soft-masked interrupts.
+ */
+static inline void interrupt_exit_prepare(struct pt_regs *regs)
+{	/* no work is done here; regs is unused */
+}
+
+static inline void interrupt_async_enter_prepare(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+	/* Ensure interrupt_enter_prepare does not enable MSR[EE] */
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
+	interrupt_enter_prepare(regs);	/* on PPC64 this now takes its HARD_DIS (RI-only) branch */
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * RI=1 is set by interrupt_enter_prepare, so this thread flags access
+	 * has to come afterward (it can cause SLB faults).
+	 */
+	if (cpu_has_feature(CPU_FTR_CTRL) &&
+	    !test_thread_local_flags(_TLF_RUNLATCH))
+		__ppc64_runlatch_on();
+#endif
+	irq_enter();
+}
+
+static inline void interrupt_async_exit_prepare(struct pt_regs *regs)
+{
+	/*
+	 * Adjust at exit so the main handler sees the true NIA. This must
+	 * come before irq_exit() because irq_exit can enable interrupts, and
+	 * if another interrupt is taken before nap_adjust_return has run
+	 * here, then that interrupt would return directly to idle nap return.
+	 */
+	nap_adjust_return(regs);
+
+	irq_exit();
+	interrupt_exit_prepare(regs);	/* currently an empty function */
+}
+
+struct interrupt_nmi_state {
+#ifdef CONFIG_PPC64
+	u8 irq_soft_mask;	/* saved local_paca->irq_soft_mask */
+	u8 irq_happened;	/* saved local_paca->irq_happened */
+	u8 ftrace_enabled;	/* saved only when nmi_disables_ftrace() is true */
+	u64 softe;		/* saved regs->softe */
+#endif
+};
+
+static inline bool nmi_disables_ftrace(struct pt_regs *regs)
+{
+	/* Allow DEC and PMI to be traced when they are soft-NMI */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+		if (TRAP(regs) == INTERRUPT_DECREMENTER)
+		       return false;
+		if (TRAP(regs) == INTERRUPT_PERFMON)
+		       return false;
+	}
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+		if (TRAP(regs) == INTERRUPT_PERFMON)
+			return false;
+	}
+
+	return true;	/* every other trap: ftrace is disabled for the NMI */
+}
+
+static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+#ifdef CONFIG_PPC64
+	state->irq_soft_mask = local_paca->irq_soft_mask;
+	state->irq_happened = local_paca->irq_happened;
+	state->softe = regs->softe;
+
+	/*
+	 * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
+	 * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
+	 * because that goes through irq tracing which we don't want in NMI.
+	 */
+	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
+		/*
+		 * Adjust regs->softe to be soft-masked if it had not been
+		 * reconciled (e.g., interrupt entry with MSR[EE]=0 but softe
+		 * not yet set disabled), or if it was in an implicit soft
+		 * masked state. This makes arch_irq_disabled_regs(regs)
+		 * behave as expected.
+		 */
+		regs->softe = IRQS_ALL_DISABLED;
+	}
+
+	__hard_RI_enable();
+
+	/* Don't do any per-CPU operations until interrupt state is fixed */
+
+	if (nmi_disables_ftrace(regs)) {
+		state->ftrace_enabled = this_cpu_get_ftrace_enabled();	/* restored by interrupt_nmi_exit_prepare() */
+		this_cpu_set_ftrace_enabled(0);
+	}
+#endif
+
+	/* If data relocations are enabled, it's safe to use nmi_enter() */
+	if (mfmsr() & MSR_DR) {
+		nmi_enter();
+		return;
+	}
+
+	/*
+	 * But do not use nmi_enter() for pseries hash guest taking a real-mode
+	 * NMI because not everything it touches is within the RMA limit.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+	    firmware_has_feature(FW_FEATURE_LPAR) &&
+	    !radix_enabled())
+		return;
+
+	/*
+	 * Likewise, don't use it if we have some form of instrumentation (like
+	 * KASAN shadow) that is not safe to access in real mode (even on radix)
+	 */
+	if (IS_ENABLED(CONFIG_KASAN))
+		return;
+
+	/* Otherwise, it should be safe to call it */
+	nmi_enter();
+}
+
+static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+	if (mfmsr() & MSR_DR) {
+		// nmi_exit if relocations are on
+		nmi_exit();
+	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+		   firmware_has_feature(FW_FEATURE_LPAR) &&
+		   !radix_enabled()) {
+		// no nmi_exit for a pseries hash guest taking a real mode exception
+	} else if (IS_ENABLED(CONFIG_KASAN)) {
+		// no nmi_exit for KASAN in real mode
+	} else {
+		nmi_exit();
+	}
+
+	/*
+	 * nmi does not call nap_adjust_return because nmi should not create
+	 * new work to do (must use irq_work for that).
+	 */
+
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
+	if (arch_irq_disabled_regs(regs)) {
+		unsigned long rst = search_kernel_restart_table(regs->nip);
+		if (rst)
+			regs_set_return_ip(regs, rst);	/* redirect the return IP to the restart-table result */
+	}
+#endif
+
+	if (nmi_disables_ftrace(regs))
+		this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+
+	/* Check we didn't change the pending interrupt mask. */
+	WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+	regs->softe = state->softe;	/* restore the values saved by interrupt_nmi_enter_prepare() */
+	local_paca->irq_happened = state->irq_happened;
+	local_paca->irq_soft_mask = state->irq_soft_mask;
+#endif
+}
+
+/*
+ * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each
+ * function definition. The reason for this is the noinstr section is placed
+ * after the main text section, i.e., very far away from the interrupt entry
+ * asm. That creates problems with fitting linker stubs when building large
+ * kernels.
+ */
+#define interrupt_handler __visible noinline notrace __no_kcsan __no_sanitize_address
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RAW(func)				\
+	__visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * This is a plain function which does no tracing, reconciling, etc.
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ *
+ * raw interrupt handlers must not enable or disable interrupts, or
+ * schedule, tracing and instrumentation (ftrace, lockdep, etc) would
+ * not be advisable either, although may be possible in a pinch, the
+ * trace will look odd at least.
+ *
+ * A raw handler may call one of the other interrupt handler functions
+ * to be converted into that interrupt context without these restrictions.
+ *
+ * On PPC64, _RAW handlers may return with fast_interrupt_return.
+ *
+ * Specific handlers may have additional restrictions.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RAW(func)				\
+static __always_inline __no_sanitize_address __no_kcsan long		\
+____##func(struct pt_regs *regs);					\
+									\
+interrupt_handler long func(struct pt_regs *regs)			\
+{									\
+	long ret;							\
+									\
+	__hard_RI_enable();						\
+									\
+	ret = ____##func (regs);					\
+									\
+	return ret;							\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline __no_sanitize_address __no_kcsan long		\
+____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER(func)					\
+	__visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER(func)					\
+static __always_inline void ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler void func(struct pt_regs *regs)			\
+{									\
+	interrupt_enter_prepare(regs);					\
+									\
+	____##func (regs);						\
+									\
+	interrupt_exit_prepare(regs);					\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline void ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RET(func)				\
+	__visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RET(func)				\
+static __always_inline long ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler long func(struct pt_regs *regs)			\
+{									\
+	long ret;							\
+									\
+	interrupt_enter_prepare(regs);					\
+									\
+	ret = ____##func (regs);					\
+									\
+	interrupt_exit_prepare(regs);					\
+									\
+	return ret;							\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline long ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function
+ * @func:	Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER_ASYNC(func)				\
+	__visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function
+ * @func:	Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_ASYNC(func)				\
+static __always_inline void ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler void func(struct pt_regs *regs)			\
+{									\
+	interrupt_async_enter_prepare(regs);				\
+									\
+	____##func (regs);						\
+									\
+	interrupt_async_exit_prepare(regs);				\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline void ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function
+ * @func:	Function name of the entry point
+ * Return:	a value passed back to the asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_NMI(func)				\
+	__visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function
+ * @func:	Function name of the entry point
+ * Return:	a value passed back to the asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_NMI(func)				\
+static __always_inline __no_sanitize_address __no_kcsan long		\
+____##func(struct pt_regs *regs);					\
+									\
+interrupt_handler long func(struct pt_regs *regs)			\
+{									\
+	struct interrupt_nmi_state state;				\
+	long ret;							\
+									\
+	interrupt_nmi_enter_prepare(regs, &state);			\
+									\
+	ret = ____##func (regs);					\
+									\
+	interrupt_nmi_exit_prepare(regs, &state);			\
+									\
+	return ret;							\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline  __no_sanitize_address __no_kcsan long		\
+____##func(struct pt_regs *regs)
+
+
+/* Interrupt handlers */
+/* kernel/traps.c */
+DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
+#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
+DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
+#endif
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
+DECLARE_INTERRUPT_HANDLER(SMIException);
+DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
+DECLARE_INTERRUPT_HANDLER(unknown_exception);
+DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception);
+DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
+DECLARE_INTERRUPT_HANDLER(RunModeException);
+DECLARE_INTERRUPT_HANDLER(single_step_exception);
+DECLARE_INTERRUPT_HANDLER(program_check_exception);
+DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt);
+DECLARE_INTERRUPT_HANDLER(alignment_exception);
+DECLARE_INTERRUPT_HANDLER(StackOverflow);
+DECLARE_INTERRUPT_HANDLER(stack_overflow_exception);
+DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception);
+DECLARE_INTERRUPT_HANDLER(DebugException);
+DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
+DECLARE_INTERRUPT_HANDLER(CacheLockingException);
+DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
+DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
+DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException);
+DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
+
+/* slb.c */
+DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt);
+
+/* hash_utils.c */
+DECLARE_INTERRUPT_HANDLER(do_hash_fault);
+
+/* fault.c */
+DECLARE_INTERRUPT_HANDLER(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv);
+
+/* process.c */
+DECLARE_INTERRUPT_HANDLER(do_break);
+
+/* time.c */
+DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt);
+
+/* mce.c */
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early);
+DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
+
+/* irq.c */
+DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ);
+
+void __noreturn unrecoverable_exception(struct pt_regs *regs);
+
+void replay_system_reset(void);
+void replay_soft_interrupts(void);
+
+static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
+{
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+}
+
+long system_call_exception(struct pt_regs *regs, unsigned long r0);
+notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
+#ifdef CONFIG_PPC64
+unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs);
+unsigned long interrupt_exit_user_restart(struct pt_regs *regs);
+unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/include/asm/io-defs.h b/arch/powerpc/include/asm/io-defs.h
new file mode 100644
index 0000000000..faf8617cc5
--- /dev/null
+++ b/arch/powerpc/include/asm/io-defs.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* This file is meant to be included multiple times by other headers */
+/* last 2 arguments are used by platforms/cell/io-workarounds.[ch] */
+
+DEF_PCI_AC_RET(readb, u8, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_RET(readw, u16, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_RET(readl, u32, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_RET(readw_be, u16, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_RET(readl_be, u32, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_NORET(writeb, (u8 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+DEF_PCI_AC_NORET(writew, (u16 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+DEF_PCI_AC_NORET(writel, (u32 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+DEF_PCI_AC_NORET(writew_be, (u16 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+DEF_PCI_AC_NORET(writel_be, (u32 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+
+#ifdef __powerpc64__
+DEF_PCI_AC_RET(readq, u64, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_RET(readq_be, u64, (const PCI_IO_ADDR addr), (addr), mem, addr)
+DEF_PCI_AC_NORET(writeq, (u64 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+DEF_PCI_AC_NORET(writeq_be, (u64 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
+#endif /* __powerpc64__ */
+
+DEF_PCI_AC_RET(inb, u8, (unsigned long port), (port), pio, port)
+DEF_PCI_AC_RET(inw, u16, (unsigned long port), (port), pio, port)
+DEF_PCI_AC_RET(inl, u32, (unsigned long port), (port), pio, port)
+DEF_PCI_AC_NORET(outb, (u8 val, unsigned long port), (val, port), pio, port)
+DEF_PCI_AC_NORET(outw, (u16 val, unsigned long port), (val, port), pio, port)
+DEF_PCI_AC_NORET(outl, (u32 val, unsigned long port), (val, port), pio, port)
+
+DEF_PCI_AC_NORET(readsb, (const PCI_IO_ADDR a, void *b, unsigned long c),
+		 (a, b, c), mem, a)
+DEF_PCI_AC_NORET(readsw, (const PCI_IO_ADDR a, void *b, unsigned long c),
+		 (a, b, c), mem, a)
+DEF_PCI_AC_NORET(readsl, (const PCI_IO_ADDR a, void *b, unsigned long c),
+		 (a, b, c), mem, a)
+DEF_PCI_AC_NORET(writesb, (PCI_IO_ADDR a, const void *b, unsigned long c),
+		 (a, b, c), mem, a)
+DEF_PCI_AC_NORET(writesw, (PCI_IO_ADDR a, const void *b, unsigned long c),
+		 (a, b, c), mem, a)
+DEF_PCI_AC_NORET(writesl, (PCI_IO_ADDR a, const void *b, unsigned long c),
+		 (a, b, c), mem, a)
+
+DEF_PCI_AC_NORET(insb, (unsigned long p, void *b, unsigned long c),
+		 (p, b, c), pio, p)
+DEF_PCI_AC_NORET(insw, (unsigned long p, void *b, unsigned long c),
+		 (p, b, c), pio, p)
+DEF_PCI_AC_NORET(insl, (unsigned long p, void *b, unsigned long c),
+		 (p, b, c), pio, p)
+DEF_PCI_AC_NORET(outsb, (unsigned long p, const void *b, unsigned long c),
+		 (p, b, c), pio, p)
+DEF_PCI_AC_NORET(outsw, (unsigned long p, const void *b, unsigned long c),
+		 (p, b, c), pio, p)
+DEF_PCI_AC_NORET(outsl, (unsigned long p, const void *b, unsigned long c),
+		 (p, b, c), pio, p)
+
+DEF_PCI_AC_NORET(memset_io, (PCI_IO_ADDR a, int c, unsigned long n),
+		 (a, c, n), mem, a)
+DEF_PCI_AC_NORET(memcpy_fromio, (void *d, const PCI_IO_ADDR s, unsigned long n),
+		 (d, s, n), mem, s)
+DEF_PCI_AC_NORET(memcpy_toio, (PCI_IO_ADDR d, const void *s, unsigned long n),
+		 (d, s, n), mem, d)
diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
new file mode 100644
index 0000000000..3cce499fbe
--- /dev/null
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Support PCI IO workaround
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ */
+
+#ifndef _IO_WORKAROUNDS_H
+#define _IO_WORKAROUNDS_H
+
+#ifdef CONFIG_PPC_IO_WORKAROUNDS
+#include <linux/io.h>
+#include <asm/pci-bridge.h>
+
+/* Bus info */
+struct iowa_bus {
+	struct pci_controller *phb;
+	struct ppc_pci_io *ops;
+	void   *private;
+};
+
+void iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
+		       int (*)(struct iowa_bus *, void *), void *);
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
+struct iowa_bus *iowa_pio_find_bus(unsigned long);
+
+extern struct ppc_pci_io spiderpci_ops;
+extern int spiderpci_iowa_init(struct iowa_bus *, void *);
+
+#define SPIDER_PCI_REG_BASE		0xd000
+#define SPIDER_PCI_REG_SIZE		0x1000
+#define SPIDER_PCI_VCI_CNTL_STAT	0x0110
+#define SPIDER_PCI_DUMMY_READ		0x0810
+#define SPIDER_PCI_DUMMY_READ_BASE	0x0814
+
+#endif
+
+#if defined(CONFIG_PPC_IO_WORKAROUNDS) && defined(CONFIG_PPC_INDIRECT_MMIO)
+extern bool io_workaround_inited;
+
+static inline bool iowa_is_active(void)
+{
+	return unlikely(io_workaround_inited);
+}
+#else /* !(CONFIG_PPC_IO_WORKAROUNDS && CONFIG_PPC_INDIRECT_MMIO) */
+static inline bool iowa_is_active(void)
+{
+	return false;
+}
+#endif /* CONFIG_PPC_IO_WORKAROUNDS && CONFIG_PPC_INDIRECT_MMIO */
+
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+			   pgprot_t prot, void *caller);
+
+#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
new file mode 100644
index 0000000000..0732b743e0
--- /dev/null
+++ b/arch/powerpc/include/asm/io.h
@@ -0,0 +1,1054 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_IO_H
+#define _ASM_POWERPC_IO_H
+#ifdef __KERNEL__
+
+/*
+ */
+
+/* Check for the existence of legacy devices */
+extern int check_legacy_ioport(unsigned long base_port);
+#define I8042_DATA_REG	0x60
+#define FDC_BASE	0x3f0
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_PCI)
+extern struct pci_dev *isa_bridge_pcidev;
+/*
+ * has legacy ISA devices ?
+ */
+#define arch_has_dev_port()	(isa_bridge_pcidev != NULL || isa_io_special)
+#endif
+
+#include <linux/device.h>
+#include <linux/compiler.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/byteorder.h>
+#include <asm/synch.h>
+#include <asm/delay.h>
+#include <asm/mmiowb.h>
+#include <asm/mmu.h>
+
+#define SIO_CONFIG_RA	0x398
+#define SIO_CONFIG_RD	0x399
+
+/* 32 bits uses slightly different variables for the various IO
+ * bases. Most of this file only uses _IO_BASE though which we
+ * define properly based on the platform
+ */
+#ifndef CONFIG_PCI /* no PCI: every base/offset is the constant 0 */
+#define _IO_BASE	0
+#define _ISA_MEM_BASE	0
+#define PCI_DRAM_OFFSET 0
+#elif defined(CONFIG_PPC32) /* CONFIG_PCI && CONFIG_PPC32 */
+#define _IO_BASE	isa_io_base
+#define _ISA_MEM_BASE	isa_mem_base
+#define PCI_DRAM_OFFSET	pci_dram_offset
+#else /* CONFIG_PCI && !CONFIG_PPC32 */
+#define _IO_BASE	pci_io_base
+#define _ISA_MEM_BASE	isa_mem_base
+#define PCI_DRAM_OFFSET	0
+#endif /* !CONFIG_PCI */
+
+extern unsigned long isa_io_base;
+extern unsigned long pci_io_base;
+extern unsigned long pci_dram_offset;
+
+extern resource_size_t isa_mem_base;
+
+/* Boolean set by platform if PIO accesses are supported while _IO_BASE
+ * is not set or addresses cannot be translated to MMIO. This is typically
+ * set when the platform supports "special" PIO accesses via a non memory
+ * mapped mechanism, and allows things like the early udbg UART code to
+ * function.
+ */
+extern bool isa_io_special;
+
+#ifdef CONFIG_PPC32
+#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
+#error CONFIG_PPC_INDIRECT_{PIO,MMIO} are not yet supported on 32 bits
+#endif /* CONFIG_PPC_INDIRECT_PIO || CONFIG_PPC_INDIRECT_MMIO */
+#endif /* CONFIG_PPC32 */
+
+/*
+ *
+ * Low level MMIO accessors
+ *
+ * This provides the non-bus specific accessors to MMIO. Those are PowerPC
+ * specific and thus shouldn't be used in generic code. The accessors
+ * provided here are:
+ *
+ *	in_8, in_le16, in_be16, in_le32, in_be32, in_le64, in_be64
+ *	out_8, out_le16, out_be16, out_le32, out_be32, out_le64, out_be64
+ *	_insb, _insw_ns, _insl_ns, _outsb, _outsw_ns, _outsl_ns
+ *
+ * Those operate directly on a kernel virtual address. Note that the prototype
+ * for the out_* accessors has the arguments in opposite order from the usual
+ * linux PCI accessors. Unlike those, they take the address first and the value
+ * next.
+ *
+ * Note: I might drop the _ns suffix on the stream operations soon as it is
+ * simply normal for stream operations to not swap in the first place.
+ *
+ */
+
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define DEF_MMIO_IN_X(name, size, insn)				\
+static inline u##size name(const volatile u##size __iomem *addr)	\
+{									\
+	u##size ret;							\
+	__asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync"	\
+		: "=r" (ret) : "r" (addr) : "memory");			\
+	return ret;							\
+}
+
+#define DEF_MMIO_OUT_X(name, size, insn)				\
+static inline void name(volatile u##size __iomem *addr, u##size val)	\
+{									\
+	__asm__ __volatile__("sync;"#insn" %1,0,%0"			\
+		: : "r" (addr), "r" (val) : "memory");			\
+	mmiowb_set_pending();						\
+}
+
+#define DEF_MMIO_IN_D(name, size, insn)				\
+static inline u##size name(const volatile u##size __iomem *addr)	\
+{									\
+	u##size ret;							\
+	__asm__ __volatile__("sync;"#insn" %0,0(%1);twi 0,%0,0;isync"\
+		: "=r" (ret) : "b" (addr) : "memory");	\
+	return ret;							\
+}
+
+#define DEF_MMIO_OUT_D(name, size, insn)				\
+static inline void name(volatile u##size __iomem *addr, u##size val)	\
+{									\
+	__asm__ __volatile__("sync;"#insn" %1,0(%0)"			\
+		: : "b" (addr), "r" (val) : "memory");	\
+	mmiowb_set_pending();						\
+}
+#else
+#define DEF_MMIO_IN_X(name, size, insn)				\
+static inline u##size name(const volatile u##size __iomem *addr)	\
+{									\
+	u##size ret;							\
+	__asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync"	\
+		: "=r" (ret) : "Z" (*addr) : "memory");			\
+	return ret;							\
+}
+
+#define DEF_MMIO_OUT_X(name, size, insn)				\
+static inline void name(volatile u##size __iomem *addr, u##size val)	\
+{									\
+	__asm__ __volatile__("sync;"#insn" %1,%y0"			\
+		: "=Z" (*addr) : "r" (val) : "memory");			\
+	mmiowb_set_pending();						\
+}
+
+#define DEF_MMIO_IN_D(name, size, insn)				\
+static inline u##size name(const volatile u##size __iomem *addr)	\
+{									\
+	u##size ret;							\
+	__asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
+		: "=r" (ret) : "m<>" (*addr) : "memory");	\
+	return ret;							\
+}
+
+#define DEF_MMIO_OUT_D(name, size, insn)				\
+static inline void name(volatile u##size __iomem *addr, u##size val)	\
+{									\
+	__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0"			\
+		: "=m<>" (*addr) : "r" (val) : "memory");	\
+	mmiowb_set_pending();						\
+}
+#endif
+
+DEF_MMIO_IN_D(in_8,     8, lbz);
+DEF_MMIO_OUT_D(out_8,   8, stb);
+
+#ifdef __BIG_ENDIAN__
+DEF_MMIO_IN_D(in_be16, 16, lhz);
+DEF_MMIO_IN_D(in_be32, 32, lwz);
+DEF_MMIO_IN_X(in_le16, 16, lhbrx);
+DEF_MMIO_IN_X(in_le32, 32, lwbrx);
+
+DEF_MMIO_OUT_D(out_be16, 16, sth);
+DEF_MMIO_OUT_D(out_be32, 32, stw);
+DEF_MMIO_OUT_X(out_le16, 16, sthbrx);
+DEF_MMIO_OUT_X(out_le32, 32, stwbrx);
+#else /* !__BIG_ENDIAN__ */
+DEF_MMIO_IN_X(in_be16, 16, lhbrx);
+DEF_MMIO_IN_X(in_be32, 32, lwbrx);
+DEF_MMIO_IN_D(in_le16, 16, lhz);
+DEF_MMIO_IN_D(in_le32, 32, lwz);
+
+DEF_MMIO_OUT_X(out_be16, 16, sthbrx);
+DEF_MMIO_OUT_X(out_be32, 32, stwbrx);
+DEF_MMIO_OUT_D(out_le16, 16, sth);
+DEF_MMIO_OUT_D(out_le32, 32, stw);
+
+#endif /* __BIG_ENDIAN__ */
+
+#ifdef __powerpc64__
+
+#ifdef __BIG_ENDIAN__
+DEF_MMIO_OUT_D(out_be64, 64, std);
+DEF_MMIO_IN_D(in_be64, 64, ld);
+
+/* There are no asm instructions for 64 bits reverse loads and stores */
+static inline u64 in_le64(const volatile u64 __iomem *addr)
+{
+	return swab64(in_be64(addr));
+}
+
+static inline void out_le64(volatile u64 __iomem *addr, u64 val)
+{
+	out_be64(addr, swab64(val));
+}
+#else
+DEF_MMIO_OUT_D(out_le64, 64, std);
+DEF_MMIO_IN_D(in_le64, 64, ld);
+
+/* There are no asm instructions for 64 bits reverse loads and stores */
+static inline u64 in_be64(const volatile u64 __iomem *addr)
+{
+	return swab64(in_le64(addr));
+}
+
+static inline void out_be64(volatile u64 __iomem *addr, u64 val)
+{
+	out_le64(addr, swab64(val));
+}
+
+#endif
+#endif /* __powerpc64__ */
+
+/*
+ * Low level IO stream instructions are defined out of line for now
+ */
+extern void _insb(const volatile u8 __iomem *addr, void *buf, long count);
+extern void _outsb(volatile u8 __iomem *addr,const void *buf,long count);
+extern void _insw_ns(const volatile u16 __iomem *addr, void *buf, long count);
+extern void _outsw_ns(volatile u16 __iomem *addr, const void *buf, long count);
+extern void _insl_ns(const volatile u32 __iomem *addr, void *buf, long count);
+extern void _outsl_ns(volatile u32 __iomem *addr, const void *buf, long count);
+
+/* The _ns naming is historical and will be removed. For now, just #define
+ * the non _ns equivalent names
+ */
+#define _insw	_insw_ns
+#define _insl	_insl_ns
+#define _outsw	_outsw_ns
+#define _outsl	_outsl_ns
+
+
+/*
+ * memset_io, memcpy_toio, memcpy_fromio base implementations are out of line
+ */
+
+extern void _memset_io(volatile void __iomem *addr, int c, unsigned long n);
+extern void _memcpy_fromio(void *dest, const volatile void __iomem *src,
+			   unsigned long n);
+extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
+			 unsigned long n);
+
+/*
+ *
+ * PCI and standard ISA accessors
+ *
+ * Those are globally defined linux accessors for devices on PCI or ISA
+ * busses. They follow the Linux defined semantics. The current implementation
+ * for PowerPC is as close as possible to the x86 version of these, and thus
+ * provides fairly heavy weight barriers for the non-raw versions
+ *
+ * In addition, they support a hook mechanism when CONFIG_PPC_INDIRECT_MMIO
+ * or CONFIG_PPC_INDIRECT_PIO are set allowing the platform to provide its
+ * own implementation of some or all of the accessors.
+ */
+
+/*
+ * Include the EEH definitions when EEH is enabled only so they don't get
+ * in the way when building for 32 bits
+ */
+#ifdef CONFIG_EEH
+#include <asm/eeh.h>
+#endif
+
+/* Shortcut to the MMIO argument pointer */
+#define PCI_IO_ADDR	volatile void __iomem *
+
+/* Indirect IO address tokens:
+ *
+ * When CONFIG_PPC_INDIRECT_MMIO is set, the platform can provide hooks
+ * on all MMIOs. (Note that this is all 64 bits only for now)
+ *
+ * To help platforms who may need to differentiate MMIO addresses in
+ * their hooks, a bitfield is reserved for use by the platform near the
+ * top of MMIO addresses (not PIO, those have to cope the hard way).
+ *
+ * The highest addresses in the kernel virtual space are:
+ *
+ *  d0003fffffffffff	# with Hash MMU
+ *  c00fffffffffffff	# with Radix MMU
+ *
+ * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits
+ * that can be used for the field.
+ *
+ * The direct IO mapping operations will then mask off those bits
+ * before doing the actual access, though that only happens when
+ * CONFIG_PPC_INDIRECT_MMIO is set, thus be careful when you use that
+ * mechanism
+ *
+ * For PIO, there is a separate CONFIG_PPC_INDIRECT_PIO which makes
+ * all PIO functions call through a hook.
+ */
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+#define PCI_IO_IND_TOKEN_SHIFT	52
+#define PCI_IO_IND_TOKEN_MASK	(0xfful << PCI_IO_IND_TOKEN_SHIFT) /* 8-bit field */
+#define PCI_FIX_ADDR(addr)						\
+	((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK))
+#define PCI_GET_ADDR_TOKEN(addr)					\
+	(((unsigned long)(addr) & PCI_IO_IND_TOKEN_MASK) >> 		\
+		PCI_IO_IND_TOKEN_SHIFT)
+#define PCI_SET_ADDR_TOKEN(addr, token) 				\
+do {									\
+	unsigned long __a = (unsigned long)(addr);			\
+	__a &= ~PCI_IO_IND_TOKEN_MASK;					\
+	__a |= ((unsigned long)(token)) << PCI_IO_IND_TOKEN_SHIFT;	\
+	(addr) = (void __iomem *)__a;					\
+} while(0)
+#else /* !CONFIG_PPC_INDIRECT_MMIO: no token bits, address used as-is */
+#define PCI_FIX_ADDR(addr) (addr)
+#endif /* CONFIG_PPC_INDIRECT_MMIO */
+
+
+/*
+ * Non ordered and non-swapping "raw" accessors
+ */
+
+static inline unsigned char __raw_readb(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned char __force *)PCI_FIX_ADDR(addr);
+}
+#define __raw_readb __raw_readb
+
+static inline unsigned short __raw_readw(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned short __force *)PCI_FIX_ADDR(addr);
+}
+#define __raw_readw __raw_readw
+
+static inline unsigned int __raw_readl(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned int __force *)PCI_FIX_ADDR(addr);
+}
+#define __raw_readl __raw_readl
+
+static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr)
+{
+	*(volatile unsigned char __force *)PCI_FIX_ADDR(addr) = v;
+}
+#define __raw_writeb __raw_writeb
+
+static inline void __raw_writew(unsigned short v, volatile void __iomem *addr)
+{
+	*(volatile unsigned short __force *)PCI_FIX_ADDR(addr) = v;
+}
+#define __raw_writew __raw_writew
+
+static inline void __raw_writel(unsigned int v, volatile void __iomem *addr)
+{
+	*(volatile unsigned int __force *)PCI_FIX_ADDR(addr) = v;
+}
+#define __raw_writel __raw_writel
+
+#ifdef __powerpc64__
+static inline unsigned long __raw_readq(const volatile void __iomem *addr)
+{
+	return *(volatile unsigned long __force *)PCI_FIX_ADDR(addr);
+}
+#define __raw_readq __raw_readq
+
+static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
+{
+	*(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v;
+}
+#define __raw_writeq __raw_writeq
+
+static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr)
+{
+	__raw_writeq((__force unsigned long)cpu_to_be64(v), addr);
+}
+#define __raw_writeq_be __raw_writeq_be
+
+/*
+ * Real mode versions of the above. Those instructions are only supposed
+ * to be used in hypervisor real mode as per the architecture spec.
+ */
+static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      stbcix %0,0,%1;  \
+			      .machine pop;"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      sthcix %0,0,%1;  \
+			      .machine pop;"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      stwcix %0,0,%1;  \
+			      .machine pop;"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      stdcix %0,0,%1;  \
+			      .machine pop;"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr)
+{
+	__raw_rm_writeq((__force u64)cpu_to_be64(val), paddr);
+}
+
+static inline u8 __raw_rm_readb(volatile void __iomem *paddr)
+{
+	u8 ret;
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      lbzcix %0,0, %1; \
+			      .machine pop;"
+			     : "=r" (ret) : "r" (paddr) : "memory");
+	return ret;
+}
+
+static inline u16 __raw_rm_readw(volatile void __iomem *paddr)
+{
+	u16 ret;
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      lhzcix %0,0, %1; \
+			      .machine pop;"
+			     : "=r" (ret) : "r" (paddr) : "memory");
+	return ret;
+}
+
+static inline u32 __raw_rm_readl(volatile void __iomem *paddr)
+{
+	u32 ret;
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      lwzcix %0,0, %1; \
+			      .machine pop;"
+			     : "=r" (ret) : "r" (paddr) : "memory");
+	return ret;
+}
+
+static inline u64 __raw_rm_readq(volatile void __iomem *paddr)
+{
+	u64 ret;
+	__asm__ __volatile__(".machine push;   \
+			      .machine power6; \
+			      ldcix %0,0, %1;  \
+			      .machine pop;"
+			     : "=r" (ret) : "r" (paddr) : "memory");
+	return ret;
+}
+#endif /* __powerpc64__ */
+
+/*
+ *
+ * PCI PIO and MMIO accessors.
+ *
+ *
+ * On 32 bits, PIO operations have a recovery mechanism in case they trigger
+ * machine checks (which they occasionally do when probing non existing
+ * IO ports on some platforms, like PowerMac and 8xx).
+ * I always found it to be of dubious reliability and I am tempted to get
+ * rid of it one of these days. So if you think it's important to keep it,
+ * please voice up asap. We never had it for 64 bits and I do not intend
+ * to port it over
+ */
+
+#ifdef CONFIG_PPC32
+
+#define __do_in_asm(name, op)				\
+static inline unsigned int name(unsigned int port)	\
+{							\
+	unsigned int x;					\
+	__asm__ __volatile__(				\
+		"sync\n"				\
+		"0:"	op "	%0,0,%1\n"		\
+		"1:	twi	0,%0,0\n"		\
+		"2:	isync\n"			\
+		"3:	nop\n"				\
+		"4:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"5:	li	%0,-1\n"		\
+		"	b	4b\n"			\
+		".previous\n"				\
+		EX_TABLE(0b, 5b)			\
+		EX_TABLE(1b, 5b)			\
+		EX_TABLE(2b, 5b)			\
+		EX_TABLE(3b, 5b)			\
+		: "=&r" (x)				\
+		: "r" (port + _IO_BASE)			\
+		: "memory");  				\
+	return x;					\
+}
+
+#define __do_out_asm(name, op)				\
+static inline void name(unsigned int val, unsigned int port) \
+{							\
+	__asm__ __volatile__(				\
+		"sync\n"				\
+		"0:" op " %0,0,%1\n"			\
+		"1:	sync\n"				\
+		"2:\n"					\
+		EX_TABLE(0b, 2b)			\
+		EX_TABLE(1b, 2b)			\
+		: : "r" (val), "r" (port + _IO_BASE)	\
+		: "memory");   	   	   		\
+}
+
+__do_in_asm(_rec_inb, "lbzx")
+__do_in_asm(_rec_inw, "lhbrx")
+__do_in_asm(_rec_inl, "lwbrx")
+__do_out_asm(_rec_outb, "stbx")
+__do_out_asm(_rec_outw, "sthbrx")
+__do_out_asm(_rec_outl, "stwbrx")
+
+#endif /* CONFIG_PPC32 */
+
+/* The "__do_*" operations below provide the actual "base" implementation
+ * for each of the defined accessors. Some of them use the out_* functions
+ * directly, some of them still use EEH, though we might change that in the
+ * future. Those macros below provide the necessary argument swapping and
+ * handling of the IO base for PIO.
+ *
+ * They are themselves used by the macros that define the actual accessors
+ * and can be used by the hooks if any.
+ *
+ * Note that PIO operations are always defined in terms of their corresponding
+ * MMIO operations. That allows platforms like iSeries who want to modify the
+ * behaviour of both to only hook on the MMIO version and get both. It's also
+ * possible to hook directly at the toplevel PIO operation if they have to
+ * be handled differently
+ */
+#define __do_writeb(val, addr)	out_8(PCI_FIX_ADDR(addr), val)
+#define __do_writew(val, addr)	out_le16(PCI_FIX_ADDR(addr), val)
+#define __do_writel(val, addr)	out_le32(PCI_FIX_ADDR(addr), val)
+#define __do_writeq(val, addr)	out_le64(PCI_FIX_ADDR(addr), val)
+#define __do_writew_be(val, addr) out_be16(PCI_FIX_ADDR(addr), val)
+#define __do_writel_be(val, addr) out_be32(PCI_FIX_ADDR(addr), val)
+#define __do_writeq_be(val, addr) out_be64(PCI_FIX_ADDR(addr), val)
+
+#ifdef CONFIG_EEH
+#define __do_readb(addr)	eeh_readb(PCI_FIX_ADDR(addr))
+#define __do_readw(addr)	eeh_readw(PCI_FIX_ADDR(addr))
+#define __do_readl(addr)	eeh_readl(PCI_FIX_ADDR(addr))
+#define __do_readq(addr)	eeh_readq(PCI_FIX_ADDR(addr))
+#define __do_readw_be(addr)	eeh_readw_be(PCI_FIX_ADDR(addr))
+#define __do_readl_be(addr)	eeh_readl_be(PCI_FIX_ADDR(addr))
+#define __do_readq_be(addr)	eeh_readq_be(PCI_FIX_ADDR(addr))
+#else /* CONFIG_EEH */
+#define __do_readb(addr)	in_8(PCI_FIX_ADDR(addr))
+#define __do_readw(addr)	in_le16(PCI_FIX_ADDR(addr))
+#define __do_readl(addr)	in_le32(PCI_FIX_ADDR(addr))
+#define __do_readq(addr)	in_le64(PCI_FIX_ADDR(addr))
+#define __do_readw_be(addr)	in_be16(PCI_FIX_ADDR(addr))
+#define __do_readl_be(addr)	in_be32(PCI_FIX_ADDR(addr))
+#define __do_readq_be(addr)	in_be64(PCI_FIX_ADDR(addr))
+#endif /* !defined(CONFIG_EEH) */
+
+#ifdef CONFIG_PPC32 /* 32-bit: PIO uses the _rec_* accessors defined above */
+#define __do_outb(val, port)	_rec_outb(val, port)
+#define __do_outw(val, port)	_rec_outw(val, port)
+#define __do_outl(val, port)	_rec_outl(val, port)
+#define __do_inb(port)		_rec_inb(port)
+#define __do_inw(port)		_rec_inw(port)
+#define __do_inl(port)		_rec_inl(port)
+#else /* !CONFIG_PPC32: PIO is MMIO at _IO_BASE + port; expression macros, no ';' */
+#define __do_outb(val, port)	writeb(val, (PCI_IO_ADDR)_IO_BASE + (port))
+#define __do_outw(val, port)	writew(val, (PCI_IO_ADDR)_IO_BASE + (port))
+#define __do_outl(val, port)	writel(val, (PCI_IO_ADDR)_IO_BASE + (port))
+#define __do_inb(port)		readb((PCI_IO_ADDR)_IO_BASE + (port))
+#define __do_inw(port)		readw((PCI_IO_ADDR)_IO_BASE + (port))
+#define __do_inl(port)		readl((PCI_IO_ADDR)_IO_BASE + (port))
+#endif /* !CONFIG_PPC32 */
+
+#ifdef CONFIG_EEH
+#define __do_readsb(a, b, n)	eeh_readsb(PCI_FIX_ADDR(a), (b), (n))
+#define __do_readsw(a, b, n)	eeh_readsw(PCI_FIX_ADDR(a), (b), (n))
+#define __do_readsl(a, b, n)	eeh_readsl(PCI_FIX_ADDR(a), (b), (n))
+#else /* CONFIG_EEH */
+#define __do_readsb(a, b, n)	_insb(PCI_FIX_ADDR(a), (b), (n))
+#define __do_readsw(a, b, n)	_insw(PCI_FIX_ADDR(a), (b), (n))
+#define __do_readsl(a, b, n)	_insl(PCI_FIX_ADDR(a), (b), (n))
+#endif /* !CONFIG_EEH */
+#define __do_writesb(a, b, n)	_outsb(PCI_FIX_ADDR(a),(b),(n))
+#define __do_writesw(a, b, n)	_outsw(PCI_FIX_ADDR(a),(b),(n))
+#define __do_writesl(a, b, n)	_outsl(PCI_FIX_ADDR(a),(b),(n))
+
+#define __do_insb(p, b, n)	readsb((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
+#define __do_insw(p, b, n)	readsw((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
+#define __do_insl(p, b, n)	readsl((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
+#define __do_outsb(p, b, n)	writesb((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
+#define __do_outsw(p, b, n)	writesw((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
+#define __do_outsl(p, b, n)	writesl((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
+
+#define __do_memset_io(addr, c, n)	\
+				_memset_io(PCI_FIX_ADDR(addr), c, n)
+#define __do_memcpy_toio(dst, src, n)	\
+				_memcpy_toio(PCI_FIX_ADDR(dst), src, n)
+
+#ifdef CONFIG_EEH
+#define __do_memcpy_fromio(dst, src, n)	\
+				eeh_memcpy_fromio(dst, PCI_FIX_ADDR(src), n)
+#else /* CONFIG_EEH */
+#define __do_memcpy_fromio(dst, src, n)	\
+				_memcpy_fromio(dst,PCI_FIX_ADDR(src),n)
+#endif /* !CONFIG_EEH */
+
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#define DEF_PCI_HOOK_pio(x)	x
+#else /* !CONFIG_PPC_INDIRECT_PIO: wrappers' "!= NULL" hook test is constant false */
+#define DEF_PCI_HOOK_pio(x)	NULL
+#endif /* CONFIG_PPC_INDIRECT_PIO */
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+#define DEF_PCI_HOOK_mem(x)	x
+#else /* !CONFIG_PPC_INDIRECT_MMIO: wrappers' "!= NULL" hook test is constant false */
+#define DEF_PCI_HOOK_mem(x)	NULL
+#endif /* CONFIG_PPC_INDIRECT_MMIO */
+
+/* Structure containing all the hooks */
+extern struct ppc_pci_io {
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	ret (*name) at;
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)	void (*name) at;
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+} ppc_pci_io;
+
+/* The inline wrappers */
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
+static inline ret name at					\
+{								\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)	\
+		return ppc_pci_io.name al;			\
+	return __do_##name al;					\
+}
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
+static inline void name at					\
+{								\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)		\
+		ppc_pci_io.name al;				\
+	else							\
+		__do_##name al;					\
+}
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+/* Some drivers check for the presence of readq & writeq with
+ * a #ifdef, so we make them happy here.
+ */
+#define readb readb
+#define readw readw
+#define readl readl
+#define writeb writeb
+#define writew writew
+#define writel writel
+#define readsb readsb
+#define readsw readsw
+#define readsl readsl
+#define writesb writesb
+#define writesw writesw
+#define writesl writesl
+#define inb inb
+#define inw inw
+#define inl inl
+#define outb outb
+#define outw outw
+#define outl outl
+#define insb insb
+#define insw insw
+#define insl insl
+#define outsb outsb
+#define outsw outsw
+#define outsl outsl
+#ifdef __powerpc64__
+#define readq	readq
+#define writeq	writeq
+#endif
+#define memset_io memset_io
+#define memcpy_fromio memcpy_fromio
+#define memcpy_toio memcpy_toio
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
+/*
+ * We don't do relaxed operations yet, at least not with this semantic
+ */
+#define readb_relaxed(addr)	readb(addr)
+#define readw_relaxed(addr)	readw(addr)
+#define readl_relaxed(addr)	readl(addr)
+#define readq_relaxed(addr)	readq(addr)
+#define writeb_relaxed(v, addr)	writeb(v, addr)
+#define writew_relaxed(v, addr)	writew(v, addr)
+#define writel_relaxed(v, addr)	writel(v, addr)
+#define writeq_relaxed(v, addr)	writeq(v, addr)
+
+#ifndef CONFIG_GENERIC_IOMAP
+/*
+ * Here comes the implementation of the IOMAP interfaces.
+ */
+static inline unsigned int ioread16be(const void __iomem *addr)
+{
+	return readw_be(addr);
+}
+#define ioread16be ioread16be
+
+static inline unsigned int ioread32be(const void __iomem *addr)
+{
+	return readl_be(addr);
+}
+#define ioread32be ioread32be
+
+#ifdef __powerpc64__
+static inline u64 ioread64_lo_hi(const void __iomem *addr)
+{
+	return readq(addr);
+}
+#define ioread64_lo_hi ioread64_lo_hi
+
+static inline u64 ioread64_hi_lo(const void __iomem *addr)
+{
+	return readq(addr);
+}
+#define ioread64_hi_lo ioread64_hi_lo
+
+static inline u64 ioread64be(const void __iomem *addr)
+{
+	return readq_be(addr);
+}
+#define ioread64be ioread64be
+
+static inline u64 ioread64be_lo_hi(const void __iomem *addr)
+{
+	return readq_be(addr);
+}
+#define ioread64be_lo_hi ioread64be_lo_hi
+
+static inline u64 ioread64be_hi_lo(const void __iomem *addr)
+{
+	return readq_be(addr);
+}
+#define ioread64be_hi_lo ioread64be_hi_lo
+#endif /* __powerpc64__ */
+
+static inline void iowrite16be(u16 val, void __iomem *addr)
+{
+	writew_be(val, addr);
+}
+#define iowrite16be iowrite16be
+
+static inline void iowrite32be(u32 val, void __iomem *addr)
+{
+	writel_be(val, addr);
+}
+#define iowrite32be iowrite32be
+
+#ifdef __powerpc64__
+static inline void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+	writeq(val, addr);
+}
+#define iowrite64_lo_hi iowrite64_lo_hi
+
+static inline void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+	writeq(val, addr);
+}
+#define iowrite64_hi_lo iowrite64_hi_lo
+
+static inline void iowrite64be(u64 val, void __iomem *addr)
+{
+	writeq_be(val, addr);
+}
+#define iowrite64be iowrite64be
+
+static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+	writeq_be(val, addr);
+}
+#define iowrite64be_lo_hi iowrite64be_lo_hi
+
+static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+	writeq_be(val, addr);
+}
+#define iowrite64be_hi_lo iowrite64be_hi_lo
+#endif /* __powerpc64__ */
+
+struct pci_dev;
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
+#define pci_iounmap pci_iounmap
+void __iomem *ioport_map(unsigned long port, unsigned int len);
+#define ioport_map ioport_map
+#endif
+
+static inline void iosync(void)
+{
+        __asm__ __volatile__ ("sync" : : : "memory");
+}
+
+/* Enforce in-order execution of data I/O.
+ * No distinction between read/write on PPC; use eieio for all three.
+ * Those are fairly weak though. They don't provide a barrier between
+ * MMIO and cacheable storage nor do they provide a barrier vs. locks,
+ * they only provide barriers between 2 __raw MMIO operations and
+ * possibly break write combining.
+ */
+#define iobarrier_rw() eieio()
+#define iobarrier_r()  eieio()
+#define iobarrier_w()  eieio()
+
+
+/*
+ * output pause versions need a delay at least for the
+ * w83c105 ide controller in a p610.
+ */
+#define inb_p(port)             inb(port)
+#define outb_p(val, port)       (udelay(1), outb((val), (port)))
+#define inw_p(port)             inw(port)
+#define outw_p(val, port)       (udelay(1), outw((val), (port)))
+#define inl_p(port)             inl(port)
+#define outl_p(val, port)       (udelay(1), outl((val), (port)))
+
+
+#define IO_SPACE_LIMIT (~0UL)	/* all-ones unsigned long */
+
+/**
+ * ioremap     -   map bus memory into CPU space
+ * @address:   bus address of the memory
+ * @size:      size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * We provide a few variations of it:
+ *
+ * * ioremap is the standard one and provides non-cacheable guarded mappings
+ *   and can be hooked by the platform via ppc_md
+ *
+ * * ioremap_prot allows specifying the page flags as an argument and can
+ *   also be hooked by the platform via ppc_md.
+ *
+ * * ioremap_wc enables write combining
+ *
+ * * ioremap_wt enables write through
+ *
+ * * ioremap_coherent maps coherent cached memory
+ *
+ * * iounmap undoes such a mapping and can be hooked
+ *
+ * * __ioremap_caller is the same as above but takes an explicit caller
+ *   reference rather than using __builtin_return_address(0)
+ *
+ */
+extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
+#define ioremap ioremap
+#define ioremap_prot ioremap_prot
+extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
+#define ioremap_wc ioremap_wc
+
+#ifdef CONFIG_PPC32
+void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
+#define ioremap_wt ioremap_wt
+#endif
+
+void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
+#define ioremap_uc(addr, size)		ioremap((addr), (size))
+#define ioremap_cache(addr, size) \
+	ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL))
+
+#define iounmap iounmap
+
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size);
+
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+			unsigned long size, pgprot_t prot);
+
+extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
+				      pgprot_t prot, void *caller);
+
+/*
+ * When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation
+ * which needs some additional definitions here. They basically allow PIO
+ * space overall to be 1GB. This will work as long as we never try to use
+ * iomap to map MMIO below 1GB which should be fine on ppc64
+ */
+#define HAVE_ARCH_PIO_SIZE		1
+#define PIO_OFFSET			0x00000000UL
+#define PIO_MASK			(FULL_IO_SIZE - 1)
+#define PIO_RESERVED			(FULL_IO_SIZE)
+
+#define mmio_read16be(addr)		readw_be(addr)
+#define mmio_read32be(addr)		readl_be(addr)
+#define mmio_read64be(addr)		readq_be(addr)
+#define mmio_write16be(val, addr)	writew_be(val, addr)
+#define mmio_write32be(val, addr)	writel_be(val, addr)
+#define mmio_write64be(val, addr)	writeq_be(val, addr)
+#define mmio_insb(addr, dst, count)	readsb(addr, dst, count)
+#define mmio_insw(addr, dst, count)	readsw(addr, dst, count)
+#define mmio_insl(addr, dst, count)	readsl(addr, dst, count)
+#define mmio_outsb(addr, src, count)	writesb(addr, src, count)
+#define mmio_outsw(addr, src, count)	writesw(addr, src, count)
+#define mmio_outsl(addr, src, count)	writesl(addr, src, count)
+
+/**
+ *	virt_to_phys	-	map virtual addresses to physical
+ *	@address: address to remap
+ *
+ *	The returned physical address is the physical (CPU) mapping for
+ *	the memory address given. It is only valid to use this function on
+ *	addresses directly mapped or allocated via kmalloc.
+ *
+ *	This function does not give bus mappings for DMA transfers. In
+ *	almost all conceivable cases a device driver should not be using
+ *	this function
+ */
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+	WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !virt_addr_valid(address));
+
+	return __pa((unsigned long)address);
+}
+#define virt_to_phys virt_to_phys
+
+/**
+ *	phys_to_virt	-	map physical address to virtual
+ *	@address: address to remap
+ *
+ *	The returned virtual address is a current CPU mapping for
+ *	the memory address given. It is only valid to use this function on
+ *	addresses that have a kernel mapping
+ *
+ *	This function does not handle bus mappings for DMA transfers. In
+ *	almost all conceivable cases a device driver should not be using
+ *	this function
+ */
+static inline void * phys_to_virt(unsigned long address)
+{
+	return (void *)__va(address);
+}
+#define phys_to_virt phys_to_virt
+
+/*
+ * Change "struct page" to physical address.
+ */
+static inline phys_addr_t page_to_phys(struct page *page)
+{
+	unsigned long frame = page_to_pfn(page);
+
+	/* The frame number is only sanity-checked under CONFIG_DEBUG_VIRTUAL. */
+	WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !pfn_valid(frame));
+	return PFN_PHYS(frame);
+}
+
+/*
+ * 32 bits still uses virt_to_bus() for its implementation of DMA
+ * mappings so we have to keep it defined here. We also have some old
+ * drivers (shame shame shame) that use bus_to_virt() and haven't been
+ * fixed yet so I need to define it here.
+ */
+#ifdef CONFIG_PPC32
+
+static inline unsigned long virt_to_bus(volatile void * address)
+{
+	if (!address)	/* NULL maps to bus address 0 */
+		return 0;
+	return __pa(address) + PCI_DRAM_OFFSET;
+}
+#define virt_to_bus virt_to_bus
+
+static inline void * bus_to_virt(unsigned long address)
+{
+	if (!address)	/* bus address 0 maps back to NULL */
+		return NULL;
+	return __va(address - PCI_DRAM_OFFSET);
+}
+#define bus_to_virt bus_to_virt
+
+#endif /* CONFIG_PPC32 */
+
+/* access ports */
+#define setbits32(_addr, _v) out_be32((_addr), in_be32(_addr) |  (_v))
+#define clrbits32(_addr, _v) out_be32((_addr), in_be32(_addr) & ~(_v))
+
+#define setbits16(_addr, _v) out_be16((_addr), in_be16(_addr) |  (_v))
+#define clrbits16(_addr, _v) out_be16((_addr), in_be16(_addr) & ~(_v))
+
+#define setbits8(_addr, _v) out_8((_addr), in_8(_addr) |  (_v))
+#define clrbits8(_addr, _v) out_8((_addr), in_8(_addr) & ~(_v))
+
+/* Clear and set bits in one shot.  These macros can be used to clear and
+ * set multiple bits in a register using a single read-modify-write.  These
+ * macros can also be used to set a multiple-bit bit pattern using a mask,
+ * by specifying the mask in the 'clear' parameter and the new bit pattern
+ * in the 'set' parameter.
+ */
+
+#define clrsetbits(type, addr, clear, set) \
+	out_##type((addr), (in_##type(addr) & ~(clear)) | (set))
+
+#ifdef __powerpc64__
+#define clrsetbits_be64(addr, clear, set) clrsetbits(be64, addr, clear, set)
+#define clrsetbits_le64(addr, clear, set) clrsetbits(le64, addr, clear, set)
+#endif
+
+#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set)
+#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set)
+
+#define clrsetbits_be16(addr, clear, set) clrsetbits(be16, addr, clear, set)
+#define clrsetbits_le16(addr, clear, set) clrsetbits(le16, addr, clear, set)
+
+#define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
+
+#include <asm-generic/io.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_POWERPC_IO_H */
diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h
new file mode 100644
index 0000000000..290c7530d1
--- /dev/null
+++ b/arch/powerpc/include/asm/io_event_irq.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H
+#define _ASM_POWERPC_IO_EVENT_IRQ_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+
+#define PSERIES_IOEI_RPC_MAX_LEN 216
+
+#define PSERIES_IOEI_TYPE_ERR_DETECTED		0x01
+#define PSERIES_IOEI_TYPE_ERR_RECOVERED		0x02
+#define PSERIES_IOEI_TYPE_EVENT			0x03
+#define PSERIES_IOEI_TYPE_RPC_PASS_THRU		0x04
+
+#define PSERIES_IOEI_SUBTYPE_NOT_APP		0x00
+#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ	0x01
+#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE	0x03
+#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE	0x04
+#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE	0x05
+#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE	0x06
+#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED	0x07
+
+#define PSERIES_IOEI_SCOPE_NOT_APP		0x00
+#define PSERIES_IOEI_SCOPE_RIO_HUB		0x36
+#define PSERIES_IOEI_SCOPE_RIO_BRIDGE		0x37
+#define PSERIES_IOEI_SCOPE_PHB			0x38
+#define PSERIES_IOEI_SCOPE_EADS_GLOBAL		0x39
+#define PSERIES_IOEI_SCOPE_EADS_SLOT		0x3A
+#define PSERIES_IOEI_SCOPE_TORRENT_HUB		0x3B
+#define PSERIES_IOEI_SCOPE_SERVICE_PROC		0x51
+
+/* Platform Event Log Format, Version 6, data portion of IO event section */
+struct pseries_io_event {
+	uint8_t event_type;		/* 0x00 IO-Event Type		*/
+	uint8_t rpc_data_len;		/* 0x01 RPC data length		*/
+	uint8_t scope;			/* 0x02 Error/Event Scope	*/
+	uint8_t event_subtype;		/* 0x03 I/O-Event Sub-Type	*/
+	uint32_t drc_index;		/* 0x04 DRC Index		*/
+	uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN];
+					/* 0x08 RPC Data (0-216 bytes,	*/
+					/* padded to 4 bytes alignment)	*/
+};
+
+extern struct atomic_notifier_head pseries_ioei_notifier_list;
+
+#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
new file mode 100644
index 0000000000..0266959435
--- /dev/null
+++ b/arch/powerpc/include/asm/iommu.h
@@ -0,0 +1,319 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ * Rewrite, cleanup:
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ */
+
+#ifndef _ASM_IOMMU_H
+#define _ASM_IOMMU_H
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-map-ops.h>
+#include <linux/bitops.h>
+#include <asm/machdep.h>
+#include <asm/types.h>
+#include <asm/pci-bridge.h>
+#include <asm/asm-const.h>
+
+#define IOMMU_PAGE_SHIFT_4K      12
+#define IOMMU_PAGE_SIZE_4K       (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K)
+#define IOMMU_PAGE_MASK_4K       (~((1 << IOMMU_PAGE_SHIFT_4K) - 1))
+#define IOMMU_PAGE_ALIGN_4K(addr) ALIGN(addr, IOMMU_PAGE_SIZE_4K)
+/* Per-table variants; the mask reuses the ASM_CONST(1)-based size, not int 1 */
+#define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift)
+#define IOMMU_PAGE_MASK(tblptr) (~(IOMMU_PAGE_SIZE(tblptr) - 1))
+#define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr))
+
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
+
+/* Boot time flags */
+extern int iommu_is_off;
+extern int iommu_force_on;
+
+struct iommu_table_ops {
+	/*
+	 * When called with direction==DMA_NONE, it is equal to clear().
+	 * uaddr is a linear map address.
+	 */
+	int (*set)(struct iommu_table *tbl,
+			long index, long npages,
+			unsigned long uaddr,
+			enum dma_data_direction direction,
+			unsigned long attrs);
+#ifdef CONFIG_IOMMU_API
+	/*
+	 * Exchanges existing TCE with new TCE plus direction bits;
+	 * returns old TCE and DMA direction mask.
+	 * @hpa points to a physical address.
+	 */
+	int (*xchg_no_kill)(struct iommu_table *tbl,
+			long index,
+			unsigned long *hpa,
+			enum dma_data_direction *direction);
+
+	void (*tce_kill)(struct iommu_table *tbl,
+			unsigned long index,
+			unsigned long pages);
+
+	__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
+#endif
+	void (*clear)(struct iommu_table *tbl,
+			long index, long npages);
+	/* get() returns a physical address */
+	unsigned long (*get)(struct iommu_table *tbl, long index);
+	void (*flush)(struct iommu_table *tbl);
+	void (*free)(struct iommu_table *tbl);
+};
+
+/* These are used by VIO */
+extern struct iommu_table_ops iommu_table_lpar_multi_ops;
+extern struct iommu_table_ops iommu_table_pseries_ops;
+
+/*
+ * IOMAP_MAX_ORDER defines the largest contiguous block
+ * of dma space we can get.  IOMAP_MAX_ORDER = 13
+ * allows up to 2**12 pages (4096 * 4096) = 16 MB
+ */
+#define IOMAP_MAX_ORDER		13
+
+#define IOMMU_POOL_HASHBITS	2
+#define IOMMU_NR_POOLS		(1 << IOMMU_POOL_HASHBITS)
+
+struct iommu_pool {
+	unsigned long start;
+	unsigned long end;
+	unsigned long hint;
+	spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
+struct iommu_table {
+	unsigned long  it_busno;     /* Bus number this table belongs to */
+	unsigned long  it_size;      /* Size of iommu table in entries */
+	unsigned long  it_indirect_levels;
+	unsigned long  it_level_size;
+	unsigned long  it_allocated_size;
+	unsigned long  it_offset;    /* Offset into global table */
+	unsigned long  it_base;      /* mapped address of tce table */
+	unsigned long  it_index;     /* which iommu table this is */
+	unsigned long  it_type;      /* type: PCI or Virtual Bus */
+	unsigned long  it_blocksize; /* Entries in each block (cacheline) */
+	unsigned long  poolsize;
+	unsigned long  nr_pools;
+	struct iommu_pool large_pool;
+	struct iommu_pool pools[IOMMU_NR_POOLS];
+	unsigned long *it_map;       /* A simple allocation bitmap for now */
+	unsigned long  it_page_shift;/* table iommu page size */
+	struct list_head it_group_list;/* List of iommu_table_group_link */
+	__be64 *it_userspace; /* userspace view of the table */
+	struct iommu_table_ops *it_ops;
+	struct kref    it_kref;
+	int it_nid;
+	unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */
+	unsigned long it_reserved_end;
+};
+
+#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
+		((tbl)->it_ops->useraddrptr((tbl), (entry), false))
+#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+		((tbl)->it_ops->useraddrptr((tbl), (entry), true))
+
+/* Pure 2^n version of get_order */
+static inline __attribute_const__
+int get_iommu_order(unsigned long size, struct iommu_table *tbl)
+{
+	return __ilog2((size - 1) >> tbl->it_page_shift) + 1;
+}
+
+
+struct scatterlist;
+
+#ifdef CONFIG_PPC64
+
+static inline void set_iommu_table_base(struct device *dev,
+					struct iommu_table *base)
+{
+	dev->archdata.iommu_table_base = base;
+}
+
+static inline void *get_iommu_table_base(struct device *dev)
+{
+	return dev->archdata.iommu_table_base;
+}
+
+extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
+
+extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
+extern int iommu_tce_table_put(struct iommu_table *tbl);
+
+/* Initializes an iommu_table based on values set in the passed-in
+ * structure
+ */
+extern struct iommu_table *iommu_init_table(struct iommu_table *tbl,
+		int nid, unsigned long res_start, unsigned long res_end);
+bool iommu_table_in_use(struct iommu_table *tbl);
+
+#define IOMMU_TABLE_GROUP_MAX_TABLES	2
+
+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+	unsigned long (*get_table_size)(
+			__u32 page_shift,
+			__u64 window_size,
+			__u32 levels);
+	long (*create_table)(struct iommu_table_group *table_group,
+			int num,
+			__u32 page_shift,
+			__u64 window_size,
+			__u32 levels,
+			struct iommu_table **ptbl);
+	long (*set_window)(struct iommu_table_group *table_group,
+			int num,
+			struct iommu_table *tblnew);
+	long (*unset_window)(struct iommu_table_group *table_group,
+			int num);
+	/* Switch ownership from platform code to external user (e.g. VFIO) */
+	long (*take_ownership)(struct iommu_table_group *table_group);
+	/* Switch ownership from external user (e.g. VFIO) back to core */
+	void (*release_ownership)(struct iommu_table_group *table_group);
+};
+
+struct iommu_table_group_link {
+	struct list_head next;
+	struct rcu_head rcu;
+	struct iommu_table_group *table_group;
+};
+
+struct iommu_table_group {
+	/* IOMMU properties */
+	__u32 tce32_start;
+	__u32 tce32_size;
+	__u64 pgsizes; /* Bitmap of supported page sizes */
+	__u32 max_dynamic_windows_supported;
+	__u32 max_levels;
+
+	struct iommu_group *group;
+	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+	struct iommu_table_group_ops *ops;
+};
+
+#ifdef CONFIG_IOMMU_API
+
+extern void iommu_register_group(struct iommu_table_group *table_group,
+				 int pci_domain_number, unsigned long pe_num);
+extern int iommu_add_device(struct iommu_table_group *table_group,
+		struct device *dev);
+extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction);
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction);
+extern void iommu_tce_kill(struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages);
+
+extern struct iommu_table_group_ops spapr_tce_table_group_ops;
+#else
+static inline void iommu_register_group(struct iommu_table_group *table_group,
+					int pci_domain_number,
+					unsigned long pe_num)
+{
+}
+
+static inline int iommu_add_device(struct iommu_table_group *table_group,
+		struct device *dev)
+{
+	return 0;
+}
+#endif /* !CONFIG_IOMMU_API */
+
+u64 dma_iommu_get_required_mask(struct device *dev);
+#else
+
+static inline void *get_iommu_table_base(struct device *dev)
+{
+	return NULL;
+}
+
+static inline int dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+			    struct scatterlist *sglist, int nelems,
+			    unsigned long mask,
+			    enum dma_data_direction direction,
+			    unsigned long attrs);
+extern void ppc_iommu_unmap_sg(struct iommu_table *tbl,
+			       struct scatterlist *sglist,
+			       int nelems,
+			       enum dma_data_direction direction,
+			       unsigned long attrs);
+
+extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
+				  size_t size, dma_addr_t *dma_handle,
+				  unsigned long mask, gfp_t flag, int node);
+extern void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+				void *vaddr, dma_addr_t dma_handle);
+extern dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
+				 struct page *page, unsigned long offset,
+				 size_t size, unsigned long mask,
+				 enum dma_data_direction direction,
+				 unsigned long attrs);
+extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
+			     size_t size, enum dma_data_direction direction,
+			     unsigned long attrs);
+
+void __init iommu_init_early_pSeries(void);
+extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
+extern void iommu_init_early_pasemi(void);
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_PM)
+static inline void iommu_restore(void)
+{
+	if (ppc_md.iommu_restore)
+		ppc_md.iommu_restore();
+}
+#endif
+
+/* The API to support IOMMU operations for VFIO */
+extern int iommu_tce_check_ioba(unsigned long page_shift,
+		unsigned long offset, unsigned long size,
+		unsigned long ioba, unsigned long npages);
+extern int iommu_tce_check_gpa(unsigned long page_shift,
+		unsigned long gpa);
+
+#define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \
+		(iommu_tce_check_ioba((tbl)->it_page_shift,       \
+				(tbl)->it_offset, (tbl)->it_size, \
+				(ioba), (npages)) || (tce_value))
+#define iommu_tce_put_param_check(tbl, ioba, gpa)                 \
+		(iommu_tce_check_ioba((tbl)->it_page_shift,       \
+				(tbl)->it_offset, (tbl)->it_size, \
+				(ioba), 1) ||                     \
+		iommu_tce_check_gpa((tbl)->it_page_shift, (gpa)))
+
+extern void iommu_flush_tce(struct iommu_table *tbl);
+
+extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
+extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
+
+#ifdef CONFIG_PPC_CELL_NATIVE
+extern bool iommu_fixed_is_weak;
+#else
+#define iommu_fixed_is_weak false
+#endif
+
+extern const struct dma_map_ops dma_iommu_ops;
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h
new file mode 100644
index 0000000000..b47ca7dc71
--- /dev/null
+++ b/arch/powerpc/include/asm/ipic.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * IPIC external definitions and structure.
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2005 Freescale Semiconductor, Inc
+ */
+#ifdef __KERNEL__
+#ifndef __ASM_IPIC_H__
+#define __ASM_IPIC_H__
+
+#include <linux/irq.h>
+
+/* Flags when we init the IPIC */
+#define IPIC_SPREADMODE_GRP_A	0x00000001
+#define IPIC_SPREADMODE_GRP_B	0x00000002
+#define IPIC_SPREADMODE_GRP_C	0x00000004
+#define IPIC_SPREADMODE_GRP_D	0x00000008
+#define IPIC_SPREADMODE_MIX_A	0x00000010
+#define IPIC_SPREADMODE_MIX_B	0x00000020
+#define IPIC_DISABLE_MCP_OUT	0x00000040
+#define IPIC_IRQ0_MCP		0x00000080
+
+/* IPIC registers offsets */
+#define IPIC_SICFR	0x00	/* System Global Interrupt Configuration Register */
+#define IPIC_SIVCR	0x04	/* System Global Interrupt Vector Register */
+#define IPIC_SIPNR_H	0x08	/* System Internal Interrupt Pending Register (HIGH) */
+#define IPIC_SIPNR_L	0x0C	/* System Internal Interrupt Pending Register (LOW) */
+#define IPIC_SIPRR_A	0x10	/* System Internal Interrupt group A Priority Register */
+#define IPIC_SIPRR_B	0x14	/* System Internal Interrupt group B Priority Register */
+#define IPIC_SIPRR_C	0x18	/* System Internal Interrupt group C Priority Register */
+#define IPIC_SIPRR_D	0x1C	/* System Internal Interrupt group D Priority Register */
+#define IPIC_SIMSR_H	0x20	/* System Internal Interrupt Mask Register (HIGH) */
+#define IPIC_SIMSR_L	0x24	/* System Internal Interrupt Mask Register (LOW) */
+#define IPIC_SICNR	0x28	/* System Internal Interrupt Control Register */
+#define IPIC_SEPNR	0x2C	/* System External Interrupt Pending Register */
+#define IPIC_SMPRR_A	0x30	/* System Mixed Interrupt group A Priority Register */
+#define IPIC_SMPRR_B	0x34	/* System Mixed Interrupt group B Priority Register */
+#define IPIC_SEMSR	0x38	/* System External Interrupt Mask Register */
+#define IPIC_SECNR	0x3C	/* System External Interrupt Control Register */
+#define IPIC_SERSR	0x40	/* System Error Status Register */
+#define IPIC_SERMR	0x44	/* System Error Mask Register */
+#define IPIC_SERCR	0x48	/* System Error Control Register */
+#define IPIC_SIFCR_H	0x50	/* System Internal Interrupt Force Register (HIGH) */
+#define IPIC_SIFCR_L	0x54	/* System Internal Interrupt Force Register (LOW) */
+#define IPIC_SEFCR	0x58	/* System External Interrupt Force Register */
+#define IPIC_SERFR	0x5C	/* System Error Force Register */
+#define IPIC_SCVCR	0x60	/* System Critical Interrupt Vector Register */
+#define IPIC_SMVCR	0x64	/* System Management Interrupt Vector Register */
+
+enum ipic_prio_grp {
+	IPIC_INT_GRP_A = IPIC_SIPRR_A,
+	IPIC_INT_GRP_D = IPIC_SIPRR_D,
+	IPIC_MIX_GRP_A = IPIC_SMPRR_A,
+	IPIC_MIX_GRP_B = IPIC_SMPRR_B,
+};
+
+enum ipic_mcp_irq {
+	IPIC_MCP_IRQ0 = 0,
+	IPIC_MCP_WDT  = 1,
+	IPIC_MCP_SBA  = 2,
+	IPIC_MCP_PCI1 = 5,
+	IPIC_MCP_PCI2 = 6,
+	IPIC_MCP_MU   = 7,
+};
+
+void __init ipic_set_default_priority(void);
+extern u32 ipic_get_mcp_status(void);
+extern void ipic_clear_mcp_status(u32 mask);
+
+extern struct ipic * ipic_init(struct device_node *node, unsigned int flags);
+extern unsigned int ipic_get_irq(void);
+
+#endif /* __ASM_IPIC_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
new file mode 100644
index 0000000000..ba1a5974e7
--- /dev/null
+++ b/arch/powerpc/include/asm/irq.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifdef __KERNEL__
+#ifndef _ASM_POWERPC_IRQ_H
+#define _ASM_POWERPC_IRQ_H
+
+/*
+ */
+
+#include <linux/threads.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+
+#include <asm/types.h>
+#include <linux/atomic.h>
+
+
+extern atomic_t ppc_n_lost_interrupts;
+
+/* Total number of virq in the platform */
+#define NR_IRQS		CONFIG_NR_IRQS
+
+/* Number of irqs reserved for a legacy isa controller */
+#define NR_IRQS_LEGACY		16
+
+extern irq_hw_number_t virq_to_hw(unsigned int virq);
+
+static __inline__ int irq_canonicalize(int irq)
+{
+	return irq;
+}
+
+extern int distribute_irqs;
+
+struct pt_regs;
+
+#ifdef CONFIG_BOOKE_OR_40x
+/*
+ * Per-cpu stacks for handling critical, debug and machine check
+ * level interrupts.
+ */
+extern void *critirq_ctx[NR_CPUS];
+extern void *dbgirq_ctx[NR_CPUS];
+extern void *mcheckirq_ctx[NR_CPUS];
+#endif
+
+/*
+ * Per-cpu stacks for handling hard and soft interrupts.
+ */
+extern void *hardirq_ctx[NR_CPUS];
+extern void *softirq_ctx[NR_CPUS];
+
+void __do_IRQ(struct pt_regs *regs);
+
+int irq_choose_cpu(const struct cpumask *mask);
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   int exclude_cpu);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
+#endif /* _ASM_POWERPC_IRQ_H */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
new file mode 100644
index 0000000000..b8b0be8f1a
--- /dev/null
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_IRQ_WORK_H
+#define _ASM_POWERPC_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return true;
+}
+extern void arch_irq_work_raise(void);
+
+#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
new file mode 100644
index 0000000000..47d4671292
--- /dev/null
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IRQ flags handling
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+/*
+ * Get definitions for arch_local_save_flags(x), etc.
+ */
+#include <asm/hw_irq.h>
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/isa-bridge.h b/arch/powerpc/include/asm/isa-bridge.h
new file mode 100644
index 0000000000..47295894bf
--- /dev/null
+++ b/arch/powerpc/include/asm/isa-bridge.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ISA_BRIDGE_H
+#define __ISA_BRIDGE_H
+
+#ifdef CONFIG_PPC64
+
+extern void isa_bridge_find_early(struct pci_controller *hose);
+extern void isa_bridge_init_non_pci(struct device_node *np);
+
+static inline int isa_vaddr_is_ioport(void __iomem *address)
+{
+	/* Non-zero only inside the legacy IO range [ISA_IO_BASE, ISA_IO_END) */
+	unsigned long vaddr = (unsigned long)address;
+	return !(vaddr < ISA_IO_BASE || vaddr >= ISA_IO_END);
+}
+
+#else
+
+static inline int isa_vaddr_is_ioport(void __iomem *address)
+{
+	/* No specific ISA handling on ppc32 at this stage, it
+	 * all goes through PCI
+	 */
+	return 0;
+}
+
+#endif
+
+#endif /* __ISA_BRIDGE_H */
+
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
new file mode 100644
index 0000000000..93ce3ec253
--- /dev/null
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_JUMP_LABEL_H
+#define _ASM_POWERPC_JUMP_LABEL_H
+
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corp.
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+#include <asm/feature-fixups.h>
+#include <asm/asm-const.h>
+
+#define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
+#define JUMP_LABEL_NOP_SIZE	4
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "nop # arch_static_branch\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".long 1b - ., %l[l_yes] - .\n\t"
+		 JUMP_ENTRY_TYPE "%c0 - .\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes] # arch_static_branch_jump\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".long 1b - ., %l[l_yes] - .\n\t"
+		 JUMP_ENTRY_TYPE "%c0 - .\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+#else
+#define ARCH_STATIC_BRANCH(LABEL, KEY)		\
+1098:	nop;					\
+	.pushsection __jump_table, "aw";	\
+	.long 1098b - ., LABEL - .;		\
+	FTR_ENTRY_LONG KEY - .;			\
+	.popsection
+#endif
+
+#endif /* _ASM_POWERPC_JUMP_LABEL_H */
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
new file mode 100644
index 0000000000..365d272009
--- /dev/null
+++ b/arch/powerpc/include/asm/kasan.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX)
+#define _GLOBAL_KASAN(fn)	_GLOBAL(__##fn)
+#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(__##fn)
+#define EXPORT_SYMBOL_KASAN(fn)	EXPORT_SYMBOL(__##fn)
+#else
+#define _GLOBAL_KASAN(fn)	_GLOBAL(fn)
+#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(fn)
+#define EXPORT_SYMBOL_KASAN(fn)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+#include <linux/sizes.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT	3
+
+#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#define KASAN_KERN_START	ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
+#else
+#define KASAN_KERN_START	PAGE_OFFSET
+#endif
+
+#define KASAN_SHADOW_START	(KASAN_SHADOW_OFFSET + \
+				 (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT))
+
+#define KASAN_SHADOW_OFFSET	ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
+
+#ifdef CONFIG_PPC32
+#define KASAN_SHADOW_END	(-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT))
+#elif defined(CONFIG_PPC_BOOK3S_64)
+/*
+ * The shadow ends before the highest accessible address
+ * because we don't need a shadow for the shadow. Instead:
+ * (c00e000000000000 >> 3) + a80e000000000000 = c00fc00000000000
+ */
+#define KASAN_SHADOW_END 0xc00fc00000000000UL
+
+#else
+
+/*
+ * The shadow ends before the highest accessible address
+ * because we don't need a shadow for the shadow.
+ * But it doesn't hurt to have a shadow for the shadow,
+ * keep shadow end aligned eases things.
+ */
+#define KASAN_SHADOW_END 0xc000200000000000UL
+
+#endif
+
+#ifdef CONFIG_KASAN
+#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_STATIC_KEY_FALSE(powerpc_kasan_enabled_key);
+
+/* True iff the powerpc_kasan_enabled_key static key is enabled. */
+static __always_inline bool kasan_arch_is_ready(void)
+{
+	/* static_branch_likely(): the enabled case is the expected one */
+	return static_branch_likely(&powerpc_kasan_enabled_key);
+}
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+#endif
+
+void kasan_early_init(void);
+void kasan_mmu_init(void);
+void kasan_init(void);
+void kasan_late_init(void);
+#else
+static inline void kasan_init(void) { }
+static inline void kasan_mmu_init(void) { }
+static inline void kasan_late_init(void) { }
+#endif
+
+void kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte);
+int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end);
+int kasan_init_region(void *start, size_t size);
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/powerpc/include/asm/kdebug.h b/arch/powerpc/include/asm/kdebug.h
new file mode 100644
index 0000000000..0f7c1ef37d
--- /dev/null
+++ b/arch/powerpc/include/asm/kdebug.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KDEBUG_H
+#define _ASM_POWERPC_KDEBUG_H
+#ifdef __KERNEL__
+
+/* Grossly misnamed. */
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_IABR_MATCH,
+	DIE_DABR_MATCH,
+	DIE_BPT,
+	DIE_SSTEP,
+};
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_KDEBUG_H */
diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h
new file mode 100644
index 0000000000..fd128d1e52
--- /dev/null
+++ b/arch/powerpc/include/asm/kdump.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC64_KDUMP_H
+#define _PPC64_KDUMP_H
+
+#include <asm/page.h>
+
+#define KDUMP_KERNELBASE	0x2000000
+
+/* How many bytes to reserve at zero for kdump. The reserve limit should
+ * be greater or equal to the trampoline's end address.
+ * Reserve to the end of the FWNMI area, see head_64.S */
+#define KDUMP_RESERVE_LIMIT	0x10000 /* 64K */
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * On PPC64 translation is disabled during trampoline setup, so we use
+ * physical addresses. Though on PPC32 translation is already enabled,
+ * so we can't do the same. Luckily create_trampoline() creates relative
+ * branches, so we can just add the PAGE_OFFSET and don't worry about it.
+ */
+#ifdef __powerpc64__
+#define KDUMP_TRAMPOLINE_START	0x0100
+#define KDUMP_TRAMPOLINE_END	0x3000
+#else
+#define KDUMP_TRAMPOLINE_START	(0x0100 + PAGE_OFFSET)
+#define KDUMP_TRAMPOLINE_END	(0x3000 + PAGE_OFFSET)
+#endif /* __powerpc64__ */
+
+#define KDUMP_MIN_TCE_ENTRIES	2048
+
+#endif /* CONFIG_CRASH_DUMP */
+
+#ifndef __ASSEMBLY__
+
+#if defined(CONFIG_CRASH_DUMP) && !defined(CONFIG_NONSTATIC_KERNEL)
+extern void reserve_kdump_trampoline(void);
+extern void setup_kdump_trampoline(void);
+#else
+/* !CRASH_DUMP || NONSTATIC_KERNEL */
+static inline void reserve_kdump_trampoline(void) { ; }
+static inline void setup_kdump_trampoline(void) { ; }
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _PPC64_KDUMP_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
new file mode 100644
index 0000000000..a1ddba01e7
--- /dev/null
+++ b/arch/powerpc/include/asm/kexec.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KEXEC_H
+#define _ASM_POWERPC_KEXEC_H
+#ifdef __KERNEL__
+
+#if defined(CONFIG_PPC_85xx) || defined(CONFIG_44x)
+
+/*
+ * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory
+ * and therefore we can only deal with memory within this range
+ */
+#define KEXEC_SOURCE_MEMORY_LIMIT	(2 * 1024 * 1024 * 1024UL - 1)
+#define KEXEC_DESTINATION_MEMORY_LIMIT	(2 * 1024 * 1024 * 1024UL - 1)
+#define KEXEC_CONTROL_MEMORY_LIMIT	(2 * 1024 * 1024 * 1024UL - 1)
+
+#else
+
+/*
+ * Maximum page that is mapped directly into kernel memory.
+ * XXX: Since we copy virt we can use any page we allocate
+ */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/*
+ * Maximum address we can reach in physical address mode.
+ * XXX: I want to allow initrd in highmem. Otherwise set to rmo on LPAR.
+ */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+#ifdef __powerpc64__
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+#else
+/* TASK_SIZE, probably left over from use_mm ?? */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+#endif
+#endif
+
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+/* The native architecture */
+#ifdef __powerpc64__
+#define KEXEC_ARCH KEXEC_ARCH_PPC64
+#else
+#define KEXEC_ARCH KEXEC_ARCH_PPC
+#endif
+
+#define KEXEC_STATE_NONE 0
+#define KEXEC_STATE_IRQS_OFF 1
+#define KEXEC_STATE_REAL_MODE 2
+
+#ifndef __ASSEMBLY__
+#include <asm/reg.h>
+
+typedef void (*crash_shutdown_t)(void);
+
+#ifdef CONFIG_KEXEC_CORE
+
+/*
+ * Fill @newregs for a crash dump (reached via panic or a sysrq-triggered
+ * dump): hand the job to ppc_save_regs() unless @oldregs can be copied.
+ */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	if (!oldregs)
+		ppc_save_regs(newregs);
+	else
+		memcpy(newregs, oldregs, sizeof(*newregs));
+}
+
+extern void kexec_smp_wait(void);	/* get and clear naca physid, wait for
+					  master to copy new code to 0 */
+extern int crashing_cpu;
+extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
+extern void crash_ipi_callback(struct pt_regs *);
+extern int crash_wake_offline;
+
+struct kimage;
+struct pt_regs;
+extern void default_machine_kexec(struct kimage *image);
+extern void default_machine_crash_shutdown(struct pt_regs *regs);
+extern int crash_shutdown_register(crash_shutdown_t handler);
+extern int crash_shutdown_unregister(crash_shutdown_t handler);
+
+extern void crash_kexec_prepare(void);
+extern void crash_kexec_secondary(struct pt_regs *regs);
+int __init overlaps_crashkernel(unsigned long start, unsigned long size);
+extern void reserve_crashkernel(void);
+extern void machine_kexec_mask_interrupts(void);
+
+static inline bool kdump_in_progress(void)
+{
+	return crashing_cpu >= 0;
+}
+
+void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
+			 unsigned long start_address) __noreturn;
+
+void kexec_copy_flush(struct kimage *image);
+
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PPC_RTAS)
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+#endif
+
+#ifdef CONFIG_KEXEC_FILE
+extern const struct kexec_file_ops kexec_elf64_ops;
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	struct crash_mem *exclude_ranges;
+
+	unsigned long backup_start;
+	void *backup_buf;
+	void *fdt;
+};
+
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+			  unsigned long cmdline_len);
+int setup_purgatory(struct kimage *image, const void *slave_code,
+		    const void *fdt, unsigned long kernel_load_addr,
+		    unsigned long fdt_load_addr);
+
+#ifdef CONFIG_PPC64
+struct kexec_buf;
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len);
+#define arch_kexec_kernel_image_probe arch_kexec_kernel_image_probe
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
+#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole
+
+int load_crashdump_segments_ppc64(struct kimage *image,
+				  struct kexec_buf *kbuf);
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+			  const void *fdt, unsigned long kernel_load_addr,
+			  unsigned long fdt_load_addr);
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+			unsigned long initrd_load_addr,
+			unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_KEXEC_FILE */
+
+#else /* !CONFIG_KEXEC_CORE */
+static inline void crash_kexec_secondary(struct pt_regs *regs) { }
+
+static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+	return 0;
+}
+
+static inline void reserve_crashkernel(void) { ; }
+
+static inline int crash_shutdown_register(crash_shutdown_t handler)
+{
+	return 0;
+}
+
+static inline int crash_shutdown_unregister(crash_shutdown_t handler)
+{
+	return 0;
+}
+
+static inline bool kdump_in_progress(void)
+{
+	return false;
+}
+
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/kexec.h>
+#endif
+
+#ifndef reset_sprs
+#define reset_sprs reset_sprs
+static inline void reset_sprs(void)
+{
+}
+#endif
+
+#endif /* ! __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_KEXEC_H */
diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
new file mode 100644
index 0000000000..f83866a19e
--- /dev/null
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_KEXEC_RANGES_H
+#define _ASM_POWERPC_KEXEC_RANGES_H
+
+#define MEM_RANGE_CHUNK_SZ		2048	/* Memory ranges size chunk */
+
+void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int add_tce_mem_ranges(struct crash_mem **mem_ranges);
+int add_initrd_mem_range(struct crash_mem **mem_ranges);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+int add_htab_mem_range(struct crash_mem **mem_ranges);
+#else
+static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+	return 0;
+}
+#endif
+int add_kernel_mem_range(struct crash_mem **mem_ranges);
+int add_rtas_mem_range(struct crash_mem **mem_ranges);
+int add_opal_mem_range(struct crash_mem **mem_ranges);
+int add_reserved_mem_ranges(struct crash_mem **mem_ranges);
+
+#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/include/asm/keylargo.h b/arch/powerpc/include/asm/keylargo.h
new file mode 100644
index 0000000000..debdf54800
--- /dev/null
+++ b/arch/powerpc/include/asm/keylargo.h
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KEYLARGO_H
+#define _ASM_POWERPC_KEYLARGO_H
+#ifdef __KERNEL__
+/*
+ * keylargo.h: definitions for using the "KeyLargo" I/O controller chip.
+ *
+ */
+
+/* "Pangea" chipset has keylargo device-id 0x25 while core99
+ * has device-id 0x22. The rev. of the pangea one is 0, so we
+ * fake an artificial rev. in keylargo_rev by oring 0x100
+ */
+#define KL_PANGEA_REV		0x100
+
+/* offset from base for feature control registers */
+#define KEYLARGO_MBCR		0x34	/* KL Only, Media bay control/status */
+#define KEYLARGO_FCR0		0x38
+#define KEYLARGO_FCR1		0x3c
+#define KEYLARGO_FCR2		0x40
+#define KEYLARGO_FCR3		0x44
+#define KEYLARGO_FCR4		0x48
+#define KEYLARGO_FCR5		0x4c	/* Pangea only */
+
+/* K2 additional FCRs */
+#define K2_FCR6			0x34
+#define K2_FCR7			0x30
+#define K2_FCR8			0x2c
+#define K2_FCR9			0x28
+#define K2_FCR10		0x24
+
+/* GPIO registers */
+#define KEYLARGO_GPIO_LEVELS0		0x50
+#define KEYLARGO_GPIO_LEVELS1		0x54
+#define KEYLARGO_GPIO_EXTINT_0		0x58
+#define KEYLARGO_GPIO_EXTINT_CNT	18
+#define KEYLARGO_GPIO_0			0x6A
+#define KEYLARGO_GPIO_CNT		17
+#define KEYLARGO_GPIO_EXTINT_DUAL_EDGE	0x80
+#define KEYLARGO_GPIO_OUTPUT_ENABLE	0x04
+#define KEYLARGO_GPIO_OUTOUT_DATA	0x01
+#define KEYLARGO_GPIO_INPUT_DATA	0x02
+
+/* K2 does only extint GPIOs and does 51 of them */
+#define K2_GPIO_EXTINT_0		0x58
+#define K2_GPIO_EXTINT_CNT		51
+
+/* Specific GPIO regs */
+
+#define KL_GPIO_MODEM_RESET		(KEYLARGO_GPIO_0+0x03)
+#define KL_GPIO_MODEM_POWER		(KEYLARGO_GPIO_0+0x02) /* Pangea */
+
+#define KL_GPIO_SOUND_POWER		(KEYLARGO_GPIO_0+0x05)
+
+/* Hrm... this one is only to be used on Pismo. It seems to also
+ * control the timebase enable on other machines. Still to be
+ * experimented... --BenH.
+ */
+#define KL_GPIO_FW_CABLE_POWER		(KEYLARGO_GPIO_0+0x09)
+#define KL_GPIO_TB_ENABLE		(KEYLARGO_GPIO_0+0x09)
+
+#define KL_GPIO_ETH_PHY_RESET		(KEYLARGO_GPIO_0+0x10)
+
+#define KL_GPIO_EXTINT_CPU1		(KEYLARGO_GPIO_0+0x0a)
+#define KL_GPIO_EXTINT_CPU1_ASSERT	0x04
+#define KL_GPIO_EXTINT_CPU1_RELEASE	0x38
+
+#define KL_GPIO_RESET_CPU0		(KEYLARGO_GPIO_EXTINT_0+0x03)
+#define KL_GPIO_RESET_CPU1		(KEYLARGO_GPIO_EXTINT_0+0x04)
+#define KL_GPIO_RESET_CPU2		(KEYLARGO_GPIO_EXTINT_0+0x0f)
+#define KL_GPIO_RESET_CPU3		(KEYLARGO_GPIO_EXTINT_0+0x10)
+
+#define KL_GPIO_PMU_MESSAGE_IRQ		(KEYLARGO_GPIO_EXTINT_0+0x09)
+#define KL_GPIO_PMU_MESSAGE_BIT		KEYLARGO_GPIO_INPUT_DATA
+
+#define KL_GPIO_MEDIABAY_IRQ		(KEYLARGO_GPIO_EXTINT_0+0x0e)
+
+#define KL_GPIO_AIRPORT_0		(KEYLARGO_GPIO_EXTINT_0+0x0a)
+#define KL_GPIO_AIRPORT_1		(KEYLARGO_GPIO_EXTINT_0+0x0d)
+#define KL_GPIO_AIRPORT_2		(KEYLARGO_GPIO_0+0x0d)
+#define KL_GPIO_AIRPORT_3		(KEYLARGO_GPIO_0+0x0e)
+#define KL_GPIO_AIRPORT_4		(KEYLARGO_GPIO_0+0x0f)
+
+/*
+ * Bits in feature control register. Those bits different for K2 are
+ * listed separately
+ */
+#define KL_MBCR_MB0_PCI_ENABLE		0x00000800	/* exist ? */
+#define KL_MBCR_MB0_IDE_ENABLE		0x00001000
+#define KL_MBCR_MB0_FLOPPY_ENABLE	0x00002000	/* exist ? */
+#define KL_MBCR_MB0_SOUND_ENABLE	0x00004000	/* hrm... */
+#define KL_MBCR_MB0_DEV_MASK		0x00007800
+#define KL_MBCR_MB0_DEV_POWER		0x00000400
+#define KL_MBCR_MB0_DEV_RESET		0x00000200
+#define KL_MBCR_MB0_ENABLE		0x00000100
+#define KL_MBCR_MB1_PCI_ENABLE		0x08000000	/* exist ? */
+#define KL_MBCR_MB1_IDE_ENABLE		0x10000000
+#define KL_MBCR_MB1_FLOPPY_ENABLE	0x20000000	/* exist ? */
+#define KL_MBCR_MB1_SOUND_ENABLE	0x40000000	/* hrm... */
+#define KL_MBCR_MB1_DEV_MASK		0x78000000
+#define KL_MBCR_MB1_DEV_POWER		0x04000000
+#define KL_MBCR_MB1_DEV_RESET		0x02000000
+#define KL_MBCR_MB1_ENABLE		0x01000000
+
+#define KL0_SCC_B_INTF_ENABLE		0x00000001	/* (KL Only) */
+#define KL0_SCC_A_INTF_ENABLE		0x00000002
+#define KL0_SCC_SLOWPCLK		0x00000004
+#define KL0_SCC_RESET			0x00000008
+#define KL0_SCCA_ENABLE			0x00000010
+#define KL0_SCCB_ENABLE			0x00000020
+#define KL0_SCC_CELL_ENABLE		0x00000040
+#define KL0_IRDA_HIGH_BAND		0x00000100	/* (KL Only) */
+#define KL0_IRDA_SOURCE2_SEL		0x00000200	/* (KL Only) */
+#define KL0_IRDA_SOURCE1_SEL		0x00000400	/* (KL Only) */
+#define KL0_PG_USB0_PMI_ENABLE		0x00000400	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_RESET			0x00000800	/* (KL Only) */
+#define KL0_PG_USB0_REF_SUSPEND_SEL	0x00000800	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_DEFAULT1		0x00001000	/* (KL Only) */
+#define KL0_PG_USB0_REF_SUSPEND		0x00001000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_DEFAULT0		0x00002000	/* (KL Only) */
+#define KL0_PG_USB0_PAD_SUSPEND		0x00002000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_FAST_CONNECT		0x00004000	/* (KL Only) */
+#define KL0_PG_USB1_PMI_ENABLE		0x00004000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_ENABLE			0x00008000	/* (KL Only) */
+#define KL0_PG_USB1_REF_SUSPEND_SEL	0x00008000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_CLK32_ENABLE		0x00010000	/* (KL Only) */
+#define KL0_PG_USB1_REF_SUSPEND		0x00010000	/* (Pangea/Intrepid Only) */
+#define KL0_IRDA_CLK19_ENABLE		0x00020000	/* (KL Only) */
+#define KL0_PG_USB1_PAD_SUSPEND		0x00020000	/* (Pangea/Intrepid Only) */
+#define KL0_USB0_PAD_SUSPEND0		0x00040000
+#define KL0_USB0_PAD_SUSPEND1		0x00080000
+#define KL0_USB0_CELL_ENABLE		0x00100000
+#define KL0_USB1_PAD_SUSPEND0		0x00400000
+#define KL0_USB1_PAD_SUSPEND1		0x00800000
+#define KL0_USB1_CELL_ENABLE		0x01000000
+#define KL0_USB_REF_SUSPEND		0x10000000	/* (KL Only) */
+
+#define KL0_SERIAL_ENABLE		(KL0_SCC_B_INTF_ENABLE | \
+					KL0_SCC_SLOWPCLK | \
+					KL0_SCC_CELL_ENABLE | KL0_SCCA_ENABLE)
+
+#define KL1_USB2_PMI_ENABLE		0x00000001	/* Intrepid only */
+#define KL1_AUDIO_SEL_22MCLK		0x00000002	/* KL/Pangea only */
+#define KL1_USB2_REF_SUSPEND_SEL	0x00000002	/* Intrepid only */
+#define KL1_USB2_REF_SUSPEND		0x00000004	/* Intrepid only */
+#define KL1_AUDIO_CLK_ENABLE_BIT	0x00000008	/* KL/Pangea only */
+#define KL1_USB2_PAD_SUSPEND_SEL	0x00000008	/* Intrepid only */
+#define KL1_USB2_PAD_SUSPEND0		0x00000010	/* Intrepid only */
+#define KL1_AUDIO_CLK_OUT_ENABLE	0x00000020	/* KL/Pangea only */
+#define KL1_USB2_PAD_SUSPEND1		0x00000020	/* Intrepid only */
+#define KL1_AUDIO_CELL_ENABLE		0x00000040	/* KL/Pangea only */
+#define KL1_USB2_CELL_ENABLE		0x00000040	/* Intrepid only */
+#define KL1_AUDIO_CHOOSE		0x00000080	/* KL/Pangea only */
+#define KL1_I2S0_CHOOSE			0x00000200	/* KL Only */
+#define KL1_I2S0_CELL_ENABLE		0x00000400
+#define KL1_I2S0_CLK_ENABLE_BIT		0x00001000
+#define KL1_I2S0_ENABLE			0x00002000
+#define KL1_I2S1_CELL_ENABLE		0x00020000
+#define KL1_I2S1_CLK_ENABLE_BIT		0x00080000
+#define KL1_I2S1_ENABLE			0x00100000
+#define KL1_EIDE0_ENABLE		0x00800000	/* KL/Intrepid Only */
+#define KL1_EIDE0_RESET_N		0x01000000	/* KL/Intrepid Only */
+#define KL1_EIDE1_ENABLE		0x04000000	/* KL Only */
+#define KL1_EIDE1_RESET_N		0x08000000	/* KL Only */
+#define KL1_UIDE_ENABLE			0x20000000	/* KL/Pangea Only */
+#define KL1_UIDE_RESET_N		0x40000000	/* KL/Pangea Only */
+
+#define KL2_IOBUS_ENABLE		0x00000002
+#define KL2_SLEEP_STATE_BIT		0x00000100	/* KL Only */
+#define KL2_PG_STOP_ALL_CLOCKS		0x00000100	/* Pangea Only */
+#define KL2_MPIC_ENABLE			0x00020000
+#define KL2_CARDSLOT_RESET		0x00040000	/* Pangea/Intrepid Only */
+#define KL2_ALT_DATA_OUT		0x02000000	/* KL Only ??? */
+#define KL2_MEM_IS_BIG			0x04000000
+#define KL2_CARDSEL_16			0x08000000
+
+#define KL3_SHUTDOWN_PLL_TOTAL		0x00000001	/* KL/Pangea only */
+#define KL3_SHUTDOWN_PLLKW6		0x00000002	/* KL/Pangea only */
+#define KL3_IT_SHUTDOWN_PLL3		0x00000002	/* Intrepid only */
+#define KL3_SHUTDOWN_PLLKW4		0x00000004	/* KL/Pangea only */
+#define KL3_IT_SHUTDOWN_PLL2		0x00000004	/* Intrepid only */
+#define KL3_SHUTDOWN_PLLKW35		0x00000008	/* KL/Pangea only */
+#define KL3_IT_SHUTDOWN_PLL1		0x00000008	/* Intrepid only */
+#define KL3_SHUTDOWN_PLLKW12		0x00000010	/* KL Only */
+#define KL3_IT_ENABLE_PLL3_SHUTDOWN	0x00000010	/* Intrepid only */
+#define KL3_PLL_RESET			0x00000020	/* KL/Pangea only */
+#define KL3_IT_ENABLE_PLL2_SHUTDOWN	0x00000020	/* Intrepid only */
+#define KL3_IT_ENABLE_PLL1_SHUTDOWN	0x00000010	/* Intrepid only */
+#define KL3_SHUTDOWN_PLL2X		0x00000080	/* KL Only */
+#define KL3_CLK66_ENABLE		0x00000100	/* KL Only */
+#define KL3_CLK49_ENABLE		0x00000200
+#define KL3_CLK45_ENABLE		0x00000400
+#define KL3_CLK31_ENABLE		0x00000800	/* KL/Pangea only */
+#define KL3_TIMER_CLK18_ENABLE		0x00001000
+#define KL3_I2S1_CLK18_ENABLE		0x00002000
+#define KL3_I2S0_CLK18_ENABLE		0x00004000
+#define KL3_VIA_CLK16_ENABLE		0x00008000	/* KL/Pangea only */
+#define KL3_IT_VIA_CLK32_ENABLE		0x00008000	/* Intrepid only */
+#define KL3_STOPPING33_ENABLED		0x00080000	/* KL Only */
+#define KL3_PG_PLL_ENABLE_TEST		0x00080000	/* Pangea Only */
+
+/* Intrepid USB bus 2, port 0,1 */
+#define KL3_IT_PORT_WAKEUP_ENABLE(p)		(0x00080000 << ((p)<<3))
+#define KL3_IT_PORT_RESUME_WAKE_EN(p)		(0x00040000 << ((p)<<3))
+#define KL3_IT_PORT_CONNECT_WAKE_EN(p)		(0x00020000 << ((p)<<3))
+#define KL3_IT_PORT_DISCONNECT_WAKE_EN(p)	(0x00010000 << ((p)<<3))
+#define KL3_IT_PORT_RESUME_STAT(p)		(0x00300000 << ((p)<<3))
+#define KL3_IT_PORT_CONNECT_STAT(p)		(0x00200000 << ((p)<<3))
+#define KL3_IT_PORT_DISCONNECT_STAT(p)		(0x00100000 << ((p)<<3))
+
+/* Port 0,1 : bus 0, port 2,3 : bus 1 */
+#define KL4_PORT_WAKEUP_ENABLE(p)	(0x00000008 << ((p)<<3))
+#define KL4_PORT_RESUME_WAKE_EN(p)	(0x00000004 << ((p)<<3))
+#define KL4_PORT_CONNECT_WAKE_EN(p)	(0x00000002 << ((p)<<3))
+#define KL4_PORT_DISCONNECT_WAKE_EN(p)	(0x00000001 << ((p)<<3))
+#define KL4_PORT_RESUME_STAT(p)		(0x00000040 << ((p)<<3))
+#define KL4_PORT_CONNECT_STAT(p)	(0x00000020 << ((p)<<3))
+#define KL4_PORT_DISCONNECT_STAT(p)	(0x00000010 << ((p)<<3))
+
+/* Pangea and Intrepid only */
+#define KL5_VIA_USE_CLK31		0x00000001	/* Pangea Only */
+#define KL5_SCC_USE_CLK31		0x00000002	/* Pangea Only */
+#define KL5_PWM_CLK32_EN		0x00000004
+#define KL5_CLK3_68_EN			0x00000010
+#define KL5_CLK32_EN			0x00000020
+
+
+/* K2 definitions */
+#define K2_FCR0_USB0_SWRESET		0x00200000
+#define K2_FCR0_USB1_SWRESET		0x02000000
+#define K2_FCR0_RING_PME_DISABLE	0x08000000
+
+#define K2_FCR1_PCI1_BUS_RESET_N	0x00000010
+#define K2_FCR1_PCI1_SLEEP_RESET_EN	0x00000020
+#define K2_FCR1_I2S0_CELL_ENABLE	0x00000400
+#define K2_FCR1_I2S0_RESET		0x00000800
+#define K2_FCR1_I2S0_CLK_ENABLE_BIT	0x00001000
+#define K2_FCR1_I2S0_ENABLE    		0x00002000
+#define K2_FCR1_PCI1_CLK_ENABLE		0x00004000
+#define K2_FCR1_FW_CLK_ENABLE		0x00008000
+#define K2_FCR1_FW_RESET_N		0x00010000
+#define K2_FCR1_I2S1_CELL_ENABLE	0x00020000
+#define K2_FCR1_I2S1_CLK_ENABLE_BIT	0x00080000
+#define K2_FCR1_I2S1_ENABLE		0x00100000
+#define K2_FCR1_GMAC_CLK_ENABLE		0x00400000
+#define K2_FCR1_GMAC_POWER_DOWN		0x00800000
+#define K2_FCR1_GMAC_RESET_N		0x01000000
+#define K2_FCR1_SATA_CLK_ENABLE		0x02000000
+#define K2_FCR1_SATA_POWER_DOWN		0x04000000
+#define K2_FCR1_SATA_RESET_N		0x08000000
+#define K2_FCR1_UATA_CLK_ENABLE		0x10000000
+#define K2_FCR1_UATA_RESET_N		0x40000000
+#define K2_FCR1_UATA_CHOOSE_CLK66	0x80000000
+
+/* Shasta definitions */
+#define SH_FCR1_I2S2_CELL_ENABLE	0x00000010
+#define SH_FCR1_I2S2_CLK_ENABLE_BIT	0x00000040
+#define SH_FCR1_I2S2_ENABLE		0x00000080
+#define SH_FCR3_I2S2_CLK18_ENABLE	0x00008000
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_KEYLARGO_H */
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
new file mode 100644
index 0000000000..424ceef82a
--- /dev/null
+++ b/arch/powerpc/include/asm/kfence.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * powerpc KFENCE support.
+ *
+ * Copyright (C) 2020 CS GROUP France
+ */
+
+#ifndef __ASM_POWERPC_KFENCE_H
+#define __ASM_POWERPC_KFENCE_H
+
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+#define ARCH_FUNC_PREFIX "."
+#endif
+
+static inline bool arch_kfence_init_pool(void)
+{
+	return true;
+}
+
+#ifdef CONFIG_PPC64
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	struct page *page = virt_to_page((void *)addr);
+
+	__kernel_map_pages(page, 1, !protect);	/* one page; presumably mapped iff !protect - confirm */
+
+	return true;	/* success is reported unconditionally */
+}
+#else
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	pte_t *kpte = virt_to_kpte(addr);
+
+	if (protect) {
+		pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0);	/* presumably clears _PAGE_PRESENT - confirm arg order */
+		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);	/* flush is issued on the protect path only */
+	} else {
+		pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0);	/* presumably sets _PAGE_PRESENT again */
+	}
+
+	return true;	/* success is reported unconditionally */
+}
+#endif
+
+#endif /* __ASM_POWERPC_KFENCE_H */
diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h
new file mode 100644
index 0000000000..715c18b753
--- /dev/null
+++ b/arch/powerpc/include/asm/kgdb.h
@@ -0,0 +1,67 @@
+/*
+ * The PowerPC (32/64) specific defines / externs for KGDB.  Based on
+ * the previous 32bit and 64bit specific files, which had the following
+ * copyrights:
+ *
+ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
+ * PPC Mods (C) 2004 Tom Rini (trini@mvista.com)
+ * PPC Mods (C) 2003 John Whitney (john.whitney@timesys.com)
+ * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu)
+ *
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Author: Tom Rini <trini@kernel.crashing.org>
+ *
+ * 2006 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifdef __KERNEL__
+#ifndef __POWERPC_KGDB_H__
+#define __POWERPC_KGDB_H__
+
+#ifndef __ASSEMBLY__
+
+#define BREAK_INSTR_SIZE	4
+#define BUFMAX			((NUMREGBYTES * 2) + 512)
+#define OUTBUFMAX		((NUMREGBYTES * 2) + 512)
+
+#define BREAK_INSTR		0x7d821008	/* twge r2, r2 */
+
+static inline void arch_kgdb_breakpoint(void)
+{
+	asm(stringify_in_c(.long BREAK_INSTR));
+}
+#define CACHE_FLUSH_IS_SAFE	1
+#define DBG_MAX_REG_NUM     70
+
+/* The number of register bytes we have to save depends on a few
+ * things.  For 64bit we default to not including vector registers and
+ * vector state registers. */
+#ifdef CONFIG_PPC64
+/*
+ * 64 bit (8 byte) registers:
+ *   32 gpr, 32 fpr, nip, msr, link, ctr
+ * 32 bit (4 byte) registers:
+ *   ccr, xer, fpscr
+ */
+#define NUMREGBYTES		((68 * 8) + (3 * 4))
+#define NUMCRITREGBYTES		184
+#else /* CONFIG_PPC32 */
+/* On non-E500 family PPC32 we determine the size by picking the last
+ * register we need, but on E500 we skip sections so we list what we
+ * need to store, and add it up. */
+#ifndef CONFIG_PPC_E500
+#define MAXREG			(PT_FPSCR+1)
+#else
+/* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr*/
+#define MAXREG                 ((32*2)+6+2+1)
+#endif
+#define NUMREGBYTES		(MAXREG * sizeof(int))
+/* CR/LR, R1, R2, R13-R31 inclusive. */
+#define NUMCRITREGBYTES		(23 * sizeof(int))
+#endif /* 32/64 */
+#endif /* !(__ASSEMBLY__) */
+#endif /* !__POWERPC_KGDB_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
new file mode 100644
index 0000000000..c8e4b4fd4e
--- /dev/null
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_KPROBES_H
+#define _ASM_POWERPC_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
+#ifdef __KERNEL__
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
+ * 2004-Nov	Modified for PPC64 by Ananth N Mavinakayanahalli
+ *		<ananth@in.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/module.h>
+#include <asm/probes.h>
+#include <asm/code-patching.h>
+
+#ifdef CONFIG_KPROBES
+#define  __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u32 kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_op_address[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_end[];
+
+/* Fixed instruction size for powerpc */
+#define MAX_INSN_SIZE		2
+#define MAX_OPTIMIZED_LENGTH	sizeof(kprobe_opcode_t)	/* 4 bytes */
+#define MAX_OPTINSN_SIZE	(optprobe_template_end - optprobe_template_entry)
+#define RELATIVEJUMP_SIZE	sizeof(kprobe_opcode_t)	/* 4 bytes */
+
+#define flush_insn_slot(p)	do { } while (0)
+#define kretprobe_blacklist_size 0
+
+void __kretprobe_trampoline(void);
+extern void arch_remove_kprobe(struct kprobe *p);
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of original instruction */
+	kprobe_opcode_t *insn;
+	/*
+	 * Set in kprobes code, initially to 0. If the instruction can be
+	 * emulated, this is set to 1, if not, to -1.
+	 */
+	int boostable;
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+	unsigned long saved_msr;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_saved_msr;
+	struct prev_kprobe prev_kprobe;
+};
+
+struct arch_optimized_insn {
+	kprobe_opcode_t copied_insn[1];
+	/* detour buffer */
+	kprobe_opcode_t *insn;
+};
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+					unsigned long val, void *data);
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_handler(struct pt_regs *regs);
+extern int kprobe_post_handler(struct pt_regs *regs);
+#else
+static inline int kprobe_handler(struct pt_regs *regs) { return 0; }
+static inline int kprobe_post_handler(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_KPROBES */
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
new file mode 100644
index 0000000000..ad7e8c5aec
--- /dev/null
+++ b/arch/powerpc/include/asm/kup.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_H_
+#define _ASM_POWERPC_KUP_H_
+
+#define KUAP_READ	1
+#define KUAP_WRITE	2
+#define KUAP_READ_WRITE	(KUAP_READ | KUAP_WRITE)
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+static __always_inline bool kuap_is_disabled(void);
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/kup.h>
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include <asm/nohash/32/kup-8xx.h>
+#endif
+
+#ifdef CONFIG_BOOKE_OR_40x
+#include <asm/nohash/kup-booke.h>
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#include <asm/book3s/32/kup.h>
+#endif
+
+#ifdef __ASSEMBLY__
+#ifndef CONFIG_PPC_KUAP
+.macro kuap_check_amr	gpr1, gpr2
+.endm
+
+#endif
+
+#else /* !__ASSEMBLY__ */
+
+extern bool disable_kuep;
+extern bool disable_kuap;
+
+#include <linux/pgtable.h>
+
+void setup_kup(void);
+void setup_kuep(bool disabled);
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled);
+
+static __always_inline bool kuap_is_disabled(void)
+{
+	return !mmu_has_feature(MMU_FTR_KUAP);
+}
+#else
+static inline void setup_kuap(bool disabled) { }
+
+static __always_inline bool kuap_is_disabled(void) { return true; }
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	return false;
+}
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs) { }
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
+
+/*
+ * book3s/64/kup.h defines these functions for the !KUAP case to flush
+ * the L1D cache after user accesses. Only include the empty stubs for other
+ * platforms.
+ */
+#ifndef CONFIG_PPC_BOOK3S_64
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir) { }
+static __always_inline void prevent_user_access(unsigned long dir) { }
+static __always_inline unsigned long prevent_user_access_return(void) { return 0UL; }
+static __always_inline void restore_user_access(unsigned long flags) { }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_KUAP */
+
+static __always_inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	if (kuap_is_disabled())
+		return false;
+
+	return __bad_kuap_fault(regs, address, is_write);
+}
+
+static __always_inline void kuap_lock(void)
+{
+#ifdef __kuap_lock
+	if (kuap_is_disabled())
+		return;
+
+	__kuap_lock();
+#endif
+}
+
+static __always_inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+#ifdef __kuap_save_and_lock
+	if (kuap_is_disabled())
+		return;
+
+	__kuap_save_and_lock(regs);
+#endif
+}
+
+static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+{
+	if (kuap_is_disabled())
+		return;
+
+	__kuap_kernel_restore(regs, amr);
+}
+
+static __always_inline unsigned long kuap_get_and_assert_locked(void)
+{
+#ifdef __kuap_get_and_assert_locked
+	if (!kuap_is_disabled())
+		return __kuap_get_and_assert_locked();
+#endif
+	return 0;
+}
+
+static __always_inline void kuap_assert_locked(void)
+{
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		kuap_get_and_assert_locked();
+}
+
+static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
+{
+	barrier_nospec();
+	allow_user_access(NULL, from, size, KUAP_READ);
+}
+
+static __always_inline void allow_write_to_user(void __user *to, unsigned long size)
+{
+	allow_user_access(to, NULL, size, KUAP_WRITE);
+}
+
+static __always_inline void allow_read_write_user(void __user *to, const void __user *from,
+						  unsigned long size)
+{
+	barrier_nospec();
+	allow_user_access(to, from, size, KUAP_READ_WRITE);
+}
+
+static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size)
+{
+	prevent_user_access(KUAP_READ);
+}
+
+static __always_inline void prevent_write_to_user(void __user *to, unsigned long size)
+{
+	prevent_user_access(KUAP_WRITE);
+}
+
+static __always_inline void prevent_read_write_user(void __user *to, const void __user *from,
+						    unsigned long size)
+{
+	prevent_user_access(KUAP_READ_WRITE);
+}
+
+static __always_inline void prevent_current_access_user(void)
+{
+	prevent_user_access(KUAP_READ_WRITE);
+}
+
+static __always_inline void prevent_current_read_from_user(void)
+{
+	prevent_user_access(KUAP_READ);
+}
+
+static __always_inline void prevent_current_write_to_user(void)
+{
+	prevent_user_access(KUAP_WRITE);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_KUP_H_ */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
new file mode 100644
index 0000000000..d68d71987d
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_ASM_H__
+#define __POWERPC_KVM_ASM_H__
+
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_64BIT
+#define PPC_STD(sreg, offset, areg)  std sreg, (offset)(areg)
+#define PPC_LD(treg, offset, areg)   ld treg, (offset)(areg)
+#else
+#define PPC_STD(sreg, offset, areg)  stw sreg, (offset+4)(areg)
+#define PPC_LD(treg, offset, areg)   lwz treg, (offset+4)(areg)
+#endif
+#endif
+
+/* IVPR must be 64KiB-aligned. */
+#define VCPU_SIZE_ORDER 4
+#define VCPU_SIZE_LOG   (VCPU_SIZE_ORDER + 12)
+#define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG)
+
+#define BOOKE_INTERRUPT_CRITICAL 0
+#define BOOKE_INTERRUPT_MACHINE_CHECK 1
+#define BOOKE_INTERRUPT_DATA_STORAGE 2
+#define BOOKE_INTERRUPT_INST_STORAGE 3
+#define BOOKE_INTERRUPT_EXTERNAL 4
+#define BOOKE_INTERRUPT_ALIGNMENT 5
+#define BOOKE_INTERRUPT_PROGRAM 6
+#define BOOKE_INTERRUPT_FP_UNAVAIL 7
+#define BOOKE_INTERRUPT_SYSCALL 8
+#define BOOKE_INTERRUPT_AP_UNAVAIL 9
+#define BOOKE_INTERRUPT_DECREMENTER 10
+#define BOOKE_INTERRUPT_FIT 11
+#define BOOKE_INTERRUPT_WATCHDOG 12
+#define BOOKE_INTERRUPT_DTLB_MISS 13
+#define BOOKE_INTERRUPT_ITLB_MISS 14
+#define BOOKE_INTERRUPT_DEBUG 15
+
+/* E500 */
+#ifdef CONFIG_SPE_POSSIBLE
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
+#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32
+#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33
+#endif
+
+#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
+#define BOOKE_INTERRUPT_DOORBELL 36
+#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
+
+/* booke_hv */
+#define BOOKE_INTERRUPT_GUEST_DBELL 38
+#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39
+#define BOOKE_INTERRUPT_HV_SYSCALL 40
+#define BOOKE_INTERRUPT_HV_PRIV 41
+#define BOOKE_INTERRUPT_LRAT_ERROR 42
+
+/* book3s */
+
+#define BOOK3S_INTERRUPT_SYSTEM_RESET	0x100
+#define BOOK3S_INTERRUPT_MACHINE_CHECK	0x200
+#define BOOK3S_INTERRUPT_DATA_STORAGE	0x300
+#define BOOK3S_INTERRUPT_DATA_SEGMENT	0x380
+#define BOOK3S_INTERRUPT_INST_STORAGE	0x400
+#define BOOK3S_INTERRUPT_INST_SEGMENT	0x480
+#define BOOK3S_INTERRUPT_EXTERNAL	0x500
+#define BOOK3S_INTERRUPT_EXTERNAL_HV	0x502
+#define BOOK3S_INTERRUPT_ALIGNMENT	0x600
+#define BOOK3S_INTERRUPT_PROGRAM	0x700
+#define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
+#define BOOK3S_INTERRUPT_DECREMENTER	0x900
+#define BOOK3S_INTERRUPT_HV_DECREMENTER	0x980
+#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER	0x1980
+#define BOOK3S_INTERRUPT_DOORBELL	0xa00
+#define BOOK3S_INTERRUPT_SYSCALL	0xc00
+#define BOOK3S_INTERRUPT_TRACE		0xd00
+#define BOOK3S_INTERRUPT_H_DATA_STORAGE	0xe00
+#define BOOK3S_INTERRUPT_H_INST_STORAGE	0xe20
+#define BOOK3S_INTERRUPT_H_EMUL_ASSIST	0xe40
+#define BOOK3S_INTERRUPT_HMI		0xe60
+#define BOOK3S_INTERRUPT_H_DOORBELL	0xe80
+#define BOOK3S_INTERRUPT_H_VIRT		0xea0
+#define BOOK3S_INTERRUPT_PERFMON	0xf00
+#define BOOK3S_INTERRUPT_ALTIVEC	0xf20
+#define BOOK3S_INTERRUPT_VSX		0xf40
+#define BOOK3S_INTERRUPT_FAC_UNAVAIL	0xf60
+#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL	0xf80
+
+/* book3s_hv */
+
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH	0x1500
+
+/*
+ * Special trap used to indicate to host that this is a
+ * passthrough interrupt that could not be handled
+ * completely in the guest.
+ */
+#define BOOK3S_INTERRUPT_HV_RM_HARD	0x5555
+
+#define BOOK3S_IRQPRIO_SYSTEM_RESET		0
+#define BOOK3S_IRQPRIO_DATA_SEGMENT		1
+#define BOOK3S_IRQPRIO_INST_SEGMENT		2
+#define BOOK3S_IRQPRIO_DATA_STORAGE		3
+#define BOOK3S_IRQPRIO_INST_STORAGE		4
+#define BOOK3S_IRQPRIO_ALIGNMENT		5
+#define BOOK3S_IRQPRIO_PROGRAM			6
+#define BOOK3S_IRQPRIO_FP_UNAVAIL		7
+#define BOOK3S_IRQPRIO_ALTIVEC			8
+#define BOOK3S_IRQPRIO_VSX			9
+#define BOOK3S_IRQPRIO_FAC_UNAVAIL		10
+#define BOOK3S_IRQPRIO_SYSCALL			11
+#define BOOK3S_IRQPRIO_MACHINE_CHECK		12
+#define BOOK3S_IRQPRIO_DEBUG			13
+#define BOOK3S_IRQPRIO_EXTERNAL			14
+#define BOOK3S_IRQPRIO_DECREMENTER		15
+#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR	16
+#define BOOK3S_IRQPRIO_MAX			17
+
+#define BOOK3S_HFLAG_DCBZ32			0x1
+#define BOOK3S_HFLAG_SLB			0x2
+#define BOOK3S_HFLAG_PAIRED_SINGLE		0x4
+#define BOOK3S_HFLAG_NATIVE_PS			0x8
+#define BOOK3S_HFLAG_MULTI_PGSIZE		0x10
+#define BOOK3S_HFLAG_NEW_TLBIE			0x20
+#define BOOK3S_HFLAG_SPLIT_HACK			0x40
+
+#define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
+#define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
+#define RESUME_FLAG_ARCH1	(1<<2)
+#define RESUME_FLAG_ARCH2	(1<<3)
+
+#define RESUME_GUEST            0
+#define RESUME_GUEST_NV         RESUME_FLAG_NV
+#define RESUME_HOST             RESUME_FLAG_HOST
+#define RESUME_HOST_NV          (RESUME_FLAG_HOST|RESUME_FLAG_NV)
+
+#define KVM_GUEST_MODE_NONE	0
+#define KVM_GUEST_MODE_GUEST	1
+#define KVM_GUEST_MODE_SKIP	2
+#define KVM_GUEST_MODE_GUEST_HV	3
+#define KVM_GUEST_MODE_HOST_HV	4
+#define KVM_GUEST_MODE_HV_P9	5 /* ISA >= v3.0 path */
+
+#define KVM_INST_FETCH_FAILED	-1
+
+/* Extract PO and XOP opcode fields */
+#define PO_XOP_OPCODE_MASK 0xfc0007fe
+
+#endif /* __POWERPC_KVM_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
new file mode 100644
index 0000000000..bbf5e2c5fe
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -0,0 +1,486 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOK3S_H__
+#define __ASM_KVM_BOOK3S_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_book3s_asm.h>
+
+struct kvmppc_bat {
+	u64 raw;
+	u32 bepi;
+	u32 bepi_mask;
+	u32 brpn;
+	u8 wimg;
+	u8 pp;
+	bool vs		: 1;
+	bool vp		: 1;
+};
+
+struct kvmppc_sid_map {
+	u64 guest_vsid;
+	u64 guest_esid;
+	u64 host_vsid;
+	bool valid	: 1;
+};
+
+#define SID_MAP_BITS    9
+#define SID_MAP_NUM     (1 << SID_MAP_BITS)
+#define SID_MAP_MASK    (SID_MAP_NUM - 1)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#define SID_CONTEXTS	1
+#else
+#define SID_CONTEXTS	128
+#define VSID_POOL_SIZE	(SID_CONTEXTS * 16)
+#endif
+
+struct hpte_cache {
+	struct hlist_node list_pte;
+	struct hlist_node list_pte_long;
+	struct hlist_node list_vpte;
+	struct hlist_node list_vpte_long;
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct hlist_node list_vpte_64k;
+#endif
+	struct rcu_head rcu_head;
+	u64 host_vpn;
+	u64 pfn;
+	ulong slot;
+	struct kvmppc_pte pte;
+	int pagesize;
+};
+
+/*
+ * Struct for a virtual core.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits.  This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
+ */
+struct kvmppc_vcore {
+	int n_runnable;
+	int num_threads;
+	int entry_exit_map;
+	int napping_threads;
+	int first_vcpuid;
+	u16 pcpu;
+	u16 last_cpu;
+	u8 vcore_state;
+	u8 in_guest;
+	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
+	struct list_head preempt_list;
+	spinlock_t lock;
+	struct rcuwait wait;
+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
+	u64 stolen_tb;
+	u64 preempt_tb;
+	struct kvm_vcpu *runner;
+	struct kvm *kvm;
+	u64 tb_offset;		/* guest timebase - host timebase */
+	u64 tb_offset_applied;	/* timebase offset currently in force */
+	ulong lpcr;
+	u32 arch_compat;
+	ulong pcr;
+	ulong dpdes;		/* doorbell state (POWER8) */
+	ulong vtb;		/* virtual timebase */
+	ulong conferring_threads;
+	unsigned int halt_poll_ns;
+	atomic_t online_count;
+};
+
+struct kvmppc_vcpu_book3s {
+	struct kvmppc_sid_map sid_map[SID_MAP_NUM];
+	struct {
+		u64 esid;
+		u64 vsid;
+	} slb_shadow[64];
+	u8 slb_shadow_max;
+	struct kvmppc_bat ibat[8];
+	struct kvmppc_bat dbat[8];
+	u64 hid[6];
+	u64 gqr[8];
+	u64 sdr1;
+	u64 hior;
+	u64 msr_mask;
+	u64 vtb;
+#ifdef CONFIG_PPC_BOOK3S_32
+	u32 vsid_pool[VSID_POOL_SIZE];
+	u32 vsid_next;
+#else
+	u64 proto_vsid_first;
+	u64 proto_vsid_max;
+	u64 proto_vsid_next;
+#endif
+	int context_id[SID_CONTEXTS];
+
+	bool hior_explicit;		/* HIOR is set by ioctl, not PVR */
+
+	struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
+	struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
+	struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
+	struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
+#endif
+	int hpte_cache_count;
+	spinlock_t mmu_lock;
+};
+
+#define VSID_REAL	0x07ffffffffc00000ULL
+#define VSID_BAT	0x07ffffffffb00000ULL
+#define VSID_64K	0x0800000000000000ULL
+#define VSID_1T		0x1000000000000000ULL
+#define VSID_REAL_DR	0x2000000000000000ULL
+#define VSID_REAL_IR	0x4000000000000000ULL
+#define VSID_PR		0x8000000000000000ULL
+
+extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask);
+extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
+extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end);
+extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
+extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+			       bool iswrite);
+extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
+extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
+extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
+extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
+extern int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
+			unsigned long addr, unsigned long status);
+extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
+			unsigned long slb_v, unsigned long valid);
+extern int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
+			unsigned long gpa, gva_t ea, int is_store);
+
+extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
+extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
+extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
+extern int kvmppc_mmu_hpte_sysinit(void);
+extern void kvmppc_mmu_hpte_sysexit(void);
+extern int kvmppc_mmu_hv_init(void);
+extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
+
+extern int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
+			unsigned long ea, unsigned long dsisr);
+extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+					gva_t eaddr, void *to, void *from,
+					unsigned long n);
+extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+					void *to, unsigned long n);
+extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      void *from, unsigned long n);
+extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      struct kvmppc_pte *gpte, u64 root,
+				      u64 *pte_ret_p);
+extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, u64 table,
+			int table_index, u64 *pte_ret_p);
+extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+				    unsigned int pshift, unsigned int lpid);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+			unsigned int shift,
+			const struct kvm_memory_slot *memslot,
+			unsigned int lpid);
+extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested,
+				    bool writing, unsigned long gpa,
+				    unsigned int lpid);
+extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+				unsigned long gpa,
+				struct kvm_memory_slot *memslot,
+				bool writing, bool kvm_ro,
+				pte_t *inserted_pte, unsigned int *levelp);
+extern int kvmppc_init_vm_radix(struct kvm *kvm);
+extern void kvmppc_free_radix(struct kvm *kvm);
+extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
+				      unsigned int lpid);
+extern int kvmppc_radix_init(void);
+extern void kvmppc_radix_exit(void);
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			    unsigned long gfn);
+extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			  unsigned long gfn);
+extern bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			       unsigned long gfn);
+extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_radix_flush_memslot(struct kvm *kvm,
+			const struct kvm_memory_slot *memslot);
+extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+
+/* XXX remove this export when load_last_inst() is generic */
+extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
+extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec);
+extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
+extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac);
+extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
+			   bool upper, u32 val);
+extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
+extern int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu);
+extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
+			bool writing, bool *writable);
+extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+			unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
+			unsigned long gfn, unsigned long psize);
+extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
+			unsigned long pte_index);
+void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
+			unsigned long pte_index);
+extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
+			unsigned long *nb_ret);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
+			unsigned long gpa, bool dirty);
+extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel,
+			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
+extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+			unsigned long pte_index, unsigned long avpn,
+			unsigned long *hpret);
+extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			struct kvm_memory_slot *memslot,
+			unsigned long *map);
+extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm,
+			unsigned long lpcr);
+extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
+			unsigned long mask);
+extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
+
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_entry_trampoline(void);
+extern void kvmppc_hv_entry_trampoline(void);
+extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
+extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
+extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
+extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
+extern int kvmppc_hcall_impl_pr(unsigned long cmd);
+extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
+extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
+
+long kvmppc_read_intr(void);
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
+#else
+static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
+#endif
+
+long kvmhv_nested_init(void);
+void kvmhv_nested_exit(void);
+void kvmhv_vm_nested_init(struct kvm *kvm);
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
+void kvmhv_release_all_nested(struct kvm *kvm);
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end);
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
+			  u64 time_limit, unsigned long lpcr);
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+				   struct hv_guest_state *hr);
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
+
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
+
+extern int kvm_irq_bypass;
+
+static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.book3s;
+}
+
+/* Also add subarch specific defines */
+
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+#include <asm/kvm_book3s_32.h>
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/kvm_book3s_64.h>
+#endif
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+	vcpu->arch.regs.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+	return vcpu->arch.regs.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+	vcpu->arch.regs.ccr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.ccr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.xer = val;
+}
+
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.xer;
+}
+
+static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.ctr = val;
+}
+
+static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.ctr;
+}
+
+static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.link = val;
+}
+
+static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.link;
+}
+
+static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.nip = val;
+}
+
+static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.nip;
+}
+
+static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
+static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
+{
+	return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE);
+}
+
+static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault_dar;
+}
+
+/* Expiry time of vcpu DEC relative to host TB */
+static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
+}
+
+static inline bool is_kvmppc_resume_guest(int r)
+{
+	return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
+}
+
+static inline bool is_kvmppc_hv_enabled(struct kvm *kvm);
+static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
+{
+	/* Only PR KVM supports the magic page */
+	return !is_kvmppc_hv_enabled(vcpu->kvm);
+}
+
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
+/* Magic register values loaded into r3 and r4 before the 'sc' assembly
+ * instruction for the OSI hypercalls */
+#define OSI_SC_MAGIC_R3			0x113724FA
+#define OSI_SC_MAGIC_R4			0x77810F9B
+
+#define INS_DCBZ			0x7c0007ec
+/* TO = 31 for unconditional trap */
+#define INS_TW				0x7fe00008
+
+#define SPLIT_HACK_MASK			0xff000000
+#define SPLIT_HACK_OFFS			0xfb000000
+
+/*
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_IDS) space down to the
+ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
+ * (but not its actual threading mode, which is not available) to avoid
+ * collisions.
+ *
+ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
+ * 0) unchanged: if the guest is filling each VCORE completely then it will be
+ * using consecutive IDs and it will fill the space without any packing.
+ *
+ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
+ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
+ * added to avoid collisions.
+ *
+ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
+ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
+ * can be safely packed into the second half of each VCORE by adding an offset
+ * of (stride / 2).
+ *
+ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
+ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
+ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
+ *
+ * Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
+ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
+ * must be free to use.
+ *
+ * (The offsets for each block are stored in block_offsets[], indexed by the
+ * block number if the stride is 8. For cases where the guest's stride is less
+ * than 8, we can re-use the block_offsets array by multiplying the block
+ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
+ */
+static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
+{
+	const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
+	int stride = kvm->arch.emul_smt_mode;
+	int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
+	u32 packed_id;
+
+	if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
+		return 0;
+	packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
+	if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
+		return 0;
+	return packed_id;
+}
+
+#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
new file mode 100644
index 0000000000..e9d2e84631
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOK3S_32_H__
+#define __ASM_KVM_BOOK3S_32_H__
+
+static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.shadow_vcpu;
+}
+
+static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+}
+
+#define PTE_SIZE	12
+#define VSID_ALL	0
+#define SR_INVALID	0x00000001	/* VSID 1 should always be unused */
+#define SR_KP		0x20000000
+#define PTE_V		0x80000000
+#define PTE_SEC		0x00000040
+#define PTE_M		0x00000010
+#define PTE_R		0x00000100
+#define PTE_C		0x00000080
+
+#define SID_SHIFT	28
+#define ESID_MASK	0xf0000000
+#define VSID_MASK	0x00fffffff0000000ULL
+#define VPN_SHIFT	12
+
+#endif /* __ASM_KVM_BOOK3S_32_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
new file mode 100644
index 0000000000..d49065af08
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -0,0 +1,682 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOK3S_64_H__
+#define __ASM_KVM_BOOK3S_64_H__
+
+#include <linux/string.h>
+#include <asm/bitops.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>
+
+/*
+ * Structure for a nested guest, that is, for a guest that is managed by
+ * one of our guests.
+ */
+struct kvm_nested_guest {
+	struct kvm *l1_host;		/* L1 VM that owns this nested guest */
+	int l1_lpid;			/* lpid L1 guest thinks this guest is */
+	int shadow_lpid;		/* real lpid of this nested guest */
+	pgd_t *shadow_pgtable;		/* our page table for this guest */
+	u64 l1_gr_to_hr;		/* L1's addr of part'n-scoped table */
+	u64 process_table;		/* process table entry for this guest */
+	long refcnt;			/* number of pointers to this struct */
+	struct mutex tlb_lock;		/* serialize page faults and tlbies */
+	struct kvm_nested_guest *next;	/* link to another nested guest */
+	cpumask_t need_tlb_flush;	/* NOTE(review): presumably CPUs owing a flush */
+	short prev_cpu[NR_CPUS];	/* one slot per CPU; NOTE(review): use not shown here */
+	u8 radix;			/* is this nested guest radix */
+};
+
+/*
+ * We define a nested rmap entry as a single 64-bit quantity
+ * 0xFFF0000000000000	12-bit lpid field
+ * 0x000FFFFFFFFFF000	40-bit guest 4k page frame number
+ * 0x0000000000000001	1-bit  single entry flag
+ */
+#define RMAP_NESTED_LPID_MASK		0xFFF0000000000000UL
+#define RMAP_NESTED_LPID_SHIFT		(52)
+#define RMAP_NESTED_GPA_MASK		0x000FFFFFFFFFF000UL
+#define RMAP_NESTED_IS_SINGLE_ENTRY	0x0000000000000001UL
+
+/* Structure for a nested guest rmap entry */
+struct rmap_nested {
+	struct llist_node list;		/* llist linkage between entries */
+	u64 rmap;			/* value laid out per RMAP_NESTED_* */
+};
+
+/*
+ * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
+ *			     safe against removal of the list entry or NULL list
+ * @pos:	a (struct rmap_nested *) to use as a loop cursor
+ * @node:	pointer to the first entry
+ *		NOTE: this can be NULL
+ * @rmapp:	an (unsigned long *) in which to return the rmap entries on each
+ *		iteration
+ *		NOTE: this must point to already allocated memory
+ *
+ * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
+ * rmap entry in the memslot. The list is always terminated by a "single entry"
+ * stored in the list element of the final entry of the llist. If there is ONLY
+ * a single entry then this is itself in the rmap entry of the memslot, not a
+ * llist head pointer.
+ *
+ * Note that the iterator below assumes that a nested rmap entry is always
+ * non-zero.  This is true for our usage because the LPID field is always
+ * non-zero (zero is reserved for the host).
+ *
+ * This should be used to iterate over the list of rmap_nested entries with
+ * processing done on the u64 rmap value given by each iteration. This is safe
+ * against removal of list entries and it is always safe to call free on (pos).
+ *
+ * e.g.
+ * struct rmap_nested *cursor;
+ * struct llist_node *first;
+ * unsigned long rmap;
+ * for_each_nest_rmap_safe(cursor, first, &rmap) {
+ *	do_something(rmap);
+ *	free(cursor);
+ * }
+ */
+#define for_each_nest_rmap_safe(pos, node, rmapp)			       \
+	for ((pos) = llist_entry((node), typeof(*(pos)), list);		       \
+	     (node) &&							       \
+	     (*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ?     \
+			  ((u64) (node)) : ((pos)->rmap))) &&		       \
+	     (((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ?      \
+			 ((struct llist_node *) ((pos) = NULL)) :	       \
+			 (pos)->list.next)), true);			       \
+	     (pos) = llist_entry((node), typeof(*(pos)), list))
+
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+					  bool create);
+void kvmhv_put_nested(struct kvm_nested_guest *gp);
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
+
+/* Encoding of first parameter for H_TLB_INVALIDATE */
+#define H_TLBIE_P1_ENC(ric, prs, r)	(___PPC_RIC(ric) | ___PPC_PRS(prs) | \
+					 ___PPC_R(r))
+
+/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
+#define PPC_MIN_HPT_ORDER	18
+#define PPC_MAX_HPT_ORDER	46
+
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();	/* undone by preempt_enable() in svcpu_put() */
+	return &get_paca()->shadow_vcpu;	/* PACA copy; vcpu is not consulted */
+}
+
+static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+	preempt_enable();	/* pairs with preempt_disable() in svcpu_get() */
+}
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+static inline bool kvm_is_radix(struct kvm *kvm)
+{
+	return kvm->arch.radix;
+}
+
+static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * While the vcpu has a nested guest attached, report that guest's
+	 * MMU mode; otherwise report the mode of the VM owning the vcpu.
+	 */
+	if (!vcpu->arch.nested)
+		return kvm_is_radix(vcpu->kvm);
+
+	return vcpu->arch.nested->radix;
+}
+
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);
+
+#define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
+#endif
+
+/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an erratum.
+ */
+#define HDSISR_CANARY	0x7fff
+
+/*
+ * We use a lock bit in HPTE dword 0 to synchronize updates and
+ * accesses to each HPTE, and another bit to indicate non-present
+ * HPTEs.
+ */
+#define HPTE_V_HVLOCK	0x40UL
+#define HPTE_V_ABSENT	0x20UL
+
+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED	(1ul << 62)
+
+/* These bits are reserved in the guest view of the HPTE */
+#define HPTE_GR_RESERVED	HPTE_GR_MODIFIED
+
+static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
+{
+	unsigned long tmp, old;
+	__be64 be_lockbit, be_bits;
+
+	/*
+	 * One attempt to set HPTE_V_HVLOCK in *hpte; returns 1 on success, 0
+	 * if any of @bits was set or the store failed. The HTAB is big endian
+	 * but is accessed natively here, so both masks are byte-swapped first.
+	 */
+	be_lockbit = cpu_to_be64(HPTE_V_HVLOCK);
+	be_bits = cpu_to_be64(bits);
+
+	asm volatile("	ldarx	%0,0,%2\n"	/* tmp = *hpte, reserving it */
+		     "	and.	%1,%0,%3\n"	/* old = tmp & be_bits */
+		     "	bne	2f\n"		/* a tested bit is set: bail */
+		     "	or	%0,%0,%4\n"	/* tmp |= be_lockbit */
+		     "  stdcx.	%0,0,%2\n"	/* store if still reserved */
+		     "	beq+	2f\n"		/* stored: old is still 0 */
+		     "	mr	%1,%3\n"	/* failed: old = be_bits */
+		     "2:	isync"
+		     : "=&r" (tmp), "=&r" (old)
+		     : "r" (hpte), "r" (be_bits), "r" (be_lockbit)
+		     : "cc", "memory");
+	return old == 0;	/* NOTE(review): failure path needs bits != 0 */
+}
+
+static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+	hpte_v &= ~HPTE_V_HVLOCK;	/* never write the lock bit back */
+	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+	hpte[0] = cpu_to_be64(hpte_v);	/* storing dword 0 drops the lock */
+}
+
+/* Without barrier */
+static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+	hpte_v &= ~HPTE_V_HVLOCK;	/* never write the lock bit back */
+	hpte[0] = cpu_to_be64(hpte_v);	/* no release barrier precedes this */
+}
+
+/*
+ * Decode the HPTE LP (large page size) field as POWER7/8/9 hardware does.
+ * Returns base shift, or (actual shift << 8) + base shift; 0 if unknown.
+ */
+static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
+{
+	unsigned int lphi;
+
+	if (!(h & HPTE_V_LARGE))
+		return 12;	/* 4kB */
+	lphi = (l >> 16) & 0xf;
+	switch ((l >> 12) & 0xf) {
+	case 0:
+		return !lphi ? 24 : 0;		/* 16MB */
+		break;
+	case 1:
+		return 16;			/* 64kB */
+		break;
+	case 3:
+		return !lphi ? 34 : 0;		/* 16GB */
+		break;
+	case 7:
+		return (16 << 8) + 12;		/* 64kB in 4kB */
+		break;
+	case 8:
+		if (!lphi)
+			return (24 << 8) + 16;	/* 16MB in 64kB */
+		if (lphi == 3)
+			return (24 << 8) + 12;	/* 16MB in 4kB */
+		break;
+	}
+	return 0;
+}
+
+static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
+{
+	return kvmppc_hpte_page_shifts(h, l) & 0xff;
+}
+
+static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
+{
+	int shifts = kvmppc_hpte_page_shifts(h, l);
+
+	if (shifts < 0x100)
+		return shifts;	/* nothing above bit 7: use the value as is */
+	return shifts >> 8;	/* mixed sizes: actual shift sits above bit 7 */
+}
+
+static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
+{
+	int pgshift = kvmppc_hpte_actual_page_shift(v, r);
+
+	if (!pgshift)
+		return 0;		/* no page shift could be decoded */
+	return 1ul << pgshift;		/* page size in bytes */
+}
+
+/* Map a (base, actual) page-shift pair to an LP value; -1 if none is listed */
+static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
+{
+	/* 16MB base pages: LP is 0 whatever actual_shift says */
+	if (base_shift == 24)
+		return 0;
+
+	if (base_shift == 12) {
+		/* 4kB base pages backed by 4kB, 64kB or 16MB pages */
+		if (actual_shift == 12)
+			return 0;
+		if (actual_shift == 16)
+			return 7;
+		if (actual_shift == 24)
+			return 0x38;
+	} else if (base_shift == 16) {
+		/* 64kB base pages backed by 64kB or 16MB pages */
+		if (actual_shift == 16)
+			return 1;
+		if (actual_shift == 24)
+			return 8;
+	}
+
+	/* Every other combination has no entry in this table */
+	return -1;
+}
+
+static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
+					     unsigned long pte_index)
+{
+	int a_pgshift, b_pgshift;
+	unsigned long rb = 0, va_low, sllp;
+
+	b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
+	if (a_pgshift >= 0x100) {	/* packed as (actual << 8) + base */
+		b_pgshift &= 0xff;
+		a_pgshift >>= 8;
+	}
+
+	/*
+	 * Ignore the top 14 bits of va.
+	 * v has its top two bits covering the segment size, hence shift
+	 * by 16 bits; also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
+	 * The AVA field in v also has the lower 23 bits ignored.
+	 * For base page size 4K we need 14 .. 65 bits (so need to
+	 * collect extra 11 bits)
+	 * For others we need 14..14+i
+	 */
+	/* This covers bits 14..54 of the va */
+	rb = (v & ~0x7fUL) << 16;		/* AVA field */
+
+	/*
+	 * The AVA in v has its lower 23 bits cleared. We need to derive
+	 * those from the pteg index
+	 */
+	va_low = pte_index >> 3;
+	if (v & HPTE_V_SECONDARY)
+		va_low = ~va_low;
+	/*
+	 * Get the vpn bits from va_low by reversing the hash.
+	 * In v we have va with 23 bits dropped and then left shifted
+	 * HPTE_V_AVPN_SHIFT (7) bits. Now to find vsid we need
+	 * right shift it with (SID_SHIFT - (23 - 7))
+	 */
+	if (!(v & HPTE_V_1TB_SEG))
+		va_low ^= v >> (SID_SHIFT - 16);
+	else
+		va_low ^= v >> (SID_SHIFT_1T - 16);
+	va_low &= 0x7ff;
+
+	if (b_pgshift <= 12) {
+		if (a_pgshift > 12) {
+			sllp = (a_pgshift == 16) ? 5 : 4;
+			rb |= sllp << 5;	/*  AP field */
+		}
+		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
+	} else {
+		int aval_shift;
+		/*
+		 * Remaining bits of the AVA/LP fields.
+		 * These also contain the rr bits of LP
+		 */
+		rb |= (va_low << b_pgshift) & 0x7ff000;
+		/*
+		 * Now clear the LP bits not needed for the actual psize
+		 */
+		rb &= ~((1ul << a_pgshift) - 1);
+		/*
+		 * AVAL field 58..77 - base_page_shift bits of va
+		 * we have space for 58..64 bits, Missing bits should
+		 * be zero filled. +1 is to take care of L bit shift
+		 */
+		aval_shift = 64 - (77 - b_pgshift) + 1;
+		rb |= ((va_low << aval_shift) & 0xfe);
+
+		rb |= 1;		/* L field */
+		rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
+	}
+	/*
+	 * This sets both bits of the B field in the PTE. 0b1x values are
+	 * reserved, but those will have been filtered by kvmppc_do_h_enter.
+	 */
+	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/* B field */
+	return rb;
+}
+
+static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
+{
+	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+}
+
+static inline int hpte_is_writable(unsigned long ptel)
+{
+	unsigned long prot = ptel & (HPTE_R_PP0 | HPTE_R_PP);
+
+	return !(prot == PP_RXRX || prot == PP_RXXX);	/* the two non-writable PPs */
+}
+
+static inline unsigned long hpte_make_readonly(unsigned long ptel)
+{
+	/* PP0 clear and PP other than PP_RWXX: OR-ing in PP_RXRX suffices */
+	if (!(ptel & HPTE_R_PP0) && (ptel & HPTE_R_PP) != PP_RWXX)
+		return ptel | PP_RXRX;
+	/* Otherwise the PP field is replaced wholesale by PP_RXXX */
+	return (ptel & ~HPTE_R_PP) | PP_RXXX;
+}
+
+static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
+{
+	unsigned int attrs = hptel & HPTE_R_WIMG;
+
+	/* W|I|M together (SAO) is treated as plain M on ARCH_206 CPUs */
+	if (attrs == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
+	    cpu_has_feature(CPU_FTR_ARCH_206))
+		attrs = HPTE_R_M;
+
+	/* Host mapping not cache-inhibited: the HPTE must be exactly M */
+	if (!is_ci)
+		return attrs == HPTE_R_M;
+
+	/*
+	 * The host mapping is cache-inhibited: the guest HPTE must have I
+	 * set and W clear. FIXME!! is this ok for all guest. ?
+	 */
+	return !(attrs & HPTE_R_W) && (attrs & HPTE_R_I);
+}
+
+/*
+ * If the PTE is present, atomically set the referenced bit (and the dirty bit
+ * when writing to a writable PTE) and return it, otherwise return 0.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
+{
+	pte_t old_pte, new_pte = __pte(0);
+
+	while (1) {
+		/*
+		 * Make sure we don't reload from ptep
+		 */
+		old_pte = READ_ONCE(*ptep);
+		/*
+		 * Keep re-reading until H_PAGE_BUSY is seen clear
+		 */
+		if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) {
+			cpu_relax();
+			continue;
+		}
+		/* If the pte is not present, return an empty (zero) pte */
+		if (unlikely(!pte_present(old_pte)))
+			return __pte(0);
+
+		new_pte = pte_mkyoung(old_pte);
+		if (writing && pte_write(old_pte))
+			new_pte = pte_mkdirty(new_pte);
+
+		if (pte_xchg(ptep, old_pte, new_pte))	/* else re-read and retry */
+			break;
+	}
+	return new_pte;
+}
+
+static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
+{
+	if (!key)
+		return true;	/* key clear: every pp value allows reads */
+	return pp >= PP_RWRX && pp <= PP_RXRX;
+}
+
+static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
+{
+	if (!key)
+		return pp <= PP_RWRW;
+	return pp == PP_RWRW;	/* key set: only PP_RWRW stays writable */
+}
+
+static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
+{
+	unsigned long key_hi = (hpte_r & HPTE_R_KEY_HI) >> 57;
+	unsigned long key_lo = (hpte_r & HPTE_R_KEY_LO) >> 9;
+	unsigned long skey = key_hi | key_lo;
+
+	return (amr >> (62 - 2 * skey)) & 3;	/* two AMR bits per key */
+}
+
+static inline void lock_rmap(unsigned long *rmap)
+{
+	do {
+		while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
+			cpu_relax();	/* read-only wait while the bit is set */
+	} while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap)); /* lost: wait again */
+}
+
+static inline void unlock_rmap(unsigned long *rmap)
+{
+	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);	/* bit set by lock_rmap() */
+}
+
+static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
+				   unsigned long pagesize)
+{
+	unsigned long pages_mask = (pagesize >> PAGE_SHIFT) - 1;
+
+	if (pagesize <= PAGE_SIZE)	/* mask is not consulted in this case */
+		return true;
+	return ((memslot->base_gfn | memslot->npages) & pages_mask) == 0;
+}
+
+/*
+ * This works for 4k, 64k and 16M pages on POWER7,
+ * and 4k and 16M pages on PPC970.
+ */
+static inline unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+	if (psize <= 0x1000)
+		return 0;		/* up to 4k: neither L nor LP bits */
+
+	if (psize == 0x10000)
+		return SLB_VSID_L | SLB_VSID_LP_01;	/* 64k */
+
+	/* Every other size above 4k is encoded as L alone */
+	return SLB_VSID_L;
+}
+
+static inline int is_vrma_hpte(unsigned long hpte_v)
+{
+	return (hpte_v & ~0xffffffUL) ==	/* low 24 bits are not compared */
+		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+
+/*
+ * Like kvm_memslots(), but for use in real mode when we can't do
+ * any RCU stuff (since the secondary threads are offline from the
+ * kernel's point of view), and we can't print anything.
+ * Thus we use rcu_dereference_raw_check() rather than rcu_dereference_check().
+ */
+static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
+{
+	return rcu_dereference_raw_check(kvm->memslots[0]);	/* entry 0 only */
+}
+
+extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
+
+extern void kvmhv_rm_send_ipi(int cpu);
+
+static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt)
+{
+	/* HPTEs are 2**4 bytes long */
+	return 1UL << (hpt->order - 4);
+}
+
+static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
+{
+	/* 128 (2**7) bytes in each HPTEG */
+	return (1UL << (hpt->order - 7)) - 1;
+}
+
+/* Set bits in a dirty bitmap, which is in LE format */
+static inline void set_dirty_bits(unsigned long *map, unsigned long i,
+				  unsigned long npages)
+{
+	/* Byte-wise path: i / 8 and npages / 8 truncate, so whole bytes only */
+	if (npages >= 8)
+		memset((char *)map + i / 8, 0xff, npages / 8);
+	else
+		for (; npages; ++i, --npages)
+			__set_bit_le(i, map);
+}
+
+static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
+					 unsigned long npages)
+{
+	if (npages >= 8)	/* whole-byte path: this one is a plain memset() */
+		memset((char *)map + i / 8, 0xff, npages / 8);
+	else
+		for (; npages; ++i, --npages)
+			set_bit_le(i, map);	/* per-bit path uses set_bit_le() */
+}
+
+static inline u64 sanitize_msr(u64 msr)
+{
+	/* Clear MSR_HV and force MSR_ME on; */
+	/* every other bit of msr is passed through unchanged. */
+	return (msr & ~MSR_HV) | MSR_ME;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.ccr  = vcpu->arch.cr_tm;	/* *_tm copies -> live state */
+	vcpu->arch.regs.xer = vcpu->arch.xer_tm;
+	vcpu->arch.regs.link  = vcpu->arch.lr_tm;
+	vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
+	vcpu->arch.amr = vcpu->arch.amr_tm;
+	vcpu->arch.ppr = vcpu->arch.ppr_tm;
+	vcpu->arch.dscr = vcpu->arch.dscr_tm;
+	vcpu->arch.tar = vcpu->arch.tar_tm;
+	memcpy(vcpu->arch.regs.gpr, vcpu->arch.gpr_tm,
+	       sizeof(vcpu->arch.regs.gpr));	/* whole GPR array at once */
+	vcpu->arch.fp  = vcpu->arch.fp_tm;	/* struct assignment */
+	vcpu->arch.vr  = vcpu->arch.vr_tm;	/* struct assignment */
+	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.cr_tm  = vcpu->arch.regs.ccr;	/* live state -> *_tm copies */
+	vcpu->arch.xer_tm = vcpu->arch.regs.xer;
+	vcpu->arch.lr_tm  = vcpu->arch.regs.link;
+	vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
+	vcpu->arch.amr_tm = vcpu->arch.amr;
+	vcpu->arch.ppr_tm = vcpu->arch.ppr;
+	vcpu->arch.dscr_tm = vcpu->arch.dscr;
+	vcpu->arch.tar_tm = vcpu->arch.tar;
+	memcpy(vcpu->arch.gpr_tm, vcpu->arch.regs.gpr,
+	       sizeof(vcpu->arch.regs.gpr));	/* whole GPR array at once */
+	vcpu->arch.fp_tm  = vcpu->arch.fp;	/* struct assignment */
+	vcpu->arch.vr_tm  = vcpu->arch.vr;	/* struct assignment */
+	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+			     unsigned long gpa, unsigned int level,
+			     unsigned long mmu_seq, unsigned int lpid,
+			     unsigned long *rmapp, struct rmap_nested **n_rmap);
+extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+				   struct rmap_nested **n_rmap);
+extern void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
+					   unsigned long clr, unsigned long set,
+					   unsigned long hpa, unsigned long nbytes);
+extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+				const struct kvm_memory_slot *memslot,
+				unsigned long gpa, unsigned long hpa,
+				unsigned long nbytes);
+
+static inline pte_t *
+find_kvm_secondary_pte_unlocked(struct kvm *kvm, unsigned long ea,
+				unsigned *hshift)
+{
+	/*
+	 * Look @ea up in kvm->arch.pgtable; no mmu_lock check is made here.
+	 */
+	return __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
+}
+
+static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea,
+					    unsigned *hshift)
+{
+	/*
+	 * Callers are expected to hold kvm->mmu_lock; complain if not.
+	 */
+	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+		"%s called with kvm mmu_lock not held \n", __func__);
+
+	return __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
+}
+
+static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq,
+				       unsigned long ea, unsigned *hshift)
+{
+	/* Callers are expected to hold kvm->mmu_lock; complain if not */
+	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+		"%s called with kvm mmu_lock not held \n", __func__);
+
+	/*
+	 * Do not walk the host page table when mmu_invalidate_retry()
+	 * says so for mmu_seq; the caller gets NULL in that case.
+	 */
+	if (mmu_invalidate_retry(kvm, mmu_seq))
+		return NULL;
+	return __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift);
+}
+
+extern pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
+					unsigned long ea, unsigned *hshift);
+
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
new file mode 100644
index 0000000000..a367979386
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOK3S_ASM_H__
+#define __ASM_KVM_BOOK3S_ASM_H__
+
+/* XICS ICP register offsets */
+#define XICS_XIRR		4
+#define XICS_MFRR		0xc
+#define XICS_IPI		2	/* interrupt source # for IPIs */
+
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS		8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES		4
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+
+#include <asm/kvm_asm.h>
+
+.macro DO_KVM intno
+	.if (\intno == BOOK3S_INTERRUPT_SYSTEM_RESET) || \
+	    (\intno == BOOK3S_INTERRUPT_MACHINE_CHECK) || \
+	    (\intno == BOOK3S_INTERRUPT_DATA_STORAGE) || \
+	    (\intno == BOOK3S_INTERRUPT_INST_STORAGE) || \
+	    (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
+	    (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
+	    (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
+	    (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
+	    (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
+	    (\intno == BOOK3S_INTERRUPT_PROGRAM) || \
+	    (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
+	    (\intno == BOOK3S_INTERRUPT_DECREMENTER) || \
+	    (\intno == BOOK3S_INTERRUPT_SYSCALL) || \
+	    (\intno == BOOK3S_INTERRUPT_TRACE) || \
+	    (\intno == BOOK3S_INTERRUPT_PERFMON) || \
+	    (\intno == BOOK3S_INTERRUPT_ALTIVEC) || \
+	    (\intno == BOOK3S_INTERRUPT_VSX)
+
+	b	kvmppc_trampoline_\intno
+kvmppc_resume_\intno:
+
+	.endif
+.endm
+
+#else
+
+.macro DO_KVM intno
+.endm
+
+#endif /* CONFIG_KVM_BOOK3S_HANDLER */
+
+#else  /*__ASSEMBLY__ */
+
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+	unsigned long	rpr;
+	unsigned long	pmmar;
+	unsigned long	ldbar;
+	u8		subcore_size;
+	u8		do_nap;
+	u8		napped[MAX_SMT_THREADS];	/* one entry per thread */
+	struct kvmppc_vcore *vc[MAX_SUBCORES];	/* one vcore pointer per subcore */
+};
+
+/*
+ * This struct goes in the PACA on 64-bit processors.  It is used
+ * to store host state that needs to be saved when we enter a guest
+ * and restored when we exit, but isn't specific to any particular
+ * guest or vcpu.  It also has some scratch fields used by the guest
+ * exit code.
+ */
+struct kvmppc_host_state {
+	ulong host_r1;
+	ulong host_r2;
+	ulong host_msr;
+	ulong vmhandler;
+	ulong scratch0;
+	ulong scratch1;
+	ulong scratch2;
+	u8 in_guest;
+	u8 restore_hid5;
+	u8 napping;
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	u8 hwthread_req;
+	u8 hwthread_state;
+	u8 host_ipi;
+	u8 ptid;		/* thread number within subcore when split */
+	u8 fake_suspend;
+	struct kvm_vcpu *kvm_vcpu;
+	struct kvmppc_vcore *kvm_vcore;
+	void __iomem *xics_phys;
+	void __iomem *xive_tima_phys;
+	void __iomem *xive_tima_virt;
+	u32 saved_xirr;
+	u64 dabr;
+	u64 host_mmcr[7];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
+	u32 host_pmc[8];
+	u64 host_purr;
+	u64 host_spurr;
+	u64 host_dscr;
+	u64 dec_expires;
+	struct kvm_split_mode *kvm_split_mode;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	u64 cfar;
+	u64 ppr;
+	u64 host_fscr;
+#endif
+};
+
+struct kvmppc_book3s_shadow_vcpu {
+	bool in_use;
+	ulong gpr[14];
+	u32 cr;
+	ulong xer;
+	ulong ctr;
+	ulong lr;
+	ulong pc;
+
+	ulong shadow_srr1;
+	ulong fault_dar;
+	u32 fault_dsisr;
+	u32 last_inst;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+	u32     sr[16];			/* Guest SRs */
+
+	struct kvmppc_host_state hstate;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	u8 slb_max;			/* highest used guest slb entry */
+	struct  {
+		u64     esid;
+		u64     vsid;
+	} slb[64];			/* guest SLB */
+	u64 shadow_fscr;
+#endif
+};
+
+#endif /*__ASSEMBLY__ */
+
+/* Values for kvm_state */
+#define KVM_HWTHREAD_IN_KERNEL	0
+#define KVM_HWTHREAD_IN_IDLE	1
+#define KVM_HWTHREAD_IN_KVM	2
+
+#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
new file mode 100644
index 0000000000..0a6319448c
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KVM_BOOK3S_UVMEM_H__
+#define __ASM_KVM_BOOK3S_UVMEM_H__
+
+#ifdef CONFIG_PPC_UV
+int kvmppc_uvmem_init(void);
+void kvmppc_uvmem_free(void);
+bool kvmppc_uvmem_available(void);
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
+void kvmppc_uvmem_slot_free(struct kvm *kvm,
+			    const struct kvm_memory_slot *slot);
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+				   unsigned long gra,
+				   unsigned long flags,
+				   unsigned long page_shift);
+unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+				    unsigned long gra,
+				    unsigned long flags,
+				    unsigned long page_shift);
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+			     struct kvm *kvm, bool skip_page_out);
+int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+		const struct kvm_memory_slot *new);
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+		const struct kvm_memory_slot *old);
+#else
+static inline int kvmppc_uvmem_init(void)
+{
+	return 0;
+}
+
+static inline void kvmppc_uvmem_free(void) { }
+
+static inline bool kvmppc_uvmem_available(void)
+{
+	return false;
+}
+
+static inline int
+kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	return 0;
+}
+
+static inline void
+kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { }
+
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+		     unsigned long flags, unsigned long page_shift)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+		      unsigned long flags, unsigned long page_shift)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+	return H_UNSUPPORTED;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+	return -EFAULT;
+}
+
+static inline void
+kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+			struct kvm *kvm, bool skip_page_out) { }
+
+static inline int  kvmppc_uvmem_memslot_create(struct kvm *kvm,
+		const struct kvm_memory_slot *new)
+{
+	return H_UNSUPPORTED;	/* NOTE(review): hcall code; sibling int stubs return 0/-EFAULT */
+}
+
+static inline void  kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+		const struct kvm_memory_slot *old) { }
+
+#endif /* CONFIG_PPC_UV */
+#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
new file mode 100644
index 0000000000..0c3401b2e1
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOKE_H__
+#define __ASM_KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+
+/*
+ * Number of available lpids. Only the low-order 6 bits of LPID register are
+ * implemented on e500mc+ cores.
+ */
+#define KVMPPC_NR_LPIDS                        64
+
+#define KVMPPC_INST_EHPRIV		0x7c00021c
+#define EHPRIV_OC_SHIFT			11
+/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
+#define EHPRIV_OC_DEBUG			1
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+	vcpu->arch.regs.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+	return vcpu->arch.regs.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+	vcpu->arch.regs.ccr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.ccr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.xer = val;
+}
+
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.xer;
+}
+
+static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
+{
+	/* XXX Would need to check TLB entry */
+	return false;
+}
+
+static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.ctr = val;
+}
+
+static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.ctr;
+}
+
+static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.link = val;
+}
+
+static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.link;
+}
+
+static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
+{
+	vcpu->arch.regs.nip = val;
+}
+
+static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.regs.nip;
+}
+
+#ifdef CONFIG_BOOKE
+static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault_dear;
+}
+#endif
+
+static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
+{
+	/* Magic page is only supported on e500v2 */
+#ifdef CONFIG_KVM_E500V2
+	return true;
+#else
+	return false;
+#endif
+}
+#endif /* __ASM_KVM_BOOKE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
new file mode 100644
index 0000000000..7487ef5821
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef ASM_KVM_BOOKE_HV_ASM_H
+#define ASM_KVM_BOOKE_HV_ASM_H
+
+#include <asm/feature-fixups.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * All exceptions from guest state must go through KVM
+ * (except for those which are delivered directly to the guest) --
+ * there are no exceptions for which we fall through directly to
+ * the normal host handler.
+ *
+ * 32-bit host
+ * Expected inputs (normal exceptions):
+ *   SCRATCH0 = saved r10
+ *   r10 = thread struct
+ *   r11 = appropriate SRR1 variant (currently used as scratch)
+ *   r13 = saved CR
+ *   *(r10 + THREAD_NORMSAVE(0)) = saved r11
+ *   *(r10 + THREAD_NORMSAVE(2)) = saved r13
+ *
+ * Expected inputs (crit/mcheck/debug exceptions):
+ *   appropriate SCRATCH = saved r8
+ *   r8 = exception level stack frame
+ *   r9 = *(r8 + _CCR) = saved CR
+ *   r11 = appropriate SRR1 variant (currently used as scratch)
+ *   *(r8 + GPR9) = saved r9
+ *   *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
+ *   *(r8 + GPR11) = saved r11
+ *
+ * 64-bit host
+ * Expected inputs (GEN/GDBELL/DBG/CRIT/MC exception types):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_R11) = saved r11
+ *  SPRN_SPRG_##type##_SCRATCH = saved r13
+ *
+ * Expected inputs (TLB exception type):
+ *  r10 = saved CR
+ *  r12 = extlb pointer
+ *  r13 = PACA_POINTER
+ *  *(r12 + EX_TLB_R10) = saved r10
+ *  *(r12 + EX_TLB_R11) = saved r11
+ *  *(r12 + EX_TLB_R13) = saved r13
+ *  SPRN_SPRG_GEN_SCRATCH = saved r12
+ *
+ * Only the bolted version of TLB miss exception handlers is supported now.
+ */
+.macro DO_KVM intno srr1
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+	mtocrf	0x80, r11	/* check MSR[GS] without clobbering reg */
+	bf	3, 1975f
+	b	kvmppc_handler_\intno\()_\srr1
+1975:
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+.endm
+
+#endif /*__ASSEMBLY__ */
+#endif /* ASM_KVM_BOOKE_HV_ASM_H */
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
new file mode 100644
index 0000000000..25df316b7e
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_fpu.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright Novell Inc. 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_FPU_H__
+#define __ASM_KVM_FPU_H__
+
+#include <linux/types.h>
+
+extern void fps_fres(u64 *fpscr, u32 *dst, u32 *src1);
+extern void fps_frsqrte(u64 *fpscr, u32 *dst, u32 *src1);
+extern void fps_fsqrts(u64 *fpscr, u32 *dst, u32 *src1);
+
+extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fdivs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fmuls(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+
+extern void fps_fmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
+		       u32 *src3);
+extern void fps_fmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
+		       u32 *src3);
+extern void fps_fnmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
+		        u32 *src3);
+extern void fps_fnmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
+		        u32 *src3);
+extern void fps_fsel(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
+		     u32 *src3);
+
+#define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
+				u64 *dst, u64 *src1);
+#define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
+				u64 *dst, u64 *src1, u64 *src2);
+#define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
+				u64 *dst, u64 *src1, u64 *src2, u64 *src3);
+
+extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2);
+extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2);
+
+FPD_ONE_IN(fsqrts)
+FPD_ONE_IN(frsqrtes)
+FPD_ONE_IN(fres)
+FPD_ONE_IN(frsp)
+FPD_ONE_IN(fctiw)
+FPD_ONE_IN(fctiwz)
+FPD_ONE_IN(fsqrt)
+FPD_ONE_IN(fre)
+FPD_ONE_IN(frsqrte)
+FPD_ONE_IN(fneg)
+FPD_ONE_IN(fabs)
+FPD_TWO_IN(fadds)
+FPD_TWO_IN(fsubs)
+FPD_TWO_IN(fdivs)
+FPD_TWO_IN(fmuls)
+FPD_TWO_IN(fcpsgn)
+FPD_TWO_IN(fdiv)
+FPD_TWO_IN(fadd)
+FPD_TWO_IN(fmul)
+FPD_TWO_IN(fsub)
+FPD_THREE_IN(fmsubs)
+FPD_THREE_IN(fmadds)
+FPD_THREE_IN(fnmsubs)
+FPD_THREE_IN(fnmadds)
+FPD_THREE_IN(fsel)
+FPD_THREE_IN(fmsub)
+FPD_THREE_IN(fmadd)
+FPD_THREE_IN(fnmsub)
+FPD_THREE_IN(fnmadd)
+
+extern void kvm_cvt_fd(u32 *from, u64 *to);
+extern void kvm_cvt_df(u64 *from, u32 *to);
+
+#endif
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
new file mode 100644
index 0000000000..68e499abdb
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_KVM_GUEST_H_
+#define _ASM_POWERPC_KVM_GUEST_H_
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(kvm_guest);
+
+static inline bool is_kvm_guest(void)
+{
+	return static_branch_unlikely(&kvm_guest);
+}
+
+int __init check_kvm_guest(void);
+#else
+static inline bool is_kvm_guest(void) { return false; }
+static inline int check_kvm_guest(void) { return 0; }
+#endif
+
+#endif /* _ASM_POWERPC_KVM_GUEST_H_ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
new file mode 100644
index 0000000000..14ee0dece8
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -0,0 +1,886 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_HOST_H__
+#define __POWERPC_KVM_HOST_H__
+
+#include <linux/mutex.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/kvm_para.h>
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <asm/kvm_asm.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+#include <asm/hvcall.h>
+#include <asm/mce.h>
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+
+#define KVM_MAX_VCPUS		NR_CPUS
+#define KVM_MAX_VCORES		NR_CPUS
+
+#include <asm/cputhreads.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_IDS	(MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+/*
+ * Limit the nested partition table to 4096 entries (because that's what
+ * hardware supports). Both guest and host use this value.
+ */
+#define KVM_MAX_NESTED_GUESTS_SHIFT	12
+
+#else
+#define KVM_MAX_VCPU_IDS	KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
+#define KVM_HALT_POLL_NS_DEFAULT 10000	/* 10 us */
+
+/* These values are internal and can be increased later */
+#define KVM_NR_IRQCHIPS          1
+#define KVM_IRQCHIP_NUM_PINS     256
+
+/* PPC-specific vcpu->requests bit members */
+#define KVM_REQ_WATCHDOG	KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT	KVM_ARCH_REQ(1)
+#define KVM_REQ_PENDING_TIMER	KVM_ARCH_REQ(2)
+
+#include <linux/mmu_notifier.h>
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+#define HPTEG_CACHE_NUM			(1 << 15)
+#define HPTEG_HASH_BITS_PTE		13
+#define HPTEG_HASH_BITS_PTE_LONG	12
+#define HPTEG_HASH_BITS_VPTE		13
+#define HPTEG_HASH_BITS_VPTE_LONG	5
+#define HPTEG_HASH_BITS_VPTE_64K	11
+#define HPTEG_HASH_NUM_PTE		(1 << HPTEG_HASH_BITS_PTE)
+#define HPTEG_HASH_NUM_PTE_LONG		(1 << HPTEG_HASH_BITS_PTE_LONG)
+#define HPTEG_HASH_NUM_VPTE		(1 << HPTEG_HASH_BITS_VPTE)
+#define HPTEG_HASH_NUM_VPTE_LONG	(1 << HPTEG_HASH_BITS_VPTE_LONG)
+#define HPTEG_HASH_NUM_VPTE_64K		(1 << HPTEG_HASH_BITS_VPTE_64K)
+
+/* Physical Address Mask - allowed range of real mode RAM access */
+#define KVM_PAM			0x0fffffffffffffffULL
+
+struct lppaca;
+struct slb_shadow;
+struct dtl_entry;
+
+struct kvmppc_vcpu_book3s;
+struct kvmppc_book3s_shadow_vcpu;
+struct kvm_nested_guest;
+
+struct kvm_vm_stat {
+	struct kvm_vm_stat_generic generic;
+	u64 num_2M_pages;
+	u64 num_1G_pages;
+};
+
+struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
+	u64 sum_exits;
+	u64 mmio_exits;
+	u64 signal_exits;
+	u64 light_exits;
+	/* Account for special types of light exits: */
+	u64 itlb_real_miss_exits;
+	u64 itlb_virt_miss_exits;
+	u64 dtlb_real_miss_exits;
+	u64 dtlb_virt_miss_exits;
+	u64 syscall_exits;
+	u64 isi_exits;
+	u64 dsi_exits;
+	u64 emulated_inst_exits;
+	u64 dec_exits;
+	u64 ext_intr_exits;
+	u64 halt_successful_wait;
+	u64 dbell_exits;
+	u64 gdbell_exits;
+	u64 ld;
+	u64 st;
+#ifdef CONFIG_PPC_BOOK3S
+	u64 pf_storage;
+	u64 pf_instruc;
+	u64 sp_storage;
+	u64 sp_instruc;
+	u64 queue_intr;
+	u64 ld_slow;
+	u64 st_slow;
+#endif
+	u64 pthru_all;
+	u64 pthru_host;
+	u64 pthru_bad_aff;
+};
+
+enum kvm_exit_types {
+	MMIO_EXITS,
+	SIGNAL_EXITS,
+	ITLB_REAL_MISS_EXITS,
+	ITLB_VIRT_MISS_EXITS,
+	DTLB_REAL_MISS_EXITS,
+	DTLB_VIRT_MISS_EXITS,
+	SYSCALL_EXITS,
+	ISI_EXITS,
+	DSI_EXITS,
+	EMULATED_INST_EXITS,
+	EMULATED_MTMSRWE_EXITS,
+	EMULATED_WRTEE_EXITS,
+	EMULATED_MTSPR_EXITS,
+	EMULATED_MFSPR_EXITS,
+	EMULATED_MTMSR_EXITS,
+	EMULATED_MFMSR_EXITS,
+	EMULATED_TLBSX_EXITS,
+	EMULATED_TLBWE_EXITS,
+	EMULATED_RFI_EXITS,
+	EMULATED_RFCI_EXITS,
+	EMULATED_RFDI_EXITS,
+	DEC_EXITS,
+	EXT_INTR_EXITS,
+	HALT_WAKEUP,
+	USR_PR_INST,
+	FP_UNAVAIL,
+	DEBUG_EXITS,
+	TIMEINGUEST,
+	DBELL_EXITS,
+	GDBELL_EXITS,
+	__NUMBER_OF_KVM_EXIT_TYPES
+};
+
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct kvmppc_exit_timing {
+	union {
+		u64 tv64;
+		struct {
+			u32 tbu, tbl;
+		} tv32;
+	};
+};
+
+struct kvmppc_pginfo {
+	unsigned long pfn;
+	atomic_t refcnt;
+};
+
+struct kvmppc_spapr_tce_iommu_table {
+	struct rcu_head rcu;
+	struct list_head next;
+	struct iommu_table *tbl;
+	struct kref kref;
+};
+
+#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))
+
+struct kvmppc_spapr_tce_table {
+	struct list_head list;
+	struct kvm *kvm;
+	u64 liobn;
+	struct rcu_head rcu;
+	u32 page_shift;
+	u64 offset;		/* in pages */
+	u64 size;		/* window size in pages */
+	struct list_head iommu_tables;
+	struct mutex alloc_lock;
+	struct page *pages[];
+};
+
+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+extern struct kvm_device_ops kvm_xics_ops;
+
+/* XIVE components, defined in book3s_xive.c */
+struct kvmppc_xive;
+struct kvmppc_xive_vcpu;
+extern struct kvm_device_ops kvm_xive_ops;
+extern struct kvm_device_ops kvm_xive_native_ops;
+
+struct kvmppc_passthru_irqmap;
+
+/*
+ * The reverse mapping array has one entry for each HPTE,
+ * which stores the guest's view of the second word of the HPTE
+ * (including the guest physical address of the mapping),
+ * plus forward and backward pointers in a doubly-linked ring
+ * of HPTEs that map the same host page.  The pointers in this
+ * ring are 32-bit HPTE indexes, to save space.
+ */
+struct revmap_entry {
+	unsigned long guest_rpte;
+	unsigned int forw, back;
+};
+
+/*
+ * The rmap array of size number of guest pages is allocated for each memslot.
+ * This array is used to store usage specific information about the guest page.
+ * Below are the encodings of the various possible usage types.
+ */
+/* Free bits which can be used to define a new usage */
+#define KVMPPC_RMAP_TYPE_MASK	0xff00000000000000
+#define KVMPPC_RMAP_NESTED	0xc000000000000000	/* Nested rmap array */
+#define KVMPPC_RMAP_HPT		0x0100000000000000	/* HPT guest */
+
+/*
+ * rmap usage definition for a hash page table (hpt) guest:
+ * 0x0000080000000000	Lock bit
+ * 0x0000018000000000	RC bits
+ * 0x0000000100000000	Present bit
+ * 0x00000000ffffffff	HPT index bits
+ * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ * the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT	43
+#define KVMPPC_RMAP_RC_SHIFT	32
+#define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_PRESENT	0x100000000ul
+#define KVMPPC_RMAP_INDEX	0xfffffffful
+
+struct kvm_arch_memory_slot {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	unsigned long *rmap;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+};
+
+struct kvm_hpt_info {
+	/* Host virtual (linear mapping) address of guest HPT */
+	unsigned long virt;
+	/* Array of reverse mapping entries for each guest HPTE */
+	struct revmap_entry *rev;
+	/* Guest HPT size is 2**(order) bytes */
+	u32 order;
+	/* 1 if HPT allocated with CMA, 0 otherwise */
+	int cma;
+};
+
+struct kvm_resize_hpt;
+
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE  0x2 /* H_SVM_INIT_DONE completed */
+#define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */
+
+struct kvm_arch {
+	unsigned int lpid;
+	unsigned int smt_mode;		/* # vcpus per virtual core */
+	unsigned int emul_smt_mode;	/* emulated SMT mode, on P9 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	unsigned int tlb_sets;
+	struct kvm_hpt_info hpt;
+	atomic64_t mmio_update;
+	unsigned int host_lpid;
+	unsigned long host_lpcr;
+	unsigned long sdr1;
+	unsigned long host_sdr1;
+	unsigned long lpcr;
+	unsigned long vrma_slb_v;
+	int mmu_ready;
+	atomic_t vcpus_running;
+	u32 online_vcores;
+	atomic_t hpte_mod_interest;
+	cpumask_t need_tlb_flush;
+	u8 radix;
+	u8 fwnmi_enabled;
+	u8 secure_guest;
+	u8 svm_enabled;
+	bool nested_enable;
+	bool dawr1_enabled;
+	pgd_t *pgtable;
+	u64 process_table;
+	struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	struct mutex hpt_mutex;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct list_head spapr_tce_tables;
+	struct list_head rtas_tokens;
+	struct mutex rtas_token_lock;
+	DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
+#endif
+#ifdef CONFIG_KVM_MPIC
+	struct openpic *mpic;
+#endif
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_xics *xics;
+	struct kvmppc_xics *xics_device;
+	struct kvmppc_xive *xive;    /* Current XIVE device in use */
+	struct {
+		struct kvmppc_xive *native;
+		struct kvmppc_xive *xics_on_xive;
+	} xive_devices;
+	struct kvmppc_passthru_irqmap *pimap;
+#endif
+	struct kvmppc_ops *kvm_ops;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	struct mutex uvmem_lock;
+	struct list_head uvmem_pfns;
+	struct mutex mmu_setup_lock;	/* nests inside vcpu mutexes */
+	u64 l1_ptcr;
+	struct idr kvm_nested_guest_idr;
+	/* This array can grow quite large, keep it at the end */
+	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
+#endif
+};
+
+#define VCORE_ENTRY_MAP(vc)	((vc)->entry_exit_map & 0xff)
+#define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
+#define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
+
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ		0x10000
+
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
+#define VCORE_INACTIVE	0
+#define VCORE_PREEMPT	1
+#define VCORE_PIGGYBACK	2
+#define VCORE_SLEEPING	3
+#define VCORE_RUNNING	4
+#define VCORE_EXITING	5
+#define VCORE_POLLING	6
+
+/*
+ * Struct used to manage memory for a virtual processor area
+ * registered by a PAPR guest.  There are three types of area
+ * that a guest can register.
+ */
+struct kvmppc_vpa {
+	unsigned long gpa;	/* Current guest phys addr */
+	void *pinned_addr;	/* Address in kernel linear mapping */
+	void *pinned_end;	/* End of region */
+	unsigned long next_gpa;	/* Guest phys addr for update */
+	unsigned long len;	/* Number of bytes required */
+	u8 update_pending;	/* 1 => update pinned_addr from next_gpa */
+	bool dirty;		/* true => area has been modified by kernel */
+};
+
+struct kvmppc_pte {
+	ulong eaddr;
+	u64 vpage;
+	ulong raddr;
+	bool may_read		: 1;
+	bool may_write		: 1;
+	bool may_execute	: 1;
+	unsigned long wimg;
+	unsigned long rc;
+	u8 page_size;		/* MMU_PAGE_xxx */
+	u8 page_shift;
+};
+
+struct kvmppc_mmu {
+	/* book3s_64 only */
+	void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs);
+	u64  (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr);
+	u64  (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr);
+	int  (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb);
+	void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr);
+	void (*slbia)(struct kvm_vcpu *vcpu);
+	/* book3s */
+	void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
+	u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
+	int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
+		      struct kvmppc_pte *pte, bool data, bool iswrite);
+	void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
+	int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
+	u64  (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
+	bool (*is_dcbz32)(struct kvm_vcpu *vcpu);
+};
+
+struct kvmppc_slb {
+	u64 esid;
+	u64 vsid;
+	u64 orige;
+	u64 origv;
+	bool valid	: 1;
+	bool Ks		: 1;
+	bool Kp		: 1;
+	bool nx		: 1;
+	bool large	: 1;	/* PTEs are 16MB */
+	bool tb		: 1;	/* 1TB segment */
+	bool class	: 1;
+	u8 base_page_size;	/* MMU_PAGE_xxx */
+};
+
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+	u64	seqcount;	/* used to synchronize access, also count * 2 */
+	u64	tb_total;	/* total time in timebase ticks */
+	u64	tb_min;		/* min time */
+	u64	tb_max;		/* max time */
+};
+
+#ifdef CONFIG_PPC_BOOK3S_64
+struct kvmppc_irq_map {
+	u32	r_hwirq;
+	u32	v_hwirq;
+	struct irq_desc *desc;
+};
+
+#define	KVMPPC_PIRQ_MAPPED	1024
+struct kvmppc_passthru_irqmap {
+	int n_mapped;
+	struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
+};
+#endif
+
+# ifdef CONFIG_PPC_E500
+#define KVMPPC_BOOKE_IAC_NUM	2
+#define KVMPPC_BOOKE_DAC_NUM	2
+# else
+#define KVMPPC_BOOKE_IAC_NUM	4
+#define KVMPPC_BOOKE_DAC_NUM	2
+# endif
+#define KVMPPC_BOOKE_MAX_IAC	4
+#define KVMPPC_BOOKE_MAX_DAC	2
+
+/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
+#define KVMPPC_EPR_NONE		0 /* EPR not supported */
+#define KVMPPC_EPR_USER		1 /* exit to userspace to fill EPR */
+#define KVMPPC_EPR_KERNEL	2 /* in-kernel irqchip */
+
+#define KVMPPC_IRQ_DEFAULT	0
+#define KVMPPC_IRQ_MPIC		1
+#define KVMPPC_IRQ_XICS		2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE		3 /* XIVE native exploitation mode */
+
+#define MMIO_HPTE_CACHE_SIZE	4
+
+struct mmio_hpte_cache_entry {
+	unsigned long hpte_v;
+	unsigned long hpte_r;
+	unsigned long rpte;
+	unsigned long pte_index;
+	unsigned long eaddr;
+	unsigned long slb_v;
+	long mmio_update;
+	unsigned int slb_base_pshift;
+};
+
+struct mmio_hpte_cache {
+	struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
+	unsigned int index;
+};
+
+#define KVMPPC_VSX_COPY_NONE		0
+#define KVMPPC_VSX_COPY_WORD		1
+#define KVMPPC_VSX_COPY_DWORD		2
+#define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP	3
+#define KVMPPC_VSX_COPY_WORD_LOAD_DUMP	4
+
+#define KVMPPC_VMX_COPY_BYTE		8
+#define KVMPPC_VMX_COPY_HWORD		9
+#define KVMPPC_VMX_COPY_WORD		10
+#define KVMPPC_VMX_COPY_DWORD		11
+
+struct openpic;
+
+/* W0 and W1 of a XIVE thread management context */
+union xive_tma_w01 {
+	struct {
+		u8	nsr;
+		u8	cppr;
+		u8	ipb;
+		u8	lsmfb;
+		u8	ack;
+		u8	inc;
+		u8	age;
+		u8	pipr;
+	};
+	__be64 w01;
+};
+
+struct kvm_vcpu_arch {
+	ulong host_stack;
+	u32 host_pid;
+#ifdef CONFIG_PPC_BOOK3S
+	struct kvmppc_slb slb[64];
+	int slb_max;		/* 1 + index of last valid entry in slb[] */
+	int slb_nr;		/* total number of entries in SLB */
+	struct kvmppc_mmu mmu;
+	struct kvmppc_vcpu_book3s *book3s;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+	struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
+#endif
+
+	/*
+	 * This is passed along to the HV via H_ENTER_NESTED. Align to
+	 * prevent it crossing a real 4K page.
+	 */
+	struct pt_regs regs __aligned(512);
+
+	struct thread_fp_state fp;
+
+#ifdef CONFIG_SPE
+	ulong evr[32];
+	ulong spefscr;
+	ulong host_spefscr;
+	u64 acc;
+#endif
+#ifdef CONFIG_ALTIVEC
+	struct thread_vr_state vr;
+#endif
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	u32 host_mas4;
+	u32 host_mas6;
+	u32 shadow_epcr;
+	u32 shadow_msrp;
+	u32 eplc;
+	u32 epsc;
+	u32 oldpir;
+#endif
+
+#if defined(CONFIG_BOOKE)
+#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
+	u32 epcr;
+#endif
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+	/* For Gekko paired singles */
+	u32 qpr[32];
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+	ulong tar;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+	ulong hflags;
+	ulong guest_owned_ext;
+	ulong purr;
+	ulong spurr;
+	ulong ic;
+	ulong dscr;
+	ulong amr;
+	ulong uamor;
+	ulong iamr;
+	u32 ctrl;
+	u32 dabrx;
+	ulong dabr;
+	ulong dawr0;
+	ulong dawrx0;
+	ulong dawr1;
+	ulong dawrx1;
+	ulong ciabr;
+	ulong cfar;
+	ulong ppr;
+	u32 pspb;
+	u8 load_ebb;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	u8 load_tm;
+#endif
+	ulong fscr;
+	ulong shadow_fscr;
+	ulong ebbhr;
+	ulong ebbrr;
+	ulong bescr;
+	ulong csigr;
+	ulong tacr;
+	ulong tcscr;
+	ulong acop;
+	ulong wort;
+	ulong tid;
+	ulong psscr;
+	ulong hfscr;
+	ulong shadow_srr1;
+#endif
+	u32 vrsave; /* also USPRG0 */
+	u32 mmucr;
+	/* shadow_msr is unused for BookE HV */
+	ulong shadow_msr;
+	ulong csrr0;
+	ulong csrr1;
+	ulong dsrr0;
+	ulong dsrr1;
+	ulong mcsrr0;
+	ulong mcsrr1;
+	ulong mcsr;
+	ulong dec;
+#ifdef CONFIG_BOOKE
+	u32 decar;
+#endif
+	/* Time base value when we entered the guest */
+	u64 entry_tb;
+	u64 entry_vtb;
+	u64 entry_ic;
+	u32 tcr;
+	ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
+	u32 ivor[64];
+	ulong ivpr;
+	u32 pvr;
+
+	u32 shadow_pid;
+	u32 shadow_pid1;
+	u32 pid;
+	u32 swap_pid;
+
+	u32 ccr0;
+	u32 ccr1;
+	u32 dbsr;
+
+	u64 mmcr[4];	/* MMCR0, MMCR1, MMCR2, MMCR3 */
+	u64 mmcra;
+	u64 mmcrs;
+	u32 pmc[8];
+	u32 spmc[2];
+	u64 siar;
+	u64 sdar;
+	u64 sier[3];
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	u64 tfhar;
+	u64 texasr;
+	u64 tfiar;
+	u64 orig_texasr;
+
+	u32 cr_tm;
+	u64 xer_tm;
+	u64 lr_tm;
+	u64 ctr_tm;
+	u64 amr_tm;
+	u64 ppr_tm;
+	u64 dscr_tm;
+	u64 tar_tm;
+
+	ulong gpr_tm[32];
+
+	struct thread_fp_state fp_tm;
+
+	struct thread_vr_state vr_tm;
+	u32 vrsave_tm; /* also USPRG0 */
+#endif
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	struct mutex exit_timing_lock;
+	struct kvmppc_exit_timing timing_exit;
+	struct kvmppc_exit_timing timing_last_enter;
+	u32 last_exit_type;
+	u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+	u64 timing_last_exit;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+	ulong fault_dar;
+	u32 fault_dsisr;
+	unsigned long intr_msr;
+	/*
+	 * POWER9 and later: fault_gpa contains the guest real address of page
+	 * fault for a radix guest, or segment descriptor (equivalent to result
+	 * from slbmfev of SLB entry that translated the EA) for hash guests.
+	 */
+	ulong fault_gpa;
+#endif
+
+#ifdef CONFIG_BOOKE
+	ulong fault_dear;
+	ulong fault_esr;
+	ulong queued_dear;
+	ulong queued_esr;
+	spinlock_t wdt_lock;
+	struct timer_list wdt_timer;
+	u32 tlbcfg[4];
+	u32 tlbps[4];
+	u32 mmucfg;
+	u32 eptcfg;
+	u32 epr;
+	u64 sprg9;
+	u32 pwrmgtcr0;
+	u32 crit_save;
+	/* guest debug registers*/
+	struct debug_reg dbg_reg;
+#endif
+	gpa_t paddr_accessed;
+	gva_t vaddr_accessed;
+	pgd_t *pgdir;
+
+	u16 io_gpr; /* GPR used as IO source/target */
+	u8 mmio_host_swabbed;
+	u8 mmio_sign_extend;
+	/* conversion between single and double precision */
+	u8 mmio_sp64_extend;
+	/*
+	 * Number of simulations for vsx.
+	 * If we use 2*8bytes to simulate 1*16bytes,
+	 * then the number should be 2 and
+	 * mmio_copy_type=KVMPPC_VSX_COPY_DWORD.
+	 * If we use 4*4bytes to simulate 1*16bytes,
+	 * the number should be 4 and
+	 * mmio_copy_type=KVMPPC_VSX_COPY_WORD.
+	 */
+	u8 mmio_vsx_copy_nums;
+	u8 mmio_vsx_offset;
+	u8 mmio_vmx_copy_nums;
+	u8 mmio_vmx_offset;
+	u8 mmio_copy_type;
+	u8 osi_needed;
+	u8 osi_enabled;
+	u8 papr_enabled;
+	u8 watchdog_enabled;
+	u8 sane;
+	u8 cpu_type;
+	u8 hcall_needed;
+	u8 epr_flags; /* KVMPPC_EPR_xxx */
+	u8 epr_needed;
+	u8 external_oneshot;	/* clear external irq after delivery */
+
+	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
+
+	struct hrtimer dec_timer;
+	u64 dec_jiffies;
+	u64 dec_expires;	/* Relative to guest timebase. */
+	unsigned long pending_exceptions;
+	u8 ceded;
+	u8 prodded;
+	u8 doorbell_request;
+	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
+	unsigned long last_inst;
+
+	struct rcuwait wait;
+	struct rcuwait *waitp;
+	struct kvmppc_vcore *vcore;
+	int ret;
+	int trap;
+	int state;
+	int ptid;
+	int thread_cpu;
+	int prev_cpu;
+	bool timer_running;
+	wait_queue_head_t cpu_run;
+	struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
+
+	struct kvm_vcpu_arch_shared *shared;
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+	bool shared_big_endian;
+#endif
+	unsigned long magic_page_pa; /* phys addr to map the magic page to */
+	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
+	bool disable_kernel_nx;
+
+	int irq_type;		/* one of KVMPPC_IRQ_* */
+	int irq_cpu_id;
+	struct openpic *mpic;	/* KVMPPC_IRQ_MPIC */
+#ifdef CONFIG_KVM_XICS
+	struct kvmppc_icp *icp; /* XICS presentation controller */
+	struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
+	__be32 xive_cam_word;    /* Cooked W2 in proper endian with valid bit */
+	u8 xive_pushed;		 /* Is the VP pushed on the physical CPU ? */
+	u8 xive_esc_on;		 /* Is the escalation irq enabled ? */
+	union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+	u64 xive_esc_raddr;	 /* Escalation interrupt ESB real addr */
+	u64 xive_esc_vaddr;	 /* Escalation interrupt ESB virt addr */
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	struct kvm_vcpu_arch_shared shregs;
+
+	struct mmio_hpte_cache mmio_cache;
+	unsigned long pgfault_addr;
+	long pgfault_index;
+	unsigned long pgfault_hpte[2];
+	struct mmio_hpte_cache_entry *pgfault_cache;
+
+	struct task_struct *run_task;
+
+	spinlock_t vpa_update_lock;
+	struct kvmppc_vpa vpa;
+	struct kvmppc_vpa dtl;
+	struct dtl_entry *dtl_ptr;
+	unsigned long dtl_index;
+	u64 stolen_logged;
+	struct kvmppc_vpa slb_shadow;
+
+	spinlock_t tbacct_lock;
+	u64 busy_stolen;
+	u64 busy_preempt;
+
+	u64 emul_inst;
+
+	u32 online;
+
+	u64 hfscr_permitted;	/* A mask of permitted HFSCR facilities */
+
+	/* For support of nested guests */
+	struct kvm_nested_guest *nested;
+	u64 nested_hfscr;	/* HFSCR that the L1 requested for the nested guest */
+	u32 nested_vcpu_id;
+	gpa_t nested_io_gpr;
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	struct kvmhv_tb_accumulator *cur_activity;	/* What we're timing */
+	u64	cur_tb_start;			/* when it started */
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+	struct kvmhv_tb_accumulator vcpu_entry;
+	struct kvmhv_tb_accumulator vcpu_exit;
+	struct kvmhv_tb_accumulator in_guest;
+	struct kvmhv_tb_accumulator hcall;
+	struct kvmhv_tb_accumulator pg_fault;
+	struct kvmhv_tb_accumulator guest_entry;
+	struct kvmhv_tb_accumulator guest_exit;
+#else
+	struct kvmhv_tb_accumulator rm_entry;	/* real-mode entry code */
+	struct kvmhv_tb_accumulator rm_intr;	/* real-mode intr handling */
+	struct kvmhv_tb_accumulator rm_exit;	/* real-mode exit code */
+	struct kvmhv_tb_accumulator guest_time;	/* guest execution */
+	struct kvmhv_tb_accumulator cede_time;	/* time napping inside guest */
+#endif
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+};
+
+#define VCPU_FPR(vcpu, i)	(vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
+#define VCPU_VSX_FPR(vcpu, i, j)	((vcpu)->arch.fp.fpr[i][j])
+#define VCPU_VSX_VR(vcpu, i)		((vcpu)->arch.vr.vr[i])
+
+/* Values for vcpu->arch.state */
+#define KVMPPC_VCPU_NOTREADY		0
+#define KVMPPC_VCPU_RUNNABLE		1
+#define KVMPPC_VCPU_BUSY_IN_HOST	2
+
+/* Values for vcpu->arch.io_gpr */
+#define KVM_MMIO_REG_MASK	0x003f
+#define KVM_MMIO_REG_EXT_MASK	0xffc0
+#define KVM_MMIO_REG_GPR	0x0000
+#define KVM_MMIO_REG_FPR	0x0040
+#define KVM_MMIO_REG_QPR	0x0080
+#define KVM_MMIO_REG_FQPR	0x00c0
+#define KVM_MMIO_REG_VSX	0x0100
+#define KVM_MMIO_REG_VMX	0x0180
+#define KVM_MMIO_REG_NESTED_GPR	0xffc0
+
+
+#define __KVM_HAVE_ARCH_WQP
+#define __KVM_HAVE_CREATE_DEVICE
+
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
+#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
new file mode 100644
index 0000000000..abe1b5e825
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+#ifndef __POWERPC_KVM_PARA_H__
+#define __POWERPC_KVM_PARA_H__
+
+#include <asm/kvm_guest.h>
+
+#include <uapi/asm/kvm_para.h>
+
+static inline int kvm_para_available(void)
+{
+	return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	unsigned long r;	/* presumably filled in by the hypercall helper */
+
+	if (!kvm_para_available())	/* no CONFIG_KVM_GUEST or not a KVM guest */
+		return 0;
+
+	if (epapr_hypercall0_1(KVM_HCALL_TOKEN(KVM_HC_FEATURES), &r))
+		return 0;	/* nonzero return from the helper: report no features */
+
+	return r;	/* narrowed to the unsigned int return type */
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+
+#endif /* __POWERPC_KVM_PARA_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
new file mode 100644
index 0000000000..b4da8514af
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -0,0 +1,1077 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_PPC_H__
+#define __POWERPC_KVM_PPC_H__
+
+/* This file exists just so we can dereference kvm_vcpu, avoiding nested header
+ * dependencies. */
+
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+#include <linux/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/kvm_book3s.h>
+#else
+#include <asm/kvm_booke.h>
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/paca.h>
+#include <asm/xive.h>
+#include <asm/cpu_has_feature.h>
+#endif
+#include <asm/inst.h>
+
+/*
+ * KVMPPC_INST_SW_BREAKPOINT is the debug instruction
+ * used to support software breakpoints.
+ */
+#define KVMPPC_INST_SW_BREAKPOINT	0x00dddd00
+
+enum emulation_result {
+	EMULATE_DONE,         /* no further processing */
+	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
+	EMULATE_FAIL,         /* can't emulate this instruction */
+	EMULATE_AGAIN,        /* something went wrong. go again */
+	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
+};
+
+enum instruction_fetch_type {
+	INST_GENERIC,
+	INST_SC,		/* system call */
+};
+
+enum xlate_instdata {
+	XLATE_INST,		/* translate instruction address */
+	XLATE_DATA		/* translate data address */
+};
+
+enum xlate_readwrite {
+	XLATE_READ,		/* check for read permissions */
+	XLATE_WRITE		/* check for write permissions */
+};
+
+extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
+extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
+extern void kvmppc_handler_highmem(void);
+
+extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_handle_load(struct kvm_vcpu *vcpu,
+                              unsigned int rt, unsigned int bytes,
+			      int is_default_endian);
+extern int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
+                               unsigned int rt, unsigned int bytes,
+			       int is_default_endian);
+extern int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
+				unsigned int rt, unsigned int bytes,
+			int is_default_endian, int mmio_sign_extend);
+extern int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
+		unsigned int rt, unsigned int bytes, int is_default_endian);
+extern int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
+		unsigned int rs, unsigned int bytes, int is_default_endian);
+extern int kvmppc_handle_store(struct kvm_vcpu *vcpu,
+			       u64 val, unsigned int bytes,
+			       int is_default_endian);
+extern int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
+				int rs, unsigned int bytes,
+				int is_default_endian);
+
+extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+				 enum instruction_fetch_type type,
+				 unsigned long *inst);
+
+extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+		     bool data);
+extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+		     bool data);
+extern int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu);
+extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
+extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
+extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+/* Core-specific hooks */
+
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+                           unsigned int gtlb_idx);
+extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
+extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+                              gva_t eaddr);
+extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
+extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr,
+			enum xlate_instdata xlid, enum xlate_readwrite xlrw,
+			struct kvmppc_pte *pte);
+
+extern int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                                      struct kvm_translation *tr);
+
+extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
+
+extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu,
+					    ulong srr1_flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu,
+				      ulong srr1_flags);
+extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu,
+					ulong srr1_flags);
+extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu,
+					  ulong srr1_flags);
+extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu,
+					  ulong srr1_flags);
+extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                       struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+					ulong dear_flags,
+					ulong esr_flags);
+extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+					   ulong srr1_flags,
+					   ulong dar,
+					   ulong dsisr);
+extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
+					   ulong srr1_flags);
+
+extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
+
+extern int kvmppc_booke_init(void);
+extern void kvmppc_booke_exit(void);
+
+extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
+extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
+
+extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
+extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
+extern int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
+extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
+extern void kvmppc_rmap_reset(struct kvm *kvm);
+extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
+			struct kvm_memory_slot *memslot, unsigned long porder);
+extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+		struct iommu_group *grp);
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+		struct iommu_group *grp);
+extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
+extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
+extern void kvmppc_setup_partition_table(struct kvm *kvm);
+
+extern int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+				struct kvm_create_spapr_tce_64 *args);
+#define kvmppc_ioba_validate(stt, ioba, npages)                         \
+		(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
+				(stt)->size, (ioba), (npages)) ?        \
+				H_PARAMETER : H_SUCCESS)
+extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+			     unsigned long ioba, unsigned long tce);
+extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+		unsigned long liobn, unsigned long ioba,
+		unsigned long tce_list, unsigned long npages);
+extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+		unsigned long liobn, unsigned long ioba,
+		unsigned long tce_value, unsigned long npages);
+extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+			     unsigned long ioba);
+extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages);
+extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
+extern int kvmppc_core_init_vm(struct kvm *kvm);
+extern void kvmppc_core_destroy_vm(struct kvm *kvm);
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+				     struct kvm_memory_slot *slot);
+extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				const struct kvm_memory_slot *old,
+				struct kvm_memory_slot *new,
+				enum kvm_mr_change change);
+extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
+				enum kvm_mr_change change);
+extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
+				      struct kvm_ppc_smmu_info *info);
+extern void kvmppc_core_flush_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot);
+
+extern int kvmppc_bookehv_init(void);
+extern void kvmppc_bookehv_exit(void);
+
+extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
+
+extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+extern int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+					   struct kvm_ppc_resize_hpt *rhpt);
+extern int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+					  struct kvm_ppc_resize_hpt *rhpt);
+
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu);
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu);
+
+union kvmppc_one_reg {
+	u32	wval;
+	u64	dval;
+	vector128 vval;
+	u64	vsxval[2];
+	u32	vsx32val[4];
+	u16	vsx16val[8];
+	u8	vsx8val[16];
+	struct {
+		u64	addr;
+		u64	length;
+	}	vpaval;
+	u64	xive_timaval[2];
+};
+
+struct kvmppc_ops {
+	struct module *owner;
+	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+	int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
+			   union kvmppc_one_reg *val);
+	int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id,
+			   union kvmppc_one_reg *val);
+	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
+	void (*vcpu_put)(struct kvm_vcpu *vcpu);
+	void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+	void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
+	int (*vcpu_run)(struct kvm_vcpu *vcpu);
+	int (*vcpu_create)(struct kvm_vcpu *vcpu);
+	void (*vcpu_free)(struct kvm_vcpu *vcpu);
+	int (*check_requests)(struct kvm_vcpu *vcpu);
+	int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
+	void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
+	int (*prepare_memory_region)(struct kvm *kvm,
+				     const struct kvm_memory_slot *old,
+				     struct kvm_memory_slot *new,
+				     enum kvm_mr_change change);
+	void (*commit_memory_region)(struct kvm *kvm,
+				     struct kvm_memory_slot *old,
+				     const struct kvm_memory_slot *new,
+				     enum kvm_mr_change change);
+	bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
+	bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
+	bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
+	bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
+	void (*free_memslot)(struct kvm_memory_slot *slot);
+	int (*init_vm)(struct kvm *kvm);
+	void (*destroy_vm)(struct kvm *kvm);
+	int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
+	int (*emulate_op)(struct kvm_vcpu *vcpu,
+			  unsigned int inst, int *advance);
+	int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
+	int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+	void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
+	int (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
+			     unsigned long arg);
+	int (*hcall_implemented)(unsigned long hcall);
+	int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
+				       struct irq_bypass_producer *);
+	void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
+					struct irq_bypass_producer *);
+	int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
+	int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+	int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
+			    unsigned long flags);
+	void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
+	int (*enable_nested)(struct kvm *kvm);
+	int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+			       int size);
+	int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+			      int size);
+	int (*enable_svm)(struct kvm *kvm);
+	int (*svm_off)(struct kvm *kvm);
+	int (*enable_dawr1)(struct kvm *kvm);
+	bool (*hash_v3_possible)(void);
+	int (*create_vm_debugfs)(struct kvm *kvm);
+	int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
+};
+
+extern struct kvmppc_ops *kvmppc_hv_ops;
+extern struct kvmppc_ops *kvmppc_pr_ops;
+
+/*
+ * Return the last guest instruction through @inst, loading it by hand via
+ * kvmppc_load_last_inst() if the cached copy is KVM_INST_FETCH_FAILED and
+ * byte-swapping it when kvmppc_need_byteswap() is true. A failed load stores
+ * KVM_INST_FETCH_FAILED in *inst and returns the loader's status.
+ */
+static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu,
+				enum instruction_fetch_type type, ppc_inst_t *inst)
+{
+	int status = EMULATE_DONE;
+	u32 word;
+	bool swap;
+
+	/* The exit path could not fetch the instruction; retry here. */
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+		status = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst);
+	if (status != EMULATE_DONE) {
+		/* The failure marker is reported as-is, never swapped. */
+		*inst = ppc_inst(KVM_INST_FETCH_FAILED);
+		return status;
+	}
+	swap = kvmppc_need_byteswap(vcpu);
+#ifdef CONFIG_PPC64
+	/* A non-zero upper word marks a prefixed instruction. */
+	if (vcpu->arch.last_inst >> 32) {
+		u32 prefix = vcpu->arch.last_inst >> 32;
+		u32 suffix = vcpu->arch.last_inst;
+
+		*inst = ppc_inst_prefix(swap ? swab32(prefix) : prefix,
+					swap ? swab32(suffix) : suffix);
+		return EMULATE_DONE;
+	}
+#endif
+	word = vcpu->arch.last_inst;
+	*inst = ppc_inst(swap ? swab32(word) : word);
+	return EMULATE_DONE;
+}
+
+static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
+{
+	return kvm->arch.kvm_ops == kvmppc_hv_ops;
+}
+
+extern int kvmppc_hwrng_present(void);
+
+/*
+ * Cuts out inst bits with ordering according to spec: bit 0 is the leftmost
+ * (most significant) bit of the 64-bit value, and both @msb and @lsb are
+ * included. The result is truncated to its low 32 bits.
+ */
+static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb)
+{
+	int width;
+
+	BUG_ON(msb > lsb);
+
+	/* Shift an unsigned 1, and never by 32 or more: both would be UB. */
+	width = lsb - msb + 1;
+	return (inst >> (63 - lsb)) &
+	       (width >= 32 ? ~(u32)0 : ((u32)1 << width) - 1);
+}
+
+/*
+ * Replaces inst bits with ordering according to spec; the result is a u32.
+ */
+static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
+{
+	int shift = 63 - lsb;
+	u32 mask;
+
+	BUG_ON(msb > lsb);
+
+	/* Unsigned operands throughout: a signed shift here can overflow. */
+	mask = lsb - msb >= 31 ? ~(u32)0 : ((u32)1 << (lsb - msb + 1)) - 1;
+	mask <<= shift;
+	return (inst & ~mask) | (((u32)value << shift) & mask);
+}
+
+#define one_reg_size(id)	\
+	(1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+/* Wrap (reg) in a union kvmppc_one_reg: wval if (id) is 4 bytes, dval if 8. */
+#define get_reg_val(id, reg)	({		\
+	union kvmppc_one_reg __u;		\
+	switch (one_reg_size(id)) {		\
+	case 4: __u.wval = (reg); break;	\
+	case 8: __u.dval = (reg); break;	\
+	default: BUG();				\
+	}					\
+	__u;					\
+})
+
+/* Read (val) back as a u64: .wval if (id) is 4 bytes, .dval if 8; else BUG(). */
+#define set_reg_val(id, val)	({		\
+	u64 __v;				\
+	switch (one_reg_size(id)) {		\
+	case 4: __v = (val).wval; break;	\
+	case 8: __v = (val).dval; break;	\
+	default: BUG();				\
+	}					\
+	__v;					\
+})
+
+int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
+
+struct openpic;
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+extern void kvm_cma_reserve(void) __init;
+static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
+{
+	paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
+}
+
+static inline void kvmppc_set_xive_tima(int cpu,
+					unsigned long phys_addr,
+					void __iomem *virt_addr)
+{
+	paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+	paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr;
+}
+
+/* Return kvm_hstate.saved_xirr from get_paca() and reset that field to 0. */
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	u32 latched = get_paca()->kvm_hstate.saved_xirr;
+
+	get_paca()->kvm_hstate.saved_xirr = 0;
+	return latched;
+}
+
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ *      CPU
+ *        X: smp_muxed_ipi_set_message():
+ *        X:   smp_mb()
+ *        X:   message[RESCHEDULE] = 1
+ *        X: doorbell_global_ipi(42):
+ *        X:   kvmppc_set_host_ipi(42)
+ *        X:   ppc_msgsnd_sync()/smp_mb()
+ *        X:   ppc_msgsnd() -> 42
+ *       42: doorbell_exception(): // from CPU X
+ *       42:   ppc_msgsync()
+ *      105: smp_muxed_ipi_set_message():
+ *      105:   smp_mb()
+ *           // STORE DEFERRED DUE TO RE-ORDERING
+ *    --105:   message[CALL_FUNCTION] = 1
+ *    | 105: doorbell_global_ipi(42):
+ *    | 105:   kvmppc_set_host_ipi(42)
+ *    |  42:   kvmppc_clear_host_ipi(42)
+ *    |  42: smp_ipi_demux_relaxed()
+ *    |  42: // returns to executing guest
+ *    |      // RE-ORDERED STORE COMPLETES
+ *    ->105:   message[CALL_FUNCTION] = 1
+ *      105:   ppc_msgsnd_sync()/smp_mb()
+ *      105:   ppc_msgsnd() -> 42
+ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ *      105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ *      CPU
+ *        X: smp_muxed_ipi_set_message():
+ *        X:   smp_mb()
+ *        X:   message[RESCHEDULE] = 1
+ *        X: doorbell_global_ipi(42):
+ *        X:   kvmppc_set_host_ipi(42)
+ *        X:   ppc_msgsnd_sync()/smp_mb()
+ *        X:   ppc_msgsnd() -> 42
+ *       42: doorbell_exception(): // from CPU X
+ *       42:   ppc_msgsync()
+ *           // STORE DEFERRED DUE TO RE-ORDERING
+ *    -- 42:   kvmppc_clear_host_ipi(42)
+ *    |  42: smp_ipi_demux_relaxed()
+ *    | 105: smp_muxed_ipi_set_message():
+ *    | 105:   smp_mb()
+ *    | 105:   message[CALL_FUNCTION] = 1
+ *    | 105: doorbell_global_ipi(42):
+ *    | 105:   kvmppc_set_host_ipi(42)
+ *    |      // RE-ORDERED STORE COMPLETES
+ *    -> 42:   kvmppc_clear_host_ipi(42)
+ *       42: // returns to executing guest
+ *      105:   ppc_msgsnd_sync()/smp_mb()
+ *      105:   ppc_msgsnd() -> 42
+ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ *      105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
+{
+	/*
+	 * order stores of IPI messages vs. setting of host_ipi flag
+	 *
+	 * pairs with the barrier in kvmppc_clear_host_ipi()
+	 */
+	smp_mb();
+	WRITE_ONCE(paca_ptrs[cpu]->kvm_hstate.host_ipi, 1);
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+	WRITE_ONCE(paca_ptrs[cpu]->kvm_hstate.host_ipi, 0);
+	/*
+	 * order clearing of host_ipi flag vs. processing of IPI messages
+	 *
+	 * pairs with the barrier in kvmppc_set_host_ipi()
+	 */
+	smp_mb();
+}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
+}
+
+extern void kvm_hv_vm_activated(void);
+extern void kvm_hv_vm_deactivated(void);
+extern bool kvm_hv_mode_active(void);
+
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu);
+
+#else
+static inline void __init kvm_cma_reserve(void)
+{}
+
+static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
+{}
+
+static inline void kvmppc_set_xive_tima(int cpu,
+					unsigned long phys_addr,
+					void __iomem *virt_addr)
+{}
+
+static inline u32 kvmppc_get_xics_latch(void)
+{
+	return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_kick(vcpu);
+}
+
+static inline bool kvm_hv_mode_active(void)		{ return false; }
+
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+static inline bool kvmhv_on_pseries(void)
+{
+	return !cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool kvmhv_on_pseries(void)
+{
+	return false;
+}
+#endif
+
+#ifdef CONFIG_KVM_XICS
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
+}
+
+/* Hand back @kvm's passthrough IRQ map (kvm->arch.pimap). */
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+				struct kvm *kvm)
+{
+	/* Without a VM, or with kvm_irq_bypass false, there is no map. */
+	return (!kvm || !kvm_irq_bypass) ? NULL : kvm->arch.pimap;
+}
+
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
+extern void kvmppc_free_pimap(struct kvm *kvm);
+extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
+extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
+			struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
+extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+				   unsigned long host_irq);
+extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+				   unsigned long host_irq);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
+					struct kvmppc_irq_map *irq_map,
+					struct kvmppc_passthru_irqmap *pimap,
+					bool *again);
+
+extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+			       int level, bool line_status);
+
+extern int h_ipi_redirect;
+#else
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+				struct kvm *kvm)
+	{ return NULL; }
+static inline void kvmppc_alloc_host_rm_ops(void) {}
+static inline void kvmppc_free_host_rm_ops(void) {}
+static inline void kvmppc_free_pimap(struct kvm *kvm) {}
+static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+	{ return 0; }
+static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
+	{ return 0; }
+static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+	{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+	{ return 0; }
+#endif
+
+#ifdef CONFIG_KVM_XIVE
+/*
+ * Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
+ * ie. P9 new interrupt controller, while the second "xive" is the legacy
+ * "eXternal Interrupt Vector Entry" which is the configuration of an
+ * interrupt on the "xics" interrupt controller on P8 and earlier. Those
+ * two functions consume or produce a legacy "XIVE" state from the
+ * new "XIVE" interrupt controller.
+ */
+extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				u32 priority);
+extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				u32 *priority);
+extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
+
+extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+				    struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+				  unsigned long host_irq);
+extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+				  unsigned long host_irq);
+extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+
+extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+			       int level, bool line_status);
+extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+					   struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+				     union kvmppc_one_reg *val);
+extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+				     union kvmppc_one_reg *val);
+extern bool kvmppc_xive_native_supported(void);
+
+#else
+static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+				       u32 priority) { return -1; }
+static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+				       u32 *priority) { return -1; }
+static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
+static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
+
+static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+					   struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+		unsigned long host_irq) { return -ENODEV; /* XIVE not built in */ }
+static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+		unsigned long host_irq) { return -ENODEV; /* XIVE not built in */ }
+static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
+static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
+
+static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+				      int level, bool line_status) { return -ENODEV; }
+static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { return true; }
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+	{ return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+			  struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+					    union kvmppc_one_reg *val)
+{ return 0; }
+static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+					    union kvmppc_one_reg *val)
+{ return -ENOENT; }
+
+#endif /* CONFIG_KVM_XIVE */
+
+#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
+static inline bool xics_on_xive(void)
+{
+	return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool xics_on_xive(void)
+{
+	return false;
+}
+#endif
+
+/*
+ * Prototypes for functions called only from assembler code.
+ * Having prototypes reduces sparse errors.
+ */
+long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+			 unsigned long ioba, unsigned long tce);
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+				  unsigned long liobn, unsigned long ioba,
+				  unsigned long tce_list, unsigned long npages);
+long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+			   unsigned long liobn, unsigned long ioba,
+			   unsigned long tce_value, unsigned long npages);
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+                            unsigned int yield_count);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
+void kvmhv_commence_exit(int trap);
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
+void kvmppc_subcore_enter_guest(void);
+void kvmppc_subcore_exit_guest(void);
+long kvmppc_realmode_hmi_handler(void);
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu);
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+                    long pte_index, unsigned long pteh, unsigned long ptel);
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+                     unsigned long pte_index, unsigned long avpn);
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+                      unsigned long pte_index, unsigned long avpn);
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+                   unsigned long pte_index);
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+                        unsigned long pte_index);
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+                        unsigned long pte_index);
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+			   unsigned long dest, unsigned long src);
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+                          unsigned long slb_v, unsigned int status, bool data);
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
+
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest operating on the xics.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+	unsigned long raw;
+	struct {
+		u32 in_host;
+		u32 rm_action;
+	};
+};
+
+struct kvmppc_host_rm_core {
+	union kvmppc_rm_state rm_state;
+	void *rm_data;
+	char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+	struct kvmppc_host_rm_core	*rm_core;
+	void		(*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
+static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+	return mfspr(SPRN_GEPR);
+#elif defined(CONFIG_BOOKE)
+	return vcpu->arch.epr;
+#else
+	return 0;
+#endif
+}
+
+static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
+{
+#ifdef CONFIG_KVM_BOOKE_HV
+	mtspr(SPRN_GEPR, epr);
+#elif defined(CONFIG_BOOKE)
+	vcpu->arch.epr = epr;
+#endif
+}
+
+#ifdef CONFIG_KVM_MPIC
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu);
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
+
+#else
+
+static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
+		struct kvm_vcpu *vcpu, u32 cpu)
+{
+	return -EINVAL;
+}
+
+static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
+		struct kvm_vcpu *vcpu)
+{
+}
+
+#endif /* CONFIG_KVM_MPIC */
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg);
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *cfg);
+
+long kvmppc_alloc_lpid(void);
+void kvmppc_free_lpid(long lpid);
+void kvmppc_init_lpid(unsigned long nr_lpids);
+
+static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
+{
+	struct folio *folio;
+	/*
+	 * Nothing to do for a pfn the kernel does not map as memory:
+	 * only pfn_valid() pfns are turned into a page below.
+	 */
+	if (!pfn_valid(pfn))
+		return;
+
+	/* Skip folios already marked PG_dcache_clean; mark after flushing */
+	folio = page_folio(pfn_to_page(pfn));
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		return;
+	flush_dcache_icache_folio(folio);
+	set_bit(PG_dcache_clean, &folio->flags);
+}
+
+/*
+ * Shared struct helpers. The shared struct can be little or big endian,
+ * depending on the guest endianness, so provide accessors for both layouts.
+ */
+static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+	/* Only Book3S_64 PR supports bi-endian for now */
+	return vcpu->arch.shared_big_endian;
+#elif defined(CONFIG_PPC_BOOK3S_64) && defined(__LITTLE_ENDIAN__)
+	/* Book3s_64 HV on little endian is always little endian */
+	return false;
+#else
+	return true;
+#endif
+}
+
+#define SPRNG_WRAPPER_GET(reg, bookehv_spr)				\
+static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu)		\
+{									\
+	return mfspr(bookehv_spr);					\
+}									\
+
+#define SPRNG_WRAPPER_SET(reg, bookehv_spr)				\
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val)	\
+{									\
+	mtspr(bookehv_spr, val);						\
+}									\
+
+#define SHARED_WRAPPER_GET(reg, size)					\
+static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu)		\
+{									\
+	if (kvmppc_shared_big_endian(vcpu))				\
+	       return be##size##_to_cpu(vcpu->arch.shared->reg);	\
+	else								\
+	       return le##size##_to_cpu(vcpu->arch.shared->reg);	\
+}									\
+
+#define SHARED_WRAPPER_SET(reg, size)					\
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val)	\
+{									\
+	if (kvmppc_shared_big_endian(vcpu))				\
+	       vcpu->arch.shared->reg = cpu_to_be##size(val);		\
+	else								\
+	       vcpu->arch.shared->reg = cpu_to_le##size(val);		\
+}									\
+
+#define SHARED_WRAPPER(reg, size)					\
+	SHARED_WRAPPER_GET(reg, size)					\
+	SHARED_WRAPPER_SET(reg, size)					\
+
+#define SPRNG_WRAPPER(reg, bookehv_spr)					\
+	SPRNG_WRAPPER_GET(reg, bookehv_spr)				\
+	SPRNG_WRAPPER_SET(reg, bookehv_spr)				\
+
+#ifdef CONFIG_KVM_BOOKE_HV
+
+#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr)			\
+	SPRNG_WRAPPER(reg, bookehv_spr)					\
+
+#else
+
+#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr)			\
+	SHARED_WRAPPER(reg, size)					\
+
+#endif
+
+SHARED_WRAPPER(critical, 64)
+SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0)
+SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1)
+SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2)
+SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3)
+SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0)
+SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1)
+SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR)
+SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR)
+SHARED_WRAPPER_GET(msr, 64)
+static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val)
+{
+	if (!kvmppc_shared_big_endian(vcpu))	/* little-endian layout */
+		vcpu->arch.shared->msr = cpu_to_le64(val);
+	else					/* big-endian layout */
+		vcpu->arch.shared->msr = cpu_to_be64(val);
+}
+SHARED_WRAPPER(dsisr, 32)
+SHARED_WRAPPER(int_pending, 32)
+SHARED_WRAPPER(sprg4, 64)
+SHARED_WRAPPER(sprg5, 64)
+SHARED_WRAPPER(sprg6, 64)
+SHARED_WRAPPER(sprg7, 64)
+
+static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr)
+{
+	/* Read shared->sr[nr], converting from the shared struct's byte order */
+	if (!kvmppc_shared_big_endian(vcpu))
+		return le32_to_cpu(vcpu->arch.shared->sr[nr]);
+	return be32_to_cpu(vcpu->arch.shared->sr[nr]);
+}
+
+static inline void kvmppc_set_sr(struct kvm_vcpu *vcpu, int nr, u32 val)
+{
+	if (!kvmppc_shared_big_endian(vcpu))	/* little-endian layout */
+		vcpu->arch.shared->sr[nr] = cpu_to_le32(val);
+	else					/* big-endian layout */
+		vcpu->arch.shared->sr[nr] = cpu_to_be32(val);
+}
+
+/*
+ * Please call after prepare_to_enter. This function puts the lazy ee and irq
+ * disabled tracking state back to normal mode, without actually enabling
+ * interrupts.
+ */
+static inline void kvmppc_fix_ee_before_entry(void)
+{
+	trace_hardirqs_on();
+
+#ifdef CONFIG_PPC64
+	/*
+	 * To avoid races, the caller must have gone directly from having
+	 * interrupts fully-enabled to hard-disabled.
+	 */
+	WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
+
+	/* Only need to enable IRQs by hard enabling them after this */
+	local_paca->irq_happened = 0;
+	irq_soft_mask_set(IRQS_ENABLED);
+#endif
+}
+
+static inline void kvmppc_fix_ee_after_exit(void)
+{
+#ifdef CONFIG_PPC64
+	/* Record IRQs as hard-disabled and soft-mask all of them */
+	local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+	irq_soft_mask_set(IRQS_ALL_DISABLED);
+#endif
+
+	trace_hardirqs_off();
+}
+
+
+static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+	ulong mode64_bit = 0;
+	ulong addr = kvmppc_get_gpr(vcpu, rb);
+
+	/* Register number 0 for @ra means no base is added to (rb) */
+	if (ra != 0)
+		addr += kvmppc_get_gpr(vcpu, ra);
+
+#if defined(CONFIG_PPC_BOOK3E_64)
+	mode64_bit = MSR_CM;
+#elif defined(CONFIG_PPC_BOOK3S_64)
+	mode64_bit = MSR_SF;
+#endif
+
+	/* No 64-bit mode bit set in the guest MSR: keep the low 32 bits */
+	if ((kvmppc_get_msr(vcpu) & mode64_bit) == 0)
+		addr = (uint32_t)addr;
+	return addr;
+}
+
+extern void xics_wake_cpu(int cpu);
+
+#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/libata-portmap.h b/arch/powerpc/include/asm/libata-portmap.h
new file mode 100644
index 0000000000..7c602da625
--- /dev/null
+++ b/arch/powerpc/include/asm/libata-portmap.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_LIBATA_PORTMAP_H
+#define __ASM_POWERPC_LIBATA_PORTMAP_H
+
+#define ATA_PRIMARY_IRQ(dev)	pci_get_legacy_ide_irq(dev, 0)
+
+#define ATA_SECONDARY_IRQ(dev)	pci_get_legacy_ide_irq(dev, 1)
+
+#endif
diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h
new file mode 100644
index 0000000000..b88d1d2cf3
--- /dev/null
+++ b/arch/powerpc/include/asm/linkage.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_LINKAGE_H
+#define _ASM_POWERPC_LINKAGE_H
+
+#include <asm/types.h>
+
+#define __ALIGN		.align 2
+#define __ALIGN_STR	".align 2"
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+#define cond_syscall(x) \
+	asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n"		\
+	     "\t.weak ." #x "\n\t.set ." #x ", .sys_ni_syscall\n")
+#define SYSCALL_ALIAS(alias, name)					\
+	asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n"	\
+	     "\t.globl ." #alias "\n\t.set ." #alias ", ." #name)
+#endif
+
+#endif	/* _ASM_POWERPC_LINKAGE_H */
diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
new file mode 100644
index 0000000000..d044a1fd4f
--- /dev/null
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * livepatch.h - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015-2016, SUSE, IBM Corp.
+ */
+#ifndef _ASM_POWERPC_LIVEPATCH_H
+#define _ASM_POWERPC_LIVEPATCH_H
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#ifdef CONFIG_LIVEPATCH_64
+static inline void klp_init_thread_info(struct task_struct *p)
+{
+	/* + 1 to account for STACK_END_MAGIC */
+	task_thread_info(p)->livepatch_sp = end_of_stack(p) + 1;
+}
+#else
+static inline void klp_init_thread_info(struct task_struct *p) { }
+#endif
+
+#endif /* _ASM_POWERPC_LIVEPATCH_H */
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
new file mode 100644
index 0000000000..45492fb5bf
--- /dev/null
+++ b/arch/powerpc/include/asm/local.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_POWERPC_LOCAL_H
+#define _ARCH_POWERPC_LOCAL_H
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#include <linux/percpu.h>
+#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
+
+typedef struct
+{
+	long v;
+} local_t;
+
+#define LOCAL_INIT(i)	{ (i) }
+
+static __inline__ long local_read(const local_t *l)
+{
+	return READ_ONCE(l->v);
+}
+
+static __inline__ void local_set(local_t *l, long i)
+{
+	WRITE_ONCE(l->v, i);
+}
+
+#define LOCAL_OP(op, c_op)						\
+static __inline__ void local_##op(long i, local_t *l)			\
+{									\
+	unsigned long flags;						\
+									\
+	powerpc_local_irq_pmu_save(flags);				\
+	l->v c_op i;						\
+	powerpc_local_irq_pmu_restore(flags);				\
+}
+
+#define LOCAL_OP_RETURN(op, c_op)					\
+static __inline__ long local_##op##_return(long a, local_t *l)		\
+{									\
+	long t;								\
+	unsigned long flags;						\
+									\
+	powerpc_local_irq_pmu_save(flags);				\
+	t = (l->v c_op a);						\
+	powerpc_local_irq_pmu_restore(flags);				\
+									\
+	return t;							\
+}
+
+#define LOCAL_OPS(op, c_op)		\
+	LOCAL_OP(op, c_op)		\
+	LOCAL_OP_RETURN(op, c_op)
+
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
+
+#define local_add_negative(a, l)	(local_add_return((a), (l)) < 0)
+#define local_inc_return(l)		local_add_return(1LL, l)
+#define local_inc(l)			local_inc_return(l)
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l)		(local_inc_return(l) == 0)
+
+#define local_dec_return(l)		local_sub_return(1LL, l)
+#define local_dec(l)			local_dec_return(l)
+#define local_sub_and_test(a, l)	(local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l)		(local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
+{
+	unsigned long flags;
+	long seen;
+
+	powerpc_local_irq_pmu_save(flags);
+	seen = l->v;
+	if (seen == o)
+		l->v = n;
+	powerpc_local_irq_pmu_restore(flags);
+	/* @n was stored if and only if the returned value equals @o */
+	return seen;
+}
+
+static __inline__ bool local_try_cmpxchg(local_t *l, long *po, long n)
+{
+	const long expected = *po;
+	long seen = local_cmpxchg(l, expected, n);
+
+	/* On failure, hand the value actually found back through @po */
+	if (unlikely(seen != expected))
+		*po = seen;
+	return likely(seen == expected);
+}
+
+static __inline__ long local_xchg(local_t *l, long n)
+{
+	unsigned long flags;
+	long prev;
+
+	powerpc_local_irq_pmu_save(flags);
+	prev = l->v;
+	l->v = n;
+	powerpc_local_irq_pmu_restore(flags);
+	/* Hand back the value that @n replaced */
+	return prev;
+}
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, long a, long u)
+{
+	unsigned long flags;
+	int added;
+
+	powerpc_local_irq_pmu_save(flags);
+	added = (l->v != u);
+	if (added)
+		l->v += a;
+	powerpc_local_irq_pmu_restore(flags);
+
+	/* 1 if @a was added, 0 if @l already held @u */
+	return added;
+}
+
+#define local_inc_not_zero(l)		local_add_unless((l), 1, 0)
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations.  Note they take
+ * a pointer to the local_t (they dereference l) and do no interrupt masking.
+ */
+
+#define __local_inc(l)		((l)->v++)
+#define __local_dec(l)		((l)->v--)
+#define __local_add(i,l)	((l)->v+=(i))
+#define __local_sub(i,l)	((l)->v-=(i))
+
+#else /* !CONFIG_PPC_BOOK3S_64 */
+
+#include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#endif /* _ARCH_POWERPC_LOCAL_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
new file mode 100644
index 0000000000..61ec2447da
--- /dev/null
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * lppaca.h
+ * Copyright (C) 2001  Mike Corrigan IBM Corporation
+ */
+#ifndef _ASM_POWERPC_LPPACA_H
+#define _ASM_POWERPC_LPPACA_H
+
+#ifdef __KERNEL__
+
+/*
+ * These definitions relate to hypervisors that only exist when using
+ * a server type processor
+ */
+#ifdef CONFIG_PPC_BOOK3S
+
+/*
+ * This control block contains the data that is shared between the
+ * hypervisor and the OS.
+ */
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <asm/types.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+
+/*
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+ * H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640 byte size.
+ */
+struct lppaca {
+	/* cacheline 1 contains read-only data */
+
+	__be32	desc;			/* Eye catcher 0xD397D781 */
+	__be16	size;			/* Size of this struct */
+	u8	reserved1[3];
+	u8	__old_status;		/* Old status, including shared proc */
+	u8	reserved3[14];
+	volatile __be32 dyn_hw_node_id;	/* Dynamic hardware node id */
+	volatile __be32 dyn_hw_proc_id;	/* Dynamic hardware proc id */
+	u8	reserved4[56];
+	volatile u8 vphn_assoc_counts[8]; /* Virtual processor home node */
+					  /* associativity change counters */
+	u8	reserved5[32];
+
+	/* cacheline 2 contains local read-write data */
+
+	u8	reserved6[48];
+	u8	cede_latency_hint;
+	u8	ebb_regs_in_use;
+	u8	reserved7[6];
+	u8	dtl_enable_mask;	/* Dispatch Trace Log mask */
+	u8	donate_dedicated_cpu;	/* Donate dedicated CPU cycles */
+	u8	fpregs_in_use;
+	u8	pmcregs_in_use;
+	u8	reserved8[28];
+	__be64	wait_state_cycles;	/* Wait cycles for this proc */
+	u8	reserved9[28];
+	__be16	slb_count;		/* # of SLBs to maintain */
+	u8	idle;			/* Indicate OS is idle */
+	u8	vmxregs_in_use;
+
+	/* cacheline 3 is shared with other processors */
+
+	/*
+	 * This is the yield_count.  An "odd" value (low bit on) means that
+	 * the processor is yielded (either because of an OS yield or a
+	 * hypervisor preempt).  An even value implies that the processor is
+	 * currently executing.
+	 * NOTE: Even dedicated processor partitions can yield so this
+	 * field cannot be used to determine if we are shared or dedicated.
+	 */
+	volatile __be32 yield_count;
+	volatile __be32 dispersion_count; /* dispatch changed physical cpu */
+	volatile __be64 cmo_faults;	/* CMO page fault count */
+	volatile __be64 cmo_fault_time;	/* CMO page fault time */
+	u8	reserved10[64];		/* [S]PURR expropriated/donated */
+	volatile __be64 enqueue_dispatch_tb; /* Total TB enqueue->dispatch */
+	volatile __be64 ready_enqueue_tb; /* Total TB ready->enqueue */
+	volatile __be64 wait_ready_tb;	/* Total TB wait->ready */
+	u8	reserved11[16];
+
+	/* cacheline 4-5 */
+
+	__be32	page_ins;		/* CMO Hint - # page ins by OS */
+	u8	reserved12[148];
+	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
+	u8	reserved13[96];
+} ____cacheline_aligned;
+
+#define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
+
+/*
+ * We are using a non architected field to determine if a partition is
+ * shared or dedicated. This currently works on both KVM and PHYP, but
+ * we will have to transition to something better.
+ */
+#define LPPACA_OLD_SHARED_PROC		2
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * All CPUs should have the same shared proc value, so directly access the PACA
+ * to avoid false positives from DEBUG_PREEMPT.
+ */
+static inline bool lppaca_shared_proc(void)
+{
+	const struct lppaca *vpa = local_paca->lppaca_ptr;
+
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
+		return (vpa->__old_status & LPPACA_OLD_SHARED_PROC) != 0;
+	return false;	/* no SPLPAR firmware feature: never report shared */
+}
+
+#define get_lppaca()	(get_paca()->lppaca_ptr)
+#endif
+
+/*
+ * SLB shadow buffer structure as defined in the PAPR.  The save_area
+ * contains adjacent ESID and VSID pairs for each shadowed SLB.  The
+ * ESID is stored in the lower 64bits, then the VSID.
+ */
+struct slb_shadow {
+	__be32	persistent;		/* Number of persistent SLBs */
+	__be32	buffer_length;		/* Total shadow buffer length */
+	__be64	reserved;
+	struct	{
+		__be64	esid;		/* ESID half of the pair */
+		__be64	vsid;		/* VSID half of the pair */
+	} save_area[SLB_NUM_BOLTED];
+} ____cacheline_aligned;
+
+#endif /* CONFIG_PPC_BOOK3S */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/arch/powerpc/include/asm/lv1call.h b/arch/powerpc/include/asm/lv1call.h
new file mode 100644
index 0000000000..b11501b301
--- /dev/null
+++ b/arch/powerpc/include/asm/lv1call.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 hvcall interface.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ *  Copyright 2003, 2004 (c) MontaVista Software, Inc.
+ */
+
+#if !defined(_ASM_POWERPC_LV1CALL_H)
+#define _ASM_POWERPC_LV1CALL_H
+
+#if !defined(__ASSEMBLY__)
+
+#include <linux/types.h>
+#include <linux/export.h>
+
+/* lv1 call declaration macros */
+
+#define LV1_1_IN_ARG_DECL u64 in_1
+#define LV1_2_IN_ARG_DECL LV1_1_IN_ARG_DECL, u64 in_2
+#define LV1_3_IN_ARG_DECL LV1_2_IN_ARG_DECL, u64 in_3
+#define LV1_4_IN_ARG_DECL LV1_3_IN_ARG_DECL, u64 in_4
+#define LV1_5_IN_ARG_DECL LV1_4_IN_ARG_DECL, u64 in_5
+#define LV1_6_IN_ARG_DECL LV1_5_IN_ARG_DECL, u64 in_6
+#define LV1_7_IN_ARG_DECL LV1_6_IN_ARG_DECL, u64 in_7
+#define LV1_8_IN_ARG_DECL LV1_7_IN_ARG_DECL, u64 in_8
+#define LV1_1_OUT_ARG_DECL u64 *out_1
+#define LV1_2_OUT_ARG_DECL LV1_1_OUT_ARG_DECL, u64 *out_2
+#define LV1_3_OUT_ARG_DECL LV1_2_OUT_ARG_DECL, u64 *out_3
+#define LV1_4_OUT_ARG_DECL LV1_3_OUT_ARG_DECL, u64 *out_4
+#define LV1_5_OUT_ARG_DECL LV1_4_OUT_ARG_DECL, u64 *out_5
+#define LV1_6_OUT_ARG_DECL LV1_5_OUT_ARG_DECL, u64 *out_6
+#define LV1_7_OUT_ARG_DECL LV1_6_OUT_ARG_DECL, u64 *out_7
+
+#define LV1_0_IN_0_OUT_ARG_DECL void
+#define LV1_1_IN_0_OUT_ARG_DECL LV1_1_IN_ARG_DECL
+#define LV1_2_IN_0_OUT_ARG_DECL LV1_2_IN_ARG_DECL
+#define LV1_3_IN_0_OUT_ARG_DECL LV1_3_IN_ARG_DECL
+#define LV1_4_IN_0_OUT_ARG_DECL LV1_4_IN_ARG_DECL
+#define LV1_5_IN_0_OUT_ARG_DECL LV1_5_IN_ARG_DECL
+#define LV1_6_IN_0_OUT_ARG_DECL LV1_6_IN_ARG_DECL
+#define LV1_7_IN_0_OUT_ARG_DECL LV1_7_IN_ARG_DECL
+
+#define LV1_0_IN_1_OUT_ARG_DECL                    LV1_1_OUT_ARG_DECL
+#define LV1_1_IN_1_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_2_IN_1_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_3_IN_1_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_4_IN_1_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_5_IN_1_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_6_IN_1_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_7_IN_1_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+#define LV1_8_IN_1_OUT_ARG_DECL LV1_8_IN_ARG_DECL, LV1_1_OUT_ARG_DECL
+
+#define LV1_0_IN_2_OUT_ARG_DECL                    LV1_2_OUT_ARG_DECL
+#define LV1_1_IN_2_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+#define LV1_2_IN_2_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+#define LV1_3_IN_2_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+#define LV1_4_IN_2_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+#define LV1_5_IN_2_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+#define LV1_6_IN_2_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+#define LV1_7_IN_2_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_2_OUT_ARG_DECL
+
+#define LV1_0_IN_3_OUT_ARG_DECL                    LV1_3_OUT_ARG_DECL
+#define LV1_1_IN_3_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+#define LV1_2_IN_3_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+#define LV1_3_IN_3_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+#define LV1_4_IN_3_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+#define LV1_5_IN_3_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+#define LV1_6_IN_3_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+#define LV1_7_IN_3_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_3_OUT_ARG_DECL
+
+#define LV1_0_IN_4_OUT_ARG_DECL                    LV1_4_OUT_ARG_DECL
+#define LV1_1_IN_4_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+#define LV1_2_IN_4_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+#define LV1_3_IN_4_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+#define LV1_4_IN_4_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+#define LV1_5_IN_4_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+#define LV1_6_IN_4_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+#define LV1_7_IN_4_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_4_OUT_ARG_DECL
+
+#define LV1_0_IN_5_OUT_ARG_DECL                    LV1_5_OUT_ARG_DECL
+#define LV1_1_IN_5_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+#define LV1_2_IN_5_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+#define LV1_3_IN_5_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+#define LV1_4_IN_5_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+#define LV1_5_IN_5_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+#define LV1_6_IN_5_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+#define LV1_7_IN_5_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_5_OUT_ARG_DECL
+
+#define LV1_0_IN_6_OUT_ARG_DECL                    LV1_6_OUT_ARG_DECL
+#define LV1_1_IN_6_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+#define LV1_2_IN_6_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+#define LV1_3_IN_6_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+#define LV1_4_IN_6_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+#define LV1_5_IN_6_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+#define LV1_6_IN_6_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+#define LV1_7_IN_6_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_6_OUT_ARG_DECL
+
+#define LV1_0_IN_7_OUT_ARG_DECL                    LV1_7_OUT_ARG_DECL
+#define LV1_1_IN_7_OUT_ARG_DECL LV1_1_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+#define LV1_2_IN_7_OUT_ARG_DECL LV1_2_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+#define LV1_3_IN_7_OUT_ARG_DECL LV1_3_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+#define LV1_4_IN_7_OUT_ARG_DECL LV1_4_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+#define LV1_5_IN_7_OUT_ARG_DECL LV1_5_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+#define LV1_6_IN_7_OUT_ARG_DECL LV1_6_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+#define LV1_7_IN_7_OUT_ARG_DECL LV1_7_IN_ARG_DECL, LV1_7_OUT_ARG_DECL
+
+#define LV1_1_IN_ARGS in_1
+#define LV1_2_IN_ARGS LV1_1_IN_ARGS, in_2
+#define LV1_3_IN_ARGS LV1_2_IN_ARGS, in_3
+#define LV1_4_IN_ARGS LV1_3_IN_ARGS, in_4
+#define LV1_5_IN_ARGS LV1_4_IN_ARGS, in_5
+#define LV1_6_IN_ARGS LV1_5_IN_ARGS, in_6
+#define LV1_7_IN_ARGS LV1_6_IN_ARGS, in_7
+#define LV1_8_IN_ARGS LV1_7_IN_ARGS, in_8
+
+#define LV1_1_OUT_ARGS out_1
+#define LV1_2_OUT_ARGS LV1_1_OUT_ARGS, out_2
+#define LV1_3_OUT_ARGS LV1_2_OUT_ARGS, out_3
+#define LV1_4_OUT_ARGS LV1_3_OUT_ARGS, out_4
+#define LV1_5_OUT_ARGS LV1_4_OUT_ARGS, out_5
+#define LV1_6_OUT_ARGS LV1_5_OUT_ARGS, out_6
+#define LV1_7_OUT_ARGS LV1_6_OUT_ARGS, out_7
+
+#define LV1_0_IN_0_OUT_ARGS
+#define LV1_1_IN_0_OUT_ARGS LV1_1_IN_ARGS
+#define LV1_2_IN_0_OUT_ARGS LV1_2_IN_ARGS
+#define LV1_3_IN_0_OUT_ARGS LV1_3_IN_ARGS
+#define LV1_4_IN_0_OUT_ARGS LV1_4_IN_ARGS
+#define LV1_5_IN_0_OUT_ARGS LV1_5_IN_ARGS
+#define LV1_6_IN_0_OUT_ARGS LV1_6_IN_ARGS
+#define LV1_7_IN_0_OUT_ARGS LV1_7_IN_ARGS
+
+#define LV1_0_IN_1_OUT_ARGS                LV1_1_OUT_ARGS
+#define LV1_1_IN_1_OUT_ARGS LV1_1_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_2_IN_1_OUT_ARGS LV1_2_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_3_IN_1_OUT_ARGS LV1_3_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_4_IN_1_OUT_ARGS LV1_4_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_5_IN_1_OUT_ARGS LV1_5_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_6_IN_1_OUT_ARGS LV1_6_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_7_IN_1_OUT_ARGS LV1_7_IN_ARGS, LV1_1_OUT_ARGS
+#define LV1_8_IN_1_OUT_ARGS LV1_8_IN_ARGS, LV1_1_OUT_ARGS
+
+#define LV1_0_IN_2_OUT_ARGS                LV1_2_OUT_ARGS
+#define LV1_1_IN_2_OUT_ARGS LV1_1_IN_ARGS, LV1_2_OUT_ARGS
+#define LV1_2_IN_2_OUT_ARGS LV1_2_IN_ARGS, LV1_2_OUT_ARGS
+#define LV1_3_IN_2_OUT_ARGS LV1_3_IN_ARGS, LV1_2_OUT_ARGS
+#define LV1_4_IN_2_OUT_ARGS LV1_4_IN_ARGS, LV1_2_OUT_ARGS
+#define LV1_5_IN_2_OUT_ARGS LV1_5_IN_ARGS, LV1_2_OUT_ARGS
+#define LV1_6_IN_2_OUT_ARGS LV1_6_IN_ARGS, LV1_2_OUT_ARGS
+#define LV1_7_IN_2_OUT_ARGS LV1_7_IN_ARGS, LV1_2_OUT_ARGS
+
+#define LV1_0_IN_3_OUT_ARGS                LV1_3_OUT_ARGS
+#define LV1_1_IN_3_OUT_ARGS LV1_1_IN_ARGS, LV1_3_OUT_ARGS
+#define LV1_2_IN_3_OUT_ARGS LV1_2_IN_ARGS, LV1_3_OUT_ARGS
+#define LV1_3_IN_3_OUT_ARGS LV1_3_IN_ARGS, LV1_3_OUT_ARGS
+#define LV1_4_IN_3_OUT_ARGS LV1_4_IN_ARGS, LV1_3_OUT_ARGS
+#define LV1_5_IN_3_OUT_ARGS LV1_5_IN_ARGS, LV1_3_OUT_ARGS
+#define LV1_6_IN_3_OUT_ARGS LV1_6_IN_ARGS, LV1_3_OUT_ARGS
+#define LV1_7_IN_3_OUT_ARGS LV1_7_IN_ARGS, LV1_3_OUT_ARGS
+
+#define LV1_0_IN_4_OUT_ARGS                LV1_4_OUT_ARGS
+#define LV1_1_IN_4_OUT_ARGS LV1_1_IN_ARGS, LV1_4_OUT_ARGS
+#define LV1_2_IN_4_OUT_ARGS LV1_2_IN_ARGS, LV1_4_OUT_ARGS
+#define LV1_3_IN_4_OUT_ARGS LV1_3_IN_ARGS, LV1_4_OUT_ARGS
+#define LV1_4_IN_4_OUT_ARGS LV1_4_IN_ARGS, LV1_4_OUT_ARGS
+#define LV1_5_IN_4_OUT_ARGS LV1_5_IN_ARGS, LV1_4_OUT_ARGS
+#define LV1_6_IN_4_OUT_ARGS LV1_6_IN_ARGS, LV1_4_OUT_ARGS
+#define LV1_7_IN_4_OUT_ARGS LV1_7_IN_ARGS, LV1_4_OUT_ARGS
+
+#define LV1_0_IN_5_OUT_ARGS                LV1_5_OUT_ARGS
+#define LV1_1_IN_5_OUT_ARGS LV1_1_IN_ARGS, LV1_5_OUT_ARGS
+#define LV1_2_IN_5_OUT_ARGS LV1_2_IN_ARGS, LV1_5_OUT_ARGS
+#define LV1_3_IN_5_OUT_ARGS LV1_3_IN_ARGS, LV1_5_OUT_ARGS
+#define LV1_4_IN_5_OUT_ARGS LV1_4_IN_ARGS, LV1_5_OUT_ARGS
+#define LV1_5_IN_5_OUT_ARGS LV1_5_IN_ARGS, LV1_5_OUT_ARGS
+#define LV1_6_IN_5_OUT_ARGS LV1_6_IN_ARGS, LV1_5_OUT_ARGS
+#define LV1_7_IN_5_OUT_ARGS LV1_7_IN_ARGS, LV1_5_OUT_ARGS
+
+#define LV1_0_IN_6_OUT_ARGS                LV1_6_OUT_ARGS
+#define LV1_1_IN_6_OUT_ARGS LV1_1_IN_ARGS, LV1_6_OUT_ARGS
+#define LV1_2_IN_6_OUT_ARGS LV1_2_IN_ARGS, LV1_6_OUT_ARGS
+#define LV1_3_IN_6_OUT_ARGS LV1_3_IN_ARGS, LV1_6_OUT_ARGS
+#define LV1_4_IN_6_OUT_ARGS LV1_4_IN_ARGS, LV1_6_OUT_ARGS
+#define LV1_5_IN_6_OUT_ARGS LV1_5_IN_ARGS, LV1_6_OUT_ARGS
+#define LV1_6_IN_6_OUT_ARGS LV1_6_IN_ARGS, LV1_6_OUT_ARGS
+#define LV1_7_IN_6_OUT_ARGS LV1_7_IN_ARGS, LV1_6_OUT_ARGS
+
+#define LV1_0_IN_7_OUT_ARGS                LV1_7_OUT_ARGS
+#define LV1_1_IN_7_OUT_ARGS LV1_1_IN_ARGS, LV1_7_OUT_ARGS
+#define LV1_2_IN_7_OUT_ARGS LV1_2_IN_ARGS, LV1_7_OUT_ARGS
+#define LV1_3_IN_7_OUT_ARGS LV1_3_IN_ARGS, LV1_7_OUT_ARGS
+#define LV1_4_IN_7_OUT_ARGS LV1_4_IN_ARGS, LV1_7_OUT_ARGS
+#define LV1_5_IN_7_OUT_ARGS LV1_5_IN_ARGS, LV1_7_OUT_ARGS
+#define LV1_6_IN_7_OUT_ARGS LV1_6_IN_ARGS, LV1_7_OUT_ARGS
+#define LV1_7_IN_7_OUT_ARGS LV1_7_IN_ARGS, LV1_7_OUT_ARGS
+
+/*
+ * This LV1_CALL() macro is for use by callers.  It expands into an
+ * inline call wrapper and an underscored HV call declaration.  The
+ * wrapper can be used to instrument the lv1 call interface.  The
+ * file lv1call.S defines its own LV1_CALL() macro to expand into
+ * the actual underscored call definition.
+ */
+
+#if !defined(LV1_CALL)
+#define LV1_CALL(name, in, out, num)                               \
+  extern s64 _lv1_##name(LV1_##in##_IN_##out##_OUT_ARG_DECL);      \
+  static inline int lv1_##name(LV1_##in##_IN_##out##_OUT_ARG_DECL) \
+    {return _lv1_##name(LV1_##in##_IN_##out##_OUT_ARGS);}
+#endif
+
+#endif /* !defined(__ASSEMBLY__) */
+
+/* lv1 call table */
+
+LV1_CALL(allocate_memory,                               4, 2,   0 )
+LV1_CALL(write_htab_entry,                              4, 0,   1 )
+LV1_CALL(construct_virtual_address_space,               3, 2,   2 )
+LV1_CALL(invalidate_htab_entries,                       5, 0,   3 )
+LV1_CALL(get_virtual_address_space_id_of_ppe,           0, 1,   4 )
+LV1_CALL(query_logical_partition_address_region_info,   1, 5,   6 )
+LV1_CALL(select_virtual_address_space,                  1, 0,   7 )
+LV1_CALL(pause,                                         1, 0,   9 )
+LV1_CALL(destruct_virtual_address_space,                1, 0,  10 )
+LV1_CALL(configure_irq_state_bitmap,                    3, 0,  11 )
+LV1_CALL(connect_irq_plug_ext,                          5, 0,  12 )
+LV1_CALL(release_memory,                                1, 0,  13 )
+LV1_CALL(put_iopte,                                     5, 0,  15 )
+LV1_CALL(disconnect_irq_plug_ext,                       3, 0,  17 )
+LV1_CALL(construct_event_receive_port,                  0, 1,  18 )
+LV1_CALL(destruct_event_receive_port,                   1, 0,  19 )
+LV1_CALL(send_event_locally,                            1, 0,  24 )
+LV1_CALL(end_of_interrupt,                              1, 0,  27 )
+LV1_CALL(connect_irq_plug,                              2, 0,  28 )
+LV1_CALL(disconnect_irq_plug,                           1, 0,  29 )
+LV1_CALL(end_of_interrupt_ext,                          3, 0,  30 )
+LV1_CALL(did_update_interrupt_mask,                     2, 0,  31 )
+LV1_CALL(shutdown_logical_partition,                    1, 0,  44 )
+LV1_CALL(destruct_logical_spe,                          1, 0,  54 )
+LV1_CALL(construct_logical_spe,                         7, 6,  57 )
+LV1_CALL(set_spe_interrupt_mask,                        3, 0,  61 )
+LV1_CALL(set_spe_transition_notifier,                   3, 0,  64 )
+LV1_CALL(disable_logical_spe,                           2, 0,  65 )
+LV1_CALL(clear_spe_interrupt_status,                    4, 0,  66 )
+LV1_CALL(get_spe_interrupt_status,                      2, 1,  67 )
+LV1_CALL(get_logical_ppe_id,                            0, 1,  69 )
+LV1_CALL(set_interrupt_mask,                            5, 0,  73 )
+LV1_CALL(get_logical_partition_id,                      0, 1,  74 )
+LV1_CALL(configure_execution_time_variable,             1, 0,  77 )
+LV1_CALL(get_spe_irq_outlet,                            2, 1,  78 )
+LV1_CALL(set_spe_privilege_state_area_1_register,       3, 0,  79 )
+LV1_CALL(create_repository_node,                        6, 0,  90 )
+LV1_CALL(read_repository_node,                          5, 2,  91 )
+LV1_CALL(write_repository_node,                         6, 0,  92 )
+LV1_CALL(delete_repository_node,                        4, 0,  93 )
+LV1_CALL(read_htab_entries,                             2, 5,  95 )
+LV1_CALL(set_dabr,                                      2, 0,  96 )
+LV1_CALL(get_total_execution_time,                      2, 1, 103 )
+LV1_CALL(allocate_io_segment,                           3, 1, 116 )
+LV1_CALL(release_io_segment,                            2, 0, 117 )
+LV1_CALL(construct_io_irq_outlet,                       1, 1, 120 )
+LV1_CALL(destruct_io_irq_outlet,                        1, 0, 121 )
+LV1_CALL(map_htab,                                      1, 1, 122 )
+LV1_CALL(unmap_htab,                                    1, 0, 123 )
+LV1_CALL(get_version_info,                              0, 2, 127 )
+LV1_CALL(insert_htab_entry,                             6, 3, 158 )
+LV1_CALL(read_virtual_uart,                             3, 1, 162 )
+LV1_CALL(write_virtual_uart,                            3, 1, 163 )
+LV1_CALL(set_virtual_uart_param,                        3, 0, 164 )
+LV1_CALL(get_virtual_uart_param,                        2, 1, 165 )
+LV1_CALL(configure_virtual_uart_irq,                    1, 1, 166 )
+LV1_CALL(open_device,                                   3, 0, 170 )
+LV1_CALL(close_device,                                  2, 0, 171 )
+LV1_CALL(map_device_mmio_region,                        5, 1, 172 )
+LV1_CALL(unmap_device_mmio_region,                      3, 0, 173 )
+LV1_CALL(allocate_device_dma_region,                    5, 1, 174 )
+LV1_CALL(free_device_dma_region,                        3, 0, 175 )
+LV1_CALL(map_device_dma_region,                         6, 0, 176 )
+LV1_CALL(unmap_device_dma_region,                       4, 0, 177 )
+LV1_CALL(net_add_multicast_address,                     4, 0, 185 )
+LV1_CALL(net_remove_multicast_address,                  4, 0, 186 )
+LV1_CALL(net_start_tx_dma,                              4, 0, 187 )
+LV1_CALL(net_stop_tx_dma,                               2, 0, 188 )
+LV1_CALL(net_start_rx_dma,                              4, 0, 189 )
+LV1_CALL(net_stop_rx_dma,                               2, 0, 190 )
+LV1_CALL(net_set_interrupt_status_indicator,            4, 0, 191 )
+LV1_CALL(net_set_interrupt_mask,                        4, 0, 193 )
+LV1_CALL(net_control,                                   6, 2, 194 )
+LV1_CALL(connect_interrupt_event_receive_port,          4, 0, 197 )
+LV1_CALL(disconnect_interrupt_event_receive_port,       4, 0, 198 )
+LV1_CALL(get_spe_all_interrupt_statuses,                1, 1, 199 )
+LV1_CALL(deconfigure_virtual_uart_irq,                  0, 0, 202 )
+LV1_CALL(enable_logical_spe,                            2, 0, 207 )
+LV1_CALL(gpu_open,                                      1, 0, 210 )
+LV1_CALL(gpu_close,                                     0, 0, 211 )
+LV1_CALL(gpu_device_map,                                1, 2, 212 )
+LV1_CALL(gpu_device_unmap,                              1, 0, 213 )
+LV1_CALL(gpu_memory_allocate,                           5, 2, 214 )
+LV1_CALL(gpu_memory_free,                               1, 0, 216 )
+LV1_CALL(gpu_context_allocate,                          2, 5, 217 )
+LV1_CALL(gpu_context_free,                              1, 0, 218 )
+LV1_CALL(gpu_context_iomap,                             5, 0, 221 )
+LV1_CALL(gpu_context_attribute,                         6, 0, 225 )
+LV1_CALL(gpu_context_intr,                              1, 1, 227 )
+LV1_CALL(gpu_attribute,                                 3, 0, 228 )
+LV1_CALL(get_rtc,                                       0, 2, 232 )
+LV1_CALL(set_ppe_periodic_tracer_frequency,             1, 0, 240 )
+LV1_CALL(start_ppe_periodic_tracer,                     5, 0, 241 )
+LV1_CALL(stop_ppe_periodic_tracer,                      1, 1, 242 )
+LV1_CALL(storage_read,                                  6, 1, 245 )
+LV1_CALL(storage_write,                                 6, 1, 246 )
+LV1_CALL(storage_send_device_command,                   6, 1, 248 )
+LV1_CALL(storage_get_async_status,                      1, 2, 249 )
+LV1_CALL(storage_check_async_status,                    2, 1, 254 )
+LV1_CALL(panic,                                         1, 0, 255 )
+LV1_CALL(construct_lpm,                                 6, 3, 140 )
+LV1_CALL(destruct_lpm,                                  1, 0, 141 )
+LV1_CALL(start_lpm,                                     1, 0, 142 )
+LV1_CALL(stop_lpm,                                      1, 1, 143 )
+LV1_CALL(copy_lpm_trace_buffer,                         3, 1, 144 )
+LV1_CALL(add_lpm_event_bookmark,                        5, 0, 145 )
+LV1_CALL(delete_lpm_event_bookmark,                     3, 0, 146 )
+LV1_CALL(set_lpm_interrupt_mask,                        3, 1, 147 )
+LV1_CALL(get_lpm_interrupt_status,                      1, 1, 148 )
+LV1_CALL(set_lpm_general_control,                       5, 2, 149 )
+LV1_CALL(set_lpm_interval,                              3, 1, 150 )
+LV1_CALL(set_lpm_trigger_control,                       3, 1, 151 )
+LV1_CALL(set_lpm_counter_control,                       4, 1, 152 )
+LV1_CALL(set_lpm_group_control,                         3, 1, 153 )
+LV1_CALL(set_lpm_debug_bus_control,                     3, 1, 154 )
+LV1_CALL(set_lpm_counter,                               5, 2, 155 )
+LV1_CALL(set_lpm_signal,                                7, 0, 156 )
+LV1_CALL(set_lpm_spr_trigger,                           2, 0, 157 )
+
+#endif
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
new file mode 100644
index 0000000000..4f6e7d7ee3
--- /dev/null
+++ b/arch/powerpc/include/asm/machdep.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_MACHDEP_H
+#define _ASM_POWERPC_MACHDEP_H
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+
+struct pt_regs;
+struct pci_bus;	
+struct device_node;
+struct iommu_table;
+struct rtc_time;
+struct file;
+struct pci_controller;
+struct kimage;
+struct pci_host_bridge;
+
+struct machdep_calls {
+	const char	*name;
+	const char	*compatible;
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PM
+	void		(*iommu_restore)(void);
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+	unsigned long	(*memory_block_size)(void);
+#endif
+#endif /* CONFIG_PPC64 */
+
+	void		(*dma_set_mask)(struct device *dev, u64 dma_mask);
+
+	int		(*probe)(void);
+	void		(*setup_arch)(void); /* Optional, may be NULL */
+	/* Optional, may be NULL. */
+	void		(*show_cpuinfo)(struct seq_file *m);
+	/* Returns the current operating frequency of "cpu" in Hz */
+	unsigned long  	(*get_proc_freq)(unsigned int cpu);
+
+	void		(*init_IRQ)(void);
+
+	/* Return an irq, or 0 to indicate there are none pending. */
+	unsigned int	(*get_irq)(void);
+
+	/* PCI stuff */
+	/* Called after allocating resources */
+	void		(*pcibios_fixup)(void);
+	void		(*pci_irq_fixup)(struct pci_dev *dev);
+	int		(*pcibios_root_bridge_prepare)(struct pci_host_bridge
+				*bridge);
+
+	/* finds all the pci_controllers present at boot */
+	void 		(*discover_phbs)(void);
+
+	/* To setup PHBs when using automatic OF platform driver for PCI */
+	int		(*pci_setup_phb)(struct pci_controller *host);
+
+	void __noreturn	(*restart)(char *cmd);
+	void __noreturn (*halt)(void);
+	void		(*panic)(char *str);
+
+	long		(*time_init)(void); /* Optional, may be NULL */
+
+	int		(*set_rtc_time)(struct rtc_time *);
+	void		(*get_rtc_time)(struct rtc_time *);
+	time64_t	(*get_boot_time)(void);
+
+	void		(*calibrate_decr)(void);
+
+	void		(*progress)(char *, unsigned short);
+
+	/* Interface for platform error logging */
+	void 		(*log_error)(char *buf, unsigned int err_type, int fatal);
+
+	unsigned char 	(*nvram_read_val)(int addr);
+	void		(*nvram_write_val)(int addr, unsigned char val);
+	ssize_t		(*nvram_write)(char *buf, size_t count, loff_t *index);
+	ssize_t		(*nvram_read)(char *buf, size_t count, loff_t *index);	
+	ssize_t		(*nvram_size)(void);		
+	void		(*nvram_sync)(void);
+
+	/* Exception handlers */
+	int		(*system_reset_exception)(struct pt_regs *regs);
+	int 		(*machine_check_exception)(struct pt_regs *regs);
+	int		(*handle_hmi_exception)(struct pt_regs *regs);
+
+	/* Early exception handlers called in realmode */
+	int		(*hmi_exception_early)(struct pt_regs *regs);
+	long		(*machine_check_early)(struct pt_regs *regs);
+
+	/* Called during machine check exception to retrieve fixup address. */
+	bool		(*mce_check_early_recovery)(struct pt_regs *regs);
+
+	void            (*machine_check_log_err)(void);
+
+	/* Motherboard/chipset features. This is a kind of general purpose
+	 * hook used to control some machine specific features (like reset
+	 * lines, chip power control, etc...).
+	 */
+	long	 	(*feature_call)(unsigned int feature, ...);
+
+	/* Get legacy PCI/IDE interrupt mapping */ 
+	int		(*pci_get_legacy_ide_irq)(struct pci_dev *dev, int channel);
+	
+	/* Get access protection for /dev/mem */
+	pgprot_t	(*phys_mem_access_prot)(struct file *file,
+						unsigned long pfn,
+						unsigned long size,
+						pgprot_t vma_prot);
+
+	/*
+	 * Function for waiting for work with reduced power in idle loop;
+	 * called with interrupts disabled.
+	 */
+	void		(*power_save)(void);
+
+	/* Function to enable performance monitor counters for this
+	   platform, called once per cpu. */
+	void		(*enable_pmcs)(void);
+
+	/* Set DABR for this platform, leave empty for default implementation */
+	int		(*set_dabr)(unsigned long dabr,
+				    unsigned long dabrx);
+
+	/* Set DAWR for this platform, leave empty for default implementation */
+	int		(*set_dawr)(int nr, unsigned long dawr,
+				    unsigned long dawrx);
+
+#ifdef CONFIG_PPC32	/* XXX for now */
+	/* A general init function, called by ppc_init in init/main.c.
+	   May be NULL. */
+	void		(*init)(void);
+
+	/*
+	 * optional PCI "hooks"
+	 */
+	/* Called at the very end of pcibios_init() */
+	void (*pcibios_after_init)(void);
+
+#endif /* CONFIG_PPC32 */
+
+	/* Called in indirect_* to avoid touching devices */
+	int (*pci_exclude_device)(struct pci_controller *, unsigned char, unsigned char);
+
+	/* Called after PPC generic resource fixup to perform
+	   machine specific fixups */
+	void (*pcibios_fixup_resources)(struct pci_dev *);
+
+	/* Called for each PCI bus in the system when it's probed */
+	void (*pcibios_fixup_bus)(struct pci_bus *);
+
+	/* Called after scan and before resource survey */
+	void (*pcibios_fixup_phb)(struct pci_controller *hose);
+
+	/*
+	 * Called after device has been added to bus and
+	 * before sysfs has been created.
+	 */
+	void (*pcibios_bus_add_device)(struct pci_dev *pdev);
+
+	resource_size_t (*pcibios_default_alignment)(void);
+
+#ifdef CONFIG_PCI_IOV
+	void (*pcibios_fixup_sriov)(struct pci_dev *pdev);
+	resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno);
+	int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs);
+	int (*pcibios_sriov_disable)(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
+
+	/* Called to shutdown machine specific hardware not already controlled
+	 * by other drivers.
+	 */
+	void (*machine_shutdown)(void);
+
+#ifdef CONFIG_KEXEC_CORE
+	void (*kexec_cpu_down)(int crash_shutdown, int secondary);
+
+	/* Called to perform the _real_ kexec.
+	 * Do NOT allocate memory or fail here. We are past the point of
+	 * no return.
+	 */
+	void (*machine_kexec)(struct kimage *image);
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_SUSPEND
+	/* These are called to disable and enable, respectively, IRQs when
+	 * entering a suspend state.  If NULL, then the generic versions
+	 * will be called.  The generic versions disable/enable the
+	 * decrementer along with interrupts.
+	 */
+	void (*suspend_disable_irqs)(void);
+	void (*suspend_enable_irqs)(void);
+#endif
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ssize_t (*cpu_probe)(const char *, size_t);
+	ssize_t (*cpu_release)(const char *, size_t);
+#endif
+
+	int (*get_random_seed)(unsigned long *v);
+};
+
+extern void e500_idle(void);
+extern void power4_idle(void);
+extern void ppc6xx_idle(void);
+
+/*
+ * ppc_md contains a copy of the machine description structure for the
+ * current platform. machine_id contains the initial address where the
+ * description was found during boot.
+ */
+extern struct machdep_calls ppc_md;
+extern struct machdep_calls *machine_id;
+
+#define __machine_desc __section(".machine.desc")
+
+#define define_machine(name)					\
+	extern struct machdep_calls mach_##name;		\
+	EXPORT_SYMBOL(mach_##name);				\
+	struct machdep_calls mach_##name __machine_desc =
+
+static inline bool __machine_is(const struct machdep_calls *md)
+{
+	WARN_ON(!machine_id); // complain if used before probe_machine()
+	return machine_id == md;
+}
+
+#define machine_is(name)                                        \
+	({                                                      \
+		extern struct machdep_calls mach_##name __weak; \
+		__machine_is(&mach_##name);                     \
+	})
+
+static inline void log_error(char *buf, unsigned int err_type, int fatal)
+{
+	if (ppc_md.log_error)
+		ppc_md.log_error(buf, err_type, fatal);
+}
+
+#define __define_machine_initcall(mach, fn, id) \
+	static int __init __machine_initcall_##mach##_##fn(void) { \
+		if (machine_is(mach)) return fn(); \
+		return 0; \
+	} \
+	__define_initcall(__machine_initcall_##mach##_##fn, id);
+
+#define machine_early_initcall(mach, fn)	__define_machine_initcall(mach, fn, early)
+#define machine_core_initcall(mach, fn)		__define_machine_initcall(mach, fn, 1)
+#define machine_core_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 1s)
+#define machine_postcore_initcall(mach, fn)	__define_machine_initcall(mach, fn, 2)
+#define machine_postcore_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 2s)
+#define machine_arch_initcall(mach, fn)		__define_machine_initcall(mach, fn, 3)
+#define machine_arch_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 3s)
+#define machine_subsys_initcall(mach, fn)	__define_machine_initcall(mach, fn, 4)
+#define machine_subsys_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 4s)
+#define machine_fs_initcall(mach, fn)		__define_machine_initcall(mach, fn, 5)
+#define machine_fs_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 5s)
+#define machine_rootfs_initcall(mach, fn)	__define_machine_initcall(mach, fn, rootfs)
+#define machine_device_initcall(mach, fn)	__define_machine_initcall(mach, fn, 6)
+#define machine_device_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 6s)
+#define machine_late_initcall(mach, fn)		__define_machine_initcall(mach, fn, 7)
+#define machine_late_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 7s)
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_MACHDEP_H */
diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h
new file mode 100644
index 0000000000..3a07c62973
--- /dev/null
+++ b/arch/powerpc/include/asm/macio.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MACIO_ASIC_H__
+#define __MACIO_ASIC_H__
+#ifdef __KERNEL__
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+extern struct bus_type macio_bus_type;
+
+/* MacIO device driver is defined later */
+struct macio_driver;
+struct macio_chip;
+
+#define MACIO_DEV_COUNT_RESOURCES	8
+#define MACIO_DEV_COUNT_IRQS		8
+
+/*
+ * the macio_bus structure is used to describe a "virtual" bus
+ * within a MacIO ASIC. It's typically provided by a macio_pci_asic
+ * PCI device, but could be provided differently as well (nubus
+ * machines using a fake OF tree).
+ *
+ * The pdev field can be NULL on non-PCI machines
+ */
+struct macio_bus
+{
+	struct macio_chip	*chip;		/* macio_chip (private use) */
+	int			index;		/* macio chip index in system */
+#ifdef CONFIG_PCI
+	struct pci_dev		*pdev;		/* PCI device hosting this bus */
+#endif
+};
+
+/*
+ * the macio_dev structure is used to describe a device
+ * within an Apple MacIO ASIC.
+ */
+struct macio_dev
+{
+	struct macio_bus	*bus;		/* macio bus this device is on */
+	struct macio_dev	*media_bay;	/* Device is part of a media bay */
+	struct platform_device	ofdev;
+	struct device_dma_parameters dma_parms; /* ide needs that */
+	int			n_resources;
+	struct resource		resource[MACIO_DEV_COUNT_RESOURCES];
+	int			n_interrupts;
+	struct resource		interrupt[MACIO_DEV_COUNT_IRQS];
+};
+#define	to_macio_device(d) container_of(d, struct macio_dev, ofdev.dev)
+#define	of_to_macio_device(d) container_of(d, struct macio_dev, ofdev)
+
+extern struct macio_dev *macio_dev_get(struct macio_dev *dev);
+extern void macio_dev_put(struct macio_dev *dev);
+
+/*
+ * Accessors to resources & interrupts and other device
+ * fields
+ */
+
+static inline int macio_resource_count(struct macio_dev *dev)
+{
+	return dev->n_resources;
+}
+
+static inline unsigned long macio_resource_start(struct macio_dev *dev, int resource_no)
+{
+	return dev->resource[resource_no].start;
+}
+
+static inline unsigned long macio_resource_end(struct macio_dev *dev, int resource_no)
+{
+	return dev->resource[resource_no].end;
+}
+
+static inline unsigned long macio_resource_len(struct macio_dev *dev, int resource_no)
+{
+	struct resource *res = &dev->resource[resource_no];
+	if (res->start == 0 || res->end == 0 || res->end < res->start)
+		return 0;
+	return resource_size(res);
+}
+
+extern int macio_enable_devres(struct macio_dev *dev);
+
+extern int macio_request_resource(struct macio_dev *dev, int resource_no, const char *name);
+extern void macio_release_resource(struct macio_dev *dev, int resource_no);
+extern int macio_request_resources(struct macio_dev *dev, const char *name);
+extern void macio_release_resources(struct macio_dev *dev);
+
+static inline int macio_irq_count(struct macio_dev *dev)
+{
+	return dev->n_interrupts;
+}
+
+static inline int macio_irq(struct macio_dev *dev, int irq_no)
+{
+	return dev->interrupt[irq_no].start;
+}
+
+static inline void macio_set_drvdata(struct macio_dev *dev, void *data)
+{
+	dev_set_drvdata(&dev->ofdev.dev, data);
+}
+
+static inline void* macio_get_drvdata(struct macio_dev *dev)
+{
+	return dev_get_drvdata(&dev->ofdev.dev);
+}
+
+static inline struct device_node *macio_get_of_node(struct macio_dev *mdev)
+{
+	return mdev->ofdev.dev.of_node;
+}
+
+#ifdef CONFIG_PCI
+static inline struct pci_dev *macio_get_pci_dev(struct macio_dev *mdev)
+{
+	return mdev->bus->pdev;
+}
+#endif
+
+/*
+ * A driver for a mac-io chip based device
+ */
+struct macio_driver
+{
+	int	(*probe)(struct macio_dev* dev, const struct of_device_id *match);
+	int	(*remove)(struct macio_dev* dev);
+
+	int	(*suspend)(struct macio_dev* dev, pm_message_t state);
+	int	(*resume)(struct macio_dev* dev);
+	int	(*shutdown)(struct macio_dev* dev);
+
+#ifdef CONFIG_PMAC_MEDIABAY
+	void	(*mediabay_event)(struct macio_dev* dev, int mb_state);
+#endif
+	struct device_driver	driver;
+};
+#define	to_macio_driver(drv) container_of(drv,struct macio_driver, driver)
+
+extern int macio_register_driver(struct macio_driver *);
+extern void macio_unregister_driver(struct macio_driver *);
+
+#endif /* __KERNEL__ */
+#endif /* __MACIO_ASIC_H__ */
diff --git a/arch/powerpc/include/asm/mc146818rtc.h b/arch/powerpc/include/asm/mc146818rtc.h
new file mode 100644
index 0000000000..d9e4ecd410
--- /dev/null
+++ b/arch/powerpc/include/asm/mc146818rtc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_MC146818RTC_H
+#define _ASM_POWERPC_MC146818RTC_H
+
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+
+#ifdef __KERNEL__
+
+#include <asm/io.h>
+
+#ifndef RTC_PORT
+#define RTC_PORT(x)	(0x70 + (x))
+#define RTC_ALWAYS_BCD	1	/* NOTE(review): name implies data is always BCD, not binary - confirm */
+#endif
+
+/*
+ * All machines supported so far access the RTC index register via
+ * an ISA port access, but the way to access the date register differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
+
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_MC146818RTC_H */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
new file mode 100644
index 0000000000..c9f0936bd3
--- /dev/null
+++ b/arch/powerpc/include/asm/mce.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Machine check exception header file.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_MCE_H__
+#define __ASM_PPC64_MCE_H__
+
+#include <linux/bitops.h>
+
+enum MCE_Version {
+	MCE_V1 = 1,
+};
+
+enum MCE_Severity {
+	MCE_SEV_NO_ERROR = 0,
+	MCE_SEV_WARNING = 1,
+	MCE_SEV_SEVERE = 2,
+	MCE_SEV_FATAL = 3,
+};
+
+enum MCE_Disposition {
+	MCE_DISPOSITION_RECOVERED = 0,
+	MCE_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum MCE_Initiator {
+	MCE_INITIATOR_UNKNOWN = 0,
+	MCE_INITIATOR_CPU = 1,
+	MCE_INITIATOR_PCI = 2,
+	MCE_INITIATOR_ISA = 3,
+	MCE_INITIATOR_MEMORY= 4,
+	MCE_INITIATOR_POWERMGM = 5,
+};
+
+enum MCE_ErrorType {
+	MCE_ERROR_TYPE_UNKNOWN = 0,
+	MCE_ERROR_TYPE_UE = 1,
+	MCE_ERROR_TYPE_SLB = 2,
+	MCE_ERROR_TYPE_ERAT = 3,
+	MCE_ERROR_TYPE_TLB = 4,
+	MCE_ERROR_TYPE_USER = 5,
+	MCE_ERROR_TYPE_RA = 6,
+	MCE_ERROR_TYPE_LINK = 7,
+	MCE_ERROR_TYPE_DCACHE = 8,
+	MCE_ERROR_TYPE_ICACHE = 9,
+};
+
+enum MCE_ErrorClass {
+	MCE_ECLASS_UNKNOWN = 0,
+	MCE_ECLASS_HARDWARE,
+	MCE_ECLASS_HARD_INDETERMINATE,
+	MCE_ECLASS_SOFTWARE,
+	MCE_ECLASS_SOFT_INDETERMINATE,
+};
+
+enum MCE_UeErrorType {
+	MCE_UE_ERROR_INDETERMINATE = 0,
+	MCE_UE_ERROR_IFETCH = 1,
+	MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+	MCE_UE_ERROR_LOAD_STORE = 3,
+	MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
+};
+
+enum MCE_SlbErrorType {
+	MCE_SLB_ERROR_INDETERMINATE = 0,
+	MCE_SLB_ERROR_PARITY = 1,
+	MCE_SLB_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_EratErrorType {
+	MCE_ERAT_ERROR_INDETERMINATE = 0,
+	MCE_ERAT_ERROR_PARITY = 1,
+	MCE_ERAT_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_TlbErrorType {
+	MCE_TLB_ERROR_INDETERMINATE = 0,
+	MCE_TLB_ERROR_PARITY = 1,
+	MCE_TLB_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_UserErrorType {
+	MCE_USER_ERROR_INDETERMINATE = 0,
+	MCE_USER_ERROR_TLBIE = 1,
+	MCE_USER_ERROR_SCV = 2,
+};
+
+enum MCE_RaErrorType {
+	MCE_RA_ERROR_INDETERMINATE = 0,
+	MCE_RA_ERROR_IFETCH = 1,
+	MCE_RA_ERROR_IFETCH_FOREIGN = 2,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 3,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 4,
+	MCE_RA_ERROR_LOAD = 5,
+	MCE_RA_ERROR_STORE = 6,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 7,
+	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 8,
+	MCE_RA_ERROR_LOAD_STORE_FOREIGN = 9,
+};
+
+enum MCE_LinkErrorType {
+	MCE_LINK_ERROR_INDETERMINATE = 0,
+	MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+	MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+	MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+	MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+	MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
+struct machine_check_event {
+	enum MCE_Version	version:8;
+	u8			in_use;
+	enum MCE_Severity	severity:8;
+	enum MCE_Initiator	initiator:8;
+	enum MCE_ErrorType	error_type:8;
+	enum MCE_ErrorClass	error_class:8;
+	enum MCE_Disposition	disposition:8;
+	bool			sync_error;
+	u16			cpu;
+	u64			gpr3;
+	u64			srr0;
+	u64			srr1;
+	union {
+		struct {
+			enum MCE_UeErrorType ue_error_type:8;
+			u8		effective_address_provided;
+			u8		physical_address_provided;
+			u8		ignore_event;
+			u8		reserved_1[4];
+			u64		effective_address;
+			u64		physical_address;
+			u8		reserved_2[8];
+		} ue_error;
+
+		struct {
+			enum MCE_SlbErrorType slb_error_type:8;
+			u8		effective_address_provided;
+			u8		reserved_1[6];
+			u64		effective_address;
+			u8		reserved_2[16];
+		} slb_error;
+
+		struct {
+			enum MCE_EratErrorType erat_error_type:8;
+			u8		effective_address_provided;
+			u8		reserved_1[6];
+			u64		effective_address;
+			u8		reserved_2[16];
+		} erat_error;
+
+		struct {
+			enum MCE_TlbErrorType tlb_error_type:8;
+			u8		effective_address_provided;
+			u8		reserved_1[6];
+			u64		effective_address;
+			u8		reserved_2[16];
+		} tlb_error;
+
+		struct {
+			enum MCE_UserErrorType user_error_type:8;
+			u8		effective_address_provided;
+			u8		reserved_1[6];
+			u64		effective_address;
+			u8		reserved_2[16];
+		} user_error;
+
+		struct {
+			enum MCE_RaErrorType ra_error_type:8;
+			u8		effective_address_provided;
+			u8		reserved_1[6];
+			u64		effective_address;
+			u8		reserved_2[16];
+		} ra_error;
+
+		struct {
+			enum MCE_LinkErrorType link_error_type:8;
+			u8		effective_address_provided;
+			u8		reserved_1[6];
+			u64		effective_address;
+			u8		reserved_2[16];
+		} link_error;
+	} u;
+};
+
+struct mce_error_info {
+	enum MCE_ErrorType error_type:8;
+	union {
+		enum MCE_UeErrorType ue_error_type:8;
+		enum MCE_SlbErrorType slb_error_type:8;
+		enum MCE_EratErrorType erat_error_type:8;
+		enum MCE_TlbErrorType tlb_error_type:8;
+		enum MCE_UserErrorType user_error_type:8;
+		enum MCE_RaErrorType ra_error_type:8;
+		enum MCE_LinkErrorType link_error_type:8;
+	} u;
+	enum MCE_Severity	severity:8;
+	enum MCE_Initiator	initiator:8;
+	enum MCE_ErrorClass	error_class:8;
+	bool			sync_error;
+	bool			ignore_event;
+};
+
+#define MAX_MC_EVT	10
+
+struct mce_info {
+	int mce_nest_count;
+	struct machine_check_event mce_event[MAX_MC_EVT];
+	/* Queue for delayed MCE events. */
+	int mce_queue_count;
+	struct machine_check_event mce_event_queue[MAX_MC_EVT];
+	/* Queue for delayed MCE UE events. */
+	int mce_ue_count;
+	struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
+};
+
+/* Release flags for get_mce_event() */
+#define MCE_EVENT_RELEASE	true
+#define MCE_EVENT_DONTRELEASE	false
+
+struct pt_regs;
+struct notifier_block;
+
+extern void save_mce_event(struct pt_regs *regs, long handled,
+			   struct mce_error_info *mce_err, uint64_t nip,
+			   uint64_t addr, uint64_t phys_addr);
+extern int get_mce_event(struct machine_check_event *mce, bool release);
+extern void release_mce_event(void);
+extern void machine_check_queue_event(void);
+extern void machine_check_print_event_info(struct machine_check_event *evt,
+					   bool user_mode, bool in_guest);
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
+extern void mce_common_process_ue(struct pt_regs *regs,
+				  struct mce_error_info *mce_err);
+void mce_irq_work_queue(void);
+int mce_register_notifier(struct notifier_block *nb);
+int mce_unregister_notifier(struct notifier_block *nb);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void mce_run_irq_context_handlers(void);
+#else
+static inline void mce_run_irq_context_handlers(void) { }	/* no-op stub */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void set_mce_pending_irq_work(void);
+void clear_mce_pending_irq_work(void);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void flush_and_reload_slb(void);
+void flush_erat(void);
+long __machine_check_early_realmode_p7(struct pt_regs *regs);
+long __machine_check_early_realmode_p8(struct pt_regs *regs);
+long __machine_check_early_realmode_p9(struct pt_regs *regs);
+long __machine_check_early_realmode_p10(struct pt_regs *regs);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void mce_init(void);
+#else
+static inline void mce_init(void) { }	/* no-op stub */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mediabay.h b/arch/powerpc/include/asm/mediabay.h
new file mode 100644
index 0000000000..230fda4707
--- /dev/null
+++ b/arch/powerpc/include/asm/mediabay.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mediabay.h: definitions for using the media bay
+ * on PowerBook 3400 and similar computers.
+ *
+ * Copyright (C) 1997 Paul Mackerras.
+ */
+#ifndef _PPC_MEDIABAY_H
+#define _PPC_MEDIABAY_H
+
+#ifdef __KERNEL__
+
+#define MB_FD		0	/* media bay contains floppy drive (automatic eject ?) */
+#define MB_FD1		1	/* media bay contains floppy drive (manual eject ?) */
+#define MB_SOUND	2	/* sound device ? */
+#define MB_CD		3	/* media bay contains ATA drive such as CD or ZIP */
+#define MB_PCI		5	/* media bay contains a PCI device */
+#define MB_POWER	6	/* media bay contains a Power device (???) */
+#define MB_NO		7	/* media bay contains nothing */
+
+struct macio_dev;
+
+#ifdef CONFIG_PMAC_MEDIABAY
+
+/* Check the content type of the bay, returns MB_NO if the bay is still
+ * transitioning
+ */
+extern int check_media_bay(struct macio_dev *bay);
+
+/* The ATA driver uses the calls below to temporarily hold on the
+ * media bay callbacks while initializing the interface
+ */
+extern void lock_media_bay(struct macio_dev *bay);
+extern void unlock_media_bay(struct macio_dev *bay);
+
+#else
+
+static inline int check_media_bay(struct macio_dev *bay)
+{
+	return MB_NO;
+}
+
+static inline void lock_media_bay(struct macio_dev *bay) { }
+static inline void unlock_media_bay(struct macio_dev *bay) { }
+
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _PPC_MEDIABAY_H */
diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
new file mode 100644
index 0000000000..2f26b8fc8d
--- /dev/null
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_MEM_ENCRYPT_H
+#define _ASM_POWERPC_MEM_ENCRYPT_H
+
+#include <asm/svm.h>
+
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+	return is_secure_guest();
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* _ASM_POWERPC_MEM_ENCRYPT_H */
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
new file mode 100644
index 0000000000..de7f791579
--- /dev/null
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+	/*
+	 * Only need the full barrier when switching between processes.
+	 * Barrier when switching from kernel to userspace is not
+	 * required here, given that it is implied by mmdrop(). Barrier
+	 * when switching from userspace to kernel is not needed after
+	 * store to rq->curr.
+	 */
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    likely(!(atomic_read(&next->membarrier_state) &
+		     (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+		      MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+		return;
+
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 */
+	smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
new file mode 100644
index 0000000000..17a77d47ed
--- /dev/null
+++ b/arch/powerpc/include/asm/mman.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ */
+#ifndef _ASM_POWERPC_MMAN_H
+#define _ASM_POWERPC_MMAN_H
+
+#include <uapi/asm/mman.h>
+
+#ifdef CONFIG_PPC64
+
+#include <asm/cputable.h>
+#include <linux/mm.h>
+#include <linux/pkeys.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/firmware.h>
+
+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+		unsigned long pkey)
+{
+#ifdef CONFIG_PPC_MEM_KEYS
+	return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+	return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
+{
+	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
+		return false;
+	if (prot & PROT_SAO) {
+		if (!cpu_has_feature(CPU_FTR_SAO))
+			return false;
+		if (firmware_has_feature(FW_FEATURE_LPAR) &&
+		    !IS_ENABLED(CONFIG_PPC_PROT_SAO_LPAR))
+			return false;
+	}
+	return true;
+}
+#define arch_validate_prot arch_validate_prot
+
+#endif /* CONFIG_PPC64 */
+#endif	/* _ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/asm/mmiowb.h b/arch/powerpc/include/asm/mmiowb.h
new file mode 100644
index 0000000000..74a00127eb
--- /dev/null
+++ b/arch/powerpc/include/asm/mmiowb.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMIOWB_H
+#define _ASM_POWERPC_MMIOWB_H
+
+#ifdef CONFIG_MMIOWB
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/paca.h>
+
+#define arch_mmiowb_state()	(&local_paca->mmiowb_state)
+#define mmiowb()		mb()
+
+#endif /* CONFIG_MMIOWB */
+
+#include <asm-generic/mmiowb.h>
+
+#endif	/* _ASM_POWERPC_MMIOWB_H */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
new file mode 100644
index 0000000000..52cc25864a
--- /dev/null
+++ b/arch/powerpc/include/asm/mmu.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMU_H_
+#define _ASM_POWERPC_MMU_H_
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+
+#include <asm/asm-const.h>
+
+/*
+ * MMU features bit definitions
+ */
+
+/*
+ * MMU families
+ */
+#define MMU_FTR_HPTE_TABLE		ASM_CONST(0x00000001)
+#define MMU_FTR_TYPE_8xx		ASM_CONST(0x00000002)
+#define MMU_FTR_TYPE_40x		ASM_CONST(0x00000004)
+#define MMU_FTR_TYPE_44x		ASM_CONST(0x00000008)
+#define MMU_FTR_TYPE_FSL_E		ASM_CONST(0x00000010)
+#define MMU_FTR_TYPE_47x		ASM_CONST(0x00000020)
+
+/* Radix page table supported and enabled */
+#define MMU_FTR_TYPE_RADIX		ASM_CONST(0x00000040)
+
+/*
+ * Individual features below.
+ */
+
+/*
+ * Supports KUAP feature
+ * key 0 controlling userspace addresses on radix
+ * Key 3 on hash
+ */
+#define MMU_FTR_KUAP		ASM_CONST(0x00000200)
+
+/*
+ * Supports KUEP feature
+ * key 0 controlling userspace addresses on radix
+ * Key 3 on hash
+ */
+#define MMU_FTR_BOOK3S_KUEP		ASM_CONST(0x00000400)
+
+/*
+ * Support for memory protection keys.
+ */
+#define MMU_FTR_PKEY			ASM_CONST(0x00000800)
+
+/* Guest Translation Shootdown Enable */
+#define MMU_FTR_GTSE			ASM_CONST(0x00001000)
+
+/*
+ * Support for 68 bit VA space. We added that from ISA 2.05
+ */
+#define MMU_FTR_68_BIT_VA		ASM_CONST(0x00002000)
+/*
+ * Kernel read only support.
+ * We added the ppp value 0b110 in ISA 2.04.
+ */
+#define MMU_FTR_KERNEL_RO		ASM_CONST(0x00004000)
+
+/*
+ * We need to clear the top 16 bits of va (from the remaining 64 bits) in
+ * tlbie* instructions
+ */
+#define MMU_FTR_TLBIE_CROP_VA		ASM_CONST(0x00008000)
+
+/* Enable use of high BAT registers */
+#define MMU_FTR_USE_HIGH_BATS		ASM_CONST(0x00010000)
+
+/* Enable >32-bit physical addresses on 32-bit processor, only used
+ * by CONFIG_PPC_BOOK3S_32 currently as BookE supports that from day 1
+ */
+#define MMU_FTR_BIG_PHYS		ASM_CONST(0x00020000)
+
+/* Enable use of broadcast TLB invalidations. We don't always set it
+ * on processors that support it due to other constraints with the
+ * use of such invalidations
+ */
+#define MMU_FTR_USE_TLBIVAX_BCAST	ASM_CONST(0x00040000)
+
+/* Enable use of tlbilx invalidate instructions.
+ */
+#define MMU_FTR_USE_TLBILX		ASM_CONST(0x00080000)
+
+/* This indicates that the processor cannot handle multiple outstanding
+ * broadcast tlbivax or tlbsync. This makes the code use a spinlock
+ * around such invalidate forms.
+ */
+#define MMU_FTR_LOCK_BCAST_INVAL	ASM_CONST(0x00100000)
+
+/* This indicates that the processor doesn't handle way selection
+ * properly and needs SW to track and update the LRU state.  This
+ * is specific to an errata on e300c2/c3/c4 class parts
+ */
+#define MMU_FTR_NEED_DTLB_SW_LRU	ASM_CONST(0x00200000)
+
+/* Doesn't support the B bit (1T segment) in SLBIE
+ */
+#define MMU_FTR_NO_SLBIE_B		ASM_CONST(0x02000000)
+
+/* Support 16M large pages
+ */
+#define MMU_FTR_16M_PAGE		ASM_CONST(0x04000000)
+
+/* Supports TLBIEL variant
+ */
+#define MMU_FTR_TLBIEL			ASM_CONST(0x08000000)
+
+/* Supports tlbies w/o locking
+ */
+#define MMU_FTR_LOCKLESS_TLBIE		ASM_CONST(0x10000000)
+
+/* Large pages can be marked CI
+ */
+#define MMU_FTR_CI_LARGE_PAGE		ASM_CONST(0x20000000)
+
+/* 1T segments available
+ */
+#define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
+
+// NX paste RMA reject in DSI
+#define MMU_FTR_NX_DSI			ASM_CONST(0x80000000)
+
+/* MMU feature bit sets for various CPUs */
+/* Composite sets are parenthesized so each one expands as a single operand */
+#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	(MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE)
+#define MMU_FTRS_POWER		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970		(MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA)
+#define MMU_FTRS_POWER5		(MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE)
+#define MMU_FTRS_POWER6		(MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA)
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER8		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER9		MMU_FTRS_POWER6
+#define MMU_FTRS_POWER10	MMU_FTRS_POWER6
+#define MMU_FTRS_CELL		(MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | MMU_FTR_CI_LARGE_PAGE)
+#define MMU_FTRS_PA6T		(MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				 MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B)
+#ifndef __ASSEMBLY__
+#include <linux/bug.h>
+#include <asm/cputable.h>
+#include <asm/page.h>
+
+typedef pte_t *pgtable_t;
+
+enum {
+	MMU_FTRS_POSSIBLE =
+#if defined(CONFIG_PPC_BOOK3S_604)
+		MMU_FTR_HPTE_TABLE |
+#endif
+#ifdef CONFIG_PPC_8xx
+		MMU_FTR_TYPE_8xx |
+#endif
+#ifdef CONFIG_40x
+		MMU_FTR_TYPE_40x |
+#endif
+#ifdef CONFIG_PPC_47x
+		MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL |
+#elif defined(CONFIG_44x)
+		MMU_FTR_TYPE_44x |
+#endif
+#ifdef CONFIG_PPC_E500
+		MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX |
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+		MMU_FTR_USE_HIGH_BATS |
+#endif
+#ifdef CONFIG_PPC_83xx
+		MMU_FTR_NEED_DTLB_SW_LRU |
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+		MMU_FTR_KERNEL_RO |
+#ifdef CONFIG_PPC_64S_HASH_MMU
+		MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
+		MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
+		MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
+		MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE |
+#endif
+#ifdef CONFIG_PPC_RADIX_MMU
+		MMU_FTR_TYPE_RADIX |
+		MMU_FTR_GTSE | MMU_FTR_NX_DSI |
+#endif /* CONFIG_PPC_RADIX_MMU */
+#endif
+#ifdef CONFIG_PPC_KUAP
+	MMU_FTR_KUAP |
+#endif /* CONFIG_PPC_KUAP */
+#ifdef CONFIG_PPC_MEM_KEYS
+	MMU_FTR_PKEY |
+#endif
+#ifdef CONFIG_PPC_KUEP
+	MMU_FTR_BOOK3S_KUEP |
+#endif /* CONFIG_PPC_KUEP */
+
+		0,
+};
+
+#if defined(CONFIG_PPC_BOOK3S_604) && !defined(CONFIG_PPC_BOOK3S_603)
+#define MMU_FTRS_ALWAYS		MMU_FTR_HPTE_TABLE
+#endif
+#ifdef CONFIG_PPC_8xx
+#define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_8xx
+#endif
+#ifdef CONFIG_40x
+#define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_40x
+#endif
+#ifdef CONFIG_PPC_47x
+#define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_47x
+#elif defined(CONFIG_44x)
+#define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_44x
+#endif
+#ifdef CONFIG_PPC_E500
+#define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_FSL_E
+#endif
+
+/* BOOK3S_64 options */
+#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_RADIX
+#elif !defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS		MMU_FTR_HPTE_TABLE
+#endif
+
+#ifndef MMU_FTRS_ALWAYS
+#define MMU_FTRS_ALWAYS		0
+#endif
+
+static __always_inline bool early_mmu_has_feature(unsigned long feature)
+{
+	if (MMU_FTRS_ALWAYS & feature)
+		return true;
+
+	return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_MMU_FTR_KEYS	32
+
+extern struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS];
+
+extern void mmu_feature_keys_init(void);
+
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+	int i;
+
+#ifndef __clang__ /* clang can't cope with this */
+	BUILD_BUG_ON(!__builtin_constant_p(feature));
+#endif
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+	if (!static_key_initialized) {
+		printk("Warning! mmu_has_feature() used prior to jump label init!\n");
+		dump_stack();
+		return early_mmu_has_feature(feature);
+	}
+#endif
+
+	if (MMU_FTRS_ALWAYS & feature)
+		return true;
+
+	if (!(MMU_FTRS_POSSIBLE & feature))
+		return false;
+
+	i = __builtin_ctzl(feature);
+	return static_branch_likely(&mmu_feature_keys[i]);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+	int i;
+
+	i = __builtin_ctzl(feature);
+	cur_cpu_spec->mmu_features &= ~feature;
+	static_branch_disable(&mmu_feature_keys[i]);
+}
+#else
+
+static inline void mmu_feature_keys_init(void)
+{
+
+}
+
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+	return early_mmu_has_feature(feature);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+	cur_cpu_spec->mmu_features &= ~feature;
+}
+#endif /* CONFIG_JUMP_LABEL_FEATURE_CHECKS */
+
+extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
+
+#ifdef CONFIG_PPC64
+/* This is our real memory area size on ppc64 server; on embedded, we
+ * make it match the size of our bolted TLB area
+ */
+extern u64 ppc64_rma_size;
+
+/* Cleanup function used by kexec */
+extern void mmu_cleanup_all(void);
+extern void radix__mmu_cleanup_all(void);
+
+/* Functions for creating and updating partition table on POWER9 */
+extern void mmu_partition_table_init(void);
+extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+					  unsigned long dw1, bool flush);
+#endif /* CONFIG_PPC64 */
+
+struct mm_struct;
+#ifdef CONFIG_DEBUG_VM
+extern void assert_pte_locked(struct mm_struct *mm, unsigned long addr);
+#else /* CONFIG_DEBUG_VM */
+static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+}
+#endif /* !CONFIG_DEBUG_VM */
+
+static __always_inline bool radix_enabled(void)
+{
+	return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+
+static __always_inline bool early_radix_enabled(void)
+{
+	return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static inline bool strict_kernel_rwx_enabled(void)
+{
+	return rodata_enabled;
+}
+#else
+static inline bool strict_kernel_rwx_enabled(void)
+{
+	return false;
+}
+#endif
+
+static inline bool strict_module_rwx_enabled(void)
+{
+	return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled();
+}
+#endif /* !__ASSEMBLY__ */
+
+/* The kernel uses the constants below to index into the page sizes array.
+ * The use of fixed constants for this purpose is better for the performance
+ * of the low level hash refill handlers.
+ *
+ * A non supported page size has a "shift" field set to 0
+ *
+ * Any new page size being implemented can get a new entry in here. Whether
+ * the kernel will use it or not is a different matter though. The actual page
+ * size used by hugetlbfs is not defined here and may be made variable
+ *
+ * Note: This array ended up being a false good idea as it's growing to the
+ * point where I wonder if we should replace it with something different,
+ * to think about, feedback welcome. --BenH.
+ */
+
+/* These are #defines as they have to be used in assembly */
+#define MMU_PAGE_4K	0
+#define MMU_PAGE_16K	1
+#define MMU_PAGE_64K	2
+#define MMU_PAGE_64K_AP	3	/* "Admixed pages" (hash64 only) */
+#define MMU_PAGE_256K	4
+#define MMU_PAGE_512K	5
+#define MMU_PAGE_1M	6
+#define MMU_PAGE_2M	7
+#define MMU_PAGE_4M	8
+#define MMU_PAGE_8M	9
+#define MMU_PAGE_16M	10
+#define MMU_PAGE_64M	11
+#define MMU_PAGE_256M	12
+#define MMU_PAGE_1G	13
+#define MMU_PAGE_16G	14
+#define MMU_PAGE_64G	15
+
+/*
+ * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16
+ * Also we need to change the type of mm_context.low/high_slices_psize.
+ */
+#define MMU_PAGE_COUNT	16
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/mmu.h>
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#ifndef __ASSEMBLY__
+/* MMU initialization */
+extern void early_init_mmu(void);
+extern void early_init_mmu_secondary(void);
+extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				       phys_addr_t first_memblock_size);
+static inline void mmu_early_init_devtree(void) { }
+
+static inline void pkey_early_init_devtree(void) {}
+
+extern void *abatron_pteptrs[2];
+#endif /* __ASSEMBLY__ */
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32)
+/* 32-bit classic hash table MMU */
+#include <asm/book3s/32/mmu-hash.h>
+#elif defined(CONFIG_PPC_MMU_NOHASH)
+#include <asm/nohash/mmu.h>
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
new file mode 100644
index 0000000000..37bffa0f79
--- /dev/null
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_MMU_CONTEXT_H
+#define __ASM_POWERPC_MMU_CONTEXT_H
+#ifdef __KERNEL__
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <asm/mmu.h>	
+#include <asm/cputable.h>
+#include <asm/cputhreads.h>
+
+/*
+ * Most of the context management is out of line
+ */
+#define init_new_context init_new_context
+extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+#define destroy_context destroy_context
+extern void destroy_context(struct mm_struct *mm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct mm_iommu_table_group_mem_t;
+
+extern bool mm_iommu_preregistered(struct mm_struct *mm);
+extern long mm_iommu_new(struct mm_struct *mm,
+		unsigned long ua, unsigned long entries,
+		struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
+		struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_put(struct mm_struct *mm,
+		struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_init(struct mm_struct *mm);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+		unsigned long ua, unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
+		unsigned long ua, unsigned long entries);
+extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size);
+extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+#else
+static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size)
+{
+	return false;
+}
+static inline void mm_iommu_init(struct mm_struct *mm) { }
+#endif
+extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+extern void radix__switch_mmu_context(struct mm_struct *prev,
+				      struct mm_struct *next);
+static inline void switch_mmu_context(struct mm_struct *prev,
+				      struct mm_struct *next,
+				      struct task_struct *tsk)
+{
+	if (radix_enabled())
+		return radix__switch_mmu_context(prev, next);
+	return switch_slb(tsk, next);
+}
+
+extern int hash__alloc_context_id(void);
+void __init hash__reserve_context_id(int id);
+extern void __destroy_context(int context_id);
+static inline void mmu_context_init(void) { }
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static inline int alloc_extended_context(struct mm_struct *mm,
+					 unsigned long ea)
+{
+	int context_id;
+
+	int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+	context_id = hash__alloc_context_id();
+	if (context_id < 0)
+		return context_id;
+
+	VM_WARN_ON(mm->context.extended_id[index]);
+	mm->context.extended_id[index] = context_id;
+	return context_id;
+}
+
+/*
+ * Return true when get_user_context() reports a zero context id for @ea in
+ * @mm->context, and false for any non-zero id.
+ */
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+	int id = get_user_context(&mm->context, ea);
+	return id == 0;
+}
+#endif
+
+#else
+extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
+extern unsigned long __init_new_context(void);
+extern void __destroy_context(unsigned long context_id);
+extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+					 unsigned long ea)
+{
+	/* non book3s_64 should never find this called */
+	WARN_ON(1);
+	return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+	return false;
+}
+#endif
+
+extern int use_cop(unsigned long acop, struct mm_struct *mm);
+extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+	atomic_inc(&mm->context.active_cpus);
+}
+
+static inline void dec_mm_active_cpus(struct mm_struct *mm)
+{
+	VM_WARN_ON_ONCE(atomic_read(&mm->context.active_cpus) <= 0);
+	atomic_dec(&mm->context.active_cpus);
+}
+
+static inline void mm_context_add_copro(struct mm_struct *mm)
+{
+	/*
+	 * If any copro is in use, increment the active CPU count
+	 * in order to force TLB invalidations to be global as to
+	 * propagate to the Nest MMU.
+	 */
+	if (atomic_inc_return(&mm->context.copros) == 1)
+		inc_mm_active_cpus(mm);
+}
+
+static inline void mm_context_remove_copro(struct mm_struct *mm)
+{
+	int c;
+
+	/*
+	 * When removing the last copro, we need to broadcast a global
+	 * flush of the full mm, as the next TLBI may be local and the
+	 * nMMU and/or PSL need to be cleaned up.
+	 *
+	 * Both the 'copros' and 'active_cpus' counts are looked at in
+	 * radix__flush_all_mm() to determine the scope (local/global)
+	 * of the TLBIs, so we need to flush first before decrementing
+	 * 'copros'. If this API is used by several callers for the
+	 * same context, it can lead to over-flushing. It's hopefully
+	 * not common enough to be a problem.
+	 *
+	 * Skip on hash, as we don't know how to do the proper flush
+	 * for the time being. Invalidations will remain global if
+	 * used on hash. Note that we can't drop 'copros' either, as
+	 * it could make some invalidations local with no flush
+	 * in-between.
+	 */
+	if (radix_enabled()) {
+		radix__flush_all_mm(mm);
+
+		c = atomic_dec_if_positive(&mm->context.copros);
+		/* Detect imbalance between add and remove */
+		WARN_ON(c < 0);
+
+		if (c == 0)
+			dec_mm_active_cpus(mm);
+	}
+}
+
+/*
+ * vas_windows counter shows number of open windows in the mm
+ * context. During context switch, use this counter to clear the
+ * foreign real address mapping (CP_ABORT) for the thread / process
+ * that intend to use COPY/PASTE. When a process closes all windows,
+ * disable CP_ABORT which is expensive to run.
+ *
+ * For user context, register a copro so that TLBIs are seen by the
+ * nest MMU. mm_context_add/remove_vas_window() are used only for user
+ * space windows.
+ */
+static inline void mm_context_add_vas_window(struct mm_struct *mm)
+{
+	atomic_inc(&mm->context.vas_windows);
+	mm_context_add_copro(mm);
+}
+
+static inline void mm_context_remove_vas_window(struct mm_struct *mm)
+{
+	int v;
+
+	mm_context_remove_copro(mm);
+	v = atomic_dec_if_positive(&mm->context.vas_windows);
+
+	/* Detect imbalance between add and remove */
+	WARN_ON(v < 0);
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+static inline void mm_context_add_copro(struct mm_struct *mm) { }
+static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+#endif
+
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end);
+#else
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+					   unsigned long lpid,
+					   unsigned long type,
+					   unsigned long pg_sizes,
+					   unsigned long start,
+					   unsigned long end) { }
+#endif
+
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm_irqs_off(prev, next, tsk);
+	local_irq_restore(flags);
+}
+#define switch_mm_irqs_off switch_mm_irqs_off
+
+/*
+ * After we have set current->mm to a new value, this activates
+ * the context for the new mm so we see the new mappings.
+ */
+#define activate_mm activate_mm
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+	switch_mm_irqs_off(prev, next, current);
+}
+
+/* We don't currently use enter_lazy_tlb() for anything */
+#ifdef CONFIG_PPC_BOOK3E_64
+#define enter_lazy_tlb enter_lazy_tlb
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+				  struct task_struct *tsk)
+{
+	/* 64-bit Book3E keeps track of current PGD in the PACA */
+	get_paca()->pgd = NULL;
+}
+#endif
+
+extern void arch_exit_mmap(struct mm_struct *mm);
+
+static inline void arch_unmap(struct mm_struct *mm,
+			      unsigned long start, unsigned long end)
+{
+	unsigned long vdso_base = (unsigned long)mm->context.vdso;
+
+	if (start <= vdso_base && vdso_base < end)
+		mm->context.vdso = NULL;
+}
+
+#ifdef CONFIG_PPC_MEM_KEYS
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+			       bool execute, bool foreign);
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm);
+#else /* CONFIG_PPC_MEM_KEYS */
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+		bool write, bool execute, bool foreign)
+{
+	/* by default, allow everything */
+	return true;
+}
+
+#define pkey_mm_init(mm)
+#define arch_dup_pkeys(oldmm, mm)
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
+{
+	return 0x0UL;
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+				struct mm_struct *mm)
+{
+	arch_dup_pkeys(oldmm, mm);
+	return 0;
+}
+
+#include <asm-generic/mmu_context.h>
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
new file mode 100644
index 0000000000..4c6c6dbd18
--- /dev/null
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99
+ *
+ * PowerPC64 port:
+ * Copyright (C) 2002 Anton Blanchard, IBM Corp.
+ */
+#ifndef _ASM_MMZONE_H_
+#define _ASM_MMZONE_H_
+#ifdef __KERNEL__
+
+#include <linux/cpumask.h>
+
+/*
+ * generic non-linear memory support:
+ *
+ * 1) we will not split memory into more chunks than will fit into the
+ *    flags field of the struct page
+ */
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+/*
+ * Return a pointer to the node data for node n.
+ */
+#define NODE_DATA(nid)		(node_data[nid])
+
+/*
+ * Following are specific to this numa platform.
+ */
+
+extern int numa_cpu_lookup_table[];
+extern cpumask_var_t node_to_cpumask_map[];
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern unsigned long max_pfn;
+u64 memory_hotplug_max(void);
+#else
+#define memory_hotplug_max() memblock_end_of_DRAM()
+#endif
+
+#else
+#define memory_hotplug_max() memblock_end_of_DRAM()
+#endif /* CONFIG_NUMA */
+#ifdef CONFIG_FA_DUMP
+#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
+#endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern int create_section_mapping(unsigned long start, unsigned long end,
+				  int nid, pgprot_t prot);
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
new file mode 100644
index 0000000000..a8e2e8339f
--- /dev/null
+++ b/arch/powerpc/include/asm/module.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_MODULE_H
+#define _ASM_POWERPC_MODULE_H
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <asm/bug.h>
+#include <asm-generic/module.h>
+
+#ifndef __powerpc64__
+/*
+ * Thanks to Paul M for explaining this.
+ *
+ * PPC can only do rel jumps +/- 32MB, and often the kernel and other
+ * modules are further away than this.  So, we jump to a table of
+ * trampolines attached to the module (the Procedure Linkage Table)
+ * whenever that happens.
+ */
+
+struct ppc_plt_entry {
+	/* 16 byte jump instruction sequence (4 instructions) */
+	unsigned int jump[4];
+};
+#endif	/* __powerpc64__ */
+
+
+struct mod_arch_specific {
+#ifdef __powerpc64__
+	unsigned int stubs_section;	/* Index of stubs section in module */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	unsigned int got_section;	/* What section is the GOT? */
+	unsigned int pcpu_section;	/* .data..percpu section */
+#else
+	unsigned int toc_section;	/* What section is the TOC? */
+	bool toc_fixed;			/* Have we fixed up .TOC.? */
+#endif
+
+	/* For module function descriptor dereference */
+	unsigned long start_opd;
+	unsigned long end_opd;
+#else /* powerpc64 */
+	/* Indices of PLT sections within module. */
+	unsigned int core_plt_section;
+	unsigned int init_plt_section;
+#endif /* powerpc64 */
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	unsigned long tramp;
+	unsigned long tramp_regs;
+#endif
+
+	/* List of BUG addresses, source line numbers and filenames */
+	struct list_head bug_list;
+	struct bug_entry *bug_table;
+	unsigned int num_bugs;
+};
+
+/*
+ * Select ELF headers.
+ * Make empty sections for module_frob_arch_sections to expand.
+ */
+
+#ifdef __powerpc64__
+#    ifdef MODULE
+	asm(".section .stubs,\"ax\",@nobits; .align 3; .previous");
+#        ifdef CONFIG_PPC_KERNEL_PCREL
+	    asm(".section .mygot,\"a\",@nobits; .align 3; .previous");
+#        endif
+#    endif
+#else
+#    ifdef MODULE
+	asm(".section .plt,\"ax\",@nobits; .align 3; .previous");
+	asm(".section .init.plt,\"ax\",@nobits; .align 3; .previous");
+#    endif	/* MODULE */
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+int module_trampoline_target(struct module *mod, unsigned long trampoline,
+			     unsigned long *target);
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs);
+#else
+static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+	return 0;
+}
+#endif
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_MODULE_H */
diff --git a/arch/powerpc/include/asm/module.lds.h b/arch/powerpc/include/asm/module.lds.h
new file mode 100644
index 0000000000..cea5dc124b
--- /dev/null
+++ b/arch/powerpc/include/asm/module.lds.h
@@ -0,0 +1,8 @@
+/* Force alignment of .toc section.  */
+SECTIONS
+{
+	.toc 0 : ALIGN(256)
+	{
+		*(.got .toc)
+	}
+}
diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h
new file mode 100644
index 0000000000..9ae49e743b
--- /dev/null
+++ b/arch/powerpc/include/asm/mpc5121.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * MPC5121 Prototypes and definitions
+ */
+
+#ifndef __ASM_POWERPC_MPC5121_H__
+#define __ASM_POWERPC_MPC5121_H__
+
+/* MPC512x Reset module registers */
+struct mpc512x_reset_module {
+	u32	rcwlr;	/* Reset Configuration Word Low Register */
+	u32	rcwhr;	/* Reset Configuration Word High Register */
+	u32	reserved1;
+	u32	reserved2;
+	u32	rsr;	/* Reset Status Register */
+	u32	rmr;	/* Reset Mode Register */
+	u32	rpr;	/* Reset Protection Register */
+	u32	rcr;	/* Reset Control Register */
+	u32	rcer;	/* Reset Control Enable Register */
+};
+
+/*
+ * Clock Control Module
+ */
+struct mpc512x_ccm {
+	u32	spmr;	/* System PLL Mode Register */
+	u32	sccr1;	/* System Clock Control Register 1 */
+	u32	sccr2;	/* System Clock Control Register 2 */
+	u32	scfr1;	/* System Clock Frequency Register 1 */
+	u32	scfr2;	/* System Clock Frequency Register 2 */
+	u32	scfr2s;	/* System Clock Frequency Shadow Register 2 */
+	u32	bcr;	/* Bread Crumb Register */
+	u32	psc_ccr[12];	/* PSC Clock Control Registers */
+	u32	spccr;	/* SPDIF Clock Control Register */
+	u32	cccr;	/* CFM Clock Control Register */
+	u32	dccr;	/* DIU Clock Control Register */
+	u32	mscan_ccr[4];	/* MSCAN Clock Control Registers */
+	u32	out_ccr[4];	/* OUT CLK Configure Registers */
+	u32	rsv0[2];	/* Reserved */
+	u32	scfr3;		/* System Clock Frequency Register 3 */
+	u32	rsv1[3];	/* Reserved */
+	u32	spll_lock_cnt;	/* System PLL Lock Counter */
+	u8	res[0x6c];	/* Reserved */
+};
+
+/*
+ * LPC Module
+ */
+struct mpc512x_lpc {
+	u32	cs_cfg[8];	/* CS config */
+	u32	cs_ctrl;	/* CS Control Register */
+	u32	cs_status;	/* CS Status Register */
+	u32	burst_ctrl;	/* CS Burst Control Register */
+	u32	deadcycle_ctrl;	/* CS Deadcycle Control Register */
+	u32	holdcycle_ctrl;	/* CS Holdcycle Control Register */
+	u32	alt;		/* Address Latch Timing Register */
+};
+
+int mpc512x_cs_config(unsigned int cs, u32 val);
+
+/*
+ * SCLPC Module (LPB FIFO)
+ */
+struct mpc512x_lpbfifo {
+	u32	pkt_size;	/* SCLPC Packet Size Register */
+	u32	start_addr;	/* SCLPC Start Address Register */
+	u32	ctrl;		/* SCLPC Control Register */
+	u32	enable;		/* SCLPC Enable Register */
+	u32	reserved1;
+	u32	status;		/* SCLPC Status Register */
+	u32	bytes_done;	/* SCLPC Bytes Done Register */
+	u32	emb_sc;		/* EMB Share Counter Register */
+	u32	emb_pc;		/* EMB Pause Control Register */
+	u32	reserved2[7];
+	u32	data_word;	/* LPC RX/TX FIFO Data Word Register */
+	u32	fifo_status;	/* LPC RX/TX FIFO Status Register */
+	u32	fifo_ctrl;	/* LPC RX/TX FIFO Control Register */
+	u32	fifo_alarm;	/* LPC RX/TX FIFO Alarm Register */
+};
+
+#define MPC512X_SCLPC_START		(1U << 31)	/* unsigned: bit 31 is int's sign bit */
+#define MPC512X_SCLPC_CS(x)		(((x) & 0x7) << 24)
+#define MPC512X_SCLPC_FLUSH		(1 << 17)
+#define MPC512X_SCLPC_READ		(1 << 16)
+#define MPC512X_SCLPC_DAI		(1 << 8)
+#define MPC512X_SCLPC_BPT(x)		((x) & 0x3f)
+#define MPC512X_SCLPC_RESET		(1 << 24)
+#define MPC512X_SCLPC_FIFO_RESET	(1 << 16)
+#define MPC512X_SCLPC_ABORT_INT_ENABLE	(1 << 9)
+#define MPC512X_SCLPC_NORM_INT_ENABLE	(1 << 8)
+#define MPC512X_SCLPC_ENABLE		(1 << 0)
+#define MPC512X_SCLPC_SUCCESS		(1 << 24)
+#define MPC512X_SCLPC_FIFO_CTRL(x)	(((x) & 0x7) << 24)
+#define MPC512X_SCLPC_FIFO_ALARM(x)	((x) & 0x3ff)
+
+enum lpb_dev_portsize {
+	LPB_DEV_PORTSIZE_UNDEFINED = 0,
+	LPB_DEV_PORTSIZE_1_BYTE = 1,
+	LPB_DEV_PORTSIZE_2_BYTES = 2,
+	LPB_DEV_PORTSIZE_4_BYTES = 4,
+	LPB_DEV_PORTSIZE_8_BYTES = 8
+};
+
+enum mpc512x_lpbfifo_req_dir {
+	MPC512X_LPBFIFO_REQ_DIR_READ,
+	MPC512X_LPBFIFO_REQ_DIR_WRITE
+};
+
+struct mpc512x_lpbfifo_request {
+	phys_addr_t dev_phys_addr; /* physical address of some device on LPB */
+	void *ram_virt_addr; /* virtual address of some region in RAM */
+	u32 size;
+	enum lpb_dev_portsize portsize;
+	enum mpc512x_lpbfifo_req_dir dir;
+	void (*callback)(struct mpc512x_lpbfifo_request *);
+};
+
+int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req);
+
+#endif /* __ASM_POWERPC_MPC5121_H__ */
diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h
new file mode 100644
index 0000000000..01ae6c351e
--- /dev/null
+++ b/arch/powerpc/include/asm/mpc52xx.h
@@ -0,0 +1,324 @@
+/*
+ * Prototypes, etc. for the Freescale MPC52xx embedded cpu chips
+ * May need to be cleaned as the port goes on ...
+ *
+ * Copyright (C) 2004-2005 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003 MontaVista, Software, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_POWERPC_MPC52xx_H__
+#define __ASM_POWERPC_MPC52xx_H__
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+#include <asm/mpc5xxx.h>
+#endif /* __ASSEMBLY__ */
+
+#include <linux/suspend.h>
+
+/* Variants of the 5200(B) */
+#define MPC5200_SVR		0x80110010
+#define MPC5200_SVR_MASK	0xfffffff0
+#define MPC5200B_SVR		0x80110020
+#define MPC5200B_SVR_MASK	0xfffffff0
+
+/* ======================================================================== */
+/* Structures mapping of some unit register set                             */
+/* ======================================================================== */
+
+#ifndef __ASSEMBLY__
+
+/* Memory Mapping Control */
+struct mpc52xx_mmap_ctl {
+	u32 mbar;		/* MMAP_CTRL + 0x00 */
+
+	u32 cs0_start;		/* MMAP_CTRL + 0x04 */
+	u32 cs0_stop;		/* MMAP_CTRL + 0x08 */
+	u32 cs1_start;		/* MMAP_CTRL + 0x0c */
+	u32 cs1_stop;		/* MMAP_CTRL + 0x10 */
+	u32 cs2_start;		/* MMAP_CTRL + 0x14 */
+	u32 cs2_stop;		/* MMAP_CTRL + 0x18 */
+	u32 cs3_start;		/* MMAP_CTRL + 0x1c */
+	u32 cs3_stop;		/* MMAP_CTRL + 0x20 */
+	u32 cs4_start;		/* MMAP_CTRL + 0x24 */
+	u32 cs4_stop;		/* MMAP_CTRL + 0x28 */
+	u32 cs5_start;		/* MMAP_CTRL + 0x2c */
+	u32 cs5_stop;		/* MMAP_CTRL + 0x30 */
+
+	u32 sdram0;		/* MMAP_CTRL + 0x34 */
+	u32 sdram1;		/* MMAP_CTRL + 0x38 */
+
+	u32 reserved[4];	/* MMAP_CTRL + 0x3c .. 0x48 */
+
+	u32 boot_start;		/* MMAP_CTRL + 0x4c */
+	u32 boot_stop;		/* MMAP_CTRL + 0x50 */
+
+	u32 ipbi_ws_ctrl;	/* MMAP_CTRL + 0x54 */
+
+	u32 cs6_start;		/* MMAP_CTRL + 0x58 */
+	u32 cs6_stop;		/* MMAP_CTRL + 0x5c */
+	u32 cs7_start;		/* MMAP_CTRL + 0x60 */
+	u32 cs7_stop;		/* MMAP_CTRL + 0x64 */
+};
+
+/* SDRAM control */
+struct mpc52xx_sdram {
+	u32 mode;		/* SDRAM + 0x00 */
+	u32 ctrl;		/* SDRAM + 0x04 */
+	u32 config1;		/* SDRAM + 0x08 */
+	u32 config2;		/* SDRAM + 0x0c */
+};
+
+/* SDMA */
+struct mpc52xx_sdma {
+	u32 taskBar;		/* SDMA + 0x00 */
+	u32 currentPointer;	/* SDMA + 0x04 */
+	u32 endPointer;		/* SDMA + 0x08 */
+	u32 variablePointer;	/* SDMA + 0x0c */
+
+	u8 IntVect1;		/* SDMA + 0x10 */
+	u8 IntVect2;		/* SDMA + 0x11 */
+	u16 PtdCntrl;		/* SDMA + 0x12 */
+
+	u32 IntPend;		/* SDMA + 0x14 */
+	u32 IntMask;		/* SDMA + 0x18 */
+
+	u16 tcr[16];		/* SDMA + 0x1c .. 0x3a */
+
+	u8 ipr[32];		/* SDMA + 0x3c .. 0x5b */
+
+	u32 cReqSelect;		/* SDMA + 0x5c */
+	u32 task_size0;		/* SDMA + 0x60 */
+	u32 task_size1;		/* SDMA + 0x64 */
+	u32 MDEDebug;		/* SDMA + 0x68 */
+	u32 ADSDebug;		/* SDMA + 0x6c */
+	u32 Value1;		/* SDMA + 0x70 */
+	u32 Value2;		/* SDMA + 0x74 */
+	u32 Control;		/* SDMA + 0x78 */
+	u32 Status;		/* SDMA + 0x7c */
+	u32 PTDDebug;		/* SDMA + 0x80 */
+};
+
+/* GPT */
+struct mpc52xx_gpt {
+	u32 mode;		/* GPTx + 0x00 */
+	u32 count;		/* GPTx + 0x04 */
+	u32 pwm;		/* GPTx + 0x08 */
+	u32 status;		/* GPTx + 0x0c */
+};
+
+/* GPIO */
+struct mpc52xx_gpio {
+	u32 port_config;	/* GPIO + 0x00 */
+	u32 simple_gpioe;	/* GPIO + 0x04 */
+	u32 simple_ode;		/* GPIO + 0x08 */
+	u32 simple_ddr;		/* GPIO + 0x0c */
+	u32 simple_dvo;		/* GPIO + 0x10 */
+	u32 simple_ival;	/* GPIO + 0x14 */
+	u8 outo_gpioe;		/* GPIO + 0x18 */
+	u8 reserved1[3];	/* GPIO + 0x19 */
+	u8 outo_dvo;		/* GPIO + 0x1c */
+	u8 reserved2[3];	/* GPIO + 0x1d */
+	u8 sint_gpioe;		/* GPIO + 0x20 */
+	u8 reserved3[3];	/* GPIO + 0x21 */
+	u8 sint_ode;		/* GPIO + 0x24 */
+	u8 reserved4[3];	/* GPIO + 0x25 */
+	u8 sint_ddr;		/* GPIO + 0x28 */
+	u8 reserved5[3];	/* GPIO + 0x29 */
+	u8 sint_dvo;		/* GPIO + 0x2c */
+	u8 reserved6[3];	/* GPIO + 0x2d */
+	u8 sint_inten;		/* GPIO + 0x30 */
+	u8 reserved7[3];	/* GPIO + 0x31 */
+	u16 sint_itype;		/* GPIO + 0x34 */
+	u16 reserved8;		/* GPIO + 0x36 */
+	u8 gpio_control;	/* GPIO + 0x38 */
+	u8 reserved9[3];	/* GPIO + 0x39 */
+	u8 sint_istat;		/* GPIO + 0x3c */
+	u8 sint_ival;		/* GPIO + 0x3d */
+	u8 bus_errs;		/* GPIO + 0x3e */
+	u8 reserved10;		/* GPIO + 0x3f */
+};
+
+#define MPC52xx_GPIO_PSC_CONFIG_UART_WITHOUT_CD	4
+#define MPC52xx_GPIO_PSC_CONFIG_UART_WITH_CD	5
+#define MPC52xx_GPIO_PCI_DIS			(1<<15)
+
+/* GPIO with WakeUp */
+struct mpc52xx_gpio_wkup {
+	u8 wkup_gpioe;		/* GPIO_WKUP + 0x00 */
+	u8 reserved1[3];	/* GPIO_WKUP + 0x01 */
+	u8 wkup_ode;		/* GPIO_WKUP + 0x04 */
+	u8 reserved2[3];	/* GPIO_WKUP + 0x05 */
+	u8 wkup_ddr;		/* GPIO_WKUP + 0x08 */
+	u8 reserved3[3];	/* GPIO_WKUP + 0x09 */
+	u8 wkup_dvo;		/* GPIO_WKUP + 0x0C */
+	u8 reserved4[3];	/* GPIO_WKUP + 0x0D */
+	u8 wkup_inten;		/* GPIO_WKUP + 0x10 */
+	u8 reserved5[3];	/* GPIO_WKUP + 0x11 */
+	u8 wkup_iinten;		/* GPIO_WKUP + 0x14 */
+	u8 reserved6[3];	/* GPIO_WKUP + 0x15 */
+	u16 wkup_itype;		/* GPIO_WKUP + 0x18 */
+	u8 reserved7[2];	/* GPIO_WKUP + 0x1A */
+	u8 wkup_maste;		/* GPIO_WKUP + 0x1C */
+	u8 reserved8[3];	/* GPIO_WKUP + 0x1D */
+	u8 wkup_ival;		/* GPIO_WKUP + 0x20 */
+	u8 reserved9[3];	/* GPIO_WKUP + 0x21 */
+	u8 wkup_istat;		/* GPIO_WKUP + 0x24 */
+	u8 reserved10[3];	/* GPIO_WKUP + 0x25 */
+};
+
+/* XLB Bus control */
+struct mpc52xx_xlb {
+	u8 reserved[0x40];
+	u32 config;		/* XLB + 0x40 */
+	u32 version;		/* XLB + 0x44 */
+	u32 status;		/* XLB + 0x48 */
+	u32 int_enable;		/* XLB + 0x4c */
+	u32 addr_capture;	/* XLB + 0x50 */
+	u32 bus_sig_capture;	/* XLB + 0x54 */
+	u32 addr_timeout;	/* XLB + 0x58 */
+	u32 data_timeout;	/* XLB + 0x5c */
+	u32 bus_act_timeout;	/* XLB + 0x60 */
+	u32 master_pri_enable;	/* XLB + 0x64 */
+	u32 master_priority;	/* XLB + 0x68 */
+	u32 base_address;	/* XLB + 0x6c */
+	u32 snoop_window;	/* XLB + 0x70 */
+};
+
+#define MPC52xx_XLB_CFG_PLDIS		(1 << 31)
+#define MPC52xx_XLB_CFG_SNOOP		(1 << 15)
+
+/* Clock Distribution control */
+struct mpc52xx_cdm {
+	u32 jtag_id;		/* CDM + 0x00  reg0 read only */
+	u32 rstcfg;		/* CDM + 0x04  reg1 read only */
+	u32 breadcrumb;		/* CDM + 0x08  reg2 */
+
+	u8 mem_clk_sel;		/* CDM + 0x0c  reg3 byte0 */
+	u8 xlb_clk_sel;		/* CDM + 0x0d  reg3 byte1 read only */
+	u8 ipb_clk_sel;		/* CDM + 0x0e  reg3 byte2 */
+	u8 pci_clk_sel;		/* CDM + 0x0f  reg3 byte3 */
+
+	u8 ext_48mhz_en;	/* CDM + 0x10  reg4 byte0 */
+	u8 fd_enable;		/* CDM + 0x11  reg4 byte1 */
+	u16 fd_counters;	/* CDM + 0x12  reg4 byte2,3 */
+
+	u32 clk_enables;	/* CDM + 0x14  reg5 */
+
+	u8 osc_disable;		/* CDM + 0x18  reg6 byte0 */
+	u8 reserved0[3];	/* CDM + 0x19  reg6 byte1,2,3 */
+
+	u8 ccs_sleep_enable;	/* CDM + 0x1c  reg7 byte0 */
+	u8 osc_sleep_enable;	/* CDM + 0x1d  reg7 byte1 */
+	u8 reserved1;		/* CDM + 0x1e  reg7 byte2 */
+	u8 ccs_qreq_test;	/* CDM + 0x1f  reg7 byte3 */
+
+	u8 soft_reset;		/* CDM + 0x20  reg8 byte0 */
+	u8 no_ckstp;		/* CDM + 0x21  reg8 byte1 */
+	u8 reserved2[2];	/* CDM + 0x22  reg8 byte2,3 */
+
+	u8 pll_lock;		/* CDM + 0x24  reg9 byte0 */
+	u8 pll_looselock;	/* CDM + 0x25  reg9 byte1 */
+	u8 pll_sm_lockwin;	/* CDM + 0x26  reg9 byte2 */
+	u8 reserved3;		/* CDM + 0x27  reg9 byte3 */
+
+	u16 reserved4;		/* CDM + 0x28  reg10 byte0,1 */
+	u16 mclken_div_psc1;	/* CDM + 0x2a  reg10 byte2,3 */
+
+	u16 reserved5;		/* CDM + 0x2c  reg11 byte0,1 */
+	u16 mclken_div_psc2;	/* CDM + 0x2e  reg11 byte2,3 */
+
+	u16 reserved6;		/* CDM + 0x30  reg12 byte0,1 */
+	u16 mclken_div_psc3;	/* CDM + 0x32  reg12 byte2,3 */
+
+	u16 reserved7;		/* CDM + 0x34  reg13 byte0,1 */
+	u16 mclken_div_psc6;	/* CDM + 0x36  reg13 byte2,3 */
+};
+
+/* Interrupt controller Register set */
+struct mpc52xx_intr {
+	u32 per_mask;		/* INTR + 0x00 */
+	u32 per_pri1;		/* INTR + 0x04 */
+	u32 per_pri2;		/* INTR + 0x08 */
+	u32 per_pri3;		/* INTR + 0x0c */
+	u32 ctrl;		/* INTR + 0x10 */
+	u32 main_mask;		/* INTR + 0x14 */
+	u32 main_pri1;		/* INTR + 0x18 */
+	u32 main_pri2;		/* INTR + 0x1c */
+	u32 reserved1;		/* INTR + 0x20 */
+	u32 enc_status;		/* INTR + 0x24 */
+	u32 crit_status;	/* INTR + 0x28 */
+	u32 main_status;	/* INTR + 0x2c */
+	u32 per_status;		/* INTR + 0x30 */
+	u32 reserved2;		/* INTR + 0x34 */
+	u32 per_error;		/* INTR + 0x38 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+
+/* ========================================================================= */
+/* Prototypes for MPC52xx sysdev                                             */
+/* ========================================================================= */
+
+#ifndef __ASSEMBLY__
+
+struct device_node;
+
+/* mpc52xx_common.c */
+extern void mpc5200_setup_xlb_arbiter(void);
+extern void mpc52xx_declare_of_platform_devices(void);
+extern int mpc5200_psc_ac97_gpio_reset(int psc_number);
+extern void mpc52xx_map_common_devices(void);
+extern int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv);
+extern void __noreturn mpc52xx_restart(char *cmd);
+
+/* mpc52xx_gpt.c */
+struct mpc52xx_gpt_priv;
+extern struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq);
+extern int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period,
+                            int continuous);
+extern u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt);
+extern int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt);
+
+/* mpc52xx_pic.c */
+extern void mpc52xx_init_irq(void);
+extern unsigned int mpc52xx_get_irq(void);
+
+/* mpc52xx_pci.c */
+#ifdef CONFIG_PCI
+extern int __init mpc52xx_add_bridge(struct device_node *node);
+extern void __init mpc52xx_setup_pci(void);
+#else
+static inline void mpc52xx_setup_pci(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PM
+struct mpc52xx_suspend {
+	void (*board_suspend_prepare)(void __iomem *mbar);
+	void (*board_resume_finish)(void __iomem *mbar);
+};
+
+extern struct mpc52xx_suspend mpc52xx_suspend;
+extern int __init mpc52xx_pm_init(void);
+extern int mpc52xx_set_wakeup_gpio(u8 pin, u8 level);
+
+/* lite5200 calls mpc5200 suspend functions, so here they are */
+extern int mpc52xx_pm_prepare(void);
+extern int mpc52xx_pm_enter(suspend_state_t);
+extern void mpc52xx_pm_finish(void);
+extern char saved_sram[0x4000]; /* reuse buffer from mpc52xx suspend */
+
+#ifdef CONFIG_PPC_LITE5200
+int __init lite5200_pm_init(void);
+#endif
+#endif /* CONFIG_PM */
+
+#endif /* __ASM_POWERPC_MPC52xx_H__ */
+
diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
new file mode 100644
index 0000000000..ec995b2892
--- /dev/null
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -0,0 +1,352 @@
+/*
+ * include/asm-ppc/mpc52xx_psc.h
+ *
+ * Definitions of consts/structs to drive the Freescale MPC52xx OnChip
+ * PSCs. These are shared between multiple drivers since a PSC can be
+ * UART, AC97, IR, I2S, ... So this header is in asm-ppc.
+ *
+ *
+ * Maintainer : Sylvain Munaut <tnt@246tNt.com>
+ *
+ * Based/Extracted from some header of the 2.4 originally written by
+ * Dale Farnsworth <dfarnsworth@mvista.com>
+ *
+ * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003 MontaVista, Software, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_MPC52xx_PSC_H__
+#define __ASM_MPC52xx_PSC_H__
+
+#include <asm/types.h>
+
+/* Max number of PSCs */
+#ifdef CONFIG_PPC_MPC512x
+#define MPC52xx_PSC_MAXNUM     12
+#else
+#define MPC52xx_PSC_MAXNUM	6
+#endif
+
+/* Programmable Serial Controller (PSC) status register bits */
+#define MPC52xx_PSC_SR_UNEX_RX	0x0001
+#define MPC52xx_PSC_SR_DATA_VAL	0x0002
+#define MPC52xx_PSC_SR_DATA_OVR	0x0004
+#define MPC52xx_PSC_SR_CMDSEND	0x0008
+#define MPC52xx_PSC_SR_CDE	0x0080
+#define MPC52xx_PSC_SR_RXRDY	0x0100
+#define MPC52xx_PSC_SR_RXFULL	0x0200
+#define MPC52xx_PSC_SR_TXRDY	0x0400
+#define MPC52xx_PSC_SR_TXEMP	0x0800
+#define MPC52xx_PSC_SR_OE	0x1000
+#define MPC52xx_PSC_SR_PE	0x2000
+#define MPC52xx_PSC_SR_FE	0x4000
+#define MPC52xx_PSC_SR_RB	0x8000
+
+/* PSC Command values */
+#define MPC52xx_PSC_RX_ENABLE		0x0001
+#define MPC52xx_PSC_RX_DISABLE		0x0002
+#define MPC52xx_PSC_TX_ENABLE		0x0004
+#define MPC52xx_PSC_TX_DISABLE		0x0008
+#define MPC52xx_PSC_SEL_MODE_REG_1	0x0010
+#define MPC52xx_PSC_RST_RX		0x0020
+#define MPC52xx_PSC_RST_TX		0x0030
+#define MPC52xx_PSC_RST_ERR_STAT	0x0040
+#define MPC52xx_PSC_RST_BRK_CHG_INT	0x0050
+#define MPC52xx_PSC_START_BRK		0x0060
+#define MPC52xx_PSC_STOP_BRK		0x0070
+
+/* PSC TxRx FIFO status bits */
+#define MPC52xx_PSC_RXTX_FIFO_ERR	0x0040
+#define MPC52xx_PSC_RXTX_FIFO_UF	0x0020
+#define MPC52xx_PSC_RXTX_FIFO_OF	0x0010
+#define MPC52xx_PSC_RXTX_FIFO_FR	0x0008
+#define MPC52xx_PSC_RXTX_FIFO_FULL	0x0004
+#define MPC52xx_PSC_RXTX_FIFO_ALARM	0x0002
+#define MPC52xx_PSC_RXTX_FIFO_EMPTY	0x0001
+
+/* PSC interrupt status/mask bits */
+#define MPC52xx_PSC_IMR_UNEX_RX_SLOT 0x0001
+#define MPC52xx_PSC_IMR_DATA_VALID	0x0002
+#define MPC52xx_PSC_IMR_DATA_OVR	0x0004
+#define MPC52xx_PSC_IMR_CMD_SEND	0x0008
+#define MPC52xx_PSC_IMR_ERROR		0x0040
+#define MPC52xx_PSC_IMR_DEOF		0x0080
+#define MPC52xx_PSC_IMR_TXRDY		0x0100
+#define MPC52xx_PSC_IMR_RXRDY		0x0200
+#define MPC52xx_PSC_IMR_DB		0x0400
+#define MPC52xx_PSC_IMR_TXEMP		0x0800
+#define MPC52xx_PSC_IMR_ORERR		0x1000
+#define MPC52xx_PSC_IMR_IPC		0x8000
+
+/* PSC input port change bits */
+#define MPC52xx_PSC_CTS			0x01
+#define MPC52xx_PSC_DCD			0x02
+#define MPC52xx_PSC_D_CTS		0x10
+#define MPC52xx_PSC_D_DCD		0x20
+
+/* PSC acr bits */
+#define MPC52xx_PSC_IEC_CTS		0x01
+#define MPC52xx_PSC_IEC_DCD		0x02
+
+/* PSC output port bits */
+#define MPC52xx_PSC_OP_RTS		0x01
+#define MPC52xx_PSC_OP_RES		0x02
+
+/* PSC mode fields */
+#define MPC52xx_PSC_MODE_5_BITS			0x00
+#define MPC52xx_PSC_MODE_6_BITS			0x01
+#define MPC52xx_PSC_MODE_7_BITS			0x02
+#define MPC52xx_PSC_MODE_8_BITS			0x03
+#define MPC52xx_PSC_MODE_BITS_MASK		0x03
+#define MPC52xx_PSC_MODE_PAREVEN		0x00
+#define MPC52xx_PSC_MODE_PARODD			0x04
+#define MPC52xx_PSC_MODE_PARFORCE		0x08
+#define MPC52xx_PSC_MODE_PARNONE		0x10
+#define MPC52xx_PSC_MODE_ERR			0x20
+#define MPC52xx_PSC_MODE_FFULL			0x40
+#define MPC52xx_PSC_MODE_RXRTS			0x80
+
+#define MPC52xx_PSC_MODE_ONE_STOP_5_BITS	0x00
+#define MPC52xx_PSC_MODE_ONE_STOP		0x07
+#define MPC52xx_PSC_MODE_TWO_STOP		0x0f
+#define MPC52xx_PSC_MODE_TXCTS			0x10
+
+#define MPC52xx_PSC_RFNUM_MASK	0x01ff
+
+#define MPC52xx_PSC_SICR_DTS1			(1 << 29)
+#define MPC52xx_PSC_SICR_SHDR			(1 << 28)
+#define MPC52xx_PSC_SICR_SIM_MASK		(0xf << 24)
+#define MPC52xx_PSC_SICR_SIM_UART		(0x0 << 24)
+#define MPC52xx_PSC_SICR_SIM_UART_DCD		(0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_8		(0x1 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_16		(0x2 << 24)
+#define MPC52xx_PSC_SICR_SIM_AC97		(0x3 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR		(0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR_DCD		(0xc << 24)
+#define MPC52xx_PSC_SICR_SIM_MIR		(0x5 << 24)
+#define MPC52xx_PSC_SICR_SIM_FIR		(0x6 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_24		(0x7 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_32		(0xf << 24)
+#define MPC52xx_PSC_SICR_ACRB			(0x8 << 24)
+#define MPC52xx_PSC_SICR_AWR			(1 << 30)
+#define MPC52xx_PSC_SICR_GENCLK			(1 << 23)
+#define MPC52xx_PSC_SICR_I2S			(1 << 22)
+#define MPC52xx_PSC_SICR_CLKPOL			(1 << 21)
+#define MPC52xx_PSC_SICR_SYNCPOL		(1 << 20)
+#define MPC52xx_PSC_SICR_CELLSLAVE		(1 << 19)
+#define MPC52xx_PSC_SICR_CELL2XCLK		(1 << 18)
+#define MPC52xx_PSC_SICR_ESAI			(1 << 17)
+#define MPC52xx_PSC_SICR_ENAC97			(1 << 16)
+#define MPC52xx_PSC_SICR_SPI			(1 << 15)
+#define MPC52xx_PSC_SICR_MSTR			(1 << 14)
+#define MPC52xx_PSC_SICR_CPOL			(1 << 13)
+#define MPC52xx_PSC_SICR_CPHA			(1 << 12)
+#define MPC52xx_PSC_SICR_USEEOF			(1 << 11)
+#define MPC52xx_PSC_SICR_DISABLEEOF		(1 << 10)
+
+/* Structure of the hardware registers */
+struct mpc52xx_psc {
+	union {
+		u8	mode;		/* PSC + 0x00 */
+		u8	mr2;
+	};
+	u8		reserved0[3];
+	union {				/* PSC + 0x04 */
+		u16	status;
+		u16	clock_select;
+	} sr_csr;
+#define mpc52xx_psc_status	sr_csr.status
+#define mpc52xx_psc_clock_select sr_csr.clock_select
+	u16		reserved1;
+	u8		command;	/* PSC + 0x08 */
+	u8		reserved2[3];
+	union {				/* PSC + 0x0c */
+		u8	buffer_8;
+		u16	buffer_16;
+		u32	buffer_32;
+	} buffer;
+#define mpc52xx_psc_buffer_8	buffer.buffer_8
+#define mpc52xx_psc_buffer_16	buffer.buffer_16
+#define mpc52xx_psc_buffer_32	buffer.buffer_32
+	union {				/* PSC + 0x10 */
+		u8	ipcr;
+		u8	acr;
+	} ipcr_acr;
+#define mpc52xx_psc_ipcr	ipcr_acr.ipcr
+#define mpc52xx_psc_acr		ipcr_acr.acr
+	u8		reserved3[3];
+	union {				/* PSC + 0x14 */
+		u16	isr;
+		u16	imr;
+	} isr_imr;
+#define mpc52xx_psc_isr		isr_imr.isr
+#define mpc52xx_psc_imr		isr_imr.imr
+	u16		reserved4;
+	u8		ctur;		/* PSC + 0x18 */
+	u8		reserved5[3];
+	u8		ctlr;		/* PSC + 0x1c */
+	u8		reserved6[3];
+	/* BitClkDiv field of CCR is byte swapped in
+	 * the hardware for mpc5200/b compatibility */
+	u32		ccr;		/* PSC + 0x20 */
+	u32		ac97_slots;	/* PSC + 0x24 */
+	u32		ac97_cmd;	/* PSC + 0x28 */
+	u32		ac97_data;	/* PSC + 0x2c */
+	u8		ivr;		/* PSC + 0x30 */
+	u8		reserved8[3];
+	u8		ip;		/* PSC + 0x34 */
+	u8		reserved9[3];
+	u8		op1;		/* PSC + 0x38 */
+	u8		reserved10[3];
+	u8		op0;		/* PSC + 0x3c */
+	u8		reserved11[3];
+	u32		sicr;		/* PSC + 0x40 */
+	u8		ircr1;		/* PSC + 0x44 */
+	u8		reserved13[3];
+	u8		ircr2;		/* PSC + 0x48 */
+	u8		reserved14[3];
+	u8		irsdr;		/* PSC + 0x4c */
+	u8		reserved15[3];
+	u8		irmdr;		/* PSC + 0x50 */
+	u8		reserved16[3];
+	u8		irfdr;		/* PSC + 0x54 */
+	u8		reserved17[3];
+};
+
+struct mpc52xx_psc_fifo {
+	u16		rfnum;		/* PSC + 0x58 */
+	u16		reserved18;
+	u16		tfnum;		/* PSC + 0x5c */
+	u16		reserved19;
+	u32		rfdata;		/* PSC + 0x60 */
+	u16		rfstat;		/* PSC + 0x64 */
+	u16		reserved20;
+	u8		rfcntl;		/* PSC + 0x68 */
+	u8		reserved21[5];
+	u16		rfalarm;	/* PSC + 0x6e */
+	u16		reserved22;
+	u16		rfrptr;		/* PSC + 0x72 */
+	u16		reserved23;
+	u16		rfwptr;		/* PSC + 0x76 */
+	u16		reserved24;
+	u16		rflrfptr;	/* PSC + 0x7a */
+	u16		reserved25;
+	u16		rflwfptr;	/* PSC + 0x7e */
+	u32		tfdata;		/* PSC + 0x80 */
+	u16		tfstat;		/* PSC + 0x84 */
+	u16		reserved26;
+	u8		tfcntl;		/* PSC + 0x88 */
+	u8		reserved27[5];
+	u16		tfalarm;	/* PSC + 0x8e */
+	u16		reserved28;
+	u16		tfrptr;		/* PSC + 0x92 */
+	u16		reserved29;
+	u16		tfwptr;		/* PSC + 0x96 */
+	u16		reserved30;
+	u16		tflrfptr;	/* PSC + 0x9a */
+	u16		reserved31;
+	u16		tflwfptr;	/* PSC + 0x9e */
+};
+
+#define MPC512x_PSC_FIFO_EOF		0x100
+#define MPC512x_PSC_FIFO_RESET_SLICE	0x80
+#define MPC512x_PSC_FIFO_ENABLE_SLICE	0x01
+#define MPC512x_PSC_FIFO_ENABLE_DMA	0x04
+
+#define MPC512x_PSC_FIFO_EMPTY		0x1
+#define MPC512x_PSC_FIFO_FULL		0x2
+#define MPC512x_PSC_FIFO_ALARM		0x4
+#define MPC512x_PSC_FIFO_URERR		0x8
+
+struct mpc512x_psc_fifo {
+	u32		reserved1[10];
+	u32		txcmd;		/* PSC + 0x80 */
+	u32		txalarm;	/* PSC + 0x84 */
+	u32		txsr;		/* PSC + 0x88 */
+	u32		txisr;		/* PSC + 0x8c */
+	u32		tximr;		/* PSC + 0x90 */
+	u32		txcnt;		/* PSC + 0x94 */
+	u32		txptr;		/* PSC + 0x98 */
+	u32		txsz;		/* PSC + 0x9c */
+	u32		reserved2[7];
+	union {
+		u8	txdata_8;
+		u16	txdata_16;
+		u32	txdata_32;
+	} txdata; 			/* PSC + 0xbc */
+#define txdata_8 txdata.txdata_8
+#define txdata_16 txdata.txdata_16
+#define txdata_32 txdata.txdata_32
+	u32		rxcmd;		/* PSC + 0xc0 */
+	u32		rxalarm;	/* PSC + 0xc4 */
+	u32		rxsr;		/* PSC + 0xc8 */
+	u32		rxisr;		/* PSC + 0xcc */
+	u32		rximr;		/* PSC + 0xd0 */
+	u32		rxcnt;		/* PSC + 0xd4 */
+	u32		rxptr;		/* PSC + 0xd8 */
+	u32		rxsz;		/* PSC + 0xdc */
+	u32		reserved3[7];
+	union {
+		u8	rxdata_8;
+		u16	rxdata_16;
+		u32	rxdata_32;
+	} rxdata; 			/* PSC + 0xfc */
+#define rxdata_8 rxdata.rxdata_8
+#define rxdata_16 rxdata.rxdata_16
+#define rxdata_32 rxdata.rxdata_32
+};
+
+struct mpc5125_psc {
+	u8		mr1;			/* PSC + 0x00 */
+	u8		reserved0[3];
+	u8		mr2;			/* PSC + 0x04 */
+	u8		reserved1[3];
+	struct {
+		u16		status;		/* PSC + 0x08 */
+		u8		reserved2[2];
+		u8		clock_select;	/* PSC + 0x0c */
+		u8		reserved3[3];
+	} sr_csr;
+	u8		command;		/* PSC + 0x10 */
+	u8		reserved4[3];
+	union {					/* PSC + 0x14 */
+		u8		buffer_8;
+		u16		buffer_16;
+		u32		buffer_32;
+	} buffer;
+	struct {
+		u8		ipcr;		/* PSC + 0x18 */
+		u8		reserved5[3];
+		u8		acr;		/* PSC + 0x1c */
+		u8		reserved6[3];
+	} ipcr_acr;
+	struct {
+		u16		isr;		/* PSC + 0x20 */
+		u8		reserved7[2];
+		u16		imr;		/* PSC + 0x24 */
+		u8		reserved8[2];
+	} isr_imr;
+	u8		ctur;			/* PSC + 0x28 */
+	u8		reserved9[3];
+	u8		ctlr;			/* PSC + 0x2c */
+	u8		reserved10[3];
+	u32		ccr;			/* PSC + 0x30 */
+	u32		ac97slots;		/* PSC + 0x34 */
+	u32		ac97cmd;		/* PSC + 0x38 */
+	u32		ac97data;		/* PSC + 0x3c */
+	u8		reserved11[4];
+	u8		ip;			/* PSC + 0x44 */
+	u8		reserved12[3];
+	u8		op1;			/* PSC + 0x48 */
+	u8		reserved13[3];
+	u8		op0;			/* PSC + 0x4c */
+	u8		reserved14[3];
+	u32		sicr;			/* PSC + 0x50 */
+	u8		reserved15[4];	/* make eq. sizeof(mpc52xx_psc) */
+};
+
+#endif  /* __ASM_MPC52xx_PSC_H__ */
diff --git a/arch/powerpc/include/asm/mpc5xxx.h b/arch/powerpc/include/asm/mpc5xxx.h
new file mode 100644
index 0000000000..44db263804
--- /dev/null
+++ b/arch/powerpc/include/asm/mpc5xxx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>, Friday Apr 13 2007
+ *
+ * Description:
+ * MPC5xxx Prototypes and definitions
+ */
+
+#ifndef __ASM_POWERPC_MPC5xxx_H__
+#define __ASM_POWERPC_MPC5xxx_H__
+
+#include <linux/property.h>
+
+unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode);
+
+static inline unsigned long mpc5xxx_get_bus_frequency(struct device *dev)
+{
+	return mpc5xxx_fwnode_get_bus_frequency(dev_fwnode(dev));
+}
+
+#endif /* __ASM_POWERPC_MPC5xxx_H__ */
+
diff --git a/arch/powerpc/include/asm/mpc6xx.h b/arch/powerpc/include/asm/mpc6xx.h
new file mode 100644
index 0000000000..6ed9f4ccc7
--- /dev/null
+++ b/arch/powerpc/include/asm/mpc6xx.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_MPC6xx_H
+#define __ASM_POWERPC_MPC6xx_H
+
+void mpc6xx_enter_standby(void);
+
+#endif
diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h
new file mode 100644
index 0000000000..21aabc3230
--- /dev/null
+++ b/arch/powerpc/include/asm/mpc85xx.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * MPC85xx cpu type detection
+ *
+ * Copyright 2011-2012 Freescale Semiconductor, Inc.
+ */
+
+#ifndef __ASM_PPC_MPC85XX_H
+#define __ASM_PPC_MPC85XX_H
+
+#define SVR_REV(svr)	((svr) & 0xFF)		/* SOC design revision */
+#define SVR_MAJ(svr)	(((svr) >>  4) & 0xF)	/* Major revision field */
+#define SVR_MIN(svr)	(((svr) >>  0) & 0xF)	/* Minor revision field */
+
+/* Some parts define SVR[0:23] as the SOC version */
+#define SVR_SOC_VER(svr) (((svr) >> 8) & 0xFFF7FF)	/* SOC Version fields */
+
+#define SVR_8533	0x803400
+#define SVR_8535	0x803701
+#define SVR_8536	0x803700
+#define SVR_8540	0x803000
+#define SVR_8541	0x807200
+#define SVR_8543	0x803200
+#define SVR_8544	0x803401
+#define SVR_8545	0x803102
+#define SVR_8547	0x803101
+#define SVR_8548	0x803100
+#define SVR_8555	0x807100
+#define SVR_8560	0x807000
+#define SVR_8567	0x807501
+#define SVR_8568	0x807500
+#define SVR_8569	0x808000
+#define SVR_8572	0x80E000
+#define SVR_P1010	0x80F100
+#define SVR_P1011	0x80E500
+#define SVR_P1012	0x80E501
+#define SVR_P1013	0x80E700
+#define SVR_P1014	0x80F101
+#define SVR_P1017	0x80F700
+#define SVR_P1020	0x80E400
+#define SVR_P1021	0x80E401
+#define SVR_P1022	0x80E600
+#define SVR_P1023	0x80F600
+#define SVR_P1024	0x80E402
+#define SVR_P1025	0x80E403
+#define SVR_P2010	0x80E300
+#define SVR_P2020	0x80E200
+#define SVR_P2040	0x821000
+#define SVR_P2041	0x821001
+#define SVR_P3041	0x821103
+#define SVR_P4040	0x820100
+#define SVR_P4080	0x820000
+#define SVR_P5010	0x822100
+#define SVR_P5020	0x822000
+#define SVR_P5021	0X820500
+#define SVR_P5040	0x820400
+#define SVR_T4240	0x824000
+#define SVR_T4120	0x824001
+#define SVR_T4160	0x824100
+#define SVR_T4080	0x824102
+#define SVR_C291	0x850000
+#define SVR_C292	0x850020
+#define SVR_C293	0x850030
+#define SVR_B4860	0X868000
+#define SVR_G4860	0x868001
+#define SVR_G4060	0x868003
+#define SVR_B4440	0x868100
+#define SVR_G4440	0x868101
+#define SVR_B4420	0x868102
+#define SVR_B4220	0x868103
+#define SVR_T1040	0x852000
+#define SVR_T1041	0x852001
+#define SVR_T1042	0x852002
+#define SVR_T1020	0x852100
+#define SVR_T1021	0x852101
+#define SVR_T1022	0x852102
+#define SVR_T2080	0x853000
+#define SVR_T2081	0x853100
+
+#define SVR_8610	0x80A000
+#define SVR_8641	0x809000
+#define SVR_8641D	0x809001
+
+#define SVR_9130	0x860001
+#define SVR_9131	0x860000
+#define SVR_9132	0x861000
+#define SVR_9232	0x861400
+
+#define SVR_Unknown	0xFFFFFF
+
+#endif
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
new file mode 100644
index 0000000000..58353c5bd3
--- /dev/null
+++ b/arch/powerpc/include/asm/mpic.h
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MPIC_H
+#define _ASM_POWERPC_MPIC_H
+#ifdef __KERNEL__
+
+#include <linux/irq.h>
+#include <asm/dcr.h>
+#include <asm/msi_bitmap.h>
+
+/*
+ * Global registers
+ */
+
+#define MPIC_GREG_BASE			0x01000
+
+#define MPIC_GREG_FEATURE_0		0x00000
+#define		MPIC_GREG_FEATURE_LAST_SRC_MASK		0x07ff0000
+#define		MPIC_GREG_FEATURE_LAST_SRC_SHIFT	16
+#define		MPIC_GREG_FEATURE_LAST_CPU_MASK		0x00001f00
+#define		MPIC_GREG_FEATURE_LAST_CPU_SHIFT	8
+#define		MPIC_GREG_FEATURE_VERSION_MASK		0xff
+#define MPIC_GREG_FEATURE_1		0x00010
+#define MPIC_GREG_GLOBAL_CONF_0		0x00020
+#define		MPIC_GREG_GCONF_RESET			0x80000000
+/* On the FSL mpic implementations the Mode field is expanded to be
+ * 2 bits wide:
+ *	0b00 = pass through (interrupts routed to IRQ0)
+ *	0b01 = Mixed mode
+ *	0b10 = reserved
+ *	0b11 = External proxy / coreint
+ */
+#define		MPIC_GREG_GCONF_COREINT			0x60000000
+#define		MPIC_GREG_GCONF_8259_PTHROU_DIS		0x20000000
+#define		MPIC_GREG_GCONF_NO_BIAS			0x10000000
+#define		MPIC_GREG_GCONF_BASE_MASK		0x000fffff
+#define		MPIC_GREG_GCONF_MCK			0x08000000
+#define MPIC_GREG_GLOBAL_CONF_1		0x00030
+#define MPIC_GREG_VENDOR_0		0x00040
+#define MPIC_GREG_VENDOR_1		0x00050
+#define MPIC_GREG_VENDOR_2		0x00060
+#define MPIC_GREG_VENDOR_3		0x00070
+#define MPIC_GREG_VENDOR_ID		0x00080
+#define 	MPIC_GREG_VENDOR_ID_STEPPING_MASK	0x00ff0000
+#define 	MPIC_GREG_VENDOR_ID_STEPPING_SHIFT	16
+#define 	MPIC_GREG_VENDOR_ID_DEVICE_ID_MASK	0x0000ff00
+#define 	MPIC_GREG_VENDOR_ID_DEVICE_ID_SHIFT	8
+#define 	MPIC_GREG_VENDOR_ID_VENDOR_ID_MASK	0x000000ff
+#define MPIC_GREG_PROCESSOR_INIT	0x00090
+#define MPIC_GREG_IPI_VECTOR_PRI_0	0x000a0
+#define MPIC_GREG_IPI_VECTOR_PRI_1	0x000b0
+#define MPIC_GREG_IPI_VECTOR_PRI_2	0x000c0
+#define MPIC_GREG_IPI_VECTOR_PRI_3	0x000d0
+#define MPIC_GREG_IPI_STRIDE		0x10
+#define MPIC_GREG_SPURIOUS		0x000e0
+#define MPIC_GREG_TIMER_FREQ		0x000f0
+
+/*
+ *
+ * Timer registers
+ */
+#define MPIC_TIMER_BASE			0x01100
+#define MPIC_TIMER_STRIDE		0x40
+#define MPIC_TIMER_GROUP_STRIDE		0x1000
+
+#define MPIC_TIMER_CURRENT_CNT		0x00000
+#define MPIC_TIMER_BASE_CNT		0x00010
+#define MPIC_TIMER_VECTOR_PRI		0x00020
+#define MPIC_TIMER_DESTINATION		0x00030
+
+/*
+ * Per-Processor registers
+ */
+
+#define MPIC_CPU_THISBASE		0x00000
+#define MPIC_CPU_BASE			0x20000
+#define MPIC_CPU_STRIDE			0x01000
+
+#define MPIC_CPU_IPI_DISPATCH_0		0x00040
+#define MPIC_CPU_IPI_DISPATCH_1		0x00050
+#define MPIC_CPU_IPI_DISPATCH_2		0x00060
+#define MPIC_CPU_IPI_DISPATCH_3		0x00070
+#define MPIC_CPU_IPI_DISPATCH_STRIDE	0x00010
+#define MPIC_CPU_CURRENT_TASK_PRI	0x00080
+#define 	MPIC_CPU_TASKPRI_MASK			0x0000000f
+#define MPIC_CPU_WHOAMI			0x00090
+#define 	MPIC_CPU_WHOAMI_MASK			0x0000001f
+#define MPIC_CPU_INTACK			0x000a0
+#define MPIC_CPU_EOI			0x000b0
+#define MPIC_CPU_MCACK			0x000c0
+
+/*
+ * Per-source registers
+ */
+
+#define MPIC_IRQ_BASE			0x10000
+#define MPIC_IRQ_STRIDE			0x00020
+#define MPIC_IRQ_VECTOR_PRI		0x00000
+#define 	MPIC_VECPRI_MASK			0x80000000
+#define 	MPIC_VECPRI_ACTIVITY			0x40000000	/* Read Only */
+#define 	MPIC_VECPRI_PRIORITY_MASK		0x000f0000
+#define 	MPIC_VECPRI_PRIORITY_SHIFT		16
+#define 	MPIC_VECPRI_VECTOR_MASK			0x000007ff
+#define 	MPIC_VECPRI_POLARITY_POSITIVE		0x00800000
+#define 	MPIC_VECPRI_POLARITY_NEGATIVE		0x00000000
+#define 	MPIC_VECPRI_POLARITY_MASK		0x00800000
+#define 	MPIC_VECPRI_SENSE_LEVEL			0x00400000
+#define 	MPIC_VECPRI_SENSE_EDGE			0x00000000
+#define 	MPIC_VECPRI_SENSE_MASK			0x00400000
+#define MPIC_IRQ_DESTINATION		0x00010
+
+#define MPIC_FSL_BRR1			0x00000
+#define 	MPIC_FSL_BRR1_VER			0x0000ffff
+
+#define MPIC_MAX_IRQ_SOURCES	2048
+#define MPIC_MAX_CPUS		32
+#define MPIC_MAX_ISU		32
+
+#define MPIC_MAX_ERR      32
+#define MPIC_FSL_ERR_INT  16
+
+/*
+ * Tsi108 implementation of MPIC has many differences from the original one
+ */
+
+/*
+ * Global registers
+ */
+
+#define TSI108_GREG_BASE		0x00000
+#define TSI108_GREG_FEATURE_0		0x00000
+#define TSI108_GREG_GLOBAL_CONF_0	0x00004
+#define TSI108_GREG_VENDOR_ID		0x0000c
+#define TSI108_GREG_IPI_VECTOR_PRI_0	0x00204		/* Doorbell 0 */
+#define TSI108_GREG_IPI_STRIDE		0x0c
+#define TSI108_GREG_SPURIOUS		0x00010
+#define TSI108_GREG_TIMER_FREQ		0x00014
+
+/*
+ * Timer registers
+ */
+#define TSI108_TIMER_BASE		0x0030
+#define TSI108_TIMER_STRIDE		0x10
+#define TSI108_TIMER_CURRENT_CNT	0x00000
+#define TSI108_TIMER_BASE_CNT		0x00004
+#define TSI108_TIMER_VECTOR_PRI		0x00008
+#define TSI108_TIMER_DESTINATION	0x0000c
+
+/*
+ * Per-Processor registers
+ */
+#define TSI108_CPU_BASE			0x00300
+#define TSI108_CPU_STRIDE		0x00040
+#define TSI108_CPU_IPI_DISPATCH_0	0x00200
+#define TSI108_CPU_IPI_DISPATCH_STRIDE	0x00000
+#define TSI108_CPU_CURRENT_TASK_PRI	0x00000
+#define TSI108_CPU_WHOAMI		0xffffffff
+#define TSI108_CPU_INTACK		0x00004
+#define TSI108_CPU_EOI			0x00008
+#define TSI108_CPU_MCACK		0x00004 /* Doesn't really exist here */
+
+/*
+ * Per-source registers
+ */
+#define TSI108_IRQ_BASE			0x00100
+#define TSI108_IRQ_STRIDE		0x00008
+#define TSI108_IRQ_VECTOR_PRI		0x00000
+#define TSI108_VECPRI_VECTOR_MASK	0x000000ff
+#define TSI108_VECPRI_POLARITY_POSITIVE	0x01000000
+#define TSI108_VECPRI_POLARITY_NEGATIVE	0x00000000
+#define TSI108_VECPRI_SENSE_LEVEL	0x02000000
+#define TSI108_VECPRI_SENSE_EDGE	0x00000000
+#define TSI108_VECPRI_POLARITY_MASK	0x01000000
+#define TSI108_VECPRI_SENSE_MASK	0x02000000
+#define TSI108_IRQ_DESTINATION		0x00004
+
+/* weird mpic register indices and mask bits in the HW info array */
+enum {
+	MPIC_IDX_GREG_BASE = 0,
+	MPIC_IDX_GREG_FEATURE_0,
+	MPIC_IDX_GREG_GLOBAL_CONF_0,
+	MPIC_IDX_GREG_VENDOR_ID,
+	MPIC_IDX_GREG_IPI_VECTOR_PRI_0,
+	MPIC_IDX_GREG_IPI_STRIDE,
+	MPIC_IDX_GREG_SPURIOUS,
+	MPIC_IDX_GREG_TIMER_FREQ,
+
+	MPIC_IDX_TIMER_BASE,
+	MPIC_IDX_TIMER_STRIDE,
+	MPIC_IDX_TIMER_CURRENT_CNT,
+	MPIC_IDX_TIMER_BASE_CNT,
+	MPIC_IDX_TIMER_VECTOR_PRI,
+	MPIC_IDX_TIMER_DESTINATION,
+
+	MPIC_IDX_CPU_BASE,
+	MPIC_IDX_CPU_STRIDE,
+	MPIC_IDX_CPU_IPI_DISPATCH_0,
+	MPIC_IDX_CPU_IPI_DISPATCH_STRIDE,
+	MPIC_IDX_CPU_CURRENT_TASK_PRI,
+	MPIC_IDX_CPU_WHOAMI,
+	MPIC_IDX_CPU_INTACK,
+	MPIC_IDX_CPU_EOI,
+	MPIC_IDX_CPU_MCACK,
+
+	MPIC_IDX_IRQ_BASE,
+	MPIC_IDX_IRQ_STRIDE,
+	MPIC_IDX_IRQ_VECTOR_PRI,
+
+	MPIC_IDX_VECPRI_VECTOR_MASK,
+	MPIC_IDX_VECPRI_POLARITY_POSITIVE,
+	MPIC_IDX_VECPRI_POLARITY_NEGATIVE,
+	MPIC_IDX_VECPRI_SENSE_LEVEL,
+	MPIC_IDX_VECPRI_SENSE_EDGE,
+	MPIC_IDX_VECPRI_POLARITY_MASK,
+	MPIC_IDX_VECPRI_SENSE_MASK,
+	MPIC_IDX_IRQ_DESTINATION,
+	MPIC_IDX_END
+};
+
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+/* Fixup table entry */
+struct mpic_irq_fixup
+{
+	u8 __iomem	*base;
+	u8 __iomem	*applebase;
+	u32		data;
+	unsigned int	index;
+};
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+
+enum mpic_reg_type {
+	mpic_access_mmio_le,
+	mpic_access_mmio_be,
+#ifdef CONFIG_PPC_DCR
+	mpic_access_dcr
+#endif
+};
+
+struct mpic_reg_bank {
+	u32 __iomem	*base;
+#ifdef CONFIG_PPC_DCR
+	dcr_host_t	dhost;
+#endif /* CONFIG_PPC_DCR */
+};
+
+struct mpic_irq_save {
+	u32		vecprio,
+			dest;
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	u32		fixup_data;
+#endif
+};
+
+/* The instance data of a given MPIC */
+struct mpic
+{
+	/* The OpenFirmware dt node for this MPIC */
+	struct device_node *node;
+
+	/* The remapper for this MPIC */
+	struct irq_domain	*irqhost;
+
+	/* The "linux" controller struct */
+	struct irq_chip		hc_irq;
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	struct irq_chip		hc_ht_irq;
+#endif
+#ifdef CONFIG_SMP
+	struct irq_chip		hc_ipi;
+#endif
+	struct irq_chip		hc_tm;
+	struct irq_chip		hc_err;
+	const char		*name;
+	/* Flags */
+	unsigned int		flags;
+	/* How many irq sources in a given ISU */
+	unsigned int		isu_size;
+	unsigned int		isu_shift;
+	unsigned int		isu_mask;
+	/* Number of sources */
+	unsigned int		num_sources;
+
+	/* vector numbers used for internal sources (ipi/timers) */
+	unsigned int		ipi_vecs[4];
+	unsigned int		timer_vecs[8];
+	/* vector numbers used for FSL MPIC error interrupts */
+	unsigned int		err_int_vecs[MPIC_MAX_ERR];
+
+	/* Spurious vector to program into unused sources */
+	unsigned int		spurious_vec;
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	/* The fixup table */
+	struct mpic_irq_fixup	*fixups;
+	raw_spinlock_t	fixup_lock;
+#endif
+
+	/* Register access method */
+	enum mpic_reg_type	reg_type;
+
+	/* The physical base address of the MPIC */
+	phys_addr_t paddr;
+
+	/* The various ioremap'ed bases */
+	struct mpic_reg_bank	thiscpuregs;
+	struct mpic_reg_bank	gregs;
+	struct mpic_reg_bank	tmregs;
+	struct mpic_reg_bank	cpuregs[MPIC_MAX_CPUS];
+	struct mpic_reg_bank	isus[MPIC_MAX_ISU];
+
+	/* ioremap'ed base for error interrupt registers */
+	u32 __iomem	*err_regs;
+
+	/* Protected sources */
+	unsigned long		*protected;
+
+#ifdef CONFIG_MPIC_WEIRD
+	/* Pointer to HW info array */
+	u32			*hw_set;
+#endif
+
+#ifdef CONFIG_PCI_MSI
+	struct msi_bitmap	msi_bitmap;
+#endif
+
+#ifdef CONFIG_MPIC_BROKEN_REGREAD
+	u32			isu_reg0_shadow[MPIC_MAX_IRQ_SOURCES];
+#endif
+
+	/* link */
+	struct mpic		*next;
+
+#ifdef CONFIG_PM
+	struct mpic_irq_save	*save_data;
+#endif
+};
+
+extern struct bus_type mpic_subsys;
+
+/*
+ * MPIC flags (passed to mpic_alloc)
+ *
+ * The top 4 bits contain an MPIC hw id that is used to index the
+ * register offsets and some masks when CONFIG_MPIC_WEIRD is set.
+ * Note: not setting any ID (leaving those bits at 0) means standard MPIC
+ */
+
+/*
+ * This is a secondary ("chained") controller; it only uses the CPU0
+ * registers.  Primary controllers have IPIs and affinity control.
+ */
+#define MPIC_SECONDARY			0x00000001
+
+/* Set this for a big-endian MPIC */
+#define MPIC_BIG_ENDIAN			0x00000002
+/* Broken U3 MPIC */
+#define MPIC_U3_HT_IRQS			0x00000004
+/* Broken IPI registers (autodetected) */
+#define MPIC_BROKEN_IPI			0x00000008
+/* Spurious vector requires EOI */
+#define MPIC_SPV_EOI			0x00000020
+/* No passthrough disable */
+#define MPIC_NO_PTHROU_DIS		0x00000040
+/* DCR based MPIC */
+#define MPIC_USES_DCR			0x00000080
+/* MPIC has 11-bit vector fields (or larger) */
+#define MPIC_LARGE_VECTORS		0x00000100
+/* Enable delivery of prio 15 interrupts as MCK instead of EE */
+#define MPIC_ENABLE_MCK			0x00000200
+/* Disable bias among target selection, spread interrupts evenly */
+#define MPIC_NO_BIAS			0x00000400
+/* Destination only supports a single CPU at a time */
+#define MPIC_SINGLE_DEST_CPU		0x00001000
+/* Enable CoreInt delivery of interrupts */
+#define MPIC_ENABLE_COREINT		0x00002000
+/* Do not reset the MPIC during initialization */
+#define MPIC_NO_RESET			0x00004000
+/* Freescale MPIC (compatible includes "fsl,mpic") */
+#define MPIC_FSL			0x00008000
+/* Freescale MPIC supports EIMR (error interrupt mask register).
+ * This flag is set for MPIC version >= 4.1 (version determined
+ * from the BRR1 register).
+*/
+#define MPIC_FSL_HAS_EIMR		0x00010000
+
+/* MPIC HW modification ID */
+#define MPIC_REGSET_MASK		0xf0000000
+#define MPIC_REGSET(val)		(((val) & 0xf ) << 28)
+#define MPIC_GET_REGSET(flags)		(((flags) >> 28) & 0xf)
+
+#define	MPIC_REGSET_STANDARD		MPIC_REGSET(0)	/* Original MPIC */
+#define	MPIC_REGSET_TSI108		MPIC_REGSET(1)	/* Tsi108/109 PIC */
+
+/* Get the version of primary MPIC */
+#ifdef CONFIG_MPIC
+extern u32 fsl_mpic_primary_get_version(void);
+#else
+static inline u32 fsl_mpic_primary_get_version(void)
+{
+	return 0;
+}
+#endif
+
+/* Allocate the controller structure and setup the linux irq descs
+ * for the range of interrupts passed in. No HW initialization is
+ * actually performed.
+ *
+ * @node:	device tree node for this MPIC
+ * @phys_addr:	physical base address of the MPIC
+ * @flags:	flags, see constants above
+ * @isu_size:	number of interrupts in an ISU. Use 0 to use a
+ *              standard ISU-less setup (aka powermac)
+ * @irq_count:  number of irqs to use with this mpic IRQ sources. Pass 0
+ *	        to match the number of sources
+ * @name:	name string for this controller
+ *
+ * NOTE(review): the prototype below takes no irq_offset, ipi_offset,
+ * senses or senses_num argument, so the following historical note about
+ * the sense array may no longer apply -- confirm against the implementation.
+ * If none is passed, all interrupts are
+ * setup to be level negative unless MPIC_U3_HT_IRQS is set in which
+ * case they are edge positive (and the array is ignored anyway).
+ * The values in the array start at the first source of the MPIC,
+ * that is senses[0] correspond to linux irq "irq_offset".
+ */
+extern struct mpic *mpic_alloc(struct device_node *node,
+			       phys_addr_t phys_addr,
+			       unsigned int flags,
+			       unsigned int isu_size,
+			       unsigned int irq_count,
+			       const char *name);
+
+/* Assign ISUs, to call before mpic_init()
+ *
+ * @mpic:	controller structure as returned by mpic_alloc()
+ * @isu_num:	ISU number
+ * @phys_addr:	physical address of the ISU
+ */
+extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
+			    phys_addr_t phys_addr);
+
+
+/* Initialize the controller. After this has been called, none of the above
+ * should be called again for this mpic
+ */
+extern void mpic_init(struct mpic *mpic);
+
+/*
+ * All of the following functions must only be used after the
+ * ISUs have been assigned and the controller fully initialized
+ * with mpic_init()
+ */
+
+
+/* Change the priority of an interrupt. Default is 8 for irqs and
+ * 10 for IPIs. You can call this on both IPIs and IRQ numbers, but the
+ * IPI number is then the offset'ed (linux irq number mapped to the IPI)
+ */
+extern void mpic_irq_set_priority(unsigned int irq, unsigned int pri);
+
+/* Setup a non-boot CPU */
+extern void mpic_setup_this_cpu(void);
+
+/* Clean up for kexec (or cpu offline or ...) */
+extern void mpic_teardown_this_cpu(int secondary);
+
+/* Get the current cpu priority for this cpu (0..15) */
+extern int mpic_cpu_get_priority(void);
+
+/* Set the current cpu priority for this cpu */
+extern void mpic_cpu_set_priority(int prio);
+
+/* Request IPIs on primary mpic */
+void __init mpic_request_ipis(void);
+
+/* Send a message (IPI) to a given target (cpu number or MSG_*) */
+void smp_mpic_message_pass(int target, int msg);
+
+/* Unmask a specific virq */
+extern void mpic_unmask_irq(struct irq_data *d);
+/* Mask a specific virq */
+extern void mpic_mask_irq(struct irq_data *d);
+/* EOI a specific virq */
+extern void mpic_end_irq(struct irq_data *d);
+
+/* Fetch interrupt from a given mpic */
+extern unsigned int mpic_get_one_irq(struct mpic *mpic);
+/* This one gets from the primary mpic */
+extern unsigned int mpic_get_irq(void);
+/* This one gets from the primary mpic via CoreInt*/
+extern unsigned int mpic_get_coreint_irq(void);
+/* Fetch Machine Check interrupt from primary mpic */
+extern unsigned int mpic_get_mcirq(void);
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_MPIC_H */
diff --git a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h
new file mode 100644
index 0000000000..cd25eeced2
--- /dev/null
+++ b/arch/powerpc/include/asm/mpic_msgr.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2011-2012, Meador Inge, Mentor Graphics Corporation.
+ */
+
+#ifndef _ASM_MPIC_MSGR_H
+#define _ASM_MPIC_MSGR_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/smp.h>
+#include <asm/io.h>
+
+struct mpic_msgr {
+	u32 __iomem *base;
+	u32 __iomem *mer;
+	int irq;
+	unsigned char in_use;
+	raw_spinlock_t lock;
+	int num;
+};
+
+/* Get a message register
+ *
+ * @reg_num:	the MPIC message register to get
+ *
+ * A pointer to the message register is returned.  If
+ * the message register asked for is already in use, then
+ * EBUSY is returned.  If the number given is not associated
+ * with an actual message register, then ENODEV is returned.
+ * Successfully getting the register marks it as in use.
+ */
+extern struct mpic_msgr *mpic_msgr_get(unsigned int reg_num);
+
+/* Relinquish a message register
+ *
+ * @msgr:	the message register to return
+ *
+ * Disables the given message register and marks it as free.
+ * After this call has completed successfully the message
+ * register is available to be acquired by a call to
+ * mpic_msgr_get.
+ */
+extern void mpic_msgr_put(struct mpic_msgr *msgr);
+
+/* Enable a message register
+ *
+ * @msgr:	the message register to enable
+ *
+ * The given message register is enabled for sending
+ * messages.
+ */
+extern void mpic_msgr_enable(struct mpic_msgr *msgr);
+
+/* Disable a message register
+ *
+ * @msgr:	the message register to disable
+ *
+ * The given message register is disabled for sending
+ * messages.
+ */
+extern void mpic_msgr_disable(struct mpic_msgr *msgr);
+
+/* Write a message to a message register
+ *
+ * @msgr:	the message register to write to
+ * @message:	the message to write
+ *
+ * The given 32-bit message is written to the given message
+ * register.  Writing to an enabled message register fires
+ * an interrupt.
+ */
+static inline void mpic_msgr_write(struct mpic_msgr *msgr, u32 message)
+{
+	out_be32(msgr->base, message);
+}
+
+/* Read a message from a message register
+ *
+ * @msgr:	the message register to read from
+ *
+ * Returns the 32-bit value currently in the given message register.
+ * Upon reading the register any interrupts for that register are
+ * cleared.
+ */
+static inline u32 mpic_msgr_read(struct mpic_msgr *msgr)
+{
+	return in_be32(msgr->base);
+}
+
+/* Clear a message register
+ *
+ * @msgr:	the message register to clear
+ *
+ * Clears any interrupts associated with the given message register.
+ */
+static inline void mpic_msgr_clear(struct mpic_msgr *msgr)
+{
+	(void) mpic_msgr_read(msgr);
+}
+
+/* Set the destination CPU for the message register
+ *
+ * @msgr:	the message register whose destination is to be set
+ * @cpu_num:	the Linux CPU number (*not* the hardware CPU number); its
+ *		hardware id picks the bit set in the mask (1U keeps bit 31 defined)
+ * NOTE(review): the mask is stored to msgr->base, the same address
+ * mpic_msgr_write() uses for message data -- confirm this is intended.
+ */
+static inline void mpic_msgr_set_destination(struct mpic_msgr *msgr,
+					     u32 cpu_num)
+{
+	out_be32(msgr->base, 1U << get_hard_smp_processor_id(cpu_num));
+}
+
+/* Get the IRQ number for the message register
+ * @msgr:	the message register whose IRQ is to be returned
+ *
+ * Returns the IRQ number associated with the given message register.
+ * 0 is returned if this message register is not capable of receiving
+ * interrupts.  What message register can and cannot receive interrupts is
+ * specified in the device tree for the system.
+ */
+static inline int mpic_msgr_get_irq(struct mpic_msgr *msgr)
+{
+	return msgr->irq;
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h
new file mode 100644
index 0000000000..d33e4149be
--- /dev/null
+++ b/arch/powerpc/include/asm/mpic_timer.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/include/asm/mpic_timer.h
+ *
+ * Header file for Mpic Global Timer
+ *
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Wang Dongsheng <Dongsheng.Wang@freescale.com>
+ *	   Li Yang <leoli@freescale.com>
+ */
+
+#ifndef __MPIC_TIMER__
+#define __MPIC_TIMER__
+
+#include <linux/interrupt.h>
+#include <linux/time.h>
+
+struct mpic_timer {
+	void			*dev;
+	struct cascade_priv	*cascade_handle;
+	unsigned int		num;
+	unsigned int		irq;
+};
+
+#ifdef CONFIG_MPIC_TIMER
+struct mpic_timer *mpic_request_timer(irq_handler_t fn,  void *dev,
+		time64_t time);
+void mpic_start_timer(struct mpic_timer *handle);
+void mpic_stop_timer(struct mpic_timer *handle);
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time);
+void mpic_free_timer(struct mpic_timer *handle);
+#else /* !CONFIG_MPIC_TIMER: no-op stubs; static inline avoids duplicate definitions */
+static inline struct mpic_timer *mpic_request_timer(irq_handler_t fn,
+		void *dev, time64_t time) { return NULL; }
+static inline void mpic_start_timer(struct mpic_timer *handle) { }
+static inline void mpic_stop_timer(struct mpic_timer *handle) { }
+static inline void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { }
+static inline void mpic_free_timer(struct mpic_timer *handle) { }
+#endif /* CONFIG_MPIC_TIMER */
+
+#endif
diff --git a/arch/powerpc/include/asm/msi_bitmap.h b/arch/powerpc/include/asm/msi_bitmap.h
new file mode 100644
index 0000000000..55c2f7db9c
--- /dev/null
+++ b/arch/powerpc/include/asm/msi_bitmap.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _POWERPC_SYSDEV_MSI_BITMAP_H
+#define _POWERPC_SYSDEV_MSI_BITMAP_H
+
+/*
+ * Copyright 2008, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/of.h>
+#include <asm/irq.h>
+
+struct msi_bitmap {
+	struct device_node	*of_node;
+	unsigned long		*bitmap;
+	spinlock_t		lock;
+	unsigned int		irq_count;
+	bool		 	bitmap_from_slab;
+};
+
+int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num);
+void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset,
+			    unsigned int num);
+void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq);
+
+int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp);
+
+int msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
+		     struct device_node *of_node);
+void msi_bitmap_free(struct msi_bitmap *bmp);
+
+#endif /* _POWERPC_SYSDEV_MSI_BITMAP_H */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
new file mode 100644
index 0000000000..49a75340c3
--- /dev/null
+++ b/arch/powerpc/include/asm/nmi.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_NMI_H
+#define _ASM_NMI_H
+
+#ifdef CONFIG_PPC_WATCHDOG
+long soft_nmi_interrupt(struct pt_regs *regs);
+void watchdog_hardlockup_set_timeout_pct(u64 pct);
+#else
+static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {}
+#endif
+
+extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs);
+
+#endif /* _ASM_NMI_H */
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
new file mode 100644
index 0000000000..92df40c6cc
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+
+#define PAGE_SHIFT_8M		23
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	BUG_ON(!hugepd_ok(hpd));
+	/* Clear the HUGEPD_SHIFT_MASK bits; the rest goes through __va() as a pte_t pointer. */
+	return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return PAGE_SHIFT_8M;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned int pdshift)
+{
+	unsigned long idx = (addr & (SZ_4M - 1)) >> PAGE_SHIFT;
+
+	return hugepd_page(hpd) + idx;
+}
+
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+	flush_tlb_page(vma, vmaddr);
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	*hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M);
+}
+
+static inline void hugepd_populate_kernel(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	*hpdp = __hugepd(__pa(new) | _PMD_PRESENT | _PMD_PAGE_8M);
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+	return shift_to_mmu_psize(shift);
+}
+
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		     pte_t pte, unsigned long sz);
+
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep, unsigned long sz)
+{
+	pte_update(mm, addr, ptep, ~0UL, 0, 1);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+	unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+
+	pte_update(mm, addr, ptep, clr, set, 1);
+}
+
+#ifdef CONFIG_PPC_4K_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
+{
+	/* A 16K mapping gets _PAGE_SPS only. */
+	if ((1UL << shift) == SZ_16K)
+		return __pte(pte_val(entry) | _PAGE_SPS);
+
+	/* Every other size is additionally tagged with _PAGE_HUGE. */
+	return __pte(pte_val(entry) | _PAGE_SPS | _PAGE_HUGE);
+}
+#define arch_make_huge_pte arch_make_huge_pte
+#endif
+
+#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
new file mode 100644
index 0000000000..46bc5925e5
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_8XX_H_
+#define _ASM_POWERPC_KUP_8XX_H_
+
+#include <asm/bug.h>
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifndef __ASSEMBLY__
+
+#include <asm/reg.h>
+
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+	regs->kuap = mfspr(SPRN_MD_AP);
+	mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+#define __kuap_save_and_lock __kuap_save_and_lock
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+	WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16);
+
+	return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
+
+static __always_inline void uaccess_begin_8xx(unsigned long val)
+{
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+	    "i"(SPRN_MD_AP), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void uaccess_end_8xx(void)
+{
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+	    "i"(SPRN_MD_AP), "r"(MD_APG_KUAP), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
+{
+	uaccess_begin_8xx(MD_APG_INIT);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+	uaccess_end_8xx();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+	/* Snapshot MD_AP first so the caller can pass it to restore_user_access(). */
+	unsigned long saved_ap = mfspr(SPRN_MD_AP);
+
+	/* Then load MD_APG_KUAP into MD_AP (see uaccess_end_8xx()). */
+	uaccess_end_8xx();
+
+	return saved_ap;
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+	uaccess_begin_8xx(flags);
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	return (regs->kuap & 0xff000000) == (MD_APG_KUAP & 0xff000000); /* top byte matches */
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-40x.h b/arch/powerpc/include/asm/nohash/32/mmu-40x.h
new file mode 100644
index 0000000000..8a8f13a22c
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/mmu-40x.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMU_40X_H_
+#define _ASM_POWERPC_MMU_40X_H_
+
+/*
+ * PPC40x support
+ */
+
+#define PPC40X_TLB_SIZE 64
+
+/*
+ * TLB entries are defined by a "high" tag portion and a "low" data
+ * portion.  On all architectures, the data portion is 32-bits.
+ *
+ * TLB entries are managed entirely under software control by reading,
+ * writing, and searching using the 4xx-specific tlbre, tlbwr, and tlbsx
+ * instructions.
+ */
+
+#define	TLB_LO          1
+#define	TLB_HI          0
+
+#define	TLB_DATA        TLB_LO
+#define	TLB_TAG         TLB_HI
+
+/* Tag portion */
+
+#define TLB_EPN_MASK    0xFFFFFC00      /* Effective Page Number */
+#define TLB_PAGESZ_MASK 0x00000380
+#define TLB_PAGESZ(x)   (((x) & 0x7) << 7)
+#define   PAGESZ_1K		0
+#define   PAGESZ_4K             1
+#define   PAGESZ_16K            2
+#define   PAGESZ_64K            3
+#define   PAGESZ_256K           4
+#define   PAGESZ_1M             5
+#define   PAGESZ_4M             6
+#define   PAGESZ_16M            7
+#define TLB_VALID       0x00000040      /* Entry is valid */
+
+/* Data portion */
+
+#define TLB_RPN_MASK    0xFFFFFC00      /* Real Page Number */
+#define TLB_PERM_MASK   0x00000300
+#define TLB_EX          0x00000200      /* Instruction execution allowed */
+#define TLB_WR          0x00000100      /* Writes permitted */
+#define TLB_ZSEL_MASK   0x000000F0
+#define TLB_ZSEL(x)     (((x) & 0xF) << 4)
+#define TLB_ATTR_MASK   0x0000000F
+#define TLB_W           0x00000008      /* Caching is write-through */
+#define TLB_I           0x00000004      /* Caching is inhibited */
+#define TLB_M           0x00000002      /* Memory is coherent */
+#define TLB_G           0x00000001      /* Memory is guarded from prefetch */
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	unsigned int	id;
+	unsigned int	active;
+	void __user	*vdso;
+} mm_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#define mmu_virtual_psize	MMU_PAGE_4K
+#define mmu_linear_psize	MMU_PAGE_256M
+
+#endif /* _ASM_POWERPC_MMU_40X_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
new file mode 100644
index 0000000000..2d92a39d8f
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMU_44X_H_
+#define _ASM_POWERPC_MMU_44X_H_
+/*
+ * PPC440 support
+ */
+
+#include <asm/asm-const.h>
+
+#define PPC44x_MMUCR_TID	0x000000ff
+#define PPC44x_MMUCR_STS	0x00010000
+
+#define	PPC44x_TLB_PAGEID	0
+#define	PPC44x_TLB_XLAT		1
+#define	PPC44x_TLB_ATTRIB	2
+
+/* Page identification fields */
+#define PPC44x_TLB_EPN_MASK	0xfffffc00      /* Effective Page Number */
+#define	PPC44x_TLB_VALID	0x00000200      /* Valid flag */
+#define PPC44x_TLB_TS		0x00000100	/* Translation address space */
+#define PPC44x_TLB_1K		0x00000000	/* Page sizes */
+#define PPC44x_TLB_4K		0x00000010
+#define PPC44x_TLB_16K		0x00000020
+#define PPC44x_TLB_64K		0x00000030
+#define PPC44x_TLB_256K		0x00000040
+#define PPC44x_TLB_1M		0x00000050
+#define PPC44x_TLB_16M		0x00000070
+#define	PPC44x_TLB_256M		0x00000090
+
+/* Translation fields */
+#define PPC44x_TLB_RPN_MASK	0xfffffc00      /* Real Page Number */
+#define	PPC44x_TLB_ERPN_MASK	0x0000000f
+
+/* Storage attribute and access control fields */
+#define PPC44x_TLB_ATTR_MASK	0x0000ff80
+#define PPC44x_TLB_U0		0x00008000      /* User 0 */
+#define PPC44x_TLB_U1		0x00004000      /* User 1 */
+#define PPC44x_TLB_U2		0x00002000      /* User 2 */
+#define PPC44x_TLB_U3		0x00001000      /* User 3 */
+#define PPC44x_TLB_W		0x00000800      /* Caching is write-through */
+#define PPC44x_TLB_I		0x00000400      /* Caching is inhibited */
+#define PPC44x_TLB_M		0x00000200      /* Memory is coherent */
+#define PPC44x_TLB_G		0x00000100      /* Memory is guarded */
+#define PPC44x_TLB_E		0x00000080      /* Memory is little endian */
+
+#define PPC44x_TLB_PERM_MASK	0x0000003f
+#define PPC44x_TLB_UX		0x00000020      /* User execution */
+#define PPC44x_TLB_UW		0x00000010      /* User write */
+#define PPC44x_TLB_UR		0x00000008      /* User read */
+#define PPC44x_TLB_SX		0x00000004      /* Super execution */
+#define PPC44x_TLB_SW		0x00000002      /* Super write */
+#define PPC44x_TLB_SR		0x00000001      /* Super read */
+
+/* Number of TLB entries */
+#define PPC44x_TLB_SIZE		64
+
+/* 47x bits */
+#define PPC47x_MMUCR_TID	0x0000ffff
+#define PPC47x_MMUCR_STS	0x00010000
+
+/* Page identification fields */
+#define PPC47x_TLB0_EPN_MASK	0xfffff000      /* Effective Page Number */
+#define PPC47x_TLB0_VALID	0x00000800      /* Valid flag */
+#define PPC47x_TLB0_TS		0x00000400	/* Translation address space */
+#define PPC47x_TLB0_4K		0x00000000
+#define PPC47x_TLB0_16K		0x00000010
+#define PPC47x_TLB0_64K		0x00000030
+#define PPC47x_TLB0_1M		0x00000070
+#define PPC47x_TLB0_16M		0x000000f0
+#define PPC47x_TLB0_256M	0x000001f0
+#define PPC47x_TLB0_1G		0x000003f0
+#define PPC47x_TLB0_BOLTED_R	0x00000008	/* tlbre only */
+
+/* Translation fields */
+#define PPC47x_TLB1_RPN_MASK	0xfffff000      /* Real Page Number */
+#define PPC47x_TLB1_ERPN_MASK	0x000003ff
+
+/* Storage attribute and access control fields */
+#define PPC47x_TLB2_ATTR_MASK	0x0003ff80
+#define PPC47x_TLB2_IL1I	0x00020000      /* L1 I-cache inhibit (presumably; verify) */
+#define PPC47x_TLB2_IL1D	0x00010000      /* L1 D-cache inhibit (presumably; verify) */
+#define PPC47x_TLB2_U0		0x00008000      /* User 0 */
+#define PPC47x_TLB2_U1		0x00004000      /* User 1 */
+#define PPC47x_TLB2_U2		0x00002000      /* User 2 */
+#define PPC47x_TLB2_U3		0x00001000      /* User 3 */
+#define PPC47x_TLB2_W		0x00000800      /* Caching is write-through */
+#define PPC47x_TLB2_I		0x00000400      /* Caching is inhibited */
+#define PPC47x_TLB2_M		0x00000200      /* Memory is coherent */
+#define PPC47x_TLB2_G		0x00000100      /* Memory is guarded */
+#define PPC47x_TLB2_E		0x00000080      /* Memory is little endian */
+#define PPC47x_TLB2_PERM_MASK	0x0000003f
+#define PPC47x_TLB2_UX		0x00000020      /* User execution */
+#define PPC47x_TLB2_UW		0x00000010      /* User write */
+#define PPC47x_TLB2_UR		0x00000008      /* User read */
+#define PPC47x_TLB2_SX		0x00000004      /* Super execution */
+#define PPC47x_TLB2_SW		0x00000002      /* Super write */
+#define PPC47x_TLB2_SR		0x00000001      /* Super read */
+#define PPC47x_TLB2_U_RWX	(PPC47x_TLB2_UX|PPC47x_TLB2_UW|PPC47x_TLB2_UR)
+#define PPC47x_TLB2_S_RWX	(PPC47x_TLB2_SX|PPC47x_TLB2_SW|PPC47x_TLB2_SR)
+#define PPC47x_TLB2_S_RW	(PPC47x_TLB2_SW | PPC47x_TLB2_SR)
+#define PPC47x_TLB2_IMG		(PPC47x_TLB2_I | PPC47x_TLB2_M | PPC47x_TLB2_G)
+
+#ifndef __ASSEMBLY__
+
+extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
+
+typedef struct {
+	unsigned int	id;
+	unsigned int	active;
+	void __user	*vdso;
+} mm_context_t;
+
+/* patch sites */
+extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I;
+
+#endif /* !__ASSEMBLY__ */
+
+#ifndef CONFIG_PPC_EARLY_DEBUG_44x
+#define PPC44x_EARLY_TLBS	1
+#else
+#define PPC44x_EARLY_TLBS	2
+#define PPC44x_EARLY_DEBUG_VIRTADDR	(ASM_CONST(0xf0000000) \
+	| (ASM_CONST(CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW) & 0xffff))
+#endif
+
+/* Size of the TLBs used for pinning in lowmem */
+#define PPC_PIN_SIZE	(1 << 28)	/* 256M */
+
+#if defined(CONFIG_PPC_4K_PAGES)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_4K
+#define PPC47x_TLBE_SIZE	PPC47x_TLB0_4K
+#define mmu_virtual_psize	MMU_PAGE_4K
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_16K
+#define PPC47x_TLBE_SIZE	PPC47x_TLB0_16K
+#define mmu_virtual_psize	MMU_PAGE_16K
+#elif defined(CONFIG_PPC_64K_PAGES)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_64K
+#define PPC47x_TLBE_SIZE	PPC47x_TLB0_64K
+#define mmu_virtual_psize	MMU_PAGE_64K
+#elif defined(CONFIG_PPC_256K_PAGES)
+#define PPC44x_TLBE_SIZE	PPC44x_TLB_256K
+#define mmu_virtual_psize	MMU_PAGE_256K
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define mmu_linear_psize	MMU_PAGE_256M
+
+#define PPC44x_PGD_OFF_SHIFT	(32 - PGDIR_SHIFT + PGD_T_LOG2)
+#define PPC44x_PGD_OFF_MASK_BIT	(PGDIR_SHIFT - PGD_T_LOG2)
+#define PPC44x_PTE_ADD_SHIFT	(32 - PGDIR_SHIFT + PTE_SHIFT + PTE_T_LOG2)
+#define PPC44x_PTE_ADD_MASK_BIT	(32 - PTE_T_LOG2 - PTE_SHIFT)
+
+#endif /* _ASM_POWERPC_MMU_44X_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
new file mode 100644
index 0000000000..0e93a4728c
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMU_8XX_H_
+#define _ASM_POWERPC_MMU_8XX_H_
+/*
+ * PPC8xx support
+ */
+
+/* Control/status registers for the MPC8xx.
+ * A write operation to these registers causes serialized access.
+ * During software tablewalk, the registers used perform mask/shift-add
+ * operations when written/read.  A TLB entry is created when the Mx_RPN
+ * is written, and the contents of several registers are used to
+ * create the entry.
+ */
+#define SPRN_MI_CTR	784	/* Instruction TLB control register */
+#define MI_GPM		0x80000000	/* Set domain manager mode */
+#define MI_PPM		0x40000000	/* Set subpage protection */
+#define MI_CIDEF	0x20000000	/* Set cache inhibit when MMU dis */
+#define MI_RSV4I	0x08000000	/* Reserve 4 TLB entries */
+#define MI_PPCS		0x02000000	/* Use MI_RPN prob/priv state */
+#define MI_IDXMASK	0x00001f00	/* TLB index to be loaded */
+
+/* These are the Ks and Kp from the PowerPC books.  For proper operation,
+ * Ks = 0, Kp = 1.
+ */
+#define SPRN_MI_AP	786
+#define MI_Ks		0x80000000	/* Should not be set */
+#define MI_Kp		0x40000000	/* Should always be set */
+
+/*
+ * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC
+ * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means
+ * respectively NA for All or X for Supervisor and no access for User.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all)
+ * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say
+ * "all User" rules, that will lead to NA for all.
+ * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED
+ * 0 => Kernel => 11 (all accesses performed according as user iaw page definition)
+ * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition)
+ * 2 => User => 11 (all accesses performed according as user iaw page definition)
+ * 3 => User+Accessed => 10 (all accesses performed according to swapped page definition) for KUEP
+ * 4-15 => Not Used
+ */
+#define MI_APG_INIT	0xde000000
+
+/* The effective page number register.  When read, contains the information
+ * about the last instruction TLB miss.  When MI_RPN is written, bits in
+ * this register are used to create the TLB entry.
+ */
+#define SPRN_MI_EPN	787
+#define MI_EPNMASK	0xfffff000	/* Effective page number for entry */
+#define MI_EVALID	0x00000200	/* Entry is valid */
+#define MI_ASIDMASK	0x0000000f	/* ASID match value */
+					/* Reset value is undefined */
+
+/* A "level 1" or "segment" or whatever you want to call it register.
+ * For the instruction TLB, it contains bits that get loaded into the
+ * TLB entry when the MI_RPN is written.
+ */
+#define SPRN_MI_TWC	789
+#define MI_APG		0x000001e0	/* Access protection group (0) */
+#define MI_GUARDED	0x00000010	/* Guarded storage */
+#define MI_PSMASK	0x0000000c	/* Mask of page size bits */
+#define MI_PS8MEG	0x0000000c	/* 8M page size */
+#define MI_PS512K	0x00000004	/* 512K page size */
+#define MI_PS4K_16K	0x00000000	/* 4K or 16K page size */
+#define MI_SVALID	0x00000001	/* Segment entry is valid */
+					/* Reset value is undefined */
+
+/* Real page number.  Defined by the pte.  Writing this register
+ * causes a TLB entry to be created for the instruction TLB, using
+ * additional information from the MI_EPN, and MI_TWC registers.
+ */
+#define SPRN_MI_RPN	790
+#define MI_SPS16K	0x00000008	/* Small page size (0 = 4k, 1 = 16k) */
+
+/* Define an RPN value for mapping kernel memory to large virtual
+ * pages for boot initialization.  This has real page number of 0,
+ * large page size, shared page, cache enabled, and valid.
+ * Also mark all subpages valid and write access.
+ */
+#define MI_BOOTINIT	0x000001fd
+
+#define SPRN_MD_CTR	792	/* Data TLB control register */
+#define MD_GPM		0x80000000	/* Set domain manager mode */
+#define MD_PPM		0x40000000	/* Set subpage protection */
+#define MD_CIDEF	0x20000000	/* Set cache inhibit when MMU dis */
+#define MD_WTDEF	0x10000000	/* Set writethrough when MMU dis */
+#define MD_RSV4I	0x08000000	/* Reserve 4 TLB entries */
+#define MD_TWAM		0x04000000	/* Use 4K page hardware assist */
+#define MD_PPCS		0x02000000	/* Use MD_RPN prob/priv state */
+#define MD_IDXMASK	0x00001f00	/* TLB index to be loaded */
+
+#define SPRN_M_CASID	793	/* Address space ID (context) to match */
+#define MC_ASIDMASK	0x0000000f	/* Bits used for ASID value */
+
+
+/* These are the Ks and Kp from the PowerPC books.  For proper operation,
+ * Ks = 0, Kp = 1.
+ */
+#define SPRN_MD_AP	794
+#define MD_Ks		0x80000000	/* Should not be set */
+#define MD_Kp		0x40000000	/* Should always be set */
+
+/* See explanation above at the definition of MI_APG_INIT */
+#define MD_APG_INIT	0xdc000000
+#define MD_APG_KUAP	0xde000000
+
+/* The effective page number register.  When read, contains the information
+ * about the last data TLB miss.  When MD_RPN is written, bits in
+ * this register are used to create the TLB entry.
+ */
+#define SPRN_MD_EPN	795
+#define MD_EPNMASK	0xfffff000	/* Effective page number for entry */
+#define MD_EVALID	0x00000200	/* Entry is valid */
+#define MD_ASIDMASK	0x0000000f	/* ASID match value */
+					/* Reset value is undefined */
+
+/* The pointer to the base address of the first level page table.
+ * During a software tablewalk, reading this register provides the address
+ * of the entry associated with MD_EPN.
+ */
+#define SPRN_M_TWB	796
+#define	M_L1TB		0xfffff000	/* Level 1 table base address */
+#define M_L1INDX	0x00000ffc	/* Level 1 index, when read */
+					/* Reset value is undefined */
+
+/* A "level 1" or "segment" or whatever you want to call it register.
+ * For the data TLB, it contains bits that get loaded into the TLB entry
+ * when the MD_RPN is written.  It also provides the hardware assist
+ * for finding the PTE address during software tablewalk.
+ */
+#define SPRN_MD_TWC	797
+#define MD_L2TB		0xfffff000	/* Level 2 table base address */
+#define MD_L2INDX	0xfffffe00	/* Level 2 index (*pte), when read */
+#define MD_APG		0x000001e0	/* Access protection group (0) */
+#define MD_GUARDED	0x00000010	/* Guarded storage */
+#define MD_PSMASK	0x0000000c	/* Mask of page size bits */
+#define MD_PS8MEG	0x0000000c	/* 8M page size */
+#define MD_PS512K	0x00000004	/* 512K page size */
+#define MD_PS4K_16K	0x00000000	/* 4K or 16K page size */
+#define MD_WT		0x00000002	/* Use writethrough page attribute */
+#define MD_SVALID	0x00000001	/* Segment entry is valid */
+					/* Reset value is undefined */
+
+
+/* Real page number.  Defined by the pte.  Writing this register
+ * causes a TLB entry to be created for the data TLB, using
+ * additional information from the MD_EPN, and MD_TWC registers.
+ */
+#define SPRN_MD_RPN	798
+#define MD_SPS16K	0x00000008	/* Small page size (0 = 4k, 1 = 16k) */
+
+/* This is a temporary storage register that could be used to save
+ * a processor working register during a tablewalk.
+ */
+#define SPRN_M_TW	799
+
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_4K
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_16K
+#define PTE_FRAG_NR		4
+#define PTE_FRAG_SIZE_SHIFT	12
+#define PTE_FRAG_SIZE		(1UL << 12)
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define mmu_linear_psize	MMU_PAGE_8M
+
+#define MODULES_VADDR	(PAGE_OFFSET - SZ_256M)
+#define MODULES_END	PAGE_OFFSET
+
+#ifndef __ASSEMBLY__
+
+#include <linux/mmdebug.h>
+#include <linux/sizes.h>
+
+void mmu_pin_tlb(unsigned long top, bool readonly);
+
+typedef struct {
+	unsigned int id;	/* NOTE(review): presumably the MMU context id - confirm */
+	unsigned int active;	/* NOTE(review): presumably tracks users of the context - confirm */
+	void __user *vdso;	/* user-space pointer (__user); presumably the vDSO base - confirm */
+	void *pte_frag;		/* NOTE(review): presumably PTE fragment allocator state - confirm */
+} mm_context_t;
+
+#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
+#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
+
+/* Page size definitions, common between 32 and 64-bit
+ *
+ *    shift : is the "PAGE_SHIFT" value for that page size
+ *    enc   : is the PTE encoding for that page size
+ *    ind   : is the shift of the corresponding indirect page size
+ */
+struct mmu_psize_def {
+	unsigned int	shift;	/* number of bits */
+	unsigned int	enc;	/* PTE encoding */
+	unsigned int    ind;    /* Corresponding indirect page size shift */
+	unsigned int	flags;	/* MMU_PAGE_SIZE_* bits defined below */
+#define MMU_PAGE_SIZE_DIRECT	0x1	/* Supported as a direct size */
+#define MMU_PAGE_SIZE_INDIRECT	0x2	/* Supported as an indirect size */
+};
+
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	int idx = 0;
+
+	/* Scan mmu_psize_defs[] for the first entry that uses this shift. */
+	while (idx < MMU_PAGE_COUNT && mmu_psize_defs[idx].shift != shift)
+		idx++;
+	return idx < MMU_PAGE_COUNT ? idx : -1;	/* -1: no entry matched */
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	if (!mmu_psize_defs[mmu_psize].shift)
+		BUG();	/* no shift recorded for this page size index */
+	return mmu_psize_defs[mmu_psize].shift;
+}
+
+static inline bool arch_vmap_try_size(unsigned long addr, unsigned long end, u64 pfn,
+				      unsigned int max_page_shift, unsigned long size)
+{
+	/*
+	 * A mapping of @size is only possible when it fits in what is left
+	 * of the range (end - addr) and when it does not exceed the largest
+	 * page size the caller permits (1 << max_page_shift).
+	 */
+	if (end - addr < size || (1UL << max_page_shift) < size)
+		return false;
+
+	/*
+	 * Both the virtual address and the physical address derived from
+	 * @pfn have to be aligned on @size.
+	 */
+	return IS_ALIGNED(addr, size) && IS_ALIGNED(PFN_PHYS(pfn), size);
+}
+
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
+							 u64 pfn, unsigned int max_page_shift)
+{
+	if (arch_vmap_try_size(addr, end, pfn, max_page_shift, SZ_512K))
+		return SZ_512K;
+	/* 16k is used with 16k base pages or whenever a 16k mapping fits */
+	if (PAGE_SIZE == SZ_16K ||
+	    arch_vmap_try_size(addr, end, pfn, max_page_shift, SZ_16K))
+		return SZ_16K;
+	return PAGE_SIZE;
+}
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+	/* Anything smaller than 16k falls back to the base page shift. */
+	if (size < SZ_16K)
+		return PAGE_SHIFT;
+
+	/* 19 and 14 are the shifts of 512k and 16k respectively. */
+	return size >= SZ_512K ? 19 : 14;
+}
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+
+/* patch sites */
+extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1;
+extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_MMU_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
new file mode 100644
index 0000000000..11eac371e7
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGALLOC_32_H
+#define _ASM_POWERPC_PGALLOC_32_H
+
+#include <linux/threads.h>
+#include <linux/slab.h>
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+/* #define pmd_alloc_one(mm,address)       ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) 		do { } while (0)
+#define __pmd_free_tlb(tlb,x,a)		do { } while (0)
+/* #define pgd_populate(mm, pmd, pte)      BUG() */
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+				       pte_t *pte)
+{
+	/* Book E records the PTE page's virtual address, others its physical one. */
+	unsigned long base = IS_ENABLED(CONFIG_BOOKE) ? (unsigned long)pte : __pa(pte);
+
+	*pmdp = __pmd(base | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pte_page)
+{
+	unsigned long base = IS_ENABLED(CONFIG_BOOKE) ? (unsigned long)pte_page :
+			     (__pa(pte_page) | _PMD_USER);	/* non Book E: physical + _PMD_USER */
+
+	*pmdp = __pmd(base | _PMD_PRESENT);
+}
+
+#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
new file mode 100644
index 0000000000..f99c53a5f1
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_32_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/mmu.h>			/* For sub-arch specific PPC_PIN_SIZE */
+
+#ifdef CONFIG_44x
+extern int icache_44x_need_flush;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#define PTE_INDEX_SIZE	PTE_SHIFT
+#define PMD_INDEX_SIZE	0
+#define PUD_INDEX_SIZE	0
+#define PGD_INDEX_SIZE	(32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX	PUD_INDEX_SIZE
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	0
+#define PUD_TABLE_SIZE	0
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define PMD_MASKED_BITS (PTE_TABLE_SIZE - 1)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
+ *
+ * For any >32-bit physical address platform, we can use the following
+ * two level page table layout where the pgdir is 8KB and the MS 13 bits
+ * are an index to the second level table.  The combined pgdir/pmd first
+ * level has 2048 entries and the second level has 512 64-bit PTE entries.
+ * -Matt
+ */
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
+		(unsigned long long)pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
+
+#endif /* !__ASSEMBLY__ */
+
+
+/*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start layout kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+#include <asm/fixmap.h>
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP	PKMAP_BASE
+#else
+#define IOREMAP_TOP	FIXADDR_START
+#endif
+
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START	VMALLOC_START
+#define IOREMAP_END	VMALLOC_END
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 16MB value just means that there will be a ~16MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * We no longer map larger than phys RAM with the BATs so we don't have
+ * to worry about the VMALLOC_OFFSET causing problems.  We do have to worry
+ * about clashes between our early calls to ioremap() that start growing down
+ * from IOREMAP_TOP being run into the VM area allocations (growing upwards
+ * from VMALLOC_START).  For this reason we have ioremap_bot to check when
+ * we actually run into our mappings setup in the early boot with the VM
+ * system.  This really does become a problem for machines with good amounts
+ * of RAM.  -- Cort
+ */
+#define VMALLOC_OFFSET (0x1000000) /* 16M */
+#ifdef PPC_PIN_SIZE
+#define VMALLOC_START (((ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#else
+#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+#define VMALLOC_END	ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define VMALLOC_END	ioremap_bot
+#endif
+
+/*
+ * Bits in a linux-style PTE.  These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible.
+ */
+
+#if defined(CONFIG_40x)
+#include <asm/nohash/32/pte-40x.h>
+#elif defined(CONFIG_44x)
+#include <asm/nohash/32/pte-44x.h>
+#elif defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT)
+#include <asm/nohash/pte-e500.h>
+#elif defined(CONFIG_PPC_85xx)
+#include <asm/nohash/32/pte-85xx.h>
+#elif defined(CONFIG_PPC_8xx)
+#include <asm/nohash/32/pte-8xx.h>
+#endif
+
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as PAGE_SHIFT here (ie, naturally aligned).
+ * Platforms that don't just pre-define the value so we don't override it here.
+ */
+#ifndef PTE_RPN_SHIFT
+#define PTE_RPN_SHIFT	(PAGE_SHIFT)
+#endif
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+#define PTE_RPN_MASK	(~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
+#else
+#define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+#ifndef __ASSEMBLY__
+
+#define pte_clear(mm, addr, ptep) \
+	do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0)
+
+#ifndef pte_mkwrite_novma	/* skipped when a pte_mkwrite_novma macro already exists */
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_RW);	/* copy of @pte with _PAGE_RW set */
+}
+#endif
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);	/* copy of @pte with _PAGE_DIRTY set */
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);	/* copy of @pte with _PAGE_ACCESSED set */
+}
+
+#ifndef pte_wrprotect	/* skipped when a pte_wrprotect macro already exists */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_RW);	/* copy of @pte with _PAGE_RW cleared */
+}
+#endif
+
+#ifndef pte_mkexec	/* skipped when a pte_mkexec macro already exists */
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_EXEC);	/* copy of @pte with _PAGE_EXEC set */
+}
+#endif
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
+#define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);	/* a zero entry is what pmd_none() tests for */
+}
+
+/*
+ * PTE updates. This function is called whenever an existing
+ * valid PTE is updated. This does -not- include set_pte_at()
+ * which nowadays only sets a new PTE.
+ *
+ * Depending on the type of MMU, we may need to use atomic updates
+ * and the PTE may be either 32 or 64 bit wide. In the latter case,
+ * when using atomic updates, only the low part of the PTE is
+ * accessed atomically.
+ *
+ * In addition, on 44x, we also maintain a global flag indicating
+ * that an executable user mapping was modified, which is needed
+ * to properly flush the virtually tagged instruction cache of
+ * those implementations.
+ *
+ * On the 8xx, the page tables are a bit special. For 16k pages, we have
+ * 4 identical entries. For 512k pages, we have 128 entries as if it was
+ * 4k pages, but they are flagged as 512k pages for the hardware.
+ * For other page sizes, we have a single entry in the table.
+ */
+#ifdef CONFIG_PPC_8xx
+static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr);
+static int hugepd_ok(hugepd_t hpd);
+
+static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge)
+{
+	if (!huge)
+		return PAGE_SIZE / SZ_4K;	/* base page: PAGE_SIZE counted in 4k cells */
+	else if (hugepd_ok(*((hugepd_t *)pmd)))
+		return 1;			/* the pmd entry passes hugepd_ok(): one cell */
+	else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE))
+		return SZ_16K / SZ_4K;		/* 4k build, _PAGE_HUGE clear: 16k in 4k cells */
+	else
+		return SZ_512K / SZ_4K;		/* otherwise 512k in 4k cells */
+}
+
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+				     unsigned long clr, unsigned long set, int huge)
+{
+	pte_basic_t *entry = (pte_basic_t *)p;	/* cursor over the cells to rewrite */
+	pte_basic_t old = pte_val(*p);		/* value before the update, returned */
+	pte_basic_t new = (old & ~(pte_basic_t)clr) | set;	/* @clr bits out, @set bits in */
+	int num, i;
+	pmd_t *pmd = pmd_off(mm, addr);
+
+	num = number_of_cells_per_pte(pmd, new, huge);
+	/* Each pass accounts for PAGE_SIZE / SZ_4K cells and moves new on by PAGE_SIZE. */
+	for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) {
+		*entry++ = new;
+		if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) {	/* 16k: 3 more copies */
+			*entry++ = new;
+			*entry++ = new;
+			*entry++ = new;
+		}
+	}
+
+	return old;
+}
+
+#ifdef CONFIG_PPC_16K_PAGES
+#define ptep_get ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+	pte_basic_t val = READ_ONCE(ptep->pte);	/* a single read of ptep->pte */
+	pte_t pte = {val, val, val, val};	/* result built from four copies of it */
+
+	return pte;
+}
+#endif /* CONFIG_PPC_16K_PAGES */
+
+#else
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+				     unsigned long clr, unsigned long set, int huge)	/* @huge is unused here */
+{
+	pte_basic_t old = pte_val(*p);		/* value before the update, returned */
+	pte_basic_t new = (old & ~(pte_basic_t)clr) | set;	/* @clr bits out, @set bits in */
+
+	*p = __pte(new);
+
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))	/* an executable user mapping changed */
+		icache_44x_need_flush = 1;
+#endif
+	return old;
+}
+#endif
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	unsigned long prev = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+
+	return !!(prev & _PAGE_ACCESSED);	/* 1 when _PAGE_ACCESSED was set before */
+}
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep)
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~0, 0, 0));	/* clr = ~0, set = 0; returns old PTE */
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#ifndef ptep_set_wrprotect	/* skipped when a ptep_set_wrprotect macro already exists */
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);	/* clear _PAGE_RW in place, old value ignored */
+}
+#endif
+
+#ifndef __ptep_set_access_flags
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   int psize)
+{
+	/* Only the dirty, accessed, write and exec bits of @entry are merged in. */
+	const unsigned long mask = _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC;
+	int huge = psize > mmu_virtual_psize;
+
+	pte_update(vma->vm_mm, address, ptep, 0, pte_val(entry) & mask, huge);
+	/* The PTE changed: flush the TLB for @address. */
+	flush_tlb_page(vma, address);
+}
+#endif
+
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_ACCESSED;	/* non-zero when _PAGE_ACCESSED is set */
+}
+
+/*
+ * Note that on Book E processors, the pmd contains the kernel virtual
+ * (lowmem) address of the pte page.  The physical address is less useful
+ * because everything runs with translation enabled (even the TLB miss
+ * handler).  On everything else the pmd contains the physical address
+ * of the pte page.  -- paulus
+ */
+#ifndef CONFIG_BOOKE
+#define pmd_pfn(pmd)		(pmd_val(pmd) >> PAGE_SHIFT)
+#else
+#define pmd_page_vaddr(pmd)	\
+	((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
+#define pmd_pfn(pmd)		(__pa(pmd_val(pmd)) >> PAGE_SHIFT)
+#endif
+
+#define pmd_page(pmd)		pfn_to_page(pmd_pfn(pmd))
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit PTEs):
+ *
+ *                         1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ *   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *   <------------------ offset -------------------> < type -> E 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ *
+ * For 64bit PTEs, the offset is extended by 32bit.
+ */
+#define __swp_type(entry)		((entry).val & 0x1f)
+#define __swp_offset(entry)		((entry).val >> 5)
+#define __swp_entry(type, offset)	((swp_entry_t) { ((type) & 0x1f) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
+
+/* We borrow LSB 2 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE	0x000004
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h
new file mode 100644
index 0000000000..0b4e5f8ce3
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_40x_H
+#define _ASM_POWERPC_NOHASH_32_PTE_40x_H
+#ifdef __KERNEL__
+
+/*
+ * At present, all PowerPC 400-class processors share a similar TLB
+ * architecture. The instruction and data sides share a unified,
+ * 64-entry, fully-associative TLB which is maintained totally under
+ * software control. In addition, the instruction side has a
+ * hardware-managed, 4-entry, fully-associative TLB which serves as a
+ * first level to the shared TLB. These two TLBs are known as the UTLB
+ * and ITLB, respectively (see "mmu.h" for definitions).
+ *
+ * There are several potential gotchas here.  The 40x hardware TLBLO
+ * field looks like this:
+ *
+ * 0  1  2  3  4  ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ * RPN.....................  0  0 EX WR ZSEL.......  W  I  M  G
+ *
+ * Where possible we make the Linux PTE bits match up with this
+ *
+ * - bits 20 and 21 must be cleared, because we use 4k pages (40x can
+ *   support down to 1k pages), this is done in the TLBMiss exception
+ *   handler.
+ * - We use only zones 0 (for kernel pages) and 1 (for user pages)
+ *   of the 16 available.  Bit 24-26 of the TLB are cleared in the TLB
+ *   miss handler.  Bit 27 is PAGE_USER, thus selecting the correct
+ *   zone.
+ * - PRESENT *must* be in the bottom two bits because swap PTEs
+ *   use the top 30 bits.  Because 40x doesn't support SMP anyway, M is
+ *   irrelevant so we borrow it for PAGE_PRESENT.  Bit 30
+ *   is cleared in the TLB miss handler before the TLB entry is loaded.
+ * - All other bits of the PTE are loaded into TLBLO without
+ *   modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
+ *   software PTE bits.  We actually use bits 21, 24, 25, and
+ *   30 respectively for the software bits: ACCESSED, DIRTY, RW, and
+ *   PRESENT.
+ */
+
+#define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
+#define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
+#define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
+#define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
+#define	_PAGE_USER	0x010	/* matches one of the zone permission bits */
+#define	_PAGE_SPECIAL	0x020	/* software: Special page */
+#define	_PAGE_DIRTY	0x080	/* software: dirty page */
+#define _PAGE_RW	0x100	/* hardware: WR, anded with dirty in exception */
+#define _PAGE_EXEC	0x200	/* hardware: EX permission */
+#define _PAGE_ACCESSED	0x400	/* software: R: page referenced */
+
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE		0
+
+/* cache related flags non existing on 40x */
+#define _PAGE_COHERENT	0
+
+#define _PAGE_KERNEL_RO		0
+#define _PAGE_KERNEL_ROX	_PAGE_EXEC
+#define _PAGE_KERNEL_RW		(_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+#define _PMD_PRESENT	0x400	/* PMD points to page of PTEs */
+#define _PMD_PRESENT_MASK	_PMD_PRESENT
+#define _PMD_BAD	0x802
+#define _PMD_SIZE_4M	0x0c0
+#define _PMD_SIZE_16M	0x0e0
+#define _PMD_USER	0
+
+#define _PTE_NONE_MASK	0
+
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h
new file mode 100644
index 0000000000..b7ed13cee1
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_44x_H
+#define _ASM_POWERPC_NOHASH_32_PTE_44x_H
+#ifdef __KERNEL__
+
+/*
+ * Definitions for PPC440
+ *
+ * Because of the 3 word TLB entries to support 36-bit addressing,
+ * the attributes are difficult to map in such a fashion that they
+ * are easily loaded during exception processing.  I decided to
+ * organize the entry so the ERPN is the only portion in the
+ * upper word of the PTE and the attribute bits below are packed
+ * in as sensibly as they can be in the area below a 4KB page size
+ * oriented RPN.  This at least makes it easy to load the RPN and
+ * ERPN fields in the TLB. -Matt
+ *
+ * This isn't entirely true anymore, at least some bits are now
+ * easier to move into the TLB from the PTE. -BenH.
+ *
+ * Note that these bits preclude future use of a page size
+ * less than 4KB.
+ *
+ *
+ * PPC 440 core has following TLB attribute fields;
+ *
+ *   TLB1:
+ *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ *   RPN.................................  -  -  -  -  -  - ERPN.......
+ *
+ *   TLB2:
+ *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ *   -  -  -  -  -    - U0 U1 U2 U3 W  I  M  G  E   - UX UW UR SX SW SR
+ *
+ * Newer 440 cores (440x6 as used on AMCC 460EX/460GT) have additional
+ * TLB2 storage attribute fields. Those are:
+ *
+ *   TLB2:
+ *   0...10    11   12   13   14   15   16...31
+ *   no change WL1  IL1I IL1D IL2I IL2D no change
+ *
+ * There are some constraints and options, to decide mapping software bits
+ * into TLB entry.
+ *
+ *   - PRESENT *must* be in the bottom three bits because swap cache
+ *     entries use the top 29 bits for TLB2.
+ *
+ *   - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
+ *     because it doesn't support SMP. However, some later 460 variants
+ *     have -some- form of SMP support and so I keep the bit there for
+ *     future use
+ *
+ * With the PPC 44x Linux implementation, the 0-11th LSBs of the PTE are used
+ * for memory protection related functions (see PTE structure in
+ * include/asm-ppc/mmu.h).  The _PAGE_XXX definitions in this file map to the
+ * above bits.  Note that the bit values are CPU specific, not architecture
+ * specific.
+ *
+ * The kernel PTE entry can be an ordinary PTE mapping a page or a special swap
+ * PTE. In case of a swap PTE, LSB 2-24 are used to store information regarding
+ * the swap entry. However LSB 0-1 still hold protection values, for example,
+ * to distinguish swap PTEs from ordinary PTEs, and must be used with care.
+ */
+
+#define _PAGE_PRESENT	0x00000001		/* S: PTE valid */
+#define _PAGE_RW	0x00000002		/* S: Write permission */
+#define _PAGE_EXEC	0x00000004		/* H: Execute permission */
+#define _PAGE_ACCESSED	0x00000008		/* S: Page referenced */
+#define _PAGE_DIRTY	0x00000010		/* S: Page dirty */
+#define _PAGE_SPECIAL	0x00000020		/* S: Special page */
+#define _PAGE_USER	0x00000040		/* S: User page */
+#define _PAGE_ENDIAN	0x00000080		/* H: E bit */
+#define _PAGE_GUARDED	0x00000100		/* H: G bit */
+#define _PAGE_COHERENT	0x00000200		/* H: M bit */
+#define _PAGE_NO_CACHE	0x00000400		/* H: I bit */
+#define _PAGE_WRITETHRU	0x00000800		/* H: W bit */
+
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE		0
+
+#define _PAGE_KERNEL_RO		0
+#define _PAGE_KERNEL_ROX	_PAGE_EXEC
+#define _PAGE_KERNEL_RW		(_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+/* TODO: Add large page lowmem mapping support */
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+#define _PMD_USER	0
+
+/* ERPN in a PTE never gets cleared, ignore it */
+#define _PTE_NONE_MASK	0xffffffff00000000ULL
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_44x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
new file mode 100644
index 0000000000..16451df5dd
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_85xx_H
+#define _ASM_POWERPC_NOHASH_32_PTE_85xx_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based
+ * processors
+ *
+   MMU Assist Register 3:
+
+   32 33 34 35 36  ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+   RPN......................  0  0 U0 U1 U2 U3 UX SX UW SW UR SR
+
+   - PRESENT *must* be in the bottom two bits because swap PTEs use
+     the top 30 bits.
+
+*/
+
+/* Definitions for FSL Book-E Cores */
+#define _PAGE_PRESENT	0x00001	/* S: PTE contains a translation */
+#define _PAGE_USER	0x00002	/* S: User page (maps to UR) */
+#define _PAGE_RW	0x00004	/* S: Write permission (SW) */
+#define _PAGE_DIRTY	0x00008	/* S: Page dirty */
+#define _PAGE_EXEC	0x00010	/* H: SX permission */
+#define _PAGE_ACCESSED	0x00020	/* S: Page referenced */
+
+#define _PAGE_ENDIAN	0x00040	/* H: E bit */
+#define _PAGE_GUARDED	0x00080	/* H: G bit */
+#define _PAGE_COHERENT	0x00100	/* H: M bit */
+#define _PAGE_NO_CACHE	0x00200	/* H: I bit */
+#define _PAGE_WRITETHRU	0x00400	/* H: W bit */
+#define _PAGE_SPECIAL	0x00800 /* S: Special page */
+
+#define _PAGE_KERNEL_RO		0
+#define _PAGE_KERNEL_ROX	_PAGE_EXEC
+#define _PAGE_KERNEL_RW		(_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE		0
+
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+#define _PMD_USER	0
+
+#define _PTE_NONE_MASK	0
+
+#define PTE_WIMGE_SHIFT (6)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_85xx_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
new file mode 100644
index 0000000000..e6fe1d5731
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_8xx_H
+#define _ASM_POWERPC_NOHASH_32_PTE_8xx_H
+#ifdef __KERNEL__
+
+/*
+ * The PowerPC MPC8xx uses a TLB with hardware assisted, software tablewalk.
+ * We also use the two level tables, but we can put the real bits in them
+ * needed for the TLB and tablewalk.  These definitions require Mx_CTR.PPM = 0,
+ * Mx_CTR.PPCS = 0, and MD_CTR.TWAM = 1.  The level 2 descriptor has
+ * additional page protection (when Mx_CTR.PPCS = 1) that allows TLB hit
+ * based upon user/super access.  The TLB does not have accessed nor write
+ * protect.  We assume that if the TLB get loaded with an entry it is
+ * accessed, and overload the changed bit for write protect.  We use
+ * two bits in the software pte that are supposed to be set to zero in
+ * the TLB entry (24 and 25) for these indicators.  Although the level 1
+ * descriptor contains the guarded and writethrough/copyback bits, we can
+ * set these at the page level since they get copied from the Mx_TWC
+ * register when the TLB entry is loaded.  We will use bit 27 for guard, since
+ * that is where it exists in the MD_TWC, and bit 26 for writethrough.
+ * These will get masked from the level 2 descriptor at TLB load time, and
+ * copied to the MD_TWC before it gets loaded.
+ * Large page sizes added.  We currently support two sizes, 4K and 8M.
+ * This also allows a TLB handler optimization because we can directly
+ * load the PMD into MD_TWC.  The 8M pages are only used for kernel
+ * mapping of well known areas.  The PMD (PGD) entries contain control
+ * flags in addition to the address, so care must be taken that the
+ * software no longer assumes these are only pointers.
+ */
+
+/* Definitions for 8xx embedded chips. */
+#define _PAGE_PRESENT	0x0001	/* V: Page is valid */
+#define _PAGE_NO_CACHE	0x0002	/* CI: cache inhibit */
+#define _PAGE_SH	0x0004	/* SH: No ASID (context) compare */
+#define _PAGE_SPS	0x0008	/* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
+#define _PAGE_DIRTY	0x0100	/* C: page changed */
+
+/* These 4 software bits must be masked out when the L2 entry is loaded
+ * into the TLB.
+ */
+#define _PAGE_GUARDED	0x0010	/* Copied to L1 G entry in DTLB */
+#define _PAGE_ACCESSED	0x0020	/* Copied to L1 APG 1 entry in I/DTLB */
+#define _PAGE_EXEC	0x0040	/* Copied to PP (bit 21) in ITLB */
+#define _PAGE_SPECIAL	0x0080	/* SW entry */
+
+#define _PAGE_NA	0x0200	/* Supervisor NA, User no access */
+#define _PAGE_RO	0x0600	/* Supervisor RO, User no access */
+
+#define _PAGE_HUGE	0x0800	/* Copied to L1 PS bit 29 */
+
+/* cache related flags non existing on 8xx */
+#define _PAGE_COHERENT	0
+#define _PAGE_WRITETHRU	0
+
+#define _PAGE_KERNEL_RO		(_PAGE_SH | _PAGE_RO)
+#define _PAGE_KERNEL_ROX	(_PAGE_SH | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW		(_PAGE_SH | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RWX	(_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
+
+#define _PMD_PRESENT	0x0001
+#define _PMD_PRESENT_MASK	_PMD_PRESENT
+#define _PMD_BAD	0x0f90
+#define _PMD_PAGE_MASK	0x000c
+#define _PMD_PAGE_8M	0x000c
+#define _PMD_PAGE_512K	0x0004
+#define _PMD_ACCESSED	0x0020	/* APG 1 */
+#define _PMD_USER	0x0040	/* APG 2 */
+
+#define _PTE_NONE_MASK	0
+
+#ifdef CONFIG_PPC_16K_PAGES
+#define _PAGE_PSIZE	_PAGE_SPS
+#else
+#define _PAGE_PSIZE		0
+#endif
+
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_NA)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+/* Write-protect a PTE value by setting the _PAGE_RO bits. */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(_PAGE_RO | pte_val(pte));
+}
+#define pte_wrprotect pte_wrprotect
+
+/* Readable unless the bits selected by _PAGE_RO hold exactly _PAGE_NA. */
+static inline int pte_read(pte_t pte)
+{
+	return _PAGE_NA != (pte_val(pte) & _PAGE_RO);
+}
+#define pte_read pte_read
+
+/* Writable only when none of the _PAGE_RO bits are set. */
+static inline int pte_write(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_RO) == 0;
+}
+#define pte_write pte_write
+
+/* Make a PTE value writable by clearing every _PAGE_RO bit. */
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	return __pte(~_PAGE_RO & pte_val(pte));
+}
+#define pte_mkwrite_novma pte_mkwrite_novma
+
+/* A PTE counts as a user PTE when _PAGE_SH is clear. */
+static inline bool pte_user(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_SH) == 0;
+}
+#define pte_user pte_user
+
+/* Mark a PTE value privileged by setting _PAGE_SH (inverse of pte_mkuser). */
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+	return __pte(_PAGE_SH | pte_val(pte));
+}
+#define pte_mkprivileged pte_mkprivileged
+
+/* Mark a PTE value as a user PTE by clearing _PAGE_SH. */
+static inline pte_t pte_mkuser(pte_t pte)
+{
+	return __pte(~_PAGE_SH & pte_val(pte));
+}
+#define pte_mkuser pte_mkuser
+
+/* Flag a PTE value as huge: both _PAGE_SPS and _PAGE_HUGE are set. */
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return __pte(pte_val(pte) | (_PAGE_SPS | _PAGE_HUGE));
+}
+#define pte_mkhuge pte_mkhuge
+
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+				     unsigned long clr, unsigned long set, int huge);
+
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_update(mm, addr, ptep, 0, _PAGE_RO, 0);	/* clr = 0, set = _PAGE_RO, huge = 0 */
+}
+#define ptep_set_wrprotect ptep_set_wrprotect
+
+/* Add entry's dirty/accessed/exec bits to *ptep, clear RO bits entry lacks, then flush. */
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+					   pte_t entry, unsigned long address, int psize)
+{
+	unsigned long to_set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_EXEC);
+	unsigned long to_clear = ~pte_val(entry) & _PAGE_RO;
+	int is_huge = psize > mmu_virtual_psize;
+
+	pte_update(vma->vm_mm, address, ptep, to_clear, to_set, is_huge);
+	flush_tlb_page(vma, address);
+}
+#define __ptep_set_access_flags __ptep_set_access_flags
+
+/* Size reported for a PGD entry: SZ_8M only if both page-size bits are set, else SZ_4M. */
+static inline unsigned long pgd_leaf_size(pgd_t pgd)
+{
+	/* Compare the whole field: _PMD_PAGE_512K shares a bit with _PMD_PAGE_8M. */
+	return (pgd_val(pgd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M ? SZ_8M : SZ_4M;
+}
+
+#define pgd_leaf_size pgd_leaf_size
+
+/* Page size mapped by @pte: 512K if _PAGE_HUGE, else 16K if _PAGE_SPS, else 4K. */
+static inline unsigned long pte_leaf_size(pte_t pte)
+{
+	pte_basic_t flags = pte_val(pte);
+
+	if (flags & _PAGE_HUGE)
+		return SZ_512K;
+
+	return (flags & _PAGE_SPS) ? SZ_16K : SZ_4K;
+}
+
+#define pte_leaf_size pte_leaf_size
+
+#endif
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
new file mode 100644
index 0000000000..e50b211bec
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PGALLOC_64_H
+#define _ASM_POWERPC_PGALLOC_64_H
+/*
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+	struct vmemmap_backing *list;
+	unsigned long phys;
+	unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+	p4d_set(p4d, (unsigned long)pud);
+}
+
+/* Allocate one PUD table from the PGT_CACHE(PUD_INDEX_SIZE) cache; @addr is unused. */
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, (unsigned long)pmd);
+}
+
+/* Point *pmd at the PTE table @pte by storing its address; @mm is unused. */
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+{
+	pmd_set(pmd, (unsigned long)pte);
+}
+
+/* Point *pmd at @pte_page by storing it cast to unsigned long; @mm is unused. */
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
+{
+	pmd_set(pmd, (unsigned long)pte_page);
+}
+
+/* Allocate one PMD table from the PGT_CACHE(PMD_CACHE_INDEX) cache; @addr is unused. */
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+#define __pmd_free_tlb(tlb, pmd, addr)		      \
+	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
+#define __pud_free_tlb(tlb, pud, addr)		      \
+	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+
+#endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
new file mode 100644
index 0000000000..10f5cf444d
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+
+#include <asm-generic/pgtable-nop4d.h>
+
+/*
+ * Entries per page directory level.  The PTE level must use a 64b record
+ * for each page table entry.  The PMD and PGD level use a 32b record for
+ * each entry by assuming that each entry is page aligned.
+ */
+#define PTE_INDEX_SIZE  9
+#define PMD_INDEX_SIZE  7
+#define PUD_INDEX_SIZE  9
+#define PGD_INDEX_SIZE  9
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0
+/* Bits to mask out from a P4D to get to the PUD page */
+#define P4D_MASKED_BITS		0
+
+
+/*
+ * 4-level page tables related bits
+ */
+
+#define p4d_none(p4d)		(!p4d_val(p4d))
+#define p4d_bad(p4d)		(p4d_val(p4d) == 0)
+#define p4d_present(p4d)	(p4d_val(p4d) != 0)
+
+#ifndef __ASSEMBLY__
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+	return (pud_t *) (p4d_val(p4d) & ~P4D_MASKED_BITS);
+}
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+	*p4dp = __p4d(0);
+}
+
+static inline pte_t p4d_pte(p4d_t p4d)
+{
+	return __pte(p4d_val(p4d));
+}
+
+static inline p4d_t pte_p4d(pte_t pte)
+{
+	return __p4d(pte_val(pte));
+}
+extern struct page *p4d_page(p4d_t p4d);
+
+#endif /* !__ASSEMBLY__ */
+
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+/*
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
+#define remap_4k_pfn(vma, addr, pfn, prot)	\
+	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
+
+#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
new file mode 100644
index 0000000000..eb6891e34c
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_H
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the ppc64 non-hashed page table.
+ */
+
+#include <linux/sizes.h>
+
+#include <asm/nohash/64/pgtable-4k.h>
+#include <asm/barrier.h>
+#include <asm/asm-const.h>
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+			    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ */
+#define KERN_VIRT_START ASM_CONST(0xc000100000000000)
+#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies a quarter of it on Book3E
+ * (we keep a quarter for the virtual memmap)
+ */
+#define VMALLOC_START	KERN_VIRT_START
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 2)
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
+
+/*
+ * The third quarter of the kernel virtual space is used for IO mappings,
+ * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
+ *
+ *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
+ *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
+ * IOREMAP_BASE = ISA_IO_BASE + 2G to KERN_IO_START + KERN_IO_SIZE
+ */
+#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
+#define KERN_IO_SIZE	(KERN_VIRT_SIZE >> 2)
+#define FULL_IO_SIZE	0x80000000ul
+#define  ISA_IO_BASE	(KERN_IO_START)
+#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
+#define  PHB_IO_BASE	(ISA_IO_END)
+#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
+#define IOREMAP_BASE	(PHB_IO_END)
+#define IOREMAP_START	(ioremap_bot)
+#define IOREMAP_END	(KERN_IO_START + KERN_IO_SIZE - FIXADDR_SIZE)
+#define FIXADDR_SIZE	SZ_32M
+
+/*
+ * Defines the address of the vmemmap area, in its own region
+ * after the vmalloc space on Book3E
+ */
+#define VMEMMAP_BASE		VMALLOC_END
+#define VMEMMAP_END		KERN_IO_START
+#define vmemmap			((struct page *)VMEMMAP_BASE)
+
+
+/*
+ * Include the PTE bits definitions
+ */
+#include <asm/nohash/pte-e500.h>
+
+#define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+
+/*
+ * _PAGE_CHG_MASK is the mask of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+#define H_PAGE_4K_PFN 0
+
+#ifndef __ASSEMBLY__
+/* pte_clear moved to later in this file */
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
+#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
+
+static inline void pmd_set(pmd_t *pmdp, unsigned long val)
+{
+	*pmdp = __pmd(val);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
+				 || (pmd_val(pmd) & PMD_BAD_BITS))
+#define	pmd_present(pmd)	(!pmd_none(pmd))
+#define pmd_page_vaddr(pmd)	((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS))
+extern struct page *pmd_page(pmd_t pmd);
+#define pmd_pfn(pmd)		(page_to_pfn(pmd_page(pmd)))
+
+static inline void pud_set(pud_t *pudp, unsigned long val)
+{
+	*pudp = __pud(val);
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	*pudp = __pud(0);
+}
+
+#define pud_none(pud)		(!pud_val(pud))
+#define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
+				 || (pud_val(pud) & PUD_BAD_BITS))
+#define pud_present(pud)	(pud_val(pud) != 0)
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+	return (pmd_t *)(pud_val(pud) & ~PUD_MASKED_BITS);
+}
+
+extern struct page *pud_page(pud_t pud);
+
+static inline pte_t pud_pte(pud_t pud)
+{
+	return __pte(pud_val(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+	return __pud(pte_val(pte));
+}
+#define pud_write(pud)		pte_write(pud_pte(pud))
+#define p4d_write(p4d)		pte_write(p4d_pte(p4d))	/* pte_write() of the P4D entry viewed as a PTE */
+
+static inline void p4d_set(p4d_t *p4dp, unsigned long val)
+{
+	*p4dp = __p4d(val);
+}
+
+/*
+ * PTE update primitive: store (old & ~clr) | set into *ptep and return the
+ * old raw value. The update is a plain load followed by a store. Unless
+ * @huge is set, assert_pte_locked() is called afterwards.
+ */
+static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				       unsigned long clr, unsigned long set, int huge)
+{
+	unsigned long prev = pte_val(*ptep);
+
+	*ptep = __pte((prev & ~clr) | set);
+	/* huge pages use the old page table lock */
+	if (!huge)
+		assert_pte_locked(mm, addr);
+	return prev;
+}
+
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_ACCESSED;
+}
+
+/* Clear _PAGE_ACCESSED in *ptep; return 1 if it was set beforehand, else 0. */
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	/* Not young: return without storing anything. */
+	if (!pte_young(*ptep))
+		return 0;
+
+	return !!(pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0) & _PAGE_ACCESSED);
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+/* Clear _PAGE_RW in *ptep through pte_update() (non-huge path). */
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	/* A PTE without _PAGE_RW is left untouched. */
+	if (!(pte_val(*ptep) & _PAGE_RW))
+		return;
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+/* Huge-page variant: clear _PAGE_RW through pte_update() with huge = 1. */
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	/* A PTE without _PAGE_RW is left untouched. */
+	if (pte_val(*ptep) & _PAGE_RW)
+		pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep)		\
+({									\
+	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+						  __ptep);		\
+	__young;							\
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+/* Clear every bit of *ptep and return the PTE value it held before. */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+}
+
+/* Clear every bit of *ptep via pte_update(); the old value is discarded. */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+
+/*
+ * OR the dirty/accessed/RW/exec bits of @entry into *ptep with a plain load
+ * and store, then call flush_tlb_page() for @address. @psize is unused.
+ */
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+					   pte_t entry, unsigned long address, int psize)
+{
+	const unsigned long mask = _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC;
+	unsigned long cur = pte_val(*ptep);
+
+	*ptep = __pte(cur | (pte_val(entry) & mask));
+
+	flush_tlb_page(vma, address);
+}
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ *                         1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ *   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *   <-------------------------- offset ----------------------------
+ *
+ *   3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 6 6 6 6
+ *   2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ *   --------------> <----------- zero ------------> E < type -> 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
+#define MAX_SWAPFILES_CHECK() do { \
+	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+	} while (0)
+
+#define SWP_TYPE_BITS 5
+#define __swp_type(x)		(((x).val >> 2) \
+				& ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)		((x).val >> PTE_RPN_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+					(((type) & 0x1f) << 2) \
+					| ((offset) << PTE_RPN_SHIFT) })
+
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
+#define __swp_entry_to_pte(x)		__pte((x).val)
+
+/* We borrow MSB 56 (LSB 7) to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE	0x80
+
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
+extern int __meminit vmemmap_create_mapping(unsigned long start,
+					    unsigned long page_size,
+					    unsigned long phys);
+extern void vmemmap_remove_mapping(unsigned long start,
+				   unsigned long page_size);
+void __patch_exception(int exc, unsigned long addr);
+#define patch_exception(exc, name) do { \
+	extern unsigned int name; \
+	__patch_exception((exc), (unsigned long)&name); \
+} while (0)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/hugetlb-e500.h b/arch/powerpc/include/asm/nohash/hugetlb-e500.h
new file mode 100644
index 0000000000..8f04ad20e0
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/hugetlb-e500.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H
+#define _ASM_POWERPC_NOHASH_HUGETLB_E500_H
+
+/* Decode @hpd into a hugepte pointer; warns and returns NULL if !hugepd_ok(). */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	if (WARN_ON(!hugepd_ok(hpd)))
+		return NULL;
+	return (pte_t *)(PD_HUGE | (hpd_val(hpd) & ~HUGEPD_SHIFT_MASK));
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
+}
+
+/* Return the hugepte for @hpd; @addr and @pdshift are not used here. */
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned int pdshift)
+{
+	/*
+	 * On FSL BookE several higher-level table entries point to the same
+	 * hugepte and are identical, so the first one is used (idx=0).
+	 */
+	return hugepd_page(hpd);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	/* We use the old format for PPC_E500 */
+	*hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
+}
+
+/* Return shift_to_mmu_psize(shift), or -EINVAL when @shift is odd. */
+static inline int check_and_get_huge_psize(int shift)
+{
+	const int odd = shift & 1;	/* odd shift: not a power of 4 */
+
+	return odd ? -EINVAL : shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_HUGETLB_E500_H */
diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h
new file mode 100644
index 0000000000..0c7c325813
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/kup-booke.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_BOOKE_H_
+#define _ASM_POWERPC_KUP_BOOKE_H_
+
+#include <asm/bug.h>
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_check_amr	gpr1, gpr2
+.endm
+
+#else
+
+#include <linux/sched.h>
+
+#include <asm/reg.h>
+
+static __always_inline void __kuap_lock(void)
+{
+	mtspr(SPRN_PID, 0);
+	isync();
+}
+#define __kuap_lock __kuap_lock
+
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+	regs->kuap = mfspr(SPRN_PID);
+	mtspr(SPRN_PID, 0);
+	isync();
+}
+#define __kuap_save_and_lock __kuap_save_and_lock
+
+/*
+ * Write current->thread.pid to SPRN_PID unless kuap_is_disabled(); @regs is unused.
+ */
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+	/* No isync here: context synchronisation is performed by rfi */
+	if (!kuap_is_disabled())
+		mtspr(SPRN_PID, current->thread.pid);
+}
+
+/* Set SPRN_PID to current->thread.pid when regs->kuap is non-zero; @kuap is ignored. */
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	/* Context synchronisation is performed by rfi */
+	if (regs->kuap != 0)
+		mtspr(SPRN_PID, current->thread.pid);
+}
+
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+	WARN_ON_ONCE(mfspr(SPRN_PID));
+
+	return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
+
+static __always_inline void uaccess_begin_booke(unsigned long val)
+{
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+	    "i"(SPRN_PID), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void uaccess_end_booke(void)
+{
+	asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+	    "i"(SPRN_PID), "r"(0), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
+{
+	uaccess_begin_booke(current->thread.pid);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+	uaccess_end_booke();
+}
+
+/* Read SPRN_PID, call uaccess_end_booke(), and return the value that was read. */
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+	unsigned long saved_pid = mfspr(SPRN_PID);
+
+	uaccess_end_booke();
+	return saved_pid;
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+	if (flags)
+		uaccess_begin_booke(current->thread.pid);
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	return !regs->kuap;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_BOOKE_H_ */
diff --git a/arch/powerpc/include/asm/nohash/mmu-e500.h b/arch/powerpc/include/asm/nohash/mmu-e500.h
new file mode 100644
index 0000000000..6ddced0415
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/mmu-e500.h
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMU_BOOK3E_H_
+#define _ASM_POWERPC_MMU_BOOK3E_H_
+/*
+ * Freescale Book-E/Book-3e (ISA 2.06+) MMU support
+ */
+
+/* Book-3e defined page sizes */
+#define BOOK3E_PAGESZ_1K	0
+#define BOOK3E_PAGESZ_2K	1
+#define BOOK3E_PAGESZ_4K	2
+#define BOOK3E_PAGESZ_8K	3
+#define BOOK3E_PAGESZ_16K	4
+#define BOOK3E_PAGESZ_32K	5
+#define BOOK3E_PAGESZ_64K	6
+#define BOOK3E_PAGESZ_128K	7
+#define BOOK3E_PAGESZ_256K	8
+#define BOOK3E_PAGESZ_512K	9
+#define BOOK3E_PAGESZ_1M	10
+#define BOOK3E_PAGESZ_2M	11
+#define BOOK3E_PAGESZ_4M	12
+#define BOOK3E_PAGESZ_8M	13
+#define BOOK3E_PAGESZ_16M	14
+#define BOOK3E_PAGESZ_32M	15
+#define BOOK3E_PAGESZ_64M	16
+#define BOOK3E_PAGESZ_128M	17
+#define BOOK3E_PAGESZ_256M	18
+#define BOOK3E_PAGESZ_512M	19
+#define BOOK3E_PAGESZ_1GB	20
+#define BOOK3E_PAGESZ_2GB	21
+#define BOOK3E_PAGESZ_4GB	22
+#define BOOK3E_PAGESZ_8GB	23
+#define BOOK3E_PAGESZ_16GB	24
+#define BOOK3E_PAGESZ_32GB	25
+#define BOOK3E_PAGESZ_64GB	26
+#define BOOK3E_PAGESZ_128GB	27
+#define BOOK3E_PAGESZ_256GB	28
+#define BOOK3E_PAGESZ_512GB	29
+#define BOOK3E_PAGESZ_1TB	30
+#define BOOK3E_PAGESZ_2TB	31
+
+/* MAS registers bit definitions */
+
+#define MAS0_TLBSEL_MASK	0x30000000
+#define MAS0_TLBSEL_SHIFT	28
+#define MAS0_TLBSEL(x)		(((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
+#define MAS0_GET_TLBSEL(mas0)	(((mas0) & MAS0_TLBSEL_MASK) >> \
+			MAS0_TLBSEL_SHIFT)
+#define MAS0_ESEL_MASK		0x0FFF0000
+#define MAS0_ESEL_SHIFT		16
+#define MAS0_ESEL(x)		(((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK)
+#define MAS0_NV(x)		((x) & 0x00000FFF)
+#define MAS0_HES		0x00004000
+#define MAS0_WQ_ALLWAYS		0x00000000
+#define MAS0_WQ_COND		0x00001000
+#define MAS0_WQ_CLR_RSRV       	0x00002000
+
+#define MAS1_VALID		0x80000000
+#define MAS1_IPROT		0x40000000
+#define MAS1_TID(x)		(((x) << 16) & 0x3FFF0000)
+#define MAS1_IND		0x00002000
+#define MAS1_TS			0x00001000
+#define MAS1_TSIZE_MASK		0x00000f80
+#define MAS1_TSIZE_SHIFT	7
+#define MAS1_TSIZE(x)		(((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
+#define MAS1_GET_TSIZE(mas1)	(((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT)
+
+#define MAS2_EPN		(~0xFFFUL)
+#define MAS2_X0			0x00000040
+#define MAS2_X1			0x00000020
+#define MAS2_W			0x00000010
+#define MAS2_I			0x00000008
+#define MAS2_M			0x00000004
+#define MAS2_G			0x00000002
+#define MAS2_E			0x00000001
+#define MAS2_WIMGE_MASK		0x0000001f
+#define MAS2_EPN_MASK(size)		(~0 << ((size) + 10))	/* low (size + 10) bits clear */
+
+#define MAS3_RPN		0xFFFFF000
+#define MAS3_U0			0x00000200
+#define MAS3_U1			0x00000100
+#define MAS3_U2			0x00000080
+#define MAS3_U3			0x00000040
+#define MAS3_UX			0x00000020
+#define MAS3_SX			0x00000010
+#define MAS3_UW			0x00000008
+#define MAS3_SW			0x00000004
+#define MAS3_UR			0x00000002
+#define MAS3_SR			0x00000001
+#define MAS3_BAP_MASK		0x0000003f
+#define MAS3_SPSIZE		0x0000003e
+#define MAS3_SPSIZE_SHIFT	1
+
+#define MAS4_TLBSEL_MASK	MAS0_TLBSEL_MASK
+#define MAS4_TLBSELD(x) 	MAS0_TLBSEL(x)
+#define MAS4_INDD		0x00008000	/* Default IND */
+#define MAS4_TSIZED(x)		MAS1_TSIZE(x)
+#define MAS4_X0D		0x00000040
+#define MAS4_X1D		0x00000020
+#define MAS4_WD			0x00000010
+#define MAS4_ID			0x00000008
+#define MAS4_MD			0x00000004
+#define MAS4_GD			0x00000002
+#define MAS4_ED			0x00000001
+#define MAS4_WIMGED_MASK	0x0000001f	/* Default WIMGE */
+#define MAS4_WIMGED_SHIFT	0
+#define MAS4_VLED		MAS4_X1D	/* Default VLE */
+#define MAS4_ACMD		0x000000c0	/* Default ACM */
+#define MAS4_ACMD_SHIFT		6
+#define MAS4_TSIZED_MASK	0x00000f80	/* Default TSIZE */
+#define MAS4_TSIZED_SHIFT	7
+
+#define MAS5_SGS		0x80000000
+
+#define MAS6_SPID0		0x3FFF0000
+#define MAS6_SPID1		0x00007FFE
+#define MAS6_ISIZE(x)		MAS1_TSIZE(x)
+#define MAS6_SAS		0x00000001
+#define MAS6_SPID		MAS6_SPID0
+#define MAS6_SIND 		0x00000002	/* Indirect page */
+#define MAS6_SIND_SHIFT		1
+#define MAS6_SPID_MASK		0x3fff0000
+#define MAS6_SPID_SHIFT		16
+#define MAS6_ISIZE_MASK		0x00000f80
+#define MAS6_ISIZE_SHIFT	7
+
+#define MAS7_RPN		0xFFFFFFFF
+
+#define MAS8_TGS		0x80000000 /* Guest space */
+#define MAS8_VF			0x40000000 /* Virtualization Fault */
+#define MAS8_TLPID		0x000000ff
+
+/* Bit definitions for MMUCFG */
+#define MMUCFG_MAVN	0x00000003	/* MMU Architecture Version Number */
+#define MMUCFG_MAVN_V1	0x00000000	/* v1.0 */
+#define MMUCFG_MAVN_V2	0x00000001	/* v2.0 */
+#define MMUCFG_NTLBS	0x0000000c	/* Number of TLBs */
+#define MMUCFG_PIDSIZE	0x000007c0	/* PID Reg Size */
+#define MMUCFG_TWC	0x00008000	/* TLB Write Conditional (v2.0) */
+#define MMUCFG_LRAT	0x00010000	/* LRAT Supported (v2.0) */
+#define MMUCFG_RASIZE	0x00fe0000	/* Real Addr Size */
+#define MMUCFG_LPIDSIZE	0x0f000000	/* LPID Reg Size */
+
+/* Bit definitions for MMUCSR0 */
+#define MMUCSR0_TLB1FI	0x00000002	/* TLB1 Flash invalidate */
+#define MMUCSR0_TLB0FI	0x00000004	/* TLB0 Flash invalidate */
+#define MMUCSR0_TLB2FI	0x00000040	/* TLB2 Flash invalidate */
+#define MMUCSR0_TLB3FI	0x00000020	/* TLB3 Flash invalidate */
+#define MMUCSR0_TLBFI	(MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
+			 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
+#define MMUCSR0_TLB0PS	0x00000780	/* TLB0 Page Size */
+#define MMUCSR0_TLB1PS	0x00007800	/* TLB1 Page Size */
+#define MMUCSR0_TLB2PS	0x00078000	/* TLB2 Page Size */
+#define MMUCSR0_TLB3PS	0x00780000	/* TLB3 Page Size */
+
+/* MMUCFG bits (mask/shift form; MMUCFG_TWC and MMUCFG_LRAT repeat the values defined above) */
+#define MMUCFG_MAVN_NASK	0x00000003	/* sic: "NASK"; same value as MMUCFG_MAVN */
+#define MMUCFG_MAVN_V1_0	0x00000000
+#define MMUCFG_MAVN_V2_0	0x00000001
+#define MMUCFG_NTLB_MASK	0x0000000c
+#define MMUCFG_NTLB_SHIFT	2
+#define MMUCFG_PIDSIZE_MASK	0x000007c0
+#define MMUCFG_PIDSIZE_SHIFT	6
+#define MMUCFG_TWC		0x00008000
+#define MMUCFG_LRAT		0x00010000
+#define MMUCFG_RASIZE_MASK	0x00fe0000
+#define MMUCFG_RASIZE_SHIFT	17
+#define MMUCFG_LPIDSIZE_MASK	0x0f000000
+#define MMUCFG_LPIDSIZE_SHIFT	24
+
+/* TLBnCFG encoding */
+#define TLBnCFG_N_ENTRY		0x00000fff	/* number of entries */
+#define TLBnCFG_HES		0x00002000	/* HW select supported */
+#define TLBnCFG_IPROT		0x00008000	/* IPROT supported */
+#define TLBnCFG_GTWE		0x00010000	/* Guest can write */
+#define TLBnCFG_IND		0x00020000	/* IND entries supported */
+#define TLBnCFG_PT		0x00040000	/* Can load from page table */
+#define TLBnCFG_MINSIZE		0x00f00000	/* Minimum Page Size (v1.0) */
+#define TLBnCFG_MINSIZE_SHIFT	20
+#define TLBnCFG_MAXSIZE		0x000f0000	/* Maximum Page Size (v1.0) */
+#define TLBnCFG_MAXSIZE_SHIFT	16
+#define TLBnCFG_ASSOC		0xff000000	/* Associativity */
+#define TLBnCFG_ASSOC_SHIFT	24
+
+/* TLBnPS encoding */
+#define TLBnPS_4K		0x00000004
+#define TLBnPS_8K		0x00000008
+#define TLBnPS_16K		0x00000010
+#define TLBnPS_32K		0x00000020
+#define TLBnPS_64K		0x00000040
+#define TLBnPS_128K		0x00000080
+#define TLBnPS_256K		0x00000100
+#define TLBnPS_512K		0x00000200
+#define TLBnPS_1M 		0x00000400
+#define TLBnPS_2M 		0x00000800
+#define TLBnPS_4M 		0x00001000
+#define TLBnPS_8M 		0x00002000
+#define TLBnPS_16M		0x00004000
+#define TLBnPS_32M		0x00008000
+#define TLBnPS_64M		0x00010000
+#define TLBnPS_128M		0x00020000
+#define TLBnPS_256M		0x00040000
+#define TLBnPS_512M		0x00080000
+#define TLBnPS_1G		0x00100000
+#define TLBnPS_2G		0x00200000
+#define TLBnPS_4G		0x00400000
+#define TLBnPS_8G		0x00800000
+#define TLBnPS_16G		0x01000000
+#define TLBnPS_32G		0x02000000
+#define TLBnPS_64G		0x04000000
+#define TLBnPS_128G		0x08000000
+#define TLBnPS_256G		0x10000000
+
+/* tlbilx action encoding */
+#define TLBILX_T_ALL			0
+#define TLBILX_T_TID			1
+#define TLBILX_T_FULLMATCH		3
+#define TLBILX_T_CLASS0			4
+#define TLBILX_T_CLASS1			5
+#define TLBILX_T_CLASS2			6
+#define TLBILX_T_CLASS3			7
+
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define MAS2_M_IF_NEEDED	MAS2_M
+#else
+#define MAS2_M_IF_NEEDED	0
+#endif
+
+#ifndef __ASSEMBLY__
+#include <asm/bug.h>
+
+extern unsigned int tlbcam_index;
+
+typedef struct {
+	unsigned int	id;
+	unsigned int	active;
+	void __user	*vdso;
+} mm_context_t;
+
+/* Page size definitions, common between 32 and 64-bit
+ *
+ *    shift : is the "PAGE_SHIFT" value for that page size
+ *    enc   : is the pte encoding mask
+ *    ind   : is the shift of the corresponding indirect page size
+ */
+struct mmu_psize_def
+{
+	unsigned int	shift;	/* number of bits */
+	unsigned int	enc;	/* PTE encoding */
+	unsigned int    ind;    /* Corresponding indirect page size shift */
+	unsigned int	flags;	/* MMU_PAGE_SIZE_* bits defined below */
+#define MMU_PAGE_SIZE_DIRECT	0x1	/* Supported as a direct size */
+#define MMU_PAGE_SIZE_INDIRECT	0x2	/* Supported as an indirect size */
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	int idx = 0;	/* candidate index into mmu_psize_defs[] */
+
+	while (idx < MMU_PAGE_COUNT && mmu_psize_defs[idx].shift != shift)
+		idx++;
+	/* -1 when no table entry carries the requested shift */
+	return idx < MMU_PAGE_COUNT ? idx : -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	if (mmu_psize_defs[mmu_psize].shift)	/* mmu_psize indexes the table without a range check */
+		return mmu_psize_defs[mmu_psize].shift;
+	BUG();	/* table entry has a zero shift; no value is returned on this path */
+}
+
+/* The page sizes use the same names as 64-bit hash but are
+ * constants
+ */
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_4K
+#else
+#error Unsupported page size
+#endif
+
+extern int mmu_linear_psize;
+extern int mmu_vmemmap_psize;
+
+struct tlb_core_data {
+	/*
+	 * Per-core spinlock for e6500 TLB handlers (no tlbsrx.)
+	 * Must be the first struct element.
+	 */
+	u8 lock;
+
+	/* For software way selection, as on Freescale TLB1 */
+	u8 esel_next, esel_max, esel_first;
+};
+
+#ifdef CONFIG_PPC64
+extern unsigned long linear_map_top;
+extern int book3e_htw_mode;
+
+#define PPC_HTW_NONE	0
+#define PPC_HTW_IBM	1
+#define PPC_HTW_E6500	2
+
+/*
+ * 64-bit booke platforms don't load the tlb in the tlb miss handler code.
+ * HUGETLB_NEED_PRELOAD handles this - it causes huge_ptep_set_access_flags to
+ * return 1, indicating that the tlb requires preloading.
+ */
+#define HUGETLB_NEED_PRELOAD
+
+#define mmu_cleanup_all NULL
+
+#define MAX_PHYSMEM_BITS        44
+
+#endif
+
+#include <asm/percpu.h>
+DECLARE_PER_CPU(int, next_tlbcam_idx);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
new file mode 100644
index 0000000000..e264be219f
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_MMU_H_
+#define _ASM_POWERPC_NOHASH_MMU_H_
+
+#if defined(CONFIG_40x)
+/* 40x-style software loaded TLB */
+#include <asm/nohash/32/mmu-40x.h>
+#elif defined(CONFIG_44x)
+/* 44x-style software loaded TLB */
+#include <asm/nohash/32/mmu-44x.h>
+#elif defined(CONFIG_PPC_E500)
+/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
+#include <asm/nohash/mmu-e500.h>
+#elif defined (CONFIG_PPC_8xx)
+/* Motorola/Freescale 8xx software loaded TLB */
+#include <asm/nohash/32/mmu-8xx.h>
+#endif
+
+#endif /* _ASM_POWERPC_NOHASH_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
new file mode 100644
index 0000000000..4b62376318
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_PGALLOC_H
+#define _ASM_POWERPC_NOHASH_PGALLOC_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#ifdef CONFIG_PPC64
+extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
+#else
+/* 44x etc which is BOOKE not BOOK3E: empty stub */
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+				     unsigned long address)
+{
+
+}
+#endif /* CONFIG_PPC64 */
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),	/* cache selected by PGD_INDEX_SIZE */
+			pgtable_gfp_flags(mm, GFP_KERNEL));	/* GFP_KERNEL passed through pgtable_gfp_flags() */
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);	/* same cache pgd_alloc() uses; mm is unused */
+}
+
+#ifdef CONFIG_PPC64
+#include <asm/nohash/64/pgalloc.h>
+#else
+#include <asm/nohash/32/pgalloc.h>
+#endif
+
+static inline void pgtable_free(void *table, int shift)
+{
+	if (shift == 0) {	/* shift 0: released through pte_fragment_free() */
+		pte_fragment_free((unsigned long *)table, 0);
+		return;
+	}
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);	/* larger shifts are treated as a bug */
+	kmem_cache_free(PGT_CACHE(shift), table);	/* cache is selected by shift */
+}
+
+#define get_hugepd_cache_index(x)	(x)
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	unsigned long tagged;	/* table address with shift OR-ed into it */
+
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	tagged = (unsigned long)table | shift;
+	tlb_remove_table(tlb, (void *)tagged);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	unsigned long tagged = (unsigned long)_table;	/* pointer | shift, as packed by pgtable_free_tlb() */
+	unsigned shift = tagged & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free((void *)(tagged & ~MAX_PGTABLE_INDEX_SIZE), shift);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	tlb_flush_pgtable(tlb, address);	/* an empty stub when CONFIG_PPC64 is not set */
+	pgtable_free_tlb(tlb, table, 0);	/* shift 0 selects the pte_fragment_free() path in pgtable_free() */
+}
+#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
new file mode 100644
index 0000000000..c721478c59
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_PGTABLE_H
+
+#if defined(CONFIG_PPC64)
+#include <asm/nohash/64/pgtable.h>
+#else
+#include <asm/nohash/32/pgtable.h>
+#endif
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+#ifndef __ASSEMBLY__
+
+/* Generic accessors to PTE bits */
+#ifndef pte_write
+static inline int pte_write(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_RW;
+}
+#endif
+#ifndef pte_read
+static inline int pte_read(pte_t pte)		{ return 1; }
+#endif
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline bool pte_hashpte(pte_t pte)	{ return false; }
+static inline bool pte_ci(pte_t pte)		{ return pte_val(pte) & _PAGE_NO_CACHE; }
+static inline bool pte_exec(pte_t pte)		{ return pte_val(pte) & _PAGE_EXEC; }
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/linux/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return pte_present(pte) && !pte_user(pte);	/* NOTE(review): both helpers are only defined later in this header */
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hw_valid(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/*
+ * Don't just check for any non zero bits in _PAGE_USER, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+#ifndef pte_user
+static inline bool pte_user(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+}
+#endif
+
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+	/*
+	 * The PTE must be present, be a user PTE and be readable; the
+	 * write check is only applied when a write access is requested.
+	 */
+	if (!(pte_present(pte) && pte_user(pte) && pte_read(pte)))
+		return false;
+
+	/*
+	 * Reads need nothing further; writes also need pte_write().
+	 */
+	return !write || pte_write(pte);
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot)); }
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_exprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_EXEC);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+#ifndef pte_mkhuge
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return __pte(pte_val(pte));
+}
+#endif
+
+#ifndef pte_mkprivileged
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_USER);
+}
+#endif
+
+#ifndef pte_mkuser
+static inline pte_t pte_mkuser(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_USER);
+}
+#endif
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * a horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+	/* Special case: 32-bit with 64-bit PTE.  In this case, we
+	 * can just store as long as we do the two halves in the right order
+	 * with a barrier in between.
+	 * In the percpu case, we also fall back to the simple update
+	 */
+	if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) {
+		__asm__ __volatile__("\
+			stw%X0 %2,%0\n\
+			mbar\n\
+			stw%X1 %L2,%1"
+		: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+		: "r" (pte) : "memory");
+		return;
+	}
+	/* Anything else just stores the PTE normally. That covers all 64-bit
+	 * cases, and 32-bit non-hash with 32-bit PTEs.
+	 */
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
+	ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte);	/* same value in all four words */
+#else
+	*ptep = pte;
+#endif
+
+	/*
+	 * With hardware tablewalk, a sync is needed to ensure that
+	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
+	 * mappings, we can't tolerate spurious faults, so make sure
+	 * the new PTE will be seen the first time.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64) && is_kernel_addr(addr))
+		mb();
+}
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+				 pte_t *ptep, pte_t entry, int dirty);
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE | _PAGE_GUARDED))
+
+#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE))
+
+#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT))
+
+#if _PAGE_WRITETHRU != 0
+#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT | _PAGE_WRITETHRU))
+#else
+#define pgprot_cached_wthru(prot)	pgprot_noncached(prot)
+#endif
+
+#define pgprot_cached_noncoherent(prot) \
+		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
+
+#define pgprot_writecombine pgprot_noncached_wc
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int hugepd_ok(hugepd_t hpd)
+{
+#ifdef CONFIG_PPC_8xx
+	return ((hpd_val(hpd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M);	/* bits under _PMD_PAGE_MASK equal _PMD_PAGE_8M */
+#else
+	/* We clear the top bit to indicate hugepd */
+	return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0);	/* non-zero entry with PD_HUGE clear */
+#endif
+}
+
+static inline int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+#define is_hugepd(hpd)		(hugepd_ok(hpd))
+#endif
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ */
+#if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE)
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr);
+#else
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr) {}
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h
new file mode 100644
index 0000000000..d8924cbd61
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pte-e500.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_PTE_E500_H
+#define _ASM_POWERPC_NOHASH_PTE_E500_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for processors compliant to the Book3E
+ * architecture 2.06 or later. The position of the PTE bits
+ * matches the HW definition of the optional Embedded Page Table
+ * category.
+ */
+
+/* Architected bits */
+#define _PAGE_PRESENT	0x000001 /* software: pte contains a translation */
+#define _PAGE_SW1	0x000002
+#define _PAGE_BAP_SR	0x000004
+#define _PAGE_BAP_UR	0x000008
+#define _PAGE_BAP_SW	0x000010
+#define _PAGE_BAP_UW	0x000020
+#define _PAGE_BAP_SX	0x000040
+#define _PAGE_BAP_UX	0x000080
+#define _PAGE_PSIZE_MSK	0x000f00
+#define _PAGE_PSIZE_4K	0x000200
+#define _PAGE_PSIZE_8K	0x000300
+#define _PAGE_PSIZE_16K	0x000400
+#define _PAGE_PSIZE_32K	0x000500
+#define _PAGE_PSIZE_64K	0x000600
+#define _PAGE_PSIZE_128K	0x000700
+#define _PAGE_PSIZE_256K	0x000800
+#define _PAGE_PSIZE_512K	0x000900
+#define _PAGE_PSIZE_1M	0x000a00
+#define _PAGE_PSIZE_2M	0x000b00
+#define _PAGE_PSIZE_4M	0x000c00
+#define _PAGE_PSIZE_8M	0x000d00
+#define _PAGE_PSIZE_16M	0x000e00
+#define _PAGE_PSIZE_32M	0x000f00
+#define _PAGE_DIRTY	0x001000 /* C: page changed */
+#define _PAGE_SW0	0x002000
+#define _PAGE_U3	0x004000
+#define _PAGE_U2	0x008000
+#define _PAGE_U1	0x010000
+#define _PAGE_U0	0x020000
+#define _PAGE_ACCESSED	0x040000
+#define _PAGE_ENDIAN	0x080000
+#define _PAGE_GUARDED	0x100000
+#define _PAGE_COHERENT	0x200000 /* M: enforce memory coherence */
+#define _PAGE_NO_CACHE	0x400000 /* I: cache inhibit */
+#define _PAGE_WRITETHRU	0x800000 /* W: cache write-through */
+
+/* "Higher level" linux bit combinations */
+#define _PAGE_EXEC		(_PAGE_BAP_SX | _PAGE_BAP_UX) /* .. and was cache cleaned */
+#define _PAGE_RW		(_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
+#define _PAGE_KERNEL_RW		(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO		(_PAGE_BAP_SR)
+#define _PAGE_KERNEL_RWX	(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
+#define _PAGE_KERNEL_ROX	(_PAGE_BAP_SR | _PAGE_BAP_SX)
+#define _PAGE_USER		(_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+#define _PAGE_PRIVILEGED	(_PAGE_BAP_SR)
+
+#define _PAGE_SPECIAL	_PAGE_SW0
+
+/* Base page size */
+#define _PAGE_PSIZE	_PAGE_PSIZE_4K
+#define	PTE_RPN_SHIFT	(24)
+
+#define PTE_WIMGE_SHIFT (19)
+#define PTE_BAP_SHIFT	(2)
+
+/* On 32-bit, we never clear the top part of the PTE */
+#ifdef CONFIG_PPC32
+#define _PTE_NONE_MASK	0xffffffff00000000ULL
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+#define _PMD_USER	0
+#else
+#define _PTE_NONE_MASK	0
+#endif
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE	(_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_BAP_UX)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+	return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED);	/* clear BAP_UR|BAP_SR, then set BAP_SR again */
+}
+
+#define pte_mkprivileged pte_mkprivileged
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+	return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER);	/* result has both BAP_UR and BAP_SR set */
+}
+
+#define pte_mkuser pte_mkuser
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	/* PTEs with BAP_UR set end up with UX only; all others with SX only */
+	bool user = pte_val(pte) & _PAGE_BAP_UR;
+
+	return __pte((pte_val(pte) & ~_PAGE_EXEC) | (user ? _PAGE_BAP_UX : _PAGE_BAP_SX));
+}
+#define pte_mkexec pte_mkexec
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_PTE_E500_H */
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
new file mode 100644
index 0000000000..9a2cf83ea4
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_TLBFLUSH_H
+#define _ASM_POWERPC_NOHASH_TLBFLUSH_H
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - local_flush_tlb_mm(mm) flushes the specified mm context on
+ *                           the local processor
+ *  - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *
+ */
+
+/*
+ * TLB flushing for software loaded TLB chips
+ *
+ * TODO: (CONFIG_PPC_85xx) determine if flush_tlb_range &
+ * flush_tlb_kernel_range are best implemented as tlbia vs
+ * specific tlbie's
+ */
+
+struct vm_area_struct;
+struct mm_struct;
+
+#define MMU_NO_CONTEXT      	((unsigned int)-1)
+
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end);
+
+#ifdef CONFIG_PPC_8xx
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned int pid = READ_ONCE(mm->context.id);	/* single read of the context id */
+
+	if (pid != MMU_NO_CONTEXT)	/* nothing is issued for an mm without a context id */
+		asm volatile ("sync; tlbia; isync" : : : "memory");	/* tlbia takes no address or id operand */
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");	/* keyed on vmaddr only; vma is unused */
+}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");	/* mm and psize are unused */
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long base = start & PAGE_MASK;	/* start masked with PAGE_MASK */
+
+	if (end - base > PAGE_SIZE)	/* span exceeds PAGE_SIZE: tlbia instead of one tlbie */
+		asm volatile ("sync; tlbia; isync" : : : "memory");
+	else
+		asm volatile ("tlbie %0; sync" : : "r" (base) : "memory");
+}
+#else
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void local_flush_tlb_mm(struct mm_struct *mm);
+extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize);
+
+extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+				   int tsize, int ind);
+#endif
+
+#ifdef CONFIG_SMP
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+			     int tsize, int ind);
+#else
+#define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma,addr)	local_flush_tlb_page(vma,addr)
+#define __flush_tlb_page(mm,addr,p,i)	__local_flush_tlb_page(mm,addr,p,i)
+#endif
+
+#endif /* _ASM_POWERPC_NOHASH_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
new file mode 100644
index 0000000000..eda7fac350
--- /dev/null
+++ b/arch/powerpc/include/asm/nvram.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * NVRAM definitions and access functions.
+ */
+#ifndef _ASM_POWERPC_NVRAM_H
+#define _ASM_POWERPC_NVRAM_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <uapi/asm/nvram.h>
+
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+	__be32 error_type;
+	__be32 seq_num;
+};
+
+struct nvram_os_partition {
+	const char *name;
+	int req_size;	/* desired size, in bytes */
+	int min_size;	/* minimum acceptable size (0 means req_size) */
+	long size;	/* size of data portion (excluding err_log_info) */
+	long index;	/* offset of data portion of partition */
+	bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+	__be16 version;
+	__be16 report_length;
+	__be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
+#ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
+extern int nvram_write_error_log(char * buff, int length,
+					 unsigned int err_type, unsigned int err_seq);
+extern int nvram_read_error_log(char * buff, int length,
+					 unsigned int * err_type, unsigned int *err_seq);
+extern int nvram_clear_error_log(void);
+extern int pSeries_nvram_init(void);
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_MMIO_NVRAM
+extern int mmio_nvram_init(void);
+#else
+static inline int mmio_nvram_init(void)
+{
+	return -ENODEV;	/* fallback used when CONFIG_MMIO_NVRAM is not defined */
+}
+#endif
+
+extern int __init nvram_scan_partitions(void);
+extern loff_t nvram_create_partition(const char *name, int sig,
+				     int req_size, int min_size);
+extern int nvram_remove_partition(const char *name, int sig,
+					const char *exceptions[]);
+extern int nvram_get_partition_size(loff_t data_index);
+extern loff_t nvram_find_partition(const char *name, int sig, int *out_size);
+
+/* Return partition offset in nvram */
+extern int	pmac_get_partition(int partition);
+
+/* Direct access to XPRAM on PowerMacs */
+extern u8	pmac_xpram_read(int xpaddr);
+extern void	pmac_xpram_write(int xpaddr, u8 data);
+
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+				int length, unsigned int *err_type,
+				unsigned int *error_log_cnt);
+
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+				    char *buff, int length,
+				    unsigned int err_type,
+				    unsigned int error_log_cnt);
+
+#endif /* _ASM_POWERPC_NVRAM_H */
diff --git a/arch/powerpc/include/asm/ohare.h b/arch/powerpc/include/asm/ohare.h
new file mode 100644
index 0000000000..da3371fc34
--- /dev/null
+++ b/arch/powerpc/include/asm/ohare.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_OHARE_H
+#define _ASM_POWERPC_OHARE_H
+#ifdef __KERNEL__
+/*
+ * ohare.h: definitions for using the "O'Hare" I/O controller chip.
+ *
+ * Copyright (C) 1997 Paul Mackerras.
+ *
+ * BenH: Changed to match those of heathrow (but not all of them). Please
+ *       check if I didn't break anything (especially the media bay).
+ */
+
+/* offset from ohare base for feature control register */
+#define OHARE_MBCR	0x34
+#define OHARE_FCR	0x38
+
+/*
+ * Bits in feature control register.
+ * These were mostly derived by experiment on a powerbook 3400
+ * and may differ for other machines.
+ */
+#define OH_SCC_RESET		1
+#define OH_BAY_POWER_N		2	/* a guess */
+#define OH_BAY_PCI_ENABLE	4	/* a guess */
+#define OH_BAY_IDE_ENABLE	8
+#define OH_BAY_FLOPPY_ENABLE	0x10
+#define OH_IDE0_ENABLE		0x20
+#define OH_IDE0_RESET_N		0x40	/* a guess */
+#define OH_BAY_DEV_MASK		0x1c
+#define OH_BAY_RESET_N		0x80
+#define OH_IOBUS_ENABLE		0x100	/* IOBUS seems to be IDE */
+#define OH_SCC_ENABLE		0x200
+#define OH_MESH_ENABLE		0x400
+#define OH_FLOPPY_ENABLE	0x800
+#define OH_SCCA_IO		0x4000
+#define OH_SCCB_IO		0x8000
+#define OH_VIA_ENABLE		0x10000	/* Is apparently wrong, to be verified */
+#define OH_IDE1_RESET_N		0x800000
+
+/*
+ * Bits to set in the feature control register (OHARE_FCR) on PowerBooks.
+ */
+#define PBOOK_FEATURES		(OH_IDE0_ENABLE | OH_SCC_ENABLE | \
+				 OH_MESH_ENABLE | OH_SCCA_IO | OH_SCCB_IO)
+
+/*
+ * A magic value to put into the feature control register of the
+ * "ohare" I/O controller on Starmaxes to enable the IDE CD interface.
+ * Contributed by Harry Eaton.
+ */
+#define STARMAX_FEATURES	0xbeff7a
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_OHARE_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
new file mode 100644
index 0000000000..a2bc4b95e7
--- /dev/null
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -0,0 +1,1188 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * OPAL API definitions.
+ *
+ * Copyright 2011-2015 IBM Corp.
+ */
+
+#ifndef __OPAL_API_H
+#define __OPAL_API_H
+
+/****** OPAL APIs ******/
+
+/* Return codes */
+#define OPAL_SUCCESS		0
+#define OPAL_PARAMETER		-1
+#define OPAL_BUSY		-2
+#define OPAL_PARTIAL		-3
+#define OPAL_CONSTRAINED	-4
+#define OPAL_CLOSED		-5
+#define OPAL_HARDWARE		-6
+#define OPAL_UNSUPPORTED	-7
+#define OPAL_PERMISSION		-8
+#define OPAL_NO_MEM		-9
+#define OPAL_RESOURCE		-10
+#define OPAL_INTERNAL_ERROR	-11
+#define OPAL_BUSY_EVENT		-12
+#define OPAL_HARDWARE_FROZEN	-13
+#define OPAL_WRONG_STATE	-14
+#define OPAL_ASYNC_COMPLETION	-15
+#define OPAL_EMPTY		-16
+#define OPAL_I2C_TIMEOUT	-17
+#define OPAL_I2C_INVALID_CMD	-18
+#define OPAL_I2C_LBUS_PARITY	-19
+#define OPAL_I2C_BKEND_OVERRUN	-20
+#define OPAL_I2C_BKEND_ACCESS	-21
+#define OPAL_I2C_ARBT_LOST	-22
+#define OPAL_I2C_NACK_RCVD	-23
+#define OPAL_I2C_STOP_ERR	-24
+#define OPAL_XIVE_PROVISIONING	-31
+#define OPAL_XIVE_FREE_ACTIVE	-32
+#define OPAL_TIMEOUT		-33
+
+/* API Tokens (in r0) */
+#define OPAL_INVALID_CALL		       -1
+#define OPAL_TEST				0
+#define OPAL_CONSOLE_WRITE			1
+#define OPAL_CONSOLE_READ			2
+#define OPAL_RTC_READ				3
+#define OPAL_RTC_WRITE				4
+#define OPAL_CEC_POWER_DOWN			5
+#define OPAL_CEC_REBOOT				6
+#define OPAL_READ_NVRAM				7
+#define OPAL_WRITE_NVRAM			8
+#define OPAL_HANDLE_INTERRUPT			9
+#define OPAL_POLL_EVENTS			10
+#define OPAL_PCI_SET_HUB_TCE_MEMORY		11
+#define OPAL_PCI_SET_PHB_TCE_MEMORY		12
+#define OPAL_PCI_CONFIG_READ_BYTE		13
+#define OPAL_PCI_CONFIG_READ_HALF_WORD  	14
+#define OPAL_PCI_CONFIG_READ_WORD		15
+#define OPAL_PCI_CONFIG_WRITE_BYTE		16
+#define OPAL_PCI_CONFIG_WRITE_HALF_WORD		17
+#define OPAL_PCI_CONFIG_WRITE_WORD		18
+#define OPAL_SET_XIVE				19
+#define OPAL_GET_XIVE				20
+#define OPAL_GET_COMPLETION_TOKEN_STATUS	21 /* obsolete */
+#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER	22
+#define OPAL_PCI_EEH_FREEZE_STATUS		23
+#define OPAL_PCI_SHPC				24
+#define OPAL_CONSOLE_WRITE_BUFFER_SPACE		25
+#define OPAL_PCI_EEH_FREEZE_CLEAR		26
+#define OPAL_PCI_PHB_MMIO_ENABLE		27
+#define OPAL_PCI_SET_PHB_MEM_WINDOW		28
+#define OPAL_PCI_MAP_PE_MMIO_WINDOW		29
+#define OPAL_PCI_SET_PHB_TABLE_MEMORY		30
+#define OPAL_PCI_SET_PE				31
+#define OPAL_PCI_SET_PELTV			32
+#define OPAL_PCI_SET_MVE			33
+#define OPAL_PCI_SET_MVE_ENABLE			34
+#define OPAL_PCI_GET_XIVE_REISSUE		35
+#define OPAL_PCI_SET_XIVE_REISSUE		36
+#define OPAL_PCI_SET_XIVE_PE			37
+#define OPAL_GET_XIVE_SOURCE			38
+#define OPAL_GET_MSI_32				39
+#define OPAL_GET_MSI_64				40
+#define OPAL_START_CPU				41
+#define OPAL_QUERY_CPU_STATUS			42
+#define OPAL_WRITE_OPPANEL			43 /* unimplemented */
+#define OPAL_PCI_MAP_PE_DMA_WINDOW		44
+#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL		45
+#define OPAL_PCI_RESET				49
+#define OPAL_PCI_GET_HUB_DIAG_DATA		50
+#define OPAL_PCI_GET_PHB_DIAG_DATA		51
+#define OPAL_PCI_FENCE_PHB			52
+#define OPAL_PCI_REINIT				53
+#define OPAL_PCI_MASK_PE_ERROR			54
+#define OPAL_SET_SLOT_LED_STATUS		55
+#define OPAL_GET_EPOW_STATUS			56
+#define OPAL_SET_SYSTEM_ATTENTION_LED		57
+#define OPAL_RESERVED1				58
+#define OPAL_RESERVED2				59
+#define OPAL_PCI_NEXT_ERROR			60
+#define OPAL_PCI_EEH_FREEZE_STATUS2		61
+#define OPAL_PCI_POLL				62
+#define OPAL_PCI_MSI_EOI			63
+#define OPAL_PCI_GET_PHB_DIAG_DATA2		64
+#define OPAL_XSCOM_READ				65
+#define OPAL_XSCOM_WRITE			66
+#define OPAL_LPC_READ				67
+#define OPAL_LPC_WRITE				68
+#define OPAL_RETURN_CPU				69
+#define OPAL_REINIT_CPUS			70
+#define OPAL_ELOG_READ				71
+#define OPAL_ELOG_WRITE				72
+#define OPAL_ELOG_ACK				73
+#define OPAL_ELOG_RESEND			74
+#define OPAL_ELOG_SIZE				75
+#define OPAL_FLASH_VALIDATE			76
+#define OPAL_FLASH_MANAGE			77
+#define OPAL_FLASH_UPDATE			78
+#define OPAL_RESYNC_TIMEBASE			79
+#define OPAL_CHECK_TOKEN			80
+#define OPAL_DUMP_INIT				81
+#define OPAL_DUMP_INFO				82
+#define OPAL_DUMP_READ				83
+#define OPAL_DUMP_ACK				84
+#define OPAL_GET_MSG				85
+#define OPAL_CHECK_ASYNC_COMPLETION		86
+#define OPAL_SYNC_HOST_REBOOT			87
+#define OPAL_SENSOR_READ			88
+#define OPAL_GET_PARAM				89
+#define OPAL_SET_PARAM				90
+#define OPAL_DUMP_RESEND			91
+#define OPAL_ELOG_SEND				92	/* Deprecated */
+#define OPAL_PCI_SET_PHB_CAPI_MODE		93
+#define OPAL_DUMP_INFO2				94
+#define OPAL_WRITE_OPPANEL_ASYNC		95
+#define OPAL_PCI_ERR_INJECT			96
+#define OPAL_PCI_EEH_FREEZE_SET			97
+#define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
+#define OPAL_SLW_SET_REG			100
+#define OPAL_REGISTER_DUMP_REGION		101
+#define OPAL_UNREGISTER_DUMP_REGION		102
+#define OPAL_WRITE_TPO				103
+#define OPAL_READ_TPO				104
+#define OPAL_GET_DPO_STATUS			105
+#define OPAL_OLD_I2C_REQUEST			106	/* Deprecated */
+#define OPAL_IPMI_SEND				107
+#define OPAL_IPMI_RECV				108
+#define OPAL_I2C_REQUEST			109
+#define OPAL_FLASH_READ				110
+#define OPAL_FLASH_WRITE			111
+#define OPAL_FLASH_ERASE			112
+#define OPAL_PRD_MSG				113
+#define OPAL_LEDS_GET_INDICATOR			114
+#define OPAL_LEDS_SET_INDICATOR			115
+#define OPAL_CEC_REBOOT2			116
+#define OPAL_CONSOLE_FLUSH			117
+#define OPAL_GET_DEVICE_TREE			118
+#define OPAL_PCI_GET_PRESENCE_STATE		119
+#define OPAL_PCI_GET_POWER_STATE		120
+#define OPAL_PCI_SET_POWER_STATE		121
+#define OPAL_INT_GET_XIRR			122
+#define	OPAL_INT_SET_CPPR			123
+#define OPAL_INT_EOI				124
+#define OPAL_INT_SET_MFRR			125
+#define OPAL_PCI_TCE_KILL			126
+#define OPAL_NMMU_SET_PTCR			127
+#define OPAL_XIVE_RESET				128
+#define OPAL_XIVE_GET_IRQ_INFO			129
+#define OPAL_XIVE_GET_IRQ_CONFIG		130
+#define OPAL_XIVE_SET_IRQ_CONFIG		131
+#define OPAL_XIVE_GET_QUEUE_INFO		132
+#define OPAL_XIVE_SET_QUEUE_INFO		133
+#define OPAL_XIVE_DONATE_PAGE			134
+#define OPAL_XIVE_ALLOCATE_VP_BLOCK		135
+#define OPAL_XIVE_FREE_VP_BLOCK			136
+#define OPAL_XIVE_GET_VP_INFO			137
+#define OPAL_XIVE_SET_VP_INFO			138
+#define OPAL_XIVE_ALLOCATE_IRQ			139
+#define OPAL_XIVE_FREE_IRQ			140
+#define OPAL_XIVE_SYNC				141
+#define OPAL_XIVE_DUMP				142
+#define OPAL_XIVE_GET_QUEUE_STATE		143
+#define OPAL_XIVE_SET_QUEUE_STATE		144
+#define OPAL_SIGNAL_SYSTEM_RESET		145
+#define OPAL_NPU_INIT_CONTEXT			146
+#define OPAL_NPU_DESTROY_CONTEXT		147
+#define OPAL_NPU_MAP_LPAR			148
+#define OPAL_IMC_COUNTERS_INIT			149
+#define OPAL_IMC_COUNTERS_START			150
+#define OPAL_IMC_COUNTERS_STOP			151
+#define OPAL_GET_POWERCAP			152
+#define OPAL_SET_POWERCAP			153
+#define OPAL_GET_POWER_SHIFT_RATIO		154
+#define OPAL_SET_POWER_SHIFT_RATIO		155
+#define OPAL_SENSOR_GROUP_CLEAR			156
+#define OPAL_PCI_SET_P2P			157
+#define OPAL_QUIESCE				158
+#define OPAL_NPU_SPA_SETUP			159
+#define OPAL_NPU_SPA_CLEAR_CACHE		160
+#define OPAL_NPU_TL_SET				161
+#define OPAL_SENSOR_READ_U64			162
+#define OPAL_SENSOR_GROUP_ENABLE		163
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
+#define OPAL_HANDLE_HMI2			166
+#define	OPAL_NX_COPROC_INIT			167
+#define OPAL_XIVE_GET_VP_STATE			170
+#define OPAL_MPIPL_UPDATE			173
+#define OPAL_MPIPL_REGISTER_TAG			174
+#define OPAL_MPIPL_QUERY_TAG			175
+#define OPAL_SECVAR_GET				176
+#define OPAL_SECVAR_GET_NEXT			177
+#define OPAL_SECVAR_ENQUEUE_UPDATE		178
+#define OPAL_LAST				178
+
+#define QUIESCE_HOLD			1 /* Spin all calls at entry */
+#define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
+#define QUIESCE_LOCK_BREAK		3 /* Set to ignore locks. */
+#define QUIESCE_RESUME			4 /* Un-quiesce */
+#define QUIESCE_RESUME_FAST_REBOOT	5 /* Un-quiesce, fast reboot */
+
+/* Device tree flags */
+
+/*
+ * Flags set in power-mgmt nodes in device tree describing
+ * idle states that are supported in the platform.
+ */
+
+#define OPAL_PM_TIMEBASE_STOP		0x00000002
+#define OPAL_PM_LOSE_HYP_CONTEXT	0x00002000
+#define OPAL_PM_LOSE_FULL_CONTEXT	0x00004000
+#define OPAL_PM_NAP_ENABLED		0x00010000
+#define OPAL_PM_SLEEP_ENABLED		0x00020000
+#define OPAL_PM_WINKLE_ENABLED		0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000 /* with workaround */
+#define OPAL_PM_STOP_INST_FAST		0x00100000
+#define OPAL_PM_STOP_INST_DEEP		0x00200000
+
+/*
+ * OPAL_CONFIG_CPU_IDLE_STATE parameters
+ */
+#define OPAL_CONFIG_IDLE_FASTSLEEP	1
+#define OPAL_CONFIG_IDLE_UNDO		0
+#define OPAL_CONFIG_IDLE_APPLY		1
+
+#ifndef __ASSEMBLY__
+
+/* Other enums */
+enum OpalFreezeState {
+	OPAL_EEH_STOPPED_NOT_FROZEN = 0,
+	OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
+	OPAL_EEH_STOPPED_DMA_FREEZE = 2,
+	OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3,
+	OPAL_EEH_STOPPED_RESET = 4,
+	OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
+	OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
+};
+
+enum OpalEehFreezeActionToken {
+	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
+	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
+	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3,
+
+	OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1,
+	OPAL_EEH_ACTION_SET_FREEZE_DMA  = 2,
+	OPAL_EEH_ACTION_SET_FREEZE_ALL  = 3
+};
+
+enum OpalPciStatusToken {
+	OPAL_EEH_NO_ERROR	= 0,
+	OPAL_EEH_IOC_ERROR	= 1,
+	OPAL_EEH_PHB_ERROR	= 2,
+	OPAL_EEH_PE_ERROR	= 3,
+	OPAL_EEH_PE_MMIO_ERROR	= 4,
+	OPAL_EEH_PE_DMA_ERROR	= 5
+};
+
+enum OpalPciErrorSeverity {
+	OPAL_EEH_SEV_NO_ERROR	= 0,
+	OPAL_EEH_SEV_IOC_DEAD	= 1,
+	OPAL_EEH_SEV_PHB_DEAD	= 2,
+	OPAL_EEH_SEV_PHB_FENCED	= 3,
+	OPAL_EEH_SEV_PE_ER	= 4,
+	OPAL_EEH_SEV_INF	= 5
+};
+
+enum OpalErrinjectType {
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR	= 0,
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64	= 1,
+};
+
+enum OpalErrinjectFunc {
+	/* IOA bus specific errors */
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR	= 0,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA	= 1,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR	= 2,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA	= 3,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR	= 4,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA	= 5,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR	= 6,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA	= 7,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR	= 8,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA	= 9,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR	= 10,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA	= 11,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR	= 12,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA	= 13,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER	= 14,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET	= 15,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR	= 16,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA	= 17,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER	= 18,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET	= 19,
+};
+
+enum OpalMmioWindowType {
+	OPAL_M32_WINDOW_TYPE = 1,
+	OPAL_M64_WINDOW_TYPE = 2,
+	OPAL_IO_WINDOW_TYPE  = 3
+};
+
+enum OpalExceptionHandler {
+	OPAL_MACHINE_CHECK_HANDLER	    = 1,
+	OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
+	OPAL_SOFTPATCH_HANDLER		    = 3
+};
+
+enum OpalPendingState {
+	OPAL_EVENT_OPAL_INTERNAL   = 0x1,
+	OPAL_EVENT_NVRAM	   = 0x2,
+	OPAL_EVENT_RTC		   = 0x4,
+	OPAL_EVENT_CONSOLE_OUTPUT  = 0x8,
+	OPAL_EVENT_CONSOLE_INPUT   = 0x10,
+	OPAL_EVENT_ERROR_LOG_AVAIL = 0x20,
+	OPAL_EVENT_ERROR_LOG	   = 0x40,
+	OPAL_EVENT_EPOW		   = 0x80,
+	OPAL_EVENT_LED_STATUS	   = 0x100,
+	OPAL_EVENT_PCI_ERROR	   = 0x200,
+	OPAL_EVENT_DUMP_AVAIL	   = 0x400,
+	OPAL_EVENT_MSG_PENDING	   = 0x800,
+};
+
+enum OpalThreadStatus {
+	OPAL_THREAD_INACTIVE = 0x0,
+	OPAL_THREAD_STARTED = 0x1,
+	OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
+};
+
+enum OpalPciBusCompare {
+	OpalPciBusAny	= 0,	/* Any bus number match */
+	OpalPciBus3Bits	= 2,	/* Match top 3 bits of bus number */
+	OpalPciBus4Bits	= 3,	/* Match top 4 bits of bus number */
+	OpalPciBus5Bits	= 4,	/* Match top 5 bits of bus number */
+	OpalPciBus6Bits	= 5,	/* Match top 6 bits of bus number */
+	OpalPciBus7Bits	= 6,	/* Match top 7 bits of bus number */
+	OpalPciBusAll	= 7,	/* Match bus number exactly */
+};
+
+enum OpalDeviceCompare {
+	OPAL_IGNORE_RID_DEVICE_NUMBER = 0,
+	OPAL_COMPARE_RID_DEVICE_NUMBER = 1
+};
+
+enum OpalFuncCompare {
+	OPAL_IGNORE_RID_FUNCTION_NUMBER = 0,
+	OPAL_COMPARE_RID_FUNCTION_NUMBER = 1
+};
+
+enum OpalPeAction {
+	OPAL_UNMAP_PE = 0,
+	OPAL_MAP_PE = 1
+};
+
+enum OpalPeltvAction {
+	OPAL_REMOVE_PE_FROM_DOMAIN = 0,
+	OPAL_ADD_PE_TO_DOMAIN = 1
+};
+
+enum OpalMveEnableAction {
+	OPAL_DISABLE_MVE = 0,
+	OPAL_ENABLE_MVE = 1
+};
+
+enum OpalM64Action {
+	OPAL_DISABLE_M64 = 0,
+	OPAL_ENABLE_M64_SPLIT = 1,
+	OPAL_ENABLE_M64_NON_SPLIT = 2
+};
+
+enum OpalPciResetScope {
+	OPAL_RESET_PHB_COMPLETE		= 1,
+	OPAL_RESET_PCI_LINK		= 2,
+	OPAL_RESET_PHB_ERROR		= 3,
+	OPAL_RESET_PCI_HOT		= 4,
+	OPAL_RESET_PCI_FUNDAMENTAL	= 5,
+	OPAL_RESET_PCI_IODA_TABLE	= 6
+};
+
+enum OpalPciReinitScope {
+	/*
+	 * Note: we chose values that do not overlap
+	 * OpalPciResetScope as OPAL v2 used the same
+	 * enum for both
+	 */
+	OPAL_REINIT_PCI_DEV = 1000
+};
+
+enum OpalPciResetState {
+	OPAL_DEASSERT_RESET = 0,
+	OPAL_ASSERT_RESET   = 1
+};
+
+enum OpalPciSlotPresence {
+	OPAL_PCI_SLOT_EMPTY	= 0,
+	OPAL_PCI_SLOT_PRESENT	= 1
+};
+
+enum OpalPciSlotPower {
+	OPAL_PCI_SLOT_POWER_OFF	= 0,
+	OPAL_PCI_SLOT_POWER_ON	= 1,
+	OPAL_PCI_SLOT_OFFLINE	= 2,
+	OPAL_PCI_SLOT_ONLINE	= 3
+};
+
+enum OpalSlotLedType {
+	OPAL_SLOT_LED_TYPE_ID = 0,	/* IDENTIFY LED */
+	OPAL_SLOT_LED_TYPE_FAULT = 1,	/* FAULT LED */
+	OPAL_SLOT_LED_TYPE_ATTN = 2,	/* System Attention LED */
+	OPAL_SLOT_LED_TYPE_MAX = 3
+};
+
+enum OpalSlotLedState {
+	OPAL_SLOT_LED_STATE_OFF = 0,	/* LED is OFF */
+	OPAL_SLOT_LED_STATE_ON = 1	/* LED is ON */
+};
+
+/*
+ * Address cycle types for LPC accesses. These also correspond
+ * to the content of the first cell of the "reg" property for
+ * device nodes on the LPC bus
+ */
+enum OpalLPCAddressType {
+	OPAL_LPC_MEM	= 0,
+	OPAL_LPC_IO	= 1,
+	OPAL_LPC_FW	= 2,
+};
+
+enum opal_msg_type {
+	OPAL_MSG_ASYNC_COMP	= 0,	/* params[0] = token, params[1] = rc,
+					 * additional params function-specific
+					 */
+	OPAL_MSG_MEM_ERR	= 1,
+	OPAL_MSG_EPOW		= 2,
+	OPAL_MSG_SHUTDOWN	= 3,	/* params[0] = 1 reboot, 0 shutdown */
+	OPAL_MSG_HMI_EVT	= 4,
+	OPAL_MSG_DPO		= 5,
+	OPAL_MSG_PRD		= 6,
+	OPAL_MSG_OCC		= 7,
+	OPAL_MSG_PRD2		= 8,
+	OPAL_MSG_TYPE_MAX,
+};
+
+struct opal_msg {
+	__be32 msg_type;
+	__be32 reserved;
+	__be64 params[8];
+};
+
+/* System parameter permission */
+enum OpalSysparamPerm {
+	OPAL_SYSPARAM_READ  = 0x1,
+	OPAL_SYSPARAM_WRITE = 0x2,
+	OPAL_SYSPARAM_RW    = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE),
+};
+
+enum {
+	OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1,
+};
+
+struct opal_ipmi_msg {
+	uint8_t version;
+	uint8_t netfn;
+	uint8_t cmd;
+	uint8_t data[];
+};
+
+/* FSP memory errors handling */
+enum OpalMemErr_Version {
+	OpalMemErr_V1 = 1,
+};
+
+enum OpalMemErrType {
+	OPAL_MEM_ERR_TYPE_RESILIENCE	= 0,
+	OPAL_MEM_ERR_TYPE_DYN_DALLOC,
+};
+
+/* Memory Resilience error type */
+enum OpalMemErr_ResilErrType {
+	OPAL_MEM_RESILIENCE_CE		= 0,
+	OPAL_MEM_RESILIENCE_UE,
+	OPAL_MEM_RESILIENCE_UE_SCRUB,
+};
+
+/* Dynamic Memory Deallocation type */
+enum OpalMemErr_DynErrType {
+	OPAL_MEM_DYNAMIC_DEALLOC	= 0,
+};
+
+struct OpalMemoryErrorData {
+	enum OpalMemErr_Version	version:8;	/* 0x00 */
+	enum OpalMemErrType	type:8;		/* 0x01 */
+	__be16			flags;		/* 0x02 */
+	uint8_t			reserved_1[4];	/* 0x04 */
+
+	union {
+		/* Memory Resilience corrected/uncorrected error info */
+		struct {
+			enum OpalMemErr_ResilErrType	resil_err_type:8;
+			uint8_t				reserved_1[7];
+			__be64				physical_address_start;
+			__be64				physical_address_end;
+		} resilience;
+		/* Dynamic memory deallocation error info */
+		struct {
+			enum OpalMemErr_DynErrType	dyn_err_type:8;
+			uint8_t				reserved_1[7];
+			__be64				physical_address_start;
+			__be64				physical_address_end;
+		} dyn_dealloc;
+	} u;
+};
+
+/* HMI interrupt event */
+enum OpalHMI_Version {
+	OpalHMIEvt_V1 = 1,
+	OpalHMIEvt_V2 = 2,
+};
+
+enum OpalHMI_Severity {
+	OpalHMI_SEV_NO_ERROR = 0,
+	OpalHMI_SEV_WARNING = 1,
+	OpalHMI_SEV_ERROR_SYNC = 2,
+	OpalHMI_SEV_FATAL = 3,
+};
+
+enum OpalHMI_Disposition {
+	OpalHMI_DISPOSITION_RECOVERED = 0,
+	OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum OpalHMI_ErrType {
+	OpalHMI_ERROR_MALFUNC_ALERT	= 0,
+	OpalHMI_ERROR_PROC_RECOV_DONE,
+	OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
+	OpalHMI_ERROR_PROC_RECOV_MASKED,
+	OpalHMI_ERROR_TFAC,
+	OpalHMI_ERROR_TFMR_PARITY,
+	OpalHMI_ERROR_HA_OVERFLOW_WARN,
+	OpalHMI_ERROR_XSCOM_FAIL,
+	OpalHMI_ERROR_XSCOM_DONE,
+	OpalHMI_ERROR_SCOM_FIR,
+	OpalHMI_ERROR_DEBUG_TRIG_FIR,
+	OpalHMI_ERROR_HYP_RESOURCE,
+	OpalHMI_ERROR_CAPP_RECOVERY,
+};
+
+enum OpalHMI_XstopType {
+	CHECKSTOP_TYPE_UNKNOWN	=	0,
+	CHECKSTOP_TYPE_CORE	=	1,
+	CHECKSTOP_TYPE_NX	=	2,
+	CHECKSTOP_TYPE_NPU	=	3
+};
+
+enum OpalHMI_CoreXstopReason {
+	CORE_CHECKSTOP_IFU_REGFILE		= 0x00000001,
+	CORE_CHECKSTOP_IFU_LOGIC		= 0x00000002,
+	CORE_CHECKSTOP_PC_DURING_RECOV		= 0x00000004,
+	CORE_CHECKSTOP_ISU_REGFILE		= 0x00000008,
+	CORE_CHECKSTOP_ISU_LOGIC		= 0x00000010,
+	CORE_CHECKSTOP_FXU_LOGIC		= 0x00000020,
+	CORE_CHECKSTOP_VSU_LOGIC		= 0x00000040,
+	CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE	= 0x00000080,
+	CORE_CHECKSTOP_LSU_REGFILE		= 0x00000100,
+	CORE_CHECKSTOP_PC_FWD_PROGRESS		= 0x00000200,
+	CORE_CHECKSTOP_LSU_LOGIC		= 0x00000400,
+	CORE_CHECKSTOP_PC_LOGIC			= 0x00000800,
+	CORE_CHECKSTOP_PC_HYP_RESOURCE		= 0x00001000,
+	CORE_CHECKSTOP_PC_HANG_RECOV_FAILED	= 0x00002000,
+	CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED	= 0x00004000,
+	CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ	= 0x00008000,
+	CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ	= 0x00010000,
+};
+
+enum OpalHMI_NestAccelXstopReason {
+	NX_CHECKSTOP_SHM_INVAL_STATE_ERR	= 0x00000001,
+	NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1	= 0x00000002,
+	NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2	= 0x00000004,
+	NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR	= 0x00000008,
+	NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR	= 0x00000010,
+	NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR	= 0x00000020,
+	NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR	= 0x00000040,
+	NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR	= 0x00000080,
+	NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR	= 0x00000100,
+	NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR	= 0x00000200,
+	NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR	= 0x00000400,
+	NX_CHECKSTOP_DMA_CRB_UE			= 0x00000800,
+	NX_CHECKSTOP_DMA_CRB_SUE		= 0x00001000,
+	NX_CHECKSTOP_PBI_ISN_UE			= 0x00002000,
+};
+
+struct OpalHMIEvent {
+	uint8_t		version;	/* 0x00 */
+	uint8_t		severity;	/* 0x01 */
+	uint8_t		type;		/* 0x02 */
+	uint8_t		disposition;	/* 0x03 */
+	uint8_t		reserved_1[4];	/* 0x04 */
+
+	__be64		hmer;
+	/* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
+	__be64		tfmr;
+
+	/* version 2 and later */
+	union {
+		/*
+		 * checkstop info (Core/NX).
+		 * Valid for OpalHMI_ERROR_MALFUNC_ALERT.
+		 */
+		struct {
+			uint8_t	xstop_type;	/* enum OpalHMI_XstopType */
+			uint8_t reserved_1[3];
+			__be32  xstop_reason;
+			union {
+				__be32 pir;	/* for CHECKSTOP_TYPE_CORE */
+				__be32 chip_id;	/* for CHECKSTOP_TYPE_NX */
+			} u;
+		} xstop_error;
+	} u;
+};
+
+/* OPAL_HANDLE_HMI2 out_flags */
+enum {
+	OPAL_HMI_FLAGS_TB_RESYNC	= (1ull << 0), /* Timebase has been resynced */
+	OPAL_HMI_FLAGS_DEC_LOST		= (1ull << 1), /* DEC lost, needs to be reprogrammed */
+	OPAL_HMI_FLAGS_HDEC_LOST	= (1ull << 2), /* HDEC lost, needs to be reprogrammed */
+	OPAL_HMI_FLAGS_TOD_TB_FAIL	= (1ull << 3), /* TOD/TB recovery failed. */
+	OPAL_HMI_FLAGS_NEW_EVENT	= (1ull << 63), /* An event has been created */
+};
+
+enum {
+	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
+	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
+	OPAL_P7IOC_DIAG_TYPE_BI		= 2,
+	OPAL_P7IOC_DIAG_TYPE_CI		= 3,
+	OPAL_P7IOC_DIAG_TYPE_MISC	= 4,
+	OPAL_P7IOC_DIAG_TYPE_I2C	= 5,
+	OPAL_P7IOC_DIAG_TYPE_LAST	= 6
+};
+
+struct OpalIoP7IOCErrorData {
+	__be16 type;
+
+	/* GEM */
+	__be64 gemXfir;
+	__be64 gemRfir;
+	__be64 gemRirqfir;
+	__be64 gemMask;
+	__be64 gemRwof;
+
+	/* LEM */
+	__be64 lemFir;
+	__be64 lemErrMask;
+	__be64 lemAction0;
+	__be64 lemAction1;
+	__be64 lemWof;
+
+	union {
+		struct OpalIoP7IOCRgcErrorData {
+			__be64 rgcStatus;	/* 3E1C10 */
+			__be64 rgcLdcp;		/* 3E1C18 */
+		}rgc;
+		struct OpalIoP7IOCBiErrorData {
+			__be64 biLdcp0;		/* 3C0100, 3C0118 */
+			__be64 biLdcp1;		/* 3C0108, 3C0120 */
+			__be64 biLdcp2;		/* 3C0110, 3C0128 */
+			__be64 biFenceStatus;	/* 3C0130, 3C0130 */
+
+			uint8_t biDownbound;	/* BI Downbound or Upbound */
+		}bi;
+		struct OpalIoP7IOCCiErrorData {
+			__be64 ciPortStatus;	/* 3Dn008 */
+			__be64 ciPortLdcp;	/* 3Dn010 */
+
+			uint8_t ciPort;		/* Index of CI port: 0/1 */
+		}ci;
+	};
+};
+
+/**
+ * This structure defines the overlay which will be used to store PHB error
+ * data upon request.
+ */
+enum {
+	OPAL_PHB_ERROR_DATA_VERSION_1 = 1,
+};
+
+enum {
+	OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
+	OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2,
+	OPAL_PHB_ERROR_DATA_TYPE_PHB4 = 3
+};
+
+enum {
+	OPAL_P7IOC_NUM_PEST_REGS = 128,
+	OPAL_PHB3_NUM_PEST_REGS = 256,
+	OPAL_PHB4_NUM_PEST_REGS = 512
+};
+
+struct OpalIoPhbErrorCommon {
+	__be32 version;
+	__be32 ioType;
+	__be32 len;
+};
+
+struct OpalIoP7IOCPhbErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	__be32 brdgCtl;
+
+	// P7IOC utl regs
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
+
+	// P7IOC cfg regs
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
+
+	// cfg AER regs
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
+
+	__be32 rsv3;
+
+	// Record data about the call to allocate a buffer.
+	__be64 errorClass;
+	__be64 correlator;
+
+	//P7IOC MMIO Error Regs
+	__be64 p7iocPlssr;                // n120
+	__be64 p7iocCsr;                  // n110
+	__be64 lemFir;                    // nC00
+	__be64 lemErrorMask;              // nC18
+	__be64 lemWOF;                    // nC40
+	__be64 phbErrorStatus;            // nC80
+	__be64 phbFirstErrorStatus;       // nC88
+	__be64 phbErrorLog0;              // nCC0
+	__be64 phbErrorLog1;              // nCC8
+	__be64 mmioErrorStatus;           // nD00
+	__be64 mmioFirstErrorStatus;      // nD08
+	__be64 mmioErrorLog0;             // nD40
+	__be64 mmioErrorLog1;             // nD48
+	__be64 dma0ErrorStatus;           // nD80
+	__be64 dma0FirstErrorStatus;      // nD88
+	__be64 dma0ErrorLog0;             // nDC0
+	__be64 dma0ErrorLog1;             // nDC8
+	__be64 dma1ErrorStatus;           // nE00
+	__be64 dma1FirstErrorStatus;      // nE08
+	__be64 dma1ErrorLog0;             // nE40
+	__be64 dma1ErrorLog1;             // nE48
+	__be64 pestA[OPAL_P7IOC_NUM_PEST_REGS];
+	__be64 pestB[OPAL_P7IOC_NUM_PEST_REGS];
+};
+
+struct OpalIoPhb3ErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	__be32 brdgCtl;
+
+	/* PHB3 UTL regs */
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
+
+	/* PHB3 cfg regs */
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
+
+	/* cfg AER regs */
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
+
+	__be32 rsv3;
+
+	/* Record data about the call to allocate a buffer */
+	__be64 errorClass;
+	__be64 correlator;
+
+	/* PHB3 MMIO Error Regs */
+	__be64 nFir;			/* 000 */
+	__be64 nFirMask;		/* 003 */
+	__be64 nFirWOF;		/* 008 */
+	__be64 phbPlssr;		/* 120 */
+	__be64 phbCsr;		/* 110 */
+	__be64 lemFir;		/* C00 */
+	__be64 lemErrorMask;		/* C18 */
+	__be64 lemWOF;		/* C40 */
+	__be64 phbErrorStatus;	/* C80 */
+	__be64 phbFirstErrorStatus;	/* C88 */
+	__be64 phbErrorLog0;		/* CC0 */
+	__be64 phbErrorLog1;		/* CC8 */
+	__be64 mmioErrorStatus;	/* D00 */
+	__be64 mmioFirstErrorStatus;	/* D08 */
+	__be64 mmioErrorLog0;		/* D40 */
+	__be64 mmioErrorLog1;		/* D48 */
+	__be64 dma0ErrorStatus;	/* D80 */
+	__be64 dma0FirstErrorStatus;	/* D88 */
+	__be64 dma0ErrorLog0;		/* DC0 */
+	__be64 dma0ErrorLog1;		/* DC8 */
+	__be64 dma1ErrorStatus;	/* E00 */
+	__be64 dma1FirstErrorStatus;	/* E08 */
+	__be64 dma1ErrorLog0;		/* E40 */
+	__be64 dma1ErrorLog1;		/* E48 */
+	__be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
+	__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
+};
+
+struct OpalIoPhb4ErrorData {
+	struct OpalIoPhbErrorCommon common;
+
+	__be32 brdgCtl;
+
+	/* PHB4 cfg regs */
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
+
+	/* cfg AER regs */
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
+
+	/* PHB4 ETU Error Regs */
+	__be64 nFir;				/* 000 */
+	__be64 nFirMask;			/* 003 */
+	__be64 nFirWOF;				/* 008 */
+	__be64 phbPlssr;			/* 120 */
+	__be64 phbCsr;				/* 110 */
+	__be64 lemFir;				/* C00 */
+	__be64 lemErrorMask;			/* C18 */
+	__be64 lemWOF;				/* C40 */
+	__be64 phbErrorStatus;			/* C80 */
+	__be64 phbFirstErrorStatus;		/* C88 */
+	__be64 phbErrorLog0;			/* CC0 */
+	__be64 phbErrorLog1;			/* CC8 */
+	__be64 phbTxeErrorStatus;		/* D00 */
+	__be64 phbTxeFirstErrorStatus;		/* D08 */
+	__be64 phbTxeErrorLog0;			/* D40 */
+	__be64 phbTxeErrorLog1;			/* D48 */
+	__be64 phbRxeArbErrorStatus;		/* D80 */
+	__be64 phbRxeArbFirstErrorStatus;	/* D88 */
+	__be64 phbRxeArbErrorLog0;		/* DC0 */
+	__be64 phbRxeArbErrorLog1;		/* DC8 */
+	__be64 phbRxeMrgErrorStatus;		/* E00 */
+	__be64 phbRxeMrgFirstErrorStatus;	/* E08 */
+	__be64 phbRxeMrgErrorLog0;		/* E40 */
+	__be64 phbRxeMrgErrorLog1;		/* E48 */
+	__be64 phbRxeTceErrorStatus;		/* E80 */
+	__be64 phbRxeTceFirstErrorStatus;	/* E88 */
+	__be64 phbRxeTceErrorLog0;		/* EC0 */
+	__be64 phbRxeTceErrorLog1;		/* EC8 */
+
+	/* PHB4 REGB Error Regs */
+	__be64 phbPblErrorStatus;		/* 1900 */
+	__be64 phbPblFirstErrorStatus;		/* 1908 */
+	__be64 phbPblErrorLog0;			/* 1940 */
+	__be64 phbPblErrorLog1;			/* 1948 */
+	__be64 phbPcieDlpErrorLog1;		/* 1AA0 */
+	__be64 phbPcieDlpErrorLog2;		/* 1AA8 */
+	__be64 phbPcieDlpErrorStatus;		/* 1AB0 */
+	__be64 phbRegbErrorStatus;		/* 1C00 */
+	__be64 phbRegbFirstErrorStatus;		/* 1C08 */
+	__be64 phbRegbErrorLog0;		/* 1C40 */
+	__be64 phbRegbErrorLog1;		/* 1C48 */
+
+	__be64 pestA[OPAL_PHB4_NUM_PEST_REGS];
+	__be64 pestB[OPAL_PHB4_NUM_PEST_REGS];
+};
+
+enum {
+	OPAL_REINIT_CPUS_HILE_BE	= (1 << 0),
+	OPAL_REINIT_CPUS_HILE_LE	= (1 << 1),
+
+	/* These two define the base MMU mode of the host on P9
+	 *
+	 * On P9 Nimbus DD2.0 and Cumulus (and later), KVM can still
+	 * create hash guests in "radix" mode with care (full core
+	 * switch only).
+	 */
+	OPAL_REINIT_CPUS_MMU_HASH	= (1 << 2),
+	OPAL_REINIT_CPUS_MMU_RADIX	= (1 << 3),
+
+	OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED = (1 << 4),
+};
+
+typedef struct oppanel_line {
+	__be64 line;
+	__be64 line_len;
+} oppanel_line_t;
+
+enum opal_prd_msg_type {
+	OPAL_PRD_MSG_TYPE_INIT = 0,	/* HBRT --> OPAL */
+	OPAL_PRD_MSG_TYPE_FINI,		/* HBRT/kernel --> OPAL */
+	OPAL_PRD_MSG_TYPE_ATTN,		/* HBRT <-- OPAL */
+	OPAL_PRD_MSG_TYPE_ATTN_ACK,	/* HBRT --> OPAL */
+	OPAL_PRD_MSG_TYPE_OCC_ERROR,	/* HBRT <-- OPAL */
+	OPAL_PRD_MSG_TYPE_OCC_RESET,	/* HBRT <-- OPAL */
+};
+
+struct opal_prd_msg_header {
+	uint8_t		type;
+	uint8_t		pad[1];
+	__be16		size;
+};
+
+struct opal_prd_msg;
+
+#define OCC_RESET                       0
+#define OCC_LOAD                        1
+#define OCC_THROTTLE                    2
+#define OCC_MAX_THROTTLE_STATUS         5
+
+struct opal_occ_msg {
+	__be64 type;
+	__be64 chip;
+	__be64 throttle_status;
+};
+
+/*
+ * SG entries
+ *
+ * WARNING: The current implementation requires each entry
+ * to represent a block that is 4k aligned *and* each block
+ * size except the last one in the list to be as well.
+ */
+struct opal_sg_entry {
+	__be64 data;
+	__be64 length;
+};
+
+/*
+ * Candidate image SG list.
+ *
+ * length = VER | length
+ */
+struct opal_sg_list {
+	__be64 length;
+	__be64 next;
+	struct opal_sg_entry entry[];
+};
+
+/*
+ * Dump region ID range usable by the OS
+ */
+#define OPAL_DUMP_REGION_HOST_START		0x80
+#define OPAL_DUMP_REGION_LOG_BUF		0x80
+#define OPAL_DUMP_REGION_HOST_END		0xFF
+
+/* CAPI modes for PHB */
+enum {
+	OPAL_PHB_CAPI_MODE_PCIE		= 0,
+	OPAL_PHB_CAPI_MODE_CAPI		= 1,
+	OPAL_PHB_CAPI_MODE_SNOOP_OFF    = 2,
+	OPAL_PHB_CAPI_MODE_SNOOP_ON	= 3,
+	OPAL_PHB_CAPI_MODE_DMA		= 4,
+	OPAL_PHB_CAPI_MODE_DMA_TVT1	= 5,
+};
+
+/* OPAL I2C request */
+struct opal_i2c_request {
+	uint8_t	type;
+#define OPAL_I2C_RAW_READ	0
+#define OPAL_I2C_RAW_WRITE	1
+#define OPAL_I2C_SM_READ	2
+#define OPAL_I2C_SM_WRITE	3
+	uint8_t flags;
+#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
+	uint8_t	subaddr_sz;		/* Max 4 */
+	uint8_t reserved;
+	__be16 addr;			/* 7 or 10 bit address */
+	__be16 reserved2;
+	__be32 subaddr;		/* Sub-address if any */
+	__be32 size;			/* Data size */
+	__be64 buffer_ra;		/* Buffer real address */
+};
+
+/*
+ * EPOW status sharing (OPAL and the host)
+ *
+ * The host will pass to OPAL a buffer of length OPAL_SYSEPOW_MAX
+ * with individual elements being 16 bits wide to fetch the system
+ * wide EPOW status. Each element in the buffer will contain the
+ * EPOW status in its bit representation for a particular EPOW sub
+ * class as defined here. So multiple detailed EPOW status bits
+ * specific for any sub class can be represented in a single buffer
+ * element as its bit representation.
+ */
+
+/* System EPOW type */
+enum OpalSysEpow {
+	OPAL_SYSEPOW_POWER	= 0,	/* Power EPOW */
+	OPAL_SYSEPOW_TEMP	= 1,	/* Temperature EPOW */
+	OPAL_SYSEPOW_COOLING	= 2,	/* Cooling EPOW */
+	OPAL_SYSEPOW_MAX	= 3,	/* Max EPOW categories */
+};
+
+/* Power EPOW */
+enum OpalSysPower {
+	OPAL_SYSPOWER_UPS	= 0x0001, /* System on UPS power */
+	OPAL_SYSPOWER_CHNG	= 0x0002, /* System power config change */
+	OPAL_SYSPOWER_FAIL	= 0x0004, /* System impending power failure */
+	OPAL_SYSPOWER_INCL	= 0x0008, /* System incomplete power */
+};
+
+/* Temperature EPOW */
+enum OpalSysTemp {
+	OPAL_SYSTEMP_AMB	= 0x0001, /* System over ambient temperature */
+	OPAL_SYSTEMP_INT	= 0x0002, /* System over internal temperature */
+	OPAL_SYSTEMP_HMD	= 0x0004, /* System over ambient humidity */
+};
+
+/* Cooling EPOW */
+enum OpalSysCooling {
+	OPAL_SYSCOOL_INSF	= 0x0001, /* System insufficient cooling */
+};
+
+/* Argument to OPAL_CEC_REBOOT2() */
+enum {
+	OPAL_REBOOT_NORMAL		= 0,
+	OPAL_REBOOT_PLATFORM_ERROR	= 1,
+	OPAL_REBOOT_FULL_IPL		= 2,
+	OPAL_REBOOT_MPIPL		= 3,
+	OPAL_REBOOT_FAST		= 4,
+};
+
+/* Argument to OPAL_PCI_TCE_KILL */
+enum {
+	OPAL_PCI_TCE_KILL_PAGES,
+	OPAL_PCI_TCE_KILL_PE,
+	OPAL_PCI_TCE_KILL_ALL,
+};
+
+/* The xive operation mode indicates the active "API" and
+ * corresponds to the "mode" parameter of the opal_xive_reset()
+ * call
+ */
+enum {
+	OPAL_XIVE_MODE_EMU	= 0,
+	OPAL_XIVE_MODE_EXPL	= 1,
+};
+
+/* Flags for OPAL_XIVE_GET_IRQ_INFO */
+enum {
+	OPAL_XIVE_IRQ_TRIGGER_PAGE	= 0x00000001,
+	OPAL_XIVE_IRQ_STORE_EOI		= 0x00000002,
+	OPAL_XIVE_IRQ_LSI		= 0x00000004,
+	OPAL_XIVE_IRQ_SHIFT_BUG		= 0x00000008, /* P9 DD1.0 workaround */
+	OPAL_XIVE_IRQ_MASK_VIA_FW	= 0x00000010, /* P9 DD1.0 workaround */
+	OPAL_XIVE_IRQ_EOI_VIA_FW	= 0x00000020, /* P9 DD1.0 workaround */
+	OPAL_XIVE_IRQ_STORE_EOI2	= 0x00000040,
+};
+
+/* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
+enum {
+	OPAL_XIVE_EQ_ENABLED		= 0x00000001,
+	OPAL_XIVE_EQ_ALWAYS_NOTIFY	= 0x00000002,
+	OPAL_XIVE_EQ_ESCALATE		= 0x00000004,
+};
+
+/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
+enum {
+	OPAL_XIVE_VP_ENABLED		= 0x00000001,
+	OPAL_XIVE_VP_SINGLE_ESCALATION	= 0x00000002,
+};
+
+/* "Any chip" replacement for chip ID for allocation functions */
+enum {
+	OPAL_XIVE_ANY_CHIP		= 0xffffffff,
+};
+
+/* Xive sync options */
+enum {
+	/* These bits are cumulative, arg is a girq */
+	XIVE_SYNC_EAS			= 0x00000001, /* Sync irq source */
+	XIVE_SYNC_QUEUE			= 0x00000002, /* Sync irq target */
+};
+
+/* Dump options */
+enum {
+	XIVE_DUMP_TM_HYP	= 0,
+	XIVE_DUMP_TM_POOL	= 1,
+	XIVE_DUMP_TM_OS		= 2,
+	XIVE_DUMP_TM_USER	= 3,
+	XIVE_DUMP_VP		= 4,
+	XIVE_DUMP_EMU_STATE	= 5,
+};
+
+/* "type" argument options for OPAL_IMC_COUNTERS_* calls */
+enum {
+	OPAL_IMC_COUNTERS_NEST = 1,
+	OPAL_IMC_COUNTERS_CORE = 2,
+	OPAL_IMC_COUNTERS_TRACE = 3,
+};
+
+
+/* PCI p2p descriptor */
+#define OPAL_PCI_P2P_ENABLE		0x1
+#define OPAL_PCI_P2P_LOAD		0x2
+#define OPAL_PCI_P2P_STORE		0x4
+
+/* MPIPL update operations */
+enum opal_mpipl_ops {
+	OPAL_MPIPL_ADD_RANGE			= 0,
+	OPAL_MPIPL_REMOVE_RANGE			= 1,
+	OPAL_MPIPL_REMOVE_ALL			= 2,
+	OPAL_MPIPL_FREE_PRESERVED_MEMORY	= 3,
+};
+
+/* Each tag points to a metadata area. The kernel uses the
+ * tag to get the metadata value.
+ */
+enum opal_mpipl_tags {
+	OPAL_MPIPL_TAG_CPU	= 0,
+	OPAL_MPIPL_TAG_OPAL	= 1,
+	OPAL_MPIPL_TAG_KERNEL	= 2,
+	OPAL_MPIPL_TAG_BOOT_MEM	= 3,
+};
+
+/* Preserved memory details */
+struct opal_mpipl_region {
+	__be64	src;
+	__be64	dest;
+	__be64	size;
+};
+
+/* Structure version */
+#define OPAL_MPIPL_VERSION		0x01
+
+struct opal_mpipl_fadump {
+	u8	version;
+	u8	reserved[7];
+	__be32	crashing_pir;	/* OPAL crashing CPU PIR */
+	__be32	cpu_data_version;
+	__be32	cpu_data_size;
+	__be32	region_cnt;
+	struct	opal_mpipl_region region[];
+} __packed;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
new file mode 100644
index 0000000000..a9b31cc258
--- /dev/null
+++ b/arch/powerpc/include/asm/opal.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerNV OPAL definitions.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#ifndef _ASM_POWERPC_OPAL_H
+#define _ASM_POWERPC_OPAL_H
+
+#include <asm/opal-api.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/notifier.h>
+
+/* We calculate number of sg entries based on PAGE_SIZE */
+#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
+
+/* Default time to sleep or delay between OPAL_BUSY/OPAL_BUSY_EVENT loops */
+#define OPAL_BUSY_DELAY_MS	10
+
+/* /sys/firmware/opal */
+extern struct kobject *opal_kobj;
+
+/* /ibm,opal */
+extern struct device_node *opal_node;
+
+/* API functions */
+int64_t opal_invalid_call(void);
+int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
+			uint64_t lpcr);
+int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
+			uint64_t addr, uint64_t PE_mask);
+int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
+				uint64_t PE_handle);
+int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
+			uint64_t rate_phys, uint32_t size);
+
+int64_t opal_console_write(int64_t term_number, __be64 *length,
+			   const uint8_t *buffer);
+int64_t opal_console_read(int64_t term_number, __be64 *length,
+			  uint8_t *buffer);
+int64_t opal_console_write_buffer_space(int64_t term_number,
+					__be64 *length);
+int64_t opal_console_flush(int64_t term_number);
+int64_t opal_rtc_read(__be32 *year_month_day,
+		      __be64 *hour_minute_second_millisecond);
+int64_t opal_rtc_write(uint32_t year_month_day,
+		       uint64_t hour_minute_second_millisecond);
+int64_t opal_tpo_read(uint64_t token, __be32 *year_mon_day, __be32 *hour_min);
+int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day,
+		       uint32_t hour_min);
+int64_t opal_cec_power_down(uint64_t request);
+int64_t opal_cec_reboot(void);
+int64_t opal_cec_reboot2(uint32_t reboot_type, const char *diag);
+int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
+int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
+int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
+int64_t opal_poll_events(__be64 *outstanding_event_mask);
+int64_t opal_pci_set_hub_tce_memory(uint64_t hub_id, uint64_t tce_mem_addr,
+				    uint64_t tce_mem_size);
+int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, uint64_t tce_mem_addr,
+				    uint64_t tce_mem_size);
+int64_t opal_pci_config_read_byte(uint64_t phb_id, uint64_t bus_dev_func,
+				  uint64_t offset, uint8_t *data);
+int64_t opal_pci_config_read_half_word(uint64_t phb_id, uint64_t bus_dev_func,
+				       uint64_t offset, __be16 *data);
+int64_t opal_pci_config_read_word(uint64_t phb_id, uint64_t bus_dev_func,
+				  uint64_t offset, __be32 *data);
+int64_t opal_pci_config_write_byte(uint64_t phb_id, uint64_t bus_dev_func,
+				   uint64_t offset, uint8_t data);
+int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
+					uint64_t offset, uint16_t data);
+int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
+				   uint64_t offset, uint32_t data);
+int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
+int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
+int64_t opal_register_exception_handler(uint64_t opal_exception,
+					uint64_t handler_address,
+					uint64_t glue_cache_line);
+int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
+				   uint8_t *freeze_state,
+				   __be16 *pci_error_type,
+				   __be64 *phb_status);
+int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
+				  uint64_t eeh_action_token);
+int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
+				uint64_t eeh_action_token);
+int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type,
+			    uint32_t func, uint64_t addr, uint64_t mask);
+int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
+
+
+
+int64_t opal_pci_phb_mmio_enable(uint64_t phb_id, uint16_t window_type,
+				 uint16_t window_num, uint16_t enable);
+int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type,
+				    uint16_t window_num,
+				    uint64_t starting_real_address,
+				    uint64_t starting_pci_address,
+				    uint64_t size);
+int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
+				    uint16_t window_type, uint16_t window_num,
+				    uint16_t segment_num);
+int64_t opal_pci_set_phb_table_memory(uint64_t phb_id, uint64_t rtt_addr,
+				      uint64_t ivt_addr, uint64_t ivt_len,
+				      uint64_t reject_array_addr,
+				      uint64_t peltv_addr);
+int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number, uint64_t bus_dev_func,
+			uint8_t bus_compare, uint8_t dev_compare, uint8_t func_compare,
+			uint8_t pe_action);
+int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe, uint32_t child_pe,
+			   uint8_t state);
+int64_t opal_pci_get_xive_reissue(uint64_t phb_id, uint32_t xive_number,
+				  uint8_t *p_bit, uint8_t *q_bit);
+int64_t opal_pci_set_xive_reissue(uint64_t phb_id, uint32_t xive_number,
+				  uint8_t p_bit, uint8_t q_bit);
+int64_t opal_pci_msi_eoi(uint64_t phb_id, uint32_t hw_irq);
+int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number,
+			     uint32_t xive_num);
+int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num,
+			     __be32 *interrupt_source_number);
+int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number, uint32_t xive_num,
+			uint8_t msi_range, __be32 *msi_address,
+			__be32 *message_data);
+int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number,
+			uint32_t xive_num, uint8_t msi_range,
+			__be64 *msi_address, __be32 *message_data);
+int64_t opal_start_cpu(uint64_t thread_number, uint64_t start_address);
+int64_t opal_query_cpu_status(uint64_t thread_number, uint8_t *thread_status);
+int64_t opal_write_oppanel(oppanel_line_t *lines, uint64_t num_lines);
+int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t window_id,
+				   uint16_t tce_levels, uint64_t tce_table_addr,
+				   uint64_t tce_table_size, uint64_t tce_page_size);
+int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number,
+					uint16_t dma_window_number, uint64_t pci_start_addr,
+					uint64_t pci_mem_size);
+int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state);
+
+int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer,
+				   uint64_t diag_buffer_len);
+int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer,
+				   uint64_t diag_buffer_len);
+int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer,
+				    uint64_t diag_buffer_len);
+int64_t opal_pci_fence_phb(uint64_t phb_id);
+int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
+int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
+int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
+int64_t opal_get_epow_status(__be16 *epow_status, __be16 *num_epow_classes);
+int64_t opal_get_dpo_status(__be64 *dpo_timeout);
+int64_t opal_set_system_attention_led(uint8_t led_action);
+int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
+			    __be16 *pci_error_type, __be16 *severity);
+int64_t opal_pci_poll(uint64_t id);
+int64_t opal_return_cpu(void);
+int64_t opal_check_token(uint64_t token);
+int64_t opal_reinit_cpus(uint64_t flags);
+
+int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val);
+int64_t opal_xscom_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val);
+
+int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type,
+		       uint32_t addr, uint32_t data, uint32_t sz);
+int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type,
+		      uint32_t addr, __be32 *data, uint32_t sz);
+
+int64_t opal_read_elog(uint64_t buffer, uint64_t size, uint64_t log_id);
+int64_t opal_get_elog_size(__be64 *log_id, __be64 *size, __be64 *elog_type);
+int64_t opal_write_elog(uint64_t buffer, uint64_t size, uint64_t offset);
+int64_t opal_send_ack_elog(uint64_t log_id);
+void opal_resend_pending_logs(void);
+
+int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
+int64_t opal_manage_flash(uint8_t op);
+int64_t opal_update_flash(uint64_t blk_list);
+int64_t opal_dump_init(uint8_t dump_type);
+int64_t opal_dump_info(__be32 *dump_id, __be32 *dump_size);
+int64_t opal_dump_info2(__be32 *dump_id, __be32 *dump_size, __be32 *dump_type);
+int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer);
+int64_t opal_dump_ack(uint32_t dump_id);
+int64_t opal_dump_resend_notification(void);
+
+int64_t opal_get_msg(uint64_t buffer, uint64_t size);
+int64_t opal_write_oppanel_async(uint64_t token, oppanel_line_t *lines,
+					uint64_t num_lines);
+int64_t opal_check_completion(uint64_t buffer, uint64_t size, uint64_t token);
+int64_t opal_sync_host_reboot(void);
+int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
+		uint64_t length);
+int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
+		uint64_t length);
+int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
+int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
+int64_t opal_handle_hmi(void);
+int64_t opal_handle_hmi2(__be64 *out_flags);
+int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
+int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
+int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
+int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
+int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
+		uint64_t msg_len);
+int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
+		uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+			 struct opal_i2c_request *oreq);
+int64_t opal_prd_msg(struct opal_prd_msg *msg);
+int64_t opal_leds_get_ind(char *loc_code, __be64 *led_mask,
+			  __be64 *led_value, __be64 *max_led_type);
+int64_t opal_leds_set_ind(uint64_t token, char *loc_code, const u64 led_mask,
+			  const u64 led_value, __be64 *max_led_type);
+
+int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
+		uint64_t size, uint64_t token);
+int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
+		uint64_t size, uint64_t token);
+int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
+		uint64_t token);
+int64_t opal_get_device_tree(uint32_t phandle, uint64_t buf, uint64_t len);
+int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data);
+int64_t opal_pci_get_power_state(uint64_t id, uint64_t data);
+int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
+				 uint64_t data);
+int64_t opal_pci_poll2(uint64_t id, uint64_t data);
+
+int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
+int64_t opal_int_set_cppr(uint8_t cppr);
+int64_t opal_int_eoi(uint32_t xirr);
+int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
+int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
+			  uint32_t pe_num, uint32_t tce_size,
+			  uint64_t dma_addr, uint32_t npages);
+int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
+int64_t opal_xive_reset(uint64_t version);
+int64_t opal_xive_get_irq_info(uint32_t girq,
+			       __be64 *out_flags,
+			       __be64 *out_eoi_page,
+			       __be64 *out_trig_page,
+			       __be32 *out_esb_shift,
+			       __be32 *out_src_chip);
+int64_t opal_xive_get_irq_config(uint32_t girq, __be64 *out_vp,
+				 uint8_t *out_prio, __be32 *out_lirq);
+int64_t opal_xive_set_irq_config(uint32_t girq, uint64_t vp, uint8_t prio,
+				 uint32_t lirq);
+int64_t opal_xive_get_queue_info(uint64_t vp, uint32_t prio,
+				 __be64 *out_qpage,
+				 __be64 *out_qsize,
+				 __be64 *out_qeoi_page,
+				 __be32 *out_escalate_irq,
+				 __be64 *out_qflags);
+int64_t opal_xive_set_queue_info(uint64_t vp, uint32_t prio,
+				 uint64_t qpage,
+				 uint64_t qsize,
+				 uint64_t qflags);
+int64_t opal_xive_donate_page(uint32_t chip_id, uint64_t addr);
+int64_t opal_xive_alloc_vp_block(uint32_t alloc_order);
+int64_t opal_xive_free_vp_block(uint64_t vp);
+int64_t opal_xive_get_vp_info(uint64_t vp,
+			      __be64 *out_flags,
+			      __be64 *out_cam_value,
+			      __be64 *out_report_cl_pair,
+			      __be32 *out_chip_id);
+int64_t opal_xive_set_vp_info(uint64_t vp,
+			      uint64_t flags,
+			      uint64_t report_cl_pair);
+int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
+int64_t opal_xive_free_irq(uint32_t girq);
+int64_t opal_xive_sync(uint32_t type, uint32_t id);
+int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+				  __be32 *out_qtoggle,
+				  __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+				  uint32_t qtoggle,
+				  uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
+
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
+							uint64_t cpu_pir);
+int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
+int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir);
+
+int opal_get_powercap(u32 handle, int token, u32 *pcap);
+int opal_set_powercap(u32 handle, int token, u32 pcap);
+int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
+int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
+int opal_sensor_group_clear(u32 group_hndl, int token);
+int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
+int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
+
+int opal_secvar_get(const char *key, uint64_t key_len, u8 *data,
+		    uint64_t *data_size);
+int opal_secvar_get_next(const char *key, uint64_t *key_len,
+			 uint64_t key_buf_size);
+int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
+			       uint64_t data_size);
+
+s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
+s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
+s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr);
+
+s64 opal_signal_system_reset(s32 cpu);
+s64 opal_quiesce(u64 shutdown_type, s32 cpu);
+
+/* Internal functions */
+extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
+				   int depth, void *data);
+extern int early_init_dt_scan_recoverable_ranges(unsigned long node,
+				 const char *uname, int depth, void *data);
+void __init opal_configure_cores(void);
+
+extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
+extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
+extern int opal_put_chars_atomic(uint32_t vtermno, const char *buf, int total_len);
+extern int opal_flush_chars(uint32_t vtermno, bool wait);
+extern int opal_flush_console(uint32_t vtermno);
+
+extern void hvc_opal_init_early(void);
+
+extern int opal_message_notifier_register(enum opal_msg_type msg_type,
+						struct notifier_block *nb);
+extern int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+					    struct notifier_block *nb);
+
+extern int opal_async_get_token_interruptible(void);
+extern int opal_async_release_token(int token);
+extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
+extern int opal_async_wait_response_interruptible(uint64_t token,
+		struct opal_msg *msg);
+extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
+extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data);
+extern int sensor_group_enable(u32 grp_hndl, bool enable);
+
+struct rtc_time;
+extern time64_t opal_get_boot_time(void);
+extern void opal_nvram_init(void);
+extern void opal_flash_update_init(void);
+extern void opal_flash_update_print_message(void);
+extern int opal_elog_init(void);
+extern void opal_platform_dump_init(void);
+extern void opal_sys_param_init(void);
+extern void opal_msglog_init(void);
+extern void opal_msglog_sysfs_init(void);
+extern int opal_async_comp_init(void);
+extern int opal_sensor_init(void);
+extern int opal_hmi_handler_init(void);
+extern int opal_event_init(void);
+int opal_power_control_init(void);
+
+extern int opal_machine_check(struct pt_regs *regs);
+extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
+extern int opal_hmi_exception_early(struct pt_regs *regs);
+extern int opal_hmi_exception_early2(struct pt_regs *regs);
+extern int opal_handle_hmi_exception(struct pt_regs *regs);
+
+extern void opal_shutdown(void);
+extern int opal_resync_timebase(void);
+
+extern void opal_lpc_init(void);
+
+extern void opal_kmsg_init(void);
+
+extern int opal_event_request(unsigned int opal_event_nr);
+
+struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
+					     unsigned long vmalloc_size);
+void opal_free_sg_list(struct opal_sg_list *sg);
+
+extern int opal_error_code(int rc);
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count);
+
+static inline int opal_get_async_rc(struct opal_msg msg)
+{
+	/* Only OPAL_MSG_ASYNC_COMP is accepted; params[1] is returned in CPU byte order */
+	if (msg.msg_type == OPAL_MSG_ASYNC_COMP)
+		return be64_to_cpu(msg.params[1]);
+	return OPAL_PARAMETER;
+}
+
+void opal_wake_poller(void);
+
+void opal_powercap_init(void);
+void opal_psr_init(void);
+void opal_sensor_groups_init(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
new file mode 100644
index 0000000000..e667d455ec
--- /dev/null
+++ b/arch/powerpc/include/asm/paca.h
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This control block defines the PACA which defines the processor
+ * specific data for each logical processor on the system.
+ * There are some pointers defined that are utilized by PLIC.
+ *
+ * C 2001 PPC 64 Team, IBM Corp
+ */
+#ifndef _ASM_POWERPC_PACA_H
+#define _ASM_POWERPC_PACA_H
+#ifdef __KERNEL__
+
+#ifdef CONFIG_PPC64
+
+#include <linux/cache.h>
+#include <linux/string.h>
+#include <asm/types.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#ifdef CONFIG_PPC_BOOK3E_64
+#include <asm/exception-64e.h>
+#else
+#include <asm/exception-64s.h>
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/kvm_book3s_asm.h>
+#endif
+#include <asm/accounting.h>
+#include <asm/hmi.h>
+#include <asm/cpuidle.h>
+#include <asm/atomic.h>
+#include <asm/mce.h>
+
+#include <asm-generic/mmiowb_types.h>
+
+register struct paca_struct *local_paca asm("r13");
+
+#if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP)
+extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
+/*
+ * Add standard checks that preemption cannot occur when using get_paca():
+ * otherwise the paca_struct it points to may be the wrong one just after.
+ */
+#define get_paca()	((void) debug_smp_processor_id(), local_paca)
+#else
+#define get_paca()	local_paca
+#endif /* CONFIG_DEBUG_PREEMPT && CONFIG_SMP */
+
+#define get_slb_shadow()	(get_paca()->slb_shadow_ptr)
+
+struct task_struct;
+struct rtas_args;
+struct lppaca;
+
+/*
+ * Defines the layout of the paca.
+ *
+ * This structure is not directly accessed by firmware or the service
+ * processor.
+ */
+struct paca_struct {
+#ifdef CONFIG_PPC_PSERIES
+	/*
+	 * Because hw_cpu_id, unlike other paca fields, is accessed
+	 * routinely from other CPUs (from the IRQ code), we stick to
+	 * read-only (after boot) fields in the first cacheline to
+	 * avoid cacheline bouncing.
+	 */
+
+	struct lppaca *lppaca_ptr;	/* Pointer to LpPaca for PLIC */
+#endif /* CONFIG_PPC_PSERIES */
+
+	/*
+	 * MAGIC: the spinlock functions in arch/powerpc/lib/locks.c
+	 * load lock_token and paca_index with a single lwz
+	 * instruction.  They must travel together and be properly
+	 * aligned.
+	 */
+#ifdef __BIG_ENDIAN__
+	u16 lock_token;			/* Constant 0x8000, used in locks */
+	u16 paca_index;			/* Logical processor number */
+#else
+	u16 paca_index;			/* Logical processor number */
+	u16 lock_token;			/* Constant 0x8000, used in locks */
+#endif /* __BIG_ENDIAN__ */
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	u64 kernel_toc;			/* Kernel TOC address */
+#endif /* !CONFIG_PPC_KERNEL_PCREL */
+	u64 kernelbase;			/* Base address of kernel */
+	u64 kernel_msr;			/* MSR while running in kernel */
+	void *emergency_sp;		/* pointer to emergency stack */
+	u64 data_offset;		/* per cpu data offset */
+	s16 hw_cpu_id;			/* Physical processor number */
+	u8 cpu_start;			/* At startup, processor spins until */
+					/* this becomes non-zero. */
+	u8 kexec_state;		/* set when kexec down has irqs off */
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	struct slb_shadow *slb_shadow_ptr;
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+	struct dtl_entry *dispatch_log;
+	struct dtl_entry *dispatch_log_end;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	u64 dscr_default;		/* per-CPU default DSCR */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Now, starting in cacheline 2, the exception save areas
+	 */
+	/* used for most interrupts/exceptions */
+	u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/* SLB related definitions */
+	u16 vmalloc_sllp;
+	u8 slb_cache_ptr;
+	u8 stab_rr;			/* stab/slb round-robin counter */
+#ifdef CONFIG_DEBUG_VM
+	u8 in_kernel_slb_handler;
+#endif /* CONFIG_DEBUG_VM */
+	u32 slb_used_bitmap;		/* Bitmaps for first 32 SLB entries. */
+	u32 slb_kern_bitmap;
+	u32 slb_cache[SLB_CACHE_ENTRIES];
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	u64 exgen[8] __aligned(0x40);
+	/* Keep pgd in the same cacheline as the start of extlb */
+	pgd_t *pgd __aligned(0x40); /* Current PGD */
+	pgd_t *kernel_pgd;		/* Kernel PGD */
+
+	/* Shared by all threads of a core -- points to tcd of first thread */
+	struct tlb_core_data *tcd_ptr;
+
+	/*
+	 * We can have up to 3 levels of reentrancy in the TLB miss handler,
+	 * in each of four exception levels (normal, crit, mcheck, debug).
+	 */
+	u64 extlb[12][EX_TLB_SIZE / sizeof(u64)];
+	u64 exmc[8];		/* used for machine checks */
+	u64 excrit[8];		/* used for crit interrupts */
+	u64 exdbg[8];		/* used for debug interrupts */
+
+	/* Kernel stack pointers for use by special exceptions */
+	void *mc_kstack;
+	void *crit_kstack;
+	void *dbg_kstack;
+
+	struct tlb_core_data tcd;
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
+	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+	/*
+	 * then miscellaneous read-write fields
+	 */
+	struct task_struct *__current;	/* Pointer to current */
+	u64 kstack;			/* Saved Kernel stack addr */
+	u64 saved_r1;			/* r1 save for RTAS calls or PM or EE=0 */
+	u64 saved_msr;			/* MSR saved here by enter_rtas */
+#ifdef CONFIG_PPC64
+	u64 exit_save_r1;		/* Syscall/interrupt R1 save */
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_BOOK3E_64
+	u16 trap_save;			/* Used when bad stack is encountered */
+#endif /* CONFIG_PPC_BOOK3E_64 */
+#ifdef CONFIG_PPC_BOOK3S_64
+	u8 hsrr_valid;			/* HSRRs set for HRFID */
+	u8 srr_valid;			/* SRRs set for RFID */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	u8 irq_soft_mask;		/* mask for irq soft masking */
+	u8 irq_happened;		/* irq happened while soft-disabled */
+	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disabled */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	u8 pmcregs_in_use;		/* pseries puts this in lppaca */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+	u64 sprg_vdso;			/* Saved user-visible sprg */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	u64 tm_scratch;                 /* TM scratch area for reclaim */
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+#ifdef CONFIG_PPC_POWERNV
+	/* PowerNV idle fields */
+	/* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+	unsigned long idle_lock; /* A value of 1 means acquired */
+	unsigned long idle_state;
+	union {
+		/* P7/P8 specific fields */
+		struct {
+			/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
+			u8 thread_idle_state;
+			/* Mask to denote subcore sibling threads */
+			u8 subcore_sibling_mask;
+		};
+
+		/* P9 specific fields */
+		struct {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+			/* The PSSCR value that the kernel requested before going to stop */
+			u64 requested_psscr;
+			/* Flag to request this thread not to stop */
+			atomic_t dont_stop;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+		};
+	};
+#endif /* CONFIG_PPC_POWERNV */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Non-maskable exceptions that are not performance critical */
+	u64 exnmi[EX_SIZE];	/* used for system reset (nmi) */
+	u64 exmc[EX_SIZE];	/* used for machine checks */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Exclusive stacks for system reset and machine check exception. */
+	void *nmi_emergency_sp;
+	void *mc_emergency_sp;
+
+	u16 in_nmi;			/* In nmi handler */
+
+	/*
+	 * Flag to check whether we are in machine check early handler
+	 * and already using emergency stack.
+	 */
+	u16 in_mce;
+	u8 hmi_event_available;		/* HMI event is available */
+	u8 hmi_p9_special_emu;		/* HMI P9 special emulation */
+	u32 hmi_irqs;			/* HMI irq stat */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	u8 ftrace_enabled;		/* Hard disable ftrace */
+
+	/* Stuff for accurate time accounting */
+	struct cpu_accounting_data accounting;
+	u64 dtl_ridx;			/* read index in dispatch log */
+	struct dtl_entry *dtl_curr;	/* pointer corresponding to dtl_ridx */
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	/* We use this to store guest state in */
+	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+	struct kvmppc_host_state kvm_hstate;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/*
+	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+	 * more details
+	 */
+	struct sibling_subcore_state *sibling_subcore_state;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+#endif /* CONFIG_KVM_BOOK3S_HANDLER */
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * rfi fallback flush must be in its own cacheline to prevent
+	 * other paca data leaking into the L1d
+	 */
+	u64 exrfi[EX_SIZE] __aligned(0x80);
+	void *rfi_flush_fallback_area;
+	u64 l1d_flush_size;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_PSERIES
+	u8 *mce_data_buf;		/* buffer to hold per cpu rtas errlog */
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/* Capture SLB related old contents in MCE handler. */
+	struct slb_entry *mce_faulty_slbs;
+	u16 slb_save_cache_ptr;
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_STACKPROTECTOR
+	unsigned long canary;
+#endif /* CONFIG_STACKPROTECTOR */
+#ifdef CONFIG_MMIOWB
+	struct mmiowb_state mmiowb_state;
+#endif /* CONFIG_MMIOWB */
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct mce_info *mce_info;
+	u8 mce_pending_irq_work;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+} ____cacheline_aligned;
+
+extern void copy_mm_to_paca(struct mm_struct *mm);
+extern struct paca_struct **paca_ptrs;
+extern void initialise_paca(struct paca_struct *new_paca, int cpu);
+extern void setup_paca(struct paca_struct *new_paca);
+extern void allocate_paca_ptrs(void);
+extern void allocate_paca(int cpu);
+extern void free_unused_pacas(void);
+
+#else /* CONFIG_PPC64 */
+
+static inline void allocate_paca(int cpu) { }
+static inline void free_unused_pacas(void) { }
+
+#endif /* CONFIG_PPC64 */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_PACA_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
new file mode 100644
index 0000000000..e5fcc79b5b
--- /dev/null
+++ b/arch/powerpc/include/asm/page.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PAGE_H
+#define _ASM_POWERPC_PAGE_H
+
+/*
+ * Copyright (C) 2001,2005 IBM Corporation.
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#else
+#include <asm/types.h>
+#endif
+#include <asm/asm-const.h>
+
+/*
+ * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
+ * on PPC44x and 4K/16K on 8xx). For PPC64 we support either 4K or 64K software
+ * page size. When using 64K pages however, whether we are really supporting
+ * 64K pages in HW or not is irrelevant to those definitions.
+ */
+#define PAGE_SHIFT		CONFIG_PPC_PAGE_SHIFT
+#define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+#ifndef CONFIG_HUGETLB_PAGE
+#define HPAGE_SHIFT PAGE_SHIFT
+#elif defined(CONFIG_PPC_BOOK3S_64)
+extern unsigned int hpage_shift;
+#define HPAGE_SHIFT hpage_shift
+#elif defined(CONFIG_PPC_8xx)
+#define HPAGE_SHIFT		19	/* 512k pages */
+#elif defined(CONFIG_PPC_E500)
+#define HPAGE_SHIFT		22	/* 4M pages */
+#endif
+#define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#define HUGE_MAX_HSTATE		(MMU_PAGE_COUNT-1)
+#endif
+
+/*
+ * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
+ * assign PAGE_MASK to a larger type it gets extended the way we want
+ * (i.e. with 1s in the high bits)
+ */
+#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
+
+/*
+ * KERNELBASE is the virtual address of the start of the kernel, it's often
+ * the same as PAGE_OFFSET, but _might not be_.
+ *
+ * The kdump dump kernel is one example where KERNELBASE != PAGE_OFFSET.
+ *
+ * PAGE_OFFSET is the virtual address of the start of lowmem.
+ *
+ * PHYSICAL_START is the physical address of the start of the kernel.
+ *
+ * MEMORY_START is the physical address of the start of lowmem.
+ *
+ * KERNELBASE, PAGE_OFFSET, and PHYSICAL_START are all configurable on
+ * ppc32 and based on how they are set we determine MEMORY_START.
+ *
+ * For the linear mapping the following equation should be true:
+ * KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START
+ *
+ * Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START
+ *
+ * There are two ways to determine a virtual address from a physical one:
+ * va = pa + PAGE_OFFSET - MEMORY_START
+ * va = pa + KERNELBASE - PHYSICAL_START
+ *
+ * If you want to know something's offset from the start of the kernel you
+ * should subtract KERNELBASE.
+ *
+ * If you want to test if something's a kernel address, use is_kernel_addr().
+ */
+
+#define KERNELBASE      ASM_CONST(CONFIG_KERNEL_START)
+#define PAGE_OFFSET	ASM_CONST(CONFIG_PAGE_OFFSET)
+#define LOAD_OFFSET	ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))
+
+#if defined(CONFIG_NONSTATIC_KERNEL)
+#ifndef __ASSEMBLY__
+
+extern phys_addr_t memstart_addr;
+extern phys_addr_t kernstart_addr;
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC32)
+extern long long virt_phys_offset;
+#endif
+
+#endif /* __ASSEMBLY__ */
+#define PHYSICAL_START	kernstart_addr
+
+#else	/* !CONFIG_NONSTATIC_KERNEL */
+#define PHYSICAL_START	ASM_CONST(CONFIG_PHYSICAL_START)
+#endif
+
+/* See Description below for VIRT_PHYS_OFFSET */
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+#ifdef CONFIG_RELOCATABLE
+#define VIRT_PHYS_OFFSET virt_phys_offset
+#else
+#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
+#endif
+#endif
+
+#ifdef CONFIG_PPC64
+#define MEMORY_START	0UL
+#elif defined(CONFIG_NONSTATIC_KERNEL)
+#define MEMORY_START	memstart_addr
+#else
+#define MEMORY_START	(PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
+#endif
+
+#ifdef CONFIG_FLATMEM
+#define ARCH_PFN_OFFSET		((unsigned long)(MEMORY_START >> PAGE_SHIFT))
+#endif
+
+/*
+ * On Book-E parts we need __va to parse the device tree and we can't
+ * determine MEMORY_START until then.  However we can determine PHYSICAL_START
+ * from information at hand (program counter, TLB lookup).
+ *
+ * On BookE with RELOCATABLE && PPC32
+ *
+ *   With RELOCATABLE && PPC32,  we support loading the kernel at any physical
+ *   address without any restriction on the page alignment.
+ *
+ *   We find the runtime address of _stext and relocate ourselves based on 
+ *   the following calculation:
+ *
+ *  	  virtual_base = ALIGN_DOWN(KERNELBASE,256M) +
+ *  				MODULO(_stext.run,256M)
+ *   and create the following mapping:
+ *
+ * 	  ALIGN_DOWN(_stext.run,256M) => ALIGN_DOWN(KERNELBASE,256M)
+ *
+ *   When we process relocations, we cannot depend on the
+ *   existing equation for the __va()/__pa() translations:
+ *
+ * 	   __va(x) = (x)  - PHYSICAL_START + KERNELBASE
+ *
+ *   Where:
+ *   	 PHYSICAL_START = kernstart_addr = Physical address of _stext
+ *  	 KERNELBASE = Compiled virtual address of _stext.
+ *
+ *   This formula holds true iff, kernel load address is TLB page aligned.
+ *
+ *   In our case, we need to also account for the shift in the kernel Virtual 
+ *   address.
+ *
+ *   E.g.,
+ *
+ *   Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET).
+ *   In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
+ *
+ *   Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
+ *                 = 0xbc100000 , which is wrong.
+ *
+ *   Rather, it should be : 0xc0000000 + 0x100000 = 0xc0100000
+ *      	according to our mapping.
+ *
+ *   Hence we use the following formula to get the translations right:
+ *
+ * 	  __va(x) = (x) - [ PHYSICAL_START - Effective KERNELBASE ]
+ *
+ * 	  Where :
+ * 		PHYSICAL_START = dynamic load address.(kernstart_addr variable)
+ * 		Effective KERNELBASE = virtual_base =
+ * 				     = ALIGN_DOWN(KERNELBASE,256M) +
+ * 						MODULO(PHYSICAL_START,256M)
+ *
+ * 	To make the cost of __va() / __pa() more light weight, we introduce
+ * 	a new variable virt_phys_offset, which will hold :
+ *
+ * 	virt_phys_offset = Effective KERNELBASE - PHYSICAL_START
+ * 			 = ALIGN_DOWN(KERNELBASE,256M) - 
+ * 			 	ALIGN_DOWN(PHYSICALSTART,256M)
+ *
+ * 	Hence :
+ *
+ * 	__va(x) = x - PHYSICAL_START + Effective KERNELBASE
+ * 		= x + virt_phys_offset
+ *
+ * 		and
+ * 	__pa(x) = x + PHYSICAL_START - Effective KERNELBASE
+ * 		= x - virt_phys_offset
+ * 		
+ * On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use
+ * the other definitions for __va & __pa.
+ */
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
+#define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET)
+#else
+#ifdef CONFIG_PPC64
+
+#define VIRTUAL_WARN_ON(x)	WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x))
+
+/*
+ * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
+ * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
+ * This also results in better code generation.
+ */
+#define __va(x)								\
+({									\
+	VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET);		\
+	(void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET);	\
+})
+
+#define __pa(x)								\
+({									\
+	VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET);		\
+	(unsigned long)(x) & 0x0fffffffffffffffUL;			\
+})
+
+#else /* 32-bit, non book E */
+#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
+#endif
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * Convert a kernel virtual address to a page frame number: translate
+ * @kaddr with __pa() and shift out the PAGE_SHIFT in-page offset bits.
+ */
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+	return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+/*
+ * Convert a page frame number to a kernel virtual address: rebuild the
+ * address of the start of the page (pfn << PAGE_SHIFT) and translate it
+ * with __va().
+ */
+static inline const void *pfn_to_kaddr(unsigned long pfn)
+{
+	return __va(pfn << PAGE_SHIFT);
+}
+#endif
+
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
+/*
+ * virt_addr_valid - test whether @vaddr lies in [PAGE_OFFSET, high_memory)
+ * and its page frame number passes pfn_valid().
+ *
+ * @vaddr is evaluated exactly once. It is parenthesised before the cast so
+ * that an expression argument (e.g. "p + 1") is converted as a whole rather
+ * than having the cast bind to its first operand only.
+ */
+#define virt_addr_valid(vaddr)	({					\
+	unsigned long _addr = (unsigned long)(vaddr);			\
+	_addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory &&	\
+	pfn_valid(virt_to_pfn((void *)_addr));				\
+})
+
+/*
+ * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
+ * and needs to be executable.  This means the whole heap ends
+ * up being executable.
+ */
+#define VM_DATA_DEFAULT_FLAGS32	VM_DATA_FLAGS_TSK_EXEC
+#define VM_DATA_DEFAULT_FLAGS64	VM_DATA_FLAGS_NON_EXEC
+
+#ifdef __powerpc64__
+#include <asm/page_64.h>
+#else
+#include <asm/page_32.h>
+#endif
+
+/*
+ * Don't compare things with KERNELBASE or PAGE_OFFSET to test for
+ * "kernelness", use is_kernel_addr() - it should do what you want.
+ */
+#ifdef CONFIG_PPC_BOOK3E_64
+#define is_kernel_addr(x)	((x) >= 0x8000000000000000ul)
+#elif defined(CONFIG_PPC_BOOK3S_64)
+#define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
+#else
+#define is_kernel_addr(x)	((x) >= TASK_SIZE)
+#endif
+
+#ifndef CONFIG_PPC_BOOK3S_64
+/*
+ * Use the top bit of the higher-level page table entries to indicate whether
+ * the entries we point to contain hugepages.  This works because we know that
+ * the page tables live in kernel space.  If we ever decide to support having
+ * page tables at arbitrary addresses, this breaks and will have to change.
+ */
+#ifdef CONFIG_PPC64
+#define PD_HUGE 0x8000000000000000UL
+#else
+#define PD_HUGE 0x80000000
+#endif
+
+#else	/* CONFIG_PPC_BOOK3S_64 */
+/*
+ * Book3S 64 stores real addresses in the hugepd entries to
+ * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE.
+ */
+#define HUGEPD_ADDR_MASK	(0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK)
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * Some number of bits at the level of the page table that points to
+ * a hugepte are used to encode the size.  This masks those bits.
+ * On 8xx, HW assistance requires 4k alignment for the hugepte.
+ */
+#ifdef CONFIG_PPC_8xx
+#define HUGEPD_SHIFT_MASK     0xfff
+#else
+#define HUGEPD_SHIFT_MASK     0x3f
+#endif
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/pgtable-be-types.h>
+#else
+#include <asm/pgtable-types.h>
+#endif
+
+struct page;
+extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
+extern void copy_user_page(void *to, void *from, unsigned long vaddr,
+		struct page *p);
+extern int devmem_is_allowed(unsigned long pfn);
+
+#ifdef CONFIG_PPC_SMLPAR
+void arch_free_page(struct page *page, int order);
+#define HAVE_ARCH_FREE_PAGE
+#endif
+
+struct vm_area_struct;
+
+extern unsigned long kernstart_virt_addr;
+
+/*
+ * Return how far kernstart_virt_addr is from KERNELBASE, i.e. the
+ * difference between the two virtual start addresses.
+ */
+static inline unsigned long kaslr_offset(void)
+{
+	unsigned long offset = kernstart_virt_addr;
+
+	offset -= KERNELBASE;
+
+	return offset;
+}
+
+#include <asm-generic/memory_model.h>
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
new file mode 100644
index 0000000000..b9ac9e3a77
--- /dev/null
+++ b/arch/powerpc/include/asm/page_32.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PAGE_32_H
+#define _ASM_POWERPC_PAGE_32_H
+
+#include <asm/cache.h>
+
+#if defined(CONFIG_PHYSICAL_ALIGN) && (CONFIG_PHYSICAL_START != 0)
+#if (CONFIG_PHYSICAL_START % CONFIG_PHYSICAL_ALIGN) != 0
+#error "CONFIG_PHYSICAL_START must be a multiple of CONFIG_PHYSICAL_ALIGN"
+#endif
+#endif
+
+#define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
+
+#if defined(CONFIG_PPC_256K_PAGES) || \
+    (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
+#define PTE_SHIFT	(PAGE_SHIFT - PTE_T_LOG2 - 2)	/* 1/4 of a page */
+#else
+#define PTE_SHIFT	(PAGE_SHIFT - PTE_T_LOG2)	/* full page */
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * The basic type of a PTE - 64 bits for those CPUs with > 32 bit
+ * physical addressing.
+ */
+#ifdef CONFIG_PTE_64BIT
+typedef unsigned long long pte_basic_t;
+#else
+typedef unsigned long pte_basic_t;
+#endif
+
+#include <asm/bug.h>
+
+/*
+ * Zero one page by issuing dcbz on each L1 cache line it contains.
+ * dcbz doesn't cause any memory traffic (except to write out any cache
+ * lines which get displaced), but it only works on cacheable memory.
+ *
+ * @addr: start of the page; a warning is raised if it is not aligned to
+ *        L1_CACHE_BYTES.
+ */
+static inline void clear_page(void *addr)
+{
+	unsigned int lines_left = PAGE_SIZE / L1_CACHE_BYTES;
+
+	WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
+
+	/* One dcbz per cache line, walking forward through the page. */
+	while (lines_left--) {
+		dcbz(addr);
+		addr += L1_CACHE_BYTES;
+	}
+}
+extern void copy_page(void *to, void *from);
+
+#include <asm-generic/getorder.h>
+
+#define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
+#define PTE_T_LOG2	(__builtin_ffs(sizeof(pte_t)) - 1)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_PAGE_32_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
new file mode 100644
index 0000000000..79a9b7c6a1
--- /dev/null
+++ b/arch/powerpc/include/asm/page_64.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PAGE_64_H
+#define _ASM_POWERPC_PAGE_64_H
+
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ */
+
+#include <asm/asm-const.h>
+
+/*
+ * We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux
+ * specific, every notion of page number shared with the firmware, TCEs,
+ * iommu, etc... still uses a page size of 4K.
+ */
+#define HW_PAGE_SHIFT		12
+#define HW_PAGE_SIZE		(ASM_CONST(1) << HW_PAGE_SHIFT)
+#define HW_PAGE_MASK		(~(HW_PAGE_SIZE-1))
+
+/*
+ * PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and
+ * HW_PAGE_SHIFT, that is 4K pages.
+ */
+#define PAGE_FACTOR		(PAGE_SHIFT - HW_PAGE_SHIFT)
+
+/* Segment size; normal 256M segments */
+#define SID_SHIFT		28
+#define SID_MASK		ASM_CONST(0xfffffffff)
+#define ESID_MASK		0xfffffffff0000000UL
+#define GET_ESID(x)		(((x) >> SID_SHIFT) & SID_MASK)
+
+/* 1T segments */
+#define SID_SHIFT_1T		40
+#define SID_MASK_1T		0xffffffUL
+#define ESID_MASK_1T		0xffffff0000000000UL
+#define GET_ESID_1T(x)		(((x) >> SID_SHIFT_1T) & SID_MASK_1T)
+
+#ifndef __ASSEMBLY__
+#include <asm/cache.h>
+
+typedef unsigned long pte_basic_t;
+
+/*
+ * Zero one page with dcbz, unrolled to eight cache blocks per loop
+ * iteration. The block size and the number of blocks per page are read
+ * from ppc64_caches.l1d at run time rather than being compile-time
+ * constants.
+ *
+ * @addr: start of the page to clear.
+ */
+static inline void clear_page(void *addr)
+{
+	unsigned long iterations;
+	unsigned long onex, twox, fourx, eightx;
+
+	/*
+	 * Each loop pass clears eight blocks.
+	 * NOTE(review): assumes blocks_per_page is a non-zero multiple of 8;
+	 * any remainder would be left uncleared - confirm.
+	 */
+	iterations = ppc64_caches.l1d.blocks_per_page / 8;
+
+	/*
+	 * Some versions of gcc use multiply instructions to
+	 * calculate the offsets so let's give it a hand to
+	 * do better.
+	 */
+	onex = ppc64_caches.l1d.block_size;
+	twox = onex << 1;
+	fourx = onex << 2;
+	eightx = onex << 3;
+
+	/*
+	 * Operand map:
+	 *   %0      addr, updated in place (early-clobber output tied to
+	 *           input operand %2)
+	 *   %1      iterations, moved into CTR as the loop counter
+	 *   %3..%9  byte offsets of 1x..7x the block size, used as the index
+	 *           for the second to eighth dcbz ("b" keeps them out of r0)
+	 *   %10     8x the block size, added to addr after each pass
+	 * bdnz decrements CTR and branches back to label 1 while it is
+	 * non-zero.
+	 */
+	asm volatile(
+	"mtctr	%1	# clear_page\n\
+	.balign	16\n\
+1:	dcbz	0,%0\n\
+	dcbz	%3,%0\n\
+	dcbz	%4,%0\n\
+	dcbz	%5,%0\n\
+	dcbz	%6,%0\n\
+	dcbz	%7,%0\n\
+	dcbz	%8,%0\n\
+	dcbz	%9,%0\n\
+	add	%0,%0,%10\n\
+	bdnz+	1b"
+	: "=&r" (addr)
+	: "r" (iterations), "0" (addr), "b" (onex), "b" (twox),
+		"b" (twox+onex), "b" (fourx), "b" (fourx+onex),
+		"b" (twox+fourx), "b" (eightx-onex), "r" (eightx)
+	: "ctr", "memory");
+}
+
+extern void copy_page(void *to, void *from);
+
+/* Log 2 of page table size */
+extern u64 ppc64_pft_size;
+
+#endif /* __ASSEMBLY__ */
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(is_32bit_task() ? \
+	 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+/*
+ * This is the default if a program doesn't have a PT_GNU_STACK
+ * program header entry. The PPC64 ELF ABI has a non-executable stack
+ * by default, so in the absence of a PT_GNU_STACK program header
+ * we turn execute permission off.
+ */
+#define VM_STACK_DEFAULT_FLAGS32	VM_DATA_FLAGS_EXEC
+#define VM_STACK_DEFAULT_FLAGS64	VM_DATA_FLAGS_NON_EXEC
+
+#define VM_STACK_DEFAULT_FLAGS \
+	(is_32bit_task() ? \
+	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
+
+#include <asm-generic/getorder.h>
+
+#endif /* _ASM_POWERPC_PAGE_64_H */
diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h
new file mode 100644
index 0000000000..f5fdbd8ae9
--- /dev/null
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_PAPR_SYSPARM_H
+#define _ASM_POWERPC_PAPR_SYSPARM_H
+
+/*
+ * Opaque handle for a system parameter token. Values are created with
+ * mk_papr_sysparm(); the member is const, presumably so that a handle
+ * cannot be altered once built (confirm intent with the implementation).
+ */
+typedef struct {
+	const u32 token;
+} papr_sysparm_t;
+
+#define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
+
+/*
+ * Derived from the "Defined Parameters" table in PAPR 7.3.16 System
+ * Parameters Option. Where the spec says "characteristics", we use
+ * "attrs" in the symbolic names to keep them from getting too
+ * unwieldy.
+ */
+#define PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS        mk_papr_sysparm(20)
+#define PAPR_SYSPARM_PROC_MODULE_INFO              mk_papr_sysparm(43)
+#define PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS     mk_papr_sysparm(44)
+#define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS    mk_papr_sysparm(50)
+#define PAPR_SYSPARM_LPAR_NAME                     mk_papr_sysparm(55)
+
+enum {
+	PAPR_SYSPARM_MAX_INPUT  = 1024,
+	PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+/*
+ * Buffer passed to papr_sysparm_get() and papr_sysparm_set().
+ *
+ * @len: 16-bit big-endian length field; presumably the number of valid
+ *       bytes in @val - confirm against the PAPR definition.
+ * @val: parameter data, at most PAPR_SYSPARM_MAX_OUTPUT bytes.
+ */
+struct papr_sysparm_buf {
+	__be16 len;
+	char val[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+struct papr_sysparm_buf *papr_sysparm_buf_alloc(void);
+void papr_sysparm_buf_free(struct papr_sysparm_buf *buf);
+int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf);
+int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf);
+
+#endif /* _ASM_POWERPC_PAPR_SYSPARM_H */
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
new file mode 100644
index 0000000000..e08513d731
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PARAVIRT_H
+#define _ASM_POWERPC_PARAVIRT_H
+
+#include <linux/jump_label.h>
+#include <asm/smp.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#endif
+
+#ifdef CONFIG_PPC_SPLPAR
+#include <linux/smp.h>
+#include <asm/kvm_guest.h>
+#include <asm/cputhreads.h>
+
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
+/*
+ * Report whether the shared_processor static key is enabled. The key
+ * defaults to false and the test is laid out as the unlikely branch.
+ */
+static inline bool is_shared_processor(void)
+{
+	return static_branch_unlikely(&shared_processor);
+}
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 pseries_paravirt_steal_clock(int cpu);
+
+/*
+ * Steal-clock accessor for @cpu; simply forwards to the pseries
+ * implementation, pseries_paravirt_steal_clock().
+ */
+static inline u64 paravirt_steal_clock(int cpu)
+{
+	return pseries_paravirt_steal_clock(cpu);
+}
+#endif
+
+/*
+ * Read the yield count from @cpu's lppaca and return it in CPU byte order.
+ * If bit 0 is set, the cpu has been ceded, conferred, or preempted.
+ */
+static inline u32 yield_count_of(int cpu)
+{
+	__be32 raw;
+
+	/* Single, non-torn load of the big-endian field. */
+	raw = READ_ONCE(lppaca_of(cpu).yield_count);
+
+	return be32_to_cpu(raw);
+}
+
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
+/*
+ * Issue H_CONFER towards @cpu, passing its hardware processor id and the
+ * @yield_count the caller sampled earlier. The untraced hcall variant is
+ * used on purpose; the hcall's return value is ignored.
+ */
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+	plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+}
+
+/*
+ * Issue H_PROD for @cpu's hardware processor id using the untraced hcall
+ * variant; the hcall's return value is ignored.
+ */
+static inline void prod_cpu(int cpu)
+{
+	plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
+}
+
+/*
+ * Issue H_CONFER with a target of -1 and a yield count of 0, using the
+ * untraced hcall variant. NOTE(review): -1 is presumably the "no specific
+ * target" encoding for H_CONFER - confirm against PAPR.
+ */
+static inline void yield_to_any(void)
+{
+	plpar_hcall_norets_notrace(H_CONFER, -1, 0);
+}
+#else
+/* Without CONFIG_PPC_SPLPAR there is no shared-processor mode to detect. */
+static inline bool is_shared_processor(void)
+{
+	return false;
+}
+
+/*
+ * Without CONFIG_PPC_SPLPAR the yield count is always reported as 0, so
+ * bit 0 is never seen set by callers.
+ */
+static inline u32 yield_count_of(int cpu)
+{
+	return 0;
+}
+
+/*
+ * Must not be reached without CONFIG_PPC_SPLPAR. The stub calls a function
+ * that is only declared here; it is presumably left undefined so that any
+ * call surviving optimisation is caught at link time (confirm).
+ */
+extern void ___bad_yield_to_preempted(void);
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+	___bad_yield_to_preempted(); /* This would be a bug */
+}
+
+/*
+ * Must not be reached without CONFIG_PPC_SPLPAR. The stub calls a function
+ * that is only declared here; it is presumably left undefined so that any
+ * call surviving optimisation is caught at link time (confirm).
+ */
+extern void ___bad_yield_to_any(void);
+static inline void yield_to_any(void)
+{
+	___bad_yield_to_any(); /* This would be a bug */
+}
+
+/*
+ * Must not be reached without CONFIG_PPC_SPLPAR. The stub calls a function
+ * that is only declared here; it is presumably left undefined so that any
+ * call surviving optimisation is caught at link time (confirm).
+ */
+extern void ___bad_prod_cpu(void);
+static inline void prod_cpu(int cpu)
+{
+	___bad_prod_cpu(); /* This would be a bug */
+}
+
+#endif
+
+#define vcpu_is_preempted vcpu_is_preempted
+/*
+ * Best-effort test of whether the hypervisor has preempted @cpu.
+ *
+ * The dispatch/yield bit by itself is an imperfect signal: when clear,
+ * @cpu is definitely not preempted, but when set it only means @cpu
+ * *might* be. Cheaper and more conclusive properties of the VM and of
+ * @cpu are therefore checked first and the yield count is consulted last.
+ */
+static inline bool vcpu_is_preempted(int cpu)
+{
+	/*
+	 * In dedicated processor mode the hypervisor cannot preempt us,
+	 * by definition.
+	 */
+	if (!is_shared_processor())
+		return false;
+
+#ifdef CONFIG_PPC_SPLPAR
+	if (!is_kvm_guest()) {
+		int local_core;
+
+		/*
+		 * The answer is only ever used speculatively and may be
+		 * invalidated at any time by events inside or outside
+		 * Linux. We can be called from preemptible context, but no
+		 * per-cpu resource is touched in a way that could race
+		 * destructively with scheduler preemption or migration,
+		 * and callers tolerate the inaccuracy of sampling the CPU
+		 * index without pinning the task. raw_smp_processor_id()
+		 * is therefore acceptable here and avoids the preempt
+		 * debug warnings smp_processor_id() would raise in
+		 * arbitrary contexts.
+		 */
+		local_core = cpu_first_thread_sibling(raw_smp_processor_id());
+
+		/*
+		 * PowerVM dispatches VMs a whole core at a time, so a
+		 * thread sibling of the local CPU cannot have been
+		 * preempted by the hypervisor, even if it has called
+		 * H_CONFER, which sets the yield bit.
+		 */
+		if (local_core == cpu_first_thread_sibling(cpu))
+			return false;
+	}
+#endif
+
+	/* Last resort: bit 0 of the yield count. */
+	return (yield_count_of(cpu) & 1) != 0;
+}
+
+/*
+ * The native spin unlock path is reported as usable exactly when
+ * is_shared_processor() is false.
+ */
+static inline bool pv_is_native_spin_unlock(void)
+{
+	return !is_shared_processor();
+}
+
+#endif /* _ASM_POWERPC_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h b/arch/powerpc/include/asm/paravirt_api_clock.h
new file mode 100644
index 0000000000..d25ca7ac57
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt_api_clock.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/paravirt.h>
diff --git a/arch/powerpc/include/asm/parport.h b/arch/powerpc/include/asm/parport.h
new file mode 100644
index 0000000000..42cc321ed7
--- /dev/null
+++ b/arch/powerpc/include/asm/parport.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * parport.h: platform-specific PC-style parport initialisation
+ *
+ * Copyright (C) 1999, 2000  Tim Waugh <tim@cyberelk.demon.co.uk>
+ *
+ * This file should only be included by drivers/parport/parport_pc.c.
+ */
+
+#ifndef _ASM_POWERPC_PARPORT_H
+#define _ASM_POWERPC_PARPORT_H
+#ifdef __KERNEL__
+
+#include <linux/of_irq.h>
+
+/*
+ * Probe the PC-style parallel ports described in the device tree.
+ *
+ * Walks every node of type "parallel" compatible with "pnpPNP,400", takes
+ * the two I/O addresses from cells 1 and 2 of its "reg" property, maps
+ * interrupt 0 of the node and hands the result to parport_pc_probe_port().
+ * Nodes with a missing or wrongly sized "reg" property, or without a
+ * mappable interrupt, are skipped.
+ *
+ * @autoirq: unused here.
+ * @autodma: passed through to parport_pc_probe_port() as the DMA setting.
+ *
+ * Returns the number of ports successfully probed.
+ */
+static int parport_pc_find_nonpci_ports (int autoirq, int autodma)
+{
+	struct device_node *np;
+	const u32 *prop;
+	u32 io1, io2;
+	int propsize;
+	int count = 0;
+	int virq;
+
+	for_each_compatible_node(np, "parallel", "pnpPNP,400") {
+		prop = of_get_property(np, "reg", &propsize);
+		/*
+		 * prop[1] and prop[2] are read below, so the property must
+		 * hold at least three cells; reject anything shorter rather
+		 * than reading past the end of it.
+		 */
+		if (!prop || propsize < 3*sizeof(u32) ||
+		    propsize > 6*sizeof(u32))
+			continue;
+		io1 = prop[1]; io2 = prop[2];
+
+		virq = irq_of_parse_and_map(np, 0);
+		if (!virq)
+			continue;
+
+		if (parport_pc_probe_port(io1, io2, virq, autodma, NULL, 0)
+				!= NULL)
+			count++;
+	}
+	return count;
+}
+
+#endif /* __KERNEL__ */
+#endif /* !(_ASM_POWERPC_PARPORT_H) */
diff --git a/arch/powerpc/include/asm/pasemi_dma.h b/arch/powerpc/include/asm/pasemi_dma.h
new file mode 100644
index 0000000000..712a0b3212
--- /dev/null
+++ b/arch/powerpc/include/asm/pasemi_dma.h
@@ -0,0 +1,526 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2006-2008 PA Semi, Inc
+ *
+ * Hardware register layout and descriptor formats for the on-board
+ * DMA engine on PA Semi PWRficient. Used by ethernet, function and security
+ * drivers.
+ */
+
+#ifndef ASM_PASEMI_DMA_H
+#define ASM_PASEMI_DMA_H
+
+/* Status register layout in the IOB region, at 0xfb800000 */
+struct pasdma_status {
+	u64 rx_sta[64];		/* RX channel status, one u64 per entry */
+	u64 tx_sta[20];		/* TX channel status, one u64 per entry */
+};
+
+
+/* All these registers live in the PCI configuration space for the DMA PCI
+ * device. Use the normal PCI config access functions for them.
+ */
+enum {
+	PAS_DMA_CAP_TXCH  = 0x44,	/* Transmit Channel Info      */
+	PAS_DMA_CAP_RXCH  = 0x48,	/* Receive Channel Info       */
+	PAS_DMA_CAP_IFI	  = 0x4c,	/* Interface Info	      */
+	PAS_DMA_COM_TXCMD = 0x100,	/* Transmit Command Register  */
+	PAS_DMA_COM_TXSTA = 0x104,	/* Transmit Status Register   */
+	PAS_DMA_COM_RXCMD = 0x108,	/* Receive Command Register   */
+	PAS_DMA_COM_RXSTA = 0x10c,	/* Receive Status Register    */
+	PAS_DMA_COM_CFG   = 0x114,	/* Common config reg	      */
+	PAS_DMA_TXF_SFLG0 = 0x140,	/* Set flags                  */
+	PAS_DMA_TXF_SFLG1 = 0x144,	/* Set flags                  */
+	PAS_DMA_TXF_CFLG0 = 0x148,	/* Clear flags                */
+	PAS_DMA_TXF_CFLG1 = 0x14c,	/* Clear flags                */
+};
+
+
+#define PAS_DMA_CAP_TXCH_TCHN_M	0x00ff0000 /* # of TX channels */
+#define PAS_DMA_CAP_TXCH_TCHN_S	16
+
+#define PAS_DMA_CAP_RXCH_RCHN_M	0x00ff0000 /* # of RX channels */
+#define PAS_DMA_CAP_RXCH_RCHN_S	16
+
+#define PAS_DMA_CAP_IFI_IOFF_M	0xff000000 /* Cfg reg for intf pointers */
+#define PAS_DMA_CAP_IFI_IOFF_S	24
+#define PAS_DMA_CAP_IFI_NIN_M	0x00ff0000 /* # of interfaces */
+#define PAS_DMA_CAP_IFI_NIN_S	16
+
+#define PAS_DMA_COM_TXCMD_EN	0x00000001 /* enable */
+#define PAS_DMA_COM_TXSTA_ACT	0x00000001 /* active */
+#define PAS_DMA_COM_RXCMD_EN	0x00000001 /* enable */
+#define PAS_DMA_COM_RXSTA_ACT	0x00000001 /* active */
+
+
+/* Per-interface and per-channel registers */
+#define _PAS_DMA_RXINT_STRIDE		0x20
+#define PAS_DMA_RXINT_RCMDSTA(i)	(0x200+(i)*_PAS_DMA_RXINT_STRIDE)
+#define    PAS_DMA_RXINT_RCMDSTA_EN	0x00000001
+#define    PAS_DMA_RXINT_RCMDSTA_ST	0x00000002
+#define    PAS_DMA_RXINT_RCMDSTA_MBT	0x00000008
+#define    PAS_DMA_RXINT_RCMDSTA_MDR	0x00000010
+#define    PAS_DMA_RXINT_RCMDSTA_MOO	0x00000020
+#define    PAS_DMA_RXINT_RCMDSTA_MBP	0x00000040
+#define    PAS_DMA_RXINT_RCMDSTA_BT	0x00000800
+#define    PAS_DMA_RXINT_RCMDSTA_DR	0x00001000
+#define    PAS_DMA_RXINT_RCMDSTA_OO	0x00002000
+#define    PAS_DMA_RXINT_RCMDSTA_BP	0x00004000
+#define    PAS_DMA_RXINT_RCMDSTA_TB	0x00008000
+#define    PAS_DMA_RXINT_RCMDSTA_ACT	0x00010000
+#define    PAS_DMA_RXINT_RCMDSTA_DROPS_M	0xfffe0000
+#define    PAS_DMA_RXINT_RCMDSTA_DROPS_S	17
+#define PAS_DMA_RXINT_CFG(i)		(0x204+(i)*_PAS_DMA_RXINT_STRIDE)
+#define    PAS_DMA_RXINT_CFG_RBP	0x80000000
+#define    PAS_DMA_RXINT_CFG_ITRR	0x40000000
+#define    PAS_DMA_RXINT_CFG_DHL_M	0x07000000
+#define    PAS_DMA_RXINT_CFG_DHL_S	24
+#define    PAS_DMA_RXINT_CFG_DHL(x)	(((x) << PAS_DMA_RXINT_CFG_DHL_S) & \
+					 PAS_DMA_RXINT_CFG_DHL_M)
+#define    PAS_DMA_RXINT_CFG_ITR	0x00400000
+#define    PAS_DMA_RXINT_CFG_LW		0x00200000
+#define    PAS_DMA_RXINT_CFG_L2		0x00100000
+#define    PAS_DMA_RXINT_CFG_HEN	0x00080000
+#define    PAS_DMA_RXINT_CFG_WIF	0x00000002
+#define    PAS_DMA_RXINT_CFG_WIL	0x00000001
+
+#define PAS_DMA_RXINT_INCR(i)		(0x210+(i)*_PAS_DMA_RXINT_STRIDE)
+#define    PAS_DMA_RXINT_INCR_INCR_M	0x0000ffff
+#define    PAS_DMA_RXINT_INCR_INCR_S	0
+#define    PAS_DMA_RXINT_INCR_INCR(x)	((x) & 0x0000ffff)
+#define PAS_DMA_RXINT_BASEL(i)		(0x218+(i)*_PAS_DMA_RXINT_STRIDE)
+#define    PAS_DMA_RXINT_BASEL_BRBL(x)	((x) & ~0x3f)
+#define PAS_DMA_RXINT_BASEU(i)		(0x21c+(i)*_PAS_DMA_RXINT_STRIDE)
+#define    PAS_DMA_RXINT_BASEU_BRBH(x)	((x) & 0xfff)
+#define    PAS_DMA_RXINT_BASEU_SIZ_M	0x3fff0000	/* # of cache lines worth of buffer ring */
+#define    PAS_DMA_RXINT_BASEU_SIZ_S	16		/* 0 = 16K */
+#define    PAS_DMA_RXINT_BASEU_SIZ(x)	(((x) << PAS_DMA_RXINT_BASEU_SIZ_S) & \
+					 PAS_DMA_RXINT_BASEU_SIZ_M)
+
+
+#define _PAS_DMA_TXCHAN_STRIDE	0x20    /* Size per channel; accessors add (c)*stride */
+#define _PAS_DMA_TXCHAN_TCMDSTA	0x300	/* Command / Status		*/
+#define _PAS_DMA_TXCHAN_CFG	0x304	/* Configuration		*/
+#define _PAS_DMA_TXCHAN_DSCRBU	0x308	/* Descriptor BU Allocation	*/
+#define _PAS_DMA_TXCHAN_INCR	0x310	/* Descriptor increment		*/
+#define _PAS_DMA_TXCHAN_CNT	0x314	/* Descriptor count/offset	*/
+#define _PAS_DMA_TXCHAN_BASEL	0x318	/* Descriptor ring base (low)	*/
+#define _PAS_DMA_TXCHAN_BASEU	0x31c	/*			(high)	*/
+#define PAS_DMA_TXCHAN_TCMDSTA(c) (_PAS_DMA_TXCHAN_TCMDSTA+(c)*_PAS_DMA_TXCHAN_STRIDE)
+#define    PAS_DMA_TXCHAN_TCMDSTA_EN	0x00000001	/* Enabled */
+#define    PAS_DMA_TXCHAN_TCMDSTA_ST	0x00000002	/* Stop interface */
+#define    PAS_DMA_TXCHAN_TCMDSTA_ACT	0x00010000	/* Active */
+#define    PAS_DMA_TXCHAN_TCMDSTA_SZ	0x00000800
+#define    PAS_DMA_TXCHAN_TCMDSTA_DB	0x00000400
+#define    PAS_DMA_TXCHAN_TCMDSTA_DE	0x00000200
+#define    PAS_DMA_TXCHAN_TCMDSTA_DA	0x00000100
+#define PAS_DMA_TXCHAN_CFG(c)     (_PAS_DMA_TXCHAN_CFG+(c)*_PAS_DMA_TXCHAN_STRIDE)
+#define    PAS_DMA_TXCHAN_CFG_TY_IFACE	0x00000000	/* Type = interface */
+#define    PAS_DMA_TXCHAN_CFG_TY_COPY	0x00000001	/* Type = copy only */
+#define    PAS_DMA_TXCHAN_CFG_TY_FUNC	0x00000002	/* Type = function */
+#define    PAS_DMA_TXCHAN_CFG_TY_XOR	0x00000003	/* Type = xor only */
+#define    PAS_DMA_TXCHAN_CFG_TATTR_M	0x0000003c
+#define    PAS_DMA_TXCHAN_CFG_TATTR_S	2
+#define    PAS_DMA_TXCHAN_CFG_TATTR(x)	(((x) << PAS_DMA_TXCHAN_CFG_TATTR_S) & \
+					 PAS_DMA_TXCHAN_CFG_TATTR_M)
+#define    PAS_DMA_TXCHAN_CFG_LPDQ	0x00000800
+#define    PAS_DMA_TXCHAN_CFG_LPSQ	0x00000400
+#define    PAS_DMA_TXCHAN_CFG_WT_M	0x000003c0
+#define    PAS_DMA_TXCHAN_CFG_WT_S	6
+#define    PAS_DMA_TXCHAN_CFG_WT(x)	(((x) << PAS_DMA_TXCHAN_CFG_WT_S) & \
+					 PAS_DMA_TXCHAN_CFG_WT_M)
+#define    PAS_DMA_TXCHAN_CFG_TRD	0x00010000	/* translate data */
+#define    PAS_DMA_TXCHAN_CFG_TRR	0x00008000	/* translate rings */
+#define    PAS_DMA_TXCHAN_CFG_UP	0x00004000	/* update tx descr when sent */
+#define    PAS_DMA_TXCHAN_CFG_CL	0x00002000	/* Clean last line */
+#define    PAS_DMA_TXCHAN_CFG_CF	0x00001000	/* Clean first line */
+#define PAS_DMA_TXCHAN_INCR(c)    (_PAS_DMA_TXCHAN_INCR+(c)*_PAS_DMA_TXCHAN_STRIDE)
+#define PAS_DMA_TXCHAN_BASEL(c)   (_PAS_DMA_TXCHAN_BASEL+(c)*_PAS_DMA_TXCHAN_STRIDE)
+#define    PAS_DMA_TXCHAN_BASEL_BRBL_M	0xffffffc0
+#define    PAS_DMA_TXCHAN_BASEL_BRBL_S	0
+#define    PAS_DMA_TXCHAN_BASEL_BRBL(x)	(((x) << PAS_DMA_TXCHAN_BASEL_BRBL_S) & \
+					 PAS_DMA_TXCHAN_BASEL_BRBL_M)
+#define PAS_DMA_TXCHAN_BASEU(c)   (_PAS_DMA_TXCHAN_BASEU+(c)*_PAS_DMA_TXCHAN_STRIDE)
+#define    PAS_DMA_TXCHAN_BASEU_BRBH_M	0x00000fff
+#define    PAS_DMA_TXCHAN_BASEU_BRBH_S	0
+#define    PAS_DMA_TXCHAN_BASEU_BRBH(x)	(((x) << PAS_DMA_TXCHAN_BASEU_BRBH_S) & \
+					 PAS_DMA_TXCHAN_BASEU_BRBH_M)
+/* # of cache lines worth of buffer ring */
+#define    PAS_DMA_TXCHAN_BASEU_SIZ_M	0x3fff0000
+#define    PAS_DMA_TXCHAN_BASEU_SIZ_S	16		/* 0 = 16K */
+#define    PAS_DMA_TXCHAN_BASEU_SIZ(x)	(((x) << PAS_DMA_TXCHAN_BASEU_SIZ_S) & \
+					 PAS_DMA_TXCHAN_BASEU_SIZ_M)
+
+#define _PAS_DMA_RXCHAN_STRIDE	0x20    /* Size per channel; accessors add (c)*stride */
+#define _PAS_DMA_RXCHAN_CCMDSTA	0x800	/* Command / Status		*/
+#define _PAS_DMA_RXCHAN_CFG	0x804	/* Configuration		*/
+#define _PAS_DMA_RXCHAN_INCR	0x810	/* Descriptor increment		*/
+#define _PAS_DMA_RXCHAN_CNT	0x814	/* Descriptor count/offset	*/
+#define _PAS_DMA_RXCHAN_BASEL	0x818	/* Descriptor ring base (low)	*/
+#define _PAS_DMA_RXCHAN_BASEU	0x81c	/*			(high)	*/
+#define PAS_DMA_RXCHAN_CCMDSTA(c) (_PAS_DMA_RXCHAN_CCMDSTA+(c)*_PAS_DMA_RXCHAN_STRIDE)
+#define    PAS_DMA_RXCHAN_CCMDSTA_EN	0x00000001	/* Enabled */
+#define    PAS_DMA_RXCHAN_CCMDSTA_ST	0x00000002	/* Stop interface */
+#define    PAS_DMA_RXCHAN_CCMDSTA_ACT	0x00010000	/* Active */
+#define    PAS_DMA_RXCHAN_CCMDSTA_DU	0x00020000
+#define    PAS_DMA_RXCHAN_CCMDSTA_OD	0x00002000
+#define    PAS_DMA_RXCHAN_CCMDSTA_FD	0x00001000
+#define    PAS_DMA_RXCHAN_CCMDSTA_DT	0x00000800
+#define PAS_DMA_RXCHAN_CFG(c)     (_PAS_DMA_RXCHAN_CFG+(c)*_PAS_DMA_RXCHAN_STRIDE)
+#define    PAS_DMA_RXCHAN_CFG_CTR	0x00000400
+#define    PAS_DMA_RXCHAN_CFG_HBU_M	0x00000380
+#define    PAS_DMA_RXCHAN_CFG_HBU_S	7
+#define    PAS_DMA_RXCHAN_CFG_HBU(x)	(((x) << PAS_DMA_RXCHAN_CFG_HBU_S) & \
+					 PAS_DMA_RXCHAN_CFG_HBU_M)
+#define PAS_DMA_RXCHAN_INCR(c)    (_PAS_DMA_RXCHAN_INCR+(c)*_PAS_DMA_RXCHAN_STRIDE)
+#define PAS_DMA_RXCHAN_BASEL(c)   (_PAS_DMA_RXCHAN_BASEL+(c)*_PAS_DMA_RXCHAN_STRIDE)
+#define    PAS_DMA_RXCHAN_BASEL_BRBL_M	0xffffffc0
+#define    PAS_DMA_RXCHAN_BASEL_BRBL_S	0
+#define    PAS_DMA_RXCHAN_BASEL_BRBL(x)	(((x) << PAS_DMA_RXCHAN_BASEL_BRBL_S) & \
+					 PAS_DMA_RXCHAN_BASEL_BRBL_M)
+#define PAS_DMA_RXCHAN_BASEU(c)   (_PAS_DMA_RXCHAN_BASEU+(c)*_PAS_DMA_RXCHAN_STRIDE)
+#define    PAS_DMA_RXCHAN_BASEU_BRBH_M	0x00000fff
+#define    PAS_DMA_RXCHAN_BASEU_BRBH_S	0
+#define    PAS_DMA_RXCHAN_BASEU_BRBH(x)	(((x) << PAS_DMA_RXCHAN_BASEU_BRBH_S) & \
+					 PAS_DMA_RXCHAN_BASEU_BRBH_M)
+/* # of cache lines worth of buffer ring */
+#define    PAS_DMA_RXCHAN_BASEU_SIZ_M	0x3fff0000
+#define    PAS_DMA_RXCHAN_BASEU_SIZ_S	16		/* 0 = 16K */
+#define    PAS_DMA_RXCHAN_BASEU_SIZ(x)	(((x) << PAS_DMA_RXCHAN_BASEU_SIZ_S) & \
+					 PAS_DMA_RXCHAN_BASEU_SIZ_M)
+
+#define    PAS_STATUS_PCNT_M		0x000000000000ffffull
+#define    PAS_STATUS_PCNT_S		0
+#define    PAS_STATUS_DCNT_M		0x00000000ffff0000ull
+#define    PAS_STATUS_DCNT_S		16
+#define    PAS_STATUS_BPCNT_M		0x0000ffff00000000ull
+#define    PAS_STATUS_BPCNT_S		32
+#define    PAS_STATUS_CAUSE_M		0xf000000000000000ull
+#define    PAS_STATUS_TIMER		0x1000000000000000ull
+#define    PAS_STATUS_ERROR		0x2000000000000000ull
+#define    PAS_STATUS_SOFT		0x4000000000000000ull
+#define    PAS_STATUS_INT		0x8000000000000000ull
+
+#define PAS_IOB_COM_PKTHDRCNT		0x120
+#define    PAS_IOB_COM_PKTHDRCNT_PKTHDR1_M	0x0fff0000
+#define    PAS_IOB_COM_PKTHDRCNT_PKTHDR1_S	16
+#define    PAS_IOB_COM_PKTHDRCNT_PKTHDR0_M	0x00000fff
+#define    PAS_IOB_COM_PKTHDRCNT_PKTHDR0_S	0
+
+#define PAS_IOB_DMA_RXCH_CFG(i)		(0x1100 + (i)*4)
+#define    PAS_IOB_DMA_RXCH_CFG_CNTTH_M		0x00000fff
+#define    PAS_IOB_DMA_RXCH_CFG_CNTTH_S		0
+#define    PAS_IOB_DMA_RXCH_CFG_CNTTH(x)	(((x) << PAS_IOB_DMA_RXCH_CFG_CNTTH_S) & \
+						 PAS_IOB_DMA_RXCH_CFG_CNTTH_M)
+#define PAS_IOB_DMA_TXCH_CFG(i)		(0x1200 + (i)*4)
+#define    PAS_IOB_DMA_TXCH_CFG_CNTTH_M		0x00000fff
+#define    PAS_IOB_DMA_TXCH_CFG_CNTTH_S		0
+#define    PAS_IOB_DMA_TXCH_CFG_CNTTH(x)	(((x) << PAS_IOB_DMA_TXCH_CFG_CNTTH_S) & \
+						 PAS_IOB_DMA_TXCH_CFG_CNTTH_M)
+#define PAS_IOB_DMA_RXCH_STAT(i)	(0x1300 + (i)*4)
+#define    PAS_IOB_DMA_RXCH_STAT_INTGEN	0x00001000
+#define    PAS_IOB_DMA_RXCH_STAT_CNTDEL_M	0x00000fff
+#define    PAS_IOB_DMA_RXCH_STAT_CNTDEL_S	0
+#define    PAS_IOB_DMA_RXCH_STAT_CNTDEL(x)	(((x) << PAS_IOB_DMA_RXCH_STAT_CNTDEL_S) &\
+						 PAS_IOB_DMA_RXCH_STAT_CNTDEL_M)
+#define PAS_IOB_DMA_TXCH_STAT(i)	(0x1400 + (i)*4)
+#define    PAS_IOB_DMA_TXCH_STAT_INTGEN	0x00001000
+#define    PAS_IOB_DMA_TXCH_STAT_CNTDEL_M	0x00000fff
+#define    PAS_IOB_DMA_TXCH_STAT_CNTDEL_S	0
+#define    PAS_IOB_DMA_TXCH_STAT_CNTDEL(x)	(((x) << PAS_IOB_DMA_TXCH_STAT_CNTDEL_S) &\
+						 PAS_IOB_DMA_TXCH_STAT_CNTDEL_M)
+#define PAS_IOB_DMA_RXCH_RESET(i)	(0x1500 + (i)*4)
+#define    PAS_IOB_DMA_RXCH_RESET_PCNT_M	0xffff0000
+#define    PAS_IOB_DMA_RXCH_RESET_PCNT_S	16
+#define    PAS_IOB_DMA_RXCH_RESET_PCNT(x)	(((x) << PAS_IOB_DMA_RXCH_RESET_PCNT_S) & \
+						 PAS_IOB_DMA_RXCH_RESET_PCNT_M)
+#define    PAS_IOB_DMA_RXCH_RESET_PCNTRST	0x00000020
+#define    PAS_IOB_DMA_RXCH_RESET_DCNTRST	0x00000010
+#define    PAS_IOB_DMA_RXCH_RESET_TINTC		0x00000008
+#define    PAS_IOB_DMA_RXCH_RESET_DINTC		0x00000004
+#define    PAS_IOB_DMA_RXCH_RESET_SINTC		0x00000002
+#define    PAS_IOB_DMA_RXCH_RESET_PINTC		0x00000001
+#define PAS_IOB_DMA_TXCH_RESET(i)	(0x1600 + (i)*4)
+#define    PAS_IOB_DMA_TXCH_RESET_PCNT_M	0xffff0000
+#define    PAS_IOB_DMA_TXCH_RESET_PCNT_S	16
+#define    PAS_IOB_DMA_TXCH_RESET_PCNT(x)	(((x) << PAS_IOB_DMA_TXCH_RESET_PCNT_S) & \
+						 PAS_IOB_DMA_TXCH_RESET_PCNT_M)
+#define    PAS_IOB_DMA_TXCH_RESET_PCNTRST	0x00000020
+#define    PAS_IOB_DMA_TXCH_RESET_DCNTRST	0x00000010
+#define    PAS_IOB_DMA_TXCH_RESET_TINTC		0x00000008
+#define    PAS_IOB_DMA_TXCH_RESET_DINTC		0x00000004
+#define    PAS_IOB_DMA_TXCH_RESET_SINTC		0x00000002
+#define    PAS_IOB_DMA_TXCH_RESET_PINTC		0x00000001
+
+#define PAS_IOB_DMA_COM_TIMEOUTCFG		0x1700
+#define    PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT_M	0x00ffffff
+#define    PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT_S	0
+#define    PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(x)	(((x) << PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT_S) & \
+						 PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT_M)
+
+/* Transmit descriptor fields */
+#define	XCT_MACTX_T		0x8000000000000000ull
+#define	XCT_MACTX_ST		0x4000000000000000ull
+#define XCT_MACTX_NORES		0x0000000000000000ull
+#define XCT_MACTX_8BRES		0x1000000000000000ull
+#define XCT_MACTX_24BRES	0x2000000000000000ull
+#define XCT_MACTX_40BRES	0x3000000000000000ull
+#define XCT_MACTX_I		0x0800000000000000ull
+#define XCT_MACTX_O		0x0400000000000000ull
+#define XCT_MACTX_E		0x0200000000000000ull
+#define XCT_MACTX_VLAN_M	0x0180000000000000ull
+#define XCT_MACTX_VLAN_NOP	0x0000000000000000ull
+#define XCT_MACTX_VLAN_REMOVE	0x0080000000000000ull
+#define XCT_MACTX_VLAN_INSERT   0x0100000000000000ull
+#define XCT_MACTX_VLAN_REPLACE  0x0180000000000000ull
+#define XCT_MACTX_CRC_M		0x0060000000000000ull
+#define XCT_MACTX_CRC_NOP	0x0000000000000000ull
+#define XCT_MACTX_CRC_INSERT	0x0020000000000000ull
+#define XCT_MACTX_CRC_PAD	0x0040000000000000ull
+#define XCT_MACTX_CRC_REPLACE	0x0060000000000000ull
+#define XCT_MACTX_SS		0x0010000000000000ull
+#define XCT_MACTX_LLEN_M	0x00007fff00000000ull
+#define XCT_MACTX_LLEN_S	32ull
+#define XCT_MACTX_LLEN(x)	((((long)(x)) << XCT_MACTX_LLEN_S) & \
+				 XCT_MACTX_LLEN_M)
+#define XCT_MACTX_IPH_M		0x00000000f8000000ull
+#define XCT_MACTX_IPH_S		27ull
+#define XCT_MACTX_IPH(x)	((((long)(x)) << XCT_MACTX_IPH_S) & \
+				 XCT_MACTX_IPH_M)
+#define XCT_MACTX_IPO_M		0x0000000007c00000ull
+#define XCT_MACTX_IPO_S		22ull
+#define XCT_MACTX_IPO(x)	((((long)(x)) << XCT_MACTX_IPO_S) & \
+				 XCT_MACTX_IPO_M)
+#define XCT_MACTX_CSUM_M	0x0000000000000060ull
+#define XCT_MACTX_CSUM_NOP	0x0000000000000000ull
+#define XCT_MACTX_CSUM_TCP	0x0000000000000040ull
+#define XCT_MACTX_CSUM_UDP	0x0000000000000060ull
+#define XCT_MACTX_V6		0x0000000000000010ull
+#define XCT_MACTX_C		0x0000000000000004ull
+#define XCT_MACTX_AL2		0x0000000000000002ull
+
+/* Receive descriptor fields */
+#define	XCT_MACRX_T		0x8000000000000000ull
+#define	XCT_MACRX_ST		0x4000000000000000ull
+#define XCT_MACRX_RR_M		0x3000000000000000ull
+#define XCT_MACRX_RR_NORES	0x0000000000000000ull
+#define XCT_MACRX_RR_8BRES	0x1000000000000000ull
+#define XCT_MACRX_O		0x0400000000000000ull
+#define XCT_MACRX_E		0x0200000000000000ull
+#define XCT_MACRX_FF		0x0100000000000000ull
+#define XCT_MACRX_PF		0x0080000000000000ull
+#define XCT_MACRX_OB		0x0040000000000000ull
+#define XCT_MACRX_OD		0x0020000000000000ull
+#define XCT_MACRX_FS		0x0010000000000000ull
+#define XCT_MACRX_NB_M		0x000fc00000000000ull
+#define XCT_MACRX_NB_S		46ULL
+#define XCT_MACRX_NB(x)		((((long)(x)) << XCT_MACRX_NB_S) & \
+				 XCT_MACRX_NB_M)
+#define XCT_MACRX_LLEN_M	0x00003fff00000000ull
+#define XCT_MACRX_LLEN_S	32ULL
+#define XCT_MACRX_LLEN(x)	((((long)(x)) << XCT_MACRX_LLEN_S) & \
+				 XCT_MACRX_LLEN_M)
+#define XCT_MACRX_CRC		0x0000000080000000ull
+#define XCT_MACRX_LEN_M		0x0000000060000000ull
+#define XCT_MACRX_LEN_TOOSHORT	0x0000000020000000ull
+#define XCT_MACRX_LEN_BELOWMIN	0x0000000040000000ull
+#define XCT_MACRX_LEN_TRUNC	0x0000000060000000ull
+#define XCT_MACRX_CAST_M	0x0000000018000000ull
+#define XCT_MACRX_CAST_UNI	0x0000000000000000ull
+#define XCT_MACRX_CAST_MULTI	0x0000000008000000ull
+#define XCT_MACRX_CAST_BROAD	0x0000000010000000ull
+#define XCT_MACRX_CAST_PAUSE	0x0000000018000000ull
+#define XCT_MACRX_VLC_M		0x0000000006000000ull
+#define XCT_MACRX_FM		0x0000000001000000ull
+#define XCT_MACRX_HTY_M		0x0000000000c00000ull
+#define XCT_MACRX_HTY_IPV4_OK	0x0000000000000000ull
+#define XCT_MACRX_HTY_IPV6 	0x0000000000400000ull
+#define XCT_MACRX_HTY_IPV4_BAD	0x0000000000800000ull
+#define XCT_MACRX_HTY_NONIP	0x0000000000c00000ull
+#define XCT_MACRX_IPP_M		0x00000000003f0000ull
+#define XCT_MACRX_IPP_S		16
+#define XCT_MACRX_CSUM_M	0x000000000000ffffull
+#define XCT_MACRX_CSUM_S	0
+
+#define XCT_PTR_T		0x8000000000000000ull
+#define XCT_PTR_LEN_M		0x7ffff00000000000ull
+#define XCT_PTR_LEN_S		44
+#define XCT_PTR_LEN(x)		((((long)(x)) << XCT_PTR_LEN_S) & \
+				 XCT_PTR_LEN_M)
+#define XCT_PTR_ADDR_M		0x00000fffffffffffull
+#define XCT_PTR_ADDR_S		0
+#define XCT_PTR_ADDR(x)		((((long)(x)) << XCT_PTR_ADDR_S) & \
+				 XCT_PTR_ADDR_M)
+
+/* Receive interface 8byte result fields */
+#define XCT_RXRES_8B_L4O_M	0xff00000000000000ull
+#define XCT_RXRES_8B_L4O_S	56
+#define XCT_RXRES_8B_RULE_M	0x00ffff0000000000ull
+#define XCT_RXRES_8B_RULE_S	40
+#define XCT_RXRES_8B_EVAL_M	0x000000ffff000000ull
+#define XCT_RXRES_8B_EVAL_S	24
+#define XCT_RXRES_8B_HTYPE_M	0x0000000000f00000ull
+#define XCT_RXRES_8B_HASH_M	0x00000000000fffffull
+#define XCT_RXRES_8B_HASH_S	0
+
+/* Receive interface buffer fields */
+#define XCT_RXB_LEN_M		0x0ffff00000000000ull
+#define XCT_RXB_LEN_S		44
+#define XCT_RXB_LEN(x)		((((long)(x)) << XCT_RXB_LEN_S) & \
+				 XCT_RXB_LEN_M)
+#define XCT_RXB_ADDR_M		0x00000fffffffffffull
+#define XCT_RXB_ADDR_S		0
+#define XCT_RXB_ADDR(x)		((((long)(x)) << XCT_RXB_ADDR_S) & \
+				 XCT_RXB_ADDR_M)
+
+/* Copy descriptor fields */
+#define XCT_COPY_T		0x8000000000000000ull
+#define XCT_COPY_ST		0x4000000000000000ull
+#define XCT_COPY_RR_M		0x3000000000000000ull
+#define XCT_COPY_RR_NORES	0x0000000000000000ull
+#define XCT_COPY_RR_8BRES	0x1000000000000000ull
+#define XCT_COPY_RR_24BRES	0x2000000000000000ull
+#define XCT_COPY_RR_40BRES	0x3000000000000000ull
+#define XCT_COPY_I		0x0800000000000000ull
+#define XCT_COPY_O		0x0400000000000000ull
+#define XCT_COPY_E		0x0200000000000000ull
+#define XCT_COPY_STY_ZERO	0x01c0000000000000ull
+#define XCT_COPY_DTY_PREF	0x0038000000000000ull
+#define XCT_COPY_LLEN_M		0x0007ffff00000000ull
+#define XCT_COPY_LLEN_S		32
+#define XCT_COPY_LLEN(x)	((((long)(x)) << XCT_COPY_LLEN_S) & \
+				 XCT_COPY_LLEN_M)
+#define XCT_COPY_SE		0x0000000000000001ull
+
+/* Function descriptor fields */
+#define XCT_FUN_T		0x8000000000000000ull
+#define XCT_FUN_ST		0x4000000000000000ull
+#define XCT_FUN_RR_M		0x3000000000000000ull
+#define XCT_FUN_RR_NORES	0x0000000000000000ull
+#define XCT_FUN_RR_8BRES	0x1000000000000000ull
+#define XCT_FUN_RR_24BRES	0x2000000000000000ull
+#define XCT_FUN_RR_40BRES	0x3000000000000000ull
+#define XCT_FUN_I		0x0800000000000000ull
+#define XCT_FUN_O		0x0400000000000000ull
+#define XCT_FUN_E		0x0200000000000000ull
+#define XCT_FUN_FUN_M		0x01c0000000000000ull
+#define XCT_FUN_FUN_S		54
+#define XCT_FUN_FUN(x)		((((long)(x)) << XCT_FUN_FUN_S) & XCT_FUN_FUN_M)
+#define XCT_FUN_CRM_M		0x0038000000000000ull
+#define XCT_FUN_CRM_NOP		0x0000000000000000ull
+#define XCT_FUN_CRM_SIG		0x0008000000000000ull
+#define XCT_FUN_LLEN_M		0x0007ffff00000000ull
+#define XCT_FUN_LLEN_S		32
+#define XCT_FUN_LLEN(x)		((((long)(x)) << XCT_FUN_LLEN_S) & XCT_FUN_LLEN_M)
+#define XCT_FUN_SHL_M		0x00000000f8000000ull
+#define XCT_FUN_SHL_S		27
+#define XCT_FUN_SHL(x)		((((long)(x)) << XCT_FUN_SHL_S) & XCT_FUN_SHL_M)
+#define XCT_FUN_CHL_M		0x0000000007c00000ull
+#define XCT_FUN_HSZ_M		0x00000000003c0000ull
+#define XCT_FUN_ALG_M		0x0000000000038000ull
+#define XCT_FUN_HP		0x0000000000004000ull
+#define XCT_FUN_BCM_M		0x0000000000003800ull
+#define XCT_FUN_BCP_M		0x0000000000000600ull
+#define XCT_FUN_SIG_M		0x00000000000001f0ull
+#define XCT_FUN_SIG_TCP4	0x0000000000000140ull
+#define XCT_FUN_SIG_TCP6	0x0000000000000150ull
+#define XCT_FUN_SIG_UDP4	0x0000000000000160ull
+#define XCT_FUN_SIG_UDP6	0x0000000000000170ull
+#define XCT_FUN_A		0x0000000000000008ull
+#define XCT_FUN_C		0x0000000000000004ull
+#define XCT_FUN_AL2		0x0000000000000002ull
+#define XCT_FUN_SE		0x0000000000000001ull
+
+/* Function descriptor 8byte result fields */
+#define XCT_FUNRES_8B_CS_M	0x0000ffff00000000ull
+#define XCT_FUNRES_8B_CS_S	32
+#define XCT_FUNRES_8B_CRC_M	0x00000000ffffffffull
+#define XCT_FUNRES_8B_CRC_S	0
+
+/* Control descriptor fields */
+#define CTRL_CMD_T		0x8000000000000000ull
+#define CTRL_CMD_META_EVT	0x2000000000000000ull
+#define CTRL_CMD_O		0x0400000000000000ull
+#define CTRL_CMD_ETYPE_M	0x0038000000000000ull
+#define CTRL_CMD_ETYPE_EXT	0x0000000000000000ull
+#define CTRL_CMD_ETYPE_WSET	0x0020000000000000ull
+#define CTRL_CMD_ETYPE_WCLR	0x0028000000000000ull
+#define CTRL_CMD_ETYPE_SET	0x0030000000000000ull
+#define CTRL_CMD_ETYPE_CLR	0x0038000000000000ull
+#define CTRL_CMD_REG_M		0x000000000000007full
+#define CTRL_CMD_REG_S		0
+#define CTRL_CMD_REG(x)		((((long)(x)) << CTRL_CMD_REG_S) & \
+				 CTRL_CMD_REG_M)
+
+
+
+/* Prototypes for the shared DMA functions in the platform code. */
+
+/* DMA channel type passed to pasemi_dma_alloc_chan(). The only restriction
+ * currently used is event class 0/1 on TX, for event-triggered DMA transactions.
+ */
+
+enum pasemi_dmachan_type {
+	RXCHAN = 0,		/* Any RX chan */
+	TXCHAN = 1,		/* Any TX chan */
+	TXCHAN_EVT0 = 0x1001,	/* TX chan in event class 0 (chan 0-9) */
+	TXCHAN_EVT1 = 0x2001,	/* TX chan in event class 1 (chan 10-19) */
+};
+
+struct pasemi_dmachan {
+	int		 chno;		/* Channel number */
+	enum pasemi_dmachan_type chan_type;	/* RXCHAN, TXCHAN or TXCHAN_EVTn */
+	u64		*status;	/* Ptr to cacheable status */
+	int		 irq;		/* IRQ used by channel */
+	unsigned int	 ring_size;	/* Size of allocated ring */
+	dma_addr_t	 ring_dma;	/* DMA address for ring */
+	u64		*ring_virt;	/* Virt address for ring */
+	void		*priv;		/* Ptr to start of client struct */
+};
+
+/* Read/write the different registers in the I/O Bridge, Ethernet
+ * and DMA Controller
+ */
+extern unsigned int pasemi_read_iob_reg(unsigned int reg);
+extern void pasemi_write_iob_reg(unsigned int reg, unsigned int val);
+
+extern unsigned int pasemi_read_mac_reg(int intf, unsigned int reg);
+extern void pasemi_write_mac_reg(int intf, unsigned int reg, unsigned int val);
+
+extern unsigned int pasemi_read_dma_reg(unsigned int reg);
+extern void pasemi_write_dma_reg(unsigned int reg, unsigned int val);
+
+/* Channel management routines */
+
+extern void *pasemi_dma_alloc_chan(enum pasemi_dmachan_type type,
+				   int total_size, int offset);
+extern void pasemi_dma_free_chan(struct pasemi_dmachan *chan);
+
+extern void pasemi_dma_start_chan(const struct pasemi_dmachan *chan,
+				  const u32 cmdsta);
+extern int pasemi_dma_stop_chan(const struct pasemi_dmachan *chan);
+
+/* Common routines to allocate rings and buffers */
+
+extern int pasemi_dma_alloc_ring(struct pasemi_dmachan *chan, int ring_size);
+extern void pasemi_dma_free_ring(struct pasemi_dmachan *chan);
+
+extern void *pasemi_dma_alloc_buf(struct pasemi_dmachan *chan, int size,
+				  dma_addr_t *handle);
+extern void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size,
+				dma_addr_t *handle);
+
+/* Routines to allocate flags (events) for channel synchronization */
+extern int  pasemi_dma_alloc_flag(void);
+extern void pasemi_dma_free_flag(int flag);
+extern void pasemi_dma_set_flag(int flag);
+extern void pasemi_dma_clear_flag(int flag);
+
+/* Routines to allocate function engines */
+extern int  pasemi_dma_alloc_fun(void);
+extern void pasemi_dma_free_fun(int fun);
+
+/* Initialize the library, must be called before any other functions */
+extern int pasemi_dma_init(void);
+
+#endif /* ASM_PASEMI_DMA_H */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
new file mode 100644
index 0000000000..2aa3a091ef
--- /dev/null
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PCI_BRIDGE_H
+#define _ASM_POWERPC_PCI_BRIDGE_H
+#ifdef __KERNEL__
+/*
+ */
+#include <linux/pci.h>
+#include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/numa.h>
+#include <linux/iommu.h>
+
+struct device_node;
+
+/*
+ * PCI controller operations (embedded in struct pci_controller as controller_ops)
+ */
+struct pci_controller_ops {
+	void		(*dma_dev_setup)(struct pci_dev *pdev);
+	void		(*dma_bus_setup)(struct pci_bus *bus);
+	bool		(*iommu_bypass_supported)(struct pci_dev *pdev,
+				u64 mask);
+
+	int		(*probe_mode)(struct pci_bus *bus);
+
+	/* Called when pci_enable_device() is called. Returns true to
+	 * allow assignment/enabling of the device. */
+	bool		(*enable_device_hook)(struct pci_dev *pdev);
+
+	void		(*disable_device)(struct pci_dev *pdev);
+
+	void		(*release_device)(struct pci_dev *pdev);
+
+	/* Called during PCI resource reassignment */
+	resource_size_t (*window_alignment)(struct pci_bus *bus,
+					    unsigned long type);
+	void		(*setup_bridge)(struct pci_bus *bus,
+					unsigned long type);
+	void		(*reset_secondary_bus)(struct pci_dev *pdev);
+
+#ifdef CONFIG_PCI_MSI
+	int		(*setup_msi_irqs)(struct pci_dev *pdev,
+					  int nvec, int type);
+	void		(*teardown_msi_irqs)(struct pci_dev *pdev);
+#endif	/* CONFIG_PCI_MSI */
+
+	void		(*shutdown)(struct pci_controller *hose);
+
+	struct iommu_group *(*device_group)(struct pci_controller *hose,
+					    struct pci_dev *pdev);
+};
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+	struct pci_bus *bus;
+	char is_dynamic;
+#ifdef CONFIG_PPC64
+	int node;			/* written by PHB_SET_NODE() */
+#endif	/* CONFIG_PPC64 */
+	struct device_node *dn;
+	struct list_head list_node;
+	struct device *parent;
+
+	int first_busno;
+	int last_busno;
+	int self_busno;
+	struct resource busn;
+
+	void __iomem *io_base_virt;
+#ifdef CONFIG_PPC64
+	void __iomem *io_base_alloc;
+#endif	/* CONFIG_PPC64 */
+	resource_size_t io_base_phys;
+	resource_size_t pci_io_size;
+
+	/* Some machines have a special region to forward the ISA
+	 * "memory" cycles such as VGA memory regions. Left to 0
+	 * if unsupported
+	 */
+	resource_size_t	isa_mem_phys;
+	resource_size_t	isa_mem_size;
+
+	struct pci_controller_ops controller_ops;
+	struct pci_ops *ops;
+	unsigned int __iomem *cfg_addr;
+	void __iomem *cfg_data;
+
+	/*
+	 * Used for variants of PCI indirect handling and possible quirks:
+	 *  SET_CFG_TYPE - used on 4xx or any PHB that does explicit type0/1
+	 *  EXT_REG - provides access to PCI-e extended registers
+	 *  SURPRESS_PRIMARY_BUS - we suppress the setting of PCI_PRIMARY_BUS
+	 *   on Freescale PCI-e controllers since they used the PCI_PRIMARY_BUS
+	 *   to determine which bus number to match on when generating type0
+	 *   config cycles
+	 *  NO_PCIE_LINK - the Freescale PCI-e controllers have issues with
+	 *   hanging if we don't have link and try to do config cycles to
+	 *   anything but the PHB.  Only allow talking to the PHB if this is
+	 *   set.
+	 *  BIG_ENDIAN - cfg_addr is a big endian register
+	 *  BROKEN_MRM - the 440EPx/GRx chips have an erratum that causes hangs on
+	 *   the PLB4.  Effectively disable MRM commands by setting this.
+	 *  FSL_CFG_REG_LINK - Freescale controller version in which the PCIe
+	 *   link status is in a RC PCIe cfg register (vs being a SoC register)
+	 */
+#define PPC_INDIRECT_TYPE_SET_CFG_TYPE		0x00000001
+#define PPC_INDIRECT_TYPE_EXT_REG		0x00000002
+#define PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS	0x00000004
+#define PPC_INDIRECT_TYPE_NO_PCIE_LINK		0x00000008
+#define PPC_INDIRECT_TYPE_BIG_ENDIAN		0x00000010
+#define PPC_INDIRECT_TYPE_BROKEN_MRM		0x00000020
+#define PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK	0x00000040
+	u32 indirect_type;	/* presumably a mask of PPC_INDIRECT_TYPE_* - confirm */
+	/* Currently, we limit ourselves to 1 IO range and 3 mem
+	 * ranges since the common pci_bus structure can't handle more
+	 */
+	struct resource	io_resource;
+	struct resource mem_resources[3];
+	resource_size_t mem_offset[3];
+	int global_number;		/* PCI domain number */
+
+	resource_size_t dma_window_base_cur;
+	resource_size_t dma_window_size;
+
+#ifdef CONFIG_PPC64
+	unsigned long buid;
+	struct pci_dn *pci_data;
+#endif	/* CONFIG_PPC64 */
+
+	void *private_data;
+
+	/* IRQ domain hierarchy */
+	struct irq_domain	*dev_domain;
+	struct irq_domain	*msi_domain;
+	struct fwnode_handle	*fwnode;
+
+	/* iommu_ops support */
+	struct iommu_device	iommu;
+};
+
+/* These are used for config access before all the PCI probing
+   has been done. */
+extern int early_read_config_byte(struct pci_controller *hose, int bus,
+			int dev_fn, int where, u8 *val);
+extern int early_read_config_word(struct pci_controller *hose, int bus,
+			int dev_fn, int where, u16 *val);
+extern int early_read_config_dword(struct pci_controller *hose, int bus,
+			int dev_fn, int where, u32 *val);
+extern int early_write_config_byte(struct pci_controller *hose, int bus,
+			int dev_fn, int where, u8 val);
+extern int early_write_config_word(struct pci_controller *hose, int bus,
+			int dev_fn, int where, u16 val);
+extern int early_write_config_dword(struct pci_controller *hose, int bus,
+			int dev_fn, int where, u32 val);
+
+extern int early_find_capability(struct pci_controller *hose, int bus,
+				 int dev_fn, int cap);
+
+extern void setup_indirect_pci(struct pci_controller* hose,
+			       resource_size_t cfg_addr,
+			       resource_size_t cfg_data, u32 flags);
+
+extern int indirect_read_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 *val);
+
+extern int __indirect_read_config(struct pci_controller *hose,
+				  unsigned char bus_number, unsigned int devfn,
+				  int offset, int len, u32 *val);
+
+extern int indirect_write_config(struct pci_bus *bus, unsigned int devfn,
+				 int offset, int len, u32 val);
+
+static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
+{
+	return bus->sysdata;	/* sysdata is returned as the bus's pci_controller */
+}
+
+#ifdef CONFIG_PPC_PMAC
+extern int pci_device_from_OF_node(struct device_node *node,
+				   u8 *bus, u8 *devfn);
+#endif
+#ifndef CONFIG_PPC64
+
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+extern void pci_create_OF_bus_map(void);
+#else
+static inline void pci_create_OF_bus_map(void) {}
+#endif
+
+#else	/* CONFIG_PPC64 */
+
+/*
+ * PCI stuff, for nodes representing PCI devices, pointed to
+ * by device_node->data.
+ */
+struct iommu_table;
+
+struct pci_dn {
+	int     flags;			/* PCI_DN_FLAG_* bits below */
+#define PCI_DN_FLAG_IOV_VF	0x01
+#define PCI_DN_FLAG_DEAD	0x02    /* Device has been hot-removed */
+
+	int	busno;			/* pci bus number */
+	int	devfn;			/* pci device and function number */
+	int	vendor_id;		/* Vendor ID */
+	int	device_id;		/* Device ID */
+	int	class_code;		/* Device class code */
+
+	struct  pci_dn *parent;
+	struct  pci_controller *phb;	/* for pci devices */
+	struct	iommu_table_group *table_group;	/* for phb's or bridges */
+
+	int	pci_ext_config_space;	/* for pci devices */
+#ifdef CONFIG_EEH
+	struct eeh_dev *edev;		/* eeh device */
+#endif	/* CONFIG_EEH */
+#define IODA_INVALID_PE		0xFFFFFFFF
+	unsigned int pe_number;
+#ifdef CONFIG_PCI_IOV
+	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
+	u16     num_vfs;		/* number of VFs enabled */
+	unsigned int *pe_num_map;	/* PE# for the first VF PE or array */
+	bool    m64_single_mode;	/* Use M64 BAR in Single Mode */
+#define IODA_INVALID_M64        (-1)
+	int     (*m64_map)[PCI_SRIOV_NUM_BARS];	/* Only used on powernv */
+	int     last_allow_rc;			/* Only used on pseries */
+#endif /* CONFIG_PCI_IOV */
+	int	mps;			/* Maximum Payload Size */
+	struct list_head child_list;
+	struct list_head list;
+	struct resource holes[PCI_SRIOV_NUM_BARS];
+};
+
+/* Get the pointer to a device_node's pci_dn */
+#define PCI_DN(dn)	((struct pci_dn *) (dn)->data)
+
+extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+					   int devfn);
+extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
+extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+					       struct device_node *dn);
+extern void pci_remove_device_node_info(struct device_node *dn);
+
+#ifdef CONFIG_PCI_IOV
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev);
+void remove_sriov_vf_pdns(struct pci_dev *pdev);
+#endif
+
+#if defined(CONFIG_EEH)
+static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn *pdn)
+{
+	return pdn ? pdn->edev : NULL;	/* NULL-safe: a NULL pdn yields NULL */
+}
+#else	/* !CONFIG_EEH: pci_dn has no edev member, always NULL */
+#define pdn_to_eeh_dev(x)	(NULL)
+#endif	/* CONFIG_EEH */
+
+/** Find the bus corresponding to the indicated device node */
+extern struct pci_bus *pci_find_bus_by_node(struct device_node *dn);
+
+/** Remove all of the PCI devices under this bus */
+extern void pci_hp_remove_devices(struct pci_bus *bus);
+
+/** Discover new pci devices under this bus, and add them */
+extern void pci_hp_add_devices(struct pci_bus *bus);
+
+extern int pcibios_unmap_io_space(struct pci_bus *bus);
+extern int pcibios_map_io_space(struct pci_bus *bus);
+
+#ifdef CONFIG_NUMA
+#define PHB_SET_NODE(PHB, NODE)		((PHB)->node = (NODE))
+#else
+#define PHB_SET_NODE(PHB, NODE)		((PHB)->node = NUMA_NO_NODE)
+#endif
+
+#endif	/* CONFIG_PPC64 */
+
+/* Get the PCI host controller for an OF device */
+extern struct pci_controller *pci_find_hose_for_OF_device(
+			struct device_node* node);
+
+extern struct pci_controller *pci_find_controller_for_domain(int domain_nr);
+
+/* Fill up host controller resources from the OF node */
+extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
+			struct device_node *dev, int primary);
+
+/* Allocate & free a PCI host bridge structure */
+extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
+extern void pcibios_free_controller(struct pci_controller *phb);
+extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
+
+#ifdef CONFIG_PCI
+extern int pcibios_vaddr_is_ioport(void __iomem *address);
+#else	/* !CONFIG_PCI */
+static inline int pcibios_vaddr_is_ioport(void __iomem *address)
+{
+	return 0;	/* stub: always reports 0 when PCI is not configured */
+}
+#endif	/* CONFIG_PCI */
+
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_PCI_BRIDGE_H */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
new file mode 100644
index 0000000000..f5078a7dd8
--- /dev/null
+++ b/arch/powerpc/include/asm/pci.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_POWERPC_PCI_H
+#define __ASM_POWERPC_PCI_H
+#ifdef __KERNEL__
+
+/*
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dma-map-ops.h>
+#include <linux/scatterlist.h>
+
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+
+/* Return values for pci_controller_ops.probe_mode function */
+#define PCI_PROBE_NONE		-1	/* Don't look at this bus at all */
+#define PCI_PROBE_NORMAL	0	/* Do normal PCI probing */
+#define PCI_PROBE_DEVTREE	1	/* Instantiate from device tree */
+
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		0x10000000
+
+/* Values for the `which' argument to sys_pciconfig_iobase syscall.  */
+#define IOBASE_BRIDGE_NUMBER	0
+#define IOBASE_MEMORY		1
+#define IOBASE_IO		2
+#define IOBASE_ISA_IO		3
+#define IOBASE_ISA_MEM		4
+
+/*
+ * Set this to 1 if you want the kernel to re-assign all PCI
+ * bus numbers (don't do that on ppc64 yet !)
+ */
+#define pcibios_assign_all_busses() \
+	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
+
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	if (!ppc_md.pci_get_legacy_ide_irq)	/* no platform override */
+		return channel == 0 ? 14 : 15;
+	return ppc_md.pci_get_legacy_ide_irq(dev, channel);
+}
+
+#ifdef CONFIG_PCI
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops);
+#else	/* CONFIG_PCI */
+#define set_pci_dma_ops(d)
+#endif
+
+#ifdef CONFIG_PPC64
+
+/*
+ * We want to avoid touching the cacheline size or MWI bit.
+ * pSeries firmware sets the cacheline size (which is not the cpu cacheline
+ * size in all cases) and hardware treats MWI the same as memory write.
+ */
+#define PCI_DISABLE_MWI
+
+#endif /* CONFIG_PPC64 */
+
+extern int pci_domain_nr(struct pci_bus *bus);
+
+/* Decide whether to display the domain number in /proc */
+extern int pci_proc_domain(struct pci_bus *bus);
+
+struct vm_area_struct;
+
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP			1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
+#define arch_can_pci_mmap_io()		1
+#define arch_can_pci_mmap_wc()		1
+
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+			   size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+			   size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+				   enum pci_mmap_state mmap_type);
+#define HAVE_PCI_LEGACY	1
+
+extern void pcibios_claim_one_bus(struct pci_bus *b);
+
+extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
+
+extern void pcibios_resource_survey(void);
+
+extern struct pci_controller *init_phb_dynamic(struct device_node *dn);
+extern int remove_phb_dynamic(struct pci_controller *phb);
+
+extern struct pci_dev *of_create_pci_dev(struct device_node *node,
+					struct pci_bus *bus, int devfn);
+
+extern unsigned int pci_parse_of_flags(u32 addr0, int bridge);
+
+extern void of_scan_pci_bridge(struct pci_dev *dev);
+
+extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
+extern void of_rescan_bus(struct device_node *node, struct pci_bus *bus);
+
+struct file;
+extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
+					 unsigned long pfn,
+					 unsigned long size,
+					 pgprot_t prot);
+
+extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
+extern void pcibios_setup_bus_self(struct pci_bus *bus);
+extern void pcibios_setup_phb_io_space(struct pci_controller *hose);
+extern void pcibios_scan_phb(struct pci_controller *hose);
+
+#endif	/* __KERNEL__ */
+
+#endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
new file mode 100644
index 0000000000..8e5b7d0b85
--- /dev/null
+++ b/arch/powerpc/include/asm/percpu.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PERCPU_H_
+#define _ASM_POWERPC_PERCPU_H_
+#ifdef __powerpc64__
+
+/*
+ * Same as asm-generic/percpu.h, except that we store the per cpu offset
+ * in the paca. Based on the x86-64 implementation.
+ */
+
+#ifdef CONFIG_SMP
+
+#define __my_cpu_offset local_paca->data_offset
+
+#endif /* CONFIG_SMP */
+#endif /* __powerpc64__ */
+
+#include <asm-generic/percpu.h>
+
+#include <asm/paca.h>
+
+#endif /* _ASM_POWERPC_PERCPU_H_ */
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
new file mode 100644
index 0000000000..164e910bf6
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance event support - hardware-specific disambiguation
+ *
+ * For now this is a compile-time decision, but eventually it should be
+ * runtime.  This would allow multiplatform perf event support for e300 (fsl
+ * embedded perf counters) plus server/classic, and would accommodate
+ * devices other than the core which provide their own performance counters.
+ *
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ */
+
+#ifdef CONFIG_PPC_PERF_CTRS
+#include <asm/perf_event_server.h>
+#else
+static inline bool is_sier_available(void) { return false; }
+static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; }
+#endif
+
+#ifdef CONFIG_FSL_EMB_PERF_EVENT
+#include <asm/perf_event_fsl_emb.h>
+#endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+
+#define perf_arch_bpf_user_pt_regs(regs) (&(regs)->user_regs)	/* parenthesized for safe expansion */
+
+/*
+ * Overload regs->result to specify whether we should use the MSR (result
+ * is zero) or the SIAR (result is non zero).
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)			\
+	do {							\
+		(regs)->result = 0;				\
+		(regs)->nip = __ip;				\
+		(regs)->gpr[1] = current_stack_frame();		\
+		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
+	} while (0)
+
+/* To support perf_regs sier update */
+extern bool is_sier_available(void);
+extern unsigned long get_pmcs_ext_regs(int idx);
+/* To define perf extended regs mask value */
+extern u64 PERF_REG_EXTENDED_MASK;
+#define PERF_REG_EXTENDED_MASK	PERF_REG_EXTENDED_MASK
+#endif
diff --git a/arch/powerpc/include/asm/perf_event_fsl_emb.h b/arch/powerpc/include/asm/perf_event_fsl_emb.h
new file mode 100644
index 0000000000..c4d9ceb03e
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_event_fsl_emb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance event support - Freescale embedded specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/types.h>
+#include <asm/hw_irq.h>
+
+#define MAX_HWEVENTS 6
+
+/* event flags */
+#define FSL_EMB_EVENT_VALID      1
+#define FSL_EMB_EVENT_RESTRICTED 2
+
+/* upper half of event flags is PMLCb */
+#define FSL_EMB_EVENT_THRESHMUL  0x0000070000000000ULL
+#define FSL_EMB_EVENT_THRESH     0x0000003f00000000ULL
+
+struct fsl_emb_pmu {
+	const char	*name;
+	int		n_counter; /* total number of counters */
+
+	/*
+	 * The number of contiguous counters starting at zero that
+	 * can hold restricted events, or zero if there are no
+	 * restricted events.
+	 *
+	 * This isn't a very flexible method of expressing constraints,
+	 * but it's very simple and is adequate for existing chips.
+	 */
+	int		n_restricted;
+
+	/* Returns event flags and PMLCb (FSL_EMB_EVENT_*) */
+	u64		(*xlate_event)(u64 event_id);
+
+	int		n_generic;
+	int		*generic_events;
+	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+			       [PERF_COUNT_HW_CACHE_OP_MAX]
+			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+int register_fsl_emb_pmu(struct fsl_emb_pmu *);
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
new file mode 100644
index 0000000000..e2221d29fd
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance event support - PowerPC classic/server specific definitions.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <asm/hw_irq.h>
+#include <linux/device.h>
+#include <uapi/asm/perf_event.h>
+
+/* Update perf_event_print_debug() if this changes */
+#define MAX_HWEVENTS		8
+#define MAX_EVENT_ALTERNATIVES	8
+#define MAX_LIMITED_HWCOUNTERS	2
+
+struct perf_event;
+
+struct mmcr_regs {
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcr2;
+	unsigned long mmcra;
+	unsigned long mmcr3;
+};
+/*
+ * This struct provides the constants and functions needed to
+ * describe the PMU on a particular POWER-family CPU.
+ */
+struct power_pmu {
+	const char	*name;
+	int		n_counter;
+	int		max_alternatives;
+	unsigned long	add_fields;
+	unsigned long	test_adder;
+	int		(*compute_mmcr)(u64 events[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags);
+	int		(*get_constraint)(u64 event_id, unsigned long *mskp,
+				unsigned long *valp, u64 event_config1);
+	int		(*get_alternatives)(u64 event_id, unsigned int flags,
+				u64 alt[]);
+	void		(*get_mem_data_src)(union perf_mem_data_src *dsrc,
+				u32 flags, struct pt_regs *regs);
+	void		(*get_mem_weight)(u64 *weight, u64 type);
+	unsigned long	group_constraint_mask;
+	unsigned long	group_constraint_val;
+	u64             (*bhrb_filter_map)(u64 branch_sample_type);
+	void            (*config_bhrb)(u64 pmu_bhrb_filter);
+	void		(*disable_pmc)(unsigned int pmc, struct mmcr_regs *mmcr);
+	int		(*limited_pmc_event)(u64 event_id);
+	u32		flags;
+	const struct attribute_group	**attr_groups;
+	int		n_generic;
+	int		*generic_events;
+	u64		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+			       [PERF_COUNT_HW_CACHE_OP_MAX]
+			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+	int		n_blacklist_ev;
+	int 		*blacklist_ev;
+	/* BHRB entries in the PMU */
+	int		bhrb_nr;
+	/*
+	 * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
+	 * the pmu supports extended perf regs capability
+	 */
+	int		capabilities;
+	/*
+	 * Function to check event code for values which are
+	 * reserved. Function takes struct perf_event as input,
+	 * since event code could be spread in attr.config*
+	 */
+	int		(*check_attr_config)(struct perf_event *ev);
+};
+
+/*
+ * Values for power_pmu.flags
+ */
+#define PPMU_LIMITED_PMC5_6	0x00000001 /* PMC5/6 have limited function */
+#define PPMU_ALT_SIPR		0x00000002 /* uses alternate posn for SIPR/HV */
+#define PPMU_NO_SIPR		0x00000004 /* no SIPR/HV in MMCRA at all */
+#define PPMU_NO_CONT_SAMPLING	0x00000008 /* no continuous sampling */
+#define PPMU_SIAR_VALID		0x00000010 /* Processor has SIAR Valid bit */
+#define PPMU_HAS_SSLOT		0x00000020 /* Has sampled slot in MMCRA */
+#define PPMU_HAS_SIER		0x00000040 /* Has SIER */
+#define PPMU_ARCH_207S		0x00000080 /* PMC is architecture v2.07S */
+#define PPMU_NO_SIAR		0x00000100 /* Do not use SIAR */
+#define PPMU_ARCH_31		0x00000200 /* Has MMCR3, SIER2 and SIER3 */
+#define PPMU_P10_DD1		0x00000400 /* Is power10 DD1 processor version */
+#define PPMU_HAS_ATTR_CONFIG1	0x00000800 /* Using config1 attribute */
+
+/*
+ * Values for flags to get_alternatives()
+ */
+#define PPMU_LIMITED_PMC_OK	1	/* can put this on a limited PMC */
+#define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
+#define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
+
+int __init register_power_pmu(struct power_pmu *pmu);
+
+struct pt_regs;
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long int read_bhrb(int n);
+
+/*
+ * Only override the default definitions in include/linux/perf_event.h
+ * if we have hardware PMU support.
+ */
+#ifdef CONFIG_PPC_PERF_CTRS
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#endif
+
+/*
+ * The power_pmu.get_constraint function returns a 32/64-bit value and
+ * a 32/64-bit mask that express the constraints between this event_id and
+ * other events.
+ *
+ * The value and mask are divided up into (non-overlapping) bitfields
+ * of three different types:
+ *
+ * Select field: this expresses the constraint that some set of bits
+ * in MMCR* needs to be set to a specific value for this event_id.  For a
+ * select field, the mask contains 1s in every bit of the field, and
+ * the value contains a unique value for each possible setting of the
+ * MMCR* bits.  The constraint checking code will ensure that two events
+ * that set the same field in their masks have the same value in their
+ * value dwords.
+ *
+ * Add field: this expresses the constraint that there can be at most
+ * N events in a particular class.  A field of k bits can be used for
+ * N <= 2^(k-1) - 1.  The mask has the most significant bit of the field
+ * set (and the other bits 0), and the value has only the least significant
+ * bit of the field set.  In addition, the 'add_fields' and 'test_adder'
+ * in the struct power_pmu for this processor come into play.  The
+ * add_fields value contains 1 in the LSB of the field, and the
+ * test_adder contains 2^(k-1) - 1 - N in the field.
+ *
+ * NAND field: this expresses the constraint that you may not have events
+ * in all of a set of classes.  (For example, on PPC970, you can't select
+ * events from the FPU, ISU and IDU simultaneously, although any two are
+ * possible.)  For N classes, the field is N+1 bits wide, and each class
+ * is assigned one bit from the least-significant N bits.  The mask has
+ * only the most-significant bit set, and the value has only the bit
+ * for the event_id's class set.  The test_adder has the least significant
+ * bit set in the field.
+ *
+ * If an event_id is not subject to the constraint expressed by a particular
+ * field, then it will have 0 in both the mask and value for that field.
+ */
+
+extern ssize_t power_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page);
+
+/*
+ * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix.
+ *
+ * Having a suffix allows us to have aliases in sysfs - eg: the generic
+ * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
+ * 'PM_CYC' where the latter is the name by which the event is known in
+ * POWER CPU specification.
+ *
+ * Similarly, some hardware and cache events use the same event code. Eg.
+ * on POWER8, both "cache-references" and "L1-dcache-loads" events refer
+ * to the same event, PM_LD_REF_L1.  The suffix allows us to have two
+ * sysfs objects for the same event and thus two entries/aliases in sysfs.
+ */
+#define	EVENT_VAR(_id, _suffix)		event_attr_##_id##_suffix
+#define	EVENT_PTR(_id, _suffix)		&EVENT_VAR(_id, _suffix).attr.attr
+
+#define	EVENT_ATTR(_name, _id, _suffix)					\
+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id,		\
+			power_events_sysfs_show)
+
+#define	GENERIC_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _g)
+#define	GENERIC_EVENT_PTR(_id)		EVENT_PTR(_id, _g)
+
+#define	CACHE_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _c)
+#define	CACHE_EVENT_PTR(_id)		EVENT_PTR(_id, _c)
+
+#define	POWER_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _p)
+#define	POWER_EVENT_PTR(_id)		EVENT_PTR(_id, _p)
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
new file mode 100644
index 0000000000..3a971e2a8c
--- /dev/null
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGALLOC_H
+#define _ASM_POWERPC_PGALLOC_H
+
+#include <linux/mm.h>
+
+#ifndef MODULE
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+	if (likely(mm != &init_mm))	/* only init_mm allocations skip __GFP_ACCOUNT */
+		gfp |= __GFP_ACCOUNT;
+	return gfp;
+}
+#else /* !MODULE */
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+	return gfp | __GFP_ACCOUNT;
+}
+#endif /* MODULE */
+
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+	return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+/* arch uses the pte_free_defer() implementation in arch/powerpc/mm/pgtable-frag.c */
+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages, the allocation size will be
+ * (2^index_size * sizeof(pointer)) and allocations are drawn from
+ * the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE	0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) pgtable_cache[shift]
+
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgalloc.h>
+#else
+#include <asm/nohash/pgalloc.h>
+#endif
+
+#endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
new file mode 100644
index 0000000000..82633200b5
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_BE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_BE_TYPES_H
+
+#include <asm/cmpxchg.h>
+
+/* PTE level */
+typedef struct { __be64 pte; } pte_t;
+#define __pte(x)	((pte_t) { cpu_to_be64(x) })
+#define __pte_raw(x)	((pte_t) { (x) })
+static inline unsigned long pte_val(pte_t x)
+{
+	return be64_to_cpu(x.pte);
+}
+
+static inline __be64 pte_raw(pte_t x)
+{
+	return x.pte;
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { __be64 pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { cpu_to_be64(x) })
+#define __pmd_raw(x)	((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+	return be64_to_cpu(x.pmd);
+}
+
+static inline __be64 pmd_raw(pmd_t x)
+{
+	return x.pmd;
+}
+
+/* 64 bit always use 4 level table. */
+typedef struct { __be64 pud; } pud_t;
+#define __pud(x)	((pud_t) { cpu_to_be64(x) })
+#define __pud_raw(x)	((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+	return be64_to_cpu(x.pud);
+}
+
+static inline __be64 pud_raw(pud_t x)
+{
+	return x.pud;
+}
+
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { __be64 pgd; } pgd_t;
+#define __pgd(x)	((pgd_t) { cpu_to_be64(x) })
+#define __pgd_raw(x)	((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+	return be64_to_cpu(x.pgd);
+}
+
+static inline __be64 pgd_raw(pgd_t x)
+{
+	return x.pgd;
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
+{
+	unsigned long *p = (unsigned long *)ptep;
+	__be64 prev;
+
+	/* See comment in switch_mm_irqs_off() */
+	prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
+					     (__force unsigned long)pte_raw(new));
+
+	return pte_raw(old) == prev;
+}
+
+static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new)
+{
+	unsigned long *p = (unsigned long *)pmdp;
+	__be64 prev;
+
+	prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pmd_raw(old),
+					     (__force unsigned long)pmd_raw(new));
+
+	return pmd_raw(old) == prev;
+}
+
+#ifdef CONFIG_ARCH_HAS_HUGEPD
+typedef struct { __be64 pdbe; } hugepd_t;
+#define __hugepd(x) ((hugepd_t) { cpu_to_be64(x) })
+
+static inline unsigned long hpd_val(hugepd_t x)
+{
+	return be64_to_cpu(x.pdbe);
+}
+#endif
+
+#endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
new file mode 100644
index 0000000000..082c85cc09
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_TYPES_H
+
+#if defined(__CHECKER__) || !defined(CONFIG_PPC32)
+#define STRICT_MM_TYPECHECKS
+#endif
+
+/* PTE level */
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
+typedef struct { pte_basic_t pte, pte1, pte2, pte3; } pte_t;
+#elif defined(STRICT_MM_TYPECHECKS)
+typedef struct { pte_basic_t pte; } pte_t;
+#else
+typedef pte_basic_t pte_t;
+#endif
+
+#if defined(STRICT_MM_TYPECHECKS) || \
+    (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
+#define __pte(x)	((pte_t) { (x) })
+static inline pte_basic_t pte_val(pte_t x)
+{
+	return x.pte;
+}
+#else
+#define __pte(x)	((pte_t)(x))
+static inline pte_basic_t pte_val(pte_t x)
+{
+	return x;
+}
+#endif
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { unsigned long pmd; } pmd_t;
+#define __pmd(x)	((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+	return x.pmd;
+}
+
+/* 64 bit always use 4 level table. */
+typedef struct { unsigned long pud; } pud_t;
+#define __pud(x)	((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+	return x.pud;
+}
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { unsigned long pgd; } pgd_t;
+#define __pgd(x)	((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+	return x.pgd;
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/cmpxchg.h>
+
+static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
+{
+	unsigned long *p = (unsigned long *)ptep;
+
+	/* See comment in switch_mm_irqs_off() */
+	return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_HUGEPD
+typedef struct { unsigned long pd; } hugepd_t;
+#define __hugepd(x) ((hugepd_t) { (x) })
+static inline unsigned long hpd_val(hugepd_t x)
+{
+	return x.pd;
+}
+#endif
+
+#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
new file mode 100644
index 0000000000..d0ee46de24
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_H
+#define _ASM_POWERPC_PGTABLE_H
+
+#ifndef __ASSEMBLY__
+#include <linux/mmdebug.h>
+#include <linux/mmzone.h>
+#include <asm/processor.h>		/* For TASK_SIZE */
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+
+struct mm_struct;
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgtable.h>
+#else
+#include <asm/nohash/pgtable.h>
+#endif /* !CONFIG_PPC_BOOK3S */
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP		(PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
+
+#ifndef __ASSEMBLY__
+
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		pte_t pte, unsigned int nr);
+#define set_ptes set_ptes
+#define update_mmu_cache(vma, addr, ptep) \
+	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
+
+#ifndef MAX_PTRS_PER_PGD
+#define MAX_PTRS_PER_PGD PTRS_PER_PGD
+#endif
+
+/* Keep these as macros to avoid include dependency mess */
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
+}
+
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+	/* Mask off the RPN field; what remains are the non-pfn bits. */
+	unsigned long prot_bits = pte_val(pte) & ~PTE_RPN_MASK;
+
+	return __pgprot(prot_bits);
+}
+
+#ifndef pmd_page_vaddr
+static inline const void *pmd_page_vaddr(pmd_t pmd)
+{
+	return __va(pmd_val(pmd) & ~PMD_MASKED_BITS);
+}
+#define pmd_page_vaddr pmd_page_vaddr
+#endif
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+extern pgd_t swapper_pg_dir[];
+
+extern void paging_init(void);
+void poking_init(void);
+
+extern unsigned long ioremap_bot;
+extern const pgprot_t protection_map[16];
+
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_large(pmd)		0
+#endif
+
+/* can we use this in kvm */
+unsigned long vmalloc_to_phys(void *vmalloc_addr);
+
+void pgtable_cache_add(unsigned int shift);
+
+pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va);
+
+#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
+void mark_initmem_nx(void);
+#else
+static inline void mark_initmem_nx(void) { }
+#endif
+
+/*
+ * When used, PTE_FRAG_NR is defined in subarch pgtable.h
+ * so we are sure it is included when arriving here.
+ */
+#ifdef PTE_FRAG_NR
+static inline void *pte_frag_get(mm_context_t *ctx)
+{
+	return ctx->pte_frag;
+}
+
+static inline void pte_frag_set(mm_context_t *ctx, void *p)
+{
+	ctx->pte_frag = p;
+}
+#else
+#define PTE_FRAG_NR		1
+#define PTE_FRAG_SIZE_SHIFT	PAGE_SHIFT
+#define PTE_FRAG_SIZE		(1UL << PTE_FRAG_SIZE_SHIFT)
+
+static inline void *pte_frag_get(mm_context_t *ctx)
+{
+	return NULL;
+}
+
+static inline void pte_frag_set(mm_context_t *ctx, void *p)
+{
+}
+#endif
+
+#ifndef pmd_is_leaf
+#define pmd_is_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+	return false;
+}
+#endif
+
+#ifndef pud_is_leaf
+#define pud_is_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+	return false;
+}
+#endif
+
+#ifndef p4d_is_leaf
+#define p4d_is_leaf p4d_is_leaf
+static inline bool p4d_is_leaf(p4d_t p4d)
+{
+	return false;
+}
+#endif
+
+#define pmd_pgtable pmd_pgtable
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+	return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
+#ifdef CONFIG_PPC64
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size);
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+			   unsigned long page_size);
+/*
+ * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into detail on
+ * some of the restrictions. We don't check for PMD_SIZE because our
+ * vmemmap allocation code can fallback correctly. The pageblock
+ * alignment requirement is met using altmap->reserve blocks.
+ */
+#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory
+static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
+{
+	/* Anything other than the radix MMU is refused outright. */
+	if (!radix_enabled())
+		return false;
+	/*
+	 * With 4K pages and a 2M PMD_SIZE, requiring PMD_SIZE alignment
+	 * lines up well with memory block sizes from 128MB upwards.
+	 */
+	if (!IS_ENABLED(CONFIG_PPC_4K_PAGES))
+		return true;
+	return IS_ALIGNED(vmemmap_size, PMD_SIZE);
+}
+
+#endif /* CONFIG_PPC64 */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
new file mode 100644
index 0000000000..59a2c7dbc7
--- /dev/null
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_KEYS_H
+#define _ASM_POWERPC_KEYS_H
+
+#include <linux/jump_label.h>
+#include <asm/firmware.h>
+
+extern int num_pkey;
+extern u32 reserved_allocation_mask; /* bits set for reserved keys */
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
+			    VM_PKEY_BIT3 | VM_PKEY_BIT4)
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE   0x4
+#define PKEY_ACCESS_MASK       (PKEY_DISABLE_ACCESS | \
+				PKEY_DISABLE_WRITE  | \
+				PKEY_DISABLE_EXECUTE)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/pkeys.h>
+#else
+#error "Not supported"
+#endif
+
+
+static inline u64 pkey_to_vmflag_bits(u16 pkey)
+{
+	return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS);
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return 0;
+	return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+static inline int arch_max_pkey(void)
+{
+	return num_pkey;
+}
+
+#define pkey_alloc_mask(pkey) (0x1U << (pkey))	/* unsigned: key 31 must not shift into the sign bit */
+
+#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
+
+#define __mm_pkey_allocated(mm, pkey) {	\
+	mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_free(mm, pkey) {	\
+	mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey);	\
+}
+
+#define __mm_pkey_is_allocated(mm, pkey)	\
+	(mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
+
+#define __mm_pkey_is_reserved(pkey) (reserved_allocation_mask & \
+				       pkey_alloc_mask(pkey))
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+	bool in_range = pkey >= 0 && pkey < arch_max_pkey();
+
+	/*
+	 * Out-of-range keys and reserved keys are never reported as
+	 * allocated, whatever the allocation map says.
+	 */
+	return in_range && !__mm_pkey_is_reserved(pkey) &&
+	       __mm_pkey_is_allocated(mm, pkey);
+}
+
+/*
+ * Returns a positive, 5-bit key on success, or -1 on failure.
+ * Relies on the mmap_lock to protect against concurrency in mm_pkey_alloc() and
+ * mm_pkey_free().
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+	/*
+	 * Note: this is the only place that checks a pkey is valid as far
+	 * as the hardware is concerned. The rest of the kernel trusts that
+	 * only good, valid pkeys come out of here.
+	 */
+	const u32 map_full = (u32)(~(0x0));
+	int key;
+
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return -1;
+	/*
+	 * A completely full map has to be caught up front: ffz() behavior
+	 * is undefined if there are no zeros.
+	 */
+	if (mm_pkey_allocation_map(mm) == map_full)
+		return -1;
+
+	key = ffz((u32)mm_pkey_allocation_map(mm));
+	__mm_pkey_allocated(mm, key);
+
+	return key;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+	/* Without pkey support there is nothing to free: report plain -1. */
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return -1;
+	/* Only a key that mm_pkey_is_allocated() accepts can be released. */
+	if (mm_pkey_is_allocated(mm, pkey)) {
+		__mm_pkey_free(mm, pkey);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Try to dedicate one of the protection keys to be used as an
+ * execute-only protection key.
+ */
+extern int execute_only_pkey(struct mm_struct *mm);
+extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
+					 int prot, int pkey);
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+					      int prot, int pkey)
+{
+	/* Keys are unsupported: report key 0. */
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return 0;
+
+	/*
+	 * Only pkey == -1 is handed to the arch code to pick a key. Any other
+	 * value came from the user via mprotect_pkey() and is never overridden.
+	 */
+	if (pkey == -1)
+		return __arch_override_mprotect_pkey(vma, prot, pkey);
+	return pkey;
+}
+
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+				       unsigned long init_val);
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+					    unsigned long init_val)
+{
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return -EINVAL;
+
+	/*
+	 * pkey-0 is associated with every page in the kernel, so userspace
+	 * must not restrict it or the kernel cannot operate: a request that
+	 * denies nothing (init_val == 0) is a no-op, anything else is refused.
+	 */
+	if (pkey != 0)
+		return __arch_set_user_pkey_access(tsk, pkey, init_val);
+	if (init_val)
+		return -EINVAL;
+	return 0;
+}
+
+static inline bool arch_pkeys_enabled(void)
+{
+	return mmu_has_feature(MMU_FTR_PKEY);
+}
+
+extern void pkey_mm_init(struct mm_struct *mm);
+#endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
new file mode 100644
index 0000000000..fe3d0ea005
--- /dev/null
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H
+#define _ASM_POWERPC_PLPAR_WRAPPERS_H
+
+#ifdef CONFIG_PPC_PSERIES
+
+#include <linux/string.h>
+#include <linux/irqflags.h>
+
+#include <asm/hvcall.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/page.h>
+
+static inline long poll_pending(void)
+{
+	return plpar_hcall_norets(H_POLL_PENDING);	/* hcall status is returned as-is */
+}
+
+static inline u8 get_cede_latency_hint(void)
+{
+	return get_lppaca()->cede_latency_hint;	/* value currently held in the lppaca */
+}
+
+static inline void set_cede_latency_hint(u8 latency_hint)
+{
+	get_lppaca()->cede_latency_hint = latency_hint;	/* plain store; old value not returned */
+}
+
+static inline long cede_processor(void)
+{
+	/*
+	 * We cannot call tracepoints inside RCU idle regions, which means
+	 * H_CEDE must be issued through the _notrace hcall wrapper.
+	 */
+	return plpar_hcall_norets_notrace(H_CEDE);
+}
+
+static inline long extended_cede_processor(unsigned long latency_hint)
+{
+	u8 saved_hint = get_cede_latency_hint();
+	long ret;
+
+	/* Install the caller's hint only for the duration of this cede. */
+	set_cede_latency_hint(latency_hint);
+	ret = cede_processor();
+
+	/* Ensure that H_CEDE returns with IRQs on */
+	if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & MSR_EE)))
+		__hard_irq_enable();
+
+	/* Put back the hint that was in the lppaca before the call. */
+	set_cede_latency_hint(saved_hint);
+	return ret;
+}
+
+static inline long vpa_call(unsigned long flags, unsigned long cpu,
+		unsigned long vpa)
+{
+	/* The caller's function code is shifted left by H_VPA_FUNC_SHIFT first. */
+	flags <<= H_VPA_FUNC_SHIFT;
+	return plpar_hcall_norets(H_REGISTER_VPA, flags, cpu, vpa);
+}
+
+static inline long unregister_vpa(unsigned long cpu)
+{
+	return vpa_call(H_VPA_DEREG_VPA, cpu, 0);	/* address argument is passed as 0 */
+}
+
+static inline long register_vpa(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(H_VPA_REG_VPA, cpu, vpa);	/* cpu and vpa forwarded unchanged */
+}
+
+static inline long unregister_slb_shadow(unsigned long cpu)
+{
+	return vpa_call(H_VPA_DEREG_SLB, cpu, 0);	/* address argument is passed as 0 */
+}
+
+static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(H_VPA_REG_SLB, cpu, vpa);	/* NOTE(review): vpa = SLB shadow addr? */
+}
+
+static inline long unregister_dtl(unsigned long cpu)
+{
+	return vpa_call(H_VPA_DEREG_DTL, cpu, 0);	/* address argument is passed as 0 */
+}
+
+static inline long register_dtl(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(H_VPA_REG_DTL, cpu, vpa);	/* NOTE(review): vpa = DTL buffer addr? */
+}
+
+extern void vpa_init(int cpu);
+
+static inline long plpar_pte_enter(unsigned long flags,
+		unsigned long hpte_group, unsigned long hpte_v,
+		unsigned long hpte_r, unsigned long *slot)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	status = plpar_hcall(H_ENTER, out, flags, hpte_group, hpte_v, hpte_r);
+	/* out[0] is copied to *slot unconditionally, even on failure. */
+	*slot = out[0];
+
+	return status;
+}
+
+static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex,
+		unsigned long avpn, unsigned long *old_pteh_ret,
+		unsigned long *old_ptel_ret)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	status = plpar_hcall(H_REMOVE, out, flags, ptex, avpn);
+	/* Both return words are copied out regardless of the status. */
+	*old_pteh_ret = out[0];
+	*old_ptel_ret = out[1];
+
+	return status;
+}
+
+/* plpar_pte_remove_raw can be called in real mode. It calls plpar_hcall_raw */
+static inline long plpar_pte_remove_raw(unsigned long flags, unsigned long ptex,
+		unsigned long avpn, unsigned long *old_pteh_ret,
+		unsigned long *old_ptel_ret)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	status = plpar_hcall_raw(H_REMOVE, out, flags, ptex, avpn);
+	/* Both return words are copied out regardless of the status. */
+	*old_pteh_ret = out[0];
+	*old_ptel_ret = out[1];
+
+	return status;
+}
+
+static inline long plpar_pte_read(unsigned long flags, unsigned long ptex,
+		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	status = plpar_hcall(H_READ, out, flags, ptex);
+	/* Both return words are copied out regardless of the status. */
+	*old_pteh_ret = out[0];
+	*old_ptel_ret = out[1];
+
+	return status;
+}
+
+/* plpar_pte_read_raw can be called in real mode. It calls plpar_hcall_raw */
+static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex,
+		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	status = plpar_hcall_raw(H_READ, out, flags, ptex);
+	/* Both return words are copied out regardless of the status. */
+	*old_pteh_ret = out[0];
+	*old_ptel_ret = out[1];
+
+	return status;
+}
+
+/*
+ * H_READ with H_READ_4 set: eight words of the hcall return buffer are
+ * copied to ptes, which must hold at least 8*sizeof(unsigned long) bytes.
+ */
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+				    unsigned long *ptes)
+{
+	unsigned long out[PLPAR_HCALL9_BUFSIZE];
+	long status;
+
+	status = plpar_hcall9(H_READ, out, flags | H_READ_4, ptex);
+	/* The copy happens whether or not the hcall succeeded. */
+	memcpy(ptes, out, 8 * sizeof(*ptes));
+
+	return status;
+}
+
+/*
+ * Variant of plpar_pte_read_4() that goes through plpar_hcall9_raw();
+ * it can be called in real mode. ptes must hold at least
+ * 8*sizeof(unsigned long) bytes.
+ */
+static inline long plpar_pte_read_4_raw(unsigned long flags, unsigned long ptex,
+					unsigned long *ptes)
+{
+	unsigned long out[PLPAR_HCALL9_BUFSIZE];
+	long status;
+
+	status = plpar_hcall9_raw(H_READ, out, flags | H_READ_4, ptex);
+	/* The copy happens whether or not the hcall succeeded. */
+	memcpy(ptes, out, 8 * sizeof(*ptes));
+
+	return status;
+}
+
+static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
+		unsigned long avpn)
+{
+	return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);	/* returns status only */
+}
+
+static inline long plpar_resize_hpt_prepare(unsigned long flags,
+					    unsigned long shift)
+{
+	return plpar_hcall_norets(H_RESIZE_HPT_PREPARE, flags, shift);	/* returns status only */
+}
+
+static inline long plpar_resize_hpt_commit(unsigned long flags,
+					   unsigned long shift)
+{
+	return plpar_hcall_norets(H_RESIZE_HPT_COMMIT, flags, shift);	/* returns status only */
+}
+
+static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
+		unsigned long *tce_ret)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	status = plpar_hcall(H_GET_TCE, out, liobn, ioba);
+	/* *tce_ret receives the first return word even on failure. */
+	*tce_ret = out[0];
+
+	return status;
+}
+
+static inline long plpar_tce_put(unsigned long liobn, unsigned long ioba,
+		unsigned long tceval)
+{
+	return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tceval);	/* returns status only */
+}
+
+static inline long plpar_tce_put_indirect(unsigned long liobn,
+		unsigned long ioba, unsigned long page, unsigned long count)
+{
+	return plpar_hcall_norets(H_PUT_TCE_INDIRECT, liobn, ioba, page, count); /* status only */
+}
+
+static inline long plpar_tce_stuff(unsigned long liobn, unsigned long ioba,
+		unsigned long tceval, unsigned long count)
+{
+	return plpar_hcall_norets(H_STUFF_TCE, liobn, ioba, tceval, count);	/* status only */
+}
+
+/* Set various resource mode parameters via H_SET_MODE; returns the hcall status */
+static inline long plpar_set_mode(unsigned long mflags, unsigned long resource,
+		unsigned long value1, unsigned long value2)
+{
+	return plpar_hcall_norets(H_SET_MODE, mflags, resource, value1, value2);
+}
+
+/*
+ * Enable relocation on exceptions on this partition
+ *
+ * Note: this call has a partition-wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_reloc_on_exceptions(void)
+{
+	/* mflags = 3: Exceptions at 0xC000000000004000; value1/value2 passed as 0 */
+	return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
+}
+
+/*
+ * Disable relocation on exceptions on this partition (mflags = 0)
+ *
+ * Note: this call has a partition-wide scope and can take a while to
+ * complete. If it returns H_LONG_BUSY_* retry periodically until H_SUCCESS.
+ */
+static inline long disable_reloc_on_exceptions(void)
+{
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
+}
+
+/*
+ * Take exceptions in big endian mode on this partition
+ *
+ * Note: this call has a partition-wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_big_endian_exceptions(void)
+{
+	/* mflags = 0: big endian exceptions; value1/value2 passed as 0 */
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_LE, 0, 0);
+}
+
+/*
+ * Take exceptions in little endian mode on this partition
+ *
+ * Note: this call has a partition-wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_little_endian_exceptions(void)
+{
+	/* mflags = 1: little endian exceptions; value1/value2 passed as 0 */
+	return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
+}
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0);	/* mflags = 0 */
+}
+
+static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+{
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0); /* mflags = 0 */
+}
+
+static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1)
+{
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1); /* mflags = 0 */
+}
+
+static inline long plpar_signal_sys_reset(long cpu)
+{
+	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);	/* cpu is forwarded unchanged */
+}
+
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status = plpar_hcall(H_GET_CPU_CHARACTERISTICS, out);
+
+	/* *p is filled in only on H_SUCCESS; otherwise it is left untouched. */
+	if (status != H_SUCCESS)
+		return status;
+
+	p->character = out[0];
+	p->behaviour = out[1];
+	return status;
+}
+
+/*
+ * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately
+ *
+ * - Returns H_SUCCESS on success
+ * - For H_BUSY return value, we retry the hcall.
+ * - For any other hcall failures, attempt a full flush once before
+ *   resorting to BUG().
+ *
+ * Note: This hcall is expected to fail only very rarely. The correct
+ * error recovery of killing the process/guest will be eventually
+ * needed.
+ */
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
+					  u64 page_sizes, u64 start, u64 end)
+{
+	unsigned long flush_all;
+	long ret;
+
+	for (;;) {
+		ret = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type,
+					 page_sizes, start, end);
+		if (ret == H_SUCCESS)
+			return ret;
+		if (ret == H_BUSY) {
+			cpu_relax();
+			continue;
+		}
+
+		/* Flush request failed, try with a full flush once */
+		flush_all = H_RPTI_TYPE_ALL;
+		if (type & H_RPTI_TYPE_NESTED)
+			flush_all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL;
+		do {
+			ret = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target,
+						 flush_all, page_sizes, 0, -1UL);
+			if (ret == H_BUSY)
+				cpu_relax();
+		} while (ret == H_BUSY);
+		if (ret == H_SUCCESS)
+			return ret;
+
+		BUG();
+	}
+}
+
+#else /* !CONFIG_PPC_PSERIES */
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+	return 0;	/* stub for !CONFIG_PPC_PSERIES: no hcall is made */
+}
+
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+				    unsigned long *ptes)
+{
+	return 0;	/* stub for !CONFIG_PPC_PSERIES: ptes is not written */
+}
+
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
+					  u64 page_sizes, u64 start, u64 end)
+{
+	return 0;	/* stub for !CONFIG_PPC_PSERIES: no hcall is made */
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h
new file mode 100644
index 0000000000..23b77027c9
--- /dev/null
+++ b/arch/powerpc/include/asm/plpks.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Nayna Jain <nayna@linux.ibm.com>
+ *
+ * Platform keystore for pseries LPAR(PLPKS).
+ */
+
+#ifndef _ASM_POWERPC_PLPKS_H
+#define _ASM_POWERPC_PLPKS_H
+
+#ifdef CONFIG_PSERIES_PLPKS
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+// Object policy flags from supported_policies
+#define PLPKS_OSSECBOOTAUDIT	PPC_BIT32(1) // OS secure boot must be audit/enforce
+#define PLPKS_OSSECBOOTENFORCE	PPC_BIT32(2) // OS secure boot must be enforce
+#define PLPKS_PWSET		PPC_BIT32(3) // No access without password set
+#define PLPKS_WORLDREADABLE	PPC_BIT32(4) // Readable without authentication
+#define PLPKS_IMMUTABLE		PPC_BIT32(5) // Once written, object cannot be removed
+#define PLPKS_TRANSIENT		PPC_BIT32(6) // Object does not persist through reboot
+#define PLPKS_SIGNEDUPDATE	PPC_BIT32(7) // Object can only be modified by signed updates
+#define PLPKS_HVPROVISIONED	PPC_BIT32(28) // Hypervisor has provisioned this object
+
+// Signature algorithm flags from signed_update_algorithms
+#define PLPKS_ALG_RSA2048	PPC_BIT(0)
+#define PLPKS_ALG_RSA4096	PPC_BIT(1)
+
+// Object label OS metadata flags
+#define PLPKS_VAR_LINUX		0x02
+#define PLPKS_VAR_COMMON	0x04
+
+// Flags identifying which consumer owns an object
+#define PLPKS_FW_OWNER			0x1
+#define PLPKS_BOOTLOADER_OWNER		0x2
+#define PLPKS_OS_OWNER			0x3
+
+// Flags for label metadata fields
+#define PLPKS_LABEL_VERSION		0
+#define PLPKS_MAX_LABEL_ATTR_SIZE	16
+#define PLPKS_MAX_NAME_SIZE		239
+#define PLPKS_MAX_DATA_SIZE		4000
+
+// Timeouts for PLPKS operations
+#define PLPKS_MAX_TIMEOUT		5000 // msec
+#define PLPKS_FLUSH_SLEEP		10 // msec
+#define PLPKS_FLUSH_SLEEP_RANGE		400
+
+struct plpks_var {
+	char *component;	/* plpks_signed_update_var() expects NULL here */
+	u8 *name;		/* presumably namelen bytes - TODO confirm */
+	u8 *data;		/* caller-allocated buffer for the read calls */
+	u32 policy;		/* presumably PLPKS_* policy flags - TODO confirm */
+	u16 namelen;
+	u16 datalen;		/* buffer length; set to object size if data is not provided */
+	u8 os;			/* presumably PLPKS_VAR_LINUX/_COMMON - TODO confirm */
+};
+
+struct plpks_var_name {
+	u8  *name;	/* presumably not NUL-terminated; length in namelen - TODO confirm */
+	u16 namelen;
+};
+
+struct plpks_var_name_list {
+	u32 varcount;	/* presumably the number of entries in varlist - TODO confirm */
+	struct plpks_var_name varlist[];	/* flexible array member */
+};
+
+/**
+ * Updates the authenticated variable. It expects NULL as the component.
+ */
+int plpks_signed_update_var(struct plpks_var *var, u64 flags);
+
+/**
+ * Writes the specified var and its data to PKS.
+ * Any caller of PKS driver should present a valid component type for
+ * their variable.
+ */
+int plpks_write_var(struct plpks_var var);
+
+/**
+ * Removes the specified var and its data from PKS.
+ */
+int plpks_remove_var(char *component, u8 varos,
+		     struct plpks_var_name vname);
+
+/**
+ * Returns the data for the specified os variable.
+ *
+ * Caller must allocate a buffer in var->data with length in var->datalen.
+ * If no buffer is provided, var->datalen will be populated with the object's
+ * size.
+ */
+int plpks_read_os_var(struct plpks_var *var);
+
+/**
+ * Returns the data for the specified firmware variable.
+ *
+ * Caller must allocate a buffer in var->data with length in var->datalen.
+ * If no buffer is provided, var->datalen will be populated with the object's
+ * size.
+ */
+int plpks_read_fw_var(struct plpks_var *var);
+
+/**
+ * Returns the data for the specified bootloader variable.
+ *
+ * Caller must allocate a buffer in var->data with length in var->datalen.
+ * If no buffer is provided, var->datalen will be populated with the object's
+ * size.
+ */
+int plpks_read_bootloader_var(struct plpks_var *var);
+
+/**
+ * Returns true if PKS is available on this LPAR.
+ */
+bool plpks_is_available(void);
+
+/**
+ * Returns version of the Platform KeyStore.
+ */
+u8 plpks_get_version(void);
+
+/**
+ * Returns hypervisor storage overhead per object, not including the size of
+ * the object or label. Only valid for config version >= 2
+ */
+u16 plpks_get_objoverhead(void);
+
+/**
+ * Returns maximum password size. Must be >= 32 bytes
+ */
+u16 plpks_get_maxpwsize(void);
+
+/**
+ * Returns maximum object size supported by Platform KeyStore.
+ */
+u16 plpks_get_maxobjectsize(void);
+
+/**
+ * Returns maximum object label size supported by Platform KeyStore.
+ */
+u16 plpks_get_maxobjectlabelsize(void);
+
+/**
+ * Returns total size of the configured Platform KeyStore.
+ */
+u32 plpks_get_totalsize(void);
+
+/**
+ * Returns used space from the total size of the Platform KeyStore.
+ */
+u32 plpks_get_usedspace(void);
+
+/**
+ * Returns bitmask of policies supported by the hypervisor.
+ */
+u32 plpks_get_supportedpolicies(void);
+
+/**
+ * Returns maximum byte size of a single object supported by the hypervisor.
+ * Only valid for config version >= 3
+ */
+u32 plpks_get_maxlargeobjectsize(void);
+
+/**
+ * Returns bitmask of signature algorithms supported for signed updates.
+ * Only valid for config version >= 3
+ */
+u64 plpks_get_signedupdatealgorithms(void);
+
+/**
+ * Returns the length of the PLPKS password in bytes.
+ */
+u16 plpks_get_passwordlen(void);
+
+/**
+ * Called in early init to retrieve and clear the PLPKS password from the DT.
+ */
+void plpks_early_init_devtree(void);
+
+/**
+ * Populates the FDT with the PLPKS password to prepare for kexec.
+ */
+int plpks_populate_fdt(void *fdt);
+#else // CONFIG_PSERIES_PLPKS
+static inline bool plpks_is_available(void) { return false; }	/* !CONFIG_PSERIES_PLPKS */
+static inline u16 plpks_get_passwordlen(void) { BUILD_BUG(); }	/* calls must compile out */
+static inline void plpks_early_init_devtree(void) { }		/* nothing to do */
+static inline int plpks_populate_fdt(void *fdt) { BUILD_BUG(); } /* calls must compile out */
+#endif // CONFIG_PSERIES_PLPKS
+
+#endif // _ASM_POWERPC_PLPKS_H
diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
new file mode 100644
index 0000000000..2495866f2e
--- /dev/null
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -0,0 +1,417 @@
+/*
+ * Definition of platform feature hooks for PowerMacs
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Paul Mackerras &
+ *                    Ben. Herrenschmidt.
+ *
+ *
+ * Note: I removed media-bay details from the feature stuff, I believe it's
+ *       not worth it, the media-bay driver can directly use the mac-io
+ *       ASIC registers.
+ *
+ * Implementation note: Currently, none of these functions will block.
+ * However, they may internally protect themselves with a spinlock
+ * for way too long. Be prepared for at least some of these to block
+ * in the future.
+ *
+ * Unless specifically defined, the result code is assumed to be an
+ * error when negative, 0 is the default success result. Some functions
+ * may return additional positive result values.
+ *
+ * To keep implementation simple, all feature calls are assumed to have
+ * the prototype parameters (struct device_node* node, int value).
+ * When either is not used, pass 0.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_POWERPC_PMAC_FEATURE_H
+#define __ASM_POWERPC_PMAC_FEATURE_H
+
+#include <asm/macio.h>
+#include <asm/machdep.h>
+
+/*
+ * Known Mac motherboard models
+ *
+ * Please, report any error here to benh@kernel.crashing.org, thanks !
+ *
+ * Note that I don't fully maintain this list for Core99 & MacRISC2
+ * and I'm considering removing all NewWorld entries from it and
+ * entirely rely on the model string.
+ */
+
+/* PowerSurge are the first generation of PCI Pmacs. This includes
+ * all of the Grand-Central based machines. We currently don't
+ * differentiate most of them.
+ */
+#define PMAC_TYPE_PSURGE		0x10	/* PowerSurge */
+#define PMAC_TYPE_ANS			0x11	/* Apple Network Server */
+
+/* Here is the infamous series of OHare based machines
+ */
+#define PMAC_TYPE_COMET			0x20	/* Believed to be PowerBook 2400 */
+#define PMAC_TYPE_HOOPER		0x21	/* Believed to be PowerBook 3400 */
+#define PMAC_TYPE_KANGA			0x22	/* PowerBook 3500 (first G3) */
+#define PMAC_TYPE_ALCHEMY		0x23	/* Alchemy motherboard base */
+#define PMAC_TYPE_GAZELLE		0x24	/* Spartacus, some 5xxx/6xxx */
+#define PMAC_TYPE_UNKNOWN_OHARE		0x2f	/* Unknown, but OHare based */
+
+/* Here are the Heathrow based machines
+ * FIXME: Differentiate wallstreet,mainstreet,wallstreetII
+ */
+#define PMAC_TYPE_GOSSAMER		0x30	/* Gossamer motherboard */
+#define PMAC_TYPE_SILK			0x31	/* Desktop PowerMac G3 */
+#define PMAC_TYPE_WALLSTREET		0x32	/* Wallstreet/Mainstreet PowerBook*/
+#define PMAC_TYPE_UNKNOWN_HEATHROW	0x3f	/* Unknown but heathrow based */
+
+/* Here are newworld machines based on Paddington (heathrow derivative)
+ */
+#define PMAC_TYPE_101_PBOOK		0x40	/* 101 PowerBook (aka Lombard) */
+#define PMAC_TYPE_ORIG_IMAC		0x41	/* First generation iMac */
+#define PMAC_TYPE_YOSEMITE		0x42	/* B&W G3 */
+#define PMAC_TYPE_YIKES			0x43	/* Yikes G4 (PCI graphics) */
+#define PMAC_TYPE_UNKNOWN_PADDINGTON	0x4f	/* Unknown but paddington based */
+
+/* Core99 machines based on UniNorth 1.0 and 1.5
+ *
+ * Note: A single entry here may cover several actual models according
+ * to the device-tree. (Sawtooth is most tower G4s, FW_IMAC is most
+ * FireWire based iMacs, etc...). Those machines are too similar to be
+ * distinguished here; when they need to be differentiated, use the
+ * device-tree "model" or "compatible" property.
+ */
+#define PMAC_TYPE_ORIG_IBOOK		0x40	/* First iBook model (no firewire) */
+#define PMAC_TYPE_SAWTOOTH		0x41	/* Desktop G4s */
+#define PMAC_TYPE_FW_IMAC		0x42	/* FireWire iMacs (except Pangea based) */
+#define PMAC_TYPE_FW_IBOOK		0x43	/* FireWire iBooks (except iBook2) */
+#define PMAC_TYPE_CUBE			0x44	/* Cube PowerMac */
+#define PMAC_TYPE_QUICKSILVER		0x45	/* QuickSilver G4s */
+#define PMAC_TYPE_PISMO			0x46	/* Pismo PowerBook */
+#define PMAC_TYPE_TITANIUM		0x47	/* Titanium PowerBook */
+#define PMAC_TYPE_TITANIUM2		0x48	/* Titanium II PowerBook (no L3, M6) */
+#define PMAC_TYPE_TITANIUM3		0x49	/* Titanium III PowerBook (with L3 & M7) */
+#define PMAC_TYPE_TITANIUM4		0x50	/* Titanium IV PowerBook (with L3 & M9) */
+#define PMAC_TYPE_EMAC			0x50	/* eMac; NOTE(review): same value as TITANIUM4 */
+#define PMAC_TYPE_UNKNOWN_CORE99	0x5f	/* NOTE(review): 0x40-0x43 reuse Paddington ids */
+
+/* MacRisc2 with UniNorth 2.0 */
+#define PMAC_TYPE_RACKMAC		0x80	/* XServe */
+#define PMAC_TYPE_WINDTUNNEL		0x81
+
+/* MacRISC2 machines based on the Pangea chipset
+ */
+#define PMAC_TYPE_PANGEA_IMAC		0x100	/* Flower Power iMac */
+#define PMAC_TYPE_IBOOK2		0x101	/* iBook2 (polycarbonate) */
+#define PMAC_TYPE_FLAT_PANEL_IMAC	0x102	/* Flat panel iMac */
+#define PMAC_TYPE_UNKNOWN_PANGEA	0x10f
+
+/* MacRISC2 machines based on the Intrepid chipset
+ */
+#define PMAC_TYPE_UNKNOWN_INTREPID	0x11f	/* Generic */
+
+/* MacRISC4 / G5 machines. We don't have per-machine selection here anymore,
+ * but rather machine families
+ */
+#define PMAC_TYPE_POWERMAC_G5		0x150	/* U3 & U3H based */
+#define PMAC_TYPE_POWERMAC_G5_U3L	0x151	/* U3L based desktop */
+#define PMAC_TYPE_IMAC_G5		0x152	/* iMac G5 */
+#define PMAC_TYPE_XSERVE_G5		0x153	/* Xserve G5 */
+#define PMAC_TYPE_UNKNOWN_K2		0x19f	/* Any other K2 based */
+#define PMAC_TYPE_UNKNOWN_SHASTA       	0x19e	/* Any other Shasta based */
+
+/*
+ * Motherboard flags
+ */
+
+#define PMAC_MB_CAN_SLEEP		0x00000001
+#define PMAC_MB_HAS_FW_POWER		0x00000002
+#define PMAC_MB_OLD_CORE99		0x00000004
+#define PMAC_MB_MOBILE			0x00000008
+#define PMAC_MB_MAY_SLEEP		0x00000010
+
+/*
+ * Feature calls supported on pmac
+ *
+ */
+
+/*
+ * Use this inline wrapper
+ */
+struct device_node;
+
+static inline long pmac_call_feature(int selector, struct device_node* node,
+					long param, long value)
+{
+	if (ppc_md.feature_call && machine_is(powermac))
+		return ppc_md.feature_call(selector, node, param, value);
+	return -ENODEV;	/* no hook installed, or not a powermac machine */
+}
+
+/* PMAC_FTR_SCC_ENABLE		(struct device_node* node, int param, int value)
+ * enable/disable an SCC side. Pass the node corresponding to the
+ * channel side as a parameter.
+ * param is the type of port
+ * if param is ored with PMAC_SCC_FLAG_XMON, then the SCC is locked enabled
+ * for use by xmon.
+ */
+#define PMAC_FTR_SCC_ENABLE		PMAC_FTR_DEF(0)
+	#define PMAC_SCC_ASYNC		0
+	#define PMAC_SCC_IRDA		1
+	#define PMAC_SCC_I2S1		2
+	#define PMAC_SCC_FLAG_XMON	0x00001000
+
+/* PMAC_FTR_MODEM_ENABLE	(struct device_node* node, 0, int value)
+ * enable/disable the internal modem.
+ */
+#define PMAC_FTR_MODEM_ENABLE		PMAC_FTR_DEF(1)
+
+/* PMAC_FTR_SWIM3_ENABLE	(struct device_node* node, 0,int value)
+ * enable/disable the swim3 (floppy) cell of a mac-io ASIC
+ */
+#define PMAC_FTR_SWIM3_ENABLE		PMAC_FTR_DEF(2)
+
+/* PMAC_FTR_MESH_ENABLE		(struct device_node* node, 0, int value)
+ * enable/disable the mesh (scsi) cell of a mac-io ASIC
+ */
+#define PMAC_FTR_MESH_ENABLE		PMAC_FTR_DEF(3)
+
+/* PMAC_FTR_IDE_ENABLE		(struct device_node* node, int busID, int value)
+ * enable/disable an IDE port of a mac-io ASIC
+ * pass the busID parameter
+ */
+#define PMAC_FTR_IDE_ENABLE		PMAC_FTR_DEF(4)
+
+/* PMAC_FTR_IDE_RESET		(struct device_node* node, int busID, int value)
+ * assert(1)/release(0) an IDE reset line (mac-io IDE only)
+ */
+#define PMAC_FTR_IDE_RESET		PMAC_FTR_DEF(5)
+
+/* PMAC_FTR_BMAC_ENABLE		(struct device_node* node, 0, int value)
+ * enable/disable the bmac (ethernet) cell of a mac-io ASIC, also drives
+ * its reset line
+ */
+#define PMAC_FTR_BMAC_ENABLE		PMAC_FTR_DEF(6)
+
+/* PMAC_FTR_GMAC_ENABLE		(struct device_node* node, 0, int value)
+ * enable/disable the gmac (ethernet) cell of an uninorth ASIC. This
+ * controls the cell's clock.
+ */
+#define PMAC_FTR_GMAC_ENABLE		PMAC_FTR_DEF(7)
+
+/* PMAC_FTR_GMAC_PHY_RESET	(struct device_node* node, 0, 0)
+ * Perform a HW reset of the PHY connected to a gmac controller.
+ * Pass the gmac device node, not the PHY node.
+ */
+#define PMAC_FTR_GMAC_PHY_RESET		PMAC_FTR_DEF(8)
+
+/* PMAC_FTR_SOUND_CHIP_ENABLE	(struct device_node* node, 0, int value)
+ * enable/disable the sound chip, whatever it is and provided it can
+ * actually be controlled
+ */
+#define PMAC_FTR_SOUND_CHIP_ENABLE	PMAC_FTR_DEF(9)
+
+/* -- add various tweaks related to sound routing -- */
+
+/* PMAC_FTR_AIRPORT_ENABLE	(struct device_node* node, 0, int value)
+ * enable/disable the airport card
+ */
+#define PMAC_FTR_AIRPORT_ENABLE		PMAC_FTR_DEF(10)
+
+/* PMAC_FTR_RESET_CPU		(NULL, int cpu_nr, 0)
+ * toggle the reset line of a CPU on an uninorth-based SMP machine
+ */
+#define PMAC_FTR_RESET_CPU		PMAC_FTR_DEF(11)
+
+/* PMAC_FTR_USB_ENABLE		(struct device_node* node, 0, int value)
+ * enable/disable a USB cell, along with the power of the USB "pad"
+ * on keylargo based machines
+ */
+#define PMAC_FTR_USB_ENABLE		PMAC_FTR_DEF(12)
+
+/* PMAC_FTR_1394_ENABLE		(struct device_node* node, 0, int value)
+ * enable/disable the firewire cell of an uninorth ASIC.
+ */
+#define PMAC_FTR_1394_ENABLE		PMAC_FTR_DEF(13)
+
+/* PMAC_FTR_1394_CABLE_POWER	(struct device_node* node, 0, int value)
+ * enable/disable the firewire cable power supply of the uninorth
+ * firewire cell
+ */
+#define PMAC_FTR_1394_CABLE_POWER	PMAC_FTR_DEF(14)
+
+/* PMAC_FTR_SLEEP_STATE		(struct device_node* node, 0, int value)
+ * set the sleep state of the motherboard.
+ *
+ * Pass -1 as value to query for sleep capability
+ * Pass 1 to set IOs to sleep
+ * Pass 0 to set IOs to wake
+ */
+#define PMAC_FTR_SLEEP_STATE		PMAC_FTR_DEF(15)
+
+/* PMAC_FTR_GET_MB_INFO		(NULL, selector, 0)
+ *
+ * returns some motherboard information.
+ * selector: 0  - model id
+ *           1  - model flags (capabilities)
+ *           2  - model name (cast to const char *)
+ */
+#define PMAC_FTR_GET_MB_INFO		PMAC_FTR_DEF(16)
+#define   PMAC_MB_INFO_MODEL	0
+#define   PMAC_MB_INFO_FLAGS	1
+#define   PMAC_MB_INFO_NAME	2
+
+/* PMAC_FTR_READ_GPIO		(NULL, int index, 0)
+ *
+ * read a GPIO from a mac-io controller of type KeyLargo or Pangea.
+ * the value returned is a byte (positive), or a negative error code
+ */
+#define PMAC_FTR_READ_GPIO		PMAC_FTR_DEF(17)
+
+/* PMAC_FTR_WRITE_GPIO		(NULL, int index, int value)
+ *
+ * write a GPIO of a mac-io controller of type KeyLargo or Pangea.
+ */
+#define PMAC_FTR_WRITE_GPIO		PMAC_FTR_DEF(18)
+
+/* PMAC_FTR_ENABLE_MPIC
+ *
+ * Enable the MPIC cell
+ */
+#define PMAC_FTR_ENABLE_MPIC		PMAC_FTR_DEF(19)
+
+/* PMAC_FTR_AACK_DELAY_ENABLE	(NULL, int enable, 0)
+ *
+ * Enable/disable the AACK delay on the northbridge for systems using DFS
+ */
+#define PMAC_FTR_AACK_DELAY_ENABLE     	PMAC_FTR_DEF(20)
+
+/* PMAC_FTR_DEVICE_CAN_WAKE
+ *
+ * Used by video drivers to inform system that they can actually perform
+ * wakeup from sleep
+ */
+#define PMAC_FTR_DEVICE_CAN_WAKE	PMAC_FTR_DEF(22)
+
+
+/* Don't use those directly, they are for the sake of pmac_setup.c */
+extern long pmac_do_feature_call(unsigned int selector, ...);
+extern void pmac_feature_init(void);
+
+/* Video suspend tweak */
+extern void pmac_set_early_video_resume(void (*proc)(void *data), void *data);
+extern void pmac_call_early_video_resume(void);
+
+#define PMAC_FTR_DEF(x) ((0x6660000) | (x))	/* defined after its users; macros expand at use */
+
+/* The AGP driver registers itself here */
+extern void pmac_register_agp_pm(struct pci_dev *bridge,
+				 int (*suspend)(struct pci_dev *bridge),
+				 int (*resume)(struct pci_dev *bridge));
+
+/* Those are meant to be used by video drivers to deal with AGP
+ * suspend resume properly
+ */
+extern void pmac_suspend_agp_for_card(struct pci_dev *dev);
+extern void pmac_resume_agp_for_card(struct pci_dev *dev);
+
+/*
+ * The part below is for use by macio_asic.c only, do not rely
+ * on the data structures or constants below in a normal driver
+ *
+ */
+
+#define MAX_MACIO_CHIPS		2
+
+enum {	/* NOTE(review): presumably the values held in macio_chip.type - confirm */
+	macio_unknown = 0,	/* only explicit value; the rest count up from 1 */
+	macio_grand_central,
+	macio_ohare,
+	macio_ohareII,
+	macio_heathrow,
+	macio_gatwick,
+	macio_paddington,
+	macio_keylargo,
+	macio_pangea,
+	macio_intrepid,
+	macio_keylargo2,
+	macio_shasta,
+};
+
+struct macio_chip
+{
+	struct device_node	*of_node;
+	int			type;	/* presumably a macio_* enum value - TODO confirm */
+	const char		*name;
+	int			rev;
+	volatile u32		__iomem *base;	/* register base used by MACIO_FCR32/FCR8 */
+	unsigned long		flags;	/* presumably MACIO_FLAG_* bits - TODO confirm */
+
+	/* For use by macio_asic PCI driver */
+	struct macio_bus	lbus;
+};
+
+extern struct macio_chip macio_chips[MAX_MACIO_CHIPS];
+
+#define MACIO_FLAG_SCCA_ON	0x00000001
+#define MACIO_FLAG_SCCB_ON	0x00000002
+#define MACIO_FLAG_SCC_LOCKED	0x00000004
+#define MACIO_FLAG_AIRPORT_ON	0x00000010
+#define MACIO_FLAG_FW_SUPPORTED	0x00000020
+
+extern struct macio_chip* macio_find(struct device_node* child, int type);
+
+#define MACIO_FCR32(macio, r)	((macio)->base + ((r) >> 2))	/* r >> 2: byte offset to u32 index */
+#define MACIO_FCR8(macio, r)	(((volatile u8 __iomem *)((macio)->base)) + (r))
+/* The accessors below expand to a bare "macio", so one must be in scope. */
+#define MACIO_IN32(r)		(in_le32(MACIO_FCR32(macio,r)))
+#define MACIO_OUT32(r,v)	(out_le32(MACIO_FCR32(macio,r), (v)))
+#define MACIO_BIS(r,v)		(MACIO_OUT32((r), MACIO_IN32(r) | (v)))	/* read, OR in v, write */
+#define MACIO_BIC(r,v)		(MACIO_OUT32((r), MACIO_IN32(r) & ~(v)))	/* read, clear v, write */
+#define MACIO_IN8(r)		(in_8(MACIO_FCR8(macio,r)))
+#define MACIO_OUT8(r,v)		(out_8(MACIO_FCR8(macio,r), (v)))
+
+/*
+ * Those are exported by pmac feature for internal use by arch code
+ * only like the platform function callbacks, do not use directly in drivers
+ */
+extern raw_spinlock_t feature_lock;
+extern struct device_node *uninorth_node;
+extern u32 __iomem *uninorth_base;
+
+/*
+ * Uninorth reg. access. Note that Uni-N regs are big endian
+ */
+
+#define UN_REG(r)	(uninorth_base + ((r) >> 2))	/* r >> 2: byte offset to u32 index */
+#define UN_IN(r)	(in_be32(UN_REG(r)))
+#define UN_OUT(r,v)	(out_be32(UN_REG(r), (v)))
+#define UN_BIS(r,v)	(UN_OUT((r), UN_IN(r) | (v)))	/* read, OR in v, write back */
+#define UN_BIC(r,v)	(UN_OUT((r), UN_IN(r) & ~(v)))	/* read, clear v, write back */
+
+/* Uninorth variant:
+ *
+ * 0 = not uninorth
+ * 1 = U1.x or U2.x
+ * 3 = U3
+ * 4 = U4
+ */
+extern int pmac_get_uninorth_variant(void);
+
+/*
+ * Power macintoshes have either a CUDA, PMU or SMU controlling
+ * system reset, power, NVRAM, RTC.
+ */
+typedef enum sys_ctrler_kind {
+	SYS_CTRLER_UNKNOWN = 0,	/* also the value of a zero-initialised sys_ctrler_t */
+	SYS_CTRLER_CUDA = 1,
+	SYS_CTRLER_PMU = 2,
+	SYS_CTRLER_SMU = 3,
+} sys_ctrler_t;
+extern sys_ctrler_t sys_ctrler;
+
+#endif /* __ASM_POWERPC_PMAC_FEATURE_H */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pmac_low_i2c.h b/arch/powerpc/include/asm/pmac_low_i2c.h
new file mode 100644
index 0000000000..21bd7297c8
--- /dev/null
+++ b/arch/powerpc/include/asm/pmac_low_i2c.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* 
+ *  include/asm-ppc/pmac_low_i2c.h
+ *
+ *  Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+#ifndef __PMAC_LOW_I2C_H__
+#define __PMAC_LOW_I2C_H__
+#ifdef __KERNEL__
+
+/* i2c mode (based on the platform functions format) */
+enum {
+	pmac_i2c_mode_dumb	= 1,
+	pmac_i2c_mode_std	= 2,
+	pmac_i2c_mode_stdsub	= 3,
+	pmac_i2c_mode_combined	= 4,
+};
+
+/* RW bit in address */
+enum {
+	pmac_i2c_read		= 0x01,
+	pmac_i2c_write		= 0x00
+};
+
+/* i2c bus type */
+enum {
+	pmac_i2c_bus_keywest	= 0,
+	pmac_i2c_bus_pmu	= 1,
+	pmac_i2c_bus_smu	= 2,
+};
+
+/* i2c bus features */
+enum {
+	/* can_largesub : supports >1 byte subaddresses (SMU only) */
+	pmac_i2c_can_largesub	= 0x00000001u,
+
+	/* multibus : device node holds multiple busses, bus number is
+	 * encoded in bits 0xff00 of "reg" of a given device
+	 */
+	pmac_i2c_multibus	= 0x00000002u,
+};
+
+/* i2c busses in the system */
+struct pmac_i2c_bus;
+struct i2c_adapter;
+
+/* Init, called early during boot */
+extern int pmac_i2c_init(void);
+
+/* Lookup an i2c bus for a device-node. The node can be either the bus
+ * node itself or a device below it. In the case of a multibus, the bus
+ * node itself is the controller node, else, it's a child of the controller
+ * node
+ */
+extern struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node);
+
+/* Get the address for an i2c device. This strips the bus number if
+ * necessary. The 7-bit address is returned shifted left by 1 bit so that
+ * the direction bit can be directly ORed in
+ */
+extern u8 pmac_i2c_get_dev_addr(struct device_node *device);
+
+/* Get info about a bus */
+extern struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus);
+extern struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus);
+extern int pmac_i2c_get_type(struct pmac_i2c_bus *bus);
+extern int pmac_i2c_get_flags(struct pmac_i2c_bus *bus);
+extern int pmac_i2c_get_channel(struct pmac_i2c_bus *bus);
+
+/* i2c layer adapter helpers */
+extern struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus);
+extern struct pmac_i2c_bus *pmac_i2c_adapter_to_bus(struct i2c_adapter *adapter);
+
+/* Match a device or bus with an i2c adapter structure, to be used by drivers
+ * to match device-tree nodes with i2c adapters during adapter discovery
+ * callbacks
+ */
+extern int pmac_i2c_match_adapter(struct device_node *dev,
+				  struct i2c_adapter *adapter);
+
+
+/* (legacy) Locking functions exposed to i2c-keywest */
+extern int pmac_low_i2c_lock(struct device_node *np);
+extern int pmac_low_i2c_unlock(struct device_node *np);
+
+/* Access functions for platform code */
+extern int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled);
+extern void pmac_i2c_close(struct pmac_i2c_bus *bus);
+extern int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode);
+extern int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+			 u32 subaddr, u8 *data,  int len);
+
+/* Suspend/resume code called by via-pmu directly for now */
+extern void pmac_pfunc_i2c_suspend(void);
+extern void pmac_pfunc_i2c_resume(void);
+
+#endif /* __KERNEL__ */
+#endif /* __PMAC_LOW_I2C_H__ */
diff --git a/arch/powerpc/include/asm/pmac_pfunc.h b/arch/powerpc/include/asm/pmac_pfunc.h
new file mode 100644
index 0000000000..cee4e9f5b8
--- /dev/null
+++ b/arch/powerpc/include/asm/pmac_pfunc.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMAC_PFUNC_H__
+#define __PMAC_PFUNC_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+/* Flags in command lists */
+#define PMF_FLAGS_ON_INIT		0x80000000u
+#define PMF_FLGAS_ON_TERM		0x40000000u
+#define PMF_FLAGS_ON_SLEEP		0x20000000u
+#define PMF_FLAGS_ON_WAKE		0x10000000u
+#define PMF_FLAGS_ON_DEMAND		0x08000000u
+#define PMF_FLAGS_INT_GEN		0x04000000u
+#define PMF_FLAGS_HIGH_SPEED		0x02000000u
+#define PMF_FLAGS_LOW_SPEED		0x01000000u
+#define PMF_FLAGS_SIDE_EFFECTS		0x00800000u
+
+/*
+ * Arguments to a platform function call.
+ *
+ * NOTE: By convention, pointer arguments point to an u32
+ */
+struct pmf_args {
+	union {
+		u32 v;
+		u32 *p;
+	} u[4];
+	unsigned int count;
+};
+
+/*
+ * A driver capable of interpreting commands provides a handlers
+ * structure filled with whatever handlers are implemented by this
+ * driver. Non implemented handlers are left NULL.
+ *
+ * PMF_STD_ARGS are the same arguments that are passed to the parser
+ * and that gets passed back to the various handlers.
+ *
+ * Interpreting a given function always starts with a begin() call which
+ * returns an instance data to be passed around subsequent calls, and
+ * ends with an end() call. This allows the low level driver to implement
+ * locking policy or per-function instance data.
+ *
+ * For interrupt capable functions, irq_enable() is called when a client
+ * registers, and irq_disable() is called when the last client unregisters
+ * Note that irq_enable & irq_disable are called within a semaphore held
+ * by the core, thus you should not try to register yourself to some other
+ * pmf interrupt during those calls.
+ */
+
+#define PMF_STD_ARGS	struct pmf_function *func, void *instdata, \
+		        struct pmf_args *args
+
+struct pmf_function;
+
+struct pmf_handlers {
+	void * (*begin)(struct pmf_function *func, struct pmf_args *args);
+	void (*end)(struct pmf_function *func, void *instdata);
+
+	int (*irq_enable)(struct pmf_function *func);
+	int (*irq_disable)(struct pmf_function *func);
+
+	int (*write_gpio)(PMF_STD_ARGS, u8 value, u8 mask);
+	int (*read_gpio)(PMF_STD_ARGS, u8 mask, int rshift, u8 xor);
+
+	int (*write_reg32)(PMF_STD_ARGS, u32 offset, u32 value, u32 mask);
+	int (*read_reg32)(PMF_STD_ARGS, u32 offset);
+	int (*write_reg16)(PMF_STD_ARGS, u32 offset, u16 value, u16 mask);
+	int (*read_reg16)(PMF_STD_ARGS, u32 offset);
+	int (*write_reg8)(PMF_STD_ARGS, u32 offset, u8 value, u8 mask);
+	int (*read_reg8)(PMF_STD_ARGS, u32 offset);
+
+	int (*delay)(PMF_STD_ARGS, u32 duration);
+
+	int (*wait_reg32)(PMF_STD_ARGS, u32 offset, u32 value, u32 mask);
+	int (*wait_reg16)(PMF_STD_ARGS, u32 offset, u16 value, u16 mask);
+	int (*wait_reg8)(PMF_STD_ARGS, u32 offset, u8 value, u8 mask);
+
+	int (*read_i2c)(PMF_STD_ARGS, u32 len);
+	int (*write_i2c)(PMF_STD_ARGS, u32 len, const u8 *data);
+	int (*rmw_i2c)(PMF_STD_ARGS, u32 masklen, u32 valuelen, u32 totallen,
+		       const u8 *maskdata, const u8 *valuedata);
+
+	int (*read_cfg)(PMF_STD_ARGS, u32 offset, u32 len);
+	int (*write_cfg)(PMF_STD_ARGS, u32 offset, u32 len, const u8 *data);
+	int (*rmw_cfg)(PMF_STD_ARGS, u32 offset, u32 masklen, u32 valuelen,
+		       u32 totallen, const u8 *maskdata, const u8 *valuedata);
+
+	int (*read_i2c_sub)(PMF_STD_ARGS, u8 subaddr, u32 len);
+	int (*write_i2c_sub)(PMF_STD_ARGS, u8 subaddr, u32 len, const u8 *data);
+	int (*set_i2c_mode)(PMF_STD_ARGS, int mode);
+	int (*rmw_i2c_sub)(PMF_STD_ARGS, u8 subaddr, u32 masklen, u32 valuelen,
+			   u32 totallen, const u8 *maskdata,
+			   const u8 *valuedata);
+
+	int (*read_reg32_msrx)(PMF_STD_ARGS, u32 offset, u32 mask, u32 shift,
+			       u32 xor);
+	int (*read_reg16_msrx)(PMF_STD_ARGS, u32 offset, u32 mask, u32 shift,
+			       u32 xor);
+	int (*read_reg8_msrx)(PMF_STD_ARGS, u32 offset, u32 mask, u32 shift,
+			      u32 xor);
+
+	int (*write_reg32_slm)(PMF_STD_ARGS, u32 offset, u32 shift, u32 mask);
+	int (*write_reg16_slm)(PMF_STD_ARGS, u32 offset, u32 shift, u32 mask);
+	int (*write_reg8_slm)(PMF_STD_ARGS, u32 offset, u32 shift, u32 mask);
+
+	int (*mask_and_compare)(PMF_STD_ARGS, u32 len, const u8 *maskdata,
+				const u8 *valuedata);
+
+	struct module *owner;
+};
+
+
+/*
+ * Drivers who expose platform functions register at init time, this
+ * causes the platform functions for that device node to be parsed in
+ * advance and associated with the device. The data structures are
+ * partially public so a driver can walk the list of platform functions
+ * and eventually inspect the flags
+ */
+struct pmf_device;
+
+struct pmf_function {
+	/* All functions for a given driver are linked */
+	struct list_head	link;
+
+	/* Function node & driver data */
+	struct device_node	*node;
+	void			*driver_data;
+
+	/* For internal use by core */
+	struct pmf_device	*dev;
+
+	/* The name is the "xxx" in "platform-do-xxx", this is how
+	 * platform functions are identified by this code. Some functions
+	 * only operate for a given target, in which case the phandle is
+	 * here (or 0 if the filter doesn't apply)
+	 */
+	const char		*name;
+	u32			phandle;
+
+	/* The flags for that function. You can have several functions
+	 * with the same name and different flag
+	 */
+	u32			flags;
+
+	/* The actual tokenized function blob */
+	const void		*data;
+	unsigned int		length;
+
+	/* Interrupt clients */
+	struct list_head	irq_clients;
+
+	/* Refcounting */
+	struct kref		ref;
+};
+
+/*
+ * For platform functions that are interrupts, one can register
+ * irq_client structures. You canNOT use the same structure twice
+ * as it contains a link member. Also, the callback is called with
+ * a spinlock held, you must not call back into any of the pmf_* functions
+ * from within that callback
+ */
+struct pmf_irq_client {
+	void			(*handler)(void *data);
+	void			*data;
+	struct module		*owner;
+	struct list_head	link;
+	struct pmf_function	*func;
+};
+
+
+/*
+ * Register/Unregister a function-capable driver and its handlers
+ */
+extern int pmf_register_driver(struct device_node *np,
+			      struct pmf_handlers *handlers,
+			      void *driverdata);
+
+extern void pmf_unregister_driver(struct device_node *np);
+
+
+/*
+ * Register/Unregister interrupt clients
+ */
+extern int pmf_register_irq_client(struct device_node *np,
+				   const char *name,
+				   struct pmf_irq_client *client);
+
+extern void pmf_unregister_irq_client(struct pmf_irq_client *client);
+
+/*
+ * Called by the handlers when an irq happens
+ */
+extern void pmf_do_irq(struct pmf_function *func);
+
+
+/*
+ * Low level call to platform functions.
+ *
+ * The phandle can filter on the target object for functions that have
+ * multiple targets, the flags allow you to restrict the call to a given
+ * combination of flags.
+ *
+ * The args array contains as many arguments as is required by the function,
+ * this is dependent on the function you are calling, unfortunately Apple's
+ * mechanism provides no way to encode that so you have to get it right at
+ * the call site. Some functions require no args, in which case, you can
+ * pass NULL.
+ *
+ * You can also pass NULL to the name. This will match any function that has
+ * the appropriate combination of flags & phandle or you can pass 0 to the
+ * phandle to match any
+ */
+extern int pmf_do_functions(struct device_node *np, const char *name,
+			    u32 phandle, u32 flags, struct pmf_args *args);
+
+
+
+/*
+ * High level call to a platform function.
+ *
+ * This one looks for the platform-xxx first so you should call it to the
+ * actual target if any. It will fallback to platform-do-xxx if it can't
+ * find one. It will also exclusively target functions that have
+ * the "OnDemand" flag.
+ */
+
+extern int pmf_call_function(struct device_node *target, const char *name,
+			     struct pmf_args *args);
+
+
+/*
+ * For low latency interrupt usage, you can look up on-demand functions
+ * using the functions below
+ */
+
+extern struct pmf_function *pmf_find_function(struct device_node *target,
+					      const char *name);
+
+extern struct pmf_function * pmf_get_function(struct pmf_function *func);
+extern void pmf_put_function(struct pmf_function *func);
+
+extern int pmf_call_one(struct pmf_function *func, struct pmf_args *args);
+
+int pmac_pfunc_base_install(void);
+
+/* Suspend/resume code called by via-pmu directly for now */
+extern void pmac_pfunc_base_suspend(void);
+extern void pmac_pfunc_base_resume(void);
+
+#endif /* __PMAC_PFUNC_H__ */
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
new file mode 100644
index 0000000000..3c09109e70
--- /dev/null
+++ b/arch/powerpc/include/asm/pmc.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * pmc.h
+ * Copyright (C) 2004  David Gibson, IBM Corporation
+ */
+#ifndef _POWERPC_PMC_H
+#define _POWERPC_PMC_H
+#ifdef __KERNEL__
+
+#include <asm/ptrace.h>
+
+typedef void (*perf_irq_t)(struct pt_regs *);
+extern perf_irq_t perf_irq;
+
+int reserve_pmc_hardware(perf_irq_t new_perf_irq);
+void release_pmc_hardware(void);
+void ppc_enable_pmcs(void);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/lppaca.h>
+#include <asm/firmware.h>
+
+static inline void ppc_set_pmu_inuse(int inuse)
+{
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+#ifdef CONFIG_PPC_PSERIES
+		get_lppaca()->pmcregs_in_use = inuse;
+#endif /* CONFIG_PPC_PSERIES */
+	}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	get_paca()->pmcregs_in_use = inuse;	/* done with or without LPAR */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+#endif /* CONFIG_PPC_PSERIES || CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static inline int ppc_get_pmu_inuse(void)
+{
+	return get_paca()->pmcregs_in_use;
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+extern void power4_enable_pmcs(void);
+
+#else /* !CONFIG_PPC_BOOK3S_64 */
+
+static inline void ppc_set_pmu_inuse(int inuse) { }
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#endif /* __KERNEL__ */
+#endif /* _POWERPC_PMC_H */
diff --git a/arch/powerpc/include/asm/pmi.h b/arch/powerpc/include/asm/pmi.h
new file mode 100644
index 0000000000..478f0a2fe7
--- /dev/null
+++ b/arch/powerpc/include/asm/pmi.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_PMI_H
+#define _POWERPC_PMI_H
+
+/*
+ * Definitions for talking with PMI device on PowerPC
+ *
+ * PMI (Platform Management Interrupt) is a way to communicate
+ * with the BMC (Baseboard Management Controller) via interrupts.
+ * Unlike IPMI it is bidirectional and has a low latency.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#ifdef __KERNEL__
+
+#define PMI_TYPE_FREQ_CHANGE	0x01
+#define PMI_TYPE_POWER_BUTTON	0x02
+#define PMI_READ_TYPE		0
+#define PMI_READ_DATA0		1
+#define PMI_READ_DATA1		2
+#define PMI_READ_DATA2		3
+#define PMI_WRITE_TYPE		4
+#define PMI_WRITE_DATA0		5
+#define PMI_WRITE_DATA1		6
+#define PMI_WRITE_DATA2		7
+
+#define PMI_ACK			0x80
+
+#define PMI_TIMEOUT		100
+
+typedef struct {
+	u8	type;
+	u8	data0;
+	u8	data1;
+	u8	data2;
+} pmi_message_t;
+
+struct pmi_handler {
+	struct list_head node;
+	u8 type;
+	void (*handle_pmi_message) (pmi_message_t);
+};
+
+int pmi_register_handler(struct pmi_handler *);
+void pmi_unregister_handler(struct pmi_handler *);
+
+int pmi_send_message(pmi_message_t);
+
+#endif /* __KERNEL__ */
+#endif /* _POWERPC_PMI_H */
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
new file mode 100644
index 0000000000..9acd1fbf11
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+// Copyright 2017 IBM Corp.
+#ifndef _ASM_PNV_OCXL_H
+#define _ASM_PNV_OCXL_H
+
+#include <linux/bitfield.h>
+#include <linux/pci.h>
+
+#define PNV_OCXL_TL_MAX_TEMPLATE        63
+#define PNV_OCXL_TL_BITS_PER_RATE       4
+#define PNV_OCXL_TL_RATE_BUF_SIZE       ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
+
+#define PNV_OCXL_ATSD_TIMEOUT		1
+
+/* TLB Management Instructions */
+#define PNV_OCXL_ATSD_LNCH		0x00
+/* Radix Invalidate */
+#define   PNV_OCXL_ATSD_LNCH_R		PPC_BIT(0)
+/* Radix Invalidation Control
+ * 0b00 Just invalidate TLB.
+ * 0b01 Invalidate just Page Walk Cache.
+ * 0b10 Invalidate TLB, Page Walk Cache, and any
+ * caching of Partition and Process Table Entries.
+ */
+#define   PNV_OCXL_ATSD_LNCH_RIC	PPC_BITMASK(1, 2)
+/* Number and Page Size of translations to be invalidated */
+#define   PNV_OCXL_ATSD_LNCH_LP		PPC_BITMASK(3, 10)
+/* Invalidation Criteria
+ * 0b00 Invalidate just the target VA.
+ * 0b01 Invalidate matching PID.
+ */
+#define   PNV_OCXL_ATSD_LNCH_IS		PPC_BITMASK(11, 12)
+/* 0b1: Process Scope, 0b0: Partition Scope */
+#define   PNV_OCXL_ATSD_LNCH_PRS	PPC_BIT(13)
+/* Invalidation Flag */
+#define   PNV_OCXL_ATSD_LNCH_B		PPC_BIT(14)
+/* Actual Page Size to be invalidated
+ * 000 4KB
+ * 101 64KB
+ * 001 2MB
+ * 010 1GB
+ */
+#define   PNV_OCXL_ATSD_LNCH_AP		PPC_BITMASK(15, 17)
+/* Defines the large page select
+ * L=0b0 for 4KB pages
+ * L=0b1 for large pages
+ */
+#define   PNV_OCXL_ATSD_LNCH_L		PPC_BIT(18)
+/* Process ID */
+#define   PNV_OCXL_ATSD_LNCH_PID	PPC_BITMASK(19, 38)
+/* NoFlush – Assumed to be 0b0 */
+#define   PNV_OCXL_ATSD_LNCH_F		PPC_BIT(39)
+#define   PNV_OCXL_ATSD_LNCH_OCAPI_SLBI	PPC_BIT(40)
+#define   PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON	PPC_BIT(41)
+#define PNV_OCXL_ATSD_AVA		0x08
+#define   PNV_OCXL_ATSD_AVA_AVA		PPC_BITMASK(0, 51)
+#define PNV_OCXL_ATSD_STAT		0x10
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported);
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+			char *rate_buf, int rate_buf_size);
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+			 uint64_t rate_buf_phys, int rate_buf_size);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+			     void __iomem *tfc, void __iomem *pe_handle);
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+			  void __iomem **dar, void __iomem **tfc,
+			  void __iomem **pe_handle);
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data);
+void pnv_ocxl_spa_release(void *platform_data);
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
+
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+		      uint64_t lpcr, void __iomem **arva);
+void pnv_ocxl_unmap_lpar(void __iomem *arva);
+void pnv_ocxl_tlb_invalidate(void __iomem *arva,
+			     unsigned long pid,
+			     unsigned long addr,
+			     unsigned long page_size);
+#endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
new file mode 100644
index 0000000000..8afc92860d
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2014 IBM Corp.
+ */
+
+#ifndef _ASM_PNV_PCI_H
+#define _ASM_PNV_PCI_H
+
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <misc/cxl-base.h>
+#include <asm/opal-api.h>
+
+#define PCI_SLOT_ID_PREFIX	(1UL << 63)
+#define PCI_SLOT_ID(phb_id, bdfn)	\
+	(PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id))
+#define PCI_PHB_SLOT_ID(phb_id)		(phb_id)
+
+extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id);
+extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len);
+extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state);
+extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state);
+extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
+				   struct opal_msg *msg);
+
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+				  int enable);
+int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
+int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
+			   unsigned int virq);
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
+int pnv_cxl_get_irq_count(struct pci_dev *dev);
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d);
+bool is_pnv_opal_msi(struct irq_chip *chip);
+
+#ifdef CONFIG_CXL_BASE
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+			       struct pci_dev *dev, int num);
+void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
+				  struct pci_dev *dev);
+#endif /* CONFIG_CXL_BASE */
+
+struct pnv_php_slot {
+	struct hotplug_slot		slot;
+	uint64_t			id;
+	char				*name;
+	int				slot_no;
+	unsigned int			flags;
+#define PNV_PHP_FLAG_BROKEN_PDC		0x1
+	struct kref			kref;
+#define PNV_PHP_STATE_INITIALIZED	0
+#define PNV_PHP_STATE_REGISTERED	1
+#define PNV_PHP_STATE_POPULATED		2
+#define PNV_PHP_STATE_OFFLINE		3
+	int				state;
+	int				irq;
+	struct workqueue_struct		*wq;
+	struct device_node		*dn;
+	struct pci_dev			*pdev;
+	struct pci_bus			*bus;
+	bool				power_state_check;
+	u8				attention_state;
+	void				*fdt;
+	void				*dt;
+	struct of_changeset		ocs;
+	struct pnv_php_slot		*parent;
+	struct list_head		children;
+	struct list_head		link;
+};
+extern struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn);
+extern int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
+					uint8_t state);
+
+#endif /* _ASM_PNV_PCI_H */
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
new file mode 100644
index 0000000000..e1a8587187
--- /dev/null
+++ b/arch/powerpc/include/asm/powernv.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2017 IBM Corp.
+ */
+
+#ifndef _ASM_POWERNV_H
+#define _ASM_POWERNV_H
+
+#ifdef CONFIG_PPC_POWERNV
+extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
+
+void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val);
+
+void pnv_tm_init(void);
+#else /* !CONFIG_PPC_POWERNV */
+static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
+
+static inline void pnv_tm_init(void) { }
+#endif /* CONFIG_PPC_POWERNV */
+
+#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
new file mode 100644
index 0000000000..005601243d
--- /dev/null
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -0,0 +1,701 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * provides masks and opcode images for use by code generation, emulation
+ * and for instructions that older assemblers might not know about
+ */
+#ifndef _ASM_POWERPC_PPC_OPCODE_H
+#define _ASM_POWERPC_PPC_OPCODE_H
+
+#include <asm/asm-const.h>
+
+#define	__REG_R0	0
+#define	__REG_R1	1
+#define	__REG_R2	2
+#define	__REG_R3	3
+#define	__REG_R4	4
+#define	__REG_R5	5
+#define	__REG_R6	6
+#define	__REG_R7	7
+#define	__REG_R8	8
+#define	__REG_R9	9
+#define	__REG_R10	10
+#define	__REG_R11	11
+#define	__REG_R12	12
+#define	__REG_R13	13
+#define	__REG_R14	14
+#define	__REG_R15	15
+#define	__REG_R16	16
+#define	__REG_R17	17
+#define	__REG_R18	18
+#define	__REG_R19	19
+#define	__REG_R20	20
+#define	__REG_R21	21
+#define	__REG_R22	22
+#define	__REG_R23	23
+#define	__REG_R24	24
+#define	__REG_R25	25
+#define	__REG_R26	26
+#define	__REG_R27	27
+#define	__REG_R28	28
+#define	__REG_R29	29
+#define	__REG_R30	30
+#define	__REG_R31	31
+
+#define	__REGA0_0	0
+#define	__REGA0_R1	1
+#define	__REGA0_R2	2
+#define	__REGA0_R3	3
+#define	__REGA0_R4	4
+#define	__REGA0_R5	5
+#define	__REGA0_R6	6
+#define	__REGA0_R7	7
+#define	__REGA0_R8	8
+#define	__REGA0_R9	9
+#define	__REGA0_R10	10
+#define	__REGA0_R11	11
+#define	__REGA0_R12	12
+#define	__REGA0_R13	13
+#define	__REGA0_R14	14
+#define	__REGA0_R15	15
+#define	__REGA0_R16	16
+#define	__REGA0_R17	17
+#define	__REGA0_R18	18
+#define	__REGA0_R19	19
+#define	__REGA0_R20	20
+#define	__REGA0_R21	21
+#define	__REGA0_R22	22
+#define	__REGA0_R23	23
+#define	__REGA0_R24	24
+#define	__REGA0_R25	25
+#define	__REGA0_R26	26
+#define	__REGA0_R27	27
+#define	__REGA0_R28	28
+#define	__REGA0_R29	29
+#define	__REGA0_R30	30
+#define	__REGA0_R31	31
+
+/* For use with PPC_RAW_() macros */
+#define	_R0	0
+#define	_R1	1
+#define	_R2	2
+#define	_R3	3
+#define	_R4	4
+#define	_R5	5
+#define	_R6	6
+#define	_R7	7
+#define	_R8	8
+#define	_R9	9
+#define	_R10	10
+#define	_R11	11
+#define	_R12	12
+#define	_R13	13
+#define	_R14	14
+#define	_R15	15
+#define	_R16	16
+#define	_R17	17
+#define	_R18	18
+#define	_R19	19
+#define	_R20	20
+#define	_R21	21
+#define	_R22	22
+#define	_R23	23
+#define	_R24	24
+#define	_R25	25
+#define	_R26	26
+#define	_R27	27
+#define	_R28	28
+#define	_R29	29
+#define	_R30	30
+#define	_R31	31
+
+#define IMM_L(i)               ((uintptr_t)(i) & 0xffff)
+#define IMM_DS(i)              ((uintptr_t)(i) & 0xfffc)
+#define IMM_DQ(i)              ((uintptr_t)(i) & 0xfff0)
+#define IMM_D0(i)              (((uintptr_t)(i) >> 16) & 0x3ffff)
+#define IMM_D1(i)              IMM_L(i)
+
+/*
+ * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
+ * (e.g. LD, ADDI).  If the bottom 16 bits is "-ve", add another bit into the
+ * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
+ *
+ * XXX: should these mask out possible sign bits?
+ */
+#define IMM_H(i)                ((uintptr_t)(i)>>16)
+#define IMM_HA(i)               (((uintptr_t)(i)>>16) +                       \
+					(((uintptr_t)(i) & 0x8000) >> 15))
+
+/*
+ * 18-bit immediate helper for prefix 18-bit upper immediate si0 field.
+ */
+#define IMM_H18(i)              (((uintptr_t)(i)>>16) & 0x3ffff)
+
+
+/* opcode and xopcode for instructions */
+#define OP_PREFIX	1
+#define OP_TRAP_64	2
+#define OP_TRAP		3
+#define OP_SC		17
+#define OP_19		19
+#define OP_31		31
+#define OP_LWZ		32
+#define OP_LWZU		33
+#define OP_LBZ		34
+#define OP_LBZU		35
+#define OP_STW		36
+#define OP_STWU		37
+#define OP_STB		38
+#define OP_STBU		39
+#define OP_LHZ		40
+#define OP_LHZU		41
+#define OP_LHA		42
+#define OP_LHAU		43
+#define OP_STH		44
+#define OP_STHU		45
+#define OP_LMW		46
+#define OP_STMW		47
+#define OP_LFS		48
+#define OP_LFSU		49
+#define OP_LFD		50
+#define OP_LFDU		51
+#define OP_STFS		52
+#define OP_STFSU	53
+#define OP_STFD		54
+#define OP_STFDU	55
+#define OP_LQ		56
+#define OP_LD		58
+#define OP_STD		62
+
+#define OP_19_XOP_RFID		18
+#define OP_19_XOP_RFMCI		38
+#define OP_19_XOP_RFDI		39
+#define OP_19_XOP_RFI		50
+#define OP_19_XOP_RFCI		51
+#define OP_19_XOP_RFSCV		82
+#define OP_19_XOP_HRFID		274
+#define OP_19_XOP_URFID		306
+#define OP_19_XOP_STOP		370
+#define OP_19_XOP_DOZE		402
+#define OP_19_XOP_NAP		434
+#define OP_19_XOP_SLEEP		466
+#define OP_19_XOP_RVWINKLE	498
+
+#define OP_31_XOP_TRAP      4
+#define OP_31_XOP_LDX       21
+#define OP_31_XOP_LWZX      23
+#define OP_31_XOP_LDUX      53
+#define OP_31_XOP_DCBST     54
+#define OP_31_XOP_LWZUX     55
+#define OP_31_XOP_TRAP_64   68
+#define OP_31_XOP_DCBF      86
+#define OP_31_XOP_LBZX      87
+#define OP_31_XOP_STDX      149
+#define OP_31_XOP_STWX      151
+#define OP_31_XOP_STDUX     181
+#define OP_31_XOP_STWUX     183
+#define OP_31_XOP_STBX      215
+#define OP_31_XOP_LBZUX     119
+#define OP_31_XOP_STBUX     247
+#define OP_31_XOP_LHZX      279
+#define OP_31_XOP_LHZUX     311
+#define OP_31_XOP_MSGSNDP   142
+#define OP_31_XOP_MSGCLRP   174
+#define OP_31_XOP_MTMSR     146
+#define OP_31_XOP_MTMSRD    178
+#define OP_31_XOP_TLBIE     306
+#define OP_31_XOP_MFSPR     339
+#define OP_31_XOP_LWAX      341
+#define OP_31_XOP_LHAX      343
+#define OP_31_XOP_LWAUX     373
+#define OP_31_XOP_LHAUX     375
+#define OP_31_XOP_STHX      407
+#define OP_31_XOP_STHUX     439
+#define OP_31_XOP_MTSPR     467
+#define OP_31_XOP_DCBI      470
+#define OP_31_XOP_LDBRX     532
+#define OP_31_XOP_LWBRX     534
+#define OP_31_XOP_TLBSYNC   566
+#define OP_31_XOP_STDBRX    660
+#define OP_31_XOP_STWBRX    662
+#define OP_31_XOP_STFSX	    663
+#define OP_31_XOP_STFSUX    695
+#define OP_31_XOP_STFDX     727
+#define OP_31_XOP_HASHCHK   754
+#define OP_31_XOP_STFDUX    759
+#define OP_31_XOP_LHBRX     790
+#define OP_31_XOP_LFIWAX    855
+#define OP_31_XOP_LFIWZX    887
+#define OP_31_XOP_STHBRX    918
+#define OP_31_XOP_STFIWX    983
+
+/* VSX Scalar Load Instructions */
+#define OP_31_XOP_LXSDX         588
+#define OP_31_XOP_LXSSPX        524
+#define OP_31_XOP_LXSIWAX       76
+#define OP_31_XOP_LXSIWZX       12
+
+/* VSX Scalar Store Instructions */
+#define OP_31_XOP_STXSDX        716
+#define OP_31_XOP_STXSSPX       652
+#define OP_31_XOP_STXSIWX       140
+
+/* VSX Vector Load Instructions */
+#define OP_31_XOP_LXVD2X        844
+#define OP_31_XOP_LXVW4X        780
+
+/* VSX Vector Load and Splat Instruction */
+#define OP_31_XOP_LXVDSX        332
+
+/* VSX Vector Store Instructions */
+#define OP_31_XOP_STXVD2X       972
+#define OP_31_XOP_STXVW4X       908
+
+#define OP_31_XOP_LFSX          535
+#define OP_31_XOP_LFSUX         567
+#define OP_31_XOP_LFDX          599
+#define OP_31_XOP_LFDUX		631
+
+/* VMX Vector Load Instructions */
+#define OP_31_XOP_LVX           103
+
+/* VMX Vector Store Instructions */
+#define OP_31_XOP_STVX          231
+
+/* sorted alphabetically */
+#define PPC_INST_BCCTR_FLUSH		0x4c400420
+#define PPC_INST_COPY			0x7c20060c
+#define PPC_INST_DCBA			0x7c0005ec
+#define PPC_INST_DCBA_MASK		0xfc0007fe
+#define PPC_INST_DSSALL			0x7e00066c
+#define PPC_INST_ISEL			0x7c00001e
+#define PPC_INST_ISEL_MASK		0xfc00003e
+#define PPC_INST_LSWI			0x7c0004aa
+#define PPC_INST_LSWX			0x7c00042a
+#define PPC_INST_LWSYNC			0x7c2004ac
+#define PPC_INST_SYNC			0x7c0004ac
+#define PPC_INST_SYNC_MASK		0xfc0007fe
+#define PPC_INST_MCRXR			0x7c000400
+#define PPC_INST_MCRXR_MASK		0xfc0007fe
+#define PPC_INST_MFSPR_PVR		0x7c1f42a6
+#define PPC_INST_MFSPR_PVR_MASK		0xfc1ffffe
+#define PPC_INST_MTMSRD			0x7c000164
+#define PPC_INST_PASTE			0x7c20070d
+#define PPC_INST_PASTE_MASK		0xfc2007ff
+#define PPC_INST_POPCNTB		0x7c0000f4
+#define PPC_INST_POPCNTB_MASK		0xfc0007fe
+#define PPC_INST_RFEBB			0x4c000124
+#define PPC_INST_RFID			0x4c000024
+#define PPC_INST_MFSPR_DSCR		0x7c1102a6
+#define PPC_INST_MFSPR_DSCR_MASK	0xfc1ffffe
+#define PPC_INST_MTSPR_DSCR		0x7c1103a6
+#define PPC_INST_MTSPR_DSCR_MASK	0xfc1ffffe
+#define PPC_INST_MFSPR_DSCR_USER	0x7c0302a6
+#define PPC_INST_MFSPR_DSCR_USER_MASK	0xfc1ffffe
+#define PPC_INST_MTSPR_DSCR_USER	0x7c0303a6
+#define PPC_INST_MTSPR_DSCR_USER_MASK	0xfc1ffffe
+#define PPC_INST_STRING			0x7c00042a
+#define PPC_INST_STRING_MASK		0xfc0007fe
+#define PPC_INST_STRING_GEN_MASK	0xfc00067e
+#define PPC_INST_STSWI			0x7c0005aa
+#define PPC_INST_STSWX			0x7c00052a
+#define PPC_INST_TRECHKPT		0x7c0007dd
+#define PPC_INST_TRECLAIM		0x7c00075d
+#define PPC_INST_TSR			0x7c0005dd
+#define PPC_INST_BRANCH_COND		0x40800000
+
+/* Prefixes */
+#define PPC_INST_LFS			0xc0000000
+#define PPC_INST_STFS			0xd0000000
+#define PPC_INST_LFD			0xc8000000
+#define PPC_INST_STFD			0xd8000000
+#define PPC_PREFIX_MLS			0x06000000
+#define PPC_PREFIX_8LS			0x04000000
+
+/* Prefixed instructions */
+#define PPC_INST_PADDI			0x38000000
+#define PPC_INST_PLD			0xe4000000
+#define PPC_INST_PSTD			0xf4000000
+
+/* Macros to insert fields into opcodes: ___PPC_* take numeric values; __PPC_R{A,A0,B,S,T} take a register name that is pasted onto __REG_ / __REGA0_ */
+#define ___PPC_RA(a)	(((a) & 0x1f) << 16)
+#define ___PPC_RB(b)	(((b) & 0x1f) << 11)
+#define ___PPC_RC(c)	(((c) & 0x1f) << 6)
+#define ___PPC_RS(s)	(((s) & 0x1f) << 21)
+#define ___PPC_RT(t)	___PPC_RS(t)	/* RT is placed in the same bits as RS */
+#define ___PPC_R(r)	(((r) & 0x1) << 16)
+#define ___PPC_PRS(prs)	(((prs) & 0x1) << 17)
+#define ___PPC_RIC(ric)	(((ric) & 0x3) << 18)
+#define __PPC_RA(a)	___PPC_RA(__REG_##a)
+#define __PPC_RA0(a)	___PPC_RA(__REGA0_##a)	/* same bits as RA, but resolved through the __REGA0_ names */
+#define __PPC_RB(b)	___PPC_RB(__REG_##b)
+#define __PPC_RS(s)	___PPC_RS(__REG_##s)
+#define __PPC_RT(t)	___PPC_RT(__REG_##t)
+#define __PPC_XA(a)	((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))	/* low 5 bits at bit 16; bit 5 of the value lands in bit 2 */
+#define __PPC_XB(b)	((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))	/* low 5 bits at bit 11; bit 5 of the value lands in bit 1 */
+#define __PPC_XS(s)	((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))	/* low 5 bits at bit 21; bit 5 of the value lands in bit 0 */
+#define __PPC_XT(s)	__PPC_XS(s)
+#define __PPC_XSP(s)	((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21)	/* bits 1-4 kept, bit 5 moved into bit 0, then shifted to bit 21 */
+#define __PPC_XTP(s)	__PPC_XSP(s)
+#define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
+#define __PPC_PL(p)	(((p) & 0x3) << 16)
+#define __PPC_WC(w)	(((w) & 0x3) << 21)
+#define __PPC_WS(w)	(((w) & 0x1f) << 11)
+#define __PPC_SH(s)	__PPC_WS(s)
+#define __PPC_SH64(s)	(__PPC_SH(s) | (((s) & 0x20) >> 4))	/* 6-bit shift: low 5 bits at bit 11, bit 5 lands in bit 1 */
+#define __PPC_MB(s)	___PPC_RC(s)
+#define __PPC_ME(s)	(((s) & 0x1f) << 1)
+#define __PPC_MB64(s)	(__PPC_MB(s) | ((s) & 0x20))	/* 6-bit mask bound: low 5 bits at bit 6, bit 5 stays at bit 5 */
+#define __PPC_ME64(s)	__PPC_MB64(s)	/* encoded in the same bits as MB64 */
+#define __PPC_BI(s)	(((s) & 0x1f) << 16)
+#define __PPC_CT(t)	(((t) & 0x0f) << 21)
+#define __PPC_SPR(r)	((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))	/* 10-bit number split: low 5 bits at bit 16, high 5 bits at bit 11 */
+#define __PPC_RC21	(0x1 << 10)	/* constant: bit 10 counting from the least significant bit */
+#define __PPC_PRFX_R(r)	(((r) & 0x1) << 20)
+#define __PPC_EH(eh)	(((eh) & 0x1) << 0)
+
+/*
+ * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits
+ * has high bit set, high 16 bits must be adjusted. These macros do that (stolen
+ * from binutils).
+ */
+#define PPC_LO(v)	((v) & 0xffff)	/* bits 0-15 */
+#define PPC_HI(v)	(((v) >> 16) & 0xffff)	/* bits 16-31, no adjustment */
+#define PPC_HA(v)	PPC_HI((v) + 0x8000)	/* bits 16-31 after adding 0x8000, i.e. incremented when bit 15 of v is set */
+#define PPC_HIGHER(v)	(((v) >> 32) & 0xffff)	/* bits 32-47; v must be wider than 32 bits for the shift to be defined */
+#define PPC_HIGHEST(v)	(((v) >> 48) & 0xffff)	/* bits 48-63; same width requirement as PPC_HIGHER */
+
+/* LI Field */
+#define PPC_LI_MASK	0x03fffffc	/* selects bits 2-25; the two low bits and the top six bits are excluded */
+#define PPC_LI(v)	((v) & PPC_LI_MASK)	/* keeps only the bits of v covered by PPC_LI_MASK */
+
+/* Base instruction encoding */
+#define PPC_RAW_CP_ABORT		(0x7c00068c)
+#define PPC_RAW_COPY(a, b)		(PPC_INST_COPY | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DARN(t, l)		(0x7c0005e6 | ___PPC_RT(t) | (((l) & 0x3) << 16))
+#define PPC_RAW_DCBAL(a, b)		(0x7c2005ec | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_DCBZL(a, b)		(0x7c2007ec | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_LQARX(t, a, b, eh)	(0x7c000228 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_LDARX(t, a, b, eh)	(0x7c0000a8 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_LWARX(t, a, b, eh)	(0x7c000028 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_PHWSYNC			(0x7c8004ac)
+#define PPC_RAW_PLWSYNC			(0x7ca004ac)
+#define PPC_RAW_STQCX(t, a, b)		(0x7c00016d | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MADDHD(t, a, b, c)	(0x10000030 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MADDHDU(t, a, b, c)	(0x10000031 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MADDLD(t, a, b, c)	(0x10000033 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MSGSND(b)		(0x7c00019c | ___PPC_RB(b))
+#define PPC_RAW_MSGSYNC			(0x7c0006ec)
+#define PPC_RAW_MSGCLR(b)		(0x7c0001dc | ___PPC_RB(b))
+#define PPC_RAW_MSGSNDP(b)		(0x7c00011c | ___PPC_RB(b))
+#define PPC_RAW_MSGCLRP(b)		(0x7c00015c | ___PPC_RB(b))
+#define PPC_RAW_PASTE(a, b)		(0x7c20070d | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_POPCNTB(a, s)		(PPC_INST_POPCNTB | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_POPCNTD(a, s)		(0x7c0003f4 | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_POPCNTW(a, s)		(0x7c0002f4 | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_RFCI			(0x4c000066)
+#define PPC_RAW_RFDI			(0x4c00004e)
+#define PPC_RAW_RFMCI			(0x4c00004c)
+#define PPC_RAW_TLBILX_LPID		(0x7c000024)
+#define PPC_RAW_TLBILX(t, a, b)		(0x7c000024 | __PPC_T_TLB(t) | 	__PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_WAIT_v203		(0x7c00007c)
+#define PPC_RAW_WAIT(w, p)		(0x7c00003c | __PPC_WC(w) | __PPC_PL(p))
+#define PPC_RAW_TLBIE(lp, a)		(0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp))
+#define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \
+	(0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \
+	(0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBIEL_v205(rb, l)	(0x7c000224 | ___PPC_RB(rb) | ((l) << 21))	/* l is parenthesized so an expression argument is shifted as a whole */
+#define PPC_RAW_TLBSRX_DOT(a, b)	(0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_TLBIVAX(a, b)		(0x7c000624 | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATWE(s, a, w)		(0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_RAW_ERATRE(s, a, w)		(0x7c000166 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_RAW_ERATILX(t, a, b)	(0x7c000066 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATIVAX(s, a, b)	(0x7c000666 | __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATSX(t, a, w)		(0x7c000126 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(w))	/* w names the register placed in the RB field */
+#define PPC_RAW_ERATSX_DOT(t, a, w)	(0x7c000127 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(w))	/* as PPC_RAW_ERATSX with the low opcode bit set */
+#define PPC_RAW_SLBFEE_DOT(t, b)	(0x7c0007a7 | __PPC_RT(t) | __PPC_RB(b))
+#define __PPC_RAW_SLBFEE_DOT(t, b)	(0x7c0007a7 | ___PPC_RT(t) | ___PPC_RB(b))
+#define PPC_RAW_ICBT(c, a, b)		(0x7c00002c | __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_LBZCIX(t, a, b)		(0x7c0006aa | __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_STBCIX(s, a, b)		(0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_DCBFPS(a, b)		(0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
+#define PPC_RAW_DCBSTPS(a, b)		(0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+#define PPC_RAW_SC()			(0x44000002)
+#define PPC_RAW_SYNC()			(0x7c0004ac)
+#define PPC_RAW_ISYNC()			(0x4c00012c)
+
+/*
+ * Define what the VSX XX1 form instructions will look like, then add
+ * the 128 bit load store instructions based on that.
+ */
+#define VSX_XX1(s, a, b)		(__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define VSX_XX3(t, a, b)		(__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
+#define PPC_RAW_STXVD2X(s, a, b)	(0x7c000798 | VSX_XX1((s), a, b))
+#define PPC_RAW_LXVD2X(s, a, b)		(0x7c000698 | VSX_XX1((s), a, b))
+#define PPC_RAW_MFVRD(a, t)		(0x7c000066 | VSX_XX1((t) + 32, a, R0))
+#define PPC_RAW_MTVRD(t, a)		(0x7c000166 | VSX_XX1((t) + 32, a, R0))
+#define PPC_RAW_VPMSUMW(t, a, b)	(0x10000488 | VSX_XX3((t), a, b))
+#define PPC_RAW_VPMSUMD(t, a, b)	(0x100004c8 | VSX_XX3((t), a, b))
+#define PPC_RAW_XXLOR(t, a, b)		(0xf0000490 | VSX_XX3((t), a, b))
+#define PPC_RAW_XXSWAPD(t, a)		(0xf0000250 | VSX_XX3((t), a, a))
+#define PPC_RAW_XVCPSGNDP(t, a, b)	((0xf0000780 | VSX_XX3((t), (a), (b))))
+#define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \
+	((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+#define PPC_RAW_LXVP(xtp, a, i)		(0x18000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_DQ(i))
+#define PPC_RAW_STXVP(xsp, a, i)	(0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i))
+#define PPC_RAW_LXVPX(xtp, a, b)	(0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STXVPX(xsp, a, b)	(0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_PLXVP_P(xtp, i, a, pr)	(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PLXVP_S(xtp, i, a, pr)	(0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))
+#define PPC_RAW_PSTXVP_P(xsp, i, a, pr)	(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PSTXVP_S(xsp, i, a, pr)	(0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))
+#define PPC_RAW_NAP			(0x4c000364)
+#define PPC_RAW_SLEEP			(0x4c0003a4)
+#define PPC_RAW_WINKLE			(0x4c0003e4)
+#define PPC_RAW_STOP			(0x4c0002e4)
+#define PPC_RAW_CLRBHRB			(0x7c00035c)
+#define PPC_RAW_MFBHRBE(r, n)		(0x7c00025c | __PPC_RT(r) | (((n) & 0x3ff) << 11))
+#define PPC_RAW_TRECHKPT		(PPC_INST_TRECHKPT)
+#define PPC_RAW_TRECLAIM(r)		(PPC_INST_TRECLAIM | __PPC_RA(r))
+#define PPC_RAW_TABORT(r)		(0x7c00071d | __PPC_RA(r))
+#define TMRN(x)				((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6))
+#define PPC_RAW_MTTMR(tmr, r)		(0x7c0003dc | TMRN(tmr) | ___PPC_RS(r))
+#define PPC_RAW_MFTMR(tmr, r)		(0x7c0002dc | TMRN(tmr) | ___PPC_RT(r))
+#define PPC_RAW_ICSWX(s, a, b)		(0x7c00032d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ICSWEPX(s, a, b)	(0x7c00076d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SLBIA(IH)		(0x7c0003e4 | (((IH) & 0x7) << 21))
+#define PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb) \
+	(0x100000c7 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21)
+#define PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb) \
+	(0x10000006 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21)
+#define PPC_RAW_LD(r, base, i)		(0xe8000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_LWZ(r, base, i)		(0x80000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LWZX(t, a, b)		(0x7c00002e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STD(r, base, i)		(0xf8000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_STDCX(s, a, b)		(0x7c0001ad | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LFSX(t, a, b)		(0x7c00042e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STFSX(s, a, b)		(0x7c00052e | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LFDX(t, a, b)		(0x7c0004ae | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STFDX(s, a, b)		(0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LVX(t, a, b)		(0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STVX(s, a, b)		(0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDE(t, a, b)		(0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDZE(t, a)		(0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a))
+#define PPC_RAW_ADDME(t, a)		(0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a))
+#define PPC_RAW_ADD(t, a, b)		(0x7c000214 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADD_DOT(t, a, b)	(0x7c000214 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_ADDC(t, a, b)		(0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDC_DOT(t, a, b)	(0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_NOP()			PPC_RAW_ORI(0, 0, 0)
+#define PPC_RAW_BLR()			(0x4e800020)
+#define PPC_RAW_BLRL()			(0x4e800021)
+#define PPC_RAW_MTLR(r)			(0x7c0803a6 | ___PPC_RT(r))
+#define PPC_RAW_MFLR(t)			(0x7c0802a6 | ___PPC_RT(t))
+#define PPC_RAW_BCTR()			(0x4e800420)
+#define PPC_RAW_BCTRL()			(0x4e800421)
+#define PPC_RAW_MTCTR(r)		(0x7c0903a6 | ___PPC_RT(r))
+#define PPC_RAW_ADDI(d, a, i)		(0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_LI(r, i)		PPC_RAW_ADDI(r, 0, i)
+#define PPC_RAW_ADDIS(d, a, i)		(0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC(d, a, i)		(0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC_DOT(d, a, i)	(0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_LIS(r, i)		PPC_RAW_ADDIS(r, 0, i)
+#define PPC_RAW_STDX(r, base, b)	(0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_STDU(r, base, i)	(0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_RAW_STW(r, base, i)		(0x90000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STWU(r, base, i)	(0x94000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STH(r, base, i)		(0xb0000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STB(r, base, i)		(0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LBZ(r, base, i)		(0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LDX(r, base, b)		(0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LHZ(r, base, i)		(0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LHBRX(r, base, b)	(0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LWBRX(r, base, b)	(0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LDBRX(r, base, b)	(0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_STWCX(s, a, b)		(0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPWI(a, i)		(0x2c000000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPDI(a, i)		(0x2c200000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPW(a, b)		(0x7c000000 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPD(a, b)		(0x7c200000 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPLWI(a, i)		(0x28000000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPLDI(a, i)		(0x28200000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPLW(a, b)		(0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPLD(a, b)		(0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUB(d, a, b)		(0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b))
+#define PPC_RAW_SUBFC(d, a, b)		(0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFE(d, a, b)		(0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFIC(d, a, i)		(0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_SUBFZE(d, a)		(0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a))
+#define PPC_RAW_MULD(d, a, b)		(0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULW(d, a, b)		(0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULHWU(d, a, b)		(0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULI(d, a, i)		(0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_DIVWU(d, a, b)		(0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDU(d, a, b)		(0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDE(t, a, b)		(0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDE_DOT(t, a, b)	(0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_DIVDEU(t, a, b)		(0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDEU_DOT(t, a, b)	(0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_AND(d, a, b)		(0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_ANDI(d, a, i)		(0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ANDIS(d, a, i)		(0x74000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_AND_DOT(d, a, b)	(0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_OR(d, a, b)		(0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_MR(d, a)		PPC_RAW_OR(d, a, a)
+#define PPC_RAW_ORI(d, a, i)		(0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORIS(d, a, i)		(0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_NOR(d, a, b)		(0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_XOR(d, a, b)		(0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_XORI(d, a, i)		(0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_XORIS(d, a, i)		(0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_EXTSW(d, a)		(0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a))
+#define PPC_RAW_SLW(d, a, s)		(0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SLD(d, a, s)		(0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRW(d, a, s)		(0x7c000430 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAW(d, a, s)		(0x7c000630 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAWI(d, a, i)		(0x7c000670 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i))
+#define PPC_RAW_SRD(d, a, s)		(0x7c000436 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAD(d, a, s)		(0x7c000634 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRADI(d, a, i)		(0x7c000674 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i))
+#define PPC_RAW_RLWINM(d, a, i, mb, me)	(0x54000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLWINM_DOT(d, a, i, mb, me) \
+					(0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLDICL(d, a, i, mb)     (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb))
+#define PPC_RAW_RLDICR(d, a, i, me)     (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
+
+/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
+#define PPC_RAW_SLWI(d, a, i)		PPC_RAW_RLWINM(d, a, i, 0, 31-(i))
+/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
+#define PPC_RAW_SRWI(d, a, i)		PPC_RAW_RLWINM(d, a, 32-(i), i, 31)
+/* sldi = rldicr Rx, Ry, n, 63-n */
+#define PPC_RAW_SLDI(d, a, i)		PPC_RAW_RLDICR(d, a, i, 63-(i))
+/* srdi = rldicl Rx, Ry, 64-n, n */
+#define PPC_RAW_SRDI(d, a, i)		PPC_RAW_RLDICL(d, a, 64-(i), i)
+
+#define PPC_RAW_NEG(d, a)		(0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a))
+
+#define PPC_RAW_MFSPR(d, spr)		(0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr))
+#define PPC_RAW_MTSPR(spr, d)		(0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
+#define PPC_RAW_EIEIO()			(0x7c0006ac)
+
+#define PPC_RAW_BRANCH(offset)		(0x48000000 | PPC_LI(offset))
+#define PPC_RAW_BL(offset)		(0x48000001 | PPC_LI(offset))
+#define PPC_RAW_TW(t0, a, b)		(0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_TRAP()			PPC_RAW_TW(31, 0, 0)
+#define PPC_RAW_SETB(t, bfa)		(0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2))
+
+/* Deal with instructions that older assemblers aren't aware of */
+#define	PPC_BCCTR_FLUSH		stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
+#define	PPC_CP_ABORT		stringify_in_c(.long PPC_RAW_CP_ABORT)
+#define	PPC_COPY(a, b)		stringify_in_c(.long PPC_RAW_COPY(a, b))
+#define PPC_DARN(t, l)		stringify_in_c(.long PPC_RAW_DARN(t, l))
+#define	PPC_DCBAL(a, b)		stringify_in_c(.long PPC_RAW_DCBAL(a, b))
+#define	PPC_DCBZL(a, b)		stringify_in_c(.long PPC_RAW_DCBZL(a, b))
+#define	PPC_DIVDE(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
+#define	PPC_DIVDEU(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
+#define PPC_DSSALL		stringify_in_c(.long PPC_INST_DSSALL)
+#define PPC_LQARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
+#define PPC_STQCX(t, a, b)	stringify_in_c(.long PPC_RAW_STQCX(t, a, b))
+#define PPC_MADDHD(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c))
+#define PPC_MADDHDU(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c))
+#define PPC_MADDLD(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDLD(t, a, b, c))
+#define PPC_MSGSND(b)		stringify_in_c(.long PPC_RAW_MSGSND(b))
+#define PPC_MSGSYNC		stringify_in_c(.long PPC_RAW_MSGSYNC)
+#define PPC_MSGCLR(b)		stringify_in_c(.long PPC_RAW_MSGCLR(b))
+#define PPC_MSGSNDP(b)		stringify_in_c(.long PPC_RAW_MSGSNDP(b))
+#define PPC_MSGCLRP(b)		stringify_in_c(.long PPC_RAW_MSGCLRP(b))
+#define PPC_PASTE(a, b)		stringify_in_c(.long PPC_RAW_PASTE(a, b))
+#define PPC_POPCNTB(a, s)	stringify_in_c(.long PPC_RAW_POPCNTB(a, s))
+#define PPC_POPCNTD(a, s)	stringify_in_c(.long PPC_RAW_POPCNTD(a, s))
+#define PPC_POPCNTW(a, s)	stringify_in_c(.long PPC_RAW_POPCNTW(a, s))
+#define PPC_RFCI		stringify_in_c(.long PPC_RAW_RFCI)
+#define PPC_RFDI		stringify_in_c(.long PPC_RAW_RFDI)
+#define PPC_RFMCI		stringify_in_c(.long PPC_RAW_RFMCI)
+#define PPC_TLBILX(t, a, b)	stringify_in_c(.long PPC_RAW_TLBILX(t, a, b))
+#define PPC_TLBILX_ALL(a, b)	PPC_TLBILX(0, a, b)
+#define PPC_TLBILX_PID(a, b)	PPC_TLBILX(1, a, b)
+#define PPC_TLBILX_LPID		stringify_in_c(.long PPC_RAW_TLBILX_LPID)
+#define PPC_TLBILX_VA(a, b)	PPC_TLBILX(3, a, b)
+#define PPC_WAIT_v203		stringify_in_c(.long PPC_RAW_WAIT_v203)
+#define PPC_WAIT(w, p)		stringify_in_c(.long PPC_RAW_WAIT(w, p))
+#define PPC_TLBIE(lp, a) 	stringify_in_c(.long PPC_RAW_TLBIE(lp, a))
+#define	PPC_TLBIE_5(rb, rs, ric, prs, r) \
+				stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r))
+#define	PPC_TLBIEL(rb,rs,ric,prs,r) \
+				stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r))
+#define PPC_TLBIEL_v205(rb, l)	stringify_in_c(.long PPC_RAW_TLBIEL_v205(rb, l))
+#define PPC_TLBSRX_DOT(a, b)	stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b))
+#define PPC_TLBIVAX(a, b)	stringify_in_c(.long PPC_RAW_TLBIVAX(a, b))
+
+#define PPC_ERATWE(s, a, w)	stringify_in_c(.long PPC_RAW_ERATWE(s, a, w))
+#define PPC_ERATRE(s, a, w)	stringify_in_c(.long PPC_RAW_ERATRE(s, a, w))	/* forwards s, a, w unchanged to the raw encoder */
+#define PPC_ERATILX(t, a, b)	stringify_in_c(.long PPC_RAW_ERATILX(t, a, b))
+#define PPC_ERATIVAX(s, a, b)	stringify_in_c(.long PPC_RAW_ERATIVAX(s, a, b))
+#define PPC_ERATSX(t, a, w)	stringify_in_c(.long PPC_RAW_ERATSX(t, a, w))
+#define PPC_ERATSX_DOT(t, a, w)	stringify_in_c(.long PPC_RAW_ERATSX_DOT(t, a, w))
+#define PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_RAW_SLBFEE_DOT(t, b))
+#define __PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long __PPC_RAW_SLBFEE_DOT(t, b))
+#define PPC_ICBT(c, a, b)	stringify_in_c(.long PPC_RAW_ICBT(c, a, b))
+/* PASemi instructions */
+#define LBZCIX(t, a, b)		stringify_in_c(.long PPC_RAW_LBZCIX(t, a, b))
+#define STBCIX(s, a, b)		stringify_in_c(.long PPC_RAW_STBCIX(s, a, b))
+#define PPC_DCBFPS(a, b)	stringify_in_c(.long PPC_RAW_DCBFPS(a, b))
+#define PPC_DCBSTPS(a, b)	stringify_in_c(.long PPC_RAW_DCBSTPS(a, b))
+#define PPC_PHWSYNC		stringify_in_c(.long PPC_RAW_PHWSYNC)
+#define PPC_PLWSYNC		stringify_in_c(.long PPC_RAW_PLWSYNC)
+#define STXVD2X(s, a, b)	stringify_in_c(.long PPC_RAW_STXVD2X(s, a, b))
+#define LXVD2X(s, a, b)		stringify_in_c(.long PPC_RAW_LXVD2X(s, a, b))
+#define MFVRD(a, t)		stringify_in_c(.long PPC_RAW_MFVRD(a, t))
+#define MTVRD(t, a)		stringify_in_c(.long PPC_RAW_MTVRD(t, a))
+#define VPMSUMW(t, a, b)	stringify_in_c(.long PPC_RAW_VPMSUMW(t, a, b))
+#define VPMSUMD(t, a, b)	stringify_in_c(.long PPC_RAW_VPMSUMD(t, a, b))
+#define XXLOR(t, a, b)		stringify_in_c(.long PPC_RAW_XXLOR(t, a, b))
+#define XXSWAPD(t, a)		stringify_in_c(.long PPC_RAW_XXSWAPD(t, a))
+#define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_RAW_XVCPSGNDP(t, a, b)))
+
+#define VPERMXOR(vrt, vra, vrb, vrc)				\
+	stringify_in_c(.long (PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc)))
+
+#define PPC_NAP			stringify_in_c(.long PPC_RAW_NAP)
+#define PPC_SLEEP		stringify_in_c(.long PPC_RAW_SLEEP)
+#define PPC_WINKLE		stringify_in_c(.long PPC_RAW_WINKLE)
+
+#define PPC_STOP		stringify_in_c(.long PPC_RAW_STOP)
+
+/* BHRB instructions */
+#define PPC_CLRBHRB		stringify_in_c(.long PPC_RAW_CLRBHRB)
+#define PPC_MFBHRBE(r, n)	stringify_in_c(.long PPC_RAW_MFBHRBE(r, n))
+
+/* Transactional memory instructions */
+#define TRECHKPT		stringify_in_c(.long PPC_RAW_TRECHKPT)
+#define TRECLAIM(r)		stringify_in_c(.long PPC_RAW_TRECLAIM(r))
+#define TABORT(r)		stringify_in_c(.long PPC_RAW_TABORT(r))
+
+/* book3e thread control instructions */
+#define MTTMR(tmr, r)		stringify_in_c(.long PPC_RAW_MTTMR(tmr, r))
+#define MFTMR(tmr, r)		stringify_in_c(.long PPC_RAW_MFTMR(tmr, r))
+
+/* Coprocessor instructions */
+#define PPC_ICSWX(s, a, b)	stringify_in_c(.long PPC_RAW_ICSWX(s, a, b))
+#define PPC_ICSWEPX(s, a, b)	stringify_in_c(.long PPC_RAW_ICSWEPX(s, a, b))
+
+#define PPC_SLBIA(IH)	stringify_in_c(.long PPC_RAW_SLBIA(IH))
+
+/*
+ * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix
+ * implies CPU_FTR_ARCH_300). USER/GUEST invalidates may only be used by radix
+ * mode (on HPT these would also invalidate various SLBEs which may not be
+ * desired).
+ */
+#define PPC_ISA_3_0_INVALIDATE_ERAT	PPC_SLBIA(7)
+#define PPC_RADIX_INVALIDATE_ERAT_USER	PPC_SLBIA(3)
+#define PPC_RADIX_INVALIDATE_ERAT_GUEST	PPC_SLBIA(6)
+
+#define VCMPEQUD_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb))
+
+#define VCMPEQUB_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb))
+
+#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
new file mode 100644
index 0000000000..d9fcff5750
--- /dev/null
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ */
+#ifndef _ASM_POWERPC_PPC_PCI_H
+#define _ASM_POWERPC_PPC_PCI_H
+#ifdef __KERNEL__
+
+#ifdef CONFIG_PCI
+
+#include <linux/pci.h>
+#include <asm/pci-bridge.h>
+
+extern unsigned long isa_io_base;
+
+extern struct list_head hose_list;
+
+extern struct pci_dev *isa_bridge_pcidev;	/* may be NULL if no ISA bus */
+
+/* Bus Unit ID macros; get the low and high 32 bits of the 64-bit BUID */
+#define BUID_HI(buid) upper_32_bits(buid)
+#define BUID_LO(buid) lower_32_bits(buid)
+
+/* PCI device_node operations */
+struct device_node;
+struct pci_dn;
+
+void *pci_traverse_device_nodes(struct device_node *start,
+				void *(*fn)(struct device_node *, void *),
+				void *data);
+extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
+
+/* From rtas_pci.h */
+extern void init_pci_config_tokens (void);
+extern unsigned long get_phb_buid (struct device_node *);
+extern int rtas_setup_phb(struct pci_controller *phb);
+
+#ifdef CONFIG_EEH
+
+void eeh_addr_cache_insert_dev(struct pci_dev *dev);
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
+int eeh_pci_enable(struct eeh_pe *pe, int function);
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
+void eeh_save_bars(struct eeh_dev *edev);
+int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
+int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
+void eeh_pe_state_mark(struct eeh_pe *pe, int state);
+void eeh_pe_mark_isolated(struct eeh_pe *pe);
+void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed);
+void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state);
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
+
+void eeh_sysfs_add_device(struct pci_dev *pdev);
+void eeh_sysfs_remove_device(struct pci_dev *pdev);
+
+#endif /* CONFIG_EEH */
+
+#ifdef CONFIG_FSL_ULI1575
+void __init uli_init(void);
+#endif /* CONFIG_FSL_ULI1575 */
+
+#define PCI_BUSNO(bdfn) (((bdfn) >> 8) & 0xff)	/* bits 8-15 of bdfn; argument parenthesized so expressions are shifted as a whole */
+
+#else /* CONFIG_PCI */
+static inline void init_pci_config_tokens(void) { }	/* empty stub used when CONFIG_PCI is not set */
+#endif /* !CONFIG_PCI */
+
+#if !defined(CONFIG_PCI) || !defined(CONFIG_FSL_ULI1575)
+static inline void __init uli_init(void) {}	/* empty stub when CONFIG_PCI or CONFIG_FSL_ULI1575 is not set */
+#endif /* !defined(CONFIG_PCI) || !defined(CONFIG_FSL_ULI1575) */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_PPC_PCI_H */
diff --git a/arch/powerpc/include/asm/ppc4xx.h b/arch/powerpc/include/asm/ppc4xx.h
new file mode 100644
index 0000000000..b37119e485
--- /dev/null
+++ b/arch/powerpc/include/asm/ppc4xx.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PPC4xx Prototypes and definitions
+ *
+ * Copyright 2008 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ */
+
+#ifndef __ASM_POWERPC_PPC4xx_H__
+#define __ASM_POWERPC_PPC4xx_H__
+
+extern void __noreturn ppc4xx_reset_system(char *cmd);
+
+#endif /* __ASM_POWERPC_PPC4xx_H__ */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
new file mode 100644
index 0000000000..e7792aa135
--- /dev/null
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -0,0 +1,886 @@
+/*
+ * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
+ */
+#ifndef _ASM_POWERPC_PPC_ASM_H
+#define _ASM_POWERPC_PPC_ASM_H
+
+#include <linux/stringify.h>
+#include <asm/asm-compat.h>
+#include <asm/processor.h>
+#include <asm/ppc-opcode.h>
+#include <asm/firmware.h>
+#include <asm/feature-fixups.h>
+#include <asm/extable.h>
+
+#ifdef __ASSEMBLY__
+
+#define SZL			(BITS_PER_LONG/8)
+
+/*
+ * This expands to a sequence of operations with reg incrementing from
+ * start to end inclusive, of this form:
+ *
+ *   op  reg, (offset + (width * reg))(base)
+ *
+ * Note that offset is not the offset of the first operation unless start
+ * is zero (or width is zero).
+ */
+.macro OP_REGS op, width, start, end, base, offset
+	.Lreg=\start
+	.rept (\end - \start + 1)
+	\op	.Lreg, \offset + \width * .Lreg(\base)
+	.Lreg=.Lreg+1
+	.endr
+.endm
+
+/*
+ * This expands to a sequence of register clears for regs start to end
+ * inclusive, of the form:
+ *
+ *   li rN, 0
+ */
+.macro ZEROIZE_REGS start, end
+	.Lreg=\start
+	.rept (\end - \start + 1)
+	li	.Lreg, 0
+	.Lreg=.Lreg+1
+	.endr
+.endm
+
+/*
+ * Macros for storing registers into and loading registers from
+ * exception frames.
+ */
+#ifdef __powerpc64__
+#define SAVE_GPRS(start, end, base)	OP_REGS std, 8, start, end, base, GPR0
+#define REST_GPRS(start, end, base)	OP_REGS ld, 8, start, end, base, GPR0
+#define SAVE_NVGPRS(base)		SAVE_GPRS(14, 31, base)
+#define REST_NVGPRS(base)		REST_GPRS(14, 31, base)
+#else /* !__powerpc64__ */
+#define SAVE_GPRS(start, end, base)	OP_REGS stw, 4, start, end, base, GPR0
+#define REST_GPRS(start, end, base)	OP_REGS lwz, 4, start, end, base, GPR0
+#define SAVE_NVGPRS(base)		SAVE_GPRS(13, 31, base)
+#define REST_NVGPRS(base)		REST_GPRS(13, 31, base)
+#endif /* __powerpc64__ */
+
+#define	ZEROIZE_GPRS(start, end)	ZEROIZE_REGS start, end
+#ifdef __powerpc64__
+#define	ZEROIZE_NVGPRS()		ZEROIZE_GPRS(14, 31)
+#else /* !__powerpc64__ */
+#define	ZEROIZE_NVGPRS()		ZEROIZE_GPRS(13, 31)
+#endif /* __powerpc64__ */
+#define	ZEROIZE_GPR(n)			ZEROIZE_GPRS(n, n)
+
+#define SAVE_GPR(n, base)		SAVE_GPRS(n, n, base)
+#define REST_GPR(n, base)		REST_GPRS(n, n, base)
+
+/* macros for handling user register sanitisation */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_SYSCALL_GPRS()			ZEROIZE_GPR(0);		\
+						ZEROIZE_GPRS(5, 12);	\
+						ZEROIZE_NVGPRS()
+#define SANITIZE_GPR(n)				ZEROIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)		ZEROIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()			ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()		REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_SYSCALL_GPRS()
+#define SANITIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()		REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
+#define SAVE_FPR(n, base)	stfd	n,8*TS_FPRWIDTH*(n)(base)
+#define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
+#define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
+#define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
+#define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
+#define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
+#define REST_FPR(n, base)	lfd	n,8*TS_FPRWIDTH*(n)(base)
+#define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
+#define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
+#define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
+#define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
+#define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
+
+#define SAVE_VR(n,b,base)	li b,16*(n);  stvx n,base,b
+#define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
+#define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
+#define SAVE_8VRS(n,b,base)	SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
+#define SAVE_16VRS(n,b,base)	SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
+#define SAVE_32VRS(n,b,base)	SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
+#define REST_VR(n,b,base)	li b,16*(n); lvx n,base,b
+#define REST_2VRS(n,b,base)	REST_VR(n,b,base); REST_VR(n+1,b,base)
+#define REST_4VRS(n,b,base)	REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
+#define REST_8VRS(n,b,base)	REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
+#define REST_16VRS(n,b,base)	REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
+#define REST_32VRS(n,b,base)	REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
+
+#ifdef __BIG_ENDIAN__
+#define STXVD2X_ROT(n,b,base)		STXVD2X(n,b,base)
+#define LXVD2X_ROT(n,b,base)		LXVD2X(n,b,base)
+#else
+#define STXVD2X_ROT(n,b,base)		XXSWAPD(n,n);		\
+					STXVD2X(n,b,base);	\
+					XXSWAPD(n,n)
+
+#define LXVD2X_ROT(n,b,base)		LXVD2X(n,b,base);	\
+					XXSWAPD(n,n)
+#endif
+/* Save the lower 32 VSRs in the thread VSR region */
+#define SAVE_VSR(n,b,base)	li b,16*(n);  STXVD2X_ROT(n,R##base,R##b)
+#define SAVE_2VSRS(n,b,base)	SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
+#define SAVE_4VSRS(n,b,base)	SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
+#define SAVE_8VSRS(n,b,base)	SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
+#define SAVE_16VSRS(n,b,base)	SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
+#define SAVE_32VSRS(n,b,base)	SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
+#define REST_VSR(n,b,base)	li b,16*(n); LXVD2X_ROT(n,R##base,R##b)
+#define REST_2VSRS(n,b,base)	REST_VSR(n,b,base); REST_VSR(n+1,b,base)
+#define REST_4VSRS(n,b,base)	REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
+#define REST_8VSRS(n,b,base)	REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
+#define REST_16VSRS(n,b,base)	REST_8VSRS(n,b,base); REST_8VSRS(n+8,b,base)
+#define REST_32VSRS(n,b,base)	REST_16VSRS(n,b,base); REST_16VSRS(n+16,b,base)
+
+/*
+ * b = base register for addressing, o = base offset from register of 1st EVR
+ * n = first EVR, s = scratch
+ */
+#define SAVE_EVR(n,s,b,o)	evmergehi s,s,n; stw s,o+4*(n)(b)
+#define SAVE_2EVRS(n,s,b,o)	SAVE_EVR(n,s,b,o); SAVE_EVR(n+1,s,b,o)
+#define SAVE_4EVRS(n,s,b,o)	SAVE_2EVRS(n,s,b,o); SAVE_2EVRS(n+2,s,b,o)
+#define SAVE_8EVRS(n,s,b,o)	SAVE_4EVRS(n,s,b,o); SAVE_4EVRS(n+4,s,b,o)
+#define SAVE_16EVRS(n,s,b,o)	SAVE_8EVRS(n,s,b,o); SAVE_8EVRS(n+8,s,b,o)
+#define SAVE_32EVRS(n,s,b,o)	SAVE_16EVRS(n,s,b,o); SAVE_16EVRS(n+16,s,b,o)
+#define REST_EVR(n,s,b,o)	lwz s,o+4*(n)(b); evmergelo n,s,n
+#define REST_2EVRS(n,s,b,o)	REST_EVR(n,s,b,o); REST_EVR(n+1,s,b,o)
+#define REST_4EVRS(n,s,b,o)	REST_2EVRS(n,s,b,o); REST_2EVRS(n+2,s,b,o)
+#define REST_8EVRS(n,s,b,o)	REST_4EVRS(n,s,b,o); REST_4EVRS(n+4,s,b,o)
+#define REST_16EVRS(n,s,b,o)	REST_8EVRS(n,s,b,o); REST_8EVRS(n+8,s,b,o)
+#define REST_32EVRS(n,s,b,o)	REST_16EVRS(n,s,b,o); REST_16EVRS(n+16,s,b,o)
+
+/* Macros to adjust thread priority for hardware multithreading */
+#define HMT_VERY_LOW	or	31,31,31	# very low priority
+#define HMT_LOW		or	1,1,1
+#define HMT_MEDIUM_LOW  or	6,6,6		# medium low priority
+#define HMT_MEDIUM	or	2,2,2
+#define HMT_MEDIUM_HIGH or	5,5,5		# medium high priority
+#define HMT_HIGH	or	3,3,3
+#define HMT_EXTRA_HIGH	or	7,7,7		# power7 only
+
+#ifdef CONFIG_PPC64
+#define ULONG_SIZE	8
+#else /* !CONFIG_PPC64 */
+#define ULONG_SIZE	4
+#endif /* CONFIG_PPC64 */
+#define __VCPU_GPR(n)	(VCPU_GPRS + ((n) * ULONG_SIZE))	/* n-th ULONG_SIZE-wide slot past VCPU_GPRS */
+#define VCPU_GPR(n)	__VCPU_GPR(__REG_##n)	/* pastes n onto the __REG_ prefix */
+
+#ifdef __KERNEL__
+
+/*
+ * Used to name C functions called from asm
+ */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define CFUNC(name) name@notoc
+#else
+#define CFUNC(name) name
+#endif
+
+/*
+ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
+ * version below in the else case of the ifdef.
+ */
+#ifdef __powerpc64__
+
+#define STACKFRAMESIZE 256
+#define __STK_REG(i)   (112 + ((i)-14)*8)
+#define STK_REG(i)     __STK_REG(__REG_##i)
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define STK_GOT		24
+#define __STK_PARAM(i)	(32 + ((i)-3)*8)
+#else /* !CONFIG_PPC64_ELF_ABI_V2 */
+#define STK_GOT		40
+#define __STK_PARAM(i)	(48 + ((i)-3)*8)
+#endif /* CONFIG_PPC64_ELF_ABI_V2 */
+#define STK_PARAM(i)	__STK_PARAM(__REG_##i)
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+
+#define _GLOBAL(name) \
+	.align 2 ; \
+	.type name,@function; \
+	.globl name; \
+name:
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define _GLOBAL_TOC _GLOBAL
+#else /* !CONFIG_PPC_KERNEL_PCREL */
+#define _GLOBAL_TOC(name) \
+	.align 2 ; \
+	.type name,@function; \
+	.globl name; \
+name: \
+0:	addis r2,r12,(.TOC.-0b)@ha; \
+	addi r2,r2,(.TOC.-0b)@l; \
+	.localentry name,.-name
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+
+#define DOTSYM(a)	a
+
+#else /* !CONFIG_PPC64_ELF_ABI_V2 */
+
+#define XGLUE(a,b) a##b
+#define GLUE(a,b) XGLUE(a,b)
+
+#define _GLOBAL(name) \
+	.align 2 ; \
+	.globl name; \
+	.globl GLUE(.,name); \
+	.pushsection ".opd","aw"; \
+name: \
+	.quad GLUE(.,name); \
+	.quad .TOC.@tocbase; \
+	.quad 0; \
+	.popsection; \
+	.type GLUE(.,name),@function; \
+GLUE(.,name):
+
+#define _GLOBAL_TOC(name) _GLOBAL(name)
+
+#define DOTSYM(a)	GLUE(.,a)
+
+#endif /* CONFIG_PPC64_ELF_ABI_V2 */
+
+#else /* 32-bit */
+
+#define _GLOBAL(n)	\
+	.globl n;	\
+n:
+
+#define _GLOBAL_TOC(name) _GLOBAL(name)
+
+#define DOTSYM(a)	a
+
+#endif /* __powerpc64__ */
+
+/*
+ * __kprobes (the C annotation) puts the symbol into the .kprobes.text
+ * section, which gets emitted at the end of regular text.
+ *
+ * _ASM_NOKPROBE_SYMBOL and NOKPROBE_SYMBOL just add the symbol to
+ * a blacklist. The former is for core kprobe functions/data, the
+ * latter is for those that incidentally must be excluded from probing
+ * and allows them to be linked at a more optimal location within text.
+ */
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE_SYMBOL(entry)			\
+	.pushsection "_kprobe_blacklist","aw";		\
+	PPC_LONG (entry) ;				\
+	.popsection
+#else
+#define _ASM_NOKPROBE_SYMBOL(entry)
+#endif
+
+#define FUNC_START(name)	_GLOBAL(name)
+#define FUNC_END(name)
+
+/* 
+ * LOAD_REG_IMMEDIATE(rn, expr)
+ *   Loads the value of the constant expression 'expr' into register 'rn'
+ *   using immediate instructions only.  Use this when it's important not
+ *   to reference other data (i.e. on ppc64 when the TOC pointer is not
+ *   valid) and when 'expr' is a constant or absolute address.
+ *
+ * LOAD_REG_ADDR(rn, name)
+ *   Loads the address of label 'name' into register 'rn'.  Use this when
+ *   you don't particularly need immediate instructions only, but you need
+ *   the whole address in one register (e.g. it's a structure address and
+ *   you want to access various offsets within it).  On ppc32 this is
+ *   identical to LOAD_REG_IMMEDIATE.
+ *
+ * LOAD_REG_ADDR_PIC(rn, name)
+ *   Loads the address of label 'name' into register 'rn'. Use this when
+ *   the kernel doesn't run at the linked or relocated address. Please
+ *   note that this macro will clobber the lr register.
+ *
+ * LOAD_REG_ADDRBASE(rn, name)
+ * ADDROFF(name)
+ *   LOAD_REG_ADDRBASE loads part of the address of label 'name' into
+ *   register 'rn'.  ADDROFF(name) returns the remainder of the address as
+ *   a constant expression.  ADDROFF(name) is a signed expression < 16 bits
+ *   in size, so is suitable for use directly as an offset in load and store
+ *   instructions.  Use this when loading/storing a single word or less as:
+ *      LOAD_REG_ADDRBASE(rX, name)
+ *      ld	rY,ADDROFF(name)(rX)
+ */
+
+/* Be careful, this will clobber the lr register. */
+#define LOAD_REG_ADDR_PIC(reg, name)		\
+	bcl	20,31,$+4;			\
+0:	mflr	reg;				\
+	addis	reg,reg,(name - 0b)@ha;		\
+	addi	reg,reg,(name - 0b)@l;
+
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
+#define __AS_ATHIGH high
+#else
+#define __AS_ATHIGH h
+#endif
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+	.if (\x) >= 0x8000 || (\x) < -0x8000
+		lis \r, (\x)@__AS_ATHIGH
+		.if (\x) & 0xffff != 0
+			ori \r, \r, (\x)@l
+		.endif
+	.else
+		li \r, (\x)@l
+	.endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+	.if (\x) >= 0x80000000 || (\x) < -0x80000000
+		__LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+		sldi	\r, \r, 32
+		.if (\x) & 0xffff0000 != 0
+			oris \r, \r, (\x)@__AS_ATHIGH
+		.endif
+		.if (\x) & 0xffff != 0
+			ori \r, \r, (\x)@l
+		.endif
+	.else
+		__LOAD_REG_IMMEDIATE_32 \r, \x
+	.endif
+.endm
+
+#ifdef __powerpc64__
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define __LOAD_PACA_TOC(reg)			\
+	li	reg,-1
+#else
+#define __LOAD_PACA_TOC(reg)			\
+	ld	reg,PACATOC(r13)
+#endif
+
+#define LOAD_PACA_TOC()				\
+	__LOAD_PACA_TOC(r2)
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
+	lis	tmp, (expr)@highest;		\
+	lis	reg, (expr)@__AS_ATHIGH;	\
+	ori	tmp, tmp, (expr)@higher;	\
+	ori	reg, reg, (expr)@l;		\
+	rldimi	reg, tmp, 32, 0
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LOAD_REG_ADDR(reg,name)			\
+	pla	reg,name@pcrel
+
+#else
+#define LOAD_REG_ADDR(reg,name)			\
+	addis	reg,r2,name@toc@ha;		\
+	addi	reg,reg,name@toc@l
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * This is used in register-constrained interrupt handlers. Not to be used
+ * by BOOK3S. ld complains with "got/toc optimization is not supported" if r2
+ * is not used for the TOC offset, so use @got(tocreg). If the interrupt
+ * handlers saved r2 instead, LOAD_REG_ADDR could be used.
+ */
+#define LOAD_REG_ADDR_ALTTOC(reg,tocreg,name)	\
+	ld	reg,name@got(tocreg)
+#endif
+
+#define LOAD_REG_ADDRBASE(reg,name)	LOAD_REG_ADDR(reg,name)
+#define ADDROFF(name)			0
+
+/* offsets for stack frame layout */
+#define LRSAVE	16
+
+/*
+ * GCC stack frames follow a different pattern on 32 vs 64. This can be used
+ * to make asm frames be consistent with C.
+ */
+#define PPC_CREATE_STACK_FRAME(size)			\
+	mflr		r0;				\
+	std		r0,16(r1);			\
+	stdu		r1,-(size)(r1)
+
+#else /* 32-bit */
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr)		\
+	lis	reg,(expr)@ha;		\
+	addi	reg,reg,(expr)@l;
+
+#define LOAD_REG_ADDR(reg,name)		LOAD_REG_IMMEDIATE_SYM(reg, name)
+
+#define LOAD_REG_ADDRBASE(reg, name)	lis	reg,name@ha
+#define ADDROFF(name)			name@l
+
+/* offsets for stack frame layout */
+#define LRSAVE	4
+
+#define PPC_CREATE_STACK_FRAME(size)			\
+	stwu		r1,-(size)(r1);			\
+	mflr		r0;				\
+	stw		r0,(size+4)(r1)
+
+#endif
+
+/* various errata or part fixups */
+#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500)
+#define MFTB(dest)			\
+90:	mfspr dest, SPRN_TBRL;		\
+BEGIN_FTR_SECTION_NESTED(96);		\
+	cmpwi dest,0;			\
+	beq-  90b;			\
+END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
+#else
+#define MFTB(dest)			MFTBL(dest)
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest)			mftb dest
+#define MFTBU(dest)			mftbu dest
+#else
+#define MFTBL(dest)			mfspr dest, SPRN_TBRL
+#define MFTBU(dest)			mfspr dest, SPRN_TBRU
+#endif
+
+#ifndef CONFIG_SMP
+#define TLBSYNC
+#else
+#define TLBSYNC		tlbsync; sync
+#endif
+
+#ifdef CONFIG_PPC64
+#define MTOCRF(FXM, RS)			\
+	BEGIN_FTR_SECTION_NESTED(848);	\
+	mtcrf	(FXM), RS;		\
+	FTR_SECTION_ELSE_NESTED(848);	\
+	mtocrf (FXM), RS;		\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848)
+#endif
+
+/*
+ * This instruction is not implemented on the PPC 603 or 601; however, on
+ * the 403GCX and 405GP tlbia IS defined and tlbie is not.
+ * All of these instructions exist in the 8xx, they have magical powers,
+ * and they must be used.
+ */
+
+#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx)
+#define tlbia					\
+	li	r4,1024;			\
+	mtctr	r4;				\
+	lis	r4,KERNELBASE@h;		\
+	.machine push;				\
+	.machine "power4";			\
+0:	tlbie	r4;				\
+	.machine pop;				\
+	addi	r4,r4,0x1000;			\
+	bdnz	0b
+#endif
+
+
+#ifdef CONFIG_IBM440EP_ERR42
+#define PPC440EP_ERR42 isync
+#else
+#define PPC440EP_ERR42
+#endif
+
+/* The following stops all load and store data streams associated with stream
+ * ID (ie. streams created explicitly).  The embedded and server mnemonics for
+ * dcbt are different so this must only be used for server.
+ */
+#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch)	\
+       lis     scratch,0x60000000@h;			\
+       dcbt    0,scratch,0b01010
+
+/*
+ * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
+ * keep the address intact to be compatible with code shared with
+ * 32-bit classic.
+ *
+ * On the other hand, I find it useful to have them behave as expected
+ * by their name (ie always do the addition) on 64-bit BookE
+ */
+#if defined(CONFIG_BOOKE) && !defined(CONFIG_PPC64)
+#define toreal(rd)
+#define fromreal(rd)
+
+/*
+ * We use addis to ensure compatibility with the "classic" ppc versions of
+ * these macros, which use rs = 0 to get the tophys offset in rd, rather than
+ * converting the address in r0, and so this version has to do that too
+ * (i.e. set register rd to 0 when rs == 0).
+ */
+#define tophys(rd,rs)				\
+	addis	rd,rs,0
+
+#define tovirt(rd,rs)				\
+	addis	rd,rs,0
+
+#elif defined(CONFIG_PPC64)
+#define toreal(rd)		/* we can access c000... in real mode */
+#define fromreal(rd)
+
+#define tophys(rd,rs)                           \
+	clrldi	rd,rs,2
+
+#define tovirt(rd,rs)                           \
+	rotldi	rd,rs,16;			\
+	ori	rd,rd,((KERNELBASE>>48)&0xFFFF);\
+	rotldi	rd,rd,48
+#else
+#define toreal(rd)	tophys(rd,rd)
+#define fromreal(rd)	tovirt(rd,rd)
+
+#define tophys(rd, rs)	addis	rd, rs, -PAGE_OFFSET@h
+#define tovirt(rd, rs)	addis	rd, rs, PAGE_OFFSET@h
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#define MTMSRD(r)	mtmsrd	r
+#define MTMSR_EERI(reg)	mtmsrd	reg,1
+#else
+#define MTMSRD(r)	mtmsr	r
+#define MTMSR_EERI(reg)	mtmsr	reg
+#endif
+
+#endif /* __KERNEL__ */
+
+/* The boring bits... */
+
+/* Condition Register Bit Fields */
+
+#define	cr0	0
+#define	cr1	1
+#define	cr2	2
+#define	cr3	3
+#define	cr4	4
+#define	cr5	5
+#define	cr6	6
+#define	cr7	7
+
+
+/*
+ * General Purpose Registers (GPRs)
+ *
+ * The lower case r0-r31 should be used in preference to the upper
+ * case R0-R31 as they provide more error checking in the assembler.
+ * Use R0-31 only when really necessary.
+ */
+
+#define	r0	%r0
+#define	r1	%r1
+#define	r2	%r2
+#define	r3	%r3
+#define	r4	%r4
+#define	r5	%r5
+#define	r6	%r6
+#define	r7	%r7
+#define	r8	%r8
+#define	r9	%r9
+#define	r10	%r10
+#define	r11	%r11
+#define	r12	%r12
+#define	r13	%r13
+#define	r14	%r14
+#define	r15	%r15
+#define	r16	%r16
+#define	r17	%r17
+#define	r18	%r18
+#define	r19	%r19
+#define	r20	%r20
+#define	r21	%r21
+#define	r22	%r22
+#define	r23	%r23
+#define	r24	%r24
+#define	r25	%r25
+#define	r26	%r26
+#define	r27	%r27
+#define	r28	%r28
+#define	r29	%r29
+#define	r30	%r30
+#define	r31	%r31
+
+
+/* Floating Point Registers (FPRs) */
+
+#define	fr0	0
+#define	fr1	1
+#define	fr2	2
+#define	fr3	3
+#define	fr4	4
+#define	fr5	5
+#define	fr6	6
+#define	fr7	7
+#define	fr8	8
+#define	fr9	9
+#define	fr10	10
+#define	fr11	11
+#define	fr12	12
+#define	fr13	13
+#define	fr14	14
+#define	fr15	15
+#define	fr16	16
+#define	fr17	17
+#define	fr18	18
+#define	fr19	19
+#define	fr20	20
+#define	fr21	21
+#define	fr22	22
+#define	fr23	23
+#define	fr24	24
+#define	fr25	25
+#define	fr26	26
+#define	fr27	27
+#define	fr28	28
+#define	fr29	29
+#define	fr30	30
+#define	fr31	31
+
+/* AltiVec Registers (VPRs) */
+
+#define	v0	0
+#define	v1	1
+#define	v2	2
+#define	v3	3
+#define	v4	4
+#define	v5	5
+#define	v6	6
+#define	v7	7
+#define	v8	8
+#define	v9	9
+#define	v10	10
+#define	v11	11
+#define	v12	12
+#define	v13	13
+#define	v14	14
+#define	v15	15
+#define	v16	16
+#define	v17	17
+#define	v18	18
+#define	v19	19
+#define	v20	20
+#define	v21	21
+#define	v22	22
+#define	v23	23
+#define	v24	24
+#define	v25	25
+#define	v26	26
+#define	v27	27
+#define	v28	28
+#define	v29	29
+#define	v30	30
+#define	v31	31
+
+/* VSX Registers (VSRs) */
+
+#define	vs0	0
+#define	vs1	1
+#define	vs2	2
+#define	vs3	3
+#define	vs4	4
+#define	vs5	5
+#define	vs6	6
+#define	vs7	7
+#define	vs8	8
+#define	vs9	9
+#define	vs10	10
+#define	vs11	11
+#define	vs12	12
+#define	vs13	13
+#define	vs14	14
+#define	vs15	15
+#define	vs16	16
+#define	vs17	17
+#define	vs18	18
+#define	vs19	19
+#define	vs20	20
+#define	vs21	21
+#define	vs22	22
+#define	vs23	23
+#define	vs24	24
+#define	vs25	25
+#define	vs26	26
+#define	vs27	27
+#define	vs28	28
+#define	vs29	29
+#define	vs30	30
+#define	vs31	31
+#define	vs32	32
+#define	vs33	33
+#define	vs34	34
+#define	vs35	35
+#define	vs36	36
+#define	vs37	37
+#define	vs38	38
+#define	vs39	39
+#define	vs40	40
+#define	vs41	41
+#define	vs42	42
+#define	vs43	43
+#define	vs44	44
+#define	vs45	45
+#define	vs46	46
+#define	vs47	47
+#define	vs48	48
+#define	vs49	49
+#define	vs50	50
+#define	vs51	51
+#define	vs52	52
+#define	vs53	53
+#define	vs54	54
+#define	vs55	55
+#define	vs56	56
+#define	vs57	57
+#define	vs58	58
+#define	vs59	59
+#define	vs60	60
+#define	vs61	61
+#define	vs62	62
+#define	vs63	63
+
+/* SPE Registers (EVPRs) */
+
+#define	evr0	0
+#define	evr1	1
+#define	evr2	2
+#define	evr3	3
+#define	evr4	4
+#define	evr5	5
+#define	evr6	6
+#define	evr7	7
+#define	evr8	8
+#define	evr9	9
+#define	evr10	10
+#define	evr11	11
+#define	evr12	12
+#define	evr13	13
+#define	evr14	14
+#define	evr15	15
+#define	evr16	16
+#define	evr17	17
+#define	evr18	18
+#define	evr19	19
+#define	evr20	20
+#define	evr21	21
+#define	evr22	22
+#define	evr23	23
+#define	evr24	24
+#define	evr25	25
+#define	evr26	26
+#define	evr27	27
+#define	evr28	28
+#define	evr29	29
+#define	evr30	30
+#define	evr31	31
+
+#define RFSCV	.long 0x4c0000a4
+
+/*
+ * Create an endian fixup trampoline
+ *
+ * This starts with a "tdi 0,0,0x48" instruction which is
+ * essentially a "trap never", and thus akin to a nop.
+ *
+ * The opcode for this instruction read with the wrong endian
+ * however results in a b . + 8
+ *
+ * So essentially we use that trick to execute the following
+ * trampoline in "reverse endian" if we are running with the
+ * MSR_LE bit set the "wrong" way for whatever endianness the
+ * kernel is built for.
+ */
+
+#ifdef CONFIG_PPC_BOOK3E_64
+#define FIXUP_ENDIAN
+#else
+/*
+ * This version may be used in HV or non-HV context.
+ * MSR[EE] must be disabled.
+ */
+#define FIXUP_ENDIAN						   \
+	tdi   0,0,0x48;	  /* Reverse endian of b . + 8		*/ \
+	b     191f;	  /* Skip trampoline if endian is good	*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0x00004039; /* li r10,0				*/ \
+	.long 0x6401417d; /* mtmsrd r10,1			*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x14004a39; /* addi r10,r10,20			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c; /* rfid				*/ \
+191:
+
+/*
+ * This version may only be used with MSR[HV]=1
+ * - Does not clear MSR[RI], so more robust.
+ * - Slightly smaller and faster.
+ */
+#define FIXUP_ENDIAN_HV						   \
+	tdi   0,0,0x48;	  /* Reverse endian of b . + 8		*/ \
+	b     191f;	  /* Skip trampoline if endian is good	*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x14004a39; /* addi r10,r10,20			*/ \
+	.long 0xa64b5a7d; /* mthsrr0 r10			*/ \
+	.long 0xa64b7b7d; /* mthsrr1 r11			*/ \
+	.long 0x2402004c; /* hrfid				*/ \
+191:
+
+#endif /* !CONFIG_PPC_BOOK3E_64 */
+
+#endif /*  __ASSEMBLY__ */
+
+#define SOFT_MASK_TABLE(_start, _end)		\
+	stringify_in_c(.section __soft_mask_table,"a";)\
+	stringify_in_c(.balign 8;)		\
+	stringify_in_c(.llong (_start);)	\
+	stringify_in_c(.llong (_end);)		\
+	stringify_in_c(.previous)
+
+#define RESTART_TABLE(_start, _end, _target)	\
+	stringify_in_c(.section __restart_table,"a";)\
+	stringify_in_c(.balign 8;)		\
+	stringify_in_c(.llong (_start);)	\
+	stringify_in_c(.llong (_end);)		\
+	stringify_in_c(.llong (_target);)	\
+	stringify_in_c(.previous)
+
+#ifdef CONFIG_PPC_E500
+#define BTB_FLUSH(reg)			\
+	lis reg,BUCSR_INIT@h;		\
+	ori reg,reg,BUCSR_INIT@l;	\
+	mtspr SPRN_BUCSR,reg;		\
+	isync;
+#else
+#define BTB_FLUSH(reg)
+#endif /* CONFIG_PPC_E500 */
+
+#if defined(CONFIG_PPC64_ELF_ABI_V1)
+#define STACK_FRAME_PARAMS 48
+#elif defined(CONFIG_PPC64_ELF_ABI_V2)
+#define STACK_FRAME_PARAMS 32
+#elif defined(CONFIG_PPC32)
+#define STACK_FRAME_PARAMS 8
+#endif
+
+#endif /* _ASM_POWERPC_PPC_ASM_H */
diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h
new file mode 100644
index 0000000000..e77a2ed7d9
--- /dev/null
+++ b/arch/powerpc/include/asm/probes.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PROBES_H
+#define _ASM_POWERPC_PROBES_H
+#ifdef __KERNEL__
+/*
+ * Definitions common to probes files
+ *
+ * Copyright IBM Corporation, 2012
+ */
+#include <linux/types.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+
+#define BREAKPOINT_INSTRUCTION	PPC_RAW_TRAP()	/* trap */
+
+/* Trap definitions per ISA */
+#define IS_TW(instr)		(((instr) & 0xfc0007fe) == 0x7c000008)
+#define IS_TD(instr)		(((instr) & 0xfc0007fe) == 0x7c000088)
+#define IS_TDI(instr)		(((instr) & 0xfc000000) == 0x08000000)
+#define IS_TWI(instr)		(((instr) & 0xfc000000) == 0x0c000000)
+
+#ifdef CONFIG_PPC64
+#define is_trap(instr)		(IS_TW(instr) || IS_TD(instr) || \
+				IS_TWI(instr) || IS_TDI(instr))
+#else
+#define is_trap(instr)		(IS_TW(instr) || IS_TWI(instr))
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_SINGLESTEP	(MSR_DE)
+#else
+#define MSR_SINGLESTEP	(MSR_SE)
+#endif
+
+static inline bool can_single_step(u32 inst)
+{
+	const unsigned int op = get_op(inst);
+
+	if (op == OP_TRAP_64 || op == OP_TRAP || op == OP_SC)
+		return false;
+	if (op == OP_19) {
+		switch (get_xop(inst)) {	/* extended opcodes refused under OP_19 */
+		case OP_19_XOP_RFID:
+		case OP_19_XOP_RFMCI:
+		case OP_19_XOP_RFDI:
+		case OP_19_XOP_RFI:
+		case OP_19_XOP_RFCI:
+		case OP_19_XOP_RFSCV:
+		case OP_19_XOP_HRFID:
+		case OP_19_XOP_URFID:
+		case OP_19_XOP_STOP:
+		case OP_19_XOP_DOZE:
+		case OP_19_XOP_NAP:
+		case OP_19_XOP_SLEEP:
+		case OP_19_XOP_RVWINKLE:
+			return false;
+		}
+	} else if (op == OP_31) {
+		switch (get_xop(inst)) {	/* extended opcodes refused under OP_31 */
+		case OP_31_XOP_TRAP:
+		case OP_31_XOP_TRAP_64:
+		case OP_31_XOP_MTMSR:
+		case OP_31_XOP_MTMSRD:
+			return false;
+		}
+	}
+	return true;	/* every opcode not listed above */
+}
+
+/* Enable single stepping for the current task */
+static inline void enable_single_step(struct pt_regs *regs)
+{
+	regs_set_return_msr(regs, regs->msr | MSR_SINGLESTEP);
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	/*
+	 * We turn off Critical Input Exception(CE) to ensure that the single
+	 * step will be for the instruction we have the probe on; if we don't,
+	 * it is possible we'd get the single step reported for CE.
+	 */
+	regs_set_return_msr(regs, regs->msr & ~MSR_CE);
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#ifdef CONFIG_PPC_47x
+	isync();
+#endif /* CONFIG_PPC_47x */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+}
+
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_PROBES_H */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
new file mode 100644
index 0000000000..b2c51d337e
--- /dev/null
+++ b/arch/powerpc/include/asm/processor.h
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PROCESSOR_H
+#define _ASM_POWERPC_PROCESSOR_H
+
+/*
+ * Copyright (C) 2001 PPC 64 Team, IBM Corp
+ */
+
+#include <vdso/processor.h>
+
+#include <asm/reg.h>
+
+#ifdef CONFIG_VSX
+#define TS_FPRWIDTH 2
+
+#ifdef __BIG_ENDIAN__
+#define TS_FPROFFSET 0
+#define TS_VSRLOWOFFSET 1
+#else
+#define TS_FPROFFSET 1
+#define TS_VSRLOWOFFSET 0
+#endif
+
+#else
+#define TS_FPRWIDTH 1
+#define TS_FPROFFSET 0
+#endif
+
+#ifdef CONFIG_PPC64
+/* Default SMT priority is set to 3. Bits 11-13 (MSB 0 numbering) hold the priority. */
+#define PPR_PRIORITY 3
+#ifdef __ASSEMBLY__
+#define DEFAULT_PPR (PPR_PRIORITY << 50)
+#else
+#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50)
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PPC64 */
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <asm/ptrace.h>
+#include <asm/hw_breakpoint.h>
+
+/* We do _not_ want to define new machine types at all, those must die
+ * in favor of using the device-tree
+ * -- BenH.
+ */
+
+/* PREP sub-platform types. Unused */
+#define _PREP_Motorola	0x01	/* motorola prep */
+#define _PREP_Firm	0x02	/* firmworks prep */
+#define _PREP_IBM	0x00	/* ibm prep */
+#define _PREP_Bull	0x03	/* bull prep */
+
+/* CHRP sub-platform types. These are arbitrary */
+#define _CHRP_Motorola	0x04	/* motorola chrp, the cobra */
+#define _CHRP_IBM	0x05	/* IBM chrp, the longtrail and longtrail 2 */
+#define _CHRP_Pegasos	0x06	/* Genesi/bplan's Pegasos and Pegasos2 */
+#define _CHRP_briq	0x07	/* TotalImpact's briQ */
+
+#if defined(__KERNEL__) && defined(CONFIG_PPC32)
+
+extern int _chrp_type;
+
+#endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_PPC64
+#include <asm/task_size_64.h>
+#else
+#include <asm/task_size_32.h>
+#endif
+
+struct task_struct;
+void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
+
+#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
+#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
+
+/* FP and VSX 0-31 register set; TS_FPRWIDTH (2 with CONFIG_VSX, else 1) u64s each */
+struct thread_fp_state {
+	u64	fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
+	u64	fpscr;		/* Floating point status */
+};
+
+/* Complete AltiVec register set (32 vector registers) including VSCR, 16-byte aligned */
+struct thread_vr_state {
+	vector128	vr[32] __attribute__((aligned(16)));
+	vector128	vscr __attribute__((aligned(16)));
+};
+
+struct debug_reg {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	/*
+	 * The following help to manage the use of Debug Control Registers
+	 * on the BookE platforms.
+	 */
+	uint32_t	dbcr0;
+	uint32_t	dbcr1;
+#ifdef CONFIG_BOOKE
+	uint32_t	dbcr2;
+#endif
+	/*
+	 * The stored value of the DBSR register will be the value at the
+	 * last debug interrupt. This register can only be read from the
+	 * user (will never be written to) and has value while helping to
+	 * describe the reason for the last debug trap.  Torez
+	 */
+	uint32_t	dbsr;
+	/*
+	 * The following will contain addresses used by debug applications
+	 * to help trace and trap on particular address locations.
+	 * The bits in the Debug Control Registers above help define which
+	 * of the following registers will contain valid data and/or addresses.
+	 */
+	unsigned long	iac1;
+	unsigned long	iac2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	unsigned long	iac3;
+	unsigned long	iac4;
+#endif
+	unsigned long	dac1;
+	unsigned long	dac2;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+	unsigned long	dvc1;
+	unsigned long	dvc2;
+#endif /* CONFIG_PPC_ADV_DEBUG_DVCS > 0 */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+};
+
+struct thread_struct {
+	unsigned long	ksp;		/* Kernel stack pointer */
+
+#ifdef CONFIG_PPC64
+	unsigned long	ksp_vsid;
+#endif
+	struct pt_regs	*regs;		/* Pointer to saved register state */
+#ifdef CONFIG_BOOKE
+	/* BookE base exception scratch space; align on cacheline */
+	unsigned long	normsave[8] ____cacheline_aligned;
+#endif
+#ifdef CONFIG_PPC32
+	void		*pgdir;		/* root of page-table tree */
+#ifdef CONFIG_PPC_RTAS
+	unsigned long	rtas_sp;	/* stack pointer for when in RTAS */
+#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+	unsigned long	kuap;		/* opened segments for user access */
+#endif
+	unsigned long	srr0;
+	unsigned long	srr1;
+	unsigned long	dar;
+	unsigned long	dsisr;
+#ifdef CONFIG_PPC_BOOK3S_32
+	unsigned long	r0, r3, r4, r5, r6, r8, r9, r11;
+	unsigned long	lr, ctr;
+	unsigned long	sr0;
+#endif
+#endif /* CONFIG_PPC32 */
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+	unsigned long	pid;	/* value written in PID reg. at interrupt exit */
+#endif
+	/* Debug Registers */
+	struct debug_reg debug;
+#ifdef CONFIG_PPC_FPU_REGS
+	struct thread_fp_state	fp_state;
+	struct thread_fp_state	*fp_save_area;
+#endif
+	int		fpexc_mode;	/* floating-point exception mode */
+	unsigned int	align_ctl;	/* alignment handling control */
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	struct perf_event *ptrace_bps[HBP_NUM_MAX];
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+	struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
+	unsigned long	trap_nr;	/* last trap # on this thread */
+	u8 load_slb;			/* Ages out SLB preload cache entries */
+	u8 load_fp;
+#ifdef CONFIG_ALTIVEC
+	u8 load_vec;
+	struct thread_vr_state vr_state;
+	struct thread_vr_state *vr_save_area;
+	unsigned long	vrsave;
+	int		used_vr;	/* set if process has used altivec */
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	/* VSR status */
+	int		used_vsr;	/* set if process has used VSX */
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+	struct_group(spe,
+		unsigned long	evr[32];	/* upper 32-bits of SPE regs */
+		u64		acc;		/* Accumulator */
+	);
+	unsigned long	spefscr;	/* SPE & eFP status */
+	unsigned long	spefscr_last;	/* SPEFSCR value on last prctl
+					   call or trap return */
+	int		used_spe;	/* set if process has used spe */
+#endif /* CONFIG_SPE */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	u8	load_tm;
+	u64		tm_tfhar;	/* Transaction fail handler addr */
+	u64		tm_texasr;	/* Transaction exception & summary */
+	u64		tm_tfiar;	/* Transaction fail instr address reg */
+	struct pt_regs	ckpt_regs;	/* Checkpointed registers */
+
+	unsigned long	tm_tar;
+	unsigned long	tm_ppr;
+	unsigned long	tm_dscr;
+	unsigned long	tm_amr;
+
+	/*
+	 * Checkpointed FP and VSX 0-31 register set.
+	 *
+	 * When a transaction is active/signalled/scheduled etc., *regs is the
+	 * most recent set of/speculated GPRs with ckpt_regs being the older
+	 * checkpointed regs to which we roll back if transaction aborts.
+	 *
+	 * These are analogous to how ckpt_regs and pt_regs work
+	 */
+	struct thread_fp_state ckfp_state; /* Checkpointed FP state */
+	struct thread_vr_state ckvr_state; /* Checkpointed VR state */
+	unsigned long	ckvrsave; /* Checkpointed VRSAVE */
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	void		*kvm_shadow_vcpu; /* KVM internal data */
+#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	struct kvm_vcpu	*kvm_vcpu;
+#endif
+#ifdef CONFIG_PPC64
+	unsigned long	dscr;
+	unsigned long	fscr;
+	/*
+	 * This member element dscr_inherit indicates that the process
+	 * has explicitly attempted and changed the DSCR register value
+	 * for itself. Hence kernel won't use the default CPU DSCR value
+	 * contained in the PACA structure anymore during process context
+	 * switch. Once this variable is set, this behaviour will also be
+	 * inherited to all the children of this process from that point
+	 * onwards.
+	 */
+	int		dscr_inherit;
+	unsigned long	tidr;
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_BOOK3S_64
+	unsigned long	tar;
+	unsigned long	ebbrr;
+	unsigned long	ebbhr;
+	unsigned long	bescr;
+	unsigned long	siar;
+	unsigned long	sdar;
+	unsigned long	sier;
+	unsigned long	mmcr2;
+	unsigned int	mmcr0;
+
+	unsigned int	used_ebb;
+	unsigned long	mmcr3;
+	unsigned long	sier2;
+	unsigned long	sier3;
+	unsigned long	hashkeyr;
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+};
+
+#define ARCH_MIN_TASKALIGN 16
+
+#define INIT_SP		(sizeof(init_stack) + (unsigned long) &init_stack)
+#define INIT_SP_LIMIT	((unsigned long)&init_stack)
+
+#ifdef CONFIG_SPE
+#define SPEFSCR_INIT \
+	.spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, \
+	.spefscr_last = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE,
+#else
+#define SPEFSCR_INIT
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#define SR0_INIT	.sr0 = IS_ENABLED(CONFIG_PPC_KUEP) ? SR_NX : 0,
+#else
+#define SR0_INIT
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+#define INIT_THREAD { \
+	.ksp = INIT_SP, \
+	.pgdir = swapper_pg_dir, \
+	.kuap = ~0UL, /* KUAP_NONE */ \
+	.fpexc_mode = MSR_FE0 | MSR_FE1, \
+	SPEFSCR_INIT \
+	SR0_INIT \
+}
+#elif defined(CONFIG_PPC32)
+#define INIT_THREAD { \
+	.ksp = INIT_SP, \
+	.pgdir = swapper_pg_dir, \
+	.fpexc_mode = MSR_FE0 | MSR_FE1, \
+	SPEFSCR_INIT \
+	SR0_INIT \
+}
+#else
+#define INIT_THREAD  { \
+	.ksp = INIT_SP, \
+	.fpexc_mode = 0, \
+}
+#endif
+
+#define task_pt_regs(tsk)	((tsk)->thread.regs)
+
+unsigned long __get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk)  ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
+#define KSTK_ESP(tsk)  ((tsk)->thread.regs? (tsk)->thread.regs->gpr[1]: 0)
+
+/* Get/set floating-point exception mode */
+#define GET_FPEXC_CTL(tsk, adr) get_fpexc_mode((tsk), (adr))
+#define SET_FPEXC_CTL(tsk, val) set_fpexc_mode((tsk), (val))
+
+extern int get_fpexc_mode(struct task_struct *tsk, unsigned long adr);
+extern int set_fpexc_mode(struct task_struct *tsk, unsigned int val);
+
+#define GET_ENDIAN(tsk, adr) get_endian((tsk), (adr))
+#define SET_ENDIAN(tsk, val) set_endian((tsk), (val))
+
+extern int get_endian(struct task_struct *tsk, unsigned long adr);
+extern int set_endian(struct task_struct *tsk, unsigned int val);
+
+#define GET_UNALIGN_CTL(tsk, adr)	get_unalign_ctl((tsk), (adr))
+#define SET_UNALIGN_CTL(tsk, val)	set_unalign_ctl((tsk), (val))
+
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
+extern void load_fp_state(struct thread_fp_state *fp);
+extern void store_fp_state(struct thread_fp_state *fp);
+extern void load_vr_state(struct thread_vr_state *vr);
+extern void store_vr_state(struct thread_vr_state *vr);
+
+static inline unsigned int __unpack_fe01(unsigned long msr_bits)
+{
+	return ((msr_bits & MSR_FE1) >> 8) | ((msr_bits & MSR_FE0) >> 10);
+}
+
+static inline unsigned long __pack_fe01(unsigned int fpmode)
+{
+	return ((fpmode << 8) & MSR_FE1) | ((fpmode << 10) & MSR_FE0);
+}
+
+#ifdef CONFIG_PPC64
+
+#define spin_begin()							\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"or 1,1,1", /* HMT_LOW */				\
+		"nop", /* v3.1 uses pause_short in cpu_relax instead */	\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+
+#define spin_cpu_relax()						\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"nop", /* Before v3.1 use priority nops in spin_begin/end */ \
+		PPC_WAIT(2, 0),	/* aka pause_short */			\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+
+#define spin_end()							\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"or 2,2,2", /* HMT_MEDIUM */				\
+		"nop",							\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+
+#endif
+
+/*
+ * Check that a certain kernel stack pointer is a valid (minimum sized)
+ * stack frame in task_struct p.
+ */
+int validate_sp(unsigned long sp, struct task_struct *p);
+
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+		     unsigned long nbytes);
+
+/*
+ * Prefetch macros.
+ */
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+
+static inline void prefetch(const void *x)
+{
+	/* Nothing is issued for a NULL pointer */
+	if (likely(x))
+		__asm__ __volatile__ ("dcbt 0,%0"
+				      : : "r" (x));
+}
+
+static inline void prefetchw(const void *x)
+{
+	/* Nothing is issued for a NULL pointer */
+	if (likely(x))
+		__asm__ __volatile__ ("dcbtst 0,%0"
+				      : : "r" (x));
+}
+
+/* asm stubs */
+extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
+extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
+extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+#ifdef CONFIG_PPC_970_NAP
+extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
+#endif
+
+extern unsigned long cpuidle_disable;
+enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
+
+extern int powersave_nap;	/* set if nap mode can be used in idle loop */
+
+extern void power7_idle_type(unsigned long type);
+extern void arch300_idle_type(unsigned long stop_psscr_val,
+			      unsigned long stop_psscr_mask);
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
+extern int fix_alignment(struct pt_regs *);
+
+#ifdef CONFIG_PPC64
+/*
+ * We handle most unaligned accesses in hardware. On the other hand
+ * unaligned DMA can be very expensive on some ppc64 IO chips (it does
+ * powers of 2 writes until it reaches sufficient alignment).
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ */
+#define NET_IP_ALIGN	0
+#endif
+
+int do_mathemu(struct pt_regs *regs);
+int do_spe_mathemu(struct pt_regs *regs);
+int speround_handler(struct pt_regs *regs);
+
+/* VMX copying */
+int enter_vmx_usercopy(void);
+int exit_vmx_usercopy(void);
+int enter_vmx_ops(void);
+void *exit_vmx_ops(void *dest);
+
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_PROCESSOR_H */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
new file mode 100644
index 0000000000..c0107d8ddd
--- /dev/null
+++ b/arch/powerpc/include/asm/prom.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_PROM_H
+#define _POWERPC_PROM_H
+#ifdef __KERNEL__
+
+/*
+ * Definitions for talking to the Open Firmware PROM on
+ * Power Macintosh computers.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp.
+ */
+#include <linux/types.h>
+#include <asm/firmware.h>
+
+struct device_node;
+struct property;
+
+#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
+#define OF_DT_END_NODE		0x2		/* End node */
+#define OF_DT_PROP		0x3		/* Property: name off, size,
+						 * content */
+#define OF_DT_NOP		0x4		/* nop */
+#define OF_DT_END		0x9
+
+#define OF_DT_VERSION		0x10
+
+/*
+ * This is what gets passed to the kernel by prom_init or kexec
+ *
+ * The dt struct contains the device tree structure, full paths and
+ * property contents. The dt strings contain a separate block with just
+ * the strings for the property names, and is fully page aligned and
+ * self contained in a page, so that it can be kept around by the kernel,
+ * each property name appears only once in this page (cheap compression)
+ *
+ * the mem_rsvmap contains a map of reserved ranges of physical memory,
+ * passing it here instead of in the device-tree itself greatly simplifies
+ * the job of everybody. It's just a list of u64 pairs (base/size) that
+ * ends when size is 0
+ */
+struct boot_param_header {
+	__be32	magic;			/* magic word OF_DT_HEADER */
+	__be32	totalsize;		/* total size of DT block */
+	__be32	off_dt_struct;		/* offset to structure */
+	__be32	off_dt_strings;		/* offset to strings */
+	__be32	off_mem_rsvmap;		/* offset to memory reserve map */
+	__be32	version;		/* format version */
+	__be32	last_comp_version;	/* last compatible version */
+	/* version 2 fields below */
+	__be32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
+	/* version 3 fields below */
+	__be32	dt_strings_size;	/* size of the DT strings block */
+	/* version 17 fields below */
+	__be32	dt_struct_size;		/* size of the DT structure block */
+};
+
+/*
+ * OF address retrieval & translation
+ */
+
+/* Parse the ibm,dma-window property of an OF node into the busno, phys and
+ * size parameters.
+ */
+void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
+			 unsigned long *busno, unsigned long *phys,
+			 unsigned long *size);
+
+extern void of_instantiate_rtc(void);
+
+extern int of_get_ibm_chip_id(struct device_node *np);
+
+struct of_drc_info {
+	char *drc_type;
+	char *drc_name_prefix;
+	u32 drc_index_start;
+	u32 drc_name_suffix_start;
+	u32 num_sequential_elems;
+	u32 sequential_inc;
+	u32 drc_power_domain;
+	u32 last_drc_index;
+};
+
+extern int of_read_drc_info_cell(struct property **prop,
+			const __be32 **curval, struct of_drc_info *data);
+
+extern unsigned int boot_cpu_node_count;
+
+/*
+ * There are two methods for telling firmware what our capabilities are.
+ * Newer machines have an "ibm,client-architecture-support" method on the
+ * root node.  For older machines, we have to call the "process-elf-header"
+ * method in the /packages/elf-loader node, passing it a fake 32-bit
+ * ELF header containing a couple of PT_NOTE sections that contain
+ * structures that contain various information.
+ */
+
+/* New method - extensible architecture description vector. */
+
+/* Option vector bits - generic bits in byte 1 */
+#define OV_IGNORE		0x80	/* ignore this vector */
+#define OV_CESSATION_POLICY	0x40	/* halt if unsupported option present*/
+
+/* Option vector 1: processor architectures supported */
+#define OV1_PPC_2_00		0x80	/* set if we support PowerPC 2.00 */
+#define OV1_PPC_2_01		0x40	/* set if we support PowerPC 2.01 */
+#define OV1_PPC_2_02		0x20	/* set if we support PowerPC 2.02 */
+#define OV1_PPC_2_03		0x10	/* set if we support PowerPC 2.03 */
+#define OV1_PPC_2_04		0x08	/* set if we support PowerPC 2.04 */
+#define OV1_PPC_2_05		0x04	/* set if we support PowerPC 2.05 */
+#define OV1_PPC_2_06		0x02	/* set if we support PowerPC 2.06 */
+#define OV1_PPC_2_07		0x01	/* set if we support PowerPC 2.07 */
+
+#define OV1_PPC_3_00		0x80	/* set if we support PowerPC 3.00 */
+#define OV1_PPC_3_1			0x40	/* set if we support PowerPC 3.1 */
+
+/* Option vector 2: Open Firmware options supported */
+#define OV2_REAL_MODE		0x20	/* set if we want OF in real mode */
+
+/* Option vector 3: processor options supported */
+#define OV3_FP			0x80	/* floating point */
+#define OV3_VMX			0x40	/* VMX/Altivec */
+#define OV3_DFP			0x20	/* decimal FP */
+
+/* Option vector 4: IBM PAPR implementation */
+#define OV4_MIN_ENT_CAP		0x01	/* minimum VP entitled capacity */
+
+/* Option vector 5: PAPR/OF options supported
+ * These bits are also used in firmware_has_feature() to validate
+ * the capabilities reported for vector 5 in the device tree so we
+ * encode the vector index in the define and use the OV5_FEAT()
+ * and OV5_INDX() macros to extract the desired information.
+ */
+#define OV5_FEAT(x)	((x) & 0xff)
+#define OV5_INDX(x)	((x) >> 8)
+#define OV5_LPAR		0x0280	/* logical partitioning supported */
+#define OV5_SPLPAR		0x0240	/* shared-processor LPAR supported */
+/* ibm,dynamic-reconfiguration-memory property supported */
+#define OV5_DRCONF_MEMORY	0x0220
+#define OV5_LARGE_PAGES		0x0210	/* large pages supported */
+#define OV5_DONATE_DEDICATE_CPU	0x0202	/* donate dedicated CPU support */
+#define OV5_MSI			0x0201	/* PCIe/MSI support */
+#define OV5_CMO			0x0480	/* Cooperative Memory Overcommitment */
+#define OV5_XCMO		0x0440	/* Page Coalescing */
+#define OV5_FORM1_AFFINITY	0x0580	/* FORM1 NUMA affinity */
+#define OV5_PRRN		0x0540	/* Platform Resource Reassignment */
+#define OV5_FORM2_AFFINITY	0x0520	/* Form2 NUMA affinity */
+#define OV5_HP_EVT		0x0604	/* Hot Plug Event support */
+#define OV5_RESIZE_HPT		0x0601	/* Hash Page Table resizing */
+#define OV5_PFO_HW_RNG		0x1180	/* PFO Random Number Generator */
+#define OV5_PFO_HW_842		0x1140	/* PFO Compression Accelerator */
+#define OV5_PFO_HW_ENCR		0x1120	/* PFO Encryption Accelerator */
+#define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2		0x1680	/* ibm,dynamic-reconfiguration-v2 */
+#define OV5_XIVE_SUPPORT	0x17C0	/* XIVE Exploitation Support Mask */
+#define OV5_XIVE_LEGACY		0x1700	/* XIVE legacy mode Only */
+#define OV5_XIVE_EXPLOIT	0x1740	/* XIVE exploitation mode Only */
+#define OV5_XIVE_EITHER		0x1780	/* XIVE legacy or exploitation mode */
+/* MMU Base Architecture */
+#define OV5_MMU_SUPPORT		0x18C0	/* MMU Mode Support Mask */
+#define OV5_MMU_HASH		0x1800	/* Hash MMU Only */
+#define OV5_MMU_RADIX		0x1840	/* Radix MMU Only */
+#define OV5_MMU_EITHER		0x1880	/* Hash or Radix Supported */
+#define OV5_MMU_DYNAMIC		0x18C0	/* Hash or Radix Can Switch Later */
+#define OV5_NMMU		0x1820	/* Nest MMU Available */
+/* Hash Table Extensions */
+#define OV5_HASH_SEG_TBL	0x1980	/* In Memory Segment Tables Available */
+#define OV5_HASH_GTSE		0x1940	/* Guest Translation Shoot Down Avail */
+/* Radix Table Extensions */
+#define OV5_RADIX_GTSE		0x1A40	/* Guest Translation Shoot Down Avail */
+#define OV5_DRC_INFO		0x1640	/* Redef Prop Structures: drc-info   */
+
+/* Option Vector 6: IBM PAPR hints */
+#define OV6_LINUX		0x02	/* Linux is our OS */
+
+#endif /* __KERNEL__ */
+#endif /* _POWERPC_PROM_H */
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
new file mode 100644
index 0000000000..a5f36546a0
--- /dev/null
+++ b/arch/powerpc/include/asm/ps3.h
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 platform declarations.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#if !defined(_ASM_POWERPC_PS3_H)
+#define _ASM_POWERPC_PS3_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <asm/cell-pmu.h>
+
+union ps3_firmware_version {
+	u64 raw;
+	struct {
+		u16 pad;
+		u16 major;
+		u16 minor;
+		u16 rev;
+	};
+};
+
+void ps3_get_firmware_version(union ps3_firmware_version *v);
+int ps3_compare_firmware_version(u16 major, u16 minor, u16 rev);
+
+/* 'Other OS' area */
+
+enum ps3_param_av_multi_out {
+	PS3_PARAM_AV_MULTI_OUT_NTSC = 0,
+	PS3_PARAM_AV_MULTI_OUT_PAL_RGB = 1,
+	PS3_PARAM_AV_MULTI_OUT_PAL_YCBCR = 2,
+	PS3_PARAM_AV_MULTI_OUT_SECAM = 3,
+};
+
+enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void);
+
+extern u64 ps3_os_area_get_rtc_diff(void);
+extern void ps3_os_area_set_rtc_diff(u64 rtc_diff);
+
+struct ps3_os_area_flash_ops {
+	ssize_t (*read)(void *buf, size_t count, loff_t pos);
+	ssize_t (*write)(const void *buf, size_t count, loff_t pos);
+};
+
+extern void ps3_os_area_flash_register(const struct ps3_os_area_flash_ops *ops);
+
+/* dma routines */
+
+enum ps3_dma_page_size {
+	PS3_DMA_4K = 12U,
+	PS3_DMA_64K = 16U,
+	PS3_DMA_1M = 20U,
+	PS3_DMA_16M = 24U,
+};
+
+enum ps3_dma_region_type {
+	PS3_DMA_OTHER = 0,
+	PS3_DMA_INTERNAL = 2,
+};
+
+struct ps3_dma_region_ops;
+
+/**
+ * struct ps3_dma_region - A per device dma state variables structure
+ * @dev: The struct ps3_system_bus_device associated with this region.
+ * @page_size: The ioc pagesize.
+ * @region_type: The HV region type.
+ * @bus_addr: The 'translated' bus address of the region.
+ * @len: The length in bytes of the region.
+ * @offset: The offset from the start of memory of the region.
+ * @dma_mask: Device dma_mask.
+ * @ioid: The IOID of the device who owns this region
+ * @chunk_list: Opaque variable used by the ioc page manager.
+ * @region_ops: struct ps3_dma_region_ops - dma region operations
+ */
+
+struct ps3_dma_region {
+	struct ps3_system_bus_device *dev;
+	/* device variables */
+	const struct ps3_dma_region_ops *region_ops;
+	unsigned char ioid;
+	enum ps3_dma_page_size page_size;
+	enum ps3_dma_region_type region_type;
+	unsigned long len;
+	unsigned long offset;
+	u64 dma_mask;
+
+	/* driver variables  (set by ps3_dma_region_create) */
+	unsigned long bus_addr;
+	struct {
+		spinlock_t lock;
+		struct list_head head;
+	} chunk_list;
+};
+
+struct ps3_dma_region_ops {
+	int (*create)(struct ps3_dma_region *);
+	int (*free)(struct ps3_dma_region *);
+	int (*map)(struct ps3_dma_region *,
+		   unsigned long virt_addr,
+		   unsigned long len,
+		   dma_addr_t *bus_addr,
+		   u64 iopte_pp);
+	int (*unmap)(struct ps3_dma_region *,
+		     dma_addr_t bus_addr,
+		     unsigned long len);
+};
+/**
+ * ps3_dma_region_init() - Helper to initialize structure variables
+ *
+ * Helper to properly initialize variables prior to calling
+ * ps3_system_bus_device_register.
+ */
+
+struct ps3_system_bus_device;
+
+int ps3_dma_region_init(struct ps3_system_bus_device *dev,
+	struct ps3_dma_region *r, enum ps3_dma_page_size page_size,
+	enum ps3_dma_region_type region_type, void *addr, unsigned long len);
+int ps3_dma_region_create(struct ps3_dma_region *r);
+int ps3_dma_region_free(struct ps3_dma_region *r);
+int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr,
+	unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_pp);
+int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr,
+	unsigned long len);
+
+/* mmio routines */
+
+enum ps3_mmio_page_size {
+	PS3_MMIO_4K = 12U,
+	PS3_MMIO_64K = 16U
+};
+
+struct ps3_mmio_region_ops;
+/**
+ * struct ps3_mmio_region - a per device mmio state variables structure
+ *
+ * Current systems can be supported with a single region per device.
+ */
+
+struct ps3_mmio_region {
+	struct ps3_system_bus_device *dev;
+	const struct ps3_mmio_region_ops *mmio_ops;
+	unsigned long bus_addr;
+	unsigned long len;
+	enum ps3_mmio_page_size page_size;
+	unsigned long lpar_addr;
+};
+
+struct ps3_mmio_region_ops {
+	int (*create)(struct ps3_mmio_region *);
+	int (*free)(struct ps3_mmio_region *);
+};
+/**
+ * ps3_mmio_region_init() - Helper to initialize structure variables
+ *
+ * Helper to properly initialize variables prior to calling
+ * ps3_system_bus_device_register.
+ */
+
+int ps3_mmio_region_init(struct ps3_system_bus_device *dev,
+	struct ps3_mmio_region *r, unsigned long bus_addr, unsigned long len,
+	enum ps3_mmio_page_size page_size);
+int ps3_mmio_region_create(struct ps3_mmio_region *r);
+int ps3_free_mmio_region(struct ps3_mmio_region *r);
+unsigned long ps3_mm_phys_to_lpar(unsigned long phys_addr);
+
+/* interrupt routines */
+
+enum ps3_cpu_binding {
+	PS3_BINDING_CPU_ANY = -1,
+	PS3_BINDING_CPU_0 = 0,
+	PS3_BINDING_CPU_1 = 1,
+};
+
+int ps3_irq_plug_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
+	unsigned int *virq);
+int ps3_irq_plug_destroy(unsigned int virq);
+int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq);
+int ps3_event_receive_port_destroy(unsigned int virq);
+int ps3_send_event_locally(unsigned int virq);
+
+int ps3_io_irq_setup(enum ps3_cpu_binding cpu, unsigned int interrupt_id,
+	unsigned int *virq);
+int ps3_io_irq_destroy(unsigned int virq);
+int ps3_vuart_irq_setup(enum ps3_cpu_binding cpu, void* virt_addr_bmp,
+	unsigned int *virq);
+int ps3_vuart_irq_destroy(unsigned int virq);
+int ps3_spe_irq_setup(enum ps3_cpu_binding cpu, unsigned long spe_id,
+	unsigned int class, unsigned int *virq);
+int ps3_spe_irq_destroy(unsigned int virq);
+
+int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev,
+	enum ps3_cpu_binding cpu, unsigned int *virq);
+int ps3_sb_event_receive_port_destroy(struct ps3_system_bus_device *dev,
+	unsigned int virq);
+
+/* lv1 result codes */
+
+enum lv1_result {
+	LV1_SUCCESS                     = 0,
+	/* not used                       -1 */
+	LV1_RESOURCE_SHORTAGE           = -2,
+	LV1_NO_PRIVILEGE                = -3,
+	LV1_DENIED_BY_POLICY            = -4,
+	LV1_ACCESS_VIOLATION            = -5,
+	LV1_NO_ENTRY                    = -6,
+	LV1_DUPLICATE_ENTRY             = -7,
+	LV1_TYPE_MISMATCH               = -8,
+	LV1_BUSY                        = -9,
+	LV1_EMPTY                       = -10,
+	LV1_WRONG_STATE                 = -11,
+	/* not used                       -12 */
+	LV1_NO_MATCH                    = -13,
+	LV1_ALREADY_CONNECTED           = -14,
+	LV1_UNSUPPORTED_PARAMETER_VALUE = -15,
+	LV1_CONDITION_NOT_SATISFIED     = -16,
+	LV1_ILLEGAL_PARAMETER_VALUE     = -17,
+	LV1_BAD_OPTION                  = -18,
+	LV1_IMPLEMENTATION_LIMITATION   = -19,
+	LV1_NOT_IMPLEMENTED             = -20,
+	LV1_INVALID_CLASS_ID            = -21,
+	LV1_CONSTRAINT_NOT_SATISFIED    = -22,
+	LV1_ALIGNMENT_ERROR             = -23,
+	LV1_HARDWARE_ERROR              = -24,
+	LV1_INVALID_DATA_FORMAT         = -25,
+	LV1_INVALID_OPERATION           = -26,
+	LV1_INTERNAL_ERROR              = -32768,
+};
+
+static inline const char *ps3_result(int result)
+{
+#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT)
+	switch (result) {
+	case LV1_SUCCESS:
+		return "LV1_SUCCESS (0)";
+	case -1:
+		return "** unknown result ** (-1)";
+	case LV1_RESOURCE_SHORTAGE:
+		return "LV1_RESOURCE_SHORTAGE (-2)";
+	case LV1_NO_PRIVILEGE:
+		return "LV1_NO_PRIVILEGE (-3)";
+	case LV1_DENIED_BY_POLICY:
+		return "LV1_DENIED_BY_POLICY (-4)";
+	case LV1_ACCESS_VIOLATION:
+		return "LV1_ACCESS_VIOLATION (-5)";
+	case LV1_NO_ENTRY:
+		return "LV1_NO_ENTRY (-6)";
+	case LV1_DUPLICATE_ENTRY:
+		return "LV1_DUPLICATE_ENTRY (-7)";
+	case LV1_TYPE_MISMATCH:
+		return "LV1_TYPE_MISMATCH (-8)";
+	case LV1_BUSY:
+		return "LV1_BUSY (-9)";
+	case LV1_EMPTY:
+		return "LV1_EMPTY (-10)";
+	case LV1_WRONG_STATE:
+		return "LV1_WRONG_STATE (-11)";
+	case -12:
+		return "** unknown result ** (-12)";
+	case LV1_NO_MATCH:
+		return "LV1_NO_MATCH (-13)";
+	case LV1_ALREADY_CONNECTED:
+		return "LV1_ALREADY_CONNECTED (-14)";
+	case LV1_UNSUPPORTED_PARAMETER_VALUE:
+		return "LV1_UNSUPPORTED_PARAMETER_VALUE (-15)";
+	case LV1_CONDITION_NOT_SATISFIED:
+		return "LV1_CONDITION_NOT_SATISFIED (-16)";
+	case LV1_ILLEGAL_PARAMETER_VALUE:
+		return "LV1_ILLEGAL_PARAMETER_VALUE (-17)";
+	case LV1_BAD_OPTION:
+		return "LV1_BAD_OPTION (-18)";
+	case LV1_IMPLEMENTATION_LIMITATION:
+		return "LV1_IMPLEMENTATION_LIMITATION (-19)";
+	case LV1_NOT_IMPLEMENTED:
+		return "LV1_NOT_IMPLEMENTED (-20)";
+	case LV1_INVALID_CLASS_ID:
+		return "LV1_INVALID_CLASS_ID (-21)";
+	case LV1_CONSTRAINT_NOT_SATISFIED:
+		return "LV1_CONSTRAINT_NOT_SATISFIED (-22)";
+	case LV1_ALIGNMENT_ERROR:
+		return "LV1_ALIGNMENT_ERROR (-23)";
+	case LV1_HARDWARE_ERROR:
+		return "LV1_HARDWARE_ERROR (-24)";
+	case LV1_INVALID_DATA_FORMAT:
+		return "LV1_INVALID_DATA_FORMAT (-25)";
+	case LV1_INVALID_OPERATION:
+		return "LV1_INVALID_OPERATION (-26)";
+	case LV1_INTERNAL_ERROR:
+		return "LV1_INTERNAL_ERROR (-32768)";
+	default:
+		BUG();	/* result matches none of the cases above */
+		return "** unknown result **";
+	}
+#else /* result names compiled out: always the empty string */
+	return "";
+#endif /* DEBUG || PS3_VERBOSE_RESULT || CONFIG_PS3_VERBOSE_RESULT */
+}
+
+/* system bus routines */
+
+enum ps3_match_id {
+	PS3_MATCH_ID_EHCI		= 1,
+	PS3_MATCH_ID_OHCI		= 2,
+	PS3_MATCH_ID_GELIC		= 3,
+	PS3_MATCH_ID_AV_SETTINGS	= 4,
+	PS3_MATCH_ID_SYSTEM_MANAGER	= 5,
+	PS3_MATCH_ID_STOR_DISK		= 6,
+	PS3_MATCH_ID_STOR_ROM		= 7,
+	PS3_MATCH_ID_STOR_FLASH		= 8,
+	PS3_MATCH_ID_SOUND		= 9,
+	PS3_MATCH_ID_GPU		= 10,
+	PS3_MATCH_ID_LPM		= 11,
+};
+
+enum ps3_match_sub_id {
+	PS3_MATCH_SUB_ID_GPU_FB		= 1,
+	PS3_MATCH_SUB_ID_GPU_RAMDISK	= 2,
+};
+
+#define PS3_MODULE_ALIAS_EHCI		"ps3:1:0"
+#define PS3_MODULE_ALIAS_OHCI		"ps3:2:0"
+#define PS3_MODULE_ALIAS_GELIC		"ps3:3:0"
+#define PS3_MODULE_ALIAS_AV_SETTINGS	"ps3:4:0"
+#define PS3_MODULE_ALIAS_SYSTEM_MANAGER	"ps3:5:0"
+#define PS3_MODULE_ALIAS_STOR_DISK	"ps3:6:0"
+#define PS3_MODULE_ALIAS_STOR_ROM	"ps3:7:0"
+#define PS3_MODULE_ALIAS_STOR_FLASH	"ps3:8:0"
+#define PS3_MODULE_ALIAS_SOUND		"ps3:9:0"
+#define PS3_MODULE_ALIAS_GPU_FB		"ps3:10:1"
+#define PS3_MODULE_ALIAS_GPU_RAMDISK	"ps3:10:2"
+#define PS3_MODULE_ALIAS_LPM		"ps3:11:0"
+
+enum ps3_system_bus_device_type {
+	PS3_DEVICE_TYPE_IOC0 = 1,
+	PS3_DEVICE_TYPE_SB,
+	PS3_DEVICE_TYPE_VUART,
+	PS3_DEVICE_TYPE_LPM,
+};
+
+/**
+ * struct ps3_system_bus_device - a device on the system bus
+ * Field tags (SB, IOC0, VUART, LPM) presumably name the dev_type(s) using a field.
+ */
+struct ps3_system_bus_device {
+	enum ps3_match_id match_id;
+	enum ps3_match_sub_id match_sub_id;
+	enum ps3_system_bus_device_type dev_type;
+
+	u64 bus_id;                       /* SB */
+	u64 dev_id;                       /* SB */
+	unsigned int interrupt_id;        /* SB */
+	struct ps3_dma_region *d_region;  /* SB, IOC0 */
+	struct ps3_mmio_region *m_region; /* SB, IOC0 */
+	unsigned int port_number;         /* VUART */
+	struct {                          /* LPM */
+		u64 node_id;
+		u64 pu_id;
+		u64 rights; /* presumably enum ps3_lpm_rights bits -- TODO confirm */
+	} lpm;
+
+/*	struct iommu_table *iommu_table; -- waiting for BenH's cleanups */
+	struct device core; /* embedded; ps3_dev_to_system_bus_dev() maps back from it */
+	void *driver_priv; /* private driver variables */
+};
+
+int ps3_open_hv_device(struct ps3_system_bus_device *dev);
+int ps3_close_hv_device(struct ps3_system_bus_device *dev);
+
+/**
+ * struct ps3_system_bus_driver - a driver for a device on the system bus
+ * Callbacks take the ps3_system_bus_device; suspend/resume are commented out.
+ */
+struct ps3_system_bus_driver {
+	enum ps3_match_id match_id;
+	enum ps3_match_sub_id match_sub_id;
+	struct device_driver core; /* embedded; ps3_drv_to_system_bus_drv() maps back from it */
+	int (*probe)(struct ps3_system_bus_device *);
+	void (*remove)(struct ps3_system_bus_device *);
+	void (*shutdown)(struct ps3_system_bus_device *);
+/*	int (*suspend)(struct ps3_system_bus_device *, pm_message_t); */
+/*	int (*resume)(struct ps3_system_bus_device *); */
+};
+
+int ps3_system_bus_device_register(struct ps3_system_bus_device *dev);
+int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv);
+void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv);
+
+static inline struct ps3_system_bus_driver *ps3_drv_to_system_bus_drv(
+	struct device_driver *_drv)
+{	/* @_drv must be the 'core' member embedded in a ps3_system_bus_driver */
+	return container_of(_drv, struct ps3_system_bus_driver, core);
+}
+static inline struct ps3_system_bus_device *ps3_dev_to_system_bus_dev(
+	const struct device *_dev)
+{	/* @_dev must be the embedded 'core' member; the result is not const-qualified */
+	return container_of(_dev, struct ps3_system_bus_device, core);
+}
+static inline struct ps3_system_bus_driver *
+	ps3_system_bus_dev_to_system_bus_drv(struct ps3_system_bus_device *_dev)
+{	/* returns the ps3_system_bus_driver wrapping _dev->core.driver */
+	BUG_ON(!_dev);			/* a NULL device is treated as a fatal bug */
+	BUG_ON(!_dev->core.driver);	/* so is a device with no driver set */
+	return ps3_drv_to_system_bus_drv(_dev->core.driver);
+}
+
+/**
+ * ps3_system_bus_set_drvdata - set driver data on the device's embedded struct device
+ * @dev: device structure
+ * @data: Data to set
+ */
+
+static inline void ps3_system_bus_set_drvdata(
+	struct ps3_system_bus_device *dev, void *data)
+{
+	dev_set_drvdata(&dev->core, data);
+}
+static inline void *ps3_system_bus_get_drvdata(
+	struct ps3_system_bus_device *dev)
+{	/* counterpart of ps3_system_bus_set_drvdata(): reads the pointer back from dev->core */
+	return dev_get_drvdata(&dev->core);
+}
+
+/* system manager */
+
+struct ps3_sys_manager_ops {
+	struct ps3_system_bus_device *dev;
+	void (*power_off)(struct ps3_system_bus_device *dev);
+	void (*restart)(struct ps3_system_bus_device *dev);
+};
+
+void ps3_sys_manager_register_ops(const struct ps3_sys_manager_ops *ops);
+void __noreturn ps3_sys_manager_power_off(void);
+void __noreturn ps3_sys_manager_restart(void);
+void __noreturn ps3_sys_manager_halt(void);
+int ps3_sys_manager_get_wol(void);
+void ps3_sys_manager_set_wol(int state);
+
+struct ps3_prealloc {
+    const char *name;
+    void *address;
+    unsigned long size;
+    unsigned long align;
+};
+
+extern struct ps3_prealloc ps3fb_videomemory;
+extern struct ps3_prealloc ps3flash_bounce_buffer;
+
+/* logical performance monitor */
+
+/**
+ * enum ps3_lpm_rights - Rights granted by the system policy module.
+ *
+ * @PS3_LPM_RIGHTS_USE_LPM: The right to use the lpm.
+ * @PS3_LPM_RIGHTS_USE_TB: The right to use the internal trace buffer.
+ */
+
+enum ps3_lpm_rights {
+	PS3_LPM_RIGHTS_USE_LPM = 0x001,
+	PS3_LPM_RIGHTS_USE_TB = 0x100,
+};
+
+/**
+ * enum ps3_lpm_tb_type - Type of trace buffer lv1 should use.
+ *
+ * @PS3_LPM_TB_TYPE_NONE: Do not use a trace buffer.
+ * @PS3_LPM_TB_TYPE_INTERNAL: Use the lv1 internal trace buffer.  Must have
+ *  rights @PS3_LPM_RIGHTS_USE_TB.
+ */
+
+enum ps3_lpm_tb_type {
+	PS3_LPM_TB_TYPE_NONE = 0,
+	PS3_LPM_TB_TYPE_INTERNAL = 1,
+};
+
+int ps3_lpm_open(enum ps3_lpm_tb_type tb_type, void *tb_cache,
+	u64 tb_cache_size);
+int ps3_lpm_close(void);
+int ps3_lpm_copy_tb(unsigned long offset, void *buf, unsigned long count,
+	unsigned long *bytes_copied);
+int ps3_lpm_copy_tb_to_user(unsigned long offset, void __user *buf,
+	unsigned long count, unsigned long *bytes_copied);
+void ps3_set_bookmark(u64 bookmark);
+void ps3_set_pm_bookmark(u64 tag, u64 incident, u64 th_id);
+int ps3_set_signal(u64 rtas_signal_group, u8 signal_bit, u16 sub_unit,
+	u8 bus_word);
+
+u32 ps3_read_phys_ctr(u32 cpu, u32 phys_ctr);
+void ps3_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val);
+u32 ps3_read_ctr(u32 cpu, u32 ctr);
+void ps3_write_ctr(u32 cpu, u32 ctr, u32 val);
+
+u32 ps3_read_pm07_control(u32 cpu, u32 ctr);
+void ps3_write_pm07_control(u32 cpu, u32 ctr, u32 val);
+u32 ps3_read_pm(u32 cpu, enum pm_reg_name reg);
+void ps3_write_pm(u32 cpu, enum pm_reg_name reg, u32 val);
+
+u32 ps3_get_ctr_size(u32 cpu, u32 phys_ctr);
+void ps3_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size);
+
+void ps3_enable_pm(u32 cpu);
+void ps3_disable_pm(u32 cpu);
+void ps3_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask);
+void ps3_disable_pm_interrupts(u32 cpu);
+
+u32 ps3_get_and_clear_pm_interrupts(u32 cpu);
+void ps3_sync_irq(int node);
+u32 ps3_get_hw_thread_id(int cpu);
+u64 ps3_get_spe_id(void *arg);
+
+void ps3_early_mm_init(void);
+
+#endif
diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h
new file mode 100644
index 0000000000..c8b0f2ffcd
--- /dev/null
+++ b/arch/powerpc/include/asm/ps3av.h
@@ -0,0 +1,729 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 AV backend support.
+ *
+ *  Copyright (C) 2007 Sony Computer Entertainment Inc.
+ *  Copyright 2007 Sony Corp.
+ */
+
+#ifndef _ASM_POWERPC_PS3AV_H_
+#define _ASM_POWERPC_PS3AV_H_
+
+/** command for ioctl() **/
+#define PS3AV_VERSION 0x205	/* version of ps3av command */
+
+#define PS3AV_CID_AV_INIT              0x00000001
+#define PS3AV_CID_AV_FIN               0x00000002
+#define PS3AV_CID_AV_GET_HW_CONF       0x00000003
+#define PS3AV_CID_AV_GET_MONITOR_INFO  0x00000004
+#define PS3AV_CID_AV_ENABLE_EVENT      0x00000006
+#define PS3AV_CID_AV_DISABLE_EVENT     0x00000007
+#define PS3AV_CID_AV_TV_MUTE           0x0000000a
+
+#define PS3AV_CID_AV_VIDEO_CS          0x00010001
+#define PS3AV_CID_AV_VIDEO_MUTE        0x00010002
+#define PS3AV_CID_AV_VIDEO_DISABLE_SIG 0x00010003
+#define PS3AV_CID_AV_AUDIO_PARAM       0x00020001
+#define PS3AV_CID_AV_AUDIO_MUTE        0x00020002
+#define PS3AV_CID_AV_HDMI_MODE         0x00040001
+
+#define PS3AV_CID_VIDEO_INIT           0x01000001
+#define PS3AV_CID_VIDEO_MODE           0x01000002
+#define PS3AV_CID_VIDEO_FORMAT         0x01000004
+#define PS3AV_CID_VIDEO_PITCH          0x01000005
+
+#define PS3AV_CID_AUDIO_INIT           0x02000001
+#define PS3AV_CID_AUDIO_MODE           0x02000002
+#define PS3AV_CID_AUDIO_MUTE           0x02000003
+#define PS3AV_CID_AUDIO_ACTIVE         0x02000004
+#define PS3AV_CID_AUDIO_INACTIVE       0x02000005
+#define PS3AV_CID_AUDIO_SPDIF_BIT      0x02000006
+#define PS3AV_CID_AUDIO_CTRL           0x02000007
+
+#define PS3AV_CID_EVENT_UNPLUGGED      0x10000001
+#define PS3AV_CID_EVENT_PLUGGED        0x10000002
+#define PS3AV_CID_EVENT_HDCP_DONE      0x10000003
+#define PS3AV_CID_EVENT_HDCP_FAIL      0x10000004
+#define PS3AV_CID_EVENT_HDCP_AUTH      0x10000005
+#define PS3AV_CID_EVENT_HDCP_ERROR     0x10000006
+
+#define PS3AV_CID_AVB_PARAM            0x04000001
+
+/* max backend ports */
+#define PS3AV_HDMI_MAX                 2	/* HDMI_0 HDMI_1 */
+#define PS3AV_AVMULTI_MAX              1	/* AVMULTI_0 */
+#define PS3AV_AV_PORT_MAX              (PS3AV_HDMI_MAX + PS3AV_AVMULTI_MAX)
+#define PS3AV_OPT_PORT_MAX             1	/* SPDIF0 */
+#define PS3AV_HEAD_MAX                 2	/* HEAD_A HEAD_B */
+
+/* num of pkt for PS3AV_CID_AVB_PARAM */
+#define PS3AV_AVB_NUM_VIDEO            PS3AV_HEAD_MAX
+#define PS3AV_AVB_NUM_AUDIO            0	/* not supported */
+#define PS3AV_AVB_NUM_AV_VIDEO         PS3AV_AV_PORT_MAX
+#define PS3AV_AVB_NUM_AV_AUDIO         PS3AV_HDMI_MAX
+
+#define PS3AV_MUTE_PORT_MAX            1	/* num of ports in mute pkt */
+
+/* event_bit */
+#define PS3AV_CMD_EVENT_BIT_UNPLUGGED			(1 << 0)
+#define PS3AV_CMD_EVENT_BIT_PLUGGED			(1 << 1)
+#define PS3AV_CMD_EVENT_BIT_HDCP_DONE			(1 << 2)
+#define PS3AV_CMD_EVENT_BIT_HDCP_FAIL			(1 << 3)
+#define PS3AV_CMD_EVENT_BIT_HDCP_REAUTH			(1 << 4)
+#define PS3AV_CMD_EVENT_BIT_HDCP_TOPOLOGY		(1 << 5)
+
+/* common params */
+/* mute */
+#define PS3AV_CMD_MUTE_OFF				0x0000
+#define PS3AV_CMD_MUTE_ON				0x0001
+/* avport */
+#define PS3AV_CMD_AVPORT_HDMI_0				0x0000
+#define PS3AV_CMD_AVPORT_HDMI_1				0x0001
+#define PS3AV_CMD_AVPORT_AVMULTI_0			0x0010
+#define PS3AV_CMD_AVPORT_SPDIF_0			0x0020
+#define PS3AV_CMD_AVPORT_SPDIF_1			0x0021
+
+/* for av backend */
+/* av_mclk */
+#define PS3AV_CMD_AV_MCLK_128				0x0000
+#define PS3AV_CMD_AV_MCLK_256				0x0001
+#define PS3AV_CMD_AV_MCLK_512				0x0003
+/* av_inputlen */
+#define PS3AV_CMD_AV_INPUTLEN_16			0x02
+#define PS3AV_CMD_AV_INPUTLEN_20			0x0a
+#define PS3AV_CMD_AV_INPUTLEN_24			0x0b
+/* av_layout */
+#define PS3AV_CMD_AV_LAYOUT_32				(1 << 0)
+#define PS3AV_CMD_AV_LAYOUT_44				(1 << 1)
+#define PS3AV_CMD_AV_LAYOUT_48				(1 << 2)
+#define PS3AV_CMD_AV_LAYOUT_88				(1 << 3)
+#define PS3AV_CMD_AV_LAYOUT_96				(1 << 4)
+#define PS3AV_CMD_AV_LAYOUT_176				(1 << 5)
+#define PS3AV_CMD_AV_LAYOUT_192				(1 << 6)
+/* hdmi_mode */
+#define PS3AV_CMD_AV_HDMI_MODE_NORMAL			0xff
+#define PS3AV_CMD_AV_HDMI_HDCP_OFF			0x01
+#define PS3AV_CMD_AV_HDMI_EDID_PASS			0x80
+#define PS3AV_CMD_AV_HDMI_DVI				0x40
+
+/* for video module */
+/* video_head */
+#define PS3AV_CMD_VIDEO_HEAD_A				0x0000
+#define PS3AV_CMD_VIDEO_HEAD_B				0x0001
+/* video_cs_out video_cs_in */
+#define PS3AV_CMD_VIDEO_CS_NONE				0x0000
+#define PS3AV_CMD_VIDEO_CS_RGB_8			0x0001
+#define PS3AV_CMD_VIDEO_CS_YUV444_8			0x0002
+#define PS3AV_CMD_VIDEO_CS_YUV422_8			0x0003
+#define PS3AV_CMD_VIDEO_CS_XVYCC_8			0x0004
+#define PS3AV_CMD_VIDEO_CS_RGB_10			0x0005
+#define PS3AV_CMD_VIDEO_CS_YUV444_10			0x0006
+#define PS3AV_CMD_VIDEO_CS_YUV422_10			0x0007
+#define PS3AV_CMD_VIDEO_CS_XVYCC_10			0x0008
+#define PS3AV_CMD_VIDEO_CS_RGB_12			0x0009
+#define PS3AV_CMD_VIDEO_CS_YUV444_12			0x000a
+#define PS3AV_CMD_VIDEO_CS_YUV422_12			0x000b
+#define PS3AV_CMD_VIDEO_CS_XVYCC_12			0x000c
+/* video_vid */
+#define PS3AV_CMD_VIDEO_VID_NONE			0x0000
+#define PS3AV_CMD_VIDEO_VID_480I			0x0001
+#define PS3AV_CMD_VIDEO_VID_576I			0x0003
+#define PS3AV_CMD_VIDEO_VID_480P			0x0005
+#define PS3AV_CMD_VIDEO_VID_576P			0x0006
+#define PS3AV_CMD_VIDEO_VID_1080I_60HZ			0x0007
+#define PS3AV_CMD_VIDEO_VID_1080I_50HZ			0x0008
+#define PS3AV_CMD_VIDEO_VID_720P_60HZ			0x0009
+#define PS3AV_CMD_VIDEO_VID_720P_50HZ			0x000a
+#define PS3AV_CMD_VIDEO_VID_1080P_60HZ			0x000b
+#define PS3AV_CMD_VIDEO_VID_1080P_50HZ			0x000c
+#define PS3AV_CMD_VIDEO_VID_WXGA			0x000d
+#define PS3AV_CMD_VIDEO_VID_SXGA			0x000e
+#define PS3AV_CMD_VIDEO_VID_WUXGA			0x000f
+#define PS3AV_CMD_VIDEO_VID_480I_A			0x0010
+/* video_format */
+#define PS3AV_CMD_VIDEO_FORMAT_BLACK			0x0000
+#define PS3AV_CMD_VIDEO_FORMAT_ARGB_8BIT		0x0007
+/* video_order */
+#define PS3AV_CMD_VIDEO_ORDER_RGB			0x0000
+#define PS3AV_CMD_VIDEO_ORDER_BGR			0x0001
+/* video_fmt */
+#define PS3AV_CMD_VIDEO_FMT_X8R8G8B8			0x0000
+/* video_out_format */
+#define PS3AV_CMD_VIDEO_OUT_FORMAT_RGB_12BIT		0x0000
+/* video_cl_cnv */
+#define PS3AV_CMD_VIDEO_CL_CNV_ENABLE_LUT		0x0000
+#define PS3AV_CMD_VIDEO_CL_CNV_DISABLE_LUT		0x0010
+/* video_sync */
+#define PS3AV_CMD_VIDEO_SYNC_VSYNC			0x0001
+#define PS3AV_CMD_VIDEO_SYNC_CSYNC			0x0004
+#define PS3AV_CMD_VIDEO_SYNC_HSYNC			0x0010
+
+/* for audio module */
+/* num_of_ch */
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_2			0x0000
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_3			0x0001
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_4			0x0002
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_5			0x0003
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_6			0x0004
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_7			0x0005
+#define PS3AV_CMD_AUDIO_NUM_OF_CH_8			0x0006
+/* audio_fs */
+#define PS3AV_CMD_AUDIO_FS_32K				0x0001
+#define PS3AV_CMD_AUDIO_FS_44K				0x0002
+#define PS3AV_CMD_AUDIO_FS_48K				0x0003
+#define PS3AV_CMD_AUDIO_FS_88K				0x0004
+#define PS3AV_CMD_AUDIO_FS_96K				0x0005
+#define PS3AV_CMD_AUDIO_FS_176K				0x0006
+#define PS3AV_CMD_AUDIO_FS_192K				0x0007
+/* audio_word_bits */
+#define PS3AV_CMD_AUDIO_WORD_BITS_16			0x0001
+#define PS3AV_CMD_AUDIO_WORD_BITS_20			0x0002
+#define PS3AV_CMD_AUDIO_WORD_BITS_24			0x0003
+/* audio_format */
+#define PS3AV_CMD_AUDIO_FORMAT_PCM			0x0001
+#define PS3AV_CMD_AUDIO_FORMAT_BITSTREAM		0x00ff
+/* audio_source */
+#define PS3AV_CMD_AUDIO_SOURCE_SERIAL			0x0000
+#define PS3AV_CMD_AUDIO_SOURCE_SPDIF			0x0001
+/* audio_swap */
+#define PS3AV_CMD_AUDIO_SWAP_0				0x0000
+#define PS3AV_CMD_AUDIO_SWAP_1				0x0000	/* NOTE(review): same value as SWAP_0 -- confirm this is intended */
+/* audio_map */
+#define PS3AV_CMD_AUDIO_MAP_OUTPUT_0			0x0000
+#define PS3AV_CMD_AUDIO_MAP_OUTPUT_1			0x0001
+#define PS3AV_CMD_AUDIO_MAP_OUTPUT_2			0x0002
+#define PS3AV_CMD_AUDIO_MAP_OUTPUT_3			0x0003
+/* audio_layout */
+#define PS3AV_CMD_AUDIO_LAYOUT_2CH			0x0000
+#define PS3AV_CMD_AUDIO_LAYOUT_6CH			0x000b	/* LREClr */
+#define PS3AV_CMD_AUDIO_LAYOUT_8CH			0x001f	/* LREClrXY */
+/* audio_downmix */
+#define PS3AV_CMD_AUDIO_DOWNMIX_PERMITTED		0x0000
+#define PS3AV_CMD_AUDIO_DOWNMIX_PROHIBITED		0x0001
+
+/* audio_port */
+#define PS3AV_CMD_AUDIO_PORT_HDMI_0			( 1 << 0 )
+#define PS3AV_CMD_AUDIO_PORT_HDMI_1			( 1 << 1 )
+#define PS3AV_CMD_AUDIO_PORT_AVMULTI_0			( 1 << 10 )
+#define PS3AV_CMD_AUDIO_PORT_SPDIF_0			( 1 << 20 )
+#define PS3AV_CMD_AUDIO_PORT_SPDIF_1			( 1 << 21 )
+
+/* audio_ctrl_id */
+#define PS3AV_CMD_AUDIO_CTRL_ID_DAC_RESET		0x0000
+#define PS3AV_CMD_AUDIO_CTRL_ID_DAC_DE_EMPHASIS		0x0001
+#define PS3AV_CMD_AUDIO_CTRL_ID_AVCLK			0x0002
+/* audio_ctrl_data[0] reset */
+#define PS3AV_CMD_AUDIO_CTRL_RESET_NEGATE		0x0000
+#define PS3AV_CMD_AUDIO_CTRL_RESET_ASSERT		0x0001
+/* audio_ctrl_data[0] de-emphasis */
+#define PS3AV_CMD_AUDIO_CTRL_DE_EMPHASIS_OFF		0x0000
+#define PS3AV_CMD_AUDIO_CTRL_DE_EMPHASIS_ON		0x0001
+/* audio_ctrl_data[0] avclk */
+#define PS3AV_CMD_AUDIO_CTRL_AVCLK_22			0x0000
+#define PS3AV_CMD_AUDIO_CTRL_AVCLK_18			0x0001
+
+/* av_vid */
+/* do not use these params directly, use vid_video2av */
+#define PS3AV_CMD_AV_VID_480I				0x0000
+#define PS3AV_CMD_AV_VID_480P				0x0001
+#define PS3AV_CMD_AV_VID_720P_60HZ			0x0002
+#define PS3AV_CMD_AV_VID_1080I_60HZ			0x0003
+#define PS3AV_CMD_AV_VID_1080P_60HZ			0x0004
+#define PS3AV_CMD_AV_VID_576I				0x0005
+#define PS3AV_CMD_AV_VID_576P				0x0006
+#define PS3AV_CMD_AV_VID_720P_50HZ			0x0007
+#define PS3AV_CMD_AV_VID_1080I_50HZ			0x0008
+#define PS3AV_CMD_AV_VID_1080P_50HZ			0x0009
+#define PS3AV_CMD_AV_VID_WXGA				0x000a
+#define PS3AV_CMD_AV_VID_SXGA				0x000b
+#define PS3AV_CMD_AV_VID_WUXGA				0x000c
+/* av_cs_out av_cs_in */
+/* use cs_video2av() */
+#define PS3AV_CMD_AV_CS_RGB_8				0x0000
+#define PS3AV_CMD_AV_CS_YUV444_8			0x0001
+#define PS3AV_CMD_AV_CS_YUV422_8			0x0002
+#define PS3AV_CMD_AV_CS_XVYCC_8				0x0003
+#define PS3AV_CMD_AV_CS_RGB_10				0x0004
+#define PS3AV_CMD_AV_CS_YUV444_10			0x0005
+#define PS3AV_CMD_AV_CS_YUV422_10			0x0006
+#define PS3AV_CMD_AV_CS_XVYCC_10			0x0007
+#define PS3AV_CMD_AV_CS_RGB_12				0x0008
+#define PS3AV_CMD_AV_CS_YUV444_12			0x0009
+#define PS3AV_CMD_AV_CS_YUV422_12			0x000a
+#define PS3AV_CMD_AV_CS_XVYCC_12			0x000b
+#define PS3AV_CMD_AV_CS_8				0x0000
+#define PS3AV_CMD_AV_CS_10				0x0001
+#define PS3AV_CMD_AV_CS_12				0x0002
+/* dither */
+#define PS3AV_CMD_AV_DITHER_OFF				0x0000
+#define PS3AV_CMD_AV_DITHER_ON				0x0001
+#define PS3AV_CMD_AV_DITHER_8BIT			0x0000
+#define PS3AV_CMD_AV_DITHER_10BIT			0x0002
+#define PS3AV_CMD_AV_DITHER_12BIT			0x0004
+/* super_white */
+#define PS3AV_CMD_AV_SUPER_WHITE_OFF			0x0000
+#define PS3AV_CMD_AV_SUPER_WHITE_ON			0x0001
+/* aspect */
+#define PS3AV_CMD_AV_ASPECT_16_9			0x0000
+#define PS3AV_CMD_AV_ASPECT_4_3				0x0001
+/* video_cs_cnv() */
+#define PS3AV_CMD_VIDEO_CS_RGB				0x0001
+#define PS3AV_CMD_VIDEO_CS_YUV422			0x0002
+#define PS3AV_CMD_VIDEO_CS_YUV444			0x0003
+
+/* for broadcast automode */
+#define PS3AV_RESBIT_720x480P			0x0003	/* 0x0001 | 0x0002 */
+#define PS3AV_RESBIT_720x576P			0x0003	/* 0x0001 | 0x0002 */
+#define PS3AV_RESBIT_1280x720P			0x0004
+#define PS3AV_RESBIT_1920x1080I			0x0008
+#define PS3AV_RESBIT_1920x1080P			0x4000
+#define PS3AV_RES_MASK_60			(PS3AV_RESBIT_720x480P \
+						| PS3AV_RESBIT_1280x720P \
+						| PS3AV_RESBIT_1920x1080I \
+						| PS3AV_RESBIT_1920x1080P)
+#define PS3AV_RES_MASK_50			(PS3AV_RESBIT_720x576P \
+						| PS3AV_RESBIT_1280x720P \
+						| PS3AV_RESBIT_1920x1080I \
+						| PS3AV_RESBIT_1920x1080P)
+
+/* for VESA automode */
+#define PS3AV_RESBIT_VGA			0x0001
+#define PS3AV_RESBIT_WXGA			0x0002
+#define PS3AV_RESBIT_SXGA			0x0004
+#define PS3AV_RESBIT_WUXGA			0x0008
+#define PS3AV_RES_MASK_VESA			(PS3AV_RESBIT_WXGA |\
+						 PS3AV_RESBIT_SXGA |\
+						 PS3AV_RESBIT_WUXGA)
+
+#define PS3AV_MONITOR_TYPE_HDMI			1	/* HDMI */
+#define PS3AV_MONITOR_TYPE_DVI			2	/* DVI */
+
+
+/* for video mode */
+enum ps3av_mode_num {	/* ids 0..13, all within PS3AV_MODE_MASK (0x000F) */
+	PS3AV_MODE_AUTO				= 0,
+	PS3AV_MODE_480I				= 1,
+	PS3AV_MODE_480P				= 2,
+	PS3AV_MODE_720P60			= 3,
+	PS3AV_MODE_1080I60			= 4,
+	PS3AV_MODE_1080P60			= 5,
+	PS3AV_MODE_576I				= 6,
+	PS3AV_MODE_576P				= 7,
+	PS3AV_MODE_720P50			= 8,
+	PS3AV_MODE_1080I50			= 9,
+	PS3AV_MODE_1080P50			= 10,
+	PS3AV_MODE_WXGA				= 11,
+	PS3AV_MODE_SXGA				= 12,
+	PS3AV_MODE_WUXGA			= 13,
+};
+
+#define PS3AV_MODE_MASK				0x000F
+#define PS3AV_MODE_HDCP_OFF			0x1000	/* Retail PS3 product doesn't support this */
+#define PS3AV_MODE_DITHER			0x0800
+#define PS3AV_MODE_COLOR			0x0400
+#define PS3AV_MODE_WHITE			0x0200
+#define PS3AV_MODE_FULL				0x0080
+#define PS3AV_MODE_DVI				0x0040
+#define PS3AV_MODE_RGB				0x0020
+
+
+#define PS3AV_DEFAULT_HDMI_MODE_ID_REG_60	PS3AV_MODE_480P
+#define PS3AV_DEFAULT_AVMULTI_MODE_ID_REG_60	PS3AV_MODE_480I
+#define PS3AV_DEFAULT_HDMI_MODE_ID_REG_50	PS3AV_MODE_576P
+#define PS3AV_DEFAULT_AVMULTI_MODE_ID_REG_50	PS3AV_MODE_576I
+
+#define PS3AV_REGION_60				0x01
+#define PS3AV_REGION_50				0x02
+#define PS3AV_REGION_RGB			0x10
+
+#define get_status(buf)				(((__u32 *)(buf))[2])	/* 32-bit word [2] of buf; 'status' in struct ps3av_reply_hdr */
+#define PS3AV_HDR_SIZE				4	/* version + size */
+
+
+/** command packet structure **/
+struct ps3av_send_hdr {
+	u16 version;		/* version + size: two u16s, the 4 bytes of PS3AV_HDR_SIZE */
+	u16 size;		/* size of command packet */
+	u32 cid;		/* command id */
+};
+
+struct ps3av_reply_hdr {
+	u16 version;
+	u16 size;
+	u32 cid;
+	u32 status;	/* third 32-bit word, i.e. the word get_status() indexes with [2] */
+};
+
+/* backend: initialization */
+struct ps3av_pkt_av_init {
+	struct ps3av_send_hdr send_hdr;
+	u32 event_bit;
+};
+
+/* backend: finalize */
+struct ps3av_pkt_av_fin {
+	struct ps3av_send_hdr send_hdr;
+	/* recv */
+	u32 reserved;
+};
+
+/* backend: get port */
+struct ps3av_pkt_av_get_hw_conf {
+	struct ps3av_send_hdr send_hdr;
+	/* recv */
+	u32 status;
+	u16 num_of_hdmi;	/* out: number of hdmi */
+	u16 num_of_avmulti;	/* out: number of avmulti */
+	u16 num_of_spdif;	/* out: number of spdif */
+	u16 reserved;
+};
+
+/* backend: get monitor info */
+struct ps3av_info_resolution {
+	u32 res_bits;
+	u32 native;
+};
+
+struct ps3av_info_cs {
+	u8 rgb;
+	u8 yuv444;
+	u8 yuv422;
+	u8 reserved;
+};
+
+struct ps3av_info_color {
+	u16 red_x;
+	u16 red_y;
+	u16 green_x;
+	u16 green_y;
+	u16 blue_x;
+	u16 blue_y;
+	u16 white_x;
+	u16 white_y;
+	u32 gamma;
+};
+
+struct ps3av_info_audio {
+	u8 type;
+	u8 max_num_of_ch;
+	u8 fs;
+	u8 sbit;
+};
+
+struct ps3av_info_monitor {
+	u8 avport;
+	u8 monitor_id[10];
+	u8 monitor_type;
+	u8 monitor_name[16];
+	struct ps3av_info_resolution res_60;
+	struct ps3av_info_resolution res_50;
+	struct ps3av_info_resolution res_other;
+	struct ps3av_info_resolution res_vesa;
+	struct ps3av_info_cs cs;
+	struct ps3av_info_color color;
+	u8 supported_ai;
+	u8 speaker_info;
+	u8 num_of_audio_block;
+	struct ps3av_info_audio audio[0];	/* 0 or more audio blocks; zero-sized, so they overlay reserved[] */
+	u8 reserved[169];
+} __attribute__ ((packed));
+
+struct ps3av_pkt_av_get_monitor_info {
+	struct ps3av_send_hdr send_hdr;
+	u16 avport;		/* in: avport */
+	u16 reserved;
+	/* recv */
+	struct ps3av_info_monitor info;	/* out: monitor info */
+};
+
+/* backend: enable/disable event */
+struct ps3av_pkt_av_event {
+	struct ps3av_send_hdr send_hdr;
+	u32 event_bit;		/* in */
+};
+
+/* backend: video cs param */
+struct ps3av_pkt_av_video_cs {
+	struct ps3av_send_hdr send_hdr;
+	u16 avport;		/* in: avport */
+	u16 av_vid;		/* in: video resolution */
+	u16 av_cs_out;		/* in: output color space */
+	u16 av_cs_in;		/* in: input color space */
+	u8 dither;		/* in: dither bit length */
+	u8 bitlen_out;		/* in: bit length */
+	u8 super_white;		/* in: super white */
+	u8 aspect;		/* in: aspect ratio */
+};
+
+/* backend: video mute */
+struct ps3av_av_mute {
+	u16 avport;		/* in: avport */
+	u16 mute;		/* in: mute on/off */
+};
+
+struct ps3av_pkt_av_video_mute {
+	struct ps3av_send_hdr send_hdr;
+	struct ps3av_av_mute mute[PS3AV_MUTE_PORT_MAX];
+};
+
+/* backend: video disable signal */
+struct ps3av_pkt_av_video_disable_sig {
+	struct ps3av_send_hdr send_hdr;
+	u16 avport;		/* in: avport */
+	u16 reserved;
+};
+
+/* backend: audio param */
+struct ps3av_audio_info_frame {
+	struct pb1_bit {
+		u8 ct:4;
+		u8 rsv:1;
+		u8 cc:3;
+	} pb1;
+	struct pb2_bit {
+		u8 rsv:3;
+		u8 sf:3;
+		u8 ss:2;
+	} pb2;
+	u8 pb3;
+	u8 pb4;
+	struct pb5_bit {
+		u8 dm:1;
+		u8 lsv:4;
+		u8 rsv:3;
+	} pb5;
+};
+
+struct ps3av_pkt_av_audio_param {
+	struct ps3av_send_hdr send_hdr;
+	u16 avport;		/* in: avport */
+	u16 reserved;
+	u8 mclk;		/* in: audio mclk */
+	u8 ns[3];		/* in: audio ns val */
+	u8 enable;		/* in: audio enable */
+	u8 swaplr;		/* in: audio swap */
+	u8 fifomap;		/* in: audio fifomap */
+	u8 inputctrl;		/* in: audio input ctrl */
+	u8 inputlen;		/* in: sample bit size */
+	u8 layout;		/* in: speaker layout param */
+	struct ps3av_audio_info_frame info;	/* in: info */
+	u8 chstat[5];		/* in: ch stat */
+};
+
+/* backend: audio_mute */
+struct ps3av_pkt_av_audio_mute {
+	struct ps3av_send_hdr send_hdr;
+	struct ps3av_av_mute mute[PS3AV_MUTE_PORT_MAX];
+};
+
+/* backend: hdmi_mode */
+struct ps3av_pkt_av_hdmi_mode {
+	struct ps3av_send_hdr send_hdr;
+	u8 mode;		/* in: hdmi_mode */
+	u8 reserved0;
+	u8 reserved1;
+	u8 reserved2;
+};
+
+/* backend: tv_mute */
+struct ps3av_pkt_av_tv_mute {
+	struct ps3av_send_hdr send_hdr;
+	u16 avport;		/* in: avport HDMI only */
+	u16 mute;		/* in: mute */
+};
+
+/* video: initialize */
+struct ps3av_pkt_video_init {
+	struct ps3av_send_hdr send_hdr;
+	/* recv */
+	u32 reserved;
+};
+
+/* video: mode setting */
+struct ps3av_pkt_video_mode {
+	struct ps3av_send_hdr send_hdr;
+	u32 video_head;		/* in: head */
+	u32 reserved;
+	u32 video_vid;		/* in: video resolution */
+	u16 reserved1;
+	u16 width;		/* in: width in pixel */
+	u16 reserved2;
+	u16 height;		/* in: height in pixel */
+	u32 pitch;		/* in: line size in byte */
+	u32 video_out_format;	/* in: out format */
+	u32 video_format;	/* in: input frame buffer format */
+	u8 reserved3;
+	u8 video_cl_cnv;	/* in: color conversion */
+	u16 video_order;	/* in: input RGB order */
+	u32 reserved4;
+};
+
+/* video: format */
+struct ps3av_pkt_video_format {
+	struct ps3av_send_hdr send_hdr;
+	u32 video_head;		/* in: head */
+	u32 video_format;	/* in: frame buffer format */
+	u8 reserved;
+	u8 video_cl_cnv;	/* in: color conversion */
+	u16 video_order;	/* in: input RGB order */
+};
+
+/* video: pitch */
+struct ps3av_pkt_video_pitch {
+	u16 version;		/* version/size/cid repeat the fields of struct ps3av_send_hdr */
+	u16 size;		/* size of command packet */
+	u32 cid;		/* command id */
+	u32 video_head;		/* in: head */
+	u32 pitch;		/* in: line size in byte */
+};
+
+/* audio: initialize */
+struct ps3av_pkt_audio_init {
+	struct ps3av_send_hdr send_hdr;
+	/* recv */
+	u32 reserved;
+};
+
+/* audio: mode setting */
+struct ps3av_pkt_audio_mode {
+	struct ps3av_send_hdr send_hdr;
+	u8 avport;		/* in: avport */
+	u8 reserved0[3];
+	u32 mask;		/* in: mask */
+	u32 audio_num_of_ch;	/* in: number of ch */
+	u32 audio_fs;		/* in: sampling freq */
+	u32 audio_word_bits;	/* in: sample bit size */
+	u32 audio_format;	/* in: audio output format */
+	u32 audio_source;	/* in: audio source */
+	u8 audio_enable[4];	/* in: audio enable */
+	u8 audio_swap[4];	/* in: audio swap */
+	u8 audio_map[4];	/* in: audio map */
+	u32 audio_layout;	/* in: speaker layout */
+	u32 audio_downmix;	/* in: audio downmix permission */
+	u32 audio_downmix_level;
+	u8 audio_cs_info[8];	/* in: IEC channel status */
+};
+
+/* audio: mute */
+struct ps3av_audio_mute {
+	u8 avport;		/* in: opt_port optical */
+	u8 reserved[3];
+	u32 mute;		/* in: mute */
+};
+
+struct ps3av_pkt_audio_mute {
+	struct ps3av_send_hdr send_hdr;
+	struct ps3av_audio_mute mute[PS3AV_OPT_PORT_MAX];
+};
+
+/* audio: active/inactive */
+struct ps3av_pkt_audio_active {
+	struct ps3av_send_hdr send_hdr;
+	u32 audio_port;		/* in: audio active/inactive port */
+};
+
+/* audio: SPDIF user bit */
+struct ps3av_pkt_audio_spdif_bit {
+	u16 version;		/* version/size/cid repeat the fields of struct ps3av_send_hdr */
+	u16 size;		/* size of command packet */
+	u32 cid;		/* command id */
+	u8 avport;		/* in: avport SPDIF only */
+	u8 reserved[3];
+	u32 audio_port;		/* in: SPDIF only */
+	u32 spdif_bit_data[12];	/* in: user bit data */
+};
+
+/* audio: audio control */
+struct ps3av_pkt_audio_ctrl {
+	u16 version;		/* version/size/cid repeat the fields of struct ps3av_send_hdr */
+	u16 size;		/* size of command packet */
+	u32 cid;		/* command id */
+	u32 audio_ctrl_id;	/* in: control id */
+	u32 audio_ctrl_data[4];	/* in: control data */
+};
+
+/* avb:param */
+#define PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE	\
+	(PS3AV_AVB_NUM_VIDEO*sizeof(struct ps3av_pkt_video_mode) + \
+	 PS3AV_AVB_NUM_AUDIO*sizeof(struct ps3av_pkt_audio_mode) + \
+	 PS3AV_AVB_NUM_AV_VIDEO*sizeof(struct ps3av_pkt_av_video_cs) + \
+	 PS3AV_AVB_NUM_AV_AUDIO*sizeof(struct ps3av_pkt_av_audio_param))
+
+struct ps3av_pkt_avb_param {
+	struct ps3av_send_hdr send_hdr;
+	u16 num_of_video_pkt;
+	u16 num_of_audio_pkt;
+	u16 num_of_av_video_pkt;
+	u16 num_of_av_audio_pkt;
+	/*
+	 * The actual buffer layout depends on the fields above:
+	 *
+	 * struct ps3av_pkt_video_mode video[num_of_video_pkt];
+	 * struct ps3av_pkt_audio_mode audio[num_of_audio_pkt];
+	 * struct ps3av_pkt_av_video_cs av_video[num_of_av_video_pkt];
+	 * struct ps3av_pkt_av_audio_param av_audio[num_of_av_audio_pkt];
+	 */
+	u8 buf[PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE];
+};
+
+/* channel status */
+extern u8 ps3av_mode_cs_info[];
+
+/** command status **/
+#define PS3AV_STATUS_SUCCESS			0x0000	/* success */
+#define PS3AV_STATUS_RECEIVE_VUART_ERROR	0x0001	/* receive vuart error */
+#define PS3AV_STATUS_SYSCON_COMMUNICATE_FAIL	0x0002	/* syscon communication error */
+#define PS3AV_STATUS_INVALID_COMMAND		0x0003	/* obsolete invalid CID */
+#define PS3AV_STATUS_INVALID_PORT		0x0004	/* invalid port number */
+#define PS3AV_STATUS_INVALID_VID		0x0005	/* invalid video format */
+#define PS3AV_STATUS_INVALID_COLOR_SPACE	0x0006	/* invalid video color space */
+#define PS3AV_STATUS_INVALID_FS			0x0007	/* invalid audio sampling freq */
+#define PS3AV_STATUS_INVALID_AUDIO_CH		0x0008	/* invalid audio channel number */
+#define PS3AV_STATUS_UNSUPPORTED_VERSION	0x0009	/* version mismatch  */
+#define PS3AV_STATUS_INVALID_SAMPLE_SIZE	0x000a	/* invalid audio sample bit size */
+#define PS3AV_STATUS_FAILURE			0x000b	/* other failures */
+#define PS3AV_STATUS_UNSUPPORTED_COMMAND	0x000c	/* unsupported cid */
+#define PS3AV_STATUS_BUFFER_OVERFLOW		0x000d	/* write buffer overflow */
+#define PS3AV_STATUS_INVALID_VIDEO_PARAM	0x000e	/* invalid video param */
+#define PS3AV_STATUS_NO_SEL			0x000f	/* selector does not exist */
+#define PS3AV_STATUS_INVALID_AV_PARAM		0x0010	/* invalid backend param */
+#define PS3AV_STATUS_INVALID_AUDIO_PARAM	0x0011	/* invalid audio param */
+#define PS3AV_STATUS_UNSUPPORTED_HDMI_MODE	0x0012	/* unsupported hdmi mode */
+#define PS3AV_STATUS_NO_SYNC_HEAD		0x0013	/* sync head failed */
+
+extern void ps3av_set_hdr(u32, u16, struct ps3av_send_hdr *);
+extern int ps3av_do_pkt(u32, u16, size_t, struct ps3av_send_hdr *);
+
+extern int ps3av_cmd_init(void);
+extern int ps3av_cmd_fin(void);
+extern int ps3av_cmd_av_video_mute(int, u32 *, u32);
+extern int ps3av_cmd_av_video_disable_sig(u32);
+extern int ps3av_cmd_av_tv_mute(u32, u32);
+extern int ps3av_cmd_enable_event(void);
+extern int ps3av_cmd_av_hdmi_mode(u8);
+extern u32 ps3av_cmd_set_av_video_cs(void *, u32, int, int, int, u32);
+extern u32 ps3av_cmd_set_video_mode(void *, u32, int, int, u32);
+extern int ps3av_cmd_video_format_black(u32, u32, u32);
+extern int ps3av_cmd_av_audio_mute(int, u32 *, u32);
+extern u32 ps3av_cmd_set_av_audio_param(void *, u32,
+					const struct ps3av_pkt_audio_mode *,
+					u32);
+extern void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *, u32, u32,
+				     u32, u32, u32, u32);
+extern int ps3av_cmd_audio_mode(struct ps3av_pkt_audio_mode *);
+extern int ps3av_cmd_audio_mute(int, u32 *, u32);
+extern int ps3av_cmd_audio_active(int, u32);
+extern int ps3av_cmd_avb_param(struct ps3av_pkt_avb_param *, u32);
+extern int ps3av_cmd_av_get_hw_conf(struct ps3av_pkt_av_get_hw_conf *);
+extern int ps3av_cmd_video_get_monitor_info(struct ps3av_pkt_av_get_monitor_info *,
+					    u32);
+
+extern int ps3av_set_video_mode(int);
+extern int ps3av_set_audio_mode(u32, u32, u32, u32, u32);
+extern int ps3av_get_auto_mode(void);
+extern int ps3av_get_mode(void);
+extern int ps3av_video_mode2res(u32, u32 *, u32 *);
+extern int ps3av_video_mute(int);
+extern int ps3av_audio_mute(int);
+extern int ps3av_audio_mute_analog(int);
+#endif	/* _ASM_POWERPC_PS3AV_H_ */
diff --git a/arch/powerpc/include/asm/ps3gpu.h b/arch/powerpc/include/asm/ps3gpu.h
new file mode 100644
index 0000000000..9645c30471
--- /dev/null
+++ b/arch/powerpc/include/asm/ps3gpu.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 GPU declarations.
+ *
+ *  Copyright 2009 Sony Corporation
+ */
+
+#ifndef _ASM_POWERPC_PS3GPU_H
+#define _ASM_POWERPC_PS3GPU_H
+
+#include <linux/mutex.h>
+
+#include <asm/lv1call.h>
+
+
+#define L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_SYNC	0x101
+#define L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_FLIP	0x102
+
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_SETUP	0x600
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT		0x601
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT_SYNC	0x602
+#define L1GPU_CONTEXT_ATTRIBUTE_FB_CLOSE	0x603
+
+#define L1GPU_FB_BLIT_WAIT_FOR_COMPLETION	(1ULL << 32)
+
+#define L1GPU_DISPLAY_SYNC_HSYNC		1
+#define L1GPU_DISPLAY_SYNC_VSYNC		2
+
+
+/* mutex synchronizing GPU accesses and video mode changes */
+extern struct mutex ps3_gpu_mutex;
+
+
+/* DISPLAY_SYNC attribute call: forwards @head and @ddr_offset, zero-fills the rest. */
+static inline int lv1_gpu_display_sync(u64 context_handle, u64 head,
+				       u64 ddr_offset)
+{
+	const u64 attr = L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_SYNC;
+	return lv1_gpu_context_attribute(context_handle, attr, head, ddr_offset, 0, 0);
+}
+
+/* DISPLAY_FLIP attribute call: forwards @head and @ddr_offset, zero-fills the rest. */
+static inline int lv1_gpu_display_flip(u64 context_handle, u64 head,
+				       u64 ddr_offset)
+{
+	const u64 attr = L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_FLIP;
+	return lv1_gpu_context_attribute(context_handle, attr, head, ddr_offset, 0, 0);
+}
+
+/* FB_SETUP attribute call: forwards the three arguments, last operand is zero. */
+static inline int lv1_gpu_fb_setup(u64 context_handle, u64 xdr_lpar,
+				   u64 xdr_size, u64 ioif_offset)
+{
+	const u64 attr = L1GPU_CONTEXT_ATTRIBUTE_FB_SETUP;
+	return lv1_gpu_context_attribute(context_handle, attr, xdr_lpar, xdr_size, ioif_offset, 0);
+}
+
+/* FB_BLIT attribute call: all four arguments are forwarded unchanged, in order. */
+static inline int lv1_gpu_fb_blit(u64 context_handle, u64 ddr_offset,
+				  u64 ioif_offset, u64 sync_width, u64 pitch)
+{
+	const u64 attr = L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT;
+	return lv1_gpu_context_attribute(context_handle, attr, ddr_offset,
+					 ioif_offset, sync_width, pitch);
+}
+
+/* FB_CLOSE attribute call: takes no operands, so all four are passed as zero. */
+static inline int lv1_gpu_fb_close(u64 context_handle)
+{
+	const u64 attr = L1GPU_CONTEXT_ATTRIBUTE_FB_CLOSE;
+	return lv1_gpu_context_attribute(context_handle, attr, 0, 0, 0, 0);
+}
+
+#endif /* _ASM_POWERPC_PS3GPU_H */
diff --git a/arch/powerpc/include/asm/ps3stor.h b/arch/powerpc/include/asm/ps3stor.h
new file mode 100644
index 0000000000..1d8279014f
--- /dev/null
+++ b/arch/powerpc/include/asm/ps3stor.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PS3 Storage Devices
+ *
+ * Copyright (C) 2007 Sony Computer Entertainment Inc.
+ * Copyright 2007 Sony Corp.
+ */
+
+#ifndef _ASM_POWERPC_PS3STOR_H_
+#define _ASM_POWERPC_PS3STOR_H_
+
+#include <linux/interrupt.h>
+
+#include <asm/ps3.h>
+
+
+struct ps3_storage_region {	/* element type of ps3_storage_device.regions[] */
+	unsigned int id;
+	u64 start;	/* NOTE(review): units (sectors vs. bytes) are not shown here - confirm */
+	u64 size;	/* NOTE(review): presumably same units as start - confirm */
+};
+
+struct ps3_storage_device {
+	struct ps3_system_bus_device sbd;	/* to_ps3_storage_device() maps &sbd.core back to this struct */
+
+	struct ps3_dma_region dma_region;
+	unsigned int irq;
+	u64 blk_size;
+
+	u64 tag;
+	u64 lv1_status;
+	struct completion done;
+
+	unsigned long bounce_size;	/* NOTE(review): bounce_* presumably describe one bounce buffer - confirm */
+	void *bounce_buf;
+	u64 bounce_lpar;
+	dma_addr_t bounce_dma;
+
+	unsigned int num_regions;	/* NOTE(review): presumably the element count of regions[] - confirm */
+	unsigned long accessible_regions;	/* NOTE(review): looks like a per-region bitmap - confirm */
+	unsigned int region_idx;		/* first accessible region */
+	struct ps3_storage_region regions[];	/* flexible array member: must be last */
+};
+
+static inline struct ps3_storage_device *to_ps3_storage_device(struct device *dev)
+{	/* @dev must be the sbd.core member embedded in a struct ps3_storage_device */
+	return container_of(dev, struct ps3_storage_device, sbd.core);
+}
+
+extern int ps3stor_setup(struct ps3_storage_device *dev,
+			 irq_handler_t handler);
+extern void ps3stor_teardown(struct ps3_storage_device *dev);
+extern u64 ps3stor_read_write_sectors(struct ps3_storage_device *dev, u64 lpar,
+				      u64 start_sector, u64 sectors,
+				      int write);
+extern u64 ps3stor_send_command(struct ps3_storage_device *dev, u64 cmd,
+				u64 arg1, u64 arg2, u64 arg3, u64 arg4);
+
+#endif /* _ASM_POWERPC_PS3STOR_H_ */
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
new file mode 100644
index 0000000000..73c22c579a
--- /dev/null
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -0,0 +1,63 @@
+#ifndef _ASM_POWERPC_PTE_WALK_H
+#define _ASM_POWERPC_PTE_WALK_H
+
+#include <linux/sched.h>
+
+/* Don't use this directly */
+extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+			       bool *is_thp, unsigned *hshift);
+
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
+				    bool *is_thp, unsigned *hshift)
+{	/* checked wrapper around __find_linux_pte(); warns if irqs are not disabled */
+	pte_t *pte;
+
+	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+	pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
+
+#if defined(CONFIG_DEBUG_VM) &&						\
+	!(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
+	/*
+	 * No hugepage support is configured, so a non-zero *hshift is a bug.
+	 */
+	if (hshift)	/* @hshift is optional; only check it when supplied */
+		WARN_ON(*hshift);
+#endif
+	return pte;
+}
+
+static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
+{
+	/* Walk init_mm's page tables; NULL means no THP flag is requested. */
+	return __find_linux_pte(init_mm.pgd, ea, NULL, hshift);
+}
+
+/*
+ * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a
+ * physical address, without taking locks. This can be used in real-mode.
+ */
+static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
+{
+	pte_t *ptep;
+	phys_addr_t pa;
+	unsigned int hugepage_shift;	/* must match find_init_mm_pte()'s 'unsigned *' out-parameter */
+
+	/*
+	 * init_mm does not free page tables, and does not do THP. It may
+	 * have huge pages from huge vmalloc / ioremap etc.
+	 */
+	ptep = find_init_mm_pte(addr, &hugepage_shift);
+	if (WARN_ON(!ptep))
+		return 0;	/* no PTE found: warn and report physical address 0 */
+
+	pa = PFN_PHYS(pte_pfn(*ptep));
+
+	if (!hugepage_shift)	/* a zero shift is treated as a normal PAGE_SHIFT page */
+		hugepage_shift = PAGE_SHIFT;
+
+	pa |= addr & ((1ul << hugepage_shift) - 1);	/* add the offset within the (huge) page */
+
+	return pa;
+}
+
+#endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
new file mode 100644
index 0000000000..9db8b16567
--- /dev/null
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This struct defines the way the registers are stored on the
+ * kernel stack during a system call or other kernel entry.
+ *
+ * this should only contain volatile regs
+ * since we can keep non-volatile in the thread_struct
+ * should set this up when only volatiles are saved
+ * by intr code.
+ *
+ * Since this is going on the stack, *CARE MUST BE TAKEN* to ensure
+ * that the overall structure is a multiple of 16 bytes in length.
+ *
+ * Note that the offsets of the fields in this struct correspond with
+ * the PT_* values below.  This simplifies arch/powerpc/kernel/ptrace.c.
+ */
+#ifndef _ASM_POWERPC_PTRACE_H
+#define _ASM_POWERPC_PTRACE_H
+
+#include <linux/err.h>
+#include <uapi/asm/ptrace.h>
+#include <asm/asm-const.h>
+#include <asm/reg.h>
+
+#ifndef __ASSEMBLY__
+struct pt_regs
+{
+	union {
+		struct user_pt_regs user_regs;	/* shares storage with the named fields below */
+		struct {
+			unsigned long gpr[32];
+			unsigned long nip;
+			unsigned long msr;
+			unsigned long orig_gpr3;
+			unsigned long ctr;
+			unsigned long link;
+			unsigned long xer;
+			unsigned long ccr;
+#ifdef CONFIG_PPC64
+			unsigned long softe;
+#else
+			unsigned long mq;
+#endif
+			unsigned long trap;	/* low bits carry flags; see TRAP_FLAGS_MASK */
+			union {	/* two names for the same slot */
+				unsigned long dar;
+				unsigned long dear;
+			};
+			union {	/* two names for the same slot */
+				unsigned long dsisr;
+				unsigned long esr;
+			};
+			unsigned long result;
+		};
+	};
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP)
+	union {
+		struct {
+#ifdef CONFIG_PPC64
+			unsigned long ppr;
+			unsigned long exit_result;
+#endif
+			union {	/* kuap and amr, when both configured, share one slot */
+#ifdef CONFIG_PPC_KUAP
+				unsigned long kuap;
+#endif
+#ifdef CONFIG_PPC_PKEY
+				unsigned long amr;
+#endif
+			};
+#ifdef CONFIG_PPC_PKEY
+			unsigned long iamr;
+#endif
+		};
+		unsigned long __pad[4];	/* Maintain 16 byte interrupt stack alignment */
+	};
+#endif
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+	struct { /* Must be a multiple of 16 bytes */
+		unsigned long mas0;
+		unsigned long mas1;
+		unsigned long mas2;
+		unsigned long mas3;
+		unsigned long mas6;
+		unsigned long mas7;
+		unsigned long srr0;
+		unsigned long srr1;
+		unsigned long csrr0;
+		unsigned long csrr1;
+		unsigned long dsrr0;
+		unsigned long dsrr1;
+	};
+#endif
+};
+#endif
+
+
+// Always displays as "REGS" in memory dumps
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x52454753)
+#else
+#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x53474552)
+#endif
+
+#ifdef __powerpc64__
+
+/*
+ * Size of redzone that userspace is allowed to use below the stack
+ * pointer.  This is 288 in the 64-bit big-endian ELF ABI, and 512 in
+ * the new ELFv2 little-endian ABI, so we allow the larger amount.
+ *
+ * For kernel code we allow a 288-byte redzone, in order to conserve
+ * kernel stack space; gcc currently only uses 288 bytes, and will
+ * hopefully allow explicit control of the redzone size in future.
+ */
+#define USER_REDZONE_SIZE	512
+#define KERNEL_REDZONE_SIZE	288
+
+#define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define STACK_FRAME_MIN_SIZE	32
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_REGS	(STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_MARKER	STACK_FRAME_MIN_SIZE
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_SWITCH_FRAME_REGS	(STACK_FRAME_MIN_SIZE + 16)
+#else
+/*
+ * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save
+ * area. This parameter area is not used by calls to C from interrupt entry,
+ * so the second from last one of those is used for the frame marker.
+ */
+#define STACK_FRAME_MIN_SIZE	112
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS	STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER	(STACK_FRAME_MIN_SIZE - 16)
+#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_MIN_SIZE
+#endif
+
+/* Size of dummy stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE	128
+#define __SIGNAL_FRAMESIZE32	64
+
+#else /* __powerpc64__ */
+
+#define USER_REDZONE_SIZE	0
+#define KERNEL_REDZONE_SIZE	0
+#define STACK_FRAME_MIN_SIZE	16
+#define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS	STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER	(STACK_FRAME_MIN_SIZE - 8)
+#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_MIN_SIZE
+
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE	64
+
+#endif /* __powerpc64__ */
+
+#define STACK_INT_FRAME_SIZE	(KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE)
+#define STACK_INT_FRAME_MARKER_LONGS	(STACK_INT_FRAME_MARKER/sizeof(long))
+
+#ifndef __ASSEMBLY__
+#include <asm/paca.h>
+
+#ifdef CONFIG_SMP
+extern unsigned long profile_pc(struct pt_regs *regs);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
+#endif
+
+long do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
+
+static inline void set_return_regs_changed(void)
+{	/* Book3S-64: clear both paca "valid" flags; compiles to nothing elsewhere */
+#ifdef CONFIG_PPC_BOOK3S_64
+	WRITE_ONCE(local_paca->hsrr_valid, 0);
+	WRITE_ONCE(local_paca->srr_valid, 0);
+#endif
+}
+
+static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
+{
+	regs->nip = ip;
+	set_return_regs_changed();	/* nip changed: clear the paca srr/hsrr valid flags (Book3S-64) */
+}
+
+static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
+{
+	regs->msr = msr;
+	set_return_regs_changed();	/* msr changed: clear the paca srr/hsrr valid flags (Book3S-64) */
+}
+
+static inline void regs_add_return_ip(struct pt_regs *regs, long offset)
+{
+	regs_set_return_ip(regs, regs->nip + offset);	/* @offset is signed, so this can move nip backwards */
+}
+
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+	return regs->nip;	/* the field written by regs_set_return_ip() */
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+		unsigned long val)
+{
+	regs_set_return_ip(regs, val);	/* same effect as regs_set_return_ip(), including the flag clearing */
+}
+
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	return regs->gpr[1];	/* r1; kernel_stack_pointer() reads the same slot */
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+	return 0;	/* always 0: @regs is not consulted */
+}
+
+#define user_mode(regs) (((regs)->msr & MSR_PR) != 0)
+
+#define force_successful_syscall_return()   \
+	do { \
+		set_thread_flag(TIF_NOERROR); \
+	} while(0)
+
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)
+
+/*
+ * The 4 low bits (0xf) are available as flags to overload the trap word,
+ * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK
+ * must cover the bits used as flags, including bit 0 which is used as the
+ * "norestart" bit.
+ */
+#ifdef __powerpc64__
+#define TRAP_FLAGS_MASK		0x1
+#else
+/*
+ * On 4xx we use bit 1 in the trap word to indicate whether the exception
+ * is a critical exception (1 means it is).
+ */
+#define TRAP_FLAGS_MASK		0xf
+#define IS_CRITICAL_EXC(regs)	(((regs)->trap & 2) != 0)
+#define IS_MCHECK_EXC(regs)	(((regs)->trap & 4) != 0)
+#define IS_DEBUG_EXC(regs)	(((regs)->trap & 8) != 0)
+#endif /* __powerpc64__ */
+#define TRAP(regs)		((regs)->trap & ~TRAP_FLAGS_MASK)
+
+static __always_inline void set_trap(struct pt_regs *regs, unsigned long val)
+{	/* keep the flag bits already in regs->trap; take every other bit from @val */
+	regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK);
+}
+
+static inline bool trap_is_scv(struct pt_regs *regs)
+{
+	return IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000;
+}
+
+static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
+{	/* flag-masked trap value 0x7ff0; constant false unless CONFIG_PPC_BOOK3S_64 */
+	return IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0;
+}
+
+static inline bool trap_is_syscall(struct pt_regs *regs)
+{
+	return trap_is_scv(regs) || TRAP(regs) == 0xc00;
+}
+
+static inline bool trap_norestart(struct pt_regs *regs)
+{
+	return (regs->trap & 0x1) != 0;
+}
+
+static __always_inline void set_trap_norestart(struct pt_regs *regs)
+{
+	regs->trap |= 0x1;	/* bit 0 is the "norestart" flag tested by trap_norestart() */
+}
+
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	/* For scv, failure is an error value in r3 ... */
+	if (trap_is_scv(regs))
+		return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
+	/* ... otherwise it is signalled by bit 0x10000000 of regs->ccr. */
+	return !(regs->ccr & 0x10000000);
+}
+
+/*
+ * Return r3 as a signed value. For scv, or when is_syscall_success() holds,
+ * r3 is returned unchanged; otherwise the negated r3 is returned.
+ */
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (trap_is_scv(regs) || is_syscall_success(regs))
+		return regs->gpr[3];
+	return -regs->gpr[3];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+	regs->gpr[3] = rc;	/* r3 is the slot regs_return_value() reads back */
+}
+
+static inline bool cpu_has_msr_ri(void)
+{
+	return !IS_ENABLED(CONFIG_BOOKE_OR_40x);	/* false when CONFIG_BOOKE_OR_40x is enabled */
+}
+
+static inline bool regs_is_unrecoverable(struct pt_regs *regs)
+{	/* true only when cpu_has_msr_ri() holds and MSR_RI is clear in regs->msr */
+	return unlikely(cpu_has_msr_ri() && !(regs->msr & MSR_RI));
+}
+
+static inline void regs_set_recoverable(struct pt_regs *regs)
+{
+	if (cpu_has_msr_ri())	/* no-op when cpu_has_msr_ri() is false */
+		regs_set_return_msr(regs, regs->msr | MSR_RI);	/* set MSR_RI via the msr setter */
+}
+
+static inline void regs_set_unrecoverable(struct pt_regs *regs)
+{
+	if (cpu_has_msr_ri())	/* no-op when cpu_has_msr_ri() is false */
+		regs_set_return_msr(regs, regs->msr & ~MSR_RI);	/* clear MSR_RI via the msr setter */
+}
+
+#define arch_has_single_step()	(1)
+#define arch_has_block_step()	(true)
+#define ARCH_HAS_USER_SINGLE_STEP_REPORT
+
+/*
+ * kprobe-based event tracer support
+ */
+
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, dsisr))
+
+/**
+ * regs_get_register() - read a register by its byte offset in pt_regs
+ * @regs:	pt_regs to read from.
+ * @offset:	byte offset of the register within struct pt_regs.
+ *
+ * Return: the unsigned long stored @offset bytes from the start of @regs,
+ * or 0 if @offset is bigger than MAX_REG_OFFSET. Only the upper bound of
+ * @offset is checked here.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+						unsigned int offset)
+{
+	if (unlikely(offset > MAX_REG_OFFSET))
+		return 0;
+	return *(unsigned long *)((char *)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - test whether an address is on the kernel stack
+ * @regs:	pt_regs holding the kernel stack pointer.
+ * @addr:	address to test.
+ *
+ * Return: true when @addr and the stack pointer saved in @regs fall in the
+ * same THREAD_SIZE-aligned region, false otherwise.
+ */
+static inline bool regs_within_kernel_stack(struct pt_regs *regs,
+						unsigned long addr)
+{
+	const unsigned long stack_mask = ~(THREAD_SIZE - 1);
+
+	return (addr & stack_mask) == (kernel_stack_pointer(regs) & stack_mask);
+}
+
+/**
+ * regs_get_kernel_stack_nth() - read the Nth word above the stack pointer
+ * @regs:	pt_regs holding the kernel stack pointer.
+ * @n:		index of the stack entry, counted in unsigned longs.
+ *
+ * Return: the @n-th unsigned long starting at the kernel stack pointer held
+ * in @regs, or 0 if that slot fails the regs_within_kernel_stack() check
+ * (in which case it is not read).
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+						      unsigned int n)
+{
+	unsigned long *slot = (unsigned long *)kernel_stack_pointer(regs) + n;
+
+	if (!regs_within_kernel_stack(regs, (unsigned long)slot))
+		return 0;
+
+	return *slot;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#ifndef __powerpc64__
+/* We need PT_SOFTE defined at all times to avoid #ifdefs */
+#define PT_SOFTE PT_MQ
+#else /* __powerpc64__ */
+#define PT_FPSCR32 (PT_FPR0 + 2*32 + 1)	/* each FP reg occupies 2 32-bit userspace slots */
+#define PT_VR0_32 164	/* each Vector reg occupies 4 slots in 32-bit */
+#define PT_VSCR_32 (PT_VR0 + 32*4 + 3)
+#define PT_VRSAVE_32 (PT_VR0 + 33*4)
+#define PT_VSR0_32 300 	/* each VSR reg occupies 4 slots in 32-bit */
+#endif /* __powerpc64__ */
+#endif /* _ASM_POWERPC_PTRACE_H */
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 0000000000..28a53fb69b
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <linux/compiler.h>
+#include <asm/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#ifdef CONFIG_PPC64
+/*
+ * Use the EH=1 hint for accesses that result in the lock being acquired.
+ * The hardware is supposed to optimise this pattern by holding the lock
+ * cacheline longer, and releasing when a store to the same memory (the
+ * unlock) is performed.
+ */
+#define _Q_SPIN_EH_HINT 1
+#else
+#define _Q_SPIN_EH_HINT 0
+#endif
+
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * makes locks slightly more efficient when stealing.
+ *
+ * This is compile-time, so if true then there may always be stealers, so the
+ * nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
+
+/*
+ * Put a speculation barrier after testing the lock/node and finding it
+ * busy. Try to prevent pointless speculation in slow paths.
+ *
+ * Slows down the lockstorm microbenchmark with no stealing, where locking
+ * is purely FIFO through the queue. May have more benefit in real workload
+ * where speculating into the wrong place could have a greater cost.
+ */
+#define _Q_SPIN_SPEC_BARRIER 0
+
+#ifdef CONFIG_PPC64
+/*
+ * Execute a miso instruction after passing the MCS lock ownership to the
+ * queue head. Miso is intended to make stores visible to other CPUs sooner.
+ *
+ * This seems to make the lockstorm microbenchmark nospin test go slightly
+ * faster on POWER10, but disable for now.
+ */
+#define _Q_SPIN_MISO 0
+#else
+#define _Q_SPIN_MISO 0
+#endif
+
+#ifdef CONFIG_PPC64
+/*
+ * This executes miso after an unlock of the lock word, having ownership
+ * pass to the next CPU sooner. This will slow the uncontended path to some
+ * degree. No evidence it helps yet.
+ */
+#define _Q_SPIN_MISO_UNLOCK 0
+#else
+#define _Q_SPIN_MISO_UNLOCK 0
+#endif
+
+/*
+ * Seems to slow down lockstorm microbenchmark, suspect queue node just
+ * has to become shared again right afterwards when its waiter spins on
+ * the lock field.
+ */
+#define _Q_SPIN_PREFETCH_NEXT 0
+
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+	return READ_ONCE(lock->val);	/* whole lock word: non-zero if any field is set */
+}
+
+static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
+{
+	return !lock.val;	/* by-value copy: unlocked means the entire word is zero */
+}
+
+static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
+{	/* returns 1 when the tail-CPU field of the lock word is non-zero, else 0 */
+	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
+}
+
+static __always_inline u32 queued_spin_encode_locked_val(void)
+{	/* lock word for "held by this CPU": locked bit | (cpu id << owner offset) */
+	/* XXX: make this use lock value in paca like simple spinlocks? */
+	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
+}
+
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
+{
+	u32 new = queued_spin_encode_locked_val();
+	u32 prev;
+
+	/* Succeeds only if the whole word is 0: unlocked and no queued nodes */
+	asm volatile(
+"1:	lwarx	%0,0,%1,%3	# __queued_spin_trylock_nosteal		\n"
+"	cmpwi	0,%0,0							\n"
+"	bne-	2f							\n"
+"	stwcx.	%2,0,%1							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev)	/* %0: lock word as loaded by lwarx */
+	: "r" (&lock->val), "r" (new),	/* %1: address of the word, %2: value stored on success */
+	  "i" (_Q_SPIN_EH_HINT)	/* %3: EH hint operand of lwarx */
+	: "cr0", "memory");
+
+	return likely(prev == 0);	/* non-zero result: the store happened and the lock is held */
+}
+
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
+{
+	u32 new = queued_spin_encode_locked_val();
+	u32 prev, tmp;
+
+	/* Only bits outside the tail field must be 0; the tail bits are kept */
+	asm volatile(
+"1:	lwarx	%0,0,%2,%5	# __queued_spin_trylock_steal		\n"
+"	andc.	%1,%0,%4						\n"
+"	bne-	2f							\n"
+"	and	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev), "=&r" (tmp)	/* %0: loaded lock word, %1: scratch / value stored */
+	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),	/* %2: address, %3: new owner bits, %4: tail mask */
+	  "i" (_Q_SPIN_EH_HINT)	/* %5: EH hint operand of lwarx */
+	: "cr0", "memory");
+
+	return likely(!(prev & ~_Q_TAIL_CPU_MASK));	/* success iff no non-tail bit was set */
+}
+
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+	/* _Q_SPIN_TRY_LOCK_STEAL is a compile-time constant picking the variant. */
+	if (_Q_SPIN_TRY_LOCK_STEAL)
+		return __queued_spin_trylock_steal(lock);
+	return __queued_spin_trylock_nosteal(lock);
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
+{
+	if (!queued_spin_trylock(lock))	/* inline trylock first */
+		queued_spin_lock_slowpath(lock);	/* out-of-line path when the trylock fails */
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	smp_store_release(&lock->locked, 0);	/* zero the 16-bit 'locked' half with release ordering */
+	if (_Q_SPIN_MISO_UNLOCK)	/* compile-time constant, currently defined as 0 */
+		asm volatile("miso" ::: "memory");
+}
+
+#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
+#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
+#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
+#define arch_spin_lock(l)		queued_spin_lock(l)
+#define arch_spin_trylock(l)		queued_spin_trylock(l)
+#define arch_spin_unlock(l)		queued_spin_unlock(l)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void);
+#else
+static inline void pv_spinlocks_init(void) { }	/* empty stub so callers need no #ifdef */
+#endif
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
new file mode 100644
index 0000000000..4766a7aa03
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H
+#define _ASM_POWERPC_QSPINLOCK_TYPES_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+typedef struct qspinlock {
+	union {
+		u32 val;	/* full 32-bit lock word */
+
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u16	locked;	/* overlays the low-order 16 bits of val */
+			u8	reserved[2];
+		};
+#else
+		struct {
+			u8	reserved[2];
+			u16	locked;	/* same low-order 16 bits of val on big-endian */
+		};
+#endif
+	};
+} arch_spinlock_t;
+
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = 0 } }
+
+/*
+ * Bitfields in the lock word:
+ *
+ *     0: locked bit
+ *  1-14: lock holder cpu
+ *    15: lock owner or queuer vcpus observed to be preempted bit
+ *    16: must queue bit
+ * 17-31: tail cpu (+1)
+ */
+#define	_Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
+				      << _Q_ ## type ## _OFFSET)
+/* 0x00000001 */
+#define _Q_LOCKED_OFFSET	0
+#define _Q_LOCKED_BITS		1
+#define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
+
+/* 0x00007ffe */
+#define _Q_OWNER_CPU_OFFSET	1
+#define _Q_OWNER_CPU_BITS	14
+#define _Q_OWNER_CPU_MASK	_Q_SET_MASK(OWNER_CPU)
+
+#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
+/* 0x00008000 */
+#define _Q_SLEEPY_OFFSET	15
+#define _Q_SLEEPY_BITS		1
+#define _Q_SLEEPY_VAL		(1U << _Q_SLEEPY_OFFSET)
+
+/* 0x00010000 */
+#define _Q_MUST_Q_OFFSET	16
+#define _Q_MUST_Q_BITS		1
+#define _Q_MUST_Q_VAL		(1U << _Q_MUST_Q_OFFSET)
+
+/* 0xfffe0000 */
+#define _Q_TAIL_CPU_OFFSET	17
+#define _Q_TAIL_CPU_BITS	15
+#define _Q_TAIL_CPU_MASK	_Q_SET_MASK(TAIL_CPU)
+
+#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
+#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
new file mode 100644
index 0000000000..4ae4ab9090
--- /dev/null
+++ b/arch/powerpc/include/asm/reg.h
@@ -0,0 +1,1473 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Contains the definition of registers common to all PowerPC variants.
+ * If a register definition has been changed in a different PowerPC
+ * variant, we will encase it in #ifndef XXX ... #endif, and have the
+ * number used in the Programming Environments Manual For 32-Bit
+ * Implementations of the PowerPC Architecture (a.k.a. Green Book) here.
+ */
+
+#ifndef _ASM_POWERPC_REG_H
+#define _ASM_POWERPC_REG_H
+#ifdef __KERNEL__
+
+#include <linux/stringify.h>
+#include <linux/const.h>
+#include <asm/cputable.h>
+#include <asm/asm-const.h>
+#include <asm/feature-fixups.h>
+
+/* Pickup Book E specific registers. */
+#ifdef CONFIG_BOOKE_OR_40x
+#include <asm/reg_booke.h>
+#endif
+
+#ifdef CONFIG_FSL_EMB_PERFMON
+#include <asm/reg_fsl_emb.h>
+#endif
+
+#include <asm/reg_8xx.h>
+
+#define MSR_SF_LG	63              /* Enable 64 bit mode */
+#define MSR_HV_LG 	60              /* Hypervisor state */
+#define MSR_TS_T_LG	34		/* Trans Mem state: Transactional */
+#define MSR_TS_S_LG	33		/* Trans Mem state: Suspended */
+#define MSR_TS_LG	33		/* Trans Mem state (2 bits) */
+#define MSR_TM_LG	32		/* Trans Mem Available */
+#define MSR_VEC_LG	25	        /* Enable AltiVec */
+#define MSR_VSX_LG	23		/* Enable VSX */
+#define MSR_S_LG	22		/* Secure state */
+#define MSR_POW_LG	18		/* Enable Power Management */
+#define MSR_WE_LG	18		/* Wait State Enable */
+#define MSR_TGPR_LG	17		/* TLB Update registers in use */
+#define MSR_CE_LG	17		/* Critical Interrupt Enable */
+#define MSR_ILE_LG	16		/* Interrupt Little Endian */
+#define MSR_EE_LG	15		/* External Interrupt Enable */
+#define MSR_PR_LG	14		/* Problem State / Privilege Level */
+#define MSR_FP_LG	13		/* Floating Point enable */
+#define MSR_ME_LG	12		/* Machine Check Enable */
+#define MSR_FE0_LG	11		/* Floating Exception mode 0 */
+#define MSR_SE_LG	10		/* Single Step */
+#define MSR_BE_LG	9		/* Branch Trace */
+#define MSR_DE_LG	9 		/* Debug Exception Enable */
+#define MSR_FE1_LG	8		/* Floating Exception mode 1 */
+#define MSR_IP_LG	6		/* Exception prefix 0x000/0xFFF */
+#define MSR_IR_LG	5 		/* Instruction Relocate */
+#define MSR_DR_LG	4 		/* Data Relocate */
+#define MSR_PE_LG	3		/* Protection Enable */
+#define MSR_PX_LG	2		/* Protection Exclusive Mode */
+#define MSR_PMM_LG	2		/* Performance monitor */
+#define MSR_RI_LG	1		/* Recoverable Exception */
+#define MSR_LE_LG	0 		/* Little Endian */
+
+#ifdef __ASSEMBLY__
+#define __MASK(X)	(1<<(X))	/* single-bit mask; unsuffixed constant for assembly sources */
+#else
+#define __MASK(X)	(1UL<<(X))	/* single-bit mask as an unsigned long */
+#endif
+
+#ifdef CONFIG_PPC64
+#define MSR_SF		__MASK(MSR_SF_LG)	/* Enable 64 bit mode */
+#define MSR_HV 		__MASK(MSR_HV_LG)	/* Hypervisor state */
+#define MSR_S		__MASK(MSR_S_LG)	/* Secure state */
+#else
+/* so tests for these bits fail on 32-bit */
+#define MSR_SF		0
+#define MSR_HV		0
+#define MSR_S		0
+#endif
+
+/*
+ * To be used in shared book E/book S, this avoids needing to worry about
+ * book S/book E in shared code
+ */
+#ifndef MSR_SPE
+#define MSR_SPE 	0
+#endif
+
+#define MSR_VEC		__MASK(MSR_VEC_LG)	/* Enable AltiVec */
+#define MSR_VSX		__MASK(MSR_VSX_LG)	/* Enable VSX */
+#define MSR_POW		__MASK(MSR_POW_LG)	/* Enable Power Management */
+#define MSR_WE		__MASK(MSR_WE_LG)	/* Wait State Enable */
+#define MSR_TGPR	__MASK(MSR_TGPR_LG)	/* TLB Update registers in use */
+#define MSR_CE		__MASK(MSR_CE_LG)	/* Critical Interrupt Enable */
+#define MSR_ILE		__MASK(MSR_ILE_LG)	/* Interrupt Little Endian */
+#define MSR_EE		__MASK(MSR_EE_LG)	/* External Interrupt Enable */
+#define MSR_PR		__MASK(MSR_PR_LG)	/* Problem State / Privilege Level */
+#define MSR_FP		__MASK(MSR_FP_LG)	/* Floating Point enable */
+#define MSR_ME		__MASK(MSR_ME_LG)	/* Machine Check Enable */
+#define MSR_FE0		__MASK(MSR_FE0_LG)	/* Floating Exception mode 0 */
+#define MSR_SE		__MASK(MSR_SE_LG)	/* Single Step */
+#define MSR_BE		__MASK(MSR_BE_LG)	/* Branch Trace */
+#define MSR_DE		__MASK(MSR_DE_LG)	/* Debug Exception Enable */
+#define MSR_FE1		__MASK(MSR_FE1_LG)	/* Floating Exception mode 1 */
+#define MSR_IP		__MASK(MSR_IP_LG)	/* Exception prefix 0x000/0xFFF */
+#define MSR_IR		__MASK(MSR_IR_LG)	/* Instruction Relocate */
+#define MSR_DR		__MASK(MSR_DR_LG)	/* Data Relocate */
+#define MSR_PE		__MASK(MSR_PE_LG)	/* Protection Enable */
+#define MSR_PX		__MASK(MSR_PX_LG)	/* Protection Exclusive Mode */
+#ifndef MSR_PMM
+#define MSR_PMM		__MASK(MSR_PMM_LG)	/* Performance monitor */
+#endif
+#define MSR_RI		__MASK(MSR_RI_LG)	/* Recoverable Exception */
+#define MSR_LE		__MASK(MSR_LE_LG)	/* Little Endian */
+
+#define MSR_TM		__MASK(MSR_TM_LG)	/* Transactional Mem Available */
+#define MSR_TS_N	0			/*  Non-transactional */
+#define MSR_TS_S	__MASK(MSR_TS_S_LG)	/*  Transaction Suspended */
+#define MSR_TS_T	__MASK(MSR_TS_T_LG)	/*  Transaction Transactional */
+#define MSR_TS_MASK	(MSR_TS_T | MSR_TS_S)   /* Transaction State bits */
+#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
+#define MSR_TM_TRANSACTIONAL(x)	(((x) & MSR_TS_MASK) == MSR_TS_T)
+#define MSR_TM_SUSPENDED(x)	(((x) & MSR_TS_MASK) == MSR_TS_S)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#else
+#define MSR_TM_ACTIVE(x) ((void)(x), 0) /* TM not configured: evaluate x, always 0 */
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+#define MSR_64BIT	MSR_SF
+
+/* Server variant */
+#define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_HV)
+#ifdef __BIG_ENDIAN__
+#define MSR_		__MSR
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV)
+#else
+#define MSR_		(__MSR | MSR_LE)
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV | MSR_LE)
+#endif
+#define MSR_KERNEL	(MSR_ | MSR_64BIT)
+#define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
+#define MSR_USER64	(MSR_USER32 | MSR_64BIT)
+#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx)
+/* Default MSR for kernel mode. */
+#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
+#define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
+#endif
+
+#ifndef MSR_64BIT
+#define MSR_64BIT	0
+#endif
+
+/* Condition Register related */
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+#define CR0_TBEGIN_FAILURE	(0x2 << 28) /* 0b0010 */
+
+
+/* Power Management - Processor Stop Status and Control Register Fields */
+#define PSSCR_RL_MASK		0x0000000F /* Requested Level */
+#define PSSCR_MTL_MASK		0x000000F0 /* Maximum Transition Level */
+#define PSSCR_TR_MASK		0x00000300 /* Transition State */
+#define PSSCR_PSLL_MASK		0x000F0000 /* Power-Saving Level Limit */
+#define PSSCR_EC		0x00100000 /* Exit Criterion */
+#define PSSCR_ESL		0x00200000 /* Enable State Loss */
+#define PSSCR_SD		0x00400000 /* Status Disable */
+#define PSSCR_PLS	0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_PLS_SHIFT	60
+#define PSSCR_GUEST_VIS	0xf0000000000003ffUL /* Guest-visible PSSCR fields */
+#define PSSCR_FAKE_SUSPEND	0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+#define PSSCR_FAKE_SUSPEND_LG	10	   /* Fake-suspend bit position */
+
+/* Floating Point Status and Control Register (FPSCR) Fields */
+#define FPSCR_FX	0x80000000	/* FPU exception summary */
+#define FPSCR_FEX	0x40000000	/* FPU enabled exception summary */
+#define FPSCR_VX	0x20000000	/* Invalid operation summary */
+#define FPSCR_OX	0x10000000	/* Overflow exception summary */
+#define FPSCR_UX	0x08000000	/* Underflow exception summary */
+#define FPSCR_ZX	0x04000000	/* Zero-divide exception summary */
+#define FPSCR_XX	0x02000000	/* Inexact exception summary */
+#define FPSCR_VXSNAN	0x01000000	/* Invalid op for SNaN */
+#define FPSCR_VXISI	0x00800000	/* Invalid op for Inv - Inv */
+#define FPSCR_VXIDI	0x00400000	/* Invalid op for Inv / Inv */
+#define FPSCR_VXZDZ	0x00200000	/* Invalid op for Zero / Zero */
+#define FPSCR_VXIMZ	0x00100000	/* Invalid op for Inv * Zero */
+#define FPSCR_VXVC	0x00080000	/* Invalid op for Compare */
+#define FPSCR_FR	0x00040000	/* Fraction rounded */
+#define FPSCR_FI	0x00020000	/* Fraction inexact */
+#define FPSCR_FPRF	0x0001f000	/* FPU Result Flags */
+#define FPSCR_FPCC	0x0000f000	/* FPU Condition Codes */
+#define FPSCR_VXSOFT	0x00000400	/* Invalid op for software request */
+#define FPSCR_VXSQRT	0x00000200	/* Invalid op for square root */
+#define FPSCR_VXCVI	0x00000100	/* Invalid op for integer convert */
+#define FPSCR_VE	0x00000080	/* Invalid op exception enable */
+#define FPSCR_OE	0x00000040	/* IEEE overflow exception enable */
+#define FPSCR_UE	0x00000020	/* IEEE underflow exception enable */
+#define FPSCR_ZE	0x00000010	/* IEEE zero divide exception enable */
+#define FPSCR_XE	0x00000008	/* FP inexact exception enable */
+#define FPSCR_NI	0x00000004	/* FPU non IEEE-Mode */
+#define FPSCR_RN	0x00000003	/* FPU rounding control */
+
+/* Bit definitions for SPEFSCR. */
+#define SPEFSCR_SOVH	0x80000000	/* Summary integer overflow high */
+#define SPEFSCR_OVH	0x40000000	/* Integer overflow high */
+#define SPEFSCR_FGH	0x20000000	/* Embedded FP guard bit high */
+#define SPEFSCR_FXH	0x10000000	/* Embedded FP sticky bit high */
+#define SPEFSCR_FINVH	0x08000000	/* Embedded FP invalid operation high */
+#define SPEFSCR_FDBZH	0x04000000	/* Embedded FP div by zero high */
+#define SPEFSCR_FUNFH	0x02000000	/* Embedded FP underflow high */
+#define SPEFSCR_FOVFH	0x01000000	/* Embedded FP overflow high */
+#define SPEFSCR_FINXS	0x00200000	/* Embedded FP inexact sticky */
+#define SPEFSCR_FINVS	0x00100000	/* Embedded FP invalid op. sticky */
+#define SPEFSCR_FDBZS	0x00080000	/* Embedded FP div by zero sticky */
+#define SPEFSCR_FUNFS	0x00040000	/* Embedded FP underflow sticky */
+#define SPEFSCR_FOVFS	0x00020000	/* Embedded FP overflow sticky */
+#define SPEFSCR_MODE	0x00010000	/* Embedded FP mode */
+#define SPEFSCR_SOV	0x00008000	/* Integer summary overflow */
+#define SPEFSCR_OV	0x00004000	/* Integer overflow */
+#define SPEFSCR_FG	0x00002000	/* Embedded FP guard bit */
+#define SPEFSCR_FX	0x00001000	/* Embedded FP sticky bit */
+#define SPEFSCR_FINV	0x00000800	/* Embedded FP invalid operation */
+#define SPEFSCR_FDBZ	0x00000400	/* Embedded FP div by zero */
+#define SPEFSCR_FUNF	0x00000200	/* Embedded FP underflow */
+#define SPEFSCR_FOVF	0x00000100	/* Embedded FP overflow */
+#define SPEFSCR_FINXE	0x00000040	/* Embedded FP inexact enable */
+#define SPEFSCR_FINVE	0x00000020	/* Embedded FP invalid op. enable */
+#define SPEFSCR_FDBZE	0x00000010	/* Embedded FP div by zero enable */
+#define SPEFSCR_FUNFE	0x00000008	/* Embedded FP underflow enable */
+#define SPEFSCR_FOVFE	0x00000004	/* Embedded FP overflow enable */
+#define SPEFSCR_FRMC 	0x00000003	/* Embedded FP rounding mode control */
+
+/* Special Purpose Registers (SPRNs)*/
+
+#ifdef CONFIG_40x
+#define SPRN_PID	0x3B1	/* Process ID */
+#else
+#define SPRN_PID	0x030	/* Process ID */
+#ifdef CONFIG_BOOKE
+#define SPRN_PID0	SPRN_PID/* Process ID Register 0 */
+#endif
+#endif
+
+#define SPRN_CTR	0x009	/* Count Register */
+#define SPRN_DSCR	0x11
+#define SPRN_CFAR	0x1c	/* Come From Address Register */
+#define SPRN_AMR	0x1d	/* Authority Mask Register */
+#define SPRN_UAMOR	0x9d	/* User Authority Mask Override Register */
+#define SPRN_AMOR	0x15d	/* Authority Mask Override Register */
+#define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
+#define SPRN_TFIAR	0x81	/* Transaction Failure Inst Addr   */
+#define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
+#define SPRN_TEXASRU	0x83	/* ''	   ''	   ''	 Upper 32  */
+
+#define TEXASR_FC_LG	(63 - 7)	/* Failure Code */
+#define TEXASR_AB_LG	(63 - 31)	/* Abort */
+#define TEXASR_SU_LG	(63 - 32)	/* Suspend */
+#define TEXASR_HV_LG	(63 - 34)	/* Hypervisor state*/
+#define TEXASR_PR_LG	(63 - 35)	/* Privilege level */
+#define TEXASR_FS_LG	(63 - 36)	/* failure summary */
+#define TEXASR_EX_LG	(63 - 37)	/* TFIAR exact bit */
+#define TEXASR_ROT_LG	(63 - 38)	/* ROT bit */
+
+#define   TEXASR_ABORT	__MASK(TEXASR_AB_LG) /* terminated by tabort or treclaim */
+#define   TEXASR_SUSP	__MASK(TEXASR_SU_LG) /* tx failed in suspended state */
+#define   TEXASR_HV	__MASK(TEXASR_HV_LG) /* MSR[HV] when failure occurred */
+#define   TEXASR_PR	__MASK(TEXASR_PR_LG) /* MSR[PR] when failure occurred */
+#define   TEXASR_FS	__MASK(TEXASR_FS_LG) /* TEXASR Failure Summary */
+#define   TEXASR_EXACT	__MASK(TEXASR_EX_LG) /* TFIAR value is exact */
+#define   TEXASR_ROT	__MASK(TEXASR_ROT_LG)
+#define   TEXASR_FC	(ASM_CONST(0xFF) << TEXASR_FC_LG)
+
+#define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
+
+#define SPRN_TIDR	144	/* Thread ID register */
+#define SPRN_CTRLF	0x088
+#define SPRN_CTRLT	0x098
+#define   CTRL_CT	0xc0000000	/* current thread */
+#define   CTRL_CT0	0x80000000	/* thread 0 */
+#define   CTRL_CT1	0x40000000	/* thread 1 */
+#define   CTRL_TE	0x00c00000	/* thread enable */
+#define   CTRL_RUNLATCH	0x1
+#define SPRN_DAWR0	0xB4
+#define SPRN_DAWR1	0xB5
+#define SPRN_RPR	0xBA	/* Relative Priority Register */
+#define SPRN_CIABR	0xBB
+#define   CIABR_PRIV		0x3
+#define   CIABR_PRIV_USER	1
+#define   CIABR_PRIV_SUPER	2
+#define   CIABR_PRIV_HYPER	3
+#define SPRN_DAWRX0	0xBC
+#define SPRN_DAWRX1	0xBD
+#define   DAWRX_USER	__MASK(0)
+#define   DAWRX_KERNEL	__MASK(1)
+#define   DAWRX_HYP	__MASK(2)
+#define   DAWRX_WTI	__MASK(3)
+#define   DAWRX_WT	__MASK(4)
+#define   DAWRX_DR	__MASK(5)
+#define   DAWRX_DW	__MASK(6)
+#define SPRN_DABR	0x3F5	/* Data Address Breakpoint Register */
+#define SPRN_DABR2	0x13D	/* e300 */
+#define SPRN_DABRX	0x3F7	/* Data Address Breakpoint Register Extension */
+#define   DABRX_USER	__MASK(0)
+#define   DABRX_KERNEL	__MASK(1)
+#define   DABRX_HYP	__MASK(2)
+#define   DABRX_BTI	__MASK(3)
+#define   DABRX_ALL     (DABRX_BTI | DABRX_HYP | DABRX_KERNEL | DABRX_USER)
+#define SPRN_DAR	0x013	/* Data Address Register */
+#define SPRN_DBCR	0x136	/* e300 Data Breakpoint Control Reg */
+#define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
+#define   DSISR_BAD_DIRECT_ST	0x80000000 /* Obsolete: Direct store error */
+#define   DSISR_NOHPTE		0x40000000 /* no translation found */
+#define   DSISR_ATTR_CONFLICT	0x20000000 /* P9: Process vs. Partition attr */
+#define   DSISR_NOEXEC_OR_G	0x10000000 /* Alias of SRR1 bit, see below */
+#define   DSISR_PROTFAULT	0x08000000 /* protection fault */
+#define   DSISR_BADACCESS	0x04000000 /* bad access to CI or G */
+#define   DSISR_ISSTORE		0x02000000 /* access was a store */
+#define   DSISR_DABRMATCH	0x00400000 /* hit data breakpoint */
+#define   DSISR_NOSEGMENT	0x00200000 /* STAB miss (unsupported) */
+#define   DSISR_KEYFAULT	0x00200000 /* Storage Key fault */
+#define   DSISR_BAD_EXT_CTRL	0x00100000 /* Obsolete: External ctrl error */
+#define   DSISR_UNSUPP_MMU	0x00080000 /* P9: Unsupported MMU config */
+#define   DSISR_SET_RC		0x00040000 /* P9: Failed setting of R/C bits */
+#define   DSISR_PRTABLE_FAULT   0x00020000 /* P9: Fault on process table */
+#define   DSISR_ICSWX_NO_CT     0x00004000 /* P7: icswx unavailable cp type */
+#define   DSISR_BAD_COPYPASTE   0x00000008 /* P9: Copy/Paste on wrong memtype */
+#define   DSISR_BAD_AMO		0x00000004 /* P9: Incorrect AMO opcode */
+#define   DSISR_BAD_CI_LDST	0x00000002 /* P8: Bad HV CI load/store */
+
+/*
+ * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always
+ * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1
+ * indicates an attempt at executing from a no-execute PTE
+ * or segment or from a guarded page.
+ *
+ * We add a definition here for completeness as we alias
+ * DSISR and SRR1 in do_page_fault.
+ */
+
+/*
+ * DSISR bits that are treated as a fault. Any bit set
+ * here will skip hash_page, and cause do_page_fault to
+ * trigger a SIGBUS or SIGSEGV:
+ */
+#define   DSISR_BAD_FAULT_32S	(DSISR_BAD_DIRECT_ST	| \
+				 DSISR_BADACCESS	| \
+				 DSISR_BAD_EXT_CTRL)
+#define	  DSISR_BAD_FAULT_64S	(DSISR_BAD_FAULT_32S	| \
+				 DSISR_ATTR_CONFLICT	| \
+				 DSISR_UNSUPP_MMU	| \
+				 DSISR_PRTABLE_FAULT	| \
+				 DSISR_ICSWX_NO_CT	| \
+				 DSISR_BAD_COPYPASTE	| \
+				 DSISR_BAD_AMO		| \
+				 DSISR_BAD_CI_LDST)
+/*
+ * These bits are equivalent in SRR1 and DSISR for 0x400
+ * instruction access interrupts on Book3S
+ */
+#define   DSISR_SRR1_MATCH_32S	(DSISR_NOHPTE		| \
+				 DSISR_NOEXEC_OR_G	| \
+				 DSISR_PROTFAULT)
+#define   DSISR_SRR1_MATCH_64S	(DSISR_SRR1_MATCH_32S	| \
+				 DSISR_KEYFAULT		| \
+				 DSISR_UNSUPP_MMU	| \
+				 DSISR_SET_RC		| \
+				 DSISR_PRTABLE_FAULT)
+
+#define SPRN_TBRL	0x10C	/* Time Base Read Lower Register (user, R/O) */
+#define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
+#define SPRN_CIR	0x11B	/* Chip Information Register (hyper, R/O) */
+#define SPRN_TBWL	0x11C	/* Time Base Lower Register (super, R/W) */
+#define SPRN_TBWU	0x11D	/* Time Base Upper Register (super, R/W) */
+#define SPRN_TBU40	0x11E	/* Timebase upper 40 bits (hyper, R/W) */
+#define SPRN_SPURR	0x134	/* Scaled PURR */
+#define SPRN_HSPRG0	0x130	/* Hypervisor Scratch 0 */
+#define SPRN_HSPRG1	0x131	/* Hypervisor Scratch 1 */
+#define SPRN_HDSISR     0x132
+#define SPRN_HDAR       0x133
+#define SPRN_HDEC	0x136	/* Hypervisor Decrementer */
+#define SPRN_HIOR	0x137	/* 970 Hypervisor interrupt offset */
+#define SPRN_RMOR	0x138	/* Real mode offset register */
+#define SPRN_HRMOR	0x139	/* Hypervisor real mode offset register */
+#define SPRN_HDEXCR_RO	0x1C7	/* Hypervisor DEXCR (non-privileged, readonly) */
+#define SPRN_HASHKEYR	0x1D4	/* Non-privileged hashst/hashchk key register */
+#define SPRN_HDEXCR	0x1D7	/* Hypervisor dynamic execution control register */
+#define SPRN_DEXCR_RO	0x32C	/* DEXCR (non-privileged, readonly) */
+#define SPRN_ASDR	0x330	/* Access segment descriptor register */
+#define SPRN_DEXCR	0x33C	/* Dynamic execution control register */
+#define   DEXCR_PR_SBHE	  0x80000000UL /* 0: Speculative Branch Hint Enable */
+#define   DEXCR_PR_IBRTPD 0x10000000UL /* 3: Indirect Branch Recurrent Target Prediction Disable */
+#define   DEXCR_PR_SRAPD  0x08000000UL /* 4: Subroutine Return Address Prediction Disable */
+#define   DEXCR_PR_NPHIE  0x04000000UL /* 5: Non-Privileged Hash Instruction Enable */
+#define   DEXCR_INIT	DEXCR_PR_NPHIE	/* Fixed DEXCR value to initialise all CPUs with */
+#define SPRN_IC		0x350	/* Virtual Instruction Count */
+#define SPRN_VTB	0x351	/* Virtual Time Base */
+#define SPRN_LDBAR	0x352	/* LD Base Address Register */
+#define SPRN_PMICR	0x354   /* Power Management Idle Control Reg */
+#define SPRN_PMSR	0x355   /* Power Management Status Reg */
+#define SPRN_PMMAR	0x356	/* Power Management Memory Activity Register */
+#define SPRN_PSSCR	0x357	/* Processor Stop Status and Control Register (ISA 3.0) */
+#define SPRN_PSSCR_PR	0x337	/* PSSCR ISA 3.0, privileged mode access */
+#define SPRN_TRIG2	0x372
+#define SPRN_PMCR	0x374	/* Power Management Control Register */
+#define SPRN_RWMR	0x375	/* Region-Weighting Mode Register */
+
+/* HFSCR and FSCR bit numbers are the same */
+#define FSCR_PREFIX_LG	13	/* Enable Prefix Instructions */
+#define FSCR_SCV_LG	12	/* Enable System Call Vectored */
+#define FSCR_MSGP_LG	10	/* Enable MSGP */
+#define FSCR_TAR_LG	8	/* Enable Target Address Register */
+#define FSCR_EBB_LG	7	/* Enable Event Based Branching */
+#define FSCR_TM_LG	5	/* Enable Transactional Memory */
+#define FSCR_BHRB_LG	4	/* Enable Branch History Rolling Buffer*/
+#define FSCR_PM_LG	3	/* Enable prob/priv access to PMU SPRs */
+#define FSCR_DSCR_LG	2	/* Enable Data Stream Control Register */
+#define FSCR_VECVSX_LG	1	/* Enable VMX/VSX  */
+#define FSCR_FP_LG	0	/* Enable Floating Point */
+#define SPRN_FSCR	0x099	/* Facility Status & Control Register */
+#define   FSCR_PREFIX	__MASK(FSCR_PREFIX_LG)
+#define   FSCR_SCV	__MASK(FSCR_SCV_LG)
+#define   FSCR_TAR	__MASK(FSCR_TAR_LG)
+#define   FSCR_EBB	__MASK(FSCR_EBB_LG)
+#define   FSCR_DSCR	__MASK(FSCR_DSCR_LG)
+#define   FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
+#define SPRN_HFSCR	0xbe	/* HV=1 Facility Status & Control Register */
+#define   HFSCR_PREFIX	__MASK(FSCR_PREFIX_LG)
+#define   HFSCR_MSGP	__MASK(FSCR_MSGP_LG)
+#define   HFSCR_TAR	__MASK(FSCR_TAR_LG)
+#define   HFSCR_EBB	__MASK(FSCR_EBB_LG)
+#define   HFSCR_TM	__MASK(FSCR_TM_LG)
+#define   HFSCR_PM	__MASK(FSCR_PM_LG)
+#define   HFSCR_BHRB	__MASK(FSCR_BHRB_LG)
+#define   HFSCR_DSCR	__MASK(FSCR_DSCR_LG)
+#define   HFSCR_VECVSX	__MASK(FSCR_VECVSX_LG)
+#define   HFSCR_FP	__MASK(FSCR_FP_LG)
+#define   HFSCR_INTR_CAUSE FSCR_INTR_CAUSE
+#define SPRN_TAR	0x32f	/* Target Address Register */
+#define SPRN_LPCR	0x13E	/* LPAR Control Register */
+#define   LPCR_VPM0		ASM_CONST(0x8000000000000000)
+#define   LPCR_VPM1		ASM_CONST(0x4000000000000000)
+#define   LPCR_ISL		ASM_CONST(0x2000000000000000)
+#define   LPCR_VC_SH		61
+#define   LPCR_DPFD_SH		52
+#define   LPCR_DPFD		(ASM_CONST(7) << LPCR_DPFD_SH)
+#define   LPCR_VRMASD_SH	47
+#define   LPCR_VRMASD		(ASM_CONST(0x1f) << LPCR_VRMASD_SH)
+#define   LPCR_VRMA_L		ASM_CONST(0x0008000000000000)
+#define   LPCR_VRMA_LP0		ASM_CONST(0x0001000000000000)
+#define   LPCR_VRMA_LP1		ASM_CONST(0x0000800000000000)
+#define   LPCR_RMLS		0x1C000000	/* Implementation dependent RMO limit sel */
+#define   LPCR_RMLS_SH		26
+#define   LPCR_HAIL		ASM_CONST(0x0000000004000000)   /* HV AIL (ISAv3.1) */
+#define   LPCR_ILE		ASM_CONST(0x0000000002000000)   /* !HV irqs set MSR:LE */
+#define   LPCR_AIL		ASM_CONST(0x0000000001800000)	/* Alternate interrupt location */
+#define   LPCR_AIL_0		ASM_CONST(0x0000000000000000)	/* MMU off exception offset 0x0 */
+#define   LPCR_AIL_3		ASM_CONST(0x0000000001800000)   /* MMU on exception offset 0xc00...4xxx */
+#define   LPCR_ONL		ASM_CONST(0x0000000000040000)	/* online - PURR/SPURR count */
+#define   LPCR_LD		ASM_CONST(0x0000000000020000)	/* large decrementer */
+#define   LPCR_PECE		ASM_CONST(0x000000000001f000)	/* powersave exit cause enable */
+#define     LPCR_PECEDP	ASM_CONST(0x0000000000010000)	/* directed priv dbells cause exit */
+#define     LPCR_PECEDH	ASM_CONST(0x0000000000008000)	/* directed hyp dbells cause exit */
+#define     LPCR_PECE0		ASM_CONST(0x0000000000004000)	/* ext. exceptions can cause exit */
+#define     LPCR_PECE1		ASM_CONST(0x0000000000002000)	/* decrementer can cause exit */
+#define     LPCR_PECE2		ASM_CONST(0x0000000000001000)	/* machine check etc can cause exit */
+#define     LPCR_PECE_HVEE	ASM_CONST(0x0000400000000000)	/* P9 Wakeup on HV interrupts */
+#define   LPCR_MER		ASM_CONST(0x0000000000000800)	/* Mediated External Exception */
+#define   LPCR_MER_SH		11
+#define	  LPCR_GTSE		ASM_CONST(0x0000000000000400)  	/* Guest Translation Shootdown Enable */
+#define   LPCR_TC		ASM_CONST(0x0000000000000200)	/* Translation control */
+#define   LPCR_HEIC		ASM_CONST(0x0000000000000010)   /* Hypervisor External Interrupt Control */
+#define   LPCR_LPES		0x0000000c
+#define   LPCR_LPES0		ASM_CONST(0x0000000000000008)      /* LPAR Env selector 0 */
+#define   LPCR_LPES1		ASM_CONST(0x0000000000000004)      /* LPAR Env selector 1 */
+#define   LPCR_LPES_SH		2
+#define   LPCR_RMI		ASM_CONST(0x0000000000000002)      /* real mode is cache inhibit */
+#define   LPCR_HVICE		ASM_CONST(0x0000000000000002)      /* P9: HV interrupt enable */
+#define   LPCR_HDICE		ASM_CONST(0x0000000000000001)      /* Hyp Decr enable (HV,PR,EE) */
+#define   LPCR_UPRT		ASM_CONST(0x0000000000400000)      /* Use Process Table (ISA 3) */
+#define   LPCR_HR		ASM_CONST(0x0000000000100000)
+#ifndef SPRN_LPID
+#define SPRN_LPID	0x13F	/* Logical Partition Identifier */
+#endif
+#define	SPRN_HMER	0x150	/* Hypervisor maintenance exception reg */
+#define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
+#define	SPRN_HMEER	0x151	/* Hyp maintenance exception enable reg */
+#define SPRN_PCR	0x152	/* Processor compatibility register */
+#define   PCR_VEC_DIS	(__MASK(63-0))	/* Vec. disable (bit NA since POWER8) */
+#define   PCR_VSX_DIS	(__MASK(63-1))	/* VSX disable (bit NA since POWER8) */
+#define   PCR_TM_DIS	(__MASK(63-2))	/* Trans. memory disable (POWER8) */
+#define   PCR_MMA_DIS	(__MASK(63-3)) /* Matrix-Multiply Accelerator */
+#define   PCR_HIGH_BITS	(PCR_MMA_DIS | PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
+/*
+ * These bits are used in the function kvmppc_set_arch_compat() to specify and
+ * determine both the compatibility level which we want to emulate and the
+ * compatibility level which the host is capable of emulating.
+ */
+#define   PCR_ARCH_300	0x10		/* Architecture 3.00 */
+#define   PCR_ARCH_207	0x8		/* Architecture 2.07 */
+#define   PCR_ARCH_206	0x4		/* Architecture 2.06 */
+#define   PCR_ARCH_205	0x2		/* Architecture 2.05 */
+#define   PCR_LOW_BITS	(PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205 | PCR_ARCH_300)
+#define   PCR_MASK	~(PCR_HIGH_BITS | PCR_LOW_BITS)	/* PCR Reserved Bits */
+#define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
+#define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
+#define SPRN_TLBVPNR	0x155	/* P7 TLB control register */
+#define SPRN_TLBRPNR	0x156	/* P7 TLB control register */
+#define SPRN_TLBLPIDR	0x157	/* P7 TLB control register */
+#define SPRN_DBAT0L	0x219	/* Data BAT 0 Lower Register */
+#define SPRN_DBAT0U	0x218	/* Data BAT 0 Upper Register */
+#define SPRN_DBAT1L	0x21B	/* Data BAT 1 Lower Register */
+#define SPRN_DBAT1U	0x21A	/* Data BAT 1 Upper Register */
+#define SPRN_DBAT2L	0x21D	/* Data BAT 2 Lower Register */
+#define SPRN_DBAT2U	0x21C	/* Data BAT 2 Upper Register */
+#define SPRN_DBAT3L	0x21F	/* Data BAT 3 Lower Register */
+#define SPRN_DBAT3U	0x21E	/* Data BAT 3 Upper Register */
+#define SPRN_DBAT4L	0x239	/* Data BAT 4 Lower Register */
+#define SPRN_DBAT4U	0x238	/* Data BAT 4 Upper Register */
+#define SPRN_DBAT5L	0x23B	/* Data BAT 5 Lower Register */
+#define SPRN_DBAT5U	0x23A	/* Data BAT 5 Upper Register */
+#define SPRN_DBAT6L	0x23D	/* Data BAT 6 Lower Register */
+#define SPRN_DBAT6U	0x23C	/* Data BAT 6 Upper Register */
+#define SPRN_DBAT7L	0x23F	/* Data BAT 7 Lower Register */
+#define SPRN_DBAT7U	0x23E	/* Data BAT 7 Upper Register */
+#define SPRN_PPR	0x380	/* SMT Thread status Register */
+#define SPRN_TSCR	0x399	/* Thread Switch Control Register */
+
+#define SPRN_DEC	0x016		/* Decrement Register */
+#define SPRN_PIT	0x3DB		/* Programmable Interval Timer (40x/BOOKE) */
+
+#define SPRN_DER	0x095		/* Debug Enable Register */
+#define DER_RSTE	0x40000000	/* Reset Interrupt */
+#define DER_CHSTPE	0x20000000	/* Check Stop */
+#define DER_MCIE	0x10000000	/* Machine Check Interrupt */
+#define DER_EXTIE	0x02000000	/* External Interrupt */
+#define DER_ALIE	0x01000000	/* Alignment Interrupt */
+#define DER_PRIE	0x00800000	/* Program Interrupt */
+#define DER_FPUVIE	0x00400000	/* FP Unavailable Interrupt */
+#define DER_DECIE	0x00200000	/* Decrementer Interrupt */
+#define DER_SYSIE	0x00040000	/* System Call Interrupt */
+#define DER_TRE		0x00020000	/* Trace Interrupt */
+#define DER_SEIE	0x00004000	/* FP SW Emulation Interrupt */
+#define DER_ITLBMSE	0x00002000	/* Imp. Spec. Instruction TLB Miss */
+#define DER_ITLBERE	0x00001000	/* Imp. Spec. Instruction TLB Error */
+#define DER_DTLBMSE	0x00000800	/* Imp. Spec. Data TLB Miss */
+#define DER_DTLBERE	0x00000400	/* Imp. Spec. Data TLB Error */
+#define DER_LBRKE	0x00000008	/* Load/Store Breakpoint Interrupt */
+#define DER_IBRKE	0x00000004	/* Instruction Breakpoint Interrupt */
+#define DER_EBRKE	0x00000002	/* External Breakpoint Interrupt */
+#define DER_DPIE	0x00000001	/* Dev. Port Nonmaskable Request */
+#define SPRN_DMISS	0x3D0		/* Data TLB Miss Register */
+#define SPRN_DHDES	0x0B1		/* Directed Hyp. Doorbell Exc. State */
+#define SPRN_DPDES	0x0B0		/* Directed Priv. Doorbell Exc. State */
+#define SPRN_EAR	0x11A		/* External Address Register */
+#define SPRN_HASH1	0x3D2		/* Primary Hash Address Register */
+#define SPRN_HASH2	0x3D3		/* Secondary Hash Address Register */
+#define SPRN_HID0	0x3F0		/* Hardware Implementation Register 0 */
+#define HID0_HDICE_SH	(63 - 23)	/* 970 HDEC interrupt enable */
+#define HID0_EMCP	(1<<31)		/* Enable Machine Check pin */
+#define HID0_EBA	(1<<29)		/* Enable Bus Address Parity */
+#define HID0_EBD	(1<<28)		/* Enable Bus Data Parity */
+#define HID0_SBCLK	(1<<27)
+#define HID0_EICE	(1<<26)
+#define HID0_TBEN	(1<<26)		/* Timebase enable - 745x */
+#define HID0_ECLK	(1<<25)
+#define HID0_PAR	(1<<24)
+#define HID0_STEN	(1<<24)		/* Software table search enable - 745x */
+#define HID0_HIGH_BAT	(1<<23)		/* Enable high BATs - 7455 */
+#define HID0_DOZE	(1<<23)
+#define HID0_NAP	(1<<22)
+#define HID0_SLEEP	(1<<21)
+#define HID0_DPM	(1<<20)
+#define HID0_BHTCLR	(1<<18)		/* Clear branch history table - 7450 */
+#define HID0_XAEN	(1<<17)		/* Extended addressing enable - 7450 */
+#define HID0_NHR	(1<<16)		/* Not hard reset (software bit-7450)*/
+#define HID0_ICE	(1<<15)		/* Instruction Cache Enable */
+#define HID0_DCE	(1<<14)		/* Data Cache Enable */
+#define HID0_ILOCK	(1<<13)		/* Instruction Cache Lock */
+#define HID0_DLOCK	(1<<12)		/* Data Cache Lock */
+#define HID0_ICFI	(1<<11)		/* Instr. Cache Flash Invalidate */
+#define HID0_DCI	(1<<10)		/* Data Cache Invalidate */
+#define HID0_SPD	(1<<9)		/* Speculative disable */
+#define HID0_DAPUEN	(1<<8)		/* Debug APU enable */
+#define HID0_SGE	(1<<7)		/* Store Gathering Enable */
+#define HID0_SIED	(1<<7)		/* Serial Instr. Execution [Disable] */
+#define HID0_DCFA	(1<<6)		/* Data Cache Flush Assist */
+#define HID0_LRSTK	(1<<4)		/* Link register stack - 745x */
+#define HID0_BTIC	(1<<5)		/* Branch Target Instr Cache Enable */
+#define HID0_ABE	(1<<3)		/* Address Broadcast Enable */
+#define HID0_FOLD	(1<<3)		/* Branch Folding enable - 745x */
+#define HID0_BHTE	(1<<2)		/* Branch History Table Enable */
+#define HID0_BTCD	(1<<1)		/* Branch target cache disable */
+#define HID0_NOPDST	(1<<1)		/* No-op dst, dstt, etc. instr. */
+#define HID0_NOPTI	(1<<0)		/* No-op dcbt and dcbst instr. */
+/* POWER8 HID0 bits */
+#define HID0_POWER8_4LPARMODE	__MASK(61)
+#define HID0_POWER8_2LPARMODE	__MASK(57)
+#define HID0_POWER8_1TO2LPAR	__MASK(52)
+#define HID0_POWER8_1TO4LPAR	__MASK(51)
+#define HID0_POWER8_DYNLPARDIS	__MASK(48)
+
+/* POWER9 HID0 bits */
+#define HID0_POWER9_RADIX	__MASK(63 - 8)
+
+#define SPRN_HID1	0x3F1		/* Hardware Implementation Register 1 */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define HID1_EMCP	(1<<31)		/* 7450 Machine Check Pin Enable */
+#define HID1_DFS	(1<<22)		/* 7447A Dynamic Frequency Scaling */
+#define HID1_PC0	(1<<16)		/* 7450 PLL_CFG[0] */
+#define HID1_PC1	(1<<15)		/* 7450 PLL_CFG[1] */
+#define HID1_PC2	(1<<14)		/* 7450 PLL_CFG[2] */
+#define HID1_PC3	(1<<13)		/* 7450 PLL_CFG[3] */
+#define HID1_SYNCBE	(1<<11)		/* 7450 ABE for sync, eieio */
+#define HID1_ABE	(1<<10)		/* 7450 Address Broadcast Enable */
+#define HID1_PS		(1<<16)		/* 750FX PLL selection */
+#endif
+#define SPRN_HID2	0x3F8		/* Hardware Implementation Register 2 */
+#define SPRN_HID2_GEKKO	0x398		/* Gekko HID2 Register */
+#define SPRN_IABR	0x3F2	/* Instruction Address Breakpoint Register */
+#define SPRN_IABR2	0x3FA		/* 83xx */
+#define SPRN_IBCR	0x135		/* 83xx Insn Breakpoint Control Reg */
+#define SPRN_IAMR	0x03D		/* Instr. Authority Mask Reg */
+#define SPRN_HID4	0x3F4		/* 970 HID4 */
+#define  HID4_LPES0	 (1ul << (63-0)) /* LPAR env. sel. bit 0 */
+#define	 HID4_RMLS2_SH	 (63 - 2)	/* Real mode limit bottom 2 bits */
+#define	 HID4_LPID5_SH	 (63 - 6)	/* partition ID bottom 4 bits */
+#define	 HID4_RMOR_SH	 (63 - 22)	/* real mode offset (16 bits) */
+#define  HID4_RMOR	 (0xFFFFul << HID4_RMOR_SH)
+#define  HID4_LPES1	 (1 << (63-57))	/* LPAR env. sel. bit 1 */
+#define  HID4_RMLS0_SH	 (63 - 58)	/* Real mode limit top bit */
+#define	 HID4_LPID1_SH	 0		/* partition ID top 2 bits */
+#define SPRN_HID4_GEKKO	0x3F3		/* Gekko HID4 */
+#define SPRN_HID5	0x3F6		/* 970 HID5 */
+#define SPRN_HID6	0x3F9	/* BE HID 6 */
+#define   HID6_LB	(0x0F<<12) /* Concurrent Large Page Modes */
+#define   HID6_DLP	(1<<20)	/* Disable all large page modes (4K only) */
+#define SPRN_TSC_CELL	0x399	/* Thread switch control on Cell */
+#define   TSC_CELL_DEC_ENABLE_0	0x400000 /* Decrementer Interrupt */
+#define   TSC_CELL_DEC_ENABLE_1	0x200000 /* Decrementer Interrupt */
+#define   TSC_CELL_EE_ENABLE	0x100000 /* External Interrupt */
+#define   TSC_CELL_EE_BOOST	0x080000 /* External Interrupt Boost */
+#define SPRN_TSC 	0x3FD	/* Thread switch control on others */
+#define SPRN_TST 	0x3FC	/* Thread switch timeout on others */
+#if !defined(SPRN_IAC1) && !defined(SPRN_IAC2)
+#define SPRN_IAC1	0x3F4		/* Instruction Address Compare 1 */
+#define SPRN_IAC2	0x3F5		/* Instruction Address Compare 2 */
+#endif
+#define SPRN_IBAT0L	0x211		/* Instruction BAT 0 Lower Register */
+#define SPRN_IBAT0U	0x210		/* Instruction BAT 0 Upper Register */
+#define SPRN_IBAT1L	0x213		/* Instruction BAT 1 Lower Register */
+#define SPRN_IBAT1U	0x212		/* Instruction BAT 1 Upper Register */
+#define SPRN_IBAT2L	0x215		/* Instruction BAT 2 Lower Register */
+#define SPRN_IBAT2U	0x214		/* Instruction BAT 2 Upper Register */
+#define SPRN_IBAT3L	0x217		/* Instruction BAT 3 Lower Register */
+#define SPRN_IBAT3U	0x216		/* Instruction BAT 3 Upper Register */
+#define SPRN_IBAT4L	0x231		/* Instruction BAT 4 Lower Register */
+#define SPRN_IBAT4U	0x230		/* Instruction BAT 4 Upper Register */
+#define SPRN_IBAT5L	0x233		/* Instruction BAT 5 Lower Register */
+#define SPRN_IBAT5U	0x232		/* Instruction BAT 5 Upper Register */
+#define SPRN_IBAT6L	0x235		/* Instruction BAT 6 Lower Register */
+#define SPRN_IBAT6U	0x234		/* Instruction BAT 6 Upper Register */
+#define SPRN_IBAT7L	0x237		/* Instruction BAT 7 Lower Register */
+#define SPRN_IBAT7U	0x236		/* Instruction BAT 7 Upper Register */
+#define SPRN_ICMP	0x3D5		/* Instruction TLB Compare Register */
+#define SPRN_ICTC	0x3FB	/* Instruction Cache Throttling Control Reg */
+#ifndef SPRN_ICTRL
+#define SPRN_ICTRL	0x3F3	/* 1011 7450 icache and interrupt ctrl */
+#endif
+#define ICTRL_EICE	0x08000000	/* enable icache parity errs */
+#define ICTRL_EDC	0x04000000	/* enable dcache parity errs */
+#define ICTRL_EICP	0x00000100	/* enable icache par. check */
+#define SPRN_IMISS	0x3D4		/* Instruction TLB Miss Register */
+#define SPRN_IMMR	0x27E		/* Internal Memory Map Register */
+#define SPRN_L2CR	0x3F9		/* Level 2 Cache Control Register */
+#define SPRN_L2CR2	0x3f8
+#define L2CR_L2E		0x80000000	/* L2 enable */
+#define L2CR_L2PE		0x40000000	/* L2 parity enable */
+#define L2CR_L2SIZ_MASK		0x30000000	/* L2 size mask */
+#define L2CR_L2SIZ_256KB	0x10000000	/* L2 size 256KB */
+#define L2CR_L2SIZ_512KB	0x20000000	/* L2 size 512KB */
+#define L2CR_L2SIZ_1MB		0x30000000	/* L2 size 1MB */
+#define L2CR_L2CLK_MASK		0x0e000000	/* L2 clock mask */
+#define L2CR_L2CLK_DISABLED	0x00000000	/* L2 clock disabled */
+#define L2CR_L2CLK_DIV1		0x02000000	/* L2 clock / 1 */
+#define L2CR_L2CLK_DIV1_5	0x04000000	/* L2 clock / 1.5 */
+#define L2CR_L2CLK_DIV2		0x08000000	/* L2 clock / 2 */
+#define L2CR_L2CLK_DIV2_5	0x0a000000	/* L2 clock / 2.5 */
+#define L2CR_L2CLK_DIV3		0x0c000000	/* L2 clock / 3 */
+#define L2CR_L2RAM_MASK		0x01800000	/* L2 RAM type mask */
+#define L2CR_L2RAM_FLOW		0x00000000	/* L2 RAM flow through */
+#define L2CR_L2RAM_PIPE		0x01000000	/* L2 RAM pipelined */
+#define L2CR_L2RAM_PIPE_LW	0x01800000	/* L2 RAM pipelined latewr */
+#define L2CR_L2DO		0x00400000	/* L2 data only */
+#define L2CR_L2I		0x00200000	/* L2 global invalidate */
+#define L2CR_L2CTL		0x00100000	/* L2 RAM control */
+#define L2CR_L2WT		0x00080000	/* L2 write-through */
+#define L2CR_L2TS		0x00040000	/* L2 test support */
+#define L2CR_L2OH_MASK		0x00030000	/* L2 output hold mask */
+#define L2CR_L2OH_0_5		0x00000000	/* L2 output hold 0.5 ns */
+#define L2CR_L2OH_1_0		0x00010000	/* L2 output hold 1.0 ns */
+#define L2CR_L2SL		0x00008000	/* L2 DLL slow */
+#define L2CR_L2DF		0x00004000	/* L2 differential clock */
+#define L2CR_L2BYP		0x00002000	/* L2 DLL bypass */
+#define L2CR_L2IP		0x00000001	/* L2 GI in progress */
+#define L2CR_L2IO_745x		0x00100000	/* L2 instr. only (745x) */
+#define L2CR_L2DO_745x		0x00010000	/* L2 data only (745x) */
+#define L2CR_L2REP_745x		0x00001000	/* L2 repl. algorithm (745x) */
+#define L2CR_L2HWF_745x		0x00000800	/* L2 hardware flush (745x) */
+#define SPRN_L3CR		0x3FA	/* Level 3 Cache Control Register */
+#define L3CR_L3E		0x80000000	/* L3 enable */
+#define L3CR_L3PE		0x40000000	/* L3 data parity enable */
+#define L3CR_L3APE		0x20000000	/* L3 addr parity enable */
+#define L3CR_L3SIZ		0x10000000	/* L3 size */
+#define L3CR_L3CLKEN		0x08000000	/* L3 clock enable */
+#define L3CR_L3RES		0x04000000	/* L3 special reserved bit */
+#define L3CR_L3CLKDIV		0x03800000	/* L3 clock divisor */
+#define L3CR_L3IO		0x00400000	/* L3 instruction only */
+#define L3CR_L3SPO		0x00040000	/* L3 sample point override */
+#define L3CR_L3CKSP		0x00030000	/* L3 clock sample point */
+#define L3CR_L3PSP		0x0000e000	/* L3 P-clock sample point */
+#define L3CR_L3REP		0x00001000	/* L3 replacement algorithm */
+#define L3CR_L3HWF		0x00000800	/* L3 hardware flush */
+#define L3CR_L3I		0x00000400	/* L3 global invalidate */
+#define L3CR_L3RT		0x00000300	/* L3 SRAM type */
+#define L3CR_L3NIRCA		0x00000080	/* L3 non-integer ratio clock adj. */
+#define L3CR_L3DO		0x00000040	/* L3 data only mode */
+#define L3CR_PMEN		0x00000004	/* L3 private memory enable */
+#define L3CR_PMSIZ		0x00000001	/* L3 private memory size */
+
+#define SPRN_MSSCR0	0x3f6	/* Memory Subsystem Control Register 0 */
+#define SPRN_MSSSR0	0x3f7	/* Memory Subsystem Status Register 0 */
+#define SPRN_LDSTCR	0x3f8	/* Load/Store control register */
+#define SPRN_LDSTDB	0x3f4	/* */
+#define SPRN_LR		0x008	/* Link Register */
+#ifndef SPRN_PIR
+#define SPRN_PIR	0x3FF	/* Processor Identification Register */
+#endif
+#define SPRN_TIR	0x1BE	/* Thread Identification Register */
+#define SPRN_PTCR	0x1D0	/* Partition table control Register */
+#define SPRN_PSPB	0x09F	/* Problem State Priority Boost reg */
+#define SPRN_PTEHI	0x3D5	/* 981 7450 PTE HI word (S/W TLB load) */
+#define SPRN_PTELO	0x3D6	/* 982 7450 PTE LO word (S/W TLB load) */
+#define SPRN_PURR	0x135	/* Processor Utilization of Resources Reg */
+#define SPRN_PVR	0x11F	/* Processor Version Register */
+#define SPRN_RPA	0x3D6	/* Required Physical Address Register */
+#define SPRN_SDA	0x3BF	/* Sampled Data Address Register */
+#define SPRN_SDR1	0x019	/* MMU Hash Base Register */
+#define SPRN_ASR	0x118   /* Address Space Register */
+#define SPRN_SIA	0x3BB	/* Sampled Instruction Address Register */
+#define SPRN_SPRG0	0x110	/* Special Purpose Register General 0 */
+#define SPRN_SPRG1	0x111	/* Special Purpose Register General 1 */
+#define SPRN_SPRG2	0x112	/* Special Purpose Register General 2 */
+#define SPRN_SPRG3	0x113	/* Special Purpose Register General 3 */
+#define SPRN_USPRG3	0x103	/* SPRG3 userspace read */
+#define SPRN_SPRG4	0x114	/* Special Purpose Register General 4 */
+#define SPRN_USPRG4	0x104	/* SPRG4 userspace read */
+#define SPRN_SPRG5	0x115	/* Special Purpose Register General 5 */
+#define SPRN_USPRG5	0x105	/* SPRG5 userspace read */
+#define SPRN_SPRG6	0x116	/* Special Purpose Register General 6 */
+#define SPRN_USPRG6	0x106	/* SPRG6 userspace read */
+#define SPRN_SPRG7	0x117	/* Special Purpose Register General 7 */
+#define SPRN_USPRG7	0x107	/* SPRG7 userspace read */
+#define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
+#define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * Bits loaded from MSR upon interrupt.
+ * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are
+ * loaded from MSR. The exception is that SRESET and MCE do not always load
+ * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses
+ * it.
+ */
+#define   SRR1_MSR_BITS		(~0x783f0000UL)
+#endif
+
+#define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */
+#define   SRR1_ISI_N_G_OR_CIP	0x10000000 /* ISI: Access is no-exec or G or CI for a prefixed instruction */
+#define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
+#define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
+#define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
+#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
+#define   SRR1_WAKESYSERR	0x00300000 /* System error */
+#define   SRR1_WAKEEE		0x00200000 /* External interrupt */
+#define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
+#define   SRR1_WAKEMT		0x00280000 /* mtctrl */
+#define	  SRR1_WAKEHMI		0x00280000 /* Hypervisor maintenance */
+#define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
+#define   SRR1_WAKEDBELL	0x00140000 /* Privileged doorbell on P8 */
+#define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
+#define	  SRR1_WAKERESET	0x00100000 /* System reset */
+#define   SRR1_WAKEHDBELL	0x000c0000 /* Hypervisor doorbell on P8 */
+#define	  SRR1_WAKESTATE	0x00030000 /* Powersave exit mask [46:47] */
+#define	  SRR1_WS_HVLOSS	0x00030000 /* HV resources not maintained */
+#define	  SRR1_WS_GPRLOSS	0x00020000 /* GPRs not maintained */
+#define	  SRR1_WS_NOLOSS	0x00010000 /* All resources maintained */
+#define   SRR1_PROGTM		0x00200000 /* TM Bad Thing */
+#define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
+#define   SRR1_PROGILL		0x00080000 /* Illegal instruction */
+#define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
+#define   SRR1_PROGTRAP		0x00020000 /* Trap */
+#define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
+
+#define   SRR1_MCE_MCP		0x00080000 /* Machine check signal caused interrupt */
+#define   SRR1_BOUNDARY		0x10000000 /* Prefixed instruction crosses 64-byte boundary */
+#define   SRR1_PREFIXED		0x20000000 /* Exception caused by prefixed instruction */
+
+#define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore Register 0 */
+#define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore Register 1 */
+#define   HSRR1_DENORM		0x00100000 /* Denorm exception */
+#define   HSRR1_HISI_WRITE	0x00010000 /* HISI bcs couldn't update mem */
+
+#define SPRN_TBCTL	0x35f	/* PA6T Timebase control register */
+#define   TBCTL_FREEZE		0x0000000000000000ull /* Freeze all tbs */
+#define   TBCTL_RESTART		0x0000000100000000ull /* Restart all tbs */
+#define   TBCTL_UPDATE_UPPER	0x0000000200000000ull /* Set upper 32 bits */
+#define   TBCTL_UPDATE_LOWER	0x0000000300000000ull /* Set lower 32 bits */
+
+#ifndef SPRN_SVR
+#define SPRN_SVR	0x11E	/* System Version Register */
+#endif
+#define SPRN_THRM1	0x3FC		/* Thermal Management Register 1 */
+/* these bits were defined in inverted endian sense originally, ugh, confusing */
+#define THRM1_TIN	(1 << 31)
+#define THRM1_TIV	(1 << 30)
+#define THRM1_THRES(x)	(((x) & 0x7f) << 23)	/* 7-bit field at bit 23; (x) parenthesized so expressions are masked whole */
+#define THRM3_SITV(x)	(((x) & 0x1fff) << 1)	/* 13-bit field at bit 1; (x) parenthesized so expressions are masked whole */
+#define THRM1_TID	(1<<2)
+#define THRM1_TIE	(1<<1)
+#define THRM1_V		(1<<0)
+#define SPRN_THRM2	0x3FD		/* Thermal Management Register 2 */
+#define SPRN_THRM3	0x3FE		/* Thermal Management Register 3 */
+#define THRM3_E		(1<<0)
+#define SPRN_TLBMISS	0x3D4		/* 980 7450 TLB Miss Register */
+#define SPRN_UMMCR0	0x3A8	/* User Monitor Mode Control Register 0 */
+#define SPRN_UMMCR1	0x3AC	/* User Monitor Mode Control Register 1 */
+#define SPRN_UPMC1	0x3A9	/* User Performance Counter Register 1 */
+#define SPRN_UPMC2	0x3AA	/* User Performance Counter Register 2 */
+#define SPRN_UPMC3	0x3AD	/* User Performance Counter Register 3 */
+#define SPRN_UPMC4	0x3AE	/* User Performance Counter Register 4 */
+#define SPRN_USIA	0x3AB	/* User Sampled Instruction Address Register */
+#define SPRN_VRSAVE	0x100	/* Vector Register Save Register */
+#define SPRN_XER	0x001	/* Fixed Point Exception Register */
+
+#define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */
+#define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */
+#define SPRN_PMC1_GEKKO  0x3B9 /* Gekko Performance Monitor Control 1 */
+#define SPRN_PMC2_GEKKO  0x3BA /* Gekko Performance Monitor Control 2 */
+#define SPRN_PMC3_GEKKO  0x3BD /* Gekko Performance Monitor Control 3 */
+#define SPRN_PMC4_GEKKO  0x3BE /* Gekko Performance Monitor Control 4 */
+#define SPRN_WPAR_GEKKO  0x399 /* Gekko Write Pipe Address Register */
+
+#define SPRN_SCOMC	0x114	/* SCOM Access Control */
+#define SPRN_SCOMD	0x115	/* SCOM Access DATA */
+
+/* Performance monitor SPRs */
+#ifdef CONFIG_PPC64
+#define SPRN_MMCR0	795
+#define   MMCR0_FC	0x80000000UL /* freeze counters */
+#define   MMCR0_FCS	0x40000000UL /* freeze in supervisor state */
+#define   MMCR0_KERNEL_DISABLE MMCR0_FCS
+#define   MMCR0_FCP	0x20000000UL /* freeze in problem state */
+#define   MMCR0_PROBLEM_DISABLE MMCR0_FCP
+#define   MMCR0_FCM1	0x10000000UL /* freeze counters while MSR mark = 1 */
+#define   MMCR0_FCM0	0x08000000UL /* freeze counters while MSR mark = 0 */
+#define   MMCR0_PMXE	ASM_CONST(0x04000000) /* perf mon exception enable */
+#define   MMCR0_FCECE	ASM_CONST(0x02000000) /* freeze ctrs on enabled cond or event */
+#define   MMCR0_TBEE	0x00400000UL /* time base exception enable */
+#define   MMCR0_BHRBA	0x00200000UL /* BHRB Access allowed in userspace */
+#define   MMCR0_EBE	0x00100000UL /* Event based branch enable */
+#define   MMCR0_PMCC	0x000c0000UL /* PMC control */
+#define   MMCR0_PMCCEXT	ASM_CONST(0x00000200) /* PMCCEXT control */
+#define   MMCR0_PMCC_U6	0x00080000UL /* PMC1-6 are R/W by user (PR) */
+#define   MMCR0_PMC1CE	0x00008000UL /* PMC1 count enable*/
+#define   MMCR0_PMCjCE	ASM_CONST(0x00004000) /* PMCj count enable*/
+#define   MMCR0_TRIGGER	0x00002000UL /* TRIGGER enable */
+#define   MMCR0_PMAO_SYNC ASM_CONST(0x00000800) /* PMU intr is synchronous */
+#define   MMCR0_C56RUN	ASM_CONST(0x00000100) /* PMC5/6 count when RUN=0 */
+/* performance monitor alert has occurred, set to 0 after handling exception */
+#define   MMCR0_PMAO	ASM_CONST(0x00000080)
+#define   MMCR0_SHRFC	0x00000040UL /* Share freeze conditions between threads */
+#define   MMCR0_FC56	0x00000010UL /* freeze counters 5 and 6 */
+#define   MMCR0_FCTI	0x00000008UL /* freeze counters in tags inactive mode */
+#define   MMCR0_FCTA	0x00000004UL /* freeze counters in tags active mode */
+#define   MMCR0_FCWAIT	0x00000002UL /* freeze counter in WAIT state */
+#define   MMCR0_FCHV	0x00000001UL /* freeze conditions in hypervisor mode */
+#define SPRN_MMCR1	798
+#define SPRN_MMCR2	785
+#define SPRN_MMCR3	754
+#define SPRN_UMMCR2	769
+#define SPRN_UMMCR3	738
+#define SPRN_MMCRA	0x312
+#define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
+#define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+#define   MMCRA_SDAR_ERAT_MISS   0x20000000UL
+#define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
+#define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
+#define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
+#define   MMCRA_SLOT_SHIFT	24
+#define   MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define   MMCRA_BHRB_DISABLE  _UL(0x2000000000) // BHRB disable bit for ISA v3.1
+#define   POWER6_MMCRA_SDSYNC 0x0000080000000000ULL	/* SDAR/SIAR synced */
+#define   POWER6_MMCRA_SIHV   0x0000040000000000ULL
+#define   POWER6_MMCRA_SIPR   0x0000020000000000ULL
+#define   POWER6_MMCRA_THRM	0x00000020UL
+#define   POWER6_MMCRA_OTHER	0x0000000EUL
+
+#define   POWER7P_MMCRA_SIAR_VALID 0x10000000	/* P7+ SIAR contents valid */
+#define   POWER7P_MMCRA_SDAR_VALID 0x08000000	/* P7+ SDAR contents valid */
+
+#define SPRN_MMCRH	316	/* Hypervisor monitor mode control register */
+#define SPRN_MMCRS	894	/* Supervisor monitor mode control register */
+#define SPRN_MMCRC	851	/* Core monitor mode control register */
+#define SPRN_EBBHR	804	/* Event based branch handler register */
+#define SPRN_EBBRR	805	/* Event based branch return register */
+#define SPRN_BESCR	806	/* Branch event status and control register */
+#define   BESCR_GE	0x8000000000000000ULL /* Global Enable */
+#define SPRN_WORT	895	/* Workload optimization register - thread */
+#define SPRN_WORC	863	/* Workload optimization register - core */
+
+#define SPRN_PMC1	787
+#define SPRN_PMC2	788
+#define SPRN_PMC3	789
+#define SPRN_PMC4	790
+#define SPRN_PMC5	791
+#define SPRN_PMC6	792
+#define SPRN_PMC7	793
+#define SPRN_PMC8	794
+#define SPRN_SIER	784
+#define   SIER_SIPR		0x2000000	/* Sampled MSR_PR */
+#define   SIER_SIHV		0x1000000	/* Sampled MSR_HV */
+#define   SIER_SIAR_VALID	0x0400000	/* SIAR contents valid */
+#define   SIER_SDAR_VALID	0x0200000	/* SDAR contents valid */
+#define SPRN_SIER2	752
+#define SPRN_SIER3	753
+#define SPRN_USIER2	736
+#define SPRN_USIER3	737
+#define SPRN_SIAR	796
+#define SPRN_SDAR	797
+#define SPRN_TACR	888
+#define SPRN_TCSCR	889
+#define SPRN_CSIGR	890
+#define SPRN_SPMC1	892
+#define SPRN_SPMC2	893
+
+/* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */
+#define MMCR0_USER_MASK	(MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO)
+#define MMCR2_USER_MASK	0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define SIER_USER_MASK	0x7fffffUL
+
+#define SPRN_PA6T_MMCR0 795
+#define   PA6T_MMCR0_EN0	0x0000000000000001UL
+#define   PA6T_MMCR0_EN1	0x0000000000000002UL
+#define   PA6T_MMCR0_EN2	0x0000000000000004UL
+#define   PA6T_MMCR0_EN3	0x0000000000000008UL
+#define   PA6T_MMCR0_EN4	0x0000000000000010UL
+#define   PA6T_MMCR0_EN5	0x0000000000000020UL
+#define   PA6T_MMCR0_SUPEN	0x0000000000000040UL
+#define   PA6T_MMCR0_PREN	0x0000000000000080UL
+#define   PA6T_MMCR0_HYPEN	0x0000000000000100UL
+#define   PA6T_MMCR0_FCM0	0x0000000000000200UL
+#define   PA6T_MMCR0_FCM1	0x0000000000000400UL
+#define   PA6T_MMCR0_INTGEN	0x0000000000000800UL
+#define   PA6T_MMCR0_INTEN0	0x0000000000001000UL
+#define   PA6T_MMCR0_INTEN1	0x0000000000002000UL
+#define   PA6T_MMCR0_INTEN2	0x0000000000004000UL
+#define   PA6T_MMCR0_INTEN3	0x0000000000008000UL
+#define   PA6T_MMCR0_INTEN4	0x0000000000010000UL
+#define   PA6T_MMCR0_INTEN5	0x0000000000020000UL
+#define   PA6T_MMCR0_DISCNT	0x0000000000040000UL
+#define   PA6T_MMCR0_UOP	0x0000000000080000UL
+#define   PA6T_MMCR0_TRG	0x0000000000100000UL
+#define   PA6T_MMCR0_TRGEN	0x0000000000200000UL
+#define   PA6T_MMCR0_TRGREG	0x0000000001600000UL
+#define   PA6T_MMCR0_SIARLOG	0x0000000002000000UL
+#define   PA6T_MMCR0_SDARLOG	0x0000000004000000UL
+#define   PA6T_MMCR0_PROEN	0x0000000008000000UL
+#define   PA6T_MMCR0_PROLOG	0x0000000010000000UL
+#define   PA6T_MMCR0_DAMEN2	0x0000000020000000UL
+#define   PA6T_MMCR0_DAMEN3	0x0000000040000000UL
+#define   PA6T_MMCR0_DAMEN4	0x0000000080000000UL
+#define   PA6T_MMCR0_DAMEN5	0x0000000100000000UL
+#define   PA6T_MMCR0_DAMSEL2	0x0000000200000000UL
+#define   PA6T_MMCR0_DAMSEL3	0x0000000400000000UL
+#define   PA6T_MMCR0_DAMSEL4	0x0000000800000000UL
+#define   PA6T_MMCR0_DAMSEL5	0x0000001000000000UL
+#define   PA6T_MMCR0_HANDDIS	0x0000002000000000UL
+#define   PA6T_MMCR0_PCTEN	0x0000004000000000UL
+#define   PA6T_MMCR0_SOCEN	0x0000008000000000UL
+#define   PA6T_MMCR0_SOCMOD	0x0000010000000000UL
+
+#define SPRN_PA6T_MMCR1 798
+#define   PA6T_MMCR1_ES2	0x00000000000000ffUL
+#define   PA6T_MMCR1_ES3	0x000000000000ff00UL
+#define   PA6T_MMCR1_ES4	0x0000000000ff0000UL
+#define   PA6T_MMCR1_ES5	0x00000000ff000000UL
+
+#define SPRN_PA6T_UPMC0 771	/* User PerfMon Counter 0 */
+#define SPRN_PA6T_UPMC1 772	/* ... */
+#define SPRN_PA6T_UPMC2 773
+#define SPRN_PA6T_UPMC3 774
+#define SPRN_PA6T_UPMC4 775
+#define SPRN_PA6T_UPMC5 776
+#define SPRN_PA6T_UMMCR0 779	/* User Monitor Mode Control Register 0 */
+#define SPRN_PA6T_SIAR	780	/* Sampled Instruction Address */
+#define SPRN_PA6T_UMMCR1 782	/* User Monitor Mode Control Register 1 */
+#define SPRN_PA6T_SIER	785	/* Sampled Instruction Event Register */
+#define SPRN_PA6T_PMC0	787
+#define SPRN_PA6T_PMC1	788
+#define SPRN_PA6T_PMC2	789
+#define SPRN_PA6T_PMC3	790
+#define SPRN_PA6T_PMC4	791
+#define SPRN_PA6T_PMC5	792
+#define SPRN_PA6T_TSR0	793	/* Timestamp Register 0 */
+#define SPRN_PA6T_TSR1	794	/* Timestamp Register 1 */
+#define SPRN_PA6T_TSR2	799	/* Timestamp Register 2 */
+#define SPRN_PA6T_TSR3	784	/* Timestamp Register 3 */
+
+#define SPRN_PA6T_IER	981	/* Icache Error Register */
+#define SPRN_PA6T_DER	982	/* Dcache Error Register */
+#define SPRN_PA6T_BER	862	/* BIU Error Address Register */
+#define SPRN_PA6T_MER	849	/* MMU Error Register */
+
+#define SPRN_PA6T_IMA0	880	/* Instruction Match Array 0 */
+#define SPRN_PA6T_IMA1	881	/* ... */
+#define SPRN_PA6T_IMA2	882
+#define SPRN_PA6T_IMA3	883
+#define SPRN_PA6T_IMA4	884
+#define SPRN_PA6T_IMA5	885
+#define SPRN_PA6T_IMA6	886
+#define SPRN_PA6T_IMA7	887
+#define SPRN_PA6T_IMA8	888
+#define SPRN_PA6T_IMA9	889
+#define SPRN_PA6T_BTCR	978	/* Breakpoint and Tagging Control Register */
+#define SPRN_PA6T_IMAAT	979	/* Instruction Match Array Action Table */
+#define SPRN_PA6T_PCCR	1019	/* Power Counter Control Register */
+#define SPRN_BKMK	1020	/* Cell Bookmark Register */
+#define SPRN_PA6T_RPCCR	1021	/* Retire PC Trace Control Register */
+
+
+#else /* 32-bit */
+#define SPRN_MMCR0	952	/* Monitor Mode Control Register 0 */
+#define   MMCR0_FC	0x80000000UL /* freeze counters */
+#define   MMCR0_FCS	0x40000000UL /* freeze in supervisor state */
+#define   MMCR0_FCP	0x20000000UL /* freeze in problem state */
+#define   MMCR0_FCM1	0x10000000UL /* freeze counters while MSR mark = 1 */
+#define   MMCR0_FCM0	0x08000000UL /* freeze counters while MSR mark = 0 */
+#define   MMCR0_PMXE	0x04000000UL /* performance monitor exception enable */
+#define   MMCR0_FCECE	0x02000000UL /* freeze ctrs on enabled cond or event */
+#define   MMCR0_TBEE	0x00400000UL /* time base exception enable */
+#define   MMCR0_PMC1CE	0x00008000UL /* PMC1 count enable*/
+#define   MMCR0_PMCnCE	0x00004000UL /* count enable for all but PMC 1*/
+#define   MMCR0_TRIGGER	0x00002000UL /* TRIGGER enable */
+#define   MMCR0_PMC1SEL	0x00001fc0UL /* PMC 1 Event */
+#define   MMCR0_PMC2SEL	0x0000003fUL /* PMC 2 Event */
+
+#define SPRN_MMCR1	956
+#define   MMCR1_PMC3SEL	0xf8000000UL /* PMC 3 Event */
+#define   MMCR1_PMC4SEL	0x07c00000UL /* PMC 4 Event */
+#define   MMCR1_PMC5SEL	0x003e0000UL /* PMC 5 Event */
+#define   MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */
+#define SPRN_MMCR2	944
+#define SPRN_PMC1	953	/* Performance Counter Register 1 */
+#define SPRN_PMC2	954	/* Performance Counter Register 2 */
+#define SPRN_PMC3	957	/* Performance Counter Register 3 */
+#define SPRN_PMC4	958	/* Performance Counter Register 4 */
+#define SPRN_PMC5	945	/* Performance Counter Register 5 */
+#define SPRN_PMC6	946	/* Performance Counter Register 6 */
+
+#define SPRN_SIAR	955	/* Sampled Instruction Address Register */
+
+/* Bit definitions for MMCR0 and PMC1 / PMC2. */
+#define MMCR0_PMC1_CYCLES	(1 << 7)
+#define MMCR0_PMC1_ICACHEMISS	(5 << 7)
+#define MMCR0_PMC1_DTLB		(6 << 7)
+#define MMCR0_PMC2_DCACHEMISS	0x6
+#define MMCR0_PMC2_CYCLES	0x1
+#define MMCR0_PMC2_ITLB		0x7
+#define MMCR0_PMC2_LOADMISSTIME	0x5
+#endif
+
+/*
+ * SPRG usage:
+ *
+ * All 64-bit:
+ *	- SPRG1 stores PACA pointer except 64-bit server in
+ *        HV mode in which case it is HSPRG0
+ *
+ * 64-bit server:
+ *	- SPRG0 scratch for TM recheckpoint/reclaim (reserved for HV on Power4)
+ *	- SPRG2 scratch for exception vectors
+ *	- SPRG3 CPU and NUMA node for VDSO getcpu (user visible)
+ *      - HSPRG0 stores PACA in HV mode
+ *      - HSPRG1 scratch for "HV" exceptions
+ *
+ * 64-bit embedded
+ *	- SPRG0 generic exception scratch
+ *	- SPRG2 TLB exception stack
+ *	- SPRG3 critical exception scratch (user visible, sorry!)
+ *	- SPRG4 unused (user visible)
+ *	- SPRG6 TLB miss scratch (user visible, sorry !)
+ *	- SPRG7 CPU and NUMA node for VDSO getcpu (user visible)
+ *	- SPRG8 machine check exception scratch
+ *	- SPRG9 debug exception scratch
+ *
+ * All 32-bit:
+ *	- SPRG3 current thread_struct physical addr pointer
+ *        (virtual on BookE, physical on others)
+ *
+ * 32-bit classic:
+ *	- SPRG0 scratch for exception vectors
+ *	- SPRG1 scratch for exception vectors
+ *	- SPRG2 indicator that we are in RTAS
+ *	- SPRG4 (603 only) pseudo TLB LRU data
+ *
+ * 32-bit 40x:
+ *	- SPRG0 scratch for exception vectors
+ *	- SPRG1 scratch for exception vectors
+ *	- SPRG2 scratch for exception vectors
+ *	- SPRG4 scratch for exception vectors (not 403)
+ *	- SPRG5 scratch for exception vectors (not 403)
+ *	- SPRG6 scratch for exception vectors (not 403)
+ *	- SPRG7 scratch for exception vectors (not 403)
+ *
+ * 32-bit 440 and FSL BookE:
+ *	- SPRG0 scratch for exception vectors
+ *	- SPRG1 scratch for exception vectors (*)
+ *	- SPRG2 scratch for crit interrupts handler
+ *	- SPRG4 scratch for exception vectors
+ *	- SPRG5 scratch for exception vectors
+ *	- SPRG6 scratch for machine check handler
+ *	- SPRG7 scratch for exception vectors
+ *	- SPRG9 scratch for debug vectors (e500 only)
+ *
+ *      Additionally, BookE separates "read" and "write"
+ *      of those registers. That allows the userspace-readable
+ *      variant to be used for reads, which can avoid a fault
+ *      with KVM-type virtualization.
+ *
+ * 32-bit 8xx:
+ *	- SPRG0 scratch for exception vectors
+ *	- SPRG1 scratch for exception vectors
+ *	- SPRG2 scratch for exception vectors
+ *
+ */
+#ifdef CONFIG_PPC64
+#define SPRN_SPRG_PACA 		SPRN_SPRG1
+#else
+#define SPRN_SPRG_THREAD 	SPRN_SPRG3
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#define SPRN_SPRG_SCRATCH0	SPRN_SPRG2
+#define SPRN_SPRG_HPACA		SPRN_HSPRG0
+#define SPRN_SPRG_HSCRATCH0	SPRN_HSPRG1
+#define SPRN_SPRG_VDSO_READ	SPRN_USPRG3
+#define SPRN_SPRG_VDSO_WRITE	SPRN_SPRG3
+
+#define GET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_PACA;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HPACA;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#define SET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_PACA,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HPACA,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#define GET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_SCRATCH0;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HSCRATCH0;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#define SET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_SCRATCH0,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66)
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+#define GET_SCRATCH0(rX)	mfspr	rX,SPRN_SPRG_SCRATCH0
+#define SET_SCRATCH0(rX)	mtspr	SPRN_SPRG_SCRATCH0,rX
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+#define SPRN_SPRG_MC_SCRATCH	SPRN_SPRG8
+#define SPRN_SPRG_CRIT_SCRATCH	SPRN_SPRG3
+#define SPRN_SPRG_DBG_SCRATCH	SPRN_SPRG9
+#define SPRN_SPRG_TLB_EXFRAME	SPRN_SPRG2
+#define SPRN_SPRG_TLB_SCRATCH	SPRN_SPRG6
+#define SPRN_SPRG_GEN_SCRATCH	SPRN_SPRG0
+#define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH
+#define SPRN_SPRG_VDSO_READ	SPRN_USPRG7
+#define SPRN_SPRG_VDSO_WRITE	SPRN_SPRG7
+
+#define SET_PACA(rX)	mtspr	SPRN_SPRG_PACA,rX
+#define GET_PACA(rX)	mfspr	rX,SPRN_SPRG_PACA
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#define SPRN_SPRG_SCRATCH0	SPRN_SPRG0
+#define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2	SPRN_SPRG2
+#define SPRN_SPRG_603_LRU	SPRN_SPRG4
+#endif
+
+#ifdef CONFIG_40x
+#define SPRN_SPRG_SCRATCH0	SPRN_SPRG0
+#define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2	SPRN_SPRG2
+#define SPRN_SPRG_SCRATCH3	SPRN_SPRG4
+#define SPRN_SPRG_SCRATCH4	SPRN_SPRG5
+#define SPRN_SPRG_SCRATCH5	SPRN_SPRG6
+#define SPRN_SPRG_SCRATCH6	SPRN_SPRG7
+#endif
+
+#ifdef CONFIG_BOOKE
+#define SPRN_SPRG_RSCRATCH0	SPRN_SPRG0
+#define SPRN_SPRG_WSCRATCH0	SPRN_SPRG0
+#define SPRN_SPRG_RSCRATCH1	SPRN_SPRG1
+#define SPRN_SPRG_WSCRATCH1	SPRN_SPRG1
+#define SPRN_SPRG_RSCRATCH_CRIT	SPRN_SPRG2
+#define SPRN_SPRG_WSCRATCH_CRIT	SPRN_SPRG2
+#define SPRN_SPRG_RSCRATCH2	SPRN_SPRG4R
+#define SPRN_SPRG_WSCRATCH2	SPRN_SPRG4W
+#define SPRN_SPRG_RSCRATCH3	SPRN_SPRG5R
+#define SPRN_SPRG_WSCRATCH3	SPRN_SPRG5W
+#define SPRN_SPRG_RSCRATCH_MC	SPRN_SPRG1
+#define SPRN_SPRG_WSCRATCH_MC	SPRN_SPRG1
+#define SPRN_SPRG_RSCRATCH4	SPRN_SPRG7R
+#define SPRN_SPRG_WSCRATCH4	SPRN_SPRG7W
+#define SPRN_SPRG_RSCRATCH_DBG	SPRN_SPRG9
+#define SPRN_SPRG_WSCRATCH_DBG	SPRN_SPRG9
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#define SPRN_SPRG_SCRATCH0	SPRN_SPRG0
+#define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2	SPRN_SPRG2
+#endif
+
+
+
+/*
+ * An mtfsf instruction with the L bit set. On CPUs that support this a
+ * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored.
+ *
+ * Until binutils gets the new form of mtfsf, hardwire the instruction.
+ */
+#ifdef CONFIG_PPC64
+#define MTFSF_L(REG) \
+	.long (0xfc00058e | ((0xff) << 17) | ((REG) << 11) | (1 << 25))
+#else
+#define MTFSF_L(REG)	mtfsf	0xff, (REG)
+#endif
+
+/* Processor Version Register (PVR) field extraction */
+
+#define PVR_VER(pvr)	(((pvr) >>  16) & 0xFFFF)	/* Version field */
+#define PVR_REV(pvr)	(((pvr) >>   0) & 0xFFFF)	/* Revision field */
+
+#define pvr_version_is(pvr)	(PVR_VER(mfspr(SPRN_PVR)) == (pvr))	/* true if the running CPU's PVR version field equals pvr */
+
+/*
+ * IBM has further subdivided the standard PowerPC 16-bit version and
+ * revision subfields of the PVR for the PowerPC 403s into the following:
+ */
+
+#define PVR_FAM(pvr)	(((pvr) >> 20) & 0xFFF)	/* Family field */
+#define PVR_MEM(pvr)	(((pvr) >> 16) & 0xF)	/* Member field */
+#define PVR_CORE(pvr)	(((pvr) >> 12) & 0xF)	/* Core field */
+#define PVR_CFG(pvr)	(((pvr) >>  8) & 0xF)	/* Configuration field */
+#define PVR_MAJ(pvr)	(((pvr) >>  4) & 0xF)	/* Major revision field */
+#define PVR_MIN(pvr)	(((pvr) >>  0) & 0xF)	/* Minor revision field */
+
+/* Processor Version Numbers */
+
+#define PVR_403GA	0x00200000
+#define PVR_403GB	0x00200100
+#define PVR_403GC	0x00200200
+#define PVR_403GCX	0x00201400
+#define PVR_405GP	0x40110000
+#define PVR_476		0x11a52000
+#define PVR_476FPE	0x7ff50000
+#define PVR_STB03XXX	0x40310000
+#define PVR_NP405H	0x41410000
+#define PVR_NP405L	0x41610000
+#define PVR_601		0x00010000
+#define PVR_602		0x00050000
+#define PVR_603		0x00030000
+#define PVR_603e	0x00060000
+#define PVR_603ev	0x00070000
+#define PVR_603r	0x00071000
+#define PVR_604		0x00040000
+#define PVR_604e	0x00090000
+#define PVR_604r	0x000A0000
+#define PVR_620		0x00140000
+#define PVR_740		0x00080000
+#define PVR_750		PVR_740
+#define PVR_740P	0x10080000
+#define PVR_750P	PVR_740P
+#define PVR_7400	0x000C0000
+#define PVR_7410	0x800C0000
+#define PVR_7450	0x80000000
+#define PVR_8540	0x80200000
+#define PVR_8560	0x80200000
+#define PVR_VER_E500V1	0x8020
+#define PVR_VER_E500V2	0x8021
+#define PVR_VER_E500MC	0x8023
+#define PVR_VER_E5500	0x8024
+#define PVR_VER_E6500	0x8040
+#define PVR_VER_7450	0x8000
+#define PVR_VER_7455	0x8001
+#define PVR_VER_7447	0x8002
+#define PVR_VER_7447A	0x8003
+#define PVR_VER_7448	0x8004
+
+/*
+ * For the 8xx processors, all of them report the same PVR family for
+ * the PowerPC core. The various versions of these processors must be
+ * differentiated by the version number in the Communication Processor
+ * Module (CPM).
+ */
+#define PVR_8xx		0x00500000
+
+#define PVR_8240	0x00810100
+#define PVR_8245	0x80811014
+#define PVR_8260	PVR_8240
+
+/* 476 Simulator seems to currently have the PVR of the 602... */
+#define PVR_476_ISS	0x00052000
+
+/* 64-bit processors */
+#define PVR_NORTHSTAR	0x0033
+#define PVR_PULSAR	0x0034
+#define PVR_POWER4	0x0035
+#define PVR_ICESTAR	0x0036
+#define PVR_SSTAR	0x0037
+#define PVR_POWER4p	0x0038
+#define PVR_970		0x0039
+#define PVR_POWER5	0x003A
+#define PVR_POWER5p	0x003B
+#define PVR_970FX	0x003C
+#define PVR_POWER6	0x003E
+#define PVR_POWER7	0x003F
+#define PVR_630		0x0040
+#define PVR_630p	0x0041
+#define PVR_970MP	0x0044
+#define PVR_970GX	0x0045
+#define PVR_POWER7p	0x004A
+#define PVR_POWER8E	0x004B
+#define PVR_POWER8NVL	0x004C
+#define PVR_POWER8	0x004D
+#define PVR_POWER9	0x004E
+#define PVR_POWER10	0x0080
+#define PVR_BE		0x0070
+#define PVR_PA6T	0x0090
+
+/* "Logical" PVR values defined in PAPR, representing architecture levels */
+#define PVR_ARCH_204	0x0f000001
+#define PVR_ARCH_205	0x0f000002
+#define PVR_ARCH_206	0x0f000003
+#define PVR_ARCH_206p	0x0f100003
+#define PVR_ARCH_207	0x0f000004
+#define PVR_ARCH_300	0x0f000005
+#define PVR_ARCH_31	0x0f000006
+
+/* Macros for setting and retrieving special purpose registers */
+#ifndef __ASSEMBLY__
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)	/* struct form on 64-bit builds and under __CHECKER__ */
+typedef struct {
+	u32 val;	/* presumably the 32-bit instruction word - TODO confirm */
+#ifdef CONFIG_PPC64
+	u32 suffix;	/* PPC64 only; presumably the second word of a prefixed instruction - TODO confirm */
+#endif
+} __packed ppc_inst_t;
+#else	/* all other builds: a bare 32-bit value */
+typedef u32 ppc_inst_t;
+#endif
+
+#define mfmsr()		({unsigned long rval; \
+			asm volatile("mfmsr %0" : "=r" (rval) : \
+						: "memory"); rval;})	/* statement expression: evaluates to the value read by "mfmsr" */
+#ifdef CONFIG_PPC_BOOK3S_64	/* here MSR writes are issued with "mtmsrd" */
+#define __mtmsrd(v, l)	asm volatile("mtmsrd %0," __stringify(l) \
+				     : : "r" (v) : "memory")
+#define mtmsr(v)	__mtmsrd((v), 0)	/* "mtmsrd" with a second operand of 0 */
+#define __MTMSR		"mtmsrd"	/* mnemonic string spliced into mtmsr_isync() */
+#else				/* all other configurations use plain "mtmsr" */
+#define mtmsr(v)	asm volatile("mtmsr %0" : \
+				     : "r" ((unsigned long)(v)) \
+				     : "memory")
+#define __mtmsrd(v, l)	BUILD_BUG()	/* expands to BUILD_BUG(): not meant to be used in this configuration */
+#define __MTMSR		"mtmsr"		/* mnemonic string spliced into mtmsr_isync() */
+#endif
+/* MSR write via __MTMSR, then "isync" or "nop" selected by ASM_FTR_IFCLR on CPU_FTR_ARCH_206 (presumably isync when the feature is clear - confirm). */
+static inline void mtmsr_isync(unsigned long val)
+{
+	asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : :
+			"r" (val), "i" (CPU_FTR_ARCH_206) : "memory");
+}
+
+#define mfspr(rn)	({unsigned long rval; \
+			asm volatile("mfspr %0," __stringify(rn) \
+				: "=r" (rval)); rval;})	/* evaluates to the value read from SPR number rn */
+#define mtspr(rn, v)	asm volatile("mtspr " __stringify(rn) ",%0" : \
+				     : "r" ((unsigned long)(v)) \
+				     : "memory")	/* writes v, cast to unsigned long, to SPR number rn */
+#define wrtspr(rn)	asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")	/* source operand is hard-coded as 2; the caller supplies no value */
+
+static inline void wrtee(unsigned long val)
+{
+	if (!__builtin_constant_p(val))	/* run-time value: register form */
+		asm volatile("wrtee %0" : : "r" (val) : "memory");
+	else				/* compile-time constant: immediate form, 1 iff MSR_EE is set in val */
+		asm volatile("wrteei %0" : : "i" (!!(val & MSR_EE)) : "memory");
+}
+
+extern unsigned long msr_check_and_set(unsigned long bits);
+extern bool strict_msr_control;
+extern void __msr_check_and_clear(unsigned long bits);
+static inline void msr_check_and_clear(unsigned long bits)
+{
+	if (!strict_msr_control)	/* flag is off: skip the out-of-line helper */
+		return;
+	__msr_check_and_clear(bits);
+}
+#ifdef CONFIG_PPC32
+static inline u32 mfsr(u32 idx)
+{
+	u32 sr_val;
+
+	/* Constant idx: "mfsr" with idx >> 28 as an immediate; otherwise "mfsrin" with idx in a register. */
+	if (!__builtin_constant_p(idx))
+		asm volatile("mfsrin %0, %1" : "=r" (sr_val): "r" (idx));
+	else
+		asm volatile("mfsr %0, %1" : "=r" (sr_val): "i" (idx >> 28));
+	return sr_val;
+}
+
+static inline void mtsr(u32 val, u32 idx)
+{
+	if (!__builtin_constant_p(idx))	/* run-time idx: "mtsrin" takes idx in a register */
+		asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx));
+	else				/* constant idx: "mtsr" takes idx >> 28 as an immediate */
+		asm volatile("mtsr %1, %0" : : "r" (val), "i" (idx >> 28));
+}
+#endif
+
+extern unsigned long current_stack_frame(void);
+
+register unsigned long current_stack_pointer asm("r1");
+
+extern unsigned long scom970_read(unsigned int address);
+extern void scom970_write(unsigned int address, unsigned long value);
+
+struct pt_regs;
+
+extern void ppc_save_regs(struct pt_regs *regs);
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_REG_H */
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
new file mode 100644
index 0000000000..299ee7be0f
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Contains register definitions common to PowerPC 8xx CPUs.
+ */
+#ifndef _ASM_POWERPC_REG_8xx_H
+#define _ASM_POWERPC_REG_8xx_H
+
+/* Cache control on the MPC8xx is provided through some additional
+ * special purpose registers.  Each SPRN_DC_x number is its SPRN_IC_x twin + 8.
+ */
+#define SPRN_IC_CST	560	/* Instruction cache control/status */
+#define SPRN_IC_ADR	561	/* Address needed for some commands */
+#define SPRN_IC_DAT	562	/* Read-only data register */
+#define SPRN_DC_CST	568	/* Data cache control/status */
+#define SPRN_DC_ADR	569	/* Address needed for some commands */
+#define SPRN_DC_DAT	570	/* Read-only data register */
+
+/* Misc Debug (SPR numbers are decimal; each SPRN_MD_x is its SPRN_MI_x twin + 8) */
+#define SPRN_DPDR	630
+#define SPRN_MI_CAM	816
+#define SPRN_MI_RAM0	817
+#define SPRN_MI_RAM1	818
+#define SPRN_MD_CAM	824
+#define SPRN_MD_RAM0	825
+#define SPRN_MD_RAM1	826
+
+/* Special MSR manipulation registers. NOTE(review): presumably written via wrtspr() - confirm */
+#define SPRN_EIE	80	/* External interrupt enable (EE=1, RI=1) */
+#define SPRN_EID	81	/* External interrupt disable (EE=0, RI=1) */
+#define SPRN_NRI	82	/* Non recoverable interrupt (EE=0, RI=0) */
+
+/* Debug registers; the indented LCTRLn_x macros are values for the register named just above them */
+#define SPRN_CMPA	144
+#define SPRN_COUNTA	150
+#define SPRN_CMPE	152
+#define SPRN_CMPF	153
+#define SPRN_LCTRL1	156
+#define   LCTRL1_CTE_GT		0xc0000000
+#define   LCTRL1_CTF_LT		0x14000000
+#define   LCTRL1_CRWE_RW	0x00000000	/* all-zero encoding */
+#define   LCTRL1_CRWE_RO	0x00040000
+#define   LCTRL1_CRWE_WO	0x000c0000
+#define   LCTRL1_CRWF_RW	0x00000000	/* all-zero encoding */
+#define   LCTRL1_CRWF_RO	0x00010000
+#define   LCTRL1_CRWF_WO	0x00030000
+#define SPRN_LCTRL2	157
+#define   LCTRL2_LW0EN		0x80000000
+#define   LCTRL2_LW0LA_E	0x00000000	/* all-zero encoding */
+#define   LCTRL2_LW0LA_F	0x04000000
+#define   LCTRL2_LW0LA_EandF	0x08000000
+#define   LCTRL2_LW0LADC	0x02000000
+#define   LCTRL2_SLW0EN		0x00000002
+#ifdef CONFIG_PPC_8xx	/* NOTE(review): presumably avoids clashing with another SPRN_ICTRL definition - confirm */
+#define SPRN_ICTRL	158
+#endif /* CONFIG_PPC_8xx */
+#define SPRN_BAR	159
+
+/* Commands.  Only the first few are available to the instruction cache
+ * (NOTE(review): presumably the IDC_ group; the DC_ group looks data-cache only - confirm). */
+#define	IDC_ENABLE	0x02000000	/* Cache enable */
+#define IDC_DISABLE	0x04000000	/* Cache disable */
+#define IDC_LDLCK	0x06000000	/* Load and lock */
+#define IDC_UNLINE	0x08000000	/* Unlock line */
+#define IDC_UNALL	0x0a000000	/* Unlock all */
+#define IDC_INVALL	0x0c000000	/* Invalidate all */
+
+#define DC_FLINE	0x0e000000	/* Flush data cache line */
+#define DC_SFWT		0x01000000	/* Set forced writethrough mode */
+#define DC_CFWT		0x03000000	/* Clear forced writethrough mode */
+#define DC_SLES		0x05000000	/* Set little endian swap mode */
+#define DC_CLES		0x07000000	/* Clear little endian swap mode */
+
+/* Status bits; none of these values overlaps another.
+*/
+#define IDC_ENABLED	0x80000000	/* Cache is enabled */
+#define IDC_CERR1	0x00200000	/* Cache error 1 */
+#define IDC_CERR2	0x00100000	/* Cache error 2 */
+#define IDC_CERR3	0x00080000	/* Cache error 3 */
+
+#define DC_DFWT		0x40000000	/* Data cache is forced write through */
+#define DC_LES		0x20000000	/* Caches are in little endian mode */
+
+#endif /* _ASM_POWERPC_REG_8xx_H */
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
new file mode 100644
index 0000000000..74fba29e94
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Register definitions specific to the A2 core
+ *
+ *  Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ */
+
+#ifndef __ASM_POWERPC_REG_A2_H__
+#define __ASM_POWERPC_REG_A2_H__
+
+#include <asm/asm-const.h>
+
+#define SPRN_TENSR	0x1b5	/* NOTE(review): presumably Thread ENable Status - confirm */
+#define SPRN_TENS	0x1b6	/* Thread ENable Set */
+#define SPRN_TENC	0x1b7	/* Thread ENable Clear */
+
+#define SPRN_A2_CCR0	0x3f0	/* Core Configuration Register 0 */
+#define SPRN_A2_CCR1	0x3f1	/* Core Configuration Register 1 */
+#define SPRN_A2_CCR2	0x3f2	/* Core Configuration Register 2 */
+#define SPRN_MMUCR0	0x3fc	/* MMU Control Register 0 */
+#define SPRN_MMUCR1	0x3fd	/* MMU Control Register 1 */
+#define SPRN_MMUCR2	0x3fe	/* MMU Control Register 2 */
+#define SPRN_MMUCR3	0x3ff	/* MMU Control Register 3 */
+
+#define SPRN_IAR	0x372
+
+#define SPRN_IUCR0	0x3f3
+#define IUCR0_ICBI_ACK	0x1000	/* bit value for IUCR0 (by naming), not an SPR number */
+
+#define SPRN_XUCR0	0x3f6	/* Execution Unit Config Register 0 */
+
+#define A2_IERAT_SIZE	16	/* NOTE(review): presumably the I-ERAT entry count - confirm */
+#define A2_DERAT_SIZE	32	/* NOTE(review): presumably the D-ERAT entry count - confirm */
+
+/* A2 MMUCR0 bits */
+#define MMUCR0_ECL	0x80000000	/* Extended Class for TLB fills */
+#define MMUCR0_TID_NZ	0x40000000	/* TID is non-zero */
+#define MMUCR0_TS	0x10000000	/* Translation space for TLB fills */
+#define MMUCR0_TGS	0x20000000	/* Guest space for TLB fills */
+#define MMUCR0_TLBSEL	0x0c000000	/* TLB or ERAT target for TLB fills (field mask) */
+#define MMUCR0_TLBSEL_U	0x00000000	/*  TLBSEL = UTLB */
+#define MMUCR0_TLBSEL_I	0x08000000	/*  TLBSEL = I-ERAT */
+#define MMUCR0_TLBSEL_D	0x0c000000	/*  TLBSEL = D-ERAT */
+#define MMUCR0_LOCKSRSH	0x02000000	/* Use TLB lock on tlbsx. */
+#define MMUCR0_TID_MASK	0x000000ff	/* TID field */
+
+/* A2 MMUCR1 bits */
+#define MMUCR1_IRRE		0x80000000	/* I-ERAT round robin enable */
+#define MMUCR1_DRRE		0x40000000	/* D-ERAT round robin enable */
+#define MMUCR1_REE		0x20000000	/* Reference Exception Enable */
+#define MMUCR1_CEE		0x10000000	/* Change exception enable */
+#define MMUCR1_CSINV_ALL	0x00000000	/* Inval ERAT on all CS evts */
+#define MMUCR1_CSINV_NISYNC	0x04000000	/* Inval ERAT on all ex isync */
+#define MMUCR1_CSINV_NEVER	0x0c000000	/* Don't inval ERAT on CS */
+#define MMUCR1_ICTID		0x00080000	/* IERAT class field as TID */
+#define MMUCR1_ITTID		0x00040000	/* IERAT thdid field as TID */
+#define MMUCR1_DCTID		0x00020000	/* DERAT class field as TID */
+#define MMUCR1_DTTID		0x00010000	/* DERAT thdid field as TID */
+#define MMUCR1_DCCD		0x00008000	/* DERAT class ignore */
+#define MMUCR1_TLBWE_BINV	0x00004000	/* back invalidate on tlbwe */
+
+/* A2 MMUCR2 bits */
+#define MMUCR2_PSSEL_SHIFT	4	/* a shift count, unlike the masks around it */
+
+/* A2 MMUCR3 bits */
+#define MMUCR3_THID		0x0000000f	/* Thread ID */
+
+/* *** ERAT TLB bits definitions (64-bit constants wrapped in ASM_CONST) */
+#define TLB0_EPN_MASK		ASM_CONST(0xfffffffffffff000)
+#define TLB0_CLASS_MASK		ASM_CONST(0x0000000000000c00)	/* field mask; CLASS_xx are its four encodings */
+#define TLB0_CLASS_00		ASM_CONST(0x0000000000000000)
+#define TLB0_CLASS_01		ASM_CONST(0x0000000000000400)
+#define TLB0_CLASS_10		ASM_CONST(0x0000000000000800)
+#define TLB0_CLASS_11		ASM_CONST(0x0000000000000c00)
+#define TLB0_V			ASM_CONST(0x0000000000000200)
+#define TLB0_X			ASM_CONST(0x0000000000000100)
+#define TLB0_SIZE_MASK		ASM_CONST(0x00000000000000f0)	/* field mask for the SIZE_ encodings below */
+#define TLB0_SIZE_4K		ASM_CONST(0x0000000000000010)
+#define TLB0_SIZE_64K		ASM_CONST(0x0000000000000030)
+#define TLB0_SIZE_1M		ASM_CONST(0x0000000000000050)
+#define TLB0_SIZE_16M		ASM_CONST(0x0000000000000070)
+#define TLB0_SIZE_1G		ASM_CONST(0x00000000000000a0)
+#define TLB0_THDID_MASK		ASM_CONST(0x000000000000000f)	/* one bit per THDID_0..3 below */
+#define TLB0_THDID_0		ASM_CONST(0x0000000000000001)
+#define TLB0_THDID_1		ASM_CONST(0x0000000000000002)
+#define TLB0_THDID_2		ASM_CONST(0x0000000000000004)
+#define TLB0_THDID_3		ASM_CONST(0x0000000000000008)
+#define TLB0_THDID_ALL		ASM_CONST(0x000000000000000f)	/* same value as TLB0_THDID_MASK */
+
+#define TLB1_RESVATTR		ASM_CONST(0x00f0000000000000)
+#define TLB1_U0			ASM_CONST(0x0008000000000000)
+#define TLB1_U1			ASM_CONST(0x0004000000000000)
+#define TLB1_U2			ASM_CONST(0x0002000000000000)
+#define TLB1_U3			ASM_CONST(0x0001000000000000)
+#define TLB1_R			ASM_CONST(0x0000800000000000)
+#define TLB1_C			ASM_CONST(0x0000400000000000)
+#define TLB1_RPN_MASK		ASM_CONST(0x000003fffffff000)
+#define TLB1_W			ASM_CONST(0x0000000000000800)
+#define TLB1_I			ASM_CONST(0x0000000000000400)
+#define TLB1_M			ASM_CONST(0x0000000000000200)
+#define TLB1_G			ASM_CONST(0x0000000000000100)
+#define TLB1_E			ASM_CONST(0x0000000000000080)
+#define TLB1_VF			ASM_CONST(0x0000000000000040)
+#define TLB1_UX			ASM_CONST(0x0000000000000020)
+#define TLB1_SX			ASM_CONST(0x0000000000000010)
+#define TLB1_UW			ASM_CONST(0x0000000000000008)
+#define TLB1_SW			ASM_CONST(0x0000000000000004)
+#define TLB1_UR			ASM_CONST(0x0000000000000002)
+#define TLB1_SR			ASM_CONST(0x0000000000000001)
+
+/* A2 erativax attributes definitions; the PSIZE values need TLB_PSIZE_x, which this header does not define */
+#define ERATIVAX_RS_IS_ALL		0x000
+#define ERATIVAX_RS_IS_TID		0x040
+#define ERATIVAX_RS_IS_CLASS		0x080
+#define ERATIVAX_RS_IS_FULLMATCH	0x0c0
+#define ERATIVAX_CLASS_00		0x000
+#define ERATIVAX_CLASS_01		0x010
+#define ERATIVAX_CLASS_10		0x020
+#define ERATIVAX_CLASS_11		0x030
+#define ERATIVAX_PSIZE_4K		(TLB_PSIZE_4K >> 1)
+#define ERATIVAX_PSIZE_64K		(TLB_PSIZE_64K >> 1)
+#define ERATIVAX_PSIZE_1M		(TLB_PSIZE_1M >> 1)
+#define ERATIVAX_PSIZE_16M		(TLB_PSIZE_16M >> 1)
+#define ERATIVAX_PSIZE_1G		(TLB_PSIZE_1G >> 1)
+
+/* A2 eratilx attributes definitions (plain selector values 0-7, not bit masks) */
+#define ERATILX_T_ALL			0
+#define ERATILX_T_TID			1
+#define ERATILX_T_TGS			2
+#define ERATILX_T_FULLMATCH		3
+#define ERATILX_T_CLASS0		4
+#define ERATILX_T_CLASS1		5
+#define ERATILX_T_CLASS2		6
+#define ERATILX_T_CLASS3		7
+
+/* XUCR0 bits (one TRACE_UM bit per thread 0-3) */
+#define XUCR0_TRACE_UM_T0		0x40000000	/* Thread 0 */
+#define XUCR0_TRACE_UM_T1		0x20000000	/* Thread 1 */
+#define XUCR0_TRACE_UM_T2		0x10000000	/* Thread 2 */
+#define XUCR0_TRACE_UM_T3		0x08000000	/* Thread 3 */
+
+/* A2 CCR0 register (the four PME values are the encodings of the top two bits) */
+#define A2_CCR0_PME_DISABLED		0x00000000
+#define A2_CCR0_PME_SLEEP		0x40000000
+#define A2_CCR0_PME_RVW			0x80000000
+#define A2_CCR0_PME_DISABLED2		0xc0000000
+
+/* A2 CCR2 register (single-bit flags) */
+#define A2_CCR2_ERAT_ONLY_MODE		0x00000001
+#define A2_CCR2_ENABLE_ICSWX		0x00000002
+#define A2_CCR2_ENABLE_PC		0x20000000
+#define A2_CCR2_ENABLE_TRACE		0x40000000
+
+#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
new file mode 100644
index 0000000000..af56980b6c
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -0,0 +1,700 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Contains register definitions common to the Book E PowerPC
+ * specification.  Notice that while the IBM-40x series of CPUs
+ * are not true Book E PowerPCs, they borrowed a number of features
+ * before Book E was finalized, and are included here as well.  Unfortunately,
+ * they sometimes used different locations than true Book E CPUs did.
+ *
+ * Copyright 2009-2010 Freescale Semiconductor, Inc.
+ */
+#ifdef __KERNEL__
+#ifndef __ASM_POWERPC_REG_BOOKE_H__
+#define __ASM_POWERPC_REG_BOOKE_H__
+
+#include <asm/ppc-opcode.h>
+
+/* Machine State Register (MSR) Fields; the _LG values are bit numbers fed to __MASK() below */
+#define MSR_GS_LG	28	/* Guest state */
+#define MSR_UCLE_LG	26	/* User-mode cache lock enable */
+#define MSR_SPE_LG	25	/* Enable SPE */
+#define MSR_DWE_LG	10	/* Debug Wait Enable (same bit number as UBLE) */
+#define MSR_UBLE_LG	10	/* BTB lock enable (e500) */
+#define MSR_IS_LG	MSR_IR_LG /* Instruction Space */
+#define MSR_DS_LG	MSR_DR_LG /* Data Space */
+#define MSR_PMM_LG	2	/* Performance monitor mark bit */
+#define MSR_CM_LG	31	/* Computation Mode (0=32-bit, 1=64-bit) */
+
+#define MSR_GS		__MASK(MSR_GS_LG)
+#define MSR_UCLE	__MASK(MSR_UCLE_LG)
+#define MSR_SPE		__MASK(MSR_SPE_LG)
+#define MSR_DWE		__MASK(MSR_DWE_LG)
+#define MSR_UBLE	__MASK(MSR_UBLE_LG)
+#define MSR_IS		__MASK(MSR_IS_LG)
+#define MSR_DS		__MASK(MSR_DS_LG)
+#define MSR_PMM		__MASK(MSR_PMM_LG)
+#define MSR_CM		__MASK(MSR_CM_LG)
+
+#if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_64BIT	MSR_CM
+
+#define MSR_		(MSR_ME | MSR_RI | MSR_CE)	/* common base of the four masks below */
+#define MSR_KERNEL	(MSR_ | MSR_64BIT)
+#define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
+#define MSR_USER64	(MSR_USER32 | MSR_64BIT)
+#elif defined (CONFIG_40x)
+#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)	/* the #else variant plus MSR_IR|MSR_DR */
+#define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
+#else
+#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_CE)
+#define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
+#endif /* CONFIG_PPC_BOOK3E_64 / CONFIG_40x / other */
+
+/* Special Purpose Registers (SPRNs).  0x20E, 0x3B0, 0x3B3, 0x3FA and 0x3FB each appear under two names below. */
+#define SPRN_DECAR	0x036	/* Decrementer Auto Reload Register */
+#define SPRN_IVPR	0x03F	/* Interrupt Vector Prefix Register */
+#define SPRN_USPRG0	0x100	/* User Special Purpose Register General 0 */
+#define SPRN_SPRG3R	0x103	/* Special Purpose Register General 3 Read */
+#define SPRN_SPRG4R	0x104	/* Special Purpose Register General 4 Read */
+#define SPRN_SPRG5R	0x105	/* Special Purpose Register General 5 Read */
+#define SPRN_SPRG6R	0x106	/* Special Purpose Register General 6 Read */
+#define SPRN_SPRG7R	0x107	/* Special Purpose Register General 7 Read */
+#define SPRN_SPRG4W	0x114	/* Special Purpose Register General 4 Write */
+#define SPRN_SPRG5W	0x115	/* Special Purpose Register General 5 Write */
+#define SPRN_SPRG6W	0x116	/* Special Purpose Register General 6 Write */
+#define SPRN_SPRG7W	0x117	/* Special Purpose Register General 7 Write */
+#define SPRN_EPCR	0x133	/* Embedded Processor Control Register */
+#define SPRN_DBCR2	0x136	/* Debug Control Register 2 */
+#define SPRN_DBCR4	0x233	/* Debug Control Register 4 */
+#define SPRN_MSRP	0x137	/* MSR Protect Register */
+#define SPRN_IAC3	0x13A	/* Instruction Address Compare 3 */
+#define SPRN_IAC4	0x13B	/* Instruction Address Compare 4 */
+#define SPRN_DVC1	0x13E	/* Data Value Compare Register 1 */
+#define SPRN_DVC2	0x13F	/* Data Value Compare Register 2 */
+#define SPRN_LPID	0x152	/* Logical Partition ID */
+#define SPRN_MAS8	0x155	/* MMU Assist Register 8 */
+#define SPRN_TLB0PS	0x158	/* TLB 0 Page Size Register */
+#define SPRN_TLB1PS	0x159	/* TLB 1 Page Size Register */
+#define SPRN_MAS5_MAS6	0x15c	/* MMU Assist Register 5 || 6 */
+#define SPRN_MAS8_MAS1	0x15d	/* MMU Assist Register 8 || 1 */
+#define SPRN_EPTCFG	0x15e	/* Embedded Page Table Config */
+#define SPRN_GSPRG0	0x170	/* Guest SPRG0 */
+#define SPRN_GSPRG1	0x171	/* Guest SPRG1 */
+#define SPRN_GSPRG2	0x172	/* Guest SPRG2 */
+#define SPRN_GSPRG3	0x173	/* Guest SPRG3 */
+#define SPRN_MAS7_MAS3	0x174	/* MMU Assist Register 7 || 3 */
+#define SPRN_MAS0_MAS1	0x175	/* MMU Assist Register 0 || 1 */
+#define SPRN_GSRR0	0x17A	/* Guest SRR0 */
+#define SPRN_GSRR1	0x17B	/* Guest SRR1 */
+#define SPRN_GEPR	0x17C	/* Guest EPR */
+#define SPRN_GDEAR	0x17D	/* Guest DEAR */
+#define SPRN_GPIR	0x17E	/* Guest PIR */
+#define SPRN_GESR	0x17F	/* Guest Exception Syndrome Register */
+#define SPRN_IVOR0	0x190	/* Interrupt Vector Offset Register 0 */
+#define SPRN_IVOR1	0x191	/* Interrupt Vector Offset Register 1 */
+#define SPRN_IVOR2	0x192	/* Interrupt Vector Offset Register 2 */
+#define SPRN_IVOR3	0x193	/* Interrupt Vector Offset Register 3 */
+#define SPRN_IVOR4	0x194	/* Interrupt Vector Offset Register 4 */
+#define SPRN_IVOR5	0x195	/* Interrupt Vector Offset Register 5 */
+#define SPRN_IVOR6	0x196	/* Interrupt Vector Offset Register 6 */
+#define SPRN_IVOR7	0x197	/* Interrupt Vector Offset Register 7 */
+#define SPRN_IVOR8	0x198	/* Interrupt Vector Offset Register 8 */
+#define SPRN_IVOR9	0x199	/* Interrupt Vector Offset Register 9 */
+#define SPRN_IVOR10	0x19A	/* Interrupt Vector Offset Register 10 */
+#define SPRN_IVOR11	0x19B	/* Interrupt Vector Offset Register 11 */
+#define SPRN_IVOR12	0x19C	/* Interrupt Vector Offset Register 12 */
+#define SPRN_IVOR13	0x19D	/* Interrupt Vector Offset Register 13 */
+#define SPRN_IVOR14	0x19E	/* Interrupt Vector Offset Register 14 */
+#define SPRN_IVOR15	0x19F	/* Interrupt Vector Offset Register 15 */
+#define SPRN_IVOR38	0x1B0	/* Interrupt Vector Offset Register 38 */
+#define SPRN_IVOR39	0x1B1	/* Interrupt Vector Offset Register 39 */
+#define SPRN_IVOR40	0x1B2	/* Interrupt Vector Offset Register 40 */
+#define SPRN_IVOR41	0x1B3	/* Interrupt Vector Offset Register 41 */
+#define SPRN_IVOR42	0x1B4	/* Interrupt Vector Offset Register 42 */
+#define SPRN_GIVOR2	0x1B8	/* Guest IVOR2 */
+#define SPRN_GIVOR3	0x1B9	/* Guest IVOR3 */
+#define SPRN_GIVOR4	0x1BA	/* Guest IVOR4 */
+#define SPRN_GIVOR8	0x1BB	/* Guest IVOR8 */
+#define SPRN_GIVOR13	0x1BC	/* Guest IVOR13 */
+#define SPRN_GIVOR14	0x1BD	/* Guest IVOR14 */
+#define SPRN_GIVPR	0x1BF	/* Guest IVPR */
+#define SPRN_SPEFSCR	0x200	/* SPE & Embedded FP Status & Control */
+#define SPRN_BBEAR	0x201	/* Branch Buffer Entry Address Register */
+#define SPRN_BBTAR	0x202	/* Branch Buffer Target Address Register */
+#define SPRN_L1CFG0	0x203	/* L1 Cache Configure Register 0 */
+#define SPRN_L1CFG1	0x204	/* L1 Cache Configure Register 1 */
+#define SPRN_ATB	0x20E	/* Alternate Time Base */
+#define SPRN_ATBL	0x20E	/* Alternate Time Base Lower */
+#define SPRN_ATBU	0x20F	/* Alternate Time Base Upper */
+#define SPRN_IVOR32	0x210	/* Interrupt Vector Offset Register 32 */
+#define SPRN_IVOR33	0x211	/* Interrupt Vector Offset Register 33 */
+#define SPRN_IVOR34	0x212	/* Interrupt Vector Offset Register 34 */
+#define SPRN_IVOR35	0x213	/* Interrupt Vector Offset Register 35 */
+#define SPRN_IVOR36	0x214	/* Interrupt Vector Offset Register 36 */
+#define SPRN_IVOR37	0x215	/* Interrupt Vector Offset Register 37 */
+#define SPRN_MCARU	0x239	/* Machine Check Address Register Upper */
+#define SPRN_MCSRR0	0x23A	/* Machine Check Save and Restore Register 0 */
+#define SPRN_MCSRR1	0x23B	/* Machine Check Save and Restore Register 1 */
+#define SPRN_MCSR	0x23C	/* Machine Check Status Register */
+#define SPRN_MCAR	0x23D	/* Machine Check Address Register */
+#define SPRN_DSRR0	0x23E	/* Debug Save and Restore Register 0 */
+#define SPRN_DSRR1	0x23F	/* Debug Save and Restore Register 1 */
+#define SPRN_SPRG8	0x25C	/* Special Purpose Register General 8 */
+#define SPRN_SPRG9	0x25D	/* Special Purpose Register General 9 */
+#define SPRN_L1CSR2	0x25E	/* L1 Cache Control and Status Register 2 */
+#define SPRN_MAS0	0x270	/* MMU Assist Register 0 */
+#define SPRN_MAS1	0x271	/* MMU Assist Register 1 */
+#define SPRN_MAS2	0x272	/* MMU Assist Register 2 */
+#define SPRN_MAS3	0x273	/* MMU Assist Register 3 */
+#define SPRN_MAS4	0x274	/* MMU Assist Register 4 */
+#define SPRN_MAS5	0x153	/* MMU Assist Register 5 */
+#define SPRN_MAS6	0x276	/* MMU Assist Register 6 */
+#define SPRN_PID1	0x279	/* Process ID Register 1 */
+#define SPRN_PID2	0x27A	/* Process ID Register 2 */
+#define SPRN_TLB0CFG	0x2B0	/* TLB 0 Config Register */
+#define SPRN_TLB1CFG	0x2B1	/* TLB 1 Config Register */
+#define SPRN_TLB2CFG	0x2B2	/* TLB 2 Config Register */
+#define SPRN_TLB3CFG	0x2B3	/* TLB 3 Config Register */
+#define SPRN_EPR	0x2BE	/* External Proxy Register */
+#define SPRN_CCR1	0x378	/* Core Configuration Register 1 */
+#define SPRN_ZPR	0x3B0	/* Zone Protection Register (40x) */
+#define SPRN_MAS7	0x3B0	/* MMU Assist Register 7 */
+#define SPRN_MMUCR	0x3B2	/* MMU Control Register */
+#define SPRN_CCR0	0x3B3	/* Core Configuration Register 0 */
+#define SPRN_EPLC	0x3B3	/* External Process ID Load Context */
+#define SPRN_EPSC	0x3B4	/* External Process ID Store Context */
+#define SPRN_SGR	0x3B9	/* Storage Guarded Register */
+#define SPRN_DCWR	0x3BA	/* Data Cache Write-thru Register */
+#define SPRN_SLER	0x3BB	/* Little-endian real mode */
+#define SPRN_SU0R	0x3BC	/* "User 0" real mode (40x) */
+#define SPRN_DCMP	0x3D1	/* Data TLB Compare Register */
+#define SPRN_ICDBDR	0x3D3	/* Instruction Cache Debug Data Register */
+#define SPRN_EVPR	0x3D6	/* Exception Vector Prefix Register */
+#define SPRN_L1CSR0	0x3F2	/* L1 Cache Control and Status Register 0 */
+#define SPRN_L1CSR1	0x3F3	/* L1 Cache Control and Status Register 1 */
+#define SPRN_MMUCSR0	0x3F4	/* MMU Control and Status Register 0 */
+#define SPRN_MMUCFG	0x3F7	/* MMU Configuration Register */
+#define SPRN_BUCSR	0x3F5	/* Branch Unit Control and Status */
+#define SPRN_L2CSR0	0x3F9	/* L2 Data Cache Control and Status Register 0 */
+#define SPRN_L2CSR1	0x3FA	/* L2 Data Cache Control and Status Register 1 */
+#define SPRN_DCCR	0x3FA	/* Data Cache Cacheability Register */
+#define SPRN_ICCR	0x3FB	/* Instruction Cache Cacheability Register */
+#define SPRN_PWRMGTCR0	0x3FB	/* Power management control register 0 */
+#define SPRN_SVR	0x3FF	/* System Version Register */
+
+/*
+ * SPRs which have conflicting definitions on true Book E versus classic,
+ * or IBM 40x.  The 40x branch below defines no PIR, IAC1 or IAC2.
+ */
+#ifdef CONFIG_BOOKE
+#define SPRN_CSRR0	0x03A	/* Critical Save and Restore Register 0 */
+#define SPRN_CSRR1	0x03B	/* Critical Save and Restore Register 1 */
+#define SPRN_DEAR	0x03D	/* Data Error Address Register */
+#define SPRN_ESR	0x03E	/* Exception Syndrome Register */
+#define SPRN_PIR	0x11E	/* Processor Identification Register */
+#define SPRN_DBSR	0x130	/* Debug Status Register */
+#define SPRN_DBCR0	0x134	/* Debug Control Register 0 */
+#define SPRN_DBCR1	0x135	/* Debug Control Register 1 */
+#define SPRN_IAC1	0x138	/* Instruction Address Compare 1 */
+#define SPRN_IAC2	0x139	/* Instruction Address Compare 2 */
+#define SPRN_DAC1	0x13C	/* Data Address Compare 1 */
+#define SPRN_DAC2	0x13D	/* Data Address Compare 2 */
+#define SPRN_TSR	0x150	/* Timer Status Register */
+#define SPRN_TCR	0x154	/* Timer Control Register */
+#endif /* CONFIG_BOOKE */
+#ifdef CONFIG_40x
+#define SPRN_DBCR1	0x3BD	/* Debug Control Register 1 */
+#define SPRN_ESR	0x3D4	/* Exception Syndrome Register */
+#define SPRN_DEAR	0x3D5	/* Data Error Address Register */
+#define SPRN_TSR	0x3D8	/* Timer Status Register */
+#define SPRN_TCR	0x3DA	/* Timer Control Register */
+#define SPRN_SRR2	0x3DE	/* Save/Restore Register 2 */
+#define SPRN_SRR3	0x3DF	/* Save/Restore Register 3 */
+#define SPRN_DBSR	0x3F0	/* Debug Status Register */
+#define SPRN_DBCR0	0x3F2	/* Debug Control Register 0 */
+#define SPRN_DAC1	0x3F6	/* Data Address Compare 1 */
+#define SPRN_DAC2	0x3F7	/* Data Address Compare 2 */
+#define SPRN_CSRR0	SPRN_SRR2 /* Critical Save and Restore Register 0 (alias of SRR2 here) */
+#define SPRN_CSRR1	SPRN_SRR3 /* Critical Save and Restore Register 1 (alias of SRR3 here) */
+#endif /* CONFIG_40x */
+#define SPRN_HACOP	0x15F	/* Hypervisor Available Coprocessor Register */
+
+/* Bit definitions for CCR1. */
+#define	CCR1_DPC	0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */
+#define	CCR1_TCS	0x00000080 /* Timer Clock Select */
+
+/* Bit definitions for PWRMGTCR0. */
+#define PWRMGTCR0_PW20_WAIT		(1 << 14) /* PW20 state enable bit */
+#define PWRMGTCR0_PW20_ENT_SHIFT	8	/* position of the PW20_ENT field */
+#define PWRMGTCR0_PW20_ENT		0x3F00	/* 6-bit field mask, i.e. 0x3F << 8 */
+#define PWRMGTCR0_AV_IDLE_PD_EN		(1 << 22) /* Altivec idle enable */
+#define PWRMGTCR0_AV_IDLE_CNT_SHIFT	16	/* position of the AV_IDLE_CNT field */
+#define PWRMGTCR0_AV_IDLE_CNT		0x3F0000	/* 6-bit field mask, i.e. 0x3F << 16 */
+
+/* Bit definitions for the MCSR.  Several bit values recur below under 47x- or e500-specific names. */
+#define MCSR_MCS	0x80000000 /* Machine Check Summary */
+#define MCSR_IB		0x40000000 /* Instruction PLB Error */
+#define MCSR_DRB	0x20000000 /* Data Read PLB Error */
+#define MCSR_DWB	0x10000000 /* Data Write PLB Error */
+#define MCSR_TLBP	0x08000000 /* TLB Parity Error */
+#define MCSR_ICP	0x04000000 /* I-Cache Parity Error */
+#define MCSR_DCSP	0x02000000 /* D-Cache Search Parity Error */
+#define MCSR_DCFP	0x01000000 /* D-Cache Flush Parity Error */
+#define MCSR_IMPE	0x00800000 /* Imprecise Machine Check Exception */
+
+#define PPC47x_MCSR_GPR	0x01000000 /* GPR parity error */
+#define PPC47x_MCSR_FPR	0x00800000 /* FPR parity error */
+#define PPC47x_MCSR_IPR	0x00400000 /* Imprecise Machine Check Exception */
+
+#ifdef CONFIG_PPC_E500
+/* All e500 */
+#define MCSR_MCP 	0x80000000UL /* Machine Check Input Pin */
+#define MCSR_ICPERR 	0x40000000UL /* I-Cache Parity Error */
+
+/* e500v1/v2 */
+#define MCSR_DCP_PERR 	0x20000000UL /* D-Cache Push Parity Error */
+#define MCSR_DCPERR 	0x10000000UL /* D-Cache Parity Error */
+#define MCSR_BUS_IAERR 	0x00000080UL /* Instruction Address Error */
+#define MCSR_BUS_RAERR 	0x00000040UL /* Read Address Error */
+#define MCSR_BUS_WAERR 	0x00000020UL /* Write Address Error */
+#define MCSR_BUS_IBERR 	0x00000010UL /* Instruction Data Error */
+#define MCSR_BUS_RBERR 	0x00000008UL /* Read Data Bus Error */
+#define MCSR_BUS_WBERR 	0x00000004UL /* Write Data Bus Error */
+#define MCSR_BUS_IPERR 	0x00000002UL /* Instruction parity Error */
+#define MCSR_BUS_RPERR 	0x00000001UL /* Read parity Error */
+
+/* e500mc (DCPERR_MC is at 0x20000000, unlike the e500v1/v2 DCPERR above) */
+#define MCSR_DCPERR_MC	0x20000000UL /* D-Cache Parity Error */
+#define MCSR_L2MMU_MHIT	0x08000000UL /* Hit on multiple TLB entries */
+#define MCSR_NMI	0x00100000UL /* Non-Maskable Interrupt */
+#define MCSR_MAV	0x00080000UL /* MCAR address valid */
+#define MCSR_MEA	0x00040000UL /* MCAR is effective address */
+#define MCSR_IF		0x00010000UL /* Instruction Fetch */
+#define MCSR_LD		0x00008000UL /* Load */
+#define MCSR_ST		0x00004000UL /* Store */
+#define MCSR_LDG	0x00002000UL /* Guarded Load */
+#define MCSR_TLBSYNC	0x00000002UL /* Multiple tlbsyncs detected */
+#define MCSR_BSL2_ERR	0x00000001UL /* Backside L2 cache error */
+
+#define MSRP_UCLEP	0x04000000 /* MSRP bit: Protect MSR[UCLE] */
+#define MSRP_DEP	0x00000200 /* MSRP bit: Protect MSR[DE] */
+#define MSRP_PMMP	0x00000004 /* MSRP bit: Protect MSR[PMM] */
+#endif /* CONFIG_PPC_E500 */
+
+/* Bit definitions for the HID1 (only provided when CONFIG_PPC_E500 is set) */
+#ifdef CONFIG_PPC_E500
+/* e500v1/v2 */
+#define HID1_PLL_CFG_MASK 0xfc000000	/* PLL_CFG input pins (top six bits) */
+#define HID1_RFXE	0x00020000	/* Read fault exception enable */
+#define HID1_R1DPE	0x00008000	/* R1 data bus parity enable */
+#define HID1_R2DPE	0x00004000	/* R2 data bus parity enable */
+#define HID1_ASTME	0x00002000	/* Address bus streaming mode enable */
+#define HID1_ABE	0x00001000	/* Address broadcast enable */
+#define HID1_MPXTT	0x00000400	/* MPX re-map transfer type */
+#define HID1_ATS	0x00000080	/* Atomic status */
+#define HID1_MID_MASK	0x0000000f	/* MID input pins (low four bits) */
+#endif /* CONFIG_PPC_E500 */
+
+/* Bit definitions for the DBSR. */
+/*
+ * DBSR bits which have conflicting definitions on true Book E versus IBM 40x.
+ */
+#ifdef CONFIG_BOOKE
+#define DBSR_IDE	0x80000000	/* Imprecise Debug Event */
+#define DBSR_MRR	0x30000000	/* Most Recent Reset (two-bit field mask) */
+#define DBSR_IC		0x08000000	/* Instruction Completion */
+#define DBSR_BT		0x04000000	/* Branch Taken */
+#define DBSR_IRPT	0x02000000	/* Exception Debug Event */
+#define DBSR_TIE	0x01000000	/* Trap Instruction Event */
+#define DBSR_IAC1	0x00800000	/* Instr Address Compare 1 Event */
+#define DBSR_IAC2	0x00400000	/* Instr Address Compare 2 Event */
+#define DBSR_IAC3	0x00200000	/* Instr Address Compare 3 Event */
+#define DBSR_IAC4	0x00100000	/* Instr Address Compare 4 Event */
+#define DBSR_DAC1R	0x00080000	/* Data Addr Compare 1 Read Event */
+#define DBSR_DAC1W	0x00040000	/* Data Addr Compare 1 Write Event */
+#define DBSR_DAC2R	0x00020000	/* Data Addr Compare 2 Read Event */
+#define DBSR_DAC2W	0x00010000	/* Data Addr Compare 2 Write Event */
+#define DBSR_RET	0x00008000	/* Return Debug Event */
+#define DBSR_CIRPT	0x00000040	/* Critical Interrupt Taken Event */
+#define DBSR_CRET	0x00000020	/* Critical Return Debug Event */
+#define DBSR_IAC12ATS	0x00000002	/* Instr Address Compare 1/2 Toggle */
+#define DBSR_IAC34ATS	0x00000001	/* Instr Address Compare 3/4 Toggle */
+#endif /* CONFIG_BOOKE */
+#ifdef CONFIG_40x	/* defines only IC, BT, IRPT, TIE, IAC1-4 and DAC1/2 R/W */
+#define DBSR_IC		0x80000000	/* Instruction Completion */
+#define DBSR_BT		0x40000000	/* Branch taken */
+#define DBSR_IRPT	0x20000000	/* Exception Debug Event */
+#define DBSR_TIE	0x10000000	/* Trap Instruction debug Event */
+#define DBSR_IAC1	0x04000000	/* Instruction Address Compare 1 Event */
+#define DBSR_IAC2	0x02000000	/* Instruction Address Compare 2 Event */
+#define DBSR_IAC3	0x00080000	/* Instruction Address Compare 3 Event */
+#define DBSR_IAC4	0x00040000	/* Instruction Address Compare 4 Event */
+#define DBSR_DAC1R	0x01000000	/* Data Address Compare 1 Read Event */
+#define DBSR_DAC1W	0x00800000	/* Data Address Compare 1 Write Event */
+#define DBSR_DAC2R	0x00400000	/* Data Address Compare 2 Read Event */
+#define DBSR_DAC2W	0x00200000	/* Data Address Compare 2 Write Event */
+#endif /* CONFIG_40x */
+
+/* Bit definitions related to the ESR.  Two values are defined twice under different names. */
+#define ESR_MCI		0x80000000	/* Machine Check - Instruction (same value as ESR_IMCP) */
+#define ESR_IMCP	0x80000000	/* Instr. Machine Check - Protection */
+#define ESR_IMCN	0x40000000	/* Instr. Machine Check - Non-config */
+#define ESR_IMCB	0x20000000	/* Instr. Machine Check - Bus error */
+#define ESR_IMCT	0x10000000	/* Instr. Machine Check - Timeout */
+#define ESR_PIL		0x08000000	/* Program Exception - Illegal */
+#define ESR_PPR		0x04000000	/* Program Exception - Privileged */
+#define ESR_PTR		0x02000000	/* Program Exception - Trap */
+#define ESR_FP		0x01000000	/* Floating Point Operation */
+#define ESR_DST		0x00800000	/* Storage Exception - Data miss (same value as ESR_ST) */
+#define ESR_DIZ		0x00400000	/* Storage Exception - Zone fault */
+#define ESR_ST		0x00800000	/* Store Operation */
+#define ESR_DLK		0x00200000	/* Data Cache Locking */
+#define ESR_ILK		0x00100000	/* Instr. Cache Locking */
+#define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
+#define ESR_BO		0x00020000	/* Byte Ordering */
+#define ESR_SPV		0x00000080	/* Signal Processing operation */
+
+/* Bit definitions related to the DBCR0. */
+#if defined(CONFIG_40x)
+#define DBCR0_EDM	0x80000000	/* External Debug Mode */
+#define DBCR0_IDM	0x40000000	/* Internal Debug Mode */
+#define DBCR0_RST	0x30000000	/* all the bits in the RST field */
+#define DBCR0_RST_SYSTEM 0x30000000	/* System Reset */
+#define DBCR0_RST_CHIP	0x20000000	/* Chip Reset */
+#define DBCR0_RST_CORE	0x10000000	/* Core Reset */
+#define DBCR0_RST_NONE	0x00000000	/* No Reset */
+#define DBCR0_IC	0x08000000	/* Instruction Completion */
+#define DBCR0_ICMP	DBCR0_IC
+#define DBCR0_BT	0x04000000	/* Branch Taken */
+#define DBCR0_BRT	DBCR0_BT
+#define DBCR0_EDE	0x02000000	/* Exception Debug Event */
+#define DBCR0_IRPT	DBCR0_EDE
+#define DBCR0_TDE	0x01000000	/* TRAP Debug Event */
+#define DBCR0_IA1	0x00800000	/* Instr Addr compare 1 enable */
+#define DBCR0_IAC1	DBCR0_IA1
+#define DBCR0_IA2	0x00400000	/* Instr Addr compare 2 enable */
+#define DBCR0_IAC2	DBCR0_IA2
+#define DBCR0_IA12	0x00200000	/* Instr Addr 1-2 range enable */
+#define DBCR0_IA12X	0x00100000	/* Instr Addr 1-2 range eXclusive */
+#define DBCR0_IA3	0x00080000	/* Instr Addr compare 3 enable */
+#define DBCR0_IAC3	DBCR0_IA3
+#define DBCR0_IA4	0x00040000	/* Instr Addr compare 4 enable */
+#define DBCR0_IAC4	DBCR0_IA4
+#define DBCR0_IA34	0x00020000	/* Instr Addr 3-4 range Enable */
+#define DBCR0_IA34X	0x00010000	/* Instr Addr 3-4 range eXclusive */
+#define DBCR0_IA12T	0x00008000	/* Instr Addr 1-2 range Toggle */
+#define DBCR0_IA34T	0x00004000	/* Instr Addr 3-4 range Toggle */
+#define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
+
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr0)
+#define DBCR_IAC12I	DBCR0_IA12			/* Range Inclusive */
+#define DBCR_IAC12X	(DBCR0_IA12 | DBCR0_IA12X)	/* Range Exclusive */
+#define DBCR_IAC12MODE	(DBCR0_IA12 | DBCR0_IA12X)	/* IAC 1-2 Mode Bits */
+#define DBCR_IAC34I	DBCR0_IA34			/* Range Inclusive */
+#define DBCR_IAC34X	(DBCR0_IA34 | DBCR0_IA34X)	/* Range Exclusive */
+#define DBCR_IAC34MODE	(DBCR0_IA34 | DBCR0_IA34X)	/* IAC 3-4 Mode Bits */
+
+/* Bit definitions related to the DBCR1. */
+#define DBCR1_DAC1R	0x80000000	/* DAC1 Read Debug Event */
+#define DBCR1_DAC2R	0x40000000	/* DAC2 Read Debug Event */
+#define DBCR1_DAC1W	0x20000000	/* DAC1 Write Debug Event */
+#define DBCR1_DAC2W	0x10000000	/* DAC2 Write Debug Event */
+
+#define dbcr_dac(task)	((task)->thread.debug.dbcr1)
+#define DBCR_DAC1R	DBCR1_DAC1R
+#define DBCR_DAC1W	DBCR1_DAC1W
+#define DBCR_DAC2R	DBCR1_DAC2R
+#define DBCR_DAC2W	DBCR1_DAC2W
+
+/*
+ * Are there any active Debug Events represented in the
+ * Debug Control Registers?
+ */
+#define DBCR0_ACTIVE_EVENTS	(DBCR0_ICMP | DBCR0_IAC1 | DBCR0_IAC2 | \
+				 DBCR0_IAC3 | DBCR0_IAC4)
+#define DBCR1_ACTIVE_EVENTS	(DBCR1_DAC1R | DBCR1_DAC2R | \
+				 DBCR1_DAC1W | DBCR1_DAC2W)
+#define DBCR_ACTIVE_EVENTS(dbcr0, dbcr1)  (((dbcr0) & DBCR0_ACTIVE_EVENTS) || \
+					   ((dbcr1) & DBCR1_ACTIVE_EVENTS))
+
+#elif defined(CONFIG_BOOKE)
+#define DBCR0_EDM	0x80000000	/* External Debug Mode */
+#define DBCR0_IDM	0x40000000	/* Internal Debug Mode */
+#define DBCR0_RST	0x30000000	/* all the bits in the RST field */
+/* DBCR0_RST_* is 44x specific and not followed in fsl booke */
+#define DBCR0_RST_SYSTEM 0x30000000	/* System Reset */
+#define DBCR0_RST_CHIP	0x20000000	/* Chip Reset */
+#define DBCR0_RST_CORE	0x10000000	/* Core Reset */
+#define DBCR0_RST_NONE	0x00000000	/* No Reset */
+#define DBCR0_ICMP	0x08000000	/* Instruction Completion */
+#define DBCR0_IC	DBCR0_ICMP
+#define DBCR0_BRT	0x04000000	/* Branch Taken */
+#define DBCR0_BT	DBCR0_BRT
+#define DBCR0_IRPT	0x02000000	/* Exception Debug Event */
+#define DBCR0_TDE	0x01000000	/* TRAP Debug Event */
+#define DBCR0_TIE	DBCR0_TDE
+#define DBCR0_IAC1	0x00800000	/* Instr Addr compare 1 enable */
+#define DBCR0_IAC2	0x00400000	/* Instr Addr compare 2 enable */
+#define DBCR0_IAC3	0x00200000	/* Instr Addr compare 3 enable */
+#define DBCR0_IAC4	0x00100000	/* Instr Addr compare 4 enable */
+#define DBCR0_DAC1R	0x00080000	/* DAC 1 Read enable */
+#define DBCR0_DAC1W	0x00040000	/* DAC 1 Write enable */
+#define DBCR0_DAC2R	0x00020000	/* DAC 2 Read enable */
+#define DBCR0_DAC2W	0x00010000	/* DAC 2 Write enable */
+#define DBCR0_RET	0x00008000	/* Return Debug Event */
+#define DBCR0_CIRPT	0x00000040	/* Critical Interrupt Taken Event */
+#define DBCR0_CRET	0x00000020	/* Critical Return Debug Event */
+#define DBCR0_FT	0x00000001	/* Freeze Timers on debug event */
+
+#define dbcr_dac(task)	((task)->thread.debug.dbcr0)
+#define DBCR_DAC1R	DBCR0_DAC1R
+#define DBCR_DAC1W	DBCR0_DAC1W
+#define DBCR_DAC2R	DBCR0_DAC2R
+#define DBCR_DAC2W	DBCR0_DAC2W
+
+/* Bit definitions related to the DBCR1. */
+#define DBCR1_IAC1US	0xC0000000	/* Instr Addr Cmp 1 Sup/User   */
+#define DBCR1_IAC1ER	0x30000000	/* Instr Addr Cmp 1 Eff/Real */
+#define DBCR1_IAC1ER_01	0x10000000	/* reserved */
+#define DBCR1_IAC1ER_10	0x20000000	/* Instr Addr Cmp 1 Eff/Real MSR[IS]=0 */
+#define DBCR1_IAC1ER_11	0x30000000	/* Instr Addr Cmp 1 Eff/Real MSR[IS]=1 */
+#define DBCR1_IAC2US	0x0C000000	/* Instr Addr Cmp 2 Sup/User   */
+#define DBCR1_IAC2ER	0x03000000	/* Instr Addr Cmp 2 Eff/Real */
+#define DBCR1_IAC2ER_01	0x01000000	/* reserved */
+#define DBCR1_IAC2ER_10	0x02000000	/* Instr Addr Cmp 2 Eff/Real MSR[IS]=0 */
+#define DBCR1_IAC2ER_11	0x03000000	/* Instr Addr Cmp 2 Eff/Real MSR[IS]=1 */
+#define DBCR1_IAC12M	0x00800000	/* Instr Addr 1-2 range enable */
+#define DBCR1_IAC12MX	0x00C00000	/* Instr Addr 1-2 range eXclusive */
+#define DBCR1_IAC12AT	0x00010000	/* Instr Addr 1-2 range Toggle */
+#define DBCR1_IAC3US	0x0000C000	/* Instr Addr Cmp 3 Sup/User   */
+#define DBCR1_IAC3ER	0x00003000	/* Instr Addr Cmp 3 Eff/Real */
+#define DBCR1_IAC3ER_01	0x00001000	/* reserved */
+#define DBCR1_IAC3ER_10	0x00002000	/* Instr Addr Cmp 3 Eff/Real MSR[IS]=0 */
+#define DBCR1_IAC3ER_11	0x00003000	/* Instr Addr Cmp 3 Eff/Real MSR[IS]=1 */
+#define DBCR1_IAC4US	0x00000C00	/* Instr Addr Cmp 4 Sup/User   */
+#define DBCR1_IAC4ER	0x00000300	/* Instr Addr Cmp 4 Eff/Real */
+#define DBCR1_IAC4ER_01	0x00000100	/* reserved */
+#define DBCR1_IAC4ER_10	0x00000200	/* Instr Addr Cmp 4 Eff/Real MSR[IS]=0 */
+#define DBCR1_IAC4ER_11	0x00000300	/* Instr Addr Cmp 4 Eff/Real MSR[IS]=1 */
+#define DBCR1_IAC34M	0x00000080	/* Instr Addr 3-4 range enable */
+#define DBCR1_IAC34MX	0x000000C0	/* Instr Addr 3-4 range eXclusive */
+#define DBCR1_IAC34AT	0x00000001	/* Instr Addr 3-4 range Toggle */
+
+#define dbcr_iac_range(task)	((task)->thread.debug.dbcr1)
+#define DBCR_IAC12I	DBCR1_IAC12M	/* Range Inclusive */
+#define DBCR_IAC12X	DBCR1_IAC12MX	/* Range Exclusive */
+#define DBCR_IAC12MODE	DBCR1_IAC12MX	/* IAC 1-2 Mode Bits */
+#define DBCR_IAC34I	DBCR1_IAC34M	/* Range Inclusive */
+#define DBCR_IAC34X	DBCR1_IAC34MX	/* Range Exclusive */
+#define DBCR_IAC34MODE	DBCR1_IAC34MX	/* IAC 3-4 Mode Bits */
+
+/* Bit definitions related to the DBCR2. */
+#define DBCR2_DAC1US	0xC0000000	/* Data Addr Cmp 1 Sup/User   */
+#define DBCR2_DAC1ER	0x30000000	/* Data Addr Cmp 1 Eff/Real */
+#define DBCR2_DAC2US	0x0C000000	/* Data Addr Cmp 2 Sup/User   */
+#define DBCR2_DAC2ER	0x03000000	/* Data Addr Cmp 2 Eff/Real */
+#define DBCR2_DAC12M	0x00800000	/* DAC 1-2 range enable */
+#define DBCR2_DAC12MM	0x00400000	/* DAC 1-2 Mask mode */
+#define DBCR2_DAC12MX	0x00C00000	/* DAC 1-2 range eXclusive */
+#define DBCR2_DAC12MODE	0x00C00000	/* DAC 1-2 Mode Bits */
+#define DBCR2_DAC12A	0x00200000	/* DAC 1-2 Asynchronous */
+#define DBCR2_DVC1M	0x000C0000	/* Data Value Comp 1 Mode */
+#define DBCR2_DVC1M_SHIFT	18	/* # of bits to shift DBCR2_DVC1M */
+#define DBCR2_DVC2M	0x00030000	/* Data Value Comp 2 Mode */
+#define DBCR2_DVC2M_SHIFT	16	/* # of bits to shift DBCR2_DVC2M */
+#define DBCR2_DVC1BE	0x00000F00	/* Data Value Comp 1 Byte */
+#define DBCR2_DVC1BE_SHIFT	8	/* # of bits to shift DBCR2_DVC1BE */
+#define DBCR2_DVC2BE	0x0000000F	/* Data Value Comp 2 Byte */
+#define DBCR2_DVC2BE_SHIFT	0	/* # of bits to shift DBCR2_DVC2BE */
+
+/*
+ * Are there any active Debug Events represented in the
+ * Debug Control Registers?
+ */
+#define DBCR0_ACTIVE_EVENTS  (DBCR0_ICMP | DBCR0_IAC1 | DBCR0_IAC2 | \
+			      DBCR0_IAC3 | DBCR0_IAC4 | DBCR0_DAC1R | \
+			      DBCR0_DAC1W  | DBCR0_DAC2R | DBCR0_DAC2W)
+#define DBCR1_ACTIVE_EVENTS	0
+
+#define DBCR_ACTIVE_EVENTS(dbcr0, dbcr1)  (((dbcr0) & DBCR0_ACTIVE_EVENTS) || \
+					   ((dbcr1) & DBCR1_ACTIVE_EVENTS))
+#endif /* #elif defined(CONFIG_BOOKE) */
+
+/* Bit definitions related to the TCR. */
+#define TCR_WP(x)	(((x)&0x3)<<30)	/* WDT Period */
+#define TCR_WP_MASK	TCR_WP(3)
+#define WP_2_17		0		/* 2^17 clocks */
+#define WP_2_21		1		/* 2^21 clocks */
+#define WP_2_25		2		/* 2^25 clocks */
+#define WP_2_29		3		/* 2^29 clocks */
+#define TCR_WRC(x)	(((x)&0x3)<<28)	/* WDT Reset Control */
+#define TCR_WRC_MASK	TCR_WRC(3)
+#define WRC_NONE	0		/* No reset will occur */
+#define WRC_CORE	1		/* Core reset will occur */
+#define WRC_CHIP	2		/* Chip reset will occur */
+#define WRC_SYSTEM	3		/* System reset will occur */
+#define TCR_WIE		0x08000000	/* WDT Interrupt Enable */
+#define TCR_PIE		0x04000000	/* PIT Interrupt Enable */
+#define TCR_DIE		TCR_PIE		/* DEC Interrupt Enable */
+#define TCR_FP(x)	(((x)&0x3)<<24)	/* FIT Period */
+#define TCR_FP_MASK	TCR_FP(3)
+#define FP_2_9		0		/* 2^9 clocks */
+#define FP_2_13		1		/* 2^13 clocks */
+#define FP_2_17		2		/* 2^17 clocks */
+#define FP_2_21		3		/* 2^21 clocks */
+#define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
+#define TCR_ARE		0x00400000	/* Auto Reload Enable */
+
+#ifdef CONFIG_PPC_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+			      (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
+/* Bit definitions for the TSR. */
+#define TSR_ENW		0x80000000	/* Enable Next Watchdog */
+#define TSR_WIS		0x40000000	/* WDT Interrupt Status */
+#define TSR_WRS(x)	(((x)&0x3)<<28)	/* WDT Reset Status */
+#define WRS_NONE	0		/* No WDT reset occurred */
+#define WRS_CORE	1		/* WDT forced core reset */
+#define WRS_CHIP	2		/* WDT forced chip reset */
+#define WRS_SYSTEM	3		/* WDT forced system reset */
+#define TSR_PIS		0x08000000	/* PIT Interrupt Status */
+#define TSR_DIS		TSR_PIS		/* DEC Interrupt Status */
+#define TSR_FIS		0x04000000	/* FIT Interrupt Status */
+
+/* Bit definitions for the DCCR. */
+#define DCCR_NOCACHE	0		/* Noncacheable */
+#define DCCR_CACHE	1		/* Cacheable */
+
+/* Bit definitions for DCWR. */
+#define DCWR_COPY	0		/* Copy-back */
+#define DCWR_WRITE	1		/* Write-through */
+
+/* Bit definitions for ICCR. */
+#define ICCR_NOCACHE	0		/* Noncacheable */
+#define ICCR_CACHE	1		/* Cacheable */
+
+/* Bit definitions for L1CSR0. */
+#define L1CSR0_CPE	0x00010000	/* Data Cache Parity Enable */
+#define L1CSR0_CUL	0x00000400	/* Data Cache Unable to Lock */
+#define L1CSR0_CLFC	0x00000100	/* Cache Lock Bits Flash Clear */
+#define L1CSR0_DCFI	0x00000002	/* Data Cache Flash Invalidate */
+#define L1CSR0_CFI	0x00000002	/* Cache Flash Invalidate */
+#define L1CSR0_DCE	0x00000001	/* Data Cache Enable */
+
+/* Bit definitions for L1CSR1. */
+#define L1CSR1_CPE	0x00010000	/* Instruction Cache Parity Enable */
+#define L1CSR1_ICLFR	0x00000100	/* Instr Cache Lock Bits Flash Reset */
+#define L1CSR1_ICFI	0x00000002	/* Instr Cache Flash Invalidate */
+#define L1CSR1_ICE	0x00000001	/* Instr Cache Enable */
+
+/* Bit definitions for L1CSR2. */
+#define L1CSR2_DCWS	0x40000000	/* Data Cache write shadow */
+
+/* Bit definitions for BUCSR. */
+#define BUCSR_STAC_EN	0x01000000	/* Segment Target Address Cache */
+#define BUCSR_LS_EN	0x00400000	/* Link Stack */
+#define BUCSR_BBFI	0x00000200	/* Branch Buffer flash invalidate */
+#define BUCSR_BPEN	0x00000001	/* Branch prediction enable */
+#define BUCSR_INIT	(BUCSR_STAC_EN | BUCSR_LS_EN | BUCSR_BBFI | BUCSR_BPEN)
+
+/* Bit definitions for L2CSR0. */
+#define L2CSR0_L2E	0x80000000	/* L2 Cache Enable */
+#define L2CSR0_L2PE	0x40000000	/* L2 Cache Parity/ECC Enable */
+#define L2CSR0_L2WP	0x1c000000	/* L2 I/D Way Partitioning */
+#define L2CSR0_L2CM	0x03000000	/* L2 Cache Coherency Mode */
+#define L2CSR0_L2FI	0x00200000	/* L2 Cache Flash Invalidate */
+#define L2CSR0_L2IO	0x00100000	/* L2 Cache Instruction Only */
+#define L2CSR0_L2DO	0x00010000	/* L2 Cache Data Only */
+#define L2CSR0_L2REP	0x00003000	/* L2 Line Replacement Algo */
+#define L2CSR0_L2FL	0x00000800	/* L2 Cache Flush */
+#define L2CSR0_L2LFC	0x00000400	/* L2 Cache Lock Flash Clear */
+#define L2CSR0_L2LOA	0x00000080	/* L2 Cache Lock Overflow Allocate */
+#define L2CSR0_L2LO	0x00000020	/* L2 Cache Lock Overflow */
+
+/* Bit definitions for SGR. */
+#define SGR_NORMAL	0		/* Speculative fetching allowed. */
+#define SGR_GUARDED	1		/* Speculative fetching disallowed. */
+
+/* Bit definitions for EPCR */
+#define SPRN_EPCR_EXTGS		0x80000000	/* External Input interrupt
+						 * directed to Guest state */
+#define SPRN_EPCR_DTLBGS	0x40000000	/* Data TLB Error interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_ITLBGS	0x20000000	/* Instr. TLB error interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_DSIGS		0x10000000	/* Data Storage interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_ISIGS		0x08000000	/* Instr. Storage interrupt
+						 * directed to guest state */
+#define SPRN_EPCR_DUVD		0x04000000	/* Disable Hypervisor Debug */
+#define SPRN_EPCR_ICM		0x02000000	/* Interrupt computation mode
+						 * (copied to MSR:CM on intr) */
+#define SPRN_EPCR_GICM		0x01000000	/* Guest Interrupt Comp. mode */
+#define SPRN_EPCR_DGTMI		0x00800000	/* Disable TLB Guest Management
+						 * instructions */
+#define SPRN_EPCR_DMIUH		0x00400000	/* Disable MAS Interrupt updates
+						 * for hypervisor */
+
+/* Bit definitions for EPLC/EPSC */
+#define EPC_EPR		0x80000000 /* 1 = user, 0 = kernel */
+#define EPC_EPR_SHIFT	31
+#define EPC_EAS		0x40000000 /* Address Space */
+#define EPC_EAS_SHIFT	30
+#define EPC_EGS		0x20000000 /* 1 = guest, 0 = hypervisor */
+#define EPC_EGS_SHIFT	29
+#define EPC_ELPID	0x00ff0000
+#define EPC_ELPID_SHIFT	16
+#define EPC_EPID	0x00003fff
+#define EPC_EPID_SHIFT	0
+
+/* Some 476 specific registers */
+#define SPRN_SSPCR		830
+#define SPRN_USPCR		831
+#define SPRN_ISPCR		829
+#define SPRN_MMUBE0		820
+#define MMUBE0_IBE0_SHIFT	24
+#define MMUBE0_IBE1_SHIFT	16
+#define MMUBE0_IBE2_SHIFT	8
+#define MMUBE0_VBE0		0x00000004
+#define MMUBE0_VBE1		0x00000002
+#define MMUBE0_VBE2		0x00000001
+#define SPRN_MMUBE1		821
+#define MMUBE1_IBE3_SHIFT	24
+#define MMUBE1_IBE4_SHIFT	16
+#define MMUBE1_IBE5_SHIFT	8
+#define MMUBE1_VBE3		0x00000004
+#define MMUBE1_VBE4		0x00000002
+#define MMUBE1_VBE5		0x00000001
+
+#define TMRN_TMCFG0      16	/* Thread Management Configuration Register 0 */
+#define TMRN_TMCFG0_NPRIBITS       0x003f0000 /* Bits of thread priority */
+#define TMRN_TMCFG0_NPRIBITS_SHIFT 16
+#define TMRN_TMCFG0_NATHRD         0x00003f00 /* Number of active threads */
+#define TMRN_TMCFG0_NATHRD_SHIFT   8
+#define TMRN_TMCFG0_NTHRD          0x0000003f /* Number of threads */
+#define TMRN_IMSR0	0x120	/* Initial MSR Register 0 (e6500) */
+#define TMRN_IMSR1	0x121	/* Initial MSR Register 1 (e6500) */
+#define TMRN_INIA0	0x140	/* Next Instruction Address Register 0 */
+#define TMRN_INIA1	0x141	/* Next Instruction Address Register 1 */
+#define SPRN_TENSR	0x1b5	/* Thread Enable Status Register */
+#define SPRN_TENS	0x1b6	/* Thread Enable Set Register */
+#define SPRN_TENC	0x1b7	/* Thread Enable Clear Register */
+
+#define TEN_THREAD(x)	(1 << (x))
+
+#ifndef __ASSEMBLY__
+#define mftmr(rn)	({unsigned long rval; \
+			asm volatile(MFTMR(rn, %0) : "=r" (rval)); rval;})
+#define mttmr(rn, v)	asm volatile(MTTMR(rn, %0) : \
+				     : "r" ((unsigned long)(v)) \
+				     : "memory")
+
+extern unsigned long global_dbcr0[];
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_POWERPC_REG_BOOKE_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h
new file mode 100644
index 0000000000..a21f529c43
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_fsl_emb.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Contains register definitions for the Freescale Embedded Performance
+ * Monitor.
+ */
+#ifdef __KERNEL__
+#ifndef __ASM_POWERPC_REG_FSL_EMB_H__
+#define __ASM_POWERPC_REG_FSL_EMB_H__
+
+#include <linux/stringify.h>
+
+#ifndef __ASSEMBLY__
+/* Performance Monitor Registers */
+#define mfpmr(rn)	({unsigned int rval; \
+			asm volatile("mfpmr %0," __stringify(rn) \
+				     : "=r" (rval)); rval;})
+#define mtpmr(rn, v)	asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v))
+#endif /* __ASSEMBLY__ */
+
+/* Freescale Book E Performance Monitor APU Registers */
+#define PMRN_PMC0	0x010	/* Performance Monitor Counter 0 */
+#define PMRN_PMC1	0x011	/* Performance Monitor Counter 1 */
+#define PMRN_PMC2	0x012	/* Performance Monitor Counter 2 */
+#define PMRN_PMC3	0x013	/* Performance Monitor Counter 3 */
+#define PMRN_PMC4	0x014	/* Performance Monitor Counter 4 */
+#define PMRN_PMC5	0x015	/* Performance Monitor Counter 5 */
+#define PMRN_PMLCA0	0x090	/* PM Local Control A0 */
+#define PMRN_PMLCA1	0x091	/* PM Local Control A1 */
+#define PMRN_PMLCA2	0x092	/* PM Local Control A2 */
+#define PMRN_PMLCA3	0x093	/* PM Local Control A3 */
+#define PMRN_PMLCA4	0x094	/* PM Local Control A4 */
+#define PMRN_PMLCA5	0x095	/* PM Local Control A5 */
+
+#define PMLCA_FC	0x80000000	/* Freeze Counter */
+#define PMLCA_FCS	0x40000000	/* Freeze in Supervisor */
+#define PMLCA_FCU	0x20000000	/* Freeze in User */
+#define PMLCA_FCM1	0x10000000	/* Freeze when PMM==1 */
+#define PMLCA_FCM0	0x08000000	/* Freeze when PMM==0 */
+#define PMLCA_CE	0x04000000	/* Condition Enable */
+#define PMLCA_FGCS1	0x00000002	/* Freeze in guest state */
+#define PMLCA_FGCS0	0x00000001	/* Freeze in hypervisor state */
+
+#define PMLCA_EVENT_MASK 0x01ff0000	/* Event field */
+#define PMLCA_EVENT_SHIFT	16
+
+#define PMRN_PMLCB0	0x110	/* PM Local Control B0 */
+#define PMRN_PMLCB1	0x111	/* PM Local Control B1 */
+#define PMRN_PMLCB2	0x112	/* PM Local Control B2 */
+#define PMRN_PMLCB3	0x113	/* PM Local Control B3 */
+#define PMRN_PMLCB4	0x114	/* PM Local Control B4 */
+#define PMRN_PMLCB5	0x115	/* PM Local Control B5 */
+
+#define PMLCB_THRESHMUL_MASK	0x0700	/* Threshold Multiple Field */
+#define PMLCB_THRESHMUL_SHIFT	8
+
+#define PMLCB_THRESHOLD_MASK	0x003f	/* Threshold Field */
+#define PMLCB_THRESHOLD_SHIFT	0
+
+#define PMRN_PMGC0	0x190	/* PM Global Control 0 */
+
+#define PMGC0_FAC	0x80000000	/* Freeze all Counters */
+#define PMGC0_PMIE	0x40000000	/* Interrupt Enable */
+#define PMGC0_FCECE	0x20000000	/* Freeze counters on
+					   Enabled Condition or
+					   Event */
+
+#define PMRN_UPMC0	0x000	/* User Performance Monitor Counter 0 */
+#define PMRN_UPMC1	0x001	/* User Performance Monitor Counter 1 */
+#define PMRN_UPMC2	0x002	/* User Performance Monitor Counter 2 */
+#define PMRN_UPMC3	0x003	/* User Performance Monitor Counter 3 */
+#define PMRN_UPMC4	0x004	/* User Performance Monitor Counter 4 */
+#define PMRN_UPMC5	0x005	/* User Performance Monitor Counter 5 */
+#define PMRN_UPMLCA0	0x080	/* User PM Local Control A0 */
+#define PMRN_UPMLCA1	0x081	/* User PM Local Control A1 */
+#define PMRN_UPMLCA2	0x082	/* User PM Local Control A2 */
+#define PMRN_UPMLCA3	0x083	/* User PM Local Control A3 */
+#define PMRN_UPMLCA4	0x084	/* User PM Local Control A4 */
+#define PMRN_UPMLCA5	0x085	/* User PM Local Control A5 */
+#define PMRN_UPMLCB0	0x100	/* User PM Local Control B0 */
+#define PMRN_UPMLCB1	0x101	/* User PM Local Control B1 */
+#define PMRN_UPMLCB2	0x102	/* User PM Local Control B2 */
+#define PMRN_UPMLCB3	0x103	/* User PM Local Control B3 */
+#define PMRN_UPMLCB4	0x104	/* User PM Local Control B4 */
+#define PMRN_UPMLCB5	0x105	/* User PM Local Control B5 */
+#define PMRN_UPMGC0	0x180	/* User PM Global Control 0 */
+
+
+#endif /* __ASM_POWERPC_REG_FSL_EMB_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/rheap.h b/arch/powerpc/include/asm/rheap.h
new file mode 100644
index 0000000000..8e83703d67
--- /dev/null
+++ b/arch/powerpc/include/asm/rheap.h
@@ -0,0 +1,92 @@
+/*
+ * arch/powerpc/include/asm/rheap.h
+ *
+ * Header file for the implementation of a remote heap.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#ifndef __ASM_PPC_RHEAP_H__
+#define __ASM_PPC_RHEAP_H__
+
+#include <linux/list.h>
+
+typedef struct _rh_block {
+	struct list_head list;
+	unsigned long start;
+	int size;
+	const char *owner;
+} rh_block_t;
+
+typedef struct _rh_info {
+	unsigned int alignment;
+	int max_blocks;
+	int empty_slots;
+	rh_block_t *block;
+	struct list_head empty_list;
+	struct list_head free_list;
+	struct list_head taken_list;
+	unsigned int flags;
+} rh_info_t;
+
+#define RHIF_STATIC_INFO	0x1
+#define RHIF_STATIC_BLOCK	0x2
+
+typedef struct _rh_stats {
+	unsigned long start;
+	int size;
+	const char *owner;
+} rh_stats_t;
+
+#define RHGS_FREE	0
+#define RHGS_TAKEN	1
+
+/* Create a remote heap dynamically */
+extern rh_info_t *rh_create(unsigned int alignment);
+
+/* Destroy a remote heap, created by rh_create() */
+extern void rh_destroy(rh_info_t * info);
+
+/* Initialize in place a remote info block */
+extern void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+		    rh_block_t * block);
+
+/* Attach a free region to manage */
+extern int rh_attach_region(rh_info_t * info, unsigned long start, int size);
+
+/* Detach a free region */
+extern unsigned long rh_detach_region(rh_info_t * info, unsigned long start, int size);
+
+/* Allocate the given size from the remote heap (with alignment) */
+extern unsigned long rh_alloc_align(rh_info_t * info, int size, int alignment,
+		const char *owner);
+
+/* Allocate the given size from the remote heap */
+extern unsigned long rh_alloc(rh_info_t * info, int size, const char *owner);
+
+/* Allocate the given size from the given address */
+extern unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size,
+			    const char *owner);
+
+/* Free the allocated area */
+extern int rh_free(rh_info_t * info, unsigned long start);
+
+/* Get stats for debugging purposes */
+extern int rh_get_stats(rh_info_t * info, int what, int max_stats,
+			rh_stats_t * stats);
+
+/* Simple dump of remote heap info */
+extern void rh_dump(rh_info_t * info);
+
+/* Simple dump of remote info block */
+void rh_dump_blk(rh_info_t *info, rh_block_t *blk);
+
+/* Set owner of taken block */
+extern int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner);
+
+#endif				/* __ASM_PPC_RHEAP_H__ */
diff --git a/arch/powerpc/include/asm/rio.h b/arch/powerpc/include/asm/rio.h
new file mode 100644
index 0000000000..0e57cda2a6
--- /dev/null
+++ b/arch/powerpc/include/asm/rio.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * RapidIO architecture support
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#ifndef ASM_PPC_RIO_H
+#define ASM_PPC_RIO_H
+
+#ifdef CONFIG_FSL_RIO
+extern int fsl_rio_mcheck_exception(struct pt_regs *);
+#else
+static inline int fsl_rio_mcheck_exception(struct pt_regs *regs) {return 0; }
+#endif
+
+#endif				/* ASM_PPC_RIO_H */
diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h
new file mode 100644
index 0000000000..9d5b16803c
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas-types.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_RTAS_TYPES_H
+#define _ASM_POWERPC_RTAS_TYPES_H
+
+#include <linux/compiler_attributes.h>
+
+typedef __be32 rtas_arg_t;
+
+struct rtas_args {
+	__be32 token;
+	__be32 nargs;
+	__be32 nret;
+	rtas_arg_t args[16];
+	rtas_arg_t *rets;     /* Pointer to return values in args[]. */
+} __aligned(8);
+
+struct rtas_t {
+	unsigned long entry;		/* physical address pointer */
+	unsigned long base;		/* physical address pointer */
+	unsigned long size;
+	struct device_node *dev;	/* virtual address pointer */
+};
+
+struct rtas_error_log {
+	/* Byte 0 */
+	u8		byte0;			/* Architectural version */
+
+	/* Byte 1 */
+	u8		byte1;
+	/* XXXXXXXX
+	 * XXX		3: Severity level of error
+	 *    XX	2: Degree of recovery
+	 *      X	1: Extended log present?
+	 *       XX	2: Reserved
+	 */
+
+	/* Byte 2 */
+	u8		byte2;
+	/* XXXXXXXX
+	 * XXXX		4: Initiator of event
+	 *     XXXX	4: Target of failed operation
+	 */
+	u8		byte3;			/* General event or error */
+	__be32		extended_log_length;	/* length in bytes */
+	unsigned char	buffer[1];		/* Start of extended log */
+						/* Variable length.      */
+};
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+	/* Byte 0 */
+	u8 byte0;
+	/* XXXXXXXX
+	 * X		1: Log valid
+	 *  X		1: Unrecoverable error
+	 *   X		1: Recoverable (correctable or successfully retried)
+	 *    X		1: Bypassed unrecoverable error (degraded operation)
+	 *     X	1: Predictive error
+	 *      X	1: "New" log (always 1 for data returned from RTAS)
+	 *       X	1: Big Endian
+	 *        X	1: Reserved
+	 */
+
+	/* Byte 1 */
+	u8 byte1;			/* reserved */
+
+	/* Byte 2 */
+	u8 byte2;
+	/* XXXXXXXX
+	 * X		1: Set to 1 (indicating log is in PowerPC format)
+	 *  XXX		3: Reserved
+	 *     XXXX	4: Log format used for bytes 12-2047
+	 */
+
+	/* Byte 3 */
+	u8 byte3;			/* reserved */
+	/* Byte 4-11 */
+	u8 reserved[8];			/* reserved */
+	/* Byte 12-15 */
+	__be32  company_id;		/* Company ID of the company	*/
+					/* that defines the format for	*/
+					/* the vendor specific log type	*/
+	/* Byte 16-end of log */
+	u8 vendor_log[1];		/* Start of vendor specific log	*/
+					/* Variable length.		*/
+};
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_errorlog {
+	__be16 id;			/* 0x00 2-byte ASCII section ID	*/
+	__be16 length;			/* 0x02 Section length in bytes	*/
+	u8 version;			/* 0x04 Section version		*/
+	u8 subtype;			/* 0x05 Section subtype		*/
+	__be16 creator_component;	/* 0x06 Creator component ID	*/
+	u8 data[];			/* 0x08 Start of section data	*/
+};
+
+/* RTAS pseries hotplug errorlog section */
+struct pseries_hp_errorlog {
+	u8	resource;
+	u8	action;
+	u8	id_type;
+	u8	reserved;
+	union {
+		__be32	drc_index;
+		__be32	drc_count;
+		struct { __be32 count, index; } ic;
+		char	drc_name[1];
+	} _drc_u;
+};
+
+#endif /* _ASM_POWERPC_RTAS_TYPES_H */
diff --git a/arch/powerpc/include/asm/rtas-work-area.h b/arch/powerpc/include/asm/rtas-work-area.h
new file mode 100644
index 0000000000..251a395dbd
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas-work-area.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_RTAS_WORK_AREA_H
+#define _ASM_POWERPC_RTAS_WORK_AREA_H
+
+#include <linux/build_bug.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+/**
+ * struct rtas_work_area - RTAS work area descriptor.
+ *
+ * Descriptor for a "work area" in PAPR terminology that satisfies
+ * RTAS addressing requirements.
+ */
+struct rtas_work_area {
+	/* private: Use the APIs provided below. */
+	char *buf;
+	size_t size;
+};
+
+enum {
+	/* Maximum allocation size, enforced at build time. */
+	RTAS_WORK_AREA_MAX_ALLOC_SZ = SZ_128K,
+};
+
+/**
+ * rtas_work_area_alloc() - Acquire a work area of the requested size.
+ * @size_: Allocation size. Must be compile-time constant and not more
+ *         than %RTAS_WORK_AREA_MAX_ALLOC_SZ.
+ *
+ * Allocate a buffer suitable for passing to RTAS functions that have
+ * a memory address parameter, often (but not always) referred to as a
+ * "work area" in PAPR. Although callers are allowed to block while
+ * holding a work area, the amount of memory reserved for this purpose
+ * is limited, and allocations should be short-lived. A good guideline
+ * is to release any allocated work area before returning from a
+ * system call.
+ *
+ * This function does not fail. It blocks until the allocation
+ * succeeds. To prevent deadlocks, callers are discouraged from
+ * allocating more than one work area simultaneously in a single task
+ * context.
+ *
+ * Context: This function may sleep.
+ * Return: A &struct rtas_work_area descriptor for the allocated work area.
+ */
+#define rtas_work_area_alloc(size_) ({				\
+	static_assert(__builtin_constant_p(size_));		\
+	static_assert((size_) > 0);				\
+	static_assert((size_) <= RTAS_WORK_AREA_MAX_ALLOC_SZ);	\
+	__rtas_work_area_alloc(size_);				\
+})
+
+/*
+ * Do not call __rtas_work_area_alloc() directly. Use
+ * rtas_work_area_alloc().
+ */
+struct rtas_work_area *__rtas_work_area_alloc(size_t size);
+
+/**
+ * rtas_work_area_free() - Release a work area.
+ * @area: Work area descriptor as returned from rtas_work_area_alloc().
+ *
+ * Return a work area buffer to the pool.
+ */
+void rtas_work_area_free(struct rtas_work_area *area);
+
+static inline char *rtas_work_area_raw_buf(const struct rtas_work_area *area)
+{
+	return area->buf;
+}
+
+static inline size_t rtas_work_area_size(const struct rtas_work_area *area)
+{
+	return area->size;
+}
+
+static inline phys_addr_t rtas_work_area_phys(const struct rtas_work_area *area)
+{
+	return __pa(area->buf);
+}
+
+/*
+ * Early setup for the work area allocator. Call from
+ * rtas_initialize() only.
+ */
+
+#ifdef CONFIG_PPC_PSERIES
+void rtas_work_area_reserve_arena(phys_addr_t limit);
+#else /* CONFIG_PPC_PSERIES */
+static inline void rtas_work_area_reserve_arena(phys_addr_t limit) {}
+#endif /* CONFIG_PPC_PSERIES */
+
+#endif /* _ASM_POWERPC_RTAS_WORK_AREA_H */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
new file mode 100644
index 0000000000..c697c3c746
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas.h
@@ -0,0 +1,554 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_RTAS_H
+#define _POWERPC_RTAS_H
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+#include <asm/page.h>
+#include <asm/rtas-types.h>
+#include <linux/time.h>
+#include <linux/cpumask.h>
+
+/*
+ * Definitions for talking to the RTAS on CHRP machines.
+ *
+ * Copyright (C) 2001 Peter Bergner
+ * Copyright (C) 2001 PPC 64 Team, IBM Corp
+ */
+
+enum rtas_function_index {
+	RTAS_FNIDX__CHECK_EXCEPTION,
+	RTAS_FNIDX__DISPLAY_CHARACTER,
+	RTAS_FNIDX__EVENT_SCAN,
+	RTAS_FNIDX__FREEZE_TIME_BASE,
+	RTAS_FNIDX__GET_POWER_LEVEL,
+	RTAS_FNIDX__GET_SENSOR_STATE,
+	RTAS_FNIDX__GET_TERM_CHAR,
+	RTAS_FNIDX__GET_TIME_OF_DAY,
+	RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE,
+	RTAS_FNIDX__IBM_CBE_START_PTCAL,
+	RTAS_FNIDX__IBM_CBE_STOP_PTCAL,
+	RTAS_FNIDX__IBM_CHANGE_MSI,
+	RTAS_FNIDX__IBM_CLOSE_ERRINJCT,
+	RTAS_FNIDX__IBM_CONFIGURE_BRIDGE,
+	RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR,
+	RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP,
+	RTAS_FNIDX__IBM_CONFIGURE_PE,
+	RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW,
+	RTAS_FNIDX__IBM_DISPLAY_MESSAGE,
+	RTAS_FNIDX__IBM_ERRINJCT,
+	RTAS_FNIDX__IBM_EXTI2C,
+	RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO,
+	RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2,
+	RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE,
+	RTAS_FNIDX__IBM_GET_INDICES,
+	RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY,
+	RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER,
+	RTAS_FNIDX__IBM_GET_VPD,
+	RTAS_FNIDX__IBM_GET_XIVE,
+	RTAS_FNIDX__IBM_INT_OFF,
+	RTAS_FNIDX__IBM_INT_ON,
+	RTAS_FNIDX__IBM_IO_QUIESCE_ACK,
+	RTAS_FNIDX__IBM_LPAR_PERFTOOLS,
+	RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE,
+	RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION,
+	RTAS_FNIDX__IBM_NMI_INTERLOCK,
+	RTAS_FNIDX__IBM_NMI_REGISTER,
+	RTAS_FNIDX__IBM_OPEN_ERRINJCT,
+	RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE,
+	RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER,
+	RTAS_FNIDX__IBM_OS_TERM,
+	RTAS_FNIDX__IBM_PARTNER_CONTROL,
+	RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION,
+	RTAS_FNIDX__IBM_PLATFORM_DUMP,
+	RTAS_FNIDX__IBM_POWER_OFF_UPS,
+	RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+	RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW,
+	RTAS_FNIDX__IBM_READ_PCI_CONFIG,
+	RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE,
+	RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2,
+	RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW,
+	RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS,
+	RTAS_FNIDX__IBM_SCAN_LOG_DUMP,
+	RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR,
+	RTAS_FNIDX__IBM_SET_EEH_OPTION,
+	RTAS_FNIDX__IBM_SET_SLOT_RESET,
+	RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER,
+	RTAS_FNIDX__IBM_SET_XIVE,
+	RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL,
+	RTAS_FNIDX__IBM_SUSPEND_ME,
+	RTAS_FNIDX__IBM_TUNE_DMA_PARMS,
+	RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT,
+	RTAS_FNIDX__IBM_UPDATE_NODES,
+	RTAS_FNIDX__IBM_UPDATE_PROPERTIES,
+	RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE,
+	RTAS_FNIDX__IBM_WRITE_PCI_CONFIG,
+	RTAS_FNIDX__NVRAM_FETCH,
+	RTAS_FNIDX__NVRAM_STORE,
+	RTAS_FNIDX__POWER_OFF,
+	RTAS_FNIDX__PUT_TERM_CHAR,
+	RTAS_FNIDX__QUERY_CPU_STOPPED_STATE,
+	RTAS_FNIDX__READ_PCI_CONFIG,
+	RTAS_FNIDX__RTAS_LAST_ERROR,
+	RTAS_FNIDX__SET_INDICATOR,
+	RTAS_FNIDX__SET_POWER_LEVEL,
+	RTAS_FNIDX__SET_TIME_FOR_POWER_ON,
+	RTAS_FNIDX__SET_TIME_OF_DAY,
+	RTAS_FNIDX__START_CPU,
+	RTAS_FNIDX__STOP_SELF,
+	RTAS_FNIDX__SYSTEM_REBOOT,
+	RTAS_FNIDX__THAW_TIME_BASE,
+	RTAS_FNIDX__WRITE_PCI_CONFIG,
+};
+
+/*
+ * Opaque handle for client code to refer to RTAS functions. All valid
+ * function handles are build-time constants prefixed with RTAS_FN_.
+ */
+typedef struct {
+	const enum rtas_function_index index;
+} rtas_fn_handle_t;
+
+
+#define rtas_fn_handle(x_) ((const rtas_fn_handle_t) { .index = x_, })
+
+#define RTAS_FN_CHECK_EXCEPTION                   rtas_fn_handle(RTAS_FNIDX__CHECK_EXCEPTION)
+#define RTAS_FN_DISPLAY_CHARACTER                 rtas_fn_handle(RTAS_FNIDX__DISPLAY_CHARACTER)
+#define RTAS_FN_EVENT_SCAN                        rtas_fn_handle(RTAS_FNIDX__EVENT_SCAN)
+#define RTAS_FN_FREEZE_TIME_BASE                  rtas_fn_handle(RTAS_FNIDX__FREEZE_TIME_BASE)
+#define RTAS_FN_GET_POWER_LEVEL                   rtas_fn_handle(RTAS_FNIDX__GET_POWER_LEVEL)
+#define RTAS_FN_GET_SENSOR_STATE                  rtas_fn_handle(RTAS_FNIDX__GET_SENSOR_STATE)
+#define RTAS_FN_GET_TERM_CHAR                     rtas_fn_handle(RTAS_FNIDX__GET_TERM_CHAR)
+#define RTAS_FN_GET_TIME_OF_DAY                   rtas_fn_handle(RTAS_FNIDX__GET_TIME_OF_DAY)
+#define RTAS_FN_IBM_ACTIVATE_FIRMWARE             rtas_fn_handle(RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE)
+#define RTAS_FN_IBM_CBE_START_PTCAL               rtas_fn_handle(RTAS_FNIDX__IBM_CBE_START_PTCAL)
+#define RTAS_FN_IBM_CBE_STOP_PTCAL                rtas_fn_handle(RTAS_FNIDX__IBM_CBE_STOP_PTCAL)
+#define RTAS_FN_IBM_CHANGE_MSI                    rtas_fn_handle(RTAS_FNIDX__IBM_CHANGE_MSI)
+#define RTAS_FN_IBM_CLOSE_ERRINJCT                rtas_fn_handle(RTAS_FNIDX__IBM_CLOSE_ERRINJCT)
+#define RTAS_FN_IBM_CONFIGURE_BRIDGE              rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_BRIDGE)
+#define RTAS_FN_IBM_CONFIGURE_CONNECTOR           rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR)
+#define RTAS_FN_IBM_CONFIGURE_KERNEL_DUMP         rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP)
+#define RTAS_FN_IBM_CONFIGURE_PE                  rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_PE)
+#define RTAS_FN_IBM_CREATE_PE_DMA_WINDOW          rtas_fn_handle(RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_DISPLAY_MESSAGE               rtas_fn_handle(RTAS_FNIDX__IBM_DISPLAY_MESSAGE)
+#define RTAS_FN_IBM_ERRINJCT                      rtas_fn_handle(RTAS_FNIDX__IBM_ERRINJCT)
+#define RTAS_FN_IBM_EXTI2C                        rtas_fn_handle(RTAS_FNIDX__IBM_EXTI2C)
+#define RTAS_FN_IBM_GET_CONFIG_ADDR_INFO          rtas_fn_handle(RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO)
+#define RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2         rtas_fn_handle(RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2)
+#define RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE      rtas_fn_handle(RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE)
+#define RTAS_FN_IBM_GET_INDICES                   rtas_fn_handle(RTAS_FNIDX__IBM_GET_INDICES)
+#define RTAS_FN_IBM_GET_RIO_TOPOLOGY              rtas_fn_handle(RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY)
+#define RTAS_FN_IBM_GET_SYSTEM_PARAMETER          rtas_fn_handle(RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER)
+#define RTAS_FN_IBM_GET_VPD                       rtas_fn_handle(RTAS_FNIDX__IBM_GET_VPD)
+#define RTAS_FN_IBM_GET_XIVE                      rtas_fn_handle(RTAS_FNIDX__IBM_GET_XIVE)
+#define RTAS_FN_IBM_INT_OFF                       rtas_fn_handle(RTAS_FNIDX__IBM_INT_OFF)
+#define RTAS_FN_IBM_INT_ON                        rtas_fn_handle(RTAS_FNIDX__IBM_INT_ON)
+#define RTAS_FN_IBM_IO_QUIESCE_ACK                rtas_fn_handle(RTAS_FNIDX__IBM_IO_QUIESCE_ACK)
+#define RTAS_FN_IBM_LPAR_PERFTOOLS                rtas_fn_handle(RTAS_FNIDX__IBM_LPAR_PERFTOOLS)
+#define RTAS_FN_IBM_MANAGE_FLASH_IMAGE            rtas_fn_handle(RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE)
+#define RTAS_FN_IBM_MANAGE_STORAGE_PRESERVATION   rtas_fn_handle(RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION)
+#define RTAS_FN_IBM_NMI_INTERLOCK                 rtas_fn_handle(RTAS_FNIDX__IBM_NMI_INTERLOCK)
+#define RTAS_FN_IBM_NMI_REGISTER                  rtas_fn_handle(RTAS_FNIDX__IBM_NMI_REGISTER)
+#define RTAS_FN_IBM_OPEN_ERRINJCT                 rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_ERRINJCT)
+#define RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE     rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE)
+#define RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER      rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER)
+#define RTAS_FN_IBM_OS_TERM                       rtas_fn_handle(RTAS_FNIDX__IBM_OS_TERM)
+#define RTAS_FN_IBM_PARTNER_CONTROL               rtas_fn_handle(RTAS_FNIDX__IBM_PARTNER_CONTROL)
+#define RTAS_FN_IBM_PHYSICAL_ATTESTATION          rtas_fn_handle(RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION)
+#define RTAS_FN_IBM_PLATFORM_DUMP                 rtas_fn_handle(RTAS_FNIDX__IBM_PLATFORM_DUMP)
+#define RTAS_FN_IBM_POWER_OFF_UPS                 rtas_fn_handle(RTAS_FNIDX__IBM_POWER_OFF_UPS)
+#define RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER rtas_fn_handle(RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER)
+#define RTAS_FN_IBM_QUERY_PE_DMA_WINDOW           rtas_fn_handle(RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_READ_PCI_CONFIG               rtas_fn_handle(RTAS_FNIDX__IBM_READ_PCI_CONFIG)
+#define RTAS_FN_IBM_READ_SLOT_RESET_STATE         rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE)
+#define RTAS_FN_IBM_READ_SLOT_RESET_STATE2        rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2)
+#define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW          rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS          rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS)
+#define RTAS_FN_IBM_SCAN_LOG_DUMP                 rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP)
+#define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR         rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR)
+#define RTAS_FN_IBM_SET_EEH_OPTION                rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION)
+#define RTAS_FN_IBM_SET_SLOT_RESET                rtas_fn_handle(RTAS_FNIDX__IBM_SET_SLOT_RESET)
+#define RTAS_FN_IBM_SET_SYSTEM_PARAMETER          rtas_fn_handle(RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER)
+#define RTAS_FN_IBM_SET_XIVE                      rtas_fn_handle(RTAS_FNIDX__IBM_SET_XIVE)
+#define RTAS_FN_IBM_SLOT_ERROR_DETAIL             rtas_fn_handle(RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL)
+#define RTAS_FN_IBM_SUSPEND_ME                    rtas_fn_handle(RTAS_FNIDX__IBM_SUSPEND_ME)
+#define RTAS_FN_IBM_TUNE_DMA_PARMS                rtas_fn_handle(RTAS_FNIDX__IBM_TUNE_DMA_PARMS)
+#define RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT    rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT)
+#define RTAS_FN_IBM_UPDATE_NODES                  rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_NODES)
+#define RTAS_FN_IBM_UPDATE_PROPERTIES             rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_PROPERTIES)
+#define RTAS_FN_IBM_VALIDATE_FLASH_IMAGE          rtas_fn_handle(RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE)
+#define RTAS_FN_IBM_WRITE_PCI_CONFIG              rtas_fn_handle(RTAS_FNIDX__IBM_WRITE_PCI_CONFIG)
+#define RTAS_FN_NVRAM_FETCH                       rtas_fn_handle(RTAS_FNIDX__NVRAM_FETCH)
+#define RTAS_FN_NVRAM_STORE                       rtas_fn_handle(RTAS_FNIDX__NVRAM_STORE)
+#define RTAS_FN_POWER_OFF                         rtas_fn_handle(RTAS_FNIDX__POWER_OFF)
+#define RTAS_FN_PUT_TERM_CHAR                     rtas_fn_handle(RTAS_FNIDX__PUT_TERM_CHAR)
+#define RTAS_FN_QUERY_CPU_STOPPED_STATE           rtas_fn_handle(RTAS_FNIDX__QUERY_CPU_STOPPED_STATE)
+#define RTAS_FN_READ_PCI_CONFIG                   rtas_fn_handle(RTAS_FNIDX__READ_PCI_CONFIG)
+#define RTAS_FN_RTAS_LAST_ERROR                   rtas_fn_handle(RTAS_FNIDX__RTAS_LAST_ERROR)
+#define RTAS_FN_SET_INDICATOR                     rtas_fn_handle(RTAS_FNIDX__SET_INDICATOR)
+#define RTAS_FN_SET_POWER_LEVEL                   rtas_fn_handle(RTAS_FNIDX__SET_POWER_LEVEL)
+#define RTAS_FN_SET_TIME_FOR_POWER_ON             rtas_fn_handle(RTAS_FNIDX__SET_TIME_FOR_POWER_ON)
+#define RTAS_FN_SET_TIME_OF_DAY                   rtas_fn_handle(RTAS_FNIDX__SET_TIME_OF_DAY)
+#define RTAS_FN_START_CPU                         rtas_fn_handle(RTAS_FNIDX__START_CPU)
+#define RTAS_FN_STOP_SELF                         rtas_fn_handle(RTAS_FNIDX__STOP_SELF)
+#define RTAS_FN_SYSTEM_REBOOT                     rtas_fn_handle(RTAS_FNIDX__SYSTEM_REBOOT)
+#define RTAS_FN_THAW_TIME_BASE                    rtas_fn_handle(RTAS_FNIDX__THAW_TIME_BASE)
+#define RTAS_FN_WRITE_PCI_CONFIG                  rtas_fn_handle(RTAS_FNIDX__WRITE_PCI_CONFIG)
+
+#define RTAS_UNKNOWN_SERVICE (-1)
+#define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */
+
+/* Memory set aside for sys_rtas to use with calls that need a work area. */
+#define RTAS_USER_REGION_SIZE (64 * 1024)
+
+/* RTAS return status codes */
+#define RTAS_HARDWARE_ERROR	-1    /* Hardware Error */
+#define RTAS_BUSY		-2    /* RTAS Busy */
+#define RTAS_INVALID_PARAMETER	-3    /* Invalid indicator/domain/sensor etc. */
+#define RTAS_EXTENDED_DELAY_MIN	9900
+#define RTAS_EXTENDED_DELAY_MAX	9905
+
+/* statuses specific to ibm,suspend-me */
+#define RTAS_SUSPEND_ABORTED     9000 /* Suspension aborted */
+#define RTAS_NOT_SUSPENDABLE    -9004 /* Partition not suspendable */
+#define RTAS_THREADS_ACTIVE     -9005 /* Multiple processor threads active */
+#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */
+
+/* RTAS event classes */
+#define RTAS_INTERNAL_ERROR		0x80000000 /* set bit 0 */
+#define RTAS_EPOW_WARNING		0x40000000 /* set bit 1 */
+#define RTAS_HOTPLUG_EVENTS		0x10000000 /* set bit 3 */
+#define RTAS_IO_EVENTS			0x08000000 /* set bit 4 */
+#define RTAS_EVENT_SCAN_ALL_EVENTS	0xffffffff
+
+/* RTAS event severity */
+#define RTAS_SEVERITY_FATAL		0x5
+#define RTAS_SEVERITY_ERROR		0x4
+#define RTAS_SEVERITY_ERROR_SYNC	0x3
+#define RTAS_SEVERITY_WARNING		0x2
+#define RTAS_SEVERITY_EVENT		0x1
+#define RTAS_SEVERITY_NO_ERROR		0x0
+
+/* RTAS event disposition */
+#define RTAS_DISP_FULLY_RECOVERED	0x0
+#define RTAS_DISP_LIMITED_RECOVERY	0x1
+#define RTAS_DISP_NOT_RECOVERED		0x2
+
+/* RTAS event initiator */
+#define RTAS_INITIATOR_UNKNOWN		0x0
+#define RTAS_INITIATOR_CPU		0x1
+#define RTAS_INITIATOR_PCI		0x2
+#define RTAS_INITIATOR_ISA		0x3
+#define RTAS_INITIATOR_MEMORY		0x4
+#define RTAS_INITIATOR_POWERMGM		0x5
+
+/* RTAS event target */
+#define RTAS_TARGET_UNKNOWN		0x0
+#define RTAS_TARGET_CPU			0x1
+#define RTAS_TARGET_PCI			0x2
+#define RTAS_TARGET_ISA			0x3
+#define RTAS_TARGET_MEMORY		0x4
+#define RTAS_TARGET_POWERMGM		0x5
+
+/* RTAS event type */
+#define RTAS_TYPE_RETRY			0x01
+#define RTAS_TYPE_TCE_ERR		0x02
+#define RTAS_TYPE_INTERN_DEV_FAIL	0x03
+#define RTAS_TYPE_TIMEOUT		0x04
+#define RTAS_TYPE_DATA_PARITY		0x05
+#define RTAS_TYPE_ADDR_PARITY		0x06
+#define RTAS_TYPE_CACHE_PARITY		0x07
+#define RTAS_TYPE_ADDR_INVALID		0x08
+#define RTAS_TYPE_ECC_UNCORR		0x09
+#define RTAS_TYPE_ECC_CORR		0x0a
+#define RTAS_TYPE_EPOW			0x40
+#define RTAS_TYPE_PLATFORM		0xE0
+#define RTAS_TYPE_IO			0xE1
+#define RTAS_TYPE_INFO			0xE2
+#define RTAS_TYPE_DEALLOC		0xE3
+#define RTAS_TYPE_DUMP			0xE4
+#define RTAS_TYPE_HOTPLUG		0xE5
+/* I don't add PowerMGM events right now, this is a different topic */ 
+#define RTAS_TYPE_PMGM_POWER_SW_ON	0x60
+#define RTAS_TYPE_PMGM_POWER_SW_OFF	0x61
+#define RTAS_TYPE_PMGM_LID_OPEN		0x62
+#define RTAS_TYPE_PMGM_LID_CLOSE	0x63
+#define RTAS_TYPE_PMGM_SLEEP_BTN	0x64
+#define RTAS_TYPE_PMGM_WAKE_BTN		0x65
+#define RTAS_TYPE_PMGM_BATTERY_WARN	0x66
+#define RTAS_TYPE_PMGM_BATTERY_CRIT	0x67
+#define RTAS_TYPE_PMGM_SWITCH_TO_BAT	0x68
+#define RTAS_TYPE_PMGM_SWITCH_TO_AC	0x69
+#define RTAS_TYPE_PMGM_KBD_OR_MOUSE	0x6a
+#define RTAS_TYPE_PMGM_ENCLOS_OPEN	0x6b
+#define RTAS_TYPE_PMGM_ENCLOS_CLOSED	0x6c
+#define RTAS_TYPE_PMGM_RING_INDICATE	0x6d
+#define RTAS_TYPE_PMGM_LAN_ATTENTION	0x6e
+#define RTAS_TYPE_PMGM_TIME_ALARM	0x6f
+#define RTAS_TYPE_PMGM_CONFIG_CHANGE	0x70
+#define RTAS_TYPE_PMGM_SERVICE_PROC	0x71
+/* Platform Resource Reassignment Notification */
+#define RTAS_TYPE_PRRN			0xA0
+
+/* RTAS check-exception vector offset */
+#define RTAS_VECTOR_EXTERNAL_INTERRUPT	0x500
+
+static inline uint8_t rtas_error_severity(const struct rtas_error_log *elog)
+{
+	return (elog->byte1 >> 5) & 0x07;	/* top three bits of byte1 */
+}
+
+static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog)
+{
+	return (elog->byte1 >> 3) & 0x03;	/* two-bit field under mask 0x18 */
+}
+
+static inline
+void rtas_set_disposition_recovered(struct rtas_error_log *elog)
+{
+	elog->byte1 = (elog->byte1 & ~0x18) |
+		      (RTAS_DISP_FULLY_RECOVERED << 3);
+}
+
+static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
+{
+	return (elog->byte1 >> 2) & 0x01;	/* single flag bit, mask 0x04 */
+}
+
+static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
+{
+	return (elog->byte2 >> 4) & 0x0f;	/* high nibble of byte2 */
+}
+
+#define rtas_error_type(x)	((x)->byte3)
+
+static inline
+uint32_t rtas_error_extended_log_length(const struct rtas_error_log *elog)
+{
+	return be32_to_cpu(elog->extended_log_length);
+}
+
+#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG	14
+
+#define RTAS_V6EXT_COMPANY_ID_IBM	(('I' << 24) | ('B' << 16) | ('M' << 8))
+
+static
+inline uint8_t rtas_ext_event_log_format(struct rtas_ext_event_log_v6 *ext_log)
+{
+	return ext_log->byte2 & 0x0F;
+}
+
+static
+inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log)
+{
+	return be32_to_cpu(ext_log->company_id);
+}
+
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR		(('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR		(('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC	(('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH	(('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS	(('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC	(('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR	(('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR		(('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID	(('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID	(('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID		(('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW		(('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT		(('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO	(('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME		(('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF		(('U' << 8) | 'D')
+#define PSERIES_ELOG_SECT_ID_HOTPLUG		(('H' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_MCE		(('M' << 8) | 'C')
+
+static
+inline uint16_t pseries_errorlog_id(struct pseries_errorlog *sect)
+{
+	return be16_to_cpu(sect->id);
+}
+
+static
+inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect)
+{
+	return be16_to_cpu(sect->length);
+}
+
+#define PSERIES_HP_ELOG_RESOURCE_CPU	1
+#define PSERIES_HP_ELOG_RESOURCE_MEM	2
+#define PSERIES_HP_ELOG_RESOURCE_SLOT	3
+#define PSERIES_HP_ELOG_RESOURCE_PHB	4
+#define PSERIES_HP_ELOG_RESOURCE_PMEM   6
+
+#define PSERIES_HP_ELOG_ACTION_ADD	1
+#define PSERIES_HP_ELOG_ACTION_REMOVE	2
+
+#define PSERIES_HP_ELOG_ID_DRC_NAME	1
+#define PSERIES_HP_ELOG_ID_DRC_INDEX	2
+#define PSERIES_HP_ELOG_ID_DRC_COUNT	3
+#define PSERIES_HP_ELOG_ID_DRC_IC	4
+
+struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+					      uint16_t section_id);
+
+/*
+ * This can be set by the rtas_flash module so that it can get called
+ * as the absolutely last thing before the kernel terminates.
+ */
+extern void (*rtas_flash_term_hook)(int);
+
+extern struct rtas_t rtas;
+
+s32 rtas_function_token(const rtas_fn_handle_t handle);
+static inline bool rtas_function_implemented(const rtas_fn_handle_t handle)
+{
+	return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
+}
+extern int rtas_token(const char *service);
+extern int rtas_service_present(const char *service);
+extern int rtas_call(int token, int, int, int *, ...);
+void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
+			int nret, ...);
+extern void __noreturn rtas_restart(char *cmd);
+extern void rtas_power_off(void);
+extern void __noreturn rtas_halt(void);
+extern void rtas_os_term(char *str);
+void rtas_activate_firmware(void);
+extern int rtas_get_sensor(int sensor, int index, int *state);
+extern int rtas_get_sensor_fast(int sensor, int index, int *state);
+extern int rtas_get_power_level(int powerdomain, int *level);
+extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
+extern bool rtas_indicator_present(int token, int *maxindex);
+extern int rtas_set_indicator(int indicator, int index, int new_value);
+extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
+extern void rtas_progress(char *s, unsigned short hex);
+int rtas_ibm_suspend_me(int *fw_status);
+int rtas_error_rc(int rtas_rc);
+
+struct rtc_time;
+extern time64_t rtas_get_boot_time(void);
+extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
+extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
+
+extern unsigned int rtas_busy_delay_time(int status);
+bool rtas_busy_delay(int status);
+
+extern int early_init_dt_scan_rtas(unsigned long node,
+		const char *uname, int depth, void *data);
+
+extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+
+#ifdef CONFIG_PPC_PSERIES
+extern time64_t last_rtas_event;
+extern int clobbering_unread_rtas_event(void);
+extern void post_mobility_fixup(void);
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
+#else /* !CONFIG_PPC_PSERIES */
+static inline int clobbering_unread_rtas_event(void) { return 0; }
+static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_RTAS_DAEMON
+extern void rtas_cancel_event_scan(void);
+#else /* !CONFIG_PPC_RTAS_DAEMON */
+static inline void rtas_cancel_event_scan(void) { }
+#endif /* CONFIG_PPC_RTAS_DAEMON */
+
+/* Error types logged.  */
+#define ERR_FLAG_ALREADY_LOGGED	0x0
+#define ERR_FLAG_BOOT		0x1 	/* log was pulled from NVRAM on boot */
+#define ERR_TYPE_RTAS_LOG	0x2	/* from rtas event-scan */
+#define ERR_TYPE_KERNEL_PANIC	0x4	/* from die()/panic() */
+#define ERR_TYPE_KERNEL_PANIC_GZ 0x8	/* ditto, compressed */
+
+/* All the types and not flags */
+#define ERR_TYPE_MASK \
+	(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ)
+
+#define RTAS_DEBUG KERN_DEBUG "RTAS: "
+ 
+#define RTAS_ERROR_LOG_MAX 2048
+
+/*
+ * Return the firmware-specified size of the error log buffer
+ *  for all rtas calls that require an error buffer argument.
+ *  This includes 'check-exception' and 'rtas-last-error'.
+ */
+extern int rtas_get_error_log_max(void);
+
+/* Event Scan Parameters */
+#define EVENT_SCAN_ALL_EVENTS	0xf0000000
+#define SURVEILLANCE_TOKEN	9000
+#define LOG_NUMBER		64		/* must be a power of two */
+#define LOG_NUMBER_MASK		(LOG_NUMBER-1)
+
+/* Some RTAS ops require a data buffer and that buffer must be < 4G.
+ * Rather than having a memory allocator, just use this buffer
+ * (get the lock first), make the RTAS call.  Copy the data instead
+ * of holding the buffer for long.
+ */
+
+#define RTAS_DATA_BUF_SIZE 4096
+extern spinlock_t rtas_data_buf_lock;
+extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
+
+/* RMO buffer reserved for user-space RTAS use */
+extern unsigned long rtas_rmo_buf;
+
+#define GLOBAL_INTERRUPT_QUEUE 9005
+
+/**
+ * rtas_config_addr - Format a busno, devfn and reg for RTAS.
+ * @busno: The bus number.
+ * @devfn: The device and function number as encoded by PCI_DEVFN().
+ * @reg: The register number.
+ *
+ * Encode busno, devfn and reg as required for RTAS calls that take a
+ * "config_addr" parameter; see PAPR requirement 7.3.4-1 for more info.
+ * Shifts are done on u32 values so no bit is shifted into an int's sign bit.
+ */
+static inline u32 rtas_config_addr(int busno, int devfn, int reg)
+{
+	return (((u32)reg & 0xf00) << 20) | (((u32)busno & 0xff) << 16) |
+			((u32)devfn << 8) | ((u32)reg & 0xff);
+}
+
+extern void rtas_give_timebase(void);
+extern void rtas_take_timebase(void);
+
+#ifdef CONFIG_PPC_RTAS
+static inline int page_is_rtas_user_buf(unsigned long pfn)
+{
+	unsigned long paddr = pfn << PAGE_SHIFT;
+
+	/* Nonzero iff paddr is inside the RMO region set aside for sys_rtas. */
+	return paddr >= rtas_rmo_buf && paddr < rtas_rmo_buf + RTAS_USER_REGION_SIZE;
+}
+
+/* Not the best place to put pSeries_coalesce_init, will be fixed when we
+ * move some of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
+void rtas_initialize(void);
+#else /* !CONFIG_PPC_RTAS */
+static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
+static inline void pSeries_coalesce_init(void) { }
+static inline void rtas_initialize(void) { }
+#endif /* CONFIG_PPC_RTAS */
+
+extern int call_rtas(const char *, int, int, unsigned long *, ...);
+
+#ifdef CONFIG_HV_PERF_CTRS
+void read_24x7_sys_info(void);
+#else /* !CONFIG_HV_PERF_CTRS */
+static inline void read_24x7_sys_info(void) { }
+#endif /* CONFIG_HV_PERF_CTRS */
+
+#endif /* __KERNEL__ */
+#endif /* _POWERPC_RTAS_H */
diff --git a/arch/powerpc/include/asm/runlatch.h b/arch/powerpc/include/asm/runlatch.h
new file mode 100644
index 0000000000..ceb66d761f
--- /dev/null
+++ b/arch/powerpc/include/asm/runlatch.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_RUNLATCH_H
+#define _ASM_POWERPC_RUNLATCH_H
+
+#ifdef CONFIG_PPC64
+
+extern void __ppc64_runlatch_on(void);
+extern void __ppc64_runlatch_off(void);
+
+/*
+ * We manually hard enable-disable, this is called
+ * in the idle loop and we don't want to mess up
+ * with soft-disable/enable & interrupt replay.
+ */
+#define ppc64_runlatch_off()					\
+	do {							\
+		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
+		    test_thread_local_flags(_TLF_RUNLATCH)) {	\
+			__hard_irq_disable();			\
+			__ppc64_runlatch_off();			\
+			if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \
+				__hard_irq_enable();		\
+		}      						\
+	} while (0)
+
+#define ppc64_runlatch_on()					\
+	do {							\
+		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
+		    !test_thread_local_flags(_TLF_RUNLATCH)) {	\
+			__hard_irq_disable();			\
+			__ppc64_runlatch_on();			\
+			if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \
+				__hard_irq_enable();		\
+		}      						\
+	} while (0)
+#else
+#define ppc64_runlatch_on()
+#define ppc64_runlatch_off()
+#endif /* CONFIG_PPC64 */
+
+#endif /* _ASM_POWERPC_RUNLATCH_H */
diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h
new file mode 100644
index 0000000000..ac2033f134
--- /dev/null
+++ b/arch/powerpc/include/asm/seccomp.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SECCOMP_H
+#define _ASM_POWERPC_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#include <asm-generic/seccomp.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define __SECCOMP_ARCH_LE		__AUDIT_ARCH_LE
+#define __SECCOMP_ARCH_LE_NAME		"le"
+#else
+#define __SECCOMP_ARCH_LE		0
+#define __SECCOMP_ARCH_LE_NAME
+#endif
+
+#ifdef CONFIG_PPC64
+# define SECCOMP_ARCH_NATIVE		(AUDIT_ARCH_PPC64 | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"ppc64" __SECCOMP_ARCH_LE_NAME
+# ifdef CONFIG_COMPAT
+#  define SECCOMP_ARCH_COMPAT		(AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+#  define SECCOMP_ARCH_COMPAT_NR	NR_syscalls
+#  define SECCOMP_ARCH_COMPAT_NAME	"ppc" __SECCOMP_ARCH_LE_NAME
+# endif
+#else /* !CONFIG_PPC64 */
+# define SECCOMP_ARCH_NATIVE		(AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME	"ppc" __SECCOMP_ARCH_LE_NAME
+#endif
+
+#endif	/* _ASM_POWERPC_SECCOMP_H */
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
new file mode 100644
index 0000000000..ea26665f82
--- /dev/null
+++ b/arch/powerpc/include/asm/sections.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SECTIONS_H
+#define _ASM_POWERPC_SECTIONS_H
+#ifdef __KERNEL__
+
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+
+#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS
+typedef struct func_desc func_desc_t;
+#endif
+
+#include <asm-generic/sections.h>
+
+extern char __head_end[];
+extern char __srwx_boundary[];
+
+/* Patch sites */
+extern s32 patch__call_flush_branch_caches1;
+extern s32 patch__call_flush_branch_caches2;
+extern s32 patch__call_flush_branch_caches3;
+extern s32 patch__flush_count_cache_return;
+extern s32 patch__flush_link_stack_return;
+extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
+extern s32 patch__memset_nocache, patch__memcpy_nocache;
+
+extern long flush_branch_caches;
+extern long kvm_flush_link_stack;
+
+#ifdef __powerpc64__
+
+extern char __start_interrupts[];
+extern char __end_interrupts[];
+
+#ifdef CONFIG_PPC_POWERNV
+extern char start_real_trampolines[];
+extern char end_real_trampolines[];
+extern char start_virt_trampolines[];
+extern char end_virt_trampolines[];
+#endif
+
+/*
+ * This assumes the kernel is never compiled -mcmodel=small or
+ * the total .toc is always less than 64k.
+ */
+static inline unsigned long kernel_toc_addr(void)
+{
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	BUILD_BUG();
+	return -1UL;
+#else
+	unsigned long toc_ptr;
+
+	asm volatile("mr %0, 2" : "=r" (toc_ptr));
+	return toc_ptr;
+#endif
+}
+
+static inline int overlaps_interrupt_vector_text(unsigned long start,
+							unsigned long end)
+{
+	const unsigned long vec_start = __start_interrupts - _stext;
+	const unsigned long vec_end = __end_interrupts - _stext;
+
+	if (start >= (unsigned long)__va(vec_end))
+		return 0;
+	return (unsigned long)__va(vec_start) < end;
+}
+
+static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
+{
+	return (unsigned long)_stext < end &&
+		start < (unsigned long)__init_end;
+}
+
+#else
+static inline unsigned long kernel_toc_addr(void) { BUILD_BUG(); return -1UL; }
+#endif
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_SECTIONS_H */
diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h
new file mode 100644
index 0000000000..a2ff556916
--- /dev/null
+++ b/arch/powerpc/include/asm/secure_boot.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Secure boot definitions
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#ifndef _ASM_POWER_SECURE_BOOT_H
+#define _ASM_POWER_SECURE_BOOT_H
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+bool is_ppc_secureboot_enabled(void);
+bool is_ppc_trustedboot_enabled(void);
+
+#else /* !CONFIG_PPC_SECURE_BOOT */
+
+static inline bool is_ppc_secureboot_enabled(void)
+{
+	return false;
+}
+
+static inline bool is_ppc_trustedboot_enabled(void)
+{
+	return false;
+}
+
+#endif /* CONFIG_PPC_SECURE_BOOT */
+#endif /* _ASM_POWER_SECURE_BOOT_H */
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644
index 0000000000..27574f218b
--- /dev/null
+++ b/arch/powerpc/include/asm/security_features.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern u64 powerpc_security_features;
+extern bool rfi_flush;
+
+/* These are bit flags */
+enum stf_barrier_type {
+	STF_BARRIER_NONE	= 0x1,
+	STF_BARRIER_FALLBACK	= 0x2,
+	STF_BARRIER_EIEIO	= 0x4,
+	STF_BARRIER_SYNC_ORI	= 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+void setup_count_cache_flush(void);
+
+static inline void security_ftr_set(u64 feature)
+{
+	powerpc_security_features |= feature;
+}
+
+static inline void security_ftr_clear(u64 feature)
+{
+	powerpc_security_features &= ~feature;
+}
+
+static inline bool security_ftr_enabled(u64 feature)
+{
+	return (powerpc_security_features & feature) != 0;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; }
+#endif
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30		0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2		0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31		0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED	0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV		0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED	0x0000000000000020ull
+
+// bcctr 2,0,0 triggers a hardware assisted count cache flush
+#define SEC_FTR_BCCTR_FLUSH_ASSIST	0x0000000000000800ull
+
+// bcctr 2,0,0 triggers a hardware assisted link stack flush
+#define SEC_FTR_BCCTR_LINK_FLUSH_ASSIST	0x0000000000002000ull
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV		0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR		0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR	0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY		0x0000000000000200ull
+
+// Software required to flush count cache on context switch
+#define SEC_FTR_FLUSH_COUNT_CACHE	0x0000000000000400ull
+
+// Software required to flush link stack on context switch
+#define SEC_FTR_FLUSH_LINK_STACK	0x0000000000001000ull
+
+// The L1-D cache should be flushed when entering the kernel
+#define SEC_FTR_L1D_FLUSH_ENTRY		0x0000000000004000ull
+
+// The L1-D cache should be flushed after user accesses from the kernel
+#define SEC_FTR_L1D_FLUSH_UACCESS	0x0000000000008000ull
+
+// The STF flush should be executed on privilege state switch
+#define SEC_FTR_STF_BARRIER		0x0000000000010000ull
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+	(SEC_FTR_L1D_FLUSH_HV | \
+	 SEC_FTR_L1D_FLUSH_PR | \
+	 SEC_FTR_BNDS_CHK_SPEC_BAR | \
+	 SEC_FTR_L1D_FLUSH_ENTRY | \
+	 SEC_FTR_L1D_FLUSH_UACCESS | \
+	 SEC_FTR_STF_BARRIER | \
+	 SEC_FTR_FAVOUR_SECURITY)
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h
new file mode 100644
index 0000000000..4828e0ab7e
--- /dev/null
+++ b/arch/powerpc/include/asm/secvar.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * PowerPC secure variable operations.
+ */
+#ifndef SECVAR_OPS_H
+#define SECVAR_OPS_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sysfs.h>
+
+extern const struct secvar_operations *secvar_ops;
+
+struct secvar_operations {	/* operations table: type of secvar_ops and of the set_secvar_ops() argument */
+	int (*get)(const char *key, u64 key_len, u8 *data, u64 *data_size);	/* data_size is a pointer here, by value in set() */
+	int (*get_next)(const char *key, u64 *key_len, u64 keybufsize);	/* optional: see var_names below */
+	int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size);
+	ssize_t (*format)(char *buf, size_t bufsize);
+	int (*max_size)(u64 *max_size);
+	const struct attribute **config_attrs;
+
+	// NULL-terminated array of fixed variable names.
+	// Only used if the get_next() callback isn't provided.
+	const char * const *var_names;
+};
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+int set_secvar_ops(const struct secvar_operations *ops);
+
+#else /* !CONFIG_PPC_SECURE_BOOT: the stub ignores ops and returns 0 */
+
+static inline int set_secvar_ops(const struct secvar_operations *ops) { return 0; }
+
+#endif /* CONFIG_PPC_SECURE_BOOT */
+
+#endif
diff --git a/arch/powerpc/include/asm/serial.h b/arch/powerpc/include/asm/serial.h
new file mode 100644
index 0000000000..cd6c18d0e6
--- /dev/null
+++ b/arch/powerpc/include/asm/serial.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ */
+#ifndef _ASM_POWERPC_SERIAL_H
+#define _ASM_POWERPC_SERIAL_H
+
+/*
+ * Serial ports are not listed here, because they are discovered
+ * through the device tree.
+ */
+
+/* Default baud base if not found in device-tree */
+#define BASE_BAUD ( 1843200 / 16 )
+
+#ifdef CONFIG_PPC_UDBG_16550
+extern void find_legacy_serial_ports(void);
+#else /* !CONFIG_PPC_UDBG_16550: a call expands to an empty statement */
+#define find_legacy_serial_ports()	do { } while (0)
+#endif /* CONFIG_PPC_UDBG_16550 */
+
+#endif /* _ASM_POWERPC_SERIAL_H */
diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
new file mode 100644
index 0000000000..7ebc807aa8
--- /dev/null
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SET_MEMORY_H
+#define _ASM_POWERPC_SET_MEMORY_H
+
+#define SET_MEMORY_RO	0	/* action codes passed to change_memory_attr() */
+#define SET_MEMORY_RW	1
+#define SET_MEMORY_NX	2
+#define SET_MEMORY_X	3
+#define SET_MEMORY_NP	4	/* Set memory non present */
+#define SET_MEMORY_P	5	/* Set memory present */
+
+int change_memory_attr(unsigned long addr, int numpages, long action);	/* action: one of SET_MEMORY_* above */
+
+static inline int set_memory_ro(unsigned long addr, int numpages)	/* each wrapper below forwards one fixed action */
+{
+	return change_memory_attr(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_memory_attr(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_memory_attr(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_memory_attr(addr, numpages, SET_MEMORY_X);
+}
+
+static inline int set_memory_np(unsigned long addr, int numpages)
+{
+	return change_memory_attr(addr, numpages, SET_MEMORY_NP);
+}
+
+static inline int set_memory_p(unsigned long addr, int numpages)
+{
+	return change_memory_attr(addr, numpages, SET_MEMORY_P);
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h
new file mode 100644
index 0000000000..f798e80e41
--- /dev/null
+++ b/arch/powerpc/include/asm/setjmp.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright © 2008 Michael Neuling IBM Corporation
+ */
+#ifndef _ASM_POWERPC_SETJMP_H
+#define _ASM_POWERPC_SETJMP_H
+
+#define JMP_BUF_LEN    23	/* number of long slots in a jmp_buf */
+
+typedef long jmp_buf[JMP_BUF_LEN];	/* buffer type taken by setjmp()/longjmp() below */
+
+extern int setjmp(jmp_buf env) __attribute__((returns_twice));
+extern void longjmp(jmp_buf env, int val) __attribute__((noreturn));
+
+#endif /* _ASM_POWERPC_SETJMP_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
new file mode 100644
index 0000000000..eed74c1fb8
--- /dev/null
+++ b/arch/powerpc/include/asm/setup.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SETUP_H
+#define _ASM_POWERPC_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+#ifndef __ASSEMBLY__
+extern void ppc_printk_progress(char *s, unsigned short hex);
+
+extern unsigned long long memory_limit;
+
+struct device_node;
+
+/* Used in very early kernel initialization; PTRRELOC(x) passes x through add_reloc_offset() and casts back to x's type. */
+extern unsigned long reloc_offset(void);
+extern unsigned long add_reloc_offset(unsigned long);
+extern void reloc_got2(unsigned long);
+
+#define PTRRELOC(x)	((typeof(x)) add_reloc_offset((unsigned long)(x)))
+
+void check_for_initrd(void);
+void mem_topology_setup(void);
+void initmem_init(void);
+void setup_panic(void);
+#define ARCH_PANIC_TIMEOUT 180
+
+#ifdef CONFIG_PPC_PSERIES
+extern bool pseries_reloc_on_exception(void);
+extern bool pseries_enable_reloc_on_exc(void);
+extern void pseries_disable_reloc_on_exc(void);
+extern void pseries_big_endian_exceptions(void);
+void __init pseries_little_endian_exceptions(void);
+#else /* !CONFIG_PPC_PSERIES: empty stubs, the bool ones return false */
+static inline bool pseries_reloc_on_exception(void) { return false; }
+static inline bool pseries_enable_reloc_on_exc(void) { return false; }
+static inline void pseries_disable_reloc_on_exc(void) {}
+static inline void pseries_big_endian_exceptions(void) {}
+static inline void pseries_little_endian_exceptions(void) {}
+#endif /* CONFIG_PPC_PSERIES */
+
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags, one bit each; note that L1D_FLUSH_NONE is 0x1, not 0 */
+enum l1d_flush_type {
+	L1D_FLUSH_NONE		= 0x1,
+	L1D_FLUSH_FALLBACK	= 0x2,
+	L1D_FLUSH_ORI		= 0x4,
+	L1D_FLUSH_MTTRIG	= 0x8,
+};
+
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_entry_flush(bool enable);
+void setup_uaccess_flush(bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+void __init setup_barrier_nospec(void);
+#else /* !CONFIG_PPC_BARRIER_NOSPEC: empty stub */
+static inline void setup_barrier_nospec(void) { }
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+void do_uaccess_flush_fixups(enum l1d_flush_type types);
+void do_entry_flush_fixups(enum l1d_flush_type types);
+void do_barrier_nospec_fixups(bool enable);
+extern bool barrier_nospec_enabled;
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+void do_barrier_nospec_fixups_range(bool enable, void *start, void *end);
+#else /* !CONFIG_PPC_BARRIER_NOSPEC: empty stub */
+static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+#ifdef CONFIG_PPC_E500
+void __init setup_spectre_v2(void);
+#else /* !CONFIG_PPC_E500: empty stub */
+static inline void setup_spectre_v2(void) {}
+#endif /* CONFIG_PPC_E500 */
+void __init do_btb_flush_fixups(void);
+
+#ifdef CONFIG_PPC32
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
+#endif
+void __init early_setup(unsigned long dt_ptr);
+void early_setup_secondary(void);
+
+/* prom_init (OpenFirmware) */
+unsigned long __init prom_init(unsigned long r3, unsigned long r4,
+			       unsigned long pp, unsigned long r6,
+			       unsigned long r7, unsigned long kbase);
+
+extern struct seq_buf ppc_hw_desc;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif	/* _ASM_POWERPC_SETUP_H */
+
diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
new file mode 100644
index 0000000000..8b957aabb8
--- /dev/null
+++ b/arch/powerpc/include/asm/sfp-machine.h
@@ -0,0 +1,343 @@
+/* Machine-dependent software floating-point definitions.  PPC version.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Actually, this is a PPC (32bit) version, written based on the
+   i386, sparc, and sparc64 versions, by me,
+   Peter Maydell (pmaydell@chiark.greenend.org.uk).
+   Comments are by and large also mine, although they may be inaccurate.
+
+   In picking out asm fragments I've gone with the lowest common
+   denominator, which also happens to be the hardware I have :->
+   That is, a SPARC without hardware multiply and divide.
+ */
+
+/* basic word size definitions: 32-bit words, plus helpers that split a word into halves */
+#define _FP_W_TYPE_SIZE		32
+#define _FP_W_TYPE		unsigned int
+#define _FP_WS_TYPE		signed int
+#define _FP_I_TYPE		int
+
+#define __ll_B			((UWtype) 1 << (W_TYPE_SIZE / 2))	/* 2^(W_TYPE_SIZE/2) */
+#define __ll_lowpart(t)		((UWtype) (t) & (__ll_B - 1))	/* low half of t */
+#define __ll_highpart(t)	((UWtype) (t) >> (W_TYPE_SIZE / 2))	/* high half of t */
+
+/* You can optionally code some things like addition in asm. For
+ * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't
+ * then you get a fragment of C code [if you change an #ifdef 0
+ * in op-2.h] or a call to add_ssaaaa (see below).
+ * Good places to look for asm fragments to use are gcc and glibc.
+ * gcc's longlong.h is useful.
+ */
+
+/* We need to know how to multiply and divide. If the host word size
+ * is >= 2*fracbits you can use FP_MUL_MEAT_n_imm(t,R,X,Y) which
+ * codes the multiply with whatever gcc does to 'a * b'.
+ * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm
+ * function that can multiply two 1W values and get a 2W result.
+ * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which
+ * does bitshifting to avoid overflow.
+ * For division there is FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size
+ * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or
+ * _FP_DIV_HELP_ldiv (see op-1.h).
+ * _FP_DIV_MEAT_udiv() is if you have asm to do 2W/1W => (1W, 1W).
+ * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd
+ * to do this.]
+ * In general, 'n' is the number of words required to hold the type,
+ * and 't' is either S, D or Q for single/double/quad.
+ *           -- PMM
+ */
+/* Example: SPARC64:
+ * #define _FP_MUL_MEAT_S(R,X,Y)	_FP_MUL_MEAT_1_imm(S,R,X,Y)
+ * #define _FP_MUL_MEAT_D(R,X,Y)	_FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm)
+ * #define _FP_MUL_MEAT_Q(R,X,Y)	_FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm)
+ *
+ * #define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+ * #define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv(D,R,X,Y)
+ * #define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv_64(Q,R,X,Y)
+ *
+ * Example: i386:
+ * #define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64)
+ * #define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64)
+ *
+ * #define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32)
+ * #define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
+ */
+
+#define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
+
+/* These macros define what NaN looks like. They're supposed to expand to
+ * a comma-separated set of 32bit unsigned ints that encode NaN.
+ */
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+
+#ifdef FP_EX_BOOKE_E500_SPE
+#define FP_EX_INEXACT		(1 << 21)
+#define FP_EX_INVALID		(1 << 20)
+#define FP_EX_DIVZERO		(1 << 19)
+#define FP_EX_UNDERFLOW		(1 << 18)
+#define FP_EX_OVERFLOW		(1 << 17)
+#define FP_INHIBIT_RESULTS	0
+
+#define __FPU_FPSCR	(current->thread.spefscr)	/* FP_EX_BOOKE_E500_SPE build: use thread.spefscr */
+#define __FPU_ENABLED_EXC		\
+({					\
+	(__FPU_FPSCR >> 2) & 0x1f;	/* bits 2..6 (LSB = 0) as a 5-bit value */	\
+})
+#else
+/* Exception flags.  We use the bit positions of the appropriate bits
+   in the FPSCR, which also correspond to the FE_* bits.  This makes
+   everything easier ;-).  */
+#define FP_EX_INVALID         (1 << (31 - 2))
+#define FP_EX_INVALID_SNAN	EFLAG_VXSNAN
+#define FP_EX_INVALID_ISI	EFLAG_VXISI
+#define FP_EX_INVALID_IDI	EFLAG_VXIDI
+#define FP_EX_INVALID_ZDZ	EFLAG_VXZDZ
+#define FP_EX_INVALID_IMZ	EFLAG_VXIMZ
+#define FP_EX_OVERFLOW        (1 << (31 - 3))
+#define FP_EX_UNDERFLOW       (1 << (31 - 4))
+#define FP_EX_DIVZERO         (1 << (31 - 5))
+#define FP_EX_INEXACT         (1 << (31 - 6))
+
+#define __FPU_FPSCR	(current->thread.fp_state.fpscr)
+
+/* Bits 3..7 (LSB = 0) of the FPSCR image.  We only actually write to the
+ * destination register if exceptions signalled (if any) will not trap.
+ */
+#define __FPU_ENABLED_EXC \
+({						\
+	(__FPU_FPSCR >> 3) & 0x1f;	\
+})
+
+#endif
+
+/*
+ * If X is a signaling NaN (quiet bit clear) and Y is a quiet NaN,
+ * we choose X; in every other case we choose Y.
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)		\
+	&& !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))	\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    else							\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#define __FPU_TRAP_P(bits) \
+	((__FPU_ENABLED_EXC & (bits)) != 0) /* NOTE(review): enable mask is bits 0..4, FP_EX_* are higher bits - confirm */
+
+#define __FP_PACK_S(val,X)	/* yields __exc; val is written only if __exc is 0 or would not trap */	\
+({  int __exc = _FP_PACK_CANONICAL(S,1,X);	\
+    if(!__exc || !__FPU_TRAP_P(__exc))		\
+        _FP_PACK_RAW_1_P(S,val,X);		\
+    __exc;					\
+})
+
+#define __FP_PACK_D(val,X)	/* val is written only if FP_CUR_EXCEPTIONS is 0 or would not trap */	\
+   do {									\
+	_FP_PACK_CANONICAL(D, 2, X);					\
+	if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS))	\
+		_FP_PACK_RAW_2_P(D, val, X);				\
+   } while (0)
+
+#define __FP_PACK_DS(val,X)	/* convert X to single and back to double, then pack as double */	\
+   do {										\
+	   FP_DECL_S(__X);							\
+	   FP_CONV(S, D, 1, 2, __X, X);						\
+	   _FP_PACK_CANONICAL(S, 1, __X);					\
+	   if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS)) {	\
+		   _FP_UNPACK_CANONICAL(S, 1, __X);				\
+		   FP_CONV(D, S, 2, 1, X, __X);					\
+		   _FP_PACK_CANONICAL(D, 2, X);					\
+		   if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS))	\
+		   _FP_PACK_RAW_2_P(D, val, X);					\
+	   }									\
+   } while (0)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE			\
+({					\
+	__FPU_FPSCR & 0x3;		\
+})
+
+/* the asm fragments go here: all these are taken from glibc-2.0.5's
+ * stdlib/longlong.h
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* add_ssaaaa is used in op-2.h and should be equivalent to
+ * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al))
+ * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
+ * by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else								\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+
+/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
+ * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al))
+ * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+ * LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else								\
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+
+/* asm fragments for mul and div */
+
+/* umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD; each input is evaluated only once.
+ */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+
+/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  This version also requires the
+ * most significant bit of DENOMINATOR to be 1, which is why
+ * UDIV_NEEDS_NORMALIZATION is defined to 1 below.
+ */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0;					\
+    UWtype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+#define UDIV_NEEDS_NORMALIZATION 1
+
+#define abort()								\
+	return 0 /* expands to a return statement inside the function that uses it */
+
+#ifdef __BIG_ENDIAN
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+/* Exception flags. */
+#define EFLAG_INVALID		(1 << (31 - 2))
+#define EFLAG_OVERFLOW		(1 << (31 - 3))
+#define EFLAG_UNDERFLOW		(1 << (31 - 4))
+#define EFLAG_DIVZERO		(1 << (31 - 5))
+#define EFLAG_INEXACT		(1 << (31 - 6))
+
+#define EFLAG_VXSNAN		(1 << (31 - 7))
+#define EFLAG_VXISI		(1 << (31 - 8))
+#define EFLAG_VXIDI		(1 << (31 - 9))
+#define EFLAG_VXZDZ		(1 << (31 - 10))
+#define EFLAG_VXIMZ		(1 << (31 - 11))
+#define EFLAG_VXVC		(1 << (31 - 12))
+#define EFLAG_VXSOFT		(1 << (31 - 21))
+#define EFLAG_VXSQRT		(1 << (31 - 22))
+#define EFLAG_VXCVI		(1 << (31 - 23))
diff --git a/arch/powerpc/include/asm/shmparam.h b/arch/powerpc/include/asm/shmparam.h
new file mode 100644
index 0000000000..bc09688395
--- /dev/null
+++ b/arch/powerpc/include/asm/shmparam.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SHMPARAM_H
+#define _ASM_POWERPC_SHMPARAM_H
+
+#define	SHMLBA PAGE_SIZE		 /* attach addr a multiple of this */
+
+#endif	/* _ASM_POWERPC_SHMPARAM_H */
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
new file mode 100644
index 0000000000..922d43700f
--- /dev/null
+++ b/arch/powerpc/include/asm/signal.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SIGNAL_H
+#define _ASM_POWERPC_SIGNAL_H
+
+#define __ARCH_HAS_SA_RESTORER
+#include <uapi/asm/signal.h>
+#include <uapi/asm/ptrace.h>
+
+struct pt_regs;
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
+
+unsigned long get_min_sigframe_size_32(void);
+unsigned long get_min_sigframe_size_64(void);
+unsigned long get_min_sigframe_size(void);
+unsigned long get_min_sigframe_size_compat(void);
+
+#endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
new file mode 100644
index 0000000000..4dd12dcb9e
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_H
+
+/*
+ * Simple spin lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *	Rework to support virtual processors
+ *
+ * Type of int is used as a full 64b word is not necessary.
+ *
+ * (the type definitions are in asm/simple_spinlock_types.h)
+ */
+#include <linux/irqflags.h>
+#include <linux/kcsan-checks.h>
+#include <asm/paravirt.h>
+#include <asm/paca.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC64
+/* use 0x800000yy when locked, where yy == CPU number; read as a u32 from the paca */
+#ifdef __BIG_ENDIAN__
+#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
+#else /* !__BIG_ENDIAN__: the u32 read starts at paca_index instead */
+#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
+#endif /* __BIG_ENDIAN__ */
+#else /* !CONFIG_PPC64: constant token */
+#define LOCK_TOKEN	1
+#endif /* CONFIG_PPC64 */
+
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.slock == 0;	/* takes the lock by value; a zero lock word means unlocked */
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));	/* tests a READ_ONCE() snapshot of the lock */
+}
+
+/*
+ * Store LOCK_TOKEN into the lock word with lwarx/stwcx. if it reads 0.
+ * Returns the old value in the lock, so we got the lock if it returns 0.
+ */
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long tmp, token;
+	unsigned int eh = IS_ENABLED(CONFIG_PPC64);	/* passed as the last lwarx operand */
+
+	token = LOCK_TOKEN;
+	__asm__ __volatile__(
+"1:	lwarx		%0,0,%2,%[eh]\n\
+	cmpwi		0,%0,0\n\
+	bne-		2f\n\
+	stwcx.		%1,0,%2\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (tmp)
+	: "r" (token), "r" (&lock->slock), [eh] "n" (eh)
+	: "cr0", "memory");
+
+	return tmp;
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __arch_spin_trylock(lock) == 0;	/* old value 0 means we now hold the lock */
+}
+
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor.  Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held.  Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+void splpar_spin_yield(arch_spinlock_t *lock);
+void splpar_rw_yield(arch_rwlock_t *lock);
+#else /* !CONFIG_PPC_SPLPAR: both yields are empty stubs */
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {}
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {}
+#endif /* CONFIG_PPC_SPLPAR */
+
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+	if (is_shared_processor())
+		splpar_spin_yield(lock);
+	else
+		barrier();	/* not in shared processor mode: no yield, just barrier() */
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+	if (is_shared_processor())
+		splpar_rw_yield(lock);
+	else
+		barrier();	/* not in shared processor mode: no yield, just barrier() */
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	while (1) {
+		if (likely(__arch_spin_trylock(lock) == 0))
+			break;	/* old value was 0: lock acquired */
+		do {	/* lock is held: wait on plain reads of slock before retrying the atomic attempt */
+			HMT_low();
+			if (is_shared_processor())
+				splpar_spin_yield(lock);
+		} while (unlikely(lock->slock != 0));
+		HMT_medium();
+	}
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	kcsan_mb();
+	__asm__ __volatile__("# arch_spin_unlock\n\t"
+				PPC_RELEASE_BARRIER: : :"memory");
+	lock->slock = 0;	/* plain store of 0, issued after the release barrier, frees the lock */
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#ifdef CONFIG_PPC64
+#define __DO_SIGN_EXTEND	"extsw	%0,%0\n"
+#define WRLOCK_TOKEN		LOCK_TOKEN	/* it's negative */
+#else /* !CONFIG_PPC64: no sign extension emitted, writer token is -1 */
+#define __DO_SIGN_EXTEND
+#define WRLOCK_TOKEN		(-1)
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Increment the lock word with lwarx/stwcx. unless the new value is <= 0.
+ * Returns the old value + 1, so we got a read lock if the result is > 0.
+ */
+static inline long __arch_read_trylock(arch_rwlock_t *rw)
+{
+	long tmp;
+	unsigned int eh = IS_ENABLED(CONFIG_PPC64);	/* passed as the last lwarx operand */
+
+	__asm__ __volatile__(
+"1:	lwarx		%0,0,%1,%[eh]\n"
+	__DO_SIGN_EXTEND
+"	addic.		%0,%0,1\n\
+	ble-		2f\n"
+"	stwcx.		%0,0,%1\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"	: "=&r" (tmp)
+	: "r" (&rw->lock), [eh] "n" (eh)
+	: "cr0", "xer", "memory");
+
+	return tmp;
+}
+
+/*
+ * Store WRLOCK_TOKEN into the lock word with lwarx/stwcx. if it reads 0.
+ * Returns the old value, so we got the write lock if the result is 0.
+ */
+static inline long __arch_write_trylock(arch_rwlock_t *rw)
+{
+	long tmp, token;
+	unsigned int eh = IS_ENABLED(CONFIG_PPC64);	/* passed as the last lwarx operand */
+
+	token = WRLOCK_TOKEN;
+	__asm__ __volatile__(
+"1:	lwarx		%0,0,%2,%[eh]\n\
+	cmpwi		0,%0,0\n\
+	bne-		2f\n"
+"	stwcx.		%1,0,%2\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"	: "=&r" (tmp)
+	: "r" (token), "r" (&rw->lock), [eh] "n" (eh)
+	: "cr0", "memory");
+
+	return tmp;
+}
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	while (1) {
+		if (likely(__arch_read_trylock(rw) > 0))
+			break;	/* result > 0: read lock taken */
+		do {
+			HMT_low();
+			if (is_shared_processor())
+				splpar_rw_yield(rw);
+		} while (unlikely(rw->lock < 0));	/* wait while the lock word is negative, then retry */
+		HMT_medium();
+	}
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	while (1) {
+		if (likely(__arch_write_trylock(rw) == 0))
+			break;	/* old value was 0: write lock taken */
+		do {
+			HMT_low();
+			if (is_shared_processor())
+				splpar_rw_yield(rw);
+		} while (unlikely(rw->lock != 0));	/* wait while the lock word is non-zero, then retry */
+		HMT_medium();
+	}
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	return __arch_read_trylock(rw) > 0;	/* non-zero when the read lock was taken */
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	return __arch_write_trylock(rw) == 0;	/* non-zero when the write lock was taken */
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	long tmp;	/* scratch for the lwarx/stwcx. loop that adds -1 to rw->lock */
+
+	__asm__ __volatile__(
+	"# read_unlock\n\t"
+	PPC_RELEASE_BARRIER
+"1:	lwarx		%0,0,%1\n\
+	addic		%0,%0,-1\n"
+"	stwcx.		%0,0,%1\n\
+	bne-		1b"
+	: "=&r"(tmp)
+	: "r"(&rw->lock)
+	: "cr0", "xer", "memory");
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__asm__ __volatile__("# write_unlock\n\t"
+				PPC_RELEASE_BARRIER: : :"memory");
+	rw->lock = 0;	/* plain store of 0, issued after the release barrier, frees the lock */
+}
+
+#define arch_spin_relax(lock)	spin_yield(lock)
+#define arch_read_relax(lock)	rw_yield(lock)
+#define arch_write_relax(lock)	rw_yield(lock)
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
new file mode 100644
index 0000000000..0824333806
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int slock;	/* 0 when unlocked, see __ARCH_SPIN_LOCK_UNLOCKED */
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile signed int lock;	/* 0 when unlocked, see __ARCH_RW_LOCK_UNLOCKED */
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
new file mode 100644
index 0000000000..aaaa576d0e
--- /dev/null
+++ b/arch/powerpc/include/asm/smp.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* 
+ * smp.h: PowerPC-specific SMP code.
+ *
+ * Original was a copy of sparc smp.h.  Now heavily modified
+ * for PPC.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com>
+ */
+
+#ifndef _ASM_POWERPC_SMP_H
+#define _ASM_POWERPC_SMP_H
+#ifdef __KERNEL__
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/irqreturn.h>
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#endif
+#include <asm/percpu.h>
+
+extern int boot_cpuid;
+extern int boot_cpu_hwid; /* PPC64 only */
+extern int spinning_secondaries;
+extern u32 *cpu_to_phys_id;
+extern bool coregroup_enabled;
+
+extern int cpu_to_chip_id(int cpu);
+extern int *chip_id_lookup_table;
+
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
+#ifdef CONFIG_SMP
+
+struct smp_ops_t {	/* table of SMP callbacks; the extern smp_ops pointer refers to one */
+	void  (*message_pass)(int cpu, int msg);	/* msg is presumably a PPC_MSG_* value -- confirm */
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+	void  (*cause_ipi)(int cpu);	/* member exists only with CONFIG_PPC_SMP_MUXED_IPI */
+#endif
+	int   (*cause_nmi_ipi)(int cpu);
+	void  (*probe)(void);
+	int   (*kick_cpu)(int nr);
+	int   (*prepare_cpu)(int nr);
+	void  (*setup_cpu)(int nr);
+	void  (*bringup_done)(void);
+	void  (*take_timebase)(void);
+	void  (*give_timebase)(void);
+	int   (*cpu_disable)(void);
+	void  (*cpu_die)(unsigned int nr);
+	int   (*cpu_bootable)(unsigned int nr);
+#ifdef CONFIG_HOTPLUG_CPU
+	void  (*cpu_offline_self)(void);	/* member exists only with CONFIG_HOTPLUG_CPU */
+#endif
+};
+
+extern struct task_struct *secondary_current;
+
+void start_secondary(void *unused);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern void smp_send_debugger_break(void);
+extern void __noreturn start_secondary_resume(void);
+extern void smp_generic_give_timebase(void);
+extern void smp_generic_take_timebase(void);
+
+DECLARE_PER_CPU(unsigned int, cpu_pvr);
+
+#ifdef CONFIG_HOTPLUG_CPU
+int generic_cpu_disable(void);
+void generic_cpu_die(unsigned int cpu);
+void generic_set_cpu_dead(unsigned int cpu);
+void generic_set_cpu_up(unsigned int cpu);
+int generic_check_cpu_restart(unsigned int cpu);
+int is_cpu_dead(unsigned int cpu);
+#else
+#define generic_set_cpu_up(i)	do { } while (0)
+#endif
+
+#ifdef CONFIG_PPC64
+#define raw_smp_processor_id()	(local_paca->paca_index)
+#define hard_smp_processor_id() (get_paca()->hw_cpu_id)
+#else
+/* 32-bit */
+extern int smp_hw_index[];
+
+#define raw_smp_processor_id()		(current_thread_info()->cpu)
+#define hard_smp_processor_id() 	(smp_hw_index[smp_processor_id()])
+
+static inline int get_hard_smp_processor_id(int cpu)
+{
+	return smp_hw_index[cpu];	/* table lookup; cpu is not range-checked here */
+}
+
+static inline void set_hard_smp_processor_id(int cpu, int phys)
+{
+	smp_hw_index[cpu] = phys;	/* records phys in the table; cpu is not range-checked here */
+}
+#endif
+
+DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
+
+static inline struct cpumask *cpu_sibling_mask(int cpu)
+{
+	return per_cpu(cpu_sibling_map, cpu);	/* the given CPU's cpu_sibling_map entry */
+}
+
+static inline struct cpumask *cpu_core_mask(int cpu)
+{
+	return per_cpu(cpu_core_map, cpu);	/* the given CPU's cpu_core_map entry */
+}
+
+static inline struct cpumask *cpu_l2_cache_mask(int cpu)
+{
+	return per_cpu(cpu_l2_cache_map, cpu);	/* the given CPU's cpu_l2_cache_map entry */
+}
+
+static inline struct cpumask *cpu_smallcore_mask(int cpu)
+{
+	return per_cpu(cpu_smallcore_map, cpu);	/* the given CPU's cpu_smallcore_map entry */
+}
+
+extern int cpu_to_core_id(int cpu);
+
+extern bool has_big_cores;
+extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
+
+#define cpu_smt_mask cpu_smt_mask
+#ifdef CONFIG_SCHED_SMT
+static inline const struct cpumask *cpu_smt_mask(int cpu)
+{
+	/* has_big_cores selects the small-core map over the sibling map. */
+	if (!has_big_cores)
+		return per_cpu(cpu_sibling_map, cpu);
+	return per_cpu(cpu_smallcore_map, cpu);
+}
+#endif /* CONFIG_SCHED_SMT */
+
+/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
+ *
+ * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
+ * in /proc/interrupts will be wrong!!! --Troy */
+#define PPC_MSG_CALL_FUNCTION	0
+#define PPC_MSG_RESCHEDULE	1
+#define PPC_MSG_TICK_BROADCAST	2
+#define PPC_MSG_NMI_IPI		3
+
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION	4
+
+#define NMI_IPI_ALL_OTHERS		-2
+
+#ifdef CONFIG_NMI_IPI
+extern int smp_handle_nmi_ipi(struct pt_regs *regs);
+#else
+static inline int smp_handle_nmi_ipi(struct pt_regs *regs) { return 0; }	/* !CONFIG_NMI_IPI stub: always 0 */
+#endif
+
+/* for irq controllers that have dedicated ipis per message (4) */
+extern int smp_request_message_ipi(int virq, int message);
+extern const char *smp_ipi_name[];
+
+/* for irq controllers with only a single ipi */
+extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
+extern irqreturn_t smp_ipi_demux(void);
+extern irqreturn_t smp_ipi_demux_relaxed(void);
+
+void smp_init_pSeries(void);
+void smp_init_cell(void);
+void smp_setup_cpu_maps(void);
+
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+
+#else
+/* for UP */
+#define hard_smp_processor_id()		get_hard_smp_processor_id(0)
+#define smp_setup_cpu_maps()
+#define thread_group_shares_l2  0
+#define thread_group_shares_l3	0
+static inline const struct cpumask *cpu_sibling_mask(int cpu)
+{
+	return cpumask_of(cpu);	/* UP build: whatever cpumask_of() gives for cpu */
+}
+
+static inline const struct cpumask *cpu_smallcore_mask(int cpu)
+{
+	return cpumask_of(cpu);	/* UP build: whatever cpumask_of() gives for cpu */
+}
+
+static inline const struct cpumask *cpu_l2_cache_mask(int cpu)
+{
+	return cpumask_of(cpu);	/* UP build: whatever cpumask_of() gives for cpu */
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC64
+static inline int get_hard_smp_processor_id(int cpu)
+{
+	return paca_ptrs[cpu]->hw_cpu_id;	/* PPC64: read hw_cpu_id through paca_ptrs[cpu] */
+}
+
+static inline void set_hard_smp_processor_id(int cpu, int phys)
+{
+	paca_ptrs[cpu]->hw_cpu_id = phys;	/* PPC64: store phys through paca_ptrs[cpu] */
+}
+#else
+/* 32-bit */
+#ifndef CONFIG_SMP
+extern int boot_cpuid_phys;
+static inline int get_hard_smp_processor_id(int cpu)
+{
+	return boot_cpuid_phys;	/* 32-bit UP: the cpu argument is ignored */
+}
+
+static inline void set_hard_smp_processor_id(int cpu, int phys)
+{
+	boot_cpuid_phys = phys;	/* 32-bit UP: the cpu argument is ignored */
+}
+#endif /* !CONFIG_SMP */
+#endif /* !CONFIG_PPC64 */
+
+#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
+extern void smp_release_cpus(void);
+#else
+static inline void smp_release_cpus(void) { }	/* no-op stub when the extern version is not declared */
+#endif
+
+extern int smt_enabled_at_boot;
+
+extern void smp_mpic_probe(void);
+extern void smp_mpic_setup_cpu(int cpu);
+extern int smp_generic_kick_cpu(int nr);
+extern int smp_generic_cpu_bootable(unsigned int nr);
+
+
+extern void smp_generic_give_timebase(void);
+extern void smp_generic_take_timebase(void);
+
+extern struct smp_ops_t *smp_ops;
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+/* Definitions relative to the secondary CPU spin loop
+ * and entry point. Not all of them exist on both 32 and
+ * 64-bit but defining them all here doesn't harm
+ */
+extern void generic_secondary_smp_init(void);
+extern unsigned long __secondary_hold_spinloop;
+extern unsigned long __secondary_hold_acknowledge;
+extern char __secondary_hold;
+extern unsigned int booting_thread_hwid;
+
+extern void __early_start(void);
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_SMP_H */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
new file mode 100644
index 0000000000..2ac6ab9030
--- /dev/null
+++ b/arch/powerpc/include/asm/smu.h
@@ -0,0 +1,694 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SMU_H
+#define _SMU_H
+
+/*
+ * Definitions for talking to the SMU chip in newer G5 PowerMacs
+ */
+#ifdef __KERNEL__
+#include <linux/list.h>
+#endif
+#include <linux/types.h>
+
+/*
+ * Known SMU commands
+ *
+ * Most of what is below comes from looking at the Open Firmware driver,
+ * though this is still incomplete and could use better documentation here
+ * or there...
+ */
+
+
+/*
+ * Partition info commands
+ *
+ * These commands are used to retrieve the sdb-partition-XX data from
+ * the SMU. The length is always 2. First byte is the subcommand code
+ * and second byte is the partition ID.
+ *
+ * The reply is 6 bytes:
+ *
+ *  - 0..1 : partition address
+ *  - 2    : a byte containing the partition ID
+ *  - 3    : length (maybe other bits are rest of header ?)
+ *
+ * The data must then be obtained with calls to another command:
+ * SMU_CMD_MISC_ee_GET_DATABLOCK_REC (described below).
+ */
+#define SMU_CMD_PARTITION_COMMAND		0x3e
+#define   SMU_CMD_PARTITION_LATEST		0x01
+#define   SMU_CMD_PARTITION_BASE		0x02
+#define   SMU_CMD_PARTITION_UPDATE		0x03
+
+
+/*
+ * Fan control
+ *
+ * This is a "mux" for fan control commands. The command seems to
+ * act differently based on the number of arguments. With 1 byte
+ * of argument, this seems to be queries for fans status, setpoint,
+ * etc..., while with 0xe arguments, we will set the fans speeds.
+ *
+ * Queries (1 byte arg):
+ * ---------------------
+ *
+ * arg=0x01: read RPM fans status
+ * arg=0x02: read RPM fans setpoint
+ * arg=0x11: read PWM fans status
+ * arg=0x12: read PWM fans setpoint
+ *
+ * the "status" queries return the current speed while the "setpoint" ones
+ * return the programmed/target speed. It _seems_ that the result is a bit
+ * mask in the first byte of active/available fans, followed by 6 words (16
+ * bits) containing the requested speed.
+ *
+ * Setpoint (14 bytes arg):
+ * ------------------------
+ *
+ * first arg byte is 0 for RPM fans and 0x10 for PWM. Second arg byte is the
+ * mask of fans affected by the command. Followed by 6 words containing the
+ * setpoint value for selected fans in the mask (or 0 if mask value is 0)
+ */
+#define SMU_CMD_FAN_COMMAND			0x4a
+
+
+/*
+ * Battery access
+ *
+ * Same command number as the PMU, could it be same syntax ?
+ */
+#define SMU_CMD_BATTERY_COMMAND			0x6f
+#define   SMU_CMD_GET_BATTERY_INFO		0x00
+
+/*
+ * Real time clock control
+ *
+ * This is a "mux", first data byte contains the "sub" command.
+ * The "RTC" part of the SMU controls the date, time, powerup
+ * timer, but also a PRAM
+ *
+ * Dates are in BCD format on 7 bytes:
+ * [sec] [min] [hour] [weekday] [month day] [month] [year]
+ * with month being 1 based and year minus 100
+ */
+#define SMU_CMD_RTC_COMMAND			0x8e
+#define   SMU_CMD_RTC_SET_PWRUP_TIMER		0x00 /* i: 7 bytes date */
+#define   SMU_CMD_RTC_GET_PWRUP_TIMER		0x01 /* o: 7 bytes date */
+#define   SMU_CMD_RTC_STOP_PWRUP_TIMER		0x02
+#define   SMU_CMD_RTC_SET_PRAM_BYTE_ACC		0x20 /* i: 1 byte (address?) */
+#define   SMU_CMD_RTC_SET_PRAM_AUTOINC		0x21 /* i: 1 byte (data?) */
+#define   SMU_CMD_RTC_SET_PRAM_LO_BYTES 	0x22 /* i: 10 bytes */
+#define   SMU_CMD_RTC_SET_PRAM_HI_BYTES 	0x23 /* i: 10 bytes */
+#define   SMU_CMD_RTC_GET_PRAM_BYTE		0x28 /* i: 1 bytes (address?) */
+#define   SMU_CMD_RTC_GET_PRAM_LO_BYTES 	0x29 /* o: 10 bytes */
+#define   SMU_CMD_RTC_GET_PRAM_HI_BYTES 	0x2a /* o: 10 bytes */
+#define	  SMU_CMD_RTC_SET_DATETIME		0x80 /* i: 7 bytes date */
+#define   SMU_CMD_RTC_GET_DATETIME		0x81 /* o: 7 bytes date */
+
+ /*
+  * i2c commands
+  *
+  * To issue an i2c command, first is to send a parameter block to
+  * the SMU. This is a command of type 0x9a with 9 bytes of header
+  * eventually followed by data for a write:
+  *
+  * 0: bus number (from device-tree usually, SMU has lots of busses !)
+  * 1: transfer type/format (see below)
+  * 2: device address. For combined and combined4 type transfers, this
+  *    is the "write" version of the address (bit 0x01 cleared)
+  * 3: subaddress length (0..3)
+  * 4: subaddress byte 0 (or only byte for subaddress length 1)
+  * 5: subaddress byte 1
+  * 6: subaddress byte 2
+  * 7: combined address (device address for combined mode data phase)
+  * 8: data length
+  *
+  * The transfer types are the same good old Apple ones it seems,
+  * that is:
+  *   - 0x00: Simple transfer
+  *   - 0x01: Subaddress transfer (addr write + data tx, no restart)
+  *   - 0x02: Combined transfer (addr write + restart + data tx)
+  *
+  * This is then followed by actual data for a write.
+  *
+  * At this point, the OF driver seems to have a limitation on transfer
+  * sizes of 0xd bytes on reads and 0x5 bytes on writes. I do not know
+  * whether this is just an OF limit due to some temporary buffer size
+  * or if this is an SMU imposed limit. This driver has the same limitation
+  * for now as I use a 0x10 bytes temporary buffer as well
+  *
+  * Once that is completed, a response is expected from the SMU. This is
+  * obtained via a command of type 0x9a with a length of 1 byte containing
+  * 0 as the data byte. OF also fills the rest of the data buffer with 0xff's
+  * though I can't tell yet if this is actually necessary. Once this command
+  * is complete, at this point, all I can tell is what OF does. OF tests
+  * byte 0 of the reply:
+  *   - on read, 0xfe or 0xfc : bus is busy, wait (see below) or nak ?
+  *   - on read, 0x00 or 0x01 : reply is in buffer (after the byte 0)
+  *   - on write, < 0 -> failure (immediate exit)
+  *   - else, OF just exits (without error, weird)
+  *
+  * So on read, there is this wait-for-busy thing when getting a 0xfc or
+  * 0xfe result. OF does a loop of up to 64 retries, waiting 20ms and
+  * doing the above again until either the retries expire or the result
+  * is no longer 0xfe or 0xfc
+  *
+  * The Darwin I2C driver is less subtle though. On any non-success status
+  * from the response command, it waits 5ms and tries again up to 20 times,
+  * it doesn't differentiate between fatal errors or "busy" status.
+  *
+  * This driver provides an asynchronous paramblock based i2c command
+  * interface to be used either directly by low level code or by a higher
+  * level driver interfacing to the linux i2c layer. The current
+  * implementation of this relies on working timers & timer interrupts
+  * though, so be careful of calling context for now. This may be "fixed"
+  * in the future by adding a polling facility.
+  */
+#define SMU_CMD_I2C_COMMAND			0x9a
+          /* transfer types */
+#define   SMU_I2C_TRANSFER_SIMPLE	0x00
+#define   SMU_I2C_TRANSFER_STDSUB	0x01
+#define   SMU_I2C_TRANSFER_COMBINED	0x02
+
+/*
+ * Power supply control
+ *
+ * The "sub" command is an ASCII string in the data, the
+ * data length is that of the string.
+ *
+ * The VSLEW command can be used to get or set the voltage slewing.
+ *  - length 5 (only "VSLEW") : it returns "DONE" and 3 bytes of
+ *    reply at data offset 6, 7 and 8.
+ *  - length 8 ("VSLEWxyz") has 3 additional bytes appended, and is
+ *    used to set the voltage slewing point. The SMU replies with "DONE"
+ * I yet have to figure out the exact meaning of those 3 bytes in
+ * both cases. They seem to be:
+ *  x = processor mask
+ *  y = op. point index
+ *  z = processor freq. step index
+ * I haven't yet deciphered result codes
+ *
+ */
+#define SMU_CMD_POWER_COMMAND			0xaa
+#define   SMU_CMD_POWER_RESTART		       	"RESTART"
+#define   SMU_CMD_POWER_SHUTDOWN		"SHUTDOWN"
+#define   SMU_CMD_POWER_VOLTAGE_SLEW		"VSLEW"
+
+/*
+ * Read ADC sensors
+ *
+ * This command takes one byte of parameter: the sensor ID (or "reg"
+ * value in the device-tree) and returns a 16 bits value
+ */
+#define SMU_CMD_READ_ADC			0xd8
+
+
+/* Misc commands
+ *
+ * This command seems to be a grab bag of various things
+ *
+ * Parameters:
+ *   1: subcommand
+ */
+#define SMU_CMD_MISC_df_COMMAND			0xdf
+
+/*
+ * Sets "system ready" status
+ *
+ * I did not yet understand how it exactly works or what it does.
+ *
+ * Guessing from OF code, 0x02 activates the display backlight. Apple uses/used
+ * the same codebase for all OF versions. On PowerBooks, this command would
+ * enable the backlight. For the G5s, it only activates the front LED. However,
+ * don't take this for granted.
+ *
+ * Parameters:
+ *   2: status [0x00, 0x01 or 0x02]
+ */
+#define   SMU_CMD_MISC_df_SET_DISPLAY_LIT	0x02
+
+/*
+ * Sets mode of power switch.
+ *
+ * What this actually does is not yet known. Maybe it enables some interrupt.
+ *
+ * Parameters:
+ *   2: enable power switch? [0x00 or 0x01]
+ *   3 (optional): enable nmi? [0x00 or 0x01]
+ *
+ * Returns:
+ *   If parameter 2 is 0x00 and parameter 3 is not specified, returns whether
+ *   NMI is enabled. Otherwise unknown.
+ */
+#define   SMU_CMD_MISC_df_NMI_OPTION		0x04
+
+/* Sets LED dimm offset.
+ *
+ * The front LED dims itself during sleep. Its brightness (or, well, the PWM
+ * frequency) depends on current time. Therefore, the SMU needs to know the
+ * timezone.
+ *
+ * Parameters:
+ *   2-8: unknown (BCD coding)
+ */
+#define   SMU_CMD_MISC_df_DIMM_OFFSET		0x99
+
+
+/*
+ * Version info commands
+ *
+ * Parameters:
+ *   1 (optional): Specifies version part to retrieve
+ *
+ * Returns:
+ *   Version value
+ */
+#define SMU_CMD_VERSION_COMMAND			0xea
+#define   SMU_VERSION_RUNNING			0x00
+#define   SMU_VERSION_BASE			0x01
+#define   SMU_VERSION_UPDATE			0x02
+
+
+/*
+ * Switches
+ *
+ * These are switches whose status seems to be known to the SMU.
+ *
+ * Parameters:
+ *   none
+ *
+ * Result:
+ *   Switch bits (ORed, see below)
+ */
+#define SMU_CMD_SWITCHES			0xdc
+
+/* Switches bits */
+#define SMU_SWITCH_CASE_CLOSED			0x01
+#define SMU_SWITCH_AC_POWER			0x04
+#define SMU_SWITCH_POWER_SWITCH			0x08
+
+
+/*
+ * Misc commands
+ *
+ * This command seems to be a grab bag of various things
+ *
+ * SMU_CMD_MISC_ee_GET_DATABLOCK_REC is used, among others, to
+ * transfer blocks of data from the SMU. So far, I've deciphered its
+ * usage to retrieve partition data. In order to do that, you have to
+ * break your transfer in "chunks" since that command cannot transfer
+ * more than a chunk at a time. The chunk size used by OF is 0xe bytes,
+ * but it seems that the darwin driver will let you do 0x1e bytes if
+ * your "PMU" version is >= 0x30. You can get the "PMU" version apparently
+ * either in the last 16 bits of property "smu-version-pmu" or as the 16
+ * bytes at offset 1 of "smu-version-info"
+ *
+ * For each chunk, the command takes 7 bytes of arguments:
+ *  byte 0: subcommand code (0x02)
+ *  byte 1: 0x04 (always, I don't know what it means, maybe the address
+ *                space to use or some other nicety. It's hard coded in OF)
+ *  byte 2..5: SMU address of the chunk (big endian 32 bits)
+ *  byte 6: size to transfer (up to max chunk size)
+ *
+ * The data is returned directly
+ */
+#define SMU_CMD_MISC_ee_COMMAND			0xee
+#define   SMU_CMD_MISC_ee_GET_DATABLOCK_REC	0x02
+
+/* Retrieves currently used watts.
+ *
+ * Parameters:
+ *   1: 0x03 (Meaning unknown)
+ */
+#define   SMU_CMD_MISC_ee_GET_WATTS		0x03
+
+#define   SMU_CMD_MISC_ee_LEDS_CTRL		0x04 /* i: 00 (00,01) [00] */
+#define   SMU_CMD_MISC_ee_GET_DATA		0x05 /* i: 00 , o: ?? */
+
+
+/*
+ * Power related commands
+ *
+ * Parameters:
+ *   1: subcommand
+ */
+#define SMU_CMD_POWER_EVENTS_COMMAND		0x8f
+
+/* SMU_POWER_EVENTS subcommands */
+enum {
+	SMU_PWR_GET_POWERUP_EVENTS      = 0x00,
+	SMU_PWR_SET_POWERUP_EVENTS      = 0x01,
+	SMU_PWR_CLR_POWERUP_EVENTS      = 0x02,
+	SMU_PWR_GET_WAKEUP_EVENTS       = 0x03,
+	SMU_PWR_SET_WAKEUP_EVENTS       = 0x04,
+	SMU_PWR_CLR_WAKEUP_EVENTS       = 0x05,
+
+	/*
+	 * Get last shutdown cause
+	 *
+	 * Returns:
+	 *   1 byte (signed char): Last shutdown cause. Exact meaning unknown.
+	 */
+	SMU_PWR_LAST_SHUTDOWN_CAUSE	= 0x07,
+
+	/*
+	 * Sets or gets server ID. Meaning or use is unknown.
+	 *
+	 * Parameters:
+	 *   2 (optional): Set server ID (1 byte)
+	 *
+	 * Returns:
+	 *   1 byte (server ID?)
+	 */
+	SMU_PWR_SERVER_ID		= 0x08,
+};
+
+/* Power events wakeup bits */
+enum {
+	SMU_PWR_WAKEUP_KEY              = 0x01, /* Wake on key press */
+	SMU_PWR_WAKEUP_AC_INSERT        = 0x02, /* Wake on AC adapter plug */
+	SMU_PWR_WAKEUP_AC_CHANGE        = 0x04, /* presumably: wake on AC state change -- confirm */
+	SMU_PWR_WAKEUP_LID_OPEN         = 0x08, /* presumably: wake on lid open -- confirm */
+	SMU_PWR_WAKEUP_RING             = 0x10, /* presumably: wake on ring -- confirm */
+};
+
+
+/*
+ * - Kernel side interface -
+ */
+
+#ifdef __KERNEL__
+
+/*
+ * Asynchronous SMU commands
+ *
+ * Fill up this structure and submit it via smu_queue_cmd(),
+ * and get notified by the optional done() callback, or because
+ * status becomes != 1
+ */
+
+struct smu_cmd;
+
+struct smu_cmd
+{
+	/* public: filled in by the submitter */
+	u8			cmd;		/* SMU command byte (presumably an SMU_CMD_* value) */
+	int			data_len;	/* length of the data in data_buf */
+	int			reply_len;	/* reply length */
+	void			*data_buf;	/* buffer holding the command data */
+	void			*reply_buf;	/* buffer for the reply */
+	int			status;		/* command status; completion shows as a value != 1 */
+	void			(*done)(struct smu_cmd *cmd, void *misc);	/* optional callback */
+	void			*misc;	/* caller's pointer, the second parameter of done() */
+
+	/* private: list linkage, not for callers */
+	struct list_head	link;
+};
+
+/*
+ * Queues an SMU command, all fields have to be initialized
+ */
+extern int smu_queue_cmd(struct smu_cmd *cmd);
+
+/*
+ * Simple command wrapper. This structure embeds a small buffer
+ * to ease sending simple SMU commands from the stack
+ */
+struct smu_simple_cmd
+{
+	struct smu_cmd	cmd;	/* the embedded command */
+	u8	       	buffer[16];	/* the small embedded buffer (16 bytes) */
+};
+
+/*
+ * Queues a simple command. All fields will be initialized by that
+ * function
+ */
+extern int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command,
+			    unsigned int data_len,
+			    void (*done)(struct smu_cmd *cmd, void *misc),
+			    void *misc,
+			    ...);
+
+/*
+ * Completion helper. Pass it to smu_queue_simple or as 'done'
+ * member to smu_queue_cmd, it will call complete() on the struct
+ * completion passed in the "misc" argument
+ */
+extern void smu_done_complete(struct smu_cmd *cmd, void *misc);
+
+/*
+ * Synchronous helpers. Will spin-wait for completion of a command
+ */
+extern void smu_spinwait_cmd(struct smu_cmd *cmd);
+
+static inline void smu_spinwait_simple(struct smu_simple_cmd *scmd)
+{
+	smu_spinwait_cmd(&scmd->cmd);	/* spin-wait on the embedded smu_cmd */
+}
+
+/*
+ * Poll routine to call if blocked with irqs off
+ */
+extern void smu_poll(void);
+
+
+/*
+ * Init routine, presence check....
+ */
+int __init smu_init(void);
+extern int smu_present(void);
+struct platform_device;
+extern struct platform_device *smu_get_ofdev(void);
+
+
+/*
+ * Common command wrappers
+ */
+extern void smu_shutdown(void);
+extern void smu_restart(void);
+struct rtc_time;
+extern int smu_get_rtc_time(struct rtc_time *time, int spinwait);
+extern int smu_set_rtc_time(struct rtc_time *time, int spinwait);
+
+/*
+ * Kernel asynchronous i2c interface
+ */
+
+#define SMU_I2C_READ_MAX	0x1d
+#define SMU_I2C_WRITE_MAX	0x15
+
+/* SMU i2c header, exactly matches i2c header on wire */
+struct smu_i2c_param
+{
+	u8	bus;		/* SMU bus ID (from device tree) */
+	u8	type;		/* i2c transfer type (SMU_I2C_TRANSFER_*) */
+	u8	devaddr;	/* device address (includes direction) */
+	u8	sublen;		/* subaddress length (0..3) */
+	u8	subaddr[3];	/* subaddress bytes 0..2 */
+	u8	caddr;		/* combined address, filled by SMU driver */
+	u8	datalen;	/* length of transfer */
+	u8	data[SMU_I2C_READ_MAX];	/* data; room for SMU_I2C_READ_MAX bytes */
+};
+
+struct smu_i2c_cmd
+{
+	/* public */
+	struct smu_i2c_param	info;	/* i2c parameter block, filled by the caller */
+	void			(*done)(struct smu_i2c_cmd *cmd, void *misc);	/* completion callback */
+	void			*misc;	/* caller's pointer, the second parameter of done() */
+	int			status; /* 1 = pending, 0 = ok, <0 = fail */
+
+	/* private: not for callers */
+	struct smu_cmd		scmd;	/* presumably the underlying SMU command -- confirm */
+	int			read;
+	int			stage;
+	int			retries;
+	u8			pdata[32];
+	struct list_head	link;
+};
+
+/*
+ * Call this to queue an i2c command to the SMU. You must fill info,
+ * including info.data for a write, done and misc.
+ * For now, no polling interface is provided so you have to use completion
+ * callback.
+ */
+extern int smu_queue_i2c(struct smu_i2c_cmd *cmd);
+
+
+#endif /* __KERNEL__ */
+
+
+/*
+ * - SMU "sdb" partitions informations -
+ */
+
+
+/*
+ * Partition header format
+ */
+struct smu_sdbp_header {
+	__u8	id;	/* partition ID (presumably matches the SMU_SDB_*_ID values) */
+	__u8	len;	/* length field; its unit is not stated in this header */
+	__u8	version;
+	__u8	flags;
+};
+
+
+/*
+ * Demangle 16- and 32-bit integers in some SMU partitions
+ * (currently, afaik, this concerns only the FVT partition,
+ * 0x12). The 32-bit form swaps the bytes of each 16-bit half.
+ */
+#define SMU_U16_MIX(x)	le16_to_cpu(x)
+#define SMU_U32_MIX(x)  ((((x) & 0xff00ff00u) >> 8)|(((x) & 0x00ff00ffu) << 8))
+
+
+/* This is the definition of the SMU sdb-partition-0x12 table (called
+ * CPU F/V/T operating points in Darwin). The definition for all those
+ * SMU tables should be moved to some separate file
+ */
+#define SMU_SDB_FVT_ID			0x12
+
+struct smu_sdbp_fvt {
+	__u32	sysclk;			/* Base SysClk frequency in Hz for
+					 * this operating point. Value needs to
+					 * be unmixed with SMU_U32_MIX()
+					 */
+	__u8	pad;
+	__u8	maxtemp;		/* Max temp. supported by this
+					 * operating point
+					 */
+
+	__u16	volts[3];		/* CPU core voltage for the 3
+					 * PowerTune modes, a mode with
+					 * 0V = not supported. Value needs
+					 * to be unmixed with SMU_U16_MIX()
+					 */
+};
+
+/* This partition contains voltage & current sensor calibration
+ * informations
+ */
+#define SMU_SDB_CPUVCP_ID		0x21
+
+struct smu_sdbp_cpuvcp {
+	__u16	volt_scale;		/* u4.12 fixed point */
+	__s16	volt_offset;		/* s4.12 fixed point */
+	__u16	curr_scale;		/* u4.12 fixed point */
+	__s16	curr_offset;		/* s4.12 fixed point */
+	__s32	power_quads[3];		/* s4.28 fixed point */
+};
+
+/* This partition contains CPU thermal diode calibration
+ */
+#define SMU_SDB_CPUDIODE_ID		0x18
+
+struct smu_sdbp_cpudiode {
+	__u16	m_value;		/* u1.15 fixed point */
+	__s16	b_value;		/* s10.6 fixed point */
+
+};
+
+/* This partition contains Slots power calibration
+ */
+#define SMU_SDB_SLOTSPOW_ID		0x78
+
+struct smu_sdbp_slotspow {
+	__u16	pow_scale;		/* u4.12 fixed point */
+	__s16	pow_offset;		/* s4.12 fixed point */
+};
+
+/* This partition contains machine specific version information about
+ * the sensor/control layout
+ */
+#define SMU_SDB_SENSORTREE_ID		0x25
+
+struct smu_sdbp_sensortree {
+	__u8	model_id;
+	__u8	unknown[3];
+};
+
+/* This partition contains CPU thermal control PID informations. So far
+ * only single CPU machines have been seen with an SMU, so we assume this
+ * carries only informations for those
+ */
+#define SMU_SDB_CPUPIDDATA_ID		0x17
+
+struct smu_sdbp_cpupiddata {
+	__u8	unknown1;
+	__u8	target_temp_delta;
+	__u8	unknown2;
+	__u8	history_len;
+	__s16	power_adj;
+	__u16	max_power;
+	__s32	gp,gr,gd;
+};
+
+
+/* Other partitions without known structures */
+#define SMU_SDB_DEBUG_SWITCHES_ID	0x05
+
+#ifdef __KERNEL__
+/*
+ * This returns the pointer to an SMU "sdb" partition data or NULL
+ * if not found. The data format is described above
+ */
+extern const struct smu_sdbp_header *smu_get_sdb_partition(int id,
+					unsigned int *size);
+
+/* Get "sdb" partition data from an SMU satellite */
+extern struct smu_sdbp_header *smu_sat_get_sdb_partition(unsigned int sat_id,
+					int id, unsigned int *size);
+
+
+#endif /* __KERNEL__ */
+
+
+/*
+ * - Userland interface -
+ */
+
+/*
+ * A given instance of the device can be configured for 2 different
+ * things at the moment:
+ *
+ *  - sending SMU commands (default at open() time)
+ *  - receiving SMU events (not yet implemented)
+ *
+ * Commands are written with write() of a command block. They can be
+ * "driver" commands (for example to switch to event reception mode)
+ * or real SMU commands. They are made of a header followed by command
+ * data if any.
+ *
+ * For SMU commands (not for driver commands), you can then read() back
+ * a reply. The reader will be blocked or not depending on how the device
+ * file is opened. poll() isn't implemented yet. The reply will consist
+ * of a header as well, followed by the reply data if any. You should
+ * always provide a buffer large enough for the maximum reply data, I
+ * recommend one page.
+ *
+ * It is illegal to send SMU commands through a file descriptor configured
+ * for events reception
+ *
+ */
+struct smu_user_cmd_hdr
+{
+	__u32		cmdtype;
+#define SMU_CMDTYPE_SMU			0	/* SMU command */
+#define SMU_CMDTYPE_WANTS_EVENTS	1	/* switch fd to events mode */
+#define SMU_CMDTYPE_GET_PARTITION	2	/* retrieve an sdb partition */
+
+	__u8		cmd;			/* SMU command byte */
+	__u8		pad[3];			/* padding */
+	__u32		data_len;		/* Length of data following */
+};
+
+struct smu_user_reply_hdr
+{
+	__u32		status;			/* Command status */
+	__u32		reply_len;		/* Length of data following */
+};
+
+#endif /*  _SMU_H */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
new file mode 100644
index 0000000000..d072866842
--- /dev/null
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SPARSEMEM_H
+#define _ASM_POWERPC_SPARSEMEM_H 1
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * SECTION_SIZE_BITS		2^N: how big each section will be
+ * MAX_PHYSMEM_BITS		2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS       24
+
+#endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern int remove_section_mapping(unsigned long start, unsigned long end);
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+
+#ifdef CONFIG_NUMA
+extern int hot_add_scn_to_nid(unsigned long scn_addr);
+#else
+static inline int hot_add_scn_to_nid(unsigned long scn_addr)
+{
+	return 0;	/* !CONFIG_NUMA stub: always 0, scn_addr is unused */
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_SPARSEMEM_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
new file mode 100644
index 0000000000..7dafca8e3f
--- /dev/null
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+#ifdef __KERNEL__
+
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
+#else
+#include <asm/simple_spinlock.h>
+#endif
+
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()	smp_mb()
+
+#ifndef CONFIG_PPC_QUEUED_SPINLOCKS
+static inline void pv_spinlocks_init(void) { }	/* no-op without CONFIG_PPC_QUEUED_SPINLOCKS */
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
new file mode 100644
index 0000000000..40b01446cf
--- /dev/null
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
+#define _ASM_POWERPC_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+# error "please don't include this file directly"
+#endif
+
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+#else
+#include <asm/simple_spinlock_types.h>
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
new file mode 100644
index 0000000000..96ad4510c8
--- /dev/null
+++ b/arch/powerpc/include/asm/spu.h
@@ -0,0 +1,679 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SPU core / file system interface and HW structures
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#ifndef _SPU_H
+#define _SPU_H
+#ifdef __KERNEL__
+
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <asm/reg.h>
+#include <asm/copro.h>
+
+#define LS_SIZE (256 * 1024)
+#define LS_ADDR_MASK (LS_SIZE - 1)
+
+#define MFC_PUT_CMD             0x20
+#define MFC_PUTS_CMD            0x28
+#define MFC_PUTR_CMD            0x30
+#define MFC_PUTF_CMD            0x22
+#define MFC_PUTB_CMD            0x21
+#define MFC_PUTFS_CMD           0x2A
+#define MFC_PUTBS_CMD           0x29
+#define MFC_PUTRF_CMD           0x32
+#define MFC_PUTRB_CMD           0x31
+#define MFC_PUTL_CMD            0x24
+#define MFC_PUTRL_CMD           0x34
+#define MFC_PUTLF_CMD           0x26
+#define MFC_PUTLB_CMD           0x25
+#define MFC_PUTRLF_CMD          0x36
+#define MFC_PUTRLB_CMD          0x35
+
+#define MFC_GET_CMD             0x40
+#define MFC_GETS_CMD            0x48
+#define MFC_GETF_CMD            0x42
+#define MFC_GETB_CMD            0x41
+#define MFC_GETFS_CMD           0x4A
+#define MFC_GETBS_CMD           0x49
+#define MFC_GETL_CMD            0x44
+#define MFC_GETLF_CMD           0x46
+#define MFC_GETLB_CMD           0x45
+
+#define MFC_SDCRT_CMD           0x80
+#define MFC_SDCRTST_CMD         0x81
+#define MFC_SDCRZ_CMD           0x89
+#define MFC_SDCRS_CMD           0x8D
+#define MFC_SDCRF_CMD           0x8F
+
+#define MFC_GETLLAR_CMD         0xD0
+#define MFC_PUTLLC_CMD          0xB4
+#define MFC_PUTLLUC_CMD         0xB0
+#define MFC_PUTQLLUC_CMD        0xB8
+#define MFC_SNDSIG_CMD          0xA0
+#define MFC_SNDSIGB_CMD         0xA1
+#define MFC_SNDSIGF_CMD         0xA2
+#define MFC_BARRIER_CMD         0xC0
+#define MFC_EIEIO_CMD           0xC8
+#define MFC_SYNC_CMD            0xCC
+
+#define MFC_MIN_DMA_SIZE_SHIFT  4       /* 16 bytes */
+#define MFC_MAX_DMA_SIZE_SHIFT  14      /* 16384 bytes */
+#define MFC_MIN_DMA_SIZE        (1 << MFC_MIN_DMA_SIZE_SHIFT)
+#define MFC_MAX_DMA_SIZE        (1 << MFC_MAX_DMA_SIZE_SHIFT)
+#define MFC_MIN_DMA_SIZE_MASK   (MFC_MIN_DMA_SIZE - 1)
+#define MFC_MAX_DMA_SIZE_MASK   (MFC_MAX_DMA_SIZE - 1)
+#define MFC_MIN_DMA_LIST_SIZE   0x0008  /*   8 bytes */
+#define MFC_MAX_DMA_LIST_SIZE   0x4000  /* 16K bytes */
+
+#define MFC_TAGID_TO_TAGMASK(tag_id)  (1 << ((tag_id) & 0x1F))	/* one bit per tag; id wraps modulo 32 */
+
+/* Events for Channels 0-2 */
+#define MFC_DMA_TAG_STATUS_UPDATE_EVENT     0x00000001
+#define MFC_DMA_TAG_CMD_STALL_NOTIFY_EVENT  0x00000002
+#define MFC_DMA_QUEUE_AVAILABLE_EVENT       0x00000008
+#define MFC_SPU_MAILBOX_WRITTEN_EVENT       0x00000010
+#define MFC_DECREMENTER_EVENT               0x00000020
+#define MFC_PU_INT_MAILBOX_AVAILABLE_EVENT  0x00000040
+#define MFC_PU_MAILBOX_AVAILABLE_EVENT      0x00000080
+#define MFC_SIGNAL_2_EVENT                  0x00000100
+#define MFC_SIGNAL_1_EVENT                  0x00000200
+#define MFC_LLR_LOST_EVENT                  0x00000400
+#define MFC_PRIV_ATTN_EVENT                 0x00000800
+#define MFC_MULTI_SRC_EVENT                 0x00001000
+
+/* Flag indicating progress during context switch. */
+#define SPU_CONTEXT_SWITCH_PENDING	0UL
+#define SPU_CONTEXT_FAULT_PENDING	1UL
+
+struct spu_context;
+struct spu_runqueue;
+struct spu_lscsa;
+struct device_node;
+
+enum spu_utilization_state {
+	SPU_UTIL_USER,
+	SPU_UTIL_SYSTEM,
+	SPU_UTIL_IOWAIT,
+	SPU_UTIL_IDLE_LOADED,
+	SPU_UTIL_MAX	/* count of the states above; sizes stats.times[] in struct spu */
+};
+
+struct spu {
+	const char *name;
+	unsigned long local_store_phys;
+	u8 *local_store;
+	unsigned long problem_phys;
+	struct spu_problem __iomem *problem;
+	struct spu_priv2 __iomem *priv2;
+	struct list_head cbe_list;
+	struct list_head full_list;
+	enum { SPU_FREE, SPU_USED } alloc_state;
+	int number;
+	unsigned int irqs[3];
+	u32 node;
+	unsigned long flags;
+	u64 class_0_pending;
+	u64 class_0_dar;
+	u64 class_1_dar;
+	u64 class_1_dsisr;
+	size_t ls_size;
+	unsigned int slb_replace;
+	struct mm_struct *mm;
+	struct spu_context *ctx;
+	struct spu_runqueue *rq;
+	unsigned long long timestamp;
+	pid_t pid;
+	pid_t tgid;
+	spinlock_t register_lock;
+
+	void (* wbox_callback)(struct spu *spu);
+	void (* ibox_callback)(struct spu *spu);
+	void (* stop_callback)(struct spu *spu, int irq);
+	void (* mfc_callback)(struct spu *spu);
+
+	char irq_c0[8];
+	char irq_c1[8];
+	char irq_c2[8];
+
+	u64 spe_id;
+
+	void* pdata; /* platform private data */
+
+	/* of based platforms only */
+	struct device_node *devnode;
+
+	/* native only */
+	struct spu_priv1 __iomem *priv1;
+
+	/* beat only */
+	u64 shadow_int_mask_RW[3];
+
+	struct device dev;
+
+	int has_mem_affinity;
+	struct list_head aff_list;
+
+	struct {
+		/* protected by interrupt reentrancy */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long class2_intr;
+		unsigned long long libassist;
+	} stats;
+};
+
+struct cbe_spu_info {
+	struct mutex list_mutex;
+	struct list_head spus;
+	int n_spus;
+	int nr_active;
+	atomic_t busy_spus;
+	atomic_t reserved_spus;
+};
+
+extern struct cbe_spu_info cbe_spu_info[];
+
+void spu_init_channels(struct spu *spu);
+void spu_irq_setaffinity(struct spu *spu, int cpu);
+
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+		void *code, int code_size);
+
+extern void spu_invalidate_slbs(struct spu *spu);
+extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
+int spu_64k_pages_available(void);
+
+/* Calls from the memory management to the SPU */
+struct mm_struct;
+extern void spu_flush_all_slbs(struct mm_struct *mm);
+
+/* system callbacks from the SPU */
+struct spu_syscall_block {
+	u64 nr_ret;
+	u64 parm[6];
+};
+extern long spu_sys_callback(struct spu_syscall_block *s);
+
+/* syscalls implemented in spufs */
+struct file;
+struct coredump_params;
+struct spufs_calls {
+	long (*create_thread)(const char __user *name,
+					unsigned int flags, umode_t mode,
+					struct file *neighbor);
+	long (*spu_run)(struct file *filp, __u32 __user *unpc,
+						__u32 __user *ustatus);
+	int (*coredump_extra_notes_size)(void);
+	int (*coredump_extra_notes_write)(struct coredump_params *cprm);
+	void (*notify_spus_active)(void);
+	struct module *owner;
+};
+
+/* return status from spu_run, same as in libspe */
+#define SPE_EVENT_DMA_ALIGNMENT		0x0008	/*A DMA alignment error */
+#define SPE_EVENT_SPE_ERROR		0x0010	/*An illegal instruction error*/
+#define SPE_EVENT_SPE_DATA_SEGMENT	0x0020	/*A DMA segmentation error    */
+#define SPE_EVENT_SPE_DATA_STORAGE	0x0040	/*A DMA storage error */
+#define SPE_EVENT_INVALID_DMA		0x0800	/* Invalid MFC DMA */
+
+/*
+ * Flags for sys_spu_create.
+ */
+#define SPU_CREATE_EVENTS_ENABLED	0x0001
+#define SPU_CREATE_GANG			0x0002
+#define SPU_CREATE_NOSCHED		0x0004
+#define SPU_CREATE_ISOLATE		0x0008
+#define SPU_CREATE_AFFINITY_SPU		0x0010
+#define SPU_CREATE_AFFINITY_MEM		0x0020
+
+#define SPU_CREATE_FLAG_ALL		0x003f /* mask of all valid flags */
+
+
+int register_spu_syscalls(struct spufs_calls *calls);
+void unregister_spu_syscalls(struct spufs_calls *calls);
+
+int spu_add_dev_attr(struct device_attribute *attr);
+void spu_remove_dev_attr(struct device_attribute *attr);
+
+int spu_add_dev_attr_group(const struct attribute_group *attrs);
+void spu_remove_dev_attr_group(const struct attribute_group *attrs);
+
+extern void notify_spus_active(void);
+extern void do_notify_spus_active(void);
+
+/*
+ * This defines the Local Store, Problem Area and Privilege Area of an SPU.
+ */
+
+union mfc_tag_size_class_cmd {
+	struct {
+		u16 mfc_size;
+		u16 mfc_tag;
+		u8  pad;
+		u8  mfc_rclassid;
+		u16 mfc_cmd;
+	} u;
+	struct {
+		u32 mfc_size_tag32;
+		u32 mfc_class_cmd32;
+	} by32;
+	u64 all64;
+};
+
+struct mfc_cq_sr {
+	u64 mfc_cq_data0_RW;
+	u64 mfc_cq_data1_RW;
+	u64 mfc_cq_data2_RW;
+	u64 mfc_cq_data3_RW;
+};
+
+struct spu_problem {
+#define MS_SYNC_PENDING         1L
+	u64 spc_mssync_RW;					/* 0x0000 */
+	u8  pad_0x0008_0x3000[0x3000 - 0x0008];
+
+	/* DMA Area */
+	u8  pad_0x3000_0x3004[0x4];				/* 0x3000 */
+	u32 mfc_lsa_W;						/* 0x3004 */
+	u64 mfc_ea_W;						/* 0x3008 */
+	union mfc_tag_size_class_cmd mfc_union_W;			/* 0x3010 */
+	u8  pad_0x3018_0x3104[0xec];				/* 0x3018 */
+	u32 dma_qstatus_R;					/* 0x3104 */
+	u8  pad_0x3108_0x3204[0xfc];				/* 0x3108 */
+	u32 dma_querytype_RW;					/* 0x3204 */
+	u8  pad_0x3208_0x321c[0x14];				/* 0x3208 */
+	u32 dma_querymask_RW;					/* 0x321c */
+	u8  pad_0x3220_0x322c[0xc];				/* 0x3220 */
+	u32 dma_tagstatus_R;					/* 0x322c */
+#define DMA_TAGSTATUS_INTR_ANY	1u
+#define DMA_TAGSTATUS_INTR_ALL	2u
+	u8  pad_0x3230_0x4000[0x4000 - 0x3230]; 		/* 0x3230 */
+
+	/* SPU Control Area */
+	u8  pad_0x4000_0x4004[0x4];				/* 0x4000 */
+	u32 pu_mb_R;						/* 0x4004 */
+	u8  pad_0x4008_0x400c[0x4];				/* 0x4008 */
+	u32 spu_mb_W;						/* 0x400c */
+	u8  pad_0x4010_0x4014[0x4];				/* 0x4010 */
+	u32 mb_stat_R;						/* 0x4014 */
+	u8  pad_0x4018_0x401c[0x4];				/* 0x4018 */
+	u32 spu_runcntl_RW;					/* 0x401c */
+#define SPU_RUNCNTL_STOP	0L
+#define SPU_RUNCNTL_RUNNABLE	1L
+#define SPU_RUNCNTL_ISOLATE	2L
+	u8  pad_0x4020_0x4024[0x4];				/* 0x4020 */
+	u32 spu_status_R;					/* 0x4024 */
+#define SPU_STOP_STATUS_SHIFT           16
+#define SPU_STATUS_STOPPED		0x0
+#define SPU_STATUS_RUNNING		0x1
+#define SPU_STATUS_STOPPED_BY_STOP	0x2
+#define SPU_STATUS_STOPPED_BY_HALT	0x4
+#define SPU_STATUS_WAITING_FOR_CHANNEL	0x8
+#define SPU_STATUS_SINGLE_STEP		0x10
+#define SPU_STATUS_INVALID_INSTR        0x20
+#define SPU_STATUS_INVALID_CH           0x40
+#define SPU_STATUS_ISOLATED_STATE       0x80
+#define SPU_STATUS_ISOLATED_LOAD_STATUS 0x200
+#define SPU_STATUS_ISOLATED_EXIT_STATUS 0x400
+	u8  pad_0x4028_0x402c[0x4];				/* 0x4028 */
+	u32 spu_spe_R;						/* 0x402c */
+	u8  pad_0x4030_0x4034[0x4];				/* 0x4030 */
+	u32 spu_npc_RW;						/* 0x4034 */
+	u8  pad_0x4038_0x14000[0x14000 - 0x4038];		/* 0x4038 */
+
+	/* Signal Notification Area */
+	u8  pad_0x14000_0x1400c[0xc];				/* 0x14000 */
+	u32 signal_notify1;					/* 0x1400c */
+	u8  pad_0x14010_0x1c00c[0x7ffc];			/* 0x14010 */
+	u32 signal_notify2;					/* 0x1c00c */
+} __attribute__ ((aligned(0x20000)));
+
+/* SPU Privilege 2 State Area */
+struct spu_priv2 {
+	/* MFC Registers */
+	u8  pad_0x0000_0x1100[0x1100 - 0x0000]; 		/* 0x0000 */
+
+	/* SLB Management Registers */
+	u8  pad_0x1100_0x1108[0x8];				/* 0x1100 */
+	u64 slb_index_W;					/* 0x1108 */
+#define SLB_INDEX_MASK				0x7L
+	u64 slb_esid_RW;					/* 0x1110 */
+	u64 slb_vsid_RW;					/* 0x1118 */
+#define SLB_VSID_SUPERVISOR_STATE	(0x1ull << 11)
+#define SLB_VSID_SUPERVISOR_STATE_MASK	(0x1ull << 11)
+#define SLB_VSID_PROBLEM_STATE		(0x1ull << 10)
+#define SLB_VSID_PROBLEM_STATE_MASK	(0x1ull << 10)
+#define SLB_VSID_EXECUTE_SEGMENT	(0x1ull << 9)
+#define SLB_VSID_NO_EXECUTE_SEGMENT	(0x1ull << 9)
+#define SLB_VSID_EXECUTE_SEGMENT_MASK	(0x1ull << 9)
+#define SLB_VSID_4K_PAGE		(0x0 << 8)
+#define SLB_VSID_LARGE_PAGE		(0x1ull << 8)
+#define SLB_VSID_PAGE_SIZE_MASK		(0x1ull << 8)
+#define SLB_VSID_CLASS_MASK		(0x1ull << 7)
+#define SLB_VSID_VIRTUAL_PAGE_SIZE_MASK	(0x1ull << 6)
+	u64 slb_invalidate_entry_W;				/* 0x1120 */
+	u64 slb_invalidate_all_W;				/* 0x1128 */
+	u8  pad_0x1130_0x2000[0x2000 - 0x1130]; 		/* 0x1130 */
+
+	/* Context Save / Restore Area */
+	struct mfc_cq_sr spuq[16];				/* 0x2000 */
+	struct mfc_cq_sr puq[8];				/* 0x2200 */
+	u8  pad_0x2300_0x3000[0x3000 - 0x2300]; 		/* 0x2300 */
+
+	/* MFC Control */
+	u64 mfc_control_RW;					/* 0x3000 */
+#define MFC_CNTL_RESUME_DMA_QUEUE		(0ull << 0)
+#define MFC_CNTL_SUSPEND_DMA_QUEUE		(1ull << 0)
+#define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK		(1ull << 0)
+#define MFC_CNTL_SUSPEND_MASK			(1ull << 4)
+#define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION	(0ull << 8)
+#define MFC_CNTL_SUSPEND_IN_PROGRESS		(1ull << 8)
+#define MFC_CNTL_SUSPEND_COMPLETE		(3ull << 8)
+#define MFC_CNTL_SUSPEND_DMA_STATUS_MASK	(3ull << 8)
+#define MFC_CNTL_DMA_QUEUES_EMPTY		(1ull << 14)
+#define MFC_CNTL_DMA_QUEUES_EMPTY_MASK		(1ull << 14)
+#define MFC_CNTL_PURGE_DMA_REQUEST		(1ull << 15)
+#define MFC_CNTL_PURGE_DMA_IN_PROGRESS		(1ull << 24)
+#define MFC_CNTL_PURGE_DMA_COMPLETE		(3ull << 24)
+#define MFC_CNTL_PURGE_DMA_STATUS_MASK		(3ull << 24)
+#define MFC_CNTL_RESTART_DMA_COMMAND		(1ull << 32)
+#define MFC_CNTL_DMA_COMMAND_REISSUE_PENDING	(1ull << 32)
+#define MFC_CNTL_DMA_COMMAND_REISSUE_STATUS_MASK (1ull << 32)
+#define MFC_CNTL_MFC_PRIVILEGE_STATE		(2ull << 33)
+#define MFC_CNTL_MFC_PROBLEM_STATE		(3ull << 33)
+#define MFC_CNTL_MFC_KEY_PROTECTION_STATE_MASK	(3ull << 33)
+#define MFC_CNTL_DECREMENTER_HALTED		(1ull << 35)
+#define MFC_CNTL_DECREMENTER_RUNNING		(1ull << 40)
+#define MFC_CNTL_DECREMENTER_STATUS_MASK	(1ull << 40)
+	u8  pad_0x3008_0x4000[0x4000 - 0x3008]; 		/* 0x3008 */
+
+	/* Interrupt Mailbox */
+	u64 puint_mb_R;						/* 0x4000 */
+	u8  pad_0x4008_0x4040[0x4040 - 0x4008]; 		/* 0x4008 */
+
+	/* SPU Control */
+	u64 spu_privcntl_RW;					/* 0x4040 */
+#define SPU_PRIVCNTL_MODE_NORMAL		(0x0ull << 0)
+#define SPU_PRIVCNTL_MODE_SINGLE_STEP		(0x1ull << 0)
+#define SPU_PRIVCNTL_MODE_MASK			(0x1ull << 0)
+#define SPU_PRIVCNTL_NO_ATTENTION_EVENT		(0x0ull << 1)
+#define SPU_PRIVCNTL_ATTENTION_EVENT		(0x1ull << 1)
+#define SPU_PRIVCNTL_ATTENTION_EVENT_MASK	(0x1ull << 1)
+#define SPU_PRIVCNT_LOAD_REQUEST_NORMAL		(0x0ull << 2)
+#define SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK	(0x1ull << 2)
+	u8  pad_0x4048_0x4058[0x10];				/* 0x4048 */
+	u64 spu_lslr_RW;					/* 0x4058 */
+	u64 spu_chnlcntptr_RW;					/* 0x4060 */
+	u64 spu_chnlcnt_RW;					/* 0x4068 */
+	u64 spu_chnldata_RW;					/* 0x4070 */
+	u64 spu_cfg_RW;						/* 0x4078 */
+	u8  pad_0x4080_0x5000[0x5000 - 0x4080]; 		/* 0x4080 */
+
+	/* PV2_ImplRegs: Implementation-specific privileged-state 2 regs */
+	u64 spu_pm_trace_tag_status_RW;				/* 0x5000 */
+	u64 spu_tag_status_query_RW;				/* 0x5008 */
+#define TAG_STATUS_QUERY_CONDITION_BITS (0x3ull << 32)
+#define TAG_STATUS_QUERY_MASK_BITS (0xffffffffull)
+	u64 spu_cmd_buf1_RW;					/* 0x5010 */
+#define SPU_COMMAND_BUFFER_1_LSA_BITS (0x7ffffull << 32)
+#define SPU_COMMAND_BUFFER_1_EAH_BITS (0xffffffffull)
+	u64 spu_cmd_buf2_RW;					/* 0x5018 */
+#define SPU_COMMAND_BUFFER_2_EAL_BITS ((0xffffffffull) << 32)
+#define SPU_COMMAND_BUFFER_2_TS_BITS (0xffffull << 16)
+#define SPU_COMMAND_BUFFER_2_TAG_BITS (0x3full)
+	u64 spu_atomic_status_RW;				/* 0x5020 */
+} __attribute__ ((aligned(0x20000)));
+
+/* SPU Privilege 1 State Area */
+struct spu_priv1 {
+	/* Control and Configuration Area */
+	u64 mfc_sr1_RW;						/* 0x000 */
+#define MFC_STATE1_LOCAL_STORAGE_DECODE_MASK	0x01ull
+#define MFC_STATE1_BUS_TLBIE_MASK		0x02ull
+#define MFC_STATE1_REAL_MODE_OFFSET_ENABLE_MASK	0x04ull
+#define MFC_STATE1_PROBLEM_STATE_MASK		0x08ull
+#define MFC_STATE1_RELOCATE_MASK		0x10ull
+#define MFC_STATE1_MASTER_RUN_CONTROL_MASK	0x20ull
+#define MFC_STATE1_TABLE_SEARCH_MASK		0x40ull
+	u64 mfc_lpid_RW;					/* 0x008 */
+	u64 spu_idr_RW;						/* 0x010 */
+	u64 mfc_vr_RO;						/* 0x018 */
+#define MFC_VERSION_BITS		(0xffffU << 16)	/* unsigned: no overflow, no sign extension into u64 */
+#define MFC_REVISION_BITS		(0xffff)
+#define MFC_GET_VERSION_BITS(vr)	(((vr) & MFC_VERSION_BITS) >> 16)
+#define MFC_GET_REVISION_BITS(vr)	((vr) & MFC_REVISION_BITS)
+	u64 spu_vr_RO;						/* 0x020 */
+#define SPU_VERSION_BITS		(0xffffU << 16)	/* unsigned: no overflow, no sign extension into u64 */
+#define SPU_REVISION_BITS		(0xffff)
+#define SPU_GET_VERSION_BITS(vr)	(((vr) & SPU_VERSION_BITS) >> 16)
+#define SPU_GET_REVISION_BITS(vr)	((vr) & SPU_REVISION_BITS)
+	u8  pad_0x28_0x100[0x100 - 0x28];			/* 0x28 */
+
+	/* Interrupt Area */
+	u64 int_mask_RW[3];					/* 0x100 */
+#define CLASS0_ENABLE_DMA_ALIGNMENT_INTR		0x1L
+#define CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR		0x2L
+#define CLASS0_ENABLE_SPU_ERROR_INTR			0x4L
+#define CLASS0_ENABLE_MFC_FIR_INTR			0x8L
+#define CLASS1_ENABLE_SEGMENT_FAULT_INTR		0x1L
+#define CLASS1_ENABLE_STORAGE_FAULT_INTR		0x2L
+#define CLASS1_ENABLE_LS_COMPARE_SUSPEND_ON_GET_INTR	0x4L
+#define CLASS1_ENABLE_LS_COMPARE_SUSPEND_ON_PUT_INTR	0x8L
+#define CLASS2_ENABLE_MAILBOX_INTR			0x1L
+#define CLASS2_ENABLE_SPU_STOP_INTR			0x2L
+#define CLASS2_ENABLE_SPU_HALT_INTR			0x4L
+#define CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR	0x8L
+#define CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR		0x10L
+	u8  pad_0x118_0x140[0x28];				/* 0x118 */
+	u64 int_stat_RW[3];					/* 0x140 */
+#define CLASS0_DMA_ALIGNMENT_INTR			0x1L
+#define CLASS0_INVALID_DMA_COMMAND_INTR			0x2L
+#define CLASS0_SPU_ERROR_INTR				0x4L
+#define CLASS0_INTR_MASK				0x7L
+#define CLASS1_SEGMENT_FAULT_INTR			0x1L
+#define CLASS1_STORAGE_FAULT_INTR			0x2L
+#define CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR		0x4L
+#define CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR		0x8L
+#define CLASS1_INTR_MASK				0xfL
+#define CLASS2_MAILBOX_INTR				0x1L
+#define CLASS2_SPU_STOP_INTR				0x2L
+#define CLASS2_SPU_HALT_INTR				0x4L
+#define CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR		0x8L
+#define CLASS2_MAILBOX_THRESHOLD_INTR			0x10L
+#define CLASS2_INTR_MASK				0x1fL
+	u8  pad_0x158_0x180[0x28];				/* 0x158 */
+	u64 int_route_RW;					/* 0x180 */
+
+	/* Interrupt Routing */
+	u8  pad_0x188_0x200[0x200 - 0x188];			/* 0x188 */
+
+	/* Atomic Unit Control Area */
+	u64 mfc_atomic_flush_RW;				/* 0x200 */
+#define mfc_atomic_flush_enable			0x1L
+	u8  pad_0x208_0x280[0x78];				/* 0x208 */
+	u64 resource_allocation_groupID_RW;			/* 0x280 */
+	u64 resource_allocation_enable_RW; 			/* 0x288 */
+	u8  pad_0x290_0x3c8[0x3c8 - 0x290];			/* 0x290 */
+
+	/* SPU_Cache_ImplRegs: Implementation-dependent cache registers */
+
+	u64 smf_sbi_signal_sel;					/* 0x3c8 */
+#define smf_sbi_mask_lsb	56
+#define smf_sbi_shift		(63 - smf_sbi_mask_lsb)
+#define smf_sbi_mask		(0x301LL << smf_sbi_shift)
+#define smf_sbi_bus0_bits	(0x001LL << smf_sbi_shift)
+#define smf_sbi_bus2_bits	(0x100LL << smf_sbi_shift)
+#define smf_sbi2_bus0_bits	(0x201LL << smf_sbi_shift)
+#define smf_sbi2_bus2_bits	(0x300LL << smf_sbi_shift)
+	u64 smf_ato_signal_sel;					/* 0x3d0 */
+#define smf_ato_mask_lsb	35
+#define smf_ato_shift		(63 - smf_ato_mask_lsb)
+#define smf_ato_mask		(0x3LL << smf_ato_shift)
+#define smf_ato_bus0_bits	(0x2LL << smf_ato_shift)
+#define smf_ato_bus2_bits	(0x1LL << smf_ato_shift)
+	u8  pad_0x3d8_0x400[0x400 - 0x3d8];			/* 0x3d8 */
+
+	/* TLB Management Registers */
+	u64 mfc_sdr_RW;						/* 0x400 */
+	u8  pad_0x408_0x500[0xf8];				/* 0x408 */
+	u64 tlb_index_hint_RO;					/* 0x500 */
+	u64 tlb_index_W;					/* 0x508 */
+	u64 tlb_vpn_RW;						/* 0x510 */
+	u64 tlb_rpn_RW;						/* 0x518 */
+	u8  pad_0x520_0x540[0x20];				/* 0x520 */
+	u64 tlb_invalidate_entry_W;				/* 0x540 */
+	u64 tlb_invalidate_all_W;				/* 0x548 */
+	u8  pad_0x550_0x580[0x580 - 0x550];			/* 0x550 */
+
+	/* SPU_MMU_ImplRegs: Implementation-dependent MMU registers */
+	u64 smm_hid;						/* 0x580 */
+#define PAGE_SIZE_MASK		0xf000000000000000ull
+#define PAGE_SIZE_16MB_64KB	0x2000000000000000ull
+	u8  pad_0x588_0x600[0x600 - 0x588];			/* 0x588 */
+
+	/* MFC Status/Control Area */
+	u64 mfc_accr_RW;					/* 0x600 */
+#define MFC_ACCR_EA_ACCESS_GET		(1 << 0)
+#define MFC_ACCR_EA_ACCESS_PUT		(1 << 1)
+#define MFC_ACCR_LS_ACCESS_GET		(1 << 3)
+#define MFC_ACCR_LS_ACCESS_PUT		(1 << 4)
+	u8  pad_0x608_0x610[0x8];				/* 0x608 */
+	u64 mfc_dsisr_RW;					/* 0x610 */
+#define MFC_DSISR_PTE_NOT_FOUND		(1 << 30)
+#define MFC_DSISR_ACCESS_DENIED		(1 << 27)
+#define MFC_DSISR_ATOMIC		(1 << 26)
+#define MFC_DSISR_ACCESS_PUT		(1 << 25)
+#define MFC_DSISR_ADDR_MATCH		(1 << 22)
+#define MFC_DSISR_LS			(1 << 17)
+#define MFC_DSISR_L			(1 << 16)
+#define MFC_DSISR_ADDRESS_OVERFLOW	(1 << 0)
+	u8  pad_0x618_0x620[0x8];				/* 0x618 */
+	u64 mfc_dar_RW;						/* 0x620 */
+	u8  pad_0x628_0x700[0x700 - 0x628];			/* 0x628 */
+
+	/* Replacement Management Table (RMT) Area */
+	u64 rmt_index_RW;					/* 0x700 */
+	u8  pad_0x708_0x710[0x8];				/* 0x708 */
+	u64 rmt_data1_RW;					/* 0x710 */
+	u8  pad_0x718_0x800[0x800 - 0x718];			/* 0x718 */
+
+	/* Control/Configuration Registers */
+	u64 mfc_dsir_R;						/* 0x800 */
+#define MFC_DSIR_Q			(1ull << 31)	/* ull: exactly bit 31 of the u64 register */
+#define MFC_DSIR_SPU_QUEUE		MFC_DSIR_Q
+	u64 mfc_lsacr_RW;					/* 0x808 */
+#define MFC_LSACR_COMPARE_MASK		((~0ull) << 32)
+#define MFC_LSACR_COMPARE_ADDR		((~0ull) >> 32)
+	u64 mfc_lscrr_R;					/* 0x810 */
+#define MFC_LSCRR_Q			(1ull << 31)	/* ull: exactly bit 31, disjoint from QI_MASK */
+#define MFC_LSCRR_SPU_QUEUE		MFC_LSCRR_Q
+#define MFC_LSCRR_QI_SHIFT		32
+#define MFC_LSCRR_QI_MASK		((~0ull) << MFC_LSCRR_QI_SHIFT)
+	u8  pad_0x818_0x820[0x8];				/* 0x818 */
+	u64 mfc_tclass_id_RW;					/* 0x820 */
+#define MFC_TCLASS_ID_ENABLE		(1L << 0L)
+#define MFC_TCLASS_SLOT2_ENABLE		(1L << 5L)
+#define MFC_TCLASS_SLOT1_ENABLE		(1L << 6L)
+#define MFC_TCLASS_SLOT0_ENABLE		(1L << 7L)
+#define MFC_TCLASS_QUOTA_2_SHIFT	8L
+#define MFC_TCLASS_QUOTA_1_SHIFT	16L
+#define MFC_TCLASS_QUOTA_0_SHIFT	24L
+#define MFC_TCLASS_QUOTA_2_MASK		(0x1FL << MFC_TCLASS_QUOTA_2_SHIFT)
+#define MFC_TCLASS_QUOTA_1_MASK		(0x1FL << MFC_TCLASS_QUOTA_1_SHIFT)
+#define MFC_TCLASS_QUOTA_0_MASK		(0x1FL << MFC_TCLASS_QUOTA_0_SHIFT)
+	u8  pad_0x828_0x900[0x900 - 0x828];			/* 0x828 */
+
+	/* Real Mode Support Registers */
+	u64 mfc_rm_boundary;					/* 0x900 */
+	u8  pad_0x908_0x938[0x30];				/* 0x908 */
+	u64 smf_dma_signal_sel;					/* 0x938 */
+#define mfc_dma1_mask_lsb	41
+#define mfc_dma1_shift		(63 - mfc_dma1_mask_lsb)
+#define mfc_dma1_mask		(0x3LL << mfc_dma1_shift)
+#define mfc_dma1_bits		(0x1LL << mfc_dma1_shift)
+#define mfc_dma2_mask_lsb	43
+#define mfc_dma2_shift		(63 - mfc_dma2_mask_lsb)
+#define mfc_dma2_mask		(0x3LL << mfc_dma2_shift)
+#define mfc_dma2_bits		(0x1LL << mfc_dma2_shift)
+	u8  pad_0x940_0xa38[0xf8];				/* 0x940 */
+	u64 smm_signal_sel;					/* 0xa38 */
+#define smm_sig_mask_lsb	12
+#define smm_sig_shift		(63 - smm_sig_mask_lsb)
+#define smm_sig_mask		(0x3LL << smm_sig_shift)
+#define smm_sig_bus0_bits	(0x2LL << smm_sig_shift)
+#define smm_sig_bus2_bits	(0x1LL << smm_sig_shift)
+	u8  pad_0xa40_0xc00[0xc00 - 0xa40];			/* 0xa40 */
+
+	/* DMA Command Error Area */
+	u64 mfc_cer_R;						/* 0xc00 */
+#define MFC_CER_Q		(1ull << 31)	/* ull: exactly bit 31 of the u64 register */
+#define MFC_CER_SPU_QUEUE	MFC_CER_Q
+	u8  pad_0xc08_0x1000[0x1000 - 0xc08];			/* 0xc08 */
+
+	/* PV1_ImplRegs: Implementation-dependent privileged-state 1 regs */
+	/* SPU ECC control/status/address and error mask registers */
+	u64 spu_ecc_cntl_RW;					/* 0x1000 */
+#define SPU_ECC_CNTL_E			(1ull << 0ull)
+#define SPU_ECC_CNTL_ENABLE		SPU_ECC_CNTL_E
+#define SPU_ECC_CNTL_DISABLE		(~SPU_ECC_CNTL_E & 1L)
+#define SPU_ECC_CNTL_S			(1ull << 1ull)
+#define SPU_ECC_STOP_AFTER_ERROR	SPU_ECC_CNTL_S
+#define SPU_ECC_CONTINUE_AFTER_ERROR	(~SPU_ECC_CNTL_S & 2L)
+#define SPU_ECC_CNTL_B			(1ull << 2ull)
+#define SPU_ECC_BACKGROUND_ENABLE	SPU_ECC_CNTL_B
+#define SPU_ECC_BACKGROUND_DISABLE	(~SPU_ECC_CNTL_B & 4L)
+#define SPU_ECC_CNTL_I_SHIFT		3ull
+#define SPU_ECC_CNTL_I_MASK		(3ull << SPU_ECC_CNTL_I_SHIFT)
+#define SPU_ECC_WRITE_ALWAYS		(0ull << SPU_ECC_CNTL_I_SHIFT)	/* I field clear; NOTE(review): confirm value against the hardware spec */
+#define SPU_ECC_WRITE_CORRECTABLE	(1ull << SPU_ECC_CNTL_I_SHIFT)
+#define SPU_ECC_WRITE_UNCORRECTABLE	(3ull << SPU_ECC_CNTL_I_SHIFT)
+#define SPU_ECC_CNTL_D			(1ull << 5ull)
+#define SPU_ECC_DETECTION_ENABLE	SPU_ECC_CNTL_D
+#define SPU_ECC_DETECTION_DISABLE	(~SPU_ECC_CNTL_D & 32L)
+	u64 spu_ecc_stat_RW;					/* 0x1008 */
+#define SPU_ECC_CORRECTED_ERROR		(1ull << 0ul)
+#define SPU_ECC_UNCORRECTED_ERROR	(1ull << 1ul)
+#define SPU_ECC_SCRUB_COMPLETE		(1ull << 2ul)
+#define SPU_ECC_SCRUB_IN_PROGRESS	(1ull << 3ul)
+#define SPU_ECC_INSTRUCTION_ERROR	(1ull << 4ul)
+#define SPU_ECC_DATA_ERROR		(1ull << 5ul)
+#define SPU_ECC_DMA_ERROR		(1ull << 6ul)
+#define SPU_ECC_STATUS_CNT_MASK		(256ull << 8)
+	u64 spu_ecc_addr_RW;					/* 0x1010 */
+	u64 spu_err_mask_RW;					/* 0x1018 */
+#define SPU_ERR_ILLEGAL_INSTR		(1ull << 0ul)
+#define SPU_ERR_ILLEGAL_CHANNEL		(1ull << 1ul)
+	u8  pad_0x1020_0x1028[0x1028 - 0x1020];			/* 0x1020 */
+
+	/* SPU Debug-Trace Bus (DTB) Selection Registers */
+	u64 spu_trig0_sel;					/* 0x1028 */
+	u64 spu_trig1_sel;					/* 0x1030 */
+	u64 spu_trig2_sel;					/* 0x1038 */
+	u64 spu_trig3_sel;					/* 0x1040 */
+	u64 spu_trace_sel;					/* 0x1048 */
+#define spu_trace_sel_mask		0x1f1fLL
+#define spu_trace_sel_bus0_bits		0x1000LL
+#define spu_trace_sel_bus2_bits		0x0010LL
+	u64 spu_event0_sel;					/* 0x1050 */
+	u64 spu_event1_sel;					/* 0x1058 */
+	u64 spu_event2_sel;					/* 0x1060 */
+	u64 spu_event3_sel;					/* 0x1068 */
+	u64 spu_trace_cntl;					/* 0x1070 */
+} __attribute__ ((aligned(0x2000)));
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/powerpc/include/asm/spu_csa.h b/arch/powerpc/include/asm/spu_csa.h
new file mode 100644
index 0000000000..c33df961c0
--- /dev/null
+++ b/arch/powerpc/include/asm/spu_csa.h
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * spu_csa.h: Definitions for SPU context save area (CSA).
+ *
+ * (C) Copyright IBM 2005
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+#ifndef _SPU_CSA_H_
+#define _SPU_CSA_H_
+#ifdef __KERNEL__
+
+/*
+ * Total number of 128-bit registers.
+ */
+#define NR_SPU_GPRS         	128
+#define NR_SPU_SPRS         	9
+#define NR_SPU_REGS_PAD	    	7
+#define NR_SPU_SPILL_REGS   	144	/* GPRS + SPRS + PAD */
+#define SIZEOF_SPU_SPILL_REGS	(NR_SPU_SPILL_REGS * 16)	/* bytes: 16 per 128-bit register */
+
+#define SPU_SAVE_COMPLETE      	0x3FFB
+#define SPU_RESTORE_COMPLETE   	0x3FFC
+
+/*
+ * Definitions for various 'stopped' status conditions,
+ * to be recreated during context restore.
+ */
+#define SPU_STOPPED_STATUS_P    1
+#define SPU_STOPPED_STATUS_I    2
+#define SPU_STOPPED_STATUS_H    3
+#define SPU_STOPPED_STATUS_S    4
+#define SPU_STOPPED_STATUS_S_I  5
+#define SPU_STOPPED_STATUS_S_P  6
+#define SPU_STOPPED_STATUS_P_H  7
+#define SPU_STOPPED_STATUS_P_I  8
+#define SPU_STOPPED_STATUS_R    9
+
+/*
+ * Definitions for software decrementer status flag.
+ */
+#define SPU_DECR_STATUS_RUNNING 0x1
+#define SPU_DECR_STATUS_WRAPPED 0x2
+
+#ifndef  __ASSEMBLY__
+/**
+ * struct spu_reg128 - generic 128-bit register definition.
+ */
+struct spu_reg128 {
+	u32 slot[4];	/* four 32-bit words = 128 bits */
+};
+
+/**
+ * struct spu_lscsa - Local Store Context Save Area.
+ * @gprs: Array of saved registers.
+ * @fpcr: Saved floating point status control register.
+ * @decr: Saved decrementer value.
+ * @decr_status: Indicates software decrementer status flags.
+ * @ppu_mb: Saved PPU mailbox data.
+ * @ppuint_mb: Saved PPU interrupting mailbox data.
+ * @tag_mask: Saved tag group mask.
+ * @event_mask: Saved event mask.
+ * @srr0: Saved SRR0.
+ * @stopped_status: Conditions to be recreated by restore.
+ * @ls: Saved contents of Local Storage Area.
+ *
+ * The LSCSA represents state that is primarily saved and
+ * restored by SPU-side code.
+ */
+struct spu_lscsa {
+	struct spu_reg128 gprs[128];
+	struct spu_reg128 fpcr;
+	struct spu_reg128 decr;
+	struct spu_reg128 decr_status;
+	struct spu_reg128 ppu_mb;
+	struct spu_reg128 ppuint_mb;
+	struct spu_reg128 tag_mask;
+	struct spu_reg128 event_mask;
+	struct spu_reg128 srr0;
+	struct spu_reg128 stopped_status;
+
+	/*
+	 * 'ls' must be page-aligned on all configurations.
+	 * Since we don't want to rely on having the spu-gcc
+	 * installed to build the kernel and this structure
+	 * is used in the SPU-side code, make it 64k-page
+	 * aligned for now.
+	 */
+	unsigned char ls[LS_SIZE] __attribute__((aligned(65536)));
+};
+
+#ifndef __SPU__
+/*
+ * struct spu_problem_collapsed - condensed problem state area, w/o pads.
+ */
+struct spu_problem_collapsed {
+	u64 spc_mssync_RW;
+	u32 mfc_lsa_W;
+	u32 unused_pad0;
+	u64 mfc_ea_W;
+	union mfc_tag_size_class_cmd mfc_union_W;
+	u32 dma_qstatus_R;
+	u32 dma_querytype_RW;
+	u32 dma_querymask_RW;
+	u32 dma_tagstatus_R;
+	u32 pu_mb_R;
+	u32 spu_mb_W;
+	u32 mb_stat_R;
+	u32 spu_runcntl_RW;
+	u32 spu_status_R;
+	u32 spu_spc_R;
+	u32 spu_npc_RW;
+	u32 signal_notify1;
+	u32 signal_notify2;
+	u32 unused_pad1;
+};
+
+/*
+ * struct spu_priv1_collapsed - condensed privileged 1 area, w/o pads.
+ */
+struct spu_priv1_collapsed {
+	u64 mfc_sr1_RW;
+	u64 mfc_lpid_RW;
+	u64 spu_idr_RW;
+	u64 mfc_vr_RO;
+	u64 spu_vr_RO;
+	u64 int_mask_class0_RW;
+	u64 int_mask_class1_RW;
+	u64 int_mask_class2_RW;
+	u64 int_stat_class0_RW;
+	u64 int_stat_class1_RW;
+	u64 int_stat_class2_RW;
+	u64 int_route_RW;
+	u64 mfc_atomic_flush_RW;
+	u64 resource_allocation_groupID_RW;
+	u64 resource_allocation_enable_RW;
+	u64 mfc_fir_R;
+	u64 mfc_fir_status_or_W;
+	u64 mfc_fir_status_and_W;
+	u64 mfc_fir_mask_R;
+	u64 mfc_fir_mask_or_W;
+	u64 mfc_fir_mask_and_W;
+	u64 mfc_fir_chkstp_enable_RW;
+	u64 smf_sbi_signal_sel;
+	u64 smf_ato_signal_sel;
+	u64 tlb_index_hint_RO;
+	u64 tlb_index_W;
+	u64 tlb_vpn_RW;
+	u64 tlb_rpn_RW;
+	u64 tlb_invalidate_entry_W;
+	u64 tlb_invalidate_all_W;
+	u64 smm_hid;
+	u64 mfc_accr_RW;
+	u64 mfc_dsisr_RW;
+	u64 mfc_dar_RW;
+	u64 rmt_index_RW;
+	u64 rmt_data1_RW;
+	u64 mfc_dsir_R;
+	u64 mfc_lsacr_RW;
+	u64 mfc_lscrr_R;
+	u64 mfc_tclass_id_RW;
+	u64 mfc_rm_boundary;
+	u64 smf_dma_signal_sel;
+	u64 smm_signal_sel;
+	u64 mfc_cer_R;
+	u64 pu_ecc_cntl_RW;
+	u64 pu_ecc_stat_RW;
+	u64 spu_ecc_addr_RW;
+	u64 spu_err_mask_RW;
+	u64 spu_trig0_sel;
+	u64 spu_trig1_sel;
+	u64 spu_trig2_sel;
+	u64 spu_trig3_sel;
+	u64 spu_trace_sel;
+	u64 spu_event0_sel;
+	u64 spu_event1_sel;
+	u64 spu_event2_sel;
+	u64 spu_event3_sel;
+	u64 spu_trace_cntl;
+};
+
+/*
+ * struct spu_priv2_collapsed - condensed privileged 2 area, w/o pads.
+ */
+struct spu_priv2_collapsed {
+	u64 slb_index_W;
+	u64 slb_esid_RW;
+	u64 slb_vsid_RW;
+	u64 slb_invalidate_entry_W;
+	u64 slb_invalidate_all_W;
+	struct mfc_cq_sr spuq[16];
+	struct mfc_cq_sr puq[8];
+	u64 mfc_control_RW;
+	u64 puint_mb_R;
+	u64 spu_privcntl_RW;
+	u64 spu_lslr_RW;
+	u64 spu_chnlcntptr_RW;
+	u64 spu_chnlcnt_RW;
+	u64 spu_chnldata_RW;
+	u64 spu_cfg_RW;
+	u64 spu_tag_status_query_RW;
+	u64 spu_cmd_buf1_RW;
+	u64 spu_cmd_buf2_RW;
+	u64 spu_atomic_status_RW;
+};
+
+/**
+ * struct spu_state - SPU context save area (CSA).
+ * @lscsa: Local Store Context Save Area.
+ * @prob: Collapsed Problem State Area, w/o pads.
+ * @priv1: Collapsed Privileged 1 Area, w/o pads.
+ * @priv2: Collapsed Privileged 2 Area, w/o pads.
+ * @spu_chnlcnt_RW: Array of saved channel counts.
+ * @spu_chnldata_RW: Array of saved channel data.
+ * @suspend_time: Time stamp when decrementer disabled.
+ *
+ * Structure representing the whole of the SPU
+ * context save area (CSA).  This struct contains
+ * all of the state necessary to suspend and then
+ * later optionally resume execution of an SPU
+ * context.
+ *
+ * The @lscsa region is by far the largest, and is
+ * allocated separately so that it may either be
+ * pinned or mapped to/from application memory, as
+ * appropriate for the OS environment.
+ */
+struct spu_state {
+	struct spu_lscsa *lscsa;
+	struct spu_problem_collapsed prob;
+	struct spu_priv1_collapsed priv1;
+	struct spu_priv2_collapsed priv2;
+	u64 spu_chnlcnt_RW[32];
+	u64 spu_chnldata_RW[32];
+	u32 spu_mailbox_data[4];
+	u32 pu_mailbox_data[1];
+	u64 class_0_dar, class_0_pending;
+	u64 class_1_dar, class_1_dsisr;
+	unsigned long suspend_time;
+	spinlock_t register_lock;
+};
+
+#endif /* !__SPU__ */
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _SPU_CSA_H_ */
diff --git a/arch/powerpc/include/asm/spu_info.h b/arch/powerpc/include/asm/spu_info.h
new file mode 100644
index 0000000000..732431034a
--- /dev/null
+++ b/arch/powerpc/include/asm/spu_info.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SPU info structures
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ */
+#ifndef _SPU_INFO_H
+#define _SPU_INFO_H
+
+#include <asm/spu.h>
+#include <uapi/asm/spu_info.h>
+
+#endif
diff --git a/arch/powerpc/include/asm/spu_priv1.h b/arch/powerpc/include/asm/spu_priv1.h
new file mode 100644
index 0000000000..2167d756e6
--- /dev/null
+++ b/arch/powerpc/include/asm/spu_priv1.h
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Defines an spu hypervisor abstraction layer.
+ *
+ *  Copyright 2006 Sony Corp.
+ */
+
+#if !defined(_SPU_PRIV1_H)
+#define _SPU_PRIV1_H
+#if defined(__KERNEL__)
+
+#include <linux/types.h>
+
+struct spu;
+struct spu_context;
+
+/* access to priv1 registers */
+
+struct spu_priv1_ops {	/* hooks invoked through the spu_*() inline wrappers in this header */
+	void (*int_mask_and)(struct spu *spu, int class, u64 mask);
+	void (*int_mask_or)(struct spu *spu, int class, u64 mask);
+	void (*int_mask_set)(struct spu *spu, int class, u64 mask);
+	u64 (*int_mask_get)(struct spu *spu, int class);
+	void (*int_stat_clear)(struct spu *spu, int class, u64 stat);
+	u64 (*int_stat_get)(struct spu *spu, int class);
+	void (*cpu_affinity_set)(struct spu *spu, int cpu);
+	u64 (*mfc_dar_get)(struct spu *spu);
+	u64 (*mfc_dsisr_get)(struct spu *spu);
+	void (*mfc_dsisr_set)(struct spu *spu, u64 dsisr);
+	void (*mfc_sdr_setup)(struct spu *spu);
+	void (*mfc_sr1_set)(struct spu *spu, u64 sr1);
+	u64 (*mfc_sr1_get)(struct spu *spu);
+	void (*mfc_tclass_id_set)(struct spu *spu, u64 tclass_id);
+	u64 (*mfc_tclass_id_get)(struct spu *spu);
+	void (*tlb_invalidate)(struct spu *spu);
+	void (*resource_allocation_groupID_set)(struct spu *spu, u64 id);
+	u64 (*resource_allocation_groupID_get)(struct spu *spu);
+	void (*resource_allocation_enable_set)(struct spu *spu, u64 enable);
+	u64 (*resource_allocation_enable_get)(struct spu *spu);
+};
+
+extern const struct spu_priv1_ops* spu_priv1_ops;
+
+/* Forward to the int_mask_and() hook of the installed spu_priv1_ops backend. */
+static inline void spu_int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	spu_priv1_ops->int_mask_and(spu, class, mask);
+}
+
+/* Forward to the int_mask_or() hook of the installed spu_priv1_ops backend. */
+static inline void spu_int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	spu_priv1_ops->int_mask_or(spu, class, mask);
+}
+
+/* Forward to the int_mask_set() hook of the installed spu_priv1_ops backend. */
+static inline void spu_int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	spu_priv1_ops->int_mask_set(spu, class, mask);
+}
+
+/* Return whatever the backend's int_mask_get() hook reports for @class. */
+static inline u64 spu_int_mask_get(struct spu *spu, int class)
+{
+	return spu_priv1_ops->int_mask_get(spu, class);
+}
+
+/* Forward to the int_stat_clear() hook of the installed spu_priv1_ops backend. */
+static inline void spu_int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	spu_priv1_ops->int_stat_clear(spu, class, stat);
+}
+
+/* Return whatever the backend's int_stat_get() hook reports for @class. */
+static inline u64 spu_int_stat_get(struct spu *spu, int class)
+{
+	return spu_priv1_ops->int_stat_get(spu, class);
+}
+
+/* Forward to the cpu_affinity_set() hook of the installed spu_priv1_ops backend. */
+static inline void spu_cpu_affinity_set(struct spu *spu, int cpu)
+{
+	spu_priv1_ops->cpu_affinity_set(spu, cpu);
+}
+
+/* Return the value produced by the backend's mfc_dar_get() hook. */
+static inline u64 spu_mfc_dar_get(struct spu *spu)
+{
+	return spu_priv1_ops->mfc_dar_get(spu);
+}
+
+/* Return the value produced by the backend's mfc_dsisr_get() hook. */
+static inline u64 spu_mfc_dsisr_get(struct spu *spu)
+{
+	return spu_priv1_ops->mfc_dsisr_get(spu);
+}
+
+/* Forward @dsisr to the backend's mfc_dsisr_set() hook. */
+static inline void spu_mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	spu_priv1_ops->mfc_dsisr_set(spu, dsisr);
+}
+
+/* Forward to the mfc_sdr_setup() hook of the installed spu_priv1_ops backend. */
+static inline void spu_mfc_sdr_setup(struct spu *spu)
+{
+	spu_priv1_ops->mfc_sdr_setup(spu);
+}
+
+/* Forward @sr1 to the backend's mfc_sr1_set() hook. */
+static inline void spu_mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	spu_priv1_ops->mfc_sr1_set(spu, sr1);
+}
+
+/* Return the value produced by the backend's mfc_sr1_get() hook. */
+static inline u64 spu_mfc_sr1_get(struct spu *spu)
+{
+	return spu_priv1_ops->mfc_sr1_get(spu);
+}
+
+/* Forward @tclass_id to the backend's mfc_tclass_id_set() hook. */
+static inline void spu_mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	spu_priv1_ops->mfc_tclass_id_set(spu, tclass_id);
+}
+
+/* Return the value produced by the backend's mfc_tclass_id_get() hook. */
+static inline u64 spu_mfc_tclass_id_get(struct spu *spu)
+{
+	return spu_priv1_ops->mfc_tclass_id_get(spu);
+}
+
+/* Forward to the tlb_invalidate() hook of the installed spu_priv1_ops backend. */
+static inline void spu_tlb_invalidate(struct spu *spu)
+{
+	spu_priv1_ops->tlb_invalidate(spu);
+}
+
+/* Forward @id to the backend's resource_allocation_groupID_set() hook. */
+static inline void spu_resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	spu_priv1_ops->resource_allocation_groupID_set(spu, id);
+}
+
+/* Return the value produced by the backend's resource_allocation_groupID_get() hook. */
+static inline u64 spu_resource_allocation_groupID_get(struct spu *spu)
+{
+	return spu_priv1_ops->resource_allocation_groupID_get(spu);
+}
+
+/* Forward @enable to the backend's resource_allocation_enable_set() hook. */
+static inline void spu_resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	spu_priv1_ops->resource_allocation_enable_set(spu, enable);
+}
+
+/* Return the value produced by the backend's resource_allocation_enable_get() hook. */
+static inline u64 spu_resource_allocation_enable_get(struct spu *spu)
+{
+	return spu_priv1_ops->resource_allocation_enable_get(spu);
+}
+
+/* spu management abstraction */
+
+struct spu_management_ops {	/* hooks reached via the inline wrappers that follow in this header */
+	int (*enumerate_spus)(int (*fn)(void *data));
+	int (*create_spu)(struct spu *spu, void *data);
+	int (*destroy_spu)(struct spu *spu);
+	void (*enable_spu)(struct spu_context *ctx);
+	void (*disable_spu)(struct spu_context *ctx);
+	int (*init_affinity)(void);
+};
+
+extern const struct spu_management_ops* spu_management_ops;
+
+/* Pass @fn to the enumerate_spus() hook of spu_management_ops; return its result. */
+static inline int spu_enumerate_spus(int (*fn)(void *data))
+{
+	return spu_management_ops->enumerate_spus(fn);
+}
+
+/* Forward to the create_spu() hook of spu_management_ops; return its result. */
+static inline int spu_create_spu(struct spu *spu, void *data)
+{
+	return spu_management_ops->create_spu(spu, data);
+}
+
+/* Forward to the destroy_spu() hook of spu_management_ops; return its result. */
+static inline int spu_destroy_spu(struct spu *spu)
+{
+	return spu_management_ops->destroy_spu(spu);
+}
+
+/* Forward to the init_affinity() hook of spu_management_ops; return its result. */
+static inline int spu_init_affinity(void)
+{
+	return spu_management_ops->init_affinity();
+}
+
+/* Forward @ctx to the enable_spu() hook of spu_management_ops. */
+static inline void spu_enable_spu(struct spu_context *ctx)
+{
+	spu_management_ops->enable_spu(ctx);
+}
+
+/* Forward @ctx to the disable_spu() hook of spu_management_ops. */
+static inline void spu_disable_spu(struct spu_context *ctx)
+{
+	spu_management_ops->disable_spu(ctx);
+}
+
+/*
+ * The declarations following are put here for convenience
+ * and only intended to be used by the platform setup code.
+ */
+
+extern const struct spu_priv1_ops spu_priv1_mmio_ops;
+extern const struct spu_priv1_ops spu_priv1_beat_ops;
+
+extern const struct spu_management_ops spu_management_of_ops;
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
new file mode 100644
index 0000000000..50950deedb
--- /dev/null
+++ b/arch/powerpc/include/asm/sstep.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ */
+#include <asm/inst.h>
+
+struct pt_regs;
+
+/*
+ * We don't allow single-stepping an mtmsrd that would clear
+ * MSR_RI, since that would make the exception unrecoverable.
+ * Since we need to single-step to proceed from a breakpoint,
+ * we don't allow putting a breakpoint on an mtmsrd instruction.
+ * Similarly we don't allow breakpoints on rfid instructions.
+ * These macros tell us if an instruction is a mtmsrd or rfid.
+ * Note that these return true for both mtmsr/rfi (32-bit)
+ * and mtmsrd/rfid (64-bit).
+ */
+#define IS_MTMSRD(instr)	((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124)
+#define IS_RFID(instr)		((ppc_inst_val(instr) & 0xfc0007be) == 0x4c000024)
+
+enum instruction_type {
+	COMPUTE,		/* arith/logical/CR op, etc. */
+	LOAD,			/* load and store types need to be contiguous */
+	LOAD_MULTI,
+	LOAD_FP,
+	LOAD_VMX,
+	LOAD_VSX,		/* last value of the OP_IS_LOAD() range test */
+	STORE,
+	STORE_MULTI,
+	STORE_FP,
+	STORE_VMX,
+	STORE_VSX,		/* last value of the OP_IS_STORE() range test */
+	LARX,			/* counted as a load by OP_IS_LOAD() */
+	STCX,			/* counted as a store; upper bound of OP_IS_LOAD_STORE() */
+	BRANCH,
+	MFSPR,
+	MTSPR,
+	CACHEOP,
+	BARRIER,
+	SYSCALL,
+	SYSCALL_VECTORED_0,
+	MFMSR,
+	MTMSR,
+	RFI,
+	INTERRUPT,
+	UNKNOWN			/* every value must fit in INSTR_TYPE_MASK, see GETTYPE() */
+};
+
+#define INSTR_TYPE_MASK	0x1f
+
+#define OP_IS_LOAD(type)	((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX)
+#define OP_IS_STORE(type)	((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX)
+#define OP_IS_LOAD_STORE(type)	(LOAD <= (type) && (type) <= STCX)
+
+/* Compute flags, ORed in with type */
+#define SETREG		0x20
+#define SETCC		0x40
+#define SETXER		0x80
+
+/* Branch flags, ORed in with type */
+#define SETLK		0x20
+#define BRTAKEN		0x40
+#define DECCTR		0x80
+
+/* Load/store flags, ORed in with type */
+#define SIGNEXT		0x20
+#define UPDATE		0x40	/* matches bit in opcode 31 instructions */
+#define BYTEREV		0x80
+#define FPCONV		0x100
+
+/* Barrier type field, ORed in with type */
+#define BARRIER_MASK	0xe0
+#define BARRIER_SYNC	0x00
+#define BARRIER_ISYNC	0x20
+#define BARRIER_EIEIO	0x40
+#define BARRIER_LWSYNC	0x60
+#define BARRIER_PTESYNC	0x80
+
+/* Cacheop values, ORed in with type */
+#define CACHEOP_MASK	0x700
+#define DCBST		0
+#define DCBF		0x100
+#define DCBTST		0x200
+#define DCBT		0x300
+#define ICBI		0x400
+#define DCBZ		0x500
+
+/* VSX flags values */
+#define VSX_FPCONV	1	/* do floating point SP/DP conversion */
+#define VSX_SPLAT	2	/* store loaded value into all elements */
+#define VSX_LDLEFT	4	/* load VSX register from left */
+#define VSX_CHECK_VEC	8	/* check MSR_VEC not MSR_VSX for reg >= 32 */
+
+/* Prefixed flag, ORed in with type */
+#define PREFIXED       0x800
+
+/* Size field in type word */
+#define SIZE(n)		((n) << 12)
+#define GETSIZE(w)	((w) >> 12)
+
+#define GETTYPE(t)	((t) & INSTR_TYPE_MASK)
+#define GETLENGTH(t)   (((t) & PREFIXED) ? 8 : 4)
+
+#define MKOP(t, f, s)	((t) | (f) | SIZE(s))
+
+/* Prefix instruction operands */
+#define GET_PREFIX_RA(i)	(((i) >> 16) & 0x1f)
+#define GET_PREFIX_R(i)		((i) & (1ul << 20))
+
+extern s32 patch__exec_instr;
+
+struct instruction_op {
+	int type;	/* NOTE(review): presumably an MKOP() word (type | flags | SIZE()) - confirm */
+	int reg;
+	unsigned long val;
+	/* For LOAD/STORE/LARX/STCX */
+	unsigned long ea;
+	int update_reg;
+	/* For MFSPR */
+	int spr;
+	u32 ccval;
+	u32 xerval;
+	u8 element_size;	/* for VSX/VMX loads/stores */
+	u8 vsx_flags;	/* NOTE(review): presumably a mask of the VSX_* flag values - confirm */
+};
+
+union vsx_reg {
+	u8	b[16];
+	u16	h[8];
+	u32	w[4];
+	unsigned long d[2];
+	float	fp[4];
+	double	dp[2];
+	__vector128 v;
+};
+
+/*
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
+ */
+extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+			 ppc_inst_t instr);
+
+/*
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
+ */
+void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * arithmetic/logical instructions, loads and stores,
+ * cache operations and barriers.
+ *
+ * Returns 1 if the instruction was emulated successfully,
+ * 0 if it could not be emulated, or -1 for an instruction that
+ * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
+ */
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr);
+
+/*
+ * Emulate a load or store instruction by reading/writing the
+ * memory of the current process.  FP/VMX/VSX registers are assumed
+ * to hold live values if the appropriate enable bit in regs->msr is
+ * set; otherwise this will use the saved values in the thread struct
+ * for user-mode accesses.
+ */
+extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op);
+
+extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+			     const void *mem, bool cross_endian);
+extern void emulate_vsx_store(struct instruction_op *op,
+			      const union vsx_reg *reg, void *mem,
+			      bool cross_endian);
+extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
new file mode 100644
index 0000000000..283c346478
--- /dev/null
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <asm/reg.h>
+#include <asm/current.h>
+#include <asm/paca.h>
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+	const unsigned long boot_canary = get_random_canary();
+	/* One random value is stored in the current task and, on 64-bit, the paca. */
+	current->stack_canary = boot_canary;
+#ifdef CONFIG_PPC64
+	get_paca()->canary = boot_canary;
+#endif
+}
+
+#endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/powerpc/include/asm/stacktrace.h b/arch/powerpc/include/asm/stacktrace.h
new file mode 100644
index 0000000000..6149b53b3b
--- /dev/null
+++ b/arch/powerpc/include/asm/stacktrace.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Stack trace functions.
+ *
+ * Copyright 2018, Murilo Opsfelder Araujo, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_STACKTRACE_H
+#define _ASM_POWERPC_STACKTRACE_H
+
+struct pt_regs;	/* forward declaration: keeps the prototype below usable on its own */
+void show_user_instructions(struct pt_regs *regs);
+#endif /* _ASM_POWERPC_STACKTRACE_H */
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
new file mode 100644
index 0000000000..de1018cc52
--- /dev/null
+++ b/arch/powerpc/include/asm/static_call.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_STATIC_CALL_H
+#define _ASM_POWERPC_STATIC_CALL_H
+
+#define __PPC_SCT(name, inst)					\
+	asm(".pushsection .text, \"ax\"				\n"	\
+	    ".align 5						\n"	\
+	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
+	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
+	    inst "						\n"	\
+	    "	lis	12,2f@ha				\n"	\
+	    "	lwz	12,2f@l(12)				\n"	\
+	    "	mtctr	12					\n"	\
+	    "	bctr						\n"	\
+	    "1:	li	3, 0					\n"	\
+	    "	blr						\n"	\
+	    "2:	.long 0						\n"	\
+	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
+	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+	    ".popsection					\n")
+
+#define PPC_SCT_RET0		20		/* Byte offset of label 1 in __PPC_SCT: five 4-byte instructions precede it */
+#define PPC_SCT_DATA		28		/* Byte offset of label 2: label 1 plus the li and blr */
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)	__PPC_SCT(name, "b " #func)
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)	__PPC_SCT(name, "blr")
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)	__PPC_SCT(name, "b .+20")	/* .+20 must stay equal to PPC_SCT_RET0 */
+
+#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
new file mode 100644
index 0000000000..60ba22770f
--- /dev/null
+++ b/arch/powerpc/include/asm/string.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_STRING_H
+#define _ASM_POWERPC_STRING_H
+
+#ifdef __KERNEL__
+
+#ifndef CONFIG_KASAN
+#define __HAVE_ARCH_STRNCPY
+#define __HAVE_ARCH_STRNCMP
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_MEMSET16
+#endif
+
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMMOVE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
+
+extern char * strcpy(char *,const char *);
+extern char * strncpy(char *,const char *, __kernel_size_t);
+extern __kernel_size_t strlen(const char *);
+extern int strcmp(const char *,const char *);
+extern int strncmp(const char *, const char *, __kernel_size_t);
+extern char * strcat(char *, const char *);
+extern void * memset(void *,int,__kernel_size_t);
+extern void * memcpy(void *,const void *,__kernel_size_t);
+extern void * memmove(void *,const void *,__kernel_size_t);
+extern int memcmp(const void *,const void *,__kernel_size_t);
+extern void * memchr(const void *,int,__kernel_size_t);
+void memcpy_flushcache(void *dest, const void *src, size_t size);
+
+#ifdef CONFIG_KASAN
+/* __mem variants are used by KASAN to implement instrumented memintrinsics. */
+#ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
+#define __memset memset
+#define __memcpy memcpy
+#define __memmove memmove
+#else /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
+void *__memset(void *s, int c, __kernel_size_t count);
+void *__memcpy(void *to, const void *from, __kernel_size_t n);
+void *__memmove(void *to, const void *from, __kernel_size_t n);
+#ifndef __SANITIZE_ADDRESS__
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+#endif /* !__SANITIZE_ADDRESS__ */
+#endif /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
+#endif /* CONFIG_KASAN */
+
+#ifdef CONFIG_PPC64
+#ifndef CONFIG_KASAN
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+	return __memset16(p, v, n * sizeof(*p));	/* @n scaled by the 2-byte element size */
+}
+
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+	return __memset32(p, v, n * sizeof(*p));	/* @n scaled by the 4-byte element size */
+}
+
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+	return __memset64(p, v, n * sizeof(*p));	/* @n scaled by the 8-byte element size */
+}
+#endif
+#else
+#ifndef CONFIG_KASAN
+#define __HAVE_ARCH_STRLEN
+#endif
+
+extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
+#endif
+#endif /* __KERNEL__ */
+
+#endif	/* _ASM_POWERPC_STRING_H */
diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
new file mode 100644
index 0000000000..a02bd54b89
--- /dev/null
+++ b/arch/powerpc/include/asm/svm.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 Anshuman Khandual, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SVM_H
+#define _ASM_POWERPC_SVM_H
+
+#ifdef CONFIG_PPC_SVM
+
+#include <asm/reg.h>
+
+static inline bool is_secure_guest(void)
+{
+	return (mfmsr() & MSR_S) != 0;	/* true when the MSR_S bit is set in mfmsr() */
+}
+
+void dtl_cache_ctor(void *addr);
+#define get_dtl_cache_ctor()	(is_secure_guest() ? dtl_cache_ctor : NULL)
+
+#else /* CONFIG_PPC_SVM */
+
+static inline bool is_secure_guest(void)
+{
+	return false;	/* !CONFIG_PPC_SVM branch: always reports a non-secure guest */
+}
+
+#define get_dtl_cache_ctor() NULL
+
+#endif /* CONFIG_PPC_SVM */
+#endif /* _ASM_POWERPC_SVM_H */
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
new file mode 100644
index 0000000000..f4cfdc1246
--- /dev/null
+++ b/arch/powerpc/include/asm/swab.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ */
+#ifndef _ASM_POWERPC_SWAB_H
+#define _ASM_POWERPC_SWAB_H
+
+#include <uapi/asm/swab.h>
+
+#endif /* _ASM_POWERPC_SWAB_H */
diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h
new file mode 100644
index 0000000000..4203b5e0a8
--- /dev/null
+++ b/arch/powerpc/include/asm/swiotlb.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
+ */
+
+#ifndef __ASM_SWIOTLB_H
+#define __ASM_SWIOTLB_H
+
+#include <linux/swiotlb.h>
+
+extern unsigned int ppc_swiotlb_enable;
+extern unsigned int ppc_swiotlb_flags;
+
+#ifdef CONFIG_SWIOTLB
+void swiotlb_detect_4g(void);
+#else
+static inline void swiotlb_detect_4g(void) {}
+#endif
+
+#endif /* __ASM_SWIOTLB_H */
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
new file mode 100644
index 0000000000..aee25e3ebf
--- /dev/null
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_SWITCH_TO_H
+#define _ASM_POWERPC_SWITCH_TO_H
+
+#include <linux/sched.h>
+#include <asm/reg.h>
+
+struct thread_struct;
+struct task_struct;
+struct pt_regs;
+
+extern struct task_struct *__switch_to(struct task_struct *,
+	struct task_struct *);
+#define switch_to(prev, next, last)	((last) = __switch_to((prev), (next)))
+
+extern struct task_struct *_switch(struct thread_struct *prev,
+				   struct thread_struct *next);
+
+extern void switch_booke_debug_regs(struct debug_reg *new_debug);
+
+extern int emulate_altivec(struct pt_regs *);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void restore_math(struct pt_regs *regs);
+#else
+static inline void restore_math(struct pt_regs *regs)
+{
+}
+#endif
+
+void restore_tm_state(struct pt_regs *regs);
+
+extern void flush_all_to_thread(struct task_struct *);
+extern void giveup_all(struct task_struct *);
+
+#ifdef CONFIG_PPC_FPU
+extern void enable_kernel_fp(void);
+extern void flush_fp_to_thread(struct task_struct *);
+extern void giveup_fpu(struct task_struct *);
+extern void save_fpu(struct task_struct *);
+static inline void disable_kernel_fp(void)
+{
+	msr_check_and_clear(MSR_FP);
+}
+#else
+static inline void save_fpu(struct task_struct *t) { }
+static inline void flush_fp_to_thread(struct task_struct *t) { }
+#endif
+
+#ifdef CONFIG_ALTIVEC
+extern void enable_kernel_altivec(void);
+extern void flush_altivec_to_thread(struct task_struct *);
+extern void giveup_altivec(struct task_struct *);
+extern void save_altivec(struct task_struct *);
+static inline void disable_kernel_altivec(void)
+{
+	msr_check_and_clear(MSR_VEC);
+}
+#else
+static inline void save_altivec(struct task_struct *t) { }
+static inline void __giveup_altivec(struct task_struct *t) { }
+static inline void enable_kernel_altivec(void)
+{
+	BUILD_BUG();
+}
+
+static inline void disable_kernel_altivec(void)
+{
+	BUILD_BUG();
+}
+#endif
+
+#ifdef CONFIG_VSX
+extern void enable_kernel_vsx(void);
+extern void flush_vsx_to_thread(struct task_struct *);
+static inline void disable_kernel_vsx(void)
+{
+	msr_check_and_clear(MSR_FP | MSR_VEC | MSR_VSX);	/* all three bits in one call */
+}
+#else
+static inline void enable_kernel_vsx(void)
+{
+	BUILD_BUG();
+}
+
+static inline void disable_kernel_vsx(void)
+{
+	BUILD_BUG();
+}
+#endif
+
+#ifdef CONFIG_SPE
+extern void enable_kernel_spe(void);
+extern void flush_spe_to_thread(struct task_struct *);
+extern void giveup_spe(struct task_struct *);
+extern void __giveup_spe(struct task_struct *);
+static inline void disable_kernel_spe(void)
+{
+	msr_check_and_clear(MSR_SPE);
+}
+#else
+static inline void __giveup_spe(struct task_struct *t) { }
+#endif
+
+static inline void clear_task_ebb(struct task_struct *t)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* EBB perf events are not inherited: zero every EBB-related thread field. */
+	t->thread.ebbrr = 0;
+	t->thread.ebbhr = 0;
+	t->thread.bescr = 0;
+	t->thread.mmcr2 = 0;
+	t->thread.mmcr0 = 0;
+	t->thread.siar = 0;
+	t->thread.sdar = 0;
+	t->thread.sier = 0;
+	t->thread.used_ebb = 0;
+#endif
+}
+
+void kvmppc_save_user_regs(void);
+void kvmppc_save_current_sprs(void);
+
+extern int set_thread_tidr(struct task_struct *t);
+
+#endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
new file mode 100644
index 0000000000..b0b4c64870
--- /dev/null
+++ b/arch/powerpc/include/asm/synch.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SYNCH_H 
+#define _ASM_POWERPC_SYNCH_H 
+#ifdef __KERNEL__
+
+#include <asm/cputable.h>
+#include <asm/feature-fixups.h>
+#include <asm/ppc-opcode.h>
+
+#ifndef __ASSEMBLY__
+extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
+extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
+			     void *fixup_end);
+
+static inline void eieio(void)
+{
+	if (!IS_ENABLED(CONFIG_BOOKE))
+		__asm__ __volatile__ ("eieio" : : : "memory");
+	else	/* CONFIG_BOOKE builds emit mbar instead */
+		__asm__ __volatile__ ("mbar" : : : "memory");
+}
+
+static inline void isync(void)
+{
+	asm volatile("isync" : : : "memory");	/* "memory" clobber also makes this a compiler barrier */
+}
+
+static inline void ppc_after_tlbiel_barrier(void)
+{
+	asm volatile("ptesync": : :"memory");
+	/*
+	 * POWER9, POWER10 need a cp_abort after tlbiel to ensure the copy is
+	 * invalidated correctly. If this is not done, the paste can take data
+	 * from the physical address that was translated at copy time.
+	 *
+	 * POWER9 in practice does not need this, because address spaces with
+	 * accelerators mapped will use tlbie (which does invalidate the copy)
+	 * to invalidate translations. It's not possible to limit POWER10 this
+	 * way due to local copy-paste.
+	 */
+	asm volatile(ASM_FTR_IFSET(PPC_CP_ABORT, "", %0) : : "i" (CPU_FTR_ARCH_31) : "memory");
+}
+#endif /* __ASSEMBLY__ */
+
+#if defined(__powerpc64__)
+#    define LWSYNC	lwsync
+#elif defined(CONFIG_PPC_E500)
+#    define LWSYNC					\
+	START_LWSYNC_SECTION(96);			\
+	sync;						\
+	MAKE_LWSYNC_SECTION_ENTRY(96, __lwsync_fixup);
+#else
+#    define LWSYNC	sync
+#endif
+
+#ifdef CONFIG_SMP
+#define __PPC_ACQUIRE_BARRIER				\
+	START_LWSYNC_SECTION(97);			\
+	isync;						\
+	MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
+#define PPC_ACQUIRE_BARRIER	 "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
+#define PPC_RELEASE_BARRIER	 stringify_in_c(LWSYNC) "\n"
+#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n"
+#define PPC_ATOMIC_EXIT_BARRIER	 "\n" stringify_in_c(sync) "\n"
+#else
+#define PPC_ACQUIRE_BARRIER
+#define PPC_RELEASE_BARRIER
+#define PPC_ATOMIC_ENTRY_BARRIER
+#define PPC_ATOMIC_EXIT_BARRIER
+#endif
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_SYNCH_H */
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
new file mode 100644
index 0000000000..3dd36c5e33
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <uapi/linux/audit.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+typedef long (*syscall_fn)(const struct pt_regs *);
+#else
+typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
+			   unsigned long, unsigned long, unsigned long);
+#endif
+
+/* ftrace syscall tracing requires exporting the sys_call_table */
+extern const syscall_fn sys_call_table[];
+extern const syscall_fn compat_sys_call_table[];
+
+static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
+{
+	/*
+	 * The return type is int on purpose: 0xffffffff (32-bit negative 1)
+	 * in r0 is then seen as -1 on a 64-bit kernel, which lets a compat
+	 * task under seccomp set r0 = -1 to reject the syscall.
+	 * Anything that is not a syscall trap reports -1 as well.
+	 */
+	if (!trap_is_syscall(regs))
+		return -1;
+
+	return regs->gpr[0];
+}
+
+/* Roll back a syscall by reloading gpr[3] from orig_gpr3. */
+static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs)
+{
+	regs->gpr[3] = regs->orig_gpr3;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	const unsigned long r3 = regs->gpr[3];
+
+	/* scv: a failure is r3 itself holding an error value (IS_ERR_VALUE). */
+	if (trap_is_scv(regs))
+		return IS_ERR_VALUE(r3) ? r3 : 0;
+
+	/*
+	 * Otherwise failure is flagged by the 0x10000000 bit of ccr, and
+	 * regs->gpr[3] then contains a positive ERRORCODE: negate it.
+	 */
+	return (regs->ccr & 0x10000000UL) ? -r3 : 0;
+}
+
+/* Raw syscall return register: the current contents of gpr[3]. */
+static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
+{
+	return regs->gpr[3];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (trap_is_scv(regs)) {
+		regs->gpr[3] = (long) error ?: val;	/* scv: r3 carries error or value as-is */
+	} else {
+		/*
+		 * sc convention: the 0x10000000 bit of ccr flags failure and r3
+		 * then holds a positive error code, which is the form that
+		 * syscall_get_error() decodes; @error is negative, so negate it.
+		 * ccr is updated here because e.g. the signal code checks it.
+		 */
+		if (error) {
+			regs->ccr |= 0x10000000L;
+			regs->gpr[3] = -error;
+		} else {
+			regs->ccr &= ~0x10000000L;
+			regs->gpr[3] = val;
+		}
+	}
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	const unsigned long mask = is_tsk_32bit_task(task) ? 0xffffffff : -1UL;
+	unsigned int i;
+
+	/*
+	 * Fill args[0..5] with the six syscall arguments.
+	 *
+	 * Argument 0 is taken from orig_gpr3 instead of gpr[3]; arguments
+	 * 1-5 come from gpr[4]..gpr[8]. For a 32-bit task every value is
+	 * truncated to its low 32 bits.
+	 */
+	args[0] = regs->orig_gpr3 & mask;
+
+	for (i = 1; i < 6; i++)
+		args[i] = regs->gpr[3 + i] & mask;
+}
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+	if (is_tsk_32bit_task(task))
+		return AUDIT_ARCH_PPC;
+
+	/* Not a 32-bit task: the value follows the kernel's configured endianness. */
+	return IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? AUDIT_ARCH_PPC64LE
+						     : AUDIT_ARCH_PPC64;
+}
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
new file mode 100644
index 0000000000..67486c67e8
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall_wrapper.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
+ *
+ * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
+ */
+
+#ifndef __ASM_POWERPC_SYSCALL_WRAPPER_H
+#define __ASM_POWERPC_SYSCALL_WRAPPER_H
+
+struct pt_regs;
+
+#define SC_POWERPC_REGS_TO_ARGS(x, ...)				\
+	__MAP(x,__SC_ARGS					\
+	      ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]	\
+	      ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
+
+#define __SYSCALL_DEFINEx(x, name, ...)						\
+	long sys##name(const struct pt_regs *regs);			\
+	ALLOW_ERROR_INJECTION(sys##name, ERRNO);			\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	long sys##name(const struct pt_regs *regs)			\
+	{									\
+		return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}									\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	{									\
+		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__MAP(x,__SC_TEST,__VA_ARGS__);					\
+		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
+		return ret;							\
+	}									\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define SYSCALL_DEFINE0(sname)							\
+	SYSCALL_METADATA(_##sname, 0);						\
+	long sys_##sname(const struct pt_regs *__unused);		\
+	ALLOW_ERROR_INJECTION(sys_##sname, ERRNO);			\
+	long sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name)							\
+	long sys_##name(const struct pt_regs *regs);			\
+	long __weak sys_##name(const struct pt_regs *regs)		\
+	{									\
+		return sys_ni_syscall();					\
+	}
+
+#endif // __ASM_POWERPC_SYSCALL_WRAPPER_H
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
new file mode 100644
index 0000000000..6d51b007b5
--- /dev/null
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_SYSCALLS_H
+#define __ASM_POWERPC_SYSCALLS_H
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/compat.h>
+
+#include <asm/syscall.h>
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#endif
+#include <asm/unistd.h>
+#include <asm/ucontext.h>
+
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+long sys_ni_syscall(void);
+#else
+long sys_ni_syscall(const struct pt_regs *regs);
+#endif
+
+struct rtas_args;
+
+/*
+ * long long munging: the 32 bit ABI passes long longs in an odd even register
+ * pair. High and low parts are swapped depending on endian mode, so define a
+ * macro (similar to mips linux32) whose parameter order follows the endian
+ * mode and which merges both halves into a u64. Arguments are parenthesized.
+ */
+#ifdef __LITTLE_ENDIAN__
+#define merge_64(low, high) (((u64)(high) << 32) | (low))
+#else
+#define merge_64(high, low) (((u64)(high) << 32) | (low))
+#endif
+
+/*
+ * PowerPC architecture-specific syscalls
+ */
+
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+
+long sys_rtas(struct rtas_args __user *uargs);
+
+#ifdef CONFIG_PPC64
+long sys_ppc64_personality(unsigned long personality);
+#ifdef CONFIG_COMPAT
+long compat_sys_ppc64_personality(unsigned long personality);
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
+
+long sys_swapcontext(struct ucontext __user *old_ctx,
+		     struct ucontext __user *new_ctx, long ctx_size);
+long sys_mmap(unsigned long addr, size_t len,
+	      unsigned long prot, unsigned long flags,
+	      unsigned long fd, off_t offset);
+long sys_mmap2(unsigned long addr, size_t len,
+	       unsigned long prot, unsigned long flags,
+	       unsigned long fd, unsigned long pgoff);
+long sys_switch_endian(void);
+
+#ifdef CONFIG_PPC32
+long sys_sigreturn(void);
+long sys_debug_setcontext(struct ucontext __user *ctx, int ndbg,
+			  struct sig_dbg_op __user *dbg);
+#endif
+
+long sys_rt_sigreturn(void);
+
+long sys_subpage_prot(unsigned long addr,
+		      unsigned long len, u32 __user *map);
+
+#ifdef CONFIG_COMPAT
+long compat_sys_swapcontext(struct ucontext32 __user *old_ctx,
+			    struct ucontext32 __user *new_ctx,
+			    int ctx_size);
+long compat_sys_old_getrlimit(unsigned int resource,
+			      struct compat_rlimit __user *rlim);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Architecture specific signatures required by long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ * The following signatures provide a machine long parameter for
+ * each register that will be supplied. The implementation is
+ * responsible for combining parameter pairs.
+ */
+
+#ifdef CONFIG_PPC32
+long sys_ppc_pread64(unsigned int fd,
+		     char __user *ubuf, compat_size_t count,
+		     u32 reg6, u32 pos1, u32 pos2);
+long sys_ppc_pwrite64(unsigned int fd,
+		      const char __user *ubuf, compat_size_t count,
+		      u32 reg6, u32 pos1, u32 pos2);
+long sys_ppc_readahead(int fd, u32 r4,
+		       u32 offset1, u32 offset2, u32 count);
+long sys_ppc_truncate64(const char __user *path, u32 reg4,
+		        unsigned long len1, unsigned long len2);
+long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
+			 unsigned long len1, unsigned long len2);
+long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+			 size_t len, int advice);
+long sys_ppc_sync_file_range2(int fd, unsigned int flags,
+			      unsigned int offset1,
+			      unsigned int offset2,
+			      unsigned int nbytes1,
+			      unsigned int nbytes2);
+long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2,
+		       u32 len1, u32 len2);
+#endif
+#ifdef CONFIG_COMPAT
+long compat_sys_mmap2(unsigned long addr, size_t len,
+		      unsigned long prot, unsigned long flags,
+		      unsigned long fd, unsigned long pgoff);
+long compat_sys_ppc_pread64(unsigned int fd,
+			    char __user *ubuf, compat_size_t count,
+			    u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_pwrite64(unsigned int fd,
+			     const char __user *ubuf, compat_size_t count,
+			     u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_readahead(int fd, u32 r4,
+			      u32 offset1, u32 offset2, u32 count);
+long compat_sys_ppc_truncate64(const char __user *path, u32 reg4,
+			       unsigned long len1, unsigned long len2);
+long compat_sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
+				unsigned long len1, unsigned long len2);
+long compat_sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+				size_t len, int advice);
+long compat_sys_ppc_sync_file_range2(int fd, unsigned int flags,
+				     unsigned int offset1,
+				     unsigned int offset2,
+				     unsigned int nbytes1,
+				     unsigned int nbytes2);
+#endif /* CONFIG_COMPAT */
+
+#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT)
+long sys_ppc_fadvise64_64(int fd, int advice,
+			  u32 offset_high, u32 offset_low,
+			  u32 len_high, u32 len_low);
+#endif
+
+#else
+
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
+#define __SYSCALL(nr, entry) \
+	long entry(const struct pt_regs *regs);
+
+#ifdef CONFIG_PPC64
+#include <asm/syscall_table_64.h>
+#else
+#include <asm/syscall_table_32.h>
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_POWERPC_SYSCALLS_H */
diff --git a/arch/powerpc/include/asm/syscalls_32.h b/arch/powerpc/include/asm/syscalls_32.h
new file mode 100644
index 0000000000..749255568b
--- /dev/null
+++ b/arch/powerpc/include/asm/syscalls_32.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_SYSCALLS_32_H
+#define _ASM_POWERPC_SYSCALLS_32_H
+
+#include <linux/compat.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+
+/*
+ * Data types and macros for providing 32b PowerPC support.
+ */
+
+/* These are here to support 32-bit syscalls on a 64-bit kernel. */
+
+struct pt_regs32 {
+	unsigned int gpr[32];
+	unsigned int nip;
+	unsigned int msr;
+	unsigned int orig_gpr3;		/* Used for restarting system calls */
+	unsigned int ctr;
+	unsigned int link;
+	unsigned int xer;
+	unsigned int ccr;
+	unsigned int mq;		/* 601 only (not used at present) */
+	unsigned int trap;		/* Reason for being here */
+	unsigned int dar;		/* Fault registers */
+	unsigned int dsisr;
+	unsigned int result;		/* Result of a system call */
+};
+
+struct sigcontext32 {
+	unsigned int	_unused[4];
+	int		signal;
+	compat_uptr_t	handler;
+	unsigned int	oldmask;
+	compat_uptr_t	regs;  /* 4 byte pointer to the pt_regs32 structure. */
+};
+
+struct mcontext32 {
+	elf_gregset_t32		mc_gregs;
+	elf_fpregset_t		mc_fregs;
+	unsigned int		mc_pad[2];
+	elf_vrregset_t32	mc_vregs __attribute__((__aligned__(16)));
+	elf_vsrreghalf_t32      mc_vsregs __attribute__((__aligned__(16)));
+};
+
+struct ucontext32 { 
+	unsigned int	  	uc_flags;
+	unsigned int 	  	uc_link;
+	compat_stack_t	 	uc_stack;
+	int		 	uc_pad[7];
+	compat_uptr_t		uc_regs;	/* points to uc_mcontext field */
+	compat_sigset_t	 	uc_sigmask;	/* mask last for extensibility */
+	/* glibc has 1024-bit signal masks, ours are 64-bit */
+	int		 	uc_maskext[30];
+	int		 	uc_pad2[3];
+	struct mcontext32	uc_mcontext;
+};
+
+#endif  // _ASM_POWERPC_SYSCALLS_32_H
diff --git a/arch/powerpc/include/asm/task_size_32.h b/arch/powerpc/include/asm/task_size_32.h
new file mode 100644
index 0000000000..de7290ee77
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_32.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_32_H
+#define _ASM_POWERPC_TASK_SIZE_32_H
+
+#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START
+#error User TASK_SIZE overlaps with KERNEL_START address
+#endif
+
+#define TASK_SIZE (CONFIG_TASK_SIZE)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm space during
+ * mmap's.
+ */
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
+
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#define STACK_TOP TASK_SIZE
+#define STACK_TOP_MAX STACK_TOP
+
+#endif /* _ASM_POWERPC_TASK_SIZE_32_H */
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
new file mode 100644
index 0000000000..5a709951c9
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_64_H
+#define _ASM_POWERPC_TASK_SIZE_64_H
+
+/*
+ * 64-bit user address space can have multiple limits
+ * For now supported values are:
+ */
+#define TASK_SIZE_64TB  (0x0000400000000000UL)
+#define TASK_SIZE_128TB (0x0000800000000000UL)
+#define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB   (0x0004000000000000UL)
+#define TASK_SIZE_2PB   (0x0008000000000000UL)
+
+/*
+ * With 52 bits in the address we can support up to 4PB of range.
+ */
+#define TASK_SIZE_4PB   (0x0010000000000000UL)
+
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+/*
+ * Max value currently used:
+ */
+#define TASK_SIZE_USER64		TASK_SIZE_4PB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE		TASK_SIZE_512TB
+#else
+#define TASK_SIZE_USER64		TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_64TB
+
+/*
+ * We don't need to allocate extended context ids for 4K page size, because we
+ * limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
+#endif
+
+/*
+ * 32-bit user address space is 4GB - 1 page
+ * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
+ */
+#define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
+
+#define TASK_SIZE (is_32bit_task() ? TASK_SIZE_USER32 : TASK_SIZE_USER64)
+
+#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
+
+/*
+ * This decides where the kernel will search for a free chunk of vm space during
+ * mmap's.
+ */
+#define TASK_UNMAPPED_BASE	\
+	((is_32bit_task()) ? TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64)
+
+/*
+ * Initial task size value for user applications. For book3s 64 we start
+ * with 128TB and conditionally enable up to 512TB
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+#define DEFAULT_MAP_WINDOW	\
+	((is_32bit_task()) ? TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
+#else
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
+#endif
+
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
+#define STACK_TOP_USER32 TASK_SIZE_USER32
+#define STACK_TOP_MAX TASK_SIZE_USER64
+#define STACK_TOP (is_32bit_task() ? STACK_TOP_USER32 : STACK_TOP_USER64)
+
+#define arch_get_mmap_base(addr, base) \
+	(((addr) > DEFAULT_MAP_WINDOW) ? (base) + TASK_SIZE - DEFAULT_MAP_WINDOW : (base))
+
+#define arch_get_mmap_end(addr, len, flags) \
+	(((addr) > DEFAULT_MAP_WINDOW) || \
+	 (((flags) & MAP_FIXED) && ((addr) + (len) > DEFAULT_MAP_WINDOW)) ? TASK_SIZE : \
+									    DEFAULT_MAP_WINDOW)
+
+#endif /* _ASM_POWERPC_TASK_SIZE_64_H */
diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
new file mode 100644
index 0000000000..0c34d2756d
--- /dev/null
+++ b/arch/powerpc/include/asm/tce.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ * Rewrite, cleanup:
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_TCE_H
+#define _ASM_POWERPC_TCE_H
+#ifdef __KERNEL__
+
+#include <asm/iommu.h>
+
+/*
+ * Tces come in two formats, one for the virtual bus and a different
+ * format for PCI.  PCI TCEs can have hardware or software maintained
+ * coherency.
+ */
+#define TCE_VB			0
+#define TCE_PCI			1
+
+#define TCE_ENTRY_SIZE		8		/* each TCE is 64 bits */
+#define TCE_VALID		0x800		/* TCE valid */
+#define TCE_ALLIO		0x400		/* TCE valid for all lpars */
+#define TCE_PCI_WRITE		0x2		/* write from PCI allowed */
+#define TCE_PCI_READ		0x1		/* read from PCI allowed */
+#define TCE_VB_WRITE		0x1		/* write from VB allowed */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_TCE_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
new file mode 100644
index 0000000000..bf5dde1a41
--- /dev/null
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* thread_info.h: PowerPC low-level thread information
+ * adapted from the i386 version by Paul Mackerras
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ */
+
+#ifndef _ASM_POWERPC_THREAD_INFO_H
+#define _ASM_POWERPC_THREAD_INFO_H
+
+#include <asm/asm-const.h>
+#include <asm/page.h>
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_KASAN
+#define MIN_THREAD_SHIFT	(CONFIG_THREAD_SHIFT + 1)
+#else
+#define MIN_THREAD_SHIFT	CONFIG_THREAD_SHIFT
+#endif
+
+#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT
+#define THREAD_SHIFT		PAGE_SHIFT
+#else
+#define THREAD_SHIFT		MIN_THREAD_SHIFT
+#endif
+
+#define THREAD_SIZE		(1 << THREAD_SHIFT)
+
+/*
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
+ */
+#ifdef CONFIG_VMAP_STACK
+#define THREAD_ALIGN_SHIFT	(THREAD_SHIFT + 1)
+#else
+#define THREAD_ALIGN_SHIFT	THREAD_SHIFT
+#endif
+
+#define THREAD_ALIGN		(1 << THREAD_ALIGN_SHIFT)
+
+#ifndef __ASSEMBLY__
+#include <linux/cache.h>
+#include <asm/processor.h>
+#include <asm/accounting.h>
+#include <asm/ppc_asm.h>
+
+#define SLB_PRELOAD_NR	16U
+/*
+ * low level task data.
+ */
+struct thread_info {
+	int		preempt_count;		/* 0 => preemptable,
+						   <0 => BUG */
+#ifdef CONFIG_SMP
+	unsigned int	cpu;
+#endif
+	unsigned long	local_flags;		/* private flags for thread */
+#ifdef CONFIG_LIVEPATCH_64
+	unsigned long *livepatch_sp;
+#endif
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
+	struct cpu_accounting_data accounting;
+#endif
+	unsigned char slb_preload_nr;
+	unsigned char slb_preload_tail;
+	u32 slb_preload_esid[SLB_PRELOAD_NR];
+
+	/* low level flags - has atomic operations done on it */
+	unsigned long	flags ____cacheline_aligned_in_smp;
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.preempt_count = INIT_PREEMPT_COUNT,	\
+	.flags =	0,			\
+}
+
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+
+/* how to get the thread information struct from C */
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * thread information flag bit numbers
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_NOTIFY_SIGNAL	3	/* signal notifications exist */
+#define TIF_SYSCALL_EMU		4	/* syscall emulation active */
+#define TIF_RESTORE_TM		5	/* need to restore TM FP/VEC/VSX */
+#define TIF_PATCH_PENDING	6	/* pending live patching update */
+#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+#define TIF_SINGLESTEP		8	/* singlestepping active */
+#define TIF_SECCOMP		10	/* secure computing */
+#define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
+#define TIF_NOERROR		12	/* Force successful syscall return */
+#define TIF_NOTIFY_RESUME	13	/* callback before returning to user */
+#define TIF_UPROBE		14	/* breakpointed or single-stepping */
+#define TIF_SYSCALL_TRACEPOINT	15	/* syscall tracepoint instrumentation */
+#define TIF_EMULATE_STACK_STORE	16	/* Is an instruction emulation
+						for stack store? */
+#define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#if defined(CONFIG_PPC64)
+#define TIF_ELF2ABI		18	/* function descriptors must die! */
+#endif
+#define TIF_POLLING_NRFLAG	19	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_32BIT		20	/* 32 bit binary */
+
+/* as above, but as bit values */
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_SIGNAL	(1<<TIF_NOTIFY_SIGNAL)
+#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#define _TIF_32BIT		(1<<TIF_32BIT)
+#define _TIF_RESTORE_TM		(1<<TIF_RESTORE_TM)
+#define _TIF_PATCH_PENDING	(1<<TIF_PATCH_PENDING)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
+#define _TIF_NOERROR		(1<<TIF_NOERROR)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_UPROBE		(1<<TIF_UPROBE)
+#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_EMULATE_STACK_STORE	(1<<TIF_EMULATE_STACK_STORE)
+#define _TIF_SYSCALL_EMU	(1<<TIF_SYSCALL_EMU)
+#define _TIF_SYSCALL_DOTRACE	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+				 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
+				 _TIF_SYSCALL_EMU)
+
+#define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
+				 _TIF_NOTIFY_SIGNAL)
+#define _TIF_PERSYSCALL_MASK	(_TIF_RESTOREALL|_TIF_NOERROR)
+
+/* Bits in local_flags */
+/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
+#define TLF_NAPPING		0	/* idle thread enabled NAP mode */
+#define TLF_SLEEPING		1	/* suspend code enabled SLEEP mode */
+#define TLF_LAZY_MMU		3	/* tlb_batch is active */
+#define TLF_RUNLATCH		4	/* Is the runlatch enabled? */
+
+#define _TLF_NAPPING		(1 << TLF_NAPPING)
+#define _TLF_SLEEPING		(1 << TLF_SLEEPING)
+#define _TLF_LAZY_MMU		(1 << TLF_LAZY_MMU)
+#define _TLF_RUNLATCH		(1 << TLF_RUNLATCH)
+
+#ifndef __ASSEMBLY__
+
+static inline void clear_thread_local_flags(unsigned int flags)
+{
+	/* Widen before inverting so bits above bit 31 of local_flags survive. */
+	current_thread_info()->local_flags &= ~(unsigned long)flags;
+}
+
+static inline bool test_thread_local_flags(unsigned int flags)
+{
+	/* True when at least one of the requested local_flags bits is set. */
+	return !!(current_thread_info()->local_flags & flags);
+}
+
+#ifdef CONFIG_COMPAT
+#define is_32bit_task()	(test_thread_flag(TIF_32BIT))
+#define is_tsk_32bit_task(tsk)	(test_tsk_thread_flag(tsk, TIF_32BIT))
+#define clear_tsk_compat_task(tsk) (clear_tsk_thread_flag(tsk, TIF_32BIT))
+#else /* !CONFIG_COMPAT: bitness is fixed by the kernel configuration */
+#define is_32bit_task()	(IS_ENABLED(CONFIG_PPC32))
+#define is_tsk_32bit_task(tsk)	(IS_ENABLED(CONFIG_PPC32))
+#define clear_tsk_compat_task(tsk) do { } while (0)
+#endif /* CONFIG_COMPAT */
+
+#if defined(CONFIG_PPC64)
+#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
+#else
+#define is_elf2_task() (0)
+#endif
+
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ *	GOOD_FRAME	if within a frame
+ *	BAD_STACK	if placed across a frame boundary (or outside stack)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+					   const void * const stackend,
+					   const void *obj, unsigned long len)
+{
+	const void *params;
+	const void *frame;
+
+	params = *(const void * const *)current_stack_pointer + STACK_FRAME_PARAMS;
+	frame = **(const void * const * const *)current_stack_pointer;
+
+	/*
+	 * low -----------------------------------------------------------> high
+	 * [backchain][metadata][params][local vars][saved registers][backchain]
+	 *                      ^------------------------------------^
+	 *                      |  allows copies only in this region |
+	 *                      |                                    |
+	 *                    params                               frame
+	 * The metadata region contains the saved LR, CR etc.
+	 */
+	while (stack <= frame && frame < stackend) {
+		if (obj + len <= frame)
+			return obj >= params ? GOOD_FRAME : BAD_STACK;
+		params = frame + STACK_FRAME_PARAMS;
+		frame = *(const void * const *)frame;
+	}
+
+	return BAD_STACK;
+}
+
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_POWERPC_THREAD_INFO_H */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
new file mode 100644
index 0000000000..9f50766c46
--- /dev/null
+++ b/arch/powerpc/include/asm/time.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common time prototypes and such for all ppc machines.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) to merge
+ * Paul Mackerras' version and mine for PReP and Pmac.
+ */
+
+#ifndef __POWERPC_TIME_H
+#define __POWERPC_TIME_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <linux/percpu.h>
+
+#include <asm/processor.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/vdso/timebase.h>
+
+/* time.c */
+extern u64 decrementer_max;
+extern struct clock_event_device decrementer_clockevent;
+
+/* Timebase tick rates */
+extern unsigned long tb_ticks_per_jiffy;
+extern unsigned long tb_ticks_per_usec;
+extern unsigned long tb_ticks_per_sec;
+
+
+extern void generic_calibrate_decr(void);
+
+/* Some sane defaults: 125 MHz timebase, 1GHz processor */
+extern unsigned long ppc_proc_freq;
+#define DEFAULT_PROC_FREQ	(DEFAULT_TB_FREQ * 8)
+extern unsigned long ppc_tb_freq;
+#define DEFAULT_TB_FREQ		125000000UL
+
+extern bool tb_invalid;
+
+struct div_result {
+	u64 result_high;
+	u64 result_low;
+};
+
+static inline u64 get_vtb(void)
+{
+	/* VTB is only read when the CPU reports CPU_FTR_ARCH_207S; else 0. */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 0;
+	return mfspr(SPRN_VTB);
+}
+
+/* Accessor functions for the decrementer register.
+ * The 4xx doesn't even have a decrementer.  I tried to use the
+ * generic timer interrupt code, which seems OK, with the 4xx PIT
+ * in auto-reload mode.  The problem is PIT stops counting when it
+ * hits zero.  If it would wrap, we could use it just like a decrementer.
+ */
+static inline u64 get_dec(void)
+{
+	/* CONFIG_40x builds read the PIT; every other build reads DEC. */
+	if (!IS_ENABLED(CONFIG_40x))
+		return mfspr(SPRN_DEC);
+	return mfspr(SPRN_PIT);
+}
+
+/*
+ * Note: Book E and 4xx processors differ from other PowerPC processors
+ * in when the decrementer generates its interrupt: on the 1 to 0
+ * transition for Book E/4xx, but on the 0 to -1 transition for others.
+ */
+static inline void set_dec(u64 val)
+{
+	if (IS_ENABLED(CONFIG_40x)) {
+		mtspr(SPRN_PIT, (u32)val);
+		return;
+	}
+	/* Non-Book E parts interrupt on 0 -> -1, so program one tick less. */
+	mtspr(SPRN_DEC, IS_ENABLED(CONFIG_BOOKE) ? val : val - 1);
+}
+
+static inline unsigned long tb_ticks_since(unsigned long tstamp)
+{
+	return mftb() - tstamp;
+}
+
+#define mulhwu(x,y) \
+({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
+
+#ifdef CONFIG_PPC64
+#define mulhdu(x,y) \
+({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
+#else
+extern u64 mulhdu(u64, u64);
+#endif
+
+extern void div128_by_32(u64 dividend_high, u64 dividend_low,
+			 unsigned divisor, struct div_result *dr);
+
+extern void secondary_cpu_time_init(void);
+extern void __init time_init(void);
+
+DECLARE_PER_CPU(u64, decrementers_next_tb);
+
+static inline u64 timer_get_next_tb(void)
+{
+	return __this_cpu_read(decrementers_next_tb);
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now);
+#endif
+
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
+void timer_broadcast_interrupt(void);
+
+/* SPLPAR and VIRT_CPU_ACCOUNTING_NATIVE */
+void pseries_accumulate_stolen_time(void);
+u64 pseries_calculate_stolen_time(u64 stop_tb);
+
+#endif /* __KERNEL__ */
+#endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
new file mode 100644
index 0000000000..14b4489de5
--- /dev/null
+++ b/arch/powerpc/include/asm/timex.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TIMEX_H
+#define _ASM_POWERPC_TIMEX_H
+
+#ifdef __KERNEL__
+
+/*
+ * PowerPC architecture timex specifications
+ */
+
+#include <asm/cputable.h>
+#include <asm/vdso/timebase.h>
+
+#define CLOCK_TICK_RATE	1024000 /* Underlying HZ */
+
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+	return mftb();
+}
+#define get_cycles get_cycles
+
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_TIMEX_H */
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
new file mode 100644
index 0000000000..b3de6102a9
--- /dev/null
+++ b/arch/powerpc/include/asm/tlb.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *	TLB shootdown specifics for powerpc
+ *
+ * Copyright (C) 2002 Anton Blanchard, IBM Corp.
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#ifndef _ASM_POWERPC_TLB_H
+#define _ASM_POWERPC_TLB_H
+#ifdef __KERNEL__
+
+#ifndef __powerpc64__
+#include <linux/pgtable.h>
+#endif
+#ifndef __powerpc64__
+#include <asm/page.h>
+#include <asm/mmu.h>
+#endif
+
+#include <linux/pagemap.h>
+
+#define __tlb_remove_tlb_entry	__tlb_remove_tlb_entry
+
+#define tlb_flush tlb_flush
+extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate()	radix_enabled()
+
+/* Get the generic bits... */
+#include <asm-generic/tlb.h>
+
+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
+					  unsigned long address)
+{
+#ifdef CONFIG_PPC_BOOK3S_32
+	if (pte_val(*ptep) & _PAGE_HASHPTE)
+		flush_hash_entry(tlb->mm, ptep, address);
+#endif
+}
+
+#ifdef CONFIG_SMP
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+	return cpumask_subset(mm_cpumask(mm),
+			      topology_sibling_cpumask(smp_processor_id()));
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+	if (atomic_read(&mm->context.active_cpus) <= 1)
+		return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
+	return false;	/* active_cpus count exceeds one */
+}
+#else /* CONFIG_PPC_BOOK3S_64 */
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+	return cpumask_equal(mm_cpumask(mm),
+			      cpumask_of(smp_processor_id()));
+}
+#endif /* !CONFIG_PPC_BOOK3S_64 */
+
+#else /* CONFIG_SMP */
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+	return 1;
+}
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+	return 1;
+}
+#endif
+
+#define arch_supports_page_table_move arch_supports_page_table_move
+static inline bool arch_supports_page_table_move(void)
+{
+	return radix_enabled();
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_TLB_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
new file mode 100644
index 0000000000..61fba43bf8
--- /dev/null
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TLBFLUSH_H
+#define _ASM_POWERPC_TLBFLUSH_H
+
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/tlbflush.h>
+#else
+#include <asm/nohash/tlbflush.h>
+#endif /* !CONFIG_PPC_BOOK3S */
+
+#endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
new file mode 100644
index 0000000000..e94f6db5e3
--- /dev/null
+++ b/arch/powerpc/include/asm/tm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Transactional memory support routines to reclaim and recheckpoint
+ * transactional process state.
+ *
+ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
+ */
+
+#include <uapi/asm/tm.h>
+
+#ifndef __ASSEMBLY__
+
+extern void tm_reclaim(struct thread_struct *thread,
+		       uint8_t cause);
+extern void tm_reclaim_current(uint8_t cause);
+extern void tm_recheckpoint(struct thread_struct *thread);
+extern void tm_save_sprs(struct thread_struct *thread);
+extern void tm_restore_sprs(struct thread_struct *thread);
+
+extern bool tm_suspend_disabled;
+
+#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
new file mode 100644
index 0000000000..f4e6f2dd04
--- /dev/null
+++ b/arch/powerpc/include/asm/topology.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TOPOLOGY_H
+#define _ASM_POWERPC_TOPOLOGY_H
+#ifdef __KERNEL__
+
+
+struct device;
+struct device_node;
+struct drmem_lmb;
+
+#ifdef CONFIG_NUMA
+
+/*
+ * If zone_reclaim_mode is enabled, a RECLAIM_DISTANCE of 10 will mean that
+ * all zones on all nodes will be eligible for zone_reclaim().
+ */
+#define RECLAIM_DISTANCE 10
+
+#include <asm/mmzone.h>
+
+#define cpumask_of_node(node) ((node) == -1 ?				\
+			       cpu_all_mask :				\
+			       node_to_cpumask_map[node])
+
+struct pci_bus;
+#ifdef CONFIG_PCI
+extern int pcibus_to_node(struct pci_bus *bus);
+#else
+static inline int pcibus_to_node(struct pci_bus *bus)
+{
+	return -1;
+}
+#endif
+
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
+extern int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a, b)
+
+extern void __init dump_numa_cpu_topology(void);
+
+extern int sysfs_add_device_to_node(struct device *dev, int nid);
+extern void sysfs_remove_device_from_node(struct device *dev, int nid);
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+	numa_cpu_lookup_table[cpu] = node;
+}
+
+static inline int early_cpu_to_node(int cpu)
+{
+	int nid = numa_cpu_lookup_table[cpu];
+
+	/*
+	 * A negative nid means no node was recorded for this cpu; use node 0
+	 * so that NODE_DATA(early_cpu_to_node(cpu)) is always safe to evaluate.
+	 */
+	if (nid < 0)
+		return 0;
+	return nid;
+}
+
+int of_drconf_to_nid_single(struct drmem_lmb *lmb);
+void update_numa_distance(struct device_node *node);
+
+extern void map_cpu_to_node(int cpu, int node);
+#ifdef CONFIG_HOTPLUG_CPU
+extern void unmap_cpu_from_node(unsigned long cpu);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#else
+
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
+static inline void dump_numa_cpu_topology(void) {}
+
+static inline int sysfs_add_device_to_node(struct device *dev, int nid)
+{
+	return 0;
+}
+
+static inline void sysfs_remove_device_from_node(struct device *dev,
+						int nid)
+{
+}
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
+static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	return 0;
+}
+
+static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+{
+	return first_online_node;
+}
+
+static inline void update_numa_distance(struct device_node *node) {}
+
+#ifdef CONFIG_SMP
+static inline void map_cpu_to_node(int cpu, int node) {}
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void unmap_cpu_from_node(unsigned long cpu) {}
+#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_SMP */
+
+#endif /* CONFIG_NUMA */
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
+void find_and_update_cpu_nid(int cpu);
+extern int cpu_to_coregroup_id(int cpu);
+#else
+static inline void find_and_update_cpu_nid(int cpu) {}
+static inline int cpu_to_coregroup_id(int cpu)
+{
+#ifdef CONFIG_SMP
+	return cpu_to_core_id(cpu);
+#else
+	return 0;
+#endif
+}
+
+#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
+
+#include <asm-generic/topology.h>
+
+#ifdef CONFIG_SMP
+#include <asm/cputable.h>
+
+#ifdef CONFIG_PPC64
+#include <asm/smp.h>
+
+#define topology_physical_package_id(cpu)	(cpu_to_chip_id(cpu))
+
+#define topology_sibling_cpumask(cpu)	(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)	(per_cpu(cpu_core_map, cpu))
+#define topology_core_id(cpu)		(cpu_to_core_id(cpu))
+
+#endif
+#endif
+
+#ifdef CONFIG_HOTPLUG_SMT
+#include <linux/cpu_smt.h>
+#include <asm/cputhreads.h>
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+	return cpu == cpu_first_thread_sibling(cpu);
+}
+
+static inline bool topology_smt_thread_allowed(unsigned int cpu)
+{
+	return cpu_thread_in_core(cpu) < cpu_smt_num_threads;
+}
+#endif
+
+#endif /* __KERNEL__ */
+#endif	/* _ASM_POWERPC_TOPOLOGY_H */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
new file mode 100644
index 0000000000..82cc2c6704
--- /dev/null
+++ b/arch/powerpc/include/asm/trace.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM powerpc
+
+#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWERPC_H
+
+#include <linux/tracepoint.h>
+
+struct pt_regs;
+
+DECLARE_EVENT_CLASS(ppc64_interrupt_class,
+	/* Event class whose single recorded field is the struct pt_regs pointer. */
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, irq_entry,
+	/* irq_* and timer_interrupt_* below are all instances of ppc64_interrupt_class. */
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, irq_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs)
+);
+
+#ifdef CONFIG_PPC_DOORBELL
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry,
+	/* doorbell_entry/doorbell_exit are instances of ppc64_interrupt_class. */
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs)
+);
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+extern int hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN_COND(hcall_entry,
+	/* Stores only @opcode; @args is in the prototype but is not recorded. */
+	TP_PROTO(unsigned long opcode, unsigned long *args),
+
+	TP_ARGS(opcode, args),
+	/* Condition: cpu_online() for the CPU given by raw_smp_processor_id(). */
+	TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN_COND(hcall_exit,
+	/* Stores @opcode and @retval; @retbuf is in the prototype but is not recorded. */
+	TP_PROTO(unsigned long opcode, long retval, unsigned long *retbuf),
+
+	TP_ARGS(opcode, retval, retbuf),
+	/* Condition: cpu_online() for the CPU given by raw_smp_processor_id(). */
+	TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%ld", __entry->opcode, __entry->retval),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
+#ifdef CONFIG_PPC_RTAS
+
+#include <asm/rtas-types.h>
+
+TRACE_EVENT(rtas_input,
+	/* Records the call name plus nargs words of args[], converted from big-endian. */
+	TP_PROTO(struct rtas_args *rtas_args, const char *name),
+
+	TP_ARGS(rtas_args, name),
+
+	TP_STRUCT__entry(
+		__field(__u32, nargs)
+		__string(name, name)
+		__dynamic_array(__u32, inputs, be32_to_cpu(rtas_args->nargs))
+	),
+
+	TP_fast_assign(
+		__entry->nargs = be32_to_cpu(rtas_args->nargs);
+		__assign_str(name, name);
+		be32_to_cpu_array(__get_dynamic_array(inputs), rtas_args->args, __entry->nargs);
+	),
+
+	TP_printk("%s arguments: %s", __get_str(name),
+		  __print_array(__get_dynamic_array(inputs), __entry->nargs, 4)
+	)
+);
+
+TRACE_EVENT(rtas_output,
+	/* rets[0] is recorded as status; the remaining nret - 1 words go to other_outputs. */
+	TP_PROTO(struct rtas_args *rtas_args, const char *name),
+
+	TP_ARGS(rtas_args, name),
+
+	TP_STRUCT__entry(
+		__field(__u32, nr_other)
+		__field(__s32, status)
+		__string(name, name)
+		__dynamic_array(__u32, other_outputs, be32_to_cpu(rtas_args->nret) - 1)
+	),
+	/* NOTE(review): nret - 1 assumes nret >= 1 - confirm every caller guarantees it. */
+	TP_fast_assign(
+		__entry->nr_other = be32_to_cpu(rtas_args->nret) - 1;
+		__entry->status = be32_to_cpu(rtas_args->rets[0]);
+		__assign_str(name, name);
+		be32_to_cpu_array(__get_dynamic_array(other_outputs),
+				  &rtas_args->rets[1], __entry->nr_other);
+	),
+
+	TP_printk("%s status: %d, other outputs: %s", __get_str(name), __entry->status,
+		  __print_array(__get_dynamic_array(other_outputs),
+				__entry->nr_other, 4)
+	)
+);
+
+DECLARE_EVENT_CLASS(rtas_parameter_block,
+	/* Snapshot of token, nargs, nret and the whole args[] array, converted from big-endian. */
+	TP_PROTO(struct rtas_args *rtas_args),
+
+	TP_ARGS(rtas_args),
+
+	TP_STRUCT__entry(
+		__field(u32, token)
+		__field(u32, nargs)
+		__field(u32, nret)
+		__array(__u32, params, 16)
+	),
+	/* NOTE(review): params[] has 16 slots; confirm rtas_args->args has no more than 16. */
+	TP_fast_assign(
+		__entry->token = be32_to_cpu(rtas_args->token);
+		__entry->nargs = be32_to_cpu(rtas_args->nargs);
+		__entry->nret = be32_to_cpu(rtas_args->nret);
+		be32_to_cpu_array(__entry->params, rtas_args->args, ARRAY_SIZE(rtas_args->args));
+	),
+
+	TP_printk("token=%u nargs=%u nret=%u params:"
+		  " [0]=0x%08x [1]=0x%08x [2]=0x%08x [3]=0x%08x"
+		  " [4]=0x%08x [5]=0x%08x [6]=0x%08x [7]=0x%08x"
+		  " [8]=0x%08x [9]=0x%08x [10]=0x%08x [11]=0x%08x"
+		  " [12]=0x%08x [13]=0x%08x [14]=0x%08x [15]=0x%08x",
+		  __entry->token, __entry->nargs, __entry->nret,
+		  __entry->params[0], __entry->params[1], __entry->params[2], __entry->params[3],
+		  __entry->params[4], __entry->params[5], __entry->params[6], __entry->params[7],
+		  __entry->params[8], __entry->params[9], __entry->params[10], __entry->params[11],
+		  __entry->params[12], __entry->params[13], __entry->params[14], __entry->params[15]
+	)
+);
+
+DEFINE_EVENT(rtas_parameter_block, rtas_ll_entry,
+	/* rtas_ll_entry/rtas_ll_exit are instances of the rtas_parameter_block class. */
+	TP_PROTO(struct rtas_args *rtas_args),
+
+	TP_ARGS(rtas_args)
+);
+
+DEFINE_EVENT(rtas_parameter_block, rtas_ll_exit,
+
+	TP_PROTO(struct rtas_args *rtas_args),
+
+	TP_ARGS(rtas_args)
+);
+
+#endif /* CONFIG_PPC_RTAS */
+
+#ifdef CONFIG_PPC_POWERNV
+extern int opal_tracepoint_regfunc(void);
+extern void opal_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(opal_entry,
+	/* Stores only @opcode; @args is in the prototype but is not recorded. */
+	TP_PROTO(unsigned long opcode, unsigned long *args),
+
+	TP_ARGS(opcode, args),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(opal_exit,
+	/* Stores @opcode and @retval; both are printed as unsigned decimal (%lu). */
+	TP_PROTO(unsigned long opcode, unsigned long retval),
+
+	TP_ARGS(opcode, retval),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
+);
+#endif
+
+TRACE_EVENT(hash_fault,
+	    /* Records the three arguments unchanged; all are printed in hex. */
+	    TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+	    TP_ARGS(addr, access, trap),
+	    TP_STRUCT__entry(
+		    __field(unsigned long, addr)
+		    __field(unsigned long, access)
+		    __field(unsigned long, trap)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->addr = addr;
+		    __entry->access = access;
+		    __entry->trap = trap;
+		    ),
+
+	    TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+		      __entry->addr, __entry->access, __entry->trap)
+);
+
+
+TRACE_EVENT(tlbie,
+	/* Records all seven arguments unchanged; lpid/local print in decimal, the rest in hex. */
+	TP_PROTO(unsigned long lpid, unsigned long local, unsigned long rb,
+		unsigned long rs, unsigned long ric, unsigned long prs,
+		unsigned long r),
+	TP_ARGS(lpid, local, rb, rs, ric, prs, r),
+	TP_STRUCT__entry(
+		__field(unsigned long, lpid)
+		__field(unsigned long, local)
+		__field(unsigned long, rb)
+		__field(unsigned long, rs)
+		__field(unsigned long, ric)
+		__field(unsigned long, prs)
+		__field(unsigned long, r)
+		),
+
+	TP_fast_assign(
+		__entry->lpid = lpid;
+		__entry->local = local;
+		__entry->rb = rb;
+		__entry->rs = rs;
+		__entry->ric = ric;
+		__entry->prs = prs;
+		__entry->r = r;
+		),
+
+	TP_printk("lpid=%ld, local=%ld, rb=0x%lx, rs=0x%lx, ric=0x%lx, "
+		"prs=0x%lx, r=0x%lx", __entry->lpid, __entry->local,
+		__entry->rb, __entry->rs, __entry->ric, __entry->prs,
+		__entry->r)
+);
+
+TRACE_EVENT(tlbia,
+	/* Records @id unchanged; printed in hex under the label "ctx.id". */
+	TP_PROTO(unsigned long id),
+	TP_ARGS(id),
+	TP_STRUCT__entry(
+		__field(unsigned long, id)
+		),
+
+	TP_fast_assign(
+		__entry->id = id;
+		),
+
+	TP_printk("ctx.id=0x%lx", __entry->id)
+);
+
+#endif /* _TRACE_POWERPC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/include/asm/trace_clock.h b/arch/powerpc/include/asm/trace_clock.h
new file mode 100644
index 0000000000..ef70c2f797
--- /dev/null
+++ b/arch/powerpc/include/asm/trace_clock.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#ifndef _ASM_PPC_TRACE_CLOCK_H
+#define _ASM_PPC_TRACE_CLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_ppc_tb(void);
+
+#define ARCH_TRACE_CLOCKS { trace_clock_ppc_tb, "ppc-tb", 0 },
+
+#endif  /* _ASM_PPC_TRACE_CLOCK_H */
diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h
new file mode 100644
index 0000000000..8a2b6427d3
--- /dev/null
+++ b/arch/powerpc/include/asm/tsi108.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * common routine and memory layout for Tundra TSI108(Grendel) host bridge
+ * memory controller.
+ *
+ * Author: Jacob Pan (jacob.pan@freescale.com)
+ *	   Alex Bounine (alexandreb@tundra.com)
+ *
+ * Copyright 2004-2006 Freescale Semiconductor, Inc.
+ */
+
+#ifndef __PPC_KERNEL_TSI108_H
+#define __PPC_KERNEL_TSI108_H
+
+#include <asm/pci-bridge.h>
+
+/* Size of entire register space */
+#define TSI108_REG_SIZE		(0x10000)
+
+/* Sizes of register spaces for individual blocks */
+#define TSI108_HLP_SIZE		0x1000
+#define TSI108_PCI_SIZE		0x1000
+#define TSI108_CLK_SIZE		0x1000
+#define TSI108_PB_SIZE		0x1000
+#define TSI108_SD_SIZE		0x1000
+#define TSI108_DMA_SIZE		0x1000
+#define TSI108_ETH_SIZE		0x1000
+#define TSI108_I2C_SIZE		0x400
+#define TSI108_MPIC_SIZE	0x400
+#define TSI108_UART0_SIZE	0x200
+#define TSI108_GPIO_SIZE	0x200
+#define TSI108_UART1_SIZE	0x200
+
+/* Offsets within Tsi108(A) CSR space for individual blocks */
+#define TSI108_HLP_OFFSET	0x0000
+#define TSI108_PCI_OFFSET	0x1000
+#define TSI108_CLK_OFFSET	0x2000
+#define TSI108_PB_OFFSET	0x3000
+#define TSI108_SD_OFFSET	0x4000
+#define TSI108_DMA_OFFSET	0x5000
+#define TSI108_ETH_OFFSET	0x6000
+#define TSI108_I2C_OFFSET	0x7000
+#define TSI108_MPIC_OFFSET	0x7400
+#define TSI108_UART0_OFFSET	0x7800
+#define TSI108_GPIO_OFFSET	0x7A00
+#define TSI108_UART1_OFFSET	0x7C00
+
+/* Tsi108 registers used by common code components */
+#define TSI108_PCI_CSR		(0x004)
+#define TSI108_PCI_IRP_CFG_CTL	(0x180)
+#define TSI108_PCI_IRP_STAT	(0x184)
+#define TSI108_PCI_IRP_ENABLE	(0x188)
+#define TSI108_PCI_IRP_INTAD	(0x18C)
+
+#define TSI108_PCI_IRP_STAT_P_INT	(0x00400000)
+#define TSI108_PCI_IRP_ENABLE_P_INT	(0x00400000)
+
+#define TSI108_CG_PWRUP_STATUS	(0x234)
+
+#define TSI108_PB_ISR		(0x00C)
+#define TSI108_PB_ERRCS		(0x404)
+#define TSI108_PB_AERR		(0x408)
+
+#define TSI108_PB_ERRCS_ES		(1 << 1)
+#define TSI108_PB_ISR_PBS_RD_ERR	(1 << 8)
+
+#define TSI108_PCI_CFG_SIZE		(0x01000000)
+
+/*
+ * PHY Configuration Options
+ *
+ * Specify "bcm54xx" in the compatible property of your device tree phy
+ * nodes if your board uses the Broadcom PHYs
+ */
+#define TSI108_PHY_MV88E	0	/* Marvell 88Exxxx PHY */
+#define TSI108_PHY_BCM54XX	1	/* Broadcom BCM54xx PHY */
+
+/* Global variables */
+
+extern u32 tsi108_pci_cfg_base;
+/* Exported functions */
+
+extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn,
+				      int offset, int len, u32 val);
+extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 * val);
+extern void tsi108_clear_pci_error(u32 pci_cfg_base);
+
+extern phys_addr_t get_csrbase(void);
+
+typedef struct {
+	u32 regs;		/* hw registers base address */
+	u32 phyregs;		/* phy registers base address */
+	u16 phy;		/* phy address */
+	u16 irq_num;		/* irq number */
+	u8 mac_addr[6];		/* phy mac address */
+	u16 phy_type;	/* type of phy on board */
+} hw_info;
+
+extern u32 get_vir_csrbase(void);
+extern u32 tsi108_csr_vir_base;
+/* Read the 32-bit register at tsi108_csr_vir_base + @reg_offset via in_be32(). */
+static inline u32 tsi108_read_reg(u32 reg_offset)
+{
+	return in_be32((volatile u32 *)(tsi108_csr_vir_base + reg_offset));
+}
+/* Write @val to the 32-bit register at tsi108_csr_vir_base + @reg_offset via out_be32(). */
+static inline void tsi108_write_reg(u32 reg_offset, u32 val)
+{
+	out_be32((volatile u32 *)(tsi108_csr_vir_base + reg_offset), val);
+}
+
+#endif				/* __PPC_KERNEL_TSI108_H */
diff --git a/arch/powerpc/include/asm/tsi108_irq.h b/arch/powerpc/include/asm/tsi108_irq.h
new file mode 100644
index 0000000000..df602ca4cc
--- /dev/null
+++ b/arch/powerpc/include/asm/tsi108_irq.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * (C) Copyright 2005 Tundra Semiconductor Corp.
+ * Alex Bounine, <alexandreb at tundra.com).
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ */
+
+/*
+ * definitions for interrupt controller initialization and external interrupt
+ * demultiplexing on TSI108EMU/SVB boards.
+ */
+
+#ifndef _ASM_POWERPC_TSI108_IRQ_H
+#define _ASM_POWERPC_TSI108_IRQ_H
+
+/*
+ * Tsi108 interrupts
+ */
+#ifndef TSI108_IRQ_REG_BASE
+#define TSI108_IRQ_REG_BASE		0
+#endif
+
+#define TSI108_IRQ(x)		(TSI108_IRQ_REG_BASE + (x))
+
+#define TSI108_MAX_VECTORS	(36 + 4)	/* 36 sources + PCI INT demux */
+#define MAX_TASK_PRIO	0xF
+
+#define TSI108_IRQ_SPURIOUS	(TSI108_MAX_VECTORS)
+
+#define DEFAULT_PRIO_LVL	10	/* initial priority level */
+
+/* Interrupt vectors assignment to external and internal
+ * sources of requests. */
+
+/* EXTERNAL INTERRUPT SOURCES */
+
+#define IRQ_TSI108_EXT_INT0	TSI108_IRQ(0)	/* External Source at INT[0] */
+#define IRQ_TSI108_EXT_INT1	TSI108_IRQ(1)	/* External Source at INT[1] */
+#define IRQ_TSI108_EXT_INT2	TSI108_IRQ(2)	/* External Source at INT[2] */
+#define IRQ_TSI108_EXT_INT3	TSI108_IRQ(3)	/* External Source at INT[3] */
+
+/* INTERNAL INTERRUPT SOURCES */
+
+#define IRQ_TSI108_RESERVED0	TSI108_IRQ(4)	/* Reserved IRQ */
+#define IRQ_TSI108_RESERVED1	TSI108_IRQ(5)	/* Reserved IRQ */
+#define IRQ_TSI108_RESERVED2	TSI108_IRQ(6)	/* Reserved IRQ */
+#define IRQ_TSI108_RESERVED3	TSI108_IRQ(7)	/* Reserved IRQ */
+#define IRQ_TSI108_DMA0		TSI108_IRQ(8)	/* DMA0 */
+#define IRQ_TSI108_DMA1		TSI108_IRQ(9)	/* DMA1 */
+#define IRQ_TSI108_DMA2		TSI108_IRQ(10)	/* DMA2 */
+#define IRQ_TSI108_DMA3		TSI108_IRQ(11)	/* DMA3 */
+#define IRQ_TSI108_UART0	TSI108_IRQ(12)	/* UART0 */
+#define IRQ_TSI108_UART1	TSI108_IRQ(13)	/* UART1 */
+#define IRQ_TSI108_I2C		TSI108_IRQ(14)	/* I2C */
+#define IRQ_TSI108_GPIO		TSI108_IRQ(15)	/* GPIO */
+#define IRQ_TSI108_GIGE0	TSI108_IRQ(16)	/* GIGE0 */
+#define IRQ_TSI108_GIGE1	TSI108_IRQ(17)	/* GIGE1 */
+#define IRQ_TSI108_RESERVED4	TSI108_IRQ(18)	/* Reserved IRQ */
+#define IRQ_TSI108_HLP		TSI108_IRQ(19)	/* HLP */
+#define IRQ_TSI108_SDRAM	TSI108_IRQ(20)	/* SDC */
+#define IRQ_TSI108_PROC_IF	TSI108_IRQ(21)	/* Processor IF */
+#define IRQ_TSI108_RESERVED5	TSI108_IRQ(22)	/* Reserved IRQ */
+#define IRQ_TSI108_PCI		TSI108_IRQ(23)	/* PCI/X block */
+
+#define IRQ_TSI108_MBOX0	TSI108_IRQ(24)	/* Mailbox 0 register */
+#define IRQ_TSI108_MBOX1	TSI108_IRQ(25)	/* Mailbox 1 register */
+#define IRQ_TSI108_MBOX2	TSI108_IRQ(26)	/* Mailbox 2 register */
+#define IRQ_TSI108_MBOX3	TSI108_IRQ(27)	/* Mailbox 3 register */
+
+#define IRQ_TSI108_DBELL0	TSI108_IRQ(28)	/* Doorbell 0 */
+#define IRQ_TSI108_DBELL1	TSI108_IRQ(29)	/* Doorbell 1 */
+#define IRQ_TSI108_DBELL2	TSI108_IRQ(30)	/* Doorbell 2 */
+#define IRQ_TSI108_DBELL3	TSI108_IRQ(31)	/* Doorbell 3 */
+
+#define IRQ_TSI108_TIMER0	TSI108_IRQ(32)	/* Global Timer 0 */
+#define IRQ_TSI108_TIMER1	TSI108_IRQ(33)	/* Global Timer 1 */
+#define IRQ_TSI108_TIMER2	TSI108_IRQ(34)	/* Global Timer 2 */
+#define IRQ_TSI108_TIMER3	TSI108_IRQ(35)	/* Global Timer 3 */
+
+/*
+ * PCI bus INTA# - INTD# lines demultiplexor
+ */
+#define IRQ_PCI_INTAD_BASE	TSI108_IRQ(36)
+#define IRQ_PCI_INTA		(IRQ_PCI_INTAD_BASE + 0)
+#define IRQ_PCI_INTB		(IRQ_PCI_INTAD_BASE + 1)
+#define IRQ_PCI_INTC		(IRQ_PCI_INTAD_BASE + 2)
+#define IRQ_PCI_INTD		(IRQ_PCI_INTAD_BASE + 3)
+#define NUM_PCI_IRQS		(4)
+
+/* number of entries in vector dispatch table */
+#define IRQ_TSI108_TAB_SIZE	(TSI108_MAX_VECTORS + 1)
+
+/* Mapping of MPIC outputs to processors' interrupt pins */
+
+#define IDIR_INT_OUT0		0x1
+#define IDIR_INT_OUT1		0x2
+#define IDIR_INT_OUT2		0x4
+#define IDIR_INT_OUT3		0x8
+
+/*---------------------------------------------------------------
+ * IRQ line configuration parameters */
+
+/* Interrupt delivery modes */
+typedef enum {
+	TSI108_IRQ_DIRECTED,
+	TSI108_IRQ_DISTRIBUTED,
+} TSI108_IRQ_MODE;
+#endif				/*  _ASM_POWERPC_TSI108_IRQ_H */
diff --git a/arch/powerpc/include/asm/tsi108_pci.h b/arch/powerpc/include/asm/tsi108_pci.h
new file mode 100644
index 0000000000..fb6f626691
--- /dev/null
+++ b/arch/powerpc/include/asm/tsi108_pci.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2007 IBM Corp
+ */
+
+#ifndef _ASM_POWERPC_TSI108_PCI_H
+#define _ASM_POWERPC_TSI108_PCI_H
+
+#include <asm/tsi108.h>
+
+/* Register definitions */
+#define TSI108_PCI_P2O_BAR0 (TSI108_PCI_OFFSET + 0x10)
+#define TSI108_PCI_P2O_BAR0_UPPER (TSI108_PCI_OFFSET + 0x14)
+#define TSI108_PCI_P2O_BAR2 (TSI108_PCI_OFFSET + 0x18)
+#define TSI108_PCI_P2O_BAR2_UPPER (TSI108_PCI_OFFSET + 0x1c)
+#define TSI108_PCI_P2O_PAGE_SIZES (TSI108_PCI_OFFSET + 0x4c)
+#define TSI108_PCI_PFAB_BAR0 (TSI108_PCI_OFFSET + 0x204)
+#define TSI108_PCI_PFAB_BAR0_UPPER (TSI108_PCI_OFFSET + 0x208)
+#define TSI108_PCI_PFAB_IO (TSI108_PCI_OFFSET + 0x20c)
+#define TSI108_PCI_PFAB_IO_UPPER (TSI108_PCI_OFFSET + 0x210)
+#define TSI108_PCI_PFAB_MEM32 (TSI108_PCI_OFFSET + 0x214)
+#define TSI108_PCI_PFAB_PFM3 (TSI108_PCI_OFFSET + 0x220)
+#define TSI108_PCI_PFAB_PFM4 (TSI108_PCI_OFFSET + 0x230)
+
+extern int tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary);
+extern void tsi108_pci_int_init(struct device_node *node);
+extern void tsi108_irq_cascade(struct irq_desc *desc);
+extern void tsi108_clear_pci_cfg_error(void);
+
+#endif				/*  _ASM_POWERPC_TSI108_PCI_H */
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
new file mode 100644
index 0000000000..93157a661d
--- /dev/null
+++ b/arch/powerpc/include/asm/types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file is never included by application software unless
+ * explicitly requested (e.g., via linux/types.h) in which case the
+ * application is Linux specific so (user-) name space pollution is
+ * not a major issue.  However, for interoperability, libraries still
+ * need to be careful to avoid name clashes.
+ */
+#ifndef _ASM_POWERPC_TYPES_H
+#define _ASM_POWERPC_TYPES_H
+
+#include <uapi/asm/types.h>
+
+#ifndef __ASSEMBLY__
+
+typedef __vector128 vector128;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_TYPES_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
new file mode 100644
index 0000000000..fb725ec779
--- /dev/null
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_POWERPC_UACCESS_H
+#define _ARCH_POWERPC_UACCESS_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/extable.h>
+#include <asm/kup.h>
+
+#ifdef __powerpc64__
+/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
+#define TASK_SIZE_MAX		TASK_SIZE_USER64
+#endif
+
+#include <asm-generic/access_ok.h>
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ *
+ * As we use the same address space for kernel and user data on the
+ * PowerPC, we can just do these as direct assignments.  (Of course, the
+ * exception handling means that it's no longer "just"...)
+ *
+ */
+#define __put_user(x, ptr)					\
+({								\
+	long __pu_err;						\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
+	__typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x);	\
+	__typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr));	\
+	/* x and ptr are each evaluated once, into the locals above */	\
+	might_fault();						\
+	do {							\
+		__label__ __pu_failed;				\
+		/* a faulting store jumps to __pu_failed */	\
+		allow_write_to_user(__pu_addr, __pu_size);	\
+		__put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed);	\
+		prevent_write_to_user(__pu_addr, __pu_size);	\
+		__pu_err = 0;					\
+		break;						\
+		/* fault path: undo the allow, then report -EFAULT */	\
+__pu_failed:							\
+		prevent_write_to_user(__pu_addr, __pu_size);	\
+		__pu_err = -EFAULT;				\
+	} while (0);						\
+	/* value of the statement expression: 0 or -EFAULT */	\
+	__pu_err;						\
+})
+
+#define put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __user *_pu_addr = (ptr);			\
+	/* access_ok() first; __put_user() does the store, else -EFAULT */	\
+	access_ok(_pu_addr, sizeof(*(ptr))) ?				\
+		  __put_user(x, _pu_addr) : -EFAULT;			\
+})
+
+/*
+ * We don't tell gcc that we are accessing memory, but this is OK
+ * because we do not write to any memory gcc knows about, so there
+ * are no aliasing issues.
+ */
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __put_user_asm_goto(x, addr, label, op)			\
+	asm_volatile_goto(					\
+		"1:	" op " %0,0(%1)	# put_user\n"		\
+		EX_TABLE(1b, %l2)				\
+		:						\
+		: "r" (x), "b" (addr)				\
+		:						\
+		: label)
+#else
+#define __put_user_asm_goto(x, addr, label, op)			\
+	asm_volatile_goto(					\
+		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\
+		EX_TABLE(1b, %l2)				\
+		:						\
+		: "r" (x), "m<>" (*addr)			\
+		:						\
+		: label)
+#endif
+
+#ifdef __powerpc64__
+#define __put_user_asm2_goto(x, ptr, label)			\
+	__put_user_asm_goto(x, ptr, label, "std")
+#else /* __powerpc64__ */
+#define __put_user_asm2_goto(x, addr, label)			\
+	asm_volatile_goto(					\
+		"1:	stw%X1 %0, %1\n"			\
+		"2:	stw%X1 %L0, %L1\n"			\
+		EX_TABLE(1b, %l2)				\
+		EX_TABLE(2b, %l2)				\
+		:						\
+		: "r" (x), "m" (*addr)				\
+		:						\
+		: label)
+#endif /* __powerpc64__ */
+
+#define __put_user_size_goto(x, ptr, size, label)		\
+do {								\
+	__typeof__(*(ptr)) __user *__pus_addr = (ptr);		\
+	/* sizes 1/2/4 use stb/sth/stw, 8 uses the asm2 variant; others fail the build */	\
+	switch (size) {						\
+	case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break;	\
+	case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break;	\
+	case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break;	\
+	case 8: __put_user_asm2_goto(x, __pus_addr, label); break;		\
+	default: BUILD_BUG();					\
+	}							\
+} while (0)
+
+/*
+ * This does an atomic, aligned 128-bit load from userspace.
+ * Up to the caller to do enable_kernel_vmx() before calling!
+ */
+#define __get_user_atomic_128_aligned(kaddr, uaddr, err)		\
+	__asm__ __volatile__(				\
+		".machine push\n"			\
+		".machine altivec\n"			\
+		"1:	lvx  0,0,%1	# get user\n"	\
+		" 	stvx 0,0,%2	# put kernel\n"	\
+		".machine pop\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %0,%3\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err)			\
+		: "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __get_user_asm_goto(x, addr, label, op)			\
+	asm_volatile_goto(					\
+		"1:	"op" %0,0(%1)	# get_user\n"		\
+		EX_TABLE(1b, %l2)				\
+		: "=r" (x)					\
+		: "b" (addr)					\
+		:						\
+		: label)
+#else
+#define __get_user_asm_goto(x, addr, label, op)			\
+	asm_volatile_goto(					\
+		"1:	"op"%U1%X1 %0, %1	# get_user\n"	\
+		EX_TABLE(1b, %l2)				\
+		: "=r" (x)					\
+		: "m<>" (*addr)					\
+		:						\
+		: label)
+#endif
+
+#ifdef __powerpc64__
+#define __get_user_asm2_goto(x, addr, label)			\
+	__get_user_asm_goto(x, addr, label, "ld")
+#else /* __powerpc64__ */
+#define __get_user_asm2_goto(x, addr, label)			\
+	asm_volatile_goto(					\
+		"1:	lwz%X1 %0, %1\n"			\
+		"2:	lwz%X1 %L0, %L1\n"			\
+		EX_TABLE(1b, %l2)				\
+		EX_TABLE(2b, %l2)				\
+		: "=&r" (x)					\
+		: "m" (*addr)					\
+		:						\
+		: label)
+#endif /* __powerpc64__ */
+
+#define __get_user_size_goto(x, ptr, size, label)				\
+do {										\
+	BUILD_BUG_ON(size > sizeof(x));						\
+	switch (size) {								\
+	case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break;	\
+	case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break;	\
+	case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break;	\
+	case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label);  break;	\
+	default: x = 0; BUILD_BUG();						\
+	}									\
+} while (0)
+
+#define __get_user_size_allowed(x, ptr, size, retval)			\
+do {									\
+		__label__ __gus_failed;					\
+		/* on a fault: x is zeroed and retval becomes -EFAULT */	\
+		__get_user_size_goto(x, ptr, size, __gus_failed);	\
+		retval = 0;						\
+		break;							\
+__gus_failed:								\
+		x = 0;							\
+		retval = -EFAULT;					\
+} while (0)
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define __get_user_asm(x, addr, err, op)		\
+	__asm__ __volatile__(				\
+		"1:	"op"%U2%X2 %1, %2	# get_user\n"	\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %0,%3\n"			\
+		"	li %1,0\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err), "=r" (x)			\
+		: "m<>" (*addr), "i" (-EFAULT), "0" (err))
+
+#ifdef __powerpc64__
+#define __get_user_asm2(x, addr, err)			\
+	__get_user_asm(x, addr, err, "ld")
+#else /* __powerpc64__ */
+#define __get_user_asm2(x, addr, err)			\
+	__asm__ __volatile__(				\
+		"1:	lwz%X2 %1, %2\n"			\
+		"2:	lwz%X2 %L1, %L2\n"		\
+		"3:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"4:	li %0,%3\n"			\
+		"	li %1,0\n"			\
+		"	li %1+1,0\n"			\
+		"	b 3b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 4b)			\
+		EX_TABLE(2b, 4b)			\
+		: "=r" (err), "=&r" (x)			\
+		: "m" (*addr), "i" (-EFAULT), "0" (err))
+#endif /* __powerpc64__ */
+
+#define __get_user_size_allowed(x, ptr, size, retval)		\
+do {								\
+	retval = 0;						\
+	BUILD_BUG_ON(size > sizeof(x));				\
+	switch (size) {						\
+	case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break;	\
+	case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break;	\
+	case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break;	\
+	case 8: __get_user_asm2(x, (u64 __user *)ptr, retval);  break;	\
+	default: x = 0; BUILD_BUG();				\
+	}							\
+} while (0)
+
+#define __get_user_size_goto(x, ptr, size, label)		\
+do {								\
+	long __gus_retval;					\
+								\
+	__get_user_size_allowed(x, ptr, size, __gus_retval);	\
+	if (__gus_retval)					\
+		goto label;					\
+} while (0)
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+/*
+ * This is a type: either unsigned long, if the argument fits into
+ * that type, or otherwise unsigned long long.
+ */
+#define __long_type(x) \
+	__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
+
+#define __get_user(x, ptr)					\
+({								\
+	long __gu_err;						\
+	__long_type(*(ptr)) __gu_val;				\
+	__typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
+	__typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr));	\
+	/* user access is allowed only around the load itself */	\
+	might_fault();					\
+	allow_read_from_user(__gu_addr, __gu_size);		\
+	__get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err);	\
+	prevent_read_from_user(__gu_addr, __gu_size);		\
+	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	/* value of the statement expression: the load's error code */	\
+	__gu_err;						\
+})
+
+#define get_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __user *_gu_addr = (ptr);			\
+	/* if access_ok() fails, x is set to 0 and the value is -EFAULT */	\
+	access_ok(_gu_addr, sizeof(*(ptr))) ?				\
+		  __get_user(x, _gu_addr) :				\
+		  ((x) = (__force __typeof__(*(ptr)))0, -EFAULT);	\
+})
+
+/* more complex routines */
+
+extern unsigned long __copy_tofrom_user(void __user *to,
+		const void __user *from, unsigned long size);
+
+#ifdef __powerpc64__
+static inline unsigned long
+raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	unsigned long rc;	/* __copy_tofrom_user()'s result, passed through */
+
+	allow_read_write_user(to, from, n);	/* both ends are user pointers */
+	rc = __copy_tofrom_user(to, from, n);
+	prevent_read_write_user(to, from, n);
+	return rc;
+}
+#endif /* __powerpc64__ */
+
+static inline unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long rc;	/* __copy_tofrom_user()'s result, passed through */
+
+	allow_read_from_user(from, n);		/* only @from is a user pointer */
+	rc = __copy_tofrom_user((__force void __user *)to, from, n);
+	prevent_read_from_user(from, n);
+	return rc;
+}
+
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	unsigned long rc;	/* __copy_tofrom_user()'s result, passed through */
+
+	allow_write_to_user(to, n);		/* only @to is a user pointer */
+	rc = __copy_tofrom_user(to, (__force const void __user *)from, n);
+	prevent_write_to_user(to, n);
+	return rc;
+}
+
+unsigned long __arch_clear_user(void __user *addr, unsigned long size);
+
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+	unsigned long rc;	/* __arch_clear_user()'s result, passed through */
+
+	might_fault();
+	allow_write_to_user(addr, size);	/* no access_ok() at this level */
+	rc = __arch_clear_user(addr, size);
+	prevent_write_to_user(addr, size);
+	return rc;
+}
+/* Range-checked clear: returns @size unchanged when access_ok() fails. */
+static inline unsigned long clear_user(void __user *addr, unsigned long size)
+{
+	return unlikely(!access_ok(addr, size)) ? size : __clear_user(addr, size);
+}
+
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+	return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+/* Checked copy to user via copy_mc_generic(); returns its result, or @n if a check fails. */
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+	unsigned long len = n;	/* n is overwritten below; keep the opened size */
+
+	if (check_copy_size(from, n, true) && access_ok(to, n)) {
+		allow_write_to_user(to, len);
+		n = copy_mc_generic((void *)to, from, len);
+		prevent_write_to_user(to, len);	/* same size as the allow */
+	}
+	return n;
+}
+#endif
+
+extern long __copy_from_user_flushcache(void *dst, const void __user *src,
+		unsigned size);
+
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+	bool ok = likely(access_ok(ptr, len));	/* refuse ranges failing access_ok() */
+
+	if (ok) {
+		might_fault();
+		allow_read_write_user((void __user *)ptr, ptr, len);	/* r+w, one range */
+	}
+	return ok;
+}
+#define user_access_begin	user_access_begin
+#define user_access_end		prevent_current_access_user
+#define user_access_save	prevent_user_access_return
+#define user_access_restore	restore_user_access
+
+static __must_check __always_inline bool
+user_read_access_begin(const void __user *ptr, size_t len)
+{
+	bool ok = likely(access_ok(ptr, len));	/* refuse ranges failing access_ok() */
+
+	if (ok) {
+		might_fault();
+		allow_read_from_user(ptr, len);	/* read side only */
+	}
+	return ok;
+}
+#define user_read_access_begin	user_read_access_begin
+#define user_read_access_end		prevent_current_read_from_user
+
+static __must_check __always_inline bool
+user_write_access_begin(const void __user *ptr, size_t len)
+{
+	bool ok = likely(access_ok(ptr, len));	/* refuse ranges failing access_ok() */
+
+	if (ok) {
+		might_fault();
+		allow_write_to_user((void __user *)ptr, len);	/* write side only */
+	}
+	return ok;
+}
+#define user_write_access_begin	user_write_access_begin
+#define user_write_access_end		prevent_current_write_to_user
+
+#define unsafe_get_user(x, p, e) do {					\
+	__long_type(*(p)) __gu_val;				\
+	__typeof__(*(p)) __user *__gu_addr = (p);		\
+	/* load via a long-sized temporary; a fault jumps to e */	\
+	__get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \
+	(x) = (__typeof__(*(p)))__gu_val;			\
+} while (0)
+
+#define unsafe_put_user(x, p, e) \
+	__put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
+
+/* Copy l bytes from user s to kernel d: u64 chunks, then a 4/2/1-byte tail; faults jump to e. */
+#define unsafe_copy_from_user(d, s, l, e) do {						\
+	u8 *_dst = (u8 *)(d);								\
+	const u8 __user *_src = (const u8 __user *)(s);					\
+	size_t _len = (l);								\
+	size_t _i;	/* size_t like _len: no signed/unsigned mix in the bound */	\
+											\
+	for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64))		\
+		unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e);	\
+	if (_len & 4) {									\
+		unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e);	\
+		_i += 4;								\
+	}										\
+	if (_len & 2) {									\
+		unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e);	\
+		_i += 2;								\
+	}										\
+	if (_len & 1)									\
+		unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e);	\
+} while (0)
+
+/* Copy l bytes from kernel s to user d: u64 chunks, then a 4/2/1-byte tail; faults jump to e. */
+#define unsafe_copy_to_user(d, s, l, e) do {						\
+	u8 __user *_dst = (u8 __user *)(d);						\
+	const u8 *_src = (const u8 *)(s);						\
+	size_t _len = (l);								\
+	size_t _i;	/* size_t like _len: no signed/unsigned mix in the bound */	\
+											\
+	for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64))		\
+		unsafe_put_user(*(const u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \
+	if (_len & 4) {									\
+		unsafe_put_user(*(const u32 *)(_src + _i), (u32 __user *)(_dst + _i), e); \
+		_i += 4;								\
+	}										\
+	if (_len & 2) {									\
+		unsafe_put_user(*(const u16 *)(_src + _i), (u16 __user *)(_dst + _i), e); \
+		_i += 2;								\
+	}										\
+	if (_len & 1)									\
+		unsafe_put_user(*(const u8 *)(_src + _i), (u8 __user *)(_dst + _i), e);	\
+} while (0)
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+	__get_user_size_goto(*((type *)(dst)),				\
+		(__force type __user *)(src), sizeof(type), err_label)
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+	__put_user_size_goto(*((type *)(src)),				\
+		(__force type __user *)(dst), sizeof(type), err_label)
+
+#endif	/* _ARCH_POWERPC_UACCESS_H */
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
new file mode 100644
index 0000000000..b1f094728b
--- /dev/null
+++ b/arch/powerpc/include/asm/udbg.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * (c) 2001, 2006 IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_UDBG_H
+#define _ASM_POWERPC_UDBG_H
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+#include <linux/init.h>
+
+extern void (*udbg_putc)(char c);
+extern void (*udbg_flush)(void);
+extern int (*udbg_getc)(void);
+extern int (*udbg_getc_poll)(void);
+
+void udbg_puts(const char *s);
+int udbg_write(const char *s, int n);
+
+void register_early_udbg_console(void);
+void udbg_printf(const char *fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
+void udbg_progress(char *s, unsigned short hex);
+
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride);
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride);
+
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock);
+unsigned int __init udbg_probe_uart_speed(unsigned int clock);
+
+struct device_node;
+void __init udbg_scc_init(int force_scc);
+int udbg_adb_init(int force_btext);
+void udbg_adb_init_early(void);
+
+void __init udbg_early_init(void);
+void __init udbg_init_debug_lpar(void);
+void __init udbg_init_debug_lpar_hvsi(void);
+void __init udbg_init_pmac_realmode(void);
+void __init udbg_init_maple_realmode(void);
+void __init udbg_init_pas_realmode(void);
+void __init udbg_init_rtas_panel(void);
+void __init udbg_init_rtas_console(void);
+void __init udbg_init_btext(void);
+void __init udbg_init_44x_as1(void);
+void __init udbg_init_40x_realmode(void);
+void __init udbg_init_cpm(void);
+void __init udbg_init_usbgecko(void);
+void __init udbg_init_memcons(void);
+void __init udbg_init_ehv_bc(void);
+void __init udbg_init_ps3gelic(void);
+void __init udbg_init_debug_opal_raw(void);
+void __init udbg_init_debug_opal_hvsi(void);
+void __init udbg_init_debug_16550(void);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/uic.h b/arch/powerpc/include/asm/uic.h
new file mode 100644
index 0000000000..7b7bd15b1c
--- /dev/null
+++ b/arch/powerpc/include/asm/uic.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * IBM PPC4xx UIC external definitions and structure.
+ *
+ * Maintainer: David Gibson <dwg@au1.ibm.com>
+ * Copyright 2007 IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_UIC_H
+#define _ASM_POWERPC_UIC_H
+
+#ifdef __KERNEL__
+
+extern void __init uic_init_tree(void);
+extern unsigned int uic_get_irq(void);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_UIC_H */
diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
new file mode 100644
index 0000000000..b66f6db7be
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor API.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
+#define _ASM_POWERPC_ULTRAVISOR_API_H
+
+#include <asm/hvcall.h>
+
+/* Return codes */
+#define U_BUSY			H_BUSY
+#define U_FUNCTION		H_FUNCTION
+#define U_NOT_AVAILABLE		H_NOT_AVAILABLE
+#define U_P2			H_P2
+#define U_P3			H_P3
+#define U_P4			H_P4
+#define U_P5			H_P5
+#define U_PARAMETER		H_PARAMETER
+#define U_PERMISSION		H_PERMISSION
+#define U_SUCCESS		H_SUCCESS
+
+/* opcodes */
+#define UV_WRITE_PATE			0xF104
+#define UV_RETURN			0xF11C
+#define UV_ESM				0xF110
+#define UV_REGISTER_MEM_SLOT		0xF120
+#define UV_UNREGISTER_MEM_SLOT		0xF124
+#define UV_PAGE_IN			0xF128
+#define UV_PAGE_OUT			0xF12C
+#define UV_SHARE_PAGE			0xF130
+#define UV_UNSHARE_PAGE			0xF134
+#define UV_UNSHARE_ALL_PAGES		0xF140
+#define UV_PAGE_INVAL			0xF138
+#define UV_SVM_TERMINATE		0xF13C
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
new file mode 100644
index 0000000000..790b0e6368
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor definitions
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_H
+#define _ASM_POWERPC_ULTRAVISOR_H
+
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
+#include <asm/firmware.h>
+
+int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+				  int depth, void *data);
+
+/*
+ * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
+ * writing and an attempt to write to it will cause a Hypervisor Emulation
+ * Assistance interrupt.
+ */
+static inline void set_ptcr_when_no_uv(u64 val)
+{	/* Write val to SPRN_PTCR only when FW_FEATURE_ULTRAVISOR is absent; otherwise do nothing. */
+	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+		mtspr(SPRN_PTCR, val);
+}
+
+static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
+{	/* Calls ucall_norets() with opcode UV_WRITE_PATE and lpid, dw0, dw1; returns its result as int. */
+	return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
+}
+
+static inline int uv_share_page(u64 pfn, u64 npages)
+{	/* Calls ucall_norets() with opcode UV_SHARE_PAGE and pfn, npages; returns its result as int. */
+	return ucall_norets(UV_SHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_page(u64 pfn, u64 npages)
+{	/* Calls ucall_norets() with opcode UV_UNSHARE_PAGE and pfn, npages; returns its result as int. */
+	return ucall_norets(UV_UNSHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_all_pages(void)
+{	/* Calls ucall_norets() with opcode UV_UNSHARE_ALL_PAGES and no further arguments; returns its result as int. */
+	return ucall_norets(UV_UNSHARE_ALL_PAGES);
+}
+
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+			     u64 page_shift)
+{	/* Calls ucall_norets() with opcode UV_PAGE_IN, forwarding all five arguments in order; returns its result as int. */
+	return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags,
+			    page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
+			      u64 page_shift)
+{	/* Calls ucall_norets() with opcode UV_PAGE_OUT, forwarding all five arguments in order; returns its result as int. */
+	return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags,
+			    page_shift);
+}
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+				       u64 flags, u64 slotid)
+{	/* Calls ucall_norets() with opcode UV_REGISTER_MEM_SLOT, forwarding all five arguments in order; returns its result as int. */
+	return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa,
+			    size, flags, slotid);
+}
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{	/* Calls ucall_norets() with opcode UV_UNREGISTER_MEM_SLOT and lpid, slotid; returns its result as int. */
+	return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid);
+}
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{	/* Calls ucall_norets() with opcode UV_PAGE_INVAL and lpid, gpa, page_shift; returns its result as int. */
+	return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift);
+}
+
+static inline int uv_svm_terminate(u64 lpid)
+{	/* Calls ucall_norets() with opcode UV_SVM_TERMINATE and lpid; returns its result as int. */
+	return ucall_norets(UV_SVM_TERMINATE, lpid);
+}
+
+#endif	/* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
new file mode 100644
index 0000000000..e278299b9b
--- /dev/null
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * uninorth.h: definitions for using the "UniNorth" host bridge chip
+ *             from Apple. This chip is used on "Core99" machines
+ *	       This also includes U2 used on more recent MacRISC2/3
+ *             machines and U3 (G5) 
+ *
+ */
+#ifdef __KERNEL__
+#ifndef __ASM_UNINORTH_H__
+#define __ASM_UNINORTH_H__
+
+/*
+ * Uni-N and U3 config space reg. definitions
+ *
+ * (Little endian)
+ */
+
+/* Address ranges selection. This one should work with Bandit too */
+/* Not U3 */
+#define UNI_N_ADDR_SELECT		0x48
+#define UNI_N_ADDR_COARSE_MASK		0xffff0000	/* 256Mb regions at *0000000 */
+#define UNI_N_ADDR_FINE_MASK		0x0000ffff	/*  16Mb regions at f*000000 */
+
+/* AGP registers */
+/* Not U3 */
+#define UNI_N_CFG_GART_BASE		0x8c
+#define UNI_N_CFG_AGP_BASE		0x90
+#define UNI_N_CFG_GART_CTRL		0x94
+#define UNI_N_CFG_INTERNAL_STATUS	0x98
+#define UNI_N_CFG_GART_DUMMY_PAGE	0xa4
+
+/* UNI_N_CFG_GART_CTRL bits definitions */
+#define UNI_N_CFG_GART_INVAL		0x00000001
+#define UNI_N_CFG_GART_ENABLE		0x00000100
+#define UNI_N_CFG_GART_2xRESET		0x00010000
+#define UNI_N_CFG_GART_DISSBADET	0x00020000
+/* The following seem to be used only on U3 <j.glisse@gmail.com> */
+#define U3_N_CFG_GART_SYNCMODE		0x00040000
+#define U3_N_CFG_GART_PERFRD		0x00080000
+#define U3_N_CFG_GART_B2BGNT		0x00200000
+#define U3_N_CFG_GART_FASTDDR		0x00400000
+
+/* My understanding of UniNorth AGP as of UniNorth rev 1.0x,
+ * revision 1.5 (x4 AGP) may need further changes.
+ *
+ * AGP_BASE register contains the base address of the AGP aperture on
+ * the AGP bus. It doesn't seem to be visible to the CPU as of UniNorth 1.x,
+ * even if decoding of this address range is enabled in the address select
+ * register. Apparently, the only supported bases are 256Mb multiples
+ * (high 4 bits of that register).
+ *
+ * GART_BASE register appears to contain the physical address of the GART
+ * in system memory in the high address bits (page aligned), and the
+ * GART size in the low order bits (number of GART pages)
+ *
+ * The GART format itself is one 32bits word per physical memory page.
+ * This word contains, in little-endian format (!!!), the physical address
+ * of the page in the high bits, and what appears to be an "enable" bit
+ * in the LSB bit (0) that must be set to 1 when the entry is valid.
+ *
+ * Obviously, the GART is not cache coherent and so any change to it
+ * must be flushed to memory (or maybe just make the GART space
+ * non-cacheable). AGP memory itself doesn't seem to be cache coherent either.
+ *
+ * In order to invalidate the GART (which is probably necessary to inval
+ * the bridge internal TLBs), the following sequence has to be written,
+ * in order, to the GART_CTRL register:
+ *
+ *   UNI_N_CFG_GART_ENABLE | UNI_N_CFG_GART_INVAL
+ *   UNI_N_CFG_GART_ENABLE
+ *   UNI_N_CFG_GART_ENABLE | UNI_N_CFG_GART_2xRESET
+ *   UNI_N_CFG_GART_ENABLE
+ *
+ * As far as AGP "features" are concerned, it looks like fast write may
+ * not be supported but this has to be confirmed.
+ *
+ * Turning on AGP seems to require a double invalidate operation, one before
+ * setting the AGP command register, one after.
+ *
+ * Turning off AGP seems to require the following sequence: first wait
+ * for the AGP to be idle by reading the internal status register, then
+ * write in that order to the GART_CTRL register:
+ *
+ *   UNI_N_CFG_GART_ENABLE | UNI_N_CFG_GART_INVAL
+ *   0
+ *   UNI_N_CFG_GART_2xRESET
+ *   0
+ */
+
+/*
+ * Uni-N memory mapped reg. definitions
+ *
+ * Those registers are Big-Endian !!
+ *
+ * Their meanings come from Darwin and/or from experiments I made with
+ * the bootrom; I'm not sure about their exact meaning yet
+ *
+ */
+
+/* Version of the UniNorth chip */
+#define UNI_N_VERSION			0x0000		/* Known versions: 3,7 and 8 */
+
+#define UNI_N_VERSION_107		0x0003		/* 1.0.7 */
+#define UNI_N_VERSION_10A		0x0007		/* 1.0.10 */
+#define UNI_N_VERSION_150		0x0011		/* 1.5 */
+#define UNI_N_VERSION_200		0x0024		/* 2.0 */
+#define UNI_N_VERSION_PANGEA		0x00C0		/* Integrated U1 + K */
+#define UNI_N_VERSION_INTREPID		0x00D2		/* Integrated U2 + K */
+#define UNI_N_VERSION_300		0x0030		/* 3.0 (U3 on G5) */
+
+/* This register is used to enable/disable various clocks */
+#define UNI_N_CLOCK_CNTL		0x0020
+#define UNI_N_CLOCK_CNTL_PCI		0x00000001	/* PCI2 clock control */
+#define UNI_N_CLOCK_CNTL_GMAC		0x00000002	/* GMAC clock control */
+#define UNI_N_CLOCK_CNTL_FW		0x00000004	/* FireWire clock control */
+#define UNI_N_CLOCK_CNTL_ATA100		0x00000010	/* ATA-100 clock control (U2) */
+
+/* Power Management control */
+#define UNI_N_POWER_MGT			0x0030
+#define UNI_N_POWER_MGT_NORMAL		0x00
+#define UNI_N_POWER_MGT_IDLE2		0x01
+#define UNI_N_POWER_MGT_SLEEP		0x02
+
+/* This register is configured by Darwin depending on the UniN
+ * revision
+ */
+#define UNI_N_ARB_CTRL			0x0040
+#define UNI_N_ARB_CTRL_QACK_DELAY_SHIFT	15
+#define UNI_N_ARB_CTRL_QACK_DELAY_MASK	0x0e1f8000
+#define UNI_N_ARB_CTRL_QACK_DELAY	0x30
+#define UNI_N_ARB_CTRL_QACK_DELAY105	0x00
+
+/* This one _might_ return the CPU number of the CPU reading it;
+ * the bootROM decides whether to boot or to sleep/spinloop depending
+ * on this register being 0 or not
+ */
+#define UNI_N_CPU_NUMBER		0x0050
+
+/* This register appears to be read by the bootROM to decide what
+ *  to do on a non-recoverable reset (powerup or wakeup)
+ */
+#define UNI_N_HWINIT_STATE		0x0070
+#define UNI_N_HWINIT_STATE_SLEEPING	0x01
+#define UNI_N_HWINIT_STATE_RUNNING	0x02
+/* This last bit appears to be used by the bootROM to know the second
+ * CPU has started and will enter its sleep loop with IP=0
+ */
+#define UNI_N_HWINIT_STATE_CPU1_FLAG	0x10000000
+
+/* This register controls AACK delay, which is set when 2004 iBook/PowerBook
+ * is in low speed mode.
+ */
+#define UNI_N_AACK_DELAY		0x0100
+#define UNI_N_AACK_DELAY_ENABLE		0x00000001
+
+/* Clock status for Intrepid */
+#define UNI_N_CLOCK_STOP_STATUS0	0x0150
+#define UNI_N_CLOCK_STOPPED_EXTAGP	0x00200000
+#define UNI_N_CLOCK_STOPPED_AGPDEL	0x00100000
+#define UNI_N_CLOCK_STOPPED_I2S0_45_49	0x00080000
+#define UNI_N_CLOCK_STOPPED_I2S0_18	0x00040000
+#define UNI_N_CLOCK_STOPPED_I2S1_45_49	0x00020000
+#define UNI_N_CLOCK_STOPPED_I2S1_18	0x00010000
+#define UNI_N_CLOCK_STOPPED_TIMER	0x00008000
+#define UNI_N_CLOCK_STOPPED_SCC_RTCLK18	0x00004000
+#define UNI_N_CLOCK_STOPPED_SCC_RTCLK32	0x00002000
+#define UNI_N_CLOCK_STOPPED_SCC_VIA32	0x00001000
+#define UNI_N_CLOCK_STOPPED_SCC_SLOT0	0x00000800
+#define UNI_N_CLOCK_STOPPED_SCC_SLOT1	0x00000400
+#define UNI_N_CLOCK_STOPPED_SCC_SLOT2	0x00000200
+#define UNI_N_CLOCK_STOPPED_PCI_FBCLKO	0x00000100
+#define UNI_N_CLOCK_STOPPED_VEO0	0x00000080
+#define UNI_N_CLOCK_STOPPED_VEO1	0x00000040
+#define UNI_N_CLOCK_STOPPED_USB0	0x00000020
+#define UNI_N_CLOCK_STOPPED_USB1	0x00000010
+#define UNI_N_CLOCK_STOPPED_USB2	0x00000008
+#define UNI_N_CLOCK_STOPPED_32		0x00000004
+#define UNI_N_CLOCK_STOPPED_45		0x00000002
+#define UNI_N_CLOCK_STOPPED_49		0x00000001
+
+#define UNI_N_CLOCK_STOP_STATUS1	0x0160
+#define UNI_N_CLOCK_STOPPED_PLL4REF	0x00080000
+#define UNI_N_CLOCK_STOPPED_CPUDEL	0x00040000
+#define UNI_N_CLOCK_STOPPED_CPU		0x00020000
+#define UNI_N_CLOCK_STOPPED_BUF_REFCKO	0x00010000
+#define UNI_N_CLOCK_STOPPED_PCI2	0x00008000
+#define UNI_N_CLOCK_STOPPED_FW		0x00004000
+#define UNI_N_CLOCK_STOPPED_GB		0x00002000
+#define UNI_N_CLOCK_STOPPED_ATA66	0x00001000
+#define UNI_N_CLOCK_STOPPED_ATA100	0x00000800
+#define UNI_N_CLOCK_STOPPED_MAX		0x00000400
+#define UNI_N_CLOCK_STOPPED_PCI1	0x00000200
+#define UNI_N_CLOCK_STOPPED_KLPCI	0x00000100
+#define UNI_N_CLOCK_STOPPED_USB0PCI	0x00000080
+#define UNI_N_CLOCK_STOPPED_USB1PCI	0x00000040
+#define UNI_N_CLOCK_STOPPED_USB2PCI	0x00000020
+#define UNI_N_CLOCK_STOPPED_7PCI1	0x00000008
+#define UNI_N_CLOCK_STOPPED_AGP		0x00000004
+#define UNI_N_CLOCK_STOPPED_PCI0	0x00000002
+#define UNI_N_CLOCK_STOPPED_18		0x00000001
+
+/* Intrepid register for OF do-platform-clockspreading */
+#define UNI_N_CLOCK_SPREADING		0x190
+
+/* Uninorth 1.5 rev. has additional perf. monitor registers at 0xf00-0xf50 */
+
+
+/*
+ * U3 specific registers
+ */
+
+
+/* U3 Toggle */
+#define U3_TOGGLE_REG			0x00e0
+#define U3_PMC_START_STOP		0x0001
+#define U3_MPIC_RESET			0x0002
+#define U3_MPIC_OUTPUT_ENABLE		0x0004
+
+/* U3 API PHY Config 1 */
+#define U3_API_PHY_CONFIG_1		0x23030
+
+/* U3 HyperTransport registers */
+#define U3_HT_CONFIG_BASE      		0x70000
+#define U3_HT_LINK_COMMAND		0x100
+#define U3_HT_LINK_CONFIG		0x110
+#define U3_HT_LINK_FREQ			0x120
+
+#endif /* __ASM_UNINORTH_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
new file mode 100644
index 0000000000..659a996c75
--- /dev/null
+++ b/arch/powerpc/include/asm/unistd.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains the system call numbers.
+ */
+#ifndef _ASM_POWERPC_UNISTD_H_
+#define _ASM_POWERPC_UNISTD_H_
+
+#include <uapi/asm/unistd.h>
+
+#define NR_syscalls	__NR_syscalls
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+
+#define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_TIME32
+#define __ARCH_WANT_SYS_UTIME32
+#define __ARCH_WANT_SYS_WAITPID
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+#ifdef CONFIG_PPC32
+#define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_SYS_OLD_SELECT
+#endif
+#ifdef CONFIG_PPC64
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_NEWFSTATAT
+#define __ARCH_WANT_COMPAT_STAT
+#define __ARCH_WANT_COMPAT_FALLOCATE
+#define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#endif
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
+
+#endif		/* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
new file mode 100644
index 0000000000..4fea116d3d
--- /dev/null
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * User-space Probes (UProbes) for powerpc
+ *
+ * Copyright IBM Corporation, 2007-2012
+ *
+ * Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
+ */
+
+#include <linux/notifier.h>
+#include <asm/probes.h>
+
+typedef u32 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES		8
+#define UPROBE_XOL_SLOT_BYTES	(MAX_UINSN_BYTES)
+
+/* The following alias is needed for reference from arch-agnostic code */
+#define UPROBE_SWBP_INSN	BREAKPOINT_INSTRUCTION
+#define UPROBE_SWBP_INSN_SIZE	4 /* swbp insn size in bytes */
+
+struct arch_uprobe {
+	union {	/* both members alias the same 2 x u32 = 8 bytes, i.e. MAX_UINSN_BYTES */
+		u32 insn[2];	/* NOTE(review): presumably the probed instruction words - confirm */
+		u32 ixol[2];	/* NOTE(review): presumably the copy placed in the XOL slot (UPROBE_XOL_SLOT_BYTES) - confirm */
+	};
+};
+
+struct arch_uprobe_task {
+	unsigned long	saved_trap_nr;
+};
+
+#endif	/* _ASM_UPROBES_H */
diff --git a/arch/powerpc/include/asm/user.h b/arch/powerpc/include/asm/user.h
new file mode 100644
index 0000000000..7fae7e597b
--- /dev/null
+++ b/arch/powerpc/include/asm/user.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_USER_H
+#define _ASM_POWERPC_USER_H
+
+#include <asm/ptrace.h>
+#include <asm/page.h>
+
+/*
+ * Adapted from <asm-alpha/user.h>
+ *
+ * Core file format: The core file is written in such a way that gdb
+ * can understand it and provide useful information to the user (under
+ * linux we use the `trad-core' bfd, NOT the osf-core).  The file contents
+ * are as follows:
+ *
+ *  upage: 1 page consisting of a user struct that tells gdb
+ *	what is present in the file.  Directly after this is a
+ *	copy of the task_struct, which is currently not used by gdb,
+ *	but it may come in handy at some point.  All of the registers
+ *	are stored as part of the upage.  The upage should always be
+ *	only one page long.
+ *  data: The data segment follows next.  We use current->end_text to
+ *	current->brk to pick up all of the user variables, plus any memory
+ *	that may have been sbrk'ed.  No attempt is made to determine if a
+ *	page is demand-zero or if a page is totally unused, we just cover
+ *	the entire range.  All of the addresses are rounded in such a way
+ *	that an integral number of pages is written.
+ *  stack: We need the stack information in order to get a meaningful
+ *	backtrace.  We need to write the data from usp to
+ *	current->start_stack, so we round each of these in order to be able
+ *	to write an integer number of pages.
+ */
+struct user {
+	struct user_pt_regs regs;		/* entire machine state */
+	size_t		u_tsize;		/* text size (pages) */
+	size_t		u_dsize;		/* data size (pages) */
+	size_t		u_ssize;		/* stack size (pages) */
+	unsigned long	start_code;		/* text starting address */
+	unsigned long	start_data;		/* data starting address */
+	unsigned long	start_stack;		/* stack starting address */
+	long int	signal;			/* signal causing core dump */
+	unsigned long	u_ar0;			/* help gdb find registers */
+	unsigned long	magic;			/* identifies a core file */
+	char		u_comm[32];		/* user command name */
+};
+
+#endif	/* _ASM_POWERPC_USER_H */
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
new file mode 100644
index 0000000000..c36f71e01c
--- /dev/null
+++ b/arch/powerpc/include/asm/vas.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#ifndef _ASM_POWERPC_VAS_H
+#define _ASM_POWERPC_VAS_H
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
+ * (Local FIFO Size Register) of the VAS workbook.
+ */
+#define VAS_RX_FIFO_SIZE_MIN	(1 << 10)	/* 1KB */
+#define VAS_RX_FIFO_SIZE_MAX	(8 << 20)	/* 8MB */
+
+/*
+ * Threshold Control Mode: Have paste operation fail if the number of
+ * requests in receive FIFO exceeds a threshold.
+ *
+ * NOTE: No special error code yet if paste is rejected because of these
+ *	 limits. So users can't distinguish between this and other errors.
+ */
+#define VAS_THRESH_DISABLED		0
+#define VAS_THRESH_FIFO_GT_HALF_FULL	1
+#define VAS_THRESH_FIFO_GT_QTR_FULL	2
+#define VAS_THRESH_FIFO_GT_EIGHTH_FULL	3
+
+/*
+ * VAS window Linux status bits
+ */
+#define VAS_WIN_ACTIVE		0x0	/* Used in platform independent */
+					/* vas mmap() */
+/* Window is closed in the hypervisor due to lost credit */
+#define VAS_WIN_NO_CRED_CLOSE	0x00000001
+/* Window is closed due to migration */
+#define VAS_WIN_MIGRATE_CLOSE	0x00000002
+
+/*
+ * Get/Set bit fields
+ */
+#define GET_FIELD(m, v)                (((v) & (m)) >> MASK_LSH(m))	/* bits of v selected by mask m, shifted down to bit 0 */
+#define MASK_LSH(m)            (__builtin_ffsl(m) - 1)	/* 0-based index of the lowest set bit of m; -1 when m == 0, so m must be non-zero */
+#define SET_FIELD(m, v, val)   /* yields v with the m field replaced by val; v itself is not assigned */ \
+		(((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m)))
+
+/*
+ * Co-processor Engine type.
+ */
+enum vas_cop_type {
+	VAS_COP_TYPE_FAULT,	/* first enumerator, so its value is 0 */
+	VAS_COP_TYPE_842,
+	VAS_COP_TYPE_842_HIPRI,
+	VAS_COP_TYPE_GZIP,
+	VAS_COP_TYPE_GZIP_HIPRI,
+	VAS_COP_TYPE_FTW,
+	VAS_COP_TYPE_MAX,	/* one past the last type above; presumably a bound rather than an engine type - confirm */
+};
+
+/*
+ * User space VAS windows are opened by tasks and take references
+ * to pid and mm until windows are closed.
+ * Stores pid, mm, and tgid for each window.
+ */
+struct vas_user_win_ref {
+	struct pid *pid;	/* PID of owner */
+	struct pid *tgid;	/* Thread group ID of owner */
+	struct mm_struct *mm;	/* Linux process mm_struct */
+	struct mutex mmap_mutex;	/* protects paste address mmap() */
+					/* with DLPAR close/open windows */
+	struct vm_area_struct *vma;	/* Save VMA and used in DLPAR ops */
+};
+
+/*
+ * Common VAS window struct on PowerNV and PowerVM
+ */
+struct vas_window {
+	u32 winid;	/* NOTE(review): presumably the window identifier - confirm who assigns it */
+	u32 wcreds_max;	/* Window credits */
+	u32 status;	/* Window status used in OS; presumably the VAS_WIN_* values - confirm */
+	enum vas_cop_type cop;	/* co-processor engine type */
+	struct vas_user_win_ref task_ref;	/* pid, tgid and mm references of the owner */
+	char *dbgname;	/* NOTE(review): presumably a debugfs entry name - confirm */
+	struct dentry *dbgdir;	/* NOTE(review): presumably the debugfs directory - confirm */
+};
+
+/*
+ * User space window operations used for powernv and powerVM
+ */
+struct vas_user_win_ops {
+	struct vas_window * (*open_win)(int vas_id, u64 flags,	/* returns a vas_window pointer for the given VAS id, flags and type */
+				enum vas_cop_type);
+	u64 (*paste_addr)(struct vas_window *);	/* NOTE(review): presumably the window's paste address - confirm */
+	int (*close_win)(struct vas_window *);	/* NOTE(review): return convention not visible here - confirm */
+};
+
+static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref)
+{
+	/* Drop references to pid, tgid, and mm */
+	put_pid(ref->pid);
+	put_pid(ref->tgid);
+	if (ref->mm)	/* mmdrop() is only called for a non-NULL mm */
+		mmdrop(ref->mm);
+}
+
+static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
+{
+	mm_context_add_vas_window(ref->mm);	/* NOTE(review): presumably what updates context.vas_windows mentioned below - confirm */
+	/*
+	 * Even a process that has no foreign real address mapping can
+	 * use an unpaired COPY instruction (to no real effect). Issue
+	 * CP_ABORT to clear any pending COPY and prevent a covert
+	 * channel.
+	 *
+	 * __switch_to() will issue CP_ABORT on future context switches
+	 * if process / thread has any open VAS window (Use
+	 * current->mm->context.vas_windows).
+	 */
+	asm volatile(PPC_CP_ABORT);	/* volatile: must not be optimised away even though it has no outputs */
+}
+
+/*
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+struct vas_rx_win_attr {
+	u64 rx_fifo;
+	int rx_fifo_size;
+	int wcreds_max;
+
+	bool pin_win;
+	bool rej_no_credit;
+	bool tx_wcred_mode;
+	bool rx_wcred_mode;
+	bool tx_win_ord_mode;
+	bool rx_win_ord_mode;
+	bool data_stamp;
+	bool nx_win;
+	bool fault_win;
+	bool user_win;
+	bool notify_disable;
+	bool intr_disable;
+	bool notify_early;
+
+	int lnotify_lpid;
+	int lnotify_pid;
+	int lnotify_tid;
+	u32 pswid;
+
+	int tc_mode;
+};
+
+/*
+ * Window attributes specified by the in-kernel owner of a send window.
+ */
+struct vas_tx_win_attr {
+	enum vas_cop_type cop;
+	int wcreds_max;
+	int lpid;
+	int pidr;		/* hardware PID (from SPRN_PID) */
+	int pswid;
+	int rsvd_txbuf_count;
+	int tc_mode;
+
+	bool user_win;
+	bool pin_win;
+	bool rej_no_credit;
+	bool rsvd_txbuf_enable;
+	bool tx_wcred_mode;
+	bool rx_wcred_mode;
+	bool tx_win_ord_mode;
+	bool rx_win_ord_mode;
+};
+
+#ifdef CONFIG_PPC_POWERNV
+/*
+ * Helper to map a chip id to VAS id.
+ * For POWER9, this is a 1:1 mapping. In the future this may be a 1:N
+ * mapping in which case, we will need to update this helper.
+ *
+ * Return the VAS id or -1 if no matching vasid is found.
+ */
+int chip_to_vas_id(int chipid);
+
+/*
+ * Helper to initialize receive window attributes to defaults for an
+ * NX window.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop);
+
+/*
+ * Open a VAS receive window for the instance of VAS identified by @vasid
+ * Use @attr to initialize the attributes of the window.
+ *
+ * Return a handle to the window or ERR_PTR() on error.
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+				   struct vas_rx_win_attr *attr);
+
+/*
+ * Helper to initialize send window attributes to defaults for an NX window.
+ */
+extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+			enum vas_cop_type cop);
+
+/*
+ * Open a VAS send window for the instance of VAS identified by @vasid
+ * and the co-processor type @cop. Use @attr to initialize attributes
+ * of the window.
+ *
+ * Note: The instance of VAS must already have an open receive window for
+ * the coprocessor type @cop.
+ *
+ * Return a handle to the send window or ERR_PTR() on error.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr);
+
+/*
+ * Close the send or receive window identified by @win. For receive windows
+ * return -EAGAIN if there are active send windows attached to this receive
+ * window.
+ */
+int vas_win_close(struct vas_window *win);
+
+/*
+ * Copy the co-processor request block (CRB) @crb into the local L2 cache.
+ */
+int vas_copy_crb(void *crb, int offset);
+
+/*
+ * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to
+ * the hardware address associated with the window @win. @re is expected/
+ * assumed to be true for NX windows.
+ */
+int vas_paste_crb(struct vas_window *win, int offset, bool re);
+
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+			     const char *name);
+void vas_unregister_api_powernv(void);
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+
+/* VAS Capabilities */
+#define VAS_GZIP_QOS_FEAT	0x1
+#define VAS_GZIP_DEF_FEAT	0x2
+#define VAS_GZIP_QOS_FEAT_BIT	PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */
+#define VAS_GZIP_DEF_FEAT_BIT	PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */
+
+/* NX Capabilities */
+#define VAS_NX_GZIP_FEAT	0x1
+#define VAS_NX_GZIP_FEAT_BIT	PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */
+
+/*
+ * These structs are used to retrieve overall VAS capabilities that
+ * the hypervisor provides.
+ */
+struct hv_vas_all_caps {
+	__be64  descriptor;	/* stored big-endian */
+	__be64  feat_type;	/* stored big-endian; presumably the *_FEAT_BIT flags defined above - confirm */
+} __packed __aligned(0x1000);	/* packed, and aligned to 0x1000 bytes */
+
+struct vas_all_caps {
+	u64     descriptor;	/* same field names as hv_vas_all_caps, but plain u64 */
+	u64     feat_type;	/* NOTE(review): presumably the CPU-endian copy of hv_vas_all_caps.feat_type - confirm */
+};
+
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result);
+int vas_register_api_pseries(struct module *mod,
+			     enum vas_cop_type cop_type, const char *name);
+void vas_unregister_api_pseries(void);
+#endif
+
+/*
+ * Register / unregister coprocessor type to VAS API which will be exported
+ * to user space. Applications can use this API to open / close window
+ * which can be used to send / receive requests directly to the coprocessor.
+ *
+ * Only NX GZIP coprocessor type is supported now, but this API can be
+ * used for others in future.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+			    const char *name,
+			    const struct vas_user_win_ops *vops);
+void vas_unregister_coproc_api(void);
+
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref);
+void vas_update_csb(struct coprocessor_request_block *crb,
+		    struct vas_user_win_ref *task_ref);
+void vas_dump_crb(struct coprocessor_request_block *crb);
+#endif /* _ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
new file mode 100644
index 0000000000..7650b6ce14
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_H
+#define _ASM_POWERPC_VDSO_H
+
+#define VDSO_VERSION_STRING	LINUX_2.6.15
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PPC64
+#include <generated/vdso64-offsets.h>
+#endif
+
+#ifdef CONFIG_VDSO32
+#include <generated/vdso32-offsets.h>
+#endif
+
+#define VDSO64_SYMBOL(base, name) ((unsigned long)(base) + (vdso64_offset_##name))
+
+#define VDSO32_SYMBOL(base, name) ((unsigned long)(base) + (vdso32_offset_##name))
+
+int vdso_getcpu_init(void);
+
+#else /* __ASSEMBLY__ */
+
+#ifdef __VDSO64__
+#define V_FUNCTION_BEGIN(name)		\
+	.globl name;			\
+	name:				\
+
+#define V_FUNCTION_END(name)		\
+	.size name,.-name;
+
+#define V_LOCAL_FUNC(name) (name)
+#endif /* __VDSO64__ */
+
+#ifdef __VDSO32__
+
+#define V_FUNCTION_BEGIN(name)		\
+	.globl name;			\
+	.type name,@function; 		\
+	name:				\
+
+#define V_FUNCTION_END(name)		\
+	.size name,.-name;
+
+#define V_LOCAL_FUNC(name) (name)
+
+#endif /* __VDSO32__ */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_VDSO_H */
diff --git a/arch/powerpc/include/asm/vdso/clocksource.h b/arch/powerpc/include/asm/vdso/clocksource.h
new file mode 100644
index 0000000000..c1ba56b82e
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_CLOCKSOURCE_H
+#define _ASM_POWERPC_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES	VDSO_CLOCKMODE_ARCHTIMER
+
+#endif
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
new file mode 100644
index 0000000000..f0a4cf01e8
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
+#define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+#include <asm/vdso/timebase.h>
+#include <asm/barrier.h>
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES		1
+
+#define VDSO_HAS_TIME			1
+
+static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3,
+					const unsigned long _r4)
+{
+	register long r0 asm("r0") = _r0;		/* syscall number */
+	register unsigned long r3 asm("r3") = _r3;	/* first syscall argument */
+	register unsigned long r4 asm("r4") = _r4;	/* second syscall argument */
+	register int ret asm ("r3");			/* result shares r3 with arg 1 */
+
+	asm volatile(
+		"       sc\n"			/* trap into the kernel */
+		"	bns+	1f\n"		/* CR0.SO clear: no error, keep r3 as is */
+		"	neg	%0, %0\n"	/* error path: negate the value left in r3 */
+		"1:\n"
+	: "=r" (ret), "+r" (r4), "+r" (r0)
+	: "r" (r3)
+	: "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr");
+
+	return ret;	/* NOTE(review): presumably -errno on failure per the sc ABI; confirm */
+}
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz)
+{
+	return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz);
+}
+
+#ifdef __powerpc64__
+
+static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+	return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+	return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
+}
+
+#else
+
+#define BUILD_VDSO32		1
+
+static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+	return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+	return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+	return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+	return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
+}
+#endif
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+						 const struct vdso_data *vd)
+{
+	return get_tb();
+}
+
+const struct vdso_data *__arch_get_vdso_data(void);
+
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return (void *)vd + PAGE_SIZE;	/* PAGE_SIZE bytes past vd (void * arithmetic, GNU C) */
+}
+#endif
+
+static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+{
+	return true;
+}
+#define vdso_clocksource_ok vdso_clocksource_ok
+
+/*
+ * powerpc specific delta calculation.
+ *
+ * This variant removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+	return mult * (cycles - last);	/* mask is intentionally not applied */
+}
+#define vdso_calc_delta vdso_calc_delta
+
+#ifndef __powerpc64__
+static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
+{
+	u32 hi = ns >> 32;		/* upper 32 bits of ns */
+	u32 lo = ns;			/* lower 32 bits of ns (truncating) */
+
+	lo >>= shift;
+	lo |= hi << (32 - shift);	/* NOTE(review): assumes 0 < shift < 32 -- confirm */
+	hi >>= shift;
+
+	if (likely(hi == 0))		/* result fits in 32 bits: skip the 64-bit merge */
+		return lo;
+
+	return ((u64)hi << 32) | lo;
+}
+#define vdso_shift_ns vdso_shift_ns
+#endif
+
+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+			     const struct vdso_data *vd);
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+			    const struct vdso_data *vd);
+#else
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+			     const struct vdso_data *vd);
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+			       const struct vdso_data *vd);
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+			    const struct vdso_data *vd);
+#endif
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+			    const struct vdso_data *vd);
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time,
+				    const struct vdso_data *vd);
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h
new file mode 100644
index 0000000000..80d13207c5
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/processor.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_VDSO_PROCESSOR_H
+#define _ASM_POWERPC_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/* Macros for adjusting thread priority (hardware multi-threading) */
+#ifdef CONFIG_PPC64
+#define HMT_very_low()		asm volatile("or 31, 31, 31	# very low priority")
+#define HMT_low()		asm volatile("or 1, 1, 1	# low priority")
+#define HMT_medium_low()	asm volatile("or 6, 6, 6	# medium low priority")
+#define HMT_medium()		asm volatile("or 2, 2, 2	# medium priority")
+#define HMT_medium_high()	asm volatile("or 5, 5, 5	# medium high priority")
+#define HMT_high()		asm volatile("or 3, 3, 3	# high priority")
+#else
+#define HMT_very_low()
+#define HMT_low()
+#define HMT_medium_low()
+#define HMT_medium()
+#define HMT_medium_high()
+#define HMT_high()
+#endif
+
+#ifdef CONFIG_PPC64
+#define cpu_relax()							\
+	asm volatile(ASM_FTR_IFCLR(					\
+		/* Pre-POWER10 uses low ; medium priority nops */	\
+		"or 1,1,1 ; or 2,2,2",					\
+		/* POWER10 onward uses pause_short (wait 2,0) */	\
+		PPC_WAIT(2, 0),						\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+#else
+#define cpu_relax()	barrier()
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_VDSO_PROCESSOR_H */
diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h
new file mode 100644
index 0000000000..e9245f86a4
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/timebase.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common timebase prototypes and such for all ppc machines.
+ */
+
+#ifndef _ASM_POWERPC_VDSO_TIMEBASE_H
+#define _ASM_POWERPC_VDSO_TIMEBASE_H
+
+#include <asm/reg.h>
+
+/*
+ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
+ * version below in the else case of the ifdef.
+ */
+#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500))
+#define mftb()		({unsigned long rval;				\
+			asm volatile(					\
+				"90:	mfspr %0, %2;\n"		\
+				ASM_FTR_IFSET(				\
+					"97:	cmpwi %0,0;\n"		\
+					"	beq- 90b;\n", "", %1)	\
+			: "=r" (rval) \
+			: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
+			rval;})
+#elif defined(CONFIG_PPC_8xx)
+#define mftb()		({unsigned long rval;	\
+			asm volatile("mftbl %0" : "=r" (rval)); rval;})
+#else
+#define mftb()		({unsigned long rval;	\
+			asm volatile("mfspr %0, %1" : \
+				     "=r" (rval) : "i" (SPRN_TBRL)); rval;})
+#endif /* mftb() variants */
+
+#if defined(CONFIG_PPC_8xx)
+#define mftbu()		({unsigned long rval;	\
+			asm volatile("mftbu %0" : "=r" (rval)); rval;})
+#else
+#define mftbu()		({unsigned long rval;	\
+			asm volatile("mfspr %0, %1" : "=r" (rval) : \
+				"i" (SPRN_TBRU)); rval;})
+#endif
+
+#define mttbl(v)	asm volatile("mttbl %0":: "r"(v))
+#define mttbu(v)	asm volatile("mttbu %0":: "r"(v))
+
+static __always_inline u64 get_tb(void)
+{
+	unsigned int upper, lower, upper_again;
+
+	/* Keyed on __powerpc64__, not CONFIG_PPC64: the compat VDSO needs the loop */
+	if (__is_defined(__powerpc64__))
+		return mftb();
+
+	/* Re-read until the upper half is unchanged across the lower-half read */
+	for (;;) {
+		upper = mftbu();
+		lower = mftb();
+		upper_again = mftbu();
+		if (upper == upper_again)
+			break;
+	}
+
+	return lower | ((u64)upper << 32);
+}
+
+static inline void set_tb(unsigned int upper, unsigned int lower)
+{
+	mtspr(SPRN_TBWL, 0);		/* zero the lower half before touching the upper */
+	mtspr(SPRN_TBWU, upper);	/* NOTE(review): presumably no carry can arrive from the zeroed lower half */
+	mtspr(SPRN_TBWL, lower);	/* finally install the requested lower half */
+}
+
+#endif /* _ASM_POWERPC_VDSO_TIMEBASE_H */
diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h
new file mode 100644
index 0000000000..48cf23f1e2
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/vsyscall.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_VSYSCALL_H
+#define _ASM_POWERPC_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <asm/vdso_datapage.h>
+
+/*
+ * Return the vdso_data array embedded in the kernel's vdso_arch_data instance.
+ */
+static __always_inline
+struct vdso_data *__arch_get_k_vdso_data(void)
+{
+	return vdso_data->data;
+}
+#define __arch_get_k_vdso_data __arch_get_k_vdso_data	/* must precede the generic include below */
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_VDSO_VSYSCALL_H */
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
new file mode 100644
index 0000000000..a585c8e538
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _VDSO_DATAPAGE_H
+#define _VDSO_DATAPAGE_H
+#ifdef __KERNEL__
+
+/*
+ * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM
+ * Copyright (C) 2005 Benjamin Herrenschmidy <benh@kernel.crashing.org>,
+ * 		      IBM Corp.
+ */
+
+
+/*
+ * Note about this structure:
+ *
+ * This structure was historically called systemcfg and exposed to
+ * userland via /proc/ppc64/systemcfg. Unfortunately, this became an
+ * ABI issue as some proprietary software started relying on being able
+ * to mmap() it, thus we have to keep the base layout at least for a
+ * few kernel versions.
+ *
+ * However, since ppc32 doesn't suffer from this backward handicap,
+ * a simpler version of the data structure is used there with only the
+ * fields actually used by the vDSO.
+ *
+ */
+
+/*
+ * If the major version changes we are incompatible.
+ * Minor version changes are a hint.
+ */
+#define SYSTEMCFG_MAJOR 1
+#define SYSTEMCFG_MINOR 1
+
+#ifndef __ASSEMBLY__
+
+#include <linux/unistd.h>
+#include <linux/time.h>
+#include <vdso/datapage.h>
+
+#define SYSCALL_MAP_SIZE      ((NR_syscalls + 31) / 32)
+
+/*
+ * So here is the ppc64 backward compatible version
+ */
+
+#ifdef CONFIG_PPC64
+
+struct vdso_arch_data {
+	__u8  eye_catcher[16];		/* Eyecatcher: SYSTEMCFG:PPC64	0x00 */
+	struct {			/* Systemcfg version numbers	     */
+		__u32 major;		/* Major number			0x10 */
+		__u32 minor;		/* Minor number			0x14 */
+	} version;
+
+	/* Note about the platform flags: it now only contains the lpar
+	 * bit. The actual platform number is dead and buried
+	 */
+	__u32 platform;			/* Platform flags		0x18 */
+	__u32 processor;		/* Processor type		0x1C */
+	__u64 processorCount;		/* # of physical processors	0x20 */
+	__u64 physicalMemorySize;	/* Size of real memory(B)	0x28 */
+	__u64 tb_orig_stamp;		/* (NU) Timebase at boot	0x30 */
+	__u64 tb_ticks_per_sec;		/* Timebase tics / sec		0x38 */
+	__u64 tb_to_xs;			/* (NU) Inverse of TB to 2^20	0x40 */
+	__u64 stamp_xsec;		/* (NU)				0x48 */
+	__u64 tb_update_count;		/* (NU) Timebase atomicity ctr	0x50 */
+	__u32 tz_minuteswest;		/* (NU) Min. west of Greenwich	0x58 */
+	__u32 tz_dsttime;		/* (NU) Type of dst correction	0x5C */
+	__u32 dcache_size;		/* L1 d-cache size		0x60 */
+	__u32 dcache_line_size;		/* L1 d-cache line size		0x64 */
+	__u32 icache_size;		/* L1 i-cache size		0x68 */
+	__u32 icache_line_size;		/* L1 i-cache line size		0x6C */
+
+	/* those additional ones don't have to be located anywhere
+	 * special as they were not part of the original systemcfg
+	 */
+	__u32 dcache_block_size;		/* L1 d-cache block size     */
+	__u32 icache_block_size;		/* L1 i-cache block size     */
+	__u32 dcache_log_block_size;		/* L1 d-cache log block size */
+	__u32 icache_log_block_size;		/* L1 i-cache log block size */
+	__u32 syscall_map[SYSCALL_MAP_SIZE];	/* Map of syscalls  */
+	__u32 compat_syscall_map[SYSCALL_MAP_SIZE];	/* Map of compat syscalls */
+
+	struct vdso_data data[CS_BASES];
+};
+
+#else /* CONFIG_PPC64 */
+
+/*
+ * And here is the simpler 32 bits version
+ */
+struct vdso_arch_data {
+	__u64 tb_ticks_per_sec;		/* Timebase ticks / sec (first member here, not at 0x38) */
+	__u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */
+	__u32 compat_syscall_map[0];	/* No compat syscalls on PPC32: zero-length placeholder */
+	struct vdso_data data[CS_BASES];	/* CS_BASES entries of generic vdso_data */
+};
+
+#endif /* CONFIG_PPC64 */
+
+extern struct vdso_arch_data *vdso_data;
+
+#else /* __ASSEMBLY__ */
+
+.macro get_datapage ptr		/* load the runtime address of _vdso_datapage into \ptr; clobbers LR */
+	bcl	20, 31, .+4	/* branch-and-link to the next instruction: LR = address of 999 */
+999:
+	mflr	\ptr		/* \ptr = address of label 999 */
+	addis	\ptr, \ptr, (_vdso_datapage - 999b)@ha	/* add the high-adjusted half of the offset */
+	addi	\ptr, \ptr, (_vdso_datapage - 999b)@l	/* add the low half of the offset */
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /* _VDSO_DATAPAGE_H */
diff --git a/arch/powerpc/include/asm/vermagic.h b/arch/powerpc/include/asm/vermagic.h
new file mode 100644
index 0000000000..6f250fe506
--- /dev/null
+++ b/arch/powerpc/include/asm/vermagic.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+#define MODULE_ARCH_VERMAGIC_FTRACE	"patchable-function-entry "
+#elif defined(CONFIG_MPROFILE_KERNEL)
+#define MODULE_ARCH_VERMAGIC_FTRACE	"mprofile-kernel "
+#else
+#define MODULE_ARCH_VERMAGIC_FTRACE	""
+#endif
+
+#ifdef CONFIG_RELOCATABLE
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE	"relocatable "
+#else
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE	""
+#endif
+
+#define MODULE_ARCH_VERMAGIC \
+		MODULE_ARCH_VERMAGIC_FTRACE MODULE_ARCH_VERMAGIC_RELOCATABLE
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h
new file mode 100644
index 0000000000..fcf721682a
--- /dev/null
+++ b/arch/powerpc/include/asm/vga.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VGA_H_
+#define _ASM_POWERPC_VGA_H_
+
+#ifdef __KERNEL__
+
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ */
+
+
+#include <asm/io.h>
+
+
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_MDA_CONSOLE)
+
+#define VT_BUF_HAVE_RW
+/*
+ *  These are only needed for supporting VGA or MDA text mode, which use little
+ *  endian byte ordering.
+ *  In other cases, we can optimize by using native byte ordering and
+ *  <linux/vt_buffer.h> has already done the right job for us.
+ */
+
+static inline void scr_writew(u16 val, volatile u16 *addr)
+{
+	*addr = cpu_to_le16(val);
+}
+
+static inline u16 scr_readw(volatile const u16 *addr)
+{
+	return le16_to_cpu(*addr);
+}
+
+#define VT_BUF_HAVE_MEMSETW
+static inline void scr_memsetw(u16 *s, u16 v, unsigned int n)
+{
+	memset16(s, cpu_to_le16(v), n / 2);	/* NOTE(review): n / 2 suggests n is in bytes and memset16 counts u16s -- confirm */
+}
+
+#define VT_BUF_HAVE_MEMCPYW
+#define VT_BUF_HAVE_MEMMOVEW
+#define scr_memcpyw	memcpy
+#define scr_memmovew	memmove
+
+#endif /* CONFIG_VGA_CONSOLE || CONFIG_MDA_CONSOLE */
+
+#ifdef __powerpc64__
+#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap((x), s))
+#else
+#define VGA_MAP_MEM(x,s) (x)
+#endif
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x,y) (*(y) = (x))
+
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_VGA_H_ */
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
new file mode 100644
index 0000000000..cc9b787627
--- /dev/null
+++ b/arch/powerpc/include/asm/vio.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * IBM PowerPC Virtual I/O Infrastructure Support.
+ *
+ * Copyright (c) 2003 IBM Corp.
+ *  Dave Engebretsen engebret@us.ibm.com
+ *  Santiago Leon santil@us.ibm.com
+ */
+
+#ifndef _ASM_POWERPC_VIO_H
+#define _ASM_POWERPC_VIO_H
+#ifdef __KERNEL__
+
+#include <linux/errno.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/mod_devicetable.h>
+#include <linux/scatterlist.h>
+
+#include <asm/hvcall.h>
+
+/*
+ * Architecture-specific constants for drivers to
+ * extract attributes of the device using vio_get_attribute()
+ */
+#define VETH_MAC_ADDR "local-mac-address"
+#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters"
+
+/* End architecture-specific constants */
+
+#define h_vio_signal(ua, mode) \
+  plpar_hcall_norets(H_VIO_SIGNAL, ua, mode)
+
+#define VIO_IRQ_DISABLE		0UL
+#define VIO_IRQ_ENABLE		1UL
+
+/*
+ * VIO CMO minimum entitlement for all devices and spare entitlement
+ */
+#define VIO_CMO_MIN_ENT 1562624
+
+extern struct bus_type vio_bus_type;
+
+struct iommu_table;
+
+/*
+ * Platform Facilities Option (PFO)-specific data
+ */
+
+/* Starting unit address for PFO devices on the VIO BUS */
+#define VIO_BASE_PFO_UA	0x50000000
+
+/**
+ * struct vio_pfo_op - PFO operation parameters
+ *
+ * @flags: h_call subfunctions and modifiers
+ * @in: Input data block logical real address
+ * @inlen: If non-negative, the length of the input data block.  If negative,
+ *	the length of the input data descriptor list in bytes.
+ * @out: Output data block logical real address
+ * @outlen: If non-negative, the length of the output data block.  If negative,
+ *	the length of the output data descriptor list in bytes.
+ * @csbcpb: Logical real address of the 4k naturally-aligned storage block
+ *	containing the CSB & optional FC field specific CPB
+ * @timeout: # of milliseconds to retry h_call, 0 for no timeout.
+ * @hcall_err: pointer to return the h_call return value, else NULL
+ */
+struct vio_pfo_op {
+	u64 flags;
+	s64 in;
+	s64 inlen;
+	s64 out;
+	s64 outlen;
+	u64 csbcpb;
+	void *done;
+	unsigned long handle;
+	unsigned int timeout;
+	long hcall_err;
+};
+
+/* End PFO specific data */
+
+enum vio_dev_family {
+	VDEVICE,	/* The OF node is a child of /vdevice */
+	PFO,		/* The OF node is a child of /ibm,platform-facilities */
+};
+
+/**
+ * struct vio_dev - This structure is used to describe virtual I/O devices.
+ *
+ * @desired: set from return of driver's get_desired_dma() function
+ * @entitled: bytes of IO data that has been reserved for this device.
+ * @allocated: bytes of IO data currently in use by the device.
+ * @allocs_failed: number of DMA failures due to insufficient entitlement.
+ */
+struct vio_dev {
+	const char *name;
+	const char *type;
+	uint32_t unit_address;
+	uint32_t resource_id;
+	unsigned int irq;
+	struct {
+		size_t desired;
+		size_t entitled;
+		size_t allocated;
+		atomic_t allocs_failed;
+	} cmo;
+	enum vio_dev_family family;
+	struct device dev;
+};
+
+struct vio_driver {
+	const char *name;
+	const struct vio_device_id *id_table;
+	int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
+	void (*remove)(struct vio_dev *dev);
+	void (*shutdown)(struct vio_dev *dev);
+	/* A driver must have a get_desired_dma() function to
+	 * be loaded in a CMO environment if it uses DMA.
+	 */
+	unsigned long (*get_desired_dma)(struct vio_dev *dev);
+	const struct dev_pm_ops *pm;
+	struct device_driver driver;
+};
+
+extern int __vio_register_driver(struct vio_driver *drv, struct module *owner,
+				 const char *mod_name);
+/*
+ * vio_register_driver must be a macro so that KBUILD_MODNAME can be expanded
+ */
+#define vio_register_driver(driver)		\
+	__vio_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
+extern void vio_unregister_driver(struct vio_driver *drv);
+
+extern int vio_cmo_entitlement_update(size_t);
+extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
+
+extern void vio_unregister_device(struct vio_dev *dev);
+
+extern int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op);
+
+struct device_node;
+
+extern struct vio_dev *vio_register_device_node(
+		struct device_node *node_vdev);
+extern const void *vio_get_attribute(struct vio_dev *vdev, char *which,
+		int *length);
+#ifdef CONFIG_PPC_PSERIES
+extern struct vio_dev *vio_find_node(struct device_node *vnode);
+extern int vio_enable_interrupts(struct vio_dev *dev);
+extern int vio_disable_interrupts(struct vio_dev *dev);
+#else
+static inline int vio_enable_interrupts(struct vio_dev *dev)
+{
+	return 0;
+}
+#endif
+
+static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
+{
+	return container_of(drv, struct vio_driver, driver);
+}
+
+#define to_vio_dev(__dev)	container_of_const(__dev, struct vio_dev, dev)
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_VIO_H */
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
new file mode 100644
index 0000000000..4c69ece52a
--- /dev/null
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -0,0 +1,24 @@
+#ifndef _ASM_POWERPC_VMALLOC_H
+#define _ASM_POWERPC_VMALLOC_H
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+	/* HPT does not cope with large pages in the vmalloc area */
+	return radix_enabled();
+}
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+	return radix_enabled();
+}
+
+#endif
+
+#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/include/asm/vphn.h b/arch/powerpc/include/asm/vphn.h
new file mode 100644
index 0000000000..8c2f795eea
--- /dev/null
+++ b/arch/powerpc/include/asm/vphn.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_VPHN_H
+#define _ASM_POWERPC_VPHN_H
+
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU	1
+#define VPHN_FLAG_PCPU	2
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif // _ASM_POWERPC_VPHN_H
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
new file mode 100644
index 0000000000..30a12d2086
--- /dev/null
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -0,0 +1,206 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * Word-at-a-time interfaces for PowerPC.
+ */
+
+#include <linux/kernel.h>
+#include <asm/asm-compat.h>
+#include <asm/extable.h>
+
+#ifdef __BIG_ENDIAN__
+
+struct word_at_a_time {
+	const unsigned long high_bits, low_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
+
+/* Result has a bit set in each byte that holds a zero */
+static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
+{
+	unsigned long low_carry = c->low_bits + (c->low_bits & val);
+	return ~(rhs | low_carry);
+}
+
+#define create_zero_mask(mask) (mask)
+
+static inline long find_zero(unsigned long mask)
+{
+	long leading_zero_bits;
+
+	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
+	return leading_zero_bits >> 3;	/* 8 bits per byte: bit count -> byte index */
+}
+
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+	const unsigned long filled = c->low_bits | val;
+	*data = filled;	/* handed back to the caller alongside the result */
+	return ~filled & (c->high_bits + val);
+}
+
+static inline unsigned long zero_bytemask(unsigned long mask)
+{
+	return ~1ul << __fls(mask);	/* every bit above bit number __fls(mask); presumably the top set bit -- confirm */
+}
+
+#else
+
+#ifdef CONFIG_64BIT
+
+/* unused */
+struct word_at_a_time {
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { }
+
+/* This will give us 0xff for a NUL byte and 0x00 elsewhere */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long ret;
+	unsigned long zero = 0;
+
+	asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));	/* compare a with 0, byte by byte */
+	*bits = ret;	/* the same word is both stored and returned */
+
+	return ret;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+/* Alan Modra's little-endian strlen tail for 64-bit */
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	unsigned long leading_zero_bits;
+	long trailing_zero_bit_mask;
+
+	asm("addi	%1,%2,-1\n\t"		/* %1 = bits - 1 */
+	    "andc	%1,%1,%2\n\t"		/* %1 = (bits - 1) & ~bits: ones below the lowest set bit */
+	    "popcntd	%0,%1"			/* %0 = number of ones in that mask */
+		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
+		: "b" (bits));			/* "b": a base register, never r0 (addi reads r0 as 0) */
+
+	return leading_zero_bits;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return mask >> 3;
+}
+
+/* This assumes that we never ask for an all 1s bitmask */
+static inline unsigned long zero_bytemask(unsigned long mask)
+{
+	return (1UL << mask) - 1;
+}
+
+#else	/* 32-bit case */
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* Bias then shift: masks 0, 0xff, 0xffff, 0xffffff give 1, 1, 2, 3 */
+	long biased = (mask + 0x0ff0001) >> 23;
+	/* ANDing with the mask turns the stray 1 for an all-zero mask into 0 */
+	return mask & biased;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	unsigned long below = ~bits & (bits - 1);	/* ones under the lowest set bit */
+	return below >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return count_masked_bytes(mask);
+}
+
+/* Nonzero result means a has a zero byte; *bits receives the same word */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long borrowed = ~a & (a - c->one_bits);
+	*bits = borrowed & c->high_bits;
+	return *bits;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+#endif /* CONFIG_64BIT */
+
+#endif /* __BIG_ENDIAN__ */
+
+/*
+ * We use load_unaligned_zeropad() in a selftest, which builds a userspace
+ * program. Some linker scripts seem to discard the .fixup section, so allow
+ * the test code to use a different section name.
+ */
+#ifndef FIXUP_SECTION
+#define FIXUP_SECTION ".fixup"
+#endif
+
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset, tmp;
+
+	asm(
+	"1:	" PPC_LL "%[ret], 0(%[addr])\n"	/* the direct load; label 1 is the EX_TABLE source */
+	"2:\n"					/* common exit, also reached from the fixup */
+	".section " FIXUP_SECTION ",\"ax\"\n"
+	"3:	"				/* fixup code, the EX_TABLE target for label 1 */
+#ifdef __powerpc64__
+	"clrrdi		%[tmp], %[addr], 3\n\t"		/* tmp = addr with the low 3 bits cleared */
+	"clrlsldi	%[offset], %[addr], 61, 3\n\t"	/* offset = (addr & 7) * 8, a bit count */
+	"ld		%[ret], 0(%[tmp])\n\t"		/* load the aligned doubleword containing addr */
+#ifdef __BIG_ENDIAN__
+	"sld		%[ret], %[ret], %[offset]\n\t"	/* shift out the bytes before addr, zero-fill */
+#else
+	"srd		%[ret], %[ret], %[offset]\n\t"	/* shift out the bytes before addr, zero-fill */
+#endif
+#else /* 32-bit: same sequence on 4-byte words */
+	"clrrwi		%[tmp], %[addr], 2\n\t"		/* tmp = addr with the low 2 bits cleared */
+	"clrlslwi	%[offset], %[addr], 30, 3\n\t"	/* offset = (addr & 3) * 8, a bit count */
+	"lwz		%[ret], 0(%[tmp])\n\t"		/* load the aligned word containing addr */
+#ifdef __BIG_ENDIAN__
+	"slw		%[ret], %[ret], %[offset]\n\t"	/* shift out the bytes before addr, zero-fill */
+#else
+	"srw		%[ret], %[ret], %[offset]\n\t"	/* shift out the bytes before addr, zero-fill */
+#endif
+#endif
+	"b	2b\n"				/* rejoin the normal path after the fixup */
+	".previous\n"
+	EX_TABLE(1b, 3b)
+	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
+	: [addr] "b" (addr), "m" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+#undef FIXUP_SECTION
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
new file mode 100644
index 0000000000..89090485be
--- /dev/null
+++ b/arch/powerpc/include/asm/xics.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common definitions across all variants of ICP and ICS interrupt
+ * controllers.
+ */
+
+#ifndef _XICS_H
+#define _XICS_H
+
+#include <linux/interrupt.h>
+
+#define XICS_IPI		2
+#define XICS_IRQ_SPURIOUS	0
+
+/* Want a priority other than 0.  Various HW issues require this. */
+#define	DEFAULT_PRIORITY	5
+
+/*
+ * Mark IPIs as higher priority so we can take them inside interrupts
+ * FIXME: still true now?
+ */
+#define IPI_PRIORITY		4
+
+/* The least favored priority */
+#define LOWEST_PRIORITY		0xFF
+
+/* The number of priorities defined above */
+#define MAX_NUM_PRIORITIES	3
+
+/* Native ICP */
+#ifdef CONFIG_PPC_ICP_NATIVE
+extern int icp_native_init(void);
+extern void icp_native_flush_interrupt(void);
+extern void icp_native_cause_ipi_rm(int cpu);
+#else
+static inline int icp_native_init(void) { return -ENODEV; }
+#endif
+
+/* PAPR ICP */
+#ifdef CONFIG_PPC_ICP_HV
+int __init icp_hv_init(void);
+#else
+static inline int icp_hv_init(void) { return -ENODEV; }
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+int __init icp_opal_init(void);
+extern void icp_opal_flush_interrupt(void);
+#else
+static inline int icp_opal_init(void) { return -ENODEV; }
+#endif
+
+/* ICP ops */
+struct icp_ops {
+	unsigned int (*get_irq)(void);
+	void (*eoi)(struct irq_data *d);
+	void (*set_priority)(unsigned char prio);
+	void (*teardown_cpu)(void);
+	void (*flush_ipi)(void);
+#ifdef CONFIG_SMP
+	void (*cause_ipi)(int cpu);
+	irq_handler_t ipi_action;
+#endif
+};
+
+extern const struct icp_ops *icp_ops;
+
+#ifdef CONFIG_PPC_ICS_NATIVE
+/* Native ICS */
+extern int ics_native_init(void);
+#else
+static inline int ics_native_init(void) { return -ENODEV; }
+#endif
+
+/* RTAS ICS */
+#ifdef CONFIG_PPC_ICS_RTAS
+extern int ics_rtas_init(void);
+#else
+static inline int ics_rtas_init(void) { return -ENODEV; }
+#endif
+
+/* HAL ICS */
+#ifdef CONFIG_PPC_POWERNV
+extern int ics_opal_init(void);
+#else
+static inline int ics_opal_init(void) { return -ENODEV; }
+#endif
+
+/* ICS instance, hooked up to chip_data of an irq */
+struct ics {
+	struct list_head link;
+	int (*check)(struct ics *ics, unsigned int hwirq);
+	void (*mask_unknown)(struct ics *ics, unsigned long vec);
+	long (*get_server)(struct ics *ics, unsigned long vec);
+	int (*host_match)(struct ics *ics, struct device_node *node);
+	struct irq_chip *chip;
+	char data[];
+};
+
+/* Commons */
+extern unsigned int xics_default_server;
+extern unsigned int xics_default_distrib_server;
+extern unsigned int xics_interrupt_server_size;
+extern struct irq_domain *xics_host;
+
+struct xics_cppr {
+	unsigned char stack[MAX_NUM_PRIORITIES];
+	int index;
+};
+
+DECLARE_PER_CPU(struct xics_cppr, xics_cppr);
+
+static inline void xics_push_cppr(unsigned int vec)
+{
+	struct xics_cppr *state = this_cpu_ptr(&xics_cppr);
+	unsigned char prio;
+
+	/* Warn and bail out when index already refers to the last stack slot */
+	if (WARN_ON(state->index >= MAX_NUM_PRIORITIES - 1))
+		return;
+
+	prio = (vec == XICS_IPI) ? IPI_PRIORITY : DEFAULT_PRIORITY;
+	state->stack[++state->index] = prio;
+}
+
+static inline unsigned char xics_pop_cppr(void)
+{
+	struct xics_cppr *state = this_cpu_ptr(&xics_cppr);
+
+	if (WARN_ON(state->index < 1))
+		return LOWEST_PRIORITY;	/* nothing stacked above slot 0 */
+	state->index--;
+	return state->stack[state->index];
+}
+
+static inline void xics_set_base_cppr(unsigned char cppr)
+{
+	struct xics_cppr *state = this_cpu_ptr(&xics_cppr);
+
+	/*
+	 * The base priority is only meant to change while it is the sole
+	 * entry on the stack, so warn if anything has been pushed above it.
+	 */
+	WARN_ON(state->index != 0);
+	state->stack[0] = cppr;
+}
+
+static inline unsigned char xics_cppr_top(void)
+{
+	const struct xics_cppr *state = this_cpu_ptr(&xics_cppr);
+
+	return state->stack[state->index];	/* entry at the current stack depth */
+}
+
+DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
+
+extern void xics_init(void);
+extern void xics_setup_cpu(void);
+extern void xics_update_irq_servers(void);
+extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
+extern void xics_mask_unknown_vec(unsigned int vec);
+extern void xics_smp_probe(void);
+extern void xics_register_ics(struct ics *ics);
+extern void xics_teardown_cpu(void);
+extern void xics_kexec_teardown_cpu(int secondary);
+extern void xics_migrate_irqs_away(void);
+extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
+#ifdef CONFIG_SMP
+extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			       unsigned int strict_check);
+#else
+#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server)
+#endif
+
+
+#endif /* _XICS_H */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
new file mode 100644
index 0000000000..cf8bb6ac44
--- /dev/null
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_XIVE_REGS_H
+#define _ASM_POWERPC_XIVE_REGS_H
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-byte
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI	0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
+#define XIVE_ESB_GET		0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
+
+/*
+ * Load-after-store ordering
+ *
+ * Adding this offset to the load address will enforce
+ * load-after-store ordering. This is required to use StoreEOI.
+ */
+#define XIVE_ESB_LD_ST_MO	0x40 /* Load-after-store ordering */
+
+#define XIVE_ESB_VAL_P		0x2
+#define XIVE_ESB_VAL_Q		0x1
+#define XIVE_ESB_INVALID	0xFF
+
+/*
+ * Thread Management (aka "TM") registers
+ */
+
+/* TM register offsets */
+#define TM_QW0_USER		0x000 /* All rings */
+#define TM_QW1_OS		0x010 /* Ring 0..2 */
+#define TM_QW2_HV_POOL		0x020 /* Ring 0..1 */
+#define TM_QW3_HV_PHYS		0x030 /* Ring 0..1 */
+
+/* Byte offsets inside a QW             QW0 QW1 QW2 QW3 */
+#define TM_NSR			0x0  /*  +   +   -   +  */
+#define TM_CPPR			0x1  /*  -   +   -   +  */
+#define TM_IPB			0x2  /*  -   +   +   +  */
+#define TM_LSMFB		0x3  /*  -   +   +   +  */
+#define TM_ACK_CNT		0x4  /*  -   +   -   -  */
+#define TM_INC			0x5  /*  -   +   -   +  */
+#define TM_AGE			0x6  /*  -   +   -   +  */
+#define TM_PIPR			0x7  /*  -   +   -   +  */
+
+#define TM_WORD0		0x0
+#define TM_WORD1		0x4
+
+/*
+ * QW word 2 contains the valid bit at the top and other fields
+ * depending on the QW.
+ */
+#define TM_WORD2		0x8
+#define   TM_QW0W2_VU		PPC_BIT32(0)
+#define   TM_QW0W2_LOGIC_SERV	PPC_BITMASK32(1,31) // XX 2,31 ?
+#define   TM_QW1W2_VO		PPC_BIT32(0)
+#define   TM_QW1W2_HO           PPC_BIT32(1) /* P10 XIVE2 */
+#define   TM_QW1W2_OS_CAM	PPC_BITMASK32(8,31)
+#define   TM_QW2W2_VP		PPC_BIT32(0)
+#define   TM_QW2W2_HP           PPC_BIT32(1) /* P10 XIVE2 */
+#define   TM_QW2W2_POOL_CAM	PPC_BITMASK32(8,31)
+#define   TM_QW3W2_VT		PPC_BIT32(0)
+#define   TM_QW3W2_HT           PPC_BIT32(1) /* P10 XIVE2 */
+#define   TM_QW3W2_LP		PPC_BIT32(6)
+#define   TM_QW3W2_LE		PPC_BIT32(7)
+#define   TM_QW3W2_T		PPC_BIT32(31)
+
+/*
+ * In addition to normal loads to "peek" and writes (only when invalid)
+ * using 4 and 8 bytes accesses, the above registers support these
+ * "special" byte operations:
+ *
+ *   - Byte load from QW0[NSR] - User level NSR (EBB)
+ *   - Byte store to QW0[NSR] - User level NSR (EBB)
+ *   - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
+ *   - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
+ *                                    otherwise VT||0000000
+ *   - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
+ *
+ * Then we have all these "special" CI ops at these offsets that trigger
+ * all sorts of side effects:
+ */
+#define TM_SPC_ACK_EBB		0x800	/* Load8 ack EBB to reg */
+#define TM_SPC_ACK_OS_REG	0x810	/* Load16 ack OS irq to reg */
+#define TM_SPC_PUSH_USR_CTX	0x808	/* Store32 Push/Validate user context */
+#define TM_SPC_PULL_USR_CTX	0x808	/* Load32 Pull/Invalidate user context */
+#define TM_SPC_SET_OS_PENDING	0x812	/* Store8 Set OS irq pending bit */
+#define TM_SPC_PULL_OS_CTX	0x818	/* Load32/Load64 Pull/Invalidate OS context to reg */
+#define TM_SPC_PULL_POOL_CTX	0x828	/* Load32/Load64 Pull/Invalidate Pool context to reg */
+#define TM_SPC_ACK_HV_REG	0x830	/* Load16 ack HV irq to reg */
+#define TM_SPC_PULL_USR_CTX_OL	0xc08	/* Store8 Pull/Inval usr ctx to odd line */
+#define TM_SPC_ACK_OS_EL	0xc10	/* Store8 ack OS irq to even line */
+#define TM_SPC_ACK_HV_POOL_EL	0xc20	/* Store8 ack HV evt pool to even line */
+#define TM_SPC_ACK_HV_EL	0xc30	/* Store8 ack HV irq to even line */
+/* XXX more... */
+
+/* NSR fields for the various QW ack types */
+#define TM_QW0_NSR_EB		PPC_BIT8(0)
+#define TM_QW1_NSR_EO		PPC_BIT8(0)
+#define TM_QW3_NSR_HE		PPC_BITMASK8(0,1) /* 2-bit field: bits 0..1 */
+#define  TM_QW3_NSR_HE_NONE	0
+#define  TM_QW3_NSR_HE_POOL	1
+#define  TM_QW3_NSR_HE_PHYS	2
+#define  TM_QW3_NSR_HE_LSI	3
+#define TM_QW3_NSR_I		PPC_BIT8(2)
+#define TM_QW3_NSR_GRP_LVL	PPC_BITMASK8(3,7) /* 5-bit field: bits 3..7 */
+
+#endif /* _ASM_POWERPC_XIVE_REGS_H */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
new file mode 100644
index 0000000000..92930b0b5d
--- /dev/null
+++ b/arch/powerpc/include/asm/xive.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_XIVE_H
+#define _ASM_POWERPC_XIVE_H
+
+#include <asm/opal-api.h>
+
+#define XIVE_INVALID_VP	0xffffffff
+
+#ifdef CONFIG_PPC_XIVE
+
+/*
+ * Thread Interrupt Management Area (TIMA)
+ *
+ * This is a global MMIO region divided in 4 pages of varying access
+ * permissions, providing access to per-cpu interrupt management
+ * functions. It always identifies the CPU doing the access based
+ * on the PowerBus initiator ID, thus we always access via the
+ * same offset regardless of where the code is executing
+ */
+extern void __iomem *xive_tima;
+extern unsigned long xive_tima_os;
+
+/*
+ * Offset in the TM area of our current execution level (provided by
+ * the backend)
+ */
+extern u32 xive_tima_offset;
+
+/*
+ * Per-irq data (irq_get_handler_data for normal IRQs), IPIs
+ * have it stored in the xive_cpu structure. We also cache
+ * for normal interrupts the current target CPU.
+ *
+ * This structure is setup by the backend for each interrupt.
+ */
+struct xive_irq_data {
+	u64 flags;		/* XIVE_IRQ_FLAG_* bits */
+	u64 eoi_page;
+	void __iomem *eoi_mmio;
+	u64 trig_page;
+	void __iomem *trig_mmio;
+	u32 esb_shift;
+	int src_chip;
+	u32 hw_irq;
+
+	/* Setup/used by frontend */
+	int target;		/* cached current target CPU (normal interrupts) */
+	/*
+	 * saved_p means that there is a queue entry for this interrupt
+	 * in some CPU's queue (not including guest vcpu queues), even
+	 * if P is not set in the source ESB.
+	 * stale_p means that there is no queue entry for this interrupt
+	 * in any CPU's queue, even if P is set in the source ESB.
+	 */
+	bool saved_p;
+	bool stale_p;
+};
+#define XIVE_IRQ_FLAG_STORE_EOI	0x01
+#define XIVE_IRQ_FLAG_LSI	0x02
+/* #define XIVE_IRQ_FLAG_SHIFT_BUG	0x04 */ /* P9 DD1.0 workaround */
+/* #define XIVE_IRQ_FLAG_MASK_FW	0x08 */ /* P9 DD1.0 workaround */
+/* #define XIVE_IRQ_FLAG_EOI_FW	0x10 */ /* P9 DD1.0 workaround */
+#define XIVE_IRQ_FLAG_H_INT_ESB	0x20
+
+/* Special flag set by KVM for escalation interrupts */
+#define XIVE_IRQ_FLAG_NO_EOI	0x80
+
+#define XIVE_INVALID_CHIP_ID	-1
+
+/* A queue tracking structure in a CPU */
+struct xive_q {
+	__be32 			*qpage;
+	u32			msk;
+	u32			idx;
+	u32			toggle;
+	u64			eoi_phys;
+	u32			esc_irq;
+	atomic_t		count;
+	atomic_t		pending_count;
+	u64			guest_qaddr;
+	u32			guest_qshift;
+};
+
+/* Global enable flags for the XIVE support */
+extern bool __xive_enabled;
+
+static inline bool xive_enabled(void) { return __xive_enabled; }
+
+bool xive_spapr_init(void);
+bool xive_native_init(void);
+void xive_smp_probe(void);
+int  xive_smp_prepare_cpu(unsigned int cpu);
+void xive_smp_setup_cpu(void);
+void xive_smp_disable_cpu(void);
+void xive_teardown_cpu(void);
+void xive_shutdown(void);
+void xive_flush_interrupt(void);
+
+/* xmon hook */
+void xmon_xive_do_dump(int cpu);
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
+void xmon_xive_get_irq_all(void);
+
+/* APIs used by KVM */
+u32 xive_native_default_eq_shift(void);
+u32 xive_native_alloc_vp_block(u32 max_vcpus);
+void xive_native_free_vp_block(u32 vp_base);
+int xive_native_populate_irq_data(u32 hw_irq,
+				  struct xive_irq_data *data);
+void xive_cleanup_irq_data(struct xive_irq_data *xd);
+void xive_irq_free_data(unsigned int virq);
+void xive_native_free_irq(u32 irq);
+int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+
+int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+				__be32 *qpage, u32 order, bool can_escalate);
+void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
+
+void xive_native_sync_source(u32 hw_irq);
+void xive_native_sync_queue(u32 hw_irq);
+bool is_xive_irq(struct irq_chip *chip);
+int xive_native_enable_vp(u32 vp_id, bool single_escalation);
+int xive_native_disable_vp(u32 vp_id);
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+bool xive_native_has_single_escalation(void);
+bool xive_native_has_save_restore(void);
+
+int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+			       u64 *out_qpage,
+			       u64 *out_qsize,
+			       u64 *out_qeoi_page,
+			       u32 *out_escalate_irq,
+			       u64 *out_qflags);
+
+int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+				u32 *qindex);
+int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+				u32 qindex);
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+bool xive_native_has_queue_state_support(void);
+extern u32 xive_native_alloc_irq_on_chip(u32 chip_id);
+
+static inline u32 xive_native_alloc_irq(void)
+{
+	return xive_native_alloc_irq_on_chip(OPAL_XIVE_ANY_CHIP);
+}
+
+#else
+
+static inline bool xive_enabled(void) { return false; }
+
+static inline bool xive_spapr_init(void) { return false; }
+static inline bool xive_native_init(void) { return false; }
+static inline void xive_smp_probe(void) { }
+static inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
+static inline void xive_smp_setup_cpu(void) { }
+static inline void xive_smp_disable_cpu(void) { }
+static inline void xive_shutdown(void) { }
+static inline void xive_flush_interrupt(void) { }
+
+static inline u32 xive_native_alloc_vp_block(u32 max_vcpus) { return XIVE_INVALID_VP; }
+static inline void xive_native_free_vp_block(u32 vp_base) { }
+
+#endif
+
+#endif /* _ASM_POWERPC_XIVE_H */
diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
new file mode 100644
index 0000000000..f2d44b44f4
--- /dev/null
+++ b/arch/powerpc/include/asm/xmon.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_POWERPC_XMON_H
+#define __ASM_POWERPC_XMON_H
+
+/*
+ * Copyright (C) 2006 IBM Corp
+ */
+
+#ifdef __KERNEL__
+
+#include <linux/irqreturn.h>
+
+#ifdef CONFIG_XMON
+extern void xmon_setup(void);
+void __init xmon_register_spus(struct list_head *list);
+struct pt_regs;
+extern int xmon(struct pt_regs *excp);
+extern irqreturn_t xmon_irq(int, void *);
+#else
+static inline void xmon_setup(void) { }
+static inline void xmon_register_spus(struct list_head *list) { }
+#endif
+
+#if defined(CONFIG_XMON) && defined(CONFIG_SMP)
+extern int cpus_are_in_xmon(void);
+#endif
+
+extern __printf(1, 2) void xmon_printf(const char *format, ...);
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_POWERPC_XMON_H */
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
new file mode 100644
index 0000000000..37d05c11d0
--- /dev/null
+++ b/arch/powerpc/include/asm/xor.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#ifndef _ASM_POWERPC_XOR_H
+#define _ASM_POWERPC_XOR_H
+
+#ifdef CONFIG_ALTIVEC
+
+#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/xor_altivec.h>
+
+static struct xor_block_template xor_block_altivec = {
+	.name = "altivec",
+	.do_2 = xor_altivec_2,
+	.do_3 = xor_altivec_3,
+	.do_4 = xor_altivec_4,
+	.do_5 = xor_altivec_5,
+};
+
+#define XOR_SPEED_ALTIVEC()				\
+	do {						\
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))	\
+			xor_speed(&xor_block_altivec);	\
+	} while (0)
+#else
+#define XOR_SPEED_ALTIVEC()
+#endif
+
+/* Also try the generic routines. */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+do {							\
+	xor_speed(&xor_block_8regs);			\
+	xor_speed(&xor_block_8regs_p);			\
+	xor_speed(&xor_block_32regs);			\
+	xor_speed(&xor_block_32regs_p);			\
+	XOR_SPEED_ALTIVEC();				\
+} while (0)
+
+#endif /* _ASM_POWERPC_XOR_H */
diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h
new file mode 100644
index 0000000000..294620a25f
--- /dev/null
+++ b/arch/powerpc/include/asm/xor_altivec.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_XOR_ALTIVEC_H
+#define _ASM_POWERPC_XOR_ALTIVEC_H
+
+#ifdef CONFIG_ALTIVEC
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2);
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3);
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4);
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5);
+
+#endif
+#endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
new file mode 100644
index 0000000000..353b70b199
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+generated-y += unistd_32.h
+generated-y += unistd_64.h
diff --git a/arch/powerpc/include/uapi/asm/auxvec.h b/arch/powerpc/include/uapi/asm/auxvec.h
new file mode 100644
index 0000000000..aa7c162154
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/auxvec.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_AUXVEC_H
+#define _ASM_POWERPC_AUXVEC_H
+
+/*
+ * We need to put in some extra aux table entries to tell glibc what
+ * the cache block size is, so it can use the dcbz instruction safely.
+ */
+#define AT_DCACHEBSIZE		19
+#define AT_ICACHEBSIZE		20
+#define AT_UCACHEBSIZE		21
+/* A special ignored type value for PPC, for glibc compatibility.  */
+#define AT_IGNOREPPC		22
+
+/* The vDSO location. We have to use the same value as x86 for glibc's
+ * sake :-)
+ */
+#define AT_SYSINFO_EHDR		33
+
+/*
+ * AT_*CACHEBSIZE above represent the cache *block* size which is
+ * the size that is affected by the cache management instructions.
+ *
+ * It doesn't necessarily match the cache *line* size, which is
+ * more of a performance tuning hint. Additionally the latter can
+ * be different for the different cache levels.
+ *
+ * The set of entries below represent more extensive information
+ * about the caches, in the form of two entries per cache type,
+ * one entry containing the cache size in bytes, and the other
+ * containing the cache line size in bytes in the bottom 16 bits
+ * and the cache associativity in the next 16 bits.
+ *
+ * The associativity is such that if N is the 16-bit value, the
+ * cache is N way set associative. A value of 0xffff means fully
+ * associative, a value of 1 means directly mapped.
+ *
+ * For all these fields, a value of 0 means that the information
+ * is not known.
+ */
+
+#define AT_L1I_CACHESIZE	40
+#define AT_L1I_CACHEGEOMETRY	41
+#define AT_L1D_CACHESIZE	42
+#define AT_L1D_CACHEGEOMETRY	43
+#define AT_L2_CACHESIZE		44
+#define AT_L2_CACHEGEOMETRY	45
+#define AT_L3_CACHESIZE		46
+#define AT_L3_CACHEGEOMETRY	47
+
+#define AT_MINSIGSTKSZ		51      /* stack needed for signal delivery */
+
+#define AT_VECTOR_SIZE_ARCH	15 /* entries in ARCH_DLINFO */
+
+#endif
diff --git a/arch/powerpc/include/uapi/asm/bitsperlong.h b/arch/powerpc/include/uapi/asm/bitsperlong.h
new file mode 100644
index 0000000000..46ece3ecff
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_POWERPC_BITSPERLONG_H
+#define __ASM_POWERPC_BITSPERLONG_H
+
+#if defined(__powerpc64__)
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_POWERPC_BITSPERLONG_H */
diff --git a/arch/powerpc/include/uapi/asm/bootx.h b/arch/powerpc/include/uapi/asm/bootx.h
new file mode 100644
index 0000000000..6728c7e24e
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/bootx.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file describes the structure passed from the BootX application
+ * (for MacOS) when it is used to boot Linux.
+ *
+ * Written by Benjamin Herrenschmidt.
+ */
+
+
+#ifndef _UAPI__ASM_BOOTX_H__
+#define _UAPI__ASM_BOOTX_H__
+
+#include <linux/types.h>
+
+#ifdef macintosh
+#include <Types.h>
+#include "linux_type_defs.h"
+#endif
+
+#ifdef macintosh
+/* All this requires PowerPC alignment */
+#pragma options align=power
+#endif
+
+/* On kernel entry:
+ *
+ * r3 = 0x426f6f58    ('BooX')
+ * r4 = pointer to boot_infos
+ * r5 = NULL
+ *
+ * Data and instruction translation disabled, interrupts
+ * disabled, kernel loaded at physical 0x00000000 on PCI
+ * machines (will be different on NuBus).
+ */
+
+#define BOOT_INFO_VERSION               5
+#define BOOT_INFO_COMPATIBLE_VERSION    1
+
+/* Bit in the architecture flag mask. More to be defined in
+   future versions. Note that either BOOT_ARCH_PCI or
+   BOOT_ARCH_NUBUS is set. The other BOOT_ARCH_NUBUS_xxx are
+   set additionally when BOOT_ARCH_NUBUS is set.
+ */
+#define BOOT_ARCH_PCI                   0x00000001UL
+#define BOOT_ARCH_NUBUS                 0x00000002UL
+#define BOOT_ARCH_NUBUS_PDM             0x00000010UL
+#define BOOT_ARCH_NUBUS_PERFORMA        0x00000020UL
+#define BOOT_ARCH_NUBUS_POWERBOOK       0x00000040UL
+
+/*  Maximum number of ranges in phys memory map */
+#define MAX_MEM_MAP_SIZE				26
+
+/* This is the format of an element in the physical memory map. Note that
+   the map is optional and current BootX will only build it for pre-PCI
+   machines */
+typedef struct boot_info_map_entry
+{
+    __u32       physAddr;                /* Physical starting address */
+    __u32       size;                    /* Size in bytes */
+} boot_info_map_entry_t;
+
+
+/* This is the boot information that is passed to the bootstrap.
+ * Note that the kernel arguments and the device tree are appended
+ * at the end of this structure. */
+typedef struct boot_infos
+{
+    /* Version of this structure */
+    __u32       version;
+    /* backward compatible down to version: */
+    __u32       compatible_version;
+
+    /* NEW (vers. 2) this holds the current _logical_ base addr of
+       the frame buffer (for use by early boot message) */
+    __u8*       logicalDisplayBase;
+
+    /* NEW (vers. 4) Apple's machine identification */
+    __u32       machineID;
+
+    /* NEW (vers. 4) Detected hw architecture */
+    __u32       architecture;
+
+    /* The device tree (internal addresses relative to the beginning of the tree,
+     * device tree offset relative to the beginning of this structure).
+     * On pre-PCI macintosh (BOOT_ARCH_PCI bit set to 0 in architecture), this
+     * field is 0.
+     */
+    __u32       deviceTreeOffset;        /* Device tree offset */
+    __u32       deviceTreeSize;          /* Size of the device tree */
+
+    /* Some infos about the current MacOS display */
+    __u32       dispDeviceRect[4];       /* left,top,right,bottom */
+    __u32       dispDeviceDepth;         /* (8, 16 or 32) */
+    __u8*       dispDeviceBase;          /* base address (physical) */
+    __u32       dispDeviceRowBytes;      /* rowbytes (in bytes) */
+    __u32       dispDeviceColorsOffset;  /* Colormap (8 bits only) or 0 (*) */
+    /* Optional offset in the registry to the current
+     * MacOS display. (Can be 0 when not detected) */
+     __u32      dispDeviceRegEntryOffset;
+
+    /* Optional pointer to boot ramdisk (offset from this structure) */
+    __u32       ramDisk;
+    __u32       ramDiskSize;             /* size of ramdisk image */
+
+    /* Kernel command line arguments (offset from this structure) */
+    __u32       kernelParamsOffset;
+
+    /* ALL BELOW NEW (vers. 4) */
+
+    /* This defines the physical memory. Valid with BOOT_ARCH_NUBUS flag
+       (non-PCI) only. On PCI, memory is contiguous and its size is in the
+       device-tree. */
+    boot_info_map_entry_t
+    	        physMemoryMap[MAX_MEM_MAP_SIZE]; /* Where the phys memory is */
+    __u32       physMemoryMapSize;               /* How many entries in map */
+
+
+    /* The framebuffer size (optional, currently 0) */
+    __u32       frameBufferSize;         /* Represents a max size, can be 0. */
+
+    /* NEW (vers. 5) */
+
+    /* Total params size (args + colormap + device tree + ramdisk) */
+    __u32       totalParamsSize;
+
+} boot_infos_t;
+
+
+#ifdef macintosh
+#pragma options align=reset
+#endif
+
+#endif /* _UAPI__ASM_BOOTX_H__ */
diff --git a/arch/powerpc/include/uapi/asm/byteorder.h b/arch/powerpc/include/uapi/asm/byteorder.h
new file mode 100644
index 0000000000..8ef66f7d9d
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/byteorder.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_BYTEORDER_H
+#define _ASM_POWERPC_BYTEORDER_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifdef __LITTLE_ENDIAN__
+#include <linux/byteorder/little_endian.h>
+#else
+#include <linux/byteorder/big_endian.h>
+#endif
+
+#endif /* _ASM_POWERPC_BYTEORDER_H */
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
new file mode 100644
index 0000000000..731b97dc2d
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
+#define _UAPI__ASM_POWERPC_CPUTABLE_H
+
+/* in AT_HWCAP */
+#define PPC_FEATURE_32			0x80000000
+#define PPC_FEATURE_64			0x40000000
+#define PPC_FEATURE_601_INSTR		0x20000000
+#define PPC_FEATURE_HAS_ALTIVEC		0x10000000
+#define PPC_FEATURE_HAS_FPU		0x08000000
+#define PPC_FEATURE_HAS_MMU		0x04000000
+#define PPC_FEATURE_HAS_4xxMAC		0x02000000
+#define PPC_FEATURE_UNIFIED_CACHE	0x01000000
+#define PPC_FEATURE_HAS_SPE		0x00800000
+#define PPC_FEATURE_HAS_EFP_SINGLE	0x00400000
+#define PPC_FEATURE_HAS_EFP_DOUBLE	0x00200000
+#define PPC_FEATURE_NO_TB		0x00100000
+#define PPC_FEATURE_POWER4		0x00080000
+#define PPC_FEATURE_POWER5		0x00040000
+#define PPC_FEATURE_POWER5_PLUS		0x00020000
+#define PPC_FEATURE_CELL		0x00010000
+#define PPC_FEATURE_BOOKE		0x00008000
+#define PPC_FEATURE_SMT			0x00004000
+#define PPC_FEATURE_ICACHE_SNOOP	0x00002000
+#define PPC_FEATURE_ARCH_2_05		0x00001000
+#define PPC_FEATURE_PA6T		0x00000800
+#define PPC_FEATURE_HAS_DFP		0x00000400
+#define PPC_FEATURE_POWER6_EXT		0x00000200
+#define PPC_FEATURE_ARCH_2_06		0x00000100
+#define PPC_FEATURE_HAS_VSX		0x00000080
+
+#define PPC_FEATURE_PSERIES_PERFMON_COMPAT \
+					0x00000040
+
+/* Reserved - do not use		0x00000004 */
+#define PPC_FEATURE_TRUE_LE		0x00000002
+#define PPC_FEATURE_PPC_LE		0x00000001
+
+/* in AT_HWCAP2 */
+#define PPC_FEATURE2_ARCH_2_07		0x80000000
+#define PPC_FEATURE2_HTM		0x40000000
+#define PPC_FEATURE2_DSCR		0x20000000
+#define PPC_FEATURE2_EBB		0x10000000
+#define PPC_FEATURE2_ISEL		0x08000000
+#define PPC_FEATURE2_TAR		0x04000000
+#define PPC_FEATURE2_VEC_CRYPTO		0x02000000
+#define PPC_FEATURE2_HTM_NOSC		0x01000000
+#define PPC_FEATURE2_ARCH_3_00		0x00800000 /* ISA 3.00 */
+#define PPC_FEATURE2_HAS_IEEE128	0x00400000 /* VSX IEEE Binary Float 128-bit */
+#define PPC_FEATURE2_DARN		0x00200000 /* darn random number insn */
+#define PPC_FEATURE2_SCV		0x00100000 /* scv syscall */
+#define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000 /* TM w/out suspended state */
+#define PPC_FEATURE2_ARCH_3_1		0x00040000 /* ISA 3.1 */
+#define PPC_FEATURE2_MMA		0x00020000 /* Matrix Multiply Assist */
+
+/*
+ * IMPORTANT!
+ * All future PPC_FEATURE definitions should be allocated in cooperation with
+ * OPAL / skiboot firmware, in accordance with the ibm,powerpc-cpu-features
+ * device tree binding.
+ */
+
+#endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h
new file mode 100644
index 0000000000..28186071fa
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2015
+ *
+ * Authors: Gavin Shan <gwshan@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_POWERPC_EEH_H
+#define _ASM_POWERPC_EEH_H
+
+/* PE states */
+#define EEH_PE_STATE_NORMAL		0	/* Normal state		*/
+#define EEH_PE_STATE_RESET		1	/* PE reset asserted	*/
+#define EEH_PE_STATE_STOPPED_IO_DMA	2	/* Frozen PE		*/
+#define EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA only	*/
+#define EEH_PE_STATE_UNAVAIL		5	/* Unavailable		*/
+
+/* EEH error types and functions */
+#define EEH_ERR_TYPE_32			0       /* 32-bits error	*/
+#define EEH_ERR_TYPE_64			1       /* 64-bits error	*/
+#define EEH_ERR_FUNC_MIN		0
+#define EEH_ERR_FUNC_LD_MEM_ADDR	0	/* Memory load	*/
+#define EEH_ERR_FUNC_LD_MEM_DATA	1
+#define EEH_ERR_FUNC_LD_IO_ADDR		2	/* IO load	*/
+#define EEH_ERR_FUNC_LD_IO_DATA		3
+#define EEH_ERR_FUNC_LD_CFG_ADDR	4	/* Config load	*/
+#define EEH_ERR_FUNC_LD_CFG_DATA	5
+#define EEH_ERR_FUNC_ST_MEM_ADDR	6	/* Memory store	*/
+#define EEH_ERR_FUNC_ST_MEM_DATA	7
+#define EEH_ERR_FUNC_ST_IO_ADDR		8	/* IO store	*/
+#define EEH_ERR_FUNC_ST_IO_DATA		9
+#define EEH_ERR_FUNC_ST_CFG_ADDR	10	/* Config store	*/
+#define EEH_ERR_FUNC_ST_CFG_DATA	11
+#define EEH_ERR_FUNC_DMA_RD_ADDR	12	/* DMA read	*/
+#define EEH_ERR_FUNC_DMA_RD_DATA	13
+#define EEH_ERR_FUNC_DMA_RD_MASTER	14
+#define EEH_ERR_FUNC_DMA_RD_TARGET	15
+#define EEH_ERR_FUNC_DMA_WR_ADDR	16	/* DMA write	*/
+#define EEH_ERR_FUNC_DMA_WR_DATA	17
+#define EEH_ERR_FUNC_DMA_WR_MASTER	18
+#define EEH_ERR_FUNC_DMA_WR_TARGET	19
+#define EEH_ERR_FUNC_MAX		19
+
+#endif /* _ASM_POWERPC_EEH_H */
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
new file mode 100644
index 0000000000..a5377f494f
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * ELF register definitions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_ELF_H
+#define _UAPI_ASM_POWERPC_ELF_H
+
+
+#include <linux/types.h>
+
+#include <asm/ptrace.h>
+#include <asm/cputable.h>
+#include <asm/auxvec.h>
+
+/* PowerPC relocations defined by the ABIs */
+#define R_PPC_NONE		0
+#define R_PPC_ADDR32		1	/* 32bit absolute address */
+#define R_PPC_ADDR24		2	/* 26bit address, 2 bits ignored.  */
+#define R_PPC_ADDR16		3	/* 16bit absolute address */
+#define R_PPC_ADDR16_LO		4	/* lower 16bit of absolute address */
+#define R_PPC_ADDR16_HI		5	/* high 16bit of absolute address */
+#define R_PPC_ADDR16_HA		6	/* adjusted high 16bit */
+#define R_PPC_ADDR14		7	/* 16bit address, 2 bits ignored */
+#define R_PPC_ADDR14_BRTAKEN	8
+#define R_PPC_ADDR14_BRNTAKEN	9
+#define R_PPC_REL24		10	/* PC relative 26 bit */
+#define R_PPC_REL14		11	/* PC relative 16 bit */
+#define R_PPC_REL14_BRTAKEN	12
+#define R_PPC_REL14_BRNTAKEN	13
+#define R_PPC_GOT16		14
+#define R_PPC_GOT16_LO		15
+#define R_PPC_GOT16_HI		16
+#define R_PPC_GOT16_HA		17
+#define R_PPC_PLTREL24		18
+#define R_PPC_COPY		19
+#define R_PPC_GLOB_DAT		20
+#define R_PPC_JMP_SLOT		21
+#define R_PPC_RELATIVE		22
+#define R_PPC_LOCAL24PC		23
+#define R_PPC_UADDR32		24
+#define R_PPC_UADDR16		25
+#define R_PPC_REL32		26
+#define R_PPC_PLT32		27
+#define R_PPC_PLTREL32		28
+#define R_PPC_PLT16_LO		29
+#define R_PPC_PLT16_HI		30
+#define R_PPC_PLT16_HA		31
+#define R_PPC_SDAREL16		32
+#define R_PPC_SECTOFF		33
+#define R_PPC_SECTOFF_LO	34
+#define R_PPC_SECTOFF_HI	35
+#define R_PPC_SECTOFF_HA	36
+
+/* PowerPC relocations defined for the TLS access ABI.  */
+#define R_PPC_TLS		67 /* none	(sym+add)@tls */
+#define R_PPC_DTPMOD32		68 /* word32	(sym+add)@dtpmod */
+#define R_PPC_TPREL16		69 /* half16*	(sym+add)@tprel */
+#define R_PPC_TPREL16_LO	70 /* half16	(sym+add)@tprel@l */
+#define R_PPC_TPREL16_HI	71 /* half16	(sym+add)@tprel@h */
+#define R_PPC_TPREL16_HA	72 /* half16	(sym+add)@tprel@ha */
+#define R_PPC_TPREL32		73 /* word32	(sym+add)@tprel */
+#define R_PPC_DTPREL16		74 /* half16*	(sym+add)@dtprel */
+#define R_PPC_DTPREL16_LO	75 /* half16	(sym+add)@dtprel@l */
+#define R_PPC_DTPREL16_HI	76 /* half16	(sym+add)@dtprel@h */
+#define R_PPC_DTPREL16_HA	77 /* half16	(sym+add)@dtprel@ha */
+#define R_PPC_DTPREL32		78 /* word32	(sym+add)@dtprel */
+#define R_PPC_GOT_TLSGD16	79 /* half16*	(sym+add)@got@tlsgd */
+#define R_PPC_GOT_TLSGD16_LO	80 /* half16	(sym+add)@got@tlsgd@l */
+#define R_PPC_GOT_TLSGD16_HI	81 /* half16	(sym+add)@got@tlsgd@h */
+#define R_PPC_GOT_TLSGD16_HA	82 /* half16	(sym+add)@got@tlsgd@ha */
+#define R_PPC_GOT_TLSLD16	83 /* half16*	(sym+add)@got@tlsld */
+#define R_PPC_GOT_TLSLD16_LO	84 /* half16	(sym+add)@got@tlsld@l */
+#define R_PPC_GOT_TLSLD16_HI	85 /* half16	(sym+add)@got@tlsld@h */
+#define R_PPC_GOT_TLSLD16_HA	86 /* half16	(sym+add)@got@tlsld@ha */
+#define R_PPC_GOT_TPREL16	87 /* half16*	(sym+add)@got@tprel */
+#define R_PPC_GOT_TPREL16_LO	88 /* half16	(sym+add)@got@tprel@l */
+#define R_PPC_GOT_TPREL16_HI	89 /* half16	(sym+add)@got@tprel@h */
+#define R_PPC_GOT_TPREL16_HA	90 /* half16	(sym+add)@got@tprel@ha */
+#define R_PPC_GOT_DTPREL16	91 /* half16*	(sym+add)@got@dtprel */
+#define R_PPC_GOT_DTPREL16_LO	92 /* half16*	(sym+add)@got@dtprel@l */
+#define R_PPC_GOT_DTPREL16_HI	93 /* half16*	(sym+add)@got@dtprel@h */
+#define R_PPC_GOT_DTPREL16_HA	94 /* half16*	(sym+add)@got@dtprel@ha */
+
+/* keep this the last entry. */
+#define R_PPC_NUM		95
+
+
+#define ELF_NGREG	48	/* slots per elf_gregset_t*; includes nip, msr, lr, etc. */
+#define ELF_NFPREG	33	/* entries in elf_fpregset_t; includes fpscr */
+#define ELF_NVMX	34	/* includes all vector registers */
+#define ELF_NVSX	32	/* includes all VSX registers */
+#define ELF_NTMSPRREG	3	/* includes tfhar, tfiar, texasr */
+#define ELF_NEBB	3	/* includes ebbrr, ebbhr, bescr */
+#define ELF_NPMU	5	/* includes siar, sdar, sier, mmcr2, mmcr0 */
+#define ELF_NPKEY	3	/* includes amr, iamr, uamor */
+#define ELF_NDEXCR	2	/* includes dexcr, hdexcr */
+#define ELF_NHASHKEYR	1	/* includes hashkeyr */
+
+typedef unsigned long elf_greg_t64;	/* slot type of the "64" set; NOTE(review): unsigned long width is ABI-dependent */
+typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];	/* ELF_NGREG slots */
+/* The "32" set: same slot count, unsigned int slots. */
+typedef unsigned int elf_greg_t32;
+typedef elf_greg_t32 elf_gregset_t32[ELF_NGREG];
+typedef elf_gregset_t32 compat_elf_gregset_t;	/* alias of the "32" set */
+
+/*
+ * ELF_ARCH, CLASS, and DATA are used to set parameters in the core dumps.
+ */
+#ifdef __powerpc64__
+# define ELF_NVRREG32	33	/* includes vscr & vrsave stuffed together */
+# define ELF_NVRREG	34	/* includes vscr & vrsave in split vectors */
+# define ELF_NVSRHALFREG 32	/* Half the vsx registers */
+# define ELF_GREG_TYPE	elf_greg_t64
+# define ELF_ARCH	EM_PPC64
+# define ELF_CLASS	ELFCLASS64
+typedef elf_greg_t64 elf_greg_t;	/* native slot type = "64" variant */
+typedef elf_gregset_t64 elf_gregset_t;	/* native set = "64" variant */
+#else /* !__powerpc64__ */
+# define ELF_NEVRREG	34	/* includes acc (as 2) */
+# define ELF_NVRREG	33	/* includes vscr */
+# define ELF_GREG_TYPE	elf_greg_t32
+# define ELF_ARCH	EM_PPC
+# define ELF_CLASS	ELFCLASS32
+typedef elf_greg_t32 elf_greg_t;	/* native slot type = "32" variant */
+typedef elf_gregset_t32 elf_gregset_t;	/* native set = "32" variant */
+#endif /* __powerpc64__ */
+
+#ifdef __BIG_ENDIAN__
+#define ELF_DATA	ELFDATA2MSB	/* chosen when __BIG_ENDIAN__ is defined */
+#else /* !__BIG_ENDIAN__ */
+#define ELF_DATA	ELFDATA2LSB	/* chosen otherwise */
+#endif /* __BIG_ENDIAN__ */
+
+/* Floating point registers */
+typedef double elf_fpreg_t;	/* one entry of the set below */
+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];	/* ELF_NFPREG entries, fpscr included */
+
+/* Altivec registers */
+/*
+ * The entries with indexes 0-31 contain the corresponding vector registers. 
+ * The entry with index 32 contains the vscr as the last word (offset 12) 
+ * within the quadword.  This allows the vscr to be stored as either a 
+ * quadword (since it must be copied via a vector register to/from storage) 
+ * or as a word.  
+ *
+ * 64-bit kernel notes: The entry at index 33 contains the vrsave as the first  
+ * word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32 
+ * ptrace interface.  This allows signal handling and ptrace to use the same 
+ * structures.  This also simplifies the implementation of a bi-arch 
+ * (combined 32- and 64-bit) gdb.
+ *
+ * Note that it's _not_ compatible with 32 bits ucontext which stuffs the
+ * vrsave along with vscr and so only uses 33 vectors for the register set
+ */
+typedef __vector128 elf_vrreg_t;	/* one quadword entry of the set */
+typedef elf_vrreg_t elf_vrregset_t[ELF_NVRREG];	/* ELF_NVRREG entries, laid out as described above */
+#ifdef __powerpc64__
+typedef elf_vrreg_t elf_vrregset_t32[ELF_NVRREG32];	/* 33-entry form: vscr & vrsave stuffed together */
+typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];	/* ELF_NVSRHALFREG entries of elf_fpreg_t */
+#endif /* __powerpc64__ */
+
+/* PowerPC64 relocations defined by the ABIs */
+#define R_PPC64_NONE    R_PPC_NONE
+#define R_PPC64_ADDR32  R_PPC_ADDR32  /* 32bit absolute address.  */
+#define R_PPC64_ADDR24  R_PPC_ADDR24  /* 26bit address, word aligned.  */
+#define R_PPC64_ADDR16  R_PPC_ADDR16  /* 16bit absolute address. */
+#define R_PPC64_ADDR16_LO R_PPC_ADDR16_LO /* lower 16bits of abs. address.  */
+#define R_PPC64_ADDR16_HI R_PPC_ADDR16_HI /* high 16bits of abs. address. */
+#define R_PPC64_ADDR16_HA R_PPC_ADDR16_HA /* adjusted high 16bits.  */
+#define R_PPC64_ADDR14 R_PPC_ADDR14   /* 16bit address, word aligned.  */
+#define R_PPC64_ADDR14_BRTAKEN  R_PPC_ADDR14_BRTAKEN
+#define R_PPC64_ADDR14_BRNTAKEN R_PPC_ADDR14_BRNTAKEN
+#define R_PPC64_REL24   R_PPC_REL24 /* PC relative 26 bit, word aligned.  */
+#define R_PPC64_REL14   R_PPC_REL14 /* PC relative 16 bit. */
+#define R_PPC64_REL14_BRTAKEN   R_PPC_REL14_BRTAKEN
+#define R_PPC64_REL14_BRNTAKEN  R_PPC_REL14_BRNTAKEN
+#define R_PPC64_GOT16     R_PPC_GOT16
+#define R_PPC64_GOT16_LO  R_PPC_GOT16_LO
+#define R_PPC64_GOT16_HI  R_PPC_GOT16_HI
+#define R_PPC64_GOT16_HA  R_PPC_GOT16_HA
+
+#define R_PPC64_COPY      R_PPC_COPY
+#define R_PPC64_GLOB_DAT  R_PPC_GLOB_DAT
+#define R_PPC64_JMP_SLOT  R_PPC_JMP_SLOT
+#define R_PPC64_RELATIVE  R_PPC_RELATIVE
+
+#define R_PPC64_UADDR32   R_PPC_UADDR32
+#define R_PPC64_UADDR16   R_PPC_UADDR16
+#define R_PPC64_REL32     R_PPC_REL32
+#define R_PPC64_PLT32     R_PPC_PLT32
+#define R_PPC64_PLTREL32  R_PPC_PLTREL32
+#define R_PPC64_PLT16_LO  R_PPC_PLT16_LO
+#define R_PPC64_PLT16_HI  R_PPC_PLT16_HI
+#define R_PPC64_PLT16_HA  R_PPC_PLT16_HA
+
+#define R_PPC64_SECTOFF     R_PPC_SECTOFF
+#define R_PPC64_SECTOFF_LO  R_PPC_SECTOFF_LO
+#define R_PPC64_SECTOFF_HI  R_PPC_SECTOFF_HI
+#define R_PPC64_SECTOFF_HA  R_PPC_SECTOFF_HA
+#define R_PPC64_ADDR30          37  /* word30 (S + A - P) >> 2.  */
+#define R_PPC64_ADDR64          38  /* doubleword64 S + A.  */
+#define R_PPC64_ADDR16_HIGHER   39  /* half16 #higher(S + A).  */
+#define R_PPC64_ADDR16_HIGHERA  40  /* half16 #highera(S + A).  */
+#define R_PPC64_ADDR16_HIGHEST  41  /* half16 #highest(S + A).  */
+#define R_PPC64_ADDR16_HIGHESTA 42  /* half16 #highesta(S + A). */
+#define R_PPC64_UADDR64     43  /* doubleword64 S + A.  */
+#define R_PPC64_REL64       44  /* doubleword64 S + A - P.  */
+#define R_PPC64_PLT64       45  /* doubleword64 L + A.  */
+#define R_PPC64_PLTREL64    46  /* doubleword64 L + A - P.  */
+#define R_PPC64_TOC16       47  /* half16* S + A - .TOC.  */
+#define R_PPC64_TOC16_LO    48  /* half16 #lo(S + A - .TOC.).  */
+#define R_PPC64_TOC16_HI    49  /* half16 #hi(S + A - .TOC.).  */
+#define R_PPC64_TOC16_HA    50  /* half16 #ha(S + A - .TOC.).  */
+#define R_PPC64_TOC         51  /* doubleword64 .TOC. */
+#define R_PPC64_PLTGOT16    52  /* half16* M + A.  */
+#define R_PPC64_PLTGOT16_LO 53  /* half16 #lo(M + A).  */
+#define R_PPC64_PLTGOT16_HI 54  /* half16 #hi(M + A).  */
+#define R_PPC64_PLTGOT16_HA 55  /* half16 #ha(M + A).  */
+
+#define R_PPC64_ADDR16_DS      56 /* half16ds* (S + A) >> 2.  */
+#define R_PPC64_ADDR16_LO_DS   57 /* half16ds  #lo(S + A) >> 2.  */
+#define R_PPC64_GOT16_DS       58 /* half16ds* (G + A) >> 2.  */
+#define R_PPC64_GOT16_LO_DS    59 /* half16ds  #lo(G + A) >> 2.  */
+#define R_PPC64_PLT16_LO_DS    60 /* half16ds  #lo(L + A) >> 2.  */
+#define R_PPC64_SECTOFF_DS     61 /* half16ds* (R + A) >> 2.  */
+#define R_PPC64_SECTOFF_LO_DS  62 /* half16ds  #lo(R + A) >> 2.  */
+#define R_PPC64_TOC16_DS       63 /* half16ds* (S + A - .TOC.) >> 2.  */
+#define R_PPC64_TOC16_LO_DS    64 /* half16ds  #lo(S + A - .TOC.) >> 2.  */
+#define R_PPC64_PLTGOT16_DS    65 /* half16ds* (M + A) >> 2.  */
+#define R_PPC64_PLTGOT16_LO_DS 66 /* half16ds  #lo(M + A) >> 2.  */
+
+/* PowerPC64 relocations defined for the TLS access ABI.  */
+#define R_PPC64_TLS		67 /* none	(sym+add)@tls */
+#define R_PPC64_DTPMOD64	68 /* doubleword64 (sym+add)@dtpmod */
+#define R_PPC64_TPREL16		69 /* half16*	(sym+add)@tprel */
+#define R_PPC64_TPREL16_LO	70 /* half16	(sym+add)@tprel@l */
+#define R_PPC64_TPREL16_HI	71 /* half16	(sym+add)@tprel@h */
+#define R_PPC64_TPREL16_HA	72 /* half16	(sym+add)@tprel@ha */
+#define R_PPC64_TPREL64		73 /* doubleword64 (sym+add)@tprel */
+#define R_PPC64_DTPREL16	74 /* half16*	(sym+add)@dtprel */
+#define R_PPC64_DTPREL16_LO	75 /* half16	(sym+add)@dtprel@l */
+#define R_PPC64_DTPREL16_HI	76 /* half16	(sym+add)@dtprel@h */
+#define R_PPC64_DTPREL16_HA	77 /* half16	(sym+add)@dtprel@ha */
+#define R_PPC64_DTPREL64	78 /* doubleword64 (sym+add)@dtprel */
+#define R_PPC64_GOT_TLSGD16	79 /* half16*	(sym+add)@got@tlsgd */
+#define R_PPC64_GOT_TLSGD16_LO	80 /* half16	(sym+add)@got@tlsgd@l */
+#define R_PPC64_GOT_TLSGD16_HI	81 /* half16	(sym+add)@got@tlsgd@h */
+#define R_PPC64_GOT_TLSGD16_HA	82 /* half16	(sym+add)@got@tlsgd@ha */
+#define R_PPC64_GOT_TLSLD16	83 /* half16*	(sym+add)@got@tlsld */
+#define R_PPC64_GOT_TLSLD16_LO	84 /* half16	(sym+add)@got@tlsld@l */
+#define R_PPC64_GOT_TLSLD16_HI	85 /* half16	(sym+add)@got@tlsld@h */
+#define R_PPC64_GOT_TLSLD16_HA	86 /* half16	(sym+add)@got@tlsld@ha */
+#define R_PPC64_GOT_TPREL16_DS	87 /* half16ds*	(sym+add)@got@tprel */
+#define R_PPC64_GOT_TPREL16_LO_DS 88 /* half16ds (sym+add)@got@tprel@l */
+#define R_PPC64_GOT_TPREL16_HI	89 /* half16	(sym+add)@got@tprel@h */
+#define R_PPC64_GOT_TPREL16_HA	90 /* half16	(sym+add)@got@tprel@ha */
+#define R_PPC64_GOT_DTPREL16_DS	91 /* half16ds*	(sym+add)@got@dtprel */
+#define R_PPC64_GOT_DTPREL16_LO_DS 92 /* half16ds (sym+add)@got@dtprel@l */
+#define R_PPC64_GOT_DTPREL16_HI	93 /* half16	(sym+add)@got@dtprel@h */
+#define R_PPC64_GOT_DTPREL16_HA	94 /* half16	(sym+add)@got@dtprel@ha */
+#define R_PPC64_TPREL16_DS	95 /* half16ds*	(sym+add)@tprel */
+#define R_PPC64_TPREL16_LO_DS	96 /* half16ds	(sym+add)@tprel@l */
+#define R_PPC64_TPREL16_HIGHER	97 /* half16	(sym+add)@tprel@higher */
+#define R_PPC64_TPREL16_HIGHERA	98 /* half16	(sym+add)@tprel@highera */
+#define R_PPC64_TPREL16_HIGHEST	99 /* half16	(sym+add)@tprel@highest */
+#define R_PPC64_TPREL16_HIGHESTA 100 /* half16	(sym+add)@tprel@highesta */
+#define R_PPC64_DTPREL16_DS	101 /* half16ds* (sym+add)@dtprel */
+#define R_PPC64_DTPREL16_LO_DS	102 /* half16ds	(sym+add)@dtprel@l */
+#define R_PPC64_DTPREL16_HIGHER	103 /* half16	(sym+add)@dtprel@higher */
+#define R_PPC64_DTPREL16_HIGHERA 104 /* half16	(sym+add)@dtprel@highera */
+#define R_PPC64_DTPREL16_HIGHEST 105 /* half16	(sym+add)@dtprel@highest */
+#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16	(sym+add)@dtprel@highesta */
+#define R_PPC64_TLSGD		107
+#define R_PPC64_TLSLD		108
+#define R_PPC64_TOCSAVE		109
+
+#define R_PPC64_REL24_NOTOC	116
+#define R_PPC64_ENTRY		118
+
+#define R_PPC64_PCREL34		132
+#define R_PPC64_GOT_PCREL34	133
+
+#define R_PPC64_REL16		249
+#define R_PPC64_REL16_LO	250
+#define R_PPC64_REL16_HI	251
+#define R_PPC64_REL16_HA	252
+
+/* Keep this the last entry.  */
+#define R_PPC64_NUM		253
+
+#endif /* _UAPI_ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
new file mode 100644
index 0000000000..90a0ee6d0b
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * ePAPR hcall interface
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is provided under a dual BSD/GPL license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
+#define _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
+
+#define EV_BYTE_CHANNEL_SEND		1
+#define EV_BYTE_CHANNEL_RECEIVE		2
+#define EV_BYTE_CHANNEL_POLL		3
+#define EV_INT_SET_CONFIG		4
+#define EV_INT_GET_CONFIG		5
+#define EV_INT_SET_MASK			6
+#define EV_INT_GET_MASK			7
+#define EV_INT_IACK			9
+#define EV_INT_EOI			10
+#define EV_INT_SEND_IPI			11
+#define EV_INT_SET_TASK_PRIORITY	12
+#define EV_INT_GET_TASK_PRIORITY	13
+#define EV_DOORBELL_SEND		14
+#define EV_MSGSND			15
+#define EV_IDLE				16
+
+/* vendor ID: epapr */
+#define EV_LOCAL_VENDOR_ID		0	/* for private use */
+#define EV_EPAPR_VENDOR_ID		1
+#define EV_FSL_VENDOR_ID		2	/* Freescale Semiconductor */
+#define EV_IBM_VENDOR_ID		3	/* IBM */
+#define EV_GHS_VENDOR_ID		4	/* Green Hills Software */
+#define EV_ENEA_VENDOR_ID		5	/* Enea */
+#define EV_WR_VENDOR_ID			6	/* Wind River Systems */
+#define EV_AMCC_VENDOR_ID		7	/* Applied Micro Circuits */
+#define EV_KVM_VENDOR_ID		42	/* KVM */
+
+/* The max number of bytes that a byte channel can send or receive per call */
+#define EV_BYTE_CHANNEL_MAX_BYTES	16
+
+/* Token = (vendor ID << 16) | hcall number; EV_HCALL_TOKEN uses the ePAPR ID */
+#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
+#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
+
+/* epapr return codes */
+#define EV_SUCCESS		0
+#define EV_EPERM		1	/* Operation not permitted */
+#define EV_ENOENT		2	/* Entry not found */
+#define EV_EIO			3	/* I/O error occurred */
+#define EV_EAGAIN		4	/* The operation had insufficient
+					 * resources to complete and should be
+					 * retried
+					 */
+#define EV_ENOMEM		5	/* There was insufficient memory to
+					 * complete the operation */
+#define EV_EFAULT		6	/* Bad guest address */
+#define EV_ENODEV		7	/* No such device */
+#define EV_EINVAL		8	/* An argument supplied to the hcall
+					   was out of range or invalid */
+#define EV_INTERNAL		9	/* An internal error occurred */
+#define EV_CONFIG		10	/* A configuration error was detected */
+#define EV_INVALID_STATE	11	/* The object is in an invalid state */
+#define EV_UNIMPLEMENTED	12	/* Unimplemented hypercall */
+#define EV_BUFFER_OVERFLOW	13	/* Caller-supplied buffer too small */
+
+#endif /* _UAPI_ASM_POWERPC_EPAPR_HCALLS_H */
diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h
new file mode 100644
index 0000000000..4ba87de32b
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/errno.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_ERRNO_H
+#define _ASM_POWERPC_ERRNO_H
+
+#undef	EDEADLOCK
+#include <asm-generic/errno.h>
+/* Discard any EDEADLOCK the generic header supplied; powerpc uses 58. */
+#undef	EDEADLOCK
+#define	EDEADLOCK	58	/* File locking deadlock error */
+
+#endif	/* _ASM_POWERPC_ERRNO_H */
diff --git a/arch/powerpc/include/uapi/asm/fcntl.h b/arch/powerpc/include/uapi/asm/fcntl.h
new file mode 100644
index 0000000000..65ce08322a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/fcntl.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_FCNTL_H
+#define _ASM_FCNTL_H
+
+#define O_DIRECTORY      040000	/* must be a directory */
+#define O_NOFOLLOW      0100000	/* don't follow links */
+#define O_LARGEFILE     0200000
+#define O_DIRECT	0400000	/* direct disk access hint */
+/* NOTE(review): set before the generic header, presumably so it keeps these values -- confirm */
+#include <asm-generic/fcntl.h>
+
+#endif /* _ASM_FCNTL_H */
diff --git a/arch/powerpc/include/uapi/asm/ioctl.h b/arch/powerpc/include/uapi/asm/ioctl.h
new file mode 100644
index 0000000000..d623af4b9c
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_IOCTL_H
+#define _ASM_POWERPC_IOCTL_H
+
+#define _IOC_SIZEBITS	13
+#define _IOC_DIRBITS	3
+/* Direction codes are distinct single bits (1, 2, 4), so they fit in 3 bits. */
+#define _IOC_NONE	1U
+#define _IOC_READ	2U
+#define _IOC_WRITE	4U
+/* NOTE(review): set before the generic header, presumably so it keeps these values -- confirm */
+#include <asm-generic/ioctl.h>
+
+#endif	/* _ASM_POWERPC_IOCTL_H */
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
new file mode 100644
index 0000000000..2c145da3b7
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_IOCTLS_H
+#define _ASM_POWERPC_IOCTLS_H
+
+#include <asm/ioctl.h>
+
+#define FIOCLEX		_IO('f', 1)
+#define FIONCLEX	_IO('f', 2)
+#define FIOASYNC	_IOW('f', 125, int)
+#define FIONBIO		_IOW('f', 126, int)
+#define FIONREAD	_IOR('f', 127, int)
+#define TIOCINQ		FIONREAD
+#define FIOQSIZE	_IOR('f', 128, loff_t)
+
+#define TIOCGETP	_IOR('t', 8, struct sgttyb)
+#define TIOCSETP	_IOW('t', 9, struct sgttyb)
+#define TIOCSETN	_IOW('t', 10, struct sgttyb)	/* TIOCSETP wo flush */
+
+#define TIOCSETC	_IOW('t', 17, struct tchars)
+#define TIOCGETC	_IOR('t', 18, struct tchars)
+#define TCGETS		_IOR('t', 19, struct termios)
+#define TCSETS		_IOW('t', 20, struct termios)
+#define TCSETSW		_IOW('t', 21, struct termios)
+#define TCSETSF		_IOW('t', 22, struct termios)
+
+#define TCGETA		_IOR('t', 23, struct termio)
+#define TCSETA		_IOW('t', 24, struct termio)
+#define TCSETAW		_IOW('t', 25, struct termio)
+#define TCSETAF		_IOW('t', 28, struct termio)
+
+#define TCSBRK		_IO('t', 29)
+#define TCXONC		_IO('t', 30)
+#define TCFLSH		_IO('t', 31)
+
+#define TIOCSWINSZ	_IOW('t', 103, struct winsize)
+#define TIOCGWINSZ	_IOR('t', 104, struct winsize)
+#define	TIOCSTART	_IO('t', 110)		/* start output, like ^Q */
+#define	TIOCSTOP	_IO('t', 111)		/* stop output, like ^S */
+#define TIOCOUTQ        _IOR('t', 115, int)     /* output queue size */
+
+#define TIOCGLTC	_IOR('t', 116, struct ltchars)
+#define TIOCSLTC	_IOW('t', 117, struct ltchars)
+#define TIOCSPGRP	_IOW('t', 118, int)
+#define TIOCGPGRP	_IOR('t', 119, int)
+
+#define TIOCEXCL	0x540C
+#define TIOCNXCL	0x540D
+#define TIOCSCTTY	0x540E
+
+#define TIOCSTI		0x5412
+#define TIOCMGET	0x5415
+#define TIOCMBIS	0x5416
+#define TIOCMBIC	0x5417
+#define TIOCMSET	0x5418
+# define TIOCM_LE	0x001
+# define TIOCM_DTR	0x002
+# define TIOCM_RTS	0x004
+# define TIOCM_ST	0x008
+# define TIOCM_SR	0x010
+# define TIOCM_CTS	0x020
+# define TIOCM_CAR	0x040
+# define TIOCM_RNG	0x080
+# define TIOCM_DSR	0x100
+# define TIOCM_CD	TIOCM_CAR
+# define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+#define TIOCGSOFTCAR	0x5419
+#define TIOCSSOFTCAR	0x541A
+#define TIOCLINUX	0x541C
+#define TIOCCONS	0x541D
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TIOCPKT		0x5420
+# define TIOCPKT_DATA		 0
+# define TIOCPKT_FLUSHREAD	 1
+# define TIOCPKT_FLUSHWRITE	 2
+# define TIOCPKT_STOP		 4
+# define TIOCPKT_START		 8
+# define TIOCPKT_NOSTOP		16
+# define TIOCPKT_DOSTOP		32
+# define TIOCPKT_IOCTL		64
+
+
+#define TIOCNOTTY	0x5422
+#define TIOCSETD	0x5423
+#define TIOCGETD	0x5424
+#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
+#define TIOCSBRK	0x5427  /* BSD compatibility */
+#define TIOCCBRK	0x5428  /* BSD compatibility */
+#define TIOCGSID	0x5429  /* Return the session ID of FD */
+#define TIOCGRS485	0x542e
+#define TIOCSRS485	0x542f
+#define TIOCGPTN	_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK	_IOW('T',0x31, int)  /* Lock/unlock Pty */
+#define TIOCGDEV	_IOR('T',0x32, unsigned int) /* Get primary device node of /dev/console */
+#define TIOCSIG		_IOW('T',0x36, int)  /* Generate signal on Pty slave */
+#define TIOCVHANGUP	0x5437
+#define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
+#define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
+#define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
+
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+  /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+# define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
+#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+
+#endif	/* _ASM_POWERPC_IOCTLS_H */
diff --git a/arch/powerpc/include/uapi/asm/ipcbuf.h b/arch/powerpc/include/uapi/asm/ipcbuf.h
new file mode 100644
index 0000000000..21e1e0ec0b
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/ipcbuf.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_IPCBUF_H
+#define _ASM_POWERPC_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for the powerpc is identical to
+ * kern_ipc_perm as we have always had 32-bit UIDs and GIDs in the
+ * kernel.  Note extra padding because this structure is passed back
+ * and forth between kernel and user space.  Pad space is left for:
+ *	- 1 32-bit value to fill up for 8-byte alignment
+ *	- 2 miscellaneous 64-bit values
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+
+struct ipc64_perm	/* passed back and forth between kernel and user space */
+{
+	__kernel_key_t	key;
+	__kernel_uid_t	uid;
+	__kernel_gid_t	gid;
+	__kernel_uid_t	cuid;
+	__kernel_gid_t	cgid;
+	__kernel_mode_t	mode;
+	unsigned int	seq;
+	unsigned int	__pad1;		/* 32-bit fill for 8-byte alignment */
+	unsigned long long __unused1;	/* spare 64-bit value */
+	unsigned long long __unused2;	/* spare 64-bit value */
+};
+
+#endif /* _ASM_POWERPC_IPCBUF_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
new file mode 100644
index 0000000000..9f18fa090f
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -0,0 +1,736 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __LINUX_KVM_POWERPC_H
+#define __LINUX_KVM_POWERPC_H
+
+#include <linux/types.h>
+
+/* Select powerpc specific features in <linux/kvm.h> */
+#define __KVM_HAVE_SPAPR_TCE
+#define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_IRQCHIP
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Not always available, but if it is, this is the correct offset.  */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+struct kvm_regs {
+	__u64 pc;
+	__u64 cr;
+	__u64 ctr;
+	__u64 lr;
+	__u64 xer;
+	__u64 msr;
+	__u64 srr0;
+	__u64 srr1;
+	__u64 pid;
+
+	__u64 sprg0;
+	__u64 sprg1;
+	__u64 sprg2;
+	__u64 sprg3;
+	__u64 sprg4;
+	__u64 sprg5;
+	__u64 sprg6;
+	__u64 sprg7;
+
+	__u64 gpr[32];
+};
+
+#define KVM_SREGS_E_IMPL_NONE	0
+#define KVM_SREGS_E_IMPL_FSL	1
+
+#define KVM_SREGS_E_FSL_PIDn	(1 << 0) /* PID1/PID2 */
+
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK		(3 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_FULLY_RECOV	(1 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV	(2 << 0)
+#define   KVM_RUN_PPC_NMI_DISP_NOT_RECOV	(3 << 0)
+
+/*
+ * Feature bits indicate which sections of the sregs struct are valid,
+ * both in KVM_GET_SREGS and KVM_SET_SREGS.  On KVM_SET_SREGS, registers
+ * corresponding to unset feature bits will not be modified.  This allows
+ * restoring a checkpoint made without that feature, while keeping the
+ * default values of the new registers.
+ *
+ * KVM_SREGS_E_BASE contains:
+ * CSRR0/1 (refers to SRR2/3 on 40x)
+ * ESR
+ * DEAR
+ * MCSR
+ * TSR
+ * TCR
+ * DEC
+ * TB
+ * VRSAVE (USPRG0)
+ */
+#define KVM_SREGS_E_BASE		(1 << 0)
+
+/*
+ * KVM_SREGS_E_ARCH206 contains:
+ *
+ * PIR
+ * MCSRR0/1
+ * DECAR
+ * IVPR
+ */
+#define KVM_SREGS_E_ARCH206		(1 << 1)
+
+/*
+ * Contains EPCR, plus the upper half of 64-bit registers
+ * that are 32-bit on 32-bit implementations.
+ */
+#define KVM_SREGS_E_64			(1 << 2)
+
+#define KVM_SREGS_E_SPRG8		(1 << 3)
+#define KVM_SREGS_E_MCIVPR		(1 << 4)
+
+/*
+ * IVORs are used -- contains IVOR0-15, plus additional IVORs
+ * in combination with an appropriate feature bit.
+ */
+#define KVM_SREGS_E_IVOR		(1 << 5)
+
+/*
+ * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
+ * Also TLBnPS if MMUCFG[MAVN] = 1.
+ */
+#define KVM_SREGS_E_ARCH206_MMU		(1 << 6)
+
+/* DBSR, DBCR, IAC, DAC, DVC */
+#define KVM_SREGS_E_DEBUG		(1 << 7)
+
+/* Enhanced debug -- DSRR0/1, SPRG9 */
+#define KVM_SREGS_E_ED			(1 << 8)
+
+/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_SPE			(1 << 9)
+
+/*
+ * DEPRECATED! USE ONE_REG FOR THIS ONE!
+ * External Proxy (EXP) -- EPR
+ */
+#define KVM_SREGS_EXP			(1 << 10)
+
+/* External PID (E.PD) -- EPSC/EPLC */
+#define KVM_SREGS_E_PD			(1 << 11)
+
+/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PC			(1 << 12)
+
+/* Page table (E.PT) -- EPTCFG */
+#define KVM_SREGS_E_PT			(1 << 13)
+
+/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PM			(1 << 14)
+
+/*
+ * Special updates:
+ *
+ * Some registers may change even while a vcpu is not running.
+ * To avoid losing these changes, by default these registers are
+ * not updated by KVM_SET_SREGS.  To force an update, set the bit
+ * in u.e.update_special corresponding to the register to be updated.
+ *
+ * The update_special field is zero on return from KVM_GET_SREGS.
+ *
+ * When restoring a checkpoint, the caller can set update_special
+ * to 0xffffffff to ensure that everything is restored, even new features
+ * that the caller doesn't know about.
+ */
+#define KVM_SREGS_E_UPDATE_MCSR		(1 << 0)
+#define KVM_SREGS_E_UPDATE_TSR		(1 << 1)
+#define KVM_SREGS_E_UPDATE_DEC		(1 << 2)
+#define KVM_SREGS_E_UPDATE_DBSR		(1 << 3)
+
+/*
+ * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
+ * previous KVM_GET_SREGS.
+ *
+ * Unless otherwise indicated, setting any register with KVM_SET_SREGS
+ * directly sets its value.  It does not trigger any special semantics such
+ * as write-one-to-clear.  Calling KVM_SET_SREGS on an unmodified struct
+ * just received from KVM_GET_SREGS is always a no-op.
+ */
+struct kvm_sregs {
+	__u32 pvr;
+	union {
+		struct {
+			__u64 sdr1;
+			struct {
+				struct {
+					__u64 slbe;
+					__u64 slbv;
+				} slb[64];
+			} ppc64;
+			struct {
+				__u32 sr[16];
+				__u64 ibat[8];
+				__u64 dbat[8];
+			} ppc32;
+		} s;
+		struct {
+			union {
+				struct { /* KVM_SREGS_E_IMPL_FSL */
+					__u32 features; /* KVM_SREGS_E_FSL_ */
+					__u32 svr;
+					__u64 mcar;
+					__u32 hid0;
+
+					/* KVM_SREGS_E_FSL_PIDn */
+					__u32 pid1, pid2;
+				} fsl;
+				__u8 pad[256];
+			} impl;
+
+			__u32 features; /* KVM_SREGS_E_ */
+			__u32 impl_id;	/* KVM_SREGS_E_IMPL_ */
+			__u32 update_special; /* KVM_SREGS_E_UPDATE_ */
+			__u32 pir;	/* read-only */
+			__u64 sprg8;
+			__u64 sprg9;	/* E.ED */
+			__u64 csrr0;
+			__u64 dsrr0;	/* E.ED */
+			__u64 mcsrr0;
+			__u32 csrr1;
+			__u32 dsrr1;	/* E.ED */
+			__u32 mcsrr1;
+			__u32 esr;
+			__u64 dear;
+			__u64 ivpr;
+			__u64 mcivpr;
+			__u64 mcsr;	/* KVM_SREGS_E_UPDATE_MCSR */
+
+			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
+			__u32 tcr;
+			__u32 decar;
+			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
+
+			/*
+			 * Userspace can read TB directly, but the
+			 * value reported here is consistent with "dec".
+			 *
+			 * Read-only.
+			 */
+			__u64 tb;
+
+			__u32 dbsr;	/* KVM_SREGS_E_UPDATE_DBSR */
+			__u32 dbcr[3];
+			/*
+			 * iac/dac registers are 64bit wide, while this API
+			 * interface provides only lower 32 bits on 64 bit
+			 * processors. ONE_REG interface is added for 64bit
+			 * iac/dac registers.
+			 */
+			__u32 iac[4];
+			__u32 dac[2];
+			__u32 dvc[2];
+			__u8 num_iac;	/* read-only */
+			__u8 num_dac;	/* read-only */
+			__u8 num_dvc;	/* read-only */
+			__u8 pad;
+
+			__u32 epr;	/* EXP */
+			__u32 vrsave;	/* a.k.a. USPRG0 */
+			__u32 epcr;	/* KVM_SREGS_E_64 */
+
+			__u32 mas0;
+			__u32 mas1;
+			__u64 mas2;
+			__u64 mas7_3;
+			__u32 mas4;
+			__u32 mas6;
+
+			__u32 ivor_low[16]; /* IVOR0-15 */
+			__u32 ivor_high[18]; /* IVOR32+, plus room to expand */
+
+			__u32 mmucfg;	/* read-only */
+			__u32 eptcfg;	/* E.PT, read-only */
+			__u32 tlbcfg[4];/* read-only */
+			__u32 tlbps[4]; /* read-only */
+
+			__u32 eplc, epsc; /* E.PD */
+		} e;
+		__u8 pad[1020];
+	} u;
+};
+
+struct kvm_fpu {
+	__u64 fpr[32];
+};
+
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
+struct kvm_debug_exit_arch {
+	__u64 address;
+	/*
+	 * Reason for exiting to userspace: h/w breakpoint, watchpoint
+	 * (read, write or both) or software breakpoint.
+	 */
+	__u32 status;	/* KVMPPC_DEBUG_* */
+	__u32 reserved;
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+	struct {
+		/* H/W breakpoint/watchpoint address */
+		__u64 addr;
+		/*
+		 * Type denotes h/w breakpoint, read watchpoint, write
+		 * watchpoint or watchpoint (both read and write).
+		 */
+		__u32 type;	/* KVMPPC_DEBUG_* */
+		__u32 reserved;
+	} bp[16];	/* 16 breakpoint/watchpoint slots */
+};
+
+/* Debug related defines */
+/*
+ * kvm_guest_debug->control is a 32-bit field. The lower 16 bits are generic
+ * and the upper 16 bits are architecture specific. The architecture-specific
+ * bits below indicate whether the ioctl sets a hardware or software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP		0x00010000
+#define KVM_GUESTDBG_USE_HW_BP		0x00020000
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+#define KVM_INTERRUPT_SET	-1U
+#define KVM_INTERRUPT_UNSET	-2U
+#define KVM_INTERRUPT_SET_LEVEL	-3U
+
+#define KVM_CPU_440		1
+#define KVM_CPU_E500V2		2
+#define KVM_CPU_3S_32		3
+#define KVM_CPU_3S_64		4
+#define KVM_CPU_E500MC		5
+
+/* for KVM_CAP_SPAPR_TCE */
+struct kvm_create_spapr_tce {
+	__u64 liobn;
+	__u32 window_size;
+};
+
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+	__u64 liobn;
+	__u32 page_shift;
+	__u32 flags;
+	__u64 offset;	/* in pages */
+	__u64 size;	/* in pages */
+};
+
+/* for KVM_ALLOCATE_RMA */
+struct kvm_allocate_rma {
+	__u64 rma_size;
+};
+
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+	char name[120];
+	__u64 token;	/* Use a token of 0 to undefine a mapping */
+};
+
+struct kvm_book3e_206_tlb_entry {
+	__u32 mas8;
+	__u32 mas1;
+	__u64 mas2;
+	__u64 mas7_3;
+};
+
+struct kvm_book3e_206_tlb_params {
+	/*
+	 * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+	 *
+	 * - The number of ways of TLB0 must be a power of two between 2 and
+	 *   16.
+	 * - TLB1 must be fully associative.
+	 * - The size of TLB0 must be a multiple of the number of ways, and
+	 *   the number of sets must be a power of two.
+	 * - The size of TLB1 may not exceed 64 entries.
+	 * - TLB0 supports 4 KiB pages.
+	 * - The page sizes supported by TLB1 are as indicated by
+	 *   TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
+	 *   as returned by KVM_GET_SREGS.
+	 * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
+	 *   and tlb_ways[] must be zero.
+	 *
+	 * tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
+	 *
+	 * KVM will adjust TLBnCFG based on the sizes configured here,
+	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+	 * set to zero.
+	 */
+	__u32 tlb_sizes[4];	/* size of TLBn in entries, indexed by n */
+	__u32 tlb_ways[4];	/* number of ways of TLBn, indexed by n */
+	__u32 reserved[8];
+};
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+	__u64	flags;
+	__u64	start_index;
+	__u64	reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY	((__u64)0x1)
+#define KVM_GET_HTAB_WRITE		((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid.  Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream.  The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+	__u32	index;
+	__u16	n_valid;	/* valid HPTEs (16 bytes each) after the header */
+	__u16	n_invalid;	/* invalid HPTEs after those; not in the stream */
+};
+
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;	/* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX	1	/* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE	2	/* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
+/* For KVM_PPC_GET_CPU_CHAR */
+struct kvm_ppc_cpu_char {
+	__u64	character;		/* characteristics of the CPU */
+	__u64	behaviour;		/* recommended software behaviour */
+	__u64	character_mask;		/* valid bits in character */
+	__u64	behaviour_mask;		/* valid bits in behaviour */
+};
+
+/*
+ * Values for character and character_mask.
+ * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ */
+#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31		(1ULL << 63)
+#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED	(1ULL << 62)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30	(1ULL << 61)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2	(1ULL << 60)
+#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV	(1ULL << 59)
+#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED	(1ULL << 58)
+#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF	(1ULL << 57)
+#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS	(1ULL << 56)
+#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST	(1ull << 54)
+
+#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY	(1ULL << 63)
+#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR		(1ULL << 62)
+#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61)
+#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58)
+
+/* Per-vcpu XICS interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+
+#define  KVM_REG_PPC_ICP_CPPR_SHIFT	56	/* current proc priority */
+#define  KVM_REG_PPC_ICP_CPPR_MASK	0xff
+#define  KVM_REG_PPC_ICP_XISR_SHIFT	32	/* interrupt status field */
+#define  KVM_REG_PPC_ICP_XISR_MASK	0xffffff
+#define  KVM_REG_PPC_ICP_MFRR_SHIFT	24	/* pending IPI priority */
+#define  KVM_REG_PPC_ICP_MFRR_MASK	0xff
+#define  KVM_REG_PPC_ICP_PPRI_SHIFT	16	/* pending irq priority */
+#define  KVM_REG_PPC_ICP_PPRI_MASK	0xff
+
+#define KVM_REG_PPC_VP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC		1
+#define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER	2	/* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE	3	/* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
+#define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_PPC_DABR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_PPC_DSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
+#define KVM_REG_PPC_PURR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
+#define KVM_REG_PPC_SPURR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
+#define KVM_REG_PPC_DAR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
+#define KVM_REG_PPC_DSISR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
+#define KVM_REG_PPC_AMR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
+#define KVM_REG_PPC_UAMOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
+
+#define KVM_REG_PPC_MMCR0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
+#define KVM_REG_PPC_MMCR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
+#define KVM_REG_PPC_MMCRA	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+#define KVM_REG_PPC_MMCR2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13)
+#define KVM_REG_PPC_MMCRS	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14)
+#define KVM_REG_PPC_SIAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15)
+#define KVM_REG_PPC_SDAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16)
+#define KVM_REG_PPC_SIER	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17)
+
+#define KVM_REG_PPC_PMC1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
+#define KVM_REG_PPC_PMC2	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
+#define KVM_REG_PPC_PMC3	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
+#define KVM_REG_PPC_PMC4	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
+#define KVM_REG_PPC_PMC5	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
+#define KVM_REG_PPC_PMC6	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
+#define KVM_REG_PPC_PMC7	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
+#define KVM_REG_PPC_PMC8	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
+
+/* 32 floating-point registers */
+#define KVM_REG_PPC_FPR0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
+#define KVM_REG_PPC_FPR(n)	(KVM_REG_PPC_FPR0 + (n))
+#define KVM_REG_PPC_FPR31	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
+
+/* 32 VMX/Altivec vector registers */
+#define KVM_REG_PPC_VR0		(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
+#define KVM_REG_PPC_VR(n)	(KVM_REG_PPC_VR0 + (n))
+#define KVM_REG_PPC_VR31	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
+
+/* 32 double-width FP registers for VSX */
+/* High-order halves overlap with FP regs */
+#define KVM_REG_PPC_VSR0	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
+#define KVM_REG_PPC_VSR(n)	(KVM_REG_PPC_VSR0 + (n))
+#define KVM_REG_PPC_VSR31	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
+
+/* FP and vector status/control registers */
+#define KVM_REG_PPC_FPSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accessed via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
+#define KVM_REG_PPC_VSCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
+
+/* Virtual processor areas */
+/* For SLB & DTL, address in high (first) half, length in low half */
+#define KVM_REG_PPC_VPA_ADDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
+#define KVM_REG_PPC_VPA_SLB	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
+#define KVM_REG_PPC_VPA_DTL	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+
+#define KVM_REG_PPC_EPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
+#define KVM_REG_PPC_EPR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
+
+/* Timer Status Register OR/CLEAR interface */
+#define KVM_REG_PPC_OR_TSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
+#define KVM_REG_PPC_CLEAR_TSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
+#define KVM_REG_PPC_TCR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
+#define KVM_REG_PPC_TSR		(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
+/* MMU registers */
+#define KVM_REG_PPC_MAS0	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using
+ * KVM_CAP_SW_TLB ioctl
+ */
+#define KVM_REG_PPC_TLB0CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
+
+/* Timebase offset */
+#define KVM_REG_PPC_TB_OFFSET	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c)
+
+/* POWER8 registers */
+#define KVM_REG_PPC_SPMC1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d)
+#define KVM_REG_PPC_SPMC2	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e)
+#define KVM_REG_PPC_IAMR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f)
+#define KVM_REG_PPC_TFHAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0)
+#define KVM_REG_PPC_TFIAR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1)
+#define KVM_REG_PPC_TEXASR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2)
+#define KVM_REG_PPC_FSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3)
+#define KVM_REG_PPC_PSPB	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4)
+#define KVM_REG_PPC_EBBHR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa5)
+#define KVM_REG_PPC_EBBRR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6)
+#define KVM_REG_PPC_BESCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7)
+#define KVM_REG_PPC_TAR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8)
+#define KVM_REG_PPC_DPDES	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9)
+#define KVM_REG_PPC_DAWR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
+#define KVM_REG_PPC_DAWRX	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
+#define KVM_REG_PPC_CIABR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac)
+#define KVM_REG_PPC_IC		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad)
+#define KVM_REG_PPC_VTB		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae)
+#define KVM_REG_PPC_CSIGR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf)
+#define KVM_REG_PPC_TACR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0)
+#define KVM_REG_PPC_TCSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
+#define KVM_REG_PPC_PID		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
+#define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
+
+#define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
+#define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
+#define KVM_REG_PPC_LPCR_64	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5)
+#define KVM_REG_PPC_PPR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
+
+/* Architecture compatibility level */
+#define KVM_REG_PPC_ARCH_COMPAT	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
+
+#define KVM_REG_PPC_DABRX	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
+#define KVM_REG_PPC_WORT	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
+#define KVM_REG_PPC_SPRG9	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
+#define KVM_REG_PPC_DBSR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
+#define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+#define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
+
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+
+/* Transactional Memory checkpointed state:
+ * This is all GPRs, all VSX regs and a subset of SPRs
+ */
+#define KVM_REG_PPC_TM		(KVM_REG_PPC | 0x80000000)
+/* TM GPRs */
+#define KVM_REG_PPC_TM_GPR0	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_PPC_TM_GPR(n)	(KVM_REG_PPC_TM_GPR0 + (n))
+#define KVM_REG_PPC_TM_GPR31	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f)
+/* TM VSX */
+#define KVM_REG_PPC_TM_VSR0	(KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20)
+#define KVM_REG_PPC_TM_VSR(n)	(KVM_REG_PPC_TM_VSR0 + (n))
+#define KVM_REG_PPC_TM_VSR63	(KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x5f)
+/* TM SPRS */
+#define KVM_REG_PPC_TM_CR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60)
+#define KVM_REG_PPC_TM_LR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61)
+#define KVM_REG_PPC_TM_CTR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62)
+#define KVM_REG_PPC_TM_FPSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63)
+#define KVM_REG_PPC_TM_AMR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64)
+#define KVM_REG_PPC_TM_PPR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65)
+#define KVM_REG_PPC_TM_VRSAVE	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66)
+#define KVM_REG_PPC_TM_VSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
+#define KVM_REG_PPC_TM_DSCR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
+#define KVM_REG_PPC_TM_TAR	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER	(KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
+
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL		2
+#define   KVM_DEV_XICS_NR_SERVERS	1
+
+/* Layout of 64-bit source attribute values */
+#define  KVM_XICS_DESTINATION_SHIFT	0
+#define  KVM_XICS_DESTINATION_MASK	0xffffffffULL
+#define  KVM_XICS_PRIORITY_SHIFT	32
+#define  KVM_XICS_PRIORITY_MASK		0xff
+#define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
+#define  KVM_XICS_MASKED		(1ULL << 41)
+#define  KVM_XICS_PENDING		(1ULL << 42)
+#define  KVM_XICS_PRESENTED		(1ULL << 43)
+#define  KVM_XICS_QUEUED		(1ULL << 44)
+
+/* POWER9 XIVE Native Interrupt Controller */
+#define KVM_DEV_XIVE_GRP_CTRL		1
+#define   KVM_DEV_XIVE_RESET		1
+#define   KVM_DEV_XIVE_EQ_SYNC		2
+#define   KVM_DEV_XIVE_NR_SERVERS	3
+#define KVM_DEV_XIVE_GRP_SOURCE		2	/* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG	3	/* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_EQ_CONFIG	4	/* 64-bit EQ identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_SYNC	5       /* 64-bit source identifier */
+
+/* Layout of 64-bit XIVE source attribute values */
+#define KVM_XIVE_LEVEL_SENSITIVE	(1ULL << 0)
+#define KVM_XIVE_LEVEL_ASSERTED		(1ULL << 1)
+
+/* Layout of 64-bit XIVE source configuration attribute values */
+#define KVM_XIVE_SOURCE_PRIORITY_SHIFT	0
+#define KVM_XIVE_SOURCE_PRIORITY_MASK	0x7
+#define KVM_XIVE_SOURCE_SERVER_SHIFT	3
+#define KVM_XIVE_SOURCE_SERVER_MASK	0xfffffff8ULL
+#define KVM_XIVE_SOURCE_MASKED_SHIFT	32
+#define KVM_XIVE_SOURCE_MASKED_MASK	0x100000000ULL
+#define KVM_XIVE_SOURCE_EISN_SHIFT	33
+#define KVM_XIVE_SOURCE_EISN_MASK	0xfffffffe00000000ULL
+
+/* Layout of 64-bit EQ identifier */
+#define KVM_XIVE_EQ_PRIORITY_SHIFT	0
+#define KVM_XIVE_EQ_PRIORITY_MASK	0x7
+#define KVM_XIVE_EQ_SERVER_SHIFT	3
+#define KVM_XIVE_EQ_SERVER_MASK		0xfffffff8ULL
+
+/* Layout of EQ configuration values (64 bytes) */
+struct kvm_ppc_xive_eq {
+	__u32 flags;
+	__u32 qshift;
+	__u64 qaddr;
+	__u32 qtoggle;
+	__u32 qindex;
+	__u8  pad[40];	/* pads the structure to 64 bytes */
+};
+
+#define KVM_XIVE_EQ_ALWAYS_NOTIFY	0x00000001
+
+#define KVM_XIVE_TIMA_PAGE_OFFSET	0
+#define KVM_XIVE_ESB_PAGE_OFFSET	4
+
+#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
new file mode 100644
index 0000000000..a809b1b44d
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef _UAPI__POWERPC_KVM_PARA_H__
+#define _UAPI__POWERPC_KVM_PARA_H__
+
+#include <linux/types.h>
+
+/*
+ * Additions to this struct must only occur at the end, and should be
+ * accompanied by a KVM_MAGIC_FEAT flag to advertise that they are present
+ * (albeit not necessarily relevant to the current target hardware platform).
+ *
+ * Struct fields are always 32 or 64 bit aligned, depending on them being 32
+ * or 64 bit wide respectively.
+ *
+ * See Documentation/virt/kvm/ppc-pv.rst
+ */
+struct kvm_vcpu_arch_shared {
+	__u64 scratch1;
+	__u64 scratch2;
+	__u64 scratch3;
+	__u64 critical;		/* Guest may not get interrupts if == r1 */
+	__u64 sprg0;
+	__u64 sprg1;
+	__u64 sprg2;
+	__u64 sprg3;
+	__u64 srr0;
+	__u64 srr1;
+	__u64 dar;		/* dear on BookE */
+	__u64 msr;
+	__u32 dsisr;
+	__u32 int_pending;	/* Tells the guest if we have an interrupt */
+	__u32 sr[16];
+	__u32 mas0;
+	__u32 mas1;
+	__u64 mas7_3;
+	__u64 mas2;
+	__u32 mas4;
+	__u32 mas6;
+	__u32 esr;
+	__u32 pir;
+
+	/*
+	 * SPRG4-7 are user-readable, so we can only keep these consistent
+	 * between the shared area and the real registers when there's an
+	 * intervening exit to KVM.  This also applies to SPRG3 on some
+	 * chips.
+	 *
+	 * This suffices for access by guest userspace, since in PR-mode
+	 * KVM, an exit must occur when changing the guest's MSR[PR].
+	 * If the guest kernel writes to SPRG3-7 via the shared area, it
+	 * must also use the shared area for reading while in kernel space.
+	 */
+	__u64 sprg4;
+	__u64 sprg5;
+	__u64 sprg6;
+	__u64 sprg7;
+};
+
+#define KVM_SC_MAGIC_R0		0x4b564d21 /* "KVM!" */
+
+#define KVM_HCALL_TOKEN(num)     _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num)
+
+#include <asm/epapr_hcalls.h>
+
+#define KVM_FEATURE_MAGIC_PAGE	1
+
+/* Magic page flags from host to guest */
+
+#define KVM_MAGIC_FEAT_SR		(1 << 0)
+
+/* MASn, ESR, PIR, and high SPRGs */
+#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7	(1 << 1)
+
+/* Magic page flags from guest to host */
+
+#define MAGIC_PAGE_FLAG_NOT_MAPPED_NX	(1 << 0)
+
+
+#endif /* _UAPI__POWERPC_KVM_PARA_H__ */
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
new file mode 100644
index 0000000000..c0c737215b
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_MMAN_H
+#define _UAPI_ASM_POWERPC_MMAN_H
+
+#include <asm-generic/mman-common.h>
+
+
+#define PROT_SAO	0x10		/* Strong Access Ordering */
+
+#define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
+#define MAP_NORESERVE   0x40            /* don't reserve swap pages */
+#define MAP_LOCKED	0x80
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+
+
+#define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
+#define MCL_FUTURE      0x4000          /* lock all additions to address space */
+#define MCL_ONFAULT	0x8000		/* lock all pages that are faulted in */
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE   0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK       (PKEY_DISABLE_ACCESS |\
+				PKEY_DISABLE_WRITE  |\
+				PKEY_DISABLE_EXECUTE)
+#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
new file mode 100644
index 0000000000..7919b2ba41
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_MSGBUF_H
+#define _ASM_POWERPC_MSGBUF_H
+
+#include <asm/ipcbuf.h>
+
+/*
+ * The msqid64_ds structure for the PowerPC architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+#ifdef __powerpc64__
+	long		 msg_stime;	/* last msgsnd time */
+	long		 msg_rtime;	/* last msgrcv time */
+	long		 msg_ctime;	/* last change time */
+#else
+	unsigned long  msg_stime_high;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_ctime_high;
+	unsigned long  msg_ctime;	/* last change time */
+#endif
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+
+#endif	/* _ASM_POWERPC_MSGBUF_H */
diff --git a/arch/powerpc/include/uapi/asm/nvram.h b/arch/powerpc/include/uapi/asm/nvram.h
new file mode 100644
index 0000000000..c92c7f056a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/nvram.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * NVRAM definitions and access functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_NVRAM_H
+#define _UAPI_ASM_POWERPC_NVRAM_H
+
+/* Signatures for nvram partitions */
+#define NVRAM_SIG_SP	0x02	/* support processor */
+#define NVRAM_SIG_OF	0x50	/* open firmware config */
+#define NVRAM_SIG_FW	0x51	/* general firmware */
+#define NVRAM_SIG_HW	0x52	/* hardware (VPD) */
+#define NVRAM_SIG_FLIP	0x5a	/* Apple flip/flop header */
+#define NVRAM_SIG_APPL	0x5f	/* Apple "system" (???) */
+#define NVRAM_SIG_SYS	0x70	/* system env vars */
+#define NVRAM_SIG_CFG	0x71	/* config data */
+#define NVRAM_SIG_ELOG	0x72	/* error log */
+#define NVRAM_SIG_VEND	0x7e	/* vendor defined */
+#define NVRAM_SIG_FREE	0x7f	/* Free space */
+#define NVRAM_SIG_OS	0xa0	/* OS defined */
+#define NVRAM_SIG_PANIC	0xa1	/* Apple OSX "panic" */
+
+
+/* PowerMac specific nvram stuffs */
+
+enum {
+	pmac_nvram_OF,		/* Open Firmware partition */
+	pmac_nvram_XPRAM,	/* MacOS XPRAM partition */
+	pmac_nvram_NR		/* MacOS Name Registry partition */
+};
+
+
+/* Some offsets in XPRAM */
+#define PMAC_XPRAM_MACHINE_LOC	0xe4
+#define PMAC_XPRAM_SOUND_VOLUME	0x08
+
+/* Machine location structure in PowerMac XPRAM */
+struct pmac_machine_location {
+	unsigned int	latitude;	/* 2+30 bit Fractional number */
+	unsigned int	longitude;	/* 2+30 bit Fractional number */
+	unsigned int	delta;		/* mix of GMT delta and DLS */
+};
+
+/*
+ * /dev/nvram ioctls
+ *
+ * Note that PMAC_NVRAM_GET_OFFSET is still supported, but is
+ * definitely obsolete. Do not use it if you can avoid it
+ */
+
+#define OBSOLETE_PMAC_NVRAM_GET_OFFSET \
+				_IOWR('p', 0x40, int)
+
+#define IOC_NVRAM_GET_OFFSET	_IOWR('p', 0x42, int)	/* Get NVRAM partition offset */
+#define IOC_NVRAM_SYNC		_IO('p', 0x43)		/* Sync NVRAM image */
+
+#endif /* _UAPI_ASM_POWERPC_NVRAM_H */
diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h
new file mode 100644
index 0000000000..1869cf83a8
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/opal-prd.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * (C) Copyright IBM 2015
+ *
+ * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_
+#define _UAPI_ASM_POWERPC_OPAL_PRD_H_
+
+#include <linux/types.h>
+
+/**
+ * The version of the kernel interface of the PRD system. This describes the
+ * interface available for the /dev/opal-prd device. The actual PRD message
+ * layout and content is private to the firmware <--> userspace interface, so
+ * is not covered by this versioning.
+ *
+ * Future interface versions are backwards-compatible; if a later kernel
+ * version is encountered, functionality provided in earlier versions
+ * will work.
+ */
+#define OPAL_PRD_KERNEL_VERSION		1
+
+#define OPAL_PRD_GET_INFO		_IOR('o', 0x01, struct opal_prd_info)
+#define OPAL_PRD_SCOM_READ		_IOR('o', 0x02, struct opal_prd_scom)
+#define OPAL_PRD_SCOM_WRITE		_IOW('o', 0x03, struct opal_prd_scom)
+
+#ifndef __ASSEMBLY__
+
+struct opal_prd_info {
+	__u64	version;
+	__u64	reserved[3];
+};
+
+struct opal_prd_scom {
+	__u64	chip;
+	__u64	addr;
+	__u64	data;
+	__s64	rc;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_POWERPC_OPAL_PRD_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
new file mode 100644
index 0000000000..1743992504
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
+ *
+ * (C) Copyright IBM 2020
+ *
+ * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
+ */
+
+#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
+#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
+
+#include <linux/types.h>
+#include <linux/ndctl.h>
+
+/*
+ * PDSM Envelope:
+ *
+ * The ioctl ND_CMD_CALL exchanges data between user-space and kernel via an
+ * envelope which consists of two header sections and a payload section, as
+ * illustrated below:
+ *  +-----------------+---------------+---------------------------+
+ *  |   64-Bytes      |   8-Bytes     |       Max 184-Bytes       |
+ *  +-----------------+---------------+---------------------------+
+ *  | ND-HEADER       |  PDSM-HEADER  |      PDSM-PAYLOAD         |
+ *  +-----------------+---------------+---------------------------+
+ *  | nd_family       |               |                           |
+ *  | nd_size_out     | cmd_status    |                           |
+ *  | nd_size_in      | reserved      |     nd_pdsm_payload       |
+ *  | nd_command      | payload   --> |                           |
+ *  | nd_fw_size      |               |                           |
+ *  | nd_payload ---> |               |                           |
+ *  +-----------------+---------------+---------------------------+
+ *
+ * ND Header:
+ * This is the generic libnvdimm header described as 'struct nd_cmd_pkg'
+ * which is interpreted by libnvdimm before passed on to papr_scm. Important
+ * member fields used are:
+ * 'nd_family'		: (In) NVDIMM_FAMILY_PAPR_SCM
+ * 'nd_size_in'		: (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0)
+ * 'nd_size_out'        : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD
+ * 'nd_command'         : (In) One of PAPR_PDSM_XXX
+ * 'nd_fw_size'         : (Out) PDSM-HEADER + size of actual payload returned
+ *
+ * PDSM Header:
+ * This is papr-scm specific header that precedes the payload. This is defined
+ * as 'struct nd_pkg_pdsm'. The following fields are available in this header:
+ *
+ * 'cmd_status'		: (Out) Errors if any encountered while servicing PDSM.
+ * 'reserved'		: Not used, reserved for future and should be set to 0.
+ * 'payload'            : A union of all the possible payload structs
+ *
+ * PDSM Payload:
+ *
+ * The layout of the PDSM Payload is defined by various structs shared between
+ * papr_scm and libndctl so that contents of payload can be interpreted. As such
+ * it is defined as a union of all possible payload structs, namely
+ * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command'
+ * appropriate member of the union is accessed.
+ */
+
+/* Max payload size that we can handle */
+#define ND_PDSM_PAYLOAD_MAX_SIZE 184
+
+/* Size of the PDSM header: the whole packet minus its maximum-size payload */
+#define ND_PDSM_HDR_SIZE \
+	(sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE)
+
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY       0
+#define PAPR_PDSM_DIMM_UNHEALTHY     1
+#define PAPR_PDSM_DIMM_CRITICAL      2
+#define PAPR_PDSM_DIMM_FATAL         3
+
+/* struct nd_papr_pdsm_health.extension_flags field flags */
+
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
+/*
+ * Struct exchanged between kernel & ndctl for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ *
+ * extension_flags	: Any extension fields present in the struct.
+ * dimm_unarmed		: Dimm not armed, so contents won't persist.
+ * dimm_bad_shutdown	: Previous shutdown did not persist contents.
+ * dimm_bad_restore	: Contents from previous shutdown weren't restored.
+ * dimm_scrubbed	: Contents of the dimm have been scrubbed.
+ * dimm_locked		: Contents of the dimm can't be modified until CEC reboot
+ * dimm_encrypted	: Contents of dimm are encrypted.
+ * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
+ * dimm_fuel_gauge	: Life remaining of DIMM as a percentage from 0-100
+ */
+struct nd_papr_pdsm_health {
+	union {
+		struct {
+			__u32 extension_flags;
+			__u8 dimm_unarmed;
+			__u8 dimm_bad_shutdown;
+			__u8 dimm_bad_restore;
+			__u8 dimm_scrubbed;
+			__u8 dimm_locked;
+			__u8 dimm_encrypted;
+			__u16 dimm_health;
+
+			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+			__u16 dimm_fuel_gauge;
+
+			/* Extension flag PDSM_DIMM_DSC_VALID */
+			__u64 dimm_dsc;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+	union {
+		struct {
+			/* One or more of PDSM_SMART_INJECT_ */
+			__u32 flags;
+			__u8 fatal_enable;
+			__u8 unsafe_shutdown_enable;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
+
+/*
+ * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
+ * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
+ */
+enum papr_pdsm {
+	PAPR_PDSM_MIN = 0x0,
+	PAPR_PDSM_HEALTH,		/* = 1 */
+	PAPR_PDSM_SMART_INJECT,		/* = 2 */
+	PAPR_PDSM_MAX,			/* = 3, one past the last method above */
+};
+
+/* Maximal union that can hold all possible payload types */
+union nd_pdsm_payload {
+	struct nd_papr_pdsm_health health;
+	struct nd_papr_pdsm_smart_inject smart_inject;
+	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+} __packed;
+
+/*
+ * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
+ * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
+ * that should always precede this struct when sent to papr_scm via CMD_CALL
+ * interface.
+ */
+struct nd_pkg_pdsm {
+	__s32 cmd_status;	/* Out: Sub-cmd status returned back */
+	__u16 reserved[2];	/* Ignored and to be set as '0' */
+	union nd_pdsm_payload payload;
+} __packed;
+
+#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
diff --git a/arch/powerpc/include/uapi/asm/perf_event.h b/arch/powerpc/include/uapi/asm/perf_event.h
new file mode 100644
index 0000000000..ce488e48db
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_event.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright 2013 Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_PERF_EVENT_H
+#define _UAPI_ASM_POWERPC_PERF_EVENT_H
+
+/*
+ * We use bit 63 of perf_event_attr.config as a flag to request EBB.
+ */
+#define PERF_EVENT_CONFIG_EBB_SHIFT	63
+
+#endif /* _UAPI_ASM_POWERPC_PERF_EVENT_H */
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000..749a2e3af8
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_POWERPC_PERF_REGS_H
+#define _UAPI_ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+	PERF_REG_POWERPC_R0,
+	PERF_REG_POWERPC_R1,
+	PERF_REG_POWERPC_R2,
+	PERF_REG_POWERPC_R3,
+	PERF_REG_POWERPC_R4,
+	PERF_REG_POWERPC_R5,
+	PERF_REG_POWERPC_R6,
+	PERF_REG_POWERPC_R7,
+	PERF_REG_POWERPC_R8,
+	PERF_REG_POWERPC_R9,
+	PERF_REG_POWERPC_R10,
+	PERF_REG_POWERPC_R11,
+	PERF_REG_POWERPC_R12,
+	PERF_REG_POWERPC_R13,
+	PERF_REG_POWERPC_R14,
+	PERF_REG_POWERPC_R15,
+	PERF_REG_POWERPC_R16,
+	PERF_REG_POWERPC_R17,
+	PERF_REG_POWERPC_R18,
+	PERF_REG_POWERPC_R19,
+	PERF_REG_POWERPC_R20,
+	PERF_REG_POWERPC_R21,
+	PERF_REG_POWERPC_R22,
+	PERF_REG_POWERPC_R23,
+	PERF_REG_POWERPC_R24,
+	PERF_REG_POWERPC_R25,
+	PERF_REG_POWERPC_R26,
+	PERF_REG_POWERPC_R27,
+	PERF_REG_POWERPC_R28,
+	PERF_REG_POWERPC_R29,
+	PERF_REG_POWERPC_R30,
+	PERF_REG_POWERPC_R31,
+	PERF_REG_POWERPC_NIP,
+	PERF_REG_POWERPC_MSR,
+	PERF_REG_POWERPC_ORIG_R3,
+	PERF_REG_POWERPC_CTR,
+	PERF_REG_POWERPC_LINK,
+	PERF_REG_POWERPC_XER,
+	PERF_REG_POWERPC_CCR,
+	PERF_REG_POWERPC_SOFTE,
+	PERF_REG_POWERPC_TRAP,
+	PERF_REG_POWERPC_DAR,
+	PERF_REG_POWERPC_DSISR,
+	PERF_REG_POWERPC_SIER,
+	PERF_REG_POWERPC_MMCRA,
+	/* Extended registers */
+	PERF_REG_POWERPC_MMCR0,
+	PERF_REG_POWERPC_MMCR1,
+	PERF_REG_POWERPC_MMCR2,
+	PERF_REG_POWERPC_MMCR3,
+	PERF_REG_POWERPC_SIER2,
+	PERF_REG_POWERPC_SIER3,
+	PERF_REG_POWERPC_PMC1,
+	PERF_REG_POWERPC_PMC2,
+	PERF_REG_POWERPC_PMC3,
+	PERF_REG_POWERPC_PMC4,
+	PERF_REG_POWERPC_PMC5,
+	PERF_REG_POWERPC_PMC6,
+	PERF_REG_POWERPC_SDAR,
+	PERF_REG_POWERPC_SIAR,
+	/* Max mask value for interrupt regs w/o extended regs */
+	PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
+	/* Max mask value for interrupt regs including extended regs */
+	PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1,
+};
+
+#define PERF_REG_PMU_MASK	((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
+ * includes 11 SPRS from MMCR0 to SIAR excluding the
+ * unsupported SPRS MMCR3, SIER2 and SIER3.
+ */
+#define PERF_REG_PMU_MASK_300	\
+	((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \
+	(1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \
+	(1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \
+	(1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \
+	(1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \
+	(1ULL << PERF_REG_POWERPC_SIAR))
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
+ * includes 14 SPRs from MMCR0 to SIAR.
+ */
+#define PERF_REG_PMU_MASK_31	\
+	(PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \
+	(1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3))
+
+#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/include/uapi/asm/posix_types.h b/arch/powerpc/include/uapi/asm/posix_types.h
new file mode 100644
index 0000000000..9c03423125
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/posix_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_POSIX_TYPES_H
+#define _ASM_POWERPC_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+#ifdef __powerpc64__
+typedef unsigned long	__kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t	/* NOTE(review): presumably tested by <asm-generic/posix_types.h> to skip its default - confirm */
+#else	/* !__powerpc64__ */
+typedef short		__kernel_ipc_pid_t;
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t	/* same self-define pattern as above */
+#endif	/* __powerpc64__ */
+
+#include <asm-generic/posix_types.h>
+
+#endif /* _ASM_POWERPC_POSIX_TYPES_H */
diff --git a/arch/powerpc/include/uapi/asm/ps3fb.h b/arch/powerpc/include/uapi/asm/ps3fb.h
new file mode 100644
index 0000000000..fd7e3a0d35
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/ps3fb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006, 2007 Sony Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _ASM_POWERPC_PS3FB_H_
+#define _ASM_POWERPC_PS3FB_H_
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* ioctl */
+#define PS3FB_IOCTL_SETMODE       _IOW('r',  1, int) /* set video mode */
+#define PS3FB_IOCTL_GETMODE       _IOR('r',  2, int) /* get video mode */
+#define PS3FB_IOCTL_SCREENINFO    _IOR('r',  3, int) /* get screen info */
+#define PS3FB_IOCTL_ON            _IO('r', 4)        /* use IOCTL_FSEL */
+#define PS3FB_IOCTL_OFF           _IO('r', 5)        /* return to normal-flip */
+#define PS3FB_IOCTL_FSEL          _IOW('r', 6, int)  /* blit and flip request */
+
+#ifndef FBIO_WAITFORVSYNC
+#define FBIO_WAITFORVSYNC         _IOW('F', 0x20, __u32) /* wait for vsync */
+#endif
+
+struct ps3fb_ioctl_res {
+	__u32 xres; /* frame buffer x_size */
+	__u32 yres; /* frame buffer y_size */
+	__u32 xoff; /* margin x */
+	__u32 yoff; /* margin y */
+	__u32 num_frames; /* number of frame buffers */
+};
+
+#endif /* _ASM_POWERPC_PS3FB_H_ */
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000000..7004cfea3f
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This struct defines the way the registers are stored on the
+ * kernel stack during a system call or other kernel entry.
+ *
+ * this should only contain volatile regs
+ * since we can keep non-volatile in the thread_struct
+ * should set this up when only volatiles are saved
+ * by intr code.
+ *
+ * Since this is going on the stack, *CARE MUST BE TAKEN* to ensure
+ * that the overall structure is a multiple of 16 bytes in length.
+ *
+ * Note that the offsets of the fields in this struct correspond with
+ * the PT_* values below.  This simplifies arch/powerpc/kernel/ptrace.c.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_PTRACE_H
+#define _UAPI_ASM_POWERPC_PTRACE_H
+
+
+#include <linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+struct user_pt_regs
+#else	/* !__KERNEL__: the same layout is exposed under the name pt_regs */
+struct pt_regs
+#endif	/* __KERNEL__ */
+{
+	unsigned long gpr[32];
+	unsigned long nip;
+	unsigned long msr;
+	unsigned long orig_gpr3;	/* Used for restarting system calls */
+	unsigned long ctr;
+	unsigned long link;
+	unsigned long xer;
+	unsigned long ccr;
+#ifdef __powerpc64__
+	unsigned long softe;		/* Soft enabled/disabled */
+#else	/* !__powerpc64__ */
+	unsigned long mq;		/* 601 only (not used at present) */
+					/* Used on APUS to hold IPL value. */
+#endif	/* __powerpc64__ */
+	unsigned long trap;		/* Reason for being here */
+	/* N.B. for critical exceptions on 4xx, the dar and dsisr
+	   fields are overloaded to hold srr0 and srr1. */
+	unsigned long dar;		/* Fault registers */
+	unsigned long dsisr;		/* on 4xx/Book-E used for ESR */
+	unsigned long result;		/* Result of a system call */
+};
+
+#endif /* __ASSEMBLY__ */
+
+
+/*
+ * Offsets used by 'ptrace' system call interface.
+ * These can't be changed without breaking binary compatibility
+ * with MkLinux, etc.
+ */
+#define PT_R0	0
+#define PT_R1	1
+#define PT_R2	2
+#define PT_R3	3
+#define PT_R4	4
+#define PT_R5	5
+#define PT_R6	6
+#define PT_R7	7
+#define PT_R8	8
+#define PT_R9	9
+#define PT_R10	10
+#define PT_R11	11
+#define PT_R12	12
+#define PT_R13	13
+#define PT_R14	14
+#define PT_R15	15
+#define PT_R16	16
+#define PT_R17	17
+#define PT_R18	18
+#define PT_R19	19
+#define PT_R20	20
+#define PT_R21	21
+#define PT_R22	22
+#define PT_R23	23
+#define PT_R24	24
+#define PT_R25	25
+#define PT_R26	26
+#define PT_R27	27
+#define PT_R28	28
+#define PT_R29	29
+#define PT_R30	30
+#define PT_R31	31
+
+#define PT_NIP	32
+#define PT_MSR	33
+#define PT_ORIG_R3 34
+#define PT_CTR	35
+#define PT_LNK	36
+#define PT_XER	37
+#define PT_CCR	38
+#ifndef __powerpc64__
+#define PT_MQ	39
+#else	/* __powerpc64__: slot 39 is named SOFTE instead of MQ */
+#define PT_SOFTE 39
+#endif	/* !__powerpc64__ */
+#define PT_TRAP	40
+#define PT_DAR	41
+#define PT_DSISR 42
+#define PT_RESULT 43
+#define PT_DSCR 44	/* same value as PT_REGS_COUNT: first index past PT_RESULT */
+#define PT_REGS_COUNT 44	/* number of slots PT_R0 .. PT_RESULT */
+
+#define PT_FPR0	48	/* each FP reg occupies 2 slots in this space */
+
+#ifndef __powerpc64__
+
+#define PT_FPR31 (PT_FPR0 + 2*31)
+#define PT_FPSCR (PT_FPR0 + 2*32 + 1)
+
+#else /* __powerpc64__ */
+
+#define PT_FPSCR (PT_FPR0 + 32)	/* each FP reg occupies 1 slot in 64-bit space */
+
+
+#define PT_VR0 82	/* each Vector reg occupies 2 slots in 64-bit */
+#define PT_VSCR (PT_VR0 + 32*2 + 1)
+#define PT_VRSAVE (PT_VR0 + 33*2)
+
+
+/*
+ * Only store first 32 VSRs here. The second 32 VSRs in VR0-31
+ */
+#define PT_VSR0 150	/* each VSR reg occupies 2 slots in 64-bit */
+#define PT_VSR31 (PT_VSR0 + 2*31)
+#endif /* __powerpc64__ */
+
+/*
+ * Get/set all the altivec registers v0..v31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadword.  Quadwords 0-31 contain the
+ * corresponding vector registers.  Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword.  Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface.  This allows signal handling and ptrace to use the same
+ * structures.  This also simplifies the implementation of a bi-arch
+ * (combined 32- and 64-bit) gdb.
+ */
+#define PTRACE_GETVRREGS	0x12
+#define PTRACE_SETVRREGS	0x13
+
+/* Get/set all the upper 32-bits of the SPE registers, accumulator, and
+ * spefscr, in one go */
+#define PTRACE_GETEVRREGS	0x14
+#define PTRACE_SETEVRREGS	0x15
+
+/* Get the first 32 128bit VSX registers */
+#define PTRACE_GETVSRREGS	0x1b
+#define PTRACE_SETVSRREGS	0x1c
+
+/* Syscall emulation defines */
+#define PTRACE_SYSEMU			0x1d
+#define PTRACE_SYSEMU_SINGLESTEP	0x1e
+
+/*
+ * Get or set a debug register. The first 16 are DABR registers and the
+ * second 16 are IABR registers.
+ */
+#define PTRACE_GET_DEBUGREG	0x19
+#define PTRACE_SET_DEBUGREG	0x1a
+
+/* (new) PTRACE requests using the same numbers as x86 and the same
+ * argument ordering. Additionally, they support more registers too
+ */
+#define PTRACE_GETREGS            0xc
+#define PTRACE_SETREGS            0xd
+#define PTRACE_GETFPREGS          0xe
+#define PTRACE_SETFPREGS          0xf
+#define PTRACE_GETREGS64	  0x16
+#define PTRACE_SETREGS64	  0x17
+
+/* Calls to trace a 64bit program from a 32bit program */
+#define PPC_PTRACE_PEEKTEXT_3264 0x95
+#define PPC_PTRACE_PEEKDATA_3264 0x94
+#define PPC_PTRACE_POKETEXT_3264 0x93
+#define PPC_PTRACE_POKEDATA_3264 0x92
+#define PPC_PTRACE_PEEKUSR_3264  0x91
+#define PPC_PTRACE_POKEUSR_3264  0x90
+
+#define PTRACE_SINGLEBLOCK	0x100	/* resume execution until next branch */
+
+#define PPC_PTRACE_GETHWDBGINFO	0x89
+#define PPC_PTRACE_SETHWDEBUG	0x88
+#define PPC_PTRACE_DELHWDEBUG	0x87
+
+#ifndef __ASSEMBLY__
+
+struct ppc_debug_info {
+	__u32 version;			/* Only version 1 exists to date */
+	__u32 num_instruction_bps;
+	__u32 num_data_bps;
+	__u32 num_condition_regs;
+	__u32 data_bp_alignment;
+	__u32 sizeof_condition;		/* size of the DVC register */
+	__u64 features;
+};
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * features will have bits indicating whether there is support for:
+ */
+#define PPC_DEBUG_FEATURE_INSN_BP_RANGE		0x0000000000000001
+#define PPC_DEBUG_FEATURE_INSN_BP_MASK		0x0000000000000002
+#define PPC_DEBUG_FEATURE_DATA_BP_RANGE		0x0000000000000004
+#define PPC_DEBUG_FEATURE_DATA_BP_MASK		0x0000000000000008
+#define PPC_DEBUG_FEATURE_DATA_BP_DAWR		0x0000000000000010
+#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31	0x0000000000000020
+
+#ifndef __ASSEMBLY__
+
+struct ppc_hw_breakpoint {
+	__u32 version;		/* currently, version must be 1 */
+	__u32 trigger_type;	/* only some combinations allowed */
+	__u32 addr_mode;	/* address match mode */
+	__u32 condition_mode;	/* break/watchpoint condition flags */
+	__u64 addr;		/* break/watchpoint address */
+	__u64 addr2;		/* range end or mask */
+	__u64 condition_value;	/* contents of the DVC register */
+};
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Trigger Type
+ */
+#define PPC_BREAKPOINT_TRIGGER_EXECUTE	0x00000001
+#define PPC_BREAKPOINT_TRIGGER_READ	0x00000002
+#define PPC_BREAKPOINT_TRIGGER_WRITE	0x00000004
+#define PPC_BREAKPOINT_TRIGGER_RW	\
+	(PPC_BREAKPOINT_TRIGGER_READ | PPC_BREAKPOINT_TRIGGER_WRITE)
+
+/*
+ * Address Mode
+ */
+#define PPC_BREAKPOINT_MODE_EXACT		0x00000000
+#define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE	0x00000001
+#define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE	0x00000002
+#define PPC_BREAKPOINT_MODE_MASK		0x00000003
+
+/*
+ * Condition Mode
+ */
+#define PPC_BREAKPOINT_CONDITION_MODE	0x00000003
+#define PPC_BREAKPOINT_CONDITION_NONE	0x00000000
+#define PPC_BREAKPOINT_CONDITION_AND	0x00000001
+#define PPC_BREAKPOINT_CONDITION_EXACT	PPC_BREAKPOINT_CONDITION_AND
+#define PPC_BREAKPOINT_CONDITION_OR	0x00000002
+#define PPC_BREAKPOINT_CONDITION_AND_OR	0x00000003
+#define PPC_BREAKPOINT_CONDITION_BE_ALL	0x00ff0000
+#define PPC_BREAKPOINT_CONDITION_BE_SHIFT	16
+#define PPC_BREAKPOINT_CONDITION_BE(n)	\
+	(1<<((n)+PPC_BREAKPOINT_CONDITION_BE_SHIFT))
+
+#endif /* _UAPI_ASM_POWERPC_PTRACE_H */
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
new file mode 100644
index 0000000000..85e96ccb5f
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_SEMBUF_H
+#define _ASM_POWERPC_SEMBUF_H
+
+#include <asm/ipcbuf.h>
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * The semid64_ds structure for PPC architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 32/64-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
+#ifndef __powerpc64__
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;	/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;	/* last change time */
+#else	/* __powerpc64__: plain long times, no *_high words */
+	long		sem_otime;	/* last semop time */
+	long		sem_ctime;	/* last change time */
+#endif	/* !__powerpc64__ */
+	unsigned long	sem_nsems;	/* no. of semaphores in array */
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif	/* _ASM_POWERPC_SEMBUF_H */
diff --git a/arch/powerpc/include/uapi/asm/setup.h b/arch/powerpc/include/uapi/asm/setup.h
new file mode 100644
index 0000000000..c54940b09d
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/setup.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_POWERPC_SETUP_H
+#define _UAPI_ASM_POWERPC_SETUP_H
+
+#define COMMAND_LINE_SIZE	2048
+
+#endif /* _UAPI_ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h
new file mode 100644
index 0000000000..439a3a02ba
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/shmbuf.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_SHMBUF_H
+#define _ASM_POWERPC_SHMBUF_H
+
+#include <asm/ipcbuf.h>
+#include <asm/posix_types.h>
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * The shmid64_ds structure for PPC architecture.
+ *
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+#ifdef __powerpc64__
+	long		shm_atime;	/* last attach time */
+	long		shm_dtime;	/* last detach time */
+	long		shm_ctime;	/* last change time */
+#else	/* !__powerpc64__: each time field is preceded by a *_high word */
+	unsigned long		shm_atime_high;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime_high;
+	unsigned long		shm_ctime;	/* last change time */
+	unsigned long		__unused4;	/* present only in this 32-bit layout */
+#endif	/* __powerpc64__ */
+	__kernel_size_t		shm_segsz;	/* size of segment (bytes) */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused5;
+	unsigned long		__unused6;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif	/* _ASM_POWERPC_SHMBUF_H */
diff --git a/arch/powerpc/include/uapi/asm/sigcontext.h b/arch/powerpc/include/uapi/asm/sigcontext.h
new file mode 100644
index 0000000000..630aeda56d
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/sigcontext.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_SIGCONTEXT_H
+#define _ASM_POWERPC_SIGCONTEXT_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/compiler.h>
+#include <asm/ptrace.h>
+#ifdef __powerpc64__
+#include <asm/elf.h>
+#endif
+
+struct sigcontext {
+	unsigned long	_unused[4];
+	int		signal;
+#ifdef __powerpc64__
+	int		_pad0;
+#endif	/* __powerpc64__ */
+	unsigned long	handler;
+	unsigned long	oldmask;
+#ifdef __KERNEL__
+	struct user_pt_regs __user *regs;
+#else	/* !__KERNEL__ */
+	struct pt_regs	*regs;
+#endif	/* __KERNEL__ */
+#ifdef __powerpc64__
+	elf_gregset_t	gp_regs;
+	elf_fpregset_t	fp_regs;
+/*
+ * To maintain compatibility with current implementations the sigcontext is
+ * extended by appending a pointer (v_regs) to a quadword type (elf_vrreg_t)
+ * followed by an unstructured (vmx_reserve) field of 101 doublewords. This
+ * allows the array of vector registers to be quadword aligned independent of
+ * the alignment of the containing sigcontext or ucontext. It is the
+ * responsibility of the code setting the sigcontext to set this pointer to
+ * either NULL (if this processor does not support the VMX feature) or the
+ * address of the first quadword within the allocated (vmx_reserve) area.
+ *
+ * The pointer (v_regs) of vector type (elf_vrreg_t) is type compatible with
+ * an array of 34 quadword entries (elf_vrregset_t).  The entries with
+ * indexes 0-31 contain the corresponding vector registers.  The entry with
+ * index 32 contains the vscr as the last word (offset 12) within the
+ * quadword.  This allows the vscr to be stored as either a quadword (since
+ * it must be copied via a vector register to/from storage) or as a word.
+ * The entry with index 33 contains the vrsave as the first word (offset 0)
+ * within the quadword.
+ *
+ * Part of the VSX data is stored here also by extending vmx_reserve
+ * by an additional 32 double words.  Architecturally the layout of
+ * the VSR registers and how they overlap on top of the legacy FPR and
+ * VR registers is shown below:
+ *
+ *                    VSR doubleword 0               VSR doubleword 1
+ *           ----------------------------------------------------------------
+ *   VSR[0]  |             FPR[0]            |                              |
+ *           ----------------------------------------------------------------
+ *   VSR[1]  |             FPR[1]            |                              |
+ *           ----------------------------------------------------------------
+ *           |              ...              |                              |
+ *           |              ...              |                              |
+ *           ----------------------------------------------------------------
+ *   VSR[30] |             FPR[30]           |                              |
+ *           ----------------------------------------------------------------
+ *   VSR[31] |             FPR[31]           |                              |
+ *           ----------------------------------------------------------------
+ *   VSR[32] |                             VR[0]                            |
+ *           ----------------------------------------------------------------
+ *   VSR[33] |                             VR[1]                            |
+ *           ----------------------------------------------------------------
+ *           |                              ...                             |
+ *           |                              ...                             |
+ *           ----------------------------------------------------------------
+ *   VSR[62] |                             VR[30]                           |
+ *           ----------------------------------------------------------------
+ *   VSR[63] |                             VR[31]                           |
+ *           ----------------------------------------------------------------
+ *
+ * FPR/VSR 0-31 doubleword 0 is stored in fp_regs, and VMX/VSR 32-63
+ * is stored at the start of vmx_reserve.  vmx_reserve is extended for
+ * backwards compatibility to store VSR 0-31 doubleword 1 after the VMX
+ * registers and vscr/vrsave.
+ */
+	elf_vrreg_t	__user *v_regs;
+	long		vmx_reserve[ELF_NVRREG + ELF_NVRREG + 1 + 32];
+#endif	/* __powerpc64__ */
+};
+
+#endif /* _ASM_POWERPC_SIGCONTEXT_H */
diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h
new file mode 100644
index 0000000000..a5dfe84f50
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/signal.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_POWERPC_SIGNAL_H
+#define _UAPI_ASM_POWERPC_SIGNAL_H
+
+#include <linux/types.h>
+
+#define _NSIG		64
+#ifdef __powerpc64__
+#define _NSIG_BPW	64
+#else
+#define _NSIG_BPW	32
+#endif
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+typedef unsigned long old_sigset_t;		/* at least 32 bits */
+
+typedef struct {
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#define SIGRTMAX	_NSIG
+
+#define SA_RESTORER	0x04000000U
+
+#ifdef __powerpc64__
+#define MINSIGSTKSZ	8192
+#define SIGSTKSZ	32768
+#else
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+#endif
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+struct old_sigaction {
+	__sighandler_t sa_handler;
+	old_sigset_t sa_mask;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+};
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+#endif
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	__kernel_size_t ss_size;
+} stack_t;
+
+
+#ifndef __powerpc64__
+/*
+ * These are parameters to dbg_sigreturn syscall.  They enable or
+ * disable certain debugging things that can be done from signal
+ * handlers.  The dbg_sigreturn syscall *must* be called from a
+ * SA_SIGINFO signal so the ucontext can be passed to it.  It takes an
+ * array of struct sig_dbg_op, which has the debug operations to
+ * perform before returning from the signal.
+ */
+struct sig_dbg_op {
+	int dbg_type;
+	unsigned long dbg_value;
+};
+
+/* Enable or disable single-stepping.  The value sets the state. */
+#define SIG_DBG_SINGLE_STEPPING		1
+
+/* Enable or disable branch tracing.  The value sets the state. */
+#define SIG_DBG_BRANCH_TRACING		2
+#endif /* ! __powerpc64__ */
+
+#endif /* _UAPI_ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
new file mode 100644
index 0000000000..12aa0c43e7
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_SOCKET_H
+#define _ASM_POWERPC_SOCKET_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define SO_RCVLOWAT	16
+#define SO_SNDLOWAT	17
+#define SO_RCVTIMEO_OLD	18
+#define SO_SNDTIMEO_OLD	19
+#define SO_PASSCRED	20
+#define SO_PEERCRED	21
+
+#include <asm-generic/socket.h>
+
+#endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/spu_info.h b/arch/powerpc/include/uapi/asm/spu_info.h
new file mode 100644
index 0000000000..45f9715058
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/spu_info.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * SPU info structures
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ */
+
+#ifndef _UAPI_SPU_INFO_H
+#define _UAPI_SPU_INFO_H
+
+#include <linux/types.h>
+
+#ifndef __KERNEL__
+struct mfc_cq_sr {
+	__u64 mfc_cq_data0_RW;
+	__u64 mfc_cq_data1_RW;
+	__u64 mfc_cq_data2_RW;
+	__u64 mfc_cq_data3_RW;
+};
+#endif /* __KERNEL__ */
+
+struct spu_dma_info {
+	__u64 dma_info_type;
+	__u64 dma_info_mask;
+	__u64 dma_info_status;
+	__u64 dma_info_stall_and_notify;
+	__u64 dma_info_atomic_command_status;
+	struct mfc_cq_sr dma_info_command_data[16];
+};
+
+struct spu_proxydma_info {
+	__u64 proxydma_info_type;
+	__u64 proxydma_info_mask;
+	__u64 proxydma_info_status;
+	struct mfc_cq_sr proxydma_info_command_data[8];
+};
+
+#endif /* _UAPI_SPU_INFO_H */
diff --git a/arch/powerpc/include/uapi/asm/stat.h b/arch/powerpc/include/uapi/asm/stat.h
new file mode 100644
index 0000000000..d509016642
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/stat.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_STAT_H
+#define _ASM_POWERPC_STAT_H
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+
+#define STAT_HAVE_NSEC 1
+
+#ifndef __powerpc64__
+struct __old_kernel_stat {
+	unsigned short st_dev;
+	unsigned short st_ino;
+	unsigned short st_mode;
+	unsigned short st_nlink;
+	unsigned short st_uid;
+	unsigned short st_gid;
+	unsigned short st_rdev;
+	unsigned long  st_size;
+	unsigned long  st_atime;
+	unsigned long  st_mtime;
+	unsigned long  st_ctime;
+};
+#endif /* !__powerpc64__ */
+
+struct stat {
+	unsigned long	st_dev;
+	__kernel_ino_t	st_ino;
+#ifdef __powerpc64__
+	unsigned long	st_nlink;
+	__kernel_mode_t	st_mode;
+#else
+	__kernel_mode_t	st_mode;
+	unsigned short	st_nlink;
+#endif
+	__kernel_uid32_t st_uid;
+	__kernel_gid32_t st_gid;
+	unsigned long	st_rdev;
+	long		st_size;
+	unsigned long	st_blksize;
+	unsigned long	st_blocks;
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+	unsigned long	__unused4;
+	unsigned long	__unused5;
+#ifdef __powerpc64__
+	unsigned long	__unused6;
+#endif
+};
+
+/* This matches struct stat64 in glibc2.1. Only used for 32 bit. */
+struct stat64 {
+	unsigned long long st_dev;		/* Device.  */
+	unsigned long long st_ino;		/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long long st_rdev;	/* Device number, if device.  */
+	unsigned short	__pad2;
+	long long	st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	long long	st_blocks;	/* Number of 512-byte blocks allocated. */
+	int		st_atime;	/* Time of last access.  */
+	unsigned int	st_atime_nsec;
+	int		st_mtime;	/* Time of last modification.  */
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;	/* Time of last status change.  */
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+
+#endif /* _ASM_POWERPC_STAT_H */
diff --git a/arch/powerpc/include/uapi/asm/swab.h b/arch/powerpc/include/uapi/asm/swab.h
new file mode 100644
index 0000000000..17b16c44d2
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/swab.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_SWAB_H
+#define _UAPI_ASM_POWERPC_SWAB_H
+
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+#ifdef __GNUC__
+
+#ifndef __powerpc64__
+#define __SWAB_64_THRU_32__
+#endif /* __powerpc64__ */
+
+
+#endif /* __GNUC__ */
+
+#endif /* _UAPI_ASM_POWERPC_SWAB_H */
diff --git a/arch/powerpc/include/uapi/asm/termbits.h b/arch/powerpc/include/uapi/asm/termbits.h
new file mode 100644
index 0000000000..21dc86dcb2
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/termbits.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_TERMBITS_H
+#define _ASM_POWERPC_TERMBITS_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm-generic/termbits-common.h>
+
+typedef unsigned int	tcflag_t;
+
+/*
+ * termios type and macro definitions.  Be careful about adding stuff
+ * to this file since it's used in GNU libc and there are strict rules
+ * concerning namespace pollution.
+ */
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_cc[NCCS];		/* control characters */
+	cc_t c_line;			/* line discipline (== c_cc[19]) */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* For PowerPC the termios and ktermios are the same */
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_cc[NCCS];		/* control characters */
+	cc_t c_line;			/* line discipline (== c_cc[19]) */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 	         0
+#define VQUIT 	         1
+#define VERASE 	         2
+#define VKILL	         3
+#define VEOF	         4
+#define VMIN	         5
+#define VEOL	         6
+#define VTIME	         7
+#define VEOL2	         8
+#define VSWTC	         9
+#define VWERASE 	10
+#define VREPRINT	11
+#define VSUSP 		12
+#define VSTART		13
+#define VSTOP		14
+#define VLNEXT		15
+#define VDISCARD	16
+
+/* c_iflag bits */
+#define IXON	0x0200
+#define IXOFF	0x0400
+#define IUCLC	0x1000
+#define IMAXBEL	0x2000
+#define IUTF8	0x4000
+
+/* c_oflag bits */
+#define ONLCR	0x00002
+#define OLCUC	0x00004
+#define NLDLY	0x00300
+#define   NL0	0x00000
+#define   NL1	0x00100
+#define   NL2	0x00200
+#define   NL3	0x00300
+#define TABDLY	0x00c00
+#define   TAB0	0x00000
+#define   TAB1	0x00400
+#define   TAB2	0x00800
+#define   TAB3	0x00c00
+#define   XTABS	0x00c00		/* required by POSIX to == TAB3 */
+#define CRDLY	0x03000
+#define   CR0	0x00000
+#define   CR1	0x01000
+#define   CR2	0x02000
+#define   CR3	0x03000
+#define FFDLY	0x04000
+#define   FF0	0x00000
+#define   FF1	0x04000
+#define BSDLY	0x08000
+#define   BS0	0x00000
+#define   BS1	0x08000
+#define VTDLY	0x10000
+#define   VT0	0x00000
+#define   VT1	0x10000
+
+/* c_cflag bit meaning */
+#define CBAUD		0x000000ff
+#define CBAUDEX		0x00000000
+#define BOTHER		0x0000001f
+#define    B57600	0x00000010
+#define   B115200	0x00000011
+#define   B230400	0x00000012
+#define   B460800	0x00000013
+#define   B500000	0x00000014
+#define   B576000	0x00000015
+#define   B921600	0x00000016
+#define  B1000000	0x00000017
+#define  B1152000	0x00000018
+#define  B1500000	0x00000019
+#define  B2000000	0x0000001a
+#define  B2500000	0x0000001b
+#define  B3000000	0x0000001c
+#define  B3500000	0x0000001d
+#define  B4000000	0x0000001e
+#define CSIZE		0x00000300
+#define   CS5		0x00000000
+#define   CS6		0x00000100
+#define   CS7		0x00000200
+#define   CS8		0x00000300
+#define CSTOPB		0x00000400
+#define CREAD		0x00000800
+#define PARENB		0x00001000
+#define PARODD		0x00002000
+#define HUPCL		0x00004000
+#define CLOCAL		0x00008000
+#define CIBAUD		0x00ff0000
+
+/* c_lflag bits */
+#define ISIG	0x00000080
+#define ICANON	0x00000100
+#define XCASE	0x00004000
+#define ECHO	0x00000008
+#define ECHOE	0x00000002
+#define ECHOK	0x00000004
+#define ECHONL	0x00000010
+#define NOFLSH	0x80000000
+#define TOSTOP	0x00400000
+#define ECHOCTL	0x00000040
+#define ECHOPRT	0x00000020
+#define ECHOKE	0x00000001
+#define FLUSHO	0x00800000
+#define PENDIN	0x20000000
+#define IEXTEN	0x00000400
+#define EXTPROC	0x10000000
+
+/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'.  */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif	/* _ASM_POWERPC_TERMBITS_H */
diff --git a/arch/powerpc/include/uapi/asm/termios.h b/arch/powerpc/include/uapi/asm/termios.h
new file mode 100644
index 0000000000..5d07fc89bc
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/termios.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Liberally adapted from alpha/termios.h.  In particular, the c_cc[]
+ * fields have been reordered so that termio & termios share the
+ * common subset in the same order (for brain dead programs that don't
+ * know or care about the differences).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_TERMIOS_H
+#define _UAPI_ASM_POWERPC_TERMIOS_H
+
+
+#include <asm/ioctls.h>
+#include <asm/termbits.h>
+
+struct sgttyb {
+	char	sg_ispeed;
+	char	sg_ospeed;
+	char	sg_erase;
+	char	sg_kill;
+	short	sg_flags;
+};
+
+struct tchars {
+	char	t_intrc;
+	char	t_quitc;
+	char	t_startc;
+	char	t_stopc;
+	char	t_eofc;
+	char	t_brkc;
+};
+
+struct ltchars {
+	char	t_suspc;
+	char	t_dsuspc;
+	char	t_rprntc;
+	char	t_flushc;
+	char	t_werasc;
+	char	t_lnextc;
+};
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 10
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* c_cc characters */
+#define _VINTR	0
+#define _VQUIT	1
+#define _VERASE	2
+#define _VKILL	3
+#define _VEOF	4
+#define _VMIN	5
+#define _VEOL	6
+#define _VTIME	7
+#define _VEOL2	8
+#define _VSWTC	9
+
+
+
+#endif /* _UAPI_ASM_POWERPC_TERMIOS_H */
diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h
new file mode 100644
index 0000000000..e1bf0e2fac
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_TM_H
+#define _ASM_POWERPC_TM_H
+
+/* Reason codes describing kernel causes for transaction aborts.  By
+ * convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if
+ * the failure is persistent.  PAPR saves 0xff-0xe0 for the hypervisor.
+ */
+#define TM_CAUSE_PERSISTENT	0x01
+#define TM_CAUSE_KVM_RESCHED	0xe0  /* From PAPR */
+#define TM_CAUSE_KVM_FAC_UNAV	0xe2  /* From PAPR */
+#define TM_CAUSE_RESCHED	0xde
+#define TM_CAUSE_TLBI		0xdc
+#define TM_CAUSE_FAC_UNAV	0xda
+#define TM_CAUSE_SYSCALL	0xd8
+#define TM_CAUSE_MISC		0xd6  /* future use */
+#define TM_CAUSE_SIGNAL		0xd4
+#define TM_CAUSE_ALIGNMENT	0xd2
+#define TM_CAUSE_EMULATE	0xd0
+
+#endif /* _ASM_POWERPC_TM_H */
diff --git a/arch/powerpc/include/uapi/asm/types.h b/arch/powerpc/include/uapi/asm/types.h
new file mode 100644
index 0000000000..327616fb70
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/types.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This file is never included by application software unless
+ * explicitly requested (e.g., via linux/types.h) in which case the
+ * application is Linux specific so (user-) name space pollution is
+ * not a major issue.  However, for interoperability, libraries still
+ * need to be careful to avoid name clashes.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_TYPES_H
+#define _UAPI_ASM_POWERPC_TYPES_H
+
+/*
+ * This is here because we used to use l64 for 64bit powerpc
+ * and we don't want to impact user mode with our change to ll64
+ * in the kernel.
+ *
+ * However, some user programs are fine with this.  They can
+ * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
+ */
+#if !defined(__SANE_USERSPACE_TYPES__) && defined(__powerpc64__) && !defined(__KERNEL__)
+# include <asm-generic/int-l64.h>
+#else
+# include <asm-generic/int-ll64.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+
+typedef struct {
+	__u32 u[4];
+} __attribute__((aligned(16))) __vector128;
+
+#endif /* __ASSEMBLY__ */
+
+
+#endif /* _UAPI_ASM_POWERPC_TYPES_H */
diff --git a/arch/powerpc/include/uapi/asm/ucontext.h b/arch/powerpc/include/uapi/asm/ucontext.h
new file mode 100644
index 0000000000..6f14a96d49
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/ucontext.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_UCONTEXT_H
+#define _ASM_POWERPC_UCONTEXT_H
+
+#ifdef __powerpc64__
+#include <asm/sigcontext.h>
+#else
+#include <asm/elf.h>
+#endif
+#include <asm/signal.h>
+
+#ifndef __powerpc64__
+struct mcontext {
+	elf_gregset_t	mc_gregs;
+	elf_fpregset_t	mc_fregs;
+	unsigned long	mc_pad[2];
+	elf_vrregset_t	mc_vregs __attribute__((__aligned__(16)));
+};
+#endif
+
+struct ucontext {
+	unsigned long	uc_flags;
+	struct ucontext __user *uc_link;
+	stack_t		uc_stack;
+#ifndef __powerpc64__
+	int		uc_pad[7];
+	struct mcontext	__user *uc_regs;/* points to uc_mcontext field */
+#endif
+	sigset_t	uc_sigmask;
+	/* glibc has 1024-bit signal masks, ours are 64-bit */
+#ifdef __powerpc64__
+	sigset_t	__unused[15];	/* Allow for uc_sigmask growth */
+	struct sigcontext uc_mcontext;	/* last for extensibility */
+#else
+	int		uc_maskext[30];
+	int		uc_pad2[3];
+	struct mcontext	uc_mcontext;
+#endif
+};
+
+#endif /* _ASM_POWERPC_UCONTEXT_H */
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
new file mode 100644
index 0000000000..5f84e3dc98
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This file contains the system call numbers.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
+#define _UAPI_ASM_POWERPC_UNISTD_H_
+
+#ifndef __powerpc64__
+#include <asm/unistd_32.h>
+#else
+#include <asm/unistd_64.h>
+#endif
+
+#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h
new file mode 100644
index 0000000000..7c81301ecd
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/vas-api.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright 2019 IBM Corp.
+ */
+
+#ifndef _UAPI_MISC_VAS_H
+#define _UAPI_MISC_VAS_H
+
+#include <linux/types.h>
+
+#include <asm/ioctl.h>
+
+#define VAS_MAGIC	'v'
+#define VAS_TX_WIN_OPEN	_IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr)
+
+/* Flags to VAS TX open window ioctl */
+/* To allocate a window with QoS credit, otherwise use default credit */
+#define VAS_TX_WIN_FLAG_QOS_CREDIT	0x0000000000000001
+
+struct vas_tx_win_open_attr {
+	__u32	version;
+	__s16	vas_id;	/* specific instance of vas or -1 for default */
+	__u16	reserved1;
+	__u64	flags;
+	__u64	reserved2[6];
+};
+
+#endif /* _UAPI_MISC_VAS_H */
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
new file mode 100644
index 0000000000..d71179d3ff
--- /dev/null
+++ b/arch/powerpc/kernel/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+prom_init_check
+vmlinux.lds
diff --git a/arch/powerpc/kernel/85xx_entry_mapping.S b/arch/powerpc/kernel/85xx_entry_mapping.S
new file mode 100644
index 0000000000..dedc17fac8
--- /dev/null
+++ b/arch/powerpc/kernel/85xx_entry_mapping.S
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* 1. Find the index of the entry we're executing in */
+	bcl	20,31,$+4				/* Find our address */
+invstr:	mflr	r6				/* Make it accessible */
+	mfmsr	r7
+	rlwinm	r4,r7,27,31,31			/* extract MSR[IS] */
+	mfspr	r7, SPRN_PID0
+	slwi	r7,r7,16
+	or	r7,r7,r4
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* search MSR[IS], SPID=PID0 */
+	mfspr	r7,SPRN_MAS1
+	andis.	r7,r7,MAS1_VALID@h
+	bne	match_TLB
+
+	mfspr	r7,SPRN_MMUCFG
+	rlwinm	r7,r7,21,28,31			/* extract MMUCFG[NPIDS] */
+	cmpwi	r7,3
+	bne	match_TLB			/* skip if NPIDS != 3 */
+
+	mfspr	r7,SPRN_PID1
+	slwi	r7,r7,16
+	or	r7,r7,r4
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* search MSR[IS], SPID=PID1 */
+	mfspr	r7,SPRN_MAS1
+	andis.	r7,r7,MAS1_VALID@h
+	bne	match_TLB
+	mfspr	r7, SPRN_PID2
+	slwi	r7,r7,16
+	or	r7,r7,r4
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* Fall through, we had to match */
+
+match_TLB:
+	mfspr	r7,SPRN_MAS0
+	rlwinm	r3,r7,16,20,31			/* Extract MAS0(Entry) */
+
+	mfspr	r7,SPRN_MAS1			/* Ensure IPROT is set */
+	oris	r7,r7,MAS1_IPROT@h
+	mtspr	SPRN_MAS1,r7
+	tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in */
+	mfspr	r9,SPRN_TLB1CFG
+	andi.	r9,r9,0xfff
+	li	r6,0				/* Set Entry counter to 0 */
+1:	lis	r7,0x1000			/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r6,16,4,15			/* Setup MAS0 = TLBSEL | ESEL(r6) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mfspr	r7,SPRN_MAS1
+	rlwinm	r7,r7,0,2,31			/* Clear MAS1 Valid and IPROT */
+	cmpw	r3,r6
+	beq	skpinv				/* Do not update the current execution TLB */
+	mtspr	SPRN_MAS1,r7
+	tlbwe
+	isync
+skpinv:	addi	r6,r6,1				/* Increment */
+	cmpw	r6,r9				/* Are we done? */
+	bne	1b				/* If not, repeat */
+
+	/* Invalidate TLB0 */
+	li	r6,0x04
+	tlbivax 0,r6
+	TLBSYNC
+	/* Invalidate TLB1 */
+	li	r6,0x0c
+	tlbivax 0,r6
+	TLBSYNC
+
+/* 3. Setup a temp mapping and jump to it */
+	andi.	r5, r3, 0x1	/* Find an entry not used and is non-zero */
+	addi	r5, r5, 0x1
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r3,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+
+	/* grab and fixup the RPN */
+	mfspr	r6,SPRN_MAS1	/* extract MAS1[SIZE] */
+	rlwinm	r6,r6,25,27,31
+	li	r8,-1
+	addi	r6,r6,10
+	slw	r6,r8,r6	/* convert to mask */
+
+	bcl	20,31,$+4	/* Find our address */
+1:	mflr	r7
+
+	mfspr	r8,SPRN_MAS3
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r23,SPRN_MAS7
+#endif
+	and	r8,r6,r8
+	subfic	r9,r6,-4096
+	and	r9,r9,r7
+
+	or	r25,r8,r9
+	ori	r8,r25,(MAS3_SX|MAS3_SW|MAS3_SR)
+
+	/* Just modify the entry ID and EPN for the temp mapping */
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r5,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r5) */
+	mtspr	SPRN_MAS0,r7
+	xori	r6,r4,1		/* Setup TMP mapping in the other Address space */
+	slwi	r6,r6,12
+	oris	r6,r6,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_4K))@l
+	mtspr	SPRN_MAS1,r6
+	mfspr	r6,SPRN_MAS2
+	li	r7,0		/* temp EPN = 0 */
+	rlwimi	r7,r6,0,20,31
+	mtspr	SPRN_MAS2,r7
+	mtspr	SPRN_MAS3,r8
+	tlbwe
+
+	xori	r6,r4,1
+	slwi	r6,r6,5		/* setup new context with other address space */
+	bcl	20,31,$+4	/* Find our address */
+1:	mflr	r9
+	rlwimi	r7,r9,0,20,31
+	addi	r7,r7,(2f - 1b)
+	mtspr	SPRN_SRR0,r7
+	mtspr	SPRN_SRR1,r6
+	rfi
+2:
+/* 4. Clear out PIDs & Search info */
+	li	r6,0
+	mtspr   SPRN_MAS6,r6
+	mtspr	SPRN_PID0,r6
+
+	mfspr	r7,SPRN_MMUCFG
+	rlwinm	r7,r7,21,28,31			/* extract MMUCFG[NPIDS] */
+	cmpwi	r7,3
+	bne	2f				/* skip if NPIDS != 3 */
+
+	mtspr	SPRN_PID1,r6
+	mtspr	SPRN_PID2,r6
+
+/* 5. Invalidate mapping we started in */
+2:
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r3,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mfspr	r6,SPRN_MAS1
+	rlwinm	r6,r6,0,2,0	/* clear IPROT */
+	mtspr	SPRN_MAS1,r6
+	tlbwe
+	/* Invalidate TLB1 */
+	li	r9,0x0c
+	tlbivax 0,r9
+	TLBSYNC
+
+#if defined(ENTRY_MAPPING_BOOT_SETUP)
+
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
+	lis	r6,0x1000		/* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
+	mtspr	SPRN_MAS0,r6
+	lis	r6,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+	mtspr	SPRN_MAS1,r6
+	lis	r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+	ori	r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+	and	r6,r6,r20
+	ori	r6,r6,MAS2_M_IF_NEEDED@l
+	mtspr	SPRN_MAS2,r6
+	mtspr	SPRN_MAS3,r8
+	tlbwe
+
+/* 7. Jump to kernstart_virt_addr mapping */
+	mr	r6,r20
+
+#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
+/*
+ * 6. Setup a 1:1 mapping in TLB1. Esel 0 is unused, 1 or 2 contains the tmp
+ * mapping so we start at 3. We setup 8 mappings, each 256MiB in size. This
+ * will cover the first 2GiB of memory.
+ */
+
+	lis r10, (MAS1_VALID|MAS1_IPROT)@h
+	ori r10,r10, (MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l
+	li  r11, 0
+	li  r0, 8
+	mtctr   r0
+
+next_tlb_setup:
+	addi	r0, r11, 3
+	rlwinm  r0, r0, 16, 4, 15  // Compute esel
+	rlwinm  r9, r11, 28, 0, 3   // Compute [ER]PN
+	oris    r0, r0, (MAS0_TLBSEL(1))@h
+	mtspr   SPRN_MAS0,r0
+	mtspr   SPRN_MAS1,r10
+	mtspr   SPRN_MAS2,r9
+	ori r9, r9, (MAS3_SX|MAS3_SW|MAS3_SR)
+	mtspr   SPRN_MAS3,r9
+	tlbwe
+	addi    r11, r11, 1
+	bdnz+   next_tlb_setup
+
+/* 7. Jump to our 1:1 mapping */
+	mr	r6, r25
+#else
+	#error You need to specify the mapping or not use this at all.
+#endif
+
+	lis	r7,MSR_KERNEL@h
+	ori	r7,r7,MSR_KERNEL@l
+	bcl	20,31,$+4		/* Find our address */
+1:	mflr	r9
+	rlwimi	r6,r9,0,20,31
+	addi	r6,r6,(2f - 1b)
+	mtspr	SPRN_SRR0,r6
+	mtspr	SPRN_SRR1,r7
+	rfi				/* start execution out of TLB1[0] entry */
+
+/* 8. Clear out the temp mapping */
+2:	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r5,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r5) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mfspr	r8,SPRN_MAS1
+	rlwinm	r8,r8,0,2,0	/* clear IPROT */
+	mtspr	SPRN_MAS1,r8
+	tlbwe
+	/* Invalidate TLB1 */
+	li	r9,0x0c
+	tlbivax 0,r9
+	TLBSYNC
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
new file mode 100644
index 0000000000..2919433be3
--- /dev/null
+++ b/arch/powerpc/kernel/Makefile
@@ -0,0 +1,231 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_PPC64
+CFLAGS_prom_init.o	+= $(NO_MINIMAL_TOC)
+endif
+ifdef CONFIG_PPC32
+CFLAGS_prom_init.o      += -fPIC
+CFLAGS_btext.o		+= -fPIC
+endif
+
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+CFLAGS_prom_init.o += -fno-stack-protector
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+
+ifdef CONFIG_FUNCTION_TRACER
+# Do not trace early boot code
+CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
+endif
+
+KASAN_SANITIZE_early_32.o := n
+KASAN_SANITIZE_cputable.o := n
+KASAN_SANITIZE_prom_init.o := n
+KASAN_SANITIZE_btext.o := n
+KASAN_SANITIZE_paca.o := n
+KASAN_SANITIZE_setup_64.o := n
+KASAN_SANITIZE_mce.o := n
+KASAN_SANITIZE_mce_power.o := n
+KASAN_SANITIZE_udbg.o := n
+KASAN_SANITIZE_udbg_16550.o := n
+
+# we have to be particularly careful in ppc64 to exclude code that
+# runs with translations off, as we cannot access the shadow with
+# translations off. However, ppc32 can sanitize this.
+ifdef CONFIG_PPC64
+KASAN_SANITIZE_traps.o := n
+endif
+
+ifdef CONFIG_KASAN
+CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_early_64.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
+ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+# Remove stack protector to avoid triggering unneeded stack canary
+# checks due to randomize_kstack_offset.
+CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
+CFLAGS_syscall.o += -fno-stack-protector
+endif
+
+obj-y				:= cputable.o syscalls.o switch.o \
+				   irq.o align.o signal_$(BITS).o pmc.o vdso.o \
+				   process.o systbl.o idle.o \
+				   signal.o sysfs.o cacheinfo.o time.o \
+				   prom.o traps.o setup-common.o \
+				   udbg.o misc.o io.o misc_$(BITS).o \
+				   of_platform.o prom_parse.o firmware.o \
+				   hw_breakpoint_constraints.o interrupt.o \
+				   kdebugfs.o stacktrace.o syscall.o
+obj-y				+= ptrace/
+obj-$(CONFIG_PPC64)		+= setup_64.o irq_64.o\
+				   paca.o nvram_64.o note.o
+obj-$(CONFIG_PPC32)		+= sys_ppc32.o
+obj-$(CONFIG_COMPAT)		+= sys_ppc32.o signal_32.o
+obj-$(CONFIG_VDSO32)		+= vdso32_wrapper.o
+obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
+obj-$(CONFIG_PPC_DAWR)		+= dawr.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
+obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_64e.o
+obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
+obj-$(CONFIG_PPC64)		+= vdso64_wrapper.o
+obj-$(CONFIG_ALTIVEC)		+= vecemu.o
+obj-$(CONFIG_PPC_BOOK3S_IDLE)	+= idle_book3s.o
+procfs-y			:= proc_powerpc.o
+obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
+rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)	:= rtas_pci.o
+obj-$(CONFIG_PPC_RTAS)		+= rtas_entry.o rtas.o rtas-rtc.o $(rtaspci-y-y)
+obj-$(CONFIG_PPC_RTAS_DAEMON)	+= rtasd.o
+obj-$(CONFIG_RTAS_FLASH)	+= rtas_flash.o
+obj-$(CONFIG_RTAS_PROC)		+= rtas-proc.o
+obj-$(CONFIG_PPC_DT_CPU_FTRS)	+= dt_cpu_ftrs.o
+obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_cache.o \
+				  eeh_driver.o eeh_event.o eeh_sysfs.o
+obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_FA_DUMP)		+= fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP)	+= fadump.o
+obj-$(CONFIG_PPC_85xx)		+= idle_85xx.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
+obj-$(CONFIG_TAU)		+= tau_6xx.o
+obj-$(CONFIG_HIBERNATION)	+= swsusp.o suspend.o
+ifdef CONFIG_PPC_85xx
+obj-$(CONFIG_HIBERNATION)	+= swsusp_85xx.o
+else
+obj-$(CONFIG_HIBERNATION)	+= swsusp_$(BITS).o
+endif
+obj64-$(CONFIG_HIBERNATION)	+= swsusp_asm64.o
+obj-$(CONFIG_MODULES)		+= module.o module_$(BITS).o
+obj-$(CONFIG_44x)		+= cpu_setup_44x.o
+obj-$(CONFIG_PPC_E500)		+= cpu_setup_e500.o
+obj-$(CONFIG_PPC_DOORBELL)	+= dbell.o
+obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
+
+obj-$(CONFIG_PPC64)		+= head_64.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= head_book3s_32.o
+obj-$(CONFIG_40x)		+= head_40x.o
+obj-$(CONFIG_44x)		+= head_44x.o
+obj-$(CONFIG_PPC_8xx)		+= head_8xx.o
+obj-$(CONFIG_PPC_85xx)		+= head_85xx.o
+extra-y				+= vmlinux.lds
+
+obj-$(CONFIG_RELOCATABLE)	+= reloc_$(BITS).o
+
+obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o static_call.o
+obj-$(CONFIG_PPC64)		+= dma-iommu.o iommu.o
+obj-$(CONFIG_KGDB)		+= kgdb.o
+obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
+obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= optprobes.o optprobes_head.o
+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= kprobes-ftrace.o
+obj-$(CONFIG_UPROBES)		+= uprobes.o
+obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
+obj-$(CONFIG_SWIOTLB)		+= dma-swiotlb.o
+obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
+
+pci64-$(CONFIG_PPC64)		+= pci_dn.o pci-hotplug.o isa-bridge.o
+obj-$(CONFIG_PCI)		+= pci_$(BITS).o $(pci64-y) \
+				   pci-common.o pci_of_scan.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
+
+obj-$(CONFIG_AUDIT)		+= audit.o
+obj64-$(CONFIG_AUDIT)		+= compat_audit.o
+
+obj-$(CONFIG_PPC_IO_WORKAROUNDS)	+= io-workarounds.o
+
+obj-y				+= trace/
+
+ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
+obj-y				+= iomap.o
+endif
+
+obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM)	+= tm.o
+
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),)
+obj-y				+= ppc_save_regs.o
+endif
+
+obj-$(CONFIG_EPAPR_PARAVIRT)	+= epapr_paravirt.o epapr_hcalls.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y				+= ucall.o
+endif
+
+obj-$(CONFIG_PPC_SECURE_BOOT)	+= secure_boot.o ima_arch.o secvar-ops.o
+obj-$(CONFIG_PPC_SECVAR_SYSFS)	+= secvar-sysfs.o
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_prom_init.o := n
+KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
+UBSAN_SANITIZE_prom_init.o := n
+GCOV_PROFILE_kprobes.o := n
+KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
+UBSAN_SANITIZE_kprobes.o := n
+GCOV_PROFILE_kprobes-ftrace.o := n
+KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
+UBSAN_SANITIZE_kprobes-ftrace.o := n
+GCOV_PROFILE_syscall_64.o := n
+KCOV_INSTRUMENT_syscall_64.o := n
+UBSAN_SANITIZE_syscall_64.o := n
+UBSAN_SANITIZE_vdso.o := n
+
+# Necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_cputable.o := n
+KCOV_INSTRUMENT_setup_64.o := n
+KCOV_INSTRUMENT_paca.o := n
+
+CFLAGS_setup_64.o		+= -fno-stack-protector
+CFLAGS_paca.o			+= -fno-stack-protector
+
+obj-$(CONFIG_PPC_FPU)		+= fpu.o
+obj-$(CONFIG_ALTIVEC)		+= vector.o
+
+obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
+obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
+
+obj-$(CONFIG_PPC64)		+= $(obj64-y)
+obj-$(CONFIG_PPC32)		+= $(obj32-y)
+
+quiet_cmd_prom_init_check = PROMCHK $@
+      cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
+
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+	$(call if_changed,prom_init_check)
+targets += prom_init_check
+
+clean-files := vmlinux.lds
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg
+$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg
+
+# for cleaning
+subdir- += vdso
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
new file mode 100644
index 0000000000..3e37ece067
--- /dev/null
+++ b/arch/powerpc/kernel/align.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* align.c - handle alignment exceptions for the Power PC.
+ *
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ *   PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *   PowerPC 403GCX/405GP modifications.
+ * Copyright (c) 2001-2002 PPC64 team, IBM Corp
+ *   64-bit and Power4 support
+ * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
+ *                    <benh@kernel.crashing.org>
+ *   Merge ppc32 and ppc64 implementations
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+#include <asm/emulated_ops.h>
+#include <asm/switch_to.h>
+#include <asm/disassemble.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
+#include <asm/inst.h>
+
+struct aligninfo {
+	unsigned char len;	/* operand size in bytes (0 in INVALID entries) */
+	unsigned char flags;	/* LD/ST/SE/SW/E4/E8 bits defined below */
+};
+
+
+#define INVALID	{ 0, 0 }	/* aligninfo entry for an encoding that is not emulated */
+
+/* Bits in the flags field */
+#define LD	0	/* load (value 0: a load is any entry without ST) */
+#define ST	1	/* store */
+#define SE	2	/* sign-extend value, or FP ld/st as word */
+#define SW	0x20	/* byte swap (not set by any spe_aligninfo[] entry) */
+#define E4	0x40	/* SPE endianness is word */
+#define E8	0x80	/* SPE endianness is double word */
+
+#ifdef CONFIG_SPE
+
+static struct aligninfo spe_aligninfo[32] = {	/* index: (instr >> 1) & 0x1f */
+	{ 8, LD+E8 },		/* 0 00 00: evldd[x] */
+	{ 8, LD+E4 },		/* 0 00 01: evldw[x] */
+	{ 8, LD },		/* 0 00 10: evldh[x] */
+	INVALID,		/* 0 00 11 */
+	{ 2, LD },		/* 0 01 00: evlhhesplat[x] */
+	INVALID,		/* 0 01 01 */
+	{ 2, LD },		/* 0 01 10: evlhhousplat[x] */
+	{ 2, LD+SE },		/* 0 01 11: evlhhossplat[x] */
+	{ 4, LD },		/* 0 10 00: evlwhe[x] */
+	INVALID,		/* 0 10 01 */
+	{ 4, LD },		/* 0 10 10: evlwhou[x] */
+	{ 4, LD+SE },		/* 0 10 11: evlwhos[x] */
+	{ 4, LD+E4 },		/* 0 11 00: evlwwsplat[x] */
+	INVALID,		/* 0 11 01 */
+	{ 4, LD },		/* 0 11 10: evlwhsplat[x] */
+	INVALID,		/* 0 11 11 */
+
+	{ 8, ST+E8 },		/* 1 00 00: evstdd[x] */
+	{ 8, ST+E4 },		/* 1 00 01: evstdw[x] */
+	{ 8, ST },		/* 1 00 10: evstdh[x] */
+	INVALID,		/* 1 00 11 */
+	INVALID,		/* 1 01 00 */
+	INVALID,		/* 1 01 01 */
+	INVALID,		/* 1 01 10 */
+	INVALID,		/* 1 01 11 */
+	{ 4, ST },		/* 1 10 00: evstwhe[x] */
+	INVALID,		/* 1 10 01 */
+	{ 4, ST },		/* 1 10 10: evstwho[x] */
+	INVALID,		/* 1 10 11 */
+	{ 4, ST+E4 },		/* 1 11 00: evstwwe[x] */
+	INVALID,		/* 1 11 01 */
+	{ 4, ST+E4 },		/* 1 11 10: evstwwo[x] */
+	INVALID,		/* 1 11 11 */
+};
+
+#define	EVLDD		0x00	/* these values are the spe_aligninfo[] indices */
+#define	EVLDW		0x01
+#define	EVLDH		0x02
+#define	EVLHHESPLAT	0x04
+#define	EVLHHOUSPLAT	0x06
+#define	EVLHHOSSPLAT	0x07
+#define	EVLWHE		0x08
+#define	EVLWHOU		0x0A
+#define	EVLWHOS		0x0B
+#define	EVLWWSPLAT	0x0C
+#define	EVLWHSPLAT	0x0E
+#define	EVSTDD		0x10	/* 0x10 and up: stores */
+#define	EVSTDW		0x11
+#define	EVSTDH		0x12
+#define	EVSTWHE		0x18
+#define	EVSTWHO		0x1A
+#define	EVSTWWE		0x1C
+#define	EVSTWWO		0x1E
+
+/*
+ * Emulate SPE loads and stores (1: emulated, 0: kernel mode, <0: errno).
+ * Only Book-E has these instructions, and it does true little-endian,
+ * so we don't need the address swizzling.
+ */
+static int emulate_spe(struct pt_regs *regs, unsigned int reg,
+		       ppc_inst_t ppc_instr)
+{
+	union {
+		u64 ll;
+		u32 w[2];
+		u16 h[4];
+		u8 v[8];
+	} data, temp;	/* data: value to store / loaded result; temp: raw user bytes */
+	unsigned char __user *p, *addr;
+	unsigned long *evr = &current->thread.evr[reg];
+	unsigned int nb, flags, instr;
+
+	instr = ppc_inst_val(ppc_instr);
+	instr = (instr >> 1) & 0x1f;	/* 5-bit index into spe_aligninfo[] */
+
+	/* DAR has the operand effective address */
+	addr = (unsigned char __user *)regs->dar;
+
+	nb = spe_aligninfo[instr].len;
+	flags = spe_aligninfo[instr].flags;
+
+	/* userland only: a kernel-mode fault is reported back as unhandled */
+	if (unlikely(!user_mode(regs)))
+		return 0;
+
+	flush_spe_to_thread(current);
+
+	/* If we are loading, get the data from user space, else
+	 * get it from register values
+	 */
+	if (flags & ST) {
+		data.ll = 0;
+		switch (instr) {
+		case EVSTDD:
+		case EVSTDW:
+		case EVSTDH:
+			data.w[0] = *evr;
+			data.w[1] = regs->gpr[reg];
+			break;
+		case EVSTWHE:
+			data.h[2] = *evr >> 16;
+			data.h[3] = regs->gpr[reg] >> 16;
+			break;
+		case EVSTWHO:
+			data.h[2] = *evr & 0xffff;
+			data.h[3] = regs->gpr[reg] & 0xffff;
+			break;
+		case EVSTWWE:
+			data.w[1] = *evr;
+			break;
+		case EVSTWWO:
+			data.w[1] = regs->gpr[reg];
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		temp.ll = data.ll = 0;
+		p = addr;
+
+		if (!user_read_access_begin(addr, nb))
+			return -EFAULT;
+
+		switch (nb) {	/* fills temp right-justified: v[8 - nb] .. v[7] */
+		case 8:
+			unsafe_get_user(temp.v[0], p++, Efault_read);
+			unsafe_get_user(temp.v[1], p++, Efault_read);
+			unsafe_get_user(temp.v[2], p++, Efault_read);
+			unsafe_get_user(temp.v[3], p++, Efault_read);
+			fallthrough;
+		case 4:
+			unsafe_get_user(temp.v[4], p++, Efault_read);
+			unsafe_get_user(temp.v[5], p++, Efault_read);
+			fallthrough;
+		case 2:
+			unsafe_get_user(temp.v[6], p++, Efault_read);
+			unsafe_get_user(temp.v[7], p++, Efault_read);
+		}
+		user_read_access_end();
+
+		switch (instr) {	/* place the bytes read into the result layout */
+		case EVLDD:
+		case EVLDW:
+		case EVLDH:
+			data.ll = temp.ll;
+			break;
+		case EVLHHESPLAT:
+			data.h[0] = temp.h[3];
+			data.h[2] = temp.h[3];
+			break;
+		case EVLHHOUSPLAT:
+		case EVLHHOSSPLAT:
+			data.h[1] = temp.h[3];
+			data.h[3] = temp.h[3];
+			break;
+		case EVLWHE:
+			data.h[0] = temp.h[2];
+			data.h[2] = temp.h[3];
+			break;
+		case EVLWHOU:
+		case EVLWHOS:
+			data.h[1] = temp.h[2];
+			data.h[3] = temp.h[3];
+			break;
+		case EVLWWSPLAT:
+			data.w[0] = temp.w[1];
+			data.w[1] = temp.w[1];
+			break;
+		case EVLWHSPLAT:
+			data.h[0] = temp.h[2];
+			data.h[1] = temp.h[2];
+			data.h[2] = temp.h[3];
+			data.h[3] = temp.h[3];
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (flags & SW) {	/* NOTE(review): no spe_aligninfo[] entry sets SW */
+		switch (flags & 0xf0) {
+		case E8:
+			data.ll = swab64(data.ll);
+			break;
+		case E4:
+			data.w[0] = swab32(data.w[0]);
+			data.w[1] = swab32(data.w[1]);
+			break;
+		/* It's half word endian */
+		default:
+			data.h[0] = swab16(data.h[0]);
+			data.h[1] = swab16(data.h[1]);
+			data.h[2] = swab16(data.h[2]);
+			data.h[3] = swab16(data.h[3]);
+			break;
+		}
+	}
+
+	if (flags & SE) {	/* sign-extend halfwords 1 and 3 to full words */
+		data.w[0] = (s16)data.h[1];
+		data.w[1] = (s16)data.h[3];
+	}
+
+	/* Store result to memory or update registers */
+	if (flags & ST) {
+		p = addr;
+
+		if (!user_write_access_begin(addr, nb))
+			return -EFAULT;
+
+		switch (nb) {	/* writes the last nb bytes of data: v[8 - nb] .. v[7] */
+		case 8:
+			unsafe_put_user(data.v[0], p++, Efault_write);
+			unsafe_put_user(data.v[1], p++, Efault_write);
+			unsafe_put_user(data.v[2], p++, Efault_write);
+			unsafe_put_user(data.v[3], p++, Efault_write);
+			fallthrough;
+		case 4:
+			unsafe_put_user(data.v[4], p++, Efault_write);
+			unsafe_put_user(data.v[5], p++, Efault_write);
+			fallthrough;
+		case 2:
+			unsafe_put_user(data.v[6], p++, Efault_write);
+			unsafe_put_user(data.v[7], p++, Efault_write);
+		}
+		user_write_access_end();
+	} else {
+		*evr = data.w[0];
+		regs->gpr[reg] = data.w[1];
+	}
+
+	return 1;	/* emulated */
+
+Efault_read:
+	user_read_access_end();
+	return -EFAULT;
+
+Efault_write:
+	user_write_access_end();
+	return -EFAULT;
+}
+#endif /* CONFIG_SPE */
+
+/*
+ * Called on alignment exception. Attempts to emulate the faulting instruction.
+ *
+ * Return 1 on success
+ * Return 0 if unable to handle the interrupt
+ * Return -EFAULT if the instruction fetch fails or the data address is bad
+ * Other negative return values indicate that the instruction can't
+ * be emulated, and the process should be given a SIGBUS.
+ */
+
+int fix_alignment(struct pt_regs *regs)
+{
+	ppc_inst_t instr;
+	struct instruction_op op;
+	int r, type;
+
+	if (is_kernel_addr(regs->nip))
+		r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip);
+	else
+		r = __get_user_instr(instr, (void __user *)regs->nip);
+
+	if (unlikely(r))
+		return -EFAULT;
+	if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
+		/* We don't handle PPC little-endian any more... */
+		if (cpu_has_feature(CPU_FTR_PPC_LE))
+			return -EIO;
+		instr = ppc_inst_swab(instr);	/* faulting context ran in the other endianness */
+	}
+
+#ifdef CONFIG_SPE
+	if (ppc_inst_primary_opcode(instr) == 0x4) {
+		int reg = (ppc_inst_val(instr) >> 21) & 0x1f;	/* 5-bit register field */
+		PPC_WARN_ALIGNMENT(spe, regs);
+		return emulate_spe(regs, reg, instr);
+	}
+#endif
+
+
+	/*
+	 * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment
+	 * check.
+	 *
+	 * Send a SIGBUS to the process that caused the fault.
+	 *
+	 * We do not emulate these because paste may contain additional metadata
+	 * when pasting to a co-processor. Furthermore, paste_last is the
+	 * synchronisation point for preceding copy/paste sequences.
+	 */
+	if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
+		return -EIO;
+
+	r = analyse_instr(&op, regs, instr);
+	if (r < 0)
+		return -EINVAL;
+
+	type = GETTYPE(op.type);
+	if (!OP_IS_LOAD_STORE(type)) {
+		if (op.type != CACHEOP + DCBZ)	/* dcbz is the only non-load/store handled */
+			return -EINVAL;
+		PPC_WARN_ALIGNMENT(dcbz, regs);
+		WARN_ON_ONCE(!user_mode(regs));
+		r = emulate_dcbz(op.ea, regs);
+	} else {
+		if (type == LARX || type == STCX)	/* larx/stcx are refused, not emulated */
+			return -EIO;
+		PPC_WARN_ALIGNMENT(unaligned, regs);
+		r = emulate_loadstore(regs, &op);
+	}
+
+	if (!r)	/* helper returned 0: translate to the documented "1 on success" */
+		return 1;
+	return r;
+}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
new file mode 100644
index 0000000000..9f14d95b8b
--- /dev/null
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ * We use the technique used in the OSF Mach kernel code:
+ * generate asm statements containing #defines,
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
+
+#include <linux/compat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/hrtimer.h>
+#ifdef CONFIG_PPC64
+#include <linux/time.h>
+#include <linux/hardirq.h>
+#endif
+#include <linux/kbuild.h>
+
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
+#include <asm/dbell.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/cache.h>
+#include <asm/mmu.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#endif
+#ifdef CONFIG_PPC_POWERNV
+#include <asm/opal.h>
+#endif
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
+#include <linux/kvm_host.h>
+#endif
+#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#endif
+
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_BOOKE_OR_40x
+#include "head_booke.h"
+#endif
+#endif
+
+#if defined(CONFIG_PPC_E500)
+#include "../mm/mmu_decl.h"
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include <asm/fixmap.h>
+#endif
+
+#ifdef CONFIG_XMON
+#include "../xmon/xmon_bpts.h"
+#endif
+
+#define STACK_PT_REGS_OFFSET(sym, val)	\
+	DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val)) /* pt_regs field offset biased by STACK_INT_FRAME_REGS */
+
+int main(void)	/* each OFFSET()/DEFINE() below emits one symbol for assembly code */
+{
+	OFFSET(THREAD, task_struct, thread);
+	OFFSET(MM, task_struct, mm);
+#ifdef CONFIG_STACKPROTECTOR
+	OFFSET(TASK_CANARY, task_struct, stack_canary);
+#ifdef CONFIG_PPC64
+	OFFSET(PACA_CANARY, paca_struct, canary);
+#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_STACKPROTECTOR */
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_RTAS
+	OFFSET(RTAS_SP, thread_struct, rtas_sp);
+#endif /* CONFIG_PPC_RTAS */
+#endif /* CONFIG_PPC32 */
+	OFFSET(TASK_STACK, task_struct, stack);
+#ifdef CONFIG_SMP
+	OFFSET(TASK_CPU, task_struct, thread_info.cpu);
+#endif
+
+#ifdef CONFIG_LIVEPATCH_64
+	OFFSET(TI_livepatch_sp, thread_info, livepatch_sp);
+#endif
+
+	OFFSET(KSP, thread_struct, ksp);
+	OFFSET(PT_REGS, thread_struct, regs);
+#ifdef CONFIG_BOOKE
+	OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
+#endif
+#ifdef CONFIG_PPC_FPU
+	OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
+	OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
+	OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
+#endif
+	OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
+	OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
+#ifdef CONFIG_ALTIVEC
+	OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
+	OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
+	OFFSET(THREAD_USED_VR, thread_struct, used_vr);
+	OFFSET(VRSTATE_VSCR, thread_vr_state, vscr);
+	OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec);
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	OFFSET(THREAD_USED_VSR, thread_struct, used_vsr);
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_PPC64
+	OFFSET(KSP_VSID, thread_struct, ksp_vsid);
+#else /* !CONFIG_PPC64 */
+	OFFSET(PGDIR, thread_struct, pgdir);
+	OFFSET(SRR0, thread_struct, srr0);
+	OFFSET(SRR1, thread_struct, srr1);
+	OFFSET(DAR, thread_struct, dar);
+	OFFSET(DSISR, thread_struct, dsisr);
+#ifdef CONFIG_PPC_BOOK3S_32
+	OFFSET(THR0, thread_struct, r0);
+	OFFSET(THR3, thread_struct, r3);
+	OFFSET(THR4, thread_struct, r4);
+	OFFSET(THR5, thread_struct, r5);
+	OFFSET(THR6, thread_struct, r6);
+	OFFSET(THR8, thread_struct, r8);
+	OFFSET(THR9, thread_struct, r9);
+	OFFSET(THR11, thread_struct, r11);
+	OFFSET(THLR, thread_struct, lr);
+	OFFSET(THCTR, thread_struct, ctr);
+	OFFSET(THSR0, thread_struct, sr0);
+#endif /* CONFIG_PPC_BOOK3S_32 */
+#ifdef CONFIG_SPE
+	OFFSET(THREAD_EVR0, thread_struct, evr[0]);
+	OFFSET(THREAD_ACC, thread_struct, acc);
+	OFFSET(THREAD_USED_SPE, thread_struct, used_spe);
+#endif /* CONFIG_SPE */
+#endif /* !CONFIG_PPC64 */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu);
+#endif
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
+	OFFSET(THREAD_TM_TFHAR, thread_struct, tm_tfhar);
+	OFFSET(THREAD_TM_TEXASR, thread_struct, tm_texasr);
+	OFFSET(THREAD_TM_TFIAR, thread_struct, tm_tfiar);
+	OFFSET(THREAD_TM_TAR, thread_struct, tm_tar);
+	OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
+	OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
+	OFFSET(THREAD_TM_AMR, thread_struct, tm_amr);
+	OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
+	OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
+	OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
+	OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
+	/* Local pt_regs on stack in int frame form, plus 16 bytes for TM */
+	DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16);
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+	OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
+
+#ifdef CONFIG_PPC64
+	OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
+	OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size);
+	/* paca */
+	OFFSET(PACAPACAINDEX, paca_struct, paca_index);
+	OFFSET(PACAPROCSTART, paca_struct, cpu_start);
+	OFFSET(PACAKSAVE, paca_struct, kstack);
+	OFFSET(PACACURRENT, paca_struct, __current);
+	DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
+				 offsetof(struct task_struct, thread_info));
+	OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
+	OFFSET(PACAR1, paca_struct, saved_r1);
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	OFFSET(PACATOC, paca_struct, kernel_toc);
+#endif
+	OFFSET(PACAKBASE, paca_struct, kernelbase);
+	OFFSET(PACAKMSR, paca_struct, kernel_msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+	OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid);
+	OFFSET(PACASRR_VALID, paca_struct, srr_valid);
+#endif
+	OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
+	OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
+	OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	OFFSET(PACAPGD, paca_struct, pgd);
+	OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd);
+	OFFSET(PACA_EXGEN, paca_struct, exgen);
+	OFFSET(PACA_EXTLB, paca_struct, extlb);
+	OFFSET(PACA_EXMC, paca_struct, exmc);
+	OFFSET(PACA_EXCRIT, paca_struct, excrit);
+	OFFSET(PACA_EXDBG, paca_struct, exdbg);
+	OFFSET(PACA_MC_STACK, paca_struct, mc_kstack);
+	OFFSET(PACA_CRIT_STACK, paca_struct, crit_kstack);
+	OFFSET(PACA_DBG_STACK, paca_struct, dbg_kstack);
+	OFFSET(PACA_TCD_PTR, paca_struct, tcd_ptr);
+
+	OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next);
+	OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max);
+	OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	OFFSET(PACA_EXGEN, paca_struct, exgen);
+	OFFSET(PACA_EXMC, paca_struct, exmc);
+	OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
+	OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
+	OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
+	OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
+	OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
+	OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
+#ifdef CONFIG_PPC_BOOK3S_64
+	OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
+	OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
+	OFFSET(PACA_IN_MCE, paca_struct, in_mce);
+	OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+	OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+	OFFSET(PACA_EXRFI, paca_struct, exrfi);
+	OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
+
+#endif
+	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
+	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
+	OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
+#ifdef CONFIG_PPC64 /* NOTE(review): nested inside the CONFIG_PPC64 block above, so always true */
+	OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
+#endif
+#ifdef CONFIG_PPC_BOOK3E_64
+	OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
+#endif
+	OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
+#else /* !CONFIG_PPC64: nothing to emit */
+#endif /* CONFIG_PPC64 */
+
+	/* RTAS */
+	OFFSET(RTASBASE, rtas_t, base);
+	OFFSET(RTASENTRY, rtas_t, entry);
+
+	/* Interrupt register frame */
+	DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
+	DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE);
+	STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
+	STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
+	STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
+	STACK_PT_REGS_OFFSET(GPR3, gpr[3]);
+	STACK_PT_REGS_OFFSET(GPR4, gpr[4]);
+	STACK_PT_REGS_OFFSET(GPR5, gpr[5]);
+	STACK_PT_REGS_OFFSET(GPR6, gpr[6]);
+	STACK_PT_REGS_OFFSET(GPR7, gpr[7]);
+	STACK_PT_REGS_OFFSET(GPR8, gpr[8]);
+	STACK_PT_REGS_OFFSET(GPR9, gpr[9]);
+	STACK_PT_REGS_OFFSET(GPR10, gpr[10]);
+	STACK_PT_REGS_OFFSET(GPR11, gpr[11]);
+	STACK_PT_REGS_OFFSET(GPR12, gpr[12]);
+	STACK_PT_REGS_OFFSET(GPR13, gpr[13]);
+	/*
+	 * Note: these symbols include _ because they overlap with special
+	 * register names
+	 */
+	STACK_PT_REGS_OFFSET(_NIP, nip);
+	STACK_PT_REGS_OFFSET(_MSR, msr);
+	STACK_PT_REGS_OFFSET(_CTR, ctr);
+	STACK_PT_REGS_OFFSET(_LINK, link);
+	STACK_PT_REGS_OFFSET(_CCR, ccr);
+	STACK_PT_REGS_OFFSET(_XER, xer);
+	STACK_PT_REGS_OFFSET(_DAR, dar);
+	STACK_PT_REGS_OFFSET(_DEAR, dear);
+	STACK_PT_REGS_OFFSET(_DSISR, dsisr);
+	STACK_PT_REGS_OFFSET(_ESR, esr);
+	STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3);
+	STACK_PT_REGS_OFFSET(RESULT, result);
+	STACK_PT_REGS_OFFSET(_TRAP, trap);
+#ifdef CONFIG_PPC64
+	STACK_PT_REGS_OFFSET(SOFTE, softe);
+	STACK_PT_REGS_OFFSET(_PPR, ppr);
+#endif
+
+#ifdef CONFIG_PPC_PKEY
+	STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
+	STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
+#endif
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+	STACK_PT_REGS_OFFSET(MAS0, mas0);
+	/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
+	STACK_PT_REGS_OFFSET(MMUCR, mas0);
+	STACK_PT_REGS_OFFSET(MAS1, mas1);
+	STACK_PT_REGS_OFFSET(MAS2, mas2);
+	STACK_PT_REGS_OFFSET(MAS3, mas3);
+	STACK_PT_REGS_OFFSET(MAS6, mas6);
+	STACK_PT_REGS_OFFSET(MAS7, mas7);
+	STACK_PT_REGS_OFFSET(_SRR0, srr0);
+	STACK_PT_REGS_OFFSET(_SRR1, srr1);
+	STACK_PT_REGS_OFFSET(_CSRR0, csrr0);
+	STACK_PT_REGS_OFFSET(_CSRR1, csrr1);
+	STACK_PT_REGS_OFFSET(_DSRR0, dsrr0);
+	STACK_PT_REGS_OFFSET(_DSRR1, dsrr1);
+#endif
+
+	/* About the CPU features table */
+	OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
+	OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup);
+	OFFSET(CPU_SPEC_RESTORE, cpu_spec, cpu_restore);
+
+	OFFSET(pbe_address, pbe, address);
+	OFFSET(pbe_orig_address, pbe, orig_address);
+	OFFSET(pbe_next, pbe, next);
+
+#ifndef CONFIG_PPC64
+	DEFINE(TASK_SIZE, TASK_SIZE);
+	DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
+#endif /* ! CONFIG_PPC64 */
+
+	/* datapage offsets for use by vdso */
+	OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data);
+	OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
+#ifdef CONFIG_PPC64
+	OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
+	OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
+	OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
+	OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
+	OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map);
+	OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map);
+#else
+	OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map);
+#endif
+
+#ifdef CONFIG_BUG
+	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
+#endif
+
+#ifdef CONFIG_KVM
+	OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
+	OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
+	OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid);
+	OFFSET(VCPU_GPRS, kvm_vcpu, arch.regs.gpr);
+	OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave);
+	OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr);
+#ifdef CONFIG_ALTIVEC
+	OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr);
+#endif
+	OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
+	OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
+	OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
+#ifdef CONFIG_PPC_BOOK3S
+	OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
+#endif
+	OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
+	OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
+	OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0);
+	OFFSET(VCPU_SRR1, kvm_vcpu, arch.shregs.srr1);
+	OFFSET(VCPU_SPRG0, kvm_vcpu, arch.shregs.sprg0);
+	OFFSET(VCPU_SPRG1, kvm_vcpu, arch.shregs.sprg1);
+	OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2);
+	OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3);
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry);
+	OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr);
+	OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit);
+	OFFSET(VCPU_TB_GUEST, kvm_vcpu, arch.guest_time);
+	OFFSET(VCPU_TB_CEDE, kvm_vcpu, arch.cede_time);
+	OFFSET(VCPU_CUR_ACTIVITY, kvm_vcpu, arch.cur_activity);
+	OFFSET(VCPU_ACTIVITY_START, kvm_vcpu, arch.cur_tb_start);
+	OFFSET(TAS_SEQCOUNT, kvmhv_tb_accumulator, seqcount);
+	OFFSET(TAS_TOTAL, kvmhv_tb_accumulator, tb_total);
+	OFFSET(TAS_MIN, kvmhv_tb_accumulator, tb_min);
+	OFFSET(TAS_MAX, kvmhv_tb_accumulator, tb_max);
+#endif
+	OFFSET(VCPU_SHARED_SPRG3, kvm_vcpu_arch_shared, sprg3);
+	OFFSET(VCPU_SHARED_SPRG4, kvm_vcpu_arch_shared, sprg4);
+	OFFSET(VCPU_SHARED_SPRG5, kvm_vcpu_arch_shared, sprg5);
+	OFFSET(VCPU_SHARED_SPRG6, kvm_vcpu_arch_shared, sprg6);
+	OFFSET(VCPU_SHARED_SPRG7, kvm_vcpu_arch_shared, sprg7);
+	OFFSET(VCPU_SHADOW_PID, kvm_vcpu, arch.shadow_pid);
+	OFFSET(VCPU_SHADOW_PID1, kvm_vcpu, arch.shadow_pid1);
+	OFFSET(VCPU_SHARED, kvm_vcpu, arch.shared);
+	OFFSET(VCPU_SHARED_MSR, kvm_vcpu_arch_shared, msr);
+	OFFSET(VCPU_SHADOW_MSR, kvm_vcpu, arch.shadow_msr);
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+	OFFSET(VCPU_SHAREDBE, kvm_vcpu, arch.shared_big_endian);
+#endif
+
+	OFFSET(VCPU_SHARED_MAS0, kvm_vcpu_arch_shared, mas0);
+	OFFSET(VCPU_SHARED_MAS1, kvm_vcpu_arch_shared, mas1);
+	OFFSET(VCPU_SHARED_MAS2, kvm_vcpu_arch_shared, mas2);
+	OFFSET(VCPU_SHARED_MAS7_3, kvm_vcpu_arch_shared, mas7_3);
+	OFFSET(VCPU_SHARED_MAS4, kvm_vcpu_arch_shared, mas4);
+	OFFSET(VCPU_SHARED_MAS6, kvm_vcpu_arch_shared, mas6);
+
+	OFFSET(VCPU_KVM, kvm_vcpu, kvm);
+	OFFSET(KVM_LPID, kvm, arch.lpid);
+
+	/* book3s */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	OFFSET(KVM_SDR1, kvm, arch.sdr1);
+	OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
+	OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
+	OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
+	OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
+	OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
+	OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
+	OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
+	OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
+	OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
+	OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
+	OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
+	OFFSET(VCPU_CPU, kvm_vcpu, cpu);
+	OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+	OFFSET(VCPU_PURR, kvm_vcpu, arch.purr);
+	OFFSET(VCPU_SPURR, kvm_vcpu, arch.spurr);
+	OFFSET(VCPU_IC, kvm_vcpu, arch.ic);
+	OFFSET(VCPU_DSCR, kvm_vcpu, arch.dscr);
+	OFFSET(VCPU_AMR, kvm_vcpu, arch.amr);
+	OFFSET(VCPU_UAMOR, kvm_vcpu, arch.uamor);
+	OFFSET(VCPU_IAMR, kvm_vcpu, arch.iamr);
+	OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
+	OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
+	OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
+	OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+	OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
+	OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
+	OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
+	OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
+	OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
+	OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
+	OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+	OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
+	OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
+	OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
+	OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
+	OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
+	OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar);
+	OFFSET(VCPU_SIER, kvm_vcpu, arch.sier);
+	OFFSET(VCPU_SLB, kvm_vcpu, arch.slb);
+	OFFSET(VCPU_SLB_MAX, kvm_vcpu, arch.slb_max);
+	OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
+	OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
+	OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
+	OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
+	OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
+	OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
+	OFFSET(VCPU_CFAR, kvm_vcpu, arch.cfar);
+	OFFSET(VCPU_PPR, kvm_vcpu, arch.ppr);
+	OFFSET(VCPU_FSCR, kvm_vcpu, arch.fscr);
+	OFFSET(VCPU_PSPB, kvm_vcpu, arch.pspb);
+	OFFSET(VCPU_EBBHR, kvm_vcpu, arch.ebbhr);
+	OFFSET(VCPU_EBBRR, kvm_vcpu, arch.ebbrr);
+	OFFSET(VCPU_BESCR, kvm_vcpu, arch.bescr);
+	OFFSET(VCPU_CSIGR, kvm_vcpu, arch.csigr);
+	OFFSET(VCPU_TACR, kvm_vcpu, arch.tacr);
+	OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr);
+	OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop);
+	OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
+	OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
+	OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
+	OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
+	OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
+	OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
+	OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+	OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
+	OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
+	OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
+	OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
+	OFFSET(VCORE_VTB, kvmppc_vcore, vtb);
+	OFFSET(VCPU_SLB_E, kvmppc_slb, orige);
+	OFFSET(VCPU_SLB_V, kvmppc_slb, origv);
+	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
+	OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
+	OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+	OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
+	OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
+	OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
+	OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
+	OFFSET(VCPU_VRSAVE_TM, kvm_vcpu, arch.vrsave_tm);
+	OFFSET(VCPU_CR_TM, kvm_vcpu, arch.cr_tm);
+	OFFSET(VCPU_XER_TM, kvm_vcpu, arch.xer_tm);
+	OFFSET(VCPU_LR_TM, kvm_vcpu, arch.lr_tm);
+	OFFSET(VCPU_CTR_TM, kvm_vcpu, arch.ctr_tm);
+	OFFSET(VCPU_AMR_TM, kvm_vcpu, arch.amr_tm);
+	OFFSET(VCPU_PPR_TM, kvm_vcpu, arch.ppr_tm);
+	OFFSET(VCPU_DSCR_TM, kvm_vcpu, arch.dscr_tm);
+	OFFSET(VCPU_TAR_TM, kvm_vcpu, arch.tar_tm);
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	OFFSET(PACA_SVCPU, paca_struct, shadow_vcpu);
+# define SVCPU_FIELD(x, f)	DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
+#else
+# define SVCPU_FIELD(x, f)	/* expands to nothing: no SVCPU_* symbols in this config */
+#endif
+# define HSTATE_FIELD(x, f)	DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f))
+#else	/* 32-bit */
+# define SVCPU_FIELD(x, f)	DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f))
+# define HSTATE_FIELD(x, f)	DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f))
+#endif
+
+	SVCPU_FIELD(SVCPU_CR, cr);
+	SVCPU_FIELD(SVCPU_XER, xer);
+	SVCPU_FIELD(SVCPU_CTR, ctr);
+	SVCPU_FIELD(SVCPU_LR, lr);
+	SVCPU_FIELD(SVCPU_PC, pc);
+	SVCPU_FIELD(SVCPU_R0, gpr[0]);
+	SVCPU_FIELD(SVCPU_R1, gpr[1]);
+	SVCPU_FIELD(SVCPU_R2, gpr[2]);
+	SVCPU_FIELD(SVCPU_R3, gpr[3]);
+	SVCPU_FIELD(SVCPU_R4, gpr[4]);
+	SVCPU_FIELD(SVCPU_R5, gpr[5]);
+	SVCPU_FIELD(SVCPU_R6, gpr[6]);
+	SVCPU_FIELD(SVCPU_R7, gpr[7]);
+	SVCPU_FIELD(SVCPU_R8, gpr[8]);
+	SVCPU_FIELD(SVCPU_R9, gpr[9]);
+	SVCPU_FIELD(SVCPU_R10, gpr[10]);
+	SVCPU_FIELD(SVCPU_R11, gpr[11]);
+	SVCPU_FIELD(SVCPU_R12, gpr[12]);
+	SVCPU_FIELD(SVCPU_R13, gpr[13]);
+	SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr);
+	SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar);
+	SVCPU_FIELD(SVCPU_LAST_INST, last_inst);
+	SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1);
+#ifdef CONFIG_PPC_BOOK3S_32
+	SVCPU_FIELD(SVCPU_SR, sr);
+#endif
+#ifdef CONFIG_PPC64
+	SVCPU_FIELD(SVCPU_SLB, slb);
+	SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
+	SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);
+#endif
+
+	HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
+	HSTATE_FIELD(HSTATE_HOST_R2, host_r2);
+	HSTATE_FIELD(HSTATE_HOST_MSR, host_msr);
+	HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
+	HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
+	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
+	HSTATE_FIELD(HSTATE_SCRATCH2, scratch2);
+	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
+	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
+	HSTATE_FIELD(HSTATE_NAPPING, napping);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
+	HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
+	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
+	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
+	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
+	HSTATE_FIELD(HSTATE_PTID, ptid);
+	HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
+	HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
+	HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
+	HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
+	HSTATE_FIELD(HSTATE_SIAR, host_mmcr[3]);
+	HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]);
+	HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]);
+	HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]);
+	HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]);
+	HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]);
+	HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]);
+	HSTATE_FIELD(HSTATE_PMC4, host_pmc[3]);
+	HSTATE_FIELD(HSTATE_PMC5, host_pmc[4]);
+	HSTATE_FIELD(HSTATE_PMC6, host_pmc[5]);
+	HSTATE_FIELD(HSTATE_PURR, host_purr);
+	HSTATE_FIELD(HSTATE_SPURR, host_spurr);
+	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
+	HSTATE_FIELD(HSTATE_DABR, dabr);
+	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
+	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+	OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr);
+	OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar);
+	OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
+	OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
+	OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	HSTATE_FIELD(HSTATE_CFAR, cfar);
+	HSTATE_FIELD(HSTATE_PPR, ppr);
+	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#else /* !CONFIG_PPC_BOOK3S */
+	OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);	/* NOTE(review): VCPU_CR/XER/LR/CTR/PC are also emitted above under CONFIG_KVM */
+	OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
+	OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
+	OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
+	OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
+	OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9);
+	OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
+	OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear);
+	OFFSET(VCPU_FAULT_ESR, kvm_vcpu, arch.fault_esr);
+	OFFSET(VCPU_CRIT_SAVE, kvm_vcpu, arch.crit_save);
+#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_KVM */
+
+#ifdef CONFIG_KVM_GUEST
+	OFFSET(KVM_MAGIC_SCRATCH1, kvm_vcpu_arch_shared, scratch1);
+	OFFSET(KVM_MAGIC_SCRATCH2, kvm_vcpu_arch_shared, scratch2);
+	OFFSET(KVM_MAGIC_SCRATCH3, kvm_vcpu_arch_shared, scratch3);
+	OFFSET(KVM_MAGIC_INT, kvm_vcpu_arch_shared, int_pending);
+	OFFSET(KVM_MAGIC_MSR, kvm_vcpu_arch_shared, msr);
+	OFFSET(KVM_MAGIC_CRITICAL, kvm_vcpu_arch_shared, critical);
+	OFFSET(KVM_MAGIC_SR, kvm_vcpu_arch_shared, sr);
+#endif
+
+#ifdef CONFIG_44x
+	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
+	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
+#endif
+#ifdef CONFIG_PPC_E500
+	DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
+	OFFSET(TLBCAM_MAS0, tlbcam, MAS0);
+	OFFSET(TLBCAM_MAS1, tlbcam, MAS1);
+	OFFSET(TLBCAM_MAS2, tlbcam, MAS2);
+	OFFSET(TLBCAM_MAS3, tlbcam, MAS3);
+	OFFSET(TLBCAM_MAS7, tlbcam, MAS7);
+#endif
+
+#if defined(CONFIG_KVM) && defined(CONFIG_SPE)
+	OFFSET(VCPU_EVR, kvm_vcpu, arch.evr[0]);
+	OFFSET(VCPU_ACC, kvm_vcpu, arch.acc);
+	OFFSET(VCPU_SPEFSCR, kvm_vcpu, arch.spefscr);
+	OFFSET(VCPU_HOST_SPEFSCR, kvm_vcpu, arch.host_spefscr);
+#endif
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	OFFSET(VCPU_HOST_MAS4, kvm_vcpu, arch.host_mas4);
+	OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
+#endif
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
+	OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
+	OFFSET(VCPU_TIMING_LAST_ENTER_TBU, kvm_vcpu, arch.timing_last_enter.tv32.tbu);
+	OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
+#endif
+
+	DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+
+#ifdef CONFIG_PPC_8xx
+	DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
+#endif
+
+#ifdef CONFIG_XMON
+	DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
new file mode 100644
index 0000000000..92298d6a3a
--- /dev/null
+++ b/arch/powerpc/kernel/audit.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+#include "audit_32.h"
+
+/*
+ * Native-ABI audit class tables.
+ *
+ * Each table's entries come from the asm-generic header included inside
+ * its initializer, and each is terminated by ~0U.  The tables are handed
+ * to audit_register_class() from audit_classes_init() below.
+ */
+
+/* Entries for AUDIT_CLASS_DIR_WRITE. */
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+/* Entries for AUDIT_CLASS_READ. */
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+/* Entries for AUDIT_CLASS_WRITE. */
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+/* Entries for AUDIT_CLASS_CHATTR. */
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+/* Entries for AUDIT_CLASS_SIGNAL. */
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+/*
+ * Classify an audit architecture value.
+ *
+ * Returns 1 only on a CONFIG_PPC64 build when @arch is AUDIT_ARCH_PPC;
+ * returns 0 in every other case.
+ */
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_PPC64
+	return arch == AUDIT_ARCH_PPC;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Map a syscall number to one of the AUDITSC_* classification codes.
+ *
+ * @abi:     audit architecture of the caller
+ * @syscall: syscall number
+ *
+ * On CONFIG_PPC64, a caller with @abi == AUDIT_ARCH_PPC is delegated to
+ * ppc32_classify_syscall().  Otherwise open, openat, socketcall, execve
+ * and openat2 get their dedicated codes and everything else is
+ * AUDITSC_NATIVE.
+ */
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_PPC64
+	if (abi == AUDIT_ARCH_PPC)
+		return ppc32_classify_syscall(syscall);
+#endif
+	if (syscall == __NR_open)
+		return AUDITSC_OPEN;
+	if (syscall == __NR_openat)
+		return AUDITSC_OPENAT;
+	if (syscall == __NR_socketcall)
+		return AUDITSC_SOCKETCALL;
+	if (syscall == __NR_execve)
+		return AUDITSC_EXECVE;
+	if (syscall == __NR_openat2)
+		return AUDITSC_OPENAT2;
+
+	return AUDITSC_NATIVE;
+}
+
+/*
+ * Register the audit class tables; run as an initcall.
+ *
+ * On CONFIG_PPC64 the ppc32_* tables (declared extern here, defined
+ * outside this file) are registered under the *_32 class ids first.
+ * The native tables defined in this file are then registered
+ * unconditionally.  The return values of audit_register_class() are not
+ * checked; this function always returns 0.
+ */
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_PPC64
+	extern __u32 ppc32_dir_class[];
+	extern __u32 ppc32_write_class[];
+	extern __u32 ppc32_read_class[];
+	extern __u32 ppc32_chattr_class[];
+	extern __u32 ppc32_signal_class[];
+	audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class);
+	audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class);
+#endif
+	/* Native-ABI tables. */
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h
new file mode 100644
index 0000000000..c6c79c3041
--- /dev/null
+++ b/arch/powerpc/kernel/audit_32.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __AUDIT_32_H__
+#define __AUDIT_32_H__
+
+/*
+ * Classify a syscall number for the AUDIT_ARCH_PPC ABI.  Called by
+ * audit_classify_syscall() in audit.c on CONFIG_PPC64 builds; the
+ * definition lives outside this header.
+ */
+extern int ppc32_classify_syscall(unsigned);
+
+#endif /* __AUDIT_32_H__ */
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
new file mode 100644
index 0000000000..19e46fd623
--- /dev/null
+++ b/arch/powerpc/kernel/btext.c
@@ -0,0 +1,931 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Procedures for drawing on the screen early on in the boot process.
+ *
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
+
+#include <asm/sections.h>
+#include <asm/btext.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+/*
+ * With NO_SCROLL defined, btext_drawchar() wraps from the bottom of the
+ * screen back to the top instead of scrolling, and scrollscreen() is
+ * compiled out.
+ */
+#define NO_SCROLL
+
+#ifndef NO_SCROLL
+static void scrollscreen(void);
+#endif
+
+/*
+ * Place a variable explicitly in the .data section.
+ * NOTE(review): presumably so this state is not in .bss during early
+ * boot -- confirm.
+ */
+#define __force_data __section(".data")
+
+/* Cursor position, in character cells (glyphs are 8 pixels by 16 rows). */
+static int g_loc_X __force_data;
+static int g_loc_Y __force_data;
+/* Screen size in character cells: width / 8 and height / 16. */
+static int g_max_loc_X __force_data;
+static int g_max_loc_Y __force_data;
+
+/* Bytes per scanline (the "pitch" passed to the setup routines). */
+static int dispDeviceRowBytes __force_data;
+/* Bits per pixel; the setup routines store a depth of 15 as 16. */
+static int dispDeviceDepth  __force_data;
+/* [0], [1]: origin (set to 0 by this file); [2]: width; [3]: height. */
+static int dispDeviceRect[4] __force_data;
+/* Display address as supplied by the caller / device tree. */
+static unsigned char *dispDeviceBase __force_data;
+/* Address actually used for drawing; see calc_base(). */
+static unsigned char *logicalDisplayBase __force_data;
+
+/* BAT values computed by btext_prepare_BAT(). */
+unsigned long disp_BAT[2] __initdata = {0, 0};
+
+/* Font size in bytes: 16 bytes per glyph, 256 glyphs. */
+#define cmapsz	(16*256)
+
+/* Tentative definition; the initialized font table is at the end of the file. */
+static unsigned char vga_font[cmapsz];
+
+/* Non-zero while drawing is allowed; the drawing entry points check it. */
+static int boot_text_mapped __force_data;
+
+/* Defined outside this file; used by rmci_maybe_on()/rmci_maybe_off(). */
+extern void rmci_on(void);
+extern void rmci_off(void);
+
+/*
+ * Call rmci_on() when the MSR_DR bit is clear in the current MSR.
+ * Only active with CONFIG_PPC_EARLY_DEBUG_BOOTX on CONFIG_PPC64;
+ * otherwise this is an empty function.
+ */
+static inline void rmci_maybe_on(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
+	if ((mfmsr() & MSR_DR) == 0)
+		rmci_on();
+#endif
+}
+
+/*
+ * Call rmci_off() when the MSR_DR bit is clear in the current MSR.
+ * Only active with CONFIG_PPC_EARLY_DEBUG_BOOTX on CONFIG_PPC64;
+ * otherwise this is an empty function.
+ */
+static inline void rmci_maybe_off(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
+	if ((mfmsr() & MSR_DR) == 0)
+		rmci_off();
+#endif
+}
+
+
+#ifdef CONFIG_PPC32
+/*
+ * Compute the BAT values that map the display and store them in
+ * disp_BAT.  Those values are then used from head.S to map the display
+ * during identify_machine() and MMU_Init().
+ *
+ * The display is mapped at PAGE_OFFSET + 0x10000000 (0xD0000000 when
+ * PAGE_OFFSET is 0xC0000000) rather than 1:1, because some CHRP
+ * machines put the frame buffer in the region starting at PAGE_OFFSET.
+ * The mapping is temporary and disappears as soon as the setup done by
+ * MMU_Init() is applied.
+ *
+ * The physical address is aligned down to 16MB and a 16MB BAT
+ * (BL_16M) is used.  This may cause trouble if the frame buffer is
+ * really badly aligned.
+ *
+ * If no display base has been recorded, boot_text_mapped is cleared and
+ * nothing else is touched.
+ */
+void __init btext_prepare_BAT(void)
+{
+	const unsigned long bat_mask = 0xFF000000UL;
+	unsigned long virt = PAGE_OFFSET + 0x10000000;
+	unsigned long phys = (unsigned long)dispDeviceBase;
+
+	if (phys == 0) {
+		boot_text_mapped = 0;
+		return;
+	}
+
+	disp_BAT[0] = virt | (BL_16M<<2) | 2;
+	disp_BAT[1] = (phys & bat_mask) |
+		      (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
+	/* keep the offset of the frame buffer within the 16MB block */
+	logicalDisplayBase = (void *) (virt + (phys & ~bat_mask));
+}
+#endif
+
+
+/*
+ * Enable the early boot text when doing OF booting or from within bootx
+ * init.  Must be followed by a btext_unmap() call before the logical
+ * address becomes unusable.
+ *
+ * @width, @height: display size in pixels
+ * @depth:          bits per pixel (15 is recorded as 16)
+ * @pitch:          bytes per scanline
+ * @address:        frame buffer address, used as both the device base
+ *                  and the drawing base
+ */
+void __init btext_setup_display(int width, int height, int depth, int pitch,
+				unsigned long address)
+{
+	unsigned char *fb = (unsigned char *)address;
+
+	/* frame buffer location */
+	dispDeviceBase = fb;
+	logicalDisplayBase = fb;
+
+	/* geometry */
+	dispDeviceRect[0] = 0;
+	dispDeviceRect[1] = 0;
+	dispDeviceRect[2] = width;
+	dispDeviceRect[3] = height;
+	dispDeviceRowBytes = pitch;
+	dispDeviceDepth = (depth == 15) ? 16 : depth;
+
+	/* cursor: top-left, on a grid of 8x16 pixel cells */
+	g_loc_X = 0;
+	g_loc_Y = 0;
+	g_max_loc_X = width / 8;
+	g_max_loc_Y = height / 16;
+
+	boot_text_mapped = 1;
+}
+
+/*
+ * Mark the boot text display as unmapped.  Only the boot_text_mapped
+ * flag is cleared; the drawing entry points test that flag and return
+ * early while it is zero.
+ */
+void __init btext_unmap(void)
+{
+	boot_text_mapped = 0;
+}
+
+/* Here's a small text engine to use during early boot
+ * or for debugging purposes
+ *
+ * todo:
+ *
+ *  - build some kind of vgacon with it to enable early printk
+ *  - move to a separate file
+ *  - add a few video driver hooks to keep in sync with display
+ *    changes.
+ */
+
+/*
+ * Map the frame buffer at dispDeviceBase with ioremap_wc() and point
+ * logicalDisplayBase at it.
+ *
+ * boot_text_mapped is cleared on entry and only set again once the
+ * mapping has succeeded.  It stays clear if no display base is recorded
+ * or if ioremap_wc() fails.
+ */
+void btext_map(void)
+{
+	unsigned long base, offset, size;
+	unsigned char *vbase;
+
+	/* By default, we are no longer mapped */
+	boot_text_mapped = 0;
+	if (!dispDeviceBase)
+		return;
+	/*
+	 * Align down to 4K.  Clear only the low 12 bits: a 0xFFFFF000UL
+	 * mask would also drop bits 32-63 of the address on 64-bit builds.
+	 */
+	base = ((unsigned long) dispDeviceBase) & ~0xFFFUL;
+	offset = ((unsigned long) dispDeviceBase) - base;
+	size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+		+ dispDeviceRect[0];
+	vbase = ioremap_wc(base, size);
+	if (!vbase)
+		return;
+	logicalDisplayBase = vbase + offset;
+	boot_text_mapped = 1;
+}
+
+/*
+ * Look up property @bootx_name on @np, falling back to @of_name.
+ * Returns NULL when neither property exists.
+ */
+static const u32 * __init btext_lookup_prop(struct device_node *np,
+					    const char *bootx_name,
+					    const char *of_name)
+{
+	const u32 *prop = of_get_property(np, bootx_name, NULL);
+
+	if (prop == NULL)
+		prop = of_get_property(np, of_name, NULL);
+	return prop;
+}
+
+/*
+ * Set up the boot text state from display node @np and map the frame
+ * buffer with btext_map().
+ *
+ * Width, height and depth are mandatory.  The line pitch defaults to
+ * width * bytes-per-pixel unless a linebytes property other than
+ * 0xffffffff is present.  For each value the "linux,bootx-*" property
+ * is preferred over the standard one.
+ *
+ * Returns 0 once the state has been recorded (the result of btext_map()
+ * is not reported), or -EINVAL if a mandatory property is missing or
+ * the address is absent or zero.
+ */
+static int __init btext_initialize(struct device_node *np)
+{
+	unsigned int width, height, depth, pitch;
+	unsigned long address = 0;
+	const u32 *prop;
+
+	prop = btext_lookup_prop(np, "linux,bootx-width", "width");
+	if (!prop)
+		return -EINVAL;
+	width = *prop;
+
+	prop = btext_lookup_prop(np, "linux,bootx-height", "height");
+	if (!prop)
+		return -EINVAL;
+	height = *prop;
+
+	prop = btext_lookup_prop(np, "linux,bootx-depth", "depth");
+	if (!prop)
+		return -EINVAL;
+	depth = *prop;
+
+	/* default pitch: whole bytes per pixel times the width */
+	pitch = width * ((depth + 7) / 8);
+	prop = btext_lookup_prop(np, "linux,bootx-linebytes", "linebytes");
+	if (prop && *prop != 0xffffffffu)
+		pitch = *prop;
+	/* NOTE(review): a pitch of 1 is replaced by 0x1000; reason not visible here */
+	if (pitch == 1)
+		pitch = 0x1000;
+
+	prop = btext_lookup_prop(np, "linux,bootx-addr", "address");
+	if (prop)
+		address = *prop;
+
+	/* FIXME: Add support for PCI reg properties. Right now, only
+	 * reliable on macs
+	 */
+	if (address == 0)
+		return -EINVAL;
+
+	dispDeviceBase = (unsigned char *)address;
+	dispDeviceRect[0] = 0;
+	dispDeviceRect[1] = 0;
+	dispDeviceRect[2] = width;
+	dispDeviceRect[3] = height;
+	dispDeviceRowBytes = pitch;
+	dispDeviceDepth = (depth == 15) ? 16 : depth;
+
+	g_loc_X = 0;
+	g_loc_Y = 0;
+	g_max_loc_X = width / 8;
+	g_max_loc_Y = height / 16;
+
+	btext_map();
+
+	return 0;
+}
+
+/*
+ * Find a display node and initialize the boot text on it.
+ *
+ * The stdout node (of_stdout) is tried first if it is of type
+ * "display".  If that does not succeed and @allow_nonstdout is
+ * non-zero, every "display" node carrying a "linux,opened" property is
+ * tried in turn until one initializes.
+ *
+ * Returns 0 on success, -ENODEV if no node was tried, otherwise the
+ * result of the last btext_initialize() attempt.
+ */
+int __init btext_find_display(int allow_nonstdout)
+{
+	struct device_node *np = of_stdout;
+	int rc = -ENODEV;
+
+	if (of_node_is_type(np, "display")) {
+		rc = btext_initialize(np);
+	} else {
+		printk("boot stdout isn't a display !\n");
+		np = NULL;
+	}
+
+	if (!allow_nonstdout || rc == 0)
+		return rc;
+
+	for_each_node_by_type(np, "display") {
+		if (of_property_read_bool(np, "linux,opened")) {
+			printk("trying %pOF ...\n", np);
+			rc = btext_initialize(np);
+			printk("result: %d\n", rc);
+		}
+		if (rc != 0)
+			continue;
+
+		/* leaving the iterator early: release the current node */
+		of_node_put(np);
+		break;
+	}
+	return rc;
+}
+
+/*
+ * Return the frame buffer address of pixel (x, y).
+ *
+ * logicalDisplayBase is used when set, dispDeviceBase otherwise.  The
+ * horizontal step is dispDeviceDepth / 8 bytes per pixel and the
+ * vertical step is dispDeviceRowBytes per line; both coordinates are
+ * offset by the display rectangle origin.
+ */
+static unsigned char * calc_base(int x, int y)
+{
+	unsigned char *origin = logicalDisplayBase ? logicalDisplayBase
+						   : dispDeviceBase;
+	int col_bytes = (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
+	int row_bytes = (y + dispDeviceRect[1]) * dispDeviceRowBytes;
+
+	return origin + col_bytes + row_bytes;
+}
+
+/*
+ * Adjust the display to a new resolution.
+ *
+ * @phys:           new frame buffer address
+ * @width, @height: new size in pixels
+ * @depth:          new bits per pixel (stored as given)
+ * @pitch:          new bytes per scanline
+ *
+ * Does nothing if no display has been set up, or if @phys differs from
+ * the current base in address bits 28-31 (the "same frame buffer,
+ * within 256MB" check).  Otherwise the geometry is replaced, any
+ * existing mapping is dropped, the frame buffer is mapped again through
+ * btext_map() and the cursor returns to the top-left corner.
+ */
+void btext_update_display(unsigned long phys, int width, int height,
+			  int depth, int pitch)
+{
+	unsigned long cur = (unsigned long)dispDeviceBase;
+
+	if (!cur || ((phys ^ cur) & 0xf0000000))
+		return;
+
+	dispDeviceBase = (__u8 *) phys;
+	dispDeviceRowBytes = pitch;
+	dispDeviceDepth = depth;
+	dispDeviceRect[0] = 0;
+	dispDeviceRect[1] = 0;
+	dispDeviceRect[2] = width;
+	dispDeviceRect[3] = height;
+
+	/* drop the old mapping before creating one for the new geometry */
+	if (boot_text_mapped) {
+		iounmap(logicalDisplayBase);
+		boot_text_mapped = 0;
+	}
+	btext_map();
+
+	g_loc_X = 0;
+	g_loc_Y = 0;
+	g_max_loc_X = width / 8;
+	g_max_loc_Y = height / 16;
+}
+EXPORT_SYMBOL(btext_update_display);
+
+/*
+ * Fill the whole display rectangle with zeros, one 32-bit word at a
+ * time, line by line.  The work is bracketed by rmci_maybe_on() and
+ * rmci_maybe_off().
+ */
+void __init btext_clearscreen(void)
+{
+	unsigned int *line = (unsigned int *)calc_base(0, 0);
+	/* 32-bit words per visible line */
+	unsigned long words = ((dispDeviceRect[2] - dispDeviceRect[0]) *
+			       (dispDeviceDepth >> 3)) >> 2;
+	int row, n;
+
+	rmci_maybe_on();
+	for (row = 0; row < (dispDeviceRect[3] - dispDeviceRect[1]); row++) {
+		unsigned int *p = line;
+
+		for (n = words; n; --n)
+			*p++ = 0;
+		/* advance by one scanline, counted in 32-bit words */
+		line += (dispDeviceRowBytes >> 2);
+	}
+	rmci_maybe_off();
+}
+
+/*
+ * Issue a "dcbst" on every display line, stepping 8 words (32 bytes) at
+ * a time across the visible width, then a "sync" once all lines are
+ * done.
+ */
+void __init btext_flushscreen(void)
+{
+	unsigned int *base	= (unsigned int *)calc_base(0, 0);
+	/* visible line length in 32-bit words */
+	unsigned long width 	= ((dispDeviceRect[2] - dispDeviceRect[0]) *
+					(dispDeviceDepth >> 3)) >> 2;
+	int i,j;
+
+	for (i=0; i < (dispDeviceRect[3] - dispDeviceRect[1]); i++)
+	{
+		unsigned int *ptr = base;
+		/* one dcbst per 8 words of the line */
+		for(j = width; j > 0; j -= 8) {
+			__asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr));
+			ptr += 8;
+		}
+		/* next scanline; row bytes converted to words */
+		base += (dispDeviceRowBytes >> 2);
+	}
+	__asm__ __volatile__ ("sync" ::: "memory");
+}
+
+/*
+ * Same as btext_flushscreen(), but limited to the 16 pixel rows of the
+ * text line the cursor is currently on (g_loc_Y).
+ */
+void __init btext_flushline(void)
+{
+	/* first pixel row of the current text line: g_loc_Y * 16 */
+	unsigned int *base	= (unsigned int *)calc_base(0, g_loc_Y << 4);
+	/* visible line length in 32-bit words */
+	unsigned long width 	= ((dispDeviceRect[2] - dispDeviceRect[0]) *
+					(dispDeviceDepth >> 3)) >> 2;
+	int i,j;
+
+	for (i=0; i < 16; i++)
+	{
+		unsigned int *ptr = base;
+		/* one dcbst per 8 words of the line */
+		for(j = width; j > 0; j -= 8) {
+			__asm__ __volatile__ ("dcbst 0,%0" :: "r" (ptr));
+			ptr += 8;
+		}
+		/* next scanline; row bytes converted to words */
+		base += (dispDeviceRowBytes >> 2);
+	}
+	__asm__ __volatile__ ("sync" ::: "memory");
+}
+
+
+#ifndef NO_SCROLL
+/*
+ * Scroll the display up by one text line (16 pixel rows): copy every
+ * pixel row from 16 rows below it, then zero the last 16 rows.  Only
+ * built when NO_SCROLL is not defined.
+ */
+static void scrollscreen(void)
+{
+	unsigned int *from = (unsigned int *)calc_base(0,16);
+	unsigned int *to = (unsigned int *)calc_base(0,0);
+	/* 32-bit words per visible line */
+	unsigned long words = ((dispDeviceRect[2] - dispDeviceRect[0]) *
+			       (dispDeviceDepth >> 3)) >> 2;
+	int row, n;
+
+	rmci_maybe_on();
+
+	/* move everything but the first text line up */
+	for (row = 0; row < (dispDeviceRect[3] - dispDeviceRect[1] - 16); row++) {
+		unsigned int *s = from;
+		unsigned int *d = to;
+
+		for (n = words; n; --n)
+			*d++ = *s++;
+		from += (dispDeviceRowBytes >> 2);
+		to += (dispDeviceRowBytes >> 2);
+	}
+
+	/* blank the text line that was freed at the bottom */
+	for (row = 0; row < 16; row++) {
+		unsigned int *d = to;
+
+		for (n = words; n; --n)
+			*d++ = 0;
+		to += (dispDeviceRowBytes >> 2);
+	}
+
+	rmci_maybe_off();
+}
+#endif /* ndef NO_SCROLL */
+
+/*
+ * Indexed by a 4-bit value: each set bit becomes a 0xff byte in the
+ * 32-bit result, with the most significant input bit mapped to the
+ * most significant byte.  Used by draw_byte_8().
+ */
+static unsigned int expand_bits_8[16] = {
+	0x00000000,
+	0x000000ff,
+	0x0000ff00,
+	0x0000ffff,
+	0x00ff0000,
+	0x00ff00ff,
+	0x00ffff00,
+	0x00ffffff,
+	0xff000000,
+	0xff0000ff,
+	0xff00ff00,
+	0xff00ffff,
+	0xffff0000,
+	0xffff00ff,
+	0xffffff00,
+	0xffffffff
+};
+
+/*
+ * Indexed by a 2-bit value: each set bit becomes a 0xffff half-word in
+ * the 32-bit result.  Used by draw_byte_16().
+ */
+static unsigned int expand_bits_16[4] = {
+	0x00000000,
+	0x0000ffff,
+	0xffff0000,
+	0xffffffff
+};
+
+
+/*
+ * Draw one 8x16 glyph with one 32-bit word per pixel.
+ *
+ * @font: 16 glyph rows, one byte each, most significant bit leftmost
+ * @base: destination of the glyph's top-left pixel
+ * @rb:   bytes from one destination row to the next
+ *
+ * A set bit writes all ones to the pixel, a clear bit writes zero.
+ */
+static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+{
+	int fg = 0xFFFFFFFFUL;
+	int bg = 0x00000000UL;
+	int row, px, pattern;
+
+	for (row = 0; row < 16; ++row) {
+		pattern = *font++;
+		/* -(bit) is all ones for a set bit and zero for a clear one */
+		for (px = 0; px < 8; ++px)
+			base[px] = (-((pattern >> (7 - px)) & 1) & fg) ^ bg;
+		base = (unsigned int *) ((char *)base + rb);
+	}
+}
+
+/*
+ * Draw one 8x16 glyph with two pixels per 32-bit word.
+ *
+ * Each glyph row byte is consumed two bits at a time, most significant
+ * pair first; expand_bits_16[] turns each pair into a word, giving four
+ * words per row.  @rb is the byte distance between destination rows.
+ */
+static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+{
+	int fg = 0xFFFFFFFFUL;
+	int bg = 0x00000000UL;
+	unsigned int *eb = expand_bits_16;
+	int row, w, pattern;
+
+	for (row = 0; row < 16; ++row) {
+		pattern = *font++;
+		for (w = 0; w < 4; ++w)
+			base[w] = (eb[(pattern >> (6 - 2 * w)) & 3] & fg) ^ bg;
+		base = (unsigned int *) ((char *)base + rb);
+	}
+}
+
+/*
+ * Draw one 8x16 glyph with four pixels per 32-bit word.
+ *
+ * The high and low nibble of each glyph row byte are expanded through
+ * expand_bits_8[] into two words; set pixels get the value 0x0F.  @rb
+ * is the byte distance between destination rows.
+ */
+static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+{
+	int fg = 0x0F0F0F0FUL;
+	int bg = 0x00000000UL;
+	unsigned int *eb = expand_bits_8;
+	int row, pattern;
+
+	for (row = 0; row < 16; ++row) {
+		pattern = *font++;
+		base[0] = (eb[pattern >> 4] & fg) ^ bg;
+		base[1] = (eb[pattern & 0xf] & fg) ^ bg;
+		base = (unsigned int *) ((char *)base + rb);
+	}
+}
+
+/*
+ * Draw character @c at text cell (@locX, @locY).
+ *
+ * The cell is converted to pixel coordinates (8 pixels wide, 16 high),
+ * the glyph is taken from vga_font[] at 16 bytes per character, and the
+ * routine matching dispDeviceDepth does the drawing: 24 and 32 use
+ * draw_byte_32(), 15 and 16 use draw_byte_16(), 8 uses draw_byte_8().
+ * Any other depth draws nothing.
+ */
+static noinline void draw_byte(unsigned char c, long locX, long locY)
+{
+	unsigned int *dst = (unsigned int *)calc_base(locX << 3, locY << 4);
+	unsigned char *glyph = &vga_font[((unsigned int)c) * 16];
+	int pitch = dispDeviceRowBytes;
+	int depth = dispDeviceDepth;
+
+	rmci_maybe_on();
+	if (depth == 24 || depth == 32)
+		draw_byte_32(glyph, dst, pitch);
+	else if (depth == 15 || depth == 16)
+		draw_byte_16(glyph, dst, pitch);
+	else if (depth == 8)
+		draw_byte_8(glyph, dst, pitch);
+	rmci_maybe_off();
+}
+
+/*
+ * Draw one character at the cursor and advance the cursor.
+ *
+ * Does nothing while boot_text_mapped is zero.  Backspace, tab,
+ * carriage return and newline only move the cursor; any other
+ * character is drawn and the cursor moves one cell to the right.
+ * Reaching the right edge starts a new line.  With NO_SCROLL defined
+ * (as it is in this file) the cursor wraps from the bottom of the
+ * screen to the top and every newly entered line is blanked first;
+ * otherwise the screen is scrolled.
+ */
+void btext_drawchar(char c)
+{
+	/* set when the cursor moved onto a new line */
+	int cline = 0;
+#ifdef NO_SCROLL
+	int x;
+#endif
+	if (!boot_text_mapped)
+		return;
+
+	switch (c) {
+	case '\b':
+		if (g_loc_X > 0)
+			--g_loc_X;
+		break;
+	case '\t':
+		/* advance to the next multiple of 8 columns */
+		g_loc_X = (g_loc_X & -8) + 8;
+		break;
+	case '\r':
+		g_loc_X = 0;
+		break;
+	case '\n':
+		g_loc_X = 0;
+		g_loc_Y++;
+		cline = 1;
+		break;
+	default:
+		draw_byte(c, g_loc_X++, g_loc_Y);
+	}
+	/* past the right edge: continue on the next line */
+	if (g_loc_X >= g_max_loc_X) {
+		g_loc_X = 0;
+		g_loc_Y++;
+		cline = 1;
+	}
+#ifndef NO_SCROLL
+	while (g_loc_Y >= g_max_loc_Y) {
+		scrollscreen();
+		g_loc_Y--;
+	}
+#else
+	/* wrap around from bottom to top of screen so we don't
+	   waste time scrolling each line.  -- paulus. */
+	if (g_loc_Y >= g_max_loc_Y)
+		g_loc_Y = 0;
+	/* blank the line just entered by overwriting it with spaces */
+	if (cline) {
+		for (x = 0; x < g_max_loc_X; ++x)
+			draw_byte(' ', x, g_loc_Y);
+	}
+#endif
+}
+
+/*
+ * Draw the NUL-terminated string @c through btext_drawchar().
+ * Does nothing while boot_text_mapped is zero.
+ */
+void btext_drawstring(const char *c)
+{
+	if (!boot_text_mapped)
+		return;
+
+	for (; *c; c++)
+		btext_drawchar(*c);
+}
+
+/*
+ * Draw exactly @len characters starting at @c through btext_drawchar();
+ * NUL bytes do not end the output.  Does nothing while boot_text_mapped
+ * is zero.
+ */
+void __init btext_drawtext(const char *c, unsigned int len)
+{
+	unsigned int i;
+
+	if (!boot_text_mapped)
+		return;
+
+	for (i = 0; i < len; i++)
+		btext_drawchar(c[i]);
+}
+
+/*
+ * Draw @v as fixed-width hexadecimal followed by a space: 16 digits on
+ * CONFIG_PPC64, 8 digits otherwise, most significant byte first.  Does
+ * nothing while boot_text_mapped is zero.
+ */
+void __init btext_drawhex(unsigned long v)
+{
+#ifdef CONFIG_PPC64
+	int shift = 56;
+#else
+	int shift = 24;
+#endif
+
+	if (!boot_text_mapped)
+		return;
+
+	/* two hex digits per byte, starting from the top byte */
+	for (; shift >= 0; shift -= 8) {
+		btext_drawchar(hex_asc_hi(v >> shift));
+		btext_drawchar(hex_asc_lo(v >> shift));
+	}
+	btext_drawchar(' ');
+}
+
+/*
+ * Route udbg character output to the boot text display by installing
+ * btext_drawchar() as udbg_putc.
+ */
+void __init udbg_init_btext(void)
+{
+	/* If btext is enabled, we might have a BAT setup for early display,
+	 * thus we do enable some very basic udbg output
+	 */
+	udbg_putc = btext_drawchar;
+}
+
+static unsigned char vga_font[cmapsz] = {
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
+0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
+0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
+0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
+0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
+0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
+0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
+0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
+0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
+0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
+0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
+0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
+0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
+0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
+0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
+0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
+0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
+0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
+0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
+0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
+0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
+0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
+0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
+0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
+0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
+0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
+0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
+0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
+0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
+0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
+0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
+0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
+0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
+0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
+0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
+0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
+0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
+0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
+0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
+0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
+0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
+0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
+0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
+0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
+0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
+0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
+0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
+0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
+0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
+0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
+0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
+0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
+0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
+0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
+0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
+0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
+0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
+0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
+0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
+0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
+0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
+0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
+0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
+0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
+0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
+0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
+0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
+0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
+0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
+0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
+0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
+0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
+0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
+0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
+0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
+0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
+0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
+0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
+0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
+0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
+0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
+0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
+0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
+0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
+0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
+0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
+0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
+0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
+0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
+0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
+0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
+0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
+0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
+0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
+0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
+0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
+0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
+0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
+0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00,
+};
+
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644
index 0000000000..f502337dd3
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -0,0 +1,953 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Processor cache information made available to userspace via sysfs;
+ * intended to be compatible with x86 intel_cacheinfo implementation.
+ *
+ * Copyright 2008 IBM Corporation
+ * Author: Nathan Lynch
+ */
+
+#define pr_fmt(fmt) "cacheinfo: " fmt
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+
+#include "cacheinfo.h"
+
+/* Per-cpu bookkeeping object (stored in cache_dir_pcpu) for tracking:
+ * - a "cache" kobject for the top-level directory
+ * - a list of "index" objects representing the cpu's local cache hierarchy
+ */
+struct cache_dir {
+	struct kobject *kobj; /* bare (not embedded) kobject for cache
+			       * directory, from kobject_create_and_add() */
+	struct cache_index_dir *index; /* head of the list of index objects */
+};
+
+/* "index" object: each cpu's cache directory has one index subdirectory
+ * per cache object associated with the cpu.  The object's lifetime is
+ * managed via the embedded kobject; cache_index_release() frees it.
+ */
+struct cache_index_dir {
+	struct kobject kobj; /* embedded; its release frees this object */
+	struct cache_index_dir *next; /* next index in parent directory */
+	struct cache *cache; /* cache described by this directory */
+};
+
+/* Template for determining which OF properties to query for a given
+ * cache type; one entry per CACHE_TYPE_* value */
+struct cache_type_info {
+	const char *name; /* string printed by the sysfs "type" attribute */
+	const char *size_prop; /* property read by cache_size() */
+
+	/* Allow for both [di]-cache-line-size and
+	 * [di]-cache-block-size properties.  According to the PowerPC
+	 * Processor binding, -line-size should be provided if it
+	 * differs from the cache block size (that which is operated
+	 * on by cache instructions), so we look for -line-size first.
+	 * See cache_get_line_size(). */
+
+	const char *line_size_props[2]; /* tried in array order */
+	const char *nr_sets_prop; /* property read by cache_nr_sets() */
+};
+
+/* Indices into the cache_type_info array; keep in sync with its entry order. */
+#define CACHE_TYPE_UNIFIED     0 /* cache-size, cache-block-size, etc. */
+#define CACHE_TYPE_UNIFIED_D   1 /* d-cache-size, d-cache-block-size, etc */
+#define CACHE_TYPE_INSTRUCTION 2 /* i-cache-size, i-cache-block-size, etc. */
+#define CACHE_TYPE_DATA        3 /* d-cache-size, d-cache-block-size, etc. */
+
+static const struct cache_type_info cache_type_info[] = {
+	[CACHE_TYPE_UNIFIED] = {
+		/* Unified (typically L2) cache on embedded systems, which
+		 * describe it with cache-size, cache-block-size, etc. */
+		.name            = "Unified",
+		.size_prop       = "cache-size",
+		.line_size_props = { "cache-line-size",
+				     "cache-block-size", },
+		.nr_sets_prop    = "cache-sets",
+	},
+	[CACHE_TYPE_UNIFIED_D] = {
+		/* The PowerPC Processor binding requires the [di]-cache-*
+		 * properties of a unified cache to be equal, so the
+		 * d-cache properties alone are sufficient. */
+		.name            = "Unified",
+		.size_prop       = "d-cache-size",
+		.line_size_props = { "d-cache-line-size",
+				     "d-cache-block-size", },
+		.nr_sets_prop    = "d-cache-sets",
+	},
+	[CACHE_TYPE_INSTRUCTION] = {
+		.name            = "Instruction",
+		.size_prop       = "i-cache-size",
+		.line_size_props = { "i-cache-line-size",
+				     "i-cache-block-size", },
+		.nr_sets_prop    = "i-cache-sets",
+	},
+	[CACHE_TYPE_DATA] = {
+		.name            = "Data",
+		.size_prop       = "d-cache-size",
+		.line_size_props = { "d-cache-line-size",
+				     "d-cache-block-size", },
+		.nr_sets_prop    = "d-cache-sets",
+	},
+};
+
+/* Cache object: each instance of this corresponds to a distinct cache
+ * in the system.  Split (Harvard) caches get separate objects: one
+ * each for instruction and data, and each refers to the same OF node.
+ * The refcount of the OF node is elevated for the lifetime of the
+ * cache object (taken in cache_init(), dropped in release_cache()).
+ * A cache object is released when its shared_cpu_map becomes empty
+ * (see cache_cpu_clear).
+ *
+ * A cache object is on two lists: an unsorted global list (cache_list)
+ * of cache objects; and a singly-linked list representing the local
+ * cache hierarchy, ordered by level (e.g. L1d -> L1i -> L2 -> L3).
+ */
+struct cache {
+	struct device_node *ofnode;    /* OF node for this cache, may be cpu */
+	struct cpumask shared_cpu_map; /* online CPUs using this cache */
+	int type;                      /* split cache disambiguation */
+	int level;                     /* level not explicit in device tree */
+	int group_id;                  /* id of the group of threads that share this cache */
+	struct list_head list;         /* global list of cache objects */
+	struct cache *next_local;      /* next cache of >= level */
+};
+
+static DEFINE_PER_CPU(struct cache_dir *, cache_dir_pcpu); /* NULL while the cpu has no sysfs cache dir */
+
+/* global list of all cache objects; traversal/modification of this list
+ * occurs only at cpu hotplug time and is serialized by cpu hotplug locking
+ */
+static LIST_HEAD(cache_list);
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
+{
+	return container_of(k, struct cache_index_dir, kobj); /* k is the embedded kobj */
+}
+
+static const char *cache_type_string(const struct cache *cache)
+{
+	return cache_type_info[cache->type].name; /* type is a CACHE_TYPE_* index */
+}
+
+static void cache_init(struct cache *cache, int type, int level,
+		       struct device_node *ofnode, int group_id)
+{
+	INIT_LIST_HEAD(&cache->list);
+	cache->group_id = group_id;
+	cache->ofnode = of_node_get(ofnode); /* ref dropped in release_cache() */
+	cache->level = level;
+	cache->type = type;
+	list_add(&cache->list, &cache_list); /* publish on the global list */
+}
+
+static struct cache *new_cache(int type, int level,
+			       struct device_node *ofnode, int group_id)
+{
+	struct cache *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+
+	/* allocation failure is reported to the caller as NULL */
+	if (!obj)
+		return NULL;
+	cache_init(obj, type, level, ofnode, group_id);
+	return obj;
+}
+
+/* Warn if any cache on the global list still links to @cache via next_local. */
+static void release_cache_debugcheck(struct cache *cache)
+{
+	struct cache *other;
+
+	list_for_each_entry(other, &cache_list, list) {
+		WARN_ONCE(other->next_local == cache,
+			  "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n",
+			  other->ofnode, cache_type_string(other),
+			  cache->ofnode, cache_type_string(cache));
+	}
+}
+
+/* Unlink @cache from cache_list, drop its OF node reference and free it. */
+static void release_cache(struct cache *cache)
+{
+	if (cache == NULL)
+		return; /* tolerate NULL so error paths can call unconditionally */
+
+	pr_debug("freeing L%d %s cache for %pOFP\n",
+		 cache->level, cache_type_string(cache), cache->ofnode);
+	release_cache_debugcheck(cache);
+	list_del(&cache->list);
+	of_node_put(cache->ofnode);
+	kfree(cache);
+}
+
+/* Mark @cpu as a user of @cache and of every cache after it on the local list. */
+static void cache_cpu_set(struct cache *cache, int cpu)
+{
+	struct cache *cur;
+
+	for (cur = cache; cur; cur = cur->next_local) {
+		WARN_ONCE(cpumask_test_cpu(cpu, &cur->shared_cpu_map),
+			  "CPU %i already accounted in %pOFP(%s)\n",
+			  cpu, cur->ofnode,
+			  cache_type_string(cur));
+		cpumask_set_cpu(cpu, &cur->shared_cpu_map);
+	}
+}
+
+/* Store in *@ret the value of the size property selected by the type of
+ * @cache.  Returns 0, or -ENODEV (leaving *@ret untouched) if the OF
+ * node lacks that property. */
+static int cache_size(const struct cache *cache, unsigned int *ret)
+{
+	const char *prop = cache_type_info[cache->type].size_prop;
+	const __be32 *val = of_get_property(cache->ofnode, prop, NULL);
+
+	if (val == NULL)
+		return -ENODEV;
+
+	*ret = of_read_number(val, 1);
+	return 0;
+}
+
+/* As cache_size(), but the value stored in *@ret is divided by 1024. */
+static int cache_size_kb(const struct cache *cache, unsigned int *ret)
+{
+	unsigned int raw;
+
+	if (cache_size(cache, &raw) != 0)
+		return -ENODEV;
+	*ret = raw / 1024;
+	return 0;
+}
+
+/* Look up the line size of @cache, trying the candidate properties in array
+ * order (-line-size, then -block-size).  Returns 0 and fills *@ret, or -ENODEV
+ * if neither exists.  Not cache_line_size(): that's a macro in linux/cache.h.
+ */
+static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
+{
+	const struct cache_type_info *info = &cache_type_info[cache->type];
+	const __be32 *line_size = NULL; /* stays NULL if no property matches */
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(info->line_size_props); i++) {
+		line_size = of_get_property(cache->ofnode,
+					    info->line_size_props[i], NULL);
+		if (line_size)
+			break;
+	}
+
+	if (!line_size)
+		return -ENODEV;
+
+	*ret = of_read_number(line_size, 1);
+	return 0;
+}
+
+/* Store in *@ret the value of the sets property selected by the type of
+ * @cache.  Returns 0, or -ENODEV (leaving *@ret untouched) if the OF
+ * node lacks that property. */
+static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
+{
+	const char *prop = cache_type_info[cache->type].nr_sets_prop;
+	const __be32 *val = of_get_property(cache->ofnode, prop, NULL);
+
+	if (val == NULL)
+		return -ENODEV;
+
+	*ret = of_read_number(val, 1);
+	return 0;
+}
+
+/* Derive ways of associativity as (size / nr_sets) / line_size; a fully
+ * associative cache (nr_sets == 1) reports 0.  Returns 0, or -ENODEV if
+ * a needed property is missing or zero. */
+static int cache_associativity(const struct cache *cache, unsigned int *ret)
+{
+	unsigned int line_size;
+	unsigned int nr_sets;
+	unsigned int size;
+
+	if (cache_nr_sets(cache, &nr_sets))
+		return -ENODEV;
+
+	/* If the cache is fully associative, there is no need to
+	 * check the other properties.
+	 */
+	if (nr_sets == 1) {
+		*ret = 0;
+		return 0;
+	}
+
+	if (cache_get_line_size(cache, &line_size) ||
+	    cache_size(cache, &size))
+		return -ENODEV;
+
+	if (nr_sets == 0 || size == 0 || line_size == 0)
+		return -ENODEV;
+
+	*ret = (size / nr_sets) / line_size;
+	return 0;
+}
+
+/* Split-cache helper: return the cache of the same node and group whose
+ * next_local is @cache; @cache itself if unified or no such entry exists. */
+static struct cache *cache_find_first_sibling(struct cache *cache)
+{
+	struct cache *prev;
+
+	if (cache->type == CACHE_TYPE_UNIFIED || cache->type == CACHE_TYPE_UNIFIED_D)
+		return cache;
+
+	list_for_each_entry(prev, &cache_list, list) {
+		if (prev->next_local != cache || prev->ofnode != cache->ofnode)
+			continue;
+		if (prev->group_id == cache->group_id)
+			return prev;
+	}
+	return cache;
+}
+
+/* Find a cache for @node and @group_id on the global list and return the
+ * first cache of its sibling set (see cache_find_first_sibling()); NULL
+ * when no cache matches.
+ */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+						int group_id)
+{
+	struct cache *pos;
+
+	list_for_each_entry(pos, &cache_list, list) {
+		if (pos->ofnode == node && pos->group_id == group_id)
+			return cache_find_first_sibling(pos);
+	}
+
+	/* nothing on the list for this node/group */
+	return NULL;
+}
+
+static bool cache_node_is_unified(const struct device_node *np)
+{
+	return of_get_property(np, "cache-unified", NULL) != NULL; /* presence only */
+}
+
+/*
+ * Unified caches come with two different property sets: most embedded
+ * systems use cache-size, etc., while Open Firmware systems use
+ * d-cache-size, etc.  Return CACHE_TYPE_UNIFIED_D when the node has the
+ * d-cache size property, else assume CACHE_TYPE_UNIFIED.  A third flavour
+ * would show up as missing sysfs entries and need this code extended.
+ */
+static int cache_is_unified_d(const struct device_node *np)
+{
+	const char *prop = cache_type_info[CACHE_TYPE_UNIFIED_D].size_prop;
+
+	if (of_get_property(np, prop, NULL))
+		return CACHE_TYPE_UNIFIED_D;
+	return CACHE_TYPE_UNIFIED;
+}
+
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+						  int level)
+{
+	pr_debug("creating L%d ucache for %pOFP\n", level, node);
+	/* cache_is_unified_d() picks the type by which size property the node has */
+	return new_cache(cache_is_unified_d(node), level, node, group_id);
+}
+
+/* Create the data/instruction cache pair for @node, linked d -> i.
+ * Returns the data cache, or NULL (with nothing left allocated) on failure.
+ */
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
+						int level)
+{
+	struct cache *dcache, *icache;
+
+	pr_debug("creating L%d dcache and icache for %pOFP\n", level, node);
+
+	dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
+	if (dcache && icache) {
+		dcache->next_local = icache;
+		return dcache;
+	}
+
+	/* release_cache() accepts NULL, so no need to check which one failed */
+	release_cache(dcache);
+	release_cache(icache);
+	return NULL;
+}
+
+/* Instantiate the cache object(s) described by @node: a single unified
+ * cache if the node has "cache-unified", otherwise a split d/i pair.
+ * Returns the first new cache, or NULL on failure.
+ */
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
+{
+	if (cache_node_is_unified(node))
+		return cache_do_one_devnode_unified(node, group_id, level);
+
+	return cache_do_one_devnode_split(node, group_id, level);
+}
+
+/* Return the existing cache for @node/@group_id, creating it if needed.
+ * An existing cache whose level differs from @level triggers a warning.
+ */
+static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+						 int group_id,
+						 int level)
+{
+	struct cache *found = cache_lookup_by_node_group(node, group_id);
+
+	if (!found)
+		return cache_do_one_devnode(node, group_id, level);
+
+	WARN_ONCE(found->level != level,
+		  "cache level mismatch on lookup (got %d, expected %d)\n",
+		  found->level, level);
+	return found;
+}
+
+/* Append @bigger to the end of @smaller's local list unless already there. */
+static void link_cache_lists(struct cache *smaller, struct cache *bigger)
+{
+	struct cache *tail;
+
+	for (tail = smaller; tail->next_local; tail = tail->next_local) {
+		if (tail->next_local == bigger)
+			return; /* already linked */
+	}
+	tail->next_local = bigger;
+
+	/* The cache->next_local list sorts by level ascending:
+	 * L1d -> L1i -> L2 -> L3 ...
+	 */
+	WARN_ONCE((tail->level == 1 && bigger->level > 2) ||
+		  (tail->level > 1 && bigger->level != tail->level + 1),
+		  "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n",
+		  tail->level, tail->ofnode, bigger->level, bigger->ofnode);
+}
+
+/* Sanity checks for do_subsidiary_caches(): the chain is expected to
+ * start from an L1 cache object whose OF node is a cpu node. */
+static void do_subsidiary_caches_debugcheck(struct cache *cache)
+{
+	WARN_ONCE(cache->level != 1,
+		  "instantiating cache chain from L%d %s cache for %pOFP instead of an L1\n",
+		  cache->level, cache_type_string(cache), cache->ofnode);
+	WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"),
+		  "instantiating cache chain from node %pOFP of type '%s' instead of a cpu node\n",
+		  cache->ofnode, of_node_get_device_type(cache->ofnode));
+}
+
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * Only levels 1, 2 and 3 are checked for thread-group information.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+	if (level == 1 && has_big_cores)
+		return cpumask_first(per_cpu(thread_group_l1_cache_map, cpu_id));
+	if (level == 2 && thread_group_shares_l2)
+		return cpumask_first(per_cpu(thread_group_l2_cache_map, cpu_id));
+	if (level == 3 && thread_group_shares_l3)
+		return cpumask_first(per_cpu(thread_group_l3_cache_map, cpu_id));
+
+	return -1;
+}
+
+/* Follow of_find_next_cache_node() from @cache, linking one cache per level. */
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
+{
+	int level = cache->level;
+
+	do_subsidiary_caches_debugcheck(cache);
+
+	for (;;) {
+		struct device_node *np = of_find_next_cache_node(cache->ofnode);
+		struct cache *subcache;
+
+		if (!np)
+			break;
+		level++;
+		subcache = cache_lookup_or_instantiate(np, get_group_id(cpu_id, level), level);
+		of_node_put(np); /* the cache object holds its own reference */
+		if (!subcache)
+			break;
+		link_cache_lists(cache, subcache);
+		cache = subcache;
+	}
+}
+
+/* Build (or find) the chain of cache objects for @cpu_id and account the
+ * cpu in each of them.  Returns the cpu's first cache, or NULL on failure.
+ */
+static struct cache *cache_chain_instantiate(unsigned int cpu_id)
+{
+	struct device_node *cpu_node;
+	struct cache *cpu_cache;
+	int group_id;
+
+	pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
+
+	cpu_node = of_get_cpu_node(cpu_id, NULL);
+	WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+	if (!cpu_node)
+		return NULL;
+
+	group_id = get_group_id(cpu_id, 1);
+
+	cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
+	if (cpu_cache) {
+		do_subsidiary_caches(cpu_cache, cpu_id);
+		cache_cpu_set(cpu_cache, cpu_id);
+	}
+
+	of_node_put(cpu_node);
+	return cpu_cache;
+}
+
+/* Create the "cache" sysfs directory under @cpu_id's device and record the
+ * tracking object in cache_dir_pcpu.  Returns NULL on any failure.
+ */
+static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id)
+{
+	struct device *dev = get_cpu_device(cpu_id);
+	struct cache_dir *cache_dir;
+	struct kobject *kobj;
+
+	WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id);
+	if (!dev)
+		return NULL;
+
+	kobj = kobject_create_and_add("cache", &dev->kobj);
+	if (!kobj)
+		return NULL;
+
+	cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+	if (!cache_dir) {
+		kobject_put(kobj); /* undo kobject_create_and_add() */
+		return NULL;
+	}
+	cache_dir->kobj = kobj;
+
+	/* an already-set slot is unexpected and would be overwritten below */
+	WARN_ON_ONCE(per_cpu(cache_dir_pcpu, cpu_id) != NULL);
+	per_cpu(cache_dir_pcpu, cpu_id) = cache_dir;
+
+	return cache_dir;
+}
+
+/* kobj_type ->release hook: frees the cache_index_dir embedding @kobj. */
+static void cache_index_release(struct kobject *kobj)
+{
+	struct cache_index_dir *dir = kobj_to_cache_index_dir(kobj);
+	struct cache *cache = dir->cache;
+
+	pr_debug("freeing index directory for L%d %s cache\n",
+		 cache->level, cache_type_string(cache));
+
+	kfree(dir);
+}
+
+/* sysfs_ops ->show: @attr is embedded in a kobj_attribute, whose own
+ * show method does the actual formatting into @buf. */
+static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
+{
+	struct kobj_attribute *kattr = container_of(attr, struct kobj_attribute, attr);
+
+	return kattr->show(k, kattr, buf);
+}
+
+/* Map the kobject embedded in an index directory to the cache that the
+ * directory describes. */
+static struct cache *index_kobj_to_cache(struct kobject *k)
+{
+	const struct cache_index_dir *dir = kobj_to_cache_index_dir(k);
+
+	return dir->cache;
+}
+
+/* sysfs "size": the value from cache_size_kb() followed by "K";
+ * -ENODEV if the size cannot be read. */
+static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache *cache = index_kobj_to_cache(k);
+	unsigned int size_kb;
+
+	if (cache_size_kb(cache, &size_kb))
+		return -ENODEV;
+
+	return sprintf(buf, "%uK\n", size_kb);
+}
+
+static struct kobj_attribute cache_size_attr =
+	__ATTR(size, 0444, size_show, NULL);
+
+
+/* sysfs "coherency_line_size": the value from cache_get_line_size();
+ * -ENODEV if no line size property can be read. */
+static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache *cache = index_kobj_to_cache(k);
+	unsigned int line_size;
+
+	if (cache_get_line_size(cache, &line_size))
+		return -ENODEV;
+
+	return sprintf(buf, "%u\n", line_size);
+}
+
+static struct kobj_attribute cache_line_size_attr =
+	__ATTR(coherency_line_size, 0444, line_size_show, NULL);
+
+/* sysfs "number_of_sets": the value from cache_nr_sets();
+ * -ENODEV if the sets property cannot be read. */
+static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache *cache = index_kobj_to_cache(k);
+	unsigned int nr_sets;
+
+	if (cache_nr_sets(cache, &nr_sets))
+		return -ENODEV;
+
+	return sprintf(buf, "%u\n", nr_sets);
+}
+
+static struct kobj_attribute cache_nr_sets_attr =
+	__ATTR(number_of_sets, 0444, nr_sets_show, NULL);
+
+/* sysfs "ways_of_associativity": the value from cache_associativity();
+ * -ENODEV if it cannot be computed. */
+static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	struct cache *cache = index_kobj_to_cache(k);
+	unsigned int associativity;
+
+	if (cache_associativity(cache, &associativity))
+		return -ENODEV;
+
+	return sprintf(buf, "%u\n", associativity);
+}
+
+static struct kobj_attribute cache_assoc_attr =
+	__ATTR(ways_of_associativity, 0444, associativity_show, NULL);
+
+/* sysfs "type": the name string that cache_type_info[] holds for the
+ * type of this cache. */
+static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	const char *type = cache_type_string(index_kobj_to_cache(k));
+
+	return sprintf(buf, "%s\n", type);
+}
+
+static struct kobj_attribute cache_type_attr =
+	__ATTR(type, 0444, type_show, NULL);
+
+/* sysfs "level": the level recorded in the cache object when it was
+ * instantiated, printed as a decimal number.
+ */
+static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	const struct cache *cache = index_kobj_to_cache(k);
+	int level = cache->level;
+
+	return sprintf(buf, "%d\n", level);
+}
+
+static struct kobj_attribute cache_level_attr =
+	__ATTR(level, 0444, level_show, NULL);
+
+/* Common backend for the shared_cpu_map and shared_cpu_list attributes:
+ * print the shared_cpu_map of the cache behind @k into @buf.  @list is
+ * passed through to cpumap_print_to_pagebuf() to select the list format.
+ */
+static ssize_t
+show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
+{
+	const struct cache *cache = index_kobj_to_cache(k);
+	const struct cpumask *mask;
+
+	mask = &cache->shared_cpu_map;
+
+	return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	return show_shared_cpumap(k, attr, buf, false); /* list=false: mask format */
+}
+
+static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	return show_shared_cpumap(k, attr, buf, true); /* list=true: list format */
+}
+
+static struct kobj_attribute cache_shared_cpu_map_attr =
+	__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+static struct kobj_attribute cache_shared_cpu_list_attr =
+	__ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
+/* Attributes which should always be created -- the kobject/sysfs core
+ * does this automatically via kobj_type->default_groups (see
+ * cache_index_type).  This is the minimum data required to uniquely identify a cache.
+ */
+static struct attribute *cache_index_default_attrs[] = {
+	&cache_type_attr.attr,
+	&cache_level_attr.attr,
+	&cache_shared_cpu_map_attr.attr,
+	&cache_shared_cpu_list_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(cache_index_default);
+
+/* Attributes which should be created only if the cache device node has the
+ * right properties -- see cacheinfo_create_index_opt_attrs (no NULL terminator)
+ */
+static struct kobj_attribute *cache_index_opt_attrs[] = {
+	&cache_size_attr,
+	&cache_line_size_attr,
+	&cache_nr_sets_attr,
+	&cache_assoc_attr,
+};
+
+static const struct sysfs_ops cache_index_ops = {
+	.show = cache_index_show, /* no ->store is provided */
+};
+
+static struct kobj_type cache_index_type = {
+	.release = cache_index_release, /* frees the cache_index_dir */
+	.sysfs_ops = &cache_index_ops,
+	.default_groups = cache_index_default_groups, /* from ATTRIBUTE_GROUPS() */
+};
+
+/* Register the optional attributes listed in cache_index_opt_attrs under
+ * @dir, skipping every attribute whose ->show method fails for the cache
+ * behind @dir.  Errors are only reported via pr_debug().
+ */
+static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+{
+	struct cache *cache = dir->cache;
+	const char *cache_type = cache_type_string(cache);
+	char *scratch;
+	int i;
+
+	/* scratch page for the trial ->show calls below */
+	scratch = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!scratch)
+		return;
+
+	/* We don't want to create an attribute that can't provide a
+	 * meaningful value.  Call each optional attribute's ->show
+	 * method once and only register the attribute when the call
+	 * returns a positive length; whatever it wrote to the scratch
+	 * buffer is thrown away.
+	 */
+	for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
+		struct kobj_attribute *attr = cache_index_opt_attrs[i];
+		ssize_t rc = attr->show(&dir->kobj, attr, scratch);
+
+		if (rc <= 0) {
+			pr_debug("not creating %s attribute for %pOFP(%s) (rc = %zd)\n",
+				 attr->attr.name, cache->ofnode, cache_type, rc);
+			continue;
+		}
+
+		/* a file that cannot be created is only noted, not fatal */
+		if (sysfs_create_file(&dir->kobj, &attr->attr))
+			pr_debug("could not create %s attribute for %pOFP(%s)\n",
+				 attr->attr.name, cache->ofnode, cache_type);
+	}
+
+	kfree(scratch);
+}
+
+/* Create the "index<index>" directory for @cache under @cache_dir and
+ * push it onto the directory's index list.  Failures are silent.
+ */
+static void cacheinfo_create_index_dir(struct cache *cache, int index,
+				       struct cache_dir *cache_dir)
+{
+	struct cache_index_dir *idx = kzalloc(sizeof(*idx), GFP_KERNEL);
+
+	if (!idx)
+		return;
+
+	/* must be set before the kobject can be released: ->release reads it */
+	idx->cache = cache;
+	if (kobject_init_and_add(&idx->kobj, &cache_index_type,
+				 cache_dir->kobj, "index%d", index)) {
+		kobject_put(&idx->kobj); /* frees idx via cache_index_release() */
+		return;
+	}
+
+	idx->next = cache_dir->index;
+	cache_dir->index = idx;
+
+	cacheinfo_create_index_opt_attrs(idx);
+}
+
+/* Create the sysfs cache directory for @cpu_id with one indexN entry per
+ * cache on the local list starting at @cache_list (this parameter, not
+ * the file-scope list of the same name).
+ */
+static void cacheinfo_sysfs_populate(unsigned int cpu_id,
+				     struct cache *cache_list)
+{
+	struct cache_dir *cache_dir = cacheinfo_create_cache_dir(cpu_id);
+	struct cache *cache;
+	int index = 0;
+
+	if (!cache_dir)
+		return;
+
+	for (cache = cache_list; cache; cache = cache->next_local) {
+		cacheinfo_create_index_dir(cache, index++, cache_dir);
+	}
+}
+
+/* Hook for cpu online: build the cpu's cache chain, then expose it in
+ * sysfs.  Nothing is exposed if the chain could not be instantiated.
+ */
+void cacheinfo_cpu_online(unsigned int cpu_id)
+{
+	struct cache *chain = cache_chain_instantiate(cpu_id);
+
+	if (chain)
+		cacheinfo_sysfs_populate(cpu_id, chain);
+}
+
+/* functions needed to remove cache entry for cpu offline or suspend/resume */
+
+#if (defined(CONFIG_PPC_PSERIES) && defined(CONFIG_SUSPEND)) || \
+    defined(CONFIG_HOTPLUG_CPU)
+
+/* Find the first cache object of @cpu_id via its OF cpu node and its
+ * level-1 group id.  Returns NULL if the node or the cache is missing.
+ */
+static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
+{
+	struct device_node *np = of_get_cpu_node(cpu_id, NULL);
+	struct cache *cache;
+
+	WARN_ONCE(!np, "no OF node found for CPU %i\n", cpu_id);
+	if (!np)
+		return NULL;
+
+	cache = cache_lookup_by_node_group(np, get_group_id(cpu_id, 1));
+	of_node_put(np);
+
+	return cache;
+}
+
+/* Drop the reference held on each index kobject of @cache_dir.  The
+ * final put of a kobject ends in cache_index_release(), which frees
+ * the cache_index_dir.
+ */
+static void remove_index_dirs(struct cache_dir *cache_dir)
+{
+	struct cache_index_dir *index, *next;
+
+	for (index = cache_dir->index; index; index = next) {
+		/* fetch the successor first: the put below may free @index */
+		next = index->next;
+		kobject_put(&index->kobj);
+	}
+}
+
+/* Tear down @cache_dir: index directories first, then the "cache"
+ * kobject itself, and finally the tracking object. */
+static void remove_cache_dir(struct cache_dir *cache_dir)
+{
+	struct kobject *kobj = cache_dir->kobj;
+
+	remove_index_dirs(cache_dir);
+	kobject_del(kobj); /* Remove cache dir from sysfs */
+	kobject_put(kobj);
+	kfree(cache_dir);
+}
+
+/* Remove @cpu from @cache and every cache after it on the local list,
+ * releasing each cache whose shared_cpu_map becomes empty. */
+static void cache_cpu_clear(struct cache *cache, int cpu)
+{
+	struct cache *next;
+
+	for (; cache; cache = next) {
+		/* read the link now: @cache may be freed below */
+		next = cache->next_local;
+
+		WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
+			  "CPU %i not accounted in %pOFP(%s)\n",
+			  cpu, cache->ofnode, cache_type_string(cache));
+		cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
+
+		/* no cpu left in the map: release the cache object */
+		if (cpumask_empty(&cache->shared_cpu_map))
+			release_cache(cache);
+	}
+}
+
+/* Hook for cpu offline: remove the sysfs hierarchy of @cpu_id and drop
+ * the cpu from its cache chain. */
+void cacheinfo_cpu_offline(unsigned int cpu_id)
+{
+	struct cache_dir *cache_dir = per_cpu(cache_dir_pcpu, cpu_id);
+	struct cache *chain;
+
+	/* Prevent userspace from seeing inconsistent state - remove
+	 * the sysfs hierarchy first.  The directory may be absent
+	 * because sysfs population can fail.
+	 */
+	if (cache_dir != NULL)
+		remove_cache_dir(cache_dir);
+	per_cpu(cache_dir_pcpu, cpu_id) = NULL;
+
+	/* clear the CPU's bit in its cache chain, possibly freeing
+	 * cache objects */
+	chain = cache_lookup_by_cpu(cpu_id);
+	if (chain != NULL)
+		cache_cpu_clear(chain, cpu_id);
+}
+
+void cacheinfo_teardown(void)
+{
+	unsigned int cpu;
+
+	lockdep_assert_cpus_held();
+	/* run the offline hook for every online cpu */
+	for_each_online_cpu(cpu)
+		cacheinfo_cpu_offline(cpu);
+}
+
+void cacheinfo_rebuild(void)
+{
+	unsigned int cpu;
+
+	lockdep_assert_cpus_held();
+	/* run the online hook for every online cpu */
+	for_each_online_cpu(cpu)
+		cacheinfo_cpu_online(cpu);
+}
+
+#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644
index 0000000000..52bd3fc664
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_CACHEINFO_H
+#define _PPC_CACHEINFO_H
+
+/* These are just hooks for sysfs.c to use; each acts on the one CPU given. */
+extern void cacheinfo_cpu_online(unsigned int cpu_id);
+extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+
+/* Allow migration/suspend to tear down and rebuild the hierarchy (all online CPUs). */
+extern void cacheinfo_teardown(void);
+extern void cacheinfo_rebuild(void);
+
+#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
new file mode 100644
index 0000000000..57b38c592b
--- /dev/null
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef __powerpc64__
+#include <linux/audit_arch.h>
+#include <asm/unistd.h>
+
+#include "audit_32.h"
+
+unsigned ppc32_dir_class[] = {	/* entries come from audit_dir_write.h */
+#include <asm-generic/audit_dir_write.h>
+~0U	/* last entry; NOTE(review): presumably the end-of-list marker */
+};
+
+unsigned ppc32_chattr_class[] = {	/* entries come from audit_change_attr.h */
+#include <asm-generic/audit_change_attr.h>
+~0U	/* last entry; NOTE(review): presumably the end-of-list marker */
+};
+
+unsigned ppc32_write_class[] = {	/* entries come from audit_write.h */
+#include <asm-generic/audit_write.h>
+~0U	/* last entry; NOTE(review): presumably the end-of-list marker */
+};
+
+unsigned ppc32_read_class[] = {	/* entries come from audit_read.h */
+#include <asm-generic/audit_read.h>
+~0U	/* last entry; NOTE(review): presumably the end-of-list marker */
+};
+
+unsigned ppc32_signal_class[] = {	/* entries come from audit_signal.h */
+#include <asm-generic/audit_signal.h>
+~0U	/* last entry; NOTE(review): presumably the end-of-list marker */
+};
+
+int ppc32_classify_syscall(unsigned syscall)
+{
+	/* Syscalls not singled out below are reported as AUDITSC_COMPAT */
+	int class = AUDITSC_COMPAT;
+
+	if (syscall == __NR_open)
+		class = AUDITSC_OPEN;
+	else if (syscall == __NR_openat)
+		class = AUDITSC_OPENAT;
+	else if (syscall == __NR_socketcall)
+		class = AUDITSC_SOCKETCALL;
+	else if (syscall == __NR_execve)
+		class = AUDITSC_EXECVE;
+	else if (syscall == __NR_openat2)
+		class = AUDITSC_OPENAT2;
+	return class;
+}
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
new file mode 100644
index 0000000000..e1d705ea2c
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low level CPU setup functions.
+ * Valentine Barshak <vbarshak@ru.mvista.com>
+ * MontaVista Software, Inc (c) 2007
+ *
+ * Based on cpu_setup_6xx code by
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(__setup_cpu_440ep)
+	b	__init_fpu_44x		/* tail call: its blr returns to our caller */
+_GLOBAL(__setup_cpu_440epx)
+	mflr	r4			/* keep return address: each bl overwrites LR */
+	bl	__init_fpu_44x
+	bl	__plb_disable_wrp
+	bl	__fixup_440A_mcheck
+	mtlr	r4			/* put the return address back */
+	blr
+_GLOBAL(__setup_cpu_440grx)
+	mflr	r4			/* keep return address: each bl overwrites LR */
+	bl	__plb_disable_wrp
+	bl	__fixup_440A_mcheck
+	mtlr	r4			/* put the return address back */
+	blr
+_GLOBAL(__setup_cpu_460ex)
+_GLOBAL(__setup_cpu_460gt)
+_GLOBAL(__setup_cpu_460sx)
+_GLOBAL(__setup_cpu_apm821xx)	/* four entry names, one shared body */
+	mflr	r4			/* keep return address: each bl overwrites LR */
+	bl	__init_fpu_44x
+	bl	__fixup_440A_mcheck
+	mtlr	r4			/* put the return address back */
+	blr
+
+_GLOBAL(__setup_cpu_440x5)
+_GLOBAL(__setup_cpu_440gx)
+_GLOBAL(__setup_cpu_440spe)	/* three entry names, one shared body */
+	b	__fixup_440A_mcheck	/* tail call: its return goes to our caller */
+
+/* enable APU between CPU and FPU; uses r3 as scratch */
+_GLOBAL(__init_fpu_44x)
+	mfspr	r3,SPRN_CCR0
+	/* Clear DAPUIB flag in CCR0 */
+	rlwinm	r3,r3,0,12,10		/* wrapped mask 12..10: keeps all bits but 11 */
+	mtspr	SPRN_CCR0,r3
+	isync				/* context-synchronise after the SPR write */
+	blr
+
+/*
+ * Workaround for the incorrect write to DDR SDRAM errata.
+ * The write address can be corrupted during writes to
+ * DDR SDRAM when write pipelining is enabled on PLB0.
+ * Disable write pipelining here.  Uses r3 as scratch.
+ */
+#define DCRN_PLB4A0_ACR	0x81
+
+_GLOBAL(__plb_disable_wrp)
+	mfdcr	r3,DCRN_PLB4A0_ACR
+	/* clear WRP bit in PLB4A0_ACR */
+	rlwinm	r3,r3,0,8,6		/* wrapped mask 8..6: keeps all bits but 7 */
+	mtdcr	DCRN_PLB4A0_ACR,r3
+	isync
+	blr
+
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
new file mode 100644
index 0000000000..f29ce3dd61
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -0,0 +1,498 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
+
+_GLOBAL(__setup_cpu_603)
+	mflr	r5				/* save LR: the bl's below overwrite it */
+BEGIN_MMU_FTR_SECTION
+	li	r10,0
+	mtspr	SPRN_SPRG_603_LRU,r10		/* init SW LRU tracking */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
+BEGIN_FTR_SECTION
+	bl	__init_fpu_registers		/* section kept only if FPU_UNAVAILABLE is clear */
+END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
+	bl	setup_common_caches
+	mtlr	r5				/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_604)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	setup_common_caches
+	bl	setup_604_hid0
+	mtlr	r5			/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_750)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	__init_fpu_registers
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	mtlr	r5			/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_750cx)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	__init_fpu_registers
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	bl	setup_750cx		/* reads/writes CPU_SPEC_FEATURES(r4) */
+	mtlr	r5			/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_750fx)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	__init_fpu_registers
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	bl	setup_750fx		/* currently an empty stub */
+	mtlr	r5			/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_7400)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	__init_fpu_registers
+	bl	setup_7400_workarounds	/* errata fixes run before cache setup */
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	mtlr	r5			/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_7410)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	__init_fpu_registers
+	bl	setup_7410_workarounds	/* errata fixes run before cache setup */
+	bl	setup_common_caches
+	bl	setup_750_7400_hid0
+	li	r3,0
+	mtspr	SPRN_L2CR2,r3		/* zero L2CR2 */
+	mtlr	r5			/* restore LR for the return */
+	blr
+_GLOBAL(__setup_cpu_745x)
+	mflr	r5			/* save LR: the bl's below overwrite it */
+	bl	setup_common_caches
+	bl	setup_745x_specifics	/* reads/writes CPU_SPEC_FEATURES(r4) */
+	mtlr	r5			/* restore LR for the return */
+	blr
+
+/* Enable caches for 603's, 604, 750 & 7400; uses r0, r8, r11 and cr0 */
+SYM_FUNC_START_LOCAL(setup_common_caches)
+	mfspr	r11,SPRN_HID0
+	andi.	r0,r11,HID0_DCE		/* cr0: was the D-cache already on? */
+	ori	r11,r11,HID0_ICE|HID0_DCE	/* r11 = final value, both caches on */
+	ori	r8,r11,HID0_ICFI	/* r8 = r11 plus I-cache invalidate */
+	bne	1f			/* don't invalidate the D-cache */
+	ori	r8,r8,HID0_DCI		/* unless it wasn't enabled */
+1:	sync
+	mtspr	SPRN_HID0,r8		/* enable and invalidate caches */
+	sync
+	mtspr	SPRN_HID0,r11		/* enable caches */
+	sync
+	isync
+	blr
+SYM_FUNC_END(setup_common_caches)
+
+/* 604, 604e, 604ev, ...
+ * Enable superscalar execution & branch history table; uses r8 and r11
+ */
+SYM_FUNC_START_LOCAL(setup_604_hid0)
+	mfspr	r11,SPRN_HID0
+	ori	r11,r11,HID0_SIED|HID0_BHTE	/* r11 = final HID0 value */
+	ori	r8,r11,HID0_BTCD	/* r8 = same with BTCD set for the first write */
+	sync
+	mtspr	SPRN_HID0,r8	/* flush branch target address cache */
+	sync			/* on 604e/604r */
+	mtspr	SPRN_HID0,r11	/* second write leaves BTCD as it was read */
+	sync
+	isync
+	blr
+SYM_FUNC_END(setup_604_hid0)
+
+/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
+ * errata that we work around here.
+ * Moto MPC710CE.pdf describes them; those are errata
+ * #3, #4 and #5.
+ * Note that we assume the firmware didn't choose to
+ * apply other workarounds (there are other ones documented
+ * in the .pdf). It appears that Apple firmware only works
+ * around #3 and with the same fix we use. We may want to
+ * check if the CPU is using 60x bus mode in which case
+ * the workaround for errata #4 is useless. Also, we may
+ * want to explicitly clear HID0_NOPDST as this is not
+ * needed once we have applied workaround #5 (though it's
+ * not set by Apple's firmware at least).
+ */
+SYM_FUNC_START_LOCAL(setup_7400_workarounds)
+	mfpvr	r3
+	rlwinm	r3,r3,0,20,31		/* keep the low 12 bits of the PVR */
+	cmpwi	0,r3,0x0207
+	ble	1f			/* <= 0x0207: use the fix-up tail shared with 7410 */
+	blr
+SYM_FUNC_END(setup_7400_workarounds)
+SYM_FUNC_START_LOCAL(setup_7410_workarounds)
+	mfpvr	r3
+	rlwinm	r3,r3,0,20,31		/* keep the low 12 bits of the PVR */
+	cmpwi	0,r3,0x0100
+	bnelr				/* only 0x0100 needs the fixes */
+1:
+	mfspr	r11,SPRN_MSSSR0
+	/* Errata #3: Set L1OPQ_SIZE to 0x10 */
+	rlwinm	r11,r11,0,9,6		/* wrapped mask 9..6: clears bits 7-8 */
+	oris	r11,r11,0x0100
+	/* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */
+	oris	r11,r11,0x0002
+	/* Errata #5: Set DRLT_SIZE to 0x01 */
+	rlwinm	r11,r11,0,5,2		/* wrapped mask 5..2: clears bits 3-4 */
+	oris	r11,r11,0x0800
+	sync
+	mtspr	SPRN_MSSSR0,r11
+	sync
+	isync
+	blr
+SYM_FUNC_END(setup_7410_workarounds)
+
+/* 740/750/7400/7410
+ * Enable Store Gathering (SGE), Address Broadcast (ABE),
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Clear Instruction cache throttling (ICTC).  Uses r3 and r11.
+ */
+SYM_FUNC_START_LOCAL(setup_750_7400_hid0)
+	mfspr	r11,SPRN_HID0
+	ori	r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
+	oris	r11,r11,HID0_DPM@h
+BEGIN_FTR_SECTION
+	xori	r11,r11,HID0_BTIC	/* bit was just set above, so xor clears it */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
+BEGIN_FTR_SECTION
+	xoris	r11,r11,HID0_DPM@h	/* disable dynamic power mgmt */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
+	li	r3,HID0_SPD
+	andc	r11,r11,r3		/* clear SPD: enable speculative */
+ 	li	r3,0
+ 	mtspr	SPRN_ICTC,r3		/* Instruction Cache Throttling off */
+	isync
+	mtspr	SPRN_HID0,r11		/* commit the new HID0 value */
+	sync
+	isync
+	blr
+SYM_FUNC_END(setup_750_7400_hid0)
+
+/* 750cx specific
+ * Looks like we have to disable NAP feature for some PLL settings...
+ * (waiting for confirmation).  r4 is the base for CPU_SPEC_FEATURES.
+ */
+SYM_FUNC_START_LOCAL(setup_750cx)
+	mfspr	r10, SPRN_HID1
+	rlwinm	r10,r10,4,28,31		/* r10 = top 4 bits of HID1 */
+	cmpwi	cr0,r10,7
+	cmpwi	cr1,r10,9
+	cmpwi	cr2,r10,11
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr2+eq	/* cr0.eq = value is 7, 9 or 11 */
+	bnelr				/* any other value: leave features alone */
+	lwz	r6,CPU_SPEC_FEATURES(r4)
+	li	r7,CPU_FTR_CAN_NAP
+	andc	r6,r6,r7		/* drop CPU_FTR_CAN_NAP */
+	stw	r6,CPU_SPEC_FEATURES(r4)
+	blr
+SYM_FUNC_END(setup_750cx)
+
+/* 750fx specific: nothing to do at present, returns immediately
+ */
+SYM_FUNC_START_LOCAL(setup_750fx)
+	blr
+SYM_FUNC_END(setup_750fx)
+
+/* MPC 745x
+ * Enable Store Gathering (SGE), Branch Folding (FOLD)
+ * Branch History Table (BHTE), Branch Target ICache (BTIC)
+ * Dynamic Power Management (DPM), Speculative (SPD)
+ * Ensure our data cache instructions really operate.
+ * Timebase has to be running or we wouldn't have made it here,
+ * just ensure we don't disable it.
+ * Clear Instruction cache throttling (ICTC)
+ * Enable L2 HW prefetch.  r4 is the base for CPU_SPEC_FEATURES.
+ */
+SYM_FUNC_START_LOCAL(setup_745x_specifics)
+	/* We check for the presence of an L3 cache setup by
+	 * the firmware. If any, we disable NAP capability as
+	 * it's known to be bogus on rev 2.1 and earlier
+	 */
+BEGIN_FTR_SECTION
+	mfspr	r11,SPRN_L3CR
+	andis.	r11,r11,L3CR_L3E@h
+	beq	1f			/* L3E clear: skip the NAP handling */
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+	lwz	r6,CPU_SPEC_FEATURES(r4)
+	andis.	r0,r6,CPU_FTR_L3_DISABLE_NAP@h
+	beq	1f			/* flag not set for this CPU: keep NAP */
+	li	r7,CPU_FTR_CAN_NAP
+	andc	r6,r6,r7		/* drop CPU_FTR_CAN_NAP */
+	stw	r6,CPU_SPEC_FEATURES(r4)
+1:
+	mfspr	r11,SPRN_HID0
+
+	/* All of the bits we have to set.....
+	 */
+	ori	r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE
+	ori	r11,r11,HID0_LRSTK | HID0_BTIC
+	oris	r11,r11,HID0_DPM@h
+BEGIN_MMU_FTR_SECTION
+	oris	r11,r11,HID0_HIGH_BAT@h
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+BEGIN_FTR_SECTION
+	xori	r11,r11,HID0_BTIC	/* bit was just set above, so xor clears it */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
+BEGIN_FTR_SECTION
+	xoris	r11,r11,HID0_DPM@h	/* disable dynamic power mgmt */
+END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
+
+	/* All of the bits we have to clear....
+	 */
+	li	r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI
+	andc	r11,r11,r3		/* clear SPD: enable speculative */
+ 	li	r3,0
+
+ 	mtspr	SPRN_ICTC,r3		/* Instruction Cache Throttling off */
+	isync
+	mtspr	SPRN_HID0,r11		/* commit the new HID0 value */
+	sync
+	isync
+
+	/* Enable L2 HW prefetch, if L2 is enabled
+	 */
+	mfspr	r3,SPRN_L2CR
+	andis.	r3,r3,L2CR_L2E@h
+	beqlr				/* L2E clear: done */
+	mfspr	r3,SPRN_MSSCR0
+	ori	r3,r3,3			/* set the two low-order MSSCR0 bits */
+	sync
+	mtspr	SPRN_MSSCR0,r3
+	sync
+	isync
+	blr
+SYM_FUNC_END(setup_745x_specifics)
+
+/*
+ * Initialize the FPU registers. This is needed to work around an erratum
+ * in some 750 cpus where using a not yet initialized FPU register after
+ * power on reset may hang the CPU.  r3 is added to empty_zero_page's address.
+ */
+_GLOBAL(__init_fpu_registers)
+	mfmsr	r10			/* r10 = MSR on entry, restored before return */
+	ori	r11,r10,MSR_FP
+	mtmsr	r11			/* same MSR with MSR_FP set */
+	isync
+	addis	r9,r3,empty_zero_page@ha
+	addi	r9,r9,empty_zero_page@l	/* r9 = r3 + &empty_zero_page */
+	REST_32FPRS(0,r9)		/* NOTE(review): presumably loads all 32 FPRs from r9 */
+	sync
+	mtmsr	r10			/* back to the MSR found on entry */
+	isync
+	blr
+_ASM_NOKPROBE_SYMBOL(__init_fpu_registers)
+
+
+/* Definitions for the table used to save CPU states (byte offsets, 4 apart) */
+#define CS_HID0		0
+#define CS_HID1		4
+#define CS_HID2		8
+#define	CS_MSSCR0	12
+#define CS_MSSSR0	16
+#define CS_ICTRL	20
+#define CS_LDSTCR	24
+#define CS_LDSTDB	28
+#define CS_SIZE		32
+
+	.data
+	.balign	L1_CACHE_BYTES
+cpu_state_storage:		/* CS_SIZE bytes, aligned to L1_CACHE_BYTES */
+	.space	CS_SIZE
+	.balign	L1_CACHE_BYTES,0
+	.text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, MSSCR0, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+	/* Some CR fields are non-volatile, so we back up the whole CR */
+	mfcr	r7
+
+	/* Get storage ptr */
+	lis	r5,cpu_state_storage@h
+	ori	r5,r5,cpu_state_storage@l
+
+	/* Save HID0 (common to all CONFIG_PPC_BOOK3S_32 cpus) */
+	mfspr	r3,SPRN_HID0
+	stw	r3,CS_HID0(r5)
+
+	/* Now deal with CPU type dependent registers */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16		/* r3 = upper half of the PVR */
+	cmplwi	cr0,r3,0x8000	/* 7450 */
+	cmplwi	cr1,r3,0x000c	/* 7400 */
+	cmplwi	cr2,r3,0x800c	/* 7410 */
+	cmplwi	cr3,r3,0x8001	/* 7455 */
+	cmplwi	cr4,r3,0x8002	/* 7457 */
+	cmplwi	cr5,r3,0x8003	/* 7447A */
+	cmplwi	cr6,r3,0x7000	/* 750FX */
+	cmplwi	cr7,r3,0x8004	/* 7448 */
+	/* cr1 is 7400 || 7410 */
+	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
+	/* cr0 is 74xx */
+	cror	4*cr0+eq,4*cr0+eq,4*cr3+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr4+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr5+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr7+eq
+	bne	1f			/* not a 74xx: go to the 750FX check */
+	/* Backup 74xx specific regs */
+	mfspr	r4,SPRN_MSSCR0
+	stw	r4,CS_MSSCR0(r5)
+	mfspr	r4,SPRN_MSSSR0
+	stw	r4,CS_MSSSR0(r5)
+	beq	cr1,1f			/* 7400/7410: skip the 745x registers */
+	/* Backup 745x specific registers */
+	mfspr	r4,SPRN_HID1
+	stw	r4,CS_HID1(r5)
+	mfspr	r4,SPRN_ICTRL
+	stw	r4,CS_ICTRL(r5)
+	mfspr	r4,SPRN_LDSTCR
+	stw	r4,CS_LDSTCR(r5)
+	mfspr	r4,SPRN_LDSTDB
+	stw	r4,CS_LDSTDB(r5)
+1:
+	bne	cr6,1f			/* not a 750FX: nothing more to save */
+	/* Backup 750FX specific registers */
+	mfspr	r4,SPRN_HID1
+	stw	r4,CS_HID1(r5)
+	/* If rev 2.x, backup HID2 */
+	mfspr	r3,SPRN_PVR
+	andi.	r3,r3,0xff00		/* isolate PVR bits 0xff00 */
+	cmpwi	cr0,r3,0x0200
+	bne	1f
+	mfspr	r4,SPRN_HID2
+	stw	r4,CS_HID2(r5)
+1:
+	mtcr	r7			/* put the caller's CR back */
+	blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+	/* Some CR fields are non-volatile, so we back up the whole CR */
+	mfcr	r7
+
+	/* Get storage ptr */
+	lis	r5,(cpu_state_storage-KERNELBASE)@h	/* high half minus KERNELBASE */
+	ori	r5,r5,cpu_state_storage@l	/* NOTE(review): assumes KERNELBASE@l == 0 */
+
+	/* Restore HID0 */
+	lwz	r3,CS_HID0(r5)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+
+	/* Now deal with CPU type dependent registers */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16		/* r3 = upper half of the PVR */
+	cmplwi	cr0,r3,0x8000	/* 7450 */
+	cmplwi	cr1,r3,0x000c	/* 7400 */
+	cmplwi	cr2,r3,0x800c	/* 7410 */
+	cmplwi	cr3,r3,0x8001	/* 7455 */
+	cmplwi	cr4,r3,0x8002	/* 7457 */
+	cmplwi	cr5,r3,0x8003	/* 7447A */
+	cmplwi	cr6,r3,0x7000	/* 750FX */
+	cmplwi	cr7,r3,0x8004	/* 7448 */
+	/* cr1 is 7400 || 7410 */
+	cror	4*cr1+eq,4*cr1+eq,4*cr2+eq
+	/* cr0 is 74xx */
+	cror	4*cr0+eq,4*cr0+eq,4*cr3+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr4+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr5+eq
+	cror	4*cr0+eq,4*cr0+eq,4*cr7+eq
+	bne	2f			/* not a 74xx: go to the 750FX check */
+	/* Restore 74xx specific regs */
+	lwz	r4,CS_MSSCR0(r5)
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lwz	r4,CS_MSSSR0(r5)
+	sync
+	mtspr	SPRN_MSSSR0,r4
+	sync
+	isync
+	bne	cr2,1f			/* cr2 still holds the plain 7410 compare */
+	/* Clear 7410 L2CR2 */
+	li	r4,0
+	mtspr	SPRN_L2CR2,r4
+1:	beq	cr1,2f			/* 7400/7410: skip the 745x registers */
+	/* Restore 745x specific registers */
+	lwz	r4,CS_HID1(r5)
+	sync
+	mtspr	SPRN_HID1,r4
+	isync
+	sync
+	lwz	r4,CS_ICTRL(r5)
+	sync
+	mtspr	SPRN_ICTRL,r4
+	isync
+	sync
+	lwz	r4,CS_LDSTCR(r5)
+	sync
+	mtspr	SPRN_LDSTCR,r4
+	isync
+	sync
+	lwz	r4,CS_LDSTDB(r5)
+	sync
+	mtspr	SPRN_LDSTDB,r4
+	isync
+	sync
+2:	bne	cr6,1f			/* not a 750FX: finished */
+	/* Restore 750FX specific registers
+	 * that is restore HID2 on rev 2.x and PLL config & switch
+	 * to PLL 0 on all
+	 */
+	/* If rev 2.x, restore HID2 with low voltage bit cleared */
+	mfspr	r3,SPRN_PVR
+	andi.	r3,r3,0xff00		/* isolate PVR bits 0xff00 */
+	cmpwi	cr0,r3,0x0200
+	bne	4f
+	lwz	r4,CS_HID2(r5)
+	rlwinm	r4,r4,0,19,17		/* wrapped mask 19..17: clears bit 18 */
+	mtspr	SPRN_HID2,r4
+	sync
+4:
+	lwz	r4,CS_HID1(r5)		/* r4 = saved HID1, written last */
+	rlwinm  r5,r4,0,16,14		/* r5 = saved HID1 with bit 15 cleared */
+	mtspr	SPRN_HID1,r5
+		/* Wait for PLL to stabilize */
+	mftbl	r5			/* r5 = timebase (low word) at loop start */
+3:	mftbl	r6
+	sub	r6,r6,r5		/* ticks elapsed since the start */
+	cmplwi	cr0,r6,10000
+	ble	3b			/* spin until more than 10000 ticks passed */
+	/* Setup final PLL */
+	mtspr	SPRN_HID1,r4
+1:
+	mtcr	r7			/* put the caller's CR back */
+	blr
+_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
+
diff --git a/arch/powerpc/kernel/cpu_setup_e500.S b/arch/powerpc/kernel/cpu_setup_e500.S
new file mode 100644
index 0000000000..077cfccc34
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_e500.S
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low level CPU setup functions.
+ * Kumar Gala <galak@kernel.crashing.org>
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * Based on cpu_setup_6xx code by
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/nohash/mmu-e500.h>
+#include <asm/asm-offsets.h>
+#include <asm/mpc85xx.h>
+
+_GLOBAL(__e500_icache_setup)
+	mfspr	r0, SPRN_L1CSR1
+	andi.	r3, r0, L1CSR1_ICE	/* r3 is scratch; only cr0 is used */
+	bnelr				/* Already enabled */
+	oris	r0, r0, L1CSR1_CPE@h	/* add CPE to the value read */
+	ori	r0, r0, (L1CSR1_ICFI | L1CSR1_ICLFR |  L1CSR1_ICE)
+	mtspr	SPRN_L1CSR1, r0		/* Enable I-Cache */
+	isync
+	blr
+
+_GLOBAL(__e500_dcache_setup)
+	mfspr	r0, SPRN_L1CSR0
+	andi.	r3, r0, L1CSR0_DCE	/* r3 is scratch; only cr0 is used */
+	bnelr				/* Already enabled */
+	msync
+	isync
+	li	r0, 0
+	mtspr	SPRN_L1CSR0, r0		/* Disable */
+	msync
+	isync
+	li	r0, (L1CSR0_DCFI | L1CSR0_CLFC)
+	mtspr	SPRN_L1CSR0, r0		/* Invalidate */
+	isync
+1:	mfspr	r0, SPRN_L1CSR0		/* poll: re-read L1CSR0 each pass */
+	andi.	r3, r0, L1CSR0_CLFC
+	bne+	1b			/* Wait for lock bits reset */
+	oris	r0, r0, L1CSR0_CPE@h	/* add CPE to the value just read */
+	ori	r0, r0, L1CSR0_DCE
+	msync
+	isync
+	mtspr	SPRN_L1CSR0, r0		/* Enable */
+	isync
+	blr
+
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for PW20_WAIT_IDLE_BIT.  Uses r3 and r11 as scratch.
+ */
+#define PW20_WAIT_IDLE_BIT		50 /* 1ms, TB frequency is 41.66MHz */
+_GLOBAL(setup_pw20_idle)
+	mfspr	r3, SPRN_PWRMGTCR0
+
+	/* Set PW20_WAIT bit, enable pw20 state */
+	ori	r3, r3, PWRMGTCR0_PW20_WAIT
+	li	r11, PW20_WAIT_IDLE_BIT
+
+	/* Set Automatic PW20 Core Idle Count */
+	rlwimi	r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT
+
+	mtspr	SPRN_PWRMGTCR0, r3	/* write the updated value back */
+
+	blr
+
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for AV_WAIT_IDLE_BIT.  Uses r3 and r11 as scratch.
+ */
+#define AV_WAIT_IDLE_BIT		50 /* 1ms, TB frequency is 41.66MHz */
+_GLOBAL(setup_altivec_idle)
+	mfspr	r3, SPRN_PWRMGTCR0
+
+	/* Enable Altivec Idle */
+	oris	r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h
+	li	r11, AV_WAIT_IDLE_BIT
+
+	/* Set Automatic AltiVec Idle Count */
+	rlwimi	r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT
+
+	mtspr	SPRN_PWRMGTCR0, r3	/* write the updated value back */
+
+	blr
+
+#ifdef CONFIG_PPC_E500MC
+_GLOBAL(__setup_cpu_e6500)
+	mflr	r6			/* r6, not r5: __setup_cpu_e5500 keeps its LR in r5 */
+#ifdef CONFIG_PPC64
+	bl	setup_altivec_ivors
+	/* Touch IVOR42 only if the CPU supports E.HV category */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beq	1f			/* LPIDSIZE field is zero: skip */
+	bl	setup_lrat_ivor
+1:
+#endif
+	bl	setup_pw20_idle
+	bl	setup_altivec_idle
+	bl	__setup_cpu_e5500	/* remaining setup is shared with e5500 */
+	mtlr	r6			/* restore our own return address */
+	blr
+#endif /* CONFIG_PPC_E500MC */
+
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_E500
+#ifndef CONFIG_PPC_E500MC
+_GLOBAL(__setup_cpu_e500v1)
+_GLOBAL(__setup_cpu_e500v2)	/* both names share this body */
+	mflr	r4			/* keep return address: each bl overwrites LR */
+	bl	__e500_icache_setup
+	bl	__e500_dcache_setup
+	bl	__setup_e500_ivors
+#if defined(CONFIG_FSL_RIO) || defined(CONFIG_FSL_PCI)
+	/* Ensure that RFXE is set */
+	mfspr	r3,SPRN_HID1
+	oris	r3,r3,HID1_RFXE@h
+	mtspr	SPRN_HID1,r3
+#endif
+	mtlr	r4			/* put the return address back */
+	blr
+#else /* CONFIG_PPC_E500MC */
+_GLOBAL(__setup_cpu_e500mc)
+_GLOBAL(__setup_cpu_e5500)	/* both names share this body; r4 bases CPU_SPEC_FEATURES */
+	mflr	r5			/* keep return address: each bl overwrites LR */
+	bl	__e500_icache_setup
+	bl	__e500_dcache_setup
+	bl	__setup_e500mc_ivors
+	/*
+	 * We only want to touch IVOR38-41 if we're running on hardware
+	 * that supports category E.HV.  The architectural way to determine
+	 * this is MMUCFG[LPIDSIZE].
+	 */
+	mfspr	r3, SPRN_MMUCFG
+	rlwinm.	r3, r3, 0, MMUCFG_LPIDSIZE
+	beq	1f			/* LPIDSIZE field is zero: no E.HV */
+	bl	__setup_ehv_ivors
+	b	2f
+1:
+	lwz	r3, CPU_SPEC_FEATURES(r4)
+	/* We need this check as cpu_setup is also called for
+	 * the secondary cores. So, if we have already cleared
+	 * the feature on the primary core, avoid doing it on the
+	 * secondary core.
+	 */
+	andi.	r6, r3, CPU_FTR_EMB_HV
+	beq	2f			/* bit already clear: no store needed */
+	rlwinm	r3, r3, 0, ~CPU_FTR_EMB_HV	/* drop CPU_FTR_EMB_HV */
+	stw	r3, CPU_SPEC_FEATURES(r4)
+2:
+	mtlr	r5			/* put the return address back */
+	blr
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_PPC_E500 */
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC_BOOK3E_64
+_GLOBAL(__restore_cpu_e6500)
+	mflr	r5			/* r5, not r4: __restore_cpu_e5500 keeps its LR in r4 */
+	bl	setup_altivec_ivors
+	/* Touch IVOR42 only if the CPU supports E.HV category */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beq	1f			/* LPIDSIZE field is zero: skip */
+	bl	setup_lrat_ivor
+1:
+	bl	setup_pw20_idle
+	bl	setup_altivec_idle
+	bl	__restore_cpu_e5500	/* remaining work is shared with e5500 */
+	mtlr	r5			/* restore our own return address */
+	blr
+
+_GLOBAL(__restore_cpu_e5500)
+	mflr	r4			/* keep return address: each bl overwrites LR */
+	bl	__e500_icache_setup
+	bl	__e500_dcache_setup
+	bl	__setup_base_ivors
+	bl	setup_perfmon_ivor
+	bl	setup_doorbell_ivors
+	/*
+	 * We only want to touch IVOR38-41 if we're running on hardware
+	 * that supports category E.HV.  The architectural way to determine
+	 * this is MMUCFG[LPIDSIZE].
+	 */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beq	1f			/* LPIDSIZE field is zero: no E.HV */
+	bl	setup_ehv_ivors
+1:
+	mtlr	r4			/* put the return address back */
+	blr
+
+_GLOBAL(__setup_cpu_e5500)
+	mflr	r5			/* keep return address; r4 bases CPU_SPEC_FEATURES */
+	bl	__e500_icache_setup
+	bl	__e500_dcache_setup
+	bl	__setup_base_ivors
+	bl	setup_perfmon_ivor
+	bl	setup_doorbell_ivors
+	/*
+	 * We only want to touch IVOR38-41 if we're running on hardware
+	 * that supports category E.HV.  The architectural way to determine
+	 * this is MMUCFG[LPIDSIZE].
+	 */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beq	1f			/* LPIDSIZE field is zero: no E.HV */
+	bl	setup_ehv_ivors
+	b	2f
+1:
+	ld	r10,CPU_SPEC_FEATURES(r4)
+	LOAD_REG_IMMEDIATE(r9,CPU_FTR_EMB_HV)
+	andc	r10,r10,r9		/* drop CPU_FTR_EMB_HV */
+	std	r10,CPU_SPEC_FEATURES(r4)
+2:
+	mtlr	r5			/* put the return address back */
+	blr
+#endif
+
+/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */
+_GLOBAL(flush_dcache_L1)
+	mfmsr	r10		/* r10 = MSR on entry, handed to wrtee at the end */
+	wrteei	0		/* clear MSR[EE] for the duration */
+
+	mfspr	r3,SPRN_L1CFG0
+	rlwinm	r5,r3,9,3	/* Extract cache block size */
+	twlgti	r5,1		/* Only 32 and 64 byte cache blocks
+				 * are currently defined.
+				 */
+	li	r4,32
+	subfic	r6,r5,2		/* r6 = log2(1KiB / cache block size) -
+				 *      log2(number of ways)
+				 */
+	slw	r5,r4,r5	/* r5 = cache block size */
+
+	rlwinm	r7,r3,0,0xff	/* Extract number of KiB in the cache */
+	mulli	r7,r7,13	/* An 8-way cache will require 13
+				 * loads per set.
+				 */
+	slw	r7,r7,r6	/* r7 = iteration count for both loops */
+
+	/* save off HID0 and set DCFA */
+	mfspr	r8,SPRN_HID0
+	ori	r9,r8,HID0_DCFA@l
+	mtspr	SPRN_HID0,r9
+	isync
+
+	LOAD_REG_IMMEDIATE(r6, KERNELBASE)
+	mr	r4, r6		/* both loops walk upward from KERNELBASE */
+	mtctr	r7
+
+1:	lwz	r3,0(r4)	/* Load... */
+	add	r4,r4,r5
+	bdnz	1b
+
+	msync
+	mr	r4, r6		/* rewind to KERNELBASE for the flush pass */
+	mtctr	r7
+
+1:	dcbf	0,r4		/* ...and flush. */
+	add	r4,r4,r5
+	bdnz	1b
+
+	/* restore HID0 */
+	mtspr	SPRN_HID0,r8
+	isync
+
+	wrtee r10		/* MSR[EE] back to its entry value */
+
+	blr
+
+SYM_FUNC_START_LOCAL(has_L2_cache)
+	/* skip L2 cache on P2040/P2040E as they have no L2 cache */
+	mfspr	r3, SPRN_SVR
+	/* shift right by 8 bits and clear E bit of SVR */
+	rlwinm	r4, r3, 24, ~0x800
+
+	lis	r3, SVR_P2040@h
+	ori	r3, r3, SVR_P2040@l	/* r3 = SVR_P2040 */
+	cmpw	r4, r3
+	beq	1f
+
+	li	r3, 1			/* result in r3: 1 = has an L2 */
+	blr
+1:
+	li	r3, 0			/* result in r3: 0 = P2040/P2040E */
+	blr
+SYM_FUNC_END(has_L2_cache)
+
+/* flush backside L2 cache; uses r3 and r10 (has_L2_cache also uses r4) */
+SYM_FUNC_START_LOCAL(flush_backside_L2_cache)
+	mflr	r10			/* keep return address across the bl */
+	bl	has_L2_cache
+	mtlr	r10
+	cmpwi	r3, 0
+	beq	2f			/* has_L2_cache returned 0: nothing to flush */
+
+	/* Flush the L2 cache */
+	mfspr	r3, SPRN_L2CSR0
+	ori	r3, r3, L2CSR0_L2FL@l
+	msync
+	isync
+	mtspr	SPRN_L2CSR0,r3
+	isync
+
+	/* check if it is complete */
+1:	mfspr	r3,SPRN_L2CSR0
+	andi.	r3, r3, L2CSR0_L2FL@l
+	bne	1b			/* spin while L2FL still reads as set */
+2:
+	blr
+SYM_FUNC_END(flush_backside_L2_cache)
+
+_GLOBAL(cpu_down_flush_e500v2)
+	mflr r0				/* keep return address across the bl */
+	bl	flush_dcache_L1
+	mtlr r0				/* put the return address back */
+	blr
+
+_GLOBAL(cpu_down_flush_e500mc)
+_GLOBAL(cpu_down_flush_e5500)	/* both names share this body */
+	mflr r0				/* keep return address across the bl's */
+	bl	flush_dcache_L1
+	bl	flush_backside_L2_cache	/* L2 flushed after the L1 */
+	mtlr r0				/* put the return address back */
+	blr
+
+/* L1 Data Cache of e6500 contains no modified data, so no flush is required */
+_GLOBAL(cpu_down_flush_e6500)
+	blr				/* intentionally a no-op */
diff --git a/arch/powerpc/kernel/cpu_setup_pa6t.S b/arch/powerpc/kernel/cpu_setup_pa6t.S
new file mode 100644
index 0000000000..e6bfd4490e
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_pa6t.S
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Right now, restore and setup are the same thing; r0 is the only scratch */
+_GLOBAL(__restore_cpu_pa6t)
+_GLOBAL(__setup_cpu_pa6t)
+	/* Do nothing if not running in HV mode */
+	mfmsr	r0
+	rldicl.	r0,r0,4,63	/* rotate left 4, keep the LSB: isolates MSR bit 3 */
+	beqlr
+
+	mfspr	r0,SPRN_HID5
+	ori	r0,r0,0x38	/* NOTE(review): bit meanings not given here - confirm */
+	mtspr	SPRN_HID5,r0
+
+	mfspr	r0,SPRN_LPCR
+	ori	r0,r0,0x7000	/* NOTE(review): bit meanings not given here - confirm */
+	mtspr	SPRN_LPCR,r0
+
+	blr
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
new file mode 100644
index 0000000000..98bd4e6c17
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020, Jordan Niethe, IBM Corporation.
+ *
+ * This file contains low level CPU setup functions.
+ * Originally written in assembly by Benjamin Herrenschmidt & various other
+ * authors.
+ */
+
+#include <asm/reg.h>
+#include <asm/synch.h>
+#include <linux/bitops.h>
+#include <asm/cputable.h>
+#include <asm/cpu_setup.h>
+
+/*
+ * Report whether MSR:HV is set.  When it is not, CPU_FTR_HVMODE and
+ * CPU_FTR_P9_TM_HV_ASSIST are removed from @t before returning false.
+ */
+static bool init_hvmode_206(struct cpu_spec *t)
+{
+	const bool hv = !!(mfmsr() & MSR_HV);
+
+	if (!hv)
+		t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST);
+	return hv;
+}
+
+static void init_LPCR_ISA300(u64 lpcr, u64 lpes)
+{
+	/* POWER9 has no VRMASD */
+	lpcr |= ((lpes << LPCR_LPES_SH) & LPCR_LPES) |
+		LPCR_PECE0 | LPCR_PECE1 | LPCR_PECE2 |
+		((4ull << LPCR_DPFD_SH) & LPCR_DPFD);
+	/* HDICE is cleared before the VC field is ORed in */
+	lpcr = (lpcr & ~LPCR_HDICE) | (4ull << LPCR_VC_SH);
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+}
+
+/*
+ * Set up a sane LPCR, starting from the initial @lpcr and inserting the
+ * caller's desired 2-bit @lpes value:
+ *
+ *   LPES = @lpes (0b01: HSRR0/1 used for 0x500)
+ *   PECE = 0b111
+ *   DPFD = 4
+ *   HDICE = 0
+ *   VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
+ *   VRMASD = 0b10000 (L=1, LP=00)
+ *
+ * VRMASD is ORed in here, the rest by init_LPCR_ISA300(); other bits untouched.
+ */
+static void init_LPCR_ISA206(u64 lpcr, u64 lpes)
+{
+	init_LPCR_ISA300(lpcr | ((0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD),
+			 lpes);
+}
+
+static void init_FSCR(void)
+{
+	/*
+	 * Read-modify-write: set FSCR_TAR and FSCR_EBB, leaving every
+	 * other FSCR bit at the value that was read.
+	 */
+	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_TAR | FSCR_EBB);
+}
+
+static void init_FSCR_power9(void)
+{
+	/*
+	 * Set FSCR_SCV with its own read-modify-write first, then let
+	 * init_FSCR() add the TAR and EBB bits in a second write.
+	 */
+	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+	init_FSCR();
+}
+
+static void init_FSCR_power10(void)
+{
+	/*
+	 * Set FSCR_PREFIX with its own read-modify-write first, then
+	 * chain to init_FSCR_power9() for the SCV, TAR and EBB bits.
+	 */
+	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_PREFIX);
+	init_FSCR_power9();
+}
+
+static void init_HFSCR(void)
+{
+	const u64 enable = HFSCR_TAR | HFSCR_TM | HFSCR_BHRB | HFSCR_PM |
+			   HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_EBB |
+			   HFSCR_MSGP;
+
+	/* Set the bits above; all other HFSCR bits keep the value read */
+	mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | enable);
+}
+
+static void init_PMU_HV(void)
+{
+	mtspr(SPRN_MMCRC, 0);	/* zero MMCRC; callers only reach this with MSR:HV set */
+}
+
+static void init_PMU_HV_ISA207(void)
+{
+	mtspr(SPRN_MMCRH, 0);	/* zero MMCRH; callers only reach this with MSR:HV set */
+}
+
+static void init_PMU(void)
+{
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);	/* NOTE(review): FC presumably freezes the counters */
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+}
+
+static void init_PMU_ISA207(void)
+{
+	mtspr(SPRN_MMCRS, 0);	/* zero MMCRS; run whether or not MSR:HV is set */
+}
+
+static void init_PMU_ISA31(void)
+{
+	mtspr(SPRN_MMCR3, 0);
+	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);	/* replaces the 0 written by init_PMU() */
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);	/* replaces init_PMU()'s MMCR0_FC */
+}
+
+static void init_DEXCR(void)
+{
+	mtspr(SPRN_DEXCR, DEXCR_INIT);	/* load the DEXCR_INIT value */
+	mtspr(SPRN_HASHKEYR, 0);	/* zero HASHKEYR */
+}
+
+/*
+ * Note that we can be called twice for pseudo-PVRs.
+ * The parameter offset is not used.
+ */
+
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
+{
+	if (!init_hvmode_206(t))	/* false: MSR:HV clear, HV features removed from @t */
+		return;
+	/* MSR:HV is set from here on */
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __restore_cpu_power7(void)
+{
+	/*
+	 * No cpu_spec is passed here, so MSR:HV is tested directly.
+	 */
+	if (!(mfmsr() & MSR_HV))
+		return;
+
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
+{
+	init_FSCR();		/* these three run whether or not MSR:HV is set */
+	init_PMU();
+	init_PMU_ISA207();
+
+	if (!init_hvmode_206(t))	/* false: MSR:HV clear, HV features removed from @t */
+		return;
+
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+	init_HFSCR();
+	init_PMU_HV();
+	init_PMU_HV_ISA207();
+}
+
+void __restore_cpu_power8(void)
+{
+	/*
+	 * FSCR and PMU setup runs regardless of MSR:HV.
+	 */
+	init_FSCR();
+	init_PMU();
+	init_PMU_ISA207();
+
+	if (!(mfmsr() & MSR_HV))
+		return;
+
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+	init_HFSCR();
+	init_PMU_HV();
+	init_PMU_HV_ISA207();
+}
+
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
+{
+	init_FSCR_power9();	/* these two run whether or not MSR:HV is set */
+	init_PMU();
+
+	if (!init_hvmode_206(t))	/* false: MSR:HV clear, HV features removed from @t */
+		return;
+
+	mtspr(SPRN_PSSCR, 0);
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+	init_HFSCR();
+	init_PMU_HV();
+}
+
+void __restore_cpu_power9(void)
+{
+	/*
+	 * FSCR and PMU setup runs regardless of MSR:HV.
+	 */
+	init_FSCR_power9();
+	init_PMU();
+
+	if (!(mfmsr() & MSR_HV))
+		return;
+
+	mtspr(SPRN_PSSCR, 0);
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |
+			  LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+	init_HFSCR();
+	init_PMU_HV();
+}
+
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
+{
+	init_FSCR_power10();	/* these four run whether or not MSR:HV is set */
+	init_PMU();
+	init_PMU_ISA31();	/* after init_PMU(): rewrites MMCRA and MMCR0 */
+	init_DEXCR();
+
+	if (!init_hvmode_206(t))	/* false: MSR:HV clear, HV features removed from @t */
+		return;
+
+	mtspr(SPRN_PSSCR, 0);
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+	init_HFSCR();
+	init_PMU_HV();
+}
+
+void __restore_cpu_power10(void)
+{
+	/*
+	 * FSCR, PMU and DEXCR setup runs regardless of MSR:HV.
+	 */
+	init_FSCR_power10();
+	init_PMU();
+	init_PMU_ISA31();
+	init_DEXCR();
+
+	if (!(mfmsr() & MSR_HV))
+		return;
+
+	mtspr(SPRN_PSSCR, 0);
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_PID, 0);
+	mtspr(SPRN_AMOR, ~0);
+	mtspr(SPRN_PCR, PCR_MASK);
+	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |
+			  LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+	init_HFSCR();
+	init_PMU_HV();
+}
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
new file mode 100644
index 0000000000..f0c07e70f0
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/*
+ * __cpu_preinit_ppc970: early HID register setup for the 970.
+ *
+ * Returns immediately unless the MSR bit tested below (HV) is set.
+ * Otherwise it clears rm_ci and bit 61 in HID4, clears the DCBZ bits in
+ * HID5, ORs (0x1200 << 44) into HID1 and writes zero to HIOR.
+ * Registers modified: r0, r3 and CR0.
+ */
+_GLOBAL(__cpu_preinit_ppc970)
+	/* Do nothing if not running in HV mode */
+	mfmsr	r0
+	rldicl.	r0,r0,4,63	/* rotate left 4, keep low bit: MSR bit 3 (IBM numbering) */
+	beqlr
+
+	/* Make sure HID4:rm_ci is off before MMU is turned off, that large
+	 * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
+	 * HID5:DCBZ32_ill
+	 */
+	li	r0,0		/* r0 = 0 is the insert source for the rldimi's below */
+	mfspr	r3,SPRN_HID4
+	rldimi	r3,r0,40,23	/* clear bit 23 (rm_ci) */
+	rldimi	r3,r0,2,61	/* clear bit 61 (lg_pg_en) */
+	sync
+	mtspr	SPRN_HID4,r3
+	isync
+	sync
+	mfspr	r3,SPRN_HID5
+	rldimi	r3,r0,6,56	/* clear bits 56 & 57 (DCBZ*) */
+	sync
+	mtspr	SPRN_HID5,r3
+	isync
+	sync
+
+	/* Setup some basic HID1 features */
+	mfspr	r0,SPRN_HID1
+	li	r3,0x1200		/* enable i-fetch cacheability */
+	sldi	r3,r3,44		/* and prefetch */
+	or	r0,r0,r3
+	/* NOTE(review): HID1 is written twice on purpose here and in
+	 * __restore_cpu_ppc970 - presumably a 970 requirement; confirm
+	 * against the processor manual before touching.
+	 */
+	mtspr	SPRN_HID1,r0
+	mtspr	SPRN_HID1,r0
+	isync
+
+	/* Clear HIOR */
+	li	r0,0
+	sync
+	/* The bare "0" operand is a register number, i.e. r0 (zeroed above) */
+	mtspr	SPRN_HIOR,0		/* Clear interrupt prefix */
+	isync
+	blr
+
+/*
+ * Definitions for the table used to save CPU state.
+ *
+ * CS_HIDx are byte offsets into cpu_state_storage, one 8-byte slot per
+ * saved HID register; CS_SIZE is the total size of the table.
+ */
+#define CS_HID0		0
+#define CS_HID1		8
+#define	CS_HID4		16
+#define CS_HID5		24
+#define CS_SIZE		32
+
+	.data
+	/* Align the table to L1_CACHE_BYTES; padding is filled with zero bytes */
+	.balign	L1_CACHE_BYTES,0
+cpu_state_storage:
+	.space	CS_SIZE
+	/* Pad up to the next L1_CACHE_BYTES boundary after the table */
+	.balign	L1_CACHE_BYTES,0
+	.text
+
+
+/*
+ * __setup_cpu_ppc970: CPU setup entry point for the 970.
+ *
+ * Builds the new HID0 value in r0 and then branches to load_hids, the
+ * code shared with __setup_cpu_ppc970MP below.  If the MSR bit tested
+ * here (HV) is clear it branches to no_hv_mode instead, which uses r4 as
+ * the base register for CPU_SPEC_FEATURES, so r4 must still hold the
+ * caller's value at that point.
+ */
+_GLOBAL(__setup_cpu_ppc970)
+	/* Do nothing if not running in HV mode */
+	mfmsr	r0
+	rldicl.	r0,r0,4,63	/* rotate left 4, keep low bit: MSR bit 3 (IBM numbering) */
+	beq	no_hv_mode
+
+	mfspr	r0,SPRN_HID0
+	li	r11,5			/* clear DOZE and SLEEP */
+	rldimi	r0,r11,52,8		/* set NAP and DPM */
+	li	r11,0
+	rldimi	r0,r11,32,31		/* clear EN_ATTN */
+	b	load_hids		/* Jump to shared code */
+
+
+/*
+ * __setup_cpu_ppc970MP: CPU setup entry point for the 970MP.
+ *
+ * In:	r4 = base used for CPU_SPEC_FEATURES (the cpu_spec being set up).
+ *	It is only dereferenced on the no_hv_mode path and must therefore
+ *	be left untouched by everything that can reach that label.
+ *
+ * load_hids and no_hv_mode are shared with __setup_cpu_ppc970, which
+ * branches here with the new HID0 value already in r0.
+ *
+ * Registers modified: r0, r3, r5, r11 and CR0; additionally r6 on the
+ * no_hv_mode path.
+ */
+_GLOBAL(__setup_cpu_ppc970MP)
+	/* Do nothing if not running in HV mode */
+	mfmsr	r0
+	rldicl.	r0,r0,4,63	/* rotate left 4, keep low bit: MSR bit 3 (IBM numbering) */
+	beq	no_hv_mode
+
+	mfspr	r0,SPRN_HID0
+	li	r11,0x15		/* clear DOZE and SLEEP */
+	rldimi	r0,r11,52,6		/* set DEEPNAP, NAP and DPM */
+	li	r11,0
+	rldimi	r0,r11,32,31		/* clear EN_ATTN */
+
+load_hids:
+	/* Write HID0, then read it back six times before sync/isync */
+	mtspr	SPRN_HID0,r0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	sync
+	isync
+
+	/* Try to set LPES = 01 in HID4 */
+	mfspr	r0,SPRN_HID4
+	clrldi	r0,r0,1			/* clear LPES0 */
+	ori	r0,r0,HID4_LPES1	/* set LPES1 */
+	sync
+	mtspr	SPRN_HID4,r0
+	isync
+
+	/* Save away cpu state */
+	LOAD_REG_ADDR(r5,cpu_state_storage)
+
+	/* Save HID0,1,4 and 5 */
+	mfspr	r3,SPRN_HID0
+	std	r3,CS_HID0(r5)
+	mfspr	r3,SPRN_HID1
+	std	r3,CS_HID1(r5)
+	/*
+	 * HID4 is read back into r0 rather than r4: if LPES1 did not stick
+	 * we fall through to no_hv_mode, which needs r4 intact.
+	 */
+	mfspr	r0,SPRN_HID4
+	std	r0,CS_HID4(r5)
+	mfspr	r3,SPRN_HID5
+	std	r3,CS_HID5(r5)
+
+	/* See if we successfully set LPES1 to 1; if not we are in Apple mode */
+	andi.	r0,r0,HID4_LPES1
+	bnelr
+
+no_hv_mode:
+	/* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */
+	ld	r5,CPU_SPEC_FEATURES(r4)
+	LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
+	andc	r5,r5,r6
+	std	r5,CPU_SPEC_FEATURES(r4)
+	blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ *
+ * Returns immediately unless the MSR bit tested below (HV) is set.
+ * Otherwise HID0, HID1, HID4 and HID5 are reloaded from
+ * cpu_state_storage and HIOR is written with zero.
+ * Registers modified: r0, r3, r5 and CR0.
+ */
+_GLOBAL(__restore_cpu_ppc970)
+	/* Do nothing if not running in HV mode */
+	mfmsr	r0
+	rldicl.	r0,r0,4,63	/* rotate left 4, keep low bit: MSR bit 3 (IBM numbering) */
+	beqlr
+
+	/* r5 = address of the saved-state table for the loads below */
+	LOAD_REG_ADDR(r5,cpu_state_storage)
+	/* Before accessing memory, we make sure rm_ci is clear */
+	li	r0,0
+	mfspr	r3,SPRN_HID4
+	rldimi	r3,r0,40,23	/* clear bit 23 (rm_ci) */
+	sync
+	mtspr	SPRN_HID4,r3
+	isync
+	sync
+
+	/* Clear interrupt prefix */
+	li	r0,0
+	sync
+	/* The bare "0" operand is a register number, i.e. r0 (zeroed above) */
+	mtspr	SPRN_HIOR,0
+	isync
+
+	/* Restore HID0 (same write + six read-backs as in load_hids) */
+	ld	r3,CS_HID0(r5)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	mfspr	r3,SPRN_HID0
+	mfspr	r3,SPRN_HID0
+	mfspr	r3,SPRN_HID0
+	mfspr	r3,SPRN_HID0
+	mfspr	r3,SPRN_HID0
+	mfspr	r3,SPRN_HID0
+	sync
+	isync
+
+	/* Restore HID1 (written twice, as in __cpu_preinit_ppc970) */
+	ld	r3,CS_HID1(r5)
+	sync
+	isync
+	mtspr	SPRN_HID1,r3
+	mtspr	SPRN_HID1,r3
+	sync
+	isync
+
+	/* Restore HID4 */
+	ld	r3,CS_HID4(r5)
+	sync
+	isync
+	mtspr	SPRN_HID4,r3
+	sync
+	isync
+
+	/* Restore HID5 */
+	ld	r3,CS_HID5(r5)
+	sync
+	isync
+	mtspr	SPRN_HID5,r3
+	sync
+	isync
+	blr
+
diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h
new file mode 100644
index 0000000000..85ded3f772
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/*
+ * Pull in the cpu_specs[] table for the configured CPU family.
+ *
+ * Where two families share a group, the first test wins: 47x is chosen
+ * over 44x, and E500MC over 85xx.  The groups themselves are independent
+ * #ifdef blocks; NOTE(review): presumably Kconfig guarantees that only one
+ * of them is active, since each header defines its own cpu_specs[] -
+ * confirm.
+ */
+#ifdef CONFIG_40x
+#include "cpu_specs_40x.h"
+#endif
+
+#ifdef CONFIG_PPC_47x
+#include "cpu_specs_47x.h"
+#elif defined(CONFIG_44x)
+#include "cpu_specs_44x.h"
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include "cpu_specs_8xx.h"
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#include "cpu_specs_e500mc.h"
+#elif defined(CONFIG_PPC_85xx)
+#include "cpu_specs_85xx.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#include "cpu_specs_book3s_32.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include "cpu_specs_book3s_64.h"
+#endif
diff --git a/arch/powerpc/kernel/cpu_specs_40x.h b/arch/powerpc/kernel/cpu_specs_40x.h
new file mode 100644
index 0000000000..a1362a75b8
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_40x.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/*
+ * CPU table for the 40x family.
+ *
+ * Each entry pairs a PVR mask/value with the name, feature words, cache
+ * block sizes, machine-check handler and platform string for that part.
+ * pvr_value must not have bits set outside pvr_mask, otherwise a masked
+ * PVR compare can never equal it.  The last entry has an all-zero mask
+ * and serves as the default match, so the order of entries matters.
+ */
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* STB 04xxx */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41810000,
+		.cpu_name		= "STB04xxx",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* NP405L */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41610000,
+		.cpu_name		= "NP405L",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* NP4GS3 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x40B10000,
+		.cpu_name		= "NP4GS3",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* NP405H */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41410000,
+		.cpu_name		= "NP405H",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405GPr */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x50910000,
+		.cpu_name		= "405GPr",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* STBx25xx */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x51510000,
+		.cpu_name		= "STBx25xx",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405LP (the only entry without PPC_FEATURE_HAS_4xxMAC) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41F10000,
+		.cpu_name		= "405LP",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x51210000,
+		.cpu_name		= "405EP",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. A/B with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910007,
+		.cpu_name		= "405EX Rev. A/B",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. C without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x1291000d,
+		.cpu_name		= "405EX Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. C with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x1291000f,
+		.cpu_name		= "405EX Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. D without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910003,
+		.cpu_name		= "405EX Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. D with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910005,
+		.cpu_name		= "405EX Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. A/B without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910001,
+		.cpu_name		= "405EXr Rev. A/B",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. C without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910009,
+		.cpu_name		= "405EXr Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. C with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x1291000b,
+		.cpu_name		= "405EXr Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. D without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910000,
+		.cpu_name		= "405EXr Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. D with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910002,
+		.cpu_name		= "405EXr Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EZ */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41510000,
+		.cpu_name		= "405EZ",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* APM8018X */
+		/*
+		 * Full-PVR match: pvr_value carries low-order bits (0x1432),
+		 * so the mask has to cover them as well.
+		 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x7ff11432,
+		.cpu_name		= "APM8018X",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic 40x PPC)",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_44x.h b/arch/powerpc/kernel/cpu_specs_44x.h
new file mode 100644
index 0000000000..69c4cdc0cd
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_44x.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/* cpu_user_features bits shared by every entry in the table below */
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+
+/*
+ * CPU table for the 44x family.
+ *
+ * Each entry pairs a PVR mask/value with the name, feature words, cache
+ * block sizes, optional cpu_setup hook, machine-check handler and platform
+ * string for that part.  Several masks deliberately leave out low bits so
+ * that one entry covers more than one PVR (see the per-entry comments).
+ * The last entry has an all-zero mask and is labelled the default match;
+ * NOTE(review): presumably entries are tried first to last - confirm
+ * against the lookup code before reordering.
+ */
+static struct cpu_spec cpu_specs[] __initdata = {
+	{ /* 440GR Rev. A */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000850,
+		.cpu_name		= "440GR Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000858,
+		.cpu_name		= "440EP Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440ep,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* 440GR Rev. B */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x400008d3,
+		.cpu_name		= "440GR Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000ff7,
+		.pvr_value		= 0x400008d4,
+		.cpu_name		= "440EP Rev. C",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440ep,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x400008db,
+		.cpu_name		= "440EP Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440ep,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* 440GRX */
+		.pvr_mask		= 0xf0000ffb,
+		.pvr_value		= 0x200008D0,
+		.cpu_name		= "440GRX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440grx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000ffb,
+		.pvr_value		= 0x200008D8,
+		.cpu_name		= "440EPX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440epx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{	/* 440GP Rev. B */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000440,
+		.cpu_name		= "440GP Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440gp",
+	},
+	{	/* 440GP Rev. C */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000481,
+		.cpu_name		= "440GP Rev. C",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440gp",
+	},
+	{ /* 440GX Rev. A */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000850,
+		.cpu_name		= "440GX Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440GX Rev. B */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000851,
+		.cpu_name		= "440GX Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440GX Rev. C */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000892,
+		.cpu_name		= "440GX Rev. C",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440GX Rev. F */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000894,
+		.cpu_name		= "440GX Rev. F",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440SP Rev. A */
+		.pvr_mask		= 0xfff00fff,
+		.pvr_value		= 0x53200891,
+		.cpu_name		= "440SP Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* 440SPe Rev. A */
+		.pvr_mask               = 0xfff00fff,
+		.pvr_value              = 0x53400890,
+		.cpu_name               = "440SPe Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features      = COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize           = 32,
+		.dcache_bsize           = 32,
+		.cpu_setup		= __setup_cpu_440spe,
+		.machine_check		= machine_check_440A,
+		.platform               = "ppc440",
+	},
+	{ /* 440SPe Rev. B */
+		.pvr_mask		= 0xfff00fff,
+		.pvr_value		= 0x53400891,
+		.cpu_name		= "440SPe Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440spe,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460EX */
+		.pvr_mask		= 0xffff0006,
+		.pvr_value		= 0x13020002,
+		.cpu_name		= "460EX",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460ex,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460EX Rev B */
+		.pvr_mask		= 0xffff0007,
+		.pvr_value		= 0x13020004,
+		.cpu_name		= "460EX Rev. B",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460ex,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460GT */
+		.pvr_mask		= 0xffff0006,
+		.pvr_value		= 0x13020000,
+		.cpu_name		= "460GT",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460gt,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460GT Rev B */
+		.pvr_mask		= 0xffff0007,
+		.pvr_value		= 0x13020005,
+		.cpu_name		= "460GT Rev. B",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460gt,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460SX */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x13541800,
+		.cpu_name		= "460SX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460sx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 464 in APM821xx */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x12C41C80,
+		.cpu_name		= "APM821XX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE |
+			PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_apm821xx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic 44x PPC)",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_47x.h b/arch/powerpc/kernel/cpu_specs_47x.h
new file mode 100644
index 0000000000..3143cd504a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_47x.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/* cpu_user_features bits shared by every entry in the table below */
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+
+/*
+ * CPU table for the 47x family.
+ *
+ * All entries use a 32-byte icache block and a 128-byte dcache block and
+ * the machine_check_47x handler.  The exact-match "476 DD2 core" entry
+ * (mask 0xffffffff) comes before the broader 0x11a5xxxx "476 others"
+ * entry and differs from it by CPU_FTR_476_DD2.  The final all-zero-mask
+ * entry is the default match; unlike the others it sets neither
+ * PPC_FEATURE_HAS_FPU nor the TLBIVAX/lock broadcast MMU features.
+ * NOTE(review): presumably entries are tried first to last - confirm
+ * against the lookup code before reordering.
+ */
+static struct cpu_spec cpu_specs[] __initdata = {
+	{ /* 476 DD2 core */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x11a52080,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X | CPU_FTR_476_DD2,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{ /* 476fpe */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x7ff50000,
+		.cpu_name		= "476fpe",
+		.cpu_features		= CPU_FTRS_47X | CPU_FTR_476_DD2,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{ /* 476 iss */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00050000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{ /* 476 others */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x11a50000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic 47x PPC)",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_47x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_85xx.h b/arch/powerpc/kernel/cpu_specs_85xx.h
new file mode 100644
index 0000000000..aaae202c1a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_85xx.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/* cpu_user_features bits shared by every entry in the table below */
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+
+/*
+ * CPU table for 85xx (e500 / e500v2 cores).
+ *
+ * The e500v2 entry adds PPC_FEATURE_HAS_EFP_DOUBLE_COMP, MMU_FTR_BIG_PHYS
+ * and a cpu_down_flush hook on top of what the e500 entry provides.  The
+ * final all-zero-mask entry is the default match: it uses the e500
+ * feature set but sets no cpu_user_features2, num_pmcs or cpu_setup.
+ */
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* e500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80200000,
+		.cpu_name		= "e500",
+		.cpu_features		= CPU_FTRS_E500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+					  PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e500v1,
+		.machine_check		= machine_check_e500,
+		.platform		= "ppc8540",
+	},
+	{	/* e500v2 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80210000,
+		.cpu_name		= "e500v2",
+		.cpu_features		= CPU_FTRS_E500_2,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+					  PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+					  PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e500v2,
+		.machine_check		= machine_check_e500,
+		.platform		= "ppc8548",
+		.cpu_down_flush		= cpu_down_flush_e500v2,
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic E500 PPC)",
+		.cpu_features		= CPU_FTRS_E500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+					  PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_e500,
+		.platform		= "powerpc",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_8xx.h b/arch/powerpc/kernel/cpu_specs_8xx.h
new file mode 100644
index 0000000000..93ddbc202b
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_8xx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/*
+ * CPU table for the 8xx family.
+ *
+ * Holds a single entry keyed on the upper 16 PVR bits (PVR_8xx); unlike
+ * the other family tables there is no all-zero-mask default entry.
+ */
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* 8xx */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= PVR_8xx,
+		.cpu_name		= "8xx",
+		/*
+		 * CPU_FTR_MAYBE_CAN_DOZE is possible,
+		 * if the 8xx code is there....
+		 */
+		.cpu_features		= CPU_FTRS_8XX,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+		.mmu_features		= MMU_FTR_TYPE_8xx,
+		.icache_bsize		= 16,
+		.dcache_bsize		= 16,
+		.machine_check		= machine_check_8xx,
+		.platform		= "ppc823",
+	},
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_32.h b/arch/powerpc/kernel/cpu_specs_book3s_32.h
new file mode 100644
index 0000000000..3714634d19
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_32.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER	(PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+			 PPC_FEATURE_HAS_MMU)	/* .cpu_user_features base for every entry below except e300c2 (no FPU) */
+
+static struct cpu_spec cpu_specs[] __initdata = {	/* Book3S-32 CPU table. NOTE(review): order looks significant (narrow masks first) - confirm */
+#ifdef CONFIG_PPC_BOOK3S_603
+	{	/* 603 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00030000,
+		.cpu_name		= "603",
+		.cpu_features		= CPU_FTRS_603,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* 603e */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00060000,
+		.cpu_name		= "603e",
+		.cpu_features		= CPU_FTRS_603,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* 603ev */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00070000,
+		.cpu_name		= "603ev",
+		.cpu_features		= CPU_FTRS_603,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* 82xx (8240, 8245, 8260 are all 603e cores) */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00810000,
+		.cpu_name		= "82xx",
+		.cpu_features		= CPU_FTRS_82XX,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* All G2_LE (603e core, plus some) have the same pvr */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00820000,
+		.cpu_name		= "G2_LE",
+		.cpu_features		= CPU_FTRS_G2_LE,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+#ifdef CONFIG_PPC_83xx
+	{	/* e300c1 (a 603e core, plus some) on 83xx */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00830000,
+		.cpu_name		= "e300c1",
+		.cpu_features		= CPU_FTRS_E300,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.platform		= "ppc603",
+	},
+	{	/* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00840000,
+		.cpu_name		= "e300c2",
+		.cpu_features		= CPU_FTRS_E300C2,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.platform		= "ppc603",
+	},
+	{	/* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00850000,
+		.cpu_name		= "e300c3",
+		.cpu_features		= CPU_FTRS_E300,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.num_pmcs		= 4,
+		.platform		= "ppc603",
+	},
+	{	/* e300c4 (e300c1, plus one IU) */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00860000,
+		.cpu_name		= "e300c4",
+		.cpu_features		= CPU_FTRS_E300,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.num_pmcs		= 4,
+		.platform		= "ppc603",
+	},
+#endif /* CONFIG_PPC_83xx */
+#endif /* CONFIG_PPC_BOOK3S_603 */
+#ifdef CONFIG_PPC_BOOK3S_604
+	{	/* 604 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00040000,
+		.cpu_name		= "604",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 2,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 604e */
+		.pvr_mask		= 0xfffff000,
+		.pvr_value		= 0x00090000,
+		.cpu_name		= "604e",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 604r (same pvr_value as 604e, coarser mask) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00090000,
+		.cpu_name		= "604r",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 604ev */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x000a0000,
+		.cpu_name		= "604ev",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 740/750 (0x4202, doesn't support TAU ?) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x00084202,
+		.cpu_name		= "740/750",
+		.cpu_features		= CPU_FTRS_740_NOTAU,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CX (80100 and 8010x?) */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x00080100,
+		.cpu_name		= "750CX",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CX (82201 and 82202) */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x00082200,
+		.cpu_name		= "750CX",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CXe (82214) */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x00082210,
+		.cpu_name		= "750CXe",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CXe "Gekko" (83214) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x00083214,
+		.cpu_name		= "750CXe",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CL (and "Broadway") */
+		.pvr_mask		= 0xfffff0e0,
+		.pvr_value		= 0x00087000,
+		.cpu_name		= "750CL",
+		.cpu_features		= CPU_FTRS_750CL,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 745/755 */
+		.pvr_mask		= 0xfffff000,
+		.pvr_value		= 0x00083000,
+		.cpu_name		= "745/755",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750FX rev 1.x */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x70000100,
+		.cpu_name		= "750FX",
+		.cpu_features		= CPU_FTRS_750FX1,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750FX rev 2.0 must disable HID0[DPM] */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x70000200,
+		.cpu_name		= "750FX",
+		.cpu_features		= CPU_FTRS_750FX2,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750FX (All revs except 2.0; rev 1.x also has its own entry above) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x70000000,
+		.cpu_name		= "750FX",
+		.cpu_features		= CPU_FTRS_750FX,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750fx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750GX */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x70020000,
+		.cpu_name		= "750GX",
+		.cpu_features		= CPU_FTRS_750GX,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750fx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 740/750 (L2CR bit needs fixup for 740) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00080000,
+		.cpu_name		= "740/750",
+		.cpu_features		= CPU_FTRS_740,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 7400 rev 1.1 ? (no TAU) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x000c1101,
+		.cpu_name		= "7400 (1.1)",
+		.cpu_features		= CPU_FTRS_7400_NOTAU,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_7400,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7400",
+	},
+	{	/* 7400 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x000c0000,
+		.cpu_name		= "7400",
+		.cpu_features		= CPU_FTRS_7400,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_7400,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7400",
+	},
+	{	/* 7410 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x800c0000,
+		.cpu_name		= "7410",
+		.cpu_features		= CPU_FTRS_7400,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_7410,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7400",
+	},
+	{	/* 7450 2.0 - no doze/nap */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80000200,
+		.cpu_name		= "7450",
+		.cpu_features		= CPU_FTRS_7450_20,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7450 2.1 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80000201,
+		.cpu_name		= "7450",
+		.cpu_features		= CPU_FTRS_7450_21,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7450 2.3 and newer */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80000000,
+		.cpu_name		= "7450",
+		.cpu_features		= CPU_FTRS_7450_23,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7455 rev 1.x */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x80010100,
+		.cpu_name		= "7455",
+		.cpu_features		= CPU_FTRS_7455_1,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7455 rev 2.0 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80010200,
+		.cpu_name		= "7455",
+		.cpu_features		= CPU_FTRS_7455_20,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7455 others */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80010000,
+		.cpu_name		= "7455",
+		.cpu_features		= CPU_FTRS_7455,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447/7457 Rev 1.0 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80020100,
+		.cpu_name		= "7447/7457",
+		.cpu_features		= CPU_FTRS_7447_10,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447/7457 Rev 1.1 (same CPU_FTRS_7447_10 feature set as Rev 1.0) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80020101,
+		.cpu_name		= "7447/7457",
+		.cpu_features		= CPU_FTRS_7447_10,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447/7457 Rev 1.2 and later */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80020000,
+		.cpu_name		= "7447/7457",
+		.cpu_features		= CPU_FTRS_7447,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447A */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80030000,
+		.cpu_name		= "7447A",
+		.cpu_features		= CPU_FTRS_7447A,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7448 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80040000,
+		.cpu_name		= "7448",
+		.cpu_features		= CPU_FTRS_7448,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* default match (only built with CONFIG_PPC_BOOK3S_604), we assume split I/D cache & TB (non-601)... */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic PPC)",
+		.cpu_features		= CPU_FTRS_CLASSIC32,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+#endif /* CONFIG_PPC_BOOK3S_604 */
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h
new file mode 100644
index 0000000000..c370c1b804
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+/* NOTE:
+ * Unlike ppc32, ppc64 only calls cpu_setup() for the boot CPU; it is
+ * the responsibility of the appropriate CPU save/restore functions to
+ * eventually copy these settings over. Those save/restore functions
+ * aren't yet part of the cputable, though. That has to be fixed for
+ * both ppc32 and ppc64.
+ */
+#define COMMON_USER_PPC64	(PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+				 PPC_FEATURE_HAS_MMU | PPC_FEATURE_64)	/* base of every .cpu_user_features value below */
+#define COMMON_USER_POWER4	(COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5	(COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER5_PLUS	(COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER6	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7	(PPC_FEATURE2_DSCR)	/* COMMON_USER2_* values go in .cpu_user_features2 */
+#define COMMON_USER_POWER8	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)	/* same bits as COMMON_USER_POWER7 */
+#define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_HTM_COMP | \
+				 PPC_FEATURE2_HTM_NOSC_COMP | \
+				 PPC_FEATURE2_DSCR | \
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+				 PPC_FEATURE2_VEC_CRYPTO)
+#define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_HAS_ALTIVEC_COMP)
+#define COMMON_USER_POWER9	COMMON_USER_POWER8	/* POWER9 additions are all in COMMON_USER2_POWER9 */
+#define COMMON_USER2_POWER9	(COMMON_USER2_POWER8 | \
+				 PPC_FEATURE2_ARCH_3_00 | \
+				 PPC_FEATURE2_HAS_IEEE128 | \
+				 PPC_FEATURE2_DARN | \
+				 PPC_FEATURE2_SCV)
+#define COMMON_USER_POWER10	COMMON_USER_POWER9	/* POWER10 differences are all in COMMON_USER2_POWER10 */
+#define COMMON_USER2_POWER10	(PPC_FEATURE2_ARCH_3_1 | \
+				 PPC_FEATURE2_MMA | \
+				 PPC_FEATURE2_ARCH_3_00 | \
+				 PPC_FEATURE2_HAS_IEEE128 | \
+				 PPC_FEATURE2_DARN | \
+				 PPC_FEATURE2_SCV | \
+				 PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_DSCR | \
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+				 PPC_FEATURE2_VEC_CRYPTO)	/* COMMON_USER2_POWER9 minus the two HTM bits, plus ARCH_3_1 and MMA */
+
+static struct cpu_spec cpu_specs[] __initdata = {	/* Book3S-64 CPU table. NOTE(review): order looks significant (narrow masks first) - confirm */
+	{	/* PPC970 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00390000,
+		.cpu_name		= "PPC970",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970FX */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003c0000,
+		.cpu_name		= "PPC970FX",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x00440100,
+		.cpu_name		= "PPC970MP",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970MP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00440000,
+		.cpu_name		= "PPC970MP",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970MP,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970GX (no .cpu_restore, unlike the other 970 entries) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00450000,
+		.cpu_name		= "PPC970GX",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* Power5 GR */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003a0000,
+		.cpu_name		= "POWER5 (gr)",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power5",
+	},
+	{	/* Power5++ (no .pmc_type, unlike the Power5 GS entry below) */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x003b0300,
+		.cpu_name		= "POWER5+ (gs)",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.platform		= "power5+",
+	},
+	{	/* Power5 GS */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003b0000,
+		.cpu_name		= "POWER5+ (gs)",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power5+",
+	},
+	{	/* POWER6 in P5+ mode; 2.04-compliant processor */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000001,
+		.cpu_name		= "POWER5+",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.platform		= "power5+",
+	},
+	{	/* Power6 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003e0000,
+		.cpu_name		= "POWER6 (raw)",
+		.cpu_features		= CPU_FTRS_POWER6,
+		.cpu_user_features	= COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT,
+		.mmu_features		= MMU_FTRS_POWER6,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power6x",
+	},
+	{	/* 2.05-compliant processor, i.e. Power6 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000002,
+		.cpu_name		= "POWER6 (architected)",
+		.cpu_features		= CPU_FTRS_POWER6,
+		.cpu_user_features	= COMMON_USER_POWER6,
+		.mmu_features		= MMU_FTRS_POWER6,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.platform		= "power6",
+	},
+	{	/* 2.06-compliant processor, i.e. Power7 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000003,
+		.cpu_name		= "POWER7 (architected)",
+		.cpu_features		= CPU_FTRS_POWER7,
+		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
+		.mmu_features		= MMU_FTRS_POWER7,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
+		.platform		= "power7",
+	},
+	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000004,
+		.cpu_name		= "POWER8 (architected)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* 3.00-compliant processor, i.e. Power9 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000005,
+		.cpu_name		= "POWER9 (architected)",
+		.cpu_features		= CPU_FTRS_POWER9,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.platform		= "power9",
+	},
+	{	/* 3.1-compliant processor, i.e. Power10 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000006,
+		.cpu_name		= "POWER10 (architected)",
+		.cpu_features		= CPU_FTRS_POWER10,
+		.cpu_user_features	= COMMON_USER_POWER10,
+		.cpu_user_features2	= COMMON_USER2_POWER10,
+		.mmu_features		= MMU_FTRS_POWER10,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power10,
+		.cpu_restore		= __restore_cpu_power10,
+		.platform		= "power10",
+	},
+	{	/* Power7 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003f0000,
+		.cpu_name		= "POWER7 (raw)",
+		.cpu_features		= CPU_FTRS_POWER7,
+		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
+		.mmu_features		= MMU_FTRS_POWER7,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
+		.platform		= "power7",
+	},
+	{	/* Power7+ */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004A0000,
+		.cpu_name		= "POWER7+ (raw)",
+		.cpu_features		= CPU_FTRS_POWER7,
+		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
+		.mmu_features		= MMU_FTRS_POWER7,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
+		.platform		= "power7+",
+	},
+	{	/* Power8E */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004b0000,
+		.cpu_name		= "POWER8E (raw)",
+		.cpu_features		= CPU_FTRS_POWER8E,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* Power8NVL */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004c0000,
+		.cpu_name		= "POWER8NVL (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* Power8 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004d0000,
+		.cpu_name		= "POWER8 (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* Power9 DD2.0 (mask 0xffffefff leaves out the 0x1000 bit) */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0200,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_0,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD2.1 */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0201,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_1,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD2.2 */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0202,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_2,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD2.3 or later */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004e0000,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_3,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power10 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00800000,
+		.cpu_name		= "POWER10 (raw)",
+		.cpu_features		= CPU_FTRS_POWER10,
+		.cpu_user_features	= COMMON_USER_POWER10,
+		.cpu_user_features2	= COMMON_USER2_POWER10,
+		.mmu_features		= MMU_FTRS_POWER10,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power10,
+		.cpu_restore		= __restore_cpu_power10,
+		.machine_check_early	= __machine_check_early_realmode_p10,
+		.platform		= "power10",
+	},
+	{	/* Cell Broadband Engine */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00700000,
+		.cpu_name		= "Cell Broadband Engine",
+		.cpu_features		= CPU_FTRS_CELL,
+		.cpu_user_features	= COMMON_USER_PPC64 | PPC_FEATURE_CELL |
+					  PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT,
+		.mmu_features		= MMU_FTRS_CELL,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "ppc-cell-be",
+	},
+	{	/* PA Semi PA6T */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00900000,
+		.cpu_name		= "PA6T",
+		.cpu_features		= CPU_FTRS_PA6T,
+		.cpu_user_features	= COMMON_USER_PA6T,
+		.mmu_features		= MMU_FTRS_PA6T,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_PA6T,
+		.cpu_setup		= __setup_cpu_pa6t,
+		.cpu_restore		= __restore_cpu_pa6t,
+		.platform		= "pa6t",
+	},
+	{	/* default match (zero mask and value) */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "POWER5 (compatible)",
+		.cpu_features		= CPU_FTRS_COMPATIBLE,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.mmu_features		= MMU_FTRS_POWER,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power5",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
new file mode 100644
index 0000000000..ceb06b109f
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+#ifdef CONFIG_PPC64
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+#else
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+#endif
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC32
+	{	/* e500mc */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80230000,
+		.cpu_name		= "e500mc",
+		.cpu_features		= CPU_FTRS_E500MC,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e500mc,
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce500mc",
+		.cpu_down_flush		= cpu_down_flush_e500mc,
+	},
+#endif /* CONFIG_PPC32 */
+	{	/* e5500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80240000,
+		.cpu_name		= "e5500",
+		.cpu_features		= CPU_FTRS_E5500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e5500,
+#ifndef CONFIG_PPC32
+		.cpu_restore		= __restore_cpu_e5500,
+#endif
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce5500",
+		.cpu_down_flush		= cpu_down_flush_e5500,
+	},
+	{	/* e6500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80400000,
+		.cpu_name		= "e6500",
+		.cpu_features		= CPU_FTRS_E6500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
+					  PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 6,
+		.cpu_setup		= __setup_cpu_e6500,
+#ifndef CONFIG_PPC32
+		.cpu_restore		= __restore_cpu_e6500,
+#endif
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce6500",
+		.cpu_down_flush		= cpu_down_flush_e6500,
+	},
+};
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
new file mode 100644
index 0000000000..e97a0fd0ae
--- /dev/null
+++ b/arch/powerpc/kernel/cputable.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/jump_label.h>
+#include <linux/of.h>
+
+#include <asm/cputable.h>
+#include <asm/mce.h>
+#include <asm/mmu.h>
+#include <asm/setup.h>
+#include <asm/cpu_setup.h>
+
+static struct cpu_spec the_cpu_spec __read_mostly;
+
+struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
+EXPORT_SYMBOL(cur_cpu_spec);
+
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
+#include "cpu_specs.h"
+
+void __init set_cur_cpu_spec(struct cpu_spec *s)
+{
+	struct cpu_spec *t = &the_cpu_spec;
+
+	t = PTRRELOC(t);
+	/*
+	 * use memcpy() instead of *t = *s so that GCC replaces it
+	 * by __memcpy() when KASAN is active
+	 */
+	memcpy(t, s, sizeof(*t));
+
+	*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+}
+
+static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
+					       struct cpu_spec *s)
+{
+	struct cpu_spec *t = &the_cpu_spec;
+	struct cpu_spec old;
+
+	t = PTRRELOC(t);
+	old = *t;
+
+	/*
+	 * Copy everything, then do fixups. Use memcpy() instead of *t = *s
+	 * so that GCC replaces it by __memcpy() when KASAN is active
+	 */
+	memcpy(t, s, sizeof(*t));
+
+	/*
+	 * If we are overriding a previous value derived from the real
+	 * PVR with a new value obtained using a logical PVR value,
+	 * don't modify the performance monitor fields.
+	 */
+	if (old.num_pmcs && !s->num_pmcs) {
+		t->num_pmcs = old.num_pmcs;
+		t->pmc_type = old.pmc_type;
+
+		/*
+		 * Let's ensure that the
+		 * fix for the PMAO bug is enabled on compatibility mode.
+		 */
+		t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
+	}
+
+	/* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */
+	if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32))
+		t->mmu_features |= MMU_FTR_KUAP;
+
+	*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+	/*
+	 * Set the base platform string once; assumes
+	 * we're called with real pvr first.
+	 */
+	if (*PTRRELOC(&powerpc_base_platform) == NULL)
+		*PTRRELOC(&powerpc_base_platform) = t->platform;
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
+	/* ppc64 and booke expect identify_cpu to also call setup_cpu for
+	 * that processor. I will consolidate that at a later time, for now,
+	 * just use #ifdef. We also don't need to PTRRELOC the function
+	 * pointer on ppc64 and booke as we are running at 0 in real mode
+	 * on ppc64 and reloc_offset is always 0 on booke.
+	 */
+	if (t->cpu_setup) {
+		t->cpu_setup(offset, t);
+	}
+#endif /* CONFIG_PPC64 || CONFIG_BOOKE */
+
+	return t;
+}
+
+struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
+{
+	struct cpu_spec *s = cpu_specs;
+	int i;
+
+	BUILD_BUG_ON(!ARRAY_SIZE(cpu_specs));
+
+	s = PTRRELOC(s);
+
+	for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+		if ((pvr & s->pvr_mask) == s->pvr_value)
+			return setup_cpu_spec(offset, s);
+	}
+
+	BUG();
+
+	return NULL;
+}
+
+/*
+ * Look up @pvr in the PVR table and, on the first match, copy just the
+ * CPU name into the_cpu_spec. Used by cpufeatures; when nothing matches
+ * the name is untouched (cpufeatures then takes it from the device tree).
+ */
+void __init identify_cpu_name(unsigned int pvr)
+{
+	struct cpu_spec *entry = cpu_specs;
+	struct cpu_spec *active = &the_cpu_spec;
+	int idx;
+
+	entry = PTRRELOC(entry);
+	active = PTRRELOC(active);
+
+	for (idx = 0; idx < ARRAY_SIZE(cpu_specs); idx++, entry++) {
+		if ((pvr & entry->pvr_mask) != entry->pvr_value)
+			continue;
+		active->cpu_name = entry->cpu_name;
+		return;
+	}
+}
+
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
+			[0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(cpu_feature_keys);
+
+void __init cpu_feature_keys_init(void)
+{
+	int bit;
+
+	/* Keys default to true; turn off those whose feature bit is clear. */
+	for (bit = 0; bit < NUM_CPU_FTR_KEYS; bit++) {
+		if (cur_cpu_spec->cpu_features & (1ul << bit))
+			continue;
+		static_branch_disable(&cpu_feature_keys[bit]);
+	}
+}
+
+struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = {
+			[0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL(mmu_feature_keys);
+
+void __init mmu_feature_keys_init(void)
+{
+	int bit;
+
+	/* Keys default to true; turn off those whose MMU feature bit is clear. */
+	for (bit = 0; bit < NUM_MMU_FTR_KEYS; bit++) {
+		if (cur_cpu_spec->mmu_features & (1ul << bit))
+			continue;
+		static_branch_disable(&mmu_feature_keys[bit]);
+	}
+}
+#endif
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
new file mode 100644
index 0000000000..9a3b85bfc8
--- /dev/null
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines for doing kexec-based kdump.
+ *
+ * Copyright (C) 2005, IBM Corp.
+ *
+ * Created by: Michael Ellerman
+ */
+
+#undef DEBUG
+
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <asm/code-patching.h>
+#include <asm/kdump.h>
+#include <asm/firmware.h>
+#include <linux/uio.h>
+#include <asm/rtas.h>
+#include <asm/inst.h>
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+void __init reserve_kdump_trampoline(void)
+{
+	memblock_reserve(0, KDUMP_RESERVE_LIMIT);
+}
+
+static void __init create_trampoline(unsigned long addr)
+{
+	u32 *p = (u32 *)addr;
+
+	/* The maximum range of a single instruction branch, is the current
+	 * instruction's address + (32 MB - 4) bytes. For the trampoline we
+	 * need to branch to current address + 32 MB. So we insert a nop at
+	 * the trampoline address, then the next instruction (+ 4 bytes)
+	 * does a branch to (32 MB - 4). The net effect is that when we
+	 * branch to "addr" we jump to ("addr" + 32 MB). Although it requires
+	 * two instructions it doesn't require any registers.
+	 */
+	patch_instruction(p, ppc_inst(PPC_RAW_NOP()));
+	patch_branch(p + 1, addr + PHYSICAL_START, 0);
+}
+
+void __init setup_kdump_trampoline(void)
+{
+	unsigned long i;
+
+	DBG(" -> setup_kdump_trampoline()\n");
+
+	for (i = KDUMP_TRAMPOLINE_START; i < KDUMP_TRAMPOLINE_END; i += 8) {
+		create_trampoline(i);
+	}
+
+#ifdef CONFIG_PPC_PSERIES
+	create_trampoline(__pa(system_reset_fwnmi) - PHYSICAL_START);
+	create_trampoline(__pa(machine_check_fwnmi) - PHYSICAL_START);
+#endif /* CONFIG_PPC_PSERIES */
+
+	DBG(" <- setup_kdump_trampoline()\n");
+}
+#endif /* CONFIG_NONSTATIC_KERNEL */
+
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+			size_t csize, unsigned long offset)
+{
+	/* Widen first: phys_addr_t can be wider than unsigned long */
+	phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+	csize = min_t(size_t, csize, PAGE_SIZE);
+
+	/* memblock memory is read through __va(); the rest is mapped first */
+	if (memblock_is_region_memory(paddr, csize))
+		return copy_to_iter(__va(paddr) + offset, csize, iter);
+
+	vaddr = ioremap_cache(paddr, PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;	/* mapping failed, nothing was copied */
+	csize = copy_to_iter(vaddr + offset, csize, iter);
+	iounmap(vaddr);
+
+	return csize;
+}
+
+#ifdef CONFIG_PPC_RTAS
+/*
+ * Used when shrinking the crashkernel region: free the reserved pages in
+ * [begin, end), but skip pages touching the RTAS region it usually overlaps.
+ */
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+	const __be32 *basep, *sizep;
+	unsigned int rtas_start = 0, rtas_end = 0;	/* stay 0 if no properties */
+
+	basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
+	sizep = of_get_property(rtas.dev, "rtas-size", NULL);
+
+	if (basep && sizep) {
+		rtas_start = be32_to_cpup(basep);
+		rtas_end = rtas_start + be32_to_cpup(sizep);
+	}
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		/* Skip RTAS pages; '<=' also skips a page starting at rtas_end */
+		if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start))
+			continue;
+
+		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+	}
+}
+#endif
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
new file mode 100644
index 0000000000..909a05cd28
--- /dev/null
+++ b/arch/powerpc/kernel/dawr.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * DAWR infrastructure
+ *
+ * Copyright 2019, Michael Neuling, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+int set_dawr(int nr, struct arch_hw_breakpoint *brk)
+{
+	unsigned long dawr, dawrx, mrd;
+
+	dawr = brk->address;
+
+	dawrx  = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
+		<< (63 - 58);
+	dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59);
+	dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
+	/*
+	 * DAWR length is stored in field MRD bits 48:53.  Matches range in
+	 * doublewords (64 bits) biased by -1 eg. 0b000000=1DW and
+	 * 0b111111=64DW.
+	 * brk->hw_len is in bytes.
+	 * This aligns up to double word size, shifts and does the bias.
+	 */
+	mrd = ((brk->hw_len + 7) >> 3) - 1;
+	dawrx |= (mrd & 0x3f) << (63 - 53);
+
+	if (ppc_md.set_dawr)
+		return ppc_md.set_dawr(nr, dawr, dawrx);
+
+	if (nr == 0) {
+		mtspr(SPRN_DAWR0, dawr);
+		mtspr(SPRN_DAWRX0, dawrx);
+	} else {
+		mtspr(SPRN_DAWR1, dawr);
+		mtspr(SPRN_DAWRX1, dawrx);
+	}
+
+	return 0;
+}
+
+static void disable_dawrs_cb(void *info)
+{
+	struct arch_hw_breakpoint null_brk = {0};
+	int i;
+
+	for (i = 0; i < nr_wp_slots(); i++)
+		set_dawr(i, &null_brk);
+}
+
+static ssize_t dawr_write_file_bool(struct file *file,
+				    const char __user *user_buf,
+				    size_t count, loff_t *ppos)
+{
+	struct arch_hw_breakpoint null_brk = {0};
+	ssize_t rc;
+
+	/* Send error to user if the hypervisor won't allow us to write DAWR */
+	if (!dawr_force_enable && firmware_has_feature(FW_FEATURE_LPAR) &&
+	    set_dawr(0, &null_brk) != H_SUCCESS)
+		return -ENODEV;
+
+	/* Success is reported as a byte count, so only a negative rc is fatal */
+	rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+	if (rc < 0)
+		return rc;
+
+	/* If we are clearing, make sure all CPUs have the DAWR cleared */
+	if (!dawr_force_enable)
+		smp_call_function(disable_dawrs_cb, NULL, 0);
+
+	return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+	.read =		debugfs_read_file_bool,
+	.write =	dawr_write_file_bool,
+	.open =		simple_open,
+	.llseek =	default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+	if (cpu_has_feature(CPU_FTR_DAWR)) {
+		/* Don't setup debugfs file for user control on P8 */
+		dawr_force_enable = true;
+		return 0;
+	}
+
+	if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+		/* Turn DAWR off by default, but allow admin to turn it on */
+		debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+					   arch_debugfs_dir,
+					   &dawr_force_enable,
+					   &dawr_enable_fops);
+	}
+	return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
new file mode 100644
index 0000000000..5712dd8462
--- /dev/null
+++ b/arch/powerpc/kernel/dbell.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/hardirq.h>
+
+#include <asm/dbell.h>
+#include <asm/interrupt.h>
+#include <asm/irq_regs.h>
+#include <asm/kvm_ppc.h>
+#include <asm/trace.h>
+
+#ifdef CONFIG_SMP
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	trace_doorbell_entry(regs);
+
+	ppc_msgsync();
+
+	if (should_hard_irq_enable(regs))
+		do_hard_irq_enable();
+
+	kvmppc_clear_host_ipi(smp_processor_id());
+	__this_cpu_inc(irq_stat.doorbell_irqs);
+
+	smp_ipi_demux_relaxed(); /* already performed the barrier */
+
+	trace_doorbell_exit(regs);
+
+	set_irq_regs(old_regs);
+}
+#else /* CONFIG_SMP */
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
+{
+	printk(KERN_WARNING "Received doorbell on non-smp system\n");
+}
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
new file mode 100644
index 0000000000..8920862ffd
--- /dev/null
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * Provide default implementations of the DMA mapping callbacks for
+ * busses using the iommu infrastructure
+ */
+
+#include <linux/dma-direct.h>
+#include <linux/pci.h>
+#include <asm/iommu.h>
+
+#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
+#define can_map_direct(dev, addr) \
+	((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
+
+bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr)
+{
+	if (likely(!dev->bus_dma_limit))
+		return false;
+
+	return can_map_direct(dev, addr);
+}
+
+#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
+
+bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle)
+{
+	if (likely(!dev->bus_dma_limit))
+		return false;
+
+	return is_direct_handle(dev, dma_handle);
+}
+
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
+			    int nents)
+{
+	struct scatterlist *s;
+	int i;
+
+	if (likely(!dev->bus_dma_limit))
+		return false;
+
+	for_each_sg(sg, s, nents, i) {
+		if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length))
+			return false;
+	}
+
+	return true;
+}
+
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
+			      int nents)
+{
+	struct scatterlist *s;
+	int i;
+
+	if (likely(!dev->bus_dma_limit))
+		return false;
+
+	for_each_sg(sg, s, nents, i) {
+		if (!is_direct_handle(dev, s->dma_address + s->length))
+			return false;
+	}
+
+	return true;
+}
+#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
+
+/*
+ * Generic iommu implementation
+ */
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t flag,
+				      unsigned long attrs)
+{
+	return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+				    dma_handle, dev->coherent_dma_mask, flag,
+				    dev_to_node(dev));
+}
+
+static void dma_iommu_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_handle,
+				    unsigned long attrs)
+{
+	iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer.  The user buffer must be
+ * contiguous real kernel storage (not vmalloc).  The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ */
+static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     unsigned long attrs)
+{
+	return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
+			      size, dma_get_mask(dev), direction, attrs);
+}
+
+
+static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+				 size_t size, enum dma_data_direction direction,
+				 unsigned long attrs)
+{
+	iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+			 attrs);
+}
+
+
+static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+			    int nelems, enum dma_data_direction direction,
+			    unsigned long attrs)
+{
+	return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+				dma_get_mask(dev), direction, attrs);
+}
+
+static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
+		int nelems, enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+			   direction, attrs);
+}
+
+static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+
+	if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported)
+		return false;
+	return phb->controller_ops.iommu_bypass_supported(pdev, mask);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+int dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+	struct iommu_table *tbl;
+
+	if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
+		/*
+		 * dma_iommu_bypass_supported() sets bus_dma_limit when there
+		 * is 1:1 mapping but it is somehow limited.
+		 * ibm,pmemory is one example.
+		 */
+		dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+		if (!dev->dma_ops_bypass)
+			dev_warn(dev,
+				 "iommu: 64-bit OK but direct DMA is limited by %llx\n",
+				 dev->bus_dma_limit);
+		else
+			dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		return 1;
+	}
+
+	tbl = get_iommu_table_base(dev);
+
+	if (!tbl) {
+		dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+		return 0;
+	}
+
+	if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
+		dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
+		dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
+				mask, tbl->it_offset << tbl->it_page_shift);
+		return 0;
+	}
+
+	dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+	dev->dma_ops_bypass = false;
+	return 1;
+}
+
+u64 dma_iommu_get_required_mask(struct device *dev)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	u64 mask;
+
+	if (dev_is_pci(dev)) {
+		u64 bypass_mask = dma_direct_get_required_mask(dev);
+
+		if (dma_iommu_dma_supported(dev, bypass_mask)) {
+			dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask);
+			return bypass_mask;
+		}
+	}
+
+	if (!tbl)
+		return 0;
+
+	mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
+			tbl->it_page_shift - 1);
+	mask += mask - 1;
+
+	return mask;
+}
+
+const struct dma_map_ops dma_iommu_ops = {
+	.alloc			= dma_iommu_alloc_coherent,
+	.free			= dma_iommu_free_coherent,
+	.map_sg			= dma_iommu_map_sg,
+	.unmap_sg		= dma_iommu_unmap_sg,
+	.dma_supported		= dma_iommu_dma_supported,
+	.map_page		= dma_iommu_map_page,
+	.unmap_page		= dma_iommu_unmap_page,
+	.get_required_mask	= dma_iommu_get_required_mask,
+	.mmap			= dma_common_mmap,
+	.get_sgtable		= dma_common_get_sgtable,
+	.alloc_pages		= dma_common_alloc_pages,
+	.free_pages		= dma_common_free_pages,
+};
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
new file mode 100644
index 0000000000..5b07ca7b73
--- /dev/null
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
+#include <linux/export.h>
+#include <asm/machdep.h>
+
+void arch_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (ppc_md.dma_set_mask)
+		ppc_md.dma_set_mask(dev, dma_mask);
+}
+EXPORT_SYMBOL(arch_dma_set_mask);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
new file mode 100644
index 0000000000..ba256c37bc
--- /dev/null
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Contains routines needed to support swiotlb for ppc.
+ *
+ * Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
+ * Author: Becky Bruce
+ */
+#include <linux/memblock.h>
+#include <asm/machdep.h>
+#include <asm/swiotlb.h>
+
+unsigned int ppc_swiotlb_enable;
+unsigned int ppc_swiotlb_flags;
+
+void __init swiotlb_detect_4g(void)
+{
+	if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
+		ppc_swiotlb_enable = 1;
+}
+
+static int __init check_swiotlb_enabled(void)
+{
+	if (ppc_swiotlb_enable)
+		swiotlb_print_info();
+	else
+		swiotlb_exit();
+
+	return 0;
+}
+subsys_initcall(check_swiotlb_enabled);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
new file mode 100644
index 0000000000..c3fb9fdf5b
--- /dev/null
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -0,0 +1,1120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017, Nicholas Piggin, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+
+#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/mce.h>
+#include <asm/mmu.h>
+#include <asm/setup.h>
+
+
+/* Device-tree visible constants follow */
+#define ISA_V3_0B       3000
+#define ISA_V3_1        3100
+
+#define USABLE_PR               (1U << 0)
+#define USABLE_OS               (1U << 1)
+#define USABLE_HV               (1U << 2)
+
+#define HV_SUPPORT_HFSCR        (1U << 0)
+#define OS_SUPPORT_FSCR         (1U << 0)
+
+/* For parsing, we define all bits set as "NONE" case */
+#define HV_SUPPORT_NONE		0xffffffffU
+#define OS_SUPPORT_NONE		0xffffffffU
+
+struct dt_cpu_feature {
+	const char *name;
+	uint32_t isa;
+	uint32_t usable_privilege;
+	uint32_t hv_support;
+	uint32_t os_support;
+	uint32_t hfscr_bit_nr;
+	uint32_t fscr_bit_nr;
+	uint32_t hwcap_bit_nr;
+	/* fdt parsing */
+	unsigned long node;
+	int enabled;
+	int disabled;
+};
+
+#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
+
+#define COMMON_USER_BASE	(PPC_FEATURE_32 | PPC_FEATURE_64 | \
+				 PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER2_BASE	(PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_ISEL)
+/*
+ * Set up the base CPU
+ */
+
+static int hv_mode;
+
+static struct {
+	u64	lpcr;
+	u64	hfscr;
+	u64	fscr;
+	u64	pcr;
+} system_registers;
+
+static void (*init_pmu_registers)(void);
+
+static void __restore_cpu_cpufeatures(void)
+{
+	mtspr(SPRN_LPCR, system_registers.lpcr);
+	if (hv_mode) {
+		mtspr(SPRN_LPID, 0);
+		mtspr(SPRN_AMOR, ~0);
+		mtspr(SPRN_HFSCR, system_registers.hfscr);
+		mtspr(SPRN_PCR, system_registers.pcr);
+	}
+	mtspr(SPRN_FSCR, system_registers.fscr);
+
+	if (init_pmu_registers)
+		init_pmu_registers();
+}
+
+static char dt_cpu_name[64];
+
+static struct cpu_spec __initdata base_cpu_spec = {
+	.cpu_name		= NULL,
+	.cpu_features		= CPU_FTRS_DT_CPU_BASE,
+	.cpu_user_features	= COMMON_USER_BASE,
+	.cpu_user_features2	= COMMON_USER2_BASE,
+	.mmu_features		= 0,
+	.icache_bsize		= 32, /* minimum block size, fixed by */
+	.dcache_bsize		= 32, /* cache info init.             */
+	.num_pmcs		= 0,
+	.pmc_type		= PPC_PMC_DEFAULT,
+	.cpu_setup		= NULL,
+	.cpu_restore		= __restore_cpu_cpufeatures,
+	.machine_check_early	= NULL,
+	.platform		= NULL,
+};
+
+static void __init cpufeatures_setup_cpu(void)
+{
+	set_cur_cpu_spec(&base_cpu_spec);
+
+	cur_cpu_spec->pvr_mask = -1;
+	cur_cpu_spec->pvr_value = mfspr(SPRN_PVR);
+
+	/* Initialize the base environment -- clear FSCR/HFSCR.  */
+	hv_mode = !!(mfmsr() & MSR_HV);
+	if (hv_mode) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+		mtspr(SPRN_HFSCR, 0);
+	}
+	mtspr(SPRN_FSCR, 0);
+	mtspr(SPRN_PCR, PCR_MASK);
+
+	/*
+	 * LPCR does not get cleared, to match behaviour with secondaries
+	 * in __restore_cpu_cpufeatures. Once the idle code is fixed, this
+	 * could clear LPCR too.
+	 */
+}
+
+static int __init feat_try_enable_unknown(struct dt_cpu_feature *f)
+{
+	if (f->hv_support == HV_SUPPORT_NONE) {
+	} else if (f->hv_support & HV_SUPPORT_HFSCR) {
+		u64 hfscr = mfspr(SPRN_HFSCR);
+		hfscr |= 1UL << f->hfscr_bit_nr;
+		mtspr(SPRN_HFSCR, hfscr);
+	} else {
+		/* Does not have a known recipe */
+		return 0;
+	}
+
+	if (f->os_support == OS_SUPPORT_NONE) {
+	} else if (f->os_support & OS_SUPPORT_FSCR) {
+		u64 fscr = mfspr(SPRN_FSCR);
+		fscr |= 1UL << f->fscr_bit_nr;
+		mtspr(SPRN_FSCR, fscr);
+	} else {
+		/* Does not have a known recipe */
+		return 0;
+	}
+
+	if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+		uint32_t word = f->hwcap_bit_nr / 32;
+		uint32_t bit = f->hwcap_bit_nr % 32;
+
+		if (word == 0)
+			cur_cpu_spec->cpu_user_features |= 1U << bit;
+		else if (word == 1)
+			cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+		else
+			pr_err("%s could not advertise to user (no hwcap bits)\n", f->name);
+	}
+
+	return 1;
+}
+
+static int __init feat_enable(struct dt_cpu_feature *f)
+{
+	if (f->hv_support != HV_SUPPORT_NONE) {
+		if (f->hfscr_bit_nr != -1) {
+			u64 hfscr = mfspr(SPRN_HFSCR);
+			hfscr |= 1UL << f->hfscr_bit_nr;
+			mtspr(SPRN_HFSCR, hfscr);
+		}
+	}
+
+	if (f->os_support != OS_SUPPORT_NONE) {
+		if (f->fscr_bit_nr != -1) {
+			u64 fscr = mfspr(SPRN_FSCR);
+			fscr |= 1UL << f->fscr_bit_nr;
+			mtspr(SPRN_FSCR, fscr);
+		}
+	}
+
+	if ((f->usable_privilege & USABLE_PR) && (f->hwcap_bit_nr != -1)) {
+		uint32_t word = f->hwcap_bit_nr / 32;
+		uint32_t bit = f->hwcap_bit_nr % 32;
+
+		if (word == 0)
+			cur_cpu_spec->cpu_user_features |= 1U << bit;
+		else if (word == 1)
+			cur_cpu_spec->cpu_user_features2 |= 1U << bit;
+		else
+			pr_err("CPU feature: %s could not advertise to user (no hwcap bits)\n", f->name);
+	}
+
+	return 1;
+}
+
+static int __init feat_disable(struct dt_cpu_feature *f)
+{
+	return 0;
+}
+
+static int __init feat_enable_hv(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	if (!hv_mode) {
+		pr_err("CPU feature hypervisor present in device tree but HV mode not enabled in the CPU. Ignoring.\n");
+		return 0;
+	}
+
+	mtspr(SPRN_LPID, 0);
+	mtspr(SPRN_AMOR, ~0);
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &=  ~LPCR_LPES0; /* HV external interrupts */
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+
+	return 1;
+}
+
+static int __init feat_enable_le(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_TRUE_LE;
+	return 1;
+}
+
+static int __init feat_enable_smt(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_SMT;
+	return 1;
+}
+
+static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
+{
+	const u64 pece_wakeups = LPCR_PECE0 | LPCR_PECE1 | LPCR_PECE2;
+	u64 lpcr = mfspr(SPRN_LPCR);
+
+	/*
+	 * ISA 207: turn on all three PECE wakeup modes in LPCR while
+	 * preserving the bits that are already set.
+	 */
+	mtspr(SPRN_LPCR, lpcr | pece_wakeups);
+
+	return 1;
+}
+
+static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
+{
+	const u64 pece_wakeups = LPCR_PECE0 | LPCR_PECE1 | LPCR_PECE2;
+	u64 lpcr = mfspr(SPRN_LPCR);
+
+	/*
+	 * ISAv3.0B: turn on all three PECE wakeup modes in LPCR while
+	 * preserving the bits that are already set.
+	 */
+	mtspr(SPRN_LPCR, lpcr | pece_wakeups);
+
+	return 1;
+}
+
+static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+		return 0;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_ISL;
+
+	/* VRMASD */
+	lpcr |= LPCR_VPM0;
+	lpcr &= ~LPCR_VPM1;
+	lpcr |= 0x10UL << LPCR_VRMASD_SH; /* L=1 LP=00 */
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+		return 0;
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
+	mtspr(SPRN_LPCR, lpcr);
+
+	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+
+static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
+{
+	if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
+		return 0;
+
+	cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;
+	cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+	cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+static int __init feat_enable_dscr(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/*
+	 * Linux relies on FSCR[DSCR] being clear, so that we can take the
+	 * facility unavailable interrupt and track the task's usage of DSCR.
+	 * See facility_unavailable_exception().
+	 * Clear the bit here so that feat_enable() doesn't set it.
+	 */
+	f->fscr_bit_nr = -1;
+
+	feat_enable(f);
+
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr &= ~LPCR_DPFD;
+	lpcr |=  (4UL << LPCR_DPFD_SH);
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+static void __init hfscr_pmu_enable(void)
+{
+	u64 hfscr = mfspr(SPRN_HFSCR);
+	hfscr |= PPC_BIT(60);
+	mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_pmu_power8(void)
+{
+	if (hv_mode) {
+		mtspr(SPRN_MMCRC, 0);
+		mtspr(SPRN_MMCRH, 0);
+	}
+
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+	mtspr(SPRN_MMCRS, 0);
+}
+
+static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->platform = "power8";
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
+
+	return 1;
+}
+
+/* POWER8 PMU setup: HFSCR bit, PMU SPR reset, then cpu_spec fields. */
+static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power8();
+	init_pmu_registers = init_pmu_power8;
+
+	cur_cpu_spec->num_pmcs = 6;
+	cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+	if (pvr_version_is(PVR_POWER8E))
+		cur_cpu_spec->cpu_features |= CPU_FTR_PMAO_BUG;
+
+	return 1;
+}
+
+static void init_pmu_power9(void)
+{
+	if (hv_mode)
+		mtspr(SPRN_MMCRC, 0);
+	/* The registers below are written whether or not hv_mode is set. */
+	mtspr(SPRN_MMCRA, 0);
+	mtspr(SPRN_MMCR0, MMCR0_FC);
+	mtspr(SPRN_MMCR1, 0);
+	mtspr(SPRN_MMCR2, 0);
+}
+
+/* Select the "power9" platform name and its early machine check handler. */
+static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
+	cur_cpu_spec->platform = "power9";
+	return 1;
+}
+
+/* POWER9 PMU setup: HFSCR bit, PMU SPR reset, then cpu_spec fields. */
+static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power9();
+	init_pmu_registers = init_pmu_power9;
+
+	cur_cpu_spec->num_pmcs = 6;
+	cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+
+	return 1;
+}
+
+static void init_pmu_power10(void)
+{
+	init_pmu_power9();
+	/* Adds MMCR3 and overrides init_pmu_power9()'s MMCRA/MMCR0 values. */
+	mtspr(SPRN_MMCR3, 0);
+	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+}
+
+/* POWER10 PMU setup: HFSCR bit, PMU SPR reset, then cpu_spec fields. */
+static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power10();
+	init_pmu_registers = init_pmu_power10;
+
+	cur_cpu_spec->num_pmcs = 6;
+	cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+
+	return 1;
+}
+
+/* Select the "power10" platform name and its early machine check handler. */
+static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+	cur_cpu_spec->platform = "power10";
+	return 1;
+}
+
+/* Enable TM; returns 0 if CONFIG_PPC_TRANSACTIONAL_MEM is not set. */
+static int __init feat_enable_tm(struct dt_cpu_feature *f)
+{
+	if (!IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM))
+		return 0;
+	feat_enable(f);
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NOSC;
+	return 1;
+}
+
+/* Enable the feature, then clear CPU_FTR_FPU_UNAVAILABLE; returns 1. */
+static int __init feat_enable_fp(struct dt_cpu_feature *f)
+{
+	feat_enable(f);
+	cur_cpu_spec->cpu_features &= ~CPU_FTR_FPU_UNAVAILABLE;
+	return 1;
+}
+
+/* Enable AltiVec feature bits; returns 0 if CONFIG_ALTIVEC is not set. */
+static int __init feat_enable_vector(struct dt_cpu_feature *f)
+{
+	if (!IS_ENABLED(CONFIG_ALTIVEC))
+		return 0;
+
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC | CPU_FTR_VMX_COPY;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
+
+	return 1;
+}
+
+/* Enable VSX feature bits; returns 0 if CONFIG_VSX is not set. */
+static int __init feat_enable_vsx(struct dt_cpu_feature *f)
+{
+	if (!IS_ENABLED(CONFIG_VSX))
+		return 0;
+
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_VSX;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_VSX;
+	return 1;
+}
+
+/* Set both CPU_FTR_PURR and CPU_FTR_SPURR; feat_enable() is not called. */
+static int __init feat_enable_purr(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->cpu_features |= CPU_FTR_PURR | CPU_FTR_SPURR;
+	return 1;
+}
+
+static int __init feat_enable_ebb(struct dt_cpu_feature *f)
+{
+	/*
+	 * PPC_FEATURE2_EBB is enabled in PMU init code because it has
+	 * historically been related to the PMU facility, so hwcap_bit_nr
+	 * is cleared before calling feat_enable(). This may have to be
+	 * decoupled if EBB becomes more generic.
+	 */
+	f->hwcap_bit_nr = -1;
+	feat_enable(f);
+
+	return 1;
+}
+
+/* Enable the doorbell facility and set LPCR_PECEDH; always returns 1. */
+static int __init feat_enable_dbell(struct dt_cpu_feature *f)
+{
+	u64 val;
+
+	/* P9 has an HFSCR for privileged state */
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_DBELL;
+
+	/* hyp doorbell wakeup */
+	val = mfspr(SPRN_LPCR) | LPCR_PECEDH;
+	mtspr(SPRN_LPCR, val);
+
+	return 1;
+}
+
+/* Set the LPCR bits that deliver and wake on HVIs; always returns 1. */
+static int __init feat_enable_hvi(struct dt_cpu_feature *f)
+{
+	u64 lpcr;
+
+	/*
+	 * POWER9 XIVE interrupts including in OPAL XICS compatibility
+	 * are always delivered as hypervisor virtualization interrupts (HVI)
+	 * rather than EE.
+	 *
+	 * However LPES0 is not set here, in the chance that an EE does get
+	 * delivered to the host somehow, the EE handler would not expect it
+	 * to be delivered in LPES0 mode (e.g., using SRR[01]). This could
+	 * happen if there is a bug in interrupt controller code, or IC is
+	 * misconfigured in systemsim.
+	 */
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= LPCR_HVICE |		/* enable hvi interrupts */
+		LPCR_HEIC |		/* disable ee interrupts when MSR_HV */
+		LPCR_PECE_HVEE;		/* hvi can wake from stop */
+	mtspr(SPRN_LPCR, lpcr);
+
+	return 1;
+}
+
+/* Set MMU_FTR_CI_LARGE_PAGE in mmu_features; feat_enable() is not called. */
+static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->mmu_features |= MMU_FTR_CI_LARGE_PAGE;
+	return 1;
+}
+
+/* Enable the feature, then clear PCR_MMA_DIS in the PCR; returns 1. */
+static int __init feat_enable_mma(struct dt_cpu_feature *f)
+{
+	u64 val;
+
+	feat_enable(f);
+	val = mfspr(SPRN_PCR) & ~PCR_MMA_DIS;
+	mtspr(SPRN_PCR, val);
+
+	return 1;
+}
+
+struct dt_cpu_feature_match {
+	const char *name;	/* compared against the feature's name */
+	int (*enable)(struct dt_cpu_feature *f); /* nonzero: feature enabled */
+	u64 cpu_ftr_bit_mask;	/* ORed into cpu_features when enable() != 0 */
+};
+
+static struct dt_cpu_feature_match __initdata
+		dt_cpu_feature_match_table[] = {
+	{"hypervisor", feat_enable_hv, 0},
+	{"big-endian", feat_enable, 0},
+	{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
+	{"smt", feat_enable_smt, 0},
+	{"interrupt-facilities", feat_enable, 0},
+	{"system-call-vectored", feat_enable, 0},
+	{"timer-facilities", feat_enable, 0},
+	{"timer-facilities-v3", feat_enable, 0},
+	{"debug-facilities", feat_enable, 0},
+	{"come-from-address-register", feat_enable, CPU_FTR_CFAR},
+	{"branch-tracing", feat_enable, 0},
+	{"floating-point", feat_enable_fp, 0},
+	{"vector", feat_enable_vector, 0},
+	{"vector-scalar", feat_enable_vsx, 0},
+	{"vector-scalar-v3", feat_enable, 0},
+	{"decimal-floating-point", feat_enable, 0},
+	{"decimal-integer", feat_enable, 0},
+	{"quadword-load-store", feat_enable, 0},
+	{"vector-crypto", feat_enable, 0},
+	{"mmu-hash", feat_enable_mmu_hash, 0},
+	{"mmu-radix", feat_enable_mmu_radix, 0},
+	{"mmu-hash-v3", feat_enable_mmu_hash_v3, 0},
+	{"virtual-page-class-key-protection", feat_enable, 0},
+	{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
+	{"transactional-memory-v3", feat_enable_tm, 0},
+	{"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+	{"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
+	{"idle-nap", feat_enable_idle_nap, 0},
+	/* alignment-interrupt-dsisr ignored */
+	{"idle-stop", feat_enable_idle_stop, 0},
+	{"machine-check-power8", feat_enable_mce_power8, 0},
+	{"performance-monitor-power8", feat_enable_pmu_power8, 0},
+	{"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR},
+	{"event-based-branch", feat_enable_ebb, 0},
+	{"target-address-register", feat_enable, 0},
+	{"branch-history-rolling-buffer", feat_enable, 0},
+	{"control-register", feat_enable, CPU_FTR_CTRL},
+	{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
+	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
+	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
+	{"no-execute", feat_enable, 0},
+	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
+	{"coprocessor-icswx", feat_enable, 0},
+	{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
+	{"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
+	{"wait", feat_enable, 0},
+	{"atomic-memory-operations", feat_enable, 0},
+	{"branch-v3", feat_enable, 0},
+	{"copy-paste", feat_enable, 0},
+	{"decimal-floating-point-v3", feat_enable, 0},
+	{"decimal-integer-v3", feat_enable, 0},
+	{"fixed-point-v3", feat_enable, 0},
+	{"floating-point-v3", feat_enable, 0},
+	{"group-start-register", feat_enable, 0},
+	{"pc-relative-addressing", feat_enable, 0},
+	{"machine-check-power9", feat_enable_mce_power9, 0},
+	{"machine-check-power10", feat_enable_mce_power10, 0},
+	{"performance-monitor-power9", feat_enable_pmu_power9, 0},
+	{"performance-monitor-power10", feat_enable_pmu_power10, 0},
+	{"event-based-branch-v3", feat_enable, 0},
+	{"random-number-generator", feat_enable, 0},
+	{"system-call-vectored", feat_disable, 0}, /* shadowed: first match wins */
+	{"trace-interrupt-v3", feat_enable, 0},
+	{"vector-v3", feat_enable, 0},
+	{"vector-binary128", feat_enable, 0},
+	{"vector-binary16", feat_enable, 0},
+	{"wait-v3", feat_enable, 0},
+	{"prefix-instructions", feat_enable, 0},
+	{"matrix-multiply-assist", feat_enable_mma, 0},
+	{"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1},
+};
+
+static bool __initdata using_dt_cpu_ftrs;	/* set by dt_cpu_ftrs_init() */
+static bool __initdata enable_unknown = true;	/* "dt_cpu_ftrs=known" clears */
+
+/* Parse "dt_cpu_ftrs=": accepts "off" or "known"; other values return 1. */
+static int __init dt_cpu_ftrs_parse(char *str)
+{
+	if (!str)
+		return 0;
+
+	if (strcmp(str, "off") == 0)
+		using_dt_cpu_ftrs = false;
+	else if (strcmp(str, "known") == 0)
+		enable_unknown = false;
+	else
+		return 1;
+	return 0;
+}
+early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
+
+/* Log the ISA level and set the ARCH_300/ARCH_31 bits that apply to it. */
+static void __init cpufeatures_setup_start(u32 isa)
+{
+	pr_info("setup for ISA %d\n", isa);
+
+	if (isa >= ISA_V3_0B) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
+		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
+	}
+	if (isa >= ISA_V3_1) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31;
+		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1;
+	}
+}
+
+/*
+ * Enable one feature. The first table entry whose name matches decides;
+ * unmatched names go to feat_try_enable_unknown() if enable_unknown is set.
+ */
+static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
+{
+	const struct dt_cpu_feature_match *m;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dt_cpu_feature_match_table); i++) {
+		m = &dt_cpu_feature_match_table[i];
+		if (strcmp(f->name, m->name) != 0)
+			continue;
+
+		if (!m->enable(f)) {
+			pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
+				f->name);
+			return false;
+		}
+
+		cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
+		pr_debug("enabling: %s\n", f->name);
+		return true;
+	}
+
+	if (!enable_unknown || !feat_try_enable_unknown(f)) {
+		pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+			f->name);
+		return false;
+	}
+
+	pr_debug("enabling: %s (unknown)\n", f->name);
+	return true;
+}
+
+/*
+ * Flag the POWER9 broadcast tlbie invalidation bugs (STQ and ERAT) in
+ * cpu_features, based on the chip type and revision bits of @pvr.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+	if (PVR_VER(pvr) != PVR_POWER9)
+		return;
+
+	/* Set the STQ bug flag below Nimbus DD 2.3 and Cumulus DD 1.3 */
+	if ((pvr & 0xe000) == 0) {
+		/* Nimbus */
+		if ((pvr & 0xfff) < 0x203)
+			cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+	} else if ((pvr & 0xc000) == 0) {
+		/* Cumulus */
+		if ((pvr & 0xfff) < 0x103)
+			cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+	} else {
+		/* Unrecognised chip type: warn once and set the flag anyway */
+		WARN_ONCE(1, "Unknown PVR\n");
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+	}
+
+	/* The ERAT bug flag is set for every PVR_POWER9 part. */
+	cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+}
+
+/* Apply CPU quirks keyed off the PVR value rather than the device tree. */
+static __init void cpufeatures_cpu_quirks(void)
+{
+	unsigned long version = mfspr(SPRN_PVR);
+	unsigned long rev = version & 0xffffefff;
+	bool is_p9 = (version & 0xffff0000) == 0x004e0000;
+
+	/* Not all quirks can be derived from the cpufeatures device tree. */
+	if (rev == 0x004e0200) {
+		/* DD2.0 has no feature flag */
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+	} else if (rev == 0x004e0201) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+	} else if (rev == 0x004e0202) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+	} else if (rev == 0x004e0203) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+	} else if (is_p9) {
+		/* DD2.1 and up have DD2_1 */
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+	}
+
+	if (is_p9)
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
+
+	update_tlbie_feature_flag(version);
+}
+
+/* Apply quirks, reconcile HVMODE, then record platform and SPR values. */
+static void __init cpufeatures_setup_finished(void)
+{
+	cpufeatures_cpu_quirks();
+	if (hv_mode && !(cur_cpu_spec->cpu_features & CPU_FTR_HVMODE)) {
+		pr_err("hypervisor not present in device tree but HV mode is enabled in the CPU. Enabling.\n");
+		cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+	}
+
+	/* Make sure powerpc_base_platform is non-NULL */
+	powerpc_base_platform = cur_cpu_spec->platform;
+
+	system_registers.lpcr = mfspr(SPRN_LPCR);
+	system_registers.hfscr = mfspr(SPRN_HFSCR);
+	system_registers.fscr = mfspr(SPRN_FSCR);
+	system_registers.pcr = mfspr(SPRN_PCR);
+
+	pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
+		cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
+}
+
+/*
+ * Return true if /chosen "bootargs" in the flat DT contains
+ * "dt_cpu_ftrs=off". Any negative (error) offset means no /chosen node.
+ */
+static int __init disabled_on_cmdline(void)
+{
+	const char *bootargs;
+	int chosen;
+
+	chosen = of_get_flat_dt_subnode_by_name(of_get_flat_dt_root(), "chosen");
+	if (chosen < 0)
+		return false;
+
+	bootargs = of_get_flat_dt_prop(chosen, "bootargs", NULL);
+	if (!bootargs)
+		return false;
+
+	return strstr(bootargs, "dt_cpu_ftrs=off") != NULL;
+}
+
+/* Scan callback: 1 for an "ibm,powerpc-cpu-features" node having "isa". */
+static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
+					int depth, void *data)
+{
+	if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+		return 0;
+
+	return of_get_flat_dt_prop(node, "isa", NULL) ? 1 : 0;
+}
+
+bool __init dt_cpu_ftrs_in_use(void)
+{
+	return using_dt_cpu_ftrs;	/* flag maintained by dt_cpu_ftrs_init() */
+}
+
+/* Decide whether DT cpu features are used; records and returns the result. */
+bool __init dt_cpu_ftrs_init(void *fdt)
+{
+	using_dt_cpu_ftrs = false;
+
+	/* Setup and verify the FDT, if it fails we just bail */
+	if (!early_init_dt_verify(fdt))
+		return false;
+
+	/* Need a cpu-features node with "isa", and no dt_cpu_ftrs=off */
+	if (!of_scan_flat_dt(fdt_find_cpu_features, NULL) ||
+	    disabled_on_cmdline())
+		return false;
+
+	cpufeatures_setup_cpu();
+	using_dt_cpu_ftrs = true;
+
+	return true;
+}
+
+static int nr_dt_cpu_features;	/* subnode count from the scan callback */
+static struct dt_cpu_feature *dt_cpu_features;	/* memblock-allocated array */
+
+/* Parse and validate one cpu feature node; always returns 0. */
+static int __init process_cpufeatures_node(unsigned long node,
+					  const char *uname, int i)
+{
+	const __be32 *prop;
+	struct dt_cpu_feature *f;
+	int len;
+
+	f = &dt_cpu_features[i];
+
+	f->node = node;
+	f->name = uname;
+
+	prop = of_get_flat_dt_prop(node, "isa", &len);
+	if (!prop) {
+		pr_warn("%s: missing isa property\n", uname);
+		return 0;
+	}
+	f->isa = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "usable-privilege", &len);
+	if (!prop) {
+		pr_warn("%s: missing usable-privilege property\n", uname);
+		return 0;
+	}
+	f->usable_privilege = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "hv-support", &len);
+	if (prop)
+		f->hv_support = be32_to_cpup(prop);
+	else
+		f->hv_support = HV_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "os-support", &len);
+	if (prop)
+		f->os_support = be32_to_cpup(prop);
+	else
+		f->os_support = OS_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "hfscr-bit-nr", &len);
+	if (prop)
+		f->hfscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->hfscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "fscr-bit-nr", &len);
+	if (prop)
+		f->fscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->fscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "hwcap-bit-nr", &len);
+	if (prop)
+		f->hwcap_bit_nr = be32_to_cpup(prop);
+	else
+		f->hwcap_bit_nr = -1;
+
+	if (f->usable_privilege & USABLE_HV) {
+		if (!(mfmsr() & MSR_HV)) {
+			pr_warn("%s: HV feature passed to guest\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hfscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_HFSCR) {
+			if (f->hfscr_bit_nr == -1) {
+				pr_warn("%s: missing hfscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->hv_support != HV_SUPPORT_NONE || f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hv_support/hfscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	if (f->usable_privilege & USABLE_OS) {
+		if (f->os_support == OS_SUPPORT_NONE && f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted fscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->os_support == OS_SUPPORT_FSCR) {
+			if (f->fscr_bit_nr == -1) {
+				pr_warn("%s: missing fscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->os_support != OS_SUPPORT_NONE || f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted os_support/fscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	if (!(f->usable_privilege & USABLE_PR)) {
+		if (f->hwcap_bit_nr != -1) {
+			pr_warn("%s: unwanted hwcap_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	/* Do all the independent features in the first pass */
+	if (!of_get_flat_dt_prop(node, "dependencies", &len)) {
+		if (cpufeatures_process_feature(f))
+			f->enabled = 1;
+		else
+			f->disabled = 1;
+	}
+
+	return 0;
+}
+
+/* Enable @f once every feature it depends on has been processed. */
+static void __init cpufeatures_deps_enable(struct dt_cpu_feature *f)
+{
+	const __be32 *prop;
+	int len, nr_deps, i;
+
+	if (f->enabled || f->disabled)
+		return;
+
+	prop = of_get_flat_dt_prop(f->node, "dependencies", &len);
+	if (!prop) {
+		pr_warn("%s: missing dependencies property\n", f->name);
+		return;
+	}
+
+	nr_deps = len / sizeof(int);
+
+	for (i = 0; i < nr_deps; i++) {
+		unsigned long phandle = be32_to_cpu(prop[i]);
+		int j;
+
+		for (j = 0; j < nr_dt_cpu_features; j++) {
+			struct dt_cpu_feature *d = &dt_cpu_features[j];
+
+			if (of_get_flat_dt_phandle(d->node) == phandle) {
+				cpufeatures_deps_enable(d);
+				/* A disabled dependency disables @f too. */
+				if (d->disabled) {
+					f->disabled = 1;
+					return;
+				}
+			}
+		}
+	}
+
+	if (cpufeatures_process_feature(f))
+		f->enabled = 1;
+	else
+		f->disabled = 1;
+}
+
+/* Subnode callback: parse @node into slot *data, then advance the index. */
+static int __init scan_cpufeatures_subnodes(unsigned long node,
+					  const char *uname,
+					  void *data)
+{
+	int *idx = data;
+
+	process_cpufeatures_node(node, uname, *idx);
+	*idx += 1;
+
+	return 0;
+}
+
+/* Subnode callback: bump the int counter that @data points to. */
+static int __init count_cpufeatures_subnodes(unsigned long node,
+					  const char *uname,
+					  void *data)
+{
+	int *counter = data;
+
+	*counter += 1;
+	return 0;
+}
+
+/* Scan callback: process every subnode of an "ibm,powerpc-cpu-features" node. */
+static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
+					    *uname, int depth, void *data)
+{
+	const __be32 *prop;
+	int count, i;
+	u32 isa;
+
+	/* We are scanning "ibm,powerpc-cpu-features" nodes only */
+	if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "isa", NULL);
+	if (!prop)
+		/* We checked before, "can't happen" */
+		return 0;
+
+	isa = be32_to_cpup(prop);
+
+	/* Count and allocate space for cpu features */
+	/* nr_dt_cpu_features is file-scope: restart the count for this node */
+	nr_dt_cpu_features = 0;
+	of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
+						&nr_dt_cpu_features);
+	dt_cpu_features = memblock_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE);
+	if (!dt_cpu_features)
+		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+		      __func__,
+		      sizeof(struct dt_cpu_feature) * nr_dt_cpu_features,
+		      PAGE_SIZE);
+
+	cpufeatures_setup_start(isa);
+
+	/* Scan nodes into dt_cpu_features and enable those without deps */
+	count = 0;
+	of_scan_flat_dt_subnodes(node, scan_cpufeatures_subnodes, &count);
+
+	/* Recursive enable remaining features with dependencies */
+	for (i = 0; i < nr_dt_cpu_features; i++)
+		cpufeatures_deps_enable(&dt_cpu_features[i]);
+
+	prop = of_get_flat_dt_prop(node, "display-name", NULL);
+	if (prop && strlen((char *)prop) != 0) {
+		strscpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+		cur_cpu_spec->cpu_name = dt_cpu_name;
+	}
+
+	cpufeatures_setup_finished();
+
+	memblock_free(dt_cpu_features,
+		      sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
+
+	return 0;
+}
+
+/* Run the feature scan over the flat DT, unless using_dt_cpu_ftrs is false. */
+void __init dt_cpu_ftrs_scan(void)
+{
+	if (!using_dt_cpu_ftrs)
+		return;
+	of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
+}
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 0000000000..03f1135ef6
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Early init before relocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+
+/*
+ * We're called here very early in the boot.
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings).  -- paulus
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+	unsigned long kva, offset = reloc_offset();
+
+	kva = *PTRRELOC(&kernstart_virt_addr);
+
+	/* First zero the BSS (done only when kva equals KERNELBASE) */
+	if (kva == KERNELBASE)
+		memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+
+	/*
+	 * Identify the CPU type from the PVR and fix up code sections
+	 * that depend on which cpu we have.
+	 */
+	identify_cpu(offset, mfspr(SPRN_PVR));
+
+	apply_feature_fixups();
+
+	return kva + offset;
+}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
new file mode 100644
index 0000000000..ab316e155e
--- /dev/null
+++ b/arch/powerpc/kernel/eeh.c
@@ -0,0 +1,1941 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ * Copyright 2001-2012 IBM Corporation.
+ *
+ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/reboot.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/debugfs.h>
+
+#include <linux/atomic.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+#include <asm/pte-walk.h>
+
+
+/** Overview:
+ *  EEH, or "Enhanced Error Handling" is a PCI bridge technology for
+ *  dealing with PCI bus errors that can't be dealt with within the
+ *  usual PCI framework, except by check-stopping the CPU.  Systems
+ *  that are designed for high-availability/reliability cannot afford
+ *  to crash due to a "mere" PCI error, thus the need for EEH.
+ *  An EEH-capable bridge operates by converting a detected error
+ *  into a "slot freeze", taking the PCI adapter off-line, making
+ *  the slot behave, from the OS'es point of view, as if the slot
+ *  were "empty": all reads return 0xff's and all writes are silently
+ *  ignored.  EEH slot isolation events can be triggered by parity
+ *  errors on the address or data busses (e.g. during posted writes),
+ *  which in turn might be caused by low voltage on the bus, dust,
+ *  vibration, humidity, radioactivity or plain-old failed hardware.
+ *
+ *  Note, however, that one of the leading causes of EEH slot
+ *  freeze events are buggy device drivers, buggy device microcode,
+ *  or buggy device hardware.  This is because any attempt by the
+ *  device to bus-master data to a memory address that is not
+ *  assigned to the device will trigger a slot freeze.   (The idea
+ *  is to prevent devices-gone-wild from corrupting system memory).
+ *  Buggy hardware/drivers will have a miserable time co-existing
+ *  with EEH.
+ *
+ *  Ideally, a PCI device driver, when suspecting that an isolation
+ *  event has occurred (e.g. by reading 0xff's), will then ask EEH
+ *  whether this is the case, and then take appropriate steps to
+ *  reset the PCI slot, the PCI device, and then resume operations.
+ *  However, until that day,  the checking is done here, with the
+ *  eeh_check_failure() routine embedded in the MMIO macros.  If
+ *  the slot is found to be isolated, an "EEH Event" is synthesized
+ *  and sent out for processing.
+ */
+
+/* If a device driver keeps reading an MMIO register in an interrupt
+ * handler after a slot isolation event, it might be broken.
+ * This sets the threshold for how many read attempts we allow
+ * before printing an error message.
+ */
+#define EEH_MAX_FAILS	2100000
+
+/* Time to wait for a PCI slot to report status, in milliseconds */
+#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
+
+/*
+ * EEH probe mode support, which is part of the flags,
+ * is to support multiple platforms for EEH. Some platforms
+ * like pSeries do PCI enumeration based on device tree.
+ * However, other platforms like powernv probe PCI devices
+ * from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for
+ * particular OF node or PCI device so that the corresponding
+ * PE would be created there.
+ */
+int eeh_subsystem_flags;
+EXPORT_SYMBOL(eeh_subsystem_flags);
+
+/*
+ * EEH allowed maximal frozen times. If one particular PE's
+ * frozen count in last hour exceeds this limit, the PE will
+ * be forced to be offline permanently.
+ */
+u32 eeh_max_freezes = 5;
+
+/*
+ * Controls whether a recovery event should be scheduled when an
+ * isolated device is discovered. This is only really useful for
+ * debugging problems with the EEH core.
+ */
+bool eeh_debugfs_no_recover;
+
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
+
+/* Lock to avoid races due to multiple reports of an error */
+DEFINE_RAW_SPINLOCK(confirm_error_lock);
+EXPORT_SYMBOL_GPL(confirm_error_lock);
+
+/* Lock to protect passed flags */
+static DEFINE_MUTEX(eeh_dev_mutex);
+
+/* Buffer for reporting pci register dumps. It's here in BSS, and
+ * not dynamically alloced, so that it ends up in RMO where RTAS
+ * can access it.
+ */
+#define EEH_PCI_REGS_LOG_LEN 8192
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+	u64 no_device;		/* PCI device not found		*/
+	u64 no_dn;		/* OF node not found		*/
+	u64 no_cfg_addr;	/* Config address not found	*/
+	u64 ignored_check;	/* EEH check skipped		*/
+	u64 total_mmio_ffs;	/* Total EEH checks		*/
+	u64 false_positives;	/* Unnecessary EEH checks	*/
+	u64 slot_resets;	/* PE reset			*/
+};
+
+static struct eeh_stats eeh_stats;
+
+/* Handle "eeh=off" and "eeh=early_log"; any other value sets no flag. */
+static int __init eeh_setup(char *str)
+{
+	if (strcmp(str, "off") == 0)
+		eeh_add_flag(EEH_FORCE_DISABLED);
+	else if (strcmp(str, "early_log") == 0)
+		eeh_add_flag(EEH_EARLY_DUMP_LOG);
+	return 1;
+}
+__setup("eeh=", eeh_setup);
+
+void eeh_show_enabled(void)
+{
+	if (eeh_has_flag(EEH_FORCE_DISABLED))
+		pr_info("EEH: Recovery disabled by kernel parameter.\n");
+	else if (eeh_has_flag(EEH_ENABLED))
+		pr_info("EEH: Capable adapter found: recovery enabled.\n");
+	else
+		pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
+/*
+ * Dump PCI config space registers of @edev into @buf (at most @len
+ * bytes) for RTAS error logging, mirroring each value to the kernel
+ * log. Returns the number of bytes written to @buf.
+ */
+static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
+{
+	u32 cfg;
+	int cap, i;
+	int n = 0, l = 0;
+	char buffer[128];
+
+	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
+			edev->pe->phb->global_number, edev->bdfn >> 8,
+			PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
+	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
+		edev->pe->phb->global_number, edev->bdfn >> 8,
+		PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
+
+	eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
+	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
+
+	eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
+	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
+
+	/* Gather bridge-specific registers */
+	if (edev->mode & EEH_DEV_BRIDGE) {
+		eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
+		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
+		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
+
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
+		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
+		pr_warn("EEH: Bridge control: %04x\n", cfg);
+	}
+
+	/* Dump out the PCI-X command and status regs */
+	cap = edev->pcix_cap;
+	if (cap) {
+		eeh_ops->read_config(edev, cap, 4, &cfg);
+		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
+
+		eeh_ops->read_config(edev, cap+4, 4, &cfg);
+		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+		pr_warn("EEH: PCI-X status: %08x\n", cfg);
+	}
+
+	/* If PCI-E capable, dump PCI-E cap 10 */
+	cap = edev->pcie_cap;
+	if (cap) {
+		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+		pr_warn("EEH: PCI-E capabilities and status follow:\n");
+
+		for (i=0; i<=8; i++) {
+			eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
+			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+
+			if ((i % 4) == 0) {
+				if (i != 0)
+					pr_warn("%s\n", buffer);
+
+				l = scnprintf(buffer, sizeof(buffer),
+					      "EEH: PCI-E %02x: %08x ",
+					      4*i, cfg);
+			} else {
+				l += scnprintf(buffer+l, sizeof(buffer)-l,
+					       "%08x ", cfg);
+			}
+
+		}
+
+		pr_warn("%s\n", buffer);
+	}
+
+	/* If AER capable, dump it */
+	cap = edev->aer_cap;
+	if (cap) {
+		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+		pr_warn("EEH: PCI-E AER capability register set follows:\n");
+
+		for (i=0; i<=13; i++) {
+			eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
+			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+
+			if ((i % 4) == 0) {
+				if (i != 0)
+					pr_warn("%s\n", buffer);
+
+				l = scnprintf(buffer, sizeof(buffer),
+					      "EEH: PCI-E AER %02x: %08x ",
+					      4*i, cfg);
+			} else {
+				l += scnprintf(buffer+l, sizeof(buffer)-l,
+					       "%08x ", cfg);
+			}
+		}
+
+		pr_warn("%s\n", buffer);
+	}
+
+	return n;
+}
+
+/* eeh_pe_traverse() callback: append each device's dump to pci_regs_buf. */
+static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
+{
+	struct eeh_dev *edev, *tmp;
+	size_t *used = flag;
+
+	eeh_pe_for_each_dev(pe, edev, tmp)
+		*used += eeh_dump_dev_log(edev, pci_regs_buf + *used,
+					  EEH_PCI_REGS_LOG_LEN - *used);
+	return NULL;
+}
+
+/**
+ * eeh_slot_error_detail - Generate combined log including driver log and error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ *
+ * This routine should be called to generate the combined log, which
+ * is comprised of driver log and error log. The driver log is figured
+ * out from the config space of the corresponding PCI device, while
+ * the error log is fetched through platform dependent function call.
+ */
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
+{
+	size_t loglen = 0;
+
+	/*
+	 * When the PHB is fenced or dead, it's pointless to collect
+	 * the data from PCI config space because it should return
+	 * 0xFF's. For ER, we still retrieve the data from the PCI
+	 * config space.
+	 *
+	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
+	 * 0xFF's is always returned from PCI config space.
+	 *
+	 * When the @severity is EEH_LOG_PERM, the PE is going to be
+	 * removed. Prior to that, the drivers for devices included in
+	 * the PE will be closed. The drivers rely on working IO path
+	 * to bring the devices to quiet state. Otherwise, PCI traffic
+	 * from those devices after they are removed is likely to cause
+	 * another unexpected EEH error.
+	 */
+	if (!(pe->type & EEH_PE_PHB)) {
+		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
+		    severity == EEH_LOG_PERM)
+			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+
+		/*
+		 * The config space of some PCI devices can't be accessed
+		 * when their PEs are in frozen state. Otherwise, fenced
+		 * PHB might be seen. Those PEs are identified with flag
+		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
+		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
+		 *
+		 * Restoring BARs possibly triggers PCI config access in
+		 * (OPAL) firmware and then causes fenced PHB. If the
+		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
+		 * pointless to restore BARs and dump config space.
+		 */
+		eeh_ops->configure_bridge(pe);
+		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
+			eeh_pe_restore_bars(pe);
+
+			pci_regs_buf[0] = 0;
+			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
+		}
+	}
+
+	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
+}
+
+/**
+ * eeh_token_to_phys - Translate an EEH I/O token into a physical address
+ * @token: I/O token, should be address in the form 0xA....
+ *
+ * Hands the virtual I/O address @token to ppc_find_vmap_phys() and
+ * returns whatever that lookup yields.
+ */
+static inline unsigned long eeh_token_to_phys(unsigned long token)
+{
+	unsigned long phys = ppc_find_vmap_phys(token);
+
+	return phys;
+}
+
+/*
+ * On PowerNV platform, we might already have fenced PHB there.
+ * For that case, it's meaningless to recover frozen PE. Instead,
+ * we have to handle the fenced PHB first.
+ *
+ * Returns 1 when a PHB failure was newly detected and an event was
+ * sent, 0 when there is nothing to do, or a negative errno when the
+ * check cannot be performed.
+ */
+static int eeh_phb_check_failure(struct eeh_pe *pe)
+{
+	struct eeh_pe *phb_pe;
+	unsigned long flags;
+	int state;
+
+	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
+		return -EPERM;
+
+	/* Look up the PE that represents the whole PHB */
+	phb_pe = eeh_phb_pe_get(pe->phb);
+	if (!phb_pe) {
+		pr_warn("%s Can't find PE for PHB#%x\n",
+			__func__, pe->phb->global_number);
+		return -EEXIST;
+	}
+
+	eeh_serialize_lock(&flags);
+
+	/* Already known to be in trouble: nothing new to report */
+	if (phb_pe->state & EEH_PE_ISOLATED) {
+		eeh_serialize_unlock(flags);
+		return 0;
+	}
+
+	/* Query the PHB state; bail out if unknown, unsupported or healthy */
+	state = eeh_ops->get_state(phb_pe, NULL);
+	if (state < 0 || state == EEH_STATE_NOT_SUPPORT ||
+	    eeh_state_active(state)) {
+		eeh_serialize_unlock(flags);
+		return 0;
+	}
+
+	/* Isolate the PHB under the lock, then report outside of it */
+	eeh_pe_mark_isolated(phb_pe);
+	eeh_serialize_unlock(flags);
+
+	pr_debug("EEH: PHB#%x failure detected, location: %s\n",
+		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
+	eeh_send_failure_event(phb_pe);
+	return 1;
+}
+
+/* Driver string for @pdev, or the literal "<null>" when there is no pci_dev */
+static inline const char *eeh_driver_name(struct pci_dev *pdev)
+{
+	return pdev ? dev_driver_string(&pdev->dev) : "<null>";
+}
+
+/**
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
+ *
+ * Check for an EEH failure for the given device node.  Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze.  This routine
+ * will query firmware for the EEH status.
+ *
+ * Returns 0 if there has not been an EEH error (or the check was
+ * skipped); otherwise returns a non-zero value. A newly detected
+ * failure marks the PE isolated and queues a failure event; a PE
+ * that is already isolated only has its check counter bumped.
+ *
+ * It is safe to call this routine in an interrupt context.
+ */
+int eeh_dev_check_failure(struct eeh_dev *edev)
+{
+	int ret;
+	unsigned long flags;
+	struct device_node *dn;
+	struct pci_dev *dev;
+	struct eeh_pe *pe, *parent_pe;
+	int rc = 0;
+	const char *location = NULL;
+
+	eeh_stats.total_mmio_ffs++;
+
+	if (!eeh_enabled())
+		return 0;
+
+	if (!edev) {
+		eeh_stats.no_dn++;
+		return 0;
+	}
+	dev = eeh_dev_to_pci_dev(edev);
+	pe = eeh_dev_to_pe(edev);
+
+	/* Access to IO BARs might get this far and still not want checking. */
+	if (!pe) {
+		eeh_stats.ignored_check++;
+		eeh_edev_dbg(edev, "Ignored check\n");
+		return 0;
+	}
+
+	/*
+	 * On PowerNV platform, we might already have fenced PHB
+	 * there and we need take care of that firstly.
+	 */
+	ret = eeh_phb_check_failure(pe);
+	if (ret > 0)
+		return ret;
+
+	/*
+	 * If the PE isn't owned by us, we shouldn't check the
+	 * state. Instead, let the owner handle it if the PE has
+	 * been frozen.
+	 */
+	if (eeh_pe_passed(pe))
+		return 0;
+
+	/* If we already have a pending isolation event for this
+	 * slot, we know it's bad already, we don't need to check.
+	 * Do this checking under a lock; as multiple PCI devices
+	 * in one slot might report errors simultaneously, and we
+	 * only want one error recovery routine running.
+	 */
+	eeh_serialize_lock(&flags);
+	rc = 1;
+	if (pe->state & EEH_PE_ISOLATED) {
+		pe->check_count++;
+		if (pe->check_count == EEH_MAX_FAILS) {
+			dn = pci_device_to_OF_node(dev);
+			if (dn)
+				location = of_get_property(dn, "ibm,loc-code",
+						NULL);
+			eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
+				pe->check_count,
+				location ? location : "unknown",
+				eeh_driver_name(dev));
+			eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
+				eeh_driver_name(dev));
+			dump_stack();
+		}
+		goto dn_unlock;
+	}
+
+	/*
+	 * Now test for an EEH failure.  This is VERY expensive.
+	 * Note that the eeh_config_addr may be a parent device
+	 * in the case of a device behind a bridge, or it may be
+	 * function zero of a multi-function device.
+	 * In any case they must share a common PHB.
+	 */
+	ret = eeh_ops->get_state(pe, NULL);
+
+	/* Note that config-io to empty slots may fail;
+	 * they are empty when they don't have children.
+	 * We will punt with the following conditions: Failure to get
+	 * PE's state, EEH not support and Permanently unavailable
+	 * state, PE is in good state.
+	 */
+	if ((ret < 0) ||
+	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
+		eeh_stats.false_positives++;
+		pe->false_positives++;
+		rc = 0;
+		goto dn_unlock;
+	}
+
+	/*
+	 * It should be corner case that the parent PE has been
+	 * put into frozen state as well. We should take care
+	 * that at first.
+	 */
+	parent_pe = pe->parent;
+	while (parent_pe) {
+		/* Hit the ceiling ? */
+		if (parent_pe->type & EEH_PE_PHB)
+			break;
+
+		/* Frozen parent PE ? */
+		ret = eeh_ops->get_state(parent_pe, NULL);
+		if (ret > 0 && !eeh_state_active(ret)) {
+			/*
+			 * Log before switching over, so that the message
+			 * names both the PE being escalated and the parent
+			 * that takes over (not the parent twice).
+			 */
+			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
+			       pe->phb->global_number, pe->addr,
+			       pe->phb->global_number, parent_pe->addr);
+			pe = parent_pe;
+		}
+
+		/* Next parent level */
+		parent_pe = parent_pe->parent;
+	}
+
+	eeh_stats.slot_resets++;
+
+	/* Avoid repeated reports of this failure, including problems
+	 * with other functions on this device, and functions under
+	 * bridges.
+	 */
+	eeh_pe_mark_isolated(pe);
+	eeh_serialize_unlock(flags);
+
+	/*
+	 * Leave a debug trace of which PE was found frozen, then hand
+	 * the PE over to the EEH event machinery.
+	 */
+	pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+		__func__, pe->phb->global_number, pe->addr);
+	eeh_send_failure_event(pe);
+
+	return 1;
+
+dn_unlock:
+	eeh_serialize_unlock(flags);
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
+
+/**
+ * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @token: I/O address
+ *
+ * Resolve @token to the EEH device that owns the address and run the
+ * per-device failure check on it. Intended for callers that just read
+ * all 0xff's from MMIO and want to know whether an EEH slot freeze is
+ * the cause. If no device is found for the address, the "no device"
+ * statistic is bumped and 0 is returned.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+int eeh_check_failure(const volatile void __iomem *token)
+{
+	struct eeh_dev *edev;
+	unsigned long phys;
+
+	/* Token -> physical address -> cached EEH device; this is pretty quick. */
+	phys = eeh_token_to_phys((unsigned long __force) token);
+	edev = eeh_addr_cache_get_dev(phys);
+	if (edev)
+		return eeh_dev_check_failure(edev);
+
+	eeh_stats.no_device++;
+	return 0;
+}
+EXPORT_SYMBOL(eeh_check_failure);
+
+
+/**
+ * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
+ * @pe: EEH PE
+ * @function: EEH option
+ *
+ * This routine should be called to reenable frozen MMIO or DMA
+ * so that it would work correctly again. It's useful while doing
+ * recovery or log collection on the indicated device.
+ *
+ * Return: -EINVAL for an unknown @function. For the two thaw options:
+ * 0 if the PE is already enabled, EEH is not supported, or the PE
+ * reaches the requested state after the request; a negative value from
+ * the state query/wait; or -EIO if the state never shows up. For the
+ * remaining options the result of the platform set_option() call.
+ */
+int eeh_pci_enable(struct eeh_pe *pe, int function)
+{
+	int active_flag, rc;
+
+	/*
+	 * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
+	 * Also, it's pointless to enable them on unfrozen PE. So
+	 * we have to check before enabling IO or DMA.
+	 */
+	switch (function) {
+	case EEH_OPT_THAW_MMIO:
+		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
+		break;
+	case EEH_OPT_THAW_DMA:
+		active_flag = EEH_STATE_DMA_ACTIVE;
+		break;
+	case EEH_OPT_DISABLE:
+	case EEH_OPT_ENABLE:
+	case EEH_OPT_FREEZE_PE:
+		/* No state to verify for these options */
+		active_flag = 0;
+		break;
+	default:
+		pr_warn("%s: Invalid function %d\n",
+			__func__, function);
+		return -EINVAL;
+	}
+
+	/*
+	 * Check if IO or DMA has been enabled before
+	 * enabling them.
+	 */
+	if (active_flag) {
+		rc = eeh_ops->get_state(pe, NULL);
+		if (rc < 0)
+			return rc;
+
+		/* Needn't enable it at all */
+		if (rc == EEH_STATE_NOT_SUPPORT)
+			return 0;
+
+		/* It's already enabled */
+		if (rc & active_flag)
+			return 0;
+	}
+
+
+	/*
+	 * Issue the request. For thaw options a failure here is only
+	 * logged: the final verdict comes from the state wait below.
+	 */
+	rc = eeh_ops->set_option(pe, function);
+	if (rc)
+		pr_warn("%s: Unexpected state change %d on "
+			"PHB#%x-PE#%x, err=%d\n",
+			__func__, function, pe->phb->global_number,
+			pe->addr, rc);
+
+	/* Check if the request is finished successfully */
+	if (active_flag) {
+		rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+		if (rc < 0)
+			return rc;
+
+		if (rc & active_flag)
+			return 0;
+
+		return -EIO;
+	}
+
+	return rc;
+}
+
+/*
+ * PE traversal callback: put a device into D0, save its PCI state and
+ * quiesce it. @userdata is the pci_dev the caller has already handled
+ * itself; that device (and any edev without a pci_dev) is skipped.
+ */
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+					    void *userdata)
+{
+	struct pci_dev *skip = userdata;
+	struct pci_dev *target = eeh_dev_to_pci_dev(edev);
+
+	if (target && target != skip) {
+		/* Ensure we have D0 power state before touching anything */
+		pci_set_power_state(target, PCI_D0);
+
+		/* Save device state */
+		pci_save_state(target);
+
+		/*
+		 * Overwrite the command register with only INTx-disable
+		 * set, to avoid any DMA traffic and interrupt from the
+		 * device.
+		 */
+		pci_write_config_word(target, PCI_COMMAND,
+				      PCI_COMMAND_INTX_DISABLE);
+	}
+}
+
+/*
+ * PE traversal callback: re-apply firmware config customization and
+ * restore the saved PCI state. @userdata is the pci_dev whose state the
+ * caller restores on its own, so pci_restore_state() is skipped for it.
+ */
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+{
+	struct pci_dev *skip = userdata;
+	struct pci_dev *target = eeh_dev_to_pci_dev(edev);
+
+	if (!target)
+		return;
+
+	/* Apply customization from firmware, when the platform offers it */
+	if (eeh_ops->restore_config)
+		eeh_ops->restore_config(edev);
+
+	/* The caller should restore state for the specified device */
+	if (target == skip)
+		return;
+
+	pci_restore_state(target);
+}
+
+/**
+ * pcibios_set_pcie_reset_state - Set PCI-E reset state
+ * @dev: pci device struct
+ * @state: reset state to enter
+ *
+ * Hot and warm reset share the same preparation (isolate and freeze
+ * the PE, quiesce and save the other devices, block config space for
+ * non-VF PEs) and differ only in the reset type handed to the
+ * platform. Deassert undoes that: it lifts the reset, unfreezes the
+ * PE, restores the devices and clears the isolated state.
+ *
+ * Return value:
+ * 	0 if success, -EINVAL if @dev has no PE or @state is unknown
+ */
+int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
+{
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+	struct eeh_pe *pe = eeh_dev_to_pe(edev);
+
+	if (!pe) {
+		pr_err("%s: No PE found on PCI device %s\n",
+			__func__, pci_name(dev));
+		return -EINVAL;
+	}
+
+	switch (state) {
+	case pcie_deassert_reset:
+		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+		eeh_unfreeze_pe(pe);
+		if (!(pe->type & EEH_PE_VF))
+			eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+		break;
+	case pcie_hot_reset:
+	case pcie_warm_reset:
+		eeh_pe_mark_isolated(pe);
+		/* Config space must be reachable while device state is saved */
+		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
+		if (!(pe->type & EEH_PE_VF))
+			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
+		/* Hot reset maps to a hot reset, warm reset to a fundamental one */
+		eeh_ops->reset(pe, state == pcie_hot_reset ?
+				   EEH_RESET_HOT : EEH_RESET_FUNDAMENTAL);
+		break;
+	default:
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * eeh_set_dev_freset - Check the required reset for the indicated device
+ * @edev: EEH device
+ * @flag: pointer to an unsigned int accumulator
+ *
+ * PE traversal callback. ORs the device's needs_freset bit into the
+ * accumulator, so that after walking a PE the caller knows whether any
+ * device under it asked for a fundamental reset rather than a hot one.
+ */
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
+{
+	unsigned int *needs = (unsigned int *)flag;
+	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
+
+	if (!pdev)
+		return;
+
+	*needs |= pdev->needs_freset;
+}
+
+/*
+ * Walk the PE tree under @root and freeze again every passed-through PE
+ * whose MMIO is reported as active or enabled.
+ */
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+	const int thawed = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
+	struct eeh_pe *cur;
+
+	eeh_for_each_pe(root, cur) {
+		/* The state is only queried for passed-through PEs */
+		if (eeh_pe_passed(cur) &&
+		    (eeh_ops->get_state(cur, NULL) & thawed)) {
+			pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+				cur->phb->global_number, cur->addr);
+			eeh_pe_set_option(cur, EEH_OPT_FREEZE_PE);
+		}
+	}
+}
+
+/**
+ * eeh_pe_reset_full - Complete a full reset process on the indicated PE
+ * @pe: EEH PE
+ * @include_passed: include passed-through devices?
+ *
+ * This function executes a full reset procedure on a PE, including setting
+ * the appropriate flags, performing a fundamental or hot reset, and then
+ * deactivating the reset status.  It is designed to be used within the EEH
+ * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
+ * only performs a single operation at a time.
+ *
+ * This function will attempt to reset a PE three times before failing.
+ *
+ * Return: 0 when the reset sequence succeeded, -EIO when the last attempt
+ * failed to reset the PE, or -ENOTRECOVERABLE when waiting for the PE
+ * state failed.
+ */
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
+{
+	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+	int type = EEH_RESET_HOT;
+	unsigned int freset = 0;
+	int i, state, ret;
+
+	/*
+	 * Determine the type of reset to perform - hot or fundamental.
+	 * Hot reset is the default operation, unless any device under the
+	 * PE requires a fundamental reset.
+	 */
+	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
+
+	if (freset)
+		type = EEH_RESET_FUNDAMENTAL;
+
+	/* Mark the PE as in reset state and block config space accesses */
+	eeh_pe_state_mark(pe, reset_state);
+
+	/* Make three attempts at resetting the bus */
+	for (i = 0; i < 3; i++) {
+		ret = eeh_pe_reset(pe, type, include_passed);
+		if (!ret)
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
+					   include_passed);
+		if (ret) {
+			/*
+			 * Report the error that actually came back from the
+			 * reset before collapsing it into -EIO for callers.
+			 */
+			pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
+				ret, pe->phb->global_number, pe->addr, i + 1);
+			ret = -EIO;
+			continue;
+		}
+		if (i)
+			pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
+				pe->phb->global_number, pe->addr, i + 1);
+
+		/* Wait until the PE is in a functioning state */
+		state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+		if (state < 0) {
+			pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x\n",
+				pe->phb->global_number, pe->addr);
+			ret = -ENOTRECOVERABLE;
+			break;
+		}
+		if (eeh_state_active(state))
+			break;
+		else
+			pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
+				pe->phb->global_number, pe->addr, state, i + 1);
+	}
+
+	/* Resetting the PE may have unfrozen child PEs. If those PEs have been
+	 * (potentially) passed through to a guest, re-freeze them:
+	 */
+	if (!include_passed)
+		eeh_pe_refreeze_passed(pe);
+
+	eeh_pe_state_clear(pe, reset_state, true);
+	return ret;
+}
+
+/**
+ * eeh_save_bars - Save device bars
+ * @edev: PCI device associated EEH device
+ *
+ * Snapshot the first 64 bytes of the device's config space (sixteen
+ * 32-bit words) into @edev->config_space. Unlike the restore routine,
+ * this one is *not* recursive: devices are added one at a time, whereas
+ * a restore covers an entire slot after reset.
+ */
+void eeh_save_bars(struct eeh_dev *edev)
+{
+	int off;
+
+	if (!edev)
+		return;
+
+	/* One 4-byte read per config dword, stored at the matching index */
+	for (off = 0; off < 16 * 4; off += 4)
+		eeh_ops->read_config(edev, off, 4,
+				     &edev->config_space[off / 4]);
+
+	/*
+	 * For PCI bridges including root port, we need enable bus
+	 * master explicitly. Otherwise, it can't fetch IODA table
+	 * entries correctly. So we cache the bit in advance so that
+	 * we can restore it after reset, either PHB range or PE range.
+	 */
+	if (edev->mode & EEH_DEV_BRIDGE)
+		edev->config_space[1] |= PCI_COMMAND_MASTER;
+}
+
+/*
+ * Reboot notifier callback: clear the EEH_ENABLED flag, whatever the
+ * reboot action is. Neither @action nor @unused is looked at.
+ */
+static int eeh_reboot_notifier(struct notifier_block *nb,
+			       unsigned long action, void *unused)
+{
+	eeh_clear_flag(EEH_ENABLED);
+	return NOTIFY_DONE;
+}
+
+/* Registered with register_reboot_notifier() from eeh_init() */
+static struct notifier_block eeh_reboot_nb = {
+	.notifier_call = eeh_reboot_notifier,
+};
+
+/*
+ * PCI bus notifier callback: tear down the EEH state of a device when
+ * it is deleted from the bus. All other bus events are ignored.
+ *
+ * Note: It's not possible to perform EEH device addition (i.e.
+ * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+ * the device's resources, which have not yet been set up.
+ */
+static int eeh_device_notifier(struct notifier_block *nb,
+			       unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	if (action == BUS_NOTIFY_DEL_DEVICE)
+		eeh_remove_device(to_pci_dev(dev));
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+	.notifier_call = eeh_device_notifier,
+};
+
+/**
+ * eeh_init - System wide EEH initialization
+ * @ops: struct to trace EEH operation callback functions
+ *
+ * It's the platform's job to call this from an arch_initcall().
+ *
+ * Installs @ops as the global EEH operations, registers the reboot and
+ * PCI bus notifiers, creates a PHB PE for every host bridge on
+ * hose_list, and sets up the address cache and the EEH event machinery.
+ *
+ * Return: 0 on success, -EEXIST if EEH was already initialised, -ENOENT
+ * if @ops is NULL, or the error from notifier registration or
+ * eeh_event_init().
+ */
+int eeh_init(struct eeh_ops *ops)
+{
+	struct pci_controller *hose, *tmp;
+	int ret = 0;
+
+	/* the platform should only initialise EEH once */
+	if (WARN_ON(eeh_ops))
+		return -EEXIST;
+	if (WARN_ON(!ops))
+		return -ENOENT;
+	eeh_ops = ops;
+
+	/* Register reboot notifier */
+	ret = register_reboot_notifier(&eeh_reboot_nb);
+	if (ret) {
+		pr_warn("%s: Failed to register reboot notifier (%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
+	ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+	if (ret) {
+		pr_warn("%s: Failed to register bus notifier (%d)\n",
+			__func__, ret);
+		/* Don't leave the reboot notifier behind on a failed init */
+		unregister_reboot_notifier(&eeh_reboot_nb);
+		return ret;
+	}
+
+	/* Initialize PHB PEs */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		eeh_phb_pe_create(hose);
+
+	eeh_addr_cache_init();
+
+	/* Initialize EEH event */
+	return eeh_event_init();
+}
+
+/**
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine must be used to complete EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ *
+ * It asks the platform to probe @dev, drops any stale binding the
+ * resulting eeh_dev still has to a different pci_dev, and then binds
+ * the two together and registers @dev with the address cache and sysfs.
+ */
+void eeh_probe_device(struct pci_dev *dev)
+{
+	struct eeh_dev *edev;
+
+	pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+	/*
+	 * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+	 * already called for this device.
+	 */
+	if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+		pci_dbg(dev, "Already bound to an eeh_dev!\n");
+		return;
+	}
+
+	/* Platform-specific probe; NULL means no EEH device for @dev */
+	edev = eeh_ops->probe(dev);
+	if (!edev) {
+		pr_debug("EEH: Adding device failed\n");
+		return;
+	}
+
+	/*
+	 * FIXME: We rely on pcibios_release_device() to remove the
+	 * existing EEH state. The release function is only called if
+	 * the pci_dev's refcount drops to zero so if something is
+	 * keeping a ref to a device (e.g. a filesystem) we need to
+	 * remove the old EEH state.
+	 *
+	 * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
+	 */
+	if (edev->pdev && edev->pdev != dev) {
+		eeh_pe_tree_remove(edev);
+		eeh_addr_cache_rmv_dev(edev->pdev);
+		eeh_sysfs_remove_device(edev->pdev);
+
+		/*
+		 * We definitely should have the PCI device removed
+		 * though it wasn't correctly. So we needn't call
+		 * into error handler afterwards.
+		 */
+		edev->mode |= EEH_DEV_NO_HANDLER;
+	}
+
+	/* bind the pdev and the edev together */
+	edev->pdev = dev;
+	dev->dev.archdata.edev = edev;
+	eeh_addr_cache_insert_dev(dev);
+	eeh_sysfs_add_device(dev);
+}
+
+/**
+ * eeh_remove_device - Undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from
+ * a running system (e.g. by hotplug or dlpar).  It unregisters
+ * the PCI device from the EEH subsystem.  I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
+ *
+ * Nothing is done when @dev is NULL, when EEH is disabled, or when
+ * the device has no fully bound eeh_dev (edev, edev->pdev and
+ * edev->pe must all be set).
+ */
+void eeh_remove_device(struct pci_dev *dev)
+{
+	struct eeh_dev *edev;
+
+	if (!dev || !eeh_enabled())
+		return;
+	edev = pci_dev_to_eeh_dev(dev);
+
+	/* Unregister the device with the EEH/PCI address search system */
+	dev_dbg(&dev->dev, "EEH: Removing device\n");
+
+	if (!edev || !edev->pdev || !edev->pe) {
+		dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
+		return;
+	}
+
+	/*
+	 * During the hotplug for EEH error recovery, we need the EEH
+	 * device attached to the parent PE in order for BAR restore
+	 * a bit later. So we keep it for BAR restore and remove it
+	 * from the parent PE during the BAR restore.
+	 */
+	edev->pdev = NULL;
+
+	/*
+	 * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
+	 * remove the sysfs files before clearing dev.archdata.edev
+	 */
+	if (edev->mode & EEH_DEV_SYSFS)
+		eeh_sysfs_remove_device(dev);
+
+	/*
+	 * We're removing from the PCI subsystem, that means
+	 * the PCI device driver can't support EEH or not
+	 * well. So we rely on hotplug completely to do recovery
+	 * for the specific PCI device.
+	 */
+	edev->mode |= EEH_DEV_NO_HANDLER;
+
+	eeh_addr_cache_rmv_dev(dev);
+
+	/*
+	 * The flag "in_error" is used to trace EEH devices for VFs
+	 * in error state or not. It's set in eeh_report_error(). If
+	 * it's not set, eeh_report_{reset,resume}() won't be called
+	 * for the VF EEH device.
+	 */
+	edev->in_error = false;
+	dev->dev.archdata.edev = NULL;
+	/*
+	 * Unless the PE is flagged EEH_PE_KEEP, drop the edev from the PE
+	 * tree now; otherwise leave it attached and only mark it as
+	 * disconnected.
+	 */
+	if (!(edev->pe->state & EEH_PE_KEEP))
+		eeh_pe_tree_remove(edev);
+	else
+		edev->mode |= EEH_DEV_DISCONNECTED;
+}
+
+/*
+ * Thaw a frozen PE: re-enable MMIO first and, only if that worked,
+ * DMA. Returns 0 on success or the error of the step that failed.
+ */
+int eeh_unfreeze_pe(struct eeh_pe *pe)
+{
+	int rc;
+
+	rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+	if (rc) {
+		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
+			__func__, rc, pe->phb->global_number, pe->addr);
+		return rc;
+	}
+
+	rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+	if (rc)
+		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
+			__func__, rc, pe->phb->global_number, pe->addr);
+
+	return rc;
+}
+
+
+/*
+ * Devices for which a frozen PE gets a full PE reset and recovery
+ * instead of a plain unfreeze when the PE changes owner (see
+ * eeh_pe_change_owner()). The list is terminated by an all-zero entry.
+ */
+static struct pci_device_id eeh_reset_ids[] = {
+	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
+	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
+	{ PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */
+	{ 0 }
+};
+
+/* Does @pdev match any entry of eeh_reset_ids (PCI_ANY_ID is a wildcard)? */
+static bool eeh_pdev_needs_pe_reset(struct pci_dev *pdev)
+{
+	const struct pci_device_id *id;
+
+	for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
+		bool vendor_ok = id->vendor == PCI_ANY_ID ||
+				 id->vendor == pdev->vendor;
+		bool device_ok = id->device == PCI_ANY_ID ||
+				 id->device == pdev->device;
+		bool subvendor_ok = id->subvendor == PCI_ANY_ID ||
+				    id->subvendor == pdev->subsystem_vendor;
+		bool subdevice_ok = id->subdevice == PCI_ANY_ID ||
+				    id->subdevice == pdev->subsystem_device;
+
+		if (vendor_ok && device_ok && subvendor_ok && subdevice_ok)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Bring a PE into a usable state before it changes owner. A PE whose
+ * state cannot be read, is unsupported, or is already active needs
+ * nothing. A frozen PE is either fully reset and recovered (when it
+ * holds a device listed in eeh_reset_ids) or simply unfrozen, in which
+ * case its isolated state is cleared on success.
+ */
+static int eeh_pe_change_owner(struct eeh_pe *pe)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	int state, rc;
+
+	/* Check PE state: unknown, unsupported or unfrozen means no work */
+	state = eeh_ops->get_state(pe, NULL);
+	if (state < 0 || state == EEH_STATE_NOT_SUPPORT ||
+	    eeh_state_active(state))
+		return 0;
+
+	/* Frozen PE, check if it needs PE level reset */
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (pdev && eeh_pdev_needs_pe_reset(pdev))
+			return eeh_pe_reset_and_recover(pe);
+	}
+
+	rc = eeh_unfreeze_pe(pe);
+	if (rc)
+		return rc;
+
+	eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+	return 0;
+}
+
+/**
+ * eeh_dev_open - Increase count of pass through devices for PE
+ * @pdev: PCI device
+ *
+ * Account one more passed-through device on the PE that @pdev belongs
+ * to. As a result, EEH errors detected on the PE won't be reported;
+ * the PE owner becomes responsible for detection and recovery.
+ *
+ * Return: 0 on success, -ENODEV if @pdev is NULL or has no EEH device
+ * or PE, or the error from clearing a frozen PE.
+ */
+int eeh_dev_open(struct pci_dev *pdev)
+{
+	struct eeh_dev *edev;
+	int ret = -ENODEV;
+
+	mutex_lock(&eeh_dev_mutex);
+
+	/* No PCI device ? */
+	if (!pdev)
+		goto out;
+
+	/* No EEH device or PE ? */
+	edev = pci_dev_to_eeh_dev(pdev);
+	if (!edev || !edev->pe)
+		goto out;
+
+	/*
+	 * The PE might have been put into frozen state, but we
+	 * didn't detect that yet. The passed through PCI devices
+	 * in frozen PE won't work properly. Clear the frozen state
+	 * in advance, and only count the device if that worked.
+	 */
+	ret = eeh_pe_change_owner(edev->pe);
+	if (!ret)
+		atomic_inc(&edev->pe->pass_dev_cnt);
+out:
+	mutex_unlock(&eeh_dev_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_dev_open);
+
+/**
+ * eeh_dev_release - Decrease count of pass through devices for PE
+ * @pdev: PCI device
+ *
+ * Drop one passed-through device from the PE that @pdev belongs to.
+ * Once no passed-through device is left in the PE, EEH errors detected
+ * on it are reported and handled as usual again. Nothing happens when
+ * @pdev is NULL, has no EEH device or PE, or the PE is not passed
+ * through.
+ */
+void eeh_dev_release(struct pci_dev *pdev)
+{
+	struct eeh_dev *edev;
+
+	mutex_lock(&eeh_dev_mutex);
+
+	if (pdev) {
+		edev = pci_dev_to_eeh_dev(pdev);
+		if (edev && edev->pe && eeh_pe_passed(edev->pe)) {
+			/* Decrease PE's pass through count */
+			WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
+			eeh_pe_change_owner(edev->pe);
+		}
+	}
+
+	mutex_unlock(&eeh_dev_mutex);
+}
+EXPORT_SYMBOL(eeh_dev_release);
+
+#ifdef CONFIG_IOMMU_API
+
+/*
+ * iommu_group_for_each_dev() callback: when @dev is IOMMU-mapped, store
+ * its pci_dev through @data (a struct pci_dev **) and return 1;
+ * otherwise return 0.
+ */
+static int dev_has_iommu_table(struct device *dev, void *data)
+{
+	struct pci_dev **found = data;
+
+	if (!dev || !device_iommu_mapped(dev))
+		return 0;
+
+	*found = to_pci_dev(dev);
+	return 1;
+}
+
+/**
+ * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
+ * @group: IOMMU group
+ *
+ * Find an IOMMU-mapped PCI device in @group and return the PE of its
+ * EEH device. Returns NULL when @group is NULL, no such device is
+ * found, or the device has no EEH device or PE.
+ */
+struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
+{
+	struct pci_dev *pdev = NULL;
+	struct eeh_dev *edev;
+
+	/* No IOMMU group ? */
+	if (!group)
+		return NULL;
+
+	if (!iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table) ||
+	    !pdev)
+		return NULL;
+
+	/* No EEH device means no PE either */
+	edev = pci_dev_to_eeh_dev(pdev);
+	return edev ? edev->pe : NULL;
+}
+EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
+
+#endif /* CONFIG_IOMMU_API */
+
+/**
+ * eeh_pe_set_option - Set options for the indicated PE
+ * @pe: EEH PE
+ * @option: requested option
+ *
+ * The routine is called to enable or disable EEH functionality
+ * on the indicated PE, to enable IO or DMA for the frozen PE.
+ *
+ * Return: 0 on success; -ENODEV without a PE; -EIO when enabling is
+ * requested while EEH is globally disabled; -ENOENT when the platform
+ * lacks set_option(); -EINVAL for an unknown @option; otherwise the
+ * error of the underlying operation.
+ */
+int eeh_pe_set_option(struct eeh_pe *pe, int option)
+{
+	/* Invalid PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	/*
+	 * EEH functionality could possibly be disabled, just
+	 * return error for the case. And the EEH functionality
+	 * isn't expected to be disabled on one specific PE.
+	 */
+	switch (option) {
+	case EEH_OPT_ENABLE:
+		return eeh_enabled() ? eeh_pe_change_owner(pe) : -EIO;
+	case EEH_OPT_DISABLE:
+		/* Accepted, but deliberately a no-op */
+		return 0;
+	case EEH_OPT_THAW_MMIO:
+	case EEH_OPT_THAW_DMA:
+	case EEH_OPT_FREEZE_PE:
+		if (eeh_ops && eeh_ops->set_option)
+			return eeh_pci_enable(pe, option);
+		return -ENOENT;
+	default:
+		pr_debug("%s: Option %d out of range (%d, %d)\n",
+			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(eeh_pe_set_option);
+
+/**
+ * eeh_pe_get_state - Retrieve PE's state
+ * @pe: EEH PE
+ *
+ * Retrieve the PE's state, which includes 3 aspects: enabled
+ * DMA, enabled IO and asserted reset. The platform state bits are
+ * folded into one of the EEH_PE_STATE_* values.
+ *
+ * Return: an EEH_PE_STATE_* value, -ENODEV without a PE, or -ENOENT
+ * when the platform provides no get_state().
+ */
+int eeh_pe_get_state(struct eeh_pe *pe)
+{
+	bool dma_on, mmio_on;
+	int raw;
+
+	/* Existing PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	if (!eeh_ops || !eeh_ops->get_state)
+		return -ENOENT;
+
+	/*
+	 * If the parent PE is owned by the host kernel and is undergoing
+	 * error recovery, we should return the PE state as temporarily
+	 * unavailable so that the error recovery on the guest is suspended
+	 * until the recovery completes on the host.
+	 */
+	if (pe->parent &&
+	    !(pe->state & EEH_PE_REMOVED) &&
+	    (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+		return EEH_PE_STATE_UNAVAIL;
+
+	raw = eeh_ops->get_state(pe, NULL);
+
+	/* An asserted reset wins over whatever the enable bits say */
+	if (raw & EEH_STATE_RESET_ACTIVE)
+		return EEH_PE_STATE_RESET;
+
+	dma_on = !!(raw & EEH_STATE_DMA_ENABLED);
+	mmio_on = !!(raw & EEH_STATE_MMIO_ENABLED);
+
+	/* DMA enabled without MMIO is reported as unavailable */
+	if (dma_on)
+		return mmio_on ? EEH_PE_STATE_NORMAL : EEH_PE_STATE_UNAVAIL;
+
+	return mmio_on ? EEH_PE_STATE_STOPPED_DMA :
+			 EEH_PE_STATE_STOPPED_IO_DMA;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_get_state);
+
+/*
+ * After a reset has been deactivated: restore the BARs of @pe,
+ * re-enable each of its PCI devices and unfreeze the PE. A PE that is
+ * passed through is left frozen unless @include_passed is set. On
+ * success the isolated state is cleared.
+ */
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	int rc;
+
+	eeh_pe_restore_bars(pe);
+
+	/*
+	 * Reenable PCI devices as the devices passed
+	 * through are always enabled before the reset.
+	 */
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		rc = pci_reenable_device(pdev);
+		if (rc) {
+			pr_warn("%s: Failure %d reenabling %s\n",
+				__func__, rc, pci_name(pdev));
+			return rc;
+		}
+	}
+
+	/* The PE is still in frozen state */
+	if (!include_passed && eeh_pe_passed(pe)) {
+		pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+			pe->phb->global_number, pe->addr);
+	} else {
+		rc = eeh_unfreeze_pe(pe);
+		if (rc)
+			return rc;
+	}
+
+	eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
+	return 0;
+}
+
+
+/**
+ * eeh_pe_reset - Issue PE reset according to specified type
+ * @pe: EEH PE
+ * @option: reset type
+ * @include_passed: include passed-through devices?
+ *
+ * Perform one step of a PE reset: assert a hot or fundamental reset,
+ * or deactivate a reset that was asserted earlier. PE reset is the
+ * most important part of error recovery.
+ *
+ * Return: 0 on success, -ENODEV without a PE, -ENOENT when the
+ * platform lacks the needed callbacks, -EINVAL for an unknown @option,
+ * or the error of the underlying operation.
+ */
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
+{
+	int rc;
+
+	/* Invalid PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
+		return -ENOENT;
+
+	if (option == EEH_RESET_DEACTIVATE) {
+		rc = eeh_ops->reset(pe, option);
+		/* Config space is unblocked whether or not the reset call worked */
+		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
+		if (!rc)
+			rc = eeh_pe_reenable_devices(pe, include_passed);
+		return rc;
+	}
+
+	if (option == EEH_RESET_HOT || option == EEH_RESET_FUNDAMENTAL) {
+		/*
+		 * Proactively freeze the PE to drop all MMIO access
+		 * during reset, which should be banned as it would
+		 * always cause a recursive EEH error.
+		 */
+		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+
+		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
+		return eeh_ops->reset(pe, option);
+	}
+
+	pr_debug("%s: Unsupported option %d\n",
+		__func__, option);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_reset);
+
+/**
+ * eeh_pe_configure - Configure PCI bridges after PE reset
+ * @pe: EEH PE
+ *
+ * Intended to restore the PCI config space of the devices, especially
+ * PCI bridges, affected by a previous PE reset. As written it only
+ * validates @pe: -ENODEV for a NULL PE, 0 otherwise.
+ */
+int eeh_pe_configure(struct eeh_pe *pe)
+{
+	/* Invalid PE ? */
+	return pe ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_configure);
+
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * Validate the request and pass it on to the platform's err_inject()
+ * callback, which injects the PCI error selected by @type and @func
+ * into @pe for testing purposes.
+ *
+ * Return: -ENODEV without a PE, -ENOENT when the platform cannot
+ * inject errors, -EINVAL for a bad @type or @func, otherwise the
+ * result of the platform callback.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+		      unsigned long addr, unsigned long mask)
+{
+	bool type_ok, func_ok;
+
+	/* Invalid PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	/* Unsupported operation ? */
+	if (!eeh_ops || !eeh_ops->err_inject)
+		return -ENOENT;
+
+	/* Only the 32-bit and 64-bit error types are accepted */
+	type_ok = (type == EEH_ERR_TYPE_32 || type == EEH_ERR_TYPE_64);
+	/* The error function must lie within the supported range */
+	func_ok = (func >= EEH_ERR_FUNC_MIN && func <= EEH_ERR_FUNC_MAX);
+	if (!type_ok || !func_ok)
+		return -EINVAL;
+
+	return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show routine for the EEH statistics: a short summary while
+ * EEH is globally disabled, the full counter set otherwise.
+ */
+static int proc_eeh_show(struct seq_file *m, void *v)
+{
+	if (!eeh_enabled()) {
+		seq_printf(m, "EEH Subsystem is globally disabled\n");
+		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
+		return 0;
+	}
+
+	seq_printf(m, "EEH Subsystem is enabled\n");
+	seq_printf(m,
+			"no device=%llu\n"
+			"no device node=%llu\n"
+			"no config address=%llu\n"
+			"check not wanted=%llu\n"
+			"eeh_total_mmio_ffs=%llu\n"
+			"eeh_false_positives=%llu\n"
+			"eeh_slot_resets=%llu\n",
+			eeh_stats.no_device,
+			eeh_stats.no_dn,
+			eeh_stats.no_cfg_addr,
+			eeh_stats.ignored_check,
+			eeh_stats.total_mmio_ffs,
+			eeh_stats.false_positives,
+			eeh_stats.slot_resets);
+
+	return 0;
+}
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_DEBUG_FS
+
+/* Parse "<domain>:<bus>:<dev>.<fn>" from userspace; caller must pci_dev_put() the result. */
+static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp,
+					     const char __user *user_buf,
+					     size_t count, loff_t *ppos)
+{
+	uint32_t domain, bus, dev, fn;
+	struct pci_dev *pdev;
+	char buf[20] = { 0 };	/* zero-filled: parsed as a C string below */
+	int ret;
+
+	/* Never fill the last byte, so buf always stays NUL-terminated. */
+	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+	if (ret <= 0)		/* propagate copy errors, not only empty writes */
+		return ERR_PTR(ret ? ret : -EFAULT);
+
+	ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+	if (ret != 4) {
+		pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+	if (!pdev)
+		return ERR_PTR(-ENODEV);
+
+	return pdev;
+}
+
+static int eeh_enable_dbgfs_set(void *data, u64 val)
+{
+	if (val)
+		eeh_clear_flag(EEH_FORCE_DISABLED);
+	else
+		eeh_add_flag(EEH_FORCE_DISABLED);
+
+	return 0;
+}
+
+static int eeh_enable_dbgfs_get(void *data, u64 *val)
+{
+	/* Expose the effective EEH state as a 0/1 value. */
+	bool on = eeh_enabled();
+
+	*val = on ? 0x1ul : 0x0ul;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+			 eeh_enable_dbgfs_set, "0x%llx\n");
+
+static ssize_t eeh_force_recover_write(struct file *filp,
+				const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct pci_controller *hose;
+	uint32_t phbid, pe_no;
+	struct eeh_pe *pe;
+	char buf[20] = { 0 };	/* zero-filled: parsed as a C string below */
+	int ret;
+
+	/* Accepts "hwcheck" or a hex "<phb id>:<pe no>" pair from userspace. */
+	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+	if (ret <= 0)		/* copy error or empty write */
+		return ret ? ret : -EFAULT;
+
+	/*
+	 * A NULL PE is a "special" event: rather than recovering a specific
+	 * PE, the EEH core scans for failed PHBs and recovers each. This
+	 * needs to be done before any device recoveries can occur.
+	 */
+	if (!strncmp(buf, "hwcheck", 7)) {
+		__eeh_send_failure_event(NULL);
+		return count;
+	}
+
+	ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
+	if (ret != 2)
+		return -EINVAL;
+
+	hose = pci_find_controller_for_domain(phbid);
+	if (!hose)
+		return -ENODEV;
+
+	/* Retrieve PE */
+	pe = eeh_pe_get(hose, pe_no);
+	if (!pe)
+		return -ENODEV;
+
+	/*
+	 * We don't do any state checking here since the detection
+	 * process is async to the recovery process. The recovery
+	 * thread *should* not break even if we schedule a recovery
+	 * from an odd state (e.g. PE removed, or recovery of a
+	 * non-isolated PE)
+	 */
+	__eeh_send_failure_event(pe);
+
+	return count;	/* ret is always 2 here, so no error is left to report */
+}
+
+static const struct file_operations eeh_force_recover_fops = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+	.write	= eeh_force_recover_write,
+};
+
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+				char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       usage, sizeof(usage) - 1);
+}
+
+static ssize_t eeh_dev_check_write(struct file *filp,
+				const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct pci_dev *pdev;
+	struct eeh_dev *edev;
+	int ret;
+
+	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	edev = pci_dev_to_eeh_dev(pdev);
+	if (!edev) {
+		pci_err(pdev, "No eeh_dev for this device!\n");
+		pci_dev_put(pdev);
+		return -ENODEV;
+	}
+
+	ret = eeh_dev_check_failure(edev);
+	pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n",
+			pci_name(pdev), ret);
+
+	pci_dev_put(pdev);
+
+	return count;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+	.write	= eeh_dev_check_write,
+	.read   = eeh_debugfs_dev_usage,
+};
+
+static int eeh_debugfs_break_device(struct pci_dev *pdev)
+{
+	struct resource *bar = NULL;
+	void __iomem *mapped;
+	u16 old, bit;
+	int i, pos;
+
+	/* Do we have an MMIO BAR to disable? */
+	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+		struct resource *r = &pdev->resource[i];
+
+		if (!r->flags || !r->start)
+			continue;
+		if (r->flags & IORESOURCE_IO)
+			continue;
+		if (r->flags & IORESOURCE_UNSET)
+			continue;
+
+		bar = r;
+		break;
+	}
+
+	if (!bar) {
+		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+		return -ENXIO;
+	}
+
+	pci_err(pdev, "Going to break: %pR\n", bar);
+
+	if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+		return -ENXIO;
+#else
+		/*
+		 * VFs don't have a per-function COMMAND register, so the best
+		 * we can do is clear the Memory Space Enable bit in the PF's
+		 * SRIOV control reg.
+		 *
+		 * Unfortunately, this requires that we have a PF (i.e doesn't
+		 * work for a passed-through VF) and it has the potential side
+		 * effect of also causing an EEH on every other VF under the
+		 * PF. Oh well.
+		 */
+		pdev = pdev->physfn;
+		if (!pdev)
+			return -ENXIO; /* passed through VFs have no PF */
+
+		pos  = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+		pos += PCI_SRIOV_CTRL;
+		bit  = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+	} else {
+		bit = PCI_COMMAND_MEMORY;
+		pos = PCI_COMMAND;
+	}
+
+	/*
+	 * Process here is:
+	 *
+	 * 1. Disable Memory space.
+	 *
+	 * 2. Perform an MMIO to the device. This should result in an error
+	 *    (CA / UR) being raised by the device which results in an EEH
+	 *    PE freeze. The in_8() accessor is used because it skips the
+	 *    EEH detection hook, so the EEH detection machinery won't be
+	 *    triggered here. This is to match the usual behaviour of EEH
+	 *    where the HW will asynchronously freeze a PE and it's up to
+	 *    the kernel to notice and deal with it.
+	 *
+	 * 3. Turn Memory space back on. This is more important for VFs
+	 *    since recovery will probably fail if we don't. For normal
+	 *    devices the COMMAND register is reset as a part of
+	 *    re-initialising the device.
+	 *
+	 * Breaking stuff is the point so who cares if it's racy ;)
+	 */
+	pci_read_config_word(pdev, pos, &old);
+
+	mapped = ioremap(bar->start, PAGE_SIZE);
+	if (!mapped) {
+		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+		return -ENXIO;
+	}
+
+	pci_write_config_word(pdev, pos, old & ~bit);
+	in_8(mapped);
+	pci_write_config_word(pdev, pos, old);
+
+	iounmap(mapped);
+
+	return 0;
+}
+
+static ssize_t eeh_dev_break_write(struct file *filp,
+				const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct pci_dev *pdev;
+	int ret;
+
+	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	ret = eeh_debugfs_break_device(pdev);
+	pci_dev_put(pdev);
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static const struct file_operations eeh_dev_break_fops = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+	.write	= eeh_dev_break_write,
+	.read   = eeh_debugfs_dev_usage,
+};
+
+static ssize_t eeh_dev_can_recover(struct file *filp,
+				   const char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{
+	struct pci_driver *drv;
+	struct pci_dev *pdev;
+	ssize_t ret;	/* signed: carries either count or -EOPNOTSUPP */
+
+	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	/*
+	 * In order for error recovery to work the driver needs to implement
+	 * .error_detected(), so it can quiesce IO to the device, and
+	 * .slot_reset() so it can re-initialise the device after a reset.
+	 *
+	 * Ideally they'd implement .resume() too, but some drivers which
+	 * we need to support (notably IPR) don't so I guess we can tolerate
+	 * that.
+	 *
+	 * .mmio_enabled() is mostly there as a work-around for devices which
+	 * take forever to re-init after a hot reset. Implementing that is
+	 * strictly optional.
+	 */
+	drv = pci_dev_driver(pdev);
+	if (drv &&
+	    drv->err_handler &&
+	    drv->err_handler->error_detected &&
+	    drv->err_handler->slot_reset) {
+		ret = count;
+	} else {
+		ret = -EOPNOTSUPP;
+	}
+
+	pci_dev_put(pdev);	/* drop the reference taken by the lookup */
+
+	return ret;
+}
+
+static const struct file_operations eeh_dev_can_recover_fops = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+	.write	= eeh_dev_can_recover,
+	.read   = eeh_debugfs_dev_usage,
+};
+
+#endif
+
+static int __init eeh_init_proc(void)
+{
+	if (machine_is(pseries) || machine_is(powernv)) {
+		proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
+#ifdef CONFIG_DEBUG_FS
+		debugfs_create_file_unsafe("eeh_enable", 0600,
+					   arch_debugfs_dir, NULL,
+					   &eeh_enable_dbgfs_ops);
+		debugfs_create_u32("eeh_max_freezes", 0600,
+				arch_debugfs_dir, &eeh_max_freezes);
+		debugfs_create_bool("eeh_disable_recovery", 0600,
+				arch_debugfs_dir,
+				&eeh_debugfs_no_recover);
+		debugfs_create_file_unsafe("eeh_dev_check", 0600,
+				arch_debugfs_dir, NULL,
+				&eeh_dev_check_fops);
+		debugfs_create_file_unsafe("eeh_dev_break", 0600,
+				arch_debugfs_dir, NULL,
+				&eeh_dev_break_fops);
+		debugfs_create_file_unsafe("eeh_force_recover", 0600,
+				arch_debugfs_dir, NULL,
+				&eeh_force_recover_fops);
+		debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
+				arch_debugfs_dir, NULL,
+				&eeh_dev_can_recover_fops);
+		eeh_cache_debugfs_init();
+#endif
+	}
+
+	return 0;
+}
+__initcall(eeh_init_proc);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
new file mode 100644
index 0000000000..2f9dbf8ad2
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PCI address cache; allows the lookup of PCI devices based on I/O address
+ *
+ * Copyright IBM Corporation 2004
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+
+/**
+ * DOC: Overview
+ *
+ * The pci address cache subsystem.  This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+
+struct pci_io_addr_range {
+	struct rb_node rb_node;
+	resource_size_t addr_lo;
+	resource_size_t addr_hi;
+	struct eeh_dev *edev;
+	struct pci_dev *pcidev;
+	unsigned long flags;
+};
+
+static struct pci_io_addr_cache {
+	struct rb_root rb_root;
+	spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
+{
+	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		if (addr < piar->addr_lo)
+			n = n->rb_left;
+		else if (addr > piar->addr_hi)
+			n = n->rb_right;
+		else
+			return piar->edev;
+	}
+
+	return NULL;
+}
+
+/**
+ * eeh_addr_cache_get_dev - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address.  I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
+{
+	struct eeh_dev *edev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	edev = __eeh_addr_cache_get_device(addr);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+	return edev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+	struct rb_node *n;
+	int cnt = 0;
+
+	n = rb_first(&cache->rb_root);
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+		pr_info("PCI: %s addr range %d [%pap-%pap]: %s\n",
+		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+		       &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
+		cnt++;
+		n = rb_next(n);
+	}
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
+		      resource_size_t ahi, unsigned long flags)
+{
+	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct pci_io_addr_range *piar;
+
+	/* Walk tree, find a place to insert into tree */
+	while (*p) {
+		parent = *p;
+		piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+		if (ahi < piar->addr_lo) {
+			p = &parent->rb_left;
+		} else if (alo > piar->addr_hi) {
+			p = &parent->rb_right;
+		} else {
+			if (dev != piar->pcidev ||
+			    alo != piar->addr_lo || ahi != piar->addr_hi) {
+				pr_warn("PIAR: overlapping address range\n");
+			}
+			return piar;
+		}
+	}
+	piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+	if (!piar)
+		return NULL;
+
+	piar->addr_lo = alo;
+	piar->addr_hi = ahi;
+	piar->edev = pci_dev_to_eeh_dev(dev);
+	piar->pcidev = dev;
+	piar->flags = flags;
+
+	eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+		 &alo, &ahi);
+
+	rb_link_node(&piar->rb_node, parent, p);
+	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+	return piar;
+}
+
+static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+	struct eeh_dev *edev;
+	int i;
+
+	edev = pci_dev_to_eeh_dev(dev);
+	if (!edev) {
+		pr_warn("PCI: no EEH dev found for %s\n",
+			pci_name(dev));
+		return;
+	}
+
+	/* Skip any devices for which EEH is not enabled. */
+	if (!edev->pe) {
+		dev_dbg(&dev->dev, "EEH: Skip building address cache\n");
+		return;
+	}
+
+	/*
+	 * Walk resources on this device, poke the first 7 (6 normal BAR and 1
+	 * ROM BAR) into the tree.
+	 */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		resource_size_t start = pci_resource_start(dev,i);
+		resource_size_t end = pci_resource_end(dev,i);
+		unsigned long flags = pci_resource_flags(dev,i);
+
+		/* We are interested only bus addresses, not dma or other stuff */
+		if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+			continue;
+		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+			 continue;
+		eeh_addr_cache_insert(dev, start, end, flags);
+	}
+}
+
+/**
+ * eeh_addr_cache_insert_dev - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+void eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	__eeh_addr_cache_insert_dev(dev);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+	struct rb_node *n;
+
+restart:
+	n = rb_first(&pci_io_addr_cache_root.rb_root);
+	while (n) {
+		struct pci_io_addr_range *piar;
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		if (piar->pcidev == dev) {
+			eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+				 &piar->addr_lo, &piar->addr_hi);
+			rb_erase(n, &pci_io_addr_cache_root.rb_root);
+			kfree(piar);
+			goto restart;
+		}
+		n = rb_next(n);
+	}
+}
+
+/**
+ * eeh_addr_cache_rmv_dev - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	__eeh_addr_cache_rmv_dev(dev);
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
+ *
+ * Initialize a cache of pci i/o addresses.  This cache will be used to
+ * find the pci device that corresponds to a given address.
+ */
+void eeh_addr_cache_init(void)
+{
+	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+}
+
+static int eeh_addr_cache_show(struct seq_file *s, void *v)
+{
+	struct pci_io_addr_range *piar;
+	struct rb_node *n;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+	for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
+		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+		seq_printf(s, "%s addr range [%pap-%pap]: %s\n",
+		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
+		       &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
+	}
+	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
+
+void __init eeh_cache_debugfs_init(void)
+{
+	debugfs_create_file_unsafe("eeh_address_cache", 0400,
+			arch_debugfs_dir, NULL,
+			&eeh_addr_cache_fops);
+}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
new file mode 100644
index 0000000000..438568a472
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -0,0 +1,1222 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
+ * Copyright IBM Corp. 2004 2005
+ * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
+ *
+ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/rtas.h>
+
+struct eeh_rmv_data {
+	struct list_head removed_vf_list;
+	int removed_dev_count;
+};
+
+static int eeh_result_priority(enum pci_ers_result result)
+{
+	switch (result) {	/* a larger value takes precedence in a merge */
+	case PCI_ERS_RESULT_NONE:
+		return 1;
+	case PCI_ERS_RESULT_NO_AER_DRIVER:
+		return 2;
+	case PCI_ERS_RESULT_RECOVERED:
+		return 3;
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		return 4;
+	case PCI_ERS_RESULT_DISCONNECT:
+		return 5;
+	case PCI_ERS_RESULT_NEED_RESET:
+		return 6;
+	default:
+		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
+		return 0;	/* unknown values rank below every known one */
+	}
+}
+
+static const char *pci_ers_result_name(enum pci_ers_result result)
+{
+	switch (result) {	/* label used when logging a recovery result */
+	case PCI_ERS_RESULT_NONE:
+		return "none";
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		return "can recover";
+	case PCI_ERS_RESULT_NEED_RESET:
+		return "need reset";
+	case PCI_ERS_RESULT_DISCONNECT:
+		return "disconnect";
+	case PCI_ERS_RESULT_RECOVERED:
+		return "recovered";
+	case PCI_ERS_RESULT_NO_AER_DRIVER:
+		return "no AER driver";
+	default:
+		WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
+		return "unknown";
+	}
+}
+
+static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
+						enum pci_ers_result new)
+{
+	bool upgrade = eeh_result_priority(new) > eeh_result_priority(old);
+
+	return upgrade ? new : old;	/* ties keep @old */
+}
+
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+	return !edev || (edev->mode & EEH_DEV_REMOVED);
+}
+
+static bool eeh_edev_actionable(struct eeh_dev *edev)
+{
+	if (!edev->pdev)
+		return false;
+	if (edev->pdev->error_state == pci_channel_io_perm_failure)
+		return false;
+	if (eeh_dev_removed(edev))
+		return false;
+	if (eeh_pe_passed(edev->pe))
+		return false;
+
+	return true;
+}
+
+/**
+ * eeh_pcid_get - Get the PCI device driver
+ * @pdev: PCI device
+ *
+ * Retrieve the PCI device driver bound to the indicated PCI
+ * device. A reference is also taken on the module owning the
+ * driver so that it cannot be unloaded on the fly, which would
+ * otherwise crash the kernel. Pair with eeh_pcid_put().
+ */
+static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
+{
+	if (!pdev || !pdev->dev.driver)
+		return NULL;
+
+	if (!try_module_get(pdev->dev.driver->owner))
+		return NULL;
+
+	return to_pci_driver(pdev->dev.driver);
+}
+
+/**
+ * eeh_pcid_put - Drop the reference on the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is called to drop the module reference held on
+ * the PCI device driver of the indicated PCI device.
+ */
+static inline void eeh_pcid_put(struct pci_dev *pdev)
+{
+	if (!pdev || !pdev->dev.driver)
+		return;
+
+	module_put(pdev->dev.driver->owner);
+}
+
+/**
+ * eeh_disable_irq - Disable interrupt for the recovering device
+ * @edev: EEH device
+ *
+ * This routine must be called when reporting temporary or permanent
+ * error to the particular PCI device to disable interrupt of that
+ * device. If the device has enabled MSI or MSI-X interrupt, we needn't
+ * do real work because EEH should freeze DMA transfers for those PCI
+ * devices encountering EEH errors, which includes MSI or MSI-X.
+ */
+static void eeh_disable_irq(struct eeh_dev *edev)
+{
+	/* Don't disable MSI and MSI-X interrupts. They are
+	 * effectively disabled by the DMA Stopped state
+	 * when an EEH error occurs.
+	 */
+	if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
+		return;
+
+	if (!irq_has_action(edev->pdev->irq))
+		return;
+
+	edev->mode |= EEH_DEV_IRQ_DISABLED;
+	disable_irq_nosync(edev->pdev->irq);
+}
+
+/**
+ * eeh_enable_irq - Enable interrupt for the recovering device
+ * @edev: EEH device
+ *
+ * This routine must be called to enable interrupt while failed
+ * device could be resumed.
+ */
+static void eeh_enable_irq(struct eeh_dev *edev)
+{
+	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
+		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
+		/*
+		 * FIXME !!!!!
+		 *
+		 * This is just ass backwards. This maze has
+		 * unbalanced irq_enable/disable calls. So instead of
+		 * finding the root cause it works around the warning
+		 * in the irq_enable code by conditionally calling
+		 * into it.
+		 *
+		 * That's just wrong.The warning in the core code is
+		 * there to tell people to fix their asymmetries in
+		 * their own code, not by abusing the core information
+		 * to avoid it.
+		 *
+		 * I so wish that the assymetry would be the other way
+		 * round and a few more irq_disable calls render that
+		 * shit unusable forever.
+		 *
+		 *	tglx
+		 */
+		if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
+			enable_irq(edev->pdev->irq);
+	}
+}
+
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
+{
+	struct pci_dev *pdev;
+
+	if (!edev)
+		return;
+
+	/*
+	 * We cannot access the config space on some adapters.
+	 * Otherwise, it will cause fenced PHB. We don't save
+	 * the content in their config space and will restore
+	 * from the initial config space saved when the EEH
+	 * device is created.
+	 */
+	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
+		return;
+
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (!pdev)
+		return;
+
+	pci_save_state(pdev);
+}
+
+static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_for_each_pe(root, pe)
+		eeh_pe_for_each_dev(pe, edev, tmp)
+			if (eeh_edev_actionable(edev))
+				edev->pdev->error_state = s;
+}
+
+static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_for_each_pe(root, pe) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
+			if (!eeh_edev_actionable(edev))
+				continue;
+
+			if (!eeh_pcid_get(edev->pdev))
+				continue;
+
+			if (enable)
+				eeh_enable_irq(edev);
+			else
+				eeh_disable_irq(edev);
+
+			eeh_pcid_put(edev->pdev);
+		}
+	}
+}
+
+typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+					     struct pci_dev *,
+					     struct pci_driver *);
+static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
+			       enum pci_ers_result *result)
+{
+	struct pci_dev *pdev;
+	struct pci_driver *driver;
+	enum pci_ers_result new_result;
+
+	pci_lock_rescan_remove();
+	pdev = edev->pdev;
+	if (pdev)
+		get_device(&pdev->dev);
+	pci_unlock_rescan_remove();
+	if (!pdev) {
+		eeh_edev_info(edev, "no device");
+		return;
+	}
+	device_lock(&pdev->dev);
+	if (eeh_edev_actionable(edev)) {
+		driver = eeh_pcid_get(pdev);
+
+		if (!driver)
+			eeh_edev_info(edev, "no driver");
+		else if (!driver->err_handler)
+			eeh_edev_info(edev, "driver not EEH aware");
+		else if (edev->mode & EEH_DEV_NO_HANDLER)
+			eeh_edev_info(edev, "driver bound too late");
+		else {
+			new_result = fn(edev, pdev, driver);
+			eeh_edev_info(edev, "%s driver reports: '%s'",
+				      driver->name,
+				      pci_ers_result_name(new_result));
+			if (result)
+				*result = pci_ers_merge_result(*result,
+							       new_result);
+		}
+		if (driver)
+			eeh_pcid_put(pdev);
+	} else {
+		eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
+			      !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
+	}
+	device_unlock(&pdev->dev);
+	if (edev->pdev != pdev)
+		eeh_edev_warn(edev, "Device changed during processing!\n");
+	put_device(&pdev->dev);
+}
+
+static void eeh_pe_report(const char *name, struct eeh_pe *root,
+			  eeh_report_fn fn, enum pci_ers_result *result)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	pr_info("EEH: Beginning: '%s'\n", name);
+	eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
+		eeh_pe_report_edev(edev, fn, result);
+	if (result)
+		pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
+			name, pci_ers_result_name(*result));
+	else
+		pr_info("EEH: Finished:'%s'", name);
+}
+
+/**
+ * eeh_report_error - Report pci error to each device driver
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * Report an EEH error to each device driver.
+ */
+static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+					    struct pci_dev *pdev,
+					    struct pci_driver *driver)
+{
+	enum pci_ers_result rc;
+
+	if (!driver->err_handler->error_detected)
+		return PCI_ERS_RESULT_NONE;
+
+	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
+		      driver->name);
+	rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
+
+	edev->in_error = true;
+	pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
+	return rc;
+}
+
+/**
+ * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * Tells each device driver that IO ports, MMIO and config space I/O
+ * are now enabled.
+ */
+static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+						   struct pci_dev *pdev,
+						   struct pci_driver *driver)
+{
+	if (!driver->err_handler->mmio_enabled)
+		return PCI_ERS_RESULT_NONE;
+	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
+	return driver->err_handler->mmio_enabled(pdev);
+}
+
+/**
+ * eeh_report_reset - Tell device that slot has been reset
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * This routine must be called while EEH tries to reset particular
+ * PCI device so that the associated PCI device driver could take
+ * some actions, usually to save data the driver needs so that the
+ * driver can work again while the device is recovered.
+ */
+static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+					    struct pci_dev *pdev,
+					    struct pci_driver *driver)
+{
+	if (!driver->err_handler->slot_reset || !edev->in_error)
+		return PCI_ERS_RESULT_NONE;
+	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
+	return driver->err_handler->slot_reset(pdev);
+}
+
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+{
+	struct pci_dev *pdev;
+
+	if (!edev)
+		return;
+
+	/*
+	 * The content in the config space isn't saved because
+	 * the blocked config space on some adapters. We have
+	 * to restore the initial saved config space when the
+	 * EEH device is created.
+	 */
+	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
+		if (list_is_last(&edev->entry, &edev->pe->edevs))
+			eeh_pe_restore_bars(edev->pe);
+
+		return;
+	}
+
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (!pdev)
+		return;
+
+	pci_restore_state(pdev);
+}
+
+/**
+ * eeh_report_resume - Tell device to resume normal operations
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * This routine must be called to notify the device driver that it
+ * could resume so that the device driver can do some initialization
+ * to make the recovered device work again.
+ */
+static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+					     struct pci_dev *pdev,
+					     struct pci_driver *driver)
+{
+	if (!driver->err_handler->resume || !edev->in_error)
+		return PCI_ERS_RESULT_NONE;
+
+	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
+	driver->err_handler->resume(pdev);
+
+	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+	if (eeh_ops->notify_resume)
+		eeh_ops->notify_resume(edev);
+#endif
+	return PCI_ERS_RESULT_NONE;
+}
+
+/**
+ * eeh_report_failure - Tell device driver that device is dead.
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * This informs the device driver that the device is permanently
+ * dead, and that no further recovery attempts will be made on it.
+ */
+static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+					      struct pci_dev *pdev,
+					      struct pci_driver *driver)
+{
+	enum pci_ers_result rc;
+
+	if (!driver->err_handler->error_detected)
+		return PCI_ERS_RESULT_NONE;
+
+	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
+		      driver->name);
+	rc = driver->err_handler->error_detected(pdev,
+						 pci_channel_io_perm_failure);
+
+	pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
+	return rc;
+}
+
+static void *eeh_add_virt_device(struct eeh_dev *edev)
+{
+	struct pci_driver *driver;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+
+	if (!(edev->physfn)) {
+		eeh_edev_warn(edev, "Not for VF\n");
+		return NULL;
+	}
+
+	driver = eeh_pcid_get(dev);
+	if (driver) {
+		if (driver->err_handler) {
+			eeh_pcid_put(dev);
+			return NULL;
+		}
+		eeh_pcid_put(dev);
+	}
+
+#ifdef CONFIG_PCI_IOV
+	pci_iov_add_virtfn(edev->physfn, edev->vf_index);
+#endif
+	return NULL;
+}
+
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
+{
+	struct eeh_rmv_data *rmv_data = userdata;
+	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
+	const struct pci_error_handlers *handlers;
+	struct pci_driver *drv;
+	bool can_recover;
+
+	/*
+	 * Strictly speaking the PCI bridges ought to be removed too,
+	 * but that is complex, particularly because some devices below
+	 * a bridge might support EEH. For simplicity only ordinary
+	 * PCI devices are dealt with here.
+	 */
+	if (!eeh_edev_actionable(edev) ||
+	    pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		return;
+
+	/* With @userdata, skip devices whose driver can do a slot reset */
+	drv = rmv_data ? eeh_pcid_get(pdev) : NULL;
+	if (drv) {
+		handlers = drv->err_handler;
+		can_recover = handlers && handlers->error_detected &&
+			      handlers->slot_reset;
+		eeh_pcid_put(pdev);
+		if (can_recover)
+			return;
+	}
+
+	/* Remove it from PCI subsystem */
+	pr_info("EEH: Removing %s without EEH sensitive driver\n",
+		pci_name(pdev));
+	edev->mode |= EEH_DEV_DISCONNECTED;
+	if (rmv_data)
+		rmv_data->removed_dev_count++;
+
+	if (!edev->physfn) {
+		pci_lock_rescan_remove();
+		pci_stop_and_remove_bus_device(pdev);
+		pci_unlock_rescan_remove();
+		return;
+	}
+
+#ifdef CONFIG_PCI_IOV
+	pci_iov_remove_virtfn(edev->physfn, edev->vf_index);
+	edev->pdev = NULL;
+#endif
+	if (rmv_data)
+		list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
+}
+
+static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
+{
+	struct eeh_dev *dev, *next;
+
+	/* Drop every device flagged as disconnected from the PE tree */
+	eeh_pe_for_each_dev(pe, dev, next) {
+		if (dev->mode & EEH_DEV_DISCONNECTED) {
+			dev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+			eeh_pe_tree_remove(dev);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Explicitly clear the PE's frozen state. On PowerNV the PE is kept
+ * frozen until BAR restore has completed; on pSeries the extra
+ * clearing is harmless. For consistency with PE reset, which is
+ * tried 3 times, unfreezing is attempted up to 3 times as well.
+ * Returns 0 on success or -EIO when a PE cannot be unfrozen.
+ */
+static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
+{
+	struct eeh_pe *pe;
+	int tries;
+
+	eeh_for_each_pe(root, pe) {
+		if (!include_passed && eeh_pe_passed(pe))
+			continue;
+		for (tries = 0; tries < 3; tries++)
+			if (!eeh_unfreeze_pe(pe))
+				break;
+		if (tries >= 3)
+			return -EIO;
+	}
+	eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
+	return 0;
+}
+
+/*
+ * Save device state, fully reset and unfreeze the PE, then restore the
+ * state. Returns 0 on success or if a recovery is already under way.
+ */
+int eeh_pe_reset_and_recover(struct eeh_pe *pe)
+{
+	int rc;
+
+	/* Nothing to do while another recovery of this PE is running */
+	if (pe->state & EEH_PE_RECOVERING)
+		return 0;
+
+	/* Enter recovery mode */
+	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
+	/* Save states */
+	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
+
+	/* Issue reset */
+	rc = eeh_pe_reset_full(pe, true);
+	if (rc)
+		goto out;
+
+	/* Unfreeze the PE */
+	rc = eeh_clear_pe_frozen_state(pe, true);
+	if (rc)
+		goto out;
+
+	/* Restore device state */
+	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
+
+out:
+	/* Recovery mode is left on success and on failure alike */
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+	return rc;
+}
+
+/**
+ * eeh_reset_device - Perform actual reset of a pci slot
+ * @pe: EEH PE
+ * @bus: PCI bus corresponding to the isolated slot
+ * @rmv_data: Optional (may be NULL), records the removed devices
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
+ *
+ * This routine must be called to do reset on the indicated PE.
+ * During the reset, udev might be invoked because those affected
+ * PCI devices will be removed and then added. Returns 0 or an error.
+ */
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+			    struct eeh_rmv_data *rmv_data,
+			    bool driver_eeh_aware)
+{
+	time64_t tstamp;
+	int cnt, rc;
+	struct eeh_dev *edev;
+	struct eeh_pe *tmp_pe;
+	bool any_passed = false;
+
+	eeh_for_each_pe(pe, tmp_pe)
+		any_passed |= eeh_pe_passed(tmp_pe);
+
+	/* pcibios will clear the counter; save the value */
+	cnt = pe->freeze_count;
+	tstamp = pe->tstamp;
+
+	/*
+	 * We don't remove the corresponding PE instances because
+	 * we need the information afterwards. The attached EEH
+	 * devices are expected to be attached soon when calling
+	 * into pci_hp_add_devices().
+	 */
+	eeh_pe_state_mark(pe, EEH_PE_KEEP);
+	if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+	} else {
+		pci_lock_rescan_remove();
+		pci_hp_remove_devices(bus);
+		pci_unlock_rescan_remove();
+	}
+
+	/*
+	 * Reset the pci controller. (Asserts RST#; resets config space).
+	 * Reconfigure bridges and devices. Don't try to bring the system
+	 * up if the reset failed for some reason.
+	 *
+	 * During the reset, it's very dangerous to have uncontrolled PCI
+	 * config accesses. So we prefer to block them. However, controlled
+	 * PCI config accesses initiated from EEH itself are allowed.
+	 */
+	rc = eeh_pe_reset_full(pe, false);
+	if (rc)
+		return rc;
+
+	pci_lock_rescan_remove();
+
+	/* Restore PE */
+	eeh_ops->configure_bridge(pe);
+	eeh_pe_restore_bars(pe);
+
+	/* Clear frozen state */
+	rc = eeh_clear_pe_frozen_state(pe, false);
+	if (rc) {
+		pci_unlock_rescan_remove();
+		return rc;
+	}
+
+	/* Give the system 5 seconds to finish running the user-space
+	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
+	 * this is a hack, but if we don't do this, and try to bring
+	 * the device up before the scripts have taken it down,
+	 * potentially weird things happen. rmv_data may be NULL here.
+	 */
+	if (!driver_eeh_aware || (rmv_data && rmv_data->removed_dev_count)) {
+		pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+			(driver_eeh_aware ? "partial" : "complete"));
+		ssleep(5);
+
+		/*
+		 * The EEH device is still connected with its parent
+		 * PE. We should disconnect it so the binding can be
+		 * rebuilt when adding PCI devices.
+		 */
+		edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
+		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+		if (pe->type & EEH_PE_VF) {
+			eeh_add_virt_device(edev);
+		} else {
+			if (!driver_eeh_aware)
+				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+			pci_hp_add_devices(bus);
+		}
+	}
+	eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
+
+	pe->tstamp = tstamp;
+	pe->freeze_count = cnt;
+
+	pci_unlock_rescan_remove();
+	return 0;
+}
+
+/* The longest amount of time to wait for a pci device
+ * to come back online, in seconds.
+ */
+#define MAX_WAIT_FOR_RECOVERY 300
+
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: The walk is recursive and descends into the children first, so
+ * that leaf PEs are removed before their parents. An iterative walk
+ * would be possible too, but recursion is easier to read and
+ * guarantees that the leaf nodes are handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+	struct eeh_pe *sub, *next;
+	bool stale;
+
+	list_for_each_entry_safe(sub, next, &pe->child_list, child)
+		eeh_pe_cleanup(sub);
+
+	/* Only PEs marked invalid, and not marked to be kept, may go */
+	stale = !(pe->state & EEH_PE_KEEP) && (pe->state & EEH_PE_INVALID);
+	if (!stale)
+		return;
+
+	if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+		list_del(&pe->child);
+		kfree(pe);
+	}
+}
+
+/**
+ * eeh_slot_presence_check - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * Returns false for a NULL or permanently failed @pdev.
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+	const struct hotplug_slot_ops *hp_ops;
+	struct hotplug_slot *hp;
+	u8 present;
+
+	/* No device, or one already written off: treat as absent */
+	if (!pdev || pdev->error_state == pci_channel_io_perm_failure)
+		return false;
+
+	/* Without a way to query the slot, assume the device is there */
+	hp = pdev->slot ? pdev->slot->hotplug : NULL;
+	if (!hp)
+		return true;
+
+	hp_ops = hp->ops;
+	if (!hp_ops || !hp_ops->get_adapter_status)
+		return true;
+
+	/* set the attention indicator while we've got the slot ops */
+	if (hp_ops->set_attention_status)
+		hp_ops->set_attention_status(hp, 1);
+
+	/* A failed query also counts as "present" */
+	if (hp_ops->get_adapter_status(hp, &present))
+		return true;
+
+	return !!present;
+}
+
+/* Clear the attention status of the device's slot, where supported */
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+	const struct hotplug_slot_ops *hp_ops;
+	struct hotplug_slot *hp;
+
+	/* Permanently failed devices are left alone */
+	if (!pdev || pdev->error_state == pci_channel_io_perm_failure)
+		return;
+
+	if (!pdev->slot)
+		return;
+
+	hp = pdev->slot->hotplug;
+	if (!hp)
+		return;
+
+	hp_ops = hp->ops;
+	if (hp_ops && hp_ops->set_attention_status)
+		hp_ops->set_attention_status(hp, 0);
+}
+
+/**
+ * eeh_handle_normal_event - Handle EEH events on a specific PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
+ *
+ * Attempts to recover the given PE.  If recovery fails or the PE has failed
+ * too many times, remove the PE.
+ *
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
+ */
+void eeh_handle_normal_event(struct eeh_pe *pe)
+{
+	struct pci_bus *bus;
+	struct eeh_dev *edev, *tmp;
+	struct eeh_pe *tmp_pe;
+	int rc = 0;
+	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+	struct eeh_rmv_data rmv_data =
+		{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+	int devices = 0;
+
+	bus = eeh_pe_bus_get(pe);
+	if (!bus) {
+		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
+			__func__, pe->phb->global_number, pe->addr);
+		return;
+	}
+
+	/*
+	 * When devices are hot-removed we might get an EEH due to
+	 * a driver attempting to touch the MMIO space of a removed
+	 * device. In this case we don't have a device to recover
+	 * so suppress the event if we can't find any present devices.
+	 *
+	 * The hotplug driver should take care of tearing down the
+	 * device itself.
+	 */
+	eeh_for_each_pe(pe, tmp_pe)
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+			if (eeh_slot_presence_check(edev->pdev))
+				devices++;
+
+	if (!devices) {
+		pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+			pe->phb->global_number, pe->addr);
+		goto out; /* nothing to recover */
+	}
+
+	/* Log the event */
+	if (pe->type & EEH_PE_PHB) {
+		pr_err("EEH: Recovering PHB#%x, location: %s\n",
+			pe->phb->global_number, eeh_pe_loc_get(pe));
+	} else {
+		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+		pr_err("EEH: Recovering PHB#%x-PE#%x\n",
+		       pe->phb->global_number, pe->addr);
+		pr_err("EEH: PE location: %s, PHB location: %s\n",
+		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+	}
+
+#ifdef CONFIG_STACKTRACE
+	/*
+	 * Print the saved stack trace now that we've verified there's
+	 * something to recover.
+	 */
+	if (pe->trace_entries) {
+		void **ptrs = (void **) pe->stack_trace;
+		int i;
+
+		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+		       pe->phb->global_number, pe->addr);
+
+		/* FIXME: Use the same format as dump_stack() */
+		pr_err("EEH: Call Trace:\n");
+		for (i = 0; i < pe->trace_entries; i++)
+			pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+		pe->trace_entries = 0;
+	}
+#endif /* CONFIG_STACKTRACE */
+
+	eeh_for_each_pe(pe, tmp_pe)
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+			edev->mode &= ~EEH_DEV_NO_HANDLER;
+
+	eeh_pe_update_time_stamp(pe);
+	pe->freeze_count++;
+	if (pe->freeze_count > eeh_max_freezes) {
+		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
+		       pe->phb->global_number, pe->addr,
+		       pe->freeze_count);
+
+		goto recover_failed;
+	}
+
+	/* Walk the various device drivers attached to this slot through
+	 * a reset sequence, giving each an opportunity to do what it needs
+	 * to accomplish the reset.  Each child gets a report of the
+	 * status ... if any child can't handle the reset, then the entire
+	 * slot is dlpar removed and added.
+	 *
+	 * When the PHB is fenced, we have to issue a reset to recover from
+	 * the error. Override the result if necessary to have partial
+	 * hotplug for this case.
+	 */
+	pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+		pe->freeze_count, eeh_max_freezes);
+	pr_info("EEH: Notify device drivers to shutdown\n");
+	eeh_set_channel_state(pe, pci_channel_io_frozen);
+	eeh_set_irq_state(pe, false);
+	eeh_pe_report("error_detected(IO frozen)", pe,
+		      eeh_report_error, &result);
+	if (result == PCI_ERS_RESULT_DISCONNECT)
+		goto recover_failed;
+
+	/*
+	 * Errors logged on a PHB are always fences which need a full
+	 * PHB reset to clear so force that to happen.
+	 */
+	if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
+		result = PCI_ERS_RESULT_NEED_RESET;
+
+	/* Get the current PCI slot state. This can take a long time,
+	 * sometimes over 300 seconds for certain systems.
+	 */
+	rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
+	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+		pr_warn("EEH: Permanent failure\n");
+		goto recover_failed;
+	}
+
+	/* Since rtas may enable MMIO when posting the error log,
+	 * don't post the error log until after all dev drivers
+	 * have been informed.
+	 */
+	pr_info("EEH: Collect temporary log\n");
+	eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+
+	/* If all device drivers were EEH-unaware, then shut
+	 * down all of the device drivers, and hope they
+	 * go down willingly, without panicking the system.
+	 */
+	if (result == PCI_ERS_RESULT_NONE) {
+		pr_info("EEH: Reset with hotplug activity\n");
+		rc = eeh_reset_device(pe, bus, NULL, false);
+		if (rc) {
+			pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
+			goto recover_failed;
+		}
+	}
+
+	/* If all devices reported they can proceed, then re-enable MMIO */
+	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+		pr_info("EEH: Enable I/O for affected devices\n");
+		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+		if (rc < 0)
+			goto recover_failed;
+
+		if (rc) {
+			result = PCI_ERS_RESULT_NEED_RESET;
+		} else {
+			pr_info("EEH: Notify device drivers to resume I/O\n");
+			eeh_pe_report("mmio_enabled", pe,
+				      eeh_report_mmio_enabled, &result);
+		}
+	}
+	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+		pr_info("EEH: Enabled DMA for affected devices\n");
+		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+		if (rc < 0)
+			goto recover_failed;
+
+		if (rc) {
+			result = PCI_ERS_RESULT_NEED_RESET;
+		} else {
+			/*
+			 * We didn't do a PE reset in this case. The PE
+			 * is still in frozen state. Clear it before
+			 * resuming the PE.
+			 */
+			eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+			result = PCI_ERS_RESULT_RECOVERED;
+		}
+	}
+
+	/* If any device called out for a reset, then reset the slot */
+	if (result == PCI_ERS_RESULT_NEED_RESET) {
+		pr_info("EEH: Reset without hotplug activity\n");
+		rc = eeh_reset_device(pe, bus, &rmv_data, true);
+		if (rc) {
+			pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
+			goto recover_failed;
+		}
+
+		result = PCI_ERS_RESULT_NONE;
+		eeh_set_channel_state(pe, pci_channel_io_normal);
+		eeh_set_irq_state(pe, true);
+		eeh_pe_report("slot_reset", pe, eeh_report_reset,
+			      &result);
+	}
+
+	if ((result == PCI_ERS_RESULT_RECOVERED) ||
+	    (result == PCI_ERS_RESULT_NONE)) {
+		/*
+		 * Hot-removed VFs are added back only after the PF has
+		 * been recovered properly.
+		 */
+		list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+					 rmv_entry) {
+			eeh_add_virt_device(edev);
+			list_del(&edev->rmv_entry);
+		}
+
+		/* Tell all device drivers that they can resume operations */
+		pr_info("EEH: Notify device driver to resume\n");
+		eeh_set_channel_state(pe, pci_channel_io_normal);
+		eeh_set_irq_state(pe, true);
+		eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+		eeh_for_each_pe(pe, tmp_pe) {
+			eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+				edev->mode &= ~EEH_DEV_NO_HANDLER;
+				edev->in_error = false;
+			}
+		}
+
+		pr_info("EEH: Recovery successful.\n");
+		goto out;
+	}
+
+recover_failed:
+	/*
+	 * About 90% of all real-life EEH failures in the field
+	 * are due to poorly seated PCI cards. Only 10% or so are
+	 * due to actual, failed cards.
+	 */
+	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+		"Please try reseating or replacing it\n",
+		pe->phb->global_number, pe->addr);
+
+	eeh_slot_error_detail(pe, EEH_LOG_PERM);
+
+	/* Notify all devices that they're about to go down. */
+	eeh_set_irq_state(pe, false);
+	eeh_pe_report("error_detected(permanent failure)", pe,
+		      eeh_report_failure, NULL);
+	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+
+	/* Mark the PE to be removed permanently */
+	eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+
+	/*
+	 * Shut down the device drivers for good. We mark
+	 * all removed devices correctly to avoid accessing
+	 * their PCI config any more.
+	 */
+	if (pe->type & EEH_PE_VF) {
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+	} else {
+		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+		pci_lock_rescan_remove();
+		pci_hp_remove_devices(bus);
+		pci_unlock_rescan_remove();
+		/* The passed PE should no longer be used */
+		return;
+	}
+
+out:
+	/*
+	 * Clean up any PEs without devices now that recovery is finished.
+	 * NOTE(review): this can kfree() pe, yet pe is used below - confirm.
+	 */
+	eeh_pe_cleanup(pe);
+
+	/* clear the slot attention LED for all recovered devices */
+	eeh_for_each_pe(pe, tmp_pe)
+		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+			eeh_clear_slot_attention(edev->pdev);
+
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+}
+
+/**
+ * eeh_handle_special_event - Handle EEH events without a specific failing PE
+ *
+ * Called when an EEH event is detected but can't be narrowed down to a
+ * specific PE.  Iterates through possible failures and handles them as
+ * necessary.
+ */
+void eeh_handle_special_event(void)
+{
+	struct eeh_pe *pe, *phb_pe, *tmp_pe;
+	struct eeh_dev *edev, *tmp_edev;
+	struct pci_bus *bus;
+	struct pci_controller *hose;
+	unsigned long flags;
+	int rc;
+
+	do {
+		rc = eeh_ops->next_error(&pe);
+
+		switch (rc) {
+		case EEH_NEXT_ERR_DEAD_IOC:
+			/* Mark all PHBs in dead state */
+			eeh_serialize_lock(&flags);
+
+			/* Purge all events */
+			eeh_remove_event(NULL, true);
+
+			list_for_each_entry(hose, &hose_list, list_node) {
+				phb_pe = eeh_phb_pe_get(hose);
+				if (!phb_pe)
+					continue;
+
+				eeh_pe_mark_isolated(phb_pe);
+			}
+
+			eeh_serialize_unlock(flags);
+
+			break;
+		case EEH_NEXT_ERR_FROZEN_PE:
+		case EEH_NEXT_ERR_FENCED_PHB:
+		case EEH_NEXT_ERR_DEAD_PHB:
+			/* Mark the PE in fenced state */
+			eeh_serialize_lock(&flags);
+
+			/* Purge all events of the PHB */
+			eeh_remove_event(pe, true);
+
+			if (rc != EEH_NEXT_ERR_DEAD_PHB)
+				eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+			eeh_pe_mark_isolated(pe);
+
+			eeh_serialize_unlock(flags);
+
+			break;
+		case EEH_NEXT_ERR_NONE:
+			return;
+		default:
+			pr_warn("%s: Invalid value %d from next_error()\n",
+				__func__, rc);
+			return;
+		}
+
+		/*
+		 * Fenced PHBs and frozen PEs are handled as normal events.
+		 * For a dead PHB or IOC the affected PHBs must be removed.
+		 * NOTE(review): assumes next_error() set pe for dead IOC.
+		 */
+		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
+		    rc == EEH_NEXT_ERR_FENCED_PHB) {
+			eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+			eeh_handle_normal_event(pe);
+		} else {
+			eeh_for_each_pe(pe, tmp_pe)
+				eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+					edev->mode &= ~EEH_DEV_NO_HANDLER;
+
+			/* Notify all devices to be down */
+			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+			eeh_pe_report(
+				"error_detected(permanent failure)", pe,
+				eeh_report_failure, NULL);
+			eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+
+			pci_lock_rescan_remove();
+			list_for_each_entry(hose, &hose_list, list_node) {
+				phb_pe = eeh_phb_pe_get(hose);
+				if (!phb_pe ||
+				    !(phb_pe->state & EEH_PE_ISOLATED) ||
+				    (phb_pe->state & EEH_PE_RECOVERING))
+					continue;
+
+				bus = eeh_pe_bus_get(phb_pe);
+				if (!bus) {
+					pr_err("%s: Cannot find PCI bus for "
+					       "PHB#%x-PE#%x\n",
+					       __func__,
+					       phb_pe->phb->global_number,
+					       phb_pe->addr);
+					break;
+				}
+				pci_hp_remove_devices(bus);
+			}
+			pci_unlock_rescan_remove();
+		}
+
+		/*
+		 * If we have detected dead IOC, we needn't proceed
+		 * any more since all PHBs would have been removed
+		 */
+		if (rc == EEH_NEXT_ERR_DEAD_IOC)
+			break;
+	} while (rc != EEH_NEXT_ERR_NONE);
+}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
new file mode 100644
index 0000000000..c23a454af0
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+
+/** Overview:
+ *  EEH error states may be detected within exception handlers;
+ *  however, the recovery processing needs to occur asynchronously
+ *  in a normal kernel context and not an interrupt context.
+ *  This pair of routines creates an event and queues it onto a
+ *  list, where a dedicated kernel thread can drive recovery.
+ */
+
+static DEFINE_SPINLOCK(eeh_eventlist_lock);	/* protects eeh_eventlist */
+static DECLARE_COMPLETION(eeh_eventlist_event);	/* completed once per queued event */
+static LIST_HEAD(eeh_eventlist);		/* pending struct eeh_event entries */
+
+/**
+ * eeh_event_handler - Dispatch EEH events.
+ * @dummy: unused
+ *
+ * The detection of a frozen slot can occur inside an interrupt,
+ * where it can be hard to do anything about it.  This thread pulls
+ * the queued detection events out of that context and handles
+ * them later in a normal context: events bound to a PE go to
+ * eeh_handle_normal_event(), the others to eeh_handle_special_event().
+ */
+static int eeh_event_handler(void * dummy)
+{
+	struct eeh_event *ev;
+	unsigned long irqflags;
+
+	while (!kthread_should_stop()) {
+		/* Sleep until an event is posted; stop when interrupted */
+		if (wait_for_completion_interruptible(&eeh_eventlist_event))
+			break;
+
+		/* Fetch EEH event from the queue */
+		spin_lock_irqsave(&eeh_eventlist_lock, irqflags);
+		ev = list_first_entry_or_null(&eeh_eventlist,
+					      struct eeh_event, list);
+		if (ev)
+			list_del(&ev->list);
+		spin_unlock_irqrestore(&eeh_eventlist_lock, irqflags);
+		/* The event may have been purged in the meantime */
+		if (!ev)
+			continue;
+
+		/* We might have event without binding PE */
+		if (!ev->pe)
+			eeh_handle_special_event();
+		else
+			eeh_handle_normal_event(ev->pe);
+
+		kfree(ev);
+	}
+
+	return 0;
+}
+
+/**
+ * eeh_event_init - Start kernel thread to handle EEH events
+ *
+ * Starts the "eehd" kernel thread that processes EEH events.
+ * Returns 0 on success or the error reported by kthread_run().
+ */
+int eeh_event_init(void)
+{
+	struct task_struct *eehd;
+	int err;
+
+	eehd = kthread_run(eeh_event_handler, NULL, "eehd");
+	if (!IS_ERR(eehd))
+		return 0;
+
+	/* Thread creation failed: log it and hand the errno back */
+	err = PTR_ERR(eehd);
+	pr_err("%s: Failed to start EEH daemon (%d)\n",
+		__func__, err);
+	return err;
+}
+
+/**
+ * __eeh_send_failure_event - Generate a PCI error event
+ * @pe: EEH PE, or NULL for an event not bound to a specific PE
+ *
+ * Callable from interrupt context; the event is handled later by the
+ * eehd thread. NOTE(review): list_add() puts the event at the list
+ * head, so pending events look to be handled newest-first - confirm.
+ */
+int __eeh_send_failure_event(struct eeh_pe *pe)
+{
+	unsigned long flags;
+	struct eeh_event *event;
+
+	event = kzalloc(sizeof(*event), GFP_ATOMIC);
+	if (!event) {
+		pr_err("EEH: out of memory, event not handled\n");
+		return -ENOMEM;
+	}
+	event->pe = pe;
+
+	/*
+	 * Mark the PE as recovering before inserting it in the queue.
+	 * This prevents the PE from being free()ed by a hotplug driver
+	 * while the PE is sitting in the event queue.
+	 */
+	if (pe) {
+#ifdef CONFIG_STACKTRACE
+		/*
+		 * Save the current stack trace so we can dump it from the
+		 * event handler thread.
+		 */
+		pe->trace_entries = stack_trace_save(pe->stack_trace,
+					 ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+		eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+	}
+
+	/* We may or may not be called in an interrupt context */
+	spin_lock_irqsave(&eeh_eventlist_lock, flags);
+	list_add(&event->list, &eeh_eventlist);
+	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+
+	/* Wake the EEH daemon so that it picks the event up */
+	complete(&eeh_eventlist_event);
+
+	return 0;
+}
+
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+	/*
+	 * Normal case: recovery events have not been suppressed
+	 * through debugfs, so queue the event for the EEH thread.
+	 */
+	if (!eeh_debugfs_no_recover)
+		return __eeh_send_failure_event(pe);
+
+	/* Suppressed: drop the event on the floor, reporting success */
+	pr_err("EEH: Event dropped due to no_recover setting\n");
+	return 0;
+}
+
+/**
+ * eeh_remove_event - Remove EEH event from the queue
+ * @pe: PE whose events are dropped; NULL drops events of every PE
+ * @force: Also drop events whose PE is already marked isolated
+ *
+ * On the PowerNV platform a later event may merely be a part of an
+ * earlier one. Such follow-up events are duplicates and unnecessary,
+ * so they should be removed. For a PHB PE, every event on that PHB
+ * matches; otherwise only events bound to @pe itself match.
+ */
+void eeh_remove_event(struct eeh_pe *pe, bool force)
+{
+	struct eeh_event *ev, *next;
+	unsigned long irqflags;
+	bool match;
+
+	spin_lock_irqsave(&eeh_eventlist_lock, irqflags);
+	list_for_each_entry_safe(ev, next, &eeh_eventlist, list) {
+		/*
+		 * Unless "force" is given, an event whose PE has already
+		 * been isolated is left in the queue so that it does not
+		 * get lost.
+		 */
+		if (!force && ev->pe && (ev->pe->state & EEH_PE_ISOLATED))
+			continue;
+
+		/*
+		 * A NULL PE (dead IOC, or the caller can report all
+		 * existing errors itself) matches every event, a PHB PE
+		 * all events on that PHB, any other PE only its own.
+		 */
+		if (!pe)
+			match = true;
+		else if (pe->type & EEH_PE_PHB)
+			match = ev->pe && ev->pe->phb == pe->phb;
+		else
+			match = ev->pe == pe;
+
+		if (match) {
+			list_del(&ev->list);
+			kfree(ev);
+		}
+	}
+	spin_unlock_irqrestore(&eeh_eventlist_lock, irqflags);
+}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
new file mode 100644
index 0000000000..e0ce812796
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file implements PEs based on the information from
+ * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
+ * All the PEs are organized as a hierarchy tree. The first level
+ * of the tree is associated with existing PHBs since a particular
+ * PE is only meaningful in one PHB domain.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+static int eeh_pe_aux_size = 0;
+static LIST_HEAD(eeh_phb_pe);
+
+/**
+ * eeh_set_pe_aux_size - Set PE auxiliary data size
+ * @size: PE auxiliary data size; negative values are ignored
+ *
+ * Record the size of the auxiliary data appended to each allocated PE.
+ */
+void eeh_set_pe_aux_size(int size)
+{
+	if (size < 0)
+		return;
+
+	eeh_pe_aux_size = size;
+}
+
+/**
+ * eeh_pe_alloc - Allocate PE
+ * @phb: PCI controller
+ * @type: PE type
+ *
+ * Allocate a zeroed PE instance dynamically. Returns NULL on failure.
+ */
+static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
+{
+	struct eeh_pe *pe;
+	size_t alloc_size;
+
+	alloc_size = sizeof(struct eeh_pe);
+	if (eeh_pe_aux_size) {
+		alloc_size = ALIGN(alloc_size, cache_line_size());
+		alloc_size += eeh_pe_aux_size;
+	}
+
+	/* Allocate the PE, followed by the aux data area if one is configured */
+	pe = kzalloc(alloc_size, GFP_KERNEL);
+	if (!pe) return NULL;
+
+	/* Initialize the PE */
+	pe->type = type;
+	pe->phb = phb;
+	INIT_LIST_HEAD(&pe->child_list);
+	INIT_LIST_HEAD(&pe->edevs);
+
+	pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
+				      cache_line_size());
+	return pe;
+}
+
+/**
+ * eeh_phb_pe_create - Create PHB PE
+ * @phb: PCI controller
+ *
+ * Called when a PHB is detected during system boot or PCI hotplug to
+ * create its PHB PE. Returns 0 on success, -ENOMEM on allocation failure.
+ */
+int eeh_phb_pe_create(struct pci_controller *phb)
+{
+	struct eeh_pe *pe;
+
+	/* Allocate PHB PE */
+	pe = eeh_pe_alloc(phb, EEH_PE_PHB);
+	if (!pe) {
+		pr_err("%s: out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Append it to the file-local list of PHB PEs */
+	list_add_tail(&pe->child, &eeh_phb_pe);
+
+	pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number);
+
+	return 0;
+}
+
+/**
+ * eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in milliseconds
+ *
+ * Poll the PE's state until it is available. Returns that state, or
+ * EEH_STATE_NOT_SUPPORT once @max_wait has been used up.
+ */
+int eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+	int ret;
+	int mwait;
+
+	/*
+	 * According to PAPR, the state of PE might be temporarily
+	 * unavailable. Under the circumstance, we have to wait
+	 * for indicated time determined by firmware. The maximal
+	 * wait time is 5 minutes, which is acquired from the original
+	 * EEH implementation. The original implementation
+	 * also defined the minimal wait time as 1 second.
+	 */
+#define EEH_STATE_MIN_WAIT_TIME	(1000)
+#define EEH_STATE_MAX_WAIT_TIME	(300 * 1000)
+
+	while (1) {
+		ret = eeh_ops->get_state(pe, &mwait);
+
+		if (ret != EEH_STATE_UNAVAILABLE)
+			return ret;
+
+		if (max_wait <= 0) {
+			pr_warn("%s: Timeout when getting PE's state (%d)\n",
+				__func__, max_wait);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+
+		if (mwait < EEH_STATE_MIN_WAIT_TIME) {
+			pr_warn("%s: Firmware returned bad wait value %d\n",
+				__func__, mwait);
+			mwait = EEH_STATE_MIN_WAIT_TIME;
+		} else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+			pr_warn("%s: Firmware returned too long wait value %d\n",
+				__func__, mwait);
+			mwait = EEH_STATE_MAX_WAIT_TIME;
+		}
+
+		msleep(min(mwait, max_wait));
+		max_wait -= mwait;
+	}
+}
+
+/**
+ * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
+ * @phb: PCI controller
+ *
+ * The overall PEs form hierarchy tree. The first layer of the
+ * hierarchy tree is composed of PHB PEs. Return the PHB PE that
+ * corresponds to the given PHB, or NULL if there is none.
+ */
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+{
+	struct eeh_pe *pe;
+
+	list_for_each_entry(pe, &eeh_phb_pe, child) {
+		/*
+		 * Actually, we needn't check the type since
+		 * the PE for PHB has been determined when that
+		 * was created.
+		 */
+		if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
+			return pe;
+	}
+
+	return NULL;
+}
+
+/**
+ * eeh_pe_next - Retrieve the next PE in the tree
+ * @pe: current PE
+ * @root: root PE
+ *
+ * Return the next PE of a depth-first walk of the hierarchy PE
+ * tree rooted at @root, or NULL once the walk is complete.
+ */
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root)
+{
+	struct list_head *next = pe->child_list.next;
+
+	if (next == &pe->child_list) {
+		while (1) {
+			if (pe == root)
+				return NULL;
+			next = pe->child.next;
+			if (next != &pe->parent->child_list)
+				break;
+			pe = pe->parent;
+		}
+	}
+
+	return list_entry(next, struct eeh_pe, child);
+}
+
+/**
+ * eeh_pe_traverse - Traverse PEs in the specified PHB
+ * @root: root PE
+ * @fn: callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the specified PE and its
+ * child PEs. The traversal stops as soon as the callback
+ * returns something other than NULL, and that value is returned.
+ * NULL is returned when every PE has been visited.
+ */
+void *eeh_pe_traverse(struct eeh_pe *root,
+		      eeh_pe_traverse_func fn, void *flag)
+{
+	struct eeh_pe *pe;
+	void *ret;
+
+	eeh_for_each_pe(root, pe) {
+		ret = fn(pe, flag);
+		if (ret) return ret;
+	}
+
+	return NULL;
+}
+
+/**
+ * eeh_pe_dev_traverse - Traverse the devices from the PE
+ * @root: EEH PE
+ * @fn: function callback
+ * @flag: extra parameter to callback
+ *
+ * Call @fn on every device of the specified PE and of its child
+ * PEs. A NULL @root only triggers a warning.
+ */
+void eeh_pe_dev_traverse(struct eeh_pe *root,
+			  eeh_edev_traverse_func fn, void *flag)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+
+	if (!root) {
+		pr_warn("%s: Invalid PE %p\n",
+			__func__, root);
+		return;
+	}
+
+	/* Walk every PE under root, then every device of each PE */
+	eeh_for_each_pe(root, pe)
+		eeh_pe_for_each_dev(pe, edev, tmp)
+			fn(edev, flag);
+}
+
+/**
+ * __eeh_pe_get - Check the PE address
+ * @pe: PE being visited
+ * @flag: pointer to the int PE address being searched for
+ *
+ * Traverse callback: returns @pe when it is not a PHB PE and its
+ * address equals the target address, otherwise NULL.
+ */
+static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
+{
+	int *target_pe = flag;
+
+	/* PHB PEs are special and should be ignored */
+	if (pe->type & EEH_PE_PHB)
+		return NULL;
+
+	if (*target_pe == pe->addr)
+		return pe;
+
+	return NULL;
+}
+
+/**
+ * eeh_pe_get - Search PE based on the given address
+ * @phb: PCI controller
+ * @pe_no: PE number
+ *
+ * Search the PE tree below the PHB PE of @phb for a PE whose
+ * address equals @pe_no. The function is used to check if the
+ * associated PE has been created against the PE address. PHB
+ * PEs themselves are never matched.
+ *
+ * Returns the matching PE, or NULL if no such PE exists.
+ */
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
+{
+	struct eeh_pe *root = eeh_phb_pe_get(phb);
+
+	return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
+}
+
+/**
+ * eeh_pe_tree_insert - Add EEH device to parent PE
+ * @edev: EEH device
+ * @new_pe_parent: PE to create additional PEs under
+ *
+ * Add EEH device to the PE in edev->pe_config_addr. If a PE already
+ * exists with that address then @edev is added to that PE. Otherwise
+ * a new PE is created and inserted into the PE tree as a child of
+ * @new_pe_parent.
+ *
+ * If @new_pe_parent is NULL then the new PE will be inserted
+ * directly under the PHB.
+ */
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
+{
+	struct pci_controller *hose = edev->controller;
+	struct eeh_pe *pe, *parent;
+
+	/*
+	 * Check whether a PE with this PE address already
+	 * exists. If it does, the PE should be composed of
+	 * a PCI bus and its subordinate components, and the
+	 * device is simply added to it.
+	 */
+	pe = eeh_pe_get(hose, edev->pe_config_addr);
+	if (pe) {
+		if (pe->type & EEH_PE_INVALID) {
+			list_add_tail(&edev->entry, &pe->edevs);
+			edev->pe = pe;
+			/*
+			 * We're running to here because of PCI hotplug caused by
+			 * EEH recovery. We need clear EEH_PE_INVALID until the top.
+			 */
+			parent = pe;
+			while (parent) {
+				if (!(parent->type & EEH_PE_INVALID))
+					break;
+				parent->type &= ~EEH_PE_INVALID;
+				parent = parent->parent;
+			}
+
+			eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n",
+				     pe->parent->addr);
+		} else {
+			/* Mark the PE as type of PCI bus */
+			pe->type = EEH_PE_BUS;
+			edev->pe = pe;
+
+			/* Put the edev to PE */
+			list_add_tail(&edev->entry, &pe->edevs);
+			eeh_edev_dbg(edev, "Added to bus PE\n");
+		}
+		return 0;
+	}
+
+	/* Create a new EEH PE */
+	if (edev->physfn)
+		pe = eeh_pe_alloc(hose, EEH_PE_VF);
+	else
+		pe = eeh_pe_alloc(hose, EEH_PE_DEVICE);
+	if (!pe) {
+		pr_err("%s: out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	pe->addr = edev->pe_config_addr;
+
+	/*
+	 * Put the new EEH PE into hierarchy tree. If the parent
+	 * can't be found, the newly created PE will be attached
+	 * to PHB directly. Otherwise, we have to associate the
+	 * PE with its parent.
+	 */
+	if (!new_pe_parent) {
+		new_pe_parent = eeh_phb_pe_get(hose);
+		if (!new_pe_parent) {
+			pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
+				__func__, hose->global_number);
+			edev->pe = NULL;
+			kfree(pe);
+			return -EEXIST;
+		}
+	}
+
+	/* link new PE into the tree */
+	pe->parent = new_pe_parent;
+	list_add_tail(&pe->child, &new_pe_parent->child_list);
+
+	/*
+	 * Add the EEH device to the new PE's device list and
+	 * point the device back at its PE.
+	 */
+	list_add_tail(&edev->entry, &pe->edevs);
+	edev->pe = pe;
+	eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
+		     new_pe_parent->addr);
+
+	return 0;
+}
+
+/**
+ * eeh_pe_tree_remove - Remove one EEH device from the associated PE
+ * @edev: EEH device
+ *
+ * The PE hierarchy tree might be changed when doing PCI hotplug.
+ * Also, the PCI devices or buses could be removed from the system
+ * during EEH recovery. So this function has to be called to remove
+ * the corresponding PE as well if necessary.
+ */
+int eeh_pe_tree_remove(struct eeh_dev *edev)
+{
+	struct eeh_pe *pe, *parent, *child;
+	bool keep, recover;
+	int cnt;
+
+	pe = eeh_dev_to_pe(edev);
+	if (!pe) {
+		eeh_edev_dbg(edev, "No PE found for device.\n");
+		return -EEXIST;
+	}
+
+	/* Remove the EEH device */
+	edev->pe = NULL;
+	list_del(&edev->entry);
+
+	/*
+	 * Check if the PE still includes any EEH devices.
+	 * If not, we should delete that. Also, we should
+	 * delete the parent PE if it doesn't have associated
+	 * child PEs and EEH devices.
+	 */
+	while (1) {
+		parent = pe->parent;
+
+		/* PHB PEs should never be removed */
+		if (pe->type & EEH_PE_PHB)
+			break;
+
+		/*
+		 * XXX: KEEP is set while resetting a PE. I don't think it's
+		 * ever set without RECOVERING also being set. I could
+		 * be wrong though so catch that with a WARN.
+		 */
+		keep = !!(pe->state & EEH_PE_KEEP);
+		recover = !!(pe->state & EEH_PE_RECOVERING);
+		WARN_ON(keep && !recover);
+
+		if (!keep && !recover) {
+			if (list_empty(&pe->edevs) &&
+			    list_empty(&pe->child_list)) {
+				list_del(&pe->child);
+				kfree(pe);
+			} else {
+				break;
+			}
+		} else {
+			/*
+			 * Mark the PE as invalid. At the end of the recovery
+			 * process any invalid PEs will be garbage collected.
+			 *
+			 * We need to delay the free()ing of them since we can
+			 * remove edev's while traversing the PE tree which
+			 * might trigger the removal of a PE and we can't
+			 * deal with that (yet).
+			 */
+			if (list_empty(&pe->edevs)) {
+				cnt = 0;
+				list_for_each_entry(child, &pe->child_list, child) {
+					if (!(child->type & EEH_PE_INVALID)) {
+						cnt++;
+						break;
+					}
+				}
+
+				if (!cnt)
+					pe->type |= EEH_PE_INVALID;
+				else
+					break;
+			}
+		}
+
+		pe = parent;
+	}
+
+	return 0;
+}
+
+/**
+ * eeh_pe_update_time_stamp - Update PE's frozen time stamp
+ * @pe: EEH PE
+ *
+ * We have time stamp for each PE to trace its time of getting
+ * frozen in last hour. The function should be called to update
+ * the time stamp on first error of the specific PE. On the other
+ * hand, we needn't account for errors that happened over an hour ago.
+ */
+void eeh_pe_update_time_stamp(struct eeh_pe *pe)
+{
+	time64_t tstamp;
+
+	if (!pe) return;
+
+	if (pe->freeze_count <= 0) {
+		pe->freeze_count = 0;
+		pe->tstamp = ktime_get_seconds();
+	} else {
+		tstamp = ktime_get_seconds();
+		if (tstamp - pe->tstamp > 3600) {
+			pe->tstamp = tstamp;
+			pe->freeze_count = 0;
+		}
+	}
+}
+
+/**
+ * eeh_pe_state_mark - Mark specified state for PE and its child PEs
+ * @root: EEH PE
+ * @state: state bits to set
+ *
+ * EEH error affects the current PE and its child PEs. Set @state on
+ * each of them, skipping PEs already marked EEH_PE_REMOVED.
+ */
+void eeh_pe_state_mark(struct eeh_pe *root, int state)
+{
+	struct eeh_pe *pe;
+
+	eeh_for_each_pe(root, pe)
+		if (!(pe->state & EEH_PE_REMOVED))
+			pe->state |= state;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
+
+/**
+ * eeh_pe_mark_isolated - Mark a PE and its children as isolated
+ * @root: EEH PE
+ *
+ * Record that a PE has been isolated by marking the PE and its children as
+ * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
+ * as pci_channel_io_frozen.
+ */
+void eeh_pe_mark_isolated(struct eeh_pe *root)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev;
+	struct pci_dev *pdev;
+
+	eeh_pe_state_mark(root, EEH_PE_ISOLATED);
+	eeh_for_each_pe(root, pe) {
+		list_for_each_entry(edev, &pe->edevs, entry) {
+			pdev = eeh_dev_to_pci_dev(edev);
+			if (pdev)
+				pdev->error_state = pci_channel_io_frozen;
+		}
+		/* Block PCI config access if required */
+		if (pe->state & EEH_PE_CFG_RESTRICTED)
+			pe->state |= EEH_PE_CFG_BLOCKED;
+	}
+}
+EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
+
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
+{
+	int mode = *((int *)flag);
+
+	edev->mode |= mode;
+}
+
+/**
+ * eeh_pe_dev_mode_mark - Mark mode for all devices under the PE
+ * @pe: EEH PE
+ * @mode: mode bits to OR into the mode of every device of the PE
+ *        and of its child PEs
+ */
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
+{
+	eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
+}
+
+/**
+ * eeh_pe_state_clear - Clear state for the PE
+ * @root: EEH PE
+ * @state: state
+ * @include_passed: include passed-through devices?
+ *
+ * The function is used to clear the indicated state from the
+ * given PE and its child PEs. When EEH_PE_ISOLATED is cleared,
+ * the check count of each PE is cleared as well.
+ */
+void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
+{
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+
+	eeh_for_each_pe(root, pe) {
+		/* Keep the state of permanently removed PE intact */
+		if (pe->state & EEH_PE_REMOVED)
+			continue;
+
+		if (!include_passed && eeh_pe_passed(pe))
+			continue;
+
+		pe->state &= ~state;
+
+		/*
+		 * Special treatment on clearing isolated state. Clear
+		 * check count since last isolation and put all affected
+		 * devices to normal state.
+		 */
+		if (!(state & EEH_PE_ISOLATED))
+			continue;
+
+		pe->check_count = 0;
+		eeh_pe_for_each_dev(pe, edev, tmp) {
+			pdev = eeh_dev_to_pci_dev(edev);
+			if (!pdev)
+				continue;
+
+			pdev->error_state = pci_channel_io_normal;
+		}
+
+		/* Unblock PCI config access if required */
+		if (pe->state & EEH_PE_CFG_RESTRICTED)
+			pe->state &= ~EEH_PE_CFG_BLOCKED;
+	}
+}
+
+/*
+ * Some PCI bridges (e.g. PLX bridges) have primary/secondary
+ * buses assigned explicitly by firmware, and we probably have
+ * lost that after reset. So we have to delay the check until
+ * the PCI-CFG registers have been restored for the parent
+ * bridge.
+ *
+ * Don't use normal PCI-CFG accessors, which probably have been
+ * blocked on normal path during the stage. So we need to utilize
+ * eeh operations, which are always permitted.
+ */
+static void eeh_bridge_check_link(struct eeh_dev *edev)
+{
+	int cap;
+	uint32_t val;
+	int timeout = 0;
+
+	/*
+	 * We only check root port and downstream ports of
+	 * PCIe switches
+	 */
+	if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
+		return;
+
+	eeh_edev_dbg(edev, "Checking PCIe link...\n");
+
+	/* Check slot status */
+	cap = edev->pcie_cap;
+	eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val);
+	if (!(val & PCI_EXP_SLTSTA_PDS)) {
+		eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
+		return;
+	}
+
+	/* Check power status if we have the capability */
+	eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val);
+	if (val & PCI_EXP_SLTCAP_PCP) {
+		eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val);
+		if (val & PCI_EXP_SLTCTL_PCC) {
+			eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
+			val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
+			val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
+			eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val);
+			msleep(2 * 1000);
+		}
+	}
+
+	/* Enable link */
+	eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val);
+	val &= ~PCI_EXP_LNKCTL_LD;
+	eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
+
+	/* Without link reporting we can only wait a fixed 1s */
+	if (!edev->pdev->link_active_reporting) {
+		eeh_edev_dbg(edev, "No link reporting capability\n");
+		msleep(1000);
+		return;
+	}
+
+	/* Poll every 20ms until the link is up or 5s have elapsed */
+	timeout = 0;
+	while (timeout < 5000) {
+		msleep(20);
+		timeout += 20;
+
+		eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val);
+		if (val & PCI_EXP_LNKSTA_DLLLA)
+			break;
+	}
+
+	if (val & PCI_EXP_LNKSTA_DLLLA)
+		eeh_edev_dbg(edev, "Link up (%s)\n",
+			 (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
+	else
+		eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
+}
+
+#define BYTE_SWAP(OFF)	(8*((OFF)/4)+3-(OFF))	/* mirror OFF within its 4-byte word */
+#define SAVED_BYTE(OFF)	(((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])	/* needs 'edev' in scope */
+
+static void eeh_restore_bridge_bars(struct eeh_dev *edev)
+{
+	int i;
+
+	/*
+	 * Device BARs: 0x10 - 0x18
+	 * Bus numbers and windows: 0x18 - 0x30
+	 */
+	for (i = 4; i < 13; i++)
+		eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
+	/* Rom: 0x38 */
+	eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]);
+
+	/* Cache line & Latency timer: 0xC 0xD */
+	eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
+                SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+	eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
+		SAVED_BYTE(PCI_LATENCY_TIMER));
+	/* Max latency, min grant, interrupt pin and line: 0x3C */
+	eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
+
+	/* PCI Command: 0x4, with memory decoding and bus mastering forced on */
+	eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] |
+			      PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+	/* Check the PCIe link is ready */
+	eeh_bridge_check_link(edev);
+}
+
+static void eeh_restore_device_bars(struct eeh_dev *edev)
+{
+	int i;
+	u32 cmd;
+
+	for (i = 4; i < 10; i++)	/* saved dwords at 0x10 - 0x24 */
+		eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
+	/* 12 == Expansion ROM Address */
+	eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]);
+
+	eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
+		SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+	eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
+		SAVED_BYTE(PCI_LATENCY_TIMER));
+
+	/* max latency, min grant, interrupt pin and line */
+	eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
+
+	/*
+	 * Restore PERR & SERR bits from the saved command word, some
+	 * devices require it; don't touch the other command bits
+	 */
+	eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd);
+	if (edev->config_space[1] & PCI_COMMAND_PARITY)
+		cmd |= PCI_COMMAND_PARITY;
+	else
+		cmd &= ~PCI_COMMAND_PARITY;
+	if (edev->config_space[1] & PCI_COMMAND_SERR)
+		cmd |= PCI_COMMAND_SERR;
+	else
+		cmd &= ~PCI_COMMAND_SERR;
+	eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd);
+}
+
+/**
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
+ * @edev: EEH device
+ * @flag: Unused
+ *
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, etc.
+ * from the values saved in the EEH device.
+ */
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
+{
+	/* Do special restore for bridges */
+	if (edev->mode & EEH_DEV_BRIDGE)
+		eeh_restore_bridge_bars(edev);
+	else
+		eeh_restore_device_bars(edev);
+
+	if (eeh_ops->restore_config)
+		eeh_ops->restore_config(edev);
+}
+
+/**
+ * eeh_pe_restore_bars - Restore the PCI config space info
+ * @pe: EEH PE
+ *
+ * This routine restores the config space of every device of
+ * the PE and of its child PEs.
+ */
+void eeh_pe_restore_bars(struct eeh_pe *pe)
+{
+	/*
+	 * NOTE(review): eeh_pe_dev_traverse() takes no lock itself,
+	 * so any locking needed here is up to the caller - confirm.
+	 */
+	eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
+}
+
+/**
+ * eeh_pe_loc_get - Retrieve location code binding to the given PE
+ * @pe: EEH PE
+ *
+ * Walk from the primary PE bus up through its parent buses and return
+ * the first location code found: "ibm,io-base-loc-code" on a root bus,
+ * "ibm,slot-location-code" otherwise. Buses without a device tree node
+ * are skipped. Returns "N/A" if no location code is found.
+ */
+const char *eeh_pe_loc_get(struct eeh_pe *pe)
+{
+	struct pci_bus *bus = eeh_pe_bus_get(pe);
+	struct device_node *dn;
+	const char *loc = NULL;
+
+	while (bus) {
+		dn = pci_bus_to_OF_node(bus);
+		if (!dn) {
+			bus = bus->parent;
+			continue;
+		}
+
+		if (pci_is_root_bus(bus))
+			loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+		else
+			loc = of_get_property(dn, "ibm,slot-location-code",
+					      NULL);
+
+		if (loc)
+			return loc;
+
+		bus = bus->parent;
+	}
+
+	return "N/A";
+}
+
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the PCI bus according to the given PE. For PHB PE,
+ * the PHB's bus is returned. Otherwise the cached primary bus
+ * is returned if EEH_PE_PRI_BUS is set, else the bus of the
+ * first device of the PE. Returns NULL if no PCI device is
+ * found.
+ */
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+	struct eeh_dev *edev;
+	struct pci_dev *pdev;
+
+	if (pe->type & EEH_PE_PHB)
+		return pe->phb->bus;
+
+	/* The primary bus might be cached during probe time */
+	if (pe->state & EEH_PE_PRI_BUS)
+		return pe->bus;
+
+	/* Retrieve the parent PCI bus of first (top) PCI device */
+	edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (pdev)
+		return pdev->bus;
+
+	return NULL;
+}
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
new file mode 100644
index 0000000000..706e1eb95e
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
+ * Copyright IBM Corporation 2007
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
+ *
+ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/stat.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+
+/**
+ * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
+ * @_name: name of file in sysfs directory
+ * @_memb: name of member in struct eeh_dev to access
+ * @_format: printf format for display
+ *
+ * All of the attributes look very similar, so just auto-gen a routine to
+ * display them. The file reads as empty when the device has no eeh_dev.
+ */
+#define EEH_SHOW_ATTR(_name,_memb,_format)               \
+static ssize_t eeh_show_##_name(struct device *dev,      \
+		struct device_attribute *attr, char *buf)          \
+{                                                        \
+	struct pci_dev *pdev = to_pci_dev(dev);               \
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);      \
+	                                                      \
+	if (!edev)                                            \
+		return 0;                                     \
+	                                                      \
+	return sysfs_emit(buf, _format "\n", edev->_memb);    \
+}                                                        \
+static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL);
+
+EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
+EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
+
+static ssize_t eeh_pe_state_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+	int state;
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+	/* Show the value from eeh_ops->get_state(), then the PE's state flags */
+	state = eeh_ops->get_state(edev->pe, NULL);
+	return sysfs_emit(buf, "0x%08x 0x%08x\n",
+			  state, edev->pe->state);
+}
+
+static ssize_t eeh_pe_state_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+
+	/* Nothing to do if it's not frozen; the written data is not parsed */
+	if (!(edev->pe->state & EEH_PE_ISOLATED))
+		return count;
+
+	if (eeh_unfreeze_pe(edev->pe))
+		return -EIO;
+	eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED, true);	/* true: include_passed */
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(eeh_pe_state);
+
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PPC_PSERIES)
+static ssize_t eeh_notify_resume_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+	/* NOTE(review): pdn is used unchecked - confirm it cannot be NULL here */
+	return sysfs_emit(buf, "%d\n", pdn->last_allow_rc);
+}
+
+static ssize_t eeh_notify_resume_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+	if (!edev || !edev->pe || !eeh_ops->notify_resume)
+		return -ENODEV;	/* no EEH device/PE, or no notify_resume op */
+
+	if (eeh_ops->notify_resume(edev))	/* the written data is not parsed */
+		return -EIO;
+
+	return count;
+}
+static DEVICE_ATTR_RW(eeh_notify_resume);
+
+static int eeh_notify_resume_add(struct pci_dev *pdev)
+{
+	struct device_node *np;
+	int rc = 0;
+
+	/* Node of pdev itself if it is a physfn, else of pdev->physfn */
+	np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+	/* The attribute is only created when that node has this property */
+	if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+		rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+
+	return rc;
+}
+
+static void eeh_notify_resume_remove(struct pci_dev *pdev)
+{
+	struct device_node *np;
+	/* Same node lookup and property test as eeh_notify_resume_add() */
+	np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+	if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+		device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+}
+#else
+static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
+static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
+#endif /* CONFIG_PCI_IOV && CONFIG_PPC_PSERIES */
+
+void eeh_sysfs_add_device(struct pci_dev *pdev)
+{
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+	int rc=0;
+
+	if (!eeh_enabled())
+		return;
+
+	if (edev && (edev->mode & EEH_DEV_SYSFS))	/* entries already created */
+		return;
+	/* Accumulate the return codes; a non-zero sum is reported below */
+	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
+	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
+	rc += eeh_notify_resume_add(pdev);
+
+	if (rc)
+		pr_warn("EEH: Unable to create sysfs entries\n");
+	else if (edev)
+		edev->mode |= EEH_DEV_SYSFS;
+}
+
+void eeh_sysfs_remove_device(struct pci_dev *pdev)
+{
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+	if (!edev) {
+		WARN_ON(eeh_enabled());
+		return;
+	}
+
+	edev->mode &= ~EEH_DEV_SYSFS;	/* cleared even if nothing is removed below */
+
+	/*
+	 * The parent directory might have been removed. There is
+	 * nothing left to remove in that case.
+	 */
+	if (!pdev->dev.kobj.sd)
+		return;
+
+	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
+	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
+
+	eeh_notify_resume_remove(pdev);
+}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
new file mode 100644
index 0000000000..7eda33a24b
--- /dev/null
+++ b/arch/powerpc/kernel/entry_32.S
@@ -0,0 +1,512 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@fsmlabs.com) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@fsmlabs.com>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ *  This file contains the system call entry code, context switch
+ *  code, and exception/interrupt return code for PowerPC.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/sys.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
+#include <asm/barrier.h>
+#include <asm/kup.h>
+#include <asm/bug.h>
+#include <asm/interrupt.h>
+
+#include "head_32.h"
+
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+
+/*
+ * Align to 4k in order to ensure that all functions modifying srr0/srr1
+ * fit into one page in order to not encounter a TLB miss between the
+ * modification of srr0/srr1 and the associated rfi.
+ */
+	.align	12
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_E500)
+	.globl	prepare_transfer_to_handler
+prepare_transfer_to_handler:
+	/* if from kernel, check interrupted DOZE/NAP mode */
+	lwz	r12,TI_LOCAL_FLAGS(r2)
+	mtcrf	0x01,r12
+	bt-	31-TLF_NAPPING,4f
+	bt-	31-TLF_SLEEPING,7f
+	blr
+
+4:	rlwinm	r12,r12,0,~_TLF_NAPPING
+	stw	r12,TI_LOCAL_FLAGS(r2)
+	b	power_save_ppc32_restore
+
+7:	rlwinm	r12,r12,0,~_TLF_SLEEPING
+	stw	r12,TI_LOCAL_FLAGS(r2)
+	lwz	r9,_MSR(r11)		/* if sleeping, clear MSR.EE */
+	rlwinm	r9,r9,0,~MSR_EE
+	lwz	r12,_LINK(r11)		/* and return to address in LR */
+	REST_GPR(2, r11)
+	b	fast_exception_return
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */
+
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+SYM_FUNC_START(__kuep_lock)
+	lwz	r9, THREAD+THSR0(r2)
+	update_user_segments_by_4 r9, r10, r11, r12
+	blr
+SYM_FUNC_END(__kuep_lock)
+
+SYM_FUNC_START_LOCAL(__kuep_unlock)
+	lwz	r9, THREAD+THSR0(r2)
+	rlwinm  r9,r9,0,~SR_NX
+	update_user_segments_by_4 r9, r10, r11, r12
+	blr
+SYM_FUNC_END(__kuep_unlock)
+
+.macro	kuep_lock
+	bl	__kuep_lock
+.endm
+.macro	kuep_unlock
+	bl	__kuep_unlock
+.endm
+#else
+.macro	kuep_lock
+.endm
+.macro	kuep_unlock
+.endm
+#endif
+
+	.globl	transfer_to_syscall
+transfer_to_syscall:
+	stw	r3, ORIG_GPR3(r1)
+	stw	r11, GPR1(r1)
+	stw	r11, 0(r1)
+	mflr	r12
+	stw	r12, _LINK(r1)
+#ifdef CONFIG_BOOKE_OR_40x
+	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) */
+#endif
+	lis	r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+	SAVE_GPR(2, r1)
+	addi	r12,r12,STACK_FRAME_REGS_MARKER@l
+	stw	r9,_MSR(r1)
+	li	r2, INTERRUPT_SYSCALL
+	stw	r12,STACK_INT_FRAME_MARKER(r1)
+	stw	r2,_TRAP(r1)
+	SAVE_GPR(0, r1)
+	SAVE_GPRS(3, 8, r1)
+	addi	r2,r10,-THREAD
+	SAVE_NVGPRS(r1)
+	kuep_lock
+
+	/* Calling convention has r3 = regs, r4 = orig r0 */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	mr	r4,r0
+	bl	system_call_exception
+
+ret_from_syscall:
+	addi    r4,r1,STACK_INT_FRAME_REGS
+	li	r5,0
+	bl	syscall_exit_prepare
+#ifdef CONFIG_PPC_47x
+	lis	r4,icache_44x_need_flush@ha
+	lwz	r5,icache_44x_need_flush@l(r4)
+	cmplwi	cr0,r5,0
+	bne-	.L44x_icache_flush
+#endif /* CONFIG_PPC_47x */
+.L44x_icache_flush_return:
+	kuep_unlock
+	lwz	r4,_LINK(r1)
+	lwz	r5,_CCR(r1)
+	mtlr	r4
+	lwz	r7,_NIP(r1)
+	lwz	r8,_MSR(r1)
+	cmpwi	r3,0
+	REST_GPR(3, r1)
+syscall_exit_finish:
+	mtspr	SPRN_SRR0,r7
+	mtspr	SPRN_SRR1,r8
+
+	bne	3f
+	mtcr	r5
+
+1:	REST_GPR(2, r1)
+	REST_GPR(1, r1)
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
+
+3:	mtcr	r5
+	lwz	r4,_CTR(r1)
+	lwz	r5,_XER(r1)
+	REST_NVGPRS(r1)
+	mtctr	r4
+	mtxer	r5
+	REST_GPR(0, r1)
+	REST_GPRS(3, 12, r1)
+	b	1b
+
+#ifdef CONFIG_44x
+.L44x_icache_flush:
+	li	r7,0
+	iccci	r0,r0
+	stw	r7,icache_44x_need_flush@l(r4)
+	b	.L44x_icache_flush_return
+#endif  /* CONFIG_44x */
+
+	.globl	ret_from_fork
+ret_from_fork:
+	REST_NVGPRS(r1)
+	bl	schedule_tail
+	li	r3,0	/* fork() return value */
+	b	ret_from_syscall
+
+	.globl	ret_from_kernel_user_thread
+ret_from_kernel_user_thread:
+	bl	schedule_tail
+	mtctr	r14
+	mr	r3,r15
+	PPC440EP_ERR42
+	bctrl
+	li	r3,0
+	b	ret_from_syscall
+
+	.globl	start_kernel_thread
+start_kernel_thread:
+	bl	schedule_tail
+	mtctr	r14
+	mr	r3,r15
+	PPC440EP_ERR42
+	bctrl
+	/*
+	 * This must not return. We actually want to BUG here, not WARN,
+	 * because BUG will exit the process which is what the kernel thread
+	 * should have done, which may give some hope of continuing.
+	 */
+100:	trap
+	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+
+	.globl	fast_exception_return
+fast_exception_return:
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	andi.	r10,r9,MSR_RI		/* check for recoverable interrupt */
+	beq	3f			/* if not, we've got problems */
+#endif
+
+2:	lwz	r10,_CCR(r11)		/* r11 = exception frame being unwound */
+	REST_GPRS(1, 6, r11)
+	mtcr	r10
+	lwz	r10,_LINK(r11)
+	mtlr	r10
+	/* Clear the exception marker on the stack to avoid confusing stacktrace */
+	li	r10, 0
+	stw	r10, STACK_INT_FRAME_MARKER(r11)
+	REST_GPR(10, r11)
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+	mtspr	SPRN_NRI, r0
+#endif
+	mtspr	SPRN_SRR1,r9		/* r9 = MSR value handed to rfi */
+	mtspr	SPRN_SRR0,r12		/* r12 = address handed to rfi */
+	REST_GPR(9, r11)
+	REST_GPR(12, r11)
+	REST_GPR(11, r11)		/* r11 itself goes last: it is the base */
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
+_ASM_NOKPROBE_SYMBOL(fast_exception_return)
+
+/* aargh, a nonrecoverable interrupt, panic */
+/* aargh, we don't know which trap this is */
+3:
+	li	r10,-1
+	stw	r10,_TRAP(r11)		/* record -1 as the trap number */
+	prepare_transfer_to_handler
+	bl	unrecoverable_exception
+	trap	/* should not get here */
+
+	.globl interrupt_return
+interrupt_return:
+	lwz	r4,_MSR(r1)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	andi.	r0,r4,MSR_PR		/* saved MSR has PR set? */
+	beq	.Lkernel_interrupt_return
+	bl	interrupt_exit_user_prepare
+	cmpwi	r3,0
+	kuep_unlock
+	bne-	.Lrestore_nvgprs	/* non-zero: reload NVGPRs first */
+
+.Lfast_user_interrupt_return:
+	lwz	r11,_NIP(r1)
+	lwz	r12,_MSR(r1)
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
+
+BEGIN_FTR_SECTION
+	stwcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	lwarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	lwz	r3,_CCR(r1)
+	lwz	r4,_LINK(r1)
+	lwz	r5,_CTR(r1)
+	lwz	r6,_XER(r1)
+	li	r0,0
+
+	/*
+	 * Leaving a stale exception marker on the stack can confuse
+	 * the reliable stack unwinder later on. Clear it.
+	 */
+	stw	r0,STACK_INT_FRAME_MARKER(r1)
+	REST_GPRS(7, 12, r1)
+
+	mtcr	r3
+	mtlr	r4
+	mtctr	r5
+	mtspr	SPRN_XER,r6
+
+	REST_GPRS(2, 6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
+
+.Lrestore_nvgprs:
+	REST_NVGPRS(r1)
+	b	.Lfast_user_interrupt_return
+
+.Lkernel_interrupt_return:
+	bl	interrupt_exit_kernel_prepare
+
+.Lfast_kernel_interrupt_return:
+	cmpwi	cr1,r3,0		/* non-zero: emulate stack store at 1: */
+	lwz	r11,_NIP(r1)
+	lwz	r12,_MSR(r1)
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
+
+BEGIN_FTR_SECTION
+	stwcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	lwarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	lwz	r3,_LINK(r1)
+	lwz	r4,_CTR(r1)
+	lwz	r5,_XER(r1)
+	lwz	r6,_CCR(r1)
+	li	r0,0
+
+	REST_GPRS(7, 12, r1)
+
+	mtlr	r3
+	mtctr	r4
+	mtspr	SPRN_XER,r5
+
+	/*
+	 * Leaving a stale exception marker on the stack can confuse
+	 * the reliable stack unwinder later on. Clear it.
+	 */
+	stw	r0,STACK_INT_FRAME_MARKER(r1)
+
+	REST_GPRS(2, 5, r1)
+
+	bne-	cr1,1f /* emulate stack store */
+	mtcr	r6
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
+
+1:	/*
+	 * Emulate stack store with update. New r1 value was already calculated
+	 * and updated in our interrupt regs by emulate_loadstore, but we can't
+	 * store the previous value of r1 to the stack before re-loading our
+	 * registers from it, otherwise they could be clobbered.  Use
+	 * SPRG Scratch0 as temporary storage to hold the store
+	 * data, as interrupts are disabled here so it won't be clobbered.
+	 */
+	mtcr	r6
+#ifdef CONFIG_BOOKE
+	mtspr	SPRN_SPRG_WSCRATCH0, r9
+#else
+	mtspr	SPRN_SPRG_SCRATCH0, r9
+#endif
+	addi	r9,r1,INT_FRAME_SIZE /* get original r1 */
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	stw	r9,0(r1) /* perform store component of stwu */
+#ifdef CONFIG_BOOKE
+	mfspr	r9, SPRN_SPRG_RSCRATCH0
+#else
+	mfspr	r9, SPRN_SPRG_SCRATCH0
+#endif
+	rfi
+#ifdef CONFIG_40x
+	b .	/* Prevent prefetch past rfi */
+#endif
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+
+/*
+ * Returning from a critical interrupt in user mode doesn't need
+ * to be any different from a normal exception.  For a critical
+ * interrupt in the kernel, we just return (without checking for
+ * preemption) since the interrupt may have happened at some crucial
+ * place (e.g. inside the TLB miss handler), and because we will be
+ * running with r1 pointing into critical_stack, not the current
+ * process's kernel stack (and therefore current_thread_info() will
+ * give the wrong answer).
+ * We have to restore various SPRs that may have been in use at the
+ * time of the critical interrupt.
+ *
+ */
+#ifdef CONFIG_40x
+#define PPC_40x_TURN_OFF_MSR_DR						    \
+	/* avoid any possible TLB misses here by turning off MSR.DR, we	    \
+	 * assume the instructions here are mapped by a pinned TLB entry */ \
+	li	r10,MSR_IR;						    \
+	mtmsr	r10;							    \
+	isync;								    \
+	tophys(r1, r1);
+#else
+#define PPC_40x_TURN_OFF_MSR_DR
+#endif
+
+#define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi)	\
+	REST_NVGPRS(r1);						\
+	lwz	r3,_MSR(r1);						\
+	andi.	r3,r3,MSR_PR;						\
+	bne	interrupt_return;					\
+	REST_GPR(0, r1);						\
+	REST_GPRS(2, 8, r1);						\
+	lwz	r10,_XER(r1);						\
+	lwz	r11,_CTR(r1);						\
+	mtspr	SPRN_XER,r10;						\
+	mtctr	r11;							\
+	stwcx.	r0,0,r1;		/* to clear the reservation */	\
+	lwz	r11,_LINK(r1);						\
+	mtlr	r11;							\
+	lwz	r10,_CCR(r1);						\
+	mtcrf	0xff,r10;						\
+	PPC_40x_TURN_OFF_MSR_DR;					\
+	lwz	r9,_DEAR(r1);						\
+	lwz	r10,_ESR(r1);						\
+	mtspr	SPRN_DEAR,r9;						\
+	mtspr	SPRN_ESR,r10;						\
+	lwz	r11,_NIP(r1);						\
+	lwz	r12,_MSR(r1);						\
+	mtspr	exc_lvl_srr0,r11;					\
+	mtspr	exc_lvl_srr1,r12;					\
+	REST_GPRS(9, 12, r1);						\
+	REST_GPR(1, r1);						\
+	exc_lvl_rfi;							\
+	b	.;		/* prevent prefetch past exc_lvl_rfi */
+
+#define	RESTORE_xSRR(exc_lvl_srr0, exc_lvl_srr1)			\
+	lwz	r9,_##exc_lvl_srr0(r1);					\
+	lwz	r10,_##exc_lvl_srr1(r1);				\
+	mtspr	SPRN_##exc_lvl_srr0,r9;					\
+	mtspr	SPRN_##exc_lvl_srr1,r10;
+
+#if defined(CONFIG_PPC_E500)
+#ifdef CONFIG_PHYS_64BIT
+#define	RESTORE_MAS7							\
+	lwz	r11,MAS7(r1);						\
+	mtspr	SPRN_MAS7,r11;
+#else
+#define	RESTORE_MAS7
+#endif /* CONFIG_PHYS_64BIT */
+#define RESTORE_MMU_REGS						\
+	lwz	r9,MAS0(r1);						\
+	lwz	r10,MAS1(r1);						\
+	lwz	r11,MAS2(r1);						\
+	mtspr	SPRN_MAS0,r9;						\
+	lwz	r9,MAS3(r1);						\
+	mtspr	SPRN_MAS1,r10;						\
+	lwz	r10,MAS6(r1);						\
+	mtspr	SPRN_MAS2,r11;						\
+	mtspr	SPRN_MAS3,r9;						\
+	mtspr	SPRN_MAS6,r10;						\
+	RESTORE_MAS7;
+#elif defined(CONFIG_44x)
+#define RESTORE_MMU_REGS						\
+	lwz	r9,MMUCR(r1);						\
+	mtspr	SPRN_MMUCR,r9;
+#else
+#define RESTORE_MMU_REGS
+#endif
+
+#ifdef CONFIG_40x
+	.globl	ret_from_crit_exc
+ret_from_crit_exc:
+	lis	r9,crit_srr0@ha;
+	lwz	r9,crit_srr0@l(r9);
+	lis	r10,crit_srr1@ha;
+	lwz	r10,crit_srr1@l(r10);
+	mtspr	SPRN_SRR0,r9;
+	mtspr	SPRN_SRR1,r10;
+	RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
+#endif /* CONFIG_40x */
+
+#ifdef CONFIG_BOOKE
+	.globl	ret_from_crit_exc
+ret_from_crit_exc:
+	RESTORE_xSRR(SRR0,SRR1);
+	RESTORE_MMU_REGS;
+	RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
+
+	.globl	ret_from_debug_exc
+ret_from_debug_exc:
+	RESTORE_xSRR(SRR0,SRR1);
+	RESTORE_xSRR(CSRR0,CSRR1);
+	RESTORE_MMU_REGS;
+	RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
+_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
+
+	.globl	ret_from_mcheck_exc
+ret_from_mcheck_exc:
+	RESTORE_xSRR(SRR0,SRR1);
+	RESTORE_xSRR(CSRR0,CSRR1);
+	RESTORE_xSRR(DSRR0,DSRR1);
+	RESTORE_MMU_REGS;
+	RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
+#endif /* CONFIG_BOOKE */
+#endif /* CONFIG_4xx || CONFIG_BOOKE */
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
new file mode 100644
index 0000000000..1a9b5ae8cc
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+
+#ifndef CONFIG_PPC64
+/* epapr_ev_idle() was derived from e500_idle() */
+_GLOBAL(epapr_ev_idle)
+	PPC_LL	r4, TI_LOCAL_FLAGS(r2)	/* set napping bit */
+	ori	r4, r4,_TLF_NAPPING	/* so when we take an exception */
+	PPC_STL	r4, TI_LOCAL_FLAGS(r2)	/* it will return to our caller */
+
+#ifdef CONFIG_BOOKE_OR_40x
+	wrteei	1			/* set MSR[EE], as the #else path does */
+#else
+	mfmsr	r4
+	ori	r4, r4, MSR_EE
+	mtmsr	r4			/* write back MSR with EE set */
+#endif
+
+idle_loop:
+	LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:	/* 4 slots patched from DT "hcall-instructions" */
+	li	r3, -1
+	nop
+	nop
+	nop
+
+	/*
+	 * Guard against spurious wakeups from a hypervisor --
+	 * only interrupt will cause us to return to LR due to
+	 * _TLF_NAPPING.
+	 */
+	b	idle_loop
+#endif
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+	li	r3, -1
+	nop
+	nop
+	nop
+	blr
+EXPORT_SYMBOL(epapr_hypercall_start)
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
new file mode 100644
index 0000000000..d4b8aff208
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ePAPR para-virtualization support.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/machdep.h>
+#include <asm/inst.h>
+
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+extern void epapr_ev_idle(void);
+extern u32 epapr_ev_idle_start[];
+#endif
+
+bool epapr_paravirt_enabled;
+static bool __maybe_unused epapr_has_idle;
+
+static int __init early_init_dt_scan_epapr(unsigned long node,
+					   const char *uname,
+					   int depth, void *data)
+{
+	const u32 *words;
+	int size;
+	int slot;
+
+	words = of_get_flat_dt_prop(node, "hcall-instructions", &size);
+	if (!words)
+		return 0;
+	/* Accept only whole 32-bit words, and no more than four of them. */
+	if (size > (4 * 4) || size % 4)
+		return -1;
+	/* Copy each word, in order, into the hypercall stub(s). */
+	for (slot = 0; slot < (size / 4); slot++) {
+		ppc_inst_t insn = ppc_inst(be32_to_cpu(words[slot]));
+		patch_instruction(epapr_hypercall_start + slot, insn);
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+		patch_instruction(epapr_ev_idle_start + slot, insn);
+#endif
+	}
+
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+	if (of_get_flat_dt_prop(node, "has-idle", NULL))
+		epapr_has_idle = true;
+#endif
+
+	epapr_paravirt_enabled = true;
+
+	return 1;
+}
+
+int __init epapr_paravirt_early_init(void)
+{
+	of_scan_flat_dt(early_init_dt_scan_epapr, NULL);
+	/* The scan's return value is ignored; this always returns 0. */
+	return 0;
+}
+
+static int __init epapr_idle_init(void)
+{
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+	if (epapr_has_idle)	/* "has-idle" was seen in the flat DT */
+		ppc_md.power_save = epapr_ev_idle;
+#endif
+	/* Always succeeds, whether or not the idle hook was installed. */
+	return 0;
+}
+
+postcore_initcall(epapr_idle_init);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
new file mode 100644
index 0000000000..7ab4c8c0f1
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -0,0 +1,1556 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Boot code and exception vectors for Book3E processors
+ *
+ *  Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ */
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/reg_a2.h>
+#include <asm/exception-64e.h>
+#include <asm/bug.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/mmu.h>
+#include <asm/hw_irq.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
+
+/* 64e interrupt returns always use SRR registers */
+#define fast_interrupt_return fast_interrupt_return_srr
+#define interrupt_return interrupt_return_srr
+
+/* XXX This will ultimately add space for a special exception save
+ *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
+ *     when taking special interrupts. For now we don't support that,
+ *     special interrupts from within a non-standard level will probably
+ *     blow you up
+ */
+#define SPECIAL_EXC_SRR0	0
+#define SPECIAL_EXC_SRR1	1
+#define SPECIAL_EXC_SPRG_GEN	2
+#define SPECIAL_EXC_SPRG_TLB	3
+#define SPECIAL_EXC_MAS0	4
+#define SPECIAL_EXC_MAS1	5
+#define SPECIAL_EXC_MAS2	6
+#define SPECIAL_EXC_MAS3	7
+#define SPECIAL_EXC_MAS6	8
+#define SPECIAL_EXC_MAS7	9
+#define SPECIAL_EXC_MAS5	10	/* E.HV only */
+#define SPECIAL_EXC_MAS8	11	/* E.HV only */
+#define SPECIAL_EXC_IRQHAPPENED	12
+#define SPECIAL_EXC_DEAR	13
+#define SPECIAL_EXC_ESR		14
+#define SPECIAL_EXC_SOFTE	15
+#define SPECIAL_EXC_CSRR0	16
+#define SPECIAL_EXC_CSRR1	17
+/* must be even to keep 16-byte stack alignment */
+#define SPECIAL_EXC_END		18
+
+#define SPECIAL_EXC_FRAME_SIZE	(INT_FRAME_SIZE + SPECIAL_EXC_END * 8)
+#define SPECIAL_EXC_FRAME_OFFS  (INT_FRAME_SIZE - 288)
+
+#define SPECIAL_EXC_STORE(reg, name) \
+	std	reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
+
+#define SPECIAL_EXC_LOAD(reg, name) \
+	ld	reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
+
+SYM_CODE_START_LOCAL(special_reg_save)
+	/*
+	 * We only need (or have stack space) to save this stuff if
+	 * we interrupted the kernel.
+	 */
+	ld	r3,_MSR(r1)
+	andi.	r3,r3,MSR_PR
+	bnelr
+
+	/*
+	 * Advance to the next TLB exception frame for handler
+	 * types that don't do it automatically.
+	 */
+	LOAD_REG_ADDR(r11,extlb_level_exc)
+	lwz	r12,0(r11)
+	mfspr	r10,SPRN_SPRG_TLB_EXFRAME
+	add	r10,r10,r12
+	mtspr	SPRN_SPRG_TLB_EXFRAME,r10
+
+	/*
+	 * Save registers needed to allow nesting of certain exceptions
+	 * (such as TLB misses) inside special exception levels
+	 */
+	mfspr	r10,SPRN_SRR0
+	SPECIAL_EXC_STORE(r10,SRR0)
+	mfspr	r10,SPRN_SRR1
+	SPECIAL_EXC_STORE(r10,SRR1)
+	mfspr	r10,SPRN_SPRG_GEN_SCRATCH
+	SPECIAL_EXC_STORE(r10,SPRG_GEN)
+	mfspr	r10,SPRN_SPRG_TLB_SCRATCH
+	SPECIAL_EXC_STORE(r10,SPRG_TLB)
+	mfspr	r10,SPRN_MAS0
+	SPECIAL_EXC_STORE(r10,MAS0)
+	mfspr	r10,SPRN_MAS1
+	SPECIAL_EXC_STORE(r10,MAS1)
+	mfspr	r10,SPRN_MAS2
+	SPECIAL_EXC_STORE(r10,MAS2)
+	mfspr	r10,SPRN_MAS3
+	SPECIAL_EXC_STORE(r10,MAS3)
+	mfspr	r10,SPRN_MAS6
+	SPECIAL_EXC_STORE(r10,MAS6)
+	mfspr	r10,SPRN_MAS7
+	SPECIAL_EXC_STORE(r10,MAS7)
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_MAS5
+	SPECIAL_EXC_STORE(r10,MAS5)
+	mfspr	r10,SPRN_MAS8
+	SPECIAL_EXC_STORE(r10,MAS8)
+
+	/* MAS5/8 could have inappropriate values if we interrupted KVM code */
+	li	r10,0
+	mtspr	SPRN_MAS5,r10
+	mtspr	SPRN_MAS8,r10
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+	mfspr	r10,SPRN_DEAR
+	SPECIAL_EXC_STORE(r10,DEAR)
+	mfspr	r10,SPRN_ESR
+	SPECIAL_EXC_STORE(r10,ESR)
+
+	ld	r10,_NIP(r1)
+	SPECIAL_EXC_STORE(r10,CSRR0)
+	ld	r10,_MSR(r1)
+	SPECIAL_EXC_STORE(r10,CSRR1)
+
+	blr
+SYM_CODE_END(special_reg_save)
+
+SYM_CODE_START_LOCAL(ret_from_level_except)
+	ld	r3,_MSR(r1)
+	andi.	r3,r3,MSR_PR
+	beq	1f
+	REST_NVGPRS(r1)
+	b	interrupt_return
+1:
+
+	LOAD_REG_ADDR(r11,extlb_level_exc)
+	lwz	r12,0(r11)
+	mfspr	r10,SPRN_SPRG_TLB_EXFRAME
+	sub	r10,r10,r12
+	mtspr	SPRN_SPRG_TLB_EXFRAME,r10
+
+	/*
+	 * It's possible that the special level exception interrupted a
+	 * TLB miss handler, and inserted the same entry that the
+	 * interrupted handler was about to insert.  On CPUs without TLB
+	 * write conditional, this can result in a duplicate TLB entry.
+	 * Wipe all non-bolted entries to be safe.
+	 *
+	 * Note that this doesn't protect against any TLB misses
+	 * we may take accessing the stack from here to the end of
+	 * the special level exception.  It's not clear how we can
+	 * reasonably protect against that, but only CPUs with
+	 * neither TLB write conditional nor bolted kernel memory
+	 * are affected.  Do any such CPUs even exist?
+	 */
+	PPC_TLBILX_ALL(0,R0)
+
+	REST_NVGPRS(r1)
+
+	SPECIAL_EXC_LOAD(r10,SRR0)
+	mtspr	SPRN_SRR0,r10
+	SPECIAL_EXC_LOAD(r10,SRR1)
+	mtspr	SPRN_SRR1,r10
+	SPECIAL_EXC_LOAD(r10,SPRG_GEN)
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r10
+	SPECIAL_EXC_LOAD(r10,SPRG_TLB)
+	mtspr	SPRN_SPRG_TLB_SCRATCH,r10
+	SPECIAL_EXC_LOAD(r10,MAS0)
+	mtspr	SPRN_MAS0,r10
+	SPECIAL_EXC_LOAD(r10,MAS1)
+	mtspr	SPRN_MAS1,r10
+	SPECIAL_EXC_LOAD(r10,MAS2)
+	mtspr	SPRN_MAS2,r10
+	SPECIAL_EXC_LOAD(r10,MAS3)
+	mtspr	SPRN_MAS3,r10
+	SPECIAL_EXC_LOAD(r10,MAS6)
+	mtspr	SPRN_MAS6,r10
+	SPECIAL_EXC_LOAD(r10,MAS7)
+	mtspr	SPRN_MAS7,r10
+BEGIN_FTR_SECTION
+	SPECIAL_EXC_LOAD(r10,MAS5)
+	mtspr	SPRN_MAS5,r10
+	SPECIAL_EXC_LOAD(r10,MAS8)
+	mtspr	SPRN_MAS8,r10
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+
+	SPECIAL_EXC_LOAD(r10,DEAR)
+	mtspr	SPRN_DEAR,r10
+	SPECIAL_EXC_LOAD(r10,ESR)
+	mtspr	SPRN_ESR,r10
+
+	stdcx.	r0,0,r1		/* to clear the reservation */
+
+	REST_GPRS(2, 9, r1)
+
+	ld	r10,_CTR(r1)
+	ld	r11,_XER(r1)
+	mtctr	r10
+	mtxer	r11
+
+	blr
+SYM_CODE_END(ret_from_level_except)
+
+.macro ret_from_level srr0 srr1 paca_ex scratch
+	bl	ret_from_level_except
+
+	ld	r10,_LINK(r1)
+	ld	r11,_CCR(r1)
+	ld	r0,GPR13(r1)
+	mtlr	r10
+	mtcr	r11
+
+	REST_GPRS(10, 12, r1)
+	mtspr	\scratch,r0
+
+	std	r10,\paca_ex+EX_R10(r13);
+	std	r11,\paca_ex+EX_R11(r13);
+	ld	r10,_NIP(r1)
+	ld	r11,_MSR(r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	mtspr	\srr0,r10
+	mtspr	\srr1,r11
+	ld	r10,\paca_ex+EX_R10(r13)
+	ld	r11,\paca_ex+EX_R11(r13)
+	mfspr	r13,\scratch
+.endm
+
+SYM_CODE_START_LOCAL(ret_from_crit_except)
+	ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH
+	rfci
+SYM_CODE_END(ret_from_crit_except)
+
+SYM_CODE_START_LOCAL(ret_from_mc_except)
+	ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH
+	rfmci
+SYM_CODE_END(ret_from_mc_except)
+
+/* Exception prolog code for all exceptions */
+#define EXCEPTION_PROLOG(n, intnum, type, addition)	    		    \
+	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */   \
+	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			    \
+	std	r10,PACA_EX##type+EX_R10(r13);				    \
+	std	r11,PACA_EX##type+EX_R11(r13);				    \
+	mfcr	r10;			/* save CR */			    \
+	mfspr	r11,SPRN_##type##_SRR1;/* what are we coming from */	    \
+	DO_KVM	intnum,SPRN_##type##_SRR1;    /* KVM hook */		    \
+	stw	r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
+	addition;			/* additional code for that exc. */ \
+	std	r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */  \
+	type##_SET_KSTACK;		/* get special stack if necessary */\
+	andi.	r10,r11,MSR_PR;		/* test PR bit of saved SRR1 */	    \
+	beq	1f;			/* branch around if supervisor */   \
+	ld	r1,PACAKSAVE(r13);	/* get kernel stack coming from usr */\
+1:	type##_BTB_FLUSH		\
+	cmpdi	cr1,r1,0;		/* check if SP makes sense */	    \
+	bge-	cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
+	mfspr	r10,SPRN_##type##_SRR0;	/* read SRR0 before touching stack */
+
+/* Exception type-specific macros */
+#define	GEN_SET_KSTACK							    \
+	subi	r1,r1,INT_FRAME_SIZE;	/* alloc frame on kernel stack */
+#define SPRN_GEN_SRR0	SPRN_SRR0
+#define SPRN_GEN_SRR1	SPRN_SRR1
+
+#define	GDBELL_SET_KSTACK	GEN_SET_KSTACK
+#define SPRN_GDBELL_SRR0	SPRN_GSRR0
+#define SPRN_GDBELL_SRR1	SPRN_GSRR1
+
+#define CRIT_SET_KSTACK						            \
+	ld	r1,PACA_CRIT_STACK(r13);				    \
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE
+#define SPRN_CRIT_SRR0	SPRN_CSRR0
+#define SPRN_CRIT_SRR1	SPRN_CSRR1
+
+#define DBG_SET_KSTACK						            \
+	ld	r1,PACA_DBG_STACK(r13);					    \
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE
+#define SPRN_DBG_SRR0	SPRN_DSRR0
+#define SPRN_DBG_SRR1	SPRN_DSRR1
+
+#define MC_SET_KSTACK						            \
+	ld	r1,PACA_MC_STACK(r13);					    \
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE
+#define SPRN_MC_SRR0	SPRN_MCSRR0
+#define SPRN_MC_SRR1	SPRN_MCSRR1
+
+#define GEN_BTB_FLUSH			\
+	START_BTB_FLUSH_SECTION		\
+		beq 1f;			\
+		BTB_FLUSH(r10)			\
+		1:		\
+	END_BTB_FLUSH_SECTION
+
+#define CRIT_BTB_FLUSH			\
+	START_BTB_FLUSH_SECTION		\
+		BTB_FLUSH(r10)		\
+	END_BTB_FLUSH_SECTION
+
+#define DBG_BTB_FLUSH CRIT_BTB_FLUSH
+#define MC_BTB_FLUSH CRIT_BTB_FLUSH
+#define GDBELL_BTB_FLUSH GEN_BTB_FLUSH
+
+#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
+
+#define CRIT_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, CRIT, addition##_CRIT(n))
+
+#define DBG_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, DBG, addition##_DBG(n))
+
+#define MC_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, MC, addition##_MC(n))
+
+#define GDBELL_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n))
+
+/* Variants of the "addition" argument for the prolog
+ */
+#define PROLOG_ADDITION_NONE_GEN(n)
+#define PROLOG_ADDITION_NONE_GDBELL(n)
+#define PROLOG_ADDITION_NONE_CRIT(n)
+#define PROLOG_ADDITION_NONE_DBG(n)
+#define PROLOG_ADDITION_NONE_MC(n)
+
+#define PROLOG_ADDITION_MASKABLE_GEN(n)					    \
+	lbz	r10,PACAIRQSOFTMASK(r13);	/* are irqs soft-masked? */ \
+	andi.	r10,r10,IRQS_DISABLED;	/* yes -> go out of line */ \
+	bne	masked_interrupt_book3e_##n
+
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
+#define PROLOG_ADDITION_2REGS_GEN(n)					    \
+	std	r14,PACA_EXGEN+EX_R14(r13);				    \
+	std	r15,PACA_EXGEN+EX_R15(r13)
+
+#define PROLOG_ADDITION_1REG_GEN(n)					    \
+	std	r14,PACA_EXGEN+EX_R14(r13);
+
+#define PROLOG_ADDITION_2REGS_CRIT(n)					    \
+	std	r14,PACA_EXCRIT+EX_R14(r13);				    \
+	std	r15,PACA_EXCRIT+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_DBG(n)					    \
+	std	r14,PACA_EXDBG+EX_R14(r13);				    \
+	std	r15,PACA_EXDBG+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_MC(n)					    \
+	std	r14,PACA_EXMC+EX_R14(r13);				    \
+	std	r15,PACA_EXMC+EX_R15(r13)
+
+/* Core exception code for all exceptions except TLB misses. */
+#define EXCEPTION_COMMON_LVL(n, scratch, excf)				    \
+exc_##n##_common:							    \
+	SAVE_GPR(0, r1);		/* save r0 in stackframe */	    \
+	SAVE_GPRS(2, 9, r1);		/* save r2 - r9 in stackframe */    \
+	std	r10,_NIP(r1);		/* save SRR0 to stackframe */	    \
+	std	r11,_MSR(r1);		/* save SRR1 to stackframe */	    \
+	beq	2f;			/* no-op: 2: is the next insn */    \
+2:	ld	r3,excf+EX_R10(r13);	/* get back r10 */		    \
+	ld	r4,excf+EX_R11(r13);	/* get back r11 */		    \
+	mfspr	r5,scratch;		/* get back r13 */		    \
+	SAVE_GPR(12, r1);		/* save r12 in stackframe */	    \
+	LOAD_PACA_TOC();		/* get kernel TOC into r2 */	    \
+	mflr	r6;			/* save LR in stackframe */	    \
+	mfctr	r7;			/* save CTR in stackframe */	    \
+	mfspr	r8,SPRN_XER;		/* save XER in stackframe */	    \
+	ld	r9,excf+EX_R1(r13);	/* load orig r1 back from PACA */   \
+	lwz	r10,excf+EX_CR(r13);	/* load orig CR back from PACA	*/  \
+	lbz	r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */	    \
+	LOAD_REG_IMMEDIATE(r12, STACK_FRAME_REGS_MARKER);		    \
+	ZEROIZE_GPR(0);							    \
+	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
+	std	r4,GPR11(r1);		/* save r11 to stackframe */	    \
+	std	r5,GPR13(r1);		/* save r13 to stackframe */	    \
+	std	r6,_LINK(r1);						    \
+	std	r7,_CTR(r1);						    \
+	std	r8,_XER(r1);						    \
+	li	r3,(n);			/* regs.trap vector */		    \
+	std	r9,0(r1);		/* store stack frame back link */   \
+	std	r10,_CCR(r1);		/* store orig CR in stackframe */   \
+	std	r9,GPR1(r1);		/* save orig r1 in stackframe */    \
+	std	r11,SOFTE(r1);		/* save IRQ soft-mask state */      \
+	std	r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */	    \
+	std	r3,_TRAP(r1);		/* set trap number		*/  \
+	std	r0,RESULT(r1);		/* clear regs->result */	    \
+	SAVE_NVGPRS(r1);						    \
+	SANITIZE_NVGPRS();		/* minimise speculation influence */
+
+#define EXCEPTION_COMMON(n) \
+	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
+#define EXCEPTION_COMMON_CRIT(n) \
+	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_CRIT_SCRATCH, PACA_EXCRIT)
+#define EXCEPTION_COMMON_MC(n) \
+	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_MC_SCRATCH, PACA_EXMC)
+#define EXCEPTION_COMMON_DBG(n) \
+	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
+
+/* XXX FIXME: Restore r14/r15 when necessary */
+#define BAD_STACK_TRAMPOLINE(n)						    \
+exc_##n##_bad_stack:							    \
+	li	r1,(n);			/* get exception number */	    \
+	sth	r1,PACA_TRAP_SAVE(r13);	/* store trap */		    \
+	b	bad_stack_book3e;	/* bad stack error */
+
+/* WARNING: If you change the layout of this stub, make sure you check
+	*   the debug exception handler which handles single stepping
+	*   into exceptions from userspace, and the MM code in
+	*   arch/powerpc/mm/tlb_nohash.c which patches the branch here
+	*   and would need to be updated if that branch is moved
+	*/
+#define	EXCEPTION_STUB(loc, label)					\
+	. = interrupt_base_book3e + loc;				\
+	nop;	/* To make debug interrupts happy */			\
+	b	exc_##label##_book3e;
+
+#define ACK_NONE(r)
+#define ACK_DEC(r)							\
+	lis	r,TSR_DIS@h;						\
+	mtspr	SPRN_TSR,r
+#define ACK_FIT(r)							\
+	lis	r,TSR_FIS@h;						\
+	mtspr	SPRN_TSR,r
+
+/* Used by asynchronous interrupt that may happen in the idle loop.
+ *
+ * This checks whether the thread was in the idle loop and, if so, returns
+ * to the caller rather than the PC. This is to avoid a race if
+ * interrupts happen before the wait instruction.
+ */
+#define CHECK_NAPPING()							\
+	ld	r11, PACA_THREAD_INFO(r13);				\
+	ld	r10,TI_LOCAL_FLAGS(r11);				\
+	andi.	r9,r10,_TLF_NAPPING;					\
+	beq+	1f;							\
+	ld	r8,_LINK(r1);						\
+	rlwinm	r7,r10,0,~_TLF_NAPPING;					\
+	std	r8,_NIP(r1);						\
+	std	r7,TI_LOCAL_FLAGS(r11);					\
+1:
+
+
+#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)		\
+	START_EXCEPTION(label);						\
+	NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
+	EXCEPTION_COMMON(trapnum)					\
+	ack(r8);							\
+	CHECK_NAPPING();						\
+	addi	r3,r1,STACK_INT_FRAME_REGS;				\
+	bl	hdlr;							\
+	b	interrupt_return
+
+/*
+ * And here we have the exception vectors !
+ */
+
+	.text
+	.balign	0x1000
+	.globl interrupt_base_book3e
+interrupt_base_book3e:					/* fake trap */
+	EXCEPTION_STUB(0x000, machine_check)
+	EXCEPTION_STUB(0x020, critical_input)		/* 0x0100 */
+	EXCEPTION_STUB(0x040, debug_crit)		/* 0x0d00 */
+	EXCEPTION_STUB(0x060, data_storage)		/* 0x0300 */
+	EXCEPTION_STUB(0x080, instruction_storage)	/* 0x0400 */
+	EXCEPTION_STUB(0x0a0, external_input)		/* 0x0500 */
+	EXCEPTION_STUB(0x0c0, alignment)		/* 0x0600 */
+	EXCEPTION_STUB(0x0e0, program)			/* 0x0700 */
+	EXCEPTION_STUB(0x100, fp_unavailable)		/* 0x0800 */
+	EXCEPTION_STUB(0x120, system_call)		/* 0x0c00 */
+	EXCEPTION_STUB(0x140, ap_unavailable)		/* 0x0f20 */
+	EXCEPTION_STUB(0x160, decrementer)		/* 0x0900 */
+	EXCEPTION_STUB(0x180, fixed_interval)		/* 0x0980 */
+	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */
+	EXCEPTION_STUB(0x1c0, data_tlb_miss)
+	EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+	EXCEPTION_STUB(0x200, altivec_unavailable)
+	EXCEPTION_STUB(0x220, altivec_assist)
+	EXCEPTION_STUB(0x260, perfmon)
+	EXCEPTION_STUB(0x280, doorbell)
+	EXCEPTION_STUB(0x2a0, doorbell_crit)
+	EXCEPTION_STUB(0x2c0, guest_doorbell)
+	EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+	EXCEPTION_STUB(0x300, hypercall)
+	EXCEPTION_STUB(0x320, ehpriv)
+	EXCEPTION_STUB(0x340, lrat_error)
+
+	.globl __end_interrupts
+__end_interrupts:
+
+/* Critical Input Interrupt */
+	START_EXCEPTION(critical_input);
+	CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
+			      PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON_CRIT(0x100)
+	bl	special_reg_save
+	CHECK_NAPPING();
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_nmi_exception
+	b	ret_from_crit_except
+
+/* Machine Check Interrupt */
+	START_EXCEPTION(machine_check);
+	MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
+			    PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON_MC(0x000)
+	bl	special_reg_save
+	CHECK_NAPPING();
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	machine_check_exception
+	b	ret_from_mc_except
+
+/* Data Storage Interrupt */
+	START_EXCEPTION(data_storage)
+	NORMAL_EXCEPTION_PROLOG(0x300, BOOKE_INTERRUPT_DATA_STORAGE,
+				PROLOG_ADDITION_2REGS)
+	mfspr	r14,SPRN_DEAR
+	mfspr	r15,SPRN_ESR
+	std	r14,_DEAR(r1)
+	std	r15,_ESR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
+	EXCEPTION_COMMON(0x300)
+	b	storage_fault_common
+
+/* Instruction Storage Interrupt */
+	START_EXCEPTION(instruction_storage);
+	NORMAL_EXCEPTION_PROLOG(0x400, BOOKE_INTERRUPT_INST_STORAGE,
+				PROLOG_ADDITION_2REGS)
+	li	r15,0
+	mr	r14,r10
+	std	r14,_DEAR(r1)
+	std	r15,_ESR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
+	EXCEPTION_COMMON(0x400)
+	b	storage_fault_common
+
+/* External Input Interrupt */
+	MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
+			   external_input, do_IRQ, ACK_NONE)
+
+/* Alignment */
+	START_EXCEPTION(alignment);
+	NORMAL_EXCEPTION_PROLOG(0x600, BOOKE_INTERRUPT_ALIGNMENT,
+				PROLOG_ADDITION_2REGS)
+	mfspr	r14,SPRN_DEAR
+	mfspr	r15,SPRN_ESR
+	std	r14,_DEAR(r1)
+	std	r15,_ESR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
+	EXCEPTION_COMMON(0x600)
+	b	alignment_more	/* no room, go out of line */
+
+/* Program Interrupt */
+	START_EXCEPTION(program);
+	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
+				PROLOG_ADDITION_1REG)
+	mfspr	r14,SPRN_ESR
+	std	r14,_ESR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	EXCEPTION_COMMON(0x700)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+/* Floating Point Unavailable Interrupt */
+	START_EXCEPTION(fp_unavailable);
+	NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,
+				PROLOG_ADDITION_NONE)
+	/* we can probably do a shorter exception entry for that one... */
+	EXCEPTION_COMMON(0x800)
+	ld	r12,_MSR(r1)
+	andi.	r0,r12,MSR_PR;
+	beq-	1f
+	bl	load_up_fpu
+	b	fast_interrupt_return
+1:	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	kernel_fp_unavailable_exception
+	b	interrupt_return
+
+/* Altivec Unavailable Interrupt */
+	START_EXCEPTION(altivec_unavailable);
+	NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL,
+				PROLOG_ADDITION_NONE)
+	/* we can probably do a shorter exception entry for that one... */
+	EXCEPTION_COMMON(0x200)
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	ld	r12,_MSR(r1)
+	andi.	r0,r12,MSR_PR;
+	beq-	1f
+	bl	load_up_altivec
+	b	fast_interrupt_return
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	altivec_unavailable_exception
+	b	interrupt_return
+
+/* AltiVec Assist */
+	START_EXCEPTION(altivec_assist);
+	NORMAL_EXCEPTION_PROLOG(0x220,
+				BOOKE_INTERRUPT_ALTIVEC_ASSIST,
+				PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x220)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	bl	altivec_assist_exception
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	REST_NVGPRS(r1)
+#else
+	bl	unknown_exception
+#endif
+	b	interrupt_return
+
+
+/* Decrementer Interrupt */
+	MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
+			   decrementer, timer_interrupt, ACK_DEC)
+
+/* Fixed Interval Timer Interrupt */
+	MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
+			   fixed_interval, unknown_exception, ACK_FIT)
+
+/* Watchdog Timer Interrupt */
+	START_EXCEPTION(watchdog);
+	CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
+			      PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON_CRIT(0x9f0)
+	bl	special_reg_save
+	CHECK_NAPPING();
+	addi	r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_BOOKE_WDT
+	bl	WatchdogException
+#else
+	bl	unknown_nmi_exception
+#endif
+	b	ret_from_crit_except
+
+/* System Call Interrupt */
+	START_EXCEPTION(system_call)
+	mr	r9,r13			/* keep a copy of userland r13 */
+	mfspr	r11,SPRN_SRR0		/* get return address */
+	mfspr	r12,SPRN_SRR1		/* get previous MSR */
+	mfspr	r13,SPRN_SPRG_PACA	/* get our PACA */
+	b	system_call_common
+
+/* Auxiliary Processor Unavailable Interrupt */
+	START_EXCEPTION(ap_unavailable);
+	NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
+				PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0xf20)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_exception
+	b	interrupt_return
+
+/* Debug exception as a critical interrupt*/
+	START_EXCEPTION(debug_crit);
+	CRIT_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+			      PROLOG_ADDITION_2REGS)
+
+	/*
+	 * If there is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the CSRR1 value and clearing the debug status.
+	 */
+
+	mfspr	r14,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r15,r14,(DBSR_IC|DBSR_BT)@h
+	beq+	1f
+
+#ifdef CONFIG_RELOCATABLE
+	__LOAD_PACA_TOC(r15)
+	LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+	LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
+	cmpld	cr0,r10,r14
+	cmpld	cr1,r10,r15
+#else
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+	cmpld	cr0, r10, r14
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+	cmpld	cr1, r10, r14
+#endif
+	blt+	cr0,1f
+	bge+	cr1,1f
+
+	/* here it looks like we got an inappropriate debug exception. */
+	lis	r14,(DBSR_IC|DBSR_BT)@h		/* clear the event */
+	rlwinm	r11,r11,0,~MSR_DE	/* clear DE in the CSRR1 value */
+	mtspr	SPRN_DBSR,r14
+	mtspr	SPRN_CSRR1,r11
+	lwz	r10,PACA_EXCRIT+EX_CR(r13)	/* restore registers */
+	ld	r1,PACA_EXCRIT+EX_R1(r13)
+	ld	r14,PACA_EXCRIT+EX_R14(r13)
+	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	mtcr	r10
+	ld	r10,PACA_EXCRIT+EX_R10(r13)	/* restore registers */
+	ld	r11,PACA_EXCRIT+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_CRIT_SCRATCH
+	rfci
+
+	/* Normal debug exception */
+	/* XXX We only handle coming from userspace for now since we can't
+	 *     quite save properly an interrupted kernel state yet
+	 */
+1:	andi.	r14,r11,MSR_PR;		/* check for userspace again */
+	beq	kernel_dbg_exc;		/* if from kernel mode */
+
+	/* Now we mash up things to make it look like we are coming on a
+	 * normal exception
+	 */
+	mfspr	r14,SPRN_DBSR
+	std	r14,_DSISR(r1)
+	ld	r14,PACA_EXCRIT+EX_R14(r13)
+	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	EXCEPTION_COMMON_CRIT(0xd00)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	DebugException
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+kernel_dbg_exc:
+	b	.	/* NYI */
+
+/* Debug exception as a debug interrupt*/
+	START_EXCEPTION(debug_debug);
+	DBG_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+						 PROLOG_ADDITION_2REGS)
+
+	/*
+	 * If there is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the DSRR1 value and clearing the debug status.
+	 */
+
+	mfspr	r14,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r15,r14,(DBSR_IC|DBSR_BT)@h
+	beq+	1f
+
+#ifdef CONFIG_RELOCATABLE
+	__LOAD_PACA_TOC(r15)
+	LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+	LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
+	cmpld	cr0,r10,r14
+	cmpld	cr1,r10,r15
+#else
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+	cmpld	cr0, r10, r14
+	LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+	cmpld	cr1, r10, r14
+#endif
+	blt+	cr0,1f
+	bge+	cr1,1f
+
+	/* here it looks like we got an inappropriate debug exception. */
+	lis	r14,(DBSR_IC|DBSR_BT)@h		/* clear the event */
+	rlwinm	r11,r11,0,~MSR_DE	/* clear DE in the DSRR1 value */
+	mtspr	SPRN_DBSR,r14
+	mtspr	SPRN_DSRR1,r11
+	lwz	r10,PACA_EXDBG+EX_CR(r13)	/* restore registers */
+	ld	r1,PACA_EXDBG+EX_R1(r13)
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	mtcr	r10
+	ld	r10,PACA_EXDBG+EX_R10(r13)	/* restore registers */
+	ld	r11,PACA_EXDBG+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_DBG_SCRATCH
+	rfdi
+
+	/* Normal debug exception */
+	/* XXX We only handle coming from userspace for now since we can't
+	 *     quite save properly an interrupted kernel state yet
+	 */
+1:	andi.	r14,r11,MSR_PR;		/* check for userspace again */
+	beq	kernel_dbg_exc;		/* if from kernel mode */
+
+	/* Now we mash up things to make it look like we are coming on a
+	 * normal exception
+	 */
+	mfspr	r14,SPRN_DBSR
+	std	r14,_DSISR(r1)
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	EXCEPTION_COMMON_DBG(0xd08)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	DebugException
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+	START_EXCEPTION(perfmon);
+	NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
+				PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x260)
+	CHECK_NAPPING()
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	/*
+	 * XXX: Returning from performance_monitor_exception taken as a
+	 * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
+	 * and could cause bugs in return or elsewhere. That case should just
+	 * restore registers and return. There is a workaround for one known
+	 * problem in interrupt_exit_kernel_prepare().
+	 */
+	bl	performance_monitor_exception
+	b	interrupt_return
+
+/* Doorbell interrupt */
+	MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
+			   doorbell, doorbell_exception, ACK_NONE)
+
+/* Doorbell critical Interrupt */
+	START_EXCEPTION(doorbell_crit);
+	CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
+			      PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON_CRIT(0x2a0)
+	bl	special_reg_save
+	CHECK_NAPPING();
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_nmi_exception
+	b	ret_from_crit_except
+
+/*
+ *	Guest doorbell interrupt
+ *	This general exception uses GSRRx save/restore registers
+ */
+	START_EXCEPTION(guest_doorbell);
+	GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
+			        PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x2c0)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_exception
+	b	interrupt_return
+
+/* Guest Doorbell critical Interrupt */
+	START_EXCEPTION(guest_doorbell_crit);
+	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
+			      PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON_CRIT(0x2e0)
+	bl	special_reg_save
+	CHECK_NAPPING();
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_nmi_exception
+	b	ret_from_crit_except
+
+/* Hypervisor call */
+	START_EXCEPTION(hypercall);
+	NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
+			        PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x310)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_exception
+	b	interrupt_return
+
+/* Embedded Hypervisor privileged */
+	START_EXCEPTION(ehpriv);
+	NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
+			        PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x320)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_exception
+	b	interrupt_return
+
+/* LRAT Error interrupt */
+	START_EXCEPTION(lrat_error);
+	NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
+			        PROLOG_ADDITION_NONE)
+	EXCEPTION_COMMON(0x340)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	unknown_exception
+	b	interrupt_return
+
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+	__LOAD_PACA_TOC(r11)
+	LOAD_REG_ADDR_ALTTOC(r14, r11, __start___restart_table)
+	LOAD_REG_ADDR_ALTTOC(r15, r11, __stop___restart_table)
+#else
+	LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
+	LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
+300:
+	cmpd	r14,r15		/* r14 = cursor, r15 = end of table */
+	beq	302f		/* table exhausted: no match */
+	ld	r11,0(r14)	/* first doubleword: low bound (inclusive) */
+	cmpld	r10,r11
+	blt	301f		/* r10 below low bound: try next entry */
+	ld	r11,8(r14)	/* second doubleword: high bound (exclusive) */
+	cmpld	r10,r11
+	bge	301f		/* r10 at or above high bound: try next entry */
+	ld	r11,16(r14)	/* match: r11 = third doubleword of the entry */
+	b	303f
+301:
+	addi	r14,r14,24	/* entries are 24 bytes (three doublewords) */
+	b	300b
+302:
+	li	r11,0		/* no entry covers r10: result in r11 is 0 */
+303:
+.endm
+
+/*
+ * An interrupt came in while soft-disabled; we mark paca->irq_happened
+ * accordingly and, if the interrupt is level sensitive, we hard disable.
+ * Hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in sync.
+ */
+
+.macro masked_interrupt_book3e paca_irq full_mask
+	std	r14,PACA_EXGEN+EX_R14(r13)	/* r14/r15 are used by SEARCH_RESTART_TABLE */
+	std	r15,PACA_EXGEN+EX_R15(r13)
+
+	lbz	r10,PACAIRQHAPPENED(r13)
+	.if \full_mask == 1
+	ori	r10,r10,\paca_irq | PACA_IRQ_HARD_DIS
+	.else
+	ori	r10,r10,\paca_irq
+	.endif
+	stb	r10,PACAIRQHAPPENED(r13)
+
+	.if \full_mask == 1
+	xori	r11,r11,MSR_EE		/* clear MSR_EE (xori toggles: assumes EE is set in r11 -- confirm) */
+	mtspr	SPRN_SRR1,r11
+	.endif
+
+	mfspr	r10,SPRN_SRR0		/* r10 = interrupted address, input to the table search */
+	SEARCH_RESTART_TABLE
+	cmpdi	r11,0			/* r11 == 0: no restart entry matched, keep SRR0 */
+	beq	1f
+	mtspr	SPRN_SRR0,r11		/* return to restart address */
+1:
+
+	lwz	r11,PACA_EXGEN+EX_CR(r13)
+	mtcr	r11
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	ld	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
+	mfspr	r13,SPRN_SPRG_GEN_SCRATCH
+	rfi
+	b	.
+.endm
+
+masked_interrupt_book3e_0x500:
+	masked_interrupt_book3e PACA_IRQ_EE 1
+
+masked_interrupt_book3e_0x900:
+	ACK_DEC(r10);
+	masked_interrupt_book3e PACA_IRQ_DEC 0
+
+masked_interrupt_book3e_0x980:
+	ACK_FIT(r10);
+	masked_interrupt_book3e PACA_IRQ_DEC 0
+
+masked_interrupt_book3e_0x280:
+masked_interrupt_book3e_0x2c0:
+	masked_interrupt_book3e PACA_IRQ_DBELL 0
+
+/*
+ * This is called from 0x300 and 0x400 handlers after the prologs with
+ * r14 and r15 containing the fault address and error code, with the
+ * original values stashed away in the PACA
+ */
+SYM_CODE_START_LOCAL(storage_fault_common)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	do_page_fault
+	b	interrupt_return
+SYM_CODE_END(storage_fault_common)
+
+/*
+ * Alignment exception doesn't fit entirely in the 0x100 bytes so it
+ * continues here.
+ */
+SYM_CODE_START_LOCAL(alignment_more)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	alignment_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+SYM_CODE_END(alignment_more)
+
+/*
+ * Trampolines used when spotting a bad kernel stack pointer in
+ * the exception entry code.
+ *
+ * TODO: move some bits like SRR0 read to trampoline, pass PACA
+ * index around, etc... to handle crit & mcheck
+ */
+BAD_STACK_TRAMPOLINE(0x000)
+BAD_STACK_TRAMPOLINE(0x100)
+BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x220)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x280)
+BAD_STACK_TRAMPOLINE(0x2a0)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
+BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
+BAD_STACK_TRAMPOLINE(0x340)
+BAD_STACK_TRAMPOLINE(0x400)
+BAD_STACK_TRAMPOLINE(0x500)
+BAD_STACK_TRAMPOLINE(0x600)
+BAD_STACK_TRAMPOLINE(0x700)
+BAD_STACK_TRAMPOLINE(0x800)
+BAD_STACK_TRAMPOLINE(0x900)
+BAD_STACK_TRAMPOLINE(0x980)
+BAD_STACK_TRAMPOLINE(0x9f0)
+BAD_STACK_TRAMPOLINE(0xa00)
+BAD_STACK_TRAMPOLINE(0xb00)
+BAD_STACK_TRAMPOLINE(0xc00)
+BAD_STACK_TRAMPOLINE(0xd00)
+BAD_STACK_TRAMPOLINE(0xd08)
+BAD_STACK_TRAMPOLINE(0xe00)
+BAD_STACK_TRAMPOLINE(0xf00)
+BAD_STACK_TRAMPOLINE(0xf20)
+
+_GLOBAL(bad_stack_book3e)
+	/* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
+	mfspr	r10,SPRN_SRR0;		  /* read SRR0 before touching stack */
+	ld	r1,PACAEMERGSP(r13)	/* switch to the PACA emergency stack */
+	subi	r1,r1,64+INT_FRAME_SIZE
+	std	r10,_NIP(r1)
+	std	r11,_MSR(r1)
+	ld	r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */
+	lwz	r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */
+	std	r10,GPR1(r1)
+	std	r11,_CCR(r1)
+	mfspr	r10,SPRN_DEAR
+	mfspr	r11,SPRN_ESR
+	std	r10,_DEAR(r1)
+	std	r11,_ESR(r1)
+	SAVE_GPR(0, r1);		/* save r0 in stackframe */
+	SAVE_GPRS(2, 9, r1);		/* save r2 - r9 in stackframe */
+	ld	r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */
+	ld	r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */
+	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */
+	std	r3,GPR10(r1);		/* save r10 to stackframe */
+	std	r4,GPR11(r1);		/* save r11 to stackframe */
+	SAVE_GPR(12, r1);		/* save r12 in stackframe */
+	std	r5,GPR13(r1);		/* save it to stackframe */
+	mflr	r10
+	mfctr	r11
+	mfxer	r12
+	std	r10,_LINK(r1)
+	std	r11,_CTR(r1)
+	std	r12,_XER(r1)
+	SAVE_NVGPRS(r1)
+	lhz	r12,PACA_TRAP_SAVE(r13)
+	std	r12,_TRAP(r1)
+	addi	r11,r1,INT_FRAME_SIZE
+	std	r11,0(r1)		/* word at 0(r1) points just above this frame */
+	ZEROIZE_GPR(12)			/* presumably zeroes r12 -- confirm */
+	std	r12,0(r11)		/* and the word found there is set from r12 */
+	LOAD_PACA_TOC()
+1:	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	kernel_bad_stack
+	b	1b			/* if kernel_bad_stack returns, call it again */
+
+/*
+ * Setup the initial TLB for a core. The current implementation
+ * assumes that whatever we are running off will not conflict with
+ * the new mapping at PAGE_OFFSET.
+ */
+_GLOBAL(initial_tlb_book3e)
+
+	/* Look for the first TLB with IPROT set */
+	mfspr	r4,SPRN_TLB0CFG
+	andi.	r3,r4,TLBnCFG_IPROT
+	lis	r3,MAS0_TLBSEL(0)@h	/* lis leaves CR0 from the andi. intact */
+	bne	found_iprot
+
+	mfspr	r4,SPRN_TLB1CFG
+	andi.	r3,r4,TLBnCFG_IPROT
+	lis	r3,MAS0_TLBSEL(1)@h
+	bne	found_iprot
+
+	mfspr	r4,SPRN_TLB2CFG
+	andi.	r3,r4,TLBnCFG_IPROT
+	lis	r3,MAS0_TLBSEL(2)@h
+	bne	found_iprot
+
+	lis	r3,MAS0_TLBSEL(3)@h
+	mfspr	r4,SPRN_TLB3CFG
+	/* fall through */
+
+found_iprot:
+	andi.	r5,r4,TLBnCFG_HES
+	bne	have_hes
+
+	mflr	r8				/* save LR */
+/* 1. Find the index of the entry we're executing in
+ *
+ * r3 = MAS0_TLBSEL (for the iprot array)
+ * r4 = SPRN_TLBnCFG
+ */
+	bcl	20,31,$+4			/* Find our address */
+invstr:	mflr	r6				/* Make it accessible */
+	mfmsr	r7
+	rlwinm	r5,r7,27,31,31			/* extract MSR[IS] */
+	mfspr	r7,SPRN_PID
+	slwi	r7,r7,16
+	or	r7,r7,r5
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* search MSR[IS], SPID=PID */
+
+	mfspr	r3,SPRN_MAS0
+	rlwinm	r5,r3,16,20,31			/* Extract MAS0(Entry) */
+
+	mfspr	r7,SPRN_MAS1			/* Ensure IPROT set */
+	oris	r7,r7,MAS1_IPROT@h
+	mtspr	SPRN_MAS1,r7
+	tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r4 = SPRN_TLBnCFG
+ * r5 = ESEL of entry we are running in
+ */
+	andi.	r4,r4,TLBnCFG_N_ENTRY		/* Extract # entries */
+	li	r6,0				/* Set Entry counter to 0 */
+1:	mr	r7,r3				/* Set MAS0(TLBSEL) */
+	rlwimi	r7,r6,16,4,15			/* Setup MAS0 = TLBSEL | ESEL(r6) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mfspr	r7,SPRN_MAS1
+	rlwinm	r7,r7,0,2,31			/* Clear MAS1 Valid and IPROT */
+	cmpw	r5,r6
+	beq	skpinv				/* Don't update the current execution TLB */
+	mtspr	SPRN_MAS1,r7
+	tlbwe
+	isync
+skpinv:	addi	r6,r6,1				/* Increment */
+	cmpw	r6,r4				/* Are we done? */
+	bne	1b				/* If not, repeat */
+
+	/* Invalidate all TLBs */
+	PPC_TLBILX_ALL(0,R0)
+	sync
+	isync
+
+/* 3. Setup a temp mapping and jump to it
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r5 = ESEL of entry we are running in
+ */
+	andi.	r7,r5,0x1	/* Find an entry not used and is non-zero */
+	addi	r7,r7,0x1
+	mr	r4,r3		/* Start from MAS0 of the entry we are running in */
+	mtspr	SPRN_MAS0,r4
+	tlbre
+
+	rlwimi	r4,r7,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r7) */
+	mtspr	SPRN_MAS0,r4
+
+	mfspr	r7,SPRN_MAS1
+	xori	r6,r7,MAS1_TS		/* Setup TMP mapping in the other Address space */
+	mtspr	SPRN_MAS1,r6
+
+	tlbwe
+
+	mfmsr	r6
+	xori	r6,r6,MSR_IS
+	mtspr	SPRN_SRR1,r6
+	bcl	20,31,$+4	/* Find our address */
+1:	mflr	r6
+	addi	r6,r6,(2f - 1b)
+	mtspr	SPRN_SRR0,r6
+	rfi
+2:
+
+/* 4. Clear out PIDs & Search info
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3 (NOTE(review): r5 was last set to the ESEL in step 1 -- confirm)
+ */
+	li	r6,0
+	mtspr   SPRN_MAS6,r6
+	mtspr	SPRN_PID,r6
+
+/* 5. Invalidate mapping we started in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3 (NOTE(review): r5 was last set to the ESEL in step 1 -- confirm)
+ */
+	mtspr	SPRN_MAS0,r3
+	tlbre
+	mfspr	r6,SPRN_MAS1
+	rlwinm	r6,r6,0,2,31	/* clear IPROT and VALID */
+	mtspr	SPRN_MAS1,r6
+	tlbwe
+	sync
+	isync
+
+/* 6. Setup KERNELBASE mapping in TLB[0]
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3 (NOTE(review): r5 was last set to the ESEL in step 1 -- confirm)
+ */
+	rlwinm	r3,r3,0,16,3	/* clear ESEL */
+	mtspr	SPRN_MAS0,r3
+	lis	r6,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
+	mtspr	SPRN_MAS1,r6
+
+	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
+	mtspr	SPRN_MAS2,r6
+
+	rlwinm	r5,r5,0,0,25
+	ori	r5,r5,MAS3_SR | MAS3_SW | MAS3_SX
+	mtspr	SPRN_MAS3,r5
+	li	r5,-1		/* NOTE(review): r5 appears unused until step 8 reloads it */
+	rlwinm	r5,r5,0,0,25
+
+	tlbwe
+
+/* 7. Jump to KERNELBASE mapping
+ *
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ */
+	/* Now we branch to the new virtual address mapped by this entry */
+	bcl	20,31,$+4	/* Find our address */
+1:	mflr	r6
+	addi	r6,r6,(2f - 1b)
+	tovirt(r6,r6)
+	lis	r7,MSR_KERNEL@h
+	ori	r7,r7,MSR_KERNEL@l
+	mtspr	SPRN_SRR0,r6
+	mtspr	SPRN_SRR1,r7
+	rfi				/* start execution out of TLB1[0] entry */
+2:
+
+/* 8. Clear out the temp mapping
+ *
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ */
+	mtspr	SPRN_MAS0,r4
+	tlbre
+	mfspr	r5,SPRN_MAS1
+	rlwinm	r5,r5,0,2,31	/* clear IPROT and VALID */
+	mtspr	SPRN_MAS1,r5
+	tlbwe
+	sync
+	isync
+
+	/* We translate LR and return */
+	tovirt(r8,r8)
+	mtlr	r8
+	blr
+
+have_hes:
+	/* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the
+	 * kernel linear mapping. We also set MAS8 once for all here though
+	 * that will have to be made dependent on whether we are running under
+	 * a hypervisor I suppose.
+	 */
+
+	/* BEWARE, MAGIC
+	 * This code is called as an ordinary function on the boot CPU. But to
+	 * avoid duplication, this code is also used in SCOM bringup of
+	 * secondary CPUs. We read the code between the a2_tlbinit_code_start
+	 * and a2_tlbinit_code_end labels one instruction at a time and RAM it
+	 * into the new core via SCOM. That doesn't process branches, so there
+	 * must be none between those two labels. It also means if this code
+	 * ever takes any parameters, the SCOM code must also be updated to
+	 * provide them.
+	 */
+_GLOBAL(a2_tlbinit_code_start)
+
+	ori	r11,r3,MAS0_WQ_ALLWAYS
+	oris	r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+	mtspr	SPRN_MAS0,r11
+	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
+	ori	r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
+	mtspr	SPRN_MAS1,r3
+	LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | MAS2_M)
+	mtspr	SPRN_MAS2,r3
+	li	r3,MAS3_SR | MAS3_SW | MAS3_SX
+	mtspr	SPRN_MAS7_MAS3,r3
+	li	r3,0
+	mtspr	SPRN_MAS8,r3
+
+	/* Write the TLB entry */
+	tlbwe
+
+	.globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
+	/* Now we branch to the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+	__LOAD_PACA_TOC(r5)
+	LOAD_REG_ADDR_ALTTOC(r3, r5, 1f)
+#else
+	LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
+	mtctr	r3
+	bctr
+
+1:	/* We are now running at PAGE_OFFSET, clean the TLB of everything
+	 * else (including IPROTed things left by firmware)
+	 * r4 = TLBnCFG
+	 * r3 = current address (more or less)
+	 */
+
+	li	r5,0
+	mtspr	SPRN_MAS6,r5
+	tlbsx	0,r3
+
+	rlwinm	r9,r4,0,TLBnCFG_N_ENTRY
+	rlwinm	r10,r4,8,0xff
+	addi	r10,r10,-1	/* Get inner loop mask */
+
+	li	r3,1
+
+	mfspr	r5,SPRN_MAS1
+	rlwinm	r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+	mfspr	r6,SPRN_MAS2
+	rldicr	r6,r6,0,51		/* Extract EPN */
+
+	mfspr	r7,SPRN_MAS0
+	rlwinm	r7,r7,0,0xffff0fff	/* Clear HES and WQ */
+
+	rlwinm	r8,r7,16,0xfff		/* Extract ESEL */
+
+2:	add	r4,r3,r8
+	and	r4,r4,r10
+
+	rlwimi	r7,r4,16,MAS0_ESEL_MASK
+
+	mtspr	SPRN_MAS0,r7
+	mtspr	SPRN_MAS1,r5
+	mtspr	SPRN_MAS2,r6
+	tlbwe
+
+	addi	r3,r3,1
+	and.	r4,r3,r10
+
+	bne	3f
+	addis	r6,r6,(1<<30)@h
+3:
+	cmpw	r3,r9
+	blt	2b
+
+	.globl  a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+	PPC_TLBILX(0,0,R0)
+	sync
+	isync
+
+	.globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
+	/* We translate LR and return */
+	mflr	r3
+	tovirt(r3,r3)
+	mtlr	r3
+	blr
+
+/*
+ * Main entry (boot CPU, thread 0)
+ *
+ * We enter here from head_64.S, possibly after the prom_init trampoline
+ * with r3 and r4 already saved to r31 and 30 respectively and in 64 bits
+ * mode. Anything else is as it was left by the bootloader
+ *
+ * Initial requirements of this port:
+ *
+ * - Kernel loaded at 0 physical
+ * - A good lump of memory mapped 0:0 by UTLB entry 0
+ * - MSR:IS & MSR:DS set to 0
+ *
+ * Note that some of the above requirements will be relaxed in the future
+ * as the kernel becomes smarter at dealing with different initial conditions
+ * but for now you have to be careful
+ */
+_GLOBAL(start_initialization_book3e)
+	mflr	r28
+
+	/* First, we need to setup some initial TLBs to map the kernel
+	 * text, data and bss at PAGE_OFFSET. We don't have a real mode
+	 * and always use AS 0, so we just set it up to match our link
+	 * address and never use 0 based addresses.
+	 */
+	bl	initial_tlb_book3e
+
+	/* Init global core bits */
+	bl	init_core_book3e
+
+	/* Init per-thread bits */
+	bl	init_thread_book3e
+
+	/* Return to common init code */
+	tovirt(r28,r28)
+	mtlr	r28
+	blr
+
+
+/*
+ * Secondary core/processor entry
+ *
+ * This is entered for thread 0 of a secondary core, all other threads
+ * are expected to be stopped. It's similar to start_initialization_book3e
+ * except that it's generally entered from the holding loop in head_64.S
+ * after CPUs have been gathered by Open Firmware.
+ *
+ * We assume we are in 32 bits mode running with whatever TLB entry was
+ * set for us by the firmware or POR engine.
+ */
+_GLOBAL(book3e_secondary_core_init_tlb_set)
+	li	r4,1
+	b	generic_secondary_smp_init
+
+_GLOBAL(book3e_secondary_core_init)
+	mflr	r28
+
+	/* Do we need to setup initial TLB entry ? (r4 != 0 skips it) */
+	cmplwi	r4,0
+	bne	2f
+
+	/* Setup TLB for this core */
+	bl	initial_tlb_book3e
+
+	/* We can return from the above running at a different
+	 * address, so recalculate r2 (TOC)
+	 */
+	bl	relative_toc
+
+	/* Init global core bits */
+2:	bl	init_core_book3e
+
+	/* Init per-thread bits (book3e_secondary_thread_init branches to 3:) */
+3:	bl	init_thread_book3e
+
+	/* Return to common init code at proper virtual address.
+	 *
+	 * Due to various previous assumptions, we know we entered this
+	 * function at either the final PAGE_OFFSET mapping or using a
+	 * 1:1 mapping at 0, so we don't bother doing a complicated check
+	 * here, we just ensure the return address has the right top bits.
+	 *
+	 * Note that if we ever want to be smarter about where we can be
+	 * started from, we have to be careful that by the time we reach
+	 * the code below we may already be running at a different location
+	 * than the one we were called from since initial_tlb_book3e can
+	 * have moved us already.
+	 */
+	cmpdi	cr0,r28,0
+	blt	1f		/* negative as signed 64-bit (top bit set): use as is */
+	lis	r3,PAGE_OFFSET@highest
+	sldi	r3,r3,32
+	or	r28,r28,r3	/* OR the top 16 bits of PAGE_OFFSET into the saved LR */
+1:	mtlr	r28
+	blr
+
+_GLOBAL(book3e_secondary_thread_init)
+	mflr	r28
+	b	3b
+
+_GLOBAL(init_core_book3e)
+	/* Establish the interrupt vector base */
+	tovirt(r2,r2)
+	LOAD_REG_ADDR(r3, interrupt_base_book3e)
+	mtspr	SPRN_IVPR,r3
+	sync
+	blr
+
+SYM_CODE_START_LOCAL(init_thread_book3e)
+	lis	r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
+	mtspr	SPRN_EPCR,r3
+
+	/* Make sure interrupts are off */
+	wrteei	0
+
+	/* disable all timers and clear out status */
+	li	r3,0
+	mtspr	SPRN_TCR,r3
+	mfspr	r3,SPRN_TSR
+	mtspr	SPRN_TSR,r3
+
+	blr
+SYM_CODE_END(init_thread_book3e)
+
+_GLOBAL(__setup_base_ivors)
+	SET_IVOR(0, 0x020) /* Critical Input */
+	SET_IVOR(1, 0x000) /* Machine Check */
+	SET_IVOR(2, 0x060) /* Data Storage */ 
+	SET_IVOR(3, 0x080) /* Instruction Storage */
+	SET_IVOR(4, 0x0a0) /* External Input */ 
+	SET_IVOR(5, 0x0c0) /* Alignment */ 
+	SET_IVOR(6, 0x0e0) /* Program */ 
+	SET_IVOR(7, 0x100) /* FP Unavailable */ 
+	SET_IVOR(8, 0x120) /* System Call */ 
+	SET_IVOR(9, 0x140) /* Auxiliary Processor Unavailable */ 
+	SET_IVOR(10, 0x160) /* Decrementer */ 
+	SET_IVOR(11, 0x180) /* Fixed Interval Timer */ 
+	SET_IVOR(12, 0x1a0) /* Watchdog Timer */ 
+	SET_IVOR(13, 0x1c0) /* Data TLB Error */ 
+	SET_IVOR(14, 0x1e0) /* Instruction TLB Error */
+	SET_IVOR(15, 0x040) /* Debug */
+
+	sync
+
+	blr
+
+_GLOBAL(setup_altivec_ivors)
+	SET_IVOR(32, 0x200) /* AltiVec Unavailable */
+	SET_IVOR(33, 0x220) /* AltiVec Assist */
+	blr
+
+_GLOBAL(setup_perfmon_ivor)
+	SET_IVOR(35, 0x260) /* Performance Monitor */
+	blr
+
+_GLOBAL(setup_doorbell_ivors)
+	SET_IVOR(36, 0x280) /* Processor Doorbell */
+	SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+	blr
+
+_GLOBAL(setup_ehv_ivors)
+	SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+	SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+	SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+	SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+	blr
+
+_GLOBAL(setup_lrat_ivor)
+	SET_IVOR(42, 0x340) /* LRAT Error */
+	blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
new file mode 100644
index 0000000000..c33c8ebf86
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -0,0 +1,3157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file contains the 64-bit "server" PowerPC variant
+ * of the low level exception handling including exception
+ * vectors, exception return, part of the slb and stab
+ * handling and other fixed offset specific things.
+ *
+ * This file is meant to be #included from head_64.S due to
+ * position dependent assembly.
+ *
+ * Most of this originates from head_64.S and thus has the same
+ * copyright history.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/hw_irq.h>
+#include <asm/exception-64s.h>
+#include <asm/ptrace.h>
+#include <asm/cpuidle.h>
+#include <asm/head-64.h>
+#include <asm/feature-fixups.h>
+#include <asm/kup.h>
+
+/*
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END  - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END  - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN    - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN    - virt, unreloc helpers (in practice, real can use)
+ * EXC_COMMON          - After switching to virtual, relocated mode.
+ */
+
+#define EXC_REAL_BEGIN(name, start, size)			\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size)				\
+	FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size)			\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size)				\
+	FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name)					\
+	USE_TEXT_SECTION();					\
+	.balign IFETCH_ALIGN_BYTES;				\
+	.global name;						\
+	_ASM_NOKPROBE_SYMBOL(name);				\
+	DEFINE_FIXED_SYMBOL(name, text);			\
+name:
+
+#define TRAMP_REAL_BEGIN(name)					\
+	FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name)					\
+	FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#define EXC_REAL_NONE(start, size)				\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+	FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size)				\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+	FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
+
+/*
+ * We're short on space and time in the exception prolog, so we can't
+ * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
+ * Instead we get the base of the kernel from paca->kernelbase and or in the low
+ * part of label. This requires that the label be within 64KB of kernelbase, and
+ * that kernelbase be 64K aligned.
+ */
+#define LOAD_HANDLER(reg, label)					\
+	ld	reg,PACAKBASE(r13);	/* get high part of &label */	\
+	ori	reg,reg,FIXED_SYMBOL_ABS_ADDR(label)	/* OR in the low part of &label */
+
+#define __LOAD_HANDLER(reg, label, section)					\
+	ld	reg,PACAKBASE(r13);	/* reg = paca->kernelbase */	\
+	ori	reg,reg,(ABS_ADDR(label, section))@l	/* OR in the low (@l) part of label, taken relative to section */
+
+/*
+ * Branches from unrelocated code (e.g., interrupts) to labels outside
+ * head-y require >64K offsets.
+ */
+#define __LOAD_FAR_HANDLER(reg, label, section)					\
+	ld	reg,PACAKBASE(r13);	/* reg = paca->kernelbase */	\
+	ori	reg,reg,(ABS_ADDR(label, section))@l;	/* OR in the low (@l) part */	\
+	addis	reg,reg,(ABS_ADDR(label, section))@h	/* add the high (@h) part so labels beyond 64K are reachable */
+
+/*
+ * Interrupt code generation macros
+ */
+#define IVEC		.L_IVEC_\name\()	/* Interrupt vector address */
+#define IHSRR		.L_IHSRR_\name\()	/* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE	.L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA		.L_IAREA_\name\()	/* PACA save area */
+#define IVIRT		.L_IVIRT_\name\()	/* Has virt mode entry point */
+#define IISIDE		.L_IISIDE_\name\()	/* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR		.L_ICFAR_\name\()	/* Uses CFAR */
+#define ICFAR_IF_HVMODE	.L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
+#define IDAR		.L_IDAR_\name\()	/* Uses DAR (or SRR0) */
+#define IDSISR		.L_IDSISR_\name\()	/* Uses DSISR (or SRR1) */
+#define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON	.L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK		.L_IMASK_\name\()	/* IRQ soft-mask bit */
+#define IKVM_REAL	.L_IKVM_REAL_\name\()	/* Real entry tests KVM */
+#define __IKVM_REAL(name)	.L_IKVM_REAL_ ## name
+#define IKVM_VIRT	.L_IKVM_VIRT_\name\()	/* Virt entry tests KVM */
+#define ISTACK		.L_ISTACK_\name\()	/* Set regular kernel stack */
+#define __ISTACK(name)	.L_ISTACK_ ## name
+#define IKUAP		.L_IKUAP_\name\()	/* Do KUAP lock */
+#define IMSR_R12	.L_IMSR_R12_\name\()	/* Assumes MSR saved to r12 */
+
+#define INT_DEFINE_BEGIN(n)						\
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n)						\
+.endm ;									\
+int_define_ ## n n ;							\
+do_define_int n
+
+.macro do_define_int name	/* Give every I* attribute of interrupt "name" a value; only IVEC is mandatory */
+	.ifndef IVEC
+		.error "IVEC not defined"
+	.endif
+	.ifndef IHSRR
+		IHSRR=0			/* default: SRR, not HSRR, registers */
+	.endif
+	.ifndef IHSRR_IF_HVMODE
+		IHSRR_IF_HVMODE=0	/* default: no HV-dependent HSRR selection */
+	.endif
+	.ifndef IAREA
+		IAREA=PACA_EXGEN	/* default PACA save area */
+	.endif
+	.ifndef IVIRT
+		IVIRT=1			/* default: has a virt mode entry point */
+	.endif
+	.ifndef IISIDE
+		IISIDE=0		/* default: DAR/DSISR, not SRR0/1 */
+	.endif
+	.ifndef ICFAR
+		ICFAR=1			/* default: CFAR is saved */
+	.endif
+	.ifndef ICFAR_IF_HVMODE
+		ICFAR_IF_HVMODE=0
+	.endif
+	.ifndef IDAR
+		IDAR=0			/* default: DAR is not saved */
+	.endif
+	.ifndef IDSISR
+		IDSISR=0		/* default: DSISR is not saved */
+	.endif
+	.ifndef IBRANCH_TO_COMMON
+		IBRANCH_TO_COMMON=1	/* default: entry code branches to the common handler */
+	.endif
+	.ifndef IREALMODE_COMMON
+		IREALMODE_COMMON=0	/* default: common handler does not run in real mode */
+	.else
+		.if ! IBRANCH_TO_COMMON	/* an explicitly set IREALMODE_COMMON needs the branch to common */
+			.error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+		.endif
+	.endif
+	.ifndef IMASK
+		IMASK=0			/* default: no soft-mask bit, so no soft-mask test is emitted */
+	.endif
+	.ifndef IKVM_REAL
+		IKVM_REAL=0		/* default: real entry does not test KVM */
+	.endif
+	.ifndef IKVM_VIRT
+		IKVM_VIRT=0		/* default: virt entry does not test KVM */
+	.endif
+	.ifndef ISTACK
+		ISTACK=1		/* default: set the regular kernel stack */
+	.endif
+	.ifndef IKUAP
+		IKUAP=1			/* default: do the KUAP lock */
+	.endif
+	.ifndef IMSR_R12
+		IMSR_R12=0		/* default: r12 is sanitized along with r9-r11 */
+	.endif
+.endm
+
+/*
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1  (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
+ *
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
+ */
+
+/*
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle.
+ */
+
+.macro KVMTEST name handler	/* If HSTATE_IN_GUEST is non-zero, branch to handler with r10 = trap number */
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	lbz	r10,HSTATE_IN_GUEST(r13)	/* r10 = in-guest state byte, addressed off r13 */
+	cmpwi	r10,0				/* cr0 records the result; the li below leaves cr0 alone */
+	/* HSRR variants have the 0x2 bit added to their trap number */
+	.if IHSRR_IF_HVMODE
+	BEGIN_FTR_SECTION
+	li	r10,(IVEC + 0x2)
+	FTR_SECTION_ELSE
+	li	r10,(IVEC)
+	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+	.elseif IHSRR
+	li	r10,(IVEC + 0x2)
+	.else
+	li	r10,(IVEC)
+	.endif
+	bne	\handler			/* taken when the state byte was non-zero */
+#endif
+.endm
+
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ *   These are normal interrupts (synchronous and asynchronous) handled by
+ *   the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ *   These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ *   / intended for host or guest kernel, but KVM must always be involved
+ *   because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ *   These occur when maskable asynchronous interrupts are taken with the
+ *   irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ *   This is done if IREALMODE_COMMON=1. MCE and HMI use these to handle
+ *   errors in real mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ *   This is done if IBRANCH_TO_COMMON=0.
+ */
+
+.macro GEN_BRANCH_TO_COMMON name, virt	/* Emit the jump from entry code to the common handler; clobbers r10 and CTR on the indirect paths */
+	.if IREALMODE_COMMON			/* common handler runs in real mode: single target label */
+	LOAD_HANDLER(r10, \name\()_common)
+	mtctr	r10
+	bctr
+	.else
+	.if \virt				/* generating the virt mode entry */
+#ifndef CONFIG_RELOCATABLE
+	b	\name\()_common_virt		/* not relocatable: plain relative branch */
+#else
+	LOAD_HANDLER(r10, \name\()_common_virt)	/* relocatable: build the address from kernelbase */
+	mtctr	r10
+	bctr
+#endif
+	.else					/* generating the real mode entry */
+	LOAD_HANDLER(r10, \name\()_common_real)
+	mtctr	r10
+	bctr
+	.endif
+	.endif
+.endm
+
+.macro GEN_INT_ENTRY name, virt, ool=0	/* Entry prolog: save r9-r13, CTR and optional PPR/CFAR/DAR/DSISR to IAREA; leave CR in r9, [H]SRR0/1 in r11/r12 */
+	SET_SCRATCH0(r13)			/* save r13 */
+	GET_PACA(r13)
+	std	r9,IAREA+EX_R9(r13)		/* save r9 */
+BEGIN_FTR_SECTION
+	mfspr	r9,SPRN_PPR			/* r9 = PPR, stored to EX_PPR further down */
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	HMT_MEDIUM
+	std	r10,IAREA+EX_R10(r13)		/* save r10 */
+	.if ICFAR
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_CFAR			/* r10 = CFAR, stored to EX_CFAR further down */
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	.elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+  BEGIN_FTR_SECTION_NESTED(69)
+	mfspr	r10,SPRN_CFAR
+  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(69)
+	li	r10,0				/* alternative section: 0 stands in for CFAR */
+  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+	.endif
+	.if \ool				/* out of line: the rest of the prolog goes into a trampoline */
+	.if !\virt
+	b	tramp_real_\name
+	.pushsection .text
+	TRAMP_REAL_BEGIN(tramp_real_\name)
+	.else
+	b	tramp_virt_\name
+	.pushsection .text
+	TRAMP_VIRT_BEGIN(tramp_virt_\name)
+	.endif
+	.endif
+
+BEGIN_FTR_SECTION
+	std	r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	.if ICFAR || ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+	std	r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	.endif
+	INTERRUPT_TO_KERNEL
+	mfctr	r10
+	std	r10,IAREA+EX_CTR(r13)		/* save CTR; it is reused for the branch to common */
+	mfcr	r9				/* r9 = CR, kept live for the common handler */
+	std	r11,IAREA+EX_R11(r13)		/* save r11 - r12 */
+	std	r12,IAREA+EX_R12(r13)
+
+	/*
+	 * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+	 * because a d-side MCE will clobber those registers so is
+	 * not recoverable if they are live.
+	 */
+	GET_SCRATCH0(r10)
+	std	r10,IAREA+EX_R13(r13)		/* original r13, parked in SCRATCH0 at the top */
+	.if IDAR && !IISIDE
+	.if IHSRR
+	mfspr	r10,SPRN_HDAR
+	.else
+	mfspr	r10,SPRN_DAR
+	.endif
+	std	r10,IAREA+EX_DAR(r13)
+	.endif
+	.if IDSISR && !IISIDE
+	.if IHSRR
+	mfspr	r10,SPRN_HDSISR
+	.else
+	mfspr	r10,SPRN_DSISR
+	.endif
+	stw	r10,IAREA+EX_DSISR(r13)		/* stored as a 32-bit word */
+	.endif
+
+	.if IHSRR_IF_HVMODE
+	BEGIN_FTR_SECTION
+	mfspr	r11,SPRN_HSRR0		/* save HSRR0 */
+	mfspr	r12,SPRN_HSRR1		/* and HSRR1 */
+	FTR_SECTION_ELSE
+	mfspr	r11,SPRN_SRR0		/* save SRR0 */
+	mfspr	r12,SPRN_SRR1		/* and SRR1 */
+	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+	.elseif IHSRR
+	mfspr	r11,SPRN_HSRR0		/* save HSRR0 */
+	mfspr	r12,SPRN_HSRR1		/* and HSRR1 */
+	.else
+	mfspr	r11,SPRN_SRR0		/* save SRR0 */
+	mfspr	r12,SPRN_SRR1		/* and SRR1 */
+	.endif
+
+	.if IBRANCH_TO_COMMON		/* when 0, execution falls through to whatever follows the macro */
+	GEN_BRANCH_TO_COMMON \name \virt
+	.endif
+
+	.if \ool
+	.popsection			/* matches the .pushsection emitted above */
+	.endif
+.endm
+
+/*
+ * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
+ * entry, except in the case of the real-mode handlers which require
+ * __GEN_REALMODE_COMMON_ENTRY.
+ *
+ * This switches to virtual mode and sets MSR[RI].
+ */
+.macro __GEN_COMMON_ENTRY name	/* Emit the name_common_real and (if IVIRT) name_common_virt labels; clobbers r10 */
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
+\name\()_common_real:
+	.if IKVM_REAL
+		KVMTEST \name kvm_interrupt
+	.endif
+
+	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
+	/* MSR[RI] is clear iff using SRR regs */
+	.if IHSRR_IF_HVMODE
+	BEGIN_FTR_SECTION
+	xori	r10,r10,MSR_RI		/* toggle RI in the value about to be written */
+	END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+	.elseif ! IHSRR
+	xori	r10,r10,MSR_RI		/* toggle RI in the value about to be written */
+	.endif
+	mtmsrd	r10			/* install the kernel MSR value */
+
+	.if IVIRT
+	.if IKVM_VIRT
+	b	1f /* skip the virt test coming from real */
+	.endif
+
+	.balign IFETCH_ALIGN_BYTES
+DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
+\name\()_common_virt:
+	.if IKVM_VIRT
+		KVMTEST \name kvm_interrupt
+1:					/* the real mode path rejoins here */
+	.endif
+	.endif /* IVIRT */
+.endm
+
+/*
+ * Don't switch to virt mode. Used for early MCE and HMI handlers that
+ * want to run in real mode.
+ */
+.macro __GEN_REALMODE_COMMON_ENTRY name	/* Emit only the name_common_real label plus optional KVM test; MSR is left untouched */
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
+\name\()_common_real:
+	.if IKVM_REAL
+		KVMTEST \name kvm_interrupt	/* may clobber r10 */
+	.endif
+.endm
+
+.macro __GEN_COMMON_BODY name	/* Optional soft-mask test, then build the interrupt stack frame from the IAREA save area */
+	.if IMASK
+		.if ! ISTACK
+		.error "No support for masked interrupt to use custom stack"
+		.endif
+
+		/* If coming from user, skip soft-mask tests. */
+		andi.	r10,r12,MSR_PR
+		bne	3f
+
+		/*
+		 * Kernel code running below __end_soft_masked may be
+		 * implicitly soft-masked if it is within the regions
+		 * in the soft mask table.
+		 */
+		LOAD_HANDLER(r10, __end_soft_masked)
+		cmpld	r11,r10		/* interrupted address (r11) vs __end_soft_masked */
+		bge+	1f		/* at or above it: only the paca soft-mask byte applies */
+
+		/* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
+		mtctr	r12		/* park r12 in CTR across the search */
+		stw	r9,PACA_EXGEN+EX_CCR(r13)	/* park the saved CR held in r9 */
+		SEARCH_SOFT_MASK_TABLE
+		cmpdi	r12,0		/* r12 == 0 means no table entry matched */
+		mfctr	r12		/* Restore r12 to SRR1 */
+		lwz	r9,PACA_EXGEN+EX_CCR(r13)
+		beq	1f		/* Not in soft-mask table */
+		li	r10,IMASK	/* makes the andi. at 2: produce a non-zero result */
+		b	2f		/* In soft-mask table, always mask */
+
+		/* Test the soft mask state against our interrupt's bit */
+1:		lbz	r10,PACAIRQSOFTMASK(r13)
+2:		andi.	r10,r10,IMASK	/* cr0 decides the bne below; the li in between leaves cr0 alone */
+		/* Associate vector numbers with bits in paca->irq_happened */
+		.if IVEC == 0x500 || IVEC == 0xea0
+		li	r10,PACA_IRQ_EE
+		.elseif IVEC == 0x900
+		li	r10,PACA_IRQ_DEC
+		.elseif IVEC == 0xa00 || IVEC == 0xe80
+		li	r10,PACA_IRQ_DBELL
+		.elseif IVEC == 0xe60
+		li	r10,PACA_IRQ_HMI
+		.elseif IVEC == 0xf00
+		li	r10,PACA_IRQ_PMI
+		.else
+		.abort "Bad maskable vector"
+		.endif
+
+		.if IHSRR_IF_HVMODE
+		BEGIN_FTR_SECTION
+		bne	masked_Hinterrupt
+		FTR_SECTION_ELSE
+		bne	masked_interrupt
+		ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+		.elseif IHSRR
+		bne	masked_Hinterrupt
+		.else
+		bne	masked_interrupt
+		.endif
+	.endif
+
+	.if ISTACK
+	andi.	r10,r12,MSR_PR		/* See if coming from user	*/
+3:	mr	r10,r1			/* Save r1			*/
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc frame on kernel stack	*/
+	beq-	100f
+	ld	r1,PACAKSAVE(r13)	/* kernel stack to use		*/
+100:	tdgei	r1,-INT_FRAME_SIZE	/* trap if r1 is in userspace	*/
+	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+	.endif
+
+	std	r9,_CCR(r1)		/* save CR in stackframe	*/
+	std	r11,_NIP(r1)		/* save SRR0 in stackframe	*/
+	std	r12,_MSR(r1)		/* save SRR1 in stackframe	*/
+	std	r10,0(r1)		/* make stack chain pointer	*/
+	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
+	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
+	SANITIZE_GPR(0)
+
+	/* Mark our [H]SRRs valid for return */
+	li	r10,1
+	.if IHSRR_IF_HVMODE
+	BEGIN_FTR_SECTION
+	stb	r10,PACAHSRR_VALID(r13)
+	FTR_SECTION_ELSE
+	stb	r10,PACASRR_VALID(r13)
+	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+	.elseif IHSRR
+	stb	r10,PACAHSRR_VALID(r13)
+	.else
+	stb	r10,PACASRR_VALID(r13)
+	.endif
+
+	.if ISTACK
+	.if IKUAP
+	kuap_save_amr_and_lock r9, r10, cr1, cr0
+	.endif
+	beq	101f			/* if from kernel mode		*/
+BEGIN_FTR_SECTION
+	ld	r9,IAREA+EX_PPR(r13)	/* Read PPR from paca		*/
+	std	r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+101:
+	.else
+	.if IKUAP
+	kuap_save_amr_and_lock r9, r10, cr1
+	.endif
+	.endif
+
+	/* Save original regs values from save area to stack frame. */
+	ld	r9,IAREA+EX_R9(r13)	/* move r9, r10 to stackframe	*/
+	ld	r10,IAREA+EX_R10(r13)
+	std	r9,GPR9(r1)
+	std	r10,GPR10(r1)
+	ld	r9,IAREA+EX_R11(r13)	/* move r11 - r13 to stackframe	*/
+	ld	r10,IAREA+EX_R12(r13)
+	ld	r11,IAREA+EX_R13(r13)
+	std	r9,GPR11(r1)
+	std	r10,GPR12(r1)
+	std	r11,GPR13(r1)
+	.if !IMSR_R12
+	SANITIZE_GPRS(9, 12)
+	.else
+	SANITIZE_GPRS(9, 11)		/* r12 is left out of the sanitize range here */
+	.endif
+
+	SAVE_NVGPRS(r1)
+	SANITIZE_NVGPRS()
+
+	.if IDAR
+	.if IISIDE
+	ld	r10,_NIP(r1)		/* i-side: the saved NIP doubles as the DAR value */
+	.else
+	ld	r10,IAREA+EX_DAR(r13)
+	.endif
+	std	r10,_DAR(r1)
+	.endif
+
+	.if IDSISR
+	.if IISIDE
+	ld	r10,_MSR(r1)		/* i-side: derive the DSISR value from the saved MSR ... */
+	lis	r11,DSISR_SRR1_MATCH_64S@h
+	and	r10,r10,r11		/* ... keeping only the DSISR_SRR1_MATCH_64S bits */
+	.else
+	lwz	r10,IAREA+EX_DSISR(r13)
+	.endif
+	std	r10,_DSISR(r1)
+	.endif
+
+BEGIN_FTR_SECTION
+	.if ICFAR || ICFAR_IF_HVMODE
+	ld	r10,IAREA+EX_CFAR(r13)
+	.else
+	li	r10,0
+	.endif
+	std	r10,ORIG_GPR3(r1)	/* the CFAR value (or 0) lands in the ORIG_GPR3 slot */
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	ld	r10,IAREA+EX_CTR(r13)
+	std	r10,_CTR(r1)
+	SAVE_GPRS(2, 8, r1)		/* save r2 - r8 in stackframe   */
+	SANITIZE_GPRS(2, 8)
+	mflr	r9			/* Get LR, later save to stack	*/
+	LOAD_PACA_TOC()			/* get kernel TOC into r2	*/
+	std	r9,_LINK(r1)
+	lbz	r10,PACAIRQSOFTMASK(r13)
+	mfspr	r11,SPRN_XER		/* save XER in stackframe	*/
+	std	r10,SOFTE(r1)		/* record the soft-mask byte read just above */
+	std	r11,_XER(r1)
+	li	r9,IVEC
+	std	r9,_TRAP(r1)		/* set trap number		*/
+	li	r10,0
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r10,RESULT(r1)		/* clear regs->result		*/
+	std	r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame	*/
+.endm
+
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If ISTACK=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !ISTACK case (XXX why not?)
+ */
+.macro GEN_COMMON name
+	__GEN_COMMON_ENTRY \name	/* entry labels, optional KVM test, MSR switch */
+	__GEN_COMMON_BODY \name		/* optional soft-mask test, then the stack frame */
+.endm
+
+.macro SEARCH_RESTART_TABLE	/* In: r11 = address. Out: r12 = third word of the matching 24-byte entry, else 0. Clobbers r9, r10 */
+#ifdef CONFIG_RELOCATABLE
+	mr	r12,r2		/* keep the current r2 while the kernel TOC is loaded */
+	LOAD_PACA_TOC()
+	LOAD_REG_ADDR(r9, __start___restart_table)
+	LOAD_REG_ADDR(r10, __stop___restart_table)
+	mr	r2,r12		/* put the original r2 back */
+#else
+	LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table)
+	LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table)
+#endif
+300:
+	cmpd	r9,r10		/* reached the end of the table? */
+	beq	302f
+	ld	r12,0(r9)	/* first word of the entry */
+	cmpld	r11,r12
+	blt	301f		/* r11 below the first word: try the next entry */
+	ld	r12,8(r9)	/* second word of the entry */
+	cmpld	r11,r12
+	bge	301f		/* r11 at or above the second word: try the next entry */
+	ld	r12,16(r9)	/* match: return the third word */
+	b	303f
+301:
+	addi	r9,r9,24	/* advance by one 24-byte entry */
+	b	300b
+302:
+	li	r12,0		/* no entry matched */
+303:
+.endm
+
+.macro SEARCH_SOFT_MASK_TABLE	/* In: r11 = address. Out: r12 = 1 if a 16-byte entry covers it, else 0. Clobbers r9, r10 */
+#ifdef CONFIG_RELOCATABLE
+	mr	r12,r2		/* keep the current r2 while the kernel TOC is loaded */
+	LOAD_PACA_TOC()
+	LOAD_REG_ADDR(r9, __start___soft_mask_table)
+	LOAD_REG_ADDR(r10, __stop___soft_mask_table)
+	mr	r2,r12		/* put the original r2 back */
+#else
+	LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table)
+	LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table)
+#endif
+300:
+	cmpd	r9,r10		/* reached the end of the table? */
+	beq	302f
+	ld	r12,0(r9)	/* first word of the entry */
+	cmpld	r11,r12
+	blt	301f		/* r11 below the first word: try the next entry */
+	ld	r12,8(r9)	/* second word of the entry */
+	cmpld	r11,r12
+	bge	301f		/* r11 at or above the second word: try the next entry */
+	li	r12,1		/* match */
+	b	303f
+301:
+	addi	r9,r9,16	/* advance by one 16-byte entry */
+	b	300b
+302:
+	li	r12,0		/* no entry matched */
+303:
+.endm
+
+/*
+ * Restore all registers including H/SRR0/1 saved in a stack frame of a
+ * standard exception.
+ */
+.macro EXCEPTION_RESTORE_REGS hsrr=0	/* hsrr selects HSRR0/1 over SRR0/1; r1 must point at the frame */
+	/* Move original SRR0 and SRR1 into the respective regs */
+	ld	r9,_MSR(r1)
+	li	r10,0			/* value stored to the paca [H]SRR valid byte below */
+	.if \hsrr
+	mtspr	SPRN_HSRR1,r9
+	stb	r10,PACAHSRR_VALID(r13)
+	.else
+	mtspr	SPRN_SRR1,r9
+	stb	r10,PACASRR_VALID(r13)
+	.endif
+	ld	r9,_NIP(r1)
+	.if \hsrr
+	mtspr	SPRN_HSRR0,r9
+	.else
+	mtspr	SPRN_SRR0,r9
+	.endif
+	ld	r9,_CTR(r1)
+	mtctr	r9
+	ld	r9,_XER(r1)
+	mtxer	r9
+	ld	r9,_LINK(r1)
+	mtlr	r9
+	ld	r9,_CCR(r1)
+	mtcr	r9
+	SANITIZE_RESTORE_NVGPRS()
+	REST_GPRS(2, 13, r1)		/* restores r2-r13, overwriting the r9/r10 scratch used above */
+	REST_GPR(0, r1)
+	/* restore original r1. */
+	ld	r1,GPR1(r1)		/* done last because every load above is r1-relative */
+.endm
+
+/*
+ * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot.
+ *
+ * There's a short window during boot where although the kernel is running
+ * little endian, any exceptions will cause the CPU to switch back to big
+ * endian. For example a WARN() boils down to a trap instruction, which will
+ * cause a program check, and we end up here but with the CPU in big endian
+ * mode. The first instruction of the program check handler (in GEN_INT_ENTRY
+ * below) is an mtsprg, which when executed in the wrong endian is an lhzu with
+ * a ~3GB displacement from r3. The content of r3 is random, so that is a load
+ * from some random location, and depending on the system can easily lead to a
+ * checkstop, or an infinitely recursive page fault.
+ *
+ * So to handle that case we have a trampoline here that can detect we are in
+ * the wrong endian and flip us back to the correct endian. We can't flip
+ * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1
+ * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for
+ * the paca. SPRG3 is user readable, but this trampoline is only active very
+ * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before
+ * userspace starts.
+ */
+.macro EARLY_BOOT_FIXUP	/* Boot-only prolog: undo a wrong-endian interrupt entry, then spin if r13 is still 0 */
+BEGIN_FTR_SECTION
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+	tdi   0,0,0x48    // Trap never, or in reverse endian: b . + 8
+	b     2f          // Skip trampoline if endian is correct
+	.long 0xa643707d  // mtsprg  0, r11      Backup r11
+	.long 0xa6027a7d  // mfsrr0  r11
+	.long 0xa643727d  // mtsprg  2, r11      Backup SRR0 in SPRG2
+	.long 0xa6027b7d  // mfsrr1  r11
+	.long 0xa643737d  // mtsprg  3, r11      Backup SRR1 in SPRG3
+	.long 0xa600607d  // mfmsr   r11
+	.long 0x01006b69  // xori    r11, r11, 1 Invert MSR[LE]
+	.long 0xa6037b7d  // mtsrr1  r11
+	/*
+	 * This is 'li  r11,1f' where 1f is the absolute address of that
+	 * label, byteswapped into the SI field of the instruction.
+	 */
+	.long 0x00006039 | \
+		((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \
+		((ABS_ADDR(1f, real_vectors) & 0xff00) << 8)
+	.long 0xa6037a7d  // mtsrr0  r11
+	.long 0x2400004c  // rfid
+1:                        // rfid target: MSR[LE] has been inverted
+	mfsprg r11, 3
+	mtsrr1 r11        // Restore SRR1
+	mfsprg r11, 2
+	mtsrr0 r11        // Restore SRR0
+	mfsprg r11, 0     // Restore r11
+2:
+#endif
+	/*
+	 * program check could hit at any time, and pseries can not block
+	 * MSR[ME] in early boot. So check if there is anything useful in r13
+	 * yet, and spin forever if not.
+	 */
+	mtsprg	0, r11		/* stash r11 in SPRG0 */
+	mfcr	r11		/* keep CR, which the compare below overwrites */
+	cmpdi	r13, 0
+	beq	.		/* r13 == 0: branch to self forever */
+	mtcr	r11		/* put CR back */
+	mfsprg	r11, 0		/* put r11 back */
+END_FTR_SECTION(0, 1)     // nop out after boot
+.endm
+
+/*
+ * There are a few constraints to be concerned with.
+ * - Real mode exceptions code/data must be located at their physical location.
+ * - Virtual mode exceptions must be mapped at their 0xc000... location.
+ * - Fixed location code must not call directly beyond the __end_interrupts
+ *   area when built with CONFIG_RELOCATABLE. LOAD_HANDLER / bctr sequence
+ *   must be used.
+ * - LOAD_HANDLER targets must be within first 64K of physical 0 /
+ *   virtual 0xc00...
+ * - Conditional branch targets must be within +/-32K of caller.
+ *
+ * "Virtual exceptions" run with relocation on (MSR_IR=1, MSR_DR=1), and
+ * therefore don't have to run in physically located code or rfid to
+ * virtual mode kernel code. However on relocatable kernels they do have
+ * to branch to KERNELBASE offset because the rest of the kernel (outside
+ * the exception vectors) may be located elsewhere.
+ *
+ * Virtual exceptions correspond with physical, except their entry points
+ * are offset by 0xc000000000000000 and also tend to get an added 0x4000
+ * offset applied. Virtual exceptions are enabled with the Alternate
+ * Interrupt Location (AIL) bit set in the LPCR. However this does not
+ * guarantee they will be delivered virtually. Some conditions (see the ISA)
+ * cause exceptions to be delivered in real mode.
+ *
+ * The scv instructions are a special case. They get a 0x3000 offset applied.
+ * scv exceptions have unique reentrancy properties, see below.
+ *
+ * It's impossible to receive interrupts below 0x300 via AIL.
+ *
+ * KVM: None of the virtual exceptions are from the guest. Anything that
+ * escalated to HV=1 from HV=0 is delivered via real mode handlers.
+ *
+ *
+ * We lay out physical memory as follows:
+ * 0x0000 - 0x00ff : Secondary processor spin code
+ * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
+ * 0x1900 - 0x2fff : Real mode trampolines
+ * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x5900 - 0x6fff : Relon mode trampolines
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 -   .... : Common interrupt handlers, remaining early
+ *                   setup code, rest of kernel.
+ *
+ * We could reclaim 0x4000-0x42ff for real mode trampolines if the space
+ * is necessary. Until then it's more consistent to explicitly put VIRT_NONE
+ * vectors there.
+ */
+OPEN_FIXED_SECTION(real_vectors,        0x0100, 0x1900)
+OPEN_FIXED_SECTION(real_trampolines,    0x1900, 0x3000)
+OPEN_FIXED_SECTION(virt_vectors,        0x3000, 0x5900)
+OPEN_FIXED_SECTION(virt_trampolines,    0x5900, 0x7000)
+
+#ifdef CONFIG_PPC_POWERNV
+	.globl start_real_trampolines
+	.globl end_real_trampolines
+	.globl start_virt_trampolines
+	.globl end_virt_trampolines
+#endif
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ * pseries and powernv need to keep the whole page from
+ * 0x7000 to 0x8000 free for use by the firmware
+ */
+ZERO_FIXED_SECTION(fwnmi_page,          0x7000, 0x8000)
+OPEN_TEXT_SECTION(0x8000)
+#else
+OPEN_TEXT_SECTION(0x7000)
+#endif
+
+USE_FIXED_SECTION(real_vectors)
+
+/*
+ * This is the start of the interrupt handlers for pSeries
+ * This code runs with relocation off.
+ * Code from here to __end_interrupts gets copied down to real
+ * address 0x100 when we are running a relocatable kernel.
+ * Therefore any relative branches in this section must only
+ * branch to labels in this section.
+ */
+	.globl __start_interrupts
+__start_interrupts:
+
+/**
+ * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
+ * This is a synchronous interrupt invoked with the "scv" instruction. The
+ * system call does not alter the HV bit, so it is directed to the OS.
+ *
+ * Handling:
+ * scv instructions enter the kernel without changing EE, RI, ME, or HV.
+ * In particular, this means we can take a maskable interrupt at any point
+ * in the scv handler, which is unlike any other interrupt. This is solved
+ * by treating the instruction addresses in the handler as being soft-masked,
+ * by adding a SOFT_MASK_TABLE entry for them.
+ *
+ * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
+ * ensure scv is never executed with relocation off, which means AIL-0
+ * should never happen.
+ *
+ * Before leaving the following inside-__end_soft_masked text, at least one of
+ * the following must be true:
+ * - MSR[PR]=1 (i.e., return to userspace)
+ * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
+ * - Standard kernel environment is set up (stack, paca, etc)
+ *
+ * KVM:
+ * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
+ * ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
+ */
+EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+	/* SCV 0 */
+	mr	r9,r13			/* keep the incoming r13 in r9 */
+	GET_PACA(r13)
+	mflr	r11			/* r11 = LR at entry */
+	mfctr	r12			/* r12 = CTR at entry */
+	li	r10,IRQS_ALL_DISABLED
+	stb	r10,PACAIRQSOFTMASK(r13)	/* soft-mask state = all disabled */
+#ifdef CONFIG_RELOCATABLE
+	b	system_call_vectored_tramp
+#else
+	b	system_call_vectored_common
+#endif
+	nop				/* presumably pads this slot to the length of the slots below */
+
+	/* SCV 1 - 127 */
+	.rept	127
+	mr	r9,r13			/* same prolog as SCV 0 */
+	GET_PACA(r13)
+	mflr	r11
+	mfctr	r12
+	li	r10,IRQS_ALL_DISABLED
+	stb	r10,PACAIRQSOFTMASK(r13)
+	li	r0,-1 /* cause failure */
+#ifdef CONFIG_RELOCATABLE
+	b	system_call_vectored_sigill_tramp
+#else
+	b	system_call_vectored_sigill
+#endif
+	.endr
+EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+
+// Treat scv vectors as soft-masked, see comment above.
+// Use absolute values rather than labels here, so they don't get relocated,
+// because this code runs unrelocated.
+SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
+
+#ifdef CONFIG_RELOCATABLE
+TRAMP_VIRT_BEGIN(system_call_vectored_tramp)	/* relocatable build only: indirect jump to the common scv handler */
+	__LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)	/* r10 = kernelbase | low part of the target */
+	mtctr	r10		/* CTR is overwritten; the vector already copied it to r12 */
+	bctr
+
+TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)	/* relocatable build only: indirect jump for SCV 1-127 */
+	__LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)	/* r10 = kernelbase | low part of the target */
+	mtctr	r10		/* CTR is overwritten; the vector already copied it to r12 */
+	bctr
+#endif
+
+
+/* No virt vectors corresponding with 0x0..0x100 */
+EXC_VIRT_NONE(0x4000, 0x100)
+
+
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined as quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
+INT_DEFINE_BEGIN(system_reset)
+	IVEC=0x100
+	IAREA=PACA_EXNMI
+	IVIRT=0 /* no virt entry point */
+	ISTACK=0
+	IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
+EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
+#ifdef CONFIG_PPC_P7_NAP
+	/*
+	 * If running native on arch 2.06 or later, check if we are waking up
+	 * from nap/sleep/winkle, and branch to idle handler. This tests SRR1
+	 * bits 46:47. A non-0 value indicates that we are coming from a power
+	 * saving state. The idle wakeup handler initially runs in real mode,
+	 * but we branch to the 0xc000... address so we can turn on relocation
+	 * with mtmsrd later, after SPRs are restored.
+	 *
+	 * Careful to minimise cost for the fast path (idle wakeup) while
+	 * also avoiding clobbering CFAR for the debug path (non-idle).
+	 *
+	 * For the idle wake case volatile registers can be clobbered, which
+	 * is why we use those initially. If it turns out to not be an idle
+	 * wake, carefully put everything back the way it was, so we can use
+	 * common exception macros to handle it.
+	 */
+BEGIN_FTR_SECTION
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r3,PACA_EXNMI+0*8(r13)	/* borrow the first three EXNMI slots for r3-r5 */
+	std	r4,PACA_EXNMI+1*8(r13)
+	std	r5,PACA_EXNMI+2*8(r13)
+	mfspr	r3,SPRN_SRR1
+	mfocrf	r4,0x80			/* keep the CR field that rlwinm. overwrites */
+	rlwinm.	r5,r3,47-31,30,31	/* r5 = SRR1 bits 46:47, result recorded in cr0 */
+	bne+	system_reset_idle_wake	/* non-zero: coming from a power saving state */
+	/* Not powersave wakeup. Restore regs for regular interrupt handler. */
+	mtocrf	0x80,r4
+	ld	r3,PACA_EXNMI+0*8(r13)
+	ld	r4,PACA_EXNMI+1*8(r13)
+	ld	r5,PACA_EXNMI+2*8(r13)
+	GET_SCRATCH0(r13)		/* r13 back to its value at interrupt entry */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
+
+	GEN_INT_ENTRY system_reset, virt=0
+	/*
+	 * In theory, we should not enable relocation here if it was disabled
+	 * in SRR1, because the MMU may not be configured to support it (e.g.,
+	 * SLB may have been cleared). In practice, there should only be a few
+	 * small windows where that's the case, and sreset is considered to
+	 * be dangerous anyway.
+	 */
+EXC_REAL_END(system_reset, 0x100, 0x100)
+EXC_VIRT_NONE(0x4100, 0x100)
+
+#ifdef CONFIG_PPC_P7_NAP
+TRAMP_REAL_BEGIN(system_reset_idle_wake)	/* entered with r3 = SRR1 and r5 = SRR1 bits 46:47 (non-zero) */
+	/* We are waking up from idle, so may clobber any volatile register */
+	cmpwi	cr1,r5,2
+	bltlr	cr1	/* no state loss, return to idle caller with r3=SRR1 */
+	__LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)	/* r5 >= 2: far target, needs @l and @h */
+	mtctr	r12
+	bctr
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * FWNMI system reset entry. Reuses the system_reset definition to share common code.
+ */
+TRAMP_REAL_BEGIN(system_reset_fwnmi)
+	GEN_INT_ENTRY system_reset, virt=0	/* same entry as the 0x100 vector */
+
+#endif /* CONFIG_PPC_PSERIES */
+
+EXC_COMMON_BEGIN(system_reset_common)	/* common handler for system reset (0x100 and FWNMI) */
+	__GEN_COMMON_ENTRY system_reset
+	/*
+	 * Increment paca->in_nmi. When the interrupt entry wrapper later
+	 * enables MSR_RI, then SLB or MCE will be able to recover, but a nested
+	 * NMI will notice in_nmi and not recover because of the use of the NMI
+	 * stack. in_nmi reentrancy is tested in system_reset_exception.
+	 */
+	lhz	r10,PACA_IN_NMI(r13)
+	addi	r10,r10,1
+	sth	r10,PACA_IN_NMI(r13)
+
+	mr	r10,r1				/* Save r1 */
+	ld	r1,PACA_NMI_EMERG_SP(r13)	/* Use NMI emergency stack */
+	subi	r1,r1,INT_FRAME_SIZE		/* alloc stack frame */
+	__GEN_COMMON_BODY system_reset
+
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	bl	CFUNC(system_reset_exception)
+
+	/* Clear MSR_RI before setting SRR0 and SRR1. */
+	li	r9,0
+	mtmsrd	r9,1
+
+	/*
+	 * MSR_RI is clear, now we can decrement paca->in_nmi.
+	 */
+	lhz	r10,PACA_IN_NMI(r13)
+	subi	r10,r10,1
+	sth	r10,PACA_IN_NMI(r13)
+
+	kuap_kernel_restore r9, r10
+	EXCEPTION_RESTORE_REGS
+	RFI_TO_USER_OR_KERNEL	/* the interrupted context may be user or kernel */
+
+
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area,
+ * the difference is re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * This early code does not "reconcile" irq soft-mask state like SRESET or
+ * regular interrupts do, so irqs_disabled() among other things may not work
+ * properly (irq disable/enable already doesn't work because irq tracing can
+ * not work in real mode).
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ *   it is handled when it is next safe to do so (i.e., the kernel has enabled
+ *   interrupts), which could be immediately when the interrupt returns. This
+ *   avoids nasty issues like switching to virtual mode when the MMU is in a
+ *   bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ *   but it has different priorities). Check to see if the CPU was in power
+ *   save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ *   interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ *   branches to KVM to deal with. KVM may queue the event for the host
+ *   to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
+INT_DEFINE_BEGIN(machine_check_early)	/* early half of the 0x200 handler, used by every MCE entry */
+	IVEC=0x200
+	IAREA=PACA_EXMC	/* machine checks use PACA_EXMC rather than PACA_EXGEN */
+	IVIRT=0 /* no virt entry point */
+	IREALMODE_COMMON=1	/* paired with __GEN_REALMODE_COMMON_ENTRY in the common handler */
+	ISTACK=0	/* machine_check_early_common selects the stack itself */
+	IDAR=1
+	IDSISR=1
+	IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)	/* "late" definition, entered from mce_deliver */
+	IVEC=0x200
+	IAREA=PACA_EXMC	/* same save area as machine_check_early */
+	IVIRT=0 /* no virt entry point */
+	IDAR=1
+	IDSISR=1
+	IKVM_REAL=1	/* guest MCEs are picked up by the KVM test on this entry */
+INT_DEFINE_END(machine_check)
+
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+	EARLY_BOOT_FIXUP
+	GEN_INT_ENTRY machine_check_early, virt=0	/* the vector always enters the early handler */
+EXC_REAL_END(machine_check, 0x200, 0x100)
+EXC_VIRT_NONE(0x4200, 0x100)	/* no virt entry point for machine check (IVIRT=0) */
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+	/* See comment at machine_check exception, don't turn on RI */
+	GEN_INT_ENTRY machine_check_early, virt=0	/* same entry as the 0x200 vector */
+#endif /* CONFIG_PPC_PSERIES */
+
+#define MACHINE_CHECK_HANDLER_WINDUP			\
+	/* Clear MSR_RI before setting SRR0 and SRR1. */\
+	li	r9,0;					\
+	mtmsrd	r9,1;		/* Clear MSR_RI */	\
+	/* Decrement paca->in_mce now RI is clear. */	\
+	lhz	r12,PACA_IN_MCE(r13);			\
+	subi	r12,r12,1;				\
+	sth	r12,PACA_IN_MCE(r13);			\
+	EXCEPTION_RESTORE_REGS	/* no return here: users follow with RFI_TO_KERNEL or GEN_INT_ENTRY */
+
+EXC_COMMON_BEGIN(machine_check_early_common)	/* early MCE handling; see the 0x200 header comment */
+	__GEN_REALMODE_COMMON_ENTRY machine_check_early
+
+	/*
+	 * Switch to mc_emergency stack and handle re-entrancy (we limit
+	 * the nested MCE up to level 4 to avoid stack overflow).
+	 * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
+	 *
+	 * We use paca->in_mce to check whether this is the first entry or
+	 * nested machine check. We increment paca->in_mce to track nested
+	 * machine checks.
+	 *
+	 * If this is the first entry then set stack pointer to
+	 * paca->mc_emergency_sp, otherwise r1 is already pointing to
+	 * stack frame on mc_emergency stack.
+	 *
+	 * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+	 * checkstop if we get another machine check exception before we do
+	 * rfid with MSR_ME=1.
+	 *
+	 * This interrupt can wake directly from idle. If that is the case,
+	 * the machine check is handled then the idle wakeup code is called
+	 * to restore state.
+	 */
+	lhz	r10,PACA_IN_MCE(r13)
+	cmpwi	r10,0			/* Are we in nested machine check */
+	cmpwi	cr1,r10,MAX_MCE_DEPTH	/* Are we at maximum nesting */
+	addi	r10,r10,1		/* increment paca->in_mce */
+	sth	r10,PACA_IN_MCE(r13)
+
+	mr	r10,r1			/* Save r1 */
+	bne	1f			/* nested: keep the current r1 */
+	/* First machine check entry */
+	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
+1:	/* Limit nested MCE to level 4 to avoid stack overflow */
+	bgt	cr1,unrecoverable_mce	/* Check if we hit limit of 4 */
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
+
+	__GEN_COMMON_BODY machine_check_early
+
+BEGIN_FTR_SECTION
+	bl	enable_machine_check	/* only called when CPU_FTR_HVMODE is set */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+BEGIN_FTR_SECTION
+	bl	CFUNC(machine_check_early_boot)
+END_FTR_SECTION(0, 1)     // nop out after boot
+	bl	CFUNC(machine_check_early)
+	std	r3,RESULT(r1)	/* Save result */
+	ld	r12,_MSR(r1)	/* r12 = MSR value saved in the frame */
+
+#ifdef CONFIG_PPC_P7_NAP
+	/*
+	 * Check if thread was in power saving mode. We come here when any
+	 * of the following is true:
+	 * a. thread wasn't in power saving mode
+	 * b. thread was in power saving mode with no state loss,
+	 *    supervisor state loss or hypervisor state loss.
+	 *
+	 * Go back to nap/sleep/winkle mode again if (b) is true.
+	 */
+BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31	/* extract the powersave wakeup bits (46:47) */
+	bne	machine_check_idle_common	/* non-zero: woke from a power saving state */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	/*
+	 * Check if we are coming from guest. If yes, then run the normal
+	 * exception handler which will take the
+	 * machine_check_kvm->kvm_interrupt branch to deliver the MC event
+	 * to guest.
+	 */
+	lbz	r11,HSTATE_IN_GUEST(r13)
+	cmpwi	r11,0			/* Check if coming from guest */
+	bne	mce_deliver		/* continue if we are. */
+#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
+
+	/*
+	 * Check if we are coming from userspace. If yes, then run the normal
+	 * exception handler which will deliver the MC event to this kernel.
+	 */
+	andi.	r11,r12,MSR_PR		/* See if coming from user. */
+	bne	mce_deliver		/* continue in V mode if we are. */
+
+	/*
+	 * At this point we are coming from kernel context.
+	 * Queue up the MCE event and return from the interrupt.
+	 * But before that, check if this is an un-recoverable exception.
+	 * If yes, then stay on emergency stack and panic.
+	 */
+	andi.	r11,r12,MSR_RI		/* was RI set in the interrupted context? */
+	beq	unrecoverable_mce	/* RI clear: cannot return safely */
+
+	/*
+	 * Check if we have successfully handled/recovered from error, if not
+	 * then stay on emergency stack and panic.
+	 */
+	ld	r3,RESULT(r1)	/* Load result */
+	cmpdi	r3,0		/* see if we handled MCE successfully */
+	beq	unrecoverable_mce /* if !handled then panic */
+
+	/*
+	 * Return from MC interrupt.
+	 * Queue up the MCE event so that we can log it later, while
+	 * returning from kernel or opal call.
+	 */
+	bl	CFUNC(machine_check_queue_event)
+	MACHINE_CHECK_HANDLER_WINDUP
+	RFI_TO_KERNEL
+
+mce_deliver:
+	/*
+	 * This is a host user or guest MCE. Restore all registers, then
+	 * run the "late" handler. For host user, this will run the
+	 * machine_check_exception handler in virtual mode like a normal
+	 * interrupt handler. For guest, this will trigger the KVM test
+	 * and branch to the KVM interrupt similarly to other interrupts.
+	 */
+BEGIN_FTR_SECTION
+	ld	r10,ORIG_GPR3(r1)	/* the CFAR value is read back from the ORIG_GPR3 slot */
+	mtspr	SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	MACHINE_CHECK_HANDLER_WINDUP
+	GEN_INT_ENTRY machine_check, virt=0	/* re-enter through the "late" machine_check definition */
+
+EXC_COMMON_BEGIN(machine_check_common)	/* "late" handler, reached through mce_deliver */
+	/*
+	 * Machine check is different because we use a different
+	 * save area: PACA_EXMC instead of PACA_EXGEN.
+	 */
+	GEN_COMMON machine_check
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	bl	CFUNC(machine_check_exception_async)	/* presumably the non-NMI variant of machine_check_exception */
+	b	interrupt_return_srr
+
+
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * This is an idle wakeup. Low level machine check has already been
+ * done. Queue the event then call the idle code to do the wake up.
+ */
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	CFUNC(machine_check_queue_event)
+
+	/*
+	 * GPR-loss wakeups are relatively straightforward, because the
+	 * idle sleep code has saved all non-volatile registers on its
+	 * own stack, and r1 in PACAR1.
+	 *
+	 * For no-loss wakeups the r1 and lr registers used by the
+	 * early machine check handler have to be restored first. r2 is
+	 * the kernel TOC, so no need to restore it.
+	 *
+	 * Then decrement MCE nesting after finishing with the stack.
+	 */
+	ld	r3,_MSR(r1)	/* r3 = saved SRR1, handed to the idle code */
+	ld	r4,_LINK(r1)	/* r4 = saved lr */
+	ld	r1,GPR1(r1)	/* restore r1: finished with the MCE stack frame */
+
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)	/* decrement paca->in_mce */
+
+	mtlr	r4
+	rlwinm	r10,r3,47-31,30,31	/* extract the powersave wakeup bits (46:47) */
+	cmpwi	cr1,r10,2	/* below 2 means no state was lost */
+	bltlr	cr1	/* no state loss, return to idle caller with r3=SRR1 */
+	b	idle_return_gpr_loss	/* state lost: take the GPR-loss wakeup path */
+#endif /* CONFIG_PPC_P7_NAP */
+
+EXC_COMMON_BEGIN(unrecoverable_mce)	/* terminal path: report the MCE and never return */
+	/*
+	 * We are going down. But there are chances that we might get hit by
+	 * another MCE during panic path and we may run into unstable state
+	 * with no way out. Hence, turn ME bit off while going down, so that
+	 * when another MCE is hit during panic path, system will checkstop
+	 * and hypervisor will get restarted cleanly by SP.
+	 */
+BEGIN_FTR_SECTION
+	li	r10,0 /* clear MSR_RI */
+	mtmsrd	r10,1
+	bl	CFUNC(disable_machine_check)	/* only called when CPU_FTR_HVMODE is set */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+	ld	r10,PACAKMSR(r13)	/* start from the MSR value held in the PACA */
+	li	r3,MSR_ME
+	andc	r10,r10,r3		/* ... with the ME bit cleared */
+	mtmsrd	r10
+
+	lhz	r12,PACA_IN_MCE(r13)
+	subi	r12,r12,1
+	sth	r12,PACA_IN_MCE(r13)	/* decrement paca->in_mce */
+
+	/*
+	 * Invoke machine_check_exception to print MCE event and panic.
+	 * This is the NMI version of the handler because we are called from
+	 * the early handler which is a true NMI.
+	 */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(machine_check_exception)
+
+	/*
+	 * We will not reach here. Even if we did, there is no way out.
+	 * Call unrecoverable_exception and die.
+	 */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(unrecoverable_exception)
+	b	.	/* branch to self: spin if that ever returns */
+
+
+/**
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load or store which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ *   Go to do_hash_fault, which attempts to fill the HPT from an entry in the
+ *   Linux page table. Hash faults can hit in kernel mode in a fairly
+ *   arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ *   "non-bolted" regions, e.g., vmalloc space. However these should always be
+ *   backed by Linux page table entries.
+ *
+ *   If no entry is found the Linux page fault handler is invoked (by
+ *   do_hash_fault). Linux page faults can happen in kernel mode due to user
+ *   copy operations of course.
+ *
+ *   KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
+ *   MMU context, which may cause a DSI in the host, which must go to the
+ *   KVM handler. MSR[IR] is not enabled, so the real-mode handler will
+ *   always be used regardless of AIL setting.
+ *
+ * - Radix MMU
+ *   The hardware loads from the Linux page table directly, so a fault goes
+ *   immediately to Linux page fault.
+ *
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
+ */
+INT_DEFINE_BEGIN(data_access)	/* 0x300 Data Storage Interrupt */
+	IVEC=0x300
+	IDAR=1
+	IDSISR=1	/* data_access_common reads _DSISR back from the frame */
+	IKVM_REAL=1	/* a DSI taken with MSR[IR]=0 in the KVM HDSI handler must reach KVM */
+INT_DEFINE_END(data_access)
+
+EXC_REAL_BEGIN(data_access, 0x300, 0x80)	/* real mode vector */
+	GEN_INT_ENTRY data_access, virt=0
+EXC_REAL_END(data_access, 0x300, 0x80)
+EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)	/* virt vector */
+	GEN_INT_ENTRY data_access, virt=1
+EXC_VIRT_END(data_access, 0x4300, 0x80)
+EXC_COMMON_BEGIN(data_access_common)	/* DSI: breakpoint match goes to do_break, else a fault handler */
+	GEN_COMMON data_access
+	ld	r4,_DSISR(r1)			/* r4 = DSISR saved in the frame */
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	andis.	r0,r4,DSISR_DABRMATCH@h		/* test the DABRMATCH bit of DSISR */
+	bne-	1f				/* set: handled by do_break below */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+	bl	CFUNC(do_hash_fault)	/* used when MMU_FTR_TYPE_RADIX is clear */
+MMU_FTR_SECTION_ELSE
+	bl	CFUNC(do_page_fault)	/* used when MMU_FTR_TYPE_RADIX is set */
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else /* !CONFIG_PPC_64S_HASH_MMU */
+	bl	CFUNC(do_page_fault)
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+	b	interrupt_return_srr
+
+1:	bl	CFUNC(do_break)
+	/*
+	 * do_break() may have changed the NV GPRS while handling a breakpoint.
+	 * If so, we need to restore them with their updated values.
+	 */
+	HANDLER_RESTORE_NVGPRS()
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ *   This refills the SLB, or reports an access fault similarly to a bad page
+ *   fault. When coming from user-mode, the SLB handler may access any kernel
+ *   data, though it may itself take a DSLB. When coming from kernel mode,
+ *   recursive faults must be avoided so access is restricted to the kernel
+ *   image text/data, kernel stack, and any data allocated below
+ *   ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ *   on user-handler data structures.
+ *
+ *   KVM: Same as 0x300, DSLB must test for KVM guest.
+ */
+INT_DEFINE_BEGIN(data_access_slb)	/* 0x380 Data Segment Interrupt */
+	IVEC=0x380
+	IDAR=1
+	IKVM_REAL=1	/* same as 0x300: DSLB must test for KVM guest */
+INT_DEFINE_END(data_access_slb)
+
+EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)	/* real mode vector */
+	GEN_INT_ENTRY data_access_slb, virt=0
+EXC_REAL_END(data_access_slb, 0x380, 0x80)
+EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)	/* virt vector */
+	GEN_INT_ENTRY data_access_slb, virt=1
+EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
+EXC_COMMON_BEGIN(data_access_slb_common)	/* DSLB: SLB fault on HPT, bad segment otherwise */
+	GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+	/* HPT case, do SLB fault */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(do_slb_fault)
+	cmpdi	r3,0				/* did do_slb_fault return 0? */
+	bne-	1f				/* non-zero return is the error case */
+	b	fast_interrupt_return_srr	/* zero return: done */
+1:	/* Error case: r3 still holds do_slb_fault's return value */
+MMU_FTR_SECTION_ELSE
+	/* Radix case, access is outside page table range */
+	li	r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else /* !CONFIG_PPC_64S_HASH_MMU */
+	li	r3,-EFAULT
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+	std	r3,RESULT(r1)			/* store the error code in the frame's RESULT slot */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(do_bad_segment_interrupt)
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
+INT_DEFINE_BEGIN(instruction_access)	/* 0x400 Instruction Storage Interrupt */
+	IVEC=0x400
+	IISIDE=1	/* instruction side: address is in SRR0, status in SRR1 */
+	IDAR=1
+	IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+INT_DEFINE_END(instruction_access)
+
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)	/* real mode vector */
+	GEN_INT_ENTRY instruction_access, virt=0
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)	/* virt vector */
+	GEN_INT_ENTRY instruction_access, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+EXC_COMMON_BEGIN(instruction_access_common)	/* ISI: same fault handlers as DSI, no breakpoint test */
+	GEN_COMMON instruction_access
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+	bl	CFUNC(do_hash_fault)	/* used when MMU_FTR_TYPE_RADIX is clear */
+MMU_FTR_SECTION_ELSE
+	bl	CFUNC(do_page_fault)	/* used when MMU_FTR_TYPE_RADIX is set */
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else /* !CONFIG_PPC_64S_HASH_MMU */
+	bl	CFUNC(do_page_fault)
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
+INT_DEFINE_BEGIN(instruction_access_slb)	/* 0x480 Instruction Segment Interrupt */
+	IVEC=0x480
+	IISIDE=1	/* instruction side: the faulting address is in SRR0 */
+	IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+INT_DEFINE_END(instruction_access_slb)
+
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)	/* real mode vector */
+	GEN_INT_ENTRY instruction_access_slb, virt=0
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)	/* virt vector */
+	GEN_INT_ENTRY instruction_access_slb, virt=1
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
+EXC_COMMON_BEGIN(instruction_access_slb_common)	/* ISLB: same flow as data_access_slb_common */
+	GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+	/* HPT case, do SLB fault */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(do_slb_fault)
+	cmpdi	r3,0				/* did do_slb_fault return 0? */
+	bne-	1f				/* non-zero return is the error case */
+	b	fast_interrupt_return_srr	/* zero return: done */
+1:	/* Error case: r3 still holds do_slb_fault's return value */
+MMU_FTR_SECTION_ELSE
+	/* Radix case, access is outside page table range */
+	li	r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else /* !CONFIG_PPC_64S_HASH_MMU */
+	li	r3,-EFAULT
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+	std	r3,RESULT(r1)			/* store the error code in the frame's RESULT slot */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(do_bad_segment_interrupt)
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with HSRR registers, guests use SRRs, which
+ * requires IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead,
+ * because registers at the time of the interrupt are not so important as it is
+ * asynchronous.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
+ */
+INT_DEFINE_BEGIN(hardware_interrupt)	/* 0x500 External Interrupt */
+	IVEC=0x500
+	IHSRR_IF_HVMODE=1	/* delivered with HSRRs in HV mode, SRRs in a guest */
+	IMASK=IRQS_DISABLED	/* soft-maskable via local_irq_disable() */
+	IKVM_REAL=1
+	IKVM_VIRT=1
+	ICFAR=0			/* asynchronous interrupt: CFAR not required ... */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ICFAR_IF_HVMODE=1	/* ... unless it may be exiting a KVM HV guest */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+INT_DEFINE_END(hardware_interrupt)
+
+EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)	/* real mode vector */
+	GEN_INT_ENTRY hardware_interrupt, virt=0
+EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
+EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)	/* virt vector */
+	GEN_INT_ENTRY hardware_interrupt, virt=1
+EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
+EXC_COMMON_BEGIN(hardware_interrupt_common)	/* external interrupt: run do_IRQ, then return */
+	GEN_COMMON hardware_interrupt
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	bl	CFUNC(do_IRQ)
+	BEGIN_FTR_SECTION
+	b	interrupt_return_hsrr	/* HVMODE | ARCH_206 set: return via the HSRR path */
+	FTR_SECTION_ELSE
+	b	interrupt_return_srr	/* otherwise return via the SRR path */
+	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+
+
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
+INT_DEFINE_BEGIN(alignment)	/* 0x600 Alignment Interrupt */
+	IVEC=0x600
+	IDAR=1
+	IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+INT_DEFINE_END(alignment)
+
+EXC_REAL_BEGIN(alignment, 0x600, 0x100)	/* real mode vector */
+	GEN_INT_ENTRY alignment, virt=0
+EXC_REAL_END(alignment, 0x600, 0x100)
+EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)	/* virt vector */
+	GEN_INT_ENTRY alignment, virt=1
+EXC_VIRT_END(alignment, 0x4600, 0x100)
+EXC_COMMON_BEGIN(alignment_common)	/* alignment fault: handled in C by alignment_exception */
+	GEN_COMMON alignment
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	bl	CFUNC(alignment_exception)
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
+INT_DEFINE_BEGIN(program_check)	/* 0x700 Program Interrupt */
+	IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+INT_DEFINE_END(program_check)
+
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)	/* real mode vector */
+	EARLY_BOOT_FIXUP
+	GEN_INT_ENTRY program_check, virt=0
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)	/* virt vector */
+	GEN_INT_ENTRY program_check, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+EXC_COMMON_BEGIN(program_check_common)	/* picks normal or emergency stack, then calls the C handler */
+	__GEN_COMMON_ENTRY program_check
+
+	/*
+	 * It's possible to receive a TM Bad Thing type program check with
+	 * userspace register values (in particular r1), but with SRR1 reporting
+	 * that we came from the kernel. Normally that would confuse the bad
+	 * stack logic, and we would report a bad kernel stack pointer. Instead
+	 * we switch to the emergency stack if we're taking a TM Bad Thing from
+	 * the kernel.
+	 */
+
+	andi.	r10,r12,MSR_PR
+	bne	.Lnormal_stack		/* If userspace, go normal path */
+
+	andis.	r10,r12,(SRR1_PROGTM)@h
+	bne	.Lemergency_stack	/* If TM, emergency		*/
+
+	cmpdi	r1,-INT_FRAME_SIZE	/* check if r1 is in userspace	*/
+	blt	.Lnormal_stack		/* normal path if not		*/
+
+	/* Use the emergency stack					*/
+.Lemergency_stack:
+	andi.	r10,r12,MSR_PR		/* Set CR0 correctly for label	*/
+					/* 3 in EXCEPTION_PROLOG_COMMON	*/
+	mr	r10,r1			/* Save r1			*/
+	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack		*/
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
+	__ISTACK(program_check)=0	/* r1 was set up by hand just above */
+	__GEN_COMMON_BODY program_check
+	b .Ldo_program_check		/* skip the normal-stack copy of the body */
+
+.Lnormal_stack:
+	__ISTACK(program_check)=1	/* second expansion of the body, with ISTACK=1 */
+	__GEN_COMMON_BODY program_check
+
+.Ldo_program_check:
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	bl	CFUNC(program_check_exception)
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+	b	interrupt_return_srr
+
+
+/*
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
+INT_DEFINE_BEGIN(fp_unavailable)	/* 0x800 Floating-Point Unavailable Interrupt */
+	IVEC=0x800
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+	IMSR_R12=1	/* fp_unavailable_common tests the TM state bits in r12 */
+INT_DEFINE_END(fp_unavailable)
+
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)	/* real mode vector */
+	GEN_INT_ENTRY fp_unavailable, virt=0
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)	/* virt vector */
+	GEN_INT_ENTRY fp_unavailable, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+EXC_COMMON_BEGIN(fp_unavailable_common)	/* user: load FP state; kernel: report and BUG */
+	GEN_COMMON fp_unavailable
+	bne	1f			/* if from user, just load it up */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(kernel_fp_unavailable_exception)
+0:	trap				/* reached only if the call above returns */
+	EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+1:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	/* Test if 2 TM state bits are zero.  If non-zero (ie. userspace was in
+	 * transaction), go do TM stuff
+	 */
+	rldicl.	r0, r12, (64-MSR_TS_LG), (64-2)
+	bne-	2f
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+	bl	CFUNC(load_up_fpu)
+	b	fast_interrupt_return_srr
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2:	/* User process was in a transaction */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(fp_unavailable_tm)
+	b	interrupt_return_srr
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
+ */
+INT_DEFINE_BEGIN(decrementer)	/* 0x900 Decrementer Interrupt */
+	IVEC=0x900
+	IMASK=IRQS_DISABLED	/* soft-maskable via local_irq_disable() */
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+	ICFAR=0			/* asynchronous interrupt: CFAR not required */
+INT_DEFINE_END(decrementer)
+
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)	/* real mode vector */
+	GEN_INT_ENTRY decrementer, virt=0
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)	/* virt vector */
+	GEN_INT_ENTRY decrementer, virt=1
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+EXC_COMMON_BEGIN(decrementer_common)	/* decrementer: run the Linux timer handler */
+	GEN_COMMON decrementer
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+	bl	CFUNC(timer_interrupt)
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM where it's used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
+INT_DEFINE_BEGIN(hdecrementer)	/* 0x980 Hypervisor Decrementer Interrupt */
+	IVEC=0x980
+	IHSRR=1
+	ISTACK=0	/* hdecrementer_common avoids touching the kernel stack */
+	IKVM_REAL=1	/* normally caught by the KVM test and routed there */
+	IKVM_VIRT=1
+INT_DEFINE_END(hdecrementer)
+
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)	/* real mode vector */
+	GEN_INT_ENTRY hdecrementer, virt=0
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)	/* virt vector */
+	GEN_INT_ENTRY hdecrementer, virt=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+EXC_COMMON_BEGIN(hdecrementer_common)	/* stray HDEC: undo the entry saves and return at once */
+	__GEN_COMMON_ENTRY hdecrementer
+	/*
+	 * Hypervisor decrementer interrupts not caught by the KVM test
+	 * shouldn't occur but are sometimes left pending on exit from a KVM
+	 * guest.  We don't need to do anything to clear them, as they are
+	 * edge-triggered.
+	 *
+	 * Be careful to avoid touching the kernel stack.
+	 */
+	li	r10,0
+	stb	r10,PACAHSRR_VALID(r13)		/* write 0 to the PACAHSRR_VALID byte */
+	ld	r10,PACA_EXGEN+EX_CTR(r13)
+	mtctr	r10				/* CTR <- value saved in PACA_EXGEN */
+	mtcrf	0x80,r9				/* CR field 0 <- r9 */
+	ld	r9,PACA_EXGEN+EX_R9(r13)	/* reload r9-r12 from PACA_EXGEN */
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	ld	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r12,PACA_EXGEN+EX_R12(r13)
+	ld	r13,PACA_EXGEN+EX_R13(r13)	/* r13 last: it is the base register for these loads */
+	HRFI_TO_KERNEL
+
+
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRS are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
+ */
+INT_DEFINE_BEGIN(doorbell_super)	/* 0xa00 Directed Privileged Doorbell Interrupt */
+	IVEC=0xa00
+	IMASK=IRQS_DISABLED	/* soft-maskable via local_irq_disable() */
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
+	ICFAR=0			/* CFAR not required, as for hardware_interrupt */
+INT_DEFINE_END(doorbell_super)
+
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)	/* real mode vector */
+	GEN_INT_ENTRY doorbell_super, virt=0
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)	/* virt vector */
+	GEN_INT_ENTRY doorbell_super, virt=1
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+EXC_COMMON_BEGIN(doorbell_super_common)	/* doorbell: C handler depends on CONFIG_PPC_DOORBELL */
+	GEN_COMMON doorbell_super
+	addi	r3,r1,STACK_INT_FRAME_REGS	/* r3 = address of the regs within the frame */
+#ifdef CONFIG_PPC_DOORBELL
+	bl	CFUNC(doorbell_exception)
+#else /* !CONFIG_PPC_DOORBELL */
+	bl	CFUNC(unknown_async_exception)
+#endif /* CONFIG_PPC_DOORBELL */
+	b	interrupt_return_srr
+
+
+EXC_REAL_NONE(0xb00, 0x100)	/* no handler is defined in the 0xb00 real mode slot */
+EXC_VIRT_NONE(0x4b00, 0x100)	/* nor in the matching 0x4b00 virt slot */
+
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and it sets HV=1, so it elevates to hypervisor.
+ *
+ * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
+ * 0x4c00 virtual mode.
+ *
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
+ * Call convention:
+ *
+ * syscall and hypercalls register conventions are documented in
+ * Documentation/powerpc/syscall64-abi.rst and
+ * Documentation/powerpc/papr_hcalls.rst respectively.
+ *
+ * The intersection of volatile registers that don't contain possible
+ * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
+ * without saving, though xer is not a good idea to use, as hardware may
+ * interpret some bits so it may be costly to change them.
+ */
+INT_DEFINE_BEGIN(system_call)	/* 0xc00 System Call Interrupt; entry code is the SYSTEM_CALL macro */
+	IVEC=0xc00
+	IKVM_REAL=1	/* a KVM test hit means this was a hypercall */
+	IKVM_VIRT=1	/* in RADIX, sc 1 can also arrive at 0x4c00 */
+	ICFAR=0
+INT_DEFINE_END(system_call)
+
+.macro SYSTEM_CALL virt	/* virt: 0 for the 0xc00 real vector, 1 for the 0x4c00 virt vector */
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	/*
+	 * There is a little bit of juggling to get syscall and hcall
+	 * working well. Save r13 in ctr to avoid using SPRG scratch
+	 * register.
+	 *
+	 * Userspace syscalls have already saved the PPR, hcalls must save
+	 * it before setting HMT_MEDIUM.
+	 */
+	mtctr	r13				/* stash the original r13 in ctr */
+	GET_PACA(r13)
+	std	r10,PACA_EXGEN+EX_R10(r13)	/* save r10: KVMTEST uses it */
+	INTERRUPT_TO_KERNEL
+	KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
+	mfctr	r9				/* r9 = original r13 */
+#else /* !CONFIG_KVM_BOOK3S_64_HANDLER */
+	mr	r9,r13				/* r9 = original r13 */
+	GET_PACA(r13)
+	INTERRUPT_TO_KERNEL
+#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
+
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+BEGIN_FTR_SECTION
+	cmpdi	r0,0x1ebe			/* r0 == 0x1ebe selects the fast LE/BE switch */
+	beq-	1f
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
+#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
+
+	/* We reach here with PACA in r13, r13 in r9. */
+	mfspr	r11,SPRN_SRR0			/* r11 = SRR0 */
+	mfspr	r12,SPRN_SRR1			/* r12 = SRR1 */
+
+	HMT_MEDIUM
+
+	.if ! \virt				/* real mode vector */
+	__LOAD_HANDLER(r10, system_call_common_real, real_vectors)
+	mtctr	r10
+	bctr
+	.else					/* virt vector */
+#ifdef CONFIG_RELOCATABLE
+	__LOAD_HANDLER(r10, system_call_common, virt_vectors)
+	mtctr	r10
+	bctr
+#else /* !CONFIG_RELOCATABLE */
+	b	system_call_common
+#endif /* CONFIG_RELOCATABLE */
+	.endif
+
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+	/* Fast LE/BE switch system call */
+1:	mfspr	r12,SPRN_SRR1
+	xori	r12,r12,MSR_LE			/* flip the LE bit in the MSR value to return with */
+	mtspr	SPRN_SRR1,r12
+	mr	r13,r9				/* restore the original r13 */
+	RFI_TO_USER	/* return to userspace */
+	b	.	/* prevent speculative execution */
+#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
+.endm
+
+EXC_REAL_BEGIN(system_call, 0xc00, 0x100)	/* real mode vector */
+	SYSTEM_CALL 0
+EXC_REAL_END(system_call, 0xc00, 0x100)
+EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)	/* virt vector */
+	SYSTEM_CALL 1
+EXC_VIRT_END(system_call, 0x4c00, 0x100)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+TRAMP_REAL_BEGIN(kvm_hcall)	/* KVMTEST target of SYSTEM_CALL: fill PACA_EXGEN, go to kvmppc_hcall */
+	std	r9,PACA_EXGEN+EX_R9(r13)
+	std	r11,PACA_EXGEN+EX_R11(r13)
+	std	r12,PACA_EXGEN+EX_R12(r13)
+	mfcr	r9				/* r9 = CR */
+	mfctr	r10				/* SYSTEM_CALL stashed the original r13 in ctr */
+	std	r10,PACA_EXGEN+EX_R13(r13)
+	li	r10,0
+	std	r10,PACA_EXGEN+EX_CFAR(r13)	/* zero the CFAR and CTR save slots */
+	std	r10,PACA_EXGEN+EX_CTR(r13)
+	 /*
+	  * Save the PPR (on systems that support it) before changing to
+	  * HMT_MEDIUM. That allows the KVM code to save that value into the
+	  * guest state (it is the guest's PPR value).
+	  */
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_PPR
+	std	r10,PACA_EXGEN+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	HMT_MEDIUM
+
+#ifdef CONFIG_RELOCATABLE
+	/*
+	 * Requires __LOAD_FAR_HANDLER because kvmppc_hcall lives
+	 * outside the head section.
+	 */
+	__LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
+	mtctr   r10
+	bctr
+#else /* !CONFIG_RELOCATABLE */
+	b       kvmppc_hcall
+#endif /* CONFIG_RELOCATABLE */
+#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
+
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
+INT_DEFINE_BEGIN(single_step)
+	IVEC=0xd00
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+INT_DEFINE_END(single_step)
+
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+	GEN_INT_ENTRY single_step, virt=0
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+	GEN_INT_ENTRY single_step, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+EXC_COMMON_BEGIN(single_step_common)
+	GEN_COMMON single_step
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(single_step_exception)
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
+INT_DEFINE_BEGIN(h_data_storage)
+	IVEC=0xe00
+	IHSRR=1
+	IDAR=1
+	IDSISR=1
+	IKVM_REAL=1
+	IKVM_VIRT=1
+INT_DEFINE_END(h_data_storage)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+	GEN_INT_ENTRY h_data_storage, virt=0, ool=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+	GEN_INT_ENTRY h_data_storage, virt=1, ool=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+EXC_COMMON_BEGIN(h_data_storage_common)
+	GEN_COMMON h_data_storage
+	addi    r3,r1,STACK_INT_FRAME_REGS
+BEGIN_MMU_FTR_SECTION
+	bl	CFUNC(do_bad_page_fault_segv)
+MMU_FTR_SECTION_ELSE
+	bl	CFUNC(unknown_exception)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
+	b       interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
+INT_DEFINE_BEGIN(h_instr_storage)
+	IVEC=0xe20
+	IHSRR=1
+	IKVM_REAL=1
+	IKVM_VIRT=1
+INT_DEFINE_END(h_instr_storage)
+
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+	GEN_INT_ENTRY h_instr_storage, virt=0, ool=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+	GEN_INT_ENTRY h_instr_storage, virt=1, ool=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+EXC_COMMON_BEGIN(h_instr_storage_common)
+	GEN_COMMON h_instr_storage
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(unknown_exception)
+	b	interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
+INT_DEFINE_BEGIN(emulation_assist)
+	IVEC=0xe40
+	IHSRR=1
+	IKVM_REAL=1
+	IKVM_VIRT=1
+INT_DEFINE_END(emulation_assist)
+
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+	GEN_INT_ENTRY emulation_assist, virt=0, ool=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+	GEN_INT_ENTRY emulation_assist, virt=1, ool=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+EXC_COMMON_BEGIN(emulation_assist_common)
+	GEN_COMMON emulation_assist
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(emulation_assist_interrupt)
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+	b	interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case, this is handled similarly to machine checks, with an
+ * initial real mode handler that is not soft-masked, which attempts to fix the
+ * problem. Then a regular handler which is soft-maskable and reports the
+ * problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
+ */
+INT_DEFINE_BEGIN(hmi_exception_early)
+	IVEC=0xe60
+	IHSRR=1
+	IREALMODE_COMMON=1
+	ISTACK=0
+	IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+	IKVM_REAL=1
+INT_DEFINE_END(hmi_exception_early)
+
+INT_DEFINE_BEGIN(hmi_exception)
+	IVEC=0xe60
+	IHSRR=1
+	IMASK=IRQS_DISABLED
+	IKVM_REAL=1
+INT_DEFINE_END(hmi_exception)
+
+EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
+	GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1
+EXC_REAL_END(hmi_exception, 0xe60, 0x20)
+EXC_VIRT_NONE(0x4e60, 0x20)
+
+EXC_COMMON_BEGIN(hmi_exception_early_common)
+	__GEN_REALMODE_COMMON_ENTRY hmi_exception_early
+
+	mr	r10,r1			/* Save r1 */
+	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack for realmode */
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
+
+	__GEN_COMMON_BODY hmi_exception_early
+
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(hmi_exception_realmode)
+	cmpdi	cr0,r3,0
+	bne	1f
+
+	EXCEPTION_RESTORE_REGS hsrr=1
+	HRFI_TO_USER_OR_KERNEL
+
+1:
+	/*
+	 * Go to virtual mode and pull the HMI event information from
+	 * firmware.
+	 */
+	EXCEPTION_RESTORE_REGS hsrr=1
+	GEN_INT_ENTRY hmi_exception, virt=0
+
+EXC_COMMON_BEGIN(hmi_exception_common)
+	GEN_COMMON hmi_exception
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(handle_hmi_exception)
+	b	interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
+ */
+INT_DEFINE_BEGIN(h_doorbell)
+	IVEC=0xe80
+	IHSRR=1
+	IMASK=IRQS_DISABLED
+	IKVM_REAL=1
+	IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ICFAR=0
+#endif
+INT_DEFINE_END(h_doorbell)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+	GEN_INT_ENTRY h_doorbell, virt=0, ool=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+	GEN_INT_ENTRY h_doorbell, virt=1, ool=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+EXC_COMMON_BEGIN(h_doorbell_common)
+	GEN_COMMON h_doorbell
+	addi	r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_PPC_DOORBELL
+	bl	CFUNC(doorbell_exception)
+#else
+	bl	CFUNC(unknown_async_exception)
+#endif
+	b	interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
+ */
+INT_DEFINE_BEGIN(h_virt_irq)
+	IVEC=0xea0
+	IHSRR=1
+	IMASK=IRQS_DISABLED
+	IKVM_REAL=1
+	IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ICFAR=0
+#endif
+INT_DEFINE_END(h_virt_irq)
+
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+	GEN_INT_ENTRY h_virt_irq, virt=0, ool=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+	GEN_INT_ENTRY h_virt_irq, virt=1, ool=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+EXC_COMMON_BEGIN(h_virt_irq_common)
+	GEN_COMMON h_virt_irq
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(do_IRQ)
+	b	interrupt_return_hsrr
+
+
+EXC_REAL_NONE(0xec0, 0x20)
+EXC_VIRT_NONE(0x4ec0, 0x20)
+EXC_REAL_NONE(0xee0, 0x20)
+EXC_VIRT_NONE(0x4ee0, 0x20)
+
+
+/*
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears as an NMI interrupt to Linux, in that it
+ * runs under local_irq_disable. However it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
+ */
+INT_DEFINE_BEGIN(performance_monitor)
+	IVEC=0xf00
+	IMASK=IRQS_PMI_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+	ICFAR=0
+INT_DEFINE_END(performance_monitor)
+
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+	GEN_INT_ENTRY performance_monitor, virt=0, ool=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+	GEN_INT_ENTRY performance_monitor, virt=1, ool=1
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+EXC_COMMON_BEGIN(performance_monitor_common)
+	GEN_COMMON performance_monitor
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	lbz	r4,PACAIRQSOFTMASK(r13)
+	cmpdi	r4,IRQS_ENABLED
+	bne	1f
+	bl	CFUNC(performance_monitor_exception_async)
+	b	interrupt_return_srr
+1:
+	bl	CFUNC(performance_monitor_exception_nmi)
+	/* Clear MSR_RI before setting SRR0 and SRR1. */
+	li	r9,0
+	mtmsrd	r9,1
+
+	kuap_kernel_restore r9, r10
+
+	EXCEPTION_RESTORE_REGS hsrr=0
+	RFI_TO_KERNEL
+
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(altivec_unavailable)
+	IVEC=0xf20
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+	IMSR_R12=1
+INT_DEFINE_END(altivec_unavailable)
+
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+	GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+	GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+EXC_COMMON_BEGIN(altivec_unavailable_common)
+	GEN_COMMON altivec_unavailable
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	beq	1f
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+  BEGIN_FTR_SECTION_NESTED(69)
+	/* Test if 2 TM state bits are zero.  If non-zero (ie. userspace was in
+	 * transaction), go do TM stuff
+	 */
+	rldicl.	r0, r12, (64-MSR_TS_LG), (64-2)
+	bne-	2f
+  END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
+#endif
+	bl	CFUNC(load_up_altivec)
+	b	fast_interrupt_return_srr
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2:	/* User process was in a transaction */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(altivec_unavailable_tm)
+	b	interrupt_return_srr
+#endif
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(altivec_unavailable_exception)
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(vsx_unavailable)
+	IVEC=0xf40
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+	IMSR_R12=1
+INT_DEFINE_END(vsx_unavailable)
+
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+	GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+	GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+EXC_COMMON_BEGIN(vsx_unavailable_common)
+	GEN_COMMON vsx_unavailable
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+	beq	1f
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+  BEGIN_FTR_SECTION_NESTED(69)
+	/* Test if 2 TM state bits are zero.  If non-zero (ie. userspace was in
+	 * transaction), go do TM stuff
+	 */
+	rldicl.	r0, r12, (64-MSR_TS_LG), (64-2)
+	bne-	2f
+  END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
+#endif
+	b	load_up_vsx
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2:	/* User process was in a transaction */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(vsx_unavailable_tm)
+	b	interrupt_return_srr
+#endif
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(vsx_unavailable_exception)
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(facility_unavailable)
+	IVEC=0xf60
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+INT_DEFINE_END(facility_unavailable)
+
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+	GEN_INT_ENTRY facility_unavailable, virt=0, ool=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+	GEN_INT_ENTRY facility_unavailable, virt=1, ool=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+EXC_COMMON_BEGIN(facility_unavailable_common)
+	GEN_COMMON facility_unavailable
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(facility_unavailable_exception)
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+	b	interrupt_return_srr
+
+
+/**
+ * Interrupt 0xf80 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(h_facility_unavailable)
+	IVEC=0xf80
+	IHSRR=1
+	IKVM_REAL=1
+	IKVM_VIRT=1
+INT_DEFINE_END(h_facility_unavailable)
+
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+	GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+	GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+EXC_COMMON_BEGIN(h_facility_unavailable_common)
+	GEN_COMMON h_facility_unavailable
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(facility_unavailable_exception)
+	/* XXX Shouldn't be necessary in practice */
+	HANDLER_RESTORE_NVGPRS()
+	b	interrupt_return_hsrr
+
+
+EXC_REAL_NONE(0xfa0, 0x20)
+EXC_VIRT_NONE(0x4fa0, 0x20)
+EXC_REAL_NONE(0xfc0, 0x20)
+EXC_VIRT_NONE(0x4fc0, 0x20)
+EXC_REAL_NONE(0xfe0, 0x20)
+EXC_VIRT_NONE(0x4fe0, 0x20)
+
+EXC_REAL_NONE(0x1000, 0x100)
+EXC_VIRT_NONE(0x5000, 0x100)
+EXC_REAL_NONE(0x1100, 0x100)
+EXC_VIRT_NONE(0x5100, 0x100)
+
+#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_system_error)
+	IVEC=0x1200
+	IHSRR=1
+INT_DEFINE_END(cbe_system_error)
+
+EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
+	GEN_INT_ENTRY cbe_system_error, virt=0
+EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
+EXC_VIRT_NONE(0x5200, 0x100)
+EXC_COMMON_BEGIN(cbe_system_error_common)
+	GEN_COMMON cbe_system_error
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(cbe_system_error_exception)
+	b	interrupt_return_hsrr
+
+#else /* CONFIG_CBE_RAS */
+EXC_REAL_NONE(0x1200, 0x100)
+EXC_VIRT_NONE(0x5200, 0x100)
+#endif
+
+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This has been removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported, however the G5 / 970 implements this
+ * interrupt with a non-architected feature available through the support
+ * processor interface.
+ */
+INT_DEFINE_BEGIN(instruction_breakpoint)
+	IVEC=0x1300
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_breakpoint)
+
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+	GEN_INT_ENTRY instruction_breakpoint, virt=0
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+	GEN_INT_ENTRY instruction_breakpoint, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+EXC_COMMON_BEGIN(instruction_breakpoint_common)
+	GEN_COMMON instruction_breakpoint
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(instruction_breakpoint_exception)
+	b	interrupt_return_srr
+
+
+EXC_REAL_NONE(0x1400, 0x100)
+EXC_VIRT_NONE(0x5400, 0x100)
+
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in future to reduce special cases.
+ */
+INT_DEFINE_BEGIN(denorm_exception)
+	IVEC=0x1500
+	IHSRR=1
+	IBRANCH_TO_COMMON=0
+	IKVM_REAL=1
+INT_DEFINE_END(denorm_exception)
+
+EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
+	GEN_INT_ENTRY denorm_exception, virt=0
+#ifdef CONFIG_PPC_DENORMALISATION
+	andis.	r10,r12,(HSRR1_DENORM)@h /* denorm? */
+	bne+	denorm_assist
+#endif
+	GEN_BRANCH_TO_COMMON denorm_exception, virt=0
+EXC_REAL_END(denorm_exception, 0x1500, 0x100)
+#ifdef CONFIG_PPC_DENORMALISATION
+EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
+	GEN_INT_ENTRY denorm_exception, virt=1
+	andis.	r10,r12,(HSRR1_DENORM)@h /* denorm? */
+	bne+	denorm_assist
+	GEN_BRANCH_TO_COMMON denorm_exception, virt=1
+EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
+#else
+EXC_VIRT_NONE(0x5500, 0x100)
+#endif
+
+#ifdef CONFIG_PPC_DENORMALISATION
+TRAMP_REAL_BEGIN(denorm_assist)
+BEGIN_FTR_SECTION
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER6 do that here for all FP regs.
+ */
+	mfmsr	r10
+	ori	r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
+	xori	r10,r10,(MSR_FE0|MSR_FE1)
+	mtmsrd	r10
+	sync
+
+	.Lreg=0
+	.rept 32
+	fmr	.Lreg,.Lreg
+	.Lreg=.Lreg+1
+	.endr
+
+FTR_SECTION_ELSE
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER7 do that here for the first 32 VSX registers only.
+ */
+	mfmsr	r10
+	oris	r10,r10,MSR_VSX@h
+	mtmsrd	r10
+	sync
+
+	.Lreg=0
+	.rept 32
+	XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+	.Lreg=.Lreg+1
+	.endr
+
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+	b	denorm_done
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER8 we need to do that for all 64 VSX registers
+ */
+	.Lreg=32
+	.rept 32
+	XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+	.Lreg=.Lreg+1
+	.endr
+
+denorm_done:
+	mfspr	r11,SPRN_HSRR0
+	subi	r11,r11,4
+	mtspr	SPRN_HSRR0,r11
+	mtcrf	0x80,r9
+	ld	r9,PACA_EXGEN+EX_R9(r13)
+BEGIN_FTR_SECTION
+	ld	r10,PACA_EXGEN+EX_PPR(r13)
+	mtspr	SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+	ld	r10,PACA_EXGEN+EX_CFAR(r13)
+	mtspr	SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	li	r10,0
+	stb	r10,PACAHSRR_VALID(r13)
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	ld	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r12,PACA_EXGEN+EX_R12(r13)
+	ld	r13,PACA_EXGEN+EX_R13(r13)
+	HRFI_TO_UNKNOWN
+	b	.
+#endif
+
+EXC_COMMON_BEGIN(denorm_exception_common)
+	GEN_COMMON denorm_exception
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(unknown_exception)
+	b	interrupt_return_hsrr
+
+
+#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_maintenance)
+	IVEC=0x1600
+	IHSRR=1
+INT_DEFINE_END(cbe_maintenance)
+
+EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
+	GEN_INT_ENTRY cbe_maintenance, virt=0
+EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
+EXC_VIRT_NONE(0x5600, 0x100)
+EXC_COMMON_BEGIN(cbe_maintenance_common)
+	GEN_COMMON cbe_maintenance
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(cbe_maintenance_exception)
+	b	interrupt_return_hsrr
+
+#else /* CONFIG_CBE_RAS */
+EXC_REAL_NONE(0x1600, 0x100)
+EXC_VIRT_NONE(0x5600, 0x100)
+#endif
+
+
+INT_DEFINE_BEGIN(altivec_assist)
+	IVEC=0x1700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	IKVM_REAL=1
+#endif
+INT_DEFINE_END(altivec_assist)
+
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+	GEN_INT_ENTRY altivec_assist, virt=0
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+	GEN_INT_ENTRY altivec_assist, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+EXC_COMMON_BEGIN(altivec_assist_common)
+	GEN_COMMON altivec_assist
+	addi	r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_ALTIVEC
+	bl	CFUNC(altivec_assist_exception)
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+#else
+	bl	CFUNC(unknown_exception)
+#endif
+	b	interrupt_return_srr
+
+
+#ifdef CONFIG_CBE_RAS
+INT_DEFINE_BEGIN(cbe_thermal)
+	IVEC=0x1800
+	IHSRR=1
+INT_DEFINE_END(cbe_thermal)
+
+EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
+	GEN_INT_ENTRY cbe_thermal, virt=0
+EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
+EXC_VIRT_NONE(0x5800, 0x100)
+EXC_COMMON_BEGIN(cbe_thermal_common)
+	GEN_COMMON cbe_thermal
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(cbe_thermal_exception)
+	b	interrupt_return_hsrr
+
+#else /* CONFIG_CBE_RAS */
+EXC_REAL_NONE(0x1800, 0x100)
+EXC_VIRT_NONE(0x5800, 0x100)
+#endif
+
+
+#ifdef CONFIG_PPC_WATCHDOG
+
+INT_DEFINE_BEGIN(soft_nmi)
+	IVEC=0x900
+	ISTACK=0
+	ICFAR=0
+INT_DEFINE_END(soft_nmi)
+
+/*
+ * Branch to soft_nmi_interrupt using the emergency stack. The emergency
+ * stack is one that is usable by maskable interrupts so long as MSR_EE
+ * remains off. It is used for recovery when something has corrupted the
+ * normal kernel stack, for example. The "soft NMI" must not use the process
+ * stack because we want irq disabled sections to avoid touching the stack
+ * at all (other than PMU interrupts), so use the emergency stack for this,
+ * and run it entirely with interrupts hard disabled.
+ */
+EXC_COMMON_BEGIN(soft_nmi_common)
+	mr	r10,r1
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,INT_FRAME_SIZE
+	__GEN_COMMON_BODY soft_nmi
+
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(soft_nmi_interrupt)
+
+	/* Clear MSR_RI before setting SRR0 and SRR1. */
+	li	r9,0
+	mtmsrd	r9,1
+
+	kuap_kernel_restore r9, r10
+
+	EXCEPTION_RESTORE_REGS hsrr=0
+	RFI_TO_KERNEL
+
+#endif /* CONFIG_PPC_WATCHDOG */
+
+/*
+ * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
+ * - If it was a decrementer interrupt, we bump the dec to max and return.
+ * - If it was a doorbell we return immediately since doorbells are edge
+ *   triggered and won't automatically refire.
+ * - If it was a HMI we return immediately since we handled it in realmode
+ *   and it won't refire.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
+ * This is called with r10 containing the value to OR to the paca field.
+ */
+.macro MASKED_INTERRUPT hsrr=0
+	.if \hsrr
+masked_Hinterrupt:
+	.else
+masked_interrupt:
+	.endif
+	stw	r9,PACA_EXGEN+EX_CCR(r13)
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+	/*
+	 * Ensure there was no previous MUST_HARD_MASK interrupt or
+	 * HARD_DIS setting. If this does fire, the interrupt is still
+	 * masked and MSR[EE] will be cleared on return, so no need to
+	 * panic, but somebody probably enabled MSR[EE] under
+	 * PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common
+	 * cause.
+	 */
+	lbz	r9,PACAIRQHAPPENED(r13)
+	andi.	r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS)
+0:	tdnei	r9,0
+	EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+	lbz	r9,PACAIRQHAPPENED(r13)
+	or	r9,r9,r10
+	stb	r9,PACAIRQHAPPENED(r13)
+
+	.if ! \hsrr
+	cmpwi	r10,PACA_IRQ_DEC
+	bne	1f
+	LOAD_REG_IMMEDIATE(r9, 0x7fffffff)
+	mtspr	SPRN_DEC,r9
+#ifdef CONFIG_PPC_WATCHDOG
+	lwz	r9,PACA_EXGEN+EX_CCR(r13)
+	b	soft_nmi_common
+#else
+	b	2f
+#endif
+	.endif
+
+1:	andi.	r10,r10,PACA_IRQ_MUST_HARD_MASK
+	beq	2f
+	xori	r12,r12,MSR_EE	/* clear MSR_EE */
+	.if \hsrr
+	mtspr	SPRN_HSRR1,r12
+	.else
+	mtspr	SPRN_SRR1,r12
+	.endif
+	ori	r9,r9,PACA_IRQ_HARD_DIS
+	stb	r9,PACAIRQHAPPENED(r13)
+2:	/* done */
+	li	r9,0
+	.if \hsrr
+	stb	r9,PACAHSRR_VALID(r13)
+	.else
+	stb	r9,PACASRR_VALID(r13)
+	.endif
+
+	SEARCH_RESTART_TABLE
+	cmpdi	r12,0
+	beq	3f
+	.if \hsrr
+	mtspr	SPRN_HSRR0,r12
+	.else
+	mtspr	SPRN_SRR0,r12
+	.endif
+3:
+
+	ld	r9,PACA_EXGEN+EX_CTR(r13)
+	mtctr	r9
+	lwz	r9,PACA_EXGEN+EX_CCR(r13)
+	mtcrf	0x80,r9
+	std	r1,PACAR1(r13)
+	ld	r9,PACA_EXGEN+EX_R9(r13)
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	ld	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r12,PACA_EXGEN+EX_R12(r13)
+	ld	r13,PACA_EXGEN+EX_R13(r13)
+	/* May return to masked low address where r13 is not set up */
+	.if \hsrr
+	HRFI_TO_KERNEL
+	.else
+	RFI_TO_KERNEL
+	.endif
+	b	.
+.endm
+
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	sync
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ori	31,31,0
+	.rept 14
+	b	1f
+1:
+	.endr
+	blr
+
+/* Loads through the PACA fallback flush area. Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)	/* r10 = base of the fallback area */
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)		/* r11 = flush size */
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+	mtctr	r11					/* CTR = loop iteration count */
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+
+	/*
+	 * The load addresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)	/* eight loads, 0x88 bytes apart; values are discarded */
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8		/* advance by eight 0x80-byte lines */
+	bdnz	1b			/* decrement CTR, loop while non-zero */
+.endm
+
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)	/* save r9-r11 in the EXRFI area */
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	mfctr	r9				/* keep CTR in r9: the flush macro clobbers CTR */
+	L1D_DISPLACEMENT_FLUSH
+	mtctr	r9				/* restore CTR */
+	ld	r9,PACA_EXRFI+EX_R9(r13)	/* restore r9-r11 */
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	blr
+
+/*
+ * The SCV entry flush happens with interrupts enabled, so it must disable
+ * to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10
+ * (containing LR) does not need to be preserved here because scv entry
+ * puts 0 in the pt_regs, CTR can be clobbered for the same reason.
+ */
+TRAMP_REAL_BEGIN(scv_entry_flush_fallback)
+	li	r10,0
+	mtmsrd	r10,1				/* hard-disable before touching EXRFI (see above) */
+	lbz	r10,PACAIRQHAPPENED(r13)	/* read-modify-write of PACAIRQHAPPENED: */
+	ori	r10,r10,PACA_IRQ_HARD_DIS	/* set the PACA_IRQ_HARD_DIS flag */
+	stb	r10,PACAIRQHAPPENED(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)	/* only r11 is saved; r10 and CTR are not preserved */
+	L1D_DISPLACEMENT_FLUSH
+	ld	r11,PACA_EXRFI+EX_R11(r13)	/* restore r11 */
+	li	r10,MSR_RI			/* r10 has only the RI bit set */
+	mtmsrd	r10,1
+	blr
+
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+	SET_SCRATCH0(r13);			/* stash the incoming r13 */
+	GET_PACA(r13);				/* r13 = PACA for the accesses below */
+	std	r1,PACA_EXRFI+EX_R12(r13)	/* incoming r1 is parked in the EX_R12 slot */
+	ld	r1,PACAKSAVE(r13)		/* r1 = PACAKSAVE value for the duration of the flush */
+	std	r9,PACA_EXRFI+EX_R9(r13)	/* save r9-r11 in the EXRFI area */
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	mfctr	r9				/* keep CTR in r9: the flush macro clobbers CTR */
+	L1D_DISPLACEMENT_FLUSH
+	mtctr	r9				/* restore CTR */
+	ld	r9,PACA_EXRFI+EX_R9(r13)	/* restore r9-r11 */
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	ld	r1,PACA_EXRFI+EX_R12(r13)	/* restore the incoming r1 */
+	GET_SCRATCH0(r13);			/* restore the incoming r13 */
+	rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+	SET_SCRATCH0(r13);			/* stash the incoming r13 */
+	GET_PACA(r13);				/* r13 = PACA for the accesses below */
+	std	r1,PACA_EXRFI+EX_R12(r13)	/* incoming r1 is parked in the EX_R12 slot */
+	ld	r1,PACAKSAVE(r13)		/* r1 = PACAKSAVE value for the duration of the flush */
+	std	r9,PACA_EXRFI+EX_R9(r13)	/* save r9-r11 in the EXRFI area */
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	mfctr	r9				/* keep CTR in r9: the flush macro clobbers CTR */
+	L1D_DISPLACEMENT_FLUSH
+	mtctr	r9				/* restore CTR */
+	ld	r9,PACA_EXRFI+EX_R9(r13)	/* restore r9-r11 */
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	ld	r1,PACA_EXRFI+EX_R12(r13)	/* restore the incoming r1 */
+	GET_SCRATCH0(r13);			/* restore the incoming r13 */
+	hrfid
+
+TRAMP_REAL_BEGIN(rfscv_flush_fallback)
+	/* system call volatile */
+	mr	r7,r13				/* r7 holds the incoming r13 across the flush */
+	GET_PACA(r13);
+	mr	r8,r1				/* r8 holds the incoming r1 across the flush */
+	ld	r1,PACAKSAVE(r13)
+	mfctr	r9				/* r9 holds CTR across the flush loop */
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)	/* same sequence as L1D_DISPLACEMENT_FLUSH */
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+	mtctr	r11
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+
+	/*
+	 * The load addresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
+	bdnz	1b
+
+	mtctr	r9				/* restore CTR */
+	li	r9,0				/* r9-r11 are returned as zero, not restored */
+	li	r10,0
+	li	r11,0
+	mr	r1,r8				/* restore the incoming r1 */
+	mr	r13,r7				/* restore the incoming r13 */
+	RFSCV
+
+USE_TEXT_SECTION()
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
+	/*
+	 * The conditional branch in KVMTEST can't reach all the way,
+	 * make a stub.
+	 */
+	b	kvmppc_interrupt
+#endif
+
+_GLOBAL(do_uaccess_flush)
+	UACCESS_FLUSH_FIXUP_SECTION
+	nop	/* NOTE(review): these nops look like a patch site for the fixup section above - confirm */
+	nop
+	nop
+	blr	/* as written, returns here without reaching the flush below */
+	L1D_DISPLACEMENT_FLUSH
+	blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
+
+
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
+
+USE_FIXED_SECTION(virt_trampolines)
+	/*
+	 * All code below __end_soft_masked is treated as soft-masked. If
+	 * any code runs here with MSR[EE]=1, it must then cope with pending
+	 * soft interrupt being raised (i.e., by ensuring it is replayed).
+	 *
+	 * The __end_interrupts marker must be past the out-of-line (OOL)
+	 * handlers, so that they are copied to real address 0x100 when running
+	 * a relocatable kernel. This ensures they can be reached from the short
+	 * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch
+	 * directly, without using LOAD_HANDLER().
+	 */
+	.align	7
+	.globl	__end_interrupts
+__end_interrupts:
+DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)
+
+CLOSE_FIXED_SECTION(real_vectors);
+CLOSE_FIXED_SECTION(real_trampolines);
+CLOSE_FIXED_SECTION(virt_vectors);
+CLOSE_FIXED_SECTION(virt_trampolines);
+
+USE_TEXT_SECTION()
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+_GLOBAL(enable_machine_check)
+	mflr	r0			/* keep the caller's LR; bcl below overwrites LR */
+	bcl	20,31,$+4		/* branch-and-link to the next instruction */
+0:	mflr	r3			/* r3 = address of label 0 */
+	addi	r3,r3,(1f - 0b)		/* r3 = address of label 1 */
+	mtspr	SPRN_SRR0,r3		/* SRR0 = address of label 1 */
+	mfmsr	r3
+	ori	r3,r3,MSR_ME		/* current MSR with MSR_ME set */
+	mtspr	SPRN_SRR1,r3		/* SRR1 = that MSR value */
+	RFI_TO_KERNEL
+1:	mtlr	r0			/* restore the caller's LR */
+	blr
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+SYM_FUNC_START_LOCAL(disable_machine_check)
+	mflr	r0			/* keep the caller's LR; bcl below overwrites LR */
+	bcl	20,31,$+4		/* branch-and-link to the next instruction */
+0:	mflr	r3			/* r3 = address of label 0 */
+	addi	r3,r3,(1f - 0b)		/* r3 = address of label 1 */
+	mtspr	SPRN_SRR0,r3		/* SRR0 = address of label 1 */
+	mfmsr	r3
+	li	r4,MSR_ME
+	andc	r3,r3,r4		/* current MSR with MSR_ME cleared */
+	mtspr	SPRN_SRR1,r3		/* SRR1 = that MSR value */
+	RFI_TO_KERNEL
+1:	mtlr	r0			/* restore the caller's LR */
+	blr
+SYM_FUNC_END(disable_machine_check)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 0000000000..3ff2da7b12
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,1742 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
+ * dump with assistance from firmware. This approach does not use kexec,
+ * instead firmware assists in booting the kdump kernel while preserving
+ * memory contents. The most of the code implementation has been adapted
+ * from phyp assisted dump implementation written by Linas Vepstas and
+ * Manish Ahuja
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/cma.h>
+#include <linux/hugetlb.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/page.h>
+#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
+#include <asm/setup.h>
+#include <asm/interrupt.h>
+
+/*
+ * The CPU that acquired the lock to trigger the fadump crash should
+ * wait for the other CPUs to enter.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT		500
+
+static struct fw_dump fw_dump;
+
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
+
+/* NOTE(review): presumably the sysfs kobject for fadump; it is set up outside this part of the file. */
+static struct kobject *fadump_kobj;
+
+/* Count of CPUs that entered crash_fadump() after another CPU had already claimed the crash. */
+static atomic_t cpus_in_fadump;
+static DEFINE_MUTEX(fadump_mutex);
+
+/*
+ * Crash memory ranges. Starts without a backing array; fadump_add_mem_range()
+ * grows it on demand through fadump_alloc_mem_ranges().
+ * NOTE(review): the positional initializer is assumed to end with the
+ * is_static flag (false here) -- confirm against struct fadump_mrange_info.
+ */
+static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
+
+#define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
+#define RESERVED_RNGS_CNT	(RESERVED_RNGS_SZ / \
+				 sizeof(struct fadump_memory_range))
+/* Fixed backing storage for the reserved ranges table below. */
+static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
+/*
+ * Reserved memory ranges, backed by the static rngs[] array above, so the
+ * table cannot be grown beyond RESERVED_RNGS_CNT entries.
+ */
+static struct fadump_mrange_info
+reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };
+
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
+
+#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
+/*
+ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
+ *
+ * The fadump reservation covers boot memory size + cpu data size + hpte
+ * size and metadata. Only the part equivalent to boot memory size is handed
+ * to CMA; the remainder stays reserved. Boot memory size is aligned per CMA
+ * requirement to satisfy the cma_init_reserved_mem() call.
+ *
+ * A CMA initialization failure is not fatal: the memory reservation is
+ * still in place and fadump can continue without CMA.
+ *
+ * Returns 0 when fadump is disabled or the boot memory size is zero, and 1
+ * in every other case (nocma requested, CMA init failed, or CMA init
+ * succeeded).
+ */
+static int __init fadump_cma_init(void)
+{
+	unsigned long long area_base, area_size;
+	int err;
+
+	if (!fw_dump.fadump_enabled)
+		return 0;
+
+	/*
+	 * 'fadump=nocma' on the kernel command line: skip CMA and keep the
+	 * old fadump behaviour.
+	 */
+	if (fw_dump.nocma)
+		return 1;
+
+	area_size = fw_dump.boot_memory_size;
+	if (!area_size)
+		return 0;
+
+	area_base = fw_dump.reserve_dump_area_start;
+
+	err = cma_init_reserved_mem(area_base, area_size, 0, "fadump_cma",
+				    &fadump_cma);
+	if (err) {
+		pr_err("Failed to init cma area for firmware-assisted dump,%d\n", err);
+		/*
+		 * The reservation itself is intact and stays blocked from
+		 * production system usage, so fadump can still go ahead.
+		 */
+		return 1;
+	}
+
+	/*
+	 * Should CMA activation fail later, keep the pages reserved rather
+	 * than exposing them to the buddy allocator, as with 'fadump=nocma'.
+	 */
+	cma_reserve_pages_on_error(fadump_cma);
+
+	/* The CMA area for fadump is now initialized. */
+	pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
+		"bytes of memory reserved for firmware-assisted dump\n",
+		cma_get_size(fadump_cma),
+		(unsigned long)cma_get_base(fadump_cma) >> 20,
+		fw_dump.reserve_dump_area_size);
+
+	return 1;
+}
+#else
+static int __init fadump_cma_init(void) { return 1; }
+#endif /* CONFIG_CMA */
+
+/*
+ * Device tree scan callback that collects the Firmware Assisted dump
+ * configuration details.
+ *
+ * At depth 0 the reserved ranges are scanned. At depth 1 the "rtas" and
+ * "ibm,opal" nodes are passed to their platform scanners. All other nodes
+ * are ignored.
+ *
+ * Returns 1 after a platform node has been scanned, 0 otherwise.
+ */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+				      int depth, void *data)
+{
+	switch (depth) {
+	case 0:
+		early_init_dt_scan_reserved_ranges(node);
+		return 0;
+	case 1:
+		break;
+	default:
+		return 0;
+	}
+
+	if (!strcmp(uname, "rtas")) {
+		rtas_fadump_dt_scan(&fw_dump, node);
+		return 1;
+	}
+
+	if (!strcmp(uname, "ibm,opal")) {
+		opal_fadump_dt_scan(&fw_dump, node);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Check whether the given memory falls within the reserved dump area or
+ * the boot memory area of a registered fadump.
+ *
+ * Returns 0 when fadump is not registered or @size is zero. Otherwise
+ * returns 1 if [addr, addr + size) reaches into the reserved dump area, or
+ * if @addr is at or below the top of boot memory.
+ */
+int is_fadump_memory_area(u64 addr, unsigned long size)
+{
+	u64 resv_start = fw_dump.reserve_dump_area_start;
+	u64 resv_end = resv_start + fw_dump.reserve_dump_area_size;
+
+	if (!fw_dump.dump_registered || !size)
+		return 0;
+
+	if (addr + size > resv_start && addr <= resv_end)
+		return 1;
+
+	return addr <= fw_dump.boot_mem_top;
+}
+
+/*
+ * Returns 1 when a crash should be handled through fadump, i.e. the dump
+ * is registered and the fadump header address is set; 0 otherwise.
+ */
+int should_fadump_crash(void)
+{
+	return (fw_dump.dump_registered && fw_dump.fadumphdr_addr) ? 1 : 0;
+}
+
+/* Returns the dump_active flag of the fadump state (non-zero when a dump is active). */
+int is_fadump_active(void)
+{
+	return fw_dump.dump_active;
+}
+
+/*
+ * Returns true, if there are no holes in memory area between d_start to d_end,
+ * false otherwise.
+ *
+ * Walks the memory ranges in iteration order, clipping each one to
+ * [d_start, d_end]. d_start is used as a cursor that advances past every
+ * range already found to be covered.
+ */
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
+{
+	phys_addr_t reg_start, reg_end;
+	bool ret = false;
+	u64 i, start, end;
+
+	for_each_mem_range(i, &reg_start, &reg_end) {
+		/* Clip this memory range to the area still to be checked. */
+		start = max_t(u64, d_start, reg_start);
+		end = min_t(u64, d_end, reg_end);
+		if (d_start < end) {
+			/* Memory hole from d_start to start */
+			if (start > d_start)
+				break;
+
+			/* This range reaches the end of the area: no holes. */
+			if (end == d_end) {
+				ret = true;
+				break;
+			}
+
+			/*
+			 * Continue the check after this range.
+			 * NOTE(review): the cursor moves to end + 1 rather
+			 * than end -- confirm whether reg_end is inclusive.
+			 */
+			d_start = end + 1;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Returns true, if none of the boot memory regions contains a hole,
+ * false otherwise. Also returns false when there are no boot memory
+ * regions at all.
+ */
+bool is_fadump_boot_mem_contiguous(void)
+{
+	int idx;
+
+	for (idx = 0; idx < fw_dump.boot_mem_regs_cnt; idx++) {
+		unsigned long region_start = fw_dump.boot_mem_addr[idx];
+		unsigned long region_end = region_start + fw_dump.boot_mem_sz[idx];
+
+		if (!is_fadump_mem_area_contiguous(region_start, region_end))
+			return false;
+	}
+
+	/* idx is still 0 only if the loop never ran (empty region list). */
+	return idx != 0;
+}
+
+/*
+ * Returns true, if the reserved dump area contains no memory holes,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+	u64 area_start = fw_dump.reserve_dump_area_start;
+	u64 area_end = area_start + fw_dump.reserve_dump_area_size;
+
+	return is_fadump_mem_area_contiguous(area_start, area_end);
+}
+
+/*
+ * Print firmware assisted dump configurations for debugging purpose.
+ * Only the support line is printed when fadump is not supported.
+ */
+static void __init fadump_show_config(void)
+{
+	const char *state;
+	int idx;
+
+	state = fw_dump.fadump_supported ? "present" : "no support";
+	pr_debug("Support for firmware-assisted dump (fadump): %s\n", state);
+
+	if (!fw_dump.fadump_supported)
+		return;
+
+	state = fw_dump.fadump_enabled ? "yes" : "no";
+	pr_debug("Fadump enabled    : %s\n", state);
+
+	state = fw_dump.dump_active ? "yes" : "no";
+	pr_debug("Dump Active       : %s\n", state);
+
+	pr_debug("Dump section sizes:\n");
+	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
+	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
+	pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
+	pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
+	pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+
+	/* One line per boot memory region. */
+	for (idx = 0; idx < fw_dump.boot_mem_regs_cnt; idx++)
+		pr_debug("[%03d] base = %llx, size = %llx\n", idx,
+			 fw_dump.boot_mem_addr[idx], fw_dump.boot_mem_sz[idx]);
+}
+
+/**
+ * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
+ *
+ * Function to find the largest memory size we need to reserve during early
+ * boot process. This will be the size of the memory that is required for a
+ * kernel to boot successfully.
+ *
+ * This function has been taken from phyp-assisted dump feature implementation.
+ *
+ * The size is chosen in this order of precedence:
+ *  1. the size given via 'crashkernel=', capped at
+ *     memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
+ *  2. the size given via the deprecated 'fadump_reserve_mem=';
+ *  3. 5% of system RAM rounded down to a multiple of 256MB and truncated
+ *     to memory_limit, but never less than the platform's minimum boot
+ *     memory size (fadump_get_bootmem_min()).
+ *
+ * Side effect: in case 1, fw_dump.reserve_bootvar is overwritten with the
+ * (possibly capped) crashkernel size.
+ *
+ * TODO: Come up with better approach to find out more accurate memory size
+ * that is required for a kernel to boot successfully.
+ *
+ */
+static __init u64 fadump_calculate_reserve_size(void)
+{
+	u64 base, size, bootmem_min;
+	int ret;
+
+	if (fw_dump.reserve_bootvar)
+		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
+
+	/*
+	 * Check if the size is specified through crashkernel= cmdline
+	 * option. If yes, then use that but ignore base as fadump reserves
+	 * memory at a predefined offset.
+	 */
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+				&size, &base);
+	if (ret == 0 && size > 0) {
+		unsigned long max_size;
+
+		/* Both parameters were given: crashkernel= takes precedence. */
+		if (fw_dump.reserve_bootvar)
+			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
+
+		fw_dump.reserve_bootvar = (unsigned long)size;
+
+		/*
+		 * Adjust if the boot memory size specified is above
+		 * the upper limit.
+		 */
+		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
+		if (fw_dump.reserve_bootvar > max_size) {
+			fw_dump.reserve_bootvar = max_size;
+			pr_info("Adjusted boot memory size to %luMB\n",
+				(fw_dump.reserve_bootvar >> 20));
+		}
+
+		return fw_dump.reserve_bootvar;
+	} else if (fw_dump.reserve_bootvar) {
+		/*
+		 * 'fadump_reserve_mem=' is being used to reserve memory
+		 * for firmware-assisted dump.
+		 */
+		return fw_dump.reserve_bootvar;
+	}
+
+	/* divide by 20 to get 5% of value */
+	size = memblock_phys_mem_size() / 20;
+
+	/* round it down to a multiple of 256MB (clear the low 28 bits) */
+	size = size & ~0x0FFFFFFFUL;
+
+	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
+	if (memory_limit && size > memory_limit)
+		size = memory_limit;
+
+	/* Never go below the minimum boot memory size the platform requires. */
+	bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+	return (size > bootmem_min ? size : bootmem_min);
+}
+
+/*
+ * Calculate the total memory size required to be reserved for
+ * firmware-assisted dump registration.
+ *
+ * Layout accounted for, in order:
+ *   - CPU state data and HPTE region, padded up to a page boundary so the
+ *     boot memory destination address is page aligned (this facilitates
+ *     mmap reading of the first kernel's memory);
+ *   - the boot memory copy;
+ *   - the crash info header, the ELF core header, one program header as
+ *     place holder for cpu notes and one program header per memblock
+ *     memory region plus two, all padded up to a page boundary;
+ *   - kernel metadata, on platforms that provide a size callback for it.
+ */
+static unsigned long __init get_fadump_area_size(void)
+{
+	unsigned long total, hdrs;
+
+	total = fw_dump.cpu_state_data_size + fw_dump.hpte_region_size;
+	total = PAGE_ALIGN(total);
+
+	hdrs = sizeof(struct fadump_crash_info_header);
+	hdrs += sizeof(struct elfhdr);		/* ELF core header */
+	hdrs += sizeof(struct elf_phdr);	/* place holder for cpu notes */
+	/* Program headers for crash memory regions. */
+	hdrs += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
+
+	total = PAGE_ALIGN(total + fw_dump.boot_memory_size + hdrs);
+
+	/* This is to hold kernel metadata on platforms that support it */
+	if (fw_dump.ops->fadump_get_metadata_size)
+		total += fw_dump.ops->fadump_get_metadata_size();
+
+	return total;
+}
+
+/*
+ * Append one boot memory region [rstart, rstart + rsize) to the fw_dump
+ * region table.
+ *
+ * Returns 1 on success, or 0 when the table already holds
+ * FADUMP_MAX_MEM_REGS entries (the count is left clamped at that limit).
+ */
+static int __init add_boot_mem_region(unsigned long rstart,
+				      unsigned long rsize)
+{
+	int slot = fw_dump.boot_mem_regs_cnt;
+
+	if (fw_dump.boot_mem_regs_cnt >= FADUMP_MAX_MEM_REGS) {
+		/* Table is full: clamp the count and report failure. */
+		fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+		return 0;
+	}
+
+	fw_dump.boot_mem_regs_cnt++;
+	fw_dump.boot_mem_addr[slot] = rstart;
+	fw_dump.boot_mem_sz[slot] = rsize;
+	pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+		 slot, rstart, (rstart + rsize));
+
+	return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions of at most
+ * fw_dump.max_copy_size bytes each (no splitting when that limit is zero).
+ *
+ * Returns 1 when every piece was added (or the range is empty), 0 as soon
+ * as add_boot_mem_region() fails.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+				       unsigned long msize)
+{
+	unsigned long chunk, chunk_limit;
+	int ret = 1;
+
+	if (fw_dump.max_copy_size)
+		chunk_limit = fw_dump.max_copy_size;
+	else
+		chunk_limit = msize;
+
+	for (; msize; msize -= chunk, mstart += chunk) {
+		chunk = (msize > chunk_limit) ? chunk_limit : msize;
+
+		ret = add_boot_mem_region(mstart, chunk);
+		if (!ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Build the table of boot memory regions to be registered with firmware.
+ *
+ * Memory ranges are consumed in iteration order until together they cover
+ * fw_dump.boot_memory_size bytes; the last range used is trimmed so the
+ * total matches exactly. The holes skipped between the ranges used are
+ * summed up, and fw_dump.boot_mem_top is set to the page-aligned sum of
+ * boot_memory_size and those holes.
+ *
+ * Returns 1 on success, 0 if the regions do not fit in the region table.
+ */
+static int __init fadump_get_boot_mem_regions(void)
+{
+	unsigned long size, cur_size, hole_size, last_end;
+	unsigned long mem_size = fw_dump.boot_memory_size;
+	phys_addr_t reg_start, reg_end;
+	int ret = 1;
+	u64 i;
+
+	fw_dump.boot_mem_regs_cnt = 0;
+
+	last_end = 0;
+	hole_size = 0;
+	cur_size = 0;
+	for_each_mem_range(i, &reg_start, &reg_end) {
+		size = reg_end - reg_start;
+		hole_size += (reg_start - last_end);
+
+		/* This range completes the boot memory: take only the rest. */
+		if ((cur_size + size) >= mem_size) {
+			size = (mem_size - cur_size);
+			ret = add_boot_mem_regions(reg_start, size);
+			break;
+		}
+
+		/*
+		 * Track progress in cur_size only. mem_size must stay the
+		 * fixed target: shrinking it as well would count every range
+		 * twice, cut the final range short (or underflow its size)
+		 * and leave boot_mem_top beyond the registered regions.
+		 */
+		cur_size += size;
+		ret = add_boot_mem_regions(reg_start, size);
+		if (!ret)
+			break;
+
+		last_end = reg_end;
+	}
+	fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+	return ret;
+}
+
+/*
+ * Check [base, end) against the reserved memory ranges, starting the search
+ * at index *idx.
+ *
+ * Returns true and stores the index of the overlapping range in *idx when
+ * an overlap is found. Returns false, leaving *idx untouched, otherwise.
+ * The search stops at the first reserved range that starts at or after
+ * @end.
+ */
+static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+{
+	int pos;
+
+	for (pos = *idx; pos < reserved_mrange_info.mem_range_cnt; pos++) {
+		u64 rng_start = reserved_mrange_info.mem_ranges[pos].base;
+		u64 rng_end = rng_start + reserved_mrange_info.mem_ranges[pos].size;
+
+		/* The candidate ends before this reserved range begins. */
+		if (end <= rng_start)
+			return false;
+
+		/* end > rng_start holds here, so only the lower bound is left. */
+		if (base < rng_end) {
+			*idx = pos;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Locate a suitable memory area to reserve memory for FADump. While at it,
+ * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
+ *
+ * @base: lowest address to consider
+ * @size: number of bytes needed
+ *
+ * Returns the start address of the first free, page-aligned chunk of @size
+ * bytes at or above @base that does not overlap a reserved range, or 0 if
+ * there is none.
+ */
+static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
+{
+	struct fadump_memory_range *mrngs;
+	phys_addr_t mstart, mend;
+	int idx = 0;
+	u64 i, ret = 0;
+
+	mrngs = reserved_mrange_info.mem_ranges;
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+				&mstart, &mend, NULL) {
+		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
+			 i, mstart, mend, base);
+
+		/* Never search below the start of the current free range. */
+		if (mstart > base)
+			base = PAGE_ALIGN(mstart);
+
+		/* Try candidates while the rest of this free range can hold size. */
+		while ((mend > base) && ((mend - base) >= size)) {
+			if (!overlaps_reserved_ranges(base, base+size, &idx)) {
+				ret = base;
+				goto out;
+			}
+
+			/* Retry just past the reserved range that got in the way. */
+			base = mrngs[idx].base + mrngs[idx].size;
+			base = PAGE_ALIGN(base);
+		}
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Reserve memory for firmware-assisted dump.
+ *
+ * When no dump is active, the boot memory size and boot memory regions are
+ * computed, a chunk for the dump area is located above boot memory and
+ * reserved with memblock, and CMA setup is attempted on it. When a dump is
+ * active (previous boot crashed), the crash area above boot memory is
+ * reserved instead and HugeTLB is disabled.
+ *
+ * memory_limit, if set below the end of DRAM, is raised by the dump area
+ * size (capped at the end of DRAM).
+ *
+ * Returns 0 if fadump is not enabled or on any failure (fadump is then
+ * disabled and the reserved size reset to 0). Returns 1 on the dump-active
+ * path, and the result of fadump_cma_init() otherwise.
+ */
+int __init fadump_reserve_mem(void)
+{
+	u64 base, size, mem_boundary, bootmem_min;
+	int ret = 1;
+
+	if (!fw_dump.fadump_enabled)
+		return 0;
+
+	if (!fw_dump.fadump_supported) {
+		pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+		goto error_out;
+	}
+
+	/*
+	 * Initialize boot memory size
+	 * If dump is active then we have already calculated the size during
+	 * first kernel.
+	 */
+	if (!fw_dump.dump_active) {
+		fw_dump.boot_memory_size =
+			PAGE_ALIGN(fadump_calculate_reserve_size());
+#ifdef CONFIG_CMA
+		/* Unless CMA was opted out of, also meet its alignment needs. */
+		if (!fw_dump.nocma) {
+			fw_dump.boot_memory_size =
+				ALIGN(fw_dump.boot_memory_size,
+				      CMA_MIN_ALIGNMENT_BYTES);
+		}
+#endif
+
+		bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+		if (fw_dump.boot_memory_size < bootmem_min) {
+			pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+			       fw_dump.boot_memory_size, bootmem_min);
+			goto error_out;
+		}
+
+		if (!fadump_get_boot_mem_regions()) {
+			pr_err("Too many holes in boot memory area to enable fadump\n");
+			goto error_out;
+		}
+	}
+
+	/*
+	 * Calculate the memory boundary.
+	 * If memory_limit is less than actual memory boundary then reserve
+	 * the memory for fadump beyond the memory_limit and adjust the
+	 * memory_limit accordingly, so that the running kernel can run with
+	 * specified memory_limit.
+	 */
+	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
+		size = get_fadump_area_size();
+		if ((memory_limit + size) < memblock_end_of_DRAM())
+			memory_limit += size;
+		else
+			memory_limit = memblock_end_of_DRAM();
+		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
+				" dump, now %#016llx\n", memory_limit);
+	}
+	if (memory_limit)
+		mem_boundary = memory_limit;
+	else
+		mem_boundary = memblock_end_of_DRAM();
+
+	/* The dump area is searched for starting right above boot memory. */
+	base = fw_dump.boot_mem_top;
+	size = get_fadump_area_size();
+	fw_dump.reserve_dump_area_size = size;
+	if (fw_dump.dump_active) {
+		pr_info("Firmware-assisted dump is active.\n");
+
+#ifdef CONFIG_HUGETLB_PAGE
+		/*
+		 * FADump capture kernel doesn't care much about hugepages.
+		 * In fact, handling hugepages in capture kernel is asking for
+		 * trouble. So, disable HugeTLB support when fadump is active.
+		 */
+		hugetlb_disabled = true;
+#endif
+		/*
+		 * If last boot has crashed then reserve all the memory
+		 * above boot memory size so that we don't touch it until
+		 * dump is written to disk by userspace tool. This memory
+		 * can be released for general use by invalidating fadump.
+		 */
+		fadump_reserve_crash_area(base);
+
+		pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+		pr_debug("Reserve dump area start address: 0x%lx\n",
+			 fw_dump.reserve_dump_area_start);
+	} else {
+		/*
+		 * Reserve memory at an offset closer to bottom of the RAM to
+		 * minimize the impact of memory hot-remove operation.
+		 */
+		base = fadump_locate_reserve_mem(base, size);
+
+		/* No chunk found, or the chunk would cross the memory boundary. */
+		if (!base || (base + size > mem_boundary)) {
+			pr_err("Failed to find memory chunk for reservation!\n");
+			goto error_out;
+		}
+		fw_dump.reserve_dump_area_start = base;
+
+		/*
+		 * Calculate the kernel metadata address and register it with
+		 * f/w if the platform supports.
+		 */
+		if (fw_dump.ops->fadump_setup_metadata &&
+		    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+			goto error_out;
+
+		if (memblock_reserve(base, size)) {
+			pr_err("Failed to reserve memory!\n");
+			goto error_out;
+		}
+
+		pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+			(size >> 20), base, (memblock_phys_mem_size() >> 20));
+
+		ret = fadump_cma_init();
+	}
+
+	return ret;
+error_out:
+	/* Any failure turns fadump off and forgets the reservation size. */
+	fw_dump.fadump_enabled = 0;
+	fw_dump.reserve_dump_area_size = 0;
+	return 0;
+}
+
+/*
+ * Look for fadump= cmdline option.
+ *
+ * The value is matched by prefix: "on" enables fadump, "off" disables it
+ * and "nocma" enables it while opting out of CMA. Any other value leaves
+ * the settings untouched.
+ *
+ * Returns 1 if no value was supplied, 0 otherwise.
+ */
+static int __init early_fadump_param(char *p)
+{
+	if (!p)
+		return 1;
+
+	if (!strncmp(p, "on", 2)) {
+		fw_dump.fadump_enabled = 1;
+	} else if (!strncmp(p, "off", 3)) {
+		fw_dump.fadump_enabled = 0;
+	} else if (!strncmp(p, "nocma", 5)) {
+		fw_dump.fadump_enabled = 1;
+		fw_dump.nocma = 1;
+	}
+
+	return 0;
+}
+early_param("fadump", early_fadump_param);
+
+/*
+ * Look for fadump_reserve_mem= cmdline option and record the parsed size
+ * in fw_dump.reserve_bootvar. Always returns 0.
+ *
+ * TODO: Remove references to 'fadump_reserve_mem=' parameter,
+ *       the sooner 'crashkernel=' parameter is accustomed to.
+ */
+static int __init early_fadump_reserve_mem(char *p)
+{
+	if (!p)
+		return 0;
+
+	fw_dump.reserve_bootvar = memparse(p, &p);
+	return 0;
+}
+early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+/*
+ * Trigger a firmware-assisted dump on the crash path.
+ *
+ * @regs: register state at the time of the crash, or NULL to capture the
+ *        current registers here
+ * @str:  string handed on to the platform's fadump_trigger() callback
+ *
+ * Returns immediately if fadump should not handle the crash. Only the
+ * first CPU to get here fills in the crash info header and triggers the
+ * dump; every later CPU bumps cpus_in_fadump and spins while the dump
+ * stays registered.
+ */
+void crash_fadump(struct pt_regs *regs, const char *str)
+{
+	unsigned int msecs;
+	struct fadump_crash_info_header *fdh = NULL;
+	int old_cpu, this_cpu;
+	/* Do not include first CPU */
+	unsigned int ncpus = num_online_cpus() - 1;
+
+	if (!should_fadump_crash())
+		return;
+
+	/*
+	 * old_cpu == -1 means this is the first CPU which has come here,
+	 * go ahead and trigger fadump.
+	 *
+	 * old_cpu != -1 means some other CPU is already on its way
+	 * to trigger fadump, just keep looping here.
+	 */
+	this_cpu = smp_processor_id();
+	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
+
+	if (old_cpu != -1) {
+		atomic_inc(&cpus_in_fadump);
+
+		/*
+		 * We can't loop here indefinitely. Wait as long as fadump
+		 * is in force. If we race with fadump un-registration this
+		 * loop will break and then we go down to normal panic path
+		 * and reboot. If fadump is in force the first crashing
+		 * cpu will definitely trigger fadump.
+		 */
+		while (fw_dump.dump_registered)
+			cpu_relax();
+		return;
+	}
+
+	/* First CPU in: record the crash details in the fadump header. */
+	fdh = __va(fw_dump.fadumphdr_addr);
+	fdh->crashing_cpu = crashing_cpu;
+	crash_save_vmcoreinfo();
+
+	if (regs)
+		fdh->regs = *regs;
+	else
+		ppc_save_regs(&fdh->regs);
+
+	fdh->cpu_mask = *cpu_online_mask;
+
+	/*
+	 * If we came in via system reset, wait a while for the secondary
+	 * CPUs to enter. The wait ends once all other online CPUs have
+	 * checked in, or after roughly CRASH_TIMEOUT milliseconds.
+	 */
+	if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
+		msecs = CRASH_TIMEOUT;
+		while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
+			mdelay(1);
+	}
+
+	fw_dump.ops->fadump_trigger(fdh, str);
+}
+
+/*
+ * Append an NT_PRSTATUS ELF note built from @regs to the note buffer at
+ * @buf. Only the register set is filled in; every other prstatus field is
+ * left zeroed.
+ *
+ * Returns the buffer position returned by append_elf_note().
+ */
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+{
+	struct elf_prstatus status;
+
+	memset(&status, 0, sizeof(status));
+
+	/*
+	 * FIXME: How do i get PID? Do I really need it?
+	 * status.pr_pid = ????
+	 */
+	elf_core_copy_regs(&status.pr_reg, regs);
+
+	return append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+			       &status, sizeof(status));
+}
+
+/*
+ * Point the first program header of the ELF core header at @bufp to the
+ * CPU notes buffer.
+ *
+ * The program header directly after the ELF header is the place holder
+ * for cpu notes info; it is only updated if it is of type PT_NOTE.
+ */
+void __init fadump_update_elfcore_header(char *bufp)
+{
+	struct elf_phdr *note_phdr;
+
+	note_phdr = (struct elf_phdr *)(bufp + sizeof(struct elfhdr));
+	if (note_phdr->p_type != PT_NOTE)
+		return;
+
+	note_phdr->p_paddr	= __pa(fw_dump.cpu_notes_buf_vaddr);
+	note_phdr->p_offset	= note_phdr->p_paddr;
+	note_phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
+	note_phdr->p_memsz	= fw_dump.cpu_notes_buf_size;
+}
+
+/*
+ * Allocate a zeroed buffer of @size bytes with alloc_pages_exact() and
+ * mark each of its pages reserved.
+ *
+ * Returns the buffer's virtual address, or NULL if the allocation failed.
+ */
+static void *__init fadump_alloc_buffer(unsigned long size)
+{
+	unsigned long npages = PAGE_ALIGN(size) / PAGE_SIZE;
+	struct page *first_page;
+	unsigned long n;
+	void *buf;
+
+	buf = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+	if (!buf)
+		return NULL;
+
+	first_page = virt_to_page(buf);
+	for (n = 0; n < npages; n++)
+		mark_page_reserved(first_page + n);
+
+	return buf;
+}
+
+/* Release the reserved pages backing the buffer [vaddr, vaddr + size). */
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
+{
+	void *buf_start = (void *)vaddr;
+	void *buf_end = (void *)(vaddr + size);
+
+	free_reserved_area(buf_start, buf_end, -1, NULL);
+}
+
+/*
+ * Allocate the buffer that holds the cpu crash notes, sized for @num_cpus
+ * note entries and rounded up to whole pages. The size and address are
+ * recorded in fw_dump.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer could not be allocated.
+ */
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
+{
+	void *notes_buf;
+
+	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(num_cpus * sizeof(note_buf_t));
+
+	notes_buf = fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+	fw_dump.cpu_notes_buf_vaddr = (unsigned long)notes_buf;
+	if (!notes_buf) {
+		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+		       fw_dump.cpu_notes_buf_size);
+		return -ENOMEM;
+	}
+
+	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+		 fw_dump.cpu_notes_buf_size,
+		 fw_dump.cpu_notes_buf_vaddr);
+	return 0;
+}
+
+/*
+ * Free the cpu notes buffer, if one is allocated, and clear its recorded
+ * address and size.
+ */
+void fadump_free_cpu_notes_buf(void)
+{
+	if (fw_dump.cpu_notes_buf_vaddr) {
+		fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+				   fw_dump.cpu_notes_buf_size);
+		fw_dump.cpu_notes_buf_vaddr = 0;
+		fw_dump.cpu_notes_buf_size = 0;
+	}
+}
+
+/*
+ * Drop all memory ranges held by @mrange_info.
+ *
+ * A statically backed table only has its range count reset. Otherwise the
+ * array is freed and everything in the structure past the first
+ * RNG_NAME_SZ bytes is zeroed.
+ */
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+	char *past_name;
+
+	if (!mrange_info->is_static) {
+		kfree(mrange_info->mem_ranges);
+
+		past_name = (char *)mrange_info + RNG_NAME_SZ;
+		memset(past_name, 0, sizeof(*mrange_info) - RNG_NAME_SZ);
+	} else {
+		mrange_info->mem_range_cnt = 0;
+	}
+}
+
+/*
+ * Allocate or reallocate mem_ranges array in incremental units
+ * of PAGE_SIZE.
+ *
+ * Returns 0 on success. On allocation failure the existing ranges are
+ * released through fadump_free_mem_ranges() and -ENOMEM is returned.
+ */
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+	u64 grown_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+	struct fadump_memory_range *grown;
+
+	pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+		 grown_size, mrange_info->name);
+
+	grown = krealloc(mrange_info->mem_ranges, grown_size, GFP_KERNEL);
+	if (!grown) {
+		pr_err("Insufficient memory for setting up %s memory ranges\n",
+		       mrange_info->name);
+		fadump_free_mem_ranges(mrange_info);
+		return -ENOMEM;
+	}
+
+	mrange_info->mem_ranges = grown;
+	mrange_info->mem_ranges_sz = grown_size;
+	mrange_info->max_mem_ranges = grown_size / sizeof(*grown);
+
+	return 0;
+}
+/*
+ * Record the memory range [base, end) in @mrange_info.
+ *
+ * If the range starts exactly where the last recorded range ends, and that
+ * last range lies at or above the top of boot memory, the last range is
+ * extended instead of adding a new entry. Otherwise a new entry is
+ * appended, growing the array first if it is full and not static.
+ *
+ * Returns 0 on success (an empty range is silently accepted), -ENOSPC when
+ * a static array is full, or the error from fadump_alloc_mem_ranges().
+ */
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+				       u64 base, u64 end)
+{
+	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
+	bool is_adjacent = false;
+	u64 start, size;
+
+	if (base == end)
+		return 0;
+
+	/*
+	 * Fold adjacent memory ranges to bring down the memory ranges/
+	 * PT_LOAD segments count.
+	 */
+	if (mrange_info->mem_range_cnt) {
+		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;
+
+		/*
+		 * Boot memory area needs separate PT_LOAD segment(s) as it
+		 * is moved to a different location at the time of crash.
+		 * So, fold only if the region is not boot memory area.
+		 */
+		if ((start + size) == base && start >= fw_dump.boot_mem_top)
+			is_adjacent = true;
+	}
+	if (!is_adjacent) {
+		/* resize the array on reaching the limit */
+		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
+			int ret;
+
+			if (mrange_info->is_static) {
+				pr_err("Reached array size limit for %s memory ranges\n",
+				       mrange_info->name);
+				return -ENOSPC;
+			}
+
+			ret = fadump_alloc_mem_ranges(mrange_info);
+			if (ret)
+				return ret;
+
+			/* Update to the new resized array */
+			mem_ranges = mrange_info->mem_ranges;
+		}
+
+		start = base;
+		mem_ranges[mrange_info->mem_range_cnt].base = start;
+		mrange_info->mem_range_cnt++;
+	}
+
+	/*
+	 * When folding, start is still the base of the previous entry, so
+	 * this stretches that entry up to end; for a new entry it simply
+	 * sets the size of [base, end).
+	 */
+	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
+		 start, end - 1, (end - start));
+	return 0;
+}
+
+/*
+ * Add [start, end) to the crash memory ranges, leaving out whatever part
+ * of it overlaps the reserved dump area.
+ *
+ * Returns 0 on success or the error from fadump_add_mem_range().
+ */
+static int fadump_exclude_reserved_area(u64 start, u64 end)
+{
+	u64 resv_start = fw_dump.reserve_dump_area_start;
+	u64 resv_end = resv_start + fw_dump.reserve_dump_area_size;
+	int ret = 0;
+
+	/* No overlap with the reserved dump area: add the range as is. */
+	if (resv_start >= end || resv_end <= start)
+		return fadump_add_mem_range(&crash_mrange_info, start, end);
+
+	/* Keep the piece below the reserved dump area, if any. */
+	if (start < resv_start) {
+		ret = fadump_add_mem_range(&crash_mrange_info,
+					   start, resv_start);
+		if (ret)
+			return ret;
+	}
+
+	/* Keep the piece above the reserved dump area, if any. */
+	if (end > resv_end)
+		ret = fadump_add_mem_range(&crash_mrange_info,
+					   resv_end, end);
+
+	return ret;
+}
+
+/*
+ * Fill in the ELF core file header at the start of @bufp.
+ *
+ * The header describes an ET_CORE file with no section headers and, for
+ * now, zero program headers; callers bump e_phnum as they append program
+ * headers right after it.
+ *
+ * Always returns 0.
+ */
+static int fadump_init_elfcore_header(char *bufp)
+{
+	struct elfhdr *elf;
+
+	elf = (struct elfhdr *) bufp;
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELF_DATA;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	elf->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+	elf->e_type = ET_CORE;
+	elf->e_machine = ELF_ARCH;
+	elf->e_version = EV_CURRENT;
+	elf->e_entry = 0;
+	/* Program headers follow the ELF header immediately. */
+	elf->e_phoff = sizeof(struct elfhdr);
+	elf->e_shoff = 0;
+
+	/* e_flags carries the 64-bit ELF ABI version: 2, 1 or unspecified. */
+	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+		elf->e_flags = 2;
+	else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
+		elf->e_flags = 1;
+	else
+		elf->e_flags = 0;
+
+	elf->e_ehsize = sizeof(struct elfhdr);
+	elf->e_phentsize = sizeof(struct elf_phdr);
+	elf->e_phnum = 0;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+
+	return 0;
+}
+
+/*
+ * Traverse through memblock structure and setup crash memory ranges. These
+ * ranges will be used to create PT_LOAD program headers in elfcore header.
+ *
+ * The boot memory regions are added first, one range each. All memory
+ * above the top of boot memory follows, minus the reserved dump area.
+ *
+ * Returns 0 on success or the first error from adding a range.
+ */
+static int fadump_setup_crash_memory_ranges(void)
+{
+	u64 idx, start, end;
+	int ret;
+
+	pr_debug("Setup crash memory ranges.\n");
+	crash_mrange_info.mem_range_cnt = 0;
+
+	/*
+	 * Boot memory region(s) registered with firmware are moved to
+	 * different location at the time of crash. Create separate program
+	 * header(s) for this memory chunk(s) with the correct offset.
+	 */
+	for (idx = 0; idx < fw_dump.boot_mem_regs_cnt; idx++) {
+		start = fw_dump.boot_mem_addr[idx];
+		end = start + fw_dump.boot_mem_sz[idx];
+
+		ret = fadump_add_mem_range(&crash_mrange_info, start, end);
+		if (ret)
+			return ret;
+	}
+
+	for_each_mem_range(idx, &start, &end) {
+		/*
+		 * Memory from 0 through boot_mem_top is already covered
+		 * above: skip ranges entirely below it and clip ranges that
+		 * straddle it.
+		 */
+		if (start < fw_dump.boot_mem_top) {
+			if (end <= fw_dump.boot_mem_top)
+				continue;
+			start = fw_dump.boot_mem_top;
+		}
+
+		/* add this range excluding the reserved dump area. */
+		ret = fadump_exclude_reserved_area(start, end);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * If the given physical address falls within a boot memory region then
+ * return the relocated address that points to the dump region reserved
+ * for saving initial boot memory contents. Addresses outside all boot
+ * memory regions are returned unchanged.
+ *
+ * The relocated address is paddr shifted by the boot memory destination
+ * address, minus the total size of the holes that precede the matching
+ * region.
+ */
+static inline unsigned long fadump_relocate(unsigned long paddr)
+{
+	unsigned long relocated = paddr;
+	unsigned long holes = 0;
+	unsigned long prev_end = 0;
+	int idx;
+
+	for (idx = 0; idx < fw_dump.boot_mem_regs_cnt; idx++) {
+		unsigned long region_start = fw_dump.boot_mem_addr[idx];
+		unsigned long region_end = region_start + fw_dump.boot_mem_sz[idx];
+
+		/* Gap between the previous region and this one. */
+		holes += region_start - prev_end;
+
+		if (paddr >= region_start && paddr < region_end) {
+			relocated += fw_dump.boot_mem_dest_addr - holes;
+			break;
+		}
+
+		prev_end = region_end;
+	}
+
+	pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, relocated);
+	return relocated;
+}
+
+/*
+ * Build the ELF core headers at @bufp: the ELF header, a place-holder
+ * PT_NOTE for cpu notes, a PT_NOTE for vmcoreinfo, and one PT_LOAD program
+ * header per non-empty crash memory range.
+ *
+ * For ranges that are boot memory regions, the file offset points into the
+ * destination area the boot memory is moved to; for all other ranges the
+ * offset is the physical base address itself.
+ *
+ * Always returns 0.
+ */
+static int fadump_create_elfcore_headers(char *bufp)
+{
+	unsigned long long raddr, offset;
+	struct elf_phdr *phdr;
+	struct elfhdr *elf;
+	int i, j;
+
+	fadump_init_elfcore_header(bufp);
+	elf = (struct elfhdr *)bufp;
+	bufp += sizeof(struct elfhdr);
+
+	/*
+	 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
+	 * will be populated during second kernel boot after crash. Hence
+	 * this PT_NOTE will always be the first elf note.
+	 *
+	 * NOTE: Any new ELF note addition should be placed after this note.
+	 */
+	phdr = (struct elf_phdr *)bufp;
+	bufp += sizeof(struct elf_phdr);
+	phdr->p_type = PT_NOTE;
+	phdr->p_flags = 0;
+	phdr->p_vaddr = 0;
+	phdr->p_align = 0;
+
+	phdr->p_offset = 0;
+	phdr->p_paddr = 0;
+	phdr->p_filesz = 0;
+	phdr->p_memsz = 0;
+
+	(elf->e_phnum)++;
+
+	/* setup ELF PT_NOTE for vmcoreinfo */
+	phdr = (struct elf_phdr *)bufp;
+	bufp += sizeof(struct elf_phdr);
+	phdr->p_type	= PT_NOTE;
+	phdr->p_flags	= 0;
+	phdr->p_vaddr	= 0;
+	phdr->p_align	= 0;
+
+	/* The vmcoreinfo note may sit in boot memory, which gets moved. */
+	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
+	phdr->p_offset	= phdr->p_paddr;
+	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
+
+	/* Increment number of program headers. */
+	(elf->e_phnum)++;
+
+	/*
+	 * setup PT_LOAD sections.
+	 *
+	 * raddr is the base of the next boot memory region expected among
+	 * the crash ranges, j is that region's index, and offset is the
+	 * total size of the boot memory regions already matched.
+	 */
+	j = 0;
+	offset = 0;
+	raddr = fw_dump.boot_mem_addr[0];
+	for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
+		u64 mbase, msize;
+
+		mbase = crash_mrange_info.mem_ranges[i].base;
+		msize = crash_mrange_info.mem_ranges[i].size;
+		if (!msize)
+			continue;
+
+		phdr = (struct elf_phdr *)bufp;
+		bufp += sizeof(struct elf_phdr);
+		phdr->p_type	= PT_LOAD;
+		phdr->p_flags	= PF_R|PF_W|PF_X;
+		phdr->p_offset	= mbase;
+
+		if (mbase == raddr) {
+			/*
+			 * The entire real memory region will be moved by
+			 * firmware to the specified destination_address.
+			 * Hence set the correct offset.
+			 */
+			phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
+			/* Move on to the next boot memory region, if any. */
+			if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
+				offset += fw_dump.boot_mem_sz[j];
+				raddr = fw_dump.boot_mem_addr[++j];
+			}
+		}
+
+		phdr->p_paddr = mbase;
+		phdr->p_vaddr = (unsigned long)__va(mbase);
+		phdr->p_filesz = msize;
+		phdr->p_memsz = msize;
+		phdr->p_align = 0;
+
+		/* Increment number of program headers. */
+		(elf->e_phnum)++;
+	}
+	return 0;
+}
+
+/*
+ * Initialize the fadump crash info header at physical address @addr.
+ *
+ * Returns the physical address right after the header, which is also
+ * recorded in the header as the ELF core header address. Returns 0
+ * without touching memory if @addr is 0.
+ */
+static unsigned long init_fadump_header(unsigned long addr)
+{
+	struct fadump_crash_info_header *hdr;
+	unsigned long elfcorehdr;
+
+	if (!addr)
+		return 0;
+
+	hdr = __va(addr);
+	elfcorehdr = addr + sizeof(*hdr);
+
+	memset(hdr, 0, sizeof(*hdr));
+	hdr->magic_number = FADUMP_CRASH_INFO_MAGIC;
+	hdr->elfcorehdr_addr = elfcorehdr;
+	/* We will set the crashing cpu id in crash_fadump() during crash. */
+	hdr->crashing_cpu = FADUMP_CPU_UNKNOWN;
+	/*
+	 * When LPAR is terminated by PHYP, ensure all possible CPUs'
+	 * register data is processed while exporting the vmcore.
+	 */
+	hdr->cpu_mask = *cpu_possible_mask;
+
+	return elfcorehdr;
+}
+
+/*
+ * Build the crash info and ELF core headers and register the dump with
+ * firmware.
+ *
+ * Returns -ENODEV when no dump area is reserved, the error from
+ * fadump_setup_crash_memory_ranges() when that fails, and otherwise the
+ * result of the platform's fadump_register() callback.
+ */
+static int register_fadump(void)
+{
+	unsigned long elfcorehdr_addr;
+	int rc;
+
+	/* Without reserved memory there is nothing to register with f/w. */
+	if (!fw_dump.reserve_dump_area_size)
+		return -ENODEV;
+
+	rc = fadump_setup_crash_memory_ranges();
+	if (rc)
+		return rc;
+
+	/* ELF core headers go right after the crash info header. */
+	elfcorehdr_addr = init_fadump_header(fw_dump.fadumphdr_addr);
+
+	pr_debug("Creating ELF core headers at %#016lx\n", elfcorehdr_addr);
+	fadump_create_elfcore_headers(__va(elfcorehdr_addr));
+
+	/* register the future kernel dump with firmware. */
+	pr_debug("Registering for firmware-assisted kernel dump...\n");
+	return fw_dump.ops->fadump_register(&fw_dump);
+}
+
+/*
+ * Tear down the current FADump state: invalidate an active dump, or
+ * un-register a pending registration and free the crash memory ranges,
+ * then run the platform cleanup hook if one is provided.
+ */
+void fadump_cleanup(void)
+{
+	if (!fw_dump.fadump_supported)
+		return;
+
+	if (!fw_dump.dump_active) {
+		/* Un-register Firmware-assisted dump if it was registered. */
+		if (fw_dump.dump_registered) {
+			fw_dump.ops->fadump_unregister(&fw_dump);
+			fadump_free_mem_ranges(&crash_mrange_info);
+		}
+	} else {
+		/* Invalidate the registration only if dump is active. */
+		pr_debug("Invalidating firmware-assisted dump registration\n");
+		fw_dump.ops->fadump_invalidate(&fw_dump);
+	}
+
+	if (fw_dump.ops->fadump_cleanup)
+		fw_dump.ops->fadump_cleanup(&fw_dump);
+}
+
+/*
+ * Free the reserved pages in the PFN range [start_pfn, end_pfn), calling
+ * cond_resched() whenever more than HZ jiffies have passed since the last
+ * check point.
+ */
+static void fadump_free_reserved_memory(unsigned long start_pfn,
+					unsigned long end_pfn)
+{
+	unsigned long resched_at = jiffies + HZ;
+	unsigned long cur = start_pfn;
+
+	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
+		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+
+	while (cur < end_pfn) {
+		free_reserved_page(pfn_to_page(cur));
+		cur++;
+
+		/* Keep going until the current time budget is used up. */
+		if (!time_after(jiffies, resched_at))
+			continue;
+
+		cond_resched();
+		resched_at = jiffies + HZ;
+	}
+}
+
+/*
+ * Free the reserved memory in [start, end), skipping memory holes: only
+ * the parts that overlap a memory PFN range are actually released.
+ */
+static void fadump_release_reserved_area(u64 start, u64 end)
+{
+	unsigned long mem_spfn, mem_epfn;
+	u64 cur_pfn = PHYS_PFN(start);
+	u64 last_pfn = PHYS_PFN(end);
+	u64 lo, hi;
+	int i;
+
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &mem_spfn, &mem_epfn, NULL) {
+		/* Clip the memory range to what is still left to release. */
+		lo = max_t(u64, cur_pfn, mem_spfn);
+		hi = min_t(u64, last_pfn, mem_epfn);
+
+		if (lo >= hi)
+			continue;
+
+		fadump_free_reserved_memory(lo, hi);
+
+		/* Done once the end of the requested area has been freed. */
+		if (hi == last_pfn)
+			break;
+
+		cur_pfn = hi;
+	}
+}
+
+/*
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
+ *
+ * @mrange_info: list to compact; its mem_range_cnt is updated to the
+ *               number of ranges left after merging.
+ */
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+	struct fadump_memory_range *mem_ranges;
+	u64 base, size;
+	int i, j, idx;
+
+	/*
+	 * Test the list being compacted rather than one particular global
+	 * list: for an empty list the code below would otherwise end up
+	 * setting its count to 1.
+	 */
+	if (!mrange_info->mem_range_cnt)
+		return;
+
+	/* Sort the memory ranges by ascending base address */
+	mem_ranges = mrange_info->mem_ranges;
+	for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+		/* idx tracks the lowest base found in [i, mem_range_cnt) */
+		idx = i;
+		for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+			if (mem_ranges[idx].base > mem_ranges[j].base)
+				idx = j;
+		}
+		if (idx != i)
+			swap(mem_ranges[idx], mem_ranges[i]);
+	}
+
+	/*
+	 * Merge adjacent ranges: idx is the output slot being built. A range
+	 * that starts exactly where the previous sorted range ends is folded
+	 * into slot idx; anything else opens the next slot.
+	 */
+	idx = 0;
+	for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+		base = mem_ranges[i-1].base;
+		size = mem_ranges[i-1].size;
+		if (mem_ranges[i].base == (base + size))
+			mem_ranges[idx].size += mem_ranges[i].size;
+		else {
+			idx++;
+			/* Nothing merged so far, entry is already in place */
+			if (i == idx)
+				continue;
+
+			mem_ranges[idx] = mem_ranges[i];
+		}
+	}
+	mrange_info->mem_range_cnt = idx + 1;
+}
+
+/*
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump. The ranges found are recorded in
+ * reserved_mrange_info and then sorted and merged.
+ */
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
+{
+	const __be32 *prop, *cell;
+	unsigned long nr_ranges, idx;
+	int len, rc;
+
+	/* reserved-ranges already scanned */
+	if (reserved_mrange_info.mem_range_cnt != 0)
+		return;
+
+	prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
+	if (!prop)
+		return;
+
+	/*
+	 * Each reserved range is an (address,size) pair, 2 cells each,
+	 * totalling 4 cells per range.
+	 */
+	nr_ranges = len / (sizeof(*prop) * 4);
+
+	for (idx = 0, cell = prop; idx < nr_ranges; idx++, cell += 4) {
+		u64 base = of_read_number(cell, 2);
+		u64 size = of_read_number(cell + 2, 2);
+
+		/* Empty ranges are not recorded. */
+		if (!size)
+			continue;
+
+		rc = fadump_add_mem_range(&reserved_mrange_info,
+					  base, base + size);
+		if (rc < 0) {
+			pr_warn("some reserved ranges are ignored!\n");
+			break;
+		}
+	}
+
+	/* Compact reserved ranges */
+	sort_and_merge_mem_ranges(&reserved_mrange_info);
+}
+
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crash'ed kernel's memory contents except reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ *
+ * @begin: start address of the area to release.
+ * @end:   end address used for the part that follows the last reserved
+ *         range.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
+{
+	u64 dump_start = fw_dump.reserve_dump_area_start;
+	u64 dump_end = dump_start + fw_dump.reserve_dump_area_size;
+	u64 cursor = begin;
+	u64 rsv_start, rsv_end;
+	int i;
+
+	/*
+	 * If reserved ranges array limit is hit, overwrite the last reserved
+	 * memory range with reserved dump area to ensure it is excluded from
+	 * the memory being released (reused for next FADump registration).
+	 */
+	if (reserved_mrange_info.mem_range_cnt ==
+	    reserved_mrange_info.max_mem_ranges)
+		reserved_mrange_info.mem_range_cnt--;
+
+	if (fadump_add_mem_range(&reserved_mrange_info, dump_start, dump_end))
+		return;
+
+	/* Get the reserved ranges list in order first. */
+	sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+	/* Exclude reserved ranges and release remaining memory */
+	for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+		rsv_start = reserved_mrange_info.mem_ranges[i].base;
+		rsv_end = rsv_start +
+			  reserved_mrange_info.mem_ranges[i].size;
+
+		/* This reserved range lies entirely below the cursor. */
+		if (cursor >= rsv_end)
+			continue;
+
+		/* Release the gap in front of the reserved range, if any. */
+		if (cursor < rsv_start)
+			fadump_release_reserved_area(cursor, rsv_start);
+		cursor = rsv_end;
+	}
+
+	if (cursor < end)
+		fadump_release_reserved_area(cursor, end);
+}
+
+/*
+ * If a dump is active: clean up the FADump state under fadump_mutex, then
+ * release the memory above boot_mem_top, free the CPU notes buffer and
+ * re-initialize the metadata and dump memory structure. Does nothing when
+ * no dump is active.
+ */
+static void fadump_invalidate_release_mem(void)
+{
+	int was_active;
+
+	mutex_lock(&fadump_mutex);
+	was_active = !!fw_dump.dump_active;
+	if (was_active)
+		fadump_cleanup();
+	mutex_unlock(&fadump_mutex);
+
+	if (!was_active)
+		return;
+
+	fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+	fadump_free_cpu_notes_buf();
+
+	/*
+	 * Setup kernel metadata and initialize the kernel dump
+	 * memory structure for FADump re-registration.
+	 */
+	if (fw_dump.ops->fadump_setup_metadata) {
+		if (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0)
+			pr_warn("Failed to setup kernel metadata!\n");
+	}
+	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+}
+
+/*
+ * sysfs store handler for "release_mem": writing 1 while a dump is active
+ * invalidates the dump and releases its memory.
+ *
+ * Returns @count on success, -EPERM when no dump is active and -EINVAL
+ * for any input other than 1.
+ */
+static ssize_t release_mem_store(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 const char *buf, size_t count)
+{
+	int val = -1;
+
+	if (!fw_dump.dump_active)
+		return -EPERM;
+
+	/* Only the value 1 is accepted. */
+	if (kstrtoint(buf, 0, &val) || val != 1)
+		return -EINVAL;
+
+#ifdef CONFIG_PROC_VMCORE
+	/*
+	 * Take away the '/proc/vmcore'. We are releasing the dump
+	 * memory, hence it will not be valid anymore.
+	 */
+	vmcore_cleanup();
+#endif
+	fadump_invalidate_release_mem();
+
+	return count;
+}
+
+/*
+ * Release the reserved memory and disable the FADump.
+ *
+ * fadump_release_memory() takes a (begin, end) address pair, so the end
+ * address of the reserved dump area is passed rather than its size.
+ */
+static void __init unregister_fadump(void)
+{
+	fadump_cleanup();
+	fadump_release_memory(fw_dump.reserve_dump_area_start,
+			      fw_dump.reserve_dump_area_start +
+			      fw_dump.reserve_dump_area_size);
+	fw_dump.fadump_enabled = 0;
+	kobject_put(fadump_kobj);
+}
+
+/* sysfs show handler for "enabled": prints fw_dump.fadump_enabled. */
+static ssize_t enabled_show(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer. */
+	return sysfs_emit(buf, "%d\n", fw_dump.fadump_enabled);
+}
+
+/*
+ * sysfs show handler for "mem_reserved": prints
+ * fw_dump.reserve_dump_area_size.
+ */
+static ssize_t mem_reserved_show(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer. */
+	return sysfs_emit(buf, "%ld\n", fw_dump.reserve_dump_area_size);
+}
+
+/* sysfs show handler for "registered": prints fw_dump.dump_registered. */
+static ssize_t registered_show(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer. */
+	return sysfs_emit(buf, "%d\n", fw_dump.dump_registered);
+}
+
+/*
+ * sysfs store handler for "registered": 0 un-registers the dump, 1
+ * (re-)registers it.
+ *
+ * Returns @count on success, -EPERM when FADump is disabled or a dump is
+ * active, -EINVAL for any other input, or the error from
+ * register_fadump().
+ */
+static ssize_t registered_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	int val = -1;
+	int rc = 0;
+
+	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
+		return -EPERM;
+
+	if (kstrtoint(buf, 0, &val))
+		return -EINVAL;
+
+	mutex_lock(&fadump_mutex);
+
+	if (val == 0) {
+		/* Nothing to do unless a registration exists. */
+		if (fw_dump.dump_registered != 0) {
+			/* Un-register Firmware-assisted dump */
+			pr_debug("Un-register firmware-assisted dump\n");
+			fw_dump.ops->fadump_unregister(&fw_dump);
+		}
+	} else if (val == 1) {
+		/* Drop an existing registration before registering anew. */
+		if (fw_dump.dump_registered == 1)
+			fw_dump.ops->fadump_unregister(&fw_dump);
+
+		/* Register Firmware-assisted dump */
+		rc = register_fadump();
+	} else {
+		rc = -EINVAL;
+	}
+
+	mutex_unlock(&fadump_mutex);
+	return rc < 0 ? rc : count;
+}
+
+/*
+ * seq_file show callback: when FADump is enabled, let the platform print
+ * its region information into @m while holding fadump_mutex. Always
+ * returns 0.
+ */
+static int fadump_region_show(struct seq_file *m, void *private)
+{
+	if (fw_dump.fadump_enabled) {
+		mutex_lock(&fadump_mutex);
+		fw_dump.ops->fadump_region_show(&fw_dump, m);
+		mutex_unlock(&fadump_mutex);
+	}
+
+	return 0;
+}
+
+/* sysfs attributes backed by the *_show()/*_store() handlers above. */
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+
+/*
+ * Attributes created as a group by fadump_init_files(). release_attr is
+ * not listed here: fadump_init_files() creates it separately, and only
+ * when a dump is active.
+ */
+static struct attribute *fadump_attrs[] = {
+	&enable_attr.attr,
+	&register_attr.attr,
+	&mem_reserved_attr.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(fadump);
+
+DEFINE_SHOW_ATTRIBUTE(fadump_region);
+
+/*
+ * Create the "fadump" kobject with its sysfs attributes, the
+ * "fadump_region" debugfs file and the backward-compatibility symlinks
+ * under kernel_kobj.
+ *
+ * If the attribute group cannot be created, FADump is unregistered.
+ * Failures to create the release_mem file or a symlink are only logged.
+ */
+static void __init fadump_init_files(void)
+{
+	int rc = 0;
+
+	fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
+	if (!fadump_kobj) {
+		pr_err("failed to create fadump kobject\n");
+		return;
+	}
+
+	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
+			    &fadump_region_fops);
+
+	/* release_mem only exists while there is a dump to release. */
+	if (fw_dump.dump_active) {
+		rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
+		if (rc)
+			pr_err("unable to create release_mem sysfs file (%d)\n",
+			       rc);
+	}
+
+	rc = sysfs_create_groups(fadump_kobj, fadump_groups);
+	if (rc) {
+		pr_err("sysfs group creation failed (%d), unregistering FADump\n",
+		       rc);
+		unregister_fadump();
+		return;
+	}
+
+	/*
+	 * The FADump sysfs files were moved from kernel_kobj to fadump_kobj,
+	 * so create symlinks at the old location to maintain backward
+	 * compatibility.
+	 *
+	 *      - fadump_enabled -> fadump/enabled
+	 *      - fadump_registered -> fadump/registered
+	 *      - fadump_release_mem -> fadump/release_mem
+	 */
+	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+						  "enabled", "fadump_enabled");
+	if (rc) {
+		pr_err("unable to create fadump_enabled symlink (%d)\n", rc);
+		return;
+	}
+
+	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+						  "registered",
+						  "fadump_registered");
+	if (rc) {
+		pr_err("unable to create fadump_registered symlink (%d)\n", rc);
+		/* Do not leave a partial set of compat links behind. */
+		sysfs_remove_link(kernel_kobj, "fadump_enabled");
+		return;
+	}
+
+	if (fw_dump.dump_active) {
+		rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
+							  fadump_kobj,
+							  "release_mem",
+							  "fadump_release_mem");
+		if (rc)
+			pr_err("unable to create fadump_release_mem symlink (%d)\n",
+			       rc);
+	}
+}
+
+/*
+ * Prepare for firmware-assisted dump.
+ *
+ * Returns 0 when FADump is not supported by the platform and 1 otherwise.
+ */
+int __init setup_fadump(void)
+{
+	if (!fw_dump.fadump_supported)
+		return 0;
+
+	fadump_init_files();
+	fadump_show_config();
+
+	if (!fw_dump.fadump_enabled)
+		return 1;
+
+	if (fw_dump.dump_active) {
+		/*
+		 * Dump data is available: see if it is valid and prepare
+		 * for saving it to the disk. If the dump process fails then
+		 * invalidate the registration and release memory before
+		 * proceeding for re-registration.
+		 */
+		if (fw_dump.ops->fadump_process(&fw_dump) < 0)
+			fadump_invalidate_release_mem();
+	} else if (fw_dump.reserve_dump_area_size) {
+		/*
+		 * Initialize the kernel dump memory structure and register
+		 * with f/w.
+		 */
+		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+		register_fadump();
+	}
+
+	/*
+	 * In case of panic, fadump is triggered via ppc_panic_event()
+	 * panic notifier. Setting crash_kexec_post_notifiers to 'true'
+	 * lets panic() function take crash friendly path before panic
+	 * notifiers are invoked.
+	 */
+	crash_kexec_post_notifiers = true;
+
+	return 1;
+}
+/*
+ * Use subsys_initcall_sync() here because there is dependency with
+ * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization
+ * is done before registering with f/w.
+ */
+subsys_initcall_sync(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/*
+ * Scan the Firmware Assisted dump configuration details.
+ *
+ * Returns 1 once the "ibm,opal" node at depth 1 has been scanned and 0
+ * for every other node.
+ */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+				      int depth, void *data)
+{
+	if (depth == 1 && strcmp(uname, "ibm,opal") == 0) {
+		opal_fadump_dt_scan(&fw_dump, node);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ *
+ * Always returns 1.
+ */
+int __init fadump_reserve_mem(void)
+{
+	if (!fw_dump.dump_active) {
+		pr_debug("FADump-aware kernel..\n");
+		return 1;
+	}
+
+	/*
+	 * If last boot has crashed then reserve all the memory
+	 * above boot memory to preserve crash data.
+	 */
+	pr_info("Preserving crash data for processing in next boot.\n");
+	fadump_reserve_crash_area(fw_dump.boot_mem_top);
+
+	return 1;
+}
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/*
+ * Preserve everything above the base address: reserve the part of every
+ * memory range that lies at or above @base.
+ */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+	u64 i, mstart, mend, msize;
+
+	for_each_mem_range(i, &mstart, &mend) {
+		/*
+		 * Ranges ending at or below base have nothing to preserve;
+		 * this also avoids logging and reserving a 0 byte region
+		 * for a range that ends exactly at base.
+		 */
+		if (mend <= base)
+			continue;
+
+		/* Clip a range that straddles base. */
+		if (mstart < base)
+			mstart = base;
+		msize = mend - mstart;
+
+		pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data\n",
+			(msize >> 20), mstart);
+		memblock_reserve(mstart, msize);
+	}
+}
+
+/* Number of whole pages covered by the memblock reserved size. */
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+	return PHYS_PFN(memblock_reserved_size());
+}
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
new file mode 100644
index 0000000000..20328f72f9
--- /dev/null
+++ b/arch/powerpc/kernel/firmware.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Extracted from cputable.c
+ *
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *  Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ */
+
+#include <linux/export.h>
+#include <linux/cache.h>
+#include <linux/of.h>
+
+#include <asm/firmware.h>
+#include <asm/kvm_guest.h>
+
+#ifdef CONFIG_PPC64
+unsigned long powerpc_firmware_features __read_mostly;
+EXPORT_SYMBOL_GPL(powerpc_firmware_features);
+#endif
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+DEFINE_STATIC_KEY_FALSE(kvm_guest);
+/*
+ * Enable the kvm_guest static key when the device tree has a
+ * "/hypervisor" node that is compatible with "linux,kvm".
+ * Always returns 0.
+ */
+int __init check_kvm_guest(void)
+{
+	struct device_node *np = of_find_node_by_path("/hypervisor");
+
+	if (np) {
+		if (of_device_is_compatible(np, "linux,kvm"))
+			static_branch_enable(&kvm_guest);
+
+		/* Drop the reference taken by of_find_node_by_path(). */
+		of_node_put(np);
+	}
+
+	return 0;
+}
+core_initcall(check_kvm_guest); // before kvm_guest_init()
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
new file mode 100644
index 0000000000..2f8f3f93cb
--- /dev/null
+++ b/arch/powerpc/kernel/fpu.S
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  FPU support code, moved here from head.S so that it can be used
+ *  by chips which use other head-whatever.S files.
+ *
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *    Copyright (C) 1996 Paul Mackerras.
+ *    Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ */
+
+#include <linux/export.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+/*
+ * FP/VSX save and restore helpers.
+ *
+ * With CONFIG_VSX each helper starts with a CPU_FTR_VSX feature section
+ * holding a branch to local label 2, where the VSR variant lives; the
+ * plain FPR variant follows the feature section and skips to label 3.
+ * Without CONFIG_VSX only the FPR variant is emitted and the second
+ * macro argument is unused.
+ *
+ * The un-prefixed wrappers at the end turn their register arguments into
+ * the __REG_* form before expanding the helpers.
+ */
+#ifdef CONFIG_VSX
+#define __REST_1FPVSR(n,c,base)						\
+BEGIN_FTR_SECTION							\
+	b	2f;							\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
+	REST_FPR(n,base);						\
+	b	3f;							\
+2:	REST_VSR(n,c,base);						\
+3:
+
+#define __REST_32FPVSRS(n,c,base)					\
+BEGIN_FTR_SECTION							\
+	b	2f;							\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
+	REST_32FPRS(n,base);						\
+	b	3f;							\
+2:	REST_32VSRS(n,c,base);						\
+3:
+
+#define __SAVE_32FPVSRS(n,c,base)					\
+BEGIN_FTR_SECTION							\
+	b	2f;							\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
+	SAVE_32FPRS(n,base);						\
+	b	3f;							\
+2:	SAVE_32VSRS(n,c,base);						\
+3:
+#else
+#define __REST_1FPVSR(n,b,base)		REST_FPR(n, base)
+#define __REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
+#define __SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
+#endif
+#define REST_1FPVSR(n,c,base)   __REST_1FPVSR(n,__REG_##c,__REG_##base)
+#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
+#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
+
+/*
+ * Load state from memory into FP registers including FPSCR.
+ * Assumes the caller has enabled FP in the MSR.
+ *
+ * r3 points to the FP state to load. The FPSCR image is read from
+ * FPSTATE_FPSCR(r3) into fr0 and written with MTFSF_L() before the
+ * register restore, which starts at register 0 and uses r3 as base.
+ * R4 is handed to the restore macro as its second argument (presumably a
+ * scratch register for the VSX variant -- confirm against REST_32VSRS).
+ */
+_GLOBAL(load_fp_state)
+	lfd	fr0,FPSTATE_FPSCR(r3)
+	MTFSF_L(fr0)
+	REST_32FPVSRS(0, R4, R3)
+	blr
+EXPORT_SYMBOL(load_fp_state)
+_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */
+
+/*
+ * Store FP state into memory, including FPSCR
+ * Assumes the caller has enabled FP in the MSR.
+ *
+ * r3 points to the FP state buffer. After the registers have been saved,
+ * fr0 is used to read the FPSCR (mffs) and store it at FPSTATE_FPSCR(r3);
+ * register 0 is then reloaded from the buffer by REST_1FPVSR().
+ */
+_GLOBAL(store_fp_state)
+	SAVE_32FPVSRS(0, R4, R3)
+	mffs	fr0
+	stfd	fr0,FPSTATE_FPSCR(r3)
+	REST_1FPVSR(0, R4, R3)
+	blr
+EXPORT_SYMBOL(store_fp_state)
+
+/*
+ * This task wants to use the FPU now.
+ * On UP, disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Load up this task's FP registers from its thread_struct,
+ * enable the FPU for the current task and return to the task.
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
+ *
+ * MSR_FP and the thread's THREAD_FPEXC_MODE bits are OR-ed into r9 on
+ * 32-bit and into r12 on 64-bit; on 64-bit r12 is also written to
+ * _MSR(r1).
+ * NOTE(review): r9/r12 presumably hold the interrupted context's MSR on
+ * entry -- confirm against the callers.
+ */
+_GLOBAL(load_up_fpu)
+	mfmsr	r5
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+	ori	r5,r5,MSR_FP|MSR_RI
+#else
+	ori	r5,r5,MSR_FP
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+	oris	r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+	MTMSRD(r5)			/* enable use of fpu now */
+	isync
+	/* enable use of FP after return */
+#ifdef CONFIG_PPC32
+	addi	r5,r2,THREAD
+	lwz	r4,THREAD_FPEXC_MODE(r5)
+	ori	r9,r9,MSR_FP		/* enable FP for current */
+	or	r9,r9,r4
+#else
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	lwz	r4,THREAD_FPEXC_MODE(r5)
+	ori	r12,r12,MSR_FP
+	or	r12,r12,r4
+	std	r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+#endif
+#endif
+	li	r4,1
+	stb	r4,THREAD_LOAD_FP(r5)
+	addi	r10,r5,THREAD_FPSTATE
+	lfd	fr0,FPSTATE_FPSCR(r10)
+	MTFSF_L(fr0)
+	REST_32FPVSRS(0, R4, R10)
+	/* restore registers and return */
+	/* we haven't used ctr or xer or lr */
+	blr
+_ASM_NOKPROBE_SYMBOL(load_up_fpu)
+
+/*
+ * save_fpu(tsk)
+ * Save the floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ *
+ * r3 holds the task pointer on entry. The registers and FPSCR go to the
+ * area pointed to by THREAD_FPSAVEAREA when that pointer is non-zero, and
+ * to THREAD_FPSTATE inside the thread_struct otherwise.
+ * NOTE(review): no MSR update is visible in this routine, so the
+ * "enables the FPU" statement above may be stale; likewise the value
+ * loaded into r5 from PT_REGS(r3) is not used here -- confirm.
+ */
+_GLOBAL(save_fpu)
+	addi	r3,r3,THREAD	        /* want THREAD of task */
+	PPC_LL	r6,THREAD_FPSAVEAREA(r3)
+	PPC_LL	r5,PT_REGS(r3)
+	PPC_LCMPI	0,r6,0
+	bne	2f
+	addi	r6,r3,THREAD_FPSTATE
+2:	SAVE_32FPVSRS(0, R4, R6)
+	mffs	fr0
+	stfd	fr0,FPSTATE_FPSCR(r6)
+	REST_1FPVSR(0, R4, R6)
+	blr
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
new file mode 100644
index 0000000000..f8e2911478
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.h
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __HEAD_32_H__
+#define __HEAD_32_H__
+
+#include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
+
+/*
+ * Exception entry code.  This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ *
+ * EXCEPTION_PROLOG expands the three prolog stages in order; trapno and
+ * name are passed on to stage 2, and handle_dar_dsisr to stages 0 and 2.
+ */
+.macro EXCEPTION_PROLOG		trapno name handle_dar_dsisr=0
+	EXCEPTION_PROLOG_0	handle_dar_dsisr=\handle_dar_dsisr
+	EXCEPTION_PROLOG_1
+	EXCEPTION_PROLOG_2	\trapno \name handle_dar_dsisr=\handle_dar_dsisr
+.endm
+
+/*
+ * Stage 0: free up r10/r11 and stash the interrupt state in the thread
+ * struct addressed by SPRN_SPRG_THREAD.
+ *
+ * r10 and r11 are parked in SPRG_SCRATCH0/SPRG_SCRATCH1. When
+ * handle_dar_dsisr is non-zero, DAR and DSISR (DEAR and ESR on 40x) are
+ * stored first, followed by SRR0 and SRR1. On exit r10 holds the CR as
+ * read by mfcr and cr0 reflects the MSR_PR test on the saved SRR1.
+ */
+.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
+	mtspr	SPRN_SPRG_SCRATCH0,r10
+	mtspr	SPRN_SPRG_SCRATCH1,r11
+	mfspr	r10, SPRN_SPRG_THREAD
+	.if	\handle_dar_dsisr
+#ifdef CONFIG_40x
+	mfspr	r11, SPRN_DEAR
+#else
+	mfspr	r11, SPRN_DAR
+#endif
+	stw	r11, DAR(r10)
+#ifdef CONFIG_40x
+	mfspr	r11, SPRN_ESR
+#else
+	mfspr	r11, SPRN_DSISR
+#endif
+	stw	r11, DSISR(r10)
+	.endif
+	mfspr	r11, SPRN_SRR0
+	stw	r11, SRR0(r10)
+	mfspr	r11, SPRN_SRR1		/* check whether user or kernel */
+	stw	r11, SRR1(r10)
+	mfcr	r10
+	andi.	r11, r11, MSR_PR
+.endm
+
+/*
+ * Stage 1: select the stack for the exception frame.
+ *
+ * The incoming r1 is parked in SPRG_SCRATCH2. When the beq is taken
+ * (cr0.eq is expected to come from the MSR_PR test in stage 0) the frame
+ * is placed INT_FRAME_SIZE below the current r1; otherwise r1 is rebuilt
+ * from the stack pointer found at TASK_STACK for the current thread.
+ * With CONFIG_VMAP_STACK, r1 is copied into the CR and a set bit
+ * (32 - THREAD_ALIGN_SHIFT) branches to vmap_stack_overflow.
+ */
+.macro EXCEPTION_PROLOG_1
+	mtspr	SPRN_SPRG_SCRATCH2,r1
+	subi	r1, r1, INT_FRAME_SIZE		/* use r1 if kernel */
+	beq	1f
+	mfspr	r1,SPRN_SPRG_THREAD
+	lwz	r1,TASK_STACK-THREAD(r1)
+	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+1:
+#ifdef CONFIG_VMAP_STACK
+	mtcrf	0x3f, r1
+	bt	32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
+#endif
+.endm
+
+/*
+ * Stage 2: switch to the virtual-mode continuation and start the frame.
+ *
+ * SRR1 is loaded with MSR_KERNEL & ~MSR_RI and SRR0 with the address of
+ * local label 1, so the rfi resumes at \name\()_virt. r11 carries the
+ * value parked in SPRG_SCRATCH2 across the rfi and is stored in the GPR1
+ * slot and at 0(r1). The continuation then saves the CR image held in
+ * r10, r12, r9, the original r10/r11 (from SPRG_SCRATCH0/1) and LR, plus
+ * DAR/DSISR from the thread struct when handle_dar_dsisr is non-zero. It
+ * leaves the saved SRR1 in r9 and the saved SRR0 in r12 before expanding
+ * COMMON_EXCEPTION_PROLOG_END.
+ */
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+	.if	\handle_dar_dsisr
+	li	r11, RPN_PATTERN
+	mtspr	SPRN_DAR, r11	/* Tag DAR, to be used in DTLB Error */
+	.endif
+#endif
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+	mtspr	SPRN_SRR1, r11
+	lis	r11, 1f@h
+	ori	r11, r11, 1f@l
+	mtspr	SPRN_SRR0, r11
+	mfspr	r11, SPRN_SPRG_SCRATCH2
+	rfi
+
+	.text
+\name\()_virt:
+1:
+	stw	r11,GPR1(r1)
+	stw	r11,0(r1)
+	mr	r11, r1
+	stw	r10,_CCR(r11)		/* save registers */
+	stw	r12,GPR12(r11)
+	stw	r9,GPR9(r11)
+	mfspr	r10,SPRN_SPRG_SCRATCH0
+	mfspr	r12,SPRN_SPRG_SCRATCH1
+	stw	r10,GPR10(r11)
+	stw	r12,GPR11(r11)
+	mflr	r10
+	stw	r10,_LINK(r11)
+	mfspr	r12, SPRN_SPRG_THREAD
+	tovirt(r12, r12)
+	.if	\handle_dar_dsisr
+	lwz	r10, DAR(r12)
+	stw	r10, _DAR(r11)
+	lwz	r10, DSISR(r12)
+	stw	r10, _DSISR(r11)
+	.endif
+	lwz	r9, SRR1(r12)
+	lwz	r12, SRR0(r12)
+#ifdef CONFIG_40x
+	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+	mtspr	SPRN_EID, r2		/* Set MSR_RI */
+#else
+	li	r10, MSR_KERNEL		/* can take exceptions */
+	mtmsr	r10			/* (except for mach check in rtas) */
+#endif
+	COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
+
+/*
+ * Finish the exception frame at r1: store r0, the frame marker, the trap
+ * number, the registers covered by SAVE_GPRS(3, 8) and SAVE_NVGPRS, r2,
+ * NIP (from r12), MSR (from r9), CTR and XER.
+ *
+ * On exit r2 is the virtual address derived from SPRN_SPRG_THREAD minus
+ * THREAD, and r3 is r1 + STACK_INT_FRAME_REGS.
+ */
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+	stw	r0,GPR0(r1)
+	lis	r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+	addi	r10,r10,STACK_FRAME_REGS_MARKER@l
+	stw	r10,STACK_INT_FRAME_MARKER(r1)
+	li	r10, \trapno
+	stw	r10,_TRAP(r1)
+	SAVE_GPRS(3, 8, r1)
+	SAVE_NVGPRS(r1)
+	stw	r2,GPR2(r1)
+	stw	r12,_NIP(r1)
+	stw	r9,_MSR(r1)
+	mfctr	r10
+	mfspr	r2,SPRN_SPRG_THREAD
+	stw	r10,_CTR(r1)
+	tovirt(r2, r2)
+	mfspr	r10,SPRN_XER
+	addi	r2, r2, -THREAD
+	stw	r10,_XER(r1)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+.endm
+
+/*
+ * Book3S/32 only (expands to nothing otherwise): test MSR_PR in r9.
+ * When it is clear, call the prepare_transfer_to_handler function. When
+ * it is set, branch to the next local label 777: with CONFIG_PPC_KUEP
+ * that is the __kuep_lock call, without it the end of the macro.
+ *
+ * Local label 777 is defined twice on purpose; "777f" always resolves to
+ * the nearest following definition.
+ */
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
+	andi.	r12,r9,MSR_PR
+	bne	777f
+	bl	prepare_transfer_to_handler
+#ifdef CONFIG_PPC_KUEP
+	b	778f
+777:
+	bl	__kuep_lock
+778:
+#endif
+777:
+#endif
+.endm
+
+/*
+ * System call entry: capture SRR1 in r9 and SRR0 in r12, then rfi to
+ * local label 1 with MSR_KERNEL. Before the rfi the old r1 is copied to
+ * r11, r1 is set to THREAD_SIZE - INT_FRAME_SIZE above the stack pointer
+ * found at TASK_STACK, and r10 is converted to the virtual address of the
+ * thread struct. At label 1 the NIP and the CR (with the SO bit cleared)
+ * are stored in the frame before branching to transfer_to_syscall.
+ * NOTE(review): the trapno argument is not referenced in this macro body.
+ */
+.macro SYSCALL_ENTRY trapno
+	mfspr	r9, SPRN_SRR1
+	mfspr	r12, SPRN_SRR0
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL)		/* can take exceptions */
+	lis	r10, 1f@h
+	ori	r10, r10, 1f@l
+	mtspr	SPRN_SRR1, r11
+	mtspr	SPRN_SRR0, r10
+	mfspr	r10,SPRN_SPRG_THREAD
+	mr	r11, r1
+	lwz	r1,TASK_STACK-THREAD(r10)
+	tovirt(r10, r10)
+	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+	rfi
+1:
+	stw	r12,_NIP(r1)
+	mfcr	r12
+	rlwinm	r12,r12,0,4,2	/* Clear SO bit in CR */
+	stw	r12,_CCR(r1)
+	b	transfer_to_syscall		/* jump to handler */
+.endm
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ *
+ * START_EXCEPTION(n, label) switches to the __HEAD section, moves the
+ * location counter to n and defines label there; on Book3S it also emits
+ * DO_KVM n in front of the label.
+ *
+ * EXCEPTION(n, label, hdlr) builds a complete vector: the standard
+ * prolog, prepare_transfer_to_handler, a call to hdlr and a branch to
+ * interrupt_return.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define	START_EXCEPTION(n, label)		\
+	__HEAD;					\
+	. = n;					\
+	DO_KVM n;				\
+label:
+
+#else
+#define	START_EXCEPTION(n, label)		\
+	__HEAD;					\
+	. = n;					\
+label:
+
+#endif
+
+#define EXCEPTION(n, label, hdlr)		\
+	START_EXCEPTION(n, label)		\
+	EXCEPTION_PROLOG n label;		\
+	prepare_transfer_to_handler;		\
+	bl	hdlr;				\
+	b	interrupt_return
+
+/*
+ * Emits the vmap_stack_overflow label that EXCEPTION_PROLOG_1 branches
+ * to. r1 is loaded from emergency_ctx (on SMP from the entry at offset
+ * TASK_CPU << 3), addressed through its physical alias
+ * (emergency_ctx - PAGE_OFFSET), and moved to THREAD_SIZE - INT_FRAME_SIZE
+ * above that value. The frame is then built with trap number 0 and
+ * stack_overflow_exception is called before returning through
+ * interrupt_return.
+ */
+.macro vmap_stack_overflow_exception
+	__HEAD
+vmap_stack_overflow:
+#ifdef CONFIG_SMP
+	mfspr	r1, SPRN_SPRG_THREAD
+	lwz	r1, TASK_CPU - THREAD(r1)
+	slwi	r1, r1, 3
+	addis	r1, r1, emergency_ctx-PAGE_OFFSET@ha
+#else
+	lis	r1, emergency_ctx-PAGE_OFFSET@ha
+#endif
+	lwz	r1, emergency_ctx-PAGE_OFFSET@l(r1)
+	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+	EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+	prepare_transfer_to_handler
+	bl	stack_overflow_exception
+	b	interrupt_return
+.endm
+
+#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
new file mode 100644
index 0000000000..b32e7b2ebd
--- /dev/null
+++ b/arch/powerpc/kernel/head_40x.S
@@ -0,0 +1,720 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handlers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *
+ *    Module name: head_40x.S
+ *
+ *    Description:
+ *      Kernel execution entry point code.
+ */
+
+#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/sizes.h>
+#include <linux/linkage.h>
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+#include "head_32.h"
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=96m")
+ *   r7 - End of kernel command line string
+ *
+ * This is all going to change RSN when we add bi_recs.......  -- Dan
+ */
+	__HEAD
+_GLOBAL(_stext);
+_GLOBAL(_start);
+
+	mr	r31,r3			/* save device tree ptr */
+
+	/* We have to turn on the MMU right away so we get cache modes
+	 * set correctly.
+	 */
+	bl	initial_mmu
+
+/* We now have the lower 32 Meg mapped into TLB entries (two 16M entries,
+ * see initial_mmu), and the caches ready to work.
+ */
+turn_on_mmu:
+	lis	r0,MSR_KERNEL@h
+	ori	r0,r0,MSR_KERNEL@l
+	mtspr	SPRN_SRR1,r0		/* MSR value installed by the rfi */
+	lis	r0,start_here@h
+	ori	r0,r0,start_here@l
+	mtspr	SPRN_SRR0,r0		/* rfi target: start_here */
+	rfi				/* enables MMU */
+	b	.			/* prevent prefetch past rfi */
+
+/*
+ * This area is used for temporarily saving registers during the
+ * critical exception prolog.
+ */
+	. = 0xc0		/* low offset: the prolog addresses these as sym@l(0) */
+crit_save:
+_GLOBAL(crit_r10)
+	.space	4		/* r10 on entry */
+_GLOBAL(crit_r11)
+	.space	4		/* r11 on entry */
+_GLOBAL(crit_srr0)
+	.space	4		/* SRR0 on entry */
+_GLOBAL(crit_srr1)
+	.space	4		/* SRR1 on entry */
+_GLOBAL(crit_r1)
+	.space	4		/* r1 on entry */
+_GLOBAL(crit_dear)
+	.space	4		/* DEAR on entry */
+_GLOBAL(crit_esr)
+	.space	4		/* ESR on entry */
+
+/*
+ * Exception prolog for critical exceptions.  This is a little different
+ * from the normal exception prolog above since a critical exception
+ * can potentially occur at any point during normal exception processing.
+ * Thus we cannot use the same SPRG registers as the normal prolog above.
+ * Instead we use a couple of words of memory at low physical addresses.
+ * This is OK since we don't support SMP on these processors.
+ */
+.macro CRITICAL_EXCEPTION_PROLOG trapno name
+	stw	r10,crit_r10@l(0)	/* save two registers to work with */
+	stw	r11,crit_r11@l(0)
+	mfspr	r10,SPRN_SRR0
+	mfspr	r11,SPRN_SRR1
+	stw	r10,crit_srr0@l(0)
+	stw	r11,crit_srr1@l(0)
+	mfspr	r10,SPRN_DEAR
+	mfspr	r11,SPRN_ESR
+	stw	r10,crit_dear@l(0)
+	stw	r11,crit_esr@l(0)
+	mfcr	r10			/* save CR in r10 for now	   */
+	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
+	andi.	r11,r11,MSR_PR
+	lis	r11,(critirq_ctx-PAGE_OFFSET)@ha	/* lis/lwz leave CR0 from andi. intact */
+	lwz	r11,(critirq_ctx-PAGE_OFFSET)@l(r11)	/* r11 = critirq_ctx contents */
+	beq	1f			/* MSR_PR clear: keep the critirq_ctx stack */
+	/* COMING FROM USER MODE */
+	mfspr	r11,SPRN_SPRG_THREAD	/* if from user, start at top of   */
+	lwz	r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1:	stw	r1,crit_r1@l(0)		/* stash the interrupted r1 */
+	addi	r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */
+	mtspr	SPRN_SRR1, r11
+	lis	r11, 1f@h
+	ori	r11, r11, 1f@l
+	mtspr	SPRN_SRR0, r11
+	rfi				/* continue at 1: in .text with the MSR set above */
+
+	.text
+1:
+\name\()_virt:
+	lwz	r11,crit_r1@l(0)	/* r11 = interrupted r1 */
+	stw	r11,GPR1(r1)
+	stw	r11,0(r1)		/* same value at 0(r1); presumably the back chain */
+	mr	r11,r1
+	stw	r10,_CCR(r11)		/* save various registers	   */
+	stw	r12,GPR12(r11)
+	stw	r9,GPR9(r11)
+	mflr	r10
+	stw	r10,_LINK(r11)
+	lis	r9,PAGE_OFFSET@ha	/* crit_* slots are now reached via PAGE_OFFSET */
+	lwz	r10,crit_r10@l(r9)
+	lwz	r12,crit_r11@l(r9)
+	stw	r10,GPR10(r11)
+	stw	r12,GPR11(r11)
+	lwz	r12,crit_dear@l(r9)	/* save DEAR and ESR in the frame  */
+	lwz	r9,crit_esr@l(r9)	/* since they may have had stuff   */
+	stw	r12,_DEAR(r11)		/* in them at the point where the  */
+	stw	r9,_ESR(r11)		/* exception was taken		   */
+	mfspr	r12,SPRN_SRR2		/* r12 = SRR2 */
+	mfspr	r9,SPRN_SRR3		/* r9 = SRR3 */
+	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?)	   */
+	COMMON_EXCEPTION_PROLOG_END \trapno + 2
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
+
+	/*
+	 * State at this point:
+	 * r9 saved in stack frame, now saved SRR3 & ~MSR_WE
+	 * r10 saved in crit_r10 and in stack frame, trashed
+	 * r11 saved in crit_r11 and in stack frame,
+	 *	now phys stack/exception frame pointer
+	 * r12 saved in stack frame, now saved SRR2
+	 * CR saved in stack frame, CR0.EQ = !SRR3.PR
+	 * LR, DEAR, ESR in stack frame
+	 * r1 saved in stack frame, now virt stack/excframe pointer
+	 * r0, r3-r8 saved in stack frame
+	 */
+
+/*
+ * Exception vectors.  CRITICAL_EXCEPTION places label at offset n, runs the
+ * critical prolog, calls hdlr and leaves through ret_from_crit_exc. */
+#define CRITICAL_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(n, label);				\
+	CRITICAL_EXCEPTION_PROLOG n label;				\
+	prepare_transfer_to_handler;				\
+	bl	hdlr;						\
+	b	ret_from_crit_exc
+
+/*
+ * 0x0100 - Critical Interrupt Exception
+ */
+	CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception)
+
+/*
+ * 0x0200 - Machine Check Exception
+ */
+	CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+
+/*
+ * 0x0300 - Data Storage Exception
+ * This happens for just a few reasons.  U0 set (but we don't do that),
+ * or zone protection fault (user violation, write to protected page).
+ * The other Data TLB exceptions bail out to this point
+ * if they can't resolve the lightweight TLB fault.
+ */
+	START_EXCEPTION(0x0300,	DataStorage)
+	EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
+
+/*
+ * 0x0400 - Instruction Storage Exception
+ * This is caused by a fetch from non-execute or guarded pages.
+ */
+	START_EXCEPTION(0x0400, InstructionAccess)
+	EXCEPTION_PROLOG 0x400 InstructionAccess
+	li	r5,0
+	stw	r5, _ESR(r11)		/* Zero ESR */
+	stw	r12, _DEAR(r11)		/* SRR0 as DEAR */
+	prepare_transfer_to_handler
+	bl	do_page_fault		/* same handler as DataStorage */
+	b	interrupt_return
+
+/* 0x0500 - External Interrupt Exception */
+	EXCEPTION(0x0500, HardwareInterrupt, do_IRQ)
+
+/* 0x0600 - Alignment Exception */
+	START_EXCEPTION(0x0600, Alignment)
+	EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	alignment_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+/* 0x0700 - Program Exception */
+	START_EXCEPTION(0x0700, ProgramCheck)
+	EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+	EXCEPTION(0x0800, Trap_08, unknown_exception)
+	EXCEPTION(0x0900, Trap_09, unknown_exception)
+	EXCEPTION(0x0A00, Trap_0A, unknown_exception)
+	EXCEPTION(0x0B00, Trap_0B, unknown_exception)
+
+/* 0x0C00 - System Call Exception */
+	START_EXCEPTION(0x0C00,	SystemCall)
+	SYSCALL_ENTRY	0xc00
+/*	Trap_0D is commented out to get more space for system call exception */
+
+/*	EXCEPTION(0x0D00, Trap_0D, unknown_exception) */
+	EXCEPTION(0x0E00, Trap_0E, unknown_exception)
+	EXCEPTION(0x0F00, Trap_0F, unknown_exception)
+
+/* 0x1000 - Programmable Interval Timer (PIT) Exception */
+	START_EXCEPTION(0x1000, DecrementerTrap)
+	b Decrementer		/* next vector is only 0x10 away; body lives elsewhere in __HEAD */
+
+/* 0x1010 - Fixed Interval Timer (FIT) Exception */
+	START_EXCEPTION(0x1010, FITExceptionTrap)
+	b FITException
+
+/* 0x1020 - Watchdog Timer (WDT) Exception */
+	START_EXCEPTION(0x1020, WDTExceptionTrap)
+	b WDTException
+
+/* 0x1100 - Data TLB Miss Exception
+ * As the name implies, translation is not in the MMU, so search the
+ * page tables and fix it.  The only purpose of this function is to
+ * load TLB entries from the page table if they exist.
+ */
+	START_EXCEPTION(0x1100,	DTLBMiss)
+	mtspr	SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+	mtspr	SPRN_SPRG_SCRATCH6, r11
+	mtspr	SPRN_SPRG_SCRATCH3, r12
+	mtspr	SPRN_SPRG_SCRATCH4, r9
+	mfcr	r12			/* r12 = CR */
+	mfspr	r9, SPRN_PID		/* r9 = PID on entry */
+	rlwimi	r12, r9, 0, 0xff	/* pack PID into the low byte of r12 */
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	r10, r11
+	blt+	3f			/* address below PAGE_OFFSET */
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	li	r9, 0
+	mtspr	SPRN_PID, r9		/* TLB will have 0 TID */
+	b	4f
+
+	/* Get the PGD for the current thread.
+	 */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+	rlwinm.	r9, r9, 0, 0xff		/* is the entry PID's low byte zero? */
+	beq	5f			/* Kuap fault */
+#endif
+4:
+	tophys(r11, r11)
+	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */
+	lwz	r11, 0(r11)		/* Get L1 entry */
+	andi.	r9, r11, _PMD_PRESENT	/* Check if it points to a PTE page */
+	beq	2f			/* No PTE page: try large-page pmd at 2 */
+
+	rlwimi	r11, r10, 22, 20, 29	/* Compute PTE address */
+	lwz	r11, 0(r11)		/* Get Linux PTE */
+	li	r9, _PAGE_PRESENT | _PAGE_ACCESSED
+	andc.	r9, r9, r11		/* Check permission */
+	bne	5f			/* a required bit is missing in the PTE */
+
+	rlwinm	r9, r11, 1, _PAGE_RW	/* dirty => rw */
+	and	r9, r9, r11		/* hwwrite = dirty & rw */
+	rlwimi	r11, r9, 0, _PAGE_RW	/* replace rw by hwwrite */
+
+	/* Create TLB tag.  This is the faulting address plus a static
+	 * set of bits.  These are size, valid, E, U0.
+	*/
+	li	r9, 0x00c0
+	rlwimi	r10, r9, 0, 20, 31
+
+	b	finish_tlb_load
+
+2:	/* Check for possible large-page pmd entry */
+	rlwinm.	r9, r11, 2, 22, 24
+	beq	5f
+
+	/* Create TLB tag.  This is the faulting address, plus a static
+	 * set of bits (valid, E, U0) plus the size from the PMD.
+	 */
+	ori	r9, r9, 0x40
+	rlwimi	r10, r9, 0, 20, 31
+
+	b	finish_tlb_load
+
+5:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mtspr	SPRN_PID, r12		/* PID was packed into r12 on entry */
+	mtcrf	0x80, r12		/* restore CR field 0 from r12 */
+	mfspr	r9, SPRN_SPRG_SCRATCH4
+	mfspr	r12, SPRN_SPRG_SCRATCH3
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
+	b	DataStorage
+
+/* 0x1200 - Instruction TLB Miss Exception
+ * Nearly the same as above, except we get our information from different
+ * registers and bailout to a different point.
+ */
+	START_EXCEPTION(0x1200,	ITLBMiss)
+	mtspr	SPRN_SPRG_SCRATCH5, r10	 /* Save some working registers */
+	mtspr	SPRN_SPRG_SCRATCH6, r11
+	mtspr	SPRN_SPRG_SCRATCH3, r12
+	mtspr	SPRN_SPRG_SCRATCH4, r9
+	mfcr	r12			/* r12 = CR */
+	mfspr	r9, SPRN_PID		/* r9 = PID on entry */
+	rlwimi	r12, r9, 0, 0xff	/* pack PID into the low byte of r12 */
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	r10, r11
+	blt+	3f			/* address below PAGE_OFFSET */
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	li	r9, 0
+	mtspr	SPRN_PID, r9		/* TLB will have 0 TID */
+	b	4f
+
+	/* Get the PGD for the current thread.
+	 */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+	rlwinm.	r9, r9, 0, 0xff		/* is the entry PID's low byte zero? */
+	beq	5f			/* Kuap fault */
+#endif
+4:
+	tophys(r11, r11)
+	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */
+	lwz	r11, 0(r11)		/* Get L1 entry */
+	andi.	r9, r11, _PMD_PRESENT	/* Check if it points to a PTE page */
+	beq	2f			/* No PTE page: try large-page pmd at 2 */
+
+	rlwimi	r11, r10, 22, 20, 29	/* Compute PTE address */
+	lwz	r11, 0(r11)		/* Get Linux PTE */
+	li	r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+	andc.	r9, r9, r11		/* Check permission */
+	bne	5f			/* a required bit is missing in the PTE */
+
+	rlwinm	r9, r11, 1, _PAGE_RW	/* dirty => rw */
+	and	r9, r9, r11		/* hwwrite = dirty & rw */
+	rlwimi	r11, r9, 0, _PAGE_RW	/* replace rw by hwwrite */
+
+	/* Create TLB tag.  This is the faulting address plus a static
+	 * set of bits.  These are size, valid, E, U0.
+	*/
+	li	r9, 0x00c0
+	rlwimi	r10, r9, 0, 20, 31
+
+	b	finish_tlb_load
+
+2:	/* Check for possible large-page pmd entry */
+	rlwinm.	r9, r11, 2, 22, 24
+	beq	5f
+
+	/* Create TLB tag.  This is the faulting address, plus a static
+	 * set of bits (valid, E, U0) plus the size from the PMD.
+	 */
+	ori	r9, r9, 0x40
+	rlwimi	r10, r9, 0, 20, 31
+
+	b	finish_tlb_load
+
+5:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mtspr	SPRN_PID, r12		/* PID was packed into r12 on entry */
+	mtcrf	0x80, r12		/* restore CR field 0 from r12 */
+	mfspr	r9, SPRN_SPRG_SCRATCH4
+	mfspr	r12, SPRN_SPRG_SCRATCH3
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
+	b	InstructionAccess
+
+	EXCEPTION(0x1300, Trap_13, unknown_exception)	/* 0x1300-0x1F00 all go to unknown_exception */
+	EXCEPTION(0x1400, Trap_14, unknown_exception)
+	EXCEPTION(0x1500, Trap_15, unknown_exception)
+	EXCEPTION(0x1600, Trap_16, unknown_exception)
+	EXCEPTION(0x1700, Trap_17, unknown_exception)
+	EXCEPTION(0x1800, Trap_18, unknown_exception)
+	EXCEPTION(0x1900, Trap_19, unknown_exception)
+	EXCEPTION(0x1A00, Trap_1A, unknown_exception)
+	EXCEPTION(0x1B00, Trap_1B, unknown_exception)
+	EXCEPTION(0x1C00, Trap_1C, unknown_exception)
+	EXCEPTION(0x1D00, Trap_1D, unknown_exception)
+	EXCEPTION(0x1E00, Trap_1E, unknown_exception)
+	EXCEPTION(0x1F00, Trap_1F, unknown_exception)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved.  This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_SRR1, which will still have
+ * the MSR_DE bit set.
+ */
+	/* 0x2000 - Debug Exception */
+	START_EXCEPTION(0x2000, DebugTrap)
+	CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap
+
+	/*
+	 * If this is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the SRR3 value and clearing the debug status.
+	 */
+	mfspr	r10,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r10,r10,DBSR_IC@h
+	beq+	2f			/* DBSR_IC not set: normal handling */
+
+	andi.	r10,r9,MSR_IR|MSR_PR	/* check supervisor + MMU off */
+	beq	1f			/* branch and fix it up */
+
+	mfspr   r10,SPRN_SRR2		/* Faulting instruction address */
+	cmplwi  r10,0x2100
+	bgt+    2f			/* address above exception vectors */
+
+	/* here it looks like we got an inappropriate debug exception. */
+1:	rlwinm	r9,r9,0,~MSR_DE		/* clear DE in the SRR3 value */
+	lis	r10,DBSR_IC@h		/* clear the IC event */
+	mtspr	SPRN_DBSR,r10
+	/* restore state and get out */
+	lwz	r10,_CCR(r11)
+	lwz	r0,GPR0(r11)
+	lwz	r1,GPR1(r11)
+	mtcrf	0x80,r10		/* restore CR field 0 from the saved CR */
+	mtspr	SPRN_SRR2,r12		/* put back the saved SRR2 */
+	mtspr	SPRN_SRR3,r9		/* saved SRR3, now with DE cleared */
+	lwz	r9,GPR9(r11)
+	lwz	r12,GPR12(r11)
+	lwz	r10,crit_r10@l(0)	/* entry r10/r11 come from the crit save area */
+	lwz	r11,crit_r11@l(0)
+	rfci
+	b	.
+
+	/* continue normal handling for a critical exception... */
+2:	mfspr	r4,SPRN_DBSR
+	stw	r4,_ESR(r11)		/* DebugException takes DBSR in _ESR */
+	prepare_transfer_to_handler
+	bl	DebugException
+	b	ret_from_crit_exc
+
+	/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+	__HEAD
+Decrementer:
+	EXCEPTION_PROLOG 0x1000 Decrementer
+	lis	r0,TSR_PIS@h
+	mtspr	SPRN_TSR,r0		/* Clear the PIT exception */
+	prepare_transfer_to_handler
+	bl	timer_interrupt
+	b	interrupt_return
+
+	/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+	__HEAD
+FITException:
+	EXCEPTION_PROLOG 0x1010 FITException
+	prepare_transfer_to_handler
+	bl	unknown_exception	/* FIT is routed to unknown_exception */
+	b	interrupt_return
+
+	/* Watchdog Timer (WDT) Exception. (from 0x1020) */
+	__HEAD
+WDTException:
+	CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException
+	prepare_transfer_to_handler
+	bl	WatchdogException
+	b	ret_from_crit_exc	/* critical prolog pairs with the critical return path */
+
+/* Other PowerPC processors, namely those derived from the 6xx-series
+ * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
+ * However, for the 4xx-series processors these are neither defined nor
+ * reserved.
+ */
+
+	__HEAD
+	/* Damn, I came up one instruction too many to fit into the
+	 * exception space :-).  Both the instruction and data TLB
+	 * miss get to this point to load the TLB.
+	 * 	r10 - TLB_TAG value
+	 * 	r11 - Linux PTE
+	 *	r9 - available to use
+	 *	PID - loaded with proper value when we get here
+	 *	Upon exit, we reload everything and RFI.
+	 * Actually, it will fit now, but oh well.....a common place
+	 * to load the TLB.
+	 */
+tlb_4xx_index:
+	.long	0			/* index of the TLB entry written last */
+finish_tlb_load:
+	/*
+	 * Clear out the software-only bits in the PTE to generate the
+	 * TLB_DATA value.  These are the bottom 2 bits of the RPM, the
+	 * top 3 bits of the zone field, and M.
+	 */
+	li	r9, 0x0ce2
+	andc	r11, r11, r9
+
+	/* load the next available TLB index. */
+	lwz	r9, tlb_4xx_index@l(0)
+	addi	r9, r9, 1
+	andi.	r9, r9, PPC40X_TLB_SIZE - 1	/* wrap via the size mask */
+	stw	r9, tlb_4xx_index@l(0)
+
+	tlbwe	r11, r9, TLB_DATA		/* Load TLB LO */
+	tlbwe	r10, r9, TLB_TAG		/* Load TLB HI */
+
+	/* Done...restore registers and get out of here.
+	*/
+	mtspr	SPRN_PID, r12		/* miss handlers packed the PID into r12 */
+	mtcrf	0x80, r12		/* restore CR field 0 (andi. above changed it) */
+	mfspr	r9, SPRN_SPRG_SCRATCH4
+	mfspr	r12, SPRN_SPRG_SCRATCH3
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
+	rfi			/* Should sync shadow TLBs */
+	b	.		/* prevent prefetch past rfi */
+
+/* This is where the main kernel code starts.
+ */
+start_here:
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG_THREAD,r4
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)	/* store 0 there and move r1 to it */
+
+	bl	early_init	/* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
+	li	r3,0
+	mr	r4,r31		/* device tree ptr saved at _start */
+	bl	machine_init
+	bl	MMU_init
+
+/* Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 4xx, all we have to do is invalidate the TLB to clear
+ * the old 16M byte TLB mappings.
+ */
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)
+	lis	r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h
+	ori	r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l
+	mtspr	SPRN_SRR0,r4	/* rfi target: physical address of label 2 */
+	mtspr	SPRN_SRR1,r3	/* MSR_KERNEL with IR and DR cleared */
+	rfi
+	b	.		/* prevent prefetch past rfi */
+
+/* Load up the kernel context */
+2:
+	sync			/* Flush to memory before changing TLB */
+	tlbia
+	isync			/* Flush shadow TLBs */
+
+	/* set up the PTE pointers for the Abatron bdiGDB.
+	*/
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(0)	/* Must match your Abatron config file */
+	tophys(r5,r5)
+	stw	r6, 0(r5)	/* first abatron_pteptrs word = swapper_pg_dir */
+
+/* Now turn on the MMU for real! */
+	lis	r4,MSR_KERNEL@h
+	ori	r4,r4,MSR_KERNEL@l
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	rfi			/* enable MMU and jump to start_kernel */
+	b	.		/* prevent prefetch past rfi */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization.  This maps the first 32 MBytes of memory 1:1
+ * virtual to physical and more importantly sets the cache mode.
+ */
+SYM_FUNC_START_LOCAL(initial_mmu)
+	tlbia			/* Invalidate all TLB entries */
+	isync
+
+	/* We should still be executing code at physical address 0x0000xxxx
+	 * at this point. However, start_here is at virtual address
+	 * 0xC000xxxx. So, set up a TLB mapping to cover this once
+	 * translation is enabled.
+	 */
+
+	lis	r3,KERNELBASE@h		/* Load the kernel virtual address */
+	ori	r3,r3,KERNELBASE@l
+	tophys(r4,r3)			/* Load the kernel physical address */
+
+	iccci	r0,r3			/* Invalidate the i-cache before use */
+
+	/* Load the kernel PID.
+	*/
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Configure and load two 16M entries into TLB slots 63 and 62 */
+	clrrwi	r4,r4,10		/* Mask off the real page number */
+	ori	r4,r4,(TLB_WR | TLB_EX)	/* Set the write and execute bits */
+
+	clrrwi	r3,r3,10		/* Mask off the effective page number */
+	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
+
+        li      r0,63                    /* TLB slot 63 */
+
+	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */
+	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */
+
+	li	r0,62			/* TLB slot 62 */
+	addis	r4,r4,SZ_16M@h		/* data word: advance by 16M */
+	addis	r3,r3,SZ_16M@h		/* tag word: advance by 16M */
+	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */
+	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */
+
+	isync
+
+	/* Establish the exception vector base
+	*/
+	lis	r4,KERNELBASE@h		/* EVPR only uses the high 16-bits */
+	tophys(r0,r4)			/* Use the physical address */
+	mtspr	SPRN_EVPR,r0
+
+	blr
+SYM_FUNC_END(initial_mmu)
+
+_GLOBAL(abort)
+        mfspr   r13,SPRN_DBCR0		/* read-modify-write of DBCR0 */
+        oris    r13,r13,DBCR0_RST_SYSTEM@h	/* OR in the DBCR0_RST_SYSTEM bits */
+        mtspr   SPRN_DBCR0,r13		/* no instruction follows; presumably this resets the system */
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
new file mode 100644
index 0000000000..a3197c9f72
--- /dev/null
+++ b/arch/powerpc/kernel/head_44x.S
@@ -0,0 +1,1250 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Kernel execution entry point code.
+ *
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handlers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *    Copyright 2002-2005 MontaVista Software, Inc.
+ *      PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/synch.h>
+#include <asm/code-patching-asm.h>
+#include "head_booke.h"
+
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=128")
+ *   r7 - End of kernel command line string
+ *
+ */
+	__HEAD
+_GLOBAL(_stext);
+_GLOBAL(_start);
+	/*
+	 * Reserve a word at a fixed location to store the address
+	 * of abatron_pteptrs
+	 */
+	nop
+	mr	r31,r3		/* save device tree ptr */
+	li	r24,0		/* CPU number */
+
+#ifdef CONFIG_RELOCATABLE
+/*
+ * Relocate ourselves to the current runtime address.
+ * This is called only by the Boot CPU.
+ * "relocate" is called with our current runtime virtual
+ * address.
+ * r21 will be loaded with the physical runtime address of _stext
+ */
+	bcl	20,31,$+4			/* Get our runtime address */
+0:	mflr	r21				/* Make it accessible */
+	addis	r21,r21,(_stext - 0b)@ha
+	addi	r21,r21,(_stext - 0b)@l 	/* Get our current runtime base */
+
+	/*
+	 * We have the runtime (virtual) address of our base.
+	 * We calculate our shift of offset from a 256M page.
+	 * We could map the 256M page we belong to at PAGE_OFFSET and
+	 * get going from there.
+	 */
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	rlwinm	r6,r21,0,4,31			/* r6 = PHYS_START % 256M */
+	rlwinm	r5,r4,0,4,31			/* r5 = KERNELBASE % 256M */
+	subf	r3,r5,r6			/* r3 = r6 - r5 */
+	add	r3,r4,r3			/* Required Virtual Address */
+
+	bl	relocate
+#endif
+
+	bl	init_cpu_state
+
+	/*
+	 * This is where the main kernel code starts.
+	 */
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to current thread */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG_THREAD,r4
+
+	/* stack */
+	lis	r1,init_thread_union@h
+	ori	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)	/* store 0 there and move r1 to it */
+
+	bl	early_init
+
+#ifdef CONFIG_RELOCATABLE
+	/*
+	 * Relocatable kernel support based on processing of dynamic
+	 * relocation entries.
+	 *
+	 * r25 will contain RPN/ERPN for the start address of memory
+	 * r21 will contain the current offset of _stext
+	 */
+	lis	r3,kernstart_addr@ha
+	la	r3,kernstart_addr@l(r3)
+
+	/*
+	 * Compute the kernstart_addr.
+	 * kernstart_addr => (r6,r8)
+	 * kernstart_addr & ~0xfffffff => (r6,r7)
+	 */
+	rlwinm	r6,r25,0,28,31	/* ERPN. Bits 32-35 of Address */
+	rlwinm	r7,r25,0,0,3	/* RPN - assuming 256 MB page size */
+	rlwinm	r8,r21,0,4,31	/* r8 = (_stext & 0xfffffff) */
+	or	r8,r7,r8	/* Compute the lower 32bit of kernstart_addr */
+
+	/* Store kernstart_addr */
+	stw	r6,0(r3)	/* higher 32bit */
+	stw	r8,4(r3)	/* lower 32bit  */
+
+	/*
+	 * Compute the virt_phys_offset :
+	 * virt_phys_offset = stext.run - kernstart_addr
+	 *
+	 * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff)
+	 * When we relocate, we have :
+	 *
+	 *	(kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff)
+	 *
+	 * hence:
+	 *  virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff)
+	 *
+	 */
+
+	/* KERNELBASE&~0xfffffff => (r4,r5) */
+	li	r4, 0		/* higher 32bit */
+	lis	r5,KERNELBASE@h
+	rlwinm	r5,r5,0,0,3	/* Align to 256M, lower 32bit */
+
+	/*
+	 * 64bit subtraction.
+	 */
+	subfc	r5,r7,r5	/* low word: r5 - r7, records the carry */
+	subfe	r4,r6,r4	/* high word: r4 - r6, consuming that carry */
+
+	/* Store virt_phys_offset */
+	lis	r3,virt_phys_offset@ha
+	la	r3,virt_phys_offset@l(r3)
+
+	stw	r4,0(r3)	/* higher 32bit */
+	stw	r5,4(r3)	/* lower 32bit */
+
+#elif defined(CONFIG_DYNAMIC_MEMSTART)
+	/*
+	 * Mapping based, page aligned dynamic kernel loading.
+	 *
+	 * r25 will contain RPN/ERPN for the start address of memory
+	 *
+	 * Add the difference between KERNELBASE and PAGE_OFFSET to the
+	 * start of physical memory to get kernstart_addr.
+	 */
+	lis	r3,kernstart_addr@ha
+	la	r3,kernstart_addr@l(r3)
+
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	lis	r5,PAGE_OFFSET@h
+	ori	r5,r5,PAGE_OFFSET@l
+	subf	r4,r5,r4	/* r4 = KERNELBASE - PAGE_OFFSET */
+
+	rlwinm	r6,r25,0,28,31	/* ERPN */
+	rlwinm	r7,r25,0,0,3	/* RPN - assuming 256 MB page size */
+	add	r7,r7,r4
+
+	stw	r6,0(r3)	/* higher 32bit */
+	stw	r7,4(r3)	/* lower 32bit */
+#endif
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
+	li	r3,0
+	mr	r4,r31		/* device tree ptr saved at _start */
+	bl	machine_init
+	bl	MMU_init
+
+	/* Setup PTE pointers for the Abatron bdiGDB */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	lis	r4, KERNELBASE@h
+	ori	r4, r4, KERNELBASE@l
+	stw	r5, 0(r4)	/* Save abatron_pteptrs at a fixed location */
+	stw	r6, 0(r5)	/* first abatron_pteptrs word = swapper_pg_dir */
+
+	/* Clear the Machine Check Syndrome Register */
+	li	r0,0
+	mtspr	SPRN_MCSR,r0
+
+	/* Let's move on */
+	lis	r4,start_kernel@h
+	ori	r4,r4,start_kernel@l
+	lis	r3,MSR_KERNEL@h
+	ori	r3,r3,MSR_KERNEL@l
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi			/* change context and jump to start_kernel */
+
+/*
+ * Interrupt vector entry code
+ *
+ * The Book E MMUs are always on so we don't need to handle
+ * interrupts in real mode as with previous PPC processors. In
+ * this case we handle interrupts in the kernel virtual address
+ * space.
+ *
+ * Interrupt vectors are dynamically placed relative to the
+ * interrupt prefix as determined by the address of interrupt_base.
+ * The interrupt vectors offsets are programmed using the labels
+ * for each interrupt vector entry.
+ *
+ * Interrupt vectors must be aligned on a 16 byte boundary.
+ * We align on a 32 byte cache line boundary for good measure.
+ */
+
+interrupt_base:
+	/* Critical Input Interrupt */
+	CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
+
+	/* Machine Check Interrupt */
+	CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+			   machine_check_exception)
+	MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
+
+	/* Data Storage Interrupt */
+	DATA_STORAGE_EXCEPTION
+
+		/* Instruction Storage Interrupt */
+	INSTRUCTION_STORAGE_EXCEPTION
+
+	/* External Input Interrupt */
+	EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ)
+
+	/* Alignment Interrupt */
+	ALIGNMENT_EXCEPTION
+
+	/* Program Interrupt */
+	PROGRAM_EXCEPTION
+
+	/* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+	FP_UNAVAILABLE_EXCEPTION
+#else
+	EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
+		  FloatingPointUnavailable, unknown_exception)
+#endif
+	/* System Call Interrupt */
+	START_EXCEPTION(SystemCall)
+	SYSCALL_ENTRY   0xc00 BOOKE_INTERRUPT_SYSCALL
+
+	/* Auxiliary Processor Unavailable Interrupt */
+	EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
+		  AuxillaryProcessorUnavailable, unknown_exception)
+
+	/* Decrementer Interrupt */
+	DECREMENTER_EXCEPTION
+
+	/* Fixed Internal Timer Interrupt */
+	/* TODO: Add FIT support */
+	EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception)
+
+	/* Watchdog Timer Interrupt */
+	/* TODO: Add watchdog support (WatchdogException is used only under CONFIG_BOOKE_WDT) */
+#ifdef CONFIG_BOOKE_WDT
+	CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
+#else
+	CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
+#endif
+
+	/* Data TLB Error Interrupt */
+	START_EXCEPTION(DataTLBError44x)
+	mtspr	SPRN_SPRG_WSCRATCH0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1, r11
+	mtspr	SPRN_SPRG_WSCRATCH2, r12
+	mtspr	SPRN_SPRG_WSCRATCH3, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG_WSCRATCH4, r11	/* CR is parked in scratch 4 */
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	r10, r11
+	blt+	3f			/* address below PAGE_OFFSET */
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MMUCR
+	rlwinm	r12,r12,0,0,23		/* Clear TID */
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+
+	/* Load PID into MMUCR TID */
+	mfspr	r12,SPRN_MMUCR
+	mfspr   r13,SPRN_PID		/* Get PID */
+	rlwimi	r12,r13,0,24,31		/* Set TID */
+#ifdef CONFIG_PPC_KUAP
+	cmpwi	r13,0			/* PID == 0 ? */
+	beq	2f			/* KUAP Fault */
+#endif
+
+4:
+	mtspr	SPRN_MMUCR,r12
+
+	/* Mask of required permission bits. Note that while we
+	 * do copy ESR:ST to _PAGE_RW position as trying to write
+	 * to an RO page is pretty common, we don't do it with
+	 * _PAGE_DIRTY. We could do it, but it's a fairly rare
+	 * event so I'd rather take the overhead when it happens
+	 * rather than adding an instruction here. We should measure
+	 * whether the whole thing is worth it in the first place
+	 * as we could avoid loading SPRN_ESR completely in the first
+	 * place...
+	 *
+	 * TODO: Is it worth doing that mfspr & rlwimi in the first
+	 *       place or can we save a couple of instructions here ?
+	 */
+	mfspr	r12,SPRN_ESR
+	li	r13,_PAGE_PRESENT|_PAGE_ACCESSED
+	rlwimi	r13,r12,10,30,30	/* ESR:ST -> _PAGE_RW position (see above) */
+
+	/* Load the PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
+	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
+	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
+	beq	2f			/* Bail if no table */
+
+	/* Compute pte address */
+	rlwimi  r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
+	lwz	r11, 0(r12)		/* Get high word of pte entry */
+	lwz	r12, 4(r12)		/* Get low word of pte entry */
+
+	lis	r10,tlb_44x_index@ha
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	/* Load the next available TLB index */
+	lwz	r13,tlb_44x_index@l(r10)	/* lwz leaves CR0 from andc. intact */
+
+	bne	2f			/* Bail if permission mismatch */
+
+	/* Increment, rollover, and store TLB index */
+	addi	r13,r13,1
+
+	patch_site 0f, patch__tlb_44x_hwater_D
+	/* Compare with watermark (instruction gets patched) */
+0:	cmpwi	0,r13,1			/* reserve entries */
+	ble	5f
+	li	r13,0			/* past the watermark: roll over to 0 */
+5:
+	/* Store the next available TLB index */
+	stw	r13,tlb_44x_index@l(r10)
+
+	/* Re-load the faulting address */
+	mfspr	r10,SPRN_DEAR
+
+	 /* Jump to common tlb load */
+	b	finish_tlb_load_44x
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11			/* CR was parked in scratch 4 on entry */
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	DataStorage
+
+	/* Instruction TLB Error Interrupt: 44x software TLB miss handler for fetches */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError44x)
+	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1, r11
+	mtspr	SPRN_SPRG_WSCRATCH2, r12
+	mtspr	SPRN_SPRG_WSCRATCH3, r13
+	mfcr	r11				/* CR is saved too, via r11 */
+	mtspr	SPRN_SPRG_WSCRATCH4, r11
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	r10, r11
+	blt+	3f			/* Below PAGE_OFFSET: use the thread's PGD */
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MMUCR
+	rlwinm	r12,r12,0,0,23		/* Clear TID */
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+
+	/* Load PID into MMUCR TID */
+	mfspr	r12,SPRN_MMUCR
+	mfspr   r13,SPRN_PID		/* Get PID */
+	rlwimi	r12,r13,0,24,31		/* Set TID */
+#ifdef CONFIG_PPC_KUAP
+	cmpwi	r13,0			/* User address while PID == 0 ? */
+	beq	2f			/* KUAP Fault */
+#endif
+
+4:
+	mtspr	SPRN_MMUCR,r12		/* r12 = MMUCR value built on either path */
+
+	/* Make up the required permissions */
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+	/* Compute pgdir/pmd offset */
+	rlwinm 	r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
+	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
+	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
+	beq	2f			/* Bail if no table */
+
+	/* Compute pte address */
+	rlwimi	r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
+	lwz	r11, 0(r12)		/* Get high word of pte entry */
+	lwz	r12, 4(r12)		/* Get low word of pte entry */
+
+	lis	r10,tlb_44x_index@ha	/* r10 (fault address) is clobbered; SRR0 is re-read below */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	/* Load the next available TLB index (lwz leaves CR0 from andc. intact) */
+	lwz	r13,tlb_44x_index@l(r10)
+
+	bne	2f			/* Bail if permission mismatch */
+
+	/* Increment, rollover, and store TLB index */
+	addi	r13,r13,1
+
+	patch_site 0f, patch__tlb_44x_hwater_I
+	/* Compare with watermark (instruction gets patched) */
+0:	cmpwi	0,r13,1			/* reserve entries */
+	ble	5f			/* At or below the watermark: keep the index */
+	li	r13,0			/* Above the watermark: wrap to entry 0 */
+5:
+	/* Store the next available TLB index */
+	stw	r13,tlb_44x_index@l(r10)
+
+	/* Re-load the faulting address */
+	mfspr	r10,SPRN_SRR0
+
+	/* Jump to common TLB load point */
+	b	finish_tlb_load_44x
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11			/* Restore CR first, r11 is reloaded below */
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	InstructionStorage	/* Continue in the InstructionStorage handler */
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * 	r10 - EA of fault
+ * 	r11 - PTE high word value
+ *	r12 - PTE low word value
+ *	r13 - TLB index
+ *	MMUCR - loaded with proper value when we get here
+ *	Upon exit, we restore r10-r13 and CR from the scratch SPRGs and RFI.
+ */
+finish_tlb_load_44x:
+	/* Combine RPN & ERPN and write the XLAT word */
+	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
+	tlbwe	r11,r13,PPC44x_TLB_XLAT
+
+	/*
+	 * Create the PAGEID word. This is the faulting address (EPN),
+	 * page size, and valid flag.
+	 */
+	li	r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
+	/* Insert valid and page size */
+	rlwimi	r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31
+	tlbwe	r10,r13,PPC44x_TLB_PAGEID	/* Write PAGEID */
+
+	/* And the ATTRIB word */
+	li	r10,0xf85			/* Mask to apply from PTE */
+	rlwimi	r10,r12,29,30,30		/* DIRTY -> SW position */
+	and	r11,r12,r10			/* Mask PTE bits to keep */
+	andi.	r10,r12,_PAGE_USER		/* User page ? */
+	beq	1f				/* nope, leave U bits empty */
+	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
+	rlwinm	r11,r11,0,~PPC44x_TLB_SX	/* Clear SX if User page */
+1:	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
+
+	/* Done...restore registers and get out of here.
+	*/
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11				/* Restore CR first, r11 is reloaded below */
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	rfi					/* Force context change */
+
+/* TLB error interrupts for 476
+ */
+#ifdef CONFIG_PPC_47x
+	START_EXCEPTION(DataTLBError47x)	/* 47x software TLB miss handler for data accesses */
+	mtspr	SPRN_SPRG_WSCRATCH0,r10	/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1,r11
+	mtspr	SPRN_SPRG_WSCRATCH2,r12
+	mtspr	SPRN_SPRG_WSCRATCH3,r13
+	mfcr	r11			/* CR is saved too, via r11 */
+	mtspr	SPRN_SPRG_WSCRATCH4,r11
+	mfspr	r10,SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11,PAGE_OFFSET@h
+	cmplw	cr0,r10,r11
+	blt+	3f			/* Below PAGE_OFFSET: use the thread's PGD */
+	lis	r11,swapper_pg_dir@h
+	ori	r11,r11, swapper_pg_dir@l
+	li	r12,0			/* MMUCR = 0 */
+	b	4f
+
+	/* Get the PGD for the current thread and setup MMUCR */
+3:	mfspr	r11,SPRN_SPRG_THREAD	/* Same alias as the other TLB miss handlers */
+	lwz	r11,PGDIR(r11)
+	mfspr   r12,SPRN_PID		/* Get PID */
+#ifdef CONFIG_PPC_KUAP
+	cmpwi	r12,0			/* User address while PID == 0 ? */
+	beq	2f			/* KUAP Fault */
+#endif
+4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */
+
+	/* Mask of required permission bits. Note that while we
+	 * do copy ESR:ST to _PAGE_RW position as trying to write
+	 * to an RO page is pretty common, we don't do it with
+	 * _PAGE_DIRTY. We could do it, but it's a fairly rare
+	 * event so I'd rather take the overhead when it happens
+	 * rather than adding an instruction here. We should measure
+	 * whether the whole thing is worth it in the first place
+	 * as we could avoid loading SPRN_ESR completely in the first
+	 * place...
+	 *
+	 * TODO: Is it worth doing that mfspr & rlwimi in the first
+	 *       place or can we save a couple of instructions here ?
+	 */
+	mfspr	r12,SPRN_ESR
+	li	r13,_PAGE_PRESENT|_PAGE_ACCESSED
+	rlwimi	r13,r12,10,30,30	/* ESR:ST -> _PAGE_RW position (see above) */
+
+	/* Load the PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+	lwzx	r11,r12,r11		/* Get pgd/pmd entry */
+
+	/* Word 0 is EPN,V,TS,DSIZ */
+	li	r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+	rlwimi	r10,r12,0,32-PAGE_SHIFT,31	/* Insert valid and page size*/
+	li	r12,0
+	tlbwe	r10,r12,0		/* Write word 0, index register r12 = 0 */
+
+	/* XXX can we do better ? Need to make sure tlbwe has established
+	 * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+	isync
+#endif
+
+	rlwinm.	r12,r11,0,0,20		/* Extract pt base address */
+	/* Compute pte address (rlwimi has no Rc, so CR0 from rlwinm. survives) */
+	rlwimi  r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+	beq	2f			/* Bail if no table */
+	lwz	r11,0(r12)		/* Get high word of pte entry */
+
+	/* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+	 * bottom of r12 to create a data dependency... We can also use r10
+	 * as destination nowadays
+	 */
+#ifdef CONFIG_SMP
+	lwsync
+#endif
+	lwz	r12,4(r12)		/* Get low word of pte entry */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	 /* Permissions OK (r13 == 0): jump to common tlb load */
+	beq	finish_tlb_load_47x
+
+2:	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11,SPRN_SPRG_RSCRATCH4
+	mtcr	r11			/* Restore CR first, r11 is reloaded below */
+	mfspr	r13,SPRN_SPRG_RSCRATCH3
+	mfspr	r12,SPRN_SPRG_RSCRATCH2
+	mfspr	r11,SPRN_SPRG_RSCRATCH1
+	mfspr	r10,SPRN_SPRG_RSCRATCH0
+	b	DataStorage		/* Continue in the DataStorage handler */
+
+	/* Instruction TLB Error Interrupt: 47x software TLB miss handler for fetches */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError47x)
+	mtspr	SPRN_SPRG_WSCRATCH0,r10	/* Save some working registers */
+	mtspr	SPRN_SPRG_WSCRATCH1,r11
+	mtspr	SPRN_SPRG_WSCRATCH2,r12
+	mtspr	SPRN_SPRG_WSCRATCH3,r13
+	mfcr	r11			/* CR is saved too, via r11 */
+	mtspr	SPRN_SPRG_WSCRATCH4,r11
+	mfspr	r10,SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11,PAGE_OFFSET@h
+	cmplw	cr0,r10,r11
+	blt+	3f			/* Below PAGE_OFFSET: use the thread's PGD */
+	lis	r11,swapper_pg_dir@h
+	ori	r11,r11, swapper_pg_dir@l
+	li	r12,0			/* MMUCR = 0 */
+	b	4f
+
+	/* Get the PGD for the current thread and setup MMUCR */
+3:	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+	mfspr   r12,SPRN_PID		/* Get PID */
+#ifdef CONFIG_PPC_KUAP
+	cmpwi	r12,0			/* User address while PID == 0 ? */
+	beq	2f			/* KUAP Fault */
+#endif
+4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */
+
+	/* Make up the required permissions */
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+	/* Load PTE */
+	/* Compute pgdir/pmd offset */
+	rlwinm  r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+	lwzx	r11,r12,r11		/* Get pgd/pmd entry */
+
+	/* Word 0 is EPN,V,TS,DSIZ */
+	li	r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+	rlwimi	r10,r12,0,32-PAGE_SHIFT,31	/* Insert valid and page size*/
+	li	r12,0
+	tlbwe	r10,r12,0		/* Write word 0, index register r12 = 0 */
+
+	/* XXX can we do better ? Need to make sure tlbwe has established
+	 * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+	isync
+#endif
+
+	rlwinm.	r12,r11,0,0,20		/* Extract pt base address */
+	/* Compute pte address (rlwimi has no Rc, so CR0 from rlwinm. survives) */
+	rlwimi  r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+	beq	2f			/* Bail if no table */
+
+	lwz	r11,0(r12)		/* Get high word of pte entry */
+	/* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+	 * bottom of r12 to create a data dependency... We can also use r10
+	 * as destination nowadays
+	 */
+#ifdef CONFIG_SMP
+	lwsync
+#endif
+	lwz	r12,4(r12)		/* Get low word of pte entry */
+
+	andc.	r13,r13,r12		/* Check permission */
+
+	/* Permissions OK (r13 == 0): jump to common TLB load point */
+	beq	finish_tlb_load_47x
+
+2:	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11			/* Restore CR first, r11 is reloaded below */
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	InstructionStorage	/* Continue in the InstructionStorage handler */
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * 	r10 - free to use
+ * 	r11 - PTE high word value
+ *	r12 - PTE low word value
+ *	r13 - zero (callers branch here via beq after "andc. r13,..."); tlbwe index
+ *	MMUCR - loaded with proper value when we get here
+ *	Upon exit, we restore r10-r13 and CR from the scratch SPRGs and RFI.
+ */
+finish_tlb_load_47x:
+	/* Combine RPN & ERPN and write WS 1 */
+	rlwimi	r11,r12,0,0,31-PAGE_SHIFT
+	tlbwe	r11,r13,1			/* r13 == 0 here; NOTE(review): presumably lets HW pick the way - confirm */
+
+	/* And make up word 2 */
+	li	r10,0xf85			/* Mask to apply from PTE */
+	rlwimi	r10,r12,29,30,30		/* DIRTY -> SW position */
+	and	r11,r12,r10			/* Mask PTE bits to keep */
+	andi.	r10,r12,_PAGE_USER		/* User page ? */
+	beq	1f				/* nope, leave U bits empty */
+	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
+	rlwinm	r11,r11,0,~PPC47x_TLB2_SX	/* Clear SX if User page */
+1:	tlbwe	r11,r13,2			/* Write word 2 */
+
+	/* Done...restore registers and get out of here.
+	*/
+	mfspr	r11, SPRN_SPRG_RSCRATCH4
+	mtcr	r11				/* Restore CR first, r11 is reloaded below */
+	mfspr	r13, SPRN_SPRG_RSCRATCH3
+	mfspr	r12, SPRN_SPRG_RSCRATCH2
+	mfspr	r11, SPRN_SPRG_RSCRATCH1
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	rfi
+
+#endif /* CONFIG_PPC_47x */
+
+	/* Debug Interrupt */
+	/*
+	 * This statement needs to exist at the end of the IVPR
+	 * definition just in case you end up taking a debug
+	 * exception within another exception.
+	 */
+	DEBUG_CRIT_EXCEPTION
+
+interrupt_end:
+
+/*
+ * Global functions
+ */
+
+/*
+ * Adjust the machine check IVOR on 440A cores: point IVOR1 at MachineCheckA.
+ */
+_GLOBAL(__fixup_440A_mcheck)
+	li	r3,MachineCheckA@l	/* Low 16 bits of the handler address */
+	mtspr	SPRN_IVOR1,r3
+	sync
+	blr
+
+/*
+ * Init CPU state. This is called at boot time or for secondary CPUs
+ * to setup initial TLB entries, setup IVORs, etc...
+ * The return address is kept in r22 and consumed by head_start_common.
+ */
+_GLOBAL(init_cpu_state)
+	mflr	r22			/* Save return address */
+#ifdef CONFIG_PPC_47x
+	/* We use the PVR to differentiate 44x cores from 476 */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16		/* Compare only the upper 16 bits of the PVR */
+	cmplwi	cr0,r3,PVR_476FPE@h
+	beq	head_start_47x
+	cmplwi	cr0,r3,PVR_476@h
+	beq	head_start_47x
+	cmplwi	cr0,r3,PVR_476_ISS@h
+	beq	head_start_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * In case the firmware didn't do it, we apply some workarounds
+ * that are good for all 440 core variants here
+ */
+	mfspr	r3,SPRN_CCR0
+	rlwinm	r3,r3,0,0,27	/* disable icache prefetch */
+	isync
+	mtspr	SPRN_CCR0,r3
+	isync
+	sync
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ *
+ * We first invalidate all TLB entries but the one
+ * we are running from.  We then load the KERNELBASE
+ * mappings so we can begin to use kernel addresses
+ * natively and so the interrupt vector locations are
+ * permanently pinned (necessary since Book E
+ * implementations always have translation enabled).
+ *
+ * TODO: Use the known TLB entry we are running from to
+ *	 determine which physical region we are located
+ *	 in.  This can be used to determine where in RAM
+ *	 (on a shared CPU system) or PCI memory space
+ *	 (on a DRAMless system) we are located.
+ *       For now, we assume a perfect world which means
+ *	 we are located at the base of DRAM (physical 0).
+ */
+
+/*
+ * Search TLB for entry that we are currently using.
+ * Invalidate all entries but the one we are using.
+ */
+	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+	mfspr	r3,SPRN_PID			/* Get PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4,r4,MSR_IS@l			/* TS=1? */
+	beq	wmmucr				/* If not, leave STS=0 */
+	oris	r3,r3,PPC44x_MMUCR_STS@h	/* Set STS=1 */
+wmmucr:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
+	sync
+
+	bcl	20,31,$+4			/* Find our address */
+invstr:	mflr	r5				/* Make it accessible */
+	tlbsx	r23,0,r5			/* Find entry we are in */
+	li	r4,0				/* Start at TLB entry 0 */
+	li	r3,0				/* Set PAGEID inval value */
+1:	cmpw	r23,r4				/* Is this our entry? */
+	beq	skpinv				/* If so, skip the inval */
+	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
+skpinv:	addi	r4,r4,1				/* Increment */
+	cmpwi	r4,64				/* Are we done? */
+	bne	1b				/* If not, repeat */
+	isync					/* If so, context change */
+
+/*
+ * Configure and load pinned entry into TLB slot 63.
+ */
+#ifdef CONFIG_NONSTATIC_KERNEL
+	/*
+	 * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT
+	 * entries of the initial mapping set by the boot loader.
+	 * The XLAT entry is stored in r25
+	 */
+
+	/* Read the XLAT entry for our current mapping */
+	tlbre	r25,r23,PPC44x_TLB_XLAT
+
+	lis	r3,KERNELBASE@h
+	ori	r3,r3,KERNELBASE@l
+
+	/* Use our current RPN entry */
+	mr	r4,r25
+#else
+
+	lis	r3,PAGE_OFFSET@h
+	ori	r3,r3,PAGE_OFFSET@l
+
+	/* Kernel is at the base of RAM */
+	li r4, 0			/* Load the kernel physical address */
+#endif
+
+	/* Load the kernel PID = 0 */
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Initialize MMUCR */
+	li	r5,0
+	mtspr	SPRN_MMUCR,r5
+	sync
+
+	/* pageid fields */
+	clrrwi	r3,r3,10		/* Keep the effective page number (clear low 10 bits) */
+	ori	r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+	/* xlat fields */
+	clrrwi	r4,r4,10		/* Keep the real page number (clear low 10 bits) */
+					/* ERPN is 0 for first 4GB page */
+
+	/* attrib fields */
+	/* Added guarded bit to protect against speculative loads/stores */
+	li	r5,0
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+        li      r0,63                    /* TLB slot 63 */
+
+	tlbwe	r3,r0,PPC44x_TLB_PAGEID	/* Load the pageid fields */
+	tlbwe	r4,r0,PPC44x_TLB_XLAT	/* Load the translation fields */
+	tlbwe	r5,r0,PPC44x_TLB_ATTRIB	/* Load the attrib/access fields */
+
+	/* Force context change: rfi to label 3 below with the current MSR */
+	mfmsr	r0
+	mtspr	SPRN_SRR1, r0
+	lis	r0,3f@h
+	ori	r0,r0,3f@l
+	mtspr	SPRN_SRR0,r0
+	sync
+	rfi
+
+	/* If necessary, invalidate original entry we used */
+3:	cmpwi	r23,63			/* Original entry was slot 63, just rewritten ? */
+	beq	4f			/* Then there is nothing left to invalidate */
+	li	r6,0
+	tlbwe   r6,r23,PPC44x_TLB_PAGEID
+	isync
+
+4:
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+	/* Add UART mapping for early debug. */
+
+	/* pageid fields */
+	lis	r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+	ori	r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K
+
+	/* xlat fields */
+	lis	r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+	ori	r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+	/* attrib fields */
+	li	r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G)
+        li      r0,62                    /* TLB slot 62 */
+
+	tlbwe	r3,r0,PPC44x_TLB_PAGEID
+	tlbwe	r4,r0,PPC44x_TLB_XLAT
+	tlbwe	r5,r0,PPC44x_TLB_ATTRIB
+
+	/* Force context change */
+	isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheck);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError44x);
+	SET_IVOR(14, InstructionTLBError44x);
+	SET_IVOR(15, DebugCrit);
+
+	b	head_start_common	/* Set IVPR and return to our caller via r22 */
+
+
+#ifdef CONFIG_PPC_47x
+
+#ifdef CONFIG_SMP
+
+/* Entry point for secondary 47x processors; r3 = CPU number on entry */
+_GLOBAL(start_secondary_47x)
+        mr      r24,r3          /* CPU number */
+
+	bl	init_cpu_state
+
+	/* Now we need to bolt the rest of kernel memory which
+	 * is done in C code. We must be careful because our task
+	 * struct or our stack can (and will probably) be out
+	 * of reach of the initial 256M TLB entry, so we use a
+	 * small temporary stack in .bss for that. This works
+	 * because only one CPU at a time can be in this code
+	 */
+	lis	r1,temp_boot_stack@h
+	ori	r1,r1,temp_boot_stack@l
+	addi	r1,r1,1024-STACK_FRAME_MIN_SIZE	/* Top of the 1024-byte temp stack, minus one frame */
+	li	r0,0
+	stw	r0,0(r1)		/* Store 0 at the stack pointer */
+	bl	mmu_init_secondary
+
+	/* Now we can get our task struct and real stack pointer */
+
+	/* Get current's stack and current */
+	lis	r2,secondary_current@ha
+	lwz	r2,secondary_current@l(r2)
+	lwz	r1,TASK_STACK(r2)
+
+	/* Current stack pointer */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
+	li	r0,0
+	stw	r0,0(r1)		/* Store 0 at the stack pointer */
+
+	/* Kernel stack for exception entry in SPRG3 */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+
+	b	start_secondary		/* Tail-branch, no return here */
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Set up the initial MMU state for 47x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ */
+
+head_start_47x:
+	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+	mfspr	r3,SPRN_PID			/* Get PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4,r4,MSR_IS@l			/* TS=1? */
+	beq	1f				/* If not, leave STS=0 */
+	oris	r3,r3,PPC47x_MMUCR_STS@h	/* Set STS=1 */
+1:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
+	sync
+
+	/* Find the entry we are running from; keep its three words in r24-r26 */
+	bcl	20,31,$+4
+1:	mflr	r23
+	tlbsx	r23,0,r23
+	tlbre	r24,r23,0
+	tlbre	r25,r23,1
+	tlbre	r26,r23,2
+
+/*
+ * Cleanup time
+ */
+
+	/* Initialize MMUCR */
+	li	r5,0
+	mtspr	SPRN_MMUCR,r5
+	sync
+
+clear_all_utlb_entries:
+
+	#; Set initial values.
+
+	addis		r3,0,0x8000	/* r3 = 0x80000000 */
+	addi		r4,0,0		/* r4 = 0 */
+	addi		r5,0,0		/* r5 = 0 */
+	b		clear_utlb_entry
+
+	#; Align the loop to speed things up.
+
+	.align		6
+
+clear_utlb_entry:
+
+	tlbwe		r4,r3,0
+	tlbwe		r5,r3,1
+	tlbwe		r5,r3,2
+	addis		r3,r3,0x2000	/* r3 += 0x20000000 */
+	cmpwi		r3,0
+	bne		clear_utlb_entry	/* Inner loop: 4 passes until r3 wraps to 0 */
+	addis		r3,0,0x8000	/* Reset r3 to 0x80000000 */
+	addis		r4,r4,0x100	/* r4 += 0x01000000 */
+	cmpwi		r4,0
+	bne		clear_utlb_entry	/* Outer loop: 256 passes until r4 wraps to 0 */
+
+	#; Restore original entry.
+
+	oris	r23,r23,0x8000  /* specify the way */
+	tlbwe		r24,r23,0
+	tlbwe		r25,r23,1
+	tlbwe		r26,r23,2
+
+/*
+ * Configure and load pinned entry into TLB for the kernel core
+ */
+
+	lis	r3,PAGE_OFFSET@h
+	ori	r3,r3,PAGE_OFFSET@l
+
+	/* Load the kernel PID = 0 */
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Word 0 */
+	clrrwi	r3,r3,12		/* Keep the effective page number (clear low 12 bits) */
+	ori	r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M
+
+	/* Word 1 - use r25.  RPN is the same as the original entry */
+
+	/* Word 2 */
+	li	r5,0
+	ori	r5,r5,PPC47x_TLB2_S_RWX
+#ifdef CONFIG_SMP
+	ori	r5,r5,PPC47x_TLB2_M
+#endif
+
+	/* We write to way 0 and bolted 0 */
+	lis	r0,0x8800
+	tlbwe	r3,r0,0
+	tlbwe	r25,r0,1
+	tlbwe	r5,r0,2
+
+/*
+ * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix
+ * them up later
+ */
+	LOAD_REG_IMMEDIATE(r3, 0x9abcdef0)
+	mtspr	SPRN_SSPCR,r3
+	mtspr	SPRN_USPCR,r3
+	LOAD_REG_IMMEDIATE(r3, 0x12345670)
+	mtspr	SPRN_ISPCR,r3
+
+	/* Force context change: rfi to label 3 below with the current MSR */
+	mfmsr	r0
+	mtspr	SPRN_SRR1, r0
+	lis	r0,3f@h
+	ori	r0,r0,3f@l
+	mtspr	SPRN_SRR0,r0
+	sync
+	rfi
+
+	/* Invalidate original entry we used */
+3:
+	rlwinm	r24,r24,0,21,19 /* clear the "valid" bit */
+	tlbwe	r24,r23,0
+	addi	r24,0,0		/* r24 = 0 */
+	tlbwe	r24,r23,1
+	tlbwe	r24,r23,2
+	isync                   /* Clear out the shadow TLB entries */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+	/* Add UART mapping for early debug. */
+
+	/* Word 0 */
+	lis	r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+	ori	r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M
+
+	/* Word 1 */
+	lis	r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+	ori	r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+	/* Word 2 */
+	li	r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG)
+
+	/* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same
+	 * congruence class as the kernel, we need to make sure of it at
+	 * some point
+	 */
+        lis	r0,0x8d00
+	tlbwe	r3,r0,0
+	tlbwe	r4,r0,1
+	tlbwe	r5,r0,2
+
+	/* Force context change */
+	isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheckA);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError47x);
+	SET_IVOR(14, InstructionTLBError47x);
+	SET_IVOR(15, DebugCrit);
+
+	/* We configure icbi to invalidate 128 bytes at a time since the
+	 * current 32-bit kernel code isn't too happy with icache != dcache
+	 * block size. We also disable the BTAC as this can cause errors
+	 * in some circumstances (see IBM Erratum 47).
+	 */
+	mfspr	r3,SPRN_CCR0
+	oris	r3,r3,0x0020
+	ori	r3,r3,0x0040
+	mtspr	SPRN_CCR0,r3
+	isync
+
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Here we are back to code that is common between 44x and 47x
+ *
+ * We proceed to further kernel initialization and return to the
+ * main kernel entry, using the return address init_cpu_state saved in r22
+ */
+head_start_common:
+	/* Establish the interrupt vector base */
+	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
+	mtspr	SPRN_IVPR,r4
+
+	/*
+	 * If the kernel was loaded at a non-zero 256 MB page, we need to
+	 * mask off the most significant 4 bits to get the relative address
+	 * from the start of physical memory
+	 */
+	rlwinm	r22,r22,0,4,31		/* Drop the top 4 bits of the saved LR */
+	addis	r22,r22,PAGE_OFFSET@h	/* Rebase it onto PAGE_OFFSET */
+	mtlr	r22
+	isync
+	blr				/* Return to init_cpu_state's caller */
+
+#ifdef CONFIG_SMP
+	.data
+	.align	12
+temp_boot_stack:			/* Temporary stack used by start_secondary_47x */
+	.space	1024			/* Size matches the 1024 used when r1 is set up there */
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
new file mode 100644
index 0000000000..4690c219bf
--- /dev/null
+++ b/arch/powerpc/kernel/head_64.S
@@ -0,0 +1,1044 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *
+ *  Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
+ *    Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
+ *
+ *  This file contains the entry point for the 64-bit kernel along
+ *  with some early initialization code common to all 64-bit powerpc
+ *  variants.
+ */
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/head-64.h>
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/hvcall.h>
+#include <asm/thread_info.h>
+#include <asm/firmware.h>
+#include <asm/page_64.h>
+#include <asm/irqflags.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
+#include <asm/cputhreads.h>
+#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+
+/* The physical memory is laid out such that the secondary processor
+ * spin code sits at 0x0000...0x00ff. On server, the vectors follow
+ * using the layout described in exceptions-64s.S
+ */
+
+/*
+ * Entering into this code we make the following assumptions:
+ *
+ *  For pSeries or server processors:
+ *   1. The MMU is off & open firmware is running in real mode.
+ *   2. The primary CPU enters at __start.
+ *   3. If the RTAS supports "query-cpu-stopped-state", then secondary
+ *      CPUs will enter as directed by "start-cpu" RTAS call, which is
+ *      generic_secondary_smp_init, with PIR in r3.
+ *   4. Else the secondary CPUs will enter at secondary_hold (0x60) as
+ *      directed by the "start-cpu" RTAS call, with PIR in r3.
+ * -or- For OPAL entry:
+ *   1. The MMU is off, processor in HV mode.
+ *   2. The primary CPU enters at 0 with device-tree in r3, OPAL base
+ *      in r8, and entry in r9 for debugging purposes.
+ *   3. Secondary CPUs enter as directed by OPAL_START_CPU call, which
+ *      is at generic_secondary_smp_init, with PIR in r3.
+ *
+ *  For Book3E processors:
+ *   1. The MMU is on running in AS0 in a state defined in ePAPR
+ *   2. The kernel is entered at __start
+ */
+
+/*
+ * boot_from_prom and prom_init run at the physical address. Everything
+ * after prom and kexec entry run at the virtual address (PAGE_OFFSET).
+ * Secondaries run at the virtual address from generic_secondary_common_init
+ * onward.
+ */
+
+OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
+USE_FIXED_SECTION(first_256B)
+	/*
+	 * Offsets are relative from the start of fixed section, and
+	 * first_256B starts at 0. Offsets are a bit easier to use here
+	 * than the fixed section entry macros.
+	 */
+	. = 0x0
+_GLOBAL(__start)
+	/* Boot entry. NOP this out unconditionally (presumably by the feature-fixup pass), leaving the trap */
+BEGIN_FTR_SECTION
+	FIXUP_ENDIAN
+	b	__start_initialization_multiplatform	/* Continue boot there */
+END_FTR_SECTION(0, 1)
+
+	/* Catch branch to 0 in real mode */
+	trap
+
+	/* Secondary processors spin on this value until it becomes non-zero.
+	 * When non-zero, it contains the real address of the function the cpu
+	 * should jump to.
+	 */
+	.balign 8
+	.globl  __secondary_hold_spinloop
+__secondary_hold_spinloop:
+	.8byte	0x0
+
+	/* Secondary processors write this value with their cpu # */
+	/* after they enter the spin loop immediately below.	  */
+	.globl	__secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+	.8byte	0x0
+
+#ifdef CONFIG_RELOCATABLE
+	/* This flag is set to 1 by a loader if the kernel should run
+	 * at the loaded address instead of the linked address.  This
+	 * is used by kexec-tools to keep the kdump kernel in the
+	 * crash_kernel region.  The loader is responsible for
+	 * observing the alignment requirement.
+	 */
+
+#ifdef CONFIG_RELOCATABLE_TEST
+#define RUN_AT_LOAD_DEFAULT 1		/* Test relocation, do not copy to 0 */
+#else
+#define RUN_AT_LOAD_DEFAULT 0x72756e30  /* "run0" -- relocate to 0 by default */
+#endif
+
+	/* Do not move this variable as kexec-tools knows about it. */
+	. = 0x5c
+	.globl	__run_at_load
+__run_at_load:
+DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
+	.long	RUN_AT_LOAD_DEFAULT
+#endif
+
+	. = 0x60
+/*
+ * The following code is used to hold secondary processors
+ * in a spin loop after they have entered the kernel, but
+ * before the bulk of the kernel has been relocated.  This code
+ * is relocated to physical address 0x60 before prom_init is run.
+ * All of it must fit below the first exception vector at 0x100.
+ * Use .globl here not _GLOBAL because we want __secondary_hold
+ * to be the actual text address, not a descriptor.
+ */
+	.globl	__secondary_hold
+__secondary_hold:
+	FIXUP_ENDIAN
+#ifndef CONFIG_PPC_BOOK3E_64
+	mfmsr	r24
+	ori	r24,r24,MSR_RI
+	mtmsrd	r24			/* RI on */
+#endif
+	/* Grab our physical cpu number */
+	mr	r24,r3
+	/* stash r4 for book3e */
+	mr	r25,r4
+
+	/* Tell the master cpu we're here */
+	/* Relocation is off & we are located at an address less */
+	/* than 0x100, so only need to grab low order offset.    */
+	std	r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)
+	sync
+
+	/* All secondary cpus wait here until told to start. */
+100:	ld	r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0)
+	cmpdi	0,r12,0
+	beq	100b			/* Still zero: keep spinning */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
+#ifdef CONFIG_PPC_BOOK3E_64
+	tovirt(r12,r12)
+#endif
+	mtctr	r12			/* Target = value read from the spinloop word */
+	mr	r3,r24			/* Pass our physical cpu number in r3 */
+	/*
+	 * it may be the case that other platforms have r4 right to
+	 * begin with, this gives us some safety in case it is not
+	 */
+#ifdef CONFIG_PPC_BOOK3E_64
+	mr	r4,r25			/* Restore the r4 stashed on entry */
+#else
+	li	r4,0
+#endif
+	/* Make sure that patched code is visible */
+	isync
+	bctr				/* Jump to the released entry point */
+#else
+0:	trap				/* Neither SMP nor KEXEC_CORE: BUG if ever released */
+	EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+#endif
+CLOSE_FIXED_SECTION(first_256B)
+
+/*
+ * On server, we include the exception vectors code here as it
+ * relies on absolute addressing which is only possible within
+ * this compilation unit
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#include "exceptions-64s.S"
+#else
+OPEN_TEXT_SECTION(0x100)
+#endif
+
+USE_TEXT_SECTION()
+
+#include "interrupt_64.S"
+
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * The booting_thread_hwid holds the thread id we want to boot in cpu
+ * hotplug case. It is set by cpu hotplug code, and is invalid by default.
+ * The thread id is the same as the initial value of SPRN_PIR[THREAD_ID]
+ * bit field.
+ */
+	.globl	booting_thread_hwid
+booting_thread_hwid:
+	.long  INVALID_THREAD_HWID
+	.align 3
+/*
+ * start a thread in the same core
+ * input parameters:
+ * r3 = the thread physical id (only 0 and 1 are handled; others return at once)
+ * r4 = the entry point where thread starts
+ */
+_GLOBAL(book3e_start_thread)
+	LOAD_REG_IMMEDIATE(r5, MSR_KERNEL)
+	cmpwi	r3, 0
+	beq	10f			/* Thread 0: program IMSR0/INIA0 */
+	cmpwi	r3, 1
+	beq	11f			/* Thread 1: program IMSR1/INIA1 */
+	/* If the thread id is invalid, just exit. */
+	b	13f
+10:
+	MTTMR(TMRN_IMSR0, 5)		/* NOTE(review): 5 presumably names r5 (MSR_KERNEL) - confirm */
+	MTTMR(TMRN_INIA0, 4)		/* NOTE(review): 4 presumably names r4 (entry point) - confirm */
+	b	12f
+11:
+	MTTMR(TMRN_IMSR1, 5)
+	MTTMR(TMRN_INIA1, 4)
+12:
+	isync
+	li	r6, 1
+	sld	r6, r6, r3		/* r6 = 1 << thread id */
+	mtspr	SPRN_TENS, r6		/* Write that bit to TENS */
+13:
+	blr
+
+/*
+ * stop a thread in the same core
+ * input parameter:
+ * r3 = the thread physical id (only 0 and 1 are handled; others return at once)
+ */
+_GLOBAL(book3e_stop_thread)
+	cmpwi	r3, 0
+	beq	10f
+	cmpwi	r3, 1
+	beq	10f			/* Both valid ids share the same path */
+	/* If the thread id is invalid, just exit. */
+	b	13f
+10:
+	li	r4, 1
+	sld	r4, r4, r3		/* r4 = 1 << thread id */
+	mtspr	SPRN_TENC, r4		/* Write that bit to TENC */
+13:
+	blr
+
+_GLOBAL(fsl_secondary_thread_init)
+	mfspr	r4,SPRN_BUCSR		/* Old BUCSR, tested below to detect a thread that ran before */
+
+	/* Enable branch prediction */
+	lis     r3,BUCSR_INIT@h
+	ori     r3,r3,BUCSR_INIT@l
+	mtspr   SPRN_BUCSR,r3
+	isync
+
+	/*
+	 * Fix PIR to match the linear numbering in the device tree.
+	 *
+	 * On e6500, the reset value of PIR uses the low three bits for
+	 * the thread within a core, and the upper bits for the core
+	 * number.  There are two threads per core, so shift everything
+	 * but the low bit right by two bits so that the cpu numbering is
+	 * continuous.
+	 *
+	 * If the old value of BUCSR is non-zero, this thread has run
+	 * before.  Thus, we assume we are coming from kexec or a similar
+	 * scenario, and PIR is already set to the correct value.  This
+	 * is a bit of a hack, but there are limited opportunities for
+	 * getting information into the thread and the alternatives
+	 * seemed like they'd be overkill.  We can't tell just by looking
+	 * at the old PIR value which state it's in, since the same value
+	 * could be valid for one thread out of reset and for a different
+	 * thread in Linux.
+	 */
+
+	mfspr	r3, SPRN_PIR
+	cmpwi	r4,0			/* Old BUCSR == 0 ? */
+	bne	1f			/* Non-zero: keep PIR as it is */
+	rlwimi	r3, r3, 30, 2, 30	/* Rotate right by 2 into bits 2..30; low bit kept */
+	mtspr	SPRN_PIR, r3
+1:
+	mr	r24,r3			/* Keep the cpu number across the calls below */
+
+	/* turn on 64-bit mode */
+	bl	enable_64b_mode
+
+	/* Book3E initialization */
+	mr	r3,r24
+	bl	book3e_secondary_thread_init
+	bl	relative_toc
+
+	b	generic_secondary_common_init
+
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+/*
+ * On pSeries and most other platforms, secondary processors spin
+ * in the following code.
+ * At entry, r3 = this processor's number (physical cpu id)
+ *
+ * On Book3E, r4 = 1 to indicate that the initial TLB entry for
+ * this core already exists (setup via some other mechanism such
+ * as SCOM before entry).
+ */
+_GLOBAL(generic_secondary_smp_init)
+	FIXUP_ENDIAN
+
+	li	r13,0
+
+	/* Poison TOC */
+	li	r2,-1
+
+	mr	r24,r3		/* r24 = physical cpu id */
+	mr	r25,r4		/* r25 = Book3E "TLB entry exists" flag */
+
+	/* turn on 64-bit mode */
+	bl	enable_64b_mode
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	/* Book3E initialization */
+	mr	r3,r24
+	mr	r4,r25
+	bl	book3e_secondary_core_init
+	/* Now NIA and r2 are relocated to PAGE_OFFSET if not already */
+/*
+ * After common core init has finished, check if the current thread is the
+ * one we wanted to boot. If not, start the specified thread and stop the
+ * current thread.
+ */
+	LOAD_REG_ADDR(r4, booting_thread_hwid)
+	lwz     r3, 0(r4)
+	li	r5, INVALID_THREAD_HWID
+	cmpw	r3, r5
+	beq	20f		/* no thread requested: carry on as this thread */
+
+	/*
+	 * The value of booting_thread_hwid has been stored in r3,
+	 * so make it invalid.
+	 */
+	stw	r5, 0(r4)
+
+	/*
+	 * Get the current thread id and check if it is the one we wanted.
+	 * If not, start the one specified in booting_thread_hwid and stop
+	 * the current thread.
+	 */
+	mfspr	r8, SPRN_TIR
+	cmpw	r3, r8
+	beq	20f
+
+	/* start the specified thread: r3 = thread id, r4 = its entry point */
+	LOAD_REG_ADDR(r4, DOTSYM(fsl_secondary_thread_init))
+	bl	book3e_start_thread
+
+	/* stop the current thread */
+	mr	r3, r8
+	bl	book3e_stop_thread
+10:
+	b	10b		/* spin here after requesting our own stop */
+20:
+#else
+	/* Now the MMU is off, can branch to our PAGE_OFFSET address */
+	bcl	20,31,$+4
+1:	mflr	r11
+	addi	r11,r11,(2f - 1b)
+	tovirt(r11, r11)
+	mtctr	r11
+	bctr
+2:
+	bl	relative_toc
+#endif
+
+generic_secondary_common_init:
+	/* Set up a paca value for this processor. Since we have the
+	 * physical cpu id in r24, we need to search the pacas to find
+	 * which logical id maps to our physical one.
+	 */
+#ifndef CONFIG_SMP
+	b	kexec_wait		/* wait for next kernel if !SMP	 */
+#else
+	LOAD_REG_ADDR(r8, paca_ptrs)	/* Load paca_ptrs pointer	 */
+	ld	r8,0(r8)		/* Get base vaddr of array	 */
+#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
+	LOAD_REG_IMMEDIATE(r7, NR_CPUS)	/* r7 = loop bound, fixed at build time */
+#else
+	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
+	lwz	r7,0(r7)		/* also the max paca allocated 	 */
+#endif
+	li	r5,0			/* logical cpu id                */
+1:
+	sldi	r9,r5,3			/* get paca_ptrs[] index from cpu id */
+	ldx	r13,r9,r8		/* r13 = paca_ptrs[cpu id]       */
+	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
+	cmpw	r6,r24			/* Compare to our id             */
+	beq	2f			/* match: r5 is our logical id   */
+	addi	r5,r5,1
+	cmpw	r5,r7			/* Check if more pacas exist     */
+	blt	1b
+
+	mr	r3,r24			/* not found, copy phys to r3	 */
+	b	kexec_wait		/* next kernel might do better	 */
+
+2:	SET_PACA(r13)
+#ifdef CONFIG_PPC_BOOK3E_64
+	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another  */
+	mtspr	SPRN_SPRG_TLB_EXFRAME,r12
+#endif
+
+	/* From now on, r24 is expected to be logical cpuid */
+	mr	r24,r5
+
+	/* Create a temp kernel stack for use before relocation is on.	*/
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_MIN_SIZE
+
+	/* See if we need to call a cpu state restore handler */
+	LOAD_REG_ADDR(r23, cur_cpu_spec)
+	ld	r23,0(r23)
+	ld	r12,CPU_SPEC_RESTORE(r23)
+	cmpdi	0,r12,0
+	beq	3f			/* no handler registered */
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	ld	r12,0(r12)		/* first doubleword of what r12 points at */
+#endif
+	mtctr	r12
+	bctrl
+
+3:	LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */
+	lwarx	r4,0,r3
+	subi	r4,r4,1
+	stwcx.	r4,0,r3
+	bne	3b			/* reservation lost: retry       */
+	isync
+
+4:	HMT_LOW
+	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
+					/* start.			 */
+	cmpwi	0,r23,0
+	beq	4b			/* Loop until told to go	 */
+
+	sync				/* order paca.run and cur_cpu_spec */
+	isync				/* In case code patching happened */
+
+	b	__secondary_start
+#endif /* SMP */
+
+/*
+ * Turn the MMU off (clear MSR_IR and MSR_DR); uses r0, r3, r4, SRR0/1.
+ * Assumes we're mapped EA == RA if the MMU is on.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+SYM_FUNC_START_LOCAL(__mmu_off)
+	mfmsr	r3
+	andi.	r0,r3,MSR_IR|MSR_DR	/* r0 = translation bits currently set */
+	beqlr				/* none set: already off, just return */
+	mflr	r4
+	andc	r3,r3,r0		/* r3 = MSR with those bits cleared */
+	mtspr	SPRN_SRR0,r4		/* resume at our return address ... */
+	mtspr	SPRN_SRR1,r3		/* ... with the new MSR */
+	sync
+	rfid
+	b	.	/* prevent speculative execution */
+SYM_FUNC_END(__mmu_off)
+
+SYM_FUNC_START_LOCAL(start_initialization_book3s)
+	mflr	r25		/* save return address; the calls below reuse LR */
+
+	/* Setup some critical 970 SPRs before switching MMU off */
+	mfspr	r0,SPRN_PVR
+	srwi	r0,r0,16	/* compare on PVR >> 16 */
+	cmpwi	r0,0x39		/* 970 */
+	beq	1f
+	cmpwi	r0,0x3c		/* 970FX */
+	beq	1f
+	cmpwi	r0,0x44		/* 970MP */
+	beq	1f
+	cmpwi	r0,0x45		/* 970GX */
+	bne	2f		/* none of the above: skip the 970 preinit */
+1:	bl	__cpu_preinit_ppc970
+2:
+
+	/* Switch off MMU if not already off */
+	bl	__mmu_off
+
+	/* Now the MMU is off, can return to our PAGE_OFFSET address */
+	tovirt(r25,r25)
+	mtlr	r25
+	blr
+SYM_FUNC_END(start_initialization_book3s)
+#endif
+
+/*
+ * Here is our main kernel entry point. We currently support 2 kinds of
+ * entry, depending on the value of r5.
+ *
+ *   r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content
+ *                 in r3...r7
+ *
+ *   r5 == NULL -> kexec style entry. r3 is a physical pointer to the
+ *                 DT block, r4 is a physical pointer to the kernel itself
+ *
+ */
+__start_initialization_multiplatform:
+	/* Make sure we are running in 64 bits mode */
+	bl	enable_64b_mode
+
+	/* Zero r13 (paca) so early program check / mce don't use it */
+	li	r13,0
+
+	/* Poison TOC */
+	li	r2,-1
+
+	/*
+	 * Are we booted from a PROM Of-type client-interface ?
+	 */
+	cmpldi	cr0,r5,0
+	beq	1f				/* r5 == 0 -> kexec style entry */
+	b	__boot_from_prom		/* yes -> prom */
+1:
+	/* Save parameters */
+	mr	r31,r3
+	mr	r30,r4
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+	/* Save OPAL entry */
+	mr	r28,r8
+	mr	r29,r9
+#endif
+
+	/* Get TOC pointer (current runtime address) */
+	bl	relative_toc
+
+	/* These functions return to the virtual (PAGE_OFFSET) address */
+#ifdef CONFIG_PPC_BOOK3E_64
+	bl	start_initialization_book3e
+#else
+	bl	start_initialization_book3s
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+	/* Get TOC pointer, virtual */
+	bl	relative_toc
+
+	/* find out where we are now */
+
+	/* OPAL doesn't pass base address in r4, have to derive it. */
+	bcl	20,31,$+4
+0:	mflr	r26			/* r26 = runtime addr here */
+	addis	r26,r26,(_stext - 0b)@ha
+	addi	r26,r26,(_stext - 0b)@l	/* current runtime base addr */
+
+	b	__after_prom_start
+
+__REF
+__boot_from_prom:
+#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+	/* Get TOC pointer, non-virtual */
+	bl	relative_toc
+
+	/* find out where we are now */
+	bcl	20,31,$+4
+0:	mflr	r26			/* r26 = runtime addr here */
+	addis	r26,r26,(_stext - 0b)@ha
+	addi	r26,r26,(_stext - 0b)@l	/* current runtime base addr */
+
+	/* Save parameters */
+	mr	r31,r3
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+
+	/*
+	 * Align the stack to 16-byte boundary
+	 * Depending on the size and layout of the ELF sections in the initial
+	 * boot binary, the stack pointer may be unaligned on PowerMac
+	 */
+	rldicr	r1,r1,0,59		/* clear the low 4 bits of r1 */
+
+#ifdef CONFIG_RELOCATABLE
+	/* Relocate code for where we are now */
+	mr	r3,r26
+	bl	relocate
+#endif
+
+	/* Restore parameters */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+
+	/* Do all of the interaction with OF client interface */
+	mr	r8,r26			/* r8 = runtime base addr computed above */
+	bl	CFUNC(prom_init)
+#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */
+
+	/* We never return. We also hit that trap if trying to boot
+	 * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
+	trap
+	.previous
+
+__after_prom_start:
+#ifdef CONFIG_RELOCATABLE
+	/* process relocations for the final address of the kernel */
+	lwz	r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
+	cmplwi	cr0,r7,1	/* flagged to stay where we are ? */
+	mr	r25,r26		/* then use current kernel base */
+	beq	1f
+	LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */
+1:	mr	r3,r25
+	bl	relocate
+#if defined(CONFIG_PPC_BOOK3E_64)
+	/* IVPR needs to be set after relocation. */
+	bl	init_core_book3e
+#endif
+#endif
+
+/*
+ * We need to run with _stext at physical address PHYSICAL_START.
+ * This will leave some code in the first 256B of
+ * real memory, which are reserved for software use.
+ *
+ * Note: This process overwrites the OF exception vectors.
+ */
+	LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)	/* r3 = copy destination */
+	mr.	r4,r26			/* In some cases the loader may  */
+	beq	9f			/* have already put us at zero */
+	li	r6,0x100		/* Start offset, the first 0x100 */
+					/* bytes were copied earlier.	 */
+
+#ifdef CONFIG_RELOCATABLE
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __run_at_load, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+	lwz	r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
+	cmplwi	cr0,r7,1
+	bne	3f			/* not flagged: do the full copy below */
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	LOAD_REG_ADDR(r5, __end_interrupts)
+	LOAD_REG_ADDR(r11, _stext)
+	sub	r5,r5,r11		/* r5 = __end_interrupts - _stext */
+#else
+	/* just copy interrupts */
+	LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+#endif
+	b	5f
+3:
+#endif
+	/* # bytes of memory to copy */
+	lis	r5,(ABS_ADDR(copy_to_here, text))@ha
+	addi	r5,r5,(ABS_ADDR(copy_to_here, text))@l
+
+	bl	copy_and_flush		/* copy the first n bytes	 */
+					/* this includes the code being	 */
+					/* executed here.		 */
+	/* Jump to the copy of this code that we just made */
+	addis	r8,r3,(ABS_ADDR(4f, text))@ha
+	addi	r12,r8,(ABS_ADDR(4f, text))@l
+	mtctr	r12
+	bctr
+
+.balign 8
+p_end: .8byte _end - copy_to_here
+
+4:
+	/*
+	 * Now copy the rest of the kernel up to _end, add
+	 * _end - copy_to_here to the copy limit and run again.
+	 */
+	addis   r8,r26,(ABS_ADDR(p_end, text))@ha
+	ld      r8,(ABS_ADDR(p_end, text))@l(r8)
+	add	r5,r5,r8		/* new copy limit */
+5:	bl	copy_and_flush		/* copy the rest */
+
+9:	b	start_here_multiplatform
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * On exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ * Data moves in groups of 8 doublewords (64 bytes) per cache flush.
+ * Note: of the GPRs, only r0 and r6 are clobbered; ctr and cr0 are used too
+ */
+_GLOBAL(copy_and_flush)
+	addi	r5,r5,-8		/* bias limit and offset by -8: the	*/
+	addi	r6,r6,-8		/* loop increments r6 before each copy	*/
+4:	li	r0,8			/* Use the smallest common	*/
+					/* denominator cache line	*/
+					/* size.  This results in	*/
+					/* extra cache line flushes	*/
+					/* but operation is correct.	*/
+					/* Can't get cache line size	*/
+					/* from NACA as it is being	*/
+					/* moved too.			*/
+
+	mtctr	r0			/* put # doublewords/line in ctr */
+3:	addi	r6,r6,8			/* copy a cache line		*/
+	ldx	r0,r6,r4
+	stdx	r0,r6,r3
+	bdnz	3b
+	dcbst	r6,r3			/* write it to memory		*/
+	sync
+	icbi	r6,r3			/* flush the icache line	*/
+	cmpld	0,r6,r5
+	blt	4b			/* below the limit: next line	*/
+	sync
+	addi	r5,r5,8			/* undo the -8 bias from entry	*/
+	addi	r6,r6,8
+	isync
+	blr
+
+_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */
+
+.align 8
+copy_to_here:
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC_PMAC
+/*
+ * On PowerMac, secondary processors start from the reset vector, which
+ * is temporarily turned into a call to one of the functions below.
+ */
+	.section ".text";
+	.align 2 ;
+
+	.globl	__secondary_start_pmac_0
+__secondary_start_pmac_0:
+	/* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
+	li	r24,0		/* each entry sets r24 = cpu number */
+	b	1f
+	li	r24,1
+	b	1f
+	li	r24,2
+	b	1f
+	li	r24,3		/* last entry falls through to 1: */
+1:
+	
+_GLOBAL(pmac_secondary_start)
+	/* turn on 64-bit mode */
+	bl	enable_64b_mode
+
+	li	r0,0
+	mfspr	r3,SPRN_HID4
+	rldimi	r3,r0,40,23	/* clear bit 23 (rm_ci) */
+	sync
+	mtspr	SPRN_HID4,r3
+	isync
+	sync
+	slbia
+
+	/* Branch to our PAGE_OFFSET address */
+	bcl	20,31,$+4
+1:	mflr	r11
+	addi	r11,r11,(2f - 1b)	/* r11 = current address of 2: */
+	tovirt(r11, r11)
+	mtctr	r11
+	bctr
+2:
+	bl	relative_toc
+
+	/* Copy some CPU settings from CPU 0 */
+	bl	__restore_cpu_ppc970
+
+	/* pSeries do that early though I don't think we really need it */
+	mfmsr	r3
+	ori	r3,r3,MSR_RI
+	mtmsrd	r3			/* RI on */
+
+	/* Set up a paca value for this processor. */
+	LOAD_REG_ADDR(r4,paca_ptrs)	/* Load paca pointer		*/
+	ld	r4,0(r4)		/* Get base vaddr of paca_ptrs array */
+	sldi	r5,r24,3		/* get paca_ptrs[] index from cpu id */
+	ldx	r13,r5,r4		/* r13 = paca_ptrs[cpu id]       */
+	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
+
+	/* Mark interrupts soft and hard disabled (they might be enabled
+	 * in the PACA when doing hotplug)
+	 */
+	li	r0,IRQS_DISABLED
+	stb	r0,PACAIRQSOFTMASK(r13)
+	li	r0,PACA_IRQ_HARD_DIS
+	stb	r0,PACAIRQHAPPENED(r13)
+
+	/* Create a temp kernel stack for use before relocation is on.	*/
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_MIN_SIZE
+
+	b	__secondary_start
+
+#endif /* CONFIG_PPC_PMAC */
+
+/*
+ * This function is called after the master CPU has released the
+ * secondary processors.  The execution environment is relocation off.
+ * The paca for this processor has the following fields initialized at
+ * this point:
+ *   1. Processor number
+ *   2. Segment table pointer (virtual address)
+ * On entry the following are set:
+ *   r1	       = stack pointer (real addr of temp stack)
+ *   r24       = cpu# (in Linux terms)
+ *   r13       = paca virtual address
+ *   SPRG_PACA = paca virtual address
+ */
+	.section ".text";
+	.align 2 ;
+
+	.globl	__secondary_start
+__secondary_start:
+	/* Set thread priority to MEDIUM */
+	HMT_MEDIUM
+
+	/*
+	 * Do early setup for this CPU, in particular initialising the MMU so we
+	 * can turn it on below. This is a call to C, which is OK, we're still
+	 * running on the emergency stack.
+	 */
+	bl	CFUNC(early_setup_secondary)
+
+	/*
+	 * The primary has initialized our kernel stack for us in the paca, grab
+	 * it and put it in r1. We must *not* use it until we turn on the MMU
+	 * below, because it may not be inside the RMO.
+	 */
+	ld	r1, PACAKSAVE(r13)
+
+	/* Clear backchain so we get nice backtraces (this zeroes LR) */
+	li	r7,0
+	mtlr	r7
+
+	/* Mark interrupts soft and hard disabled (they might be enabled
+	 * in the PACA when doing hotplug)
+	 */
+	li	r7,IRQS_DISABLED
+	stb	r7,PACAIRQSOFTMASK(r13)
+	li	r0,PACA_IRQ_HARD_DIS
+	stb	r0,PACAIRQHAPPENED(r13)
+
+	/* enable MMU and jump to start_secondary */
+	LOAD_REG_ADDR(r3, start_secondary_prolog)
+	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
+
+	mtspr	SPRN_SRR0,r3		/* target: start_secondary_prolog */
+	mtspr	SPRN_SRR1,r4		/* MSR to install: MSR_KERNEL */
+	RFI_TO_KERNEL
+	b	.	/* prevent speculative execution */
+
+/*
+ * Running with relocation on at this point.  All we want to do is
+ * zero the stack back-chain pointer and get the TOC virtual address
+ * before going into C code.
+ */
+start_secondary_prolog:
+	LOAD_PACA_TOC()
+	li	r3,0
+	std	r3,0(r1)		/* Zero the stack frame pointer	*/
+	bl	CFUNC(start_secondary)
+	b	.			/* spin if start_secondary ever returns */
+/*
+ * Reset stack pointer and call start_secondary
+ * to continue with online operation when woken up
+ * from cede in cpu offline.  Expects r13 to hold the paca.
+ */
+_GLOBAL(start_secondary_resume)
+	ld	r1,PACAKSAVE(r13)	/* Reload kernel stack pointer */
+	li	r3,0
+	std	r3,0(r1)		/* Zero the stack frame pointer	*/
+	bl	CFUNC(start_secondary)
+	b	.			/* spin if start_secondary ever returns */
+#endif
+
+/*
+ * Set the 64-bit mode bit in the MSR.  Clobbers r11 and (non-Book3E) r12.
+ */
+SYM_FUNC_START_LOCAL(enable_64b_mode)
+	mfmsr	r11			/* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3E_64
+	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
+	mtmsr	r11
+#else /* CONFIG_PPC_BOOK3E_64 */
+	LOAD_REG_IMMEDIATE(r12, MSR_64BIT)
+	or	r11,r11,r12		/* r11 = MSR | MSR_64BIT */
+	mtmsrd	r11
+	isync
+#endif
+	blr
+SYM_FUNC_END(enable_64b_mode)
+
+/*
+ * This puts the TOC pointer into r2, offset by 0x8000 (as expected
+ * by the toolchain).  It computes the correct value for wherever we
+ * are running at the moment, using position-independent code.
+ *
+ * Note: The compiler constructs pointers using offsets from the
+ * TOC in -mcmodel=medium mode. After we relocate to 0 but before
+ * the MMU is on we need our TOC to be a virtual address otherwise
+ * these pointers will be real addresses which may get stored and
+ * accessed later with the MMU on. We branch to the virtual address
+ * while still in real mode then call relative_toc again to handle
+ * this.  The non-PCREL path clobbers r0 and r11 and preserves LR.
+ */
+_GLOBAL(relative_toc)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	tdnei	r2,-1			/* trap unless r2 is still the -1 poison */
+	blr
+#else
+	mflr	r0			/* save caller's LR; bcl overwrites it */
+	bcl	20,31,$+4
+0:	mflr	r11			/* r11 = runtime address of 0: */
+	ld	r2,(p_toc - 0b)(r11)	/* r2 = .TOC. - 0b (link-time offset) */
+	add	r2,r2,r11		/* r2 = runtime address of .TOC. */
+	mtlr	r0
+	blr
+
+.balign 8
+p_toc:	.8byte	.TOC. - 0b
+#endif
+
+/*
+ * This is where the main kernel code starts.
+ */
+__REF
+start_here_multiplatform:
+	/* Adjust TOC for moved kernel. Could adjust when moving it instead. */
+	bl	relative_toc
+
+	/* Clear out the BSS. It may have been done in prom_init,
+	 * already but that's irrelevant since prom_init will soon
+	 * be detached from the kernel completely. Besides, we need
+	 * to clear it now for kexec-style entry.
+	 */
+	LOAD_REG_ADDR(r11,__bss_stop)
+	LOAD_REG_ADDR(r8,__bss_start)
+	sub	r11,r11,r8		/* bss size			*/
+	addi	r11,r11,7		/* round up to a whole doubleword */
+	srdi.	r11,r11,3		/* shift right by 3		*/
+	beq	4f			/* empty bss: nothing to clear	*/
+	addi	r8,r8,-8		/* pre-bias for the stdu below	*/
+	li	r0,0
+	mtctr	r11			/* zero this many doublewords	*/
+3:	stdu	r0,8(r8)
+	bdnz	3b
+4:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+	/* Setup OPAL entry (r28/r29 were saved from r8/r9 at entry) */
+	LOAD_REG_ADDR(r11, opal)
+	std	r28,0(r11);
+	std	r29,8(r11);
+#endif
+
+#ifndef CONFIG_PPC_BOOK3E_64
+	mfmsr	r6
+	ori	r6,r6,MSR_RI
+	mtmsrd	r6			/* RI on */
+#endif
+
+#ifdef CONFIG_RELOCATABLE
+	/* Save the physical address we're running at in kernstart_addr */
+	LOAD_REG_ADDR(r4, kernstart_addr)
+	clrldi	r0,r25,2		/* r25 with its top 2 bits cleared */
+	std	r0,0(r4)
+#endif
+
+	/* set up a stack pointer */
+	LOAD_REG_ADDR(r3,init_thread_union)
+	LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
+	add	r1,r3,r1		/* r1 = init_thread_union + THREAD_SIZE */
+	li	r0,0
+	stdu	r0,-STACK_FRAME_MIN_SIZE(r1)	/* push a frame with a zero back chain */
+
+	/*
+	 * Do very early kernel initializations, including initial hash table
+	 * and SLB setup before we turn on relocation.
+	 */
+
+#ifdef CONFIG_KASAN
+	bl	CFUNC(kasan_early_init)
+#endif
+	/* Restore parameters passed from prom_init/kexec */
+	mr	r3,r31
+	LOAD_REG_ADDR(r12, DOTSYM(early_setup))
+	mtctr	r12
+	bctrl		/* also sets r13 and SPRG_PACA */
+
+	LOAD_REG_ADDR(r3, start_here_common)
+	ld	r4,PACAKMSR(r13)
+	mtspr	SPRN_SRR0,r3		/* target: start_here_common */
+	mtspr	SPRN_SRR1,r4		/* MSR to install, read from the paca */
+	RFI_TO_KERNEL
+	b	.	/* prevent speculative execution */
+
+	/* This is where all platforms converge execution */
+
+start_here_common:
+	/* relocation is on at this point */
+	std	r1,PACAKSAVE(r13)	/* record the stack pointer in the paca */
+
+	/* Load the TOC (virtual address) */
+	LOAD_PACA_TOC()
+
+	/* Mark interrupts soft and hard disabled (they might be enabled
+	 * in the PACA when doing hotplug)
+	 */
+	li	r0,IRQS_DISABLED
+	stb	r0,PACAIRQSOFTMASK(r13)
+	li	r0,PACA_IRQ_HARD_DIS
+	stb	r0,PACAIRQHAPPENED(r13)
+
+	/* Generic kernel entry */
+	bl	CFUNC(start_kernel)
+
+	/* Not reached; trap and emit a bug entry if start_kernel returns */
+0:	trap
+	EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+	.previous
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
new file mode 100644
index 0000000000..0f1641a312
--- /dev/null
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -0,0 +1,1230 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Kernel execution entry point code.
+ *
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *	Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *	Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *	Low-level exception handers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *	PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *	PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *	PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *	PowerPC 403GCX/405GP modifications.
+ *	Author: MontaVista Software, Inc.
+ *		frank_rowand@mvista.com or source@mvista.com
+ *		debbie_chu@mvista.com
+ *    Copyright 2002-2004 MontaVista Software, Inc.
+ *	PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ *    Copyright 2004 Freescale Semiconductor, Inc
+ *	PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
+ */
+
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
+#include "head_booke.h"
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=128")
+ *   r7 - End of kernel command line string
+ *
+ */
+	__HEAD
+_GLOBAL(_stext);
+_GLOBAL(_start);
+	/*
+	 * Reserve a word at a fixed location to store the address
+	 * of abatron_pteptrs
+	 */
+	nop
+
+	/* Translate device tree address to physical, save in r30/r31 */
+	bl	get_phys_addr
+	mr	r30,r3
+	mr	r31,r4
+
+	li	r25,0			/* phys kernel start (low) */
+	li	r24,0			/* CPU number */
+	li	r23,0			/* phys kernel start (high) */
+
+#ifdef CONFIG_RELOCATABLE
+	LOAD_REG_ADDR_PIC(r3, _stext)	/* Get our current runtime base */
+
+	/* Translate _stext address to physical, save in r23/r25 */
+	bl	get_phys_addr
+	mr	r23,r3
+	mr	r25,r4
+
+	bcl	20,31,$+4
+0:	mflr	r8			/* r8 = runtime address of 0: */
+	addis	r3,r8,(is_second_reloc - 0b)@ha
+	lwz	r19,(is_second_reloc - 0b)@l(r3)	/* r19 = is_second_reloc */
+
+	/* Check if this is the second relocation. */
+	cmpwi	r19,1
+	bne	1f
+
+	/*
+	 * For the second relocation, we already get the real memstart_addr
+	 * from device tree. So we will map PAGE_OFFSET to memstart_addr,
+	 * then the virtual address of start kernel should be:
+	 *          PAGE_OFFSET + (kernstart_addr - memstart_addr)
+	 * Since the offset between kernstart_addr and memstart_addr should
+	 * never be beyond 1G, so we can just use the lower 32bit of them
+	 * for the calculation.
+	 */
+	lis	r3,PAGE_OFFSET@h
+
+	addis	r4,r8,(kernstart_addr - 0b)@ha
+	addi	r4,r4,(kernstart_addr - 0b)@l
+	lwz	r5,4(r4)		/* word at offset 4 of kernstart_addr */
+
+	addis	r6,r8,(memstart_addr - 0b)@ha
+	addi	r6,r6,(memstart_addr - 0b)@l
+	lwz	r7,4(r6)		/* word at offset 4 of memstart_addr */
+
+	subf	r5,r7,r5		/* r5 = r5 - r7 */
+	add	r3,r3,r5		/* r3 = PAGE_OFFSET@h base + offset */
+	b	2f
+
+1:
+	/*
+	 * We have the runtime (virtual) address of our base.
+	 * We calculate our shift of offset from a 64M page.
+	 * We could map the 64M page we belong to at PAGE_OFFSET and
+	 * get going from there.
+	 */
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	rlwinm	r6,r25,0,0x3ffffff		/* r6 = PHYS_START % 64M */
+	rlwinm	r5,r4,0,0x3ffffff		/* r5 = KERNELBASE % 64M */
+	subf	r3,r5,r6			/* r3 = r6 - r5 */
+	add	r3,r4,r3			/* Required Virtual Address */
+
+2:	bl	relocate
+
+	/*
+	 * For the second relocation, we already set the right tlb entries
+	 * for the kernel space, so skip the code in 85xx_entry_mapping.S
+	 */
+	cmpwi	r19,1
+	beq	set_ivor
+#endif
+
+/* We try to not make any assumptions about how the boot loader
+ * setup or used the TLBs.  We invalidate all mappings from the
+ * boot loader and load a single entry in TLB1[0] to map the
+ * first 64M of kernel memory.  Any boot info passed from the
+ * bootloader needs to live in this first 64M.
+ *
+ * Requirement on bootloader:
+ *  - The page we're executing in needs to reside in TLB1 and
+ *    have IPROT=1.  If not an invalidate broadcast could
+ *    evict the entry we're currently executing in.
+ *
+ *  r3 = Index of TLB1 we're executing in
+ *  r4 = Current MSR[IS]
+ *  r5 = Index of TLB1 temp mapping
+ *
+ * Later in mapin_ram we will correctly map lowmem, and resize TLB1[0]
+ * if needed
+ */
+
+_GLOBAL(__early_start)
+	LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+	lwz     r20,0(r20)		/* r20 = value of kernstart_virt_addr */
+
+#define ENTRY_MAPPING_BOOT_SETUP
+#include "85xx_entry_mapping.S"
+#undef ENTRY_MAPPING_BOOT_SETUP
+
+set_ivor:
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheck);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError);
+	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(15, DebugCrit);
+
+	/* Establish the interrupt vector base */
+	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
+	mtspr	SPRN_IVPR,r4
+
+	/* Setup the defaults for TLB entries */
+	li	r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
+	mtspr	SPRN_MAS4, r2
+
+#if !defined(CONFIG_BDI_SWITCH)
+	/*
+	 * The Abatron BDI JTAG debugger does not tolerate others
+	 * mucking with the debug registers.
+	 */
+	lis	r2,DBCR0_IDM@h
+	mtspr	SPRN_DBCR0,r2
+	isync
+	/* clear any residual debug events */
+	li	r2,-1
+	mtspr	SPRN_DBSR,r2
+#endif
+
+#ifdef CONFIG_SMP
+	/* Check to see if we're the second processor, and jump
+	 * to the secondary_start code if so
+	 */
+	LOAD_REG_ADDR_PIC(r24, boot_cpuid)
+	lwz	r24, 0(r24)
+	cmpwi	r24, -1			/* is boot_cpuid still -1 ? */
+	mfspr   r24,SPRN_PIR		/* r24 = PIR; does not touch cr0 */
+	bne	__secondary_start	/* tests the cmpwi above */
+#endif
+
+	/*
+	 * This is where the main kernel code starts.
+	 */
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to current thread */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG_THREAD,r4
+
+	/* stack */
+	lis	r1,init_thread_union@h
+	ori	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)	/* top frame, zero back chain */
+
+#ifdef CONFIG_SMP
+	stw	r24, TASK_CPU(r2)	/* store the PIR value read above */
+#endif
+
+	bl	early_init
+
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
+#ifdef CONFIG_RELOCATABLE
+	mr	r3,r30			/* r30/r31: phys device tree address */
+	mr	r4,r31
+#ifdef CONFIG_PHYS_64BIT
+	mr	r5,r23			/* phys kernel start (high) */
+	mr	r6,r25			/* phys kernel start (low) */
+#else
+	mr	r5,r25			/* phys kernel start (low) */
+#endif
+	bl	relocate_init
+#endif
+
+#ifdef CONFIG_DYNAMIC_MEMSTART
+	lis	r3,kernstart_addr@ha
+	la	r3,kernstart_addr@l(r3)
+#ifdef CONFIG_PHYS_64BIT
+	stw	r23,0(r3)		/* high word first, then low word */
+	stw	r25,4(r3)
+#else
+	stw	r25,0(r3)
+#endif
+#endif
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+	mr	r3,r30
+	mr	r4,r31
+	bl	machine_init
+	bl	MMU_init
+
+	/* Setup PTE pointers for the Abatron bdiGDB */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	lis     r3, kernstart_virt_addr@ha
+	lwz     r4, kernstart_virt_addr@l(r3)	/* r4 = value of kernstart_virt_addr */
+	stw	r5, 0(r4)	/* Save abatron_pteptrs at a fixed location */
+	stw	r6, 0(r5)	/* abatron_pteptrs[0] = &swapper_pg_dir */
+
+	/* Let's move on */
+	lis	r4,start_kernel@h
+	ori	r4,r4,start_kernel@l
+	lis	r3,MSR_KERNEL@h
+	ori	r3,r3,MSR_KERNEL@l
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi			/* change context and jump to start_kernel */
+
+/* Macros to hide the PTE size differences
+ *
+ * FIND_PTE -- walks the page tables given EA & pgdir pointer
+ *   r10 -- EA of fault
+ *   r11 -- PGDIR pointer
+ *   r12 -- free (used as scratch)
+ *   label 2: is the bailout case; the user of the macro must define it
+ *
+ * if we find the pte (fall through):
+ *   r11 is low pte word (the whole PTE when !CONFIG_PTE_64BIT)
+ *   r12 is pointer to the pte
+ *   r10 is the pshift from the PGD if we're a hugepage, else 0 (HUGETLB only)
+ */
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_HUGETLB_PAGE
+#define FIND_PTE	\
+	rlwinm	r12, r10, 13, 19, 29;	/* Compute pgdir/pmd offset */	\
+	lwzx	r11, r12, r11;		/* Get pgd/pmd entry */		\
+	rlwinm.	r12, r11, 0, 0, 20;	/* Extract pt base address */	\
+	blt	1000f;			/* Normal non-huge page */	\
+	beq	2f;			/* Bail if no table */		\
+	oris	r11, r11, PD_HUGE@h;	/* Put back address bit */	\
+	andi.	r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */	\
+	xor	r12, r10, r11;		/* drop size bits from pointer */ \
+	b	1001f;							\
+1000:	rlwimi	r12, r10, 23, 20, 28;	/* Compute pte address */	\
+	li	r10, 0;			/* clear r10 */			\
+1001:	lwz	r11, 4(r12);		/* Get pte entry */
+#else
+#define FIND_PTE	\
+	rlwinm	r12, r10, 13, 19, 29;	/* Compute pgdir/pmd offset */	\
+	lwzx	r11, r12, r11;		/* Get pgd/pmd entry */		\
+	rlwinm.	r12, r11, 0, 0, 20;	/* Extract pt base address */	\
+	beq	2f;			/* Bail if no table */		\
+	rlwimi	r12, r10, 23, 20, 28;	/* Compute pte address */	\
+	lwz	r11, 4(r12);		/* Get pte entry */
+#endif /* HUGEPAGE */
+#else /* !PTE_64BIT */
+#define FIND_PTE	\
+	rlwimi	r11, r10, 12, 20, 29;	/* Create L1 (pgdir/pmd) address */	\
+	lwz	r11, 0(r11);		/* Get L1 entry */			\
+	rlwinm.	r12, r11, 0, 0, 19;	/* Extract L2 (pte) base address */	\
+	beq	2f;			/* Bail if no table */			\
+	rlwimi	r12, r10, 22, 20, 29;	/* Compute PTE address */		\
+	lwz	r11, 0(r12);		/* Get Linux PTE */
+#endif
+
+/*
+ * Interrupt vector entry code
+ *
+ * The Book E MMUs are always on so we don't need to handle
+ * interrupts in real mode as with previous PPC processors. In
+ * this case we handle interrupts in the kernel virtual address
+ * space.
+ *
+ * Interrupt vectors are dynamically placed relative to the
+ * interrupt prefix as determined by the address of interrupt_base.
+ * The interrupt vector offsets are programmed using the labels
+ * for each interrupt vector entry.
+ *
+ * Interrupt vectors must be aligned on a 16 byte boundary.
+ * We align on a 32 byte cache line boundary for good measure.
+ */
+
+interrupt_base:
+	/* Critical Input Interrupt */
+	CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
+
+	/* Machine Check Interrupt */
+	MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+
+	/* Data Storage Interrupt */
+	START_EXCEPTION(DataStorage)
+	NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE)
+	mfspr	r5,SPRN_ESR		/* Grab the ESR, save it */
+	stw	r5,_ESR(r11)
+	mfspr	r4,SPRN_DEAR		/* Grab the DEAR, save it */
+	stw	r4, _DEAR(r11)
+	andis.	r10,r5,(ESR_ILK|ESR_DLK)@h	/* either ESR_ILK or ESR_DLK set ? */
+	bne	1f			/* yes: cache locking exception */
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
+1:
+	prepare_transfer_to_handler
+	bl	CacheLockingException
+	b	interrupt_return
+
+	/* Instruction Storage Interrupt */
+	INSTRUCTION_STORAGE_EXCEPTION
+
+	/* External Input Interrupt */
+	EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ)
+
+	/* Alignment Interrupt */
+	ALIGNMENT_EXCEPTION
+
+	/* Program Interrupt */
+	PROGRAM_EXCEPTION
+
+	/* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+	FP_UNAVAILABLE_EXCEPTION
+#else
+	EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt)
+#endif
+
+	/* System Call Interrupt */
+	START_EXCEPTION(SystemCall)
+	SYSCALL_ENTRY   0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
+
+	/* Auxiliary Processor Unavailable Interrupt */
+	EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception)
+
+	/* Decrementer Interrupt */
+	DECREMENTER_EXCEPTION
+
+	/* Fixed Interval Timer Interrupt */
+	/* TODO: Add FIT support */
+	EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception)
+
+	/* Watchdog Timer Interrupt */
+#ifdef CONFIG_BOOKE_WDT
+	CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
+#else
+	CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
+#endif
+
+	/* Data TLB Error Interrupt: software TLB miss handler for data accesses */
+	START_EXCEPTION(DataTLBError)
+	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+	mfspr	r10, SPRN_SPRG_THREAD
+	stw	r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+	mfspr	r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+	stw	r12, THREAD_NORMSAVE(1)(r10)
+	stw	r13, THREAD_NORMSAVE(2)(r10)
+	mfcr	r13			/* CR goes to NORMSAVE(3) */
+	stw	r13, THREAD_NORMSAVE(3)(r10)
+	DO_KVM	BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
+START_BTB_FLUSH_SECTION
+	mfspr r11, SPRN_SRR1
+	andi. r10,r11,MSR_PR		/* Test MSR[PR] of the interrupted context */
+	beq 1f				/* PR clear: skip the BTB flush */
+	BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	5, r10, r11		/* CR5 = faulting address vs PAGE_OFFSET */
+	blt	5, 3f			/* Below PAGE_OFFSET: use the thread's PGD */
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MAS1		/* Set TID to 0 */
+	rlwinm	r12,r12,0,16,1
+	mtspr	SPRN_MAS1,r12
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+
+#ifdef CONFIG_PPC_KUAP
+	mfspr	r12, SPRN_MAS1
+	rlwinm.	r12,r12,0,0x3fff0000	/* Isolate the field cleared by "Set TID to 0" above */
+	beq	2f			/* KUAP fault */
+#endif
+
+4:
+	/* Mask of required permission bits. Note that while we
+	 * do copy ESR:ST to _PAGE_RW position as trying to write
+	 * to an RO page is pretty common, we don't do it with
+	 * _PAGE_DIRTY. We could do it, but it's a fairly rare
+	 * event so I'd rather take the overhead when it happens
+	 * rather than adding an instruction here. We should measure
+	 * whether the whole thing is worth it in the first place
+	 * as we could avoid loading SPRN_ESR completely in the first
+	 * place...
+	 *
+	 * TODO: Is it worth doing that mfspr & rlwimi in the first
+	 *       place or can we save a couple of instructions here ?
+	 */
+	mfspr	r12,SPRN_ESR
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT|_PAGE_ACCESSED
+#endif
+	rlwimi	r13,r12,11,29,29	/* ESR:ST -> _PAGE_RW position (see above) */
+
+	FIND_PTE
+	andc.	r13,r13,r11		/* Check permission: required bits missing from r11 */
+
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+	subf	r13,r11,r12		/* create false data dep */
+	lwzx	r13,r11,r13		/* Get upper pte bits */
+#else
+	lwz	r13,0(r12)		/* Get upper pte bits */
+#endif
+#endif
+
+	bne	2f			/* Bail if permission/valid mismatch */
+
+	/* Jump to common tlb load */
+	b	finish_tlb_load
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r10, SPRN_SPRG_THREAD
+	lwz	r11, THREAD_NORMSAVE(3)(r10)
+	mtcr	r11			/* Restore the CR saved on entry */
+	lwz	r13, THREAD_NORMSAVE(2)(r10)
+	lwz	r12, THREAD_NORMSAVE(1)(r10)
+	lwz	r11, THREAD_NORMSAVE(0)(r10)
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	DataStorage		/* Continue in the DataStorage handler */
+
+	/* Instruction TLB Error Interrupt */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError)
+	mtspr	SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+	mfspr	r10, SPRN_SPRG_THREAD
+	stw	r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+	mfspr	r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+	stw	r12, THREAD_NORMSAVE(1)(r10)
+	stw	r13, THREAD_NORMSAVE(2)(r10)
+	mfcr	r13			/* CR goes to NORMSAVE(3) */
+	stw	r13, THREAD_NORMSAVE(3)(r10)
+	DO_KVM	BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
+START_BTB_FLUSH_SECTION
+	mfspr r11, SPRN_SRR1
+	andi. r10,r11,MSR_PR		/* Test MSR[PR] of the interrupted context */
+	beq 1f				/* PR clear: skip the BTB flush */
+	BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, PAGE_OFFSET@h
+	cmplw	5, r10, r11		/* CR5 = faulting address vs PAGE_OFFSET */
+	blt	5, 3f			/* Below PAGE_OFFSET: use the thread's PGD */
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MAS1		/* Set TID to 0 */
+	rlwinm	r12,r12,0,16,1
+	mtspr	SPRN_MAS1,r12
+
+	/* Make up the required permissions for kernel code */
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT | _PAGE_BAP_SX
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG_THREAD
+	lwz	r11,PGDIR(r11)
+
+#ifdef CONFIG_PPC_KUAP
+	mfspr	r12, SPRN_MAS1
+	rlwinm.	r12,r12,0,0x3fff0000	/* Isolate the field cleared by "Set TID to 0" above */
+	beq	2f			/* KUAP fault */
+#endif
+
+	/* Make up the required permissions for user code */
+#ifdef CONFIG_PTE_64BIT
+	li	r13,_PAGE_PRESENT | _PAGE_BAP_UX
+	oris	r13,r13,_PAGE_ACCESSED@h
+#else
+	li	r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+
+4:
+	FIND_PTE
+	andc.	r13,r13,r11		/* Check permission: required bits missing from r11 */
+
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+	subf	r13,r11,r12		/* create false data dep */
+	lwzx	r13,r11,r13		/* Get upper pte bits */
+#else
+	lwz	r13,0(r12)		/* Get upper pte bits */
+#endif
+#endif
+
+	bne	2f			/* Bail if permission mismatch */
+
+	/* Jump to common TLB load point */
+	b	finish_tlb_load
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r10, SPRN_SPRG_THREAD
+	lwz	r11, THREAD_NORMSAVE(3)(r10)
+	mtcr	r11			/* Restore the CR saved on entry */
+	lwz	r13, THREAD_NORMSAVE(2)(r10)
+	lwz	r12, THREAD_NORMSAVE(1)(r10)
+	lwz	r11, THREAD_NORMSAVE(0)(r10)
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	b	InstructionStorage	/* Continue in the InstructionStorage handler */
+
+/* Define SPE handlers for e500v2 */
+#ifdef CONFIG_SPE
+	/* SPE Unavailable */
+	START_EXCEPTION(SPEUnavailable)
+	NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL)
+	beq	1f			/* presumably CR0 from the prolog's user/kernel test - TODO confirm */
+	bl	load_up_spe		/* Enable SPE and load the thread's SPE state */
+	b	fast_exception_return
+1:	prepare_transfer_to_handler	/* Path that reports SPE use via KernelSPE */
+	bl	KernelSPE
+	b	interrupt_return
+#elif defined(CONFIG_SPE_POSSIBLE)
+	EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception)
+#endif /* CONFIG_SPE_POSSIBLE */
+
+	/* SPE Floating Point Data */
+#ifdef CONFIG_SPE
+	START_EXCEPTION(SPEFloatingPointData)
+	NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA)
+	prepare_transfer_to_handler
+	bl	SPEFloatingPointException
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+	/* SPE Floating Point Round */
+	START_EXCEPTION(SPEFloatingPointRound)
+	NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND)
+	prepare_transfer_to_handler
+	bl	SPEFloatingPointRoundException
+	REST_NVGPRS(r1)
+	b	interrupt_return
+#elif defined(CONFIG_SPE_POSSIBLE)
+	/* SPE not built in: both SPE FP vectors go to unknown_exception */
+	EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception)
+	EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception)
+#endif /* CONFIG_SPE_POSSIBLE */
+
+
+	/* Performance Monitor */
+	EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
+		  performance_monitor_exception)
+
+	EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception)
+
+	CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
+			   CriticalDoorbell, unknown_exception)
+
+	/* Debug Interrupt */
+	DEBUG_DEBUG_EXCEPTION
+	DEBUG_CRIT_EXCEPTION
+
+	GUEST_DOORBELL_EXCEPTION
+
+	CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
+			   unknown_exception)
+
+	/* Hypercall */
+	EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception)
+
+	/* Embedded Hypervisor Privilege */
+	EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception)
+
+interrupt_end:
+
+/*
+ * Local functions
+ */
+
+/*
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ *	r10 - tsize encoding (if HUGETLB_PAGE) or available to use
+ *	r11 - TLB (info from Linux PTE)
+ *	r12 - available to use
+ *	r13 - upper bits of PTE (if PTE_64BIT) or available to use
+ *	CR5 - results of addr >= PAGE_OFFSET
+ *	MAS0, MAS1 - loaded with proper value when we get here
+ *	MAS2, MAS3 - will need additional info from Linux PTE
+ *	Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load:
+#ifdef CONFIG_HUGETLB_PAGE
+	cmpwi	6, r10, 0			/* check for huge page */
+	beq	6, finish_tlb_load_cont    	/* !huge */
+
+	/* Alas, we need more scratch registers for hugepages */
+	mfspr	r12, SPRN_SPRG_THREAD
+	stw	r14, THREAD_NORMSAVE(4)(r12)
+	stw	r15, THREAD_NORMSAVE(5)(r12)
+	stw	r16, THREAD_NORMSAVE(6)(r12)
+	stw	r17, THREAD_NORMSAVE(7)(r12)
+
+	/* Get the next_tlbcam_idx percpu var */
+#ifdef CONFIG_SMP
+	lwz	r15, TASK_CPU-THREAD(r12)
+	lis     r14, __per_cpu_offset@h
+	ori     r14, r14, __per_cpu_offset@l
+	rlwinm  r15, r15, 2, 0, 29	/* r15 *= 4: word index into __per_cpu_offset */
+	lwzx    r16, r14, r15		/* r16 = this entry of __per_cpu_offset */
+#else
+	li	r16, 0
+#endif
+	lis     r17, next_tlbcam_idx@h
+	ori	r17, r17, next_tlbcam_idx@l
+	add	r17, r17, r16			/* r17 = address of next_tlbcam_idx (+ r16) */
+	lwz     r15, 0(r17)			/* r15 = next_tlbcam_idx */
+
+	lis	r14, MAS0_TLBSEL(1)@h		/* select TLB1 (TLBCAM) */
+	rlwimi	r14, r15, 16, 4, 15		/* next_tlbcam_idx entry */
+	mtspr	SPRN_MAS0, r14
+
+	/* Extract TLB1CFG(NENTRY) */
+	mfspr	r16, SPRN_TLB1CFG
+	andi.	r16, r16, 0xfff
+
+	/* Update next_tlbcam_idx, wrapping when necessary */
+	addi	r15, r15, 1
+	cmpw	r15, r16
+	blt 	100f
+	lis	r14, tlbcam_index@h
+	ori	r14, r14, tlbcam_index@l
+	lwz	r15, 0(r14)		/* Wrapped: restart from the value of tlbcam_index */
+100:	stw	r15, 0(r17)
+
+	/*
+	 * Calc MAS1_TSIZE from r10 (which has pshift encoded)
+	 * tlb_enc = (pshift - 10).
+	 */
+	subi	r15, r10, 10
+	mfspr	r16, SPRN_MAS1
+	rlwimi	r16, r15, 7, 20, 24
+	mtspr	SPRN_MAS1, r16
+
+	/* copy the pshift for use later */
+	mr	r14, r10
+
+	/* fall through */
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+	/*
+	 * We set execute, because we don't have the granularity to
+	 * properly set this at the page level (Linux problem).
+	 * Many of these bits are software only.  Bits we don't set
+	 * here we (properly should) assume have the appropriate value.
+	 */
+finish_tlb_load_cont:
+#ifdef CONFIG_PTE_64BIT
+	rlwinm	r12, r11, 32-2, 26, 31	/* Move in perm bits */
+	andi.	r10, r11, _PAGE_DIRTY
+	bne	1f
+	li	r10, MAS3_SW | MAS3_UW	/* PTE not dirty: drop the write permissions */
+	andc	r12, r12, r10
+1:	rlwimi	r12, r13, 20, 0, 11	/* grab RPN[32:43] */
+	rlwimi	r12, r11, 20, 12, 19	/* grab RPN[44:51] */
+2:	mtspr	SPRN_MAS3, r12
+BEGIN_MMU_FTR_SECTION
+	srwi	r10, r13, 12		/* grab RPN[12:31] */
+	mtspr	SPRN_MAS7, r10
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
+#else
+	li	r10, (_PAGE_EXEC | _PAGE_PRESENT)
+	mr	r13, r11
+	rlwimi	r10, r11, 31, 29, 29	/* extract _PAGE_DIRTY into SW */
+	and	r12, r11, r10
+	andi.	r10, r11, _PAGE_USER	/* Test for _PAGE_USER */
+	slwi	r10, r12, 1
+	or	r10, r10, r12
+	rlwinm	r10, r10, 0, ~_PAGE_EXEC	/* Clear SX on user pages */
+	iseleq	r12, r12, r10		/* Keep r12 if !_PAGE_USER, else take r10 */
+	rlwimi	r13, r12, 0, 20, 31	/* Get RPN from PTE, merge w/ perms */
+	mtspr	SPRN_MAS3, r13
+#endif
+
+	mfspr	r12, SPRN_MAS2
+#ifdef CONFIG_PTE_64BIT
+	rlwimi	r12, r11, 32-19, 27, 31	/* extract WIMGE from pte */
+#else
+	rlwimi	r12, r11, 26, 27, 31	/* extract WIMGE from pte */
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+	beq	6, 3f			/* don't mask if page isn't huge */
+	li	r13, 1
+	slw	r13, r13, r14		/* r13 = 1 << pshift (r14 copied from r10 above) */
+	subi	r13, r13, 1
+	rlwinm	r13, r13, 0, 0, 19	/* bottom bits used for WIMGE/etc */
+	andc	r12, r12, r13		/* mask off ea bits within the page */
+#endif
+3:	mtspr	SPRN_MAS2, r12
+
+tlb_write_entry:
+	tlbwe
+
+	/* Done...restore registers and get out of here.  */
+	mfspr	r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_HUGETLB_PAGE
+	beq	6, 8f /* skip restore for 4k page faults */
+	lwz	r14, THREAD_NORMSAVE(4)(r10)
+	lwz	r15, THREAD_NORMSAVE(5)(r10)
+	lwz	r16, THREAD_NORMSAVE(6)(r10)
+	lwz	r17, THREAD_NORMSAVE(7)(r10)
+#endif
+8:	lwz	r11, THREAD_NORMSAVE(3)(r10)
+	mtcr	r11			/* Restore CR from NORMSAVE(3) */
+	lwz	r13, THREAD_NORMSAVE(2)(r10)
+	lwz	r12, THREAD_NORMSAVE(1)(r10)
+	lwz	r11, THREAD_NORMSAVE(0)(r10)
+	mfspr	r10, SPRN_SPRG_RSCRATCH0
+	rfi					/* Force context change */
+
+#ifdef CONFIG_SPE
+/* Note that the SPE support is closely modeled after the AltiVec
+ * support.  Changes to one are likely to be applicable to the
+ * other!  */
+_GLOBAL(load_up_spe)
+/*
+ * Enables SPE (MSR[SPE]) for the kernel now and, via r9, after return,
+ * then loads ACC and the EVRs from the current thread_struct.
+ * NOTE(review): no previous owner's SPE state is saved in this routine.
+ * On SMP we know the SPE units are free, since we give it up every
+ * switch.  -- Kumar
+ */
+	mfmsr	r5
+	oris	r5,r5,MSR_SPE@h
+	mtmsr	r5			/* enable use of SPE now */
+	isync
+	/* enable use of SPE after return */
+	oris	r9,r9,MSR_SPE@h		/* r9 presumably holds the MSR used on return - confirm */
+	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
+	li	r4,1
+	li	r10,THREAD_ACC
+	stw	r4,THREAD_USED_SPE(r5)	/* Record that this thread has used SPE */
+	evlddx	evr4,r10,r5		/* Load the value stored at THREAD_ACC */
+	evmra	evr4,evr4		/* ...and use it to initialize the accumulator */
+	REST_32EVRS(0,r10,r5,THREAD_EVR0)
+	blr
+
+/*
+ * SPE unavailable trap from kernel - print a message, but let
+ * the task use SPE in the kernel until it returns to user mode.
+ */
+SYM_FUNC_START_LOCAL(KernelSPE)
+	lwz	r3,_MSR(r1)
+	oris	r3,r3,MSR_SPE@h
+	stw	r3,_MSR(r1)	/* enable use of SPE after return */
+#ifdef CONFIG_PRINTK
+	lis	r3,87f@h	/* r3 = address of the format string below */
+	ori	r3,r3,87f@l
+	mr	r4,r2		/* current */
+	lwz	r5,_NIP(r1)	/* %x argument: NIP from the saved frame */
+	bl	_printk
+#endif
+	b	interrupt_return
+#ifdef CONFIG_PRINTK
+87:	.string	"SPE used in kernel  (task=%p, pc=%x)  \n"
+#endif
+	.align	4,0
+
+SYM_FUNC_END(KernelSPE)
+#endif /* CONFIG_SPE */
+
+/*
+ * Translate the effective address in r3 to a physical address, returned in
+ * r3 (upper 32 bits, written only if CONFIG_PHYS_64BIT) and r4 (lower 32 bits).
+ */
+SYM_FUNC_START_LOCAL(get_phys_addr)
+	mfmsr	r8
+	mfspr	r9,SPRN_PID
+	rlwinm	r9,r9,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	rlwimi	r9,r8,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
+	mtspr	SPRN_MAS6,r9
+
+	tlbsx	0,r3			/* must succeed */
+
+	mfspr	r8,SPRN_MAS1
+	mfspr	r12,SPRN_MAS3
+	rlwinm	r9,r8,25,0x1f		/* r9 = size field of MAS1; page size = 1K << r9 */
+	li	r10,1024
+	slw	r10,r10,r9		/* r10 = page size */
+	addi	r10,r10,-1		/* r10 = page size - 1 (offset mask) */
+	and	r11,r3,r10		/* r11 = page offset */
+	andc	r4,r12,r10		/* r4 = page base */
+	or	r4,r4,r11		/* r4 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r3,SPRN_MAS7		/* r3 = upper 32 bits of the physical address */
+#endif
+	blr
+SYM_FUNC_END(get_phys_addr)
+
+/*
+ * Global functions
+ */
+
+#ifdef CONFIG_PPC_E500
+#ifndef CONFIG_PPC_E500MC
+/* Adjust or setup IVORs for e500v1/v2 */
+_GLOBAL(__setup_e500_ivors)
+	li	r3,DebugCrit@l		/* @l: low 16 bits of the handler address */
+	mtspr	SPRN_IVOR15,r3
+	li	r3,SPEUnavailable@l
+	mtspr	SPRN_IVOR32,r3
+	li	r3,SPEFloatingPointData@l
+	mtspr	SPRN_IVOR33,r3
+	li	r3,SPEFloatingPointRound@l
+	mtspr	SPRN_IVOR34,r3
+	li	r3,PerformanceMonitor@l
+	mtspr	SPRN_IVOR35,r3
+	sync
+	blr
+#else
+/* Adjust or setup IVORs for e500mc */
+_GLOBAL(__setup_e500mc_ivors)
+	li	r3,DebugDebug@l		/* IVOR15 gets DebugDebug here, not DebugCrit */
+	mtspr	SPRN_IVOR15,r3
+	li	r3,PerformanceMonitor@l
+	mtspr	SPRN_IVOR35,r3
+	li	r3,Doorbell@l
+	mtspr	SPRN_IVOR36,r3
+	li	r3,CriticalDoorbell@l
+	mtspr	SPRN_IVOR37,r3
+	sync
+	blr
+
+/* Set up the embedded hypervisor (ehv) IVORs: IVOR38-IVOR41 */
+_GLOBAL(__setup_ehv_ivors)
+	li	r3,GuestDoorbell@l
+	mtspr	SPRN_IVOR38,r3
+	li	r3,CriticalGuestDoorbell@l
+	mtspr	SPRN_IVOR39,r3
+	li	r3,Hypercall@l
+	mtspr	SPRN_IVOR40,r3
+	li	r3,Ehvpriv@l
+	mtspr	SPRN_IVOR41,r3
+	sync
+	blr
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_PPC_E500 */
+
+#ifdef CONFIG_SPE
+/*
+ * extern void __giveup_spe(struct task_struct *prev)
+ * Saves the EVRs and ACC into prev's THREAD; clears MSR_SPE in its saved MSR.
+ */
+_GLOBAL(__giveup_spe)
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	lwz	r5,PT_REGS(r3)
+	cmpi	0,r5,0			/* CR0 = (PT_REGS pointer == 0); consumed by beq below */
+	SAVE_32EVRS(0, r4, r3, THREAD_EVR0)
+	evxor	evr6, evr6, evr6	/* clear out evr6 */
+	evmwumiaa evr6, evr6, evr6	/* evr6 <- ACC = 0 * 0 + ACC */
+	li	r4,THREAD_ACC
+	evstddx	evr6, r4, r3		/* save off accumulator */
+	beq	1f			/* No pt_regs: nothing to patch (assumes CR0 is intact) */
+	lwz	r4,_MSR-STACK_INT_FRAME_REGS(r5)
+	lis	r3,MSR_SPE@h
+	andc	r4,r4,r3		/* disable SPE for previous task */
+	stw	r4,_MSR-STACK_INT_FRAME_REGS(r5)
+1:
+	blr
+#endif /* CONFIG_SPE */
+
+/*
+ * extern void abort(void)
+ *
+ * At present, this routine just applies a system reset.
+ */
+_GLOBAL(abort)
+	li	r13,0
+	mtspr	SPRN_DBCR0,r13		/* disable all debug events */
+	isync
+	mfmsr	r13
+	ori	r13,r13,MSR_DE@l	/* Enable Debug Events */
+	mtmsr	r13
+	isync
+	mfspr	r13,SPRN_DBCR0		/* NOTE(review): value is overwritten by the lis below */
+	lis	r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
+	mtspr	SPRN_DBCR0,r13		/* Request the reset; no blr follows this routine */
+	isync
+
+#ifdef CONFIG_SMP
+/* When we get here, r24 needs to hold the CPU # */
+	.globl __secondary_start
+__secondary_start:
+	LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+	lwz	r3,0(r3)
+	mtctr	r3		/* CTR = tlbcam_index: iteration count for the bdnz loop */
+	li	r26,0		/* r26 safe? */
+
+	bl	switch_to_as1
+	mr	r27,r3		/* tlb entry */
+	/* Load each CAM entry */
+1:	mr	r3,r26
+	bl	loadcam_entry
+	addi	r26,r26,1
+	bdnz	1b
+	mr	r3,r27		/* tlb entry */
+	LOAD_REG_ADDR_PIC(r4, memstart_addr)
+	lwz	r4,0(r4)
+	mr	r5,r25		/* phys kernel start */
+	rlwinm	r5,r5,0,~0x3ffffff	/* aligned 64M */
+	subf	r4,r5,r4	/* memstart_addr - phys kernel start */
+	lis	r7,KERNELBASE@h
+	ori	r7,r7,KERNELBASE@l
+	cmpw	r20,r7		/* if kernstart_virt_addr != KERNELBASE, randomized */
+	beq	2f
+	li	r4,0		/* randomized: pass an offset of 0 to restore_to_as0 */
+2:	li	r5,0		/* no device tree */
+	li	r6,0		/* not boot cpu */
+	bl	restore_to_as0
+
+
+	lis	r3,__secondary_hold_acknowledge@h
+	ori	r3,r3,__secondary_hold_acknowledge@l
+	stw	r24,0(r3)	/* Store r24 (the CPU #) in the acknowledge word */
+
+	li	r3,0
+	mr	r4,r24		/* Why? */
+	bl	call_setup_cpu
+
+	/* get current's stack and current */
+	lis	r2,secondary_current@ha
+	lwz	r2,secondary_current@l(r2)
+	lwz	r1,TASK_STACK(r2)
+
+	/* stack */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
+	li	r0,0
+	stw	r0,0(r1)	/* Zero the word at the new stack pointer */
+
+	/* ptr to current thread */
+	addi	r4,r2,THREAD	/* address of our thread_struct */
+	mtspr	SPRN_SPRG_THREAD,r4
+
+	/* Setup the defaults for TLB entries */
+	li	r4,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
+	mtspr	SPRN_MAS4,r4
+
+	/* Jump to start_secondary */
+	lis	r4,MSR_KERNEL@h
+	ori	r4,r4,MSR_KERNEL@l
+	lis	r3,start_secondary@h
+	ori	r3,r3,start_secondary@l
+	mtspr	SPRN_SRR0,r3	/* rfi target = start_secondary */
+	mtspr	SPRN_SRR1,r4	/* MSR after rfi = MSR_KERNEL */
+	sync
+	rfi
+	sync
+
+	.globl __secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+	.long	-1
+#endif
+
+/*
+ * Create a 64M TLB1 entry (valid, IPROT, SW/SR/SX) by address and entry
+ * r3 - entry
+ * r4 - virtual address
+ * r5/r6 - physical address (PHYS_64BIT: r5 -> MAS7, r6 -> MAS3; else r5 -> MAS3)
+ */
+_GLOBAL(create_kaslr_tlb_entry)
+	lis     r7,0x1000               /* Set MAS0(TLBSEL) = 1 */
+	rlwimi  r7,r3,16,4,15           /* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr   SPRN_MAS0,r7            /* Write MAS0 */
+
+	lis     r3,(MAS1_VALID|MAS1_IPROT)@h
+	ori     r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+	mtspr   SPRN_MAS1,r3            /* Write MAS1 */
+
+	lis     r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+	ori     r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+	and     r3,r3,r4		/* Keep only the EPN bits of the virtual address */
+	ori	r3,r3,MAS2_M_IF_NEEDED@l
+	mtspr   SPRN_MAS2,r3            /* Write MAS2(EPN) */
+
+#ifdef CONFIG_PHYS_64BIT
+	ori     r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX)
+	mtspr   SPRN_MAS3,r8            /* Write MAS3(RPN) */
+	mtspr	SPRN_MAS7,r5
+#else
+	ori     r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX)
+	mtspr   SPRN_MAS3,r8            /* Write MAS3(RPN) */
+#endif
+
+	tlbwe                           /* Write TLB */
+	isync
+	sync
+	blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+	mfmsr	r7
+	rlwinm	r7, r7, 0, ~(MSR_IS | MSR_DS)	/* Clear MSR[IS] and MSR[DS] */
+
+	mtspr	SPRN_SRR0,r4		/* rfi target = kernel entry (r4) */
+	mtspr	SPRN_SRR1,r7		/* MSR after rfi */
+	rfi
+
+/*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+	mflr	r5			/* Keep the return address for the rfi below */
+
+	/* Find an entry that is not in use */
+	mfspr	r3,SPRN_TLB1CFG
+	andi.	r3,r3,0xfff
+	mfspr	r4,SPRN_PID
+	rlwinm	r4,r4,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	mtspr	SPRN_MAS6,r4
+1:	lis	r4,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	addi	r3,r3,-1		/* Step down to the next lower entry */
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbre
+	mfspr	r4,SPRN_MAS1
+	andis.	r4,r4,MAS1_VALID@h
+	bne	1b			/* Entry is valid (in use): try the next one */
+
+	/* Get the tlb entry used by the current running code */
+	bcl	20,31,$+4
+0:	mflr	r4			/* r4 = address of this instruction */
+	tlbsx	0,r4
+
+	mfspr	r4,SPRN_MAS1
+	ori	r4,r4,MAS1_TS		/* Set the TS = 1 */
+	mtspr	SPRN_MAS1,r4
+
+	mfspr	r4,SPRN_MAS0
+	rlwinm	r4,r4,0,~MAS0_ESEL_MASK
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbwe
+	isync
+	sync
+
+	mfmsr	r4
+	ori	r4,r4,MSR_IS | MSR_DS	/* Select address space 1 for fetch and data */
+	mtspr	SPRN_SRR0,r5		/* rfi target = caller's return address */
+	mtspr	SPRN_SRR1,r4
+	sync
+	rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
+*/
+_GLOBAL(restore_to_as0)
+	mflr	r0			/* r0 = caller's return address */
+
+	bcl	20,31,$+4
+0:	mflr	r9			/* r9 = run-time address of label 0 */
+	addi	r9,r9,1f - 0b		/* r9 = run-time address of label 1 below */
+
+	/*
+	 * We may map the PAGE_OFFSET in AS0 to a different physical address,
+	 * so we need calculate the right jump and device tree address based
+	 * on the offset passed by r4.
+	 */
+	add	r9,r9,r4
+	add	r5,r5,r4
+	add	r0,r0,r4
+
+2:	mfmsr	r7
+	li	r8,(MSR_IS | MSR_DS)
+	andc	r7,r7,r8		/* Clear MSR[IS] and MSR[DS] */
+
+	mtspr	SPRN_SRR0,r9		/* rfi target = label 1 (adjusted by r4) */
+	mtspr	SPRN_SRR1,r7
+	sync
+	rfi
+
+	/* Invalidate the temporary tlb entry for AS1 */
+1:	lis	r9,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r9,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r9
+	tlbre
+	mfspr	r9,SPRN_MAS1
+	rlwinm	r9,r9,0,2,31		/* Clear MAS1 Valid and IPROT */
+	mtspr	SPRN_MAS1,r9
+	tlbwe
+	isync
+
+	cmpwi	r4,0
+	cmpwi	cr1,r6,0
+	cror	eq,4*cr1+eq,eq		/* eq = (r6 == 0) || (r4 == 0) */
+	bne	3f			/* offset != 0 && is_boot_cpu */
+	mtlr	r0			/* Otherwise return to the (adjusted) caller address */
+	blr
+
+	/*
+	 * The PAGE_OFFSET will map to a different physical address,
+	 * jump to _start to do another relocation again.
+	*/
+3:	mr	r3,r5			/* r3 = adjusted device tree address */
+	bl	_start
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
new file mode 100644
index 0000000000..647b0b445e
--- /dev/null
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -0,0 +1,791 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications by Dan Malek
+ *    Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ *  This file contains low-level support and setup for PowerPC 8xx
+ *  embedded processors, including trap and interrupt dispatch.
+ */
+
+#include <linux/init.h>
+#include <linux/magic.h>
+#include <linux/pgtable.h>
+#include <linux/sizes.h>
+#include <linux/linkage.h>
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/code-patching-asm.h>
+#include <asm/interrupt.h>
+
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN	0x00f0
+
+#include "head_32.h"
+
+.macro compare_to_kernel_boundary scratch, addr
+#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+/* By simply checking Address >= 0x80000000, we know if it's a kernel address */
+	not.	\scratch, \addr		/* CR0[LT] set when the top bit of addr is clear */
+#else
+	rlwinm	\scratch, \addr, 16, 0xfff8	/* Upper address bits, moved to the low half */
+	cmpli	cr0, \scratch, PAGE_OFFSET@h	/* CR0[LT] set when below PAGE_OFFSET@h */
+#endif
+.endm
+
+#define PAGE_SHIFT_512K		19
+#define PAGE_SHIFT_8M		23
+
+	__HEAD
+_GLOBAL(_stext);
+_GLOBAL(_start);
+
+/* MPC8xx
+ * This port was done on an MBX board with an 860.  Right now I only
+ * support an ELF compressed (zImage) boot from EPPC-Bug because the
+ * code there loads up some registers before calling us:
+ *   r3: ptr to board info data
+ *   r4: initrd_start or if no initrd then 0
+ *   r5: initrd_end - unused if r4 is 0
+ *   r6: Start of command line string
+ *   r7: End of command line string
+ *
+ * I decided to use conditional compilation instead of checking PVR and
+ * adding more processor specific branches around code I don't need.
+ * Since this is an embedded processor, I also appreciate any memory
+ * savings I can get.
+ *
+ * The MPC8xx does not have any BATs, but it supports large page sizes.
+ * We first initialize the MMU to support 8M byte pages, then load one
+ * entry into each of the instruction and data TLBs to map the first
+ * 8M 1:1.  I also mapped an additional I/O space 1:1 so we can get to
+ * the "internal" processor registers before MMU_init is called.
+ *
+ *	-- Dan
+ */
+	.globl	__start
+__start:
+	mr	r31,r3			/* save device tree ptr */
+
+	/* We have to turn on the MMU right away so we get cache modes
+	 * set correctly.
+	 */
+	bl	initial_mmu
+
+/* We now have the lower 8 Meg mapped into TLB entries, and the caches
+ * ready to work.
+ */
+
+turn_on_mmu:
+	mfmsr	r0
+	ori	r0,r0,MSR_DR|MSR_IR	/* Set MSR[DR] and MSR[IR] in the MSR used by rfi */
+	mtspr	SPRN_SRR1,r0
+	lis	r0,start_here@h
+	ori	r0,r0,start_here@l
+	mtspr	SPRN_SRR0,r0		/* rfi target = start_here */
+	rfi				/* enables MMU */
+
+
+#ifdef CONFIG_PERF_EVENTS
+	.align	4
+
+	.globl	itlb_miss_counter
+itlb_miss_counter:		/* Incremented at the patch__itlbmiss_perf site */
+	.space	4
+
+	.globl	dtlb_miss_counter
+dtlb_miss_counter:		/* Incremented at the patch__dtlbmiss_perf site */
+	.space	4
+
+	.globl	instruction_counter
+instruction_counter:		/* Decremented by the InstructionBreakpoint handler */
+	.space	4
+#endif
+
+/* System reset */
+	EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception)
+
+/* Machine check */
+	START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+	EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	machine_check_exception
+	b	interrupt_return
+
+/* External interrupt */
+	EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
+
+/* Alignment exception */
+	START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+	EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	alignment_exception
+	REST_NVGPRS(r1)		/* presumably reloads non-volatile GPRs from the frame - confirm */
+	b	interrupt_return
+
+/* Program check exception */
+	START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+	EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+	prepare_transfer_to_handler
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+/* Decrementer */
+	EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
+
+/* System call */
+	START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+	SYSCALL_ENTRY	INTERRUPT_SYSCALL
+
+/* Single step - not used on 601 */
+	EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+
+/* On the MPC8xx, this is a software emulation interrupt.  It occurs
+ * for all unimplemented and illegal instructions.
+ */
+	START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu)
+	EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu
+	prepare_transfer_to_handler
+	bl	emulation_assist_interrupt
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+/*
+ * For the MPC8xx, this is a software tablewalk to load the instruction
+ * TLB.  The task switch loads the M_TWB register with the pointer to the first
+ * level table.
+ * If we discover there is no second level table (value is zero) or if there
+ * is an invalid pte, we load that into the TLB, which causes another fault
+ * into the TLB Error interrupt where we can handle such problems.
+ * We have to use the MD_xxx registers for the tablewalk because the
+ * equivalent MI_xxx registers only perform the attribute functions.
+ */
+
+#ifdef CONFIG_8xx_CPU15
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)	\
+	addi	tmp, addr, PAGE_SIZE;	\
+	tlbie	tmp;			\
+	addi	tmp, addr, -PAGE_SIZE;	\
+	tlbie	tmp
+#else
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
+#endif
+
+	START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss)
+	mtspr	SPRN_SPRG_SCRATCH2, r10	/* r10 and r11 are the only scratch registers here */
+	mtspr	SPRN_M_TW, r11
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
+	INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
+	mtspr	SPRN_MD_EPN, r10
+#ifdef CONFIG_MODULES
+	mfcr	r11		/* Save CR around the compare */
+	compare_to_kernel_boundary r10, r10
+#endif
+	mfspr	r10, SPRN_M_TWB	/* Get level 1 table */
+#ifdef CONFIG_MODULES
+	blt+	3f		/* Not a kernel address: keep M_TWB as is */
+	rlwinm	r10, r10, 0, 20, 31
+	oris	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
+3:
+	mtcr	r11		/* Restore CR */
+#endif
+	lwz	r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10)	/* Get level 1 entry */
+	mtspr	SPRN_MD_TWC, r11
+	mfspr	r10, SPRN_MD_TWC
+	lwz	r10, 0(r10)	/* Get the pte */
+	rlwimi	r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+	rlwimi	r11, r10, 32 - 9, _PMD_PAGE_512K
+	mtspr	SPRN_MI_TWC, r11
+	/* The Linux PTE won't go exactly into the MMU TLB.
+	 * Software indicator bits 20 and 23 must be clear.
+	 * Software indicator bits 22, 24, 25, 26, and 27 must be
+	 * set.  All other Linux PTE bits control the behavior
+	 * of the MMU.
+	 */
+	rlwinm	r10, r10, 0, ~0x0f00	/* Clear bits 20-23 */
+	rlwimi	r10, r10, 4, 0x0400	/* Copy _PAGE_EXEC into bit 21 */
+	ori	r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */
+	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
+
+	/* Restore registers */
+0:	mfspr	r10, SPRN_SPRG_SCRATCH2
+	mfspr	r11, SPRN_M_TW
+	rfi
+	patch_site	0b, patch__itlbmiss_exit_1
+
+#ifdef CONFIG_PERF_EVENTS
+	patch_site	0f, patch__itlbmiss_perf
+0:	lwz	r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
+	addi	r10, r10, 1		/* Count this ITLB miss */
+	stw	r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	mfspr	r11, SPRN_M_TW
+	rfi
+#endif
+
+	START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
+	mtspr	SPRN_SPRG_SCRATCH2, r10	/* r10 and r11 are the only scratch registers here */
+	mtspr	SPRN_M_TW, r11
+	mfcr	r11			/* Save CR around the compare */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	mfspr	r10, SPRN_MD_EPN
+	compare_to_kernel_boundary r10, r10
+	mfspr	r10, SPRN_M_TWB	/* Get level 1 table */
+	blt+	3f			/* Not a kernel address: keep M_TWB as is */
+	rlwinm	r10, r10, 0, 20, 31
+	oris	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
+3:
+	mtcr	r11			/* Restore CR */
+	lwz	r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10)	/* Get level 1 entry */
+
+	mtspr	SPRN_MD_TWC, r11
+	mfspr	r10, SPRN_MD_TWC
+	lwz	r10, 0(r10)	/* Get the pte */
+
+	/* Insert Guarded and Accessed flags into the TWC from the Linux PTE.
+	 * It is bit 27 of both the Linux PTE and the TWC (at least
+	 * I got that right :-).  It will be better when we can put
+	 * this into the Linux pgd/pmd and load it in the operation
+	 * above.
+	 */
+	rlwimi	r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+	rlwimi	r11, r10, 32 - 9, _PMD_PAGE_512K
+	mtspr	SPRN_MD_TWC, r11
+
+	/* The Linux PTE won't go exactly into the MMU TLB.
+	 * Software indicator bits 24, 25, 26, and 27 must be
+	 * set.  All other Linux PTE bits control the behavior
+	 * of the MMU.
+	 */
+	li	r11, RPN_PATTERN
+	rlwimi	r10, r11, 0, 24, 27	/* Set 24-27 */
+	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR with RPN_PATTERN */
+
+	/* Restore registers */
+
+0:	mfspr	r10, SPRN_SPRG_SCRATCH2
+	mfspr	r11, SPRN_M_TW
+	rfi
+	patch_site	0b, patch__dtlbmiss_exit_1
+
+#ifdef CONFIG_PERF_EVENTS
+	patch_site	0f, patch__dtlbmiss_perf
+0:	lwz	r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+	addi	r10, r10, 1		/* Count this DTLB miss */
+	stw	r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	mfspr	r11, SPRN_M_TW
+	rfi
+#endif
+
+/* This is an instruction TLB error on the MPC8xx.  This could be due
+ * to many reasons, such as executing guarded memory or illegal instruction
+ * addresses.  There is nothing to do but handle a big time error fault.
+ */
+	START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError)
+	/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+	EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError
+	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+	andis.	r10,r9,SRR1_ISI_NOPT@h
+	beq+	.Litlbie		/* SRR1_ISI_NOPT clear: skip the tlbie */
+	tlbie	r12			/* r12 presumably holds the faulting address - confirm */
+.Litlbie:
+	stw	r12, _DAR(r11)
+	stw	r5, _DSISR(r11)		/* Filtered SRR1 bits are stored as DSISR */
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
+
+/* This is the data TLB error on the MPC8xx.  This could be due to
+ * many reasons, including a dirty update to a pte.  We bail out to
+ * a higher level function that can handle it.
+ */
+	START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError)
+	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+	mfspr	r11, SPRN_DAR
+	cmpwi	cr1, r11, RPN_PATTERN	/* DAR still holds the RPN_PATTERN tag? */
+	beq-	cr1, FixupDAR	/* must be a buggy dcbX, icbi insn. */
+DARFixed:/* Return from dcbx instruction bug workaround */
+	EXCEPTION_PROLOG_1
+	/* 0x300 is DataAccess exception, needed by bad_page_fault() */
+	EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
+	lwz	r4, _DAR(r11)
+	lwz	r5, _DSISR(r11)
+	andis.	r10,r5,DSISR_NOHPTE@h
+	beq+	.Ldtlbie		/* DSISR_NOHPTE clear: skip the tlbie */
+	tlbie	r4			/* Invalidate the TLB entry for the address in DAR */
+.Ldtlbie:
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
+
+#ifdef CONFIG_VMAP_STACK
+	vmap_stack_overflow_exception
+#endif
+
+/* On the MPC8xx, these next four traps are used for development
+ * support of breakpoints and such.  Someday I will get around to
+ * using them.
+ */
+	START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint)
+	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+	mfspr	r11, SPRN_SRR0
+	cmplwi	cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
+	cmplwi	cr7, r11, (.Litlbie - PAGE_OFFSET)@l
+	cror	4*cr1+eq, 4*cr1+eq, 4*cr7+eq	/* eq if SRR0 matches either label */
+	bne	cr1, 1f			/* No match: take the normal do_break path */
+	mtcr	r10			/* Match: undo the prolog and return at once */
+	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH1
+	rfi
+
+1:	EXCEPTION_PROLOG_1
+	EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1
+	mfspr	r4,SPRN_BAR
+	stw	r4,_DAR(r11)		/* Store the BAR value in the frame's DAR slot */
+	prepare_transfer_to_handler
+	bl	do_break
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+#ifdef CONFIG_PERF_EVENTS
+	START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint)
+	mtspr	SPRN_SPRG_SCRATCH0, r10
+	lwz	r10, (instruction_counter - PAGE_OFFSET)@l(0)
+	addi	r10, r10, -1		/* Decrement instruction_counter */
+	stw	r10, (instruction_counter - PAGE_OFFSET)@l(0)
+	lis	r10, 0xffff
+	ori	r10, r10, 0x01
+	mtspr	SPRN_COUNTA, r10	/* Write 0xffff0001 to COUNTA */
+	mfspr	r10, SPRN_SPRG_SCRATCH0
+	rfi
+#else
+	EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception)
+#endif
+	EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+	EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+
+	__HEAD
+	. = 0x2000
+
+/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
+ * by decoding the registers used by the dcbx instruction and adding them.
+ * DAR is set to the calculated address.
+ */
+FixupDAR:/* Entry point for dcbx workaround. */
+	mtspr	SPRN_M_TW, r10	/* stash r10 in M_TW; reloaded at 141 and 152 */
+	/* fetch instruction from memory. */
+	mfspr	r10, SPRN_SRR0	/* r10 = SRR0, address of the faulting instruction */
+	mtspr	SPRN_MD_EPN, r10
+	rlwinm	r11, r10, 16, 0xfff8	/* r11 = top 13 address bits, moved into the low halfword */
+	cmpli	cr1, r11, PAGE_OFFSET@h	/* compare against the high half of PAGE_OFFSET */
+	mfspr	r11, SPRN_M_TWB	/* Get level 1 table */
+	blt+	cr1, 3f	/* below PAGE_OFFSET: use the M_TWB value as is */
+
+	/* create physical page address from effective address */
+	tophys(r11, r10)	/* NOTE(review): r11 is overwritten by the next line - confirm this is still needed */
+	mfspr	r11, SPRN_M_TWB	/* Get level 1 table */
+	rlwinm	r11, r11, 0, 20, 31	/* keep only the low 12 bits */
+	oris	r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha	/* and base them on swapper_pg_dir's physical address */
+3:
+	lwz	r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)	/* Get the level 1 entry */
+	mtspr	SPRN_MD_TWC, r11
+	mtcrf	0x01, r11	/* copy entry bits 28-31 into CR7 for the bt below */
+	mfspr	r11, SPRN_MD_TWC	/* read MD_TWC back; used as the pte address */
+	lwz	r11, 0(r11)	/* Get the pte */
+	bt	28,200f		/* bit 28 = Large page (8M) */
+	/* concat physical page address(r11) and page offset(r10) */
+	rlwimi	r11, r10, 0, 32 - PAGE_SHIFT, 31
+201:	lwz	r11,0(r11)	/* r11 = the instruction word itself */
+/* Check if it really is a dcbx instruction. */
+/* dcbt and dcbtst do not generate DTLB Misses/Errors,
+ * no need to include them here */
+	xoris	r10, r11, 0x7c00	/* check if major OP code is 31 */
+	rlwinm	r10, r10, 0, 21, 5	/* clear bits 6-20 (the three register fields) */
+	cmpwi	cr1, r10, 2028	/* Is dcbz? */
+	beq+	cr1, 142f
+	cmpwi	cr1, r10, 940	/* Is dcbi? */
+	beq+	cr1, 142f
+	cmpwi	cr1, r10, 108	/* Is dcbst? */
+	beq+	cr1, 144f		/* Fix up store bit! */
+	cmpwi	cr1, r10, 172	/* Is dcbf? */
+	beq+	cr1, 142f
+	cmpwi	cr1, r10, 1964	/* Is icbi? */
+	beq+	cr1, 142f
+141:	mfspr	r10,SPRN_M_TW	/* restore r10 */
+	b	DARFixed	/* Nope, go back to normal TLB processing */
+
+200:
+	/* concat physical page address(r11) and page offset(r10) */
+	rlwimi	r11, r10, 0, 32 - PAGE_SHIFT_8M, 31
+	b	201b
+
+144:	mfspr	r10, SPRN_DSISR
+	rlwinm	r10, r10,0,7,5	/* Clear store bit for buggy dcbst insn */
+	mtspr	SPRN_DSISR, r10
+142:	/* continue, it was a dcbx, dcbi instruction. */
+	mfctr	r10
+	mtdar	r10			/* save ctr reg in DAR */
+	rlwinm	r10, r11, 24, 24, 28	/* offset into jump table for reg RB */
+	addi	r10, r10, 150f@l	/* add start of table */
+	mtctr	r10			/* load ctr with jump address */
+	xor	r10, r10, r10		/* sum starts at zero */
+	bctr				/* jump into table */
+150:
+	add	r10, r10, r0	;b	151f
+	add	r10, r10, r1	;b	151f
+	add	r10, r10, r2	;b	151f
+	add	r10, r10, r3	;b	151f
+	add	r10, r10, r4	;b	151f
+	add	r10, r10, r5	;b	151f
+	add	r10, r10, r6	;b	151f
+	add	r10, r10, r7	;b	151f
+	add	r10, r10, r8	;b	151f
+	add	r10, r10, r9	;b	151f
+	mtctr	r11	;b	154f	/* r10 needs special handling */
+	mtctr	r11	;b	153f	/* r11 needs special handling */
+	add	r10, r10, r12	;b	151f
+	add	r10, r10, r13	;b	151f
+	add	r10, r10, r14	;b	151f
+	add	r10, r10, r15	;b	151f
+	add	r10, r10, r16	;b	151f
+	add	r10, r10, r17	;b	151f
+	add	r10, r10, r18	;b	151f
+	add	r10, r10, r19	;b	151f
+	add	r10, r10, r20	;b	151f
+	add	r10, r10, r21	;b	151f
+	add	r10, r10, r22	;b	151f
+	add	r10, r10, r23	;b	151f
+	add	r10, r10, r24	;b	151f
+	add	r10, r10, r25	;b	151f
+	add	r10, r10, r26	;b	151f
+	add	r10, r10, r27	;b	151f
+	add	r10, r10, r28	;b	151f
+	add	r10, r10, r29	;b	151f
+	add	r10, r10, r30	;b	151f
+	add	r10, r10, r31	/* last entry falls through into 151 */
+151:
+	rlwinm	r11,r11,19,24,28	/* offset into jump table for reg RA */
+	cmpwi	cr1, r11, 0
+	beq	cr1, 152f		/* if reg RA is zero, don't add it */
+	addi	r11, r11, 150b@l	/* add start of table */
+	mtctr	r11			/* load ctr with jump address */
+	rlwinm	r11,r11,0,16,10		/* make sure we don't execute this more than once */
+	bctr				/* jump into table */
+152:
+	mfdar	r11
+	mtctr	r11			/* restore ctr reg from DAR */
+	mfspr	r11, SPRN_SPRG_THREAD
+	stw	r10, DAR(r11)	/* store the computed address at DAR(r11) */
+	mfspr	r10, SPRN_DSISR
+	stw	r10, DSISR(r11)	/* store the (possibly fixed up) DSISR at DSISR(r11) */
+	mfspr	r10,SPRN_M_TW	/* restore r10 */
+	b	DARFixed		/* Go back to normal TLB handling */
+
+	/* special handling for r10,r11 since these are modified already */
+153:	mfspr	r11, SPRN_SPRG_SCRATCH1	/* load r11 from SPRN_SPRG_SCRATCH1 */
+	add	r10, r10, r11	/* add it */
+	mfctr	r11		/* restore r11 */
+	b	151b
+154:	mfspr	r11, SPRN_SPRG_SCRATCH0	/* load r10 from SPRN_SPRG_SCRATCH0 */
+	add	r10, r10, r11	/* add it */
+	mfctr	r11		/* restore r11 */
+	b	151b
+
+/*
+ * This is where the main kernel code starts.
+ */
+start_here:	/* sets up r2, SPRG_THREAD and r1, calls the early init functions, then rfi's to start_kernel */
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l	/* r2 = &init_task */
+
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG_THREAD,r4
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l	/* r1 = &init_thread_union */
+	lis	r0, STACK_END_MAGIC@h
+	ori	r0, r0, STACK_END_MAGIC@l
+	stw	r0, 0(r1)	/* write STACK_END_MAGIC into the first word of init_thread_union */
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)	/* r1 += THREAD_SIZE - min frame; store 0 there */
+
+	lis	r6, swapper_pg_dir@ha
+	tophys(r6,r6)
+	mtspr	SPRN_M_TWB, r6	/* M_TWB = tophys of swapper_pg_dir's high part */
+
+	bl	early_init	/* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
+	li	r3,0
+	mr	r4,r31
+	bl	machine_init	/* called with r3 = 0, r4 = r31 */
+	bl	MMU_init
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 8xx, all we have to do is invalidate the TLB to clear
+ * the old 8M byte TLB mappings and load the page table base register.
+ */
+	/* The right way to do this would be to track it down through
+	 * init's THREAD like the context switch code does, but this is
+	 * easier......until someone changes init's static structures.
+	 */
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)	/* r4 = physical address of label 2 below */
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)	/* kernel MSR with both translation bits cleared */
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi	/* continue at 2: with translation off */
+/* Load up the kernel context */
+2:
+#ifdef CONFIG_PIN_TLB_IMMR
+	lis	r0, MD_TWAM@h
+	oris	r0, r0, 0x1f00	/* select DTLB index 0x1f */
+	mtspr	SPRN_MD_CTR, r0
+	LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+	tlbie	r0
+	mtspr	SPRN_MD_EPN, r0
+	LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED)
+	mtspr	SPRN_MD_TWC, r0
+	mfspr   r0, SPRN_IMMR
+	rlwinm	r0, r0, 0, 0xfff80000	/* keep the 512k-aligned part of IMMR */
+	ori	r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+			_PAGE_NO_CACHE | _PAGE_PRESENT
+	mtspr	SPRN_MD_RPN, r0
+	lis	r0, (MD_TWAM | MD_RSV4I)@h
+	mtspr	SPRN_MD_CTR, r0
+#endif
+#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
+	lis	r0, MD_TWAM@h
+	mtspr	SPRN_MD_CTR, r0	/* nothing pinned: MD_CTR = MD_TWAM only, without MD_RSV4I */
+#endif
+	tlbia			/* Clear all TLB entries */
+	sync			/* wait for tlbia/tlbie to finish */
+
+	/* set up the PTE pointers for the Abatron bdiGDB.
+	*/
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(0)	/* Must match your Abatron config file */
+	tophys(r5,r5)
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	stw	r6, 0(r5)	/* abatron_pteptrs[0] = &swapper_pg_dir (virtual address) */
+
+/* Now turn on the MMU for real! */
+	li	r4,MSR_KERNEL
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	rfi			/* enable MMU and jump to start_kernel */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization.  This maps four 8 MByte pages (32 MBytes) from
+ * KERNELBASE upwards.  Also, set the cache mode since that is defined
+ * by TLB entries and perform any additional mapping (like of the IMMR).
+ * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel,
+ * 24 Mbytes of data, and the 512k IMMR space.  Anything not covered by
+ * these mappings is mapped by page tables.
+ */
+SYM_FUNC_START_LOCAL(initial_mmu)
+	li	r8, 0
+	mtspr	SPRN_MI_CTR, r8		/* remove PINNED ITLB entries */
+	lis	r10, MD_TWAM@h
+	mtspr	SPRN_MD_CTR, r10	/* remove PINNED DTLB entries */
+
+	tlbia			/* Invalidate all TLB entries */
+
+	lis	r8, MI_APG_INIT@h	/* Set protection modes */
+	ori	r8, r8, MI_APG_INIT@l
+	mtspr	SPRN_MI_AP, r8
+	lis	r8, MD_APG_INIT@h
+	ori	r8, r8, MD_APG_INIT@l
+	mtspr	SPRN_MD_AP, r8
+
+	/* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */
+	lis	r8, MI_RSV4I@h
+	ori	r8, r8, 0x1c00	/* r8 = MI_CTR value for ITLB index 0x1c (28) */
+	oris	r12, r10, MD_RSV4I@h	/* r10 still holds MD_TWAM@h from above */
+	ori	r12, r12, 0x1c00	/* r12 = MD_CTR value for DTLB index 0x1c (28) */
+	li	r9, 4				/* up to 4 pages of 8M */
+	mtctr	r9
+	lis	r9, KERNELBASE@h		/* Create vaddr for TLB */
+	li	r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID
+	li	r11, MI_BOOTINIT		/* Create RPN for address 0 */
+1:
+	mtspr	SPRN_MI_CTR, r8	/* Set instruction MMU control */
+	addi	r8, r8, 0x100	/* next ITLB index */
+	ori	r0, r9, MI_EVALID		/* Mark it valid */
+	mtspr	SPRN_MI_EPN, r0
+	mtspr	SPRN_MI_TWC, r10
+	mtspr	SPRN_MI_RPN, r11		/* Store TLB entry */
+	mtspr	SPRN_MD_CTR, r12	/* same entry again for the data side */
+	addi	r12, r12, 0x100	/* next DTLB index */
+	mtspr	SPRN_MD_EPN, r0
+	mtspr	SPRN_MD_TWC, r10
+	mtspr	SPRN_MD_RPN, r11
+	addis	r9, r9, 0x80	/* virtual address += 8M */
+	addis	r11, r11, 0x80	/* physical address += 8M */
+
+	bdnz	1b	/* loop until all four pages are loaded */
+
+	/* Since the cache is enabled according to the information we
+	 * just loaded into the TLB, invalidate and enable the caches here.
+	 * We should probably check/set other modes....later.
+	 */
+	lis	r8, IDC_INVALL@h
+	mtspr	SPRN_IC_CST, r8
+	mtspr	SPRN_DC_CST, r8
+	lis	r8, IDC_ENABLE@h
+	mtspr	SPRN_IC_CST, r8
+	mtspr	SPRN_DC_CST, r8
+	/* Disable debug mode entry on breakpoints */
+	mfspr	r8, SPRN_DER
+#ifdef CONFIG_PERF_EVENTS
+	rlwinm	r8, r8, 0, ~0xc	/* clear DER bits 0x8 and 0x4 */
+#else
+	rlwinm	r8, r8, 0, ~0x8	/* clear DER bit 0x8 only */
+#endif
+	mtspr	SPRN_DER, r8
+	blr
+SYM_FUNC_END(initial_mmu)
+
+_GLOBAL(mmu_pin_tlb)	/* reloads the ITLB/DTLB entries from index 28 up with translation off; reads r3 and r4 */
+	lis	r9, (1f - PAGE_OFFSET)@h
+	ori	r9, r9, (1f - PAGE_OFFSET)@l	/* r9 = address of 1: minus PAGE_OFFSET */
+	mfmsr	r10	/* r10 = caller's MSR, put back through SRR1 on exit */
+	mflr	r11	/* r11 = return address, used as SRR0 on exit */
+	li	r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)	/* MSR used while the TLBs are rewritten */
+	rlwinm	r0, r10, 0, ~MSR_RI
+	rlwinm	r0, r0, 0, ~MSR_EE
+	mtmsr	r0	/* clear RI and EE before touching SRR0/SRR1 */
+	isync
+	.align	4
+	mtspr	SPRN_SRR0, r9
+	mtspr	SPRN_SRR1, r12
+	rfi	/* continue at 1: with IR/DR off */
+1:
+	li	r5, 0
+	lis	r6, MD_TWAM@h
+	mtspr	SPRN_MI_CTR, r5	/* MI_CTR = 0 */
+	mtspr	SPRN_MD_CTR, r6	/* MD_CTR = MD_TWAM only */
+	tlbia
+
+	LOAD_REG_IMMEDIATE(r5, 28 << 8)	/* r5 = MI_CTR value selecting index 28 */
+	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)	/* r6 = effective address of the next page */
+	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+	LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+	LOAD_REG_ADDR(r9, _sinittext)	/* r9 = loop limit for the instruction side */
+	li	r0, 4
+	mtctr	r0	/* at most 4 entries */
+
+2:	ori	r0, r6, MI_EVALID
+	mtspr	SPRN_MI_CTR, r5
+	mtspr	SPRN_MI_EPN, r0
+	mtspr	SPRN_MI_TWC, r7
+	mtspr	SPRN_MI_RPN, r8
+	addi	r5, r5, 0x100	/* next ITLB index */
+	addis	r6, r6, SZ_8M@h	/* effective address += 8M */
+	addis	r8, r8, SZ_8M@h	/* RPN += 8M */
+	cmplw	r6, r9
+	bdnzt	lt, 2b	/* repeat while entries remain and r6 < _sinittext */
+	lis	r0, MI_RSV4I@h
+	mtspr	SPRN_MI_CTR, r0
+
+	LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)	/* r5 = MD_CTR value selecting index 28 */
+#ifdef CONFIG_PIN_TLB_DATA
+	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+	li	r8, 0	/* r8 = physical offset of the next page */
+#ifdef CONFIG_PIN_TLB_IMMR
+	li	r0, 3	/* one entry is left for the IMMR mapping below */
+#else
+	li	r0, 4
+#endif
+	mtctr	r0
+	cmpwi	r4, 0
+	beq	4f	/* r4 == 0: skip the _PAGE_RO loop */
+	LOAD_REG_ADDR(r9, _sinittext)
+
+2:	ori	r0, r6, MD_EVALID
+	ori	r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+	mtspr	SPRN_MD_CTR, r5
+	mtspr	SPRN_MD_EPN, r0
+	mtspr	SPRN_MD_TWC, r7
+	mtspr	SPRN_MD_RPN, r12
+	addi	r5, r5, 0x100
+	addis	r6, r6, SZ_8M@h
+	addis	r8, r8, SZ_8M@h
+	cmplw	r6, r9
+	bdnzt	lt, 2b	/* _PAGE_RO entries while r6 < _sinittext */
+4:
+2:	ori	r0, r6, MD_EVALID
+	ori	r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+	mtspr	SPRN_MD_CTR, r5
+	mtspr	SPRN_MD_EPN, r0
+	mtspr	SPRN_MD_TWC, r7
+	mtspr	SPRN_MD_RPN, r12
+	addi	r5, r5, 0x100
+	addis	r6, r6, SZ_8M@h
+	addis	r8, r8, SZ_8M@h
+	cmplw	r6, r3
+	bdnzt	lt, 2b	/* _PAGE_DIRTY entries while r6 < r3 (limit passed by the caller) */
+#endif
+#ifdef CONFIG_PIN_TLB_IMMR
+	LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+	LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)
+	mfspr   r8, SPRN_IMMR
+	rlwinm	r8, r8, 0, 0xfff80000	/* keep the 512k-aligned part of IMMR */
+	ori	r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+			_PAGE_NO_CACHE | _PAGE_PRESENT
+	mtspr	SPRN_MD_CTR, r5
+	mtspr	SPRN_MD_EPN, r0
+	mtspr	SPRN_MD_TWC, r7
+	mtspr	SPRN_MD_RPN, r8
+#endif
+#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+	lis	r0, (MD_RSV4I | MD_TWAM)@h
+	mtspr	SPRN_MD_CTR, r0
+#endif
+	mtspr	SPRN_SRR1, r10	/* MSR saved on entry */
+	mtspr	SPRN_SRR0, r11	/* LR saved on entry */
+	rfi	/* return to the caller with its original MSR */
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
new file mode 100644
index 0000000000..6764b98ca3
--- /dev/null
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -0,0 +1,1214 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ *  This file contains the low-level support and setup for the
+ *  PowerPC platform, including trap and interrupt dispatch.
+ *  (The PPC 8xx embedded CPUs use head_8xx.S instead.)
+ */
+
+#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/bug.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/feature-fixups.h>
+#include <asm/interrupt.h>
+
+#include "head_32.h"
+
+#define LOAD_BAT(n, reg, RA, RB)	\
+	/* zero both upper BATs first; see the comment for clear_bats() -- Cort */ \
+	li	RA,0;			\
+	mtspr	SPRN_IBAT##n##U,RA;	\
+	mtspr	SPRN_DBAT##n##U,RA;	\
+	lwz	RA,(n*16)+0(reg);	\
+	lwz	RB,(n*16)+4(reg);	\
+	mtspr	SPRN_IBAT##n##U,RA;	\
+	mtspr	SPRN_IBAT##n##L,RB;	\
+	lwz	RA,(n*16)+8(reg);	\
+	lwz	RB,(n*16)+12(reg);	\
+	mtspr	SPRN_DBAT##n##U,RA;	\
+	mtspr	SPRN_DBAT##n##L,RB
+
+	__HEAD
+_GLOBAL(_stext);
+
+/*
+ * _start was originally laid out this way because the XCOFF loader in
+ * the OpenFirmware on the powermac expected the entry point to be a
+ * procedure descriptor (see the note inside).
+ */
+_GLOBAL(_start);
+	/*
+	 * These are here for legacy reasons, the kernel used to
+	 * need to look like a coff function entry for the pmac
+	 * but we're always started by some kind of bootloader now.
+	 *  -- Cort
+	 */
+	nop	/* used by __secondary_hold on prep (mtx) and chrp smp */
+	nop	/* used by __secondary_hold on prep (mtx) and chrp smp */
+	nop
+
+/* PMAC
+ * Enter here with the kernel text, data and bss loaded starting at
+ * 0, running with virtual == physical mapping.
+ * r5 points to the prom entry point (the client interface handler
+ * address).  Address translation is turned on, with the prom
+ * managing the hash table.  Interrupts are disabled.  The stack
+ * pointer (r1) points to just below the end of the half-meg region
+ * from 0x380000 - 0x400000, which is mapped in already.
+ *
+ * If we are booted from MacOS via BootX, we enter with the kernel
+ * image loaded somewhere, and the following values in registers:
+ *  r3: 'BooX' (0x426f6f58)
+ *  r4: virtual address of boot_infos_t
+ *  r5: 0
+ *
+ * PREP
+ * This is jumped to on prep systems right after the kernel is relocated
+ * to its proper place in memory by the boot loader.  The expected layout
+ * of the regs is:
+ *   r3: ptr to residual data
+ *   r4: initrd_start or if no initrd then 0
+ *   r5: initrd_end - unused if r4 is 0
+ *   r6: Start of command line string
+ *   r7: End of command line string
+ *
+ * This just gets a minimal mmu environment setup so we can call
+ * start_here() to do the real work.
+ * -- Cort
+ */
+
+	.globl	__start
+__start:
+/*
+ * We have to do any OF calls before we map ourselves to KERNELBASE,
+ * because OF may have I/O devices mapped into that area
+ * (particularly on CHRP).
+ */
+	cmpwi	0,r5,0	/* r5 == 0: no prom entry point was passed in */
+	beq	1f
+
+#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+	/* find out where we are now */
+	bcl	20,31,$+4	/* branch-and-link to the next insn: LR = address of 0: */
+0:	mflr	r8			/* r8 = runtime addr here */
+	addis	r8,r8,(_stext - 0b)@ha
+	addi	r8,r8,(_stext - 0b)@l	/* current runtime base addr */
+	bl	prom_init
+#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */
+
+	/* We never return. We also hit that trap if trying to boot
+	 * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
+	trap
+
+/*
+ * Check for BootX signature when supporting PowerMac and branch to
+ * appropriate trampoline if it's present
+ */
+#ifdef CONFIG_PPC_PMAC
+1:	lis	r31,0x426f
+	ori	r31,r31,0x6f58	/* r31 = 0x426f6f58, the 'BooX' signature */
+	cmpw	0,r3,r31
+	bne	1f	/* no signature in r3: normal entry */
+	bl	bootx_init
+	trap	/* reached only if bootx_init returns */
+#endif /* CONFIG_PPC_PMAC */
+
+1:	mr	r31,r3			/* save device tree ptr */
+	li	r24,0			/* cpu # */
+
+/*
+ * early_init() does the early machine identification and does
+ * the necessary low-level setup and clears the BSS
+ *  -- Cort <cort@fsmlabs.com>
+ */
+	bl	early_init
+
+/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains
+ * the physical address we are running at, returned by early_init()
+ */
+ 	bl	mmu_off
+__after_mmu_off:
+	bl	clear_bats
+	bl	flush_tlbs
+
+	bl	initial_bats
+	bl	load_segment_registers
+	bl	reloc_offset
+	bl	early_hash_table
+#if defined(CONFIG_BOOTX_TEXT)
+	bl	setup_disp_bat
+#endif
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+	bl	setup_cpm_bat
+#endif
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+	bl	setup_usbgecko_bat
+#endif
+
+/*
+ * Call setup_cpu for CPU 0 and initialize 6xx Idle
+ */
+	bl	reloc_offset
+	li	r24,0			/* cpu# */
+	bl	call_setup_cpu		/* Call setup_cpu for this CPU */
+	bl	reloc_offset
+	bl	init_idle_6xx
+
+
+/*
+ * We need to run with _start at physical address 0.
+ * On CHRP, we are loaded at 0x10000 since OF on CHRP uses
+ * the exception vectors at 0 (and therefore this copy
+ * overwrites OF's exception vectors with our own).
+ * The MMU is off at this point.
+ */
+	bl	reloc_offset
+	mr	r26,r3	/* keep the reloc_offset result in r26 */
+	addis	r4,r3,KERNELBASE@h	/* current address of _start */
+	lis	r5,PHYSICAL_START@h
+	cmplw	0,r4,r5			/* already running at PHYSICAL_START? */
+	bne	relocate_kernel	/* no: go to relocate_kernel; yes: fall through to turn_on_mmu */
+/*
+ * we now have the 1st 16M of ram mapped with the bats.
+ * prep needs the mmu to be turned on here, but pmac already has it on.
+ * this shouldn't bother the pmac since it just gets turned on again
+ * as we jump to our code at KERNELBASE. -- Cort
+ * Actually no, pmac doesn't have it on any more. BootX enters with MMU
+ * off, and in other cases, we now turn it off before changing BATs above.
+ */
+turn_on_mmu:
+	mfmsr	r0
+	ori	r0,r0,MSR_DR|MSR_IR|MSR_RI	/* current MSR plus DR, IR and RI */
+	mtspr	SPRN_SRR1,r0
+	lis	r0,start_here@h
+	ori	r0,r0,start_here@l	/* r0 = link-time (virtual) address of start_here */
+	mtspr	SPRN_SRR0,r0
+	rfi				/* enables MMU */
+
+/*
+ * We need __secondary_hold as a place to hold the other cpus on
+ * an SMP machine, even when we are running a UP kernel.
+ */
+	. = 0xc0			/* for prep bootloader */
+	li	r3,1			/* MTX only has 1 cpu */
+	.globl	__secondary_hold
+__secondary_hold:
+	/* tell the master we're here */
+	stw	r3,__secondary_hold_acknowledge@l(0)	/* store our cpu # (r3) in the acknowledge word */
+#ifdef CONFIG_SMP
+100:	lwz	r4,0(0)	/* r4 = word at address 0 */
+	/* wait until we're told to start */
+	cmpw	0,r4,r3
+	bne	100b	/* spin until it equals our cpu # */
+	/* our cpu # was at addr 0 - go */
+	mr	r24,r3			/* cpu # */
+	b	__secondary_start
+#else
+	b	.	/* UP kernel: spin here forever */
+#endif /* CONFIG_SMP */
+
+	.globl	__secondary_hold_spinloop
+__secondary_hold_spinloop:
+	.long	0
+	.globl	__secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+	.long	-1	/* initial value, overwritten with a cpu # by __secondary_hold */
+
+/* System reset */
+/* core99 pmac starts the secondary here by changing the vector, and
+   putting it back to what it was (unknown_async_exception) when done.  */
+	EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception)
+
+/* Machine check */
+/*
+ * On CHRP, this is complicated by the fact that we could get a
+ * machine check inside RTAS, and we have no guarantee that certain
+ * critical registers will have the values we expect.  The set of
+ * registers that might have bad values includes all the GPRs
+ * and all the BATs.  We indicate that we are in RTAS by putting
+ * a non-zero value, the address of the exception frame to use,
+ * in thread.rtas_sp.  The machine check handler checks thread.rtas_sp
+ * and uses its value if it is non-zero.
+ * (Other exception handlers assume that r1 is a valid kernel stack
+ * pointer when we take an exception from supervisor mode.)
+ *	-- paulus.
+ */
+	START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+	EXCEPTION_PROLOG_0
+#ifdef CONFIG_PPC_CHRP
+	mtspr	SPRN_SPRG_SCRATCH2,r1	/* stash r1 */
+	mfspr	r1, SPRN_SPRG_THREAD
+	lwz	r1, RTAS_SP(r1)	/* r1 = thread.rtas_sp */
+	cmpwi	cr1, r1, 0
+	bne	cr1, 7f	/* non-zero: keep rtas_sp in r1 and skip EXCEPTION_PROLOG_1 */
+	mfspr	r1, SPRN_SPRG_SCRATCH2	/* zero: restore the original r1 */
+#endif /* CONFIG_PPC_CHRP */
+	EXCEPTION_PROLOG_1
+7:	EXCEPTION_PROLOG_2 0x200 MachineCheck
+#ifdef CONFIG_PPC_CHRP
+	beq	cr1, 1f	/* rtas_sp was zero (assuming the prologs leave cr1 alone): normal path */
+	twi	31, 0, 0	/* otherwise trap unconditionally */
+#endif
+1:	prepare_transfer_to_handler
+	bl	machine_check_exception
+	b	interrupt_return
+
+/* Data access exception. */
+	START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess)
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+	mtspr	SPRN_SPRG_SCRATCH2,r10	/* free r10 */
+	mfspr	r10, SPRN_SPRG_THREAD
+	stw	r11, THR11(r10)	/* save r11 in the THR11 slot */
+	mfspr	r10, SPRN_DSISR
+	mfcr	r11	/* keep CR in r11 across the test below */
+	andis.	r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
+	mfspr	r10, SPRN_SPRG_THREAD
+	beq	hash_page_dsi	/* none of those DSISR bits set: try hash_page first */
+.Lhash_page_dsi_cont:
+	mtcr	r11	/* restore CR */
+	lwz	r11, THR11(r10)	/* restore r11 */
+	mfspr	r10, SPRN_SPRG_SCRATCH2	/* restore r10 */
+MMU_FTR_SECTION_ELSE
+	b	1f
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#endif /* CONFIG_PPC_BOOK3S_604 */
+1:	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+	EXCEPTION_PROLOG_1
+	EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	lwz	r5, _DSISR(r1)	/* r5 = DSISR value held at _DSISR(r1) */
+	andis.	r0, r5, DSISR_DABRMATCH@h
+	bne-	1f	/* DABR match: call do_break instead of do_page_fault */
+	bl	do_page_fault
+	b	interrupt_return
+1:	bl	do_break
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+
+/* Instruction access exception. */
+	START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess)
+	mtspr	SPRN_SPRG_SCRATCH0,r10	/* free r10 */
+	mtspr	SPRN_SPRG_SCRATCH1,r11	/* free r11 */
+	mfspr	r10, SPRN_SPRG_THREAD
+	mfspr	r11, SPRN_SRR0
+	stw	r11, SRR0(r10)	/* copy SRR0 into the SRR0 slot at r10 */
+	mfspr	r11, SPRN_SRR1		/* check whether user or kernel */
+	stw	r11, SRR1(r10)	/* copy SRR1 into the SRR1 slot at r10 */
+	mfcr	r10	/* r10 = CR from here on */
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+	andis.	r11, r11, SRR1_ISI_NOPT@h	/* no pte found? */
+	bne	hash_page_isi
+.Lhash_page_isi_cont:
+	mfspr	r11, SPRN_SRR1		/* check whether user or kernel */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif /* CONFIG_PPC_BOOK3S_604 */
+	andi.	r11, r11, MSR_PR	/* CR0 reflects the MSR_PR bit of SRR1 */
+
+	EXCEPTION_PROLOG_1
+	EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess
+	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+	stw	r5, _DSISR(r11)	/* store them in the _DSISR slot */
+	stw	r12, _DAR(r11)	/* store r12 in the _DAR slot (presumably SRR0 from the prolog - confirm) */
+	prepare_transfer_to_handler
+	bl	do_page_fault
+	b	interrupt_return
+
+/* External interrupt: the vector is wired to do_IRQ */
+	EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
+
+/* Alignment exception: calls alignment_exception, then restores the non-volatile GPRs */
+	START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+	EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+	prepare_transfer_to_handler
+	bl	alignment_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+/* Program check exception: calls program_check_exception, then restores the non-volatile GPRs */
+	START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+	EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+	prepare_transfer_to_handler
+	bl	program_check_exception
+	REST_NVGPRS(r1)
+	b	interrupt_return
+
+/* Floating-point unavailable */
+	START_EXCEPTION(0x800, FPUnavailable)
+#ifdef CONFIG_PPC_FPU
+BEGIN_FTR_SECTION
+/*
+ * Certain Freescale cores don't have a FPU and treat fp instructions
+ * as a FP Unavailable exception.  Redirect to illegal/emulation handling.
+ */
+	b 	ProgramCheck
+END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
+	EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable
+	beq	1f	/* CR0.eq set (presumably by the prolog when not from user): kernel path */
+	bl	load_up_fpu		/* if from user, just load it up */
+	b	fast_exception_return
+1:	prepare_transfer_to_handler
+	bl	kernel_fp_unavailable_exception
+	b	interrupt_return
+#else
+	b 	ProgramCheck	/* no FPU support built in: handle as a program check */
+#endif /* CONFIG_PPC_FPU */
+
+/* Decrementer: the vector is wired to timer_interrupt */
+	EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
+
+	EXCEPTION(0xa00, Trap_0a, unknown_exception)
+	EXCEPTION(0xb00, Trap_0b, unknown_exception)
+
+/* System call: the whole entry path comes from the SYSCALL_ENTRY macro */
+	START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+	SYSCALL_ENTRY	INTERRUPT_SYSCALL
+
+	EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+	EXCEPTION(0xe00, Trap_0e, unknown_exception)
+
+/*
+ * The Altivec unavailable trap is at 0x0f20.  Foo.
+ * We effectively remap it to 0x3000.
+ * We include an altivec unavailable exception vector even if
+ * not configured for Altivec, so that you can't panic a
+ * non-altivec kernel running on a machine with altivec just
+ * by executing an altivec instruction.
+ */
+	START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap)
+	b	PerformanceMonitor	/* the vector only branches to the full handler */
+
+	START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap)
+	b	AltiVecUnavailable	/* the vector only branches to the full handler */
+
+	__HEAD
+/*
+ * Handle TLB miss for instruction on 603/603e.
+ * Note: we get an alternate set of r0 - r3 to use automatically.
+ */
+	. = INTERRUPT_INST_TLB_MISS_603
+InstructionTLBMiss:
+/*
+ * r0:	scratch
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	/* Get PTE (linux-style) and check access */
+	mfspr	r3,SPRN_IMISS	/* r3 = address that missed */
+#ifdef CONFIG_MODULES
+	lis	r1, TASK_SIZE@h		/* check if kernel address */
+	cmplw	0,r1,r3
+#endif
+	mfspr	r2, SPRN_SDR1
+	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER	/* flags required for a user address */
+	rlwinm	r2, r2, 28, 0xfffff000	/* r2 = page-aligned table base derived from SDR1 */
+#ifdef CONFIG_MODULES
+	bgt-	112f	/* TASK_SIZE > address: user address, keep r1/r2 as set above */
+	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
+#endif
+112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	InstructionAddressInvalid	/* return if no mapping */
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r0,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r0		/* check access & ~permission */
+	bne-	InstructionAddressInvalid /* return if access not permitted */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	rlwimi	r0,r0,32-2,31,31	/* _PAGE_USER -> PP lsb */
+	ori	r1, r1, 0xe06		/* clear out reserved bits */
+	andc	r1, r0, r1		/* PP = user? 1 : 0 */
+BEGIN_FTR_SECTION
+	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+	mtspr	SPRN_RPA,r1
+	tlbli	r3
+	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi
+InstructionAddressInvalid:
+	mfspr	r3,SPRN_SRR1
+	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
+
+	addis	r1,r1,0x2000	/* add 0x20000000 to the value */
+	mtspr	SPRN_DSISR,r1	/* (shouldn't be needed) */
+	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
+	or	r2,r2,r1	/* merge the bits built in r1 */
+	mtspr	SPRN_SRR1,r2
+	mfspr	r1,SPRN_IMISS	/* Get failing address */
+	rlwinm.	r2,r2,0,31,31	/* Check for little endian access */
+	rlwimi	r2,r2,1,30,30	/* change 1 -> 3 */
+	xor	r1,r1,r2	/* flips the two low address bits only when r2 == 3 */
+	mtspr	SPRN_DAR,r1	/* Set fault address */
+	mfmsr	r0		/* Restore "normal" registers */
+	xoris	r0,r0,MSR_TGPR>>16	/* toggle the MSR_TGPR bit */
+	mtcrf	0x80,r3		/* Restore CR0 */
+	mtmsr	r0
+	b	InstructionAccess
+
+/*
+ * Handle TLB miss for DATA Load operation on 603/603e
+ */
+	. = INTERRUPT_DATA_LOAD_TLB_MISS_603
+DataLoadTLBMiss:
+/*
+ * r0:	scratch
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	/* Get PTE (linux-style) and check access */
+	mfspr	r3,SPRN_DMISS	/* r3 = address that missed */
+	lis	r1, TASK_SIZE@h		/* check if kernel address */
+	cmplw	0,r1,r3
+	mfspr	r2, SPRN_SDR1
+	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER	/* flags required for a user address */
+	rlwinm	r2, r2, 28, 0xfffff000	/* r2 = page-aligned table base derived from SDR1 */
+	bgt-	112f	/* TASK_SIZE > address: user address, keep r1/r2 as set above */
+	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED
+	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
+112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	DataAddressInvalid	/* return if no mapping */
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r0,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r0		/* check access & ~permission */
+	bne-	DataAddressInvalid	/* return if access not permitted */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	rlwinm	r1,r0,32-9,30,30	/* _PAGE_RW -> PP msb */
+	rlwimi	r0,r0,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r1,r0,32-3,24,24	/* _PAGE_RW -> _PAGE_DIRTY */
+	rlwimi	r0,r0,32-1,31,31	/* _PAGE_USER -> PP lsb */
+	xori	r1,r1,_PAGE_DIRTY	/* clear dirty when not rw */
+	ori	r1,r1,0xe04		/* clear out reserved bits */
+	andc	r1,r0,r1		/* PP = user? rw? 1: 3: 0 */
+BEGIN_FTR_SECTION
+	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+	mtspr	SPRN_RPA,r1
+BEGIN_MMU_FTR_SECTION
+	li	r0,1
+	mfspr	r1,SPRN_SPRG_603_LRU	/* r1 = LRU bitmap kept in an SPRG */
+	rlwinm	r2,r3,20,27,31		/* Get Address bits 15:19 */
+	slw	r0,r0,r2	/* r0 = 1 << index */
+	xor	r1,r0,r1	/* toggle that bit in the bitmap */
+	srw	r0,r1,r2	/* bring the new bit value down to bit 31 */
+	mtspr   SPRN_SPRG_603_LRU,r1
+	mfspr	r2,SPRN_SRR1
+	rlwimi	r2,r0,31-14,14,14	/* insert it into SRR1 bit 14 */
+	mtspr   SPRN_SRR1,r2
+	mtcrf	0x80,r2	/* restore CR0 from SRR1 */
+	tlbld	r3
+	rfi
+MMU_FTR_SECTION_ELSE
+	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r2
+	tlbld	r3
+	rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+DataAddressInvalid:
+	mfspr	r3,SPRN_SRR1
+	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
+	addis	r1,r1,0x2000	/* add 0x20000000 to the value */
+	mtspr	SPRN_DSISR,r1
+	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
+	mtspr	SPRN_SRR1,r2
+	mfspr	r1,SPRN_DMISS	/* Get failing address */
+	rlwinm.	r2,r2,0,31,31	/* Check for little endian access */
+	beq	20f		/* Jump if big endian */
+	xori	r1,r1,3	/* little endian: flip the two low address bits */
+20:	mtspr	SPRN_DAR,r1	/* Set fault address */
+	mfmsr	r0		/* Restore "normal" registers */
+	xoris	r0,r0,MSR_TGPR>>16	/* toggle the MSR_TGPR bit */
+	mtcrf	0x80,r3		/* Restore CR0 */
+	mtmsr	r0
+	b	DataAccess
+
+/*
+ * Handle TLB miss for DATA Store on 603/603e
+ */
+	. = INTERRUPT_DATA_STORE_TLB_MISS_603
+DataStoreTLBMiss:
+/*
+ * r0:	scratch
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	/* Get PTE (linux-style) and check access */
+	mfspr	r3,SPRN_DMISS	/* r3 = address that missed */
+	lis	r1, TASK_SIZE@h		/* check if kernel address */
+	cmplw	0,r1,r3
+	mfspr	r2, SPRN_SDR1
+	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER	/* flags required for a user address */
+	rlwinm	r2, r2, 28, 0xfffff000	/* r2 = page-aligned table base derived from SDR1 */
+	bgt-	112f	/* TASK_SIZE > address: user address, keep r1/r2 as set above */
+	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
+112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	DataAddressInvalid	/* return if no mapping */
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r0,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r0		/* check access & ~permission */
+	bne-	DataAddressInvalid	/* return if access not permitted */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	rlwimi	r0,r0,32-2,31,31	/* _PAGE_USER -> PP lsb */
+	li	r1,0xe06		/* clear out reserved bits & PP msb */
+	andc	r1,r0,r1		/* PP = user? 1: 0 */
+BEGIN_FTR_SECTION
+	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+	mtspr	SPRN_RPA,r1
+	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r2	/* NOTE(review): both alternatives below reload SRR1 and redo this - looks redundant, confirm */
+BEGIN_MMU_FTR_SECTION
+	li	r0,1
+	mfspr	r1,SPRN_SPRG_603_LRU	/* r1 = LRU bitmap kept in an SPRG */
+	rlwinm	r2,r3,20,27,31		/* Get Address bits 15:19 */
+	slw	r0,r0,r2	/* r0 = 1 << index */
+	xor	r1,r0,r1	/* toggle that bit in the bitmap */
+	srw	r0,r1,r2	/* bring the new bit value down to bit 31 */
+	mtspr   SPRN_SPRG_603_LRU,r1
+	mfspr	r2,SPRN_SRR1
+	rlwimi	r2,r0,31-14,14,14	/* insert it into SRR1 bit 14 */
+	mtspr   SPRN_SRR1,r2
+	mtcrf	0x80,r2	/* restore CR0 from SRR1 */
+	tlbld	r3
+	rfi
+MMU_FTR_SECTION_ELSE
+	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r2
+	tlbld	r3
+	rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
+#ifndef CONFIG_ALTIVEC
+#define altivec_assist_exception	unknown_exception	/* without AltiVec, vector 0x1600 ends up in unknown_exception */
+#endif /* !CONFIG_ALTIVEC */
+
+#ifndef CONFIG_TAU_INT
+#define TAUException	unknown_async_exception	/* without TAU interrupts, vector 0x1700 ends up in unknown_async_exception */
+#endif /* !CONFIG_TAU_INT */
+
+	EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception)
+	EXCEPTION(0x1400, SMI, SMIException)
+	EXCEPTION(0x1500, Trap_15, unknown_exception)
+	EXCEPTION(0x1600, Trap_16, altivec_assist_exception)
+	EXCEPTION(0x1700, Trap_17, TAUException)
+	EXCEPTION(0x1800, Trap_18, unknown_exception)
+	EXCEPTION(0x1900, Trap_19, unknown_exception)
+	EXCEPTION(0x1a00, Trap_1a, unknown_exception)
+	EXCEPTION(0x1b00, Trap_1b, unknown_exception)
+	EXCEPTION(0x1c00, Trap_1c, unknown_exception)
+	EXCEPTION(0x1d00, Trap_1d, unknown_exception)
+	EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+	EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+	EXCEPTION(0x2000, RunMode, RunModeException)
+	EXCEPTION(0x2100, Trap_21, unknown_exception)
+	EXCEPTION(0x2200, Trap_22, unknown_exception)
+	EXCEPTION(0x2300, Trap_23, unknown_exception)
+	EXCEPTION(0x2400, Trap_24, unknown_exception)
+	EXCEPTION(0x2500, Trap_25, unknown_exception)
+	EXCEPTION(0x2600, Trap_26, unknown_exception)
+	EXCEPTION(0x2700, Trap_27, unknown_exception)
+	EXCEPTION(0x2800, Trap_28, unknown_exception)
+	EXCEPTION(0x2900, Trap_29, unknown_exception)
+	EXCEPTION(0x2a00, Trap_2a, unknown_exception)
+	EXCEPTION(0x2b00, Trap_2b, unknown_exception)
+	EXCEPTION(0x2c00, Trap_2c, unknown_exception)
+	EXCEPTION(0x2d00, Trap_2d, unknown_exception)
+	EXCEPTION(0x2e00, Trap_2e, unknown_exception)
+	EXCEPTION(0x2f00, Trap_2f, unknown_exception)
+
+	__HEAD
+	. = 0x3000
+
+#ifdef CONFIG_PPC_BOOK3S_604
+.macro save_regs_thread		thread
+	stw	r0, THR0(\thread)
+	stw	r3, THR3(\thread)
+	stw	r4, THR4(\thread)
+	stw	r5, THR5(\thread)
+	stw	r6, THR6(\thread)
+	stw	r8, THR8(\thread)
+	stw	r9, THR9(\thread)
+	mflr	r0
+	stw	r0, THLR(\thread)
+	mfctr	r0
+	stw	r0, THCTR(\thread)
+.endm
+
+.macro restore_regs_thread	thread
+	lwz	r0, THLR(\thread)
+	mtlr	r0
+	lwz	r0, THCTR(\thread)
+	mtctr	r0
+	lwz	r0, THR0(\thread)
+	lwz	r3, THR3(\thread)
+	lwz	r4, THR4(\thread)
+	lwz	r5, THR5(\thread)
+	lwz	r6, THR6(\thread)
+	lwz	r8, THR8(\thread)
+	lwz	r9, THR9(\thread)
+.endm
+
+hash_page_dsi:
+	save_regs_thread	r10
+	mfdsisr	r3
+	mfdar	r4
+	mfsrr0	r5
+	mfsrr1	r9
+	rlwinm	r3, r3, 32 - 15, _PAGE_RW	/* DSISR_STORE -> _PAGE_RW */
+	bl	hash_page
+	mfspr	r10, SPRN_SPRG_THREAD
+	restore_regs_thread r10
+	b	.Lhash_page_dsi_cont
+
+hash_page_isi:
+	mr	r11, r10
+	mfspr	r10, SPRN_SPRG_THREAD
+	save_regs_thread	r10
+	li	r3, 0
+	lwz	r4, SRR0(r10)
+	lwz	r9, SRR1(r10)
+	bl	hash_page
+	mfspr	r10, SPRN_SPRG_THREAD
+	restore_regs_thread r10
+	mr	r10, r11
+	b	.Lhash_page_isi_cont
+
+	.globl fast_hash_page_return
+fast_hash_page_return:
+	andis.	r10, r9, SRR1_ISI_NOPT@h	/* Set on ISI, cleared on DSI */
+	mfspr	r10, SPRN_SPRG_THREAD
+	restore_regs_thread r10
+	bne	1f
+
+	/* DSI */
+	mtcr	r11
+	lwz	r11, THR11(r10)
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	rfi
+
+1:	/* ISI */
+	mtcr	r11
+	mfspr	r11, SPRN_SPRG_SCRATCH1
+	mfspr	r10, SPRN_SPRG_SCRATCH0
+	rfi
+#endif /* CONFIG_PPC_BOOK3S_604 */
+
+#ifdef CONFIG_VMAP_STACK
+	vmap_stack_overflow_exception
+#endif
+
+	__HEAD
+AltiVecUnavailable:
+	EXCEPTION_PROLOG 0xf20 AltiVecUnavailable
+#ifdef CONFIG_ALTIVEC
+	beq	1f
+	bl	load_up_altivec		/* if from user, just load it up */
+	b	fast_exception_return
+#endif /* CONFIG_ALTIVEC */
+1:	prepare_transfer_to_handler
+	bl	altivec_unavailable_exception
+	b	interrupt_return
+
+	__HEAD
+PerformanceMonitor:
+	EXCEPTION_PROLOG 0xf00 PerformanceMonitor
+	prepare_transfer_to_handler
+	bl	performance_monitor_exception
+	b	interrupt_return
+
+
+	__HEAD
+/*
+ * This code is jumped to from the startup code to copy
+ * the kernel image to physical address PHYSICAL_START.
+ */
+relocate_kernel:			/* r4 (copy source) is not loaded here */
+	lis	r3,PHYSICAL_START@h	/* Destination base address */
+	li	r6,0			/* Destination offset */
+	li	r5,0x4000		/* # bytes of memory to copy */
+	bl	copy_and_flush		/* copy the first 0x4000 bytes */
+	addi	r0,r3,4f@l		/* jump to the address of 4f */
+	mtctr	r0			/* in copy and do the rest. */
+	bctr				/* jump to the copy */
+4:	lis	r5,_end-KERNELBASE@h	/* new limit: _end - KERNELBASE; r6 */
+	ori	r5,r5,_end-KERNELBASE@l	/* carries on from the first copy */
+	bl	copy_and_flush		/* copy the rest */
+	b	turn_on_mmu
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ */
+_GLOBAL(copy_and_flush)
+	addi	r5,r5,-4		/* bias limit and offset by -4: the loop */
+	addi	r6,r6,-4		/* bumps r6 before each word access */
+4:	li	r0,L1_CACHE_BYTES/4	/* CTR = number of words per cache line */
+	mtctr	r0
+3:	addi	r6,r6,4			/* copy a cache line */
+	lwzx	r0,r6,r4		/* load word from source (r4 + r6) */
+	stwx	r0,r6,r3		/* store it to destination (r3 + r6) */
+	bdnz	3b
+	dcbst	r6,r3			/* write it to memory */
+	sync
+	icbi	r6,r3			/* flush the icache line */
+	cmplw	0,r6,r5			/* another line while offset < limit */
+	blt	4b
+	sync				/* additional sync needed on g4 */
+	isync
+	addi	r5,r5,4			/* undo the -4 bias applied on entry */
+	addi	r6,r6,4
+	blr
+
+#ifdef CONFIG_SMP
+	.globl __secondary_start_mpc86xx
+__secondary_start_mpc86xx:
+	mfspr	r3, SPRN_PIR
+	stw	r3, __secondary_hold_acknowledge@l(0)
+	mr	r24, r3			/* cpu # */
+	b	__secondary_start
+
+	.globl	__secondary_start_pmac_0
+__secondary_start_pmac_0:
+	/* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
+	li	r24,0
+	b	1f
+	li	r24,1
+	b	1f
+	li	r24,2
+	b	1f
+	li	r24,3
+1:
+	/* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0
+	   set to map the 0xf0000000 - 0xffffffff region */
+	mfmsr	r0
+	rlwinm	r0,r0,0,28,26		/* clear DR (0x10) */
+	mtmsr	r0
+	isync
+
+	.globl	__secondary_start
+__secondary_start:
+	/* Copy some CPU settings from CPU 0 */
+	bl	__restore_cpu_setup
+
+	lis	r3,-KERNELBASE@h
+	mr	r4,r24
+	bl	call_setup_cpu		/* Call setup_cpu for this CPU */
+	lis	r3,-KERNELBASE@h
+	bl	init_idle_6xx
+
+	/* get current's stack and current */
+	lis	r2,secondary_current@ha
+	tophys(r2,r2)
+	lwz	r2,secondary_current@l(r2)
+	tophys(r1,r2)
+	lwz	r1,TASK_STACK(r1)
+
+	/* stack */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
+	li	r0,0
+	tophys(r3,r1)
+	stw	r0,0(r3)
+
+	/* load up the MMU */
+	bl	load_segment_registers
+	bl	load_up_mmu
+
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* phys address of our thread_struct */
+	mtspr	SPRN_SPRG_THREAD,r4
+BEGIN_MMU_FTR_SECTION
+	lis	r4, (swapper_pg_dir - PAGE_OFFSET)@h
+	ori	r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+	rlwinm	r4, r4, 4, 0xffff01ff
+	mtspr	SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
+
+	/* enable MMU and jump to start_secondary */
+	li	r4,MSR_KERNEL
+	lis	r3,start_secondary@h
+	ori	r3,r3,start_secondary@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	rfi
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+#include "../kvm/book3s_rmhandlers.S"
+#endif
+
+/*
+ * Load stuff into the MMU.  Intended to be called with
+ * IR=0 and DR=0.
+ */
+SYM_FUNC_START_LOCAL(early_hash_table)
+	sync			/* Force all PTE updates to finish */
+	isync
+	tlbia			/* Clear all TLB entries */
+	sync			/* wait for tlbia/tlbie to finish */
+	TLBSYNC			/* ... on all CPUs */
+	/* Load the SDR1 register (hash table base & size) */
+	lis	r6, early_hash - PAGE_OFFSET@h
+	ori	r6, r6, 3	/* 256kB table */
+	mtspr	SPRN_SDR1, r6
+	blr
+SYM_FUNC_END(early_hash_table)
+
+SYM_FUNC_START_LOCAL(load_up_mmu)
+	sync			/* Force all PTE updates to finish */
+	isync
+	tlbia			/* Clear all TLB entries */
+	sync			/* wait for tlbia/tlbie to finish */
+	TLBSYNC			/* ... on all CPUs */
+BEGIN_MMU_FTR_SECTION
+	/* Load the SDR1 register (hash table base & size) */
+	lis	r6,_SDR1@ha
+	tophys(r6,r6)
+	lwz	r6,_SDR1@l(r6)
+	mtspr	SPRN_SDR1,r6
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+
+/* Load the BAT registers with the values set up by MMU_init. */
+	lis	r3,BATS@ha
+	addi	r3,r3,BATS@l
+	tophys(r3,r3)
+	LOAD_BAT(0,r3,r4,r5)
+	LOAD_BAT(1,r3,r4,r5)
+	LOAD_BAT(2,r3,r4,r5)
+	LOAD_BAT(3,r3,r4,r5)
+BEGIN_MMU_FTR_SECTION
+	LOAD_BAT(4,r3,r4,r5)
+	LOAD_BAT(5,r3,r4,r5)
+	LOAD_BAT(6,r3,r4,r5)
+	LOAD_BAT(7,r3,r4,r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+	blr
+SYM_FUNC_END(load_up_mmu)
+
+_GLOBAL(load_segment_registers)
+	li	r0, NUM_USER_SEGMENTS /* load up user segment register values */
+	mtctr	r0		/* for context 0 */
+#ifdef CONFIG_PPC_KUEP
+	lis	r3, SR_NX@h	/* Nx = 1, Kp = 0, Ks = 0, VSID = 0 */
+#else
+	li	r3, 0		/* Kp = 0, Ks = 0, VSID = 0 */
+#endif
+	li	r4, 0		/* start with the segment at address 0 */
+3:	mtsrin	r3, r4		/* SR selected by address r4 = r3 */
+	addi	r3, r3, 0x111	/* increment VSID */
+	addis	r4, r4, 0x1000	/* address of next segment */
+	bdnz	3b
+	li	r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
+	mtctr	r0			/* for context 0 */
+	rlwinm	r3, r3, 0, ~SR_NX	/* Nx = 0 */
+	rlwinm	r3, r3, 0, ~SR_KS	/* Ks = 0 */
+	oris	r3, r3, SR_KP@h		/* Kp = 1 */
+3:	mtsrin	r3, r4		/* r3/r4 continue from the user loop */
+	addi	r3, r3, 0x111	/* increment VSID */
+	addis	r4, r4, 0x1000	/* address of next segment */
+	bdnz	3b
+	blr
+
+/*
+ * This is where the main kernel code starts.
+ */
+start_here:
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+	/* Set up for using our exception vectors */
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG_THREAD,r4
+BEGIN_MMU_FTR_SECTION
+	lis	r4, (swapper_pg_dir - PAGE_OFFSET)@h
+	ori	r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+	rlwinm	r4, r4, 4, 0xffff01ff
+	mtspr	SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
+/*
+ * Do early platform-specific initialization,
+ * and set up the MMU.
+ */
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
+	li	r3,0
+	mr	r4,r31
+	bl	machine_init
+	bl	__save_cpu_setup
+	bl	MMU_init
+	bl	MMU_init_hw_patch
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * for SDR1 (hash table pointer) and the segment registers
+ * and change to using our exception vectors.
+ */
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+
+	.align	4
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi
+/* Load up the kernel context */
+2:	bl	load_up_mmu
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Add helper information for the Abatron bdiGDB debugger.
+	 * We do this here because we know the mmu is disabled, and
+	 * will be enabled for real in just a few instructions.
+	 */
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(0)	/* This must match your Abatron config */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	tophys(r5, r5)
+	stw	r6, 0(r5)
+#endif /* CONFIG_BDI_SWITCH */
+
+/* Now turn on the MMU for real! */
+	li	r4,MSR_KERNEL
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	rfi
+
+/*
+ * An undocumented "feature" of 604e requires that the v bit
+ * be cleared before changing BAT values.
+ *
+ * Also, newer IBM firmware does not clear bat3 and 4 so
+ * this makes sure it's done.
+ *  -- Cort
+ */
+SYM_FUNC_START_LOCAL(clear_bats)
+	li	r10,0
+
+	mtspr	SPRN_DBAT0U,r10
+	mtspr	SPRN_DBAT0L,r10
+	mtspr	SPRN_DBAT1U,r10
+	mtspr	SPRN_DBAT1L,r10
+	mtspr	SPRN_DBAT2U,r10
+	mtspr	SPRN_DBAT2L,r10
+	mtspr	SPRN_DBAT3U,r10
+	mtspr	SPRN_DBAT3L,r10
+	mtspr	SPRN_IBAT0U,r10
+	mtspr	SPRN_IBAT0L,r10
+	mtspr	SPRN_IBAT1U,r10
+	mtspr	SPRN_IBAT1L,r10
+	mtspr	SPRN_IBAT2U,r10
+	mtspr	SPRN_IBAT2L,r10
+	mtspr	SPRN_IBAT3U,r10
+	mtspr	SPRN_IBAT3L,r10
+BEGIN_MMU_FTR_SECTION
+	/* Here's a tweak: at this point, CPU setup has
+	 * not been called yet, so HIGH_BAT_EN may not be
+	 * set in HID0 for the 745x processors. However, it
+	 * seems that doesn't affect our ability to actually
+	 * write to these SPRs.
+	 */
+	mtspr	SPRN_DBAT4U,r10
+	mtspr	SPRN_DBAT4L,r10
+	mtspr	SPRN_DBAT5U,r10
+	mtspr	SPRN_DBAT5L,r10
+	mtspr	SPRN_DBAT6U,r10
+	mtspr	SPRN_DBAT6L,r10
+	mtspr	SPRN_DBAT7U,r10
+	mtspr	SPRN_DBAT7L,r10
+	mtspr	SPRN_IBAT4U,r10
+	mtspr	SPRN_IBAT4L,r10
+	mtspr	SPRN_IBAT5U,r10
+	mtspr	SPRN_IBAT5L,r10
+	mtspr	SPRN_IBAT6U,r10
+	mtspr	SPRN_IBAT6L,r10
+	mtspr	SPRN_IBAT7U,r10
+	mtspr	SPRN_IBAT7L,r10
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+	blr
+SYM_FUNC_END(clear_bats)
+
+_GLOBAL(update_bats)
+	lis	r4, 1f@h		/* r4 = address of label 1 below ... */
+	ori	r4, r4, 1f@l
+	tophys(r4, r4)			/* ... converted to physical */
+	mfmsr	r6			/* r6 = caller's MSR, restored on exit */
+	mflr	r7			/* r7 = return address (bl below clobbers LR) */
+	li	r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+	rlwinm	r0, r6, 0, ~MSR_RI	/* drop RI and EE before SRR0/SRR1 */
+	rlwinm	r0, r0, 0, ~MSR_EE	/* are loaded */
+	mtmsr	r0
+
+	.align	4
+	mtspr	SPRN_SRR0, r4
+	mtspr	SPRN_SRR1, r3
+	rfi				/* go to 1: with IR and DR off */
+1:	bl	clear_bats
+	lis	r3, BATS@ha		/* r3 = physical address of BATS */
+	addi	r3, r3, BATS@l
+	tophys(r3, r3)
+	LOAD_BAT(0, r3, r4, r5)
+	LOAD_BAT(1, r3, r4, r5)
+	LOAD_BAT(2, r3, r4, r5)
+	LOAD_BAT(3, r3, r4, r5)
+BEGIN_MMU_FTR_SECTION
+	LOAD_BAT(4, r3, r4, r5)
+	LOAD_BAT(5, r3, r4, r5)
+	LOAD_BAT(6, r3, r4, r5)
+	LOAD_BAT(7, r3, r4, r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+	li	r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+	mtmsr	r3			/* RI off again while SRR0/SRR1 are live */
+	mtspr	SPRN_SRR0, r7
+	mtspr	SPRN_SRR1, r6
+	rfi				/* return to caller with its original MSR */
+
+SYM_FUNC_START_LOCAL(flush_tlbs)
+	lis	r10, 0x40		/* r10 = 0x400000 */
+1:	addic.	r10, r10, -0x1000	/* step down by 0x1000; sets CR0 */
+	tlbie	r10			/* invalidate the TLB entry for r10 */
+	bgt	1b			/* repeat while r10 > 0 (0 was done too) */
+	sync
+	blr
+SYM_FUNC_END(flush_tlbs)
+
+SYM_FUNC_START_LOCAL(mmu_off)
+ 	addi	r4, r3, __after_mmu_off - _start /* resume address, rebased on r3 */
+	mfmsr	r3
+	andi.	r0,r3,MSR_DR|MSR_IR		/* MMU enabled? */
+	beqlr					/* no: just return to the caller */
+	andc	r3,r3,r0			/* clear whichever of IR/DR were set */
+
+	.align	4
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	sync
+	rfi					/* continues at r4, not at LR */
+SYM_FUNC_END(mmu_off)
+
+/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */
+SYM_FUNC_START_LOCAL(initial_bats)
+	lis	r11,PAGE_OFFSET@h
+	tophys(r8,r11)
+#ifdef CONFIG_SMP
+	ori	r8,r8,0x12		/* R/W access, M=1 */
+#else
+	ori	r8,r8,2			/* R/W access */
+#endif /* CONFIG_SMP */
+	ori	r11,r11,BL_256M<<2|0x2	/* set up BAT registers for 604 */
+
+	mtspr	SPRN_DBAT0L,r8		/* N.B. 6xx have valid */
+	mtspr	SPRN_DBAT0U,r11		/* bit in upper BAT register */
+	mtspr	SPRN_IBAT0L,r8
+	mtspr	SPRN_IBAT0U,r11
+	isync
+	blr
+SYM_FUNC_END(initial_bats)
+
+#ifdef CONFIG_BOOTX_TEXT
+SYM_FUNC_START_LOCAL(setup_disp_bat)
+	/*
+	 * setup the display bat prepared for us in prom.c
+	 */
+	mflr	r8
+	bl	reloc_offset
+	mtlr	r8
+	addis	r8,r3,disp_BAT@ha
+	addi	r8,r8,disp_BAT@l
+	cmpwi	cr0,r8,0
+	beqlr
+	lwz	r11,0(r8)
+	lwz	r8,4(r8)
+	mtspr	SPRN_DBAT3L,r8
+	mtspr	SPRN_DBAT3U,r11
+	blr
+SYM_FUNC_END(setup_disp_bat)
+#endif /* CONFIG_BOOTX_TEXT */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+SYM_FUNC_START_LOCAL(setup_cpm_bat)
+	lis	r8, 0xf000
+	ori	r8, r8,	0x002a
+	mtspr	SPRN_DBAT1L, r8
+
+	lis	r11, 0xf000
+	ori	r11, r11, (BL_1M << 2) | 2
+	mtspr	SPRN_DBAT1U, r11
+
+	blr
+SYM_FUNC_END(setup_cpm_bat)
+#endif
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+SYM_FUNC_START_LOCAL(setup_usbgecko_bat)
+	/* prepare a BAT for early io */
+#if defined(CONFIG_GAMECUBE)
+	lis	r8, 0x0c00
+#elif defined(CONFIG_WII)
+	lis	r8, 0x0d00
+#else
+#error Invalid platform for USB Gecko based early debugging.
+#endif
+	/*
+	 * The virtual address used must match the virtual address
+	 * associated to the fixmap entry FIX_EARLY_DEBUG_BASE.
+	 */
+	lis	r11, 0xfffe	/* top 128K */
+	ori	r8, r8, 0x002a	/* uncached, guarded, rw */
+	ori	r11, r11, 0x2	/* 128K, Vs=1, Vp=0 */
+	mtspr	SPRN_DBAT1L, r8
+	mtspr	SPRN_DBAT1U, r11
+	blr
+SYM_FUNC_END(setup_usbgecko_bat)
+#endif
+
+	.data
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
new file mode 100644
index 0000000000..b6b5b01a17
--- /dev/null
+++ b/arch/powerpc/kernel/head_booke.h
@@ -0,0 +1,527 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __HEAD_BOOKE_H__
+#define __HEAD_BOOKE_H__
+
+#include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+#include <asm/thread_info.h>	/* for THREAD_SHIFT */
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Macros used for common Book-e exception handling
+ */
+
+#define SET_IVOR(vector_number, vector_label)		\
+		li	r26,vector_label@l; 		\
+		mtspr	SPRN_IVOR##vector_number,r26;	\
+		sync
+
+#if (THREAD_SHIFT < 15)
+#define ALLOC_STACK_FRAME(reg, val)			\
+	addi reg,reg,val
+#else
+#define ALLOC_STACK_FRAME(reg, val)			\
+	addis	reg,reg,val@ha;				\
+	addi	reg,reg,val@l
+#endif
+
+/*
+ * Macro used to get to thread save registers.
+ * Note that entries 0-3 are used for the prolog code, and the remaining
+ * entries are available for specific exception use in the event a handler
+ * requires more than 4 scratch registers.
+ */
+#define THREAD_NORMSAVE(offset)	(THREAD_NORMSAVES + (offset * 4))
+
+#ifdef CONFIG_PPC_E500
+#define BOOKE_CLEAR_BTB(reg)									\
+START_BTB_FLUSH_SECTION								\
+	BTB_FLUSH(reg)									\
+END_BTB_FLUSH_SECTION
+#else
+#define BOOKE_CLEAR_BTB(reg)
+#endif
+
+
+#define NORMAL_EXCEPTION_PROLOG(trapno, intno)						     \
+	mtspr	SPRN_SPRG_WSCRATCH0, r10;	/* save one register */	     \
+	mfspr	r10, SPRN_SPRG_THREAD;					     \
+	stw	r11, THREAD_NORMSAVE(0)(r10);				     \
+	stw	r13, THREAD_NORMSAVE(2)(r10);				     \
+	mfcr	r13;			/* save CR in r13 for now	   */\
+	mfspr	r11, SPRN_SRR1;		                                     \
+	DO_KVM	BOOKE_INTERRUPT_##intno SPRN_SRR1;			     \
+	andi.	r11, r11, MSR_PR;	/* check whether user or kernel    */\
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL);				\
+	mtmsr	r11;							\
+	mr	r11, r1;						     \
+	beq	1f;							     \
+	BOOKE_CLEAR_BTB(r11)						\
+	/* if from user, start at top of this thread's kernel stack */       \
+	lwz	r11, TASK_STACK - THREAD(r10);				     \
+	ALLOC_STACK_FRAME(r11, THREAD_SIZE);				     \
+1 :	subi	r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */     \
+	stw	r13, _CCR(r11);		/* save various registers */	     \
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mfspr	r13, SPRN_SPRG_RSCRATCH0;				     \
+	stw	r13, GPR10(r11);					     \
+	lwz	r12, THREAD_NORMSAVE(0)(r10);				     \
+	stw	r12,GPR11(r11);						     \
+	lwz	r13, THREAD_NORMSAVE(2)(r10); /* restore r13 */		     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r12,SPRN_SRR0;						     \
+	stw	r1, GPR1(r11);						     \
+	mfspr	r9,SPRN_SRR1;						     \
+	stw	r1, 0(r11);						     \
+	mr	r1, r11;						     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+	stw	r0,GPR0(r1)
+	lis	r10, STACK_FRAME_REGS_MARKER@ha	/* exception frame marker */
+	addi	r10, r10, STACK_FRAME_REGS_MARKER@l
+	stw	r10, STACK_INT_FRAME_MARKER(r1)
+	li	r10, \trapno
+	stw	r10,_TRAP(r1)
+	SAVE_GPRS(3, 8, r1)
+	SAVE_NVGPRS(r1)
+	stw	r2,GPR2(r1)
+	stw	r12,_NIP(r1)
+	stw	r9,_MSR(r1)
+	mfctr	r10
+	mfspr	r2,SPRN_SPRG_THREAD
+	stw	r10,_CTR(r1)
+	tovirt(r2, r2)
+	mfspr	r10,SPRN_XER
+	addi	r2, r2, -THREAD
+	stw	r10,_XER(r1)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_E500
+	andi.	r12,r9,MSR_PR
+	bne	777f
+	bl	prepare_transfer_to_handler
+777:
+#endif
+.endm
+
+.macro SYSCALL_ENTRY trapno intno srr1
+	mfspr	r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+	mtspr	SPRN_SPRG_WSCRATCH0, r10
+	stw	r11, THREAD_NORMSAVE(0)(r10)
+	stw	r13, THREAD_NORMSAVE(2)(r10)
+	mfcr	r13			/* save CR in r13 for now	   */
+	mfspr	r11, SPRN_SRR1
+	mtocrf	0x80, r11	/* check MSR[GS] without clobbering reg */
+	bf	3, 1975f
+	b	kvmppc_handler_\intno\()_\srr1
+1975:
+	mr	r12, r13
+	lwz	r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+	mfcr	r12
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#else
+	mfcr	r12
+#endif
+	mfspr	r9, SPRN_SRR1
+	BOOKE_CLEAR_BTB(r11)
+	mr	r11, r1
+	lwz	r1, TASK_STACK - THREAD(r10)
+	rlwinm	r12,r12,0,4,2	/* Clear SO bit in CR */
+	ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE)
+	stw	r12, _CCR(r1)
+	mfspr	r12,SPRN_SRR0
+	stw	r12,_NIP(r1)
+	b	transfer_to_syscall	/* jump to handler */
+.endm
+
+/* To handle the additional exception priority levels on 40x and Book-E
+ * processors we allocate a stack per additional priority level.
+ *
+ * On 40x critical is the only additional level
+ * On 44x/e500 we have critical and machine check
+ *
+ * Additionally we reserve a SPRG for each priority level so we can free up a
+ * GPR to use as the base for indirect access to the exception stacks.  This
+ * is necessary since the MMU is always on, for Book-E parts, and the stacks
+ * are offset from KERNELBASE.
+ *
+ * There is some space optimization to be had here if desired.  However
+ * to allow for a common kernel with support for debug exceptions either
+ * going to critical or their own debug level we aren't currently
+ * providing configurations that micro-optimize space usage.
+ */
+
+#define MC_STACK_BASE		mcheckirq_ctx
+#define CRIT_STACK_BASE		critirq_ctx
+
+/* only on e500mc */
+#define DBG_STACK_BASE		dbgirq_ctx
+
+#ifdef CONFIG_SMP
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
+	mfspr	r8,SPRN_PIR;	/* r8 = PIR value of this CPU */	\
+	slwi	r8,r8,2;	/* scale to a 4-byte table offset */	\
+	addis	r8,r8,level##_STACK_BASE@ha;		\
+	lwz	r8,level##_STACK_BASE@l(r8); /* load this CPU's table entry */ \
+	addi	r8,r8,THREAD_SIZE - INT_FRAME_SIZE; /* frame ends THREAD_SIZE above it */
+#else
+#define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
+	lis	r8,level##_STACK_BASE@ha;		\
+	lwz	r8,level##_STACK_BASE@l(r8); /* single entry, no PIR index */ \
+	addi	r8,r8,THREAD_SIZE - INT_FRAME_SIZE; /* frame ends THREAD_SIZE above it */
+#endif
+
+/*
+ * Exception prolog for critical/machine check exceptions.  This is a
+ * little different from the normal exception prolog above since a
+ * critical/machine check exception can potentially occur at any point
+ * during normal exception processing. Thus we cannot use the same SPRG
+ * registers as the normal prolog above. Instead we use a portion of the
+ * critical/machine check exception stack at low physical addresses.
+ */
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \
+	mtspr	SPRN_SPRG_WSCRATCH_##exc_level,r8;			     \
+	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
+	stw	r9,GPR9(r8);		/* save various registers	   */\
+	mfcr	r9;			/* save CR in r9 for now	   */\
+	stw	r10,GPR10(r8);						     \
+	stw	r11,GPR11(r8);						     \
+	stw	r9,_CCR(r8);		/* save CR on stack		   */\
+	mfspr	r11,exc_level_srr1;	/* check whether user or kernel    */\
+	DO_KVM	BOOKE_INTERRUPT_##intno exc_level_srr1;		             \
+	BOOKE_CLEAR_BTB(r10)						\
+	andi.	r11,r11,MSR_PR;						     \
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE));	\
+	mtmsr	r11;							\
+	mfspr	r11,SPRN_SPRG_THREAD;	/* if from user, start at top of   */\
+	lwz	r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
+	addi	r11,r11,THREAD_SIZE - INT_FRAME_SIZE;	/* allocate stack frame    */\
+	beq	1f;							     \
+	/* COMING FROM USER MODE */					     \
+	stw	r9,_CCR(r11);		/* save CR			   */\
+	lwz	r10,GPR10(r8);		/* copy regs from exception stack  */\
+	lwz	r9,GPR9(r8);						     \
+	stw	r10,GPR10(r11);						     \
+	lwz	r10,GPR11(r8);						     \
+	stw	r9,GPR9(r11);						     \
+	stw	r10,GPR11(r11);						     \
+	b	2f;							     \
+	/* COMING FROM PRIV MODE */					     \
+1:	mr	r11, r8;							     \
+2:	mfspr	r8,SPRN_SPRG_RSCRATCH_##exc_level;			     \
+	stw	r12,GPR12(r11);		/* save various registers	   */\
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
+	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
+	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
+	stw	r9,_ESR(r11);		/* exception was taken		   */\
+	mfspr	r12,exc_level_srr0;					     \
+	stw	r1,GPR1(r11);						     \
+	mfspr	r9,exc_level_srr1;					     \
+	stw	r1,0(r11);						     \
+	mr	r1,r11;							     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	COMMON_EXCEPTION_PROLOG_END trapno
+
+#define SAVE_xSRR(xSRR)			\
+	mfspr	r0,SPRN_##xSRR##0;	\
+	stw	r0,_##xSRR##0(r1);	\
+	mfspr	r0,SPRN_##xSRR##1;	\
+	stw	r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
+#ifdef CONFIG_PPC_E500
+	mfspr	r0,SPRN_MAS0
+	stw	r0,MAS0(r1)
+	mfspr	r0,SPRN_MAS1
+	stw	r0,MAS1(r1)
+	mfspr	r0,SPRN_MAS2
+	stw	r0,MAS2(r1)
+	mfspr	r0,SPRN_MAS3
+	stw	r0,MAS3(r1)
+	mfspr	r0,SPRN_MAS6
+	stw	r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r0,SPRN_MAS7
+	stw	r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_E500 */
+#ifdef CONFIG_44x
+	mfspr	r0,SPRN_MMUCR
+	stw	r0,MMUCR(r1)
+#endif
+.endm
+
+#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \
+		EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG(trapno) \
+		EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG(trapno) \
+		EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \
+			SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Guest Doorbell -- this is a bit odd in that it uses GSRR0/1 despite
+ * being delivered to the host.  This exception can only happen
+ * inside a KVM guest -- so we just handle up to the DO_KVM rather
+ * than try to fit this into one of the existing prolog macros.
+ */
+#define GUEST_DOORBELL_EXCEPTION \
+	START_EXCEPTION(GuestDoorbell);					     \
+	mtspr	SPRN_SPRG_WSCRATCH0, r10;	/* save one register */	     \
+	mfspr	r10, SPRN_SPRG_THREAD;					     \
+	stw	r11, THREAD_NORMSAVE(0)(r10);				     \
+	mfspr	r11, SPRN_SRR1;		                                     \
+	stw	r13, THREAD_NORMSAVE(2)(r10);				     \
+	mfcr	r13;			/* save CR in r13 for now	   */\
+	DO_KVM	BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1;			     \
+	trap
+
+/*
+ * Exception vectors.
+ */
+#define	START_EXCEPTION(label)						     \
+        .align 5;              						     \
+label:
+
+#define EXCEPTION(n, intno, label, hdlr)			\
+	START_EXCEPTION(label);					\
+	NORMAL_EXCEPTION_PROLOG(n, intno);			\
+	prepare_transfer_to_handler;				\
+	bl	hdlr;						\
+	b	interrupt_return
+
+#define CRITICAL_EXCEPTION(n, intno, label, hdlr)			\
+	START_EXCEPTION(label);						\
+	CRITICAL_EXCEPTION_PROLOG(n, intno);				\
+	SAVE_MMU_REGS;							\
+	SAVE_xSRR(SRR);							\
+	prepare_transfer_to_handler;					\
+	bl	hdlr;							\
+	b	ret_from_crit_exc
+
+#define MCHECK_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(label);					\
+	MCHECK_EXCEPTION_PROLOG(n);				\
+	mfspr	r5,SPRN_ESR;					\
+	stw	r5,_ESR(r11);					\
+	SAVE_xSRR(DSRR);					\
+	SAVE_xSRR(CSRR);					\
+	SAVE_MMU_REGS;						\
+	SAVE_xSRR(SRR);						\
+	prepare_transfer_to_handler;				\
+	bl	hdlr;						\
+	b	ret_from_mcheck_exc
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved.  This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_SRR1, which will still have
+ * the MSR_DE bit set.
+ */
+#define DEBUG_DEBUG_EXCEPTION						      \
+	START_EXCEPTION(DebugDebug);					      \
+	DEBUG_EXCEPTION_PROLOG(2000);						      \
+									      \
+	/*								      \
+	 * If there is a single step or branch-taken exception in an	      \
+	 * exception entry sequence, it was probably meant to apply to	      \
+	 * the code where the exception occurred (since exception entry	      \
+	 * doesn't turn off DE automatically).  We simulate the effect	      \
+	 * of turning off DE on entry to an exception handler by turning      \
+	 * off DE in the DSRR1 value and clearing the debug status.	      \
+	 */								      \
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */  \
+	andis.	r10,r10,(DBSR_IC|DBSR_BT)@h;				      \
+	beq+	2f;							      \
+									      \
+	lis	r10,interrupt_base@h;	/* check if exception in vectors */   \
+	ori	r10,r10,interrupt_base@l;				      \
+	cmplw	r12,r10;						      \
+	blt+	2f;			/* addr below exception vectors */    \
+									      \
+	lis	r10,interrupt_end@h;					      \
+	ori	r10,r10,interrupt_end@l;				      \
+	cmplw	r12,r10;						      \
+	bgt+	2f;			/* addr above exception vectors */    \
+									      \
+	/* here it looks like we got an inappropriate debug exception. */     \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the DSRR1 value */     \
+	lis	r10,(DBSR_IC|DBSR_BT)@h;	/* clear the IC event */      \
+	mtspr	SPRN_DBSR,r10;						      \
+	/* restore state and get out */					      \
+	lwz	r10,_CCR(r11);						      \
+	lwz	r0,GPR0(r11);						      \
+	lwz	r1,GPR1(r11);						      \
+	mtcrf	0x80,r10;						      \
+	mtspr	SPRN_DSRR0,r12;						      \
+	mtspr	SPRN_DSRR1,r9;						      \
+	lwz	r9,GPR9(r11);						      \
+	lwz	r12,GPR12(r11);						      \
+	mtspr	SPRN_SPRG_WSCRATCH_DBG,r8;				      \
+	BOOKE_LOAD_EXC_LEVEL_STACK(DBG); /* r8 points to the debug stack */ \
+	lwz	r10,GPR10(r8);						      \
+	lwz	r11,GPR11(r8);						      \
+	mfspr	r8,SPRN_SPRG_RSCRATCH_DBG;				      \
+									      \
+	PPC_RFDI;							      \
+	b	.;							      \
+									      \
+	/* continue normal handling for a debug exception... */		      \
+2:	mfspr	r4,SPRN_DBSR;						      \
+	stw	r4,_ESR(r11);		/* DebugException takes DBSR in _ESR */\
+	SAVE_xSRR(CSRR);						      \
+	SAVE_MMU_REGS;							      \
+	SAVE_xSRR(SRR);							      \
+	prepare_transfer_to_handler;				      \
+	bl	DebugException;						      \
+	b	ret_from_debug_exc
+
+#define DEBUG_CRIT_EXCEPTION						      \
+	START_EXCEPTION(DebugCrit);					      \
+	CRITICAL_EXCEPTION_PROLOG(2000,DEBUG);				      \
+									      \
+	/*								      \
+	 * If there is a single step or branch-taken exception in an	      \
+	 * exception entry sequence, it was probably meant to apply to	      \
+	 * the code where the exception occurred (since exception entry	      \
+	 * doesn't turn off DE automatically).  We simulate the effect	      \
+	 * of turning off DE on entry to an exception handler by turning      \
+	 * off DE in the CSRR1 value and clearing the debug status.	      \
+	 */								      \
+	mfspr	r10,SPRN_DBSR;		/* check single-step/branch taken */  \
+	andis.	r10,r10,(DBSR_IC|DBSR_BT)@h;				      \
+	beq+	2f;							      \
+									      \
+	lis	r10,interrupt_base@h;	/* check if exception in vectors */   \
+	ori	r10,r10,interrupt_base@l;				      \
+	cmplw	r12,r10;						      \
+	blt+	2f;			/* addr below exception vectors */    \
+									      \
+	lis	r10,interrupt_end@h;					      \
+	ori	r10,r10,interrupt_end@l;				      \
+	cmplw	r12,r10;						      \
+	bgt+	2f;			/* addr above exception vectors */    \
+									      \
+	/* here it looks like we got an inappropriate debug exception. */     \
+1:	rlwinm	r9,r9,0,~MSR_DE;	/* clear DE in the CSRR1 value */     \
+	lis	r10,(DBSR_IC|DBSR_BT)@h;	/* clear the IC event */      \
+	mtspr	SPRN_DBSR,r10;						      \
+	/* restore state and get out */					      \
+	lwz	r10,_CCR(r11);						      \
+	lwz	r0,GPR0(r11);						      \
+	lwz	r1,GPR1(r11);						      \
+	mtcrf	0x80,r10;						      \
+	mtspr	SPRN_CSRR0,r12;						      \
+	mtspr	SPRN_CSRR1,r9;						      \
+	lwz	r9,GPR9(r11);						      \
+	lwz	r12,GPR12(r11);						      \
+	mtspr	SPRN_SPRG_WSCRATCH_CRIT,r8;				      \
+	BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */  \
+	lwz	r10,GPR10(r8);						      \
+	lwz	r11,GPR11(r8);						      \
+	mfspr	r8,SPRN_SPRG_RSCRATCH_CRIT;				      \
+									      \
+	rfci;								      \
+	b	.;							      \
+									      \
+	/* continue normal handling for a critical exception... */	      \
+2:	mfspr	r4,SPRN_DBSR;						      \
+	stw	r4,_ESR(r11);		/* DebugException takes DBSR in _ESR */\
+	SAVE_MMU_REGS;							      \
+	SAVE_xSRR(SRR);							      \
+	prepare_transfer_to_handler;					      \
+	bl	DebugException;						      \
+	b	ret_from_crit_exc
+
+#define DATA_STORAGE_EXCEPTION						      \
+	START_EXCEPTION(DataStorage)					      \
+	NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE);		      \
+	mfspr	r5,SPRN_ESR;		/* Grab the ESR and save it */	      \
+	stw	r5,_ESR(r11);						      \
+	mfspr	r4,SPRN_DEAR;		/* Grab the DEAR */		      \
+	stw	r4, _DEAR(r11);						      \
+	prepare_transfer_to_handler;					      \
+	bl	do_page_fault;						      \
+	b	interrupt_return
+
+/*
+ * Instruction TLB Error interrupt handlers may call InstructionStorage
+ * directly without clearing ESR, so the ESR at this point may be left over
+ * from a prior interrupt.
+ *
+ * In any case, do_page_fault for BOOK3E does not use ESR and always expects
+ * dsisr to be 0. ESR_DST from a prior store in particular would confuse fault
+ * handling.
+ */
+#define INSTRUCTION_STORAGE_EXCEPTION					      \
+	START_EXCEPTION(InstructionStorage)				      \
+	NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE);			      \
+	li	r5,0;			/* Store 0 in regs->esr (dsisr) */    \
+	stw	r5,_ESR(r11);						      \
+	stw	r12, _DEAR(r11);	/* Set regs->dear (dar) to SRR0 */    \
+	prepare_transfer_to_handler;					      \
+	bl	do_page_fault;						      \
+	b	interrupt_return
+
+#define ALIGNMENT_EXCEPTION						      \
+	START_EXCEPTION(Alignment)					      \
+	NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT);		      \
+	mfspr   r4,SPRN_DEAR;           /* Grab the DEAR and save it */	      \
+	stw     r4,_DEAR(r11);						      \
+	prepare_transfer_to_handler;					      \
+	bl	alignment_exception;					      \
+	REST_NVGPRS(r1);						      \
+	b	interrupt_return
+
+#define PROGRAM_EXCEPTION						      \
+	START_EXCEPTION(Program)					      \
+	NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM);		      \
+	mfspr	r4,SPRN_ESR;		/* Grab the ESR and save it */	      \
+	stw	r4,_ESR(r11);						      \
+	prepare_transfer_to_handler;					      \
+	bl	program_check_exception;				      \
+	REST_NVGPRS(r1);						      \
+	b	interrupt_return
+
+#define DECREMENTER_EXCEPTION						      \
+	START_EXCEPTION(Decrementer)					      \
+	NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER);		      \
+	lis     r0,TSR_DIS@h;           /* Setup the DEC interrupt mask */    \
+	mtspr   SPRN_TSR,r0;		/* Clear the DEC interrupt */	      \
+	prepare_transfer_to_handler;					      \
+	bl	timer_interrupt;					      \
+	b	interrupt_return
+
+#define FP_UNAVAILABLE_EXCEPTION					      \
+	START_EXCEPTION(FloatingPointUnavailable)			      \
+	NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL);		      \
+	beq	1f;							      \
+	bl	load_up_fpu;		/* if from user, just load it up */   \
+	b	fast_exception_return;					      \
+1:	prepare_transfer_to_handler;					      \
+	bl	kernel_fp_unavailable_exception;			      \
+	b	interrupt_return
+
+#endif /* __ASSEMBLY__ */
+#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
new file mode 100644
index 0000000000..a1318ce18d
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -0,0 +1,610 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers. Derived from
+ * "arch/x86/kernel/hw_breakpoint.c"
+ *
+ * Copyright 2010 IBM Corporation
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+#include <asm/debug.h>
+#include <asm/hvcall.h>
+#include <asm/inst.h>
+#include <linux/uaccess.h>
+
+/*
+ * Per-cpu table: entry i holds the perf event currently installed in
+ * breakpoint address register i of that cpu, or NULL while it is free.
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
+
+/*
+ * Number of breakpoint slots available for @type (data or instruction).
+ */
+int hw_breakpoint_slots(int type)
+{
+	const bool is_data = (type == TYPE_DATA);
+
+	return is_data ? nr_wp_slots() : 0;	/* no instruction breakpoints */
+}
+
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * Claim the first free debug address register on this cpu for @bp and,
+ * unless the instruction is to be single-stepped, program the hardware.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *brk = counter_arch_bp(bp);
+	int nr;
+
+	for (nr = 0; nr < nr_wp_slots(); nr++) {
+		struct perf_event **reg = this_cpu_ptr(&bp_per_reg[nr]);
+
+		if (*reg)
+			continue;	/* register already taken */
+		*reg = bp;
+		break;
+	}
+
+	if (WARN_ONCE(nr == nr_wp_slots(), "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	/*
+	 * Leave the register alone while a single-step is pending for this
+	 * breakpoint; single_step_dabr_instruction() programs it afterwards.
+	 */
+	if (!brk->perf_single_step)
+		__set_breakpoint(nr, brk);
+
+	return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * Locate the debug address register holding @bp on this cpu, release it
+ * and write an all-zero breakpoint to the hardware.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint cleared = {0};
+	int nr;
+
+	for (nr = 0; nr < nr_wp_slots(); nr++) {
+		struct perf_event **reg = this_cpu_ptr(&bp_per_reg[nr]);
+
+		if (*reg != bp)
+			continue;
+		*reg = NULL;
+		break;
+	}
+
+	if (WARN_ONCE(nr == nr_wp_slots(), "Can't find any breakpoint slot"))
+		return;
+
+	__set_breakpoint(nr, &cleared);
+}
+
+static bool is_ptrace_bp(struct perf_event *bp)
+{
+	return bp->overflow_handler == ptrace_triggered; /* handler is ptrace_triggered() */
+}
+
+/*
+ * Non-zero when the breakpoint's virtual address is a kernel address.
+ */
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
+{
+	return is_kernel_addr(hw->address);
+}
+
+int arch_bp_generic_fields(int type, int *gen_bp_type)
+{
+	int generic = 0;	/* HW_BRK_TYPE_* bits mapped to HW_BREAKPOINT_* */
+
+	if (type & HW_BRK_TYPE_READ)
+		generic |= HW_BREAKPOINT_R;
+	if (type & HW_BRK_TYPE_WRITE)
+		generic |= HW_BREAKPOINT_W;
+	*gen_bp_type = generic;
+	return generic ? 0 : -EINVAL;	/* neither read nor write requested */
+}
+
+/*
+ * Watchpoint match range is always doubleword (8 bytes) aligned on
+ * powerpc. If the given range crosses a doubleword boundary, the
+ * length is increased so that the next doubleword also gets covered:
+ *
+ *          address   len = 6 bytes
+ *                |=========.
+ *   |------------v--|------v--------|
+ *   | | | | | | | | | | | | | | | | |
+ *   |---------------|---------------|
+ *    <---8 bytes--->
+ *
+ * Here hw is configured with
+ *   start_addr = address & ~(HW_BREAKPOINT_SIZE - 1), len = 16 bytes
+ * (@start_addr is inclusive, @end_addr exclusive).
+ *
+ * Stores the aligned length in hw->hw_len and returns 0, or returns
+ * -EINVAL if it exceeds what the breakpoint hardware in use can cover.
+ */
+static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
+{
+	u16 max_len = DABR_MAX_LEN;
+	/* Keep the span full-width: a u16 would wrap at 64K and pass the check */
+	unsigned long start_addr, end_addr, hw_len;
+
+	start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
+	end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
+	hw_len = end_addr - start_addr;
+
+	if (dawr_enabled()) {
+		max_len = DAWR_MAX_LEN;
+		/* DAWR region can't cross 512 bytes boundary on p10 predecessors */
+		if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512)))
+			return -EINVAL;
+	} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+		/* 8xx can setup a range without limitation */
+		max_len = U16_MAX;
+	}
+
+	if (hw_len > max_len)
+		return -EINVAL;
+
+	hw->hw_len = hw_len;	/* fits: hw_len <= max_len <= U16_MAX here */
+	return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ *
+ * Fills @hw (type, address, len) from @attr. Returns -EINVAL when @bp is
+ * NULL, the length is zero or neither read nor write access is requested,
+ * -ENODEV when ppc_breakpoint_available() is false, and otherwise the
+ * result of hw_breakpoint_validate_len().
+ */
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+			     const struct perf_event_attr *attr,
+			     struct arch_hw_breakpoint *hw)
+{
+	if (!bp || !attr->bp_len)
+		return -EINVAL;
+
+	hw->type = HW_BRK_TYPE_TRANSLATE;
+	hw->type |= (attr->bp_type & HW_BREAKPOINT_R) ? HW_BRK_TYPE_READ : 0;
+	hw->type |= (attr->bp_type & HW_BREAKPOINT_W) ? HW_BRK_TYPE_WRITE : 0;
+	/* must set at least read or write */
+	if (hw->type == HW_BRK_TYPE_TRANSLATE)
+		return -EINVAL;
+
+	/* each privilege level is watched unless explicitly excluded */
+	hw->type |= attr->exclude_user ? 0 : HW_BRK_TYPE_USER;
+	hw->type |= attr->exclude_kernel ? 0 : HW_BRK_TYPE_KERNEL;
+	hw->type |= attr->exclude_hv ? 0 : HW_BRK_TYPE_HYP;
+	hw->address = attr->bp_addr;
+	hw->len = attr->bp_len;
+
+	if (!ppc_breakpoint_available())
+		return -ENODEV;
+
+	return hw_breakpoint_validate_len(hw);
+}
+
+/*
+ * Restores the breakpoint on the debug registers.
+ * Invoke this function if it is known that the execution context is
+ * about to change to cause loss of MSR_SE settings.
+ *
+ * The perf watchpoint will simply re-trigger once the thread is started again,
+ * and the watchpoint handler will set up MSR_SE and perf_single_step as
+ * needed. Registers with no breakpoint installed are left untouched.
+ */
+void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
+{
+	struct perf_event *bp;
+	int i;
+
+	preempt_disable();
+	for (i = 0; i < nr_wp_slots(); i++) {
+		bp = __this_cpu_read(bp_per_reg[i]);
+		if (unlikely(bp && counter_arch_bp(bp)->perf_single_step))
+			goto reset;
+	}
+	goto out;
+
+reset:
+	regs_set_return_msr(regs, regs->msr & ~MSR_SE);
+	for (i = 0; i < nr_wp_slots(); i++) {
+		bp = __this_cpu_read(bp_per_reg[i]);
+		if (!bp)	/* free slot: counter_arch_bp(NULL) is not valid */
+			continue;
+		__set_breakpoint(i, counter_arch_bp(bp));
+		counter_arch_bp(bp)->perf_single_step = false;
+	}
+
+out:
+	preempt_enable();
+}
+
+static bool is_larx_stcx_instr(int type)
+{
+	return type == LARX || type == STCX;	/* these hits are disabled, not stepped */
+}
+
+static bool is_octword_vsx_instr(int type, int size)
+{
+	return size == 32 && (type == LOAD_VSX || type == STORE_VSX);	/* 32-byte VSX access */
+}
+
+/*
+ * The hw-breakpoint hit could not be handled reliably: warn and disable it.
+ */
+static void handler_error(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *brk = counter_arch_bp(bp);
+
+	WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", brk->address);
+	perf_event_disable_inatomic(bp);
+}
+
+static void larx_stcx_err(struct perf_event *bp)	/* rate-limited notice, then disable @bp */
+{
+	printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
+			   counter_arch_bp(bp)->address);
+	perf_event_disable_inatomic(bp);
+}
+
+static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
+			     int *hit, ppc_inst_t instr)
+{
+	const bool from_user = user_mode(regs);
+	int slot;
+
+	/*
+	 * Kernel instructions are emulated right here; when that works the
+	 * caller can deliver the events. User instructions are never emulated.
+	 */
+	if (!from_user && emulate_step(regs, instr))
+		return true;
+
+	/*
+	 * Either way the hit breakpoints are dropped from @bp so the caller
+	 * does not re-arm them: user hits get flagged for single-stepping,
+	 * failed kernel emulation disables the event via handler_error().
+	 */
+	for (slot = 0; slot < nr_wp_slots(); slot++) {
+		if (!hit[slot])
+			continue;
+		if (from_user)
+			counter_arch_bp(bp[slot])->perf_single_step = true;
+		else
+			handler_error(bp[slot]);
+		bp[slot] = NULL;
+	}
+	if (from_user)
+		regs_set_return_msr(regs, regs->msr | MSR_SE);
+	return false;
+}
+
+static void handle_p10dd1_spurious_exception(struct perf_event **bp,
+					     int *hit, unsigned long ea)
+{
+	int i;
+	unsigned long hw_end_addr;
+
+	/*
+	 * Handle spurious exception only when any bp_per_reg is set.
+	 * Otherwise this might be created by xmon and not actually a
+	 * spurious exception. The loop stops at the first matching slot.
+	 */
+	for (i = 0; i < nr_wp_slots(); i++) {
+		struct arch_hw_breakpoint *info;
+
+		if (!bp[i])
+			continue;
+
+		info = counter_arch_bp(bp[i]);
+
+		hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+		/*
+		 * Required: ending address of the DAWR range is less than
+		 * the starting address of the op.
+		 */
+		if ((hw_end_addr - 1) >= ea)
+			continue;
+
+		/*
+		 * Those addresses need to be in the same or in two consecutive
+		 * 512B blocks (checked as: equal after >> 10; review: confirm).
+		 */
+		if (((hw_end_addr - 1) >> 10) != (ea >> 10))
+			continue;
+
+		/*
+		 * Required: 'op address + 64B' generates an address that has
+		 * a carry into bit 52 (crosses 2K boundary).
+		 */
+		if ((ea & 0x800) == ((ea + 64) & 0x800))
+			continue;
+
+		break;	/* slot i satisfies all three conditions */
+	}
+
+	if (i == nr_wp_slots())
+		return;	/* no installed breakpoint matched */
+
+	for (i = 0; i < nr_wp_slots(); i++) {	/* flag every installed bp */
+		if (bp[i]) {
+			hit[i] = 1;
+			counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+		}
+	}
+}
+
+/*
+ * Handle a DABR or DAWR exception.
+ *
+ * Called in atomic context.
+ */
+int hw_breakpoint_handler(struct die_args *args)
+{
+	bool err = false;
+	int rc = NOTIFY_STOP;
+	struct perf_event *bp[HBP_NUM_MAX] = { NULL };	/* entries still to re-arm */
+	struct pt_regs *regs = args->regs;
+	int i;
+	int hit[HBP_NUM_MAX] = {0};	/* per-slot: did this exception match it? */
+	int nr_hit = 0;
+	bool ptrace_bp = false;
+	ppc_inst_t instr = ppc_inst(0);	/* stays 0 if the instruction can't be read */
+	int type = 0;
+	int size = 0;
+	unsigned long ea = 0;
+
+	/* Disable breakpoints during exception handling */
+	hw_breakpoint_disable();
+
+	/*
+	 * The counter may be concurrently released but that can only
+	 * occur from a call_rcu() path. We can then safely fetch
+	 * the breakpoint, use its callback, touch its counter
+	 * while we are in an rcu_read_lock() path.
+	 */
+	rcu_read_lock();
+
+	if (!IS_ENABLED(CONFIG_PPC_8xx))
+		wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+	for (i = 0; i < nr_wp_slots(); i++) {
+		struct arch_hw_breakpoint *info;
+
+		bp[i] = __this_cpu_read(bp_per_reg[i]);
+		if (!bp[i])
+			continue;
+
+		info = counter_arch_bp(bp[i]);
+		info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;	/* re-evaluated per exception */
+
+		if (wp_check_constraints(regs, instr, ea, type, size, info)) {
+			if (!IS_ENABLED(CONFIG_PPC_8xx) &&
+			    ppc_inst_equal(instr, ppc_inst(0))) {
+				handler_error(bp[i]);	/* matched, but no instruction to step */
+				bp[i] = NULL;
+				err = 1;
+				continue;
+			}
+
+			if (is_ptrace_bp(bp[i]))
+				ptrace_bp = true;
+			hit[i] = 1;
+			nr_hit++;
+		}
+	}
+
+	if (err)
+		goto reset;	/* re-arm whatever is still listed in bp[] */
+
+	if (!nr_hit) {
+		/* Workaround for Power10 DD1 */
+		if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
+		    is_octword_vsx_instr(type, size)) {
+			handle_p10dd1_spurious_exception(bp, hit, ea);
+		} else {
+			rc = NOTIFY_DONE;
+			goto out;	/* NOTE(review): skips the re-arm loop at reset: - confirm intended */
+		}
+	}
+
+	/*
+	 * Return early after invoking user-callback function without restoring
+	 * DABR if the breakpoint is from ptrace which always operates in
+	 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
+	 * generated in do_dabr().
+	 */
+	if (ptrace_bp) {
+		for (i = 0; i < nr_wp_slots(); i++) {
+			if (!hit[i] || !is_ptrace_bp(bp[i]))
+				continue;
+			perf_bp_event(bp[i], regs);
+			bp[i] = NULL;	/* one-shot: not re-armed below */
+		}
+		rc = NOTIFY_DONE;
+		goto reset;
+	}
+
+	if (!IS_ENABLED(CONFIG_PPC_8xx)) {
+		if (is_larx_stcx_instr(type)) {
+			for (i = 0; i < nr_wp_slots(); i++) {
+				if (!hit[i])
+					continue;
+				larx_stcx_err(bp[i]);
+				bp[i] = NULL;
+			}
+			goto reset;
+		}
+
+		if (!stepping_handler(regs, bp, hit, instr))
+			goto reset;	/* hit entries were already cleared from bp[] */
+	}
+
+	/*
+	 * As a policy, the callback is invoked in a 'trigger-after-execute'
+	 * fashion
+	 */
+	for (i = 0; i < nr_wp_slots(); i++) {
+		if (!hit[i])
+			continue;
+		if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+			perf_bp_event(bp[i], regs);
+	}
+
+reset:
+	for (i = 0; i < nr_wp_slots(); i++) {
+		if (!bp[i])
+			continue;
+		__set_breakpoint(i, counter_arch_bp(bp[i]));
+	}
+
+out:
+	rcu_read_unlock();
+	return rc;
+}
+NOKPROBE_SYMBOL(hw_breakpoint_handler);
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ *
+ * Called in atomic context.
+ */
+static int single_step_dabr_instruction(struct die_args *args)
+{
+	struct pt_regs *regs = args->regs;
+	bool ours = false;
+
+	/*
+	 * Find the breakpoints that a previous HW Breakpoint exception
+	 * left waiting for this single-step
+	 */
+	for (int i = 0; i < nr_wp_slots(); i++) {
+		struct perf_event *bp = __this_cpu_read(bp_per_reg[i]);
+		struct arch_hw_breakpoint *info;
+
+		if (!bp)
+			continue;
+
+		info = counter_arch_bp(bp);
+		if (!info->perf_single_step)
+			continue;
+
+		ours = true;
+
+		/*
+		 * The user-defined callback is invoked here, after the
+		 * instruction has been stepped, to conform to the
+		 * 'trigger-after-execute' semantics. Extraneous hits are
+		 * not reported.
+		 */
+		if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+			perf_bp_event(bp, regs);
+
+		/* Stepping is over: put the breakpoint back in its register */
+		info->perf_single_step = false;
+		__set_breakpoint(i, info);
+	}
+
+	/*
+	 * Swallow the exception only when it was ours alone. If the process
+	 * was being single-stepped by ptrace, let the other single-step
+	 * actions occur (e.g. generate SIGTRAP).
+	 */
+	if (ours && !test_thread_flag(TIF_SINGLESTEP))
+		return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+NOKPROBE_SYMBOL(single_step_dabr_instruction);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Dispatches on @val; events not handled here yield NOTIFY_DONE.
+ * Called in atomic context.
+ */
+int hw_breakpoint_exceptions_notify(
+		struct notifier_block *unused, unsigned long val, void *data)
+{
+	switch (val) {
+	case DIE_DABR_MATCH:
+		/* a data breakpoint register matched */
+		return hw_breakpoint_handler(data);
+	case DIE_SSTEP:
+		/* single-step trap, possibly requested by the handler above */
+		return single_step_dabr_instruction(data);
+	default:
+		/* not an event this notifier deals with */
+		return NOTIFY_DONE;
+	}
+}
+NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
+
+/*
+ * Release the user breakpoints used by ptrace: every slot of
+ * @tsk's ptrace_bps[] is unregistered and then cleared.
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	struct thread_struct *thread = &tsk->thread;
+	int nr;
+
+	for (nr = 0; nr < nr_wp_slots(); nr++) {
+		unregister_hw_breakpoint(thread->ptrace_bps[nr]);
+		thread->ptrace_bps[nr] = NULL;
+	}
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO: not implemented, nothing is read back for @bp */
+}
+
+void ptrace_triggered(struct perf_event *bp,
+		      struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct perf_event_attr attr = bp->attr;
+
+	/*
+	 * ptrace has defined a one-shot behaviour for breakpoint exceptions
+	 * in PPC64, so switch the request off here by re-submitting a copy
+	 * of its attributes with 'disabled' set.
+	 * The SIGTRAP signal is generated automatically for us in do_dabr().
+	 * We don't have to do anything about that here
+	 */
+	attr.disabled = true;
+	modify_user_hw_breakpoint(bp, &attr);
+}
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
new file mode 100644
index 0000000000..9e51801c49
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/sstep.h>
+#include <asm/cache.h>
+
+static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+	return ((info->address <= dar) && (dar - info->address < info->len)); /* offset compare: no address + len sum */
+}
+
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+				   struct arch_hw_breakpoint *info)
+{
+	/* half-open intervals [ea, ea + size) and [address, address + len) */
+	return (ea + size > info->address) && (ea < info->address + info->len);
+}
+
+static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+	/*
+	 * [lo, hi) is the requested range widened to HW_BREAKPOINT_SIZE
+	 * alignment on both ends.
+	 */
+	unsigned long lo = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+	unsigned long hi = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+	return (dar >= lo) && (dar < hi); }
+
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+				 struct arch_hw_breakpoint *info)
+{
+	unsigned long granule, hw_start_addr, hw_end_addr;
+
+	/*
+	 * On p10 predecessors, a quadword access is handled differently
+	 * than other instructions: the range is quadword aligned instead.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16)
+		granule = HW_BREAKPOINT_SIZE_QUADWORD;
+	else
+		granule = HW_BREAKPOINT_SIZE;
+
+	hw_start_addr = ALIGN_DOWN(info->address, granule);
+	hw_end_addr = ALIGN(info->address + info->len, granule);
+	return (ea < hw_end_addr) && (ea + size > hw_start_addr);
+}
+
+/*
+ * With multiple DAWR registers a hit must also be checked against the
+ * dawrx constraint bits (access direction and privilege level) to
+ * confirm this is _really_ a valid event. For an UNKNOWN type only the
+ * privilege level can be checked; a match there counts as positive.
+ */
+static bool check_dawrx_constraints(struct pt_regs *regs, int type,
+				    struct arch_hw_breakpoint *info)
+{
+	const bool is_store = OP_IS_STORE(type) || type == CACHEOP;
+
+	/* the access direction must be one the breakpoint watches */
+	if (!(info->type & HW_BRK_TYPE_READ) && OP_IS_LOAD(type))
+		return false;
+
+	/*
+	 * The Cache Management instructions other than dcbz never
+	 * cause a match. i.e. if type is CACHEOP, the instruction
+	 * is dcbz, and dcbz is treated as Store.
+	 */
+	if (!(info->type & HW_BRK_TYPE_WRITE) && is_store)
+		return false;
+
+	if (!(info->type & HW_BRK_TYPE_KERNEL) && is_kernel_addr(regs->nip))
+		return false;
+
+	return (info->type & HW_BRK_TYPE_USER) || !user_mode(regs);
+}
+
+/*
+ * Return true if the event is valid wrt dawr configuration,
+ * including extraneous exception. Otherwise return false.
+ */
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
+			  unsigned long ea, int type, int size,
+			  struct arch_hw_breakpoint *info)
+{
+	bool in_user_range = dar_in_user_range(regs->dar, info);
+	bool dawrx_constraints;
+
+	/*
+	 * 8xx supports only one breakpoint and thus we can
+	 * unconditionally return true.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_8xx)) {
+		if (!in_user_range)
+			info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; /* DAR outside the requested range */
+		return true;
+	}
+
+	if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {	/* no instruction available */
+		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    !dar_in_hw_range(regs->dar, info))
+			return false;
+
+		return true;
+	}
+
+	dawrx_constraints = check_dawrx_constraints(regs, type, info);
+
+	if (type == UNKNOWN) {	/* no usable ea/size: judge by DAR on ARCH_31 */
+		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    !dar_in_hw_range(regs->dar, info))
+			return false;
+
+		return dawrx_constraints;
+	}
+
+	if (ea_user_range_overlaps(ea, size, info))
+		return dawrx_constraints;
+
+	if (ea_hw_range_overlaps(ea, size, info)) {	/* only the aligned hw range is touched */
+		if (dawrx_constraints) {
+			info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+			return true;
+		}
+	}
+	return false;
+}
+
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
+			 int *type, int *size, unsigned long *ea)
+{
+	struct instruction_op op;
+	unsigned long addr;
+	int kind, len, err;
+
+	pagefault_disable();
+	err = __get_user_instr(*instr, (void __user *)regs->nip);
+	pagefault_enable();
+	if (err)
+		return;		/* @type, @size and @ea are left untouched */
+
+	analyse_instr(&op, regs, *instr);
+	kind = GETTYPE(op.type);
+	len = GETSIZE(op.type);
+	addr = op.ea;
+	if (!(regs->msr & MSR_64BIT))
+		addr &= 0xffffffffUL;
+
+	if (kind == CACHEOP)
+		len = l1_dcache_bytes();	/* cache ops span l1_dcache_bytes() */
+	if (kind == CACHEOP || kind == LOAD_VMX || kind == STORE_VMX)
+		addr &= ~(len - 1);		/* round the EA down to that size */
+
+	*type = kind;
+	*ea = addr;
+	*size = len;
+}
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
new file mode 100644
index 0000000000..b1c0418b25
--- /dev/null
+++ b/arch/powerpc/kernel/idle.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Idle daemon for PowerPC.  Idle daemon will handle any action
+ * that needs to be taken when the system becomes idle.
+ *
+ * Originally written by Cort Dougan (cort@cs.nmt.edu).
+ * Subsequent 32-bit hacking by Tom Rini, Armin Kuster,
+ * Paul Mackerras and others.
+ *
+ * iSeries support added by Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * Additional shared processor, SMT, and firmware support
+ *    Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org>
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/sysctl.h>
+#include <linux/tick.h>
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/runlatch.h>
+#include <asm/smp.h>
+
+
+unsigned long cpuidle_disable = IDLE_NO_OVERRIDE;	/* IDLE_POWERSAVE_OFF after "powersave=off" */
+EXPORT_SYMBOL(cpuidle_disable);
+
+static int __init powersave_off(char *arg)
+{
+	cpuidle_disable = IDLE_POWERSAVE_OFF;
+	ppc_md.power_save = NULL;	/* arch_cpu_idle() then uses its HMT fallback */
+	return 1;
+}
+__setup("powersave=off", powersave_off);
+
+void arch_cpu_idle(void)
+{
+	ppc64_runlatch_off();
+
+	if (!ppc_md.power_save) {
+		/*
+		 * No platform hook: go into low thread priority and
+		 * possibly low power mode.
+		 */
+		HMT_low();
+		HMT_very_low();
+	} else {
+		ppc_md.power_save();
+		/*
+		 * Some power_save functions return with interrupts
+		 * enabled, some don't; make sure they end up disabled.
+		 */
+		if (!irqs_disabled())
+			raw_local_irq_disable();
+	}
+
+	HMT_medium();
+	ppc64_runlatch_on();
+}
+
+int powersave_nap;	/* non-zero allows napping; settable through the "powersave-nap" sysctl */
+
+#ifdef CONFIG_PPC_970_NAP
+void power4_idle(void)
+{
+	/* Napping needs CPU support and must be switched on via powersave_nap */
+	if (!cpu_has_feature(CPU_FTR_CAN_NAP) || !powersave_nap)
+		return;
+
+	/* prep_irq_for_idle() vetoes the nap by returning false */
+	if (!prep_irq_for_idle())
+		return;
+
+	/* On Altivec CPUs issue dssall followed by sync before napping */
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		asm volatile(PPC_DSSALL " ; sync" ::: "memory");
+
+	power4_idle_nap();
+
+	/*
+	 * power4_idle_nap returns with interrupts enabled (soft and hard), so
+	 * we return to our caller in that state too. Our caller can cope with
+	 * either interrupts disabled or enabled upon return.
+	 */
+}
+#endif
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Sysctl entry exposing the powersave_nap integer as "powersave-nap".
+ */
+static struct ctl_table powersave_nap_ctl_table[] = {
+	{
+		.procname	= "powersave-nap",
+		.data		= &powersave_nap,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}	/* empty terminating entry */
+};
+
+static int __init
+register_powersave_nap_sysctl(void)
+{
+	register_sysctl("kernel", powersave_nap_ctl_table);	/* table attached under "kernel" */
+
+	return 0;	/* the result of register_sysctl() is not checked */
+}
+__initcall(register_powersave_nap_sysctl);
+#endif
diff --git a/arch/powerpc/kernel/idle_64e.S b/arch/powerpc/kernel/idle_64e.S
new file mode 100644
index 0000000000..0fc680e03d
--- /dev/null
+++ b/arch/powerpc/kernel/idle_64e.S
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * Generic idle routine for 64 bits e500 processors
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
+
+/* 64-bit version only for now */
+.macro BOOK3E_IDLE name loop
+_GLOBAL(\name)
+	/* Save LR for later */
+	mflr	r0
+	std	r0,16(r1)
+
+	/* Hard disable interrupts */
+	wrteei	0
+
+	/* Now check if an interrupt came in while we were soft disabled
+	 * since we may otherwise lose it (doorbells etc...).
+	 */
+	lbz	r3,PACAIRQHAPPENED(r13)
+	cmpwi	cr0,r3,0
+	bne	2f			/* something is pending: do not sleep */
+
+	/* Now we are going to mark ourselves as soft and hard enabled in
+	 * order to be able to take interrupts while asleep. We inform lockdep
+	 * of that. We don't actually turn interrupts on just yet tho.
+	 */
+#ifdef CONFIG_TRACE_IRQFLAGS
+	stdu    r1,-128(r1)		/* temporary frame around the call */
+	bl	trace_hardirqs_on
+	addi    r1,r1,128
+#endif
+	li	r0,IRQS_ENABLED
+	stb	r0,PACAIRQSOFTMASK(r13)
+	
+	/* Interrupts will make us return to LR, so get something we want
+	 * in there
+	 */
+	bl	1f			/* LR = address of the ld below */
+
+	/* And return (interrupts are on) */
+	ld	r0,16(r1)		/* LR saved at entry */
+	mtlr	r0
+	blr
+
+1:	/* Let's set the _TLF_NAPPING flag so interrupts make us return
+	 * to the right spot
+	*/
+	ld	r11, PACACURRENT(r13)
+	ld	r10,TI_LOCAL_FLAGS(r11)
+	ori	r10,r10,_TLF_NAPPING
+	std	r10,TI_LOCAL_FLAGS(r11)
+
+	/* We can now re-enable hard interrupts and go to sleep */
+	wrteei	1
+	\loop
+
+2:					/* reached from the bne above */
+	lbz	r10,PACAIRQHAPPENED(r13)
+	ori	r10,r10,PACA_IRQ_HARD_DIS	/* note the wrteei 0 done above */
+	stb	r10,PACAIRQHAPPENED(r13)
+	blr				/* LR was never changed on this path */
+.endm
+
+.macro BOOK3E_IDLE_LOOP
+1:
+	PPC_WAIT_v203
+	b	1b		/* wait again if execution simply falls through */
+.endm
+
+/* The instructions at epapr_ev_idle_start below are patched with the
+   proper hcall opcodes during kernel initialization */
+.macro EPAPR_EV_IDLE_LOOP
+idle_loop:
+	LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+	li      r3, -1		/* four placeholder instructions, see above */
+	nop
+	nop
+	nop
+	b       idle_loop	/* repeat the sequence after every return */
+.endm
+
+BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP
+
+BOOK3E_IDLE e500_idle BOOK3E_IDLE_LOOP
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
new file mode 100644
index 0000000000..3c09735636
--- /dev/null
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  This file contains the power_save function for 6xx & 7xxx CPUs
+ *  rewritten in assembler
+ *
+ *  Warning ! This code assumes that if your machine has a 750fx
+ *  it will have PLL 1 set to low speed mode (used during NAP/DOZE).
+ *  if this is not the case some additional changes will have to
+ *  be done to check a runtime var (a bit like powersave-nap)
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+
+	.text
+
+/*
+ * Init idle, called at early CPU setup time from head.S for each CPU
+ * Make sure no rest of NAP mode remains in HID0, save default
+ * values for some CPU specific registers. Called with r24
+ * containing CPU number and r3 reloc offset
+ */
+_GLOBAL(init_idle_6xx)
+BEGIN_FTR_SECTION
+	mfspr	r4,SPRN_HID0
+	rlwinm	r4,r4,0,10,8	/* Clear NAP */
+	mtspr	SPRN_HID0, r4
+	b	1f		/* go on to save the register defaults */
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+	blr			/* reached when the section above is not taken */
+1:
+	slwi	r5,r24,2	/* r5 = CPU number * 4 (word index) ... */
+	add	r5,r5,r3	/* ... plus the reloc offset */
+BEGIN_FTR_SECTION
+	mfspr	r4,SPRN_MSSCR0
+	addis	r6,r5, nap_save_msscr0@ha
+	stw	r4,nap_save_msscr0@l(r6)	/* this CPU's word of nap_save_msscr0 */
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+BEGIN_FTR_SECTION
+	mfspr	r4,SPRN_HID1
+	addis	r6,r5,nap_save_hid1@ha
+	stw	r4,nap_save_hid1@l(r6)		/* this CPU's word of nap_save_hid1 */
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+	blr
+
+/*
+ * Here is the power_save_6xx function. This could eventually be
+ * split into several functions & changing the function pointer
+ * depending on the various features.
+ */
+_GLOBAL(ppc6xx_idle)
+	/* Check if we can nap or doze, put HID0 mask in r3
+	 */
+	lis	r3, 0		/* default: neither mode selected */
+BEGIN_FTR_SECTION
+	lis	r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+BEGIN_FTR_SECTION
+	/* We must dynamically check for the NAP feature as it
+	 * can be cleared by CPU init after the fixups are done
+	 */
+	lis	r4,cur_cpu_spec@ha
+	lwz	r4,cur_cpu_spec@l(r4)
+	lwz	r4,CPU_SPEC_FEATURES(r4)
+	andi.	r0,r4,CPU_FTR_CAN_NAP
+	beq	1f
+	/* Now check if user or arch enabled NAP mode */
+	lis	r4,powersave_nap@ha
+	lwz	r4,powersave_nap@l(r4)
+	cmpwi	0,r4,0
+	beq	1f
+	lis	r3,HID0_NAP@h	/* NAP replaces any DOZE choice made above */
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+	cmpwi	0,r3,0
+	beqlr			/* empty mask: nothing to do, return */
+
+	/* Some pre-nap cleanups needed on some CPUs */
+	andis.	r0,r3,HID0_NAP@h
+	beq	2f		/* not napping: skip the L2 prefetch step */
+BEGIN_FTR_SECTION
+	/* Disable L2 prefetch on some 745x and try to ensure
+	 * L2 prefetch engines are idle. As explained by errata
+	 * text, we can't be sure they are, we just hope very hard
+	 * that well be enough (sic !). At least I noticed Apple
+	 * doesn't even bother doing the dcbf's here...
+	 */
+	mfspr	r4,SPRN_MSSCR0
+	rlwinm	r4,r4,0,0,29	/* clear the two low-order bits */
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lis	r4,KERNELBASE@h
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+2:
+BEGIN_FTR_SECTION
+	/* Go to low speed mode on some 750FX */
+	lis	r4,powersave_lowspeed@ha
+	lwz	r4,powersave_lowspeed@l(r4)
+	cmpwi	0,r4,0
+	beq	1f		/* powersave_lowspeed == 0: leave HID1 alone */
+	mfspr	r4,SPRN_HID1
+	oris	r4,r4,0x0001	/* NOTE(review): presumably the PLL select bit - confirm */
+	mtspr	SPRN_HID1,r4
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+
+	/* Go to NAP or DOZE now */	
+	mfspr	r4,SPRN_HID0
+	lis	r5,(HID0_NAP|HID0_SLEEP)@h
+BEGIN_FTR_SECTION
+	oris	r5,r5,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+	andc	r4,r4,r5	/* drop the power-mode bits collected in r5 */
+	or	r4,r4,r3	/* then set the mode chosen in r3 */
+BEGIN_FTR_SECTION
+	oris	r4,r4,HID0_DPM@h	/* that should be done once for all  */
+END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
+	mtspr	SPRN_HID0,r4
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	lwz	r8,TI_LOCAL_FLAGS(r2)	/* set napping bit */
+	ori	r8,r8,_TLF_NAPPING	/* so when we take an exception */
+	stw	r8,TI_LOCAL_FLAGS(r2)	/* it will return to our caller */
+	mfmsr	r7
+	ori	r7,r7,MSR_EE		/* interrupts on ... */
+	oris	r7,r7,MSR_POW@h		/* ... together with MSR_POW */
+1:	sync
+	mtmsr	r7
+	isync
+	b	1b			/* retry until an exception gets us out */
+
+/*
+ * Return from NAP/DOZE mode, restore some CPU specific registers,
+ * R11 points to the exception frame. We have to preserve r10.
+ */
+_GLOBAL(power_save_ppc32_restore)
+	lwz	r9,_LINK(r11)		/* interrupted in ppc6xx_idle: */
+	stw	r9,_NIP(r11)		/* make it do a blr */
+
+#ifdef CONFIG_SMP
+	lwz	r11,TASK_CPU(r2)	/* get cpu number */
+	slwi	r11,r11,2		/* * 4: byte offset of this CPU's word */
+#else
+	li	r11,0			/* UP: first (only) word */
+#endif
+	/* Todo make sure all these are in the same page
+	 * and load r11 (@ha part + CPU offset) only once
+	 */
+BEGIN_FTR_SECTION
+	mfspr	r9,SPRN_HID0
+	andis.	r9,r9,HID0_NAP@h
+	beq	1f			/* HID0_NAP clear: MSSCR0 was not changed */
+	addis	r9, r11, nap_save_msscr0@ha
+	lwz	r9,nap_save_msscr0@l(r9)
+	mtspr	SPRN_MSSCR0, r9		/* value saved by init_idle_6xx */
+	sync
+	isync
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+BEGIN_FTR_SECTION
+	addis	r9, r11, nap_save_hid1@ha
+	lwz	r9,nap_save_hid1@l(r9)
+	mtspr	SPRN_HID1, r9		/* value saved by init_idle_6xx */
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+	blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
+
+	.data
+
+_GLOBAL(nap_save_msscr0)
+	.space	4*NR_CPUS	/* one 32-bit word per CPU */
+
+_GLOBAL(nap_save_hid1)
+	.space	4*NR_CPUS	/* one 32-bit word per CPU */
+
+_GLOBAL(powersave_lowspeed)
+	.long	0		/* non-zero makes ppc6xx_idle set the HID1 bit */
diff --git a/arch/powerpc/kernel/idle_85xx.S b/arch/powerpc/kernel/idle_85xx.S
new file mode 100644
index 0000000000..9e1bc4502c
--- /dev/null
+++ b/arch/powerpc/kernel/idle_85xx.S
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Dave Liu <daveliu@freescale.com>
+ * copy from idle_6xx.S and modify for e500 based processor,
+ * implement the power_save function in idle.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+
+	.text
+
+_GLOBAL(e500_idle)
+	lwz	r4,TI_LOCAL_FLAGS(r2)	/* set napping bit */
+	ori	r4,r4,_TLF_NAPPING	/* so when we take an exception */
+	stw	r4,TI_LOCAL_FLAGS(r2)	/* it will return to our caller */
+
+#ifdef CONFIG_PPC_E500MC
+	wrteei	1
+1:	wait
+
+	/*
+	 * Guard against spurious wakeups (e.g. from a hypervisor) --
+	 * any real interrupt will cause us to return to LR due to
+	 * _TLF_NAPPING.
+	 */
+	b	1b
+#else
+	/* Check if we can nap or doze, put HID0 mask in r3 */
+	lis	r3,0
+BEGIN_FTR_SECTION
+	lis	r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+
+BEGIN_FTR_SECTION
+	/* Now check if user enabled NAP mode */
+	lis	r4,powersave_nap@ha
+	lwz	r4,powersave_nap@l(r4)
+	cmpwi	0,r4,0
+	beq	1f			/* powersave_nap == 0: keep the mask as is */
+	stwu	r1,-16(r1)		/* 16-byte frame around the call */
+	mflr	r0
+	stw	r0,20(r1)		/* i.e. 4(old r1) */
+	bl	flush_dcache_L1
+	lwz	r0,20(r1)
+	addi	r1,r1,16
+	mtlr	r0
+	lis	r3,HID0_NAP@h		/* NAP replaces any DOZE choice made above */
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+1:
+	/* Go to NAP or DOZE now */
+	mfspr	r4,SPRN_HID0
+	rlwinm	r4,r4,0,~(HID0_DOZE|HID0_NAP|HID0_SLEEP)
+	or	r4,r4,r3		/* set the mode chosen in r3 */
+	isync
+	mtspr	SPRN_HID0,r4
+	isync
+
+	mfmsr	r7
+	oris	r7,r7,MSR_WE@h
+	ori	r7,r7,MSR_EE
+	msync
+	mtmsr	r7
+	isync
+2:	b	2b			/* spin here until an exception gets us out */
+#endif /* !E500MC */
+
+/*
+ * Return from NAP/DOZE mode: make the interrupted e500_idle return to
+ * its caller. No CPU specific registers are restored here.
+ * r2 containing address of current.
+ * r11 points to the exception frame.
+ * We have to preserve r10.
+ */
+_GLOBAL(power_save_ppc32_restore)
+	lwz	r9,_LINK(r11)		/* interrupted in e500_idle */
+	stw	r9,_NIP(r11)		/* make it do a blr */
+	blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
new file mode 100644
index 0000000000..3d97fb8338
--- /dev/null
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright 2018, IBM Corporation.
+ *
+ *  This file contains general idle entry/exit functions to save
+ *  and restore stack and NVGPRs which allows C code to call idle
+ *  states that lose GPRs, and it will return transparently with
+ *  SRR1 wakeup reason return value.
+ *
+ *  The platform / CPU caller must ensure SPRs and any other non-GPR
+ *  state is saved and restored correctly, handle KVM, interrupts, etc.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cpuidle.h>
+#include <asm/thread_info.h> /* TLF_NAPPING */
+
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * Desired PSSCR in r3
+ *
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
+ *
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
+ */
+_GLOBAL(isa300_idle_stop_noloss)
+	mtspr 	SPRN_PSSCR,r3
+	PPC_STOP
+	li	r3,0
+	blr
+
+/*
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
+ */
+_GLOBAL(isa300_idle_stop_mayloss)
+	mtspr 	SPRN_PSSCR,r3		/* r3 = desired PSSCR from the caller */
+	std	r1,PACAR1(r13)		/* idle_return_gpr_loss reloads r1 from here */
+	mflr	r4			/* LR is stored in slot 20 below */
+	mfcr	r5			/* CR is stored in slot 21 below */
+	/*
+	 * Use the stack red zone rather than a new frame for saving regs since
+	 * in the case of no GPR loss the wakeup code branches directly back to
+	 * the caller without deallocating the stack frame first.
+	 */
+	std	r2,-8*1(r1)
+	std	r14,-8*2(r1)
+	std	r15,-8*3(r1)
+	std	r16,-8*4(r1)
+	std	r17,-8*5(r1)
+	std	r18,-8*6(r1)
+	std	r19,-8*7(r1)
+	std	r20,-8*8(r1)
+	std	r21,-8*9(r1)
+	std	r22,-8*10(r1)
+	std	r23,-8*11(r1)
+	std	r24,-8*12(r1)
+	std	r25,-8*13(r1)
+	std	r26,-8*14(r1)
+	std	r27,-8*15(r1)
+	std	r28,-8*16(r1)
+	std	r29,-8*17(r1)
+	std	r30,-8*18(r1)
+	std	r31,-8*19(r1)
+	std	r4,-8*20(r1)
+	std	r5,-8*21(r1)
+	/* 21 slots * 8 = 168 bytes below r1 (r2, r14-r31, LR, CR) */
+	PPC_STOP
+	b	.	/* catch bugs */
+
+/*
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt can call this to return to the caller
+ * of the idle sleep function, with r3 as the return code.
+ *
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
+ */
+_GLOBAL(idle_return_gpr_loss)
+	ld	r1,PACAR1(r13)
+	ld	r4,-8*20(r1)
+	ld	r5,-8*21(r1)
+	mtlr	r4
+	mtcr	r5
+	/*
+	 * KVM nap requires r2 to be saved, rather than just restoring it
+	 * from PACATOC. This could be avoided for that less common case
+	 * if KVM saved its r2.
+	 */
+	ld	r2,-8*1(r1)
+	ld	r14,-8*2(r1)
+	ld	r15,-8*3(r1)
+	ld	r16,-8*4(r1)
+	ld	r17,-8*5(r1)
+	ld	r18,-8*6(r1)
+	ld	r19,-8*7(r1)
+	ld	r20,-8*8(r1)
+	ld	r21,-8*9(r1)
+	ld	r22,-8*10(r1)
+	ld	r23,-8*11(r1)
+	ld	r24,-8*12(r1)
+	ld	r25,-8*13(r1)
+	ld	r26,-8*14(r1)
+	ld	r27,-8*15(r1)
+	ld	r28,-8*16(r1)
+	ld	r29,-8*17(r1)
+	ld	r30,-8*18(r1)
+	ld	r31,-8*19(r1)
+	blr
+
+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ * We have to store a GPR somewhere, ptesync, then reload it, and create
+ * a false dependency on the result of the load. It doesn't matter which
+ * GPR we store, or where we store it. We have already stored r2 to the
+ * stack at -8(r1) in isa206_idle_insn_mayloss, so use that.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
+	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
+	std	r2,-8(r1);					\
+	ptesync;						\
+	ld	r2,-8(r1);					\
+236:	cmpd	cr0,r2,r2;					\
+	bne	236b;						\
+	IDLE_INST;						\
+	b	.	/* catch bugs */
+
+/*
+ * Desired instruction type in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
+ *
+ * This must be called in real-mode (MSR_IDLE).
+ */
+_GLOBAL(isa206_idle_insn_mayloss)
+	std	r1,PACAR1(r13)
+	mflr	r4
+	mfcr	r5
+	/*
+	 * Use the stack red zone rather than a new frame for saving regs since
+	 * in the case of no GPR loss the wakeup code branches directly back to
+	 * the caller without deallocating the stack frame first.
+	 */
+	std	r2,-8*1(r1)
+	std	r14,-8*2(r1)
+	std	r15,-8*3(r1)
+	std	r16,-8*4(r1)
+	std	r17,-8*5(r1)
+	std	r18,-8*6(r1)
+	std	r19,-8*7(r1)
+	std	r20,-8*8(r1)
+	std	r21,-8*9(r1)
+	std	r22,-8*10(r1)
+	std	r23,-8*11(r1)
+	std	r24,-8*12(r1)
+	std	r25,-8*13(r1)
+	std	r26,-8*14(r1)
+	std	r27,-8*15(r1)
+	std	r28,-8*16(r1)
+	std	r29,-8*17(r1)
+	std	r30,-8*18(r1)
+	std	r31,-8*19(r1)
+	std	r4,-8*20(r1)
+	std	r5,-8*21(r1)
+	cmpwi	r3,PNV_THREAD_NAP
+	bne	1f
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
+1:	cmpwi	r3,PNV_THREAD_SLEEP
+	bne	2f
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
+2:	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
+#endif
+
+#ifdef CONFIG_PPC_970_NAP
+_GLOBAL(power4_idle_nap)
+	LOAD_REG_IMMEDIATE(r7, MSR_KERNEL|MSR_EE|MSR_POW)
+	ld	r9,PACA_THREAD_INFO(r13)	/* r9 = thread_info pointer held in the PACA */
+	ld	r8,TI_LOCAL_FLAGS(r9)
+	ori	r8,r8,_TLF_NAPPING		/* flag this thread as napping */
+	std	r8,TI_LOCAL_FLAGS(r9)
+	/*
+	 * NAPPING bit is set, from this point onward power4_fixup_nap
+	 * will cause exceptions to return to power4_idle_nap_return.
+	 */
+1:	sync
+	isync
+	mtmsrd	r7				/* MSR = MSR_KERNEL|MSR_EE|MSR_POW (built in r7) */
+	isync
+	b	1b				/* execution continued here: request nap again */
+
+	.globl power4_idle_nap_return
+power4_idle_nap_return:
+	blr					/* back to power4_idle_nap's caller */
+#endif
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
new file mode 100644
index 0000000000..b7029beed8
--- /dev/null
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+
+#include <linux/ima.h>
+#include <asm/secure_boot.h>
+
+bool arch_ima_get_secureboot(void)
+{
+	return is_ppc_secureboot_enabled();
+}
+
+/*
+ * The "secure_rules" are enabled only on "secureboot" enabled systems.
+ * These rules verify the file signatures against known good values.
+ * The "appraise_type=imasig|modsig" option allows the known good signature
+ * to be stored as an xattr or as an appended signature.
+ *
+ * To avoid duplicate signature verification as much as possible, the IMA
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG
+ * is not enabled.
+ */
+static const char *const secure_rules[] = {
+	"appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG
+	"appraise func=MODULE_CHECK appraise_type=imasig|modsig",
+#endif
+	NULL
+};
+
+/*
+ * The "trusted_rules" are enabled only on "trustedboot" enabled systems.
+ * These rules add the kexec kernel image and kernel modules file hashes to
+ * the IMA measurement list.
+ */
+static const char *const trusted_rules[] = {
+	"measure func=KEXEC_KERNEL_CHECK",
+	"measure func=MODULE_CHECK",
+	NULL
+};
+
+/*
+ * The "secure_and_trusted_rules" contains rules for both the secure boot and
+ * trusted boot. The "template=ima-modsig" option includes the appended
+ * signature, when available, in the IMA measurement list.
+ */
+static const char *const secure_and_trusted_rules[] = {
+	"measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
+	"measure func=MODULE_CHECK template=ima-modsig",
+	"appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG
+	"appraise func=MODULE_CHECK appraise_type=imasig|modsig",
+#endif
+	NULL
+};
+
+/*
+ * Returns the relevant IMA arch-specific policies based on the system secure
+ * boot state.
+ */
+const char *const *arch_get_ima_policy(void)
+{
+	if (!is_ppc_secureboot_enabled()) {
+		/* No secure boot: only the measurement rules can apply. */
+		if (is_ppc_trustedboot_enabled())
+			return trusted_rules;
+		return NULL;
+	}
+
+	if (IS_ENABLED(CONFIG_MODULE_SIG))
+		set_module_sig_enforced();
+
+	if (is_ppc_trustedboot_enabled())
+		return secure_and_trusted_rules;
+	return secure_rules;
+}
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
new file mode 100644
index 0000000000..c4f6d3c69b
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/context_tracking.h>
+#include <linux/err.h>
+#include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */
+
+#include <asm/kup.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+	return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+	return true;
+}
+#endif
+
+/*
+ * Last irq-state step before exiting to a context that has irqs enabled.
+ * Local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again; returns true if exit can proceed.
+ *
+ * If restartable is true then EE/RI can be left on because interrupts are
+ * handled with a restart sequence. When it is false, or when
+ * exit_must_hard_disable() is true, EE and RI are hard-disabled here.
+ *
+ * On PPC64, false is returned when lazy_irq_pending_nocheck() finds work.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+	bool must_hard_disable = (exit_must_hard_disable() || !restartable);
+
+	/* This must be done with RI=1 because tracing may touch vmaps */
+	trace_hardirqs_on();
+
+	if (must_hard_disable)
+		__hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+	/* This pattern matches prep_irq_for_idle */
+	if (unlikely(lazy_irq_pending_nocheck())) {
+		if (must_hard_disable) {
+			local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+			__hard_RI_enable();
+		}
+		trace_hardirqs_off();
+
+		return false;
+	}
+#endif
+	return true;
+}
+
+static notrace void booke_load_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+	if (likely(!(dbcr0 & DBCR0_IDM)))
+		return;
+
+	/*
+	 * Check to see if the dbcr0 register is set up to debug.
+	 * Use the internal debug mode bit to do this.
+	 */
+	mtmsr(mfmsr() & ~MSR_DE);
+	if (IS_ENABLED(CONFIG_PPC32)) {
+		isync();
+		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+	}
+	mtspr(SPRN_DBCR0, dbcr0);
+	mtspr(SPRN_DBSR, -1);
+#endif
+}
+
+static notrace void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	unsigned long trap, srr0, srr1;
+	static bool warned;
+	u8 *validp;
+	char *h;
+
+	if (trap_is_scv(regs))
+		return;
+
+	trap = TRAP(regs);
+	// EE in HV mode sets HSRRs like 0xea0
+	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+		trap = 0xea0;
+
+	switch (trap) {
+	case 0x980:
+	case INTERRUPT_H_DATA_STORAGE:
+	case 0xe20:
+	case 0xe40:
+	case INTERRUPT_HMI:
+	case 0xe80:
+	case 0xea0:
+	case INTERRUPT_H_FAC_UNAVAIL:
+	case 0x1200:
+	case 0x1500:
+	case 0x1600:
+	case 0x1800:
+		validp = &local_paca->hsrr_valid;
+		if (!READ_ONCE(*validp))
+			return;
+
+		srr0 = mfspr(SPRN_HSRR0);
+		srr1 = mfspr(SPRN_HSRR1);
+		h = "H";
+
+		break;
+	default:
+		validp = &local_paca->srr_valid;
+		if (!READ_ONCE(*validp))
+			return;
+
+		srr0 = mfspr(SPRN_SRR0);
+		srr1 = mfspr(SPRN_SRR1);
+		h = "";
+		break;
+	}
+
+	if (srr0 == regs->nip && srr1 == regs->msr)
+		return;
+
+	/*
+	 * A NMI / soft-NMI interrupt may have come in after we found
+	 * srr_valid and before the SRRs are loaded. The interrupt then
+	 * comes in and clobbers SRRs and clears srr_valid. Then we load
+	 * the SRRs here and test them above and find they don't match.
+	 *
+	 * Test validity again after that, to catch such false positives.
+	 *
+	 * This test in general will have some window for false negatives
+	 * and may not catch and fix all such cases if an NMI comes in
+	 * later and clobbers SRRs without clearing srr_valid, but hopefully
+	 * such things will get caught most of the time, statistically
+	 * enough to be able to get a warning out.
+	 */
+	if (!READ_ONCE(*validp))
+		return;
+
+	if (!data_race(warned)) {
+		data_race(warned = true);
+		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+		show_regs(regs);
+	}
+
+	WRITE_ONCE(*validp, 0); /* fixup */
+#endif
+}
+
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+	unsigned long ti_flags;
+
+again:
+	ti_flags = read_thread_flags();
+	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+		local_irq_enable();
+		if (ti_flags & _TIF_NEED_RESCHED) {
+			schedule();
+		} else {
+			/*
+			 * SIGPENDING must restore signal handler function
+			 * argument GPRs, and some non-volatiles (e.g., r1).
+			 * Restore all for now. This could be made lighter.
+			 */
+			if (ti_flags & _TIF_SIGPENDING)
+				ret |= _TIF_RESTOREALL;
+			do_notify_resume(regs, ti_flags);
+		}
+		local_irq_disable();
+		ti_flags = read_thread_flags();
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
+		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+				unlikely((ti_flags & _TIF_RESTORE_TM))) {
+			restore_tm_state(regs);
+		} else {
+			unsigned long mathflags = MSR_FP;
+
+			if (cpu_has_feature(CPU_FTR_VSX))
+				mathflags |= MSR_VEC | MSR_VSX;
+			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				mathflags |= MSR_VEC;
+
+			/*
+			 * If userspace MSR has all available FP bits set,
+			 * then they are live and no need to restore. If not,
+			 * it means the regs were given up and restore_math
+			 * may decide to restore them (to avoid taking an FP
+			 * fault).
+			 */
+			if ((regs->msr & mathflags) != mathflags)
+				restore_math(regs);
+		}
+	}
+
+	check_return_regs_valid(regs);
+
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
+		user_exit_irqoff();
+		local_irq_enable();
+		local_irq_disable();
+		goto again;
+	}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+
+	booke_load_dbcr0();
+
+	account_cpu_user_exit();
+
+	/* Restore user access locks last */
+	kuap_user_restore(regs);
+
+	return ret;
+}
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+					   struct pt_regs *regs,
+					   long scv)
+{
+	unsigned long ti_flags;
+	unsigned long ret = 0;
+	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+
+	CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+	kuap_assert_locked();
+
+	regs->result = r3;
+
+	/* Check whether the syscall is issued inside a restartable sequence */
+	rseq_syscall(regs);
+
+	ti_flags = read_thread_flags();
+
+	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+			r3 = -r3;
+			regs->ccr |= 0x10000000; /* Set SO bit in CR */
+		}
+	}
+
+	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+		if (ti_flags & _TIF_RESTOREALL)
+			ret = _TIF_RESTOREALL;
+		else
+			regs->gpr[3] = r3;
+		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+	} else {
+		regs->gpr[3] = r3;
+	}
+
+	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+		do_syscall_trace_leave(regs);
+		ret |= _TIF_RESTOREALL;
+	}
+
+	local_irq_disable();
+	ret = interrupt_exit_user_prepare_main(ret, regs);
+
+#ifdef CONFIG_PPC64
+	regs->exit_result = ret;
+#endif
+
+	return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+	/*
+	 * This is called when detecting a soft-pending interrupt as well as
+	 * an alternate-return interrupt. So we can't just have the alternate
+	 * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+	 * the soft-pending case were to fix things up as well). RI might be
+	 * disabled, in which case it gets re-enabled by __hard_irq_disable().
+	 */
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+	trace_hardirqs_off();
+	user_exit_irqoff();
+	account_cpu_user_entry();
+
+	BUG_ON(!user_mode(regs));
+
+	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+	return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+	unsigned long ret;
+
+	BUG_ON(regs_is_unrecoverable(regs));
+	BUG_ON(arch_irq_disabled_regs(regs));
+	CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+	/*
+	 * We don't need to restore AMR on the way back to userspace for KUAP.
+	 * AMR can only have been unlocked if we interrupted the kernel.
+	 */
+	kuap_assert_locked();
+
+	local_irq_disable();
+
+	ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+	regs->exit_result = ret;
+#endif
+
+	return ret;
+}
+
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+{
+	unsigned long ret = 0;	/* becomes 1 when a stack store must be emulated */
+	unsigned long kuap;	/* AMR state sampled below, restored at the end */
+	bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
+
+	if (regs_is_unrecoverable(regs))
+		unrecoverable_exception(regs);
+	/*
+	 * CT_WARN_ON comes here via program_check_exception, so avoid
+	 * recursion.
+	 *
+	 * Skip the assertion on PMIs on 64e to work around a problem caused
+	 * by NMI PMIs incorrectly taking this interrupt return path, it's
+	 * possible for this to hit after interrupt exit to user switches
+	 * context to user. See also the comment in the performance monitor
+	 * handler in exceptions-64e.S
+	 */
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+	    TRAP(regs) != INTERRUPT_PROGRAM &&
+	    TRAP(regs) != INTERRUPT_PERFMON)
+		CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+	kuap = kuap_get_and_assert_locked();
+
+	local_irq_disable();
+
+	if (!arch_irq_disabled_regs(regs)) {
+		/* Returning to a kernel context with local irqs enabled. */
+		WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+		if (IS_ENABLED(CONFIG_PREEMPT)) {
+			/* Return to preemptible kernel context */
+			if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
+				if (preempt_count() == 0)
+					preempt_schedule_irq();
+			}
+		}
+
+		check_return_regs_valid(regs);
+
+		/*
+		 * Stack store exit can't be restarted because the interrupt
+		 * stack frame might have been clobbered: not restartable then.
+		 */
+		if (!prep_irq_for_enabled_exit(!unlikely(stack_store))) {
+			/*
+			 * Replay pending soft-masked interrupts now. Don't
+			 * just local_irq_enable(); local_irq_disable(); because
+			 * if we are returning from an asynchronous interrupt
+			 * here, another one might hit after irqs are enabled,
+			 * and it would exit via this same path allowing
+			 * another to fire, and so on unbounded.
+			 */
+			hard_irq_disable();
+			replay_soft_interrupts();
+			/* Took an interrupt, may have more exit work to do. */
+			goto again;
+		}
+#ifdef CONFIG_PPC64
+		/*
+		 * An interrupt may clear MSR[EE] and set this concurrently,
+		 * but it will be marked pending and the exit will be retried.
+		 * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+		 * clear, until interrupt_exit_kernel_restart() calls
+		 * hard_irq_disable(), which will set HARD_DIS again.
+		 */
+		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+	} else {
+		check_return_regs_valid(regs);
+
+		if (unlikely(stack_store))
+			__hard_EE_RI_disable();
+#endif /* CONFIG_PPC64 */
+	}
+
+	if (unlikely(stack_store)) {
+		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+		ret = 1;
+	}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+
+	/*
+	 * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
+	 * mtmsr, which would cause Read-After-Write stalls. Hence, take the
+	 * AMR value from the check above.
+	 */
+	kuap_kernel_restore(regs, kuap);
+
+	return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+	trace_hardirqs_off();
+	user_exit_irqoff();
+	account_cpu_user_entry();
+
+	BUG_ON(!user_mode(regs));
+
+	regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+	return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+	if (regs->softe == IRQS_ENABLED)
+		trace_hardirqs_off();
+
+	BUG_ON(user_mode(regs));
+
+	return interrupt_exit_kernel_prepare(regs);
+}
+#endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
new file mode 100644
index 0000000000..bd863702d8
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -0,0 +1,772 @@
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/feature-fixups.h>
+#include <asm/head-64.h>
+#include <asm/hw_irq.h>
+#include <asm/kup.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+
+	.align 7
+
+.macro DEBUG_SRR_VALID srr
+#ifdef CONFIG_PPC_RFI_SRR_DEBUG
+	.ifc \srr,srr
+	mfspr	r11,SPRN_SRR0
+	ld	r12,_NIP(r1)
+	clrrdi  r11,r11,2
+	clrrdi  r12,r12,2
+100:	tdne	r11,r12
+	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+	mfspr	r11,SPRN_SRR1
+	ld	r12,_MSR(r1)
+100:	tdne	r11,r12
+	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+	.else
+	mfspr	r11,SPRN_HSRR0
+	ld	r12,_NIP(r1)
+	clrrdi  r11,r11,2
+	clrrdi  r12,r12,2
+100:	tdne	r11,r12
+	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+	mfspr	r11,SPRN_HSRR1
+	ld	r12,_MSR(r1)
+100:	tdne	r11,r12
+	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+	.endif
+#endif
+.endm
+
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+	.globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
+	SCV_INTERRUPT_TO_KERNEL
+	mr	r10,r1
+	ld	r1,PACAKSAVE(r13)
+	std	r10,0(r1)
+	std	r11,_NIP(r1)
+	std	r12,_MSR(r1)
+	std	r0,GPR0(r1)
+	std	r10,GPR1(r1)
+	std	r2,GPR2(r1)
+	LOAD_PACA_TOC()
+	mfcr	r12
+	li	r11,0
+	/* Save syscall parameters in r3-r8 */
+	SAVE_GPRS(3, 8, r1)
+	/* Zero r9-r12, this should only be required when restoring all GPRs */
+	std	r11,GPR9(r1)
+	std	r11,GPR10(r1)
+	std	r11,GPR11(r1)
+	std	r11,GPR12(r1)
+	std	r9,GPR13(r1)
+	SAVE_NVGPRS(r1)
+	std	r11,_XER(r1)
+	std	r11,_LINK(r1)
+	std	r11,_CTR(r1)
+
+	li	r11,\trapnr
+	std	r11,_TRAP(r1)
+	std	r12,_CCR(r1)
+	std	r3,ORIG_GPR3(r1)
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r11,STACK_INT_FRAME_MARKER(r1)		/* "regs" marker */
+	/* Calling convention has r3 = regs, r4 = orig r0 */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	mr	r4,r0
+
+BEGIN_FTR_SECTION
+	HMT_MEDIUM
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	/*
+	 * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+	 * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+	 * and interrupts may be masked and pending already.
+	 * system_call_exception() will call trace_hardirqs_off() which means
+	 * interrupts could already have been blocked before trace_hardirqs_off,
+	 * but this is the best we can do.
+	 */
+
+	/*
+	 * Zero user registers to prevent influencing speculative execution
+	 * state of kernel code.
+	 */
+	SANITIZE_SYSCALL_GPRS()
+	bl	CFUNC(system_call_exception)
+
+.Lsyscall_vectored_\name\()_exit:
+	addi	r4,r1,STACK_INT_FRAME_REGS
+	li	r5,1 /* scv */
+	bl	CFUNC(syscall_exit_prepare)
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Lsyscall_vectored_\name\()_rst_start:
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
+	bne-	syscall_vectored_\name\()_restart
+	li	r11,IRQS_ENABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	li	r11,0
+	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+	ld	r2,_CCR(r1)
+	ld	r4,_NIP(r1)
+	ld	r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1			/* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+	HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	SANITIZE_RESTORE_NVGPRS()
+	cmpdi	r3,0
+	bne	.Lsyscall_vectored_\name\()_restore_regs
+
+	/* rfscv returns with LR->NIA and CTR->MSR */
+	mtlr	r4
+	mtctr	r5
+
+	/* Could zero these as per ABI, but we may consider a stricter ABI
+	 * which preserves these if libc implementations can benefit, so
+	 * restore them for now until further measurement is done. */
+	REST_GPR(0, r1)
+	REST_GPRS(4, 8, r1)
+	/* Zero volatile regs that may contain sensitive kernel data */
+	ZEROIZE_GPRS(9, 12)
+	mtspr	SPRN_XER,r0
+
+	/*
+	 * We don't need to restore AMR on the way back to userspace for KUAP.
+	 * The value of AMR only matters while we're in the kernel.
+	 */
+	mtcr	r2
+	REST_GPRS(2, 3, r1)
+	REST_GPR(13, r1)
+	REST_GPR(1, r1)
+	RFSCV_TO_USER
+	b	.	/* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r5
+
+	ld	r3,_CTR(r1)
+	ld	r4,_LINK(r1)
+	ld	r5,_XER(r1)
+
+	HANDLER_RESTORE_NVGPRS()
+	REST_GPR(0, r1)
+	mtcr	r2
+	mtctr	r3
+	mtlr	r4
+	mtspr	SPRN_XER,r5
+	REST_GPRS(2, 13, r1)
+	REST_GPR(1, r1)
+	RFI_TO_USER
+.Lsyscall_vectored_\name\()_rst_end:
+
+syscall_vectored_\name\()_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
+	GET_PACA(r13)
+	ld	r1,PACA_EXIT_SAVE_R1(r13)
+	LOAD_PACA_TOC()
+	ld	r3,RESULT(r1)
+	addi	r4,r1,STACK_INT_FRAME_REGS
+	li	r11,IRQS_ALL_DISABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	bl	CFUNC(syscall_exit_restart)
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+	b	.Lsyscall_vectored_\name\()_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart)
+
+.endm
+
+system_call_vectored common 0x3000
+
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+	.balign IFETCH_ALIGN_BYTES
+	.globl system_call_common_real
+system_call_common_real:
+_ASM_NOKPROBE_SYMBOL(system_call_common_real)
+	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
+	mtmsrd	r10
+
+	.balign IFETCH_ALIGN_BYTES
+	.globl system_call_common
+system_call_common:
+_ASM_NOKPROBE_SYMBOL(system_call_common)
+	mr	r10,r1
+	ld	r1,PACAKSAVE(r13)
+	std	r10,0(r1)
+	std	r11,_NIP(r1)
+	std	r12,_MSR(r1)
+	std	r0,GPR0(r1)
+	std	r10,GPR1(r1)
+	std	r2,GPR2(r1)
+#ifdef CONFIG_PPC_E500
+START_BTB_FLUSH_SECTION
+	BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+#endif
+	LOAD_PACA_TOC()
+	mfcr	r12
+	li	r11,0
+	/* Save syscall parameters in r3-r8 */
+	SAVE_GPRS(3, 8, r1)
+	/* Zero r9-r12, this should only be required when restoring all GPRs */
+	std	r11,GPR9(r1)
+	std	r11,GPR10(r1)
+	std	r11,GPR11(r1)
+	std	r11,GPR12(r1)
+	std	r9,GPR13(r1)
+	SAVE_NVGPRS(r1)
+	std	r11,_XER(r1)
+	std	r11,_CTR(r1)
+	mflr	r10
+
+	/*
+	 * This clears CR0.SO (bit 28), which is the error indication on
+	 * return from this system call.
+	 */
+	rldimi	r12,r11,28,(63-28)
+	li	r11,0xc00
+	std	r10,_LINK(r1)
+	std	r11,_TRAP(r1)
+	std	r12,_CCR(r1)
+	std	r3,ORIG_GPR3(r1)
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r11,STACK_INT_FRAME_MARKER(r1)		/* "regs" marker */
+	/* Calling convention has r3 = regs, r4 = orig r0 */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	mr	r4,r0
+
+#ifdef CONFIG_PPC_BOOK3S
+	li	r11,1
+	stb	r11,PACASRR_VALID(r13)
+#endif
+
+	/*
+	 * We always enter kernel from userspace with irq soft-mask enabled and
+	 * nothing pending. system_call_exception() will call
+	 * trace_hardirqs_off().
+	 */
+	li	r11,IRQS_ALL_DISABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_PPC_BOOK3S
+	li	r12,-1 /* Set MSR_EE and MSR_RI */
+	mtmsrd	r12,1
+#else
+	wrteei	1
+#endif
+
+	/*
+	 * Zero user registers to prevent influencing speculative execution
+	 * state of kernel code.
+	 */
+	SANITIZE_SYSCALL_GPRS()
+	bl	CFUNC(system_call_exception)
+
+.Lsyscall_exit:
+	addi	r4,r1,STACK_INT_FRAME_REGS
+	li	r5,0 /* !scv */
+	bl	CFUNC(syscall_exit_prepare)
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Lsyscall_rst_start:
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
+	bne-	syscall_restart
+#endif
+	li	r11,IRQS_ENABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	li	r11,0
+	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+	ld	r2,_CCR(r1)
+	ld	r6,_LINK(r1)
+	mtlr	r6
+
+#ifdef CONFIG_PPC_BOOK3S
+	lbz	r4,PACASRR_VALID(r13)
+	cmpdi	r4,0
+	bne	1f
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)
+#endif
+	ld	r4,_NIP(r1)
+	ld	r5,_MSR(r1)
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r5
+1:
+	DEBUG_SRR_VALID srr
+
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1			/* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	SANITIZE_RESTORE_NVGPRS()
+	cmpdi	r3,0
+	bne	.Lsyscall_restore_regs
+	/* Zero volatile regs that may contain sensitive kernel data */
+	ZEROIZE_GPR(0)
+	ZEROIZE_GPRS(4, 12)
+	mtctr	r0
+	mtspr	SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
+
+BEGIN_FTR_SECTION
+	HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	/*
+	 * We don't need to restore AMR on the way back to userspace for KUAP.
+	 * The value of AMR only matters while we're in the kernel.
+	 */
+	mtcr	r2
+	REST_GPRS(2, 3, r1)
+	REST_GPR(13, r1)
+	REST_GPR(1, r1)
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
+
+.Lsyscall_restore_regs:
+	ld	r3,_CTR(r1)
+	ld	r4,_XER(r1)
+	HANDLER_RESTORE_NVGPRS()
+	mtctr	r3
+	mtspr	SPRN_XER,r4
+	REST_GPR(0, r1)
+	REST_GPRS(4, 12, r1)
+	b	.Lsyscall_restore_regs_cont
+.Lsyscall_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+syscall_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_restart)
+	GET_PACA(r13)
+	ld	r1,PACA_EXIT_SAVE_R1(r13)
+	LOAD_PACA_TOC()
+	ld	r3,RESULT(r1)
+	addi	r4,r1,STACK_INT_FRAME_REGS
+	li	r11,IRQS_ALL_DISABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	bl	CFUNC(syscall_exit_restart)
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+	b	.Lsyscall_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
+#endif
+
+	/*
+	 * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+	 * touched, no exit work created, then this can be used.
+	 */
+	.balign IFETCH_ALIGN_BYTES
+	.globl fast_interrupt_return_srr
+fast_interrupt_return_srr:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
+	kuap_check_amr r3, r4
+	ld	r5,_MSR(r1)
+	andi.	r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
+	beq	1f
+	kuap_user_restore r3, r4
+	b	.Lfast_user_interrupt_return_srr
+1:	kuap_kernel_restore r3, r4
+	andi.	r0,r5,MSR_RI
+	li	r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+	bne+	.Lfast_kernel_interrupt_return_srr
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(unrecoverable_exception)
+	b	. /* should not get here */
+#else
+	bne	.Lfast_user_interrupt_return_srr
+	b	.Lfast_kernel_interrupt_return_srr
+#endif
+
+.macro interrupt_return_macro srr
+	.balign IFETCH_ALIGN_BYTES
+	.globl interrupt_return_\srr
+interrupt_return_\srr\():
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
+	ld	r4,_MSR(r1)
+	andi.	r0,r4,MSR_PR
+	beq	interrupt_return_\srr\()_kernel
+interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(interrupt_exit_user_prepare)
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+	cmpdi	r3,0
+	bne-	.Lrestore_nvgprs_\srr
+.Lrestore_nvgprs_\srr\()_cont:
+#endif
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Linterrupt_return_\srr\()_user_rst_start:
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
+	bne-	interrupt_return_\srr\()_user_restart
+#endif
+	li	r11,IRQS_ENABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	li	r11,0
+	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+.Lfast_user_interrupt_return_\srr\():
+	SANITIZE_RESTORE_NVGPRS()
+#ifdef CONFIG_PPC_BOOK3S
+	.ifc \srr,srr
+	lbz	r4,PACASRR_VALID(r13)
+	.else
+	lbz	r4,PACAHSRR_VALID(r13)
+	.endif
+	cmpdi	r4,0
+	li	r4,0
+	bne	1f
+#endif
+	ld	r11,_NIP(r1)
+	ld	r12,_MSR(r1)
+	.ifc \srr,srr
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+	stb	r4,PACASRR_VALID(r13)
+#endif
+	.else
+	mtspr	SPRN_HSRR0,r11
+	mtspr	SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+	stb	r4,PACAHSRR_VALID(r13)
+#endif
+	.endif
+	DEBUG_SRR_VALID \srr
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+	lbz	r4,PACAIRQSOFTMASK(r13)
+	tdnei	r4,IRQS_ENABLED
+#endif
+
+BEGIN_FTR_SECTION
+	ld	r10,_PPR(r1)
+	mtspr	SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	ldarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	ld	r3,_CCR(r1)
+	ld	r4,_LINK(r1)
+	ld	r5,_CTR(r1)
+	ld	r6,_XER(r1)
+	li	r0,0
+
+	REST_GPRS(7, 13, r1)
+
+	mtcr	r3
+	mtlr	r4
+	mtctr	r5
+	mtspr	SPRN_XER,r6
+
+	REST_GPRS(2, 6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	.ifc \srr,srr
+	RFI_TO_USER
+	.else
+	HRFI_TO_USER
+	.endif
+	b	.	/* prevent speculative execution */
+.Linterrupt_return_\srr\()_user_rst_end:
+
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+.Lrestore_nvgprs_\srr\():
+	REST_NVGPRS(r1)
+	b	.Lrestore_nvgprs_\srr\()_cont
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_user_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
+	GET_PACA(r13)
+	ld	r1,PACA_EXIT_SAVE_R1(r13)
+	LOAD_PACA_TOC()
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	li	r11,IRQS_ALL_DISABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	bl	CFUNC(interrupt_exit_user_restart)
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+	b	.Linterrupt_return_\srr\()_user_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart)
+#endif
+
+	.balign IFETCH_ALIGN_BYTES
+interrupt_return_\srr\()_kernel:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	bl	CFUNC(interrupt_exit_kernel_prepare)
+
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Linterrupt_return_\srr\()_kernel_rst_start:
+	ld	r11,SOFTE(r1)
+	cmpwi	r11,IRQS_ENABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	beq	.Linterrupt_return_\srr\()_soft_enabled
+
+	/*
+	 * Returning to soft-disabled context.
+	 * Check if a MUST_HARD_MASK interrupt has become pending, in which
+	 * case we need to disable MSR[EE] in the return context.
+	 *
+	 * The MSR[EE] check catches among other things the short incoherency
+	 * in hard_irq_disable() between clearing MSR[EE] and setting
+	 * PACA_IRQ_HARD_DIS.
+	 */
+	ld	r12,_MSR(r1)
+	andi.	r10,r12,MSR_EE
+	beq	.Lfast_kernel_interrupt_return_\srr\() // EE already disabled
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r10,r11,PACA_IRQ_MUST_HARD_MASK
+	bne	1f // HARD_MASK is pending
+	// No HARD_MASK pending, clear possible HARD_DIS set by interrupt
+	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
+	stb	r11,PACAIRQHAPPENED(r13)
+	b	.Lfast_kernel_interrupt_return_\srr\()
+
+
+1:	/* Must clear MSR_EE from _MSR */
+#ifdef CONFIG_PPC_BOOK3S
+	li	r10,0
+	/* Clear valid before changing _MSR */
+	.ifc \srr,srr
+	stb	r10,PACASRR_VALID(r13)
+	.else
+	stb	r10,PACAHSRR_VALID(r13)
+	.endif
+#endif
+	xori	r12,r12,MSR_EE
+	std	r12,_MSR(r1)
+	b	.Lfast_kernel_interrupt_return_\srr\()
+
+.Linterrupt_return_\srr\()_soft_enabled:
+	/*
+	 * In the soft-enabled case, need to double-check that we have no
+	 * pending interrupts that might have come in before we reached the
+	 * restart section of code, and restart the exit so those can be
+	 * handled.
+	 *
+	 * If there are none, it is possible that the interrupt still
+	 * has PACA_IRQ_HARD_DIS set, which needs to be cleared for the
+	 * interrupted context. This clear will not clobber a new pending
+	 * interrupt coming in, because we're in the restart section, so
+	 * such would return to the restart location.
+	 */
+#ifdef CONFIG_PPC_BOOK3S
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
+	bne-	interrupt_return_\srr\()_kernel_restart
+#endif
+	li	r11,0
+	stb	r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
+
+.Lfast_kernel_interrupt_return_\srr\():
+	SANITIZE_RESTORE_NVGPRS()
+	cmpdi	cr1,r3,0
+#ifdef CONFIG_PPC_BOOK3S
+	.ifc \srr,srr
+	lbz	r4,PACASRR_VALID(r13)
+	.else
+	lbz	r4,PACAHSRR_VALID(r13)
+	.endif
+	cmpdi	r4,0
+	li	r4,0
+	bne	1f
+#endif
+	ld	r11,_NIP(r1)
+	ld	r12,_MSR(r1)
+	.ifc \srr,srr
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+	stb	r4,PACASRR_VALID(r13)
+#endif
+	.else
+	mtspr	SPRN_HSRR0,r11
+	mtspr	SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+	stb	r4,PACAHSRR_VALID(r13)
+#endif
+	.endif
+	DEBUG_SRR_VALID \srr
+
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	ldarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+	ld	r3,_LINK(r1)
+	ld	r4,_CTR(r1)
+	ld	r5,_XER(r1)
+	ld	r6,_CCR(r1)
+	li	r0,0
+
+	REST_GPRS(7, 12, r1)
+
+	mtlr	r3
+	mtctr	r4
+	mtspr	SPRN_XER,r5
+
+	/*
+	 * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
+	 * the reliable stack unwinder later on. Clear it.
+	 */
+	std	r0,STACK_INT_FRAME_MARKER(r1)
+
+	REST_GPRS(2, 5, r1)
+
+	bne-	cr1,1f /* emulate stack store */
+	mtcr	r6
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	.ifc \srr,srr
+	RFI_TO_KERNEL
+	.else
+	HRFI_TO_KERNEL
+	.endif
+	b	.	/* prevent speculative execution */
+
+1:	/*
+	 * Emulate stack store with update. New r1 value was already calculated
+	 * and updated in our interrupt regs by emulate_loadstore, but we can't
+	 * store the previous value of r1 to the stack before re-loading our
+	 * registers from it, otherwise they could be clobbered.  Use
+	 * PACA_EXGEN as temporary storage to hold the store data, as
+	 * interrupts are disabled here so it won't be clobbered.
+	 */
+	mtcr	r6
+	std	r9,PACA_EXGEN+0(r13)
+	addi	r9,r1,INT_FRAME_SIZE /* get original r1 */
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	std	r9,0(r1) /* perform store component of stdu */
+	ld	r9,PACA_EXGEN+0(r13)
+
+	.ifc \srr,srr
+	RFI_TO_KERNEL
+	.else
+	HRFI_TO_KERNEL
+	.endif
+	b	.	/* prevent speculative execution */
+.Linterrupt_return_\srr\()_kernel_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_kernel_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
+	GET_PACA(r13)
+	ld	r1,PACA_EXIT_SAVE_R1(r13)
+	LOAD_PACA_TOC()
+	addi	r3,r1,STACK_INT_FRAME_REGS
+	li	r11,IRQS_ALL_DISABLED
+	stb	r11,PACAIRQSOFTMASK(r13)
+	bl	CFUNC(interrupt_exit_kernel_restart)
+	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+	b	.Linterrupt_return_\srr\()_kernel_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
+#endif
+
+.endm
+
+interrupt_return_macro srr
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_macro hsrr
+
+	.globl __end_soft_masked
+__end_soft_masked:
+DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+	bl	CFUNC(schedule_tail)
+	HANDLER_RESTORE_NVGPRS()
+	li	r3,0	/* fork() return value */
+	b	.Lsyscall_vectored_common_exit
+#endif
+
+_GLOBAL(ret_from_fork)
+	bl	CFUNC(schedule_tail)
+	HANDLER_RESTORE_NVGPRS()
+	li	r3,0	/* fork() return value */
+	b	.Lsyscall_exit
+
+_GLOBAL(ret_from_kernel_user_thread)
+	bl	CFUNC(schedule_tail)
+	mtctr	r14		/* r14 holds the address that is called below */
+	mr	r3,r15		/* r15 is handed over in r3 */
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+	mr	r12,r14		/* under ELFv2 r12 also carries the entry address */
+#endif
+	bctrl			/* call through CTR, linking back to the next insn */
+	li	r3,0		/* r3 = 0 going into the syscall exit path */
+	/*
+	 * It does not matter whether this returns via the scv or sc path
+	 * because it returns as execve() and therefore has no calling ABI
+	 * (i.e., it sets registers according to the exec()ed entry point).
+	 */
+	b	.Lsyscall_exit
+
+_GLOBAL(start_kernel_thread)
+	bl	CFUNC(schedule_tail)
+	mtctr	r14		/* r14 holds the address that is called below */
+	mr	r3,r15		/* r15 is handed over in r3 */
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+	mr	r12,r14		/* under ELFv2 r12 also carries the entry address */
+#endif
+	bctrl			/* call through CTR, linking back to the next insn */
+	/*
+	 * This must not return. We actually want to BUG here, not WARN,
+	 * because BUG will exit the process which is what the kernel thread
+	 * should have done, which may give some hope of continuing.
+	 */
+100:	trap			/* only reached if the called function returned */
+	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
new file mode 100644
index 0000000000..c877f074d1
--- /dev/null
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support PCI IO workaround
+ *
+ *  Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *		       IBM, Corp.
+ *  (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ */
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>	/* for init_mm */
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>
+
+
+#define IOWA_MAX_BUS	8
+
+static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
+static unsigned int iowa_bus_count;
+
+/* Look up the registered bus matching @vaddr (PHB I/O window) or @paddr (a
+ * PHB memory resource); a zero address skips that test.  NULL if no match. */
+static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
+{
+	int n, r;
+
+	for (n = 0; n < iowa_bus_count; n++) {
+		struct iowa_bus *cur = &iowa_busses[n];
+		struct pci_controller *hose = cur->phb;
+
+		if (vaddr) {
+			unsigned long io_lo = (unsigned long)hose->io_base_virt;
+			unsigned long io_hi = io_lo + hose->pci_io_size - 1;
+
+			if (vaddr >= io_lo && vaddr <= io_hi)
+				return cur;
+		}
+
+		for (r = 0; paddr && r < 3; r++) {
+			struct resource *win = &hose->mem_resources[r];
+
+			if (paddr >= win->start && paddr <= win->end)
+				return cur;
+		}
+	}
+	return NULL;
+}
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+/*
+ * Resolve an MMIO cookie to its workaround bus.  A non-zero token embedded
+ * in the address that does not exceed iowa_bus_count selects a slot directly
+ * (token N means iowa_busses[N - 1]); otherwise the address must lie in
+ * [PHB_IO_BASE, PHB_IO_END) and is searched for with iowa_pci_find().
+ * Returns the matching bus, or NULL when there is none.
+ */
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+	unsigned long virt, phys;
+	int tok = PCI_GET_ADDR_TOKEN(addr);
+
+	/* Fast path: the token already names the bus slot. */
+	if (tok && tok <= iowa_bus_count)
+		return &iowa_busses[tok - 1];
+
+	/* No usable token: fall back to an address lookup. */
+	virt = (unsigned long)PCI_FIX_ADDR(addr);
+	if (virt < PHB_IO_BASE || virt >= PHB_IO_END)
+		return NULL;
+
+	phys = ppc_find_vmap_phys(virt);
+
+	return iowa_pci_find(virt, phys);
+}
+#else /* CONFIG_PPC_INDIRECT_MMIO */
+/* Without indirect MMIO there is never a workaround bus to report. */
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+	return NULL;
+}
+#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+
+#ifdef CONFIG_PPC_INDIRECT_PIO
+/* Find the bus for @port by looking up the address pci_io_base + @port. */
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+	return iowa_pci_find((unsigned long)pci_io_base + port, 0);
+}
+#else
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+	return NULL;
+}
+#endif
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
+static ret iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name)			\
+		return bus->ops->name al;			\
+	return __do_##name al;					\
+}
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
+static void iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name) {		\
+		bus->ops->name al;				\
+		return;						\
+	}							\
+	__do_##name al;						\
+}
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+static const struct ppc_pci_io iowa_pci_io = {
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	.name = iowa_##name,
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)	.name = iowa_##name,
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+};
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+/* ioremap @addr and, if a workaround bus owns it, tag the cookie with bus#+1 */
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+			   pgprot_t prot, void *caller)
+{
+	void __iomem *res = __ioremap_caller(addr, size, prot, caller);
+	struct iowa_bus *bus;
+
+	if (!res)	/* never tag a failed mapping: callers must still see NULL */
+		return NULL;
+	bus = iowa_pci_find(0, (unsigned long)addr);
+	if (bus)	/* token is the 1-based slot, as iowa_mem_find_bus() expects */
+		PCI_SET_ADDR_TOKEN(res, (int)(bus - iowa_busses) + 1);
+	return res;
+}
+#endif /* CONFIG_PPC_INDIRECT_MMIO */
+
+bool io_workaround_inited;
+
+/* Install the iowa accessor table into ppc_pci_io on the first call only. */
+static void io_workaround_init(void)
+{
+	if (!io_workaround_inited) {
+		ppc_pci_io = iowa_pci_io;
+		io_workaround_inited = true;
+	}
+}
+
+/* Register @phb in the next free workaround slot; @initfunc may veto it. */
+void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
+		       int (*initfunc)(struct iowa_bus *, void *), void *data)
+{
+	struct device_node *np = phb->dn;
+	struct iowa_bus *slot;
+
+	io_workaround_init();
+
+	if (iowa_bus_count >= IOWA_MAX_BUS) {
+		pr_err("IOWA:Too many pci bridges, "
+		       "workarounds disabled for %pOF\n", np);
+		return;
+	}
+
+	slot = iowa_busses + iowa_bus_count;
+	slot->phb = phb;
+	slot->ops = ops;
+	slot->private = data;
+
+	/* A failing init hook leaves the count untouched, dropping the slot. */
+	if (initfunc && (*initfunc)(slot, data))
+		return;
+
+	iowa_bus_count++;
+
+	pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np);
+}
+
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
new file mode 100644
index 0000000000..2f29b7d432
--- /dev/null
+++ b/arch/powerpc/kernel/io.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * I/O string operations
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *    Copyright (C) 2006 IBM Corporation
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * Rewritten in C by Stephen Rothwell.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+
+#include <asm/io.h>
+#include <asm/firmware.h>
+#include <asm/bug.h>
+
+/* See definition in io.h */
+bool isa_io_special;
+
+void _insb(const volatile u8 __iomem *port, void *buf, long count)
+{	/* Read @count bytes from the single address @port into @buf. */
+	u8 *tbuf = buf;
+	u8 tmp;
+
+	if (unlikely(count <= 0))
+		return;		/* nothing to do for a zero or negative count */
+	asm volatile("sync");	/* sync once before the first device read */
+	do {
+		tmp = *port;	/* @port is never advanced, only the buffer is */
+		eieio();	/* eieio after each read, before the next one */
+		*tbuf++ = tmp;
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));	/* twi with TO=0 never traps; it only consumes the last value read before isync */
+}
+EXPORT_SYMBOL(_insb);
+
+void _outsb(volatile u8 __iomem *port, const void *buf, long count)
+{	/* Write @count bytes from @buf to the single address @port. */
+	const u8 *tbuf = buf;
+
+	if (unlikely(count <= 0))
+		return;		/* nothing to do for a zero or negative count */
+	asm volatile("sync");	/* sync before the first device write */
+	do {
+		*port = *tbuf++;	/* only the source pointer advances */
+	} while (--count != 0);
+	asm volatile("sync");	/* sync again once all writes are issued */
+}
+EXPORT_SYMBOL(_outsb);
+
+void _insw_ns(const volatile u16 __iomem *port, void *buf, long count)
+{	/* Read @count 16-bit values from @port into @buf, stored exactly as read. */
+	u16 *tbuf = buf;
+	u16 tmp;
+
+	if (unlikely(count <= 0))
+		return;		/* nothing to do for a zero or negative count */
+	asm volatile("sync");	/* sync once before the first device read */
+	do {
+		tmp = *port;	/* @port is never advanced, only the buffer is */
+		eieio();	/* eieio after each read, before the next one */
+		*tbuf++ = tmp;
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));	/* twi with TO=0 never traps; it only consumes the last value read before isync */
+}
+EXPORT_SYMBOL(_insw_ns);
+
+void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
+{	/* Write @count 16-bit values from @buf to @port, exactly as stored. */
+	const u16 *tbuf = buf;
+
+	if (unlikely(count <= 0))
+		return;		/* nothing to do for a zero or negative count */
+	asm volatile("sync");	/* sync before the first device write */
+	do {
+		*port = *tbuf++;	/* only the source pointer advances */
+	} while (--count != 0);
+	asm volatile("sync");	/* sync again once all writes are issued */
+}
+EXPORT_SYMBOL(_outsw_ns);
+
+void _insl_ns(const volatile u32 __iomem *port, void *buf, long count)
+{	/* Read @count 32-bit values from @port into @buf, stored exactly as read. */
+	u32 *tbuf = buf;
+	u32 tmp;
+
+	if (unlikely(count <= 0))
+		return;		/* nothing to do for a zero or negative count */
+	asm volatile("sync");	/* sync once before the first device read */
+	do {
+		tmp = *port;	/* @port is never advanced, only the buffer is */
+		eieio();	/* eieio after each read, before the next one */
+		*tbuf++ = tmp;
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));	/* twi with TO=0 never traps; it only consumes the last value read before isync */
+}
+EXPORT_SYMBOL(_insl_ns);
+
+void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
+{	/* Write @count 32-bit values from @buf to @port, exactly as stored. */
+	const u32 *tbuf = buf;
+
+	if (unlikely(count <= 0))
+		return;		/* nothing to do for a zero or negative count */
+	asm volatile("sync");	/* sync before the first device write */
+	do {
+		*port = *tbuf++;	/* only the source pointer advances */
+	} while (--count != 0);
+	asm volatile("sync");	/* sync again once all writes are issued */
+}
+EXPORT_SYMBOL(_outsl_ns);
+
+#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
+
+/* Fill @n bytes of I/O memory at @addr with the low byte of @c. */
+notrace void
+_memset_io(volatile void __iomem *addr, int c, unsigned long n)
+{
+	void *p = (void __force *)addr;
+	/* Replicate only the low byte so word and byte stores always agree. */
+	u32 lc = (u8)c * 0x01010101u;
+
+	__asm__ __volatile__ ("sync" : : : "memory");
+	while(n && !IO_CHECK_ALIGN(p, 4)) {	/* byte stores up to 4-byte alignment */
+		*((volatile u8 *)p) = c;
+		p++;
+		n--;
+	}
+	while(n >= 4) {				/* then whole 32-bit words */
+		*((volatile u32 *)p) = lc;
+		p += 4;
+		n -= 4;
+	}
+	while(n) {				/* and the 0-3 leftover bytes */
+		*((volatile u8 *)p) = c;
+		p++;
+		n--;
+	}
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memset_io);
+
+void _memcpy_fromio(void *dest, const volatile void __iomem *src,
+		    unsigned long n)
+{	/* Copy @n bytes from I/O memory @src to @dest. */
+	void *vsrc = (void __force *) src;
+
+	__asm__ __volatile__ ("sync" : : : "memory");
+	while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) {	/* bytewise until BOTH pointers are 4-byte aligned (possibly the whole copy) */
+		*((u8 *)dest) = *((volatile u8 *)vsrc);
+		eieio();
+		vsrc++;
+		dest++;
+		n--;
+	}
+	while(n >= 4) {	/* then whole 32-bit words, eieio after each read */
+		*((u32 *)dest) = *((volatile u32 *)vsrc);
+		eieio();
+		vsrc += 4;
+		dest += 4;
+		n -= 4;
+	}
+	while(n) {	/* and finally the 0-3 leftover bytes */
+		*((u8 *)dest) = *((volatile u8 *)vsrc);
+		eieio();
+		vsrc++;
+		dest++;
+		n--;
+	}
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memcpy_fromio);
+
+void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
+{	/* Copy @n bytes from @src to I/O memory @dest. */
+	void *vdest = (void __force *) dest;
+
+	__asm__ __volatile__ ("sync" : : : "memory");
+	while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) {	/* bytewise until BOTH pointers are 4-byte aligned (possibly the whole copy) */
+		*((volatile u8 *)vdest) = *((u8 *)src);
+		src++;
+		vdest++;
+		n--;
+	}
+	while(n >= 4) {	/* then whole 32-bit words */
+		*((volatile u32 *)vdest) = *((volatile u32 *)src);
+		src += 4;
+		vdest += 4;
+		n-=4;
+	}
+	while(n) {	/* and finally the 0-3 leftover bytes */
+		*((volatile u8 *)vdest) = *((u8 *)src);
+		src++;
+		vdest++;
+		n--;
+	}
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memcpy_toio);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 0000000000..72862a4d3a
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ppc64 "iomap" interface implementation.
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/isa-bridge.h>
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{	/* Turn an I/O port number into a cookie by offsetting it from _IO_BASE. */
+	return (void __iomem *) (port + _IO_BASE);	/* @len is not used */
+}
+EXPORT_SYMBOL(ioport_map);
+
+#ifdef CONFIG_PCI
+/* iounmap() @addr unless it is an ISA or PCI I/O-port address; @dev unused. */
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	if (isa_vaddr_is_ioport(addr) || pcibios_vaddr_is_ioport(addr))
+		return;
+
+	iounmap(addr);
+}
+
+EXPORT_SYMBOL(pci_iounmap);
+#endif /* CONFIG_PCI */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 0000000000..14251bc521
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,1417 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ * 
+ * Rewrite, cleanup, new allocation schemes, virtual merging: 
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ *               and  Ben. Herrenschmidt, IBM Corporation
+ *
+ * Dynamic DMA mapping support, bus-independent parts.
+ */
+
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitmap.h>
+#include <linux/iommu-helper.h>
+#include <linux/crash_dump.h>
+#include <linux/hash.h>
+#include <linux/fault-inject.h>
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/kdump.h>
+#include <asm/fadump.h>
+#include <asm/vio.h>
+#include <asm/tce.h>
+#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
+
+#define DBG(...)
+
+#ifdef CONFIG_IOMMU_DEBUGFS
+static int iommu_debugfs_weight_get(void *data, u64 *val)
+{	/* debugfs getter; @data is the iommu_table the "weight" file was created with */
+	struct iommu_table *tbl = data;
+	*val = bitmap_weight(tbl->it_map, tbl->it_size);	/* number of bits set in it_map */
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n");
+
+/* Create the debugfs directory for @tbl, named after it_index in hex. */
+static void iommu_debugfs_add(struct iommu_table *tbl)
+{
+	char name[2 * sizeof(tbl->it_index) + 1];	/* every hex digit + NUL */
+	struct dentry *liobn_entry;
+
+	snprintf(name, sizeof(name), "%08lx", tbl->it_index);
+	liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir);
+	debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight);
+	debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size);
+	debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift);
+	debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start);
+	debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end);
+	debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels);
+	debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size);
+}
+
+static void iommu_debugfs_del(struct iommu_table *tbl)
+{
+	char name[2 * sizeof(tbl->it_index) + 1];	/* must match iommu_debugfs_add() */
+
+	snprintf(name, sizeof(name), "%08lx", tbl->it_index);	/* rebuild the directory name */
+	debugfs_lookup_and_remove(name, iommu_debugfs_dir);
+}
+#else
+static void iommu_debugfs_add(struct iommu_table *tbl){}
+static void iommu_debugfs_del(struct iommu_table *tbl){}
+#endif
+
+static int novmerge;
+
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
+
+/* Boot-option handler: "novmerge" sets novmerge, "vmerge" clears it. */
+static int __init setup_iommu(char *str)
+{
+	if (!strcmp(str, "novmerge") || !strcmp(str, "vmerge"))
+		novmerge = (str[0] == 'n');
+
+	return 1;	/* anything else leaves novmerge untouched */
+}
+
+__setup("iommu=", setup_iommu);
+
+static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+/*
+ * We precalculate the hash to avoid doing it on every allocation.
+ *
+ * The hash is important to spread CPUs across all the pools. For example,
+ * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
+ * with 4 pools all primary threads would map to the same pool.
+ */
+static int __init setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+
+	return 0;
+}
+subsys_initcall(setup_iommu_pool_hash);
+
+#ifdef CONFIG_FAIL_IOMMU
+
+static DECLARE_FAULT_ATTR(fail_iommu);
+
+static int __init setup_fail_iommu(char *str)
+{
+	return setup_fault_attr(&fail_iommu, str);
+}
+__setup("fail_iommu=", setup_fail_iommu);
+
+static bool should_fail_iommu(struct device *dev)
+{
+	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
+}
+
+static int __init fail_iommu_debugfs(void)
+{
+	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
+						       NULL, &fail_iommu);
+
+	return PTR_ERR_OR_ZERO(dir);
+}
+late_initcall(fail_iommu_debugfs);
+
+static ssize_t fail_iommu_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
+}
+
+/* sysfs store: a parsed integer sets fail_iommu to 0 or 1; bad input is ignored */
+static ssize_t fail_iommu_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	int val;
+
+	if (count && sscanf(buf, "%d", &val) >= 1)
+		dev->archdata.fail_iommu = !!val;
+	return count;
+}
+
+static DEVICE_ATTR_RW(fail_iommu);
+
+/* Bus notifier: create/remove the fail_iommu attribute with the device. */
+static int fail_iommu_bus_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	if (action == BUS_NOTIFY_DEL_DEVICE)
+		device_remove_file(dev, &dev_attr_fail_iommu);
+	else if (action == BUS_NOTIFY_ADD_DEVICE &&
+		 device_create_file(dev, &dev_attr_fail_iommu))
+		pr_warn("Unable to create IOMMU fault injection sysfs "
+			"entries\n");
+
+	return 0;
+}
+
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes.  Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
+	.notifier_call = fail_iommu_bus_notify
+};
+
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
+	.notifier_call = fail_iommu_bus_notify
+};
+#endif
+
+static int __init fail_iommu_setup(void)
+{
+#ifdef CONFIG_PCI
+	bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
+#endif
+#ifdef CONFIG_IBMVIO
+	bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
+#endif
+
+	return 0;
+}
+/*
+ * Must execute after PCI and VIO subsystem have initialised but before
+ * devices are probed.
+ */
+arch_initcall(fail_iommu_setup);
+#else
+static inline bool should_fail_iommu(struct device *dev)
+{
+	return false;
+}
+#endif
+
+static unsigned long iommu_range_alloc(struct device *dev,
+				       struct iommu_table *tbl,
+                                       unsigned long npages,
+                                       unsigned long *handle,
+                                       unsigned long mask,
+                                       unsigned int align_order)
+{ 
+	unsigned long n, end, start;
+	unsigned long limit;
+	int largealloc = npages > 15;
+	int pass = 0;
+	unsigned long align_mask;
+	unsigned long flags;
+	unsigned int pool_nr;
+	struct iommu_pool *pool;
+
+	align_mask = (1ull << align_order) - 1;
+
+	/* This allocator was derived from x86_64's bit string search */
+
+	/* Sanity check */
+	if (unlikely(npages == 0)) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+		return DMA_MAPPING_ERROR;
+	}
+
+	if (should_fail_iommu(dev))
+		return DMA_MAPPING_ERROR;
+
+	/*
+	 * We don't need to disable preemption here because any CPU can
+	 * safely use any IOMMU pool.
+	 */
+	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
+
+	if (largealloc)
+		pool = &(tbl->large_pool);
+	else
+		pool = &(tbl->pools[pool_nr]);
+
+	spin_lock_irqsave(&(pool->lock), flags);
+
+again:
+	if ((pass == 0) && handle && *handle &&
+	    (*handle >= pool->start) && (*handle < pool->end))
+		start = *handle;
+	else
+		start = pool->hint;
+
+	limit = pool->end;
+
+	/* The case below can happen if we have a small segment appended
+	 * to a large, or when the previous alloc was at the very end of
+	 * the available space. If so, go back to the initial start.
+	 */
+	if (start >= limit)
+		start = pool->start;
+
+	if (limit + tbl->it_offset > mask) {
+		limit = mask - tbl->it_offset + 1;
+		/* If we're constrained on address range, first try
+		 * at the masked hint to avoid O(n) search complexity,
+		 * but on second pass, start at 0 in pool 0.
+		 */
+		if ((start & mask) >= limit || pass > 0) {
+			spin_unlock(&(pool->lock));
+			pool = &(tbl->pools[0]);
+			spin_lock(&(pool->lock));
+			start = pool->start;
+		} else {
+			start &= mask;
+		}
+	}
+
+	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
+			dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift),
+			align_mask);
+	if (n == -1) {
+		if (likely(pass == 0)) {
+			/* First try the pool from the start */
+			pool->hint = pool->start;
+			pass++;
+			goto again;
+
+		} else if (pass <= tbl->nr_pools) {
+			/* Now try scanning all the other pools */
+			spin_unlock(&(pool->lock));
+			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
+			pool = &tbl->pools[pool_nr];
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			pass++;
+			goto again;
+
+		} else if (pass == tbl->nr_pools + 1) {
+			/* Last resort: try largepool */
+			spin_unlock(&pool->lock);
+			pool = &tbl->large_pool;
+			spin_lock(&pool->lock);
+			pool->hint = pool->start;
+			pass++;
+			goto again;
+
+		} else {
+			/* Give up */
+			spin_unlock_irqrestore(&(pool->lock), flags);
+			return DMA_MAPPING_ERROR;
+		}
+	}
+
+	end = n + npages;
+
+	/* Bump the hint to a new block for small allocs. */
+	if (largealloc) {
+		/* Don't bump to new block to avoid fragmentation */
+		pool->hint = end;
+	} else {
+		/* Overflow will be taken care of at the next allocation */
+		pool->hint = (end + tbl->it_blocksize - 1) &
+		                ~(tbl->it_blocksize - 1);
+	}
+
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+
+	spin_unlock_irqrestore(&(pool->lock), flags);
+
+	return n;
+}
+
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+			      void *page, unsigned int npages,
+			      enum dma_data_direction direction,
+			      unsigned long mask, unsigned int align_order,
+			      unsigned long attrs)
+{	/* Allocate @npages table entries, program them for @page, return the DMA address. */
+	unsigned long entry;
+	dma_addr_t ret = DMA_MAPPING_ERROR;
+	int build_fail;
+
+	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
+
+	if (unlikely(entry == DMA_MAPPING_ERROR))
+		return DMA_MAPPING_ERROR;	/* the range allocator found no room */
+
+	entry += tbl->it_offset;	/* Offset into real TCE table */
+	ret = entry << tbl->it_page_shift;	/* Set the return dma address */
+
+	/* Put the TCEs in the HW table */
+	build_fail = tbl->it_ops->set(tbl, entry, npages,
+				      (unsigned long)page &
+				      IOMMU_PAGE_MASK(tbl), direction, attrs);
+
+	/* tbl->it_ops->set() only returns non-zero for transient errors.
+	 * Clean up the table bitmap in this case and return
+	 * DMA_MAPPING_ERROR. For all other errors the functionality is
+	 * not altered.
+	 */
+	if (unlikely(build_fail)) {
+		__iommu_free(tbl, ret, npages);	/* hands the just-allocated range back */
+		return DMA_MAPPING_ERROR;
+	}
+
+	/* Flush/invalidate TLB caches if necessary */
+	if (tbl->it_ops->flush)
+		tbl->it_ops->flush(tbl);	/* the flush hook is optional */
+
+	/* Make sure updates are seen by hardware */
+	mb();
+
+	return ret;
+}
+
+/*
+ * Check that the @npages entries starting at @dma_addr fall inside @tbl.
+ * Returns true when they do; otherwise logs the table details (rate-limited).
+ */
+static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
+			     unsigned int npages)
+{
+	unsigned long entry = dma_addr >> tbl->it_page_shift;
+	unsigned long free_entry = entry - tbl->it_offset;
+
+	/* Valid ranges start at or above it_offset and end within it_size. */
+	if (entry >= tbl->it_offset && free_entry + npages <= tbl->it_size)
+		return true;
+
+	if (printk_ratelimit()) {
+		printk(KERN_INFO "iommu_free: invalid entry\n");
+		printk(KERN_INFO "\tentry     = 0x%lx\n", entry);
+		printk(KERN_INFO "\tdma_addr  = 0x%llx\n", (u64)dma_addr);
+		printk(KERN_INFO "\tTable     = 0x%llx\n", (u64)tbl);
+		printk(KERN_INFO "\tbus#      = 0x%llx\n", (u64)tbl->it_busno);
+		printk(KERN_INFO "\tsize      = 0x%llx\n", (u64)tbl->it_size);
+		printk(KERN_INFO "\tstartOff  = 0x%llx\n", (u64)tbl->it_offset);
+		printk(KERN_INFO "\tindex     = 0x%llx\n", (u64)tbl->it_index);
+		WARN_ON(1);
+	}
+	return false;
+}
+
+/* Return the pool that table-relative index @entry belongs to. */
+static struct iommu_pool *get_pool(struct iommu_table *tbl,
+				   unsigned long entry)
+{
+	unsigned int pool_nr;
+
+	/* The large pool is the last pool at the top of the table */
+	if (entry >= tbl->large_pool.start)
+		return &tbl->large_pool;
+
+	pool_nr = entry / tbl->poolsize;
+
+	/* The allocator only uses pools[0 .. nr_pools - 1], so an index equal
+	 * to nr_pools is already out of range and must trip the check too. */
+	BUG_ON(pool_nr >= tbl->nr_pools);
+
+	return &tbl->pools[pool_nr];
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+			 unsigned int npages)
+{	/* Clear @npages table entries at @dma_addr and release their it_map bits. */
+	unsigned long entry, free_entry;
+	unsigned long flags;
+	struct iommu_pool *pool;
+
+	entry = dma_addr >> tbl->it_page_shift;	/* absolute table index */
+	free_entry = entry - tbl->it_offset;	/* index relative to it_map */
+
+	pool = get_pool(tbl, free_entry);	/* note: looked up before the range is validated */
+
+	if (!iommu_free_check(tbl, dma_addr, npages))
+		return;		/* bad range; the check reports it (rate-limited) */
+
+	tbl->it_ops->clear(tbl, entry, npages);	/* called without the pool lock held */
+
+	spin_lock_irqsave(&(pool->lock), flags);
+	bitmap_clear(tbl->it_map, free_entry, npages);	/* only the bitmap update is locked */
+	spin_unlock_irqrestore(&(pool->lock), flags);
+}
+
+/*
+ * Free @npages TCEs starting at @dma_addr via __iommu_free() and then invoke
+ * the table's optional flush callback.
+ */
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+		unsigned int npages)
+{
+	__iommu_free(tbl, dma_addr, npages);
+
+	/* Make sure TLB cache is flushed if the HW needs it. We do
+	 * not do an mb() here on purpose, it is not needed on any of
+	 * the current platforms.
+	 */
+	if (tbl->it_ops->flush)
+		tbl->it_ops->flush(tbl);
+}
+
+/*
+ * Map the @nelems entries of @sglist through @tbl.
+ *
+ * Each non-empty element gets its own range from iommu_range_alloc() and is
+ * programmed with tbl->it_ops->set().  Consecutive elements are folded into
+ * one output segment when virtual merging is enabled (!novmerge), the new
+ * DMA address directly follows the previous one, and the merged length stays
+ * within dma_get_max_seg_size(@dev).  Output segments are written into the
+ * dma_address/dma_length fields of @sglist, starting from its first entry.
+ *
+ * Returns the number of output segments on success, -EINVAL when @nelems is
+ * zero or @tbl is NULL, and -EIO when an allocation or a set() call fails
+ * (already recorded segments are freed before returning).
+ */
+int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+		     struct scatterlist *sglist, int nelems,
+		     unsigned long mask, enum dma_data_direction direction,
+		     unsigned long attrs)
+{
+	dma_addr_t dma_next = 0, dma_addr;
+	struct scatterlist *s, *outs, *segstart;
+	int outcount, incount, i, build_fail = 0;
+	unsigned int align;
+	unsigned long handle;
+	unsigned int max_seg_size;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if ((nelems == 0) || !tbl)
+		return -EINVAL;
+
+	/* outs: segment being filled; segstart: input element that opened it */
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
+
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
+
+	DBG("sg mapping %d elements:\n", nelems);
+
+	max_seg_size = dma_get_max_seg_size(dev);
+	for_each_sg(sglist, s, nelems, i) {
+		unsigned long vaddr, npages, entry, slen;
+
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			/* Reset dma_next so the next element cannot match it */
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		vaddr = (unsigned long) sg_virt(s);
+		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
+		align = 0;
+		/*
+		 * Request PAGE_SIZE alignment when IOMMU pages are smaller
+		 * than CPU pages and the buffer is page aligned and at least
+		 * one page long.
+		 */
+		if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
+		    (vaddr & ~PAGE_MASK) == 0)
+			align = PAGE_SHIFT - tbl->it_page_shift;
+		entry = iommu_range_alloc(dev, tbl, npages, &handle,
+					  mask >> tbl->it_page_shift, align);
+
+		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
+
+		/* Handle failure */
+		if (unlikely(entry == DMA_MAPPING_ERROR)) {
+			if (!(attrs & DMA_ATTR_NO_WARN) &&
+			    printk_ratelimit())
+				dev_info(dev, "iommu_alloc failed, tbl %p "
+					 "vaddr %lx npages %lu\n", tbl, vaddr,
+					 npages);
+			goto failure;
+		}
+
+		/* Convert entry to a dma_addr_t */
+		entry += tbl->it_offset;
+		dma_addr = entry << tbl->it_page_shift;
+		dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl));
+
+		DBG("  - %lu pages, entry: %lx, dma_addr: %lx\n",
+			    npages, entry, dma_addr);
+
+		/* Insert into HW table */
+		build_fail = tbl->it_ops->set(tbl, entry, npages,
+					      vaddr & IOMMU_PAGE_MASK(tbl),
+					      direction, attrs);
+		/*
+		 * NOTE(review): at this point the range just reserved for this
+		 * element does not appear to be recorded in any output segment
+		 * yet, so the failure path below may not release it - confirm.
+		 */
+		if(unlikely(build_fail))
+			goto failure;
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			DBG("  - trying merge...\n");
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to previous allocation
+			 */
+			if (novmerge || (dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size)) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++;
+				outs = sg_next(outs);
+				DBG("    can't merge, new segment.\n");
+			} else {
+				outs->dma_length += s->length;
+				DBG("    merged, new len: %ux\n", outs->dma_length);
+			}
+		}
+
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			DBG("  - filling new segment.\n");
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
+		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
+
+		DBG("  - dma next is: %lx\n", dma_next);
+	}
+
+	/* Flush/invalidate TLB caches if necessary */
+	if (tbl->it_ops->flush)
+		tbl->it_ops->flush(tbl);
+
+	DBG("mapped %d elements:\n", outcount);
+
+	/* For the sake of ppc_iommu_unmap_sg, we clear out the length in the
+	 * next entry of the sglist if we didn't fill the list completely
+	 */
+	if (outcount < incount) {
+		outs = sg_next(outs);
+		outs->dma_length = 0;
+	}
+
+	/* Make sure updates are seen by hardware */
+	mb();
+
+	return outcount;
+
+ failure:
+	/*
+	 * Back out: walk the list from the start up to and including the
+	 * segment that was being filled, freeing every recorded mapping.
+	 */
+	for_each_sg(sglist, s, nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages;
+
+			vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
+			npages = iommu_num_pages(s->dma_address, s->dma_length,
+						 IOMMU_PAGE_SIZE(tbl));
+			__iommu_free(tbl, vaddr, npages);
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
+	return -EIO;
+}
+
+
+/*
+ * Undo a mapping set up over @sglist: free the TCEs of each mapped segment,
+ * stopping at the first segment whose dma_length is zero or after @nelems
+ * segments, then invoke the table's optional flush callback once.
+ *
+ * A NULL @tbl is silently ignored.
+ */
+void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+			int nelems, enum dma_data_direction direction,
+			unsigned long attrs)
+{
+	struct scatterlist *cur;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (!tbl)
+		return;
+
+	for (cur = sglist; nelems--; cur = sg_next(cur)) {
+		dma_addr_t dma_handle = cur->dma_address;
+		unsigned int npages;
+
+		/* A zero length marks the end of the mapped segments. */
+		if (cur->dma_length == 0)
+			break;
+
+		npages = iommu_num_pages(dma_handle, cur->dma_length,
+					 IOMMU_PAGE_SIZE(tbl));
+		__iommu_free(tbl, dma_handle, npages);
+	}
+
+	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+	 * do not do an mb() here, the affected platforms do not need it
+	 * when freeing.
+	 */
+	if (tbl->it_ops->flush)
+		tbl->it_ops->flush(tbl);
+}
+
+/*
+ * Bring the hardware table and the allocation bitmap of @tbl into a known
+ * state.
+ *
+ * Outside a kdump kernel, or when firmware assisted dump is active, every
+ * TCE in the window is cleared.  Otherwise (CONFIG_CRASH_DUMP only, and only
+ * if the table provides a get() op) the entries already present are marked
+ * as in use in the bitmap; if that leaves fewer than KDUMP_MIN_TCE_ENTRIES
+ * free, the top KDUMP_MIN_TCE_ENTRIES bitmap entries are released again.
+ */
+static void iommu_table_clear(struct iommu_table *tbl)
+{
+	/*
+	 * In case of firmware assisted dump system goes through clean
+	 * reboot process at the time of system crash. Hence it's safe to
+	 * clear the TCE entries if firmware assisted dump is active.
+	 */
+	if (!is_kdump_kernel() || is_fadump_active()) {
+		/* Clear the table in case firmware left allocations in it */
+		tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
+		return;
+	}
+
+#ifdef CONFIG_CRASH_DUMP
+	if (tbl->it_ops->get) {
+		unsigned long index, tceval, tcecount = 0;
+
+		/* Reserve the existing mappings left by the first kernel. */
+		for (index = 0; index < tbl->it_size; index++) {
+			tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
+			/*
+			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
+			 */
+			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
+				__set_bit(index, tbl->it_map);
+				tcecount++;
+			}
+		}
+
+		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
+			/*
+			 * Emit the warning as a single record: two separate
+			 * KERN_WARNING printk() calls would be logged as two
+			 * unrelated lines.
+			 */
+			printk(KERN_WARNING "TCE table is full; freeing %d entries for the kdump boot\n",
+				KDUMP_MIN_TCE_ENTRIES);
+			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
+				index < tbl->it_size; index++)
+				__clear_bit(index, tbl->it_map);
+		}
+	}
+#endif
+}
+
+/*
+ * Mark the TCE range [@res_start, @res_end) as permanently allocated in the
+ * bitmap of @tbl and remember it in it_reserved_start/it_reserved_end.
+ *
+ * The range is given in absolute TCE numbers and is clipped to the table
+ * window.  If nothing remains after clipping, both reserved bounds are set
+ * to tbl->it_offset.  Bit 0 is additionally reserved when the window starts
+ * at TCE 0.
+ */
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+		unsigned long res_start, unsigned long res_end)
+{
+	/* Same width as the bounds it iterates over, so it cannot truncate. */
+	unsigned long i;
+
+	WARN_ON_ONCE(res_end < res_start);
+	/*
+	 * Reserve page 0 so it will not be used for any mappings.
+	 * This avoids buggy drivers that consider page 0 to be invalid
+	 * to crash the machine or even lose data.
+	 */
+	if (tbl->it_offset == 0)
+		set_bit(0, tbl->it_map);
+
+	if (res_start < tbl->it_offset)
+		res_start = tbl->it_offset;
+
+	if (res_end > (tbl->it_offset + tbl->it_size))
+		res_end = tbl->it_offset + tbl->it_size;
+
+	/* Check if res_start..res_end is a valid range in the table */
+	if (res_start >= res_end) {
+		tbl->it_reserved_start = tbl->it_offset;
+		tbl->it_reserved_end = tbl->it_offset;
+		return;
+	}
+
+	tbl->it_reserved_start = res_start;
+	tbl->it_reserved_end = res_end;
+
+	/* Bitmap indices are relative to the start of the window. */
+	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+		set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+/*
+ * Build an iommu_table structure.  This contains a bitmap which
+ * is used to manage allocation of the tce space.
+ *
+ * @tbl must already have it_ops, it_size and it_page_shift set up.
+ * @nid is the NUMA node the bitmap is allocated on; @res_start/@res_end
+ * describe a TCE range to be reserved (see iommu_table_reserve_pages()).
+ *
+ * Returns @tbl on success or NULL if the bitmap cannot be allocated.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+		unsigned long res_start, unsigned long res_end)
+{
+	unsigned long sz;
+	static int welcomed = 0;
+	unsigned int i;
+	struct iommu_pool *p;
+
+	BUG_ON(!tbl->it_ops);
+
+	/* number of bytes needed for the bitmap */
+	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
+
+	tbl->it_map = vzalloc_node(sz, nid);
+	if (!tbl->it_map) {
+		/* sz is unsigned long, so print it with %lu */
+		pr_err("%s: Can't allocate %lu bytes\n", __func__, sz);
+		return NULL;
+	}
+
+	iommu_table_reserve_pages(tbl, res_start, res_end);
+
+	/* We only split the IOMMU table if we have 1GB or more of space */
+	if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
+		tbl->nr_pools = IOMMU_NR_POOLS;
+	else
+		tbl->nr_pools = 1;
+
+	/* We reserve the top 1/4 of the table for large allocations */
+	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;
+
+	/* Small pools: nr_pools equally sized, back-to-back ranges */
+	for (i = 0; i < tbl->nr_pools; i++) {
+		p = &tbl->pools[i];
+		spin_lock_init(&(p->lock));
+		p->start = tbl->poolsize * i;
+		p->hint = p->start;
+		p->end = p->start + tbl->poolsize;
+	}
+
+	/* Large pool: everything from the end of the last small pool up */
+	p = &tbl->large_pool;
+	spin_lock_init(&(p->lock));
+	p->start = tbl->poolsize * tbl->nr_pools;
+	p->hint = p->start;
+	p->end = tbl->it_size;
+
+	iommu_table_clear(tbl);
+
+	/* Announce the merging policy only once */
+	if (!welcomed) {
+		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+		       novmerge ? "disabled" : "enabled");
+		welcomed = 1;
+	}
+
+	iommu_debugfs_add(tbl);
+
+	return tbl;
+}
+
+/*
+ * Report whether any bit is set in the allocation bitmap of @tbl, not
+ * counting bit 0 when the window starts at TCE 0 and not counting the
+ * reserved region recorded in it_reserved_start/it_reserved_end.
+ */
+bool iommu_table_in_use(struct iommu_table *tbl)
+{
+	/* Skip the reserved bit 0 when the window starts at TCE 0 */
+	unsigned long first = (tbl->it_offset == 0) ? 1 : 0;
+	unsigned long limit;
+
+	if (tbl->it_reserved_start || tbl->it_reserved_end) {
+		/* Scan the part below the reserved region first ... */
+		limit = tbl->it_reserved_start - tbl->it_offset;
+		if (find_next_bit(tbl->it_map, limit, first) != limit)
+			return true;
+
+		/* ... then resume right after it */
+		first = tbl->it_reserved_end - tbl->it_offset;
+	}
+
+	/* Scan up to the top of the table */
+	limit = tbl->it_size;
+	return find_next_bit(tbl->it_map, limit, first) != limit;
+}
+
+/*
+ * kref release callback for an iommu_table: run the optional free() op,
+ * tear down the bitmap (if one was ever allocated) and free the table.
+ */
+static void iommu_table_free(struct kref *kref)
+{
+	struct iommu_table *tbl = container_of(kref, struct iommu_table,
+					       it_kref);
+
+	if (tbl->it_ops->free)
+		tbl->it_ops->free(tbl);
+
+	/* Tables without a bitmap have nothing more to tear down */
+	if (tbl->it_map) {
+		iommu_debugfs_del(tbl);
+
+		/* verify that table contains no entries */
+		if (iommu_table_in_use(tbl))
+			pr_warn("%s: Unexpected TCEs\n", __func__);
+
+		vfree(tbl->it_map);
+	}
+
+	kfree(tbl);
+}
+
+/*
+ * Take a reference on @tbl.  Returns @tbl, or NULL if its reference count
+ * had already dropped to zero.
+ */
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+	return kref_get_unless_zero(&tbl->it_kref) ? tbl : NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+/*
+ * Drop a reference on @tbl; iommu_table_free() is the release callback.
+ * Warns and returns 0 for a NULL @tbl, otherwise returns what kref_put()
+ * returns.
+ */
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+	if (!WARN_ON(!tbl))
+		return kref_put(&tbl->it_kref, iommu_table_free);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
+/*
+ * Create TCEs for a caller-provided buffer.  The buffer must be contiguous
+ * real kernel storage (not vmalloc).  @page and @offset together identify
+ * the first byte; the returned dma_addr_t points to that same byte within
+ * the IOMMU page.
+ *
+ * Returns DMA_MAPPING_ERROR when @tbl is NULL or the allocation fails.
+ */
+dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
+			  struct page *page, unsigned long offset, size_t size,
+			  unsigned long mask, enum dma_data_direction direction,
+			  unsigned long attrs)
+{
+	dma_addr_t dma_handle;
+	unsigned int npages, align = 0;
+	unsigned long uaddr;
+	void *vaddr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	vaddr = page_address(page) + offset;
+	uaddr = (unsigned long)vaddr;
+
+	if (!tbl)
+		return DMA_MAPPING_ERROR;
+
+	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
+
+	/*
+	 * Ask for PAGE_SIZE alignment when IOMMU pages are smaller than CPU
+	 * pages and the buffer is page aligned and at least one page long.
+	 */
+	if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
+	    (uaddr & ~PAGE_MASK) == 0)
+		align = PAGE_SHIFT - tbl->it_page_shift;
+
+	dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
+				 mask >> tbl->it_page_shift, align, attrs);
+
+	/* Success: add the offset within the IOMMU page back in */
+	if (dma_handle != DMA_MAPPING_ERROR)
+		return dma_handle | (uaddr & ~IOMMU_PAGE_MASK(tbl));
+
+	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
+		dev_info(dev, "iommu_alloc failed, tbl %p "
+			 "vaddr %p npages %d\n", tbl, vaddr, npages);
+
+	return DMA_MAPPING_ERROR;
+}
+
+/*
+ * Release the TCEs that cover @size bytes starting at @dma_handle.
+ * A NULL @tbl is silently ignored.
+ */
+void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
+		      size_t size, enum dma_data_direction direction,
+		      unsigned long attrs)
+{
+	unsigned int npages;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (!tbl)
+		return;
+
+	npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE(tbl));
+	iommu_free(tbl, dma_handle, npages);
+}
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ *
+ * Returns NULL when the request is too large (order >= IOMAP_MAX_ORDER),
+ * when @tbl is NULL, or when either the page allocation or the TCE
+ * allocation fails; *dma_handle is left untouched in those cases.
+ */
+void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
+			   size_t size, dma_addr_t *dma_handle,
+			   unsigned long mask, gfp_t flag, int node)
+{
+	void *ret = NULL;
+	dma_addr_t mapping;
+	unsigned int order;
+	unsigned int nio_pages, io_order;
+	struct page *page;
+	unsigned long tcesize;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	/*
+	 * Client asked for way too much space.  This is checked later
+	 * anyway.  It is easier to debug here for the drivers than in
+	 * the tce tables.
+	 */
+	if (order >= IOMAP_MAX_ORDER) {
+		dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
+			 size);
+		return NULL;
+	}
+
+	if (!tbl)
+		return NULL;
+
+	/*
+	 * Only look at the table after the NULL check above, and compute
+	 * the IOMMU page size in unsigned long so large page shifts do not
+	 * overflow an int.
+	 */
+	tcesize = 1UL << tbl->it_page_shift;
+
+	/* Alloc enough pages (and possibly more) */
+	page = alloc_pages_node(node, flag, order);
+	if (!page)
+		return NULL;
+	ret = page_address(page);
+	memset(ret, 0, size);
+
+	/* Set up tces to cover the allocated range */
+	nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
+
+	io_order = get_iommu_order(size, tbl);
+	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
+			      mask >> tbl->it_page_shift, io_order, 0);
+	if (mapping == DMA_MAPPING_ERROR) {
+		free_pages((unsigned long)ret, order);
+		return NULL;
+	}
+
+	/* Carry the buffer's offset within the IOMMU page into the handle */
+	*dma_handle = mapping | ((u64)ret & (tcesize - 1));
+	return ret;
+}
+
+/*
+ * Release the TCEs at @dma_handle and the pages at @vaddr for a buffer of
+ * @size bytes.  @size is rounded up to a whole number of pages first.
+ * A NULL @tbl is silently ignored.
+ */
+void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+	unsigned int nio_pages;
+
+	if (!tbl)
+		return;
+
+	size = PAGE_ALIGN(size);
+	nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
+
+	iommu_free(tbl, dma_handle, nio_pages);
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
+/*
+ * Translate a DMA direction into TCE permission bits: both bits for
+ * bidirectional, write for device-to-memory, read for memory-to-device,
+ * and 0 for anything else.
+ */
+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
+{
+	if (dir == DMA_BIDIRECTIONAL)
+		return TCE_PCI_READ | TCE_PCI_WRITE;
+
+	if (dir == DMA_FROM_DEVICE)
+		return TCE_PCI_WRITE;
+
+	if (dir == DMA_TO_DEVICE)
+		return TCE_PCI_READ;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * SPAPR TCE API
+ */
+/*
+ * Release callback passed to iommu_group_set_iommudata() in
+ * iommu_register_group(): @iommu_data is the iommu_table_group, whose
+ * group pointer is reset to NULL.
+ */
+static void group_release(void *iommu_data)
+{
+	struct iommu_table_group *table_group = iommu_data;
+
+	table_group->group = NULL;
+}
+
+/*
+ * Allocate an IOMMU group for @table_group, attach @table_group to it as
+ * iommudata (with group_release() as the release hook) and name it
+ * "domain<pci_domain_number>-pe<pe_num>".
+ *
+ * Allocation failure is logged and leaves @table_group untouched; failure
+ * to build the name leaves the group registered but unnamed.
+ */
+void iommu_register_group(struct iommu_table_group *table_group,
+		int pci_domain_number, unsigned long pe_num)
+{
+	struct iommu_group *grp = iommu_group_alloc();
+	char *name;
+
+	if (IS_ERR(grp)) {
+		pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
+				PTR_ERR(grp));
+		return;
+	}
+
+	table_group->group = grp;
+	iommu_group_set_iommudata(grp, table_group, group_release);
+
+	name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
+			pci_domain_number, pe_num);
+	if (name) {
+		iommu_group_set_name(grp, name);
+		kfree(name);
+	}
+}
+
+/*
+ * Derive the DMA direction from the permission bits of @tce: read+write is
+ * bidirectional, read only is to-device, write only is from-device, and
+ * neither bit gives DMA_NONE.
+ */
+enum dma_data_direction iommu_tce_direction(unsigned long tce)
+{
+	bool can_read = !!(tce & TCE_PCI_READ);
+	bool can_write = !!(tce & TCE_PCI_WRITE);
+
+	if (can_read && can_write)
+		return DMA_BIDIRECTIONAL;
+	if (can_read)
+		return DMA_TO_DEVICE;
+	if (can_write)
+		return DMA_FROM_DEVICE;
+
+	return DMA_NONE;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_direction);
+
+/*
+ * Invoke the optional flush callback of @tbl and then issue a full memory
+ * barrier.
+ */
+void iommu_flush_tce(struct iommu_table *tbl)
+{
+	/* Flush/invalidate TLB caches if necessary */
+	if (tbl->it_ops->flush)
+		tbl->it_ops->flush(tbl);
+
+	/* Make sure updates are seen by hardware */
+	mb();
+}
+EXPORT_SYMBOL_GPL(iommu_flush_tce);
+
+/*
+ * Validate an I/O bus address range against a TCE window.
+ *
+ * @page_shift: log2 of the IOMMU page size
+ * @offset:     first TCE number of the window
+ * @size:       number of TCEs in the window
+ * @ioba:       I/O bus address of the first page (bytes)
+ * @npages:     number of IOMMU pages in the range
+ *
+ * Returns 0 when @ioba is IOMMU-page aligned and the whole range
+ * [@ioba, @ioba + @npages pages) lies inside the window, -EINVAL otherwise.
+ * As before, the first page must be inside the window even when @npages
+ * is 0.
+ */
+int iommu_tce_check_ioba(unsigned long page_shift,
+		unsigned long offset, unsigned long size,
+		unsigned long ioba, unsigned long npages)
+{
+	unsigned long mask = (1UL << page_shift) - 1;
+	unsigned long end = offset + size;
+
+	if (ioba & mask)
+		return -EINVAL;
+
+	ioba >>= page_shift;
+	if (ioba < offset)
+		return -EINVAL;
+
+	/* The first page has to be inside the window ... */
+	if (ioba >= end)
+		return -EINVAL;
+
+	/*
+	 * ... and so does the last one.  Comparing against the remaining
+	 * room avoids overflowing ioba + npages.
+	 */
+	if (npages > end - ioba)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);
+
+/*
+ * Check that @gpa is aligned to an IOMMU page of 2^@page_shift bytes.
+ * Returns 0 if it is, -EINVAL otherwise.
+ */
+int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
+{
+	unsigned long low_bits = gpa & ((1UL << page_shift) - 1);
+
+	return low_bits ? -EINVAL : 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
+
+/*
+ * Exchange the TCE at @entry through the table's xchg_no_kill() op; @hpa and
+ * @direction are passed through to it and read back afterwards.
+ *
+ * When the exchange succeeds and the direction read back allows the device
+ * to write (DMA_FROM_DEVICE or DMA_BIDIRECTIONAL), the page at *@hpa is
+ * marked dirty unless mm_iommu_is_devmem() reports it as device memory.
+ *
+ * Returns whatever xchg_no_kill() returned.
+ */
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
+{
+	unsigned long size = 0;
+	long ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
+
+	if (ret)
+		return ret;
+
+	/* Nothing to dirty if the device could not write to the page */
+	if ((*direction != DMA_FROM_DEVICE) &&
+	    (*direction != DMA_BIDIRECTIONAL))
+		return ret;
+
+	if (!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift, &size))
+		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+/*
+ * Forward (@entry, @pages) to the table's optional tce_kill() callback;
+ * does nothing when the table does not provide one.
+ */
+void iommu_tce_kill(struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages)
+{
+	if (tbl->it_ops->tce_kill)
+		tbl->it_ops->tce_kill(tbl, entry, pages);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Claim @tbl exclusively: with every pool lock held, verify that no TCE is
+ * currently allocated and then mark the whole bitmap as in use.
+ *
+ * Returns 0 on success, -EINVAL if the table has no xchg_no_kill() op and
+ * -EBUSY if the bitmap still contains allocations.
+ */
+static int iommu_take_ownership(struct iommu_table *tbl)
+{
+	/* sz: number of bitmap bytes that cover it_size bits */
+	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+	int ret = 0;
+
+	/*
+	 * VFIO does not control TCE entries allocation and the guest
+	 * can write new TCEs on top of existing ones so iommu_tce_build()
+	 * must be able to release old pages. This functionality
+	 * requires exchange() callback defined so if it is not
+	 * implemented, we disallow taking ownership over the table.
+	 */
+	if (!tbl->it_ops->xchg_no_kill)
+		return -EINVAL;
+
+	/* Large pool lock first, then every small pool nested under it */
+	spin_lock_irqsave(&tbl->large_pool.lock, flags);
+	for (i = 0; i < tbl->nr_pools; i++)
+		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
+
+	if (iommu_table_in_use(tbl)) {
+		/* Terminate the message so it is logged as a complete line */
+		pr_err("iommu_tce: it_map is not empty\n");
+		ret = -EBUSY;
+	} else {
+		memset(tbl->it_map, 0xff, sz);
+	}
+
+	for (i = 0; i < tbl->nr_pools; i++)
+		spin_unlock(&tbl->pools[i].lock);
+	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+
+	return ret;
+}
+
+/*
+ * Give @tbl back after iommu_take_ownership(): with every pool lock held,
+ * wipe the bitmap and re-apply the reserved range recorded in the table.
+ */
+static void iommu_release_ownership(struct iommu_table *tbl)
+{
+	unsigned long map_bytes = (tbl->it_size + 7) >> 3;
+	unsigned long irq_flags, pool;
+
+	/* Large pool lock first, then every small pool nested under it */
+	spin_lock_irqsave(&tbl->large_pool.lock, irq_flags);
+	for (pool = 0; pool < tbl->nr_pools; pool++)
+		spin_lock_nest_lock(&tbl->pools[pool].lock,
+				    &tbl->large_pool.lock);
+
+	memset(tbl->it_map, 0, map_bytes);
+
+	iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+			tbl->it_reserved_end);
+
+	for (pool = 0; pool < tbl->nr_pools; pool++)
+		spin_unlock(&tbl->pools[pool].lock);
+	spin_unlock_irqrestore(&tbl->large_pool.lock, irq_flags);
+}
+#endif
+
+/*
+ * Hand @dev over to the IOMMU core via iommu_probe_device().
+ *
+ * Returns -ENOENT if @dev is not registered yet, -EBUSY if it is already
+ * mapped by an IOMMU, and otherwise the result of iommu_probe_device().
+ * @table_group is only used for the debug message.
+ */
+int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
+{
+	/*
+	 * The sysfs entries should be populated before
+	 * binding IOMMU group. If sysfs entries isn't
+	 * ready, we simply bail.
+	 */
+	if (!device_is_registered(dev))
+		return -ENOENT;
+
+	if (device_iommu_mapped(dev)) {
+		pr_debug("%s: Skipping device %s with iommu group %d\n",
+			 __func__, dev_name(dev),
+			 iommu_group_id(dev->iommu_group));
+		return -EBUSY;
+	}
+
+	pr_debug("%s: Adding %s to iommu group %d\n",
+		 __func__, dev_name(dev),  iommu_group_id(table_group->group));
+	/*
+	 * This is still not adding devices via the IOMMU bus notifier because
+	 * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
+	 * pcibios_scan_phb() first (and this guy adds devices and triggers
+	 * the notifier) and only then it calls pci_bus_add_devices() which
+	 * configures DMA for buses which also creates PEs and IOMMU groups.
+	 */
+	return iommu_probe_device(dev);
+}
+EXPORT_SYMBOL_GPL(iommu_add_device);
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * A simple iommu_table_group_ops which only allows reusing the existing
+ * iommu_table. This handles VFIO for POWER7 or the nested KVM.
+ * The ops does not allow creating windows and only allows reusing the existing
+ * one if it matches table_group->tce32_start/tce32_size/page_shift.
+ */
+/*
+ * Size computation for a single-level table: @window_size shifted right by
+ * (@page_shift - 3).  Multi-level requests (@levels > 1) yield ~0U.
+ */
+static unsigned long spapr_tce_get_table_size(__u32 page_shift,
+					      __u64 window_size, __u32 levels)
+{
+	if (levels > 1)
+		return ~0U;
+
+	return window_size >> (page_shift - 3);
+}
+
+/*
+ * "Create" window @num by handing out the group's existing table 0.
+ *
+ * Only window 0 is supported (-EPERM otherwise) and the request must match
+ * the existing table's page shift, size and number of indirect levels
+ * (-EINVAL otherwise).  On success *@ptbl receives the result of
+ * iommu_tce_table_get() on that table and 0 is returned.
+ */
+static long spapr_tce_create_table(struct iommu_table_group *table_group, int num,
+				   __u32 page_shift, __u64 window_size, __u32 levels,
+				   struct iommu_table **ptbl)
+{
+	struct iommu_table *tbl = table_group->tables[0];
+	bool matches;
+
+	if (num > 0)
+		return -EPERM;
+
+	matches = tbl->it_page_shift == page_shift &&
+		  tbl->it_size == (window_size >> page_shift) &&
+		  tbl->it_indirect_levels == levels - 1;
+	if (!matches)
+		return -EINVAL;
+
+	*ptbl = iommu_tce_table_get(tbl);
+	return 0;
+}
+
+/*
+ * Accept @tbl for window @num only if it is the table already installed
+ * there; returns 0 in that case and -EPERM otherwise.
+ */
+static long spapr_tce_set_window(struct iommu_table_group *table_group,
+				 int num, struct iommu_table *tbl)
+{
+	if (tbl == table_group->tables[num])
+		return 0;
+
+	return -EPERM;
+}
+
+/* Unsetting a window is a no-op here; always reports success (0). */
+static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num)
+{
+	return 0;
+}
+
+/*
+ * Take ownership of every table in @table_group that exists and has an
+ * allocation bitmap.
+ *
+ * Returns 0 on success.  If iommu_take_ownership() fails for one table,
+ * the tables claimed so far are released again and that error is returned.
+ */
+static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
+{
+	int i, j, rc = 0;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = table_group->tables[i];
+
+		if (!tbl || !tbl->it_map)
+			continue;
+
+		rc = iommu_take_ownership(tbl);
+		if (!rc)
+			continue;
+
+		/*
+		 * Roll back.  Slots that were skipped above (no table or no
+		 * bitmap) were never claimed and must be skipped here too,
+		 * otherwise iommu_release_ownership() would dereference a
+		 * NULL table or bitmap.
+		 */
+		for (j = 0; j < i; ++j) {
+			struct iommu_table *claimed = table_group->tables[j];
+
+			if (!claimed || !claimed->it_map)
+				continue;
+
+			iommu_release_ownership(claimed);
+		}
+		return rc;
+	}
+	return 0;
+}
+
+/*
+ * Release every table of @table_group: each existing table is passed to
+ * iommu_table_clear(), and those with an allocation bitmap are then handed
+ * to iommu_release_ownership().
+ */
+static void spapr_tce_release_ownership(struct iommu_table_group *table_group)
+{
+	int slot;
+
+	for (slot = 0; slot < IOMMU_TABLE_GROUP_MAX_TABLES; ++slot) {
+		struct iommu_table *tbl = table_group->tables[slot];
+
+		if (tbl) {
+			iommu_table_clear(tbl);
+			if (tbl->it_map)
+				iommu_release_ownership(tbl);
+		}
+	}
+}
+
+/* Table group callbacks wired to the spapr_tce_* helpers defined above. */
+struct iommu_table_group_ops spapr_tce_table_group_ops = {
+	.get_table_size = spapr_tce_get_table_size,
+	.create_table = spapr_tce_create_table,
+	.set_window = spapr_tce_set_window,
+	.unset_window = spapr_tce_unset_window,
+	.take_ownership = spapr_tce_take_ownership,
+	.release_ownership = spapr_tce_release_ownership,
+};
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+/*
+ * attach_dev callback of the blocking domain: look up the IOMMU group of
+ * @dev and call take_ownership() on its table group.
+ *
+ * Returns -ENODEV if @dev has no group, otherwise the take_ownership()
+ * result.  The group reference taken here is dropped before returning.
+ */
+static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
+					       struct device *dev)
+{
+	struct iommu_table_group *table_group;
+	struct iommu_group *grp;
+	int ret;
+
+	grp = iommu_group_get(dev);
+	if (!grp)
+		return -ENODEV;
+
+	table_group = iommu_group_get_iommudata(grp);
+	ret = table_group->ops->take_ownership(table_group);
+	iommu_group_put(grp);
+
+	return ret;
+}
+
+/*
+ * set_platform_dma_ops callback: look up the IOMMU group of @dev and call
+ * release_ownership() on its table group.
+ *
+ * Does nothing if @dev has no group.  The reference obtained from
+ * iommu_group_get() is dropped again before returning, mirroring
+ * spapr_tce_blocking_iommu_attach_dev().
+ */
+static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
+{
+	struct iommu_group *grp = iommu_group_get(dev);
+	struct iommu_table_group *table_group;
+
+	if (!grp)
+		return;
+
+	table_group = iommu_group_get_iommudata(grp);
+	table_group->ops->release_ownership(table_group);
+
+	/* Balance iommu_group_get() so the group refcount does not leak */
+	iommu_group_put(grp);
+}
+
+/* Domain ops installed on domains created by spapr_tce_iommu_domain_alloc(). */
+static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
+	.attach_dev = spapr_tce_blocking_iommu_attach_dev,
+};
+
+/* The only capability reported is IOMMU_CAP_CACHE_COHERENCY. */
+static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+	return cap == IOMMU_CAP_CACHE_COHERENCY;
+}
+
+/*
+ * Allocate a domain.  Only IOMMU_DOMAIN_BLOCKED is supported; any other
+ * @type, or an allocation failure, yields NULL.
+ */
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+	struct iommu_domain *dom = NULL;
+
+	if (type == IOMMU_DOMAIN_BLOCKED) {
+		dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+		if (dom)
+			dom->ops = &spapr_tce_blocking_domain_ops;
+	}
+
+	return dom;
+}
+
+/*
+ * probe_device callback: for a PCI device, return the iommu_device embedded
+ * in the pci_controller found through the device's bus sysdata.  Non-PCI
+ * devices are rejected with ERR_PTR(-EPERM).
+ */
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+	struct pci_controller *hose;
+
+	if (!dev_is_pci(dev))
+		return ERR_PTR(-EPERM);
+
+	hose = to_pci_dev(dev)->bus->sysdata;
+
+	return &hose->iommu;
+}
+
+/* release_device callback: intentionally empty, there is nothing to undo. */
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+/*
+ * device_group callback: delegate to the PHB's controller_ops.device_group()
+ * hook, or return ERR_PTR(-ENOENT) if the controller does not provide one.
+ */
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_controller *hose = pdev->bus->sysdata;
+
+	if (hose->controller_ops.device_group)
+		return hose->controller_ops.device_group(hose, pdev);
+
+	return ERR_PTR(-ENOENT);
+}
+
+/* iommu_ops registered for each PHB by spapr_tce_setup_phb_iommus_initcall(). */
+static const struct iommu_ops spapr_tce_iommu_ops = {
+	.capable = spapr_tce_iommu_capable,
+	.domain_alloc = spapr_tce_iommu_domain_alloc,
+	.probe_device = spapr_tce_iommu_probe_device,
+	.release_device = spapr_tce_iommu_release_device,
+	.device_group = spapr_tce_iommu_device_group,
+	.set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
+};
+
+/* Attribute list for the group below; it contains no attributes. */
+static struct attribute *spapr_tce_iommu_attrs[] = {
+	NULL,
+};
+
+/* Attribute group named "spapr-tce-iommu". */
+static struct attribute_group spapr_tce_iommu_group = {
+	.name = "spapr-tce-iommu",
+	.attrs = spapr_tce_iommu_attrs,
+};
+
+/* NULL-terminated group list handed to iommu_device_sysfs_add(). */
+static const struct attribute_group *spapr_tce_iommu_groups[] = {
+	&spapr_tce_iommu_group,
+	NULL,
+};
+
+/*
+ * This registers IOMMU devices of PHBs. This needs to happen
+ * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
+ * before subsys_initcall(iommu_subsys_init).
+ *
+ * Every pci_controller on hose_list gets a sysfs entry named
+ * "iommu-phb<global_number>" and is registered with spapr_tce_iommu_ops.
+ * Always returns 0.
+ */
+static int __init spapr_tce_setup_phb_iommus_initcall(void)
+{
+	struct pci_controller *hose;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		/*
+		 * NOTE(review): whatever these two calls return is ignored;
+		 * confirm that a failure of either one is harmless here.
+		 */
+		iommu_device_sysfs_add(&hose->iommu, hose->parent,
+				       spapr_tce_iommu_groups, "iommu-phb%04x",
+				       hose->global_number);
+		iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
+				      hose->parent);
+	}
+	return 0;
+}
+postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
+#endif
+
+#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
new file mode 100644
index 0000000000..6f7d4edaa0
--- /dev/null
+++ b/arch/powerpc/kernel/irq.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Derived from arch/i386/kernel/irq.c
+ *    Copyright (C) 1992 Linus Torvalds
+ *  Adapted from arch/i386 by Gary Thomas
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ *    Copyright (C) 1996-2001 Cort Dougan
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * The MPC8xx has an interrupt mask in the SIU.  If a bit is set, the
+ * interrupt is _enabled_.  As expected, IRQ0 is bit 0 in the 32-bit
+ * mask register (of which only 16 are defined), hence the weird shifting
+ * and complement of the cached_irq_mask.  I want to be able to stuff
+ * this right into the SIU SMASK register.
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx
+ * to reduce code space and undefined function references.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
+
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
+#include <asm/ppc_asm.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
+
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);	/* per-CPU interrupt counters */
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+#ifdef CONFIG_PPC32
+atomic_t ppc_n_lost_interrupts;
+
+#ifdef CONFIG_TAU_INT
+extern int tau_initialized;
+u32 tau_interrupts(unsigned long cpu);
+#endif /* CONFIG_TAU_INT */
+#endif /* CONFIG_PPC32 */
+/* Emit one row per arch interrupt counter, one column per online CPU; @prec pads the row label. Returns 0. */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	int j;
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT)
+	if (tau_initialized) {
+		seq_printf(p, "%*s: ", prec, "TAU");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", tau_interrupts(j));
+		seq_puts(p, "  PowerPC             Thermal Assist (cpu temp)\n");
+	}
+#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */
+
+	seq_printf(p, "%*s: ", prec, "LOC");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
+	seq_puts(p, "  Local timer interrupts for timer event device\n");
+
+	seq_printf(p, "%*s: ", prec, "BCT");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event);
+	seq_puts(p, "  Broadcast timer interrupts for timer event device\n");
+
+	seq_printf(p, "%*s: ", prec, "LOC");	/* same label as the first row; user-visible, kept as is */
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
+	seq_puts(p, "  Local timer interrupts for others\n");
+
+	seq_printf(p, "%*s: ", prec, "SPU");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
+	seq_puts(p, "  Spurious interrupts\n");
+
+	seq_printf(p, "%*s: ", prec, "PMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
+	seq_puts(p, "  Performance monitoring interrupts\n");
+
+	seq_printf(p, "%*s: ", prec, "MCE");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
+	seq_puts(p, "  Machine check exceptions\n");
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		seq_printf(p, "%*s: ", prec, "HMI");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs);
+		seq_puts(p, "  Hypervisor Maintenance Interrupts\n");
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+	seq_puts(p, "  System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+	seq_printf(p, "%*s: ", prec, "WDG");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+	seq_puts(p, "  Watchdog soft-NMI interrupts\n");
+#endif /* CONFIG_PPC_WATCHDOG */
+
+#ifdef CONFIG_PPC_DOORBELL
+	if (cpu_has_feature(CPU_FTR_DBELL)) {
+		seq_printf(p, "%*s: ", prec, "DBL");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs);
+		seq_puts(p, "  Doorbell interrupts\n");
+	}
+#endif /* CONFIG_PPC_DOORBELL */
+
+	return 0;
+}
+
+/*
+ * /proc/stat helper: sum of the per-CPU arch interrupt counters of @cpu
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	const irq_cpustat_t *stat = &per_cpu(irq_stat, cpu);
+	u64 total = stat->timer_irqs_event;
+
+	total += stat->broadcast_irqs_event;
+	total += stat->pmu_irqs;
+	total += stat->mce_exceptions;
+	total += stat->spurious_irqs;
+	total += stat->timer_irqs_others;
+#ifdef CONFIG_PPC_BOOK3S_64
+	total += paca_ptrs[cpu]->hmi_irqs;
+#endif
+	total += stat->sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+	total += stat->soft_nmi_irqs;
+#endif
+#ifdef CONFIG_PPC_DOORBELL
+	total += stat->doorbell_irqs;
+#endif
+	return total;
+}
+
+static inline void check_stack_overflow(unsigned long sp)	/* sp: stack pointer value to check */
+{
+	if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+		return;
+
+	sp &= THREAD_SIZE - 1;	/* keep only the part of sp below THREAD_SIZE */
+
+	/* check for stack overflow: is there less than 1/4th free? */
+	if (unlikely(sp < THREAD_SIZE / 4)) {
+		pr_err("do_IRQ: stack overflow: %lu\n", sp);	/* sp is unsigned long */
+		dump_stack();
+	}
+}
+
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+static __always_inline void call_do_softirq(const void *sp)
+{
+	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+	asm volatile (
+		 PPC_STLU "	%%r1, %[offset](%[sp])	;"	/* store old r1 at sp + offset (PPC_STLU presumably also moves sp there) */
+		"mr		%%r1, %[sp]		;"	/* r1 = the new stack */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		"bl		%[callee]@notoc		;"
+#else
+		"bl		%[callee]		;"
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+		 PPC_LL "	%%r1, 0(%%r1)		;"	/* reload the r1 value stored above */
+		 : // Outputs
+		 : // Inputs
+		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
+		   [callee] "i" (__do_softirq)
+		 : // Clobbers
+		   "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+		   "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+		   "r11", "r12"
+	);
+}
+#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */
+
+DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
+
+static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
+{
+	unsigned int virq;
+
+	trace_irq_entry(regs);
+
+	check_stack_overflow(oldsp);
+
+	/*
+	 * Ask the platform PIC which interrupt fired and acknowledge it.
+	 *
+	 * Doing so will typically lower the interrupt line to the CPU
+	 */
+	virq = static_call(ppc_get_irq)();
+
+	/* Perf interrupts may be let in from here on: hard enable if allowed */
+	if (should_hard_irq_enable(regs))
+		do_hard_irq_enable();
+
+	/* Dispatch a real interrupt; a zero result is only counted as spurious */
+	if (likely(virq))
+		generic_handle_irq(virq);
+	else
+		__this_cpu_inc(irq_stat.spurious_irqs);
+
+	trace_irq_exit(regs);
+}
+
+static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+	register unsigned long r3 asm("r3") = (unsigned long)regs;	/* regs travels in r3 */
+
+	/* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
+	asm volatile (
+		 PPC_STLU "	%%r1, %[offset](%[sp])	;"	/* store old r1 at sp + offset (PPC_STLU presumably also moves sp there) */
+		"mr		%%r4, %%r1		;"	/* r4 = old r1, presumably __do_irq()'s oldsp argument */
+		"mr		%%r1, %[sp]		;"	/* r1 = the new stack */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		"bl		%[callee]@notoc		;"
+#else
+		"bl		%[callee]		;"
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+		 PPC_LL "	%%r1, 0(%%r1)		;"	/* reload the r1 value stored above */
+		 : // Outputs
+		   "+r" (r3)
+		 : // Inputs
+		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
+		   [callee] "i" (__do_irq)
+		 : // Clobbers
+		   "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+		   "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+		   "r11", "r12"
+	);
+}
+
+void __do_IRQ(struct pt_regs *regs)
+{
+	struct pt_regs *saved_regs = set_irq_regs(regs);
+	void *stack_base, *hardirq_stack, *softirq_stack;
+
+	/* Base of the current stack and this CPU's two interrupt stacks */
+	stack_base = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+	hardirq_stack = hardirq_ctx[raw_smp_processor_id()];
+	softirq_stack = softirq_ctx[raw_smp_processor_id()];
+
+	/* Switch stacks unless already running on one of the irq stacks */
+	if (likely(stack_base != hardirq_stack && stack_base != softirq_stack))
+		call_do_irq(regs, hardirq_stack);
+	else
+		__do_irq(regs, current_stack_pointer);
+
+	set_irq_regs(saved_regs);
+}
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
+{
+	__do_IRQ(regs);	/* the whole handler body is __do_IRQ() */
+}
+
+static void *__init alloc_vm_stack(void)
+{	/* THREAD_SIZE bytes, THREAD_ALIGN alignment, THREADINFO_GFP flags, no NUMA node preference */
+	return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
+			      NUMA_NO_NODE, (void *)_RET_IP_);
+}
+
+/* Allocate a softirq and a hardirq stack for every possible CPU. */
+static void __init vmap_irqstack_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		softirq_ctx[cpu] = alloc_vm_stack();
+		hardirq_ctx[cpu] = alloc_vm_stack();
+	}
+}
+
+void __init init_IRQ(void)
+{
+	if (IS_ENABLED(CONFIG_VMAP_STACK))	/* irq stacks come from vmalloc in this configuration */
+		vmap_irqstack_init();
+
+	if (ppc_md.init_IRQ)			/* optional platform hook */
+		ppc_md.init_IRQ();
+
+	if (!WARN_ON(!ppc_md.get_irq))		/* warn and leave ppc_get_irq at its default if the hook is missing */
+		static_call_update(ppc_get_irq, ppc_md.get_irq);
+}
+
+#ifdef CONFIG_BOOKE_OR_40x
+void   *critirq_ctx[NR_CPUS] __read_mostly;
+void    *dbgirq_ctx[NR_CPUS] __read_mostly;
+void *mcheckirq_ctx[NR_CPUS] __read_mostly;
+#endif /* CONFIG_BOOKE_OR_40x */
+
+void *softirq_ctx[NR_CPUS] __read_mostly;	/* per-CPU softirq stack, see do_softirq_own_stack() */
+void *hardirq_ctx[NR_CPUS] __read_mostly;	/* per-CPU hardirq stack, see __do_IRQ() */
+
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+void do_softirq_own_stack(void)
+{
+	call_do_softirq(softirq_ctx[smp_processor_id()]);	/* run on this CPU's softirq stack */
+}
+#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */
+
+irq_hw_number_t virq_to_hw(unsigned int virq)
+{
+	struct irq_data *irq_data = irq_get_irq_data(virq);
+	return WARN_ON(!irq_data) ? 0 : irq_data->hwirq;	/* no irq_data for virq: warn and return 0 */
+}
+EXPORT_SYMBOL_GPL(virq_to_hw);
+
+#ifdef CONFIG_SMP
+int irq_choose_cpu(const struct cpumask *mask)
+{
+	static DEFINE_RAW_SPINLOCK(irq_rover_lock);
+	static int irq_rover;
+	unsigned long flags;
+	int cpuid;
+
+	/*
+	 * A mask that differs from the online set selects its first online
+	 * CPU. An all-online mask, or one without any online CPU, falls
+	 * through to the round-robin choice below.
+	 */
+	if (!cpumask_equal(mask, cpu_online_mask)) {
+		cpuid = cpumask_first_and(mask, cpu_online_mask);
+		if (cpuid < nr_cpu_ids)
+			return get_hard_smp_processor_id(cpuid);
+	}
+
+	/* Round-robin distribution, the rover is advanced under its lock */
+	raw_spin_lock_irqsave(&irq_rover_lock, flags);
+	irq_rover = cpumask_next(irq_rover, cpu_online_mask);
+	if (irq_rover >= nr_cpu_ids)
+		irq_rover = cpumask_first(cpu_online_mask);
+	cpuid = irq_rover;
+	raw_spin_unlock_irqrestore(&irq_rover_lock, flags);
+
+	return get_hard_smp_processor_id(cpuid);
+}
+#else
+int irq_choose_cpu(const struct cpumask *mask)
+{
+	return hard_smp_processor_id();
+}
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
new file mode 100644
index 0000000000..938e66829e
--- /dev/null
+++ b/arch/powerpc/kernel/irq_64.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Derived from arch/i386/kernel/irq.c
+ *    Copyright (C) 1992 Linus Torvalds
+ *  Adapted from arch/i386 by Gary Thomas
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ *    Copyright (C) 1996-2001 Cort Dougan
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
+
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
+#include <asm/ppc_asm.h>
+
+#include <asm/paca.h>
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/dbell.h>
+#include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
+
+int distribute_irqs = 1;	/* set to 0 by the "noirqdistrib" boot parameter */
+
+static inline void next_interrupt(struct pt_regs *regs)	/* regs is not used by the body */
+{
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+		WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+		WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+	}
+
+	/*
+	 * We are responding to the next interrupt, so interrupt-off latencies
+	 * should be reset here: exit, toggle on/off and re-enter hardirq state.
+	 */
+	lockdep_hardirq_exit();
+	trace_hardirqs_on();
+	trace_hardirqs_off();
+	lockdep_hardirq_enter();
+}
+
+static inline bool irq_happened_test_and_clear(u8 irq)
+{
+	/* Report false, without writing, when none of the bits is pending */
+	if (!(local_paca->irq_happened & irq))
+		return false;
+	local_paca->irq_happened &= ~irq;
+	return true;
+}
+
+static __no_kcsan void __replay_soft_interrupts(void)
+{
+	struct pt_regs regs;
+
+	/*
+	 * We use local_paca rather than get_paca() to avoid all the
+	 * debug_smp_processor_id() business in this low level function.
+	 */
+
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+		WARN_ON_ONCE(mfmsr() & MSR_EE);
+		WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+		WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING);
+	}
+
+	/*
+	 * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling
+	 * MSR[EE] to get PMIs, which can result in more IRQs becoming
+	 * pending.
+	 */
+	local_paca->irq_happened |= PACA_IRQ_REPLAYING;
+
+	ppc_save_regs(&regs);
+	regs.softe = IRQS_ENABLED;	/* the frame handed to the handlers looks irqs-enabled */
+	regs.msr |= MSR_EE;
+
+	/*
+	 * Force the delivery of pending soft-disabled interrupts on PS3.
+	 * Any HV call will have this side effect.
+	 */
+	if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+		u64 tmp, tmp2;
+		lv1_get_version_info(&tmp, &tmp2);	/* results unused, called for its side effect */
+	}
+
+	/*
+	 * Check if a Hypervisor Maintenance Interrupt (HMI) happened.
+	 * This is a higher priority interrupt than the others, so
+	 * replay it first.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+	    irq_happened_test_and_clear(PACA_IRQ_HMI)) {
+		regs.trap = INTERRUPT_HMI;
+		handle_hmi_exception(&regs);
+		next_interrupt(&regs);
+	}
+
+	if (irq_happened_test_and_clear(PACA_IRQ_DEC)) {
+		regs.trap = INTERRUPT_DECREMENTER;
+		timer_interrupt(&regs);
+		next_interrupt(&regs);
+	}
+
+	if (irq_happened_test_and_clear(PACA_IRQ_EE)) {
+		regs.trap = INTERRUPT_EXTERNAL;
+		do_IRQ(&regs);
+		next_interrupt(&regs);
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_DOORBELL) &&
+	    irq_happened_test_and_clear(PACA_IRQ_DBELL)) {
+		regs.trap = INTERRUPT_DOORBELL;
+		doorbell_exception(&regs);
+		next_interrupt(&regs);
+	}
+
+	/* Book3E does not support soft-masking PMI interrupts */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+	    irq_happened_test_and_clear(PACA_IRQ_PMI)) {
+		regs.trap = INTERRUPT_PERFMON;
+		performance_monitor_exception(&regs);
+		next_interrupt(&regs);
+	}
+
+	local_paca->irq_happened &= ~PACA_IRQ_REPLAYING;	/* replay done, undo the marker set above */
+}
+
+__no_kcsan void replay_soft_interrupts(void)
+{
+	irq_enter(); /* Holds off softirqs during replay, see comment in arch_local_irq_restore */
+	__replay_soft_interrupts();
+	irq_exit();
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
+static inline __no_kcsan void replay_soft_interrupts_irqrestore(void)
+{
+	unsigned long kuap_state = get_kuap();
+
+	/*
+	 * Check if anything calls local_irq_enable/restore() when KUAP is
+	 * disabled (user access enabled). We handle that case here by saving
+	 * and re-locking AMR but we shouldn't get here in the first place,
+	 * hence the warning.
+	 */
+	kuap_assert_locked();
+
+	if (kuap_state != AMR_KUAP_BLOCKED)
+		set_kuap(AMR_KUAP_BLOCKED);	/* replay with user access blocked */
+
+	__replay_soft_interrupts();
+
+	if (kuap_state != AMR_KUAP_BLOCKED)
+		set_kuap(kuap_state);		/* put back the state found on entry */
+}
+#else
+#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts()
+#endif /* CONFIG_PPC_BOOK3S_64 && CONFIG_PPC_KUAP */
+
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
+{
+	unsigned char irq_happened;
+
+	/* A non-zero mask is a disable: just write it, nothing to replay */
+	if (mask) {
+		irq_soft_mask_set(mask);
+		return;
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+		WARN_ON_ONCE(in_nmi());
+		WARN_ON_ONCE(in_hardirq());
+		WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING);
+	}
+
+again:
+	/*
+	 * After the stb, interrupts are unmasked and there are no interrupts
+	 * pending replay. The restart sequence makes this atomic with
+	 * respect to soft-masked interrupts. If this was just a simple code
+	 * sequence, a soft-masked interrupt could become pending right after
+	 * the comparison and before the stb.
+	 *
+	 * This allows interrupts to be unmasked without hard disabling, and
+	 * also without new hard interrupts coming in ahead of pending ones.
+	 */
+	asm_volatile_goto(
+"1:					\n"
+"		lbz	9,%0(13)	\n"	/* r9 = irq_happened byte of the paca_struct at r13 */
+"		cmpwi	9,0		\n"
+"		bne	%l[happened]	\n"	/* something is pending: take the slow path */
+"		stb	9,%1(13)	\n"	/* r9 is 0 here: store it to irq_soft_mask */
+"2:					\n"
+		RESTART_TABLE(1b, 2b, 1b)
+	: : "i" (offsetof(struct paca_struct, irq_happened)),
+	    "i" (offsetof(struct paca_struct, irq_soft_mask))
+	: "cr0", "r9"
+	: happened);
+
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		WARN_ON_ONCE(!(mfmsr() & MSR_EE));
+
+	/*
+	 * If we came here from the replay below, we might have a preempt
+	 * pending (due to preempt_enable_no_resched()). Have to check now.
+	 */
+	preempt_check_resched();
+
+	return;
+
+happened:
+	irq_happened = READ_ONCE(local_paca->irq_happened);
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		WARN_ON_ONCE(!irq_happened);
+
+	/* Only the hard-disable marker is set: nothing to replay, just enable */
+	if (irq_happened == PACA_IRQ_HARD_DIS) {
+		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+			WARN_ON_ONCE(mfmsr() & MSR_EE);
+		irq_soft_mask_set(IRQS_ENABLED);
+		local_paca->irq_happened = 0;
+		__hard_irq_enable();
+		preempt_check_resched();
+		return;
+	}
+
+	/* Have interrupts to replay, need to hard disable first */
+	if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
+		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+			if (!(mfmsr() & MSR_EE)) {
+				/*
+				 * An interrupt could have come in and cleared
+				 * MSR[EE] and set IRQ_HARD_DIS, so check
+				 * IRQ_HARD_DIS again and warn if it is still
+				 * clear.
+				 */
+				irq_happened = READ_ONCE(local_paca->irq_happened);
+				WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS));
+			}
+		}
+		__hard_irq_disable();
+		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+	} else {
+		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+			if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+				__hard_irq_disable();
+		}
+	}
+
+	/*
+	 * Disable preempt here, so that the below preempt_enable will
+	 * perform resched if required (a replayed interrupt may set
+	 * need_resched).
+	 */
+	preempt_disable();
+	irq_soft_mask_set(IRQS_ALL_DISABLED);
+	trace_hardirqs_off();
+
+	/*
+	 * Now enter interrupt context. The interrupt handlers themselves
+	 * also call irq_enter/exit (which is okay, they can nest). But call
+	 * it here now to hold off softirqs until the below irq_exit(). If
+	 * we allowed replayed handlers to run softirqs, that enables irqs,
+	 * which must replay interrupts, which recurses in here and makes
+	 * things more complicated. The recursion is limited to 2, and it can
+	 * be made to work, but it's complicated.
+	 *
+	 * local_bh_disable can not be used here because interrupts taken in
+	 * idle are not in the right context (RCU, tick, etc) to run softirqs
+	 * so irq_enter must be called.
+	 */
+	irq_enter();
+
+	replay_soft_interrupts_irqrestore();
+
+	irq_exit();
+
+	if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) {
+		/*
+		 * The softirq processing in irq_exit() may enable interrupts
+		 * temporarily, which can result in MSR[EE] being enabled and
+		 * more irqs becoming pending. Go around again if that happens.
+		 */
+		trace_hardirqs_on();
+		preempt_enable_no_resched();
+		goto again;
+	}
+
+	trace_hardirqs_on();
+	irq_soft_mask_set(IRQS_ENABLED);
+	local_paca->irq_happened = 0;
+	__hard_irq_enable();
+	preempt_enable();
+}
+EXPORT_SYMBOL(arch_local_irq_restore);
+
+/*
+ * This is a helper to use when about to go into idle low-power
+ * when the latter has the side effect of re-enabling interrupts
+ * (such as calling H_CEDE under pHyp).
+ *
+ * You call this function with interrupts soft-disabled (this is
+ * already the case when ppc_md.power_save is called). It returns true
+ * when the caller should enter power save and false when it should not.
+ *
+ * When it returns true, it will have generally sanitized the lazy irq
+ * state; when it returns false, it leaves with interrupts hard
+ * disabled and marked as such, so the local_irq_enable() call
+ * in arch_cpu_idle() will properly re-enable everything.
+ */
+__cpuidle bool prep_irq_for_idle(void)
+{
+	/*
+	 * First we need to hard disable to ensure no interrupt
+	 * occurs before we effectively enter the low power state
+	 */
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	/*
+	 * If anything happened while we were soft-disabled,
+	 * we return now and do not enter the low power state.
+	 */
+	if (lazy_irq_pending())
+		return false;
+
+	/*
+	 * Mark interrupts as soft-enabled and clear the
+	 * PACA_IRQ_HARD_DIS from the pending mask since we
+	 * are about to hard enable as well as a side effect
+	 * of entering the low power state.
+	 */
+	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+	irq_soft_mask_set(IRQS_ENABLED);
+
+	/* Tell the caller to enter the low power state */
+	return true;
+}
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * This is for idle sequences that return with IRQs off, but the
+ * idle state itself wakes on interrupt. Tell the irq tracer that
+ * IRQs are enabled for the duration of idle so it does not get long
+ * off times. Must be paired with fini_irq_for_idle_irqsoff.
+ * Returns true to go idle, false if an interrupt is already pending.
+ */
+bool prep_irq_for_idle_irqsoff(void)
+{
+	WARN_ON(!irqs_disabled());
+	/*
+	 * First we need to hard disable to ensure no interrupt
+	 * occurs before we effectively enter the low power state
+	 */
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	/*
+	 * If anything happened while we were soft-disabled,
+	 * we return now and do not enter the low power state.
+	 */
+	if (lazy_irq_pending())
+		return false;
+
+	/* Tell lockdep we are about to re-enable */
+	trace_hardirqs_on();
+
+	return true;
+}
+
+/*
+ * Take the SRR1 wakeup reason, index into this table to find the
+ * appropriate irq_happened bit.
+ *
+ * System reset exceptions taken in idle state also come through here,
+ * but they are NMI interrupts so do not need to wait for IRQs to be
+ * restored, and should be taken as early as practical. These are marked
+ * with 0xff in the table. The Power ISA specifies 0100b as the system
+ * reset interrupt reason.
+ */
+#define IRQ_SYSTEM_RESET	0xff
+
+static const u8 srr1_to_lazyirq[0x10] = {
+	0, 0, 0,		/* 0-2 */
+	PACA_IRQ_DBELL,		/* 3 */
+	IRQ_SYSTEM_RESET,	/* 4 (0100b) */
+	PACA_IRQ_DBELL,		/* 5 */
+	PACA_IRQ_DEC,		/* 6 */
+	0,			/* 7 */
+	PACA_IRQ_EE,		/* 8 */
+	PACA_IRQ_EE,		/* 9 */
+	PACA_IRQ_HMI,		/* 10 */
+	0, 0, 0, 0, 0 };	/* 11-15 */
+
+void replay_system_reset(void)
+{
+	struct pt_regs regs;
+
+	ppc_save_regs(&regs);
+	regs.trap = 0x100;	/* presumably the system reset vector - TODO confirm against the INTERRUPT_* constants */
+	get_paca()->in_nmi = 1;	/* in_nmi is set only around the handler call */
+	system_reset_exception(&regs);
+	get_paca()->in_nmi = 0;
+}
+EXPORT_SYMBOL_GPL(replay_system_reset);
+
+void irq_set_pending_from_srr1(unsigned long srr1)
+{
+	unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;	/* assumed to stay below 0x10, the table size - TODO confirm mask */
+	u8 reason = srr1_to_lazyirq[idx];
+
+	/*
+	 * Take the system reset now, which is immediately after registers
+	 * are restored from idle. It's an NMI, so interrupts need not be
+	 * re-enabled before it is taken.
+	 */
+	if (unlikely(reason == IRQ_SYSTEM_RESET)) {
+		replay_system_reset();
+		return;
+	}
+
+	if (reason == PACA_IRQ_DBELL) {
+		/*
+		 * When doorbell triggers a system reset wakeup, the message
+		 * is not cleared, so if the doorbell interrupt is replayed
+		 * and the IPI handled, the doorbell interrupt would still
+		 * fire when EE is enabled.
+		 *
+		 * To avoid taking the superfluous doorbell interrupt,
+		 * execute a msgclr here before the interrupt is replayed.
+		 */
+		ppc_msgclr(PPC_DBELL_MSGTYPE);
+	}
+
+	/*
+	 * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
+	 * so this can be called unconditionally with the SRR1 wake
+	 * reason as returned by the idle code, which uses 0 to mean no
+	 * interrupt.
+	 *
+	 * If a future CPU was to designate this as an interrupt reason,
+	 * then a new index for no interrupt must be assigned.
+	 */
+	local_paca->irq_happened |= reason;
+}
+#endif /* CONFIG_PPC_BOOK3S */
+
+/*
+ * Force a replay of the external interrupt handler on this CPU (sets PACA_IRQ_EE).
+ */
+void force_external_irq_replay(void)
+{
+	/*
+	 * This must only be called with interrupts soft-disabled,
+	 * the replay will happen when re-enabling.
+	 */
+	WARN_ON(!arch_irqs_disabled());
+
+	/*
+	 * Interrupts must always be hard disabled before irq_happened is
+	 * modified (to prevent lost update in case of interrupt between
+	 * load and store).
+	 */
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	/* Indicate in the PACA that we have an interrupt to replay */
+	local_paca->irq_happened |= PACA_IRQ_EE;
+}
+
+static int __init setup_noirqdistrib(char *str)	/* str is ignored */
+{
+	distribute_irqs = 0;
+	return 1;	/* presumably tells __setup() the option was handled - TODO confirm */
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
new file mode 100644
index 0000000000..48e0eaf1ad
--- /dev/null
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Routines for tracking a legacy ISA bridge
+ *
+ * Copyright 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Some bits and pieces moved over from pci_64.c
+ *
+ * Copyright 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/of_address.h>
+#include <linux/vmalloc.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/isa-bridge.h>
+
+unsigned long isa_io_base;	/* 0 until an ISA bridge has been found */
+EXPORT_SYMBOL(isa_io_base);
+
+/* Cached ISA bridge device-tree node and, once known, its PCI device. */
+static struct device_node *isa_bridge_devnode;
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+#define ISA_SPACE_MASK 0x1	/* bit of of_range.flags tested for the space kind */
+#define ISA_SPACE_IO 0x1	/* value of that bit for an I/O range */
+
+static void remap_isa_base(phys_addr_t pa, unsigned long size)
+{
+	WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK);
+	WARN_ON_ONCE(pa & ~PAGE_MASK);
+	WARN_ON_ONCE(size & ~PAGE_MASK);
+
+	if (!slab_is_available()) {
+		early_ioremap_range(ISA_IO_BASE, pa, size,
+				pgprot_noncached(PAGE_KERNEL));
+	} else if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+				pgprot_noncached(PAGE_KERNEL))) {
+		/* Non-zero result from the mapping call: unmap the range again */
+		vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size);
+	}
+}
+
+/* Map the first I/O range of @isa_node at ISA_IO_BASE; returns 0 or -EINVAL. */
+static int process_ISA_OF_ranges(struct device_node *isa_node,
+				 unsigned long phb_io_base_phys)
+{
+	u64 size;
+	struct of_range_parser parser;
+	struct of_range range;
+
+	if (of_range_parser_init(&parser, isa_node))
+		goto inval_range;
+
+	for_each_of_range(&parser, &range) {
+		/* Only entries flagged as I/O space are of interest */
+		if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO)
+			continue;
+
+		if (range.cpu_addr == OF_BAD_ADDR) {
+			pr_err("ISA: Bad CPU mapping: %s\n", __func__);
+			return -EINVAL;
+		}
+
+		/* We need page alignment */
+		if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) {
+			pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node);
+			return -EINVAL;
+		}
+
+		/* Align size and crop it to 64K, in 64 bits so a huge size is not truncated to 0 */
+		size = min_t(u64, PAGE_ALIGN(range.size), 0x10000);
+
+		if (!phb_io_base_phys)	/* no PHB base given: use the range's CPU address */
+			phb_io_base_phys = range.cpu_addr;
+
+		remap_isa_base(phb_io_base_phys, size);
+		return 0;
+	}
+
+inval_range:
+	/* No usable range: map 64K at the PHB I/O base when one was given */
+	if (phb_io_base_phys) {
+		pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n");
+		remap_isa_base(phb_io_base_phys, 0x10000);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/**
+ * isa_bridge_find_early - Find and map the ISA IO space early before main PCI
+ *                         discovery. Optionally called by the arch code when
+ *                         adding PCI PHBs to get early access to ISA IO ports
+ * @hose: PCI controller; only an "isa" node located below hose->dn is used
+ */
+void __init isa_bridge_find_early(struct pci_controller *hose)
+{
+	struct device_node *np, *parent = NULL, *tmp;
+
+	/* If we already have an ISA bridge, bail off */
+	if (isa_bridge_devnode != NULL)
+		return;
+
+	/* For each "isa" node in the system. Note : we do a search by
+	 * type and not by name. It might be better to do by name but that's
+	 * what the code used to do and I don't want to break too much at
+	 * once. We can look into changing that separately
+	 */
+	for_each_node_by_type(np, "isa") {
+		/* Look for our hose being a parent */
+		for (parent = of_get_parent(np); parent;) {
+			if (parent == hose->dn) {
+				of_node_put(parent);	/* parent stays non-NULL and only serves as a "found" flag below */
+				break;
+			}
+			tmp = parent;
+			parent = of_get_parent(parent);
+			of_node_put(tmp);
+		}
+		if (parent != NULL)
+			break;
+	}
+	if (np == NULL)
+		return;
+	isa_bridge_devnode = np;
+
+	/* Now parse the "ranges" property and setup the ISA mapping */
+	process_ISA_OF_ranges(np, hose->io_base_phys);	/* NOTE(review): result is not checked */
+
+	/* Set the global ISA io base to indicate we have an ISA bridge */
+	isa_io_base = ISA_IO_BASE;
+
+	pr_debug("ISA bridge (early) is %pOF\n", np);
+}
+
+/**
+ * isa_bridge_init_non_pci - Map the ISA IO space described by the ranges of
+ *                           @np, with no PHB I/O base involved. Does nothing
+ *                           when an ISA bridge is already known or when the
+ *                           ranges cannot be used
+ * @np: device-tree node of the ISA bridge
+ */
+void __init isa_bridge_init_non_pci(struct device_node *np)
+{
+	int ret;
+
+	/* If we already have an ISA bridge, bail off */
+	if (isa_bridge_devnode != NULL)
+		return;
+	ret = process_ISA_OF_ranges(np, 0);
+	if (ret)
+		return;
+
+	/* Got it */
+	isa_bridge_devnode = np;
+
+	/* Set the global ISA io base to indicate we have an ISA bridge
+	 * and map it
+	 */
+	isa_io_base = ISA_IO_BASE;
+
+	pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
+}
+
+/**
+ * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
+ *                        a new ISA bridge (@pdev, device-tree node @devnode)
+ */
+static void isa_bridge_find_late(struct pci_dev *pdev,
+				 struct device_node *devnode)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+	/* Store ISA device node (taking a reference) and PCI device */
+	isa_bridge_devnode = of_node_get(devnode);
+	isa_bridge_pcidev = pdev;
+
+	/* Now parse the "ranges" property and setup the ISA mapping */
+	process_ISA_OF_ranges(devnode, hose->io_base_phys);	/* NOTE(review): result is not checked */
+
+	/* Set the global ISA io base to indicate we have an ISA bridge */
+	isa_io_base = ISA_IO_BASE;
+
+	pr_debug("ISA bridge (late) is %pOF on %s\n",
+		 devnode, pci_name(pdev));
+}
+
+/**
+ * isa_bridge_remove - Remove/unmap an ISA bridge
+ */
+static void isa_bridge_remove(void)
+{
+	pr_debug("ISA bridge removed !\n");
+
+	/* The intent is to clear the global ISA io base to indicate that we
+	 * have no more ISA bridge (drivers don't quite handle that, but do we
+	 * ever really have ISA bridges removed on legacy-device machines ?).
+	 * NOTE(review): the value stored is ISA_IO_BASE, not 0 - confirm.
+	 */
+	isa_io_base = ISA_IO_BASE;
+
+	/* Clear references to the bridge */
+	of_node_put(isa_bridge_devnode);
+	isa_bridge_devnode = NULL;
+	isa_bridge_pcidev = NULL;
+
+	/* Unmap the ISA area (always the full 64K) */
+	vunmap_range(ISA_IO_BASE, ISA_IO_BASE + 0x10000);
+}
+
+/**
+ * isa_bridge_notify - PCI bus notifier tracking ISA bridge addition/removal
+ */
+static int isa_bridge_notify(struct notifier_block *nb, unsigned long action,
+			     void *data)
+{
+	struct device *dev = data;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct device_node *of_node = pci_device_to_OF_node(pdev);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		/* A bridge found early, without PCI dev, now gets one */
+		if (isa_bridge_devnode && !isa_bridge_pcidev &&
+		    of_node == isa_bridge_devnode) {
+			pr_debug("ISA bridge PCI attached: %s\n",
+				 pci_name(pdev));
+			isa_bridge_pcidev = pdev;
+		}
+
+		/*
+		 * No bridge known yet and this device's OF node is of type
+		 * "isa": register it as the ISA bridge.
+		 */
+		if (!isa_bridge_devnode && of_node_is_type(of_node, "isa"))
+			isa_bridge_find_late(pdev, of_node);
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		/* Drop the bridge if this is its PCI device or its OF node */
+		if (pdev == isa_bridge_pcidev ||
+		    (of_node && of_node == isa_bridge_devnode))
+			isa_bridge_remove();
+		break;
+	}
+	return 0;
+}
+
+static struct notifier_block isa_bridge_notifier = {	/* registered on pci_bus_type by isa_bridge_init() */
+	.notifier_call = isa_bridge_notify
+};
+
+/**
+ * isa_bridge_init - register to be notified of ISA bridge addition/removal
+ *
+ * Return: the result of bus_register_notifier() instead of an unconditional 0.
+ */
+static int __init isa_bridge_init(void)
+{
+	return bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
+}
+arch_initcall(isa_bridge_init);
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
new file mode 100644
index 0000000000..5277cf582c
--- /dev/null
+++ b/arch/powerpc/kernel/jump_label.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/code-patching.h>
+#include <asm/inst.h>
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	u32 *addr = (u32 *)jump_entry_code(entry);
+
+	if (type == JUMP_LABEL_JMP)
+		patch_branch(addr, jump_entry_target(entry), 0);
+	else
+		patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
+}
diff --git a/arch/powerpc/kernel/kdebugfs.c b/arch/powerpc/kernel/kdebugfs.c
new file mode 100644
index 0000000000..36d3124d5a
--- /dev/null
+++ b/arch/powerpc/kernel/kdebugfs.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+	arch_debugfs_dir = debugfs_create_dir("powerpc", NULL);
+	return 0;
+}
+arch_initcall(arch_kdebugfs_init);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
new file mode 100644
index 0000000000..ebe4d1645c
--- /dev/null
+++ b/arch/powerpc/kernel/kgdb.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PowerPC backend to the KGDB stub.
+ *
+ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
+ * Copyright (C) 2003 Timesys Corporation.
+ * Copyright (C) 2004-2006 MontaVista Software, Inc.
+ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
+ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
+ * Sergei Shtylyov <sshtylyov@ru.mvista.com>
+ * Copyright (C) 2007-2008 Wind River Systems, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/smp.h>
+#include <linux/signal.h>
+#include <linux/ptrace.h>
+#include <linux/kdebug.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/debug.h>
+#include <asm/code-patching.h>
+#include <linux/slab.h>
+#include <asm/inst.h>
+
+/*
+ * Map PowerPC hardware trap (exception vector) numbers to the signal numbers
+ * GDB understands.  GDB and the kernel don't always agree on signal values,
+ * so the numeric constants below are taken from gdb-6.2 and annotated by name.
+ */
+static struct hard_trap_info
+{
+	unsigned int tt;		/* Trap type code for powerpc */
+	unsigned char signo;		/* Signal that we map this trap into */
+} hard_trap_info[] = {
+	{ 0x0100, 0x02 /* SIGINT */  },		/* system reset */
+	{ 0x0200, 0x0b /* SIGSEGV */ },		/* machine check */
+	{ 0x0300, 0x0b /* SIGSEGV */ },		/* data access */
+	{ 0x0400, 0x0b /* SIGSEGV */ },		/* instruction access */
+	{ 0x0500, 0x02 /* SIGINT */  },		/* external interrupt */
+	{ 0x0600, 0x0a /* SIGBUS */  },		/* alignment */
+	{ 0x0700, 0x05 /* SIGTRAP */ },		/* program check */
+	{ 0x0800, 0x08 /* SIGFPE */  },		/* fp unavailable */
+	{ 0x0900, 0x0e /* SIGALRM */ },		/* decrementer */
+	{ 0x0c00, 0x14 /* SIGCHLD */ },		/* system call */
+#ifdef CONFIG_BOOKE_OR_40x
+	{ 0x2002, 0x05 /* SIGTRAP */ },		/* debug */
+#if defined(CONFIG_PPC_85xx)
+	{ 0x2010, 0x08 /* SIGFPE */  },		/* spe unavailable */
+	{ 0x2020, 0x08 /* SIGFPE */  },		/* spe unavailable */
+	{ 0x2030, 0x08 /* SIGFPE */  },		/* spe fp data */
+	{ 0x2040, 0x08 /* SIGFPE */  },		/* spe fp data */
+	{ 0x2050, 0x08 /* SIGFPE */  },		/* spe fp round */
+	{ 0x2060, 0x04 /* SIGILL */  },		/* performance monitor */
+	{ 0x2900, 0x08 /* SIGFPE */  },		/* apu unavailable */
+	{ 0x3100, 0x0e /* SIGALRM */ },		/* fixed interval timer */
+	{ 0x3200, 0x02 /* SIGINT */  }, 	/* watchdog */
+#else /* ! CONFIG_PPC_85xx */
+	{ 0x1000, 0x0e /* SIGALRM */ },		/* prog interval timer */
+	{ 0x1010, 0x0e /* SIGALRM */ },		/* fixed interval timer */
+	{ 0x1020, 0x02 /* SIGINT */  }, 	/* watchdog */
+	{ 0x2010, 0x08 /* SIGFPE */  },		/* fp unavailable */
+	{ 0x2020, 0x08 /* SIGFPE */  },		/* ap unavailable */
+#endif
+#else /* !CONFIG_BOOKE_OR_40x */
+	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */
+#if defined(CONFIG_PPC_8xx)
+	{ 0x1000, 0x04 /* SIGILL */  },		/* software emulation */
+#else /* ! CONFIG_PPC_8xx */
+	{ 0x0f00, 0x04 /* SIGILL */  },		/* performance monitor */
+	{ 0x0f20, 0x08 /* SIGFPE */  },		/* altivec unavailable */
+	{ 0x1300, 0x05 /* SIGTRAP */ }, 	/* instruction address break */
+#if defined(CONFIG_PPC64)
+	{ 0x1200, 0x04 /* SIGILL */  },		/* system error */
+	{ 0x1500, 0x04 /* SIGILL */  },		/* soft patch */
+	{ 0x1600, 0x04 /* SIGILL */  },		/* maintenance */
+	{ 0x1700, 0x08 /* SIGFPE */  },		/* altivec assist */
+	{ 0x1800, 0x04 /* SIGILL */  },		/* thermal */
+#else /* ! CONFIG_PPC64 */
+	{ 0x1400, 0x02 /* SIGINT */  },		/* SMI */
+	{ 0x1600, 0x08 /* SIGFPE */  },		/* altivec assist */
+	{ 0x1700, 0x04 /* SIGILL */  },		/* TAU */
+	{ 0x2000, 0x05 /* SIGTRAP */ },		/* run mode */
+#endif
+#endif
+#endif
+	{ 0x0000, 0x00 }			/* Must be last */
+};
+
+static int computeSignal(unsigned int tt)
+{
+	struct hard_trap_info *ht;
+
+	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+		if (ht->tt == tt)
+			return ht->signo;
+
+	return SIGHUP;		/* default for things we don't know about */
+}
+
+/**
+ *
+ *	kgdb_skipexception - Bail out of KGDB when we've been triggered.
+ *	@exception: Exception vector number
+ *	@regs: Current &struct pt_regs.
+ *
+ *	On some architectures we need to skip a breakpoint exception when
+ *	it occurs after a breakpoint has been removed.
+ *
+ */
+int kgdb_skipexception(int exception, struct pt_regs *regs)
+{
+	return kgdb_isremovedbreak(regs->nip);
+}
+
+static int kgdb_debugger_ipi(struct pt_regs *regs)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), regs);
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void kgdb_roundup_cpus(void)
+{
+	smp_send_debugger_break();
+}
+#endif
+
+/* KGDB functions to use existing PowerPC64 hooks. */
+static int kgdb_debugger(struct pt_regs *regs)
+{
+	return !kgdb_handle_exception(1, computeSignal(TRAP(regs)),
+				      DIE_OOPS, regs);
+}
+
+static int kgdb_handle_breakpoint(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return 0;
+
+	if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
+		return 0;
+
+	if (*(u32 *)regs->nip == BREAK_INSTR)
+		regs_add_return_ip(regs, BREAK_INSTR_SIZE);
+
+	return 1;
+}
+
+static int kgdb_singlestep(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return 0;
+
+	kgdb_handle_exception(0, SIGTRAP, 0, regs);
+
+	return 1;
+}
+
+static int kgdb_iabr_match(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return 0;
+
+	if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+		return 0;
+	return 1;
+}
+
+static int kgdb_break_match(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return 0;
+
+	if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+		return 0;
+	return 1;
+}
+
+#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0)
+
+#define PACK32(ptr, src) do {          \
+	u32 *ptr32;                   \
+	ptr32 = (u32 *)ptr;           \
+	*(ptr32++) = (src);           \
+	ptr = (unsigned long *)ptr32; \
+	} while (0)
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+	struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
+						  STACK_INT_FRAME_REGS);
+	unsigned long *ptr = gdb_regs;
+	int reg;
+
+	memset(gdb_regs, 0, NUMREGBYTES);
+
+	/* Regs GPR0-2 */
+	for (reg = 0; reg < 3; reg++)
+		PACK64(ptr, regs->gpr[reg]);
+
+	/* Regs GPR3-13 are caller saved, not in regs->gpr[] */
+	ptr += 11;
+
+	/* Regs GPR14-31 */
+	for (reg = 14; reg < 32; reg++)
+		PACK64(ptr, regs->gpr[reg]);
+
+#ifdef CONFIG_PPC_85xx
+#ifdef CONFIG_SPE
+	for (reg = 0; reg < 32; reg++)
+		PACK64(ptr, p->thread.evr[reg]);
+#else
+	ptr += 32;
+#endif
+#else
+	/* fp registers not used by kernel, leave zero */
+	ptr += 32 * 8 / sizeof(long);
+#endif
+
+	PACK64(ptr, regs->nip);
+	PACK64(ptr, regs->msr);
+	PACK32(ptr, regs->ccr);
+	PACK64(ptr, regs->link);
+	PACK64(ptr, regs->ctr);
+	PACK32(ptr, regs->xer);
+
+	BUG_ON((unsigned long)ptr >
+	       (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
+}
+
+#define GDB_SIZEOF_REG sizeof(unsigned long)
+#define GDB_SIZEOF_REG_U32 sizeof(u32)
+
+#ifdef CONFIG_PPC_85xx
+#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long)
+#else
+#define GDB_SIZEOF_FLOAT_REG sizeof(u64)
+#endif
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
+{
+	{ "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[0]) },
+	{ "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[1]) },
+	{ "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[2]) },
+	{ "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[3]) },
+	{ "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[4]) },
+	{ "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[5]) },
+	{ "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[6]) },
+	{ "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[7]) },
+	{ "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[8]) },
+	{ "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[9]) },
+	{ "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[10]) },
+	{ "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[11]) },
+	{ "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[12]) },
+	{ "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[13]) },
+	{ "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[14]) },
+	{ "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[15]) },
+	{ "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[16]) },
+	{ "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[17]) },
+	{ "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[18]) },
+	{ "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[19]) },
+	{ "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[20]) },
+	{ "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[21]) },
+	{ "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[22]) },
+	{ "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[23]) },
+	{ "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[24]) },
+	{ "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[25]) },
+	{ "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[26]) },
+	{ "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[27]) },
+	{ "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[28]) },
+	{ "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[29]) },
+	{ "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[30]) },
+	{ "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[31]) },
+
+	{ "f0", GDB_SIZEOF_FLOAT_REG, 0 },
+	{ "f1", GDB_SIZEOF_FLOAT_REG, 1 },
+	{ "f2", GDB_SIZEOF_FLOAT_REG, 2 },
+	{ "f3", GDB_SIZEOF_FLOAT_REG, 3 },
+	{ "f4", GDB_SIZEOF_FLOAT_REG, 4 },
+	{ "f5", GDB_SIZEOF_FLOAT_REG, 5 },
+	{ "f6", GDB_SIZEOF_FLOAT_REG, 6 },
+	{ "f7", GDB_SIZEOF_FLOAT_REG, 7 },
+	{ "f8", GDB_SIZEOF_FLOAT_REG, 8 },
+	{ "f9", GDB_SIZEOF_FLOAT_REG, 9 },
+	{ "f10", GDB_SIZEOF_FLOAT_REG, 10 },
+	{ "f11", GDB_SIZEOF_FLOAT_REG, 11 },
+	{ "f12", GDB_SIZEOF_FLOAT_REG, 12 },
+	{ "f13", GDB_SIZEOF_FLOAT_REG, 13 },
+	{ "f14", GDB_SIZEOF_FLOAT_REG, 14 },
+	{ "f15", GDB_SIZEOF_FLOAT_REG, 15 },
+	{ "f16", GDB_SIZEOF_FLOAT_REG, 16 },
+	{ "f17", GDB_SIZEOF_FLOAT_REG, 17 },
+	{ "f18", GDB_SIZEOF_FLOAT_REG, 18 },
+	{ "f19", GDB_SIZEOF_FLOAT_REG, 19 },
+	{ "f20", GDB_SIZEOF_FLOAT_REG, 20 },
+	{ "f21", GDB_SIZEOF_FLOAT_REG, 21 },
+	{ "f22", GDB_SIZEOF_FLOAT_REG, 22 },
+	{ "f23", GDB_SIZEOF_FLOAT_REG, 23 },
+	{ "f24", GDB_SIZEOF_FLOAT_REG, 24 },
+	{ "f25", GDB_SIZEOF_FLOAT_REG, 25 },
+	{ "f26", GDB_SIZEOF_FLOAT_REG, 26 },
+	{ "f27", GDB_SIZEOF_FLOAT_REG, 27 },
+	{ "f28", GDB_SIZEOF_FLOAT_REG, 28 },
+	{ "f29", GDB_SIZEOF_FLOAT_REG, 29 },
+	{ "f30", GDB_SIZEOF_FLOAT_REG, 30 },
+	{ "f31", GDB_SIZEOF_FLOAT_REG, 31 },
+	/* Sizes must match sleeping_thread_to_gdb_regs(): cr/xer 32-bit, rest long */
+	{ "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, nip) },
+	{ "msr", GDB_SIZEOF_REG, offsetof(struct pt_regs, msr) },
+	{ "cr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ccr) },
+	{ "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, link) },
+	{ "ctr", GDB_SIZEOF_REG, offsetof(struct pt_regs, ctr) },
+	{ "xer", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, xer) },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	if (regno < 32 || regno >= 64)
+		/* GPRs 0 -> 31 and pc, msr, cr, lr, ctr, xer (64 -> 69): */
+		/* copied straight out of pt_regs via the table's offset/size */
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+				dbg_reg_def[regno].size);
+
+	if (regno >= 32 && regno < 64) {
+		/* FP registers 32 -> 63 */
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
+		if (current)
+			memcpy(mem, &current->thread.evr[regno-32],
+					dbg_reg_def[regno].size);
+#else
+		/* fp registers not used by kernel, leave zero */
+		memset(mem, 0, dbg_reg_def[regno].size);
+#endif
+	}
+
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	if (regno < 32 || regno >= 64)
+		/* GPRs 0 -> 31 and pc, msr, cr, lr, ctr, xer (64 -> 69): */
+		/* written straight into pt_regs via the table's offset/size */
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+				dbg_reg_def[regno].size);
+
+	if (regno >= 32 && regno < 64) {
+		/* FP registers 32 -> 63 */
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
+		memcpy(&current->thread.evr[regno-32], mem,
+				dbg_reg_def[regno].size);
+#else
+		/* fp registers not used by kernel: silently ignore the write */
+		return 0;
+#endif
+	}
+
+	return 0;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs_set_return_ip(regs, pc);
+}
+
+/*
+ * This function does PowerPC specific processing for interfacing to gdb.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+			       char *remcom_in_buffer, char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	char *ptr = &remcom_in_buffer[1];
+	unsigned long addr;
+
+	switch (remcom_in_buffer[0]) {
+		/*
+		 * sAA..AA   Step one instruction from AA..AA
+		 * This will return an error to gdb ..
+		 */
+	case 's':
+	case 'c':
+		/* handle the optional parameter */
+		if (kgdb_hex2long(&ptr, &addr))
+			regs_set_return_ip(linux_regs, addr);
+
+		atomic_set(&kgdb_cpu_doing_single_step, -1);
+		/* set the trace bit if we're stepping */
+		if (remcom_in_buffer[0] == 's') {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+			mtspr(SPRN_DBCR0,
+			      mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+			regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE);
+#else
+			regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE);
+#endif
+			atomic_set(&kgdb_cpu_doing_single_step,
+				   raw_smp_processor_id());
+		}
+		return 0;
+	}
+
+	return -1;
+}
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+	u32 instr, *addr = (u32 *)bpt->bpt_addr;
+	int err;
+
+	err = get_kernel_nofault(instr, addr);
+	if (err)
+		return err;
+
+	err = patch_instruction(addr, ppc_inst(BREAK_INSTR));
+	if (err)
+		return -EFAULT;
+
+	*(u32 *)bpt->saved_instr = instr;
+
+	return 0;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+	int err;
+	unsigned int instr = *(unsigned int *)bpt->saved_instr;
+	u32 *addr = (u32 *)bpt->bpt_addr;
+
+	err = patch_instruction(addr, ppc_inst(instr));
+	if (err)
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Global data
+ */
+const struct kgdb_arch arch_kgdb_ops;
+
+static int kgdb_not_implemented(struct pt_regs *regs)
+{
+	return 0;
+}
+
+static void *old__debugger_ipi;
+static void *old__debugger;
+static void *old__debugger_bpt;
+static void *old__debugger_sstep;
+static void *old__debugger_iabr_match;
+static void *old__debugger_break_match;
+static void *old__debugger_fault_handler;
+
+int kgdb_arch_init(void)
+{
+	old__debugger_ipi = __debugger_ipi;
+	old__debugger = __debugger;
+	old__debugger_bpt = __debugger_bpt;
+	old__debugger_sstep = __debugger_sstep;
+	old__debugger_iabr_match = __debugger_iabr_match;
+	old__debugger_break_match = __debugger_break_match;
+	old__debugger_fault_handler = __debugger_fault_handler;
+
+	__debugger_ipi = kgdb_debugger_ipi;
+	__debugger = kgdb_debugger;
+	__debugger_bpt = kgdb_handle_breakpoint;
+	__debugger_sstep = kgdb_singlestep;
+	__debugger_iabr_match = kgdb_iabr_match;
+	__debugger_break_match = kgdb_break_match;
+	__debugger_fault_handler = kgdb_not_implemented;
+
+	return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+	__debugger_ipi = old__debugger_ipi;
+	__debugger = old__debugger;
+	__debugger_bpt = old__debugger_bpt;
+	__debugger_sstep = old__debugger_sstep;
+	__debugger_iabr_match = old__debugger_iabr_match;
+	__debugger_break_match = old__debugger_break_match;
+	__debugger_fault_handler = old__debugger_fault_handler;
+}
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
new file mode 100644
index 0000000000..072ebe7f29
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ *		  IBM Corporation
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+/* Ftrace callback handler for kprobes */
+void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
+			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	struct pt_regs *regs;
+	int bit;
+
+	bit = ftrace_test_recursion_trylock(nip, parent_nip);
+	if (bit < 0)
+		return;
+
+	regs = ftrace_get_regs(fregs);
+	p = get_kprobe((kprobe_opcode_t *)nip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto out;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		/*
+		 * On powerpc, NIP is *before* this instruction for the
+		 * pre handler
+		 */
+		regs_add_return_ip(regs, -MCOUNT_INSN_SIZE);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs)) {
+			/*
+			 * Emulate singlestep (and also recover regs->nip)
+			 * as if there is a nop
+			 */
+			regs_add_return_ip(regs, MCOUNT_INSN_SIZE);
+			if (unlikely(p->post_handler)) {
+				kcb->kprobe_status = KPROBE_HIT_SSDONE;
+				p->post_handler(p, regs, 0);
+			}
+		}
+		/*
+		 * If pre_handler returns !0, it changes regs->nip. We have to
+		 * skip emulating post_handler.
+		 */
+		__this_cpu_write(current_kprobe, NULL);
+	}
+out:
+	ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	p->ainsn.boostable = -1;
+	return 0;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 0000000000..b20ee72e87
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,573 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes contributions from
+ *		Rusty Russell).
+ * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ *		interface to access function arguments.
+ * 2004-Nov	Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
+ *		for PPC64
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/extable.h>
+#include <linux/kdebug.h>
+#include <linux/slab.h>
+#include <linux/moduleloader.h>
+#include <linux/set_memory.h>
+#include <asm/code-patching.h>
+#include <asm/cacheflush.h>
+#include <asm/sstep.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+#include <linux/uaccess.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+	return  (addr >= (unsigned long)__kprobes_text_start &&
+		 addr < (unsigned long)__kprobes_text_end) ||
+		(addr >= (unsigned long)_stext &&
+		 addr < (unsigned long)__head_end);
+}
+
+kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
+{
+	kprobe_opcode_t *addr = NULL;
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+	/* PPC64 ABIv2 needs local entry point */
+	addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+	if (addr && !offset) {
+#ifdef CONFIG_KPROBES_ON_FTRACE
+		unsigned long faddr;
+		/*
+		 * Per livepatch.h, ftrace location is always within the first
+		 * 16 bytes of a function on powerpc with -mprofile-kernel.
+		 */
+		faddr = ftrace_location_range((unsigned long)addr,
+					      (unsigned long)addr + 16);
+		if (faddr)
+			addr = (kprobe_opcode_t *)faddr;
+		else
+#endif
+			addr = (kprobe_opcode_t *)ppc_function_entry(addr);
+	}
+#elif defined(CONFIG_PPC64_ELF_ABI_V1)
+	/*
+	 * 64bit powerpc ABIv1 uses function descriptors:
+	 * - Check for the dot variant of the symbol first.
+	 * - If that fails, try looking up the symbol provided.
+	 *
+	 * This ensures we always get to the actual symbol and not
+	 * the descriptor.
+	 *
+	 * Also handle <module:symbol> format.
+	 */
+	char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
+	bool dot_appended = false;
+	const char *c;
+	ssize_t ret = 0;
+	int len = 0;
+
+	if ((c = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
+		c++;
+		len = c - name;
+		memcpy(dot_name, name, len);
+	} else
+		c = name;
+
+	if (*c != '\0' && *c != '.') {
+		dot_name[len++] = '.';
+		dot_appended = true;
+	}
+	ret = strscpy(dot_name + len, c, KSYM_NAME_LEN);
+	if (ret > 0)
+		addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
+
+	/* Fallback to the original non-dot symbol lookup */
+	if (!addr && dot_appended)
+		addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+#else
+	addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+#endif
+
+	return addr;
+}
+
+static bool arch_kprobe_on_func_entry(unsigned long offset)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#ifdef CONFIG_KPROBES_ON_FTRACE
+	return offset <= 16;
+#else
+	return offset <= 8;
+#endif
+#else
+	return !offset;
+#endif
+}
+
+/* XXX try and fold the magic of kprobe_lookup_name() in this */
+kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
+					 bool *on_func_entry)
+{
+	*on_func_entry = arch_kprobe_on_func_entry(offset);
+	return (kprobe_opcode_t *)(addr + offset);
+}
+
+void *alloc_insn_page(void)
+{
+	void *page = module_alloc(PAGE_SIZE);
+
+	if (!page)
+		return NULL;
+	/* Don't hand out a slot page whose protections could not be applied */
+	if (strict_module_rwx_enabled() && set_memory_rox((unsigned long)page, 1)) {
+		module_memfree(page);
+		return NULL;
+	}
+	return page;
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+	int ret = 0;
+	struct kprobe *prev;
+	ppc_inst_t insn = ppc_inst_read(p->addr);
+
+	if ((unsigned long)p->addr & 0x03) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		ret = -EINVAL;
+	} else if (!can_single_step(ppc_inst_val(insn))) {
+		printk("Cannot register a kprobe on instructions that can't be single stepped\n");
+		ret = -EINVAL;
+	} else if ((unsigned long)p->addr & ~PAGE_MASK &&
+		   ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) {
+		printk("Cannot register a kprobe on the second word of prefixed instruction\n");
+		ret = -EINVAL;
+	}
+	prev = get_kprobe(p->addr - 1);
+
+	/*
+	 * When prev is a ftrace-based kprobe, we don't have an insn, and it
+	 * doesn't probe for prefixed instruction.
+	 */
+	if (prev && !kprobe_ftrace(prev) &&
+	    ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
+		printk("Cannot register a kprobe on the second word of prefixed instruction\n");
+		ret = -EINVAL;
+	}
+
+	/* insn must be on a special executable page on ppc64.  This is
+	 * not explicitly required on ppc32 (right now), but it doesn't hurt */
+	if (!ret) {
+		p->ainsn.insn = get_insn_slot();
+		if (!p->ainsn.insn)
+			ret = -ENOMEM;
+	}
+
+	if (!ret) {
+		patch_instruction(p->ainsn.insn, insn);
+		p->opcode = ppc_inst_val(insn);
+	}
+
+	p->ainsn.boostable = 0;
+	return ret;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+	WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)));
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+	WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode)));
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	enable_single_step(regs);
+
+	/*
+	 * On powerpc we should single step on the original
+	 * instruction even if the probed insn is a trap
+	 * variant as values in regs could play a part in
+	 * if the trap is taken or not
+	 */
+	regs_set_return_ip(regs, (unsigned long)p->ainsn.insn);
+}
+
+static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
+}
+
+static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
+}
+
+static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+				struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, p);
+	kcb->kprobe_saved_msr = regs->msr;
+}
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *)regs->link;
+	ri->fp = NULL;
+
+	/* Replace the return addr with trampoline addr */
+	regs->link = (unsigned long)__kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+
+static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
+{
+	int ret;
+	ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
+
+	/* regs->nip is also adjusted if emulate_step returns 1 */
+	ret = emulate_step(regs, insn);
+	if (ret > 0) {
+		/*
+		 * Once this instruction has been boosted
+		 * successfully, set the boostable flag
+		 */
+		if (unlikely(p->ainsn.boostable == 0))
+			p->ainsn.boostable = 1;
+	} else if (ret < 0) {
+		/*
+		 * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
+		 * So, we should never get here... but, its still
+		 * good to catch them, just in case...
+		 */
+		printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn));
+		BUG();
+	} else {
+		/*
+		 * If we haven't previously emulated this instruction, then it
+		 * can't be boosted. Note it down so we don't try to do so again.
+		 *
+		 * If, however, we had emulated this instruction in the past,
+		 * then this is just an error with the current run (for
+		 * instance, exceptions due to a load/store). We return 0 so
+		 * that this is now single-stepped, but continue to try
+		 * emulating it in subsequent probe hits.
+		 */
+		if (unlikely(p->ainsn.boostable != 1))
+			p->ainsn.boostable = -1;
+	}
+
+	return ret;
+}
+NOKPROBE_SYMBOL(try_to_emulate);
+
+int kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	unsigned int *addr = (unsigned int *)regs->nip;
+	struct kprobe_ctlblk *kcb;
+
+	if (user_mode(regs))
+		return 0;
+
+	if (!IS_ENABLED(CONFIG_BOOKE) &&
+	    (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)))
+		return 0;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	p = get_kprobe(addr);
+	if (!p) {
+		unsigned int instr;
+
+		if (get_kernel_nofault(instr, addr))
+			goto no_kprobe;
+
+		if (instr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * PowerPC has multiple variants of the "trap"
+			 * instruction. If the current instruction is a
+			 * trap variant, it could belong to someone else
+			 */
+			if (is_trap(instr))
+				goto no_kprobe;
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		kprobe_opcode_t insn = *p->ainsn.insn;
+		if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
+			/* Turn off 'trace' bits */
+			regs_set_return_msr(regs,
+				(regs->msr & ~MSR_SINGLESTEP) |
+				kcb->kprobe_saved_msr);
+			goto no_kprobe;
+		}
+
+		/*
+		 * We have reentered the kprobe_handler(), since another probe
+		 * was hit while within the handler. We here save the original
+		 * kprobes variables and just single step on the instruction of
+		 * the new probe without calling any user handlers.
+		 */
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p, regs, kcb);
+		kprobes_inc_nmissed_count(p);
+		kcb->kprobe_status = KPROBE_REENTER;
+		if (p->ainsn.boostable >= 0) {
+			ret = try_to_emulate(p, regs);
+
+			if (ret > 0) {
+				restore_previous_kprobe(kcb);
+				preempt_enable();
+				return 1;
+			}
+		}
+		prepare_singlestep(p, regs);
+		return 1;
+	}
+
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p, regs, kcb);
+	if (p->pre_handler && p->pre_handler(p, regs)) {
+		/* handler changed execution path, so skip ss setup */
+		reset_current_kprobe();
+		preempt_enable();
+		return 1;
+	}
+
+	if (p->ainsn.boostable >= 0) {
+		ret = try_to_emulate(p, regs);
+
+		if (ret > 0) {
+			if (p->post_handler)
+				p->post_handler(p, regs, 0);
+
+			kcb->kprobe_status = KPROBE_HIT_SSDONE;
+			reset_current_kprobe();
+			preempt_enable();
+			return 1;
+		}
+	}
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable();
+	return ret;
+}
+NOKPROBE_SYMBOL(kprobe_handler);
+
+/*
+ * Function return probe trampoline:
+ * 	- init_kprobes() establishes a probepoint here
+ * 	- When the probed function returns, this probe
+ * 		causes the handlers to fire
+ */
+asm(".global __kretprobe_trampoline\n"
+	".type __kretprobe_trampoline, @function\n"
+	"__kretprobe_trampoline:\n"
+	"nop\n"
+	"blr\n"
+	".size __kretprobe_trampoline, .-__kretprobe_trampoline\n");
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long orig_ret_address;
+
+	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
+	/*
+	 * We get here through one of two paths:
+	 * 1. by taking a trap -> kprobe_handler() -> here
+	 * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+	 *
+	 * When going back through (1), we need regs->nip to be setup properly
+	 * as it is used to determine the return address from the trap.
+	 * For (2), since nip is not honoured with optprobes, we instead setup
+	 * the link register properly so that the subsequent 'blr' in
+	 * __kretprobe_trampoline jumps back to the right instruction.
+	 *
+	 * For nip, we should set the address to the previous instruction since
+	 * we end up emulating it in kprobe_handler(), which increments the nip
+	 * again.
+	 */
+	regs_set_return_ip(regs, orig_ret_address - 4);
+	regs->link = orig_ret_address;
+
+	return 0;
+}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+int kprobe_post_handler(struct pt_regs *regs)
+{
+	int len;
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur || user_mode(regs))
+		return 0;
+
+	len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn));
+	/* make sure we got here for instruction we have a kprobe on */
+	if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	/* Adjust nip to after the single-stepped instruction */
+	regs_set_return_ip(regs, (unsigned long)cur->addr + len);
+	regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr);
+
+	/*Restore back the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+out:
+	preempt_enable();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, msr
+	 * will have DE/SE set, in which case, continue the remaining processing
+	 * of do_debug, as if this is not a probe hit.
+	 */
+	if (regs->msr & MSR_SINGLESTEP)
+		return 0;
+
+	return 1;
+}
+NOKPROBE_SYMBOL(kprobe_post_handler);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	const struct exception_table_entry *entry;
+
+	switch(kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the nip points back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs_set_return_ip(regs, (unsigned long)cur->addr);
+		/* Turn off 'trace' bits */
+		regs_set_return_msr(regs,
+			(regs->msr & ~MSR_SINGLESTEP) |
+			kcb->kprobe_saved_msr);
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if ((entry = search_exception_tables(regs->nip)) != NULL) {
+			regs_set_return_ip(regs, extable_fixup(entry));
+			return 1;
+		}
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);
+}
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
+		return 1;
+
+	return 0;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
new file mode 100644
index 0000000000..5b3c093611
--- /dev/null
+++ b/arch/powerpc/kernel/kvm.c
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors:
+ *     Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/kmemleak.h>
+#include <linux/kvm_para.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/pagemap.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
+
+#define KVM_MAGIC_PAGE		(-4096L)	/* address the host maps the magic page at */
+#define magic_var(x) (KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x))	/* address of field x */
+
+#define KVM_INST_LWZ		0x80000000
+#define KVM_INST_STW		0x90000000
+#define KVM_INST_LD		0xe8000000
+#define KVM_INST_STD		0xf8000000
+#define KVM_INST_NOP		0x60000000
+#define KVM_INST_B		0x48000000
+#define KVM_INST_B_MASK		0x03ffffff
+#define KVM_INST_B_MAX		0x01ffffff
+#define KVM_INST_LI		0x38000000
+
+#define KVM_MASK_RT		0x03e00000
+#define KVM_RT_30		0x03c00000
+#define KVM_MASK_RB		0x0000f800
+#define KVM_INST_MFMSR		0x7c0000a6
+
+#define SPR_FROM		0
+#define SPR_TO			0x100
+
+#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \
+				    (((sprn) & 0x1f) << 16) | \
+				    (((sprn) & 0x3e0) << 6) | \
+				    (moveto))
+
+#define KVM_INST_MFSPR(sprn)	KVM_INST_SPR(sprn, SPR_FROM)
+#define KVM_INST_MTSPR(sprn)	KVM_INST_SPR(sprn, SPR_TO)
+
+#define KVM_INST_TLBSYNC	0x7c00046c
+#define KVM_INST_MTMSRD_L0	0x7c000164
+#define KVM_INST_MTMSRD_L1	0x7c010164
+#define KVM_INST_MTMSR		0x7c000124
+
+#define KVM_INST_WRTEE		0x7c000106
+#define KVM_INST_WRTEEI_0	0x7c000146
+#define KVM_INST_WRTEEI_1	0x7c008146
+
+#define KVM_INST_MTSRIN		0x7c0001e4
+
+static bool kvm_patching_worked = true;	/* cleared on any failure; reported by kvm_use_magic_page() */
+extern char kvm_tmp[];			/* scratch area the template copies are carved from */
+extern char kvm_tmp_end[];		/* first byte past kvm_tmp[] */
+static int kvm_tmp_index;		/* bytes of kvm_tmp[] already handed out by kvm_alloc() */
+
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
+{
+	*inst = new_inst;	/* overwrite the instruction word in place */
+	flush_icache_range((ulong)inst, (ulong)inst + 4);	/* flush just those 4 bytes */
+}
+
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+{
+	/* Same offset either way; only the opcode differs: ld on 64-bit, else lwz */
+	u32 load_op = IS_ENABLED(CONFIG_64BIT) ? KVM_INST_LD : KVM_INST_LWZ;
+	u32 disp = addr & 0x0000fffc;	/* low 16 bits of addr, bottom two cleared */
+
+	kvm_patch_ins(inst, load_op | rt | disp);
+}
+
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+{
+	/* 64-bit: ld from addr. 32-bit: lwz from addr + 4 instead */
+	bool wide = IS_ENABLED(CONFIG_64BIT);
+	long ea = wide ? addr : addr + 4;
+
+	kvm_patch_ins(inst, (wide ? KVM_INST_LD : KVM_INST_LWZ) | rt | (ea & 0x0000fffc));
+}
+
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+{
+	kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));	/* all 16 low bits of addr kept */
+}
+
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+{
+	/* 64-bit: std to addr. 32-bit: stw to addr + 4 instead */
+	bool wide = IS_ENABLED(CONFIG_64BIT);
+	long ea = wide ? addr : addr + 4;
+
+	kvm_patch_ins(inst, (wide ? KVM_INST_STD : KVM_INST_STW) | rt | (ea & 0x0000fffc));
+}
+
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+{
+	kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));	/* low 16 bits, bottom two cleared */
+}
+
+static void __init kvm_patch_ins_nop(u32 *inst)
+{
+	kvm_patch_ins(inst, KVM_INST_NOP);	/* replace *inst with KVM_INST_NOP */
+}
+
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
+{
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
+	/* On relocatable kernels interrupt handlers and our code can be in
+	   different regions, so nothing below __end_interrupts is patched */
+
+	if ((ulong)inst < (ulong)&__end_interrupts)
+		return;
+#endif
+	/* addr is the byte distance to the target; keep the bits in KVM_INST_B_MASK */
+	kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
+}
+
+static u32 * __init kvm_alloc(int len)
+{
+	u32 *chunk;
+
+	/* Carve len bytes off the unused part of kvm_tmp[]; NULL once it is full */
+	if (kvm_tmp_index + len > kvm_tmp_end - kvm_tmp) {
+		printk(KERN_ERR "KVM: No more space (%d + %d)\n",
+				kvm_tmp_index, len);
+		kvm_patching_worked = false;
+		return NULL;
+	}
+	chunk = (void *)&kvm_tmp[kvm_tmp_index];	/* next free byte */
+	kvm_tmp_index += len;
+
+	return chunk;
+}
+
+extern u32 kvm_emulate_mtmsrd_branch_offs;
+extern u32 kvm_emulate_mtmsrd_reg_offs;
+extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
+extern u32 kvm_emulate_mtmsrd_len;
+extern u32 kvm_emulate_mtmsrd[];
+
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+{
+	u32 *p;
+	long distance_start, distance_end;
+
+	/* Redirect *inst to a private copy of the kvm_emulate_mtmsrd template */
+	p = kvm_alloc(kvm_emulate_mtmsrd_len * 4);
+	if (!p)
+		return;
+
+	/* Hop from inst to the copy, and from the copy's exit branch to inst + 4 */
+	distance_start = (ulong)p - (ulong)inst;
+	distance_end = (ulong)inst + 4 - (ulong)&p[kvm_emulate_mtmsrd_branch_offs];
+
+	/* b has a signed 26-bit displacement: both hops must fit, either way */
+	if (distance_start > KVM_INST_B_MAX || distance_start < -KVM_INST_B_MAX - 1 ||
+	    distance_end > KVM_INST_B_MAX || distance_end < -KVM_INST_B_MAX - 1) {
+		kvm_patching_worked = false;
+		return;
+	}
+
+	/* Modify the chunk to fit the invocation */
+	memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4);
+	p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK;
+	/* r30/r31 are reused by the template: load their saved copies instead */
+	switch (get_rt(rt)) {
+	case 30:
+		kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+				 magic_var(scratch2), KVM_RT_30);
+		break;
+	case 31:
+		kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+				 magic_var(scratch1), KVM_RT_30);
+		break;
+	default:
+		p[kvm_emulate_mtmsrd_reg_offs] |= rt;
+		break;
+	}
+
+	p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst;
+	flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4);
+
+	/* Patch the invocation */
+	kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_mtmsr_branch_offs;
+extern u32 kvm_emulate_mtmsr_reg1_offs;
+extern u32 kvm_emulate_mtmsr_reg2_offs;
+extern u32 kvm_emulate_mtmsr_orig_ins_offs;
+extern u32 kvm_emulate_mtmsr_len;
+extern u32 kvm_emulate_mtmsr[];
+
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+{
+	u32 *p;
+	long distance_start, distance_end;
+
+	/* Redirect *inst to a private copy of the kvm_emulate_mtmsr template */
+	p = kvm_alloc(kvm_emulate_mtmsr_len * 4);
+	if (!p)
+		return;
+
+	/* Hop from inst to the copy, and from the copy's exit branch to inst + 4 */
+	distance_start = (ulong)p - (ulong)inst;
+	distance_end = (ulong)inst + 4 - (ulong)&p[kvm_emulate_mtmsr_branch_offs];
+
+	/* b has a signed 26-bit displacement: both hops must fit, either way */
+	if (distance_start > KVM_INST_B_MAX || distance_start < -KVM_INST_B_MAX - 1 ||
+	    distance_end > KVM_INST_B_MAX || distance_end < -KVM_INST_B_MAX - 1) {
+		kvm_patching_worked = false;
+		return;
+	}
+
+	/* Modify the chunk to fit the invocation */
+	memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4);
+	p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+	/* Make clobbered registers work too: r30/r31 come from their saved copies */
+	switch (get_rt(rt)) {
+	case 30:
+		kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+				 magic_var(scratch2), KVM_RT_30);
+		kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+				 magic_var(scratch2), KVM_RT_30);
+		break;
+	case 31:
+		kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+				 magic_var(scratch1), KVM_RT_30);
+		kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+				 magic_var(scratch1), KVM_RT_30);
+		break;
+	default:
+		p[kvm_emulate_mtmsr_reg1_offs] |= rt;
+		p[kvm_emulate_mtmsr_reg2_offs] |= rt;
+		break;
+	}
+
+	/* The copy carries the original instruction in its orig_ins slot */
+	p[kvm_emulate_mtmsr_orig_ins_offs] = *inst;
+	flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4);
+
+	/* Patch the invocation */
+	kvm_patch_ins_b(inst, distance_start);
+}
+
+#ifdef CONFIG_BOOKE
+
+extern u32 kvm_emulate_wrtee_branch_offs;
+extern u32 kvm_emulate_wrtee_reg_offs;
+extern u32 kvm_emulate_wrtee_orig_ins_offs;
+extern u32 kvm_emulate_wrtee_len;
+extern u32 kvm_emulate_wrtee[];
+
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+{
+	u32 *p;
+	long distance_start, distance_end;
+
+	/* Redirect *inst to a private copy of the kvm_emulate_wrtee template */
+	p = kvm_alloc(kvm_emulate_wrtee_len * 4);
+	if (!p)
+		return;
+
+	/* Hop from inst to the copy, and from the copy's exit branch to inst + 4 */
+	distance_start = (ulong)p - (ulong)inst;
+	distance_end = (ulong)inst + 4 - (ulong)&p[kvm_emulate_wrtee_branch_offs];
+
+	/* b has a signed 26-bit displacement: both hops must fit, either way */
+	if (distance_start > KVM_INST_B_MAX || distance_start < -KVM_INST_B_MAX - 1 ||
+	    distance_end > KVM_INST_B_MAX || distance_end < -KVM_INST_B_MAX - 1) {
+		kvm_patching_worked = false;
+		return;
+	}
+
+	/* Modify the chunk to fit the invocation */
+	memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4);
+	p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+	if (imm_one) {
+		/* wrteei 1: make the template load the constant MSR_EE into r30 */
+		p[kvm_emulate_wrtee_reg_offs] =
+			KVM_INST_LI | __PPC_RT(R30) | MSR_EE;
+	} else {
+		/* Make clobbered registers work too */
+		switch (get_rt(rt)) {
+		case 30:
+			kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+					 magic_var(scratch2), KVM_RT_30);
+			break;
+		case 31:
+			kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+					 magic_var(scratch1), KVM_RT_30);
+			break;
+		default:
+			p[kvm_emulate_wrtee_reg_offs] |= rt;
+			break;
+		}
+	}
+
+	p[kvm_emulate_wrtee_orig_ins_offs] = *inst;
+	flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4);
+
+	/* Patch the invocation */
+	kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_wrteei_0_branch_offs;
+extern u32 kvm_emulate_wrteei_0_len;
+extern u32 kvm_emulate_wrteei_0[];
+
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
+{
+	u32 *p;
+	long distance_start, distance_end;
+
+	/* Redirect *inst to a private copy of the kvm_emulate_wrteei_0 template */
+	p = kvm_alloc(kvm_emulate_wrteei_0_len * 4);
+	if (!p)
+		return;
+
+	/* Hop from inst to the copy, and from the copy's exit branch to inst + 4 */
+	distance_start = (ulong)p - (ulong)inst;
+	distance_end = (ulong)inst + 4 - (ulong)&p[kvm_emulate_wrteei_0_branch_offs];
+
+	/* b has a signed 26-bit displacement: both hops must fit, either way */
+	if (distance_start > KVM_INST_B_MAX || distance_start < -KVM_INST_B_MAX - 1 ||
+	    distance_end > KVM_INST_B_MAX || distance_end < -KVM_INST_B_MAX - 1) {
+		kvm_patching_worked = false;
+		return;
+	}
+
+	/* Copy the template and aim its exit branch at inst + 4 */
+	memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4);
+	p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK;
+	flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4);
+
+	/* Patch the invocation */
+	kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
+extern u32 kvm_emulate_mtsrin_branch_offs;
+extern u32 kvm_emulate_mtsrin_reg1_offs;
+extern u32 kvm_emulate_mtsrin_reg2_offs;
+extern u32 kvm_emulate_mtsrin_orig_ins_offs;
+extern u32 kvm_emulate_mtsrin_len;
+extern u32 kvm_emulate_mtsrin[];
+
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+{
+	u32 *p;
+	long distance_start, distance_end;
+
+	/* Redirect *inst to a private copy of the kvm_emulate_mtsrin template */
+	p = kvm_alloc(kvm_emulate_mtsrin_len * 4);
+	if (!p)
+		return;
+
+	/* Hop from inst to the copy, and from the copy's exit branch to inst + 4 */
+	distance_start = (ulong)p - (ulong)inst;
+	distance_end = (ulong)inst + 4 - (ulong)&p[kvm_emulate_mtsrin_branch_offs];
+
+	/* b has a signed 26-bit displacement: both hops must fit, either way */
+	if (distance_start > KVM_INST_B_MAX || distance_start < -KVM_INST_B_MAX - 1 ||
+	    distance_end > KVM_INST_B_MAX || distance_end < -KVM_INST_B_MAX - 1) {
+		kvm_patching_worked = false;
+		return;
+	}
+
+	/* Modify the chunk to fit the invocation */
+	memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4);
+	p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK;
+	/* rb arrives in KVM_MASK_RB position; << 10 moves it to KVM_MASK_RT position */
+	p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10);
+	p[kvm_emulate_mtsrin_reg2_offs] |= rt;
+	p[kvm_emulate_mtsrin_orig_ins_offs] = *inst;
+	flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4);
+
+	/* Patch the invocation */
+	kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+static void __init kvm_map_magic_page(void *data)
+{
+	u32 *features = data;
+	/* Run through on_each_cpu(); data is the caller's single u32 feature word */
+	ulong in[8] = {0};
+	ulong out[8];
+
+	in[0] = KVM_MAGIC_PAGE;
+	in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX;
+
+	epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
+
+	*features = out[0];	/* first hypercall output is kept as the feature mask */
+}
+
+static void __init kvm_check_ins(u32 *inst, u32 features)
+{
+	u32 _inst = *inst;	/* instruction word under test; patched in place on a match */
+	u32 inst_no_rt = _inst & ~KVM_MASK_RT;	/* encoding with the RT field zeroed */
+	u32 inst_rt = _inst & KVM_MASK_RT;	/* RT field, still in instruction position */
+
+	switch (inst_no_rt) {
+	/* Loads: mfmsr/mfspr become a load from the matching magic page field */
+	case KVM_INST_MFMSR:
+		kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG0):
+		kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG1):
+		kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG2):
+		kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG3):
+		kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SRR0):
+		kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SRR1):
+		kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
+		break;
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MFSPR(SPRN_DEAR):
+#else
+	case KVM_INST_MFSPR(SPRN_DAR):
+#endif
+		kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_DSISR):
+		kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
+		break;
+
+#ifdef CONFIG_PPC_E500
+	case KVM_INST_MFSPR(SPRN_MAS0):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_MAS1):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_MAS2):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_MAS3):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_MAS4):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_MAS6):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_MAS7):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
+		break;
+#endif /* CONFIG_PPC_E500 */
+
+	case KVM_INST_MFSPR(SPRN_SPRG4):
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MFSPR(SPRN_SPRG4R):
+#endif
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG5):
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MFSPR(SPRN_SPRG5R):
+#endif
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG6):
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MFSPR(SPRN_SPRG6R):
+#endif
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt);
+		break;
+	case KVM_INST_MFSPR(SPRN_SPRG7):
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MFSPR(SPRN_SPRG7R):
+#endif
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt);
+		break;
+
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MFSPR(SPRN_ESR):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt);
+		break;
+#endif
+
+	case KVM_INST_MFSPR(SPRN_PIR):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt);
+		break;
+
+
+	/* Stores: mtspr becomes a store to the matching magic page field */
+	case KVM_INST_MTSPR(SPRN_SPRG0):
+		kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SPRG1):
+		kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SPRG2):
+		kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SPRG3):
+		kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SRR0):
+		kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SRR1):
+		kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
+		break;
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MTSPR(SPRN_DEAR):
+#else
+	case KVM_INST_MTSPR(SPRN_DAR):
+#endif
+		kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_DSISR):
+		kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
+		break;
+#ifdef CONFIG_PPC_E500
+	case KVM_INST_MTSPR(SPRN_MAS0):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_MAS1):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_MAS2):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_std(inst, magic_var(mas2), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_MAS3):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_MAS4):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_MAS6):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_MAS7):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
+		break;
+#endif /* CONFIG_PPC_E500 */
+
+	case KVM_INST_MTSPR(SPRN_SPRG4):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SPRG5):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SPRG6):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt);
+		break;
+	case KVM_INST_MTSPR(SPRN_SPRG7):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt);
+		break;
+
+#ifdef CONFIG_BOOKE
+	case KVM_INST_MTSPR(SPRN_ESR):
+		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+			kvm_patch_ins_stw(inst, magic_var(esr), inst_rt);
+		break;
+#endif
+
+	/* Nops: tlbsync is replaced by a plain nop */
+	case KVM_INST_TLBSYNC:
+		kvm_patch_ins_nop(inst);
+		break;
+
+	/* Rewrites: the instruction becomes a branch to an emulation template copy */
+	case KVM_INST_MTMSRD_L1:
+		kvm_patch_ins_mtmsrd(inst, inst_rt);
+		break;
+	case KVM_INST_MTMSR:
+	case KVM_INST_MTMSRD_L0:
+		kvm_patch_ins_mtmsr(inst, inst_rt);
+		break;
+#ifdef CONFIG_BOOKE
+	case KVM_INST_WRTEE:
+		kvm_patch_ins_wrtee(inst, inst_rt, 0);
+		break;
+#endif
+	}
+
+	switch (inst_no_rt & ~KVM_MASK_RB) {	/* patterns that ignore both RT and RB */
+#ifdef CONFIG_PPC_BOOK3S_32
+	case KVM_INST_MTSRIN:
+		if (features & KVM_MAGIC_FEAT_SR) {
+			u32 inst_rb = _inst & KVM_MASK_RB;
+			kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb);
+		}
+		break;
+#endif
+	}
+
+	switch (_inst) {	/* encodings matched exactly, no register field masked */
+#ifdef CONFIG_BOOKE
+	case KVM_INST_WRTEEI_0:
+		kvm_patch_ins_wrteei_0(inst);
+		break;
+
+	case KVM_INST_WRTEEI_1:
+		kvm_patch_ins_wrtee(inst, 0, 1);
+		break;
+#endif
+	}
+}
+
+extern u32 kvm_template_start[];
+extern u32 kvm_template_end[];
+
+static void __init kvm_use_magic_page(void)
+{
+	u32 *text_start = (void *)_stext;
+	u32 *text_end = (void *)_etext;
+	u32 features;
+	u32 *insn;
+
+	/* Every CPU asks the host to map the magic page at -4096 */
+	on_each_cpu(kvm_map_magic_page, &features, 1);
+
+	/* Probe the mapping with a read before relying on it */
+	if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
+			      sizeof(u32)) != 0) {
+		kvm_patching_worked = false;
+		return;
+	}
+
+	/*
+	 * Interrupts stay off for the whole scan: being interrupted
+	 * mid-patching would be bad for SPRG4-7, which KVM can't keep in
+	 * sync with emulated accesses because reads don't trap.
+	 */
+	local_irq_disable();
+
+	/*
+	 * Visit every word of kernel text. The template region, which
+	 * also contains kvm_tmp, is stepped over rather than patched.
+	 */
+	for (insn = text_start; insn < text_end; insn++) {
+		if (insn < kvm_template_start || insn >= kvm_template_end)
+			kvm_check_ins(insn, features);
+		else
+			insn = kvm_template_end - 1;	/* insn++ then lands on the end */
+	}
+
+	local_irq_enable();
+
+	/* One summary line for the whole pass */
+	printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
+			 kvm_patching_worked ? "worked" : "failed");
+}
+
+static int __init kvm_guest_init(void)
+{
+	/* Bail out early unless both paravirt checks pass */
+	if (!kvm_para_available() || !epapr_paravirt_enabled)
+		return 0;
+
+	/* Patch the kernel only when the magic page feature is advertised */
+	if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
+		kvm_use_magic_page();
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Book3S-64 additionally switches napping on */
+	powersave_nap = 1;
+#endif
+
+	/* This initcall always reports success; patch failures are only logged */
+	return 0;
+}
+
+postcore_initcall(kvm_guest_init);
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
new file mode 100644
index 0000000000..7af6f8b50c
--- /dev/null
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+
+#define KVM_MAGIC_PAGE		(-4096)
+
+#ifdef CONFIG_64BIT	/* LL64/STL64: load/store one 64-bit magic page field */
+#define LL64(reg, offs, reg2)	ld	reg, (offs)(reg2)
+#define STL64(reg, offs, reg2)	std	reg, (offs)(reg2)
+#else	/* 32-bit build: only the word at offs + 4 is accessed */
+#define LL64(reg, offs, reg2)	lwz	reg, (offs + 4)(reg2)
+#define STL64(reg, offs, reg2)	stw	reg, (offs + 4)(reg2)
+#endif
+
+#define SCRATCH_SAVE							\
+	/* Enable critical section. We are critical if			\
+	   shared->critical == r1 */					\
+	STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);		\
+									\
+	/* Save r31, r30 and CR into scratch1..3 */			\
+	PPC_STL	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0);		\
+	PPC_STL	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0);		\
+	mfcr	r31;							\
+	stw	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);
+
+#define SCRATCH_RESTORE							\
+	/* Restore r31, CR (via r30) and then r30 */			\
+	PPC_LL	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0);		\
+	lwz	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);		\
+	mtcr	r30;							\
+	PPC_LL	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0);		\
+									\
+	/* Disable critical section. We are critical if			\
+	   shared->critical == r1 and r2 is always != r1 */		\
+	STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
+
+.global kvm_template_start
+kvm_template_start:
+
+.global kvm_emulate_mtmsrd
+kvm_emulate_mtmsrd:
+	/* Template for mtmsrd with L=1; kvm.c copies it and patches each copy */
+	SCRATCH_SAVE
+
+	/* Put MSR & ~(MSR_EE|MSR_RI) in r31 */
+	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+	lis	r30, (~(MSR_EE | MSR_RI))@h
+	ori	r30, r30, (~(MSR_EE | MSR_RI))@l
+	and	r31, r31, r30
+
+	/* OR the register's (MSR_EE|MSR_RI) on MSR; kvm.c patches the source reg */
+kvm_emulate_mtmsrd_reg:
+	ori	r30, r0, 0
+	andi.	r30, r30, (MSR_EE|MSR_RI)
+	or	r31, r31, r30
+
+	/* Put MSR back into magic page */
+	STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	/* Check if we have to fetch an interrupt */
+	lwz	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+	cmpwi	r31, 0
+	beq+	no_check
+
+	/* Check if we may trigger an interrupt */
+	andi.	r30, r30, MSR_EE
+	beq	no_check
+
+	SCRATCH_RESTORE
+
+	/* Nag hypervisor: tlbsync is a placeholder for the original instruction */
+kvm_emulate_mtmsrd_orig_ins:
+	tlbsync
+
+	b	kvm_emulate_mtmsrd_branch
+
+no_check:
+
+	SCRATCH_RESTORE
+
+	/* Go back to caller: kvm.c ORs the real displacement into this branch */
+kvm_emulate_mtmsrd_branch:
+	b	.
+kvm_emulate_mtmsrd_end:
+	/* Patch points and length below are counted in 32-bit words (bytes / 4) */
+.global kvm_emulate_mtmsrd_branch_offs
+kvm_emulate_mtmsrd_branch_offs:
+	.long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_reg_offs
+kvm_emulate_mtmsrd_reg_offs:
+	.long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_orig_ins_offs
+kvm_emulate_mtmsrd_orig_ins_offs:
+	.long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_len
+kvm_emulate_mtmsrd_len:
+	.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
+
+
+#define MSR_SAFE_BITS (MSR_EE | MSR_RI)
+#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
+
+.global kvm_emulate_mtmsr
+kvm_emulate_mtmsr:
+	/* Template for mtmsr and mtmsrd with L=0; kvm.c copies and patches it */
+	SCRATCH_SAVE
+
+	/* Fetch old MSR in r31 */
+	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	/* Find the changed bits between old and new MSR (source reg patched in) */
+kvm_emulate_mtmsr_reg1:
+	ori	r30, r0, 0
+	xor	r31, r30, r31
+
+	/* Check if we need to really do mtmsr */
+	LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS)
+	and.	r31, r31, r30
+
+	/* No critical bits changed? Maybe we can stay in the guest. */
+	beq	maybe_stay_in_guest
+
+do_mtmsr:
+
+	SCRATCH_RESTORE
+
+	/* Just fire off the mtmsr if it's critical (kvm.c stores the original here) */
+kvm_emulate_mtmsr_orig_ins:
+	mtmsr	r0
+
+	b	kvm_emulate_mtmsr_branch
+
+maybe_stay_in_guest:
+
+	/* Get the new MSR value (the patched-in source register) in r30 */
+kvm_emulate_mtmsr_reg2:
+	ori	r30, r0, 0
+
+	/* Put MSR into magic page because we don't call mtmsr */
+	STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	/* Check if we have to fetch an interrupt */
+	lwz	r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+	cmpwi	r31, 0
+	beq+	no_mtmsr
+
+	/* Check if we may trigger an interrupt */
+	andi.	r31, r30, MSR_EE
+	bne	do_mtmsr
+
+no_mtmsr:
+
+	SCRATCH_RESTORE
+
+	/* Go back to caller: kvm.c ORs the real displacement into this branch */
+kvm_emulate_mtmsr_branch:
+	b	.
+kvm_emulate_mtmsr_end:
+	/* Patch points and length below are counted in 32-bit words (bytes / 4) */
+.global kvm_emulate_mtmsr_branch_offs
+kvm_emulate_mtmsr_branch_offs:
+	.long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg1_offs
+kvm_emulate_mtmsr_reg1_offs:
+	.long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg2_offs
+kvm_emulate_mtmsr_reg2_offs:
+	.long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_orig_ins_offs
+kvm_emulate_mtmsr_orig_ins_offs:
+	.long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_len
+kvm_emulate_mtmsr_len:
+	.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+
+#ifdef CONFIG_BOOKE
+
+/* Template for wrtee; also used for wrteei 1 (kvm.c then patches in "li r30, MSR_EE") */
+.global kvm_emulate_wrtee
+kvm_emulate_wrtee:
+
+	SCRATCH_SAVE
+
+	/* Fetch old MSR in r31 */
+	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	/* Insert new MSR[EE]; the instruction at _reg is patched by kvm.c */
+kvm_emulate_wrtee_reg:
+	ori	r30, r0, 0
+	rlwimi	r31, r30, 0, MSR_EE
+
+	/*
+	 * If MSR[EE] is now set, check for a pending interrupt.
+	 * We could skip this if MSR[EE] was already on, but that
+	 * should be rare, so don't bother.
+	 */
+	andi.	r30, r30, MSR_EE
+
+	/* Put MSR into magic page because we don't call wrtee */
+	STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	beq	no_wrtee
+
+	/* Check if we have to fetch an interrupt */
+	lwz	r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+	cmpwi	r30, 0
+	bne	do_wrtee
+
+no_wrtee:
+	SCRATCH_RESTORE
+
+	/* Go back to caller: kvm.c ORs the real displacement into this branch */
+kvm_emulate_wrtee_branch:
+	b	.
+
+do_wrtee:
+	SCRATCH_RESTORE
+
+	/* Just fire off the wrtee if it's critical (kvm.c stores the original here) */
+kvm_emulate_wrtee_orig_ins:
+	wrtee	r0
+
+	b	kvm_emulate_wrtee_branch
+
+kvm_emulate_wrtee_end:
+	/* Patch points and length below are counted in 32-bit words (bytes / 4) */
+.global kvm_emulate_wrtee_branch_offs
+kvm_emulate_wrtee_branch_offs:
+	.long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_reg_offs
+kvm_emulate_wrtee_reg_offs:
+	.long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_orig_ins_offs
+kvm_emulate_wrtee_orig_ins_offs:
+	.long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_len
+kvm_emulate_wrtee_len:
+	.long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrteei_0
+kvm_emulate_wrteei_0:
+	SCRATCH_SAVE
+
+	/* Fetch old MSR in r31 */
+	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	/* Remove MSR_EE from old MSR */
+	rlwinm	r31, r31, 0, ~MSR_EE
+
+	/* Write new MSR value back */
+	STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+	SCRATCH_RESTORE
+
+	/* Go back to caller: kvm.c ORs the real displacement into this branch */
+kvm_emulate_wrteei_0_branch:
+	b	.
+kvm_emulate_wrteei_0_end:
+	/* Branch slot and length below are counted in 32-bit words (bytes / 4) */
+.global kvm_emulate_wrteei_0_branch_offs
+kvm_emulate_wrteei_0_branch_offs:
+	.long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4
+
+.global kvm_emulate_wrteei_0_len
+kvm_emulate_wrteei_0_len:
+	.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
+.global kvm_emulate_mtsrin
+kvm_emulate_mtsrin:
+	/* Template for mtsrin; kvm.c copies it and patches each copy */
+	SCRATCH_SAVE
+	/* MSR_DR and MSR_IR both clear in the shared MSR? Then only store the value */
+	LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+	andi.	r31, r31, MSR_DR | MSR_IR
+	beq	kvm_emulate_mtsrin_reg1
+
+	SCRATCH_RESTORE
+	/* Otherwise run the original mtsrin, which kvm.c writes over this nop */
+kvm_emulate_mtsrin_orig_ins:
+	nop
+	b	kvm_emulate_mtsrin_branch
+
+kvm_emulate_mtsrin_reg1:
+	/* r30 = (rX >> 26) & 0x3c, i.e. the top four bits of rX times 4 */
+	rlwinm  r30,r0,6,26,29
+
+kvm_emulate_mtsrin_reg2:
+	stw	r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30)
+
+	SCRATCH_RESTORE
+
+	/* Go back to caller: kvm.c ORs the real displacement into this branch */
+kvm_emulate_mtsrin_branch:
+	b	.
+kvm_emulate_mtsrin_end:
+	/* Patch points and length below are counted in 32-bit words (bytes / 4) */
+.global kvm_emulate_mtsrin_branch_offs
+kvm_emulate_mtsrin_branch_offs:
+	.long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg1_offs
+kvm_emulate_mtsrin_reg1_offs:
+	.long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg2_offs
+kvm_emulate_mtsrin_reg2_offs:
+	.long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_orig_ins_offs
+kvm_emulate_mtsrin_orig_ins_offs:
+	.long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_len
+kvm_emulate_mtsrin_len:
+	.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+	.balign 4
+	.global kvm_tmp
+kvm_tmp:
+	.space	(64 * 1024)
+	/* kvm_tmp_end marks the first byte past the 64K that kvm_alloc() hands out */
+.global kvm_tmp_end
+kvm_tmp_end:
+	/* kvm_tmp sits inside the template range, which the kvm.c scan skips */
+.global kvm_template_end
+kvm_template_end:
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
new file mode 100644
index 0000000000..f2e03ed423
--- /dev/null
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -0,0 +1,459 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+	L2CR functions
+	Copyright © 1997-1998 by PowerLogix R & D, Inc.
+
+*/
+/*
+	Thur, Dec. 12, 1998.
+	- First public release, contributed by PowerLogix.
+	***********
+	Sat, Aug. 7, 1999.
+	- Terry: Made sure code disabled interrupts before running. (Previously
+			it was assumed interrupts were already disabled).
+	- Terry: Updated for tentative G4 support.  4MB of memory is now flushed
+			instead of 2MB.  (Prob. only 3 is necessary).
+	- Terry: Updated for workaround to HID0[DPM] processor bug
+			during global invalidates.
+	***********
+	Thu, July 13, 2000.
+	- Terry: Added isync to correct for an errata.
+
+	22 August 2001.
+	- DanM: Finally added the 7450 patch I've had for the past
+		several months.  The L2CR is similar, but I'm going
+		to assume the user of these functions knows what they
+		are doing.
+
+	Author:	Terry Greeniaus (tgree@phys.ualberta.ca)
+	Please e-mail updates to this file to me, thanks!
+*/
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/feature-fixups.h>
+
+/* Usage:
+
+	When setting the L2CR register, you must do a few special
+	things.  If you are enabling the cache, you must perform a
+	global invalidate.  If you are disabling the cache, you must
+	flush the cache contents first.  This routine takes care of
+	doing these things.  When first enabling the cache, make sure
+	you pass in the L2CR you want, as well as passing in the
+	global invalidate bit set.  A global invalidate will only be
+	performed if the L2I bit is set in applyThis.  When enabling
+	the cache, you should also set the L2E bit in applyThis.  If
+	you want to modify the L2CR contents after the cache has been
+	enabled, the recommended procedure is to first call
+	_set_L2CR(0) to disable the cache and then call it again with
+	the new values for L2CR.  Examples:
+
+	_set_L2CR(0)		- disables the cache
+	_set_L2CR(0xB3A04000)	- enables my G3 upgrade card:
+				- L2E set to turn on the cache
+				- L2SIZ set to 1MB
+				- L2CLK set to 1:1
+				- L2RAM set to pipelined synchronous late-write
+				- L2I set to perform a global invalidation
+				- L2OH set to 0.5 nS
+				- L2DF set because this upgrade card
+				  requires it
+
+	A similar call should work for your card.  You need to know
+	the correct setting for your card and then place them in the
+	fields I have outlined above.  Other fields support optional
+	features, such as L2DO which caches only data, or L2TS which
+	causes cache pushes from the L1 cache to go to the L2 cache
+	instead of to main memory.
+
+IMPORTANT:
+	Starting with the 7450, the bits in this register have moved
+	or behave differently.  The Enable, Parity Enable, Size,
+	and L2 Invalidate are the only bits that have not moved.
+	The size is read-only for these processors with internal L2
+	cache, and the invalidate is a control as well as status.
+		-- Dan
+
+*/
+/*
+ * Summary: this procedure ignores the L2I bit in the value passed in,
+ * flushes the cache if it was already enabled, always invalidates the
+ * cache, then enables the cache if the L2E bit is set in the value
+ * passed in.
+ *   -- paulus.
+ */
+_GLOBAL(_set_L2CR)
+	/*
+	 * In:   r3 = requested L2CR value
+	 * Out:  r3 = -1 when the CPU does not have CPU_FTR_L2CR
+	 * Uses: r0, r4, r5 (requested enable bit), r7 (saved MSR),
+	 *       r8 (saved HID0), r9 (saved LR), ctr, cr0
+	 */
+	/* Make sure this is a 750 or 7400 chip */
+BEGIN_FTR_SECTION
+	li	r3,-1
+	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
+
+	mflr	r9
+
+	/* Stop DST streams */
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+	/* Turn off interrupts and data relocation. */
+	mfmsr	r7		/* Save MSR in r7 */
+	rlwinm	r4,r7,0,17,15	/* Turn off EE bit */
+	rlwinm	r4,r4,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r4
+	isync
+
+	/* Before we perform the global invalidation, we must disable dynamic
+	 * power management via HID0[DPM] to work around a processor bug where
+	 * DPM can possibly interfere with the state machine in the processor
+	 * that invalidates the L2 cache tags.
+	 */
+	mfspr	r8,SPRN_HID0		/* Save HID0 in r8 */
+	rlwinm	r4,r8,0,12,10		/* Turn off HID0[DPM] */
+	sync
+	mtspr	SPRN_HID0,r4		/* Disable DPM */
+	sync
+
+	/* Get the current enable bit of the L2CR into r4 */
+	mfspr	r4,SPRN_L2CR
+
+	/* Tweak some bits */
+	rlwinm	r5,r3,0,0,0		/* r5 contains the new enable bit */
+	rlwinm	r3,r3,0,11,9		/* Turn off the invalidate bit */
+	rlwinm	r3,r3,0,1,31		/* Turn off the enable bit */
+
+	/* Check to see if we need to flush */
+	rlwinm.	r4,r4,0,0,0
+	beq	2f			/* L2 currently disabled: no flush */
+
+	/* Flush the cache. First, read the first 4MB of memory (physical) to
+	 * put new data in the cache.  (Actually we only need
+	 * the size of the L2 cache plus the size of the L1 cache, but 4MB will
+	 * cover everything just to be safe).
+	 */
+
+	 /**** Might be a good idea to set L2DO here - to prevent instructions
+	       from getting into the cache.  But since we invalidate
+	       the next time we enable the cache it doesn't really matter.
+	       Don't do this unless you accommodate all processor variations.
+	       The bit moved on the 7450.....
+	  ****/
+
+BEGIN_FTR_SECTION
+	/* Disable L2 prefetch on some 745x and try to ensure
+	 * L2 prefetch engines are idle. As explained by errata
+	 * text, we can't be sure they are, we just hope very hard
+	 * that well be enough (sic !). At least I noticed Apple
+	 * doesn't even bother doing the dcbf's here...
+	 */
+	mfspr	r4,SPRN_MSSCR0
+	rlwinm	r4,r4,0,0,29		/* Clear the two low-order bits */
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lis	r4,KERNELBASE@h
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+	/* TODO: use HW flush assist when available */
+
+	lis	r4,0x0002		/* 0x20000 lines * 32 bytes = 4MB */
+	mtctr	r4
+	li	r4,0
+1:
+	lwzx	r0,0,r4
+	addi	r4,r4,32		/* Go to start of next cache line */
+	bdnz	1b
+	isync
+
+	/* Now, flush the first 4MB of memory */
+	lis	r4,0x0002
+	mtctr	r4
+	li	r4,0
+	sync
+1:
+	dcbf	0,r4
+	addi	r4,r4,32		/* Go to start of next cache line */
+	bdnz	1b
+
+2:
+	/* Set up the L2CR configuration bits (and switch L2 off) */
+	/* CPU errata: Make sure the mtspr below is already in the
+	 * L1 icache
+	 */
+	/* Control flow is 20 -> 21 -> 22 -> 23; the block at 22 is aligned
+	 * to an L1 cache line.
+	 */
+	b	20f
+	.balign	L1_CACHE_BYTES
+22:
+	sync
+	mtspr	SPRN_L2CR,r3
+	sync
+	b	23f
+20:
+	b	21f
+21:	sync
+	isync
+	b	22b
+
+23:
+	/* Perform a global invalidation */
+	oris	r3,r3,0x0020		/* Set the L2I bit */
+	sync
+	mtspr	SPRN_L2CR,r3
+	sync
+	isync				/* For errata */
+
+BEGIN_FTR_SECTION
+	/* On the 7450, we wait for the L2I bit to clear......
+	*/
+10:	mfspr	r3,SPRN_L2CR
+	andis.	r4,r3,0x0020
+	bne	10b
+	b	11f
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+	/* Wait for the invalidation to complete */
+	/* Poll until the low-order bit of L2CR reads back as zero */
+3:	mfspr	r3,SPRN_L2CR
+	rlwinm.	r4,r3,0,31,31
+	bne	3b
+
+11:	rlwinm	r3,r3,0,11,9		/* Turn off the L2I bit */
+	sync
+	mtspr	SPRN_L2CR,r3
+	sync
+
+	/* See if we need to enable the cache */
+	cmplwi	r5,0
+	beq	4f
+
+	/* Enable the cache */
+	oris	r3,r3,0x8000		/* Set the enable bit */
+	mtspr	SPRN_L2CR,r3
+	sync
+
+	/* Enable L2 HW prefetch on 744x/745x */
+BEGIN_FTR_SECTION
+	mfspr	r3,SPRN_MSSCR0
+	ori	r3,r3,3			/* Set the two low-order bits */
+	sync
+	mtspr	SPRN_MSSCR0,r3
+	sync
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+4:
+
+	/* Restore HID0[DPM] to whatever it was before */
+	sync
+	mtspr	SPRN_HID0,r8		/* Same SPR that was saved into r8 */
+	sync
+
+	/* Restore MSR (restores EE and DR bits to original state) */
+	mtmsr	r7
+	isync
+
+	mtlr	r9
+	blr
+
+_GLOBAL(_get_L2CR)
+	/* Return the L2CR contents */
+	/* r3 defaults to 0; the mfspr is guarded by CPU_FTR_L2CR */
+	li	r3,0
+BEGIN_FTR_SECTION
+	mfspr	r3,SPRN_L2CR
+END_FTR_SECTION_IFSET(CPU_FTR_L2CR)
+	blr
+
+
+/*
+ * Here is a similar routine for dealing with the L3 cache
+ * on the 745x family of chips
+ */
+
+_GLOBAL(_set_L3CR)
+	/*
+	 * In:   r3 = requested L3CR value
+	 * Out:  r3 = -1 when the CPU does not have CPU_FTR_L3CR
+	 * Uses: r0, r4, r5 (requested enable bit), r7 (saved MSR), ctr, cr0
+	 */
+	/* Make sure this is a 745x chip */
+BEGIN_FTR_SECTION
+	li	r3,-1
+	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
+
+	/* Turn off interrupts and data relocation. */
+	mfmsr	r7		/* Save MSR in r7 */
+	rlwinm	r4,r7,0,17,15
+	rlwinm	r4,r4,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r4
+	isync
+
+	/* Stop DST streams */
+	PPC_DSSALL
+	sync
+
+	/* Get the current enable bit of the L3CR into r4 */
+	mfspr	r4,SPRN_L3CR
+
+	/* Tweak some bits */
+	rlwinm	r5,r3,0,0,0		/* r5 contains the new enable bit */
+	rlwinm	r3,r3,0,22,20		/* Turn off the invalidate bit */
+	rlwinm	r3,r3,0,2,31		/* Turn off the enable & PE bits */
+	rlwinm	r3,r3,0,5,3		/* Turn off the clken bit */
+	/* Check to see if we need to flush */
+	rlwinm.	r4,r4,0,0,0
+	beq	2f
+
+	/* Flush the cache.
+	 */
+
+	/* TODO: use HW flush assist */
+
+	/* 0x80000 lines * 32 bytes = 16MB, each line loaded then flushed */
+	lis	r4,0x0008
+	mtctr	r4
+	li	r4,0
+1:
+	lwzx	r0,0,r4
+	dcbf	0,r4
+	addi	r4,r4,32		/* Go to start of next cache line */
+	bdnz	1b
+
+2:
+	/* Set up the L3CR configuration bits (and switch L3 off) */
+	sync
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	oris	r3,r3,L3CR_L3RES@h		/* Set reserved bit 5 */
+	mtspr	SPRN_L3CR,r3
+	sync
+	oris	r3,r3,L3CR_L3CLKEN@h		/* Set clken */
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	/* Wait for stabilize */
+	/* Empty 256-iteration bdnz delay loop */
+	li	r0,256
+	mtctr	r0
+1:	bdnz	1b
+
+	/* Perform a global invalidation */
+	ori	r3,r3,0x0400
+	sync
+	mtspr	SPRN_L3CR,r3
+	sync
+	isync
+
+	/* We wait for the L3I bit to clear...... */
+10:	mfspr	r3,SPRN_L3CR
+	andi.	r4,r3,0x0400
+	bne	10b
+
+	/* Clear CLKEN */
+	rlwinm	r3,r3,0,5,3		/* Turn off the clken bit */
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	/* Wait for stabilize */
+	li	r0,256
+	mtctr	r0
+1:	bdnz	1b
+
+	/* See if we need to enable the cache */
+	cmplwi	r5,0
+	beq	4f
+
+	/* Enable the cache */
+	oris	r3,r3,(L3CR_L3E | L3CR_L3CLKEN)@h
+	mtspr	SPRN_L3CR,r3
+	sync
+
+	/* Wait for stabilize */
+	li	r0,256
+	mtctr	r0
+1:	bdnz	1b
+
+	/* Restore MSR (restores EE and DR bits to original state) */
+4:
+	mtmsr	r7
+	isync
+	blr
+
+_GLOBAL(_get_L3CR)
+	/* Return the L3CR contents */
+	/* r3 defaults to 0; the mfspr is guarded by CPU_FTR_L3CR */
+	li	r3,0
+BEGIN_FTR_SECTION
+	mfspr	r3,SPRN_L3CR
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+	blr
+
+/* --- End of PowerLogix code ---
+ */
+
+
+/* __flush_disable_L1()	- Flush and disable L1 cache
+ *
+ * clobbers r0, r3, ctr, cr0
+ * Must be called with interrupts disabled and MMU enabled.
+ */
+_GLOBAL(__flush_disable_L1)
+	/* Stop pending altivec streams and memory accesses */
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ 	sync
+
+	/* Load counter to 0x4000 cache lines (512k) and
+	 * load the cache with data, starting at KERNELBASE
+	 */
+	li	r3,0x4000	/* 512kB / 32B */
+	mtctr	r3
+	lis	r3,KERNELBASE@h
+1:
+	lwz	r0,0(r3)
+	addi	r3,r3,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	isync
+	sync
+
+	/* Now flush those cache lines */
+	li	r3,0x4000	/* 512kB / 32B */
+	mtctr	r3
+	lis	r3,KERNELBASE@h
+1:
+	dcbf	0,r3
+	addi	r3,r3,0x0020	/* Go to start of next cache line */
+	bdnz	1b
+	sync
+
+	/* We can now disable the L1 cache (HID0:DCE, HID0:ICE) */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,18,15	/* Clear bits 16 and 17 */
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+ 	blr
+
+/* __inval_enable_L1	- Invalidate and enable L1 cache
+ *
+ * Assumes L1 is already disabled and MSR:EE is off
+ *
+ * clobbers r3
+ */
+_GLOBAL(__inval_enable_L1)
+	/* Enable and then Flash inval the instruction & data cache */
+	mfspr	r3,SPRN_HID0
+	/* First write: enable bits plus both invalidate bits set */
+	ori	r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	/* Second write: the xori clears the two invalidate bits just set */
+	xori	r3,r3, HID0_ICFI|HID0_DCI
+	mtspr	SPRN_HID0,r3
+	sync
+
+ 	blr
+_ASM_NOKPROBE_SYMBOL(__inval_enable_L1)
+
+
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
new file mode 100644
index 0000000000..1da2f6e7d2
--- /dev/null
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_core.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/serial_reg.h>
+#include <asm/io.h>
+#include <asm/mmu.h>
+#include <asm/serial.h>
+#include <asm/udbg.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/early_ioremap.h>
+
+/* DEBUG is undefined here, so DBG() below expands to an empty statement */
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+/* Number of usable slots in the two tables below */
+#define MAX_LEGACY_SERIAL_PORTS	8
+
+/*
+ * Port table handed to the "serial8250" platform device as platform_data.
+ * It has one slot more than MAX_LEGACY_SERIAL_PORTS; add_legacy_port()
+ * never fills an index >= MAX_LEGACY_SERIAL_PORTS when adding a new port.
+ * NOTE(review): the extra slot presumably acts as the zeroed terminator
+ * expected by the 8250 driver - confirm.
+ */
+static struct plat_serial8250_port
+legacy_serial_ports[MAX_LEGACY_SERIAL_PORTS+1];
+/* Per-port bookkeeping, indexed like legacy_serial_ports[] */
+static struct legacy_serial_info {
+	/* device-tree node of the port, reference taken with of_node_get() */
+	struct device_node		*np;
+	/* "current-speed" property value, 0 when the property is absent */
+	unsigned int			speed;
+	/* UART clock, same value as the port's uartclk */
+	unsigned int			clock;
+	/* non-zero: fixup_port_irq() may retry the irq lookup on the parent */
+	int				irq_check_parent;
+	/* translated address, 0 when none is available */
+	phys_addr_t			taddr;
+	/* early_ioremap() mapping used by the early console, NULL if unused */
+	void __iomem			*early_addr;
+} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
+
+/*
+ * Parent nodes accepted by find_legacy_serial_ports() when it scans
+ * "ns16550" compatible serial nodes.
+ */
+static const struct of_device_id legacy_serial_parents[] __initconst = {
+	{.type = "soc",},
+	{.type = "tsi-bridge",},
+	{.type = "opb", },
+	{.compatible = "ibm,opb",},
+	{.compatible = "simple-bus",},
+	{.compatible = "wrs,epld-localbus",},
+	{},
+};
+
+/* One past the highest slot index in use */
+static unsigned int legacy_serial_count;
+/* Slot index of the port matching the firmware stdout node, or -1 */
+static int legacy_serial_console = -1;
+
+/* Flags passed to add_legacy_port() by the add_legacy_*_port() helpers */
+static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+	UPF_SHARE_IRQ | UPF_FIXED_PORT;
+
+/*
+ * Register read hook installed on UPIO_TSI ports by add_legacy_port().
+ *
+ * @p:      port being accessed
+ * @offset: register index, scaled by p->regshift before use
+ *
+ * Returns the register value.  All registers are read with a byte access
+ * except the one whose scaled offset equals UART_IIR, which is fetched with
+ * a 32-bit read of the enclosing aligned word.
+ */
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+	int reg = offset << p->regshift;
+
+	if (reg != UART_IIR)
+		return readb(p->membase + reg);
+
+	/* UART_IIR % 4 == 2, so the value sits in bits 16..23 of the word */
+	return (readl(p->membase + (UART_IIR & ~3)) >> 16) & 0xff;
+}
+
+/*
+ * Register write hook installed on UPIO_TSI ports by add_legacy_port().
+ *
+ * @p:      port being accessed
+ * @offset: register index, scaled by p->regshift before use
+ * @value:  byte to write
+ *
+ * A write to UART_IER with UART_IER_UUE set in @value is dropped; every
+ * other write is performed as a byte access.
+ */
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+	int reg = offset << p->regshift;
+
+	if (reg == UART_IER && (value & UART_IER_UUE))
+		return;
+
+	writeb(value, p->membase + reg);
+}
+
+/*
+ * Record one 8250-compatible port in legacy_serial_ports[] and
+ * legacy_serial_infos[].
+ *
+ * @np:               device-tree node of the port; a reference is taken
+ * @want_index:       preferred slot, or a value outside
+ *                    [0, MAX_LEGACY_SERIAL_PORTS) to use the next free slot
+ * @iotype:           UPIO_* access type
+ * @base:             I/O port number for UPIO_PORT, else the mapbase
+ * @taddr:            translated address, or 0
+ * @irq:              interrupt number stored in the port entry
+ * @flags:            UPF_* flags stored in the port entry
+ * @irq_check_parent: non-zero to let fixup_port_irq() fall back to the
+ *                    parent node
+ *
+ * Optional "clock-frequency", "current-speed" and "reg-shift" properties of
+ * @np override the defaults (BASE_BAUD * 16, 0 and 0).
+ *
+ * Returns the slot index used, or -1 when the table is full.
+ */
+static int __init add_legacy_port(struct device_node *np, int want_index,
+				  int iotype, phys_addr_t base,
+				  phys_addr_t taddr, unsigned long irq,
+				  upf_t flags, int irq_check_parent)
+{
+	const __be32 *clk, *spd, *rs;
+	u32 clock = BASE_BAUD * 16;
+	u32 shift = 0;
+	int index;
+
+	/* get clock freq. if present */
+	clk = of_get_property(np, "clock-frequency", NULL);
+	if (clk && *clk)
+		clock = be32_to_cpup(clk);
+
+	/* get default speed if present */
+	spd = of_get_property(np, "current-speed", NULL);
+
+	/* get register shift if present */
+	rs = of_get_property(np, "reg-shift", NULL);
+	if (rs && *rs)
+		shift = be32_to_cpup(rs);
+
+	/* If we have a location index, then try to use it */
+	if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
+		index = want_index;
+	else
+		index = legacy_serial_count;
+
+	/* if our index is still out of range, that means the
+	 * array is full; we could scan for a free slot but it
+	 * makes little sense to bother, just skip the port
+	 */
+	if (index >= MAX_LEGACY_SERIAL_PORTS)
+		return -1;
+	if (index >= legacy_serial_count)
+		legacy_serial_count = index + 1;
+
+	/* Check if there is a port that already claimed our slot */
+	if (legacy_serial_infos[index].np != NULL) {
+		/* if we still have some room, move it, else override */
+		if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
+			printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
+			       index, legacy_serial_count);
+			/* The node reference moves along with the entry */
+			legacy_serial_ports[legacy_serial_count] =
+				legacy_serial_ports[index];
+			legacy_serial_infos[legacy_serial_count] =
+				legacy_serial_infos[index];
+			legacy_serial_count++;
+		} else {
+			printk(KERN_DEBUG "Replacing legacy port %d\n", index);
+			/* The old entry is discarded: drop the node reference
+			 * taken when it was recorded
+			 */
+			of_node_put(legacy_serial_infos[index].np);
+		}
+	}
+
+	/* Now fill the entry */
+	memset(&legacy_serial_ports[index], 0,
+	       sizeof(struct plat_serial8250_port));
+	if (iotype == UPIO_PORT)
+		legacy_serial_ports[index].iobase = base;
+	else
+		legacy_serial_ports[index].mapbase = base;
+
+	legacy_serial_ports[index].iotype = iotype;
+	legacy_serial_ports[index].uartclk = clock;
+	legacy_serial_ports[index].irq = irq;
+	legacy_serial_ports[index].flags = flags;
+	legacy_serial_ports[index].regshift = shift;
+	legacy_serial_infos[index].taddr = taddr;
+	legacy_serial_infos[index].np = of_node_get(np);
+	legacy_serial_infos[index].clock = clock;
+	legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0;
+	legacy_serial_infos[index].irq_check_parent = irq_check_parent;
+
+	/* TSI ports use dedicated register accessors */
+	if (iotype == UPIO_TSI) {
+		legacy_serial_ports[index].serial_in = tsi_serial_in;
+		legacy_serial_ports[index].serial_out = tsi_serial_out;
+	}
+
+	printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n",
+	       index, np);
+	printk(KERN_DEBUG "  %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
+	       (iotype == UPIO_PORT) ? "port" : "mem",
+	       (unsigned long long)base, (unsigned long long)taddr, irq,
+	       legacy_serial_ports[index].uartclk,
+	       legacy_serial_infos[index].speed);
+
+	return index;
+}
+
+/*
+ * Register a memory-mapped port that sits on one of the buses listed in
+ * legacy_serial_parents[].
+ *
+ * @np:      device-tree node of the serial port
+ * @soc_dev: node whose first address entry is translated to find the port
+ *
+ * The port is skipped (-1 is returned) when it has no "clock-frequency",
+ * has a "reg-offset" property, is marked "used-by-rtas", or its address
+ * cannot be obtained or translated.  Otherwise the result of
+ * add_legacy_port() is returned.
+ */
+static int __init add_legacy_soc_port(struct device_node *np,
+				      struct device_node *soc_dev)
+{
+	u64 addr;
+	const __be32 *addrp;
+	struct device_node *tsi;
+	int iotype;
+
+	/* We only support ports that have a clock frequency properly
+	 * encoded in the device-tree.
+	 */
+	if (!of_property_present(np, "clock-frequency"))
+		return -1;
+
+	/* if reg-offset don't try to use it */
+	if (of_property_present(np, "reg-offset"))
+		return -1;
+
+	/* if rtas uses this device, don't try to use it as well */
+	if (of_property_read_bool(np, "used-by-rtas"))
+		return -1;
+
+	/* Get the address */
+	addrp = of_get_address(soc_dev, 0, NULL, NULL);
+	if (addrp == NULL)
+		return -1;
+
+	addr = of_translate_address(soc_dev, addrp);
+	if (addr == OF_BAD_ADDR)
+		return -1;
+
+	/* The parent is only needed to pick the access type, so take the
+	 * reference as late as possible and drop it right away; it used to
+	 * be leaked on every return path.
+	 */
+	tsi = of_get_parent(np);
+	iotype = of_node_is_type(tsi, "tsi-bridge") ? UPIO_TSI : UPIO_MEM;
+	of_node_put(tsi);
+
+	/* Add port, irq will be dealt with later. We passed a translated
+	 * IO port value. It will be fixed up later along with the irq
+	 */
+	return add_legacy_port(np, -1, iotype, addr, addr,
+			       0, legacy_port_flags, 0);
+}
+
+/*
+ * Register a port-I/O serial port found below an ISA or LPC bridge.
+ *
+ * @np:      device-tree node of the serial port
+ * @isa_brg: parent bridge node
+ *
+ * Returns the slot index from add_legacy_port(), or -1 when the node has
+ * no "reg" property or "reg" does not describe an I/O port.
+ */
+static int __init add_legacy_isa_port(struct device_node *np,
+				      struct device_node *isa_brg)
+{
+	const __be32 *reg;
+	const char *loc;
+	int index = -1;
+	u64 taddr = 0;
+
+	DBG(" -> add_legacy_isa_port(%pOF)\n", np);
+
+	/* "reg" gives the ISA port number; only I/O space is supported */
+	reg = of_get_property(np, "reg", NULL);
+	if (reg == NULL || !(be32_to_cpu(reg[0]) & 0x00000001))
+		return -1;
+
+	/* An "ibm,aix-loc" of the form "S<n>" selects slot n - 1 */
+	loc = of_get_property(np, "ibm,aix-loc", NULL);
+	if (loc && *loc == 'S')
+		index = simple_strtol(loc + 1, NULL, 0) - 1;
+
+	/* Translate the ISA address.  On failure the port is still
+	 * registered, with no translated address, so that the serial driver
+	 * can pick it up as an IO port later.
+	 *
+	 * Translation is not even attempted on a P8 lpc bridge without
+	 * "ranges": it is known not to be directly mapped.
+	 */
+	if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") ||
+	    of_property_present(isa_brg, "ranges")) {
+		taddr = of_translate_address(np, reg);
+		if (taddr == OF_BAD_ADDR)
+			taddr = 0;
+	}
+
+	/* The irq is dealt with later */
+	return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
+			       taddr, 0, legacy_port_flags, 0);
+}
+
+#ifdef CONFIG_PCI
+/*
+ * Register a serial port that lives on a PCI device.
+ *
+ * @np:      device-tree node of the port
+ * @pci_dev: node of the PCI device; equal to @np when the device has no
+ *           per-port child nodes
+ *
+ * Returns the slot index from add_legacy_port(), or -1 when the port has
+ * no "clock-frequency" or its BAR 0 address cannot be read or translated.
+ */
+static int __init add_legacy_pci_port(struct device_node *np,
+				      struct device_node *pci_dev)
+{
+	const __be32 *addrp;
+	unsigned int flags;
+	u64 addr, base;
+	int iotype, stride;
+	int index = -1, lindex = 0;
+
+	DBG(" -> add_legacy_pci_port(%pOF)\n", np);
+
+	/* Only ports with a clock frequency encoded in the device-tree
+	 * (i.e. with an fcode) are supported.  Anything else cannot be used
+	 * this early and is normally probed by the generic 8250_pci driver
+	 * later on: 8250 compatible UARTs on PCI need all sorts of quirks
+	 * (port offsets etc...) that this code doesn't know about.
+	 */
+	if (!of_property_present(np, "clock-frequency"))
+		return -1;
+
+	/* Only BAR 0 is supported for now */
+	addrp = of_get_pci_address(pci_dev, 0, NULL, &flags);
+	if (addrp == NULL)
+		return -1;
+
+	addr = of_translate_address(pci_dev, addrp);
+	if (addr == OF_BAD_ADDR)
+		return -1;
+
+	/* MMIO uses the translated address as its base; PIO uses the domain
+	 * local IO base, which is fixed up later.
+	 */
+	if (flags & IORESOURCE_MEM) {
+		iotype = UPIO_MEM;
+		base = addr;
+	} else {
+		iotype = UPIO_PORT;
+		base = of_read_number(&addrp[2], 1);
+	}
+
+	/* For a sub-device of the PCI device, take the index from the first
+	 * cell of its "reg" property when that is below 4.
+	 */
+	if (np != pci_dev) {
+		const __be32 *reg = of_get_property(np, "reg", NULL);
+
+		if (reg) {
+			u32 nr = be32_to_cpup(reg);
+
+			if (nr < 4)
+				index = lindex = nr;
+		}
+	}
+
+	/* The local index is the Nth port in the PCI chip, and the distance
+	 * between ports is device specific.  The EXAR parts listed here use
+	 * 0x200; everything else gets the most common value, 8.  Add a
+	 * special case here if your UART needs something different.
+	 */
+	if (of_device_is_compatible(pci_dev, "pci13a8,152") ||
+	    of_device_is_compatible(pci_dev, "pci13a8,154") ||
+	    of_device_is_compatible(pci_dev, "pci13a8,158"))
+		stride = 0x200;
+	else
+		stride = 8;
+
+	addr += stride * lindex;
+	base += stride * lindex;
+
+	/* The irq and the translated IO port value are fixed up later */
+	return add_legacy_port(np, index, iotype, base, addr, 0,
+			       legacy_port_flags, np != pci_dev);
+}
+#endif
+
+/*
+ * Point the udbg UART backend at legacy port @console and program it.
+ *
+ * A port with a translated address is mapped with early_ioremap() and
+ * driven through MMIO.  Without one, only a UPIO_PORT port is usable, and
+ * only when isa_io_special is set.  In every other case, or when the early
+ * mapping fails, the function returns without doing anything.
+ */
+static void __init setup_legacy_serial_console(int console)
+{
+	struct plat_serial8250_port *port = &legacy_serial_ports[console];
+	struct legacy_serial_info *info = &legacy_serial_infos[console];
+	unsigned int stride = 1 << port->regshift;
+
+	if (info->taddr) {
+		/* A translated MMIO address has been found */
+		info->early_addr = early_ioremap(info->taddr, 0x1000);
+		if (!info->early_addr)
+			return;
+		udbg_uart_init_mmio(info->early_addr, stride);
+	} else if (port->iotype == UPIO_PORT && isa_io_special) {
+		/* PIO, and untranslated PIO is supported */
+		udbg_uart_init_pio(port->iobase, stride);
+	} else {
+		return;
+	}
+
+	/* No speed in the device-tree: try to query the current one */
+	if (!info->speed)
+		info->speed = udbg_probe_uart_speed(info->clock);
+
+	DBG("default console speed = %d\n", info->speed);
+	udbg_uart_setup(info->speed, info->clock);
+}
+
+/*
+ * Early initcall: replace the early_ioremap() mapping of the console UART
+ * with a regular ioremap() one.
+ *
+ * Returns 0 when there is nothing to do or on success, -ENOMEM when the
+ * new mapping cannot be created (the early mapping is then left in place).
+ */
+static int __init ioremap_legacy_serial_console(void)
+{
+	struct legacy_serial_info *info;
+	void __iomem *vaddr;
+	unsigned int stride;
+
+	if (legacy_serial_console < 0)
+		return 0;
+
+	info = &legacy_serial_infos[legacy_serial_console];
+	if (!info->early_addr)
+		return 0;
+
+	vaddr = ioremap(info->taddr, 0x1000);
+	if (WARN_ON(!vaddr))
+		return -ENOMEM;
+
+	/* Switch udbg to the new mapping before tearing down the old one */
+	stride = 1 << legacy_serial_ports[legacy_serial_console].regshift;
+	udbg_uart_init_mmio(vaddr, stride);
+	early_iounmap(info->early_addr, 0x1000);
+	info->early_addr = NULL;
+
+	return 0;
+}
+early_initcall(ioremap_legacy_serial_console);
+
+/*
+ * Called very early, as part of setup_system() or eventually setup_arch(),
+ * basically before anything else in this file.
+ *
+ * Builds the list of 8250-compatible serial ports of the machine from the
+ * Open Firmware device-tree: "ns16550" ports below a known parent bus, ISA
+ * and LPC ports and, with CONFIG_PCI, PCI ports.  The list allows a very
+ * early boot console to be initialized; it is also used later to hand the
+ * non-PCI ports to the 8250 driver and to pick the default console port.
+ */
+void __init find_legacy_serial_ports(void)
+{
+	struct device_node *np, *stdout = NULL;
+	const char *path;
+	int index;
+
+	DBG(" -> find_legacy_serial_port()\n");
+
+	/* Find the firmware console node, if any */
+	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (!path)
+		path = of_get_property(of_chosen, "stdout-path", NULL);
+	if (!path) {
+		DBG(" no linux,stdout-path !\n");
+	} else {
+		stdout = of_find_node_by_path(path);
+		if (stdout)
+			DBG("stdout is %pOF\n", stdout);
+	}
+
+	/* Iterate over all the 16550 ports, looking for known parents */
+	for_each_compatible_node(np, "serial", "ns16550") {
+		struct device_node *parent = of_get_parent(np);
+
+		if (!parent)
+			continue;
+		if (of_match_node(legacy_serial_parents, parent) != NULL &&
+		    of_device_is_available(np)) {
+			index = add_legacy_soc_port(np, np);
+			if (index >= 0 && np == stdout)
+				legacy_serial_console = index;
+		}
+		of_node_put(parent);
+	}
+
+	/* Next, fill our array with ISA ports */
+	for_each_node_by_type(np, "serial") {
+		struct device_node *isa = of_get_parent(np);
+
+		if ((of_node_name_eq(isa, "isa") || of_node_name_eq(isa, "lpc")) &&
+		    of_device_is_available(np)) {
+			index = add_legacy_isa_port(np, isa);
+			if (index >= 0 && np == stdout)
+				legacy_serial_console = index;
+		}
+		of_node_put(isa);
+	}
+
+#ifdef CONFIG_PCI
+	/* Next, try to locate PCI ports */
+	for (np = of_find_all_nodes(NULL); np; np = of_find_all_nodes(np)) {
+		struct device_node *parent = of_get_parent(np);
+		struct device_node *pci = NULL;
+
+		/* Only serial nodes that are not below an "isa" node */
+		if (!of_node_name_eq(parent, "isa") &&
+		    (of_node_name_eq(np, "serial") ||
+		     of_node_is_type(np, "serial"))) {
+			/* Check for known pciclass, and also check whether
+			 * we have a device with child nodes for ports or not
+			 */
+			if (of_device_is_compatible(np, "pciclass,0700") ||
+			    of_device_is_compatible(np, "pciclass,070002"))
+				pci = np;
+			else if (of_device_is_compatible(parent, "pciclass,0700") ||
+				 of_device_is_compatible(parent, "pciclass,070002"))
+				pci = parent;
+
+			if (pci) {
+				index = add_legacy_pci_port(np, pci);
+				if (index >= 0 && np == stdout)
+					legacy_serial_console = index;
+			}
+		}
+		of_node_put(parent);
+	}
+#endif
+
+	of_node_put(stdout);
+
+	DBG("legacy_serial_console = %d\n", legacy_serial_console);
+	if (legacy_serial_console >= 0)
+		setup_legacy_serial_console(legacy_serial_console);
+	DBG(" <- find_legacy_serial_port()\n");
+}
+
+/*
+ * Platform device registered by serial_dev_init(); its platform_data is
+ * the legacy_serial_ports[] table.
+ */
+static struct platform_device serial_device = {
+	.name	= "serial8250",
+	.id	= PLAT8250_DEV_PLATFORM,
+	.dev	= {
+		.platform_data = legacy_serial_ports,
+	},
+};
+
+/*
+ * Resolve the interrupt of legacy port @index and store it in @port.
+ *
+ * @index: slot in legacy_serial_infos[]
+ * @np:    device-tree node of the port
+ * @port:  port entry to update
+ *
+ * The interrupt is looked up on @np first and, when the port was recorded
+ * with irq_check_parent set, on the parent of @np as a fallback.  @port is
+ * left untouched when no interrupt can be mapped.  For "fsl,ns16550" ports
+ * the Freescale interrupt handler is installed when it is reachable.
+ */
+static void __init fixup_port_irq(int index,
+				  struct device_node *np,
+				  struct plat_serial8250_port *port)
+{
+	struct device_node *parent;
+	unsigned int virq;
+
+	DBG("fixup_port_irq(%d)\n", index);
+
+	virq = irq_of_parse_and_map(np, 0);
+	if (!virq && legacy_serial_infos[index].irq_check_parent) {
+		/* Keep np pointing at the port itself: the compatible check
+		 * and the warning below are about the port, and the parent
+		 * reference is dropped right here.
+		 */
+		parent = of_get_parent(np);
+		if (parent == NULL)
+			return;
+		virq = irq_of_parse_and_map(parent, 0);
+		of_node_put(parent);
+	}
+	if (!virq)
+		return;
+
+	port->irq = virq;
+
+	if (IS_ENABLED(CONFIG_SERIAL_8250) &&
+	    of_device_is_compatible(np, "fsl,ns16550")) {
+		if (IS_REACHABLE(CONFIG_SERIAL_8250_FSL)) {
+			port->handle_irq = fsl8250_handle_irq;
+			port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+		} else {
+			pr_warn_once("Not activating Freescale specific workaround for device %pOFP\n",
+				     np);
+		}
+	}
+}
+
+/*
+ * Rebase the I/O port number of a UPIO_PORT port.
+ *
+ * With CONFIG_PCI, when @np belongs to a PCI host bridge, the distance
+ * between that bridge's io_base_virt and the global I/O base (pci_io_base
+ * on PPC64, isa_io_base otherwise) is added to port->iobase.  Without
+ * CONFIG_PCI this function does nothing.
+ */
+static void __init fixup_port_pio(int index,
+				  struct device_node *np,
+				  struct plat_serial8250_port *port)
+{
+#ifdef CONFIG_PCI
+	struct pci_controller *hose;
+	unsigned long offset;
+
+	DBG("fixup_port_pio(%d)\n", index);
+
+	hose = pci_find_hose_for_OF_device(np);
+	if (!hose)
+		return;
+
+	offset = (unsigned long)hose->io_base_virt -
+#ifdef CONFIG_PPC64
+		pci_io_base;
+#else
+		isa_io_base;
+#endif
+	DBG("port %d, IO %lx -> %lx\n",
+	    index, port->iobase, port->iobase + offset);
+	port->iobase += offset;
+#endif
+}
+
+/*
+ * Map the first 0x100 bytes at port->mapbase and store the result in
+ * port->membase.  The ioremap() result is stored as is, without a check.
+ */
+static void __init fixup_port_mmio(int index,
+				   struct device_node *np,
+				   struct plat_serial8250_port *port)
+{
+	void __iomem *regs;
+
+	DBG("fixup_port_mmio(%d)\n", index);
+
+	regs = ioremap(port->mapbase, 0x100);
+	port->membase = regs;
+}
+
+/*
+ * Device initcall, hopefully run before the PCI bus is probed and/or the
+ * 8250 driver is loaded: our platform devices must be registered before
+ * the 8250 PCI ones are detected, as some of them must properly "override"
+ * the platform ones.
+ *
+ * Interrupt numbers could not be resolved earlier because the interrupt
+ * maps had not been parsed, and PIO addresses could not be corrected
+ * because the PHBs' virtual IO space was not assigned yet.  Both are fixed
+ * up here, MMIO ports are mapped, and the ports are then registered for
+ * use by the 8250 driver when it finally loads.
+ *
+ * Returns -ENODEV when no legacy port was found, otherwise the result of
+ * platform_device_register().
+ */
+static int __init serial_dev_init(void)
+{
+	int i;
+
+	if (!legacy_serial_count)
+		return -ENODEV;
+
+	/* Interrupts and IO ports must be fixed up before registration */
+	DBG("Fixing serial ports interrupts and IO ports ...\n");
+
+	for (i = 0; i < legacy_serial_count; i++) {
+		struct device_node *np = legacy_serial_infos[i].np;
+		struct plat_serial8250_port *port = &legacy_serial_ports[i];
+
+		if (!port->irq)
+			fixup_port_irq(i, np, port);
+
+		switch (port->iotype) {
+		case UPIO_PORT:
+			fixup_port_pio(i, np, port);
+			break;
+		case UPIO_MEM:
+		case UPIO_TSI:
+			fixup_port_mmio(i, np, port);
+			break;
+		default:
+			break;
+		}
+	}
+
+	DBG("Registering platform serial ports\n");
+
+	return platform_device_register(&serial_device);
+}
+device_initcall(serial_dev_init);
+
+
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+/*
+ * Called very early, as part of console_init() (typically just after
+ * time_init()).  This function is responsible for trying to find a good
+ * default console on serial ports: it matches the Open Firmware default
+ * output against the ports probed earlier by find_legacy_serial_ports().
+ *
+ * Returns -EBUSY when the command line already has "console=", -ENODEV
+ * when no matching port is found, otherwise the result of
+ * add_preferred_console().
+ */
+static int __init check_legacy_serial_console(void)
+{
+	static char __initdata opt[16];
+	struct device_node *prom_stdout = NULL;
+	int i, speed = 0, offset = 0;
+	const char *name;
+	const __be32 *spd;
+
+	DBG(" -> check_legacy_serial_console()\n");
+
+	/* The user has requested a console so this is already set up. */
+	if (strstr(boot_command_line, "console=")) {
+		DBG(" console was specified !\n");
+		return -EBUSY;
+	}
+
+	if (!of_chosen) {
+		DBG(" of_chosen is NULL !\n");
+		return -ENODEV;
+	}
+
+	if (legacy_serial_console < 0) {
+		DBG(" legacy_serial_console not found !\n");
+		return -ENODEV;
+	}
+
+	/* The phandle we get from OF is weird, so use the full path */
+	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (!name)
+		name = of_get_property(of_chosen, "stdout-path", NULL);
+	if (!name) {
+		DBG(" no stdout-path !\n");
+		return -ENODEV;
+	}
+
+	prom_stdout = of_find_node_by_path(name);
+	if (!prom_stdout) {
+		DBG(" can't find stdout package %s !\n", name);
+		return -ENODEV;
+	}
+	DBG("stdout is %pOF\n", prom_stdout);
+
+	name = of_get_property(prom_stdout, "name", NULL);
+	if (!name) {
+		DBG(" stdout package has no name !\n");
+		goto not_found;
+	}
+	spd = of_get_property(prom_stdout, "current-speed", NULL);
+	if (spd)
+		speed = be32_to_cpup(spd);
+
+	if (strcmp(name, "serial") != 0)
+		goto not_found;
+
+	/* Look for the stdout node in the probed array */
+	for (i = 0; i < legacy_serial_count; i++) {
+		if (legacy_serial_infos[i].np == prom_stdout)
+			break;
+	}
+	if (i >= legacy_serial_count)
+		goto not_found;
+
+	/* The speed recorded at probe time takes precedence */
+	offset = i;
+	speed = legacy_serial_infos[i].speed;
+
+	of_node_put(prom_stdout);
+
+	DBG("Found serial console at ttyS%d\n", offset);
+
+	if (!speed)
+		return add_preferred_console("ttyS", offset, NULL);
+
+	sprintf(opt, "%d", speed);
+	return add_preferred_console("ttyS", offset, opt);
+
+ not_found:
+	DBG("No preferred console found !\n");
+	of_node_put(prom_stdout);
+	return -ENODEV;
+}
+console_initcall(check_legacy_serial_console);
+
+#endif /* CONFIG_SERIAL_8250_CONSOLE */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 0000000000..219f28637a
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,771 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Machine check exception handling.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/irq_work.h>
+#include <linux/extable.h>
+#include <linux/ftrace.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/mce.h>
+#include <asm/nmi.h>
+
+#include "setup.h"
+
+static void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
+static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
+static void mce_set_error_info(struct machine_check_event *mce,
+			       struct mce_error_info *mce_err)
+{
+	mce->error_type = mce_err->error_type;
+	switch (mce_err->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+		break;
+	case MCE_ERROR_TYPE_USER:
+		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+		break;
+	case MCE_ERROR_TYPE_UNKNOWN:
+	default:
+		break;
+	}
+}
+
+void mce_irq_work_queue(void)
+{
+	/* Raise decrementer interrupt */
+	arch_irq_work_raise();
+	set_mce_pending_irq_work();
+}
+
+/*
+ * Decode and save high level MCE information into the next free slot of the
+ * per cpu array of machine_check_event structures; the event is dropped when
+ * all MAX_MC_EVT slots are in use. @addr == 0 means "no address to record".
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+		    struct mce_error_info *mce_err,
+		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
+{
+	int index = local_paca->mce_info->mce_nest_count++;
+	struct machine_check_event *mce;
+
+	/*
+	 * Return if we don't have enough space to log mce event.
+	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok: the slot
+	 * pointer is only formed once the index is known to be in range.
+	 */
+	if (index >= MAX_MC_EVT)
+		return;
+	mce = &local_paca->mce_info->mce_event[index];
+
+	/* Populate generic machine check info */
+	mce->version = MCE_V1;
+	mce->srr0 = nip;
+	mce->srr1 = regs->msr;
+	mce->gpr3 = regs->gpr[3];
+	mce->in_use = 1;
+	mce->cpu = get_paca()->paca_index;
+
+	/* Mark it recovered if we have handled it and MSR(RI=1). */
+	if (handled && (regs->msr & MSR_RI))
+		mce->disposition = MCE_DISPOSITION_RECOVERED;
+	else
+		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+
+	mce->initiator = mce_err->initiator;
+	mce->severity = mce_err->severity;
+	mce->sync_error = mce_err->sync_error;
+	mce->error_class = mce_err->error_class;
+
+	/*
+	 * Populate the mce error_type and type-specific error_type.
+	 */
+	mce_set_error_info(mce, mce_err);
+	if (mce->error_type == MCE_ERROR_TYPE_UE)
+		mce->u.ue_error.ignore_event = mce_err->ignore_event;
+
+	/*
+	 * Raise irq work, So that we don't miss to log the error for
+	 * unrecoverable errors.
+	 */
+	if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
+		mce_irq_work_queue();
+
+	if (!addr)
+		return;
+
+	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+		mce->u.tlb_error.effective_address_provided = true;
+		mce->u.tlb_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+		mce->u.slb_error.effective_address_provided = true;
+		mce->u.slb_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+		mce->u.erat_error.effective_address_provided = true;
+		mce->u.erat_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+		mce->u.user_error.effective_address_provided = true;
+		mce->u.user_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+		mce->u.ra_error.effective_address_provided = true;
+		mce->u.ra_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+		mce->u.link_error.effective_address_provided = true;
+		mce->u.link_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+		mce->u.ue_error.effective_address_provided = true;
+		mce->u.ue_error.effective_address = addr;
+		if (phys_addr != ULONG_MAX) {
+			mce->u.ue_error.physical_address_provided = true;
+			mce->u.ue_error.physical_address = phys_addr;
+			machine_check_ue_event(mce);
+		}
+	}
+}
+
+/*
+ * get_mce_event:
+ *	mce	Pointer to machine_check_event structure to be filled.
+ *	release Flag to indicate whether to free the event slot or not.
+ *		0 <= do not release the mce event. Caller will invoke
+ *		     release_mce_event() once event has been consumed.
+ *		1 <= release the slot.
+ *
+ *	return	1 = success
+ *		0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handle routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until ret_from_except() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+	int index = local_paca->mce_info->mce_nest_count - 1;
+	struct machine_check_event *mc_evt;
+	int ret = 0;
+
+	/* Sanity check */
+	if (index < 0)
+		return ret;
+
+	/* Check if we have MCE info to process. */
+	if (index < MAX_MC_EVT) {
+		mc_evt = &local_paca->mce_info->mce_event[index];
+		/* Copy the event structure and release the original */
+		if (mce)
+			*mce = *mc_evt;
+		if (release)
+			mc_evt->in_use = 0;
+		ret = 1;
+	}
+	/* Decrement the count to free the slot. */
+	if (release)
+		local_paca->mce_info->mce_nest_count--;
+
+	return ret;
+}
+
+void release_mce_event(void)
+{
+	get_mce_event(NULL, true);
+}
+
+static void machine_check_ue_work(void)
+{
+	schedule_work(&mce_ue_event_work);
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+static void machine_check_ue_event(struct machine_check_event *evt)
+{
+	int index;
+
+	index = local_paca->mce_info->mce_ue_count++;
+	/* If queue is full, just return for now. */
+	if (index >= MAX_MC_EVT) {
+		local_paca->mce_info->mce_ue_count--;
+		return;
+	}
+	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
+	       evt, sizeof(*evt));
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+	int index;
+	struct machine_check_event evt;
+
+	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+		return;
+
+	index = local_paca->mce_info->mce_queue_count++;
+	/* If queue is full, just return for now. */
+	if (index >= MAX_MC_EVT) {
+		local_paca->mce_info->mce_queue_count--;
+		return;
+	}
+	memcpy(&local_paca->mce_info->mce_event_queue[index],
+	       &evt, sizeof(evt));
+
+	mce_irq_work_queue();
+}
+
+void mce_common_process_ue(struct pt_regs *regs,
+			   struct mce_error_info *mce_err)
+{
+	const struct exception_table_entry *entry;
+
+	entry = search_kernel_exception_table(regs->nip);
+	if (entry) {
+		mce_err->ignore_event = true;
+		regs_set_return_ip(regs, extable_fixup(entry));
+	}
+}
+
+/*
+ * Process pending UE events from the per cpu mce_ue_event_queue. This is the
+ * handler of the mce_ue_event_work work item queued by machine_check_ue_work().
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+	int index;
+	struct machine_check_event *evt;
+
+	while (local_paca->mce_info->mce_ue_count > 0) {
+		index = local_paca->mce_info->mce_ue_count - 1;
+		evt = &local_paca->mce_info->mce_ue_event_queue[index];
+		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
+#ifdef CONFIG_MEMORY_FAILURE
+		/*
+		 * This should probably be queued elsewhere, but
+		 * oh! well
+		 *
+		 * Don't report this machine check if the caller has
+		 * asked us to ignore the event: it has a fixup handler which
+		 * will do the appropriate error handling and reporting.
+		 */
+		if (evt->error_type == MCE_ERROR_TYPE_UE) {
+			if (evt->u.ue_error.ignore_event) {
+				local_paca->mce_info->mce_ue_count--;
+				continue;
+			}
+
+			if (evt->u.ue_error.physical_address_provided) {
+				unsigned long pfn;
+
+				pfn = evt->u.ue_error.physical_address >>
+					PAGE_SHIFT;
+				memory_failure(pfn, 0);
+			} else
+				pr_warn("Failed to identify bad address from "
+					"where the uncorrectable error (UE) "
+					"was generated\n");
+		}
+#endif
+		local_paca->mce_info->mce_ue_count--;
+	}
+}
+/*
+ * Print every event pending in the per cpu mce_event_queue, newest first, and
+ * empty the queue. Called from mce_run_irq_context_handlers().
+ */
+static void machine_check_process_queued_event(void)
+{
+	struct machine_check_event *evt;
+	int index;
+
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+	/*
+	 * For now just print it to console.
+	 * TODO: log this error event to FSP or nvram.
+	 */
+	while (local_paca->mce_info->mce_queue_count > 0) {
+		index = local_paca->mce_info->mce_queue_count - 1;
+		evt = &local_paca->mce_info->mce_event_queue[index];
+
+		/*
+		 * UE events flagged ignore_event are released unprinted.
+		 */
+		if (evt->error_type != MCE_ERROR_TYPE_UE ||
+		    !evt->u.ue_error.ignore_event)
+			machine_check_print_event_info(evt, false, false);
+		local_paca->mce_info->mce_queue_count--;
+	}
+}
+
+void set_mce_pending_irq_work(void)
+{
+	local_paca->mce_pending_irq_work = 1;
+}
+
+void clear_mce_pending_irq_work(void)
+{
+	local_paca->mce_pending_irq_work = 0;
+}
+
+void mce_run_irq_context_handlers(void)
+{
+	if (unlikely(local_paca->mce_pending_irq_work)) {
+		if (ppc_md.machine_check_log_err)
+			ppc_md.machine_check_log_err();
+		machine_check_process_queued_event();
+		machine_check_ue_work();
+		clear_mce_pending_irq_work();
+	}
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt,
+				    bool user_mode, bool in_guest)
+{
+	const char *level, *sevstr, *subtype, *err_type, *initiator;
+	uint64_t ea = 0, pa = 0;
+	int n = 0;
+	char dar_str[50];
+	char pa_str[50];
+	static const char *mc_ue_types[] = {
+		"Indeterminate",
+		"Instruction fetch",
+		"Page table walk ifetch",
+		"Load/Store",
+		"Page table walk Load/Store",
+	};
+	static const char *mc_slb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_erat_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_tlb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_user_types[] = {
+		"Indeterminate",
+		"tlbie(l) invalid",
+		"scv invalid",
+	};
+	static const char *mc_ra_types[] = {
+		"Indeterminate",
+		"Instruction fetch (bad)",
+		"Instruction fetch (foreign/control memory)",
+		"Page table walk ifetch (bad)",
+		"Page table walk ifetch (foreign/control memory)",
+		"Load (bad)",
+		"Store (bad)",
+		"Page table walk Load/Store (bad)",
+		"Page table walk Load/Store (foreign/control memory)",
+		"Load/Store (foreign/control memory)",
+	};
+	static const char *mc_link_types[] = {
+		"Indeterminate",
+		"Instruction fetch (timeout)",
+		"Page table walk ifetch (timeout)",
+		"Load (timeout)",
+		"Store (timeout)",
+		"Page table walk Load/Store (timeout)",
+	};
+	static const char *mc_error_class[] = {
+		"Unknown",
+		"Hardware error",
+		"Probable Hardware error (some chance of software cause)",
+		"Software error",
+		"Probable Software error (some chance of hardware cause)",
+	};
+
+	/* Print things out */
+	if (evt->version != MCE_V1) {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       evt->version);
+		return;
+	}
+	switch (evt->severity) {
+	case MCE_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case MCE_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "Warning";
+		break;
+	case MCE_SEV_SEVERE:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case MCE_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	switch(evt->initiator) {
+	case MCE_INITIATOR_CPU:
+		initiator = "CPU";
+		break;
+	case MCE_INITIATOR_PCI:
+		initiator = "PCI";
+		break;
+	case MCE_INITIATOR_ISA:
+		initiator = "ISA";
+		break;
+	case MCE_INITIATOR_MEMORY:
+		initiator = "Memory";
+		break;
+	case MCE_INITIATOR_POWERMGM:
+		initiator = "Power Management";
+		break;
+	case MCE_INITIATOR_UNKNOWN:
+	default:
+		initiator = "Unknown";
+		break;
+	}
+
+	switch (evt->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		err_type = "UE";
+		subtype = evt->u.ue_error.ue_error_type <
+			ARRAY_SIZE(mc_ue_types) ?
+			mc_ue_types[evt->u.ue_error.ue_error_type]
+			: "Unknown";
+		if (evt->u.ue_error.effective_address_provided)
+			ea = evt->u.ue_error.effective_address;
+		if (evt->u.ue_error.physical_address_provided)
+			pa = evt->u.ue_error.physical_address;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		err_type = "SLB";
+		subtype = evt->u.slb_error.slb_error_type <
+			ARRAY_SIZE(mc_slb_types) ?
+			mc_slb_types[evt->u.slb_error.slb_error_type]
+			: "Unknown";
+		if (evt->u.slb_error.effective_address_provided)
+			ea = evt->u.slb_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		err_type = "ERAT";
+		subtype = evt->u.erat_error.erat_error_type <
+			ARRAY_SIZE(mc_erat_types) ?
+			mc_erat_types[evt->u.erat_error.erat_error_type]
+			: "Unknown";
+		if (evt->u.erat_error.effective_address_provided)
+			ea = evt->u.erat_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		err_type = "TLB";
+		subtype = evt->u.tlb_error.tlb_error_type <
+			ARRAY_SIZE(mc_tlb_types) ?
+			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+			: "Unknown";
+		if (evt->u.tlb_error.effective_address_provided)
+			ea = evt->u.tlb_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_USER:
+		err_type = "User";
+		subtype = evt->u.user_error.user_error_type <
+			ARRAY_SIZE(mc_user_types) ?
+			mc_user_types[evt->u.user_error.user_error_type]
+			: "Unknown";
+		if (evt->u.user_error.effective_address_provided)
+			ea = evt->u.user_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		err_type = "Real address";
+		subtype = evt->u.ra_error.ra_error_type <
+			ARRAY_SIZE(mc_ra_types) ?
+			mc_ra_types[evt->u.ra_error.ra_error_type]
+			: "Unknown";
+		if (evt->u.ra_error.effective_address_provided)
+			ea = evt->u.ra_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		err_type = "Link";
+		subtype = evt->u.link_error.link_error_type <
+			ARRAY_SIZE(mc_link_types) ?
+			mc_link_types[evt->u.link_error.link_error_type]
+			: "Unknown";
+		if (evt->u.link_error.effective_address_provided)
+			ea = evt->u.link_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_DCACHE:
+		err_type = "D-Cache";
+		subtype = "Unknown";
+		break;
+	case MCE_ERROR_TYPE_ICACHE:
+		err_type = "I-Cache";
+		subtype = "Unknown";
+		break;
+	default:
+	case MCE_ERROR_TYPE_UNKNOWN:
+		err_type = "Unknown";
+		subtype = "";
+		break;
+	}
+
+	dar_str[0] = pa_str[0] = '\0';
+	if (ea && evt->srr0 != ea) {
+		/* Load/Store address */
+		n = sprintf(dar_str, "DAR: %016llx ", ea);
+		if (pa)
+			sprintf(dar_str + n, "paddr: %016llx ", pa);
+	} else if (pa) {
+		sprintf(pa_str, " paddr: %016llx", pa);
+	}
+
+	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+		level, evt->cpu, sevstr, in_guest ? "Guest" : "",
+		err_type, subtype, dar_str,
+		evt->disposition == MCE_DISPOSITION_RECOVERED ?
+		"Recovered" : "Not recovered");
+
+	if (in_guest || user_mode) {
+		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+			level, evt->cpu, current->pid, current->comm,
+			in_guest ? "Guest " : "", evt->srr0, pa_str);
+	} else {
+		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+	}
+
+	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
+	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+		mc_error_class[evt->error_class] : "Unknown";
+	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/* Display faulty slb contents for SLB errors. */
+	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
+		slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
+}
+EXPORT_SYMBOL_GPL(machine_check_print_event_info);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain srr0 and srr1.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
+{
+	long handled = 0;
+
+	hv_nmi_check_nonrecoverable(regs);
+
+	/*
+	 * See if platform is capable of handling machine check.
+	 */
+	if (ppc_md.machine_check_early)
+		handled = ppc_md.machine_check_early(regs);
+
+	return handled;
+}
+
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+	DTRIG_UNKNOWN,
+	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
+	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
+{
+	int pvr;
+	struct device_node *cpun;
+	struct property *prop = NULL;
+	const char *str;
+
+	/* First look in the device tree */
+	preempt_disable();
+	cpun = of_get_cpu_node(smp_processor_id(), NULL);
+	if (cpun) {
+		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+					    prop, str) {
+			if (strcmp(str, "bit17-vector-ci-load") == 0)
+				hmer_debug_trig_function = DTRIG_VECTOR_CI;
+			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+		}
+		of_node_put(cpun);
+	}
+	preempt_enable();
+
+	/* If we found the property, don't look at PVR */
+	if (prop)
+		goto out;
+
+	pvr = mfspr(SPRN_PVR);
+	/* Check for POWER9 Nimbus (scale-out) */
+	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+		/* DD2.2 and later */
+		if ((pvr & 0xfff) >= 0x202)
+			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+		/* DD2.0 and DD2.1 - used for vector CI load emulation */
+		else if ((pvr & 0xfff) >= 0x200)
+			hmer_debug_trig_function = DTRIG_VECTOR_CI;
+	}
+
+ out:
+	switch (hmer_debug_trig_function) {
+	case DTRIG_VECTOR_CI:
+		pr_debug("HMI debug trigger used for vector CI load\n");
+		break;
+	case DTRIG_SUSPEND_ESCAPE:
+		pr_debug("HMI debug trigger used for TM suspend escape\n");
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ *  0 means no further handling is required
+ *  1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+	unsigned long hmer = mfspr(SPRN_HMER);
+	long ret = 0;
+
+	/*
+	 * HMER_DEBUG_TRIG bit is used for various workarounds on P9
+	 */
+	if (!(hmer & HMER_DEBUG_TRIG))
+		return -1;
+	if (hmer_debug_trig_function == DTRIG_UNKNOWN)
+		return -1;
+
+	/*
+	 * Clear the trigger bit, both in our local copy and in the
+	 * register itself. HMER is a write-AND register.
+	 */
+	hmer &= ~HMER_DEBUG_TRIG;
+	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+	/*
+	 * Now to avoid problems with soft-disable we only do the vector CI
+	 * emulation if we are coming from host user space.
+	 */
+	if (hmer_debug_trig_function == DTRIG_VECTOR_CI &&
+	    regs && user_mode(regs)) {
+		local_paca->hmi_p9_special_emu = 1;
+		ret = 1;
+	}
+
+	/*
+	 * See if any other HMI causes remain to be handled
+	 */
+	if (hmer & mfspr(SPRN_HMEER))
+		return -1;
+
+	return ret;
+}
+
+/*
+ * Return values: hmi_handle_debugtrig()'s result if it is >= 0, otherwise 1.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
+{	
+	int ret;
+
+	local_paca->hmi_irqs++;
+
+	ret = hmi_handle_debugtrig(regs);
+	if (ret >= 0)
+		return ret;
+
+	wait_for_subcore_guest_exit();
+
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(regs);
+
+	wait_for_tb_resync();
+
+	return 1;
+}
+
+void __init mce_init(void)
+{
+	u64 limit = min(ppc64_bolted_size(), ppc64_rma_size);
+	int cpu;
+
+	/*
+	 * One struct mce_info per possible CPU, with 'limit' as the upper
+	 * address bound of the allocation; failure to allocate is fatal.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct mce_info *info;
+
+		info = memblock_alloc_try_nid(sizeof(*info), __alignof__(*info),
+					      MEMBLOCK_LOW_LIMIT, limit,
+					      early_cpu_to_node(cpu));
+		if (!info)
+			panic("Failed to allocate memory for MCE event data\n");
+		paca_ptrs[cpu]->mce_info = info;
+	}
+}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 0000000000..71e8f2a92e
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,791 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Machine check exception handling CPU-side for power7 and power8
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce_power: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/extable.h>
+#include <linux/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mce.h>
+#include <asm/machdep.h>
+#include <asm/pte-walk.h>
+#include <asm/sstep.h>
+#include <asm/exception-64s.h>
+#include <asm/extable.h>
+#include <asm/inst.h>
+
+/*
+ * Convert an address related to an mm to a PFN. NOTE: we are in real
+ * mode, we could potentially race with page table updates.
+ */
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+{
+	struct mm_struct *mm = user_mode(regs) ? current->mm : &init_mm;
+	unsigned long pfn = ULONG_MAX;	/* returned when no usable PTE is found */
+	unsigned long flags;
+	unsigned int shift;
+	pte_t *ptep, pte;
+
+	/*
+	 * The page table walk and the PTE read run with local interrupts off.
+	 */
+	local_irq_save(flags);
+	ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+	if (!ptep)
+		goto out;
+
+	pte = READ_ONCE(*ptep);
+	if (!pte_present(pte) || pte_special(pte))
+		goto out;
+
+	/*
+	 * For a mapping larger than PAGE_SIZE, fold the page-granular
+	 * offset bits of @addr within that mapping into the PTE value
+	 * before extracting the PFN.
+	 */
+	if (shift > PAGE_SHIFT) {
+		unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+
+		pte = __pte(pte_val(pte) | (addr & rpnmask));
+	}
+	pfn = pte_pfn(pte);
+out:
+	local_irq_restore(flags);
+	return pfn;
+}
+
+static bool mce_in_guest(void)
+{
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+	/*
+	 * If machine check is hit when in guest context or low level KVM
+	 * code, avoid looking up any translations or making any attempts
+	 * to recover, just record the event and pass to KVM.
+	 */
+	if (get_paca()->kvm_hstate.in_guest)
+		return true;
+#endif
+	return false;
+}
+
+/* flush SLBs and reload */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void flush_and_reload_slb(void)
+{
+	if (early_radix_enabled())
+		return;
+
+	/* Invalidate all SLBs */
+	slb_flush_all_realmode();
+
+	/*
+	 * This probably shouldn't happen, but it may be possible it's
+	 * called in early boot before SLB shadows are allocated.
+	 */
+	if (!get_slb_shadow())
+		return;
+
+	slb_restore_bolted_realmode();
+}
+#endif
+
+void flush_erat(void)
+{
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+		flush_and_reload_slb();
+		return;
+	}
+#endif
+	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+	/* Returns 1 when the requested flush was performed, 0 otherwise. */
+	switch (what) {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	case MCE_FLUSH_SLB:
+		flush_and_reload_slb();
+		return 1;
+#endif
+	case MCE_FLUSH_ERAT:
+		flush_erat();
+		return 1;
+	case MCE_FLUSH_TLB:
+		tlbiel_all();
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+#define SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42))
+
+struct mce_ierror_table {
+	unsigned long srr1_mask;
+	unsigned long srr1_value;
+	bool nip_valid; /* nip is a valid indicator of faulting address */
+	unsigned int error_type;
+	unsigned int error_subtype;
+	unsigned int error_class;
+	unsigned int initiator;
+	unsigned int severity;
+	bool sync_error;
+};
+
+static const struct mce_ierror_table mce_p7_ierror_table[] = {
+{ 0x00000000001c0000, 0x0000000000040000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000001c0000, 0x0000000000080000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000001c0000, 0x00000000000c0000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000001c0000, 0x0000000000100000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000001c0000, 0x0000000000140000, true,
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000001c0000, 0x0000000000180000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000001c0000, 0x00000000001c0000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p8_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p9_ierror_table[] = { /* positional: struct mce_ierror_table field order */
+{ 0x00000000081c0000, 0x0000000000040000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x0000000008180000, false,
+  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+  MCE_ECLASS_HARDWARE, MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p10_ierror_table[] = { /* positional: struct mce_ierror_table field order */
+{ 0x00000000081c0000, 0x0000000000040000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008080000, true,
+  MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+  MCE_ECLASS_HARDWARE, MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+/*
+ * One row of a load/store (data-side) machine check decode table.
+ *
+ * mce_handle_derror() tests each row's dsisr_value against DSISR and copies
+ * the remaining fields into struct mce_error_info for the first row that
+ * matches.  A row with dsisr_value == 0 terminates the table.
+ */
+struct mce_derror_table {
+	unsigned long dsisr_value;
+	bool dar_valid; /* dar is a valid indicator of faulting address */
+	unsigned int error_type;
+	unsigned int error_subtype;
+	unsigned int error_class;
+	unsigned int initiator;
+	unsigned int severity;
+	bool sync_error;
+};
+
+/*
+ * POWER7 load/store machine check decode table, handed to mce_handle_error()
+ * by __machine_check_early_realmode_p7().
+ *
+ * Fields are positional, in struct mce_derror_table order.  Each entry maps
+ * one DSISR bit to the error reported for it.  mce_handle_derror() walks the
+ * entries in order and reports only the first one whose bit is set, so the
+ * order of the entries is their priority.  The all-zero entry ends the table.
+ */
+static const struct mce_derror_table mce_p7_derror_table[] = {
+{ 0x00008000, false,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+  MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * POWER8 load/store machine check decode table, handed to mce_handle_error()
+ * by __machine_check_early_realmode_p8().
+ *
+ * Fields are positional, in struct mce_derror_table order.  Each entry maps
+ * one DSISR bit to the error reported for it.  mce_handle_derror() walks the
+ * entries in order and reports only the first one whose bit is set, so the
+ * order of the entries is their priority.  The all-zero entry ends the table.
+ */
+static const struct mce_derror_table mce_p8_derror_table[] = {
+{ 0x00008000, false,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00002000, true,
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00001000, true,
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000200, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,	/* Before PARITY */
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * POWER9 load/store machine check decode table, handed to mce_handle_error()
+ * by __machine_check_early_realmode_p9().
+ *
+ * Fields are positional, in struct mce_derror_table order.  Each entry maps
+ * one DSISR bit to the error reported for it.  mce_handle_derror() walks the
+ * entries in order and reports only the first one whose bit is set, so the
+ * order of the entries is their priority.  The all-zero entry ends the table.
+ */
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000, false,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00002000, true,
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00001000, true,
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+  MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,	/* Before PARITY */
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * POWER10 load/store machine check decode table, handed to mce_handle_error()
+ * by __machine_check_early_realmode_p10().
+ *
+ * Fields are positional, in struct mce_derror_table order.  Each entry maps
+ * one DSISR bit to the error reported for it.  mce_handle_derror() walks the
+ * entries in order and reports only the first one whose bit is set, so the
+ * order of the entries is their priority.  The all-zero entry ends the table.
+ */
+static const struct mce_derror_table mce_p10_derror_table[] = {
+{ 0x00008000, false,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+  MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,	/* Before PARITY */
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * Decode the instruction at regs->nip to recover the effective address it
+ * was accessing and, when that address can be translated, the physical
+ * address of the page containing it.
+ *
+ * @regs:      register state at the time of the machine check
+ * @addr:      set to the decoded effective address, or to 0 on failure
+ * @phys_addr: set to the page-aligned physical address backing @addr; left
+ *             untouched when no translation is available
+ *
+ * Returns 0 when the instruction was decoded, -1 otherwise.
+ */
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
+					uint64_t *phys_addr)
+{
+	/*
+	 * Carefully look at the NIP to determine
+	 * the instruction to analyse. Reading the NIP
+	 * in real-mode is tricky and can lead to recursive
+	 * faults
+	 */
+	ppc_inst_t instr;
+	unsigned long pfn, instr_addr;
+	struct instruction_op op;
+	struct pt_regs tmp = *regs;	/* scratch copy handed to analyse_instr() */
+
+	pfn = addr_to_pfn(regs, regs->nip);
+	if (pfn != ULONG_MAX) {
+		/* Fetch the instruction through its physical address. */
+		instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+		instr = ppc_inst_read((u32 *)instr_addr);
+		if (!analyse_instr(&op, &tmp, instr)) {
+			pfn = addr_to_pfn(regs, op.ea);
+			*addr = op.ea;
+			/*
+			 * addr_to_pfn() signals failure with ULONG_MAX.
+			 * Shifting that would fabricate a physical address,
+			 * so only publish one when the lookup succeeded.
+			 */
+			if (pfn != ULONG_MAX)
+				*phys_addr = (pfn << PAGE_SHIFT);
+			return 0;
+		}
+		/*
+		 * analyse_instr() might fail if the instruction
+		 * is not a load/store, although this is unexpected
+		 * for load/store errors or if we got the NIP
+		 * wrong
+		 */
+	}
+	*addr = 0;
+	return -1;
+}
+
+/*
+ * Decode an instruction-side machine check from SRR1.
+ *
+ * The first @table entry whose masked SRR1 pattern matches is used.  Outside
+ * a guest, SLB/ERAT/TLB errors are flushed in an attempt to correct them.
+ * The entry is then copied into @mce_err.  If the entry marks NIP as valid
+ * (and we are not in a guest) @addr receives regs->nip, and for synchronous
+ * UEs @phys_addr receives the physical page address of NIP when it can be
+ * looked up.
+ *
+ * Returns the result of the flush attempt (0 if none was made).  When no
+ * entry matches, @mce_err is filled in as an unknown, severe error and 0 is
+ * returned.
+ */
+static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
+		const struct mce_ierror_table table[],
+		struct mce_error_info *mce_err, uint64_t *addr,
+		uint64_t *phys_addr)
+{
+	const struct mce_ierror_table *entry;
+	unsigned long pfn;
+	int handled = 0;
+
+	*addr = 0;
+
+	/* Stop at the first entry matching the masked SRR1 value. */
+	for (entry = table; entry->srr1_mask; entry++) {
+		if ((srr1 & entry->srr1_mask) == entry->srr1_value)
+			break;
+	}
+
+	if (!entry->srr1_mask) {
+		/* Reached the terminator: nothing in the table matched. */
+		mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+		mce_err->error_class = MCE_ECLASS_UNKNOWN;
+		mce_err->severity = MCE_SEV_SEVERE;
+		mce_err->initiator = MCE_INITIATOR_CPU;
+		mce_err->sync_error = true;
+		return 0;
+	}
+
+	/* attempt to correct the error */
+	if (!mce_in_guest()) {
+		if (entry->error_type == MCE_ERROR_TYPE_SLB) {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+			if (local_paca->in_mce == 1)
+				slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
+			handled = mce_flush(MCE_FLUSH_SLB);
+		} else if (entry->error_type == MCE_ERROR_TYPE_ERAT) {
+			handled = mce_flush(MCE_FLUSH_ERAT);
+		} else if (entry->error_type == MCE_ERROR_TYPE_TLB) {
+			handled = mce_flush(MCE_FLUSH_TLB);
+		}
+	}
+
+	/* now fill in mce_error_info */
+	mce_err->error_type = entry->error_type;
+	mce_err->error_class = entry->error_class;
+	switch (entry->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		mce_err->u.ue_error_type = entry->error_subtype;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		mce_err->u.slb_error_type = entry->error_subtype;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		mce_err->u.erat_error_type = entry->error_subtype;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		mce_err->u.tlb_error_type = entry->error_subtype;
+		break;
+	case MCE_ERROR_TYPE_USER:
+		mce_err->u.user_error_type = entry->error_subtype;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		mce_err->u.ra_error_type = entry->error_subtype;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		mce_err->u.link_error_type = entry->error_subtype;
+		break;
+	}
+	mce_err->sync_error = entry->sync_error;
+	mce_err->severity = entry->severity;
+	mce_err->initiator = entry->initiator;
+
+	/* NIP is only reported when the entry says it is meaningful. */
+	if (!entry->nip_valid || mce_in_guest())
+		return handled;
+	*addr = regs->nip;
+
+	/* Only synchronous UEs get a physical address lookup. */
+	if (!mce_err->sync_error || entry->error_type != MCE_ERROR_TYPE_UE)
+		return handled;
+
+	/* Skip the lookup once the MCE nesting limit has been reached. */
+	if (get_paca()->in_mce >= MAX_MCE_DEPTH)
+		return handled;
+
+	pfn = addr_to_pfn(regs, regs->nip);
+	if (pfn != ULONG_MAX)
+		*phys_addr = (pfn << PAGE_SHIFT);
+
+	return handled;
+}
+
+/*
+ * Decode a load/store machine check from DSISR.
+ *
+ * Every @table entry whose bit is set in DSISR gets a correction attempt
+ * (SLB/ERAT/TLB flush, outside a guest only), but only the first matching
+ * entry is copied into @mce_err.  For that entry @addr is taken from DAR
+ * when the entry marks it valid; otherwise, for a synchronous UE outside a
+ * guest, the faulting instruction is decoded to find @addr and @phys_addr.
+ *
+ * Returns 1 if any flush reported success, else 0.  When no entry matches,
+ * @mce_err is filled in as an unknown, severe error and 0 is returned.
+ */
+static int mce_handle_derror(struct pt_regs *regs,
+		const struct mce_derror_table table[],
+		struct mce_error_info *mce_err, uint64_t *addr,
+		uint64_t *phys_addr)
+{
+	const struct mce_derror_table *entry;
+	uint64_t dsisr = regs->dsisr;
+	bool reported = false;
+	int handled = 0;
+
+	*addr = 0;
+
+	for (entry = table; entry->dsisr_value; entry++) {
+		if ((dsisr & entry->dsisr_value) == 0)
+			continue;
+
+		/* attempt to correct the error */
+		if (!mce_in_guest()) {
+			if (entry->error_type == MCE_ERROR_TYPE_SLB) {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+				if (local_paca->in_mce == 1)
+					slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
+				if (mce_flush(MCE_FLUSH_SLB))
+					handled = 1;
+			} else if (entry->error_type == MCE_ERROR_TYPE_ERAT) {
+				if (mce_flush(MCE_FLUSH_ERAT))
+					handled = 1;
+			} else if (entry->error_type == MCE_ERROR_TYPE_TLB) {
+				if (mce_flush(MCE_FLUSH_TLB))
+					handled = 1;
+			}
+		}
+
+		/*
+		 * Attempt to handle multiple conditions, but only return
+		 * one. Ensure uncorrectable errors are first in the table
+		 * to match.
+		 */
+		if (reported)
+			continue;
+		reported = true;
+
+		/* now fill in mce_error_info */
+		mce_err->error_type = entry->error_type;
+		mce_err->error_class = entry->error_class;
+		switch (entry->error_type) {
+		case MCE_ERROR_TYPE_UE:
+			mce_err->u.ue_error_type = entry->error_subtype;
+			break;
+		case MCE_ERROR_TYPE_SLB:
+			mce_err->u.slb_error_type = entry->error_subtype;
+			break;
+		case MCE_ERROR_TYPE_ERAT:
+			mce_err->u.erat_error_type = entry->error_subtype;
+			break;
+		case MCE_ERROR_TYPE_TLB:
+			mce_err->u.tlb_error_type = entry->error_subtype;
+			break;
+		case MCE_ERROR_TYPE_USER:
+			mce_err->u.user_error_type = entry->error_subtype;
+			break;
+		case MCE_ERROR_TYPE_RA:
+			mce_err->u.ra_error_type = entry->error_subtype;
+			break;
+		case MCE_ERROR_TYPE_LINK:
+			mce_err->u.link_error_type = entry->error_subtype;
+			break;
+		}
+		mce_err->sync_error = entry->sync_error;
+		mce_err->severity = entry->severity;
+		mce_err->initiator = entry->initiator;
+
+		if (entry->dar_valid) {
+			*addr = regs->dar;
+			continue;
+		}
+
+		/* Without a usable DAR, only synchronous host UEs are decoded. */
+		if (!mce_err->sync_error || mce_in_guest() ||
+				entry->error_type != MCE_ERROR_TYPE_UE)
+			continue;
+
+		/*
+		 * We do a maximum of 4 nested MCE calls, see
+		 * kernel/exception-64s.h
+		 */
+		if (get_paca()->in_mce < MAX_MCE_DEPTH)
+			mce_find_instr_ea_and_phys(regs, addr, phys_addr);
+	}
+
+	if (!reported) {
+		/* No DSISR bit matched any table entry. */
+		mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+		mce_err->error_class = MCE_ECLASS_UNKNOWN;
+		mce_err->severity = MCE_SEV_SEVERE;
+		mce_err->initiator = MCE_INITIATOR_CPU;
+		mce_err->sync_error = true;
+		return 0;
+	}
+
+	return handled;
+}
+
+/*
+ * Try to recover from an uncorrectable error (UE) in the host.
+ *
+ * Returns 1 if the event can be treated as handled, either because common
+ * UE processing flagged it to be ignored or because the platform's early
+ * recovery hook accepted it; returns 0 otherwise, and always in a guest.
+ */
+static long mce_handle_ue_error(struct pt_regs *regs,
+				struct mce_error_info *mce_err)
+{
+	long recovered = 0;
+
+	if (!mce_in_guest()) {
+		mce_common_process_ue(regs, mce_err);
+
+		/*
+		 * On specific SCOM read via MMIO we may get a machine check
+		 * exception with SRR0 pointing inside opal. If that is the
+		 * case OPAL may have recovery address to re-read SCOM data in
+		 * different way and hence we can recover from this MC.
+		 */
+		if (mce_err->ignore_event)
+			recovered = 1;
+		else if (ppc_md.mce_check_early_recovery &&
+			 ppc_md.mce_check_early_recovery(regs))
+			recovered = 1;
+	}
+
+	return recovered;
+}
+
+/*
+ * Common machine check entry for all CPU generations.
+ *
+ * Picks the data-side or instruction-side decoder according to the SRR1
+ * load/store indication, gives unhandled UEs a further recovery attempt,
+ * and records the event with save_mce_event().
+ *
+ * Returns non-zero if the error was handled.
+ */
+static long mce_handle_error(struct pt_regs *regs,
+		unsigned long srr1,
+		const struct mce_derror_table dtable[],
+		const struct mce_ierror_table itable[])
+{
+	struct mce_error_info mce_err = { 0 };
+	uint64_t phys_addr = ULONG_MAX;	/* stays ULONG_MAX if never resolved */
+	uint64_t addr;			/* both decoders always set this */
+	long handled;
+
+	handled = SRR1_MC_LOADSTORE(srr1) ?
+		mce_handle_derror(regs, dtable, &mce_err, &addr, &phys_addr) :
+		mce_handle_ierror(regs, srr1, itable, &mce_err, &addr,
+				  &phys_addr);
+
+	if (mce_err.error_type == MCE_ERROR_TYPE_UE && !handled)
+		handled = mce_handle_ue_error(regs, &mce_err);
+
+	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
+
+	return handled;
+}
+
+/* POWER7 machine check entry: decode with the P7 tables. */
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
+{
+	unsigned long srr1 = regs->msr;
+
+	/* P7 DD1 leaves top bits of DSISR undefined */
+	regs->dsisr &= 0x0000ffff;
+
+	return mce_handle_error(regs, srr1,
+				mce_p7_derror_table, mce_p7_ierror_table);
+}
+
+/* POWER8 machine check entry: decode with the P8 tables. */
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
+{
+	unsigned long srr1 = regs->msr;
+
+	return mce_handle_error(regs, srr1,
+				mce_p8_derror_table, mce_p8_ierror_table);
+}
+
+/*
+ * POWER9 machine check entry: filter out a known spurious event, redirect
+ * asynchronous store errors to the SRR1 decoder, then decode with the P9
+ * tables.
+ */
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+	unsigned long srr1 = regs->msr;
+	unsigned long cause;
+
+	if (SRR1_MC_LOADSTORE(srr1)) {
+		/*
+		 * On POWER9 DD2.1 and below, it's possible to get a machine
+		 * check caused by a paste instruction where only DSISR bit 25
+		 * is set. This will result in the MCE handler seeing an
+		 * unknown event and the kernel crashing. An MCE that occurs
+		 * like this is spurious, so we don't need to do anything in
+		 * terms of servicing it. If there is something that needs to
+		 * be serviced, the CPU will raise the MCE again with the
+		 * correct DSISR so that it can be serviced properly. So
+		 * detect this case and mark it as handled.
+		 */
+		if (regs->dsisr == 0x02000000)
+			return 1;
+
+		/*
+		 * Async machine check due to bad real address from store or
+		 * foreign link time out comes with the load/store bit (PPC
+		 * bit 42) set in SRR1, but the cause comes in SRR1 not DSISR.
+		 * Clear bit 42 so we're directed to the ierror table so it
+		 * will find the cause (which describes it correctly as a
+		 * store error).
+		 */
+		cause = srr1 & 0x081c0000;
+		if (cause == 0x08140000 || cause == 0x08180000)
+			srr1 &= ~PPC_BIT(42);
+	}
+
+	return mce_handle_error(regs, srr1,
+				mce_p9_derror_table, mce_p9_ierror_table);
+}
+
+/*
+ * POWER10 machine check entry: redirect asynchronous store errors to the
+ * SRR1 decoder, then decode with the P10 tables.
+ */
+long __machine_check_early_realmode_p10(struct pt_regs *regs)
+{
+	unsigned long srr1 = regs->msr;
+	bool async_store;
+
+	/*
+	 * Async machine check due to bad real address from store comes with
+	 * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in
+	 * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table
+	 * so it will find the cause (which describes it correctly as a store
+	 * error).
+	 */
+	async_store = SRR1_MC_LOADSTORE(srr1) &&
+		      (srr1 & 0x081c0000) == 0x08140000;
+	if (async_store)
+		srr1 &= ~PPC_BIT(42);
+
+	return mce_handle_error(regs, srr1,
+				mce_p10_derror_table, mce_p10_ierror_table);
+}
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
new file mode 100644
index 0000000000..29e1440d14
--- /dev/null
+++ b/arch/powerpc/kernel/misc.S
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains miscellaneous low-level functions.
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * setjmp/longjmp code by Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+
+	.text
+
+/*
+ * Returns (address we are running at) - (address we were linked at)
+ * for use before the text and data are mapped to KERNELBASE.
+ *
+ * add_reloc_offset(x) returns x + reloc_offset().
+ *
+ * reloc_offset() loads 0 into r3 and falls through into
+ * add_reloc_offset(), so both entry points share one body.
+ * r0, r4 and r5 are used as scratch; LR is saved and restored.
+ */
+
+_GLOBAL(reloc_offset)
+	li	r3, 0
+_GLOBAL(add_reloc_offset)
+	mflr	r0		/* save the caller's return address */
+	bcl	20,31,$+4	/* LR = run-time address of 1: below */
+1:	mflr	r5
+	PPC_LL	r4,(2f-1b)(r5)	/* r4 = link-time address of 1b, stored at 2: */
+	subf	r5,r4,r5	/* r5 = run-time address - link-time address */
+	add	r3,r3,r5
+	mtlr	r0
+	blr
+_ASM_NOKPROBE_SYMBOL(reloc_offset)
+_ASM_NOKPROBE_SYMBOL(add_reloc_offset)
+
+	.align	3
+2:	PPC_LONG 1b		/* link-time address of label 1 */
+
+/*
+ * setjmp: save the calling context into the buffer pointed to by r3.
+ *
+ * Buffer layout, in slots of SZL bytes:
+ *   slot 0: LR (return address)
+ *   slot 1: r1
+ *   slot 2: r2
+ *   slot 3: CR
+ *   slots 4-22: r13-r31
+ *
+ * On PPC32, CR is copied to r12 and a single stmw stores r12-r31 starting
+ * at slot 3, which gives the same layout.
+ *
+ * Returns 0 in r3.
+ */
+_GLOBAL(setjmp)
+	mflr	r0
+	PPC_STL	r0,0(r3)
+	PPC_STL	r1,SZL(r3)
+	PPC_STL	r2,2*SZL(r3)
+#ifdef CONFIG_PPC32
+	mfcr	r12
+	stmw	r12, 3*SZL(r3)
+#else
+	mfcr	r0
+	PPC_STL	r0,3*SZL(r3)
+	PPC_STL	r13,4*SZL(r3)
+	PPC_STL	r14,5*SZL(r3)
+	PPC_STL	r15,6*SZL(r3)
+	PPC_STL	r16,7*SZL(r3)
+	PPC_STL	r17,8*SZL(r3)
+	PPC_STL	r18,9*SZL(r3)
+	PPC_STL	r19,10*SZL(r3)
+	PPC_STL	r20,11*SZL(r3)
+	PPC_STL	r21,12*SZL(r3)
+	PPC_STL	r22,13*SZL(r3)
+	PPC_STL	r23,14*SZL(r3)
+	PPC_STL	r24,15*SZL(r3)
+	PPC_STL	r25,16*SZL(r3)
+	PPC_STL	r26,17*SZL(r3)
+	PPC_STL	r27,18*SZL(r3)
+	PPC_STL	r28,19*SZL(r3)
+	PPC_STL	r29,20*SZL(r3)
+	PPC_STL	r30,21*SZL(r3)
+	PPC_STL	r31,22*SZL(r3)
+#endif
+	li	r3,0
+	blr
+
+/*
+ * longjmp: restore the context saved by setjmp() from the buffer pointed
+ * to by r3 and return to the saved LR.
+ *
+ * The value returned at that point is r4, or 1 if r4 is 0.
+ * Only the CR fields selected by the mtcrf mask 0x38 (cr2-cr4) are restored.
+ */
+_GLOBAL(longjmp)
+#ifdef CONFIG_PPC32
+	lmw	r12, 3*SZL(r3)
+	mtcrf	0x38, r12
+#else
+	PPC_LL	r13,4*SZL(r3)
+	PPC_LL	r14,5*SZL(r3)
+	PPC_LL	r15,6*SZL(r3)
+	PPC_LL	r16,7*SZL(r3)
+	PPC_LL	r17,8*SZL(r3)
+	PPC_LL	r18,9*SZL(r3)
+	PPC_LL	r19,10*SZL(r3)
+	PPC_LL	r20,11*SZL(r3)
+	PPC_LL	r21,12*SZL(r3)
+	PPC_LL	r22,13*SZL(r3)
+	PPC_LL	r23,14*SZL(r3)
+	PPC_LL	r24,15*SZL(r3)
+	PPC_LL	r25,16*SZL(r3)
+	PPC_LL	r26,17*SZL(r3)
+	PPC_LL	r27,18*SZL(r3)
+	PPC_LL	r28,19*SZL(r3)
+	PPC_LL	r29,20*SZL(r3)
+	PPC_LL	r30,21*SZL(r3)
+	PPC_LL	r31,22*SZL(r3)
+	PPC_LL	r0,3*SZL(r3)
+	mtcrf	0x38,r0
+#endif
+	PPC_LL	r0,0(r3)
+	PPC_LL	r1,SZL(r3)
+	PPC_LL	r2,2*SZL(r3)
+	mtlr	r0
+	mr.	r3, r4		/* return value = r4; sets cr0 for the test below */
+	bnelr			/* non-zero: return it as is */
+	li	r3, 1		/* zero would look like a direct setjmp return */
+	blr
+
+/*
+ * current_stack_frame: return in r3 the word stored at 0(r1).
+ * NOTE(review): presumably this is the ABI back-chain pointer, i.e. the
+ * caller's stack frame - confirm against the ABI in use.
+ */
+_GLOBAL(current_stack_frame)
+	PPC_LL	r3,0(r1)
+	blr
+EXPORT_SYMBOL(current_stack_frame)
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
new file mode 100644
index 0000000000..2eabb15687
--- /dev/null
+++ b/arch/powerpc/kernel/misc_32.S
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains miscellaneous low-level functions.
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
+
+	.text
+
+/*
+ * This returns the high 64 bits of the product of two 64-bit numbers.
+ *
+ * In:  r3:r4 = A (high:low words), r5:r6 = B (high:low words)
+ * Out: r3:r4 = high 64 bits of A * B
+ * Uses r0 and r7-r10 as scratch, plus cr0, cr1 and the carry bit.
+ *
+ * Partial products involving a zero word are skipped: cr0 remembers
+ * whether the low word of B is zero, cr1 whether the high word of A is.
+ */
+_GLOBAL(mulhdu)
+	cmpwi	r6,0
+	cmpwi	cr1,r3,0
+	mr	r10,r4
+	mulhwu	r4,r4,r5
+	beq	1f
+	mulhwu	r0,r10,r6
+	mullw	r7,r10,r5
+	addc	r7,r0,r7
+	addze	r4,r4
+1:	beqlr	cr1		/* all done if high part of A is 0 */
+	mullw	r9,r3,r5
+	mulhwu	r10,r3,r5
+	beq	2f
+	mullw	r0,r3,r6
+	mulhwu	r8,r3,r6
+	addc	r7,r0,r7
+	adde	r4,r4,r8
+	addze	r10,r10
+2:	addc	r4,r4,r9
+	addze	r3,r10
+	blr
+
+/*
+ * reloc_got2 runs through the .got2 section adding an offset
+ * to each entry.
+ *
+ * In: r3 = offset to add to every 32-bit entry between __got2_start and
+ *     __got2_end.
+ *
+ * Returns straight away if the section holds no entries.  The section is
+ * addressed through its run-time location, found by comparing the run-time
+ * and link-time addresses of label 1.  Uses r0, r4, r7, r8, r11 and CTR.
+ */
+_GLOBAL(reloc_got2)
+	mflr	r11			/* save return address */
+	lis	r7,__got2_start@ha
+	addi	r7,r7,__got2_start@l
+	lis	r8,__got2_end@ha
+	addi	r8,r8,__got2_end@l
+	subf	r8,r7,r8
+	srwi.	r8,r8,2			/* r8 = number of 4-byte entries */
+	beqlr
+	mtctr	r8
+	bcl	20,31,$+4		/* LR = run-time address of 1: */
+1:	mflr	r0
+	lis	r4,1b@ha
+	addi	r4,r4,1b@l
+	subf	r0,r4,r0		/* r0 = run-time - link-time */
+	add	r7,r0,r7		/* r7 = run-time address of .got2 */
+2:	lwz	r0,0(r7)
+	add	r0,r0,r3
+	stw	r0,0(r7)
+	addi	r7,r7,4
+	bdnz	2b
+	mtlr	r11
+	blr
+
+/*
+ * call_setup_cpu - call the setup_cpu function for this cpu
+ * r3 = data offset, r24 = cpu number
+ *
+ * Setup function is called with:
+ *   r3 = data offset
+ *   r4 = ptr to CPU spec (relocated)
+ *
+ * Returns directly if the CPU spec has no setup function; otherwise the
+ * setup function is entered with a branch through CTR, so it returns to
+ * our caller.
+ */
+_GLOBAL(call_setup_cpu)
+	addis	r4,r3,cur_cpu_spec@ha
+	addi	r4,r4,cur_cpu_spec@l
+	lwz	r4,0(r4)		/* r4 = cur_cpu_spec (unrelocated) */
+	add	r4,r4,r3
+	lwz	r5,CPU_SPEC_SETUP(r4)
+	cmpwi	0,r5,0			/* test for NULL before relocating */
+	add	r5,r5,r3
+	beqlr
+	mtctr	r5
+	bctr
+
+#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_PPC_BOOK3S_32)
+
+/* This gets called by via-pmu.c to switch the PLL selection
+ * on 750fx CPU. This function should really be moved to some
+ * other place (as most of the cpufreq code in via-pmu).
+ *
+ * In: r3 = PLL to select (0 = PLL0, non-zero = PLL1).
+ * Runs with MSR:EE cleared; the original MSR is restored before returning.
+ */
+_GLOBAL(low_choose_750fx_pll)
+	/* Clear MSR:EE */
+	mfmsr	r7
+	rlwinm	r0,r7,0,17,15
+	mtmsr	r0
+
+	/* If switching to PLL1, disable HID0:BTIC */
+	cmplwi	cr0,r3,0
+	beq	1f
+	mfspr	r5,SPRN_HID0
+	rlwinm	r5,r5,0,27,25
+	sync
+	mtspr	SPRN_HID0,r5
+	isync
+	sync
+
+1:
+	/* Calc new HID1 value */
+	mfspr	r4,SPRN_HID1
+	rlwinm	r5,r3,16,15,15	/* Build a HID1:PS bit from parameter */
+	rlwinm	r4,r4,0,16,14	/* Clear out HID1:PS from value read */
+	or	r4,r4,r5	/* Could have I used rlwimi here ? */
+	mtspr	SPRN_HID1,r4
+
+#ifdef CONFIG_SMP
+	/* Store new HID1 image */
+	lwz	r6,TASK_CPU(r2)
+	slwi	r6,r6,2		/* index nap_save_hid1 by cpu, 4 bytes each */
+#else
+	li	r6, 0
+#endif
+	addis	r6,r6,nap_save_hid1@ha
+	stw	r4,nap_save_hid1@l(r6)
+
+	/* If switching to PLL0, enable HID0:BTIC */
+	cmplwi	cr0,r3,0
+	bne	1f
+	mfspr	r5,SPRN_HID0
+	ori	r5,r5,HID0_BTIC
+	sync
+	mtspr	SPRN_HID0,r5
+	isync
+	sync
+
+1:
+	/* Return */
+	mtmsr	r7
+	blr
+
+/*
+ * low_choose_7447a_dfs: copy the low bit of r3 into bit 9 of HID1.
+ * Runs with MSR:EE cleared; the original MSR is restored before returning.
+ * NOTE(review): presumably bit 9 is the 7447A DFS enable - confirm against
+ * the CPU manual.
+ */
+_GLOBAL(low_choose_7447a_dfs)
+	/* Clear MSR:EE */
+	mfmsr	r7
+	rlwinm	r0,r7,0,17,15
+	mtmsr	r0
+	
+	/* Calc new HID1 value */
+	mfspr	r4,SPRN_HID1
+	insrwi	r4,r3,1,9	/* insert parameter into bit 9 */
+	sync
+	mtspr	SPRN_HID1,r4
+	sync
+	isync
+
+	/* Return */
+	mtmsr	r7
+	blr
+
+#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */
+
+#ifdef CONFIG_40x
+
+/*
+ * Do an IO access in real mode
+ *
+ * real_readb: read one byte from the address in r3 with MSR_DR cleared.
+ * Returns the byte in r3.  The original MSR is restored afterwards.
+ */
+_GLOBAL(real_readb)
+	mfmsr	r7
+	rlwinm	r0,r7,0,~MSR_DR
+	sync
+	mtmsr	r0
+	sync
+	isync
+	lbz	r3,0(r3)
+	sync
+	mtmsr	r7
+	sync
+	isync
+	blr
+_ASM_NOKPROBE_SYMBOL(real_readb)
+
+/*
+ * Do an IO access in real mode
+ *
+ * real_writeb: write the byte in r3 to the address in r4 with MSR_DR
+ * cleared.  The original MSR is restored afterwards.
+ */
+_GLOBAL(real_writeb)
+	mfmsr	r7
+	rlwinm	r0,r7,0,~MSR_DR
+	sync
+	mtmsr	r0
+	sync
+	isync
+	stb	r3,0(r4)
+	sync
+	mtmsr	r7
+	sync
+	isync
+	blr
+_ASM_NOKPROBE_SYMBOL(real_writeb)
+
+#endif /* CONFIG_40x */
+
+/*
+ * Copy a whole page.  We use the dcbz instruction on the destination
+ * to reduce memory traffic (it eliminates the unnecessary reads of
+ * the destination into cache).  This requires that the destination
+ * is cacheable.
+ *
+ * In: r3 = destination, r4 = source.
+ *
+ * Both pointers are biased by -4 so that COPY_16_BYTES can use the update
+ * forms (lwzu/stwu) to advance them; r5 = 4 undoes that bias for dcbz.
+ * The main loop runs twice: first for all but the last MAX_COPY_PREFETCH
+ * cache lines, then (with cr0.eq set as a "last pass" flag) for the rest.
+ */
+#define COPY_16_BYTES		\
+	lwz	r6,4(r4);	\
+	lwz	r7,8(r4);	\
+	lwz	r8,12(r4);	\
+	lwzu	r9,16(r4);	\
+	stw	r6,4(r3);	\
+	stw	r7,8(r3);	\
+	stw	r8,12(r3);	\
+	stwu	r9,16(r3)
+
+_GLOBAL(copy_page)
+	rlwinm	r5, r3, 0, L1_CACHE_BYTES - 1
+	addi	r3,r3,-4
+
+0:	twnei	r5, 0	/* WARN if r3 is not cache aligned */
+	EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
+	addi	r4,r4,-4
+
+	li	r5,4
+
+#if MAX_COPY_PREFETCH > 1
+	/* Touch the first MAX_COPY_PREFETCH source cache lines. */
+	li	r0,MAX_COPY_PREFETCH
+	li	r11,4
+	mtctr	r0
+11:	dcbt	r11,r4
+	addi	r11,r11,L1_CACHE_BYTES
+	bdnz	11b
+#else /* MAX_COPY_PREFETCH == 1 */
+	dcbt	r5,r4
+	li	r11,L1_CACHE_BYTES+4
+#endif /* MAX_COPY_PREFETCH */
+	li	r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+	crclr	4*cr0+eq	/* eq clear: this is the first pass */
+2:
+	mtctr	r0
+1:
+	dcbt	r11,r4
+	dcbz	r5,r3
+	COPY_16_BYTES
+#if L1_CACHE_BYTES >= 32
+	COPY_16_BYTES
+#if L1_CACHE_BYTES >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_BYTES >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	1b
+	beqlr			/* eq set: second pass finished, return */
+	crnot	4*cr0+eq,4*cr0+eq
+	li	r0,MAX_COPY_PREFETCH	/* second pass: the remaining lines */
+	li	r11,4
+	b	2b
+EXPORT_SYMBOL(copy_page)
+
+/*
+ * Extended precision shifts.
+ *
+ * Updated to be valid for shift counts from 0 to 63 inclusive.
+ * -- Gabriel
+ *
+ * R3/R4 has 64 bit value
+ * R5    has shift count
+ * result in R3/R4
+ *
+ *  ashrdi3: arithmetic right shift (sign propagation)
+ *  lshrdi3: logical right shift
+ *  ashldi3: left shift
+ *
+ * In the per-line comments below, MSW is R3 and LSW is R4.
+ */
+_GLOBAL(__ashrdi3)
+	subfic	r6,r5,32
+	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
+	addi	r7,r5,32	# could be xori, or addi with -32
+	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
+	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
+	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
+	or	r4,r4,r6	# LSW |= t1
+	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
+	sraw	r3,r3,r5	# MSW = MSW >> count
+	or	r4,r4,r7	# LSW |= t2
+	blr
+EXPORT_SYMBOL(__ashrdi3)
+
+/*
+ * __ashldi3: 64-bit left shift.
+ * In: r3:r4 = value (MSW:LSW), r5 = shift count.  Out: r3:r4.
+ */
+_GLOBAL(__ashldi3)
+	subfic	r6,r5,32
+	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
+	addi	r7,r5,32	# could be xori, or addi with -32
+	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
+	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
+	or	r3,r3,r6	# MSW |= t1
+	slw	r4,r4,r5	# LSW = LSW << count
+	or	r3,r3,r7	# MSW |= t2
+	blr
+EXPORT_SYMBOL(__ashldi3)
+
+/*
+ * __lshrdi3: 64-bit logical right shift.
+ * In: r3:r4 = value (MSW:LSW), r5 = shift count.  Out: r3:r4.
+ */
+_GLOBAL(__lshrdi3)
+	subfic	r6,r5,32
+	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
+	addi	r7,r5,32	# could be xori, or addi with -32
+	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
+	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
+	or	r4,r4,r6	# LSW |= t1
+	srw	r3,r3,r5	# MSW = MSW >> count
+	or	r4,r4,r7	# LSW |= t2
+	blr
+EXPORT_SYMBOL(__lshrdi3)
+
+/*
+ * 64-bit comparison: __cmpdi2(s64 a, s64 b)
+ * Returns 0 if a < b, 1 if a == b, 2 if a > b.
+ *
+ * In: r3:r4 = a, r5:r6 = b.  Out: r3.
+ */
+_GLOBAL(__cmpdi2)
+	cmpw	r3,r5		/* signed compare of the first words */
+	li	r3,1		/* assume a == b */
+	bne	1f
+	cmplw	r4,r6		/* first words equal: unsigned compare of the second */
+	beqlr
+1:	li	r3,0
+	bltlr
+	li	r3,2
+	blr
+EXPORT_SYMBOL(__cmpdi2)
+/*
+ * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
+ * Returns 0 if a < b, 1 if a == b, 2 if a > b.
+ *
+ * In: r3:r4 = a, r5:r6 = b.  Out: r3.
+ */
+_GLOBAL(__ucmpdi2)
+	cmplw	r3,r5		/* unsigned compare of the first words */
+	li	r3,1		/* assume a == b */
+	bne	1f
+	cmplw	r4,r6		/* first words equal: unsigned compare of the second */
+	beqlr
+1:	li	r3,0
+	bltlr
+	li	r3,2
+	blr
+EXPORT_SYMBOL(__ucmpdi2)
+
+/*
+ * __bswapdi2: reverse the byte order of the 64-bit value in r3:r4.
+ * Each 32-bit word is byte-swapped (rotate by 8, then patch two bytes in
+ * from a rotate by 24) and the two words exchange places.
+ * Uses r9 and r10 as scratch.
+ */
+_GLOBAL(__bswapdi2)
+	rotlwi  r9,r4,8
+	rotlwi  r10,r3,8
+	rlwimi  r9,r4,24,0,7
+	rlwimi  r10,r3,24,0,7
+	rlwimi  r9,r4,24,16,23
+	rlwimi  r10,r3,24,16,23
+	mr      r3,r9
+	mr      r4,r10
+	blr
+EXPORT_SYMBOL(__bswapdi2)
+
+#ifdef CONFIG_SMP
+/*
+ * start_secondary_resume: rewind r1 to the top of the current thread stack
+ * (leaving room for one minimal frame), zero the frame's back pointer and
+ * call start_secondary().  Spins forever should that call return.
+ */
+_GLOBAL(start_secondary_resume)
+	/* Reset stack */
+	rlwinm	r1, r1, 0, 0, 31 - THREAD_SHIFT
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
+	li	r3,0
+	stw	r3,0(r1)		/* Zero the stack frame pointer	*/
+	bl	start_secondary
+	b	.
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
new file mode 100644
index 0000000000..1a8cdafd68
--- /dev/null
+++ b/arch/powerpc/kernel/misc_64.S
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains miscellaneous low-level functions.
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/kexec.h>
+#include <asm/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
+
+	.text
+
+/*
+ * __bswapdi2: reverse the byte order of the 64-bit value in r3.
+ * The two 32-bit halves are byte-swapped separately (r7 from the low half,
+ * r9 from the high half) and then recombined in the opposite positions.
+ * Uses r7, r8 and r9 as scratch.
+ */
+_GLOBAL(__bswapdi2)
+EXPORT_SYMBOL(__bswapdi2)
+	srdi	r8,r3,32		/* r8 = high half */
+	rlwinm	r7,r3,8,0xffffffff
+	rlwimi	r7,r3,24,0,7
+	rlwinm	r9,r8,8,0xffffffff
+	rlwimi	r7,r3,24,16,23
+	rlwimi	r9,r8,24,0,7
+	rlwimi	r9,r8,24,16,23
+	sldi	r7,r7,32		/* swapped low half becomes the high half */
+	or	r3,r7,r9
+	blr
+
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+/*
+ * rmci_on: set bit (0x100 << 32) in HID4, then invalidate the SLB.
+ * NOTE(review): presumably this bit selects real-mode cache-inhibited
+ * accesses - confirm against the CPU manual.  Uses r3 and r5.
+ */
+_GLOBAL(rmci_on)
+	sync
+	isync
+	li	r3,0x100
+	rldicl	r3,r3,32,0		/* r3 = 0x100 << 32 */
+	mfspr	r5,SPRN_HID4
+	or	r5,r5,r3
+	sync
+	mtspr	SPRN_HID4,r5
+	isync
+	slbia
+	isync
+	sync
+	blr
+
+/*
+ * rmci_off: clear bit (0x100 << 32) in HID4, then invalidate the SLB.
+ * This clears the bit that rmci_on sets.  Uses r3 and r5.
+ */
+_GLOBAL(rmci_off)
+	sync
+	isync
+	li	r3,0x100
+	rldicl	r3,r3,32,0		/* r3 = 0x100 << 32 */
+	mfspr	r5,SPRN_HID4
+	andc	r5,r5,r3
+	sync
+	mtspr	SPRN_HID4,r5
+	isync
+	slbia
+	isync
+	sync
+	blr
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
+
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
+
+/*
+ * Do an IO access in real mode
+ *
+ * real_readb: read one byte from the address in r3, returning it in r3.
+ * For the duration of the access MSR_DR is cleared and bit (0x100 << 32)
+ * is set in HID4; both registers are restored afterwards.
+ */
+_GLOBAL(real_readb)
+	mfmsr	r7
+	ori	r0,r7,MSR_DR
+	xori	r0,r0,MSR_DR		/* r0 = MSR with MSR_DR cleared */
+	sync
+	mtmsrd	r0
+	sync
+	isync
+	mfspr	r6,SPRN_HID4		/* r6 = original HID4 */
+	rldicl	r5,r6,32,0		/* swap halves to reach the upper word */
+	ori	r5,r5,0x100
+	rldicl	r5,r5,32,0		/* swap back: bit (0x100 << 32) now set */
+	sync
+	mtspr	SPRN_HID4,r5
+	isync
+	slbia
+	isync
+	lbz	r3,0(r3)
+	sync
+	mtspr	SPRN_HID4,r6
+	isync
+	slbia
+	isync
+	mtmsrd	r7
+	sync
+	isync
+	blr
+
+/*
+ * Do an IO access in real mode
+ *
+ * real_writeb: write the byte in r3 to the address in r4.
+ * For the duration of the access MSR_DR is cleared and bit (0x100 << 32)
+ * is set in HID4; both registers are restored afterwards.
+ */
+_GLOBAL(real_writeb)
+	mfmsr	r7
+	ori	r0,r7,MSR_DR
+	xori	r0,r0,MSR_DR		/* r0 = MSR with MSR_DR cleared */
+	sync
+	mtmsrd	r0
+	sync
+	isync
+	mfspr	r6,SPRN_HID4		/* r6 = original HID4 */
+	rldicl	r5,r6,32,0		/* swap halves to reach the upper word */
+	ori	r5,r5,0x100
+	rldicl	r5,r5,32,0		/* swap back: bit (0x100 << 32) now set */
+	sync
+	mtspr	SPRN_HID4,r5
+	isync
+	slbia
+	isync
+	stb	r3,0(r4)
+	sync
+	mtspr	SPRN_HID4,r6
+	isync
+	slbia
+	isync
+	mtmsrd	r7
+	sync
+	isync
+	blr
+#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
+
+#ifdef CONFIG_PPC_PASEMI
+
+/*
+ * real_205_readb: read one byte from the address in r3 with MSR_DR
+ * cleared, using the LBZCIX macro, and return it in r3.
+ * The original MSR is restored afterwards.
+ */
+_GLOBAL(real_205_readb)
+	mfmsr	r7
+	ori	r0,r7,MSR_DR
+	xori	r0,r0,MSR_DR		/* r0 = MSR with MSR_DR cleared */
+	sync
+	mtmsrd	r0
+	sync
+	isync
+	LBZCIX(R3,R0,R3)
+	isync
+	mtmsrd	r7
+	sync
+	isync
+	blr
+
+/*
+ * real_205_writeb: write the byte in r3 to the address in r4 with MSR_DR
+ * cleared, using the STBCIX macro.
+ * The original MSR is restored afterwards.
+ */
+_GLOBAL(real_205_writeb)
+	mfmsr	r7
+	ori	r0,r7,MSR_DR
+	xori	r0,r0,MSR_DR		/* r0 = MSR with MSR_DR cleared */
+	sync
+	mtmsrd	r0
+	sync
+	isync
+	STBCIX(R3,R0,R4)
+	isync
+	mtmsrd	r7
+	sync
+	isync
+	blr
+
+#endif /* CONFIG_PPC_PASEMI */
+
+
+#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE)
+/*
+ * SCOM access functions for 970 (FX only for now)
+ *
+ * unsigned long scom970_read(unsigned int address);
+ * void scom970_write(unsigned int address, unsigned long value);
+ *
+ * The address passed in is the 24 bits register address. This code
+ * is 970 specific and will not check the status bits, so you should
+ * know what you are doing.
+ */
+_GLOBAL(scom970_read)
+	/* interrupts off */
+	mfmsr	r4
+	ori	r0,r4,MSR_EE
+	xori	r0,r0,MSR_EE
+	mtmsrd	r0,1
+
+	/* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits
+	 * (including parity). On current CPUs they must be 0'd,
+	 * and finally or in RW bit
+	 */
+	rlwinm	r3,r3,8,0,15
+	ori	r3,r3,0x8000
+
+	/* do the actual scom read */
+	sync
+	mtspr	SPRN_SCOMC,r3
+	isync
+	mfspr	r3,SPRN_SCOMD		/* r3 = data read: the return value */
+	isync
+	mfspr	r0,SPRN_SCOMC		/* status is read but not checked */
+	isync
+
+	/* XXX:	fixup result on some buggy 970's (ouch ! we lost a bit, bah
+	 * that's the best we can do). Not implemented yet as we don't use
+	 * the scom on any of the bogus CPUs yet, but may have to be done
+	 * ultimately
+	 */
+
+	/* restore interrupts */
+	mtmsrd	r4,1
+	blr
+
+
+/*
+ * scom970_write: write the value in r4 to the SCOM register whose 24-bit
+ * address is in r3.  Runs with MSR_EE cleared; the original MSR is
+ * restored before returning.  The status read back from SCOMC is not
+ * checked.
+ */
+_GLOBAL(scom970_write)
+	/* interrupts off */
+	mfmsr	r5
+	ori	r0,r5,MSR_EE
+	xori	r0,r0,MSR_EE
+	mtmsrd	r0,1
+
+	/* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits
+	 * (including parity). On current CPUs they must be 0'd.
+	 */
+
+	rlwinm	r3,r3,8,0,15
+
+	sync
+	mtspr	SPRN_SCOMD,r4      /* write data */
+	isync
+	mtspr	SPRN_SCOMC,r3      /* write command */
+	isync
+	mfspr	r3,SPRN_SCOMC      /* read back status (unchecked) */
+	isync
+
+	/* restore interrupts */
+	mtmsrd	r5,1
+	blr
+#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */
+
+/* kexec_wait(phys_cpu)
+ *
+ * wait for the flag to change, indicating this kernel is going away but
+ * the slave code for the next one is at addresses 0 to 100.
+ *
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
+ *
+ * Physical (hardware) cpu id should be in r3.
+ *
+ * kexec_flag is located PC-relatively (via bcl/mflr), and the routine
+ * spins at low thread priority until the flag becomes non-zero, then
+ * jumps to address 0x60.  Without CONFIG_KEXEC_CORE the flag is never
+ * read and the routine spins forever.
+ */
+_GLOBAL(kexec_wait)
+	bcl	20,31,$+4
+1:	mflr	r5
+	addi	r5,r5,kexec_flag-1b	/* r5 = run-time address of kexec_flag */
+
+99:	HMT_LOW
+#ifdef CONFIG_KEXEC_CORE	/* use no memory without kexec */
+	lwz	r4,0(r5)
+	cmpwi	0,r4,0
+	beq	99b
+#ifdef CONFIG_PPC_BOOK3S_64
+	li	r10,0x60
+	mfmsr	r11
+	clrrdi	r11,r11,1	/* Clear MSR_LE */
+	mtsrr0	r10
+	mtsrr1	r11
+	rfid
+#else
+	/* Create TLB entry in book3e_secondary_core_init */
+	li	r4,0
+	ba	0x60
+#endif
+#endif
+	/*
+	 * Keep spinning rather than fall through into the kexec_flag data
+	 * word below.  Reached only when CONFIG_KEXEC_CORE is not set; the
+	 * paths above never return here.
+	 */
+	b	99b
+
+/* this can be in text because we won't change it until we are
+ * running in real anyways
+ */
+kexec_flag:
+	.long	0
+
+
+#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * BOOK3E has no real MMU mode, so we have to setup the initial TLB
+ * for a core to identity map v:0 to p:0.  This current implementation
+ * assumes that 1G is enough for kexec.
+ *
+ * Uses r9 and r10 as scratch and returns with blr.
+ */
+kexec_create_tlb:
+	/*
+	 * Invalidate all non-IPROT TLB entries to avoid any TLB conflict.
+	 * IPROT TLB entries should be >= PAGE_OFFSET and thus not conflict.
+	 */
+	PPC_TLBILX_ALL(0,R0)
+	sync
+	isync
+
+	mfspr	r10,SPRN_TLB1CFG
+	andi.	r10,r10,TLBnCFG_N_ENTRY	/* Extract # entries */
+	subi	r10,r10,1	/* Last entry: no conflict with kernel text */
+	lis	r9,MAS0_TLBSEL(1)@h
+	rlwimi	r9,r10,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r10) */
+
+/* Set up a temp identity mapping v:0 to p:0 and return to it. */
+	mtspr	SPRN_MAS0,r9
+
+	/* MAS1: valid, invalidation-protected, 1GB page size */
+	lis	r9,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
+	mtspr	SPRN_MAS1,r9
+
+	/* MAS2: effective address 0 plus the MAS2_M_IF_NEEDED attribute */
+	LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
+	mtspr	SPRN_MAS2,r9
+
+	/* MAS3/MAS7: physical address 0 with the SR, SW and SX permission bits */
+	LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
+	mtspr	SPRN_MAS3,r9
+	li	r9,0
+	mtspr	SPRN_MAS7,r9
+
+	tlbwe
+	isync
+	blr
+#endif
+
+/* kexec_smp_wait(void)
+ *
+ * call with interrupts off
+ * note: this is a terminal routine, it does not save lr
+ *
+ * get phys id from paca
+ * switch to real mode
+ * mark the paca as no longer used
+ * join other cpus in kexec_wait(phys_id)
+ */
+_GLOBAL(kexec_smp_wait)
+	lhz	r3,PACAHWCPUID(r13)	/* r3 = hw cpu id, argument for kexec_wait */
+	bl	real_mode		/* must leave r3 intact */
+
+	/* record in the paca that this cpu has reached real mode */
+	li	r4,KEXEC_STATE_REAL_MODE
+	stb	r4,PACAKEXECSTATE(r13)
+
+	b	kexec_wait		/* tail branch, never comes back */
+
+/*
+ * switch to real mode (turn mmu off)
+ * we use the early kernel trick that the hardware ignores bits
+ * 0 and 1 (big endian) of the effective address in real mode
+ *
+ * don't overwrite r3 here, it is live for kexec_wait above.
+ *
+ * Uses r9-r12 as scratch. Returns to the caller's LR via rfid (or, on
+ * Book3E, via the blr at the end of kexec_create_tlb).
+ */
+SYM_FUNC_START_LOCAL(real_mode)	/* assume normal blr return */
+#ifdef CONFIG_PPC_BOOK3E_64
+	/* Create an identity mapping. */
+	b	kexec_create_tlb
+#else
+1:	li	r9,MSR_RI
+	li	r10,MSR_DR|MSR_IR
+	mflr	r11		/* return address to SRR0 */
+	mfmsr	r12
+	andc	r9,r12,r9	/* r9  = current MSR with RI cleared */
+	andc	r10,r12,r10	/* r10 = current MSR with DR and IR cleared */
+
+	mtmsrd	r9,1		/* clear RI before loading SRR0/SRR1 */
+	mtspr	SPRN_SRR1,r10	/* MSR to take effect at rfid: translation off */
+	mtspr	SPRN_SRR0,r11
+	rfid			/* resume at the caller's return address */
+#endif
+SYM_FUNC_END(real_mode)
+
+/*
+ * kexec_sequence(newstack, start, image, control, clear_all(),
+	          copy_with_mmu_off)
+ *
+ * does the grungy work with stack switching and real mode switches
+ * also does simple calls to other code
+ *
+ * Arguments as consumed below:
+ *   r3 = newstack            r4 = start (real address of the new image)
+ *   r5 = image (virtual)     r6 = control (unused)
+ *   r7 = clear_all() function (descriptor on ELFv1), may be 0
+ *   r8 = copy_with_mmu_off   (non-zero: turn the MMU off before copying)
+ *
+ * Does not return: it ends by branching to 'start'.
+ */
+
+_GLOBAL(kexec_sequence)
+	mflr	r0
+	std	r0,16(r1)
+
+	/* switch stacks to newstack -- &kexec_stack.stack */
+	stdu	r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3)
+	mr	r1,r3
+
+	/* zero the LR save slot of the new top frame */
+	li	r0,0
+	std	r0,16(r1)
+
+	/* save regs for local vars on new stack.
+	 * yes, we won't go back, but ...
+	 */
+	std	r31,-8(r1)
+	std	r30,-16(r1)
+	std	r29,-24(r1)
+	std	r28,-32(r1)
+	std	r27,-40(r1)
+	std	r26,-48(r1)
+	std	r25,-56(r1)
+
+	stdu	r1,-STACK_FRAME_MIN_SIZE-64(r1)
+
+	/* save args into preserved regs */
+	mr	r31,r3			/* newstack (both) */
+	mr	r30,r4			/* start (real) */
+	mr	r29,r5			/* image (virt) */
+	mr	r28,r6			/* control, unused */
+	mr	r27,r7			/* clear_all() fn desc */
+	mr	r26,r8			/* copy_with_mmu_off */
+	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
+
+	/* disable interrupts, we are overwriting kernel data next */
+#ifdef CONFIG_PPC_BOOK3E_64
+	wrteei	0
+#else
+	mfmsr	r3
+	rlwinm	r3,r3,0,17,15		/* wrap-around mask: clears only bit 16 (0x8000) */
+	mtmsrd	r3,1
+#endif
+
+	/* We need to turn the MMU off unless we are in hash mode
+	 * under a hypervisor
+	 */
+	cmpdi	r26,0
+	beq	1f
+	bl	real_mode
+1:
+	/* copy dest pages, flush whole dest image */
+	mr	r3,r29
+	bl	CFUNC(kexec_copy_flush)	/* (image) */
+
+	/* turn off mmu now if not done earlier */
+	cmpdi	r26,0
+	bne	1f
+	bl	real_mode
+
+	/* copy  0x100 bytes starting at start to 0 */
+1:	li	r3,0
+	mr	r4,r30		/* start, aka phys mem offset */
+	li	r5,0x100
+	li	r6,0
+	bl	copy_and_flush	/* (dest, src, copy limit, start offset) */
+1:	/* assume normal blr return */
+
+	/* release other cpus to the new kernel secondary start at 0x60 */
+	/*
+	 * LR still holds the return address of the bl above, i.e. the
+	 * address of the 1: label, so kexec_flag-1b(r5) addresses kexec_flag.
+	 */
+	mflr	r5
+	li	r6,1
+	stw	r6,kexec_flag-1b(r5)
+
+	/* skip the clear_all() call when no function was supplied */
+	cmpdi	r27,0
+	beq	1f
+
+	/* clear out hardware hash page table and tlb */
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	ld	r12,0(r27)		/* deref function descriptor */
+#else
+	mr	r12,r27
+#endif
+	mtctr	r12
+	bctrl				/* mmu_hash_ops.hpte_clear_all(void); */
+
+/*
+ *   kexec image calling is:
+ *      the first 0x100 bytes of the entry point are copied to 0
+ *
+ *      all slaves branch to slave = 0x60 (absolute)
+ *              slave(phys_cpu_id);
+ *
+ *      master goes to start = entry point
+ *              start(phys_cpu_id, start, 0);
+ *
+ *
+ *   a wrapper is needed to call existing kernels, here is an approximate
+ *   description of one method:
+ *
+ * v2: (2.6.10)
+ *   start will be near the boot_block (maybe 0x100 bytes before it?)
+ *   it will have a 0x60, which will b to boot_block, where it will wait
+ *   and 0 will store phys into struct boot-block and load r3 from there,
+ *   copy kernel 0-0x100 and tell slaves to back down to 0x60 again
+ *
+ * v1: (2.6.9)
+ *    boot block will have all cpus scanning device tree to see if they
+ *    are the boot cpu ?????
+ *    other device tree differences (prop sizes, va vs pa, etc)...
+ */
+1:	mr	r3,r25	# my phys cpu
+	mr	r4,r30	# start, aka phys mem offset
+	mtlr	r4
+	li	r5,0
+	blr	/* image->start(physid, image->start, 0); */
+#endif /* CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
new file mode 100644
index 0000000000..f6d6ae0a16
--- /dev/null
+++ b/arch/powerpc/kernel/module.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*  Kernel module help for powerpc.
+    Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+    Copyright (C) 2008 Freescale Semiconductor, Inc.
+
+*/
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/bug.h>
+#include <asm/module.h>
+#include <linux/uaccess.h>
+#include <asm/firmware.h>
+#include <linux/sort.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+
+static LIST_HEAD(module_bug_list);
+
+/*
+ * Look up a section header by name.
+ *
+ * Section names are resolved through the section-name string table that
+ * hdr->e_shstrndx points at. Index 0 is skipped. Returns the first header
+ * whose name equals @name, or NULL when there is none.
+ */
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+				    const Elf_Shdr *sechdrs,
+				    const char *name)
+{
+	char *strtab = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	unsigned int idx = 1;
+
+	while (idx < hdr->e_shnum) {
+		const Elf_Shdr *shdr = &sechdrs[idx];
+
+		if (!strcmp(strtab + shdr->sh_name, name))
+			return shdr;
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Arch hook run once a module's sections are in place: sets up the ftrace
+ * trampolines and then applies each kind of fixup to the matching section,
+ * if the module has one.
+ *
+ * Returns 0 on success or the error reported by module_finalize_ftrace().
+ */
+int module_finalize(const Elf_Ehdr *hdr,
+		const Elf_Shdr *sechdrs, struct module *me)
+{
+	const Elf_Shdr *sect;
+	void *start;
+	int rc;
+
+	rc = module_finalize_ftrace(me, sechdrs);
+	if (rc)
+		return rc;
+
+	/* CPU feature fixups */
+	sect = find_section(hdr, sechdrs, "__ftr_fixup");
+	if (sect) {
+		start = (void *)sect->sh_addr;
+		do_feature_fixups(cur_cpu_spec->cpu_features, start,
+				  start + sect->sh_size);
+	}
+
+	/* MMU feature fixups */
+	sect = find_section(hdr, sechdrs, "__mmu_ftr_fixup");
+	if (sect) {
+		start = (void *)sect->sh_addr;
+		do_feature_fixups(cur_cpu_spec->mmu_features, start,
+				  start + sect->sh_size);
+	}
+
+#ifdef CONFIG_PPC64
+	/* Firmware feature fixups */
+	sect = find_section(hdr, sechdrs, "__fw_ftr_fixup");
+	if (sect) {
+		start = (void *)sect->sh_addr;
+		do_feature_fixups(powerpc_firmware_features, start,
+				  start + sect->sh_size);
+	}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	/* Remember where the function descriptors (.opd) of this module live */
+	sect = find_section(hdr, sechdrs, ".opd");
+	if (sect) {
+		me->arch.start_opd = sect->sh_addr;
+		me->arch.end_opd = sect->sh_addr + sect->sh_size;
+	}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+	sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
+	if (sect) {
+		start = (void *)sect->sh_addr;
+		do_barrier_nospec_fixups_range(barrier_nospec_enabled, start,
+					       start + sect->sh_size);
+	}
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+	sect = find_section(hdr, sechdrs, "__lwsync_fixup");
+	if (sect) {
+		start = (void *)sect->sh_addr;
+		do_lwsync_fixups(cur_cpu_spec->cpu_features, start,
+				 start + sect->sh_size);
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate @size bytes of module memory from the virtual range
+ * [@start, @end). When @nowarn is true the allocation is made with
+ * __GFP_NOWARN. The mapping is PAGE_KERNEL when strict module RWX is
+ * enabled and PAGE_KERNEL_EXEC otherwise.
+ */
+static __always_inline void *
+__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
+{
+	gfp_t gfp = GFP_KERNEL;
+	pgprot_t prot;
+
+	if (strict_module_rwx_enabled())
+		prot = PAGE_KERNEL;
+	else
+		prot = PAGE_KERNEL_EXEC;
+
+	if (nowarn)
+		gfp |= __GFP_NOWARN;
+
+	/*
+	 * Huge page allocations are not used for modules until more testing
+	 * has been done; STRICT_MODULE_RWX may need extra work to support
+	 * them as well.
+	 */
+	return __vmalloc_node_range(size, 1, start, end, gfp, prot,
+				    VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+/*
+ * Allocate memory for a module image.
+ *
+ * With MODULES_VADDR defined, the window starting 32M below _etext is
+ * tried first (quietly) so that branches into the kernel text need no
+ * trampolines; if that is not applicable or fails, the whole
+ * MODULES_VADDR..MODULES_END range is used. Without MODULES_VADDR the
+ * vmalloc range is used.
+ */
+void *module_alloc(unsigned long size)
+{
+#ifdef MODULES_VADDR
+	unsigned long near_start = (unsigned long)_etext - SZ_32M;
+	void *mem;
+
+	BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+	if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > near_start) {
+		mem = __module_alloc(size, near_start, MODULES_END, true);
+		if (mem)
+			return mem;
+	}
+
+	return __module_alloc(size, MODULES_VADDR, MODULES_END, false);
+#else
+	return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
+#endif
+}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
new file mode 100644
index 0000000000..816a63fd71
--- /dev/null
+++ b/arch/powerpc/kernel/module_32.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*  Kernel module help for PPC.
+    Copyright (C) 2001 Rusty Russell.
+
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/sort.h>
+#include <asm/setup.h>
+#include <asm/code-patching.h>
+
+/*
+ * Count the R_PPC_REL24 relocations in @rela whose (symbol, addend) pair
+ * differs from the previously counted one. The caller sorts the table
+ * first so that identical pairs are adjacent. With CONFIG_DYNAMIC_FTRACE
+ * one extra slot is added for ftrace_caller.
+ */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+	unsigned int prev_sym = 0, prev_addend = 0;
+	unsigned int total = 0;
+	unsigned int n;
+
+	for (n = 0; n < num; n++) {
+		const Elf32_Rela *r = &rela[n];
+
+		/* Only 24-bit relocs can need a stub */
+		if (ELF32_R_TYPE(r->r_info) != R_PPC_REL24)
+			continue;
+
+		/* Same target as the last counted reloc: shares its stub */
+		if (prev_sym == ELF32_R_SYM(r->r_info) &&
+		    prev_addend == r->r_addend)
+			continue;
+
+		total++;
+		prev_sym = ELF32_R_SYM(r->r_info);
+		prev_addend = r->r_addend;
+	}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	total++;	/* add one for ftrace_caller */
+#endif
+	return total;
+}
+
+/*
+ * sort() comparator for Elf32_Rela entries, keyed on r_info then r_addend.
+ *
+ * Note the crossed assignment: the left-hand key is taken from _y and the
+ * right-hand key from _x, so the resulting order is descending.
+ * count_relocs() only needs equal keys to end up next to each other.
+ *
+ * The whole r_info word is compared (not just ELF32_R_SYM(r_info)) because
+ * that is cheaper and does not change how well the sorting or counting
+ * performs.
+ */
+static int relacmp(const void *_x, const void *_y)
+{
+	const Elf32_Rela *lhs = _y;
+	const Elf32_Rela *rhs = _x;
+
+	if (lhs->r_info != rhs->r_info)
+		return lhs->r_info < rhs->r_info ? -1 : 1;
+
+	if (lhs->r_addend != rhs->r_addend)
+		return lhs->r_addend < rhs->r_addend ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Work out how many bytes of PLT the module may need.
+ *
+ * @is_init selects which half is sized: sections whose name contains
+ * ".init" when non-zero, all other sections when zero. Sections whose name
+ * contains ".debug" are ignored. Every remaining SHT_RELA section is
+ * sorted in place and contributes one struct ppc_plt_entry per relocation
+ * counted by count_relocs().
+ */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+				  const Elf32_Shdr *sechdrs,
+				  const char *secstrings,
+				  int is_init)
+{
+	unsigned long bytes = 0;
+	unsigned int sec;
+
+	/* Index 0 is skipped */
+	for (sec = 1; sec < hdr->e_shnum; sec++) {
+		const Elf32_Shdr *shdr = &sechdrs[sec];
+		const char *secname = secstrings + shdr->sh_name;
+		bool init_sec = strstr(secname, ".init") != NULL;
+		void *relas = (void *)hdr + shdr->sh_offset;
+		unsigned int nrela = shdr->sh_size / sizeof(Elf32_Rela);
+
+		/* Wrong half (init vs. core) for this pass */
+		if (init_sec != is_init)
+			continue;
+
+		/* Debug sections never need trampolines */
+		if (strstr(secname, ".debug"))
+			continue;
+
+		if (shdr->sh_type != SHT_RELA)
+			continue;
+
+		pr_debug("Found relocations in section %u\n", sec);
+		pr_debug("Ptr: %p.  Number: %u\n", relas, nrela);
+
+		/*
+		 * Sorting on the (r_info, r_addend) key costs O(n*log n) but
+		 * lets count_relocs() find duplicates in a single linear
+		 * pass.
+		 */
+		sort(relas, nrela, sizeof(Elf32_Rela), relacmp, NULL);
+
+		bytes += count_relocs(relas, nrela)
+			* sizeof(struct ppc_plt_entry);
+	}
+
+	return bytes;
+}
+
+/*
+ * Locate the module's .plt and .init.plt sections, record their indices in
+ * me->arch and resize both to hold the trampolines the module may need.
+ *
+ * Returns 0 on success, -ENOEXEC if either section is missing.
+ */
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+			      Elf32_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int sec;
+
+	for (sec = 0; sec < hdr->e_shnum; sec++) {
+		const char *secname = secstrings + sechdrs[sec].sh_name;
+
+		if (!strcmp(secname, ".init.plt"))
+			me->arch.init_plt_section = sec;
+		else if (!strcmp(secname, ".plt"))
+			me->arch.core_plt_section = sec;
+	}
+
+	/* An index of zero means the section was not found */
+	if (!(me->arch.core_plt_section && me->arch.init_plt_section)) {
+		pr_err("Module doesn't contain .plt or .init.plt sections.\n");
+		return -ENOEXEC;
+	}
+
+	/* Replace the placeholder sizes with the computed worst case */
+	sechdrs[me->arch.core_plt_section].sh_size =
+		get_plt_size(hdr, sechdrs, secstrings, 0);
+	sechdrs[me->arch.init_plt_section].sh_size =
+		get_plt_size(hdr, sechdrs, secstrings, 1);
+
+	return 0;
+}
+
+/*
+ * Return 1 if @entry already holds a trampoline to @val, i.e. its first two
+ * instructions are the lis/addi pair that loads @val into r12, else 0.
+ */
+static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
+{
+	return entry->jump[0] == PPC_RAW_LIS(_R12, PPC_HA(val)) &&
+	       entry->jump[1] == PPC_RAW_ADDI(_R12, _R12, PPC_LO(val));
+}
+
+/*
+ * Return the address of a PLT trampoline that jumps to @val, creating it
+ * if necessary.
+ *
+ * The core PLT is used when @location lies in the module core, the init PLT
+ * otherwise. Existing entries are reused when they already target @val; a
+ * new one is written into the first entry whose first instruction word is
+ * zero. Returns 0 if patching any of the four instructions fails.
+ */
+static uint32_t do_plt_call(void *location,
+			    Elf32_Addr val,
+			    const Elf32_Shdr *sechdrs,
+			    struct module *mod)
+{
+	/* lis r12,val@ha ; addi r12,r12,val@l ; mtctr r12 ; bctr */
+	const u32 tramp[] = {
+		PPC_RAW_LIS(_R12, PPC_HA(val)),
+		PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR(),
+	};
+	struct ppc_plt_entry *entry;
+	unsigned int plt_sec, n;
+
+	pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
+
+	/* Init, or core PLT? */
+	plt_sec = within_module_core((unsigned long)location, mod) ?
+		  mod->arch.core_plt_section : mod->arch.init_plt_section;
+	entry = (void *)sechdrs[plt_sec].sh_addr;
+
+	/* Reuse a matching entry, or stop at the first unused one */
+	for (; entry->jump[0]; entry++)
+		if (entry_matches(entry, val))
+			return (uint32_t)entry;
+
+	for (n = 0; n < ARRAY_SIZE(tramp); n++)
+		if (patch_instruction(&entry->jump[n], ppc_inst(tramp[n])))
+			return 0;
+
+	pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
+	return (uint32_t)entry;
+}
+
+/*
+ * Replace the low 16 bits of the 32-bit word containing @loc with @value,
+ * going through patch_instruction(). @loc is first rounded down to a
+ * 4-byte boundary. Returns what patch_instruction() returns.
+ */
+static int patch_location_16(uint32_t *loc, u16 value)
+{
+	uint32_t *word = PTR_ALIGN_DOWN(loc, sizeof(u32));
+	uint32_t patched = (*word & 0xffff0000) | value;
+
+	return patch_instruction(word, ppc_inst(patched));
+}
+
+/*
+ * Apply the RELA relocations of section @relsec to the section it refers
+ * to (sechdrs[relsec].sh_info).
+ *
+ * @sechdrs:  section headers of the module
+ * @strtab:   string table (not used here)
+ * @symindex: index of the symbol table section
+ * @relsec:   index of the relocation section to process
+ * @module:   module being loaded
+ *
+ * Returns 0 on success, -EFAULT if patching fails or no PLT trampoline
+ * could be set up, -ENOEXEC on an unsupported relocation type.
+ */
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *module)
+{
+	unsigned int i;
+	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+	uint32_t value;
+
+	pr_debug("Applying ADD relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rela[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rela[i].r_info);
+		/* `Everything is relative'. */
+		value = sym->st_value + rela[i].r_addend;
+
+		switch (ELF32_R_TYPE(rela[i].r_info)) {
+		case R_PPC_ADDR32:
+			/* Simply set it */
+			*(uint32_t *)location = value;
+			break;
+
+		case R_PPC_ADDR16_LO:
+			/* Low half of the symbol */
+			if (patch_location_16(location, PPC_LO(value)))
+				return -EFAULT;
+			break;
+
+		case R_PPC_ADDR16_HI:
+			/* Higher half of the symbol */
+			if (patch_location_16(location, PPC_HI(value)))
+				return -EFAULT;
+			break;
+
+		case R_PPC_ADDR16_HA:
+			/* High half via PPC_HA(), the form the lis/addi pair in the PLT uses */
+			if (patch_location_16(location, PPC_HA(value)))
+				return -EFAULT;
+			break;
+
+		case R_PPC_REL24:
+			/* Target out of reach of a 24-bit branch: go via the PLT */
+			if ((int)(value - (uint32_t)location) < -0x02000000
+			    || (int)(value - (uint32_t)location) >= 0x02000000) {
+				value = do_plt_call(location, value,
+						    sechdrs, module);
+				if (!value)
+					return -EFAULT;
+			}
+
+			/* Only replace bits 2 through 26 */
+			pr_debug("REL24 value = %08X. location = %08X\n",
+			       value, (uint32_t)location);
+			pr_debug("Location before: %08X.\n",
+			       *(uint32_t *)location);
+			value = (*(uint32_t *)location & ~PPC_LI_MASK) |
+				PPC_LI(value - (uint32_t)location);
+
+			if (patch_instruction(location, ppc_inst(value)))
+				return -EFAULT;
+
+			pr_debug("Location after: %08X.\n",
+			       *(uint32_t *)location);
+			/*
+			 * Mask the patched instruction word to show the branch
+			 * offset; the address itself must not be masked and
+			 * then dereferenced.
+			 */
+			pr_debug("ie. jump to %08X+%08X = %08X\n",
+				 PPC_LI(*(uint32_t *)location), (uint32_t)location,
+				 PPC_LI(*(uint32_t *)location) + (uint32_t)location);
+			break;
+
+		case R_PPC_REL32:
+			/* 32-bit relative jump. */
+			*(uint32_t *)location = value - (uint32_t)location;
+			break;
+
+		default:
+			pr_err("%s: unknown ADD relocation: %u\n",
+			       module->name,
+			       ELF32_R_TYPE(rela[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Decode the PLT trampoline at @addr and store the address it jumps to in
+ * *@target.
+ *
+ * Returns 0 on success, -EFAULT if the four instruction words cannot be
+ * read, -EINVAL if they are not the lis/addi/mtctr/bctr sequence that
+ * do_plt_call() writes.
+ */
+notrace int module_trampoline_target(struct module *mod, unsigned long addr,
+				     unsigned long *target)
+{
+	ppc_inst_t jmp[4];
+	unsigned int n;
+	u32 hi, lo;
+
+	/* Find where the trampoline jumps to */
+	if (copy_inst_from_kernel_nofault(jmp, (void *)addr))
+		return -EFAULT;
+	for (n = 1; n < ARRAY_SIZE(jmp); n++)
+		if (__copy_inst_from_kernel_nofault(&jmp[n], (void *)addr + 4 * n))
+			return -EFAULT;
+
+	hi = ppc_inst_val(jmp[0]);
+	lo = ppc_inst_val(jmp[1]);
+
+	/* verify that this is what we expect it to be */
+	if ((hi & 0xffff0000) != PPC_RAW_LIS(_R12, 0) ||
+	    (lo & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) ||
+	    ppc_inst_val(jmp[2]) != PPC_RAW_MTCTR(_R12) ||
+	    ppc_inst_val(jmp[3]) != PPC_RAW_BCTR())
+		return -EINVAL;
+
+	/*
+	 * Rebuild the target from the two immediates. When bit 15 of the low
+	 * half is set, 0x10000 is subtracted to undo the carry that the
+	 * high-adjusted (HA) upper half already contains.
+	 */
+	addr = (lo & 0xffff) | ((hi & 0xffff) << 16);
+	if (addr & 0x8000)
+		addr -= 0x10000;
+
+	*target = addr;
+
+	return 0;
+}
+
+/*
+ * Create the PLT trampoline(s) ftrace needs for this module: one to
+ * ftrace_caller and, with CONFIG_DYNAMIC_FTRACE_WITH_REGS, one to
+ * ftrace_regs_caller. Returns 0 on success, -ENOENT if a trampoline could
+ * not be set up.
+ */
+int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
+{
+	void *text = module->mem[MOD_TEXT].base;
+
+	module->arch.tramp = do_plt_call(text, (unsigned long)ftrace_caller,
+					 sechdrs, module);
+	if (!module->arch.tramp)
+		return -ENOENT;
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	module->arch.tramp_regs = do_plt_call(text,
+					      (unsigned long)ftrace_regs_caller,
+					      sechdrs, module);
+	if (!module->arch.tramp_regs)
+		return -ENOENT;
+#endif
+
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 0000000000..7112adc597
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,1109 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*  Kernel module help for PPC64.
+    Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/ftrace.h>
+#include <linux/bug.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <asm/module.h>
+#include <asm/firmware.h>
+#include <asm/code-patching.h>
+#include <linux/sort.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+
+/* FIXME: We don't do .init separately.  To do this, we'd need to have
+   a separate r2 value in the init and core section, and stub between
+   them, too.
+
+   Using a magic allocator which places modules within 32MB solves
+   this, and makes other things simpler.  Anton?
+   --RR.  */
+
+/*
+ * Check that the module's ABI level (low two bits of e_flags) fits the
+ * kernel: exactly 2 on an ELFv2 kernel, anything below 2 otherwise.
+ */
+bool module_elf_check_arch(Elf_Ehdr *hdr)
+{
+	unsigned long abi_level = hdr->e_flags & 0x3;
+
+	return IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) ? abi_level == 2
+						   : abi_level < 2;
+}
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+
+/*
+ * ELFv2 variant: wrap a plain code address in a func_desc_t; all other
+ * members are left zero.
+ */
+static func_desc_t func_desc(unsigned long addr)
+{
+	return (func_desc_t){ .addr = addr };
+}
+
+/* PowerPC64 specific values for the Elf64_Sym st_other field.  */
+#define STO_PPC64_LOCAL_BIT	5
+#define STO_PPC64_LOCAL_MASK	(7 << STO_PPC64_LOCAL_BIT)
+#define PPC64_LOCAL_ENTRY_OFFSET(other)					\
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
+
+/*
+ * ELFv2 variant: distance in bytes from a function's global entry point to
+ * its local entry point, decoded from the three-bit field in st_other.
+ * Without this offset the callee would assume r12 holds the address of the
+ * start of the function and try to derive r2 from it.
+ */
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+	unsigned int other = sym->st_other;
+
+	return PPC64_LOCAL_ENTRY_OFFSET(other);
+}
+#else
+
+/*
+ * ELFv1 variant: @addr is the address of a function descriptor, so return
+ * a copy of the descriptor found there.
+ */
+static func_desc_t func_desc(unsigned long addr)
+{
+	const struct func_desc *desc = (struct func_desc *)addr;
+
+	return *desc;
+}
+
+/* ELFv1 variant: always 0, no local entry offset is applied. */
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+	return 0;
+}
+
+/*
+ * If @ptr lies inside the module's function descriptor range
+ * [start_opd, end_opd), return the result of dereferencing it as a
+ * function descriptor; otherwise return @ptr unchanged.
+ */
+void *dereference_module_function_descriptor(struct module *mod, void *ptr)
+{
+	bool in_opd = ptr >= (void *)mod->arch.start_opd &&
+		      ptr < (void *)mod->arch.end_opd;
+
+	return in_opd ? dereference_function_descriptor(ptr) : ptr;
+}
+#endif
+
+/* Resolve @addr through func_desc() and return the code address it names. */
+static unsigned long func_addr(unsigned long addr)
+{
+	func_desc_t desc = func_desc(addr);
+
+	return desc.addr;
+}
+
+/* Return the code address stored in a stub's function descriptor. */
+static unsigned long stub_func_addr(func_desc_t func)
+{
+	return func.addr;
+}
+
+/* The four ASCII bytes "stub"; stored in ppc64_stub_entry.magic */
+#define STUB_MAGIC 0x73747562 /* stub */
+
+/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
+   the kernel itself).  But on PPC64, these need to be used for every
+   jump, actually, to reset r2 (TOC+0x8000). */
+struct ppc64_stub_entry {
+	/*
+	 * 28 byte jump instruction sequence (7 instructions) that can
+	 * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned
+	 * with PCREL kernels that use prefix instructions in the stub.
+	 */
+	u32 jump[7];
+	/* Used by ftrace to identify stubs */
+	u32 magic;
+	/*
+	 * Data for the above code. With jump[] and magic in front of it this
+	 * member sits at byte offset 32, the displacement that
+	 * ppc64_stub_insns uses in its "ld r12,32(r11)".
+	 */
+	func_desc_t funcdata;
+} __aligned(8);
+
+/*
+ * One slot of the module's GOT section (".mygot"); get_got_size() sizes
+ * that section as a multiple of this structure.
+ */
+struct ppc64_got_entry {
+	u64 addr;	/* 64-bit address held in this slot */
+};
+
+/*
+ * PPC64 uses 24 bit jumps, but we need to jump into other modules or
+ * the kernel which may be further.  So we jump to a stub.
+ *
+ * Target address and TOC are loaded from function descriptor in the
+ * ppc64_stub_entry.
+ *
+ * r12 is used to generate the target address, which is required for the
+ * ELFv2 global entry point calling convention.
+ *
+ * TOC handling:
+ * - PCREL does not have a TOC.
+ * - ELFv2 non-PCREL just has to save r2, the callee is responsible for
+ *   setting its own TOC pointer at the global entry address.
+ * - ELFv1 must load the new TOC pointer from the function descriptor.
+ *
+ * This array is only the instruction template: the address-forming
+ * instructions carry zero immediates here.
+ * NOTE(review): presumably the real offsets are patched in when a stub is
+ * created -- confirm against the stub creation code.
+ */
+static u32 ppc64_stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	/* pld r12,addr */
+	PPC_PREFIX_8LS | __PPC_PRFX_R(1),
+	PPC_INST_PLD | ___PPC_RT(_R12),
+#else
+	/* addis r11,r2,0 ; addi r11,r11,0 -- r11 is the base for the loads below */
+	PPC_RAW_ADDIS(_R11, _R2, 0),
+	PPC_RAW_ADDI(_R11, _R11, 0),
+	/* Save current r2 value in magic place on the stack. */
+	PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET),
+	/* ld r12,32(r11): 32 is the offset of funcdata in ppc64_stub_entry */
+	PPC_RAW_LD(_R12, _R11, 32),
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	/* Set up new r2 from function descriptor */
+	PPC_RAW_LD(_R2, _R11, 40),
+#endif
+#endif
+	/* Branch to the address now in r12 */
+	PPC_RAW_MTCTR(_R12),
+	PPC_RAW_BCTR(),
+};
+
+/*
+ * Count how many different r_type relocations (different symbol,
+ * different addend).
+ *
+ * @rela:   relocation table, already sorted so that entries with the same
+ *          symbol and addend are adjacent
+ * @num:    number of entries in @rela
+ * @r_type: relocation type to count; all other types are ignored
+ *
+ * The previous addend is tracked at its full 64-bit signed width. Keeping
+ * it in an unsigned int made every negative or >32-bit addend compare as
+ * "different", so such duplicates were each counted separately.
+ */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num,
+				 unsigned long r_type)
+{
+	unsigned int i, r_info, _count_relocs;
+	Elf64_Sxword r_addend;
+
+	/* FIXME: Only count external ones --RR */
+	_count_relocs = 0;
+	r_info = 0;
+	r_addend = 0;
+	for (i = 0; i < num; i++)
+		/* Only count r_type relocs, others don't need stubs */
+		if (ELF64_R_TYPE(rela[i].r_info) == r_type &&
+		    (r_info != ELF64_R_SYM(rela[i].r_info) ||
+		     r_addend != rela[i].r_addend)) {
+			_count_relocs++;
+			r_info = ELF64_R_SYM(rela[i].r_info);
+			r_addend = rela[i].r_addend;
+		}
+
+	return _count_relocs;
+}
+
+/*
+ * sort() comparator for Elf64_Rela entries, keyed on r_info then r_addend.
+ *
+ * Note the crossed assignment: the left-hand key is taken from _y and the
+ * right-hand key from _x, so the resulting order is descending. The
+ * counting helpers only need equal keys to end up next to each other.
+ *
+ * The whole r_info word is compared (not just ELF64_R_SYM(r_info)) because
+ * that is cheaper and does not change how well the sorting or counting
+ * performs.
+ */
+static int relacmp(const void *_x, const void *_y)
+{
+	const Elf64_Rela *lhs = _y;
+	const Elf64_Rela *rhs = _x;
+
+	if (lhs->r_info != rhs->r_info)
+		return lhs->r_info < rhs->r_info ? -1 : 1;
+
+	if (lhs->r_addend != rhs->r_addend)
+		return lhs->r_addend < rhs->r_addend ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Upper bound, in bytes, on the stub section the module needs.
+ *
+ * Every SHT_RELA section is sorted in place and its R_PPC_REL24 (and, on
+ * PCREL kernels, R_PPC64_REL24_NOTOC) relocations are counted. One spare
+ * entry keeps the table zero-terminated; dynamic ftrace adds one or two
+ * more for its trampolines.
+ */
+static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
+				    const Elf64_Shdr *sechdrs)
+{
+	/* Start at one so the table always ends in a zeroed entry */
+	unsigned long nr_stubs = 1;
+	unsigned int sec;
+
+	for (sec = 1; sec < hdr->e_shnum; sec++) {
+		const Elf64_Shdr *shdr = &sechdrs[sec];
+		void *relas = (void *)shdr->sh_addr;
+		u64 nrela = shdr->sh_size / sizeof(Elf64_Rela);
+
+		if (shdr->sh_type != SHT_RELA)
+			continue;
+
+		pr_debug("Found relocations in section %u\n", sec);
+		pr_debug("Ptr: %p.  Number: %Lu\n", relas, nrela);
+
+		/*
+		 * Sorting on the (r_info, r_addend) key costs O(n*log n) but
+		 * lets count_relocs() find duplicates in a single linear
+		 * pass.
+		 */
+		sort(relas, nrela, sizeof(Elf64_Rela), relacmp, NULL);
+
+		nr_stubs += count_relocs(relas, nrela, R_PPC_REL24);
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		nr_stubs += count_relocs(relas, nrela, R_PPC64_REL24_NOTOC);
+#endif
+	}
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE)) {
+		/* trampoline to ftrace_caller */
+		nr_stubs++;
+		/* and another one for ftrace_regs_caller */
+		if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+			nr_stubs++;
+	}
+
+	pr_debug("Looks like a total of %lu stubs, max\n", nr_stubs);
+	return nr_stubs * sizeof(struct ppc64_stub_entry);
+}
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/*
+ * Count the relocations in @rela that refer to a symbol defined in section
+ * @pcpu and whose (symbol, addend) pair differs from the previously
+ * counted one. @rela is expected to be sorted so that identical pairs are
+ * adjacent.
+ *
+ * @sechdrs:  section headers of the module
+ * @rela:     relocation table to scan
+ * @num:      number of entries in @rela
+ * @symindex: index of the symbol table section
+ * @pcpu:     section index that marks a symbol as per-cpu
+ *
+ * The previous addend is tracked at its full 64-bit signed width. Keeping
+ * it in an unsigned int made every negative or >32-bit addend compare as
+ * "different", so such duplicates were each counted separately.
+ */
+static int count_pcpu_relocs(const Elf64_Shdr *sechdrs,
+			     const Elf64_Rela *rela, unsigned int num,
+			     unsigned int symindex, unsigned int pcpu)
+{
+	unsigned int i, r_info, _count_relocs;
+	Elf64_Sxword r_addend;
+
+	_count_relocs = 0;
+	r_info = 0;
+	r_addend = 0;
+
+	for (i = 0; i < num; i++) {
+		Elf64_Sym *sym;
+
+		/* This is the symbol it is referring to */
+		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+			+ ELF64_R_SYM(rela[i].r_info);
+
+		if (sym->st_shndx == pcpu &&
+		    (r_info != ELF64_R_SYM(rela[i].r_info) ||
+		     r_addend != rela[i].r_addend)) {
+			_count_relocs++;
+			r_info = ELF64_R_SYM(rela[i].r_info);
+			r_addend = rela[i].r_addend;
+		}
+	}
+
+	return _count_relocs;
+}
+
+/*
+ * Upper bound, in bytes, on the GOT section the module needs.
+ *
+ * Every SHT_RELA section is sorted in place and its R_PPC64_GOT_PCREL34
+ * relocations are counted; on SMP kernels, relocations against symbols in
+ * the per-cpu section are counted too. One spare entry keeps the table
+ * zero-terminated.
+ */
+static unsigned long get_got_size(const Elf64_Ehdr *hdr,
+				  const Elf64_Shdr *sechdrs,
+				  struct module *me)
+{
+	/* Start at one so the table always ends in a zeroed entry */
+	unsigned long nr_entries = 1;
+	unsigned int symtab_idx = 0;
+	unsigned int sec;
+
+	/* Locate the first symbol table section */
+	for (sec = 1; sec < hdr->e_shnum && !symtab_idx; sec++)
+		if (sechdrs[sec].sh_type == SHT_SYMTAB)
+			symtab_idx = sec;
+	WARN_ON_ONCE(!symtab_idx);
+
+	for (sec = 1; sec < hdr->e_shnum; sec++) {
+		const Elf64_Shdr *shdr = &sechdrs[sec];
+		void *relas = (void *)shdr->sh_addr;
+		u64 nrela = shdr->sh_size / sizeof(Elf64_Rela);
+
+		if (shdr->sh_type != SHT_RELA)
+			continue;
+
+		pr_debug("Found relocations in section %u\n", sec);
+		pr_debug("Ptr: %p.  Number: %llu\n", relas, nrela);
+
+		/*
+		 * Sorting on the (r_info, r_addend) key costs O(n*log n) but
+		 * lets the counting helpers find duplicates in a single
+		 * linear pass.
+		 */
+		sort(relas, nrela, sizeof(Elf64_Rela), relacmp, NULL);
+
+		nr_entries += count_relocs(relas, nrela, R_PPC64_GOT_PCREL34);
+
+		/*
+		 * Percpu data access typically gets linked with REL34
+		 * relocations, but the percpu section gets moved at load
+		 * time and requires that to be converted to GOT linkage.
+		 */
+		if (IS_ENABLED(CONFIG_SMP) && symtab_idx)
+			nr_entries += count_pcpu_relocs(sechdrs, relas, nrela,
+							symtab_idx,
+							me->arch.pcpu_section);
+	}
+
+	pr_debug("Looks like a total of %lu GOT entries, max\n", nr_entries);
+	return nr_entries * sizeof(struct ppc64_got_entry);
+}
+#else /* CONFIG_PPC_KERNEL_PCREL */
+
+/* Still needed for ELFv2, for .TOC. */
+/*
+ * Strip the leading '.' from every name in the version table @vers that
+ * has one. @size is the size of the table in bytes.
+ */
+static void dedotify_versions(struct modversion_info *vers,
+			      unsigned long size)
+{
+	struct modversion_info *end = (void *)vers + size;
+
+	for (; vers < end; vers++) {
+		if (vers->name[0] != '.')
+			continue;
+
+		/*
+		 * strlen() bytes starting one past the dot cover the rest of
+		 * the name plus its terminating NUL.
+		 */
+		memmove(vers->name, vers->name + 1, strlen(vers->name));
+	}
+}
+
+/*
+ * For every undefined symbol whose name starts with '.', advance st_name
+ * by one so the name loses its dot (".funcname" becomes "funcname").
+ * ".TOC." is additionally marked SHN_ABS so that it looks defined; its
+ * value is filled in later.
+ */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+	unsigned int n;
+
+	for (n = 1; n < numsyms; n++) {
+		Elf64_Sym *sym = &syms[n];
+		const char *name;
+
+		if (sym->st_shndx != SHN_UNDEF)
+			continue;
+
+		name = strtab + sym->st_name;
+		if (name[0] != '.')
+			continue;
+
+		if (!strcmp(name + 1, "TOC."))
+			sym->st_shndx = SHN_ABS;
+		sym->st_name++;
+	}
+}
+
+/*
+ * Find the SHN_ABS symbol named "TOC." (the de-dotted ".TOC.") in the
+ * symbol table section @symindex. Entry 0 is skipped. Returns NULL when
+ * the module has no such symbol.
+ */
+static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
+			       const char *strtab,
+			       unsigned int symindex)
+{
+	Elf64_Sym *symtab = (Elf64_Sym *)sechdrs[symindex].sh_addr;
+	unsigned int nsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
+	unsigned int n;
+
+	for (n = 1; n < nsyms; n++) {
+		Elf64_Sym *sym = &symtab[n];
+
+		if (sym->st_shndx != SHN_ABS)
+			continue;
+		if (!strcmp(strtab + sym->st_name, "TOC."))
+			return sym;
+	}
+
+	return NULL;
+}
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+
+/*
+ * Tell the caller whether section @name is an init section. @name is
+ * ignored here: no section is ever reported as init, matching the FIXME at
+ * the top of this file about .init not being handled separately.
+ */
+bool module_init_section(const char *name)
+{
+	/* We don't handle .init for the moment: always return false. */
+	return false;
+}
+
+/*
+ * Locate the arch-specific sections of a module, record their indices in
+ * me->arch and override the sizes of the sections that are filled in at
+ * load time.
+ *
+ * PCREL kernels look for .stubs, .data..percpu and .mygot; other kernels
+ * look for .stubs and .toc and strip the leading dots from __versions and
+ * from the symbol table.
+ *
+ * Returns 0 on success, -ENOEXEC if .stubs (or, on PCREL kernels, .mygot)
+ * is missing.
+ */
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+			      Elf64_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int i;
+
+	/* Find .toc and .stubs sections, symtab and strtab */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+			me->arch.stubs_section = i;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0)
+			me->arch.pcpu_section = i;
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) {
+			me->arch.got_section = i;
+			/* raise the section alignment to at least 8 bytes */
+			if (sechdrs[i].sh_addralign < 8)
+				sechdrs[i].sh_addralign = 8;
+		}
+#else
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
+			me->arch.toc_section = i;
+			/* raise the section alignment to at least 8 bytes */
+			if (sechdrs[i].sh_addralign < 8)
+				sechdrs[i].sh_addralign = 8;
+		}
+		else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+			dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+					  sechdrs[i].sh_size);
+
+		/* Separate test, not part of the name chain above */
+		if (sechdrs[i].sh_type == SHT_SYMTAB)
+			dedotify((void *)hdr + sechdrs[i].sh_offset,
+				 sechdrs[i].sh_size / sizeof(Elf64_Sym),
+				 (void *)hdr
+				 + sechdrs[sechdrs[i].sh_link].sh_offset);
+#endif
+	}
+
+	/* The scan starts at index 1, so 0 means "not found" */
+	if (!me->arch.stubs_section) {
+		pr_err("%s: doesn't contain .stubs.\n", me->name);
+		return -ENOEXEC;
+	}
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	if (!me->arch.got_section) {
+		pr_err("%s: doesn't contain .mygot.\n", me->name);
+		return -ENOEXEC;
+	}
+
+	/* Override the got size */
+	sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me);
+#else
+	/* If we don't have a .toc, just use .stubs.  We need to set r2
+	   to some reasonable value in case the module calls out to
+	   other functions via a stub, or if a function pointer escapes
+	   the module by some means.  */
+	if (!me->arch.toc_section)
+		me->arch.toc_section = me->arch.stubs_section;
+#endif
+
+	/* Override the stubs size */
+	sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+
+	return 0;
+}
+
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+
+/*
+ * Instruction template for the ftrace stub. create_ftrace_stub() copies it
+ * into the stub's jump[] array and then ORs the target offset into the
+ * immediate fields, which are zero here: words 2 and 3 (the paddi prefix and
+ * suffix) for CONFIG_PPC_KERNEL_PCREL, words 1 and 2 (addis/addi) otherwise.
+ */
+static u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)),
+	PPC_RAW_NOP(), /* align the prefix insn */
+	/* paddi r12,r12,addr */
+	PPC_PREFIX_MLS | __PPC_PRFX_R(0),
+	PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12),
+	PPC_RAW_MTCTR(_R12),
+	PPC_RAW_BCTR(),
+#else
+	PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+	PPC_RAW_ADDIS(_R12, _R12, 0),
+	PPC_RAW_ADDI(_R12, _R12, 0),
+	PPC_RAW_MTCTR(_R12),
+	PPC_RAW_BCTR(),
+#endif
+};
+
+/*
+ * For mprofile-kernel we use a special stub for ftrace_caller() because we
+ * can't rely on r2 containing this module's TOC when we enter the stub.
+ *
+ * That can happen if the function calling us didn't need to use the toc. In
+ * that case it won't have setup r2, and the r2 value will be either the
+ * kernel's toc, or possibly another modules toc.
+ *
+ * To deal with that this stub uses the kernel toc, which is always accessible
+ * via the paca (in r13). The target (ftrace_caller()) is responsible for
+ * saving and restoring the toc before returning.
+ *
+ * Returns 1 on success. Returns 0 if the stub entry is not 8-byte aligned or
+ * if @addr is too far from the base address the stub computes from.
+ */
+static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
+					unsigned long addr,
+					struct module *me)
+{
+	long reladdr;
+
+	if ((unsigned long)entry->jump % 8 != 0) {
+		pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
+		return 0;
+	}
+
+	/* Start from the template; its immediate fields are filled in below */
+	BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump));
+	memcpy(entry->jump, stub_insns, sizeof(stub_insns));
+
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+		/* Stub uses address relative to kernel base (from the paca) */
+		reladdr = addr - local_paca->kernelbase;
+		if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+			pr_err("%s: Address of %ps out of range of 34-bit relative address.\n",
+				me->name, (void *)addr);
+			return 0;
+		}
+
+		/* jump[2]/jump[3] are the prefix and suffix words of the paddi */
+		entry->jump[2] |= IMM_H18(reladdr);
+		entry->jump[3] |= IMM_L(reladdr);
+	} else {
+		/* Stub uses address relative to kernel toc (from the paca) */
+		reladdr = addr - kernel_toc_addr();
+		if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+			pr_err("%s: Address of %ps out of range of kernel_toc.\n",
+				me->name, (void *)addr);
+			return 0;
+		}
+
+		/* jump[1]/jump[2] are the addis/addi pair of the template */
+		entry->jump[1] |= PPC_HA(reladdr);
+		entry->jump[2] |= PPC_LO(reladdr);
+	}
+
+	/* Even though we don't use funcdata in the stub, it's needed elsewhere. */
+	entry->funcdata = func_desc(addr);
+	entry->magic = STUB_MAGIC;
+
+	return 1;
+}
+
+/*
+ * Return true if @name is one of the ftrace entry points: "_mcount" always,
+ * plus "ftrace_caller" and "ftrace_regs_caller" when the matching dynamic
+ * ftrace options are enabled.
+ */
+static bool is_mprofile_ftrace_call(const char *name)
+{
+	static const char * const ftrace_syms[] = {
+		"_mcount",
+#ifdef CONFIG_DYNAMIC_FTRACE
+		"ftrace_caller",
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+		"ftrace_regs_caller",
+#endif
+#endif
+		NULL
+	};
+	const char * const *sym;
+
+	for (sym = ftrace_syms; *sym; sym++) {
+		if (strcmp(*sym, name) == 0)
+			return true;
+	}
+
+	return false;
+}
+#else
+/*
+ * Fallback for builds with neither CONFIG_MPROFILE_KERNEL nor
+ * CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY: no ftrace stub is created and
+ * 0 (failure) is always returned.
+ */
+static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
+					unsigned long addr,
+					struct module *me)
+{
+	return 0;
+}
+
+/* Fallback for the same configurations: no symbol is an ftrace entry point. */
+static bool is_mprofile_ftrace_call(const char *name)
+{
+	return false;
+}
+#endif
+
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
+ * value maximum span in an instruction which uses a signed offset). Round down
+ * to a 256 byte boundary for the odd case where we are setting up r2 without a
+ * .toc section.
+ *
+ * With CONFIG_PPC_KERNEL_PCREL the section address is not consulted and -1
+ * (converted to unsigned long) is returned instead.
+ */
+static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
+{
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
+#else
+	return -1;
+#endif
+}
+
+/*
+ * Patch stub to reference function and correct r2 value.
+ *
+ * Symbols recognised by is_mprofile_ftrace_call() get the special ftrace stub
+ * instead. For everything else the generic stub instructions are written,
+ * the offset from which the stub loads its function data is patched in
+ * (relative to the stub itself with CONFIG_PPC_KERNEL_PCREL, relative to r2
+ * otherwise), and finally the function descriptor and the stub magic are
+ * stored. All stores go through patch_instruction().
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static inline int create_stub(const Elf64_Shdr *sechdrs,
+			      struct ppc64_stub_entry *entry,
+			      unsigned long addr,
+			      struct module *me,
+			      const char *name)
+{
+	long reladdr;
+	func_desc_t desc;
+	int i;
+
+	if (is_mprofile_ftrace_call(name))
+		return create_ftrace_stub(entry, addr, me);
+
+	if ((unsigned long)entry->jump % 8 != 0) {
+		pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
+		return 0;
+	}
+
+	BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump));
+	for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
+		if (patch_instruction(&entry->jump[i],
+				      ppc_inst(ppc64_stub_insns[i])))
+			return 0;
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+		/* Stub uses address relative to itself! */
+		reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata);
+		BUILD_BUG_ON(reladdr != 32);
+		if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+			pr_err("%s: Address of %p out of range of 34-bit relative address.\n",
+				me->name, (void *)reladdr);
+			return 0;
+		}
+		pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+		/* May not even need this if we're relative to 0 */
+		if (patch_instruction(&entry->jump[0],
+		    ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr),
+				    entry->jump[1] | IMM_L(reladdr))))
+			return 0;
+
+	} else {
+		unsigned long r2 = my_r2(sechdrs, me);
+
+		/* Stub uses address relative to r2. */
+		reladdr = (unsigned long)entry - r2;
+		if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+			/*
+			 * Report the stub address and the r2 value. This used
+			 * to print the offset and the address of the my_r2()
+			 * function itself.
+			 */
+			pr_err("%s: Address %p of stub out of range of %p.\n",
+			       me->name, (void *)entry, (void *)r2);
+			return 0;
+		}
+		pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+		if (patch_instruction(&entry->jump[0],
+				      ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+			return 0;
+
+		if (patch_instruction(&entry->jump[1],
+				      ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+			return 0;
+	}
+
+	// func_desc_t is 8 bytes if ABIv2, else 16 bytes
+	desc = func_desc(addr);
+	/* Store the descriptor one 32-bit word at a time */
+	for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) {
+		if (patch_instruction(((u32 *)&entry->funcdata) + i,
+				      ppc_inst(((u32 *)(&desc))[i])))
+			return 0;
+	}
+
+	if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC)))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Create stub to jump to function described in this OPD/ptr: we need the
+ * stub to set up the TOC ptr (r2) for the function.
+ *
+ * The .stubs section is searched for an existing stub for @addr. If there is
+ * none, the first unused entry (one whose function address is 0) is turned
+ * into a new stub.
+ *
+ * Returns the address of the stub, or 0 if the section has no unused entry
+ * left or the stub could not be created.
+ */
+static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
+				   unsigned long addr,
+				   struct module *me,
+				   const char *name)
+{
+	struct ppc64_stub_entry *stubs;
+	unsigned int i, num_stubs;
+
+	num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
+
+	/* Find this stub, or if that fails, the next avail. entry */
+	stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+	for (i = 0; i < num_stubs; i++) {
+		/* Bounds are checked before stubs[i] is ever dereferenced */
+		if (!stub_func_addr(stubs[i].funcdata))
+			break;
+
+		if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
+			return (unsigned long)&stubs[i];
+	}
+
+	/* Every entry is in use: never read or write past the section */
+	if (WARN_ON(i >= num_stubs))
+		return 0;
+
+	if (!create_stub(sechdrs, &stubs[i], addr, me, name))
+		return 0;
+
+	return (unsigned long)&stubs[i];
+}
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/*
+ * Create GOT to load the location described in this ptr.
+ *
+ * The module's GOT section is searched for an entry that already holds
+ * @addr. If there is none, the first unused entry (one whose addr is 0) is
+ * set to @addr. @name is not used.
+ *
+ * Returns the address of the GOT entry, or 0 if the section has no unused
+ * entry left.
+ */
+static unsigned long got_for_addr(const Elf64_Shdr *sechdrs,
+				  unsigned long addr,
+				  struct module *me,
+				  const char *name)
+{
+	struct ppc64_got_entry *got;
+	unsigned int i, num_got;
+
+	num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got);
+
+	/* Find this GOT entry, or if that fails, the next avail. entry */
+	got = (void *)sechdrs[me->arch.got_section].sh_addr;
+	for (i = 0; i < num_got; i++) {
+		/* Bounds are checked before got[i] is ever dereferenced */
+		if (!got[i].addr)
+			break;
+
+		if (got[i].addr == addr)
+			return (unsigned long)&got[i];
+	}
+
+	/* Every entry is in use: never read or write past the section */
+	if (WARN_ON(i >= num_got))
+		return 0;
+
+	got[i].addr = addr;
+
+	return (unsigned long)&got[i];
+}
+#endif
+
+/*
+ * @instruction is the slot right after a call instruction. We expect it to
+ * hold a nop; if so it is replaced with the instruction that restores r2.
+ *
+ * Returns 0 when nothing has to be done, -ENOEXEC when the slot holds
+ * something unexpected, and otherwise the result of patch_instruction().
+ */
+static int restore_r2(const char *name, u32 *instruction, struct module *me)
+{
+	u32 *branch = instruction - 1;
+	u32 slot = *instruction;
+
+	/* Nothing to restore for PC-relative kernels or for ftrace calls */
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) || is_mprofile_ftrace_call(name))
+		return 0;
+
+	/*
+	 * A sibling call is not a "link" branch and never returns here, so
+	 * no r2 restore is needed after it.
+	 */
+	if (!instr_is_relative_link_branch(ppc_inst(*branch)))
+		return 0;
+
+	/*
+	 * With livepatch the restore may already be in place, if the
+	 * referenced symbol lives in a module that was unloaded earlier and
+	 * is now being loaded again. Skip both the warning and the write.
+	 */
+	if (slot == PPC_INST_LD_TOC)
+		return 0;
+
+	if (slot == PPC_RAW_NOP()) {
+		/* ld r2,R2_STACK_OFFSET(r1) */
+		return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC));
+	}
+
+	pr_err("%s: Expected nop after call, got %08x at %pS\n",
+		me->name, slot, instruction);
+	return -ENOEXEC;
+}
+
+/*
+ * Apply the RELA relocations in section @relsec to the section named by its
+ * sh_info, looking symbols up in the symbol table at section @symindex.
+ *
+ * For each relocation, value = symbol value + addend is computed and then
+ * stored according to the relocation type. Calls to undefined or livepatch
+ * symbols are redirected through a stub obtained from stub_for_addr().
+ *
+ * Returns 0 on success, -ENOEXEC if a relocation is unknown, out of range or
+ * otherwise unacceptable, -ENOENT if no stub or GOT entry could be obtained,
+ * or -EFAULT if patching an instruction failed.
+ */
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+	Elf64_Sym *sym;
+	unsigned long *location;
+	unsigned long value;
+
+	pr_debug("Applying ADD relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	/* First time we're called, we can fix up .TOC. */
+	if (!me->arch.toc_fixed) {
+		sym = find_dot_toc(sechdrs, strtab, symindex);
+		/* It's theoretically possible that a module doesn't want a
+		 * .TOC. so don't fail it just for that. */
+		if (sym)
+			sym->st_value = my_r2(sechdrs, me);
+		me->arch.toc_fixed = true;
+	}
+#endif
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rela[i].r_offset;
+		/* This is the symbol it is referring to */
+		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+			+ ELF64_R_SYM(rela[i].r_info);
+
+		pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n",
+		       location, (long)ELF64_R_TYPE(rela[i].r_info),
+		       strtab + sym->st_name, (unsigned long)sym->st_value,
+		       (long)rela[i].r_addend);
+
+		/* `Everything is relative'. */
+		value = sym->st_value + rela[i].r_addend;
+
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_PPC64_ADDR32:
+			/* Simply set it */
+			*(u32 *)location = value;
+			break;
+
+		case R_PPC64_ADDR64:
+			/* Simply set it */
+			*(unsigned long *)location = value;
+			break;
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+		case R_PPC64_TOC:
+			*(unsigned long *)location = my_r2(sechdrs, me);
+			break;
+
+		case R_PPC64_TOC16:
+			/* Subtract TOC pointer */
+			value -= my_r2(sechdrs, me);
+			/* Unsigned compare: accepts only -0x8000..0x7fff */
+			if (value + 0x8000 > 0xffff) {
+				pr_err("%s: bad TOC16 relocation (0x%lx)\n",
+				       me->name, value);
+				return -ENOEXEC;
+			}
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+		case R_PPC64_TOC16_LO:
+			/* Subtract TOC pointer */
+			value -= my_r2(sechdrs, me);
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+		case R_PPC64_TOC16_DS:
+			/* Subtract TOC pointer */
+			value -= my_r2(sechdrs, me);
+			/* The low two bits must be clear and are left untouched */
+			if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+				pr_err("%s: bad TOC16_DS relocation (0x%lx)\n",
+				       me->name, value);
+				return -ENOEXEC;
+			}
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xfffc)
+				| (value & 0xfffc);
+			break;
+
+		case R_PPC64_TOC16_LO_DS:
+			/* Subtract TOC pointer */
+			value -= my_r2(sechdrs, me);
+			if ((value & 3) != 0) {
+				pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n",
+				       me->name, value);
+				return -ENOEXEC;
+			}
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xfffc)
+				| (value & 0xfffc);
+			break;
+
+		case R_PPC64_TOC16_HA:
+			/* Subtract TOC pointer */
+			value -= my_r2(sechdrs, me);
+			/* Add 0x8000 before shifting, then keep the high 16 bits */
+			value = ((value + 0x8000) >> 16);
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+#endif
+
+		case R_PPC_REL24:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		/* PCREL still generates REL24 for mcount */
+		case R_PPC64_REL24_NOTOC:
+#endif
+			/* FIXME: Handle weak symbols here --RR */
+			if (sym->st_shndx == SHN_UNDEF ||
+			    sym->st_shndx == SHN_LIVEPATCH) {
+				/* External: go via stub */
+				value = stub_for_addr(sechdrs, value, me,
+						strtab + sym->st_name);
+				if (!value)
+					return -ENOENT;
+				/* The slot after the call is where r2 gets restored */
+				if (restore_r2(strtab + sym->st_name,
+					       (u32 *)location + 1, me))
+					return -ENOEXEC;
+			} else
+				value += local_entry_offset(sym);
+
+			/* Convert value to relative */
+			value -= (unsigned long)location;
+			if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
+				pr_err("%s: REL24 %li out of range!\n",
+				       me->name, (long int)value);
+				return -ENOEXEC;
+			}
+
+			/* Only replace bits 2 through 26 */
+			value = (*(uint32_t *)location & ~PPC_LI_MASK) | PPC_LI(value);
+
+			if (patch_instruction((u32 *)location, ppc_inst(value)))
+				return -EFAULT;
+
+			break;
+
+		case R_PPC64_REL64:
+			/* 64 bits relative (used by features fixups) */
+			*location = value - (unsigned long)location;
+			break;
+
+		case R_PPC64_REL32:
+			/* 32 bits relative (used by relative exception tables) */
+			/* Convert value to relative */
+			value -= (unsigned long)location;
+			if (value + 0x80000000 > 0xffffffff) {
+				pr_err("%s: REL32 %li out of range!\n",
+				       me->name, (long int)value);
+				return -ENOEXEC;
+			}
+			*(u32 *)location = value;
+			break;
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		case R_PPC64_PCREL34: {
+			/* Keep the absolute target in case a GOT entry is needed */
+			unsigned long absvalue = value;
+
+			/* Convert value to relative */
+			value -= (unsigned long)location;
+
+			if (value + 0x200000000 > 0x3ffffffff) {
+				if (sym->st_shndx != me->arch.pcpu_section) {
+					pr_err("%s: REL34 %li out of range!\n",
+					       me->name, (long)value);
+					return -ENOEXEC;
+				}
+
+				/*
+				 * per-cpu section is special cased because
+				 * it is moved during loading, so has to be
+				 * converted to use GOT.
+				 */
+				value = got_for_addr(sechdrs, absvalue, me,
+						     strtab + sym->st_name);
+				if (!value)
+					return -ENOENT;
+				value -= (unsigned long)location;
+
+				/* Turn pla into pld */
+				if (patch_instruction((u32 *)location,
+				    ppc_inst_prefix((*(u32 *)location & ~0x02000000),
+						    (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000)))
+					return -EFAULT;
+			}
+
+			/* Split the offset across the prefix and suffix words */
+			if (patch_instruction((u32 *)location,
+			    ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value),
+					    (*((u32 *)location + 1) & ~0xffff) | IMM_L(value))))
+				return -EFAULT;
+
+			break;
+		}
+
+#else
+		case R_PPC64_TOCSAVE:
+			/*
+			 * Marker reloc indicates we don't have to save r2.
+			 * That would only save us one instruction, so ignore
+			 * it.
+			 */
+			break;
+#endif
+
+		case R_PPC64_ENTRY:
+			if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+				break;
+
+			/*
+			 * Optimize ELFv2 large code model entry point if
+			 * the TOC is within 2GB range of current location.
+			 */
+			value = my_r2(sechdrs, me) - (unsigned long)location;
+			if (value + 0x80008000 > 0xffffffff)
+				break;
+			/*
+			 * Check for the large code model prolog sequence:
+			 *	ld r2, ...(r12)
+			 *	add r2, r2, r12
+			 */
+			if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0))
+				break;
+			if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12))
+				break;
+			/*
+			 * If found, replace it with:
+			 *	addis r2, r12, (.TOC.-func)@ha
+			 *	addi  r2,  r2, (.TOC.-func)@l
+			 */
+			((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value));
+			((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value));
+			break;
+
+		case R_PPC64_REL16_HA:
+			/* Subtract location pointer */
+			value -= (unsigned long)location;
+			value = ((value + 0x8000) >> 16);
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+		case R_PPC64_REL16_LO:
+			/* Subtract location pointer */
+			value -= (unsigned long)location;
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		case R_PPC64_GOT_PCREL34:
+			/* Reference the symbol through a GOT entry */
+			value = got_for_addr(sechdrs, value, me,
+					     strtab + sym->st_name);
+			if (!value)
+				return -ENOENT;
+			value -= (unsigned long)location;
+			((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) |
+						    ((value >> 16) & 0x3ffff);
+			((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) |
+						    (value & 0xffff);
+			break;
+#endif
+
+		default:
+			pr_err("%s: Unknown ADD relocation: %lu\n",
+			       me->name,
+			       (unsigned long)ELF64_R_TYPE(rela[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Look up the function that the module stub at @addr jumps to.
+ *
+ * @addr must lie within the core of @mod and must carry the stub magic. The
+ * stub is read with copy_from_kernel_nofault(), so a bad address is reported
+ * as an error instead of faulting.
+ *
+ * On success, stores the function address in *@target and returns 0.
+ * Returns -EFAULT on any failure.
+ */
+int module_trampoline_target(struct module *mod, unsigned long addr,
+			     unsigned long *target)
+{
+	struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr;
+	func_desc_t desc;
+	u32 stub_magic;
+
+	if (!within_module_core(addr, mod)) {
+		pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name);
+		return -EFAULT;
+	}
+
+	if (copy_from_kernel_nofault(&stub_magic, &stub->magic,
+				     sizeof(stub_magic))) {
+		pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name);
+		return -EFAULT;
+	}
+
+	if (stub_magic != STUB_MAGIC) {
+		pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name);
+		return -EFAULT;
+	}
+
+	if (copy_from_kernel_nofault(&desc, &stub->funcdata, sizeof(desc))) {
+		pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name);
+		return -EFAULT;
+	}
+
+	*target = stub_func_addr(desc);
+
+	return 0;
+}
+
+/*
+ * Set up the module's stubs for the ftrace entry points and remember their
+ * addresses in mod->arch: one for ftrace_caller and, with
+ * CONFIG_DYNAMIC_FTRACE_WITH_REGS, one for ftrace_regs_caller.
+ *
+ * Returns 0 on success, or -ENOENT if a stub could not be obtained.
+ */
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+	unsigned long caller_stub;
+
+	caller_stub = stub_for_addr(sechdrs, (unsigned long)ftrace_caller,
+				    mod, "ftrace_caller");
+	mod->arch.tramp = caller_stub;
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	mod->arch.tramp_regs = stub_for_addr(sechdrs,
+					     (unsigned long)ftrace_regs_caller,
+					     mod, "ftrace_regs_caller");
+	if (!mod->arch.tramp_regs)
+		return -ENOENT;
+#endif
+
+	return caller_stub ? 0 : -ENOENT;
+}
+#endif
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
new file mode 100644
index 0000000000..a5d25bebca
--- /dev/null
+++ b/arch/powerpc/kernel/msi.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+
+/*
+ * Set up @nvec MSI interrupts of kind @type for @dev by handing the request
+ * to the setup_msi_irqs callback of the device's host bridge.
+ *
+ * Returns -ENOSYS if the host bridge lacks either MSI callback, 1 if more
+ * than one plain MSI vector is requested, and the callback's result
+ * otherwise.
+ */
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+	if (!(hose->controller_ops.setup_msi_irqs &&
+	      hose->controller_ops.teardown_msi_irqs)) {
+		pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
+		return -ENOSYS;
+	}
+
+	/* PowerPC doesn't support multiple MSI yet */
+	if (nvec > 1 && type == PCI_CAP_ID_MSI)
+		return 1;
+
+	return hose->controller_ops.setup_msi_irqs(dev, nvec, type);
+}
+
+/*
+ * Tear down the MSI interrupts of @dev through the teardown_msi_irqs callback
+ * of its host bridge, if the bridge provides one.
+ */
+void arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+	/*
+	 * This can run even after arch_setup_msi_irqs() returned -ENOSYS, so
+	 * the callback pointer has to be checked again here.
+	 */
+	if (!hose->controller_ops.teardown_msi_irqs)
+		return;
+
+	hose->controller_ops.teardown_msi_irqs(dev);
+}
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 0000000000..bcdad15395
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT		(1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT		0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+	.long PPC_CAPABILITIES_BITMAP)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
new file mode 100644
index 0000000000..e385d31646
--- /dev/null
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -0,0 +1,1055 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  c 2001 PPC 64 Team, IBM Corp
+ *
+ * /dev/nvram driver for PPC64
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/fcntl.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pagemap.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <asm/nvram.h>
+#include <asm/rtas.h>
+#include <asm/machdep.h>
+
+#undef DEBUG_NVRAM
+
+#define NVRAM_HEADER_LEN	sizeof(struct nvram_header)
+#define NVRAM_BLOCK_LEN		NVRAM_HEADER_LEN
+
+/*
+ * Header found at the start of every NVRAM partition.
+ * If you change this size, then also change the size of NVNAME_LEN.
+ */
+struct nvram_header {
+	unsigned char signature;
+	unsigned char checksum;
+	unsigned short length;
+	/* Terminating null required only for names < 12 chars. */
+	char name[12];
+};
+
+struct nvram_partition {
+	struct list_head partition;
+	struct nvram_header header;
+	unsigned int index;
+};
+
+static LIST_HEAD(nvram_partitions);
+
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+	.name = "ibm,rtas-log",
+	.req_size = 2079,
+	.min_size = 1055,
+	.index = -1,
+	.os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+	.name = "lnx,oops-log",
+	.req_size = 4000,
+	.min_size = 2000,
+	.index = -1,
+	.os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+	"ibm,rtas-log",
+#endif
+	"lnx,oops-log",
+	NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+			  enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper nvram_kmsg_dumper = {
+	.dump = oops_to_nvram
+};
+
+/*
+ * For capturing and compressing an oops or panic report...
+ *
+ * big_oops_buf[] holds the uncompressed text we're capturing.
+ *
+ * oops_buf[] holds the compressed text, preceded by an oops header.
+ * oops header has u16 holding the version of oops header (to differentiate
+ * between old and new format header) followed by u16 holding the length of
+ * the compressed* text (*Or uncompressed, if compression fails.) and u64
+ * holding the timestamp. oops_buf[] gets written to NVRAM.
+ *
+ * oops_log_info points to the header. oops_data points to the compressed text.
+ *
+ * +- oops_buf
+ * |                                   +- oops_data
+ * v                                   v
+ * +-----------+-----------+-----------+------------------------+
+ * | version   | length    | timestamp | text                   |
+ * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes)   |
+ * +-----------+-----------+-----------+------------------------+
+ * ^
+ * +- oops_log_info
+ *
+ * We preallocate these buffers during init to avoid kmalloc during oops/panic.
+ */
+static size_t big_oops_buf_sz;
+static char *big_oops_buf, *oops_buf;
+static char *oops_data;
+static size_t oops_data_sz;
+
+/* Compression parameters */
+#define COMPR_LEVEL 6
+#define WINDOW_BITS 12
+#define MEM_LEVEL 4
+static struct z_stream_s stream;
+
+#ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+	.name = "ibm,skiboot",
+	.index = -1,
+	.os_partition = false
+};
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+static struct nvram_os_partition of_config_partition = {
+	.name = "of-config",
+	.index = -1,
+	.os_partition = false
+};
+#endif
+
+static struct nvram_os_partition common_partition = {
+	.name = "common",
+	.index = -1,
+	.os_partition = false
+};
+
+static enum pstore_type_id nvram_type_ids[] = {
+	PSTORE_TYPE_DMESG,
+	PSTORE_TYPE_PPC_COMMON,
+	-1,
+	-1,
+	-1
+};
+static int read_type;
+#endif
+
+/* nvram_write_os_partition
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode.  If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk.  For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in a state in which it would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will look like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name       | data             |
+ * |0          |1         |2      3|4         15|16        length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log                         |
+ * |0            3|4          7|8                  error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_os_partition(struct nvram_os_partition *part,
+			     char *buff, int length,
+			     unsigned int err_type,
+			     unsigned int error_log_cnt)
+{
+	struct err_log_info hdr;
+	loff_t pos;
+	int ret;
+
+	/* No partition has been set up for this log */
+	if (part->index == -1)
+		return -ESPIPE;
+
+	/* Never write more than the partition can hold */
+	if (length > part->size)
+		length = part->size;
+
+	hdr.error_type = cpu_to_be32(err_type);
+	hdr.seq_num = cpu_to_be32(error_log_cnt);
+
+	/* The info header goes first, immediately followed by the data */
+	pos = part->index;
+	ret = ppc_md.nvram_write((char *)&hdr, sizeof(hdr), &pos);
+	if (ret > 0)
+		ret = ppc_md.nvram_write(buff, length, &pos);
+
+	if (ret <= 0) {
+		pr_err("%s: Failed nvram_write (%d)\n", __func__, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* nvram_read_partition
+ *
+ * Reads at most 'length' bytes of the partition into 'buff'. For OS
+ * partitions the err_log_info header in front of the data is read first and
+ * its type and sequence number are handed back through 'err_type' and
+ * 'error_log_cnt'.
+ *
+ * Returns 0 on success, -1 if the partition has no index, or the failing
+ * nvram_read result (<= 0).
+ */
+int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+			 int length, unsigned int *err_type,
+			 unsigned int *error_log_cnt)
+{
+	struct err_log_info hdr;
+	loff_t pos;
+	int ret;
+
+	if (part->index == -1)
+		return -1;
+
+	if (length > part->size)
+		length = part->size;
+
+	pos = part->index;
+
+	if (part->os_partition) {
+		ret = ppc_md.nvram_read((char *)&hdr, sizeof(hdr), &pos);
+		if (ret <= 0)
+			goto read_failed;
+	}
+
+	ret = ppc_md.nvram_read(buff, length, &pos);
+	if (ret <= 0)
+		goto read_failed;
+
+	if (part->os_partition) {
+		*error_log_cnt = be32_to_cpu(hdr.seq_num);
+		*err_type = be32_to_cpu(hdr.error_type);
+	}
+
+	return 0;
+
+read_failed:
+	pr_err("%s: Failed nvram_read (%d)\n", __func__, ret);
+	return ret;
+}
+
+/* nvram_init_os_partition
+ *
+ * This sets up a partition with an "OS" signature.
+ *
+ * The general strategy is the following:
+ * 1.) If a partition with the indicated name already exists...
+ *	- If it's large enough, use it.
+ *	- Otherwise, recycle it and keep going.
+ * 2.) Search for a free partition that is large enough.
+ * 3.) If there's not a free partition large enough, recycle any obsolete
+ * OS partitions and try again.
+ * 4.) Will first try getting a chunk that will satisfy the requested size.
+ * 5.) If a chunk of the requested size cannot be allocated, then try finding
+ * a chunk that will satisfy the minimum needed.
+ *
+ * Returns 0 on success, else -1.
+ */
+int __init nvram_init_os_partition(struct nvram_os_partition *part)
+{
+	loff_t pos;
+	int cur_size;
+
+	/* Is there already a partition with our name? */
+	pos = nvram_find_partition(part->name, NVRAM_SIG_OS, &cur_size);
+
+	/* An existing partition that is too small gets removed */
+	if (pos && cur_size < part->min_size) {
+		pr_info("nvram: Found too small %s partition,"
+					" removing it...\n", part->name);
+		nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
+		pos = 0;
+	}
+
+	/* Nothing usable: create a new one */
+	if (!pos) {
+		pos = nvram_create_partition(part->name, NVRAM_SIG_OS,
+					     part->req_size, part->min_size);
+		if (pos == -ENOSPC) {
+			/* Make room and try once more */
+			pr_info("nvram: No room to create %s partition, "
+				"deleting any obsolete OS partitions...\n",
+				part->name);
+			nvram_remove_partition(NULL, NVRAM_SIG_OS,
+					       nvram_os_partitions);
+			pos = nvram_create_partition(part->name, NVRAM_SIG_OS,
+						     part->req_size,
+						     part->min_size);
+		}
+	}
+
+	if (pos > 0) {
+		part->index = pos;
+		/* The usable size excludes the err_log_info header */
+		part->size = nvram_get_partition_size(pos) -
+			     sizeof(struct err_log_info);
+		return 0;
+	}
+
+	pr_err("nvram: Failed to find or create %s"
+	       " partition, err %d\n", part->name, (int)pos);
+	return -1;
+}
+
+/*
+ * Derived from logfs_compress().
+ *
+ * Deflates @inlen bytes at @in into @out (at most @outlen bytes) in a single
+ * pass, using the file-scope zlib stream.
+ *
+ * Returns the compressed length, or -EIO if any zlib step fails or the
+ * output is not smaller than the input.
+ */
+static int nvram_compress(const void *in, void *out, size_t inlen,
+							size_t outlen)
+{
+	int zrc;
+
+	zrc = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
+				MEM_LEVEL, Z_DEFAULT_STRATEGY);
+	if (zrc != Z_OK)
+		return -EIO;
+
+	stream.next_in = in;
+	stream.avail_in = inlen;
+	stream.total_in = 0;
+	stream.next_out = out;
+	stream.avail_out = outlen;
+	stream.total_out = 0;
+
+	/* Everything must fit in one call */
+	zrc = zlib_deflate(&stream, Z_FINISH);
+	if (zrc != Z_STREAM_END)
+		return -EIO;
+
+	zrc = zlib_deflateEnd(&stream);
+	if (zrc != Z_OK)
+		return -EIO;
+
+	/* Compression that does not shrink the data counts as a failure */
+	if (stream.total_out >= stream.total_in)
+		return -EIO;
+
+	return stream.total_out;
+}
+
+/* Compress the text from big_oops_buf into oops_buf. */
+static int zip_oops(size_t text_len)
+{
+	struct oops_log_info *hdr;
+	int packed_len;
+
+	packed_len = nvram_compress(big_oops_buf, oops_data, text_len,
+				    oops_data_sz);
+	if (packed_len < 0) {
+		pr_err("nvram: compression failed; returned %d\n", packed_len);
+		pr_err("nvram: logging uncompressed oops/panic report\n");
+		return -1;
+	}
+
+	/* Fill in the header at the start of oops_buf for the zipped data. */
+	hdr = (struct oops_log_info *)oops_buf;
+	hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	hdr->report_length = cpu_to_be16(packed_len);
+	hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+	return 0;
+}
+
+#ifdef CONFIG_PSTORE
+/*
+ * pstore open callback.  @psi is not used.  Always returns 0.
+ */
+static int nvram_pstore_open(struct pstore_info *psi)
+{
+	/*
+	 * Reset the iterator to start reading partitions again.
+	 * nvram_pstore_read() increments read_type before using it, so -1
+	 * makes the next read start at nvram_type_ids[0].
+	 */
+	read_type = -1;
+	return 0;
+}
+
+/**
+ * nvram_pstore_write - pstore write callback for nvram
+ * @record:             pstore record to write, with @id to be set
+ *
+ * Called by pstore_dump() when an oops or panic report is logged in the
+ * printk buffer.
+ * Returns 0 on successful write.
+ */
+static int nvram_pstore_write(struct pstore_record *record)
+{
+	struct oops_log_info *hdr = (struct oops_log_info *) oops_buf;
+	unsigned int err_type;
+	int rc;
+
+	/* Only dmesg records are stored, and only their first part. */
+	if (record->type != PSTORE_TYPE_DMESG || record->part > 1)
+		return -1;
+
+	if (clobbering_unread_rtas_event())
+		return -1;
+
+	hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	hdr->report_length = cpu_to_be16(record->size);
+	hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+
+	/* Tag the record so a later read knows whether to decompress it. */
+	err_type = record->compressed ? ERR_TYPE_KERNEL_PANIC_GZ
+				      : ERR_TYPE_KERNEL_PANIC;
+
+	rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
+		(int) (sizeof(*hdr) + record->size), err_type,
+		record->count);
+
+	/* The record id is only assigned once the write has succeeded. */
+	if (rc == 0)
+		record->id = record->part;
+
+	return rc;
+}
+
+/*
+ * Reads the oops/panic report, rtas, of-config and common partition.
+ * Returns the length of the data we read from each partition.
+ * Returns 0 if we've been called before.
+ */
+/*
+ * pstore read callback.  Each call advances read_type and returns the
+ * contents of the partition selected by nvram_type_ids[read_type].
+ *
+ * On success record->buf holds a kmalloc'ed buffer and the number of valid
+ * bytes in it is returned.  Returns 0 when there is nothing (more) to read,
+ * when the partition cannot be found or read, or when a dmesg record's
+ * header describes more data than the partition holds.  Returns -ENOMEM if
+ * an allocation fails.
+ */
+static ssize_t nvram_pstore_read(struct pstore_record *record)
+{
+	struct oops_log_info *oops_hdr;
+	unsigned int err_type, id_no, size = 0;
+	struct nvram_os_partition *part = NULL;
+	char *buff = NULL;
+	int sig = 0;
+	loff_t p;
+
+	read_type++;
+
+	/* Select the partition and preset the record fields for this type. */
+	switch (nvram_type_ids[read_type]) {
+	case PSTORE_TYPE_DMESG:
+		part = &oops_log_partition;
+		record->type = PSTORE_TYPE_DMESG;
+		break;
+	case PSTORE_TYPE_PPC_COMMON:
+		sig = NVRAM_SIG_SYS;
+		part = &common_partition;
+		record->type = PSTORE_TYPE_PPC_COMMON;
+		record->id = PSTORE_TYPE_PPC_COMMON;
+		record->time.tv_sec = 0;
+		record->time.tv_nsec = 0;
+		break;
+#ifdef CONFIG_PPC_PSERIES
+	case PSTORE_TYPE_PPC_RTAS:
+		part = &rtas_log_partition;
+		record->type = PSTORE_TYPE_PPC_RTAS;
+		record->time.tv_sec = last_rtas_event;
+		record->time.tv_nsec = 0;
+		break;
+	case PSTORE_TYPE_PPC_OF:
+		sig = NVRAM_SIG_OF;
+		part = &of_config_partition;
+		record->type = PSTORE_TYPE_PPC_OF;
+		record->id = PSTORE_TYPE_PPC_OF;
+		record->time.tv_sec = 0;
+		record->time.tv_nsec = 0;
+		break;
+#endif
+#ifdef CONFIG_PPC_POWERNV
+	case PSTORE_TYPE_PPC_OPAL:
+		sig = NVRAM_SIG_FW;
+		part = &skiboot_partition;
+		record->type = PSTORE_TYPE_PPC_OPAL;
+		record->id = PSTORE_TYPE_PPC_OPAL;
+		record->time.tv_sec = 0;
+		record->time.tv_nsec = 0;
+		break;
+#endif
+	default:
+		return 0;
+	}
+
+	/* Partitions not flagged os_partition are looked up on every read. */
+	if (!part->os_partition) {
+		p = nvram_find_partition(part->name, sig, &size);
+		if (p <= 0) {
+			pr_err("nvram: Failed to find partition %s, "
+				"err %d\n", part->name, (int)p);
+			return 0;
+		}
+		part->index = p;
+		part->size = size;
+	}
+
+	buff = kmalloc(part->size, GFP_KERNEL);
+
+	if (!buff)
+		return -ENOMEM;
+
+	if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
+		kfree(buff);
+		return 0;
+	}
+
+	record->count = 0;
+
+	if (part->os_partition)
+		record->id = id_no;
+
+	if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
+		size_t length, hdr_size;
+		size_t avail = part->size;
+
+		/* The header fields below must lie inside buff. */
+		if (avail < sizeof(*oops_hdr))
+			goto bad_record;
+
+		oops_hdr = (struct oops_log_info *)buff;
+		if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) {
+			/* Old format oops header had 2-byte record size */
+			hdr_size = sizeof(u16);
+			length = be16_to_cpu(oops_hdr->version);
+			record->time.tv_sec = 0;
+			record->time.tv_nsec = 0;
+		} else {
+			hdr_size = sizeof(*oops_hdr);
+			length = be16_to_cpu(oops_hdr->report_length);
+			record->time.tv_sec = be64_to_cpu(oops_hdr->timestamp);
+			record->time.tv_nsec = 0;
+		}
+
+		/*
+		 * The length was read from NVRAM and may be garbage.  Never
+		 * copy more than the bytes allocated for this partition.
+		 */
+		if (length > avail - hdr_size)
+			goto bad_record;
+
+		/* Hand pstore a copy of the payload only, without header. */
+		record->buf = kmemdup(buff + hdr_size, length, GFP_KERNEL);
+		kfree(buff);
+		if (record->buf == NULL)
+			return -ENOMEM;
+
+		record->ecc_notice_size = 0;
+		if (err_type == ERR_TYPE_KERNEL_PANIC_GZ)
+			record->compressed = true;
+		else
+			record->compressed = false;
+		return length;
+	}
+
+	/* All other partition types are returned whole. */
+	record->buf = buff;
+	return part->size;
+
+bad_record:
+	pr_err("nvram: invalid oops record header in %s partition\n",
+	       part->name);
+	kfree(buff);
+	return 0;
+}
+
+/*
+ * pstore backend descriptor for nvram.  Only dmesg records are advertised
+ * through .flags; .buf and .bufsize are filled in by nvram_pstore_init().
+ */
+static struct pstore_info nvram_pstore_info = {
+	.owner = THIS_MODULE,
+	.name = "nvram",
+	.flags = PSTORE_FLAGS_DMESG,
+	.open = nvram_pstore_open,
+	.read = nvram_pstore_read,
+	.write = nvram_pstore_write,
+};
+
+static int __init nvram_pstore_init(void)
+{
+	int rc;
+
+	/* Entries 2 and 3 of the read order depend on the platform. */
+	if (!machine_is(pseries)) {
+		nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
+	} else {
+		nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+		nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+	}
+
+	/* pstore writes go to the data area that follows the oops header. */
+	nvram_pstore_info.bufsize = oops_data_sz;
+	nvram_pstore_info.buf = oops_data;
+
+	rc = pstore_register(&nvram_pstore_info);
+	if (rc == 0 || rc == -EPERM)
+		return rc;
+
+	/* Print error only when pstore.backend == nvram */
+	pr_err("nvram: pstore_register() failed, returned %d. "
+			"Defaults to kmsg_dump\n", rc);
+	return rc;
+}
+#else
+/*
+ * Stub used when CONFIG_PSTORE is not set.  It always reports failure, so
+ * nvram_init_oops_partition() goes on to register the kmsg dumper.
+ */
+static int __init nvram_pstore_init(void)
+{
+	return -1;
+}
+#endif
+
+/*
+ * nvram_init_oops_partition - prepare logging of oops/panic reports to NVRAM
+ * @rtas_partition_exists: non-zero if the RTAS log partition may be used as
+ *	a fallback; only consulted when CONFIG_PPC_PSERIES is set
+ *
+ * Initializes the oops log partition and allocates oops_buf for it.  If the
+ * partition cannot be set up, pseries falls back to sharing the RTAS log
+ * partition when that exists.  pstore registration is tried first; only if
+ * that fails are the compression buffers allocated and the kmsg dumper
+ * registered.
+ */
+void __init nvram_init_oops_partition(int rtas_partition_exists)
+{
+	int rc;
+
+	rc = nvram_init_os_partition(&oops_log_partition);
+	if (rc != 0) {
+#ifdef CONFIG_PPC_PSERIES
+		if (!rtas_partition_exists) {
+			pr_err("nvram: Failed to initialize oops partition!\n");
+			return;
+		}
+		pr_notice("nvram: Using %s partition to log both"
+			" RTAS errors and oops/panic reports\n",
+			rtas_log_partition.name);
+		memcpy(&oops_log_partition, &rtas_log_partition,
+						sizeof(rtas_log_partition));
+#else
+		pr_err("nvram: Failed to initialize oops partition!\n");
+		return;
+#endif
+	}
+	oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
+	if (!oops_buf) {
+		pr_err("nvram: No memory for %s partition\n",
+						oops_log_partition.name);
+		return;
+	}
+	/* The report text is stored right after the oops_log_info header. */
+	oops_data = oops_buf + sizeof(struct oops_log_info);
+	oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
+
+	rc = nvram_pstore_init();
+
+	/* pstore took over; the kmsg dumper path below is not needed. */
+	if (!rc)
+		return;
+
+	/*
+	 * Figure compression (preceded by elimination of each line's <n>
+	 * severity prefix) will reduce the oops/panic report to at most
+	 * 45% of its original size.
+	 */
+	big_oops_buf_sz = (oops_data_sz * 100) / 45;
+	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
+	if (big_oops_buf) {
+		stream.workspace =  kmalloc(zlib_deflate_workspacesize(
+					WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
+		if (!stream.workspace) {
+			pr_err("nvram: No memory for compression workspace; "
+				"skipping compression of %s partition data\n",
+				oops_log_partition.name);
+			kfree(big_oops_buf);
+			big_oops_buf = NULL;
+		}
+	} else {
+		pr_err("No memory for uncompressed %s data; "
+			"skipping compression\n", oops_log_partition.name);
+		stream.workspace = NULL;
+	}
+
+	rc = kmsg_dump_register(&nvram_kmsg_dumper);
+	if (rc != 0) {
+		pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
+		/*
+		 * Nothing was registered, so release the buffers and clear
+		 * the shared pointers rather than leaving stale addresses.
+		 */
+		kfree(oops_buf);
+		oops_buf = NULL;
+		oops_data = NULL;
+		kfree(big_oops_buf);
+		big_oops_buf = NULL;
+		kfree(stream.workspace);
+		stream.workspace = NULL;
+	}
+}
+
+/*
+ * This is our kmsg_dump callback, called after an oops or panic report
+ * has been written to the printk buffer.  We want to capture as much
+ * of the printk buffer as possible.  First, capture as much as we can
+ * that we think will compress sufficiently to fit in the lnx,oops-log
+ * partition.  If that's too much, go back and capture uncompressed text.
+ */
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+			  enum kmsg_dump_reason reason)
+{
+	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
+	static unsigned int oops_count = 0;
+	static struct kmsg_dump_iter iter;
+	static bool panicking = false;
+	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
+	size_t text_len;
+	unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
+	int rc = -1;
+
+	/* Decide from the dump reason whether this report is worth logging. */
+	switch (reason) {
+	case KMSG_DUMP_SHUTDOWN:
+		/* These are almost always orderly shutdowns. */
+		return;
+	case KMSG_DUMP_OOPS:
+		break;
+	case KMSG_DUMP_PANIC:
+		/* Remembered so a later EMERG dump is skipped. */
+		panicking = true;
+		break;
+	case KMSG_DUMP_EMERG:
+		if (panicking)
+			/* Panic report already captured. */
+			return;
+		break;
+	default:
+		pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
+		       __func__, (int) reason);
+		return;
+	}
+
+	if (clobbering_unread_rtas_event())
+		return;
+
+	/* Only one dump at a time; a contended caller gives up, not waits. */
+	if (!spin_trylock_irqsave(&lock, flags))
+		return;
+
+	/* First choice: capture into the large buffer and compress it. */
+	if (big_oops_buf) {
+		kmsg_dump_rewind(&iter);
+		kmsg_dump_get_buffer(&iter, false,
+				     big_oops_buf, big_oops_buf_sz, &text_len);
+		rc = zip_oops(text_len);
+	}
+	/* No large buffer or compression failed: store uncompressed text. */
+	if (rc != 0) {
+		kmsg_dump_rewind(&iter);
+		kmsg_dump_get_buffer(&iter, false,
+				     oops_data, oops_data_sz, &text_len);
+		err_type = ERR_TYPE_KERNEL_PANIC;
+		oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+		oops_hdr->report_length = cpu_to_be16(text_len);
+		oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+	}
+
+	/*
+	 * NOTE(review): on the compressed path text_len is still the
+	 * uncompressed length from kmsg_dump_get_buffer(), not the zipped
+	 * length that zip_oops() stored in report_length.  Confirm that
+	 * nvram_write_os_partition() bounds the length it is given.
+	 */
+	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
+		(int) (sizeof(*oops_hdr) + text_len), err_type,
+		++oops_count);
+
+	spin_unlock_irqrestore(&lock, flags);
+}
+
+#ifdef DEBUG_NVRAM
+/*
+ * Debug helper (built only with DEBUG_NVRAM): print one line per entry of
+ * the nvram_partitions list, preceded by a banner containing @label.
+ */
+static void __init nvram_print_partitions(char * label)
+{
+	struct nvram_partition * tmp_part;
+	
+	printk(KERN_WARNING "--------%s---------\n", label);
+	printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
+	list_for_each_entry(tmp_part, &nvram_partitions, partition) {
+		/* %12.12s: at most 12 characters of the name are printed. */
+		printk(KERN_WARNING "%4d    \t%02x\t%02x\t%d\t%12.12s\n",
+		       tmp_part->index, tmp_part->header.signature,
+		       tmp_part->header.checksum, tmp_part->header.length,
+		       tmp_part->header.name);
+	}
+}
+#endif
+
+
+/*
+ * Write the header of @part to NVRAM at part->index.  The length field is
+ * converted to big-endian in a local copy, so the in-memory header keeps
+ * CPU byte order.  Returns whatever ppc_md.nvram_write() returns.
+ */
+static int __init nvram_write_header(struct nvram_partition * part)
+{
+	struct nvram_header be_hdr;
+	loff_t pos = part->index;
+
+	memcpy(&be_hdr, &part->header, NVRAM_HEADER_LEN);
+	be_hdr.length = cpu_to_be16(be_hdr.length);
+
+	/* A scratch offset is passed so part->index is left untouched. */
+	return ppc_md.nvram_write((char *)&be_hdr, NVRAM_HEADER_LEN, &pos);
+}
+
+
+/*
+ * Compute the one-byte checksum of a partition header from its signature,
+ * its length and its name, the name being read as six 16-bit words.
+ * p->length is used as is, so callers pass a header in CPU byte order.
+ */
+static unsigned char __init nvram_checksum(struct nvram_header *p)
+{
+	unsigned int c_sum, c_sum2;
+	unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
+	c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];
+
+	/* The sum may have spilled into the 3rd byte.  Fold it back. */
+	c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
+	/* The sum cannot exceed 2 bytes.  Fold it into a checksum */
+	c_sum2 = (c_sum >> 8) + (c_sum << 8);
+	c_sum = ((c_sum + c_sum2) >> 8) & 0xff;
+	return c_sum;
+}
+
+/*
+ * Per the criteria passed via nvram_remove_partition(), should this
+ * partition be removed?  1=remove, 0=keep
+ */
+static int __init nvram_can_remove_partition(struct nvram_partition *part,
+		const char *name, int sig, const char *exceptions[])
+{
+	const char **skip;
+
+	/* The signature must match in every case. */
+	if (part->header.signature != sig)
+		return 0;
+
+	/* With a name given, only that name decides. */
+	if (name)
+		return strncmp(name, part->header.name, 12) == 0;
+
+	/* Signature-only match: remove unless listed as an exception. */
+	if (!exceptions)
+		return 1;
+
+	for (skip = exceptions; *skip; skip++) {
+		if (strncmp(*skip, part->header.name, 12) == 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * nvram_remove_partition - Remove one or more partitions in nvram
+ * @name: name of the partition to remove, or NULL for a
+ *        signature only match
+ * @sig: signature of the partition(s) to remove
+ * @exceptions: When removing all partitions with a matching signature,
+ *        leave these alone.
+ */
+
+/*
+ * Returns 0 on success.  If a header write fails, the function stops at
+ * once and returns that write's result, which is <= 0.
+ */
+int __init nvram_remove_partition(const char *name, int sig,
+						const char *exceptions[])
+{
+	struct nvram_partition *part, *prev, *tmp;
+	int rc;
+
+	/* Pass 1: rewrite the header of every matching partition as free. */
+	list_for_each_entry(part, &nvram_partitions, partition) {
+		if (!nvram_can_remove_partition(part, name, sig, exceptions))
+			continue;
+
+		/* Make partition a free partition */
+		part->header.signature = NVRAM_SIG_FREE;
+		memset(part->header.name, 'w', 12);
+		part->header.checksum = nvram_checksum(&part->header);
+		rc = nvram_write_header(part);
+		if (rc <= 0) {
+			printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
+			return rc;
+		}
+	}
+
+	/* Merge contiguous ones */
+	prev = NULL;
+	list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) {
+		/* A non-free partition ends the current run of free ones. */
+		if (part->header.signature != NVRAM_SIG_FREE) {
+			prev = NULL;
+			continue;
+		}
+		if (prev) {
+			/* Grow the first free entry of the run, drop this one. */
+			prev->header.length += part->header.length;
+			prev->header.checksum = nvram_checksum(&prev->header);
+			rc = nvram_write_header(prev);
+			if (rc <= 0) {
+				printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
+				return rc;
+			}
+			list_del(&part->partition);
+			kfree(part);
+		} else
+			prev = part;
+	}
+	
+	return 0;
+}
+
+/**
+ * nvram_create_partition - Create a partition in nvram
+ * @name: name of the partition to create
+ * @sig: signature of the partition to create
+ * @req_size: size of data to allocate in bytes
+ * @min_size: minimum acceptable size (0 means req_size)
+ *
+ * Returns a negative error code or a positive nvram index
+ * of the beginning of the data area of the newly created
+ * partition. If you provided a min_size smaller than req_size
+ * you need to query for the actual size yourself after the
+ * call using nvram_get_partition_size().
+ *
+ * Returns -EINVAL if min_size is larger than req_size, -ENOSPC if no free
+ * partition is large enough, -ENOMEM on allocation failure, or the result
+ * of a failed nvram write.
+ */
+loff_t __init nvram_create_partition(const char *name, int sig,
+				     int req_size, int min_size)
+{
+	struct nvram_partition *part;
+	struct nvram_partition *new_part;
+	struct nvram_partition *free_part = NULL;
+	static char nv_init_vals[16];
+	loff_t data_start, data_end, pos;
+	loff_t tmp_index;
+	long size = 0;
+	int rc;
+
+	BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
+	/* Convert sizes from bytes to blocks */
+	req_size = ALIGN(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+	min_size = ALIGN(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+
+	/* If no minimum size specified, make it the same as the
+	 * requested size
+	 */
+	if (min_size == 0)
+		min_size = req_size;
+	if (min_size > req_size)
+		return -EINVAL;
+
+	/* Now add one block to each for the header */
+	req_size += 1;
+	min_size += 1;
+
+	/*
+	 * Take the first free partition that can hold the requested size.
+	 * Failing that, remember the largest free partition that still
+	 * satisfies the minimum size.
+	 */
+	list_for_each_entry(part, &nvram_partitions, partition) {
+		if (part->header.signature != NVRAM_SIG_FREE)
+			continue;
+
+		if (part->header.length >= req_size) {
+			size = req_size;
+			free_part = part;
+			break;
+		}
+		if (part->header.length > size &&
+		    part->header.length >= min_size) {
+			size = part->header.length;
+			free_part = part;
+		}
+	}
+	if (!size)
+		return -ENOSPC;
+
+	/* Create our OS partition */
+	new_part = kzalloc(sizeof(*new_part), GFP_KERNEL);
+	if (!new_part) {
+		pr_err("%s: kmalloc failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	new_part->index = free_part->index;
+	new_part->header.signature = sig;
+	new_part->header.length = size;
+	/* kzalloc() zeroed the name field, so shorter names stay padded. */
+	memcpy(new_part->header.name, name, strnlen(name, sizeof(new_part->header.name)));
+	new_part->header.checksum = nvram_checksum(&new_part->header);
+
+	rc = nvram_write_header(new_part);
+	if (rc <= 0) {
+		pr_err("%s: nvram_write_header failed (%d)\n", __func__, rc);
+		kfree(new_part);
+		return rc;
+	}
+	/* Insert the new entry just before the free one it was taken from. */
+	list_add_tail(&new_part->partition, &free_part->partition);
+
+	/* Adjust or remove the partition we stole the space from */
+	if (free_part->header.length > size) {
+		free_part->index += size * NVRAM_BLOCK_LEN;
+		free_part->header.length -= size;
+		free_part->header.checksum = nvram_checksum(&free_part->header);
+		rc = nvram_write_header(free_part);
+		if (rc <= 0) {
+			pr_err("%s: nvram_write_header failed (%d)\n",
+			       __func__, rc);
+			return rc;
+		}
+	} else {
+		list_del(&free_part->partition);
+		kfree(free_part);
+	}
+
+	/*
+	 * Clear the new partition.  The data area is the (size - 1) blocks
+	 * after the header, so the end is computed from the partition's own
+	 * offset.  The write callback gets a scratch copy of the offset, so
+	 * the loop does not depend on whether the callback advances it.
+	 */
+	data_start = new_part->index + NVRAM_HEADER_LEN;
+	data_end = data_start + (size - 1) * NVRAM_BLOCK_LEN;
+	for (pos = data_start; pos < data_end; pos += NVRAM_BLOCK_LEN) {
+		tmp_index = pos;
+		rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index);
+		if (rc <= 0) {
+			pr_err("%s: nvram_write failed (%d)\n",
+			       __func__, rc);
+			return rc;
+		}
+	}
+
+	return data_start;
+}
+
+/**
+ * nvram_get_partition_size - Get the data size of an nvram partition
+ * @data_index: This is the offset of the start of the data of
+ *              the partition. The same value that is returned by
+ *              nvram_create_partition().
+ */
+int nvram_get_partition_size(loff_t data_index)
+{
+	struct nvram_partition *cur;
+
+	list_for_each_entry(cur, &nvram_partitions, partition) {
+		if (data_index != cur->index + NVRAM_HEADER_LEN)
+			continue;
+
+		/* header.length counts blocks including the header block. */
+		return (cur->header.length - 1) * NVRAM_BLOCK_LEN;
+	}
+
+	/* No partition has its data area at that offset. */
+	return -1;
+}
+
+
+/**
+ * nvram_find_partition - Find an nvram partition by signature and name
+ * @name: Name of the partition or NULL for any name
+ * @sig: Signature to test against
+ * @out_size: if non-NULL, returns the size of the data part of the partition
+ */
+loff_t nvram_find_partition(const char *name, int sig, int *out_size)
+{
+	struct nvram_partition *cur;
+
+	list_for_each_entry(cur, &nvram_partitions, partition) {
+		if (cur->header.signature != sig)
+			continue;
+		/* A NULL name matches any partition with this signature. */
+		if (name && strncmp(cur->header.name, name, 12) != 0)
+			continue;
+
+		if (out_size)
+			*out_size = (cur->header.length - 1) *
+				NVRAM_BLOCK_LEN;
+		return cur->index + NVRAM_HEADER_LEN;
+	}
+
+	/* 0 means "not found". */
+	return 0;
+}
+
+/*
+ * Walk NVRAM from offset 0, reading one partition header at a time and
+ * appending an entry to nvram_partitions for each valid header.
+ *
+ * Returns -ENODEV if no usable nvram_size callback exists and -ENOMEM on
+ * allocation failure.  A failed header read returns the read's own result.
+ * A bad checksum or a zero-length partition ends the scan early but still
+ * returns 0, keeping the partitions found so far.
+ */
+int __init nvram_scan_partitions(void)
+{
+	loff_t cur_index = 0;
+	struct nvram_header phead;
+	struct nvram_partition * tmp_part;
+	unsigned char c_sum;
+	char * header;
+	int total_size;
+	int err;
+
+	if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
+		return -ENODEV;
+	total_size = ppc_md.nvram_size();
+	
+	header = kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
+	if (!header) {
+		printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
+		return -ENOMEM;
+	}
+
+	while (cur_index < total_size) {
+
+		err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
+		if (err != NVRAM_HEADER_LEN) {
+			printk(KERN_ERR "nvram_scan_partitions: Error parsing "
+			       "nvram partitions\n");
+			goto out;
+		}
+
+		cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
+
+		memcpy(&phead, header, NVRAM_HEADER_LEN);
+
+		/* Keep the length in CPU byte order in memory. */
+		phead.length = be16_to_cpu(phead.length);
+
+		/* From here on an early exit reports success (err == 0). */
+		err = 0;
+		c_sum = nvram_checksum(&phead);
+		if (c_sum != phead.checksum) {
+			printk(KERN_WARNING "WARNING: nvram partition checksum"
+			       " was %02x, should be %02x!\n",
+			       phead.checksum, c_sum);
+			printk(KERN_WARNING "Terminating nvram partition scan\n");
+			goto out;
+		}
+		/* A zero length would never advance cur_index below. */
+		if (!phead.length) {
+			printk(KERN_WARNING "WARNING: nvram corruption "
+			       "detected: 0-length partition\n");
+			goto out;
+		}
+		tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
+		err = -ENOMEM;
+		if (!tmp_part) {
+			printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
+			goto out;
+		}
+		
+		memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
+		tmp_part->index = cur_index;
+		list_add_tail(&tmp_part->partition, &nvram_partitions);
+		
+		/* header.length is in blocks; step to the next header. */
+		cur_index += phead.length * NVRAM_BLOCK_LEN;
+	}
+	err = 0;
+
+#ifdef DEBUG_NVRAM
+	nvram_print_partitions("NVRAM Partitions");
+#endif
+
+ out:
+	kfree(header);
+	return err;
+}
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
new file mode 100644
index 0000000000..adc76fa58d
--- /dev/null
+++ b/arch/powerpc/kernel/of_platform.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *    Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
+ *			 <benh@kernel.crashing.org>
+ *    and		 Arnd Bergmann, IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/atomic.h>
+
+#include <asm/errno.h>
+#include <asm/topology.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+#ifdef CONFIG_PPC_OF_PLATFORM_PCI
+
+/* The probing of PCI controllers from of_platform is currently
+ * 64 bits only, mostly due to gratuitous differences between
+ * the 32 and 64 bits PCI code on PowerPC and the 32 bits one
+ * lacking some bits needed here.
+ */
+
+/*
+ * Platform driver probe for a PCI host bridge described in the device tree.
+ * Allocates a pci_controller for the node, sets it up through the platform
+ * callback ppc_md.pci_setup_phb, then scans the bus and adds the devices.
+ *
+ * Returns 0 on success, -ENODEV if the platform has no pci_setup_phb
+ * callback or if allocating or setting up the controller fails, and -ENXIO
+ * if the scan produced no bus.
+ */
+static int of_pci_phb_probe(struct platform_device *dev)
+{
+	struct pci_controller *phb;
+
+	/* Check if we can do that ... */
+	if (ppc_md.pci_setup_phb == NULL)
+		return -ENODEV;
+
+	pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node);
+
+	/* Alloc and setup PHB data structure */
+	phb = pcibios_alloc_controller(dev->dev.of_node);
+	if (!phb)
+		return -ENODEV;
+
+	/* Setup parent in sysfs */
+	phb->parent = &dev->dev;
+
+	/* Setup the PHB using arch provided callback */
+	if (ppc_md.pci_setup_phb(phb)) {
+		pcibios_free_controller(phb);
+		return -ENODEV;
+	}
+
+	/* Process "ranges" property */
+	pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0);
+
+	/* Init pci_dn data structures */
+	pci_devs_phb_init_dynamic(phb);
+
+	/* Create EEH PE for the PHB */
+	eeh_phb_pe_create(phb);
+
+	/* Scan the bus */
+	pcibios_scan_phb(phb);
+	/*
+	 * NOTE(review): unlike the setup failure above, this path does not
+	 * free the controller.  Confirm whether that is intentional.
+	 */
+	if (phb->bus == NULL)
+		return -ENXIO;
+
+	/* Claim resources. This might need some rework as well depending
+	 * whether we are doing probe-only or not, like assigning unassigned
+	 * resources etc...
+	 */
+	pcibios_claim_one_bus(phb->bus);
+
+	/* Add probed PCI devices to the device model */
+	pci_bus_add_devices(phb->bus);
+
+	return 0;
+}
+
+/*
+ * Device-tree match table for of_pci_phb_driver.  Entries match on the
+ * .type field; the empty entry terminates the table.
+ */
+static const struct of_device_id of_pci_phb_ids[] = {
+	{ .type = "pci", },
+	{ .type = "pcix", },
+	{ .type = "pcie", },
+	{ .type = "pciex", },
+	{ .type = "ht", },
+	{}
+};
+
+/*
+ * Platform driver binding of_pci_phb_probe() to the nodes matched by
+ * of_pci_phb_ids.  No remove callback is provided.
+ */
+static struct platform_driver of_pci_phb_driver = {
+	.probe = of_pci_phb_probe,
+	.driver = {
+		.name = "of-pci",
+		.of_match_table = of_pci_phb_ids,
+	},
+};
+
+/* Registered through the built-in (non-module) driver helper. */
+builtin_platform_driver(of_pci_phb_driver);
+
+#endif /* CONFIG_PPC_OF_PLATFORM_PCI */
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 0000000000..004fae2044
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2017, Anju T, IBM Corp.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+#include <asm/inst.h>
+
+/*
+ * Positions of the fixup points inside the optprobe template, each taken as
+ * the distance of a template label from optprobe_template_entry.  They are
+ * used below as offsets added to a kprobe_opcode_t pointer into the detour
+ * buffer.
+ */
+#define TMPL_CALL_HDLR_IDX	(optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX	(optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX		(optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP_IDX		(optprobe_template_op_address - optprobe_template_entry)
+#define TMPL_INSN_IDX		(optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX		(optprobe_template_end - optprobe_template_entry)
+
+/* True while the single page backed by optinsn_slot is handed out. */
+static bool insn_page_in_use;
+
+/*
+ * Hand out the one instruction page backed by optinsn_slot.  Returns NULL
+ * if that page is already in use.
+ */
+void *alloc_optinsn_page(void)
+{
+	void *page = NULL;
+
+	if (!insn_page_in_use) {
+		insn_page_in_use = true;
+		page = &optinsn_slot;
+	}
+
+	return page;
+}
+
+/*
+ * Mark the single instruction page as available again.  @page is not
+ * inspected because only one page can ever be handed out.
+ */
+void free_optinsn_page(void *page)
+{
+	insn_page_in_use = false;
+}
+
+/*
+ * Check if we can optimize this probe. Returns NIP post-emulation if this can
+ * be optimized and 0 otherwise.
+ */
+static unsigned long can_optimize(struct kprobe *p)
+{
+	struct pt_regs regs;
+	struct instruction_op op;
+	unsigned long nip = 0;
+	unsigned long addr = (unsigned long)p->addr;
+
+	/*
+	 * kprobe placed for kretprobe during boot time
+	 * has a 'nop' instruction, which can be emulated.
+	 * So further checks can be skipped.
+	 */
+	if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
+		return addr + sizeof(kprobe_opcode_t);
+
+	/*
+	 * We only support optimizing kernel addresses, but not
+	 * module addresses.
+	 *
+	 * FIXME: Optimize kprobes placed in module addresses.
+	 */
+	if (!is_kernel_addr(addr))
+		return 0;
+
+	/* Dummy register state: only nip, trap and msr are meaningful. */
+	memset(&regs, 0, sizeof(struct pt_regs));
+	regs.nip = addr;
+	regs.trap = 0x0;
+	regs.msr = MSR_KERNEL;
+
+	/*
+	 * Kprobe placed in conditional branch instructions are
+	 * not optimized, as we can't predict the nip prior with
+	 * dummy pt_regs and can not ensure that the return branch
+	 * from detour buffer falls in the range of address (i.e 32MB).
+	 * A branch back from trampoline is set up in the detour buffer
+	 * to the nip returned by the analyse_instr() here.
+	 *
+	 * Ensure that the instruction is not a conditional branch,
+	 * and that can be emulated.
+	 */
+	if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) &&
+	    analyse_instr(&op, &regs, ppc_inst_read(p->ainsn.insn)) == 1) {
+		/* Apply the decoded op to the dummy regs to get the next nip. */
+		emulate_update_regs(&regs, &op);
+		nip = regs.nip;
+	}
+
+	/* Still 0 if the instruction could not be handled above. */
+	return nip;
+}
+
+/*
+ * Handler that the detour buffer branches to; see
+ * arch_prepare_optimized_kprobe(), which patches in that branch.
+ * @op:   the optimized kprobe that was hit
+ * @regs: register state passed in by the caller
+ */
+static void optimized_callback(struct optimized_kprobe *op,
+			       struct pt_regs *regs)
+{
+	/* This is possible if op is under delayed unoptimizing */
+	if (kprobe_disabled(&op->kp))
+		return;
+
+	preempt_disable();
+
+	if (kprobe_running()) {
+		/* Another kprobe is being handled here: count a miss. */
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		__this_cpu_write(current_kprobe, &op->kp);
+		/* Make regs report the probed address as the return ip. */
+		regs_set_return_ip(regs, (unsigned long)op->kp.addr);
+		get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+
+	preempt_enable();
+}
+NOKPROBE_SYMBOL(optimized_callback);
+
+/*
+ * Release the detour buffer slot of @op, if it has one, and clear the
+ * pointer so the slot cannot be freed twice.
+ */
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+	if (!op->optinsn.insn)
+		return;
+
+	free_optinsn_slot(op->optinsn.insn, 1);
+	op->optinsn.insn = NULL;
+}
+
+/*
+ * Patch two instructions at 'addr' that load the 32-bit value 'val' into
+ * register 'reg': a LIS with PPC_HI(val), then an ORI with PPC_LO(val).
+ * The patch_instruction() results are not checked.
+ */
+static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+	patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val))));
+	patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
+}
+
+/*
+ * Generate instructions to load provided immediate 64-bit value
+ * to register 'reg' and patch these instructions at 'addr'.
+ */
+static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
+{
+	/* Five instructions: LIS and ORI, SLDI by 32, then ORIS and ORI. */
+	patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val))));
+	patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val))));
+	patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32)));
+	patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val))));
+	patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
+}
+
+/*
+ * Patch an immediate load of 'val' into register 'reg' at 'addr', using the
+ * 64-bit sequence when CONFIG_PPC64 is enabled and the 32-bit one otherwise.
+ */
+static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+	if (!IS_ENABLED(CONFIG_PPC64)) {
+		patch_imm32_load_insns(val, reg, addr);
+		return;
+	}
+
+	patch_imm64_load_insns(val, reg, addr);
+}
+
+/*
+ * Build the detour buffer for an optimized kprobe.
+ * @op: optimized kprobe whose optinsn.insn receives the buffer on success
+ * @p:  the kprobe being optimized
+ *
+ * Returns 0 on success, -EILSEQ if can_optimize() rejects the probe, and
+ * -ENOMEM if no slot is available.  Every failure after the slot has been
+ * allocated frees it and returns -ERANGE, whatever the actual cause.
+ */
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
+{
+	ppc_inst_t branch_op_callback, branch_emulate_step, temp;
+	unsigned long op_callback_addr, emulate_step_addr;
+	kprobe_opcode_t *buff;
+	long b_offset;
+	unsigned long nip, size;
+	int rc, i;
+
+	/* nip is where execution resumes after the emulated instruction. */
+	nip = can_optimize(p);
+	if (!nip)
+		return -EILSEQ;
+
+	/* Allocate instruction slot for detour buffer */
+	buff = get_optinsn_slot();
+	if (!buff)
+		return -ENOMEM;
+
+	/*
+	 * OPTPROBE uses 'b' instruction to branch to optinsn.insn.
+	 *
+	 * The target address has to be relatively nearby, to permit use
+	 * of branch instruction in powerpc, because the address is specified
+	 * in an immediate field in the instruction opcode itself, ie 24 bits
+	 * in the opcode specify the address. Therefore the address should
+	 * be within 32MB on either side of the current instruction.
+	 */
+	b_offset = (unsigned long)buff - (unsigned long)p->addr;
+	if (!is_offset_in_branch_range(b_offset))
+		goto error;
+
+	/* Check if the return address is also within 32MB range */
+	b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip;
+	if (!is_offset_in_branch_range(b_offset))
+		goto error;
+
+	/* Setup template */
+	/* We can optimize this via patch_instruction_window later */
+	size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
+	pr_devel("Copying template to %p, size %lu\n", buff, size);
+	for (i = 0; i < size; i++) {
+		rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i)));
+		if (rc < 0)
+			goto error;
+	}
+
+	/*
+	 * Fixup the template with instructions to:
+	 * 1. load the address of the actual probepoint
+	 */
+	patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
+
+	/*
+	 * 2. branch to optimized_callback() and emulate_step()
+	 */
+	op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback");
+	emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step");
+	if (!op_callback_addr || !emulate_step_addr) {
+		WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
+		goto error;
+	}
+
+	rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX,
+			   op_callback_addr, BRANCH_SET_LINK);
+
+	/* The two results are OR-ed; any non-zero value counts as failure. */
+	rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX,
+			    emulate_step_addr, BRANCH_SET_LINK);
+
+	if (rc)
+		goto error;
+
+	/* From here on, patch_instruction()/patch_branch() go unchecked. */
+	patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
+	patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
+
+	/*
+	 * 3. load instruction to be emulated into relevant register, and
+	 */
+	temp = ppc_inst_read(p->ainsn.insn);
+	patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
+
+	/*
+	 * 4. branch back from trampoline
+	 */
+	patch_branch(buff + TMPL_RET_IDX, nip, 0);
+
+	flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX]));
+
+	/* Publish the buffer only once it is fully built. */
+	op->optinsn.insn = buff;
+
+	return 0;
+
+error:
+	free_optinsn_slot(buff, 0);
+	return -ERANGE;
+
+}
+
+/*
+ * Returns non-zero if @optinsn has a detour buffer assigned, i.e. its insn
+ * pointer is not NULL.
+ */
+int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+	return optinsn->insn != NULL;
+}
+
+/*
+ * On powerpc, Optprobes always replaces one instruction (4 bytes
+ * aligned and 4 bytes long). It is impossible to encounter another
+ * kprobe in this address range. So always return 0.
+ */
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+	/* @op is not examined; this always returns 0. */
+	return 0;
+}
+
+/*
+ * For every optimized kprobe on @oplist: save the instruction bytes at the
+ * probe address, replace them with a branch to the detour buffer, and take
+ * the entry off the list.
+ */
+void arch_optimize_kprobes(struct list_head *oplist)
+{
+	ppc_inst_t instr;
+	struct optimized_kprobe *op;
+	struct optimized_kprobe *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		/*
+		 * Backup instructions which will be replaced
+		 * by jump address
+		 */
+		memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE);
+		/* create_branch() and patch_instruction() go unchecked here. */
+		create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0);
+		patch_instruction(op->kp.addr, instr);
+		list_del_init(&op->list);
+	}
+}
+
+/*
+ * Undo the optimization of @op by calling arch_arm_kprobe() on its kprobe.
+ */
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	arch_arm_kprobe(&op->kp);
+}
+
+/*
+ * Unoptimize every kprobe on @oplist and move each one to @done_list.
+ */
+void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list)
+{
+	struct optimized_kprobe *cur, *next;
+
+	/* The _safe iterator is needed because entries leave @oplist. */
+	list_for_each_entry_safe(cur, next, oplist, list) {
+		arch_unoptimize_kprobe(cur);
+		list_move(&cur->list, done_list);
+	}
+}
+
+/*
+ * Returns non-zero if @addr lies inside the range replaced by the jump of
+ * @op: from op->kp.addr up to, but not including, RELATIVEJUMP_SIZE bytes
+ * later.
+ */
+int arch_within_optimized_kprobe(struct optimized_kprobe *op, kprobe_opcode_t *addr)
+{
+	kprobe_opcode_t *first = op->kp.addr;
+	kprobe_opcode_t *past_end = first +
+		(RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t));
+
+	return addr >= first && addr < past_end;
+}
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 0000000000..35932f45fb
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2017, Anju T, IBM Corp.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_PPC64
+#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
+#define REST_30GPRS(base) REST_GPRS(2, 31, base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS	nop; nop; nop; nop; nop
+#else
+#define SAVE_30GPRS(base) stmw	r2, GPR2(base)
+#define REST_30GPRS(base) lmw	r2, GPR2(base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS	nop; nop; nop
+#endif
+
+#define	OPT_SLOT_SIZE	65536
+
+	.balign	4
+
+	/*
+	 * Reserve an area to allocate slots for detour buffer.
+	 * This is part of .text section (rather than vmalloc area)
+	 * as this needs to be within 32MB of the probed address.
+	 */
+	.global optinsn_slot
+optinsn_slot:
+	.space	OPT_SLOT_SIZE
+
+	/*
+	 * Optprobe template:
+	 * This template gets copied into one of the slots in optinsn_slot
+	 * and gets fixed up with real optprobe structures et al.
+	 */
+	.global optprobe_template_entry
+optprobe_template_entry:
+	/* Create an in-memory pt_regs */
+	PPC_STLU	r1,-INT_FRAME_SIZE(r1)
+	SAVE_GPR(0,r1)
+	/* Save the previous SP into stack */
+	addi	r0,r1,INT_FRAME_SIZE
+	PPC_STL	r0,GPR1(r1)
+	SAVE_30GPRS(r1)
+	/* Save SPRS */
+	mfmsr	r5
+	PPC_STL	r5,_MSR(r1)
+	li	r5,0x700
+	PPC_STL	r5,_TRAP(r1)
+	li	r5,0
+	PPC_STL	r5,ORIG_GPR3(r1)
+	PPC_STL	r5,RESULT(r1)
+	mfctr	r5
+	PPC_STL	r5,_CTR(r1)
+	mflr	r5
+	PPC_STL	r5,_LINK(r1)
+	mfspr	r5,SPRN_XER
+	PPC_STL	r5,_XER(r1)
+	mfcr	r5
+	PPC_STL	r5,_CCR(r1)
+#ifdef CONFIG_PPC64
+	lbz     r5,PACAIRQSOFTMASK(r13)
+	std     r5,SOFTE(r1)
+#endif
+
+	/*
+	 * We may get here from a module, so load the kernel TOC in r2.
+	 * The original TOC gets restored when pt_regs is restored
+	 * further below.
+	 */
+#ifdef CONFIG_PPC64
+	LOAD_PACA_TOC()
+#endif
+
+	.global optprobe_template_op_address
+optprobe_template_op_address:
+	/*
+	 * Parameters to optimized_callback():
+	 * 1. optimized_kprobe structure in r3
+	 */
+	TEMPLATE_FOR_IMM_LOAD_INSNS
+
+	/* 2. pt_regs pointer in r4 */
+	addi	r4,r1,STACK_INT_FRAME_REGS
+
+	.global optprobe_template_call_handler
+optprobe_template_call_handler:
+	/* Branch to optimized_callback() */
+	nop
+
+	/*
+	 * Parameters for instruction emulation:
+	 * 1. Pass pt_regs pointer in register r3.
+	 */
+	addi	r3,r1,STACK_INT_FRAME_REGS
+
+	.global optprobe_template_insn
+optprobe_template_insn:
+	/* 2. Pass instruction to be emulated in r4 */
+	TEMPLATE_FOR_IMM_LOAD_INSNS
+
+	.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+	/* Branch to emulate_step()  */
+	nop
+
+	/*
+	 * All done.
+	 * Now, restore the registers...
+	 */
+	PPC_LL	r5,_MSR(r1)
+	mtmsr	r5
+	PPC_LL	r5,_CTR(r1)
+	mtctr	r5
+	PPC_LL	r5,_LINK(r1)
+	mtlr	r5
+	PPC_LL	r5,_XER(r1)
+	mtxer	r5
+	PPC_LL	r5,_CCR(r1)
+	mtcr	r5
+	REST_GPR(0,r1)
+	REST_30GPRS(r1)
+	/* Restore the previous SP */
+	addi	r1,r1,INT_FRAME_SIZE
+
+	.global optprobe_template_ret
+optprobe_template_ret:
+	/* ... and jump back from trampoline */
+	nop
+
+	.global optprobe_template_end
+optprobe_template_end:
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
new file mode 100644
index 0000000000..cda4e00b67
--- /dev/null
+++ b/arch/powerpc/kernel/paca.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ */
+
+#include <linux/smp.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/numa.h>
+#include <linux/pgtable.h>
+
+#include <asm/lppaca.h>
+#include <asm/paca.h>
+#include <asm/sections.h>
+#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+#include "setup.h"
+
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+				unsigned long limit, int cpu)
+{
+	void *ptr;
+	int nid;
+
+	/*
+	 * boot_cpuid paca is allocated very early before cpu_to_node is up.
+	 * Set bottom-up mode, because the boot CPU should be on node-0,
+	 * which will put its paca in the right place.
+	 */
+	if (cpu == boot_cpuid) {
+		nid = NUMA_NO_NODE;
+		memblock_set_bottom_up(true);
+	} else {
+		nid = early_cpu_to_node(cpu);
+	}
+
+	ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+				     limit, nid);
+	if (!ptr)
+		panic("cannot allocate paca data");
+
+	if (cpu == boot_cpuid)
+		memblock_set_bottom_up(false);
+
+	return ptr;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+
+#define LPPACA_SIZE 0x400
+
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit,
+					int cpu)
+{
+	size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+	static unsigned long shared_lppaca_size;
+	static void *shared_lppaca;
+	void *ptr;
+
+	if (!shared_lppaca) {
+		memblock_set_bottom_up(true);
+
+		/*
+		 * See Documentation/powerpc/ultravisor.rst for more details.
+		 *
+		 * UV/HV data sharing is in PAGE_SIZE granularity. In order to
+		 * minimize the number of pages shared, align the allocation to
+		 * PAGE_SIZE.
+		 */
+		shared_lppaca =
+			memblock_alloc_try_nid(shared_lppaca_total_size,
+					       PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+					       limit, NUMA_NO_NODE);
+		if (!shared_lppaca)
+			panic("cannot allocate shared data");
+
+		memblock_set_bottom_up(false);
+		uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+			      shared_lppaca_total_size >> PAGE_SHIFT);
+	}
+
+	ptr = shared_lppaca + shared_lppaca_size;
+	shared_lppaca_size += size;
+
+	/*
+	 * This is very early in boot, so no harm done if the kernel crashes at
+	 * this point.
+	 */
+	BUG_ON(shared_lppaca_size > shared_lppaca_total_size);
+
+	return ptr;
+}
+
+/*
+ * Fill a freshly allocated lppaca with its initial values; every field
+ * not named below is zeroed. See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned, and not
+ * cross a 4kB boundary; 1kB size and 1kB alignment satisfy all of these.
+ */
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+	BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+	*lppaca = (struct lppaca) {
+		.desc = cpu_to_be32(0xd397d781),	/* "LpPa" */
+		.size = cpu_to_be16(LPPACA_SIZE),
+		.fpregs_in_use = 1,
+		.slb_count = cpu_to_be16(64),
+		.vmxregs_in_use = 0,
+		.page_ins = 0, };
+}
+
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
+{
+	struct lppaca *lp;
+
+	BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
+
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		return NULL;
+
+	if (is_secure_guest())
+		lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu);
+	else
+		lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
+
+	init_lppaca(lp);
+
+	return lp;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * 3 persistent SLBs are allocated here.  The buffer will be zero
+ * initially, hence will all be invalid until we actually write them.
+ *
+ * If you make the number of persistent SLB entries dynamic, please also
+ * update PR KVM to flush and restore them accordingly.
+ */
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
+{
+	struct slb_shadow *s;
+
+	if (cpu != boot_cpuid) {
+		/*
+		 * Boot CPU comes here before early_radix_enabled
+		 * is parsed (e.g., for disable_radix). So allocate
+		 * always and this will be fixed up in free_unused_pacas.
+		 */
+		if (early_radix_enabled())
+			return NULL;
+	}
+
+	s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
+
+	s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
+	s->buffer_length = cpu_to_be32(sizeof(*s));
+
+	return s;
+}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+/* The Paca is an array with one entry per processor.  Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux.
+ * On systems with hardware multi-threading, there are two threads
+ * per processor.  The Paca array must contain an entry for each thread.
+ * The VPD Areas will give a max logical processors = 2 * max physical
+ * processors.  The processor VPD array needs one entry per physical
+ * processor (not thread).
+ */
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
+
+void __init initialise_paca(struct paca_struct *new_paca, int cpu)
+{
+#ifdef CONFIG_PPC_PSERIES
+	new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E_64
+	new_paca->kernel_pgd = swapper_pg_dir;
+#endif
+	new_paca->lock_token = 0x8000;
+	new_paca->paca_index = cpu;
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	new_paca->kernel_toc = kernel_toc_addr();
+#endif
+	new_paca->kernelbase = (unsigned long) _stext;
+	/* Only set MSR:IR/DR when MMU is initialized */
+	new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
+	new_paca->hw_cpu_id = 0xffff;
+	new_paca->kexec_state = KEXEC_STATE_NONE;
+	new_paca->__current = &init_task;
+	new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	new_paca->slb_shadow_ptr = NULL;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	/* For now -- if we have threads this will be adjusted later */
+	new_paca->tcd_ptr = &new_paca->tcd;
+#endif
+}
+
+/* Put the paca pointer into r13 and SPRG_PACA */
+void setup_paca(struct paca_struct *new_paca)
+{
+	/* Setup r13 */
+	local_paca = new_paca;
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	/* On Book3E, initialize the TLB miss exception frames */
+	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+	/*
+	 * In HV mode, we setup both HPACA and PACA to avoid problems
+	 * if we do a GET_PACA() before the feature fixups have been
+	 * applied.
+	 *
+	 * Normally you should test against CPU_FTR_HVMODE, but CPU features
+	 * are not yet set up when we first reach here.
+	 */
+	if (mfmsr() & MSR_HV)
+		mtspr(SPRN_SPRG_HPACA, local_paca);
+#endif
+	mtspr(SPRN_SPRG_PACA, local_paca);
+
+}
+
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
+
+void __init allocate_paca_ptrs(void)
+{
+	paca_nr_cpu_ids = nr_cpu_ids;
+
+	paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+	paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
+	if (!paca_ptrs)
+		panic("Failed to allocate %d bytes for paca pointers\n",
+		      paca_ptrs_size);
+
+	memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
+
+void __init allocate_paca(int cpu)
+{
+	u64 limit;
+	struct paca_struct *paca;
+
+	BUG_ON(cpu >= paca_nr_cpu_ids);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * We access pacas in real mode, and cannot take SLB faults
+	 * on them when in virtual mode, so allocate them accordingly.
+	 */
+	limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+	limit = ppc64_rma_size;
+#endif
+
+	paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+				limit, cpu);
+	paca_ptrs[cpu] = paca;
+
+	initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+	paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+	paca_struct_size += sizeof(struct paca_struct);
+}
+
+void __init free_unused_pacas(void)
+{
+	int new_ptrs_size;
+
+	new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+	if (new_ptrs_size < paca_ptrs_size)
+		memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
+				   paca_ptrs_size - new_ptrs_size);
+
+	paca_nr_cpu_ids = nr_cpu_ids;
+	paca_ptrs_size = new_ptrs_size;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (early_radix_enabled()) {
+		/* Ugly fixup, see new_slb_shadow() */
+		memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+				   sizeof(struct slb_shadow));
+		paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+	}
+#endif
+
+	printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+			paca_ptrs_size + paca_struct_size, nr_cpu_ids);
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+	mm_context_t *context = &mm->context;
+
+	VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+	memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+	       LOW_SLICE_ARRAY_SZ);
+	memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+	       TASK_SLICE_ARRAY_SZ(context));
+}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
new file mode 100644
index 0000000000..040255ddb5
--- /dev/null
+++ b/arch/powerpc/kernel/pci-common.c
@@ -0,0 +1,1736 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Contains common pci routines for ALL ppc platform
+ * (based on pci_32.c and pci_64.c)
+ *
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *   Rework, based on alpha PCI code.
+ *
+ * Common pmac/prep/chrp pci routines. -- Cort
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/mm.h>
+#include <linux/shmem_fs.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/vgaarb.h>
+#include <linux/numa.h>
+#include <linux/msi.h>
+#include <linux/irqdomain.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+#include <asm/setup.h>
+
+#include "../../../drivers/pci/pci.h"
+
+/* hose_spinlock protects the phb_bitmap and updates of hose_list. */
+static DEFINE_SPINLOCK(hose_spinlock);
+LIST_HEAD(hose_list);
+
+/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */
+#define MAX_PHBS 0x10000
+
+/*
+ * For dynamic PHB numbering: used/free PHBs tracking bitmap.
+ * Accesses to this bitmap should be protected by hose_spinlock.
+ */
+static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
+
+/* ISA Memory physical address */
+resource_size_t isa_mem_base;
+EXPORT_SYMBOL(isa_mem_base);
+
+
+static const struct dma_map_ops *pci_dma_ops;
+
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)
+{
+	pci_dma_ops = dma_ops;
+}
+
+static int get_phb_number(struct device_node *dn)
+{
+	int ret, phb_id = -1;
+	u64 prop;
+
+	/*
+	 * Try fixed PHB numbering first, by checking archs and reading
+	 * the respective device-tree properties. Firstly, try reading
+	 * standard "linux,pci-domain", then try reading "ibm,opal-phbid"
+	 * (only present in powernv OPAL environment), then try device-tree
+	 * alias and as the last try to use lower bits of "reg" property.
+	 */
+	ret = of_get_pci_domain_nr(dn);
+	if (ret >= 0) {
+		prop = ret;
+		ret = 0;
+	}
+	if (ret)
+		ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
+
+	if (ret) {
+		ret = of_alias_get_id(dn, "pci");
+		if (ret >= 0) {
+			prop = ret;
+			ret = 0;
+		}
+	}
+	if (ret) {
+		u32 prop_32;
+		ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+		prop = prop_32;
+	}
+
+	if (!ret)
+		phb_id = (int)(prop & (MAX_PHBS - 1));
+
+	spin_lock(&hose_spinlock);
+
+	/* We need to be sure to not use the same PHB number twice. */
+	if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap))
+		goto out_unlock;
+
+	/* If everything fails then fallback to dynamic PHB numbering. */
+	phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
+	BUG_ON(phb_id >= MAX_PHBS);
+	set_bit(phb_id, phb_bitmap);
+
+out_unlock:
+	spin_unlock(&hose_spinlock);
+
+	return phb_id;
+}
+
+struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
+{
+	struct pci_controller *phb;
+
+	phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL);
+	if (phb == NULL)
+		return NULL;
+
+	phb->global_number = get_phb_number(dev);
+
+	spin_lock(&hose_spinlock);
+	list_add_tail(&phb->list_node, &hose_list);
+	spin_unlock(&hose_spinlock);
+
+	phb->dn = of_node_get(dev);
+	phb->is_dynamic = slab_is_available();
+#ifdef CONFIG_PPC64
+	if (dev) {
+		int nid = of_node_to_nid(dev);
+
+		if (nid < 0 || !node_online(nid))
+			nid = NUMA_NO_NODE;
+
+		PHB_SET_NODE(phb, nid);
+	}
+#endif
+	return phb;
+}
+EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
+
+void pcibios_free_controller(struct pci_controller *phb)
+{
+	spin_lock(&hose_spinlock);
+
+	/* Clear bit of phb_bitmap to allow reuse of this PHB number. */
+	if (phb->global_number < MAX_PHBS)
+		clear_bit(phb->global_number, phb_bitmap);
+	of_node_put(phb->dn);
+	list_del(&phb->list_node);
+	spin_unlock(&hose_spinlock);
+
+	if (phb->is_dynamic)
+		kfree(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller);
+
+/*
+ * This function is used to call pcibios_free_controller()
+ * in a deferred manner: a callback from the PCI subsystem.
+ *
+ * _*DO NOT*_ call pcibios_free_controller() explicitly if
+ * this is used (or it may access an invalid *phb pointer).
+ *
+ * The callback occurs when all references to the root bus
+ * are dropped (e.g., child buses/devices and their users).
+ *
+ * It's called as .release_fn() of 'struct pci_host_bridge'
+ * which is associated with the 'struct pci_controller.bus'
+ * (root bus) - it expects .release_data to hold a pointer
+ * to 'struct pci_controller'.
+ *
+ * In order to use it, register .release_fn()/release_data
+ * like this:
+ *
+ * pci_set_host_bridge_release(bridge,
+ *                             pcibios_free_controller_deferred,
+ *                             (void *) phb);
+ *
+ * e.g. in the pcibios_root_bridge_prepare() callback from
+ * pci_create_root_bus().
+ */
+void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
+{
+	struct pci_controller *phb = (struct pci_controller *)
+					 bridge->release_data;
+
+	pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
+
+	pcibios_free_controller(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
+
+/*
+ * The function is used to return the minimal alignment
+ * for memory or I/O windows of the associated P2P bridge.
+ * By default, 4KiB alignment for I/O windows and 1MiB for
+ * memory windows.
+ */
+resource_size_t pcibios_window_alignment(struct pci_bus *bus,
+					 unsigned long type)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+
+	if (phb->controller_ops.window_alignment)
+		return phb->controller_ops.window_alignment(bus, type);
+
+	/*
+	 * PCI core will figure out the default
+	 * alignment: 4KiB for I/O and 1MiB for
+	 * memory window.
+	 */
+	return 1;
+}
+
+void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	if (hose->controller_ops.setup_bridge)
+		hose->controller_ops.setup_bridge(bus, type);
+}
+
+/* Reset the secondary bus of @dev. */
+void pcibios_reset_secondary_bus(struct pci_dev *dev)
+{
+	struct pci_controller *host = pci_bus_to_host(dev->bus);
+
+	/* A controller-specific hook, when present, replaces the generic reset. */
+	if (host->controller_ops.reset_secondary_bus)
+		host->controller_ops.reset_secondary_bus(dev);
+	else
+		pci_reset_secondary_bus(dev);
+}
+
+/* Default resource alignment: ask the platform hook, or 0 if it has none. */
+resource_size_t pcibios_default_alignment(void)
+{
+	if (!ppc_md.pcibios_default_alignment)
+		return 0;
+	return ppc_md.pcibios_default_alignment();
+}
+
+#ifdef CONFIG_PCI_IOV
+resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
+{
+	if (ppc_md.pcibios_iov_resource_alignment)
+		return ppc_md.pcibios_iov_resource_alignment(pdev, resno);
+
+	return pci_iov_resource_size(pdev, resno);
+}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	if (ppc_md.pcibios_sriov_enable)
+		return ppc_md.pcibios_sriov_enable(pdev, num_vfs);
+
+	return 0;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	if (ppc_md.pcibios_sriov_disable)
+		return ppc_md.pcibios_sriov_disable(pdev);
+
+	return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+
+static resource_size_t pcibios_io_size(const struct pci_controller *hose)
+{
+#ifdef CONFIG_PPC64
+	return hose->pci_io_size;
+#else
+	return resource_size(&hose->io_resource);
+#endif
+}
+
+/* Return 1 if @address is inside any controller's IO window, else 0. */
+int pcibios_vaddr_is_ioport(void __iomem *address)
+{
+	struct pci_controller *hose;
+	int found = 0;
+
+	spin_lock(&hose_spinlock);
+	list_for_each_entry(hose, &hose_list, list_node) {
+		void __iomem *lo = hose->io_base_virt;
+		void __iomem *hi = lo + pcibios_io_size(hose);
+
+		found = address >= lo && address < hi;
+		if (found)
+			break;
+	}
+	spin_unlock(&hose_spinlock);
+	return found;
+}
+
+/* Map a physical IO address to its offset from _IO_BASE, or ~0 if none. */
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+	unsigned long pio = ~0;
+	struct pci_controller *hose;
+
+	spin_lock(&hose_spinlock);
+	list_for_each_entry(hose, &hose_list, list_node) {
+		resource_size_t win = pcibios_io_size(hose);
+
+		/* Skip controllers whose IO window does not hold @address. */
+		if (address < hose->io_base_phys ||
+		    address >= (hose->io_base_phys + win))
+			continue;
+		pio = (unsigned long)hose->io_base_virt - _IO_BASE;
+		pio += address - hose->io_base_phys;
+		break;
+	}
+	spin_unlock(&hose_spinlock);
+	return pio;
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	return hose->global_number;
+}
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* Return the controller whose device node is @node or its nearest
+ * ancestor, or NULL if there is none. Meant to be used early during
+ * boot, when the PCI bus numbers have not yet been assigned, and you
+ * need to issue PCI config cycles to an OF device. It could also be
+ * used to "fix" RTAS config cycles if you want to set
+ * pci_assign_all_buses to 1 and still use RTAS for PCI config cycles.
+ */
+struct pci_controller *pci_find_hose_for_OF_device(struct device_node *node)
+{
+	while (node) {
+		struct pci_controller *hose;
+		list_for_each_entry(hose, &hose_list, list_node)
+			if (hose->dn == node)
+				return hose;
+		node = node->parent;
+	}
+	return NULL;
+}
+
+struct pci_controller *pci_find_controller_for_domain(int domain_nr)
+{
+	struct pci_controller *hose;
+
+	list_for_each_entry(hose, &hose_list, list_node)
+		if (hose->global_number == domain_nr)
+			return hose;
+
+	return NULL;
+}
+
+struct pci_intx_virq {
+	int virq;
+	struct kref kref;
+	struct list_head list_node;
+};
+
+static LIST_HEAD(intx_list);
+static DEFINE_MUTEX(intx_mutex);
+
+static void ppc_pci_intx_release(struct kref *kref)
+{
+	struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref);
+
+	list_del(&vi->list_node);
+	irq_dispose_mapping(vi->virq);
+	kfree(vi);
+}
+
+static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
+			       unsigned long action, void *data)
+{
+	struct pci_dev *pdev = to_pci_dev(data);
+
+	if (action == BUS_NOTIFY_DEL_DEVICE) {
+		struct pci_intx_virq *vi;
+
+		mutex_lock(&intx_mutex);
+		list_for_each_entry(vi, &intx_list, list_node) {
+			if (vi->virq == pdev->irq) {
+				kref_put(&vi->kref, ppc_pci_intx_release);
+				break;
+			}
+		}
+		mutex_unlock(&intx_mutex);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_pci_unmap_irq_notifier = {
+	.notifier_call = ppc_pci_unmap_irq_line,
+};
+
+static int ppc_pci_register_irq_notifier(void)
+{
+	return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
+}
+arch_initcall(ppc_pci_register_irq_notifier);
+
+/*
+ * Reads the interrupt pin to determine if an interrupt is used by the card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+static int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+	int virq;
+	struct pci_intx_virq *vi, *vitmp;
+
+	/* Preallocate vi as rewind is complex if this fails after mapping */
+	vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL);
+	if (!vi)
+		return -1;
+
+	pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
+
+	/* Try to get a mapping from the device-tree */
+	virq = of_irq_parse_and_map_pci(pci_dev, 0, 0);
+	if (virq <= 0) {
+		u8 line, pin;
+
+		/* If that fails, lets fallback to what is in the config
+		 * space and map that through the default controller. We
+		 * also set the type to level low since that's what PCI
+		 * interrupts are. If your platform does differently, then
+		 * either provide a proper interrupt tree or don't use this
+		 * function.
+		 */
+		if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
+			goto error_exit;
+		if (pin == 0)
+			goto error_exit;
+		if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
+		    line == 0xff || line == 0) {
+			goto error_exit;
+		}
+		pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
+			 line, pin);
+
+		virq = irq_create_mapping(NULL, line);
+		if (virq)
+			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
+	}
+
+	if (!virq) {
+		pr_debug(" Failed to map !\n");
+		goto error_exit;
+	}
+
+	pr_debug(" Mapped to linux irq %d\n", virq);
+
+	pci_dev->irq = virq;
+
+	mutex_lock(&intx_mutex);
+	list_for_each_entry(vitmp, &intx_list, list_node) {
+		if (vitmp->virq == virq) {
+			kref_get(&vitmp->kref);
+			kfree(vi);
+			vi = NULL;
+			break;
+		}
+	}
+	if (vi) {
+		vi->virq = virq;
+		kref_init(&vi->kref);
+		list_add_tail(&vi->list_node, &intx_list);
+	}
+	mutex_unlock(&intx_mutex);
+
+	return 0;
+error_exit:
+	kfree(vi);
+	return -1;
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
+ *  -- paulus.
+ */
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	resource_size_t ioaddr = pci_resource_start(pdev, bar);
+
+	if (!hose)
+		return -EINVAL;
+
+	/* Convert to an offset within this PCI controller */
+	ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
+
+	vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+	return 0;
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device: it looks for a PCI device with a memory resource covering
+ * the address and picks the caching attributes accordingly.
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+				  unsigned long pfn,
+				  unsigned long size,
+				  pgprot_t prot)
+{
+	struct pci_dev *pdev = NULL;
+	struct resource *found = NULL;
+	resource_size_t offset = ((resource_size_t)pfn) << PAGE_SHIFT;
+	int i;
+
+	if (page_is_ram(pfn))
+		return prot;
+
+	prot = pgprot_noncached(prot);
+	for_each_pci_dev(pdev) {
+		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+			struct resource *rp = &pdev->resource[i];
+			int flags = rp->flags;
+
+			/* Active and same type? */
+			if ((flags & IORESOURCE_MEM) == 0)
+				continue;
+			/* In the range of this resource? */
+			if (offset < (rp->start & PAGE_MASK) ||
+			    offset > rp->end)
+				continue;
+			found = rp;
+			break;
+		}
+		if (found)
+			break;
+	}
+	if (found) {
+		if (found->flags & IORESOURCE_PREFETCH)
+			prot = pgprot_noncached_wc(prot);
+		pci_dev_put(pdev);
+	}
+
+	pr_debug("PCI: Non-PCI map for %llx, prot: %lx\n",
+		 (unsigned long long)offset, pgprot_val(prot));
+
+	return prot;
+}
+
+/* Legacy IO read on a bus: returns the size read (1, 2 or 4) or -errno */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check the
+	 * ranges of the PHB, not the bus itself: the rules for forwarding
+	 * legacy cycles down bridges are not our problem here. rp->end is
+	 * inclusive, so the last byte of the window is a valid target.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO) || offset < rp->start)
+		return -ENXIO;
+	if (offset > rp->end || size > rp->end - offset + 1)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	switch (size) {
+	case 1:
+		*((u8 *)val) = in_8(addr);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		*((u16 *)val) = in_le16(addr);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		*((u32 *)val) = in_le32(addr);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* Legacy IO write on a bus: returns the size written (1, 2 or 4) or -errno */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+	unsigned long offset;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct resource *rp = &hose->io_resource;
+	void __iomem *addr;
+
+	/* Check if port can be supported by that bus. We only check the
+	 * ranges of the PHB, not the bus itself: the rules for forwarding
+	 * legacy cycles down bridges are not our problem here. rp->end is
+	 * inclusive, so the last byte of the window is a valid target.
+	 */
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	offset += port;
+
+	if (!(rp->flags & IORESOURCE_IO) || offset < rp->start)
+		return -ENXIO;
+	if (offset > rp->end || size > rp->end - offset + 1)
+		return -ENXIO;
+	addr = hose->io_base_virt + port;
+
+	/* WARNING: The generic code is idiotic. It gets passed a pointer
+	 * to what can be a 1, 2 or 4 byte quantity and always reads that
+	 * as a u32, which means that we have to correct the location of
+	 * the data read within those 32 bits for size 1 and 2
+	 */
+	switch (size) {
+	case 1:
+		out_8(addr, val >> 24);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		out_le16(addr, val >> 16);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		out_le32(addr, val);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+			       struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t offset =
+		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+	resource_size_t size = vma->vm_end - vma->vm_start;
+
+	pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+		 pci_domain_nr(bus), bus->number,
+		 mmap_state == pci_mmap_mem ? "MEM" : "IO",
+		 (unsigned long long)offset,
+		 (unsigned long long)(offset + size - 1));
+
+	if (mmap_state == pci_mmap_mem) {
+		/* Hack alert !
+		 *
+		 * Because X is lame and can fail starting if it gets an error trying
+		 * to mmap legacy_mem (instead of just moving on without legacy memory
+		 * access) we fake it here by giving it anonymous memory, effectively
+		 * behaving just like /dev/zero
+		 */
+		if ((offset + size) > hose->isa_mem_size) {
+			printk(KERN_DEBUG
+			       "Process %s (pid:%d) mapped non-existing PCI legacy memory for 0%04x:%02x\n",
+			       current->comm, current->pid, pci_domain_nr(bus), bus->number);
+			if (vma->vm_flags & VM_SHARED)
+				return shmem_zero_setup(vma);
+			return 0;
+		}
+		offset += hose->isa_mem_phys;
+	} else {
+		unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		unsigned long roffset = offset + io_offset;
+		struct resource *rp = &hose->io_resource;
+		if (!(rp->flags & IORESOURCE_IO))
+			return -ENXIO;
+		/* rp->end is inclusive, so compare against the last byte mapped */
+		if (roffset < rp->start || (roffset + size - 1) > rp->end)
+			return -ENXIO;
+		offset += hose->io_base_phys;
+	}
+	pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start,
+			       vma->vm_page_prot);
+}
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+			  const struct resource *rsrc,
+			  resource_size_t *start, resource_size_t *end)
+{
+	struct pci_bus_region region;
+
+	if (rsrc->flags & IORESOURCE_IO) {
+		pcibios_resource_to_bus(dev->bus, &region,
+					(struct resource *) rsrc);
+		*start = region.start;
+		*end = region.end;
+		return;
+	}
+
+	/* We pass a CPU physical address to userland for MMIO instead of a
+	 * BAR value because X is lame and expects to be able to use that
+	 * to pass to /dev/mem!
+	 *
+	 * That means we may have 64-bit values where some apps only expect
+	 * 32 (like X itself since it thinks only Sparc has 64-bit MMIO).
+	 */
+	*start = rsrc->start;
+	*end = rsrc->end;
+}
+
+/**
+ * pci_process_bridge_OF_ranges - Parse PCI bridge resources from device tree
+ * @hose: newly allocated pci_controller to be set up
+ * @dev: device node of the host bridge
+ * @primary: set if primary bus (32 bits only, soon to be deprecated)
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and set up the resource mapping of a pci controller based on its
+ * content.
+ *
+ * Life would be boring if it wasn't for a few issues that we have to deal
+ * with here:
+ *
+ *   - We can only cope with one IO space range and up to 3 Memory space
+ *     ranges. However, some machines (thanks Apple !) tend to split their
+ *     space into lots of small contiguous ranges. So we have to coalesce.
+ *
+ *   - Some busses have IO space not starting at 0, which causes trouble with
+ *     the way we do our IO resource renumbering. The code somewhat deals with
+ *     it for 64 bits but I would expect problems on 32 bits.
+ *
+ *   - Some 32 bits platforms such as 4xx can have physical space larger than
+ *     32 bits so we need to use 64 bits values for the parsing
+ */
+void pci_process_bridge_OF_ranges(struct pci_controller *hose,
+				  struct device_node *dev, int primary)
+{
+	int memno = 0;
+	struct resource *res;
+	struct of_pci_range range;
+	struct of_pci_range_parser parser;
+
+	printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n",
+	       dev, primary ? "(primary)" : "");
+
+	/* Check for ranges property */
+	if (of_pci_range_parser_init(&parser, dev))
+		return;
+
+	/* Parse it */
+	for_each_of_pci_range(&parser, &range) {
+		/* Skip the range if translation failed or it is zero-sized
+		 * (some FW try to feed us with nonsensical zero sized regions
+		 * such as power3 which look like some kind of attempt at exposing
+		 * the VGA memory hole)
+		 */
+		if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+			continue;
+
+		/* Act based on address space type */
+		res = NULL;
+		switch (range.flags & IORESOURCE_TYPE_BITS) {
+		case IORESOURCE_IO:
+			printk(KERN_INFO
+			       "  IO 0x%016llx..0x%016llx -> 0x%016llx\n",
+			       range.cpu_addr, range.cpu_addr + range.size - 1,
+			       range.pci_addr);
+
+			/* We support only one IO range */
+			if (hose->pci_io_size) {
+				printk(KERN_INFO
+				       " \\--> Skipped (too many) !\n");
+				continue;
+			}
+#ifdef CONFIG_PPC32
+			/* On 32 bits, limit I/O space to 16MB */
+			if (range.size > 0x01000000)
+				range.size = 0x01000000;
+
+			/* 32 bits needs to map IOs here */
+			hose->io_base_virt = ioremap(range.cpu_addr,
+						range.size);
+
+			/* Expect trouble if pci_addr is not 0 */
+			if (primary)
+				isa_io_base =
+					(unsigned long)hose->io_base_virt;
+#endif /* CONFIG_PPC32 */
+			/* pci_io_size and io_base_phys always represent IO
+			 * space starting at 0 so we factor in pci_addr
+			 */
+			hose->pci_io_size = range.pci_addr + range.size;
+			hose->io_base_phys = range.cpu_addr - range.pci_addr;
+
+			/* Build resource: IO resources are kept in PCI bus addresses */
+			res = &hose->io_resource;
+			range.cpu_addr = range.pci_addr;
+			break;
+		case IORESOURCE_MEM:
+			printk(KERN_INFO
+			       " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
+			       range.cpu_addr, range.cpu_addr + range.size - 1,
+			       range.pci_addr,
+			       (range.flags & IORESOURCE_PREFETCH) ?
+			       "Prefetch" : "");
+
+			/* We support only 3 memory ranges */
+			if (memno >= 3) {
+				printk(KERN_INFO
+				       " \\--> Skipped (too many) !\n");
+				continue;
+			}
+			/* A range at PCI address 0 is the ISA memory hole space */
+			if (range.pci_addr == 0) {
+				if (primary || isa_mem_base == 0)
+					isa_mem_base = range.cpu_addr;
+				hose->isa_mem_phys = range.cpu_addr;
+				hose->isa_mem_size = range.size;
+			}
+
+			/* Build resource: MEM resources are kept in CPU addresses */
+			hose->mem_offset[memno] = range.cpu_addr -
+							range.pci_addr;
+			res = &hose->mem_resources[memno++];
+			break;
+		}
+		if (res != NULL) {	/* NULL for unhandled space types */
+			res->name = dev->full_name;
+			res->flags = range.flags;
+			res->start = range.cpu_addr;
+			res->end = range.cpu_addr + range.size - 1;
+			res->parent = res->child = res->sibling = NULL;
+		}
+	}
+}
+
+/* Tell the PCI core whether /proc should show the domain number of @bus */
+int pci_proc_domain(struct pci_bus *bus)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+
+	if (!pci_has_flag(PCI_ENABLE_PROC_DOMAINS))
+		return 0;
+	if (!pci_has_flag(PCI_COMPAT_DOMAIN_0))
+		return 1;
+	return phb->global_number != 0;
+}
+
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	/* Nothing to prepare unless the platform supplies a hook */
+	if (!ppc_md.pcibios_root_bridge_prepare)
+		return 0;
+	return ppc_md.pcibios_root_bridge_prepare(bridge);
+}
+
+/* Header fixup run for all devices as they are probed: marks resources that
+ * must be (re)assigned as IORESOURCE_UNSET. Bridge ranges are not fixed up here
+ */
+static void pcibios_fixup_resources(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct resource *res;
+	int i;
+
+	if (!hose) {
+		printk(KERN_ERR "No host bridge for PCI dev %s !\n",
+		       pci_name(dev));
+		return;
+	}
+
+	if (dev->is_virtfn)
+		return;	/* virtual functions are left untouched */
+
+	pci_dev_for_each_resource(dev, res, i) {
+		struct pci_bus_region reg;
+
+		if (!res->flags)
+			continue;
+
+		/* If we're going to re-assign everything, we mark all resources
+		 * as unset (and 0-base them). In addition, we mark BARs starting
+		 * at 0 as unset as well, except if PCI_PROBE_ONLY is also set
+		 * since in that case, we don't want to re-assign anything
+		 */
+		pcibios_resource_to_bus(dev->bus, &reg, res);
+		if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
+		    (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
+			/* Only print message if not re-assigning */
+			if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
+				pr_debug("PCI:%s Resource %d %pR is unassigned\n",
+					 pci_name(dev), i, res);
+			res->end -= res->start;	/* keep the size, rebase to 0 */
+			res->start = 0;
+			res->flags |= IORESOURCE_UNSET;
+			continue;
+		}
+
+		pr_debug("PCI:%s Resource %d %pR\n", pci_name(dev), i, res);
+	}
+
+	/* Call machine specific resource fixup */
+	if (ppc_md.pcibios_fixup_resources)
+		ppc_md.pcibios_fixup_resources(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
+
+/* Guess whether the firmware initialized a bridge resource: returns 1 if it
+ * looks uninitialized, 0 otherwise. It doesn't have to be bullet proof, but
+ * things go more smoothly when it gets it right. It should cover cases such
+ * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
+ */
+static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
+						 struct resource *res)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pci_dev *dev = bus->self;
+	resource_size_t offset;
+	struct pci_bus_region region;
+	u16 command;
+	int i;
+
+	/* We don't do anything if PCI_PROBE_ONLY is set */
+	if (pci_has_flag(PCI_PROBE_ONLY))
+		return 0;
+
+	/* Job is a bit different between memory and IO */
+	if (res->flags & IORESOURCE_MEM) {
+		pcibios_resource_to_bus(dev->bus, &region, res);
+
+		/* If the BAR is non-0 then it's probably been initialized */
+		if (region.start != 0)
+			return 0;
+
+		/* The BAR is 0, let's check if memory decoding is enabled on
+		 * the bridge. If not, we consider it unassigned
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if ((command & PCI_COMMAND_MEMORY) == 0)
+			return 1;
+
+		/* Memory decoding is enabled and the BAR is 0. If any of the host
+		 * bridge memory resources starts at PCI address 0 (start equals
+		 * its CPU-to-PCI offset), that is good enough for memory space
+		 */
+		for (i = 0; i < 3; i++) {
+			if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
+			    hose->mem_resources[i].start == hose->mem_offset[i])
+				return 0;
+		}
+
+		/* Well, it starts at 0 and we know it will collide so we may as
+		 * well consider it as unassigned. That covers the Apple case.
+		 */
+		return 1;
+	} else {
+		/* If the BAR is non-0, then we consider it assigned */
+		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		if (((res->start - offset) & 0xfffffffful) != 0)
+			return 0;
+
+		/* Here, we are a bit different than memory as typically IO space
+		 * starting at low addresses -is- valid. What we do instead is that
+		 * we consider as unassigned anything that doesn't have IO enabled
+		 * in the PCI command register, and that's it.
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if (command & PCI_COMMAND_IO)
+			return 0;
+
+		/* It's starting at 0 and IO is disabled in the bridge, consider
+		 * it unassigned
+		 */
+		return 1;
+	}
+}
+
+/* Fix up the resources of the PCI<->PCI bridge leading to @bus */
+static void pcibios_fixup_bridge(struct pci_bus *bus)
+{
+	struct resource *res;
+	int i;
+
+	struct pci_dev *dev = bus->self;
+
+	pci_bus_for_each_resource(bus, res, i) {
+		if (!res || !res->flags)
+			continue;
+		if (i >= 3 && bus->self->transparent)
+			continue;
+
+		/* If we're going to reassign everything, shrink the
+		 * P2P resource to an empty range (start 0, end -1)
+		 * and mark it unset in order to save space.
+		 */
+		if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+			res->flags |= IORESOURCE_UNSET;
+			res->start = 0;
+			res->end = -1;
+			continue;
+		}
+
+		pr_debug("PCI:%s Bus rsrc %d %pR\n", pci_name(dev), i, res);
+
+		/* Try to detect uninitialized P2P bridge resources,
+		 * and clear them out so they get re-assigned later
+		 */
+		if (pcibios_uninitialized_bridge_resource(bus, res)) {
+			res->flags = 0;
+			pr_debug("PCI:%s            (unassigned)\n", pci_name(dev));
+		}
+	}
+}
+
+void pcibios_setup_bus_self(struct pci_bus *bus)
+{
+	struct pci_controller *phb;
+
+	/* Fix up the bus resources for P2P bridges (bus->self is the bridge) */
+	if (bus->self != NULL)
+		pcibios_fixup_bridge(bus);
+
+	/* Platform specific bus fixups. This is currently only used
+	 * by fsl_pci and I'm hoping to get rid of it at some point
+	 */
+	if (ppc_md.pcibios_fixup_bus)
+		ppc_md.pcibios_fixup_bus(bus);
+
+	/* Set up bus DMA mappings through the optional controller hook */
+	phb = pci_bus_to_host(bus);
+	if (phb->controller_ops.dma_bus_setup)
+		phb->controller_ops.dma_bus_setup(bus);
+}
+
+void pcibios_bus_add_device(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+	/* Fix up the NUMA node as it may not be set up yet by the generic
+	 * code and is needed by the DMA init
+	 */
+	set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
+
+	/* Hook up default DMA ops */
+	set_dma_ops(&dev->dev, pci_dma_ops);
+	dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
+
+	/* Additional platform DMA/iommu setup (optional controller hook) */
+	phb = pci_bus_to_host(dev->bus);
+	if (phb->controller_ops.dma_dev_setup)
+		phb->controller_ops.dma_dev_setup(dev);
+
+	/* Read default IRQs and fix them up if the platform asks for it */
+	pci_read_irq_line(dev);
+	if (ppc_md.pci_irq_fixup)
+		ppc_md.pci_irq_fixup(dev);
+
+	if (ppc_md.pcibios_bus_add_device)	/* last: optional platform hook */
+		ppc_md.pcibios_bus_add_device(dev);
+}
+
+int pcibios_device_add(struct pci_dev *dev)
+{
+	struct irq_domain *d;
+
+#ifdef CONFIG_PCI_IOV
+	if (ppc_md.pcibios_fixup_sriov)
+		ppc_md.pcibios_fixup_sriov(dev);
+#endif /* CONFIG_PCI_IOV */
+	/* Give the device the MSI domain of its bus, if the bus has one */
+	d = dev_get_msi_domain(&dev->bus->dev);
+	if (d)
+		dev_set_msi_domain(&dev->dev, d);
+	return 0;	/* always succeeds */
+}
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	/* Intentionally empty: no special bus mastering setup handling */
+}
+
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+	/* When called from the generic PCI probe, read PCI<->PCI bridge
+	 * bases. This is -not- called when generating the PCI tree from
+	 * the OF device-tree.
+	 */
+	pci_read_bridge_bases(bus);
+
+	/* Now fix up the bus (bridge resources, platform hook, DMA setup) */
+	pcibios_setup_bus_self(bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+static int skip_isa_ioresource_align(struct pci_dev *dev)
+{
+	/* Skipping needs platform consent and a bridge without ISA mode */
+	if (!pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN))
+		return 0;
+	return (dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA) ? 0 : 1;
+}
+
+/*
+ * Keep I/O allocations clear of `mirrored' VGA ports and
+ * other strange ISA hardware: addresses are only handed
+ * out in the 0x000-0x0ff region modulo 0x400.
+ *
+ * The reason is that some silly external IO cards only
+ * decode the low 10 bits of the IO address. The 0x00-0xff
+ * region is reserved for motherboard devices that decode
+ * all 16 bits, so allocating at, say, 0x2800-0x28ff is ok,
+ * while 0x2900-0x2bff is best avoided as it might be
+ * mirrored at 0x0100-0x03ff. Returns the (possibly bumped)
+ * start address for @res.
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				resource_size_t size, resource_size_t align)
+{
+	resource_size_t start = res->start;
+
+	/* Only I/O port allocations need the ISA alias treatment */
+	if (!(res->flags & IORESOURCE_IO))
+		return start;
+	if (skip_isa_ioresource_align(data))
+		return start;
+	/* Anything with bits 0x300 set is bumped to the next 1K boundary */
+	if (start & 0x300)
+		start = (start + 0x3ff) & ~0x3ff;
+	return start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Reparent resource children of parent that conflict with res
+ * under res, and make res replace those children. Returns 0, or -1 if not.
+ */
+static int reparent_resources(struct resource *parent,
+				     struct resource *res)
+{
+	struct resource *p, **pp;
+	struct resource **firstpp = NULL;
+
+	for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) {
+		if (p->end < res->start)
+			continue;	/* entirely before res */
+		if (res->end < p->start)
+			break;		/* entirely after res: stop here */
+		if (p->start < res->start || p->end > res->end)
+			return -1;	/* not completely contained */
+		if (firstpp == NULL)
+			firstpp = pp;	/* link to the first contained child */
+	}
+	if (firstpp == NULL)
+		return -1;	/* didn't find any conflicting entries? */
+	res->parent = parent;
+	res->child = *firstpp;
+	res->sibling = *pp;	/* first child past res, or NULL */
+	*firstpp = res;		/* res takes their place under parent */
+	*pp = NULL;		/* terminate the list of moved children */
+	for (p = res->child; p != NULL; p = p->sibling) {
+		p->parent = res;
+		pr_debug("PCI: Reparented %s %pR under %s\n",
+			 p->name, p, res->name);
+	}
+	return 0;
+}
+
+/*
+ *  Handle resources of PCI devices.  If the world were perfect, we could
+ *  just allocate all the resource regions and do nothing more.  It isn't.
+ *  On the other hand, we cannot just re-allocate all devices, as it would
+ *  require us to know lots of host bridge internals.  So we attempt to
+ *  keep as much of the original configuration as possible, but tweak it
+ *  when it's found to be wrong.
+ *
+ *  Known BIOS problems we have to work around:
+ *	- I/O or memory regions not configured
+ *	- regions configured, but not enabled in the command register
+ *	- bogus I/O addresses above 64K used
+ *	- expansion ROMs left enabled (this may sound harmless, but given
+ *	  the fact the PCI specs explicitly allow address decoders to be
+ *	  shared between expansion ROMs and other resource regions, it's
+ *	  at least dangerous)
+ *
+ *  Our solution:
+ *	(1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ *	    This gives us fixed barriers on where we can allocate.
+ *	(2) Allocate resources for all enabled devices.  If there is
+ *	    a collision, just mark the resource as unallocated. Also
+ *	    disable expansion ROMs during this step.
+ *	(3) Try to allocate resources for disabled devices.  If the
+ *	    resources were assigned correctly, everything goes well,
+ *	    if they weren't, they won't disturb allocation of other
+ *	    resources.
+ *	(4) Assign new addresses to resources which were either
+ *	    not configured at all or misconfigured.  If explicitly
+ *	    requested by the user, configure expansion ROM address
+ *	    as well.
+ */
+
+static void pcibios_allocate_bus_resources(struct pci_bus *bus)
+{
+	struct pci_bus *b;
+	int i;
+	struct resource *res, *pr;
+
+	pr_debug("PCI: Allocating bus resources for %04x:%02x...\n",
+		 pci_domain_nr(bus), bus->number);
+
+	pci_bus_for_each_resource(bus, res, i) {
+		if (!res || !res->flags || res->start > res->end || res->parent)
+			continue;
+
+		/* If the resource was left unset at this point, we clear it */
+		if (res->flags & IORESOURCE_UNSET)
+			goto clear_resource;
+
+		if (bus->parent == NULL)	/* root bus: use the global trees */
+			pr = (res->flags & IORESOURCE_IO) ?
+				&ioport_resource : &iomem_resource;
+		else {
+			pr = pci_find_parent_resource(bus->self, res);
+			if (pr == res) {
+				/* this happens when the generic PCI
+				 * code (wrongly) decides that this
+				 * bridge is transparent  -- paulus
+				 */
+				continue;
+			}
+		}
+
+		pr_debug("PCI: %s (bus %d) bridge rsrc %d: %pR, parent %p (%s)\n",
+			 bus->self ? pci_name(bus->self) : "PHB", bus->number,
+			 i, res, pr, (pr && pr->name) ? pr->name : "nil");
+
+		if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+			struct pci_dev *dev = bus->self;
+
+			if (request_resource(pr, res) == 0)
+				continue;
+			/*
+			 * Must be a conflict with an existing entry.
+			 * Move that entry (or entries) under the
+			 * bridge resource and try again.
+			 */
+			if (reparent_resources(pr, res) == 0)
+				continue;
+
+			if (dev && i < PCI_BRIDGE_RESOURCE_NUM &&
+			    pci_claim_bridge_resource(dev,
+						i + PCI_BRIDGE_RESOURCES) == 0)
+				continue;
+		}
+		pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n",
+			i, bus->number);
+	clear_resource:
+		/* The resource might be figured out when doing
+		 * reassignment based on the resources required
+		 * by the downstream PCI devices. Here we make
+		 * the resource empty (start 0, end -1) and clear
+		 * its flags in order to save more space.
+		 */
+		res->start = 0;
+		res->end = -1;
+		res->flags = 0;
+	}
+
+	list_for_each_entry(b, &bus->children, node)	/* recurse into child buses */
+		pcibios_allocate_bus_resources(b);
+}
+
+static inline void alloc_resource(struct pci_dev *dev, int idx)
+{
+	struct resource *pr, *r = &dev->resource[idx];
+
+	pr_debug("PCI: Allocating %s: Resource %d: %pR\n",
+		 pci_name(dev), idx, r);
+
+	pr = pci_find_parent_resource(dev, r);
+	if (!pr || (pr->flags & IORESOURCE_UNSET) ||
+	    request_resource(pr, r) < 0) {
+		printk(KERN_WARNING "PCI: Cannot allocate resource region %d"
+		       " of device %s, will remap\n", idx, pci_name(dev));
+		if (pr)
+			pr_debug("PCI:  parent is %p: %pR\n", pr, pr);
+		/* Mark it unset and 0-based (size kept); reassigned later */
+		r->flags |= IORESOURCE_UNSET;
+		r->end -= r->start;
+		r->start = 0;
+	}
+}
+
+/* Claim firmware-set resources. Pass 0: enabled ones; pass 1: disabled ones and ROMs */
+static void __init pcibios_allocate_resources(int pass)
+{
+	struct pci_dev *dev = NULL;
+	int idx, disabled;
+	u16 command;
+	struct resource *r;
+
+	for_each_pci_dev(dev) {
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+			r = &dev->resource[idx];
+			if (r->parent)		/* Already allocated */
+				continue;
+			if (!r->flags || (r->flags & IORESOURCE_UNSET))
+				continue;	/* Not assigned at all */
+			/* ROMs are always treated as disabled so they are only
+			 * allocated on pass 1, in case firmware screwed them up
+			 */
+			if (idx == PCI_ROM_RESOURCE)
+				disabled = 1;
+			else if (r->flags & IORESOURCE_IO)
+				disabled = !(command & PCI_COMMAND_IO);
+			else
+				disabled = !(command & PCI_COMMAND_MEMORY);
+			if (pass == disabled)
+				alloc_resource(dev, idx);
+		}
+		if (pass)
+			continue;
+		r = &dev->resource[PCI_ROM_RESOURCE];
+		if (r->flags) {
+			/* Turn the ROM off, leave the resource region,
+			 * but keep it unregistered.
+			 */
+			u32 reg;
+			pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+			if (reg & PCI_ROM_ADDRESS_ENABLE) {
+				pr_debug("PCI: Switching off ROM of %s\n", pci_name(dev));
+				r->flags &= ~IORESOURCE_ROM_ENABLE;
+				pci_write_config_dword(dev, dev->rom_base_reg,
+						       reg & ~PCI_ROM_ADDRESS_ENABLE);
+			}
+		}
+	}
+}
+
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	resource_size_t	offset;
+	struct resource *res, *pres;
+	int i;
+
+	pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+	/* IO: reserve the first 4K (offset..offset + 0xfff) if the PHB has IO */
+	if (!(hose->io_resource.flags & IORESOURCE_IO))
+		goto no_io;
+	offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(res == NULL);
+	res->name = "Legacy IO";
+	res->flags = IORESOURCE_IO;
+	res->start = offset;
+	res->end = (offset + 0xfff) & 0xfffffffful;
+	pr_debug("Candidate legacy IO: %pR\n", res);
+	if (request_resource(&hose->io_resource, res)) {
+		printk(KERN_DEBUG
+		       "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+		       pci_domain_nr(bus), bus->number, res);
+		kfree(res);	/* the request failed, so it is still ours to free */
+	}
+
+ no_io:
+	/* Memory: look for a PHB window covering PCI 0xa0000..0xbffff */
+	for (i = 0; i < 3; i++) {
+		pres = &hose->mem_resources[i];
+		offset = hose->mem_offset[i];
+		if (!(pres->flags & IORESOURCE_MEM))
+			continue;
+		pr_debug("hose mem res: %pR\n", pres);
+		if ((pres->start - offset) <= 0xa0000 &&
+		    (pres->end - offset) >= 0xbffff)
+			break;
+	}
+	if (i >= 3)
+		return;	/* no window covers the VGA range */
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(res == NULL);
+	res->name = "Legacy VGA memory";
+	res->flags = IORESOURCE_MEM;
+	res->start = 0xa0000 + offset;
+	res->end = 0xbffff + offset;
+	pr_debug("Candidate VGA memory: %pR\n", res);
+	if (request_resource(pres, res)) {
+		printk(KERN_DEBUG
+		       "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+		       pci_domain_nr(bus), bus->number, res);
+		kfree(res);	/* the request failed, so it is still ours to free */
+	}
+}
+
+void __init pcibios_resource_survey(void)
+{
+	struct pci_bus *root;
+
+	/* Claim what the firmware set up: bridge windows first, then
+	 * device resources in two passes
+	 */
+	list_for_each_entry(root, &pci_root_buses, node)
+		pcibios_allocate_bus_resources(root);
+	if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+		pcibios_allocate_resources(0);
+		pcibios_allocate_resources(1);
+	}
+
+	/* A platform that blindly trusts the firmware stops here */
+	if (pci_has_flag(PCI_PROBE_ONLY))
+		return;
+
+	/* Before assigning anything new, try to reserve the low IO area
+	 * and the VGA memory area where they intersect the bus resources,
+	 * so that nothing gets allocated on top of them
+	 */
+	list_for_each_entry(root, &pci_root_buses, node)
+		pcibios_reserve_legacy_regions(root);
+
+	/* Now assign whatever was left unassigned */
+	pr_debug("PCI: Assigning unassigned resources...\n");
+	pci_assign_unassigned_resources();
+}
+
+/* This is used by the PCI hotplug driver to allocate resources
+ * of newly plugged busses. We can try to consolidate with the
+ * rest of the code later, for now, keep it as-is as our main
+ * resource allocation function doesn't deal with sub-trees yet.
+ */
+void pcibios_claim_one_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct pci_bus *child_bus;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct resource *r;
+		int i;
+
+		pci_dev_for_each_resource(dev, r, i) {
+			if (r->parent || !r->start || !r->flags)
+				continue;	/* already claimed, at 0, or unused */
+
+			pr_debug("PCI: Claiming %s: Resource %d: %pR\n",
+				 pci_name(dev), i, r);
+
+			if (pci_claim_resource(dev, i) == 0)
+				continue;
+
+			pci_claim_bridge_resource(dev, i);	/* result is ignored */
+		}
+	}
+
+	list_for_each_entry(child_bus, &bus->children, node)	/* recurse */
+		pcibios_claim_one_bus(child_bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
+
+
+/* pcibios_finish_adding_to_bus
+ *
+ * This is to be called by the hotplug code after devices have been
+ * added to a bus; this includes calling it for a PHB that is just
+ * being added
+ */
+void pcibios_finish_adding_to_bus(struct pci_bus *bus)
+{
+	pr_debug("PCI: Finishing adding to hotplug bus %04x:%02x\n",
+		 pci_domain_nr(bus), bus->number);
+
+	/* Allocate bus and devices resources */
+	pcibios_allocate_bus_resources(bus);
+	pcibios_claim_one_bus(bus);
+	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		if (bus->self)	/* behind a bridge, as opposed to a root bus */
+			pci_assign_unassigned_bridge_resources(bus->self);
+		else
+			pci_assign_unassigned_bus_resources(bus);
+	}
+
+	/* Add new devices to global lists.  Register in proc, sysfs. */
+	pci_bus_add_devices(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+	/* The optional controller hook can veto enabling the device */
+	if (hose->controller_ops.enable_device_hook &&
+	    !hose->controller_ops.enable_device_hook(dev))
+		return -EINVAL;
+	return pci_enable_resources(dev, mask);
+}
+
+void pcibios_disable_device(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	/* Disabling is left entirely to the optional controller hook */
+	if (hose->controller_ops.disable_device)
+		hose->controller_ops.disable_device(dev);
+}
+
+/* Offset of this PHB's IO space mapping relative to _IO_BASE */
+resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
+{
+	return (unsigned long) hose->io_base_virt - _IO_BASE;
+}
+
+static void pcibios_setup_phb_resources(struct pci_controller *hose,
+					struct list_head *resources)
+{
+	struct resource *io = &hose->io_resource;
+	struct resource *mem;
+	resource_size_t off;
+	int n;
+
+	/* Hook up the PHB IO resource, if the bridge has one */
+	if (io->flags) {
+		off = pcibios_io_space_offset(hose);
+
+		pr_debug("PCI: PHB IO resource    = %pR off 0x%08llx\n",
+			 io, (unsigned long long)off);
+		pci_add_resource_offset(resources, io, off);
+	} else {
+		pr_debug("PCI: I/O resource not set for host"
+			 " bridge %pOF (domain %d)\n",
+			 hose->dn, hose->global_number);
+	}
+
+	/* Hook up every PHB memory resource that has been filled in */
+	for (n = 0; n < 3; ++n) {
+		mem = &hose->mem_resources[n];
+		if (!mem->flags)
+			continue;
+
+		off = hose->mem_offset[n];
+		pr_debug("PCI: PHB MEM resource %d = %pR off 0x%08llx\n", n,
+			 mem, (unsigned long long)off);
+
+		pci_add_resource_offset(resources, mem, off);
+	}
+}
+
+/*
+ * Null PCI config access functions, for the case when we can't
+ * find a hose. NOTE(review): NULL_PCI_OP looks unused below - confirm.
+ */
+#define NULL_PCI_OP(rw, size, type)					\
+static int								\
+null_##rw##_config_##size(struct pci_dev *dev, int offset, type val)	\
+{									\
+	return PCIBIOS_DEVICE_NOT_FOUND;    				\
+}
+
+static int
+null_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+		 int len, u32 *val)
+{
+	return PCIBIOS_DEVICE_NOT_FOUND;	/* *val is left untouched */
+}
+
+static int
+null_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+		  int len, u32 val)
+{
+	return PCIBIOS_DEVICE_NOT_FOUND;	/* the write is dropped */
+}
+
+static struct pci_ops null_pci_ops =
+{
+	.read = null_read_config,
+	.write = null_write_config,
+};
+
+/*
+ * These functions are used early on before PCI scanning is done
+ * and all of the pci_dev and pci_bus structures have been created.
+ */
+static struct pci_bus *
+fake_pci_bus(struct pci_controller *hose, int busnr)
+{
+	static struct pci_bus bus;	/* single instance, rewritten on every call */
+
+	if (hose == NULL) {
+		printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr);
+	}
+	bus.number = busnr;
+	bus.sysdata = hose;
+	bus.ops = hose? hose->ops: &null_pci_ops;	/* no hose: accesses fail */
+	return &bus;
+}
+
+#define EARLY_PCI_OP(rw, size, type)					\
+int early_##rw##_config_##size(struct pci_controller *hose, int bus,	\
+			       int devfn, int offset, type value)	\
+{									\
+	return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus),	\
+					    devfn, offset, value);	\
+}
+/* Instantiate early_{read,write}_config_{byte,word,dword}() */
+EARLY_PCI_OP(read, byte, u8 *)
+EARLY_PCI_OP(read, word, u16 *)
+EARLY_PCI_OP(read, dword, u32 *)
+EARLY_PCI_OP(write, byte, u8)
+EARLY_PCI_OP(write, word, u16)
+EARLY_PCI_OP(write, dword, u32)
+/* Capability lookup going through the same fake bus */
+int early_find_capability(struct pci_controller *hose, int bus, int devfn,
+			  int cap)
+{
+	return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap);
+}
+
+struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+	struct pci_controller *hose = bus->sysdata;
+	/* Hand back the PHB's device node, passed through of_node_get() */
+	return of_node_get(hose->dn);
+}
+
+/**
+ * pcibios_scan_phb - Given a pci_controller, setup and scan the PCI bus
+ * @hose: Pointer to the PCI host controller instance structure
+ */
+void pcibios_scan_phb(struct pci_controller *hose)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *bus;
+	struct device_node *node = hose->dn;
+	int mode;
+
+	pr_debug("PCI: Scanning PHB %pOF\n", node);
+
+	/* Get some IO space for the new PHB */
+	pcibios_setup_phb_io_space(hose);
+
+	/* Wire up PHB bus resources */
+	pcibios_setup_phb_resources(hose, &resources);
+
+	hose->busn.start = hose->first_busno;
+	hose->busn.end	 = hose->last_busno;
+	hose->busn.flags = IORESOURCE_BUS;
+	pci_add_resource(&resources, &hose->busn);
+
+	/* Create an empty bus for the toplevel */
+	bus = pci_create_root_bus(hose->parent, hose->first_busno,
+				  hose->ops, hose, &resources);
+	if (bus == NULL) {
+		pr_err("Failed to create bus for PCI domain %04x\n",
+			hose->global_number);
+		pci_free_resource_list(&resources);
+		return;
+	}
+	hose->bus = bus;
+
+	/* Get probe mode and perform scan (normal unless the hook says otherwise) */
+	mode = PCI_PROBE_NORMAL;
+	if (node && hose->controller_ops.probe_mode)
+		mode = hose->controller_ops.probe_mode(bus);
+	pr_debug("    probe mode: %d\n", mode);
+	if (mode == PCI_PROBE_DEVTREE)
+		of_scan_bus(node, bus);
+
+	if (mode == PCI_PROBE_NORMAL) {
+		pci_bus_update_busn_res_end(bus, 255);
+		hose->last_busno = pci_scan_child_bus(bus);
+		pci_bus_update_busn_res_end(bus, hose->last_busno);
+	}
+
+	/* Platform gets a chance to do some global fixups before
+	 * we proceed to resource allocation
+	 */
+	if (ppc_md.pcibios_fixup_phb)
+		ppc_md.pcibios_fixup_phb(hose);
+
+	/* Configure PCI Express settings (bus cannot be NULL at this point) */
+	if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
+		struct pci_bus *child;
+		list_for_each_entry(child, &bus->children, node)
+			pcie_bus_configure_settings(child);
+	}
+}
+EXPORT_SYMBOL_GPL(pcibios_scan_phb);
+
+static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
+{
+	int class = dev->class >> 8;	/* drop the low (programming interface) byte */
+	/* When configured as agent, programming interface = 1 */
+	int prog_if = dev->class & 0xf;
+	struct resource *r;
+
+	if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
+	     class == PCI_CLASS_BRIDGE_OTHER) &&
+		(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
+		(prog_if == 0) &&
+		(dev->bus->parent == NULL)) {	/* i.e. sitting on a root bus */
+		pci_dev_for_each_resource(dev, r) {	/* blank every resource */
+			r->start = 0;
+			r->end = 0;
+			r->flags = 0;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+
+/* Run the platform's optional discover_phbs hook as a core_initcall */
+static int __init discover_phbs(void)
+{
+	if (ppc_md.discover_phbs)
+		ppc_md.discover_phbs();
+
+	return 0;
+}
+core_initcall(discover_phbs);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
new file mode 100644
index 0000000000..0fe251c6ac
--- /dev/null
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
+ *
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+static struct pci_bus *find_bus_among_children(struct pci_bus *bus,
+					       struct device_node *dn)
+{
+	struct pci_bus *sub, *found;
+
+	/* Depth-first search: this bus first, then each child subtree */
+	if (pci_bus_to_OF_node(bus) == dn)
+		return bus;
+
+	list_for_each_entry(sub, &bus->children, node) {
+		found = find_bus_among_children(sub, dn);
+		if (found)
+			return found;
+	}
+
+	return NULL;
+}
+
+struct pci_bus *pci_find_bus_by_node(struct device_node *dn)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+
+	/* Search from the root bus of the PHB owning @dn, if there is one */
+	if (pdn && pdn->phb && pdn->phb->bus)
+		return find_bus_among_children(pdn->phb->bus, dn);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
+
+/**
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
+ *
+ * Called before @dev is released; runs the PHB release hook and frees a dead pci_dn.
+ */
+void pcibios_release_device(struct pci_dev *dev)
+{
+	struct pci_controller *phb = pci_bus_to_host(dev->bus);
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	if (phb->controller_ops.release_device)
+		phb->controller_ops.release_device(dev);
+
+	/* free()ing the pci_dn has been deferred to us, do it now */
+	if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+		pci_dbg(dev, "freeing dead pdn\n");
+		kfree(pdn);
+	}
+}
+
+/**
+ * pci_hp_remove_devices - remove all devices under this bus
+ * @bus: the indicated PCI bus
+ *
+ * Remove all of the PCI devices under this bus (child busses first) both
+ * from the linux pci device tree, and from the powerpc EEH address cache.
+ */
+void pci_hp_remove_devices(struct pci_bus *bus)
+{
+	struct pci_dev *dev, *tmp;
+	struct pci_bus *child_bus;
+
+	/* First go down child busses */
+	list_for_each_entry(child_bus, &bus->children, node)
+		pci_hp_remove_devices(child_bus);
+
+	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
+		 pci_domain_nr(bus),  bus->number);
+	list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
+		pr_debug("   Removing %s...\n", pci_name(dev));
+		pci_stop_and_remove_bus_device(dev);
+	}
+}
+EXPORT_SYMBOL_GPL(pci_hp_remove_devices);
+
+/**
+ * pci_hp_add_devices - adds new pci devices to bus
+ * @bus: the indicated PCI bus
+ *
+ * This routine will find and fix up new pci devices under the
+ * indicated bus. It presumes that there might already be some
+ * devices under this bridge, so it carefully tries to add only
+ * new devices (and that is how it differs from other, similar
+ * pcibios routines). The legacy probe is skipped when the bus
+ * has no device-tree node or no child with a pci_dn.
+ */
+void pci_hp_add_devices(struct pci_bus *bus)
+{
+	int slotno, mode, max;
+	struct pci_dev *dev;
+	struct pci_controller *phb;
+	struct device_node *dn = pci_bus_to_OF_node(bus);
+
+	phb = pci_bus_to_host(bus);
+
+	mode = PCI_PROBE_NORMAL;
+	if (phb->controller_ops.probe_mode)
+		mode = phb->controller_ops.probe_mode(bus);
+
+	if (mode == PCI_PROBE_DEVTREE) {
+		/* use ofdt-based probe */
+		of_rescan_bus(dn, bus);
+	} else if (mode == PCI_PROBE_NORMAL &&
+		   dn && dn->child && PCI_DN(dn->child)) {
+		/*
+		 * Use legacy probe. In the partial hotplug case, we
+		 * probably have grandchildren devices unplugged. So
+		 * we don't check the return value from pci_scan_slot() in
+		 * order to fully rescan all the way down to pick them up.
+		 * They can have been removed during partial hotplug.
+		 */
+		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
+		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+		max = bus->busn_res.start;
+		/*
+		 * Scan bridges that are already configured. We don't touch
+		 * them unless they are misconfigured (which will be done in
+		 * the second scan below).
+		 */
+		for_each_pci_bridge(dev, bus)
+			max = pci_scan_bridge(bus, dev, max, 0);
+
+		/* Scan bridges that need to be reconfigured */
+		for_each_pci_bridge(dev, bus)
+			max = pci_scan_bridge(bus, dev, max, 1);
+	}
+	pcibios_finish_adding_to_bus(bus);
+}
+EXPORT_SYMBOL_GPL(pci_hp_add_devices);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
new file mode 100644
index 0000000000..ce0c8623e5
--- /dev/null
+++ b/arch/powerpc/kernel/pci_32.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Common pmac/prep/chrp pci routines. -- Cort
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/memblock.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/byteorder.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+
+#undef DEBUG
+
+unsigned long isa_io_base     = 0;
+unsigned long pci_dram_offset = 0;
+int pcibios_assign_bus_offset = 1;
+EXPORT_SYMBOL(isa_io_base);
+EXPORT_SYMBOL(pci_dram_offset);
+
+static void fixup_cpc710_pci64(struct pci_dev* dev);
+
+/* By default, we don't re-assign bus numbers. We do this only on
+ * some pmacs
+ */
+static int pci_assign_all_buses;
+
+/* This will remain NULL for now, until isa-bridge.c is made common
+ * to both 32-bit and 64-bit.
+ */
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+static void
+fixup_cpc710_pci64(struct pci_dev* dev)
+{
+	/* Hide the PCI64 BARs from the kernel as their content doesn't
+	 * fit well in the resource management
+	 */
+	dev->resource[0].start = dev->resource[0].end = 0;
+	dev->resource[0].flags = 0;
+	dev->resource[1].start = dev->resource[1].end = 0;
+	dev->resource[1].flags = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM,	PCI_DEVICE_ID_IBM_CPC710_PCI64,	fixup_cpc710_pci64);
+
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+
+static u8* pci_to_OF_bus_map;
+static int pci_bus_count;
+
+/*
+ * Functions below are used on OpenFirmware machines.
+ */
+static void
+make_one_node_map(struct device_node* node, u8 pci_bus)
+{
+	const int *bus_range;
+	int len;
+
+	if (pci_bus >= pci_bus_count)
+		return;
+	bus_range = of_get_property(node, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, "
+		       "assuming it starts at 0\n", node);
+		pci_to_OF_bus_map[pci_bus] = 0;
+	} else
+		pci_to_OF_bus_map[pci_bus] = bus_range[0];
+
+	for_each_child_of_node(node, node) {
+		struct pci_dev* dev;
+		const unsigned int *class_code, *reg;
+	
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		reg = of_get_property(node, "reg", NULL);
+		if (!reg)
+			continue;
+		dev = pci_get_domain_bus_and_slot(0, pci_bus,
+						  ((reg[0] >> 8) & 0xff));
+		if (!dev || !dev->subordinate) {
+			pci_dev_put(dev);
+			continue;
+		}
+		make_one_node_map(node, dev->subordinate->number);
+		pci_dev_put(dev);
+	}
+}
+	
+static void __init
+pcibios_make_OF_bus_map(void)
+{
+	int i;
+	struct pci_controller *hose, *tmp;
+	struct property *map_prop;
+	struct device_node *dn;
+
+	pci_to_OF_bus_map = kmalloc(pci_bus_count, GFP_KERNEL);
+	if (!pci_to_OF_bus_map) {
+		printk(KERN_ERR "Can't allocate OF bus map !\n");
+		return;
+	}
+
+	/* We fill the bus map with invalid values, that helps
+	 * debugging.
+	 */
+	for (i=0; i<pci_bus_count; i++)
+		pci_to_OF_bus_map[i] = 0xff;
+
+	/* For each hose, we begin searching bridges */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		struct device_node* node = hose->dn;
+
+		if (!node)
+			continue;
+		make_one_node_map(node, hose->first_busno);
+	}
+	dn = of_find_node_by_path("/");
+	map_prop = of_find_property(dn, "pci-OF-bus-map", NULL);
+	if (map_prop) {
+		BUG_ON(pci_bus_count > map_prop->length);
+		memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count);
+	}
+	of_node_put(dn);
+#ifdef DEBUG
+	printk("PCI->OF bus map:\n");
+	for (i=0; i<pci_bus_count; i++) {
+		if (pci_to_OF_bus_map[i] == 0xff)
+			continue;
+		printk("%d -> %d\n", i, pci_to_OF_bus_map[i]);
+	}
+#endif
+}
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
+
+
+#ifdef CONFIG_PPC_PMAC
+/*
+ * Returns the PCI bus number and devfn matching a given OF node
+ */
+int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
+{
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+	struct pci_dev *dev = NULL;
+#endif
+	const __be32 *reg;
+	int size;
+
+	/* Check if it might have a chance to be a PCI device */
+	if (!pci_find_hose_for_OF_device(node))
+		return -ENODEV;
+
+	reg = of_get_property(node, "reg", &size);
+	if (!reg || size < 5 * sizeof(u32))
+		return -ENODEV;
+
+	*bus = (be32_to_cpup(&reg[0]) >> 16) & 0xff;
+	*devfn = (be32_to_cpup(&reg[0]) >> 8) & 0xff;
+
+#ifndef CONFIG_PPC_PCI_OF_BUS_MAP
+	return 0;
+#else
+	/* Ok, here we need some tweak. If we have already renumbered
+	 * all busses, we can't rely on the OF bus number any more.
+	 * The pci_to_OF_bus_map alone is not enough as several PCI
+	 * busses may match the same OF bus number.
+	 */
+	if (!pci_to_OF_bus_map)
+		return 0;
+
+	for_each_pci_dev(dev)
+		if (pci_to_OF_bus_map[dev->bus->number] == *bus &&
+				dev->devfn == *devfn) {
+			*bus = dev->bus->number;
+			pci_dev_put(dev);
+			return 0;
+		}
+
+	return -ENODEV;
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
+}
+EXPORT_SYMBOL(pci_device_from_OF_node);
+#endif
+
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+/* We create the "pci-OF-bus-map" property now so it appears in the
+ * /proc device tree
+ */
+void __init
+pci_create_OF_bus_map(void)
+{
+	struct property* of_prop;
+	struct device_node *dn;
+
+	of_prop = memblock_alloc(sizeof(struct property) + 256,
+				 SMP_CACHE_BYTES);
+	if (!of_prop)
+		panic("%s: Failed to allocate %zu bytes\n", __func__,
+		      sizeof(struct property) + 256);
+	dn = of_find_node_by_path("/");
+	if (dn) {
+		memset(of_prop, -1, sizeof(struct property) + 256);
+		of_prop->name = "pci-OF-bus-map";
+		of_prop->length = 256;
+		of_prop->value = &of_prop[1];
+		of_add_property(dn, of_prop);
+		of_node_put(dn);
+	}
+}
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
+
+void pcibios_setup_phb_io_space(struct pci_controller *hose)
+{
+	unsigned long io_offset;
+	struct resource *res = &hose->io_resource;
+
+	/* Fixup IO space offset */
+	io_offset = pcibios_io_space_offset(hose);
+	res->start += io_offset;
+	res->end += io_offset;
+}
+
+static int __init pcibios_init(void)
+{
+	struct pci_controller *hose, *tmp;
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+	int next_busno = 0;
+#endif
+
+	printk(KERN_INFO "PCI: Probing PCI hardware\n");
+
+#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+	/*
+	 * Enable PCI domains in /proc when PCI bus numbers are not unique
+	 * across all PCI domains to prevent conflicts. And keep PCI domain 0
+	 * backward compatible in /proc for video cards.
+	 */
+	pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+#endif
+
+	if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
+		pci_assign_all_buses = 1;
+
+	/* Scan all of the recorded PCI controllers.  */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+		if (pci_assign_all_buses)
+			hose->first_busno = next_busno;
+#endif
+		hose->last_busno = 0xff;
+		pcibios_scan_phb(hose);
+		pci_bus_add_devices(hose->bus);
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+		if (pci_assign_all_buses || next_busno <= hose->last_busno)
+			next_busno = hose->last_busno + pcibios_assign_bus_offset;
+#endif
+	}
+
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP)
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+	pci_bus_count = next_busno;
+
+	/* OpenFirmware based machines need a map of OF bus
+	 * numbers vs. kernel bus numbers since we may have to
+	 * remap them.
+	 */
+	if (pci_assign_all_buses)
+		pcibios_make_OF_bus_map();
+#endif
+#endif
+
+	/* Call common code to handle resource allocation */
+	pcibios_resource_survey();
+
+	/* Call machine dependent fixup */
+	if (ppc_md.pcibios_fixup)
+		ppc_md.pcibios_fixup();
+
+	/* Call machine dependent post-init code */
+	if (ppc_md.pcibios_after_init)
+		ppc_md.pcibios_after_init();
+
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+static struct pci_controller*
+pci_bus_to_hose(int bus)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		if (bus >= hose->first_busno && bus <= hose->last_busno)
+			return hose;
+	return NULL;
+}
+
+/* Provide information on locations of various I/O regions in physical
+ * memory.  Do this on a per-card basis so that we choose the right
+ * root bridge.
+ * Note that the returned IO or memory base is a physical address
+ */
+
+SYSCALL_DEFINE3(pciconfig_iobase, long, which,
+		unsigned long, bus, unsigned long, devfn)
+{
+	struct pci_controller *hose = pci_bus_to_hose(bus);
+
+	/* No recorded controller has this bus number in its range */
+	if (!hose)
+		return -ENODEV;
+
+	/* Each known selector returns directly; anything else is unsupported */
+	switch (which) {
+	case IOBASE_BRIDGE_NUMBER:
+		return (long)hose->first_busno;
+	case IOBASE_MEMORY:
+		return (long)hose->mem_offset[0];
+	case IOBASE_IO:
+		return (long)hose->io_base_phys;
+	case IOBASE_ISA_IO:
+		return (long)isa_io_base;
+	case IOBASE_ISA_MEM:
+		return (long)isa_mem_base;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 0000000000..e27342ef12
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ * 
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *   Rework, based on alpha PCI code.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <linux/of.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets.  If no ISA bus exists nothing
+ * is mapped on the first 64K of IO space
+ */
+unsigned long pci_io_base;
+EXPORT_SYMBOL(pci_io_base);
+
+static int __init pcibios_init(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	printk(KERN_INFO "PCI: Probing PCI hardware\n");
+
+	/* For now, override phys_mem_access_prot. If we need it,
+	 * later, we may move that initialization to each ppc_md
+	 */
+	ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+	/* On ppc64, we always enable PCI domains and we keep domain 0
+	 * backward compatible in /proc for video cards
+	 */
+	pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+
+	/* Scan all of the recorded PCI controllers.  */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		pcibios_scan_phb(hose);
+
+	/* Call common code to handle resource allocation */
+	pcibios_resource_survey();
+
+	/* Add devices. */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		pci_bus_add_devices(hose->bus);
+
+	/* Call machine dependent fixup */
+	if (ppc_md.pcibios_fixup)
+		ppc_md.pcibios_fixup();
+
+	printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+
+	return 0;
+}
+
+subsys_initcall_sync(pcibios_init);
+
+int pcibios_unmap_io_space(struct pci_bus *bus)
+{
+	struct pci_controller *hose;
+
+	WARN_ON(bus == NULL);
+
+	/* If this is not a PHB, we only flush the hash table over
+	 * the area mapped by this bridge. We don't play with the PTE
+	 * mappings since we might have to deal with sub-page alignments
+	 * so flushing the hash table is the only sane way to make sure
+	 * that no hash entries are covering that removed bridge area
+	 * while still allowing other busses overlapping those pages
+	 *
+	 * Note: If we ever support P2P hotplug on Book3E, we'll have
+	 * to do an appropriate TLB flush here too
+	 */
+	if (bus->self) {
+#ifdef CONFIG_PPC_BOOK3S_64
+		struct resource *res = bus->resource[0];
+#endif
+
+		pr_debug("IO unmapping for PCI-PCI bridge %s\n",
+			 pci_name(bus->self));
+
+#ifdef CONFIG_PPC_BOOK3S_64
+		__flush_hash_table_range(res->start + _IO_BASE,
+					 res->end + _IO_BASE + 1);
+#endif
+		return 0;
+	}
+
+	/* Get the host bridge */
+	hose = pci_bus_to_host(bus);
+
+	pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
+	pr_debug("  alloc=0x%p\n", hose->io_base_alloc);
+
+	iounmap(hose->io_base_alloc);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
+
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
+{
+	struct vm_struct *area;
+	unsigned long addr;
+
+	WARN_ON_ONCE(paddr & ~PAGE_MASK);
+	WARN_ON_ONCE(size & ~PAGE_MASK);
+
+	/*
+	 * Let's allocate some IO space for that guy, between PHB_IO_BASE and
+	 * PHB_IO_END. NOTE(review): this comment used to say that VM_IOREMAP
+	 * is not passed, to avoid the alignment tricks the core does in that
+	 * case (only wanted for stupid cards with incomplete address
+	 * decoding), yet the call below does pass VM_IOREMAP - confirm.
+	 */
+	area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END,
+				    __builtin_return_address(0));
+	if (!area)
+		return NULL;
+
+	addr = (unsigned long)area->addr;
+	if (ioremap_page_range(addr, addr + size, paddr,
+			pgprot_noncached(PAGE_KERNEL))) {
+		vunmap_range(addr, addr + size);
+		return NULL;
+	}
+
+	return (void __iomem *)addr;
+}
+EXPORT_SYMBOL_GPL(ioremap_phb);
+
+static int pcibios_map_phb_io_space(struct pci_controller *hose)
+{
+	unsigned long phys_page;
+	unsigned long size_page;
+	unsigned long io_virt_offset;
+
+	phys_page = ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
+	size_page = ALIGN(hose->pci_io_size, PAGE_SIZE);
+
+	/* Make sure IO area address is clear */
+	hose->io_base_alloc = NULL;
+
+	/* If there's no IO to map on that bus, get away too */
+	if (hose->pci_io_size == 0 || hose->io_base_phys == 0)
+		return 0;
+
+	/* Let's allocate some IO space for that guy via ioremap_phb().
+	 * NOTE(review): the remark that used to sit here about not passing
+	 * VM_IOREMAP (to skip the core's alignment tricks, only wanted for
+	 * cards with incomplete address decoding outside the reserved 64K
+	 * legacy region) concerns the allocation made in ioremap_phb().
+	 */
+	hose->io_base_alloc = ioremap_phb(phys_page, size_page);
+	if (!hose->io_base_alloc)
+		return -ENOMEM;
+	hose->io_base_virt = hose->io_base_alloc +
+				hose->io_base_phys - phys_page;
+
+	pr_debug("IO mapping for PHB %pOF\n", hose->dn);
+	pr_debug("  phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
+		 hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
+	pr_debug("  size=0x%016llx (alloc=0x%016lx)\n",
+		 hose->pci_io_size, size_page);
+
+	/* Fixup hose IO resource */
+	io_virt_offset = pcibios_io_space_offset(hose);
+	hose->io_resource.start += io_virt_offset;
+	hose->io_resource.end += io_virt_offset;
+
+	pr_debug("  hose->io_resource=%pR\n", &hose->io_resource);
+
+	return 0;
+}
+
+int pcibios_map_io_space(struct pci_bus *bus)
+{
+	WARN_ON(bus == NULL);
+
+	/* If this is not a PHB, nothing to do, page tables still exist and
+	 * thus HPTEs will be faulted in when needed
+	 */
+	if (bus->self) {
+		pr_debug("IO mapping for PCI-PCI bridge %s\n",
+			 pci_name(bus->self));
+		pr_debug("  virt=0x%016llx...0x%016llx\n",
+			 bus->resource[0]->start + _IO_BASE,
+			 bus->resource[0]->end + _IO_BASE);
+		return 0;
+	}
+
+	return pcibios_map_phb_io_space(pci_bus_to_host(bus));
+}
+EXPORT_SYMBOL_GPL(pcibios_map_io_space);
+
+void pcibios_setup_phb_io_space(struct pci_controller *hose)
+{
+	pcibios_map_phb_io_space(hose);
+}
+
+#define IOBASE_BRIDGE_NUMBER	0
+#define IOBASE_MEMORY		1
+#define IOBASE_IO		2
+#define IOBASE_ISA_IO		3
+#define IOBASE_ISA_MEM		4
+
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus,
+			  unsigned long, in_devfn)
+{
+	struct pci_controller* hose;
+	struct pci_bus *tmp_bus, *bus = NULL;
+	struct device_node *hose_node;
+
+	/* Argh ! Please forgive me for that hack, but that's the
+	 * simplest way to get existing XFree to not lockup on some
+	 * G5 machines... So when something asks for bus 0 io base
+	 * (bus 0 is HT root), we return the AGP one instead.
+	 */
+	if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) {
+		struct device_node *agp;
+
+		agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+		if (agp)
+			in_bus = 0xf0;
+		of_node_put(agp);
+	}
+
+	/* That syscall isn't quite compatible with PCI domains, but it's
+	 * used on pre-domains setup. We return the first match
+	 */
+
+	list_for_each_entry(tmp_bus, &pci_root_buses, node) {
+		if (in_bus >= tmp_bus->number &&
+		    in_bus <= tmp_bus->busn_res.end) {
+			bus = tmp_bus;
+			break;
+		}
+	}
+	if (bus == NULL || bus->dev.of_node == NULL)
+		return -ENODEV;
+
+	hose_node = bus->dev.of_node;
+	hose = PCI_DN(hose_node)->phb;
+
+	switch (which) {
+	case IOBASE_BRIDGE_NUMBER:
+		return (long)hose->first_busno;
+	case IOBASE_MEMORY:
+		return (long)hose->mem_offset[0];
+	case IOBASE_IO:
+		return (long)hose->io_base_phys;
+	case IOBASE_ISA_IO:
+		return (long)isa_io_base;
+	case IOBASE_ISA_MEM:
+		return -EINVAL;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+#ifdef CONFIG_NUMA
+int pcibus_to_node(struct pci_bus *bus)
+{
+	/* The node is the one stored on the controller hosting @bus */
+	return pci_bus_to_host(bus)->node;
+}
+EXPORT_SYMBOL(pcibus_to_node);
+#endif
+
+#ifdef CONFIG_PPC_PMAC
+int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn)
+{
+	if (!PCI_DN(np))
+		return -ENODEV;
+	*bus = PCI_DN(np)->busno;
+	*devfn = PCI_DN(np)->devfn;
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 0000000000..38561d6a20
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pci_dn.c
+ *
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * PCI manipulation via device_nodes.
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+/*
+ * Find the firmware data (pci_dn) attached to the indicated
+ * PCI bus: walk up from @bus to the first bus that is a root
+ * bus or has a bridge device, and return the pci_dn of its
+ * device node (NULL if there is none). Callers below search
+ * that pci_dn's child_list for devices such as VFs.
+ */
+static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
+{
+	struct pci_bus *pbus;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	/*
+	 * We may have a virtual bus which doesn't have an
+	 * associated bridge; climb to a parent that does.
+	 */
+	pbus = bus;
+	while (pbus) {
+		if (pci_is_root_bus(pbus) || pbus->self)
+			break;
+
+		pbus = pbus->parent;
+	}
+
+	/*
+	 * Except for virtual buses, all PCI buses should
+	 * have device nodes.
+	 */
+	dn = pci_bus_to_OF_node(pbus);
+	pdn = dn ? PCI_DN(dn) : NULL;
+
+	return pdn;
+}
+
+/* Find the pci_dn of device @devfn on @bus, or NULL if none is known */
+struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus, int devfn)
+{
+	struct device_node *dn = NULL;
+	struct pci_dn *parent, *pdn;
+	struct pci_dev *pdev = NULL;
+
+	/* Fast path: fetch from PCI device */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (pdev->devfn == devfn) {
+			if (pdev->dev.archdata.pci_data)
+				return pdev->dev.archdata.pci_data;
+
+			dn = pci_device_to_OF_node(pdev);
+			break;
+		}
+	}
+
+	/* Fast path: fetch from device node */
+	pdn = dn ? PCI_DN(dn) : NULL;
+	if (pdn)
+		return pdn;
+
+	/* Slow path: fetch from firmware data hierarchy */
+	parent = pci_bus_to_pdn(bus);
+	if (!parent)
+		return NULL;
+
+	list_for_each_entry(pdn, &parent->child_list, list) {
+		if (pdn->busno == bus->number &&
+		    pdn->devfn == devfn)
+			return pdn;
+	}
+
+	return NULL;
+}
+
+struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
+{
+	struct device_node *dn;
+	struct pci_dn *parent, *pdn;
+
+	/* Search device directly */
+	if (pdev->dev.archdata.pci_data)
+		return pdev->dev.archdata.pci_data;
+
+	/* Check device node */
+	dn = pci_device_to_OF_node(pdev);
+	pdn = dn ? PCI_DN(dn) : NULL;
+	if (pdn)
+		return pdn;
+
+	/*
+	 * VFs don't have device nodes. We hook their
+	 * firmware data to PF's bridge.
+	 */
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return NULL;
+
+	list_for_each_entry(pdn, &parent->child_list, list) {
+		if (pdn->busno == pdev->bus->number &&
+		    pdn->devfn == pdev->devfn)
+			return pdn;
+	}
+
+	return NULL;
+}
+
+#ifdef CONFIG_EEH
+static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
+{
+	struct eeh_dev *edev;
+
+	/* Allocate EEH device */
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return NULL;
+
+	/* Associate EEH device with OF node */
+	pdn->edev = edev;
+	edev->pdn = pdn;
+	edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+	edev->controller = pdn->phb;
+
+	return edev;
+}
+#endif /* CONFIG_EEH */
+
+#ifdef CONFIG_PCI_IOV
+static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent,
+					   int busno, int devfn)
+{
+	struct pci_dn *pdn;
+
+	/* Except PHB, we always have the parent */
+	if (!parent)
+		return NULL;
+
+	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
+	if (!pdn)
+		return NULL;
+
+	pdn->phb = parent->phb;
+	pdn->parent = parent;
+	pdn->busno = busno;
+	pdn->devfn = devfn;
+	pdn->pe_number = IODA_INVALID_PE;
+	INIT_LIST_HEAD(&pdn->child_list);
+	INIT_LIST_HEAD(&pdn->list);
+	list_add_tail(&pdn->list, &parent->child_list);
+
+	return pdn;
+}
+
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev)
+{
+	struct pci_dn *parent, *pdn;
+	int i;
+
+	/* Only support IOV PF for now */
+	if (WARN_ON(!pdev->is_physfn))
+		return NULL;
+
+	/* Check if VFs have been populated */
+	pdn = pci_get_pdn(pdev);
+	if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
+		return NULL;
+
+	pdn->flags |= PCI_DN_FLAG_IOV_VF;
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return NULL;
+
+	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
+		struct eeh_dev *edev __maybe_unused;
+
+		pdn = add_one_sriov_vf_pdn(parent,
+					   pci_iov_virtfn_bus(pdev, i),
+					   pci_iov_virtfn_devfn(pdev, i));
+		if (!pdn) {
+			dev_warn(&pdev->dev, "%s: Cannot create firmware data for VF#%d\n",
+				 __func__, i);
+			return NULL;
+		}
+
+#ifdef CONFIG_EEH
+		/* Create the EEH device for the VF */
+		edev = eeh_dev_init(pdn);
+		BUG_ON(!edev);
+
+		/* FIXME: these should probably be populated by the EEH probe */
+		edev->physfn = pdev;
+		edev->vf_index = i;
+#endif /* CONFIG_EEH */
+	}
+	return pci_get_pdn(pdev);
+}
+
+void remove_sriov_vf_pdns(struct pci_dev *pdev)
+{
+	struct pci_dn *parent;
+	struct pci_dn *pdn, *tmp;
+	int i;
+
+	/* Only support IOV PF for now */
+	if (WARN_ON(!pdev->is_physfn))
+		return;
+
+	/* Check if VFs have been populated */
+	pdn = pci_get_pdn(pdev);
+	if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
+		return;
+
+	pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
+	parent = pci_bus_to_pdn(pdev->bus);
+	if (!parent)
+		return;
+
+	/*
+	 * We might introduce a flag to pci_dn in the future
+	 * so that we can release the VFs' firmware data in
+	 * batch mode.
+	 */
+	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
+		struct eeh_dev *edev __maybe_unused;
+
+		list_for_each_entry_safe(pdn, tmp,
+			&parent->child_list, list) {
+			if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
+			    pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
+				continue;
+
+#ifdef CONFIG_EEH
+			/*
+			 * Release EEH state for this VF. The PCI core
+			 * has already torn down the pci_dev for this VF, but
+			 * we're responsible for removing the eeh_dev since it
+			 * has the same lifetime as the pci_dn that spawned it.
+			 */
+			edev = pdn_to_eeh_dev(pdn);
+			if (edev) {
+				/*
+				 * We allocate pci_dn's for the totalvfs count,
+				 * but only the vfs that were activated
+				 * have a configured PE.
+				 */
+				if (edev->pe)
+					eeh_pe_tree_remove(edev);
+
+				pdn->edev = NULL;
+				kfree(edev);
+			}
+#endif /* CONFIG_EEH */
+
+			if (!list_empty(&pdn->list))
+				list_del(&pdn->list);
+
+			kfree(pdn);
+		}
+	}
+}
+#endif /* CONFIG_PCI_IOV */
+
+/* Allocate and fill the pci_dn of @dn under @hose; NULL if allocation fails */
+struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+					struct device_node *dn)
+{
+	const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
+	const __be32 *regs;
+	struct device_node *parent;
+	struct pci_dn *pdn;
+	struct eeh_dev *edev __maybe_unused;
+
+	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
+	if (pdn == NULL)
+		return NULL;
+	pdn->phb = hose;
+	pdn->pe_number = IODA_INVALID_PE;
+	regs = of_get_property(dn, "reg", NULL);
+	if (regs) {
+		u32 addr = of_read_number(regs, 1);
+
+		/* First register entry is addr (00BBSS00)  */
+		pdn->busno = (addr >> 16) & 0xff;
+		pdn->devfn = (addr >> 8) & 0xff;
+	}
+
+	/* vendor/device IDs and class code */
+	regs = of_get_property(dn, "vendor-id", NULL);
+	pdn->vendor_id = regs ? of_read_number(regs, 1) : 0;
+	regs = of_get_property(dn, "device-id", NULL);
+	pdn->device_id = regs ? of_read_number(regs, 1) : 0;
+	regs = of_get_property(dn, "class-code", NULL);
+	pdn->class_code = regs ? of_read_number(regs, 1) : 0;
+
+	/* Extended config space */
+	pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1);
+
+	/* Create EEH device */
+#ifdef CONFIG_EEH
+	edev = eeh_dev_init(pdn);
+	if (!edev) {
+		/* pdn is not yet reachable through dn->data: safe to free */
+		kfree(pdn);
+		return NULL;
+	}
+#endif
+
+	/* Can't fail past here: publish on the node, attach to parent node */
+	dn->data = pdn;
+	INIT_LIST_HEAD(&pdn->child_list);
+	INIT_LIST_HEAD(&pdn->list);
+	parent = of_get_parent(dn);
+	pdn->parent = parent ? PCI_DN(parent) : NULL;
+	of_node_put(parent);
+	if (pdn->parent)
+		list_add_tail(&pdn->list, &pdn->parent->child_list);
+
+	return pdn;
+}
+EXPORT_SYMBOL_GPL(pci_add_device_node_info);
+
+void pci_remove_device_node_info(struct device_node *dn)
+{
+	struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+	struct device_node *parent;
+	struct pci_dev *pdev;
+#ifdef CONFIG_EEH
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+	if (edev)
+		edev->pdn = NULL;
+#endif
+
+	if (!pdn)
+		return;
+
+	WARN_ON(!list_empty(&pdn->child_list));
+	list_del(&pdn->list);
+
+	/* Drop the parent pci_dn's ref to our backing dt node */
+	parent = of_get_parent(dn);
+	if (parent)
+		of_node_put(parent);
+
+	/*
+	 * At this point we *might* still have a pci_dev that was
+	 * instantiated from this pci_dn. So defer free()ing it until
+	 * the pci_dev's release function is called.
+	 */
+	pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+			pdn->busno, pdn->devfn);
+	if (pdev) {
+		/* NB: pdev has a ref to dn */
+		pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+		pdn->flags |= PCI_DN_FLAG_DEAD;
+	} else {
+		dn->data = NULL;
+		kfree(pdn);
+	}
+
+	pci_dev_put(pdev);
+}
+EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
+
+/*
+ * Traverse a device tree stopping at each PCI device in the tree.
+ * This is done depth first.  As each node is processed, a "pre"
+ * function is called and the children are processed recursively.
+ *
+ * The "pre" func returns a value.  If non-zero is returned from
+ * the "pre" func, the traversal stops and this value is returned.
+ * This return value is useful when using traverse as a method of
+ * finding a device.
+ *
+ * NOTE: the func is not run for the start node itself, only for
+ * the nodes below it (the start node is often a phb which may be
+ * missing PCI properties).
+ * The func is run for every node that is visited. The class-code
+ * is only used to decide whether to descend: children are visited
+ * only below nodes whose class-code marks them as a PCI-PCI or
+ * CardBus bridge.
+ */
+void *pci_traverse_device_nodes(struct device_node *start,
+				void *(*fn)(struct device_node *, void *),
+				void *data)
+{
+	struct device_node *dn, *nextdn;
+	void *ret;
+
+	/* We started with a phb, iterate over all children */
+	for (dn = start->child; dn; dn = nextdn) {
+		const __be32 *classp;
+		u32 class = 0;
+
+		nextdn = NULL;
+		classp = of_get_property(dn, "class-code", NULL);
+		if (classp)
+			class = of_read_number(classp, 1);
+
+		if (fn) {
+			ret = fn(dn, data);
+			if (ret)
+				return ret;
+		}
+
+		/* If we are a PCI or CardBus bridge with children, go down */
+		if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+				  (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+			/* Depth first...do children */
+			nextdn = dn->child;
+		else if (dn->sibling)
+			/* ok, try next sibling instead. */
+			nextdn = dn->sibling;
+		if (!nextdn) {
+			/* Walk up to next valid sibling. */
+			do {
+				dn = dn->parent;
+				if (dn == start)
+					return NULL;
+			} while (dn->sibling == NULL);
+			nextdn = dn->sibling;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);
+
+static void *add_pdn(struct device_node *dn, void *data)
+{
+	struct pci_controller *hose = data;
+	struct pci_dn *pdn;
+
+	pdn = pci_add_device_node_info(hose, dn);
+	if (!pdn)
+		return ERR_PTR(-ENOMEM);
+
+	return NULL;
+}
+
+/**
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * @phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+	struct device_node *dn = phb->dn;
+	struct pci_dn *pdn;
+
+	/* PHB nodes themselves must not match */
+	pdn = pci_add_device_node_info(phb, dn);
+	if (pdn) {
+		pdn->devfn = pdn->busno = -1;
+		pdn->vendor_id = pdn->device_id = pdn->class_code = 0;
+		pdn->phb = phb;
+		phb->pci_data = pdn;
+	}
+
+	/* Create a pci_dn (with ->phb set) for the nodes below the phb */
+	pci_traverse_device_nodes(dn, add_pdn, phb);
+}
+
+static void pci_dev_pdn_setup(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn;
+
+	if (pdev->dev.archdata.pci_data)
+		return;
+
+	/* Setup the fast path */
+	pdn = pci_get_pdn(pdev);
+	pdev->dev.archdata.pci_data = pdn;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pci_dev_pdn_setup);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
new file mode 100644
index 0000000000..756043dd06
--- /dev/null
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helper routines to scan the device tree for PCI devices and busses
+ *
+ * Migrated out of PowerPC architecture pci_64.c file by Grant Likely
+ * <grant.likely@secretlab.ca> so that these routines are available for
+ * 32 bit also.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *   Rework, based on alpha PCI code.
+ * Copyright (c) 2009 Secret Lab Technologies Ltd.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <asm/pci-bridge.h>
+
+/**
+ * get_int_prop - Decode a u32 from a device tree property
+ * @np: device tree node to read the property from
+ * @name: name of the property
+ * @def: value returned when the property is missing or shorter than 4 bytes
+ *
+ * Return: the first cell of the property, or @def.
+ */
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+	int proplen;
+	const __be32 *cell = of_get_property(np, name, &proplen);
+
+	if (!cell || proplen < 4)
+		return def;
+
+	return of_read_number(cell, 1);
+}
+
+/**
+ * pci_parse_of_flags - Parse the flags cell of a device tree PCI address
+ * @addr0: value of 1st cell of a device tree PCI address.
+ * @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit#            33222222 22221111 11111100 00000000
+ *                 10987654 32109876 54321098 76543210
+ * phys.hi cell:   npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell:  hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell:   llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n        is 0 if the address is relocatable, 1 otherwise
+ * p        is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t        is 1 if the address is aliased (for non-relocatable I/O),
+ *          below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
+ * ss       is the space code, denoting the address space:
+ *              00 denotes Configuration Space
+ *              01 denotes I/O Space
+ *              10 denotes 32-bit-address Memory Space
+ *              11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd    is the 5-bit Device Number
+ * fff      is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
+ */
+/* 'ss' space code, bits 25:24 of the phys.hi cell */
+#define OF_PCI_ADDR0_SPACE(ss)		(((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG		OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO		OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32	OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64	OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK		OF_PCI_ADDR0_SPACE(3)
+/*
+ * 'n', 'p' and 't' bits (31, 30, 29).  Note that per the binding text the
+ * 'n' bit is 0 for a relocatable address and 1 otherwise, so a set
+ * OF_PCI_ADDR0_RELOC bit means "not relocatable" despite the name.
+ */
+#define OF_PCI_ADDR0_RELOC		(1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH		(1UL<<30)
+#define OF_PCI_ADDR0_ALIAS		(1UL<<29)
+/* bus (bbbbbbbb), device (ddddd), function (fff) and register (rrrrrrrr) */
+#define OF_PCI_ADDR0_BUS		0x00FF0000UL
+#define OF_PCI_ADDR0_DEV		0x0000F800UL
+#define OF_PCI_ADDR0_FN			0x00000700UL
+#define OF_PCI_ADDR0_BARREG		0x000000FFUL
+
+/*
+ * Translate the phys.hi cell @addr0 into IORESOURCE_* / PCI_BASE_ADDRESS_*
+ * flags.  Returns 0 for configuration space (no usable resource); every
+ * non-zero result also carries IORESOURCE_SIZEALIGN.
+ */
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+{
+	unsigned int space = addr0 & OF_PCI_ADDR0_SPACE_MASK;
+	unsigned int flags;
+
+	switch (space) {
+	case OF_PCI_ADDR0_SPACE_IO:
+		flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+		break;
+
+	case OF_PCI_ADDR0_SPACE_MMIO32:
+	case OF_PCI_ADDR0_SPACE_MMIO64:
+		flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+
+		if (space == OF_PCI_ADDR0_SPACE_MMIO64)
+			flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+		if (addr0 & OF_PCI_ADDR0_ALIAS)
+			flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+		if (addr0 & OF_PCI_ADDR0_PREFETCH)
+			flags |= IORESOURCE_PREFETCH |
+				 PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+		/*
+		 * Note: We don't know whether the ROM has been left enabled
+		 * by the firmware or not. We mark it as disabled (ie, we do
+		 * not set the IORESOURCE_ROM_ENABLE flag) for now rather than
+		 * do a config space read, it will be force-enabled if needed
+		 */
+		if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
+			flags |= IORESOURCE_READONLY;
+		break;
+
+	default:
+		/* Configuration space: nothing to describe */
+		return 0;
+	}
+
+	if (flags)
+		flags |= IORESOURCE_SIZEALIGN;
+
+	return flags;
+}
+
+/**
+ * of_pci_parse_addrs - Parse PCI addresses assigned in the device tree node
+ * @node: device tree node for the PCI device
+ * @dev: pci_dev structure for the device
+ *
+ * Walks the 'assigned-addresses' property of @node, falling back to 'reg'
+ * when that property is absent or empty, and fills in the matching BAR or
+ * ROM resource of @dev for every entry.  Resources taken from 'reg' are
+ * additionally flagged IORESOURCE_UNSET.
+ */
+static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
+{
+	const __be32 *cells;
+	struct pci_bus_region region;
+	struct resource *res;
+	unsigned int flags;
+	bool from_reg = false;
+	u64 base, size;
+	u32 phys_hi, cfg_reg;
+	int bytes;
+
+	cells = of_get_property(node, "assigned-addresses", &bytes);
+	if (!cells || !bytes) {
+		cells = of_get_property(node, "reg", &bytes);
+		if (!cells || !bytes)
+			return;
+		from_reg = true;
+	}
+
+	pr_debug("    parse addresses (%d bytes) @ %p\n", bytes, cells);
+
+	/* Each entry is 5 cells: phys.hi, two address cells, two size cells */
+	for (; bytes >= 20; bytes -= 20, cells += 5) {
+		phys_hi = of_read_number(cells, 1);
+
+		flags = pci_parse_of_flags(phys_hi, 0);
+		if (!flags)
+			continue;
+
+		size = of_read_number(&cells[3], 2);
+		if (!size)
+			continue;
+		base = of_read_number(&cells[1], 2);
+
+		/* Low byte of phys.hi is the config register the entry is for */
+		cfg_reg = phys_hi & 0xff;
+		pr_debug("  base: %llx, size: %llx, i: %x\n",
+			 (unsigned long long)base,
+			 (unsigned long long)size, cfg_reg);
+
+		if (cfg_reg >= PCI_BASE_ADDRESS_0 && cfg_reg <= PCI_BASE_ADDRESS_5) {
+			res = &dev->resource[(cfg_reg - PCI_BASE_ADDRESS_0) >> 2];
+		} else if (cfg_reg == dev->rom_base_reg) {
+			res = &dev->resource[PCI_ROM_RESOURCE];
+			flags |= IORESOURCE_READONLY;
+		} else {
+			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", cfg_reg);
+			continue;
+		}
+
+		if (from_reg)
+			flags |= IORESOURCE_UNSET;
+		res->flags = flags;
+		res->name = pci_name(dev);
+
+		region.start = base;
+		region.end = base + size - 1;
+		pcibios_bus_to_resource(dev->bus, res, &region);
+	}
+}
+
+/**
+ * of_create_pci_dev - Given a device tree node on a pci bus, create a pci_dev
+ * @node: device tree node pointer
+ * @bus: bus the device is sitting on
+ * @devfn: PCI function number, extracted from device tree by caller.
+ *
+ * The pci_dev is populated from device tree properties (IDs, class,
+ * revision, addresses) and then handed to pci_device_add().
+ *
+ * Return: the new pci_dev, or NULL if pci_alloc_dev() fails.
+ */
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+				 struct pci_bus *bus, int devfn)
+{
+	struct pci_dev *dev;
+
+	dev = pci_alloc_dev(bus);
+	if (!dev)
+		return NULL;
+
+	pr_debug("    create device, devfn: %x, type: %s\n", devfn,
+		 of_node_get_device_type(node));
+
+	/* The device keeps the result of of_node_get(node) as its of_node */
+	dev->dev.of_node = of_node_get(node);
+	dev->dev.parent = bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->devfn = devfn;
+	dev->multifunction = 0;		/* maybe a lie? */
+	dev->needs_freset = 0;		/* pcie fundamental reset required */
+	set_pcie_port_type(dev);
+
+	pci_dev_assign_slot(dev);
+	/*
+	 * Identification comes from device tree properties; a missing
+	 * vendor/device ID reads as 0xffff, missing subsystem IDs as 0.
+	 */
+	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+	dev->device = get_int_prop(node, "device-id", 0xffff);
+	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+	dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+	dev->cfg_size = pci_cfg_space_size(dev);
+
+	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+		dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+	dev->class = get_int_prop(node, "class-code", 0);
+	dev->revision = get_int_prop(node, "revision-id", 0);
+
+	pr_debug("    class: 0x%x\n", dev->class);
+	pr_debug("    revision: 0x%x\n", dev->revision);
+
+	dev->current_state = PCI_UNKNOWN;	/* unknown power state */
+	dev->error_state = pci_channel_io_normal;
+	dev->dma_mask = 0xffffffff;
+
+	/* Early fixups, before probing the BARs */
+	pci_fixup_device(pci_fixup_early, dev);
+
+	/*
+	 * The header type is chosen from the node's device type.  Note the
+	 * cardbus branch does not assign rom_base_reg.
+	 */
+	if (of_node_is_type(node, "pci") || of_node_is_type(node, "pciex")) {
+		/* a PCI-PCI bridge */
+		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+		set_pcie_hotplug_bridge(dev);
+	} else if (of_node_is_type(node, "cardbus")) {
+		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+	} else {
+		dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+		dev->rom_base_reg = PCI_ROM_ADDRESS;
+		/* Maybe do a default OF mapping here */
+		dev->irq = 0;
+	}
+
+	/* Needs rom_base_reg from above to recognise the ROM entry */
+	of_pci_parse_addrs(node, dev);
+
+	pr_debug("    adding to system ...\n");
+
+	pci_device_add(dev, bus);
+
+	return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+/**
+ * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
+ * @dev: pci_dev structure for the bridge
+ *
+ * of_scan_bus() calls this routine for each PCI bridge that it finds, and
+ * this routine in turn calls of_scan_bus() recursively to scan for more child
+ * devices.
+ *
+ * The bridge's device tree node must carry an 8-byte 'bus-range' and a
+ * 'ranges' property; otherwise a debug message is printed and nothing is
+ * set up.
+ */
+void of_scan_pci_bridge(struct pci_dev *dev)
+{
+	struct device_node *node = dev->dev.of_node;
+	struct pci_bus *bus;
+	struct pci_controller *phb;
+	const __be32 *busrange, *ranges;
+	int len, i, mode;
+	struct pci_bus_region region;
+	struct resource *res;
+	unsigned int flags;
+	u64 size;
+
+	pr_debug("of_scan_pci_bridge(%pOF)\n", node);
+
+	/* parse bus-range property */
+	busrange = of_get_property(node, "bus-range", &len);
+	if (busrange == NULL || len != 8) {
+		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+		       node);
+		return;
+	}
+	ranges = of_get_property(node, "ranges", &len);
+	if (ranges == NULL) {
+		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+		       node);
+		return;
+	}
+
+	/* Reuse the child bus if it already exists, else create it */
+	bus = pci_find_bus(pci_domain_nr(dev->bus),
+			   of_read_number(busrange, 1));
+	if (!bus) {
+		bus = pci_add_new_bus(dev->bus, dev,
+				      of_read_number(busrange, 1));
+		if (!bus) {
+			printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+			       node);
+			return;
+		}
+	}
+
+	bus->primary = dev->bus->number;
+	/* The two bus-range cells are the first and last bus number */
+	pci_bus_insert_busn_res(bus, of_read_number(busrange, 1),
+				of_read_number(busrange+1, 1));
+	bus->bridge_ctl = 0;
+
+	/* parse ranges property */
+	/* PCI #address-cells == 3 and #size-cells == 2 always */
+	/* Point every bus resource slot at a cleared bridge window */
+	res = &dev->resource[PCI_BRIDGE_RESOURCES];
+	for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+		res->flags = 0;
+		bus->resource[i] = res;
+		++res;
+	}
+	/* Slot 0 is kept for the I/O window; memory windows start at slot 1 */
+	i = 1;
+	/*
+	 * Each entry is 8 cells (32 bytes): cell 0 carries the flags,
+	 * cells 1-2 the bus address and cells 6-7 the size.
+	 */
+	for (; len >= 32; len -= 32, ranges += 8) {
+		flags = pci_parse_of_flags(of_read_number(ranges, 1), 1);
+		size = of_read_number(&ranges[6], 2);
+		if (flags == 0 || size == 0)
+			continue;
+		if (flags & IORESOURCE_IO) {
+			res = bus->resource[0];
+			/* Non-zero flags mean the I/O slot is already taken */
+			if (res->flags) {
+				printk(KERN_ERR "PCI: ignoring extra I/O range"
+				       " for bridge %pOF\n", node);
+				continue;
+			}
+		} else {
+			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+				printk(KERN_ERR "PCI: too many memory ranges"
+				       " for bridge %pOF\n", node);
+				continue;
+			}
+			res = bus->resource[i];
+			++i;
+		}
+		res->flags = flags;
+		region.start = of_read_number(&ranges[1], 2);
+		region.end = region.start + size - 1;
+		pcibios_bus_to_resource(dev->bus, res, &region);
+	}
+	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+		bus->number);
+	pr_debug("    bus name: %s\n", bus->name);
+
+	phb = pci_bus_to_host(bus);
+
+	/* The controller may override the default probe mode */
+	mode = PCI_PROBE_NORMAL;
+	if (phb->controller_ops.probe_mode)
+		mode = phb->controller_ops.probe_mode(bus);
+	pr_debug("    probe mode: %d\n", mode);
+
+	/* Any other mode leaves the child bus unscanned */
+	if (mode == PCI_PROBE_DEVTREE)
+		of_scan_bus(node, bus);
+	else if (mode == PCI_PROBE_NORMAL)
+		pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+
+/*
+ * Find or create the pci_dev for device tree node @dn on @bus.
+ *
+ * Returns NULL when the node is unavailable, has no usable 'reg' property
+ * (at least 20 bytes), is flagged EEH_DEV_REMOVED (CONFIG_EEH only), or when
+ * creating the device fails.  For a device that already exists on the bus,
+ * pci_dev_put() is called before the pointer is returned.
+ */
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+			    struct device_node *dn)
+{
+	struct pci_dev *pdev;
+	const __be32 *reg;
+	int reglen, devfn;
+#ifdef CONFIG_EEH
+	struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
+#endif
+
+	pr_debug("  * %pOF\n", dn);
+	if (!of_device_is_available(dn))
+		return NULL;
+
+	reg = of_get_property(dn, "reg", &reglen);
+	if (!reg || reglen < 20)
+		return NULL;
+
+	/* devfn lives in bits 15:8 of the first 'reg' cell */
+	devfn = (of_read_number(reg, 1) >> 8) & 0xff;
+
+	/* Check if the PCI device is already there */
+	pdev = pci_get_slot(bus, devfn);
+	if (pdev) {
+		pci_dev_put(pdev);
+		return pdev;
+	}
+
+#ifdef CONFIG_EEH
+	/* Device removed permanently ? */
+	if (edev && (edev->mode & EEH_DEV_REMOVED))
+		return NULL;
+#endif
+
+	/* create a new pci_dev for this device */
+	pdev = of_create_pci_dev(dn, bus, devfn);
+	if (pdev)
+		pr_debug("  dev header type: %x\n", pdev->hdr_type);
+
+	return pdev;
+}
+
+/**
+ * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ * @rescan_existing: Flag indicating bus has already been set up
+ *
+ * Works in three passes: create/look up a pci_dev for every direct child
+ * node, apply the bus "self" fixups (skipped on a rescan), then descend
+ * into every bridge found on @bus.
+ */
+static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
+			  int rescan_existing)
+{
+	struct device_node *child;
+	struct pci_dev *pdev;
+
+	pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+		 node, bus->number);
+
+	/* Scan direct children */
+	for_each_child_of_node(node, child) {
+		pdev = of_scan_pci_dev(bus, child);
+		if (pdev)
+			pr_debug("    dev header type: %x\n", pdev->hdr_type);
+	}
+
+	/*
+	 * Apply all fixups necessary. We don't fixup the bus "self"
+	 * for an existing bridge that is being rescanned
+	 */
+	if (!rescan_existing)
+		pcibios_setup_bus_self(bus);
+
+	/* Now scan child busses */
+	for_each_pci_bridge(pdev, bus)
+		of_scan_pci_bridge(pdev);
+}
+
+/**
+ * of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ *
+ * First-time scan: calls __of_scan_bus() with rescan_existing == 0, so the
+ * bus "self" fixups are applied.
+ */
+void of_scan_bus(struct device_node *node, struct pci_bus *bus)
+{
+	__of_scan_bus(node, bus, 0);
+}
+EXPORT_SYMBOL_GPL(of_scan_bus);
+
+/**
+ * of_rescan_bus - given a PCI bus node, scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ *
+ * Same as of_scan_bus, but for a pci_bus structure that has already been
+ * setup: __of_scan_bus() is called with rescan_existing == 1, which skips
+ * the bus "self" fixups.
+ */
+void of_rescan_bus(struct device_node *node, struct pci_bus *bus)
+{
+	__of_scan_bus(node, bus, 1);
+}
+EXPORT_SYMBOL_GPL(of_rescan_bus);
+
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
new file mode 100644
index 0000000000..9fabb4d923
--- /dev/null
+++ b/arch/powerpc/kernel/pmc.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  arch/powerpc/kernel/pmc.c
+ *
+ *  Copyright (C) 2004 David Gibson, IBM Corporation.
+ *  Includes code formerly from arch/ppc/kernel/perfmon.c:
+ *    Author: Andy Fleming
+ *    Copyright (c) 2004 Freescale Semiconductor, Inc
+ */
+
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/pmc.h>
+
+#ifndef MMCR0_PMAO
+#define MMCR0_PMAO	0
+#endif
+
+/*
+ * Default perf_irq handler, used whenever no real handler is installed.
+ * It only clears control bits in the performance monitor control register
+ * selected by the kernel configuration; @regs is unused.
+ */
+static void dummy_perf(struct pt_regs *regs)
+{
+#if defined(CONFIG_FSL_EMB_PERFMON)
+	/* Clear PMGC0_PMIE in PMGC0 */
+	mtpmr(PMRN_PMGC0, mfpmr(PMRN_PMGC0) & ~PMGC0_PMIE);
+#elif defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
+	/*
+	 * Only touch MMCR0 on CPUs whose pmc_type is PPC_PMC_IBM.  MMCR0_PMAO
+	 * is defined as 0 in this file when the headers do not provide it,
+	 * in which case only MMCR0_PMXE is cleared.
+	 */
+	if (cur_cpu_spec->pmc_type == PPC_PMC_IBM)
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~(MMCR0_PMXE|MMCR0_PMAO));
+#else
+	/* Clear MMCR0_PMXE unconditionally */
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMXE);
+#endif
+}
+
+
+static DEFINE_RAW_SPINLOCK(pmc_owner_lock);
+static void *pmc_owner_caller; /* mostly for debugging */
+perf_irq_t perf_irq = dummy_perf;
+
+/*
+ * Claim the PMC hardware for the caller and install @new_perf_irq as the
+ * perf_irq handler (dummy_perf if @new_perf_irq is NULL).
+ *
+ * Returns 0 on success, -EBUSY if the hardware is already reserved.
+ */
+int reserve_pmc_hardware(perf_irq_t new_perf_irq)
+{
+	int ret;
+
+	raw_spin_lock(&pmc_owner_lock);
+
+	if (!pmc_owner_caller) {
+		/* Remember who reserved the hardware, for the busy message */
+		pmc_owner_caller = __builtin_return_address(0);
+		perf_irq = new_perf_irq ? new_perf_irq : dummy_perf;
+		ret = 0;
+	} else {
+		printk(KERN_WARNING "reserve_pmc_hardware: "
+		       "PMC hardware busy (reserved by caller %p)\n",
+		       pmc_owner_caller);
+		ret = -EBUSY;
+	}
+
+	raw_spin_unlock(&pmc_owner_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(reserve_pmc_hardware);
+
+/*
+ * Give the PMC hardware back: forget the owner and fall back to the
+ * dummy_perf handler.  Warns if nothing was reserved.
+ */
+void release_pmc_hardware(void)
+{
+	raw_spin_lock(&pmc_owner_lock);
+
+	WARN_ON(pmc_owner_caller == NULL);
+
+	perf_irq = dummy_perf;
+	pmc_owner_caller = NULL;
+
+	raw_spin_unlock(&pmc_owner_lock);
+}
+EXPORT_SYMBOL_GPL(release_pmc_hardware);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Set bit (63 - 20) in HID0 and write it back using the instruction
+ * sequence POWER4 requires.
+ * NOTE(review): presumably this HID0 bit enables the performance monitor
+ * counters, going by the function name -- confirm against the CPU manual.
+ */
+void power4_enable_pmcs(void)
+{
+	unsigned long hid0;
+
+	hid0 = mfspr(SPRN_HID0);
+	hid0 |= 1UL << (63 - 20);
+
+	/* POWER4 requires the following sequence */
+	/*
+	 * sync, one mtspr to HID0, six mfspr reads of HID0, then isync.
+	 * hid0 is the input (tied to operand 0) and also receives the
+	 * read-back values as an early-clobber output.
+	 */
+	asm volatile(
+		"sync\n"
+		"mtspr     %1, %0\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"isync" : "=&r" (hid0) : "i" (SPRN_HID0), "0" (hid0):
+		"memory");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
new file mode 100644
index 0000000000..a9b9c32d0c
--- /dev/null
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * NOTE: assert(sizeof(buf) > 23 * sizeof(long))
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/asm-compat.h>
+
+/*
+ * Grab the register values as they are now.
+ * This won't do a particularly good job because we really
+ * want our caller's caller's registers, and our caller has
+ * already executed its prologue.
+ * ToDo: We could reach back into the caller's save area to do
+ * a better job of representing the caller's state (note that
+ * that will be different for 32-bit and 64-bit, because of the
+ * different ABIs, though).
+ */
+/*
+ * ppc_save_regs: snapshot the current register state into the buffer
+ * whose address is passed in r3.  r0 and r4 are used as scratch after
+ * their own values have been stored.
+ */
+_GLOBAL(ppc_save_regs)
+	/* This allows stack frame accessor macros and offsets to be used */
+	subi	r3,r3,STACK_INT_FRAME_REGS
+	PPC_STL	r0,GPR0(r3)
+#ifdef CONFIG_PPC32
+	/* 32-bit: store r2 onwards with a single store-multiple */
+	stmw	r2,GPR2(r3)
+#else
+	SAVE_GPRS(2, 31, r3)
+	/* 64-bit only: record the soft interrupt mask byte from the PACA (r13) */
+	lbz	r0,PACAIRQSOFTMASK(r13)
+	PPC_STL	r0,SOFTE(r3)
+#endif
+	/* store current SP */
+	PPC_STL	r1,GPR1(r3)
+	/* get caller's LR */
+	/* r4 = word at 0(r1) (presumably the back chain), then its LRSAVE slot */
+	PPC_LL	r4,0(r1)
+	PPC_LL	r0,LRSAVE(r4)
+	PPC_STL	r0,_LINK(r3)
+	/* our own return address is recorded as the NIP */
+	mflr	r0
+	PPC_STL	r0,_NIP(r3)
+	mfmsr	r0
+	PPC_STL	r0,_MSR(r3)
+	mfctr	r0
+	PPC_STL	r0,_CTR(r3)
+	mfxer	r0
+	PPC_STL	r0,_XER(r3)
+	mfcr	r0
+	PPC_STL	r0,_CCR(r3)
+	/* trap and orig_gpr3 are zeroed rather than captured */
+	li	r0,0
+	PPC_STL	r0,_TRAP(r3)
+	PPC_STL	r0,ORIG_GPR3(r3)
+	blr
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
new file mode 100644
index 0000000000..b109cd7b5d
--- /dev/null
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/rtas.h>
+#include <linux/uaccess.h>
+
+#ifdef CONFIG_PPC64
+
+/* llseek handler: the file is treated as exactly PAGE_SIZE bytes long. */
+static loff_t page_map_seek(struct file *file, loff_t off, int whence)
+{
+	return fixed_size_llseek(file, off, whence, PAGE_SIZE);
+}
+
+/*
+ * read handler: copy from the PAGE_SIZE buffer stored as the proc entry's
+ * data pointer, honouring and advancing *ppos.
+ */
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+			      loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, nbytes, ppos,
+			pde_data(file_inode(file)), PAGE_SIZE);
+}
+
+/*
+ * mmap handler: map the single page stored as the proc entry's data
+ * pointer into the caller's address space.
+ *
+ * Returns -EINVAL if the VMA spans more than one page, otherwise the result
+ * of remap_pfn_range() (0 on success, a negative errno on failure).
+ */
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE)
+		return -EINVAL;
+
+	/* Propagate a mapping failure instead of always reporting success */
+	return remap_pfn_range(vma, vma->vm_start,
+			__pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
+			PAGE_SIZE, vma->vm_page_prot);
+}
+
+/* File operations for the one-page proc entry: seek, read and mmap only. */
+static const struct proc_ops page_map_proc_ops = {
+	.proc_lseek	= page_map_seek,
+	.proc_read	= page_map_read,
+	.proc_mmap	= page_map_mmap,
+};
+
+
+/*
+ * Create the read-only "powerpc/systemcfg" proc entry, backed by vdso_data
+ * and sized to one page.  Returns 0 on success, 1 if the entry could not be
+ * created.
+ */
+static int __init proc_ppc64_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
+				 &page_map_proc_ops, vdso_data);
+	if (entry) {
+		proc_set_size(entry, PAGE_SIZE);
+		return 0;
+	}
+
+	return 1;
+}
+__initcall(proc_ppc64_init);
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ *
+ * Creates /proc/powerpc, the /proc/ppc64 symlink (64-bit only) and, when
+ * the device tree has a /rtas node, /proc/powerpc/rtas plus a /proc/rtas
+ * symlink to it.  Returns 0 on success and 1 if a required entry could not
+ * be created; a failed ppc64 symlink is only logged.
+ */
+static int __init proc_ppc64_create(void)
+{
+	struct proc_dir_entry *root;
+	struct device_node *rtas;
+
+	root = proc_mkdir("powerpc", NULL);
+	if (!root)
+		return 1;
+
+#ifdef CONFIG_PPC64
+	if (!proc_symlink("ppc64", NULL, "powerpc"))
+		pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n");
+#endif
+
+	rtas = of_find_node_by_path("/rtas");
+	if (!rtas)
+		return 0;
+	/*
+	 * Only the node's existence matters here; drop the reference taken
+	 * by of_find_node_by_path() so it is not leaked.
+	 */
+	of_node_put(rtas);
+
+	if (!proc_mkdir("rtas", root))
+		return 1;
+
+	if (!proc_symlink("rtas", NULL, "powerpc/rtas"))
+		return 1;
+
+	return 0;
+}
+core_initcall(proc_ppc64_create);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
new file mode 100644
index 0000000000..9452a54d35
--- /dev/null
+++ b/arch/powerpc/kernel/process.c
@@ -0,0 +1,2410 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Derived from "arch/i386/kernel/process.c"
+ *    Copyright (C) 1995  Linus Torvalds
+ *
+ *  Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
+ *  Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/prctl.h>
+#include <linux/init_task.h>
+#include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/mqueue.h>
+#include <linux/hardirq.h>
+#include <linux/utsname.h>
+#include <linux/ftrace.h>
+#include <linux/kernel_stat.h>
+#include <linux/personality.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <linux/pkeys.h>
+#include <linux/seq_buf.h>
+
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/runlatch.h>
+#include <asm/syscalls.h>
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/debug.h>
+#ifdef CONFIG_PPC64
+#include <asm/firmware.h>
+#include <asm/hw_irq.h>
+#endif
+#include <asm/code-patching.h>
+#include <asm/exec.h>
+#include <asm/livepatch.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
+#include <asm/stacktrace.h>
+#include <asm/hw_breakpoint.h>
+
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+
+/* Transactional Memory debug */
+#ifdef TM_DEBUG_SW
+#define TM_DEBUG(x...) printk(KERN_INFO x)
+#else
+#define TM_DEBUG(x...) do { } while(0)
+#endif
+
+extern unsigned long _get_SP(void);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Are we running in "Suspend disabled" mode? If so we have to block any
+ * sigreturn that would get us into suspended state, and we also warn in some
+ * other paths that we should never reach with suspend disabled.
+ */
+bool tm_suspend_disabled __ro_after_init = false;
+
+static void check_if_tm_restore_required(struct task_struct *tsk)
+{
+	/*
+	 * This only matters when we are saving the current thread's
+	 * registers while that thread is in a transactional state and
+	 * TIF_RESTORE_TM has not been set yet.
+	 */
+	if (tsk != current || !tsk->thread.regs)
+		return;
+	if (!MSR_TM_ACTIVE(tsk->thread.regs->msr))
+		return;
+	if (test_thread_flag(TIF_RESTORE_TM))
+		return;
+
+	/*
+	 * Copy the live MSR into the checkpointed regs and set the
+	 * TIF_RESTORE_TM bit so that we know to restore the registers
+	 * before returning to userspace.
+	 */
+	regs_set_return_msr(&tsk->thread.ckpt_regs,
+					tsk->thread.regs->msr);
+	set_thread_flag(TIF_RESTORE_TM);
+}
+
+#else
+static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+bool strict_msr_control;
+EXPORT_SYMBOL(strict_msr_control);
+
+/*
+ * Early-parameter handler: turn on strict_msr_control.  The parameter
+ * value in @str is not looked at.
+ */
+static int __init enable_strict_msr_control(char *str)
+{
+	pr_info("Enabling strict facility control\n");
+	strict_msr_control = true;
+
+	return 0;
+}
+early_param("ppc_strict_facility_enable", enable_strict_msr_control);
+
+/*
+ * Set @bits in the MSR (plus MSR_VSX when MSR_FP is requested on a CPU with
+ * VSX), writing the register only if that changes it.  Returns the value
+ * handed back by mtmsr_isync_irqsafe() when a write happened, otherwise the
+ * unchanged MSR.
+ *
+ * notrace because it's called by restore_math
+ */
+unsigned long notrace msr_check_and_set(unsigned long bits)
+{
+	unsigned long cur = mfmsr();
+	unsigned long want = cur | bits;
+
+	if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
+		want |= MSR_VSX;
+
+	if (want == cur)
+		return want;
+
+	return mtmsr_isync_irqsafe(want);
+}
+EXPORT_SYMBOL_GPL(msr_check_and_set);
+
+/*
+ * Clear @bits in the MSR (plus MSR_VSX when MSR_FP is being cleared on a
+ * CPU with VSX), writing the register only if that changes it.
+ *
+ * notrace because it's called by restore_math
+ */
+void notrace __msr_check_and_clear(unsigned long bits)
+{
+	unsigned long cur = mfmsr();
+	unsigned long want = cur & ~bits;
+
+	if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
+		want &= ~MSR_VSX;
+
+	if (want == cur)
+		return;
+
+	mtmsr_isync_irqsafe(want);
+}
+EXPORT_SYMBOL(__msr_check_and_clear);
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Save @tsk's FP state, then remove MSR_FP, MSR_FE0 and MSR_FE1 (and
+ * MSR_VSX on CPUs with VSX) from the MSR in its saved user registers.
+ */
+static void __giveup_fpu(struct task_struct *tsk)
+{
+	unsigned long drop = MSR_FP|MSR_FE0|MSR_FE1;
+
+	save_fpu(tsk);
+
+	if (cpu_has_feature(CPU_FTR_VSX))
+		drop |= MSR_VSX;
+
+	regs_set_return_msr(tsk->thread.regs, tsk->thread.regs->msr & ~drop);
+}
+
+/*
+ * Save @tsk's FP state and drop its FP ownership bits, enabling MSR_FP in
+ * the kernel MSR only for the duration of the save.
+ */
+void giveup_fpu(struct task_struct *tsk)
+{
+	check_if_tm_restore_required(tsk);
+
+	/* The facility must be enabled in the MSR while the state is saved */
+	msr_check_and_set(MSR_FP);
+	__giveup_fpu(tsk);
+	msr_check_and_clear(MSR_FP);
+}
+EXPORT_SYMBOL(giveup_fpu);
+
+/*
+ * Make sure the floating-point register state in the
+ * thread_struct is up to date for task tsk.
+ */
+void flush_fp_to_thread(struct task_struct *tsk)
+{
+	/* Nothing to flush for a task without saved user registers */
+	if (!tsk->thread.regs)
+		return;
+
+	/*
+	 * We need to disable preemption here because if we didn't,
+	 * another process could get scheduled after the regs->msr
+	 * test but before we have finished saving the FP registers
+	 * to the thread_struct.  That process could take over the
+	 * FPU, and then when we get scheduled again we would store
+	 * bogus values for the remaining FP registers.
+	 */
+	preempt_disable();
+	if (tsk->thread.regs->msr & MSR_FP) {
+		/*
+		 * This should only ever be called for current or
+		 * for a stopped child process.  Since we save away
+		 * the FP register state on context switch,
+		 * there is something wrong if a stopped child appears
+		 * to still have its FP state in the CPU registers.
+		 */
+		BUG_ON(tsk != current);
+		giveup_fpu(tsk);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(flush_fp_to_thread);
+
+/*
+ * Enable MSR_FP for kernel use, first saving the current task's live FP
+ * state if it has any.  Warns if called while preemptible.
+ */
+void enable_kernel_fp(void)
+{
+	unsigned long cpumsr;
+
+	WARN_ON(preemptible());
+
+	cpumsr = msr_check_and_set(MSR_FP);
+
+	/* No user FP state live in the registers: nothing to save */
+	if (!current->thread.regs || !(current->thread.regs->msr & MSR_FP))
+		return;
+
+	check_if_tm_restore_required(current);
+
+	/*
+	 * If a thread has already been reclaimed then the
+	 * checkpointed registers are on the CPU but have definitely
+	 * been saved by the reclaim code. Don't need to and *cannot*
+	 * giveup as this would save to the 'live' structure not the
+	 * checkpointed structure.
+	 */
+	if (!MSR_TM_ACTIVE(cpumsr) &&
+	     MSR_TM_ACTIVE(current->thread.regs->msr))
+		return;
+
+	__giveup_fpu(current);
+}
+EXPORT_SYMBOL(enable_kernel_fp);
+#else
+static inline void __giveup_fpu(struct task_struct *tsk) { }
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Save @tsk's Altivec state, then remove MSR_VEC (and MSR_VSX on CPUs with
+ * VSX) from the MSR in its saved user registers.
+ */
+static void __giveup_altivec(struct task_struct *tsk)
+{
+	unsigned long drop = MSR_VEC;
+
+	save_altivec(tsk);
+
+	if (cpu_has_feature(CPU_FTR_VSX))
+		drop |= MSR_VSX;
+
+	regs_set_return_msr(tsk->thread.regs, tsk->thread.regs->msr & ~drop);
+}
+
+/*
+ * Save @tsk's Altivec state and drop its ownership bits, enabling MSR_VEC
+ * in the kernel MSR only for the duration of the save.
+ */
+void giveup_altivec(struct task_struct *tsk)
+{
+	check_if_tm_restore_required(tsk);
+
+	/* The facility must be enabled in the MSR while the state is saved */
+	msr_check_and_set(MSR_VEC);
+	__giveup_altivec(tsk);
+	msr_check_and_clear(MSR_VEC);
+}
+EXPORT_SYMBOL(giveup_altivec);
+
+/*
+ * Enable MSR_VEC for kernel use, first saving the current task's live
+ * Altivec state if it has any.  Warns if called while preemptible.
+ */
+void enable_kernel_altivec(void)
+{
+	unsigned long cpumsr;
+
+	WARN_ON(preemptible());
+
+	cpumsr = msr_check_and_set(MSR_VEC);
+
+	/* No user vector state live in the registers: nothing to save */
+	if (!current->thread.regs || !(current->thread.regs->msr & MSR_VEC))
+		return;
+
+	check_if_tm_restore_required(current);
+
+	/*
+	 * If a thread has already been reclaimed then the
+	 * checkpointed registers are on the CPU but have definitely
+	 * been saved by the reclaim code. Don't need to and *cannot*
+	 * giveup as this would save to the 'live' structure not the
+	 * checkpointed structure.
+	 */
+	if (!MSR_TM_ACTIVE(cpumsr) &&
+	     MSR_TM_ACTIVE(current->thread.regs->msr))
+		return;
+
+	__giveup_altivec(current);
+}
+EXPORT_SYMBOL(enable_kernel_altivec);
+
+/*
+ * Make sure the VMX/Altivec register state in the
+ * thread_struct is up to date for task tsk.
+ */
+void flush_altivec_to_thread(struct task_struct *tsk)
+{
+	/* Nothing to flush for a task without saved user registers */
+	if (!tsk->thread.regs)
+		return;
+
+	/* The MSR test and the save must not be separated by a preemption */
+	preempt_disable();
+	if (tsk->thread.regs->msr & MSR_VEC) {
+		/* Live state in the registers is only expected for current */
+		BUG_ON(tsk != current);
+		giveup_altivec(tsk);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/*
+ * Save whichever of @tsk's FP and Altivec state is live according to its
+ * user MSR.  The MSR is sampled once up front, so the MSR_VEC test below
+ * still sees the original value after __giveup_fpu() has updated the saved
+ * registers.
+ */
+static void __giveup_vsx(struct task_struct *tsk)
+{
+	unsigned long msr = tsk->thread.regs->msr;
+
+	/*
+	 * We should never be setting MSR_VSX without also setting
+	 * MSR_FP and MSR_VEC
+	 */
+	WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+	/* __giveup_fpu will clear MSR_VSX */
+	if (msr & MSR_FP)
+		__giveup_fpu(tsk);
+	if (msr & MSR_VEC)
+		__giveup_altivec(tsk);
+}
+
+/*
+ * Save @tsk's FP/Altivec/VSX state, enabling all three facilities in the
+ * kernel MSR only for the duration of the save.
+ */
+static void giveup_vsx(struct task_struct *tsk)
+{
+	check_if_tm_restore_required(tsk);
+
+	/* The facilities must be enabled in the MSR while state is saved */
+	msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
+	__giveup_vsx(tsk);
+	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
+}
+
+/*
+ * Enable FP, Altivec and VSX for kernel use, first saving whichever of
+ * those the current task has live.  Warns if called while preemptible.
+ */
+void enable_kernel_vsx(void)
+{
+	unsigned long cpumsr;
+
+	WARN_ON(preemptible());
+
+	cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
+
+	/* No user FP/vector state live in the registers: nothing to save */
+	if (!current->thread.regs ||
+	    !(current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)))
+		return;
+
+	check_if_tm_restore_required(current);
+
+	/*
+	 * If a thread has already been reclaimed then the
+	 * checkpointed registers are on the CPU but have definitely
+	 * been saved by the reclaim code. Don't need to and *cannot*
+	 * giveup as this would save to the 'live' structure not the
+	 * checkpointed structure.
+	 */
+	if (!MSR_TM_ACTIVE(cpumsr) &&
+	     MSR_TM_ACTIVE(current->thread.regs->msr))
+		return;
+
+	__giveup_vsx(current);
+}
+EXPORT_SYMBOL(enable_kernel_vsx);
+
+/*
+ * Make sure the FP/Altivec/VSX register state in the thread_struct is up
+ * to date for task tsk.
+ */
+void flush_vsx_to_thread(struct task_struct *tsk)
+{
+	/* Nothing to flush for a task without saved user registers */
+	if (!tsk->thread.regs)
+		return;
+
+	/* The MSR test and the save must not be separated by a preemption */
+	preempt_disable();
+	if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
+		/* Live state in the registers is only expected for current */
+		BUG_ON(tsk != current);
+		giveup_vsx(tsk);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_SPE
+/*
+ * Save @tsk's SPE state via __giveup_spe(), enabling MSR_SPE in the kernel
+ * MSR only for the duration of the call.
+ */
+void giveup_spe(struct task_struct *tsk)
+{
+	check_if_tm_restore_required(tsk);
+
+	/* The facility must be enabled in the MSR while the state is saved */
+	msr_check_and_set(MSR_SPE);
+	__giveup_spe(tsk);
+	msr_check_and_clear(MSR_SPE);
+}
+EXPORT_SYMBOL(giveup_spe);
+
+/*
+ * Enable MSR_SPE for kernel use, first saving the current task's live SPE
+ * state if it has any.  Warns if called while preemptible.
+ */
+void enable_kernel_spe(void)
+{
+	WARN_ON(preemptible());
+
+	msr_check_and_set(MSR_SPE);
+
+	/* No user SPE state live in the registers: nothing to save */
+	if (!current->thread.regs || !(current->thread.regs->msr & MSR_SPE))
+		return;
+
+	check_if_tm_restore_required(current);
+	__giveup_spe(current);
+}
+EXPORT_SYMBOL(enable_kernel_spe);
+
+/*
+ * Make sure the SPE register state (including SPEFSCR) in the
+ * thread_struct is up to date for task tsk.
+ */
+void flush_spe_to_thread(struct task_struct *tsk)
+{
+	/* Nothing to flush for a task without saved user registers */
+	if (!tsk->thread.regs)
+		return;
+
+	/* The MSR test and the save must not be separated by a preemption */
+	preempt_disable();
+	if (tsk->thread.regs->msr & MSR_SPE) {
+		/* Live state in the registers is only expected for current */
+		BUG_ON(tsk != current);
+		/* SPEFSCR is read from the SPR before the state is given up */
+		tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
+		giveup_spe(tsk);
+	}
+	preempt_enable();
+}
+#endif /* CONFIG_SPE */
+
+static unsigned long msr_all_available;
+
+/*
+ * Early initcall: build msr_all_available, the set of facility MSR bits
+ * (FP, VEC, VSX, SPE) this kernel/CPU combination supports.
+ */
+static int __init init_msr_all_available(void)
+{
+	unsigned long avail = 0;
+
+	/* FP depends on the kernel configuration, the rest on CPU features */
+	if (IS_ENABLED(CONFIG_PPC_FPU))
+		avail |= MSR_FP;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		avail |= MSR_VEC;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		avail |= MSR_VSX;
+	if (cpu_has_feature(CPU_FTR_SPE))
+		avail |= MSR_SPE;
+
+	msr_all_available |= avail;
+
+	return 0;
+}
+early_initcall(init_msr_all_available);
+
+/*
+ * Save every facility (FP, Altivec, SPE) that @tsk currently has live
+ * according to its user MSR.  Does nothing for tasks without saved user
+ * registers or without any live facility.
+ */
+void giveup_all(struct task_struct *tsk)
+{
+	unsigned long live;
+
+	if (!tsk->thread.regs)
+		return;
+
+	check_if_tm_restore_required(tsk);
+
+	/* Sample the user MSR once; the helpers below modify it */
+	live = tsk->thread.regs->msr;
+	if (!(live & msr_all_available))
+		return;
+
+	msr_check_and_set(msr_all_available);
+
+	/* MSR_VSX is never expected without both MSR_FP and MSR_VEC */
+	WARN_ON((live & MSR_VSX) && (!(live & MSR_FP) || !(live & MSR_VEC)));
+
+	if (live & MSR_FP)
+		__giveup_fpu(tsk);
+	if (live & MSR_VEC)
+		__giveup_altivec(tsk);
+	if (live & MSR_SPE)
+		__giveup_spe(tsk);
+
+	msr_check_and_clear(msr_all_available);
+}
+EXPORT_SYMBOL(giveup_all);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_FPU
+/*
+ * Decide whether the current task's FP state should be reloaded.
+ * thread.load_fp doubles as flag and counter: a non-zero value requests the
+ * restore and is incremented each time one is granted.
+ */
+static bool should_restore_fp(void)
+{
+	if (!current->thread.load_fp)
+		return false;
+
+	current->thread.load_fp++;
+	return true;
+}
+
+/* Reload the FP registers from the current task's saved fp_state. */
+static void do_restore_fp(void)
+{
+	load_fp_state(&current->thread.fp_state);
+}
+#else
+/* Without FPU support there is never anything to restore */
+static bool should_restore_fp(void) { return false; }
+static void do_restore_fp(void) { }
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static bool should_restore_altivec(void)
+{
+	/* Reload only when the CPU has AltiVec and load_vec is non-zero. */
+	if (!cpu_has_feature(CPU_FTR_ALTIVEC) || !current->thread.load_vec)
+		return false;
+	current->thread.load_vec++;
+	return true;
+}
+
+static void do_restore_altivec(void)
+{
+	load_vr_state(&current->thread.vr_state);
+	current->thread.used_vr = 1;
+}
+#else
+static bool should_restore_altivec(void) { return false; }
+static void do_restore_altivec(void) { }
+#endif /* CONFIG_ALTIVEC */
+
+/* VSX is a candidate whenever the CPU has it; restore_math() adds the FP+VEC check. */
+static bool should_restore_vsx(void)
+{
+	/* Unlike FP/VEC, no per-thread load counter is consulted here. */
+	return cpu_has_feature(CPU_FTR_VSX);
+}
+#ifdef CONFIG_VSX
+static void do_restore_vsx(void)
+{
+	current->thread.used_vsr = 1;	/* only marks the thread; no registers are loaded here */
+}
+#else
+static void do_restore_vsx(void) { }
+#endif /* CONFIG_VSX */
+
+/*
+ * The exception exit path calls restore_math() with interrupts hard disabled
+ * but the soft irq state not "reconciled". ftrace code that calls
+ * local_irq_save/restore causes warnings.
+ *
+ * Rather than complicate the exit path, just don't trace restore_math.
+ * Tracing could instead be supported by having the ftrace entry code check
+ * for this un-reconciled condition, where MSR[EE]=0 and PACA_IRQ_HARD_DIS is
+ * not set, and temporarily fix it up for the duration of the ftrace call.
+ */
+void notrace restore_math(struct pt_regs *regs)
+{
+	unsigned long msr;
+	unsigned long new_msr = 0;
+
+	msr = regs->msr;
+
+	/*
+	 * new_msr tracks the facilities that are to be restored. Only reload
+	 * if the bit is not set in the user MSR (if it is set, the registers
+	 * are live for the user thread).
+	 */
+	if ((!(msr & MSR_FP)) && should_restore_fp())
+		new_msr |= MSR_FP;
+
+	if ((!(msr & MSR_VEC)) && should_restore_altivec())
+		new_msr |= MSR_VEC;
+	/* VSX is only turned on when FP and VEC are both on (live or being reloaded). */
+	if ((!(msr & MSR_VSX)) && should_restore_vsx()) {
+		if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC))
+			new_msr |= MSR_VSX;
+	}
+
+	if (new_msr) {
+		unsigned long fpexc_mode = 0;
+
+		msr_check_and_set(new_msr);
+
+		if (new_msr & MSR_FP) {
+			do_restore_fp();
+
+			// This also covers VSX, because VSX implies FP
+			fpexc_mode = current->thread.fpexc_mode;
+		}
+
+		if (new_msr & MSR_VEC)
+			do_restore_altivec();
+
+		if (new_msr & MSR_VSX)
+			do_restore_vsx();
+
+		msr_check_and_clear(new_msr);
+		/* Expose the reloaded facilities and the FP exception mode in the return MSR. */
+		regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode);
+	}
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+static void save_all(struct task_struct *tsk)
+{
+	unsigned long usermsr;
+
+	if (!tsk->thread.regs)
+		return;
+
+	usermsr = tsk->thread.regs->msr;
+	/* Nothing to save if no available facility is enabled in the user MSR. */
+	if ((usermsr & msr_all_available) == 0)
+		return;
+
+	msr_check_and_set(msr_all_available);
+
+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
+	if (usermsr & MSR_FP)
+		save_fpu(tsk);
+
+	if (usermsr & MSR_VEC)
+		save_altivec(tsk);
+	/* NOTE(review): SPE uses __giveup_spe(), not a save_*() helper like FP/VEC - confirm intent. */
+	if (usermsr & MSR_SPE)
+		__giveup_spe(tsk);
+
+	msr_check_and_clear(msr_all_available);
+}
+
+void flush_all_to_thread(struct task_struct *tsk)
+{
+	if (!tsk->thread.regs)
+		return;
+	/* SPEFSCR (when SPE is live) is captured before the facilities are saved. */
+	preempt_disable();
+	BUG_ON(tsk != current);
+#ifdef CONFIG_SPE
+	if (tsk->thread.regs->msr & MSR_SPE)
+		tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
+#endif
+	save_all(tsk);
+	preempt_enable();
+}
+EXPORT_SYMBOL(flush_all_to_thread);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+void do_send_trap(struct pt_regs *regs, unsigned long address,
+		  unsigned long error_code, int breakpt)
+{
+	current->thread.trap_nr = TRAP_HWBKPT;
+	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+			11, SIGSEGV) == NOTIFY_STOP)
+		return;	/* the notifier chain returned NOTIFY_STOP: no signal is sent */
+
+	/* Deliver the signal to userspace */
+	force_sig_ptrace_errno_trap(breakpt, /* breakpoint or watchpoint id */
+				    (void __user *)address);
+}
+#else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
+
+static void do_break_handler(struct pt_regs *regs)
+{
+	struct arch_hw_breakpoint null_brk = {0};
+	struct arch_hw_breakpoint *wp;
+	ppc_inst_t instr = ppc_inst(0);
+	int type = 0, size = 0;
+	unsigned long ea;
+	int slot;
+
+	/*
+	 * With a single watchpoint slot (8xx included) that slot must be the
+	 * one that fired, so there is nothing to search for.
+	 */
+	if (nr_wp_slots() == 1) {
+		wp = &current->thread.hw_brk[0];
+		__set_breakpoint(0, &null_brk);
+		*wp = null_brk;
+		wp->flags |= HW_BRK_FLAG_DISABLED;
+		return;
+	}
+
+	/* Otherwise find out which DAWR(s) caused the exception and disable them. */
+	wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+	for (slot = 0; slot < nr_wp_slots(); slot++) {
+		wp = &current->thread.hw_brk[slot];
+		/* Skip slots with no address and slots the constraints check rejects. */
+		if (!wp->address ||
+		    !wp_check_constraints(regs, instr, ea, type, size, wp))
+			continue;
+
+		__set_breakpoint(slot, &null_brk);
+		*wp = null_brk;
+		wp->flags |= HW_BRK_FLAG_DISABLED;
+	}
+}
+
+DEFINE_INTERRUPT_HANDLER(do_break)
+{
+	current->thread.trap_nr = TRAP_HWBKPT;
+	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr,
+			11, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (debugger_break_match(regs))
+		return;
+
+	/*
+	 * We get here only when the watchpoint exception was raised for a
+	 * ptrace event (or the hw is buggy!). If CONFIG_HAVE_HW_BREAKPOINT is
+	 * set, hw_breakpoint_handler() has already handled the watchpoint and
+	 * there is nothing left to do. If it is not set, the watchpoint has
+	 * to be handled manually here.
+	 */
+	if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT))
+		do_break_handler(regs);
+
+	/* Deliver the signal to userspace */
+	force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar);
+}
+#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
+/* Per-CPU copy of the breakpoints last programmed through __set_breakpoint(). */
+static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+/*
+ * Set the debug registers back to their default "safe" values.
+ */
+static void set_debug_reg_defaults(struct thread_struct *thread)
+{
+	thread->debug.iac1 = thread->debug.iac2 = 0;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	thread->debug.iac3 = thread->debug.iac4 = 0;
+#endif
+	thread->debug.dac1 = thread->debug.dac2 = 0;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+	thread->debug.dvc1 = thread->debug.dvc2 = 0;
+#endif
+	thread->debug.dbcr0 = 0;	/* clears every DBCR0 bit, DBCR0_IDM included */
+#ifdef CONFIG_BOOKE
+	/*
+	 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
+	 */
+	thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
+			DBCR1_IAC3US | DBCR1_IAC4US;
+	/*
+	 * Force Data Address Compare User/Supervisor bits to be User-only
+	 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
+	 */
+	thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#else
+	thread->debug.dbcr1 = 0;
+#endif
+}
+
+static void prime_debug_regs(struct debug_reg *debug)
+{
+	/*
+	 * We could have inherited MSR_DE from userspace, since
+	 * it doesn't get cleared on exception entry.  Make sure
+	 * MSR_DE is clear before we enable any debug events.
+	 */
+	mtmsr(mfmsr() & ~MSR_DE);
+	/* The compare registers (IAC/DAC/DVC) are written before the DBCR control registers. */
+	mtspr(SPRN_IAC1, debug->iac1);
+	mtspr(SPRN_IAC2, debug->iac2);
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	mtspr(SPRN_IAC3, debug->iac3);
+	mtspr(SPRN_IAC4, debug->iac4);
+#endif
+	mtspr(SPRN_DAC1, debug->dac1);
+	mtspr(SPRN_DAC2, debug->dac2);
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+	mtspr(SPRN_DVC1, debug->dvc1);
+	mtspr(SPRN_DVC2, debug->dvc2);
+#endif
+	mtspr(SPRN_DBCR0, debug->dbcr0);
+	mtspr(SPRN_DBCR1, debug->dbcr1);
+#ifdef CONFIG_BOOKE
+	mtspr(SPRN_DBCR2, debug->dbcr2);
+#endif
+}
+/*
+ * Unless neither the old nor the new thread is making use of the
+ * debug registers, set the debug registers from the values
+ * stored in the new thread.
+ */
+void switch_booke_debug_regs(struct debug_reg *new_debug)
+{
+	/* DBCR0_IDM set in either DBCR0 image means the registers are in use. */
+	if ((current->thread.debug.dbcr0 | new_debug->dbcr0) & DBCR0_IDM)
+		prime_debug_regs(new_debug);
+}
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
+#else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+static void set_breakpoint(int i, struct arch_hw_breakpoint *brk)
+{
+	preempt_disable();	/* __set_breakpoint() writes this CPU's current_brk[] */
+	__set_breakpoint(i, brk);
+	preempt_enable();
+}
+
+static void set_debug_reg_defaults(struct thread_struct *thread)
+{
+	struct arch_hw_breakpoint null_brk = {0};
+	int slot;
+
+	for (slot = 0; slot < nr_wp_slots(); slot++) {
+		thread->hw_brk[slot] = null_brk;	/* reset the thread's copy */
+		if (ppc_breakpoint_available())
+			set_breakpoint(slot, &thread->hw_brk[slot]);
+	}
+}
+
+/*
+ * Report whether two breakpoint descriptors agree on address, type
+ * and len.
+ */
+static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
+				struct arch_hw_breakpoint *b)
+{
+	/* no need to check hw_len. it's calculated from address and len */
+	return a->address == b->address &&
+	       a->type == b->type &&
+	       a->len == b->len;
+}
+
+static void switch_hw_breakpoint(struct task_struct *new)
+{
+	int slot;
+
+	for (slot = 0; slot < nr_wp_slots(); slot++) {
+		struct arch_hw_breakpoint *wanted = &new->thread.hw_brk[slot];
+
+		/* Only reprogram a slot whose per-CPU copy differs from the task's. */
+		if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk[slot]), wanted)))
+			__set_breakpoint(slot, wanted);
+	}
+}
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
+#endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
+
+static inline int set_dabr(struct arch_hw_breakpoint *brk)
+{
+	unsigned long dabr, dabrx;
+
+	dabr = brk->address | (brk->type & HW_BRK_TYPE_DABR);
+	dabrx = ((brk->type >> 3) & 0x7);
+
+	if (ppc_md.set_dabr)
+		return ppc_md.set_dabr(dabr, dabrx);
+
+	if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
+		mtspr(SPRN_DAC1, dabr);
+		if (IS_ENABLED(CONFIG_PPC_47x))
+			isync();
+		return 0;
+	}
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
+		return -EINVAL;
+
+	mtspr(SPRN_DABR, dabr);
+	if (cpu_has_feature(CPU_FTR_DABRX))
+		mtspr(SPRN_DABRX, dabrx);
+	return 0;
+}
+
+static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
+{
+	unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
+			       LCTRL1_CRWF_RW;
+	unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
+	unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE);
+	unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE);
+
+	if (start_addr == 0)
+		lctrl2 |= LCTRL2_LW0LA_F;
+	else if (end_addr == 0)
+		lctrl2 |= LCTRL2_LW0LA_E;
+	else
+		lctrl2 |= LCTRL2_LW0LA_EandF;
+	/* Clear LCTRL2 first; it stays cleared if neither read nor write is requested. */
+	mtspr(SPRN_LCTRL2, 0);
+
+	if ((brk->type & HW_BRK_TYPE_RDWR) == 0)
+		return 0;
+
+	if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
+		lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO;
+	if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
+		lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
+	/* Program CMPE with start - 1 and CMPF with end, then the two control registers. */
+	mtspr(SPRN_CMPE, start_addr - 1);
+	mtspr(SPRN_CMPF, end_addr);
+	mtspr(SPRN_LCTRL1, lctrl1);
+	mtspr(SPRN_LCTRL2, lctrl2);
+
+	return 0;
+}
+
+static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+	if (dawr_enabled())
+		// Power8 or later
+		set_dawr(nr, brk);
+	else if (IS_ENABLED(CONFIG_PPC_8xx))
+		set_breakpoint_8xx(brk);	// takes no slot number: nr is not used here
+	else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		// Power7 or earlier
+		set_dabr(brk);
+	else
+		// Shouldn't happen due to higher level checks
+		WARN_ON_ONCE(1);
+}
+
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));	/* record per CPU */
+	set_hw_breakpoint(nr, brk);	/* then program the hardware */
+}
+
+/* Check if we have DAWR or DABR hardware */
+bool ppc_breakpoint_available(void)
+{
+	/* POWER8 DAWR or POWER9 forced DAWR */
+	if (dawr_enabled())
+		return true;
+	/* No DAWR: POWER9 with DAWR disabled has nothing to offer, while */
+	/* everything but POWER8 and POWER9 falls back to DABR. */
+	return !cpu_has_feature(CPU_FTR_ARCH_207S);
+}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
+/* Disable the breakpoint in hardware without touching current_brk[] */
+void suspend_breakpoints(void)
+{
+	struct arch_hw_breakpoint cleared = {0};
+	int slot;
+
+	if (ppc_breakpoint_available()) {
+		/* set_hw_breakpoint(), unlike __set_breakpoint(), skips current_brk[]. */
+		for (slot = 0; slot < nr_wp_slots(); slot++)
+			set_hw_breakpoint(slot, &cleared);
+	}
+}
+
+/*
+ * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
+ * from current_brk[]. Does nothing when no breakpoint hardware is
+ * available.
+ */
+void restore_breakpoints(void)
+{
+	int slot;
+
+	if (ppc_breakpoint_available()) {
+		for (slot = 0; slot < nr_wp_slots(); slot++)
+			set_hw_breakpoint(slot, this_cpu_ptr(&current_brk[slot]));
+	}
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/* True when @tsk is non-NULL, has a user register frame, and MSR_TM is set in it. */
+static inline bool tm_enabled(struct task_struct *tsk)
+{
+	return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM);
+}
+
+static void tm_reclaim_thread(struct thread_struct *thr, uint8_t cause)
+{
+	/*
+	 * Use the current MSR TM suspended bit to track if we have
+	 * checkpointed state outstanding.
+	 * On signal delivery, we'd normally reclaim the checkpointed
+	 * state to obtain the stack pointer (see get_tm_stackpointer()).
+	 * This will then directly return to userspace without going
+	 * through __switch_to(). However, if the stack frame is bad,
+	 * we need to exit this thread which calls __switch_to() which
+	 * will again attempt to reclaim the already saved tm state.
+	 * Hence we need to check that we've not already reclaimed
+	 * this state.
+	 * We do this using the current MSR, rather than tracking it in
+	 * some specific thread_struct bit, as it has the additional
+	 * benefit of checking for a potential TM bad thing exception.
+	 */
+	if (!MSR_TM_SUSPENDED(mfmsr()))
+		return;
+
+	giveup_all(container_of(thr, struct task_struct, thread));
+
+	tm_reclaim(thr, cause);
+
+	/*
+	 * If we are in a transaction and FP is off then we can't have
+	 * used FP inside that transaction. Hence the checkpointed
+	 * state is the same as the live state. We need to copy the
+	 * live state to the checkpointed state so that when the
+	 * transaction is restored, the checkpointed state is correct
+	 * and the aborted transaction sees the correct state. We use
+	 * ckpt_regs.msr here as that's what tm_reclaim will use to
+	 * determine if it's going to write the checkpointed state or
+	 * not. So either this will write the checkpointed registers,
+	 * or reclaim will. Similarly for VMX.
+	 */
+	if ((thr->ckpt_regs.msr & MSR_FP) == 0)
+		memcpy(&thr->ckfp_state, &thr->fp_state,
+		       sizeof(struct thread_fp_state));
+	if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
+		memcpy(&thr->ckvr_state, &thr->vr_state,
+		       sizeof(struct thread_vr_state));
+}
+
+void tm_reclaim_current(uint8_t cause)
+{
+	tm_enable();	/* presumably turns MSR[TM] on for the kernel before the reclaim - confirm */
+	tm_reclaim_thread(&current->thread, cause);
+}
+
+static inline void tm_reclaim_task(struct task_struct *tsk)
+{
+	/* We have to work out if we're switching from/to a task that's in the
+	 * middle of a transaction.
+	 *
+	 * In switching we need to maintain a 2nd register state as
+	 * oldtask->thread.ckpt_regs.  We tm_reclaim(oldproc); this saves the
+	 * checkpointed (tbegin) state in ckpt_regs, ckfp_state and
+	 * ckvr_state
+	 *
+	 * We also context switch (save) TFHAR/TEXASR/TFIAR in here.
+	 */
+	struct thread_struct *thr = &tsk->thread;
+
+	if (!thr->regs)
+		return;
+
+	if (!MSR_TM_ACTIVE(thr->regs->msr))
+		goto out_and_saveregs;
+	/* Warn if a transaction is active while tm_suspend_disabled is set. */
+	WARN_ON(tm_suspend_disabled);
+
+	TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
+		 "ccr=%lx, msr=%lx, trap=%lx)\n",
+		 tsk->pid, thr->regs->nip,
+		 thr->regs->ccr, thr->regs->msr,
+		 thr->regs->trap);
+
+	tm_reclaim_thread(thr, TM_CAUSE_RESCHED);
+
+	TM_DEBUG("--- tm_reclaim on pid %d complete\n",
+		 tsk->pid);
+
+out_and_saveregs:
+	/* Always save the regs here, even if a transaction's not active.
+	 * This context-switches a thread's TM info SPRs.  We do it here to
+	 * be consistent with the restore path (in recheckpoint) which
+	 * cannot happen later in _switch().
+	 */
+	tm_save_sprs(thr);
+}
+
+extern void __tm_recheckpoint(struct thread_struct *thread);
+
+void tm_recheckpoint(struct thread_struct *thread)
+{
+	unsigned long flags;
+
+	if (!(thread->regs->msr & MSR_TM))
+		return;	/* MSR_TM is not set in the thread's MSR: nothing to do */
+
+	/* We really can't be interrupted here as the TEXASR registers can't
+	 * change and later in the trecheckpoint code, we have a userspace R1.
+	 * So let's hard disable over this region.
+	 */
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	/* The TM SPRs are restored here, so that TEXASR.FS can be set
+	 * before the trecheckpoint and no explosion occurs.
+	 */
+	tm_restore_sprs(thread);
+
+	__tm_recheckpoint(thread);
+
+	local_irq_restore(flags);
+}
+
+static inline void tm_recheckpoint_new_task(struct task_struct *new)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	/* Recheckpoint the registers of the thread we're about to switch to.
+	 *
+	 * If the task was using FP, we non-lazily reload both the original and
+	 * the speculative FP register states.  This is because the kernel
+	 * doesn't see if/when a TM rollback occurs, so if we take an FP
+	 * unavailable later, we are unable to determine which set of FP regs
+	 * need to be restored.
+	 */
+	if (!tm_enabled(new))
+		return;
+	/* TM enabled but no transaction active: only the TM SPRs are restored. */
+	if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
+		tm_restore_sprs(&new->thread);
+		return;
+	}
+	/* Recheckpoint to restore original checkpointed register state. */
+	TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
+		 new->pid, new->thread.regs->msr);
+
+	tm_recheckpoint(&new->thread);
+
+	/*
+	 * The checkpointed state has been restored but the live state has
+	 * not, ensure all the math functionality is turned off to trigger
+	 * restore_math() to reload.
+	 */
+	new->thread.regs->msr &= ~(MSR_FP | MSR_VEC | MSR_VSX);
+
+	TM_DEBUG("*** tm_recheckpoint of pid %d complete "
+		 "(kernel msr 0x%lx)\n",
+		 new->pid, mfmsr());
+}
+
+static inline void __switch_to_tm(struct task_struct *prev,
+		struct task_struct *new)
+{
+	if (cpu_has_feature(CPU_FTR_TM)) {
+		if (tm_enabled(prev) || tm_enabled(new))
+			tm_enable();
+		/* Bump load_tm; drop MSR_TM once it reads 0 and no transaction is active. */
+		if (tm_enabled(prev)) {
+			prev->thread.load_tm++;
+			tm_reclaim_task(prev);
+			if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && prev->thread.load_tm == 0)
+				prev->thread.regs->msr &= ~MSR_TM;
+		}
+
+		tm_recheckpoint_new_task(new);
+	}
+}
+
+/*
+ * This is called if we are on the way out to userspace and the
+ * TIF_RESTORE_TM flag is set.  It checks if we need to reload
+ * FP and/or vector state and does so if necessary.
+ * If userspace is inside a transaction (whether active or
+ * suspended) and FP/VMX/VSX instructions have ever been enabled
+ * inside that transaction, then we have to keep them enabled
+ * and keep the FP/VMX/VSX state loaded while ever the transaction
+ * continues.  The reason is that if we didn't, and subsequently
+ * got a FP/VMX/VSX unavailable interrupt inside a transaction,
+ * we don't know whether it's the same transaction, and thus we
+ * don't know which of the checkpointed state and the transactional
+ * state to use.
+ */
+void restore_tm_state(struct pt_regs *regs)
+{
+	unsigned long msr_diff;
+
+	/*
+	 * This is the only moment we should clear TIF_RESTORE_TM as
+	 * it is here that ckpt_regs.msr and pt_regs.msr become the same
+	 * again, anything else could lead to an incorrect ckpt_msr being
+	 * saved and therefore incorrect signal contexts.
+	 */
+	clear_thread_flag(TIF_RESTORE_TM);
+	if (!MSR_TM_ACTIVE(regs->msr))
+		return;
+	/* FP/VEC/VSX bits set in the checkpointed MSR but clear in the live MSR. */
+	msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
+	msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
+
+	/* Ensure that restore_math() will restore */
+	if (msr_diff & MSR_FP)
+		current->thread.load_fp = 1;
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
+		current->thread.load_vec = 1;
+#endif
+	restore_math(regs);
+
+	regs_set_return_msr(regs, regs->msr | msr_diff);
+}
+
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+#define tm_recheckpoint_new_task(new)
+#define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+static inline void save_sprs(struct thread_struct *t)
+{
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		t->vrsave = mfspr(SPRN_VRSAVE);
+#endif
+#ifdef CONFIG_SPE
+	if (cpu_has_feature(CPU_FTR_SPE))
+		t->spefscr = mfspr(SPRN_SPEFSCR);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		t->dscr = mfspr(SPRN_DSCR);
+	/* The EBB registers, FSCR and TAR are all gated on CPU_FTR_ARCH_207S. */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		t->bescr = mfspr(SPRN_BESCR);
+		t->ebbhr = mfspr(SPRN_EBBHR);
+		t->ebbrr = mfspr(SPRN_EBBRR);
+
+		t->fscr = mfspr(SPRN_FSCR);
+
+		/*
+		 * Note that the TAR is not available for use in the kernel.
+		 * (To provide this, the TAR should be backed up/restored on
+		 * exception entry/exit instead, and be in pt_regs.  FIXME,
+		 * this should be in pt_regs anyway (for debug).)
+		 */
+		t->tar = mfspr(SPRN_TAR);
+	}
+
+	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+		t->hashkeyr = mfspr(SPRN_HASHKEYR);
+#endif
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+	unsigned long usermsr;
+
+	if (!current->thread.regs)
+		return;
+
+	usermsr = current->thread.regs->msr;
+
+	/* Caller has enabled FP/VEC/VSX/TM in MSR */
+	if (usermsr & MSR_FP)
+		__giveup_fpu(current);
+	if (usermsr & MSR_VEC)
+		__giveup_altivec(current);
+	/* If MSR_TM is set for the task: save the TM SPRs, then clear MSR_TM in its user MSR. */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (usermsr & MSR_TM) {
+		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+		current->thread.regs->msr &= ~MSR_TM;
+	}
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
+
+void kvmppc_save_current_sprs(void)
+{
+	save_sprs(&current->thread);	/* exported entry point for the static save_sprs() */
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+static inline void restore_sprs(struct thread_struct *old_thread,
+				struct thread_struct *new_thread)
+{
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+	    old_thread->vrsave != new_thread->vrsave)
+		mtspr(SPRN_VRSAVE, new_thread->vrsave);
+#endif
+#ifdef CONFIG_SPE
+	if (cpu_has_feature(CPU_FTR_SPE) &&
+	    old_thread->spefscr != new_thread->spefscr)
+		mtspr(SPRN_SPEFSCR, new_thread->spefscr);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		u64 dscr = get_paca()->dscr_default;
+		if (new_thread->dscr_inherit)
+			dscr = new_thread->dscr;
+
+		if (old_thread->dscr != dscr)
+			mtspr(SPRN_DSCR, dscr);
+	}
+	/* As everywhere in this function, an SPR is only written when its value changes. */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		if (old_thread->bescr != new_thread->bescr)
+			mtspr(SPRN_BESCR, new_thread->bescr);
+		if (old_thread->ebbhr != new_thread->ebbhr)
+			mtspr(SPRN_EBBHR, new_thread->ebbhr);
+		if (old_thread->ebbrr != new_thread->ebbrr)
+			mtspr(SPRN_EBBRR, new_thread->ebbrr);
+
+		if (old_thread->fscr != new_thread->fscr)
+			mtspr(SPRN_FSCR, new_thread->fscr);
+
+		if (old_thread->tar != new_thread->tar)
+			mtspr(SPRN_TAR, new_thread->tar);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+	    old_thread->tidr != new_thread->tidr)
+		mtspr(SPRN_TIDR, new_thread->tidr);
+
+	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) &&
+	    old_thread->hashkeyr != new_thread->hashkeyr)
+		mtspr(SPRN_HASHKEYR, new_thread->hashkeyr);
+#endif
+
+}
+
+struct task_struct *__switch_to(struct task_struct *prev,
+	struct task_struct *new)
+{
+	struct thread_struct *new_thread, *old_thread;
+	struct task_struct *last;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	struct ppc64_tlb_batch *batch;
+#endif
+
+	new_thread = &new->thread;
+	old_thread = &current->thread;
+
+	WARN_ON(!irqs_disabled());
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	batch = this_cpu_ptr(&ppc64_tlb_batch);
+	if (batch->active) {
+		current_thread_info()->local_flags |= _TLF_LAZY_MMU;
+		if (batch->index)
+			__flush_tlb_pending(batch);
+		batch->active = 0;
+	}
+
+	/*
+	 * On POWER9 the copy-paste buffer can only paste into
+	 * foreign real addresses, so unprivileged processes can not
+	 * see the data or use it in any way unless they have
+	 * foreign real mappings. If the new process has the foreign
+	 * real address mappings, we must issue a cp_abort to clear
+	 * any state and prevent snooping, corruption or a covert
+	 * channel. ISA v3.1 supports paste into local memory.
+	 */
+	if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
+			atomic_read(&new->mm->context.vas_windows)))
+		asm volatile(PPC_CP_ABORT);
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	switch_booke_debug_regs(&new->thread.debug);
+#else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+	switch_hw_breakpoint(new);
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+	/*
+	 * We need to save SPRs before treclaim/trecheckpoint as these will
+	 * change a number of them.
+	 */
+	save_sprs(&prev->thread);
+
+	/* Save FPU, Altivec, VSX and SPE state */
+	giveup_all(prev);
+
+	__switch_to_tm(prev, new);
+
+	if (!radix_enabled()) {
+		/*
+		 * We can't take a PMU exception inside _switch() since there
+		 * is a window where the kernel stack SLB and the kernel stack
+		 * are out of sync. Hard disable here.
+		 */
+		hard_irq_disable();
+	}
+
+	/*
+	 * Call restore_sprs() and set_return_regs_changed() before calling
+	 * _switch(). If we move it after _switch() then we miss out on calling
+	 * it for new tasks. The reason for this is we manually create a stack
+	 * frame for new tasks that directly returns through ret_from_fork() or
+	 * ret_from_kernel_thread(). See copy_thread() for details.
+	 */
+	restore_sprs(old_thread, new_thread);
+
+	set_return_regs_changed(); /* _switch changes stack (and regs) */
+
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		kuap_assert_locked();
+
+	last = _switch(old_thread, new_thread);
+
+	/*
+	 * Nothing after _switch will be run for newly created tasks,
+	 * because they switch directly to ret_from_fork/ret_from_kernel_thread
+	 * etc. Code added here should have a comment explaining why that is
+	 * okay.
+	 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/*
+	 * This applies to a process that was context switched while inside
+	 * arch_enter_lazy_mmu_mode(), to re-activate the batch that was
+	 * deactivated above, before _switch(). This will never be the case
+	 * for new tasks.
+	 */
+	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
+		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
+		batch = this_cpu_ptr(&ppc64_tlb_batch);
+		batch->active = 1;
+	}
+#endif
+
+	/*
+	 * Math facilities are masked out of the child MSR in copy_thread.
+	 * A new task does not need to restore_math because it will
+	 * demand fault them.
+	 */
+	if (current->thread.regs)
+		restore_math(current->thread.regs);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	return last;
+}
+
+#define NR_INSN_TO_PRINT	16
+
+/*
+ * Print NR_INSN_TO_PRINT instruction words around regs->nip: the word at NIP
+ * is wrapped in <>, unreadable words are shown as XXXXXXXX.
+ */
+static void show_instructions(struct pt_regs *regs)
+{
+	unsigned long nip = regs->nip;
+	unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
+	int i;
+
+	printk("Code: ");
+
+	/*
+	 * If we were executing with the MMU off for instructions, adjust pc
+	 * rather than printing XXXXXXXX.
+	 */
+	if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) {
+		pc = (unsigned long)phys_to_virt(pc);
+		nip = (unsigned long)phys_to_virt(regs->nip);
+	}
+
+	for (i = 0; i < NR_INSN_TO_PRINT; i++, pc += sizeof(int)) {
+		int instr;
+
+		if (get_kernel_nofault(instr, (const void *)pc))
+			pr_cont("XXXXXXXX ");
+		else if (nip == pc)
+			pr_cont("<%08x> ", instr);
+		else
+			pr_cont("%08x ", instr);
+	}
+
+	pr_cont("\n");
+}
+
+void show_user_instructions(struct pt_regs *regs)
+{
+	unsigned long pc;
+	int n = NR_INSN_TO_PRINT;
+	struct seq_buf s;
+	char buf[96]; /* enough for 8 times 9 + 2 chars */
+
+	pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
+
+	seq_buf_init(&s, buf, sizeof(buf));
+	/* Emit the window in rows of up to 8 instructions, one pr_info() per row. */
+	while (n) {
+		int i;
+
+		seq_buf_clear(&s);
+
+		for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
+			int instr;
+
+			if (copy_from_user_nofault(&instr, (void __user *)pc,
+					sizeof(instr))) {
+				seq_buf_printf(&s, "XXXXXXXX ");
+				continue;
+			}
+			seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
+		}
+		/* A row that overflowed the buffer is dropped rather than printed. */
+		if (!seq_buf_has_overflowed(&s))
+			pr_info("%s[%d]: code: %s\n", current->comm,
+				current->pid, s.buffer);
+	}
+}
+
+struct regbit {
+	unsigned long bit;	/* mask tested against the register value */
+	const char *name;	/* label printed when the mask matches */
+};
+
+static struct regbit msr_bits[] = {
+#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
+	{MSR_SF,	"SF"},
+	{MSR_HV,	"HV"},
+#endif
+	{MSR_VEC,	"VEC"},
+	{MSR_VSX,	"VSX"},
+#ifdef CONFIG_BOOKE
+	{MSR_CE,	"CE"},
+#endif
+	{MSR_EE,	"EE"},
+	{MSR_PR,	"PR"},
+	{MSR_FP,	"FP"},
+	{MSR_ME,	"ME"},
+#ifdef CONFIG_BOOKE
+	{MSR_DE,	"DE"},
+#else
+	{MSR_SE,	"SE"},
+	{MSR_BE,	"BE"},
+#endif
+	{MSR_IR,	"IR"},
+	{MSR_DR,	"DR"},
+	{MSR_PMM,	"PMM"},
+#ifndef CONFIG_BOOKE
+	{MSR_RI,	"RI"},
+	{MSR_LE,	"LE"},
+#endif
+	{0,		NULL}	/* terminator: print_bits() stops at bit == 0 */
+};
+
+static void print_bits(unsigned long val, const struct regbit *bits, const char *sep)
+{
+	const char *s = "";	/* nothing is printed before the first name */
+
+	for (; bits->bit; ++bits)	/* the table ends at the entry with bit == 0 */
+		if (val & bits->bit) {
+			pr_cont("%s%s", s, bits->name);
+			s = sep;	/* later names are prefixed with @sep */
+		}
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static struct regbit msr_tm_bits[] = {
+	{MSR_TS_T,	"T"},
+	{MSR_TS_S,	"S"},
+	{MSR_TM,	"E"},
+	{0,		NULL}
+};
+
+static void print_tm_bits(unsigned long val)
+{
+/*
+ * This only prints something if at least one of the TM bits is set.
+ * Inside the TM[], the output means:
+ *   E: Enabled		(bit 32)
+ *   S: Suspended	(bit 33)
+ *   T: Transactional	(bit 34)
+ */
+	if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
+		pr_cont(",TM[");
+		print_bits(val, msr_tm_bits, "");
+		pr_cont("]");
+	}
+}
+#else
+static void print_tm_bits(unsigned long val) {}
+#endif
+
+static void print_msr_bits(unsigned long val)
+{
+	pr_cont("<");
+	print_bits(val, msr_bits, ",");	/* comma-separated names of the set bits */
+	print_tm_bits(val);	/* TM summary, if any; prints nothing without TM support */
+	pr_cont(">");
+}
+
+#ifdef CONFIG_PPC64
+#define REG		"%016lx"
+#define REGS_PER_LINE	4
+#else
+#define REG		"%08lx"
+#define REGS_PER_LINE	8
+#endif
+
+static void __show_regs(struct pt_regs *regs)
+{
+	int i, trap;
+
+	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
+	       regs->nip, regs->link, regs->ctr);
+	printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
+	       regs, regs->trap, print_tainted(), init_utsname()->release);
+	printk("MSR:  "REG" ", regs->msr);
+	print_msr_bits(regs->msr);
+	pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
+	trap = TRAP(regs);
+	if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
+		pr_cont("CFAR: "REG" ", regs->orig_gpr3);
+	if (trap == INTERRUPT_MACHINE_CHECK ||
+	    trap == INTERRUPT_DATA_STORAGE ||
+	    trap == INTERRUPT_ALIGNMENT) {
+		if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
+			pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr);
+		else
+			pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+	}
+
+#ifdef CONFIG_PPC64
+	pr_cont("IRQMASK: %lx ", regs->softe);
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(regs->msr))
+		pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+#endif
+
+	for (i = 0;  i < 32;  i++) {
+		if ((i % REGS_PER_LINE) == 0)
+			pr_cont("\nGPR%02d: ", i);
+		pr_cont(REG " ", regs->gpr[i]);
+	}
+	pr_cont("\n");
+	/*
+	 * Look up NIP late so we have the best chance of getting the
+	 * above info out without failing
+	 */
+	if (IS_ENABLED(CONFIG_KALLSYMS)) {
+		printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+		printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
+	}
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	show_regs_print_info(KERN_DEFAULT);
+	__show_regs(regs);
+	show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT);
+	if (!user_mode(regs))	/* the code dump is for kernel-mode regs only */
+		show_instructions(regs);
+}
+
+void flush_thread(void)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	flush_ptrace_hw_breakpoint(current);
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+	set_debug_reg_defaults(&current->thread);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+}
+
+void arch_setup_new_exec(void)
+{
+	/* hash__setup_new_exec() is only called when radix is not enabled. */
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (!radix_enabled())
+		hash__setup_new_exec();
+#endif
+	/*
+	 * If we exec out of a kernel thread then thread.regs will not be
+	 * set.  Do it now.
+	 */
+	if (!current->thread.regs) {
+		struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
+		current->thread.regs = regs - 1;
+	}
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	current->thread.regs->amr  = default_amr;
+	current->thread.regs->iamr  = default_iamr;
+#endif
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Assign a TIDR (thread ID) for task @t and set it in the thread
+ * structure. For now, we only support setting TIDR for 'current' task.
+ *
+ * Since the TID value is a truncated form of its PID, it is possible
+ * (but unlikely) for 2 threads to have the same TID. In the unlikely event
+ * that 2 threads share the same TID and are waiting, one of the following
+ * cases will happen:
+ *
+ * 1. The correct thread is running, the wrong thread is not
+ * In this situation, the correct thread is woken and proceeds to pass its
+ * condition check.
+ *
+ * 2. Neither thread is running
+ * In this situation, neither thread will be woken. When scheduled, the waiting
+ * threads will execute either a wait, which will return immediately, followed
+ * by a condition check, which will pass for the correct thread and fail
+ * for the wrong thread, or they will execute the condition check immediately.
+ *
+ * 3. The wrong thread is running, the correct thread is not
+ * The wrong thread will be woken, but will fail its condition check and
+ * re-execute wait. The correct thread, when scheduled, will execute either
+ * its condition check (which will pass), or wait, which returns immediately
+ * when called the first time after the thread is scheduled, followed by its
+ * condition check (which will pass).
+ *
+ * 4. Both threads are running
+ * Both threads will be woken. The wrong thread will fail its condition check
+ * and execute another wait, while the correct thread will pass its condition
+ * check.
+ *
+ * @t: the task to set the thread ID for
+ */
+int set_thread_tidr(struct task_struct *t)
+{
+	if (!cpu_has_feature(CPU_FTR_P9_TIDR))
+		return -EINVAL;
+
+	if (t != current)
+		return -EINVAL;
+
+	if (t->thread.tidr)
+		return 0;
+
+	t->thread.tidr = (u16)task_pid_nr(t);
+	mtspr(SPRN_TIDR, t->thread.tidr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(set_thread_tidr);
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * this gets called so that we can store coprocessor state into memory and
+ * copy the current task into the new thread.
+ */
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	flush_all_to_thread(src);
+	/*
+	 * Flush TM state out so we can copy it.  __switch_to_tm() does this
+	 * flush but it removes the checkpointed state from the current CPU and
+	 * transitions the CPU out of TM mode.  Hence we need to call
+	 * tm_recheckpoint_new_task() (on the same task) to restore the
+	 * checkpointed state back and the TM mode.
+	 *
+	 * Can't pass dst because it isn't ready. Doesn't matter, passing
+	 * dst is only important for __switch_to()
+	 */
+	__switch_to_tm(src, src);
+
+	*dst = *src;
+
+	clear_task_ebb(dst);
+
+	return 0;
+}
+
+static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
+{
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	unsigned long sp_vsid;
+	unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
+
+	if (radix_enabled())
+		return;
+
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
+			<< SLB_VSID_SHIFT_1T;
+	else
+		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
+			<< SLB_VSID_SHIFT;
+	sp_vsid |= SLB_VSID_KERNEL | llp;
+	p->thread.ksp_vsid = sp_vsid;
+#endif
+}
+
+/*
+ * Copy a thread..
+ */
+
+/*
+ * Copy architecture-specific thread state
+ */
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+{
+	struct pt_regs *kregs; /* Switch frame regs */
+	extern void ret_from_fork(void);
+	extern void ret_from_fork_scv(void);
+	extern void ret_from_kernel_user_thread(void);
+	extern void start_kernel_thread(void);
+	void (*f)(void);
+	unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int i;
+#endif
+
+	klp_init_thread_info(p);
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		/* kernel thread */
+
+		/* Create initial minimum stack frame. */
+		sp -= STACK_FRAME_MIN_SIZE;
+		((unsigned long *)sp)[0] = 0;
+
+		f = start_kernel_thread;
+		p->thread.regs = NULL;	/* no user register state */
+		clear_tsk_compat_task(p);
+	} else {
+		/* user thread */
+		struct pt_regs *childregs;
+
+		/* Create initial user return stack frame. */
+		sp -= STACK_USER_INT_FRAME_SIZE;
+		*(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
+
+		childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
+
+		if (unlikely(args->fn)) {
+			/*
+			 * A user space thread, but it first runs a kernel
+			 * thread, and then returns as though it had called
+			 * execve rather than fork, so user regs will be
+			 * filled in (e.g., by kernel_execve()).
+			 */
+			((unsigned long *)sp)[0] = 0;
+			memset(childregs, 0, sizeof(struct pt_regs));
+#ifdef CONFIG_PPC64
+			childregs->softe = IRQS_ENABLED;
+#endif
+			f = ret_from_kernel_user_thread;
+		} else {
+			struct pt_regs *regs = current_pt_regs();
+			unsigned long clone_flags = args->flags;
+			unsigned long usp = args->stack;
+
+			/* Copy registers */
+			*childregs = *regs;
+			if (usp)
+				childregs->gpr[1] = usp;
+			((unsigned long *)sp)[0] = childregs->gpr[1];
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+			WARN_ON_ONCE(childregs->softe != IRQS_ENABLED);
+#endif
+			if (clone_flags & CLONE_SETTLS) {
+				unsigned long tls = args->tls;
+
+				if (!is_32bit_task())
+					childregs->gpr[13] = tls;
+				else
+					childregs->gpr[2] = tls;
+			}
+
+			if (trap_is_scv(regs))
+				f = ret_from_fork_scv;
+			else
+				f = ret_from_fork;
+		}
+
+		childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
+		p->thread.regs = childregs;
+	}
+
+	/*
+	 * The way this works is that at some point in the future
+	 * some task will call _switch to switch to the new task.
+	 * That will pop off the stack frame created below and start
+	 * the new task running at ret_from_fork.  The new task will
+	 * do some house keeping and then return from the fork or clone
+	 * system call, using the stack frame created above.
+	 */
+	((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f;
+	sp -= STACK_SWITCH_FRAME_SIZE;
+	((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
+	kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
+	kregs->nip = ppc_function_entry(f);
+	if (unlikely(args->fn)) {
+		/*
+		 * Put kthread fn, arg parameters in non-volatile GPRs in the
+		 * switch frame so they are loaded by _switch before it returns
+		 * to start_kernel_thread or ret_from_kernel_user_thread.
+		 */
+		kregs->gpr[14] = ppc_function_entry((void *)args->fn);
+		kregs->gpr[15] = (unsigned long)args->fn_arg;
+	}
+	p->thread.ksp = sp;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	for (i = 0; i < nr_wp_slots(); i++)
+		p->thread.ptrace_bps[i] = NULL;
+#endif
+
+#ifdef CONFIG_PPC_FPU_REGS
+	p->thread.fp_save_area = NULL;
+#endif
+#ifdef CONFIG_ALTIVEC
+	p->thread.vr_save_area = NULL;
+#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+	p->thread.kuap = KUAP_NONE;
+#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+	p->thread.pid = MMU_NO_CONTEXT;
+#endif
+
+	setup_ksp_vsid(p, sp);
+
+#ifdef CONFIG_PPC64 
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		p->thread.dscr_inherit = current->thread.dscr_inherit;
+		p->thread.dscr = mfspr(SPRN_DSCR);
+	}
+
+	p->thread.tidr = 0;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+		p->thread.hashkeyr = current->thread.hashkeyr;
+#endif
+	return 0;
+}
+
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
+/*
+ * Set up a thread for executing a new program
+ */
+void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
+{
+#ifdef CONFIG_PPC64
+	unsigned long load_addr = regs->gpr[2];	/* saved by ELF_PLAT_INIT */
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
+		preload_new_slb_context(start, sp);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Clear any transactional state, we're exec()ing. The cause is
+	 * not important as there will never be a recheckpoint so it's not
+	 * user visible.
+	 */
+	if (MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(0);
+#endif
+
+	memset(&regs->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
+	regs->ctr = 0;
+	regs->link = 0;
+	regs->xer = 0;
+	regs->ccr = 0;
+	regs->gpr[1] = sp;
+
+#ifdef CONFIG_PPC32
+	regs->mq = 0;
+	regs->nip = start;
+	regs->msr = MSR_USER;
+#else
+	if (!is_32bit_task()) {
+		unsigned long entry;
+
+		if (is_elf2_task()) {
+			/* Look ma, no function descriptors! */
+			entry = start;
+
+			/*
+			 * Ulrich says:
+			 *   The latest iteration of the ABI requires that when
+			 *   calling a function (at its global entry point),
+			 *   the caller must ensure r12 holds the entry point
+			 *   address (so that the function can quickly
+			 *   establish addressability).
+			 */
+			regs->gpr[12] = start;
+			/* Make sure that's restored on entry to userspace. */
+			set_thread_flag(TIF_RESTOREALL);
+		} else {
+			unsigned long toc;
+
+			/* start is a relocated pointer to the function
+			 * descriptor for the elf _start routine.  The first
+			 * entry in the function descriptor is the entry
+			 * address of _start and the second entry is the TOC
+			 * value we need to use.
+			 */
+			__get_user(entry, (unsigned long __user *)start);
+			__get_user(toc, (unsigned long __user *)start+1);
+
+			/* Check whether the e_entry function descriptor entries
+			 * need to be relocated before we can use them.
+			 */
+			if (load_addr != 0) {
+				entry += load_addr;
+				toc   += load_addr;
+			}
+			regs->gpr[2] = toc;
+		}
+		regs_set_return_ip(regs, entry);
+		regs_set_return_msr(regs, MSR_USER64);
+	} else {
+		regs->gpr[2] = 0;
+		regs_set_return_ip(regs, start);
+		regs_set_return_msr(regs, MSR_USER32);
+	}
+
+#endif
+#ifdef CONFIG_VSX
+	current->thread.used_vsr = 0;
+#endif
+	current->thread.load_slb = 0;
+	current->thread.load_fp = 0;
+#ifdef CONFIG_PPC_FPU_REGS
+	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
+	current->thread.fp_save_area = NULL;
+#endif
+#ifdef CONFIG_ALTIVEC
+	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
+	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	current->thread.vr_save_area = NULL;
+	current->thread.vrsave = 0;
+	current->thread.used_vr = 0;
+	current->thread.load_vec = 0;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	memset(current->thread.evr, 0, sizeof(current->thread.evr));
+	current->thread.acc = 0;
+	current->thread.spefscr = 0;
+	current->thread.used_spe = 0;
+#endif /* CONFIG_SPE */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	current->thread.tm_tfhar = 0;
+	current->thread.tm_texasr = 0;
+	current->thread.tm_tfiar = 0;
+	current->thread.load_tm = 0;
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+		current->thread.hashkeyr = get_random_long();
+		mtspr(SPRN_HASHKEYR, current->thread.hashkeyr);
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+}
+EXPORT_SYMBOL(start_thread);
+
+#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
+		| PR_FP_EXC_RES | PR_FP_EXC_INV)
+
+int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
+{
+	struct pt_regs *regs = tsk->thread.regs;
+
+	/* This is a bit hairy.  If we are an SPE enabled processor
+	 * (have embedded fp) we store the IEEE exception enable flags in
+	 * fpexc_mode.  fpexc_mode is also used for setting FP exception
+	 * mode (async, precise, disabled) for 'Classic' FP. */
+	if (val & PR_FP_EXC_SW_ENABLE) {
+		if (cpu_has_feature(CPU_FTR_SPE)) {
+			/*
+			 * When the sticky exception bits are set
+			 * directly by userspace, it must call prctl
+			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+			 * in the existing prctl settings) or
+			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+			 * the bits being set).  <fenv.h> functions
+			 * saving and restoring the whole
+			 * floating-point environment need to do so
+			 * anyway to restore the prctl settings from
+			 * the saved environment.
+			 */
+#ifdef CONFIG_SPE
+			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
+			tsk->thread.fpexc_mode = val &
+				(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+#endif
+			return 0;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	/* On a CONFIG_SPE kernel this does not hurt us.  The bits that
+	 * __pack_fe01 uses do not overlap with bits used for
+	 * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
+	 * on CONFIG_SPE implementations are reserved so writing to
+	 * them does not change anything. */
+	if (val > PR_FP_EXC_PRECISE)
+		return -EINVAL;
+	tsk->thread.fpexc_mode = __pack_fe01(val);
+	if (regs != NULL && (regs->msr & MSR_FP) != 0) {
+		regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1))
+						| tsk->thread.fpexc_mode);
+	}
+	return 0;
+}
+
+int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
+{
+	unsigned int val = 0;
+
+	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
+		if (cpu_has_feature(CPU_FTR_SPE)) {
+			/*
+			 * When the sticky exception bits are set
+			 * directly by userspace, it must call prctl
+			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+			 * in the existing prctl settings) or
+			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+			 * the bits being set).  <fenv.h> functions
+			 * saving and restoring the whole
+			 * floating-point environment need to do so
+			 * anyway to restore the prctl settings from
+			 * the saved environment.
+			 */
+#ifdef CONFIG_SPE
+			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
+			val = tsk->thread.fpexc_mode;
+#endif
+		} else
+			return -EINVAL;
+	} else {
+		val = __unpack_fe01(tsk->thread.fpexc_mode);
+	}
+	return put_user(val, (unsigned int __user *) adr);
+}
+
+int set_endian(struct task_struct *tsk, unsigned int val)
+{
+	struct pt_regs *regs = tsk->thread.regs;
+
+	if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) ||
+	    (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE)))
+		return -EINVAL;
+
+	if (regs == NULL)
+		return -EINVAL;
+
+	if (val == PR_ENDIAN_BIG)
+		regs_set_return_msr(regs, regs->msr & ~MSR_LE);
+	else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
+		regs_set_return_msr(regs, regs->msr | MSR_LE);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+int get_endian(struct task_struct *tsk, unsigned long adr)
+{
+	struct pt_regs *regs = tsk->thread.regs;
+	unsigned int val;
+
+	if (!cpu_has_feature(CPU_FTR_PPC_LE) &&
+	    !cpu_has_feature(CPU_FTR_REAL_LE))
+		return -EINVAL;
+
+	if (regs == NULL)
+		return -EINVAL;
+
+	if (regs->msr & MSR_LE) {
+		if (cpu_has_feature(CPU_FTR_REAL_LE))
+			val = PR_ENDIAN_LITTLE;
+		else
+			val = PR_ENDIAN_PPC_LITTLE;
+	} else
+		val = PR_ENDIAN_BIG;
+
+	return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+	tsk->thread.align_ctl = val;
+	return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+	return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
+}
+
+static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
+				  unsigned long nbytes)
+{
+	unsigned long stack_page;
+	unsigned long cpu = task_cpu(p);
+
+	if (!hardirq_ctx[cpu] || !softirq_ctx[cpu])
+		return 0;
+
+	stack_page = (unsigned long)hardirq_ctx[cpu];
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	stack_page = (unsigned long)softirq_ctx[cpu];
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	return 0;
+}
+
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+					unsigned long nbytes)
+{
+#ifdef CONFIG_PPC64
+	unsigned long stack_page;
+	unsigned long cpu = task_cpu(p);
+
+	if (!paca_ptrs)
+		return 0;
+
+	if (!paca_ptrs[cpu]->emergency_sp)
+		return 0;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+	if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp)
+		return 0;
+#endif
+
+	stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE;
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+	stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE;
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE;
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+# endif
+#endif
+
+	return 0;
+}
+
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+		     unsigned long nbytes)
+{
+	unsigned long stack_page = (unsigned long)task_stack_page(p);
+
+	if (sp < THREAD_SIZE)
+		return 0;
+
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	if (valid_irq_stack(sp, p, nbytes))
+		return 1;
+
+	return valid_emergency_stack(sp, p, nbytes);
+}
+
+int validate_sp(unsigned long sp, struct task_struct *p)
+{
+	return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE);
+}
+
+static unsigned long ___get_wchan(struct task_struct *p)
+{
+	unsigned long ip, sp;
+	int count = 0;
+
+	sp = p->thread.ksp;
+	if (!validate_sp(sp, p))
+		return 0;
+
+	do {
+		sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+		if (!validate_sp(sp, p) || task_is_running(p))
+			return 0;
+		if (count > 0) {
+			ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
+			if (!in_sched_functions(ip))
+				return ip;
+		}
+	} while (count++ < 16);
+	return 0;
+}
+
+unsigned long __get_wchan(struct task_struct *p)
+{
+	unsigned long ret;
+
+	if (!try_get_task_stack(p))
+		return 0;
+
+	ret = ___get_wchan(p);
+
+	put_task_stack(p);
+
+	return ret;
+}
+
+static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk)
+{
+	unsigned long stack_page;
+
+	// A non-empty pt_regs should never have a zero MSR or TRAP value.
+	if (regs->msr || regs->trap)
+		return false;
+
+	// Check it sits at the very base of the stack
+	stack_page = (unsigned long)task_stack_page(tsk);
+	if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE)
+		return false;
+
+	return true;
+}
+
+static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
+
+/* Print @tsk's call trace (NULL: current) from @stack, or from its own sp if @stack is NULL. */
+void __no_sanitize_address show_stack(struct task_struct *tsk,
+				      unsigned long *stack,
+				      const char *loglvl)
+{
+	unsigned long sp, ip, lr, newsp, ret_addr;
+	int count = 0, firstframe = 1, ftrace_idx = 0;
+
+	if (tsk == NULL)
+		tsk = current;
+
+	if (!try_get_task_stack(tsk))
+		return;
+
+	sp = (unsigned long) stack;
+	if (sp == 0) {
+		if (tsk == current)
+			sp = current_stack_frame();
+		else
+			sp = tsk->thread.ksp;
+	}
+
+	lr = 0;
+	printk("%sCall Trace:\n", loglvl);
+	do {
+		if (!validate_sp(sp, tsk))
+			break;
+
+		stack = (unsigned long *) sp;
+		newsp = stack[0];
+		ip = stack[STACK_FRAME_LR_SAVE];
+		if (!firstframe || ip != lr) {
+			printk("%s["REG"] ["REG"] %pS",
+				loglvl, sp, ip, (void *)ip);
+			/* Pass @tsk, not current: the frames being walked belong
+			 * to @tsk, so its ftrace return info must be consulted. */
+			ret_addr = ftrace_graph_ret_addr(tsk,
+						&ftrace_idx, ip, stack);
+			if (ret_addr != ip)
+				pr_cont(" (%pS)", (void *)ret_addr);
+			if (firstframe)
+				pr_cont(" (unreliable)");
+			pr_cont("\n");
+		}
+		firstframe = 0;
+
+		/*
+		 * See if this is an exception frame.
+		 * We look for the "regs" marker in the current frame.
+		 *
+		 * STACK_SWITCH_FRAME_SIZE being the smallest frame that
+		 * could hold a pt_regs, if that does not fit then it can't
+		 * have regs.
+		 */
+		if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE)
+		    && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
+			struct pt_regs *regs = (struct pt_regs *)
+				(sp + STACK_INT_FRAME_REGS);
+
+			lr = regs->link;
+			printk("%s--- interrupt: %lx at %pS\n",
+			       loglvl, regs->trap, (void *)regs->nip);
+
+			// Detect the case of an empty pt_regs at the very base
+			// of the stack and suppress showing it in full.
+			if (!empty_user_regs(regs, tsk)) {
+				__show_regs(regs);
+				printk("%s--- interrupt: %lx\n", loglvl, regs->trap);
+			}
+
+			firstframe = 1;
+		}
+
+		sp = newsp;
+	} while (count++ < kstack_depth_to_print);
+
+	put_task_stack(tsk);
+}
+
+#ifdef CONFIG_PPC64
+/* Called with hard IRQs off */
+void notrace __ppc64_runlatch_on(void)
+{
+	struct thread_info *ti = current_thread_info();
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/*
+		 * Least significant bit (RUN) is the only writable bit of
+		 * the CTRL register, so we can avoid mfspr. 2.06 is not the
+		 * earliest ISA where this is the case, but it's convenient.
+		 */
+		mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+	} else {
+		unsigned long ctrl;
+
+		/*
+		 * Some architectures (e.g., Cell) have writable fields other
+		 * than RUN, so do the read-modify-write.
+		 */
+		ctrl = mfspr(SPRN_CTRLF);
+		ctrl |= CTRL_RUNLATCH;
+		mtspr(SPRN_CTRLT, ctrl);
+	}
+
+	ti->local_flags |= _TLF_RUNLATCH;
+}
+
+/* Called with hard IRQs off */
+void notrace __ppc64_runlatch_off(void)
+{
+	struct thread_info *ti = current_thread_info();
+
+	ti->local_flags &= ~_TLF_RUNLATCH;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		mtspr(SPRN_CTRLT, 0);
+	} else {
+		unsigned long ctrl;
+
+		ctrl = mfspr(SPRN_CTRLF);
+		ctrl &= ~CTRL_RUNLATCH;
+		mtspr(SPRN_CTRLT, ctrl);
+	}
+}
+#endif /* CONFIG_PPC64 */
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_u32_below(PAGE_SIZE);
+	return sp & ~0xf;
+}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
new file mode 100644
index 0000000000..0b5878c312
--- /dev/null
+++ b/arch/powerpc/kernel/prom.c
@@ -0,0 +1,1006 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Procedures for creating, accessing and interpreting the device tree.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ * 
+ *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ *    {engebret|bergner}@us.ibm.com 
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+#include <linux/seq_buf.h>
+
+#include <asm/rtas.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/kdump.h>
+#include <asm/smp.h>
+#include <asm/mmu.h>
+#include <asm/paca.h>
+#include <asm/powernv.h>
+#include <asm/iommu.h>
+#include <asm/btext.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/pci-bridge.h>
+#include <asm/kexec.h>
+#include <asm/opal.h>
+#include <asm/fadump.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/firmware.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/drmem.h>
+#include <asm/ultravisor.h>
+#include <asm/prom.h>
+#include <asm/plpks.h>
+
+#include <mm/mmu_decl.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(KERN_ERR fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+int *chip_id_lookup_table;
+
+#ifdef CONFIG_PPC64
+int __initdata iommu_is_off;
+int __initdata iommu_force_on;
+unsigned long tce_alloc_start, tce_alloc_end;
+u64 ppc64_rma_size;
+unsigned int boot_cpu_node_count __ro_after_init;
+#endif
+static phys_addr_t first_memblock_size;
+static int __initdata boot_cpu_count;
+
+static int __init early_parse_mem(char *p)
+{
+	if (!p)
+		return 1;
+
+	memory_limit = PAGE_ALIGN(memparse(p, &p));
+	DBG("memory limit = 0x%llx\n", memory_limit);
+
+	return 0;
+}
+early_param("mem", early_parse_mem);
+
+/*
+ * overlaps_initrd - check for overlap with page aligned extension of
+ * initrd.
+ */
+static inline int overlaps_initrd(unsigned long start, unsigned long size)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (!initrd_start)
+		return 0;
+
+	return	(start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) &&
+			start <= ALIGN(initrd_end, PAGE_SIZE);
+#else
+	return 0;
+#endif
+}
+
+/**
+ * move_device_tree - move tree to an unused area, if needed.
+ *
+ * The device tree may be allocated beyond our memory limit, or inside the
+ * crash kernel region for kdump, or within the page aligned range of initrd.
+ * If so, move it out of the way.
+ */
+static void __init move_device_tree(void)
+{
+	unsigned long start, size;
+	void *p;
+
+	DBG("-> move_device_tree\n");
+
+	start = __pa(initial_boot_params);
+	size = fdt_totalsize(initial_boot_params);
+
+	if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
+	    !memblock_is_memory(start + size - 1) ||
+	    overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) {
+		p = memblock_alloc_raw(size, PAGE_SIZE);
+		if (!p)
+			panic("Failed to allocate %lu bytes to move device tree\n",
+			      size);
+		memcpy(p, initial_boot_params, size);
+		initial_boot_params = p;
+		DBG("Moved device tree to 0x%px\n", p);
+	}
+
+	DBG("<- move_device_tree\n");
+}
+
+/*
+ * ibm,pa/pi-features is a per-cpu property that contains a string of
+ * attribute descriptors, each of which has a 2 byte header plus up
+ * to 254 bytes worth of processor attribute bits.  First header
+ * byte specifies the number of bytes following the header.
+ * Second header byte is an "attribute-specifier" type, of which
+ * zero is the only currently-defined value.
+ * Implementation:  Each table entry gives the byte and bit offset of a
+ * feature that we are interested in.  If the property is missing nothing
+ * is changed; otherwise, for each entry whose byte is present, the
+ * entry's feature bits are set or cleared to match that bit.  Note that
+ * the bit numbers are big-endian to match the definition in PAPR.
+ */
+struct ibm_feature {
+	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
+	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
+	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
+	unsigned int	cpu_user_ftrs2;	/* PPC_FEATURE2_xxx bit */
+	unsigned char	pabyte;		/* byte number in ibm,pa/pi-features */
+	unsigned char	pabit;		/* bit number (big-endian) */
+	unsigned char	invert;		/* if 1, pa bit set => clear feature */
+};
+
+static struct ibm_feature ibm_pa_features[] __initdata = {
+	{ .pabyte = 0,  .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
+	{ .pabyte = 0,  .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
+	{ .pabyte = 0,  .pabit = 3, .cpu_features  = CPU_FTR_CTRL },
+	{ .pabyte = 0,  .pabit = 6, .cpu_features  = CPU_FTR_NOEXECUTE },
+	{ .pabyte = 1,  .pabit = 2, .mmu_features  = MMU_FTR_CI_LARGE_PAGE },
+#ifdef CONFIG_PPC_RADIX_MMU
+	{ .pabyte = 40, .pabit = 0, .mmu_features  = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE },
+#endif
+	{ .pabyte = 5,  .pabit = 0, .cpu_features  = CPU_FTR_REAL_LE,
+				    .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
+	/*
+	 * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
+	 * we don't want to turn on TM here, so we use the *_COMP versions
+	 * which are 0 if the kernel doesn't support TM.
+	 */
+	{ .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
+	  .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
+
+	{ .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
+	{ .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE },
+};
+
+/*
+ * ibm,pi-features property provides the support of processor specific
+ * options not described in ibm,pa-features. Right now use byte 0, bit 3
+ * which indicates the occurrence of DSI interrupt when the paste operation
+ * on the suspended NX window.
+ */
+static struct ibm_feature ibm_pi_features[] __initdata = {
+	{ .pabyte = 0, .pabit = 3, .mmu_features  = MMU_FTR_NX_DSI },
+};
+
+static void __init scan_features(unsigned long node, const unsigned char *ftrs,
+				 unsigned long tablelen,
+				 struct ibm_feature *fp,
+				 unsigned long ft_size)
+{
+	unsigned long i, len, bit;
+
+	/* find descriptor with type == 0 */
+	for (;;) {
+		if (tablelen < 3)
+			return;
+		len = 2 + ftrs[0];
+		if (tablelen < len)
+			return;		/* descriptor 0 not found */
+		if (ftrs[1] == 0)
+			break;
+		tablelen -= len;
+		ftrs += len;
+	}
+
+	/* loop over bits we know about */
+	for (i = 0; i < ft_size; ++i, ++fp) {
+		if (fp->pabyte >= ftrs[0])
+			continue;
+		bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1;
+		if (bit ^ fp->invert) {
+			cur_cpu_spec->cpu_features |= fp->cpu_features;
+			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+			cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2;
+			cur_cpu_spec->mmu_features |= fp->mmu_features;
+		} else {
+			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
+			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+			cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2;
+			cur_cpu_spec->mmu_features &= ~fp->mmu_features;
+		}
+	}
+}
+
+static void __init check_cpu_features(unsigned long node, char *name,
+				      struct ibm_feature *fp,
+				      unsigned long size)
+{
+	const unsigned char *pa_ftrs;
+	int tablelen;
+
+	pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen);
+	if (pa_ftrs == NULL)
+		return;
+
+	scan_features(node, pa_ftrs, tablelen, fp, size);
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static void __init init_mmu_slb_size(unsigned long node)
+{
+	const __be32 *slb_size_ptr;
+
+	slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL) ? :
+			of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
+
+	if (slb_size_ptr)
+		mmu_slb_size = be32_to_cpup(slb_size_ptr);
+}
+#else
+#define init_mmu_slb_size(node) do { } while(0)
+#endif
+
+static struct feature_property {
+	const char *name;
+	u32 min_value;
+	unsigned long cpu_feature;
+	unsigned long cpu_user_ftr;
+} feature_properties[] __initdata = {
+#ifdef CONFIG_ALTIVEC
+	{"altivec", 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
+	{"ibm,vmx", 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	/* Yes, this _really_ is ibm,vmx == 2 to enable VSX */
+	{"ibm,vmx", 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX},
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_PPC64
+	{"ibm,dfp", 1, 0, PPC_FEATURE_HAS_DFP},
+	{"ibm,purr", 1, CPU_FTR_PURR, 0},
+	{"ibm,spurr", 1, CPU_FTR_SPURR, 0},
+#endif /* CONFIG_PPC64 */
+};
+
+#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
+static __init void identical_pvr_fixup(unsigned long node)
+{
+	unsigned int pvr;
+	const char *model = of_get_flat_dt_prop(node, "model", NULL);
+
+	/*
+	 * Since 440GR(x)/440EP(x) processors have the same pvr,
+	 * we check the node path and set bit 28 in the cur_cpu_spec
+	 * pvr for EP(x) processor version. This bit is always 0 in
+	 * the "real" pvr. Then we call identify_cpu again with
+	 * the new logical pvr to enable FPU support.
+	 */
+	if (model && strstr(model, "440EP")) {
+		pvr = cur_cpu_spec->pvr_value | 0x8;
+		identify_cpu(0, pvr);
+		DBG("Using logical pvr %x for %s\n", pvr, model);
+	}
+}
+#else
+#define identical_pvr_fixup(node) do { } while(0)
+#endif
+
+static void __init check_cpu_feature_properties(unsigned long node)
+{
+	int i;
+	struct feature_property *fp = feature_properties;
+	const __be32 *prop;
+
+	for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
+		prop = of_get_flat_dt_prop(node, fp->name, NULL);
+		if (prop && be32_to_cpup(prop) >= fp->min_value) {
+			cur_cpu_spec->cpu_features |= fp->cpu_feature;
+			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr;
+		}
+	}
+}
+
+static int __init early_init_dt_scan_cpus(unsigned long node,
+					  const char *uname, int depth,
+					  void *data)
+{
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *prop;
+	const __be32 *intserv;
+	int i, nthreads;
+	int len;
+	int found = -1;
+	int found_thread = 0;
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	if (IS_ENABLED(CONFIG_PPC64))
+		boot_cpu_node_count++;
+
+	/* Get the physical thread ids, falling back to "reg" */
+	intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		intserv = of_get_flat_dt_prop(node, "reg", &len);
+
+	nthreads = len / sizeof(int);	/* one 32-bit cell per thread */
+
+	/*
+	 * Now see if any of these threads match our boot cpu.
+	 * NOTE: This must match the parsing done in smp_setup_cpu_maps.
+	 */
+	for (i = 0; i < nthreads; i++) {
+		if (be32_to_cpu(intserv[i]) ==
+			fdt_boot_cpuid_phys(initial_boot_params)) {
+			found = boot_cpu_count;
+			found_thread = i;
+		}
+#ifdef CONFIG_SMP
+		/* logical cpu id is always 0 on UP kernels */
+		boot_cpu_count++;
+#endif
+	}
+
+	/* No thread of this node is the boot CPU */
+	if (found < 0)
+		return 0;
+
+	DBG("boot cpu: logical %d physical %d\n", found,
+	    be32_to_cpu(intserv[found_thread]));
+	boot_cpuid = found;
+
+	if (IS_ENABLED(CONFIG_PPC64))
+		boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+
+	/*
+	 * PAPR defines "logical" PVR values for cpus that
+	 * meet various levels of the architecture:
+	 * 0x0f000001	Architecture version 2.04
+	 * 0x0f000002	Architecture version 2.05
+	 * If the cpu-version property in the cpu node contains
+	 * such a value, we call identify_cpu again with the
+	 * logical PVR value in order to use the cpu feature
+	 * bits appropriate for the architecture level.
+	 *
+	 * A POWER6 partition in "POWER6 architected" mode
+	 * uses the 0x0f000002 PVR value; in POWER5+ mode
+	 * it uses 0x0f000001.
+	 *
+	 * If we're using device tree CPU feature discovery then we don't
+	 * support the cpu-version property, and it's the responsibility of the
+	 * firmware/hypervisor to provide the correct feature set for the
+	 * architecture level via the ibm,powerpc-cpu-features binding.
+	 */
+	if (!dt_cpu_ftrs_in_use()) {
+		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) {
+			identify_cpu(0, be32_to_cpup(prop));
+			seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop));
+		}
+
+		check_cpu_feature_properties(node);
+		check_cpu_features(node, "ibm,pa-features", ibm_pa_features,
+				   ARRAY_SIZE(ibm_pa_features));
+		check_cpu_features(node, "ibm,pi-features", ibm_pi_features,
+				   ARRAY_SIZE(ibm_pi_features));
+	}
+
+	identical_pvr_fixup(node);
+	init_mmu_slb_size(node);
+
+#ifdef CONFIG_PPC64
+	if (nthreads == 1)	/* a single-threaded boot core cannot do SMT */
+		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+	else if (!dt_cpu_ftrs_in_use())
+		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+#endif
+
+	return 0;
+}
+
+static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
+						const char *uname,
+						int depth, void *data)
+{
+	const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
+
+	/* Use common scan routine to determine if this is the chosen node */
+	if (early_init_dt_scan_chosen(data) < 0)
+		return 0;
+
+#ifdef CONFIG_PPC64
+	/* check if iommu is forced on or off */
+	if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+		iommu_is_off = 1;
+	if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+		iommu_force_on = 1;
+#endif
+
+	/* DT-supplied limit; mem=x on the command line is the preferred mechanism */
+	lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
+	if (lprop)
+		memory_limit = *lprop;
+
+#ifdef CONFIG_PPC64
+	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+	if (lprop)
+		tce_alloc_start = *lprop;
+	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+	if (lprop)
+		tce_alloc_end = *lprop;
+#endif
+
+#ifdef CONFIG_KEXEC_CORE
+	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
+	if (lprop)
+		crashk_res.start = *lprop;
+
+	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-size", NULL);
+	if (lprop)
+		crashk_res.end = crashk_res.start + *lprop - 1;	/* inclusive end */
+#endif
+
+	/* break now */
+	return 1;
+}
+
+/*
+ * Compare the range against the max mem limit: false if @base is beyond it,
+ * else clamp *@size to the room left (checked so base + *size cannot wrap).
+ */
+
+#ifdef CONFIG_SPARSEMEM
+static bool __init validate_mem_limit(u64 base, u64 *size)
+{
+	u64 max_mem = 1UL << (MAX_PHYSMEM_BITS);
+
+	if (base >= max_mem)
+		return false;
+	if (*size > max_mem - base)
+		*size = max_mem - base;
+	return true;
+}
+#else
+static bool __init validate_mem_limit(u64 base, u64 *size)
+{
+	return true;
+}
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
+ * This contains a list of memory blocks along with NUMA affinity
+ * information.
+ */
+static int  __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+					const __be32 **usm,
+					void *data)
+{
+	u64 base, size;
+	int is_kexec_kdump = 0, rngs;
+
+	base = lmb->base_addr;
+	size = drmem_lmb_size();
+	rngs = 1;	/* without usable-memory data the LMB is one range */
+
+	/*
+	 * Skip this block if the reserved bit is set in flags
+	 * or if the block is not assigned to this partition.
+	 */
+	if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+	    !(lmb->flags & DRCONF_MEM_ASSIGNED))
+		return 0;
+
+	if (*usm)
+		is_kexec_kdump = 1;
+
+	if (is_kexec_kdump) {
+		/*
+		 * For each memblock in ibm,dynamic-memory, a
+		 * corresponding entry in linux,drconf-usable-memory
+		 * property contains a counter 'p' followed by 'p'
+		 * (base, size) pairs. Now read the counter from the
+		 * linux,drconf-usable-memory property.
+		 */
+		rngs = dt_mem_next_cell(dt_root_size_cells, usm);
+		if (!rngs) /* there are no (base, size) pairs */
+			return 0;
+	}
+
+	do {
+		if (is_kexec_kdump) {
+			base = dt_mem_next_cell(dt_root_addr_cells, usm);
+			size = dt_mem_next_cell(dt_root_size_cells, usm);
+		}
+
+		if (iommu_is_off) {	/* then only memory below 0x80000000 is used */
+			if (base >= 0x80000000ul)
+				continue;	/* still evaluates --rngs below */
+			if ((base + size) > 0x80000000ul)
+				size = 0x80000000ul - base;
+		}
+
+		if (!validate_mem_limit(base, &size))
+			continue;	/* still evaluates --rngs below */
+
+		DBG("Adding: %llx -> %llx\n", base, size);
+		memblock_add(base, size);
+
+		if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
+			memblock_mark_hotplug(base, size);
+	} while (--rngs);
+
+	return 0;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
+static int __init early_init_dt_scan_memory_ppc(void)
+{
+#ifdef CONFIG_PPC_PSERIES
+	/* Walk the dynamic-reconfiguration LMBs, if any, before the generic scan */
+	int drmem = fdt_path_offset(initial_boot_params,
+				    "/ibm,dynamic-reconfiguration-memory");
+
+	if (drmem > 0)
+		walk_drmem_lmbs_early(drmem, NULL, early_init_drmem_lmb);
+#endif
+
+	return early_init_dt_scan_memory();
+}
+
+/*
+ * For a relocatable kernel, we need to get the memstart_addr first,
+ * then use it to calculate the virtual kernel start address. This has
+ * to happen at a very early stage (before machine_init). In this case,
+ * we only want to learn memstart_addr and must not disturb memblock
+ * at this stage. So introduce a variable that lets us skip the
+ * memblock_add() call for this reason.
+ */
+#ifdef CONFIG_RELOCATABLE
+static int add_mem_to_memblock = 1;
+#else
+#define add_mem_to_memblock 1
+#endif
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+#ifdef CONFIG_PPC64
+	if (iommu_is_off) {	/* then only memory below 0x80000000 is used */
+		if (base >= 0x80000000ul)
+			return;
+		if ((base + size) > 0x80000000ul)
+			size = 0x80000000ul - base;
+	}
+#endif
+	/* Keep track of the beginning of memory -and- the size of
+	 * the very first block in the device-tree as it represents
+	 * the RMA on ppc64 server
+	 */
+	if (base < memstart_addr) {
+		memstart_addr = base;
+		first_memblock_size = size;
+	}
+
+	/* Add the chunk to the MEMBLOCK list, subject to validate_mem_limit() */
+	if (add_mem_to_memblock) {
+		if (validate_mem_limit(base, &size))
+			memblock_add(base, size);
+	}
+}
+
+static void __init early_reserve_mem_dt(void)
+{
+	unsigned long i, dt_root;
+	int len;
+	const __be32 *prop;
+
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
+
+	dt_root = of_get_flat_dt_root();
+
+	prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+
+	if (!prop)	/* no "reserved-ranges" in the root node: nothing more to do */
+		return;
+
+	DBG("Found new-style reserved-ranges\n");
+
+	/* Each reserved range is an (address,size) pair, 2 cells each,
+	 * totalling 4 cells per range. */
+	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+		u64 base, size;
+
+		base = of_read_number(prop + (i * 4) + 0, 2);
+		size = of_read_number(prop + (i * 4) + 2, 2);
+
+		if (size) {	/* zero-sized entries are ignored */
+			DBG("reserving: %llx -> %llx\n", base, size);
+			memblock_reserve(base, size);
+		}
+	}
+}
+
+static void __init early_reserve_mem(void)
+{
+	__be64 *reserve_map;
+
+	reserve_map = (__be64 *)(((unsigned long)initial_boot_params) +
+			fdt_off_mem_rsvmap(initial_boot_params));
+
+	/* Look for the new "reserved-ranges" property in the DT */
+	early_reserve_mem_dt();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* Then reserve the initrd, if any */
+	if (initrd_start && (initrd_end > initrd_start)) {
+		memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
+			ALIGN(initrd_end, PAGE_SIZE) -
+			ALIGN_DOWN(initrd_start, PAGE_SIZE));
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+	if (!IS_ENABLED(CONFIG_PPC32))	/* the legacy map below is PPC32-only */
+		return;
+
+	/*
+	 * Handle the case where we might be booting from an old kexec
+	 * image that setup the mem_rsvmap as pairs of 32-bit values
+	 */
+	if (be64_to_cpup(reserve_map) > 0xffffffffull) {
+		u32 base_32, size_32;
+		__be32 *reserve_map_32 = (__be32 *)reserve_map;
+
+		DBG("Found old 32-bit reserve map\n");
+
+		while (1) {
+			base_32 = be32_to_cpup(reserve_map_32++);
+			size_32 = be32_to_cpup(reserve_map_32++);
+			if (size_32 == 0)	/* a zero size ends the list */
+				break;
+			DBG("reserving: %x -> %x\n", base_32, size_32);
+			memblock_reserve(base_32, size_32);
+		}
+		return;
+	}
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static bool tm_disabled __initdata;
+
+static int __init parse_ppc_tm(char *str)
+{
+	bool res;
+
+	if (kstrtobool(str, &res))	/* not a recognisable boolean */
+		return -EINVAL;
+
+	tm_disabled = !res;	/* a false value for ppc_tm= disables TM */
+
+	return 0;
+}
+early_param("ppc_tm", parse_ppc_tm);
+
+static void __init tm_init(void)
+{
+	if (tm_disabled) {
+		pr_info("Disabling hardware transactional memory (HTM)\n");
+		cur_cpu_spec->cpu_user_features2 &=
+			~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+		cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+		return;
+	}
+
+	pnv_tm_init();
+}
+#else
+static void tm_init(void) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+static int __init
+early_init_dt_scan_model(unsigned long node, const char *uname,
+			 int depth, void *data)
+{
+	const char *model;
+
+	/* Only nodes at depth 0 are of interest */
+	if (depth)
+		return 0;
+
+	model = of_get_flat_dt_prop(node, "model", NULL);
+	if (model != NULL)
+		seq_buf_printf(&ppc_hw_desc, "%s ", model);
+
+	return 1;	/* break now */
+}
+
+#ifdef CONFIG_PPC64
+static void __init save_fscr_to_task(void)
+{
+	/*
+	 * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we
+	 * have configured via the device tree features or via __init_FSCR().
+	 * That value will then be propagated to pid 1 (init) and all future
+	 * processes.
+	 */
+	if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+		init_task.thread.fscr = mfspr(SPRN_FSCR);
+}
+#else
+static inline void save_fscr_to_task(void) {}
+#endif
+
+
+void __init early_init_devtree(void *params)
+{
+	phys_addr_t limit;
+
+	DBG(" -> early_init_devtree(%px)\n", params);
+
+	/* Too early to BUG_ON(), do it by hand */
+	if (!early_init_dt_verify(params))
+		panic("BUG: Failed verifying flat device tree, bad version?");
+
+	of_scan_flat_dt(early_init_dt_scan_model, NULL);
+
+#ifdef CONFIG_PPC_RTAS
+	/* Some machines might need RTAS info for debugging, grab it now. */
+	of_scan_flat_dt(early_init_dt_scan_rtas, NULL);
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+	/* Some machines might need OPAL info for debugging, grab it now. */
+	of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+	/* Scan tree for ultravisor feature */
+	of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
+#endif
+
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+	/* scan tree to see if dump is active during last boot */
+	of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
+#endif
+
+	/* Retrieve various pieces of information from the /chosen node of
+	 * the device-tree, including the platform type, initrd location and
+	 * size, TCE reserve, and more ...
+	 */
+	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
+
+	/* Scan memory nodes and rebuild MEMBLOCKs */
+	early_init_dt_scan_root();
+	early_init_dt_scan_memory_ppc();
+
+	/*
+	 * As generic code authors expect to be able to use static keys
+	 * in early_param() handlers, we initialize the static keys just
+	 * before parsing early params (it's fine to call jump_label_init()
+	 * more than once).
+	 */
+	jump_label_init();
+	parse_early_param();
+
+	/* make sure we've parsed cmdline for mem= before this */
+	if (memory_limit)
+		first_memblock_size = min_t(u64, first_memblock_size, memory_limit);
+	setup_initial_memory_limit(memstart_addr, first_memblock_size);
+	/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
+	memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START);
+	/* If relocatable, reserve first 32k for interrupt vectors etc. */
+	if (PHYSICAL_START > MEMORY_START)
+		memblock_reserve(MEMORY_START, 0x8000);
+	reserve_kdump_trampoline();
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+	/*
+	 * If we fail to reserve memory for firmware-assisted dump then
+	 * fallback to kexec based kdump.
+	 */
+	if (fadump_reserve_mem() == 0)
+#endif
+		reserve_crashkernel();
+	early_reserve_mem();
+
+	/* Ensure that total memory size is page-aligned. */
+	limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
+	memblock_enforce_memory_limit(limit);
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES)
+	if (!early_radix_enabled())
+		memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS));
+#endif
+
+	memblock_allow_resize();
+	memblock_dump_all();
+
+	DBG("Phys. mem: %llx\n", (unsigned long long)memblock_phys_mem_size());
+
+	/* We may need to relocate the flat tree, do it now.
+	 * FIXME .. and the initrd too? */
+	move_device_tree();
+
+	DBG("Scanning CPUs ...\n");
+
+	dt_cpu_ftrs_scan();
+
+	// We can now add the CPU name & PVR to the hardware description
+	seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR));
+
+	/* Retrieve CPU related information from the flat tree
+	 * (altivec support, boot CPU ID, ...)
+	 */
+	of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
+	if (boot_cpuid < 0) {
+		printk("Failed to identify boot CPU !\n");
+		BUG();
+	}
+
+	save_fscr_to_task();
+
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+	/* We'll later wait for secondaries to check in; there are
+	 * NCPUS-1 non-boot CPUs  :-)
+	 */
+	spinning_secondaries = boot_cpu_count - 1;
+#endif
+
+	mmu_early_init_devtree();
+
+#ifdef CONFIG_PPC_POWERNV
+	/* Scan and build the list of machine check recoverable ranges */
+	of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
+#endif
+	epapr_paravirt_early_init();
+
+	/* Now try to figure out if we are running on LPAR and so on */
+	pseries_probe_fw_features();
+
+	/*
+	 * Initialize pkey features and default AMR/IAMR values
+	 */
+	pkey_early_init_devtree();
+
+#ifdef CONFIG_PPC_PS3
+	/* Identify PS3 firmware */
+	if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3"))
+		powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
+#endif
+
+	/* If kexec left a PLPKS password in the DT, get it and clear it */
+	plpks_early_init_devtree();
+
+	tm_init();
+
+	DBG(" <- early_init_devtree()\n");
+}
+
+#ifdef CONFIG_RELOCATABLE
+/*
+ * This function runs before early_init_devtree, so we have to set up
+ * initial_boot_params ourselves.
+ */
+void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
+{
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+	/*
+	 * Scan the memory nodes with add_mem_to_memblock set to 0 so that
+	 * early_init_dt_add_memory_arch() does not touch memblock.
+	 */
+	add_mem_to_memblock = 0;
+	early_init_dt_scan_root();
+	early_init_dt_scan_memory_ppc();
+	add_mem_to_memblock = 1;
+
+	if (size)
+		*size = first_memblock_size;
+}
+#endif
+
+/*******
+ *
+ * New implementation of the OF "find" APIs, return a refcounted
+ * object, call of_node_put() when done.  The device tree and list
+ * are protected by a rw_lock.
+ *
+ * Note that property management will need some locking as well,
+ * this isn't dealt with yet.
+ *
+ *******/
+
+/**
+ * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device
+ * @np: device node of the device
+ *
+ * This looks for a property "ibm,chip-id" in the node or any
+ * of its parents and returns its content, or -1 if it cannot
+ * be found.
+ */
+int of_get_ibm_chip_id(struct device_node *np)
+{
+	u32 id;
+
+	/* Walk from @np up through its parents until the property is found */
+	for (of_node_get(np); np; np = of_get_next_parent(np)) {
+		/*
+		 * Skiboot may produce memory nodes that contain more than one
+		 * cell in chip-id, we only read the first one here.
+		 */
+		if (of_property_read_u32(np, "ibm,chip-id", &id))
+			continue;
+
+		of_node_put(np);
+		return id;
+	}
+
+	return -1;
+}
+EXPORT_SYMBOL(of_get_ibm_chip_id);
+
+/**
+ * cpu_to_chip_id - Return the cpus chip-id
+ * @cpu: The logical cpu number.
+ *
+ * Return the value of the ibm,chip-id property corresponding to the given
+ * logical cpu number. If the chip-id can not be found, returns -1.
+ */
+int cpu_to_chip_id(int cpu)
+{
+	int core = cpu / threads_per_core;
+	struct device_node *cpu_np;
+	int chip_id;
+
+	/* Fast path: the lookup table already holds this core's chip-id */
+	if (chip_id_lookup_table && chip_id_lookup_table[core] != -1)
+		return chip_id_lookup_table[core];
+
+	cpu_np = of_get_cpu_node(cpu, NULL);
+	if (!cpu_np)
+		return -1;
+
+	chip_id = of_get_ibm_chip_id(cpu_np);
+	of_node_put(cpu_np);
+	if (chip_id_lookup_table)
+		chip_id_lookup_table[core] = chip_id;
+	return chip_id;
+}
+EXPORT_SYMBOL(cpu_to_chip_id);
+
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+	int hwid = (int)phys_id;
+#ifdef CONFIG_SMP
+	/*
+	 * Early firmware scanning must use this rather than
+	 * get_hard_smp_processor_id because we don't have pacas allocated
+	 * until memory topology is discovered.
+	 */
+	if (cpu_to_phys_id)
+		return hwid == cpu_to_phys_id[cpu];
+#endif
+	return hwid == get_hard_smp_processor_id(cpu);
+}
diff --git a/arch/powerpc/kernel/prom_entry_64.S b/arch/powerpc/kernel/prom_entry_64.S
new file mode 100644
index 0000000000..f1b8793d28
--- /dev/null
+++ b/arch/powerpc/kernel/prom_entry_64.S
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  PowerPC version 
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ *  This file contains the 64-bit prom entry code.
+ */
+#include <asm/asm-offsets.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/ppc_asm.h>
+
+.section ".text","ax",@progbits
+
+_GLOBAL(enter_prom)
+	mflr	r0
+	std	r0,16(r1)	/* stash LR at 16(r1); reloaded before the final blr */
+        stdu	r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+	/* Because PROM is running in 32b mode, it clobbers the high order half
+	 * of all registers that it saves.  We therefore save those registers
+	 * PROM might touch to the stack.  (r0, r3-r13 are caller saved)
+	 */
+	SAVE_GPR(2, r1)
+	SAVE_GPR(13, r1)
+	SAVE_NVGPRS(r1)
+	mfcr	r10
+	mfmsr	r11
+	std	r10,_CCR(r1)
+	std	r11,_MSR(r1)	/* the 64-bit MSR is restored from here on return */
+
+	/* Put PROM address in SRR0 */
+	mtsrr0	r4
+
+	/* Setup our trampoline return addr in LR */
+	bcl	20,31,$+4	/* branch-and-link to the next insn: LR = address of 0: */
+0:	mflr	r4
+	addi	r4,r4,(1f - 0b)	/* r4 = run-time address of label 1 below */
+       	mtlr	r4
+
+	/* Prepare a 32-bit mode big endian MSR
+	 */
+#ifdef CONFIG_PPC_BOOK3E_64
+	rlwinm	r11,r11,0,1,31	/* keep bits 1-31 of the low word, i.e. clear bit 0 */
+	mtsrr1	r11
+	rfi
+#else /* CONFIG_PPC_BOOK3E_64 */
+	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
+	andc	r11,r11,r12	/* clear MSR_SF and MSR_LE in the MSR image */
+	mtsrr1	r11
+	RFI_TO_KERNEL
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+1:	/* Return from OF */
+	FIXUP_ENDIAN
+
+	/* Just make sure that r1 top 32 bits didn't get
+	 * corrupt by OF
+	 */
+	rldicl	r1,r1,0,32
+
+	/* Restore the MSR (back to 64 bits) */
+	ld	r0,_MSR(r1)
+	MTMSRD(r0)
+        isync
+
+	/* Restore other registers */
+	REST_GPR(2, r1)
+	REST_GPR(13, r1)
+	REST_NVGPRS(r1)
+	ld	r4,_CCR(r1)
+	mtcr	r4
+
+        addi	r1,r1,SWITCH_FRAME_SIZE	/* pop the frame created on entry */
+	ld	r0,16(r1)
+	mtlr    r0
+        blr
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
new file mode 100644
index 0000000000..d464ba4120
--- /dev/null
+++ b/arch/powerpc/kernel/prom_init.c
@@ -0,0 +1,3493 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Procedures for interfacing to Open Firmware.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ * 
+ *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ *    {engebret|bergner}@us.ibm.com 
+ */
+
+#undef DEBUG_PROM
+
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
+#include <linux/stdarg.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/bitops.h>
+#include <linux/pgtable.h>
+#include <linux/printk.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/interrupt.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/mmu.h>
+#include <asm/iommu.h>
+#include <asm/btext.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
+
+#include <linux/linux_logo.h>
+
+/* All of prom_init bss lives here */
+#define __prombss __section(".bss.prominit")
+
+/*
+ * Eventually bump that one up
+ */
+#define DEVTREE_CHUNK_SIZE	0x100000
+
+/*
+ * This is the size of the local memory reserve map that gets copied
+ * into the boot params passed to the kernel. That size is totally
+ * flexible as the kernel just reads the list until it encounters an
+ * entry with size 0, so it can be changed without breaking binary
+ * compatibility
+ */
+#define MEM_RESERVE_MAP_SIZE	8
+
+/*
+ * prom_init() is called very early on, before the kernel text
+ * and data have been mapped to KERNELBASE.  At this point the code
+ * is running at whatever address it has been loaded at.
+ * On ppc32 we compile with -mrelocatable, which means that references
+ * to extern and static variables get relocated automatically.
+ * ppc64 objects are always relocatable, we just need to relocate the
+ * TOC.
+ *
+ * Because OF may have mapped I/O devices into the area starting at
+ * KERNELBASE, particularly on CHRP machines, we can't safely call
+ * OF once the kernel has been mapped to KERNELBASE.  Therefore all
+ * OF calls must be done within prom_init().
+ *
+ * ADDR is used in calls to call_prom.  The 4th and following
+ * arguments to call_prom should be 32-bit values.
+ * On ppc64, 64 bit values are truncated to 32 bits (and
+ * fortunately don't get interpreted as two arguments).
+ */
+#define ADDR(x)		(u32)(unsigned long)(x)
+
+#ifdef CONFIG_PPC64
+#define OF_WORKAROUNDS	0
+#else
+#define OF_WORKAROUNDS	of_workarounds
+static int of_workarounds __prombss;
+#endif
+
+#define OF_WA_CLAIM	1	/* do phys/virt claim separately, then map */
+#define OF_WA_LONGTRAIL	2	/* work around longtrail bugs */
+
+#ifdef DEBUG_PROM
+#define prom_debug(x...)	prom_printf(x)
+#else
+#define prom_debug(x...)	do { } while (0)
+#endif
+
+
+typedef u32 prom_arg_t;
+
+struct prom_args {
+        __be32 service;		/* address of the service name string */
+        __be32 nargs;		/* number of input cells in args[] */
+        __be32 nret;		/* number of return cells after the inputs */
+        __be32 args[10];	/* input cells first, then return cells */
+};
+
+struct prom_t {
+	ihandle root;
+	phandle chosen;
+	int cpu;
+	ihandle stdout;
+	ihandle mmumap;
+	ihandle memory;
+};
+
+struct mem_map_entry {
+	__be64	base;
+	__be64	size;
+};
+
+typedef __be32 cell_t;
+
+extern void __start(unsigned long r3, unsigned long r4, unsigned long r5,
+		    unsigned long r6, unsigned long r7, unsigned long r8,
+		    unsigned long r9);
+
+#ifdef CONFIG_PPC64
+extern int enter_prom(struct prom_args *args, unsigned long entry);
+#else
+static inline int enter_prom(struct prom_args *args, unsigned long entry)
+{
+	return ((int (*)(struct prom_args *))entry)(args);
+}
+#endif
+
+extern void copy_and_flush(unsigned long dest, unsigned long src,
+			   unsigned long size, unsigned long offset);
+
+/* prom structure */
+static struct prom_t __prombss prom;
+
+static unsigned long __prombss prom_entry;
+
+static char __prombss of_stdout_device[256];
+static char __prombss prom_scratch[256];
+
+static unsigned long __prombss dt_header_start;
+static unsigned long __prombss dt_struct_start, dt_struct_end;
+static unsigned long __prombss dt_string_start, dt_string_end;
+
+static unsigned long __prombss prom_initrd_start, prom_initrd_end;
+
+#ifdef CONFIG_PPC64
+static int __prombss prom_iommu_force_on;
+static int __prombss prom_iommu_off;
+static unsigned long __prombss prom_tce_alloc_start;
+static unsigned long __prombss prom_tce_alloc_end;
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+static bool __prombss prom_radix_disable;
+static bool __prombss prom_radix_gtse_disable;
+static bool __prombss prom_xive_disable;
+#endif
+
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
+#endif
+
+struct platform_support {
+	bool hash_mmu;
+	bool radix_mmu;
+	bool radix_gtse;
+	bool xive;
+};
+
+/* Platform codes are now obsolete in the kernel. They are only used within
+ * this file and will ultimately go away too. Feel free to change them if you
+ * need to; they are no longer shared with anything outside of this file.
+ */
+#define PLATFORM_PSERIES	0x0100
+#define PLATFORM_PSERIES_LPAR	0x0101
+#define PLATFORM_LPAR		0x0001
+#define PLATFORM_POWERMAC	0x0400
+#define PLATFORM_GENERIC	0x0500
+
+static int __prombss of_platform;
+
+static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
+
+static unsigned long __prombss prom_memory_limit;
+
+static unsigned long __prombss alloc_top;
+static unsigned long __prombss alloc_top_high;
+static unsigned long __prombss alloc_bottom;
+static unsigned long __prombss rmo_top;
+static unsigned long __prombss ram_top;
+
+static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __prombss mem_reserve_cnt;
+
+static cell_t __prombss regbuf[1024];
+
+static bool  __prombss rtas_has_query_cpu_stopped;
+
+
+/*
+ * Error results ... some OF calls will return "-1" on error, some
+ * will return 0, some will return either. To simplify, here are
+ * macros to use with any ihandle or phandle return value to check if
+ * it is valid
+ */
+
+#define PROM_ERROR		(-1u)
+#define PHANDLE_VALID(p)	((p) != 0 && (p) != PROM_ERROR)
+#define IHANDLE_VALID(i)	((i) != 0 && (i) != PROM_ERROR)
+
+/* Copied from lib/string.c and lib/kstrtox.c */
+
+static int __init prom_strcmp(const char *cs, const char *ct)
+{
+	const unsigned char *a = (const unsigned char *)cs;
+	const unsigned char *b = (const unsigned char *)ct;
+
+	/* Walk both strings in step until they differ or both end */
+	for (; *a == *b; a++, b++) {
+		if (*a == '\0')
+			return 0;
+	}
+
+	/* the first differing byte decides, compared as unsigned char */
+	return *a < *b ? -1 : 1;
+}
+
+static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n)
+{
+	size_t copied = 0;
+	ssize_t rc;
+
+	if (n == 0 || n > INT_MAX)
+		return -E2BIG;
+
+	// Copy characters until the source ends or dest is full
+	while (copied < n && src[copied] != '\0') {
+		dest[copied] = src[copied];
+		copied++;
+	}
+
+	// A full dest has no room for the nul: sacrifice the last character
+	rc = copied;
+	if (copied == n) {
+		copied--;
+		rc = -E2BIG;
+	}
+
+	// Zero-fill the remainder, which also terminates the string
+	while (copied < n)
+		dest[copied++] = '\0';
+	return rc;
+}
+
+static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
+{
+	const unsigned char *a = (const unsigned char *)cs;
+	const unsigned char *b = (const unsigned char *)ct;
+	size_t i;
+
+	/* Compare at most count bytes, stopping early at a shared nul */
+	for (i = 0; i < count; i++) {
+		if (a[i] != b[i])
+			return a[i] < b[i] ? -1 : 1;
+		if (a[i] == '\0')
+			break;
+	}
+	return 0;
+}
+
+static size_t __init prom_strlen(const char *s)
+{
+	size_t len = 0;
+
+	while (s[len] != '\0')
+		len++;
+	return len;
+}
+
+static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *a = cs, *b = ct;
+	size_t i;
+
+	for (i = 0; i < count; i++)
+		if (a[i] != b[i])
+			return a[i] - b[i];	/* difference at the first mismatch */
+	return 0;
+}
+
+static char __init *prom_strstr(const char *s1, const char *s2)
+{
+	size_t needle_len = prom_strlen(s2);
+	size_t left;
+
+	/* An empty needle matches at the very start */
+	if (needle_len == 0)
+		return (char *)s1;
+
+	/* Slide over s1 while enough characters remain for a match */
+	for (left = prom_strlen(s1); left >= needle_len; left--, s1++) {
+		if (prom_memcmp(s1, s2, needle_len) == 0)
+			return (char *)s1;
+	}
+	return NULL;
+}
+
+static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
+{
+	size_t used = prom_strlen(dest);
+	size_t wanted = prom_strlen(src);
+	size_t room, ncopy;
+
+	/* This would be a bug */
+	if (used >= count)
+		return count;
+
+	/* Copy what fits, always leaving one byte for the terminator */
+	room = count - used;
+	ncopy = wanted < room ? wanted : room - 1;
+	memcpy(dest + used, src, ncopy);
+	dest[used + ncopy] = 0;
+
+	/* Length the full concatenation would have had */
+	return used + wanted;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static int __init prom_strtobool(const char *s, bool *res)
+{
+	if (!s)
+		return -EINVAL;
+
+	switch (s[0]) {	/* only the first one or two characters are examined */
+	case 'y':
+	case 'Y':
+	case '1':
+		*res = true;
+		return 0;
+	case 'n':
+	case 'N':
+	case '0':
+		*res = false;
+		return 0;
+	case 'o':
+	case 'O':
+		switch (s[1]) {	/* "on" / "off": the second character decides */
+		case 'n':
+		case 'N':
+			*res = true;
+			return 0;
+		case 'f':
+		case 'F':
+			*res = false;
+			return 0;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return -EINVAL;	/* unrecognised input; *res is left untouched */
+}
+#endif
+
+/* This is the one and *ONLY* place where we actually call open
+ * firmware.
+ */
+
+static int __init call_prom(const char *service, int nargs, int nret, ...)
+{
+	int i;
+	struct prom_args args;	/* NOTE(review): nargs + nret is assumed to fit args.args[] */
+	va_list list;
+
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
+
+	va_start(list, nret);
+	for (i = 0; i < nargs; i++)	/* every argument is fetched as a 32-bit prom_arg_t */
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
+	va_end(list);
+
+	for (i = 0; i < nret; i++)	/* clear the return cells */
+		args.args[nargs+i] = 0;
+
+	if (enter_prom(&args, prom_entry) < 0)
+		return PROM_ERROR;
+
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;	/* first return cell */
+}
+
+static int __init call_prom_ret(const char *service, int nargs, int nret,
+				prom_arg_t *rets, ...)
+{
+	int i;
+	struct prom_args args;	/* NOTE(review): nargs + nret is assumed to fit args.args[] */
+	va_list list;
+
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
+
+	va_start(list, rets);
+	for (i = 0; i < nargs; i++)	/* every argument is fetched as a 32-bit prom_arg_t */
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
+	va_end(list);
+
+	for (i = 0; i < nret; i++)	/* clear the return cells */
+		args.args[nargs+i] = 0;
+
+	if (enter_prom(&args, prom_entry) < 0)
+		return PROM_ERROR;
+
+	if (rets != NULL)	/* return cells 1..nret-1 go to rets[0..nret-2] */
+		for (i = 1; i < nret; ++i)
+			rets[i-1] = be32_to_cpu(args.args[nargs+i]);
+
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;	/* first return cell */
+}
+
+
+static void __init prom_print(const char *msg)
+{
+	const char *start = msg, *end;
+
+	if (prom.stdout == 0)
+		return;
+
+	while (*start != 0) {
+		for (end = start; *end != 0 && *end != '\n'; ++end)
+			;
+		if (end != start)
+			call_prom("write", 3, 1, prom.stdout, start, end - start);
+		if (*end == 0)
+			break;
+		call_prom("write", 3, 1, prom.stdout, ADDR("\r\n"), 2);
+		start = end + 1;	/* resume after the '\n' */
+	}
+}
+
+
+/*
+ * Both prom_print_hex & prom_print_dec takes an unsigned long as input so that
+ * we do not need __udivdi3 or __umoddi3 on 32bits.
+ */
+static void __init prom_print_hex(unsigned long val)
+{
+	int pos, nibbles = sizeof(val)*2;
+	char buf[sizeof(val)*2+1];
+
+	/* Fill the buffer from the least significant nibble backwards */
+	buf[nibbles] = '\0';
+	for (pos = nibbles; pos-- > 0; val >>= 4) {
+		unsigned int digit = val & 0xf;
+
+		buf[pos] = digit < 10 ? '0' + digit : 'a' + digit - 10;
+	}
+	call_prom("write", 3, 1, prom.stdout, buf, nibbles);
+}
+
+/* max number of decimal digits in an unsigned long */
+#define UL_DIGITS 21
+static void __init prom_print_dec(unsigned long val)
+{
+	char buf[UL_DIGITS+1];
+	int first = UL_DIGITS;
+
+	/* Emit digits right to left; a zero value still prints one '0' */
+	do {
+		buf[--first] = '0' + (val % 10);
+		val /= 10;
+	} while (val != 0 && first > 0);
+
+	/* the digits now occupy buf[first] .. buf[UL_DIGITS-1] */
+	call_prom("write", 3, 1, prom.stdout, buf + first,
+		  UL_DIGITS - first);
+}
+
+__printf(1, 2)
+static void __init prom_printf(const char *format, ...)
+{
+	const char *p, *q, *s;
+	va_list args;
+	unsigned long v;
+	long vs;
+	int n = 0;	/* number of 'l' length modifiers on the current conversion */
+
+	va_start(args, format);
+	for (p = format; *p != 0; p = q) {
+		for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q)
+			;
+		if (q > p)	/* write the literal text up to the next '\n' or '%' */
+			call_prom("write", 3, 1, prom.stdout, p, q - p);
+		if (*q == 0)
+			break;
+		if (*q == '\n') {
+			++q;
+			call_prom("write", 3, 1, prom.stdout,
+				  ADDR("\r\n"), 2);
+			continue;
+		}
+		++q;
+		if (*q == 0)
+			break;
+		/* 'l' modifiers apply to this conversion only: count afresh, or
+		 * "%lx %x" would fetch its second argument as a long. */
+		for (n = 0; *q == 'l'; ++n)
+			++q;
+		switch (*q) {
+		case 's':
+			++q;
+			s = va_arg(args, const char *);
+			prom_print(s);
+			break;
+		case 'x':
+			++q;
+			switch (n) {
+			case 0:
+				v = va_arg(args, unsigned int);
+				break;
+			case 1:
+				v = va_arg(args, unsigned long);
+				break;
+			case 2:
+			default:
+				v = va_arg(args, unsigned long long);
+				break;
+			}
+			prom_print_hex(v);
+			break;
+		case 'u':
+			++q;
+			switch (n) {
+			case 0:
+				v = va_arg(args, unsigned int);
+				break;
+			case 1:
+				v = va_arg(args, unsigned long);
+				break;
+			case 2:
+			default:
+				v = va_arg(args, unsigned long long);
+				break;
+			}
+			prom_print_dec(v);
+			break;
+		case 'd':
+			++q;
+			switch (n) {
+			case 0:
+				vs = va_arg(args, int);
+				break;
+			case 1:
+				vs = va_arg(args, long);
+				break;
+			case 2:
+			default:
+				vs = va_arg(args, long long);
+				break;
+			}
+			if (vs < 0) {
+				prom_print("-");
+				vs = -vs;
+			}
+			prom_print_dec(vs);
+			break;
+		}
+	}
+	va_end(args);
+}
+
+
+/*
+ * Claim size bytes at virt from firmware.
+ *
+ * Normally this is a single "claim" client call whose result is returned.
+ * When align is 0 and the OF_WA_CLAIM workaround is active, the range is
+ * instead claimed from prom.memory and prom.mmumap separately and then
+ * mapped explicitly; that path returns virt on success and -1 on failure.
+ */
+static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
+				unsigned long align)
+{
+	prom_arg_t result;
+	int ret;
+
+	/* Common case: firmware handles everything in one call */
+	if (align != 0 || !(OF_WORKAROUNDS & OF_WA_CLAIM))
+		return call_prom("claim", 3, 1, (prom_arg_t)virt,
+				 (prom_arg_t)size, (prom_arg_t)align);
+
+	/*
+	 * Old OF requires we claim physical and virtual separately
+	 * and then map explicitly (assuming virtual mode)
+	 */
+	ret = call_prom_ret("call-method", 5, 2, &result,
+			    ADDR("claim"), prom.memory,
+			    align, size, virt);
+	if (ret != 0 || result == -1)
+		return -1;
+
+	ret = call_prom_ret("call-method", 5, 2, &result,
+			    ADDR("claim"), prom.mmumap,
+			    align, size, virt);
+	if (ret != 0) {
+		/* give the physical range back before reporting failure */
+		call_prom("call-method", 4, 1, ADDR("release"),
+			  prom.memory, size, virt);
+		return -1;
+	}
+
+	/* the 0x12 is M (coherence) + PP == read/write */
+	call_prom("call-method", 6, 1,
+		  ADDR("map"), prom.mmumap, 0x12, size, virt, virt);
+	return virt;
+}
+
+/*
+ * Print reason on the firmware console and stop.  Never returns: on
+ * PowerMac it executes a trap instruction, otherwise it asks firmware to
+ * "exit", and in either case ends in an infinite loop.
+ */
+static void __init __attribute__((noreturn)) prom_panic(const char *reason)
+{
+	prom_print(reason);
+	/* Do not call exit because it clears the screen on pmac
+	 * it also causes some sort of double-fault on early pmacs */
+	if (of_platform == PLATFORM_POWERMAC)
+		asm("trap\n");
+
+	/* ToDo: should put up an SRC here on pSeries */
+	call_prom("exit", 0, 0);
+
+	for (;;)			/* should never get here */
+		;
+}
+
+
+/*
+ * Advance *nodep to the next node of a depth-first walk of the firmware
+ * device tree: first child, else next peer, else the next peer of the
+ * closest ancestor that has one.  Starting from *nodep == 0 the "peer"
+ * call is issued on node 0.
+ *
+ * Returns 1 with *nodep updated, or 0 once the walk is exhausted.
+ */
+static int __init prom_next_node(phandle *nodep)
+{
+	phandle node = *nodep;
+
+	/* descend first, when there is a current node */
+	if (node != 0) {
+		*nodep = call_prom("child", 1, 1, node);
+		if (*nodep != 0)
+			return 1;
+	}
+
+	/* then try the sibling */
+	*nodep = call_prom("peer", 1, 1, node);
+	if (*nodep != 0)
+		return 1;
+
+	/* otherwise climb until some ancestor has a sibling */
+	for (;;) {
+		node = call_prom("parent", 1, 1, node);
+		if (node == 0)
+			return 0;
+		*nodep = call_prom("peer", 1, 1, node);
+		if (*nodep != 0)
+			return 1;
+	}
+}
+
+/*
+ * Thin wrapper around the "getprop" client call for property pname of
+ * node, with value/valuelen describing the destination buffer.  The buffer
+ * address and length are truncated to 32 bits before being handed to
+ * firmware.  Returns call_prom()'s result unchanged; callers in this file
+ * treat a positive result as the property length.
+ */
+static inline int __init prom_getprop(phandle node, const char *pname,
+				      void *value, size_t valuelen)
+{
+	return call_prom("getprop", 4, 1, node, ADDR(pname),
+			 (u32)(unsigned long) value, (u32) valuelen);
+}
+
+/*
+ * Thin wrapper around the "getproplen" client call for property pname of
+ * node.  Returns call_prom()'s result unchanged.
+ */
+static inline int __init prom_getproplen(phandle node, const char *pname)
+{
+	return call_prom("getproplen", 2, 1, node, ADDR(pname));
+}
+
+/*
+ * Append the string q followed by a single space at *str and advance *str
+ * past what was written.  No NUL terminator is stored and no bounds are
+ * checked; the caller owns both.
+ */
+static void __init add_string(char **str, const char *q)
+{
+	char *out = *str;
+
+	for (; *q != '\0'; q++, out++)
+		*out = *q;
+	*out = ' ';
+	*str = out + 1;
+}
+
+/*
+ * Format x as lower-case hex without leading zeroes (at least one digit).
+ * The returned pointer refers into a static buffer that is overwritten by
+ * the next call.
+ */
+static char *__init tohex(unsigned int x)
+{
+	static const char digits[] __initconst = "0123456789abcdef";
+	static char result[9] __prombss;
+	int pos = 8;
+
+	result[pos] = 0;
+	/* emit digits from least significant, right to left */
+	do {
+		result[--pos] = digits[x & 0xf];
+		x >>= 4;
+	} while (pos > 0 && x != 0);
+
+	return &result[pos];
+}
+
+/*
+ * Set property pname of node to the valuelen bytes at value.
+ *
+ * Without the OF_WA_LONGTRAIL workaround this is a plain "setprop" client
+ * call.  With it, a command string of the form
+ *   "dev <nodename> <value> <valuelen> <pname addr> <pname len> property"
+ * (numbers in hex) is built and handed to "interpret"; nodename is only
+ * used on that path.
+ *
+ * NOTE(review): cmd[] is a fixed 256 bytes and add_string() does not check
+ * bounds, so this presumably relies on nodename being short - confirm
+ * against callers.
+ */
+static int __init prom_setprop(phandle node, const char *nodename,
+			       const char *pname, void *value, size_t valuelen)
+{
+	char cmd[256], *p;
+
+	if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL))
+		return call_prom("setprop", 4, 1, node, ADDR(pname),
+				 (u32)(unsigned long) value, (u32) valuelen);
+
+	/* gah... setprop doesn't work on longtrail, have to use interpret */
+	p = cmd;
+	add_string(&p, "dev");
+	add_string(&p, nodename);
+	/* tohex() returns a static buffer, so each result is copied at once */
+	add_string(&p, tohex((u32)(unsigned long) value));
+	add_string(&p, tohex(valuelen));
+	add_string(&p, tohex(ADDR(pname)));
+	add_string(&p, tohex(prom_strlen(pname)));
+	add_string(&p, "property");
+	/* add_string() leaves p just past the trailing space; terminate there */
+	*p = 0;
+	return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
+}
+
+/*
+ * We can't use the standard versions because of relocation headaches.
+ *
+ * These are ASCII-only character classification helpers.  They are plain
+ * macros and evaluate their argument more than once, so do not pass an
+ * expression with side effects.
+ */
+#define prom_isxdigit(c) \
+	(('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
+
+#define prom_isdigit(c)	('0' <= (c) && (c) <= '9')
+#define prom_islower(c)	('a' <= (c) && (c) <= 'z')
+/* Upper-case c when it is a lower-case letter, otherwise yield c unchanged */
+#define prom_toupper(c)	(prom_islower(c) ? ((c) - 'a' + 'A') : (c))
+
+/*
+ * Parse an unsigned number at cp.  A leading "0x"/"0X" selects base 16, any
+ * other leading '0' selects base 8, everything else is base 10.  Parsing
+ * stops at the first character that is not a digit of the selected base.
+ * When endp is non-NULL, *endp receives the position where parsing stopped.
+ * Overflow is not detected.
+ */
+static unsigned long __init prom_strtoul(const char *cp, const char **endp)
+{
+	unsigned long acc = 0;
+	unsigned long radix = 10;
+
+	if (*cp == '0') {
+		cp++;
+		if (prom_toupper(*cp) == 'X') {
+			radix = 16;
+			cp++;
+		} else {
+			radix = 8;
+		}
+	}
+
+	for (; prom_isxdigit(*cp); cp++) {
+		unsigned long digit;
+
+		if (prom_isdigit(*cp))
+			digit = *cp - '0';
+		else
+			digit = prom_toupper(*cp) - 'A' + 10;
+
+		/* a hex digit that is too large for the base ends the number */
+		if (digit >= radix)
+			break;
+		acc = acc * radix + digit;
+	}
+
+	if (endp)
+		*endp = cp;
+
+	return acc;
+}
+
+/*
+ * Parse a size such as "512M": a number understood by prom_strtoul()
+ * optionally followed by K/k, M/m or G/g, which scale it by 2^10, 2^20 or
+ * 2^30.  *retptr is left pointing just past the number and its suffix.
+ * retptr must not be NULL.
+ */
+static unsigned long __init prom_memparse(const char *ptr, const char **retptr)
+{
+	unsigned long value = prom_strtoul(ptr, retptr);
+	char suffix = **retptr;
+	int shift;
+
+	/*
+	 * We can't use a switch here because GCC *may* generate a
+	 * jump table which won't work, because we're not running at
+	 * the address we're linked at.
+	 */
+	if (suffix == 'K' || suffix == 'k')
+		shift = 10;
+	else if (suffix == 'M' || suffix == 'm')
+		shift = 20;
+	else if (suffix == 'G' || suffix == 'g')
+		shift = 30;
+	else
+		shift = 0;
+
+	if (shift) {
+		value <<= shift;
+		(*retptr)++;
+	}
+
+	return value;
+}
+
+/*
+ * Early parsing of the command line passed to the kernel, used for
+ * "mem=x" and the options that affect the iommu
+ */
+static void __init early_cmdline_parse(void)
+{
+	const char *opt;
+
+	char *p;
+	int l = 0;
+
+	prom_cmd_line[0] = 0;
+	p = prom_cmd_line;
+
+	/* Read /chosen "bootargs" unless the built-in command line is forced */
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
+		l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
+
+	/* Append CONFIG_CMDLINE when extending, or when no bootargs were read */
+	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
+		prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+			     sizeof(prom_cmd_line));
+
+	prom_printf("command line: %s\n", prom_cmd_line);
+
+#ifdef CONFIG_PPC64
+	/* "iommu=off" / "iommu=force" */
+	opt = prom_strstr(prom_cmd_line, "iommu=");
+	if (opt) {
+		prom_printf("iommu opt is: %s\n", opt);
+		opt += 6;
+		while (*opt && *opt == ' ')
+			opt++;
+		if (!prom_strncmp(opt, "off", 3))
+			prom_iommu_off = 1;
+		else if (!prom_strncmp(opt, "force", 5))
+			prom_iommu_force_on = 1;
+	}
+#endif
+	/* "mem=<size>[KMG]" sets prom_memory_limit */
+	opt = prom_strstr(prom_cmd_line, "mem=");
+	if (opt) {
+		opt += 4;
+		prom_memory_limit = prom_memparse(opt, (const char **)&opt);
+#ifdef CONFIG_PPC64
+		/* Align to 16 MB == size of ppc64 large page */
+		prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000);
+#endif
+	}
+
+#ifdef CONFIG_PPC_PSERIES
+	/* Default follows the kernel configuration, the cmdline may override */
+	prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+	opt = prom_strstr(prom_cmd_line, "disable_radix");
+	if (opt) {
+		opt += 13;
+		if (*opt && *opt == '=') {
+			bool val;
+
+			/*
+			 * "disable_radix=<bool>": a non-zero return from
+			 * prom_strtobool() leaves radix enabled.
+			 */
+			if (prom_strtobool(++opt, &val))
+				prom_radix_disable = false;
+			else
+				prom_radix_disable = val;
+		} else
+			/* bare "disable_radix" */
+			prom_radix_disable = true;
+	}
+	if (prom_radix_disable)
+		prom_debug("Radix disabled from cmdline\n");
+
+	opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on");
+	if (opt) {
+		prom_radix_gtse_disable = true;
+		prom_debug("Radix GTSE disabled from cmdline\n");
+	}
+
+	opt = prom_strstr(prom_cmd_line, "xive=off");
+	if (opt) {
+		prom_xive_disable = true;
+		prom_debug("XIVE disabled from cmdline\n");
+	}
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+	/* "svm=<bool>"; a value that fails to parse is ignored */
+	opt = prom_strstr(prom_cmd_line, "svm=");
+	if (opt) {
+		bool val;
+
+		opt += sizeof("svm=") - 1;
+		if (!prom_strtobool(opt, &val))
+			prom_svm_enable = val;
+	}
+#endif /* CONFIG_PPC_SVM */
+}
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * The architecture vector has an array of PVR mask/value pairs,
+ * followed by # option vectors - 1, followed by the option vectors.
+ *
+ * See prom.h for the definition of the bits specified in the
+ * architecture vector.
+ */
+
+/* Firmware expects the value to be n - 1, where n is the # of vectors */
+#define NUM_VECTORS(n)		((n) - 1)
+
+/*
+ * Firmware expects 1 + n - 2, where n is the length of the option vector in
+ * bytes. The 1 accounts for the length byte itself, the - 2 .. ?
+ */
+#define VECTOR_LENGTH(n)	(1 + (n) - 2)
+
+/*
+ * Byte layouts of the individual option vectors embedded in
+ * struct ibm_arch_vec.  All of them are packed; multi-byte fields are
+ * big-endian.
+ */
+
+/* option vector 1: supported architecture versions (OV1_PPC_* bits) */
+struct option_vector1 {
+	u8 byte1;
+	u8 arch_versions;
+	u8 arch_versions3;
+} __packed;
+
+/* option vector 2: Open Firmware options supported */
+struct option_vector2 {
+	u8 byte1;
+	__be16 reserved;
+	__be32 real_base;
+	__be32 real_size;
+	__be32 virt_base;
+	__be32 virt_size;
+	__be32 load_base;
+	__be32 min_rma;
+	__be32 min_load;
+	u8 min_rma_percent;
+	u8 max_pft_size;
+} __packed;
+
+/* option vector 3: processor options supported */
+struct option_vector3 {
+	u8 byte1;
+	u8 byte2;
+} __packed;
+
+/* option vector 4: IBM PAPR implementation */
+struct option_vector4 {
+	u8 byte1;
+	u8 min_vp_cap;
+} __packed;
+
+/* option vector 5: PAPR/OF options */
+struct option_vector5 {
+	u8 byte1;
+	u8 byte2;
+	u8 byte3;
+	u8 cmo;
+	u8 associativity;
+	u8 bin_opts;
+	u8 micro_checkpoint;
+	u8 reserved0;
+	__be32 max_cpus;
+	__be16 papr_level;
+	__be16 reserved1;
+	u8 platform_facilities;
+	u8 reserved2;
+	__be16 reserved3;
+	u8 subprocessors;
+	u8 byte22;
+	u8 intarch;
+	u8 mmu;
+	u8 hash_ext;
+	u8 radix_ext;
+} __packed;
+
+/* option vector 6: IBM PAPR hints */
+struct option_vector6 {
+	u8 reserved;
+	u8 secondary_pteg;
+	u8 os_name;
+} __packed;
+
+/* option vector 7: OS Identification */
+struct option_vector7 {
+	u8 os_id[256];
+} __packed;
+
+/*
+ * Complete architecture vector as passed to the
+ * "ibm,client-architecture-support" method: the PVR mask/value table, the
+ * vector count, then each option vector preceded by its length byte.
+ * Packed, so the in-memory layout is exactly the byte stream firmware reads.
+ */
+struct ibm_arch_vec {
+	struct { u32 mask, val; } pvrs[14];
+
+	u8 num_vectors;
+
+	u8 vec1_len;
+	struct option_vector1 vec1;
+
+	u8 vec2_len;
+	struct option_vector2 vec2;
+
+	u8 vec3_len;
+	struct option_vector3 vec3;
+
+	u8 vec4_len;
+	struct option_vector4 vec4;
+
+	u8 vec5_len;
+	struct option_vector5 vec5;
+
+	u8 vec6_len;
+	struct option_vector6 vec6;
+
+	u8 vec7_len;
+	struct option_vector7 vec7;
+} __packed;
+
+/*
+ * Read-only template for the architecture vector.  It is copied into
+ * ibm_architecture_vec by prom_check_platform_support(), which then fills
+ * in vec7.os_id and the vec5 mmu/radix_ext/intarch fields;
+ * prom_send_capabilities() overwrites vec5.max_cpus before the call.
+ *
+ * NOTE(review): struct ibm_arch_vec lays out seven option vectors
+ * (vec1..vec7) while num_vectors below is NUM_VECTORS(6) - confirm this is
+ * what firmware expects.
+ */
+static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
+	.pvrs = {
+		{
+			.mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
+			.val  = cpu_to_be32(0x003a0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER6 */
+			.val  = cpu_to_be32(0x003e0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER7 */
+			.val  = cpu_to_be32(0x003f0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER8E */
+			.val  = cpu_to_be32(0x004b0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER8NVL */
+			.val  = cpu_to_be32(0x004c0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER8 */
+			.val  = cpu_to_be32(0x004d0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER9 */
+			.val  = cpu_to_be32(0x004e0000),
+		},
+		{
+			.mask = cpu_to_be32(0xffff0000), /* POWER10 */
+			.val  = cpu_to_be32(0x00800000),
+		},
+		{
+			.mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */
+			.val  = cpu_to_be32(0x0f000006),
+		},
+		{
+			.mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
+			.val  = cpu_to_be32(0x0f000005),
+		},
+		{
+			.mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
+			.val  = cpu_to_be32(0x0f000004),
+		},
+		{
+			.mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */
+			.val  = cpu_to_be32(0x0f000003),
+		},
+		{
+			.mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */
+			.val  = cpu_to_be32(0x0f000002),
+		},
+		{
+			.mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */
+			.val  = cpu_to_be32(0x0f000001),
+		},
+	},
+
+	.num_vectors = NUM_VECTORS(6),
+
+	.vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)),
+	.vec1 = {
+		.byte1 = 0,
+		.arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
+				 OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+		.arch_versions3 = OV1_PPC_3_00 | OV1_PPC_3_1,
+	},
+
+	.vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
+	/* option vector 2: Open Firmware options supported */
+	.vec2 = {
+		.byte1 = OV2_REAL_MODE,
+		.reserved = 0,
+		.real_base = cpu_to_be32(0xffffffff),
+		.real_size = cpu_to_be32(0xffffffff),
+		.virt_base = cpu_to_be32(0xffffffff),
+		.virt_size = cpu_to_be32(0xffffffff),
+		.load_base = cpu_to_be32(0xffffffff),
+		.min_rma = cpu_to_be32(512),		/* 512MB min RMA */
+		.min_load = cpu_to_be32(0xffffffff),	/* full client load */
+		.min_rma_percent = 0,	/* min RMA percentage of total RAM */
+		.max_pft_size = 48,	/* max log_2(hash table size) */
+	},
+
+	.vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)),
+	/* option vector 3: processor options supported */
+	.vec3 = {
+		.byte1 = 0,			/* don't ignore, don't halt */
+		.byte2 = OV3_FP | OV3_VMX | OV3_DFP,
+	},
+
+	.vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)),
+	/* option vector 4: IBM PAPR implementation */
+	.vec4 = {
+		.byte1 = 0,			/* don't halt */
+		.min_vp_cap = OV4_MIN_ENT_CAP,	/* minimum VP entitled capacity */
+	},
+
+	.vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)),
+	/* option vector 5: PAPR/OF options */
+	.vec5 = {
+		.byte1 = 0,				/* don't ignore, don't halt */
+		.byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
+		OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
+#ifdef CONFIG_PCI_MSI
+		/* PCIe/MSI support.  Without MSI full PCIe is not supported */
+		OV5_FEAT(OV5_MSI),
+#else
+		0,
+#endif
+		.byte3 = 0,
+		.cmo =
+#ifdef CONFIG_PPC_SMLPAR
+		OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
+#else
+		0,
+#endif
+		.associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) |
+		OV5_FEAT(OV5_FORM2_AFFINITY),
+		.bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT),
+		.micro_checkpoint = 0,
+		.reserved0 = 0,
+		.max_cpus = cpu_to_be32(NR_CPUS),	/* number of cores supported */
+		.papr_level = 0,
+		.reserved1 = 0,
+		.platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842),
+		.reserved2 = 0,
+		.reserved3 = 0,
+		.subprocessors = 1,
+		.byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
+		/* the next four are decided at boot, see prom_check_platform_support() */
+		.intarch = 0,
+		.mmu = 0,
+		.hash_ext = 0,
+		.radix_ext = 0,
+	},
+
+	/* option vector 6: IBM PAPR hints */
+	.vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)),
+	.vec6 = {
+		.reserved = 0,
+		.secondary_pteg = 0,
+		.os_name = OV6_LINUX,
+	},
+
+	/* option vector 7: OS Identification */
+	/* vec7.os_id itself is filled in at boot from linux_banner */
+	.vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)),
+};
+
+/*
+ * Writable copy of the architecture vector: initialised from
+ * ibm_architecture_vec_template and then adjusted before its address is
+ * passed to firmware in prom_send_capabilities().
+ */
+static struct ibm_arch_vec __prombss ibm_architecture_vec  ____cacheline_aligned;
+
+/* Old method - ELF header with PT_NOTE sections only works on BE */
+#ifdef __BIG_ENDIAN__
+/*
+ * A fabricated 32-bit big-endian ELF header with two PT_NOTE program
+ * headers, one pointing at a "PowerPC" note and one at an
+ * "IBM,RPA-Client-Config" note.  prom_send_capabilities() passes it to the
+ * elf-loader package's "process-elf-header" method when the
+ * ibm,client-architecture-support call is not available.
+ */
+static const struct fake_elf {
+	Elf32_Ehdr	elfhdr;
+	Elf32_Phdr	phdr[2];
+	struct chrpnote {
+		u32	namesz;
+		u32	descsz;
+		u32	type;
+		char	name[8];	/* "PowerPC" */
+		struct chrpdesc {
+			u32	real_mode;
+			u32	real_base;
+			u32	real_size;
+			u32	virt_base;
+			u32	virt_size;
+			u32	load_base;
+		} chrpdesc;
+	} chrpnote;
+	struct rpanote {
+		u32	namesz;
+		u32	descsz;
+		u32	type;
+		char	name[24];	/* "IBM,RPA-Client-Config" */
+		struct rpadesc {
+			u32	lpar_affinity;
+			u32	min_rmo_size;
+			u32	min_rmo_percent;
+			u32	max_pft_size;
+			u32	splpar;
+			u32	min_load;
+			u32	new_mem_def;
+			u32	ignore_me;
+		} rpadesc;
+	} rpanote;
+} fake_elf __initconst = {
+	.elfhdr = {
+		.e_ident = { 0x7f, 'E', 'L', 'F',
+			     ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
+		.e_type = ET_EXEC,	/* yeah right */
+		.e_machine = EM_PPC,
+		.e_version = EV_CURRENT,
+		.e_phoff = offsetof(struct fake_elf, phdr),
+		.e_phentsize = sizeof(Elf32_Phdr),
+		.e_phnum = 2
+	},
+	/* each program header covers exactly one of the note structures */
+	.phdr = {
+		[0] = {
+			.p_type = PT_NOTE,
+			.p_offset = offsetof(struct fake_elf, chrpnote),
+			.p_filesz = sizeof(struct chrpnote)
+		}, [1] = {
+			.p_type = PT_NOTE,
+			.p_offset = offsetof(struct fake_elf, rpanote),
+			.p_filesz = sizeof(struct rpanote)
+		}
+	},
+	.chrpnote = {
+		.namesz = sizeof("PowerPC"),
+		.descsz = sizeof(struct chrpdesc),
+		.type = 0x1275,
+		.name = "PowerPC",
+		.chrpdesc = {
+			.real_mode = ~0U,	/* ~0 means "don't care" */
+			.real_base = ~0U,
+			.real_size = ~0U,
+			.virt_base = ~0U,
+			.virt_size = ~0U,
+			.load_base = ~0U
+		},
+	},
+	.rpanote = {
+		.namesz = sizeof("IBM,RPA-Client-Config"),
+		.descsz = sizeof(struct rpadesc),
+		.type = 0x12759999,
+		.name = "IBM,RPA-Client-Config",
+		/* ignore_me is not listed and is therefore zero-initialised */
+		.rpadesc = {
+			.lpar_affinity = 0,
+			.min_rmo_size = 64,	/* in megabytes */
+			.min_rmo_percent = 0,
+			.max_pft_size = 48,	/* 2^48 bytes max PFT size */
+			.splpar = 1,
+			.min_load = ~0U,
+			.new_mem_def = 0
+		}
+	}
+};
+#endif /* __BIG_ENDIAN__ */
+
+/*
+ * Return the number of SMT threads per core, taken from the first "cpu"
+ * node found in the device tree.  Falls back to 1 when no usable cpu node
+ * exists or the count is outside 1..64.
+ */
+static int __init prom_count_smt_threads(void)
+{
+	phandle node = 0;
+	char type[64];
+	unsigned int nthreads;
+
+	/* Pick up the first CPU node we can find */
+	while (prom_next_node(&node)) {
+		type[0] = 0;
+		prom_getprop(node, "device_type", type, sizeof(type));
+
+		if (prom_strcmp(type, "cpu") != 0)
+			continue;
+
+		/*
+		 * The property holds one 4-byte entry per smt thread.  All
+		 * cpus should have the same number of smt threads, so the
+		 * first cpu node decides.
+		 */
+		nthreads = prom_getproplen(node, "ibm,ppc-interrupt-server#s");
+		if (nthreads == PROM_ERROR)
+			break;
+		nthreads /= 4;
+		prom_debug("Found %lu smt threads per core\n", (unsigned long)nthreads);
+
+		/* Sanity check */
+		if (nthreads >= 1 && nthreads <= 64)
+			return nthreads;
+
+		prom_printf("Threads per core %lu out of bounds, assuming 1\n",
+			    (unsigned long)nthreads);
+		return 1;
+	}
+	prom_debug("No threads found, assuming 1 per core\n");
+
+	return 1;
+
+}
+
+/*
+ * Record in support which MMU models the platform offers, given the
+ * (already masked) MMU byte of ibm,arch-vec-5-platform-support.
+ * Unrecognised values leave support untouched.
+ */
+static void __init prom_parse_mmu_model(u8 val,
+					struct platform_support *support)
+{
+	if (val == OV5_FEAT(OV5_MMU_DYNAMIC) ||
+	    val == OV5_FEAT(OV5_MMU_EITHER)) {
+		/* Either Available */
+		prom_debug("MMU - either supported\n");
+		support->radix_mmu = !prom_radix_disable;
+		support->hash_mmu = true;
+	} else if (val == OV5_FEAT(OV5_MMU_RADIX)) {
+		/* Only Radix */
+		prom_debug("MMU - radix only\n");
+		/*
+		 * If we __have__ to do radix, we're better off ignoring
+		 * the command line rather than not booting.
+		 */
+		if (prom_radix_disable)
+			prom_printf("WARNING: Ignoring cmdline option disable_radix\n");
+		support->radix_mmu = true;
+	} else if (val == OV5_FEAT(OV5_MMU_HASH)) {
+		prom_debug("MMU - hash only\n");
+		support->hash_mmu = true;
+	} else {
+		prom_debug("Unknown mmu support option: 0x%x\n", val);
+	}
+}
+
+/*
+ * Record in support whether XIVE exploitation mode should be requested,
+ * given the (already masked) interrupt-mode byte of
+ * ibm,arch-vec-5-platform-support.  Legacy-only and unrecognised values
+ * leave support untouched.
+ */
+static void __init prom_parse_xive_model(u8 val,
+					 struct platform_support *support)
+{
+	if (val == OV5_FEAT(OV5_XIVE_EITHER)) {
+		/* Either Available */
+		prom_debug("XIVE - either mode supported\n");
+		support->xive = !prom_xive_disable;
+	} else if (val == OV5_FEAT(OV5_XIVE_EXPLOIT)) {
+		/* Only Exploitation mode */
+		prom_debug("XIVE - exploitation mode supported\n");
+		/*
+		 * If we __have__ to do XIVE, we're better off ignoring
+		 * the command line rather than not booting.
+		 */
+		if (prom_xive_disable)
+			prom_printf("WARNING: Ignoring cmdline option xive=off\n");
+		support->xive = true;
+	} else if (val == OV5_FEAT(OV5_XIVE_LEGACY)) {
+		/* Only Legacy mode */
+		prom_debug("XIVE - legacy mode supported\n");
+	} else {
+		prom_debug("Unknown xive support option: 0x%x\n", val);
+	}
+}
+
+/*
+ * Dispatch one (index, val) pair of ibm,arch-vec-5-platform-support to the
+ * matching parser.  Indexes other than the MMU, radix-extension and
+ * interrupt-mode bytes are ignored.
+ */
+static void __init prom_parse_platform_support(u8 index, u8 val,
+					       struct platform_support *support)
+{
+	if (index == OV5_INDX(OV5_MMU_SUPPORT)) {
+		/* MMU Model */
+		prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
+	} else if (index == OV5_INDX(OV5_RADIX_GTSE)) {
+		/* Radix Extensions */
+		if (val & OV5_FEAT(OV5_RADIX_GTSE))
+			support->radix_gtse = !prom_radix_gtse_disable;
+	} else if (index == OV5_INDX(OV5_XIVE_SUPPORT)) {
+		/* Interrupt mode */
+		prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+				      support);
+	}
+}
+
+/*
+ * Build ibm_architecture_vec from the template and tailor option vector 5
+ * to what the platform advertises in /chosen
+ * "ibm,arch-vec-5-platform-support":
+ *  - vec7.os_id is filled from linux_banner,
+ *  - vec5.mmu / vec5.radix_ext select radix (with GTSE when offered) or hash,
+ *  - vec5.intarch requests XIVE exploitation mode when supported.
+ * When the property is absent, vec5 keeps the template values.
+ */
+static void __init prom_check_platform_support(void)
+{
+	struct platform_support supported = {
+		.hash_mmu = false,
+		.radix_mmu = false,
+		.radix_gtse = false,
+		.xive = false
+	};
+	int prop_len = prom_getproplen(prom.chosen,
+				       "ibm,arch-vec-5-platform-support");
+
+	/*
+	 * First copy the architecture vec template
+	 *
+	 * use memcpy() instead of *vec = *vec_template so that GCC replaces it
+	 * by __memcpy() when KASAN is active
+	 */
+	memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
+	       sizeof(ibm_architecture_vec));
+
+	prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
+
+	if (prop_len > 1) {
+		int i;
+		u8 vec[8];
+		prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
+			   prop_len);
+		if (prop_len > sizeof(vec)) {
+			prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
+				    prop_len);
+			/*
+			 * Only sizeof(vec) bytes are fetched below, so never
+			 * parse beyond them.
+			 */
+			prop_len = sizeof(vec);
+		}
+		prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec));
+		/*
+		 * The property is a list of (index, value) byte pairs; a
+		 * trailing odd byte has no value and is ignored.
+		 */
+		for (i = 0; i + 1 < prop_len; i += 2) {
+			prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]);
+			prom_parse_platform_support(vec[i], vec[i + 1], &supported);
+		}
+	}
+
+	if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
+		/* Radix preferred - Check if GTSE is also supported */
+		prom_debug("Asking for radix\n");
+		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
+		if (supported.radix_gtse)
+			ibm_architecture_vec.vec5.radix_ext =
+					OV5_FEAT(OV5_RADIX_GTSE);
+		else
+			prom_debug("Radix GTSE isn't supported\n");
+	} else if (supported.hash_mmu) {
+		/* Default to hash mmu (if we can) */
+		prom_debug("Asking for hash\n");
+		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH);
+	} else {
+		/* We're probably on a legacy hypervisor */
+		prom_debug("Assuming legacy hash support\n");
+	}
+
+	if (supported.xive) {
+		prom_debug("Asking for XIVE\n");
+		ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+	}
+}
+
+/*
+ * Tell firmware what this kernel supports.
+ *
+ * The architecture vector is prepared and, if "/" can be opened, passed to
+ * the "ibm,client-architecture-support" method.  When "/" cannot be opened
+ * or the method call itself fails, big-endian builds fall back to feeding
+ * fake_elf to the elf-loader package; little-endian builds do nothing more.
+ */
+static void __init prom_send_capabilities(void)
+{
+	ihandle root;
+	prom_arg_t ret;
+	u32 cores;
+
+	/* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */
+	prom_check_platform_support();
+
+	root = call_prom("open", 1, 1, ADDR("/"));
+	if (root != 0) {
+		/* We need to tell the FW about the number of cores we support.
+		 *
+		 * To do that, we count the number of threads on the first core
+		 * (we assume this is the same for all cores) and use it to
+		 * divide NR_CPUS.
+		 */
+
+		cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+		prom_printf("Max number of cores passed to firmware: %u (NR_CPUS = %d)\n",
+			    cores, NR_CPUS);
+
+		ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores);
+
+		/* try calling the ibm,client-architecture-support method */
+		prom_printf("Calling ibm,client-architecture-support...");
+		if (call_prom_ret("call-method", 3, 2, &ret,
+				  ADDR("ibm,client-architecture-support"),
+				  root,
+				  ADDR(&ibm_architecture_vec)) == 0) {
+			/* the call exists... */
+			/* a non-zero ret is only warned about, not retried */
+			if (ret)
+				prom_printf("\nWARNING: ibm,client-architecture"
+					    "-support call FAILED!\n");
+			call_prom("close", 1, 0, root);
+			prom_printf(" done\n");
+			return;
+		}
+		call_prom("close", 1, 0, root);
+		prom_printf(" not implemented\n");
+	}
+
+#ifdef __BIG_ENDIAN__
+	{
+		ihandle elfloader;
+
+		/* no ibm,client-architecture-support call, try the old way */
+		elfloader = call_prom("open", 1, 1,
+				      ADDR("/packages/elf-loader"));
+		if (elfloader == 0) {
+			prom_printf("couldn't open /packages/elf-loader\n");
+			return;
+		}
+		call_prom("call-method", 3, 1, ADDR("process-elf-header"),
+			  elfloader, ADDR(&fake_elf));
+		call_prom("close", 1, 0, elfloader);
+	}
+#endif /* __BIG_ENDIAN__ */
+}
+#endif /* CONFIG_PPC_PSERIES */
+
+/*
+ * Memory allocation strategy... our layout is normally:
+ *
+ *  at 14Mb or more we have vmlinux, then a gap and initrd.  In some
+ *  rare cases, initrd might end up being before the kernel though.
+ *  We assume this won't override the final kernel at 0, we have no
+ *  provision to handle that in this version, but it should hopefully
+ *  never happen.
+ *
+ *  alloc_top is set to the top of RMO, eventually shrink down if the
+ *  TCEs overlap
+ *
+ *  alloc_bottom is set to the top of kernel/initrd
+ *
+ *  from there, allocations are done this way : rtas is allocated
+ *  topmost, and the device-tree is allocated from the bottom. We try
+ *  to grow the device-tree allocation as we progress. If we can't,
+ *  then we fail, we don't currently have a facility to restart
+ *  elsewhere, but that shouldn't be necessary.
+ *
+ *  Note that calls to reserve_mem have to be done explicitly, memory
+ *  allocated with either alloc_up or alloc_down isn't automatically
+ *  reserved.
+ */
+
+
+/*
+ * Allocates memory in the RMO upward from the kernel/initrd
+ *
+ * When align is 0, this is a special case, it means to allocate in place
+ * at the current location of alloc_bottom or fail (that is basically
+ * extending the previous allocation). Used for the device-tree flattening
+ */
+static unsigned long __init alloc_up(unsigned long size, unsigned long align)
+{
+	unsigned long claimed = 0;
+	unsigned long candidate;
+
+	prom_debug("%s(%lx, %lx)\n", __func__, size, align);
+	if (ram_top == 0)
+		prom_panic("alloc_up() called with mem not initialized\n");
+
+	/* Start at alloc_bottom, rounded up when an alignment was requested */
+	candidate = align ? ALIGN(alloc_bottom, align) : alloc_bottom;
+
+	/*
+	 * Try to claim at successive candidates, stepping by 1MB (re-aligned)
+	 * after each failure, for as long as the block fits below alloc_top.
+	 * With align == 0 only the first candidate is tried.
+	 */
+	while ((candidate + size) <= alloc_top) {
+		prom_debug("    trying: 0x%lx\n\r", candidate);
+		claimed = (unsigned long)prom_claim(candidate, size, 0);
+		if (claimed != PROM_ERROR && claimed != 0)
+			break;
+		claimed = 0;
+		if (align == 0)
+			break;
+		candidate = ALIGN(candidate + 0x100000, align);
+	}
+	if (claimed == 0)
+		return 0;
+
+	/* Later upward allocations start right after this one */
+	alloc_bottom = claimed + size;
+
+	prom_debug(" -> %lx\n", claimed);
+	prom_debug("  alloc_bottom : %lx\n", alloc_bottom);
+	prom_debug("  alloc_top    : %lx\n", alloc_top);
+	prom_debug("  alloc_top_hi : %lx\n", alloc_top_high);
+	prom_debug("  rmo_top      : %lx\n", rmo_top);
+	prom_debug("  ram_top      : %lx\n", ram_top);
+
+	return claimed;
+}
+
+/*
+ * Allocates memory downward, either from top of RMO, or if highmem
+ * is set, from the top of RAM.  Note that this one doesn't handle
+ * failures.  It does claim memory if highmem is not set.
+ */
+static unsigned long __init alloc_down(unsigned long size, unsigned long align,
+				       int highmem)
+{
+	unsigned long base, addr = 0;
+
+	prom_debug("%s(%lx, %lx, %s)\n", __func__, size, align,
+		   highmem ? "(high)" : "(low)");
+	if (ram_top == 0)
+		prom_panic("alloc_down() called with mem not initialized\n");
+
+	if (highmem) {
+		/* Carve out storage for the TCE table. */
+		/* Pure bookkeeping: nothing is claimed from firmware here */
+		addr = ALIGN_DOWN(alloc_top_high - size, align);
+		if (addr <= alloc_bottom)
+			return 0;
+		/* Will we bump into the RMO ? If yes, check out that we
+		 * didn't overlap existing allocations there, if we did,
+		 * we are dead, we must be the first in town !
+		 */
+		if (addr < rmo_top) {
+			/* Good, we are first */
+			if (alloc_top == rmo_top)
+				alloc_top = rmo_top = addr;
+			else
+				return 0;
+		}
+		alloc_top_high = addr;
+		goto bail;
+	}
+
+	/*
+	 * Low allocation: try to claim just below alloc_top, stepping down
+	 * by 1MB (re-aligned) after each failure while staying above
+	 * alloc_bottom.
+	 */
+	base = ALIGN_DOWN(alloc_top - size, align);
+	for (; base > alloc_bottom;
+	     base = ALIGN_DOWN(base - 0x100000, align))  {
+		prom_debug("    trying: 0x%lx\n\r", base);
+		addr = (unsigned long)prom_claim(base, size, 0);
+		if (addr != PROM_ERROR && addr != 0)
+			break;
+		addr = 0;
+	}
+	if (addr == 0)
+		return 0;
+	/* Later downward allocations must stay below this one */
+	alloc_top = addr;
+
+ bail:
+	prom_debug(" -> %lx\n", addr);
+	prom_debug("  alloc_bottom : %lx\n", alloc_bottom);
+	prom_debug("  alloc_top    : %lx\n", alloc_top);
+	prom_debug("  alloc_top_hi : %lx\n", alloc_top_high);
+	prom_debug("  rmo_top      : %lx\n", rmo_top);
+	prom_debug("  ram_top      : %lx\n", ram_top);
+
+	return addr;
+}
+
+/*
+ * Parse a "reg" cell
+ */
+/*
+ * Read one s-cell big-endian number from *cellp, advance *cellp past all
+ * s cells and return the value.  Only as many trailing cells as fit in an
+ * unsigned long contribute to the result.
+ */
+static unsigned long __init prom_next_cell(int s, cell_t **cellp)
+{
+	cell_t *p = *cellp;
+	unsigned long r = 0;
+
+	/* Skip leading cells that do not fit in an unsigned long */
+	while (s > sizeof(unsigned long) / 4) {
+		p++;
+		s--;
+	}
+	r = be32_to_cpu(*p++);
+#ifdef CONFIG_PPC64
+	/* two cells left: the one just read is the high word */
+	if (s > 1) {
+		r <<= 32;
+		r |= be32_to_cpu(*(p++));
+	}
+#endif
+	*cellp = p;
+	return r;
+}
+
+/*
+ * Very dumb function for adding to the memory reserve list, but
+ * we don't need anything smarter at this point
+ *
+ * XXX Eventually check for collisions.  They should NEVER happen.
+ * If problems seem to show up, it would be a good start to track
+ * them down.
+ */
+static void __init reserve_mem(u64 base, u64 size)
+{
+	unsigned long slot = mem_reserve_cnt;
+	u64 start, end;
+
+	if (size == 0)
+		return;
+
+	/* Widen the range outwards to whole pages */
+	start = ALIGN_DOWN(base, PAGE_SIZE);
+	end = ALIGN(base + size, PAGE_SIZE);
+
+	/* We need to always keep one empty entry so that we
+	 * have our terminator with "size" set to 0 since we are
+	 * dumb and just copy this entire array to the boot params
+	 */
+	if (slot >= (MEM_RESERVE_MAP_SIZE - 1))
+		prom_panic("Memory reserve map exhausted !\n");
+
+	mem_reserve_map[slot].base = cpu_to_be64(start);
+	mem_reserve_map[slot].size = cpu_to_be64(end - start);
+	mem_reserve_cnt = slot + 1;
+}
+
+/*
+ * Initialize memory allocation mechanism, parse "memory" nodes and
+ * obtain that way the top of memory and RMO to set up our local allocator
+ */
+static void __init prom_init_mem(void)
+{
+	phandle node;
+	char type[64];
+	unsigned int plen;
+	cell_t *p, *endp;
+	__be32 val;
+	u32 rac, rsc;
+
+	/*
+	 * We iterate the memory nodes to find
+	 * 1) top of RMO (first node)
+	 * 2) top of memory
+	 */
+	/* val is preset so a missing property leaves the default (2 / 1) */
+	val = cpu_to_be32(2);
+	prom_getprop(prom.root, "#address-cells", &val, sizeof(val));
+	rac = be32_to_cpu(val);
+	val = cpu_to_be32(1);
+	prom_getprop(prom.root, "#size-cells", &val, sizeof(val));
+	rsc = be32_to_cpu(val);
+	prom_debug("root_addr_cells: %x\n", rac);
+	prom_debug("root_size_cells: %x\n", rsc);
+
+	prom_debug("scanning memory:\n");
+
+	for (node = 0; prom_next_node(&node); ) {
+		type[0] = 0;
+		prom_getprop(node, "device_type", type, sizeof(type));
+
+		if (type[0] == 0) {
+			/*
+			 * CHRP Longtrail machines have no device_type
+			 * on the memory node, so check the name instead...
+			 */
+			prom_getprop(node, "name", type, sizeof(type));
+		}
+		if (prom_strcmp(type, "memory"))
+			continue;
+
+		plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf));
+		/*
+		 * A failed read leaves regbuf untouched; skip the node
+		 * instead of parsing whatever a previous node left there.
+		 */
+		if (plen == PROM_ERROR)
+			continue;
+		if (plen > sizeof(regbuf)) {
+			prom_printf("memory node too large for buffer !\n");
+			plen = sizeof(regbuf);
+		}
+		p = regbuf;
+		endp = p + (plen / sizeof(cell_t));
+
+#ifdef DEBUG_PROM
+		memset(prom_scratch, 0, sizeof(prom_scratch));
+		call_prom("package-to-path", 3, 1, node, prom_scratch,
+			  sizeof(prom_scratch) - 1);
+		prom_debug("  node %s :\n", prom_scratch);
+#endif /* DEBUG_PROM */
+
+		/* walk the (base, size) tuples of the "reg" property */
+		while ((endp - p) >= (rac + rsc)) {
+			unsigned long base, size;
+
+			base = prom_next_cell(rac, &p);
+			size = prom_next_cell(rsc, &p);
+
+			if (size == 0)
+				continue;
+			prom_debug("    %lx %lx\n", base, size);
+			if (base == 0 && (of_platform & PLATFORM_LPAR))
+				rmo_top = size;
+			if ((base + size) > ram_top)
+				ram_top = base + size;
+		}
+	}
+
+	alloc_bottom = PAGE_ALIGN((unsigned long)&_end + 0x4000);
+
+	/*
+	 * If prom_memory_limit is set we reduce the upper limits *except* for
+	 * alloc_top_high. This must be the real top of RAM so we can put
+	 * TCE's up there.
+	 */
+
+	alloc_top_high = ram_top;
+
+	if (prom_memory_limit) {
+		if (prom_memory_limit <= alloc_bottom) {
+			prom_printf("Ignoring mem=%lx <= alloc_bottom.\n",
+				    prom_memory_limit);
+			prom_memory_limit = 0;
+		} else if (prom_memory_limit >= ram_top) {
+			prom_printf("Ignoring mem=%lx >= ram_top.\n",
+				    prom_memory_limit);
+			prom_memory_limit = 0;
+		} else {
+			ram_top = prom_memory_limit;
+			rmo_top = min(rmo_top, prom_memory_limit);
+		}
+	}
+
+	/*
+	 * Setup our top alloc point, that is top of RMO or top of
+	 * segment 0 when running non-LPAR.
+	 * Some RS64 machines have buggy firmware where claims up at
+	 * 1GB fail.  Cap at 768MB as a workaround.
+	 * Since 768MB is plenty of room, and we need to cap to something
+	 * reasonable on 32-bit, cap at 768MB on all machines.
+	 */
+	if (!rmo_top)
+		rmo_top = ram_top;
+	rmo_top = min(0x30000000ul, rmo_top);
+	alloc_top = rmo_top;
+	/*
+	 * NOTE(review): ram_top may have been lowered to prom_memory_limit
+	 * above, so this assignment replaces the earlier "real top of RAM"
+	 * value - confirm which one is intended.
+	 */
+	alloc_top_high = ram_top;
+
+	/*
+	 * Check if we have an initrd after the kernel but still inside
+	 * the RMO.  If we do move our bottom point to after it.
+	 */
+	if (prom_initrd_start &&
+	    prom_initrd_start < rmo_top &&
+	    prom_initrd_end > alloc_bottom)
+		alloc_bottom = PAGE_ALIGN(prom_initrd_end);
+
+	prom_printf("memory layout at init:\n");
+	prom_printf("  memory_limit : %lx (16 MB aligned)\n",
+		    prom_memory_limit);
+	prom_printf("  alloc_bottom : %lx\n", alloc_bottom);
+	prom_printf("  alloc_top    : %lx\n", alloc_top);
+	prom_printf("  alloc_top_hi : %lx\n", alloc_top_high);
+	prom_printf("  rmo_top      : %lx\n", rmo_top);
+	prom_printf("  ram_top      : %lx\n", ram_top);
+}
+
+/*
+ * Close the firmware input instance named by the "stdin" property of
+ * /chosen. Does nothing when that property cannot be read or is empty.
+ */
+static void __init prom_close_stdin(void)
+{
+	__be32 raw_handle;
+	ihandle fw_stdin;
+
+	/* No usable "stdin" property: nothing to close. */
+	if (prom_getprop(prom.chosen, "stdin", &raw_handle,
+			 sizeof(raw_handle)) <= 0)
+		return;
+
+	/* The property holds a big-endian ihandle. */
+	fw_stdin = be32_to_cpu(raw_handle);
+	call_prom("close", 1, 0, fw_stdin);
+}
+
+#ifdef CONFIG_PPC_SVM
+/*
+ * Make the H_RTAS call with "args" as its only parameter.
+ *
+ * H_RTAS is placed in r3 and args in r4, then "sc 1" is executed.  The
+ * value found in r3 afterwards is returned, narrowed to int.
+ */
+static int __init prom_rtas_hcall(uint64_t args)
+{
+	/* Pin both values to the registers used by the "sc 1" sequence. */
+	register uint64_t arg1 asm("r3") = H_RTAS;
+	register uint64_t arg2 asm("r4") = args;
+
+	/* r3 is both an input and the only output; no clobbers are listed. */
+	asm volatile("sc 1\n" : "=r" (arg1) :
+			"r" (arg1),
+			"r" (arg2) :);
+	/* NOTE(review): presumably records that SRR0/SRR1 may have changed - confirm */
+	srr_regs_clobbered();
+
+	return arg1;
+}
+
+/* Argument buffer handed to prom_rtas_hcall() by prom_rtas_os_term(). */
+static struct rtas_args __prombss os_term_args;
+
+/*
+ * Invoke the RTAS "ibm,os-term" service with the message @str.
+ *
+ * Returns quietly when there is no /rtas node; panics when the node exists
+ * but yields no (non-zero) "ibm,os-term" token.
+ */
+static void __init prom_rtas_os_term(char *str)
+{
+	struct rtas_args *ra = &os_term_args;
+	__be32 raw_token = 0;
+	phandle rtas;
+	u32 os_term;
+
+	prom_debug("%s: start...\n", __func__);
+	rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+	prom_debug("rtas_node: %x\n", rtas);
+	if (!PHANDLE_VALID(rtas))
+		return;
+
+	/* raw_token stays 0 if the property cannot be read. */
+	prom_getprop(rtas, "ibm,os-term", &raw_token, sizeof(raw_token));
+	os_term = be32_to_cpu(raw_token);
+	prom_debug("ibm,os-term: %x\n", os_term);
+	if (!os_term)
+		prom_panic("Could not get token for ibm,os-term\n");
+
+	/* One input (physical address of the message), one return value. */
+	ra->token = cpu_to_be32(os_term);
+	ra->nargs = cpu_to_be32(1);
+	ra->nret = cpu_to_be32(1);
+	ra->args[0] = cpu_to_be32(__pa(str));
+
+	prom_rtas_hcall((uint64_t)ra);
+}
+#endif /* CONFIG_PPC_SVM */
+
+/*
+ * Allocate room for and instantiate RTAS
+ *
+ * Looks up /rtas, reads its "rtas-size" property, claims that much memory
+ * with alloc_down() and calls the firmware's "instantiate-rtas" method on
+ * an opened /rtas instance.  On success the region is passed to
+ * reserve_mem() and the base and entry point are stored on the node as
+ * "linux,rtas-base" and "linux,rtas-entry".  Finally it records whether
+ * the node carries a "query-cpu-stopped-state" property.
+ *
+ * Returns early when there is no /rtas node, when the size is zero, or
+ * when opening/instantiating fails; panics when no memory can be claimed.
+ */
+static void __init prom_instantiate_rtas(void)
+{
+	phandle rtas_node;
+	ihandle rtas_inst;
+	u32 base, entry = 0;
+	__be32 val;
+	u32 size = 0;
+
+	prom_debug("prom_instantiate_rtas: start...\n");
+
+	rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+	prom_debug("rtas_node: %x\n", rtas_node);
+	if (!PHANDLE_VALID(rtas_node))
+		return;
+
+	/* val stays 0 (=> size 0 => return) if the property is missing. */
+	val = 0;
+	prom_getprop(rtas_node, "rtas-size", &val, sizeof(val));
+	size = be32_to_cpu(val);
+	if (size == 0)
+		return;
+
+	base = alloc_down(size, PAGE_SIZE, 0);
+	if (base == 0)
+		prom_panic("Could not allocate memory for RTAS\n");
+
+	rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
+	if (!IHANDLE_VALID(rtas_inst)) {
+		prom_printf("opening rtas package failed (%x)\n", rtas_inst);
+		return;
+	}
+
+	prom_printf("instantiating rtas at 0x%x...", base);
+
+	/* A non-zero status or a zero entry point both count as failure. */
+	if (call_prom_ret("call-method", 3, 2, &entry,
+			  ADDR("instantiate-rtas"),
+			  rtas_inst, base) != 0
+	    || entry == 0) {
+		prom_printf(" failed\n");
+		return;
+	}
+	prom_printf(" done\n");
+
+	reserve_mem(base, size);
+
+	/* Properties are stored big-endian. */
+	val = cpu_to_be32(base);
+	prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
+		     &val, sizeof(val));
+	val = cpu_to_be32(entry);
+	prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
+		     &val, sizeof(val));
+
+	/* Check if it supports "query-cpu-stopped-state" */
+	if (prom_getprop(rtas_node, "query-cpu-stopped-state",
+			 &val, sizeof(val)) != PROM_ERROR)
+		rtas_has_query_cpu_stopped = true;
+
+	prom_debug("rtas base     = 0x%x\n", base);
+	prom_debug("rtas entry    = 0x%x\n", entry);
+	prom_debug("rtas size     = 0x%x\n", size);
+
+	prom_debug("prom_instantiate_rtas: end...\n");
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Allocate room for and instantiate Stored Measurement Log (SML)
+ *
+ * Opens /vdevice/vtpm, asks the firmware how large the log is, claims that
+ * much memory with alloc_down(), zeroes it and calls "sml-handover" to
+ * have the log placed there.  On success the region is passed to
+ * reserve_mem() and its base and size are stored on the node as
+ * "linux,sml-base" and "linux,sml-size".
+ *
+ * Returns early when the node is absent or any firmware method fails;
+ * panics when no memory can be claimed.  The opened instance is not closed
+ * on any path in this function.
+ */
+static void __init prom_instantiate_sml(void)
+{
+	phandle ibmvtpm_node;
+	ihandle ibmvtpm_inst;
+	u32 entry = 0, size = 0, succ = 0;
+	u64 base;
+	__be32 val;
+
+	prom_debug("prom_instantiate_sml: start...\n");
+
+	ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/vdevice/vtpm"));
+	prom_debug("ibmvtpm_node: %x\n", ibmvtpm_node);
+	if (!PHANDLE_VALID(ibmvtpm_node))
+		return;
+
+	ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/vdevice/vtpm"));
+	if (!IHANDLE_VALID(ibmvtpm_inst)) {
+		prom_printf("opening vtpm package failed (%x)\n", ibmvtpm_inst);
+		return;
+	}
+
+	/*
+	 * When the node advertises "ibm,sml-efi-reformat-supported", reformat
+	 * the log first and use the allocated size; otherwise fall back to
+	 * the handover size.  Only the property's presence matters here, the
+	 * value read into val is not used.
+	 */
+	if (prom_getprop(ibmvtpm_node, "ibm,sml-efi-reformat-supported",
+			 &val, sizeof(val)) != PROM_ERROR) {
+		if (call_prom_ret("call-method", 2, 2, &succ,
+				  ADDR("reformat-sml-to-efi-alignment"),
+				  ibmvtpm_inst) != 0 || succ == 0) {
+			prom_printf("Reformat SML to EFI alignment failed\n");
+			return;
+		}
+
+		if (call_prom_ret("call-method", 2, 2, &size,
+				  ADDR("sml-get-allocated-size"),
+				  ibmvtpm_inst) != 0 || size == 0) {
+			prom_printf("SML get allocated size failed\n");
+			return;
+		}
+	} else {
+		if (call_prom_ret("call-method", 2, 2, &size,
+				  ADDR("sml-get-handover-size"),
+				  ibmvtpm_inst) != 0 || size == 0) {
+			prom_printf("SML get handover size failed\n");
+			return;
+		}
+	}
+
+	base = alloc_down(size, PAGE_SIZE, 0);
+	if (base == 0)
+		prom_panic("Could not allocate memory for sml\n");
+
+	prom_printf("instantiating sml at 0x%llx...", base);
+
+	/* Start from a zeroed buffer before the firmware fills it in. */
+	memset((void *)base, 0, size);
+
+	if (call_prom_ret("call-method", 4, 2, &entry,
+			  ADDR("sml-handover"),
+			  ibmvtpm_inst, size, base) != 0 || entry == 0) {
+		prom_printf("SML handover failed\n");
+		return;
+	}
+	prom_printf(" done\n");
+
+	reserve_mem(base, size);
+
+	/*
+	 * NOTE(review): base and size are stored in native byte order (no
+	 * cpu_to_be*() as used for the RTAS properties) - confirm that the
+	 * consumers of these properties expect that before changing it.
+	 */
+	prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-base",
+		     &base, sizeof(base));
+	prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size",
+		     &size, sizeof(size));
+
+	prom_debug("sml base     = 0x%llx\n", base);
+	prom_debug("sml size     = 0x%x\n", size);
+
+	prom_debug("prom_instantiate_sml: end...\n");
+}
+
+/*
+ * Allocate room for and initialize TCE tables
+ */
+#ifdef __BIG_ENDIAN__
+/*
+ * Walk every device-tree node, and for each matching PCI host bridge
+ * claim a 4 MB TCE table from the top of memory, fill it with a
+ * one-to-one mapping, record it on the node ("linux,tce-base",
+ * "linux,tce-size") and hand it to the firmware through the bridge's
+ * "set-64-bit-addressing" method.
+ *
+ * All tables are covered by one reserve_mem() call at the end, and the
+ * overall range is exported through prom_tce_alloc_start/_end.
+ * Does nothing when prom_iommu_off is set.
+ */
+static void __init prom_initialize_tce_table(void)
+{
+	phandle node;
+	ihandle phb_node;
+	char compatible[64], type[64], model[64];
+	char *path = prom_scratch;
+	u64 base, align;
+	u32 minalign, minsize;
+	u64 tce_entry, *tce_entryp;
+	u64 local_alloc_top, local_alloc_bottom;
+	u64 i;
+
+	if (prom_iommu_off)
+		return;
+
+	prom_debug("starting prom_initialize_tce_table\n");
+
+	/* Cache current top of allocs so we reserve a single block */
+	local_alloc_top = alloc_top_high;
+	local_alloc_bottom = local_alloc_top;
+
+	/* Search all nodes looking for PHBs. */
+	for (node = 0; prom_next_node(&node); ) {
+		/* Empty strings stand in for properties that are absent. */
+		compatible[0] = 0;
+		type[0] = 0;
+		model[0] = 0;
+		prom_getprop(node, "compatible",
+			     compatible, sizeof(compatible));
+		prom_getprop(node, "device_type", type, sizeof(type));
+		prom_getprop(node, "model", model, sizeof(model));
+
+		/* Only nodes whose device_type mentions "pci" are candidates. */
+		if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL))
+			continue;
+
+		/* Keep the old logic intact to avoid regression. */
+		if (compatible[0] != 0) {
+			if ((prom_strstr(compatible, "python") == NULL) &&
+			    (prom_strstr(compatible, "Speedwagon") == NULL) &&
+			    (prom_strstr(compatible, "Winnipeg") == NULL))
+				continue;
+		} else if (model[0] != 0) {
+			/* First letter omitted, so either case matches. */
+			if ((prom_strstr(model, "ython") == NULL) &&
+			    (prom_strstr(model, "peedwagon") == NULL) &&
+			    (prom_strstr(model, "innipeg") == NULL))
+				continue;
+		}
+
+		if (prom_getprop(node, "tce-table-minalign", &minalign,
+				 sizeof(minalign)) == PROM_ERROR)
+			minalign = 0;
+		if (prom_getprop(node, "tce-table-minsize", &minsize,
+				 sizeof(minsize)) == PROM_ERROR)
+			minsize = 4UL << 20;
+
+		/*
+		 * Even though we read what OF wants, we just set the table
+		 * size to 4 MB.  This is enough to map 2GB of PCI DMA space.
+		 * By doing this, we avoid the pitfalls of trying to DMA to
+		 * MMIO space and the DMA alias hole.
+		 */
+		minsize = 4UL << 20;
+
+		/* Align to the greater of the align or size */
+		align = max(minalign, minsize);
+		base = alloc_down(minsize, align, 1);
+		if (base == 0)
+			prom_panic("ERROR, cannot find space for TCE table.\n");
+		/* Track the lowest table so one reservation covers them all. */
+		if (base < local_alloc_bottom)
+			local_alloc_bottom = base;
+
+		/* It seems OF doesn't null-terminate the path :-( */
+		memset(path, 0, sizeof(prom_scratch));
+		/* Call OF to setup the TCE hardware */
+		if (call_prom("package-to-path", 3, 1, node,
+			      path, sizeof(prom_scratch) - 1) == PROM_ERROR) {
+			/* Reported only; processing continues with an empty path. */
+			prom_printf("package-to-path failed\n");
+		}
+
+		/* Save away the TCE table attributes for later use. */
+		prom_setprop(node, path, "linux,tce-base", &base, sizeof(base));
+		prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize));
+
+		prom_debug("TCE table: %s\n", path);
+		prom_debug("\tnode = 0x%x\n", node);
+		prom_debug("\tbase = 0x%llx\n", base);
+		prom_debug("\tsize = 0x%x\n", minsize);
+
+		/* Initialize the table to have a one-to-one mapping
+		 * over the allocated size.
+		 */
+		tce_entryp = (u64 *)base;
+		/* One 8-byte entry per page; the low two bits are set in each. */
+		for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
+			tce_entry = (i << PAGE_SHIFT);
+			tce_entry |= 0x3;
+			*tce_entryp = tce_entry;
+		}
+
+		prom_printf("opening PHB %s", path);
+		phb_node = call_prom("open", 1, 1, path);
+		if (phb_node == 0)
+			prom_printf("... failed\n");
+		else
+			prom_printf("... done\n");
+
+		/*
+		 * The method is invoked even when the open above failed.
+		 * The table base is passed as two 32-bit halves, low first.
+		 */
+		call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"),
+			  phb_node, -1, minsize,
+			  (u32) base, (u32) (base >> 32));
+		call_prom("close", 1, 0, phb_node);
+	}
+
+	/* Zero-length when no table was allocated (bottom == top). */
+	reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom);
+
+	/* These are only really needed if there is a memory limit in
+	 * effect, but we don't know so export them always. */
+	prom_tce_alloc_start = local_alloc_bottom;
+	prom_tce_alloc_end = local_alloc_top;
+
+	/* Flag the first invalid entry */
+	prom_debug("ending prom_initialize_tce_table\n");
+}
+#endif /* __BIG_ENDIAN__ */
+#endif /* CONFIG_PPC64 */
+
+/*
+ * With CHRP SMP we need to use the OF to start the other processors.
+ * We can't wait until smp_boot_cpus (the OF is trashed by then)
+ * so we have to put the processors into a holding pattern controlled
+ * by the kernel (not OF) before we destroy the OF.
+ *
+ * This uses a chunk of low memory, puts some holding pattern
+ * code there and sends the other processors off to there until
+ * smp_boot_cpus tells them to do something.  The holding pattern
+ * checks that address until its cpu # is there, when it is that
+ * cpu jumps to __secondary_start().  smp_boot_cpus() takes care
+ * of setting those values.
+ *
+ * We also use physical address 0x4 here to tell when a cpu
+ * is in its holding pattern code.
+ *
+ * -- Cort
+ */
+/*
+ * We want to reference the copy of __secondary_hold_* in the
+ * 0 - 0x100 address range
+ */
+/* Keep only the low 8 bits of the symbol's address (an offset below 0x100). */
+#define LOW_ADDR(x)	(((unsigned long) &(x)) & 0xff)
+
+/*
+ * Ask the firmware to start every secondary "cpu" node at the
+ * __secondary_hold entry and wait for each one to acknowledge.
+ *
+ * Skipped entirely on pSeries when RTAS advertised
+ * "query-cpu-stopped-state" (see rtas_has_query_cpu_stopped).
+ */
+static void __init prom_hold_cpus(void)
+{
+	unsigned long i;
+	phandle node;
+	char type[64];
+	unsigned long *spinloop
+		= (void *) LOW_ADDR(__secondary_hold_spinloop);
+	unsigned long *acknowledge
+		= (void *) LOW_ADDR(__secondary_hold_acknowledge);
+	unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
+
+	/*
+	 * On pseries, if RTAS supports "query-cpu-stopped-state",
+	 * we skip this stage, the CPUs will be started by the
+	 * kernel using RTAS.
+	 */
+	if ((of_platform == PLATFORM_PSERIES ||
+	     of_platform == PLATFORM_PSERIES_LPAR) &&
+	    rtas_has_query_cpu_stopped) {
+		prom_printf("prom_hold_cpus: skipped\n");
+		return;
+	}
+
+	prom_debug("prom_hold_cpus: start...\n");
+	prom_debug("    1) spinloop       = 0x%lx\n", (unsigned long)spinloop);
+	prom_debug("    1) *spinloop      = 0x%lx\n", *spinloop);
+	prom_debug("    1) acknowledge    = 0x%lx\n",
+		   (unsigned long)acknowledge);
+	prom_debug("    1) *acknowledge   = 0x%lx\n", *acknowledge);
+	prom_debug("    1) secondary_hold = 0x%lx\n", secondary_hold);
+
+	/* Set the common spinloop variable, so all of the secondary cpus
+	 * will block when they are awakened from their OF spinloop.
+	 * This must occur for both SMP and non SMP kernels, since OF will
+	 * be trashed when we move the kernel.
+	 */
+	*spinloop = 0;
+
+	/* look for cpus */
+	for (node = 0; prom_next_node(&node); ) {
+		unsigned int cpu_no;
+		__be32 reg;
+
+		type[0] = 0;
+		prom_getprop(node, "device_type", type, sizeof(type));
+		if (prom_strcmp(type, "cpu") != 0)
+			continue;
+
+		/* Skip non-configured cpus. */
+		/* A node without a "status" property is treated as usable. */
+		if (prom_getprop(node, "status", type, sizeof(type)) > 0)
+			if (prom_strcmp(type, "okay") != 0)
+				continue;
+
+		/* Default of -1 is kept if "reg" cannot be read. */
+		reg = cpu_to_be32(-1); /* make sparse happy */
+		prom_getprop(node, "reg", &reg, sizeof(reg));
+		cpu_no = be32_to_cpu(reg);
+
+		prom_debug("cpu hw idx   = %u\n", cpu_no);
+
+		/* Init the acknowledge var which will be reset by
+		 * the secondary cpu when it awakens from its OF
+		 * spinloop.
+		 */
+		*acknowledge = (unsigned long)-1;
+
+		if (cpu_no != prom.cpu) {
+			/* Primary Thread of non-boot cpu or any thread */
+			prom_printf("starting cpu hw idx %u... ", cpu_no);
+			call_prom("start-cpu", 3, 0, node,
+				  secondary_hold, cpu_no);
+
+			/* Bounded busy-wait until the cpu overwrites the -1. */
+			for (i = 0; (i < 100000000) && 
+			     (*acknowledge == ((unsigned long)-1)); i++ )
+				mb();
+
+			/* Success means the cpu wrote its own number back. */
+			if (*acknowledge == cpu_no)
+				prom_printf("done\n");
+			else
+				prom_printf("failed: %lx\n", *acknowledge);
+		}
+#ifdef CONFIG_SMP
+		else
+			prom_printf("boot cpu hw idx %u\n", cpu_no);
+#endif /* CONFIG_SMP */
+	}
+
+	prom_debug("prom_hold_cpus: end...\n");
+}
+
+
+/*
+ * Record the firmware entry point @pp and look up the two nodes used
+ * throughout this file: /chosen (prom.chosen) and the tree root
+ * (prom.root).  Panics if either lookup fails.  prom.mmumap is reset to 0.
+ */
+static void __init prom_init_client_services(unsigned long pp)
+{
+	/* Get a handle to the prom entry point before anything else */
+	prom_entry = pp;
+
+	/* get a handle for the stdout device */
+	prom.chosen = call_prom("finddevice", 1, 1, ADDR("/chosen"));
+	if (!PHANDLE_VALID(prom.chosen))
+		prom_panic("cannot find chosen"); /* msg won't be printed :( */
+
+	/* get device tree root */
+	prom.root = call_prom("finddevice", 1, 1, ADDR("/"));
+	if (!PHANDLE_VALID(prom.root))
+		prom_panic("cannot find device tree root"); /* msg won't be printed :( */
+
+	/* No MMU instance known yet. */
+	prom.mmumap = 0;
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * For really old powermacs, we need to map things we claim.
+ * For that, we need the ihandle of the mmu.
+ * Also, on the longtrail, we need to work around other bugs.
+ *
+ * Reads the "model" string of /openprom and, for the two firmware
+ * versions known to need it, sets of_workarounds and fetches the memory
+ * and mmu instance handles into prom.memory / prom.mmumap.  If either
+ * handle turns out to be invalid, OF_WA_CLAIM is cleared again.
+ * Any other firmware version leaves everything untouched.
+ */
+static void __init prom_find_mmu(void)
+{
+	phandle oprom;
+	char version[64];
+
+	oprom = call_prom("finddevice", 1, 1, ADDR("/openprom"));
+	if (!PHANDLE_VALID(oprom))
+		return;
+	if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0)
+		return;
+	/* Force termination in case the property filled the whole buffer. */
+	version[sizeof(version) - 1] = 0;
+	/* XXX might need to add other versions here */
+	if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0)
+		of_workarounds = OF_WA_CLAIM;
+	else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) {
+		of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
+		/* Pass the string through ADDR() like every other literal argument. */
+		call_prom("interpret", 1, 1,
+			  ADDR("dev /memory 0 to allow-reclaim"));
+	} else
+		return;
+	prom.memory = call_prom("open", 1, 1, ADDR("/memory"));
+	/* "mmu" in /chosen is a big-endian ihandle; convert it in place. */
+	prom_getprop(prom.chosen, "mmu", &prom.mmumap,
+		     sizeof(prom.mmumap));
+	prom.mmumap = be32_to_cpu(prom.mmumap);
+	if (!IHANDLE_VALID(prom.memory) || !IHANDLE_VALID(prom.mmumap))
+		of_workarounds &= ~OF_WA_CLAIM;		/* hmmm */
+}
+#else
+#define prom_find_mmu()
+#endif
+
+/*
+ * Locate the firmware console.
+ *
+ * Reads the "stdout" ihandle from /chosen into prom.stdout (panics if it
+ * is missing), resolves it to a path that is kept in of_stdout_device and
+ * published as "linux,stdout-path" on /chosen, and tags the underlying
+ * node with "linux,boot-display" when its device_type is "display".
+ */
+static void __init prom_init_stdout(void)
+{
+	char *path = of_stdout_device;
+	char type[16];
+	phandle stdout_node;
+	__be32 val;
+
+	if (prom_getprop(prom.chosen, "stdout", &val, sizeof(val)) <= 0)
+		prom_panic("cannot find stdout");
+
+	prom.stdout = be32_to_cpu(val);
+
+	/* Get the full OF pathname of the stdout device */
+	/* NOTE(review): 256/255 assume of_stdout_device is 256 bytes - confirm */
+	memset(path, 0, 256);
+	call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
+	prom_printf("OF stdout device is: %s\n", of_stdout_device);
+	prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
+		     path, prom_strlen(path) + 1);
+
+	/* instance-to-package fails on PA-Semi */
+	stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
+	if (stdout_node != PROM_ERROR) {
+		/* If it's a display, note it */
+		memset(type, 0, sizeof(type));
+		prom_getprop(stdout_node, "device_type", type, sizeof(type));
+		if (prom_strcmp(type, "display") == 0)
+			prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
+	}
+}
+
+/*
+ * Work out which platform we are running on from the root node.
+ *
+ * Every string of the root "compatible" property is checked first:
+ * a PowerMac match returns PLATFORM_POWERMAC and (64-bit only) a Cell
+ * match returns PLATFORM_GENERIC.  Otherwise, on 64-bit, a root
+ * device_type of "chrp" together with a /rtas node means pSeries, with
+ * PLATFORM_PSERIES_LPAR chosen when /rtas has "ibm,hypertas-functions".
+ * Everything else is PLATFORM_GENERIC.
+ */
+static int __init prom_find_machine_type(void)
+{
+	static char compat[256] __prombss;
+	int len, i = 0;
+#ifdef CONFIG_PPC64
+	phandle rtas;
+	int x;
+#endif
+
+	/* Look for a PowerMac or a Cell */
+	len = prom_getprop(prom.root, "compatible",
+			   compat, sizeof(compat)-1);
+	if (len > 0) {
+		/*
+		 * The length reported for a property can be larger than the
+		 * buffer that was offered; never index past what we own.
+		 */
+		if (len > (int)sizeof(compat) - 1)
+			len = sizeof(compat) - 1;
+		compat[len] = 0;
+		/* "compatible" is a list of NUL-separated strings. */
+		while (i < len) {
+			char *p = &compat[i];
+			int sl = prom_strlen(p);
+			if (sl == 0)
+				break;
+			if (prom_strstr(p, "Power Macintosh") ||
+			    prom_strstr(p, "MacRISC"))
+				return PLATFORM_POWERMAC;
+#ifdef CONFIG_PPC64
+			/* We must make sure we don't detect the IBM Cell
+			 * blades as pSeries due to some firmware issues,
+			 * so we do it here.
+			 */
+			if (prom_strstr(p, "IBM,CBEA") ||
+			    prom_strstr(p, "IBM,CPBW-1.0"))
+				return PLATFORM_GENERIC;
+#endif /* CONFIG_PPC64 */
+			i += sl + 1;
+		}
+	}
+#ifdef CONFIG_PPC64
+	/* Try to figure out if it's an IBM pSeries or any other
+	 * PAPR compliant platform. We assume it is if :
+	 *  - /device_type is "chrp" (please, do NOT use that for future
+	 *    non-IBM designs !
+	 *  - it has /rtas
+	 */
+	len = prom_getprop(prom.root, "device_type",
+			   compat, sizeof(compat)-1);
+	if (len <= 0)
+		return PLATFORM_GENERIC;
+	if (prom_strcmp(compat, "chrp"))
+		return PLATFORM_GENERIC;
+
+	/* Default to pSeries. We need to know if we are running LPAR */
+	rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+	if (!PHANDLE_VALID(rtas))
+		return PLATFORM_GENERIC;
+	x = prom_getproplen(rtas, "ibm,hypertas-functions");
+	if (x != PROM_ERROR) {
+		prom_debug("Hypertas detected, assuming LPAR !\n");
+		return PLATFORM_PSERIES_LPAR;
+	}
+	return PLATFORM_PSERIES;
+#else
+	return PLATFORM_GENERIC;
+#endif
+}
+
+/*
+ * Set palette entry @i of display instance @ih to (@r, @g, @b) through the
+ * firmware "color!" method and return call_prom()'s result.
+ *
+ * Note that the components are handed over in b, g, r order.
+ * NOTE(review): presumably the argument order "color!" expects - confirm.
+ */
+static int __init prom_set_color(ihandle ih, int i, int r, int g, int b)
+{
+	return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r);
+}
+
+/*
+ * If we have a display that we don't know how to drive,
+ * we will want to try to execute OF's open method for it
+ * later.  However, OF will probably fall over if we do that
+ * after we've taken over the MMU.
+ * So we check whether we will need to open the display,
+ * and if so, open it now.
+ *
+ * Every node with device_type "display" is opened, marked with a
+ * "linux,opened" property and given a default 16-entry color table
+ * (plus the logo palette when CONFIG_LOGO_LINUX_CLUT224 is set).
+ */
+static void __init prom_check_displays(void)
+{
+	char type[16], *path;
+	phandle node;
+	ihandle ih;
+	int i;
+
+	/* 16 palette entries, three bytes each. */
+	static const unsigned char default_colors[] __initconst = {
+		0x00, 0x00, 0x00,
+		0x00, 0x00, 0xaa,
+		0x00, 0xaa, 0x00,
+		0x00, 0xaa, 0xaa,
+		0xaa, 0x00, 0x00,
+		0xaa, 0x00, 0xaa,
+		0xaa, 0xaa, 0x00,
+		0xaa, 0xaa, 0xaa,
+		0x55, 0x55, 0x55,
+		0x55, 0x55, 0xff,
+		0x55, 0xff, 0x55,
+		0x55, 0xff, 0xff,
+		0xff, 0x55, 0x55,
+		0xff, 0x55, 0xff,
+		0xff, 0xff, 0x55,
+		0xff, 0xff, 0xff
+	};
+	const unsigned char *clut;
+
+	prom_debug("Looking for displays\n");
+	for (node = 0; prom_next_node(&node); ) {
+		memset(type, 0, sizeof(type));
+		prom_getprop(node, "device_type", type, sizeof(type));
+		if (prom_strcmp(type, "display") != 0)
+			continue;
+
+		/* It seems OF doesn't null-terminate the path :-( */
+		path = prom_scratch;
+		memset(path, 0, sizeof(prom_scratch));
+
+		/*
+		 * leave some room at the end of the path for appending extra
+		 * arguments
+		 */
+		if (call_prom("package-to-path", 3, 1, node, path,
+			      sizeof(prom_scratch) - 10) == PROM_ERROR)
+			continue;
+		prom_printf("found display   : %s, opening... ", path);
+		
+		ih = call_prom("open", 1, 1, path);
+		if (ih == 0) {
+			prom_printf("failed\n");
+			continue;
+		}
+
+		/* Success */
+		prom_printf("done\n");
+		prom_setprop(node, path, "linux,opened", NULL, 0);
+
+		/* Setup a usable color table when the appropriate
+		 * method is available. Should update this to set-colors */
+		clut = default_colors;
+		/* Stop at the first entry the firmware refuses. */
+		for (i = 0; i < 16; i++, clut += 3)
+			if (prom_set_color(ih, i, clut[0], clut[1],
+					   clut[2]) != 0)
+				break;
+
+#ifdef CONFIG_LOGO_LINUX_CLUT224
+		/* The logo palette is loaded starting at index 32. */
+		clut = PTRRELOC(logo_linux_clut224.clut);
+		for (i = 0; i < logo_linux_clut224.clutsize; i++, clut += 3)
+			if (prom_set_color(ih, i + 32, clut[0], clut[1],
+					   clut[2]) != 0)
+				break;
+#endif /* CONFIG_LOGO_LINUX_CLUT224 */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+		if (prom_getprop(node, "linux,boot-display", NULL, 0) !=
+		    PROM_ERROR) {
+			u32 width, height, pitch, addr;
+
+			prom_printf("Setting btext !\n");
+
+			/*
+			 * A missing geometry property ends the whole function,
+			 * so any remaining display nodes are not visited.
+			 */
+			if (prom_getprop(node, "width", &width, 4) == PROM_ERROR)
+				return;
+
+			if (prom_getprop(node, "height", &height, 4) == PROM_ERROR)
+				return;
+
+			if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR)
+				return;
+
+			if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR)
+				return;
+
+			prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
+				    width, height, pitch, addr);
+			btext_setup_display(width, height, 8, pitch, addr);
+			btext_prepare_BAT();
+		}
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
+	}
+}
+
+
+/*
+ * Carve @needed bytes, aligned to @align, out of the current device-tree
+ * work area and return a pointer to them.
+ *
+ * @mem_start / @mem_end describe the work area and are updated in place:
+ * *mem_start is first rounded up and finally advanced past the returned
+ * bytes, and *mem_end is moved whenever a further chunk (at most
+ * DEVTREE_CHUNK_SIZE) has to be claimed with alloc_up().  Panics when no
+ * more memory can be obtained.
+ */
+static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
+			      unsigned long needed, unsigned long align)
+{
+	unsigned long start = ALIGN(*mem_start, align);
+
+	*mem_start = start;
+
+	for (;;) {
+		unsigned long avail, claimed;
+
+		/* Enough space in the current area: done growing. */
+		if (start + needed <= *mem_end)
+			break;
+
+		prom_debug("Chunk exhausted, claiming more at %lx...\n",
+			   alloc_bottom);
+
+		/* Take what is left, capped at one chunk. */
+		avail = alloc_top - alloc_bottom;
+		if (avail > DEVTREE_CHUNK_SIZE)
+			avail = DEVTREE_CHUNK_SIZE;
+		if (avail < PAGE_SIZE)
+			prom_panic("No memory for flatten_device_tree "
+				   "(no room)\n");
+
+		claimed = alloc_up(avail, 0);
+		if (claimed == 0)
+			prom_panic("No memory for flatten_device_tree "
+				   "(claim failed)\n");
+
+		*mem_end = claimed + avail;
+	}
+
+	*mem_start = start + needed;
+
+	return (void *)start;
+}
+
+/*
+ * Append one 32-bit big-endian cell holding @token to the blob being
+ * built: 4 bytes, 4-byte aligned, obtained through make_room().
+ * @mem_start and @mem_end are passed straight on to make_room().
+ */
+#define dt_push_token(token, mem_start, mem_end) do { 			\
+		void *room = make_room(mem_start, mem_end, 4, 4);	\
+		*(__be32 *)room = cpu_to_be32(token);			\
+	} while(0)
+
+/*
+ * Look @str up in the string table spanning dt_string_start to
+ * dt_string_end.
+ *
+ * Returns the byte offset of the matching entry from dt_string_start, or
+ * 0 when there is none.  The first 4 bytes of the table are skipped, so a
+ * real entry never has offset 0.
+ */
+static unsigned long __init dt_find_string(char *str)
+{
+	char *table = (char *)dt_string_start;
+	char *limit = (char *)dt_string_end;
+	char *entry;
+
+	/* Entries are NUL-terminated strings stored back to back. */
+	for (entry = table + 4; entry < limit;
+	     entry += prom_strlen(entry) + 1) {
+		if (prom_strcmp(entry, str) == 0)
+			return entry - table;
+	}
+
+	return 0;
+}
+
+/*
+ * The Open Firmware 1275 specification states property names must be 31
+ * bytes or less, however not all firmwares obey this. Make it 64 bytes to
+ * be safe.
+ */
+#define MAX_PROPERTY_NAME 64
+
+/*
+ * Collect the names of all properties of @node and, recursively, of its
+ * children into the string table that starts at dt_string_start.
+ *
+ * Each name is fetched into freshly reserved space at *mem_start; the space
+ * is given back when the name is "name" or is already in the table,
+ * otherwise it is trimmed to the string's length and dt_string_end is
+ * advanced.  @mem_start and @mem_end are the work-area cursors used by
+ * make_room().
+ */
+static void __init scan_dt_build_strings(phandle node,
+					 unsigned long *mem_start,
+					 unsigned long *mem_end)
+{
+	char *prev_name, *namep, *sstart;
+	unsigned long soff;
+	phandle child;
+
+	sstart =  (char *)dt_string_start;
+
+	/* get and store all property names */
+	/* An empty previous name asks for the first property. */
+	prev_name = "";
+	for (;;) {
+		/* 64 is max len of name including nul. */
+		namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1);
+		if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) {
+			/* No more properties: unwind alloc */
+			*mem_start = (unsigned long)namep;
+			break;
+		}
+
+ 		/* skip "name" */
+		if (prom_strcmp(namep, "name") == 0) {
+ 			*mem_start = (unsigned long)namep;
+ 			prev_name = "name";
+ 			continue;
+ 		}
+		/* get/create string entry */
+		soff = dt_find_string(namep);
+		if (soff != 0) {
+			/* Already known: drop the copy, point at the table entry. */
+			*mem_start = (unsigned long)namep;
+			namep = sstart + soff;
+		} else {
+			/* Trim off some if we can */
+			*mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
+			dt_string_end = *mem_start;
+		}
+		/* Must point at storage that survives the next make_room(). */
+		prev_name = namep;
+	}
+
+	/* do all our children */
+	child = call_prom("child", 1, 1, node);
+	while (child != 0) {
+		scan_dt_build_strings(child, mem_start, mem_end);
+		child = call_prom("peer", 1, 1, child);
+	}
+}
+
+/*
+ * Emit the structure-block records for @node and, recursively, for all of
+ * its children: OF_DT_BEGIN_NODE, the unit name, one OF_DT_PROP record per
+ * property (except "name"), a synthesized "phandle" property when the node
+ * has none, the children, and finally OF_DT_END_NODE.
+ *
+ * Property names are referenced by their offset in the string table built
+ * beforehand, looked up with dt_find_string().  @mem_start and @mem_end are
+ * the work-area cursors used by make_room().
+ */
+static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
+					unsigned long *mem_end)
+{
+	phandle child;
+	char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
+	unsigned long soff;
+	unsigned char *valp;
+	static char pname[MAX_PROPERTY_NAME] __prombss;
+	int l, room, has_phandle = 0;
+
+	dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
+
+	/* get the node's full name */
+	/* First attempt writes straight into the remaining space, capped at 255. */
+	namep = (char *)*mem_start;
+	room = *mem_end - *mem_start;
+	if (room > 255)
+		room = 255;
+	l = call_prom("package-to-path", 3, 1, node, namep, room);
+	if (l >= 0) {
+		/* Didn't fit?  Get more room. */
+		if (l >= room) {
+			if (l >= *mem_end - *mem_start)
+				namep = make_room(mem_start, mem_end, l+1, 1);
+			call_prom("package-to-path", 3, 1, node, namep, l);
+		}
+		namep[l] = '\0';
+
+		/* Fixup an Apple bug where they have bogus \0 chars in the
+		 * middle of the path in some properties, and extract
+		 * the unit name (everything after the last '/').
+		 */
+		/*
+		 * lp is the write cursor: it restarts at namep on every '/',
+		 * and embedded NUL bytes are simply not copied.
+		 */
+		for (lp = p = namep, ep = namep + l; p < ep; p++) {
+			if (*p == '/')
+				lp = namep;
+			else if (*p != 0)
+				*lp++ = *p;
+		}
+		*lp = 0;
+		/* Continue after the terminated name, on a 4-byte boundary. */
+		*mem_start = ALIGN((unsigned long)lp + 1, 4);
+	}
+
+	/* get it again for debugging */
+	path = prom_scratch;
+	memset(path, 0, sizeof(prom_scratch));
+	call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
+
+	/* get and store all properties */
+	prev_name = "";
+	sstart = (char *)dt_string_start;
+	for (;;) {
+		if (call_prom("nextprop", 3, 1, node, prev_name,
+			      pname) != 1)
+			break;
+
+ 		/* skip "name" */
+		if (prom_strcmp(pname, "name") == 0) {
+ 			prev_name = "name";
+ 			continue;
+ 		}
+
+		/* find string offset */
+		soff = dt_find_string(pname);
+		if (soff == 0) {
+			/* Unknown name: the rest of this node's properties are dropped. */
+			prom_printf("WARNING: Can't find string index for"
+				    " <%s>, node %s\n", pname, path);
+			break;
+		}
+		/* pname is overwritten next round, so iterate from the table copy. */
+		prev_name = sstart + soff;
+
+		/* get length */
+		l = call_prom("getproplen", 2, 1, node, pname);
+
+		/* sanity checks */
+		if (l == PROM_ERROR)
+			continue;
+
+		/* push property head */
+		dt_push_token(OF_DT_PROP, mem_start, mem_end);
+		dt_push_token(l, mem_start, mem_end);
+		dt_push_token(soff, mem_start, mem_end);
+
+		/* push property content */
+		valp = make_room(mem_start, mem_end, l, 4);
+		call_prom("getprop", 4, 1, node, pname, valp, l);
+		/* Pad the value so the next token is 4-byte aligned. */
+		*mem_start = ALIGN(*mem_start, 4);
+
+		if (!prom_strcmp(pname, "phandle"))
+			has_phandle = 1;
+	}
+
+	/* Add a "phandle" property if none already exist */
+	if (!has_phandle) {
+		soff = dt_find_string("phandle");
+		if (soff == 0)
+			prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
+		else {
+			/* The value is the firmware's own handle for this node. */
+			dt_push_token(OF_DT_PROP, mem_start, mem_end);
+			dt_push_token(4, mem_start, mem_end);
+			dt_push_token(soff, mem_start, mem_end);
+			valp = make_room(mem_start, mem_end, 4, 4);
+			*(__be32 *)valp = cpu_to_be32(node);
+		}
+	}
+
+	/* do all our children */
+	child = call_prom("child", 1, 1, node);
+	while (child != 0) {
+		scan_dt_build_struct(child, mem_start, mem_end);
+		child = call_prom("peer", 1, 1, child);
+	}
+
+	dt_push_token(OF_DT_END_NODE, mem_start, mem_end);
+}
+
+/*
+ * Build the flattened device tree from the live firmware tree.
+ *
+ * The result is laid out, in order, as: header, memory reserve map,
+ * string table (page aligned) and structure block (page aligned).  The
+ * positions are recorded in dt_header_start, dt_string_start/_end and
+ * dt_struct_start/_end, and the header is filled in at the end once all
+ * sizes are known.  Afterwards mem_reserve_cnt is set to its maximum so
+ * that no further reservations are accepted.
+ */
+static void __init flatten_device_tree(void)
+{
+	phandle root;
+	unsigned long mem_start, mem_end, room;
+	struct boot_param_header *hdr;
+	char *namep;
+	u64 *rsvmap;
+
+	/*
+	 * Check how much room we have between alloc top & bottom (+/- a
+	 * few pages), crop to 1MB, as this is our "chunk" size
+	 */
+	room = alloc_top - alloc_bottom - 0x4000;
+	if (room > DEVTREE_CHUNK_SIZE)
+		room = DEVTREE_CHUNK_SIZE;
+	prom_debug("starting device tree allocs at %lx\n", alloc_bottom);
+
+	/* Now try to claim that */
+	mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
+	if (mem_start == 0)
+		prom_panic("Can't allocate initial device-tree chunk\n");
+	mem_end = mem_start + room;
+
+	/* Get root of tree */
+	root = call_prom("peer", 1, 1, (phandle)0);
+	if (root == (phandle)0)
+		prom_panic ("couldn't get device tree root\n");
+
+	/* Build header and make room for mem rsv map */ 
+	mem_start = ALIGN(mem_start, 4);
+	hdr = make_room(&mem_start, &mem_end,
+			sizeof(struct boot_param_header), 4);
+	dt_header_start = (unsigned long)hdr;
+	rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
+
+	/* Start of strings */
+	mem_start = PAGE_ALIGN(mem_start);
+	dt_string_start = mem_start;
+	/* Keeps offset 0 free; dt_find_string() returns 0 for "not found". */
+	mem_start += 4; /* hole */
+
+	/* Add "phandle" in there, we'll need it */
+	namep = make_room(&mem_start, &mem_end, 16, 1);
+	prom_strscpy_pad(namep, "phandle", sizeof("phandle"));
+	/* Give back the unused part of the 16 bytes reserved above. */
+	mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
+
+	/* Build string array */
+	prom_printf("Building dt strings...\n"); 
+	scan_dt_build_strings(root, &mem_start, &mem_end);
+	dt_string_end = mem_start;
+
+	/* Build structure */
+	mem_start = PAGE_ALIGN(mem_start);
+	dt_struct_start = mem_start;
+	prom_printf("Building dt structure...\n"); 
+	scan_dt_build_struct(root, &mem_start, &mem_end);
+	dt_push_token(OF_DT_END, &mem_start, &mem_end);
+	dt_struct_end = PAGE_ALIGN(mem_start);
+
+	/* Finish header */
+	/* All offsets are relative to the start of the header. */
+	hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
+	hdr->magic = cpu_to_be32(OF_DT_HEADER);
+	hdr->totalsize = cpu_to_be32(dt_struct_end - dt_header_start);
+	hdr->off_dt_struct = cpu_to_be32(dt_struct_start - dt_header_start);
+	hdr->off_dt_strings = cpu_to_be32(dt_string_start - dt_header_start);
+	hdr->dt_strings_size = cpu_to_be32(dt_string_end - dt_string_start);
+	hdr->off_mem_rsvmap = cpu_to_be32(((unsigned long)rsvmap) - dt_header_start);
+	hdr->version = cpu_to_be32(OF_DT_VERSION);
+	/* Version 16 is not backward compatible */
+	hdr->last_comp_version = cpu_to_be32(0x10);
+
+	/* Copy the reserve map in */
+	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
+
+#ifdef DEBUG_PROM
+	{
+		int i;
+		prom_printf("reserved memory map:\n");
+		for (i = 0; i < mem_reserve_cnt; i++)
+			prom_printf("  %llx - %llx\n",
+				    be64_to_cpu(mem_reserve_map[i].base),
+				    be64_to_cpu(mem_reserve_map[i].size));
+	}
+#endif
+	/* Bump mem_reserve_cnt to cause further reservations to fail
+	 * since it's too late.
+	 */
+	mem_reserve_cnt = MEM_RESERVE_MAP_SIZE;
+
+	prom_printf("Device tree strings 0x%lx -> 0x%lx\n",
+		    dt_string_start, dt_string_end);
+	prom_printf("Device tree struct  0x%lx -> 0x%lx\n",
+		    dt_struct_start, dt_struct_end);
+}
+
+#ifdef CONFIG_PPC_MAPLE
+/*
+ * PIBS Version 1.05.0000 04/26/2005 ships an incorrect /ht/isa "ranges"
+ * property: the values are wrong and there are too few cells.
+ *
+ * The ISA bridge is looked up at /ht@0/isa@4, then at /ht@0/isa@6.  If its
+ * "ranges" is exactly the known-bad 12-byte value, it is replaced with a
+ * six-cell entry pointing at the matching PCI IO space.
+ */
+static void __init fixup_device_tree_maple(void)
+{
+	char *path = "/ht@0/isa@4";
+	u32 io_loc = 0x01002000; /* IO space; PCI device = 4 */
+	u32 cells[6];
+	phandle isa_node;
+
+	isa_node = call_prom("finddevice", 1, 1, ADDR(path));
+	if (!PHANDLE_VALID(isa_node)) {
+		/* Not at device 4: try device 6 instead. */
+		path = "/ht@0/isa@6";
+		isa_node = call_prom("finddevice", 1, 1, ADDR(path));
+		io_loc = 0x01003000; /* IO space; PCI device = 6 */
+
+		if (!PHANDLE_VALID(isa_node))
+			return;
+	}
+
+	/* Only the broken three-cell form needs repairing. */
+	if (prom_getproplen(isa_node, "ranges") != 12)
+		return;
+	if (prom_getprop(isa_node, "ranges", cells, sizeof(cells))
+		== PROM_ERROR)
+		return;
+
+	if (!(cells[0] == 0x1 &&
+	      cells[1] == 0xf4000000 &&
+	      cells[2] == 0x00010000))
+		return;
+
+	prom_printf("Fixing up bogus ISA range on Maple/Apache...\n");
+
+	/* Replacement entry, six cells. */
+	cells[0] = 0x1;
+	cells[1] = 0x0;
+	cells[2] = io_loc;
+	cells[3] = 0x0;
+	cells[4] = 0x0;
+	cells[5] = 0x00010000;
+
+	prom_setprop(isa_node, path, "ranges", cells, sizeof(cells));
+}
+
+#define CPC925_MC_START		0xf8000000
+#define CPC925_MC_LENGTH	0x1000000
+/*
+ * The values for memory-controller don't have right number of cells
+ *
+ * When /hostbridge@f8000000 has an 8-byte "reg" holding
+ * (CPC925_MC_START, CPC925_MC_LENGTH) while the root node uses two address
+ * cells and two size cells, rewrite "reg" as a proper four-cell value.
+ * Anything else is left untouched.
+ */
+static void __init fixup_device_tree_maple_memory_controller(void)
+{
+	phandle mc;
+	u32 mc_reg[4];
+	char *name = "/hostbridge@f8000000";
+	/*
+	 * Start from 0 so that a failed lookup below is rejected by the
+	 * (ac != 2) / (sc != 2) test instead of reading indeterminate values.
+	 */
+	u32 ac = 0, sc = 0;
+
+	mc = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(mc))
+		return;
+
+	/* Only the short two-cell form needs fixing. */
+	if (prom_getproplen(mc, "reg") != 8)
+		return;
+
+	prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac));
+	prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc));
+	if ((ac != 2) || (sc != 2))
+		return;
+
+	if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR)
+		return;
+
+	if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH)
+		return;
+
+	prom_printf("Fixing up bogus hostbridge on Maple...\n");
+
+	/* Widen address and size to two cells each (high cell zero). */
+	mc_reg[0] = 0x0;
+	mc_reg[1] = CPC925_MC_START;
+	mc_reg[2] = 0x0;
+	mc_reg[3] = CPC925_MC_LENGTH;
+	prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg));
+}
+#else
+#define fixup_device_tree_maple()
+#define fixup_device_tree_maple_memory_controller()
+#endif
+
+#ifdef CONFIG_PPC_CHRP
+/*
+ * Pegasos and BriQ lacks the "ranges" property in the isa node
+ * Pegasos needs decimal IRQ 14/15, not hexadecimal
+ * Pegasos has the IDE configured in legacy mode, but advertised as native
+ */
+static void __init fixup_device_tree_chrp(void)
+{
+	phandle ph;
+	u32 prop[6];	/* scratch buffer reused for each property written below */
+	u32 rloc = 0x01006000; /* IO space; PCI device = 12 */
+	char *name;
+	int rc;
+
+	name = "/pci@80000000/isa@c";	/* first candidate path for the ISA node */
+	ph = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(ph)) {
+		name = "/pci@ff500000/isa@6";	/* second candidate; rloc follows it */
+		ph = call_prom("finddevice", 1, 1, ADDR(name));
+		rloc = 0x01003000; /* IO space; PCI device = 6 */
+	}
+	if (PHANDLE_VALID(ph)) {
+		rc = prom_getproplen(ph, "ranges");
+		if (rc == 0 || rc == PROM_ERROR) {	/* "ranges" empty or unreadable */
+			prom_printf("Fixing up missing ISA range on Pegasos...\n");
+
+			prop[0] = 0x1;	/* six-cell "ranges" entry */
+			prop[1] = 0x0;
+			prop[2] = rloc;
+			prop[3] = 0x0;
+			prop[4] = 0x0;
+			prop[5] = 0x00010000;
+			prom_setprop(ph, name, "ranges", prop, sizeof(prop));
+		}
+	}
+
+	name = "/pci@80000000/ide@C,1";	/* the IDE fixups are independent of the ISA one */
+	ph = call_prom("finddevice", 1, 1, ADDR(name));
+	if (PHANDLE_VALID(ph)) {
+		prom_printf("Fixing up IDE interrupt on Pegasos...\n");
+		prop[0] = 14;	/* decimal 14, not 0x14 */
+		prop[1] = 0x0;
+		prom_setprop(ph, name, "interrupts", prop, 2*sizeof(u32));
+		prom_printf("Fixing up IDE class-code on Pegasos...\n");
+		rc = prom_getprop(ph, "class-code", prop, sizeof(u32));
+		if (rc == sizeof(u32)) {	/* rewrite only a full one-cell value */
+			prop[0] &= ~0x5;	/* clears bits 0 and 2 (presumably the native-mode flags) */
+			prom_setprop(ph, name, "class-code", prop, sizeof(u32));
+		}
+	}
+}
+#else
+#define fixup_device_tree_chrp()
+#endif
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC)
+static void __init fixup_device_tree_pmac(void)
+{
+	phandle u3, i2c, mpic;
+	u32 u3_rev;
+	u32 interrupts[2];	/* two-cell "interrupts" value written below */
+	u32 parent;
+
+	/* Some G5s have a missing interrupt definition, fix it up here */
+	u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000"));
+	if (!PHANDLE_VALID(u3))
+		return;
+	i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000"));
+	if (!PHANDLE_VALID(i2c))
+		return;
+	mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000"));
+	if (!PHANDLE_VALID(mpic))
+		return;	/* all three nodes must exist before anything is changed */
+
+	/* check if proper rev of u3 */
+	if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
+	    == PROM_ERROR)
+		return;
+	if (u3_rev < 0x35 || u3_rev > 0x39)	/* only revisions 0x35..0x39 are patched */
+		return;
+	/* does it need fixup ? */
+	if (prom_getproplen(i2c, "interrupts") > 0)	/* non-empty property: leave it alone */
+		return;
+
+	prom_printf("fixing up bogus interrupts for u3 i2c...\n");
+
+	/* interrupt on this revision of u3 is number 0 and level */
+	interrupts[0] = 0;
+	interrupts[1] = 1;
+	prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts",
+		     &interrupts, sizeof(interrupts));
+	parent = (u32)mpic;	/* the mpic node's phandle becomes the interrupt-parent value */
+	prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
+		     &parent, sizeof(parent));
+}
+#else
+#define fixup_device_tree_pmac()
+#endif
+
+#ifdef CONFIG_PPC_EFIKA
+/*
+ * The MPC5200 FEC driver requires an phy-handle property to tell it how
+ * to talk to the phy.  If the phy-handle property is missing, then this
+ * function is called to add the appropriate nodes and link it to the
+ * ethernet node.
+ */
+static void __init fixup_device_tree_efika_add_phy(void)
+{
+	u32 node;
+	char prop[64];	/* scratch buffer; only prom_getprop()'s return value is used */
+	int rv;
+
+	/* Check if /builtin/ethernet exists - bail if it doesn't */
+	node = call_prom("finddevice", 1, 1, ADDR("/builtin/ethernet"));
+	if (!PHANDLE_VALID(node))
+		return;
+
+	/* Check if the phy-handle property exists - bail if it does */
+	rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
+	if (rv > 0)	/* positive length: phy already described, nothing to add */
+		return;
+
+	/*
+	 * At this point the ethernet device doesn't have a phy described.
+	 * Now we need to add the missing phy node and linkage
+	 */
+
+	/* Check for an MDIO bus node - if missing then create one */
+	node = call_prom("finddevice", 1, 1, ADDR("/builtin/mdio"));
+	if (!PHANDLE_VALID(node)) {
+		prom_printf("Adding Ethernet MDIO node\n");
+		call_prom("interpret", 1, 1,
+			" s\" /builtin\" find-device"
+			" new-device"
+				" 1 encode-int s\" #address-cells\" property"
+				" 0 encode-int s\" #size-cells\" property"
+				" s\" mdio\" device-name"
+				" s\" fsl,mpc5200b-mdio\" encode-string"
+				" s\" compatible\" property"
+				" 0xf0003000 0x400 reg"
+				" 0x2 encode-int"
+				" 0x5 encode-int encode+"
+				" 0x3 encode-int encode+"
+				" s\" interrupts\" property"
+			" finish-device");
+	}
+
+	/* Check for a PHY device node - if missing then create one and
+	 * give its phandle to the ethernet node */
+	node = call_prom("finddevice", 1, 1,
+			 ADDR("/builtin/mdio/ethernet-phy"));
+	if (!PHANDLE_VALID(node)) {
+		prom_printf("Adding Ethernet PHY node\n");
+		call_prom("interpret", 1, 1,
+			" s\" /builtin/mdio\" find-device"
+			" new-device"
+				" s\" ethernet-phy\" device-name"
+				" 0x10 encode-int s\" reg\" property"
+				" my-self"
+				" ihandle>phandle"
+			" finish-device"
+			" s\" /builtin/ethernet\" find-device"
+				" encode-int"
+				" s\" phy-handle\" property"
+			" device-end");
+	}
+}
+
+static void __init fixup_device_tree_efika(void)
+{
+	int sound_irq[3] = { 2, 2, 0 };	/* value for a missing sound "interrupts" */
+	int bcomm_irq[3*16] = { 3,0,0, 3,1,0, 3,2,0, 3,3,0,
+				3,4,0, 3,5,0, 3,6,0, 3,7,0,
+				3,8,0, 3,9,0, 3,10,0, 3,11,0,
+				3,12,0, 3,13,0, 3,14,0, 3,15,0 };
+	u32 node;
+	char prop[64];	/* scratch buffer for string properties */
+	int rv, len;
+
+	/* Check if we're really running on a EFIKA */
+	node = call_prom("finddevice", 1, 1, ADDR("/"));
+	if (!PHANDLE_VALID(node))
+		return;
+
+	rv = prom_getprop(node, "model", prop, sizeof(prop));
+	if (rv == PROM_ERROR)
+		return;
+	if (prom_strcmp(prop, "EFIKA5K2"))	/* any other model: apply no fixups */
+		return;
+
+	prom_printf("Applying EFIKA device tree fixups\n");
+
+	/* Claiming to be 'chrp' is death */
+	node = call_prom("finddevice", 1, 1, ADDR("/"));
+	rv = prom_getprop(node, "device_type", prop, sizeof(prop));
+	if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0))
+		prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
+
+	/* CODEGEN,description is exposed in /proc/cpuinfo so
+	   fix that too */
+	rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
+	if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP")))
+		prom_setprop(node, "/", "CODEGEN,description",
+			     "Efika 5200B PowerPC System",
+			     sizeof("Efika 5200B PowerPC System"));
+
+	/* Fixup bestcomm interrupts property */
+	node = call_prom("finddevice", 1, 1, ADDR("/builtin/bestcomm"));
+	if (PHANDLE_VALID(node)) {
+		len = prom_getproplen(node, "interrupts");
+		if (len == 12) {	/* a 12-byte value is replaced by the 16 triples */
+			prom_printf("Fixing bestcomm interrupts property\n");
+			prom_setprop(node, "/builtin/bestcomm", "interrupts",
+				     bcomm_irq, sizeof(bcomm_irq));
+		}
+	}
+
+	/* Fixup sound interrupts property */
+	node = call_prom("finddevice", 1, 1, ADDR("/builtin/sound"));
+	if (PHANDLE_VALID(node)) {
+		rv = prom_getprop(node, "interrupts", prop, sizeof(prop));
+		if (rv == PROM_ERROR) {	/* only added when it cannot be read */
+			prom_printf("Adding sound interrupts property\n");
+			prom_setprop(node, "/builtin/sound", "interrupts",
+				     sound_irq, sizeof(sound_irq));
+		}
+	}
+
+	/* Make sure ethernet phy-handle property exists */
+	fixup_device_tree_efika_add_phy();
+}
+#else
+#define fixup_device_tree_efika()
+#endif
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+/*
+ * CFE supplied on Nemo is broken in several ways, biggest
+ * problem is that it reassigns ISA interrupts to unused mpic ints.
+ * Add an interrupt-controller property for the io-bridge to use
+ * and correct the ints so we can attach them to an irq_domain
+ */
+static void __init fixup_device_tree_pasemi(void)
+{
+	u32 interrupts[2], parent, rval, val = 0;
+	char *name, *pci_name;
+	phandle iob, node;
+
+	/* Find the root pci node */
+	name = "/pxp@0,e0000000";
+	iob = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(iob))
+		return;
+
+	/* check if interrupt-controller node set yet */
+	if (prom_getproplen(iob, "interrupt-controller") !=PROM_ERROR)
+		return;	/* property already present: skip every fixup below */
+
+	prom_printf("adding interrupt-controller property for SB600...\n");
+
+	prom_setprop(iob, name, "interrupt-controller", &val, 0);	/* zero-length property */
+
+	pci_name = "/pxp@0,e0000000/pci@11";
+	node = call_prom("finddevice", 1, 1, ADDR(pci_name));
+	parent = ADDR(iob);	/* written as "interrupt-parent" of each remapped node */
+
+	for( ; prom_next_node(&node); ) {
+		/* scan each node for one with an interrupt */
+		if (!PHANDLE_VALID(node))
+			continue;
+
+		rval = prom_getproplen(node, "interrupts");
+		if (rval == 0 || rval == PROM_ERROR)
+			continue;
+
+		prom_getprop(node, "interrupts", &interrupts, sizeof(interrupts));
+		if ((interrupts[0] < 212) || (interrupts[0] > 222))
+			continue;	/* only 212..222 are remapped */
+
+		/* found a node, update both interrupts and interrupt-parent */
+		if ((interrupts[0] >= 212) && (interrupts[0] <= 215))
+			interrupts[0] -= 203;	/* 212..215 -> 9..12 */
+		if ((interrupts[0] >= 216) && (interrupts[0] <= 220))
+			interrupts[0] -= 213;	/* 216..220 -> 3..7 */
+		if (interrupts[0] == 221)
+			interrupts[0] = 14;
+		if (interrupts[0] == 222)
+			interrupts[0] = 8;
+
+		prom_setprop(node, pci_name, "interrupts", interrupts,
+					sizeof(interrupts));
+		prom_setprop(node, pci_name, "interrupt-parent", &parent,
+					sizeof(parent));
+	}
+
+	/*
+	 * The io-bridge has device_type set to 'io-bridge' change it to 'isa'
+	 * so that generic isa-bridge code can add the SB600 and its on-board
+	 * peripherals.
+	 */
+	name = "/pxp@0,e0000000/io-bridge@0";
+	iob = call_prom("finddevice", 1, 1, ADDR(name));	/* iob now refers to the io-bridge */
+	if (!PHANDLE_VALID(iob))
+		return;
+
+	/* device_type is already set, just change it. */
+
+	prom_printf("Changing device_type of SB600 node...\n");
+
+	prom_setprop(iob, name, "device_type", "isa", sizeof("isa"));
+}
+#else	/* !CONFIG_PPC_PASEMI_NEMO */
+static inline void fixup_device_tree_pasemi(void) { }	/* empty stub when Nemo support is off */
+#endif
+
+static void __init fixup_device_tree(void)
+{
+	fixup_device_tree_maple();	/* bogus ISA "ranges" on Maple/Apache */
+	fixup_device_tree_maple_memory_controller();	/* short hostbridge "reg" on Maple */
+	fixup_device_tree_chrp();	/* Pegasos ISA "ranges" and IDE properties */
+	fixup_device_tree_pmac();	/* missing u3 i2c "interrupts" */
+	fixup_device_tree_efika();	/* EFIKA5K2 root, bestcomm, sound, phy */
+	fixup_device_tree_pasemi();	/* SB600 interrupt remapping on Nemo */
+}
+
+static void __init prom_find_boot_cpu(void)
+{
+	__be32 rval;	/* big-endian scratch cell, reused for both property reads */
+	ihandle prom_cpu;
+	phandle cpu_pkg;
+
+	rval = 0;
+	if (prom_getprop(prom.chosen, "cpu", &rval, sizeof(rval)) <= 0)
+		return;	/* /chosen "cpu" missing or empty: prom.cpu is left unchanged */
+	prom_cpu = be32_to_cpu(rval);
+
+	cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);	/* ihandle -> phandle */
+
+	if (!PHANDLE_VALID(cpu_pkg))
+		return;
+
+	prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval));	/* NOTE(review): result unchecked */
+	prom.cpu = be32_to_cpu(rval);	/* first cell of "reg" is stored as the CPU index */
+
+	prom_debug("Booting CPU hw index = %d\n", prom.cpu);
+}
+
+static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (r3 && r4 && r4 != 0xdeadbeef) {	/* r3 is used as the start, r4 as the length */
+		__be64 val;
+
+		prom_initrd_start = is_kernel_addr(r3) ? __pa(r3) : r3;	/* kernel address -> __pa() */
+		prom_initrd_end = prom_initrd_start + r4;
+
+		val = cpu_to_be64(prom_initrd_start);	/* properties are written big-endian */
+		prom_setprop(prom.chosen, "/chosen", "linux,initrd-start",
+			     &val, sizeof(val));
+		val = cpu_to_be64(prom_initrd_end);
+		prom_setprop(prom.chosen, "/chosen", "linux,initrd-end",
+			     &val, sizeof(val));
+
+		reserve_mem(prom_initrd_start,	/* add the initrd range to the reserve map */
+			    prom_initrd_end - prom_initrd_start);
+
+		prom_debug("initrd_start=0x%lx\n", prom_initrd_start);
+		prom_debug("initrd_end=0x%lx\n", prom_initrd_end);
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)
+{
+	register unsigned long r3 asm("r3") = UV_ESM;	/* call number in; result comes back here */
+	register unsigned long r4 asm("r4") = kbase;	/* first argument */
+	register unsigned long r5 asm("r5") = fdt;	/* second argument */
+
+	asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));	/* r3 is both input and output */
+
+	return r3;	/* narrowed to int for the caller */
+}
+
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+	int ret;
+
+	if (!prom_svm_enable)	/* secure mode not requested: nothing to do */
+		return;
+
+	/* Switch to secure mode. */
+	prom_printf("Switching to secure mode.\n");
+
+	/*
+	 * The ultravisor will do an integrity check of the kernel image but we
+	 * relocated it so the check will fail. Restore the original image by
+	 * relocating it back to the kernel virtual base address.
+	 */
+	relocate(KERNELBASE);
+
+	ret = enter_secure_mode(kbase, fdt);
+
+	/* Relocate the kernel again. */
+	relocate(kbase);	/* done before the result is examined */
+
+	if (ret != U_SUCCESS) {
+		prom_printf("Returned %d from switching to secure mode.\n", ret);
+		prom_rtas_os_term("Switch to secure mode failed.\n");
+	}
+}
+#else
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{	/* empty stub used when CONFIG_PPC_SVM is not set */
+}
+#endif /* CONFIG_PPC_SVM */
+
+/*
+ * We enter here early on, when the Open Firmware prom is still
+ * handling exceptions and the MMU hash table for us.
+ */
+
+unsigned long __init prom_init(unsigned long r3, unsigned long r4,
+			       unsigned long pp,
+			       unsigned long r6, unsigned long r7,
+			       unsigned long kbase)
+{	
+	unsigned long hdr;	/* dt_header_start, handed to __start() at the end */
+
+#ifdef CONFIG_PPC32
+	unsigned long offset = reloc_offset();
+	reloc_got2(offset);	/* undone by reloc_got2(-offset) before __start() */
+#endif
+
+	/*
+	 * First zero the BSS
+	 */
+	memset(&__bss_start, 0, __bss_stop - __bss_start);
+
+	/*
+	 * Init interface to Open Firmware, get some node references,
+	 * like /chosen
+	 */
+	prom_init_client_services(pp);
+
+	/*
+	 * See if this OF is old enough that we need to do explicit maps
+	 * and other workarounds
+	 */
+	prom_find_mmu();
+
+	/*
+	 * Init prom stdout device
+	 */
+	prom_init_stdout();
+
+	prom_printf("Preparing to boot %s", linux_banner);
+
+	/*
+	 * Get default machine type. At this point, we do not differentiate
+	 * between pSeries SMP and pSeries LPAR
+	 */
+	of_platform = prom_find_machine_type();
+	prom_printf("Detected machine type: %x\n", of_platform);
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+	/* Bail if this is a kdump kernel. */
+	if (PHYSICAL_START > 0)
+		prom_panic("Error: You can't boot a kdump kernel from OF!\n");
+#endif
+
+	/*
+	 * Check for an initrd
+	 */
+	prom_check_initrd(r3, r4);	/* r3/r4 are used as initrd start and length */
+
+	/*
+	 * Do early parsing of command line
+	 */
+	early_cmdline_parse();
+
+#ifdef CONFIG_PPC_PSERIES
+	/*
+	 * On pSeries, inform the firmware about our capabilities
+	 */
+	if (of_platform == PLATFORM_PSERIES ||
+	    of_platform == PLATFORM_PSERIES_LPAR)
+		prom_send_capabilities();
+#endif
+
+	/*
+	 * Copy the CPU hold code
+	 */
+	if (of_platform != PLATFORM_POWERMAC)
+		copy_and_flush(0, kbase, 0x100, 0);
+
+	/*
+	 * Initialize memory management within prom_init
+	 */
+	prom_init_mem();
+
+	/*
+	 * Determine which cpu is actually running right _now_
+	 */
+	prom_find_boot_cpu();
+
+	/* 
+	 * Initialize display devices
+	 */
+	prom_check_displays();
+
+#if defined(CONFIG_PPC64) && defined(__BIG_ENDIAN__)
+	/*
+	 * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else
+	 * that uses the allocator, we need to make sure we get the top of memory
+	 * available for us here...
+	 */
+	if (of_platform == PLATFORM_PSERIES)
+		prom_initialize_tce_table();
+#endif
+
+	/*
+	 * On non-powermacs, try to instantiate RTAS. PowerMacs don't
+	 * have a usable RTAS implementation.
+	 */
+	if (of_platform != PLATFORM_POWERMAC)
+		prom_instantiate_rtas();
+
+#ifdef CONFIG_PPC64
+	/* instantiate sml */
+	prom_instantiate_sml();
+#endif
+
+	/*
+	 * On non-powermacs, put all CPUs in spin-loops.
+	 *
+	 * PowerMacs use a different mechanism to spin CPUs
+	 *
+	 * (This must be done after instantiating RTAS)
+	 */
+	if (of_platform != PLATFORM_POWERMAC)
+		prom_hold_cpus();
+
+	/*
+	 * Record settings in /chosen for use by the kernel later on
+	 */
+	if (prom_memory_limit) {
+		__be64 val = cpu_to_be64(prom_memory_limit);	/* stored big-endian */
+		prom_setprop(prom.chosen, "/chosen", "linux,memory-limit",
+			     &val, sizeof(val));
+	}
+#ifdef CONFIG_PPC64
+	if (prom_iommu_off)
+		prom_setprop(prom.chosen, "/chosen", "linux,iommu-off",
+			     NULL, 0);	/* zero-length flag property */
+
+	if (prom_iommu_force_on)
+		prom_setprop(prom.chosen, "/chosen", "linux,iommu-force-on",
+			     NULL, 0);	/* zero-length flag property */
+
+	if (prom_tce_alloc_start) {
+		prom_setprop(prom.chosen, "/chosen", "linux,tce-alloc-start",
+			     &prom_tce_alloc_start,
+			     sizeof(prom_tce_alloc_start));
+		prom_setprop(prom.chosen, "/chosen", "linux,tce-alloc-end",
+			     &prom_tce_alloc_end,
+			     sizeof(prom_tce_alloc_end));
+	}
+#endif
+
+	/*
+	 * Fixup any known bugs in the device-tree
+	 */
+	fixup_device_tree();
+
+	/*
+	 * Now finally create the flattened device-tree
+	 */
+	prom_printf("copying OF device tree...\n");
+	flatten_device_tree();
+
+	/*
+	 * in case stdin is USB and still active on IBM machines...
+	 * Unfortunately quiesce crashes on some powermacs if we have
+	 * closed stdin already (in particular the powerbook 101).
+	 */
+	if (of_platform != PLATFORM_POWERMAC)
+		prom_close_stdin();
+
+	/*
+	 * Call OF "quiesce" method to shut down pending DMA's from
+	 * devices etc...
+	 */
+	prom_printf("Quiescing Open Firmware ...\n");
+	call_prom("quiesce", 0, 0);
+
+	/*
+	 * And finally, call the kernel passing it the flattened device
+	 * tree and NULL as r5, thus triggering the new entry point which
+	 * is common to us and kexec
+	 */
+	hdr = dt_header_start;
+
+	prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
+	prom_debug("->dt_header_start=0x%lx\n", hdr);
+
+#ifdef CONFIG_PPC32
+	reloc_got2(-offset);	/* reverses the reloc_got2(offset) done on entry */
+#endif
+
+	/* Move to secure memory if we're supposed to be secure guests. */
+	setup_secure_guest(kbase, hdr);
+
+	__start(hdr, kbase, 0, 0, 0, 0, 0);
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
new file mode 100644
index 0000000000..69623b9045
--- /dev/null
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -0,0 +1,94 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright © 2008 IBM Corporation
+#
+
+# This script checks prom_init.o to see what external symbols it
+# is using, if it finds symbols not in the whitelist it returns
+# an error. The point of this is to discourage people from
+# intentionally or accidentally adding new code to prom_init.c
+# which has side effects on other parts of the kernel.
+
+# If you really need to reference something from prom_init.o add
+# it to the list below:
+
+has_renamed_memintrinsics()
+{
+	grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} || return	# no KASAN: pass grep's status on
+	! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG}
+}
+
+if has_renamed_memintrinsics
+then
+	MEM_FUNCS="__memcpy __memset"	# underscore-prefixed names are whitelisted in this case
+else
+	MEM_FUNCS="memcpy memset"
+fi
+
+WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
+_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
+__secondary_hold_acknowledge __secondary_hold_spinloop __start
+logo_linux_clut224 btext_prepare_BAT
+reloc_got2 kernstart_addr memstart_addr linux_banner _stext
+btext_setup_display TOC. relocate"
+
+NM="$1"	# first argument: command run as "$NM -u" to list undefined symbols
+OBJ="$2"	# second argument: object file to inspect
+
+ERROR=0	# becomes the exit status; set to 1 on any violation
+
+check_section()
+{
+    file=$1	# object file to inspect
+    section=$2	# section that must be empty
+    size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
+    size=${size:-0}	# section not listed: treat it as empty
+    if [ "$((0x$size))" -ne 0 ]; then	# objdump prints the size in hex; convert before the numeric test
+	ERROR=1
+	echo "Error: Section $section not empty in prom_init.c" >&2
+    fi
+}
+
+for UNDEF in $($NM -u $OBJ | awk '{print $2}')	# second field of each "$NM -u" line
+do
+	# On 64-bit nm gives us the function descriptors, which have
+	# a leading . on the name, so strip it off here.
+	UNDEF="${UNDEF#.}"
+
+	case "$KBUILD_VERBOSE" in
+	*1*)
+		echo "Checking prom_init.o symbol '$UNDEF'" ;;
+	esac
+
+	OK=0
+	for WHITE in $WHITELIST
+	do
+		if [ "$UNDEF" = "$WHITE" ]; then
+			OK=1
+			break
+		fi
+	done
+
+	# ignore register save/restore functions
+	case $UNDEF in
+	_restgpr_*|_restgpr0_*|_rest32gpr_*)
+		OK=1
+		;;
+	_savegpr_*|_savegpr0_*|_save32gpr_*)
+		OK=1
+		;;
+	esac
+
+	if [ $OK -eq 0 ]; then
+		ERROR=1	# keep scanning so every offending symbol is reported
+		echo "Error: External symbol '$UNDEF' referenced" \
+		     "from prom_init.c" >&2
+	fi
+done
+
+check_section $OBJ .data
+check_section $OBJ .bss
+check_section $OBJ .init.data
+
+exit $ERROR
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
new file mode 100644
index 0000000000..9cb7f88df5
--- /dev/null
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/etherdevice.h>
+#include <linux/of_address.h>
+#include <asm/prom.h>
+
+void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
+			 unsigned long *busno, unsigned long *phys,
+			 unsigned long *size)
+{
+	const __be32 *cursor = dma_window;	/* next unread cell */
+	const __be32 *ncells_prop;
+	u32 ncells;
+
+	/* Cells are read in the order busno, phys, size; busno is one cell. */
+	*busno = of_read_number(cursor++, 1);
+
+	/* phys width: ibm,#dma-address-cells, else #address-cells, else of_n_addr_cells() */
+	ncells_prop = of_get_property(dn, "ibm,#dma-address-cells", NULL);
+	if (ncells_prop == NULL)
+		ncells_prop = of_get_property(dn, "#address-cells", NULL);
+	ncells = ncells_prop ? of_read_number(ncells_prop, 1) : of_n_addr_cells(dn);
+	*phys = of_read_number(cursor, ncells);
+	cursor += ncells;
+
+	/* size width: ibm,#dma-size-cells, else of_n_size_cells() */
+	ncells_prop = of_get_property(dn, "ibm,#dma-size-cells", NULL);
+	ncells = ncells_prop ? of_read_number(ncells_prop, 1) : of_n_size_cells(dn);
+	*size = of_read_number(cursor, ncells);
+}
diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile
new file mode 100644
index 0000000000..77abd1a5a5
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the powerpc ptrace objects.
+#
+
+CFLAGS_ptrace-view.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y				+= ptrace.o ptrace-view.o
+obj-y				+= ptrace-fpu.o
+obj-$(CONFIG_COMPAT)		+= ptrace32.o
+obj-$(CONFIG_VSX)		+= ptrace-vsx.o
+ifneq ($(CONFIG_VSX),y)
+obj-y				+= ptrace-novsx.o
+endif
+obj-$(CONFIG_ALTIVEC)		+= ptrace-altivec.o
+obj-$(CONFIG_SPE)		+= ptrace-spe.o
+obj-$(CONFIG_PPC_TRANSACTIONAL_MEM)	+= ptrace-tm.o
+obj-$(CONFIG_PPC_ADV_DEBUG_REGS)	+= ptrace-adv.o
+ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y)
+obj-y				+= ptrace-noadv.o
+endif
diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c
new file mode 100644
index 0000000000..399f5d94a3
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+	struct thread_struct *thr = &task->thread;
+
+	/* Step per instruction: drop BT, set IDM and IC, set MSR_DE. */
+	if (thr->regs) {
+		thr->debug.dbcr0 = (thr->debug.dbcr0 & ~DBCR0_BT) | DBCR0_IDM | DBCR0_IC;
+		regs_set_return_msr(thr->regs, thr->regs->msr | MSR_DE);
+	}
+	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+	struct pt_regs *regs = task->thread.regs;
+	/* Step per taken branch: BT replaces IC; other DBCR0 bits are kept. */
+	if (regs != NULL) {
+		task->thread.debug.dbcr0 &= ~DBCR0_IC;
+		task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_BT;	/* OR in, do not overwrite */
+		regs_set_return_msr(regs, regs->msr | MSR_DE);
+	}
+	set_tsk_thread_flag(task, TIF_SINGLESTEP);	/* set even when there are no regs */
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+	struct thread_struct *thr = &task->thread;
+	struct pt_regs *regs = thr->regs;
+
+	/*
+	 * Stop both kinds of stepping by clearing the Instruction
+	 * Complete (IC) and Branch Taken (BT) bits.  If that leaves no
+	 * debug event active in DBCR0/DBCR1, DBCR0(IDM) and MSR(DE)
+	 * are switched off as well.
+	 *
+	 * TIF_SINGLESTEP is cleared whether or not regs is present.
+	 */
+	if (regs) {
+		thr->debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT);
+
+		/* Is any other debug event still active? */
+		if (!DBCR_ACTIVE_EVENTS(thr->debug.dbcr0,
+					thr->debug.dbcr1)) {
+			/* No: debug mode is no longer needed. */
+			thr->debug.dbcr0 &= ~DBCR0_IDM;
+			regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+		}
+	}
+	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+	dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |	/* always reported */
+			    PPC_DEBUG_FEATURE_INSN_BP_MASK;
+	if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE))	/* data range/mask is optional */
+		dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE |
+				     PPC_DEBUG_FEATURE_DATA_BP_MASK;
+	dbginfo->sizeof_condition = 4;
+	dbginfo->data_bp_alignment = 4;
+	dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;	/* counts come from Kconfig */
+	dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
+	dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
+	dbginfo->version = 1;
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+			unsigned long __user *datalp)
+{
+	/* Only register 0 is supported here; it reads back DAC1. */
+	if (addr != 0)
+		return -EINVAL;
+	return put_user(child->thread.debug.dac1, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+	struct pt_regs *regs = task->thread.regs;	/* NOTE(review): used below without a NULL check */
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int ret;	/* these four locals are not referenced in this function */
+	struct thread_struct *thread = &task->thread;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+	/* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+	 *  For embedded processors we support one DAC and no IAC's at the
+	 *  moment.
+	 */
+	if (addr > 0)
+		return -EINVAL;	/* only register index 0 is accepted */
+
+	/* The bottom 3 bits in dabr are flags */
+	if ((data & ~0x7UL) >= TASK_SIZE)
+		return -EIO;
+
+	/* As described above, it was assumed 3 bits were passed with the data
+	 *  address, but we will assume only the mode bits will be passed
+	 *  as to not cause alignment restrictions for DAC-based processors.
+	 */
+
+	/* DAC's hold the whole address without any mode flags */
+	task->thread.debug.dac1 = data & ~0x3UL;	/* the low two bits are the mode bits */
+
+	if (task->thread.debug.dac1 == 0) {	/* address 0 removes the breakpoint */
+		dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+		if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+					task->thread.debug.dbcr1)) {
+			regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+			task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+		}
+		return 0;
+	}
+
+	/* Read or Write bits must be set */
+
+	if (!(data & 0x3UL))
+		return -EINVAL;	/* dac1 has already been written at this point */
+
+	/* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */
+	task->thread.debug.dbcr0 |= DBCR0_IDM;
+
+	/* Check for write and read flags and set DBCR0 accordingly */
+	dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);	/* start from neither, then add */
+	if (data & 0x1UL)
+		dbcr_dac(task) |= DBCR_DAC1R;	/* bit 0 selects read */
+	if (data & 0x2UL)
+		dbcr_dac(task) |= DBCR_DAC1W;	/* bit 1 selects write */
+	regs_set_return_msr(regs, regs->msr | MSR_DE);
+	return 0;
+}
+
+static long set_instruction_bp(struct task_struct *child,
+			       struct ppc_hw_breakpoint *bp_info)
+{
+	int slot;	/* IAC slot chosen, 1..4; returned on success */
+	int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+	int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+	int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+	int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
+
+	if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+		slot2_in_use = 1;	/* IAC2 is the second half of a 1-2 range */
+	if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+		slot4_in_use = 1;	/* IAC4 is the second half of a 3-4 range */
+
+	if (bp_info->addr >= TASK_SIZE)
+		return -EIO;
+
+	if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+		/* Make sure range is valid. */
+		if (bp_info->addr2 >= TASK_SIZE)
+			return -EIO;
+
+		/* We need a pair of IAC registers */
+		if (!slot1_in_use && !slot2_in_use) {
+			slot = 1;
+			child->thread.debug.iac1 = bp_info->addr;
+			child->thread.debug.iac2 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC1;
+			if (bp_info->addr_mode ==
+					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+				dbcr_iac_range(child) |= DBCR_IAC12X;
+			else
+				dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+		} else if ((!slot3_in_use) && (!slot4_in_use)) {
+			slot = 3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.iac4 = bp_info->addr2;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
+			if (bp_info->addr_mode ==
+					PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+				dbcr_iac_range(child) |= DBCR_IAC34X;
+			else
+				dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+		} else {
+			return -ENOSPC;	/* no free pair left */
+		}
+	} else {
+		/* We only need one.  If possible leave a pair free in
+		 * case a range is needed later
+		 */
+		if (!slot1_in_use) {
+			/*
+			 * Don't use iac1 if iac1-iac2 are free and either
+			 * iac3 or iac4 (but not both) are free
+			 */
+			if (slot2_in_use || slot3_in_use == slot4_in_use) {
+				slot = 1;
+				child->thread.debug.iac1 = bp_info->addr;
+				child->thread.debug.dbcr0 |= DBCR0_IAC1;
+				goto out;
+			}
+		}
+		if (!slot2_in_use) {
+			slot = 2;
+			child->thread.debug.iac2 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+		} else if (!slot3_in_use) {
+			slot = 3;
+			child->thread.debug.iac3 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC3;
+		} else if (!slot4_in_use) {
+			slot = 4;
+			child->thread.debug.iac4 = bp_info->addr;
+			child->thread.debug.dbcr0 |= DBCR0_IAC4;
+#endif
+		} else {
+			return -ENOSPC;	/* every usable slot is taken */
+		}
+	}
+out:
+	child->thread.debug.dbcr0 |= DBCR0_IDM;	/* common tail: set IDM and MSR_DE */
+	regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+	return slot;
+}
+
+static int del_instruction_bp(struct task_struct *child, int slot)
+{
+	switch (slot) {	/* slot is the IAC number, 1..4 */
+	case 1:
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
+			return -ENOENT;	/* slot 1 is not enabled */
+
+		if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
+			/* address range - clear slots 1 & 2 */
+			child->thread.debug.iac2 = 0;
+			dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
+		}
+		child->thread.debug.iac1 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
+		break;
+	case 2:
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
+			return -ENOENT;
+
+		if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+			/* used in a range */
+			return -EINVAL;	/* a range is removed through slot 1 */
+		child->thread.debug.iac2 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case 3:
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
+			return -ENOENT;
+
+		if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
+			/* address range - clear slots 3 & 4 */
+			child->thread.debug.iac4 = 0;
+			dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
+		}
+		child->thread.debug.iac3 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
+		break;
+	case 4:
+		if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
+			return -ENOENT;
+
+		if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+			/* Used in a range */
+			return -EINVAL;	/* a range is removed through slot 3 */
+		child->thread.debug.iac4 = 0;
+		child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
+		break;
+#endif
+	default:
+		return -EINVAL;	/* slot number not handled in this configuration */
+	}
+	return 0;
+}
+
+static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+	int byte_enable =
+		(bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
+		& 0xf;	/* four-bit byte-enable field */
+	int condition_mode =
+		bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
+	int slot;	/* DAC slot chosen, 1 or 2 */
+
+	if (byte_enable && condition_mode == 0)
+		return -EINVAL;	/* byte enables given without a condition mode */
+
+	if (bp_info->addr >= TASK_SIZE)
+		return -EIO;
+
+	if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {	/* DAC1 is free */
+		slot = 1;
+		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+			dbcr_dac(child) |= DBCR_DAC1R;
+		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+			dbcr_dac(child) |= DBCR_DAC1W;
+		child->thread.debug.dac1 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+		if (byte_enable) {
+			child->thread.debug.dvc1 =
+				(unsigned long)bp_info->condition_value;
+			child->thread.debug.dbcr2 |=
+				((byte_enable << DBCR2_DVC1BE_SHIFT) |
+				 (condition_mode << DBCR2_DVC1M_SHIFT));
+		}
+#endif
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+	} else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+		/* Both dac1 and dac2 are part of a range */
+		return -ENOSPC;
+#endif
+	} else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {	/* DAC2 is free */
+		slot = 2;
+		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+			dbcr_dac(child) |= DBCR_DAC2R;
+		if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+			dbcr_dac(child) |= DBCR_DAC2W;
+		child->thread.debug.dac2 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+		if (byte_enable) {
+			child->thread.debug.dvc2 =
+				(unsigned long)bp_info->condition_value;
+			child->thread.debug.dbcr2 |=
+				((byte_enable << DBCR2_DVC2BE_SHIFT) |
+				 (condition_mode << DBCR2_DVC2M_SHIFT));
+		}
+#endif
+	} else {
+		return -ENOSPC;	/* both DACs already have a trigger set */
+	}
+	child->thread.debug.dbcr0 |= DBCR0_IDM;
+	regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+	return slot + 4;	/* the returned handle is the DAC slot number plus 4 */
+}
+
+static int del_dac(struct task_struct *child, int slot)
+{
+	if (slot == 1) {	/* disarm DAC1, and a DAC1/DAC2 range if one is set */
+		if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
+			return -ENOENT;	/* DAC1 has no trigger armed */
+		/* NOTE(review): DVC bits are cleared under DAC_RANGE but set_dac() sets them under DVCS > 0 */
+		child->thread.debug.dac1 = 0;
+		dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+			child->thread.debug.dac2 = 0;
+			child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
+		}
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+		child->thread.debug.dvc1 = 0;
+#endif
+	} else if (slot == 2) {	/* disarm DAC2 on its own */
+		if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
+			return -ENOENT;	/* DAC2 has no trigger armed */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+		if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
+			/* Part of a range */
+			return -EINVAL;
+		child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+		child->thread.debug.dvc2 = 0;
+#endif
+		child->thread.debug.dac2 = 0;
+		dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+	} else {
+		return -EINVAL;	/* only slots 1 and 2 exist */
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+static int set_dac_range(struct task_struct *child,
+			 struct ppc_hw_breakpoint *bp_info)
+{
+	int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
+
+	/* DAC1+DAC2 range/mask watchpoint; DVC conditions are not allowed with it */
+	if (bp_info->condition_mode)
+		return -EINVAL;
+
+	/*
+	 * Best effort to verify the address range.  The user/supervisor bits
+	 * prevent trapping in kernel space, but let's fail on an obvious bad
+	 * range.  The simple test on the mask is not fool-proof, and any
+	 * exclusive range will spill over into kernel space.
+	 */
+	if (bp_info->addr >= TASK_SIZE)
+		return -EIO;
+	if (mode == PPC_BREAKPOINT_MODE_MASK) {
+		/*
+		 * dac2 is a bitmask.  Don't allow a mask that makes a
+		 * kernel space address from a valid dac1 value
+		 */
+		if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+			return -EIO;
+	} else {
+		/*
+		 * For range breakpoints, addr2 must also be a valid address
+		 */
+		if (bp_info->addr2 >= TASK_SIZE)
+			return -EIO;
+	}
+	/* Both DAC registers are consumed, so neither may have a trigger armed. */
+	if (child->thread.debug.dbcr0 &
+	    (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+		return -ENOSPC;
+
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+		child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+	child->thread.debug.dac1 = bp_info->addr;
+	child->thread.debug.dac2 = bp_info->addr2;
+	if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+		child->thread.debug.dbcr2  |= DBCR2_DAC12M;
+	else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MX;
+	else	/* PPC_BREAKPOINT_MODE_MASK */
+		child->thread.debug.dbcr2  |= DBCR2_DAC12MM;
+	regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+	return 5;	/* triggers live in DAC1, so the handle is slot 1 + 4 */
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+	if (bp_info->version != 1)
+		return -ENOTSUPP;	/* only version 1 requests are understood */
+	/*
+	 * Reject an empty trigger and unknown trigger/address-mode/condition bits
+	 */
+	if (bp_info->trigger_type == 0 ||
+	    (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+				       PPC_BREAKPOINT_TRIGGER_RW)) ||
+	    (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+	    (bp_info->condition_mode &
+	     ~(PPC_BREAKPOINT_CONDITION_MODE |
+	       PPC_BREAKPOINT_CONDITION_BE_ALL)))
+		return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+	if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+		return -EINVAL;	/* no DVCs configured, so no value conditions */
+#endif
+	/* Dispatch: execute -> instruction bp, exact data -> one DAC, else DAC range. */
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+		if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE ||
+		    bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+			return -EINVAL;	/* execute excludes R/W triggers and conditions */
+		return set_instruction_bp(child, bp_info);
+	}
+	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+		return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+	return set_dac_range(child, bp_info);
+#else
+	return -EINVAL;	/* range/mask modes need CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+#endif
+}
+
+/* Remove breakpoint handle @data: up to 4 are instruction bps, 5-6 DAC slots. */
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+	int rc;
+
+	/* Handles are small ints; refuse values the (int) casts would truncate. */
+	if (data != (int)data)
+		return -EINVAL;
+	rc = data <= 4 ? del_instruction_bp(child, (int)data) :
+			 del_dac(child, (int)data - 4);
+	if (rc)
+		return rc;
+	/* No debug events remain armed: clear DBCR0_IDM and MSR_DE again. */
+	if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, child->thread.debug.dbcr1)) {
+		child->thread.debug.dbcr0 &= ~DBCR0_IDM;
+		regs_set_return_msr(child->thread.regs, child->thread.regs->msr & ~MSR_DE);
+	}
+	return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
new file mode 100644
index 0000000000..0d9bc4bd49
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadwords.  Quadwords 0-31 contain the
+ * corresponding vector registers.  Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword.  Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface.  This allows signal handling and ptrace to use the
+ * same structures.  This also simplifies the implementation of a bi-arch
+ * (combined 32- and 64-bit) gdb.
+ */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	flush_altivec_to_thread(target);	/* presumably syncs live VMX state into target->thread */
+	return target->thread.used_vr ? regset->n : 0;	/* whole regset if used_vr is set, else empty */
+}
+
+/*
+ * vr_get - copy the running VMX state (vr0..vr31, vscr, vrsave) to @to.
+ *
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers; 'ckvr_state' holds the checkpointed
+ * values a transaction falls back on if it aborts (not read here).
+ *
+ * Userspace interface buffer layout:
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ * };
+ */
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+	   struct membuf to)
+{
+	union {
+		elf_vrreg_t reg;
+		u32 word;
+	} vrsave;
+
+	flush_altivec_to_thread(target);
+
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
+	/* vscr sits where vr[32] would be (checked above): one 33-quadword copy. */
+	membuf_write(&to, &target->thread.vr_state, 33 * sizeof(vector128));
+	/*
+	 * vrsave fills only the first word of its slot; the rest is zeroed.
+	 */
+	memset(&vrsave, 0, sizeof(vrsave));
+	vrsave.word = target->thread.vrsave;
+	return membuf_write(&to, &vrsave, sizeof(vrsave));
+}
+
+/*
+ * vr_set - update the running VMX state from @kbuf or @ubuf.
+ *
+ * Same buffer layout as vr_get().  Only 'vr_state' (the current running
+ * values) is written, never the checkpointed 'ckvr_state'.  Only the first
+ * word of the vrsave quadword is kept; the rest of it is discarded.
+ *
+ * Userspace interface buffer layout:
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ * };
+ */
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	flush_altivec_to_thread(target);
+
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
+	/* vr[0..31] and vscr occupy the first 33 quadwords of the regset. */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.vr_state, 0,
+				 33 * sizeof(vector128));
+	if (!ret && count > 0) {
+		/*
+		 * We use only the first word of vrsave.
+		 */
+		int start, end;
+		union {
+			elf_vrreg_t reg;
+			u32 word;
+		} vrsave;
+		memset(&vrsave, 0, sizeof(vrsave));
+		/* Seed with the current value before the copy-in overwrites it. */
+		vrsave.word = target->thread.vrsave;
+
+		start = 33 * sizeof(vector128);
+		end = start + sizeof(vrsave);
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+					 start, end);
+		if (!ret)
+			target->thread.vrsave = vrsave.word;
+	}
+
+	return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
new file mode 100644
index 0000000000..4171a57271
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <linux/regset.h>
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_DEBUGCHANGE	0
+#else
+#define MSR_DEBUGCHANGE	(MSR_SE | MSR_BE)
+#endif
+
+/*
+ * Max register writeable via put_reg
+ */
+#ifdef CONFIG_PPC32
+#define PT_MAX_PUT_REG	PT_MQ
+#else
+#define PT_MAX_PUT_REG	PT_CCR
+#endif
+
+#define TVSO(f)	(offsetof(struct thread_vr_state, f))
+#define TFSO(f)	(offsetof(struct thread_fp_state, f))
+#define TSO(f)	(offsetof(struct thread_struct, f))
+
+/*
+ * These are our native regset flavors.
+ */
+enum powerpc_regset {
+	REGSET_GPR,		/* GPR registers */
+	REGSET_FPR,		/* FPR registers */
+#ifdef CONFIG_ALTIVEC
+	REGSET_VMX,		/* VMX (AltiVec) registers */
+#endif
+#ifdef CONFIG_VSX
+	REGSET_VSX,		/* VSX registers */
+#endif
+#ifdef CONFIG_SPE
+	REGSET_SPE,		/* SPE registers */
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	REGSET_TM_CGPR,		/* TM checkpointed GPR registers */
+	REGSET_TM_CFPR,		/* TM checkpointed FPR registers */
+	REGSET_TM_CVMX,		/* TM checkpointed VMX registers */
+	REGSET_TM_CVSX,		/* TM checkpointed VSX registers */
+	REGSET_TM_SPR,		/* TM specific SPR registers */
+	REGSET_TM_CTAR,		/* TM checkpointed TAR register */
+	REGSET_TM_CPPR,		/* TM checkpointed PPR register */
+	REGSET_TM_CDSCR,	/* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+	REGSET_PPR,		/* PPR register */
+	REGSET_DSCR,		/* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	REGSET_TAR,		/* TAR register */
+	REGSET_EBB,		/* EBB registers */
+	REGSET_PMR,		/* Performance Monitor Registers */
+	REGSET_DEXCR,		/* DEXCR registers */
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	REGSET_HASHKEYR,	/* HASHKEYR register */
+#endif
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+	REGSET_PKEY,		/* AMR register */
+#endif
+};
+
+/* ptrace-(no)vsx */
+
+user_regset_get2_fn fpr_get;
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf);
+
+/* ptrace-vsx */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn vsr_get;
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf);
+
+/* ptrace-altivec */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn vr_get;
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   const void *kbuf, const void __user *ubuf);
+
+/* ptrace-spe */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn evr_get;
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf);
+
+/* ptrace */
+
+int gpr32_get_common(struct task_struct *target,
+		     const struct user_regset *regset,
+		     struct membuf to,
+		     unsigned long *regs);
+int gpr32_set_common(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf,
+		     unsigned long *regs);
+
+/* ptrace-tm */
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cgpr_get;
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf);
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cfpr_get;
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf);
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cvmx_get;
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf);
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cvsx_get;
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf);
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_spr_get;
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf);
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_tar_get;
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf);
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_ppr_get;
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf);
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_dscr_get;
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf);
+user_regset_get2_fn tm_cgpr32_get;
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+		  unsigned int pos, unsigned int count,
+		  const void *kbuf, const void __user *ubuf);
+
+/* ptrace-view */
+
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data);
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data);
+
+extern const struct user_regset_view user_ppc_native_view;
+
+/* ptrace-fpu */
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data);
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data);
+
+/* ptrace-(no)adv */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo);
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+			unsigned long __user *datalp);
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data);
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info);
+long ppc_del_hwdebug(struct task_struct *child, long data);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
new file mode 100644
index 0000000000..09c49632bf
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+	unsigned int fpidx = index - PT_FPR0;	/* offset from the first FPR slot */
+#endif
+	/* Read the FP register slot @index (up to PT_FPSCR) of @child into *data. */
+	if (index > PT_FPSCR)
+		return -EIO;
+
+#ifdef CONFIG_PPC_FPU_REGS
+	flush_fp_to_thread(child);
+	if (fpidx < (PT_FPSCR - PT_FPR0)) {
+		if (IS_ENABLED(CONFIG_PPC32))
+			// On 32-bit the index we are passed refers to 32-bit words
+			*data = ((u32 *)child->thread.fp_state.fpr)[fpidx];
+		else
+			memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+	} else	/* FPSCR slot (also any index < PT_FPR0, since fpidx wraps) */
+		*data = child->thread.fp_state.fpscr;
+#else
+	*data = 0;	/* no FPU register state configured: report zero */
+#endif
+
+	return 0;
+}
+
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+	unsigned int fpidx = index - PT_FPR0;	/* offset from the first FPR slot */
+#endif
+	/* Store @data into the FP register slot @index (up to PT_FPSCR) of @child. */
+	if (index > PT_FPSCR)
+		return -EIO;
+
+#ifdef CONFIG_PPC_FPU_REGS
+	flush_fp_to_thread(child);
+	if (fpidx < (PT_FPSCR - PT_FPR0)) {
+		if (IS_ENABLED(CONFIG_PPC32))
+			// On 32-bit the index we are passed refers to 32-bit words
+			((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
+		else
+			memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+	} else	/* FPSCR slot (also any index < PT_FPR0, since fpidx wraps) */
+		child->thread.fp_state.fpscr = data;
+#endif
+	/* Without CONFIG_PPC_FPU_REGS the write is accepted and ignored. */
+	return 0;
+}
+
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
new file mode 100644
index 0000000000..a5dd7d2e2c
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug.h>
+
+#include "ptrace-decl.h"
+
+/* Make @task single-step: set MSR_SE, clear MSR_BE and flag TIF_SINGLESTEP. */
+void user_enable_single_step(struct task_struct *task)
+{
+	if (task->thread.regs)
+		regs_set_return_msr(task->thread.regs,
+				    (task->thread.regs->msr & ~MSR_BE) | MSR_SE);
+	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+/* Make @task block-step: set MSR_BE, clear MSR_SE and flag TIF_SINGLESTEP. */
+void user_enable_block_step(struct task_struct *task)
+{
+	if (task->thread.regs)
+		regs_set_return_msr(task->thread.regs,
+				    (task->thread.regs->msr & ~MSR_SE) | MSR_BE);
+	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+/* Stop stepping @task: clear MSR_SE and MSR_BE, then drop TIF_SINGLESTEP. */
+void user_disable_single_step(struct task_struct *task)
+{
+	if (task->thread.regs)
+		regs_set_return_msr(task->thread.regs,
+				    task->thread.regs->msr & ~(MSR_SE | MSR_BE));
+
+	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+/* Fill @dbginfo with the hardware debug capabilities exposed via ptrace. */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+	/* Fixed properties: no instruction breakpoints, no value conditions. */
+	dbginfo->version = 1;
+	dbginfo->num_instruction_bps = 0;
+	dbginfo->num_condition_regs = 0;
+	dbginfo->sizeof_condition = 0;
+	dbginfo->data_bp_alignment = sizeof(long);
+	dbginfo->num_data_bps = ppc_breakpoint_available() ? nr_wp_slots() : 0;
+
+	/* Feature bits depend on the kernel config and the running CPU. */
+	dbginfo->features = 0;
+	if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) {
+		dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+		if (dawr_enabled())
+			dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
+	}
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31;
+}
+
+/* Report watchpoint slot 0 to the tracer in the legacy DABR layout. */
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+			unsigned long __user *datalp)
+{
+	const struct arch_hw_breakpoint *brk = &child->thread.hw_brk[0];
+
+	/* Only debug register 0 exists: one emulated DABR and no IABRs. */
+	if (addr != 0)
+		return -EINVAL;
+	return put_user((brk->address & ~HW_BRK_TYPE_DABR) |
+			(brk->type & HW_BRK_TYPE_DABR), datalp);
+}
+
+/*
+ * ptrace_set_debugreg() emulates a single DABR.  Even if the hardware
+ * supports more than one watchpoint, this interface only manages
+ * watchpoint slot 0.
+ */
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int ret;
+	struct thread_struct *thread = &task->thread;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+	bool set_bp = true;
+	struct arch_hw_breakpoint hw_brk;
+
+	/* For ppc64 we support one DABR and no IABRs at the moment.
+	 *  For embedded processors we support one DAC and no IACs at the
+	 *  moment.
+	 */
+	if (addr > 0)
+		return -EINVAL;
+
+	/* The bottom 3 bits in dabr are flags */
+	if ((data & ~0x7UL) >= TASK_SIZE)
+		return -EIO;
+
+	/* For processors using DABR (e.g. 970), the bottom 3 bits are flags.
+	 *  Previous implementations assumed that these 3 bits were passed
+	 *  together with the data address, fitting the design of the
+	 *  DABR register, as follows:
+	 *
+	 *  bit 0: Read flag
+	 *  bit 1: Write flag
+	 *  bit 2: Breakpoint translation
+	 *
+	 *  Thus, we interpret them the same way here.
+	 */
+
+	/* Ensure breakpoint translation bit is set */
+	if (data && !(data & HW_BRK_TYPE_TRANSLATE))
+		return -EIO;
+	hw_brk.address = data & (~HW_BRK_TYPE_DABR);
+	hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+	hw_brk.len = DABR_MAX_LEN;
+	hw_brk.hw_len = DABR_MAX_LEN;
+	set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);	/* needs a R/W trigger */
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	bp = thread->ptrace_bps[0];
+	if (!set_bp) {	/* nothing to arm: drop any existing perf event */
+		if (bp) {
+			unregister_hw_breakpoint(bp);
+			thread->ptrace_bps[0] = NULL;
+		}
+		return 0;	/* NOTE(review): thread->hw_brk[0] keeps its old value here */
+	}
+	if (bp) {	/* slot 0 already has a perf event: retarget it in place */
+		attr = bp->attr;
+		attr.bp_addr = hw_brk.address;
+		attr.bp_len = DABR_MAX_LEN;
+		arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
+
+		/* Enable breakpoint */
+		attr.disabled = false;
+
+		ret =  modify_user_hw_breakpoint(bp, &attr);
+		if (ret)
+			return ret;
+
+		thread->ptrace_bps[0] = bp;
+		thread->hw_brk[0] = hw_brk;
+		return 0;
+	}
+
+	/* Create a new breakpoint request if one doesn't exist already */
+	hw_breakpoint_init(&attr);
+	attr.bp_addr = hw_brk.address;
+	attr.bp_len = DABR_MAX_LEN;
+	arch_bp_generic_fields(hw_brk.type,
+			       &attr.bp_type);
+
+	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+					       ptrace_triggered, NULL, task);
+	if (IS_ERR(bp)) {
+		thread->ptrace_bps[0] = NULL;
+		return PTR_ERR(bp);
+	}
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+	if (set_bp && (!ppc_breakpoint_available()))
+		return -ENODEV;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+	task->thread.hw_brk[0] = hw_brk;	/* read back by ptrace_get_debugreg() */
+	return 0;
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static int find_empty_ptrace_bp(struct thread_struct *thread)
+{
+	int slot = 0;
+
+	/* Skip occupied slots; stop at the first NULL ptrace_bps[] entry. */
+	while (slot < nr_wp_slots() && thread->ptrace_bps[slot])
+		slot++;
+
+	return slot < nr_wp_slots() ? slot : -1;
+}
+#endif
+
+static int find_empty_hw_brk(struct thread_struct *thread)
+{
+	int slot = 0;
+
+	/* A slot counts as free when its recorded address is zero. */
+	while (slot < nr_wp_slots() && thread->hw_brk[slot].address)
+		slot++;
+
+	return slot < nr_wp_slots() ? slot : -1;
+}
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+	int i;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int len = 0;
+	struct thread_struct *thread = &child->thread;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+	struct arch_hw_breakpoint brk;
+
+	if (bp_info->version != 1)
+		return -ENOTSUPP;	/* only version 1 requests are understood */
+	/*
+	 * Only data read/write triggers without a value condition are supported
+	 */
+	if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+	    (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+	    bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+		return -EINVAL;
+
+	if ((unsigned long)bp_info->addr >= TASK_SIZE)
+		return -EIO;
+
+	brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE);
+	brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL;
+	brk.len = DABR_MAX_LEN;
+	brk.hw_len = DABR_MAX_LEN;
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+		brk.type |= HW_BRK_TYPE_READ;
+	if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+		brk.type |= HW_BRK_TYPE_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+		len = bp_info->addr2 - bp_info->addr;	/* NOTE(review): addr2 < addr is not rejected here */
+	else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+		len = 1;
+	else
+		return -EINVAL;
+
+	i = find_empty_ptrace_bp(thread);
+	if (i < 0)
+		return -ENOSPC;
+
+	/* Register a new perf breakpoint for the free slot found above */
+	hw_breakpoint_init(&attr);
+	attr.bp_addr = (unsigned long)bp_info->addr;
+	attr.bp_len = len;
+	arch_bp_generic_fields(brk.type, &attr.bp_type);
+
+	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
+	thread->ptrace_bps[i] = bp;
+	if (IS_ERR(bp)) {
+		thread->ptrace_bps[i] = NULL;
+		return PTR_ERR(bp);
+	}
+
+	return i + 1;	/* handles are 1-based slot numbers */
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+	/* Without CONFIG_HAVE_HW_BREAKPOINT only exact-address watchpoints exist. */
+	if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+		return -EINVAL;
+
+	i = find_empty_hw_brk(&child->thread);
+	if (i < 0)
+		return -ENOSPC;
+
+	if (!ppc_breakpoint_available())
+		return -ENODEV;
+
+	child->thread.hw_brk[i] = brk;
+
+	return i + 1;	/* handles are 1-based slot numbers */
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int ret = 0;
+	struct thread_struct *thread = &child->thread;
+	struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+	if (data < 1 || data > nr_wp_slots())	/* handles are 1-based slot numbers */
+		return -EINVAL;
+	/* Release the watchpoint held in slot @data - 1. */
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	bp = thread->ptrace_bps[data - 1];
+	if (bp) {
+		unregister_hw_breakpoint(bp);
+		thread->ptrace_bps[data - 1] = NULL;
+	} else {
+		ret = -ENOENT;	/* no perf event registered in this slot */
+	}
+	return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+	if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) &&
+	    child->thread.hw_brk[data - 1].address == 0)
+		return -ENOENT;	/* slot holds no address and is not flagged disabled */
+
+	child->thread.hw_brk[data - 1].address = 0;
+	child->thread.hw_brk[data - 1].type = 0;
+	child->thread.hw_brk[data - 1].flags = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
new file mode 100644
index 0000000000..7433f3db97
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * fpr_get - copy the running FPR state (fpr0..fpr31, fpscr) to @to.
+ * Without CONFIG_PPC_FPU_REGS the same 33 doublewords read as zero.
+ * 'fp_state' holds the running FPR values regardless of transactions;
+ * 'ckfp_state' holds those checkpointed for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+	    struct membuf to)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+		     offsetof(struct thread_fp_state, fpr[32]));
+	/* fpscr sits where fpr[32] would be (checked above): one 33-u64 copy. */
+	flush_fp_to_thread(target);
+
+	return membuf_write(&to, &target->thread.fp_state, 33 * sizeof(u64));
+#else
+	return membuf_write(&to, &empty_zero_page, 33 * sizeof(u64));
+#endif
+}
+
+/*
+ * fpr_set - update the running FPR state from @kbuf or @ubuf.  Without
+ * CONFIG_PPC_FPU_REGS the write is accepted and ignored.
+ *
+ * 'fp_state' holds the running FPR values regardless of transactions;
+ * 'ckfp_state' (the checkpointed copy) is not touched here.
+ *
+ * Userspace interface buffer layout:
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+		     offsetof(struct thread_fp_state, fpr[32]));
+	/* fpscr sits where fpr[32] would be (checked above): one copy covers both. */
+	flush_fp_to_thread(target);
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fp_state, 0, -1);
+#else
+	return 0;
+#endif
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c
new file mode 100644
index 0000000000..47034d0690
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * For the evr_get/evr_set functions 'data' has the following layout:
+ *
+ * struct {
+ *   u32 evr[32];
+ *   u64 acc;
+ *   u32 spefscr;
+ * }
+ */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	flush_spe_to_thread(target);	/* presumably syncs live SPE state into target->thread */
+	return target->thread.used_spe ? regset->n : 0;	/* whole regset if used_spe is set, else empty */
+}
+
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+	    struct membuf to)
+{
+	flush_spe_to_thread(target);
+	/* Emit evr[] first, then acc and spefscr, in regset layout order. */
+	membuf_write(&to, &target->thread.evr, sizeof(target->thread.evr));
+
+	BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+		     offsetof(struct thread_struct, spefscr));
+	/* spefscr directly follows acc (checked above): write both at once. */
+	return membuf_write(&to, &target->thread.acc,
+				sizeof(u64) + sizeof(u32));
+}
+
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	flush_spe_to_thread(target);
+	/* Regset bytes [0, sizeof(evr)) update the evr[] array. */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.evr,
+				 0, sizeof(target->thread.evr));
+
+	BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+		     offsetof(struct thread_struct, spefscr));
+	/* Anything past evr[] lands in acc and the spefscr right behind it. */
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.acc,
+					 sizeof(target->thread.evr), -1);
+
+	return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
new file mode 100644
index 0000000000..210ea834e6
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Make sure the TM state of @tsk is present in its thread_struct.
+ *
+ * Nothing is done without CPU_FTR_TM, or when @tsk is not current: a task
+ * that is not current will already have been flushed to its thread_struct
+ * during __switch_to().
+ */
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	if (tsk != current)
+		return;
+
+	/* A reclaim flushes ALL the state. */
+	if (MSR_TM_SUSPENDED(mfmsr())) {
+		tm_reclaim_current(TM_CAUSE_SIGNAL);
+		return;
+	}
+
+	/* Not suspended: save the live TM SPRs into the thread structure. */
+	tm_enable();
+	tm_save_sprs(&tsk->thread);
+}
+
+/* User-visible checkpointed MSR: ckpt_regs.msr OR-ed with thread.fpexc_mode. */
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+	unsigned long ckpt_msr = task->thread.ckpt_regs.msr;
+
+	return ckpt_msr | task->thread.fpexc_mode;
+}
+
+/*
+ * Update the checkpointed MSR from @msr. Only the bits in MSR_DEBUGCHANGE
+ * are taken from @msr; all other bits keep their current value.
+ * Always returns 0.
+ */
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+	unsigned long kept = task->thread.ckpt_regs.msr & ~MSR_DEBUGCHANGE;
+
+	task->thread.ckpt_regs.msr = kept | (msr & MSR_DEBUGCHANGE);
+	return 0;
+}
+
+/* Set the trap value of the checkpointed registers via set_trap(); returns 0. */
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+	set_trap(&task->thread.ckpt_regs, trap);
+	return 0;
+}
+
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * Reports how many registers of the transaction checkpointed GPR
+ * category are currently available.
+ *
+ * Return: -ENODEV without TM support, 0 when @target has no active
+ * transaction, otherwise regset->n.
+ */
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ? regset->n : 0;
+}
+
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @to:		Destination of copy.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	struct pt_regs ckpt_regs;
+ * };
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise the result of the final membuf_zero() call.
+ */
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	/*
+	 * Side cursors for the msr (and, on PPC64, softe) slots. They are
+	 * taken before 'to' is advanced by the bulk copy below and are
+	 * stored to afterwards.
+	 */
+	struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+	struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
+#endif
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Bulk copy of the checkpointed registers ... */
+	membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs));
+
+	/* ... then store the user view of the MSR and a constant 1 for softe. */
+	membuf_store(&to_msr, get_user_ckpt_msr(target));
+#ifdef CONFIG_PPC64
+	membuf_store(&to_softe, 0x1ul);
+#endif
+	/* Zero the rest of the regset, up to ELF_NGREG words. */
+	return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
+			sizeof(struct user_pt_regs));
+}
+
+/**
+ * tm_cgpr_set - set the CGPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ *	struct pt_regs ckpt_regs;
+ * };
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise 0 or the error from user_regset_copyin().
+ */
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	unsigned long reg;
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Slots below PT_MSR go straight into ckpt_regs. */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.ckpt_regs,
+				 0, PT_MSR * sizeof(reg));
+
+	/*
+	 * The MSR slot is staged in 'reg' and applied through
+	 * set_user_ckpt_msr(), which only takes the MSR_DEBUGCHANGE bits.
+	 */
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+					 PT_MSR * sizeof(reg),
+					 (PT_MSR + 1) * sizeof(reg));
+		if (!ret)
+			ret = set_user_ckpt_msr(target, reg);
+	}
+
+	/* The next copy starts at orig_gpr3, which must directly follow msr. */
+	BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+		     offsetof(struct pt_regs, msr) + sizeof(long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.ckpt_regs.orig_gpr3,
+					 PT_ORIG_R3 * sizeof(reg),
+					 (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+	/* Slots between the last writable register and trap are discarded. */
+	if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+		user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					  (PT_MAX_PUT_REG + 1) * sizeof(reg),
+					  PT_TRAP * sizeof(reg));
+
+	/* The trap slot is staged in 'reg' and applied through set_trap(). */
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+					 PT_TRAP * sizeof(reg),
+					 (PT_TRAP + 1) * sizeof(reg));
+		if (!ret)
+			ret = set_user_ckpt_trap(target, reg);
+	}
+
+	/* Anything supplied after the trap slot is ignored. */
+	if (!ret)
+		user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					  (PT_TRAP + 1) * sizeof(reg), -1);
+
+	return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * Reports how many registers of the transaction checkpointed FPR
+ * category are currently available.
+ *
+ * Return: -ENODEV without TM support, 0 when @target has no active
+ * transaction, otherwise regset->n.
+ */
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ? regset->n : 0;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @to:		Destination of copy.
+ *
+ * Copies out the checkpointed FP state ('ckfp_state') that an active
+ * transaction falls back on if it aborts. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise the result of membuf_write().
+ */
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	u64 image[33];
+	int n;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Gather the 32 FPRs followed by FPSCR into one contiguous image. */
+	image[32] = target->thread.ckfp_state.fpscr;
+	for (n = 0; n < 32; n++)
+		image[n] = target->thread.TS_CKFPR(n);
+
+	return membuf_write(&to, image, sizeof(image));
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy from.
+ *
+ * Updates the checkpointed FP state ('ckfp_state') that an active
+ * transaction falls back on if it aborts. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise 0 or the error from user_regset_copyin().
+ */
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	u64 image[33];
+	int idx, err;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Start from the current values so that a partial write keeps the rest. */
+	image[32] = target->thread.ckfp_state.fpscr;
+	for (idx = 0; idx < 32; idx++)
+		image[idx] = target->thread.TS_CKFPR(idx);
+
+	/* Overlay the caller's data on the local image ... */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, image, 0, -1);
+	if (err)
+		return err;
+
+	/* ... and only then write the image back to the thread. */
+	for (idx = 0; idx < 32; idx++)
+		target->thread.TS_CKFPR(idx) = image[idx];
+	target->thread.ckfp_state.fpscr = image[32];
+
+	return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * Reports how many registers of the checkpointed VMX category are
+ * currently available.
+ *
+ * Return: -ENODEV without TM support, 0 when @target has no active
+ * transaction, otherwise regset->n.
+ */
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ? regset->n : 0;
+}
+
+/**
+ * tm_cvmx_get - get CVMX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @to:		Destination of copy.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ *};
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise the result of the final membuf_write().
+ */
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	/* vrsave is a 32-bit value exported in a full vector-register slot. */
+	union {
+		elf_vrreg_t reg;
+		u32 word;
+	} vrsave;
+	/* vscr must sit right after vr[31] for the 33-element copy below. */
+	BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* vr[0..31] and vscr in one go. */
+	membuf_write(&to, &target->thread.ckvr_state, 33 * sizeof(vector128));
+	/*
+	 * Copy out only the low-order word of vrsave.
+	 */
+	memset(&vrsave, 0, sizeof(vrsave));
+	vrsave.word = target->thread.ckvrsave;
+	return membuf_write(&to, &vrsave, sizeof(vrsave));
+}
+
+/**
+ * tm_cvmx_set - set CVMX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ *	vector128	vr[32];
+ *	vector128	vscr;
+ *	vector128	vrsave;
+ *};
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise 0 or the error from user_regset_copyin().
+ */
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	/* vscr must sit right after vr[31] for the 33-element copy below. */
+	BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* vr[0..31] and vscr are copied directly into ckvr_state. */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state,
+				 0, 33 * sizeof(vector128));
+	if (!ret && count > 0) {
+		/*
+		 * We use only the low-order word of vrsave.
+		 */
+		union {
+			elf_vrreg_t reg;
+			u32 word;
+		} vrsave;
+		/* Seed with the current value so a partial write keeps it. */
+		memset(&vrsave, 0, sizeof(vrsave));
+		vrsave.word = target->thread.ckvrsave;
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
+					 33 * sizeof(vector128), -1);
+		if (!ret)
+			target->thread.ckvrsave = vrsave.word;
+	}
+
+	return ret;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * Reports how many registers of the transaction checkpointed VSX
+ * category are currently available.
+ *
+ * Return: -ENODEV without TM support, 0 when @target has no active
+ * transaction or has not used VSX (thread.used_vsr clear), otherwise
+ * regset->n.
+ */
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	flush_vsx_to_thread(target);
+
+	if (!target->thread.used_vsr)
+		return 0;
+
+	return regset->n;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @to:		Destination of copy.
+ *
+ * Copies out the checkpointed VSX values held in 'ckfp_state', which an
+ * active transaction falls back on if it aborts. The userspace interface
+ * buffer layout is as follows.
+ *
+ * struct data {
+ *	u64	vsx[32];
+ * };
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise the result of membuf_write().
+ */
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	u64 low[32];
+	int n;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Bring every piece of register state into the thread struct. */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	/* Pick the TS_VSRLOWOFFSET doubleword out of each checkpointed entry. */
+	for (n = 0; n < 32; n++)
+		low[n] = target->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET];
+
+	return membuf_write(&to, low, 32 * sizeof(double));
+}
+
+/**
+ * tm_cvsx_set - set CVSX registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy from.
+ *
+ * Updates the checkpointed VSX values held in 'ckfp_state', which an
+ * active transaction falls back on if it aborts. The userspace interface
+ * buffer layout is as follows.
+ *
+ * struct data {
+ *	u64	vsx[32];
+ * };
+ *
+ * Return: -ENODEV without TM support, -ENODATA when @target has no active
+ * transaction, otherwise 0 or the error from user_regset_copyin().
+ */
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	u64 low[32];
+	int n, err;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Bring every piece of register state into the thread struct. */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	/* Start from the current values so that a partial write keeps the rest. */
+	for (n = 0; n < 32; n++)
+		low[n] = target->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET];
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 low, 0, 32 * sizeof(double));
+	if (err)
+		return err;
+
+	for (n = 0; n < 32; n++)
+		target->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET] = low[n];
+
+	return 0;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ *
+ * This function checks the active number of available
+ * registers in the transactional memory SPR category.
+ *
+ * Return: -ENODEV without TM support, otherwise regset->n (unlike the
+ * checkpointed regsets, no active transaction is required here).
+ */
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return regset->n;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @to:		Destination of copy.
+ *
+ * Copies out the transactional memory SPRs saved in the thread struct.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ *	u64		tm_tfhar;
+ *	u64		tm_texasr;
+ *	u64		tm_tfiar;
+ * };
+ *
+ * Return: -ENODEV without TM support, otherwise the result of the final
+ * membuf_write().
+ */
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+	       struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	/* The three SPRs, then ckpt_regs, must be laid out back to back. */
+	BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+	BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+	BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* TFHAR, TEXASR, TFIAR - in the order of the user layout. */
+	membuf_write(&to, &thr->tm_tfhar, sizeof(u64));
+	membuf_write(&to, &thr->tm_texasr, sizeof(u64));
+	return membuf_write(&to, &thr->tm_tfiar, sizeof(u64));
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target:	The target task.
+ * @regset:	The user regset structure.
+ * @pos:	The buffer position.
+ * @count:	Number of bytes to copy.
+ * @kbuf:	Kernel buffer to copy from.
+ * @ubuf:	User buffer to copy from.
+ *
+ * Updates the transactional memory SPRs saved in the thread struct.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ *	u64		tm_tfhar;
+ *	u64		tm_texasr;
+ *	u64		tm_tfiar;
+ * };
+ *
+ * Return: -ENODEV without TM support, otherwise 0 or the error from
+ * user_regset_copyin().
+ */
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	int err;
+
+	/* The three SPRs, then ckpt_regs, must be laid out back to back. */
+	BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+	BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+	BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* TFHAR: bytes [0, 8) */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.tm_tfhar, 0, sizeof(u64));
+	if (err)
+		return err;
+
+	/* TEXASR: bytes [8, 16) */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.tm_texasr, sizeof(u64),
+				 2 * sizeof(u64));
+	if (err)
+		return err;
+
+	/* TFIAR: bytes [16, 24) */
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_tfiar,
+				  2 * sizeof(u64), 3 * sizeof(u64));
+}
+
+/*
+ * Availability of the checkpointed TAR regset: -ENODEV without TM support,
+ * 0 when @target has no active transaction, otherwise regset->n.
+ */
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ? regset->n : 0;
+}
+
+/*
+ * Copy thread.tm_tar out to @to. Returns -ENODEV without TM support and
+ * -ENODATA when @target has no active transaction.
+ */
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+	       struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(thr->regs->msr))
+		return -ENODATA;
+
+	return membuf_write(&to, &thr->tm_tar, sizeof(u64));
+}
+
+/*
+ * Copy a new value into thread.tm_tar. Returns -ENODEV without TM support,
+ * -ENODATA when @target has no active transaction, otherwise the result of
+ * user_regset_copyin().
+ */
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_tar, 0, sizeof(u64));
+}
+
+/*
+ * Availability of the checkpointed PPR regset: -ENODEV without TM support,
+ * 0 when @target has no active transaction, otherwise regset->n.
+ */
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ? regset->n : 0;
+}
+
+
+/*
+ * Copy thread.tm_ppr out to @to. Returns -ENODEV without TM support and
+ * -ENODATA when @target has no active transaction.
+ */
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+	       struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(thr->regs->msr))
+		return -ENODATA;
+
+	return membuf_write(&to, &thr->tm_ppr, sizeof(u64));
+}
+
+/*
+ * Copy a new value into thread.tm_ppr. Returns -ENODEV without TM support,
+ * -ENODATA when @target has no active transaction, otherwise the result of
+ * user_regset_copyin().
+ */
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_ppr, 0, sizeof(u64));
+}
+
+/*
+ * Availability of the checkpointed DSCR regset: -ENODEV without TM support,
+ * 0 when @target has no active transaction, otherwise regset->n.
+ */
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ? regset->n : 0;
+}
+
+/*
+ * Copy thread.tm_dscr out to @to. Returns -ENODEV without TM support and
+ * -ENODATA when @target has no active transaction.
+ */
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(thr->regs->msr))
+		return -ENODATA;
+
+	return membuf_write(&to, &thr->tm_dscr, sizeof(u64));
+}
+
+/*
+ * Copy a new value into thread.tm_dscr. Returns -ENODEV without TM support,
+ * -ENODATA when @target has no active transaction, otherwise the result of
+ * user_regset_copyin().
+ */
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_dscr, 0, sizeof(u64));
+}
+
+/*
+ * tm_cgpr32_get - get the checkpointed GPRs through the 32-bit regset
+ *
+ * The copy itself is delegated to gpr32_get_common(), reading from
+ * ckpt_regs.
+ *
+ * 'to' is handed to the helper by value, so this function's own cursor
+ * still points at the start of the output when the helper returns. A
+ * trailing membuf_zero(&to, ELF_NGREG * sizeof(u32)) here would therefore
+ * wipe the registers that were just copied out. Return the helper's result
+ * instead, the same way tm_cgpr32_set() returns gpr32_set_common().
+ *
+ * NOTE(review): assumes gpr32_get_common() zero-pads the regset up to
+ * ELF_NGREG words itself and returns the usual membuf result - confirm
+ * against its definition.
+ */
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+		  struct membuf to)
+{
+	return gpr32_get_common(target, regset, to,
+				&target->thread.ckpt_regs.gpr[0]);
+}
+
+/*
+ * tm_cgpr32_set - set the checkpointed GPRs through the 32-bit regset
+ *
+ * Thin wrapper: hands ckpt_regs.gpr to gpr32_set_common() and returns its
+ * result.
+ */
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+		  unsigned int pos, unsigned int count,
+		  const void *kbuf, const void __user *ubuf)
+{
+	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+				&target->thread.ckpt_regs.gpr[0]);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
new file mode 100644
index 0000000000..584cf5c3df
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -0,0 +1,953 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/nospec.h>
+#include <linux/pkeys.h>
+
+#include "ptrace-decl.h"
+
+/* One entry of the register-name <-> struct pt_regs offset lookup table. */
+struct pt_regs_offset {
+	const char *name;	/* register name; NULL marks the end of the table */
+	int offset;		/* byte offset of the register in struct pt_regs */
+};
+
+#define STR(s)	#s			/* convert to string */
+/* Entry for a named pt_regs member: the name is the member name itself. */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+/* Two entries per GPR, "r<num>" and "gpr<num>", both pointing at gpr[num]. */
+#define GPR_OFFSET_NAME(num)	\
+	{.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
+	{.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+/* Table terminator: a NULL name stops the lookup loops. */
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+/*
+ * Name/offset table searched by regs_query_register_offset() and
+ * regs_query_register_name(). Each GPR appears twice (rN and gprN), so a
+ * lookup by offset returns the first, "rN", spelling. Terminated by
+ * REG_OFFSET_END.
+ */
+static const struct pt_regs_offset regoffset_table[] = {
+	GPR_OFFSET_NAME(0),
+	GPR_OFFSET_NAME(1),
+	GPR_OFFSET_NAME(2),
+	GPR_OFFSET_NAME(3),
+	GPR_OFFSET_NAME(4),
+	GPR_OFFSET_NAME(5),
+	GPR_OFFSET_NAME(6),
+	GPR_OFFSET_NAME(7),
+	GPR_OFFSET_NAME(8),
+	GPR_OFFSET_NAME(9),
+	GPR_OFFSET_NAME(10),
+	GPR_OFFSET_NAME(11),
+	GPR_OFFSET_NAME(12),
+	GPR_OFFSET_NAME(13),
+	GPR_OFFSET_NAME(14),
+	GPR_OFFSET_NAME(15),
+	GPR_OFFSET_NAME(16),
+	GPR_OFFSET_NAME(17),
+	GPR_OFFSET_NAME(18),
+	GPR_OFFSET_NAME(19),
+	GPR_OFFSET_NAME(20),
+	GPR_OFFSET_NAME(21),
+	GPR_OFFSET_NAME(22),
+	GPR_OFFSET_NAME(23),
+	GPR_OFFSET_NAME(24),
+	GPR_OFFSET_NAME(25),
+	GPR_OFFSET_NAME(26),
+	GPR_OFFSET_NAME(27),
+	GPR_OFFSET_NAME(28),
+	GPR_OFFSET_NAME(29),
+	GPR_OFFSET_NAME(30),
+	GPR_OFFSET_NAME(31),
+	REG_OFFSET_NAME(nip),
+	REG_OFFSET_NAME(msr),
+	REG_OFFSET_NAME(ctr),
+	REG_OFFSET_NAME(link),
+	REG_OFFSET_NAME(xer),
+	REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+	REG_OFFSET_NAME(softe),
+#else
+	REG_OFFSET_NAME(mq),
+#endif
+	REG_OFFSET_NAME(trap),
+	REG_OFFSET_NAME(dar),
+	REG_OFFSET_NAME(dsisr),
+	REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register
+ *
+ * Walks regoffset_table until its NULL-name terminator and compares each
+ * entry's name with @name.
+ *
+ * Return: the offset of the register in struct pt_regs, or -EINVAL if
+ * @name matches no entry.
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *entry = regoffset_table;
+
+	while (entry->name != NULL) {
+		if (strcmp(entry->name, name) == 0)
+			return entry->offset;
+		entry++;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset:	the offset of a register in struct pt_regs.
+ *
+ * Walks regoffset_table until its NULL-name terminator and returns the
+ * first entry whose offset equals @offset.
+ *
+ * Return: the register name, or NULL if @offset matches no entry.
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	const struct pt_regs_offset *entry = regoffset_table;
+
+	while (entry->name != NULL) {
+		if (entry->offset == offset)
+			return entry->name;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/* User-visible MSR of @task: regs->msr OR-ed with thread.fpexc_mode. */
+static unsigned long get_user_msr(struct task_struct *task)
+{
+	unsigned long msr = task->thread.regs->msr;
+
+	return msr | task->thread.fpexc_mode;
+}
+
+/*
+ * Update the MSR of @task from @msr. Only the bits in MSR_DEBUGCHANGE are
+ * taken from @msr; all other bits keep their current value. The result is
+ * installed with regs_set_return_msr(). Always returns 0.
+ */
+static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
+{
+	unsigned long kept = task->thread.regs->msr & ~MSR_DEBUGCHANGE;
+	unsigned long requested = msr & MSR_DEBUGCHANGE;
+
+	regs_set_return_msr(task->thread.regs, kept | requested);
+	return 0;
+}
+
+/*
+ * DSCR accessors used by ptrace_get_reg()/ptrace_put_reg() for PT_DSCR.
+ * With CONFIG_PPC64 they read/write thread.dscr; otherwise both fail
+ * with -EIO.
+ */
+#ifdef CONFIG_PPC64
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+	*data = task->thread.dscr;
+	return 0;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+	task->thread.dscr = dscr;
+	/* Also flag the value as inheritable (dscr_inherit). */
+	task->thread.dscr_inherit = 1;
+	return 0;
+}
+#else
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+	return -EIO;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+	return -EIO;
+}
+#endif
+
+/*
+ * Set the trap value of @task's registers. This goes through set_trap()
+ * to prevent mucking around with the reserved area of trap, which is
+ * used internally by the kernel. Always returns 0.
+ */
+static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap)
+{
+	set_trap(task->thread.regs, trap);
+	return 0;
+}
+
+/*
+ * Get contents of register REGNO in task TASK.
+ *
+ * The value is stored through DATA. Returns 0 on success and -EIO when
+ * TASK has no user registers, DATA is NULL, or REGNO is out of range
+ * (PT_DSCR may also fail with the error of get_user_dscr()).
+ */
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
+{
+	unsigned int regs_max;
+
+	if (task->thread.regs == NULL || !data)
+		return -EIO;
+
+	/* MSR is reported through get_user_msr(), not read raw. */
+	if (regno == PT_MSR) {
+		*data = get_user_msr(task);
+		return 0;
+	}
+
+	if (regno == PT_DSCR)
+		return get_user_dscr(task, data);
+
+	/*
+	 * softe copies the paca->irq_soft_mask variable state. Since
+	 * irq_soft_mask is no longer used as a flag, force userspace to always
+	 * see the softe value as 1, which means interrupts are not soft
+	 * disabled.
+	 */
+	if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) {
+		*data = 1;
+		return  0;
+	}
+
+	/*
+	 * regs_max is unsigned, so the comparison below is done in unsigned
+	 * arithmetic and a negative regno fails it as well.
+	 */
+	regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+	if (regno < regs_max) {
+		regno = array_index_nospec(regno, regs_max);
+		*data = ((unsigned long *)task->thread.regs)[regno];
+		return 0;
+	}
+
+	return -EIO;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ *
+ * MSR, trap and DSCR go through their dedicated setters; any other
+ * register up to PT_MAX_PUT_REG is written directly into the saved
+ * registers. Returns 0 on success and -EIO when TASK has no user
+ * registers or REGNO is not writable.
+ */
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+{
+	if (task->thread.regs == NULL)
+		return -EIO;
+
+	if (regno == PT_MSR)
+		return set_user_msr(task, data);
+	if (regno == PT_TRAP)
+		return set_user_trap(task, data);
+	if (regno == PT_DSCR)
+		return set_user_dscr(task, data);
+
+	/*
+	 * regno is signed, so check the lower bound explicitly: a negative
+	 * value would otherwise satisfy "<= PT_MAX_PUT_REG". ptrace_get_reg()
+	 * already rejects negative register numbers.
+	 */
+	if (regno >= 0 && regno <= PT_MAX_PUT_REG) {
+		regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
+		((unsigned long *)task->thread.regs)[regno] = data;
+		return 0;
+	}
+	return -EIO;
+}
+
+/*
+ * Copy the general purpose regset of @target out to @to.
+ *
+ * Returns -EIO when @target has no user registers, otherwise the result of
+ * the final membuf_zero() call.
+ */
+static int gpr_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	/*
+	 * Side cursors for the msr (and, on PPC64, softe) slots. They are
+	 * taken before 'to' is advanced by the bulk copy below and are
+	 * stored to afterwards.
+	 */
+	struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+	struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
+#endif
+	if (target->thread.regs == NULL)
+		return -EIO;
+
+	membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs));
+
+	/* Store the user view of the MSR and a constant 1 for softe. */
+	membuf_store(&to_msr, get_user_msr(target));
+#ifdef CONFIG_PPC64
+	membuf_store(&to_softe, 0x1ul);
+#endif
+	/* Zero the rest of the regset, up to ELF_NGREG words. */
+	return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
+				 sizeof(struct user_pt_regs));
+}
+
+/*
+ * Copy the general purpose regset of @target in from @kbuf/@ubuf.
+ *
+ * Returns -EIO when @target has no user registers, otherwise 0 or the
+ * error from user_regset_copyin().
+ */
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	unsigned long reg;
+	int ret;
+
+	if (target->thread.regs == NULL)
+		return -EIO;
+
+	/* Slots below PT_MSR go straight into the saved registers. */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 target->thread.regs,
+				 0, PT_MSR * sizeof(reg));
+
+	/*
+	 * The MSR slot is staged in 'reg' and applied through set_user_msr(),
+	 * which only takes the MSR_DEBUGCHANGE bits.
+	 */
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+					 PT_MSR * sizeof(reg),
+					 (PT_MSR + 1) * sizeof(reg));
+		if (!ret)
+			ret = set_user_msr(target, reg);
+	}
+
+	/* The next copy starts at orig_gpr3, which must directly follow msr. */
+	BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+		     offsetof(struct pt_regs, msr) + sizeof(long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.regs->orig_gpr3,
+					 PT_ORIG_R3 * sizeof(reg),
+					 (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+	/* Slots between the last writable register and trap are discarded. */
+	if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+		user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					  (PT_MAX_PUT_REG + 1) * sizeof(reg),
+					  PT_TRAP * sizeof(reg));
+
+	/* The trap slot is staged in 'reg' and applied through set_trap(). */
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+					 PT_TRAP * sizeof(reg),
+					 (PT_TRAP + 1) * sizeof(reg));
+		if (!ret)
+			ret = set_user_trap(target, reg);
+	}
+
+	/* Anything supplied after the trap slot is ignored. */
+	if (!ret)
+		user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					  (PT_TRAP + 1) * sizeof(reg), -1);
+
+	return ret;
+}
+
+#ifdef CONFIG_PPC64
+/* Copy regs->ppr out to @to; -EINVAL when @target has no user registers. */
+static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	struct pt_regs *regs = target->thread.regs;
+
+	if (regs == NULL)
+		return -EINVAL;
+
+	return membuf_write(&to, &regs->ppr, sizeof(u64));
+}
+
+/* Copy a new value into regs->ppr; -EINVAL when @target has no user registers. */
+static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	struct pt_regs *regs = target->thread.regs;
+
+	if (regs == NULL)
+		return -EINVAL;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &regs->ppr, 0, sizeof(u64));
+}
+
+/* Copy thread.dscr out to @to. */
+static int dscr_get(struct task_struct *target, const struct user_regset *regset,
+		    struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	return membuf_write(&to, &thr->dscr, sizeof(u64));
+}
+/* Copy a new value into thread.dscr. */
+static int dscr_set(struct task_struct *target, const struct user_regset *regset,
+		    unsigned int pos, unsigned int count, const void *kbuf,
+		    const void __user *ubuf)
+{
+	struct thread_struct *thr = &target->thread;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &thr->dscr, 0, sizeof(u64));
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Copy thread.tar out to @to. */
+static int tar_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	return membuf_write(&to, &thr->tar, sizeof(u64));
+}
+/* Copy a new value into thread.tar. */
+static int tar_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	struct thread_struct *thr = &target->thread;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &thr->tar, 0, sizeof(u64));
+}
+
+/*
+ * Availability of the EBB regset: -ENODEV without CPU_FTR_ARCH_207S, 0 when
+ * @target has not used EBB (thread.used_ebb clear), otherwise regset->n.
+ */
+static int ebb_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return target->thread.used_ebb ? regset->n : 0;
+}
+
+/*
+ * Copy the EBB registers (ebbrr, ebbhr, bescr) out to @to in one write.
+ * Returns -ENODEV without CPU_FTR_ARCH_207S and -ENODATA when @target has
+ * not used EBB.
+ */
+static int ebb_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	struct thread_struct *thr = &target->thread;
+
+	/* The single three-word write below needs the fields back to back. */
+	BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+	BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	if (!thr->used_ebb)
+		return -ENODATA;
+
+	return membuf_write(&to, &thr->ebbrr, 3 * sizeof(unsigned long));
+}
+
+/*
+ * Copy new values into the EBB registers, in the order ebbrr, ebbhr, bescr
+ * (one unsigned long each).
+ *
+ * Returns -ENODEV without CPU_FTR_ARCH_207S, -ENODATA when @target has not
+ * used EBB, otherwise 0 or the error from user_regset_copyin().
+ *
+ * The used_ebb test mirrors ebb_get() and ebb_active(): the regset is only
+ * available once the task has used EBB. The check used to be inverted,
+ * which refused writes exactly when the registers were in use.
+ */
+static int ebb_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	int ret = 0;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+	BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	if (!target->thread.used_ebb)
+		return -ENODATA;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr,
+				 0, sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.ebbhr, sizeof(unsigned long),
+					 2 * sizeof(unsigned long));
+
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.bescr, 2 * sizeof(unsigned long),
+					 3 * sizeof(unsigned long));
+
+	return ret;
+}
+/* NT_PPC_PMU is fully active (regset->n) on ARCH_207S CPUs, -ENODEV otherwise. */
+static int pmu_active(struct task_struct *target, const struct user_regset *regset)
+{
+	const bool supported = cpu_has_feature(CPU_FTR_ARCH_207S);
+
+	return supported ? (int)regset->n : -ENODEV;
+}
+
+/* NT_PPC_PMU getter: emit siar, sdar, sier, mmcr2, mmcr0 as five consecutive longs. */
+static int pmu_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	/* The single bulk write below relies on these fields being contiguous. */
+	BUILD_BUG_ON(TSO(sdar) != TSO(siar) + sizeof(unsigned long));
+	BUILD_BUG_ON(TSO(sier) != TSO(sdar) + sizeof(unsigned long));
+	BUILD_BUG_ON(TSO(mmcr2) != TSO(sier) + sizeof(unsigned long));
+	BUILD_BUG_ON(TSO(mmcr0) != TSO(mmcr2) + sizeof(unsigned long));
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return membuf_write(&to, &target->thread.siar, 5 * sizeof(unsigned long));
+	return -ENODEV;
+}
+
+/*
+ * NT_PPC_PMU setter: copy siar, sdar, sier, mmcr2 and mmcr0 in from the request
+ * buffer, one long-sized slot each, stopping at the first copy that fails.
+ * Returns -ENODEV when the CPU lacks CPU_FTR_ARCH_207S.
+ */
+static int pmu_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	int err;
+
+	/* Layout checks: the five slots must be back to back in the thread struct. */
+	BUILD_BUG_ON(TSO(sdar) != TSO(siar) + sizeof(unsigned long));
+	BUILD_BUG_ON(TSO(sier) != TSO(sdar) + sizeof(unsigned long));
+	BUILD_BUG_ON(TSO(mmcr2) != TSO(sier) + sizeof(unsigned long));
+	BUILD_BUG_ON(TSO(mmcr0) != TSO(mmcr2) + sizeof(unsigned long));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.siar,
+				 0, sizeof(unsigned long));
+	if (err)
+		return err;
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.sdar,
+				 sizeof(unsigned long), 2 * sizeof(unsigned long));
+	if (err)
+		return err;
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.sier,
+				 2 * sizeof(unsigned long), 3 * sizeof(unsigned long));
+	if (err)
+		return err;
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.mmcr2,
+				 3 * sizeof(unsigned long), 4 * sizeof(unsigned long));
+	if (err)
+		return err;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.mmcr0,
+				  4 * sizeof(unsigned long), 5 * sizeof(unsigned long));
+}
+
+/* NT_PPC_DEXCR is fully active (regset->n) on ARCH_31 CPUs, -ENODEV otherwise. */
+static int dexcr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	const bool supported = cpu_has_feature(CPU_FTR_ARCH_31);
+
+	return supported ? (int)regset->n : -ENODEV;
+}
+
+/* NT_PPC_DEXCR getter: emit the low 32 bits of DEXCR then HDEXCR, each as a u64. */
+static int dexcr_get(struct task_struct *target, const struct user_regset *regset,
+		     struct membuf to)
+{
+	u64 dexcr, hdexcr;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31))
+		return -ENODEV;
+
+	/*
+	 * No per-task DEXCR is stored: it is currently static across all CPUs,
+	 * so the static DEXCR_INIT value is also correct for the target.
+	 */
+	dexcr = lower_32_bits(DEXCR_INIT);
+	membuf_store(&to, dexcr);
+	/* HDEXCR is per-cpu, but a hypervisor can't reasonably vary it within a guest. */
+	hdexcr = lower_32_bits(mfspr(SPRN_HDEXCR_RO));
+	return membuf_store(&to, hdexcr);
+}
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/* NT_PPC_HASHKEYR is fully active (regset->n) on ARCH_31 CPUs, -ENODEV otherwise. */
+static int hashkeyr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return regset->n;
+	return -ENODEV;
+}
+
+/* NT_PPC_HASHKEYR getter: emit thread.hashkeyr, or -ENODEV without ARCH_31. */
+static int hashkeyr_get(struct task_struct *target, const struct user_regset *regset,
+			struct membuf to)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return membuf_store(&to, target->thread.hashkeyr);
+	return -ENODEV;
+}
+
+/* NT_PPC_HASHKEYR setter: copy one long into thread.hashkeyr, or fail with -ENODEV. */
+static int hashkeyr_set(struct task_struct *target, const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.hashkeyr, 0, sizeof(unsigned long));
+	return -ENODEV;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_MEM_KEYS
+/* NT_PPC_PKEY is fully active (regset->n) when pkeys are enabled, else -ENODEV. */
+static int pkey_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (arch_pkeys_enabled())
+		return regset->n;
+	return -ENODEV;
+}
+
+/* NT_PPC_PKEY getter: emit the task's AMR and IAMR followed by default_uamor. */
+static int pkey_get(struct task_struct *target, const struct user_regset *regset,
+		    struct membuf to)
+{
+	if (arch_pkeys_enabled()) {
+		membuf_store(&to, target->thread.regs->amr);
+		membuf_store(&to, target->thread.regs->iamr);
+		return membuf_store(&to, default_uamor);
+	}
+	return -ENODEV;
+}
+
+/*
+ * NT_PPC_PKEY setter. Only the AMR slot (offset 0, one u64) may be written;
+ * any other pos/count is -EINVAL, and -ENODEV is returned without pkeys.
+ */
+static int pkey_set(struct task_struct *target, const struct user_regset *regset,
+		    unsigned int pos, unsigned int count, const void *kbuf,
+		    const void __user *ubuf)
+{
+	u64 requested, kernel_owned;
+	int err;
+
+	if (!arch_pkeys_enabled())
+		return -ENODEV;
+	/* Only the AMR can be set from userspace */
+	if (pos != 0 || count != sizeof(requested))
+		return -EINVAL;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &requested, 0, sizeof(requested));
+	if (err)
+		return err;
+
+	/*
+	 * UAMOR says which AMR bits userspace owns: 0b11 for a key means it
+	 * may be changed from userspace, 0b00 means the kernel is using that
+	 * key and its AMR bits must be left alone. So take the requested
+	 * value for the default_uamor bits and keep the current AMR for the
+	 * ~default_uamor (kernel-owned) bits.
+	 */
+	kernel_owned = target->thread.regs->amr & ~default_uamor;
+	target->thread.regs->amr = (requested & default_uamor) | kernel_owned;
+
+	return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static const struct user_regset native_regsets[] = {	/* slots indexed by REGSET_* */
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+		.size = sizeof(long), .align = sizeof(long),
+		.regset_get = gpr_get, .set = gpr_set
+	},
+	[REGSET_FPR] = {
+		.core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+		.size = sizeof(double), .align = sizeof(double),
+		.regset_get = fpr_get, .set = fpr_set
+	},
+#ifdef CONFIG_ALTIVEC
+	[REGSET_VMX] = {
+		.core_note_type = NT_PPC_VMX, .n = 34,
+		.size = sizeof(vector128), .align = sizeof(vector128),
+		.active = vr_active, .regset_get = vr_get, .set = vr_set
+	},
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	[REGSET_VSX] = {
+		.core_note_type = NT_PPC_VSX, .n = 32,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = vsr_active, .regset_get = vsr_get, .set = vsr_set
+	},
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+	[REGSET_SPE] = {
+		.core_note_type = NT_PPC_SPE, .n = 35,
+		.size = sizeof(u32), .align = sizeof(u32),
+		.active = evr_active, .regset_get = evr_get, .set = evr_set
+	},
+#endif /* CONFIG_SPE */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	[REGSET_TM_CGPR] = {
+		.core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+		.size = sizeof(long), .align = sizeof(long),
+		.active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set
+	},
+	[REGSET_TM_CFPR] = {
+		.core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set
+	},
+	[REGSET_TM_CVMX] = {
+		.core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+		.size = sizeof(vector128), .align = sizeof(vector128),
+		.active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set
+	},
+	[REGSET_TM_CVSX] = {
+		.core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set
+	},
+	[REGSET_TM_SPR] = {
+		.core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set
+	},
+	[REGSET_TM_CTAR] = {
+		.core_note_type = NT_PPC_TM_CTAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set
+	},
+	[REGSET_TM_CPPR] = {
+		.core_note_type = NT_PPC_TM_CPPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set
+	},
+	[REGSET_TM_CDSCR] = {
+		.core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set
+	},
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC64
+	[REGSET_PPR] = {
+		.core_note_type = NT_PPC_PPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.regset_get = ppr_get, .set = ppr_set
+	},
+	[REGSET_DSCR] = {
+		.core_note_type = NT_PPC_DSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.regset_get = dscr_get, .set = dscr_set
+	},
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_BOOK3S_64
+	[REGSET_TAR] = {
+		.core_note_type = NT_PPC_TAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.regset_get = tar_get, .set = tar_set
+	},
+	[REGSET_EBB] = {
+		.core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = ebb_active, .regset_get = ebb_get, .set = ebb_set
+	},
+	[REGSET_PMR] = {
+		.core_note_type = NT_PPC_PMU, .n = ELF_NPMU,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = pmu_active, .regset_get = pmu_get, .set = pmu_set
+	},
+	[REGSET_DEXCR] = {	/* no .set handler: this entry only has a getter */
+		.core_note_type = NT_PPC_DEXCR, .n = ELF_NDEXCR,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = dexcr_active, .regset_get = dexcr_get
+	},
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	[REGSET_HASHKEYR] = {
+		.core_note_type = NT_PPC_HASHKEYR, .n = ELF_NHASHKEYR,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = hashkeyr_active, .regset_get = hashkeyr_get, .set = hashkeyr_set
+	},
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_MEM_KEYS
+	[REGSET_PKEY] = {
+		.core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = pkey_active, .regset_get = pkey_get, .set = pkey_set
+	},
+#endif /* CONFIG_PPC_MEM_KEYS */
+};
+
+const struct user_regset_view user_ppc_native_view = {	/* view over native_regsets */
+	.regsets = native_regsets, .n = ARRAY_SIZE(native_regsets),
+	.name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+};
+
+#include <linux/compat.h>
+
+/* Emit @regs as 32-bit words (MSR via get_user_msr()), zero-padded to ELF_NGREG. */
+int gpr32_get_common(struct task_struct *target, const struct user_regset *regset,
+		     struct membuf to, unsigned long *regs)
+{
+	int idx;
+
+	for (idx = 0; idx < PT_REGS_COUNT; idx++) {
+		u32 word = idx == PT_MSR ? (u32)get_user_msr(target) : (u32)regs[idx];
+
+		membuf_store(&to, word);
+	}
+	return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32));
+}
+/* Kernel-buffer variant: store 32-bit words from @kbuf into the unsigned long @regs slots. */
+static int gpr32_set_common_kernel(struct task_struct *target,
+				   const struct user_regset *regset,
+				   unsigned int pos, unsigned int count,
+				   const void *kbuf, unsigned long *regs)
+{
+	const compat_ulong_t *k = kbuf;
+
+	pos /= sizeof(compat_ulong_t);		/* bytes -> word index */
+	count /= sizeof(compat_ulong_t);	/* bytes -> word count */
+	/* Words below PT_MSR are stored straight into regs[]. */
+	for (; count > 0 && pos < PT_MSR; --count)
+		regs[pos++] = *k++;
+	/* The MSR word is applied through set_user_msr() rather than stored. */
+	if (count > 0 && pos == PT_MSR) {
+		set_user_msr(target, *k++);
+		++pos;
+		--count;
+	}
+	/* Plain stores resume up to PT_MAX_PUT_REG; words from there to PT_TRAP are skipped. */
+	for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+		regs[pos++] = *k++;
+	for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+		++k;
+	/* The trap word is applied through set_user_trap(). */
+	if (count > 0 && pos == PT_TRAP) {
+		set_user_trap(target, *k++);
+		++pos;
+		--count;
+	}
+	/* Back to byte units; the regset helper is handed everything from PT_TRAP + 1 on. */
+	kbuf = k;
+	pos *= sizeof(compat_ulong_t);
+	count *= sizeof(compat_ulong_t);
+	user_regset_copyin_ignore(&pos, &count, &kbuf, NULL,
+				  (PT_TRAP + 1) * sizeof(compat_ulong_t), -1);
+	return 0;
+}
+/* User-buffer variant: store 32-bit words read from @ubuf into the unsigned long @regs slots. */
+static int gpr32_set_common_user(struct task_struct *target,
+				 const struct user_regset *regset,
+				 unsigned int pos, unsigned int count,
+				 const void __user *ubuf, unsigned long *regs)
+{
+	const compat_ulong_t __user *u = ubuf;
+	const void *kbuf = NULL;
+	compat_ulong_t reg;
+
+	if (!user_read_access_begin(u, count))	/* count is still in bytes here */
+		return -EFAULT;
+
+	pos /= sizeof(reg);	/* bytes -> word index */
+	count /= sizeof(reg);	/* bytes -> word count */
+	/* Words below PT_MSR are stored straight into regs[]; a bad read jumps to Efault. */
+	for (; count > 0 && pos < PT_MSR; --count) {
+		unsafe_get_user(reg, u++, Efault);
+		regs[pos++] = reg;
+	}
+	/* The MSR word is applied through set_user_msr() rather than stored. */
+	if (count > 0 && pos == PT_MSR) {
+		unsafe_get_user(reg, u++, Efault);
+		set_user_msr(target, reg);
+		++pos;
+		--count;
+	}
+	/* Plain stores resume up to PT_MAX_PUT_REG; words up to PT_TRAP are read and dropped. */
+	for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+		unsafe_get_user(reg, u++, Efault);
+		regs[pos++] = reg;
+	}
+	for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+		unsafe_get_user(reg, u++, Efault);
+	/* The trap word is applied through set_user_trap(). */
+	if (count > 0 && pos == PT_TRAP) {
+		unsafe_get_user(reg, u++, Efault);
+		set_user_trap(target, reg);
+		++pos;
+		--count;
+	}
+	user_read_access_end();
+
+	ubuf = u;
+	pos *= sizeof(reg);	/* back to byte units for the regset helper */
+	count *= sizeof(reg);
+	user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+				  (PT_TRAP + 1) * sizeof(reg), -1);
+	return 0;
+
+Efault:
+	user_read_access_end();	/* pairs with user_read_access_begin() on the fault path */
+	return -EFAULT;
+}
+
+/* Store 32-bit GPR words into @regs, reading from @kbuf if non-NULL, else @ubuf. */
+int gpr32_set_common(struct task_struct *target, const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf, unsigned long *regs)
+{
+	/* Without a kernel buffer the data has to come from user memory. */
+	if (!kbuf)
+		return gpr32_set_common_user(target, regset, pos, count, ubuf, regs);
+
+	return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs);
+}
+
+/* Compat NT_PRSTATUS getter: -EIO if thread.regs is NULL, else dump its GPR area. */
+static int gpr32_get(struct task_struct *target, const struct user_regset *regset,
+		     struct membuf to)
+{
+	struct pt_regs *regs = target->thread.regs;
+
+	if (!regs)
+		return -EIO;
+	return gpr32_get_common(target, regset, to, &regs->gpr[0]);
+}
+
+/* Compat NT_PRSTATUS setter: -EIO if thread.regs is NULL, else update its GPR area. */
+static int gpr32_set(struct task_struct *target, const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = target->thread.regs;
+
+	if (!regs)
+		return -EIO;
+	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, &regs->gpr[0]);
+}
+
+/*
+ * Regset flavors matching the CONFIG_PPC32 native set; REGSET_GPR is compat-sized.
+ */
+static const struct user_regset compat_regsets[] = {
+	[REGSET_GPR] = {
+		.core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
+		.size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
+		.regset_get = gpr32_get, .set = gpr32_set
+	},
+	[REGSET_FPR] = {
+		.core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
+		.size = sizeof(double), .align = sizeof(double),
+		.regset_get = fpr_get, .set = fpr_set
+	},
+#ifdef CONFIG_ALTIVEC
+	[REGSET_VMX] = {
+		.core_note_type = NT_PPC_VMX, .n = 34,
+		.size = sizeof(vector128), .align = sizeof(vector128),
+		.active = vr_active, .regset_get = vr_get, .set = vr_set
+	},
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	[REGSET_SPE] = {
+		.core_note_type = NT_PPC_SPE, .n = 35,
+		.size = sizeof(u32), .align = sizeof(u32),
+		.active = evr_active, .regset_get = evr_get, .set = evr_set
+	},
+#endif /* CONFIG_SPE */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	[REGSET_TM_CGPR] = {
+		.core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG,
+		.size = sizeof(long), .align = sizeof(long),
+		.active = tm_cgpr_active,	/* NOTE(review): sizeof(long) above -- intended? */
+		.regset_get = tm_cgpr32_get, .set = tm_cgpr32_set
+	},
+	[REGSET_TM_CFPR] = {
+		.core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set
+	},
+	[REGSET_TM_CVMX] = {
+		.core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX,
+		.size = sizeof(vector128), .align = sizeof(vector128),
+		.active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set
+	},
+	[REGSET_TM_CVSX] = {
+		.core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX,
+		.size = sizeof(double), .align = sizeof(double),
+		.active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set
+	},
+	[REGSET_TM_SPR] = {
+		.core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set
+	},
+	[REGSET_TM_CTAR] = {
+		.core_note_type = NT_PPC_TM_CTAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set
+	},
+	[REGSET_TM_CPPR] = {
+		.core_note_type = NT_PPC_TM_CPPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set
+	},
+	[REGSET_TM_CDSCR] = {
+		.core_note_type = NT_PPC_TM_CDSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set
+	},
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC64
+	[REGSET_PPR] = {
+		.core_note_type = NT_PPC_PPR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.regset_get = ppr_get, .set = ppr_set
+	},
+	[REGSET_DSCR] = {
+		.core_note_type = NT_PPC_DSCR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.regset_get = dscr_get, .set = dscr_set
+	},
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_BOOK3S_64
+	[REGSET_TAR] = {
+		.core_note_type = NT_PPC_TAR, .n = 1,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.regset_get = tar_get, .set = tar_set
+	},
+	[REGSET_EBB] = {
+		.core_note_type = NT_PPC_EBB, .n = ELF_NEBB,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = ebb_active, .regset_get = ebb_get, .set = ebb_set
+	},
+#endif /* CONFIG_PPC_BOOK3S_64 */
+};
+
+static const struct user_regset_view user_ppc_compat_view = {	/* view over compat_regsets */
+	.regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets),
+	.name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
+};
+
+/* Pick the compat view for 32-bit tasks on CONFIG_COMPAT kernels, else the native one. */
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return IS_ENABLED(CONFIG_COMPAT) && is_tsk_32bit_task(task) ?
+		&user_ppc_compat_view : &user_ppc_native_view;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
new file mode 100644
index 0000000000..7df08004c4
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+	    struct membuf to)
+{
+	u64 image[33];	/* fpr[0..31] followed by fpscr */
+	int reg;
+
+	flush_fp_to_thread(target);
+
+	/* Stage the registers in a local image, then emit it in one write. */
+	for (reg = 0; reg < 32; reg++)
+		image[reg] = target->thread.TS_FPR(reg);
+	image[32] = target->thread.fp_state.fpscr;
+	return membuf_write(&to, image, sizeof(image));
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	fpr[32];
+ *	u64	fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf)
+{
+	u64 image[33];	/* fpr[0..31] followed by fpscr */
+	int reg, err;
+
+	flush_fp_to_thread(target);
+
+	/* Seed the image with current state so a partial write keeps the rest. */
+	for (reg = 0; reg < 32; reg++)
+		image[reg] = target->thread.TS_FPR(reg);
+	image[32] = target->thread.fp_state.fpscr;
+
+	/* Overlay the caller's data on the image; only commit it on success. */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, image, 0, -1);
+	if (err)
+		return err;
+	for (reg = 0; reg < 32; reg++)
+		target->thread.TS_FPR(reg) = image[reg];
+	target->thread.fp_state.fpscr = image[32];
+	return 0;
+}
+
+/*
+ * Currently to set and get all the vsx state, you need to call
+ * the fp and VMX calls as well.  This only get/sets the lower 32
+ * 128bit VSX registers.
+ */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	flush_vsx_to_thread(target);	/* flush first, then consult used_vsr */
+	return !target->thread.used_vsr ? 0 : regset->n;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	vsx[32];
+ * };
+ */
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+	    struct membuf to)
+{
+	u64 low_halves[32];
+	int reg;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	/* Gather the TS_VSRLOWOFFSET doubleword of each of the 32 registers. */
+	for (reg = 0; reg < 32; reg++)
+		low_halves[reg] = target->thread.fp_state.fpr[reg][TS_VSRLOWOFFSET];
+	return membuf_write(&to, low_halves, 32 * sizeof(double));
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ *	u64	vsx[32];
+ * };
+ */
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf)
+{
+	u64 low_halves[32];
+	int err, reg;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	/* Start from the current values so a partial write keeps the rest. */
+	for (reg = 0; reg < 32; reg++)
+		low_halves[reg] = target->thread.fp_state.fpr[reg][TS_VSRLOWOFFSET];
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, low_halves, 0, sizeof(low_halves));
+	if (err)
+		return err;
+	for (reg = 0; reg < 32; reg++)
+		target->thread.fp_state.fpr[reg][TS_VSRLOWOFFSET] = low_halves[reg];
+	return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
new file mode 100644
index 0000000000..5d7a72b41a
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -0,0 +1,450 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/m68k/kernel/ptrace.c"
+ *  Copyright (C) 1994 by Hamish Macdonald
+ *  Taken from linux/kernel/ptrace.c and modified for M680x0.
+ *  linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/regset.h>
+#include <linux/ptrace.h>
+#include <linux/audit.h>
+#include <linux/context_tracking.h>
+#include <linux/syscalls.h>
+
+#include <asm/switch_to.h>
+#include <asm/debug.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Make sure single step bits etc. are not left set on @child.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* Single-stepping is the only state this function clears. */
+	user_disable_single_step(child);
+}
+/* PowerPC arch_ptrace(): handles the requests below, the rest go to ptrace_request(). */
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	int ret = -EPERM;
+	void __user *datavp = (void __user *) data;	/* @data viewed as a user pointer */
+	unsigned long __user *datalp = datavp;
+
+	switch (request) {
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR: {
+		unsigned long index, tmp;
+
+		ret = -EIO;
+		/* convert to index and check */
+		index = addr / sizeof(long);
+		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
+			break;	/* misaligned addr or no saved regs: -EIO */
+
+		if (index < PT_FPR0)
+			ret = ptrace_get_reg(child, (int) index, &tmp);
+		else
+			ret = ptrace_get_fpr(child, index, &tmp);
+
+		if (ret)
+			break;
+		ret = put_user(tmp, datalp);
+		break;
+	}
+
+	/* write the word at location addr in the USER area */
+	case PTRACE_POKEUSR: {
+		unsigned long index;
+
+		ret = -EIO;
+		/* convert to index and check */
+		index = addr / sizeof(long);
+		if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
+			break;	/* misaligned addr or no saved regs: -EIO */
+
+		if (index < PT_FPR0)
+			ret = ptrace_put_reg(child, index, data);
+		else
+			ret = ptrace_put_fpr(child, index, data);
+		break;
+	}
+
+	case PPC_PTRACE_GETHWDBGINFO: {
+		struct ppc_debug_info dbginfo;
+
+		ppc_gethwdinfo(&dbginfo);
+
+		if (copy_to_user(datavp, &dbginfo,
+				 sizeof(struct ppc_debug_info)))
+			return -EFAULT;
+		return 0;
+	}
+
+	case PPC_PTRACE_SETHWDEBUG: {
+		struct ppc_hw_breakpoint bp_info;
+
+		if (copy_from_user(&bp_info, datavp,
+				   sizeof(struct ppc_hw_breakpoint)))
+			return -EFAULT;
+		return ppc_set_hwdebug(child, &bp_info);
+	}
+
+	case PPC_PTRACE_DELHWDEBUG: {
+		ret = ppc_del_hwdebug(child, data);
+		break;
+	}
+
+	case PTRACE_GET_DEBUGREG:
+		ret = ptrace_get_debugreg(child, addr, datalp);
+		break;
+
+	case PTRACE_SET_DEBUGREG:
+		ret = ptrace_set_debugreg(child, addr, data);
+		break;
+
+#ifdef CONFIG_PPC64
+	case PTRACE_GETREGS64:
+#endif
+	case PTRACE_GETREGS:	/* Get all pt_regs from the child. */
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_GPR,
+					   0, sizeof(struct user_pt_regs),
+					   datavp);
+
+#ifdef CONFIG_PPC64
+	case PTRACE_SETREGS64:
+#endif
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_GPR,
+					     0, sizeof(struct user_pt_regs),
+					     datavp);
+
+	case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_FPR,
+					   0, sizeof(elf_fpregset_t),
+					   datavp);
+
+	case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_FPR,
+					     0, sizeof(elf_fpregset_t),
+					     datavp);
+
+#ifdef CONFIG_ALTIVEC
+	case PTRACE_GETVRREGS:
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_VMX,
+					   0, (33 * sizeof(vector128) +
+					       sizeof(u32)),
+					   datavp);
+
+	case PTRACE_SETVRREGS:
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_VMX,
+					     0, (33 * sizeof(vector128) +
+						 sizeof(u32)),
+					     datavp);
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+	case PTRACE_GETVSRREGS:
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_VSX,
+					   0, 32 * sizeof(double),
+					   datavp);
+
+	case PTRACE_SETVSRREGS:
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_VSX,
+					     0, 32 * sizeof(double),
+					     datavp);
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+	case PTRACE_GETEVRREGS:
+		/* Get the child spe register state. */
+		return copy_regset_to_user(child, &user_ppc_native_view,
+					   REGSET_SPE, 0, 35 * sizeof(u32),
+					   datavp);
+
+	case PTRACE_SETEVRREGS:
+		/* Set the child spe register state. */
+		return copy_regset_from_user(child, &user_ppc_native_view,
+					     REGSET_SPE, 0, 35 * sizeof(u32),
+					     datavp);
+#endif /* CONFIG_SPE */
+
+	default:	/* not a PowerPC-specific request */
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+	return ret;
+}
+/* Seccomp hook for syscall entry: returns -1 if __secure_computing() fails, else 0. */
+#ifdef CONFIG_SECCOMP
+static int do_seccomp(struct pt_regs *regs)
+{
+	if (!test_thread_flag(TIF_SECCOMP))
+		return 0;
+
+	/*
+	 * The ABI we present to seccomp tracers is that r3 contains
+	 * the syscall return value and orig_gpr3 contains the first
+	 * syscall parameter. This is different to the ptrace ABI where
+	 * both r3 and orig_gpr3 contain the first syscall parameter.
+	 */
+	regs->gpr[3] = -ENOSYS;
+
+	/*
+	 * We use the __ version here because we have already checked
+	 * TIF_SECCOMP. If this fails, there is nothing left to do, we
+	 * have already loaded -ENOSYS into r3, or seccomp has put
+	 * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
+	 */
+	if (__secure_computing(NULL))
+		return -1;
+
+	/*
+	 * The syscall was allowed by seccomp, restore the register
+	 * state to what audit expects.
+	 * Note that we use orig_gpr3, which means a seccomp tracer can
+	 * modify the first syscall parameter (in orig_gpr3) and also
+	 * allow the syscall to proceed.
+	 */
+	regs->gpr[3] = regs->orig_gpr3;
+
+	return 0;
+}
+#else /* !CONFIG_SECCOMP */
+static inline int do_seccomp(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_SECCOMP */
+
+/**
+ * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
+ * @regs: the pt_regs of the task to trace (current)
+ *
+ * Performs various types of tracing on syscall entry. This includes seccomp,
+ * ptrace, syscall tracepoints and audit.
+ *
+ * The pt_regs are potentially visible to userspace via ptrace, so their
+ * contents are ABI.
+ *
+ * One or more of the tracers may modify the contents of pt_regs, in particular
+ * to modify arguments or even the syscall number itself.
+ *
+ * It's also possible that a tracer can choose to reject the system call. In
+ * that case this function will return an illegal syscall number, and will put
+ * an appropriate return value in regs->r3.
+ *
+ * Return: the (possibly changed) syscall number.
+ */
+long do_syscall_trace_enter(struct pt_regs *regs)
+{
+	u32 flags;
+
+	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+
+	if (flags) {
+		int rc = ptrace_report_syscall_entry(regs);
+
+		if (unlikely(flags & _TIF_SYSCALL_EMU)) {
+			/*
+			 * A nonzero return code from
+			 * ptrace_report_syscall_entry() tells us to prevent
+			 * the syscall execution, but we are not going to
+			 * execute it anyway.
+			 *
+			 * Returning -1 will skip the syscall execution. We want
+			 * to avoid clobbering any registers, so we don't goto
+			 * the skip label below.
+			 */
+			return -1;
+		}
+
+		if (rc) {
+			/*
+			 * The tracer decided to abort the syscall. Note that
+			 * the tracer may also just change regs->gpr[0] to an
+			 * invalid syscall number, that is handled below on the
+			 * exit path.
+			 */
+			goto skip;
+		}
+	}
+
+	/* Run seccomp after ptrace; allow it to set gpr[3]. */
+	if (do_seccomp(regs))
+		return -1;	/* gpr[3] is left as seccomp set it, unlike the skip path */
+
+	/* Avoid trace and audit when syscall is invalid. */
+	if (regs->gpr[0] >= NR_syscalls)
+		goto skip;
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_enter(regs, regs->gpr[0]);
+
+	if (!is_32bit_task())
+		audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
+				    regs->gpr[5], regs->gpr[6]);
+	else	/* 32-bit task: audit only the low 32 bits of each argument */
+		audit_syscall_entry(regs->gpr[0],
+				    regs->gpr[3] & 0xffffffff,
+				    regs->gpr[4] & 0xffffffff,
+				    regs->gpr[5] & 0xffffffff,
+				    regs->gpr[6] & 0xffffffff);
+
+	/* Return the possibly modified but valid syscall number */
+	return regs->gpr[0];
+
+skip:
+	/*
+	 * If we are aborting explicitly, or if the syscall number is
+	 * now invalid, set the return value to -ENOSYS.
+	 */
+	regs->gpr[3] = -ENOSYS;
+	return -1;
+}
+
+/* Syscall-exit hooks, in order: audit, sys_exit tracepoint, ptrace exit report. */
+void do_syscall_trace_leave(struct pt_regs *regs)
+{
+	int single_step;
+
+	audit_syscall_exit(regs);
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_exit(regs, regs->result);
+	/* Report to the tracer when TIF_SINGLESTEP or TIF_SYSCALL_TRACE is set. */
+	single_step = test_thread_flag(TIF_SINGLESTEP);
+	if (single_step || test_thread_flag(TIF_SYSCALL_TRACE))
+		ptrace_report_syscall_exit(regs, single_step);
+}
+
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match. It contains only BUILD_BUG_ON() checks.
+ */
+void __init pt_regs_check(void)
+{
+	BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+		     offsetof(struct user_pt_regs, gpr));
+	BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+		     offsetof(struct user_pt_regs, nip));
+	BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+		     offsetof(struct user_pt_regs, msr));
+	BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+		     offsetof(struct user_pt_regs, orig_gpr3));
+	BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+		     offsetof(struct user_pt_regs, ctr));
+	BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+		     offsetof(struct user_pt_regs, link));
+	BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+		     offsetof(struct user_pt_regs, xer));
+	BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+		     offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+	BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+		     offsetof(struct user_pt_regs, softe));
+#else /* !__powerpc64__ */
+	BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+		     offsetof(struct user_pt_regs, mq));
+#endif /* __powerpc64__ */
+	BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+		     offsetof(struct user_pt_regs, trap));
+	BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+		     offsetof(struct user_pt_regs, dar));
+	BUILD_BUG_ON(offsetof(struct pt_regs, dear) !=
+		     offsetof(struct user_pt_regs, dar));	/* dear must sit at dar's offset */
+	BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+		     offsetof(struct user_pt_regs, dsisr));
+	BUILD_BUG_ON(offsetof(struct pt_regs, esr) !=
+		     offsetof(struct user_pt_regs, dsisr));	/* esr must sit at dsisr's offset */
+	BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+		     offsetof(struct user_pt_regs, result));
+
+	BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+	// Now check that the pt_regs offsets match the uapi #defines
+	#define CHECK_REG(_pt, _reg) \
+		BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+				     sizeof(unsigned long)));
+
+	CHECK_REG(PT_R0,  gpr[0]);
+	CHECK_REG(PT_R1,  gpr[1]);
+	CHECK_REG(PT_R2,  gpr[2]);
+	CHECK_REG(PT_R3,  gpr[3]);
+	CHECK_REG(PT_R4,  gpr[4]);
+	CHECK_REG(PT_R5,  gpr[5]);
+	CHECK_REG(PT_R6,  gpr[6]);
+	CHECK_REG(PT_R7,  gpr[7]);
+	CHECK_REG(PT_R8,  gpr[8]);
+	CHECK_REG(PT_R9,  gpr[9]);
+	CHECK_REG(PT_R10, gpr[10]);
+	CHECK_REG(PT_R11, gpr[11]);
+	CHECK_REG(PT_R12, gpr[12]);
+	CHECK_REG(PT_R13, gpr[13]);
+	CHECK_REG(PT_R14, gpr[14]);
+	CHECK_REG(PT_R15, gpr[15]);
+	CHECK_REG(PT_R16, gpr[16]);
+	CHECK_REG(PT_R17, gpr[17]);
+	CHECK_REG(PT_R18, gpr[18]);
+	CHECK_REG(PT_R19, gpr[19]);
+	CHECK_REG(PT_R20, gpr[20]);
+	CHECK_REG(PT_R21, gpr[21]);
+	CHECK_REG(PT_R22, gpr[22]);
+	CHECK_REG(PT_R23, gpr[23]);
+	CHECK_REG(PT_R24, gpr[24]);
+	CHECK_REG(PT_R25, gpr[25]);
+	CHECK_REG(PT_R26, gpr[26]);
+	CHECK_REG(PT_R27, gpr[27]);
+	CHECK_REG(PT_R28, gpr[28]);
+	CHECK_REG(PT_R29, gpr[29]);
+	CHECK_REG(PT_R30, gpr[30]);
+	CHECK_REG(PT_R31, gpr[31]);
+	CHECK_REG(PT_NIP, nip);
+	CHECK_REG(PT_MSR, msr);
+	CHECK_REG(PT_ORIG_R3, orig_gpr3);
+	CHECK_REG(PT_CTR, ctr);
+	CHECK_REG(PT_LNK, link);
+	CHECK_REG(PT_XER, xer);
+	CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+	CHECK_REG(PT_SOFTE, softe);
+#else /* !CONFIG_PPC64 */
+	CHECK_REG(PT_MQ, mq);
+#endif /* CONFIG_PPC64 */
+	CHECK_REG(PT_TRAP, trap);
+	CHECK_REG(PT_DAR, dar);
+	CHECK_REG(PT_DSISR, dsisr);
+	CHECK_REG(PT_RESULT, result);
+	#undef CHECK_REG
+
+	BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+	/*
+	 * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+	 * real registers.
+	 */
+	BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+	// ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
new file mode 100644
index 0000000000..19c2248089
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -0,0 +1,305 @@
+/*
+ * ptrace for 32-bit processes running on a 64-bit kernel.
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/m68k/kernel/ptrace.c"
+ *  Copyright (C) 1994 by Hamish Macdonald
+ *  Taken from linux/kernel/ptrace.c and modified for M680x0.
+ *  linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/regset.h>
+#include <linux/compat.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Note: this code does not yet catch signals sent when the child dies;
+ * that would have to be handled in exit.c or in signal.c.
+ */
+
+/* Macros to work out the correct index for the FPR in the thread struct */
+#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)	/* register number: two 32-bit slots each */
+#define FPRHALF(i) (((i) - PT_FPR0) & 1)	/* which 32-bit half of that register */
+#define FPRINDEX(i) (TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i))
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t caddr, compat_ulong_t cdata)
+{
+	unsigned long addr = caddr;	/* compat arguments widened to native longs */
+	unsigned long data = cdata;
+	int ret;
+
+	switch (request) {	/* *_3264 requests: 32-bit tracer, 64-bit tracee */
+	/*
+	 * Read 4 bytes of the other process' storage
+	 *  data is a pointer specifying where the user wants the
+	 *	4 bytes copied into
+	 *  addr is a pointer in the user's storage that contains an 8 byte
+	 *	address in the other process of the 4 bytes that is to be read
+	 * (this is run in a 32-bit process looking at a 64-bit process)
+	 * when I and D space are separate, these will need to be fixed.
+	 */
+	case PPC_PTRACE_PEEKTEXT_3264:
+	case PPC_PTRACE_PEEKDATA_3264: {
+		u32 tmp;
+		int copied;
+		u32 __user * addrOthers;
+
+		ret = -EIO;
+
+		/* Get the addr in the other process that we want to read */
+		if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
+			break;
+
+		copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
+				sizeof(tmp), FOLL_FORCE);
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (u32 __user *)data);
+		break;
+	}
+
+	/* Read a register (specified by ADDR) out of the "user area" */
+	case PTRACE_PEEKUSR: {
+		int index;
+		unsigned long tmp;
+
+		ret = -EIO;
+		/* convert the byte offset to a 32-bit word index and range-check it */
+		index = (unsigned long) addr >> 2;
+		if ((addr & 3) || (index > PT_FPSCR32))
+			break;
+
+		if (index < PT_FPR0) {
+			ret = ptrace_get_reg(child, index, &tmp);
+			if (ret)
+				break;
+		} else {
+			flush_fp_to_thread(child);
+			/*
+			 * the user space code considers the floating point
+			 * to be an array of unsigned int (32 bits) - the
+			 * index passed in is based on this assumption.
+			 */
+			tmp = ((unsigned int *)child->thread.fp_state.fpr)
+				[FPRINDEX(index)];
+		}
+		ret = put_user((unsigned int)tmp, (u32 __user *)data);
+		break;
+	}
+  
+	/*
+	 * Read 4 bytes out of the other process' pt_regs area
+	 *  data is a pointer specifying where the user wants the
+	 *	4 bytes copied into
+	 *  addr is the offset into the other process' pt_regs structure
+	 *	that is to be read
+	 * (this is run in a 32-bit process looking at a 64-bit process)
+	 */
+	case PPC_PTRACE_PEEKUSR_3264: {
+		u32 index;
+		u32 reg32bits;
+		u64 tmp;
+		u32 numReg;
+		u32 part;
+
+		ret = -EIO;
+		/* Determine which register the user wants: two 32-bit words per register */
+		index = (u64)addr >> 2;
+		numReg = index / 2;
+		/* Determine which part of the register the user wants */
+		if (index % 2)
+			part = 1;  /* want the 2nd half of the register (right-most). */
+		else
+			part = 0;  /* want the 1st half of the register (left-most). */
+
+		/* Validate the input - check to see if address is on the wrong boundary
+		 * or beyond the end of the user area
+		 */
+		if ((addr & 3) || numReg > PT_FPSCR)
+			break;
+
+		if (numReg >= PT_FPR0) {
+			flush_fp_to_thread(child);
+			/* get 64 bit FPR */
+			tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0];
+		} else { /* register within PT_REGS struct */
+			unsigned long tmp2;
+			ret = ptrace_get_reg(child, numReg, &tmp2);
+			if (ret)
+				break;
+			tmp = tmp2;
+		} 
+		reg32bits = ((u32*)&tmp)[part];	/* NOTE(review): left/right-most above assumes big-endian layout - confirm */
+		ret = put_user(reg32bits, (u32 __user *)data);
+		break;
+	}
+
+	/*
+	 * Write 4 bytes into the other process' storage
+	 *  data is the 4 bytes that the user wants written
+	 *  addr is a pointer in the user's storage that contains an
+	 *	8 byte address in the other process where the 4 bytes
+	 *	that is to be written
+	 * (this is run in a 32-bit process looking at a 64-bit process)
+	 * when I and D space are separate, these will need to be fixed.
+	 */
+	case PPC_PTRACE_POKETEXT_3264:
+	case PPC_PTRACE_POKEDATA_3264: {
+		u32 tmp = data;
+		u32 __user * addrOthers;
+
+		/* Get the addr in the other process that we want to write into */
+		ret = -EIO;
+		if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
+			break;
+		ret = 0;
+		if (ptrace_access_vm(child, (u64)addrOthers, &tmp,
+					sizeof(tmp),
+					FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
+			break;
+		ret = -EIO;
+		break;
+	}
+
+	/* write the word at location addr in the USER area */
+	case PTRACE_POKEUSR: {
+		unsigned long index;
+
+		ret = -EIO;
+		/* convert the byte offset to a 32-bit word index and range-check it */
+		index = (unsigned long) addr >> 2;
+		if ((addr & 3) || (index > PT_FPSCR32))
+			break;
+
+		if (index < PT_FPR0) {
+			ret = ptrace_put_reg(child, index, data);
+		} else {
+			flush_fp_to_thread(child);
+			/*
+			 * the user space code considers the floating point
+			 * to be an array of unsigned int (32 bits) - the
+			 * index passed in is based on this assumption.
+			 */
+			((unsigned int *)child->thread.fp_state.fpr)
+				[FPRINDEX(index)] = data;
+			ret = 0;
+		}
+		break;
+	}
+
+	/*
+	 * Write 4 bytes into the other process' pt_regs area
+	 *  data is the 4 bytes that the user wants written
+	 *  addr is the offset into the other process' pt_regs structure
+	 *	that is to be written into
+	 * (this is run in a 32-bit process looking at a 64-bit process)
+	 */
+	case PPC_PTRACE_POKEUSR_3264: {
+		u32 index;
+		u32 numReg;
+
+		ret = -EIO;
+		/* Determine which register the user wants: two 32-bit words per register */
+		index = (u64)addr >> 2;
+		numReg = index / 2;
+
+		/*
+		 * Validate the input - check to see if address is on the
+		 * wrong boundary or beyond the end of the user area
+		 */
+		if ((addr & 3) || (numReg > PT_FPSCR))
+			break;
+		if (numReg < PT_FPR0) {
+			unsigned long freg;
+			ret = ptrace_get_reg(child, numReg, &freg);	/* read-modify-write of the full register */
+			if (ret)
+				break;
+			if (index % 2)	/* odd word: replace the low 32 bits */
+				freg = (freg & ~0xfffffffful) | (data & 0xfffffffful);
+			else		/* even word: replace the high 32 bits */
+				freg = (freg & 0xfffffffful) | (data << 32);
+			ret = ptrace_put_reg(child, numReg, freg);
+		} else {
+			u64 *tmp;
+			flush_fp_to_thread(child);
+			/* get 64 bit FPR ... */
+			tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0];
+			/* ... write the 32 bit part we want */
+			((u32 *)tmp)[index % 2] = data;
+			ret = 0;
+		}
+		break;
+	}
+
+	case PTRACE_GET_DEBUGREG: {
+#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+		unsigned long dabr_fake;
+#endif
+		ret = -EINVAL;
+		/* We only support one DABR and no IABRS at the moment */
+		if (addr > 0)
+			break;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+		ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
+#else
+		dabr_fake = (	/* address bits outside HW_BRK_TYPE_DABR combined with the type bits inside it */
+			(child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) |
+			(child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR));
+		ret = put_user(dabr_fake, (u32 __user *)data);
+#endif
+		break;
+	}
+
+	case PTRACE_GETREGS:	/* Get all pt_regs from the child. */
+		return copy_regset_to_user(
+			child, task_user_regset_view(current), 0,
+			0, PT_REGS_COUNT * sizeof(compat_long_t),
+			compat_ptr(data));
+
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		return copy_regset_from_user(
+			child, task_user_regset_view(current), 0,
+			0, PT_REGS_COUNT * sizeof(compat_long_t),
+			compat_ptr(data));
+
+	case PTRACE_GETFPREGS:
+	case PTRACE_SETFPREGS:
+	case PTRACE_GETVRREGS:
+	case PTRACE_SETVRREGS:
+	case PTRACE_GETVSRREGS:
+	case PTRACE_SETVSRREGS:
+	case PTRACE_GETREGS64:
+	case PTRACE_SETREGS64:
+	case PTRACE_KILL:
+	case PTRACE_SINGLESTEP:
+	case PTRACE_DETACH:
+	case PTRACE_SET_DEBUGREG:
+	case PTRACE_SYSCALL:
+	case PTRACE_CONT:
+	case PPC_PTRACE_GETHWDBGINFO:
+	case PPC_PTRACE_SETHWDEBUG:
+	case PPC_PTRACE_DELHWDEBUG:
+		ret = arch_ptrace(child, request, addr, data);	/* passed through to arch_ptrace() unchanged */
+		break;
+
+	default:
+		ret = compat_ptrace_request(child, request, addr, data);	/* everything else: compat_ptrace_request() */
+		break;
+	}
+
+	return ret;
+}
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
new file mode 100644
index 0000000000..0508c14b4c
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Code to process dynamic relocations for PPC32.
+ *
+ * Copyrights (C) IBM Corporation, 2011.
+ *	Author: Suzuki Poulose <suzuki@in.ibm.com>
+ *
+ *  - Based on ppc64 code - reloc_64.S
+ */
+
+#include <asm/ppc_asm.h>
+
+/* Dynamic section table entry tags */
+DT_RELA = 7			/* Tag for Elf32_Rela section */
+DT_RELASZ = 8			/* Size of the Rela relocs */
+DT_RELAENT = 9			/* Size of one Rela reloc entry */
+
+STN_UNDEF = 0			/* Undefined symbol index */
+STB_LOCAL = 0			/* Local binding for the symbol */
+
+R_PPC_ADDR16_LO = 4		/* Lower half of (S+A) */
+R_PPC_ADDR16_HI = 5		/* Upper half of (S+A) */
+R_PPC_ADDR16_HA = 6		/* High Adjusted (S+A) */
+R_PPC_RELATIVE = 22
+
+/*
+ * r3 = desired final address
+ */
+
+_GLOBAL(relocate)
+
+	mflr	r0		/* Save our LR */
+	bcl	20,31,$+4	/* Find our current runtime address */
+0:	mflr	r12		/* Make it accessible */
+	mtlr	r0
+
+	lwz	r11, (p_dyn - 0b)(r12)
+	add	r11, r11, r12	/* runtime address of .dynamic section */
+	lwz	r9, (p_rela - 0b)(r12)
+	add	r9, r9, r12	/* runtime address of .rela.dyn section */
+	lwz	r10, (p_st - 0b)(r12)
+	add	r10, r10, r12	/* runtime address of _stext section */
+	lwz	r13, (p_sym - 0b)(r12)
+	add	r13, r13, r12	/* runtime address of .dynsym section */
+
+	/*
+	 * Scan the dynamic section for RELA, RELASZ and RELAENT entries
+	 */
+	li	r6, 0
+	li	r7, 0
+	li	r8, 0
+1:	lwz	r5, 0(r11)	/* ELF_Dyn.d_tag */
+	cmpwi	r5, 0		/* End of ELF_Dyn[] */
+	beq	eodyn
+	cmpwi	r5, DT_RELA
+	bne	relasz
+	lwz	r7, 4(r11)	/* r7 = rela.link */
+	b	skip
+relasz:
+	cmpwi	r5, DT_RELASZ
+	bne	relaent
+	lwz	r8, 4(r11)	/* r8 = Total Rela relocs size */
+	b	skip
+relaent:
+	cmpwi	r5, DT_RELAENT
+	bne	skip
+	lwz	r6, 4(r11)	/* r6 = Size of one Rela reloc */
+skip:
+	addi	r11, r11, 8
+	b	1b
+eodyn:				/* End of Dyn Table scan */
+
+	/* Check if we have found all the entries */
+	cmpwi	r7, 0
+	beq	done
+	cmpwi	r8, 0
+	beq	done
+	cmpwi	r6, 0
+	beq	done
+
+
+	/*
+	 * Work out the current offset from the link time address of .rela
+	 * section.
+	 *  cur_offset[r7] = rela.run[r9] - rela.link [r7]
+	 *  _stext.link[r12] = _stext.run[r10] - cur_offset[r7]
+	 *  final_offset[r3] = _stext.final[r3] - _stext.link[r12]
+	 */
+	subf	r7, r7, r9	/* cur_offset */
+	subf	r12, r7, r10
+	subf	r3, r12, r3	/* final_offset */
+
+	subf	r8, r6, r8	/* relasz -= relaent */
+	/*
+	 * Scan through the .rela table and process each entry
+	 * r9	- points to the current .rela table entry
+	 * r13	- points to the symbol table
+	 */
+
+	/*
+	 * Check if we have a relocation based on symbol
+	 * r5 will hold the value of the symbol.
+	 */
+applyrela:
+	lwz	r4, 4(r9)		/* r4 = rela.r_info */
+	srwi	r5, r4, 8		/* ELF32_R_SYM(r_info) */
+	cmpwi	r5, STN_UNDEF	/* sym == STN_UNDEF ? */
+	beq	get_type	/* value = 0 */
+	/* Find the value of the symbol at index(r5) */
+	slwi	r5, r5, 4		/* r5 = r5 * sizeof(Elf32_Sym) */
+	add	r12, r13, r5	/* r12 = &__dyn_sym[Index] */
+
+	/*
+	 * GNU ld has a bug, where dynamic relocs based on
+	 * STB_LOCAL symbols, the value should be assumed
+	 * to be zero. - Alan Modra
+	 */
+	/* XXX: Do we need to check if we are using GNU ld ? */
+	lbz	r5, 12(r12)	/* r5 = dyn_sym[Index].st_info */
+	extrwi	r5, r5, 4, 24	/* r5 = ELF32_ST_BIND(r5) */
+	cmpwi	r5, STB_LOCAL	/* st_value = 0, ld bug */
+	beq	get_type	/* We have r5 = 0 */
+	lwz	r5, 4(r12)	/* r5 = __dyn_sym[Index].st_value */
+
+get_type:
+	/* Load the relocation type to r4 */
+	extrwi	r4, r4, 8, 24	/* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */
+
+	/* R_PPC_RELATIVE */
+	cmpwi	r4, R_PPC_RELATIVE
+	bne	hi16
+	lwz	r4, 0(r9)	/* r_offset */
+	lwz	r0, 8(r9)	/* r_addend */
+	add	r0, r0, r3	/* final addend */
+	stwx	r0, r4, r7	/* memory[r4+r7] = (u32)r0 */
+	b	nxtrela		/* continue */
+
+	/* R_PPC_ADDR16_HI */
+hi16:
+	cmpwi	r4, R_PPC_ADDR16_HI
+	bne	ha16
+	lwz	r4, 0(r9)	/* r_offset */
+	lwz	r0, 8(r9)	/* r_addend */
+	add	r0, r0, r3
+	add	r0, r0, r5	/* r0 = (S+A+Offset) */
+	extrwi	r0, r0, 16, 0	/* r0 = (r0 >> 16) */
+	b	store_half
+
+	/* R_PPC_ADDR16_HA */
+ha16:
+	cmpwi	r4, R_PPC_ADDR16_HA
+	bne	lo16
+	lwz	r4, 0(r9)	/* r_offset */
+	lwz	r0, 8(r9)	/* r_addend */
+	add	r0, r0, r3
+	add	r0, r0, r5	/* r0 = (S+A+Offset) */
+	extrwi	r5, r0, 1, 16	/* Extract bit 16 */
+	extrwi	r0, r0, 16, 0	/* r0 = (r0 >> 16) */
+	add	r0, r0, r5	/* Add it to r0 */
+	b	store_half
+
+	/* R_PPC_ADDR16_LO */
+lo16:
+	cmpwi	r4, R_PPC_ADDR16_LO
+	bne	unknown_type
+	lwz	r4, 0(r9)	/* r_offset */
+	lwz	r0, 8(r9)	/* r_addend */
+	add	r0, r0, r3
+	add	r0, r0, r5	/* r0 = (S+A+Offset) */
+	extrwi	r0, r0, 16, 16	/* r0 &= 0xffff */
+	/* Fall through to */
+
+	/* Store half word */
+store_half:
+	sthx	r0, r4, r7	/* memory[r4+r7] = (u16)r0 */
+
+nxtrela:
+	/*
+	 * We have to flush the modified instructions to the
+	 * main storage from the d-cache. And also, invalidate the
+	 * cached instructions in i-cache which has been modified.
+	 *
+	 * The final sync / isync is delayed till the end (done:), since
+	 * we won't be executing the modified instructions until
+	 * we return from here.
+	 */
+	dcbst	r4,r7
+	sync			/* Ensure the data is flushed before icbi */
+	icbi	r4,r7
+unknown_type:			/* unsupported types land here without any store */
+	cmpwi	r8, 0		/* relasz = 0 ? */
+	ble	done
+	add	r9, r9, r6	/* move to next entry in the .rela table */
+	subf	r8, r6, r8	/* relasz -= relaent */
+	b	applyrela
+
+done:
+	sync			/* Wait for the flush to finish */
+	isync			/* Discard prefetched instructions */
+	blr
+
+p_dyn:		.long	__dynamic_start - 0b	/* offsets relative to label 0 above */
+p_rela:		.long	__rela_dyn_start - 0b
+p_sym:		.long	__dynamic_symtab - 0b
+p_st:		.long	_stext - 0b
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
new file mode 100644
index 0000000000..efd52f2e70
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Code to process dynamic relocations in the kernel.
+ *
+ * Copyright 2008 Paul Mackerras, IBM Corp.
+ */
+
+#include <asm/ppc_asm.h>
+
+RELA = 7
+RELASZ = 8
+RELAENT = 9
+R_PPC64_RELATIVE = 22
+R_PPC64_UADDR64 = 43
+
+/*
+ * r3 = desired final address of kernel
+ */
+_GLOBAL(relocate)
+	mflr	r0
+	bcl	20,31,$+4
+0:	mflr	r12		/* r12 has runtime addr of label 0 */
+	mtlr	r0
+	ld	r11,(p_dyn - 0b)(r12)
+	add	r11,r11,r12	/* r11 has runtime addr of .dynamic section */
+	ld	r9,(p_rela - 0b)(r12)
+	add	r9,r9,r12	/* r9 has runtime addr of .rela.dyn section */
+	ld	r10,(p_st - 0b)(r12)
+	add	r10,r10,r12	/* r10 has runtime addr of _stext */
+	ld	r4,(p_sym - 0b)(r12)
+	add	r4,r4,r12	/* r4 has runtime addr of .dynsym */
+
+	/*
+	 * Scan the dynamic section for the RELA, RELASZ and RELAENT entries.
+	 */
+	li	r7,0
+	li	r8,0
+.Ltags:
+	ld	r6,0(r11)	/* get tag */
+	cmpdi	r6,0
+	beq	.Lend_of_list		/* end of list */
+	cmpdi	r6,RELA
+	bne	2f
+	ld	r7,8(r11)	/* get RELA pointer in r7 */
+	b	4f
+2:	cmpdi	r6,RELASZ
+	bne	3f
+	ld	r8,8(r11)	/* get RELASZ value in r8 */
+	b	4f
+3:	cmpdi	r6,RELAENT
+	bne	4f
+	ld	r12,8(r11)	/* get RELAENT value in r12 */
+4:	addi	r11,r11,16
+	b	.Ltags
+.Lend_of_list:
+	cmpdi	r7,0		/* check we have RELA, RELASZ, RELAENT */
+	cmpdi	cr1,r8,0
+	beq	.Lout
+	beq	cr1,.Lout
+	cmpdi	r12,0		/* NOTE(review): r12 is not zeroed before the scan (it still holds the label-0 address if RELAENT is absent) - confirm */
+	beq	.Lout
+
+	/*
+	 * Work out linktime address of _stext and hence the
+	 * relocation offset to be applied.
+	 * cur_offset [r7] = rela.run [r9] - rela.link [r7]
+	 * _stext.link [r10] = _stext.run [r10] - cur_offset [r7]
+	 * final_offset [r3] = _stext.final [r3] - _stext.link [r10]
+	 */
+	subf	r7,r7,r9	/* cur_offset */
+	subf	r10,r7,r10
+	subf	r3,r10,r3	/* final_offset */
+
+	/*
+	 * Run through the list of relocations and process the
+	 * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones.
+	 */
+	divd	r8,r8,r12	/* RELASZ / RELAENT */
+	mtctr	r8
+.Lrels:	ld	r0,8(r9)		/* reloc->r_info (symbol index in the high word, type in the low word) */
+	cmpdi	r0,R_PPC64_RELATIVE	/* whole r_info compared: matches only when the symbol index is 0 */
+	bne	.Luaddr64
+	ld	r6,0(r9)	/* reloc->r_offset */
+	ld	r0,16(r9)	/* reloc->r_addend */
+	b	.Lstore
+.Luaddr64:
+	srdi	r5,r0,32	/* ELF64_R_SYM(reloc->r_info) */
+	clrldi	r0,r0,32	/* ELF64_R_TYPE(reloc->r_info) */
+	cmpdi	r0,R_PPC64_UADDR64
+	bne	.Lnext
+	ld	r6,0(r9)	/* reloc->r_offset */
+	ld	r0,16(r9)	/* reloc->r_addend */
+	mulli	r5,r5,24	/* 24 == sizeof(elf64_sym) */
+	add	r5,r5,r4	/* elf64_sym[ELF64_R_SYM] */
+	ld	r5,8(r5)	/* doubleword at offset 8 of the symbol entry (st_value in the ELF64 layout) */
+	add	r0,r0,r5
+.Lstore:
+	add	r0,r0,r3	/* add final_offset */
+	stdx	r0,r7,r6	/* store at r_offset + cur_offset */
+.Lnext:
+	add	r9,r9,r12	/* advance by RELAENT */
+	bdnz	.Lrels
+.Lout:
+	blr
+
+.balign 8
+p_dyn:	.8byte	__dynamic_start - 0b
+p_rela:	.8byte	__rela_dyn_start - 0b
+p_sym:		.8byte __dynamic_symtab - 0b
+p_st:	.8byte	_stext - 0b
+
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
new file mode 100644
index 0000000000..9454b8395b
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *   Copyright (C) 2000 Tilmann Bitterberg
+ *   (tilmann@bitterberg.de)
+ *
+ *   RTAS (Runtime Abstraction Services) stuff
+ *   Intention is to provide a clean user interface
+ *   to use the RTAS.
+ *
+ *   TODO:
+ *   Split off a header file and maybe move it to a different
+ *   location. Write Documentation on what the /proc/rtas/ entries
+ *   actually do.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/bitops.h>
+#include <linux/rtc.h>
+#include <linux/of.h>
+
+#include <linux/uaccess.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/machdep.h> /* for ppc_md */
+#include <asm/time.h>
+
+/* Token for Sensors */
+#define KEY_SWITCH		0x0001
+#define ENCLOSURE_SWITCH	0x0002
+#define THERMAL_SENSOR		0x0003
+#define LID_STATUS		0x0004
+#define POWER_SOURCE		0x0005
+#define BATTERY_VOLTAGE		0x0006
+#define BATTERY_REMAINING	0x0007
+#define BATTERY_PERCENTAGE	0x0008
+#define EPOW_SENSOR		0x0009
+#define BATTERY_CYCLESTATE	0x000a
+#define BATTERY_CHARGING	0x000b
+
+/* IBM specific sensors */
+#define IBM_SURVEILLANCE	0x2328 /* 9000 */
+#define IBM_FANRPM		0x2329 /* 9001 */
+#define IBM_VOLTAGE		0x232a /* 9002 */
+#define IBM_DRCONNECTOR		0x232b /* 9003 */
+#define IBM_POWERSUPPLY		0x232c /* 9004 */
+
+/* Status return values */
+#define SENSOR_CRITICAL_HIGH	13
+#define SENSOR_WARNING_HIGH	12
+#define SENSOR_NORMAL		11
+#define SENSOR_WARNING_LOW	10
+#define SENSOR_CRITICAL_LOW	 9
+#define SENSOR_SUCCESS		 0
+#define SENSOR_HW_ERROR		-1
+#define SENSOR_BUSY		-2
+#define SENSOR_NOT_EXIST	-3
+#define SENSOR_DR_ENTITY	-9000
+
+/* Location Codes */
+#define LOC_SCSI_DEV_ADDR	'A'
+#define LOC_SCSI_DEV_LOC	'B'
+#define LOC_CPU			'C'
+#define LOC_DISKETTE		'D'
+#define LOC_ETHERNET		'E'
+#define LOC_FAN			'F'
+#define LOC_GRAPHICS		'G'
+/* reserved / not used		'H' */
+#define LOC_IO_ADAPTER		'I'
+/* reserved / not used		'J' */
+#define LOC_KEYBOARD		'K'
+#define LOC_LCD			'L'
+#define LOC_MEMORY		'M'
+#define LOC_NV_MEMORY		'N'
+#define LOC_MOUSE		'O'
+#define LOC_PLANAR		'P'
+#define LOC_OTHER_IO		'Q'
+#define LOC_PARALLEL		'R'
+#define LOC_SERIAL		'S'
+#define LOC_DEAD_RING		'T'
+#define LOC_RACKMOUNTED		'U' /* for _u_nit is rack mounted */
+#define LOC_VOLTAGE		'V'
+#define LOC_SWITCH_ADAPTER	'W'
+#define LOC_OTHER		'X'
+#define LOC_FIRMWARE		'Y'
+#define LOC_SCSI		'Z'
+
+/* Tokens for indicators */
+#define TONE_FREQUENCY		0x0001 /* 0 - 1000 (HZ)*/
+#define TONE_VOLUME		0x0002 /* 0 - 100 (%) */
+#define SYSTEM_POWER_STATE	0x0003 
+#define WARNING_LIGHT		0x0004
+#define DISK_ACTIVITY_LIGHT	0x0005
+#define HEX_DISPLAY_UNIT	0x0006
+#define BATTERY_WARNING_TIME	0x0007
+#define CONDITION_CYCLE_REQUEST	0x0008
+#define SURVEILLANCE_INDICATOR	0x2328 /* 9000 */
+#define DR_ACTION		0x2329 /* 9001 */
+#define DR_INDICATOR		0x232a /* 9002 */
+/* 9003 - 9004: Vendor specific */
+/* 9006 - 9999: Vendor specific */
+
+/* other */
+#define MAX_SENSORS		 17  /* sensor[] capacity; I only know of 17 sensors */
+#define MAX_LINELENGTH          256 /* size of the progress_led buffer */
+#define SENSOR_PREFIX		"ibm,sensor-"
+#define cel_to_fahr(x)		(((x)*9/5)+32)
+
+struct individual_sensor {	/* one (token, quant) pair from the "rtas-sensors" property */
+	unsigned int token;	/* sensor token handed to the get-sensor-state RTAS call */
+	unsigned int quant;	/* highest instance index queried (callers loop 0..quant inclusive) */
+};
+
+struct rtas_sensors {	/* table of sensors read from the device tree */
+        struct individual_sensor sensor[MAX_SENSORS];
+	unsigned int quant;	/* number of sensor[] entries filled in */
+};
+
+/* Globals */
+static struct rtas_sensors sensors;	/* refilled by ppc_rtas_find_all_sensors() */
+static struct device_node *rtas_node = NULL;	/* "rtas" node, looked up in proc_rtas_init() */
+static unsigned long power_on_time = 0; /* Save the time the user set */
+static char progress_led[MAX_LINELENGTH];	/* last text written to the progress entry */
+
+static unsigned long rtas_tone_frequency = 1000;
+static unsigned long rtas_tone_volume = 0;
+
+/* ****************************************************************** */
+/* Declarations */
+static int ppc_rtas_sensors_show(struct seq_file *m, void *v);
+static int ppc_rtas_clock_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_clock_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_progress_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_progress_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_poweron_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_poweron_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos);
+
+static ssize_t ppc_rtas_tone_freq_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v);
+static ssize_t ppc_rtas_tone_volume_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos);
+static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
+static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
+
+static int poweron_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ppc_rtas_poweron_show, NULL);	/* NULL passed as the data argument */
+}
+/* proc ops for "powerpc/rtas/poweron": seq_file reads plus ppc_rtas_poweron_write() */
+static const struct proc_ops ppc_rtas_poweron_proc_ops = {
+	.proc_open	= poweron_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= ppc_rtas_poweron_write,
+	.proc_release	= single_release,
+};
+
+static int progress_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ppc_rtas_progress_show, NULL);	/* NULL passed as the data argument */
+}
+/* proc ops for "powerpc/rtas/progress": seq_file reads plus ppc_rtas_progress_write() */
+static const struct proc_ops ppc_rtas_progress_proc_ops = {
+	.proc_open	= progress_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= ppc_rtas_progress_write,
+	.proc_release	= single_release,
+};
+
+static int clock_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ppc_rtas_clock_show, NULL);	/* NULL passed as the data argument */
+}
+/* proc ops for "powerpc/rtas/clock": seq_file reads plus ppc_rtas_clock_write() */
+static const struct proc_ops ppc_rtas_clock_proc_ops = {
+	.proc_open	= clock_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= ppc_rtas_clock_write,
+	.proc_release	= single_release,
+};
+
+static int tone_freq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ppc_rtas_tone_freq_show, NULL);	/* NULL passed as the data argument */
+}
+/* proc ops for "powerpc/rtas/frequency": seq_file reads plus ppc_rtas_tone_freq_write() */
+static const struct proc_ops ppc_rtas_tone_freq_proc_ops = {
+	.proc_open	= tone_freq_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= ppc_rtas_tone_freq_write,
+	.proc_release	= single_release,
+};
+
+static int tone_volume_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ppc_rtas_tone_volume_show, NULL);	/* NULL passed as the data argument */
+}
+/* proc ops for "powerpc/rtas/volume": seq_file reads plus ppc_rtas_tone_volume_write() */
+static const struct proc_ops ppc_rtas_tone_volume_proc_ops = {
+	.proc_open	= tone_volume_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= ppc_rtas_tone_volume_write,
+	.proc_release	= single_release,
+};
+
+static int ppc_rtas_find_all_sensors(void);
+static void ppc_rtas_process_sensor(struct seq_file *m,
+	struct individual_sensor *s, int state, int error, const char *loc);
+static char *ppc_rtas_process_error(int error);
+static void get_location_code(struct seq_file *m,
+	struct individual_sensor *s, const char *loc);
+static void check_location_string(struct seq_file *m, const char *c);
+static void check_location(struct seq_file *m, const char *c);
+
+static int __init proc_rtas_init(void)
+{
+	if (!machine_is(pseries))	/* these proc entries are only created on pseries */
+		return -ENODEV;
+
+	rtas_node = of_find_node_by_name(NULL, "rtas");	/* kept in a file-scope global for later property lookups */
+	if (rtas_node == NULL)
+		return -ENODEV;
+
+	proc_create("powerpc/rtas/progress", 0644, NULL,
+		    &ppc_rtas_progress_proc_ops);
+	proc_create("powerpc/rtas/clock", 0644, NULL,
+		    &ppc_rtas_clock_proc_ops);
+	proc_create("powerpc/rtas/poweron", 0644, NULL,
+		    &ppc_rtas_poweron_proc_ops);
+	proc_create_single("powerpc/rtas/sensors", 0444, NULL,
+			ppc_rtas_sensors_show);
+	proc_create("powerpc/rtas/frequency", 0644, NULL,
+		    &ppc_rtas_tone_freq_proc_ops);
+	proc_create("powerpc/rtas/volume", 0644, NULL,
+		    &ppc_rtas_tone_volume_proc_ops);
+	proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
+			ppc_rtas_rmo_buf_show);
+	return 0;	/* results of the proc_create*() calls above are not checked */
+}
+
+__initcall(proc_rtas_init);
+
+static int parse_number(const char __user *p, size_t count, u64 *val)
+{
+	char digits[40];
+
+	/* Copy the user string locally, leaving room for the terminating NUL */
+	if (count >= sizeof(digits))
+		return -EINVAL;
+	if (copy_from_user(digits, p, count))
+		return -EFAULT;
+
+	digits[count] = '\0';
+
+	return kstrtoull(digits, 10, val);	/* base 10; returns 0 or a negative errno */
+}
+
+/* ****************************************************************** */
+/* POWER-ON-TIME                                                      */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_poweron_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct rtc_time tm;
+	time64_t nowtime;
+	int error = parse_number(buf, count, &nowtime);	/* decimal number supplied by the writer */
+	if (error)
+		return error;
+
+	power_on_time = nowtime; /* save the time; reported back by ppc_rtas_poweron_show() */
+
+	rtc_time64_to_tm(nowtime, &tm);	/* split into calendar fields for the RTAS call */
+
+	error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL,
+			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+			  tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
+	if (error)
+		printk(KERN_WARNING "error: setting poweron time returned: %s\n", 
+				ppc_rtas_process_error(error));
+	return count;	/* an RTAS failure is only logged; the write still reports success */
+}
+/* ****************************************************************** */
+static int ppc_rtas_poweron_show(struct seq_file *m, void *v)
+{
+	if (power_on_time != 0)	/* a power-on time has been written */
+		seq_printf(m, "%lu\n",power_on_time);
+	else
+		seq_puts(m, "Power on time not set\n");
+	return 0;
+}
+
+/* ****************************************************************** */
+/* PROGRESS                                                           */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_progress_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	unsigned long hex;
+
+	if (count >= MAX_LINELENGTH)	/* overlong input is silently truncated so the NUL below fits */
+		count = MAX_LINELENGTH -1;
+	if (copy_from_user(progress_led, buf, count)) { /* save the string */
+		return -EFAULT;
+	}
+	progress_led[count] = 0;
+
+	/* Leading number, if any. NOTE(review): parsed as decimal (base 10) despite the name "hex" */
+	hex = simple_strtoul(progress_led, NULL, 10);
+
+	rtas_progress ((char *)progress_led, hex);
+	return count;	/* the possibly truncated length */
+
+	/* Unreachable, disabled code kept below: clear the line */
+	/* rtas_progress("                   ", 0xffff);*/
+}
+/* ****************************************************************** */
+static int ppc_rtas_progress_show(struct seq_file *m, void *v)
+{
+	if (progress_led[0])	/* print nothing until a non-empty string has been written */
+		seq_printf(m, "%s\n", progress_led);
+	return 0;
+}
+
+/* ****************************************************************** */
+/* CLOCK                                                              */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_clock_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct rtc_time tm;
+	time64_t nowtime;
+	int error = parse_number(buf, count, &nowtime);	/* decimal number supplied by the writer */
+	if (error)
+		return error;
+
+	rtc_time64_to_tm(nowtime, &tm);	/* split into calendar fields for the RTAS call */
+	error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL,
+			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+			  tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
+	if (error)
+		printk(KERN_WARNING "error: setting the clock returned: %s\n", 
+				ppc_rtas_process_error(error));
+	return count;	/* an RTAS failure is only logged; the write still reports success */
+}
+/* ****************************************************************** */
+static int ppc_rtas_clock_show(struct seq_file *m, void *v)
+{
+	int tod[8];	/* RTAS outputs; the first six are year, month, day, hour, min, sec */
+	int rc = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, tod);
+
+	if (rc) {
+		/* Log the failure and emit a bare "0" (no newline) */
+		printk(KERN_WARNING "error: reading the clock returned: %s\n",
+				ppc_rtas_process_error(rc));
+		seq_puts(m, "0");
+		return 0;
+	}
+
+	/* mktime64() takes unsigned fields; the int results convert implicitly */
+	seq_printf(m, "%lld\n",
+		   mktime64(tod[0], tod[1], tod[2], tod[3], tod[4], tod[5]));
+	return 0;
+}
+
+/* ****************************************************************** */
+/* SENSOR STUFF                                                       */
+/* ****************************************************************** */
+static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
+{
+	int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
+	int idx, inst;
+	int state, error;
+
+	seq_puts(m, "RTAS (RunTime Abstraction Services) Sensor Information\n");
+	seq_puts(m, "Sensor\t\tValue\t\tCondition\tLocation\n");
+	seq_puts(m, "********************************************************\n");
+
+	if (ppc_rtas_find_all_sensors() != 0) {
+		seq_puts(m, "\nNo sensors are available\n");
+		return 0;
+	}
+
+	for (idx = 0; idx < sensors.quant; idx++) {
+		struct individual_sensor *p = &sensors.sensor[idx];
+		char prop[64];
+		const char *loc;
+		int llen, offs = 0;
+
+		/* Location strings come from the "ibm,sensor-NNNN" property */
+		sprintf(prop, SENSOR_PREFIX"%04d", p->token);
+		loc = of_get_property(rtas_node, prop, &llen);
+
+		/* A sensor may have multiple instances: indices 0..quant */
+		for (inst = 0; inst <= p->quant; inst++) {
+			error = rtas_call(token, 2, 2, &state, p->token, inst);
+			ppc_rtas_process_sensor(m, p, state, error, loc);
+			seq_putc(m, '\n');
+			if (!loc)
+				continue;
+			/* Step to the next NUL-terminated string, if any is left */
+			offs += strlen(loc) + 1;
+			loc += strlen(loc) + 1;
+			if (offs >= llen)
+				loc = NULL;
+		}
+	}
+	return 0;
+}
+
+/* ****************************************************************** */
+
+static int ppc_rtas_find_all_sensors(void)
+{
+	const unsigned int *utmp;
+	int len, i;
+
+	/* Rebuild the global sensors table from the "rtas-sensors" property */
+	utmp = of_get_property(rtas_node, "rtas-sensors", &len);
+	if (utmp == NULL) {
+		printk (KERN_ERR "error: could not get rtas-sensors\n");
+		return 1;	/* non-zero: the caller reports that no sensors are available */
+	}
+	/* (token, quant) int pairs; clamp so sensor[MAX_SENSORS] cannot overflow */
+	sensors.quant = min_t(unsigned int, len / 8, MAX_SENSORS);
+	for (i=0; i<sensors.quant; i++) {
+		sensors.sensor[i].token = *utmp++;	/* NOTE(review): raw cell, no byte-order conversion - confirm */
+		sensors.sensor[i].quant = *utmp++;
+	}
+	return 0;
+}
+
+/* ****************************************************************** */
+/*
+ * Builds a string of what rtas returned
+ */
+static char *ppc_rtas_process_error(int error)
+{
+	/* Status code -> text; codes not listed here yield "(UNKNOWN)" */
+	static const struct {
+		int code;
+		char *text;
+	} names[] = {
+		{ SENSOR_CRITICAL_HIGH,	"(critical high)" },
+		{ SENSOR_WARNING_HIGH,	"(warning high)" },
+		{ SENSOR_NORMAL,	"(normal)" },
+		{ SENSOR_WARNING_LOW,	"(warning low)" },
+		{ SENSOR_CRITICAL_LOW,	"(critical low)" },
+		{ SENSOR_SUCCESS,	"(read ok)" },
+		{ SENSOR_HW_ERROR,	"(hardware error)" },
+		{ SENSOR_BUSY,		"(busy)" },
+		{ SENSOR_NOT_EXIST,	"(non existent)" },
+		{ SENSOR_DR_ENTITY,	"(dr entity removed)" },
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
+		if (names[k].code == error)
+			return names[k].text;
+	}
+
+	return "(UNKNOWN)";
+}
+
+/* ****************************************************************** */
+/*
+ * Builds a string out of what the sensor said
+ */
+
+static void ppc_rtas_process_sensor(struct seq_file *m,
+	struct individual_sensor *s, int state, int error, const char *loc)
+{
+	/* Defined return values: name tables, indexed by the state RTAS returned */
+	static const char * const key_switch[]        = { "Off\t", "Normal\t", "Secure\t",
+						"Maintenance" };
+	static const char * const enclosure_switch[]  = { "Closed", "Open" };
+	static const char * const lid_status[]        = { " ", "Open", "Closed" };
+	static const char * const power_source[]      = { "AC\t", "Battery",
+						"AC & Battery" };
+	static const char * const battery_remaining[] = { "Very Low", "Low", "Mid", "High" };
+	static const char * const epow_sensor[]       = {
+		"EPOW Reset", "Cooling warning", "Power warning",
+		"System shutdown", "System halt", "EPOW main enclosure",
+		"EPOW power off" };
+	static const char * const battery_cyclestate[]  = { "None", "In progress",
+						"Requested" };
+	static const char * const battery_charging[]    = { "Charging", "Discharging",
+						"No current flow" };
+	static const char * const ibm_drconnector[]     = { "Empty", "Present", "Unusable",
+						"Exchange" };
+
+	int have_strings = 0;
+	int num_states = 0;
+	int temperature = 0;
+	int unknown = 0;
+
+	/* What kind of sensor do we have here?  A state that falls outside the
+	 * sensor's table (negative ones included) is printed as a number. */
+	switch (s->token) {
+		case KEY_SWITCH:
+			seq_printf(m, "Key switch:\t");
+			num_states = sizeof(key_switch) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t", key_switch[state]);
+				have_strings = 1;
+			}
+			break;
+		case ENCLOSURE_SWITCH:
+			seq_printf(m, "Enclosure switch:\t");
+			num_states = sizeof(enclosure_switch) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t",
+						enclosure_switch[state]);
+				have_strings = 1;
+			}
+			break;
+		case THERMAL_SENSOR:
+			seq_printf(m, "Temp. (C/F):\t");
+			temperature = 1;
+			break;
+		case LID_STATUS:
+			seq_printf(m, "Lid status:\t");
+			num_states = sizeof(lid_status) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t", lid_status[state]);
+				have_strings = 1;
+			}
+			break;
+		case POWER_SOURCE:
+			seq_printf(m, "Power source:\t");
+			num_states = sizeof(power_source) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t",
+						power_source[state]);
+				have_strings = 1;
+			}
+			break;
+		case BATTERY_VOLTAGE:
+			seq_printf(m, "Battery voltage:\t");
+			break;
+		case BATTERY_REMAINING:
+			seq_printf(m, "Battery remaining:\t");
+			num_states = sizeof(battery_remaining) / sizeof(char *);
+			if (state >= 0 && state < num_states)
+			{
+				seq_printf(m, "%s\t",
+						battery_remaining[state]);
+				have_strings = 1;
+			}
+			break;
+		case BATTERY_PERCENTAGE:
+			seq_printf(m, "Battery percentage:\t");
+			break;
+		case EPOW_SENSOR:
+			seq_printf(m, "EPOW Sensor:\t");
+			num_states = sizeof(epow_sensor) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t", epow_sensor[state]);
+				have_strings = 1;
+			}
+			break;
+		case BATTERY_CYCLESTATE:
+			seq_printf(m, "Battery cyclestate:\t");
+			num_states = sizeof(battery_cyclestate) /
+				     	sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t",
+						battery_cyclestate[state]);
+				have_strings = 1;
+			}
+			break;
+		case BATTERY_CHARGING:
+			seq_printf(m, "Battery Charging:\t");
+			num_states = sizeof(battery_charging) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t",
+						battery_charging[state]);
+				have_strings = 1;
+			}
+			break;
+		case IBM_SURVEILLANCE:
+			seq_printf(m, "Surveillance:\t");
+			break;
+		case IBM_FANRPM:
+			seq_printf(m, "Fan (rpm):\t");
+			break;
+		case IBM_VOLTAGE:
+			seq_printf(m, "Voltage (mv):\t");
+			break;
+		case IBM_DRCONNECTOR:
+			seq_printf(m, "DR connector:\t");
+			num_states = sizeof(ibm_drconnector) / sizeof(char *);
+			if (state >= 0 && state < num_states) {
+				seq_printf(m, "%s\t",
+						ibm_drconnector[state]);
+				have_strings = 1;
+			}
+			break;
+		case IBM_POWERSUPPLY:
+			seq_printf(m, "Powersupply:\t");
+			break;
+		default:
+			seq_printf(m,  "Unknown sensor (type %d), ignoring it\n",
+					s->token);
+			unknown = 1;
+			have_strings = 1;
+			break;
+	}
+	if (have_strings == 0) {
+		if (temperature) {
+			seq_printf(m, "%4d /%4d\t", state, cel_to_fahr(state));
+		} else
+			seq_printf(m, "%10d\t", state);
+	}
+	if (unknown == 0) {
+		seq_printf(m, "%s\t", ppc_rtas_process_error(error));
+		get_location_code(m, s, loc);
+	}
+}
+
+/* ****************************************************************** */
+
+static void check_location(struct seq_file *m, const char *c)
+{
+	switch (c[0]) {
+		case LOC_PLANAR:
+			seq_printf(m, "Planar #%c", c[1]);
+			break;
+		case LOC_CPU:
+			seq_printf(m, "CPU #%c", c[1]);
+			break;
+		case LOC_FAN:
+			seq_printf(m, "Fan #%c", c[1]);
+			break;
+		case LOC_RACKMOUNTED:
+			seq_printf(m, "Rack #%c", c[1]);
+			break;
+		case LOC_VOLTAGE:
+			seq_printf(m, "Voltage #%c", c[1]);
+			break;
+		case LOC_LCD:
+			seq_printf(m, "LCD #%c", c[1]);
+			break;
+		case '.':
+			seq_printf(m, "- %c", c[1]);
+			break;
+		default:
+			seq_printf(m, "Unknown location");
+			break;
+	}
+}
+
+
+/* ****************************************************************** */
+/* 
+ * Format: 
+ * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ]
+ * the '.' may be an abbreviation
+ */
+static void check_location_string(struct seq_file *m, const char *c)
+{
+	/* Visit every character: a letter or '.' is decoded, '/' and '-' join. */
+	for (; *c != '\0'; c++) {
+		if (*c == '/' || *c == '-')
+			seq_printf(m, " at ");
+		else if (isalpha(*c) || *c == '.')
+			check_location(m, c);
+	}
+}
+
+
+/* ****************************************************************** */
+
+static void get_location_code(struct seq_file *m, struct individual_sensor *s,
+		const char *loc)
+{
+	/* @s is not used here; "---" stands for a missing or empty location. */
+	if (loc && *loc)
+		check_location_string(m, loc);
+	else
+		seq_printf(m, "---");
+	seq_putc(m, ' ');
+}
+/* ****************************************************************** */
+/* INDICATORS - Tone Frequency                                        */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_freq_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	u64 freq;
+	int error = parse_number(buf, count, &freq);
+	if (error)
+		return error;
+
+	rtas_tone_frequency = freq; /* save it for later */
+	error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL,
+			  TONE_FREQUENCY, 0, freq);
+	if (error)
+		printk(KERN_WARNING "error: setting tone frequency returned: %s\n", 
+				ppc_rtas_process_error(error));
+	return count;
+}
+/* ****************************************************************** */
+static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%lu\n", rtas_tone_frequency);
+	return 0;
+}
+/* ****************************************************************** */
+/* INDICATORS - Tone Volume                                           */
+/* ****************************************************************** */
+static ssize_t ppc_rtas_tone_volume_write(struct file *file,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	u64 volume;
+	int rc;
+
+	rc = parse_number(buf, count, &volume);
+	if (rc)
+		return rc;
+	if (volume > 100)	/* anything above 100 is clamped */
+		volume = 100;
+	rtas_tone_volume = volume; /* save it for later */
+	rc = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL,
+		       TONE_VOLUME, 0, volume);
+	if (rc)	/* the failure is only logged; the write still reports count */
+		printk(KERN_WARNING "error: setting tone volume returned: %s\n",
+				ppc_rtas_process_error(rc));
+	return count;
+}
+/* ****************************************************************** */
+static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%lu\n", rtas_tone_volume);
+	return 0;
+}
+
+/**
+ * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space.
+ *
+ * Base + size description of a range of RTAS-addressable memory set
+ * aside for user space to use as work area(s) for certain RTAS
+ * functions. User space accesses this region via /dev/mem. Apart from
+ * security policies, the kernel does not arbitrate or serialize
+ * access to this region, and user space must ensure that concurrent
+ * users do not interfere with each other.
+ */
+static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE);
+	return 0;
+}
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
new file mode 100644
index 0000000000..6996214532
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/ratelimit.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+
+
+#define MAX_RTC_WAIT 5000	/* 5 sec */
+
+time64_t __init rtas_get_boot_time(void)
+{
+	const u64 deadline = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+	unsigned int busy_ms;	/* non-zero while RTAS asks us to retry later */
+	int tod[8];		/* outputs of get-time-of-day; [0..5] feed mktime64() */
+	int rc;
+
+	for (;;) {
+		rc = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, tod);
+		busy_ms = rtas_busy_delay_time(rc);
+		if (!busy_ms)
+			break;
+		/* This is boot time so we spin. */
+		udelay(busy_ms * 1000);
+		if (get_tb() >= deadline)
+			break;
+	}
+
+	if (rc != 0) {
+		printk_ratelimited(KERN_WARNING
+				   "error: reading the clock failed (%d)\n",
+				   rc);
+		return 0;
+	}
+
+	return mktime64(tod[0], tod[1], tod[2], tod[3], tod[4], tod[5]);
+}
+
+/* NOTE: get_rtc_time cannot sleep in interrupt context, so if a delay is
+ * needed there it gives up.  On that and on every other failure rtc_tm is
+ * zeroed and a ratelimited warning is logged; it is never left stale.
+ */
+void rtas_get_rtc_time(struct rtc_time *rtc_tm)
+{
+	int ret[8];
+	int error;
+	unsigned int wait_time;
+	u64 max_wait_tb;
+
+	max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+	do {
+		error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
+		wait_time = rtas_busy_delay_time(error);
+		if (wait_time) {
+			if (in_interrupt()) {
+				memset(rtc_tm, 0, sizeof(struct rtc_time));
+				printk_ratelimited(KERN_WARNING
+						   "error: reading clock "
+						   "would delay interrupt\n");
+				return;	/* delay not allowed */
+			}
+			msleep(wait_time);
+		}
+	} while (wait_time && (get_tb() < max_wait_tb));
+
+	if (error != 0) {
+		/* Don't hand the caller stale or uninitialised fields. */
+		memset(rtc_tm, 0, sizeof(struct rtc_time));
+		printk_ratelimited(KERN_WARNING
+				   "error: reading the clock failed (%d)\n", error);
+		return;
+	}
+
+	rtc_tm->tm_sec = ret[5];
+	rtc_tm->tm_min = ret[4];
+	rtc_tm->tm_hour = ret[3];
+	rtc_tm->tm_mday = ret[2];
+	rtc_tm->tm_mon = ret[1] - 1;
+	rtc_tm->tm_year = ret[0] - 1900;
+}
+
+int rtas_set_rtc_time(struct rtc_time *tm)
+{
+	const u64 deadline = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
+	int rc, busy_ms;
+
+	for (;;) {
+		rc = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL,
+			       tm->tm_year + 1900, tm->tm_mon + 1,
+			       tm->tm_mday, tm->tm_hour, tm->tm_min,
+			       tm->tm_sec, 0);
+		busy_ms = rtas_busy_delay_time(rc);
+		if (!busy_ms)
+			break;
+		if (in_interrupt())
+			return 1;	/* probably decrementer */
+		msleep(busy_ms);
+		if (get_tb() >= deadline)
+			break;
+	}
+
+	/* A failure is only logged: the function still returns 0 below. */
+	if (rc != 0)
+		printk_ratelimited(KERN_WARNING
+				   "error: setting the clock failed (%d)\n",
+				   rc);
+	return 0;
+}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
new file mode 100644
index 0000000000..87d65bdd3e
--- /dev/null
+++ b/arch/powerpc/kernel/rtas.c
@@ -0,0 +1,2088 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Procedures for interfacing to the RTAS on CHRP machines.
+ *
+ * Peter Bergner, IBM	March 2001.
+ * Copyright (C) 2001 IBM.
+ */
+
+#define pr_fmt(fmt)	"rtas: " fmt
+
+#include <linux/bsearch.h>
+#include <linux/capability.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stdarg.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/xarray.h>
+
+#include <asm/delay.h>
+#include <asm/firmware.h>
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+#include <asm/trace.h>
+#include <asm/udbg.h>
+
+struct rtas_filter {
+	/* Indexes into the args buffer, -1 if not used */
+	const int buf_idx1;
+	const int size_idx1;
+	const int buf_idx2;
+	const int size_idx2;
+	/*
+	 * Assumed buffer size per the spec if the function does not
+	 * have a size parameter, e.g. ibm,errinjct. 0 if unused.
+	 */
+	const int fixed_size;
+};
+
+/**
+ * struct rtas_function - Descriptor for RTAS functions.
+ *
+ * @token: Value of @name if it exists under the /rtas node.
+ * @name: Function name.
+ * @filter: If non-NULL, invoking this function via the rtas syscall is
+ *          generally allowed, and @filter describes constraints on the
+ *          arguments. See also @banned_for_syscall_on_le.
+ * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed
+ *                            but specifically restricted on ppc64le. Such
+ *                            functions are believed to have no users on
+ *                            ppc64le, and we want to keep it that way. It does
+ *                            not make sense for this to be set when @filter
+ *                            is NULL.
+ */
+struct rtas_function {
+	s32 token;
+	const bool banned_for_syscall_on_le:1;
+	const char * const name;
+	const struct rtas_filter *filter;
+};
+
+static struct rtas_function rtas_function_table[] __ro_after_init = {
+	[RTAS_FNIDX__CHECK_EXCEPTION] = {
+		.name = "check-exception",
+	},
+	[RTAS_FNIDX__DISPLAY_CHARACTER] = {
+		.name = "display-character",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__EVENT_SCAN] = {
+		.name = "event-scan",
+	},
+	[RTAS_FNIDX__FREEZE_TIME_BASE] = {
+		.name = "freeze-time-base",
+	},
+	[RTAS_FNIDX__GET_POWER_LEVEL] = {
+		.name = "get-power-level",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__GET_SENSOR_STATE] = {
+		.name = "get-sensor-state",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__GET_TERM_CHAR] = {
+		.name = "get-term-char",
+	},
+	[RTAS_FNIDX__GET_TIME_OF_DAY] = {
+		.name = "get-time-of-day",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = {
+		.name = "ibm,activate-firmware",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_CBE_START_PTCAL] = {
+		.name = "ibm,cbe-start-ptcal",
+	},
+	[RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = {
+		.name = "ibm,cbe-stop-ptcal",
+	},
+	[RTAS_FNIDX__IBM_CHANGE_MSI] = {
+		.name = "ibm,change-msi",
+	},
+	[RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = {
+		.name = "ibm,close-errinjct",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = {
+		.name = "ibm,configure-bridge",
+	},
+	[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = {
+		.name = "ibm,configure-connector",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = -1,
+			.buf_idx2 = 1, .size_idx2 = -1,
+			.fixed_size = 4096,
+		},
+	},
+	[RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = {
+		.name = "ibm,configure-kernel-dump",
+	},
+	[RTAS_FNIDX__IBM_CONFIGURE_PE] = {
+		.name = "ibm,configure-pe",
+	},
+	[RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = {
+		.name = "ibm,create-pe-dma-window",
+	},
+	[RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = {
+		.name = "ibm,display-message",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_ERRINJCT] = {
+		.name = "ibm,errinjct",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 2, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+			.fixed_size = 1024,
+		},
+	},
+	[RTAS_FNIDX__IBM_EXTI2C] = {
+		.name = "ibm,exti2c",
+	},
+	[RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = {
+		.name = "ibm,get-config-addr-info",
+	},
+	[RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = {
+		.name = "ibm,get-config-addr-info2",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = {
+		.name = "ibm,get-dynamic-sensor-state",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_GET_INDICES] = {
+		.name = "ibm,get-indices",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 2, .size_idx1 = 3,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = {
+		.name = "ibm,get-rio-topology",
+	},
+	[RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = {
+		.name = "ibm,get-system-parameter",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 1, .size_idx1 = 2,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_GET_VPD] = {
+		.name = "ibm,get-vpd",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = -1,
+			.buf_idx2 = 1, .size_idx2 = 2,
+		},
+	},
+	[RTAS_FNIDX__IBM_GET_XIVE] = {
+		.name = "ibm,get-xive",
+	},
+	[RTAS_FNIDX__IBM_INT_OFF] = {
+		.name = "ibm,int-off",
+	},
+	[RTAS_FNIDX__IBM_INT_ON] = {
+		.name = "ibm,int-on",
+	},
+	[RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = {
+		.name = "ibm,io-quiesce-ack",
+	},
+	[RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = {
+		.name = "ibm,lpar-perftools",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 2, .size_idx1 = 3,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = {
+		.name = "ibm,manage-flash-image",
+	},
+	[RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = {
+		.name = "ibm,manage-storage-preservation",
+	},
+	[RTAS_FNIDX__IBM_NMI_INTERLOCK] = {
+		.name = "ibm,nmi-interlock",
+	},
+	[RTAS_FNIDX__IBM_NMI_REGISTER] = {
+		.name = "ibm,nmi-register",
+	},
+	[RTAS_FNIDX__IBM_OPEN_ERRINJCT] = {
+		.name = "ibm,open-errinjct",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = {
+		.name = "ibm,open-sriov-allow-unfreeze",
+	},
+	[RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = {
+		.name = "ibm,open-sriov-map-pe-number",
+	},
+	[RTAS_FNIDX__IBM_OS_TERM] = {
+		.name = "ibm,os-term",
+	},
+	[RTAS_FNIDX__IBM_PARTNER_CONTROL] = {
+		.name = "ibm,partner-control",
+	},
+	[RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = {
+		.name = "ibm,physical-attestation",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = 1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_PLATFORM_DUMP] = {
+		.name = "ibm,platform-dump",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 4, .size_idx1 = 5,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_POWER_OFF_UPS] = {
+		.name = "ibm,power-off-ups",
+	},
+	[RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = {
+		.name = "ibm,query-interrupt-source-number",
+	},
+	[RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = {
+		.name = "ibm,query-pe-dma-window",
+	},
+	[RTAS_FNIDX__IBM_READ_PCI_CONFIG] = {
+		.name = "ibm,read-pci-config",
+	},
+	[RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = {
+		.name = "ibm,read-slot-reset-state",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = {
+		.name = "ibm,read-slot-reset-state2",
+	},
+	[RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
+		.name = "ibm,remove-pe-dma-window",
+	},
+	[RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = {
+		.name = "ibm,reset-pe-dma-windows",
+	},
+	[RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
+		.name = "ibm,scan-log-dump",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = 1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = {
+		.name = "ibm,set-dynamic-indicator",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 2, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_SET_EEH_OPTION] = {
+		.name = "ibm,set-eeh-option",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_SET_SLOT_RESET] = {
+		.name = "ibm,set-slot-reset",
+	},
+	[RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = {
+		.name = "ibm,set-system-parameter",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_SET_XIVE] = {
+		.name = "ibm,set-xive",
+	},
+	[RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = {
+		.name = "ibm,slot-error-detail",
+	},
+	[RTAS_FNIDX__IBM_SUSPEND_ME] = {
+		.name = "ibm,suspend-me",
+		.banned_for_syscall_on_le = true,
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = {
+		.name = "ibm,tune-dma-parms",
+	},
+	[RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = {
+		.name = "ibm,update-flash-64-and-reboot",
+	},
+	[RTAS_FNIDX__IBM_UPDATE_NODES] = {
+		.name = "ibm,update-nodes",
+		.banned_for_syscall_on_le = true,
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+			.fixed_size = 4096,
+		},
+	},
+	[RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = {
+		.name = "ibm,update-properties",
+		.banned_for_syscall_on_le = true,
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = 0, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+			.fixed_size = 4096,
+		},
+	},
+	[RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = {
+		.name = "ibm,validate-flash-image",
+	},
+	[RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = {
+		.name = "ibm,write-pci-config",
+	},
+	[RTAS_FNIDX__NVRAM_FETCH] = {
+		.name = "nvram-fetch",
+	},
+	[RTAS_FNIDX__NVRAM_STORE] = {
+		.name = "nvram-store",
+	},
+	[RTAS_FNIDX__POWER_OFF] = {
+		.name = "power-off",
+	},
+	[RTAS_FNIDX__PUT_TERM_CHAR] = {
+		.name = "put-term-char",
+	},
+	[RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = {
+		.name = "query-cpu-stopped-state",
+	},
+	[RTAS_FNIDX__READ_PCI_CONFIG] = {
+		.name = "read-pci-config",
+	},
+	[RTAS_FNIDX__RTAS_LAST_ERROR] = {
+		.name = "rtas-last-error",
+	},
+	[RTAS_FNIDX__SET_INDICATOR] = {
+		.name = "set-indicator",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__SET_POWER_LEVEL] = {
+		.name = "set-power-level",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = {
+		.name = "set-time-for-power-on",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__SET_TIME_OF_DAY] = {
+		.name = "set-time-of-day",
+		.filter = &(const struct rtas_filter) {
+			.buf_idx1 = -1, .size_idx1 = -1,
+			.buf_idx2 = -1, .size_idx2 = -1,
+		},
+	},
+	[RTAS_FNIDX__START_CPU] = {
+		.name = "start-cpu",
+	},
+	[RTAS_FNIDX__STOP_SELF] = {
+		.name = "stop-self",
+	},
+	[RTAS_FNIDX__SYSTEM_REBOOT] = {
+		.name = "system-reboot",
+	},
+	[RTAS_FNIDX__THAW_TIME_BASE] = {
+		.name = "thaw-time-base",
+	},
+	[RTAS_FNIDX__WRITE_PCI_CONFIG] = {
+		.name = "write-pci-config",
+	},
+};
+
+/*
+ * Nearly all RTAS calls need to be serialized. All uses of the
+ * default rtas_args block must hold rtas_lock.
+ *
+ * Exceptions to the RTAS serialization requirement (e.g. stop-self)
+ * must use a separate rtas_args structure.
+ */
+static DEFINE_RAW_SPINLOCK(rtas_lock);
+static struct rtas_args rtas_args;
+
+/**
+ * rtas_function_token() - RTAS function token lookup.
+ * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
+ *
+ * Context: Any context.
+ * Return: the token value for the function if implemented by this platform,
+ *         otherwise RTAS_UNKNOWN_SERVICE.
+ */
+s32 rtas_function_token(const rtas_fn_handle_t handle)
+{
+	const size_t index = handle.index;
+	const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table);
+
+	if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index))
+		return RTAS_UNKNOWN_SERVICE;
+	/*
+	 * Various drivers attempt token lookups on non-RTAS
+	 * platforms.
+	 */
+	if (!rtas.dev)
+		return RTAS_UNKNOWN_SERVICE;
+
+	return rtas_function_table[index].token;
+}
+EXPORT_SYMBOL_GPL(rtas_function_token);
+
+static int rtas_function_cmp(const void *a, const void *b)
+{
+	const char *lhs = ((const struct rtas_function *)a)->name;
+	const char *rhs = ((const struct rtas_function *)b)->name;
+	/* bsearch() comparator: descriptors are ordered by function name. */
+	return strcmp(lhs, rhs);
+}
+
+/*
+ * Boot-time initialization of the function table needs the lookup to
+ * return a non-const-qualified object. Use rtas_name_to_function()
+ * in all other contexts.
+ */
+static struct rtas_function *__rtas_name_to_function(const char *name)
+{
+	const struct rtas_function key = {
+		.name = name,
+	};
+	struct rtas_function *found;
+
+	found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table),
+			sizeof(rtas_function_table[0]), rtas_function_cmp);
+
+	return found;
+}
+
+static const struct rtas_function *rtas_name_to_function(const char *name)
+{
+	return __rtas_name_to_function(name);
+}
+
+static DEFINE_XARRAY(rtas_token_to_function_xarray);
+
+static int __init rtas_token_to_function_xarray_init(void)
+{
+	/* Index every descriptor that has a real token by that token value. */
+	for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
+		const struct rtas_function *func = &rtas_function_table[i];
+		const s32 token = func->token;
+		int err;
+
+		if (token == RTAS_UNKNOWN_SERVICE)
+			continue;
+
+		err = xa_err(xa_store(&rtas_token_to_function_xarray,
+				      token, (void *)func, GFP_KERNEL));
+		if (err)
+			return err;	/* stop at the first failed store */
+	}
+
+	return 0;
+}
+arch_initcall(rtas_token_to_function_xarray_init);
+
+/*
+ * For use by sys_rtas(), where the token value is provided by user
+ * space and we don't want to warn on failed lookups.
+ */
+static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
+{
+	return xa_load(&rtas_token_to_function_xarray, token);
+}
+
+/*
+ * Reverse lookup for deriving the function descriptor from a
+ * known-good token value in contexts where the former is not already
+ * available. @token must be valid, e.g. derived from the result of a
+ * prior lookup against the function table.
+ */
+static const struct rtas_function *rtas_token_to_function(s32 token)
+{
+	const struct rtas_function *func;
+
+	if (WARN_ONCE(token < 0, "invalid token %d", token))
+		return NULL;
+
+	func = rtas_token_to_function_untrusted(token);
+
+	if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
+		return NULL;
+
+	return func;
+}
+
+/* This is here deliberately so it's only used in this file */
+void enter_rtas(unsigned long);
+
+static void __do_enter_rtas(struct rtas_args *args)
+{
+	enter_rtas(__pa(args));
+	srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
+}
+
+static void __do_enter_rtas_trace(struct rtas_args *args)
+{
+	const char *name = NULL;	/* stays NULL when no name is available */
+
+	if (args == &rtas_args)
+		lockdep_assert_held(&rtas_lock);
+	/*
+	 * If the tracepoints that consume the function name aren't
+	 * active, avoid the lookup.
+	 */
+	if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
+		const s32 token = be32_to_cpu(args->token);
+		const struct rtas_function *func = rtas_token_to_function(token);
+		/* The lookup returns NULL (after warning) for a token it can't map. */
+		name = func ? func->name : NULL;
+	}
+
+	trace_rtas_input(args, name);
+	trace_rtas_ll_entry(args);
+
+	__do_enter_rtas(args);
+
+	trace_rtas_ll_exit(args);
+	trace_rtas_output(args, name);
+}
+
+static void do_enter_rtas(struct rtas_args *args)
+{
+	const unsigned long msr = mfmsr();
+	/*
+	 * Situations where we want to skip any active tracepoints for
+	 * safety reasons:
+	 *
+	 * 1. The last code executed on an offline CPU as it stops,
+	 *    i.e. we're about to call stop-self. The tracepoints'
+	 *    function name lookup uses xarray, which uses RCU, which
+	 *    isn't valid to call on an offline CPU.  Any events
+	 *    emitted on an offline CPU will be discarded anyway.
+	 *
+	 * 2. In real mode, as when invoking ibm,nmi-interlock from
+	 *    the pseries MCE handler. We cannot count on trace
+	 *    buffers or the entries in rtas_token_to_function_xarray
+	 *    to be contained in the RMO.
+	 */
+	const unsigned long mask = MSR_IR | MSR_DR;
+	const bool can_trace = likely(cpu_online(raw_smp_processor_id()) &&
+				      (msr & mask) == mask);
+	/*
+	 * Make sure MSR[RI] is currently enabled as it will be forced later
+	 * in enter_rtas.
+	 */
+	BUG_ON(!(msr & MSR_RI));
+
+	BUG_ON(!irqs_disabled());
+
+	hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */
+
+	if (can_trace)
+		__do_enter_rtas_trace(args);
+	else
+		__do_enter_rtas(args);
+}
+
+struct rtas_t rtas;
+
+DEFINE_SPINLOCK(rtas_data_buf_lock);
+EXPORT_SYMBOL_GPL(rtas_data_buf_lock);
+
+char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K);
+EXPORT_SYMBOL_GPL(rtas_data_buf);
+
+unsigned long rtas_rmo_buf;
+
+/*
+ * If non-NULL, this gets called when the kernel terminates.
+ * This is done like this so rtas_flash can be a module.
+ */
+void (*rtas_flash_term_hook)(int);
+EXPORT_SYMBOL_GPL(rtas_flash_term_hook);
+
+/*
+ * call_rtas_display_status and call_rtas_display_status_delay
+ * are designed only for very early low-level debugging, which
+ * is why the token is hard-coded to 10.
+ */
+static void call_rtas_display_status(unsigned char c)
+{
+	unsigned long flags;
+
+	if (!rtas.base)
+		return;
+
+	raw_spin_lock_irqsave(&rtas_lock, flags);
+	rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c);
+	raw_spin_unlock_irqrestore(&rtas_lock, flags);
+}
+
+static void call_rtas_display_status_delay(char c)
+{
+	static int pending_newline = 0;  /* did last write end with unprinted newline? */
+	static int width = 16;		 /* columns still free on the current line */
+
+	if (c == '\n') {	/* pad the rest of the line with blanks, then pause */
+		while (width-- > 0)
+			call_rtas_display_status(' ');
+		width = 16;
+		mdelay(500);
+		pending_newline = 1;
+	} else {
+		if (pending_newline) {
+			call_rtas_display_status('\r');
+			call_rtas_display_status('\n');
+		}
+		pending_newline = 0;
+		if (width-- > 0) {	/* drop every character past the line width */
+			call_rtas_display_status(c);
+			udelay(10000);
+		}
+	}
+}
+
+void __init udbg_init_rtas_panel(void)
+{
+	udbg_putc = call_rtas_display_status_delay;
+}
+
+#ifdef CONFIG_UDBG_RTAS_CONSOLE
+
+/* If you think you're dying before early_init_dt_scan_rtas() does its
+ * work, you can hard code the token values for your firmware here and
+ * hardcode rtas.base/entry etc.
+ */
+static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE;
+static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE;
+
+static void udbg_rtascon_putc(char c)
+{
+	int tries;
+
+	if (!rtas.base)
+		return;
+
+	/* Add CRs before LFs */
+	if (c == '\n')
+		udbg_rtascon_putc('\r');
+
+	/* if there is more than one character to be displayed, wait a bit */
+	for (tries = 0; tries < 16; tries++) {
+		if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0)
+			break;
+		udelay(1000);
+	}
+}
+
+/*
+ * udbg getc_poll hook: try once to read a character through
+ * rtas_getchar_token.
+ *
+ * Returns the character, or -1 when rtas.base is unset or the RTAS call
+ * reports a non-zero status.
+ */
+static int udbg_rtascon_getc_poll(void)
+{
+	int ch;
+
+	if (!rtas.base || rtas_call(rtas_getchar_token, 0, 2, &ch) != 0)
+		return -1;
+
+	return ch;
+}
+
+/*
+ * udbg getc hook: spin on udbg_rtascon_getc_poll() until it returns
+ * something other than -1, then return that value.
+ */
+static int udbg_rtascon_getc(void)
+{
+	int ch;
+
+	do {
+		ch = udbg_rtascon_getc_poll();
+	} while (ch == -1);
+
+	return ch;
+}
+
+
+/*
+ * Point the udbg putc, getc and getc_poll hooks at the RTAS console
+ * helpers defined above.
+ */
+void __init udbg_init_rtas_console(void)
+{
+	udbg_putc = udbg_rtascon_putc;
+	udbg_getc = udbg_rtascon_getc;
+	udbg_getc_poll = udbg_rtascon_getc_poll;
+}
+#endif /* CONFIG_UDBG_RTAS_CONSOLE */
+
+/*
+ * rtas_progress() - write a progress message to the RTAS display.
+ * @s:   NUL-terminated text, written one character at a time through
+ *       the display-character function.
+ * @hex: value handed to set-indicator when display-character is not
+ *       available.
+ *
+ * On the first call, display geometry is read from properties of the
+ * /rtas node and the display-character and set-indicator tokens are
+ * looked up; the results are cached in function-local statics.
+ *
+ * A '\n' that ends @s is not written immediately. It is remembered and
+ * written as CR-LF at the start of the next call. Characters beyond the
+ * current line width are skipped up to the next '\n' or '\r'.
+ *
+ * Does nothing when rtas.base is unset.
+ */
+void rtas_progress(char *s, unsigned short hex)
+{
+	struct device_node *root;
+	int width;
+	const __be32 *p;
+	char *os;
+	static int display_character, set_indicator;
+	static int display_width, display_lines, form_feed;
+	static const int *row_width;
+	static DEFINE_SPINLOCK(progress_lock);
+	static int current_line;
+	static int pending_newline = 0;  /* did last write end with unprinted newline? */
+
+	if (!rtas.base)
+		return;
+
+	/* display_width == 0 means the one-time setup has not run yet */
+	if (display_width == 0) {
+		/* default width, used when the property below is absent */
+		display_width = 0x10;
+		if ((root = of_find_node_by_path("/rtas"))) {
+			if ((p = of_get_property(root,
+					"ibm,display-line-length", NULL)))
+				display_width = be32_to_cpu(*p);
+			if ((p = of_get_property(root,
+					"ibm,form-feed", NULL)))
+				form_feed = be32_to_cpu(*p);
+			if ((p = of_get_property(root,
+					"ibm,display-number-of-lines", NULL)))
+				display_lines = be32_to_cpu(*p);
+			/*
+			 * NOTE(review): unlike the properties above, this one
+			 * is later indexed as native-endian int without
+			 * be32_to_cpu() - confirm this is intended.
+			 */
+			row_width = of_get_property(root,
+					"ibm,display-truncation-length", NULL);
+			of_node_put(root);
+		}
+		display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER);
+		set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR);
+	}
+
+	if (display_character == RTAS_UNKNOWN_SERVICE) {
+		/* use hex display if available */
+		if (set_indicator != RTAS_UNKNOWN_SERVICE)
+			rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex);
+		return;
+	}
+
+	spin_lock(&progress_lock);
+
+	/*
+	 * Last write ended with newline, but we didn't print it since
+	 * it would just clear the bottom line of output. Print it now
+	 * instead.
+	 *
+	 * If no newline is pending and form feed is supported, clear the
+	 * display with a form feed; otherwise, print a CR to start output
+	 * at the beginning of the line.
+	 */
+	if (pending_newline) {
+		rtas_call(display_character, 1, 1, NULL, '\r');
+		rtas_call(display_character, 1, 1, NULL, '\n');
+		pending_newline = 0;
+	} else {
+		current_line = 0;
+		if (form_feed)
+			rtas_call(display_character, 1, 1, NULL,
+				  (char)form_feed);
+		else
+			rtas_call(display_character, 1, 1, NULL, '\r');
+	}
+ 
+	/* per-row width if the truncation property exists, else the global one */
+	if (row_width)
+		width = row_width[current_line];
+	else
+		width = display_width;
+	os = s;
+	while (*os) {
+		if (*os == '\n' || *os == '\r') {
+			/* If newline is the last character, save it
+			 * until next call to avoid bumping up the
+			 * display output.
+			 */
+			if (*os == '\n' && !os[1]) {
+				pending_newline = 1;
+				current_line++;
+				/* stay on the last line once the bottom is reached */
+				if (current_line > display_lines-1)
+					current_line = display_lines-1;
+				spin_unlock(&progress_lock);
+				return;
+			}
+ 
+			/* RTAS wants CR-LF, not just LF */
+ 
+			if (*os == '\n') {
+				rtas_call(display_character, 1, 1, NULL, '\r');
+				rtas_call(display_character, 1, 1, NULL, '\n');
+			} else {
+				/* CR might be used to re-draw a line, so we'll
+				 * leave it alone and not add LF.
+				 */
+				rtas_call(display_character, 1, 1, NULL, *os);
+			}
+ 
+			/* a line break resets the remaining width */
+			if (row_width)
+				width = row_width[current_line];
+			else
+				width = display_width;
+		} else {
+			width--;
+			rtas_call(display_character, 1, 1, NULL, *os);
+		}
+ 
+		os++;
+ 
+		/* if we overwrite the screen length */
+		if (width <= 0)
+			while ((*os != 0) && (*os != '\n') && (*os != '\r'))
+				os++;
+	}
+ 
+	spin_unlock(&progress_lock);
+}
+EXPORT_SYMBOL_GPL(rtas_progress);		/* needed by rtas_flash module */
+
+/*
+ * rtas_token() - look up the token for an RTAS service by name.
+ * @service: name to look up.
+ *
+ * Returns the token recorded in the function table when @service is a
+ * known function name. For any other name a one-time warning is raised
+ * and the value of the same-named property of the RTAS device node is
+ * returned instead. Returns RTAS_UNKNOWN_SERVICE when there is no RTAS
+ * device node or no such property.
+ */
+int rtas_token(const char *service)
+{
+	const struct rtas_function *fn;
+	const __be32 *prop;
+
+	if (!rtas.dev)
+		return RTAS_UNKNOWN_SERVICE;
+
+	fn = rtas_name_to_function(service);
+	if (fn)
+		return fn->token;
+
+	/*
+	 * Not a name the function table knows about. Either the table
+	 * is missing an entry, or the caller is using rtas_token() to
+	 * read a non-function property of the /rtas node. Complain once
+	 * and keep the legacy property lookup working.
+	 */
+	WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n",
+		  service);
+
+	prop = of_get_property(rtas.dev, service, NULL);
+	if (!prop)
+		return RTAS_UNKNOWN_SERVICE;
+
+	return be32_to_cpu(*prop);
+}
+EXPORT_SYMBOL_GPL(rtas_token);
+
+/*
+ * Return non-zero if rtas_token() yields something other than
+ * RTAS_UNKNOWN_SERVICE for @service, zero otherwise.
+ */
+int rtas_service_present(const char *service)
+{
+	return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
+}
+
+#ifdef CONFIG_RTAS_ERROR_LOGGING
+
+static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
+
+/*
+ * Return the firmware-specified size of the error log buffer
+ * for all rtas calls that require an error buffer argument.
+ * This includes 'check-exception' and 'rtas-last-error'.
+ *
+ * The value starts out as RTAS_ERROR_LOG_MAX and is replaced by
+ * init_error_log_max(), which never stores anything larger.
+ */
+int rtas_get_error_log_max(void)
+{
+	return rtas_error_log_max;
+}
+
+/*
+ * Initialise rtas_error_log_max from the "rtas-error-log-max" property
+ * of the RTAS device node.
+ *
+ * A missing property, or a value above RTAS_ERROR_LOG_MAX, is replaced
+ * by RTAS_ERROR_LOG_MAX; a warning is logged in both cases.
+ */
+static void __init init_error_log_max(void)
+{
+	static const char propname[] __initconst = "rtas-error-log-max";
+	u32 limit;
+
+	if (of_property_read_u32(rtas.dev, propname, &limit)) {
+		pr_warn("%s not found, using default of %u\n",
+			propname, RTAS_ERROR_LOG_MAX);
+		limit = RTAS_ERROR_LOG_MAX;
+	} else if (limit > RTAS_ERROR_LOG_MAX) {
+		pr_warn("%s = %u, clamping max error log size to %u\n",
+			propname, limit, RTAS_ERROR_LOG_MAX);
+		limit = RTAS_ERROR_LOG_MAX;
+	}
+
+	rtas_error_log_max = limit;
+}
+
+
+static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
+
+/*
+ * Return a copy of the detailed error text associated with the most
+ * recent failed call to rtas. The text can go stale as soon as any
+ * other rtas call is made, so this must run atomically with whatever
+ * produced the error, i.e. with rtas_lock still held from that call.
+ *
+ * @altbuf: optional destination for the copy. When NULL, the copy goes
+ *          into a GFP_ATOMIC allocation if the slab allocator is up,
+ *          otherwise rtas_err_buf itself is returned.
+ *
+ * Returns the buffer holding the text, or NULL when the function token
+ * is -1, the call did not report status 0, or the allocation failed.
+ */
+static char *__fetch_rtas_last_error(char *altbuf)
+{
+	const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR);
+	struct rtas_args err_args, save_args;
+	char *dest;
+
+	lockdep_assert_held(&rtas_lock);
+
+	if (token == -1)
+		return NULL;
+
+	err_args.token = cpu_to_be32(token);
+	err_args.nargs = cpu_to_be32(2);
+	err_args.nret = cpu_to_be32(1);
+	err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf));
+	err_args.args[1] = cpu_to_be32(rtas_get_error_log_max());
+	err_args.args[2] = 0;
+
+	/* Borrow the global parameter block, then put its contents back */
+	save_args = rtas_args;
+	rtas_args = err_args;
+
+	do_enter_rtas(&rtas_args);
+
+	err_args = rtas_args;
+	rtas_args = save_args;
+
+	/* Nothing to hand back unless the call reported status 0 */
+	if (likely(err_args.args[2] != 0))
+		return NULL;
+
+	if (altbuf)
+		dest = altbuf;
+	else if (slab_is_available())
+		dest = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
+	else
+		dest = rtas_err_buf;
+
+	if (dest)
+		memmove(dest, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+
+	return dest;
+}
+
+#define get_errorlog_buffer()	kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL)
+
+#else /* CONFIG_RTAS_ERROR_LOGGING */
+#define __fetch_rtas_last_error(x)	NULL
+#define get_errorlog_buffer()		NULL
+static void __init init_error_log_max(void) {}
+#endif
+
+
+/*
+ * Fill in @args for a call to @token and enter RTAS.
+ *
+ * @nargs input words are taken from @list as __u32 values and stored
+ * big-endian. The @nret return slots, which sit directly behind the
+ * inputs in @args->args, are zeroed before the call.
+ */
+static void
+va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
+		      va_list list)
+{
+	int idx;
+
+	args->token = cpu_to_be32(token);
+	args->nargs = cpu_to_be32(nargs);
+	args->nret  = cpu_to_be32(nret);
+	args->rets  = &args->args[nargs];
+
+	for (idx = 0; idx < nargs; idx++)
+		args->args[idx] = cpu_to_be32(va_arg(list, __u32));
+
+	for (idx = 0; idx < nret; idx++)
+		args->rets[idx] = 0;
+
+	do_enter_rtas(args);
+}
+
+/**
+ * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
+ * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
+ *        constraints.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * This function is similar to rtas_call(), but must be used with a
+ * limited set of RTAS calls specifically exempted from the general
+ * requirement that only one RTAS call may be in progress at any
+ * time. Examples include stop-self and ibm,nmi-interlock.
+ *
+ * Unlike rtas_call(), there is no outputs array parameter: the
+ * variadic list begins with the first input parameter, immediately
+ * after @nret. Results are left in @args->rets in big-endian form.
+ */
+void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
+{
+	va_list list;
+
+	va_start(list, nret);
+	va_rtas_call_unlocked(args, token, nargs, nret, list);
+	va_end(list);
+}
+
+/*
+ * True when @token equals the token of RTAS_FN_IBM_OPEN_ERRINJCT or of
+ * RTAS_FN_IBM_ERRINJCT.
+ */
+static bool token_is_restricted_errinjct(s32 token)
+{
+	if (token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT))
+		return true;
+
+	return token == rtas_function_token(RTAS_FN_IBM_ERRINJCT);
+}
+
+/**
+ * rtas_call() - Invoke an RTAS firmware function.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @outputs: Array of @nret output words.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * The @nargs and @nret arguments must match the number of input and
+ * output parameters specified for the RTAS function.
+ *
+ * rtas_call() returns RTAS status codes, not conventional Linux errno
+ * values. Callers must translate any failure to an appropriate errno
+ * in syscall context. Most callers of RTAS functions that can return
+ * -2 or 990x should use rtas_busy_delay() to correctly handle those
+ * statuses before calling again.
+ *
+ * The return value descriptions are adapted from 7.2.8 [RTAS] Return
+ * Codes of the PAPR and CHRP specifications.
+ *
+ * Context: Process context preferably, interrupt context if
+ *          necessary.  Acquires an internal spinlock and may perform
+ *          GFP_ATOMIC slab allocation in error path. Unsafe for NMI
+ *          context.
+ * Return:
+ * *                          0 - RTAS function call succeeded.
+ * *                         -1 - RTAS function encountered a hardware or
+ *                                platform error, or the token is invalid,
+ *                                or the function is restricted by kernel policy.
+ * *                         -2 - Specs say "A necessary hardware device was busy,
+ *                                and the requested function could not be
+ *                                performed. The operation should be retried at
+ *                                a later time." This is misleading, at least with
+ *                                respect to current RTAS implementations. What it
+ *                                usually means in practice is that the function
+ *                                could not be completed while meeting RTAS's
+ *                                deadline for returning control to the OS (250us
+ *                                for PAPR/PowerVM, typically), but the call may be
+ *                                immediately reattempted to resume work on it.
+ * *                         -3 - Parameter error.
+ * *                         -7 - Unexpected state change.
+ * *                9000...9899 - Vendor-specific success codes.
+ * *                9900...9905 - Advisory extended delay. Caller should try
+ *                                again after ~10^x ms has elapsed, where x is
+ *                                the last digit of the status [0-5]. Again going
+ *                                beyond the PAPR text, 990x on PowerVM indicates
+ *                                contention for RTAS-internal resources. Other
+ *                                RTAS call sequences in progress should be
+ *                                allowed to complete before reattempting the
+ *                                call.
+ * *                      -9000 - Multi-level isolation error.
+ * *              -9999...-9004 - Vendor-specific error codes.
+ * * Additional negative values - Function-specific error.
+ * * Additional positive values - Function-specific success.
+ */
+int rtas_call(int token, int nargs, int nret, int *outputs, ...)
+{
+	struct rtas_args *args = &rtas_args;	/* the global rtas args buffer */
+	struct pin_cookie cookie;
+	unsigned long flags;
+	char *errlog = NULL;
+	va_list list;
+	int status;
+	int i;
+
+	if (token == RTAS_UNKNOWN_SERVICE || !rtas.entry)
+		return -1;
+
+	/*
+	 * Keeping the error value from security_locked_down() would be
+	 * nicer, but callers expect an RTAS status here, not an errno.
+	 */
+	if (token_is_restricted_errinjct(token) &&
+	    security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION))
+		return -1;
+
+	/* Refuse, with a one-time warning, unless MSR_IR and MSR_DR are both set */
+	if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) {
+		WARN_ON_ONCE(1);
+		return -1;
+	}
+
+	raw_spin_lock_irqsave(&rtas_lock, flags);
+	cookie = lockdep_pin_lock(&rtas_lock);
+
+	va_start(list, outputs);
+	va_rtas_call_unlocked(args, token, nargs, nret, list);
+	va_end(list);
+
+	/*
+	 * Status -1 means the command could not be completed because of
+	 * a hardware error; fetch the details while the lock is held.
+	 */
+	if (be32_to_cpu(args->rets[0]) == -1)
+		errlog = __fetch_rtas_last_error(NULL);
+
+	/* rets[0] is the status; the remaining words go to the caller */
+	if (outputs) {
+		for (i = 1; i < nret; i++)
+			outputs[i - 1] = be32_to_cpu(args->rets[i]);
+	}
+	status = nret > 0 ? be32_to_cpu(args->rets[0]) : 0;
+
+	lockdep_unpin_lock(&rtas_lock, cookie);
+	raw_spin_unlock_irqrestore(&rtas_lock, flags);
+
+	if (!errlog)
+		return status;
+
+	log_error(errlog, ERR_TYPE_RTAS_LOG, 0);
+	if (slab_is_available())
+		kfree(errlog);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(rtas_call);
+
+/**
+ * rtas_busy_delay_time() - From an RTAS status value, calculate the
+ *                          suggested delay time in milliseconds.
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ *          the status of a RTAS function call.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 100000 - If @status is 9905.
+ * * 10000  - If @status is 9904.
+ * * 1000   - If @status is 9903.
+ * * 100    - If @status is 9902.
+ * * 10     - If @status is 9901.
+ * * 1      - If @status is either 9900 or -2. This is "wrong" for -2, but
+ *            some callers depend on this behavior, and the worst outcome
+ *            is that they will delay for longer than necessary.
+ * * 0      - If @status is not a busy or extended delay value.
+ */
+unsigned int rtas_busy_delay_time(int status)
+{
+	unsigned int ms;
+	int order;
+
+	if (status == RTAS_BUSY)
+		return 1;
+
+	/* anything outside the extended delay range needs no delay */
+	if (status < RTAS_EXTENDED_DELAY_MIN ||
+	    status > RTAS_EXTENDED_DELAY_MAX)
+		return 0;
+
+	/* 10^order milliseconds, order being the offset into the range */
+	ms = 1;
+	for (order = status - RTAS_EXTENDED_DELAY_MIN; order > 0; order--)
+		ms *= 10;
+
+	return ms;
+}
+
+/*
+ * Early boot fallback for rtas_busy_delay().
+ *
+ * Returns true when the caller should retry: always for RTAS_BUSY, and
+ * for an extended delay status until more than 1000 of them have been
+ * seen in a row. Returns false for every other status.
+ */
+static bool __init rtas_busy_delay_early(int status)
+{
+	static size_t successive_ext_delays __initdata;
+
+	if (status == RTAS_BUSY) {
+		successive_ext_delays = 0;
+		return true;
+	}
+
+	if (status < RTAS_EXTENDED_DELAY_MIN ||
+	    status > RTAS_EXTENDED_DELAY_MAX) {
+		successive_ext_delays = 0;
+		return false;
+	}
+
+	/*
+	 * An extended delay status this early in boot is unlikely, is
+	 * probably not caused by the OS, and cannot be cleared from
+	 * here. Pause briefly and hope the condition is transient; if
+	 * we appear to be stuck in a retry loop, lie to the caller.
+	 */
+	mdelay(1);
+	if (++successive_ext_delays <= 1000)
+		return true;
+
+	pr_err("too many extended delays, giving up\n");
+	dump_stack();
+	successive_ext_delays = 0;
+	return false;
+}
+
+/**
+ * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ *          the status of a RTAS function call.
+ *
+ * Context: Process context. May sleep or schedule.
+ *
+ * Return:
+ * * true  - @status is RTAS_BUSY or an extended delay hint. The
+ *           caller may assume that the CPU has been yielded if necessary,
+ *           and that an appropriate delay for @status has elapsed.
+ *           Generally the caller should reattempt the RTAS call which
+ *           yielded @status.
+ *
+ * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
+ *           caller is responsible for handling @status.
+ */
+bool __ref rtas_busy_delay(int status)
+{
+	unsigned int ms;
+
+	/* Timed sleeps are not possible before timekeeping is up */
+	if (system_state < SYSTEM_SCHEDULING)
+		return rtas_busy_delay_early(status);
+
+	if (status == RTAS_BUSY) {
+		/*
+		 * The call should be repeated right away unless other
+		 * work is waiting for the CPU.
+		 */
+		cond_resched();
+		return true;
+	}
+
+	if (status < RTAS_EXTENDED_DELAY_MIN ||
+	    status > RTAS_EXTENDED_DELAY_MAX) {
+		/*
+		 * Neither busy nor an extended delay: @status is the
+		 * caller's to handle. Still flag misuse from atomic
+		 * context.
+		 */
+		might_sleep();
+		return false;
+	}
+
+	/*
+	 * Hints go up to 100 seconds. A function returning such a status
+	 * is either buggy or will not be slowed noticeably by polling at
+	 * 1HZ, so cap the sleep at one second.
+	 */
+	ms = rtas_busy_delay_time(status);
+	ms = clamp(ms, 1U, 1000U);
+
+	/*
+	 * The hint is an order-of-magnitude suggestion rather than a
+	 * minimum, so pausing for less is fine and possibly better.
+	 * Short waits use usleep_range() to avoid oversleeping; see
+	 * Documentation/timers/timers-howto.rst for the threshold. In
+	 * effect 9900 and 9901 use usleep_range(), 9902-9905 msleep().
+	 */
+	if (ms > 20)
+		msleep(ms);
+	else
+		usleep_range(ms * 100, ms * 1000);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(rtas_busy_delay);
+
+/*
+ * Translate an RTAS error status into a negative errno.
+ *
+ * A status without a mapping is logged and reported as -ERANGE.
+ */
+int rtas_error_rc(int rtas_rc)
+{
+	switch (rtas_rc) {
+	case RTAS_HARDWARE_ERROR:	/* Hardware Error */
+		return -EIO;
+	case RTAS_INVALID_PARAMETER:	/* Bad indicator/domain/etc */
+		return -EINVAL;
+	case -9000:			/* Isolation error */
+		return -EFAULT;
+	case -9001:			/* Outstanding TCE/PTE */
+		return -EEXIST;
+	case -9002:			/* No usable slot */
+		return -ENODEV;
+	default:
+		break;
+	}
+
+	pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
+	return -ERANGE;
+}
+EXPORT_SYMBOL_GPL(rtas_error_rc);
+
+/*
+ * Call get-power-level for @powerdomain, storing the extra output word
+ * in @level.
+ *
+ * The call is repeated, with a 1us pause, for as long as it returns
+ * RTAS_BUSY. Returns -ENOENT if the function has no token, an errno
+ * from rtas_error_rc() for a negative status, else the status itself.
+ */
+int rtas_get_power_level(int powerdomain, int *level)
+{
+	const int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL);
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	for (;;) {
+		rc = rtas_call(token, 1, 2, level, powerdomain);
+		if (rc != RTAS_BUSY)
+			break;
+		udelay(1);
+	}
+
+	return rc < 0 ? rtas_error_rc(rc) : rc;
+}
+EXPORT_SYMBOL_GPL(rtas_get_power_level);
+
+/*
+ * Call set-power-level for @powerdomain with @level, storing the extra
+ * output word in @setlevel.
+ *
+ * Retries through rtas_busy_delay(). Returns -ENOENT if the function
+ * has no token, an errno from rtas_error_rc() for a negative status,
+ * else the status itself.
+ */
+int rtas_set_power_level(int powerdomain, int level, int *setlevel)
+{
+	const int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL);
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	for (;;) {
+		rc = rtas_call(token, 2, 2, setlevel, powerdomain, level);
+		if (!rtas_busy_delay(rc))
+			break;
+	}
+
+	return rc < 0 ? rtas_error_rc(rc) : rc;
+}
+EXPORT_SYMBOL_GPL(rtas_set_power_level);
+
+/*
+ * Call get-sensor-state for @sensor / @index, storing the extra output
+ * word in @state.
+ *
+ * Retries through rtas_busy_delay(). Returns -ENOENT if the function
+ * has no token, an errno from rtas_error_rc() for a negative status,
+ * else the status itself.
+ */
+int rtas_get_sensor(int sensor, int index, int *state)
+{
+	const int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	for (;;) {
+		rc = rtas_call(token, 2, 2, state, sensor, index);
+		if (!rtas_busy_delay(rc))
+			break;
+	}
+
+	return rc < 0 ? rtas_error_rc(rc) : rc;
+}
+EXPORT_SYMBOL_GPL(rtas_get_sensor);
+
+/*
+ * Single-shot variant of rtas_get_sensor(): the call is made once and
+ * never retried. A busy or extended delay status triggers a warning.
+ *
+ * Returns -ENOENT if the function has no token, an errno from
+ * rtas_error_rc() for a negative status, else the status itself.
+ */
+int rtas_get_sensor_fast(int sensor, int index, int *state)
+{
+	const int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
+	bool wants_retry;
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	rc = rtas_call(token, 2, 2, state, sensor, index);
+
+	wants_retry = rc == RTAS_BUSY ||
+		      (rc >= RTAS_EXTENDED_DELAY_MIN &&
+		       rc <= RTAS_EXTENDED_DELAY_MAX);
+	WARN_ON(wants_retry);
+
+	return rc < 0 ? rtas_error_rc(rc) : rc;
+}
+
+/*
+ * Search the "rtas-indicators" property of the RTAS device node for an
+ * entry whose token equals @token.
+ *
+ * The property is read as an array of big-endian (token, maxindex)
+ * pairs. On a match, the entry's maxindex is stored through @maxindex
+ * if that pointer is non-NULL and true is returned. Returns false when
+ * the property is missing or holds no matching entry.
+ */
+bool rtas_indicator_present(int token, int *maxindex)
+{
+	const struct indicator_elem {
+		__be32 token;
+		__be32 maxindex;
+	} *table;
+	int len, nr, i;
+
+	table = of_get_property(rtas.dev, "rtas-indicators", &len);
+	if (!table)
+		return false;
+
+	nr = len / sizeof(struct indicator_elem);
+
+	for (i = 0; i < nr; i++) {
+		if (__be32_to_cpu(table[i].token) == token) {
+			if (maxindex)
+				*maxindex = __be32_to_cpu(table[i].maxindex);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Call set-indicator with @indicator, @index and @new_value.
+ *
+ * Retries through rtas_busy_delay(). Returns -ENOENT if the function
+ * has no token, an errno from rtas_error_rc() for a negative status,
+ * else the status itself.
+ */
+int rtas_set_indicator(int indicator, int index, int new_value)
+{
+	const int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	for (;;) {
+		rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
+		if (!rtas_busy_delay(rc))
+			break;
+	}
+
+	return rc < 0 ? rtas_error_rc(rc) : rc;
+}
+EXPORT_SYMBOL_GPL(rtas_set_indicator);
+
+/*
+ * Single-shot variant of rtas_set_indicator(), ignoring RTAS extended
+ * delay: the call is made once and never retried. A busy or extended
+ * delay status triggers a warning.
+ *
+ * Returns -ENOENT if the function has no token, an errno from
+ * rtas_error_rc() for a negative status, else the status itself.
+ */
+int rtas_set_indicator_fast(int indicator, int index, int new_value)
+{
+	const int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
+	bool wants_retry;
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
+
+	wants_retry = rc == RTAS_BUSY ||
+		      (rc >= RTAS_EXTENDED_DELAY_MIN &&
+		       rc <= RTAS_EXTENDED_DELAY_MAX);
+	WARN_ON(wants_retry);
+
+	return rc < 0 ? rtas_error_rc(rc) : rc;
+}
+
+/**
+ * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR.
+ *
+ * @fw_status: RTAS call status will be placed here if not NULL.
+ *
+ * rtas_ibm_suspend_me() should be called only on a CPU which has
+ * received H_CONTINUE from the H_JOIN hcall. All other active CPUs
+ * should be waiting to return from H_JOIN.
+ *
+ * rtas_ibm_suspend_me() may suspend execution of the OS
+ * indefinitely. Callers should take appropriate measures upon return, such as
+ * resetting watchdog facilities.
+ *
+ * Callers may choose to retry this call if @fw_status is
+ * %RTAS_THREADS_ACTIVE.
+ *
+ * Return:
+ * 0          - The partition has resumed from suspend, possibly after
+ *              migration to a different host.
+ * -ECANCELED - The operation was aborted.
+ * -EAGAIN    - There were other CPUs not in H_JOIN at the time of the call.
+ * -EBUSY     - Some other condition prevented the suspend from succeeding.
+ * -EIO       - Hardware/platform error.
+ */
+int rtas_ibm_suspend_me(int *fw_status)
+{
+	const int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME);
+	const int fwrc = rtas_call(token, 0, 1, NULL);
+
+	/* Hand the raw firmware status back before translating it */
+	if (fw_status)
+		*fw_status = fwrc;
+
+	switch (fwrc) {
+	case 0:
+		return 0;
+	case RTAS_SUSPEND_ABORTED:
+		return -ECANCELED;
+	case RTAS_THREADS_ACTIVE:
+		return -EAGAIN;
+	case RTAS_NOT_SUSPENDABLE:
+	case RTAS_OUTSTANDING_COPROC:
+		return -EBUSY;
+	case -1:
+	default:
+		/* -1 and every status not listed above */
+		return -EIO;
+	}
+}
+
+/*
+ * Run the flash termination hook, if one is registered, then call
+ * system-reboot. Should the call return, its status is logged and the
+ * CPU spins forever. @cmd is not used.
+ */
+void __noreturn rtas_restart(char *cmd)
+{
+	int rc;
+
+	if (rtas_flash_term_hook)
+		rtas_flash_term_hook(SYS_RESTART);
+
+	rc = rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL);
+	pr_emerg("system-reboot returned %d\n", rc);
+
+	while (1)
+		;
+}
+
+/*
+ * Run the flash termination hook, if one is registered, then call
+ * power-off. Should the call return, its status is logged and the CPU
+ * spins forever.
+ */
+void rtas_power_off(void)
+{
+	int rc;
+
+	if (rtas_flash_term_hook)
+		rtas_flash_term_hook(SYS_POWER_OFF);
+
+	/* allow power on only with power button press */
+	rc = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1);
+	pr_emerg("power-off returned %d\n", rc);
+
+	while (1)
+		;
+}
+
+/*
+ * Run the flash termination hook, if one is registered, with SYS_HALT,
+ * then call power-off. Should the call return, its status is logged
+ * and the CPU spins forever.
+ */
+void __noreturn rtas_halt(void)
+{
+	int rc;
+
+	if (rtas_flash_term_hook)
+		rtas_flash_term_hook(SYS_HALT);
+
+	/* allow power on only with power button press */
+	rc = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1);
+	pr_emerg("power-off returned %d\n", rc);
+
+	while (1)
+		;
+}
+
+/* Must be in the RMO region, so we place it here */
+static char rtas_os_term_buf[2048];
+static bool ibm_extended_os_term;
+
+/*
+ * rtas_os_term() - report an OS panic to firmware via ibm,os-term.
+ * @str: panic text; it is sent prefixed with "OS panic: " and truncated
+ *       to fit rtas_os_term_buf.
+ *
+ * Does nothing unless the function has a token and
+ * ibm_extended_os_term is set. The call is repeated for as long as
+ * firmware answers with a busy or extended delay status; any other
+ * non-zero final status is logged.
+ */
+void rtas_os_term(char *str)
+{
+	s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM);
+	static struct rtas_args args;
+	int status;
+
+	/*
+	 * Firmware with the ibm,extended-os-term property is guaranteed
+	 * to always return from an ibm,os-term call. Earlier versions without
+	 * this property may terminate the partition which we want to avoid
+	 * since it interferes with panic_timeout.
+	 */
+
+	if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term)
+		return;
+
+	snprintf(rtas_os_term_buf, sizeof(rtas_os_term_buf), "OS panic: %s", str);
+
+	/*
+	 * Keep calling as long as RTAS returns a "try again" status,
+	 * but don't use rtas_busy_delay(), which potentially
+	 * schedules.
+	 *
+	 * rtas_call_unlocked() has no "outputs" pointer parameter: its
+	 * variadic list starts with the first input parameter. The
+	 * buffer address must therefore be the first variadic argument,
+	 * otherwise a stray NULL is consumed as the only input and the
+	 * message never reaches firmware.
+	 */
+	do {
+		rtas_call_unlocked(&args, token, 1, 1, __pa(rtas_os_term_buf));
+		status = be32_to_cpu(args.rets[0]);
+	} while (rtas_busy_delay_time(status));
+
+	if (status != 0)
+		pr_emerg("ibm,os-term call failed %d\n", status);
+}
+
+/**
+ * rtas_activate_firmware() - Activate a new version of firmware.
+ *
+ * Context: This function may sleep.
+ *
+ * Activate a new version of partition firmware. The OS must call this
+ * after resuming from a partition hibernation or migration in order
+ * to maintain the ability to perform live firmware updates. It's not
+ * catastrophic for this method to be absent or to fail; just log the
+ * condition in that case.
+ */
+void rtas_activate_firmware(void)
+{
+	const int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+	int status;
+
+	if (token == RTAS_UNKNOWN_SERVICE) {
+		pr_notice("ibm,activate-firmware method unavailable\n");
+		return;
+	}
+
+	/* repeat for as long as rtas_busy_delay() asks for a retry */
+	for (;;) {
+		status = rtas_call(token, 0, 1, NULL);
+		if (!rtas_busy_delay(status))
+			break;
+	}
+
+	if (status != 0)
+		pr_err("ibm,activate-firmware failed (%i)\n", status);
+}
+
+/**
+ * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+ *                          extended event log.
+ * @log: RTAS error/event log
+ * @section_id: two character section identifier
+ *
+ * Sections are walked from the start of the vendor log, each one being
+ * skipped by the length it declares, until the end of the extended log
+ * is reached.
+ *
+ * Return: A pointer to the specified errorlog or NULL if not found,
+ * if the log format is not understood, or if a section declaring a
+ * length of zero is met before a match.
+ */
+noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+						      uint16_t section_id)
+{
+	struct rtas_ext_event_log_v6 *ext_log =
+		(struct rtas_ext_event_log_v6 *)log->buffer;
+	struct pseries_errorlog *sect;
+	unsigned char *p, *log_end;
+	uint32_t ext_log_length = rtas_error_extended_log_length(log);
+	uint8_t log_format = rtas_ext_event_log_format(ext_log);
+	uint32_t company_id = rtas_ext_event_company_id(ext_log);
+
+	/* Check that we understand the format */
+	if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+	    log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+	    company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+		return NULL;
+
+	log_end = log->buffer + ext_log_length;
+	p = ext_log->vendor_log;
+
+	while (p < log_end) {
+		size_t sect_len;
+
+		sect = (struct pseries_errorlog *)p;
+		if (pseries_errorlog_id(sect) == section_id)
+			return sect;
+
+		/*
+		 * A section claiming zero length would never advance p.
+		 * Treat the log as malformed instead of spinning forever.
+		 */
+		sect_len = pseries_errorlog_length(sect);
+		if (!sect_len)
+			return NULL;
+
+		p += sect_len;
+	}
+
+	return NULL;
+}
+
+/*
+ * The sys_rtas syscall, as originally designed, allows root to pass
+ * arbitrary physical addresses to RTAS calls. A number of RTAS calls
+ * can be abused to write to arbitrary memory and do other things that
+ * are potentially harmful to system integrity, and thus should only
+ * be used inside the kernel and not exposed to userspace.
+ *
+ * All known legitimate users of the sys_rtas syscall will only ever
+ * pass addresses that fall within the RMO buffer, and use a known
+ * subset of RTAS calls.
+ *
+ * Accordingly, we filter RTAS requests to check that the call is
+ * permitted, and that provided pointers fall within the RMO buffer.
+ * If a function is allowed to be invoked via the syscall, then its
+ * entry in the rtas_functions table points to a rtas_filter that
+ * describes its constraints, with the indexes of the parameters which
+ * are expected to contain addresses and sizes of buffers allocated
+ * inside the RMO buffer.
+ */
+
+/*
+ * True when @base <= @end and both lie inside the RTAS_USER_REGION_SIZE
+ * bytes starting at rtas_rmo_buf. @end is treated as inclusive.
+ */
+static bool in_rmo_buf(u32 base, u32 end)
+{
+	if (base > end)
+		return false;
+
+	if (base < rtas_rmo_buf ||
+	    base >= (rtas_rmo_buf + RTAS_USER_REGION_SIZE))
+		return false;
+
+	return end >= rtas_rmo_buf &&
+	       end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
+}
+
+/*
+ * Decide whether a sys_rtas request has to be rejected.
+ * @token: RTAS function token supplied by the caller.
+ * @nargs: caller-supplied input count; only used in the log message.
+ * @args:  parameter block holding the big-endian input words.
+ *
+ * Returns false if the call may proceed, or true, after logging a
+ * rate-limited message, if it must be blocked.
+ */
+static bool block_rtas_call(int token, int nargs,
+			    struct rtas_args *args)
+{
+	const struct rtas_function *func;
+	const struct rtas_filter *f;
+	const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP);
+	const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR);
+	u32 base, size, end;
+
+	/*
+	 * If this token doesn't correspond to a function the kernel
+	 * understands, you're not allowed to call it.
+	 */
+	func = rtas_token_to_function_untrusted(token);
+	if (!func)
+		goto err;
+	/*
+	 * And only functions with filters attached are allowed.
+	 */
+	f = func->filter;
+	if (!f)
+		goto err;
+	/*
+	 * And some functions aren't allowed on LE.
+	 */
+	if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le)
+		goto err;
+
+	/* First buffer argument, if the filter names one (-1 means none) */
+	if (f->buf_idx1 != -1) {
+		base = be32_to_cpu(args->args[f->buf_idx1]);
+		/*
+		 * Size comes from the size argument if the filter names
+		 * one, else from the filter's fixed size, else 1 byte.
+		 */
+		if (f->size_idx1 != -1)
+			size = be32_to_cpu(args->args[f->size_idx1]);
+		else if (f->fixed_size)
+			size = f->fixed_size;
+		else
+			size = 1;
+
+		/* inclusive address of the last byte */
+		end = base + size - 1;
+
+		/*
+		 * Special case for ibm,platform-dump - NULL buffer
+		 * address is used to indicate end of dump processing
+		 *
+		 * This returns before a second buffer, if any, is
+		 * examined.
+		 */
+		if (is_platform_dump && base == 0)
+			return false;
+
+		if (!in_rmo_buf(base, end))
+			goto err;
+	}
+
+	/* Second buffer argument, checked the same way as the first */
+	if (f->buf_idx2 != -1) {
+		base = be32_to_cpu(args->args[f->buf_idx2]);
+		if (f->size_idx2 != -1)
+			size = be32_to_cpu(args->args[f->size_idx2]);
+		else if (f->fixed_size)
+			size = f->fixed_size;
+		else
+			size = 1;
+		end = base + size - 1;
+
+		/*
+		 * Special case for ibm,configure-connector where the
+		 * address can be 0
+		 */
+		if (is_config_conn && base == 0)
+			return false;
+
+		if (!in_rmo_buf(base, end))
+			goto err;
+	}
+
+	return false;
+err:
+	pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
+	pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
+			   token, nargs, current->comm);
+	return true;
+}
+
+/*
+ * sys_rtas: perform an RTAS call on behalf of user space.
+ * @uargs: user pointer to a struct rtas_args.
+ *
+ * We assume to be passed big endian arguments
+ *
+ * Returns 0 once the return words have been copied back to @uargs, or:
+ * -EPERM   caller lacks CAP_SYS_ADMIN;
+ * -EINVAL  rtas.entry is unset, the argument counts are out of range,
+ *          the token is RTAS_UNKNOWN_SERVICE, or block_rtas_call()
+ *          rejects the request;
+ * -EFAULT  a copy from or to user space fails;
+ * or the error from security_locked_down() or from
+ * rtas_syscall_dispatch_ibm_suspend_me().
+ */
+SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
+{
+	struct pin_cookie cookie;
+	struct rtas_args args;
+	unsigned long flags;
+	char *buff_copy, *errbuf = NULL;
+	int nargs, nret, token;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!rtas.entry)
+		return -EINVAL;
+
+	/* Fetch only the three leading u32 words: token, nargs, nret */
+	if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
+		return -EFAULT;
+
+	nargs = be32_to_cpu(args.nargs);
+	nret  = be32_to_cpu(args.nret);
+	token = be32_to_cpu(args.token);
+
+	/* inputs and return words share args.args and must both fit */
+	if (nargs >= ARRAY_SIZE(args.args)
+	    || nret > ARRAY_SIZE(args.args)
+	    || nargs + nret > ARRAY_SIZE(args.args))
+		return -EINVAL;
+
+	/* Copy in args. */
+	if (copy_from_user(args.args, uargs->args,
+			   nargs * sizeof(rtas_arg_t)) != 0)
+		return -EFAULT;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	/* return words follow the inputs; clear them before the call */
+	args.rets = &args.args[nargs];
+	memset(args.rets, 0, nret * sizeof(rtas_arg_t));
+
+	if (block_rtas_call(token, nargs, &args))
+		return -EINVAL;
+
+	if (token_is_restricted_errinjct(token)) {
+		int err;
+
+		err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION);
+		if (err)
+			return err;
+	}
+
+	/* Need to handle ibm,suspend_me call specially */
+	if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) {
+
+		/*
+		 * rtas_ibm_suspend_me assumes the streamid handle is in cpu
+		 * endian, or at least the hcall within it requires it.
+		 */
+		int rc = 0;
+		u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
+		              | be32_to_cpu(args.args[1]);
+		rc = rtas_syscall_dispatch_ibm_suspend_me(handle);
+		/* -EAGAIN and -EIO are reported as RTAS statuses, not errnos */
+		if (rc == -EAGAIN)
+			args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
+		else if (rc == -EIO)
+			args.rets[0] = cpu_to_be32(-1);
+		else if (rc)
+			return rc;
+		goto copy_return;
+	}
+
+	/* obtained before rtas_lock is taken; may be NULL */
+	buff_copy = get_errorlog_buffer();
+
+	raw_spin_lock_irqsave(&rtas_lock, flags);
+	cookie = lockdep_pin_lock(&rtas_lock);
+
+	/* the call goes through the global parameter block */
+	rtas_args = args;
+	do_enter_rtas(&rtas_args);
+	args = rtas_args;
+
+	/* A -1 return code indicates that the last command couldn't
+	   be completed due to a hardware error. */
+	if (be32_to_cpu(args.rets[0]) == -1)
+		errbuf = __fetch_rtas_last_error(buff_copy);
+
+	lockdep_unpin_lock(&rtas_lock, cookie);
+	raw_spin_unlock_irqrestore(&rtas_lock, flags);
+
+	if (buff_copy) {
+		if (errbuf)
+			log_error(errbuf, ERR_TYPE_RTAS_LOG, 0);
+		kfree(buff_copy);
+	}
+
+ copy_return:
+	/* Copy out args. */
+	if (copy_to_user(uargs->args + nargs,
+			 args.args + nargs,
+			 nret * sizeof(rtas_arg_t)) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Reset the token of every rtas_function_table entry to
+ * RTAS_UNKNOWN_SERVICE, complain about entries that are duplicated or out
+ * of name order (the table is binary-searched by name), then fill in the
+ * tokens from the u32-sized properties of the RTAS device node.
+ */
+static void __init rtas_function_table_init(void)
+{
+	struct property *prop;
+
+	for (size_t idx = 0; idx < ARRAY_SIZE(rtas_function_table); ++idx) {
+		struct rtas_function *entry = &rtas_function_table[idx];
+		struct rtas_function *prev;
+		int order;
+
+		entry->token = RTAS_UNKNOWN_SERVICE;
+
+		/* The first entry has no predecessor to be compared with. */
+		if (idx > 0) {
+			prev = &rtas_function_table[idx - 1];
+			order = strcmp(prev->name, entry->name);
+
+			if (order == 0)
+				pr_err("'%s' has duplicate function table entries\n",
+				       entry->name);
+			else if (order > 0)
+				pr_err("function table unsorted: '%s' wrongly precedes '%s'\n",
+				       prev->name, entry->name);
+		}
+	}
+
+	for_each_property_of_node(rtas.dev, prop) {
+		struct rtas_function *match;
+
+		/* Only properties holding a single u32 are considered. */
+		if (prop->length == sizeof(u32)) {
+			match = __rtas_name_to_function(prop->name);
+			if (match) {
+				match->token = be32_to_cpup((__be32 *)prop->value);
+
+				pr_debug("function %s has token %u\n",
+					 match->name, match->token);
+			}
+		}
+	}
+}
+
+/*
+ * Call early during boot, before mem init, to retrieve the RTAS
+ * information from the device-tree and allocate the RMO buffer for userland
+ * accesses.
+ *
+ * Returns without doing anything further (rtas.dev left NULL) when there is
+ * no "rtas" node or it lacks "linux,rtas-base" or "rtas-size". Panics when
+ * the RTAS_USER_REGION_SIZE buffer cannot be allocated below the limit.
+ */
+void __init rtas_initialize(void)
+{
+	unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
+	u32 base, size, entry;
+	int no_base, no_size, no_entry;
+
+	/* Get RTAS dev node and fill up our "rtas" structure with infos
+	 * about it.
+	 */
+	rtas.dev = of_find_node_by_name(NULL, "rtas");
+	if (!rtas.dev)
+		return;
+
+	no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base);
+	no_size = of_property_read_u32(rtas.dev, "rtas-size", &size);
+	if (no_base || no_size) {
+		of_node_put(rtas.dev);
+		rtas.dev = NULL;
+		return;
+	}
+
+	rtas.base = base;
+	rtas.size = size;
+	/* Without an explicit entry property the entry point is the base. */
+	no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
+	rtas.entry = no_entry ? rtas.base : entry;
+
+	init_error_log_max();
+
+	/* Must be called before any function token lookups */
+	rtas_function_table_init();
+
+	/*
+	 * Discover this now to avoid a device tree lookup in the
+	 * panic path.
+	 */
+	ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term");
+
+	/*
+	 * RTAS was found: allocate the RMO buffer for it below rtas_region,
+	 * which on LPAR is further limited to the RMA size.
+	 */
+#ifdef CONFIG_PPC64
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
+#endif
+	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
+						 0, rtas_region);
+	if (!rtas_rmo_buf)
+		/* Report the size that was requested, not the alignment. */
+		panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+		      (unsigned long)RTAS_USER_REGION_SIZE, &rtas_region);
+
+	rtas_work_area_reserve_arena(rtas_region);
+}
+
+/*
+ * Flat device tree scan callback: pick up the RTAS base/entry/size (and,
+ * when configured, the udbg console tokens) from the depth-1 "rtas" node.
+ *
+ * Returns 0 for any other node so the scan continues, and 1 once the
+ * "rtas" node has been processed.
+ */
+int __init early_init_dt_scan_rtas(unsigned long node,
+		const char *uname, int depth, void *data)
+{
+	const u32 *base_prop, *entry_prop, *size_prop;
+
+	if (depth != 1)
+		return 0;
+	if (strcmp(uname, "rtas"))
+		return 0;
+
+	base_prop  = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+	entry_prop = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+	size_prop  = of_get_flat_dt_prop(node, "rtas-size", NULL);
+
+#ifdef CONFIG_PPC64
+	/* need this feature to decide the crashkernel offset */
+	if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
+		powerpc_firmware_features |= FW_FEATURE_LPAR;
+#endif
+
+	/* All three properties must be present before any of them is used. */
+	if (base_prop && entry_prop && size_prop) {
+		rtas.base = *base_prop;
+		rtas.entry = *entry_prop;
+		rtas.size = *size_prop;
+	}
+
+#ifdef CONFIG_UDBG_RTAS_CONSOLE
+	{
+		const u32 *token_prop;
+
+		token_prop = of_get_flat_dt_prop(node, "put-term-char", NULL);
+		if (token_prop)
+			rtas_putchar_token = *token_prop;
+
+		token_prop = of_get_flat_dt_prop(node, "get-term-char", NULL);
+		if (token_prop)
+			rtas_getchar_token = *token_prop;
+
+		/* The console needs both directions to be available. */
+		if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
+		    rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
+			udbg_init_rtas_console();
+	}
+#endif
+
+	/* break now */
+	return 1;
+}
+
+/*
+ * Handshake state shared by rtas_give_timebase() and rtas_take_timebase():
+ * a non-zero 'timebase' is a value published by the giver and not yet
+ * consumed; the taker writes it back to zero once it has been applied.
+ */
+static DEFINE_RAW_SPINLOCK(timebase_lock);
+static u64 timebase = 0;
+
+/*
+ * Giver side of the timebase handshake: freeze the timebase through RTAS,
+ * publish the current value in 'timebase', wait until rtas_take_timebase()
+ * has consumed it (set it back to zero), then thaw the timebase.
+ */
+void rtas_give_timebase(void)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&timebase_lock, flags);
+	hard_irq_disable();
+	rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL);
+	timebase = get_tb();
+	/* Drop the lock only; interrupts stay off until local_irq_restore(). */
+	raw_spin_unlock(&timebase_lock);
+
+	/* Spin until the taker has cleared the published value. */
+	while (timebase)
+		barrier();
+	rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL);
+	local_irq_restore(flags);
+}
+
+/*
+ * Taker side of the timebase handshake: wait for rtas_give_timebase() to
+ * publish a non-zero value, load it into this CPU's timebase with set_tb()
+ * (upper and lower 32 bits passed separately), and clear 'timebase' to
+ * signal the giver that it may proceed.
+ */
+void rtas_take_timebase(void)
+{
+	/* Spin until a value has been published. */
+	while (!timebase)
+		barrier();
+	raw_spin_lock(&timebase_lock);
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;
+	raw_spin_unlock(&timebase_lock);
+}
diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S
new file mode 100644
index 0000000000..6ce95ddadb
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_entry.S
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * RTAS is called with MSR IR, DR, EE disabled, and LR in the return address.
+ *
+ * Note: r3 is an input parameter to rtas, so don't trash it...
+ */
+
+#ifdef CONFIG_PPC32
+/*
+ * enter_rtas (32-bit): call the RTAS entry point with instruction and data
+ * translation turned off, then return to the caller with the original MSR.
+ *
+ * A stack frame of INT_FRAME_SIZE bytes is created; the caller's LR and the
+ * current MSR are kept in it across the call. The stack pointer is stored
+ * at THREAD + RTAS_SP(r2) for the duration of the call and zeroed again
+ * afterwards. r3 is passed through to RTAS untouched.
+ */
+_GLOBAL(enter_rtas)
+	stwu	r1,-INT_FRAME_SIZE(r1)
+	mflr	r0
+	stw	r0,INT_FRAME_SIZE+4(r1)	/* save return address */
+	LOAD_REG_ADDR(r4, rtas)
+	lis	r6,1f@ha	/* physical return address for rtas */
+	addi	r6,r6,1f@l
+	tophys(r6,r6)
+	lwz	r8,RTASENTRY(r4)	/* r8 = RTAS entry point */
+	lwz	r4,RTASBASE(r4)		/* r4 = RTAS base */
+	mfmsr	r9
+	stw	r9,8(r1)		/* save current MSR */
+	li	r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)	/* MSR for RTAS: translation off */
+	mtlr	r6			/* RTAS returns to the first 1: below */
+	stw	r1, THREAD + RTAS_SP(r2)
+	mtspr	SPRN_SRR0,r8
+	mtspr	SPRN_SRR1,r9
+	rfi
+1:
+	/* Back from RTAS, translation still off: jump to the next 1: with MSR_KERNEL */
+	lis	r8, 1f@h
+	ori	r8, r8, 1f@l
+	LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
+	mtspr	SPRN_SRR0,r8
+	mtspr	SPRN_SRR1,r9
+	rfi			/* Reactivate MMU translation */
+1:
+	lwz	r8,INT_FRAME_SIZE+4(r1)	/* get return address */
+	lwz	r9,8(r1)	/* original msr value */
+	addi	r1,r1,INT_FRAME_SIZE
+	li	r0,0
+	stw	r0, THREAD + RTAS_SP(r2)	/* clear the saved RTAS stack pointer */
+	mtlr	r8
+	mtmsr	r9
+	blr			/* return to caller */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
+
+#else /* CONFIG_PPC32 */
+#include <asm/exception-64s.h>
+
+/*
+ * 32-bit rtas on 64-bit machines has the additional problem that RTAS may
+ * not preserve the upper parts of registers it uses.
+ *
+ * enter_rtas (64-bit) therefore:
+ *  - saves LR in the caller's frame and creates a SWITCH_FRAME_SIZE frame,
+ *  - saves r2, the non-volatile GPRs, CR, CTR, XER, DAR and DSISR in it,
+ *  - records r1 and the current MSR in the PACA (PACAR1 / PACASAVEDMSR),
+ *  - enters RTAS at rtas.entry via RFI_TO_KERNEL with LR pointing at the
+ *    real-mode address of rtas_return_loc.
+ * rtas_return_loc then reloads r13/r1/MSR from the PACA and continues in
+ * rtas_restore_regs, whose address is read from the literal at label 1.
+ */
+_GLOBAL(enter_rtas)
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
+
+	/* Because RTAS is running in 32b mode, it clobbers the high order half
+	 * of all registers that it saves.  We therefore save those registers
+	 * RTAS might touch to the stack.  (r0, r3-r12 are caller saved)
+	 */
+	SAVE_GPR(2, r1)			/* Save the TOC */
+	SAVE_NVGPRS(r1)			/* Save the non-volatiles */
+
+	mfcr	r4
+	std	r4,_CCR(r1)
+	mfctr	r5
+	std	r5,_CTR(r1)
+	mfspr	r6,SPRN_XER
+	std	r6,_XER(r1)
+	mfdar	r7
+	std	r7,_DAR(r1)
+	mfdsisr	r8
+	std	r8,_DSISR(r1)
+
+	/* Temporary workaround to clear CR until RTAS can be modified to
+	 * ignore all bits.
+	 */
+	li	r0,0
+	mtcr	r0
+
+	mfmsr	r6
+
+	/* Unfortunately, the stack pointer and the MSR are also clobbered,
+	 * so they are saved in the PACA which allows us to restore
+	 * our original state after RTAS returns.
+	 */
+	std	r1,PACAR1(r13)
+	std	r6,PACASAVEDMSR(r13)
+
+	/* Setup our real return addr */
+	LOAD_REG_ADDR(r4,rtas_return_loc)
+	clrldi	r4,r4,2			/* convert to realmode address */
+	mtlr	r4
+
+__enter_rtas:
+	LOAD_REG_ADDR(r4, rtas)
+	ld	r5,RTASENTRY(r4)	/* get the rtas->entry value */
+	ld	r4,RTASBASE(r4)		/* get the rtas->base value */
+
+	/*
+	 * RTAS runs in 32-bit big endian real mode, but leave MSR[RI] on as we
+	 * may hit NMI (SRESET or MCE) while in RTAS. RTAS should disable RI in
+	 * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S]
+	 * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if
+	 * MSR[S] is set, it will remain when entering RTAS.
+	 * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV
+	 * from the saved MSR value and insert into the value RTAS will use.
+	 */
+	extrdi	r0, r6, 1, 63 - MSR_HV_LG
+	LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI)
+	insrdi	r6, r0, 1, 63 - MSR_HV_LG
+
+	li      r0,0
+	mtmsrd  r0,1                    /* disable RI before using SRR0/1 */
+	
+	mtspr	SPRN_SRR0,r5		/* SRR0 = RTAS entry point */
+	mtspr	SPRN_SRR1,r6		/* SRR1 = MSR value built above */
+	RFI_TO_KERNEL
+	b	.	/* prevent speculative execution */
+rtas_return_loc:
+	FIXUP_ENDIAN
+
+	/* Set SF before anything. */
+	LOAD_REG_IMMEDIATE(r6, MSR_KERNEL & ~(MSR_IR|MSR_DR))
+	mtmsrd	r6
+
+	/* relocation is off at this point */
+	GET_PACA(r13)
+
+	/* Branch-and-link to the next instruction to learn our current address */
+	bcl	20,31,$+4
+0:	mflr	r3
+	ld	r3,(1f-0b)(r3)		/* get &rtas_restore_regs */
+
+	ld	r1,PACAR1(r13)		/* Restore our SP */
+	ld	r4,PACASAVEDMSR(r13)	/* Restore our MSR */
+
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	RFI_TO_KERNEL
+	b	.	/* prevent speculative execution */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
+_ASM_NOKPROBE_SYMBOL(__enter_rtas)
+_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
+
+	/* 8-byte aligned literal holding the address loaded by rtas_return_loc */
+	.align	3
+1:	.8byte	rtas_restore_regs
+
+/*
+ * Final stage of the 64-bit enter_rtas: reload everything that was saved in
+ * the SWITCH_FRAME_SIZE frame before the call, pop the frame and return to
+ * enter_rtas's caller through the LR saved at 16(r1).
+ */
+rtas_restore_regs:
+	/* relocation is on at this point */
+	REST_GPR(2, r1)			/* Restore the TOC */
+	REST_NVGPRS(r1)			/* Restore the non-volatiles */
+
+	ld	r4,_CCR(r1)
+	mtcr	r4
+	ld	r5,_CTR(r1)
+	mtctr	r5
+	ld	r6,_XER(r1)
+	mtspr	SPRN_XER,r6
+	ld	r7,_DAR(r1)
+	mtdar	r7
+	ld	r8,_DSISR(r1)
+	mtdsisr	r8
+
+	addi	r1,r1,SWITCH_FRAME_SIZE	/* Unstack our frame */
+	ld	r0,16(r1)		/* get return address */
+
+	mtlr	r0
+	blr				/* return to caller */
+
+#endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
new file mode 100644
index 0000000000..359577ec16
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  c 2001 PPC 64 Team, IBM Corp
+ *
+ * /proc/powerpc/rtas/firmware_flash interface
+ *
+ * This file implements a firmware_flash interface to pump a firmware
+ * image into the kernel.  At reboot time rtas_restart() will see the
+ * firmware image and flash it as it reboots (see rtas.c).
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/reboot.h>
+#include <asm/delay.h>
+#include <linux/uaccess.h>
+#include <asm/rtas.h>
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "rtas_flash"
+
+#define FIRMWARE_FLASH_NAME "firmware_flash"   
+#define FIRMWARE_UPDATE_NAME "firmware_update"
+#define MANAGE_FLASH_NAME "manage_flash"
+#define VALIDATE_FLASH_NAME "validate_flash"
+
+/* General RTAS Status Codes */
+#define RTAS_RC_SUCCESS  0
+#define RTAS_RC_HW_ERR	-1
+#define RTAS_RC_BUSY	-2
+
+/* Flash image status values */
+#define FLASH_AUTH           -9002 /* RTAS Not Service Authority Partition */
+#define FLASH_NO_OP          -1099 /* No operation initiated by user */	
+#define FLASH_IMG_SHORT	     -1005 /* Flash image shorter than expected */
+#define FLASH_IMG_BAD_LEN    -1004 /* Bad length value in flash list block */
+#define FLASH_IMG_NULL_DATA  -1003 /* Bad data value in flash list block */
+#define FLASH_IMG_READY      0     /* Firmware img ready for flash on reboot */
+
+/* Manage image status values */
+#define MANAGE_AUTH          -9002 /* RTAS Not Service Authority Partition */
+#define MANAGE_ACTIVE_ERR    -9001 /* RTAS Cannot Overwrite Active Img */
+#define MANAGE_NO_OP         -1099 /* No operation initiated by user */
+#define MANAGE_PARAM_ERR     -3    /* RTAS Parameter Error */
+#define MANAGE_HW_ERR        -1    /* RTAS Hardware Error */
+
+/* Validate image status values */
+#define VALIDATE_AUTH          -9002 /* RTAS Not Service Authority Partition */
+#define VALIDATE_NO_OP         -1099 /* No operation initiated by the user */
+#define VALIDATE_INCOMPLETE    -1002 /* User copied < VALIDATE_BUF_SIZE */
+#define VALIDATE_READY	       -1001 /* Firmware image ready for validation */
+#define VALIDATE_PARAM_ERR     -3    /* RTAS Parameter Error */
+#define VALIDATE_HW_ERR        -1    /* RTAS Hardware Error */
+
+/* ibm,validate-flash-image update result tokens */
+#define VALIDATE_TMP_UPDATE    0     /* T side will be updated */
+#define VALIDATE_FLASH_AUTH    1     /* Partition does not have authority */
+#define VALIDATE_INVALID_IMG   2     /* Candidate image is not valid */
+#define VALIDATE_CUR_UNKNOWN   3     /* Current fixpack level is unknown */
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image, and the new image is downlevel from current image
+ */
+#define VALIDATE_TMP_COMMIT_DL 4
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image
+ */
+#define VALIDATE_TMP_COMMIT    5
+/*
+ * T side will be updated with a downlevel image
+ */
+#define VALIDATE_TMP_UPDATE_DL 6
+/*
+ * The candidate image's release date is later than the system's firmware
+ * service entitlement date - service warranty period has expired
+ */
+#define VALIDATE_OUT_OF_WRNTY  7
+
+/* ibm,manage-flash-image operation tokens */
+#define RTAS_REJECT_TMP_IMG   0
+#define RTAS_COMMIT_TMP_IMG   1
+
+/* Array sizes */
+#define VALIDATE_BUF_SIZE 4096    
+#define VALIDATE_MSG_LEN  256
+#define RTAS_MSG_MAXLEN   64
+
+/* Quirk - RTAS requires 4k list length and block size */
+#define RTAS_BLKLIST_LENGTH 4096
+#define RTAS_BLK_SIZE 4096
+
+/* One chunk of the firmware image, as accumulated by rtas_flash_write(). */
+struct flash_block {
+	char *data;		/* chunk buffer, allocated from flash_block_cache */
+	unsigned long length;	/* number of image bytes stored in data */
+};
+
+/* This struct is very similar but not identical to
+ * that needed by the rtas flash update.
+ * All we need to do for rtas is rewrite num_blocks
+ * into a version/length and translate the pointers
+ * to absolute.
+ *
+ * NOTE(review): the "- 16" below presumably accounts for the num_blocks and
+ * next fields so that one node fits in RTAS_BLKLIST_LENGTH bytes; this
+ * assumes 8-byte longs and pointers -- confirm.
+ */
+#define FLASH_BLOCKS_PER_NODE ((RTAS_BLKLIST_LENGTH - 16) / sizeof(struct flash_block))
+struct flash_block_list {
+	unsigned long num_blocks;	/* entries of blocks[] in use while building */
+	struct flash_block_list *next;	/* next node, or NULL for the last one */
+	struct flash_block blocks[FLASH_BLOCKS_PER_NODE];
+};
+
+static struct flash_block_list *rtas_firmware_flash_list;
+
+/* Use slab cache to guarantee 4k alignment */
+static struct kmem_cache *flash_block_cache = NULL;
+
+#define FLASH_BLOCK_LIST_VERSION (1UL)
+
+/*
+ * Local copy of the flash block list.
+ *
+ * The rtas_firmware_flash_list variable will be
+ * set once the data is fully read.
+ *
+ * For convenience as we build the list we use virtual addrs,
+ * we do not fill in the version number, and the length field
+ * is treated as the number of entries currently in the block
+ * (i.e. not a byte count).  This is all fixed when calling 
+ * the flash routine.
+ */
+
+/*
+ * State behind the firmware_flash / firmware_update proc files.
+ * Status int must be first member of struct
+ */
+struct rtas_update_flash_t
+{
+	int status;			/* Flash update status */
+	struct flash_block_list *flist; /* Local copy of flash block list */
+};
+
+/*
+ * State behind the manage_flash proc file.
+ * Status int must be first member of struct
+ */
+struct rtas_manage_flash_t
+{
+	int status;			/* Returned status */
+};
+
+/*
+ * State behind the validate_flash proc file.
+ * Status int must be first member of struct
+ */
+struct rtas_validate_flash_t
+{
+	int status;		 	/* Returned status */	
+	char *buf;			/* Candidate image buffer */
+	unsigned int buf_size;		/* Size of image buf */
+	unsigned int update_results;	/* Update results token */
+};
+
+static struct rtas_update_flash_t rtas_update_flash_data;
+static struct rtas_manage_flash_t rtas_manage_flash_data;
+static struct rtas_validate_flash_t rtas_validate_flash_data;
+static DEFINE_MUTEX(rtas_update_flash_mutex);
+static DEFINE_MUTEX(rtas_manage_flash_mutex);
+static DEFINE_MUTEX(rtas_validate_flash_mutex);
+
+/*
+ * Sanity-check a flash block list and add up the image size.
+ *
+ * Returns FLASH_IMG_NULL_DATA for a block without data, FLASH_IMG_BAD_LEN
+ * for a block whose length is zero or above RTAS_BLK_SIZE, FLASH_NO_OP when
+ * fewer than two bytes were supplied in total, and FLASH_IMG_READY (after
+ * logging the size) otherwise.
+ */
+static int flash_list_valid(struct flash_block_list *flist)
+{
+	struct flash_block_list *node = flist;
+	unsigned long total = 0;
+	int idx;
+
+	while (node) {
+		for (idx = 0; idx < node->num_blocks; idx++) {
+			const struct flash_block *blk = &node->blocks[idx];
+
+			if (!blk->data)
+				return FLASH_IMG_NULL_DATA;
+			if (blk->length == 0 || blk->length > RTAS_BLK_SIZE)
+				return FLASH_IMG_BAD_LEN;
+
+			total += blk->length;
+		}
+		node = node->next;
+	}
+
+	/* An image of less than two bytes counts as "nothing written". */
+	if (total < 2)
+		return FLASH_NO_OP;
+
+	printk(KERN_INFO "FLASH: flash image with %ld bytes stored for hardware flash on reboot\n", total);
+
+	return FLASH_IMG_READY;
+}
+
+/*
+ * Release a whole flash block list: every data block of every node, then
+ * the node itself, all back to flash_block_cache.
+ */
+static void free_flash_list(struct flash_block_list *f)
+{
+	struct flash_block_list *node, *following;
+	int idx;
+
+	for (node = f; node; node = following) {
+		idx = 0;
+		while (idx < node->num_blocks) {
+			kmem_cache_free(flash_block_cache, node->blocks[idx].data);
+			idx++;
+		}
+
+		/* Read the link before the node that holds it is freed. */
+		following = node->next;
+		kmem_cache_free(flash_block_cache, node);
+	}
+}
+
+/*
+ * Release handler of the flash proc files. If data was written while the
+ * file was open, any previously saved image is discarded, the new list is
+ * validated (unless the status is FLASH_AUTH) and either kept as
+ * rtas_firmware_flash_list or freed. Always returns 0.
+ */
+static int rtas_flash_release(struct inode *inode, struct file *file)
+{
+	struct rtas_update_flash_t *const uf = &rtas_update_flash_data;
+
+	mutex_lock(&rtas_update_flash_mutex);
+
+	/* No list means nothing was written during this open. */
+	if (!uf->flist)
+		goto unlock;
+
+	/* A new flash attempt replaces whatever image was saved before. */
+	if (rtas_firmware_flash_list) {
+		free_flash_list(rtas_firmware_flash_list);
+		rtas_firmware_flash_list = NULL;
+	}
+
+	if (uf->status != FLASH_AUTH)
+		uf->status = flash_list_valid(uf->flist);
+
+	if (uf->status != FLASH_IMG_READY)
+		free_flash_list(uf->flist);
+	else
+		rtas_firmware_flash_list = uf->flist;
+
+	uf->flist = NULL;
+
+unlock:
+	mutex_unlock(&rtas_update_flash_mutex);
+	return 0;
+}
+
+/*
+ * Write the text for a flash @status code into @buf (NUL terminated) and
+ * return its length without the terminator. Unknown codes produce an
+ * "unexpected status value" line carrying the number.
+ */
+static size_t get_flash_status_msg(int status, char *buf)
+{
+	const char *text = NULL;
+	size_t text_len;
+
+	switch (status) {
+	case FLASH_AUTH:
+		text = "error: this partition does not have service authority\n";
+		break;
+	case FLASH_NO_OP:
+		text = "info: no firmware image for flash\n";
+		break;
+	case FLASH_IMG_SHORT:
+		text = "error: flash image short\n";
+		break;
+	case FLASH_IMG_BAD_LEN:
+		text = "error: internal error bad length\n";
+		break;
+	case FLASH_IMG_NULL_DATA:
+		text = "error: internal error null data\n";
+		break;
+	case FLASH_IMG_READY:
+		text = "ready: firmware image ready for flash on reboot\n";
+		break;
+	default:
+		break;
+	}
+
+	if (!text)
+		return sprintf(buf, "error: unexpected status value %d\n",
+			       status);
+
+	text_len = strlen(text);
+	/* Copy the terminating NUL as well. */
+	memcpy(buf, text, text_len + 1);
+	return text_len;
+}
+
+/*
+ * Read handler that reports the flash status as a text message (the
+ * firmware contents themselves are never read back).
+ */
+static ssize_t rtas_flash_read_msg(struct file *file, char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct rtas_update_flash_t *const uf = &rtas_update_flash_data;
+	char text[RTAS_MSG_MAXLEN];
+	size_t text_len;
+	int snapshot;
+
+	/* Take a snapshot of the status under the mutex, format it outside. */
+	mutex_lock(&rtas_update_flash_mutex);
+	snapshot = uf->status;
+	mutex_unlock(&rtas_update_flash_mutex);
+
+	text_len = get_flash_status_msg(snapshot, text);
+
+	return simple_read_from_buffer(buf, count, ppos, text, text_len);
+}
+
+/* Read handler that reports the flash status as a decimal number. */
+static ssize_t rtas_flash_read_num(struct file *file, char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct rtas_update_flash_t *const uf = &rtas_update_flash_data;
+	char text[RTAS_MSG_MAXLEN];
+	int snapshot, text_len;
+
+	mutex_lock(&rtas_update_flash_mutex);
+	snapshot = uf->status;
+	mutex_unlock(&rtas_update_flash_mutex);
+
+	/* sprintf() returns the same length strlen() would report. */
+	text_len = sprintf(text, "%d\n", snapshot);
+
+	return simple_read_from_buffer(buf, count, ppos, text, text_len);
+}
+
+/*
+ * Write handler of the flash proc files: append up to RTAS_BLK_SIZE bytes
+ * of user data as one new block at the end of the block list.
+ *
+ * To keep things simple a full cache object is used per write, however
+ * small the count is. Returns the number of bytes consumed (the original
+ * count when the data is discarded because of FLASH_AUTH or a zero count),
+ * -ENOMEM when an allocation fails, or -EFAULT when the user copy fails.
+ * Memory attached to the list is released when the file is released.
+ */
+static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
+				size_t count, loff_t *off)
+{
+	struct rtas_update_flash_t *const uf = &rtas_update_flash_data;
+	struct flash_block_list *tail;
+	ssize_t ret;
+	char *blk;
+	int slot;
+
+	mutex_lock(&rtas_update_flash_mutex);
+
+	/* discard data */
+	if (uf->status == FLASH_AUTH || count == 0) {
+		ret = count;
+		goto unlock;
+	}
+
+	/* Every failure up to the user copy is an allocation failure. */
+	ret = -ENOMEM;
+
+	if (!uf->flist) {
+		uf->flist = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL);
+		if (!uf->flist)
+			goto unlock;
+	}
+
+	/* seek to last block_list for append */
+	for (tail = uf->flist; tail->next; tail = tail->next)
+		;
+
+	slot = tail->num_blocks;
+	if (slot == FLASH_BLOCKS_PER_NODE) {
+		/* Need to allocate another block_list */
+		tail->next = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL);
+		if (!tail->next)
+			goto unlock;
+		tail = tail->next;
+		slot = 0;
+	}
+
+	if (count > RTAS_BLK_SIZE)
+		count = RTAS_BLK_SIZE;
+
+	blk = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL);
+	if (!blk)
+		goto unlock;
+
+	if (copy_from_user(blk, buffer, count)) {
+		kmem_cache_free(flash_block_cache, blk);
+		ret = -EFAULT;
+		goto unlock;
+	}
+
+	tail->blocks[slot].data = blk;
+	tail->blocks[slot].length = count;
+	tail->num_blocks++;
+	ret = count;
+
+unlock:
+	mutex_unlock(&rtas_update_flash_mutex);
+	return ret;
+}
+
+/*
+ * Flash management routines.
+ */
+
+/*
+ * Issue ibm,manage-flash-image with operation @op, retrying for as long as
+ * rtas_busy_delay() asks for it, and record the final status in @args_buf.
+ */
+static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op)
+{
+	s32 status;
+
+	for (;;) {
+		status = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE),
+				   1, 1, NULL, op);
+		if (!rtas_busy_delay(status))
+			break;
+	}
+
+	args_buf->status = status;
+}
+
+/* Read handler of manage_flash: report the last status as a decimal number. */
+static ssize_t manage_flash_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct rtas_manage_flash_t *const args_buf = &rtas_manage_flash_data;
+	char text[RTAS_MSG_MAXLEN];
+	int snapshot;
+	int text_len;
+
+	/* Take a snapshot of the status under the mutex, format it outside. */
+	mutex_lock(&rtas_manage_flash_mutex);
+	snapshot = args_buf->status;
+	mutex_unlock(&rtas_manage_flash_mutex);
+
+	text_len = sprintf(text, "%d\n", snapshot);
+
+	return simple_read_from_buffer(buf, count, ppos, text, text_len);
+}
+
+/*
+ * Write handler of manage_flash: input starting with "0" rejects the
+ * temporary image, input starting with "1" commits it.
+ *
+ * Returns the (possibly clamped) count on success or when the write is
+ * ignored (MANAGE_AUTH status or zero count), -EFAULT when the user copy
+ * fails, and -EINVAL when the input selects no operation.
+ */
+static ssize_t manage_flash_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *off)
+{
+	struct rtas_manage_flash_t *const args_buf = &rtas_manage_flash_data;
+	static const char reject_str[] = "0";
+	static const char commit_str[] = "1";
+	char input[10];
+	ssize_t ret;
+	int op = -1;
+
+	mutex_lock(&rtas_manage_flash_mutex);
+
+	if (args_buf->status == MANAGE_AUTH || count == 0) {
+		ret = count;
+		goto unlock;
+	}
+
+	if (buf) {
+		if (count > 9)
+			count = 9;
+
+		if (copy_from_user(input, buf, count)) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+
+		/* Only the leading characters of the input are compared. */
+		if (!strncmp(input, reject_str, sizeof(reject_str) - 1))
+			op = RTAS_REJECT_TMP_IMG;
+		else if (!strncmp(input, commit_str, sizeof(commit_str) - 1))
+			op = RTAS_COMMIT_TMP_IMG;
+	}
+
+	/* buf is empty, or contains invalid string */
+	if (op == -1) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	manage_flash(args_buf, op);
+	ret = count;
+
+unlock:
+	mutex_unlock(&rtas_manage_flash_mutex);
+	return ret;
+}
+
+/*
+ * Validation routines.
+ */
+
+/*
+ * Run ibm,validate-flash-image on the candidate image header in
+ * @args_buf->buf.
+ *
+ * The buffer is bounced through rtas_data_buf under rtas_data_buf_lock and
+ * copied back after every attempt, so @args_buf->buf ends up holding
+ * whatever the call left in rtas_data_buf. The call is retried for as long
+ * as rtas_busy_delay() asks for it. The final status and update-results
+ * token are stored in @args_buf.
+ */
+static void validate_flash(struct rtas_validate_flash_t *args_buf)
+{
+	int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE);
+	/*
+	 * Start from a defined value: the result is stored unconditionally
+	 * below, so it must not be stack garbage should rtas_call() return
+	 * without filling in its output.
+	 */
+	int update_results = 0;
+	s32 rc;
+
+	do {
+		spin_lock(&rtas_data_buf_lock);
+		memcpy(rtas_data_buf, args_buf->buf, VALIDATE_BUF_SIZE);
+		rc = rtas_call(token, 2, 2, &update_results,
+			       (u32) __pa(rtas_data_buf), args_buf->buf_size);
+		memcpy(args_buf->buf, rtas_data_buf, VALIDATE_BUF_SIZE);
+		spin_unlock(&rtas_data_buf_lock);
+	} while (rtas_busy_delay(rc));
+
+	args_buf->status = rc;
+	args_buf->update_results = update_results;
+}
+
+/*
+ * Format the validate_flash result into @msg (at most @msglen bytes,
+ * always NUL terminated) and return the number of characters written, not
+ * counting the terminator.
+ *
+ * For a status below VALIDATE_TMP_UPDATE only the status is printed.
+ * Otherwise the update-results token is printed, followed for some tokens
+ * by the text held in @args_buf->buf.
+ *
+ * scnprintf() is used instead of snprintf() so that the return value is
+ * the length actually stored. snprintf() returns the length that *would*
+ * have been written, which for a long buffer text exceeds @msglen and made
+ * the caller copy bytes from beyond its message array to user space. The
+ * "%.*s" precision additionally keeps the formatter from reading past the
+ * VALIDATE_BUF_SIZE-byte buffer if it holds no NUL.
+ */
+static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
+				  char *msg, int msglen)
+{
+	int n;
+
+	if (args_buf->status < VALIDATE_TMP_UPDATE)
+		return scnprintf(msg, msglen, "%d\n", args_buf->status);
+
+	n = scnprintf(msg, msglen, "%d\n", args_buf->update_results);
+	if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) ||
+	    (args_buf->update_results == VALIDATE_TMP_UPDATE))
+		n += scnprintf(msg + n, msglen - n, "%.*s\n",
+			       VALIDATE_BUF_SIZE, args_buf->buf);
+
+	return n;
+}
+
+/*
+ * Read handler of validate_flash: format the current validation result
+ * under the mutex and hand it to the reader.
+ */
+static ssize_t validate_flash_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct rtas_validate_flash_t *const state = &rtas_validate_flash_data;
+	char text[VALIDATE_MSG_LEN];
+	int text_len;
+
+	mutex_lock(&rtas_validate_flash_mutex);
+	text_len = get_validate_flash_msg(state, text, VALIDATE_MSG_LEN);
+	mutex_unlock(&rtas_validate_flash_mutex);
+
+	return simple_read_from_buffer(buf, count, ppos, text, text_len);
+}
+
+/*
+ * Write handler of validate_flash: collect the first VALIDATE_BUF_SIZE
+ * bytes of the candidate image in the validation buffer.
+ *
+ * Writes beyond that offset, or any write while the status is
+ * VALIDATE_AUTH, are accepted and dropped. The status becomes
+ * VALIDATE_READY once the write reaches the end of the buffer and
+ * VALIDATE_INCOMPLETE before that. Returns the number of bytes consumed
+ * or -EFAULT.
+ */
+static ssize_t validate_flash_write(struct file *file, const char __user *buf,
+				    size_t count, loff_t *off)
+{
+	struct rtas_validate_flash_t *const state = &rtas_validate_flash_data;
+	ssize_t ret;
+
+	mutex_lock(&rtas_validate_flash_mutex);
+
+	/* We are only interested in the first 4K of the candidate image */
+	if (*off >= VALIDATE_BUF_SIZE || state->status == VALIDATE_AUTH) {
+		*off += count;
+		ret = count;
+		goto unlock;
+	}
+
+	if (*off + count >= VALIDATE_BUF_SIZE) {
+		/* Clamp to the end of the buffer; the header is now complete. */
+		count = VALIDATE_BUF_SIZE - *off;
+		state->status = VALIDATE_READY;
+	} else {
+		state->status = VALIDATE_INCOMPLETE;
+	}
+
+	ret = -EFAULT;
+	if (!access_ok(buf, count))
+		goto unlock;
+	if (copy_from_user(state->buf + *off, buf, count))
+		goto unlock;
+
+	*off += count;
+	ret = count;
+
+unlock:
+	mutex_unlock(&rtas_validate_flash_mutex);
+	return ret;
+}
+
+/*
+ * Release handler of validate_flash: when a complete header was written
+ * (status VALIDATE_READY), run the validation over the full buffer.
+ * Always returns 0.
+ */
+static int validate_flash_release(struct inode *inode, struct file *file)
+{
+	struct rtas_validate_flash_t *const state = &rtas_validate_flash_data;
+
+	mutex_lock(&rtas_validate_flash_mutex);
+
+	if (state->status != VALIDATE_READY)
+		goto unlock;
+
+	state->buf_size = VALIDATE_BUF_SIZE;
+	validate_flash(state);
+
+unlock:
+	mutex_unlock(&rtas_validate_flash_mutex);
+	return 0;
+}
+
+/*
+ * On-reboot flash update applicator.
+ *
+ * Installed as rtas_flash_term_hook by rtas_flash_init(). Does nothing when
+ * no image has been saved in rtas_firmware_flash_list, when @reboot_type is
+ * not SYS_RESTART, or when ibm,update-flash-64-and-reboot has no token.
+ * Otherwise the saved block list is rewritten in place into the layout
+ * handed to RTAS (physical addresses, big-endian fields, version/length
+ * header) and passed to the update call; the outcome is only logged.
+ *
+ * rtas_firmware_flash_list is cleared before the rewrite, so the image is
+ * gone from the driver's point of view whatever the call returns.
+ */
+static void rtas_flash_firmware(int reboot_type)
+{
+	unsigned long image_size;
+	struct flash_block_list *f, *next, *flist;
+	unsigned long rtas_block_list;
+	int i, status, update_token;
+
+	if (rtas_firmware_flash_list == NULL)
+		return;		/* nothing to do */
+
+	if (reboot_type != SYS_RESTART) {
+		printk(KERN_ALERT "FLASH: firmware flash requires a reboot\n");
+		printk(KERN_ALERT "FLASH: the firmware image will NOT be flashed\n");
+		return;
+	}
+
+	update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT);
+	if (update_token == RTAS_UNKNOWN_SERVICE) {
+		printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot "
+		       "is not available -- not a service partition?\n");
+		printk(KERN_ALERT "FLASH: firmware will not be flashed\n");
+		return;
+	}
+
+	/*
+	 * Just before starting the firmware flash, cancel the event scan work
+	 * to avoid any soft lockup issues.
+	 */
+	rtas_cancel_event_scan();
+
+	/*
+	 * NOTE: the "first" block must be under 4GB, so we create
+	 * an entry with no data blocks in the reserved buffer in
+	 * the kernel data segment.
+	 */
+	spin_lock(&rtas_data_buf_lock);
+	flist = (struct flash_block_list *)&rtas_data_buf[0];
+	flist->num_blocks = 0;
+	flist->next = rtas_firmware_flash_list;
+	rtas_block_list = __pa(flist);
+	if (rtas_block_list >= 4UL*1024*1024*1024) {
+		printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n");
+		spin_unlock(&rtas_data_buf_lock);
+		return;
+	}
+
+	printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n");
+	/* Update the block_list in place. */
+	rtas_firmware_flash_list = NULL; /* too hard to backout on error */
+	image_size = 0;
+	/* 'next' keeps the virtual link, since f->next is overwritten below. */
+	for (f = flist; f; f = next) {
+		/* Translate data addrs to absolute */
+		for (i = 0; i < f->num_blocks; i++) {
+			f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data));
+			image_size += f->blocks[i].length;
+			f->blocks[i].length = cpu_to_be64(f->blocks[i].length);
+		}
+		next = f->next;
+		/* Don't translate NULL pointer for last entry */
+		if (f->next)
+			f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next));
+		else
+			f->next = NULL;
+		/*
+		 * make num_blocks into the version/length field: the version
+		 * goes in the top byte, the low bits hold (entries + 1) * 16
+		 */
+		f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16);
+		f->num_blocks = cpu_to_be64(f->num_blocks);
+	}
+
+	printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size);
+	printk(KERN_ALERT "FLASH: performing flash and reboot\n");
+	rtas_progress("Flashing        \n", 0x0);
+	rtas_progress("Please Wait...  ", 0x0);
+	printk(KERN_ALERT "FLASH: this will take several minutes.  Do not power off!\n");
+	status = rtas_call(update_token, 1, 1, NULL, rtas_block_list);
+	switch (status) {	/* should only get "bad" status */
+	    case 0:
+		printk(KERN_ALERT "FLASH: success\n");
+		break;
+	    case -1:
+		printk(KERN_ALERT "FLASH: hardware error.  Firmware may not be not flashed\n");
+		break;
+	    case -3:
+		printk(KERN_ALERT "FLASH: image is corrupt or not correct for this platform.  Firmware not flashed\n");
+		break;
+	    case -4:
+		printk(KERN_ALERT "FLASH: flash failed when partially complete.  System may not reboot\n");
+		break;
+	    default:
+		printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status);
+		break;
+	}
+	spin_unlock(&rtas_data_buf_lock);
+}
+
+/*
+ * Manifest of proc files to create
+ *
+ * Each entry names one proc file, the RTAS function whose token decides the
+ * file's initial status (see rtas_flash_init()), the status word to
+ * initialise, and the file operations implementing the file.
+ */
+struct rtas_flash_file {
+	const char *filename;		/* path passed to proc_create() */
+	const rtas_fn_handle_t handle;	/* RTAS function backing this file */
+	int *status;			/* status word initialised at init time */
+	const struct proc_ops ops;	/* handlers for this file */
+};
+
+/*
+ * firmware_flash and firmware_update share the same write/release handlers
+ * and status word; they differ only in how the status is read back (text
+ * message vs. number). manage_flash has no release handler.
+ */
+static const struct rtas_flash_file rtas_flash_files[] = {
+	{
+		.filename	= "powerpc/rtas/" FIRMWARE_FLASH_NAME,
+		.handle		= RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT,
+		.status		= &rtas_update_flash_data.status,
+		.ops.proc_read	= rtas_flash_read_msg,
+		.ops.proc_write	= rtas_flash_write,
+		.ops.proc_release = rtas_flash_release,
+		.ops.proc_lseek	= default_llseek,
+	},
+	{
+		.filename	= "powerpc/rtas/" FIRMWARE_UPDATE_NAME,
+		.handle		= RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT,
+		.status		= &rtas_update_flash_data.status,
+		.ops.proc_read	= rtas_flash_read_num,
+		.ops.proc_write	= rtas_flash_write,
+		.ops.proc_release = rtas_flash_release,
+		.ops.proc_lseek	= default_llseek,
+	},
+	{
+		.filename	= "powerpc/rtas/" VALIDATE_FLASH_NAME,
+		.handle		= RTAS_FN_IBM_VALIDATE_FLASH_IMAGE,
+		.status		= &rtas_validate_flash_data.status,
+		.ops.proc_read	= validate_flash_read,
+		.ops.proc_write	= validate_flash_write,
+		.ops.proc_release = validate_flash_release,
+		.ops.proc_lseek	= default_llseek,
+	},
+	{
+		.filename	= "powerpc/rtas/" MANAGE_FLASH_NAME,
+		.handle		= RTAS_FN_IBM_MANAGE_FLASH_IMAGE,
+		.status		= &rtas_manage_flash_data.status,
+		.ops.proc_read	= manage_flash_read,
+		.ops.proc_write	= manage_flash_write,
+		.ops.proc_lseek	= default_llseek,
+	}
+};
+
+/*
+ * Module init: bail out with -EINVAL when the firmware offers no
+ * ibm,update-flash-64-and-reboot, otherwise allocate the validation buffer
+ * and the block cache, create the proc files and install the reboot hook.
+ *
+ * Each file's status word starts as FLASH_AUTH when its RTAS function has
+ * no token and FLASH_NO_OP otherwise. On failure everything created so far
+ * is undone and -ENOMEM is returned.
+ */
+static int __init rtas_flash_init(void)
+{
+	const struct rtas_flash_file *entry;
+	int idx;
+
+	if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) {
+		pr_info("rtas_flash: no firmware flash support\n");
+		return -EINVAL;
+	}
+
+	rtas_validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
+	if (!rtas_validate_flash_data.buf)
+		return -ENOMEM;
+
+	/* Object size equals alignment: every block is RTAS_BLK_SIZE aligned. */
+	flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+						       RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+						       0, 0, RTAS_BLK_SIZE, NULL);
+	if (!flash_block_cache) {
+		printk(KERN_ERR "%s: failed to create block cache\n",
+				__func__);
+		goto free_buf;
+	}
+
+	for (idx = 0; idx < ARRAY_SIZE(rtas_flash_files); idx++) {
+		entry = &rtas_flash_files[idx];
+
+		if (!proc_create(entry->filename, 0600, NULL, &entry->ops))
+			goto remove_files;
+
+		*entry->status =
+			(rtas_function_token(entry->handle) == RTAS_UNKNOWN_SERVICE) ?
+			FLASH_AUTH : FLASH_NO_OP;
+	}
+
+	rtas_flash_term_hook = rtas_flash_firmware;
+	return 0;
+
+remove_files:
+	/* Entry idx itself was not created; remove the ones before it. */
+	for (idx--; idx >= 0; idx--)
+		remove_proc_entry(rtas_flash_files[idx].filename, NULL);
+
+	kmem_cache_destroy(flash_block_cache);
+free_buf:
+	kfree(rtas_validate_flash_data.buf);
+	return -ENOMEM;
+}
+
+/*
+ * Module exit: remove the reboot hook, drop any saved image, remove the
+ * proc files and release the block cache and validation buffer.
+ */
+static void __exit rtas_flash_cleanup(void)
+{
+	int idx = 0;
+
+	rtas_flash_term_hook = NULL;
+
+	if (rtas_firmware_flash_list) {
+		free_flash_list(rtas_firmware_flash_list);
+		rtas_firmware_flash_list = NULL;
+	}
+
+	while (idx < ARRAY_SIZE(rtas_flash_files)) {
+		remove_proc_entry(rtas_flash_files[idx].filename, NULL);
+		idx++;
+	}
+
+	kmem_cache_destroy(flash_block_cache);
+	kfree(rtas_validate_flash_data.buf);
+}
+
+module_init(rtas_flash_init);
+module_exit(rtas_flash_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 0000000000..e1fdc7473b
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+/* Return 1 if config offset @where may be accessed on @dn, otherwise 0. */
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+	/* Offsets below 256 are always accepted. */
+	if (where < 256)
+		return 1;
+	/* Offsets below 4096 need the node's extended config space flag. */
+	return (where < 4096 && dn->pci_ext_config_space) ? 1 : 0;
+}
+
+/*
+ * Read @size bytes at config offset @where via RTAS. Once the call is made,
+ * *@val is -1 unless rtas_call() stored a value. Returns a PCIBIOS_* code.
+ */
+int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+	unsigned long buid, addr;
+	int rc, out = -1;
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!config_access_valid(pdn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+	if (pdn->edev && pdn->edev->pe &&
+	    (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+		return PCIBIOS_SET_FAILED;
+#endif
+
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
+	buid = pdn->phb->buid;
+	/* A non-zero BUID selects the ibm_read_pci_config token. */
+	if (!buid)
+		rc = rtas_call(read_pci_config, 2, 2, &out, addr, size);
+	else
+		rc = rtas_call(ibm_read_pci_config, 4, 2, &out,
+			       addr, BUID_HI(buid), BUID_LO(buid), size);
+	*val = out;
+
+	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+/* pci_ops .read hook: RTAS config read followed by an EEH failure check. */
+static int rtas_pci_read_config(struct pci_bus *bus, unsigned int devfn,
+				int where, int size, u32 *val)
+{
+	struct pci_dn *node = pci_get_pdn_by_devfn(bus, devfn);
+	int status;
+
+	/* Default to all-ones in case the read below returns early. */
+	*val = 0xFFFFFFFF;
+
+	/* rtas_read_config() rejects a NULL node itself. */
+	status = rtas_read_config(node, where, size, val);
+	/* Value matches the EEH error pattern: let EEH decide on failure. */
+	if (*val != EEH_IO_ERROR_VALUE(size))
+		return status;
+	if (eeh_dev_check_failure(pdn_to_eeh_dev(node)))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	return status;
+}
+
+/*
+ * Write the @size-byte value @val to config space offset @where via RTAS.
+ * Returns a PCIBIOS_* status code.
+ */
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+	unsigned long buid, addr;
+	int rc;
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!config_access_valid(pdn, where))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+	if (pdn->edev && pdn->edev->pe &&
+	    (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+		return PCIBIOS_SET_FAILED;
+#endif
+
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
+	buid = pdn->phb->buid;
+	if (!buid)
+		rc = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+	else
+		rc = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+			       BUID_HI(buid), BUID_LO(buid), size, (ulong)val);
+
+	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+/* pci_ops .write hook: resolve the pci_dn and hand off to rtas_write_config(). */
+static int rtas_pci_write_config(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, u32 val)
+{
+	struct pci_dn *node;
+
+	node = pci_get_pdn_by_devfn(bus, devfn);
+
+	/* A NULL node is rejected inside rtas_write_config(). */
+	return rtas_write_config(node, where, size, val);
+}
+
+static struct pci_ops rtas_pci_ops = {
+	.read = rtas_pci_read_config,
+	.write = rtas_pci_write_config,
+};
+
+/* Return 1 when the node's "model" property contains "Python", else 0. */
+static int is_python(struct device_node *dev)
+{
+	const char *name = of_get_property(dev, "model", NULL);
+
+	if (!name)
+		return 0;
+	return strstr(name, "Python") ? 1 : 0;
+}
+
+/*
+ * Clear PRG_CL_RESET_VALID in the Python bridge registers. Firmware doesn't
+ * always clear this bit which is critical for good performance - Anton
+ */
+static void python_countermeasures(struct device_node *dev)
+{
+	struct resource registers;
+	void __iomem *chip_regs;
+	volatile u32 val;
+
+	if (of_address_to_resource(dev, 0, &registers)) {
+		printk(KERN_ERR "Can't get address for Python workarounds !\n");
+		return;
+	}
+
+	/* Python's register file is 1 MB in size. */
+	chip_regs = ioremap(registers.start & ~(0xfffffUL), 0x100000);
+	if (!chip_regs) {
+		printk(KERN_ERR "Can't map registers for Python workarounds !\n");
+		return;
+	}
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+	val = in_be32(chip_regs + 0xf6030);
+	if (val & PRG_CL_RESET_VALID) {
+		val &= ~PRG_CL_RESET_VALID;
+		out_be32(chip_regs + 0xf6030, val);
+		/* We must read it back for changes to take effect */
+		val = in_be32(chip_regs + 0xf6030);
+		/* Single printk: the old split message lacked KERN_CONT. */
+		printk(KERN_INFO "Python workaround: reg0: %x\n", val);
+	}
+
+	iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens(void)	/* cache the PCI config RTAS tokens */
+{
+	read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG);
+	write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG);
+	ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG);
+	ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG);
+}
+
+/* Return the start of the PHB's first address range as its BUID, or 0. */
+unsigned long get_phb_buid(struct device_node *phb)
+{
+	struct resource r;
+
+	/* No ibm read-config token: report 0 so the plain calls are used. */
+	if (ibm_read_pci_config == RTAS_UNKNOWN_SERVICE)
+		return 0;
+	return of_address_to_resource(phb, 0, &r) ? 0 : r.start;
+}
+
+/* Fill phb->first_busno/last_busno from "bus-range"; return 1 on failure. */
+static int phb_set_bus_ranges(struct device_node *dev,
+			      struct pci_controller *phb)
+{
+	const __be32 *range;
+	unsigned int len;
+
+	range = of_get_property(dev, "bus-range", &len);
+	/* The property must be present and hold at least two cells. */
+	if (!range || len < 2 * sizeof(int))
+		return 1;
+
+	phb->first_busno = be32_to_cpu(range[0]);
+	phb->last_busno  = be32_to_cpu(range[1]);
+	return 0;
+}
+
+/* Apply the Python fix-up, then set bus range, ops and BUID; 0 on success. */
+int rtas_setup_phb(struct pci_controller *phb)
+{
+	struct device_node *node = phb->dn;
+
+	if (is_python(node))
+		python_countermeasures(node);
+
+	if (phb_set_bus_ranges(node, phb) != 0)
+		return 1;
+
+	phb->buid = get_phb_buid(node);
+	phb->ops = &rtas_pci_ops;
+	return 0;
+}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
new file mode 100644
index 0000000000..9bba469239
--- /dev/null
+++ b/arch/powerpc/kernel/rtasd.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * Communication to userspace based on kernel/printk.c
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/topology.h>
+
+#include <linux/uaccess.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/nvram.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+#include <asm/topology.h>
+
+
+static DEFINE_SPINLOCK(rtasd_log_lock);
+
+static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
+
+static char *rtas_log_buf;
+static unsigned long rtas_log_start;
+static unsigned long rtas_log_size;
+
+static int surveillance_timeout = -1;
+
+static unsigned int rtas_error_log_max;
+static unsigned int rtas_error_log_buffer_max;
+
+/* RTAS service tokens */
+static unsigned int event_scan;
+static unsigned int rtas_event_scan_rate;
+
+static bool full_rtas_msgs;
+
+/* Stop logging to nvram after first fatal error */
+static int logging_enabled; /* Until we initialize everything,
+                             * make sure we don't try logging
+                             * anything */
+static int error_log_cnt;
+
+/*
+ * Since we use 32 bit RTAS, the physical address of this must be below
+ * 4G or else bad things happen. Allocate this in the kernel data and
+ * make it big enough.
+ */
+static unsigned char logdata[RTAS_ERROR_LOG_MAX];
+
+static char *rtas_type[] = {
+	"Unknown", "Retry", "TCE Error", "Internal Device Failure",
+	"Timeout", "Data Parity", "Address Parity", "Cache Parity",
+	"Address Invalid", "ECC Uncorrected", "ECC Corrupted",
+};
+
+static char *rtas_event_type(int type)
+{
+	if ((type > 0) && (type < 11))
+		return rtas_type[type];
+
+	switch (type) {
+		case RTAS_TYPE_EPOW:
+			return "EPOW";
+		case RTAS_TYPE_PLATFORM:
+			return "Platform Error";
+		case RTAS_TYPE_IO:
+			return "I/O Event";
+		case RTAS_TYPE_INFO:
+			return "Platform Information Event";
+		case RTAS_TYPE_DEALLOC:
+			return "Resource Deallocation Event";
+		case RTAS_TYPE_DUMP:
+			return "Dump Notification Event";
+		case RTAS_TYPE_PRRN:
+			return "Platform Resource Reassignment Event";
+		case RTAS_TYPE_HOTPLUG:
+			return "Hotplug Event";
+	}
+
+	return rtas_type[0];
+}
+
+/* To see this info, grep RTAS /var/log/messages and each entry
+ * will be collected together with obvious begin/end.
+ * There will be a unique identifier on the begin and end lines.
+ * This will persist across reboots.
+ *
+ * format of error logs returned from RTAS:
+ * bytes	(size)	: contents
+ * --------------------------------------------------------
+ * 0-7		(8)	: rtas_error_log
+ * 8-47		(40)	: extended info
+ * 48-51	(4)	: vendor id
+ * 52-1023 (vendor specific) : location code and debug data
+ */
+static void printk_log_rtas(char *buf, int len)
+{
+	/* Print the @len-byte log at @buf: hex dump if full_rtas_msgs, else a summary. */
+	int i,j,n = 0;
+	int perline = 16;
+	char buffer[64];	/* holds one formatted output line */
+	char * str = "RTAS event";
+
+	if (full_rtas_msgs) {
+		printk(RTAS_DEBUG "%d -------- %s begin --------\n",
+		       error_log_cnt, str);
+
+		/*
+		 * Print perline bytes on each line, each line will start
+		 * with RTAS and a changing number, so syslogd will
+		 * print lines that are otherwise the same.  Separate every
+		 * 4 bytes with a space.
+		 */
+		for (i = 0; i < len; i++) {
+			j = i % perline;
+			if (j == 0) {	/* first byte of a new output line */
+				memset(buffer, 0, sizeof(buffer));
+				n = sprintf(buffer, "RTAS %d:", i/perline);
+			}
+
+			if ((i % 4) == 0)
+				n += sprintf(buffer+n, " ");
+
+			n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
+
+			if (j == (perline-1))	/* line complete: emit it */
+				printk(KERN_DEBUG "%s\n", buffer);
+		}
+		if ((i % perline) != 0)	/* flush a partial final line */
+			printk(KERN_DEBUG "%s\n", buffer);
+
+		printk(RTAS_DEBUG "%d -------- %s end ----------\n",
+		       error_log_cnt, str);
+	} else {
+		struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
+
+		printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
+		       error_log_cnt,
+		       rtas_event_type(rtas_error_type(errlog)),
+		       rtas_error_type(errlog),
+		       rtas_error_severity(errlog));
+	}
+}
+
+/*
+ * Length in bytes of the RTAS error log at @buf: the 8-byte fixed header
+ * plus the extended log (when flagged present and non-empty), capped at
+ * rtas_error_log_max.
+ */
+static int log_rtas_len(char * buf)
+{
+	struct rtas_error_log *log = (struct rtas_error_log *)buf;
+	uint32_t ext_len = rtas_error_extended_log_length(log);
+	int total = 8;	/* rtas fixed header */
+
+	/* extended header */
+	if (rtas_error_extended(log) && ext_len)
+		total += ext_len;
+
+	/* Fetch the maximum on first use if it has not been set yet. */
+	if (!rtas_error_log_max)
+		rtas_error_log_max = rtas_get_error_log_max();
+
+	if (total > rtas_error_log_max)
+		total = rtas_error_log_max;
+
+	return total;
+}
+
+/*
+ * First write to nvram, if fatal error, that is the only
+ * place we log the info.  The error will be picked up
+ * on the next reboot by rtasd.  If not fatal, run the
+ * method for the type of error.  Currently, only RTAS
+ * errors have methods implemented, but in the future
+ * there might be a need to store data in nvram before a
+ * call to panic().
+ *
+ * XXX We write to nvram periodically, to indicate error has
+ * been written and sync'd, but there is a possibility
+ * that if we don't shutdown correctly, a duplicate error
+ * record will be created on next reboot.
+ */
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
+{
+	unsigned long offset;
+	unsigned long s;	/* saved IRQ flags for rtasd_log_lock */
+	int len = 0;
+
+	pr_debug("rtasd: logging event\n");
+	if (buf == NULL)
+		return;
+
+	spin_lock_irqsave(&rtasd_log_lock, s);
+
+	/* get length and increase count */
+	switch (err_type & ERR_TYPE_MASK) {
+	case ERR_TYPE_RTAS_LOG:
+		len = log_rtas_len(buf);
+		if (!(err_type & ERR_FLAG_BOOT))
+			error_log_cnt++;
+		break;
+	case ERR_TYPE_KERNEL_PANIC:
+	default:	/* only RTAS logs are handled; anything else is dropped */
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+		spin_unlock_irqrestore(&rtasd_log_lock, s);
+		return;
+	}
+
+#ifdef CONFIG_PPC64
+	/* Write error to NVRAM */
+	if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
+		nvram_write_error_log(buf, len, err_type, error_log_cnt);
+#endif /* CONFIG_PPC64 */
+
+	/*
+	 * rtas errors can occur during boot, and we do want to capture
+	 * those somewhere, even if nvram isn't ready (why not?), and even
+	 * if rtasd isn't ready. Put them into the boot log, at least.
+	 */
+	if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
+		printk_log_rtas(buf, len);
+
+	/* Check to see if we need to or have stopped logging */
+	if (fatal || !logging_enabled) {
+		logging_enabled = 0;	/* a fatal event stops further logging */
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+		spin_unlock_irqrestore(&rtasd_log_lock, s);
+		return;
+	}
+
+	/* call type specific method for error */
+	switch (err_type & ERR_TYPE_MASK) {
+	case ERR_TYPE_RTAS_LOG:
+		offset = rtas_error_log_buffer_max *
+			((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
+
+		/* First copy over sequence number */
+		memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
+
+		/* Second copy over error log data */
+		offset += sizeof(int);
+		memcpy(&rtas_log_buf[offset], buf, len);
+
+		if (rtas_log_size < LOG_NUMBER)	/* ring not full yet: grow */
+			rtas_log_size += 1;
+		else
+			rtas_log_start += 1;	/* ring full: advance past the oldest */
+
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+		spin_unlock_irqrestore(&rtasd_log_lock, s);
+		wake_up_interruptible(&rtas_log_wait);
+		break;
+	case ERR_TYPE_KERNEL_PANIC:	/* not reached: the first switch returned */
+	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
+		spin_unlock_irqrestore(&rtasd_log_lock, s);
+		return;
+	}
+}
+
+/* Report (rate-limited) that a PRRN event is ignored; pseries only. */
+static void handle_rtas_event(const struct rtas_error_log *log)
+{
+	/* Only PRRN events on pseries produce any output here. */
+	if (machine_is(pseries) &&
+	    rtas_error_type(log) == RTAS_TYPE_PRRN)
+		pr_info_ratelimited("Platform resource reassignment ignored.\n");
+}
+
+static int rtas_log_open(struct inode * inode, struct file * file)
+{
+	return 0;
+}
+
+static int rtas_log_release(struct inode * inode, struct file * file)
+{
+	return 0;
+}
+
+/* This will check if all events are logged, if they are then, we
+ * know that we can safely clear the events in NVRAM.
+ * Next we'll sit and wait for something else to log.
+ */
+static ssize_t rtas_log_read(struct file * file, char __user * buf,
+			 size_t count, loff_t *ppos)
+{
+	int error;
+	char *tmp;
+	unsigned long s;
+	unsigned long offset;
+
+	if (!buf || count < rtas_error_log_buffer_max)
+		return -EINVAL;
+	/* Exactly one fixed-size record is returned per read. */
+	count = rtas_error_log_buffer_max;
+
+	if (!access_ok(buf, count))
+		return -EFAULT;
+	/* Bounce buffer so copy_to_user() runs after the lock is dropped. */
+	tmp = kmalloc(count, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&rtasd_log_lock, s);
+
+	/* if it's 0, then we know we got the last one (the one in NVRAM) */
+	while (rtas_log_size == 0) {
+		if (file->f_flags & O_NONBLOCK) {
+			spin_unlock_irqrestore(&rtasd_log_lock, s);
+			error = -EAGAIN;
+			goto out;
+		}
+
+		if (!logging_enabled) {
+			spin_unlock_irqrestore(&rtasd_log_lock, s);
+			error = -ENODATA;
+			goto out;
+		}
+#ifdef CONFIG_PPC64
+		nvram_clear_error_log();
+#endif /* CONFIG_PPC64 */
+		/* Drop the lock while sleeping until an entry is queued. */
+		spin_unlock_irqrestore(&rtasd_log_lock, s);
+		error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
+		if (error)
+			goto out;
+		spin_lock_irqsave(&rtasd_log_lock, s);
+	}
+	/* Copy the record at rtas_log_start, then consume it. */
+	offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
+	memcpy(tmp, &rtas_log_buf[offset], count);
+
+	rtas_log_start += 1;
+	rtas_log_size -= 1;
+	spin_unlock_irqrestore(&rtasd_log_lock, s);
+
+	error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
+out:
+	kfree(tmp);
+	return error;
+}
+
+/* Poll hook: readable whenever rtas_log_size is non-zero. */
+static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
+{
+	poll_wait(file, &rtas_log_wait, wait);
+
+	return rtas_log_size ? (EPOLLIN | EPOLLRDNORM) : 0;
+}
+
+static const struct proc_ops rtas_log_proc_ops = {
+	.proc_read	= rtas_log_read,
+	.proc_poll	= rtas_log_poll,
+	.proc_open	= rtas_log_open,
+	.proc_release	= rtas_log_release,
+	.proc_lseek	= noop_llseek,
+};
+
+/* Set the surveillance timeout; 0 on success or -EINVAL from RTAS, else -1. */
+static int enable_surveillance(int timeout)
+{
+	int rc = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
+
+	switch (rc) {
+	case 0:
+		return 0;
+	case -EINVAL:
+		/* Treated as "not supported", which is not an error here. */
+		printk(KERN_DEBUG "rtasd: surveillance not supported\n");
+		return 0;
+	default:
+		printk(KERN_ERR "rtasd: could not update surveillance\n");
+		return -1;
+	}
+}
+
+/* Call event-scan repeatedly, logging each event, until it returns non-zero. */
+static void do_event_scan(void)
+{
+	struct rtas_error_log *log = (struct rtas_error_log *)logdata;
+	int rc;
+
+	for (;;) {
+		memset(logdata, 0, rtas_error_log_max);
+		rc = rtas_call(event_scan, 4, 1, NULL,
+			       RTAS_EVENT_SCAN_ALL_EVENTS, 0,
+			       __pa(logdata), rtas_error_log_max);
+		if (rc == -1)
+			printk(KERN_ERR "event-scan failed\n");
+		/* Any non-zero status ends the scan. */
+		if (rc != 0)
+			return;
+
+		/* PRRN events are not logged, only passed to the handler. */
+		if (rtas_error_type(log) != RTAS_TYPE_PRRN)
+			pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
+		handle_rtas_event(log);
+	}
+}
+
+static void rtas_event_scan(struct work_struct *w);
+static DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
+
+/*
+ * Delay should be at least one second since some machines have problems if
+ * we call event-scan too quickly.
+ */
+static unsigned long event_scan_delay = 1*HZ;
+static int first_pass = 1;
+
+/*
+ * Work handler: run an event scan, then re-arm on the next online CPU. The
+ * first wrap-around sets the regular delay and may enable surveillance.
+ */
+static void rtas_event_scan(struct work_struct *w)
+{
+	unsigned int next;
+	bool wrapped;
+
+	do_event_scan();
+	cpus_read_lock();
+	/* raw_ OK because just using CPU as starting point. */
+	next = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	wrapped = next >= nr_cpu_ids;
+	if (wrapped)
+		next = cpumask_first(cpu_online_mask);
+
+	if (wrapped && first_pass) {
+		first_pass = 0;
+		event_scan_delay = 30*HZ/rtas_event_scan_rate;
+		if (surveillance_timeout != -1) {
+			pr_debug("rtasd: enabling surveillance\n");
+			enable_surveillance(surveillance_timeout);
+			pr_debug("rtasd: surveillance enabled\n");
+		}
+	}
+	schedule_delayed_work_on(next, &event_scan_work,
+				 __round_jiffies_relative(event_scan_delay, next));
+	cpus_read_unlock();
+}
+
+#ifdef CONFIG_PPC64
+/* Log any not-yet-logged error record found in NVRAM and enable logging. */
+static void __init retrieve_nvram_error_log(void)
+{
+	unsigned int type;
+	int err;
+
+	/* See if we have any error stored in NVRAM */
+	memset(logdata, 0, rtas_error_log_max);
+	err = nvram_read_error_log(logdata, rtas_error_log_max,
+				   &type, &error_log_cnt);
+
+	/* We can use rtas_log_buf now */
+	logging_enabled = 1;
+
+	if (err == 0 && type != ERR_FLAG_ALREADY_LOGGED)
+		pSeries_log_error(logdata, type | ERR_FLAG_BOOT, 0);
+}
+#else /* CONFIG_PPC64 */
+static void __init retrieve_nvram_error_log(void)
+{
+}
+#endif /* CONFIG_PPC64 */
+
+/* Announce the daemon, replay NVRAM errors, and queue the first scan. */
+static void __init start_event_scan(void)
+{
+	printk(KERN_DEBUG "RTAS daemon started\n");
+	/* %u: the scan rate, and hence the quotient, is unsigned. */
+	pr_debug("rtasd: will sleep for %u milliseconds\n",
+		 (30000 / rtas_event_scan_rate));
+	/* Retrieve errors from nvram if any */
+	retrieve_nvram_error_log();
+	schedule_delayed_work_on(cpumask_first(cpu_online_mask),
+				 &event_scan_work, event_scan_delay);
+}
+
+/* Cancel the rtas event scan work */
+void rtas_cancel_event_scan(void)
+{
+	cancel_delayed_work_sync(&event_scan_work);
+}
+EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
+
+static int __init rtas_event_scan_init(void)
+{
+	int err;
+	/* Initcall: allocate the log buffer and start periodic event scanning. */
+	if (!machine_is(pseries) && !machine_is(chrp))
+		return 0;
+
+	/* No event-scan RTAS service: nothing to scan */
+	event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN);
+	if (event_scan == RTAS_UNKNOWN_SERVICE) {
+		printk(KERN_INFO "rtasd: No event-scan on system\n");
+		return -ENODEV;
+	}
+
+	err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate);
+	if (err) {
+		printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
+		return -ENODEV;
+	}
+
+	if (!rtas_event_scan_rate) {
+		/* Broken firmware: take a rate of zero to mean don't scan */
+		printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n");
+		return 0;
+	}
+
+	/* Each buffer slot holds an int sequence number plus one error log */
+	rtas_error_log_max = rtas_get_error_log_max();
+	rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
+
+	rtas_log_buf = vmalloc(array_size(LOG_NUMBER,
+					  rtas_error_log_buffer_max));
+	if (!rtas_log_buf) {
+		printk(KERN_ERR "rtasd: no memory\n");
+		return -ENOMEM;
+	}
+
+	start_event_scan();
+
+	return 0;
+}
+arch_initcall(rtas_event_scan_init);
+
+/* Create the "powerpc/rtas/error_log" proc entry once the log buffer exists. */
+static int __init rtas_init(void)
+{
+	if (!machine_is(pseries) && !machine_is(chrp))
+		return 0;
+
+	/* Without a log buffer there is nothing for the proc file to read. */
+	if (!rtas_log_buf)
+		return -ENODEV;
+
+	/* A failed proc_create() is reported but not treated as fatal. */
+	if (!proc_create("powerpc/rtas/error_log", 0400, NULL,
+			 &rtas_log_proc_ops))
+		printk(KERN_ERR "Failed to create error_log proc entry\n");
+
+	return 0;
+}
+__initcall(rtas_init);
+
+/* Parse "surveillance=<0..255>"; out-of-range or missing values are ignored. */
+static int __init surveillance_setup(char *str)
+{
+	int timeout;
+
+	/* We only do surveillance on pseries */
+	if (!machine_is(pseries))
+		return 0;
+
+	if (!get_option(&str, &timeout))
+		return 1;
+	if (timeout >= 0 && timeout <= 255)
+		surveillance_timeout = timeout;
+	return 1;
+}
+__setup("surveillance=", surveillance_setup);
+
+static int __init rtasmsgs_setup(char *str)
+{
+	return (kstrtobool(str, &full_rtas_msgs) == 0);
+}
+__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
new file mode 100644
index 0000000000..f9af305d95
--- /dev/null
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/secure_boot.h>
+
+static struct device_node *get_ppc_fw_sb_node(void)
+{
+	static const struct of_device_id ids[] = {
+		{ .compatible = "ibm,secureboot", },
+		{ .compatible = "ibm,secureboot-v1", },
+		{ .compatible = "ibm,secureboot-v2", },
+		{},	/* terminating empty entry */
+	};
+	/* Callers in this file release the returned node with of_node_put(). */
+	return of_find_matching_node(NULL, ids);
+}
+
+/*
+ * Report whether secure boot is enforced, per the firmware secureboot
+ * node or, failing that, the root "ibm,secure-boot" property (value > 1).
+ */
+bool is_ppc_secureboot_enabled(void)
+{
+	struct device_node *sb_node = get_ppc_fw_sb_node();
+	bool enforcing;
+	u32 level;
+
+	enforcing = of_property_read_bool(sb_node, "os-secureboot-enforcing");
+	of_node_put(sb_node);
+
+	/* Consult the root property only if the node did not enable it. */
+	if (!enforcing &&
+	    !of_property_read_u32(of_root, "ibm,secure-boot", &level))
+		enforcing = (level > 1);
+
+	pr_info("Secure boot mode %s\n", enforcing ? "enabled" : "disabled");
+	return enforcing;
+}
+
+/*
+ * Report whether trusted boot is enabled, per the firmware secureboot
+ * node or, failing that, the root "ibm,trusted-boot" property (value > 0).
+ */
+bool is_ppc_trustedboot_enabled(void)
+{
+	struct device_node *sb_node = get_ppc_fw_sb_node();
+	bool trusted;
+	u32 level;
+
+	trusted = of_property_read_bool(sb_node, "trusted-enabled");
+	of_node_put(sb_node);
+
+	/* Consult the root property only if the node did not enable it. */
+	if (!trusted &&
+	    !of_property_read_u32(of_root, "ibm,trusted-boot", &level))
+		trusted = (level > 0);
+
+	pr_info("Trusted boot mode %s\n", trusted ? "enabled" : "disabled");
+	return trusted;
+}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 0000000000..4856e1a516
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,866 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/memblock.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
+#include <linux/seq_buf.h>
+#include <linux/debugfs.h>
+
+#include <asm/asm-prototypes.h>
+#include <asm/code-patching.h>
+#include <asm/security_features.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/inst.h>
+
+#include "setup.h"
+
+u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+enum branch_cache_flush_type {
+	BRANCH_CACHE_FLUSH_NONE	= 0x1,
+	BRANCH_CACHE_FLUSH_SW	= 0x2,
+	BRANCH_CACHE_FLUSH_HW	= 0x4,
+};
+static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
+
+bool barrier_nospec_enabled;
+static bool no_nospec;
+static bool btb_flush_enabled;
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
+static bool no_spectrev2;
+#endif
+
+static void enable_barrier_nospec(bool enable)
+{
+	barrier_nospec_enabled = enable;
+	do_barrier_nospec_fixups(enable);
+}
+
+void __init setup_barrier_nospec(void)
+{
+	bool enable;
+
+	/*
+	 * It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well.
+	 * But there's a good reason not to. The two flags we check below are
+	 * both enabled by default in the kernel, so if the hcall is not
+	 * functional they will be enabled.
+	 * On a system where the host firmware has been updated (so the ori
+	 * functions as a barrier), but on which the hypervisor (KVM/Qemu) has
+	 * not been updated, we would like to enable the barrier. Dropping the
+	 * check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is
+	 * we potentially enable the barrier on systems where the host firmware
+	 * is not updated, but that's harmless as it's a no-op.
+	 */
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
+	/* Nothing is patched if nospectre_v1 was given or mitigations are off. */
+	if (!no_nospec && !cpu_mitigations_off())
+		enable_barrier_nospec(enable);
+}
+
+static int __init handle_nospectre_v1(char *p)
+{
+	no_nospec = true;
+
+	return 0;
+}
+early_param("nospectre_v1", handle_nospectre_v1);
+
+#ifdef CONFIG_DEBUG_FS
+/* debugfs write handler: accept 0 or 1 and toggle the barrier on change. */
+static int barrier_nospec_set(void *data, u64 val)
+{
+	bool want;
+
+	/* Anything other than 0 or 1 is rejected. */
+	if (val > 1)
+		return -EINVAL;
+
+	want = (val == 1);
+
+	/* Only call enable_barrier_nospec() when the state changes. */
+	if (want != barrier_nospec_enabled)
+		enable_barrier_nospec(want);
+
+	return 0;
+}
+
+static int barrier_nospec_get(void *data, u64 *val)
+{
+	*val = barrier_nospec_enabled ? 1 : 0;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
+			 barrier_nospec_set, "%llu\n");
+
+static __init int barrier_nospec_debugfs_init(void)
+{
+	debugfs_create_file_unsafe("barrier_nospec", 0600,
+				   arch_debugfs_dir, NULL,
+				   &fops_barrier_nospec);
+	return 0;
+}
+device_initcall(barrier_nospec_debugfs_init);
+
+static __init int security_feature_debugfs_init(void)
+{
+	debugfs_create_x64("security_features", 0400, arch_debugfs_dir,
+			   &powerpc_security_features);
+	return 0;
+}
+device_initcall(security_feature_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
+static int __init handle_nospectre_v2(char *p)
+{
+	no_spectrev2 = true;
+
+	return 0;
+}
+early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_E500 || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_E500
+void __init setup_spectre_v2(void)
+{
+	if (no_spectrev2 || cpu_mitigations_off())
+		do_btb_flush_fixups();
+	else
+		btb_flush_enabled = true;
+}
+#endif /* CONFIG_PPC_E500 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Write the Meltdown status line into @buf and return its length. */
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool l1d_private = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+	struct seq_buf s;
+
+	if (!rfi_flush) {
+		if (l1d_private)
+			return sprintf(buf, "Vulnerable: L1D private per thread\n");
+
+		/* With neither L1D flush flag set, report "Not affected". */
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) ||
+		    security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+			return sprintf(buf, "Vulnerable\n");
+
+		return sprintf(buf, "Not affected\n");
+	}
+
+	/* RFI flush is on: build the mitigation line piece by piece. */
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+	seq_buf_printf(&s, "Mitigation: RFI Flush");
+	if (l1d_private)
+		seq_buf_printf(&s, ", L1D private per thread");
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_meltdown(dev, attr, buf);
+}
+#endif
+
+/* Write the Spectre v1 status line into @buf and return its length. */
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct seq_buf s;
+
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+	if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) {
+		seq_buf_printf(&s, "Not affected\n");
+		return s.len;
+	}
+
+	if (barrier_nospec_enabled)
+		seq_buf_printf(&s, "Mitigation: __user pointer sanitization");
+	else
+		seq_buf_printf(&s, "Vulnerable");
+	if (security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31))
+		seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct seq_buf s;
+	bool bcs, ccd;
+	/* Builds one status line: the mitigation in use, then link stack info. */
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+	bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+	ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	if (bcs || ccd) {
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (bcs)
+			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+		if (bcs && ccd)	/* separator when both are reported */
+			seq_buf_printf(&s, ", ");
+
+		if (ccd)
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+
+	} else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
+		seq_buf_printf(&s, "Mitigation: Software count cache flush");
+
+		if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW)
+			seq_buf_printf(&s, " (hardware accelerated)");
+
+	} else if (btb_flush_enabled) {
+		seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
+	} else {
+		seq_buf_printf(&s, "Vulnerable");
+	}
+	/* Link stack flush details are appended only in the first two cases above. */
+	if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
+		if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE)
+			seq_buf_printf(&s, ", Software link stack flush");
+		if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW)
+			seq_buf_printf(&s, " (hardware accelerated)");
+	}
+
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+static bool stf_barrier;
+
+/* "no_stf_barrier" handler (also called by the ssbd handlers): set the flag. */
+static int __init handle_no_stf_barrier(char *p)
+{
+	pr_info("stf-barrier: disabled on command line.\n");
+	no_stf_barrier = true;
+	return 0;
+}
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+	return stf_enabled_flush_types;
+}
+
+/*
+ * This is the generic flag used by other architectures.
+ * Returns 0 for a recognised value (or none), 1 otherwise.
+ */
+static int __init handle_ssbd(char *p)
+{
+	/* Until firmware tells us, we have the barrier with auto */
+	if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0)
+		return 0;
+	if (strncmp(p, "off", 3) != 0)
+		return 1;
+	handle_no_stf_barrier(NULL);
+	return 0;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_no_ssbd(char *p)
+{
+	handle_no_stf_barrier(NULL);
+	return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+/* Apply fixups for the detected barrier type (or none); record the state. */
+static void stf_barrier_enable(bool enable)
+{
+	enum stf_barrier_type type;
+
+	type = enable ? stf_enabled_flush_types : STF_BARRIER_NONE;
+	do_stf_barrier_fixups(type);
+	stf_barrier = enable;
+}
+
+/*
+ * Select the STF barrier type for this CPU from its architecture level,
+ * then enable it when the security features ask for it and nothing vetoes.
+ */
+void setup_stf_barrier(void)
+{
+	enum stf_barrier_type type;
+	bool enable;
+
+	/* Default to fallback in case fw-features are not available */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		type = STF_BARRIER_EIEIO;
+		pr_info("stf-barrier: eieio barrier available\n");
+	} else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		type = STF_BARRIER_SYNC_ORI;
+		pr_info("stf-barrier: hwsync barrier available\n");
+	} else if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		type = STF_BARRIER_FALLBACK;
+		pr_info("stf-barrier: fallback barrier available\n");
+	} else {
+		type = STF_BARRIER_NONE;
+	}
+	stf_enabled_flush_types = type;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_STF_BARRIER);
+
+	/* The command line flag and cpu_mitigations_off() both veto it. */
+	if (!no_stf_barrier && !cpu_mitigations_off())
+		stf_barrier_enable(enable);
+}
+
+/* Write the speculative store bypass status line into @buf. */
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	const char *type;
+
+	if (!stf_barrier || stf_enabled_flush_types == STF_BARRIER_NONE) {
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) ||
+		    security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+			return sprintf(buf, "Vulnerable\n");
+		return sprintf(buf, "Not affected\n");
+	}
+	switch (stf_enabled_flush_types) {
+	case STF_BARRIER_EIEIO:
+		type = "eieio";
+		break;
+	case STF_BARRIER_SYNC_ORI:
+		type = "hwsync";
+		break;
+	case STF_BARRIER_FALLBACK:
+		type = "fallback";
+		break;
+	default:
+		type = "unknown";
+	}
+	return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
+}
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+	/*
+	 * Two cases are reported as not affected:
+	 *
+	 *  - SEC_FTR_STF_BARRIER is clear. The feature is on by default, so
+	 *    firmware has explicitly said the CPU is not vulnerable via
+	 *    either the hypercall or device tree.
+	 *
+	 *  - The system's CPU has no known barrier (see
+	 *    setup_stf_barrier()), in which case we assume that the CPU is
+	 *    not vulnerable.
+	 */
+	if (!security_ftr_enabled(SEC_FTR_STF_BARRIER) ||
+	    stf_enabled_flush_types == STF_BARRIER_NONE)
+		return PR_SPEC_NOT_AFFECTED;
+
+	/*
+	 * Otherwise the CPU is vulnerable. The barrier is not a global or
+	 * per-process mitigation, so the only value that can be reported here
+	 * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc.
+	 */
+	return PR_SPEC_ENABLE;
+}
+
+/* Report the speculation control state for 'which'. Only
+ * PR_SPEC_STORE_BYPASS is handled; anything else yields -ENODEV. */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+	if (which != PR_SPEC_STORE_BYPASS)
+		return -ENODEV;
+
+	return ssb_prctl_get(task);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler: accepts 0 or 1 only and re-applies the barrier
+ * fixups when that differs from the current stf_barrier state.
+ */
+static int stf_barrier_set(void *data, u64 val)
+{
+	bool enable = (val == 1);
+
+	if (val > 1)
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != stf_barrier)
+		stf_barrier_enable(enable);
+
+	return 0;
+}
+
+static int stf_barrier_get(void *data, u64 *val)
+{
+	*val = !!stf_barrier;	/* report the flag as 0 or 1 */
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
+			 "%llu\n");
+
+static __init int stf_barrier_debugfs_init(void)
+{
+	debugfs_create_file_unsafe("stf_barrier", 0600, arch_debugfs_dir,
+				   NULL, &fops_stf_barrier);
+	return 0;	/* the debugfs call's result is not checked */
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
+static void update_branch_cache_flush(void)
+{
+	u32 *site, __maybe_unused *site2;
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	site = &patch__call_kvm_flush_link_stack;
+	site2 = &patch__call_kvm_flush_link_stack_p9;
+	// These control the branches from guest exit to kvm_flush_link_stack
+	if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+		patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+		patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
+	} else {
+		// Could use HW flush, but that could also flush count cache
+		patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+		patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+	}
+#endif
+
+	// Patch out the bcctr (site 3) first, then nop sites 2 and 1
+	site = &patch__call_flush_branch_caches3;
+	patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+	site = &patch__call_flush_branch_caches2;
+	patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+	site = &patch__call_flush_branch_caches1;
+	patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+
+	// This controls the branch from _switch to flush_branch_caches
+	if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
+	    link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+		// Nothing to be done: all three sites were nopped above
+
+	} else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW &&
+		   link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) {
+		// Patch in the bcctr last
+		site = &patch__call_flush_branch_caches1;
+		patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff
+		site = &patch__call_flush_branch_caches2;
+		patch_instruction_site(site, ppc_inst(0x7d2903a6)); // mtctr r9
+		site = &patch__call_flush_branch_caches3;
+		patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH));
+
+	} else {	// site still points at patch__call_flush_branch_caches1
+		patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK);
+
+		// If we just need to flush the link stack, early return
+		if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+			patch_instruction_site(&patch__flush_link_stack_return,
+					       ppc_inst(PPC_RAW_BLR()));
+
+		// If we have the flush instruction, early return
+		} else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) {
+			patch_instruction_site(&patch__flush_count_cache_return,
+					       ppc_inst(PPC_RAW_BLR()));
+		}
+	}
+}
+
+/*
+ * Recompute count_cache_flush_type and link_stack_flush_type from 'enable'
+ * and the security feature bits, log the outcome, then repatch the code.
+ */
+static void toggle_branch_cache_flush(bool enable)
+{
+	if (enable && security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+		if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_HW;
+			pr_info("count-cache-flush: hardware flush enabled.\n");
+		} else {
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_SW;
+			pr_info("count-cache-flush: software flush enabled.\n");
+		}
+	} else {
+		count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+		pr_info("count-cache-flush: flush disabled.\n");
+	}
+
+	if (enable && security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) {
+		if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) {
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_HW;
+			pr_info("link-stack-flush: hardware flush enabled.\n");
+		} else {
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_SW;
+			pr_info("link-stack-flush: software flush enabled.\n");
+		}
+	} else {
+		link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
+		pr_info("link-stack-flush: flush disabled.\n");
+	}
+
+	update_branch_cache_flush();
+}
+
+/*
+ * Decide whether the branch cache flushes should be on and apply that.
+ */
+void setup_count_cache_flush(void)
+{
+	bool enable = !(no_spectrev2 || cpu_mitigations_off());
+
+	if (!enable &&
+	    (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+	     security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)))
+		pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
+
+	/*
+	 * There's no firmware feature flag/hypervisor bit to tell us we need to
+	 * flush the link stack on context switch. So we set it here if we see
+	 * either of the Spectre v2 mitigations that aim to protect userspace.
+	 */
+	if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+	    security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+	toggle_branch_cache_flush(enable);
+}
+
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
+bool rfi_flush;
+static bool entry_flush;
+static bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
+
+static int __init handle_no_rfi_flush(char *p)
+{
+	pr_info("rfi-flush: disabled on command line.\n");
+	no_rfi_flush = true;	/* consulted by setup_rfi_flush() */
+	return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+static int __init handle_no_entry_flush(char *p)
+{
+	pr_info("entry-flush: disabled on command line.\n");
+	no_entry_flush = true;	/* consulted by setup_entry_flush() */
+	return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+	pr_info("uaccess-flush: disabled on command line.\n");
+	no_uaccess_flush = true;	/* consulted by setup_uaccess_flush() */
+	return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see documentation that
+ * says to use nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+	handle_no_rfi_flush(NULL);
+	return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+	/*
+	 * We don't need to do the flush explicitly. Entering and exiting the
+	 * kernel is sufficient, the RFI exit handlers will do the right thing.
+	 */
+}
+
+/* Apply the RFI flush fixups for the requested state and record it. */
+void rfi_flush_enable(bool enable)
+{
+	do_rfi_flush_fixups(enable ? enabled_flush_types : L1D_FLUSH_NONE);
+	/* run do_nothing() on every CPU; its comment explains why */
+	if (enable)
+		on_each_cpu(do_nothing, NULL, 1);
+
+	rfi_flush = enable;
+}
+
+/* Apply the entry flush fixups for the requested state and record it. */
+static void entry_flush_enable(bool enable)
+{
+	do_entry_flush_fixups(enable ? enabled_flush_types : L1D_FLUSH_NONE);
+
+	/* run do_nothing() on every CPU; its comment explains why */
+	if (enable)
+		on_each_cpu(do_nothing, NULL, 1);
+
+	entry_flush = enable;
+}
+
+/* Switch the uaccess flush: code fixups, static key, then the state flag. */
+static void uaccess_flush_enable(bool enable)
+{
+	if (!enable) {
+		static_branch_disable(&uaccess_flush_key);
+		do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+	} else {
+		do_uaccess_flush_fixups(enabled_flush_types);
+		static_branch_enable(&uaccess_flush_key);
+		on_each_cpu(do_nothing, NULL, 1);
+	}
+	uaccess_flush = enable;
+}
+
+/* Allocate the L1D fallback flush area (once) and publish it in every paca. */
+static void __ref init_fallback_flush(void)
+{
+	u64 l1d_size = ppc64_caches.l1d.size;
+	u64 limit, area_size;
+	int cpu;
+
+	/* Only allocate the fallback flush area once (at boot time). */
+	if (l1d_flush_fallback_area)
+		return;
+
+	/*
+	 * If there is no d-cache-size property in the device tree, l1d_size
+	 * could be zero. That leads to the loop in the asm wrapping around to
+	 * 2^64-1, and then walking off the end of the fallback area and
+	 * eventually causing a page fault which is fatal. Just default to
+	 * something vaguely sane.
+	 */
+	if (l1d_size == 0)
+		l1d_size = 64 * 1024;
+
+	limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+	/*
+	 * Align to L1d size, and size it at 2x L1d size, to catch possible
+	 * hardware prefetch runoff. We don't have a recipe for load patterns to
+	 * reliably avoid the prefetcher.
+	 */
+	area_size = l1d_size * 2;
+	l1d_flush_fallback_area = memblock_alloc_try_nid(area_size, l1d_size,
+							 MEMBLOCK_LOW_LIMIT,
+							 limit, NUMA_NO_NODE);
+	if (!l1d_flush_fallback_area)
+		panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+		      __func__, area_size, l1d_size, &limit);
+
+	/* Every possible CPU's paca gets the same area and flush size */
+	for_each_possible_cpu(cpu) {
+		paca_ptrs[cpu]->rfi_flush_fallback_area = l1d_flush_fallback_area;
+		paca_ptrs[cpu]->l1d_flush_size = l1d_size;
+	}
+}
+
+/* Announce and record the flush types, then enable the flush if allowed. */
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+	if (types & L1D_FLUSH_FALLBACK) {
+		pr_info("rfi-flush: fallback displacement flush available\n");
+		init_fallback_flush();
+	}
+	if (types & L1D_FLUSH_ORI)
+		pr_info("rfi-flush: ori type flush available\n");
+	if (types & L1D_FLUSH_MTTRIG)
+		pr_info("rfi-flush: mttrig type flush available\n");
+
+	enabled_flush_types = types;
+
+	if (cpu_mitigations_off() || no_rfi_flush)
+		return;
+	rfi_flush_enable(enable);
+}
+
+/* Apply 'enable' unless mitigations are off or no_entry_flush was given. */
+void setup_entry_flush(bool enable)
+{
+	if (cpu_mitigations_off() || no_entry_flush)
+		return;
+
+	entry_flush_enable(enable);
+}
+
+/* Apply 'enable' unless mitigations are off or no_uaccess_flush was given. */
+void setup_uaccess_flush(bool enable)
+{
+	if (cpu_mitigations_off() || no_uaccess_flush)
+		return;
+
+	uaccess_flush_enable(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler shared by the count_cache_flush and link_stack_flush
+ * files: accepts 0 or 1 and re-runs toggle_branch_cache_flush() with it.
+ */
+static int count_cache_flush_set(void *data, u64 val)
+{
+	bool enable = (val == 1);
+
+	if (val > 1)
+		return -EINVAL;
+
+	toggle_branch_cache_flush(enable);
+
+	return 0;
+}
+
+/*
+ * debugfs read handler: 1 when any count cache flush type is selected,
+ * 0 when it is BRANCH_CACHE_FLUSH_NONE.
+ */
+static int count_cache_flush_get(void *data, u64 *val)
+{
+	*val = (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE);
+	return 0;
+}
+
+/*
+ * debugfs read handler: 1 when any link stack flush type is selected,
+ * 0 when it is BRANCH_CACHE_FLUSH_NONE.
+ */
+static int link_stack_flush_get(void *data, u64 *val)
+{
+	*val = (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
+			 count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get,
+			 count_cache_flush_set, "%llu\n");
+
+static __init int count_cache_flush_debugfs_init(void)
+{
+	debugfs_create_file_unsafe("count_cache_flush", 0600,
+				   arch_debugfs_dir, NULL,
+				   &fops_count_cache_flush);
+	debugfs_create_file_unsafe("link_stack_flush", 0600,
+				   arch_debugfs_dir, NULL,
+				   &fops_link_stack_flush);
+	return 0;	/* the debugfs calls' results are not checked */
+}
+device_initcall(count_cache_flush_debugfs_init);
+
+/*
+ * debugfs write handler: accepts 0 or 1 only and calls rfi_flush_enable()
+ * when that differs from the current rfi_flush state.
+ */
+static int rfi_flush_set(void *data, u64 val)
+{
+	bool enable = (val == 1);
+
+	if (val > 1)
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != rfi_flush)
+		rfi_flush_enable(enable);
+
+	return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+	*val = !!rfi_flush;	/* report the flag as 0 or 1 */
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+/*
+ * debugfs write handler: accepts 0 or 1 only and calls entry_flush_enable()
+ * when that differs from the current entry_flush state.
+ */
+static int entry_flush_set(void *data, u64 val)
+{
+	bool enable = (val == 1);
+
+	if (val > 1)
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != entry_flush)
+		entry_flush_enable(enable);
+
+	return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+	*val = !!entry_flush;	/* report the flag as 0 or 1 */
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+/*
+ * debugfs write handler: accepts 0 or 1 only and calls uaccess_flush_enable()
+ * when that differs from the current uaccess_flush state.
+ */
+static int uaccess_flush_set(void *data, u64 val)
+{
+	bool enable = (val == 1);
+
+	if (val > 1)
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != uaccess_flush)
+		uaccess_flush_enable(enable);
+
+	return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+	*val = !!uaccess_flush;	/* report the flag as 0 or 1 */
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+	debugfs_create_file("rfi_flush", 0600, arch_debugfs_dir, NULL, &fops_rfi_flush);
+	debugfs_create_file("entry_flush", 0600, arch_debugfs_dir, NULL, &fops_entry_flush);
+	debugfs_create_file("uaccess_flush", 0600, arch_debugfs_dir, NULL, &fops_uaccess_flush);
+	return 0;	/* the debugfs calls' results are not checked */
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
new file mode 100644
index 0000000000..19172a2804
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-ops.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * This file initializes secvar operations for PowerPC Secureboot
+ */
+
+#include <linux/cache.h>
+#include <asm/secvar.h>
+#include <asm/bug.h>
+
+const struct secvar_operations *secvar_ops __ro_after_init = NULL;
+
+/* Install 'ops' once; a second call warns (once) and returns -EBUSY. */
+int set_secvar_ops(const struct secvar_operations *ops)
+{
+	if (!WARN_ON_ONCE(secvar_ops)) {
+		secvar_ops = ops;
+		return 0;
+	}
+	return -EBUSY;
+}
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
new file mode 100644
index 0000000000..eb3c053f32
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com>
+ *
+ * This code exposes secure variables to user via sysfs
+ */
+
+#define pr_fmt(fmt) "secvar-sysfs: "fmt
+
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <asm/secvar.h>
+
+#define NAME_MAX_SIZE	   1024
+
+static struct kobject *secvar_kobj;
+static struct kset *secvar_kset;
+
+/* Show handler for 'format': emit the backend's format string, else -EIO. */
+static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+			   char *buf)
+{
+	char tmp[32];
+	ssize_t len = secvar_ops->format(tmp, sizeof(tmp));
+
+	if (len > 0)
+		return sysfs_emit(buf, "%s\n", tmp);
+	if (len == 0)
+		pr_err("Got empty format string from backend\n");
+	else
+		pr_err("Error %zd reading format string\n", len);
+	return -EIO;
+}
+
+
+/* Show handler for 'size': ask the backend for the size only (NULL buffer). */
+static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	const char *name = kobj->name;
+	u64 dsize;
+	int rc = secvar_ops->get(name, strlen(name) + 1, NULL, &dsize);
+
+	if (!rc)
+		return sysfs_emit(buf, "%llu\n", dsize);
+
+	if (rc != -ENOENT)
+		pr_err("Error retrieving %s variable size %d\n", name, rc);
+	return rc;
+}
+
+/* Read handler for 'data': fetch the whole variable, copy out the window. */
+static ssize_t data_read(struct file *filep, struct kobject *kobj,
+			 struct bin_attribute *attr, char *buf, loff_t off,
+			 size_t count)
+{
+	const char *name = kobj->name;
+	size_t name_len = strlen(name) + 1;
+	char *data;
+	u64 dsize;
+	int rc;
+
+	rc = secvar_ops->get(name, name_len, NULL, &dsize);
+	if (rc) {
+		if (rc != -ENOENT)
+			pr_err("Error getting %s variable size %d\n", name, rc);
+		return rc;
+	}
+	pr_debug("dsize is %llu\n", dsize);
+
+	data = kzalloc(dsize, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	rc = secvar_ops->get(name, name_len, data, &dsize);
+	if (rc)
+		pr_err("Error getting %s variable %d\n", name, rc);
+	else
+		rc = memory_read_from_buffer(buf, count, &off, data, dsize);
+
+	kfree(data);
+	return rc;
+}
+
+/* Write handler for 'update': pass the buffer to the backend's set(). */
+static ssize_t update_write(struct file *filep, struct kobject *kobj,
+			    struct bin_attribute *attr, char *buf, loff_t off,
+			    size_t count)
+{
+	int rc;
+
+	pr_debug("count is %zu\n", count);	/* count is a size_t */
+	rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count);
+	if (rc) {
+		pr_err("Error setting the %s variable %d\n", kobj->name, rc);
+		return rc;
+	}
+	return count;	/* the whole write is reported as consumed */
+}
+
+static struct kobj_attribute format_attr = __ATTR_RO(format);
+
+static struct kobj_attribute size_attr = __ATTR_RO(size);
+
+static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0);
+
+static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0);
+
+static struct bin_attribute *secvar_bin_attrs[] = {
+	&data_attr,
+	&update_attr,
+	NULL,
+};
+
+static struct attribute *secvar_attrs[] = {
+	&size_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group secvar_attr_group = {
+	.attrs = secvar_attrs,
+	.bin_attrs = secvar_bin_attrs,
+};
+__ATTRIBUTE_GROUPS(secvar_attr);
+
+static struct kobj_type secvar_ktype = {
+	.sysfs_ops	= &kobj_sysfs_ops,
+	.default_groups = secvar_attr_groups,
+};
+
+/* Size the 'data' and 'update' files from the backend's max_size(). */
+static int update_kobj_size(void)
+{
+	u64 varsize;
+	int rc;
+
+	rc = secvar_ops->max_size(&varsize);
+	if (rc)
+		return rc;
+
+	data_attr.size = varsize;
+	update_attr.size = varsize;
+	return 0;
+}
+
+/* Create the "config" group if the backend supplies config_attrs. */
+static int secvar_sysfs_config(struct kobject *kobj)
+{
+	struct attribute_group config_group = {
+		.name = "config",
+		.attrs = (struct attribute **)secvar_ops->config_attrs,
+	};
+
+	if (!config_group.attrs)
+		return 0;
+	return sysfs_create_group(kobj, &config_group);
+}
+
+/* Allocate a kobject called 'name', parent it to secvar_kset, announce it. */
+static int add_var(const char *name)
+{
+	struct kobject *kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+	int rc;
+
+	if (!kobj)
+		return -ENOMEM;
+
+	kobject_init(kobj, &secvar_ktype);
+
+	rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
+	if (!rc) {
+		kobject_uevent(kobj, KOBJ_ADD);
+		return 0;
+	}
+
+	pr_warn("kobject_add error %d for attribute: %s\n", rc,
+		name);
+	kobject_put(kobj);
+	return rc;
+}
+
+/* Walk the backend with get_next() and add a kobject per variable name. */
+static int secvar_sysfs_load(void)
+{
+	char *name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
+	u64 namesize = 0;
+	int rc;
+
+	if (!name)
+		return -ENOMEM;
+
+	for (;;) {
+		rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE);
+		if (rc == -ENOENT) {
+			rc = 0;		/* -ENOENT ends the walk without error */
+			break;
+		}
+		if (rc) {
+			pr_err("error getting secvar from firmware %d\n", rc);
+			break;
+		}
+		rc = add_var(name);
+		if (rc)
+			break;
+	}
+	kfree(name);
+	return rc;
+}
+
+/* Add a kobject for each entry of the NULL-terminated var_names array. */
+static int secvar_sysfs_load_static(void)
+{
+	const char * const *name_ptr;
+
+	for (name_ptr = secvar_ops->var_names; *name_ptr; name_ptr++) {
+		int rc = add_var(*name_ptr);
+
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+/* Create the "secvar" kobject tree under firmware_kobj and populate it. */
+static int secvar_sysfs_init(void)
+{
+	u64 max_size;
+	int rc;
+
+	if (!secvar_ops) {
+		pr_warn("Failed to retrieve secvar operations\n");
+		return -ENODEV;
+	}
+
+	secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
+	if (!secvar_kobj) {
+		pr_err("Failed to create firmware kobj\n");
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
+	if (rc) {
+		pr_err("Failed to create format object\n");
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
+	if (!secvar_kset) {
+		pr_err("sysfs kobject registration failed\n");
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	rc = update_kobj_size();
+	if (rc) {
+		pr_err("Cannot read the size of the attribute\n");
+		goto err;
+	}
+
+	rc = secvar_sysfs_config(secvar_kobj);
+	if (rc) {
+		pr_err("Failed to create config directory\n");
+		goto err;
+	}
+
+	if (secvar_ops->get_next)
+		rc = secvar_sysfs_load();
+	else
+		rc = secvar_sysfs_load_static();
+
+	if (rc) {
+		pr_err("Failed to create variable attributes\n");
+		goto err;
+	}
+
+	// Due to sysfs limitations, we will only ever get a write buffer of
+	// up to 1 page in size. Warn if that may cause problems, but only
+	// when the size query succeeded (max_size may be unset otherwise).
+	if (!secvar_ops->max_size(&max_size) && max_size > PAGE_SIZE)
+		pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n",
+				    PAGE_SIZE, max_size);
+
+	return 0;
+err:
+	kobject_put(secvar_kobj);
+	return rc;
+}
+
+late_initcall(secvar_sysfs_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
new file mode 100644
index 0000000000..20f72cd1d8
--- /dev/null
+++ b/arch/powerpc/kernel/setup-common.c
@@ -0,0 +1,1013 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common boot and setup code for both 32-bit and 64-bit.
+ * Extracted from arch/powerpc/kernel/setup_64.c.
+ *
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/panic_notifier.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/screen_info.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+#include <linux/unistd.h>
+#include <linux/seq_buf.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/hugetlb.h>
+#include <linux/pgtable.h>
+#include <asm/io.h>
+#include <asm/paca.h>
+#include <asm/processor.h>
+#include <asm/vdso_datapage.h>
+#include <asm/smp.h>
+#include <asm/elf.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/firmware.h>
+#include <asm/btext.h>
+#include <asm/nvram.h>
+#include <asm/setup.h>
+#include <asm/rtas.h>
+#include <asm/iommu.h>
+#include <asm/serial.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/xmon.h>
+#include <asm/cputhreads.h>
+#include <mm/mmu_decl.h>
+#include <asm/archrandom.h>
+#include <asm/fadump.h>
+#include <asm/udbg.h>
+#include <asm/hugetlb.h>
+#include <asm/livepatch.h>
+#include <asm/mmu_context.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/kasan.h>
+#include <asm/mce.h>
+
+#include "setup.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* The main machine-dep calls structure
+ */
+struct machdep_calls ppc_md;
+EXPORT_SYMBOL(ppc_md);
+struct machdep_calls *machine_id;
+EXPORT_SYMBOL(machine_id);
+
+int boot_cpuid = -1;
+EXPORT_SYMBOL_GPL(boot_cpuid);
+
+#ifdef CONFIG_PPC64
+int boot_cpu_hwid = -1;
+#endif
+
+/*
+ * These are used in binfmt_elf.c to put aux entries on the stack
+ * for each elf executable being started.
+ */
+int dcache_bsize;
+int icache_bsize;
+
+/*
+ * This still seems to be needed... -- paulus
+ */ 
+struct screen_info screen_info = {
+	.orig_x = 0,
+	.orig_y = 25,
+	.orig_video_cols = 80,
+	.orig_video_lines = 25,
+	.orig_video_isVGA = 1,
+	.orig_video_points = 16
+};
+#if defined(CONFIG_FB_VGA16_MODULE)
+EXPORT_SYMBOL(screen_info);
+#endif
+
+/* Variables required to store legacy IO irq routing */
+int of_i8042_kbd_irq;
+EXPORT_SYMBOL_GPL(of_i8042_kbd_irq);
+int of_i8042_aux_irq;
+EXPORT_SYMBOL_GPL(of_i8042_aux_irq);
+
+#ifdef __DO_IRQ_CANON
+/* XXX should go elsewhere eventually */
+int ppc_do_canonicalize_irqs;
+EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
+#endif
+
+#ifdef CONFIG_CRASH_CORE
+/* This keeps a track of which one is the crashing cpu. */
+int crashing_cpu = -1;
+#endif
+
+/* also used by kexec */
+void machine_shutdown(void)
+{
+	/*
+	 * If fadump is active, clean up the fadump registration before we
+	 * shut down.
+	 */
+	fadump_cleanup();
+
+	if (ppc_md.machine_shutdown)
+		ppc_md.machine_shutdown();
+}
+
+static void machine_hang(void)
+{
+	pr_emerg("System Halted, OK to turn off power\n");
+	local_irq_disable();
+	while (1)
+		;	/* spin forever with local interrupts disabled */
+}
+
+void machine_restart(char *cmd)
+{
+	machine_shutdown();
+	if (ppc_md.restart)
+		ppc_md.restart(cmd);	/* platform hook, tried first */
+
+	smp_send_stop();
+
+	do_kernel_restart(cmd);
+	mdelay(1000);
+
+	machine_hang();	/* reached only if nothing above restarted us */
+}
+
+void machine_power_off(void)
+{
+	machine_shutdown();
+	do_kernel_power_off();
+	smp_send_stop();
+	machine_hang();	/* reached only if the power-off did not happen */
+}
+/* Used by the G5 thermal driver */
+EXPORT_SYMBOL_GPL(machine_power_off);
+
+void (*pm_power_off)(void);
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+	if (!max_longs || !ppc_md.get_random_seed)
+		return 0;	/* nothing requested, or no platform hook */
+	return ppc_md.get_random_seed(v) ? 1 : 0;
+}
+EXPORT_SYMBOL(arch_get_random_seed_longs);
+
+void machine_halt(void)
+{
+	machine_shutdown();
+	if (ppc_md.halt)
+		ppc_md.halt();	/* optional platform hook */
+
+	smp_send_stop();
+	machine_hang();
+}
+
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(unsigned int, cpu_pvr);
+#endif
+
+static void show_cpuinfo_summary(struct seq_file *m)
+{
+	unsigned long bogosum = 0;
+	struct device_node *root;
+	const char *model;
+	int i;
+
+	if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
+		for_each_online_cpu(i)	/* same loops_per_jiffy per CPU */
+			bogosum += loops_per_jiffy;
+		seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+			   bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
+	}
+	seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
+	if (ppc_md.name)
+		seq_printf(m, "platform\t: %s\n", ppc_md.name);
+	root = of_find_node_by_path("/");
+	if (root) {
+		model = of_get_property(root, "model", NULL);
+		if (model)
+			seq_printf(m, "model\t\t: %s\n", model);
+		of_node_put(root);
+	}
+	if (ppc_md.show_cpuinfo)
+		ppc_md.show_cpuinfo(m);
+
+	/* Display the amount of memory */
+	if (IS_ENABLED(CONFIG_PPC32))
+		seq_printf(m, "Memory\t\t: %d MB\n",
+			   (unsigned int)(total_memory / (1024 * 1024)));
+}
+
+/*
+ * seq_file ->show: print the entry for one CPU. 'v' is the CPU number plus
+ * one, as produced by c_start(); the summary follows the last online CPU.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	unsigned long cpu_id = (unsigned long)v - 1;
+	unsigned long proc_freq = ppc_proc_freq;
+	unsigned short maj, min;
+	unsigned int pvr;
+
+#ifdef CONFIG_SMP
+	pvr = per_cpu(cpu_pvr, cpu_id);
+#else
+	pvr = mfspr(SPRN_PVR);
+#endif
+
+	seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id);
+
+	if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
+		seq_puts(m, cur_cpu_spec->cpu_name);
+	else
+		seq_printf(m, "unknown (%08x)", pvr);
+
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		seq_puts(m, ", altivec supported");
+
+	seq_putc(m, '\n');
+
+#ifdef CONFIG_TAU
+	if (cpu_has_feature(CPU_FTR_TAU)) {
+		if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
+			/* more straightforward, but potentially misleading */
+			seq_printf(m,  "temperature \t: %u C (uncalibrated)\n",
+				   cpu_temp(cpu_id));
+		} else {
+			/* show the actual temp sensor range */
+			u32 temp;
+			temp = cpu_temp_both(cpu_id);
+			seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+				   temp & 0xff, temp >> 16);
+		}
+	}
+#endif /* CONFIG_TAU */
+
+	/*
+	 * Platforms that have variable clock rates, should implement
+	 * the method ppc_md.get_proc_freq() that reports the clock
+	 * rate of a given cpu. The rest can use ppc_proc_freq to
+	 * report the clock rate that is same across all cpus.
+	 */
+	if (ppc_md.get_proc_freq)
+		proc_freq = ppc_md.get_proc_freq(cpu_id);
+
+	if (proc_freq)
+		seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
+			   proc_freq / 1000000, proc_freq % 1000000);
+
+	/* If we are a Freescale core do a simple check so
+	 * we don't have to keep adding cases in the future.
+	 * Every path below assigns both maj and min. */
+	if (PVR_VER(pvr) & 0x8000) {
+		switch (PVR_VER(pvr)) {
+		case 0x8000:	/* 7441/7450/7451, Voyager */
+		case 0x8001:	/* 7445/7455, Apollo 6 */
+		case 0x8002:	/* 7447/7457, Apollo 7 */
+		case 0x8003:	/* 7447A, Apollo 7 PM */
+		case 0x8004:	/* 7448, Apollo 8 */
+		case 0x800c:	/* 7410, Nitro */
+			maj = ((pvr >> 8) & 0xF);
+			min = PVR_MIN(pvr);
+			break;
+		default:	/* e500/book-e */
+			maj = PVR_MAJ(pvr);
+			min = PVR_MIN(pvr);
+			break;
+		}
+	} else {
+		switch (PVR_VER(pvr)) {
+		case 0x1008:	/* 740P/750P ?? */
+			maj = ((pvr >> 8) & 0xFF) - 1;
+			min = pvr & 0xFF;
+			break;
+		case 0x004e: /* POWER9 bits 12-15 give chip type */
+		case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
+			maj = (pvr >> 8) & 0x0F;
+			min = pvr & 0xFF;
+			break;
+		default:
+			maj = (pvr >> 8) & 0xFF;
+			min = pvr & 0xFF;
+			break;
+		}
+	}
+
+	seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
+		   maj, min, PVR_VER(pvr), PVR_REV(pvr));
+
+	if (IS_ENABLED(CONFIG_PPC32))
+		seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
+			   (loops_per_jiffy / (5000 / HZ)) % 100);
+
+	seq_putc(m, '\n');
+
+	/* If this is the last cpu, print the summary */
+	if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
+		show_cpuinfo_summary(m);
+
+	return 0;
+}
+
+/* Move *pos to an online CPU; return CPU + 1 as the cookie, NULL when done. */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	/* just in case, cpu 0 is not the first */
+	*pos = *pos ? cpumask_next(*pos - 1, cpu_online_mask) :
+		      cpumask_first(cpu_online_mask);
+	if (*pos >= nr_cpu_ids)
+		return NULL;
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;		/* c_start() then snaps this to an online CPU */
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}	/* intentionally empty: c_start() acquires nothing to release */
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
+
+void __init check_for_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	DBG(" -> check_for_initrd()  initrd_start=0x%lx  initrd_end=0x%lx\n",
+	    initrd_start, initrd_end);
+
+	/* Drop the initrd reference unless both ends are kernel addresses
+	 * in the right order; otherwise point ROOT_DEV at the ramdisk.
+	 */
+	if (!is_kernel_addr(initrd_start) || !is_kernel_addr(initrd_end) ||
+	    initrd_end <= initrd_start)
+		initrd_start = initrd_end = 0;
+	else
+		ROOT_DEV = Root_RAM0;
+
+	if (initrd_start)
+		pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+
+	DBG(" <- check_for_initrd()\n");
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
+#ifdef CONFIG_SMP
+
+int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
+cpumask_t threads_core_mask __read_mostly;
+EXPORT_SYMBOL_GPL(threads_per_core);
+EXPORT_SYMBOL_GPL(threads_per_subcore);
+EXPORT_SYMBOL_GPL(threads_shift);
+EXPORT_SYMBOL_GPL(threads_core_mask);
+
+/* Record the threads-per-core count and build threads_core_mask from it. */
+static void __init cpu_init_thread_core_maps(int tpc)
+{
+	int i;
+
+	threads_per_core = threads_per_subcore = tpc;
+	cpumask_clear(&threads_core_mask);
+
+	/* This implementation only supports power of 2 number of threads
+	 * for simplicity and performance
+	 */
+	threads_shift = ilog2(tpc);
+	BUG_ON((1 << threads_shift) != tpc);
+
+	for (i = 0; i < tpc; i++)
+		cpumask_set_cpu(i, &threads_core_mask);
+
+	printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
+	       tpc, tpc > 1 ? "s" : "");
+	printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
+}
+
+
+u32 *cpu_to_phys_id = NULL;
+
+/**
+ * smp_setup_cpu_maps - initialize the following cpu maps:
+ *                      cpu_possible_mask
+ *                      cpu_present_mask
+ *
+ * Having the possible map set up early allows us to restrict allocations
+ * of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
+ *
+ * We do not initialize the online map here; cpus set their own bits in
+ * cpu_online_mask as they come up.
+ *
+ * This function is valid only for Open Firmware systems.  finish_device_tree
+ * must be called before using this.
+ *
+ * While we're here, record each cpu's "physical" id in cpu_to_phys_id[].
+ *
+ * NOTE: This must match the parsing done in early_init_dt_scan_cpus.
+ */
+void __init smp_setup_cpu_maps(void)
+{
+	struct device_node *dn;
+	int cpu = 0;
+	int nthreads = 1;
+
+	DBG("smp_setup_cpu_maps()\n");
+
+	cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
+					__alignof__(u32));
+	if (!cpu_to_phys_id)
+		panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
+		      __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
+
+	for_each_node_by_type(dn, "cpu") {
+		const __be32 *intserv;
+		__be32 cpu_be;
+		int j, len;
+		bool avail;
+
+		DBG("  * %pOF...\n", dn);
+
+		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+				&len);
+		if (intserv) {
+			DBG("    ibm,ppc-interrupt-server#s -> %zu threads\n",
+			    (len / sizeof(int)));
+		} else {
+			DBG("    no ibm,ppc-interrupt-server#s -> 1 thread\n");
+			intserv = of_get_property(dn, "reg", &len);
+			if (!intserv) {
+				cpu_be = cpu_to_be32(cpu);
+				intserv = &cpu_be;	/* assume logical == phys */
+				len = 4;
+			}
+		}
+
+		nthreads = len / sizeof(int);
+
+		/* Availability is a property of the node: look it up once */
+		avail = of_device_is_available(dn);
+		if (!avail)
+			avail = !of_property_match_string(dn,
+					"enable-method", "spin-table");
+
+		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
+			DBG("    thread %d -> cpu %d (hard id %d)\n",
+			    j, cpu, be32_to_cpu(intserv[j]));
+
+			set_cpu_present(cpu, avail);
+			set_cpu_possible(cpu, true);
+			cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
+			cpu++;
+		}
+
+		if (cpu >= nr_cpu_ids) {
+			of_node_put(dn);
+			break;
+		}
+	}
+
+	/* If no SMT supported, nthreads is forced to 1 */
+	if (!cpu_has_feature(CPU_FTR_SMT)) {
+		DBG("  SMT disabled ! nthreads forced to 1\n");
+		nthreads = 1;
+	}
+
+#ifdef CONFIG_PPC64
+	/*
+	 * On pSeries LPAR, we need to know how many cpus
+	 * could possibly be added to this partition.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR) &&
+	    (dn = of_find_node_by_path("/rtas"))) {
+		int num_addr_cell, num_size_cell, maxcpus, len;
+		const __be32 *ireg;
+
+		num_addr_cell = of_n_addr_cells(dn);
+		num_size_cell = of_n_size_cells(dn);
+
+		ireg = of_get_property(dn, "ibm,lrdr-capacity", &len);
+		/* The cpu count is read from the cell after the addr/size cells */
+		if (!ireg ||
+		    len < (num_addr_cell + num_size_cell + 1) * (int)sizeof(*ireg))
+			goto out;
+
+		maxcpus = be32_to_cpup(ireg + num_addr_cell + num_size_cell);
+
+		/* Scale by the threads per core on SMT capable processors */
+		if (cpu_has_feature(CPU_FTR_SMT))
+			maxcpus *= nthreads;
+
+		if (maxcpus > nr_cpu_ids) {
+			printk(KERN_WARNING
+			       "Partition configured for %d cpus, "
+			       "operating system maximum is %u.\n",
+			       maxcpus, nr_cpu_ids);
+			maxcpus = nr_cpu_ids;
+		} else
+			printk(KERN_INFO "Partition configured for %d cpus.\n",
+			       maxcpus);
+
+		for (cpu = 0; cpu < maxcpus; cpu++)
+			set_cpu_possible(cpu, true);
+	out:
+		of_node_put(dn);
+	}
+	vdso_data->processorCount = num_present_cpus();
+#endif /* CONFIG_PPC64 */
+
+	/* Initialize CPU <=> thread mapping
+	 *
+	 * WARNING: We assume that the number of threads is the same for
+	 * every CPU in the system. If that is not the case, then some code
+	 * here will have to be reworked
+	 */
+	cpu_init_thread_core_maps(nthreads);
+
+	/* Now that possible cpus are set, set nr_cpu_ids for later use */
+	setup_nr_cpu_ids();
+
+	free_unused_pacas();
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PCSPKR_PLATFORM
+/* Register a "pcspkr" platform device when the tree has a "pnpPNP,100" node. */
+static __init int add_pcspkr(void)
+{
+	struct platform_device *pd;
+	struct device_node *np;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "pnpPNP,100");
+	if (!np)
+		return -ENODEV;
+	of_node_put(np);	/* only the node's existence is of interest */
+
+	pd = platform_device_alloc("pcspkr", -1);
+	if (!pd)
+		return -ENOMEM;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		platform_device_put(pd);
+	return ret;
+}
+device_initcall(add_pcspkr);
+#endif	/* CONFIG_PCSPKR_PLATFORM */
+
+static char ppc_hw_desc_buf[128] __initdata;	/* storage behind ppc_hw_desc */
+
+struct seq_buf ppc_hw_desc __initdata = {
+	.buffer = ppc_hw_desc_buf,
+	.size = sizeof(ppc_hw_desc_buf),
+	.len = 0,	/* starts out empty */
+	.readpos = 0,
+};
+
+static __init void probe_machine(void)
+{
+	extern struct machdep_calls __machine_desc_start;
+	extern struct machdep_calls __machine_desc_end;
+	unsigned int i;
+
+	/*
+	 * Try each descriptor from __machine_desc_start up to __machine_desc_end;
+	 * a candidate is copied into ppc_md before its probe() hook is called.
+	 */
+	DBG("Probing machine type ...\n");
+
+	/*
+	 * Check ppc_md is empty, if not we have a bug, ie, we setup an
+	 * entry before probe_machine() which will be overwritten
+	 */
+	for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) {
+		if (((void **)&ppc_md)[i]) {
+			printk(KERN_ERR "Entry %u in ppc_md non empty before"
+			       " machine probe !\n", i);
+		}
+	}
+
+	for (machine_id = &__machine_desc_start;
+	     machine_id < &__machine_desc_end;
+	     machine_id++) {
+		DBG("  %s ...\n", machine_id->name);
+		if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
+			continue;
+		memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
+		if (ppc_md.probe && !ppc_md.probe())
+			continue;
+		DBG("   %s match !\n", machine_id->name);
+		break;
+	}
+	/* No descriptor matched: report it and spin here forever */
+	if (machine_id >= &__machine_desc_end) {
+		pr_err("No suitable machine description found !\n");
+		for (;;);
+	}
+
+	// Append the machine name to other info we've gathered
+	seq_buf_puts(&ppc_hw_desc, ppc_md.name);
+
+	// Set the generic hardware description shown in oopses
+	dump_stack_set_arch_desc(ppc_hw_desc.buffer);
+
+	pr_info("Hardware name: %s\n", ppc_hw_desc.buffer);
+}
+
+/* Match a class of boards, not a specific device configuration. */
+int check_legacy_ioport(unsigned long base_port)
+{
+	struct device_node *parent, *np = NULL;
+	int ret = -ENODEV;	/* only becomes 0 for a node with an "isa" parent */
+
+	switch(base_port) {
+	case I8042_DATA_REG:
+		if (!(np = of_find_compatible_node(NULL, NULL, "pnpPNP,303")))
+			np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");
+		if (np) {
+			parent = of_get_parent(np);
+
+			of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0);
+			if (!of_i8042_kbd_irq)
+				of_i8042_kbd_irq = 1;	/* nothing mapped: use 1 */
+
+			of_i8042_aux_irq = irq_of_parse_and_map(parent, 1);
+			if (!of_i8042_aux_irq)
+				of_i8042_aux_irq = 12;	/* nothing mapped: use 12 */
+
+			of_node_put(np);
+			np = parent;	/* the check below is then made on the parent */
+			break;
+		}
+		np = of_find_node_by_type(NULL, "8042");
+		/* Pegasos has no device_type on its 8042 node, look for the
+		 * name instead */
+		if (!np)
+			np = of_find_node_by_name(NULL, "8042");
+		if (np) {
+			of_i8042_kbd_irq = 1;
+			of_i8042_aux_irq = 12;
+		}
+		break;
+	case FDC_BASE: /* FDC1 */
+		np = of_find_node_by_type(NULL, "fdc");
+		break;
+	default:
+		/* ipmi is supposed to fail here */
+		break;
+	}
+	if (!np)
+		return ret;
+	parent = of_get_parent(np);
+	if (parent) {
+		if (of_node_is_type(parent, "isa"))
+			ret = 0;
+		of_node_put(parent);
+	}
+	of_node_put(np);	/* drop the reference still held on np */
+	return ret;
+}
+EXPORT_SYMBOL(check_legacy_ioport);
+
+/*
+ * Panic notifiers setup
+ *
+ * We have 3 notifiers for powerpc, each one from a different "nature":
+ *
+ * - ppc_panic_fadump_handler() is a hypervisor notifier, which hard-disables
+ *   IRQs and deals with the Firmware-Assisted dump, when it is configured;
+ *   it should run early in the panic path.
+ *
+ * - dump_kernel_offset() is an informative notifier, just showing the KASLR
+ *   offset if we have RANDOMIZE_BASE set.
+ *
+ * - ppc_panic_platform_handler() is a low-level handler that's registered
+ *   only if the platform wishes to perform final actions in the panic path,
+ *   hence it should run late and might not even return. Currently, only
+ *   pseries and ps3 platforms register callbacks.
+ */
+static int ppc_panic_fadump_handler(struct notifier_block *this,
+				    unsigned long event, void *ptr)
+{
+	/*
+	 * panic does a local_irq_disable, but we really
+	 * want interrupts to be hard disabled.
+	 */
+	hard_irq_disable();
+
+	/*
+	 * If firmware-assisted dump has been registered then trigger
+	 * its callback and let the firmware handle everything else.
+	 */
+	crash_fadump(NULL, ptr);
+
+	return NOTIFY_DONE;
+}
+
+/* Panic notifier: log the kernel's kaslr_offset() together with KERNELBASE. */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+			      void *p)
+{
+	pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n", kaslr_offset(), KERNELBASE);
+
+	return NOTIFY_DONE;
+}
+
+static int ppc_panic_platform_handler(struct notifier_block *this,
+				      unsigned long event, void *ptr)
+{
+	/*
+	 * This handler is only registered (see setup_panic()) if ppc_md has
+	 * a panic callback, hence a NULL check is not needed.
+	 * Also, it may not return, so it runs really late on panic path.
+	 */
+	ppc_md.panic(ptr);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_fadump_block = {
+	.notifier_call = ppc_panic_fadump_handler,
+	.priority = INT_MAX, /* run early, to notify the firmware ASAP */
+};
+
+static struct notifier_block kernel_offset_notifier = {
+	.notifier_call = dump_kernel_offset,	/* .priority is left at 0 */
+};
+
+static struct notifier_block ppc_panic_block = {
+	.notifier_call = ppc_panic_platform_handler,
+	.priority = INT_MIN, /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+	/* Always registered: hard-disables IRQs + deals with fadump */
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &ppc_fadump_block);
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+		atomic_notifier_chain_register(&panic_notifier_list,
+					       &kernel_offset_notifier);
+
+	/* Platform panic hook, registered only when ppc_md.panic is set */
+	if (ppc_md.panic)
+		atomic_notifier_chain_register(&panic_notifier_list,
+					       &ppc_panic_block);
+}
+
+#ifdef CONFIG_CHECK_CACHE_COHERENCY
+/*
+ * For platforms that have configurable cache-coherency.  This function
+ * checks that the cache coherency setting of the kernel matches the setting
+ * left by the firmware, as indicated in the device tree.  Since a mismatch
+ * will eventually result in DMA failures, we print an error and call
+ * BUG() in that case.
+ */
+
+#define KERNEL_COHERENCY	(!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
+
+static int __init check_cache_coherency(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	bool devtree_coherency;
+
+	/* The device tree reports coherency as off by carrying a
+	 * "coherency-off" property on the root node.
+	 */
+	devtree_coherency = !of_get_property(root, "coherency-off", NULL);
+	of_node_put(root);
+
+	if (devtree_coherency == KERNEL_COHERENCY)
+		return 0;
+
+	printk(KERN_ERR
+		"kernel coherency:%s != device tree_coherency:%s\n",
+		KERNEL_COHERENCY ? "on" : "off",
+		devtree_coherency ? "on" : "off");
+	BUG();
+
+	return 0;
+}
+
+late_initcall(check_cache_coherency);
+#endif /* CONFIG_CHECK_CACHE_COHERENCY */
+
+void ppc_printk_progress(char *s, unsigned short hex)
+{
+	pr_info("%s\n", s);	/* @hex is not used; only the text is logged */
+}
+
+/* Log memory size, cache block sizes and the CPU/MMU/firmware feature words. */
+static __init void print_system_info(void)
+{
+	pr_info("-----------------------------------------------------\n");
+	pr_info("phys_mem_size     = 0x%llx\n",
+		(unsigned long long)memblock_phys_mem_size());
+
+	pr_info("dcache_bsize      = 0x%x\n", dcache_bsize);
+	pr_info("icache_bsize      = 0x%x\n", icache_bsize);
+
+	pr_info("cpu_features      = 0x%016lx\n", cur_cpu_spec->cpu_features);
+	pr_info("  possible        = 0x%016lx\n", (unsigned long)CPU_FTRS_POSSIBLE);
+	pr_info("  always          = 0x%016lx\n", (unsigned long)CPU_FTRS_ALWAYS);
+	pr_info("cpu_user_features = 0x%08x 0x%08x\n",
+		cur_cpu_spec->cpu_user_features,
+		cur_cpu_spec->cpu_user_features2);
+	pr_info("mmu_features      = 0x%08x\n", cur_cpu_spec->mmu_features);
+#ifdef CONFIG_PPC64
+	pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+	pr_info("vmalloc start     = 0x%lx\n", KERN_VIRT_START);
+	pr_info("IO start          = 0x%lx\n", KERN_IO_START);
+	pr_info("vmemmap start     = 0x%lx\n", (unsigned long)vmemmap);
+#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC64 */
+
+	/* The hash MMU details are printed only when radix is not enabled */
+	if (!early_radix_enabled())
+		print_system_hash_info();
+
+	if (PHYSICAL_START > 0)
+		pr_info("physical_start    = 0x%llx\n",
+		       (unsigned long long)PHYSICAL_START);
+	pr_info("-----------------------------------------------------\n");
+}
+
+#ifdef CONFIG_SMP
+/* Allocate a paca for every possible cpu but the current one, set its hard id. */
+static void __init smp_setup_pacas(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu != smp_processor_id()) {
+			allocate_paca(cpu);
+			set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+		}
+	}
+	memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32));
+	cpu_to_phys_id = NULL;	/* the id table is gone from here on */
+}
+#endif
+
+/*
+ * Called from start_kernel(); this initializes memblock, which is used
+ * to manage page allocation until mem_init is called.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+	kasan_init();
+
+	*cmdline_p = boot_command_line;
+
+	/* Set a half-reasonable default so udelay does something sensible */
+	loops_per_jiffy = 500000000 / HZ;
+
+	/* Unflatten the device-tree passed by prom_init or kexec */
+	unflatten_device_tree();
+
+	/*
+	 * Initialize cache line/block info from device-tree (on ppc64) or
+	 * just cputable (on ppc32).
+	 */
+	initialize_cache_info();
+
+	/* Initialize RTAS if available. */
+	rtas_initialize();
+
+	/* Check if we have an initrd provided via the device-tree. */
+	check_for_initrd();
+
+	/* Probe the machine type, establish ppc_md. */
+	probe_machine();
+
+	/* Setup panic notifier if requested by the platform. */
+	setup_panic();
+
+	/*
+	 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
+	 * it from their respective probe() function).
+	 */
+	setup_power_save();
+
+	/* Discover standard serial ports. */
+	find_legacy_serial_ports();
+
+	/* Register early console with the printk subsystem. */
+	register_early_udbg_console();
+
+	/* Setup the various CPU maps based on the device-tree. */
+	smp_setup_cpu_maps();
+
+	/* Initialize xmon. */
+	xmon_setup();
+
+	/* Check the SMT related command line arguments (ppc64). */
+	check_smt_enabled();
+
+	/* Parse memory topology */
+	mem_topology_setup();
+	/* Set max_mapnr before paging_init() */
+	set_max_mapnr(max_pfn);
+
+	/*
+	 * Set up the pacas of the secondary cpus now that we can map
+	 * physical -> logical CPU ids. The cpus themselves are released from
+	 * their spinloops at 0x60 by smp_release_cpus() further down;
+	 * Freescale Book3e parts spin in a loop provided by firmware,
+	 * so smp_release_cpus() does nothing for them.
+	 */
+#ifdef CONFIG_SMP
+	smp_setup_pacas();
+
+	/* On BookE, setup per-core TLB data structures. */
+	setup_tlb_core_data();
+#endif /* CONFIG_SMP */
+
+	/* Print various info about the machine that has been gathered so far. */
+	print_system_info();
+
+	klp_init_thread_info(&init_task);
+
+	setup_initial_init_mm(_stext, _etext, _edata, _end);
+	/* sched_init() does the mmgrab(&init_mm) for the primary CPU */
+	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
+	inc_mm_active_cpus(&init_mm);
+	mm_iommu_init(&init_mm);
+
+	irqstack_early_init();
+	exc_lvl_early_init();
+	emergency_stack_init();
+
+	mce_init();
+	smp_release_cpus();
+
+	initmem_init();
+
+	/*
+	 * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must
+	 * be called after initmem_init(), so that pageblock_order is initialised.
+	 */
+	kvm_cma_reserve();
+	gigantic_hugetlb_cma_reserve();
+
+	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+
+	if (ppc_md.setup_arch)
+		ppc_md.setup_arch();
+
+	setup_barrier_nospec();
+	setup_spectre_v2();
+
+	paging_init();
+
+	/* Initialize the MMU context management stuff. */
+	mmu_context_init();
+
+	/* Interrupt code needs to be 64K-aligned. */
+	if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
+		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
+		      (unsigned long)_stext);
+}
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 0000000000..7912bb50a7
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Prototypes for functions that are shared between setup_(32|64|common).c
+ *
+ * Copyright 2016 Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
+#define __ARCH_POWERPC_KERNEL_SETUP_H
+
+void initialize_cache_info(void);
+void irqstack_early_init(void);
+
+#ifdef CONFIG_PPC32
+void setup_power_save(void);
+#else /* !CONFIG_PPC32 */
+static inline void setup_power_save(void) { }	/* no-op stub */
+#endif /* CONFIG_PPC32 */
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
+void check_smt_enabled(void);
+#else /* !(CONFIG_PPC64 && CONFIG_SMP) */
+static inline void check_smt_enabled(void) { }	/* no-op stub */
+#endif /* CONFIG_PPC64 && CONFIG_SMP */
+
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
+void setup_tlb_core_data(void);
+#else /* !(CONFIG_PPC_BOOK3E_64 && CONFIG_SMP) */
+static inline void setup_tlb_core_data(void) { }	/* no-op stub */
+#endif /* CONFIG_PPC_BOOK3E_64 && CONFIG_SMP */
+
+#ifdef CONFIG_BOOKE_OR_40x
+void exc_lvl_early_init(void);
+#else /* !CONFIG_BOOKE_OR_40x */
+static inline void exc_lvl_early_init(void) { }	/* no-op stub */
+#endif /* CONFIG_BOOKE_OR_40x */
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK)
+void emergency_stack_init(void);
+#else /* !(CONFIG_PPC64 || CONFIG_VMAP_STACK) */
+static inline void emergency_stack_init(void) { }	/* no-op stub */
+#endif /* CONFIG_PPC64 || CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_PPC64
+u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Having this in kvm_ppc.h makes include dependencies too
+ * tricky to solve for setup-common.c so have it here.
+ */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvm_cma_reserve(void);
+#else /* !CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+static inline void kvm_cma_reserve(void) { }	/* no-op stub */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+#ifdef CONFIG_TAU
+u32 cpu_temp(unsigned long cpu);
+u32 cpu_temp_both(unsigned long cpu);
+u32 tau_interrupts(unsigned long cpu);
+#endif /* CONFIG_TAU */
+
+#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
new file mode 100644
index 0000000000..b761cc1a40
--- /dev/null
+++ b/arch/powerpc/kernel/setup_32.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Common prep/pmac/chrp boot and setup code.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/tty.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/memblock.h>
+#include <linux/export.h>
+#include <linux/nvram.h>
+#include <linux/pgtable.h>
+#include <linux/of_fdt.h>
+#include <linux/irq.h>
+
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/elf.h>
+#include <asm/cputable.h>
+#include <asm/bootx.h>
+#include <asm/btext.h>
+#include <asm/machdep.h>
+#include <linux/uaccess.h>
+#include <asm/pmac_feature.h>
+#include <asm/sections.h>
+#include <asm/nvram.h>
+#include <asm/xmon.h>
+#include <asm/time.h>
+#include <asm/serial.h>
+#include <asm/udbg.h>
+#include <asm/code-patching.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
+#include <asm/kdump.h>
+#include <asm/feature-fixups.h>
+#include <asm/early_ioremap.h>
+
+#include "setup.h"
+
+#define DBG(fmt...)	/* expands to nothing: DBG() calls in this file are no-ops */
+
+extern void bootx_init(unsigned long r4, unsigned long phys);
+
+int boot_cpuid_phys;
+EXPORT_SYMBOL_GPL(boot_cpuid_phys);
+
+int smp_hw_index[NR_CPUS];
+EXPORT_SYMBOL(smp_hw_index);
+
+unsigned int DMA_MODE_READ;
+unsigned int DMA_MODE_WRITE;
+
+EXPORT_SYMBOL(DMA_MODE_READ);
+EXPORT_SYMBOL(DMA_MODE_WRITE);
+
+/*
+ * This is run before start_kernel(), the kernel has been relocated
+ * and we are running with enough of the MMU enabled to have our
+ * proper kernel virtual addresses
+ *
+ * We do the initial parsing of the flat device-tree and prepare
+ * for the MMU to be fully initialized.
+ */
+notrace void __init machine_init(u64 dt_ptr)
+{
+	u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
+	ppc_inst_t insn;
+
+	/* Configure static keys first, now that we're relocated. */
+	setup_feature_keys();
+
+	early_ioremap_init();
+
+	/* Enable early debugging if any specified (see udbg.h) */
+	udbg_early_init();
+
+	patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP()));
+
+	create_cond_branch(&insn, addr, branch_target(addr), 0x820000);
+	patch_instruction(addr, insn);	/* replace b by bne cr0 */
+
+	/* Do some early initialization based on the flat device tree */
+	early_init_devtree(__va(dt_ptr));
+
+	early_init_mmu();
+
+	setup_kdump_trampoline();
+}
+
+/* "l2cr=xxxx" command-line option: write the value to L2CR if the CPU has one */
+static int __init ppc_setup_l2cr(char *str)
+{
+	if (cpu_has_feature(CPU_FTR_L2CR)) {
+		unsigned long val = simple_strtoul(str, NULL, 0);
+		printk(KERN_INFO "l2cr set to %lx\n", val);
+		_set_L2CR(0);		/* force invalidate by disable cache */
+		_set_L2CR(val);		/* and enable it */
+	}
+	return 1;
+}
+__setup("l2cr=", ppc_setup_l2cr);
+
+/* "l3cr=xxxx" command-line option: write the value to L3CR if the CPU has one */
+static int __init ppc_setup_l3cr(char *str)
+{
+	if (cpu_has_feature(CPU_FTR_L3CR)) {
+		unsigned long val = simple_strtoul(str, NULL, 0);
+		printk(KERN_INFO "l3cr set to %lx\n", val);
+		_set_L3CR(val);		/* and enable it */
+	}
+	return 1;
+}
+__setup("l3cr=", ppc_setup_l3cr);
+
+/* arch_initcall: blank the progress line, then run the platform's init hook. */
+static int __init ppc_init(void)
+{
+	/* clear the progress line */
+	if (ppc_md.progress)
+		ppc_md.progress("             ", 0xffff);
+
+	/* call platform init */
+	if (ppc_md.init)
+		ppc_md.init();
+	return 0;
+}
+arch_initcall(ppc_init);
+
+/* Get one THREAD_SIZE stack from memblock; panics if the allocation fails. */
+static void *__init alloc_stack(void)
+{
+	void *stack = memblock_alloc(THREAD_SIZE, THREAD_ALIGN);
+
+	if (!stack)
+		panic("cannot allocate %d bytes for stack at %pS\n",
+		      THREAD_SIZE, (void *)_RET_IP_);
+	return stack;
+}
+
+void __init irqstack_early_init(void)
+{
+	unsigned int i;
+
+	if (IS_ENABLED(CONFIG_VMAP_STACK))
+		return;	/* nothing is allocated here with VMAP_STACK */
+
+	/* One softirq and one hardirq stack per possible cpu. They must be in
+	 * lowmem; on ppc32 memblock is limited to lowmem by default */
+	for_each_possible_cpu(i) {
+		softirq_ctx[i] = alloc_stack();
+		hardirq_ctx[i] = alloc_stack();
+	}
+}
+
+#ifdef CONFIG_VMAP_STACK
+void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack};
+
+void __init emergency_stack_init(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i)
+		emergency_ctx[i] = alloc_stack();	/* replaces any static entry */
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_BOOKE_OR_40x
+void __init exc_lvl_early_init(void)
+{
+	unsigned int i, hw_cpu;
+
+	/* interrupt stacks must be in lowmem, we get that for free on ppc32
+	 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
+	for_each_possible_cpu(i) {
+#ifdef CONFIG_SMP
+		hw_cpu = get_hard_smp_processor_id(i);
+#else
+		hw_cpu = 0;
+#endif /* CONFIG_SMP */
+
+		critirq_ctx[hw_cpu] = alloc_stack();	/* indexed by hard cpu id */
+#ifdef CONFIG_BOOKE
+		dbgirq_ctx[hw_cpu] = alloc_stack();
+		mcheckirq_ctx[hw_cpu] = alloc_stack();
+#endif /* CONFIG_BOOKE */
+	}
+}
+#endif /* CONFIG_BOOKE_OR_40x */
+
+void __init setup_power_save(void)
+{
+#ifdef CONFIG_PPC_BOOK3S_32
+	if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+	    cpu_has_feature(CPU_FTR_CAN_NAP))
+		ppc_md.power_save = ppc6xx_idle;
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+#ifdef CONFIG_PPC_E500
+	if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+	    cpu_has_feature(CPU_FTR_CAN_NAP))
+		ppc_md.power_save = e500_idle;
+#endif /* CONFIG_PPC_E500 */
+}
+
+__init void initialize_cache_info(void)
+{
+	/*
+	 * Set the cache block sizes from the cur_cpu_spec entry as a default.
+	 * Systems with OF can look in the properties on the cpu node(s)
+	 * for a possibly more accurate value.
+	 */
+	dcache_bsize = cur_cpu_spec->dcache_bsize;
+	icache_bsize = cur_cpu_spec->icache_bsize;
+}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
new file mode 100644
index 0000000000..246201d0d8
--- /dev/null
+++ b/arch/powerpc/kernel/setup_64.c
@@ -0,0 +1,932 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * 
+ * Common boot and setup code.
+ *
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ */
+
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/initrd.h>
+#include <linux/seq_file.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/utsname.h>
+#include <linux/tty.h>
+#include <linux/root_dev.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/unistd.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/memblock.h>
+#include <linux/pci.h>
+#include <linux/lockdep.h>
+#include <linux/memory.h>
+#include <linux/nmi.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/asm-prototypes.h>
+#include <asm/kvm_guest.h>
+#include <asm/io.h>
+#include <asm/kdump.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+#include <asm/elf.h>
+#include <asm/machdep.h>
+#include <asm/paca.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/sections.h>
+#include <asm/btext.h>
+#include <asm/nvram.h>
+#include <asm/setup.h>
+#include <asm/rtas.h>
+#include <asm/iommu.h>
+#include <asm/serial.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+#include <asm/xmon.h>
+#include <asm/udbg.h>
+#include <asm/kexec.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
+#include <asm/feature-fixups.h>
+#include <asm/kup.h>
+#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
+
+#include "setup.h"
+
+int spinning_secondaries;
+u64 ppc64_pft_size;
+
+struct ppc64_caches ppc64_caches = {
+	.l1d = {
+		.block_size = 0x40,	/* 64 */
+		.log_block_size = 6,	/* log2(0x40) */
+	},
+	.l1i = {
+		.block_size = 0x40,	/* 64 */
+		.log_block_size = 6	/* log2(0x40) */
+	},
+};
+EXPORT_SYMBOL_GPL(ppc64_caches);
+
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
+void __init setup_tlb_core_data(void)
+{
+	int cpu;
+
+	BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);
+
+	for_each_possible_cpu(cpu) {
+		int first = cpu_first_thread_sibling(cpu);
+
+		/*
+		 * If we boot via kdump on a non-primary thread,
+		 * make sure we point at the thread that actually
+		 * set up this TLB.
+		 */
+		if (cpu_first_thread_sibling(boot_cpuid) == first)
+			first = boot_cpuid;
+
+		paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
+
+		/*
+		 * If we have threads, we need either tlbsrx.
+		 * or e6500 tablewalk mode, or else TLB handlers
+		 * will be racy and could produce duplicate entries.
+		 * Should we panic instead? (The check does not depend on @cpu.)
+		 */
+		WARN_ONCE(smt_enabled_at_boot >= 2 &&
+			  book3e_htw_mode != PPC_HTW_E6500,
+			  "%s: unsupported MMU configuration\n", __func__);
+	}
+}
+#endif /* CONFIG_PPC_BOOK3E_64 && CONFIG_SMP */
+
+#ifdef CONFIG_SMP
+
+static char *smt_enabled_cmdline;	/* set by early_smt_enabled() */
+
+/* Set smt_enabled_at_boot from smt-enabled= or the ibm,smt-enabled OF option */
+void __init check_smt_enabled(void)
+{
+	struct device_node *dn;
+	const char *smt_option;
+
+	/* Default to enabling all threads */
+	smt_enabled_at_boot = threads_per_core;
+
+	/* Allow the command line to overrule the OF option */
+	if (smt_enabled_cmdline) {
+		if (!strcmp(smt_enabled_cmdline, "on"))
+			smt_enabled_at_boot = threads_per_core;
+		else if (!strcmp(smt_enabled_cmdline, "off"))
+			smt_enabled_at_boot = 0;
+		else {
+			int smt;
+			int rc;
+
+			rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
+			if (!rc)	/* on a parse error the default stays */
+				smt_enabled_at_boot =	/* NOTE(review): smt < 0 is kept as is */
+					min(threads_per_core, smt);
+		}
+	} else {
+		dn = of_find_node_by_path("/options");
+		if (dn) {
+			smt_option = of_get_property(dn, "ibm,smt-enabled",
+						     NULL);
+
+			if (smt_option) {
+				if (!strcmp(smt_option, "on"))
+					smt_enabled_at_boot = threads_per_core;
+				else if (!strcmp(smt_option, "off"))
+					smt_enabled_at_boot = 0;
+			}
+
+			of_node_put(dn);
+		}
+	}
+}
+
+/* Save the smt-enabled= cmdline argument; check_smt_enabled() parses it */
+static int __init early_smt_enabled(char *p)
+{
+	smt_enabled_cmdline = p;
+	return 0;
+}
+early_param("smt-enabled", early_smt_enabled);
+
+#endif /* CONFIG_SMP */
+
+/* Fix up paca fields required for the boot cpu */
+static void __init fixup_boot_paca(struct paca_struct *boot_paca)
+{
+	/* The boot cpu is started */
+	boot_paca->cpu_start = 1;
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Give the early boot machine check stack somewhere to use, use
+	 * half of the init stack. This is a bit hacky but there should not be
+	 * deep stack usage in early init so shouldn't overflow it or overwrite
+	 * things.
+	 */
+	boot_paca->mc_emergency_sp = (void *)&init_thread_union +
+		(THREAD_SIZE/2);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	/* Allow percpu accesses to work until we setup percpu data */
+	boot_paca->data_offset = 0;
+	/* Mark interrupts soft and hard disabled in PACA */
+	boot_paca->irq_soft_mask = IRQS_DISABLED;
+	boot_paca->irq_happened = PACA_IRQ_HARD_DIS;
+	WARN_ON(mfmsr() & MSR_EE);	/* warn if MSR_EE is set at this point */
+}
+
+static void __init configure_exceptions(void)
+{
+	/*
+	 * Setup the trampolines from the lowmem exception vectors
+	 * to the kdump kernel when not using a relocatable kernel.
+	 */
+	setup_kdump_trampoline();
+
+	/* Under a PAPR hypervisor, we need hypercalls */
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		/*
+		 * - PR KVM does not support AIL mode interrupts in the host
+		 *   while a PR guest is running.
+		 *
+		 * - SCV system call interrupt vectors are only implemented for
+		 *   AIL mode interrupts.
+		 *
+		 * - On pseries, AIL mode can only be enabled and disabled
+		 *   system-wide so when a PR VM is created on a pseries host,
+		 *   all CPUs of the host are set to AIL=0 mode.
+		 *
+		 * - Therefore host CPUs must not execute scv while a PR VM
+		 *   exists.
+		 *
+		 * - SCV support can not be disabled dynamically because the
+		 *   feature is advertised to host userspace. Disabling the
+		 *   facility and emulating it would be possible but is not
+		 *   implemented.
+		 *
+		 * - So SCV support is blanket disabled if PR KVM could possibly
+		 *   run. That is, PR support compiled in, booting on pseries
+		 *   with hash MMU.
+		 */
+		if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
+			init_task.thread.fscr &= ~FSCR_SCV;
+			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+		}
+
+		/* Enable AIL if possible; SCV is also turned off if that fails */
+		if (!pseries_enable_reloc_on_exc()) {
+			init_task.thread.fscr &= ~FSCR_SCV;
+			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+		}
+
+		/*
+		 * Tell the hypervisor that we want our exceptions to
+		 * be taken in little endian mode.
+		 *
+		 * We don't call this for big endian as our calling convention
+		 * makes us always enter in BE, and the call may fail under
+		 * some circumstances with kdump.
+		 */
+#ifdef __LITTLE_ENDIAN__
+		pseries_little_endian_exceptions();
+#endif /* __LITTLE_ENDIAN__ */
+	} else {
+		/* Set endian mode using OPAL */
+		if (firmware_has_feature(FW_FEATURE_OPAL))
+			opal_configure_cores();
+
+		/* AIL on native is done in cpu_ready_for_interrupts() */
+	}
+}
+
+static void cpu_ready_for_interrupts(void)
+{
+	/*
+	 * Enable AIL if supported, and we are in hypervisor mode. This
+	 * is called once for every processor.
+	 *
+	 * If we are not in hypervisor mode the job is done once for
+	 * the whole partition in configure_exceptions().
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		const unsigned long cur_lpcr = mfspr(SPRN_LPCR);
+		unsigned long want_lpcr = cur_lpcr;
+
+		if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (cpu_has_feature(CPU_FTR_ARCH_207S))
+				want_lpcr |= LPCR_AIL_3;
+		} else if (pvr_version_is(PVR_POWER10) &&
+			   (mfspr(SPRN_PVR) & 0xf00) == 0x100) {
+			want_lpcr |= LPCR_AIL_3;	/* P10 DD1 does not have HAIL */
+		} else {
+			want_lpcr |= LPCR_HAIL;
+		}
+
+		if (want_lpcr != cur_lpcr)
+			mtspr(SPRN_LPCR, want_lpcr);
+	}
+
+	/*
+	 * Set HFSCR:TM based on CPU features. In the special case of TM no
+	 * suspend (P9N DD2.1), Linux is told TM is off via the dt-ftrs but
+	 * told to (partially) use it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED,
+	 * so HFSCR[TM] is off from dt-ftrs and has to be turned on here.
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		unsigned long hfscr = mfspr(SPRN_HFSCR);
+
+		if (cpu_has_feature(CPU_FTR_TM_COMP))
+			hfscr |= HFSCR_TM;
+		else
+			hfscr &= ~HFSCR_TM;
+		mtspr(SPRN_HFSCR, hfscr);
+	}
+
+	/* Set IR and DR in PACA MSR */
+	get_paca()->kernel_msr = MSR_KERNEL;
+}
+
+unsigned long spr_default_dscr = 0;
+
+static void __init record_spr_defaults(void)
+{
+	if (early_cpu_has_feature(CPU_FTR_DSCR))	/* DSCR is only read when the CPU has it */
+		spr_default_dscr = mfspr(SPRN_DSCR);	/* current DSCR becomes the default */
+}
+
+/*
+ * Early initialization entry point. This is called by head.S
+ * with MMU translation disabled. We rely on the "feature" of
+ * the CPU that ignores the top 2 bits of the address in real
+ * mode so we can access kernel globals normally provided we
+ * only toy with things in the RMO region. From here, we do
+ * some early parsing of the device-tree to set up our MEMBLOCK
+ * data structures, and allocate & initialize the hash table
+ * and segment tables so we can start running with translation
+ * enabled.
+ *
+ * It is this function which will call the probe() callback of
+ * the various platform types and copy the matching one to the
+ * global ppc_md structure. Your platform can eventually do
+ * some very early initializations from the probe() routine, but
+ * this is not recommended, be very careful as, for example, the
+ * device-tree is not accessible via normal means at this point.
+ */
+
+void __init early_setup(unsigned long dt_ptr)
+{
+	static __initdata struct paca_struct boot_paca;
+
+	/* -------- printk is _NOT_ safe to use here ! ------- */
+
+	/*
+	 * Assume we're on cpu 0 for now.
+	 *
+	 * We need to load a PACA very early for a few reasons.
+	 *
+	 * The stack protector canary is stored in the paca, so as soon as we
+	 * call any stack protected code we need r13 pointing somewhere valid.
+	 *
+	 * If we are using kcov it will call in_task() in its instrumentation,
+	 * which relies on the current task from the PACA.
+	 *
+	 * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+	 * printk(), which can trigger both stack protector and kcov.
+	 *
+	 * percpu variables and spin locks also use the paca.
+	 *
+	 * So set up a temporary paca. It will be replaced below once we know
+	 * what CPU we are on.
+	 */
+	initialise_paca(&boot_paca, 0);
+	fixup_boot_paca(&boot_paca);
+	WARN_ON(local_paca != 0);
+	setup_paca(&boot_paca); /* install the paca into registers */
+
+	/* -------- printk is now safe to use ------- */
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV))
+		enable_machine_check();
+
+	/* Try new device tree based feature discovery ... */
+	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+		/* Otherwise use the old style CPU table */
+		identify_cpu(0, mfspr(SPRN_PVR));
+
+	/* Enable early debugging if any specified (see udbg.h) */
+	udbg_early_init();
+
+	udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
+
+	/*
+	 * Do early initialization using the flattened device
+	 * tree, such as retrieving the physical memory map or
+	 * calculating/retrieving the hash table size, discover
+	 * boot_cpuid and boot_cpu_hwid.
+	 */
+	early_init_devtree(__va(dt_ptr));
+
+	allocate_paca_ptrs();
+	allocate_paca(boot_cpuid);
+	set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid);
+	fixup_boot_paca(paca_ptrs[boot_cpuid]);
+	setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */
+	// smp_processor_id() now reports boot_cpuid
+
+#ifdef CONFIG_SMP
+	task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current)
+#endif
+
+	/*
+	 * Configure exception handlers. This includes setting up trampolines
+	 * if needed, setting exception endian mode, etc...
+	 */
+	configure_exceptions();
+
+	/*
+	 * Configure Kernel Userspace Protection. This needs to happen before
+	 * feature fixups for platforms that implement this using features.
+	 */
+	setup_kup();
+
+	/* Apply all the dynamic patching */
+	apply_feature_fixups();
+	setup_feature_keys();
+
+	/* Initialize the hash table or TLB handling */
+	early_init_mmu();
+
+	early_ioremap_setup();
+
+	/*
+	 * After firmware and early platform setup code has set things up,
+	 * we note the SPR values for configurable control/performance
+	 * registers, and use those as initial defaults.
+	 */
+	record_spr_defaults();
+
+	/*
+	 * At this point, we can let interrupts switch to virtual mode
+	 * (the MMU has been setup), so adjust the MSR in the PACA to
+	 * have IR and DR set and enable AIL if it exists
+	 */
+	cpu_ready_for_interrupts();
+
+	/*
+	 * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it
+	 * will only actually get enabled on the boot cpu much later once
+	 * ftrace itself has been initialized.
+	 */
+	this_cpu_enable_ftrace();
+
+	udbg_printf(" <- %s()\n", __func__);
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+	/*
+	 * This needs to be done *last* (after the above udbg_printf() even)
+	 *
+	 * Right after we return from this function, we turn on the MMU
+	 * which means the real-mode access trick that btext does will
+	 * no longer work, it needs to switch to using a real MMU
+	 * mapping. This call will ensure that it does
+	 */
+	btext_map();
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
+}
+
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+	/* Mark interrupts disabled in PACA */
+	irq_soft_mask_set(IRQS_DISABLED);
+
+	/* Initialize the hash table or TLB handling */
+	early_init_mmu_secondary();
+
+	/* Perform any KUP setup that is per-cpu */
+	setup_kup();
+
+	/*
+	 * At this point, we can let interrupts switch to virtual mode
+	 * (the MMU has been setup), so adjust the MSR in the PACA to
+	 * have IR and DR set (AIL and HFSCR[TM] too in hypervisor mode).
+	 */
+	cpu_ready_for_interrupts();
+}
+
+#endif /* CONFIG_SMP */
+
+void __noreturn panic_smp_self_stop(void)
+{
+	hard_irq_disable();
+	spin_begin();
+	for (;;)	/* never exits: this CPU spins here for good */
+		spin_cpu_relax();
+}
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
+static bool use_spinloop(void)
+{
+	/*
+	 * Book3E: the ePAPR spin table is not used when the kernel was
+	 * booted from kexec, so the common spinloop applies only then.
+	 */
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
+		return of_property_read_bool(of_chosen,
+					     "linux,booted-from-kexec");
+
+	/*
+	 * Book3S: see comments in head_64.S -- not all platforms insert
+	 * secondaries at __secondary_hold and wait at the spin loop.
+	 * With OPAL firmware the spinloop is not used.
+	 */
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		return false;
+	return true;
+}
+
+void smp_release_cpus(void)
+{
+	unsigned long *ptr;
+	int i;
+
+	if (!use_spinloop())
+		return;
+
+	/* All secondary cpus are spinning on a common spinloop, release them
+	 * all now so they can start to spin on their individual paca
+	 * spinloops. For non SMP kernels, the secondary cpus never get out
+	 * of the common spinloop.
+	 */
+
+	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
+			- PHYSICAL_START);
+	*ptr = ppc_function_entry(generic_secondary_smp_init);
+
+	/* And wait a bit for them to catch up */
+	for (i = 0; i < 100000; i++) {	/* bounded wait: at most 100000 x udelay(1) */
+		mb();
+		HMT_low();
+		if (spinning_secondaries == 0)
+			break;
+		udelay(1);
+	}
+	pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
+}
+#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
+
+/*
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
+ * (at least until we get rid of them completely). This is mostly some
+ * cache information about the CPU that will be used by cache flush
+ * routines and/or provided to userland
+ */
+
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+			    u32 bsize, u32 sets)
+{
+	info->size = size;
+	info->sets = sets;
+	info->line_size = lsize;
+	info->block_size = bsize;
+	info->log_block_size = __ilog2(bsize);
+
+	/* A zero block size means there are no blocks to count per page. */
+	info->blocks_per_page = bsize ? PAGE_SIZE / bsize : 0;
+
+	/*
+	 * sets == 0 is how parse_cache_info() encodes a fully
+	 * associative cache; that is reported as 0xffff ways.
+	 */
+	info->assoc = sets ? size / (sets * lsize) : 0xffff;
+}
+
+static bool __init parse_cache_info(struct device_node *np,
+				    bool icache,
+				    struct ppc_cache_info *info)
+{
+	static const char *ipropnames[] __initdata = {
+		"i-cache-size",
+		"i-cache-sets",
+		"i-cache-block-size",
+		"i-cache-line-size",
+	};
+	static const char *dpropnames[] __initdata = {
+		"d-cache-size",
+		"d-cache-sets",
+		"d-cache-block-size",
+		"d-cache-line-size",
+	};
+	const char **propnames = icache ? ipropnames : dpropnames;
+	const __be32 *sizep, *lsizep, *bsizep, *setsp;
+	u32 size, lsize, bsize, sets;
+	bool success = true;
+
+	size = 0;
+	sets = -1u;	/* stays -1u when the sets property is absent */
+	lsize = bsize = cur_cpu_spec->dcache_bsize;	/* NOTE(review): D-side default is used even when icache is true -- confirm intended */
+	sizep = of_get_property(np, propnames[0], NULL);
+	if (sizep != NULL)
+		size = be32_to_cpu(*sizep);
+	setsp = of_get_property(np, propnames[1], NULL);
+	if (setsp != NULL)
+		sets = be32_to_cpu(*setsp);
+	bsizep = of_get_property(np, propnames[2], NULL);
+	lsizep = of_get_property(np, propnames[3], NULL);
+	if (bsizep == NULL)	/* block size falls back to the line size ... */
+		bsizep = lsizep;
+	if (lsizep == NULL)	/* ... and the line size to the block size */
+		lsizep = bsizep;
+	if (lsizep != NULL)
+		lsize = be32_to_cpu(*lsizep);
+	if (bsizep != NULL)
+		bsize = be32_to_cpu(*bsizep);
+	if (sizep == NULL || bsizep == NULL || lsizep == NULL)
+		success = false;	/* info is still filled in below from the defaults */
+
+	/*
+	 * OF is weird .. it represents fully associative caches
+	 * as "1 way" which doesn't make much sense and doesn't
+	 * leave room for direct mapped. We'll assume that 0
+	 * in OF means direct mapped for that reason.
+	 */
+	if (sets == 1)
+		sets = 0;
+	else if (sets == 0)
+		sets = 1;
+
+	init_cache_info(info, size, lsize, bsize, sets);
+
+	return success;
+}
+
+void __init initialize_cache_info(void)
+{
+	struct device_node *cpu = NULL, *l2, *l3 = NULL;
+	u32 pvr;
+
+	/*
+	 * All shipping POWER8 machines have a firmware bug that
+	 * puts incorrect information in the device-tree. This will
+	 * be (hopefully) fixed for future chips but for now hard
+	 * code the values if we are running on one of these
+	 */
+	pvr = PVR_VER(mfspr(SPRN_PVR));
+	if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
+	    pvr == PVR_POWER8NVL) {
+						/* size    lsize   blk  sets */
+		init_cache_info(&ppc64_caches.l1i, 0x8000,   128,  128, 32);
+		init_cache_info(&ppc64_caches.l1d, 0x10000,  128,  128, 64);
+		init_cache_info(&ppc64_caches.l2,  0x80000,  128,  0,   512);
+		init_cache_info(&ppc64_caches.l3,  0x800000, 128,  0,   8192);
+	} else
+		cpu = of_find_node_by_type(NULL, "cpu");	/* cpu stays NULL on POWER8, skipping the DT parse */
+
+	/*
+	 * We're assuming *all* of the CPUs have the same
+	 * d-cache and i-cache sizes... -Peter
+	 */
+	if (cpu) {
+		if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
+			pr_warn("Argh, can't find dcache properties !\n");
+
+		if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
+			pr_warn("Argh, can't find icache properties !\n");
+
+		/*
+		 * Try to find the L2 and L3 if any. Assume they are
+		 * unified and use the D-side properties.
+		 */
+		l2 = of_find_next_cache_node(cpu);
+		of_node_put(cpu);	/* cpu node is not used past this point */
+		if (l2) {
+			parse_cache_info(l2, false, &ppc64_caches.l2);
+			l3 = of_find_next_cache_node(l2);
+			of_node_put(l2);
+		}
+		if (l3) {
+			parse_cache_info(l3, false, &ppc64_caches.l3);
+			of_node_put(l3);
+		}
+	}
+
+	/* For use by binfmt_elf */
+	dcache_bsize = ppc64_caches.l1d.block_size;
+	icache_bsize = ppc64_caches.l1i.block_size;
+
+	cur_cpu_spec->dcache_bsize = dcache_bsize;	/* cur_cpu_spec mirrors the L1 block sizes */
+	cur_cpu_spec->icache_bsize = icache_bsize;
+}
+
+/*
+ * This returns the limit below which memory accesses to the linear
+ * mapping are guaranteed not to cause an architectural exception (e.g.,
+ * TLB or SLB miss fault).
+ *
+ * This is used to allocate PACAs and various interrupt stacks that
+ * are accessed early in interrupt handlers that must not cause
+ * re-entrant interrupts.
+ */
+__init u64 ppc64_bolted_size(void)
+{
+#ifdef CONFIG_PPC_BOOK3E_64
+	/* Freescale BookE bolts the entire linear mapping */
+	/* XXX: BookE ppc64_rma_limit setup seems to disagree? */
+	if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		return linear_map_top;
+	/* Other BookE, we assume the first GB is bolted */
+	return 1ul << 30;
+#else
+	/* BookS radix, does not take faults on linear mapping */
+	if (early_radix_enabled())
+		return ULONG_MAX;	/* i.e. no limit */
+
+	/* BookS hash, the first segment is bolted */
+	if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		return 1UL << SID_SHIFT_1T;	/* size of one 1T segment */
+	return 1UL << SID_SHIFT;
+#endif
+}
+
+static void *__init alloc_stack(unsigned long limit, int cpu)
+{
+	int nid = early_cpu_to_node(cpu);
+	void *stack;
+
+	BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
+
+	/* THREAD_SIZE bytes below @limit, for the CPU's early NUMA node. */
+	stack = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
+				       MEMBLOCK_LOW_LIMIT, limit, nid);
+	if (stack == NULL)
+		panic("cannot allocate stacks");
+	return stack;
+}
+
+void __init irqstack_early_init(void)
+{
+	const u64 bolted_limit = ppc64_bolted_size();
+	unsigned int cpu;
+
+	/*
+	 * These stacks are never touched in real mode, but taking an SLB
+	 * miss on one is not acceptable, so keep every interrupt stack
+	 * below the bolted limit (i.e. within the first segment).
+	 */
+	for_each_possible_cpu(cpu) {
+		softirq_ctx[cpu] = alloc_stack(bolted_limit, cpu);
+		hardirq_ctx[cpu] = alloc_stack(bolted_limit, cpu);
+	}
+}
+
+#ifdef CONFIG_PPC_BOOK3E_64
+void __init exc_lvl_early_init(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		/* Each paca kstack pointer is set THREAD_SIZE past its stack base. */
+		void *stack = alloc_stack(ULONG_MAX, cpu);
+
+		critirq_ctx[cpu] = stack;
+		paca_ptrs[cpu]->crit_kstack = stack + THREAD_SIZE;
+
+		stack = alloc_stack(ULONG_MAX, cpu);
+		dbgirq_ctx[cpu] = stack;
+		paca_ptrs[cpu]->dbg_kstack = stack + THREAD_SIZE;
+
+		stack = alloc_stack(ULONG_MAX, cpu);
+		mcheckirq_ctx[cpu] = stack;
+		paca_ptrs[cpu]->mc_kstack = stack + THREAD_SIZE;
+	}
+
+	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+		patch_exception(0x040, exc_debug_debug_book3e);
+}
+#endif
+
+/*
+ * Stack space used when we detect a bad kernel stack pointer, and
+ * early in SMP boots before relocation is enabled. Exclusive emergency
+ * stack for machine checks.
+ */
+void __init emergency_stack_init(void)
+{
+	u64 limit, mce_limit;
+	unsigned int i;
+
+	/*
+	 * Emergency stacks must be under 256MB, we cannot afford to take
+	 * SLB misses on them. The ABI also requires them to be 128-byte
+	 * aligned.
+	 *
+	 * Since we use these as temporary stacks during secondary CPU
+	 * bringup, machine check, system reset, and HMI, we need to get
+	 * at them in real mode. This means they must also be within the RMO
+	 * region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
+	 */
+	limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+	/*
+	 * Machine check on pseries calls rtas, but can't use the static
+	 * rtas_args due to a machine check hitting while the lock is held.
+	 * rtas args have to be under 4GB, so the machine check stack is
+	 * limited to 4GB so args can be put on stack.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+		mce_limit = SZ_4G;	/* only the machine check stack limit is capped */
+
+	for_each_possible_cpu(i) {
+		paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+		/* emergency stack for NMI exception handling. */
+		paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
+
+		/* emergency stack for machine check exception handling. */
+		paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
+#endif
+	}
+}
+
+#ifdef CONFIG_SMP
+static int pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	/* CPUs on the same early NUMA node are local; all others remote. */
+	bool same_node = early_cpu_to_node(from) == early_cpu_to_node(to);
+
+	return same_node ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+}
+
+static __init int pcpu_cpu_to_node(int cpu)
+{
+	return early_cpu_to_node(cpu);	/* callback handed to the percpu first-chunk setup below */
+}
+
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
+	size_t atom_size;
+	unsigned long delta;
+	unsigned int cpu;
+	int rc = -EINVAL;	/* negative rc selects the page allocator when embed is skipped */
+
+	/*
+	 * BookE and BookS radix are historical values and should be revisited.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+		atom_size = SZ_1M;
+	} else if (radix_enabled()) {
+		atom_size = PAGE_SIZE;
+	} else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+		/*
+		 * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need
+		 * to group units.  For larger mappings, use 1M atom which
+		 * should be large enough to contain a number of units.
+		 */
+		if (mmu_linear_psize == MMU_PAGE_4K)
+			atom_size = PAGE_SIZE;
+		else
+			atom_size = SZ_1M;
+	}
+
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+					    pcpu_cpu_to_node);
+		if (rc)
+			pr_warn("PERCPU: %s allocator failed (%d), "
+				"falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
+	if (rc < 0)
+		panic("cannot initialize percpu area (err=%d)", rc);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu) {
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+		paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];	/* paca keeps the same offset */
+	}
+}
+#endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+unsigned long memory_block_size_bytes(void)
+{
+	/* Fall back to the minimum when the platform has no hook. */
+	if (!ppc_md.memory_block_size)
+		return MIN_MEMORY_BLOCK_SIZE;
+	return ppc_md.memory_block_size();
+}
+#endif
+
+#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
+struct ppc_pci_io ppc_pci_io;
+EXPORT_SYMBOL(ppc_pci_io);
+#endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	return ppc_proc_freq * watchdog_thresh;	/* NOTE(review): presumably Hz x seconds = cycles -- confirm units */
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
+ * the detector for non-KVM guests, assume PowerVM.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+	/* The perf based detector is always disabled by default. */
+	hardlockup_detector_disable();
+#else
+	/* Otherwise disable it only for LPAR guests running under KVM. */
+	if (firmware_has_feature(FW_FEATURE_LPAR) && is_kvm_guest())
+		hardlockup_detector_disable();
+#endif
+
+	return 0;
+}
+early_initcall(disable_hardlockup_detector);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
new file mode 100644
index 0000000000..68a91e553e
--- /dev/null
+++ b/arch/powerpc/kernel/signal.c
@@ -0,0 +1,373 @@
+/*
+ * Common signal handling code for both 32 and 64 bits
+ *
+ *    Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
+ *    Extracted from signal_32.c and signal_64.c
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/resume_user_mode.h>
+#include <linux/signal.h>
+#include <linux/uprobes.h>
+#include <linux/key.h>
+#include <linux/context_tracking.h>
+#include <linux/livepatch.h>
+#include <linux/syscalls.h>
+#include <asm/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <asm/switch_to.h>
+#include <asm/unistd.h>
+#include <asm/debug.h>
+#include <asm/tm.h>
+
+#include "signal.h"
+
+#ifdef CONFIG_VSX
+unsigned long copy_fpr_to_user(void __user *to,
+			       struct task_struct *task)
+{
+	u64 regs[ELF_NFPREG];
+	int n;
+
+	/* stage the FPRs and FPSCR in a local buffer, then copy it to user */
+	for (n = 0; n < ELF_NFPREG - 1; n++)
+		regs[n] = task->thread.TS_FPR(n);
+	regs[n] = task->thread.fp_state.fpscr;
+	return __copy_to_user(to, regs, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_fpr_from_user(struct task_struct *task,
+				 void __user *from)
+{
+	u64 regs[ELF_NFPREG];
+	int n;
+
+	/* stage in a local buffer: a failed copy leaves the thread untouched */
+	if (__copy_from_user(regs, from, ELF_NFPREG * sizeof(double)))
+		return 1;
+	for (n = 0; n < ELF_NFPREG - 1; n++)
+		task->thread.TS_FPR(n) = regs[n];
+	task->thread.fp_state.fpscr = regs[n];
+	return 0;
+}
+
+unsigned long copy_vsx_to_user(void __user *to,
+			       struct task_struct *task)
+{
+	u64 halves[ELF_NVSRHALFREG];
+	int n;
+
+	/* stage the TS_VSRLOWOFFSET words in a local buffer, then copy to user */
+	for (n = 0; n < ELF_NVSRHALFREG; n++)
+		halves[n] = task->thread.fp_state.fpr[n][TS_VSRLOWOFFSET];
+	return __copy_to_user(to, halves, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_vsx_from_user(struct task_struct *task,
+				 void __user *from)
+{
+	u64 halves[ELF_NVSRHALFREG];
+	int n;
+
+	if (__copy_from_user(halves, from, ELF_NVSRHALFREG * sizeof(double)))
+		return 1;	/* copy failed: thread state left untouched */
+	for (n = 0; n < ELF_NVSRHALFREG; n++)
+		task->thread.fp_state.fpr[n][TS_VSRLOWOFFSET] = halves[n];
+	return 0;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+unsigned long copy_ckfpr_to_user(void __user *to,
+				  struct task_struct *task)
+{
+	u64 regs[ELF_NFPREG];
+	int n;
+
+	/* stage the ckfp_state FPRs and FPSCR locally, then copy to user */
+	for (n = 0; n < ELF_NFPREG - 1; n++)
+		regs[n] = task->thread.TS_CKFPR(n);
+	regs[n] = task->thread.ckfp_state.fpscr;
+	return __copy_to_user(to, regs, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_ckfpr_from_user(struct task_struct *task,
+					  void __user *from)
+{
+	u64 regs[ELF_NFPREG];
+	int n;
+
+	/* stage in a local buffer: a failed copy leaves the thread untouched */
+	if (__copy_from_user(regs, from, ELF_NFPREG * sizeof(double)))
+		return 1;
+	for (n = 0; n < ELF_NFPREG - 1; n++)
+		task->thread.TS_CKFPR(n) = regs[n];
+	task->thread.ckfp_state.fpscr = regs[n];
+	return 0;
+}
+
+unsigned long copy_ckvsx_to_user(void __user *to,
+				  struct task_struct *task)
+{
+	u64 halves[ELF_NVSRHALFREG];
+	int n;
+
+	/* stage the ckfp_state TS_VSRLOWOFFSET words locally, then copy to user */
+	for (n = 0; n < ELF_NVSRHALFREG; n++)
+		halves[n] = task->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET];
+	return __copy_to_user(to, halves, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_ckvsx_from_user(struct task_struct *task,
+					  void __user *from)
+{
+	u64 halves[ELF_NVSRHALFREG];
+	int n;
+
+	if (__copy_from_user(halves, from, ELF_NVSRHALFREG * sizeof(double)))
+		return 1;	/* copy failed: thread state left untouched */
+	for (n = 0; n < ELF_NVSRHALFREG; n++)
+		task->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET] = halves[n];
+	return 0;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#endif
+
+/* Log an error when sending an unhandled signal to a process. Controlled
+ * through debug.exception-trace sysctl.
+ */
+
+int show_unhandled_signals = 1;
+
+unsigned long get_min_sigframe_size(void)
+{
+	/* 64-bit kernels report the 64-bit minimum, others the 32-bit one. */
+	if (!IS_ENABLED(CONFIG_PPC64))
+		return get_min_sigframe_size_32();
+
+	return get_min_sigframe_size_64();
+}
+
+#ifdef CONFIG_COMPAT
+unsigned long get_min_sigframe_size_compat(void)
+{
+	return get_min_sigframe_size_32();	/* compat reuses the 32-bit minimum */
+}
+#endif
+
+/*
+ * Allocate space for the signal frame
+ */
+static unsigned long get_tm_stackpointer(struct task_struct *tsk);
+
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+			  size_t frame_size, int is_32)
+{
+	unsigned long oldsp, newsp;
+	unsigned long sp = get_tm_stackpointer(tsk);
+
+	/* Default to using normal stack; 32-bit tasks keep only the low word */
+	if (is_32)
+		oldsp = sp & 0x0ffffffffUL;
+	else
+		oldsp = sp;
+	oldsp = sigsp(oldsp, ksig);
+	newsp = (oldsp - frame_size) & ~0xFUL;	/* round down to a 16-byte boundary */
+
+	return (void __user *)newsp;
+}
+
+static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
+				  int has_handler)
+{
+	unsigned long ret = regs->gpr[3];
+	int restart = 1;
+
+	/* syscall ? */
+	if (!trap_is_syscall(regs))
+		return;
+
+	if (trap_norestart(regs))
+		return;
+
+	/* error signalled ? */
+	if (trap_is_scv(regs)) {
+		/* 32-bit compat mode sign extend? */
+		if (!IS_ERR_VALUE(ret))
+			return;
+		ret = -ret;
+	} else if (!(regs->ccr & 0x10000000)) {
+		return;
+	}
+
+	switch (ret) {
+	case ERESTART_RESTARTBLOCK:
+	case ERESTARTNOHAND:
+		/* ERESTARTNOHAND means that the syscall should only be
+		 * restarted if there was no handler for the signal, so
+		 * restart exactly when has_handler is false.
+		 */
+		restart = !has_handler;
+		break;
+	case ERESTARTSYS:
+		/* ERESTARTSYS means to restart the syscall if there is no
+		 * handler or the handler was registered with SA_RESTART
+		 */
+		restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0;
+		break;
+	case ERESTARTNOINTR:
+		/* ERESTARTNOINTR means that the syscall should be
+		 * called again after the signal handler returns.
+		 */
+		break;
+	default:
+		return;
+	}
+	if (restart) {
+		if (ret == ERESTART_RESTARTBLOCK)
+			regs->gpr[0] = __NR_restart_syscall;
+		else
+			regs->gpr[3] = regs->orig_gpr3;
+		regs_add_return_ip(regs, -4);	/* step the return address back by 4 bytes */
+		regs->result = 0;
+	} else {
+		if (trap_is_scv(regs)) {
+			regs->result = -EINTR;
+			regs->gpr[3] = -EINTR;	/* scv path: negative errno in gpr[3] */
+		} else {
+			regs->result = -EINTR;
+			regs->gpr[3] = EINTR;
+			regs->ccr |= 0x10000000;	/* same ccr bit that is tested above */
+		}
+	}
+}
+
+static void do_signal(struct task_struct *tsk)
+{
+	sigset_t *oldset = sigmask_to_save();
+	struct ksignal ksig = { .sig = 0 };
+	int ret;
+
+	BUG_ON(tsk != current);
+
+	get_signal(&ksig);
+
+	/* Is there any syscall restart business here ? */
+	check_syscall_restart(tsk->thread.regs, &ksig.ka, ksig.sig > 0);
+
+	if (ksig.sig <= 0) {
+		/* No signal to deliver -- put the saved sigmask back */
+		restore_saved_sigmask();
+		set_trap_norestart(tsk->thread.regs);
+		return;               /* no signals delivered */
+	}
+
+	/*
+	 * Reenable the DABR before delivering the signal to
+	 * user space. The DABR will have been cleared if it
+	 * triggered inside the kernel.
+	 */
+	if (!IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
+		int i;
+
+		for (i = 0; i < nr_wp_slots(); i++) {
+			if (tsk->thread.hw_brk[i].address && tsk->thread.hw_brk[i].type)
+				__set_breakpoint(i, &tsk->thread.hw_brk[i]);
+		}
+	}
+
+	/* Re-enable the breakpoints for the signal stack */
+	thread_change_pc(tsk, tsk->thread.regs);
+
+	rseq_signal_deliver(&ksig, tsk->thread.regs);
+
+	if (is_32bit_task()) {
+		if (ksig.ka.sa.sa_flags & SA_SIGINFO)
+			ret = handle_rt_signal32(&ksig, oldset, tsk);
+		else
+			ret = handle_signal32(&ksig, oldset, tsk);
+	} else {
+		ret = handle_rt_signal64(&ksig, oldset, tsk);
+	}
+
+	set_trap_norestart(tsk->thread.regs);
+	signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
+}
+
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
+{
+	if (thread_info_flags & _TIF_UPROBE)
+		uprobe_notify_resume(regs);
+
+	if (thread_info_flags & _TIF_PATCH_PENDING)
+		klp_update_patch_state(current);
+
+	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
+		BUG_ON(regs != current->thread.regs);	/* do_signal() works on current's own regs */
+		do_signal(current);
+	}
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME)
+		resume_user_mode_work(regs);
+}
+
+static unsigned long get_tm_stackpointer(struct task_struct *tsk)
+{
+	/* When in an active transaction that takes a signal, we need to be
+	 * careful with the stack.  It's possible that the stack has moved back
+	 * up after the tbegin.  The obvious case here is when the tbegin is
+	 * called inside a function that returns before a tend.  In this case,
+	 * the stack is part of the checkpointed transactional memory state.
+	 * If we write over this non transactionally or in suspend, we are in
+	 * trouble because if we get a tm abort, the program counter and stack
+	 * pointer will be back at the tbegin but our in memory stack won't be
+	 * valid anymore.
+	 *
+	 * To avoid this, when taking a signal in an active transaction, we
+	 * need to use the stack pointer from the checkpointed state, rather
+	 * than the speculated state.  This ensures that the signal context
+	 * (written tm suspended) will be written below the stack required for
+	 * the rollback.  The transaction is aborted because of the treclaim,
+	 * so any memory written between the tbegin and the signal will be
+	 * rolled back anyway.
+	 *
+	 * For signals taken in non-TM or suspended mode, we use the
+	 * normal/non-checkpointed stack pointer.
+	 */
+	struct pt_regs *regs = tsk->thread.regs;
+	unsigned long ret = regs->gpr[1];	/* default: the normal (non-checkpointed) stack pointer */
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	BUG_ON(tsk != current);
+
+	if (MSR_TM_ACTIVE(regs->msr)) {
+		preempt_disable();
+		tm_reclaim_current(TM_CAUSE_SIGNAL);
+		if (MSR_TM_TRANSACTIONAL(regs->msr))
+			ret = tsk->thread.ckpt_regs.gpr[1];	/* checkpointed stack pointer instead */
+
+		/*
+		 * If we treclaim, we must clear the current thread's TM bits
+		 * before re-enabling preemption. Otherwise we might be
+		 * preempted and have the live MSR[TS] changed behind our back
+		 * (tm_recheckpoint_new_task() would recheckpoint). Besides, we
+		 * enter the signal handler in non-transactional state.
+		 */
+		regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+		preempt_enable();
+	}
+#endif
+	return ret;
+}
+
+static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n";
+static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n";
+
+void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
+		  const char *where, void __user *ptr)
+{
+	if (show_unhandled_signals)	/* nothing is logged when this flag is 0 */
+		printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm,
+				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
+}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
new file mode 100644
index 0000000000..a429c57ed4
--- /dev/null
+++ b/arch/powerpc/kernel/signal.h
@@ -0,0 +1,210 @@
+/*
+ *    Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
+ *    Extracted from signal_32.c and signal_64.c
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file README.legal in the main directory of
+ * this archive for more details.
+ */
+
+#ifndef _POWERPC_ARCH_SIGNAL_H
+#define _POWERPC_ARCH_SIGNAL_H
+
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+			  size_t frame_size, int is_32);
+
+extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+			   struct task_struct *tsk);
+
+extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
+			      struct task_struct *tsk);
+
+/*
+ * Read a signal mask from user space with a single 64-bit __get_user().
+ *
+ * The BUILD_BUG_ON() enforces that sigset_t is exactly 64 bits wide; the
+ * value read is stored into dst->sig[0].  Returns the __get_user() result.
+ *
+ * NOTE(review): only dst->sig[0] is assigned, so this presumably relies on
+ * sig[0] itself being 64 bits wide -- confirm before adding a 32-bit caller.
+ */
+static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+{
+	BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));
+
+	return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+}
+/*
+ * Copy a sigset_t from the user pointer @src into @dst one word at a time
+ * (_NSIG_WORDS words), using unsafe_get_user() with @label passed through
+ * as its label argument.
+ */
+#define unsafe_get_user_sigset(dst, src, label) do {			\
+	sigset_t *__dst = dst;						\
+	const sigset_t __user *__src = src;				\
+	int i;								\
+									\
+	for (i = 0; i < _NSIG_WORDS; i++)				\
+		unsafe_get_user(__dst->sig[i], &__src->sig[i], label);	\
+} while (0)
+
+#ifdef CONFIG_VSX
+extern unsigned long copy_vsx_to_user(void __user *to,
+				      struct task_struct *task);
+extern unsigned long copy_ckvsx_to_user(void __user *to,
+					       struct task_struct *task);
+extern unsigned long copy_vsx_from_user(struct task_struct *task,
+					void __user *from);
+extern unsigned long copy_ckvsx_from_user(struct task_struct *task,
+						 void __user *from);
+unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from);
+unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
+
+/*
+ * Write @task's FP state to the user buffer @to as ELF_NFPREG u64 slots:
+ * slots 0..ELF_NFPREG-2 come from thread.TS_FPR(i) and the last slot holds
+ * fp_state.fpscr.  Every store is an unsafe_put_user() that is given @label.
+ */
+#define unsafe_copy_fpr_to_user(to, task, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)to;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1 ; i++)				\
+		unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \
+	unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+/*
+ * Write the TS_VSRLOWOFFSET doubleword of each of @task's ELF_NVSRHALFREG
+ * fp_state.fpr[] entries to the u64 array at user address @to.  Every store
+ * is an unsafe_put_user() that is given @label.
+ */
+#define unsafe_copy_vsx_to_user(to, task, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)to;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);\
+} while (0)
+
+/*
+ * Load @task's FP state from the user buffer @from, laid out as ELF_NFPREG
+ * u64 slots: slots 0..ELF_NFPREG-2 go to thread.TS_FPR(i) and the last slot
+ * goes to fp_state.fpscr.  Every load is an unsafe_get_user() that is given
+ * @label.
+ */
+#define unsafe_copy_fpr_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1; i++)				\
+		unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
+	unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+/*
+ * Load the TS_VSRLOWOFFSET doubleword of each of @task's ELF_NVSRHALFREG
+ * fp_state.fpr[] entries from the u64 array at user address @from.  Every
+ * load is an unsafe_get_user() that is given @label.
+ */
+#define unsafe_copy_vsx_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);			\
+} while (0)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Checkpointed counterpart of unsafe_copy_fpr_to_user(): writes
+ * thread.TS_CKFPR(i) for slots 0..ELF_NFPREG-2 and ckfp_state.fpscr for the
+ * last slot to the u64 array at user address @to, handing @label to each
+ * unsafe_put_user().
+ */
+#define unsafe_copy_ckfpr_to_user(to, task, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)to;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1 ; i++)				\
+		unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+	unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+/*
+ * Checkpointed counterpart of unsafe_copy_vsx_to_user(): writes the
+ * TS_VSRLOWOFFSET doubleword of each of the ELF_NVSRHALFREG
+ * ckfp_state.fpr[] entries to the u64 array at user address @to, handing
+ * @label to each unsafe_put_user().
+ */
+#define unsafe_copy_ckvsx_to_user(to, task, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)to;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);\
+} while (0)
+
+/*
+ * Checkpointed counterpart of unsafe_copy_fpr_from_user(): loads
+ * thread.TS_CKFPR(i) for slots 0..ELF_NFPREG-2 and ckfp_state.fpscr for the
+ * last slot from the u64 array at user address @from.
+ *
+ * Every unsafe_get_user() is given the caller-supplied @label, including
+ * the final fpscr load, so the macro does not depend on the caller having a
+ * label that happens to be called "failed".
+ */
+#define unsafe_copy_ckfpr_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1 ; i++)				\
+		unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+	unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+/*
+ * Checkpointed counterpart of unsafe_copy_vsx_from_user(): loads the
+ * TS_VSRLOWOFFSET doubleword of each of the ELF_NVSRHALFREG
+ * ckfp_state.fpr[] entries from the u64 array at user address @from,
+ * handing @label to each unsafe_get_user().
+ */
+#define unsafe_copy_ckvsx_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);			\
+} while (0)
+#endif
+#elif defined(CONFIG_PPC_FPU_REGS)
+
+#define unsafe_copy_fpr_to_user(to, task, label)		\
+	unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,	\
+			    ELF_NFPREG * sizeof(double), label)
+
+#define unsafe_copy_fpr_from_user(task, from, label)			\
+	unsafe_copy_from_user((task)->thread.fp_state.fpr, from,	\
+			    ELF_NFPREG * sizeof(double), label)
+
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+	return __copy_to_user(to, task->thread.fp_state.fpr,
+			      ELF_NFPREG * sizeof(double));
+}
+
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+	return __copy_from_user(task->thread.fp_state.fpr, from,
+			      ELF_NFPREG * sizeof(double));
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define unsafe_copy_ckfpr_to_user(to, task, label)		\
+	unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr,	\
+			    ELF_NFPREG * sizeof(double), label)
+
+/*
+ * Copy @task's checkpointed FP register image (ckfp_state.fpr,
+ * ELF_NFPREG doubles) to the user buffer @to.
+ * Returns the value returned by __copy_to_user().
+ *
+ * This lives in a header, so it is static inline like the other
+ * copy_*fpr_*_user() helpers next to it; a non-static definition could be
+ * emitted once per including file.
+ */
+static inline unsigned long
+copy_ckfpr_to_user(void __user *to, struct task_struct *task)
+{
+	return __copy_to_user(to, task->thread.ckfp_state.fpr,
+			      ELF_NFPREG * sizeof(double));
+}
+
+static inline unsigned long
+copy_ckfpr_from_user(struct task_struct *task, void __user *from)
+{
+	return __copy_from_user(task->thread.ckfp_state.fpr, from,
+				ELF_NFPREG * sizeof(double));
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#else
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
+
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
+
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+	return 0;
+}
+
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC64
+
+extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
+			      struct task_struct *tsk);
+
+#else /* CONFIG_PPC64 */
+
+static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
+				     struct task_struct *tsk)
+{
+	return -EFAULT;
+}
+
+#endif /* !defined(CONFIG_PPC64) */
+
+void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
+		  const char *where, void __user *ptr);
+
+#endif  /* _POWERPC_ARCH_SIGNAL_H */
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
new file mode 100644
index 0000000000..7a718ed32b
--- /dev/null
+++ b/arch/powerpc/kernel/signal_32.c
@@ -0,0 +1,1359 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Signal handling for 32bit PPC and 32bit tasks on 64bit PPC
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Copyright (C) 2001 IBM
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ *  Derived from "arch/i386/kernel/signal.c"
+ *    Copyright (C) 1991, 1992 Linus Torvalds
+ *    1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/elf.h>
+#include <linux/ptrace.h>
+#include <linux/pagemap.h>
+#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
+#ifdef CONFIG_PPC64
+#include <linux/compat.h>
+#else
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#endif
+
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/syscalls.h>
+#include <asm/sigcontext.h>
+#include <asm/vdso.h>
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#include <asm/unistd.h>
+#else
+#include <asm/ucontext.h>
+#endif
+
+#include "signal.h"
+
+
+#ifdef CONFIG_PPC64
+#define old_sigaction	old_sigaction32
+#define sigcontext	sigcontext32
+#define mcontext	mcontext32
+#define ucontext	ucontext32
+
+/*
+ * Userspace code may pass a ucontext which doesn't include VSX added
+ * at the end.  We need to check for this case.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+		(sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32))
+
+/*
+ * Returning 0 means we return to userspace via
+ * ret_from_except and thus restore all user
+ * registers from *regs.  This is what we need
+ * to do when a signal has been delivered.
+ */
+
+#define GP_REGS_SIZE	min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
+#undef __SIGNAL_FRAMESIZE
+#define __SIGNAL_FRAMESIZE	__SIGNAL_FRAMESIZE32
+#undef ELF_NVRREG
+#define ELF_NVRREG	ELF_NVRREG32
+
+/*
+ * Functions for flipping sigsets (thanks to brain dead generic
+ * implementation that makes things simple for little endian only)
+ */
+#define unsafe_put_sigset_t	unsafe_put_compat_sigset
+#define unsafe_get_sigset_t	unsafe_get_compat_sigset
+
+#define to_user_ptr(p)		ptr_to_compat(p)
+#define from_user_ptr(p)	compat_ptr(p)
+
+/*
+ * Store slots 0..PT_RESULT of @regs into @frame->mc_gregs, passing each
+ * 64-bit register image through an int on the way to the 32-bit context.
+ * Returns 0 on success, 1 if a user store took the failure path.
+ */
+static __always_inline int
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
+{
+	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
+	int i;
+
+	for (i = 0; i <= PT_RESULT; i++) {
+		/* Force user space to always see softe as 1 (interrupts enabled) */
+		int val = (i == PT_SOFTE) ? 1 : gregs[i];
+
+		unsafe_put_user(val, &frame->mc_gregs[i], failed);
+	}
+	return 0;
+
+failed:
+	return 1;
+}
+
+/*
+ * Reload slots 0..PT_RESULT of @regs from @sr->mc_gregs, leaving the
+ * PT_MSR and PT_SOFTE slots untouched.
+ * Returns 0 on success, 1 if a user load took the failure path.
+ */
+static __always_inline int
+__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
+{
+	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
+	int idx;
+
+	for (idx = 0; idx <= PT_RESULT; idx++) {
+		/* MSR and SOFTE are never taken from the user context here */
+		if (idx != PT_MSR && idx != PT_SOFTE)
+			unsafe_get_user(gregs[idx], &sr->mc_gregs[idx], failed);
+	}
+	return 0;
+
+failed:
+	return 1;
+}
+
+#else /* CONFIG_PPC64 */
+
+#define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
+
+#define unsafe_put_sigset_t(uset, set, label) do {			\
+	sigset_t __user *__us = uset	;				\
+	const sigset_t *__s = set;					\
+									\
+	unsafe_copy_to_user(__us, __s, sizeof(*__us), label);		\
+} while (0)
+
+#define unsafe_get_sigset_t	unsafe_get_user_sigset
+
+#define to_user_ptr(p)		((unsigned long)(p))
+#define from_user_ptr(p)	((void __user *)(p))
+
+static __always_inline int
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
+{
+	unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed);
+	return 0;
+
+failed:
+	return 1;
+}
+
+/*
+ * Reload @regs from @sr->mc_gregs in two pieces so that the MSR slot is
+ * skipped: everything before PT_MSR, then PT_ORIG_R3 up to GP_REGS_SIZE.
+ * Returns 0 on success, 1 if a user copy took the failure path.
+ */
+static __always_inline
+int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
+{
+	/* copy up to but not including MSR */
+	unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed);
+
+	/* copy from orig_r3 (the word after the MSR) up to the end */
+	unsafe_copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
+			      GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t), failed);
+
+	return 0;
+
+failed:
+	return 1;
+}
+#endif
+
+#define unsafe_save_general_regs(regs, frame, label) do {	\
+	if (__unsafe_save_general_regs(regs, frame))		\
+		goto label;					\
+} while (0)
+
+#define unsafe_restore_general_regs(regs, frame, label) do {	\
+	if (__unsafe_restore_general_regs(regs, frame))		\
+		goto label;					\
+} while (0)
+
+/*
+ * When we have signals to deliver, we set up on the
+ * user stack, going down from the original stack pointer:
+ *	an ABI gap of 56 words
+ *	an mcontext struct
+ *	a sigcontext struct
+ *	a gap of __SIGNAL_FRAMESIZE bytes
+ *
+ * Each of these things must be a multiple of 16 bytes in size. The following
+ * structure represent all of this except the __SIGNAL_FRAMESIZE gap
+ *
+ */
+struct sigframe {
+	struct sigcontext sctx;		/* the sigcontext */
+	struct mcontext	mctx;		/* all the register values */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	struct sigcontext sctx_transact;
+	struct mcontext	mctx_transact;
+#endif
+	/*
+	 * Programs using the rs6000/xcoff abi can save up to 19 gp
+	 * regs and 18 fp regs below sp before decrementing it.
+	 */
+	int			abigap[56];
+};
+
+/*
+ *  When we have rt signals to deliver, we set up on the
+ *  user stack, going down from the original stack pointer:
+ *	one rt_sigframe struct (siginfo + ucontext + ABI gap)
+ *	a gap of __SIGNAL_FRAMESIZE+16 bytes
+ *  (the +16 is to get the siginfo and ucontext in the same
+ *  positions as in older kernels).
+ *
+ *  Each of these things must be a multiple of 16 bytes in size.
+ *
+ */
+struct rt_sigframe {
+#ifdef CONFIG_PPC64
+	compat_siginfo_t info;
+#else
+	struct siginfo info;
+#endif
+	struct ucontext	uc;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	struct ucontext	uc_transact;
+#endif
+	/*
+	 * Programs using the rs6000/xcoff abi can save up to 19 gp
+	 * regs and 18 fp regs below sp before decrementing it.
+	 */
+	int			abigap[56];
+};
+
+/*
+ * Return the larger of the two 32-bit signal frame footprints: the rt frame
+ * (struct rt_sigframe plus __SIGNAL_FRAMESIZE + 16) and the plain frame
+ * (struct sigframe plus __SIGNAL_FRAMESIZE).
+ */
+unsigned long get_min_sigframe_size_32(void)
+{
+	size_t rt_size = sizeof(struct rt_sigframe) + __SIGNAL_FRAMESIZE + 16;
+	size_t plain_size = sizeof(struct sigframe) + __SIGNAL_FRAMESIZE;
+
+	return max(rt_size, plain_size);
+}
+
+/*
+ * Get ready to save the current user registers on the user stack: flush
+ * the FP/altivec/VSX/SPE state of current to current->thread and sample
+ * VRSAVE from the SPR.  Nothing is written to user memory here.
+ * The altivec/spe registers are only flushed if the process has used
+ * altivec/spe instructions at some point; the VSX flush additionally
+ * requires @ctx_has_vsx_region to be non-zero.
+ */
+static void prepare_save_user_regs(int ctx_has_vsx_region)
+{
+	/* Make sure floating point registers are stored in regs */
+	flush_fp_to_thread(current);
+#ifdef CONFIG_ALTIVEC
+	if (current->thread.used_vr)
+		flush_altivec_to_thread(current);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		current->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+#ifdef CONFIG_VSX
+	if (current->thread.used_vsr && ctx_has_vsx_region)
+		flush_vsx_to_thread(current);
+#endif
+#ifdef CONFIG_SPE
+	if (current->thread.used_spe)
+		flush_spe_to_thread(current);
+#endif
+}
+
+/*
+ * Write the register state of current into the user mcontext @frame.
+ *
+ * @regs:               general registers to save
+ * @frame:              user mcontext receiving the state
+ * @tm_frame:           optional second mcontext; if non-NULL only its MSR
+ *                      word is written (with 0)
+ * @ctx_has_vsx_region: non-zero if @frame has room for the VSX halves
+ *
+ * The MSR word stored in @frame starts from regs->msr, with MSR_VEC,
+ * MSR_VSX and MSR_SPE adjusted to say which optional areas hold valid data.
+ * Returns 0 on success, 1 if a user access took the failure path.
+ */
+static __always_inline int
+__unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+			struct mcontext __user *tm_frame, int ctx_has_vsx_region)
+{
+	unsigned long msr = regs->msr;
+
+	/* save general registers */
+	unsafe_save_general_regs(regs, frame, failed);
+
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (current->thread.used_vr) {
+		unsafe_copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
+				    ELF_NVRREG * sizeof(vector128), failed);
+		/* set MSR_VEC in the saved MSR value to indicate that
+		   frame->mc_vregs contains valid data */
+		msr |= MSR_VEC;
+	}
+	/* else assert((regs->msr & MSR_VEC) == 0) */
+
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec. Since VSCR only contains 32 bits saved in the least
+	 * significant bits of a vector, we "cheat" and stuff VRSAVE in the
+	 * most significant bits of that same vector. --BenH
+	 * Note that the current VRSAVE value is in the SPR at this point.
+	 */
+	unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32],
+			failed);
+#endif /* CONFIG_ALTIVEC */
+	unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed);
+
+	/*
+	 * Clear the MSR VSX bit to indicate there is no valid state attached
+	 * to this context, except in the specific case below where we set it.
+	 */
+	msr &= ~MSR_VSX;
+#ifdef CONFIG_VSX
+	/*
+	 * Write the TS_VSRLOWOFFSET doubleword of VSR 0-31 from the
+	 * thread_struct to userspace.  Also set MSR_VSX in the saved MSR
+	 * value to indicate that frame->mc_vsregs contains valid data
+	 */
+	if (current->thread.used_vsr && ctx_has_vsx_region) {
+		unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed);
+		msr |= MSR_VSX;
+	}
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+	/* save spe registers */
+	if (current->thread.used_spe) {
+		unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
+				    ELF_NEVRREG * sizeof(u32), failed);
+		/* set MSR_SPE in the saved MSR value to indicate that
+		   frame->mc_vregs contains valid data */
+		msr |= MSR_SPE;
+	}
+	/* else assert((regs->msr & MSR_SPE) == 0) */
+
+	/* We always copy to/from spefscr */
+	unsafe_put_user(current->thread.spefscr,
+			(u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed);
+#endif /* CONFIG_SPE */
+
+	unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
+
+	/* We need to write 0 to the MSR word of the tm frame (the slot that
+	 * holds the top 32 bits of the MSR when TM is active) so that we
+	 * can check it on the restore to see if TM is active
+	 */
+	if (tm_frame)
+		unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed);
+
+	return 0;
+
+failed:
+	return 1;
+}
+
+#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \
+	if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx))	\
+		goto label;						\
+} while (0)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Get ready to save the current user registers on the user stack when a
+ * transaction is active.  The save itself (save_tm_user_regs_unsafe())
+ * only saves the altivec registers if the process has used altivec
+ * instructions at some point, and also saves the transactional registers
+ * to a second ucontext in the frame.
+ *
+ * This helper only warns if tm_suspend_disabled is set and samples VRSAVE
+ * from the SPR into thread.ckvrsave.
+ *
+ * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts().
+ */
+static void prepare_save_tm_user_regs(void)
+{
+	WARN_ON(tm_suspend_disabled);
+
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+}
+
+/*
+ * Write both register sets of current to user space for a signal taken
+ * with a transaction active.
+ *
+ * @regs:     general registers saved into @tm_frame
+ * @frame:    user mcontext receiving the checkpointed state (ckpt_regs,
+ *            ckvr_state, ckfp_state)
+ * @tm_frame: user mcontext receiving the second register set
+ * @msr:      MSR value to record; its MSR_VEC/MSR_FP/MSR_VSX bits select
+ *            whether @tm_frame gets the live or the checkpointed copy of
+ *            each facility
+ *
+ * Returns 0 on success, 1 if a user access took the failure path.
+ */
+static __always_inline int
+save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+			 struct mcontext __user *tm_frame, unsigned long msr)
+{
+	/* Save both sets of general registers */
+	unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed);
+	unsafe_save_general_regs(regs, tm_frame, failed);
+
+	/* Stash the top half of the 64bit MSR into the 32bit MSR word
+	 * of the transactional mcontext.  This way we have a backward-compatible
+	 * MSR in the 'normal' (checkpointed) mcontext and additionally one can
+	 * also look at what type of transaction (T or S) was active at the
+	 * time of the signal.
+	 */
+	unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed);
+
+	/* save altivec registers */
+	if (current->thread.used_vr) {
+		unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
+				    ELF_NVRREG * sizeof(vector128), failed);
+		if (msr & MSR_VEC)
+			unsafe_copy_to_user(&tm_frame->mc_vregs,
+					    &current->thread.vr_state,
+					    ELF_NVRREG * sizeof(vector128), failed);
+		else
+			unsafe_copy_to_user(&tm_frame->mc_vregs,
+					    &current->thread.ckvr_state,
+					    ELF_NVRREG * sizeof(vector128), failed);
+
+		/* set MSR_VEC in the saved MSR value to indicate that
+		 * frame->mc_vregs contains valid data
+		 */
+		msr |= MSR_VEC;
+	}
+
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec. Since VSCR only contains 32 bits saved in the least
+	 * significant bits of a vector, we "cheat" and stuff VRSAVE in the
+	 * most significant bits of that same vector. --BenH
+	 */
+	unsafe_put_user(current->thread.ckvrsave,
+			(u32 __user *)&frame->mc_vregs[32], failed);
+	if (msr & MSR_VEC)
+		unsafe_put_user(current->thread.vrsave,
+				(u32 __user *)&tm_frame->mc_vregs[32], failed);
+	else
+		unsafe_put_user(current->thread.ckvrsave,
+				(u32 __user *)&tm_frame->mc_vregs[32], failed);
+
+	unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed);
+	if (msr & MSR_FP)
+		unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed);
+	else
+		unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed);
+
+	/*
+	 * Write the TS_VSRLOWOFFSET doubleword of VSR 0-31 from the
+	 * thread_struct to userspace.  Also set MSR_VSX in the saved MSR
+	 * value to indicate that frame->mc_vsregs contains valid data
+	 */
+	if (current->thread.used_vsr) {
+		unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed);
+		if (msr & MSR_VSX)
+			unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed);
+		else
+			unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed);
+
+		msr |= MSR_VSX;
+	}
+
+	unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
+
+	return 0;
+
+failed:
+	return 1;
+}
+#else
+static void prepare_save_tm_user_regs(void) { }
+
+static __always_inline int
+save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+			 struct mcontext __user *tm_frame, unsigned long msr)
+{
+	return 0;
+}
+#endif
+
+#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \
+	if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr))	\
+		goto label;						\
+} while (0)
+
+/*
+ * Restore the current user register values from the user stack,
+ * (except for MSR).
+ *
+ * @regs: register frame to fill in
+ * @sr:   user mcontext to read from
+ * @sig:  non-zero for a signal return; when zero, r2 is preserved across
+ *        the restore and the MSR_LE bit of @regs is left alone
+ *
+ * The MSR word read from @sr is only used for its MSR_LE, MSR_VEC, MSR_VSX
+ * and MSR_SPE bits.  Returns 0 on success, 1 if @sr could not be read.
+ */
+static long restore_user_regs(struct pt_regs *regs,
+			      struct mcontext __user *sr, int sig)
+{
+	unsigned int save_r2 = 0;
+	unsigned long msr;
+#ifdef CONFIG_VSX
+	int i;
+#endif
+
+	if (!user_read_access_begin(sr, sizeof(*sr)))
+		return 1;
+	/*
+	 * restore general registers but not including MSR or SOFTE. Also
+	 * take care of keeping r2 (TLS) intact if not a signal
+	 */
+	if (!sig)
+		save_r2 = (unsigned int)regs->gpr[2];
+	unsafe_restore_general_regs(regs, sr, failed);
+	set_trap_norestart(regs);
+	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+	if (!sig)
+		regs->gpr[2] = (unsigned long) save_r2;
+
+	/* if doing signal return, restore the previous little-endian mode */
+	if (sig)
+		regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+
+#ifdef CONFIG_ALTIVEC
+	/*
+	 * Force the process to reload the altivec registers from
+	 * current->thread when it next does altivec instructions
+	 */
+	regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
+	if (msr & MSR_VEC) {
+		/* restore altivec registers from the stack */
+		unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+				      sizeof(sr->mc_vregs), failed);
+		current->thread.used_vr = true;
+	} else if (current->thread.used_vr)
+		memset(&current->thread.vr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
+
+	/* Always get VRSAVE back */
+	unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		mtspr(SPRN_VRSAVE, current->thread.vrsave);
+#endif /* CONFIG_ALTIVEC */
+	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
+
+#ifdef CONFIG_VSX
+	/*
+	 * Force the process to reload the VSX registers from
+	 * current->thread when it next does VSX instruction.
+	 */
+	regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
+	if (msr & MSR_VSX) {
+		/*
+		 * Restore the VSX register halves from the stack
+		 * directly into the thread_struct
+		 */
+		unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
+		current->thread.used_vsr = true;
+	} else if (current->thread.used_vsr)
+		for (i = 0; i < 32 ; i++)
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+#endif /* CONFIG_VSX */
+	/*
+	 * force the process to reload the FP registers from
+	 * current->thread when it next does FP instructions
+	 */
+	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
+
+#ifdef CONFIG_SPE
+	/*
+	 * Force the process to reload the spe registers from
+	 * current->thread when it next does spe instructions.
+	 * Since this is user ABI, we must enforce the sizing.
+	 */
+	BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
+	regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
+	if (msr & MSR_SPE) {
+		/* restore spe registers from the stack */
+		unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+				      sizeof(current->thread.spe), failed);
+		current->thread.used_spe = true;
+	} else if (current->thread.used_spe)
+		memset(&current->thread.spe, 0, sizeof(current->thread.spe));
+
+	/* Always get SPEFSCR back */
+	unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
+#endif /* CONFIG_SPE */
+
+	user_read_access_end();
+	return 0;
+
+failed:
+	user_read_access_end();
+	return 1;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Restore the current user register values from the user stack, except for
+ * MSR, and recheckpoint the original checkpointed register state for processes
+ * in transactions.
+ *
+ * @regs:  register frame to fill in
+ * @sr:    user mcontext holding the checkpointed state
+ * @tm_sr: user mcontext holding the second register set and, in its MSR
+ *         word, the top half of the MSR (the TM state bits)
+ *
+ * Returns 0 on success.  Returns 1 if tm_suspend_disabled is set, if either
+ * context could not be read, or if the TM bits read from @tm_sr hold the
+ * reserved value.
+ */
+static long restore_tm_user_regs(struct pt_regs *regs,
+				 struct mcontext __user *sr,
+				 struct mcontext __user *tm_sr)
+{
+	unsigned long msr, msr_hi;
+	int i;
+
+	if (tm_suspend_disabled)
+		return 1;
+	/*
+	 * restore general registers but not including MSR or SOFTE. Also
+	 * take care of keeping r2 (TLS) intact if not a signal.
+	 * See comment in signal_64.c:restore_tm_sigcontexts();
+	 * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
+	 * were set by the signal delivery.
+	 */
+	if (!user_read_access_begin(sr, sizeof(*sr)))
+		return 1;
+
+	unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+	unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+	unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
+
+	/* Restore the previous little-endian mode */
+	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+
+	regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
+	if (msr & MSR_VEC) {
+		/* restore altivec registers from the stack */
+		unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+				      sizeof(sr->mc_vregs), failed);
+		current->thread.used_vr = true;
+	} else if (current->thread.used_vr) {
+		memset(&current->thread.vr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
+		memset(&current->thread.ckvr_state, 0,
+		       ELF_NVRREG * sizeof(vector128));
+	}
+
+	/* Always get VRSAVE back */
+	unsafe_get_user(current->thread.ckvrsave,
+			(u32 __user *)&sr->mc_vregs[32], failed);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
+
+	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
+
+	/*
+	 * NOTE(review): the vector and VSX copies around this one fill the
+	 * checkpointed (ck*) state from @sr, but this call uses the
+	 * non-checkpointed FP copy helper.  Confirm that is intended.
+	 */
+	unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
+
+	regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
+	if (msr & MSR_VSX) {
+		/*
+		 * Restore the checkpointed VSX register halves from the
+		 * stack directly into the thread_struct
+		 */
+		unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
+		current->thread.used_vsr = true;
+	} else if (current->thread.used_vsr)
+		for (i = 0; i < 32 ; i++) {
+			current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		}
+
+	user_read_access_end();
+
+	/* Second pass: the other register set, read from @tm_sr */
+	if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
+		return 1;
+
+	unsafe_restore_general_regs(regs, tm_sr, failed);
+
+	/* restore altivec registers from the stack */
+	if (msr & MSR_VEC)
+		unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+				      sizeof(sr->mc_vregs), failed);
+
+	/* Always get VRSAVE back */
+	unsafe_get_user(current->thread.vrsave,
+			(u32 __user *)&tm_sr->mc_vregs[32], failed);
+
+	/*
+	 * NOTE(review): mirror image of the FP copy above -- the vector and
+	 * VSX copies in this pass fill the non-checkpointed state from
+	 * @tm_sr, but this call uses the checkpointed FP copy helper.
+	 * Confirm that is intended.
+	 */
+	unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+
+	if (msr & MSR_VSX) {
+		/*
+		 * Restore the VSX register halves from the stack
+		 * directly into the thread_struct
+		 */
+		unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+		current->thread.used_vsr = true;
+	}
+
+	/* Get the top half of the MSR from the user context */
+	unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
+	msr_hi <<= 32;
+
+	user_read_access_end();
+
+	/* If TM bits are set to the reserved value, it's an invalid context */
+	if (MSR_TM_RESV(msr_hi))
+		return 1;
+
+	/*
+	 * Disabling preemption, since it is unsafe to be preempted
+	 * with MSR[TS] set without recheckpointing.
+	 */
+	preempt_disable();
+
+	/*
+	 * CAUTION:
+	 * After regs->MSR[TS] being updated, make sure that get_user(),
+	 * put_user() or similar functions are *not* called. These
+	 * functions can generate page faults which will cause the process
+	 * to be de-scheduled with MSR[TS] set but without calling
+	 * tm_recheckpoint(). This can cause a bug.
+	 *
+	 * Pull in the MSR TM bits from the user context
+	 */
+	regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK));
+	/* Now, recheckpoint.  This loads up all of the checkpointed (older)
+	 * registers, including FP and V[S]Rs.  After recheckpointing, the
+	 * transactional versions should be loaded.
+	 */
+	tm_enable();
+	/* Make sure the transaction is marked as failed */
+	current->thread.tm_texasr |= TEXASR_FS;
+	/* This loads the checkpointed FP/VEC state, if used */
+	tm_recheckpoint(&current->thread);
+
+	/* This loads the speculative FP/VEC state, if used */
+	msr_check_and_set(msr & (MSR_FP | MSR_VEC));
+	if (msr & MSR_FP) {
+		load_fp_state(&current->thread.fp_state);
+		regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode));
+	}
+	if (msr & MSR_VEC) {
+		load_vr_state(&current->thread.vr_state);
+		regs_set_return_msr(regs, regs->msr | MSR_VEC);
+	}
+
+	preempt_enable();
+
+	return 0;
+
+failed:
+	user_read_access_end();
+	return 1;
+}
+#else
+static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr,
+				 struct mcontext __user *tm_sr)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC64
+
+#define copy_siginfo_to_user	copy_siginfo_to_user32
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Set up a signal frame for a "real-time" signal handler
+ * (one which gets siginfo).
+ *
+ * @ksig:   the signal being delivered (handler address and siginfo)
+ * @oldset: signal mask stored into the frame's uc_sigmask
+ * @tsk:    task receiving the signal; its thread.regs are rewritten so that
+ *          it resumes in the handler
+ *
+ * Returns 0 on success.  On any failure to write the frame, reports it via
+ * signal_fault() and returns 1.
+ */
+int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
+		       struct task_struct *tsk)
+{
+	struct rt_sigframe __user *frame;
+	struct mcontext __user *mctx;
+	struct mcontext __user *tm_mctx = NULL;
+	unsigned long newsp = 0;
+	unsigned long tramp;
+	struct pt_regs *regs = tsk->thread.regs;
+	/* Save the thread's msr before get_tm_stackpointer() changes it */
+	unsigned long msr = regs->msr;
+
+	/* Set up Signal Frame */
+	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+	mctx = &frame->uc.uc_mcontext;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	tm_mctx = &frame->uc_transact.uc_mcontext;
+#endif
+	if (MSR_TM_ACTIVE(msr))
+		prepare_save_tm_user_regs();
+	else
+		prepare_save_user_regs(1);
+
+	if (!user_access_begin(frame, sizeof(*frame)))
+		goto badframe;
+
+	/* Put the siginfo & fill in most of the ucontext */
+	unsafe_put_user(0, &frame->uc.uc_flags, failed);
+#ifdef CONFIG_PPC64
+	unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+#else
+	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+#endif
+	unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed);
+
+	/* Save user registers on the stack */
+	if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		unsafe_put_user((unsigned long)&frame->uc_transact,
+				&frame->uc.uc_link, failed);
+		unsafe_put_user((unsigned long)tm_mctx,
+				&frame->uc_transact.uc_regs, failed);
+#endif
+		unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
+	} else {
+		unsafe_put_user(0, &frame->uc.uc_link, failed);
+		unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
+	}
+
+	/*
+	 * Pick the sigreturn trampoline: the vDSO one if a vDSO is mapped,
+	 * otherwise two instructions written into mc_pad on the stack
+	 */
+	if (tsk->mm->context.vdso) {
+		tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
+	} else {
+		tramp = (unsigned long)mctx->mc_pad;
+		unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
+		unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
+		asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+	}
+	unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
+
+	user_access_end();
+
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		goto badframe;
+
+	regs->link = tramp;
+
+#ifdef CONFIG_PPC_FPU_REGS
+	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
+#endif
+
+	/* create a stack frame for the caller of the handler */
+	newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16);
+	if (put_user(regs->gpr[1], (u32 __user *)newsp))
+		goto badframe;
+
+	/* Fill registers for signal handler */
+	regs->gpr[1] = newsp;
+	regs->gpr[3] = ksig->sig;
+	regs->gpr[4] = (unsigned long)&frame->info;
+	regs->gpr[5] = (unsigned long)&frame->uc;
+	regs->gpr[6] = (unsigned long)frame;
+	regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
+	/* enter the signal handler in native-endian mode */
+	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
+	return 0;
+
+failed:
+	user_access_end();
+
+badframe:
+	signal_fault(tsk, regs, "handle_rt_signal32", frame);
+
+	return 1;
+}
+
+/* Set up a non-RT struct sigframe on the user stack and redirect regs to the
+ * handler.  Returns 0 on success, or 1 (after signal_fault()) on failure.
+ */
+int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+		struct task_struct *tsk)
+{
+	struct sigcontext __user *sc;
+	struct sigframe __user *frame;
+	struct mcontext __user *mctx;
+	struct mcontext __user *tm_mctx = NULL;
+	unsigned long newsp = 0;
+	unsigned long tramp;
+	struct pt_regs *regs = tsk->thread.regs;
+	/* Save the thread's msr before get_tm_stackpointer() changes it */
+	unsigned long msr = regs->msr;
+
+	/* Set up Signal Frame */
+	frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+	mctx = &frame->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	tm_mctx = &frame->mctx_transact;
+#endif
+	if (MSR_TM_ACTIVE(msr))
+		prepare_save_tm_user_regs();
+	else
+		prepare_save_user_regs(1);
+
+	if (!user_access_begin(frame, sizeof(*frame)))
+		goto badframe;
+	sc = (struct sigcontext __user *) &frame->sctx;
+
+#if _NSIG != 64
+#error "Please adjust handle_signal()"
+#endif
+	unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed);
+	unsafe_put_user(oldset->sig[0], &sc->oldmask, failed);
+#ifdef CONFIG_PPC64	/* both variants stash the upper mask word in _unused[3] */
+	unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed);
+#else
+	unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed);
+#endif
+	unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed);
+	unsafe_put_user(ksig->sig, &sc->signal, failed);
+
+	if (MSR_TM_ACTIVE(msr))
+		unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
+	else
+		unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
+
+	if (tsk->mm->context.vdso) {
+		tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
+	} else {	/* no vDSO: build "li r0,__NR_sigreturn; sc" in mc_pad[] */
+		tramp = (unsigned long)mctx->mc_pad;
+		unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
+		unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
+		asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
+	}
+	user_access_end();
+
+	regs->link = tramp;	/* the handler returns into the trampoline */
+
+#ifdef CONFIG_PPC_FPU_REGS
+	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
+#endif
+
+	/* create a stack frame for the caller of the handler */
+	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+	if (put_user(regs->gpr[1], (u32 __user *)newsp))
+		goto badframe;
+
+	regs->gpr[1] = newsp;
+	regs->gpr[3] = ksig->sig;	/* handler argument: signal number */
+	regs->gpr[4] = (unsigned long) sc;	/* handler argument: sigcontext */
+	regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
+	/* enter the signal handler in native-endian mode */
+	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
+	return 0;
+
+failed:
+	user_access_end();	/* fault inside the user-access window: close it first */
+
+badframe:
+	signal_fault(tsk, regs, "handle_signal32", frame);
+
+	return 1;
+}
+/* Block ucp's signal mask and restore registers from its mcontext; 0 or -EFAULT. */
+static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig)
+{
+	sigset_t set;
+	struct mcontext __user *mcp;
+
+	if (!user_read_access_begin(ucp, sizeof(*ucp)))
+		return -EFAULT;
+
+	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+#ifdef CONFIG_PPC64
+	{
+		u32 cmcp;	/* uc_regs is read as a 32-bit user pointer */
+
+		unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+		mcp = (struct mcontext __user *)(u64)cmcp;	/* widen to a kernel-sized pointer */
+	}
+#else
+	unsafe_get_user(mcp, &ucp->uc_regs, failed);
+#endif
+	user_read_access_end();
+
+	set_current_blocked(&set);
+	if (restore_user_regs(regs, mcp, sig))	/* 'sig' is passed through unchanged */
+		return -EFAULT;
+
+	return 0;
+
+failed:
+	user_read_access_end();	/* close the read window opened above */
+	return -EFAULT;
+}
+/* TM variant of do_setcontext(): takes mcontexts from both ucp and tm_ucp; 0 or -EFAULT. */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static int do_setcontext_tm(struct ucontext __user *ucp,
+			    struct ucontext __user *tm_ucp,
+			    struct pt_regs *regs)
+{
+	sigset_t set;
+	struct mcontext __user *mcp;
+	struct mcontext __user *tm_mcp;
+	u32 cmcp;
+	u32 tm_cmcp;
+
+	if (!user_read_access_begin(ucp, sizeof(*ucp)))
+		return -EFAULT;
+
+	unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+	unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+
+	user_read_access_end();
+
+	if (__get_user(tm_cmcp, &tm_ucp->uc_regs))	/* read outside the window above */
+		return -EFAULT;
+	mcp = (struct mcontext __user *)(u64)cmcp;
+	tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
+	/* no need to check access_ok(mcp), since mcp < 4GB */
+
+	set_current_blocked(&set);
+	if (restore_tm_user_regs(regs, mcp, tm_mcp))
+		return -EFAULT;
+
+	return 0;
+
+failed:
+	user_read_access_end();	/* close the read window opened above */
+	return -EFAULT;
+}
+#endif
+/* swapcontext: optionally save the current context to old_ctx, then switch to new_ctx. */
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+		       struct ucontext __user *, new_ctx, int, ctx_size)
+#else
+SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+		       struct ucontext __user *, new_ctx, long, ctx_size)
+#endif
+{
+	struct pt_regs *regs = current_pt_regs();
+	int ctx_has_vsx_region = 0;
+
+#ifdef CONFIG_PPC64
+	unsigned long new_msr = 0;
+
+	if (new_ctx) {
+		struct mcontext __user *mcp;
+		u32 cmcp;
+
+		/*
+		 * Get pointer to the real mcontext.  No need for
+		 * access_ok since we are dealing with compat
+		 * pointers.
+		 */
+		if (__get_user(cmcp, &new_ctx->uc_regs))
+			return -EFAULT;
+		mcp = (struct mcontext __user *)(u64)cmcp;
+		if (__get_user(new_msr, &mcp->mc_gregs[PT_MSR]))
+			return -EFAULT;
+	}
+	/*
+	 * Check that the context is not smaller than the original
+	 * size (with VMX but without VSX)
+	 */
+	if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
+		return -EINVAL;
+	/*
+	 * Reject a new context whose MSR has the VSX bit set but which
+	 * is too small to provide VSX state.
+	 */
+	if ((ctx_size < sizeof(struct ucontext)) &&
+	    (new_msr & MSR_VSX))
+		return -EINVAL;
+	/* Does the context have enough room to store VSX data? */
+	if (ctx_size >= sizeof(struct ucontext))
+		ctx_has_vsx_region = 1;
+#else
+	/* Context size is for future use. Right now, we only make sure
+	 * we are passed something we understand
+	 */
+	if (ctx_size < sizeof(struct ucontext))
+		return -EINVAL;
+#endif
+	if (old_ctx != NULL) {
+		struct mcontext __user *mctx;
+
+		/*
+		 * old_ctx might not be 16-byte aligned, in which
+		 * case old_ctx->uc_mcontext won't be either.
+		 * Because we have the old_ctx->uc_pad2 field
+		 * before old_ctx->uc_mcontext, we need to round down
+		 * from &old_ctx->uc_mcontext to a 16-byte boundary.
+		 */
+		mctx = (struct mcontext __user *)
+			((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
+		prepare_save_user_regs(ctx_has_vsx_region);
+		if (!user_write_access_begin(old_ctx, ctx_size))
+			return -EFAULT;
+		unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
+		unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
+		unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
+		user_write_access_end();
+	}
+	if (new_ctx == NULL)	/* save-only call: nothing to switch to */
+		return 0;
+	if (!access_ok(new_ctx, ctx_size) ||
+	    fault_in_readable((char __user *)new_ctx, ctx_size))
+		return -EFAULT;
+
+	/*
+	 * If we get a fault copying the context into the kernel's
+	 * image of the user's registers, we can't just return -EFAULT
+	 * because the user's registers will be corrupted.  For instance
+	 * the NIP value may have been updated but not some of the
+	 * other registers.  Given that we have done the access_ok
+	 * and successfully read the first and last bytes of the region
+	 * above, this should only happen in an out-of-memory situation
+	 * or if another thread unmaps the region containing the context.
+	 * We kill the task with a SIGSEGV in this situation.
+	 */
+	if (do_setcontext(new_ctx, regs, 0)) {
+		force_exit_sig(SIGSEGV);
+		return -EFAULT;
+	}
+
+	set_thread_flag(TIF_RESTOREALL);
+	return 0;
+
+failed:
+	user_write_access_end();	/* fault while saving to old_ctx: close the write window */
+	return -EFAULT;
+}
+/* rt_sigreturn: restore the context saved in the rt_sigframe above the user stack pointer. */
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+#else
+SYSCALL_DEFINE0(rt_sigreturn)
+#endif
+{
+	struct rt_sigframe __user *rt_sf;
+	struct pt_regs *regs = current_pt_regs();
+	int tm_restore = 0;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	struct ucontext __user *uc_transact;
+	unsigned long msr_hi;
+	unsigned long tmp;
+#endif
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	rt_sf = (struct rt_sigframe __user *)
+		(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
+	if (!access_ok(rt_sf, sizeof(*rt_sf)))
+		goto bad;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * If there is a transactional state then throw it away.
+	 * The purpose of a sigreturn is to destroy all traces of the
+	 * signal frame, this includes any transactional state created
+	 * within it. We only check for suspended as we can never be
+	 * active in the kernel; if we are active, there is nothing better
+	 * to do than go ahead and Bad Thing later.
+	 * The cause is not important as there will never be a
+	 * recheckpoint so it's not user visible.
+	 */
+	if (MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(0);
+
+	if (__get_user(tmp, &rt_sf->uc.uc_link))
+		goto bad;
+	uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
+	if (uc_transact) {
+		u32 cmcp;
+		struct mcontext __user *mcp;
+
+		if (__get_user(cmcp, &uc_transact->uc_regs))
+			return -EFAULT;	/* NOTE(review): other failures here take "bad" (SIGSEGV) */
+		mcp = (struct mcontext __user *)(u64)cmcp;
+		/* The top 32 bits of the MSR are stashed in the transactional
+		 * ucontext. */
+		if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
+			goto bad;
+
+		if (MSR_TM_ACTIVE(msr_hi<<32)) {
+			/* Trying to start TM on non TM system */
+			if (!cpu_has_feature(CPU_FTR_TM))
+				goto bad;
+			/* We only recheckpoint on return if we're
+			 * in a transaction.
+			 */
+			tm_restore = 1;
+			if (do_setcontext_tm(&rt_sf->uc, uc_transact, regs))
+				goto bad;
+		}
+	}
+	if (!tm_restore) {
+		/*
+		 * Clear the TS bits in regs->msr because ucontext MSR TS
+		 * is not set, and recheckpoint was not called. This avoids
+		 * hitting a TM Bad Thing at RFID.
+		 */
+		regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+	}
+	/* Fall through, for non-TM restore */
+#endif
+	if (!tm_restore)
+		if (do_setcontext(&rt_sf->uc, regs, 1))
+			goto bad;
+
+	/*
+	 * It's not clear whether or why it is desirable to save the
+	 * sigaltstack setting on signal delivery and restore it on
+	 * signal return.  But other architectures do this and we have
+	 * always done it up until now so it is probably better not to
+	 * change it.  -- paulus
+	 */
+#ifdef CONFIG_PPC64
+	if (compat_restore_altstack(&rt_sf->uc.uc_stack))
+		goto bad;
+#else
+	if (restore_altstack(&rt_sf->uc.uc_stack))
+		goto bad;
+#endif
+	set_thread_flag(TIF_RESTOREALL);
+	return 0;
+
+ bad:
+	signal_fault(current, regs, "sys_rt_sigreturn", rt_sf);
+
+	force_sig(SIGSEGV);
+	return 0;
+}
+/* debug_setcontext: apply ndbg single-step/branch-trace ops to the MSR, then restore ctx. */
+#ifdef CONFIG_PPC32
+SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
+			 int, ndbg, struct sig_dbg_op __user *, dbg)
+{
+	struct pt_regs *regs = current_pt_regs();
+	struct sig_dbg_op op;
+	int i;
+	unsigned long new_msr = regs->msr;
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	unsigned long new_dbcr0 = current->thread.debug.dbcr0;
+#endif
+
+	for (i=0; i<ndbg; i++) {
+		if (copy_from_user(&op, dbg + i, sizeof(op)))
+			return -EFAULT;
+		switch (op.dbg_type) {
+		case SIG_DBG_SINGLE_STEPPING:
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+			if (op.dbg_value) {
+				new_msr |= MSR_DE;
+				new_dbcr0 |= (DBCR0_IDM | DBCR0_IC);
+			} else {
+				new_dbcr0 &= ~DBCR0_IC;
+				if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
+						current->thread.debug.dbcr1)) {
+					new_msr &= ~MSR_DE;
+					new_dbcr0 &= ~DBCR0_IDM;
+				}
+			}
+#else
+			if (op.dbg_value)
+				new_msr |= MSR_SE;
+			else
+				new_msr &= ~MSR_SE;
+#endif
+			break;
+		case SIG_DBG_BRANCH_TRACING:
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+			return -EINVAL;	/* branch tracing is rejected in this configuration */
+#else
+			if (op.dbg_value)
+				new_msr |= MSR_BE;
+			else
+				new_msr &= ~MSR_BE;
+#endif
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* We wait until here to actually install the values in the
+	   registers so if we fail in the above loop, it will not
+	   affect the contents of these registers.  After this point,
+	   failure is a problem, anyway, and it's very unlikely unless
+	   the user is really doing something wrong. */
+	regs_set_return_msr(regs, new_msr);
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	current->thread.debug.dbcr0 = new_dbcr0;
+#endif
+
+	if (!access_ok(ctx, sizeof(*ctx)) ||
+	    fault_in_readable((char __user *)ctx, sizeof(*ctx)))
+		return -EFAULT;
+
+	/*
+	 * If we get a fault copying the context into the kernel's
+	 * image of the user's registers, we can't just return -EFAULT
+	 * because the user's registers will be corrupted.  For instance
+	 * the NIP value may have been updated but not some of the
+	 * other registers.  Given that we have done the access_ok
+	 * and successfully read the first and last bytes of the region
+	 * above, this should only happen in an out-of-memory situation
+	 * or if another thread unmaps the region containing the context.
+	 * We kill the task with a SIGSEGV in this situation.
+	 */
+	if (do_setcontext(ctx, regs, 1)) {
+		signal_fault(current, regs, "sys_debug_setcontext", ctx);
+
+		force_sig(SIGSEGV);
+		goto out;
+	}
+
+	/*
+	 * It's not clear whether or why it is desirable to save the
+	 * sigaltstack setting on signal delivery and restore it on
+	 * signal return.  But other architectures do this and we have
+	 * always done it up until now so it is probably better not to
+	 * change it.  -- paulus
+	 */
+	restore_altstack(&ctx->uc_stack);	/* NOTE(review): result ignored, unlike rt_sigreturn */
+
+	set_thread_flag(TIF_RESTOREALL);
+ out:
+	return 0;
+}
+#endif
+
+/* Do a (non-RT) signal return: restore the blocked-signal mask and the user
+ * registers from the sigframe located at r1 + __SIGNAL_FRAMESIZE.
+ */
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE0(sigreturn)
+#else
+SYSCALL_DEFINE0(sigreturn)
+#endif
+{
+	struct pt_regs *regs = current_pt_regs();
+	struct sigframe __user *sf;
+	struct sigcontext __user *sc;
+	struct sigcontext sigctx;
+	struct mcontext __user *sr;
+	sigset_t set;
+	struct mcontext __user *mcp;
+	struct mcontext __user *tm_mcp = NULL;
+	unsigned long long msr_hi = 0;	/* stays 0 when TM support is not built in */
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+	sc = &sf->sctx;
+	if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
+		goto badframe;
+
+#ifdef CONFIG_PPC64
+	/*
+	 * Note that PPC32 puts the upper 32 bits of the sigmask in the
+	 * unused part of the signal stackframe
+	 */
+	set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
+#else
+	set.sig[0] = sigctx.oldmask;
+	set.sig[1] = sigctx._unused[3];
+#endif
+	set_current_blocked(&set);
+
+	mcp = (struct mcontext __user *)&sf->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
+	if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
+		goto badframe;
+#endif
+	if (MSR_TM_ACTIVE(msr_hi<<32)) {	/* value is treated as the upper 32 MSR bits */
+		if (!cpu_has_feature(CPU_FTR_TM))
+			goto badframe;
+		if (restore_tm_user_regs(regs, mcp, tm_mcp))
+			goto badframe;
+	} else {
+		sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
+		if (restore_user_regs(regs, sr, 1)) {
+			signal_fault(current, regs, "sys_sigreturn", sr);
+
+			force_sig(SIGSEGV);
+			return 0;
+		}
+	}
+
+	set_thread_flag(TIF_RESTOREALL);
+	return 0;
+
+badframe:
+	signal_fault(current, regs, "sys_sigreturn", sc);
+
+	force_sig(SIGSEGV);
+	return 0;
+}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
new file mode 100644
index 0000000000..86bb5bb4c1
--- /dev/null
+++ b/arch/powerpc/kernel/signal_64.c
@@ -0,0 +1,977 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  PowerPC version 
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/kernel/signal.c"
+ *    Copyright (C) 1991, 1992 Linus Torvalds
+ *    1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/elf.h>
+#include <linux/ptrace.h>
+#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
+#include <linux/pagemap.h>
+
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <linux/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/cacheflush.h>
+#include <asm/syscalls.h>
+#include <asm/vdso.h>
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+
+#include "signal.h"
+
+/* GP_REGS_SIZE: number of bytes of pt_regs copied into a sigcontext's gp_regs. */
+#define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
+#define FP_REGS_SIZE	sizeof(elf_fpregset_t)
+
+#define TRAMP_TRACEBACK	4
+#define TRAMP_SIZE	7	/* number of entries in rt_sigframe.tramp[] */
+
+/*
+ * When we have signals to deliver, we set up on the user stack,
+ * going down from the original stack pointer:
+ *	1) a rt_sigframe struct which contains the ucontext
+ *	2) a gap of __SIGNAL_FRAMESIZE bytes which acts as a dummy caller
+ *	   frame for the signal handler.
+ */
+
+struct rt_sigframe {
+	/* sys_rt_sigreturn requires the ucontext be the first field */
+	struct ucontext uc;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	struct ucontext uc_transact;	/* second ucontext, present only with TM */
+#endif
+	unsigned long _unused[2];
+	unsigned int tramp[TRAMP_SIZE];	/* presumably the sigreturn trampoline - confirm */
+	struct siginfo __user *pinfo;
+	void __user *puc;
+	struct siginfo info;
+	/* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
+	char abigap[USER_REDZONE_SIZE];
+} __attribute__ ((aligned (16)));
+/* Minimum user-stack bytes for a 64-bit signal: rt_sigframe plus the dummy caller frame. */
+unsigned long get_min_sigframe_size_64(void)
+{
+	return __SIGNAL_FRAMESIZE + sizeof(struct rt_sigframe);
+}
+
+/*
+ * Return the first 16-byte (quad word) aligned address at or above the start
+ * of sc->vmx_reserve.  The sigcontext itself may, for historical reasons, not
+ * be quad word aligned, but the VMX registers must be written to an aligned
+ * location; see the comment in sigcontext for more detail.
+ */
+#ifdef CONFIG_ALTIVEC
+static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
+{
+	return (elf_vrreg_t __user *)ALIGN((unsigned long)sc->vmx_reserve, 16);
+}
+#endif
+/* Flush tsk's FP state, plus Altivec/VSX state if used, to its thread struct; sample VRSAVE. */
+static void prepare_setup_sigcontext(struct task_struct *tsk)
+{
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (tsk->thread.used_vr)
+		flush_altivec_to_thread(tsk);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		tsk->thread.vrsave = mfspr(SPRN_VRSAVE);	/* read the live VRSAVE register */
+#endif /* CONFIG_ALTIVEC */
+
+	flush_fp_to_thread(tsk);	/* done unconditionally, unlike the VMX/VSX flushes */
+
+#ifdef CONFIG_VSX
+	if (tsk->thread.used_vsr)
+		flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
+/* Fill in the user sigcontext for the signal frame: VMX, FP and (if there is
+ * room and it was used) VSX state, then the GPRs, signal number and handler.
+ * Returns 0 or -EFAULT; the wrapper macro jumps to 'label' on failure.
+ */
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\
+do {											\
+	if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\
+		goto label;								\
+} while (0)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+					struct task_struct *tsk, int signr, sigset_t *set,
+					unsigned long handler, int ctx_has_vsx_region)
+{
+	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
+	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
+	 * the context). This is very important because we must ensure we
+	 * don't lose the VRSAVE content that may have been set prior to
+	 * the process doing its first vector operation
+	 * Userland shall check AT_HWCAP to know whether it can rely on the
+	 * v_regs pointer or not
+	 */
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
+#endif
+	struct pt_regs *regs = tsk->thread.regs;
+	unsigned long msr = regs->msr;
+	/* Force userspace to always see softe as 1 (interrupts enabled) */
+	unsigned long softe = 0x1;
+
+	BUG_ON(tsk != current);
+
+#ifdef CONFIG_ALTIVEC
+	unsafe_put_user(v_regs, &sc->v_regs, efault_out);
+
+	/* save altivec registers */
+	if (tsk->thread.used_vr) {
+		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
+		unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+				    33 * sizeof(vector128), efault_out);
+		/* set MSR_VEC in the MSR value in the frame to indicate that
+		 * the area sc->v_regs points to contains valid data.
+		 */
+		msr |= MSR_VEC;
+	}
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec.
+	 */
+	unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
+#else /* CONFIG_ALTIVEC */
+	unsafe_put_user(0, &sc->v_regs, efault_out);
+#endif /* CONFIG_ALTIVEC */
+	/* copy fpr regs and fpscr */
+	unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
+
+	/*
+	 * Clear the MSR VSX bit to indicate there is no valid state attached
+	 * to this context, except in the specific case below where we set it.
+	 */
+	msr &= ~MSR_VSX;
+#ifdef CONFIG_VSX
+	/*
+	 * Copy VSX low doubleword to local buffer for formatting,
+	 * then out to userspace.  Update v_regs to point after the
+	 * VMX data.
+	 */
+	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
+		v_regs += ELF_NVRREG;
+		unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
+		/* set MSR_VSX in the MSR value in the frame to
+		 * indicate that the VSX area contains valid data.
+		 */
+		msr |= MSR_VSX;
+	}
+#endif /* CONFIG_VSX */
+	unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
+	unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+	unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);	/* overrides the MSR just copied */
+	unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+	unsafe_put_user(signr, &sc->signal, efault_out);
+	unsafe_put_user(handler, &sc->handler, efault_out);
+	if (set != NULL)
+		unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
+
+	return 0;
+
+efault_out:
+	return -EFAULT;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * As above, but Transactional Memory is in use, so deliver sigcontexts
+ * containing checkpointed and transactional register states.
+ * Returns 0 on success, non-zero if any write to user memory failed.
+ * To do this, we treclaim (done before entering here) to gather both sets of
+ * registers and set up the 'normal' sigcontext registers with rolled-back
+ * register values such that a simple signal handler sees a correct
+ * checkpointed register state.  If interested, a TM-aware sighandler can
+ * examine the transactional registers in the 2nd sigcontext to determine the
+ * real origin of the signal.
+ */
+static long setup_tm_sigcontexts(struct sigcontext __user *sc,
+				 struct sigcontext __user *tm_sc,
+				 struct task_struct *tsk,
+				 int signr, sigset_t *set, unsigned long handler,
+				 unsigned long msr)
+{
+	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
+	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
+	 * the context). This is very important because we must ensure we
+	 * don't lose the VRSAVE content that may have been set prior to
+	 * the process doing its first vector operation
+	 * Userland shall check AT_HWCAP to know whether it can rely on the
+	 * v_regs pointer or not.
+	 */
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
+	elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
+#endif
+	struct pt_regs *regs = tsk->thread.regs;
+	long err = 0;	/* accumulates (ORs) the result of every user access below */
+
+	BUG_ON(tsk != current);
+
+	BUG_ON(!MSR_TM_ACTIVE(msr));
+
+	WARN_ON(tm_suspend_disabled);
+
+	/* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as
+	 * it contains the correct FP, VEC, VSX state after we treclaimed
+	 * the transaction and giveup_all() was called on reclaiming.
+	 */
+	msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
+
+#ifdef CONFIG_ALTIVEC
+	err |= __put_user(v_regs, &sc->v_regs);
+	err |= __put_user(tm_v_regs, &tm_sc->v_regs);
+
+	/* save altivec registers */
+	if (tsk->thread.used_vr) {
+		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
+		err |= __copy_to_user(v_regs, &tsk->thread.ckvr_state,
+				      33 * sizeof(vector128));
+		/* If VEC was enabled there are transactional VRs valid too,
+		 * else they're a copy of the checkpointed VRs.
+		 */
+		if (msr & MSR_VEC)
+			err |= __copy_to_user(tm_v_regs,
+					      &tsk->thread.vr_state,
+					      33 * sizeof(vector128));
+		else
+			err |= __copy_to_user(tm_v_regs,
+					      &tsk->thread.ckvr_state,
+					      33 * sizeof(vector128));
+
+		/* set MSR_VEC in the MSR value in the frame to indicate
+		 * that the area sc->v_regs points to contains valid data.
+		 */
+		msr |= MSR_VEC;
+	}
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec.
+	 */
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		tsk->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+	err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]);
+	if (msr & MSR_VEC)
+		err |= __put_user(tsk->thread.vrsave,
+				  (u32 __user *)&tm_v_regs[33]);
+	else
+		err |= __put_user(tsk->thread.ckvrsave,
+				  (u32 __user *)&tm_v_regs[33]);
+
+#else /* CONFIG_ALTIVEC */
+	err |= __put_user(0, &sc->v_regs);
+	err |= __put_user(0, &tm_sc->v_regs);
+#endif /* CONFIG_ALTIVEC */
+
+	/* copy fpr regs and fpscr */
+	err |= copy_ckfpr_to_user(&sc->fp_regs, tsk);
+	if (msr & MSR_FP)
+		err |= copy_fpr_to_user(&tm_sc->fp_regs, tsk);
+	else
+		err |= copy_ckfpr_to_user(&tm_sc->fp_regs, tsk);
+
+#ifdef CONFIG_VSX
+	/*
+	 * Copy VSX low doubleword to local buffer for formatting,
+	 * then out to userspace.  Update v_regs to point after the
+	 * VMX data.
+	 */
+	if (tsk->thread.used_vsr) {
+		v_regs += ELF_NVRREG;
+		tm_v_regs += ELF_NVRREG;
+
+		err |= copy_ckvsx_to_user(v_regs, tsk);
+
+		if (msr & MSR_VSX)
+			err |= copy_vsx_to_user(tm_v_regs, tsk);
+		else
+			err |= copy_ckvsx_to_user(tm_v_regs, tsk);
+
+		/* set MSR_VSX in the MSR value in the frame to
+		 * indicate that the VSX area contains valid data.
+		 */
+		msr |= MSR_VSX;
+	}
+#endif /* CONFIG_VSX */
+
+	err |= __put_user(&sc->gp_regs, &sc->regs);
+	err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs);
+	err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
+	err |= __copy_to_user(&sc->gp_regs,
+			      &tsk->thread.ckpt_regs, GP_REGS_SIZE);
+	err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]);	/* same MSR value in both contexts */
+	err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+	err |= __put_user(signr, &sc->signal);
+	err |= __put_user(handler, &sc->handler);
+	if (set != NULL)
+		err |=  __put_user(set->sig[0], &sc->oldmask);
+
+	return err;
+}
+#endif
+
+/* Restore the sigcontext from the signal frame into tsk's registers; 'sig'
+ * is non-zero for a signal return (zero preserves r13).  Returns 0 or -EFAULT.
+ */
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do {	\
+	if (__unsafe_restore_sigcontext(tsk, set, sig, sc))		\
+		goto label;						\
+} while (0)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+						int sig, struct sigcontext __user *sc)
+{
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t __user *v_regs;
+#endif
+	unsigned long save_r13 = 0;
+	unsigned long msr;
+	struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_VSX
+	int i;
+#endif
+
+	BUG_ON(tsk != current);
+
+	/* If this is not a signal return, we preserve the TLS in r13 */
+	if (!sig)
+		save_r13 = regs->gpr[13];
+
+	/* copy the GPRs */
+	unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out);
+	unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
+	/* get MSR separately, transfer the LE bit if doing signal return */
+	unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+	if (sig)
+		regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+	unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+	unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+	unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+	unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+	unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
+	/* Don't allow userspace to set SOFTE */
+	set_trap_norestart(regs);
+	unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+	unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+	unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
+
+	if (!sig)
+		regs->gpr[13] = save_r13;	/* undo the r13 overwrite from the GPR copy */
+	if (set != NULL)
+		unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
+
+	/*
+	 * Force reload of FP/VEC/VSX so userspace sees any changes.
+	 * Clear these bits from the user process' MSR before copying into the
+	 * thread struct. If we are rescheduled or preempted and another task
+	 * uses FP/VEC/VSX, and this process has the MSR bits set, then the
+	 * context switch code will save the current CPU state into the
+	 * thread_struct - possibly overwriting the data we are updating here.
+	 */
+	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
+
+#ifdef CONFIG_ALTIVEC
+	unsafe_get_user(v_regs, &sc->v_regs, efault_out);
+	if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
+		return -EFAULT;
+	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
+	if (v_regs != NULL && (msr & MSR_VEC) != 0) {
+		unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+				      33 * sizeof(vector128), efault_out);
+		tsk->thread.used_vr = true;
+	} else if (tsk->thread.used_vr) {
+		memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
+	}
+	/* Always get VRSAVE back */
+	if (v_regs != NULL)
+		unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
+	else
+		tsk->thread.vrsave = 0;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
+#endif /* CONFIG_ALTIVEC */
+	/* restore floating point */
+	unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
+#ifdef CONFIG_VSX
+	/*
+	 * Get additional VSX data. Update v_regs to point after the
+	 * VMX data.  Copy VSX low doubleword from userspace to local
+	 * buffer for formatting, then into the taskstruct.
+	 */
+	v_regs += ELF_NVRREG;	/* NOTE(review): advanced even when v_regs is NULL */
+	if ((msr & MSR_VSX) != 0) {
+		unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
+		tsk->thread.used_vsr = true;
+	} else {
+		for (i = 0; i < 32 ; i++)
+			tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+	}
+#endif
+	return 0;
+
+efault_out:
+	return -EFAULT;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Restore both sigcontexts (checkpointed @sc, transactional @tm_sc) of a TM frame.
+ */
+
+static long restore_tm_sigcontexts(struct task_struct *tsk,
+				   struct sigcontext __user *sc,
+				   struct sigcontext __user *tm_sc)
+{
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t __user *v_regs, *tm_v_regs;
+#endif
+	unsigned long err = 0;	/* OR of every user-access result below */
+	unsigned long msr;	/* MSR image read from @sc */
+	struct pt_regs *regs = tsk->thread.regs;
+#ifdef CONFIG_VSX
+	int i;
+#endif
+
+	BUG_ON(tsk != current);
+
+	if (tm_suspend_disabled)
+		return -EINVAL;
+
+	/* copy the GPRs: transactional set into regs, checkpointed into ckpt_regs */
+	err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
+	err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
+				sizeof(regs->gpr));
+
+	/*
+	 * TFHAR is restored from the checkpointed 'wound-back' ucontext's NIP.
+	 * TEXASR was set by the signal delivery reclaim, as was TFIAR.
+	 * Users doing anything abhorrent like thread-switching w/ signals for
+	 * TM-Suspended code will have to back TEXASR/TFIAR up themselves.
+	 * For the case of getting a signal and simply returning from it,
+	 * we don't need to re-copy them here.
+	 */
+	err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]);
+	err |= __get_user(tsk->thread.tm_tfhar, &sc->gp_regs[PT_NIP]);
+
+	/* get MSR separately, transfer the LE bit if doing signal return */
+	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	/* Don't allow reserved mode. */
+	if (MSR_TM_RESV(msr))
+		return -EINVAL;
+
+	/* pull in MSR LE from user context */
+	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+
+	/* The following non-GPR non-FPR non-VR state is also checkpointed: */
+	err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]);
+	err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]);
+	err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]);
+	err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]);
+	err |= __get_user(tsk->thread.ckpt_regs.ctr,
+			  &sc->gp_regs[PT_CTR]);
+	err |= __get_user(tsk->thread.ckpt_regs.link,
+			  &sc->gp_regs[PT_LNK]);
+	err |= __get_user(tsk->thread.ckpt_regs.xer,
+			  &sc->gp_regs[PT_XER]);
+	err |= __get_user(tsk->thread.ckpt_regs.ccr,
+			  &sc->gp_regs[PT_CCR]);
+	/* Don't allow userspace to set SOFTE */
+	set_trap_norestart(regs);
+	/* These regs are not checkpointed; they can go in 'regs'. */
+	err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
+	err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
+	err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+
+	/*
+	 * Force reload of FP/VEC.
+	 * This has to be done before copying stuff into tsk->thread.fpr/vr;
+	 * the rationale is given in an earlier comment outside this function.
+	 */
+	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
+
+#ifdef CONFIG_ALTIVEC
+	err |= __get_user(v_regs, &sc->v_regs);
+	err |= __get_user(tm_v_regs, &tm_sc->v_regs);
+	if (err)
+		return err;	/* any failed user access so far aborts the restore */
+	if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
+		return -EFAULT;
+	if (tm_v_regs && !access_ok(tm_v_regs, 34 * sizeof(vector128)))
+		return -EFAULT;
+	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
+	if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
+		err |= __copy_from_user(&tsk->thread.ckvr_state, v_regs,
+					33 * sizeof(vector128));
+		err |= __copy_from_user(&tsk->thread.vr_state, tm_v_regs,
+					33 * sizeof(vector128));
+		current->thread.used_vr = true;
+	}
+	else if (tsk->thread.used_vr) {
+		memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
+		memset(&tsk->thread.ckvr_state, 0, 33 * sizeof(vector128));
+	}
+	/* Always get VRSAVE back; it is read from slot 33, after the 33 registers */
+	if (v_regs != NULL && tm_v_regs != NULL) {
+		err |= __get_user(tsk->thread.ckvrsave,
+				  (u32 __user *)&v_regs[33]);
+		err |= __get_user(tsk->thread.vrsave,
+				  (u32 __user *)&tm_v_regs[33]);
+	}
+	else {
+		tsk->thread.vrsave = 0;
+		tsk->thread.ckvrsave = 0;
+	}
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
+#endif /* CONFIG_ALTIVEC */
+	/* restore floating point: transactional from tm_sc, checkpointed from sc */
+	err |= copy_fpr_from_user(tsk, &tm_sc->fp_regs);
+	err |= copy_ckfpr_from_user(tsk, &sc->fp_regs);
+#ifdef CONFIG_VSX
+	/*
+	 * Get additional VSX data. Update v_regs to point after the
+	 * VMX data.  Copy VSX low doubleword from userspace to local
+	 * buffer for formatting, then into the taskstruct.
+	 */
+	if (v_regs && ((msr & MSR_VSX) != 0)) {
+		v_regs += ELF_NVRREG;
+		tm_v_regs += ELF_NVRREG;
+		err |= copy_vsx_from_user(tsk, tm_v_regs);
+		err |= copy_ckvsx_from_user(tsk, v_regs);
+		tsk->thread.used_vsr = true;
+	} else {	/* no VSX state taken from the frame: zero the VSR low halves */
+		for (i = 0; i < 32 ; i++) {
+			tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+			tsk->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+		}
+	}
+#endif
+	tm_enable();
+	/* Make sure the transaction is marked as failed */
+	tsk->thread.tm_texasr |= TEXASR_FS;
+
+	/*
+	 * Disabling preemption, since it is unsafe to be preempted
+	 * with MSR[TS] set without recheckpointing.
+	 */
+	preempt_disable();
+
+	/* pull in MSR TS bits from user context */
+	regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK));
+
+	/*
+	 * Ensure that TM is enabled in regs->msr before we leave the signal
+	 * handler. It could be the case that (a) user disabled the TM bit
+	 * through the manipulation of the MSR bits in uc_mcontext or (b) the
+	 * TM bit was disabled because a sufficient number of context switches
+	 * happened whilst in the signal handler and load_tm overflowed,
+	 * disabling the TM bit. In either case we can end up with an illegal
+	 * TM state leading to a TM Bad Thing when we return to userspace.
+	 *
+	 * CAUTION:
+	 * After regs->MSR[TS] being updated, make sure that get_user(),
+	 * put_user() or similar functions are *not* called. These
+	 * functions can generate page faults which will cause the process
+	 * to be de-scheduled with MSR[TS] set but without calling
+	 * tm_recheckpoint(). This can cause a bug.
+	 */
+	regs_set_return_msr(regs, regs->msr | MSR_TM);
+
+	/* This loads the checkpointed FP/VEC state, if used */
+	tm_recheckpoint(&tsk->thread);
+
+	msr_check_and_set(msr & (MSR_FP | MSR_VEC));
+	if (msr & MSR_FP) {
+		load_fp_state(&tsk->thread.fp_state);
+		regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode));
+	}
+	if (msr & MSR_VEC) {
+		load_vr_state(&tsk->thread.vr_state);
+		regs_set_return_msr(regs, regs->msr | MSR_VEC);
+	}
+
+	preempt_enable();
+
+	return err;	/* non-zero if any user access above failed */
+}
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc,
+				   struct sigcontext __user *tm_sc)
+{
+	return -EINVAL;	/* stub for builds without CONFIG_PPC_TRANSACTIONAL_MEM: always rejects */
+}
+#endif
+
+/*
+ * Write the signal-return trampoline into the user-space buffer @tramp.
+ */
+static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+{
+	long failed = 0;
+	int slot;
+
+	/* Call the handler, then pop the dummy stack frame afterwards */
+	failed |= __put_user(PPC_RAW_BCTRL(), tramp + 0);
+	failed |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), tramp + 1);
+
+	/* Load @syscall into r0 and issue the system call */
+	failed |= __put_user(PPC_RAW_LI(_R0, syscall), tramp + 2);
+	failed |= __put_user(PPC_RAW_SC(), tramp + 3);
+
+	/* Minimal traceback info: zero the words from TRAMP_TRACEBACK onwards */
+	for (slot = TRAMP_TRACEBACK; slot < TRAMP_SIZE; slot++)
+		failed |= __put_user(0, tramp + slot);
+
+	if (failed)
+		return failed;
+	flush_icache_range((unsigned long)tramp, (unsigned long)(tramp + TRAMP_SIZE));
+	return 0;
+}
+
+/*
+ * Userspace code may pass a ucontext which doesn't include the VSX region
+ * added at the end.  This is the size of such a ucontext: 32 longs shorter.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+		(sizeof(struct ucontext) - 32*sizeof(long))
+
+/*
+ * Handle {get,set,swap}_context operations: save into old_ctx, load from new_ctx
+ */
+SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+		struct ucontext __user *, new_ctx, long, ctx_size)
+{
+	sigset_t set;
+	unsigned long new_msr = 0;	/* stays 0 when no new context is given */
+	int ctx_has_vsx_region = 0;
+
+	if (new_ctx &&
+	    get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
+		return -EFAULT;
+	/*
+	 * Check that the context is not smaller than the original
+	 * size (with VMX but without VSX)
+	 */
+	if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
+		return -EINVAL;
+	/*
+	 * Reject a new context that sets the MSR VSX bit but is too
+	 * small to provide VSX state.
+	 */
+	if ((ctx_size < sizeof(struct ucontext)) &&
+	    (new_msr & MSR_VSX))
+		return -EINVAL;
+	/* Does the context have enough room to store VSX data? */
+	if (ctx_size >= sizeof(struct ucontext))
+		ctx_has_vsx_region = 1;
+
+	if (old_ctx != NULL) {	/* save the current context and signal mask */
+		prepare_setup_sigcontext(current);
+		if (!user_write_access_begin(old_ctx, ctx_size))
+			return -EFAULT;
+
+		unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+					0, ctx_has_vsx_region, efault_out);
+		unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+				    sizeof(sigset_t), efault_out);
+
+		user_write_access_end();
+	}
+	if (new_ctx == NULL)
+		return 0;	/* save-only request: nothing to load */
+	if (!access_ok(new_ctx, ctx_size) ||
+	    fault_in_readable((char __user *)new_ctx, ctx_size))
+		return -EFAULT;
+
+	/*
+	 * If we get a fault copying the context into the kernel's
+	 * image of the user's registers, we can't just return -EFAULT
+	 * because the user's registers will be corrupted.  For instance
+	 * the NIP value may have been updated but not some of the
+	 * other registers.  Given that we have done the access_ok
+	 * and successfully read the first and last bytes of the region
+	 * above, this should only happen in an out-of-memory situation
+	 * or if another thread unmaps the region containing the context.
+	 * We kill the task with a SIGSEGV in this situation.
+	 */
+
+	if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
+		force_exit_sig(SIGSEGV);
+		return -EFAULT;
+	}
+	set_current_blocked(&set);
+
+	if (!user_read_access_begin(new_ctx, ctx_size))
+		return -EFAULT;
+	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+		user_read_access_end();
+		force_exit_sig(SIGSEGV);
+		return -EFAULT;
+	}
+	user_read_access_end();
+
+	/* This returns like rt_sigreturn */
+	set_thread_flag(TIF_RESTOREALL);
+
+	return 0;
+
+efault_out:	/* reached only from inside the old_ctx write window */
+	user_write_access_end();
+	return -EFAULT;
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct pt_regs *regs = current_pt_regs();
+	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
+	sigset_t set;
+	unsigned long msr;	/* only assigned and read when TM support is configured */
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	if (!access_ok(uc, sizeof(*uc)))
+		goto badframe;
+
+	if (__get_user_sigset(&set, &uc->uc_sigmask))
+		goto badframe;
+	set_current_blocked(&set);
+
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) {
+		/*
+		 * If there is a transactional state then throw it away.
+		 * The purpose of a sigreturn is to destroy all traces of the
+		 * signal frame; this includes any transactional state created
+		 * within it. We only check for suspended as we can never be
+		 * active in the kernel; if we are active, there is nothing better
+		 * to do than go ahead and Bad Thing later.
+		 * The cause is not important as there will never be a
+		 * recheckpoint so it's not user visible.
+		 */
+		if (MSR_TM_SUSPENDED(mfmsr()))
+			tm_reclaim_current(0);
+
+		/*
+		 * Disable MSR[TS] bit also, so, if there is an exception in the
+		 * code below (such as a page fault in copy_ckvsx_to_user()), it
+		 * does not recheckpoint this task if there was a context switch
+		 * inside the exception.
+		 *
+		 * A major page fault can indirectly call schedule(). A reschedule
+		 * process in the middle of an exception can have a side effect
+		 * (Changing the CPU MSR[TS] state), since schedule() is called
+		 * with the CPU MSR[TS] disabled and returns with MSR[TS]=Suspended
+		 * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+		 * this case, the process continues to be the same in the CPU, but
+		 * the CPU state just changed.
+		 *
+		 * This can cause a TM Bad Thing, since the MSR in the stack will
+		 * have the MSR[TS]=0, and this is what will be used to RFID.
+		 *
+		 * Clearing MSR[TS] state here will avoid a recheckpoint if there
+		 * is any process reschedule in kernel space. The MSR[TS] state
+		 * does not need to be saved also, since it will be replaced with
+		 * the MSR[TS] that came from user context later, at
+		 * restore_tm_sigcontexts.
+		 */
+		regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+
+		if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+			goto badframe;
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) {
+		/* We recheckpoint on return. */
+		struct ucontext __user *uc_transact;
+
+		/* Trying to start TM on non TM system */
+		if (!cpu_has_feature(CPU_FTR_TM))
+			goto badframe;
+
+		if (__get_user(uc_transact, &uc->uc_link))
+			goto badframe;
+		if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
+					   &uc_transact->uc_mcontext))
+			goto badframe;
+	} else {
+		/*
+		 * Fall through, for non-TM restore
+		 *
+		 * Unset MSR[TS] on the thread regs since MSR from user
+		 * context does not have MSR active, and recheckpoint was
+		 * not called since restore_tm_sigcontexts() was not called
+		 * also.
+		 *
+		 * If not unsetting it, the code can RFID to userspace with
+		 * MSR[TS] set, but without CPU in the proper state,
+		 * causing a TM bad thing.
+		 */
+		regs_set_return_msr(current->thread.regs,
+				current->thread.regs->msr & ~MSR_TS_MASK);
+		if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
+			goto badframe;
+
+		unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+					  badframe_block);
+
+		user_read_access_end();
+	}
+
+	if (restore_altstack(&uc->uc_stack))
+		goto badframe;
+
+	set_thread_flag(TIF_RESTOREALL);
+
+	return 0;
+
+badframe_block:	/* failure inside the read window: close it first */
+	user_read_access_end();
+badframe:
+	signal_fault(current, regs, "rt_sigreturn", uc);
+
+	force_sig(SIGSEGV);
+	return 0;	/* the failure is reported through SIGSEGV, not the return value */
+}
+
+int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
+		struct task_struct *tsk)
+{	/* Build the rt signal frame in user memory and point regs at the handler. */
+	struct rt_sigframe __user *frame;
+	unsigned long newsp = 0;
+	long err = 0;
+	struct pt_regs *regs = tsk->thread.regs;
+	/* Save the thread's msr before get_tm_stackpointer() changes it */
+	unsigned long msr = regs->msr;
+
+	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
+
+	/*
+	 * This only applies when calling unsafe_setup_sigcontext() and must be
+	 * called before opening the uaccess window.
+	 */
+	if (!MSR_TM_ACTIVE(msr))
+		prepare_setup_sigcontext(tsk);
+
+	if (!user_write_access_begin(frame, sizeof(*frame)))
+		goto badframe;
+
+	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+	unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
+	/* Create the ucontext.  */
+	unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
+
+	if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		/* The ucontext_t passed to userland points to the second
+		 * ucontext_t (for transactional state) with its uc_link ptr.
+		 */
+		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+		user_write_access_end();	/* the window is closed around setup_tm_sigcontexts() */
+
+		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
+					    &frame->uc_transact.uc_mcontext,
+					    tsk, ksig->sig, NULL,
+					    (unsigned long)ksig->ka.sa.sa_handler,
+					    msr);
+
+		if (!user_write_access_begin(&frame->uc.uc_sigmask,
+					     sizeof(frame->uc.uc_sigmask)))
+			goto badframe;
+
+#endif
+	} else {
+		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+		unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+					NULL, (unsigned long)ksig->ka.sa.sa_handler,
+					1, badframe_block);
+	}
+
+	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+	user_write_access_end();
+
+	/* Save the siginfo outside of the unsafe block. */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		goto badframe;
+
+	/* Make sure signal handler doesn't get spurious FP exceptions */
+	tsk->thread.fp_state.fpscr = 0;
+
+	/* Set up to return from userspace: vDSO trampoline if mapped, else one in the frame */
+	if (tsk->mm->context.vdso) {
+		regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64));
+	} else {
+		err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
+		if (err)
+			goto badframe;
+		regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
+	}
+
+	/* Allocate a dummy caller frame for the signal handler. */
+	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
+
+	/* Set up "regs" so we "return" to the signal handler. */
+	if (is_elf2_task()) {
+		regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
+		regs->gpr[12] = regs->ctr;	/* r12 gets the handler address too */
+	} else {
+		/* Handler is *really* a pointer to the function descriptor for
+		 * the signal routine.  The first entry in the function
+		 * descriptor is the entry address of signal and the second
+		 * entry is the TOC value we need to use.
+		 */
+		struct func_desc __user *ptr =
+			(struct func_desc __user *)ksig->ka.sa.sa_handler;
+
+		err |= get_user(regs->ctr, &ptr->addr);
+		err |= get_user(regs->gpr[2], &ptr->toc);
+	}
+
+	/* enter the signal handler in native-endian mode */
+	regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+	regs->gpr[1] = newsp;
+	regs->gpr[3] = ksig->sig;
+	regs->result = 0;
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+		regs->gpr[4] = (unsigned long)&frame->info;
+		regs->gpr[5] = (unsigned long)&frame->uc;
+		regs->gpr[6] = (unsigned long) frame;
+	} else {
+		regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
+	}
+	if (err)	/* any failed user access collected above */
+		goto badframe;
+
+	return 0;
+
+badframe_block:	/* failure inside a write window: close it first */
+	user_write_access_end();
+badframe:
+	signal_fault(current, regs, "handle_rt_signal64", frame);
+
+	return 1;	/* non-zero: the frame could not be set up */
+}
diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c
new file mode 100644
index 0000000000..21c39355b2
--- /dev/null
+++ b/arch/powerpc/kernel/smp-tbsync.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Smp timebase synchronization for ppc.
+ *
+ * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se)
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+
+#define NUM_ITER		300
+
+enum {
+	kExit=0, kSetAndTest, kTest	/* commands passed in tbsync->cmd */
+};
+
+static struct {
+	volatile u64		tb;	/* timebase value offered to the taker */
+	volatile u64		mark;	/* timebase value at which a contest round ends */
+	volatile int		cmd;	/* one of kExit/kSetAndTest/kTest */
+	volatile int		handshake;	/* raised by the giver once tb/mark are written */
+	int			filler[2];	/* padding; presumably for cache-line separation - confirm */
+
+	volatile int		ack;	/* taker: 1 when ready, 0 once cmd/tb were read */
+	int			filler2[7];	/* padding; presumably for cache-line separation - confirm */
+
+	volatile int		race_result;	/* +1 (giver) or -1 (taker), whoever stored last */
+} *tbsync;
+
+static volatile int		running;	/* set by the giver after allocating tbsync */
+
+static void enter_contest(u64 mark, long add)
+{	/* Keep storing @add until this CPU's timebase reaches @mark. */
+	while (get_tb() < mark)
+		tbsync->race_result = add;	/* the last store before @mark is what remains */
+}
+
+void smp_generic_take_timebase(void)
+{	/* Taker side: follow the giver's commands until kExit, with IRQs off. */
+	int cmd;
+	u64 tb;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	while (!running)	/* wait for the giver to publish tbsync */
+		barrier();
+	rmb();
+
+	for (;;) {
+		tbsync->ack = 1;	/* ready for the next command */
+		while (!tbsync->handshake)
+			barrier();
+		rmb();
+
+		cmd = tbsync->cmd;
+		tb = tbsync->tb;
+		mb();
+		tbsync->ack = 0;	/* command and timebase value have been read */
+		if (cmd == kExit)
+			break;
+
+		while (tbsync->handshake)	/* the giver clears this to start the round */
+			barrier();
+		if (cmd == kSetAndTest)
+			set_tb(tb >> 32, tb & 0xfffffffful);	/* upper and lower 32 bits */
+		enter_contest(tbsync->mark, -1);
+	}
+	local_irq_restore(flags);
+}
+
+static int start_contest(int cmd, long offset, int num)
+{	/* Giver side: run contest rounds with @cmd/@offset; returns the summed race results. */
+	int i, score=0;
+	u64 tb;
+	u64 mark;
+
+	tbsync->cmd = cmd;
+
+	local_irq_disable();
+	for (i = -3; i < num; ) {	/* i is advanced by the i++ test at the bottom */
+		tb = get_tb() + 400;
+		tbsync->tb = tb + offset;	/* value the taker loads on kSetAndTest */
+		tbsync->mark = mark = tb + 400;
+
+		wmb();
+
+		tbsync->handshake = 1;
+		while (tbsync->ack)	/* wait until the taker has read cmd/tb */
+			barrier();
+
+		while (get_tb() <= tb)
+			barrier();
+		tbsync->handshake = 0;	/* releases the taker into the contest */
+		enter_contest(mark, 1);
+
+		while (!tbsync->ack)	/* taker is back at the top of its loop */
+			barrier();
+
+		if (i++ > 0)	/* rounds with i <= 0 are not scored */
+			score += tbsync->race_result;
+	}
+	local_irq_enable();
+	return score;
+}
+
+void smp_generic_give_timebase(void)
+{	/* Giver side: search for the timebase offset whose contest score is closest to zero. */
+	int i, score, score2, old, min=0, max=5000, offset=1000;
+
+	pr_debug("Software timebase sync\n");
+
+	/* if this fails then this kernel won't work anyway... */
+	tbsync = kzalloc( sizeof(*tbsync), GFP_KERNEL );
+	mb();
+	running = 1;	/* lets the taker leave its wait loop */
+
+	while (!tbsync->ack)
+		barrier();
+
+	pr_debug("Got ack\n");
+
+	/* binary search; stops when the midpoint no longer changes */
+	for (old = -1; old != offset ; offset = (min+max) / 2) {
+		score = start_contest(kSetAndTest, offset, NUM_ITER);
+
+		pr_debug("score %d, offset %d\n", score, offset );
+
+		if( score > 0 )
+			max = offset;
+		else
+			min = offset;
+		old = offset;
+	}
+	score = start_contest(kSetAndTest, min, NUM_ITER);
+	score2 = start_contest(kSetAndTest, max, NUM_ITER);
+
+	pr_debug("Min %d (score %d), Max %d (score %d)\n",
+		 min, score, max, score2);
+	score = abs(score);
+	score2 = abs(score2);
+	offset = (score < score2) ? min : max;	/* keep the bound with the smaller |score| */
+
+	/* guard against inaccurate mttb: re-set and re-test, at most 10 times */
+	for (i = 0; i < 10; i++) {
+		start_contest(kSetAndTest, offset, NUM_ITER/10);
+
+		if ((score2 = start_contest(kTest, offset, NUM_ITER)) < 0)
+			score2 = -score2;
+		if (score2 <= score || score2 < 20)
+			break;
+	}
+	pr_debug("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER );
+
+	/* exiting: hand kExit to the taker and wait until it has read it */
+	tbsync->cmd = kExit;
+	wmb();
+	tbsync->handshake = 1;
+	while (tbsync->ack)
+		barrier();
+	tbsync->handshake = 0;
+	kfree(tbsync);
+	tbsync = NULL;
+	running = 0;
+}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
new file mode 100644
index 0000000000..5826f5108a
--- /dev/null
+++ b/arch/powerpc/kernel/smp.c
@@ -0,0 +1,1791 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for ppc.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
+ * deal of code from the sparc and intel versions.
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ *
+ * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/topology.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/topology.h>
+#include <linux/profile.h>
+#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
+#include <linux/pgtable.h>
+#include <linux/clockchips.h>
+#include <linux/kexec.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+#include <asm/page.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/cputhreads.h>
+#include <asm/cputable.h>
+#include <asm/mpic.h>
+#include <asm/vdso_datapage.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#endif
+#include <asm/vdso.h>
+#include <asm/debug.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/ftrace.h>
+#include <asm/kup.h>
+#include <asm/fadump.h>
+
+#include <trace/events/ipi.h>
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* State of each CPU during hotplug phases */
+static DEFINE_PER_CPU(int, cpu_state) = { 0 };
+#endif
+
+struct task_struct *secondary_current;
+bool has_big_cores;
+bool coregroup_enabled;
+bool thread_group_shares_l2;
+bool thread_group_shares_l3;
+
+DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
+
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+enum {
+#ifdef CONFIG_SCHED_SMT
+	smt_idx,
+#endif
+	cache_idx,
+	mc_idx,
+	die_idx,
+};
+
+#define MAX_THREAD_LIST_SIZE	8
+#define THREAD_GROUP_SHARE_L1   1
+#define THREAD_GROUP_SHARE_L2_L3 2
+struct thread_groups {
+	unsigned int property;	/* presumably a THREAD_GROUP_SHARE_* value - confirm */
+	unsigned int nr_groups;
+	unsigned int threads_per_group;
+	unsigned int thread_list[MAX_THREAD_LIST_SIZE];	/* capacity is MAX_THREAD_LIST_SIZE entries */
+};
+
+/* Maximum number of properties that groups of threads within a core can share */
+#define MAX_THREAD_GROUP_PROPERTIES 2
+
+struct thread_groups_list {
+	unsigned int nr_properties;	/* presumably the number of property_tgs[] entries in use - confirm */
+	struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
+};
+
+static struct thread_groups_list tgl[NR_CPUS] __initdata;
+/*
+ * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
+ * the set its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+
+/*
+ * On some big-cores system, thread_group_l2_cache_map for each CPU
+ * corresponds to the set its siblings within the core that share the
+ * L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
+/* SMP operations for this machine */
+struct smp_ops_t *smp_ops;
+
+/* Can't be static due to PowerMac hackery */
+volatile unsigned int cpu_callin_map[NR_CPUS];
+
+int smt_enabled_at_boot = 1;
+
+/*
+ * Decide whether CPU @nr may be brought up: returns 1 if so, 0 if not.
+ * While the system is still booting on an SMT-capable machine, threads
+ * excluded by smt_enabled_at_boot are refused; otherwise every CPU is allowed.
+ */
+int smp_generic_cpu_bootable(unsigned int nr)
+{
+	int thread;
+
+	/* The restriction applies only during boot, and only with SMT. */
+	if (system_state >= SYSTEM_RUNNING || !cpu_has_feature(CPU_FTR_SMT))
+		return 1;
+
+	thread = cpu_thread_in_core(nr);
+
+	/* smt_enabled_at_boot == 0: only thread 0 of each core may boot. */
+	if (smt_enabled_at_boot == 0)
+		return thread == 0;
+	return thread < smt_enabled_at_boot;	/* otherwise it caps the thread index */
+}
+
+
+#ifdef CONFIG_PPC64
+int smp_generic_kick_cpu(int nr)
+{	/* Start CPU @nr; returns 0, or -EINVAL when @nr is out of range. */
+	if (nr < 0 || nr >= nr_cpu_ids)
+		return -EINVAL;
+
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero. After we set cpu_start,
+	 * the processor will continue on to secondary_start
+	 */
+	if (!paca_ptrs[nr]->cpu_start) {
+		paca_ptrs[nr]->cpu_start = 1;
+		smp_mb();
+		return 0;
+	}
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/*
+	 * Ok it's not there, so it might be soft-unplugged, let's
+	 * try to bring it back
+	 */
+	generic_set_cpu_up(nr);
+	smp_wmb();
+	smp_send_reschedule(nr);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+	return 0;	/* cpu_start was already set; also returns 0 */
+}
+#endif /* CONFIG_PPC64 */
+
+static irqreturn_t call_function_action(int irq, void *data)
+{	/* IRQ handler installed for PPC_MSG_CALL_FUNCTION. */
+	generic_smp_call_function_interrupt();	/* @irq and @data are unused */
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t reschedule_action(int irq, void *data)
+{	/* IRQ handler installed for PPC_MSG_RESCHEDULE. */
+	scheduler_ipi();	/* @irq and @data are unused */
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
+{	/* IRQ handler installed for PPC_MSG_TICK_BROADCAST. */
+	timer_broadcast_interrupt();	/* @irq and @data are unused */
+	return IRQ_HANDLED;
+}
+#endif
+
+#ifdef CONFIG_NMI_IPI
+static irqreturn_t nmi_ipi_action(int irq, void *data)
+{	/* IRQ handler installed for PPC_MSG_NMI_IPI. */
+	smp_handle_nmi_ipi(get_irq_regs());	/* its return value is ignored here */
+	return IRQ_HANDLED;
+}
+#endif
+
+static irq_handler_t smp_ipi_action[] = {	/* IRQ handlers, indexed by PPC_MSG_* */
+	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
+	[PPC_MSG_RESCHEDULE] = reschedule_action,
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
+#endif
+#ifdef CONFIG_NMI_IPI
+	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
+#endif
+};
+
+/*
+ * The NMI IPI is a fallback and not truly non-maskable. It is simpler
+ * than going through the call function infrastructure, and strongly
+ * serialized, so it is more appropriate for debugging.
+ */
+const char *smp_ipi_name[] = {	/* IRQ names passed to request_irq(), indexed by PPC_MSG_* */
+	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
+	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
+#endif
+#ifdef CONFIG_NMI_IPI
+	[PPC_MSG_NMI_IPI] = "nmi ipi",
+#endif
+};
+
+/* optional function to request ipi @msg on @virq, for controllers with >= 4 ipis */
+int smp_request_message_ipi(int virq, int msg)
+{
+	int err;
+
+	if (msg < 0 || msg > PPC_MSG_NMI_IPI)
+		return -EINVAL;
+#ifndef CONFIG_NMI_IPI
+	if (msg == PPC_MSG_NMI_IPI)
+		return 1;	/* NMI IPI not configured: no IRQ is requested */
+#endif
+
+	err = request_irq(virq, smp_ipi_action[msg],
+			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
+			  smp_ipi_name[msg], NULL);
+	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+		virq, smp_ipi_name[msg], err);
+
+	return err;	/* result of request_irq() */
+}
+
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+struct cpu_messages {
+	long messages;			/* current messages */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_message(int cpu, int msg)
+{	/* Mark message @msg as pending for @cpu; does not raise the IPI itself. */
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+	char *message = (char *)&info->messages;	/* one byte of the long per message */
+
+	/*
+	 * Order previous accesses before accesses in the IPI handler.
+	 */
+	smp_mb();
+	WRITE_ONCE(message[msg], 1);
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{	/* Record @msg for @cpu, then trigger the IPI through smp_ops->cause_ipi. */
+	smp_muxed_ipi_set_message(cpu, msg);
+
+	/*
+	 * cause_ipi functions are required to include a full barrier
+	 * before doing whatever causes the IPI.
+	 */
+	smp_ops->cause_ipi(cpu);
+}
+
+#ifdef __BIG_ENDIAN__
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))	/* low bit of byte A in memory order */
+#else
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))	/* low bit of byte A in memory order */
+#endif
+
+irqreturn_t smp_ipi_demux(void)
+{	/* Barrier-protected wrapper around smp_ipi_demux_relaxed(). */
+	mb();	/* order any irq clear */
+
+	return smp_ipi_demux_relaxed();
+}
+
+/* sync-free variant: dispatch this CPU's pending messages. Callers should ensure synchronization */
+irqreturn_t smp_ipi_demux_relaxed(void)
+{
+	struct cpu_messages *info;
+	unsigned long all;
+
+	info = this_cpu_ptr(&ipi_message);
+	do {
+		all = xchg(&info->messages, 0);	/* take and clear all pending messages at once */
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+		/*
+		 * Must check for PPC_MSG_RM_HOST_ACTION messages
+		 * before PPC_MSG_CALL_FUNCTION messages because when
+		 * a VM is destroyed, we call kick_all_cpus_sync()
+		 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
+		 * messages have completed before we free any VCPUs.
+		 */
+		if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+			kvmppc_xics_ipi_action();
+#endif
+		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
+			generic_smp_call_function_interrupt();
+		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
+			scheduler_ipi();
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
+			timer_broadcast_interrupt();
+#endif
+#ifdef CONFIG_NMI_IPI
+		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
+			nmi_ipi_action(0, NULL);
+#endif
+	} while (READ_ONCE(info->messages));	/* go round again if more were set meanwhile */
+
+	return IRQ_HANDLED;
+}
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
+
+static inline void do_message_pass(int cpu, int msg)
+{	/* Send @msg to @cpu via the platform hook, else via the muxed IPI path if built in. */
+	if (smp_ops->message_pass)
+		smp_ops->message_pass(cpu, msg);
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+	else
+		smp_muxed_ipi_message_pass(cpu, msg);
+#endif
+}
+
+void arch_smp_send_reschedule(int cpu)
+{	/* Send PPC_MSG_RESCHEDULE to @cpu. */
+	if (likely(smp_ops))	/* silently does nothing while smp_ops is NULL */
+		do_message_pass(cpu, PPC_MSG_RESCHEDULE);
+}
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
+
+void arch_send_call_function_single_ipi(int cpu)
+{	/* Send PPC_MSG_CALL_FUNCTION to @cpu. */
+	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);	/* unlike the reschedule path, smp_ops is not NULL-checked */
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{	/* Send PPC_MSG_CALL_FUNCTION to every CPU set in @mask. */
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)	/* one message per CPU */
+		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
+}
+
+#ifdef CONFIG_NMI_IPI
+
+/*
+ * "NMI IPI" system.
+ *
+ * NMI IPIs may not be recoverable, so should not be used as ongoing part of
+ * a running system. They can be used for crash, debug, halt/reboot, etc.
+ *
+ * The IPI call waits with interrupts disabled until all targets enter the
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
+ *
+ * A new NMI can be issued before all targets exit the handler.
+ *
+ * The IPI call may time out without all targets entering the NMI handler.
+ * In that case, there is some logic to recover (and ignore subsequent
+ * NMI interrupts that may eventually be raised), but the platform interrupt
+ * handler may not be able to distinguish this from other exception causes,
+ * which may cause a crash.
+ */
+
+static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);	/* 0 = free, 1 = held */
+static struct cpumask nmi_ipi_pending_mask;	/* targets that have not yet entered the handler */
+static bool nmi_ipi_busy = false;	/* true while one sender owns the NMI IPI state */
+static void (*nmi_ipi_function)(struct pt_regs *) = NULL;	/* callback for the current NMI IPI */
+
+noinstr static void nmi_ipi_lock_start(unsigned long *flags)
+{	/* Save the IRQ state in *@flags, hard-disable IRQs and take the NMI IPI lock. */
+	raw_local_irq_save(*flags);
+	hard_irq_disable();
+	while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+		raw_local_irq_restore(*flags);	/* restore the saved IRQ state while waiting */
+		spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
+		raw_local_irq_save(*flags);
+		hard_irq_disable();
+	}
+}
+
+noinstr static void nmi_ipi_lock(void)
+{	/* Take the NMI IPI lock without touching the IRQ state. */
+	while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+		spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);	/* wait until it looks free, then retry */
+}
+
+noinstr static void nmi_ipi_unlock(void)
+{	/* Release the NMI IPI lock; warns if it was not held. */
+	smp_mb();
+	WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1);
+	raw_atomic_set(&__nmi_ipi_lock, 0);
+}
+
+noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
+{	/* Counterpart of nmi_ipi_lock_start(): unlock, then restore the saved IRQ state. */
+	nmi_ipi_unlock();
+	raw_local_irq_restore(*flags);
+}
+
+/*
+ * Platform NMI handler calls this to ack; returns 1 if this CPU was pending, else 0
+ */
+noinstr int smp_handle_nmi_ipi(struct pt_regs *regs)
+{
+	void (*fn)(struct pt_regs *) = NULL;
+	unsigned long flags;
+	int me = raw_smp_processor_id();
+	int ret = 0;
+
+	/*
+	 * Unexpected NMIs are possible here because the interrupt may not
+	 * be able to distinguish NMI IPIs from other types of NMIs, or
+	 * because the caller may have timed out.
+	 */
+	nmi_ipi_lock_start(&flags);
+	if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);	/* this is the ack the sender waits for */
+		fn = READ_ONCE(nmi_ipi_function);
+		WARN_ON_ONCE(!fn);
+		ret = 1;
+	}
+	nmi_ipi_unlock_end(&flags);
+
+	if (fn)
+		fn(regs);	/* the callback runs after the lock has been dropped */
+
+	return ret;
+}
+
+static void do_smp_send_nmi_ipi(int cpu, bool safe)
+{
+	int target;
+
+	/* Unless @safe, let the platform's cause_nmi_ipi hook try first. */
+	if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
+		return;
+	if (cpu >= 0) {
+		/* Single target: pass it the NMI IPI message. */
+		do_message_pass(cpu, PPC_MSG_NMI_IPI);
+		return;
+	}
+	/* Negative @cpu: message every online CPU except this one. */
+	for_each_online_cpu(target)
+		if (target != raw_smp_processor_id())
+			do_message_pass(target, PPC_MSG_NMI_IPI);
+}
+
+/*
+ * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
+ * - fn is the target callback function.
+ * - delay_us > 0 is the delay before giving up waiting for targets to begin
+ *   executing the handler, == 0 is indefinite. Returns 1 on success, else 0.
+ */
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+				u64 delay_us, bool safe)
+{
+	unsigned long flags;
+	int me = raw_smp_processor_id();
+	int ret = 1;
+
+	BUG_ON(cpu == me);
+	BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
+
+	if (unlikely(!smp_ops))
+		return 0;
+
+	nmi_ipi_lock_start(&flags);
+	while (nmi_ipi_busy) {	/* one request at a time: wait out the current sender */
+		nmi_ipi_unlock_end(&flags);
+		spin_until_cond(!nmi_ipi_busy);
+		nmi_ipi_lock_start(&flags);
+	}
+	nmi_ipi_busy = true;
+	nmi_ipi_function = fn;
+
+	WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
+	if (cpu < 0) {
+		/* ALL_OTHERS */
+		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
+		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+	} else {
+		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
+	}
+
+	nmi_ipi_unlock();
+
+	/* Interrupts remain hard disabled */
+
+	do_smp_send_nmi_ipi(cpu, safe);
+
+	nmi_ipi_lock();
+	/* nmi_ipi_busy is set here, so unlock/lock is okay */
+	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
+		nmi_ipi_unlock();
+		udelay(1);
+		nmi_ipi_lock();
+		if (delay_us) {	/* 0 means wait forever; otherwise one count per 1us poll */
+			delay_us--;
+			if (!delay_us)
+				break;
+		}
+	}
+
+	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
+		/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
+		ret = 0;
+		cpumask_clear(&nmi_ipi_pending_mask);
+	}
+
+	nmi_ipi_function = NULL;
+	nmi_ipi_busy = false;
+
+	nmi_ipi_unlock_end(&flags);
+
+	return ret;
+}
+
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+	return __smp_send_nmi_ipi(cpu, fn, delay_us, false);	/* safe=false: smp_ops->cause_nmi_ipi may be tried */
+}
+
+int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+	return __smp_send_nmi_ipi(cpu, fn, delay_us, true);	/* safe=true: PPC_MSG_NMI_IPI message only */
+}
+#endif /* CONFIG_NMI_IPI */
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+	unsigned int target;
+
+	for_each_cpu(target, mask)	/* one message per CPU set in @mask */
+		do_message_pass(target, PPC_MSG_TICK_BROADCAST);
+}
+#endif
+
+#ifdef CONFIG_DEBUGGER
+static void debugger_ipi_callback(struct pt_regs *regs)
+{
+	debugger_ipi(regs);
+}
+
+void smp_send_debugger_break(void)
+{
+	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
+}
+#endif
+
+#ifdef CONFIG_KEXEC_CORE
+void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+	int cpu;
+
+	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);	/* delay_us = 1000000 */
+	if (kdump_in_progress() && crash_wake_offline) {
+		for_each_present_cpu(cpu) {
+			if (cpu_online(cpu))
+				continue;	/* online CPUs were targeted by the call above */
+			/*
+			 * crash_ipi_callback will wait for
+			 * all cpus, including offline CPUs.
+			 * We don't care about nmi_ipi_function.
+			 * Offline cpus will jump straight into
+			 * crash_ipi_callback, we can skip the
+			 * entire NMI dance and waiting for
+			 * cpus to clear pending mask, etc.
+			 */
+			do_smp_send_nmi_ipi(cpu, false);
+		}
+	}
+}
+#endif
+
+void crash_smp_send_stop(void)
+{
+	static bool stopped = false;
+
+	/*
+	 * With fadump, the f/w captures register data for all CPUs on the
+	 * ibm,os-term rtas call, so IPI callbacks to other CPUs are skipped
+	 * before that call to avoid tricky post processing of those CPUs'
+	 * backtraces.
+	 * Repeat calls are ignored as well.
+	 */
+	if (should_fadump_crash() || stopped)
+		return;
+
+	stopped = true;
+
+#ifdef CONFIG_KEXEC_CORE
+	/* With a crash kernel loaded, crash_kexec_prepare() takes over. */
+	if (kexec_crash_image) {
+		crash_kexec_prepare();
+		return;
+	}
+#endif
+
+	/* No crash kernel (or no kexec support): plain stop request. */
+	smp_send_stop();
+}
+
+#ifdef CONFIG_NMI_IPI
+static void nmi_stop_this_cpu(struct pt_regs *regs)
+{
+	/*
+	 * IRQs are already hard disabled by the smp_handle_nmi_ipi.
+	 */
+	set_cpu_online(smp_processor_id(), false);
+
+	spin_begin();
+	while (1)
+		spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
+}
+
+#else /* CONFIG_NMI_IPI */
+
+static void stop_this_cpu(void *dummy)
+{
+	hard_irq_disable();
+
+	/*
+	 * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
+	 * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
+	 * to know other CPUs are offline before it breaks locks to flush
+	 * printk buffers, in case we panic()ed while holding the lock.
+	 */
+	set_cpu_online(smp_processor_id(), false);
+
+	spin_begin();
+	while (1)
+		spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+	static bool stopped = false;
+
+	/*
+	 * Prevent waiting on csd lock from a previous smp_send_stop.
+	 * This is racy, but in general callers try to do the right
+	 * thing and only fire off one smp_send_stop (e.g., see
+	 * kernel/panic.c)
+	 */
+	if (stopped)
+		return;
+
+	stopped = true;
+
+	smp_call_function(stop_this_cpu, NULL, 0);
+}
+#endif /* CONFIG_NMI_IPI */
+
+static struct task_struct *current_set[NR_CPUS];
+
+static void smp_store_cpu_info(int id)
+{
+	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
+#ifdef CONFIG_PPC_E500
+	per_cpu(next_tlbcam_idx, id)
+		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
+#endif
+}
+
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
+ * rather than just passing around the cpumask we pass around a function that
+ * returns that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+	cpumask_set_cpu(i, get_cpumask(j));
+	cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+		struct cpumask *(*get_cpumask)(int))
+{
+	cpumask_clear_cpu(i, get_cpumask(j));
+	cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
+/*
+ * Extends set_cpus_related. Instead of setting one CPU at a time in
+ * dstmask, set srcmask at oneshot. dstmask should be super set of srcmask.
+ */
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
+				struct cpumask *(*dstmask)(int))
+{
+	struct cpumask *mask;
+	int k;
+
+	mask = srcmask(j);
+	for_each_cpu(k, srcmask(i))
+		cpumask_or(dstmask(k), dstmask(k), mask);
+
+	if (i == j)
+		return;
+
+	mask = srcmask(i);
+	for_each_cpu(k, srcmask(j))
+		cpumask_or(dstmask(k), dstmask(k), mask);
+}
+
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ *                      property for the CPU device node @dn and stores
+ *                      the parsed output in the thread_groups_list
+ *                      structure @tglp.
+ *
+ * @dn: The device node of the CPU device.
+ * @tglp: Pointer to a thread group list structure into which the parsed
+ *      output of "ibm,thread-groups" is stored.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * This array can represent thread groupings for multiple properties.
+ *
+ * ibm,thread-groups[i + 0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.
+ *
+ * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
+ * property ibm,thread-groups[i]
+ *
+ * ibm,thread-groups[i+2] tells us the number of threads in each such
+ * group.
+ * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
+ *
+ * ibm,thread-groups[i+3..i+k+2] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example:
+ * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
+ * This can be decomposed up into two consecutive arrays:
+ * a) [1,2,4,8,10,12,14,9,11,13,15]
+ * b) [2,2,4,8,10,12,14,9,11,13,15]
+ *
+ * where in,
+ *
+ * a) provides information of Property "1" being shared by "2" groups,
+ *  each with "4" threads each. The "ibm,ppc-interrupt-server#s" of
+ *  the first group is {8,10,12,14} and the
+ *  "ibm,ppc-interrupt-server#s" of the second group is
+ *  {9,11,13,15}. Property "1" is indicative of the thread in the
+ *  group sharing L1 cache, translation cache and Instruction Data
+ *  flow.
+ *
+ * b) provides information of Property "2" being shared by "2" groups,
+ *  each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
+ *  the first group is {8,10,12,14} and the
+ *  "ibm,ppc-interrupt-server#s" of the second group is
+ *  {9,11,13,15}. Property "2" indicates that the threads in each
+ *  group share the L2-cache.
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+			       struct thread_groups_list *tglp)
+{
+	unsigned int property_idx = 0;
+	u32 *thread_group_array;
+	size_t total_threads, j;
+	int ret = 0, count, i = 0;
+
+	count = of_property_count_u32_elems(dn, "ibm,thread-groups");
+	if (count < 0)	/* property absent or malformed: hand the errno back */
+		return count;
+	thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
+	if (!thread_group_array)
+		return -ENOMEM;
+	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+					 thread_group_array, count);
+	if (ret)
+		goto out_free;
+	while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
+		struct thread_groups *tg = &tglp->property_tgs[property_idx++];
+
+		if (count - i < 3 || (u64)thread_group_array[i + 1] *
+		    thread_group_array[i + 2] > (u64)(count - i - 3)) {
+			ret = -EOVERFLOW;	/* header or thread list overruns the data */
+			goto out_free;
+		}
+		tg->property = thread_group_array[i];
+		tg->nr_groups = thread_group_array[i + 1];
+		tg->threads_per_group = thread_group_array[i + 2];
+		total_threads = tg->nr_groups * tg->threads_per_group;
+		for (j = 0; j < total_threads; j++)	/* NOTE(review): thread_list capacity unchecked */
+			tg->thread_list[j] = thread_group_array[i + 3 + j];
+		i = i + 3 + total_threads;
+	}
+	tglp->nr_properties = property_idx;
+out_free:
+	kfree(thread_group_array);
+	return ret;
+}
+
+/*
+ * get_cpu_thread_group_start : Locates, within tg->thread_list, the
+ *                              thread group containing @cpu.
+ *
+ * @cpu : The logical CPU to look up; it is matched by the id that
+ *        get_hard_smp_processor_id() reports for it.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ *       to.
+ *
+ * Returns the tg->thread_list index at which the group holding @cpu
+ * begins.
+ *
+ * Returns -1 if none of the groups listed in tg->thread_list holds @cpu.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+	int hw_cpu_id = get_hard_smp_processor_id(cpu);
+	int grp, slot;
+
+	for (grp = 0; grp < tg->nr_groups; grp++) {
+		int group_start = grp * tg->threads_per_group;
+
+		for (slot = 0; slot < tg->threads_per_group; slot++) {
+			if (tg->thread_list[group_start + slot] != hw_cpu_id)
+				continue;
+
+			return group_start;
+		}
+	}
+
+	return -1;
+}
+
+static struct thread_groups *__init get_thread_groups(int cpu,
+						      int group_property,
+						      int *err)
+{
+	struct device_node *dn = of_get_cpu_node(cpu, NULL);
+	struct thread_groups_list *cpu_tgl = &tgl[cpu];
+	struct thread_groups *found = NULL;
+	int idx;
+
+	*err = 0;
+
+	if (!dn) {
+		*err = -ENODATA;
+		return NULL;
+	}
+
+	/* Parse the property only if nothing is recorded for this CPU yet. */
+	if (!cpu_tgl->nr_properties) {
+		*err = parse_thread_groups(dn, cpu_tgl);
+		if (*err)
+			goto out;
+	}
+
+	for (idx = 0; idx < cpu_tgl->nr_properties && !found; idx++) {
+		if (cpu_tgl->property_tgs[idx].property == group_property)
+			found = &cpu_tgl->property_tgs[idx];
+	}
+
+	if (!found)
+		*err = -EINVAL;
+out:
+	of_node_put(dn);
+	return found;
+}
+
+static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
+					       int cpu, int cpu_group_start)
+{
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int i;
+
+	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));	/* NOTE(review): result unchecked */
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
+		int i_group_start = get_cpu_thread_group_start(i, tg);
+
+		if (unlikely(i_group_start == -1)) {
+			WARN_ON_ONCE(1);
+			return -ENODATA;	/* thread i is in none of tg's groups */
+		}
+
+		if (i_group_start == cpu_group_start)
+			cpumask_set_cpu(i, *mask);	/* same group as @cpu */
+	}
+
+	return 0;
+}
+
+static int __init init_thread_group_cache_map(int cpu, int cache_property)
+{
+	int cpu_group_start = -1, err = 0;
+	struct thread_groups *tg = NULL;
+	cpumask_var_t *mask = NULL;
+
+	if (cache_property != THREAD_GROUP_SHARE_L1 &&
+	    cache_property != THREAD_GROUP_SHARE_L2_L3)
+		return -EINVAL;
+
+	tg = get_thread_groups(cpu, cache_property, &err);
+	if (!tg)
+		return err;
+
+	cpu_group_start = get_cpu_thread_group_start(cpu, tg);
+	if (unlikely(cpu_group_start == -1)) {
+		WARN_ON_ONCE(1);
+		return -ENODATA;
+	}
+
+	/*
+	 * Pass mask-building failures up instead of reporting success.
+	 */
+	if (cache_property == THREAD_GROUP_SHARE_L1) {
+		mask = &per_cpu(thread_group_l1_cache_map, cpu);
+		return update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	}
+
+	/* THREAD_GROUP_SHARE_L2_L3: one grouping fills both cache maps. */
+	mask = &per_cpu(thread_group_l2_cache_map, cpu);
+	err = update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	if (err)
+		return err;
+
+	mask = &per_cpu(thread_group_l3_cache_map, cpu);
+	return update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+}
+
+static bool shared_caches;
+
+#ifdef CONFIG_SCHED_SMT
+/* Scheduler domain flags for the SMT topology level */
+static int powerpc_smt_flags(void)
+{
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+		flags |= SD_ASYM_PACKING;
+	}
+	return flags;
+}
+#endif
+
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because it
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+	return per_cpu(cpu_l2_cache_map, cpu);
+}
+
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+	return cpu_smallcore_mask(cpu);
+}
+#endif
+
+static struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return per_cpu(cpu_coregroup_map, cpu);
+}
+
+static bool has_coregroup_support(void)
+{
+	return coregroup_enabled;
+}
+
+static const struct cpumask *cpu_mc_mask(int cpu)
+{
+	return cpu_coregroup_mask(cpu);
+}
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+	{ cpu_mc_mask, SD_INIT_NAME(MC) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+static int __init init_big_cores(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);
+
+		if (err)
+			return err;	/* returns before has_big_cores is set */
+
+		zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+					GFP_KERNEL,
+					cpu_to_node(cpu));	/* NOTE(review): result unchecked */
+	}
+
+	has_big_cores = true;
+
+	for_each_possible_cpu(cpu) {
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
+
+		if (err)
+			return err;	/* has_big_cores stays set; the L2/L3 flags below do not get set */
+	}
+
+	thread_group_shares_l2 = true;
+	thread_group_shares_l3 = true;
+	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
+	return 0;
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int cpu, num_threads;
+
+	DBG("smp_prepare_cpus\n");
+
+	/*
+	 * setup_cpu may need to be called on the boot cpu. We haven't
+	 * spun any cpus up but let's be paranoid.
+	 */
+	BUG_ON(boot_cpuid != smp_processor_id());
+
+	/* Fixup boot cpu */
+	smp_store_cpu_info(boot_cpuid);
+	cpu_callin_map[boot_cpuid] = 1;
+
+	for_each_possible_cpu(cpu) {
+		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
+		zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
+		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
+		if (has_coregroup_support())
+			zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
+						GFP_KERNEL, cpu_to_node(cpu));
+
+#ifdef CONFIG_NUMA
+		/*
+		 * numa_node_id() works after this.
+		 */
+		if (cpu_present(cpu)) {
+			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+			set_cpu_numa_mem(cpu,
+				local_memory_node(numa_cpu_lookup_table[cpu]));
+		}
+#endif
+	}
+
+	/* Init the cpumasks so the boot CPU is related to itself */
+	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+
+	if (has_coregroup_support())
+		cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
+
+	init_big_cores();
+	if (has_big_cores) {
+		cpumask_set_cpu(boot_cpuid,
+				cpu_smallcore_mask(boot_cpuid));
+	}
+
+	if (cpu_to_chip_id(boot_cpuid) != -1) {
+		int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+
+		/*
+		 * All threads of a core belong to the same chip, so
+		 * chip_id_lookup_table has one entry per core.
+		 * Assumption: if boot_cpuid doesn't have a chip-id, then no
+		 * other CPU has a chip-id either.
+		 */
+		chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
+		if (chip_id_lookup_table)
+			memset(chip_id_lookup_table, -1, sizeof(int) * idx);
+	}
+
+	if (smp_ops && smp_ops->probe)
+		smp_ops->probe();
+
+	// Initialise the generic SMT topology support
+	num_threads = 1;
+	if (smt_enabled_at_boot)
+		num_threads = smt_enabled_at_boot;
+	cpu_smt_set_num_threads(num_threads, threads_per_core);
+}
+
+void smp_prepare_boot_cpu(void)
+{
+	BUG_ON(smp_processor_id() != boot_cpuid);
+#ifdef CONFIG_PPC64
+	paca_ptrs[boot_cpuid]->__current = current;
+#endif
+	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
+	current_set[boot_cpuid] = current;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int generic_cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (cpu == boot_cpuid)
+		return -EBUSY;
+
+	set_cpu_online(cpu, false);
+#ifdef CONFIG_PPC64
+	vdso_data->processorCount--;
+#endif
+	/* Update affinity of all IRQs previously aimed at this CPU */
+	irq_migrate_all_off_this_cpu();
+
+	/*
+	 * Depending on the details of the interrupt controller, it's possible
+	 * that one of the interrupts we just migrated away from this CPU is
+	 * actually already pending on this CPU. If we leave it in that state
+	 * the interrupt will never be EOI'ed, and will never fire again. So
+	 * temporarily enable interrupts here, to allow any pending interrupt to
+	 * be received (and EOI'ed), before we take this CPU offline.
+	 */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
+	return 0;
+}
+
+void generic_cpu_die(unsigned int cpu)
+{
+	int tries;
+
+	for (tries = 100; tries > 0; tries--) {	/* up to 100 polls, msleep(100) apart */
+		smp_rmb();
+		if (is_cpu_dead(cpu))
+			return;
+		msleep(100);
+	}
+	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
+}
+
+void generic_set_cpu_dead(unsigned int cpu)
+{
+	per_cpu(cpu_state, cpu) = CPU_DEAD;
+}
+
+/*
+ * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
+ * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
+ * which makes the delay in generic_cpu_die() not happen.
+ */
+void generic_set_cpu_up(unsigned int cpu)
+{
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+}
+
+int generic_check_cpu_restart(unsigned int cpu)
+{
+	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
+}
+
+int is_cpu_dead(unsigned int cpu)
+{
+	return per_cpu(cpu_state, cpu) == CPU_DEAD;
+}
+
+static bool secondaries_inhibited(void)
+{
+	return kvm_hv_mode_active();
+}
+
+#else /* HOTPLUG_CPU */
+
+#define secondaries_inhibited()		0
+
+#endif
+
+static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
+{
+#ifdef CONFIG_PPC64
+	paca_ptrs[cpu]->__current = idle;
+	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+				 THREAD_SIZE - STACK_FRAME_MIN_SIZE;
+#endif
+	task_thread_info(idle)->cpu = cpu;
+	secondary_current = current_set[cpu] = idle;
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC;
+	const bool booting = system_state < SYSTEM_RUNNING;
+	const unsigned long hp_spin_ms = 1;
+	unsigned long deadline;
+	int rc;
+	const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
+
+	/*
+	 * Don't allow secondary threads to come online if inhibited
+	 */
+	if (threads_per_core > 1 && secondaries_inhibited() &&
+	    cpu_thread_in_subcore(cpu))
+		return -EBUSY;
+
+	if (smp_ops == NULL ||
+	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
+		return -EINVAL;
+
+	cpu_idle_thread_init(cpu, tidle);
+
+	/*
+	 * The platform might need to allocate resources prior to bringing
+	 * up the CPU
+	 */
+	if (smp_ops->prepare_cpu) {
+		rc = smp_ops->prepare_cpu(cpu);
+		if (rc)
+			return rc;
+	}
+
+	/* Make sure callin-map entry is 0 (can be left over from a CPU
+	 * hotplug)
+	 */
+	cpu_callin_map[cpu] = 0;
+
+	/* The information for processor bringup must
+	 * be written out to main store before we release
+	 * the processor.
+	 */
+	smp_mb();
+
+	/* wake up cpus */
+	DBG("smp: kicking cpu %d\n", cpu);
+	rc = smp_ops->kick_cpu(cpu);
+	if (rc) {
+		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+		return rc;
+	}
+
+	/*
+	 * At boot time, simply spin on the callin word until the
+	 * deadline passes.
+	 *
+	 * At run time, spin for an optimistic amount of time to avoid
+	 * sleeping in the common case.
+	 */
+	deadline = jiffies + msecs_to_jiffies(spin_wait_ms);
+	spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline));
+
+	if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
+		const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC;
+		const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC;
+
+		deadline = jiffies + msecs_to_jiffies(sleep_wait_ms);
+		while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
+			fsleep(sleep_interval_us);
+	}
+
+	if (!cpu_callin_map[cpu]) {
+		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
+		return -ENOENT;
+	}
+
+	DBG("Processor %u found.\n", cpu);
+
+	if (smp_ops->give_timebase)
+		smp_ops->give_timebase();
+
+	/* Wait until cpu puts itself in the online & active maps */
+	spin_until_cond(cpu_online(cpu));
+
+	return 0;
+}
+
+/*
+ * Return the value of the reg property corresponding to the given
+ * logical cpu, or -1 when no device node is found for it.
+ */
+int cpu_to_core_id(int cpu)
+{
+	struct device_node *np = of_get_cpu_node(cpu, NULL);
+	int id = -1;
+
+	if (np)
+		id = of_get_cpu_hwid(np, 0);
+
+	/* Called even when the lookup failed and np is NULL. */
+	of_node_put(np);
+
+	return id;
+}
+EXPORT_SYMBOL_GPL(cpu_to_core_id);
+
+/* Helper routines for cpu to core mapping */
+int cpu_core_index_of_thread(int cpu)
+{
+	return cpu >> threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
+
+int cpu_first_thread_of_core(int core)
+{
+	return core << threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
+
+/* Must be called when no change can occur to cpu_present_mask,
+ * i.e. during cpu online or offline.
+ */
+static struct device_node *cpu_to_l2cache(int cpu)
+{
+	struct device_node *np;
+	struct device_node *cache;
+
+	if (!cpu_present(cpu))
+		return NULL;
+
+	np = of_get_cpu_node(cpu, NULL);
+	if (np == NULL)
+		return NULL;
+
+	cache = of_find_next_cache_node(np);	/* looked up starting from the cpu node */
+
+	of_node_put(np);	/* the cpu node itself is no longer needed */
+
+	return cache;	/* update_mask_by_l2() hands this to of_node_put() when done */
+}
+
+static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
+{
+	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+	struct device_node *l2_cache, *np;
+	int i;
+
+	if (has_big_cores)
+		submask_fn = cpu_smallcore_mask;
+
+	/*
+	 * If the threads in a thread-group share L2 cache, then the
+	 * L2-mask can be obtained from thread_group_l2_cache_map.
+	 */
+	if (thread_group_shares_l2) {
+		cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
+
+		for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
+			if (cpu_online(i))
+				set_cpus_related(i, cpu, cpu_l2_cache_mask);
+		}
+
+		/* Verify that L1-cache siblings are a subset of L2 cache-siblings */
+		if (!cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
+			pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
+				     cpu);
+		}
+
+		return true;
+	}
+
+	l2_cache = cpu_to_l2cache(cpu);
+	if (!l2_cache || !*mask) {
+		/* Assume only core siblings share cache with this CPU */
+		for_each_cpu(i, cpu_sibling_mask(cpu))
+			set_cpus_related(cpu, i, cpu_l2_cache_mask);
+
+		of_node_put(l2_cache);	/* still held when only *mask is missing */
+		return false;
+	}
+
+	cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+
+	/* Update l2-cache mask with all the CPUs that are part of submask */
+	or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
+
+	/* Skip all CPUs already part of current CPU l2-cache mask */
+	cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
+
+	for_each_cpu(i, *mask) {
+		/*
+		 * when updating the masks the current CPU has not been marked
+		 * online, but we need to update the cache masks
+		 */
+		np = cpu_to_l2cache(i);
+
+		/* Same L2 node: merge the masks; otherwise skip i's L2 siblings too */
+		if (np == l2_cache) {
+			or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
+			cpumask_andnot(*mask, *mask, submask_fn(i));
+		} else {
+			cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
+		}
+
+		of_node_put(np);
+	}
+	of_node_put(l2_cache);
+
+	return true;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+	struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
+	int i;
+
+	unmap_cpu_from_node(cpu);
+
+	if (shared_caches)
+		mask_fn = cpu_l2_cache_mask;
+
+	for_each_cpu(i, mask_fn(cpu)) {
+		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+		if (has_big_cores)
+			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
+	}
+
+	for_each_cpu(i, cpu_core_mask(cpu))
+		set_cpus_unrelated(cpu, i, cpu_core_mask);
+
+	if (has_coregroup_support()) {
+		for_each_cpu(i, cpu_coregroup_mask(cpu))
+			set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
+	}
+}
+#endif
+
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+	int i;
+
+	if (!has_big_cores)
+		return;
+
+	cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+	for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
+		if (cpu_online(i))
+			set_cpus_related(i, cpu, cpu_smallcore_mask);
+	}
+}
+
+static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
+{
+	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+	int coregroup_id = cpu_to_coregroup_id(cpu);
+	int i;
+
+	if (shared_caches)
+		submask_fn = cpu_l2_cache_mask;
+
+	if (!*mask) {
+		/* Assume only siblings are part of this CPU's coregroup */
+		for_each_cpu(i, submask_fn(cpu))
+			set_cpus_related(cpu, i, cpu_coregroup_mask);
+
+		return;
+	}
+
+	cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+
+	/* Update coregroup mask with all the CPUs that are part of submask */
+	or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
+
+	/* Skip all CPUs already part of coregroup mask */
+	cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
+
+	for_each_cpu(i, *mask) {
+		/* Skip all CPUs not part of this coregroup */
+		if (coregroup_id == cpu_to_coregroup_id(i)) {
+			or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
+			cpumask_andnot(*mask, *mask, submask_fn(i));
+		} else {
+			cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
+		}
+	}
+}
+
+static void add_cpu_to_masks(int cpu)
+{
+	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+	int first_thread = cpu_first_thread_sibling(cpu);
+	cpumask_var_t mask;
+	int chip_id = -1, i;
+	bool ret;
+
+	/*
+	 * This CPU will not be in the online mask yet so we need to manually
+	 * add it to its own thread sibling mask.
+	 */
+	map_cpu_to_node(cpu, cpu_to_node(cpu));
+	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+	cpumask_set_cpu(cpu, cpu_core_mask(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++)
+		if (cpu_online(i))
+			set_cpus_related(i, cpu, cpu_sibling_mask);
+
+	add_cpu_to_smallcore_masks(cpu);
+
+	/* In CPU-hotplug path, hence use GFP_ATOMIC */
+	ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
+	update_mask_by_l2(cpu, &mask);
+	if (has_coregroup_support())
+		update_coregroup_mask(cpu, &mask);
+
+	if (chip_id_lookup_table && ret)
+		chip_id = cpu_to_chip_id(cpu);
+
+	if (shared_caches)
+		submask_fn = cpu_l2_cache_mask;
+
+	/* Update core_mask with all the CPUs that are part of submask */
+	or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
+	if (!ret)	/* no scratch mask: only the submask CPUs can be related */
+		return;
+
+	/* Skip all CPUs already part of current CPU core mask */
+	cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
+
+	/* If chip_id is -1; limit the cpu_core_mask to within DIE */
+	if (chip_id == -1)
+		cpumask_and(mask, mask, cpu_cpu_mask(cpu));
+
+	for_each_cpu(i, mask) {
+		if (chip_id == cpu_to_chip_id(i)) {
+			or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
+			cpumask_andnot(mask, mask, submask_fn(i));
+		} else {
+			cpumask_andnot(mask, mask, cpu_core_mask(i));
+		}
+	}
+
+	free_cpumask_var(mask);
+}
+
+/* Activate a secondary processor. */
+__no_stack_protector
+void start_secondary(void *unused)
+{
+	unsigned int cpu = raw_smp_processor_id();
+
+	/* PPC64 calls setup_kup() in early_setup_secondary() */
+	if (IS_ENABLED(CONFIG_PPC32))
+		setup_kup();
+
+	mmgrab_lazy_tlb(&init_mm);
+	current->active_mm = &init_mm;
+	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+	inc_mm_active_cpus(&init_mm);
+
+	smp_store_cpu_info(cpu);
+	set_dec(tb_ticks_per_jiffy);
+	rcu_cpu_starting(cpu);
+	cpu_callin_map[cpu] = 1;
+
+	if (smp_ops->setup_cpu)
+		smp_ops->setup_cpu(cpu);
+	if (smp_ops->take_timebase)
+		smp_ops->take_timebase();
+
+	secondary_cpu_time_init();
+
+#ifdef CONFIG_PPC64
+	if (system_state == SYSTEM_RUNNING)
+		vdso_data->processorCount++;
+
+	vdso_getcpu_init();
+#endif
+	set_numa_node(numa_cpu_lookup_table[cpu]);
+	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
+	/* Update topology CPU masks */
+	add_cpu_to_masks(cpu);
+
+	/*
+	 * Check for any shared caches. Note that this must be done on a
+	 * per-core basis because one core in the pair might be disabled.
+	 */
+	if (!shared_caches) {
+		struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+		struct cpumask *mask = cpu_l2_cache_mask(cpu);
+
+		if (has_big_cores)
+			sibling_mask = cpu_smallcore_mask;
+
+		if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
+			shared_caches = true;
+	}
+
+	smp_wmb();
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
+
+	boot_init_stack_canary();
+
+	local_irq_enable();
+
+	/* We can enable ftrace for secondary cpus now */
+	this_cpu_enable_ftrace();
+
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+	BUG();
+}
+
+static void __init fixup_topology(void)
+{
+	int i;
+
+#ifdef CONFIG_SCHED_SMT
+	if (has_big_cores) {
+		pr_info("Big cores detected but using small core scheduling\n");
+		powerpc_topology[smt_idx].mask = smallcore_smt_mask;
+	}
+#endif
+
+	if (!has_coregroup_support())
+		powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
+
+	/*
+	 * Try to consolidate topology levels here instead of
+	 * allowing scheduler to degenerate.
+	 * - Don't consolidate if masks are different.
+	 * - Don't consolidate if sd_flags exist and are different.
+	 */
+	for (i = 1; i <= die_idx; i++) {
+		if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
+			continue;
+
+		if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
+				powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
+			continue;
+
+		if (!powerpc_topology[i - 1].sd_flags)
+			powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+
+		powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
+		powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
+#ifdef CONFIG_SCHED_DEBUG
+		powerpc_topology[i].name = powerpc_topology[i + 1].name;
+#endif
+	}
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	/*
+	 * We are running pinned to the boot CPU, see rest_init().
+	 */
+	if (smp_ops && smp_ops->setup_cpu)
+		smp_ops->setup_cpu(boot_cpuid);
+
+	if (smp_ops && smp_ops->bringup_done)
+		smp_ops->bringup_done();
+
+	dump_numa_cpu_topology();
+
+	fixup_topology();
+	set_sched_topology(powerpc_topology);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+	int err;
+
+	if (!smp_ops->cpu_disable)
+		return -ENOSYS;
+
+	this_cpu_disable_ftrace();
+
+	err = smp_ops->cpu_disable();
+	if (err)
+		return err;
+
+	/* Update sibling maps */
+	remove_cpu_from_masks(cpu);
+
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/*
+	 * This could perhaps be a generic call in idlea_task_dead() [sic], but
+	 * that requires testing from all archs, so for now it is done here.
+	 */
+	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+	dec_mm_active_cpus(&init_mm);
+	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
+	if (smp_ops->cpu_die)
+		smp_ops->cpu_die(cpu);
+}
+
+void __noreturn arch_cpu_idle_dead(void)
+{
+	/*
+	 * Disable on the down path. This will be re-enabled by
+	 * start_secondary() via start_secondary_resume() below
+	 */
+	this_cpu_disable_ftrace();
+
+	if (smp_ops->cpu_offline_self)
+		smp_ops->cpu_offline_self();
+
+	/* If we return, we re-enter start_secondary */
+	start_secondary_resume();
+}
+
+#endif
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
new file mode 100644
index 0000000000..e6a958a5da
--- /dev/null
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Stack trace utility functions etc.
+ *
+ * Copyright 2008 Christoph Hellwig, IBM Corp.
+ * Copyright 2018 SUSE Linux GmbH
+ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
+#include <linux/stacktrace.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <linux/ftrace.h>
+#include <asm/kprobes.h>
+
+#include <asm/paca.h>
+
+void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+					   struct task_struct *task, struct pt_regs *regs)
+{
+	unsigned long sp;
+
+	if (regs && !consume_entry(cookie, regs->nip))	/* report regs->nip first */
+		return;
+
+	if (regs)
+		sp = regs->gpr[1];	/* start from the r1 saved in regs */
+	else if (task == current)
+		sp = current_stack_frame();
+	else
+		sp = task->thread.ksp;
+
+	for (;;) {
+		unsigned long *stack = (unsigned long *) sp;
+		unsigned long newsp, ip;
+
+		if (!validate_sp(sp, task))	/* the walk ends at the first invalid sp */
+			return;
+
+		newsp = stack[0];	/* first word of the frame links to the next frame */
+		ip = stack[STACK_FRAME_LR_SAVE];
+
+		if (!consume_entry(cookie, ip))	/* consumer asked to stop */
+			return;
+
+		sp = newsp;
+	}
+}
+
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack.  Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+						   void *cookie, struct task_struct *task)
+{
+	unsigned long sp;
+	unsigned long newsp;
+	unsigned long stack_page = (unsigned long)task_stack_page(task);
+	unsigned long stack_end;
+	int graph_idx = 0;	/* cursor handed to ftrace_graph_ret_addr() */
+	bool firstframe;
+
+	stack_end = stack_page + THREAD_SIZE;
+
+	// See copy_thread() for details.
+	if (task->flags & PF_KTHREAD)
+		stack_end -= STACK_FRAME_MIN_SIZE;
+	else
+		stack_end -= STACK_USER_INT_FRAME_SIZE;
+
+	if (task == current)
+		sp = current_stack_frame();
+	else
+		sp = task->thread.ksp;
+
+	if (sp < stack_page + sizeof(struct thread_struct) ||
+	    sp > stack_end - STACK_FRAME_MIN_SIZE) {
+		return -EINVAL;	/* starting sp is outside the accepted range */
+	}
+
+	for (firstframe = true; sp != stack_end;
+	     firstframe = false, sp = newsp) {
+		unsigned long *stack = (unsigned long *) sp;
+		unsigned long ip;
+
+		/* sanity check: ABI requires SP to be aligned 16 bytes. */
+		if (sp & 0xF)
+			return -EINVAL;
+
+		newsp = stack[0];
+		/* Stack grows downwards; unwinder may only go up. */
+		if (newsp <= sp)
+			return -EINVAL;
+
+		if (newsp != stack_end &&
+		    newsp > stack_end - STACK_FRAME_MIN_SIZE) {
+			return -EINVAL; /* invalid backlink, too far up. */
+		}
+
+		/*
+		 * We can only trust the bottom frame's backlink, the
+		 * rest of the frame may be uninitialized, continue to
+		 * the next.
+		 */
+		if (firstframe)
+			continue;
+
+		/* Mark stacktraces with exception frames as unreliable. */
+		if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
+		    stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
+			return -EINVAL;
+		}
+
+		/* Examine the saved LR: it must point into kernel code. */
+		ip = stack[STACK_FRAME_LR_SAVE];
+		if (!__kernel_text_address(ip))
+			return -EINVAL;
+
+		/*
+		 * FIXME: IMHO these tests do not belong in
+		 * arch-dependent code, they are generic.
+		 */
+		ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack);
+#ifdef CONFIG_KPROBES
+		/*
+		 * Mark stacktraces with kretprobed functions on them
+		 * as unreliable.
+		 */
+		if (ip == (unsigned long)__kretprobe_trampoline)
+			return -EINVAL;
+#endif
+
+		if (!consume_entry(cookie, ip))
+			return -EINVAL;	/* consumer stopped early: reported as failure */
+	}
+	return 0;
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+	nmi_cpu_backtrace(regs);	/* regs is NULL when called for the local CPU */
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+	struct paca_struct *p;
+	unsigned int cpu;
+	u64 delay_us;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu == smp_processor_id()) {
+			handle_backtrace_ipi(NULL);	/* local CPU: call the handler directly */
+			continue;
+		}
+
+		delay_us = 5 * USEC_PER_SEC;
+
+		if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
+			// Now wait up to 5s for the other CPU to do its backtrace
+			while (cpumask_test_cpu(cpu, mask) && delay_us) {
+				udelay(1);
+				delay_us--;
+			}
+
+			// Other CPU cleared itself from the mask
+			if (delay_us)
+				continue;
+		}
+
+		p = paca_ptrs[cpu];	/* IPI not sent or wait timed out: inspect the paca */
+
+		cpumask_clear_cpu(cpu, mask);
+
+		pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
+		if (!virt_addr_valid(p)) {
+			pr_warn("paca pointer appears corrupt? (%px)\n", p);
+			continue;
+		}
+
+		pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
+			p->irq_soft_mask, p->in_mce, p->in_nmi);	/* line is finished by pr_cont() below */
+
+		if (virt_addr_valid(p->__current))
+			pr_cont(" current: %d (%s)\n", p->__current->pid,
+				p->__current->comm);
+		else
+			pr_cont(" current pointer corrupt? (%px)\n", p->__current);
+
+		pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
+		show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING);
+	}
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);	/* raise_backtrace_ipi() is the callback */
+}
+#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
new file mode 100644
index 0000000000..863a7aa246
--- /dev/null
+++ b/arch/powerpc/kernel/static_call.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memory.h>
+#include <linux/static_call.h>
+
+#include <asm/code-patching.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+	int err;
+	bool is_ret0 = (func == __static_call_return0);
+	unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
+	bool is_short = is_offset_in_branch_range((long)target - (long)tramp);
+
+	if (!tramp)	/* only the trampoline is patched; site and tail are unused here */
+		return;
+
+	mutex_lock(&text_mutex);
+
+	if (func && !is_short) {	/* out of branch range: write target at tramp + PPC_SCT_DATA */
+		err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
+		if (err)
+			goto out;
+	}
+
+	if (!func)	/* NULL func: first trampoline instruction becomes blr */
+		err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
+	else if (is_short)	/* in range: branch straight to target */
+		err = patch_branch(tramp, target, 0);
+	else	/* NOTE(review): nop presumably falls through to code using the stored target - confirm */
+		err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
+out:
+	mutex_unlock(&text_mutex);
+
+	if (err)	/* any patching failure is fatal */
+		panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
new file mode 100644
index 0000000000..b84992c108
--- /dev/null
+++ b/arch/powerpc/kernel/suspend.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Suspend support specific for power.
+ *
+ * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+
+/*
+ *	pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
+	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;	/* end rounded up to a page */
+	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);	/* half-open range [begin, end) */
+}
diff --git a/arch/powerpc/kernel/switch.S b/arch/powerpc/kernel/switch.S
new file mode 100644
index 0000000000..608c0ce7ce
--- /dev/null
+++ b/arch/powerpc/kernel/switch.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <linux/objtool.h>
+#include <asm/asm-offsets.h>
+#include <asm/code-patching-asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kup.h>
+#include <asm/thread_info.h>
+
+.section ".text","ax",@progbits
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Cancel all explicit user streams as they will have no use after context
+ * switch and will stop the HW from creating streams itself
+ */
+#define STOP_STREAMS		\
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
+
+#define FLUSH_COUNT_CACHE	\
+1:	nop;			\
+	patch_site 1b, patch__call_flush_branch_caches1; \
+1:	nop;			\
+	patch_site 1b, patch__call_flush_branch_caches2; \
+1:	nop;			\
+	patch_site 1b, patch__call_flush_branch_caches3
+
+.macro nops number
+	.rept \number
+	nop
+	.endr
+.endm
+
+.balign 32
+.global flush_branch_caches
+flush_branch_caches:
+	/* Save LR into r9 */
+	mflr	r9
+
+	// Flush the link stack
+	.rept 64
+	ANNOTATE_INTRA_FUNCTION_CALL
+	bl	.+4		/* branch-and-link to the next instruction */
+	.endr
+	b	1f
+	nops	6
+
+	.balign 32
+	/* Restore LR */
+1:	mtlr	r9
+
+	// If we're just flushing the link stack, return here
+3:	nop			/* patch site: patch__flush_link_stack_return */
+	patch_site 3b patch__flush_link_stack_return
+
+	li	r9,0x7fff
+	mtctr	r9		/* r9 and ctr are clobbered */
+
+	PPC_BCCTR_FLUSH
+
+2:	nop			/* patch site: patch__flush_count_cache_return */
+	patch_site 2b patch__flush_count_cache_return
+
+	nops	3
+
+	.rept 278
+	.balign 32
+	PPC_BCCTR_FLUSH
+	nops	7
+	.endr
+
+	blr
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+.balign 32
+/*
+ * New stack pointer in r8, old stack pointer in r1, must not clobber r3
+ */
+pin_stack_slb:
+BEGIN_FTR_SECTION
+	clrrdi	r6,r8,28	/* get its ESID */
+	clrrdi	r9,r1,28	/* get current sp ESID */
+FTR_SECTION_ELSE
+	clrrdi	r6,r8,40	/* get its 1T ESID */
+	clrrdi	r9,r1,40	/* get current sp 1T ESID */
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
+	clrldi.	r0,r6,2		/* is new ESID c00000000? */
+	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
+	cror	eq,4*cr1+eq,eq	/* cr0.eq = either test matched */
+	beq	2f		/* if yes, don't slbie it */
+
+	/* Bolt in the new stack SLB entry */
+	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
+	oris	r0,r6,(SLB_ESID_V)@h
+	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
+BEGIN_FTR_SECTION
+	li	r9,MMU_SEGSIZE_1T	/* insert B field */
+	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
+	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
+
+	/* Update the last bolted SLB.  No write barriers are needed
+	 * here, provided we only update the current CPU's SLB shadow
+	 * buffer.
+	 */
+	ld	r9,PACA_SLBSHADOWPTR(r13)
+	li	r12,0
+	std	r12,SLBSHADOW_STACKESID(r9)	/* Clear ESID */
+	li	r12,SLBSHADOW_STACKVSID
+	STDX_BE	r7,r12,r9			/* Save VSID */
+	li	r12,SLBSHADOW_STACKESID
+	STDX_BE	r0,r12,r9			/* Save ESID */
+
+	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
+	 * we have 1TB segments, the only CPUs known to have the errata
+	 * only support less than 1TB of system memory and we'll never
+	 * actually hit this code path.
+	 */
+
+	isync
+	slbie	r6
+BEGIN_FTR_SECTION
+	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+	slbmte	r7,r0
+	isync
+2:	blr
+	.size pin_stack_slb,.-pin_stack_slb
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#else
+#define STOP_STREAMS
+#define FLUSH_COUNT_CACHE
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * do_switch_32/64 have the same calling convention as _switch, i.e., r3,r4
+ * are prev and next thread_struct *, and returns prev task_struct * in r3.
+ *
+ * This switches the stack, current, and does other task switch housekeeping.
+ */
+.macro do_switch_32
+	tophys(r0,r4)
+	mtspr	SPRN_SPRG_THREAD,r0	/* Update current THREAD phys addr */
+	lwz	r1,KSP(r4)	/* Load new stack pointer */
+
+	/* save the old current 'last' for return value */
+	mr	r3,r2
+	addi	r2,r4,-THREAD	/* Update current */
+.endm
+
+.macro do_switch_64
+	ld	r8,KSP(r4)	/* Load new stack pointer */
+
+	kuap_check_amr r9, r10
+
+	FLUSH_COUNT_CACHE	/* Clobbers r9, ctr */
+
+	STOP_STREAMS		/* Clobbers r6 */
+
+	addi	r3,r3,-THREAD	/* old thread -> task_struct for return value */
+	addi	r6,r4,-THREAD	/* new thread -> task_struct */
+	std	r6,PACACURRENT(r13)	/* Set new task_struct to 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+	ld	r6, TASK_CANARY(r6)	/* copy the new task's canary into the paca */
+	std	r6, PACA_CANARY(r13)
+#endif
+	/* Set new PACAKSAVE */
+	clrrdi	r7,r8,THREAD_SHIFT	/* base of new stack */
+	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+	std	r7,PACAKSAVE(r13)
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+	bl	pin_stack_slb	/* expects the new stack pointer in r8 */
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+#endif
+	/*
+	 * PMU interrupts in radix may come in here. They will use r1, not
+	 * PACAKSAVE, so this stack switch will not cause a problem. They
+	 * will store to the process stack, which may then be migrated to
+	 * another CPU. However the rq lock release on this CPU paired with
+	 * the rq lock acquire on the new CPU before the stack becomes
+	 * active on the new CPU, will order those stores.
+	 */
+	mr	r1,r8		/* start using new stack pointer */
+.endm
+
+/*
+ * This routine switches between two different tasks.  The process
+ * state of one is saved on its kernel stack.  Then the state
+ * of the other is restored from its kernel stack.  The memory
+ * management hardware is updated to the second process's state.
+ * Finally, we can return to the second process.
+ * On entry, r3 points to the THREAD for the current task, r4
+ * points to the THREAD for the new task.
+ *
+ * This routine is always called with interrupts disabled.
+ *
+ * Note: there are two ways to get to the "going out" portion
+ * of this code; either by coming in via the entry (_switch)
+ * or via "fork" which must set up an environment equivalent
+ * to the "_switch" path.  If you change this, you'll have to
+ * change the fork code also.
+ *
+ * The code which creates the new task context is in 'copy_thread'
+ * in arch/powerpc/kernel/process.c
+ *
+ * Note: this uses SWITCH_FRAME_SIZE rather than USER_INT_FRAME_SIZE
+ * because we don't need to leave the redzone ABI gap at the top of
+ * the kernel stack.
+ */
+_GLOBAL(_switch)
+	PPC_CREATE_STACK_FRAME(SWITCH_FRAME_SIZE)
+	PPC_STL		r1,KSP(r3)	/* Set old stack pointer */
+	SAVE_NVGPRS(r1)			/* volatiles are caller-saved -- Cort */
+	PPC_STL		r0,_NIP(r1)	/* Return to switch caller */
+	mfcr		r0
+	stw		r0,_CCR(r1)
+
+	/*
+	 * On SMP kernels, care must be taken because a task may be
+	 * scheduled off CPUx and on to CPUy. Memory ordering must be
+	 * considered.
+	 *
+	 * Cacheable stores on CPUx will be visible when the task is
+	 * scheduled on CPUy by virtue of the core scheduler barriers
+	 * (see "Notes on Program-Order guarantees on SMP systems." in
+	 * kernel/sched/core.c).
+	 *
+	 * Uncacheable stores in the case of involuntary preemption must
+	 * be taken care of. The smp_mb__after_spinlock() in __schedule()
+	 * is implemented as hwsync on powerpc, which orders MMIO too. So
+	 * long as there is an hwsync in the context switch path, it will
+	 * be executed on the source CPU after the task has performed
+	 * all MMIO ops on that CPU, and on the destination CPU before the
+	 * task performs any MMIO ops there.
+	 */
+
+	/*
+	 * The kernel context switch path must contain a spin_lock,
+	 * which contains larx/stcx, which will clear any reservation
+	 * of the task being switched.
+	 */
+
+#ifdef CONFIG_PPC32
+	do_switch_32
+#else
+	do_switch_64
+#endif
+
+	lwz	r0,_CCR(r1)	/* from here on r1 is the new task's stack */
+	mtcrf	0xFF,r0
+	REST_NVGPRS(r1)		/* volatiles are destroyed -- Cort */
+	PPC_LL	r0,_NIP(r1)	/* Return to _switch caller in new task */
+	mtlr	r0
+	addi	r1,r1,SWITCH_FRAME_SIZE
+	blr
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
new file mode 100644
index 0000000000..41dcb21752
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common powerpc suspend code for 32 and 64 bits
+ *
+ * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <asm/current.h>
+#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
+
+void save_processor_state(void)
+{
+	/*
+	 * flush out all the special registers so we don't need
+	 * to save them in the snapshot
+	 */
+	flush_all_to_thread(current);
+
+#ifdef CONFIG_PPC64
+	hard_irq_disable();	/* only done on 64-bit builds */
+#endif
+
+}
+
+void restore_processor_state(void)
+{
+#ifdef CONFIG_PPC32
+	switch_mmu_context(current->active_mm, current->active_mm, NULL);	/* same mm passed twice; empty function without CONFIG_PPC32 */
+#endif
+}
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
new file mode 100644
index 0000000000..ffb7932648
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/threads.h>
+#include <linux/linkage.h>
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_SDR1		0xc
+#define SL_SPRG0	0x10	/* 4 sprg's */
+#define SL_DBAT0	0x20	/* each BAT slot: upper word at +0, lower word at +4 */
+#define SL_IBAT0	0x28
+#define SL_DBAT1	0x30
+#define SL_IBAT1	0x38
+#define SL_DBAT2	0x40
+#define SL_IBAT2	0x48
+#define SL_DBAT3	0x50
+#define SL_IBAT3	0x58
+#define SL_DBAT4	0x60
+#define SL_IBAT4	0x68
+#define SL_DBAT5	0x70
+#define SL_IBAT5	0x78
+#define SL_DBAT6	0x80
+#define SL_IBAT6	0x88
+#define SL_DBAT7	0x90
+#define SL_IBAT7	0x98
+#define SL_TB		0xa0	/* upper word at +0, lower word at +4 */
+#define SL_R2		0xa8
+#define SL_CR		0xac
+#define SL_LR		0xb0
+#define SL_R12		0xb4	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)	/* 20 registers x 4 bytes */
+
+	.section .data
+	.align	5
+
+_GLOBAL(swsusp_save_area)
+	.space	SL_SIZE
+
+
+	.section .text
+	.align	5
+
+_GLOBAL(swsusp_arch_suspend)
+
+	lis	r11,swsusp_save_area@h	/* r11 = &swsusp_save_area */
+	ori	r11,r11,swsusp_save_area@l
+
+	mflr	r0
+	stw	r0,SL_LR(r11)
+	mfcr	r0
+	stw	r0,SL_CR(r11)
+	stw	r1,SL_SP(r11)
+	stw	r2,SL_R2(r11)
+	stmw	r12,SL_R12(r11)		/* r12..r31 */
+
+	/* Save MSR & SDR1 */
+	mfmsr	r4
+	stw	r4,SL_MSR(r11)
+	mfsdr1	r4
+	stw	r4,SL_SDR1(r11)
+
+	/* Get a stable timebase and save it */
+1:	mftbu	r4
+	stw	r4,SL_TB(r11)
+	mftb	r5
+	stw	r5,SL_TB+4(r11)
+	mftbu	r3
+	cmpw	r3,r4
+	bne	1b			/* retry if the upper half changed meanwhile */
+
+	/* Save SPRGs */
+	mfsprg	r4,0
+	stw	r4,SL_SPRG0(r11)
+	mfsprg	r4,1
+	stw	r4,SL_SPRG0+4(r11)
+	mfsprg	r4,2
+	stw	r4,SL_SPRG0+8(r11)
+	mfsprg	r4,3
+	stw	r4,SL_SPRG0+12(r11)
+
+	/* Save BATs */
+	mfdbatu	r4,0
+	stw	r4,SL_DBAT0(r11)
+	mfdbatl	r4,0
+	stw	r4,SL_DBAT0+4(r11)
+	mfdbatu	r4,1
+	stw	r4,SL_DBAT1(r11)
+	mfdbatl	r4,1
+	stw	r4,SL_DBAT1+4(r11)
+	mfdbatu	r4,2
+	stw	r4,SL_DBAT2(r11)
+	mfdbatl	r4,2
+	stw	r4,SL_DBAT2+4(r11)
+	mfdbatu	r4,3
+	stw	r4,SL_DBAT3(r11)
+	mfdbatl	r4,3
+	stw	r4,SL_DBAT3+4(r11)
+	mfibatu	r4,0
+	stw	r4,SL_IBAT0(r11)
+	mfibatl	r4,0
+	stw	r4,SL_IBAT0+4(r11)
+	mfibatu	r4,1
+	stw	r4,SL_IBAT1(r11)
+	mfibatl	r4,1
+	stw	r4,SL_IBAT1+4(r11)
+	mfibatu	r4,2
+	stw	r4,SL_IBAT2(r11)
+	mfibatl	r4,2
+	stw	r4,SL_IBAT2+4(r11)
+	mfibatu	r4,3
+	stw	r4,SL_IBAT3(r11)
+	mfibatl	r4,3
+	stw	r4,SL_IBAT3+4(r11)
+
+BEGIN_MMU_FTR_SECTION
+	mfspr	r4,SPRN_DBAT4U
+	stw	r4,SL_DBAT4(r11)
+	mfspr	r4,SPRN_DBAT4L
+	stw	r4,SL_DBAT4+4(r11)
+	mfspr	r4,SPRN_DBAT5U
+	stw	r4,SL_DBAT5(r11)
+	mfspr	r4,SPRN_DBAT5L
+	stw	r4,SL_DBAT5+4(r11)
+	mfspr	r4,SPRN_DBAT6U
+	stw	r4,SL_DBAT6(r11)
+	mfspr	r4,SPRN_DBAT6L
+	stw	r4,SL_DBAT6+4(r11)
+	mfspr	r4,SPRN_DBAT7U
+	stw	r4,SL_DBAT7(r11)
+	mfspr	r4,SPRN_DBAT7L
+	stw	r4,SL_DBAT7+4(r11)
+	mfspr	r4,SPRN_IBAT4U
+	stw	r4,SL_IBAT4(r11)
+	mfspr	r4,SPRN_IBAT4L
+	stw	r4,SL_IBAT4+4(r11)
+	mfspr	r4,SPRN_IBAT5U
+	stw	r4,SL_IBAT5(r11)
+	mfspr	r4,SPRN_IBAT5L
+	stw	r4,SL_IBAT5+4(r11)
+	mfspr	r4,SPRN_IBAT6U
+	stw	r4,SL_IBAT6(r11)
+	mfspr	r4,SPRN_IBAT6L
+	stw	r4,SL_IBAT6+4(r11)
+	mfspr	r4,SPRN_IBAT7U
+	stw	r4,SL_IBAT7(r11)
+	mfspr	r4,SPRN_IBAT7L
+	stw	r4,SL_IBAT7+4(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
+#if  0
+	/* Backup various CPU config stuffs */
+	bl	__save_cpu_setup
+#endif
+	/* Call the low level suspend stuff (we should probably have made
+	 * a stackframe...)
+	 */
+	bl	swsusp_save
+
+	/* Restore LR from the save area */
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+	lwz	r0,SL_LR(r11)
+	mtlr	r0
+
+	blr				/* r3 is left as swsusp_save set it */
+
+
+/* Resume code */
+_GLOBAL(swsusp_arch_resume)
+
+#ifdef CONFIG_ALTIVEC
+	/* Stop pending altivec streams and memory accesses */
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ 	sync
+
+	/* Disable MSR:DR to make sure we don't take a TLB or
+	 * hash miss during the copy, as our hash table will
+	 * for a while be unusable. For .text, we assume we are
+	 * covered by a BAT. This works only for non-G5 at this
+	 * point. G5 will need a better approach, possibly using
+	 * a small temporary hash table filled with large mappings,
+	 * disabling the MMU completely isn't a good option for
+	 * performance reasons.
+	 * (Note that 750's may have the same performance issue as
+	 * the G5 in this case, we should investigate using moving
+	 * BATs for these CPUs)
+	 */
+	mfmsr	r0
+	sync
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	sync
+	isync
+
+	/* Load pointer to the list of pages to copy into r10 */
+	lis	r11,(restore_pblist - KERNELBASE)@h
+	ori	r11,r11,restore_pblist@l
+	lwz	r10,0(r11)
+
+	/* Copy the pages. This is a very basic implementation, to
+	 * be replaced by something more cache efficient */
+1:
+	tophys(r3,r10)			/* r3 = physical address of the current list entry */
+	li	r0,256			/* 256 iterations x 16 bytes = 4096 bytes per entry */
+	mtctr	r0
+	lwz	r11,pbe_address(r3)	/* source */
+	tophys(r5,r11)
+	lwz	r10,pbe_orig_address(r3)	/* destination */
+	tophys(r6,r10)
+2:
+	lwz	r8,0(r5)
+	lwz	r9,4(r5)
+	lwz	r10,8(r5)
+	lwz	r11,12(r5)
+	addi	r5,r5,16
+	stw	r8,0(r6)
+	stw	r9,4(r6)
+	stw	r10,8(r6)
+	stw	r11,12(r6)
+	addi	r6,r6,16
+	bdnz	2b
+	lwz		r10,pbe_next(r3)
+	cmpwi	0,r10,0
+	bne	1b			/* loop until pbe_next is 0 */
+
+	/* Do a very simple cache flush/inval of the L1 to ensure
+	 * coherency of the icache
+	 */
+	lis	r3,0x0002		/* 0x20000 iterations, 0x20-byte stride, from address 0 */
+	mtctr	r3
+	li	r3, 0
+1:
+	lwz	r0,0(r3)
+	addi	r3,r3,0x0020
+	bdnz	1b
+	isync
+	sync
+
+	/* Now flush those cache lines */
+	lis	r3,0x0002
+	mtctr	r3
+	li	r3, 0
+1:
+	dcbf	0,r3
+	addi	r3,r3,0x0020
+	bdnz	1b
+	sync
+
+	/* Ok, we are now running with the kernel data of the old
+	 * kernel fully restored. We can get to the save area
+	 * easily now. As for the rest of the code, it assumes the
+	 * loader kernel and the booted one are exactly identical
+	 */
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+	tophys(r11,r11)			/* MSR_DR is still off: use the physical address */
+
+#if 0
+	/* Restore various CPU config stuffs */
+	bl	__restore_cpu_setup
+#endif
+	/* Restore the BATs, and SDR1.  Then we can turn on the MMU.
+	 * This is a bit hairy as we are running out of those BATs,
+	 * but first, our code is probably in the icache, and we are
+	 * writing the same value to the BAT, so that should be fine,
+	 * though a better solution will have to be found long-term
+	 */
+	lwz	r4,SL_SDR1(r11)
+	mtsdr1	r4
+	lwz	r4,SL_SPRG0(r11)
+	mtsprg	0,r4
+	lwz	r4,SL_SPRG0+4(r11)
+	mtsprg	1,r4
+	lwz	r4,SL_SPRG0+8(r11)
+	mtsprg	2,r4
+	lwz	r4,SL_SPRG0+12(r11)
+	mtsprg	3,r4
+
+#if 0
+	lwz	r4,SL_DBAT0(r11)
+	mtdbatu	0,r4
+	lwz	r4,SL_DBAT0+4(r11)
+	mtdbatl	0,r4
+	lwz	r4,SL_DBAT1(r11)
+	mtdbatu	1,r4
+	lwz	r4,SL_DBAT1+4(r11)
+	mtdbatl	1,r4
+	lwz	r4,SL_DBAT2(r11)
+	mtdbatu	2,r4
+	lwz	r4,SL_DBAT2+4(r11)
+	mtdbatl	2,r4
+	lwz	r4,SL_DBAT3(r11)
+	mtdbatu	3,r4
+	lwz	r4,SL_DBAT3+4(r11)
+	mtdbatl	3,r4
+	lwz	r4,SL_IBAT0(r11)
+	mtibatu	0,r4
+	lwz	r4,SL_IBAT0+4(r11)
+	mtibatl	0,r4
+	lwz	r4,SL_IBAT1(r11)
+	mtibatu	1,r4
+	lwz	r4,SL_IBAT1+4(r11)
+	mtibatl	1,r4
+	lwz	r4,SL_IBAT2(r11)
+	mtibatu	2,r4
+	lwz	r4,SL_IBAT2+4(r11)
+	mtibatl	2,r4
+	lwz	r4,SL_IBAT3(r11)
+	mtibatu	3,r4
+	lwz	r4,SL_IBAT3+4(r11)
+	mtibatl	3,r4
+BEGIN_MMU_FTR_SECTION
+	lwz	r4,SL_DBAT4(r11)
+	mtspr	SPRN_DBAT4U,r4
+	lwz	r4,SL_DBAT4+4(r11)
+	mtspr	SPRN_DBAT4L,r4
+	lwz	r4,SL_DBAT5(r11)
+	mtspr	SPRN_DBAT5U,r4
+	lwz	r4,SL_DBAT5+4(r11)
+	mtspr	SPRN_DBAT5L,r4
+	lwz	r4,SL_DBAT6(r11)
+	mtspr	SPRN_DBAT6U,r4
+	lwz	r4,SL_DBAT6+4(r11)
+	mtspr	SPRN_DBAT6L,r4
+	lwz	r4,SL_DBAT7(r11)
+	mtspr	SPRN_DBAT7U,r4
+	lwz	r4,SL_DBAT7+4(r11)
+	mtspr	SPRN_DBAT7L,r4
+	lwz	r4,SL_IBAT4(r11)
+	mtspr	SPRN_IBAT4U,r4
+	lwz	r4,SL_IBAT4+4(r11)
+	mtspr	SPRN_IBAT4L,r4
+	lwz	r4,SL_IBAT5(r11)
+	mtspr	SPRN_IBAT5U,r4
+	lwz	r4,SL_IBAT5+4(r11)
+	mtspr	SPRN_IBAT5L,r4
+	lwz	r4,SL_IBAT6(r11)
+	mtspr	SPRN_IBAT6U,r4
+	lwz	r4,SL_IBAT6+4(r11)
+	mtspr	SPRN_IBAT6L,r4
+	lwz	r4,SL_IBAT7(r11)
+	mtspr	SPRN_IBAT7U,r4
+	lwz	r4,SL_IBAT7+4(r11)
+	mtspr	SPRN_IBAT7L,r4
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+#endif
+
+	/* Flush all TLBs */
+	lis	r4,0x1000
+1:	addic.	r4,r4,-0x1000
+	tlbie	r4
+	bgt	1b
+	sync
+
+	/* restore the MSR and turn on the MMU */
+	lwz	r3,SL_MSR(r11)
+	bl	turn_on_mmu
+	tovirt(r11,r11)			/* back to the save area's virtual address */
+
+	/* Restore TB */
+	li	r3,0
+	mttbl	r3
+	lwz	r3,SL_TB(r11)
+	lwz	r4,SL_TB+4(r11)
+	mttbu	r3
+	mttbl	r4
+
+	/* Kick decrementer */
+	li	r0,1
+	mtdec	r0
+
+	/* Restore the callee-saved registers and return */
+	lwz	r0,SL_CR(r11)
+	mtcr	r0
+	lwz	r2,SL_R2(r11)
+	lmw	r12,SL_R12(r11)
+	lwz	r1,SL_SP(r11)
+	lwz	r0,SL_LR(r11)		/* LR saved by swsusp_arch_suspend */
+	mtlr	r0
+
+	// XXX Note: we don't really need to call swsusp_resume
+
+	li	r3,0			/* return 0 */
+	blr
+_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume)
+
+/* FIXME: This construct is actually not useful since we don't shut
+ * down the instruction MMU, we could just flip back MSR-DR on.
+ */
+SYM_FUNC_START_LOCAL(turn_on_mmu)
+	mflr	r4
+	mtsrr0	r4		/* SRR0 = caller's return address */
+	mtsrr1	r3		/* SRR1 = MSR value passed in r3 */
+	sync
+	isync
+	rfi			/* clobbers r4 */
+_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+SYM_FUNC_END(turn_on_mmu)
+
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
new file mode 100644
index 0000000000..16ee3baaf0
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PowerPC 64-bit swsusp implementation
+ *
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <asm/iommu.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/nmi.h>
+
+void do_after_copyback(void)
+{
+	iommu_restore();
+	touch_softlockup_watchdog();
+	mb();	/* memory barrier before returning to the caller */
+}
diff --git a/arch/powerpc/kernel/swsusp_85xx.S b/arch/powerpc/kernel/swsusp_85xx.S
new file mode 100644
index 0000000000..88cfdbd530
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_85xx.S
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Based on swsusp_32.S, modified for FSL BookE by
+ * Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Copyright (c) 2009-2010 MontaVista Software, LLC.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+
+/*
+ * Structure for storing CPU registers on the save area.
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_TCR		0xc
+#define SL_SPRG0	0x10
+#define SL_SPRG1	0x14
+#define SL_SPRG2	0x18
+#define SL_SPRG3	0x1c
+#define SL_SPRG4	0x20
+#define SL_SPRG5	0x24
+#define SL_SPRG6	0x28
+#define SL_SPRG7	0x2c
+#define SL_TBU		0x30
+#define SL_TBL		0x34
+#define SL_R2		0x38
+#define SL_CR		0x3c
+#define SL_LR		0x40
+#define SL_R12		0x44	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)	/* 20 registers x 4 bytes */
+
+	.section .data
+	.align	5
+
+_GLOBAL(swsusp_save_area)
+	.space	SL_SIZE
+
+
+	.section .text
+	.align	5
+
+_GLOBAL(swsusp_arch_suspend)
+	lis	r11,swsusp_save_area@h	/* r11 = &swsusp_save_area */
+	ori	r11,r11,swsusp_save_area@l
+
+	mflr	r0
+	stw	r0,SL_LR(r11)
+	mfcr	r0
+	stw	r0,SL_CR(r11)
+	stw	r1,SL_SP(r11)
+	stw	r2,SL_R2(r11)
+	stmw	r12,SL_R12(r11)		/* r12..r31 */
+
+	/* Save MSR & TCR */
+	mfmsr	r4
+	stw	r4,SL_MSR(r11)
+	mfspr	r4,SPRN_TCR
+	stw	r4,SL_TCR(r11)
+
+	/* Get a stable timebase and save it */
+1:	mfspr	r4,SPRN_TBRU
+	stw	r4,SL_TBU(r11)
+	mfspr	r5,SPRN_TBRL
+	stw	r5,SL_TBL(r11)
+	mfspr	r3,SPRN_TBRU
+	cmpw	r3,r4
+	bne	1b			/* retry if the upper half changed meanwhile */
+
+	/* Save SPRGs */
+	mfspr	r4,SPRN_SPRG0
+	stw	r4,SL_SPRG0(r11)
+	mfspr	r4,SPRN_SPRG1
+	stw	r4,SL_SPRG1(r11)
+	mfspr	r4,SPRN_SPRG2
+	stw	r4,SL_SPRG2(r11)
+	mfspr	r4,SPRN_SPRG3
+	stw	r4,SL_SPRG3(r11)
+	mfspr	r4,SPRN_SPRG4
+	stw	r4,SL_SPRG4(r11)
+	mfspr	r4,SPRN_SPRG5
+	stw	r4,SL_SPRG5(r11)
+	mfspr	r4,SPRN_SPRG6
+	stw	r4,SL_SPRG6(r11)
+	mfspr	r4,SPRN_SPRG7
+	stw	r4,SL_SPRG7(r11)
+
+	/* Call the low level suspend stuff (we should probably have made
+	 * a stackframe...)
+	 */
+	bl	swsusp_save
+
+	/* Restore LR from the save area */
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+	lwz	r0,SL_LR(r11)
+	mtlr	r0
+
+	blr				/* r3 is left as swsusp_save set it */
+
+_GLOBAL(swsusp_arch_resume)
+	sync
+
+	/* Load pointer to the list of pages to copy into r3 */
+	lis	r11,(restore_pblist)@h
+	ori	r11,r11,restore_pblist@l
+	lwz	r3,0(r11)
+
+	/* Copy the pages. This is a very basic implementation, to
+	 * be replaced by something more cache efficient */
+1:
+	li	r0,256			/* 256 iterations x 16 bytes = 4096 bytes per entry */
+	mtctr	r0
+	lwz	r5,pbe_address(r3)	/* source */
+	lwz	r6,pbe_orig_address(r3)	/* destination */
+2:
+	lwz	r8,0(r5)
+	lwz	r9,4(r5)
+	lwz	r10,8(r5)
+	lwz	r11,12(r5)
+	addi	r5,r5,16
+	stw	r8,0(r6)
+	stw	r9,4(r6)
+	stw	r10,8(r6)
+	stw	r11,12(r6)
+	addi	r6,r6,16
+	bdnz	2b
+	lwz	r3,pbe_next(r3)
+	cmpwi	0,r3,0
+	bne	1b			/* loop until pbe_next is 0 */
+
+	bl flush_dcache_L1
+	bl flush_instruction_cache
+
+	lis	r11,swsusp_save_area@h
+	ori	r11,r11,swsusp_save_area@l
+
+	/*
+	 * Mappings from virtual addresses to physical addresses may be
+	 * different than they were prior to restoring hibernation state.
+	 * Invalidate the TLB so that the boot CPU is using the new
+	 * mappings.
+	 */
+	bl	_tlbil_all		/* NOTE(review): r11 must survive this call - confirm */
+
+	lwz	r4,SL_SPRG0(r11)
+	mtspr	SPRN_SPRG0,r4
+	lwz	r4,SL_SPRG1(r11)
+	mtspr	SPRN_SPRG1,r4
+	lwz	r4,SL_SPRG2(r11)
+	mtspr	SPRN_SPRG2,r4
+	lwz	r4,SL_SPRG3(r11)
+	mtspr	SPRN_SPRG3,r4
+	lwz	r4,SL_SPRG4(r11)
+	mtspr	SPRN_SPRG4,r4
+	lwz	r4,SL_SPRG5(r11)
+	mtspr	SPRN_SPRG5,r4
+	lwz	r4,SL_SPRG6(r11)
+	mtspr	SPRN_SPRG6,r4
+	lwz	r4,SL_SPRG7(r11)
+	mtspr	SPRN_SPRG7,r4
+
+	/* restore the MSR */
+	lwz	r3,SL_MSR(r11)
+	mtmsr	r3
+
+	/* Restore TB */
+	li	r3,0
+	mtspr	SPRN_TBWL,r3
+	lwz	r3,SL_TBU(r11)
+	lwz	r4,SL_TBL(r11)
+	mtspr	SPRN_TBWU,r3
+	mtspr	SPRN_TBWL,r4
+
+	/* Restore TCR and clear any pending bits in TSR. */
+	lwz	r4,SL_TCR(r11)
+	mtspr	SPRN_TCR,r4
+	lis	r4, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
+	mtspr	SPRN_TSR,r4
+
+	/* Kick decrementer */
+	li	r0,1
+	mtdec	r0
+
+	/* Restore the callee-saved registers and return */
+	lwz	r0,SL_CR(r11)
+	mtcr	r0
+	lwz	r2,SL_R2(r11)
+	lmw	r12,SL_R12(r11)
+	lwz	r1,SL_SP(r11)
+	lwz	r0,SL_LR(r11)		/* LR saved by swsusp_arch_suspend */
+	mtlr	r0
+
+	li	r3,0			/* return 0 */
+	blr
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
new file mode 100644
index 0000000000..f645652c26
--- /dev/null
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PowerPC 64-bit swsusp implementation
+ *
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+
+/*
+ * Byte offsets of the saved CPU registers within swsusp_save_area.
+ */
+#define SL_r1		0x00	/* stack pointer */
+#define SL_PC		0x08	/* not referenced in this file */
+#define SL_MSR		0x10
+#define SL_SDR1		0x18
+#define SL_XER		0x20
+#define SL_TB		0x40	/* no offsets are defined for 0x28-0x3f */
+#define SL_r2		0x48
+#define SL_CR		0x50
+#define SL_LR		0x58
+#define SL_r12		0x60
+#define SL_r13		0x68
+#define SL_r14		0x70
+#define SL_r15		0x78
+#define SL_r16		0x80
+#define SL_r17		0x88
+#define SL_r18		0x90
+#define SL_r19		0x98
+#define SL_r20		0xa0
+#define SL_r21		0xa8
+#define SL_r22		0xb0
+#define SL_r23		0xb8
+#define SL_r24		0xc0
+#define SL_r25		0xc8
+#define SL_r26		0xd0
+#define SL_r27		0xd8
+#define SL_r28		0xe0
+#define SL_r29		0xe8
+#define SL_r30		0xf0
+#define SL_r31		0xf8
+#define SL_SPRG1	0x100
+#define SL_TCR		0x108
+#define SL_SIZE		SL_TCR+8	/* size passed to .space for the save area */
+
+/* These macros rely on the save area being pointed to by r11;
+ * the SPR and SPECIAL variants go through r0, so r0 is clobbered. */
+
+#define SAVE_SPR(register)		\
+	mfspr	r0, SPRN_##register	;\
+	std	r0, SL_##register(r11)
+#define RESTORE_SPR(register)		\
+	ld	r0, SL_##register(r11)	;\
+	mtspr	SPRN_##register, r0
+#define SAVE_SPECIAL(special)		\
+	mf##special	r0		;\
+	std	r0, SL_##special(r11)
+#define RESTORE_SPECIAL(special)	\
+	ld	r0, SL_##special(r11)	;\
+	mt##special	r0
+#define SAVE_REGISTER(reg)		\
+	std	reg, SL_##reg(r11)
+#define RESTORE_REGISTER(reg)		\
+	ld	reg, SL_##reg(r11)
+
+/* space for storing cpu state (SL_SIZE bytes, laid out per the SL_* offsets) */
+	.section .data
+	.align  5
+swsusp_save_area:
+	.space SL_SIZE
+
+	.section .text
+	.align  5
+_GLOBAL(swsusp_arch_suspend)	/* save CPU state, then call swsusp_save() */
+	LOAD_REG_ADDR(r11, swsusp_save_area)	/* r11 = base for the SL_* offsets */
+	SAVE_SPECIAL(LR)
+	SAVE_REGISTER(r1)
+	SAVE_SPECIAL(CR)
+	SAVE_SPECIAL(TB)
+	SAVE_REGISTER(r2)
+	SAVE_REGISTER(r12)
+	SAVE_REGISTER(r13)
+	SAVE_REGISTER(r14)
+	SAVE_REGISTER(r15)
+	SAVE_REGISTER(r16)
+	SAVE_REGISTER(r17)
+	SAVE_REGISTER(r18)
+	SAVE_REGISTER(r19)
+	SAVE_REGISTER(r20)
+	SAVE_REGISTER(r21)
+	SAVE_REGISTER(r22)
+	SAVE_REGISTER(r23)
+	SAVE_REGISTER(r24)
+	SAVE_REGISTER(r25)
+	SAVE_REGISTER(r26)
+	SAVE_REGISTER(r27)
+	SAVE_REGISTER(r28)
+	SAVE_REGISTER(r29)
+	SAVE_REGISTER(r30)
+	SAVE_REGISTER(r31)
+	SAVE_SPECIAL(MSR)
+	SAVE_SPECIAL(XER)
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FW_FTR_SECTION
+	SAVE_SPECIAL(SDR1)	/* section is conditional on FW_FEATURE_LPAR being clear */
+END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
+#else /* !CONFIG_PPC_BOOK3S_64 */
+	SAVE_SPR(TCR)
+
+	/* Save SPRG1, which is used to hold the paca */
+	SAVE_SPR(SPRG1)
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	/* we push the stack up 128 bytes but don't store the
+	 * stack pointer on the stack like a real stackframe */
+	addi	r1,r1,-128
+
+	bl swsusp_save
+
+	/* restore LR */
+	LOAD_REG_ADDR(r11, swsusp_save_area)	/* r11 is reloaded after the call */
+	RESTORE_SPECIAL(LR)
+	addi	r1,r1,128	/* undo the 128-byte stack adjustment made above */
+
+	blr			/* r3 is not written after the call to swsusp_save */
+
+/* Resume code: copy the restore_pblist pages back, then reload the saved CPU state */
+_GLOBAL(swsusp_arch_resume)
+	/* Stop pending altivec streams and memory accesses */
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	sync
+
+	LOAD_REG_ADDR(r11, restore_pblist)
+	ld	r12,0(r11)		/* r12 = first entry; must be read via r11 */
+
+	cmpdi	r12,0
+	beq-	nothing_to_copy
+	li	r15,PAGE_SIZE>>3	/* number of 8-byte copies per page */
+copyloop:
+	ld	r13,pbe_address(r12)	/* source */
+	ld	r14,pbe_orig_address(r12)	/* destination */
+
+	mtctr	r15
+	li	r10,0			/* byte offset within the page */
+copy_page_loop:
+	ldx	r0,r10,r13
+	stdx	r0,r10,r14
+	addi	r10,r10,8
+	bdnz copy_page_loop
+
+	ld	r12,pbe_next(r12)	/* advance to the next list entry */
+	cmpdi	r12,0
+	bne+	copyloop
+nothing_to_copy:
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* flush caches */
+	lis	r3, 0x10		/* loop count: 0x100000 */
+	mtctr	r3
+	li	r3, 0
+	ori	r3, r3, CONFIG_KERNEL_START>>48
+	li	r0, 48
+	sld	r3, r3, r0		/* r3 = top 16 bits of CONFIG_KERNEL_START */
+	li	r0, 0
+1:
+	dcbf	0,r3
+	addi	r3,r3,0x20		/* step 0x20 bytes per dcbf */
+	bdnz	1b
+
+	sync
+
+	tlbia
+#endif
+
+	LOAD_REG_ADDR(r11, swsusp_save_area)
+
+	RESTORE_SPECIAL(CR)
+
+	/* restore timebase */
+	/* load saved tb */
+	ld	r1, SL_TB(r11)		/* r1/r2 are scratch until restored below */
+	/* get upper 32 bits of it */
+	srdi	r2, r1, 32
+	/* clear tb lower to avoid wrap */
+	li	r0, 0
+	mttbl	r0
+	/* set tb upper */
+	mttbu	r2
+	/* set tb lower */
+	mttbl	r1
+
+	/* restore registers */
+	RESTORE_REGISTER(r1)
+	RESTORE_REGISTER(r2)
+	RESTORE_REGISTER(r12)
+	RESTORE_REGISTER(r13)
+	RESTORE_REGISTER(r14)
+	RESTORE_REGISTER(r15)
+	RESTORE_REGISTER(r16)
+	RESTORE_REGISTER(r17)
+	RESTORE_REGISTER(r18)
+	RESTORE_REGISTER(r19)
+	RESTORE_REGISTER(r20)
+	RESTORE_REGISTER(r21)
+	RESTORE_REGISTER(r22)
+	RESTORE_REGISTER(r23)
+	RESTORE_REGISTER(r24)
+	RESTORE_REGISTER(r25)
+	RESTORE_REGISTER(r26)
+	RESTORE_REGISTER(r27)
+	RESTORE_REGISTER(r28)
+	RESTORE_REGISTER(r29)
+	RESTORE_REGISTER(r30)
+	RESTORE_REGISTER(r31)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* can't use RESTORE_SPECIAL(MSR) */
+	ld	r0, SL_MSR(r11)
+	mtmsrd	r0, 0
+BEGIN_FW_FTR_SECTION
+	RESTORE_SPECIAL(SDR1)
+END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
+#else /* !CONFIG_PPC_BOOK3S_64 */
+	/* Restore SPRG1, which is used to hold the paca */
+	ld	r0, SL_SPRG1(r11)
+	mtsprg	1, r0
+
+	RESTORE_SPECIAL(MSR)
+
+	/* Restore TCR and clear any pending bits in TSR. */
+	RESTORE_SPR(TCR)
+	lis	r0, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
+	mtspr	SPRN_TSR, r0
+
+	/* Kick decrementer */
+	li	r0, 1
+	mtdec	r0
+
+	/* Invalidate all tlbs */
+	bl	_tlbil_all
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	RESTORE_SPECIAL(XER)
+
+	sync
+
+	addi	r1,r1,-128		/* same 128-byte adjustment as in suspend */
+#ifdef CONFIG_PPC_BOOK3S_64
+	bl	slb_flush_and_restore_bolted
+#endif
+	bl	do_after_copyback
+	addi	r1,r1,128
+
+	LOAD_REG_ADDR(r11, swsusp_save_area)	/* r11 is reloaded after the calls */
+	RESTORE_SPECIAL(LR)
+
+	li	r3, 0			/* return 0 */
+	blr
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
new file mode 100644
index 0000000000..d451a82292
--- /dev/null
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * sys_ppc32.c: 32-bit system calls with complex calling conventions.
+ *
+ * Copyright (C) 2001 IBM
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * 32-bit system calls with 64-bit arguments pass those in register pairs.
+ * This must be specially dealt with on 64-bit kernels. The compat_arg_u64_dual
+ * in generic compat syscalls is not always usable because the register
+ * pairing is constrained depending on preceding arguments.
+ *
+ * An analogous problem exists on 32-bit kernels with ARCH_HAS_SYSCALL_WRAPPER,
+ * the defined system call functions take the pt_regs as an argument, and there
+ * is a mapping macro which maps registers to arguments
+ * (SC_POWERPC_REGS_TO_ARGS) which also does not deal with these 64-bit
+ * arguments.
+ *
+ * This file contains these system calls.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h> 
+#include <linux/mm.h> 
+#include <linux/file.h> 
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/in.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/security.h>
+#include <linux/compat.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/ipc.h>
+#include <linux/slab.h>
+
+#include <asm/ptrace.h>
+#include <asm/types.h>
+#include <linux/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/time.h>
+#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
+#include <asm/syscalls.h>
+#include <asm/switch_to.h>
+
+#ifdef CONFIG_PPC32 /* entry points below are defined as regular syscalls */
+#define PPC32_SYSCALL_DEFINE4	SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5	SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6	SYSCALL_DEFINE6
+#else /* !CONFIG_PPC32: the same entry points are defined as compat syscalls */
+#define PPC32_SYSCALL_DEFINE4	COMPAT_SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5	COMPAT_SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6	COMPAT_SYSCALL_DEFINE6
+#endif /* CONFIG_PPC32 */
+
+PPC32_SYSCALL_DEFINE6(ppc_pread64, unsigned int, fd, char __user *, ubuf,
+		       compat_size_t, count, u32, reg6, u32, pos1, u32, pos2)
+{
+	loff_t pos = merge_64(pos1, pos2);	/* reg6 is not used */
+
+	return ksys_pread64(fd, ubuf, count, pos);
+}
+
+PPC32_SYSCALL_DEFINE6(ppc_pwrite64, unsigned int, fd, const char __user *, ubuf,
+		       compat_size_t, count, u32, reg6, u32, pos1, u32, pos2)
+{
+	loff_t pos = merge_64(pos1, pos2);	/* reg6 is not used */
+
+	return ksys_pwrite64(fd, ubuf, count, pos);
+}
+
+PPC32_SYSCALL_DEFINE5(ppc_readahead, int, fd, u32, r4,
+		       u32, offset1, u32, offset2, u32, count)
+{
+	/* r4 is not used; offset1/offset2 are merged into one 64-bit offset */
+	return ksys_readahead(fd, merge_64(offset1, offset2), count);
+}
+
+PPC32_SYSCALL_DEFINE4(ppc_truncate64, const char __user *, path, u32, reg4,
+		       unsigned long, len1, unsigned long, len2)
+{
+	/* reg4 is not used; len1/len2 are merged into one 64-bit length */
+	return ksys_truncate(path, merge_64(len1, len2));
+}
+
+PPC32_SYSCALL_DEFINE4(ppc_ftruncate64, unsigned int, fd, u32, reg4,
+		       unsigned long, len1, unsigned long, len2)
+{
+	/* reg4 is not used; len1/len2 are merged into one 64-bit length */
+	return ksys_ftruncate(fd, merge_64(len1, len2));
+}
+
+PPC32_SYSCALL_DEFINE6(ppc32_fadvise64, int, fd, u32, unused,
+		       u32, offset1, u32, offset2, size_t, len, int, advice)
+{
+	loff_t offset = merge_64(offset1, offset2);
+
+	return ksys_fadvise64_64(fd, offset, len, advice);
+}
+
+PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
+		       int, fd, unsigned int, flags,
+		       unsigned int, offset1, unsigned int, offset2,
+		       unsigned int, nbytes1, unsigned int, nbytes2)
+{
+	/* offset and nbytes are each rebuilt from a pair of 32-bit arguments */
+	return ksys_sync_file_range(fd,
+				    merge_64(offset1, offset2),
+				    merge_64(nbytes1, nbytes2), flags);
+}
+
+#ifdef CONFIG_PPC32
+SYSCALL_DEFINE6(ppc_fallocate, int, fd, int, mode,
+		u32, offset1, u32, offset2, u32, len1, u32, len2)
+{
+	loff_t offset = merge_64(offset1, offset2);
+	loff_t len = merge_64(len1, len2);
+
+	return ksys_fallocate(fd, mode, offset, len);
+}
+#endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
new file mode 100644
index 0000000000..77fedb190c
--- /dev/null
+++ b/arch/powerpc/kernel/syscall.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/compat.h>
+#include <linux/context_tracking.h>
+#include <linux/randomize_kstack.h>
+
+#include <asm/interrupt.h>
+#include <asm/kup.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+/* C-level system call entry: r0 selects the handler, regs is the caller's frame. */
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
+{
+	long ret;
+	syscall_fn f;
+
+	kuap_lock();
+
+	add_random_kstack_offset();
+
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+	trace_hardirqs_off(); /* finish reconciling */
+
+	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+	user_exit_irqoff();
+
+	BUG_ON(regs_is_unrecoverable(regs));
+	BUG_ON(!(regs->msr & MSR_PR));
+	BUG_ON(arch_irq_disabled_regs(regs));
+
+#ifdef CONFIG_PPC_PKEY
+	if (mmu_has_feature(MMU_FTR_PKEY)) {
+		unsigned long amr, iamr;
+		bool flush_needed = false;
+		/*
+		 * When entering from userspace we mostly have the AMR/IAMR
+		 * different from kernel default values. Hence don't compare.
+		 */
+		amr = mfspr(SPRN_AMR);
+		iamr = mfspr(SPRN_IAMR);
+		regs->amr  = amr;	/* record the entry values in the frame */
+		regs->iamr = iamr;
+		if (mmu_has_feature(MMU_FTR_KUAP)) {
+			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+			flush_needed = true;
+		}
+		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+			flush_needed = true;
+		}
+		if (flush_needed)	/* one isync covers both SPR writes */
+			isync();
+	} else
+#endif /* CONFIG_PPC_PKEY */
+		kuap_assert_locked();	/* else-arm with PKEY built in, otherwise unconditional */
+
+	booke_restore_dbcr0();
+
+	account_cpu_user_entry();
+
+	account_stolen_time();
+
+	/*
+	 * This is not required for the syscall exit path, but makes the
+	 * stack frame look nicer. If this was initialised in the first stack
+	 * frame, or if the unwinder was taught the first stack frame always
+	 * returns to user with IRQS_ENABLED, this store could be avoided!
+	 */
+	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+	/*
+	 * If system call is called with TM active, set _TIF_RESTOREALL to
+	 * prevent RFSCV being used to return to userspace, because POWER9
+	 * TM implementation has problems with this instruction returning to
+	 * transactional state. Final register values are not relevant because
+	 * the transaction will be aborted upon return anyway. Or in the case
+	 * of unsupported_scv SIGILL fault, the return state does not much
+	 * matter because it's an edge case.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+	/*
+	 * If the system call was made with a transaction active, doom it and
+	 * return without performing the system call. Unless it was an
+	 * unsupported scv vector, in which case it's treated like an illegal
+	 * instruction.
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+	    !trap_is_unsupported_scv(regs)) {
+		/* Enable TM in the kernel, and disable EE (for scv) */
+		hard_irq_disable();
+		mtmsr(mfmsr() | MSR_TM);
+
+		/* tabort, this dooms the transaction, nothing else */
+		asm volatile(".long 0x7c00071d | ((%0) << 16)"
+				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
+
+		/*
+		 * Userspace will never see the return value. Execution will
+		 * resume after the tbegin. of the aborted transaction with the
+		 * checkpointed register state. A context switch could occur
+		 * or signal delivered to the process before resuming the
+		 * doomed transaction context, but that should all be handled
+		 * as expected.
+		 */
+		return -ENOSYS;
+	}
+#endif // CONFIG_PPC_TRANSACTIONAL_MEM
+
+	local_irq_enable();
+
+	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
+		if (unlikely(trap_is_unsupported_scv(regs))) {
+			/* Unsupported scv vector */
+			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+			return regs->gpr[3];
+		}
+		/*
+		 * We use the return value of do_syscall_trace_enter() as the
+		 * syscall number. If the syscall was rejected for any reason
+		 * do_syscall_trace_enter() returns an invalid syscall number
+		 * and the test against NR_syscalls will fail and the return
+		 * value to be used is in regs->gpr[3].
+		 */
+		r0 = do_syscall_trace_enter(regs);
+		if (unlikely(r0 >= NR_syscalls))
+			return regs->gpr[3];
+
+	} else if (unlikely(r0 >= NR_syscalls)) {
+		if (unlikely(trap_is_unsupported_scv(regs))) {
+			/* Unsupported scv vector */
+			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+			return regs->gpr[3];
+		}
+		return -ENOSYS;	/* syscall number out of range */
+	}
+
+	/* May be faster to do array_index_nospec? */
+	barrier_nospec();
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+	// No COMPAT if we have SYSCALL_WRAPPER, see Kconfig
+	f = (void *)sys_call_table[r0];
+	ret = f(regs);		/* handler is called with the pt_regs pointer */
+#else /* !CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+	if (unlikely(is_compat_task())) {
+		unsigned long r3, r4, r5, r6, r7, r8;	/* args masked to 32 bits */
+
+		f = (void *)compat_sys_call_table[r0];
+
+		r3 = regs->gpr[3] & 0x00000000ffffffffULL;
+		r4 = regs->gpr[4] & 0x00000000ffffffffULL;
+		r5 = regs->gpr[5] & 0x00000000ffffffffULL;
+		r6 = regs->gpr[6] & 0x00000000ffffffffULL;
+		r7 = regs->gpr[7] & 0x00000000ffffffffULL;
+		r8 = regs->gpr[8] & 0x00000000ffffffffULL;
+
+		ret = f(r3, r4, r5, r6, r7, r8);
+	} else {
+		f = (void *)sys_call_table[r0];
+
+		ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
+			regs->gpr[6], regs->gpr[7], regs->gpr[8]);
+	}
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
+	/*
+	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+	 * so the maximum stack offset is 1k bytes (10 bits).
+	 *
+	 * The actual entropy will be further reduced by the compiler when
+	 * applying stack alignment constraints: the powerpc architecture
+	 * may have two kinds of stack alignment (16-bytes and 8-bytes).
+	 *
+	 * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
+	 */
+	choose_random_kstack_offset(mftb());
+
+	return ret;
+}
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
new file mode 100644
index 0000000000..68ebb23a5a
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Implementation of various system calls for Linux/PowerPC
+ *
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/i386/kernel/sys_i386.c"
+ * Adapted from the i386 version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras (paulus@cs.anu.edu.au).
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/PPC
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/sys.h>
+#include <linux/ipc.h>
+#include <linux/utsname.h>
+#include <linux/file.h>
+#include <linux/personality.h>
+
+#include <linux/uaccess.h>
+#include <asm/syscalls.h>
+#include <asm/time.h>
+#include <asm/unistd.h>
+
+static long do_mmap2(unsigned long addr, size_t len,
+		     unsigned long prot, unsigned long flags,
+		     unsigned long fd, unsigned long off, int shift)
+{
+	/*
+	 * 'off' counts (PAGE_SIZE >> shift)-byte units; it must be page aligned.
+	 */
+	if (!arch_validate_prot(prot, addr) || !IS_ALIGNED(off, 1 << shift))
+		return -EINVAL;
+
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> shift);
+}
+
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, unsigned long, prot,
+		unsigned long, flags, unsigned long, fd, unsigned long, pgoff)
+{
+	/* pgoff is in 4096-byte (1 << 12) units; do_mmap2() converts it to pages */
+	return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
+		       unsigned long, prot, unsigned long, flags,
+		       unsigned long, fd, unsigned long, off_4k)
+{
+	/* off_4k is in 4096-byte units; do_mmap2() converts it to pages */
+	return do_mmap2(addr, len, prot, flags, fd, off_4k, PAGE_SHIFT-12);
+}
+#endif /* CONFIG_COMPAT */
+
+SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, unsigned long, prot,
+		unsigned long, flags, unsigned long, fd, off_t, offset)
+{
+	/* offset is in bytes: shifting by PAGE_SHIFT turns it into a page offset */
+	return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
+}
+
+#ifdef CONFIG_PPC64
+static long do_ppc64_personality(unsigned long personality)
+{
+	long old;
+
+	/* PER_LINUX requested by a PER_LINUX32 task is applied as PER_LINUX32 */
+	if (personality(personality) == PER_LINUX &&
+	    personality(current->personality) == PER_LINUX32)
+		personality = (personality & ~PER_MASK) | PER_LINUX32;
+	old = ksys_personality(personality);
+	/* a PER_LINUX32 result is reported back as PER_LINUX */
+	return personality(old) == PER_LINUX32 ? (old & ~PER_MASK) | PER_LINUX : old;
+}
+
+SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+	return do_ppc64_personality(personality);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+	return do_ppc64_personality(personality);
+}
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
+
+SYSCALL_DEFINE6(ppc_fadvise64_64, int, fd, int, advice,
+		u32, offset_high, u32, offset_low, u32, len_high, u32, len_low)
+{
+	/* offset and len are each rebuilt from a pair of 32-bit arguments */
+	return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low),
+				 merge_64(len_high, len_low), advice);
+}
+
+SYSCALL_DEFINE0(switch_endian)	/* flips MSR_LE in the caller's return MSR */
+{
+	struct thread_info *ti;
+
+	regs_set_return_msr(current->thread.regs,
+				current->thread.regs->msr ^ MSR_LE);
+
+	/*
+	 * Set TIF_RESTOREALL so that r3 isn't clobbered on return to
+	 * userspace. That also has the effect of restoring the non-volatile
+	 * GPRs, so we saved them on the way in here.
+	 */
+	ti = current_thread_info();
+	set_bits(_TIF_RESTOREALL, &ti->flags);	/* atomic: don't lose concurrent flag updates */
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile
new file mode 100644
index 0000000000..9d7bd81510
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/Makefile
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+kapi := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
+# Create the output directories for the generated headers.
+$(shell mkdir -p $(uapi) $(kapi))
+
+syscall := $(src)/syscall.tbl
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
+# Both commands read syscall.tbl ($<) and are restricted to the ABIs in $(abis).
+quiet_cmd_syshdr = SYSHDR  $@
+      cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
+
+quiet_cmd_systbl = SYSTBL  $@
+      cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
+# Each target below picks its ABI list through a target-specific 'abis'.
+$(uapi)/unistd_32.h: abis := common,nospu,32
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
+	$(call if_changed,syshdr)
+
+$(uapi)/unistd_64.h: abis := common,nospu,64
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
+	$(call if_changed,syshdr)
+
+$(kapi)/syscall_table_32.h: abis := common,nospu,32
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,systbl)
+
+$(kapi)/syscall_table_64.h: abis := common,nospu,64
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,systbl)
+
+$(kapi)/syscall_table_spu.h: abis := common,spu
+$(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,systbl)
+# Generated headers; the names get their output directory prefixed further down.
+uapisyshdr-y		+= unistd_32.h unistd_64.h
+kapisyshdr-y		+= syscall_table_32.h		\
+			   syscall_table_64.h		\
+			   syscall_table_spu.h
+
+uapisyshdr-y	:= $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y	:= $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets		+= $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
+
+PHONY += all
+all: $(uapisyshdr-y) $(kapisyshdr-y)
+	@:
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
new file mode 100644
index 0000000000..20e50586e8
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -0,0 +1,541 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for powerpc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, spu, nospu, 64, or 32 for this file.
+#
+0	nospu	restart_syscall			sys_restart_syscall
+1	nospu	exit				sys_exit
+2	nospu	fork				sys_fork
+3	common	read				sys_read
+4	common	write				sys_write
+5	common	open				sys_open			compat_sys_open
+6	common	close				sys_close
+7	common	waitpid				sys_waitpid
+8	common	creat				sys_creat
+9	common	link				sys_link
+10	common	unlink				sys_unlink
+11	nospu	execve				sys_execve			compat_sys_execve
+12	common	chdir				sys_chdir
+13	32	time				sys_time32
+13	64	time				sys_time
+13	spu	time				sys_time
+14	common	mknod				sys_mknod
+15	common	chmod				sys_chmod
+16	common	lchown				sys_lchown
+17	common	break				sys_ni_syscall
+18	32	oldstat				sys_stat			sys_ni_syscall
+18	64	oldstat				sys_ni_syscall
+18	spu	oldstat				sys_ni_syscall
+19	common	lseek				sys_lseek			compat_sys_lseek
+20	common	getpid				sys_getpid
+21	nospu	mount				sys_mount
+22	32	umount				sys_oldumount
+22	64	umount				sys_ni_syscall
+22	spu	umount				sys_ni_syscall
+23	common	setuid				sys_setuid
+24	common	getuid				sys_getuid
+25	32	stime				sys_stime32
+25	64	stime				sys_stime
+25	spu	stime				sys_stime
+26	nospu	ptrace				sys_ptrace			compat_sys_ptrace
+27	common	alarm				sys_alarm
+28	32	oldfstat			sys_fstat			sys_ni_syscall
+28	64	oldfstat			sys_ni_syscall
+28	spu	oldfstat			sys_ni_syscall
+29	nospu	pause				sys_pause
+30	32	utime				sys_utime32
+30	64	utime				sys_utime
+31	common	stty				sys_ni_syscall
+32	common	gtty				sys_ni_syscall
+33	common	access				sys_access
+34	common	nice				sys_nice
+35	common	ftime				sys_ni_syscall
+36	common	sync				sys_sync
+37	common	kill				sys_kill
+38	common	rename				sys_rename
+39	common	mkdir				sys_mkdir
+40	common	rmdir				sys_rmdir
+41	common	dup				sys_dup
+42	common	pipe				sys_pipe
+43	common	times				sys_times			compat_sys_times
+44	common	prof				sys_ni_syscall
+45	common	brk				sys_brk
+46	common	setgid				sys_setgid
+47	common	getgid				sys_getgid
+48	nospu	signal				sys_signal
+49	common	geteuid				sys_geteuid
+50	common	getegid				sys_getegid
+51	nospu	acct				sys_acct
+52	nospu	umount2				sys_umount
+53	common	lock				sys_ni_syscall
+54	common	ioctl				sys_ioctl			compat_sys_ioctl
+55	common	fcntl				sys_fcntl			compat_sys_fcntl
+56	common	mpx				sys_ni_syscall
+57	common	setpgid				sys_setpgid
+58	common	ulimit				sys_ni_syscall
+59	32	oldolduname			sys_olduname
+59	64	oldolduname			sys_ni_syscall
+59	spu	oldolduname			sys_ni_syscall
+60	common	umask				sys_umask
+61	common	chroot				sys_chroot
+62	nospu	ustat				sys_ustat			compat_sys_ustat
+63	common	dup2				sys_dup2
+64	common	getppid				sys_getppid
+65	common	getpgrp				sys_getpgrp
+66	common	setsid				sys_setsid
+67	32	sigaction			sys_sigaction			compat_sys_sigaction
+67	64	sigaction			sys_ni_syscall
+67	spu	sigaction			sys_ni_syscall
+68	common	sgetmask			sys_sgetmask
+69	common	ssetmask			sys_ssetmask
+70	common	setreuid			sys_setreuid
+71	common	setregid			sys_setregid
+72	32	sigsuspend			sys_sigsuspend
+72	64	sigsuspend			sys_ni_syscall
+72	spu	sigsuspend			sys_ni_syscall
+73	32	sigpending			sys_sigpending			compat_sys_sigpending
+73	64	sigpending			sys_ni_syscall
+73	spu	sigpending			sys_ni_syscall
+74	common	sethostname			sys_sethostname
+75	common	setrlimit			sys_setrlimit			compat_sys_setrlimit
+76	32	getrlimit			sys_old_getrlimit		compat_sys_old_getrlimit
+76	64	getrlimit			sys_ni_syscall
+76	spu	getrlimit			sys_ni_syscall
+77	common	getrusage			sys_getrusage			compat_sys_getrusage
+78	common	gettimeofday			sys_gettimeofday		compat_sys_gettimeofday
+79	common	settimeofday			sys_settimeofday		compat_sys_settimeofday
+80	common	getgroups			sys_getgroups
+81	common	setgroups			sys_setgroups
+82	32	select				sys_old_select			compat_sys_old_select
+82	64	select				sys_ni_syscall
+82	spu	select				sys_ni_syscall
+83	common	symlink				sys_symlink
+84	32	oldlstat			sys_lstat			sys_ni_syscall
+84	64	oldlstat			sys_ni_syscall
+84	spu	oldlstat			sys_ni_syscall
+85	common	readlink			sys_readlink
+86	nospu	uselib				sys_uselib
+87	nospu	swapon				sys_swapon
+88	nospu	reboot				sys_reboot
+89	32	readdir				sys_old_readdir			compat_sys_old_readdir
+89	64	readdir				sys_ni_syscall
+89	spu	readdir				sys_ni_syscall
+90	common	mmap				sys_mmap
+91	common	munmap				sys_munmap
+92	common	truncate			sys_truncate			compat_sys_truncate
+93	common	ftruncate			sys_ftruncate			compat_sys_ftruncate
+94	common	fchmod				sys_fchmod
+95	common	fchown				sys_fchown
+96	common	getpriority			sys_getpriority
+97	common	setpriority			sys_setpriority
+98	common	profil				sys_ni_syscall
+99	nospu	statfs				sys_statfs			compat_sys_statfs
+100	nospu	fstatfs				sys_fstatfs			compat_sys_fstatfs
+101	common	ioperm				sys_ni_syscall
+102	common	socketcall			sys_socketcall			compat_sys_socketcall
+103	common	syslog				sys_syslog
+104	common	setitimer			sys_setitimer			compat_sys_setitimer
+105	common	getitimer			sys_getitimer			compat_sys_getitimer
+106	common	stat				sys_newstat			compat_sys_newstat
+107	common	lstat				sys_newlstat			compat_sys_newlstat
+108	common	fstat				sys_newfstat			compat_sys_newfstat
+109	32	olduname			sys_uname
+109	64	olduname			sys_ni_syscall
+109	spu	olduname			sys_ni_syscall
+110	common	iopl				sys_ni_syscall
+111	common	vhangup				sys_vhangup
+112	common	idle				sys_ni_syscall
+113	common	vm86				sys_ni_syscall
+114	common	wait4				sys_wait4			compat_sys_wait4
+115	nospu	swapoff				sys_swapoff
+116	common	sysinfo				sys_sysinfo			compat_sys_sysinfo
+117	nospu	ipc				sys_ipc				compat_sys_ipc
+118	common	fsync				sys_fsync
+119	32	sigreturn			sys_sigreturn			compat_sys_sigreturn
+119	64	sigreturn			sys_ni_syscall
+119	spu	sigreturn			sys_ni_syscall
+120	nospu	clone				sys_clone
+121	common	setdomainname			sys_setdomainname
+122	common	uname				sys_newuname
+123	common	modify_ldt			sys_ni_syscall
+124	32	adjtimex			sys_adjtimex_time32
+124	64	adjtimex			sys_adjtimex
+124	spu	adjtimex			sys_adjtimex
+125	common	mprotect			sys_mprotect
+126	32	sigprocmask			sys_sigprocmask			compat_sys_sigprocmask
+126	64	sigprocmask			sys_ni_syscall
+126	spu	sigprocmask			sys_ni_syscall
+127	common	create_module			sys_ni_syscall
+128	nospu	init_module			sys_init_module
+129	nospu	delete_module			sys_delete_module
+130	common	get_kernel_syms			sys_ni_syscall
+131	nospu	quotactl			sys_quotactl
+132	common	getpgid				sys_getpgid
+133	common	fchdir				sys_fchdir
+134	common	bdflush				sys_ni_syscall
+135	common	sysfs				sys_sysfs
+136	32	personality			sys_personality			compat_sys_ppc64_personality
+136	64	personality			sys_ppc64_personality
+136	spu	personality			sys_ppc64_personality
+137	common	afs_syscall			sys_ni_syscall
+138	common	setfsuid			sys_setfsuid
+139	common	setfsgid			sys_setfsgid
+140	common	_llseek				sys_llseek
+141	common	getdents			sys_getdents			compat_sys_getdents
+142	common	_newselect			sys_select			compat_sys_select
+143	common	flock				sys_flock
+144	common	msync				sys_msync
+145	common	readv				sys_readv
+146	common	writev				sys_writev
+147	common	getsid				sys_getsid
+148	common	fdatasync			sys_fdatasync
+149	nospu	_sysctl				sys_ni_syscall
+150	common	mlock				sys_mlock
+151	common	munlock				sys_munlock
+152	common	mlockall			sys_mlockall
+153	common	munlockall			sys_munlockall
+154	common	sched_setparam			sys_sched_setparam
+155	common	sched_getparam			sys_sched_getparam
+156	common	sched_setscheduler		sys_sched_setscheduler
+157	common	sched_getscheduler		sys_sched_getscheduler
+158	common	sched_yield			sys_sched_yield
+159	common	sched_get_priority_max		sys_sched_get_priority_max
+160	common	sched_get_priority_min		sys_sched_get_priority_min
+161	32	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+161	64	sched_rr_get_interval		sys_sched_rr_get_interval
+161	spu	sched_rr_get_interval		sys_sched_rr_get_interval
+162	32	nanosleep			sys_nanosleep_time32
+162	64	nanosleep			sys_nanosleep
+162	spu	nanosleep			sys_nanosleep
+163	common	mremap				sys_mremap
+164	common	setresuid			sys_setresuid
+165	common	getresuid			sys_getresuid
+166	common	query_module			sys_ni_syscall
+167	common	poll				sys_poll
+168	common	nfsservctl			sys_ni_syscall
+169	common	setresgid			sys_setresgid
+170	common	getresgid			sys_getresgid
+171	common	prctl				sys_prctl
+172	nospu	rt_sigreturn			sys_rt_sigreturn		compat_sys_rt_sigreturn
+173	nospu	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
+174	nospu	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+175	nospu	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
+176	32	rt_sigtimedwait			sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+176	64	rt_sigtimedwait			sys_rt_sigtimedwait
+177	nospu 	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+178	nospu 	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+179	32	pread64				sys_ppc_pread64			compat_sys_ppc_pread64
+179	64	pread64				sys_pread64
+180	32	pwrite64			sys_ppc_pwrite64		compat_sys_ppc_pwrite64
+180	64	pwrite64			sys_pwrite64
+181	common	chown				sys_chown
+182	common	getcwd				sys_getcwd
+183	common	capget				sys_capget
+184	common	capset				sys_capset
+185	nospu	sigaltstack			sys_sigaltstack			compat_sys_sigaltstack
+186	32	sendfile			sys_sendfile			compat_sys_sendfile
+186	64	sendfile			sys_sendfile64
+186	spu	sendfile			sys_sendfile64
+187	common	getpmsg				sys_ni_syscall
+188	common 	putpmsg				sys_ni_syscall
+189	nospu	vfork				sys_vfork
+190	common	ugetrlimit			sys_getrlimit			compat_sys_getrlimit
+191	32	readahead			sys_ppc_readahead		compat_sys_ppc_readahead
+191	64	readahead			sys_readahead
+192	32	mmap2				sys_mmap2			compat_sys_mmap2
+193	32	truncate64			sys_ppc_truncate64		compat_sys_ppc_truncate64
+194	32	ftruncate64			sys_ppc_ftruncate64		compat_sys_ppc_ftruncate64
+195	32	stat64				sys_stat64
+196	32	lstat64				sys_lstat64
+197	32	fstat64				sys_fstat64
+198	nospu 	pciconfig_read			sys_pciconfig_read
+199	nospu 	pciconfig_write			sys_pciconfig_write
+200	nospu 	pciconfig_iobase		sys_pciconfig_iobase
+201	common 	multiplexer			sys_ni_syscall
+202	common	getdents64			sys_getdents64
+203	common	pivot_root			sys_pivot_root
+204	32	fcntl64				sys_fcntl64			compat_sys_fcntl64
+205	common	madvise				sys_madvise
+206	common	mincore				sys_mincore
+207	common	gettid				sys_gettid
+208	common	tkill				sys_tkill
+209	common	setxattr			sys_setxattr
+210	common	lsetxattr			sys_lsetxattr
+211	common	fsetxattr			sys_fsetxattr
+212	common	getxattr			sys_getxattr
+213	common	lgetxattr			sys_lgetxattr
+214	common	fgetxattr			sys_fgetxattr
+215	common	listxattr			sys_listxattr
+216	common	llistxattr			sys_llistxattr
+217	common	flistxattr			sys_flistxattr
+218	common	removexattr			sys_removexattr
+219	common	lremovexattr			sys_lremovexattr
+220	common	fremovexattr			sys_fremovexattr
+221	32	futex				sys_futex_time32
+221	64	futex				sys_futex
+221	spu	futex				sys_futex
+222	common	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
+223	common	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
+# 224 unused
+225	common	tuxcall				sys_ni_syscall
+226	32	sendfile64			sys_sendfile64			compat_sys_sendfile64
+227	common	io_setup			sys_io_setup			compat_sys_io_setup
+228	common	io_destroy			sys_io_destroy
+229	32	io_getevents			sys_io_getevents_time32
+229	64	io_getevents			sys_io_getevents
+229	spu	io_getevents			sys_io_getevents
+230	common	io_submit			sys_io_submit			compat_sys_io_submit
+231	common	io_cancel			sys_io_cancel
+232	nospu	set_tid_address			sys_set_tid_address
+233	32	fadvise64			sys_ppc32_fadvise64		compat_sys_ppc32_fadvise64
+233	64	fadvise64			sys_fadvise64
+234	nospu	exit_group			sys_exit_group
+235	nospu	lookup_dcookie			sys_lookup_dcookie		compat_sys_lookup_dcookie
+236	common	epoll_create			sys_epoll_create
+237	common	epoll_ctl			sys_epoll_ctl
+238	common	epoll_wait			sys_epoll_wait
+239	common	remap_file_pages		sys_remap_file_pages
+240	common	timer_create			sys_timer_create		compat_sys_timer_create
+241	32	timer_settime			sys_timer_settime32
+241	64	timer_settime			sys_timer_settime
+241	spu	timer_settime			sys_timer_settime
+242	32	timer_gettime			sys_timer_gettime32
+242	64	timer_gettime			sys_timer_gettime
+242	spu	timer_gettime			sys_timer_gettime
+243	common	timer_getoverrun		sys_timer_getoverrun
+244	common	timer_delete			sys_timer_delete
+245	32	clock_settime			sys_clock_settime32
+245	64	clock_settime			sys_clock_settime
+245	spu	clock_settime			sys_clock_settime
+246	32	clock_gettime			sys_clock_gettime32
+246	64	clock_gettime			sys_clock_gettime
+246	spu	clock_gettime			sys_clock_gettime
+247	32	clock_getres			sys_clock_getres_time32
+247	64	clock_getres			sys_clock_getres
+247	spu	clock_getres			sys_clock_getres
+248	32	clock_nanosleep			sys_clock_nanosleep_time32
+248	64	clock_nanosleep			sys_clock_nanosleep
+248	spu	clock_nanosleep			sys_clock_nanosleep
+249	nospu	swapcontext			sys_swapcontext			compat_sys_swapcontext
+250	common	tgkill				sys_tgkill
+251	32	utimes				sys_utimes_time32
+251	64	utimes				sys_utimes
+251	spu	utimes				sys_utimes
+252	common	statfs64			sys_statfs64			compat_sys_statfs64
+253	common	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
+254	32	fadvise64_64			sys_ppc_fadvise64_64
+254	spu	fadvise64_64			sys_ni_syscall
+255	common	rtas				sys_rtas
+256	32	sys_debug_setcontext		sys_debug_setcontext		sys_ni_syscall
+256	64	sys_debug_setcontext		sys_ni_syscall
+256	spu	sys_debug_setcontext		sys_ni_syscall
+# 257 reserved for vserver
+258	nospu	migrate_pages			sys_migrate_pages
+259	nospu	mbind				sys_mbind
+260	nospu	get_mempolicy			sys_get_mempolicy
+261	nospu	set_mempolicy			sys_set_mempolicy
+262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
+263	nospu	mq_unlink			sys_mq_unlink
+264	32	mq_timedsend			sys_mq_timedsend_time32
+264	64	mq_timedsend			sys_mq_timedsend
+265	32	mq_timedreceive			sys_mq_timedreceive_time32
+265	64	mq_timedreceive			sys_mq_timedreceive
+266	nospu	mq_notify			sys_mq_notify			compat_sys_mq_notify
+267	nospu	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
+268	nospu	kexec_load			sys_kexec_load			compat_sys_kexec_load
+269	nospu	add_key				sys_add_key
+270	nospu	request_key			sys_request_key
+271	nospu	keyctl				sys_keyctl			compat_sys_keyctl
+272	nospu	waitid				sys_waitid			compat_sys_waitid
+273	nospu	ioprio_set			sys_ioprio_set
+274	nospu	ioprio_get			sys_ioprio_get
+275	nospu	inotify_init			sys_inotify_init
+276	nospu	inotify_add_watch		sys_inotify_add_watch
+277	nospu	inotify_rm_watch		sys_inotify_rm_watch
+278	nospu	spu_run				sys_spu_run
+279	nospu	spu_create			sys_spu_create
+280	32	pselect6			sys_pselect6_time32		compat_sys_pselect6_time32
+280	64	pselect6			sys_pselect6
+281	32	ppoll				sys_ppoll_time32		compat_sys_ppoll_time32
+281	64	ppoll				sys_ppoll
+282	common	unshare				sys_unshare
+283	common	splice				sys_splice
+284	common	tee				sys_tee
+285	common	vmsplice			sys_vmsplice
+286	common	openat				sys_openat			compat_sys_openat
+287	common	mkdirat				sys_mkdirat
+288	common	mknodat				sys_mknodat
+289	common	fchownat			sys_fchownat
+290	32	futimesat			sys_futimesat_time32
+290	64	futimesat			sys_futimesat
+290	spu	utimesat			sys_futimesat
+291	32	fstatat64			sys_fstatat64
+291	64	newfstatat			sys_newfstatat
+291	spu	newfstatat			sys_newfstatat
+292	common	unlinkat			sys_unlinkat
+293	common	renameat			sys_renameat
+294	common	linkat				sys_linkat
+295	common	symlinkat			sys_symlinkat
+296	common	readlinkat			sys_readlinkat
+297	common	fchmodat			sys_fchmodat
+298	common	faccessat			sys_faccessat
+299	common	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
+300	common	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
+301	common	move_pages			sys_move_pages
+302	common	getcpu				sys_getcpu
+303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
+304	32	utimensat			sys_utimensat_time32
+304	64	utimensat			sys_utimensat
+304	spu	utimensat			sys_utimensat
+305	common	signalfd			sys_signalfd			compat_sys_signalfd
+306	common	timerfd_create			sys_timerfd_create
+307	common	eventfd				sys_eventfd
+308	32	sync_file_range2		sys_ppc_sync_file_range2	compat_sys_ppc_sync_file_range2
+308	64	sync_file_range2		sys_sync_file_range2
+308	spu	sync_file_range2		sys_sync_file_range2
+309	32	fallocate			sys_ppc_fallocate		compat_sys_fallocate
+309	64	fallocate			sys_fallocate
+310	nospu	subpage_prot			sys_subpage_prot
+311	32	timerfd_settime			sys_timerfd_settime32
+311	64	timerfd_settime			sys_timerfd_settime
+311	spu	timerfd_settime			sys_timerfd_settime
+312	32	timerfd_gettime			sys_timerfd_gettime32
+312	64	timerfd_gettime			sys_timerfd_gettime
+312	spu	timerfd_gettime			sys_timerfd_gettime
+313	common	signalfd4			sys_signalfd4			compat_sys_signalfd4
+314	common	eventfd2			sys_eventfd2
+315	common	epoll_create1			sys_epoll_create1
+316	common	dup3				sys_dup3
+317	common	pipe2				sys_pipe2
+318	nospu	inotify_init1			sys_inotify_init1
+319	common	perf_event_open			sys_perf_event_open
+320	common	preadv				sys_preadv			compat_sys_preadv
+321	common	pwritev				sys_pwritev			compat_sys_pwritev
+322	nospu	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+323	nospu	fanotify_init			sys_fanotify_init
+324	nospu	fanotify_mark			sys_fanotify_mark		compat_sys_fanotify_mark
+325	common	prlimit64			sys_prlimit64
+326	common	socket				sys_socket
+327	common	bind				sys_bind
+328	common	connect				sys_connect
+329	common	listen				sys_listen
+330	common	accept				sys_accept
+331	common	getsockname			sys_getsockname
+332	common	getpeername			sys_getpeername
+333	common	socketpair			sys_socketpair
+334	common	send				sys_send
+335	common	sendto				sys_sendto
+336	common	recv				sys_recv			compat_sys_recv
+337	common	recvfrom			sys_recvfrom			compat_sys_recvfrom
+338	common	shutdown			sys_shutdown
+339	common	setsockopt			sys_setsockopt			sys_setsockopt
+340	common	getsockopt			sys_getsockopt			sys_getsockopt
+341	common	sendmsg				sys_sendmsg			compat_sys_sendmsg
+342	common	recvmsg				sys_recvmsg			compat_sys_recvmsg
+343	32	recvmmsg			sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+343	64	recvmmsg			sys_recvmmsg
+343	spu	recvmmsg			sys_recvmmsg
+344	common	accept4				sys_accept4
+345	common	name_to_handle_at		sys_name_to_handle_at
+346	common	open_by_handle_at		sys_open_by_handle_at		compat_sys_open_by_handle_at
+347	32	clock_adjtime			sys_clock_adjtime32
+347	64	clock_adjtime			sys_clock_adjtime
+347	spu	clock_adjtime			sys_clock_adjtime
+348	common	syncfs				sys_syncfs
+349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
+350	common	setns				sys_setns
+351	nospu	process_vm_readv		sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev
+353	nospu	finit_module			sys_finit_module
+354	nospu	kcmp				sys_kcmp
+355	common	sched_setattr			sys_sched_setattr
+356	common	sched_getattr			sys_sched_getattr
+357	common	renameat2			sys_renameat2
+358	common	seccomp				sys_seccomp
+359	common	getrandom			sys_getrandom
+360	common	memfd_create			sys_memfd_create
+361	common	bpf				sys_bpf
+362	nospu	execveat			sys_execveat			compat_sys_execveat
+363	32	switch_endian			sys_ni_syscall
+363	64	switch_endian			sys_switch_endian
+363	spu	switch_endian			sys_ni_syscall
+364	common	userfaultfd			sys_userfaultfd
+365	common	membarrier			sys_membarrier
+# 366-377 originally left for IPC, now unused
+378	nospu	mlock2				sys_mlock2
+379	nospu	copy_file_range			sys_copy_file_range
+380	common	preadv2				sys_preadv2			compat_sys_preadv2
+381	common	pwritev2			sys_pwritev2			compat_sys_pwritev2
+382	nospu	kexec_file_load			sys_kexec_file_load
+383	nospu	statx				sys_statx
+384	nospu	pkey_alloc			sys_pkey_alloc
+385	nospu	pkey_free			sys_pkey_free
+386	nospu	pkey_mprotect			sys_pkey_mprotect
+387	nospu	rseq				sys_rseq
+388	32	io_pgetevents			sys_io_pgetevents_time32	compat_sys_io_pgetevents
+388	64	io_pgetevents			sys_io_pgetevents
+# room for arch specific syscalls
+392	64	semtimedop			sys_semtimedop
+393	common	semget				sys_semget
+394	common	semctl				sys_semctl			compat_sys_semctl
+395	common	shmget				sys_shmget
+396	common	shmctl				sys_shmctl			compat_sys_shmctl
+397	common	shmat				sys_shmat			compat_sys_shmat
+398	common	shmdt				sys_shmdt
+399	common	msgget				sys_msgget
+400	common	msgsnd				sys_msgsnd			compat_sys_msgsnd
+401	common	msgrcv				sys_msgrcv			compat_sys_msgrcv
+402	common	msgctl				sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat			sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		sys_io_pgetevents
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+435	nospu	clone3				sys_clone3
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common  futex_waitv                     sys_futex_waitv
+450 	nospu	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 0000000000..0f39a6b841
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,1185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/export.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+#include <asm/pmc.h>
+#include <asm/firmware.h>
+#include <asm/idle.h>
+#include <asm/svm.h>
+
+#include "cacheinfo.h"
+#include "setup.h"
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#endif
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+#ifdef CONFIG_PPC64
+
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ *  up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
+
+/*
+ * sysfs store handler for the obsolete smt_snooze_delay attribute.
+ *
+ * The written value is ignored.  A one-time warning naming the writing task
+ * is logged and the whole buffer is reported as consumed.
+ */
+static ssize_t store_smt_snooze_delay(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+		     current->comm, current->pid);
+
+	return count;
+}
+
+/*
+ * sysfs show handler for the obsolete smt_snooze_delay attribute.
+ *
+ * Logs a one-time warning naming the reading task and always reports the
+ * fixed string "100".  sysfs_emit() is used instead of sprintf() so the
+ * output is bounded to the sysfs page buffer.
+ */
+static ssize_t show_smt_snooze_delay(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+		     current->comm, current->pid);
+	return sysfs_emit(buf, "100\n");
+}
+
+static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+		   store_smt_snooze_delay);
+
+/*
+ * Handler for the "smt-snooze-delay=" kernel command line option.
+ *
+ * The argument is never parsed.  On CPUs with CPU_FTR_SMT a warning is
+ * logged that the option has no effect.  Always returns 1.
+ */
+static int __init setup_smt_snooze_delay(char *str)
+{
+	if (cpu_has_feature(CPU_FTR_SMT))
+		pr_warn("smt-snooze-delay command line option has no effect\n");
+
+	return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Generate read_<NAME>() and write_<NAME>() helpers for the SPR at ADDRESS.
+ *
+ * Both helpers take a void pointer that is treated as an unsigned long *:
+ * the reader stores mfspr(ADDRESS) through it, the writer first evaluates
+ * EXTRA (which may be empty) and then passes the pointed-to value to
+ * mtspr(ADDRESS, ...).  The void * signature matches what
+ * smp_call_function_single() expects in the show/store wrappers below.
+ */
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
+static void read_##NAME(void *val) \
+{ \
+	*(unsigned long *)val = mfspr(ADDRESS);	\
+} \
+static void write_##NAME(void *val) \
+{ \
+	EXTRA; \
+	mtspr(ADDRESS, *(unsigned long *)val);	\
+}
+
+/*
+ * Generate the sysfs show_<NAME>()/store_<NAME>() handlers that run
+ * read_<NAME>()/write_<NAME>() on the CPU the device belongs to.
+ *
+ * show: prints the value read on the target CPU as hex followed by a newline.
+ * store: parses a hex value from the buffer (-EINVAL if none is found) and
+ * hands it to write_<NAME>() on the target CPU.
+ *
+ * The result of smp_call_function_single() is checked: if the cross call
+ * could not be made, the error is returned instead of printing an
+ * uninitialised value (show) or claiming the write succeeded (store).
+ */
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+static ssize_t show_##NAME(struct device *dev, \
+			struct device_attribute *attr, \
+			char *buf) \
+{ \
+	struct cpu *cpu = container_of(dev, struct cpu, dev); \
+	unsigned long val; \
+	int ret; \
+	ret = smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
+	if (ret) \
+		return ret; \
+	return sysfs_emit(buf, "%lx\n", val); \
+} \
+static ssize_t __used \
+	store_##NAME(struct device *dev, struct device_attribute *attr, \
+			const char *buf, size_t count) \
+{ \
+	struct cpu *cpu = container_of(dev, struct cpu, dev); \
+	unsigned long val; \
+	int ret = sscanf(buf, "%lx", &val); \
+	if (ret != 1) \
+		return -EINVAL; \
+	ret = smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
+	if (ret) \
+		return ret; \
+	return count; \
+}
+
+/*
+ * SYSFS_PMCSETUP: accessors plus show/store handlers for a performance
+ * monitor SPR.  ppc_enable_pmcs() is passed as EXTRA, so it runs before
+ * every write to the register.
+ */
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+	__SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+	__SYSFS_SPRSETUP_SHOW_STORE(NAME)
+/* SYSFS_SPRSETUP: same as above with no extra action before the write. */
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+	__SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+	__SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+/*
+ * SYSFS_SPRSETUP_SHOW_STORE: only the show/store handlers; the caller must
+ * supply its own read_<NAME>() and write_<NAME>().
+ */
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+	__SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#ifdef CONFIG_PPC64
+
+/*
+ * System wide DSCR default value.  Changing it through the sysfs
+ * interface also updates the per cpu DSCR default values that are
+ * stored in each cpu's PACA structure.
+ */
+static unsigned long dscr_default;
+
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val:	Pointer to an unsigned long that receives the value
+ *
+ * Copies the DSCR default held in the PACA of the cpu this function
+ * runs on into @val.
+ */
+static void read_dscr(void *val)
+{
+	unsigned long *out = val;
+
+	*out = get_paca()->dscr_default;
+}
+
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val:	Pointer to the new cpu specific DSCR default value
+ *
+ * Stores the new default in the PACA of the cpu this function runs on.
+ * Unless the current task has dscr_inherit set, the task's saved DSCR
+ * and the DSCR register itself are updated as well.
+ */
+static void write_dscr(void *val)
+{
+	const unsigned long dscr = *(unsigned long *)val;
+
+	get_paca()->dscr_default = dscr;
+
+	if (current->thread.dscr_inherit)
+		return;
+
+	current->thread.dscr = dscr;
+	mtspr(SPRN_DSCR, dscr);
+}
+
+/* Per cpu "dscr" attribute, backed by read_dscr()/write_dscr() above. */
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+
+/* Add owner write permission (0200) to an attribute's file mode. */
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+	umode_t *mode = &attr->attr.mode;
+
+	*mode |= 0200;
+}
+
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev:	Device structure
+ * @attr:	Device attribute structure
+ * @buf:	Interface buffer
+ *
+ * Prints the system wide DSCR default value as hex followed by a newline.
+ * sysfs_emit() is used instead of sprintf() so the output is bounded to
+ * the sysfs page buffer.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t show_dscr_default(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", dscr_default);
+}
+
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev:	Device structure
+ * @attr:	Device attribute structure
+ * @buf:	Interface buffer holding a hex value
+ * @count:	Size of the update
+ *
+ * Records the new system wide default and then runs write_dscr() on
+ * every cpu so the per cpu defaults follow it.
+ *
+ * Return: @count on success, -EINVAL if no hex value could be parsed.
+ */
+static ssize_t __used store_dscr_default(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long new_default;
+
+	if (sscanf(buf, "%lx", &new_default) != 1)
+		return -EINVAL;
+
+	dscr_default = new_default;
+	on_each_cpu(write_dscr, &new_default, 1);
+
+	return count;
+}
+
+static DEVICE_ATTR(dscr_default, 0600,
+		show_dscr_default, store_dscr_default);
+
+/*
+ * Seed the system wide and per cpu DSCR defaults from spr_default_dscr
+ * and create the "dscr_default" file under the cpu subsystem root
+ * device.  Does nothing on CPUs without CPU_FTR_DSCR.
+ */
+static void __init sysfs_create_dscr_default(void)
+{
+	struct device *root;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_DSCR))
+		return;
+
+	dscr_default = spr_default_dscr;
+	for_each_possible_cpu(i)
+		paca_ptrs[i]->dscr_default = dscr_default;
+
+	root = bus_get_dev_root(&cpu_subsys);
+	if (!root)
+		return;
+
+	device_create_file(root, &dev_attr_dscr_default);
+	put_device(root);
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_E500
+#define MAX_BIT				63
+
+/* Last wait times (ns) written through sysfs; 0 means none written yet. */
+static u64 pw20_wt;
+static u64 altivec_idle_wt;
+
+/*
+ * Convert a time in nanoseconds to timebase ticks and return the index
+ * of the highest set bit of that tick count (0 if the count is 0).
+ *
+ * From 10000ns upwards the time is first rounded to whole microseconds;
+ * below that the multiplication is done before the division.
+ */
+static unsigned int get_idle_ticks_bit(u64 ns)
+{
+	u64 ticks = (ns >= 10000) ?
+		div_u64(ns + 500, 1000) * tb_ticks_per_usec :
+		div_u64(ns * tb_ticks_per_usec, 1000);
+
+	return ticks ? ilog2(ticks) : 0;
+}
+
+/* Cross-call helper: store this cpu's PWRMGTCR0 into the u32 at @val. */
+static void do_show_pwrmgtcr0(void *val)
+{
+	*(u32 *)val = mfspr(SPRN_PWRMGTCR0);
+}
+
+/*
+ * sysfs show handler for pw20_state.
+ *
+ * Reads PWRMGTCR0 on the cpu identified by dev->id and prints 1 if
+ * PWRMGTCR0_PW20_WAIT is set, 0 otherwise.  If the cross call fails its
+ * error is returned rather than printing an uninitialised value.
+ */
+static ssize_t show_pw20_state(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu = dev->id;
+	u32 value;
+	int ret;
+
+	ret = smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%u\n", (value & PWRMGTCR0_PW20_WAIT) ? 1 : 0);
+}
+
+/*
+ * Cross-call helper: set PWRMGTCR0_PW20_WAIT in this cpu's PWRMGTCR0 when
+ * the u32 at @val is non-zero, clear it otherwise.
+ */
+static void do_store_pw20_state(void *val)
+{
+	const u32 enable = *(u32 *)val;
+	u32 reg = mfspr(SPRN_PWRMGTCR0);
+
+	reg &= ~PWRMGTCR0_PW20_WAIT;
+	if (enable)
+		reg |= PWRMGTCR0_PW20_WAIT;
+
+	mtspr(SPRN_PWRMGTCR0, reg);
+}
+
+/*
+ * sysfs store handler for pw20_state.
+ *
+ * Accepts 0 or 1 (anything else is -EINVAL) and applies it on the cpu
+ * identified by dev->id.  A failed cross call is reported to the writer
+ * instead of being silently treated as success.
+ */
+static ssize_t store_pw20_state(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int cpu = dev->id;
+	u32 value;
+	int ret;
+
+	if (kstrtou32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value > 1)
+		return -EINVAL;
+
+	ret = smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+/*
+ * sysfs show handler for pw20_wait_time (nanoseconds).
+ *
+ * If a wait time has been written through sysfs, that cached value is
+ * reported.  Otherwise the PW20 entry field of PWRMGTCR0 is read on the
+ * cpu identified by dev->id and converted from timebase ticks to ns.
+ * A failed cross call returns its error instead of printing garbage.
+ */
+static ssize_t show_pw20_wait_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu = dev->id;
+	u64 tb_cycle;
+	u64 time;
+	u32 value;
+	u32 shift;
+	int ret;
+
+	if (pw20_wt)
+		return sysfs_emit(buf, "%llu\n", pw20_wt);
+
+	ret = smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+	if (ret)
+		return ret;
+
+	value = (value & PWRMGTCR0_PW20_ENT) >> PWRMGTCR0_PW20_ENT_SHIFT;
+
+	/*
+	 * The tick count is 1 << (MAX_BIT - value + 1).  A field value of 0
+	 * would ask for a shift by 64, which is undefined for a u64, so
+	 * saturate whenever the shift does not fit.
+	 */
+	shift = MAX_BIT - value + 1;
+	tb_cycle = shift < 64 ? 1ULL << shift : U64_MAX;
+
+	/* convert timebase ticks to ns */
+	if (tb_ticks_per_usec > 1000) {
+		time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+	} else {
+		u32 rem_us;
+
+		time = div_u64_rem(tb_cycle, tb_ticks_per_usec, &rem_us);
+		time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+	}
+
+	return sysfs_emit(buf, "%llu\n", time);
+}
+
+/*
+ * Cross-call helper: program the automatic PW20 core idle count.
+ *
+ * Replaces the PWRMGTCR0_PW20_ENT field of this cpu's PWRMGTCR0 with
+ * MAX_BIT minus the u32 bit index at @val.
+ */
+static void set_pw20_wait_entry_bit(void *val)
+{
+	const u32 entry_bit = *(u32 *)val;
+	u32 reg = mfspr(SPRN_PWRMGTCR0);
+
+	/* drop the old count, then insert the new one */
+	reg = (reg & ~PWRMGTCR0_PW20_ENT) |
+	      ((MAX_BIT - entry_bit) << PWRMGTCR0_PW20_ENT_SHIFT);
+
+	mtspr(SPRN_PWRMGTCR0, reg);
+}
+
+/*
+ * sysfs store handler for pw20_wait_time (nanoseconds).
+ *
+ * Parses a non-zero u64, converts it to a timebase bit index and programs
+ * it on the cpu identified by dev->id.  The cached wait time is only
+ * updated once the register write has actually been performed, so a
+ * failed cross call no longer leaves a stale value to be reported by show.
+ *
+ * Return: @count on success, -EINVAL on bad input, or the cross call error.
+ */
+static ssize_t store_pw20_wait_time(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int cpu = dev->id;
+	u32 entry_bit;
+	u64 value;
+	int ret;
+
+	if (kstrtou64(buf, 0, &value))
+		return -EINVAL;
+
+	if (!value)
+		return -EINVAL;
+
+	entry_bit = get_idle_ticks_bit(value);
+	if (entry_bit > MAX_BIT)
+		return -EINVAL;
+
+	ret = smp_call_function_single(cpu, set_pw20_wait_entry_bit,
+				&entry_bit, 1);
+	if (ret)
+		return ret;
+
+	pw20_wt = value;
+
+	return count;
+}
+
+/*
+ * sysfs show handler for altivec_idle.
+ *
+ * Reads PWRMGTCR0 on the cpu identified by dev->id and prints 1 if
+ * PWRMGTCR0_AV_IDLE_PD_EN is set, 0 otherwise.  If the cross call fails
+ * its error is returned rather than printing an uninitialised value.
+ */
+static ssize_t show_altivec_idle(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu = dev->id;
+	u32 value;
+	int ret;
+
+	ret = smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%u\n",
+			  (value & PWRMGTCR0_AV_IDLE_PD_EN) ? 1 : 0);
+}
+
+/*
+ * Cross-call helper: set PWRMGTCR0_AV_IDLE_PD_EN in this cpu's PWRMGTCR0
+ * when the u32 at @val is non-zero, clear it otherwise.
+ */
+static void do_store_altivec_idle(void *val)
+{
+	const u32 enable = *(u32 *)val;
+	u32 reg = mfspr(SPRN_PWRMGTCR0);
+
+	reg &= ~PWRMGTCR0_AV_IDLE_PD_EN;
+	if (enable)
+		reg |= PWRMGTCR0_AV_IDLE_PD_EN;
+
+	mtspr(SPRN_PWRMGTCR0, reg);
+}
+
+/*
+ * sysfs store handler for altivec_idle.
+ *
+ * Accepts 0 or 1 (anything else is -EINVAL) and applies it on the cpu
+ * identified by dev->id.  A failed cross call is reported to the writer
+ * instead of being silently treated as success.
+ */
+static ssize_t store_altivec_idle(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int cpu = dev->id;
+	u32 value;
+	int ret;
+
+	if (kstrtou32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value > 1)
+		return -EINVAL;
+
+	ret = smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+/*
+ * sysfs show handler for altivec_idle_wait_time (nanoseconds).
+ *
+ * If a wait time has been written through sysfs, that cached value is
+ * reported.  Otherwise the AltiVec idle count field of PWRMGTCR0 is read
+ * on the cpu identified by dev->id and converted from timebase ticks to
+ * ns.  A failed cross call returns its error instead of printing garbage.
+ */
+static ssize_t show_altivec_idle_wait_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu = dev->id;
+	u64 tb_cycle;
+	u64 time;
+	u32 value;
+	u32 shift;
+	int ret;
+
+	if (altivec_idle_wt)
+		return sysfs_emit(buf, "%llu\n", altivec_idle_wt);
+
+	ret = smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
+	if (ret)
+		return ret;
+
+	value = (value & PWRMGTCR0_AV_IDLE_CNT) >> PWRMGTCR0_AV_IDLE_CNT_SHIFT;
+
+	/*
+	 * The tick count is 1 << (MAX_BIT - value + 1).  A field value of 0
+	 * would ask for a shift by 64, which is undefined for a u64, so
+	 * saturate whenever the shift does not fit.
+	 */
+	shift = MAX_BIT - value + 1;
+	tb_cycle = shift < 64 ? 1ULL << shift : U64_MAX;
+
+	/* convert timebase ticks to ns */
+	if (tb_ticks_per_usec > 1000) {
+		time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+	} else {
+		u32 rem_us;
+
+		time = div_u64_rem(tb_cycle, tb_ticks_per_usec, &rem_us);
+		time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+	}
+
+	return sysfs_emit(buf, "%llu\n", time);
+}
+
+/*
+ * Cross-call helper: program the automatic AltiVec idle count.
+ *
+ * Replaces the PWRMGTCR0_AV_IDLE_CNT field of this cpu's PWRMGTCR0 with
+ * MAX_BIT minus the u32 bit index at @val.
+ */
+static void set_altivec_idle_wait_entry_bit(void *val)
+{
+	const u32 entry_bit = *(u32 *)val;
+	u32 reg = mfspr(SPRN_PWRMGTCR0);
+
+	/* drop the old count, then insert the new one */
+	reg = (reg & ~PWRMGTCR0_AV_IDLE_CNT) |
+	      ((MAX_BIT - entry_bit) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
+
+	mtspr(SPRN_PWRMGTCR0, reg);
+}
+
+/*
+ * sysfs store handler for altivec_idle_wait_time (nanoseconds).
+ *
+ * Parses a non-zero u64, converts it to a timebase bit index and programs
+ * it on the cpu identified by dev->id.  The cached wait time is only
+ * updated once the register write has actually been performed, so a
+ * failed cross call no longer leaves a stale value to be reported by show.
+ *
+ * Return: @count on success, -EINVAL on bad input, or the cross call error.
+ */
+static ssize_t store_altivec_idle_wait_time(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int cpu = dev->id;
+	u32 entry_bit;
+	u64 value;
+	int ret;
+
+	if (kstrtou64(buf, 0, &value))
+		return -EINVAL;
+
+	if (!value)
+		return -EINVAL;
+
+	entry_bit = get_idle_ticks_bit(value);
+	if (entry_bit > MAX_BIT)
+		return -EINVAL;
+
+	ret = smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
+				&entry_bit, 1);
+	if (ret)
+		return ret;
+
+	altivec_idle_wt = value;
+
+	return count;
+}
+
+/*
+ * Enable/disable interface:
+ * 0 disables, 1 enables.  The store handlers reject any other value.
+ */
+static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state);
+static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle);
+
+/*
+ * Wait time interface, value in nanoseconds.
+ * Example, based on a timebase frequency of 41MHz:
+ * 1~48(ns): TB[63]
+ * 49~97(ns): TB[62]
+ * 98~195(ns): TB[61]
+ * 196~390(ns): TB[60]
+ * 391~780(ns): TB[59]
+ * 781~1560(ns): TB[58]
+ * ...
+ */
+static DEVICE_ATTR(pw20_wait_time, 0600,
+			show_pw20_wait_time,
+			store_pw20_wait_time);
+static DEVICE_ATTR(altivec_idle_wait_time, 0600,
+			show_altivec_idle_wait_time,
+			store_altivec_idle_wait_time);
+#endif
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+/*
+ * Mark the PMU as in use and, the first time this runs on a given cpu,
+ * call the platform's enable_pmcs hook if one is provided.
+ */
+void ppc_enable_pmcs(void)
+{
+	ppc_set_pmu_inuse(1);
+
+	/* The platform hook only has to run once per cpu */
+	if (!__this_cpu_read(pmcs_enabled)) {
+		__this_cpu_write(pmcs_enabled, 1);
+
+		if (ppc_md.enable_pmcs)
+			ppc_md.enable_pmcs();
+	}
+}
+EXPORT_SYMBOL(ppc_enable_pmcs);
+
+
+
+/* Let's define all possible registers, we'll only hook up the ones
+ * that are implemented on the current processor
+ */
+
+/*
+ * The HAS_PPC_PMC_* switches select which groups of PMU register
+ * accessors and attribute tables are compiled further down in this file.
+ * They are only defined when CONFIG_PMU_SYSFS is enabled.
+ */
+#ifdef CONFIG_PMU_SYSFS
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
+#define HAS_PPC_PMC_CLASSIC	1
+#define HAS_PPC_PMC_IBM		1
+#endif
+
+#ifdef CONFIG_PPC64
+#define HAS_PPC_PMC_PA6T	1
+#define HAS_PPC_PMC56          1
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#define HAS_PPC_PMC_G4		1
+#endif
+#endif /* CONFIG_PMU_SYSFS */
+
+/*
+ * HAS_PPC_PA6T gates the non-PMU PA6T registers (hid*, ima*, tsr*, ...)
+ * and does not depend on CONFIG_PMU_SYSFS.
+ */
+#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC)
+#define HAS_PPC_PA6T
+#endif
+/*
+ * SPRs which are not related to PMU.
+ */
+#ifdef CONFIG_PPC64
+SYSFS_SPRSETUP(purr, SPRN_PURR);
+SYSFS_SPRSETUP(spurr, SPRN_SPURR);
+SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(tscr, SPRN_TSCR);
+
+/*
+ * Let's only enable read for phyp resources and enable write when needed
+ * with a separate function.  Let's be conservative and default to pseries.
+ *
+ * Note that purr is declared read-only (0400) but still carries a store
+ * handler; presumably add_write_permission_dev_attr() is what turns the
+ * write bit on later -- the caller is outside this part of the file.
+ */
+static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
+static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
+static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Accessors and show/store handlers for the classic PMU registers.  Each
+ * SYSFS_PMCSETUP() line emits read_/write_/show_/store_ functions for the
+ * named register, with ppc_enable_pmcs() called before every write.
+ */
+#ifdef HAS_PPC_PMC_CLASSIC
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+#endif
+
+/* MMCR2 is only built for the G4 (Book3S 32-bit) configuration. */
+#ifdef HAS_PPC_PMC_G4
+SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#endif
+
+/* Registers that are only built for 64-bit: PMC7/8, MMCRA and MMCR3. */
+#ifdef HAS_PPC_PMC56
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3);
+
+static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3);
+#endif /* HAS_PPC_PMC56 */
+
+
+
+
+/*
+ * PA6T performance counters.  The generated functions carry a pa6t_
+ * prefix so they do not clash with the classic pmcN handlers above.
+ */
+#ifdef HAS_PPC_PMC_PA6T
+SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
+SYSFS_PMCSETUP(pa6t_pmc1, SPRN_PA6T_PMC1);
+SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
+SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
+SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
+SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
+#endif
+
+/*
+ * Non-PMU PA6T registers.  These use SYSFS_SPRSETUP(), so writes do not
+ * go through ppc_enable_pmcs().
+ */
+#ifdef HAS_PPC_PA6T
+SYSFS_SPRSETUP(hid0, SPRN_HID0);
+SYSFS_SPRSETUP(hid1, SPRN_HID1);
+SYSFS_SPRSETUP(hid4, SPRN_HID4);
+SYSFS_SPRSETUP(hid5, SPRN_HID5);
+SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0);
+SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1);
+SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2);
+SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3);
+SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4);
+SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5);
+SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6);
+SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7);
+SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8);
+SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9);
+SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT);
+SYSFS_SPRSETUP(btcr, SPRN_PA6T_BTCR);
+SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR);
+SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR);
+SYSFS_SPRSETUP(der, SPRN_PA6T_DER);
+SYSFS_SPRSETUP(mer, SPRN_PA6T_MER);
+SYSFS_SPRSETUP(ber, SPRN_PA6T_BER);
+SYSFS_SPRSETUP(ier, SPRN_PA6T_IER);
+SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER);
+SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR);
+SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
+SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
+SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
+SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
+#endif /* HAS_PPC_PA6T */
+
+#ifdef HAS_PPC_PMC_IBM
+/*
+ * MMCR0/MMCR1 attributes, owner read/write only.  The code that
+ * registers this table is outside this part of the file.
+ */
+static struct device_attribute ibm_common_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+};
+#endif /* HAS_PPC_PMC_IBM */
+
+#ifdef HAS_PPC_PMC_G4
+/*
+ * G4 variant of the common table: MMCR0/MMCR1 plus MMCR2, owner
+ * read/write only.
+ */
+static struct device_attribute g4_common_attrs[] = {
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	__ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
+};
+#endif /* HAS_PPC_PMC_G4 */
+
+#ifdef HAS_PPC_PMC_CLASSIC
+/*
+ * Classic performance counter attributes in counter order.  pmc7 and
+ * pmc8 are only present when HAS_PPC_PMC56 is defined, so they form the
+ * tail of the table.
+ */
+static struct device_attribute classic_pmc_attrs[] = {
+	__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
+	__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
+	__ATTR(pmc3, 0600, show_pmc3, store_pmc3),
+	__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
+	__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
+	__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
+#ifdef HAS_PPC_PMC56
+	__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
+	__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
+#endif
+};
+#endif
+
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
+/*
+ * PA6T attribute table.  The PMU half (HAS_PPC_PMC_PA6T) exposes the
+ * pa6t_pmcN handlers under the plain file names pmc0..pmc5; the debug
+ * half (HAS_PPC_PA6T) exposes the non-PMU registers.  Either half may be
+ * compiled out independently.
+ */
+static struct device_attribute pa6t_attrs[] = {
+#ifdef HAS_PPC_PMC_PA6T
+	__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
+	__ATTR(pmc1, 0600, show_pa6t_pmc1, store_pa6t_pmc1),
+	__ATTR(pmc2, 0600, show_pa6t_pmc2, store_pa6t_pmc2),
+	__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
+	__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
+	__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
+#endif
+#ifdef HAS_PPC_PA6T
+	__ATTR(hid0, 0600, show_hid0, store_hid0),
+	__ATTR(hid1, 0600, show_hid1, store_hid1),
+	__ATTR(hid4, 0600, show_hid4, store_hid4),
+	__ATTR(hid5, 0600, show_hid5, store_hid5),
+	__ATTR(ima0, 0600, show_ima0, store_ima0),
+	__ATTR(ima1, 0600, show_ima1, store_ima1),
+	__ATTR(ima2, 0600, show_ima2, store_ima2),
+	__ATTR(ima3, 0600, show_ima3, store_ima3),
+	__ATTR(ima4, 0600, show_ima4, store_ima4),
+	__ATTR(ima5, 0600, show_ima5, store_ima5),
+	__ATTR(ima6, 0600, show_ima6, store_ima6),
+	__ATTR(ima7, 0600, show_ima7, store_ima7),
+	__ATTR(ima8, 0600, show_ima8, store_ima8),
+	__ATTR(ima9, 0600, show_ima9, store_ima9),
+	__ATTR(imaat, 0600, show_imaat, store_imaat),
+	__ATTR(btcr, 0600, show_btcr, store_btcr),
+	__ATTR(pccr, 0600, show_pccr, store_pccr),
+	__ATTR(rpccr, 0600, show_rpccr, store_rpccr),
+	__ATTR(der, 0600, show_der, store_der),
+	__ATTR(mer, 0600, show_mer, store_mer),
+	__ATTR(ber, 0600, show_ber, store_ber),
+	__ATTR(ier, 0600, show_ier, store_ier),
+	__ATTR(sier, 0600, show_sier, store_sier),
+	__ATTR(siar, 0600, show_siar, store_siar),
+	__ATTR(tsr0, 0600, show_tsr0, store_tsr0),
+	__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
+	__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
+	__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
+#endif /* HAS_PPC_PA6T */
+};
+#endif
+
+#ifdef CONFIG_PPC_SVM
+/*
+ * sysfs show handler for the "svm" attribute: writes the value returned by
+ * is_secure_guest() into @buf as an unsigned decimal followed by a newline
+ * and returns the number of characters written.
+ */
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+/*
+ * Create the "svm" attribute file on the root device of the cpu subsystem.
+ * Does nothing when the subsystem has no root device; the result of
+ * device_create_file() is not checked.
+ */
+static void __init create_svm_file(void)
+{
+	struct device *root = bus_get_dev_root(&cpu_subsys);
+
+	if (!root)
+		return;
+
+	device_create_file(root, &dev_attr_svm);
+	/* Pairs with the bus_get_dev_root() lookup above. */
+	put_device(root);
+}
+#else
+static void __init create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Cross-CPU callback: store the result of read_this_idle_purr() in the u64
+ * that @val points to.
+ */
+static void read_idle_purr(void *val)
+{
+	*(u64 *)val = read_this_idle_purr();
+}
+
+/*
+ * sysfs show handler for the per-CPU "idle_purr" attribute.
+ *
+ * Runs read_idle_purr() on the CPU that @dev represents and prints the value
+ * it stored as hexadecimal followed by a newline.
+ *
+ * If the cross-CPU call fails, the callback never ran and @val holds no
+ * reading, so the error code is returned instead of formatting it.  @val is
+ * also zero-initialised so that uninitialised stack contents can never be
+ * exposed to userspace.
+ *
+ * Returns the number of characters written to @buf, or a negative errno.
+ */
+static ssize_t idle_purr_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	u64 val = 0;
+	int ret;
+
+	ret = smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
+
+/*
+ * Create the "idle_purr" file on @s, but only when the firmware reports
+ * FW_FEATURE_LPAR.
+ */
+static void create_idle_purr_file(struct device *s)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	device_create_file(s, &dev_attr_idle_purr);
+}
+
+/*
+ * Remove the "idle_purr" file from @s; mirrors create_idle_purr_file() and
+ * is likewise a no-op without FW_FEATURE_LPAR.
+ */
+static void remove_idle_purr_file(struct device *s)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	device_remove_file(s, &dev_attr_idle_purr);
+}
+
+/*
+ * Cross-CPU callback: store the result of read_this_idle_spurr() in the u64
+ * that @val points to.
+ */
+static void read_idle_spurr(void *val)
+{
+	*(u64 *)val = read_this_idle_spurr();
+}
+
+/*
+ * sysfs show handler for the per-CPU "idle_spurr" attribute.
+ *
+ * Runs read_idle_spurr() on the CPU that @dev represents and prints the value
+ * it stored as hexadecimal followed by a newline.
+ *
+ * If the cross-CPU call fails, the callback never ran and @val holds no
+ * reading, so the error code is returned instead of formatting it.  @val is
+ * also zero-initialised so that uninitialised stack contents can never be
+ * exposed to userspace.
+ *
+ * Returns the number of characters written to @buf, or a negative errno.
+ */
+static ssize_t idle_spurr_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	u64 val = 0;
+	int ret;
+
+	ret = smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);
+
+/*
+ * Create the "idle_spurr" file on @s, but only when the firmware reports
+ * FW_FEATURE_LPAR.
+ */
+static void create_idle_spurr_file(struct device *s)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	device_create_file(s, &dev_attr_idle_spurr);
+}
+
+/*
+ * Remove the "idle_spurr" file from @s; mirrors create_idle_spurr_file() and
+ * is likewise a no-op without FW_FEATURE_LPAR.
+ */
+static void remove_idle_spurr_file(struct device *s)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	device_remove_file(s, &dev_attr_idle_spurr);
+}
+
+#else /* CONFIG_PPC_PSERIES */
+#define create_idle_purr_file(s)
+#define remove_idle_purr_file(s)
+#define create_idle_spurr_file(s)
+#define remove_idle_spurr_file(s)
+#endif /* CONFIG_PPC_PSERIES */
+
+/*
+ * CPU hotplug "online" callback, installed by topology_init() through
+ * cpuhp_setup_state().  Creates the per-CPU sysfs attribute files that match
+ * the CPU's PMC type and feature bits, then calls cacheinfo_cpu_online().
+ *
+ * The results of device_create_file() are not checked, so file creation is
+ * best-effort; the function always returns 0.
+ */
+static int register_cpu_online(unsigned int cpu)
+{
+	struct cpu *c = &per_cpu(cpu_devices, cpu);
+	struct device *s = &c->dev;
+	struct device_attribute *attrs, *pmc_attrs;
+	int i, nattrs;
+
+	/* For cpus present at boot a reference was already grabbed in register_cpu() */
+	if (!s->of_node)
+		s->of_node = of_get_cpu_node(cpu, NULL);
+
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_SMT))
+		device_create_file(s, &dev_attr_smt_snooze_delay);
+#endif
+
+	/* PMC stuff: select the attribute tables for this CPU's pmc_type */
+	switch (cur_cpu_spec->pmc_type) {
+#ifdef HAS_PPC_PMC_IBM
+	case PPC_PMC_IBM:
+		attrs = ibm_common_attrs;
+		nattrs = ARRAY_SIZE(ibm_common_attrs);
+		pmc_attrs = classic_pmc_attrs;
+		break;
+#endif /* HAS_PPC_PMC_IBM */
+#ifdef HAS_PPC_PMC_G4
+	case PPC_PMC_G4:
+		attrs = g4_common_attrs;
+		nattrs = ARRAY_SIZE(g4_common_attrs);
+		pmc_attrs = classic_pmc_attrs;
+		break;
+#endif /* HAS_PPC_PMC_G4 */
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
+	case PPC_PMC_PA6T:
+		/* PA Semi starts counting at PMC0 */
+		attrs = pa6t_attrs;
+		nattrs = ARRAY_SIZE(pa6t_attrs);
+		pmc_attrs = NULL;
+		break;
+#endif
+	default:
+		/* Unknown or unsupported PMC type: no PMC files at all */
+		attrs = NULL;
+		nattrs = 0;
+		pmc_attrs = NULL;
+	}
+
+	/* Every entry of the selected common table */
+	for (i = 0; i < nattrs; i++)
+		device_create_file(s, &attrs[i]);
+
+	/* Only the first num_pmcs entries of the per-counter table */
+	if (pmc_attrs)
+		for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
+			device_create_file(s, &pmc_attrs[i]);
+
+#ifdef CONFIG_PPC64
+#ifdef	CONFIG_PMU_SYSFS
+	if (cpu_has_feature(CPU_FTR_MMCRA))
+		device_create_file(s, &dev_attr_mmcra);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		device_create_file(s, &dev_attr_mmcr3);
+#endif /* CONFIG_PMU_SYSFS */
+
+	if (cpu_has_feature(CPU_FTR_PURR)) {
+		/*
+		 * add_write_permission_dev_attr() is applied to the purr
+		 * attribute only when not running as an LPAR.
+		 */
+		if (!firmware_has_feature(FW_FEATURE_LPAR))
+			add_write_permission_dev_attr(&dev_attr_purr);
+		device_create_file(s, &dev_attr_purr);
+		create_idle_purr_file(s);
+	}
+
+	if (cpu_has_feature(CPU_FTR_SPURR)) {
+		device_create_file(s, &dev_attr_spurr);
+		create_idle_spurr_file(s);
+	}
+
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		device_create_file(s, &dev_attr_dscr);
+
+	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
+		device_create_file(s, &dev_attr_pir);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		!firmware_has_feature(FW_FEATURE_LPAR))
+		device_create_file(s, &dev_attr_tscr);
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_E500
+	/* The pw20 and altivec idle files are created on e6500 cores only */
+	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+		device_create_file(s, &dev_attr_pw20_state);
+		device_create_file(s, &dev_attr_pw20_wait_time);
+
+		device_create_file(s, &dev_attr_altivec_idle);
+		device_create_file(s, &dev_attr_altivec_idle_wait_time);
+	}
+#endif
+	cacheinfo_cpu_online(cpu);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * CPU hotplug "offline" callback, the counterpart of register_cpu_online().
+ * Removes the per-CPU sysfs attribute files using the same PMC-type and
+ * feature tests that were used to create them, calls cacheinfo_cpu_offline()
+ * and drops the device-tree node reference held in the device's of_node.
+ *
+ * Returns -EBUSY (with a rate-limited warning) when the CPU is not marked
+ * hotpluggable, otherwise 0.
+ */
+static int unregister_cpu_online(unsigned int cpu)
+{
+	struct cpu *c = &per_cpu(cpu_devices, cpu);
+	struct device *s = &c->dev;
+	struct device_attribute *attrs, *pmc_attrs;
+	int i, nattrs;
+
+	if (WARN_RATELIMIT(!c->hotpluggable, "cpu %d can't be offlined\n", cpu))
+		return -EBUSY;
+
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_SMT))
+		device_remove_file(s, &dev_attr_smt_snooze_delay);
+#endif
+
+	/* PMC stuff: select the attribute tables for this CPU's pmc_type */
+	switch (cur_cpu_spec->pmc_type) {
+#ifdef HAS_PPC_PMC_IBM
+	case PPC_PMC_IBM:
+		attrs = ibm_common_attrs;
+		nattrs = ARRAY_SIZE(ibm_common_attrs);
+		pmc_attrs = classic_pmc_attrs;
+		break;
+#endif /* HAS_PPC_PMC_IBM */
+#ifdef HAS_PPC_PMC_G4
+	case PPC_PMC_G4:
+		attrs = g4_common_attrs;
+		nattrs = ARRAY_SIZE(g4_common_attrs);
+		pmc_attrs = classic_pmc_attrs;
+		break;
+#endif /* HAS_PPC_PMC_G4 */
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
+	case PPC_PMC_PA6T:
+		/* PA Semi starts counting at PMC0 */
+		attrs = pa6t_attrs;
+		nattrs = ARRAY_SIZE(pa6t_attrs);
+		pmc_attrs = NULL;
+		break;
+#endif
+	default:
+		/* Unknown or unsupported PMC type: nothing was created */
+		attrs = NULL;
+		nattrs = 0;
+		pmc_attrs = NULL;
+	}
+
+	/* Every entry of the selected common table */
+	for (i = 0; i < nattrs; i++)
+		device_remove_file(s, &attrs[i]);
+
+	/* Only the first num_pmcs entries of the per-counter table */
+	if (pmc_attrs)
+		for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
+			device_remove_file(s, &pmc_attrs[i]);
+
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
+	if (cpu_has_feature(CPU_FTR_MMCRA))
+		device_remove_file(s, &dev_attr_mmcra);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		device_remove_file(s, &dev_attr_mmcr3);
+#endif /* CONFIG_PMU_SYSFS */
+
+	if (cpu_has_feature(CPU_FTR_PURR)) {
+		device_remove_file(s, &dev_attr_purr);
+		remove_idle_purr_file(s);
+	}
+
+	if (cpu_has_feature(CPU_FTR_SPURR)) {
+		device_remove_file(s, &dev_attr_spurr);
+		remove_idle_spurr_file(s);
+	}
+
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		device_remove_file(s, &dev_attr_dscr);
+
+	if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
+		device_remove_file(s, &dev_attr_pir);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		!firmware_has_feature(FW_FEATURE_LPAR))
+		device_remove_file(s, &dev_attr_tscr);
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_E500
+	/* The pw20 and altivec idle files exist on e6500 cores only */
+	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
+		device_remove_file(s, &dev_attr_pw20_state);
+		device_remove_file(s, &dev_attr_pw20_wait_time);
+
+		device_remove_file(s, &dev_attr_altivec_idle);
+		device_remove_file(s, &dev_attr_altivec_idle_wait_time);
+	}
+#endif
+	cacheinfo_cpu_offline(cpu);
+	/* Clear of_node so register_cpu_online() looks the node up again */
+	of_node_put(s->of_node);
+	s->of_node = NULL;
+	return 0;
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+#define unregister_cpu_online NULL
+#endif
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+/*
+ * Forward a cpu "probe" request to the platform's ppc_md.cpu_probe hook.
+ * Returns the hook's result, or -EINVAL when the platform provides no hook.
+ */
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+	if (!ppc_md.cpu_probe)
+		return -EINVAL;
+
+	return ppc_md.cpu_probe(buf, count);
+}
+
+/*
+ * Forward a cpu "release" request to the platform's ppc_md.cpu_release hook.
+ * Returns the hook's result, or -EINVAL when the platform provides no hook.
+ */
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+	if (!ppc_md.cpu_release)
+		return -EINVAL;
+
+	return ppc_md.cpu_release(buf, count);
+}
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+static DEFINE_MUTEX(cpu_mutex);
+
+/*
+ * Create the sysfs file described by @attr on the cpu device of every
+ * possible CPU, serialised by cpu_mutex.  The result of each
+ * device_create_file() call is ignored; always returns 0.
+ */
+int cpu_add_dev_attr(struct device_attribute *attr)
+{
+	int i;
+
+	mutex_lock(&cpu_mutex);
+	for_each_possible_cpu(i)
+		device_create_file(get_cpu_device(i), attr);
+	mutex_unlock(&cpu_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cpu_add_dev_attr);
+
+/*
+ * Create the sysfs attribute group @attrs on the cpu device of every possible
+ * CPU, serialised by cpu_mutex.
+ *
+ * topology_init() only registers CPUs that are online or hotpluggable, so
+ * get_cpu_device() can return NULL for other possible CPUs.  Those are
+ * skipped rather than handing a NULL-derived kobject to sysfs, matching
+ * cpu_add_dev_attr(), where device_create_file() ignores a NULL device.
+ *
+ * A failing sysfs_create_group() triggers WARN_ON() but does not stop the
+ * loop; the function always returns 0.
+ */
+int cpu_add_dev_attr_group(struct attribute_group *attrs)
+{
+	int cpu;
+	struct device *dev;
+	int ret;
+
+	mutex_lock(&cpu_mutex);
+
+	for_each_possible_cpu(cpu) {
+		dev = get_cpu_device(cpu);
+		if (!dev)
+			continue;
+
+		ret = sysfs_create_group(&dev->kobj, attrs);
+		WARN_ON(ret != 0);
+	}
+
+	mutex_unlock(&cpu_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cpu_add_dev_attr_group);
+
+
+/*
+ * Remove the sysfs file described by @attr from the cpu device of every
+ * possible CPU, serialised by cpu_mutex.
+ */
+void cpu_remove_dev_attr(struct device_attribute *attr)
+{
+	int i;
+
+	mutex_lock(&cpu_mutex);
+	for_each_possible_cpu(i)
+		device_remove_file(get_cpu_device(i), attr);
+	mutex_unlock(&cpu_mutex);
+}
+EXPORT_SYMBOL_GPL(cpu_remove_dev_attr);
+
+/*
+ * Remove the sysfs attribute group @attrs from the cpu device of every
+ * possible CPU, serialised by cpu_mutex.
+ *
+ * topology_init() only registers CPUs that are online or hotpluggable, so
+ * get_cpu_device() can return NULL for other possible CPUs.  Those are
+ * skipped: sysfs_remove_group() must not be handed a kobject derived from a
+ * NULL device.
+ */
+void cpu_remove_dev_attr_group(struct attribute_group *attrs)
+{
+	int cpu;
+	struct device *dev;
+
+	mutex_lock(&cpu_mutex);
+
+	for_each_possible_cpu(cpu) {
+		dev = get_cpu_device(cpu);
+		if (!dev)
+			continue;
+
+		sysfs_remove_group(&dev->kobj, attrs);
+	}
+
+	mutex_unlock(&cpu_mutex);
+}
+EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
+
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+/*
+ * Create a symlink to @dev, named after @dev's kobject, inside the sysfs
+ * directory of node @nid.  @nid is used to index node_devices[] without any
+ * validation.  Returns the result of sysfs_create_link().
+ */
+int sysfs_add_device_to_node(struct device *dev, int nid)
+{
+	struct kobject *node_kobj = &node_devices[nid]->dev.kobj;
+
+	return sysfs_create_link(node_kobj, &dev->kobj,
+				 kobject_name(&dev->kobj));
+}
+EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
+
+/*
+ * Remove the symlink named after @dev's kobject from the sysfs directory of
+ * node @nid.  @nid is used to index node_devices[] without any validation.
+ */
+void sysfs_remove_device_from_node(struct device *dev, int nid)
+{
+	struct kobject *node_kobj = &node_devices[nid]->dev.kobj;
+
+	sysfs_remove_link(node_kobj, kobject_name(&dev->kobj));
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
+#endif
+
+/*
+ * sysfs show handler for "physical_id": prints the value returned by
+ * get_hard_smp_processor_id() for this cpu device's id as a decimal number
+ * followed by a newline.  Only valid if CPU is present.
+ */
+static ssize_t show_physical_id(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	/* @dev is the device embedded in struct cpu, so dev->id is that cpu's id. */
+	return sprintf(buf, "%d\n", get_hard_smp_processor_id(dev->id));
+}
+static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+/*
+ * Subsystem initcall that sets up the per-CPU sysfs topology:
+ *
+ *  - marks every possible CPU hotpluggable when the platform's smp_ops
+ *    provides cpu_offline_self (CONFIG_HOTPLUG_CPU only);
+ *  - registers a cpu device, plus its "physical_id" file, for each possible
+ *    CPU that is online or hotpluggable;
+ *  - installs register_cpu_online()/unregister_cpu_online() as dynamic
+ *    hotplug callbacks;
+ *  - calls sysfs_create_dscr_default() (PPC64 only) and create_svm_file().
+ *
+ * Failures of register_cpu() and device_create_file() are ignored and a
+ * cpuhp_setup_state() failure only triggers WARN_ON(); always returns 0.
+ */
+static int __init topology_init(void)
+{
+	int cpu, r;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+		/*
+		 * For now, we just see if the system supports making
+		 * the RTAS calls for CPU hotplug.  But, there may be a
+		 * more comprehensive way to do this for an individual
+		 * CPU.  For instance, the boot cpu might never be valid
+		 * for hotplugging.
+		 */
+		if (smp_ops && smp_ops->cpu_offline_self)
+			c->hotpluggable = 1;
+#endif
+
+		/* Possible CPUs that are neither online nor hotpluggable get no device */
+		if (cpu_online(cpu) || c->hotpluggable) {
+			register_cpu(c, cpu);
+
+			device_create_file(&c->dev, &dev_attr_physical_id);
+		}
+	}
+	/* Without CONFIG_HOTPLUG_CPU, unregister_cpu_online is defined as NULL */
+	r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online",
+			      register_cpu_online, unregister_cpu_online);
+	WARN_ON(r < 0);
+#ifdef CONFIG_PPC64
+	sysfs_create_dscr_default();
+#endif /* CONFIG_PPC64 */
+
+	create_svm_file();
+
+	return 0;
+}
+subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/systbl.c b/arch/powerpc/kernel/systbl.c
new file mode 100644
index 0000000000..4305f2a216
--- /dev/null
+++ b/arch/powerpc/kernel/systbl.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains the table of syscall-handling functions.
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) 
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/unistd.h>
+#include <asm/syscalls.h>
+
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
+
+#undef __SYSCALL
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#define __SYSCALL(nr, entry) [nr] = entry,
+#else
+/*
+ * Coerce syscall handlers with arbitrary parameters to common type
+ * requires cast to void* to avoid -Wcast-function-type.
+ */
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
+#endif
+
+/*
+ * Native system call table.  Each line of the included header expands through
+ * __SYSCALL()/__SYSCALL_WITH_COMPAT() (defined above) into a designated
+ * initialiser "[nr] = entry," so the table is indexed by syscall number.
+ * At this point __SYSCALL_WITH_COMPAT() selects the native entry.
+ */
+const syscall_fn sys_call_table[] = {
+#ifdef CONFIG_PPC64
+#include <asm/syscall_table_64.h>
+#else
+#include <asm/syscall_table_32.h>
+#endif
+};
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat system call table: the 32-bit table header is expanded again, this
+ * time with __SYSCALL_WITH_COMPAT() redefined to pick the compat entry point
+ * instead of the native one.
+ */
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, compat)
+const syscall_fn compat_sys_call_table[] = {
+#include <asm/syscall_table_32.h>
+};
+#endif /* CONFIG_COMPAT */
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
new file mode 100644
index 0000000000..cba6dd15de
--- /dev/null
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tau_6xx.c (historically temp.c)	Thermal management for CPUs with Thermal Assist Units
+ *
+ * Written by Troy Benjegerdes <hozer@drgw.net>
+ *
+ * TODO:
+ * dynamic power management to limit peak CPU temp (using ICTC)
+ * calibration???
+ *
+ * Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery
+ * life in portables, and add a 'performance/watt' metric somewhere in /proc
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/reg.h>
+#include <asm/nvram.h>
+#include <asm/cache.h>
+#include <asm/8xx_immap.h>
+#include <asm/machdep.h>
+
+#include "setup.h"
+
+/* Per-CPU thermal window state, indexed by CPU number. */
+static struct tau_temp
+{
+	int interrupts;		/* incremented on each TAUException() */
+	unsigned char low;	/* lower threshold, programmed into THRM1 */
+	unsigned char high;	/* upper threshold, programmed into THRM2 */
+	unsigned char grew;	/* set by TAUupdate() on a crossing, cleared by tau_timeout() */
+} tau[NR_CPUS];
+
+static bool tau_int_enable;
+
+/* TODO: put these in a /proc interface, with some sanity checks, and maybe
+ * dynamic adjustment to minimize # of interrupts */
+/* configurable values for step size and how much to expand the window when
+ * we get an interrupt. These are based on the limit that was out of range */
+#define step_size		2	/* step size when temp goes out of range */
+#define window_expand		1	/* expand the window by this much */
+/* configurable values for shrinking the window */
+#define shrink_timer	2000	/* period between shrinking the window */
+#define min_window	2	/* minimum window size, degrees C */
+
+/*
+ * Program THRM1 and THRM2 from the low/high window of @cpu.  The interrupt
+ * enable bit (THRM1_TIE) is only included when tau_int_enable is set.
+ */
+static void set_thresholds(unsigned long cpu)
+{
+	const struct tau_temp *t = &tau[cpu];
+	u32 tie = 0;
+
+	if (tau_int_enable)
+		tie = THRM1_TIE;
+
+	/* THRM1: low threshold, valid bit, interrupt when below threshold */
+	mtspr(SPRN_THRM1, THRM1_THRES(t->low) | THRM1_V | tie | THRM1_TID);
+
+	/* THRM2: high threshold, valid bit, interrupt when above threshold */
+	mtspr(SPRN_THRM2, THRM1_THRES(t->high) | THRM1_V | tie);
+}
+
+/*
+ * Check THRM1 and THRM2 for a threshold crossing and move the [low, high]
+ * window of @cpu accordingly.
+ *
+ * A register counts as "crossed" when THRM1_TIV, THRM1_TIN and THRM1_V are
+ * all set in it.  The register is then cleared and, if there is room, the
+ * window is moved by step_size on the crossed side and by
+ * (step_size - window_expand) on the other side, i.e. it shifts and widens
+ * by window_expand.  'grew' is set on any crossing, even when the window
+ * could not be moved.
+ */
+static void TAUupdate(int cpu)
+{
+	u32 thrm;
+	u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
+
+	/* if both thresholds are crossed, the step_sizes cancel out
+	 * and the window winds up getting expanded twice. */
+	thrm = mfspr(SPRN_THRM1);
+	if ((thrm & bits) == bits) {
+		mtspr(SPRN_THRM1, 0);
+
+		/* Only move down while 'low' cannot underflow */
+		if (tau[cpu].low >= step_size) {
+			tau[cpu].low -= step_size;
+			tau[cpu].high -= (step_size - window_expand);
+		}
+		tau[cpu].grew = 1;
+		pr_debug("%s: low threshold crossed\n", __func__);
+	}
+	thrm = mfspr(SPRN_THRM2);
+	if ((thrm & bits) == bits) {
+		mtspr(SPRN_THRM2, 0);
+
+		/* Only move up while 'high' stays at or below 127 */
+		if (tau[cpu].high <= 127 - step_size) {
+			tau[cpu].low += (step_size - window_expand);
+			tau[cpu].high += step_size;
+		}
+		tau[cpu].grew = 1;
+		pr_debug("%s: high threshold crossed\n", __func__);
+	}
+}
+
+#ifdef CONFIG_TAU_INT
+/*
+ * TAU interrupts - called when we have a thermal assist unit interrupt
+ * with interrupts disabled
+ */
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
+{
+	int cpu = smp_processor_id();
+
+	/* Count the interrupt (reported through tau_interrupts()) */
+	tau[cpu].interrupts++;
+
+	/* Adjust this CPU's window for whichever threshold was crossed */
+	TAUupdate(cpu);
+}
+#endif /* CONFIG_TAU_INT */
+
+/*
+ * Periodic per-CPU work, run on every CPU by tau_work_func() via
+ * on_each_cpu().  @info is unused.
+ *
+ * When interrupts are not used, polls the threshold registers through
+ * TAUupdate().  Then, with the sensor stopped (THRM3 = 0), shrinks the
+ * [low, high] window if it is wider than min_window and did not grow since
+ * the last run, reprograms the thresholds and restarts the sensor.
+ */
+static void tau_timeout(void * info)
+{
+	int cpu;
+	int size;
+	int shrink;
+
+	cpu = smp_processor_id();
+
+	/* In interrupt mode TAUException() already calls TAUupdate() */
+	if (!tau_int_enable)
+		TAUupdate(cpu);
+
+	/* Stop thermal sensor comparisons and interrupts */
+	mtspr(SPRN_THRM3, 0);
+
+	size = tau[cpu].high - tau[cpu].low;
+	if (size > min_window && ! tau[cpu].grew) {
+		/* do an exponential shrink of half the amount currently over size */
+		shrink = (2 + size - min_window) / 4;
+		if (shrink) {
+			/* Applied to both ends, so the window narrows by 2 * shrink */
+			tau[cpu].low += shrink;
+			tau[cpu].high -= shrink;
+		} else { /* size must have been min_window + 1 */
+			tau[cpu].low += 1;
+#if 1 /* debug */
+			if ((tau[cpu].high - tau[cpu].low) != min_window){
+				printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__);
+			}
+#endif
+		}
+	}
+
+	/* Start a fresh observation period for the next run */
+	tau[cpu].grew = 0;
+
+	set_thresholds(cpu);
+
+	/* Restart thermal sensor comparisons and interrupts.
+	 * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+	 * recommends that "the maximum value be set in THRM3 under all
+	 * conditions."
+	 */
+	mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
+}
+
+static struct workqueue_struct *tau_workq;
+
+/*
+ * Self-rearming work item: sleep for shrink_timer milliseconds, run
+ * tau_timeout() on every CPU without waiting for completion (last argument
+ * of on_each_cpu() is 0), then queue itself again on tau_workq.
+ */
+static void tau_work_func(struct work_struct *work)
+{
+	msleep(shrink_timer);
+	on_each_cpu(tau_timeout, NULL, 0);
+	/* schedule ourselves to be run again */
+	queue_work(tau_workq, work);
+}
+
+static DECLARE_WORK(tau_work, tau_work_func);
+
+/*
+ * setup the TAU
+ *
+ * Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound.
+ * Start off at zero
+ */
+
+int tau_initialized = 0;
+
+/*
+ * Per-CPU part of TAU_init(), run on each CPU via on_each_cpu(): start with
+ * a wide window (5..120) and program it into the threshold registers.
+ * @info is unused.
+ */
+static void __init TAU_init_smp(void *info)
+{
+	unsigned long this_cpu = smp_processor_id();
+	struct tau_temp *t = &tau[this_cpu];
+
+	/* set these to a reasonable value and let the timer shrink the
+	 * window */
+	t->low = 5;
+	t->high = 120;
+
+	set_thresholds(this_cpu);
+}
+
+/*
+ * Initcall that starts thermal-assist-unit monitoring.
+ *
+ * Without CPU_FTR_TAU it only logs a message and returns 1.  Otherwise it
+ * decides whether interrupts are used (CONFIG_TAU_INT enabled and the
+ * platform string is "ppc750"), allocates the ordered "tau" workqueue,
+ * initialises the window on every CPU and queues the periodic work item.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated, and 1
+ * when no TAU is present.
+ * NOTE(review): 1 is not an errno; kept unchanged to preserve the existing
+ * initcall result -- confirm whether -ENODEV would be preferable.
+ */
+static int __init TAU_init(void)
+{
+	/* We assume in SMP that if one CPU has TAU support, they
+	 * all have it --BenH
+	 */
+	if (!cpu_has_feature(CPU_FTR_TAU)) {
+		/* Use an explicit log level, like the pr_info() below */
+		pr_info("Thermal assist unit not available\n");
+		tau_initialized = 0;
+		return 1;
+	}
+
+	tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
+			 !strcmp(cur_cpu_spec->platform, "ppc750");
+
+	tau_workq = alloc_ordered_workqueue("tau", 0);
+	if (!tau_workq)
+		return -ENOMEM;
+
+	/* Program the initial thresholds on every CPU (no wait) */
+	on_each_cpu(TAU_init_smp, NULL, 0);
+
+	/* Kick off the self-rearming shrink/poll work */
+	queue_work(tau_workq, &tau_work);
+
+	pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
+		tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
+	tau_initialized = 1;
+
+	return 0;
+}
+
+__initcall(TAU_init);
+
+/*
+ * return current temp
+ */
+
+/*
+ * Return both window bounds of @cpu packed into one word: 'high' in bits
+ * 16 and up, 'low' in the low bits.  @cpu is not range-checked.
+ */
+u32 cpu_temp_both(unsigned long cpu)
+{
+	u32 hi = tau[cpu].high;
+	u32 lo = tau[cpu].low;
+
+	return (hi << 16) | lo;
+}
+
+/*
+ * Return the midpoint of the [low, high] window of @cpu (integer division).
+ * @cpu is not range-checked.
+ */
+u32 cpu_temp(unsigned long cpu)
+{
+	u32 sum = tau[cpu].high + tau[cpu].low;
+
+	return sum / 2;
+}
+
+/*
+ * Return the interrupt counter kept for @cpu (incremented by
+ * TAUException()).  @cpu is not range-checked.
+ */
+u32 tau_interrupts(unsigned long cpu)
+{
+	const struct tau_temp *t = &tau[cpu];
+
+	return t->interrupts;
+}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
new file mode 100644
index 0000000000..df20cf201f
--- /dev/null
+++ b/arch/powerpc/kernel/time.c
@@ -0,0 +1,1032 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common time routines among all ppc machines.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) to merge
+ * Paul Mackerras' version and mine for PReP and Pmac.
+ * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
+ * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
+ *
+ * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
+ * to make clock more stable (2.4.0-test5). The only thing
+ * that this code assumes is that the timebases have been synchronized
+ * by firmware on SMP and are never stopped (never do sleep
+ * on SMP then, nap and doze are OK).
+ * 
+ * Speeded up do_gettimeofday by getting rid of references to
+ * xtime (which required locks for consistency). (mikejc@us.ibm.com)
+ *
+ * TODO (not necessarily in this file):
+ * - improve precision and reproducibility of timebase frequency
+ * measurement at boot time.
+ * - for astronomical applications: add a new function to get
+ * non ambiguous timestamps even around leap seconds. This needs
+ * a new timestamp format and a good name.
+ *
+ * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
+ *             "A Kernel Model for Precision Timekeeping" by Dave Mills
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/kernel_stat.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/cpu.h>
+#include <linux/security.h>
+#include <linux/percpu.h>
+#include <linux/rtc.h>
+#include <linux/jiffies.h>
+#include <linux/posix-timers.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/irq_work.h>
+#include <linux/of_clk.h>
+#include <linux/suspend.h>
+#include <linux/processor.h>
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+
+#include <asm/trace.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/nvram.h>
+#include <asm/cache.h>
+#include <asm/machdep.h>
+#include <linux/uaccess.h>
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+#include <asm/smp.h>
+#include <asm/vdso_datapage.h>
+#include <asm/firmware.h>
+#include <asm/mce.h>
+
+/* powerpc clocksource/clockevent code */
+
+#include <linux/clockchips.h>
+#include <linux/timekeeper_internal.h>
+
+/* Read callback for the clocksource below; defined later in this file. */
+static u64 timebase_read(struct clocksource *);
+/*
+ * "timebase" clocksource: rating 400, full 64-bit mask, flagged continuous,
+ * read through timebase_read() and advertised to the vDSO as
+ * VDSO_CLOCKMODE_ARCHTIMER.
+ */
+static struct clocksource clocksource_timebase = {
+	.name         = "timebase",
+	.rating       = 400,
+	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
+	.mask         = CLOCKSOURCE_MASK(64),
+	.read         = timebase_read,
+	.vdso_clock_mode	= VDSO_CLOCKMODE_ARCHTIMER,
+};
+
+#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
+u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
+
+static int decrementer_set_next_event(unsigned long evt,
+				      struct clock_event_device *dev);
+static int decrementer_shutdown(struct clock_event_device *evt);
+
+/*
+ * "decrementer" clock event device: one-shot only, flagged
+ * CLOCK_EVT_FEAT_C3STOP.  decrementer_shutdown() serves as the handler for
+ * the oneshot-stopped and shutdown state changes as well as for tick resume.
+ */
+struct clock_event_device decrementer_clockevent = {
+	.name			= "decrementer",
+	.rating			= 200,
+	.irq			= 0,
+	.set_next_event		= decrementer_set_next_event,
+	.set_state_oneshot_stopped = decrementer_shutdown,
+	.set_state_shutdown	= decrementer_shutdown,
+	.tick_resume		= decrementer_shutdown,
+	.features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_C3STOP,
+};
+EXPORT_SYMBOL(decrementer_clockevent);
+
+/*
+ * This always puts next_tb beyond now, so the clock event will never fire
+ * with the usual comparison, no need for a separate test for stopped.
+ */
+#define DEC_CLOCKEVENT_STOPPED ~0ULL
+DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+
+/* Number of "xsec" units per second: 2^20. */
+#define XSEC_PER_SEC (1024*1024)
+
+/*
+ * SCALE_XSEC(xsec, max): scale @max by @xsec / XSEC_PER_SEC.
+ * Both arguments are parenthesised in the 64-bit form so that an expression
+ * argument such as "a + b" is multiplied as a whole.
+ */
+#ifdef CONFIG_PPC64
+#define SCALE_XSEC(xsec, max)	(((xsec) * (max)) / XSEC_PER_SEC)
+#else
+/* compute ((xsec << 12) * max) >> 32 */
+#define SCALE_XSEC(xsec, max)	mulhwu((xsec) << 12, max)
+#endif
+
+unsigned long tb_ticks_per_jiffy;
+unsigned long tb_ticks_per_usec = 100; /* sane default */
+EXPORT_SYMBOL(tb_ticks_per_usec);
+unsigned long tb_ticks_per_sec;
+EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime conversions */
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL_GPL(rtc_lock);
+
+static u64 tb_to_ns_scale __read_mostly;
+static unsigned tb_to_ns_shift __read_mostly;
+static u64 boot_tb __read_mostly;
+
+extern struct timezone sys_tz;
+static long timezone_offset;
+
+unsigned long ppc_proc_freq;
+EXPORT_SYMBOL_GPL(ppc_proc_freq);
+unsigned long ppc_tb_freq;
+EXPORT_SYMBOL_GPL(ppc_tb_freq);
+
+bool tb_invalid;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Return the SPURR if the CPU has one, else the PURR if the CPU has one,
+ * else the timebase value @tb supplied by the caller.
+ */
+static inline unsigned long read_spurr(unsigned long tb)
+{
+	if (cpu_has_feature(CPU_FTR_SPURR))
+		return mfspr(SPRN_SPURR);
+
+	if (!cpu_has_feature(CPU_FTR_PURR))
+		return tb;
+
+	return mfspr(SPRN_PURR);
+}
+
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ *
+ * @now is the current timebase value and @stime the unscaled system time
+ * (in timebase ticks) since the previous accounting point.  Returns the
+ * scaled system time for that interval; without
+ * CONFIG_ARCH_HAS_SCALED_CPUTIME this is always 0 and @acct is untouched.
+ */
+static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
+					unsigned long now, unsigned long stime)
+{
+	unsigned long stime_scaled = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	unsigned long nowscaled, deltascaled;
+	unsigned long utime, utime_scaled;
+
+	/* Scaled ticks elapsed since the previous call */
+	nowscaled = read_spurr(now);
+	deltascaled = nowscaled - acct->startspurr;
+	acct->startspurr = nowscaled;
+	/* User time accumulated since the previous call */
+	utime = acct->utime - acct->utime_sspurr;
+	acct->utime_sspurr = acct->utime;
+
+	/*
+	 * Because we don't read the SPURR on every kernel entry/exit,
+	 * deltascaled includes both user and system SPURR ticks.
+	 * Apportion these ticks to system SPURR ticks and user
+	 * SPURR ticks in the same ratio as the system time (stime)
+	 * and user time (utime) values obtained from the timebase
+	 * over the same interval.  The system ticks get returned to the
+	 * caller; the user ticks get saved up in acct->utime_scaled, which
+	 * vtime_flush_scaled() later adds to the task.
+	 */
+	stime_scaled = stime;
+	utime_scaled = utime;
+	if (deltascaled != stime + utime) {
+		if (utime) {
+			stime_scaled = deltascaled * stime / (stime + utime);
+			utime_scaled = deltascaled - stime_scaled;
+		} else {
+			/* No user time in the interval: everything is system time */
+			stime_scaled = deltascaled;
+		}
+	}
+	acct->utime_scaled += utime_scaled;
+#endif
+
+	return stime_scaled;
+}
+
+/*
+ * Advance acct->starttime to the current timebase value and return the
+ * number of timebase ticks elapsed since the previous starttime.
+ *
+ * @stime_scaled receives the result of vtime_delta_scaled() for the interval.
+ * @steal_time receives pseries_calculate_stolen_time() when CONFIG_PPC_SPLPAR
+ * is enabled and the firmware reports FW_FEATURE_SPLPAR, otherwise 0.
+ *
+ * Warns once if called with interrupts enabled.
+ */
+static unsigned long vtime_delta(struct cpu_accounting_data *acct,
+				 unsigned long *stime_scaled,
+				 unsigned long *steal_time)
+{
+	unsigned long now, stime;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	now = mftb();
+	stime = now - acct->starttime;
+	acct->starttime = now;
+
+	*stime_scaled = vtime_delta_scaled(acct, now, stime);
+
+	if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
+			firmware_has_feature(FW_FEATURE_SPLPAR))
+		*steal_time = pseries_calculate_stolen_time(now);
+	else
+		*steal_time = 0;
+
+	return stime;
+}
+
+/*
+ * Like vtime_delta(), but with stolen time taken out: *@stime receives the
+ * elapsed ticks minus the stolen ticks (never below zero) and the stolen
+ * ticks are added to acct->steal_time.
+ */
+static void vtime_delta_kernel(struct cpu_accounting_data *acct,
+			       unsigned long *stime, unsigned long *stime_scaled)
+{
+	unsigned long stolen;
+	unsigned long elapsed;
+
+	elapsed = vtime_delta(acct, stime_scaled, &stolen);
+	*stime = elapsed - min(elapsed, stolen);
+	acct->steal_time += stolen;
+}
+
+/*
+ * Charge the time since the last accounting point (stolen time excluded) to
+ * the accounting data returned by get_accounting(@tsk): as guest time when
+ * @tsk has PF_VCPU set, otherwise as system time.
+ */
+void vtime_account_kernel(struct task_struct *tsk)
+{
+	struct cpu_accounting_data *acct = get_accounting(tsk);
+	unsigned long stime, stime_scaled;
+
+	vtime_delta_kernel(acct, &stime, &stime_scaled);
+
+	if (tsk->flags & PF_VCPU) {
+		acct->gtime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+		/* Scaled guest time is folded into the scaled user time */
+		acct->utime_scaled += stime_scaled;
+#endif
+	} else {
+		acct->stime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+		acct->stime_scaled += stime_scaled;
+#endif
+	}
+}
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
+
+/*
+ * Charge the time since the last accounting point to idle_time.  Unlike the
+ * kernel path, stolen time is added to the idle time rather than being
+ * subtracted, and the scaled value is computed but not recorded here.
+ */
+void vtime_account_idle(struct task_struct *tsk)
+{
+	unsigned long stime, stime_scaled, steal_time;
+	struct cpu_accounting_data *acct = get_accounting(tsk);
+
+	stime = vtime_delta(acct, &stime_scaled, &steal_time);
+	acct->idle_time += stime + steal_time;
+}
+
+/*
+ * Charge the time since the last accounting point (stolen time excluded) to
+ * the counter @field points at; the scaled part always goes to
+ * acct->stime_scaled.
+ */
+static void vtime_account_irq_field(struct cpu_accounting_data *acct,
+				    unsigned long *field)
+{
+	unsigned long stime, stime_scaled;
+
+	vtime_delta_kernel(acct, &stime, &stime_scaled);
+	*field += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	acct->stime_scaled += stime_scaled;
+#endif
+}
+
+/* Charge the time since the last accounting point to softirq_time. */
+void vtime_account_softirq(struct task_struct *tsk)
+{
+	struct cpu_accounting_data *acct = get_accounting(tsk);
+	vtime_account_irq_field(acct, &acct->softirq_time);
+}
+
+/* Charge the time since the last accounting point to hardirq_time. */
+void vtime_account_hardirq(struct task_struct *tsk)
+{
+	struct cpu_accounting_data *acct = get_accounting(tsk);
+	vtime_account_irq_field(acct, &acct->hardirq_time);
+}
+
+/*
+ * Add the accumulated scaled user/system time, converted with
+ * cputime_to_nsecs(), to @tsk and reset the scaled accumulators.
+ * Compiles to nothing without CONFIG_ARCH_HAS_SCALED_CPUTIME.
+ */
+static void vtime_flush_scaled(struct task_struct *tsk,
+			       struct cpu_accounting_data *acct)
+{
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	if (acct->utime_scaled)
+		tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
+	if (acct->stime_scaled)
+		tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
+
+	acct->utime_scaled = 0;
+	/* utime itself is zeroed by vtime_flush(), so restart its snapshot too */
+	acct->utime_sspurr = 0;
+	acct->stime_scaled = 0;
+#endif
+}
+
+/*
+ * Account the whole cputime accumulated in the accounting data of @tsk:
+ * each non-zero counter is converted with cputime_to_nsecs() and handed to
+ * the matching account_*() function, then all counters are reset.
+ * Must be called with interrupts disabled.
+ * Assumes that vtime_account_kernel/idle() has been called
+ * recently (i.e. since the last entry from usermode) so that
+ * the accumulated scaled user time (acct->utime_scaled) is up to date.
+ */
+void vtime_flush(struct task_struct *tsk)
+{
+	struct cpu_accounting_data *acct = get_accounting(tsk);
+
+	if (acct->utime)
+		account_user_time(tsk, cputime_to_nsecs(acct->utime));
+
+	if (acct->gtime)
+		account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
+
+	/* steal_time is reset here; the other counters are reset at the end */
+	if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
+		account_steal_time(cputime_to_nsecs(acct->steal_time));
+		acct->steal_time = 0;
+	}
+
+	if (acct->idle_time)
+		account_idle_time(cputime_to_nsecs(acct->idle_time));
+
+	if (acct->stime)
+		account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
+					  CPUTIME_SYSTEM);
+
+	if (acct->hardirq_time)
+		account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
+					  CPUTIME_IRQ);
+	if (acct->softirq_time)
+		account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
+					  CPUTIME_SOFTIRQ);
+
+	/* Reads acct->utime_scaled/stime_scaled, so it must run before the reset */
+	vtime_flush_scaled(tsk, acct);
+
+	acct->utime = 0;
+	acct->gtime = 0;
+	acct->idle_time = 0;
+	acct->stime = 0;
+	acct->hardirq_time = 0;
+	acct->softirq_time = 0;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/*
+ * Busy-wait until the timebase has advanced by @loops ticks.
+ * When tb_invalid is set the wait is skipped after a single relax step.
+ */
+void __no_kcsan __delay(unsigned long loops)
+{
+	unsigned long t0;
+
+	spin_begin();
+	if (!tb_invalid) {
+		t0 = mftb();
+		while (mftb() - t0 < loops)
+			spin_cpu_relax();
+	} else {
+		/*
+		 * TB is in error state and isn't ticking anymore.
+		 * HMI handler was unable to recover from TB error.
+		 * Return immediately, so that kernel won't get stuck here.
+		 */
+		spin_cpu_relax();
+	}
+	spin_end();
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Busy-wait for @usecs microseconds by converting them to timebase ticks
+ * (tb_ticks_per_usec * @usecs) and calling __delay().  The multiplication is
+ * done in unsigned long without an overflow check.
+ */
+void __no_kcsan udelay(unsigned long usecs)
+{
+	__delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
+
+#ifdef CONFIG_SMP
+/*
+ * Return the program counter to attribute a profiling sample to: the link
+ * register when the interrupted instruction lies inside a lock function,
+ * otherwise the interrupted instruction pointer itself.
+ */
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	unsigned long ip = instruction_pointer(regs);
+
+	return in_lock_functions(ip) ? regs->link : ip;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+#ifdef CONFIG_IRQ_WORK
+
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+/*
+ * Load the irq_work_pending byte of the PACA.  The access is a single lbz at
+ * offsetof(struct paca_struct, irq_work_pending) relative to register 13.
+ */
+static inline unsigned long test_irq_work_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
+	return x;
+}
+
+/* Store 1 into the PACA's irq_work_pending byte with a single stb. */
+static inline void set_irq_work_pending_flag(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (1),
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
+}
+
+/* Store 0 into the PACA's irq_work_pending byte with a single stb. */
+static inline void clear_irq_work_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, irq_work_pending);
+
+#define set_irq_work_pending_flag()	__this_cpu_write(irq_work_pending, 1)
+#define test_irq_work_pending()		__this_cpu_read(irq_work_pending)
+#define clear_irq_work_pending()	__this_cpu_write(irq_work_pending, 0)
+
+#endif /* 32 vs 64 bit */
+
+/*
+ * Request that pending irq work be run on this CPU: set the per-CPU pending
+ * flag and program the decrementer to 1.  The flag is tested by
+ * timer_interrupt() and by set_dec_or_work().
+ */
+void arch_irq_work_raise(void)
+{
+	/*
+	 * 64-bit code that uses irq soft-mask can just cause an immediate
+	 * interrupt here that gets soft masked, if this is called under
+	 * local_irq_disable(). It might be possible to prevent that happening
+	 * by noticing interrupts are disabled and setting decrementer pending
+	 * to be replayed when irqs are enabled. The problem there is that
+	 * tracing can call irq_work_raise, including in code that does low
+	 * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
+	 * which could get tangled up if we're messing with the same state
+	 * here.
+	 */
+	/* Keep the flag write and set_dec() on the same CPU */
+	preempt_disable();
+	set_irq_work_pending_flag();
+	set_dec(1);
+	preempt_enable();
+}
+
+/*
+ * Program the decrementer with @val, then shorten it to 1 if irq work is
+ * pending on this CPU.
+ */
+static void set_dec_or_work(u64 val)
+{
+	set_dec(val);
+	/* We may have raced with new irq work */
+	if (unlikely(test_irq_work_pending()))
+		set_dec(1);
+}
+
+#else  /* CONFIG_IRQ_WORK */
+
+#define test_irq_work_pending()	0
+#define clear_irq_work_pending()
+
+static void set_dec_or_work(u64 val)
+{
+	set_dec(val);
+}
+#endif /* CONFIG_IRQ_WORK */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/* Re-arm the decrementer towards this CPU's next event (@now: timebase). */
+void timer_rearm_host_dec(u64 now)
+{
+	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+	u64 delta;
+
+	WARN_ON_ONCE(!arch_irqs_disabled());
+	WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+	if (now >= *next_tb) {
+		local_paca->irq_happened |= PACA_IRQ_DEC;
+		return;
+	}
+	delta = *next_tb - now;
+	set_dec_or_work(delta > decrementer_max ? decrementer_max : delta);
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
+/*
+ * timer_interrupt - gets called when the decrementer overflows,
+ * with interrupts disabled.
+ */
+DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
+{
+	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
+	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+	struct pt_regs *old_regs;
+	u64 now;
+
+	/*
+	 * Some implementations of hotplug will get timer interrupts while
+	 * offline, just ignore these.
+	 */
+	if (unlikely(!cpu_online(smp_processor_id()))) {
+		set_dec(decrementer_max);
+		return;
+	}
+
+	/* Conditionally hard-enable interrupts. */
+	if (should_hard_irq_enable(regs)) {
+		/*
+		 * Ensure a positive value is written to the decrementer, or
+		 * else some CPUs will continue to take decrementer exceptions.
+		 * When the PPC_WATCHDOG (decrementer based) is configured,
+		 * keep this at most 31 bits, which is about 4 seconds on most
+		 * systems, which gives the watchdog a chance of catching timer
+		 * interrupt hard lockups.
+		 */
+		if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+			set_dec(0x7fffffff);
+		else
+			set_dec(decrementer_max);
+
+		do_hard_irq_enable();
+	}
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
+	if (atomic_read(&ppc_n_lost_interrupts) != 0)
+		__do_IRQ(regs);
+#endif
+
+	old_regs = set_irq_regs(regs);
+
+	trace_timer_interrupt_entry(regs);
+
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		mce_run_irq_context_handlers();
+		irq_work_run();
+	}
+
+	now = get_tb();
+	if (now >= *next_tb) {
+		evt->event_handler(evt);
+		__this_cpu_inc(irq_stat.timer_irqs_event);
+	} else {
+		now = *next_tb - now;
+		if (now > decrementer_max)
+			now = decrementer_max;
+		set_dec_or_work(now);
+		__this_cpu_inc(irq_stat.timer_irqs_others);
+	}
+
+	trace_timer_interrupt_exit(regs);
+
+	set_irq_regs(old_regs);
+}
+EXPORT_SYMBOL(timer_interrupt);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void timer_broadcast_interrupt(void)
+{
+	tick_receive_broadcast();
+	__this_cpu_inc(irq_stat.broadcast_irqs_event);
+}
+#endif
+
+#ifdef CONFIG_SUSPEND
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_disable_irqs(void)
+{
+	if (ppc_md.suspend_disable_irqs)
+		ppc_md.suspend_disable_irqs();
+
+	/* Disable the decrementer, so that it doesn't interfere
+	 * with suspending.
+	 */
+
+	set_dec(decrementer_max);
+	local_irq_disable();
+	set_dec(decrementer_max);
+}
+
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_enable_irqs(void)
+{
+	local_irq_enable();
+
+	if (ppc_md.suspend_enable_irqs)
+		ppc_md.suspend_enable_irqs();
+}
+#endif
+
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+	return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ *
+ * Note: mulhdu(a, b) (multiply high double unsigned) returns
+ * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
+ * are 64-bit unsigned numbers.
+ */
+notrace unsigned long long sched_clock(void)
+{
+	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+}
+
+
+#ifdef CONFIG_PPC_PSERIES
+
+/*
+ * Running clock - attempts to give a view of time passing for a virtualised
+ * kernel.
+ * Uses the VTB register if available, otherwise a next-best guess.
+ */
+unsigned long long running_clock(void)
+{
+	/*
+	 * Don't read the VTB as a host since KVM does not switch in host
+	 * timebase into the VTB when it takes a guest off the CPU, reading the
+	 * VTB would result in reading 'last switched out' guest VTB.
+	 *
+	 * Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it
+	 * would be unsafe to rely only on the #ifdef above.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR) &&
+	    cpu_has_feature(CPU_FTR_ARCH_207S))
+		return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+
+	/*
+	 * This is a next best approximation without a VTB.
+	 * On a host which is running bare metal there should never be any stolen
+	 * time and on a host which doesn't do any virtualisation TB *should* equal
+	 * VTB so it makes no difference anyway.
+	 */
+	return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL];
+}
+#endif
+
+/* Read the @cells-cell frequency property @name of a cpu node into *@val. */
+static int __init get_freq(char *name, int cells, unsigned long *val)
+{
+	struct device_node *np;
+	const __be32 *prop;
+
+	/* The cpu node should have timebase and clock frequency properties */
+	np = of_find_node_by_type(NULL, "cpu");
+	if (!np)
+		return 0;
+
+	prop = of_get_property(np, name, NULL);
+	if (prop)
+		*val = of_read_ulong(prop, cells);
+
+	/* Done with the node */
+	of_node_put(np);
+
+	/* 1 if the property was found (and *val written), 0 otherwise */
+	return prop != NULL;
+}
+
+static void start_cpu_decrementer(void)
+{
+#ifdef CONFIG_BOOKE_OR_40x
+	unsigned int tcr;
+
+	/* Clear any pending timer interrupts */
+	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+
+	tcr = mfspr(SPRN_TCR);
+	/*
+	 * The watchdog may have already been enabled by u-boot. So leave
+	 * TCR[WP] (Watchdog Period) alone.
+	 */
+	tcr &= TCR_WP_MASK;	/* Clear all bits except for TCR[WP] */
+	tcr |= TCR_DIE;		/* Enable decrementer */
+	mtspr(SPRN_TCR, tcr);
+#endif
+}
+
+void __init generic_calibrate_decr(void)
+{
+	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */
+
+	if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
+	    !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {
+
+		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
+				"(not found)\n");
+	}
+
+	ppc_proc_freq = DEFAULT_PROC_FREQ;	/* hardcoded default */
+
+	if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
+	    !get_freq("clock-frequency", 1, &ppc_proc_freq)) {
+
+		printk(KERN_ERR "WARNING: Estimating processor frequency "
+				"(not found)\n");
+	}
+}
+
+int update_persistent_clock64(struct timespec64 now)
+{
+	struct rtc_time tm;
+
+	if (!ppc_md.set_rtc_time)
+		return -ENODEV;
+
+	rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm);
+
+	return ppc_md.set_rtc_time(&tm);
+}
+
+static void __read_persistent_clock(struct timespec64 *ts)
+{
+	struct rtc_time tm;
+	static int first = 1;
+
+	ts->tv_nsec = 0;
+	/* XXX this is a little fragile but will work okay in the short term */
+	if (first) {
+		first = 0;
+		if (ppc_md.time_init)
+			timezone_offset = ppc_md.time_init();
+
+		/* get_boot_time() isn't guaranteed to be safe to call late */
+		if (ppc_md.get_boot_time) {
+			ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
+			return;
+		}
+	}
+	if (!ppc_md.get_rtc_time) {
+		ts->tv_sec = 0;
+		return;
+	}
+	ppc_md.get_rtc_time(&tm);
+
+	ts->tv_sec = rtc_tm_to_time64(&tm);
+}
+
+/* Read the persistent clock into @ts, never reporting a pre-epoch time. */
+void read_persistent_clock64(struct timespec64 *ts)
+{
+	__read_persistent_clock(ts);
+
+	/* Sanitize it in case real time clock is set below EPOCH */
+	if (ts->tv_sec >= 0)
+		return;
+	ts->tv_sec = 0;
+	ts->tv_nsec = 0;
+}
+
+/* clocksource code */
+static notrace u64 timebase_read(struct clocksource *cs)
+{
+	return (u64)get_tb();
+}
+
+static void __init clocksource_init(void)
+{
+	struct clocksource *clock = &clocksource_timebase;
+
+	if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
+		printk(KERN_ERR "clocksource: %s is already registered\n",
+		       clock->name);
+		return;
+	}
+
+	printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
+	       clock->name, clock->mult, clock->shift);
+}
+
+static int decrementer_set_next_event(unsigned long evt,
+				      struct clock_event_device *dev)
+{
+	__this_cpu_write(decrementers_next_tb, get_tb() + evt);
+	set_dec_or_work(evt);
+
+	return 0;
+}
+
+static int decrementer_shutdown(struct clock_event_device *dev)
+{
+	__this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED);
+	set_dec_or_work(decrementer_max);
+
+	return 0;
+}
+
+static void register_decrementer_clockevent(int cpu)
+{
+	struct clock_event_device *dec = &per_cpu(decrementers, cpu);
+
+	*dec = decrementer_clockevent;
+	dec->cpumask = cpumask_of(cpu);
+
+	clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
+
+	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
+		    dec->name, dec->mult, dec->shift, cpu);
+
+	/* Set values for KVM, see kvm_emulate_dec() */
+	decrementer_clockevent.mult = dec->mult;
+	decrementer_clockevent.shift = dec->shift;
+}
+
+/* Turn on large-decrementer mode on this CPU when one is available. */
+static void enable_large_decrementer(void)
+{
+	/* Needs ISAv3 and a decrementer_max above the default maximum */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300) ||
+	    decrementer_max <= DECREMENTER_DEFAULT_MAX)
+		return;
+
+	/*
+	 * If we're running as the hypervisor we need to enable the LD manually
+	 * otherwise firmware should have done it for us.
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
+}
+
+/* Set decrementer_max from the cpu node's "ibm,dec-bits" property, if any. */
+static void __init set_decrementer_max(void)
+{
+	struct device_node *cpu;
+	u32 bits = 32;
+
+	/* Prior to ISAv3 the decrementer is always 32 bit */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+	if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) {
+		if (bits > 64 || bits < 32) {
+			pr_warn("time_init: firmware supplied invalid ibm,dec-bits\n");
+			bits = 32;
+		}
+
+		/* calculate the signed maximum given this many bits */
+		decrementer_max = (1ul << (bits - 1)) - 1;
+	}
+
+	of_node_put(cpu);
+
+	pr_info("time_init: %u bit decrementer (max: %llx)\n",
+		bits, decrementer_max);
+}
+
+static void __init init_decrementer_clockevent(void)
+{
+	register_decrementer_clockevent(smp_processor_id());
+}
+
+void secondary_cpu_time_init(void)
+{
+	/* Enable and test the large decrementer for this cpu */
+	enable_large_decrementer();
+
+	/* Start the decrementer on CPUs that have manual control
+	 * such as BookE
+	 */
+	start_cpu_decrementer();
+
+	/* FIXME: Should make unrelated change to move snapshot_timebase
+	 * call here ! */
+	register_decrementer_clockevent(smp_processor_id());
+}
+
+/* This function is only called on the boot processor */
+void __init time_init(void)
+{
+	struct div_result res;
+	u64 scale;
+	unsigned shift;
+
+	/* Normal PowerPC with timebase register */
+	if (ppc_md.calibrate_decr)
+		ppc_md.calibrate_decr();
+	else
+		generic_calibrate_decr();
+
+	printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+	       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
+	printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",
+	       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
+
+	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
+	tb_ticks_per_sec = ppc_tb_freq;
+	tb_ticks_per_usec = ppc_tb_freq / 1000000;
+
+	/*
+	 * Compute scale factor for sched_clock.
+	 * The calibrate_decr() function has set tb_ticks_per_sec,
+	 * which is the timebase frequency.
+	 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
+	 * the 128-bit result as a 64.64 fixed-point number.
+	 * We then shift that number right until it is less than 1.0,
+	 * giving us the scale factor and shift count to use in
+	 * sched_clock().
+	 */
+	div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
+	scale = res.result_low;
+	for (shift = 0; res.result_high != 0; ++shift) {
+		scale = (scale >> 1) | (res.result_high << 63);
+		res.result_high >>= 1;
+	}
+	tb_to_ns_scale = scale;
+	tb_to_ns_shift = shift;
+	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
+	boot_tb = get_tb();
+
+	/* If platform provided a timezone (pmac), we correct the time */
+	if (timezone_offset) {
+		sys_tz.tz_minuteswest = -timezone_offset / 60;
+		sys_tz.tz_dsttime = 0;
+	}
+
+	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+
+	/* initialise and enable the large decrementer (if we have one) */
+	set_decrementer_max();
+	enable_large_decrementer();
+
+	/* Start the decrementer on CPUs that have manual control
+	 * such as BookE
+	 */
+	start_cpu_decrementer();
+
+	/* Register the clocksource */
+	clocksource_init();
+
+	init_decrementer_clockevent();
+	tick_setup_hrtimer_broadcast();
+
+	of_clk_init(NULL);
+	enable_sched_clock_irqtime();
+}
+
+/*
+ * Divide the 128-bit value dividend_high:dividend_low by a 32-bit divisor
+ * with schoolbook long division on 32-bit digits; quotient goes to *dr.
+ */
+void div128_by_32(u64 dividend_high, u64 dividend_low,
+		  unsigned divisor, struct div_result *dr)
+{
+	unsigned long a, b, c, d;	/* dividend digits, most significant first */
+	unsigned long w, x, y, z;	/* quotient digits, most significant first */
+	u64 ra, rb, rc;			/* previous remainder : next dividend digit */
+
+	a = dividend_high >> 32;
+	b = dividend_high & 0xffffffff;
+	c = dividend_low >> 32;
+	d = dividend_low & 0xffffffff;
+
+	w = a / divisor;		/* first digit needs no carried-in remainder */
+	ra = ((u64)(a - (w * divisor)) << 32) + b;
+
+	/* do_div() leaves the quotient in its first argument, returns the remainder */
+	rb = ((u64) do_div(ra, divisor) << 32) + c;
+	x = ra;
+
+	rc = ((u64) do_div(rb, divisor) << 32) + d;
+	y = rb;
+
+	do_div(rc, divisor);		/* the final remainder is not used */
+	z = rc;
+
+	dr->result_high = ((u64)w << 32) + x;
+	dr->result_low  = ((u64)y << 32) + z;
+}
+
+/* We don't need to calibrate delay, we use the CPU timebase for that */
+void calibrate_delay(void)
+{
+	/* Some generic code (such as spinlock debug) use loops_per_jiffy
+	 * as the number of __delay(1) in a jiffy, so make it so
+	 */
+	loops_per_jiffy = tb_ticks_per_jiffy;
+}
+
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+	ppc_md.get_rtc_time(tm);
+	return 0;
+}
+
+/*
+ * .set_time hook for rtc-generic: a missing or failing platform
+ * set_rtc_time() is reported to the caller as -EOPNOTSUPP.
+ */
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+	if (!ppc_md.set_rtc_time || ppc_md.set_rtc_time(tm) < 0)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static const struct rtc_class_ops rtc_generic_ops = {
+	.read_time = rtc_generic_get_time,
+	.set_time = rtc_generic_set_time,
+};
+
+static int __init rtc_init(void)
+{
+	struct platform_device *pdev;
+
+	if (!ppc_md.get_rtc_time)
+		return -ENODEV;
+
+	pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+					     &rtc_generic_ops,
+					     sizeof(rtc_generic_ops));
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
+
+device_initcall(rtc_init);
+#endif
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
new file mode 100644
index 0000000000..a9cd650716
--- /dev/null
+++ b/arch/powerpc/kernel/tm.S
@@ -0,0 +1,554 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Transactional memory support routines to reclaim and recheckpoint
+ * transactional process state.
+ *
+ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
+ */
+
+#include <linux/export.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/bug.h>
+#include <asm/feature-fixups.h>
+
+#ifdef CONFIG_VSX
+/* See fpu.S, this is borrowed from there */
+#define __SAVE_32FPRS_VSRS(n,c,base)		\
+BEGIN_FTR_SECTION				\
+	b	2f;				\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\
+	SAVE_32FPRS(n,base);			\
+	b	3f;				\
+2:	SAVE_32VSRS(n,c,base);			\
+3:
+#define __REST_32FPRS_VSRS(n,c,base)		\
+BEGIN_FTR_SECTION				\
+	b	2f;				\
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);		\
+	REST_32FPRS(n,base);			\
+	b	3f;				\
+2:	REST_32VSRS(n,c,base);			\
+3:
+#else
+#define __SAVE_32FPRS_VSRS(n,c,base)	SAVE_32FPRS(n, base)
+#define __REST_32FPRS_VSRS(n,c,base)	REST_32FPRS(n, base)
+#endif
+#define SAVE_32FPRS_VSRS(n,c,base) \
+	__SAVE_32FPRS_VSRS(n,__REG_##c,__REG_##base)
+#define REST_32FPRS_VSRS(n,c,base) \
+	__REST_32FPRS_VSRS(n,__REG_##c,__REG_##base)
+
+/* Stack frame offsets for local variables. */
+#define TM_FRAME_L0	TM_FRAME_SIZE-16
+#define TM_FRAME_L1	TM_FRAME_SIZE-8
+
+
+/* In order to access the TM SPRs, TM must be enabled.  So, do so: */
+_GLOBAL(tm_enable)
+	mfmsr	r4			/* r4 = current MSR */
+	li	r3, MSR_TM >> 32	/* build the MSR_TM mask in r3 ... */
+	sldi	r3, r3, 32		/* ... by shifting it back into place */
+	and.	r0, r4, r3		/* is MSR_TM already set? */
+	bne	1f			/* yes: skip the MSR write */
+	or	r4, r4, r3
+	mtmsrd	r4
+1:	blr
+EXPORT_SYMBOL_GPL(tm_enable);
+
+_GLOBAL(tm_disable)
+	mfmsr	r4			/* r4 = current MSR */
+	li	r3, MSR_TM >> 32
+	sldi	r3, r3, 32		/* r3 = MSR_TM mask */
+	andc	r4, r4, r3		/* clear MSR_TM, keep every other bit */
+	mtmsrd	r4
+	blr
+EXPORT_SYMBOL_GPL(tm_disable);
+
+_GLOBAL(tm_save_sprs)
+	mfspr	r0, SPRN_TFHAR
+	std	r0, THREAD_TM_TFHAR(r3)
+	mfspr	r0, SPRN_TEXASR
+	std	r0, THREAD_TM_TEXASR(r3)
+	mfspr	r0, SPRN_TFIAR
+	std	r0, THREAD_TM_TFIAR(r3)
+	blr
+
+_GLOBAL(tm_restore_sprs)
+	ld	r0, THREAD_TM_TFHAR(r3)
+	mtspr	SPRN_TFHAR, r0
+	ld	r0, THREAD_TM_TEXASR(r3)
+	mtspr	SPRN_TEXASR, r0
+	ld	r0, THREAD_TM_TFIAR(r3)
+	mtspr	SPRN_TFIAR, r0
+	blr
+
+	/* Passed an 8-bit failure cause as first argument. */
+_GLOBAL(tm_abort)
+	TABORT(R3)
+	blr
+EXPORT_SYMBOL_GPL(tm_abort);
+
+/*
+ * void tm_reclaim(struct thread_struct *thread,
+ *		   uint8_t cause)
+ *
+ *	- Performs a full reclaim.  This destroys outstanding
+ *	  transactions and updates thread.ckpt_regs, thread.ckfp_state and
+ *	  thread.ckvr_state with the original checkpointed state.  Note that
+ *	  thread->regs is unchanged.
+ *
+ * The purpose is to both abort, and preserve the state of, a transaction
+ * at a context switch. We preserve/restore both sets of process state so
+ * they can be restored when the thread is scheduled again.  We continue in
+ * userland as though nothing happened, but when the transaction is resumed
+ * it will abort back to the checkpointed state we save out here.
+ *
+ * Call with IRQs off, stacks get all out of sync for some periods in here!
+ */
+_GLOBAL(tm_reclaim)
+	mfcr	r5
+	mflr	r0
+	stw	r5, 8(r1)
+	std	r0, 16(r1)
+	std	r2, STK_GOT(r1)
+	stdu	r1, -TM_FRAME_SIZE(r1)
+
+	/* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */
+
+	std	r3, STK_PARAM(R3)(r1)
+	SAVE_NVGPRS(r1)
+
+	/*
+	 * Save kernel live AMR since it will be clobbered by treclaim
+	 * but can be used elsewhere later in kernel space.
+	 */
+	mfspr	r3, SPRN_AMR
+	std	r3, TM_FRAME_L1(r1)
+
+	/* We need to setup MSR for VSX register save instructions. */
+	mfmsr	r14
+	mr	r15, r14
+	ori	r15, r15, MSR_FP
+	li	r16, 0
+	ori	r16, r16, MSR_EE /* IRQs hard off */
+	andc	r15, r15, r16
+	oris	r15, r15, MSR_VEC@h
+#ifdef CONFIG_VSX
+	BEGIN_FTR_SECTION
+	oris	r15,r15, MSR_VSX@h
+	END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+	mtmsrd	r15
+	std	r14, TM_FRAME_L0(r1)
+
+	/* Do sanity check on MSR to make sure we are suspended */
+	li	r7, (MSR_TS_S)@higher
+	srdi	r6, r14, 32
+	and	r6, r6, r7
+1:	tdeqi   r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/* Stash the stack pointer away for use after reclaim */
+	std	r1, PACAR1(r13)
+
+	/* Clear MSR RI since we are about to use SCRATCH0, EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 *
+	 * The moment we treclaim, ALL of our GPRs will switch
+	 * to user register state.  (FPRs, CCR etc. also!)
+	 * Use an sprg and a tm_scratch in the PACA to shuffle.
+	 */
+	TRECLAIM(R4)				/* Cause in r4 */
+
+	/*
+	 * ******************** GPRs ********************
+	 * Stash the checkpointed r13 in the scratch SPR and get the real paca.
+	 */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+
+	/*
+	 * Stash the checkpointed r1 away in paca->tm_scratch and get the real
+	 * stack pointer back into r1.
+	 */
+	std	r1, PACATMSCRATCH(r13)
+	ld	r1, PACAR1(r13)
+
+	std	r11, GPR11(r1)			/* Temporary stash */
+
+	/*
+	 * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is
+	 * clobbered by an exception once we turn on MSR_RI below.
+	 */
+	ld	r11, PACATMSCRATCH(r13)
+	std	r11, GPR1(r1)
+
+	/*
+	 * Store r13 away so we can free up the scratch SPR for the SLB fault
+	 * handler (needed once we start accessing the thread_struct).
+	 */
+	GET_SCRATCH0(r11)
+	std	r11, GPR13(r1)
+
+	/* Reset MSR RI so we can take SLB faults again */
+	li	r11, MSR_RI
+	mtmsrd	r11, 1
+
+	/* Store the PPR in r11 and reset to decent value */
+	mfspr	r11, SPRN_PPR
+	HMT_MEDIUM
+
+	/* Now get some more GPRS free */
+	std	r7, GPR7(r1)			/* Temporary stash */
+	std	r12, GPR12(r1)			/* ''   ''    ''   */
+	ld	r12, STK_PARAM(R3)(r1)		/* Param 0, thread_struct * */
+
+	std	r11, THREAD_TM_PPR(r12)		/* Store PPR and free r11 */
+
+	addi	r7, r12, PT_CKPT_REGS		/* Thread's ckpt_regs */
+
+	/*
+	 * Make r7 look like an exception frame so that we can use the neat
+	 * GPRx(n) macros. r7 is NOT a pt_regs ptr!
+	 */
+	subi	r7, r7, STACK_INT_FRAME_REGS
+
+	/* Sync the userland GPRs to the thread's ckpt_regs: */
+	SAVE_GPR(0, r7)				/* user r0 */
+	SAVE_GPRS(2, 6, r7)			/* user r2-r6 */
+	SAVE_GPRS(8, 10, r7)			/* user r8-r10 */
+	ld	r3, GPR1(r1)			/* user r1 */
+	ld	r4, GPR7(r1)			/* user r7 */
+	ld	r5, GPR11(r1)			/* user r11 */
+	ld	r6, GPR12(r1)			/* user r12 */
+	ld	r8, GPR13(r1)			/* user r13 */
+	std	r3, GPR1(r7)
+	std	r4, GPR7(r7)
+	std	r5, GPR11(r7)
+	std	r6, GPR12(r7)
+	std	r8, GPR13(r7)
+
+	SAVE_NVGPRS(r7)				/* user r14-r31 */
+
+	/* ******************** NIP ******************** */
+	mfspr	r3, SPRN_TFHAR
+	std	r3, _NIP(r7)			/* Returns to failhandler */
+	/*
+	 * The checkpointed NIP is ignored when rescheduling/rechkpting,
+	 * but is used in signal return to 'wind back' to the abort handler.
+	 */
+
+	/* ***************** CTR, LR, CR, XER ********** */
+	mfctr	r3
+	mflr	r4
+	mfcr	r5
+	mfxer	r6
+
+	std	r3, _CTR(r7)
+	std	r4, _LINK(r7)
+	std	r5, _CCR(r7)
+	std	r6, _XER(r7)
+
+	/* ******************** TAR, DSCR ********** */
+	mfspr	r3, SPRN_TAR
+	mfspr	r4, SPRN_DSCR
+
+	std	r3, THREAD_TM_TAR(r12)
+	std	r4, THREAD_TM_DSCR(r12)
+
+        /* ******************** AMR **************** */
+        mfspr	r3, SPRN_AMR
+        std	r3, THREAD_TM_AMR(r12)
+
+	/*
+	 * MSR and flags: We don't change CRs, and we don't need to alter MSR.
+	 */
+
+
+	/*
+	 * ******************** FPR/VR/VSRs ************
+	 * After reclaiming, capture the checkpointed FPRs/VRs.
+	 *
+	 * We enabled VEC/FP/VSX in the msr above, so we can execute these
+	 * instructions!
+	 */
+	mr	r3, r12
+
+	/* Altivec (VEC/VMX/VR)*/
+	addi	r7, r3, THREAD_CKVRSTATE
+	SAVE_32VRS(0, r6, r7)	/* r6 scratch, r7 ckvr_state */
+	mfvscr	v0
+	li	r6, VRSTATE_VSCR
+	stvx	v0, r7, r6
+
+	/* VRSAVE */
+	mfspr	r0, SPRN_VRSAVE
+	std	r0, THREAD_CKVRSAVE(r3)
+
+	/* Floating Point (FP) */
+	addi	r7, r3, THREAD_CKFPSTATE
+	SAVE_32FPRS_VSRS(0, R6, R7)	/* r6 scratch, r7 ckfp_state */
+	mffs    fr0
+	stfd    fr0,FPSTATE_FPSCR(r7)
+
+
+	/*
+	 * TM regs, incl TEXASR -- these live in thread_struct.  Note they've
+	 * been updated by the treclaim, to explain to userland the failure
+	 * cause (aborted).
+	 */
+	mfspr	r0, SPRN_TEXASR
+	mfspr	r3, SPRN_TFHAR
+	mfspr	r4, SPRN_TFIAR
+	std	r0, THREAD_TM_TEXASR(r12)
+	std	r3, THREAD_TM_TFHAR(r12)
+	std	r4, THREAD_TM_TFIAR(r12)
+
+	/* Restore kernel live AMR */
+	ld	r8, TM_FRAME_L1(r1)
+	mtspr	SPRN_AMR, r8
+
+	/* Restore original MSR/IRQ state & clear TM mode */
+	ld	r14, TM_FRAME_L0(r1)		/* Orig MSR */
+
+	li	r15, 0
+	rldimi  r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1
+	mtmsrd  r14
+
+	REST_NVGPRS(r1)
+
+	addi    r1, r1, TM_FRAME_SIZE
+	lwz	r4, 8(r1)
+	ld	r0, 16(r1)
+	mtcr	r4
+	mtlr	r0
+	ld	r2, STK_GOT(r1)
+
+	/* Load CPU's default DSCR */
+	ld	r0, PACA_DSCR_DEFAULT(r13)
+	mtspr	SPRN_DSCR, r0
+
+	blr
+
+
+	/*
+	 * void __tm_recheckpoint(struct thread_struct *thread)
+	 *	- Restore the checkpointed register state saved by tm_reclaim
+	 *	  when we switch_to a process.
+	 *
+	 *	Call with IRQs off, stacks get all out of sync for
+	 *	some periods in here!
+	 */
+_GLOBAL(__tm_recheckpoint)
+	mfcr	r5
+	mflr	r0
+	stw	r5, 8(r1)
+	std	r0, 16(r1)
+	std	r2, STK_GOT(r1)
+	stdu	r1, -TM_FRAME_SIZE(r1)
+
+	/*
+	 * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS].
+	 * This is used for backing up the NVGPRs:
+	 */
+	SAVE_NVGPRS(r1)
+
+	/*
+	 * Save kernel live AMR since it will be clobbered for trechkpt
+	 * but can be used elsewhere later in kernel space.
+	 */
+	mfspr	r8, SPRN_AMR
+	std	r8, TM_FRAME_L0(r1)
+
+	/* Load complete register state from ts_ckpt* registers */
+
+	addi	r7, r3, PT_CKPT_REGS		/* Thread's ckpt_regs */
+
+	/*
+	 * Make r7 look like an exception frame so that we can use the neat
+	 * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
+	 */
+	subi	r7, r7, STACK_INT_FRAME_REGS
+
+	/* We need to setup MSR for FP/VMX/VSX register restore instructions. */
+	mfmsr	r6
+	mr	r5, r6
+	ori	r5, r5, MSR_FP
+#ifdef CONFIG_ALTIVEC
+	oris	r5, r5, MSR_VEC@h
+#endif
+#ifdef CONFIG_VSX
+	BEGIN_FTR_SECTION
+	oris	r5,r5, MSR_VSX@h
+	END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+	mtmsrd	r5
+
+#ifdef CONFIG_ALTIVEC
+	/*
+	 * FP and VEC registers: These are recheckpointed from
+	 * thread.ckfp_state and thread.ckvr_state respectively. The
+	 * thread.fp_state[] version holds the 'live' (transactional)
+	 * state and will be loaded subsequently by any FPUnavailable trap.
+	 */
+	addi	r8, r3, THREAD_CKVRSTATE
+	li	r5, VRSTATE_VSCR
+	lvx	v0, r8, r5
+	mtvscr	v0
+	REST_32VRS(0, r5, r8)			/* r5 scratch, r8 ptr */
+	ld	r5, THREAD_CKVRSAVE(r3)
+	mtspr	SPRN_VRSAVE, r5
+#endif
+
+	addi	r8, r3, THREAD_CKFPSTATE
+	lfd	fr0, FPSTATE_FPSCR(r8)
+	MTFSF_L(fr0)
+	REST_32FPRS_VSRS(0, R4, R8)
+
+	mtmsr	r6				/* FP/Vec off again! */
+
+restore_gprs:
+
+	/* ****************** CTR, LR, XER ************* */
+	ld	r4, _CTR(r7)
+	ld	r5, _LINK(r7)
+	ld	r8, _XER(r7)
+
+	mtctr	r4
+	mtlr	r5
+	mtxer	r8
+
+	/* ******************** TAR ******************** */
+	ld	r4, THREAD_TM_TAR(r3)
+	mtspr	SPRN_TAR,	r4
+
+	/* ******************** AMR ******************** */
+	ld	r4, THREAD_TM_AMR(r3)
+	mtspr	SPRN_AMR, r4
+
+	/* Load up the PPR and DSCR in GPRs only at this stage */
+	ld	r5, THREAD_TM_DSCR(r3)
+	ld	r6, THREAD_TM_PPR(r3)
+
+	REST_GPR(0, r7)				/* GPR0 */
+	REST_GPRS(2, 4, r7)			/* GPR2-4 */
+	REST_GPRS(8, 12, r7)			/* GPR8-12 */
+	REST_GPRS(14, 31, r7)			/* GPR14-31 */
+
+	/* Load up PPR and DSCR here so we don't run with user values for long */
+	mtspr	SPRN_DSCR, r5
+	mtspr	SPRN_PPR, r6
+
+	/*
+	 * Do final sanity check on TEXASR to make sure FS is set. Do this
+	 * here before we load up the userspace r1 so any bugs we hit will get
+	 * a call chain.
+	 */
+	mfspr	r5, SPRN_TEXASR
+	srdi	r5, r5, 16
+	li	r6, (TEXASR_FS)@h
+	and	r6, r6, r5
+1:	tdeqi	r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/*
+	 * Do final sanity check on MSR to make sure we are not transactional
+	 * or suspended.
+	 */
+	mfmsr   r6
+	li	r5, (MSR_TS_MASK)@higher
+	srdi	r6, r6, 32
+	and	r6, r6, r5
+1:	tdnei   r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/* Restore CR */
+	ld	r6, _CCR(r7)
+	mtcr    r6
+
+	REST_GPR(6, r7)
+
+	/*
+	 * Store user r1 and r5 and r13 on the stack (in the unused save
+	 * areas / compiler reserved areas), so that we can access them after
+	 * we clear MSR RI.
+	 */
+
+	REST_GPR(5, r7)
+	std	r5, -8(r1)
+	ld	r5, GPR13(r7)
+	std	r5, -16(r1)
+	ld	r5, GPR1(r7)
+	std	r5, -24(r1)
+
+	REST_GPR(7, r7)
+
+	/* Stash the stack pointer away for use after recheckpoint */
+	std	r1, PACAR1(r13)
+
+	/* Clear MSR RI since we are about to clobber r13. EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 */
+
+	ld	r5, -8(r1)
+	ld	r13, -16(r1)
+	ld	r1, -24(r1)
+
+	/* Commit register state as checkpointed state: */
+	TRECHKPT
+
+	HMT_MEDIUM
+
+	/*
+	 * Our transactional state has now changed.
+	 *
+	 * Now just get out of here.  Transactional (current) state will be
+	 * updated once restore is called on the return path in the _switch-ed
+	 * -to process.
+	 */
+
+	GET_PACA(r13)
+	ld	r1, PACAR1(r13)
+
+	/* R13, R1 is restored, so we are recoverable again.  EE is still off */
+	li	r4, MSR_RI
+	mtmsrd	r4, 1
+
+	/* Restore kernel live AMR */
+	ld	r8, TM_FRAME_L0(r1)
+	mtspr	SPRN_AMR, r8
+
+	REST_NVGPRS(r1)
+
+	addi    r1, r1, TM_FRAME_SIZE
+	lwz	r4, 8(r1)
+	ld	r0, 16(r1)
+	mtcr	r4
+	mtlr	r0
+	ld	r2, STK_GOT(r1)
+
+	/* Load CPU's default DSCR */
+	ld	r0, PACA_DSCR_DEFAULT(r13)
+	mtspr	SPRN_DSCR, r0
+
+	blr
+
+	/* ****************************************************************** */
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
new file mode 100644
index 0000000000..125f4ca588
--- /dev/null
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the powerpc trace subsystem
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE)
+endif
+
+obj32-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o ftrace_entry.o
+ifdef CONFIG_MPROFILE_KERNEL
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o ftrace_entry.o
+else
+obj64-$(CONFIG_FUNCTION_TRACER)		+= ftrace_64_pg.o ftrace_64_pg_entry.o
+endif
+obj-$(CONFIG_TRACING)			+= trace_clock.o
+
+obj-$(CONFIG_PPC64)			+= $(obj64-y)
+obj-$(CONFIG_PPC32)			+= $(obj32-y)
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_ftrace.o := n
+KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
+UBSAN_SANITIZE_ftrace.o := n
+GCOV_PROFILE_ftrace_64_pg.o := n
+KCOV_INSTRUMENT_ftrace_64_pg.o := n
+KCSAN_SANITIZE_ftrace_64_pg.o := n
+UBSAN_SANITIZE_ftrace_64_pg.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
new file mode 100644
index 0000000000..82010629cf
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+#define	NUM_FTRACE_TRAMPS	2
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+/* Build a branch from @ip to @addr ('bl' if @link, else 'b'); WARNs when out of range. */
+static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
+{
+	ppc_inst_t insn;
+
+	WARN_ON(!is_offset_in_branch_range(addr - ip));
+	create_branch(&insn, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0);
+	return insn;
+}
+
+/* Fetch the instruction at @ip into *@op; returns 0, or -EFAULT (logged) if the read fails. */
+static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op)
+{
+	if (!copy_inst_from_kernel_nofault(op, (void *)ip))
+		return 0;
+
+	pr_err("0x%lx: fetching instruction failed\n", ip);
+	return -EFAULT;
+}
+
+/* Verify that @ip holds @inst: 0 on match, -EINVAL on mismatch, or the read error. */
+static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst)
+{
+	ppc_inst_t found;
+	int err = ftrace_read_inst(ip, &found);
+
+	if (err)
+		return err;
+	if (ppc_inst_equal(found, inst))
+		return 0;
+	pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
+	       ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(found));
+	return -EINVAL;
+}
+
+/* Patch @new over @ip, but only after confirming the site currently holds @old. */
+static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+	int err = ftrace_validate_inst(ip, old);
+
+	if (err)
+		return err;
+	return patch_instruction((u32 *)ip, new);
+}
+
+static int is_bl_op(ppc_inst_t op)	/* is @op a 'bl', ignoring the PPC_LI_MASK bits? */
+{
+	return PPC_RAW_BL(0) == (ppc_inst_val(op) & ~PPC_LI_MASK);
+}
+
+/* Return the first registered trampoline within branch range of @ip, or 0 if none is. */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+	int idx;
+
+	/* Empty (zero) slots are skipped */
+	for (idx = 0; idx < NUM_FTRACE_TRAMPS; idx++)
+		if (ftrace_tramps[idx] && is_offset_in_branch_range(ftrace_tramps[idx] - ip))
+			return ftrace_tramps[idx];
+
+	return 0;
+}
+
+/* Fill *@call_inst with the 'bl' reaching @addr directly or via a stub; returns 0 or -EINVAL. */
+static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst)
+{
+	unsigned long ip = rec->ip;
+	unsigned long stub;
+
+	if (is_offset_in_branch_range(addr - ip)) {
+		/* Within range: branch straight to @addr */
+		stub = addr;
+#ifdef CONFIG_MODULES
+	} else if (rec->arch.mod) {
+		/* Module code goes to the module stub matching @addr (plain or regs) */
+		stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp :
+							       rec->arch.mod->arch.tramp_regs);
+#endif
+	} else if (core_kernel_text(ip)) {
+		/* Core kernel text branches to one of our ftrace stubs, if any is reachable */
+		stub = find_ftrace_tramp(ip);
+		if (!stub) {
+			pr_err("0x%lx: No ftrace stubs reachable\n", ip);
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+	*call_inst = ftrace_create_branch_inst(ip, stub, 1);
+	return 0;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+	/* Not expected to run, since we override ftrace_replace_code(): warn and fail */
+	WARN_ON(1);
+	return -EINVAL;
+}
+#endif
+
+/* Turn the nop at @rec->ip into a call to @addr; only valid for non-core-kernel text. */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t call_inst;
+	int err;
+
+	/* Module load is the only expected caller: warn and refuse anything else */
+	if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip)))
+		return -EINVAL;
+
+	err = ftrace_get_call_inst(rec, addr, &call_inst);
+	if (err)
+		return err;
+
+	return ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), call_inst);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	/*
+	 * Not expected to run: this file overrides both ftrace_replace_code()
+	 * and ftrace_init_nop(). Warn and report failure if it ever does.
+	 */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+/*
+ * Walk every ftrace record and bring its call site in line with the update
+ * the core asks for: nop -> call, call -> nop, or call -> different call.
+ * The walk stops at the first failure, which is reported via ftrace_bug().
+ */
+void ftrace_replace_code(int enable)
+{
+	const ppc_inst_t nop = ppc_inst(PPC_RAW_NOP());
+	unsigned long cur_addr, next_addr;
+	struct ftrace_rec_iter *iter;
+	struct dyn_ftrace *rec;
+	ppc_inst_t old, new;
+	int ret = 0;
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		/* Leave records that are disabled and not currently enabled alone */
+		if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED))
+			continue;
+
+		/* Read the current and the new target, then ask which update applies */
+		cur_addr = ftrace_get_addr_curr(rec);
+		next_addr = ftrace_get_addr_new(rec);
+
+		switch (ftrace_update_record(rec, enable)) {
+		case FTRACE_UPDATE_MODIFY_CALL:
+			ret = ftrace_get_call_inst(rec, next_addr, &new);
+			ret |= ftrace_get_call_inst(rec, cur_addr, &old);
+			break;
+		case FTRACE_UPDATE_MAKE_NOP:
+			ret = ftrace_get_call_inst(rec, cur_addr, &old);
+			new = nop;
+			break;
+		case FTRACE_UPDATE_MAKE_CALL:
+			ret = ftrace_get_call_inst(rec, next_addr, &new);
+			old = nop;
+			break;
+		case FTRACE_UPDATE_IGNORE:
+		default:
+			continue;
+		}
+
+		if (!ret)
+			ret = ftrace_modify_code(rec->ip, old, new);
+		if (ret)
+			goto out;
+	}
+
+out:
+	if (ret)
+		ftrace_bug(ret, rec);
+}
+
+/* Validate the instructions around a new ftrace site, then disable it; returns 0 or -errno. */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	unsigned long addr, ip = rec->ip;
+	ppc_inst_t old, new;
+	int ret = 0;
+
+	/* Verify instructions surrounding the ftrace location */
+	if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+		/* Expect nops */
+		ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP()));
+		if (!ret)
+			ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP()));
+	} else if (IS_ENABLED(CONFIG_PPC32)) {
+		/* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */
+		ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+		if (!ret)
+			ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)));
+	} else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+		/* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */
+		ret = ftrace_read_inst(ip - 4, &old);
+		if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) {
+			/* Stop at the first failure: OR-ing two errnos gives a bogus code */
+			ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+			if (ret)
+				return ret;
+			ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)));
+		}
+	} else {
+		return -EINVAL;
+	}
+	if (ret)
+		return ret;
+
+	if (!core_kernel_text(ip)) {
+		if (!mod) {
+			pr_err("0x%lx: No module provided for non-kernel address\n", ip);
+			return -EFAULT;
+		}
+		rec->arch.mod = mod;
+	}
+
+	/* Nop-out the ftrace location */
+	new = ppc_inst(PPC_RAW_NOP());
+	addr = MCOUNT_ADDR;
+	if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+		/* we instead patch-in the 'mflr r0' */
+		old = ppc_inst(PPC_RAW_NOP());
+		new = ppc_inst(PPC_RAW_MFLR(_R0));
+		ret = ftrace_modify_code(ip - 4, old, new);
+	} else if (is_offset_in_branch_range(addr - ip)) {
+		/* Within range */
+		old = ftrace_create_branch_inst(ip, addr, 1);
+		ret = ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) {
+		/* Linker-generated or module _mcount stub: just confirm we have a 'bl' here */
+		ret = ftrace_read_inst(ip, &old);
+		if (ret)
+			return ret;
+		if (!is_bl_op(old)) {
+			pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old));
+			return -EINVAL;
+		}
+		ret = patch_instruction((u32 *)ip, new);
+	} else {
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Re-point the 'bl' at ftrace_call (and ftrace_regs_call with WITH_REGS) to @func's entry. */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long site = (unsigned long)(&ftrace_call);
+	ppc_inst_t cur, repl;
+	int err;
+
+	cur = ppc_inst_read((u32 *)site);
+	repl = ftrace_create_branch_inst(site, ppc_function_entry(func), 1);
+	err = ftrace_modify_code(site, cur, repl);
+
+	if (err || !IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return err;
+
+	/* The regs variant has a call site of its own that must follow suit */
+	site = (unsigned long)(&ftrace_regs_call);
+	cur = ppc_inst_read((u32 *)site);
+	repl = ftrace_create_branch_inst(site, ppc_function_entry(func), 1);
+	return ftrace_modify_code(site, cur, repl);
+}
+
+/*
+ * Apply the ftrace update @command by calling the default
+ * ftrace_modify_all_code() directly, i.e. without stop_machine().
+ */
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
+void ftrace_free_init_tramp(void)
+{
+	int idx = 0;	/* scan ends at ftrace_tramp_init, an empty slot, or the array end */
+
+	while (idx < NUM_FTRACE_TRAMPS && ftrace_tramps[idx] &&
+	       ftrace_tramps[idx] != (unsigned long)ftrace_tramp_init)
+		idx++;
+	if (idx < NUM_FTRACE_TRAMPS && ftrace_tramps[idx])
+		ftrace_tramps[idx] = 0;	/* forget the ftrace_tramp_init entry */
+}
+
+static void __init add_ftrace_tramp(unsigned long tramp)
+{
+	int idx = 0;	/* first free (zero) slot; @tramp is silently dropped if none is left */
+
+	while (idx < NUM_FTRACE_TRAMPS && ftrace_tramps[idx])
+		idx++;
+
+	if (idx < NUM_FTRACE_TRAMPS)
+		ftrace_tramps[idx] = tramp;
+}
+
+/* Fill in both ftrace trampolines so they jump to FTRACE_REGS_ADDR, and register them. */
+int __init ftrace_dyn_arch_init(void)
+{
+	unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+	unsigned long addr = FTRACE_REGS_ADDR;
+	long reladdr;
+	int i;
+	u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		/* pla r12,addr */
+		PPC_PREFIX_MLS | __PPC_PRFX_R(1),
+		PPC_INST_PADDI | ___PPC_RT(_R12),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
+#elif defined(CONFIG_PPC64)
+		PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+		PPC_RAW_ADDIS(_R12, _R12, 0),
+		PPC_RAW_ADDI(_R12, _R12, 0),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
+#else
+		PPC_RAW_LIS(_R12, 0),
+		PPC_RAW_ADDI(_R12, _R12, 0),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
+#endif
+	};
+
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+		for (i = 0; i < 2; i++) {
+			reladdr = addr - (unsigned long)tramp[i];
+			/* The target must lie within +/-8G of the trampoline itself */
+			if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+				pr_err("Address of %ps out of range of pcrel address.\n",
+					(void *)addr);
+				return -1;
+			}
+			/* Copy the template, then OR the displacement into its first two words */
+			memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+			tramp[i][0] |= IMM_H18(reladdr);
+			tramp[i][1] |= IMM_L(reladdr);
+			add_ftrace_tramp((unsigned long)tramp[i]);
+		}
+	} else if (IS_ENABLED(CONFIG_PPC64)) {
+		reladdr = addr - kernel_toc_addr();
+		/* The target must lie within +/-2G of the kernel TOC address */
+		if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) {
+			pr_err("Address of %ps out of range of kernel_toc.\n",
+				(void *)addr);
+			return -1;
+		}
+		/* OR the TOC-relative offset into the addis/addi words of each copy */
+		for (i = 0; i < 2; i++) {
+			memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+			tramp[i][1] |= PPC_HA(reladdr);
+			tramp[i][2] |= PPC_LO(reladdr);
+			add_ftrace_tramp((unsigned long)tramp[i]);
+		}
+	} else {
+		for (i = 0; i < 2; i++) {
+			memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+			tramp[i][0] |= PPC_HA(addr);
+			tramp[i][1] |= PPC_LO(addr);
+			add_ftrace_tramp((unsigned long)tramp[i]);
+		}
+	}
+	return 0;
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Function-graph entry hook. When function_graph_enter() returns 0, the return
+ * address written back to regs.link is redirected to return_to_handler.
+ */
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	unsigned long *stack = (unsigned long *)fregs->regs.gpr[1];
+	int bit;
+
+	/* Tracing dead or paused for this task: leave the return address untouched */
+	if (likely(!ftrace_graph_is_dead()) &&
+	    likely(!atomic_read(&current->tracing_graph_pause))) {
+		bit = ftrace_test_recursion_trylock(ip, parent_ip);
+		if (bit >= 0) {
+			if (!function_graph_enter(parent_ip, ip, 0, stack))
+				parent_ip = ppc_function_entry(return_to_handler);
+			ftrace_test_recursion_unlock(bit);
+		}
+	}
+
+	fregs->regs.link = parent_ip;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
new file mode 100644
index 0000000000..7b85c3b460
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -0,0 +1,846 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define	NUM_FTRACE_TRAMPS	8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+/* Encode a branch from @ip to the function entry of @addr: 'bl' if @link, otherwise 'b'. */
+static ppc_inst_t
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+	unsigned long target = ppc_function_entry((void *)addr);
+	int flags = link ? BRANCH_SET_LINK : 0;
+	ppc_inst_t insn;
+
+	/* A create_branch() failure is not checked here */
+	create_branch(&insn, (u32 *)ip, target, flags);
+	return insn;
+}
+
+/*
+ * Replace the instruction @old at @ip with @new.
+ *
+ * We are paranoid about modifying text: a bug here could make us read or
+ * write somewhere harmful. So the site is read with
+ * copy_inst_from_kernel_nofault() and must hold @old before it is patched.
+ * Returns 0, -EFAULT (unreadable), -EINVAL (mismatch) or the patching error.
+ */
+static inline int
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+	ppc_inst_t replaced;
+
+	/* read the text we want to modify */
+	if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (!ppc_inst_equal(replaced, old)) {
+		pr_err("%p: replaced (%08lx) != old (%08lx)\n", (void *)ip,
+		       ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
+		return -EINVAL;
+	}
+
+	/* replace the text with the new text */
+	return patch_instruction((u32 *)ip, new);
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+	unsigned long entry = ppc_function_entry((void *)addr);
+	/* Non-zero if a relative branch at @ip can reach that entry point */
+	return is_offset_in_branch_range(entry - ip);
+}
+
+static int is_bl_op(ppc_inst_t op)	/* is @op a 'bl', ignoring the PPC_LI_MASK bits? */
+{
+	return PPC_RAW_BL(0) == (ppc_inst_val(op) & ~PPC_LI_MASK);
+}
+
+static int is_b_op(ppc_inst_t op)	/* is @op a plain 'b', ignoring the PPC_LI_MASK bits? */
+{
+	return PPC_RAW_BRANCH(0) == (ppc_inst_val(op) & ~PPC_LI_MASK);
+}
+
+/* Decode the target address of the relative branch @op located at @ip. */
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
+{
+	int disp = PPC_LI(ppc_inst_val(op));
+
+	/* 0x02000000 is the sign bit of the LI field: extend it through the upper bits */
+	if (disp & 0x02000000)
+		disp |= 0xfe000000;
+
+	return ip + (long)disp;
+}
+
+#ifdef CONFIG_MODULES
+/* Disable the module call site @rec->ip after checking it reaches @addr via a trampoline. */
+static int
+__ftrace_make_nop(struct module *mod,
+		  struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long entry, ptr, tramp;
+	unsigned long ip = rec->ip;
+	ppc_inst_t op, pop;
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+		if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
+			pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+			return -EFAULT;
+		}
+
+		/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+		if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
+		    !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
+			pr_err("Unexpected instruction %08lx around bl _mcount\n",
+			       ppc_inst_as_ulong(op));
+			return -EINVAL;
+		}
+	} else if (IS_ENABLED(CONFIG_PPC64)) {
+		/*
+		 * Check the instruction after the call: only the TOC load,
+		 * PPC_INST_LD_TOC, is accepted there.
+		 */
+		if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
+			pr_err("Fetching op failed.\n");
+			return -EFAULT;
+		}
+
+		if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
+			pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
+			       ppc_inst_as_ulong(op));
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * When using -mprofile-kernel or PPC32 there is no load to jump over.
+	 *
+	 * Otherwise our original call site looks like:
+	 *
+	 * bl <tramp>
+	 * ld r2,XX(r1)
+	 *
+	 * Milton Miller pointed out that we can not simply nop the branch.
+	 * If a task was preempted when calling a trace function, the nops
+	 * will remove the way to restore the TOC in r2 and the r2 TOC will
+	 * get corrupted.
+	 *
+	 * Use a b +8 to jump over the load.
+	 */
+	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
+		pop = ppc_inst(PPC_RAW_NOP());
+	else
+		pop = ppc_inst(PPC_RAW_BRANCH(8));	/* b +8 */
+
+	if (patch_instruction((u32 *)ip, pop)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+#else
+static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+	unsigned long tramp;
+	int idx = NUM_FTRACE_TRAMPS;
+
+	/*
+	 * Return a trampoline within branch range of @ip, or 0. Search from the end:
+	 * the compiler generated long_branch tramps are there and we prefer those.
+	 */
+	while (idx-- > 0) {
+		tramp = ftrace_tramps[idx];
+		if (tramp && is_offset_in_branch_range(tramp - ip))
+			return tramp;
+	}
+	return 0;
+}
+
+/* Record @tramp in the first free slot of ftrace_tramps[]; 0 on success, -1 if it is full. */
+static int add_ftrace_tramp(unsigned long tramp)
+{
+	int idx = 0;
+
+	while (idx < NUM_FTRACE_TRAMPS && ftrace_tramps[idx])
+		idx++;
+	if (idx == NUM_FTRACE_TRAMPS)
+		return -1;
+	ftrace_tramps[idx] = tramp;
+	return 0;
+}
+
+/*
+ * If @tramp is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), re-write the branch to go to
+ * ftrace_[regs_]caller() instead and note down the trampoline's location.
+ * Returns 0 if @tramp is already known or was newly set up, else -1.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+	int i;
+	ppc_inst_t op;
+	unsigned long ptr;
+
+	/* Is this a known long jump tramp? */
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+		if (ftrace_tramps[i] == tramp)
+			return 0;
+
+	/* New trampoline -- read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) {
+		pr_debug("Fetching opcode failed.\n");
+		return -1;
+	}
+
+	/* Is this a 24 bit branch? */
+	if (!is_b_op(op)) {
+		pr_debug("Trampoline is not a long branch tramp.\n");
+		return -1;
+	}
+
+	/* lets find where the pointer goes */
+	ptr = find_bl_target(tramp, op);
+
+	if (ptr != ppc_global_function_entry((void *)_mcount)) {
+		pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+		return -1;
+	}
+
+	/* Let's re-write the tramp to go to ftrace_[regs_]caller */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+	else
+		ptr = ppc_global_function_entry((void *)ftrace_caller);
+
+	if (patch_branch((u32 *)tramp, ptr, 0)) {
+		pr_debug("REL24 out of range!\n");
+		return -1;
+	}
+	/* NOTE(review): if recording fails below, the branch above stays re-written */
+	if (add_ftrace_tramp(tramp)) {
+		pr_debug("No tramp locations left\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Nop out the 'bl' at @rec->ip once a usable ftrace trampoline is known; @addr is unused. */
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long tramp, ip = rec->ip;
+	ppc_inst_t op;
+
+	/* Read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* Let's find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (setup_mcount_compiler_tramp(tramp)) {
+		/* Are other trampolines reachable? */
+		if (!find_ftrace_tramp(ip)) {
+			pr_err("No ftrace trampolines reachable from %ps\n",
+					(void *)ip);
+			return -EINVAL;
+		}
+	}
+
+	if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * Turn the call at rec->ip into a nop. If the calling address is more
+	 * than 24 bits away, then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, addr, 1);
+		new = ppc_inst(PPC_RAW_NOP());
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		return __ftrace_make_nop_kernel(rec, addr);
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		return -EINVAL;
+	}
+
+	/*
+	 * Out of range jumps are called from modules.
+	 * We should either already have a pointer to the module
+	 * or it has been passed in.
+	 */
+	if (!rec->arch.mod) {
+		if (!mod) {
+			pr_err("No module loaded addr=%lx\n", addr);
+			return -EFAULT;
+		}
+		rec->arch.mod = mod;
+	} else if (mod) {
+		if (mod != rec->arch.mod) {
+			pr_err("Record mod %p not equal to passed in mod %p\n",
+			       rec->arch.mod, mod);
+			return -EINVAL;
+		}
+		/* nothing to do if mod == rec->arch.mod */
+	} else
+		mod = rec->arch.mod;
+
+	return __ftrace_make_nop(mod, rec, addr);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Tell whether the instructions __ftrace_make_call() is about to replace form
+ * the expected disabled call site: a lone nop with DYNAMIC_FTRACE_WITH_REGS,
+ * otherwise 'b +8' followed by PPC_INST_LD_TOC. @ip is not used.
+ */
+static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
+{
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+	if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))))
+		return false;
+	return ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
+}
+
+/* Patch the disabled module call site @rec->ip to 'bl' the module trampoline for @addr. */
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t op[2];	/* NOTE(review): op[1] is read only when !DYNAMIC_FTRACE_WITH_REGS */
+	void *ip = (void *)rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = rec->arch.mod;
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(op, ip))
+		return -EFAULT;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
+	    copy_inst_from_kernel_nofault(op + 1, ip + 4))
+		return -EFAULT;
+
+	if (!expected_nop_sequence(ip, op[0], op[1])) {
+		pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
+		       ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
+		return -EINVAL;
+	}
+
+	/* If we never set up ftrace trampoline(s), then bail */
+	if (!mod->arch.tramp ||
+	    (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+/* Patch the nop at @rec->ip to 'bl' a reachable ftrace trampoline; 0 or a negative errno. */
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t op;
+	void *ip = (void *)rec->ip;
+	unsigned long tramp, entry, ptr;
+
+	/* Make sure we're being asked to patch branch to a known ftrace addr */
+	entry = ppc_global_function_entry((void *)ftrace_caller);
+	ptr = ppc_global_function_entry((void *)addr);
+
+	if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+	if (ptr != entry) {
+		pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+		return -EINVAL;
+	}
+
+	/* Make sure we have a nop */
+	if (copy_inst_from_kernel_nofault(&op, ip)) {
+		pr_err("Unable to read ftrace location %p\n", ip);
+		return -EFAULT;
+	}
+
+	if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Unexpected call sequence at %p: %08lx\n",
+		       ip, ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	tramp = find_ftrace_tramp((unsigned long)ip);
+	if (!tramp) {
+		pr_err("No ftrace trampolines reachable from %ps\n", ip);
+		return -EINVAL;
+	}
+
+	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("Error patching branch to ftrace tramp!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * Turn the nop at rec->ip into a call to @addr. If the calling address
+	 * is more than 24 bits away, then we have to use a trampoline to make
+	 * the call. Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = ppc_inst(PPC_RAW_NOP());
+		new = ftrace_call_replace(ip, addr, 1);
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		return __ftrace_make_call_kernel(rec, addr);
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		/* We should not get here without modules */
+		return -EINVAL;
+	}
+
+	/*
+	 * Out of range jumps are called from modules.
+	 * Being that we are converting from nop, it had better
+	 * already have a module defined.
+	 */
+	if (!rec->arch.mod) {
+		pr_err("No module loaded\n");
+		return -EINVAL;
+	}
+
+	return __ftrace_make_call(rec, addr);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_MODULES
+/* Re-target the module call at @rec->ip from @old_addr to @addr, via a trampoline if needed. */
+static int
+__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+					unsigned long addr)
+{
+	ppc_inst_t op;
+	unsigned long ip = rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = rec->arch.mod;
+
+	/* If we never set up ftrace trampolines, then bail */
+	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+	entry = ppc_global_function_entry((void *)old_addr);
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (tramp != entry) {
+		/* old_addr is not within range, so we must have used a trampoline */
+		if (module_trampoline_target(mod, tramp, &ptr)) {
+			pr_err("Failed to get trampoline target\n");
+			return -EFAULT;
+		}
+
+		pr_devel("trampoline target %lx", ptr);
+
+		/* This should match what was called */
+		if (ptr != entry) {
+			pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+			return -EINVAL;
+		}
+	}
+
+	/* The new target may be within range */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
+			pr_err("REL24 out of range!\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	if (rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+	return 0;
+}
+#endif
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * Switch the call at rec->ip from @old_addr to @addr. If a calling
+	 * address is more than 24 bits away, then we had to use a trampoline
+	 * to make the call. Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, old_addr, 1);
+		new = ftrace_call_replace(ip, addr, 1);
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		/*
+		 * We always patch out of range locations to go to the regs
+		 * variant, so there is nothing to do here
+		 */
+		return 0;
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		/* We should not get here without modules */
+		return -EINVAL;
+	}
+
+	/*
+	 * Out of range jumps are called from modules.
+	 */
+	if (!rec->arch.mod) {
+		pr_err("No module loaded\n");
+		return -EINVAL;
+	}
+
+	return __ftrace_modify_call(rec, old_addr, addr);
+}
+#endif
+
+/* Re-point the calls at ftrace_call (and ftrace_regs_call with WITH_REGS) to @func. */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long site = (unsigned long)(&ftrace_call);
+	ppc_inst_t cur, repl;
+	int err;
+
+	cur = ppc_inst_read((u32 *)site);
+	repl = ftrace_call_replace(site, (unsigned long)func, 1);
+	err = ftrace_modify_code(site, cur, repl);
+
+	if (err || !IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return err;
+
+	/* The regs variant has a call site of its own that must follow suit */
+	site = (unsigned long)(&ftrace_regs_call);
+	cur = ppc_inst_read((u32 *)site);
+	repl = ftrace_call_replace(site, (unsigned long)func, 1);
+	return ftrace_modify_code(site, cur, repl);
+}
+
+/*
+ * Apply the ftrace update @command by calling the default
+ * ftrace_modify_all_code() directly, i.e. without stop_machine().
+ */
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+void ftrace_free_init_tramp(void)
+{
+	int idx = 0;	/* scan ends at ftrace_tramp_init, an empty slot, or the array end */
+
+	while (idx < NUM_FTRACE_TRAMPS && ftrace_tramps[idx] &&
+	       ftrace_tramps[idx] != (unsigned long)ftrace_tramp_init)
+		idx++;
+	if (idx < NUM_FTRACE_TRAMPS && ftrace_tramps[idx])
+		ftrace_tramps[idx] = 0;	/* forget the ftrace_tramp_init entry */
+}
+
+/* Fill both ftrace trampolines with a TOC-relative jump to ftrace_[regs_]caller; record them. */
+int __init ftrace_dyn_arch_init(void)
+{
+	/* ld r12,PACATOC(r13); addis/addi take the offset patched in below; mtctr; bctr */
+	u32 stub_insns[] = {
+		PPC_RAW_LD(_R12, _R13, PACATOC),
+		PPC_RAW_ADDIS(_R12, _R12, 0),
+		PPC_RAW_ADDI(_R12, _R12, 0),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
+	};
+	unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+	void *caller = IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) ?
+		       (void *)ftrace_regs_caller : (void *)ftrace_caller;
+	unsigned long addr = ppc_global_function_entry(caller);
+	long reladdr = addr - kernel_toc_addr();
+	unsigned int *slot;
+	int i;
+
+	if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) {
+		pr_err("Address of %ps out of range of kernel_toc.\n",
+				(void *)addr);
+		return -1;
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* Copy the template, then OR in the high and low parts of the offset */
+		slot = tramp[i];
+		memcpy(slot, stub_insns, sizeof(stub_insns));
+		slot[1] |= PPC_HA(reladdr);
+		slot[2] |= PPC_LO(reladdr);
+		add_ftrace_tramp((unsigned long)slot);
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+/* Switch the 'b' at ftrace_graph_call between ftrace_graph_stub and ftrace_graph_caller. */
+static int ftrace_modify_ftrace_graph_caller(bool enable)
+{
+	unsigned long site = (unsigned long)(&ftrace_graph_call);
+	unsigned long from = (unsigned long)(&ftrace_graph_stub);
+	unsigned long to = (unsigned long)(&ftrace_graph_caller);
+	ppc_inst_t cur, repl;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+		return 0;
+	/* Enabling goes stub -> caller; disabling is the reverse */
+	cur = ftrace_call_replace(site, enable ? from : to, 0);
+	repl = ftrace_call_replace(site, enable ? to : from, 0);
+	return ftrace_modify_code(site, cur, repl);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_ftrace_graph_caller(true);	/* stub -> ftrace_graph_caller */
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_ftrace_graph_caller(false);	/* ftrace_graph_caller -> stub */
+}
+
+/*
+ * Offer the call of @ip (returning to @parent, stack pointer @sp) to the
+ * function graph tracer. Returns the address to return to: the entry of
+ * return_to_handler if function_graph_enter() returned 0, else @parent.
+ */
+static unsigned long
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
+{
+	unsigned long hooked;
+	int bit;
+
+	/* Graph tracing dead, or paused for the current task: no hooking */
+	if (unlikely(ftrace_graph_is_dead()) ||
+	    unlikely(atomic_read(&current->tracing_graph_pause)))
+		return parent;
+
+	/* A negative bit means the recursion lock was not taken */
+	bit = ftrace_test_recursion_trylock(ip, parent);
+	if (bit < 0)
+		return parent;
+
+	hooked = ppc_function_entry(return_to_handler);
+
+	if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+		parent = hooked;
+	ftrace_test_recursion_unlock(bit);
+
+	return parent;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS	/* result is stored into regs.link */
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
+}
+#else	/* result is handed back to the caller */
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+				    unsigned long sp)
+{
+	return __prepare_ftrace_return(parent, ip, sp);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+	/* Skip a leading '.' in @str unless @search itself starts with one */
+	const int skip_dot = *str == '.' && *search != '.';
+
+	return str + skip_dot;
+}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
new file mode 100644
index 0000000000..a8a7f28404
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Split from ftrace_64.S
+ */
+
+#include <linux/export.h>
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+
+_GLOBAL_TOC(ftrace_caller)
+	lbz	r3, PACA_FTRACE_ENABLED(r13)	/* r3 = ftrace-enabled byte */
+	cmpdi	r3, 0
+	beqlr				/* byte is zero: return at once */
+
+	/* Taken from output of objdump from lib64/glibc */
+	mflr	r3			/* r3 = LR on entry */
+	ld	r11, 0(r1)		/* r11 = doubleword at 0(r1), used as a frame pointer below */
+	stdu	r1, -112(r1)		/* open a 112-byte frame */
+	std	r3, 128(r1)		/* stash entry LR at old r1 + 16 */
+	ld	r4, 16(r11)		/* r4 = doubleword at 16(r11) */
+	subi	r3, r3, MCOUNT_INSN_SIZE	/* r3 = entry LR - MCOUNT_INSN_SIZE */
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+	ld	r0, 128(r1)		/* reload the LR stashed above */
+	mtlr	r0
+	addi	r1, r1, 112		/* close the 112-byte frame */
+
+_GLOBAL(ftrace_stub)
+	blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+	addi	r5, r1, 112		/* r5 (third argument) = r1 + 112 */
+	/* load r4 with local address */
+	ld	r4, 128(r1)
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	/* Grab the LR out of the caller stack frame */
+	ld	r11, 112(r1)
+	ld	r3, 16(r11)
+
+	bl	prepare_ftrace_return
+	nop
+
+	/*
+	 * prepare_ftrace_return gives us the address we divert to.
+	 * Change the LR in the caller's stack frame to this.
+	 */
+	ld	r11, 112(r1)
+	std	r3, 16(r11)
+
+	ld	r0, 128(r1)		/* reload the value stored at 128(r1) into LR */
+	mtlr	r0
+	addi	r1, r1, 112		/* pop 112 bytes of stack */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+	.space 32	/* 32 reserved bytes; NOTE(review): presumably filled in at runtime - confirm */
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+	.space 32	/* 32 reserved bytes, same layout as ftrace_tramp_text */
+.popsection
+
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+	mflr	r12		/* r12 = LR on entry */
+	mtctr	r12		/* CTR = that address, used as the branch target */
+	mtlr	r0		/* LR = r0 */
+	bctr			/* continue at the address that was in LR */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+	/* need to save return values (r3/r4) across the call below */
+#ifdef CONFIG_PPC64
+	std	r4,  -32(r1)
+	std	r3,  -24(r1)
+	/* save TOC */
+	std	r2,  -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)		/* open a 112-byte frame; the slots above now sit at 80..104(r1) */
+
+	/*
+	 * We might be called from a module.
+	 * Switch to our TOC to run inside the core kernel.
+	 */
+	LOAD_PACA_TOC()
+#else
+	stwu	r1, -16(r1)
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+#endif
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+#ifdef CONFIG_PPC64
+	ld	r1, 0(r1)		/* pop the frame through the back chain stored by stdu */
+	ld	r4,  -32(r1)
+	ld	r3,  -24(r1)
+	ld	r2,  -16(r1)
+	ld	r31, -8(r1)
+#else
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	addi	r1, r1, 16
+#endif
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
new file mode 100644
index 0000000000..40677416d7
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -0,0 +1,323 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Split from ftrace_64.S
+ */
+
+#include <linux/export.h>
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/thread_info.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+
+/*
+ *
+ * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount()
+ * when ftrace is active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter r1 points to A's stack frame, B has not yet
+ * had a chance to allocate one.
+ *
+ * Additionally r2 may point either to the TOC for A, or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+.macro	ftrace_regs_entry allregs
+	/* Create a minimal stack frame for representing B */
+	PPC_STLU	r1, -STACK_FRAME_MIN_SIZE(r1)
+
+	/* Create our stack frame + pt_regs */
+	PPC_STLU	r1,-SWITCH_FRAME_SIZE(r1)
+
+	/* Save r0 and r3-r10 to pt_regs; the rest follow below when allregs is 1 */
+	SAVE_GPR(0, r1)
+	SAVE_GPRS(3, 10, r1)
+
+#ifdef CONFIG_PPC64
+	/* Save the original return address in A's stack frame */
+	std	r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
+	/* Ok to continue? (r3 was saved above, so it is free as scratch) */
+	lbz	r3, PACA_FTRACE_ENABLED(r13)
+	cmpdi	r3, 0
+	beq	ftrace_no_trace
+#endif
+
+	.if \allregs == 1
+	SAVE_GPR(2, r1)
+	SAVE_GPRS(11, 31, r1)
+	.else
+#ifdef CONFIG_LIVEPATCH_64
+	SAVE_GPR(14, r1)
+#endif
+	.endif
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+	PPC_STL	r8, GPR1(r1)
+
+	.if \allregs == 1
+	/* Load special regs for save below */
+	mfmsr   r8
+	mfctr   r9
+	mfxer   r10
+	mfcr	r11
+	.else
+	/* Clear MSR to flag as ftrace_caller versus ftrace_regs_caller */
+	li	r8, 0
+	.endif
+
+	/* Get the _mcount() call site out of LR */
+	mflr	r7
+	/* Save it as pt_regs->nip */
+	PPC_STL	r7, _NIP(r1)
+	/* Also save it in B's stackframe header for proper unwind */
+	PPC_STL	r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
+	/* Save the original LR (still held in r0) in pt_regs->link */
+	PPC_STL	r0, _LINK(r1)
+
+#ifdef CONFIG_PPC64
+	/* Save callee's TOC in the ABI compliant location */
+	std	r2, STK_GOT(r1)
+	LOAD_PACA_TOC()		/* get kernel TOC in r2 */
+	LOAD_REG_ADDR(r3, function_trace_op)
+	ld	r5,0(r3)		/* r5 = function_trace_op, third argument of the call */
+#else
+	lis	r3,function_trace_op@ha
+	lwz	r5,function_trace_op@l(r3)	/* r5 = function_trace_op, third argument of the call */
+#endif
+
+#ifdef CONFIG_LIVEPATCH_64
+	mr	r14, r7		/* remember old NIP */
+#endif
+
+	/* Calculate ip from nip - MCOUNT_INSN_SIZE into r3 for call below */
+	subi    r3, r7, MCOUNT_INSN_SIZE
+
+	/* Put the original return address in r4 as parent_ip */
+	mr	r4, r0
+
+	/* Save special regs (r8 is the real MSR for allregs, zero otherwise) */
+	PPC_STL	r8, _MSR(r1)
+	.if \allregs == 1
+	PPC_STL	r9, _CTR(r1)
+	PPC_STL	r10, _XER(r1)
+	PPC_STL	r11, _CCR(r1)
+	.endif
+
+	/* Load &pt_regs in r6 for call below */
+	addi    r6, r1, STACK_INT_FRAME_REGS
+.endm
+
+.macro	ftrace_regs_exit allregs
+	/* Load ctr with the possibly modified NIP */
+	PPC_LL	r3, _NIP(r1)
+	mtctr	r3
+
+#ifdef CONFIG_LIVEPATCH_64
+	cmpd	r14, r3		/* has NIP been altered? */
+#endif
+
+	/* Restore gprs: r2-r31 for allregs, else r3-r10 (plus r14 for livepatch) */
+	.if \allregs == 1
+	REST_GPRS(2, 31, r1)
+	.else
+	REST_GPRS(3, 10, r1)
+#ifdef CONFIG_LIVEPATCH_64
+	REST_GPR(14, r1)
+#endif
+	.endif
+
+	/* Restore possibly modified LR */
+	PPC_LL	r0, _LINK(r1)
+	mtlr	r0
+
+#ifdef CONFIG_PPC64
+	/* Restore callee's TOC */
+	ld	r2, STK_GOT(r1)
+#endif
+
+	/* Pop both frames pushed by ftrace_regs_entry */
+	addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+
+#ifdef CONFIG_LIVEPATCH_64
+	/* Based on the cmpd above, if the NIP was altered handle livepatch */
+	bne-	livepatch_handler
+#endif
+	bctr			/* jump after _mcount site */
+.endm
+
+_GLOBAL(ftrace_regs_caller)
+	ftrace_regs_entry 1	/* allregs=1: all GPRs plus MSR/CTR/XER/CR go into pt_regs */
+	/* ftrace_call(r3, r4, r5, r6): ip, parent_ip, function_trace_op, &pt_regs */
+.globl ftrace_regs_call
+ftrace_regs_call:
+	bl	ftrace_stub
+	nop
+	ftrace_regs_exit 1	/* restore r2-r31 and branch to the saved NIP */
+
+_GLOBAL(ftrace_caller)
+	ftrace_regs_entry 0	/* allregs=0: only r0, r3-r10 (r14 with LIVEPATCH_64) saved; MSR slot zeroed */
+	/* ftrace_call(r3, r4, r5, r6): ip, parent_ip, function_trace_op, &pt_regs */
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+	ftrace_regs_exit 0	/* restore r3-r10 and branch to the saved NIP */
+
+_GLOBAL(ftrace_stub)
+	blr	/* plain return; the bl at ftrace_call/ftrace_regs_call targets this as assembled */
+
+#ifdef CONFIG_PPC64
+ftrace_no_trace:
+	mflr	r3			/* LR still holds the _mcount() call site */
+	mtctr	r3
+	REST_GPR(3, r1)			/* r3 was used as scratch for the enabled check */
+	addi	r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE	/* drop both frames */
+	mtlr	r0			/* r0 = original return address into A */
+	bctr				/* resume after the call site */
+#endif
+
+#ifdef CONFIG_LIVEPATCH_64
+	/*
+	 * This function runs in the mcount context, between two functions. As
+	 * such it can only clobber registers which are volatile and used in
+	 * function linkage.
+	 *
+	 * We get here when a function A calls another function B, but B has
+	 * been live patched with a new function C.
+	 *
+	 * On entry:
+	 *  - we have no stack frame and can not allocate one
+	 *  - LR points back to the original caller (in A)
+	 *  - CTR holds the new NIP in C
+	 *  - r0, r11 & r12 are free
+	 */
+livepatch_handler:
+	ld	r12, PACA_THREAD_INFO(r13)
+
+	/* Allocate 3 x 8 bytes on the livepatch stack (TI_livepatch_sp) */
+	ld	r11, TI_livepatch_sp(r12)
+	addi	r11, r11, 24
+	std	r11, TI_livepatch_sp(r12)
+
+	/* Save toc & real LR on livepatch stack */
+	std	r2,  -24(r11)
+	mflr	r12
+	std	r12, -16(r11)
+
+	/* Store stack end marker */
+	lis     r12, STACK_END_MAGIC@h
+	ori     r12, r12, STACK_END_MAGIC@l
+	std	r12, -8(r11)
+
+	/* Put ctr in r12 for global entry and branch there */
+	mfctr	r12
+	bctrl
+
+	/*
+	 * Now we are returning from the patched function to the original
+	 * caller A. We are free to use r11, r12 and we can use r2 until we
+	 * restore it.
+	 */
+
+	ld	r12, PACA_THREAD_INFO(r13)
+
+	ld	r11, TI_livepatch_sp(r12)
+
+	/* Check stack marker hasn't been trashed */
+	lis     r2,  STACK_END_MAGIC@h
+	ori     r2,  r2, STACK_END_MAGIC@l
+	ld	r12, -8(r11)
+1:	tdne	r12, r2		/* traps when the stored marker differs from STACK_END_MAGIC */
+	EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
+
+	/* Restore LR & toc from livepatch stack */
+	ld	r12, -16(r11)
+	mtlr	r12
+	ld	r2,  -24(r11)
+
+	/* Pop livepatch stack frame */
+	ld	r12, PACA_THREAD_INFO(r13)
+	subi	r11, r11, 24
+	std	r11, TI_livepatch_sp(r12)
+
+	/* Return to original caller of live patched function */
+	blr
+#endif /* CONFIG_LIVEPATCH_64 */
+
+#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+	mflr	r12		/* r12 = LR on entry */
+	mtctr	r12		/* CTR = that address, used as the branch target */
+	mtlr	r0		/* LR = r0 */
+	bctr			/* continue at the address that was in LR */
+#endif /* !CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+	/* need to save return values (r3/r4) across the call below */
+#ifdef CONFIG_PPC64
+	std	r4,  -32(r1)
+	std	r3,  -24(r1)
+	/* save TOC */
+	std	r2,  -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)		/* open a 112-byte frame; the slots above now sit at 80..104(r1) */
+
+	/*
+	 * We might be called from a module.
+	 * Switch to our TOC to run inside the core kernel.
+	 */
+	LOAD_PACA_TOC()
+#else
+	stwu	r1, -16(r1)		/* 16-byte frame; r3/r4 go to 8(r1) and 12(r1) */
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+#endif
+
+	bl	ftrace_return_to_handler
+	nop
+
+	/* return value has real return address */
+	mtlr	r3
+
+#ifdef CONFIG_PPC64
+	ld	r1, 0(r1)		/* pop the frame through the back chain stored by stdu */
+	ld	r4,  -32(r1)
+	ld	r3,  -24(r1)
+	ld	r2,  -16(r1)
+	ld	r31, -8(r1)
+#else
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	addi	r1, r1, 16
+#endif
+
+	/* Jump back to real return address */
+	blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+	.space 32	/* 32 reserved bytes; NOTE(review): presumably filled in at runtime - confirm */
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+	.space 32	/* 32 reserved bytes, same layout as ftrace_tramp_text */
+.popsection
diff --git a/arch/powerpc/kernel/trace/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c
new file mode 100644
index 0000000000..b0143a3137
--- /dev/null
+++ b/arch/powerpc/kernel/trace/trace_clock.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include <asm/trace_clock.h>
+#include <asm/time.h>
+
+u64 notrace trace_clock_ppc_tb(void)
+{
+	return get_tb();	/* trace clock value is whatever get_tb() returns, unscaled */
+}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
new file mode 100644
index 0000000000..fe3f720c9c
--- /dev/null
+++ b/arch/powerpc/kernel/traps.c
@@ -0,0 +1,2330 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
+ *  Copyright 2007-2010 Freescale Semiconductor, Inc.
+ *
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  and Paul Mackerras (paulus@samba.org)
+ */
+
+/*
+ * This file handles the architecture-dependent parts of hardware exceptions
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pkeys.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/extable.h>
+#include <linux/module.h>	/* print_modules */
+#include <linux/prctl.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+#include <linux/kexec.h>
+#include <linux/backlight.h>
+#include <linux/bug.h>
+#include <linux/kdebug.h>
+#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
+#include <linux/smp.h>
+#include <linux/console.h>
+#include <linux/kmsg_dump.h>
+#include <linux/debugfs.h>
+
+#include <asm/emulated_ops.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#ifdef CONFIG_PMAC_BACKLIGHT
+#include <asm/backlight.h>
+#endif
+#ifdef CONFIG_PPC64
+#include <asm/firmware.h>
+#include <asm/processor.h>
+#endif
+#include <asm/kexec.h>
+#include <asm/ppc-opcode.h>
+#include <asm/rio.h>
+#include <asm/fadump.h>
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/debug.h>
+#include <asm/asm-prototypes.h>
+#include <asm/hmi.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/kprobes.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+#include <asm/disassemble.h>
+#include <asm/udbg.h>
+
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
+int (*__debugger)(struct pt_regs *regs) __read_mostly;	/* hook pointers, each taking the trap's pt_regs */
+int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
+int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;
+
+EXPORT_SYMBOL(__debugger);	/* every hook above is exported */
+EXPORT_SYMBOL(__debugger_ipi);
+EXPORT_SYMBOL(__debugger_bpt);
+EXPORT_SYMBOL(__debugger_sstep);
+EXPORT_SYMBOL(__debugger_iabr_match);
+EXPORT_SYMBOL(__debugger_break_match);
+EXPORT_SYMBOL(__debugger_fault_handler);
+#endif /* CONFIG_DEBUGGER || CONFIG_KEXEC_CORE */
+
+/* Transactional Memory trap debug: KERN_INFO printk when TM_DEBUG_SW is defined, otherwise a no-op */
+#ifdef TM_DEBUG_SW
+#define TM_DEBUG(x...) printk(KERN_INFO x)
+#else
+#define TM_DEBUG(x...) do { } while(0)
+#endif /* TM_DEBUG_SW */
+
+static const char *signame(int signr)
+{
+	/*
+	 * Short label for a fault signal; unlisted values give "unknown signal".
+	 */
+	return signr == SIGBUS ? "bus error" :
+	       signr == SIGFPE ? "floating point exception" :
+	       signr == SIGILL ? "illegal instruction" :
+	       signr == SIGSEGV ? "segfault" :
+	       signr == SIGTRAP ? "unhandled trap" :
+	       "unknown signal";
+}
+
+/*
+ * Trap & Exception support
+ */
+
+#ifdef CONFIG_PMAC_BACKLIGHT
+static void pmac_backlight_unblank(void)
+{
+	mutex_lock(&pmac_backlight_mutex);
+	if (pmac_backlight) {	/* nothing to do when no backlight device is set */
+		struct backlight_properties *props;
+
+		props = &pmac_backlight->props;
+		props->brightness = props->max_brightness;	/* go to the maximum level */
+		props->power = FB_BLANK_UNBLANK;
+		backlight_update_status(pmac_backlight);	/* push the new properties to the device */
+	}
+	mutex_unlock(&pmac_backlight_mutex);
+}
+#else	/* !CONFIG_PMAC_BACKLIGHT: empty stub so callers need no #ifdef */
+static inline void pmac_backlight_unblank(void) { }
+#endif /* CONFIG_PMAC_BACKLIGHT */
+
+/*
+ * Predict whether an oops/die() taken now would bring the machine down.
+ *
+ * This is a best-effort guess, not a guarantee: registered notifiers or
+ * other unexpected conditions can still take the kernel down, and a task
+ * dying with locks or other critical state held can leave the system
+ * effectively unusable even when this returns false.
+ */
+bool die_will_crash(void)
+{
+	/* fadump or kexec saying it would crash is enough on its own. */
+	if (should_fadump_crash() || kexec_should_crash(current))
+		return true;
+
+	/*
+	 * Otherwise use the condition oops_end() tests before its panic
+	 * delay: interrupt context, panic_on_oops, pid 0 or global init.
+	 */
+	return in_interrupt() || panic_on_oops ||
+	       !current->pid || is_global_init(current);
+}
+
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;	/* taken in oops_begin(), released in oops_end() */
+static int die_owner = -1;		/* CPU recorded by oops_begin(); reset to -1 when oops_end() releases die_lock */
+static unsigned int die_nest_count;	/* oops_begin() calls not yet matched by oops_end() */
+static int die_counter;			/* running oops number printed by __die() */
+
+void panic_flush_kmsg_start(void)
+{
+	/*
+	 * These are mostly taken from kernel/panic.c, but try to do
+	 * relatively minimal work. Don't use delay functions (TB may
+	 * be broken), don't crash dump (need to set a firmware log),
+	 * don't run notifiers. We do want to get some information to
+	 * Linux console.
+	 */
+	console_verbose();
+	bust_spinlocks(1);	/* undone by bust_spinlocks(0) in panic_flush_kmsg_end() */
+}
+
+void panic_flush_kmsg_end(void)
+{
+	kmsg_dump(KMSG_DUMP_PANIC);	/* dump the log with reason PANIC first */
+	bust_spinlocks(0);		/* pairs with bust_spinlocks(1) in panic_flush_kmsg_start() */
+	debug_locks_off();
+	console_flush_on_panic(CONSOLE_FLUSH_PENDING);	/* then flush pending console output */
+}
+
+static unsigned long oops_begin(struct pt_regs *regs)
+{
+	int cpu;
+	unsigned long flags;
+
+	oops_enter();
+
+	/* racy, but better than risking deadlock. */
+	raw_local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (!arch_spin_trylock(&die_lock)) {
+		if (cpu == die_owner)
+			/* nested oops. should stop eventually */;
+		else
+			arch_spin_lock(&die_lock);	/* die_owner is another CPU: wait for the lock */
+	}
+	die_nest_count++;	/* oops_end() releases die_lock once this drops back to zero */
+	die_owner = cpu;
+	console_verbose();
+	bust_spinlocks(1);
+	if (machine_is(powermac))
+		pmac_backlight_unblank();
+	return flags;		/* saved IRQ state; die() hands it to oops_end() */
+}
+NOKPROBE_SYMBOL(oops_begin);
+
+static void oops_end(unsigned long flags, struct pt_regs *regs,
+			       int signr)
+{
+	bust_spinlocks(0);	/* pairs with bust_spinlocks(1) in oops_begin() */
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	die_nest_count--;
+	oops_exit();
+	printk("\n");
+	if (!die_nest_count) {
+		/* Nest count reaches zero, release the lock. */
+		die_owner = -1;
+		arch_spin_unlock(&die_lock);
+	}
+	raw_local_irq_restore(flags);	/* flags as returned by oops_begin() */
+
+	/*
+	 * system_reset_exception handles debugger, crash dump, panic, for 0x100
+	 */
+	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
+		return;
+
+	crash_fadump(regs, "die oops");
+
+	if (kexec_should_crash(current))
+		crash_kexec(regs);
+
+	if (!signr)	/* die() passes 0 when __die() returned non-zero */
+		return;
+
+	/*
+	 * While our oops output is serialised by a spinlock, output
+	 * from panic() called below can race and corrupt it. If we
+	 * know we are going to panic, delay for 1 second so we have a
+	 * chance to get clean backtraces from all CPUs that are oopsing.
+	 */
+	if (in_interrupt() || panic_on_oops || !current->pid ||
+	    is_global_init(current)) {
+		mdelay(MSEC_PER_SEC);
+	}
+
+	if (panic_on_oops)
+		panic("Fatal exception");
+	make_task_dead(signr);	/* reached only when panic_on_oops is clear */
+}
+NOKPROBE_SYMBOL(oops_end);
+
+static char *get_mmu_str(void)
+{
+	/* Radix is tested first; if neither MMU type matches the tag is empty. */
+	if (early_radix_enabled())
+		return " MMU=Radix";
+
+	return early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "";
+}
+
+static int __die(const char *str, struct pt_regs *regs, long err)
+{
+	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);	/* bumps the oops counter as it prints */
+
+	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
+	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
+	       PAGE_SIZE / 1024, get_mmu_str(),
+	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
+	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+	       IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
+	       ppc_md.name ? ppc_md.name : "");
+
+	if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
+		return 1;	/* notifier chain said stop: skip the module and register dump */
+
+	print_modules();
+	show_regs(regs);
+
+	return 0;	/* full report printed */
+}
+NOKPROBE_SYMBOL(__die);
+
+void die(const char *str, struct pt_regs *regs, long err)
+{
+	unsigned long irq_flags;
+
+	/*
+	 * system_reset_exception() deals with the debugger, crash dump and
+	 * panic itself for 0x100, so only offer other traps to the debugger.
+	 */
+	if (TRAP(regs) != INTERRUPT_SYSTEM_RESET && debugger(regs))
+		return;
+
+	irq_flags = oops_begin(regs);
+	/* A non-zero __die() result clears the signal handed to oops_end(). */
+	if (__die(str, regs, err) != 0)
+		err = 0;
+	oops_end(irq_flags, regs, err);
+}
+NOKPROBE_SYMBOL(die);
+
+void user_single_step_report(struct pt_regs *regs)
+{
+	force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);	/* SIGTRAP/TRAP_TRACE with regs->nip as the fault address */
+}
+
+static void show_signal_msg(int signr, struct pt_regs *regs, int code,
+			    unsigned long addr)
+{
+	static DEFINE_RATELIMIT_STATE(msg_limit, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	struct task_struct *tsk = current;
+
+	/*
+	 * Stay quiet unless show_unhandled_signals is set, unhandled_signal()
+	 * accepts this signal for the task, and the rate limit allows it.
+	 * The checks run in that order, so the limiter is consulted last.
+	 */
+	if (!show_unhandled_signals || !unhandled_signal(tsk, signr) ||
+	    !__ratelimit(&msg_limit))
+		return;
+
+	/* One log line: summary, print_vma_addr() output for nip, newline. */
+	pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
+		tsk->comm, tsk->pid, signame(signr), signr,
+		addr, regs->nip, regs->link, code);
+	print_vma_addr(KERN_CONT " in ", regs->nip);
+	pr_cont("\n");
+
+	show_user_instructions(regs);
+}
+
+static bool exception_common(int signr, struct pt_regs *regs, int code,
+			      unsigned long addr)
+{
+	/*
+	 * User-mode fault: log it and record the code; the caller sends the
+	 * signal. Interrupts are deliberately not enabled here, because the
+	 * machine check path can arrive as an NMI or IRQ where that is
+	 * unwelcome. Testing in_hardirq || in_nmi would be an option, but
+	 * nothing requires _exception() to enable irqs on a handler's
+	 * behalf; handlers that want them on do it themselves.
+	 */
+	if (user_mode(regs)) {
+		show_signal_msg(signr, regs, code, addr);
+		current->thread.trap_nr = code;
+
+		return true;
+	}
+
+	/* Kernel-mode fault: hand over to die() and report false. */
+	die("Exception in kernel mode", regs, signr);
+
+	return false;
+}
+
+/* Fault with SIGSEGV/SEGV_PKUERR at @addr; @key is passed to force_sig_pkuerr(). */
+void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
+{
+	/* false means exception_common() took the kernel-mode die() path */
+	if (exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
+		force_sig_pkuerr((void __user *) addr, key);
+}
+
+/* Fault at @addr: run exception_common(), then force @signr with @code. */
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+	/* false means exception_common() took the kernel-mode die() path */
+	if (exception_common(signr, regs, code, addr))
+		force_sig_fault(signr, code, (void __user *)addr);
+}
+
+/*
+ * The interrupt architecture has a quirk in that the HV interrupts excluding
+ * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
+ * that an interrupt handler must do is save off a GPR into a scratch register,
+ * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
+ * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
+ * that it is non-reentrant, which leads to random data corruption.
+ *
+ * The solution is for NMI interrupts in HV mode to check if they originated
+ * from these critical HV interrupt regions. If so, then mark them not
+ * recoverable.
+ *
+ * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
+ * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
+ * guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
+ * that would work. However any other guest OS that may have the SPRG live
+ * and MSR[RI]=1 could encounter silent corruption.
+ *
+ * Builds that do not support KVM could take this second option to increase
+ * the recoverability of NMIs.
+ */
+noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_POWERNV
+	unsigned long kbase = (unsigned long)_stext;
+	unsigned long nip = regs->nip;
+
+	if (!(regs->msr & MSR_RI))	/* MSR[RI] already clear: nothing to mark */
+		return;
+	if (!(regs->msr & MSR_HV))	/* MSR[HV] clear in the interrupted context */
+		return;
+	if (regs->msr & MSR_PR)		/* MSR[PR] set in the interrupted context */
+		return;
+
+	/*
+	 * Now test if the interrupt has hit a range that may be using
+	 * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
+	 * problem ranges all run un-relocated. Test real and virt modes
+	 * at the same time by dropping the high bit of the nip (virt mode
+	 * entry points still have the +0x4000 offset).
+	 */
+	nip &= ~0xc000000000000000ULL;
+	if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
+		goto nonrecoverable;
+	if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
+		goto nonrecoverable;
+	if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
+		goto nonrecoverable;
+	if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
+		goto nonrecoverable;
+
+	/* Trampoline code runs un-relocated so subtract kbase. */
+	if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
+			nip < (unsigned long)(end_real_trampolines - kbase))
+		goto nonrecoverable;
+	if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
+			nip < (unsigned long)(end_virt_trampolines - kbase))
+		goto nonrecoverable;
+	return;		/* nip is outside every critical range: leave regs untouched */
+
+nonrecoverable:
+	regs->msr &= ~MSR_RI;	/* mark the interrupted context not recoverable */
+	local_paca->hsrr_valid = 0;
+	local_paca->srr_valid = 0;
+#endif /* CONFIG_PPC_POWERNV */
+}
+DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
+{
+	unsigned long hsrr0, hsrr1;	/* only valid when saved_hsrrs is true */
+	bool saved_hsrrs = false;
+
+	/*
+	 * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
+	 * The system reset interrupt itself may clobber HSRRs (e.g., to call
+	 * OPAL), so save them here and restore them before returning.
+	 *
+	 * Machine checks don't need to save HSRRs, as the real mode handler
+	 * is careful to avoid them, and the regular handler is not delivered
+	 * as an NMI.
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		hsrr0 = mfspr(SPRN_HSRR0);
+		hsrr1 = mfspr(SPRN_HSRR1);
+		saved_hsrrs = true;
+	}
+
+	hv_nmi_check_nonrecoverable(regs);
+
+	__this_cpu_inc(irq_stat.sreset_irqs);
+
+	/* Give the ppc_md hook, if set, first chance; non-zero skips the rest */
+	if (ppc_md.system_reset_exception) {
+		if (ppc_md.system_reset_exception(regs))
+			goto out;
+	}
+
+	if (debugger(regs))
+		goto out;
+
+	kmsg_dump(KMSG_DUMP_OOPS);
+	/*
+	 * A system reset is a request to dump, so we always send
+	 * it through the crashdump code (if fadump or kdump are
+	 * registered).
+	 */
+	crash_fadump(regs, "System Reset");
+
+	crash_kexec(regs);
+
+	/*
+	 * We aren't the primary crash CPU. We need to send it
+	 * to a holding pattern to avoid it ending up in the panic
+	 * code.
+	 */
+	crash_kexec_secondary(regs);
+
+	/*
+	 * No debugger or crash dump registered, print logs then
+	 * panic.
+	 */
+	die("System Reset", regs, SIGABRT);
+
+	mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	nmi_panic(regs, "System Reset");
+
+out:
+#ifdef CONFIG_PPC_BOOK3S_64
+	BUG_ON(get_paca()->in_nmi == 0);
+	if (get_paca()->in_nmi > 1)
+		die("Unrecoverable nested System Reset", regs, SIGABRT);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+	/* Must die if the interrupt is not recoverable */
+	if (regs_is_unrecoverable(regs)) {
+		/* For the reason explained in die_mce, nmi_exit before die */
+		nmi_exit();
+		die("Unrecoverable System Reset", regs, SIGABRT);
+	}
+
+	if (saved_hsrrs) {	/* put back the HSRR values captured on entry */
+		mtspr(SPRN_HSRR0, hsrr0);
+		mtspr(SPRN_HSRR1, hsrr1);
+	}
+
+	/* What should we do here? We could issue a shutdown or hard reset. */
+
+	return 0;
+}
+
+/*
+ * I/O accesses can cause machine checks on powermacs.
+ * Check if the NIP corresponds to the address of a sync
+ * instruction for which there is an entry in the exception
+ * table. Returns 1 after redirecting to the fixup, else 0.
+ *  -- paulus.
+ */
+static inline int check_io_access(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC32
+	unsigned long msr = regs->msr;
+	const struct exception_table_entry *entry;
+	unsigned int *nip = (unsigned int *)regs->nip;
+
+	if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
+	    && (entry = search_exception_tables(regs->nip)) != NULL) {
+		/*
+		 * Check that it's a sync instruction, or somewhere
+		 * in the twi; isync; nop sequence that inb/inw/inl uses.
+		 * As the address is in the exception table
+		 * we should be able to read the instr there.
+		 * For the debug message, we look at the preceding
+		 * load or store.
+		 */
+		if (*nip == PPC_RAW_NOP())
+			nip -= 2;	/* on the nop: step back two instructions */
+		else if (*nip == PPC_RAW_ISYNC())
+			--nip;		/* on the isync: step back one instruction */
+		if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
+			unsigned int rb;
+
+			--nip;		/* now at the preceding load or store */
+			rb = (*nip >> 11) & 0x1f;	/* 5-bit register field at bits 11..15 of that word */
+			printk(KERN_DEBUG "%s bad port %lx at %p\n",
+			       (*nip & 0x100)? "OUT to": "IN from",
+			       regs->gpr[rb] - _IO_BASE, nip);
+			regs_set_recoverable(regs);
+			regs_set_return_ip(regs, extable_fixup(entry));
+			return 1;	/* handled: execution resumes at the fixup */
+		}
+	}
+#endif /* CONFIG_PPC32 */
+	return 0;	/* always 0 without CONFIG_PPC32 */
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+/* On 4xx, the reason for the machine check or program exception
+   is in the ESR. */
+#define get_reason(regs)	((regs)->esr)
+#define REASON_FP		ESR_FP
+#define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
+#define REASON_PRIVILEGED	ESR_PPR
+#define REASON_TRAP		ESR_PTR
+#define REASON_PREFIXED		0	/* zero mask: the test in inst_length() never matches */
+#define REASON_BOUNDARY		0
+
+/* single-step stuff (this branch keeps the state in thread.debug.dbcr0) */
+#define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
+#define clear_br_trace(regs)	do {} while(0)
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+/* On non-4xx, the reason for the machine check or program
+   exception is in the MSR. */
+#define get_reason(regs)	((regs)->msr)
+#define REASON_TM		SRR1_PROGTM	/* defined in this branch only */
+#define REASON_FP		SRR1_PROGFPE
+#define REASON_ILLEGAL		SRR1_PROGILL
+#define REASON_PRIVILEGED	SRR1_PROGPRIV
+#define REASON_TRAP		SRR1_PROGTRAP
+#define REASON_PREFIXED		SRR1_PREFIXED
+#define REASON_BOUNDARY		SRR1_BOUNDARY
+
+#define single_stepping(regs)	((regs)->msr & MSR_SE)
+#define clear_single_step(regs)	(regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
+#define clear_br_trace(regs)	(regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+#define inst_length(reason)	(((reason) & REASON_PREFIXED) ? 8 : 4)	/* 8 when REASON_PREFIXED is set in reason, else 4 */
+
+#if defined(CONFIG_PPC_E500)
+int machine_check_e500mc(struct pt_regs *regs)
+{
+	unsigned long mcsr = mfspr(SPRN_MCSR);
+	unsigned long pvr = mfspr(SPRN_PVR);
+	unsigned long reason = mcsr;	/* working copy; bits may be masked off below */
+	int recoverable = 1;
+
+	if (reason & MCSR_LD) {
+		recoverable = fsl_rio_mcheck_exception(regs);
+		if (recoverable == 1)
+			goto silent_out;	/* RIO handler returned 1: skip the report */
+	}
+
+	printk("Machine check in kernel mode.\n");
+	printk("Caused by (from MCSR=%lx): ", reason);
+
+	if (reason & MCSR_MCP)
+		pr_cont("Machine Check Signal\n");
+
+	if (reason & MCSR_ICPERR) {
+		pr_cont("Instruction Cache Parity Error\n");
+
+		/*
+		 * This is recoverable by invalidating the i-cache.
+		 */
+		mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
+		while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
+			;
+
+		/*
+		 * This will generally be accompanied by an instruction
+		 * fetch error report -- only treat MCSR_IF as fatal
+		 * if it wasn't due to an L1 parity error.
+		 */
+		reason &= ~MCSR_IF;
+	}
+
+	if (reason & MCSR_DCPERR_MC) {
+		pr_cont("Data Cache Parity Error\n");
+
+		/*
+		 * In write shadow mode we auto-recover from the error, but it
+		 * may still get logged and cause a machine check.  We should
+		 * only treat the non-write shadow case as non-recoverable.
+		 */
+		/* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
+		 * is not implemented but L1 data cache always runs in write
+		 * shadow mode. Hence on data cache parity errors HW will
+		 * automatically invalidate the L1 Data Cache.
+		 */
+		if (PVR_VER(pvr) != PVR_VER_E6500) {
+			if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+				recoverable = 0;
+		}
+	}
+
+	if (reason & MCSR_L2MMU_MHIT) {
+		pr_cont("Hit on multiple TLB entries\n");
+		recoverable = 0;
+	}
+
+	if (reason & MCSR_NMI)
+		pr_cont("Non-maskable interrupt\n");
+
+	if (reason & MCSR_IF) {
+		pr_cont("Instruction Fetch Error Report\n");
+		recoverable = 0;
+	}
+
+	if (reason & MCSR_LD) {
+		pr_cont("Load Error Report\n");
+		recoverable = 0;
+	}
+
+	if (reason & MCSR_ST) {
+		pr_cont("Store Error Report\n");
+		recoverable = 0;
+	}
+
+	if (reason & MCSR_LDG) {
+		pr_cont("Guarded Load Error Report\n");
+		recoverable = 0;
+	}
+
+	if (reason & MCSR_TLBSYNC)
+		pr_cont("Simultaneous tlbsync operations\n");
+
+	if (reason & MCSR_BSL2_ERR) {
+		pr_cont("Level 2 Cache Error\n");
+		recoverable = 0;
+	}
+
+	if (reason & MCSR_MAV) {
+		u64 addr;
+
+		addr = mfspr(SPRN_MCAR);
+		addr |= (u64)mfspr(SPRN_MCARU) << 32;	/* MCARU supplies the upper 32 bits */
+
+		pr_cont("Machine Check %s Address: %#llx\n",
+		       reason & MCSR_MEA ? "Effective" : "Physical", addr);
+	}
+
+silent_out:
+	mtspr(SPRN_MCSR, mcsr);		/* write back the MCSR value read on entry */
+	return mfspr(SPRN_MCSR) == 0 && recoverable;	/* 1 only if MCSR now reads zero and nothing was fatal */
+}
+
+int machine_check_e500(struct pt_regs *regs)
+{
+	unsigned long reason = mfspr(SPRN_MCSR);
+
+	if (reason & MCSR_BUS_RBERR) {	/* read data bus error: let the RIO, then PCI, handlers try */
+		if (fsl_rio_mcheck_exception(regs))
+			return 1;
+		if (fsl_pci_mcheck_exception(regs))
+			return 1;
+	}
+
+	printk("Machine check in kernel mode.\n");
+	printk("Caused by (from MCSR=%lx): ", reason);
+
+	if (reason & MCSR_MCP)
+		pr_cont("Machine Check Signal\n");
+	if (reason & MCSR_ICPERR)
+		pr_cont("Instruction Cache Parity Error\n");
+	if (reason & MCSR_DCP_PERR)
+		pr_cont("Data Cache Push Parity Error\n");
+	if (reason & MCSR_DCPERR)
+		pr_cont("Data Cache Parity Error\n");
+	if (reason & MCSR_BUS_IAERR)
+		pr_cont("Bus - Instruction Address Error\n");
+	if (reason & MCSR_BUS_RAERR)
+		pr_cont("Bus - Read Address Error\n");
+	if (reason & MCSR_BUS_WAERR)
+		pr_cont("Bus - Write Address Error\n");
+	if (reason & MCSR_BUS_IBERR)
+		pr_cont("Bus - Instruction Data Error\n");
+	if (reason & MCSR_BUS_RBERR)
+		pr_cont("Bus - Read Data Bus Error\n");
+	if (reason & MCSR_BUS_WBERR)
+		pr_cont("Bus - Write Data Bus Error\n");
+	if (reason & MCSR_BUS_IPERR)
+		pr_cont("Bus - Instruction Parity Error\n");
+	if (reason & MCSR_BUS_RPERR)
+		pr_cont("Bus - Read Parity Error\n");
+
+	return 0;	/* once the report is printed the result is always 0 */
+}
+
+int machine_check_generic(struct pt_regs *regs)
+{
+	return 0;	/* CONFIG_PPC_E500 build: prints nothing, always returns 0 */
+}
+#elif defined(CONFIG_PPC32)
+/*
+ * Generic 32-bit machine check hook.
+ *
+ * Logs the cause encoded in the saved MSR image (printed as SRR1) and
+ * always returns 0.
+ */
+int machine_check_generic(struct pt_regs *regs)
+{
+	unsigned long srr1 = regs->msr;
+	unsigned long cause = srr1 & 0x601F0000;
+
+	printk("Machine check in kernel mode.\n");
+	printk("Caused by (from SRR1=%lx): ", srr1);
+
+	switch (cause) {
+	case 0x40000000:
+		pr_cont("L1 Instruction Cache error\n");
+		break;
+	case 0x20000000:
+		pr_cont("L1 Data Cache error\n");
+		break;
+	case 0x00100000:
+		pr_cont("L2 data cache parity error\n");
+		break;
+	case 0x00080000:
+		pr_cont("Machine check signal\n");
+		break;
+	case 0x00140000:	/* 7450 MSS error and TEA */
+	case 0x00040000:
+		pr_cont("Transfer error ack signal\n");
+		break;
+	case 0x00020000:
+		pr_cont("Data parity error signal\n");
+		break;
+	case 0x00010000:
+		pr_cont("Address parity error signal\n");
+		break;
+	default:
+		pr_cont("Unknown values in msr\n");
+		break;
+	}
+
+	return 0;
+}
+#endif /* everything else */
+
+/*
+ * die() on behalf of a machine check.
+ *
+ * A machine check wants to kill the interrupted context, but
+ * make_task_dead() panics when in_interrupt() is true. Leave the NMI or IRQ
+ * context first so that die() can do its job.
+ */
+void die_mce(const char *str, struct pt_regs *regs, long err)
+{
+	if (!in_nmi()) {
+		irq_exit();
+	} else {
+		nmi_exit();
+	}
+
+	die(str, regs, err);
+}
+
+/*
+ * Common machine check handling.
+ *
+ * BOOK3S_64 does not usually run this as a non-maskable interrupt: its own
+ * early real-mode handler deals with the MCE proper and then raises
+ * irq_work to call this handler once interrupts are enabled. The one
+ * exception is when the early handler finds the event unrecoverable; it
+ * then calls this directly to try to get a message out.
+ */
+static void __machine_check_exception(struct pt_regs *regs)
+{
+	int handled = 0;
+
+	__this_cpu_inc(irq_stat.mce_exceptions);
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+	/*
+	 * The platform hook is tried first and the CPU hook is only used
+	 * when there is no platform hook. In theory the CPU should go first
+	 * and the ppc_md. hook should only run if the CPU one returns a
+	 * positive number, but existing code assumes the board gets the
+	 * first chance, so keep it that way for now. --BenH.
+	 */
+	if (ppc_md.machine_check_exception)
+		handled = ppc_md.machine_check_exception(regs);
+	else if (cur_cpu_spec->machine_check)
+		handled = cur_cpu_spec->machine_check(regs);
+
+	/*
+	 * Die unless a hook reported recovery, the debugger took the fault,
+	 * or it turns out to be a recognised I/O access.
+	 */
+	if (handled <= 0 && !debugger_fault_handler(regs) &&
+	    !check_io_access(regs))
+		die_mce("Machine check", regs, SIGBUS);
+
+	/* Must die if the interrupt is not recoverable */
+	if (regs_is_unrecoverable(regs))
+		die_mce("Unrecoverable Machine check", regs, SIGBUS);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Raw handler for a machine check taken during early boot: dump a few key
+ * registers through udbg_printf(), call die(), and then spin forever.
+ */
+DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot)
+{
+	udbg_printf("Machine check (early boot)\n");
+	udbg_printf("SRR0=0x%016lx   SRR1=0x%016lx\n", regs->nip, regs->msr);
+	udbg_printf(" DAR=0x%016lx  DSISR=0x%08lx\n", regs->dar, regs->dsisr);
+	udbg_printf("  LR=0x%016lx     R1=0x%08lx\n", regs->link, regs->gpr[1]);
+	udbg_printf("------\n");
+	die("Machine check (early boot)", regs, SIGBUS);
+	/* Should die() return, never go back to the interrupted code. */
+	for (;;)
+		;
+	/* Not reached; present because the raw handler returns a value. */
+	return 0;
+}
+
+/*
+ * Machine check entry point defined as an asynchronous interrupt handler;
+ * all work is done by __machine_check_exception().
+ */
+DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
+{
+	__machine_check_exception(regs);
+}
+#endif
+/*
+ * Machine check entry point defined as an NMI handler; all work is done by
+ * __machine_check_exception(). Always returns 0.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
+{
+	__machine_check_exception(regs);
+
+	return 0;
+}
+
+/*
+ * System Management Interrupt handler: unconditionally calls die() with
+ * SIGABRT.
+ */
+DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
+{
+	die("System Management Interrupt", regs, SIGABRT);
+}
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate the VSX vector load (lxvw4x, lxvh8x, lxvd2x or lxvb16x) found at
+ * regs->nip on behalf of the current task.
+ *
+ * The instruction word is fetched and decoded, 16 bytes are read from the
+ * computed user address into a local buffer, and the result is written to
+ * the task's saved register image (thread.vr_state or thread.fp_state) with
+ * the element ordering required by the kernel/user endianness combination.
+ * On success the return address is advanced past the instruction.
+ *
+ * Every failure (instruction fetch fault, instruction is not one of the
+ * four loads, bad or faulting address, required MSR facility bit clear)
+ * returns early without advancing the return address.
+ */
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+	unsigned int ra, rb, t, i, sel, instr, rc;
+	const void __user *addr;
+	u8 vbuf[16] __aligned(16), *vdst;
+	unsigned long ea, msr, msr_mask;
+	bool swap;
+
+	if (__get_user(instr, (unsigned int __user *)regs->nip))
+		return;
+
+	/*
+	 * lxvb16x	opcode: 0x7c0006d8
+	 * lxvd2x	opcode: 0x7c000698
+	 * lxvh8x	opcode: 0x7c000658
+	 * lxvw4x	opcode: 0x7c000618
+	 *
+	 * The mask drops the register fields, bit 0 and the two bits
+	 * (0x40, 0x80) in which the four opcodes differ, so all of them
+	 * compare equal to the lxvw4x pattern.
+	 */
+	if ((instr & 0xfc00073e) != 0x7c000618) {
+		pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+			 " instr=%08x\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr);
+		return;
+	}
+
+	/* Grab vector registers into the task struct */
+	msr = regs->msr; /* Grab msr before we flush the bits */
+	flush_vsx_to_thread(current);
+	enable_kernel_altivec();
+
+	/*
+	 * Is userspace running with a different endian (this is rare but
+	 * not impossible)
+	 */
+	swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+	/* Decode the instruction */
+	ra = (instr >> 16) & 0x1f;
+	rb = (instr >> 11) & 0x1f;
+	t = (instr >> 21) & 0x1f;
+	/* Bit 0 picks the vr_state image over the fp_state image as target */
+	if (instr & 1)
+		vdst = (u8 *)&current->thread.vr_state.vr[t];
+	else
+		vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+	/* Grab the vector address */
+	ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+	if (is_32bit_task())
+		ea &= 0xfffffffful;
+	addr = (__force const void __user *)ea;
+
+	/* Check it */
+	if (!access_ok(addr, 16)) {
+		pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+			 " instr=%08x addr=%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, (unsigned long)addr);
+		return;
+	}
+
+	/* Read the vector */
+	rc = 0;
+	if ((unsigned long)addr & 0xfUL)
+		/* unaligned case */
+		rc = __copy_from_user_inatomic(vbuf, addr, 16);
+	else
+		__get_user_atomic_128_aligned(vbuf, addr, rc);
+	if (rc) {
+		pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+			 " instr=%08x addr=%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, (unsigned long)addr);
+		return;
+	}
+
+	pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+		 " instr=%08x addr=%016lx\n",
+		 smp_processor_id(), current->comm, current->pid, regs->nip,
+		 instr, (unsigned long) addr);
+
+	/* Grab instruction "selector" */
+	sel = (instr >> 6) & 3;
+
+	/*
+	 * Check to make sure the facility is actually enabled. This
+	 * could happen if we get a false positive hit.
+	 *
+	 * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
+	 * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
+	 */
+	msr_mask = MSR_VSX;
+	if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
+		msr_mask = MSR_VEC;
+	if (!(msr & msr_mask)) {
+		pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+			 " instr=%08x msr:%016lx\n",
+			 smp_processor_id(), current->comm, current->pid,
+			 regs->nip, instr, msr);
+		return;
+	}
+
+	/* Do logging here before we modify sel based on endian */
+	switch (sel) {
+	case 0:	/* lxvw4x */
+		PPC_WARN_EMULATED(lxvw4x, regs);
+		break;
+	case 1: /* lxvh8x */
+		PPC_WARN_EMULATED(lxvh8x, regs);
+		break;
+	case 2: /* lxvd2x */
+		PPC_WARN_EMULATED(lxvd2x, regs);
+		break;
+	case 3: /* lxvb16x */
+		PPC_WARN_EMULATED(lxvb16x, regs);
+		break;
+	}
+
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * An LE kernel stores the vector in the task struct as an LE
+	 * byte array (effectively swapping both the components and
+	 * the content of the components). Those instructions expect
+	 * the components to remain in ascending address order, so we
+	 * swap them back.
+	 *
+	 * If we are running a BE user space, the expectation is that
+	 * of a simple memcpy, so forcing the emulation to look like
+	 * a lxvb16x should do the trick.
+	 */
+	if (swap)
+		sel = 3;
+
+	/* Reverse the order of the elements; each element is copied as is */
+	switch (sel) {
+	case 0:	/* lxvw4x */
+		for (i = 0; i < 4; i++)
+			((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+		break;
+	case 1: /* lxvh8x */
+		for (i = 0; i < 8; i++)
+			((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+		break;
+	case 2: /* lxvd2x */
+		for (i = 0; i < 2; i++)
+			((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+		break;
+	case 3: /* lxvb16x */
+		for (i = 0; i < 16; i++)
+			vdst[i] = vbuf[15-i];
+		break;
+	}
+#else /* __LITTLE_ENDIAN__ */
+	/* On a big endian kernel, a BE userspace only needs a memcpy */
+	if (!swap)
+		sel = 3;
+
+	/* Otherwise, we need to swap the content of the components */
+	switch (sel) {
+	case 0:	/* lxvw4x */
+		for (i = 0; i < 4; i++)
+			((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+		break;
+	case 1: /* lxvh8x */
+		for (i = 0; i < 8; i++)
+			((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+		break;
+	case 2: /* lxvd2x */
+		for (i = 0; i < 2; i++)
+			((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+		break;
+	case 3: /* lxvb16x */
+		memcpy(vdst, vbuf, 16);
+		break;
+	}
+#endif /* !__LITTLE_ENDIAN__ */
+
+	/* Go to next instruction */
+	regs_add_return_ip(regs, 4);
+}
+#endif /* CONFIG_VSX */
+
+/*
+ * Asynchronous HMI handler.
+ *
+ * Runs the P9 vector load emulation when the per-CPU paca flag requests it,
+ * then hands the event to the platform's handle_hmi_exception hook if one
+ * is installed. The irq regs pointer is switched to @regs for the duration
+ * and restored on the way out.
+ */
+DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
+{
+	struct pt_regs *saved_regs = set_irq_regs(regs);
+
+#ifdef CONFIG_VSX
+	/* Real mode flagged P9 special emu is needed */
+	if (local_paca->hmi_p9_special_emu) {
+		/* Consume the request before acting on it. */
+		local_paca->hmi_p9_special_emu = 0;
+
+		/*
+		 * Page faults are not wanted during the emulation; if one
+		 * would be needed the instruction is simply replayed.
+		 */
+		pagefault_disable();
+		p9_hmi_special_emu(regs);
+		pagefault_enable();
+	}
+#endif /* CONFIG_VSX */
+
+	if (ppc_md.handle_hmi_exception)
+		ppc_md.handle_hmi_exception(regs);
+
+	set_irq_regs(saved_regs);
+}
+
+/*
+ * Handler for an unexpected trap: log where it happened and raise SIGTRAP
+ * with code TRAP_UNK through _exception().
+ */
+DEFINE_INTERRUPT_HANDLER(unknown_exception)
+{
+	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+	       regs->nip, regs->msr, regs->trap);
+
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
+}
+
+/*
+ * Same reporting as unknown_exception(), defined as an asynchronous
+ * interrupt handler.
+ */
+DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
+{
+	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+	       regs->nip, regs->msr, regs->trap);
+
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
+}
+
+/*
+ * Same reporting as unknown_exception(), defined as an NMI handler.
+ * Always returns 0.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
+{
+	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+	       regs->nip, regs->msr, regs->trap);
+
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+	return 0;
+}
+
+/*
+ * Instruction breakpoint handler.
+ *
+ * The die notifier chain is consulted first, then the debugger; if neither
+ * takes the event, SIGTRAP/TRAP_BRKPT is raised at the faulting address.
+ */
+DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
+{
+	int verdict = notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5,
+				 SIGTRAP);
+
+	/* The debugger is only asked when the notifiers did not stop us. */
+	if (verdict == NOTIFY_STOP || debugger_iabr_match(regs))
+		return;
+
+	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+}
+
+/* Run mode exception handler: raises SIGTRAP with code TRAP_UNK. */
+DEFINE_INTERRUPT_HANDLER(RunModeException)
+{
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
+}
+
+/*
+ * Common single-step handling.
+ *
+ * Clears the single-step and branch-trace state in @regs, then offers the
+ * event in turn to the kprobe post handler, the die notifier chain and the
+ * debugger. If none of them takes it, SIGTRAP/TRAP_TRACE is raised.
+ */
+static void __single_step_exception(struct pt_regs *regs)
+{
+	clear_single_step(regs);
+	clear_br_trace(regs);
+
+	/* Each consumer is only asked if the previous one declined. */
+	if (kprobe_post_handler(regs) ||
+	    notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) ==
+							NOTIFY_STOP ||
+	    debugger_sstep(regs))
+		return;
+
+	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+}
+
+/* Interrupt entry point for single-step; see __single_step_exception(). */
+DEFINE_INTERRUPT_HANDLER(single_step_exception)
+{
+	__single_step_exception(regs);
+}
+
+/*
+ * To be called after an instruction has been emulated successfully: if that
+ * instruction was being single-stepped, behave as though the single-step
+ * exception had been taken.  This was pointed out by Kumar Gala.  -- paulus
+ */
+void emulate_single_step(struct pt_regs *regs)
+{
+	if (!single_stepping(regs))
+		return;
+
+	__single_step_exception(regs);
+}
+
+#ifdef CONFIG_PPC_FPU_REGS
+/*
+ * Map an FPSCR value to a SIGFPE si_code.
+ *
+ * A condition counts only when both its enable bit and its exception bit
+ * are set. Conditions are tested in a fixed priority order and the first
+ * match wins; FPE_FLTUNK is returned when none matches.
+ */
+static inline int __parse_fpscr(unsigned long fpscr)
+{
+	/* Invalid operation */
+	if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
+		return FPE_FLTINV;
+
+	/* Overflow */
+	if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
+		return FPE_FLTOVF;
+
+	/* Underflow */
+	if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
+		return FPE_FLTUND;
+
+	/* Divide by zero */
+	if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
+		return FPE_FLTDIV;
+
+	/* Inexact result */
+	if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
+		return FPE_FLTRES;
+
+	return FPE_FLTUNK;
+}
+#endif
+
+/*
+ * Raise SIGFPE for the current task at regs->nip.
+ *
+ * The FP state is flushed to the thread struct first. With
+ * CONFIG_PPC_FPU_REGS the si_code is derived from the saved FPSCR;
+ * otherwise it is 0.
+ */
+static void parse_fpe(struct pt_regs *regs)
+{
+	int si_code = 0;
+
+	flush_fp_to_thread(current);
+#ifdef CONFIG_PPC_FPU_REGS
+	si_code = __parse_fpscr(current->thread.fp_state.fpscr);
+#endif
+	_exception(SIGFPE, regs, si_code, regs->nip);
+}
+
+/*
+ * Illegal instruction emulation support.  Originally written to
+ * provide the PVR to user applications using the mfspr rd, PVR.
+ * Return non-zero if we can't emulate, or -EFAULT if the associated
+ * memory access caused an access fault.  Return zero on success.
+ *
+ * There are a couple of ways to do this, either "decode" the instruction
+ * or directly match lots of bits.  In this case, matching lots of
+ * bits is faster and easier.
+ *
+ */
+/*
+ * Emulate the load/store string instructions lswi, lswx, stswi and stswx.
+ *
+ * @regs:     register state of the faulting context; GPRs are read and, for
+ *            the load forms, written
+ * @instword: the instruction word to emulate
+ *
+ * Bytes are transferred one at a time between user memory and the low four
+ * bytes of consecutive GPRs, starting at rT and wrapping from r31 to r0.
+ *
+ * Returns 0 on success, -EINVAL for an invalid form of lswx or when
+ * @instword is not a string instruction, and -EFAULT when a user memory
+ * access fails.
+ */
+static int emulate_string_inst(struct pt_regs *regs, u32 instword)
+{
+	u8 rT = (instword >> 21) & 0x1f;
+	u8 rA = (instword >> 16) & 0x1f;
+	u8 NB_RB = (instword >> 11) & 0x1f;
+	u32 num_bytes;
+	unsigned long EA;
+	int pos = 0;
+
+	/* Early out if we are an invalid form of lswx */
+	if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
+		if ((rT == rA) || (rT == NB_RB))
+			return -EINVAL;
+
+	EA = (rA == 0) ? 0 : regs->gpr[rA];
+
+	switch (instword & PPC_INST_STRING_MASK) {
+	case PPC_INST_LSWX:
+	case PPC_INST_STSWX:
+		/*
+		 * Indexed forms: the field is register number RB and
+		 * EA = (RA|0) + (RB), so add the register's contents, not
+		 * its number. The byte count comes from XER.
+		 */
+		EA += regs->gpr[NB_RB];
+		num_bytes = regs->xer & 0x7f;
+		break;
+	case PPC_INST_LSWI:
+	case PPC_INST_STSWI:
+		/* Immediate forms: the field is the byte count, 0 means 32 */
+		num_bytes = (NB_RB == 0) ? 32 : NB_RB;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	while (num_bytes != 0) {
+		u8 val;
+		/* Bytes fill the low word of the register from the top down */
+		u32 shift = 8 * (3 - (pos & 0x3));
+
+		/* if process is 32-bit, clear upper 32 bits of EA */
+		if ((regs->msr & MSR_64BIT) == 0)
+			EA &= 0xFFFFFFFF;
+
+		switch ((instword & PPC_INST_STRING_MASK)) {
+		case PPC_INST_LSWX:
+		case PPC_INST_LSWI:
+			if (get_user(val, (u8 __user *)EA))
+				return -EFAULT;
+			/* first time updating this reg, zero it out */
+			if (pos == 0)
+				regs->gpr[rT] = 0;
+			/*
+			 * Widen before shifting: as a plain int, a byte
+			 * >= 0x80 shifted by 24 would overflow and then
+			 * sign-extend into the upper half of a 64-bit GPR.
+			 */
+			regs->gpr[rT] |= (unsigned long)val << shift;
+			break;
+		case PPC_INST_STSWI:
+		case PPC_INST_STSWX:
+			val = regs->gpr[rT] >> shift;
+			if (put_user(val, (u8 __user *)EA))
+				return -EFAULT;
+			break;
+		}
+		/* move EA to next address */
+		EA += 1;
+		num_bytes--;
+
+		/* manage our position within the register */
+		if (++pos == 4) {
+			pos = 0;
+			if (++rT == 32)
+				rT = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Emulate popcntb: every byte of the destination register receives the
+ * number of one bits in the corresponding byte of the source register.
+ * Always returns 0.
+ */
+static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
+{
+	const u32 dst = (instword >> 16) & 0x1f;
+	const u32 src = (instword >> 21) & 0x1f;
+	unsigned long bits = regs->gpr[src];
+
+	/* Count within 2-bit groups ... */
+	bits -= (bits >> 1) & 0x5555555555555555ULL;
+	/* ... then 4-bit groups ... */
+	bits = (bits & 0x3333333333333333ULL) +
+	       ((bits >> 2) & 0x3333333333333333ULL);
+	/* ... and finally whole bytes. */
+	bits = (bits + (bits >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+
+	regs->gpr[dst] = bits;
+
+	return 0;
+}
+
+/*
+ * Emulate isel: rT receives (rA|0) when the CR bit selected by the BC field
+ * is set, and rB otherwise. Always returns 0.
+ */
+static int emulate_isel(struct pt_regs *regs, u32 instword)
+{
+	u8 rT = (instword >> 21) & 0x1f;
+	u8 rA = (instword >> 16) & 0x1f;
+	u8 rB = (instword >> 11) & 0x1f;
+	u8 BC = (instword >> 6) & 0x1f;
+	/* A register number of 0 in the rA field stands for the value 0 */
+	unsigned long ra_val = rA ? regs->gpr[rA] : 0;
+
+	/* CR bits are numbered from the most significant end of the word */
+	if ((regs->ccr >> (31 - BC)) & 0x1)
+		regs->gpr[rT] = ra_val;
+	else
+		regs->gpr[rT] = regs->gpr[rB];
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Abort the current transaction if @regs was captured while transactional.
+ *
+ * A load/store inside an active transaction cannot be emulated because the
+ * kernel operates in transaction suspended context, so the transaction has
+ * to be aborted instead. @cause is the abort code reported to the user.
+ *
+ * Returns true when an abort was issued (the caller must not emulate),
+ * false otherwise.
+ */
+static inline bool tm_abort_check(struct pt_regs *regs, int cause)
+{
+	if (!MSR_TM_TRANSACTIONAL(regs->msr))
+		return false;
+
+	tm_enable();
+	tm_abort(cause);
+
+	return true;
+}
+#else
+/*
+ * Variant used without CONFIG_PPC_TRANSACTIONAL_MEM: there is never a
+ * transaction to abort, so this always returns false.
+ */
+static inline bool tm_abort_check(struct pt_regs *regs, int reason)
+{
+	return false;
+}
+#endif
+
+/*
+ * Try to emulate the user instruction at regs->nip.
+ *
+ * Recognised instructions: mfspr rD,PVR; dcba (treated as a no-op); mcrxr;
+ * the load/store string instructions; popcntb; isel; the sync variants;
+ * and, on 64-bit when the CPU has CPU_FTR_DSCR, mfspr/mtspr of DSCR.
+ *
+ * Returns 0 when the instruction was emulated, -EFAULT when the instruction
+ * fetch (or a memory access made by the string emulation) failed, and
+ * -EINVAL when @regs is not user mode, when a transaction had to be aborted
+ * or when the instruction is not handled here. The return address is not
+ * advanced by this function.
+ */
+static int emulate_instruction(struct pt_regs *regs)
+{
+	u32 insn;
+	u32 reg;
+
+	if (!user_mode(regs))
+		return -EINVAL;
+
+	if (get_user(insn, (u32 __user *)(regs->nip)))
+		return -EFAULT;
+
+	/* Register field shared by the mfspr/mtspr cases below */
+	reg = (insn >> 21) & 0x1f;
+
+	/* Emulate the mfspr rD, PVR. */
+	if ((insn & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
+		PPC_WARN_EMULATED(mfpvr, regs);
+		regs->gpr[reg] = mfspr(SPRN_PVR);
+		return 0;
+	}
+
+	/* Emulating the dcba insn is just a no-op.  */
+	if ((insn & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
+		PPC_WARN_EMULATED(dcba, regs);
+		return 0;
+	}
+
+	/* Emulate the mcrxr insn.  */
+	if ((insn & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
+		int crf_shift = (insn >> 21) & 0x1c;
+		unsigned long crf_mask = 0xf0000000UL >> crf_shift;
+		unsigned long xer_bits = (regs->xer >> crf_shift) & crf_mask;
+
+		PPC_WARN_EMULATED(mcrxr, regs);
+		/* Copy the top four XER bits into the chosen CR field ... */
+		regs->ccr = (regs->ccr & ~crf_mask) | xer_bits;
+		/* ... and clear them in XER. */
+		regs->xer &= ~0xf0000000UL;
+		return 0;
+	}
+
+	/* Emulate load/store string insn. */
+	if ((insn & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
+		if (tm_abort_check(regs,
+				   TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
+			return -EINVAL;
+		PPC_WARN_EMULATED(string, regs);
+		return emulate_string_inst(regs, insn);
+	}
+
+	/* Emulate the popcntb (Population Count Bytes) instruction. */
+	if ((insn & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
+		PPC_WARN_EMULATED(popcntb, regs);
+		return emulate_popcntb_inst(regs, insn);
+	}
+
+	/* Emulate isel (Integer Select) instruction */
+	if ((insn & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
+		PPC_WARN_EMULATED(isel, regs);
+		return emulate_isel(regs, insn);
+	}
+
+	/* Emulate sync instruction variants */
+	if ((insn & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
+		PPC_WARN_EMULATED(sync, regs);
+		asm volatile("sync");
+		return 0;
+	}
+
+#ifdef CONFIG_PPC64
+	/* Emulate the mfspr rD, DSCR. */
+	if (((insn & PPC_INST_MFSPR_DSCR_USER_MASK) == PPC_INST_MFSPR_DSCR_USER ||
+	     (insn & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+	    cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mfdscr, regs);
+		regs->gpr[reg] = mfspr(SPRN_DSCR);
+		return 0;
+	}
+
+	/* Emulate the mtspr DSCR, rD. */
+	if (((insn & PPC_INST_MTSPR_DSCR_USER_MASK) == PPC_INST_MTSPR_DSCR_USER ||
+	     (insn & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+	    cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mtdscr, regs);
+		/* Record the value in the thread and mark it as inherited */
+		current->thread.dscr = regs->gpr[reg];
+		current->thread.dscr_inherit = 1;
+		mtspr(SPRN_DSCR, current->thread.dscr);
+		return 0;
+	}
+#endif
+
+	return -EINVAL;
+}
+
+/* Report whether @addr is a kernel address, as decided by is_kernel_addr(). */
+int is_valid_bugaddr(unsigned long addr)
+{
+	return is_kernel_addr(addr);
+}
+
+#ifdef CONFIG_MATH_EMULATION
+/*
+ * Run the math emulator on the faulting instruction and act on its result.
+ *
+ * do_mathemu() result -> action:
+ *   0       emulated; apply single-step handling
+ *   1       raise SIGFPE with a code derived from the saved FPSCR
+ *   -EFAULT raise SIGSEGV/SEGV_MAPERR
+ *
+ * Returns 0 in those three cases and -1 for any other result, leaving the
+ * exception for the caller to deal with.
+ */
+static int emulate_math(struct pt_regs *regs)
+{
+	int rc = do_mathemu(regs);
+
+	if (rc >= 0)
+		PPC_WARN_EMULATED(math, regs);
+
+	if (rc == 0) {
+		emulate_single_step(regs);
+		return 0;
+	}
+
+	if (rc == 1) {
+		int si_code = __parse_fpscr(current->thread.fp_state.fpscr);
+
+		_exception(SIGFPE, regs, si_code, regs->nip);
+		return 0;
+	}
+
+	if (rc == -EFAULT) {
+		_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+		return 0;
+	}
+
+	return -1;
+}
+#else
+/* Without CONFIG_MATH_EMULATION nothing is emulated: always returns -1. */
+static inline int emulate_math(struct pt_regs *regs) { return -1; }
+#endif
+
+/*
+ * Common program check handling, used by program_check_exception() and
+ * emulation_assist_interrupt().
+ *
+ * Dispatches on the reason flags returned by get_reason():
+ *  - REASON_FP:   IEEE FP exception, delivered as SIGFPE.
+ *  - REASON_TRAP: offered to the debugger, kprobes, the die notifiers and
+ *                 (for kernel mode) report_bug(); otherwise SIGTRAP, or
+ *                 SIGILL for a user hashchk failure.
+ *  - REASON_TM:   TM "Bad Thing"; SIGILL for user mode, die() for kernel.
+ *  - otherwise:   for user mode, try math and instruction emulation; if
+ *                 that does not handle it, deliver SIGILL.
+ */
+static void do_program_check(struct pt_regs *regs)
+{
+	unsigned int reason = get_reason(regs);
+
+	/* We can now get here via a FP Unavailable exception if the core
+	 * has no FPU, in that case the reason flags will be 0 */
+
+	if (reason & REASON_FP) {
+		/* IEEE FP exception */
+		parse_fpe(regs);
+		return;
+	}
+	if (reason & REASON_TRAP) {
+		unsigned long bugaddr;
+		/* Debugger is first in line to stop recursive faults in
+		 * rcu_lock, notify_die, or atomic_notifier_call_chain */
+		if (debugger_bpt(regs))
+			return;
+
+		if (kprobe_handler(regs))
+			return;
+
+		/* trap exception */
+		if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
+				== NOTIFY_STOP)
+			return;
+
+		bugaddr = regs->nip;
+		/*
+		 * Fixup bugaddr for BUG_ON() in real mode
+		 */
+		if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
+			bugaddr += PAGE_OFFSET;
+
+		/* A kernel WARN: step over the trap instruction and resume */
+		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
+		    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
+			regs_add_return_ip(regs, 4);
+			return;
+		}
+
+		/* User mode considers other cases after enabling IRQs */
+		if (!user_mode(regs)) {
+			_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+			return;
+		}
+	}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (reason & REASON_TM) {
+		/* This is a TM "Bad Thing Exception" program check.
+		 * This occurs when:
+		 * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
+		 *    transition in TM states.
+		 * -  A trechkpt is attempted when transactional.
+		 * -  A treclaim is attempted when non transactional.
+		 * -  A tend is illegally attempted.
+		 * -  writing a TM SPR when transactional.
+		 *
+		 * If usermode caused this, it's done something illegal and
+		 * gets a SIGILL slap on the wrist.  We call it an illegal
+		 * operand to distinguish from the instruction just being bad
+		 * (e.g. executing a 'tend' on a CPU without TM!); it's an
+		 * illegal /placement/ of a valid instruction.
+		 */
+		if (user_mode(regs)) {
+			_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
+			return;
+		} else {
+			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
+			       "at %lx (msr 0x%lx) tm_scratch=%llx\n",
+			       regs->nip, regs->msr, get_paca()->tm_scratch);
+			die("Unrecoverable exception", regs, SIGABRT);
+		}
+	}
+#endif
+
+	/*
+	 * If we took the program check in the kernel skip down to sending a
+	 * SIGILL. The subsequent cases all relate to user space, such as
+	 * emulating instructions which we should only do for user space. We
+	 * also do not want to enable interrupts for kernel faults because that
+	 * might lead to further faults, and lose the context of the original
+	 * exception.
+	 */
+	if (!user_mode(regs))
+		goto sigill;
+
+	interrupt_cond_local_irq_enable(regs);
+
+	/*
+	 * (reason & REASON_TRAP) is mostly handled before enabling IRQs,
+	 * except get_user_instr() can sleep so we cannot reliably inspect the
+	 * current instruction in that context. Now that we know we are
+	 * handling a user space trap and can sleep, we can check if the trap
+	 * was a hashchk failure.
+	 */
+	if (reason & REASON_TRAP) {
+		if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+			ppc_inst_t insn;
+
+			if (get_user_instr(insn, (void __user *)regs->nip)) {
+				_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+				return;
+			}
+
+			if (ppc_inst_primary_opcode(insn) == 31 &&
+			    get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) {
+				_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
+				return;
+			}
+		}
+
+		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+		return;
+	}
+
+	/* (reason & REASON_ILLEGAL) would be the obvious thing here,
+	 * but there seems to be a hardware bug on the 405GP (RevD)
+	 * that means ESR is sometimes set incorrectly - either to
+	 * ESR_DST (!?) or 0.  In the process of chasing this with the
+	 * hardware people - not sure if it can happen on any illegal
+	 * instruction or only on FP instructions, whether there is a
+	 * pattern to occurrences etc. -dgibson 31/Mar/2003
+	 */
+	if (!emulate_math(regs))
+		return;
+
+	/* Try to emulate it if we should. */
+	if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
+		switch (emulate_instruction(regs)) {
+		case 0:
+			/* Emulated: step past the instruction */
+			regs_add_return_ip(regs, 4);
+			emulate_single_step(regs);
+			return;
+		case -EFAULT:
+			_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+			return;
+		}
+	}
+
+sigill:
+	if (reason & REASON_PRIVILEGED)
+		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
+	else
+		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+
+}
+
+/* Interrupt entry point for program checks; see do_program_check(). */
+DEFINE_INTERRUPT_HANDLER(program_check_exception)
+{
+	do_program_check(regs);
+}
+
+/*
+ * This occurs when running in hypervisor mode on POWER6 or later
+ * and an illegal instruction is encountered.
+ *
+ * REASON_ILLEGAL is ORed into the saved MSR before handing over, so that
+ * do_program_check() treats the event as an illegal instruction.
+ */
+DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
+{
+	regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL);
+	do_program_check(regs);
+}
+
+/*
+ * Alignment interrupt handler.
+ *
+ * Unless the reason is a boundary violation, the transaction (if any) is
+ * aborted or the access is fixed up in software, subject to the task's
+ * PR_UNALIGN_SIGBUS setting. Anything that could not be fixed is reported
+ * as SIGBUS/BUS_ADRALN, or as SIGSEGV/SEGV_ACCERR when the operand address
+ * was bad; kernel-mode failures go through bad_page_fault().
+ */
+DEFINE_INTERRUPT_HANDLER(alignment_exception)
+{
+	int sig = SIGBUS, code = BUS_ADRALN, fixed = 0;
+	unsigned long reason;
+
+	interrupt_cond_local_irq_enable(regs);
+
+	reason = get_reason(regs);
+	if (!(reason & REASON_BOUNDARY)) {
+		if (tm_abort_check(regs,
+				   TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
+			return;
+
+		/* we don't implement logging of alignment exceptions */
+		if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+			fixed = fix_alignment(regs);
+
+		if (fixed == 1) {
+			/* skip over emulated instruction */
+			regs_add_return_ip(regs, inst_length(reason));
+			emulate_single_step(regs);
+			return;
+		}
+
+		/* Operand address was bad */
+		if (fixed == -EFAULT) {
+			sig = SIGSEGV;
+			code = SEGV_ACCERR;
+		}
+	}
+
+	if (user_mode(regs))
+		_exception(sig, regs, code, regs->dar);
+	else
+		bad_page_fault(regs, sig);
+}
+
+/* Kernel stack overflow handler: unconditionally calls die() with SIGSEGV. */
+DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
+{
+	die("Kernel stack overflow", regs, SIGSEGV);
+}
+
+/*
+ * FP unavailable handler for the unrecoverable case: log the trap number
+ * and address at KERN_EMERG, then call die() with SIGABRT.
+ */
+DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
+{
+	printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
+			  "%lx at %lx\n", regs->trap, regs->nip);
+	die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
+}
+
+/*
+ * VMX/Altivec unavailable handler.
+ *
+ * From kernel mode this is unrecoverable: log it and die(). From user mode
+ * the task gets SIGILL/ILL_ILLOPC.
+ */
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
+{
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
+				"%lx at %lx\n", regs->trap, regs->nip);
+		die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
+		return;
+	}
+
+	/*
+	 * A user program has executed an altivec instruction, but this
+	 * kernel doesn't support altivec.
+	 */
+	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+
+/*
+ * VSX unavailable handler.
+ *
+ * From kernel mode this is unrecoverable: log it and die(). From user mode
+ * the task gets SIGILL/ILL_ILLOPC.
+ */
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
+{
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
+				"%lx at %lx\n", regs->trap, regs->nip);
+		die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
+		return;
+	}
+
+	/*
+	 * A user program has executed a vsx instruction, but this kernel
+	 * doesn't support vsx.
+	 */
+	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Handle a TM facility unavailable event.
+ *
+ * With CONFIG_PPC_TRANSACTIONAL_MEM and a user-mode @regs, TM is turned on
+ * for the task: load_tm is bumped, MSR_TM is set in the return MSR and the
+ * TM SPRs are restored from the thread struct. In every other case the
+ * event is logged and die() is called.
+ */
+static void tm_unavailable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (user_mode(regs)) {
+		current->thread.load_tm++;
+		regs_set_return_msr(regs, regs->msr | MSR_TM);
+		tm_enable();
+		tm_restore_sprs(&current->thread);
+		return;
+	}
+#endif
+	pr_emerg("Unrecoverable TM Unavailable Exception "
+			"%lx at %lx\n", regs->trap, regs->nip);
+	die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
+}
+
+/*
+ * Facility unavailable interrupt handler.
+ *
+ * The facility code is taken from the top byte of HFSCR (for the
+ * hypervisor variant of the interrupt) or FSCR. Taking this interrupt in
+ * kernel mode is reported through die(). For user mode:
+ *  - DSCR: an mtspr enables direct DSCR access for the task, an mfspr is
+ *    emulated;
+ *  - TM:   handed to tm_unavailable() when the CPU has CPU_FTR_TM,
+ *    otherwise SIGILL without logging;
+ *  - anything else: rate-limited log message and SIGILL.
+ */
+DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
+{
+	/* Printable names, indexed by facility code */
+	static char *facility_strings[] = {
+		[FSCR_FP_LG] = "FPU",
+		[FSCR_VECVSX_LG] = "VMX/VSX",
+		[FSCR_DSCR_LG] = "DSCR",
+		[FSCR_PM_LG] = "PMU SPRs",
+		[FSCR_BHRB_LG] = "BHRB",
+		[FSCR_TM_LG] = "TM",
+		[FSCR_EBB_LG] = "EBB",
+		[FSCR_TAR_LG] = "TAR",
+		[FSCR_MSGP_LG] = "MSGP",
+		[FSCR_SCV_LG] = "SCV",
+		[FSCR_PREFIX_LG] = "PREFIX",
+	};
+	char *facility = "unknown";
+	u64 value;
+	u32 instword, rd;
+	u8 status;
+	bool hv;
+
+	hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
+	if (hv)
+		value = mfspr(SPRN_HFSCR);
+	else
+		value = mfspr(SPRN_FSCR);
+
+	/* The facility code lives in the most significant byte */
+	status = value >> 56;
+	/* Codes below 2 are only given a name for the hypervisor variant */
+	if ((hv || status >= 2) &&
+	    (status < ARRAY_SIZE(facility_strings)) &&
+	    facility_strings[status])
+		facility = facility_strings[status];
+
+	/* We should not have taken this interrupt in kernel */
+	if (!user_mode(regs)) {
+		pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
+			 facility, status, regs->nip);
+		die("Unexpected facility unavailable exception", regs, SIGABRT);
+	}
+
+	interrupt_cond_local_irq_enable(regs);
+
+	if (status == FSCR_DSCR_LG) {
+		/*
+		 * User is accessing the DSCR register using the problem
+		 * state only SPR number (0x03) either through a mfspr or
+		 * a mtspr instruction. If it is a write attempt through
+		 * a mtspr, then we set the inherit bit. This also allows
+		 * the user to write or read the register directly in the
+		 * future by setting via the FSCR DSCR bit. But in case it
+		 * is a read DSCR attempt through a mfspr instruction, we
+		 * just emulate the instruction instead. This code path will
+		 * always emulate all the mfspr instructions till the user
+		 * has attempted at least one mtspr instruction. This way it
+		 * preserves the same behaviour when the user is accessing
+		 * the DSCR through privilege level only SPR number (0x11)
+		 * which is emulated through illegal instruction exception.
+		 * We always leave HFSCR DSCR set.
+		 */
+		if (get_user(instword, (u32 __user *)(regs->nip))) {
+			pr_err("Failed to fetch the user instruction\n");
+			return;
+		}
+
+		/* Write into DSCR (mtspr 0x03, RS) */
+		if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
+				== PPC_INST_MTSPR_DSCR_USER) {
+			rd = (instword >> 21) & 0x1f;
+			current->thread.dscr = regs->gpr[rd];
+			current->thread.dscr_inherit = 1;
+			current->thread.fscr |= FSCR_DSCR;
+			mtspr(SPRN_FSCR, current->thread.fscr);
+		}
+
+		/* Read from DSCR (mfspr RT, 0x03) */
+		if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
+				== PPC_INST_MFSPR_DSCR_USER) {
+			if (emulate_instruction(regs)) {
+				pr_err("DSCR based mfspr emulation failed\n");
+				return;
+			}
+			/* Emulated: step past the mfspr */
+			regs_add_return_ip(regs, 4);
+			emulate_single_step(regs);
+		}
+		return;
+	}
+
+	if (status == FSCR_TM_LG) {
+		/*
+		 * If we're here then the hardware is TM aware because it
+		 * generated an exception with FSRM_TM set.
+		 *
+		 * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
+		 * told us not to do TM, or the kernel is not built with TM
+		 * support.
+		 *
+		 * If both of those things are true, then userspace can spam the
+		 * console by triggering the printk() below just by continually
+		 * doing tbegin (or any TM instruction). So in that case just
+		 * send the process a SIGILL immediately.
+		 */
+		if (!cpu_has_feature(CPU_FTR_TM))
+			goto out;
+
+		tm_unavailable(regs);
+		return;
+	}
+
+	pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
+		hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
+
+out:
+	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+
+/*
+ * FP unavailable taken while transactional: reclaim the transaction, mark
+ * FP to be loaded for the task, and recheckpoint.
+ */
+DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
+{
+	/* Note:  This does not handle any kind of FP laziness. */
+
+	TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
+		 regs->nip, regs->msr);
+
+	/* We can only have got here if the task started using FP after
+	 * beginning the transaction.  So, the transactional regs are just a
+	 * copy of the checkpointed ones.  But, we still need to recheckpoint
+	 * as we're enabling FP for the process; it will return, abort the
+	 * transaction, and probably retry but now with FP enabled.  So the
+	 * checkpointed FP registers need to be loaded.
+	 */
+	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
+
+	/*
+	 * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, which
+	 * was then overwritten with thr->fp_state by tm_reclaim_thread().
+	 *
+	 * At this point, ck{fp,vr}_state contains the exact values we want to
+	 * recheckpoint.
+	 */
+
+	/* Enable FP for the task: */
+	current->thread.load_fp = 1;
+
+	/*
+	 * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
+	 */
+	tm_recheckpoint(&current->thread);
+}
+
+/*
+ * Vector unavailable taken while transactional: reclaim the transaction,
+ * mark the vector registers to be loaded, recheckpoint, and record that the
+ * task has used VR.
+ */
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
+{
+	/* See the comments in fp_unavailable_tm().  This function operates
+	 * the same way.
+	 */
+
+	TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
+		 "MSR=%lx\n",
+		 regs->nip, regs->msr);
+	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
+	current->thread.load_vec = 1;
+	tm_recheckpoint(&current->thread);
+	current->thread.used_vr = 1;
+}
+
+/*
+ * VSX unavailable taken while transactional: record VSR use, reclaim the
+ * transaction, mark both FP and vector registers to be loaded, and
+ * recheckpoint.
+ */
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
+{
+	/* See the comments in fp_unavailable_tm().  This works similarly,
+	 * though we're loading both FP and VEC registers in here.
+	 *
+	 * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
+	 * regs.  Either way, set MSR_VSX.
+	 */
+
+	TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx,"
+		 "MSR=%lx\n",
+		 regs->nip, regs->msr);
+
+	current->thread.used_vsr = 1;
+
+	/* This reclaims FP and/or VR regs if they're already enabled */
+	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
+
+	current->thread.load_vec = 1;
+	current->thread.load_fp = 1;
+
+	tm_recheckpoint(&current->thread);
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+#ifdef CONFIG_PPC64
+/*
+ * NMI flavour of the performance monitor handler: bump the per-CPU PMU
+ * interrupt counter and call perf_irq(). Always returns 0.
+ */
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
+{
+	__this_cpu_inc(irq_stat.pmu_irqs);
+
+	perf_irq(regs);
+
+	return 0;
+}
+#endif
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
+{
+	__this_cpu_inc(irq_stat.pmu_irqs);	/* count this PMU interrupt */
+
+	perf_irq(regs);				/* pass the interrupt on to perf_irq */
+}
+
+DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
+{
+	/*
+	 * On 64-bit, a perf interrupt taken in a soft-masked
+	 * (local_irq_disable) region is handled as an NMI: that prevents hash
+	 * faults on user addresses when reading callchains (and looks better
+	 * from an irq tracing perspective).  Anything else is handled async.
+	 */
+	if (!IS_ENABLED(CONFIG_PPC64) || likely(!arch_irq_disabled_regs(regs))) {
+		performance_monitor_exception_async(regs);
+		return 0;
+	}
+	performance_monitor_exception_nmi(regs);
+	return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
+{
+	int changed = 0;
+	/*
+	 * Find which DAC/IAC event fired (only the first match is handled),
+	 * clear its enable bits and send the matching trap. Torez
+	 */
+	if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
+		dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+		current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
+#endif
+		do_send_trap(regs, mfspr(SPRN_DAC1), debug_status,
+			     5);
+		changed |= 0x01;
+	}  else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
+		dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+		do_send_trap(regs, mfspr(SPRN_DAC2), debug_status,
+			     6);
+		changed |= 0x01;
+	}  else if (debug_status & DBSR_IAC1) {
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
+		dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
+		do_send_trap(regs, mfspr(SPRN_IAC1), debug_status,
+			     1);
+		changed |= 0x01;
+	}  else if (debug_status & DBSR_IAC2) {
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
+		do_send_trap(regs, mfspr(SPRN_IAC2), debug_status,
+			     2);
+		changed |= 0x01;
+	}  else if (debug_status & DBSR_IAC3) {
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
+		dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
+		do_send_trap(regs, mfspr(SPRN_IAC3), debug_status,
+			     3);
+		changed |= 0x01;
+	}  else if (debug_status & DBSR_IAC4) {
+		current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
+		do_send_trap(regs, mfspr(SPRN_IAC4), debug_status,
+			     4);
+		changed |= 0x01;
+	}
+	/*
+	 * At the point this routine was called, the MSR(DE) was turned off.
+	 * Check all other debug flags and see if that bit needs to be turned
+	 * back on or not.
+	 */
+	if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+			       current->thread.debug.dbcr1))
+		regs_set_return_msr(regs, regs->msr | MSR_DE);
+	else
+		/* Make sure the IDM flag is off */
+		current->thread.debug.dbcr0 &= ~DBCR0_IDM;
+
+	if (changed & 0x01)	/* write DBCR0 back only if an event above was handled */
+		mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
+}
+
+DEFINE_INTERRUPT_HANDLER(DebugException)
+{
+	unsigned long debug_status = regs->dsisr;	/* debug status arrives in regs->dsisr */
+
+	current->thread.debug.dbsr = debug_status;	/* keep a copy in the thread's debug state */
+
+	/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
+	 * on server, it stops on the target of the branch. In order to simulate
+	 * the server behaviour, we thus restart right away with a single step
+	 * instead of stopping here when hitting a BT
+	 */
+	if (debug_status & DBSR_BT) {
+		regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+
+		/* Disable BT */
+		mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
+		/* Clear the BT event */
+		mtspr(SPRN_DBSR, DBSR_BT);
+
+		/* Do the single step trick only when coming from userspace */
+		if (user_mode(regs)) {
+			current->thread.debug.dbcr0 &= ~DBCR0_BT;
+			current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+			regs_set_return_msr(regs, regs->msr | MSR_DE);
+			return;
+		}
+
+		if (kprobe_post_handler(regs))
+			return;
+
+		if (notify_die(DIE_SSTEP, "block_step", regs, 5,
+			       5, SIGTRAP) == NOTIFY_STOP) {
+			return;
+		}
+		if (debugger_sstep(regs))
+			return;
+	} else if (debug_status & DBSR_IC) { 	/* Instruction complete */
+		regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+
+		/* Disable instruction completion */
+		mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
+		/* Clear the instruction completion event */
+		mtspr(SPRN_DBSR, DBSR_IC);
+
+		if (kprobe_post_handler(regs))
+			return;
+
+		if (notify_die(DIE_SSTEP, "single_step", regs, 5,
+			       5, SIGTRAP) == NOTIFY_STOP) {
+			return;
+		}
+
+		if (debugger_sstep(regs))
+			return;
+
+		if (user_mode(regs)) {
+			current->thread.debug.dbcr0 &= ~DBCR0_IC;
+			if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+					       current->thread.debug.dbcr1))
+				regs_set_return_msr(regs, regs->msr | MSR_DE);
+			else
+				/* Make sure the IDM bit is off */
+				current->thread.debug.dbcr0 &= ~DBCR0_IDM;
+		}
+
+		_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+	} else
+		handle_debug(regs, debug_status);	/* neither BT nor IC: check DAC/IAC events */
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+#ifdef CONFIG_ALTIVEC
+DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
+{
+	int err;
+
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
+		       " at %lx\n", regs->nip);
+		die("Kernel VMX/Altivec assist exception", regs, SIGILL);
+	}
+
+	flush_altivec_to_thread(current);	/* done before emulate_altivec() runs */
+
+	PPC_WARN_EMULATED(altivec, regs);
+	err = emulate_altivec(regs);
+	if (err == 0) {
+		regs_add_return_ip(regs, 4); /* skip emulated instruction */
+		emulate_single_step(regs);
+		return;
+	}
+
+	if (err == -EFAULT) {
+		/* got an error reading the instruction */
+		_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+	} else {
+		/* didn't recognize the instruction: log it, no signal is raised */
+		/* XXX quick hack for now: set the non-Java bit in the VSCR */
+		printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
+				   "in %s at %lx\n", current->comm, regs->nip);
+		current->thread.vr_state.vscr.u[3] |= 0x10000;
+	}
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_PPC_85xx
+DEFINE_INTERRUPT_HANDLER(CacheLockingException)
+{
+	unsigned long esr = regs->dsisr;
+
+	/*
+	 * Cache locking instructions from the user are treated as privileged
+	 * operations for now; anything without DLK/ILK set is ignored.
+	 */
+	if (!(esr & (ESR_DLK | ESR_ILK)))
+		return;
+	_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
+}
+#endif /* CONFIG_PPC_85xx */
+
+#ifdef CONFIG_SPE
+DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
+{
+	unsigned long fscr;
+	int enabled, rc;
+	int code = FPE_FLTUNK;
+
+	interrupt_cond_local_irq_enable(regs);
+
+	/* Read SPEFSCR from the thread struct only after the flush. */
+	flush_spe_to_thread(current);
+
+	fscr = current->thread.spefscr;
+	enabled = current->thread.fpexc_mode;
+
+	/* The first status bit whose exception is enabled picks the si_code. */
+	if ((fscr & SPEFSCR_FOVF) && (enabled & PR_FP_EXC_OVF))
+		code = FPE_FLTOVF;
+	else if ((fscr & SPEFSCR_FUNF) && (enabled & PR_FP_EXC_UND))
+		code = FPE_FLTUND;
+	else if ((fscr & SPEFSCR_FDBZ) && (enabled & PR_FP_EXC_DIV))
+		code = FPE_FLTDIV;
+	else if ((fscr & SPEFSCR_FINV) && (enabled & PR_FP_EXC_INV))
+		code = FPE_FLTINV;
+	else if ((fscr & (SPEFSCR_FG | SPEFSCR_FX)) && (enabled & PR_FP_EXC_RES))
+		code = FPE_FLTRES;
+
+	/* Let the emulator have a go; its result decides what is reported. */
+	rc = do_spe_mathemu(regs);
+	switch (rc) {
+	case 0:
+		regs_add_return_ip(regs, 4); /* skip emulated instruction */
+		emulate_single_step(regs);
+		break;
+	case -EFAULT:
+		/* got an error reading the instruction */
+		_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+		break;
+	case -EINVAL:
+		/* didn't recognize the instruction */
+		printk(KERN_ERR "unrecognized spe instruction "
+		       "in %s at %lx\n", current->comm, regs->nip);
+		break;
+	default:
+		/* any other failure is reported as SIGFPE with the code above */
+		_exception(SIGFPE, regs, code, regs->nip);
+		break;
+	}
+}
+
+DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
+{
+	int err;
+
+	interrupt_cond_local_irq_enable(regs);
+
+	preempt_disable();
+	if (regs->msr & MSR_SPE)
+		giveup_spe(current);	/* only when MSR_SPE is set in the saved MSR */
+	preempt_enable();
+
+	regs_add_return_ip(regs, -4);	/* back up 4 bytes before calling the handler */
+	err = speround_handler(regs);
+	if (err == 0) {
+		regs_add_return_ip(regs, 4); /* skip emulated instruction */
+		emulate_single_step(regs);
+		return;
+	}
+
+	if (err == -EFAULT) {
+		/* got an error reading the instruction */
+		_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+	} else if (err == -EINVAL) {
+		/* didn't recognize the instruction: logged only, no signal */
+		printk(KERN_ERR "unrecognized spe instruction "
+		       "in %s at %lx\n", current->comm, regs->nip);
+	} else {
+		_exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
+		return;
+	}
+}
+#endif
+
+/*
+ * We enter here if we get an unrecoverable exception, that is, one
+ * that happened at a point where the RI (recoverable interrupt) bit
+ * in the MSR is 0.  This indicates that SRR0/1 are live, and that
+ * we therefore lost state by taking this exception.
+ */
+void __noreturn unrecoverable_exception(struct pt_regs *regs)
+{
+	pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
+		 regs->trap, regs->nip, regs->msr);
+	die("Unrecoverable exception", regs, SIGABRT);
+	/* die() should not return; spin here so __noreturn holds even if it does */
+	for (;;)
+		;
+}
+
+#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
+DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
+{
+	printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
+	mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);	/* read-modify-write: clear TCR_WIE in TCR */
+	return 0;
+}
+#endif
+
+/*
+ * We enter here if we discover during exception entry that we are
+ * running in supervisor mode with a userspace value in the stack pointer.
+ */
+DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
+{
+	printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
+	       regs->gpr[1], regs->nip);	/* gpr[1] is reported as the stack pointer */
+	die("Bad kernel stack pointer", regs, SIGABRT);
+}
+
+#ifdef CONFIG_PPC_EMULATED_STATS
+
+#define WARN_EMULATED_SETUP(type)	.type = { .name = #type }	/* member <type> gets .name = "<type>" */
+
+struct ppc_emulated ppc_emulated = {
+#ifdef CONFIG_ALTIVEC
+	WARN_EMULATED_SETUP(altivec),
+#endif
+	WARN_EMULATED_SETUP(dcba),
+	WARN_EMULATED_SETUP(dcbz),
+	WARN_EMULATED_SETUP(fp_pair),
+	WARN_EMULATED_SETUP(isel),
+	WARN_EMULATED_SETUP(mcrxr),
+	WARN_EMULATED_SETUP(mfpvr),
+	WARN_EMULATED_SETUP(multiple),
+	WARN_EMULATED_SETUP(popcntb),
+	WARN_EMULATED_SETUP(spe),
+	WARN_EMULATED_SETUP(string),
+	WARN_EMULATED_SETUP(sync),
+	WARN_EMULATED_SETUP(unaligned),
+#ifdef CONFIG_MATH_EMULATION
+	WARN_EMULATED_SETUP(math),
+#endif
+#ifdef CONFIG_VSX
+	WARN_EMULATED_SETUP(vsx),
+#endif
+#ifdef CONFIG_PPC64
+	WARN_EMULATED_SETUP(mfdscr),
+	WARN_EMULATED_SETUP(mtdscr),
+	WARN_EMULATED_SETUP(lq_stq),
+	WARN_EMULATED_SETUP(lxvw4x),
+	WARN_EMULATED_SETUP(lxvh8x),
+	WARN_EMULATED_SETUP(lxvd2x),
+	WARN_EMULATED_SETUP(lxvb16x),
+#endif
+};	/* ppc_warn_emulated_init() walks this struct as an array of entries */
+
+u32 ppc_warn_emulated;	/* exposed through the debugfs file "do_warn" */
+
+void ppc_warn_emulated_print(const char *type)
+{
+	pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
+			    type);	/* rate-limited; names current->comm and the type */
+}
+
+static int __init ppc_warn_emulated_init(void)
+{
+	/* ppc_emulated is walked as a flat array of ppc_emulated_entry. */
+	struct ppc_emulated_entry *entry = (void *)&ppc_emulated;
+	const unsigned int nr = sizeof(ppc_emulated) / sizeof(*entry);
+	struct dentry *dir;
+	unsigned int idx;
+
+	dir = debugfs_create_dir("emulated_instructions", arch_debugfs_dir);
+	debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
+
+	/* One u32 file per entry, named after it and backed by its counter. */
+	for (idx = 0; idx < nr; idx++, entry++)
+		debugfs_create_u32(entry->name, 0644, dir,
+				   (u32 *)&entry->val.counter);
+	return 0;
+}
+
+device_initcall(ppc_warn_emulated_init);
+
+#endif /* CONFIG_PPC_EMULATED_STATS */
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 0000000000..80a1f9a430
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+	sc	2	/* Invoke the ultravisor (sc with level operand 2) */
+	blr		/* Return to the caller with r3 = status */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
new file mode 100644
index 0000000000..92b3fc258d
--- /dev/null
+++ b/arch/powerpc/kernel/udbg.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * polling mode stateless debugging stuff, originally for NS16550 Serial Ports
+ *
+ * c 2001 PPC 64 Team, IBM Corp
+ */
+
+#include <linux/stdarg.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+void (*udbg_putc)(char c);	/* backend hook: emit one character; NULL until a backend is set */
+void (*udbg_flush)(void);	/* optional backend hook, called after output when non-NULL */
+int (*udbg_getc)(void);		/* backend hook: read one character */
+int (*udbg_getc_poll)(void);	/* backend hook: polling variant of udbg_getc */
+
+/*
+ * Early debugging facilities. You can enable _one_ of these via .config,
+ * if you do so your kernel _will not boot_ on anything else. Be careful.
+ */
+void __init udbg_early_init(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_LPAR)
+	/* For LPAR machines that have an HVC console on vterm 0 */
+	udbg_init_debug_lpar();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_LPAR_HVSI)
+	/* For LPAR machines that have an HVSI console on vterm 0 */
+	udbg_init_debug_lpar_hvsi();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_G5)
+	/* For use on Apple G5 machines */
+	udbg_init_pmac_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL)
+	/* RTAS panel debug */
+	udbg_init_rtas_panel();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE)
+	/* RTAS console debug */
+	udbg_init_rtas_console();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE)
+	/* Maple real mode debug */
+	udbg_init_maple_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
+	udbg_init_pas_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX)
+	udbg_init_btext();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_44x)
+	/* PPC44x debug */
+	udbg_init_44x_as1();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_40x)
+	/* PPC40x debug */
+	udbg_init_40x_realmode();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_CPM)
+	udbg_init_cpm();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
+	udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)
+	/* In memory console */
+	udbg_init_memcons();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC)
+	udbg_init_ehv_bc();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC)
+	udbg_init_ps3gelic();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_RAW)
+	udbg_init_debug_opal_raw();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI)
+	udbg_init_debug_opal_hvsi();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_16550)
+	udbg_init_debug_16550();
+#endif
+
+#ifdef CONFIG_PPC_EARLY_DEBUG
+	console_loglevel = CONSOLE_LOGLEVEL_DEBUG;	/* any early-debug build logs at debug level */
+
+	register_early_udbg_console();	/* returns early itself if no udbg_putc backend was set */
+#endif
+}
+
+/* udbg library, used by xmon et al */
+void udbg_puts(const char *s)
+{
+	char c;
+
+	/* Without a udbg_putc backend there is nowhere to send the text. */
+	if (!udbg_putc)
+		return;
+
+	/*
+	 * Emit everything up to the terminating NUL.  A NULL or empty s
+	 * emits nothing but still falls through to the flush below.
+	 */
+	for (; s && (c = *s) != '\0'; s++)
+		udbg_putc(c);
+
+	/* The flush hook is optional. */
+	if (udbg_flush)
+		udbg_flush();
+}
+
+int udbg_write(const char *s, int n)
+{
+	int remain = n;
+	char c;
+
+	/* Emit at most n chars of s (stopping at NUL); returns the count written. */
+	if (!udbg_putc)
+		return 0;
+
+	/* Check remain first: never read s[n], never let remain go negative. */
+	while (s && remain > 0 && (c = *s++) != '\0') {
+		udbg_putc(c);
+		remain--;
+	}
+
+	if (udbg_flush)
+		udbg_flush();
+	return n - remain;
+}
+
+#define UDBG_BUFSIZE 256
+void udbg_printf(const char *fmt, ...)
+{
+	char buf[UDBG_BUFSIZE];
+	va_list ap;
+	/* Skip the formatting work entirely when no backend is registered. */
+	if (!udbg_putc)
+		return;
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+	udbg_puts(buf);
+}
+
+void __init udbg_progress(char *s, unsigned short hex)
+{
+	udbg_puts(s);		/* hex is accepted but not used here */
+	udbg_puts("\n");
+}
+
+/*
+ * Early boot console based on udbg
+ */
+static void udbg_console_write(struct console *con, const char *s,
+		unsigned int n)
+{
+	udbg_write(s, n);	/* con is unused; the byte count returned is ignored */
+}
+
+static struct console udbg_console = {
+	.name	= "udbg",
+	.write	= udbg_console_write,
+	.flags	= CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME,	/* CON_BOOT is dropped for "udbg-immortal" */
+	.index	= 0,
+};
+
+/*
+ * Called by setup_system after ppc_md->probe and ppc_md->early_init.
+ * Call it again after setting udbg_putc in ppc_md->setup_arch.
+ */
+void __init register_early_udbg_console(void)
+{
+	/* Nothing to do if a console is already set or no backend exists. */
+	if (early_console || !udbg_putc)
+		return;
+
+	/* "udbg-immortal" anywhere on the command line drops CON_BOOT. */
+	if (strstr(boot_command_line, "udbg-immortal")) {
+		printk(KERN_INFO "early console immortal !\n");
+		udbg_console.flags &= ~CON_BOOT;
+	}
+
+	early_console = &udbg_console;
+	register_console(&udbg_console);
+}
+
+#if 0   /* if you want to use this as a regular output console */
+console_initcall(register_udbg_console);
+#endif
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
new file mode 100644
index 0000000000..74ddf836f7
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * udbg for NS16550 compatible serial ports
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ */
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/io.h>
+#include <asm/reg_a2.h>
+#include <asm/early_ioremap.h>
+
+extern u8 real_readb(volatile u8 __iomem  *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+extern u8 real_205_readb(volatile u8 __iomem  *addr);
+extern void real_205_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define UART_RBR	0
+#define UART_IER	1
+#define UART_FCR	2
+#define UART_LCR	3
+#define UART_MCR	4
+#define UART_LSR	5
+#define UART_MSR	6
+#define UART_SCR	7
+#define UART_THR	UART_RBR
+#define UART_IIR	UART_FCR
+#define UART_DLL	UART_RBR
+#define UART_DLM	UART_IER
+#define UART_DLAB	UART_LCR
+
+#define LSR_DR   0x01  /* Data ready */
+#define LSR_OE   0x02  /* Overrun */
+#define LSR_PE   0x04  /* Parity error */
+#define LSR_FE   0x08  /* Framing error */
+#define LSR_BI   0x10  /* Break */
+#define LSR_THRE 0x20  /* Xmit holding register empty */
+#define LSR_TEMT 0x40  /* Xmitter empty */
+#define LSR_ERR  0x80  /* Error */
+
+#define LCR_DLAB 0x80
+
+static u8 (*udbg_uart_in)(unsigned int reg);
+static void (*udbg_uart_out)(unsigned int reg, u8 data);
+
+static void udbg_uart_flush(void)
+{
+	/* No register accessor installed yet: nothing can be polled. */
+	if (!udbg_uart_in)
+		return;
+	/* Spin until LSR reports LSR_THRE (xmit holding register empty). */
+	while (!(udbg_uart_in(UART_LSR) & LSR_THRE))
+		cpu_relax();
+}
+
+static void udbg_uart_putc(char c)
+{
+	if (!udbg_uart_out)
+		return;
+
+	if (c == '\n')
+		udbg_uart_putc('\r');	/* recurse once so '\n' goes out as "\r\n" */
+	udbg_uart_flush();		/* wait for LSR_THRE before writing */
+	udbg_uart_out(UART_THR, c);
+}
+
+static int udbg_uart_getc_poll(void)
+{
+	if (!udbg_uart_in)
+		return -1;
+
+	/* Non-blocking read: fetch RBR only when LSR reports Data Ready. */
+	if (udbg_uart_in(UART_LSR) & LSR_DR)
+		return udbg_uart_in(UART_RBR);
+	return -1;	/* nothing pending (or no accessor installed, above) */
+}
+
+static int udbg_uart_getc(void)
+{
+	if (!udbg_uart_in)
+		return -1;
+	/* busy-wait until LSR flags Data Ready, then fetch the byte from RBR */
+	while (!(udbg_uart_in(UART_LSR) & LSR_DR))
+		cpu_relax();
+	return udbg_uart_in(UART_RBR);
+}
+
+static void __init udbg_use_uart(void)
+{
+	udbg_putc = udbg_uart_putc;	/* point the generic udbg hooks at the UART helpers */
+	udbg_flush = udbg_uart_flush;
+	udbg_getc = udbg_uart_getc;
+	udbg_getc_poll = udbg_uart_getc_poll;
+}
+
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock)
+{
+	unsigned int dll, base_bauds;
+
+	if (!udbg_uart_out)
+		return;
+
+	if (clock == 0)
+		clock = 1843200;	/* default used when the caller passes 0 */
+	if (speed == 0)
+		speed = 9600;		/* default used when the caller passes 0 */
+
+	base_bauds = clock / 16;
+	dll = base_bauds / speed;	/* divisor written to DLL (low byte) and DLM (high byte) */
+
+	udbg_uart_out(UART_LCR, 0x00);
+	udbg_uart_out(UART_IER, 0xff);
+	udbg_uart_out(UART_IER, 0x00);
+	udbg_uart_out(UART_LCR, LCR_DLAB);	/* DLAB set: offsets 0/1 now address DLL/DLM */
+	udbg_uart_out(UART_DLL, dll & 0xff);
+	udbg_uart_out(UART_DLM, dll >> 8);
+	/* 8 data, 1 stop, no parity */
+	udbg_uart_out(UART_LCR, 0x3);
+	/* RTS/DTR */
+	udbg_uart_out(UART_MCR, 0x3);
+	/* Clear & enable FIFOs */
+	udbg_uart_out(UART_FCR, 0x7);
+}
+
+unsigned int __init udbg_probe_uart_speed(unsigned int clock)
+{
+	unsigned int dll, dlm, divisor, prescaler, speed;
+	u8 old_lcr;
+	/* Returns the baud rate implied by the UART's divisor latch for @clock. */
+	old_lcr = udbg_uart_in(UART_LCR);
+
+	/* select divisor latch registers.  */
+	udbg_uart_out(UART_LCR, old_lcr | LCR_DLAB);
+
+	/* now, read the divisor */
+	dll = udbg_uart_in(UART_DLL);
+	dlm = udbg_uart_in(UART_DLM);
+	divisor = dlm << 8 | dll;
+
+	/* check prescaling */
+	if (udbg_uart_in(UART_MCR) & 0x80)
+		prescaler = 4;
+	else
+		prescaler = 1;
+
+	/* restore the LCR */
+	udbg_uart_out(UART_LCR, old_lcr);
+
+	/* calculate speed; a latch reading 0 must not be divided by, use 9600 */
+	speed = divisor ? (clock / prescaler) / (divisor * 16) : 9600;
+
+	/* sanity check */
+	if (speed > (clock / 16))
+		speed = 9600;
+
+	return speed;
+}
+
+static union {
+	unsigned char __iomem *mmio_base;	/* set by udbg_uart_init_mmio() */
+	unsigned long pio_base;			/* set by udbg_uart_init_pio() */
+} udbg_uart;
+
+static unsigned int udbg_uart_stride = 1;	/* multiplier applied to the register index */
+
+static u8 udbg_uart_in_pio(unsigned int reg)
+{
+	return inb(udbg_uart.pio_base + (reg * udbg_uart_stride));	/* port = base + reg * stride */
+}
+
+static void udbg_uart_out_pio(unsigned int reg, u8 data)
+{
+	outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));	/* port = base + reg * stride */
+}
+
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)
+{
+	if (!port)
+		return;			/* port 0: leave the current hooks untouched */
+	udbg_uart.pio_base = port;
+	udbg_uart_stride = stride;
+	udbg_uart_in = udbg_uart_in_pio;
+	udbg_uart_out = udbg_uart_out_pio;
+	udbg_use_uart();		/* publish the UART helpers through the udbg_* hooks */
+}
+
+static u8 udbg_uart_in_mmio(unsigned int reg)
+{
+	return in_8(udbg_uart.mmio_base + (reg * udbg_uart_stride));	/* address = base + reg * stride */
+}
+
+static void udbg_uart_out_mmio(unsigned int reg, u8 data)
+{
+	out_8(udbg_uart.mmio_base + (reg * udbg_uart_stride), data);	/* address = base + reg * stride */
+}
+
+
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+{
+	if (!addr)
+		return;			/* NULL mapping: leave the current hooks untouched */
+	udbg_uart.mmio_base = addr;
+	udbg_uart_stride = stride;
+	udbg_uart_in = udbg_uart_in_mmio;
+	udbg_uart_out = udbg_uart_out_mmio;
+	udbg_use_uart();		/* publish the UART helpers through the udbg_* hooks */
+}
+
+#ifdef CONFIG_PPC_MAPLE
+
+#define UDBG_UART_MAPLE_ADDR	((void __iomem *)0xf40003f8)
+
+static u8 udbg_uart_in_maple(unsigned int reg)
+{
+	return real_readb(UDBG_UART_MAPLE_ADDR + reg);	/* fixed address; reg added directly, no stride */
+}
+
+static void udbg_uart_out_maple(unsigned int reg, u8 val)
+{
+	real_writeb(val, UDBG_UART_MAPLE_ADDR + reg);	/* fixed address; reg added directly, no stride */
+}
+
+void __init udbg_init_maple_realmode(void)
+{
+	udbg_uart_in = udbg_uart_in_maple;
+	udbg_uart_out = udbg_uart_out_maple;
+	udbg_use_uart();	/* publish the UART helpers through the udbg_* hooks */
+}
+
+#endif /* CONFIG_PPC_MAPLE */
+
+#ifdef CONFIG_PPC_PASEMI
+
+#define UDBG_UART_PAS_ADDR	((void __iomem *)0xfcff03f8UL)
+
+static u8 udbg_uart_in_pas(unsigned int reg)
+{
+	return real_205_readb(UDBG_UART_PAS_ADDR + reg);	/* uses the real_205_* accessor variant */
+}
+
+static void udbg_uart_out_pas(unsigned int reg, u8 val)
+{
+	real_205_writeb(val, UDBG_UART_PAS_ADDR + reg);	/* uses the real_205_* accessor variant */
+}
+
+void __init udbg_init_pas_realmode(void)
+{
+	udbg_uart_in = udbg_uart_in_pas;
+	udbg_uart_out = udbg_uart_out_pas;
+	udbg_use_uart();	/* publish the UART helpers through the udbg_* hooks */
+}
+
+#endif /* CONFIG_PPC_PASEMI */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+
+#include <platforms/44x/44x.h>
+
+static u8 udbg_uart_in_44x_as1(unsigned int reg)
+{
+	return as1_readb((void __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR + reg);	/* reg added directly, no stride */
+}
+
+static void udbg_uart_out_44x_as1(unsigned int reg, u8 val)
+{
+	as1_writeb(val, (void __iomem *)PPC44x_EARLY_DEBUG_VIRTADDR + reg);	/* reg added directly, no stride */
+}
+
+void __init udbg_init_44x_as1(void)
+{
+	udbg_uart_in = udbg_uart_in_44x_as1;
+	udbg_uart_out = udbg_uart_out_44x_as1;
+	udbg_use_uart();	/* publish the UART helpers through the udbg_* hooks */
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_40x
+
+static u8 udbg_uart_in_40x(unsigned int reg)
+{
+	return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR
+			  + reg);	/* Kconfig-supplied address; reg added directly, no stride */
+}
+
+static void udbg_uart_out_40x(unsigned int reg, u8 val)
+{
+	real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR
+		    + reg);	/* Kconfig-supplied address; reg added directly, no stride */
+}
+
+void __init udbg_init_40x_realmode(void)
+{
+	udbg_uart_in = udbg_uart_in_40x;
+	udbg_uart_out = udbg_uart_out_40x;
+	udbg_use_uart();	/* publish the UART helpers through the udbg_* hooks */
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_16550
+
+static void __iomem *udbg_uart_early_addr;	/* early mapping; reset to NULL once replaced by ioremap() */
+
+void __init udbg_init_debug_16550(void)
+{
+	udbg_uart_early_addr = early_ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+	udbg_uart_init_mmio(udbg_uart_early_addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);	/* no-op if the mapping is NULL */
+}
+
+static int __init udbg_init_debug_16550_ioremap(void)
+{
+	void __iomem *addr;
+
+	if (!udbg_uart_early_addr)
+		return 0;	/* no early mapping was set up: nothing to replace */
+
+	addr = ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+	if (WARN_ON(!addr))
+		return -ENOMEM;	/* the early mapping is left in place on failure */
+
+	udbg_uart_init_mmio(addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);	/* switch to the new mapping first */
+	early_iounmap(udbg_uart_early_addr, 0x1000);	/* only then drop the early one */
+	udbg_uart_early_addr = NULL;
+
+	return 0;
+}
+
+early_initcall(udbg_init_debug_16550_ioremap);
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_16550 */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
new file mode 100644
index 0000000000..95a41ae9df
--- /dev/null
+++ b/arch/powerpc/kernel/uprobes.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * User-space Probes (UProbes) for powerpc
+ *
+ * Copyright IBM Corporation, 2007-2012
+ *
+ * Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
+
+#include <asm/sstep.h>
+#include <asm/inst.h>
+
+#define UPROBE_TRAP_NR	UINT_MAX
+
+/**
+ * is_trap_insn - check if the instruction is a trap variant
+ * @insn: pointer to the instruction to be checked; it is dereferenced.
+ * Returns true if is_trap() reports *@insn as a trap variant.
+ */
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+	return (is_trap(*insn));
+}
+
+/**
+ * arch_uprobe_analyze_insn - check whether an instruction can be probed
+ * @auprobe: the probepoint information.
+ * @mm: the probed address space.
+ * @addr: vaddr to probe.
+ * Return 0 on success or a -ve number on error.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
+		struct mm_struct *mm, unsigned long addr)
+{
+	if (addr & 0x03)
+		return -EINVAL;	/* address is not 4-byte aligned */
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+	    ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) &&
+	    (addr & 0x3f) == 60) {	/* prefixed insn in the last word of a 64-byte block */
+		pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
+		return -EINVAL;
+	}
+
+	if (!can_single_step(ppc_inst_val(ppc_inst_read(auprobe->insn)))) {
+		pr_info_ratelimited("Cannot register a uprobe on instructions that can't be single stepped\n");
+		return -ENOTSUPP;
+	}
+
+	return 0;	/* @mm is not consulted by any check above */
+}
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @auprobe: the probepoint information.
+ * @regs: reflects the saved user state of current task.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct arch_uprobe_task *autask = &current->utask->autask;
+
+	autask->saved_trap_nr = current->thread.trap_nr;	/* restored by post_xol/abort_xol */
+	current->thread.trap_nr = UPROBE_TRAP_NR;	/* marker checked by arch_uprobe_xol_was_trapped() */
+	regs_set_return_ip(regs, current->utask->xol_vaddr);	/* resume at the out-of-line copy */
+
+	user_enable_single_step(current);
+	return 0;	/* always succeeds; @auprobe is not used here */
+}
+
+/**
+ * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
+ * @regs: Reflects the saved state of the task after it has hit a breakpoint
+ * instruction.
+ * Return the address of the breakpoint instruction.
+ */
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+	return instruction_pointer(regs);	/* returned as-is, with no adjustment */
+}
+
+/*
+ * If xol insn itself traps and generates a signal (SIGILL/SIGSEGV/etc),
+ * then detect the case where a singlestepped instruction jumps back to its
+ * own address. It is assumed that anything like do_page_fault/do_trap/etc
+ * sets thread.trap_nr != UINT_MAX.
+ *
+ * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr,
+ * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
+ * UPROBE_TRAP_NR == UINT_MAX set by arch_uprobe_pre_xol().
+ */
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+	/*
+	 * trap_nr no longer holding the pre_xol marker means it was rewritten.
+	 */
+	return t->thread.trap_nr != UPROBE_TRAP_NR;
+}
+
+/*
+ * Called after single-stepping. To avoid the SMP problems that can
+ * occur when we temporarily put back the original opcode to
+ * single-step, we single-stepped a copy of the instruction.
+ *
+ * This function prepares to resume execution after the single-step.
+ */
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);	/* marker set by pre_xol should be intact */
+
+	current->thread.trap_nr = utask->autask.saved_trap_nr;	/* undo what pre_xol saved */
+
+	/*
+	 * On powerpc, except for loads and stores, most instructions
+	 * including ones that alter code flow (branches, calls, returns)
+	 * are emulated in the kernel. We get here only if the emulation
+	 * support doesn't exist and have to fix-up the next instruction
+	 * to be executed.
+	 */
+	regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn));
+
+	user_disable_single_step(current);
+	return 0;	/* always succeeds */
+}
+
+/* callback routine for handling exceptions. */
+int arch_uprobe_exception_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	struct die_args *die = data;
+	struct pt_regs *regs = die->regs;
+	int handled = 0;
+
+	/* regs == NULL is a kernel bug */
+	if (WARN_ON(!regs))
+		return NOTIFY_DONE;
+
+	/* We are only interested in userspace traps */
+	if (!user_mode(regs))
+		return NOTIFY_DONE;
+
+	/*
+	 * Breakpoint hits go to the pre-single-step notifier and completed
+	 * single steps to the post one; any other event is not ours.
+	 */
+	if (val == DIE_BPT)
+		handled = uprobe_pre_sstep_notifier(regs);
+	else if (val == DIE_SSTEP)
+		handled = uprobe_post_sstep_notifier(regs);
+
+	if (handled)
+		return NOTIFY_STOP;
+	return NOTIFY_DONE;
+}
+
+/*
+ * This function gets called when XOL instruction either gets trapped or
+ * the thread has a fatal signal, so reset the instruction pointer to its
+ * probed address.
+ */
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	current->thread.trap_nr = utask->autask.saved_trap_nr;	/* undo what pre_xol saved */
+	instruction_pointer_set(regs, utask->vaddr);	/* back to the probed address */
+
+	user_disable_single_step(current);	/* @auprobe is not used here */
+}
+
+/*
+ * arch_uprobe_skip_sstep - try to emulate the probed instruction.
+ * @auprobe: the probepoint information; its saved insn is what gets emulated.
+ * @regs: register state handed to emulate_step().
+ *
+ * Returns true if the instruction was emulated, false otherwise (in which
+ * case single-stepping in hardware is still needed).
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	int emulated = emulate_step(regs, ppc_inst_read(auprobe->insn));
+
+	/*
+	 * emulate_step() returns 1 if the insn was successfully emulated.
+	 * Zero and negative results are both reported as "not emulated".
+	 */
+	return emulated > 0;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	/* Remember the return address currently held in regs->link. */
+	const unsigned long real_ret = regs->link;
+
+	/* Replace the return addr with trampoline addr */
+	regs->link = trampoline_vaddr;
+
+	/* The displaced address goes back to the caller. */
+	return real_ret;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+				struct pt_regs *regs)
+{
+	const unsigned long sp = regs->gpr[1];
+
+	/* For a chained call an equal stack pointer still counts as alive. */
+	return ctx == RP_CHECK_CHAIN_CALL ? sp <= ret->stack : sp < ret->stack;
+}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
new file mode 100644
index 0000000000..7a2ff9010f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *			 <benh@kernel.crashing.org>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/memblock.h>
+#include <linux/syscalls.h>
+#include <linux/time_namespace.h>
+#include <vdso/datapage.h>
+
+#include <asm/syscall.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/firmware.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/setup.h>
+
+/* The alignment of the vDSO */
+#define VDSO_ALIGNMENT	(1 << 16)
+
+extern char vdso32_start, vdso32_end;
+extern char vdso64_start, vdso64_end;
+
+long sys_ni_syscall(void);
+
+/*
+ * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
+ * Once the early boot kernel code no longer needs to muck around
+ * with it, it will become dynamically allocated
+ */
+static union {
+	struct vdso_arch_data	data;
+	u8			page[PAGE_SIZE];	/* makes the store at least one page big */
+} vdso_data_store __page_aligned_data;
+struct vdso_arch_data *vdso_data = &vdso_data_store.data;
+
+enum vvar_pages {
+	VVAR_DATA_PAGE_OFFSET,		/* pgoff of the data page in the vvar mapping */
+	VVAR_TIMENS_PAGE_OFFSET,	/* pgoff handled only with CONFIG_TIME_NS */
+	VVAR_NR_PAGES,			/* page count used to size the vvar mapping */
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
+		       unsigned long text_size)
+{
+	/* The new mapping must cover exactly the vDSO text, no more, no less. */
+	if (new_vma->vm_end - new_vma->vm_start != text_size)
+		return -EINVAL;
+
+	/* Record the new start address of the vDSO for this mm. */
+	current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+	return 0;
+}
+
+static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+	return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start);	/* vdso32 image size */
+}
+
+static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+	return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);	/* vdso64 image size */
+}
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf);
+
+static struct vm_special_mapping vvar_spec __ro_after_init = {
+	.name = "[vvar]",
+	.fault = vvar_fault,	/* pages are inserted on demand by vvar_fault() */
+};
+
+static struct vm_special_mapping vdso32_spec __ro_after_init = {
+	.name = "[vdso]",
+	.mremap = vdso32_mremap,	/* .pages is filled in later by vdso_init() */
+};
+
+static struct vm_special_mapping vdso64_spec __ro_after_init = {
+	.name = "[vdso]",
+	.mremap = vdso64_mremap,	/* .pages is filled in later by vdso_init() */
+};
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return ((struct vdso_arch_data *)vvar_page)->data;	/* ->data member of the arch layout */
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	mmap_read_lock(mm);
+	for_each_vma(vmi, vma) {
+		if (!vma_is_special_mapping(vma, &vvar_spec))
+			continue;
+		zap_vma_pages(vma);	/* later faults go through vvar_fault() */
+	}
+	mmap_read_unlock(mm);
+	return 0;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *ns_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		/*
+		 * With a time namespace page available it is what gets mapped
+		 * at the data offset; otherwise the global data page is used.
+		 */
+		pfn = ns_page ? page_to_pfn(ns_page) : virt_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * Only valid for a task that belongs to a time namespace: there
+		 * the namespace specific VVAR sits at VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is exposed at this offset instead.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (ns_page == NULL)
+			return VM_FAULT_SIGBUS;
+		pfn = virt_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	const unsigned long vvar_len = VVAR_NR_PAGES * PAGE_SIZE;
+	struct mm_struct *mm = current->mm;
+	struct vm_special_mapping *text_spec;
+	unsigned long text_len, base, span;
+	struct vm_area_struct *vma;
+
+	/* Select the vDSO image that matches the word size of the task. */
+	if (!is_32bit_task()) {
+		text_spec = &vdso64_spec;
+		text_len = &vdso64_end - &vdso64_start;
+	} else {
+		text_spec = &vdso32_spec;
+		text_len = &vdso32_end - &vdso32_start;
+	}
+
+	/*
+	 * Pick a base address in process space. Ask for the vvar pages and
+	 * the vDSO text plus enough slack that the result can be aligned.
+	 */
+	span = vvar_len + text_len + ((VDSO_ALIGNMENT - 1) & PAGE_MASK);
+	base = get_unmapped_area(NULL, 0, span, 0, 0);
+	if (IS_ERR_VALUE(base))
+		return base;
+
+	/* Round the start up to the alignment required for the vDSO. */
+	base = ALIGN(base, VDSO_ALIGNMENT);
+
+	/*
+	 * Store the vDSO text address in the mm before calling
+	 * _install_special_mapping(), or the perf counter mmap tracking
+	 * code will fail to recognise the mapping as a vDSO.
+	 */
+	mm->context.vdso = (void __user *)base + vvar_len;
+
+	/* The vvar pages come first, directly below the vDSO text. */
+	vma = _install_special_mapping(mm, base, vvar_len,
+				       VM_READ | VM_MAYREAD | VM_IO |
+				       VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	/*
+	 * The text flags omit VM_WRITE, so by default the process cannot
+	 * write to these pages. gdb can get around that through the ptrace
+	 * interface and so trigger COW on them, which is fine for setting
+	 * breakpoints in the vDSO code pages. Doing the same to the vDSO
+	 * "data" page must be avoided: the copy would stop receiving kernel
+	 * updates and userland gettimeofday would be dead.
+	 */
+	vma = _install_special_mapping(mm, base + vvar_len, text_len,
+				       VM_READ | VM_EXEC | VM_MAYREAD |
+				       VM_MAYWRITE | VM_MAYEXEC, text_spec);
+	if (!IS_ERR(vma))
+		return 0;
+
+	/* Installing the text failed: remove the vvar mapping again. */
+	do_munmap(mm, base, vvar_len, NULL);
+	return PTR_ERR(vma);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	int err;
+
+	/* Start out with no vDSO recorded for this mm. */
+	mm->context.vdso = NULL;
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	err = __arch_setup_additional_pages(bprm, uses_interp);
+	if (err != 0)
+		mm->context.vdso = NULL;	/* drop the address the helper stored */
+
+	mmap_write_unlock(mm);
+	return err;
+}
+
+#define VDSO_DO_FIXUPS(type, value, bits, sec) do {					\
+	void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start);	\
+	void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end);	\
+	/* hand value and both section bounds to the do_<type>_fixups() routine */	\
+	do_##type##_fixups((value), __start, __end);					\
+} while (0)
+
+static void __init vdso_fixup_features(void)
+{
+#ifdef CONFIG_PPC64	/* fixup sections of the 64-bit vDSO image */
+	VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup);
+	VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup);
+	VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup);
+	VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup);
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_VDSO32	/* fixup sections of the 32-bit vDSO image */
+	VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup);
+	VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup);
+#ifdef CONFIG_PPC64	/* firmware feature fixups are applied on PPC64 only */
+	VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup);
+#endif /* CONFIG_PPC64 */
+	VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup);
+#endif /* CONFIG_VDSO32 */
+}
+
+/*
+ * Called from vdso_init() to initialize the bitmaps of available
+ * syscalls in the vdso data ("systemcfg") page
+ */
+static void __init vdso_setup_syscall_map(void)
+{
+	unsigned int nr;
+
+	for (nr = 0; nr < NR_syscalls; nr++) {
+		/* Bits run MSB-first inside each 32-bit word of the map. */
+		if (sys_call_table[nr] != (void *)&sys_ni_syscall)
+			vdso_data->syscall_map[nr / 32] |= 0x80000000UL >> (nr % 32);
+		if (IS_ENABLED(CONFIG_COMPAT) &&
+		    compat_sys_call_table[nr] != (void *)&sys_ni_syscall)
+			vdso_data->compat_syscall_map[nr / 32] |= 0x80000000UL >> (nr % 32);
+	}
+}
+
+#ifdef CONFIG_PPC64
+int vdso_getcpu_init(void)
+{
+	unsigned long cpu_id, node_id, packed;
+
+	/* get_cpu()/put_cpu() bracket the SPR and paca writes below. */
+	cpu_id = get_cpu();
+	WARN_ON_ONCE(cpu_id > 0xffff);
+
+	node_id = cpu_to_node(cpu_id);
+	WARN_ON_ONCE(node_id > 0xffff);
+
+	/*
+	 * SPRG_VDSO layout used by the vDSO getcpu(): CPU number in the
+	 * bottom 16 bits, NUMA node in the next 16 bits.
+	 */
+	packed = ((node_id & 0xffff) << 16) | (cpu_id & 0xffff);
+	mtspr(SPRN_SPRG_VDSO_WRITE, packed);
+	get_paca()->sprg_vdso = packed;
+	put_cpu();
+
+	return 0;
+}
+/* We need to call this before SMP init */
+early_initcall(vdso_getcpu_init);
+#endif
+
+static struct page ** __init vdso_setup_pages(void *start, void *end)
+{
+	int nr_pages = (end - start) >> PAGE_SHIFT;
+	struct page **list;
+	int idx;
+
+	/* One slot more than needed; kcalloc() leaves it zeroed. */
+	list = kcalloc(nr_pages + 1, sizeof(*list), GFP_KERNEL);
+	if (list == NULL)
+		panic("%s: Cannot allocate page list for VDSO", __func__);
+
+	for (idx = 0; idx < nr_pages; idx++)
+		list[idx] = virt_to_page(start + idx * PAGE_SIZE);
+	return list;
+}
+
+static int __init vdso_init(void)
+{
+#ifdef CONFIG_PPC64
+	struct vdso_arch_data *cfg = vdso_data;
+
+	/* Legacy "systemcfg" fields, still filled for backward compatibility. */
+	strcpy((char *)cfg->eye_catcher, "SYSTEMCFG:PPC64");
+	cfg->version.major = SYSTEMCFG_MAJOR;
+	cfg->version.minor = SYSTEMCFG_MINOR;
+	cfg->processor = mfspr(SPRN_PVR);
+	cfg->physicalMemorySize = memblock_phys_mem_size();
+
+	/* Fake the old pSeries platform number; the low bit flags LPAR. */
+	cfg->platform = 0x100;
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		cfg->platform |= 1;
+
+	/* Data cache parameters, taken from ppc64_caches.l1d */
+	cfg->dcache_size = ppc64_caches.l1d.size;
+	cfg->dcache_line_size = ppc64_caches.l1d.line_size;
+	cfg->dcache_block_size = ppc64_caches.l1d.block_size;
+	cfg->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
+	/* Instruction cache parameters, taken from ppc64_caches.l1i */
+	cfg->icache_size = ppc64_caches.l1i.size;
+	cfg->icache_line_size = ppc64_caches.l1i.line_size;
+	cfg->icache_block_size = ppc64_caches.l1i.block_size;
+	cfg->icache_log_block_size = ppc64_caches.l1i.log_block_size;
+#endif /* CONFIG_PPC64 */
+
+	vdso_setup_syscall_map();
+	vdso_fixup_features();
+
+	/* Build the page array behind each vDSO image that is configured. */
+	if (IS_ENABLED(CONFIG_VDSO32))
+		vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
+	if (IS_ENABLED(CONFIG_PPC64))
+		vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);
+
+	smp_wmb();
+
+	return 0;
+}
+arch_initcall(vdso_init);
diff --git a/arch/powerpc/kernel/vdso/.gitignore b/arch/powerpc/kernel/vdso/.gitignore
new file mode 100644
index 0000000000..dd9bdd6775
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso32.lds
+vdso32.so.dbg
+vdso64.lds
+vdso64.so.dbg
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
new file mode 100644
index 0000000000..0c7d82c270
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# List of files in the vdso, has to be asm only for now
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o
+obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
+
+ifneq ($(c-gettimeofday-y),)
+  CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
+  CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+  CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector)
+  CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING
+  CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables
+  CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc
+  # This flag is supported by clang for 64-bit but not 32-bit so it will cause
+  # an unused command line flag warning for this file.
+  ifdef CONFIG_CC_IS_CLANG
+  CFLAGS_REMOVE_vgettimeofday-32.o += -fno-stack-clash-protection
+  endif
+  CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y)
+  CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+  CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector)
+  CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING
+  CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables
+  CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE)
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code
+# generation is minimal, it will just use r29 instead.
+  CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30)
+endif
+
+# Build rules
+
+ifdef CROSS32_COMPILE
+    VDSOCC := $(CROSS32_COMPILE)gcc
+else
+    VDSOCC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+ccflags-y := -fno-common -fno-builtin
+ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS)
+ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
+ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
+
+# Filter flags that clang will warn are unused for linking
+ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS))
+
+CC32FLAGS := -m32
+LD32FLAGS := -Wl,-soname=linux-vdso32.so.1
+AS32FLAGS := -D__VDSO32__
+
+LD64FLAGS := -Wl,-soname=linux-vdso64.so.1
+AS64FLAGS := -D__VDSO64__
+
+targets += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+targets += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE
+	$(call if_changed,vdso32ld_and_check)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE
+	$(call if_changed,vdso64ld_and_check)
+
+# assembly and C compile rules for the .S and .c files
+$(obj-vdso32): %-32.o: %.S FORCE
+	$(call if_changed_dep,vdso32as)
+$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE
+	$(call if_changed_dep,vdso32cc)
+$(obj-vdso64): %-64.o: %.S FORCE
+	$(call if_changed_dep,vdso64as)
+$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
+	$(call if_changed_dep,cc_o_c)
+
+# Generate VDSO offsets using helper script
+gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh
+quiet_cmd_vdso32sym = VDSO32SYM $@
+      cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@
+gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh
+quiet_cmd_vdso64sym = VDSO64SYM $@
+      cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
+	$(call if_changed,vdso32sym)
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
+	$(call if_changed,vdso64sym)
+
+# actual build commands
+quiet_cmd_vdso32ld_and_check = VDSO32L $@
+      cmd_vdso32ld_and_check = $(VDSOCC) $(ldflags-y) $(CC32FLAGS) $(LD32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+      cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $<
+
+quiet_cmd_vdso64ld_and_check = VDSO64L $@
+      cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $<
+
+OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S
new file mode 100644
index 0000000000..0085ae464d
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/cacheflush.S
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *                    IBM Corp.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+	.text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache and invalidates the instruction cache for the
+ * provided range [start, end)
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+  .cfi_startproc
+BEGIN_FTR_SECTION
+	b	3f			/* CPU_FTR_COHERENT_ICACHE: skip both loops */
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+#ifdef CONFIG_PPC64
+	mflr	r12			/* keep lr in r12 across get_datapage */
+  .cfi_register lr,r12
+	get_datapage	r10		/* r10 = base for the CFG_* loads below */
+	mtlr	r12
+  .cfi_restore	lr
+#endif
+
+#ifdef CONFIG_PPC64
+	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)	/* r7 = dcache block size */
+	addi	r5,r7,-1		/* r5 = block size - 1 */
+#else
+	li	r5, L1_CACHE_BYTES - 1	/* 32-bit: compile-time line size */
+#endif
+	andc	r6,r3,r5		/* round low to line bdy */
+	subf	r8,r6,r4		/* compute length */
+	add	r8,r8,r5		/* ensure we get enough */
+#ifdef CONFIG_PPC64
+	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
+	PPC_SRL.	r8,r8,r9		/* compute line count */
+#else
+	srwi.	r8, r8, L1_CACHE_SHIFT
+	mr	r7, r6			/* r7 = aligned start, used by the icbi loop */
+#endif
+	crclr	cr0*4+so		/* SO clear: no error reported */
+	beqlr				/* nothing to do? */
+	mtctr	r8			/* ctr = number of lines */
+1:	dcbst	0,r6
+#ifdef CONFIG_PPC64
+	add	r6,r6,r7		/* advance by the block size */
+#else
+	addi	r6, r6, L1_CACHE_BYTES
+#endif
+	bdnz	1b
+	sync
+
+/* Now invalidate the instruction cache */
+
+#ifdef CONFIG_PPC64
+	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)	/* r7 = icache block size */
+	addi	r5,r7,-1
+	andc	r6,r3,r5		/* round low to line bdy */
+	subf	r8,r6,r4		/* compute length */
+	add	r8,r8,r5
+	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
+	PPC_SRL.	r8,r8,r9		/* compute line count */
+	crclr	cr0*4+so
+	beqlr				/* nothing to do? */
+#endif
+	mtctr	r8			/* 32-bit: r8 still holds the count from above */
+#ifdef CONFIG_PPC64
+2:	icbi	0,r6
+	add	r6,r6,r7
+#else
+2:	icbi	0, r7
+	addi	r7, r7, L1_CACHE_BYTES
+#endif
+	bdnz	2b
+	isync
+	li	r3,0			/* return 0 */
+	blr
+3:
+	crclr	cr0*4+so
+	sync
+	icbi	0,r1			/* one icbi only, address taken from r1 */
+	isync
+	li	r3,0			/* return 0 */
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S
new file mode 100644
index 0000000000..db8e167f01
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/datapage.S
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+
+	.text
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+  .cfi_startproc
+	mflr	r12			/* keep lr in r12 across get_datapage */
+  .cfi_register lr,r12
+	mr.	r4,r3			/* r4 = syscall_count, cr0 records if it is 0 */
+	get_datapage	r3
+	mtlr	r12
+#ifdef __powerpc64__
+	addi	r3,r3,CFG_SYSCALL_MAP64	/* r3 = returned map pointer */
+#else
+	addi	r3,r3,CFG_SYSCALL_MAP32	/* r3 = returned map pointer */
+#endif
+	crclr	cr0*4+so
+	beqlr				/* no count pointer: return the map only */
+	li	r0,NR_syscalls
+	stw	r0,0(r4)		/* *syscall_count = NR_syscalls */
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+/*
+ * unsigned long long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+  .cfi_startproc
+	mflr	r12			/* keep lr in r12 across get_datapage */
+  .cfi_register lr,r12
+	get_datapage	r3
+#ifndef __powerpc64__
+	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)	/* 32-bit only: word at offset +4 */
+#endif
+	PPC_LL	r3,CFG_TB_TICKS_PER_SEC(r3)	/* r3 = value at CFG_TB_TICKS_PER_SEC */
+	mtlr	r12
+	crclr	cr0*4+so
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh
new file mode 100755
index 0000000000..c7b54a5dcd
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com>
+#
+
+LC_ALL=C	# NOTE(review): plain assignment, not exported here; confirm sed is meant to see it
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh
new file mode 100755
index 0000000000..4bf15ffd59
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com>
+#
+
+LC_ALL=C	# NOTE(review): plain assignment, not exported here; confirm sed is meant to see it
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/powerpc/kernel/vdso/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S
new file mode 100644
index 0000000000..8e08ccf190
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/getcpu.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+
+	.text
+/*
+ * Exact prototype of getcpu
+ *
+ * int __kernel_getcpu(unsigned *cpu, unsigned *node);
+ *
+ */
+#if defined(CONFIG_PPC64)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+  .cfi_startproc
+	mfspr	r5,SPRN_SPRG_VDSO_READ	/* packed cpu/node, see vdso_getcpu_init() */
+	PPC_LCMPI	cr0,r3,0
+	PPC_LCMPI	cr1,r4,0
+	clrlwi  r6,r5,16		/* r6 = low 16 bits (cpu) */
+	rlwinm  r7,r5,16,31-15,31-0	/* r7 = next 16 bits (node) */
+	beq	cr0,1f			/* cpu pointer is NULL: skip the store */
+	stw	r6,0(r3)
+1:	crclr	cr0*4+so
+	li	r3,0			/* always success */
+	beqlr	cr1			/* node pointer is NULL: done */
+	stw	r7,0(r4)
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#elif !defined(CONFIG_SMP)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+  .cfi_startproc
+	cmpwi	cr0, r3, 0
+	cmpwi	cr1, r4, 0
+	li	r5, 0			/* !SMP: cpu and node are both stored as 0 */
+	beq	cr0, 1f			/* cpu pointer is NULL: skip the store */
+	stw	r5, 0(r3)
+1:	li	r3, 0			/* always success */
+	crclr	cr0*4+so
+	beqlr	cr1			/* node pointer is NULL: done */
+	stw	r5, 0(r4)
+	blr
+  .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#endif
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
new file mode 100644
index 0000000000..48fc665805
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Userland implementation of gettimeofday() for processes
+ * for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *                    IBM Corp.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+/*
+ * The macro sets up two stack frames, one for the caller and one for the
+ * callee, because there is no requirement for the caller to set up a stack
+ * frame when calling the VDSO, so it may have omitted one, especially on PPC64
+ */
+
+.macro cvdso_call funct call_time=0
+  .cfi_startproc
+	PPC_STLU	r1, -PPC_MIN_STKFRM(r1)	/* frame the caller may have omitted */
+  .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+	mflr		r0
+	PPC_STLU	r1, -PPC_MIN_STKFRM(r1)	/* frame for this call */
+  .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+	PPC_STL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)	/* save lr */
+  .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)	/* save r2 */
+  .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
+	get_datapage	r5
+	.ifeq	\call_time
+	addi		r5, r5, VDSO_DATA_OFFSET	/* data pointer passed in r5 */
+	.else
+	addi		r4, r5, VDSO_DATA_OFFSET	/* call_time: data pointer passed in r4 */
+	.endif
+#ifdef __powerpc64__
+	bl		CFUNC(DOTSYM(\funct))
+#else
+	bl		\funct
+#endif
+	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)	/* reload saved lr */
+#ifdef __powerpc64__
+	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_restore r2
+#endif
+	.ifeq	\call_time
+	cmpwi		r3, 0		/* non-zero result takes the error path below */
+	.endif
+	mtlr		r0
+	addi		r1, r1, 2 * PPC_MIN_STKFRM	/* drop both frames */
+  .cfi_restore lr
+  .cfi_def_cfa_offset 0
+	crclr		so
+	.ifeq	\call_time
+	beqlr+				/* r3 == 0: return with SO clear */
+	crset		so		/* otherwise flag the error in SO */
+	neg		r3, r3		/* and return -r3 */
+	.endif
+	blr
+  .cfi_endproc
+.endm
+
+	.text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+	cvdso_call __c_kernel_gettimeofday	/* call_time=0: non-zero result sets SO */
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+	cvdso_call __c_kernel_clock_gettime	/* call_time=0: non-zero result sets SO */
+V_FUNCTION_END(__kernel_clock_gettime)
+
+/*
+ * Exact prototype of clock_gettime64()
+ *
+ * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts);
+ *
+ */
+#ifndef __powerpc64__	/* only assembled for the 32-bit vDSO */
+V_FUNCTION_BEGIN(__kernel_clock_gettime64)
+	cvdso_call __c_kernel_clock_gettime64	/* call_time=0: non-zero result sets SO */
+V_FUNCTION_END(__kernel_clock_gettime64)
+#endif
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+	cvdso_call __c_kernel_clock_getres	/* call_time=0: non-zero result sets SO */
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * Exact prototype of time()
+ *
+ * time_t time(time_t *t);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_time)
+	cvdso_call __c_kernel_time call_time=1	/* result returned as is, SO always clear */
+V_FUNCTION_END(__kernel_time)
+
+/* Routines for restoring integer registers, called by the compiler.  */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area.  */
+#ifndef __powerpc64__
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+	lwz	r0,4(r11)		/* r0 = word at r11 + 4, moved into lr below */
+	lwz	r31,-4(r11)		/* reload r31 from just below r11 */
+	mtlr	r0
+	mr	r1,r11			/* stack pointer = r11 */
+	blr
+#endif
diff --git a/arch/powerpc/kernel/vdso/note.S b/arch/powerpc/kernel/vdso/note.S
new file mode 100644
index 0000000000..227a732739
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/note.S
@@ -0,0 +1,28 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/build-salt.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)			      \
+	.section name, flags;	/* switch to the note section */	      \
+	.balign 4;							      \
+	.long 1f - 0f;		/* name length */			      \
+	.long 3f - 2f;		/* data length */			      \
+	.long type;		/* note type */				      \
+0:	.asciz vendor;		/* vendor name */			      \
+1:	.balign 4;		/* pad the name to 4 bytes */		      \
+2:
+
+#define ASM_ELF_NOTE_END						      \
+3:	.balign 4;		/* pad out section */			      \
+	.previous		/* back to the section in use before the note */
+
+	ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+	.long LINUX_VERSION_CODE	/* note data: the kernel version code */
+	ASM_ELF_NOTE_END
+
+BUILD_SALT
diff --git a/arch/powerpc/kernel/vdso/sigtramp32.S b/arch/powerpc/kernel/vdso/sigtramp32.S
new file mode 100644
index 0000000000..0bcc5e5fe7
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/sigtramp32.S
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Signal trampolines for 32-bit processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com), IBM Corp.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+	.text
+
+/* The nop here is a hack.  The dwarf2 unwind routines subtract 1 from
+   the return address to get an address in the middle of the presumed
+   call instruction.  Since we don't have a call here, we artificially
+   extend the range covered by the unwind info by adding a nop before
+   the real start.  */
+	nop
+V_FUNCTION_BEGIN(__kernel_sigtramp32)
+.Lsig_start = . - 4		/* 4 bytes back, meant to take in the nop */
+	li	r0,__NR_sigreturn	/* r0 = syscall number */
+	sc				/* system call */
+.Lsig_end:			/* end of the range FDE 0 describes */
+V_FUNCTION_END(__kernel_sigtramp32)
+
+.Lsigrt_start:			/* FDE 1 range starts here, at the nop */
+	nop
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt32)
+	li	r0,__NR_rt_sigreturn	/* r0 = syscall number */
+	sc				/* system call */
+.Lsigrt_end:			/* end of the range FDE 1 describes */
+V_FUNCTION_END(__kernel_sigtramp_rt32)
+
+	.section .eh_frame,"a",@progbits
+
+/* Register r1 can be found at offset 4 of a pt_regs structure.
+   A pointer to the pt_regs is stored in memory at the old sp plus PTREGS.  */
+#define cfa_save \
+  .byte 0x0f;			/* DW_CFA_def_cfa_expression */		\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x23; .uleb128 RSIZE;	/*     DW_OP_plus_uconst */		\
+  .byte 0x06;			/*     DW_OP_deref */			\
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+   A pointer to the pt_regs is stored in memory at the old sp plus PTREGS.  */
+#define rsave(regno, ofs) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .ifne ofs;			/*     no add needed when ofs is 0 */	\
+    .byte 0x23; .uleb128 ofs;	/*     DW_OP_plus_uconst */		\
+  .endif;								\
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+   of the VMX reg struct.  The VMX reg struct is at offset VREGS of
+   the pt_regs struct.  This macro is for REGNO == 0, and contains
+   'subroutines' that the other macros jump to.  */
+#define vsave_msr0(regno) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x30 + regno;		/*     DW_OP_lit0 */			\
+2:				/* vsave_msr1 skips back to here */	\
+  .byte 0x40;			/*     DW_OP_lit16 */			\
+  .byte 0x1e;			/*     DW_OP_mul */			\
+3:				/* vsave_msr2 skips back to here */	\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x12;			/*     DW_OP_dup */			\
+  .byte 0x23;			/*     DW_OP_plus_uconst */		\
+    .uleb128 33*RSIZE;		/*       msr offset */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x0c; .long 1 << 25;	/*     DW_OP_const4u */			\
+  .byte 0x1a;			/*     DW_OP_and */			\
+  .byte 0x12;			/*     DW_OP_dup, ret 0 if bra taken */	\
+  .byte 0x30;			/*     DW_OP_lit0 */			\
+  .byte 0x29;			/*     DW_OP_eq */			\
+  .byte 0x28; .short 0x7fff;	/*     DW_OP_bra to end */		\
+  .byte 0x13;			/*     DW_OP_drop, pop the msr bit */	\
+  .byte 0x23; .uleb128 VREGS;	/*     DW_OP_plus_uconst */		\
+  .byte 0x22;			/*     DW_OP_plus */			\
+  .byte 0x2f; .short 0x7fff;	/*     DW_OP_skip to end */		\
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+   of the VMX reg struct.  REGNO is 1 thru 31.  */
+#define vsave_msr1(regno) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x30 + regno;		/*     DW_OP_lit n */			\
+  .byte 0x2f; .short 2b - 9f;	/*     DW_OP_skip to vsave_msr0 2: */	\
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+   the VMX save block.  */
+#define vsave_msr2(regno, ofs) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x0a; .short ofs;	/*     DW_OP_const2u */			\
+  .byte 0x2f; .short 3b - 9f;	/*     DW_OP_skip to vsave_msr0 3: */	\
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area.  */
+#define vsave(regno, ofs) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x23; .uleb128 VREGS;	/*     DW_OP_plus_uconst */		\
+  .byte 0x23; .uleb128 ofs;	/*     DW_OP_plus_uconst */		\
+9:
+
+/* This is where the pt_regs pointer can be found on the stack.  */
+#define PTREGS 64+28
+
+/* Size of regs.  */
+#define RSIZE 4
+
+/* This is the offset of the VMX regs.  */
+#define VREGS 48*RSIZE+34*8
+
+/* Describe where general purpose regs are saved (r1 is used by cfa_save).  */
+#define EH_FRAME_GEN \
+  cfa_save;								\
+  rsave ( 0,  0*RSIZE);							\
+  rsave ( 2,  2*RSIZE);							\
+  rsave ( 3,  3*RSIZE);							\
+  rsave ( 4,  4*RSIZE);							\
+  rsave ( 5,  5*RSIZE);							\
+  rsave ( 6,  6*RSIZE);							\
+  rsave ( 7,  7*RSIZE);							\
+  rsave ( 8,  8*RSIZE);							\
+  rsave ( 9,  9*RSIZE);							\
+  rsave (10, 10*RSIZE);							\
+  rsave (11, 11*RSIZE);							\
+  rsave (12, 12*RSIZE);							\
+  rsave (13, 13*RSIZE);							\
+  rsave (14, 14*RSIZE);							\
+  rsave (15, 15*RSIZE);							\
+  rsave (16, 16*RSIZE);							\
+  rsave (17, 17*RSIZE);							\
+  rsave (18, 18*RSIZE);							\
+  rsave (19, 19*RSIZE);							\
+  rsave (20, 20*RSIZE);							\
+  rsave (21, 21*RSIZE);							\
+  rsave (22, 22*RSIZE);							\
+  rsave (23, 23*RSIZE);							\
+  rsave (24, 24*RSIZE);							\
+  rsave (25, 25*RSIZE);							\
+  rsave (26, 26*RSIZE);							\
+  rsave (27, 27*RSIZE);							\
+  rsave (28, 28*RSIZE);							\
+  rsave (29, 29*RSIZE);							\
+  rsave (30, 30*RSIZE);							\
+  rsave (31, 31*RSIZE);							\
+  rsave (67, 32*RSIZE);		/* ap, used as temp for nip */		\
+  rsave (65, 36*RSIZE);		/* lr */				\
+  rsave (70, 38*RSIZE)		/* cr */
+
+/* Describe where the FP regs are saved.  */
+#define EH_FRAME_FP \
+  rsave (32, 48*RSIZE +  0*8);						\
+  rsave (33, 48*RSIZE +  1*8);						\
+  rsave (34, 48*RSIZE +  2*8);						\
+  rsave (35, 48*RSIZE +  3*8);						\
+  rsave (36, 48*RSIZE +  4*8);						\
+  rsave (37, 48*RSIZE +  5*8);						\
+  rsave (38, 48*RSIZE +  6*8);						\
+  rsave (39, 48*RSIZE +  7*8);						\
+  rsave (40, 48*RSIZE +  8*8);						\
+  rsave (41, 48*RSIZE +  9*8);						\
+  rsave (42, 48*RSIZE + 10*8);						\
+  rsave (43, 48*RSIZE + 11*8);						\
+  rsave (44, 48*RSIZE + 12*8);						\
+  rsave (45, 48*RSIZE + 13*8);						\
+  rsave (46, 48*RSIZE + 14*8);						\
+  rsave (47, 48*RSIZE + 15*8);						\
+  rsave (48, 48*RSIZE + 16*8);						\
+  rsave (49, 48*RSIZE + 17*8);						\
+  rsave (50, 48*RSIZE + 18*8);						\
+  rsave (51, 48*RSIZE + 19*8);						\
+  rsave (52, 48*RSIZE + 20*8);						\
+  rsave (53, 48*RSIZE + 21*8);						\
+  rsave (54, 48*RSIZE + 22*8);						\
+  rsave (55, 48*RSIZE + 23*8);						\
+  rsave (56, 48*RSIZE + 24*8);						\
+  rsave (57, 48*RSIZE + 25*8);						\
+  rsave (58, 48*RSIZE + 26*8);						\
+  rsave (59, 48*RSIZE + 27*8);						\
+  rsave (60, 48*RSIZE + 28*8);						\
+  rsave (61, 48*RSIZE + 29*8);						\
+  rsave (62, 48*RSIZE + 30*8);						\
+  rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved.  */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+  vsave_msr0 ( 0);							\
+  vsave_msr1 ( 1);							\
+  vsave_msr1 ( 2);							\
+  vsave_msr1 ( 3);							\
+  vsave_msr1 ( 4);							\
+  vsave_msr1 ( 5);							\
+  vsave_msr1 ( 6);							\
+  vsave_msr1 ( 7);							\
+  vsave_msr1 ( 8);							\
+  vsave_msr1 ( 9);							\
+  vsave_msr1 (10);							\
+  vsave_msr1 (11);							\
+  vsave_msr1 (12);							\
+  vsave_msr1 (13);							\
+  vsave_msr1 (14);							\
+  vsave_msr1 (15);							\
+  vsave_msr1 (16);							\
+  vsave_msr1 (17);							\
+  vsave_msr1 (18);							\
+  vsave_msr1 (19);							\
+  vsave_msr1 (20);							\
+  vsave_msr1 (21);							\
+  vsave_msr1 (22);							\
+  vsave_msr1 (23);							\
+  vsave_msr1 (24);							\
+  vsave_msr1 (25);							\
+  vsave_msr1 (26);							\
+  vsave_msr1 (27);							\
+  vsave_msr1 (28);							\
+  vsave_msr1 (29);							\
+  vsave_msr1 (30);							\
+  vsave_msr1 (31);							\
+  vsave_msr2 (33, 32*16+12);						\
+  vsave      (32, 32*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+.Lcie:
+	.long .Lcie_end - .Lcie_start
+.Lcie_start:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.string "zRS"		/* NUL-terminated augmentation string */
+	.uleb128 4		/* Code alignment factor */
+	.sleb128 -4		/* Data alignment factor */
+	.byte 67		/* Return address register column, ap */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x1b		/* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+	.byte 0x0c,1,0		/* DW_CFA_def_cfa: r1 ofs 0 */
+	.balign 4
+.Lcie_end:
+
+	.long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+	.long .Lfde0_start - .Lcie	/* CIE pointer. */
+	.long .Lsig_start - .		/* PC start, length */
+	.long .Lsig_end - .Lsig_start
+	.uleb128 0			/* Augmentation */
+	EH_FRAME_GEN
+	EH_FRAME_FP
+	EH_FRAME_VMX
+	.balign 4
+.Lfde0_end:
+
+/* We have a different stack layout for rt_sigreturn.  */
+#undef PTREGS
+#define PTREGS 64+16+128+20+28
+
+	.long .Lfde1_end - .Lfde1_start
+.Lfde1_start:
+	.long .Lfde1_start - .Lcie	/* CIE pointer. */
+	.long .Lsigrt_start - .		/* PC start, length */
+	.long .Lsigrt_end - .Lsigrt_start
+	.uleb128 0			/* Augmentation */
+	EH_FRAME_GEN
+	EH_FRAME_FP
+	EH_FRAME_VMX
+	.balign 4
+.Lfde1_end:
diff --git a/arch/powerpc/kernel/vdso/sigtramp64.S b/arch/powerpc/kernel/vdso/sigtramp64.S
new file mode 100644
index 0000000000..2d40675612
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/sigtramp64.S
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Signal trampoline for 64-bit processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com), IBM Corp.
+ */
+#include <asm/cache.h>		/* IFETCH_ALIGN_BYTES */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <asm/ptrace.h>		/* XXX for __SIGNAL_FRAMESIZE */
+
+	.text
+
+/*
+ * __kernel_start_sigtramp_rt64 and __kernel_sigtramp_rt64 together
+ * are one function split in two parts. The kernel jumps to the former
+ * and the signal handler indirectly (by blr) returns to the latter.
+ * __kernel_sigtramp_rt64 needs to point to the return address so
+ * glibc can correctly identify the trampoline stack frame.
+ */
+	.balign 8
+	.balign IFETCH_ALIGN_BYTES
+V_FUNCTION_BEGIN(__kernel_start_sigtramp_rt64)
+.Lsigrt_start:
+	bctrl	/* call the handler */
+V_FUNCTION_END(__kernel_start_sigtramp_rt64)
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
+	addi	r1, r1, __SIGNAL_FRAMESIZE	/* r1 += __SIGNAL_FRAMESIZE */
+	li	r0,__NR_rt_sigreturn	/* r0 = syscall number */
+	sc				/* system call */
+.Lsigrt_end:			/* end of the range FDE 0 describes */
+V_FUNCTION_END(__kernel_sigtramp_rt64)
+/* The .balign 8 above and the following zeros mimic the old stack
+   trampoline layout.  The last magic value is the ucontext pointer,
+   chosen in such a way that older libgcc unwind code returns a zero
+   for a sigcontext pointer.  */
+	.long 0,0,0
+	.quad 0,-21*8		/* -21*8 is the magic value described above */
+
+/* Register r1 can be found at offset 8 of a pt_regs structure.
+   A pointer to the pt_regs is stored in memory at the old sp plus PTREGS.  */
+#define cfa_save \
+  .byte 0x0f;			/* DW_CFA_def_cfa_expression */		\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x23; .uleb128 RSIZE;	/*     DW_OP_plus_uconst */		\
+  .byte 0x06;			/*     DW_OP_deref */			\
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+   A pointer to the pt_regs is stored in memory at the old sp plus PTREGS.  */
+#define rsave(regno, ofs) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .ifne ofs;			/*     no add needed when ofs is 0 */	\
+    .byte 0x23; .uleb128 ofs;	/*     DW_OP_plus_uconst */		\
+  .endif;								\
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+   of the VMX reg struct.  A pointer to the VMX reg struct is at VREGS in
+   the pt_regs struct.  This macro is for REGNO == 0, and contains
+   'subroutines' that the other macros jump to.  */
+#define vsave_msr0(regno) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x30 + regno;		/*     DW_OP_lit0 */			\
+2:				/* vsave_msr1 skips back to here */	\
+  .byte 0x40;			/*     DW_OP_lit16 */			\
+  .byte 0x1e;			/*     DW_OP_mul */			\
+3:				/* vsave_msr2 skips back to here */	\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x12;			/*     DW_OP_dup */			\
+  .byte 0x23;			/*     DW_OP_plus_uconst */		\
+    .uleb128 33*RSIZE;		/*       msr offset */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x0c; .long 1 << 25;	/*     DW_OP_const4u */			\
+  .byte 0x1a;			/*     DW_OP_and */			\
+  .byte 0x12;			/*     DW_OP_dup, ret 0 if bra taken */	\
+  .byte 0x30;			/*     DW_OP_lit0 */			\
+  .byte 0x29;			/*     DW_OP_eq */			\
+  .byte 0x28; .short 0x7fff;	/*     DW_OP_bra to end */		\
+  .byte 0x13;			/*     DW_OP_drop, pop the msr bit */	\
+  .byte 0x23; .uleb128 VREGS;	/*     DW_OP_plus_uconst */		\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x22;			/*     DW_OP_plus */			\
+  .byte 0x2f; .short 0x7fff;	/*     DW_OP_skip to end */		\
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+   of the VMX reg struct.  REGNO is 1 thru 31.  */
+#define vsave_msr1(regno) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x30 + regno;		/*     DW_OP_lit n */			\
+  .byte 0x2f; .short 2b - 9f;	/*     DW_OP_skip to vsave_msr0 2: */	\
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+   the VMX save block.  */
+#define vsave_msr2(regno, ofs) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x0a; .short ofs;	/*     DW_OP_const2u */			\
+  .byte 0x2f; .short 3b - 9f;	/*     DW_OP_skip to vsave_msr0 3: */	\
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area.  */
+#define vsave(regno, ofs) \
+  .byte 0x10;			/* DW_CFA_expression */			\
+  .uleb128 regno + 77;		/*   regno */				\
+  .uleb128 9f - 1f;		/*   length */				\
+1:									\
+  .byte 0x71; .sleb128 PTREGS;	/*     DW_OP_breg1 */			\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x23; .uleb128 VREGS;	/*     DW_OP_plus_uconst */		\
+  .byte 0x06;			/*     DW_OP_deref */			\
+  .byte 0x23; .uleb128 ofs;	/*     DW_OP_plus_uconst */		\
+9:
+
+/* This is where the pt_regs pointer can be found on the stack.  */
+#define PTREGS	128+168+56
+
+/* Size of regs.  */
+#define RSIZE	8
+
+/* Size of CR reg in DWARF unwind info. */
+#define CRSIZE	4
+
+/* Offset of CR reg within a full word. */
+#ifdef __LITTLE_ENDIAN__
+#define CROFF 0
+#else
+#define CROFF (RSIZE - CRSIZE)
+#endif
+
+/* This is the offset of the VMX reg pointer.  */
+#define VREGS	48*RSIZE+33*8
+
+/* Describe where general purpose regs are saved (r1 is used by cfa_save).  */
+#define EH_FRAME_GEN \
+  cfa_save;								\
+  rsave ( 0,  0*RSIZE);							\
+  rsave ( 2,  2*RSIZE);							\
+  rsave ( 3,  3*RSIZE);							\
+  rsave ( 4,  4*RSIZE);							\
+  rsave ( 5,  5*RSIZE);							\
+  rsave ( 6,  6*RSIZE);							\
+  rsave ( 7,  7*RSIZE);							\
+  rsave ( 8,  8*RSIZE);							\
+  rsave ( 9,  9*RSIZE);							\
+  rsave (10, 10*RSIZE);							\
+  rsave (11, 11*RSIZE);							\
+  rsave (12, 12*RSIZE);							\
+  rsave (13, 13*RSIZE);							\
+  rsave (14, 14*RSIZE);							\
+  rsave (15, 15*RSIZE);							\
+  rsave (16, 16*RSIZE);							\
+  rsave (17, 17*RSIZE);							\
+  rsave (18, 18*RSIZE);							\
+  rsave (19, 19*RSIZE);							\
+  rsave (20, 20*RSIZE);							\
+  rsave (21, 21*RSIZE);							\
+  rsave (22, 22*RSIZE);							\
+  rsave (23, 23*RSIZE);							\
+  rsave (24, 24*RSIZE);							\
+  rsave (25, 25*RSIZE);							\
+  rsave (26, 26*RSIZE);							\
+  rsave (27, 27*RSIZE);							\
+  rsave (28, 28*RSIZE);							\
+  rsave (29, 29*RSIZE);							\
+  rsave (30, 30*RSIZE);							\
+  rsave (31, 31*RSIZE);							\
+  rsave (67, 32*RSIZE);		/* ap, used as temp for nip */		\
+  rsave (65, 36*RSIZE);		/* lr */				\
+  rsave (68, 38*RSIZE + CROFF);	/* cr fields */				\
+  rsave (69, 38*RSIZE + CROFF);						\
+  rsave (70, 38*RSIZE + CROFF);						\
+  rsave (71, 38*RSIZE + CROFF);						\
+  rsave (72, 38*RSIZE + CROFF);						\
+  rsave (73, 38*RSIZE + CROFF);						\
+  rsave (74, 38*RSIZE + CROFF);						\
+  rsave (75, 38*RSIZE + CROFF)
+
+/* Describe where the FP regs are saved.  */
+#define EH_FRAME_FP \
+  rsave (32, 48*RSIZE +  0*8);						\
+  rsave (33, 48*RSIZE +  1*8);						\
+  rsave (34, 48*RSIZE +  2*8);						\
+  rsave (35, 48*RSIZE +  3*8);						\
+  rsave (36, 48*RSIZE +  4*8);						\
+  rsave (37, 48*RSIZE +  5*8);						\
+  rsave (38, 48*RSIZE +  6*8);						\
+  rsave (39, 48*RSIZE +  7*8);						\
+  rsave (40, 48*RSIZE +  8*8);						\
+  rsave (41, 48*RSIZE +  9*8);						\
+  rsave (42, 48*RSIZE + 10*8);						\
+  rsave (43, 48*RSIZE + 11*8);						\
+  rsave (44, 48*RSIZE + 12*8);						\
+  rsave (45, 48*RSIZE + 13*8);						\
+  rsave (46, 48*RSIZE + 14*8);						\
+  rsave (47, 48*RSIZE + 15*8);						\
+  rsave (48, 48*RSIZE + 16*8);						\
+  rsave (49, 48*RSIZE + 17*8);						\
+  rsave (50, 48*RSIZE + 18*8);						\
+  rsave (51, 48*RSIZE + 19*8);						\
+  rsave (52, 48*RSIZE + 20*8);						\
+  rsave (53, 48*RSIZE + 21*8);						\
+  rsave (54, 48*RSIZE + 22*8);						\
+  rsave (55, 48*RSIZE + 23*8);						\
+  rsave (56, 48*RSIZE + 24*8);						\
+  rsave (57, 48*RSIZE + 25*8);						\
+  rsave (58, 48*RSIZE + 26*8);						\
+  rsave (59, 48*RSIZE + 27*8);						\
+  rsave (60, 48*RSIZE + 28*8);						\
+  rsave (61, 48*RSIZE + 29*8);						\
+  rsave (62, 48*RSIZE + 30*8);						\
+  rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved.  */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+  vsave_msr0 ( 0);							\
+  vsave_msr1 ( 1);							\
+  vsave_msr1 ( 2);							\
+  vsave_msr1 ( 3);							\
+  vsave_msr1 ( 4);							\
+  vsave_msr1 ( 5);							\
+  vsave_msr1 ( 6);							\
+  vsave_msr1 ( 7);							\
+  vsave_msr1 ( 8);							\
+  vsave_msr1 ( 9);							\
+  vsave_msr1 (10);							\
+  vsave_msr1 (11);							\
+  vsave_msr1 (12);							\
+  vsave_msr1 (13);							\
+  vsave_msr1 (14);							\
+  vsave_msr1 (15);							\
+  vsave_msr1 (16);							\
+  vsave_msr1 (17);							\
+  vsave_msr1 (18);							\
+  vsave_msr1 (19);							\
+  vsave_msr1 (20);							\
+  vsave_msr1 (21);							\
+  vsave_msr1 (22);							\
+  vsave_msr1 (23);							\
+  vsave_msr1 (24);							\
+  vsave_msr1 (25);							\
+  vsave_msr1 (26);							\
+  vsave_msr1 (27);							\
+  vsave_msr1 (28);							\
+  vsave_msr1 (29);							\
+  vsave_msr1 (30);							\
+  vsave_msr1 (31);							\
+  vsave_msr2 (33, 32*16+12);						\
+  vsave      (32, 33*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+	.section .eh_frame,"a",@progbits
+.Lcie:
+	.long .Lcie_end - .Lcie_start
+.Lcie_start:
+	.long 0			/* CIE ID */
+	.byte 1			/* Version number */
+	.string "zRS"		/* NUL-terminated augmentation string */
+	.uleb128 4		/* Code alignment factor */
+	.sleb128 -8		/* Data alignment factor */
+	.byte 67		/* Return address register column, ap */
+	.uleb128 1		/* Augmentation value length */
+	.byte 0x14		/* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
+	.byte 0x0c,1,0		/* DW_CFA_def_cfa: r1 ofs 0 */
+	.balign 8
+.Lcie_end:
+
+	.long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+	.long .Lfde0_start - .Lcie	/* CIE pointer. */
+	.quad .Lsigrt_start - .		/* PC start, length */
+	.quad .Lsigrt_end - .Lsigrt_start
+	.uleb128 0			/* Augmentation */
+	EH_FRAME_GEN
+	EH_FRAME_FP
+	EH_FRAME_VMX
+# Do we really need to describe the frame at this point?  ie. will
+# we ever have some call chain that returns somewhere past the addi?
+# I don't think so, since gcc doesn't support async signals.
+#	.byte 0x41		/* DW_CFA_advance_loc 1*4 */
+#undef PTREGS
+#define PTREGS 168+56
+#	EH_FRAME_GEN
+#	EH_FRAME_FP
+#	EH_FRAME_VMX
+	.balign 8
+.Lfde0_end:
diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S
new file mode 100644
index 0000000000..426e1ccc69
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vdso32.lds.S
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 32-bit vDSO
+ * library
+ */
+#include <asm/vdso.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef __LITTLE_ENDIAN__
+OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
+#else
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+#endif
+OUTPUT_ARCH(powerpc:common)
+
+SECTIONS
+{
+	PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
+	. = SIZEOF_HEADERS;
+
+	.hash          	: { *(.hash) }			:text
+	.gnu.hash      	: { *(.gnu.hash) }
+	.dynsym        	: { *(.dynsym) }
+	.dynstr        	: { *(.dynstr) }
+	.gnu.version   	: { *(.gnu.version) }
+	.gnu.version_d 	: { *(.gnu.version_d) }
+	.gnu.version_r 	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
+	}		      	      			:text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	. = ALIGN(8);
+	VDSO_ftr_fixup_start = .;
+	__ftr_fixup	: { *(__ftr_fixup) }
+	VDSO_ftr_fixup_end = .;
+
+	. = ALIGN(8);
+	VDSO_mmu_ftr_fixup_start = .;
+	__mmu_ftr_fixup	: { *(__mmu_ftr_fixup) }
+	VDSO_mmu_ftr_fixup_end = .;
+
+	. = ALIGN(8);
+	VDSO_lwsync_fixup_start = .;
+	__lwsync_fixup	: { *(__lwsync_fixup) }
+	VDSO_lwsync_fixup_end = .;
+
+#ifdef CONFIG_PPC64
+	. = ALIGN(8);
+	VDSO_fw_ftr_fixup_start = .;
+	__fw_ftr_fixup	: { *(__fw_ftr_fixup) }
+	VDSO_fw_ftr_fixup_end = .;
+#endif
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table) }
+	.fixup		: { *(.fixup) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+	.got		: { *(.got) }			:text
+	.plt		: { *(.plt) }
+
+	_end = .;
+	__end = .;
+	PROVIDE(end = .);
+
+	DWARF_DEBUG
+	ELF_DETAILS
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(*.EMB.apuinfo)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+		*(.got1 .glink .iplt .rela*)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	VDSO_VERSION_STRING {
+	global:
+		__kernel_get_syscall_map;
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_gettime64;
+		__kernel_clock_getres;
+		__kernel_time;
+		__kernel_get_tbfreq;
+		__kernel_sync_dicache;
+		__kernel_sigtramp32;
+		__kernel_sigtramp_rt32;
+#if defined(CONFIG_PPC64) || !defined(CONFIG_SMP)
+		__kernel_getcpu;
+#endif
+
+	local: *;
+	};
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp32		= __kernel_sigtramp32;
+VDSO_sigtramp_rt32	= __kernel_sigtramp_rt32;
diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
new file mode 100644
index 0000000000..bda6c8cdd4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64-bit vDSO
+ * library
+ */
+#include <asm/vdso.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef __LITTLE_ENDIAN__
+OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
+#else
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
+#endif
+OUTPUT_ARCH(powerpc:common64)
+
+SECTIONS
+{
+	PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
+	. = SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
+		*(.sfpr)
+	}						:text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	. = ALIGN(8);
+	VDSO_ftr_fixup_start = .;
+	__ftr_fixup	: { *(__ftr_fixup) }
+	VDSO_ftr_fixup_end = .;
+
+	. = ALIGN(8);
+	VDSO_mmu_ftr_fixup_start = .;
+	__mmu_ftr_fixup	: { *(__mmu_ftr_fixup) }
+	VDSO_mmu_ftr_fixup_end = .;
+
+	. = ALIGN(8);
+	VDSO_lwsync_fixup_start = .;
+	__lwsync_fixup	: { *(__lwsync_fixup) }
+	VDSO_lwsync_fixup_end = .;
+
+	. = ALIGN(8);
+	VDSO_fw_ftr_fixup_start = .;
+	__fw_ftr_fixup	: { *(__fw_ftr_fixup) }
+	VDSO_fw_ftr_fixup_end = .;
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table) }
+	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
+
+	.got ALIGN(8)	: { *(.got .toc) }
+
+	_end = .;
+	PROVIDE(end = .);
+
+	DWARF_DEBUG
+	ELF_DETAILS
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(*.EMB.apuinfo)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+		*(.opd)
+		*(.glink .iplt .plt .rela*)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	VDSO_VERSION_STRING {
+	global:
+		__kernel_get_syscall_map;
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+		__kernel_get_tbfreq;
+		__kernel_sync_dicache;
+		__kernel_sigtramp_rt64;
+		__kernel_getcpu;
+		__kernel_time;
+
+	local: *;
+	};
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp_rt64	= __kernel_start_sigtramp_rt64;
diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c
new file mode 100644
index 0000000000..55a287c9a7
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Powerpc userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+			     const struct vdso_data *vd)
+{
+	return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+			    const struct vdso_data *vd)
+{
+	return __cvdso_clock_getres_data(vd, clock_id, res);
+}
+#else
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+			     const struct vdso_data *vd)
+{
+	return __cvdso_clock_gettime32_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+			       const struct vdso_data *vd)
+{
+	return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+			    const struct vdso_data *vd)
+{
+	return __cvdso_clock_getres_time32_data(vd, clock_id, res);
+}
+#endif
+
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+			    const struct vdso_data *vd)
+{
+	return __cvdso_gettimeofday_data(vd, tv, tz);
+}
+
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd)
+{
+	return __cvdso_time_data(vd, time);
+}
diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S
new file mode 100644
index 0000000000..10f92f265d
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso32_start, vdso32_end
+	.balign PAGE_SIZE
+vdso32_start:
+	.incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg"
+	.balign PAGE_SIZE
+vdso32_end:
+
+	.previous
diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S
new file mode 100644
index 0000000000..839d1a6141
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso64_start, vdso64_end
+	.balign PAGE_SIZE
+vdso64_start:
+	.incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg"
+	.balign PAGE_SIZE
+vdso64_end:
+
+	.previous
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
new file mode 100644
index 0000000000..fd9432875e
--- /dev/null
+++ b/arch/powerpc/kernel/vecemu.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Routines to emulate some Altivec/VMX instructions, specifically
+ * those that can trap when given denormalized operands in Java mode.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+#include <linux/uaccess.h>
+#include <asm/inst.h>
+
+/* Functions in vector.S; each works on four single-precision elements */
+extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
+extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
+extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
+extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
+extern void vrefp(vector128 *dst, vector128 *src);
+extern void vrsqrtefp(vector128 *dst, vector128 *src);
+extern void vexptep(vector128 *dst, vector128 *src);	/* NOTE(review): not defined in vector.S and not called in this file - looks stale */
+
+static unsigned int exp2s[8] = {	/* exp2s[i] = 2^(i/8) * 2^23, used by eexp2() */
+	0x800000,	/* 2^0.000 */
+	0x8b95c2,	/* 2^0.125 */
+	0x9837f0,	/* 2^0.250 */
+	0xa5fed7,	/* 2^0.375 */
+	0xb504f3,	/* 2^0.500 */
+	0xc5672a,	/* 2^0.625 */
+	0xd744fd,	/* 2^0.750 */
+	0xeac0c7	/* 2^0.875 */
+};
+
+/*
+ * Computes an estimate of 2^x.  `s' is the 32-bit single-precision
+ * bit pattern of x; the estimate is returned as a bit pattern as well.
+ */
+static unsigned int eexp2(unsigned int s)
+{
+	int exp, pwr;
+	unsigned int mant, frac;
+
+	/* extract exponent field from input */
+	exp = ((s >> 23) & 0xff) - 127;
+	if (exp > 7) {				/* |x| >= 2^8, Inf or NaN */
+		/* check for NaN input */
+		if (exp == 128 && (s & 0x7fffff) != 0)
+			return s | 0x400000;	/* return QNaN */
+		/* 2^-big = 0, 2^+big = +Inf */
+		return (s & 0x80000000)? 0: 0x7f800000;	/* 0 or +Inf */
+	}
+	if (exp < -23)				/* |x| < 2^-23: 2^x is taken as 1 */
+		return 0x3f800000;	/* 1.0 */
+
+	/* convert to fixed point integer in 9.23 representation */
+	pwr = (s & 0x7fffff) | 0x800000;
+	if (exp > 0)
+		pwr <<= exp;
+	else
+		pwr >>= -exp;
+	if (s & 0x80000000)
+		pwr = -pwr;
+
+	/* extract integer part, which becomes exponent part of result */
+	exp = (pwr >> 23) + 126;
+	if (exp >= 254)
+		return 0x7f800000;	/* too large for a finite result: +Inf */
+	if (exp < -23)
+		return 0;		/* too small even for a denormal: +0 */
+
+	/* table lookup on top 3 bits of fraction to get mantissa */
+	mant = exp2s[(pwr >> 20) & 7];
+
+	/* linear interpolation using remaining 20 bits of fraction */
+	asm("mulhwu %0,%1,%2" : "=r" (frac)
+	    : "r" (pwr << 12), "r" (0x172b83ff));
+	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
+	mant += frac;
+
+	if (exp >= 0)
+		return mant + (exp << 23);	/* bit 23 of mant adds 1 to the exponent field (hence 126 above) */
+
+	/* denormalized result */
+	exp = -exp;
+	mant += 1 << (exp - 1);		/* add half of the last kept bit before shifting it out */
+	return mant >> exp;
+}
+
+/*
+ * Computes an estimate of log_2(x).  `s' is the 32-bit single-precision
+ * bit pattern of x; the estimate is returned as a bit pattern as well.
+ */
+static unsigned int elog2(unsigned int s)
+{
+	int exp, mant, lz, frac;
+
+	exp = s & 0x7f800000;		/* exponent field, left in place */
+	mant = s & 0x7fffff;		/* sign bit is masked off: finite negative x is handled like |x| */
+	if (exp == 0x7f800000) {	/* Inf or NaN */
+		if (mant != 0)
+			s |= 0x400000;	/* turn NaN into QNaN */
+		return s;
+	}
+	if ((exp | mant) == 0)		/* +0 or -0 */
+		return 0xff800000;	/* return -Inf */
+
+	if (exp == 0) {
+		/* denormalized */
+		asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
+		mant <<= lz - 8;		/* normalize: move the leading 1 up to bit 23 */
+		exp = (-118 - lz) << 23;	/* unbiased exponent of the normalized value */
+	} else {
+		mant |= 0x800000;		/* make the implicit leading 1 explicit */
+		exp -= 127 << 23;		/* remove the bias; exp is now signed with 23 fraction bits */
+	}
+
+	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
+		exp |= 0x400000;			/* 0.5 * 2^23 */
+		asm("mulhwu %0,%1,%2" : "=r" (mant)
+		    : "r" (mant), "r" (0xb504f334));	/* 2^-0.5 * 2^32 */
+	}
+	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
+		exp |= 0x200000;			/* 0.25 * 2^23 */
+		asm("mulhwu %0,%1,%2" : "=r" (mant)
+		    : "r" (mant), "r" (0xd744fccb));	/* 2^-0.25 * 2^32 */
+	}
+	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
+		exp |= 0x100000;			/* 0.125 * 2^23 */
+		asm("mulhwu %0,%1,%2" : "=r" (mant)
+		    : "r" (mant), "r" (0xeac0c6e8));	/* 2^-0.125 * 2^32 */
+	}
+	if (mant > 0x800000) {				/* 1.0 * 2^23 */
+		/* calculate (mant - 1) * 1.381097463 */
+		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
+		asm("mulhwu %0,%1,%2" : "=r" (frac)
+		    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
+		exp += frac;
+	}
+	s = exp & 0x80000000;		/* sign of the result */
+	if (exp != 0) {
+		if (s)
+			exp = -exp;	/* continue with the magnitude */
+		asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
+		lz = 8 - lz;		/* shift needed to bring the leading 1 to bit 23 */
+		if (lz > 0)
+			exp >>= lz;
+		else if (lz < 0)
+			exp <<= -lz;
+		s += ((lz + 126) << 23) + exp;	/* bit 23 of exp adds 1 to the exponent field (hence 126) */
+	}
+	return s;
+}
+
+#define VSCR_SAT	1	/* flag OR-ed into *vscrp by ctsxs()/ctuxs() when a conversion saturates */
+
+static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)	/* float bits * 2^scale -> saturated signed 32-bit int */
+{
+	int exp, mant;
+
+	exp = (x >> 23) & 0xff;
+	mant = x & 0x7fffff;
+	if (exp == 255 && mant != 0)
+		return 0;		/* NaN -> 0 */
+	exp = exp - 127 + scale;	/* unbiased exponent after scaling by 2^scale */
+	if (exp < 0)
+		return 0;		/* round towards zero */
+	if (exp >= 31) {
+		/* saturate, unless the result would be -2^31 */
+		if (x + (scale << 23) != 0xcf000000)
+			*vscrp |= VSCR_SAT;
+		return (x & 0x80000000)? 0x80000000: 0x7fffffff;
+	}
+	mant |= 0x800000;		/* make the implicit leading 1 explicit */
+	mant = (mant << 7) >> (30 - exp);	/* leading 1 to bit 30, then shift down to the integer part */
+	return (x & 0x80000000)? -mant: mant;
+}
+
+static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)	/* float bits * 2^scale -> saturated unsigned 32-bit int */
+{
+	int exp;
+	unsigned int mant;
+
+	exp = (x >> 23) & 0xff;
+	mant = x & 0x7fffff;
+	if (exp == 255 && mant != 0)
+		return 0;		/* NaN -> 0 */
+	exp = exp - 127 + scale;	/* unbiased exponent after scaling by 2^scale */
+	if (exp < 0)
+		return 0;		/* round towards zero */
+	if (x & 0x80000000) {
+		/* negative => saturate to 0 */
+		*vscrp |= VSCR_SAT;
+		return 0;
+	}
+	if (exp >= 32) {
+		/* saturate */
+		*vscrp |= VSCR_SAT;
+		return 0xffffffff;
+	}
+	mant |= 0x800000;		/* make the implicit leading 1 explicit */
+	mant = (mant << 8) >> (31 - exp);	/* leading 1 to bit 31, then shift down to the integer part */
+	return mant;
+}
+
+/* Round to floating integer, towards 0; x and the result are float bit patterns */
+static unsigned int rfiz(unsigned int x)
+{
+	int exp;
+
+	exp = ((x >> 23) & 0xff) - 127;
+	if (exp == 128 && (x & 0x7fffff) != 0)
+		return x | 0x400000;	/* NaN -> make it a QNaN */
+	if (exp >= 23)
+		return x;		/* it's an integer already (or Inf) */
+	if (exp < 0)
+		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
+	return x & ~(0x7fffff >> exp);	/* clear the mantissa bits below the binary point */
+}
+
+/* Round to floating integer, away from zero (towards +/- Inf by sign); operates on float bit patterns */
+static unsigned int rfii(unsigned int x)
+{
+	int exp, mask;
+
+	exp = ((x >> 23) & 0xff) - 127;
+	if (exp == 128 && (x & 0x7fffff) != 0)
+		return x | 0x400000;	/* NaN -> make it a QNaN */
+	if (exp >= 23)
+		return x;		/* it's an integer already (or Inf) */
+	if ((x & 0x7fffffff) == 0)
+		return x;		/* +/-0 -> +/-0 */
+	if (exp < 0)
+		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
+		return (x & 0x80000000) | 0x3f800000;
+	mask = 0x7fffff >> exp;		/* the mantissa bits below the binary point */
+	/* mantissa overflows into exponent - that's OK,
+	   it can't overflow into the sign bit */
+	return (x + mask) & ~mask;
+}
+
+/* Round to floating integer, to nearest (ties go away from zero); operates on float bit patterns */
+static unsigned int rfin(unsigned int x)
+{
+	int exp, half;
+
+	exp = ((x >> 23) & 0xff) - 127;
+	if (exp == 128 && (x & 0x7fffff) != 0)
+		return x | 0x400000;	/* NaN -> make it a QNaN */
+	if (exp >= 23)
+		return x;		/* it's an integer already (or Inf) */
+	if (exp < -1)
+		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
+	if (exp == -1)
+		/* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
+		return (x & 0x80000000) | 0x3f800000;
+	half = 0x400000 >> exp;		/* the mantissa bit worth 0.5 at this exponent */
+	/* add 0.5 to the magnitude and chop off the fraction bits */
+	return (x + half) & ~(0x7fffff >> exp);
+}
+
+int emulate_altivec(struct pt_regs *regs)	/* emulate the instruction at regs->nip; returns 0, -EFAULT or -EINVAL */
+{
+	ppc_inst_t instr;
+	unsigned int i, word;
+	unsigned int va, vb, vc, vd;
+	vector128 *vrs;
+
+	if (get_user_instr(instr, (void __user *)regs->nip))
+		return -EFAULT;
+
+	word = ppc_inst_val(instr);
+	if (ppc_inst_primary_opcode(instr) != 4)
+		return -EINVAL;		/* not an altivec instruction */
+	vd = (word >> 21) & 0x1f;	/* four 5-bit fields of the instruction word */
+	va = (word >> 16) & 0x1f;
+	vb = (word >> 11) & 0x1f;
+	vc = (word >> 6) & 0x1f;
+
+	vrs = current->thread.vr_state.vr;	/* works on the thread_struct copy of the vector registers */
+	switch (word & 0x3f) {			/* low 6 bits select the instruction group */
+	case 10:				/* here the vc field selects the operation */
+		switch (vc) {
+		case 0:	/* vaddfp */
+			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
+			break;
+		case 1:	/* vsubfp */
+			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
+			break;
+		case 4:	/* vrefp */
+			vrefp(&vrs[vd], &vrs[vb]);
+			break;
+		case 5:	/* vrsqrtefp */
+			vrsqrtefp(&vrs[vd], &vrs[vb]);
+			break;
+		case 6:	/* vexptefp */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
+			break;
+		case 7:	/* vlogefp */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
+			break;
+		case 8:		/* vrfin */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
+			break;
+		case 9:		/* vrfiz */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
+			break;
+		case 10:	/* vrfip */
+			for (i = 0; i < 4; ++i) {
+				u32 x = vrs[vb].u[i];
+				x = (x & 0x80000000)? rfiz(x): rfii(x);	/* negatives truncate, others round away from zero */
+				vrs[vd].u[i] = x;
+			}
+			break;
+		case 11:	/* vrfim */
+			for (i = 0; i < 4; ++i) {
+				u32 x = vrs[vb].u[i];
+				x = (x & 0x80000000)? rfii(x): rfiz(x);	/* negatives round away from zero, others truncate */
+				vrs[vd].u[i] = x;
+			}
+			break;
+		case 14:	/* vctuxs: the va field is passed as the scale, not used as a register */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
+					&current->thread.vr_state.vscr.u[3]);
+			break;
+		case 15:	/* vctsxs: the va field is passed as the scale, not used as a register */
+			for (i = 0; i < 4; ++i)
+				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
+					&current->thread.vr_state.vscr.u[3]);
+			break;
+		default:
+			return -EINVAL;	/* no emulation for this vc value */
+		}
+		break;
+	case 46:	/* vmaddfp */
+		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
+		break;
+	case 47:	/* vnmsubfp */
+		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
+		break;
+	default:
+		return -EINVAL;	/* no emulation for this instruction group */
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
new file mode 100644
index 0000000000..80b3f6e476
--- /dev/null
+++ b/arch/powerpc/kernel/vector.S
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/asm-compat.h>
+
+/*
+ * Load state from the memory block at r3 into VMX registers including VSCR.
+ * Assumes the caller has enabled VMX in the MSR.  r4 is overwritten.
+ */
+_GLOBAL(load_vr_state)
+	li	r4,VRSTATE_VSCR
+	lvx	v0,r4,r3		/* v0 = VSCR image at r3 + VRSTATE_VSCR */
+	mtvscr	v0
+	REST_32VRS(0,r4,r3)		/* presumably reloads all 32 VRs, v0 included, from r3 */
+	blr
+EXPORT_SYMBOL(load_vr_state)
+_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
+
+/*
+ * Store VMX state into the memory block at r3, including VSCR.
+ * Assumes the caller has enabled VMX in the MSR.  r4 is overwritten.
+ */
+_GLOBAL(store_vr_state)
+	SAVE_32VRS(0, r4, r3)
+	mfvscr	v0			/* VSCR has to go through a VR; v0 is clobbered */
+	li	r4, VRSTATE_VSCR
+	stvx	v0, r4, r3
+	lvx	v0, 0, r3		/* reload v0 from offset 0, presumably where SAVE_32VRS put it */
+	blr
+EXPORT_SYMBOL(store_vr_state)
+
+/*
+ * Load the current task's vector state (VSCR and the 32 VRs) from its
+ * thread_struct into the VMX unit, enabling VMX in the MSR to do so.
+ * Also sets MSR_VEC for use after return (in r9 on 32-bit, in r12 and
+ * _MSR(r1) on 64-bit).  No previous owner's registers are saved here:
+ * we give the VMX up every switch (ie, no lazy save of the registers).
+ *
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
+ */
+_GLOBAL(load_up_altivec)
+	mfmsr	r5			/* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+	ori	r5,r5,MSR_RI
+#endif
+	oris	r5,r5,MSR_VEC@h
+	MTMSRD(r5)			/* enable use of AltiVec now */
+	isync
+
+	/*
+	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
+	 * to optimise userspace context save/restore. Whenever we take an
+	 * altivec unavailable exception we must set VRSAVE to something non
+	 * zero. Set it to all 1s. See also the programming note in the ISA.
+	 */
+	mfspr	r4,SPRN_VRSAVE
+	cmpwi	0,r4,0
+	bne+	1f			/* already non-zero: leave it alone */
+	li	r4,-1
+	mtspr	SPRN_VRSAVE,r4
+1:
+	/* enable use of VMX after return */
+#ifdef CONFIG_PPC32
+	addi	r5,r2,THREAD		/* r5 = THREAD, as in the 64-bit branch */
+	oris	r9,r9,MSR_VEC@h
+#else
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	oris	r12,r12,MSR_VEC@h
+	std	r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)	/* clear the flag; presumably because _MSR(r1) just changed */
+#endif
+#endif /* CONFIG_PPC32 */
+	li	r4,1
+	stb	r4,THREAD_LOAD_VEC(r5)	/* byte at THREAD_LOAD_VEC = 1 */
+	addi	r6,r5,THREAD_VRSTATE	/* r6 = base of the saved vector state */
+	li	r10,VRSTATE_VSCR
+	stw	r4,THREAD_USED_VR(r5)	/* r4 is still 1: word at THREAD_USED_VR = 1 */
+	lvx	v0,r10,r6		/* VSCR image goes through v0 ... */
+	mtvscr	v0
+	REST_32VRS(0,r4,r6)		/* ... before the VRs are loaded (r4 reused as scratch) */
+	/* restore registers and return */
+	blr
+_ASM_NOKPROBE_SYMBOL(load_up_altivec)
+
+/*
+ * save_altivec(tsk)
+ * Save the VRs and VSCR to tsk's THREAD_VRSAVEAREA pointer if non-zero, else to its THREAD_VRSTATE
+ */
+_GLOBAL(save_altivec)
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
+	PPC_LL	r5,PT_REGS(r3)		/* NOTE(review): r5 is not read again in this routine */
+	PPC_LCMPI	0,r7,0
+	bne	2f			/* a save area pointer is set: use it */
+	addi	r7,r3,THREAD_VRSTATE	/* otherwise fall back to the state inside THREAD */
+2:	SAVE_32VRS(0,r4,r7)
+	mfvscr	v0			/* VSCR has to go through a VR; v0 is clobbered */
+	li	r4,VRSTATE_VSCR
+	stvx	v0,r4,r7
+	lvx	v0,0,r7			/* reload v0 from offset 0, presumably where SAVE_32VRS put it */
+	blr
+
+#ifdef CONFIG_VSX
+
+#ifdef CONFIG_PPC32
+#error This asm code isn't ready for 32-bit kernels
+#endif
+
+/*
+ * load_up_vsx
+ * Make VSX usable after return: load the FP and VMX state first if the
+ * MSR value in r12 shows they are not loaded yet, flag the current thread
+ * (found via PACACURRENT(r13)) as a VSX user, set MSR_VSX in r12 and in
+ * _MSR(r1), and leave through fast_interrupt_return_srr.
+ */
+_GLOBAL(load_up_vsx)
+/* Load FP and VSX registers if they haven't been done yet */
+	andi.	r5,r12,MSR_FP
+	beql+	load_up_fpu		/* skip if already loaded */
+	andis.	r5,r12,MSR_VEC@h
+	beql+	load_up_altivec		/* skip if already loaded */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+	li	r5,MSR_RI
+	mtmsrd	r5,1
+#endif
+
+	ld	r4,PACACURRENT(r13)
+	addi	r4,r4,THREAD		/* Get THREAD */
+	li	r6,1
+	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
+	/* enable use of VSX after return */
+	oris	r12,r12,MSR_VSX@h
+	std	r12,_MSR(r1)
+	li	r4,0
+	stb	r4,PACASRR_VALID(r13)	/* clear the flag; presumably because _MSR(r1) just changed */
+	b	fast_interrupt_return_srr	/* does not return to the caller with blr */
+
+#endif /* CONFIG_VSX */
+
+
+/*
+ * The routines below are in assembler so we can closely control the
+ * usage of floating-point registers.  These routines must be called
+ * with preempt disabled.
+ */
+	.data			/* FP constants loaded through LDCONST(), which overwrites r11 */
+#ifdef CONFIG_PPC32
+fpzero:
+	.long	0
+fpone:
+	.long	0x3f800000	/* 1.0 in single-precision FP */
+fphalf:
+	.long	0x3f000000	/* 0.5 in single-precision FP */
+
+#define LDCONST(fr, name)	\
+	lis	r11,name@ha;	\
+	lfs	fr,name@l(r11)
+#else /* !CONFIG_PPC32: the constants are doubles, loaded with lfd */
+
+fpzero:
+	.quad	0
+fpone:
+	.quad	0x3ff0000000000000	/* 1.0 */
+fphalf:
+	.quad	0x3fe0000000000000	/* 0.5 */
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LDCONST(fr, name)		\
+	pla	r11,name@pcrel;		\
+	lfd	fr,0(r11)
+#else /* !CONFIG_PPC_KERNEL_PCREL: address the constant relative to r2 (@toc) */
+#define LDCONST(fr, name)		\
+	addis	r11,r2,name@toc@ha;	\
+	lfd	fr,name@toc@l(r11)
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+#endif /* CONFIG_PPC32 */
+	.text
+/*
+ * Internal routine to enable floating point and set FPSCR to 0.
+ * Don't call it from C; it doesn't use the normal calling convention.
+ */
+SYM_FUNC_START_LOCAL(fpenable)
+#ifdef CONFIG_PPC32
+	stwu	r1,-64(r1)	/* 64-byte frame, released by fpdisable */
+#else
+	stdu	r1,-64(r1)	/* 64-byte frame, released by fpdisable */
+#endif
+	mfmsr	r10		/* r10 = original MSR, put back by fpdisable */
+	ori	r11,r10,MSR_FP
+	mtmsr	r11
+	isync
+	stfd	fr0,24(r1)	/* fr0, fr1 and fr31 are saved in the frame ... */
+	stfd	fr1,16(r1)
+	stfd	fr31,8(r1)	/* ... and reloaded by fpdisable */
+	LDCONST(fr1, fpzero)
+	mffs	fr31		/* fr31 = original FPSCR, put back by fpdisable */
+	MTFSF_L(fr1)		/* FPSCR = 0 (fr1 holds fpzero) */
+	blr
+SYM_FUNC_END(fpenable)
+
+fpdisable:			/* common tail of the routines below: undoes fpenable */
+	mtlr	r12		/* callers kept their return address in r12 (mflr r12) */
+	MTFSF_L(fr31)		/* put back the FPSCR value captured by fpenable */
+	lfd	fr31,8(r1)
+	lfd	fr1,16(r1)
+	lfd	fr0,24(r1)
+	mtmsr	r10		/* r10 = MSR as it was before fpenable */
+	isync
+	addi	r1,r1,64	/* release the frame made by fpenable */
+	blr			/* returns to the caller of the vector routine */
+
+/*
+ * Vector add, floating point: r3 -> destination, r4 -> a, r5 -> b.
+ */
+_GLOBAL(vaddfp)
+	mflr	r12		/* keep the return address for fpdisable */
+	bl	fpenable
+	li	r0,4		/* four single-precision elements */
+	mtctr	r0
+	li	r6,0		/* r6 = byte offset of the current element */
+1:	lfsx	fr0,r4,r6
+	lfsx	fr1,r5,r6
+	fadds	fr0,fr0,fr1	/* a[i] + b[i] */
+	stfsx	fr0,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	b	fpdisable
+
+/*
+ * Vector subtract, floating point: r3 -> destination, r4 -> a, r5 -> b.
+ */
+_GLOBAL(vsubfp)
+	mflr	r12		/* keep the return address for fpdisable */
+	bl	fpenable
+	li	r0,4		/* four single-precision elements */
+	mtctr	r0
+	li	r6,0		/* r6 = byte offset of the current element */
+1:	lfsx	fr0,r4,r6
+	lfsx	fr1,r5,r6
+	fsubs	fr0,fr0,fr1	/* a[i] - b[i] */
+	stfsx	fr0,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	b	fpdisable
+
+/*
+ * Vector multiply and add, floating point: r3 -> dst, r4 -> a, r5 -> b, r6 -> c.
+ */
+_GLOBAL(vmaddfp)
+	mflr	r12		/* keep the return address for fpdisable */
+	bl	fpenable
+	stfd	fr2,32(r1)	/* fr2 is needed too: save it in the fpenable frame */
+	li	r0,4		/* four single-precision elements */
+	mtctr	r0
+	li	r7,0		/* r7 = byte offset of the current element */
+1:	lfsx	fr0,r4,r7
+	lfsx	fr1,r5,r7
+	lfsx	fr2,r6,r7
+	fmadds	fr0,fr0,fr2,fr1	/* a[i] * c[i] + b[i] */
+	stfsx	fr0,r3,r7
+	addi	r7,r7,4
+	bdnz	1b
+	lfd	fr2,32(r1)
+	b	fpdisable
+
+/*
+ * Vector negative multiply and subtract, floating point: r3 -> dst, r4 -> a, r5 -> b, r6 -> c.
+ */
+_GLOBAL(vnmsubfp)
+	mflr	r12		/* keep the return address for fpdisable */
+	bl	fpenable
+	stfd	fr2,32(r1)	/* fr2 is needed too: save it in the fpenable frame */
+	li	r0,4		/* four single-precision elements */
+	mtctr	r0
+	li	r7,0		/* r7 = byte offset of the current element */
+1:	lfsx	fr0,r4,r7
+	lfsx	fr1,r5,r7
+	lfsx	fr2,r6,r7
+	fnmsubs	fr0,fr0,fr2,fr1	/* -(a[i] * c[i] - b[i]) */
+	stfsx	fr0,r3,r7
+	addi	r7,r7,4
+	bdnz	1b
+	lfd	fr2,32(r1)
+	b	fpdisable
+
+/*
+ * Vector reciprocal estimate.  We just compute 1.0/x.
+ * r3 -> destination, r4 -> source.
+ */
+_GLOBAL(vrefp)
+	mflr	r12		/* keep the return address for fpdisable */
+	bl	fpenable
+	li	r0,4		/* four single-precision elements */
+	LDCONST(fr1, fpone)
+	mtctr	r0
+	li	r6,0		/* r6 = byte offset of the current element */
+1:	lfsx	fr0,r4,r6
+	fdivs	fr0,fr1,fr0	/* 1.0 / x[i] */
+	stfsx	fr0,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	b	fpdisable
+
+/*
+ * Vector reciprocal square-root estimate, floating point.
+ * We use the frsqrte instruction for the initial estimate followed
+ * by 2 iterations of Newton-Raphson to get sufficient accuracy.
+ * r3 -> destination, r4 -> source.
+ */
+_GLOBAL(vrsqrtefp)
+	mflr	r12		/* keep the return address for fpdisable */
+	bl	fpenable
+	stfd	fr2,32(r1)	/* fr2-fr5 are needed too: save them in the fpenable frame */
+	stfd	fr3,40(r1)
+	stfd	fr4,48(r1)
+	stfd	fr5,56(r1)
+	li	r0,4		/* four single-precision elements */
+	LDCONST(fr4, fpone)
+	LDCONST(fr5, fphalf)
+	mtctr	r0
+	li	r6,0		/* r6 = byte offset of the current element */
+1:	lfsx	fr0,r4,r6
+	frsqrte	fr1,fr0		/* r = frsqrte(s) */
+	fmuls	fr3,fr1,fr0	/* r * s */
+	fmuls	fr2,fr1,fr5	/* r * 0.5 */
+	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
+	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
+	fmuls	fr3,fr1,fr0	/* r * s */
+	fmuls	fr2,fr1,fr5	/* r * 0.5 */
+	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
+	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
+	stfsx	fr1,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	lfd	fr5,56(r1)	/* reload fr5-fr2; fpdisable reloads fr0, fr1 and fr31 */
+	lfd	fr4,48(r1)
+	lfd	fr3,40(r1)
+	lfd	fr2,32(r1)
+	b	fpdisable
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
new file mode 100644
index 0000000000..1c5970df32
--- /dev/null
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef CONFIG_PPC64
+#define PROVIDE32(x)	PROVIDE(__unused__##x)	/* 64-bit: only a dummy __unused__<x> name is provided */
+#else
+#define PROVIDE32(x)	PROVIDE(x)	/* 32-bit: the symbol x itself is provided */
+#endif /* CONFIG_PPC64 */
+
+#define BSS_FIRST_SECTIONS *(.bss.prominit)
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN	0
+#define RUNTIME_DISCARD_EXIT
+
+#define SOFT_MASK_TABLE(align)	/* aligned __soft_mask_table section with start/stop symbols */	\
+	. = ALIGN(align);						\
+	__soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) {	\
+		__start___soft_mask_table = .;				\
+		KEEP(*(__soft_mask_table))				\
+		__stop___soft_mask_table = .;				\
+	}
+
+#define RESTART_TABLE(align)	/* aligned __restart_table section with start/stop symbols */	\
+	. = ALIGN(align);						\
+	__restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) {	\
+		__start___restart_table = .;				\
+		KEEP(*(__restart_table))				\
+		__stop___restart_table = .;				\
+	}
+
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+
+#define STRICT_ALIGN_SIZE	(1 << CONFIG_DATA_SHIFT)	/* alignment applied before __srwx_boundary / __init_begin */
+
+#if STRICT_ALIGN_SIZE < PAGE_SIZE	/* reject configurations with a sub-page alignment */
+#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT"
+#endif
+
+ENTRY(_stext)
+
+PHDRS {
+	text PT_LOAD FLAGS(7); /* RWX */
+	note PT_NOTE FLAGS(0);
+}
+
+#ifdef CONFIG_PPC64
+OUTPUT_ARCH(powerpc:common64)
+jiffies = jiffies_64;		/* jiffies is an alias for jiffies_64 */
+#else
+OUTPUT_ARCH(powerpc:common)
+jiffies = jiffies_64 + 4;	/* alias for the word at offset 4 of jiffies_64 (presumably its low-order half) */
+#endif /* CONFIG_PPC64 */
+SECTIONS
+{
+	. = KERNELBASE;
+
+/*
+ * Text, read only data and other permanent read-only sections
+ */
+
+	_text = .;
+	_stext = .;
+
+	/*
+	 * Head text.
+	 * This needs to be in its own output section to avoid ld placing
+	 * branch trampoline stubs randomly throughout the fixed sections,
+	 * which it will do (even if the branch comes from another section)
+	 * in order to optimize stub generation.
+	 */
+	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
+#ifdef CONFIG_PPC64
+		KEEP(*(.head.text.first_256B));
+#ifdef CONFIG_PPC_BOOK3E_64	/* nothing beyond first_256B is placed here in this case */
+#else
+		KEEP(*(.head.text.real_vectors));
+		*(.head.text.real_trampolines);
+		KEEP(*(.head.text.virt_vectors));
+		*(.head.text.virt_trampolines);
+# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+		KEEP(*(.head.data.fwnmi_page));
+# endif
+#endif /* CONFIG_PPC_BOOK3E_64 */
+#else /* !CONFIG_PPC64 */
+		HEAD_TEXT
+#endif /* CONFIG_PPC64 */
+	} :text
+
+	__head_end = .;
+
+#ifdef CONFIG_PPC64
+	/*
+	 * ALIGN(0) overrides the default output section alignment because
+	 * this needs to start right after .head.text in order for fixed
+	 * section placement to work.
+	 */
+	.text ALIGN(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+		KEEP(*(.linker_stub_catch));
+		. = . ;
+#endif /* CONFIG_LD_HEAD_STUB_CATCH */
+
+#else /* !CONFIG_PPC64 */
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		ALIGN_FUNCTION();
+#endif /* CONFIG_PPC64 */
+		/* careful! __ftr_alt_* sections need to be close to .text */
+		*(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
+		*(.tramp.ftrace.text);
+		NOINSTR_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
+		/*
+		 * -Os builds call FP save/restore functions. The powerpc64
+		 * linker generates those on demand in the .sfpr section.
+		 * .sfpr gets placed at the beginning of a group of input
+		 * sections, which can break start-of-text offset if it is
+		 * included with the main text sections, so put it by itself.
+		 */
+		*(.sfpr);
+		*(.text.asan.* .text.tsan.*)
+		MEM_KEEP(init.text)
+		MEM_KEEP(exit.text)
+	} :text
+
+	. = ALIGN(PAGE_SIZE);
+	_etext = .;
+	PROVIDE32 (etext = .);
+
+	/* Read-only data */
+	RO_DATA(PAGE_SIZE)
+
+#ifdef CONFIG_PPC32
+	.sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
+		*(.sdata2)
+	}
+#endif /* CONFIG_PPC32 */
+
+	.data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
+		*(.data.rel.ro .data.rel.ro.*)
+	}
+
+	.branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {
+		*(.branch_lt)
+	}
+
+#ifdef CONFIG_PPC32
+	.got1 : AT(ADDR(.got1) - LOAD_OFFSET) {
+		*(.got1)
+	}
+	.got2 : AT(ADDR(.got2) - LOAD_OFFSET) {
+		__got2_start = .;
+		*(.got2)
+		__got2_end = .;
+	}
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		*(.got)
+		*(.got.plt)
+	}
+	.plt : AT(ADDR(.plt) - LOAD_OFFSET) {
+		/* XXX: is .plt (and .got.plt) required? */
+		*(.plt)
+	}
+
+#else /* !CONFIG_PPC32 */
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	.toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
+		*(.toc1)
+	}
+#endif /* !CONFIG_PPC_KERNEL_PCREL */
+
+	.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		*(.got)
+#else /* !CONFIG_PPC_KERNEL_PCREL: .toc input goes into .got as well */
+		*(.got .toc)
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+	}
+
+	SOFT_MASK_TABLE(8)
+	RESTART_TABLE(8)
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+		__start_opd = .;
+		KEEP(*(.opd))
+		__end_opd = .;
+	}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
+
+	. = ALIGN(8);
+	__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_entry_barrier_fixup = .;
+		*(__stf_entry_barrier_fixup)
+		__stop___stf_entry_barrier_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+		__start___uaccess_flush_fixup = .;
+		*(__uaccess_flush_fixup)
+		__stop___uaccess_flush_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+		__start___entry_flush_fixup = .;
+		*(__entry_flush_fixup)
+		__stop___entry_flush_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__scv_entry_flush_fixup : AT(ADDR(__scv_entry_flush_fixup) - LOAD_OFFSET) {
+		__start___scv_entry_flush_fixup = .;
+		*(__scv_entry_flush_fixup)
+		__stop___scv_entry_flush_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_exit_barrier_fixup = .;
+		*(__stf_exit_barrier_fixup)
+		__stop___stf_exit_barrier_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+		__start___rfi_flush_fixup = .;
+		*(__rfi_flush_fixup)
+		__stop___rfi_flush_fixup = .;
+	}
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+	. = ALIGN(8);
+	__spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) {	/* holds the __barrier_nospec_fixup input */
+		__start___barrier_nospec_fixup = .;
+		*(__barrier_nospec_fixup)
+		__stop___barrier_nospec_fixup = .;
+	}
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+#ifdef CONFIG_PPC_E500
+	. = ALIGN(8);
+	__spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) {	/* holds the __btb_flush_fixup input */
+		__start__btb_flush_fixup = .;
+		*(__btb_flush_fixup)
+		__stop__btb_flush_fixup = .;
+	}
+#endif /* CONFIG_PPC_E500 */
+
+	/*
+	 * Various code relies on __init_begin being at the strict RWX boundary.
+	 */
+	. = ALIGN(STRICT_ALIGN_SIZE);
+	__srwx_boundary = .;
+	__end_rodata = .;
+	__init_begin = .;
+
+/*
+ * Init sections discarded at runtime
+ */
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		_sinittext = .;
+		INIT_TEXT
+
+		/*
+		 * .init.text might be RO so we must ensure this section ends on
+		 * a page boundary.
+		 */
+		. = ALIGN(PAGE_SIZE);
+		_einittext = .;
+		*(.tramp.ftrace.init);
+	} :text
+
+	/* .exit.text is discarded at runtime, not link time,
+	 * to deal with references from __bug_table
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+		EXIT_TEXT
+	}
+
+	. = ALIGN(PAGE_SIZE);
+
+	INIT_DATA_SECTION(16)
+
+	. = ALIGN(8);
+	__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
+		__start___ftr_fixup = .;
+		KEEP(*(__ftr_fixup))
+		__stop___ftr_fixup = .;
+	}
+	. = ALIGN(8);
+	__mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
+		__start___mmu_ftr_fixup = .;
+		KEEP(*(__mmu_ftr_fixup))
+		__stop___mmu_ftr_fixup = .;
+	}
+	. = ALIGN(8);
+	__lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
+		__start___lwsync_fixup = .;
+		KEEP(*(__lwsync_fixup))
+		__stop___lwsync_fixup = .;
+	}
+#ifdef CONFIG_PPC64
+	. = ALIGN(8);
+	__fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
+		__start___fw_ftr_fixup = .;
+		KEEP(*(__fw_ftr_fixup))
+		__stop___fw_ftr_fixup = .;
+	}
+#endif /* CONFIG_PPC64 */
+
+	PERCPU_SECTION(L1_CACHE_BYTES)
+
+	. = ALIGN(8);
+	.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
+		__machine_desc_start = . ;
+		KEEP(*(.machine.desc))
+		__machine_desc_end = . ;
+	}
+#ifdef CONFIG_RELOCATABLE
+	. = ALIGN(8);
+	.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
+	{
+		__dynamic_symtab = .;
+		*(.dynsym)
+	}
+	.dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
+	.dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET)
+	{
+		__dynamic_start = .;
+		*(.dynamic)
+	}
+	.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+	.gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) }
+	.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
+	.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
+	{
+		__rela_dyn_start = .;
+		*(.rela*)
+	}
+#endif /* CONFIG_RELOCATABLE */
+	/* .exit.data is discarded at runtime, not link time,
+	 * to deal with references from .exit.text
+	 */
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+		EXIT_DATA
+	}
+
+	/* freed after init ends here */
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+/*
+ * And now the various read/write data
+ */
+
+	. = ALIGN(PAGE_SIZE);
+	_sdata = .;
+
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		DATA_DATA
+		*(.data.rel*)
+#ifdef CONFIG_PPC32
+		*(SDATA_MAIN)
+#endif /* CONFIG_PPC32 */
+	}
+
+	/* The initial task and kernel stack */
+	INIT_TASK_DATA_SECTION(THREAD_ALIGN)
+
+	.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+	}
+
+	.data..cacheline_aligned : AT(ADDR(.data..cacheline_aligned) - LOAD_OFFSET) {
+		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
+	}
+
+	.data..read_mostly : AT(ADDR(.data..read_mostly) - LOAD_OFFSET) {
+		READ_MOSTLY_DATA(L1_CACHE_BYTES)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+		NOSAVE_DATA
+	}
+
+	BUG_TABLE
+
+	. = ALIGN(PAGE_SIZE);
+	_edata  =  .;
+	PROVIDE32 (edata = .);
+
+/*
+ * And finally the bss
+ */
+
+	BSS_SECTION(0, 0, 0)
+
+	. = ALIGN(PAGE_SIZE);
+	_end = . ;
+	PROVIDE32 (end = .);
+
+	DWARF_DEBUG
+	ELF_DETAILS
+
+	DISCARDS
+	/DISCARD/ : {
+		*(*.EMB.apuinfo)
+		*(.glink .iplt .plt)
+		*(.gnu.version*)
+		*(.gnu.attributes)
+		*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+		*(.rela*)
+#endif /* !CONFIG_RELOCATABLE: a relocatable kernel keeps them in .rela.dyn above */
+	}
+}
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
new file mode 100644
index 0000000000..8c464a5d82
--- /dev/null
+++ b/arch/powerpc/kernel/watchdog.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+
+#define pr_fmt(fmt) "watchdog: " fmt
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+
+#include <asm/interrupt.h>
+#include <asm/paca.h>
+#include <asm/nmi.h>
+
+/*
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
+ *
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
+ *
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having an SMP watchdog where all CPUs check all other
+ * CPUs' heartbeats.
+ *
+ * The SMP checker can detect lockups on other CPUs. A global "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears its pending bit in its heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
+ *
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bit still set in the pending mask have had
+ * their heartbeat stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can be used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect lockups that occur with hardware interrupts off.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly;  /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/* SMP checker bits */
+static unsigned long __wd_smp_lock;
+static unsigned long __wd_reporting;
+static unsigned long __wd_nmi_output;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+#ifdef CONFIG_PPC_PSERIES
+static u64 wd_timeout_pct;
+#endif
+
+/*
+ * Try to take the exclusive watchdog action / NMI IPI / printing lock.
+ * wd_smp_lock must be held. If this fails, we should return and wait
+ * for the watchdog to kick in again (or another CPU to trigger it).
+ *
+ * Importantly, if hardlockup_panic is set, wd_try_report failure should
+ * not delay the panic, because whichever other CPU is reporting will
+ * call panic.
+ */
+static bool wd_try_report(void)
+{
+	if (__wd_reporting)
+		return false;
+	__wd_reporting = 1;
+	return true;
+}
+
+/* End printing after successful wd_try_report. wd_smp_lock not required. */
+static void wd_end_reporting(void)
+{
+	smp_mb(); /* End printing "critical section" */
+	WARN_ON_ONCE(__wd_reporting == 0);
+	WRITE_ONCE(__wd_reporting, 0);
+}
+
+static inline void wd_smp_lock(unsigned long *flags)
+{
+	/*
+	 * Avoid locking layers if possible.
+	 * This may be called from low level interrupt handlers at some
+	 * point in future.
+	 */
+	raw_local_irq_save(*flags);
+	hard_irq_disable(); /* Make it soft-NMI safe */
+	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+		raw_local_irq_restore(*flags);
+		spin_until_cond(!test_bit(0, &__wd_smp_lock));
+		raw_local_irq_save(*flags);
+		hard_irq_disable();
+	}
+}
+
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+	clear_bit_unlock(0, &__wd_smp_lock);
+	raw_local_irq_restore(*flags);
+}
+
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+	int cpu = raw_smp_processor_id();
+	u64 tb = get_tb();
+
+	pr_emerg("CPU %d Hard LOCKUP\n", cpu);
+	pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
+		 cpu, tb, per_cpu(wd_timer_tb, cpu),
+		 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
+	print_modules();
+	print_irqtrace_events(current);
+	if (regs)
+		show_regs(regs);
+	else
+		dump_stack();
+
+	/*
+	 * __wd_nmi_output must be set after we printk from NMI context.
+	 *
+	 * printk from NMI context defers printing to the console to irq_work.
+	 * If that NMI was taken in some code that is hard-locked, then irqs
+	 * are disabled so irq_work will never fire. That can result in the
+	 * hard lockup messages being delayed (indefinitely, until something
+	 * else kicks the console drivers).
+	 *
+	 * Setting __wd_nmi_output will cause another CPU to notice and kick
+	 * the console drivers for us.
+	 *
+	 * xchg is not needed here (it could be a smp_mb and store), but xchg
+	 * gives the memory ordering and atomicity required.
+	 */
+	xchg(&__wd_nmi_output, 1);
+
+	/* Do not panic from here because that can recurse into NMI IPI layer */
+}
+
+static bool set_cpu_stuck(int cpu)
+{
+	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+	/*
+	 * Order clear vs. the empty check; see wd_smp_clear_cpu_pending()
+	 */
+	smp_mb();
+	if (cpumask_empty(&wd_smp_cpus_pending)) {
+		wd_smp_last_reset_tb = get_tb();
+		cpumask_andnot(&wd_smp_cpus_pending,
+				&wd_cpus_enabled,
+				&wd_smp_cpus_stuck);
+		return true;
+	}
+	return false;
+}
+
+static void watchdog_smp_panic(int cpu)
+{
+	static cpumask_t wd_smp_cpus_ipi; // protected by reporting
+	unsigned long flags;
+	u64 tb, last_reset;
+	int c;
+
+	wd_smp_lock(&flags);
+	/* Double check some things under lock */
+	tb = get_tb();
+	last_reset = wd_smp_last_reset_tb;
+	if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
+		goto out;
+	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+		goto out;
+	if (!wd_try_report())
+		goto out;
+	for_each_online_cpu(c) {
+		if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
+			continue;
+		if (c == cpu)
+			continue; // should not happen
+
+		__cpumask_set_cpu(c, &wd_smp_cpus_ipi);
+		if (set_cpu_stuck(c))
+			break;
+	}
+	if (cpumask_empty(&wd_smp_cpus_ipi)) {
+		wd_end_reporting();
+		goto out;
+	}
+	wd_smp_unlock(&flags);
+
+	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
+		 cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
+	pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
+		 cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);
+
+	if (!sysctl_hardlockup_all_cpu_backtrace) {
+		/*
+		 * Try to trigger the stuck CPUs, unless we are going to
+		 * get a backtrace on all of them anyway.
+		 */
+		for_each_cpu(c, &wd_smp_cpus_ipi) {
+			smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
+		}
+	} else {
+		trigger_allbutcpu_cpu_backtrace(cpu);
+		cpumask_clear(&wd_smp_cpus_ipi);
+	}
+
+	if (hardlockup_panic)
+		nmi_panic(NULL, "Hard LOCKUP");
+
+	wd_end_reporting();
+
+	return;
+
+out:
+	wd_smp_unlock(&flags);
+}
+
+static void wd_smp_clear_cpu_pending(int cpu)
+{
+	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+			struct pt_regs *regs = get_irq_regs();
+			unsigned long flags;
+
+			pr_emerg("CPU %d became unstuck TB:%lld\n",
+				 cpu, get_tb());
+			print_irqtrace_events(current);
+			if (regs)
+				show_regs(regs);
+			else
+				dump_stack();
+
+			wd_smp_lock(&flags);
+			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+			wd_smp_unlock(&flags);
+		} else {
+			/*
+			 * The last CPU to clear pending should have reset the
+			 * watchdog so we generally should not find it empty
+			 * here if our CPU was clear. However it could happen
+			 * due to a rare race with another CPU taking the
+			 * last CPU out of the mask concurrently.
+			 *
+			 * We can't add a warning for it. But just in case
+			 * there is a problem with the watchdog that is causing
+			 * the mask to not be reset, try to kick it along here.
+			 */
+			if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+				goto none_pending;
+		}
+		return;
+	}
+
+	/*
+	 * All other updates to wd_smp_cpus_pending are performed under
+	 * wd_smp_lock. All of them are atomic except the case where the
+	 * mask becomes empty and is reset. This will not happen here because
+	 * cpu was tested to be in the bitmap (above), and a CPU only clears
+	 * its own bit. _Except_ in the case where another CPU has detected a
+	 * hard lockup on our CPU and takes us out of the pending mask. So in
+	 * normal operation there will be no race here, no problem.
+	 *
+	 * In the lockup case, this atomic clear-bit vs a store that refills
+	 * other bits in the accessed word will not be a problem. The bit clear
+	 * is atomic so it will not cause the store to get lost, and the store
+	 * will never set this bit so it will not overwrite the bit clear. The
+	 * only way for a stuck CPU to return to the pending bitmap is to
+	 * become unstuck itself.
+	 */
+	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+	/*
+	 * Order the store to clear pending with the load(s) to check all
+	 * words in the pending mask to check they are all empty. This orders
+	 * with the same barrier on another CPU. This prevents two CPUs
+	 * clearing the last 2 pending bits, but neither seeing the other's
+	 * store when checking if the mask is empty, and missing an empty
+	 * mask, which ends with a false positive.
+	 */
+	smp_mb();
+	if (cpumask_empty(&wd_smp_cpus_pending)) {
+		unsigned long flags;
+
+none_pending:
+		/*
+		 * Double check under lock because more than one CPU could see
+		 * a clear mask with the lockless check after clearing their
+		 * pending bits.
+		 */
+		wd_smp_lock(&flags);
+		if (cpumask_empty(&wd_smp_cpus_pending)) {
+			wd_smp_last_reset_tb = get_tb();
+			cpumask_andnot(&wd_smp_cpus_pending,
+					&wd_cpus_enabled,
+					&wd_smp_cpus_stuck);
+		}
+		wd_smp_unlock(&flags);
+	}
+}
+
+static void watchdog_timer_interrupt(int cpu)
+{
+	u64 tb = get_tb();
+
+	per_cpu(wd_timer_tb, cpu) = tb;
+
+	wd_smp_clear_cpu_pending(cpu);
+
+	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
+		watchdog_smp_panic(cpu);
+
+	if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
+		/*
+		 * Something has called printk from NMI context. It might be
+		 * stuck, so this triggers a flush that will get that
+		 * printk output to the console.
+		 *
+		 * See wd_lockup_ipi.
+		 */
+		printk_trigger_flush();
+	}
+}
+
+DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
+{
+	unsigned long flags;
+	int cpu = raw_smp_processor_id();
+	u64 tb;
+
+	/* should only arrive from kernel, with irqs disabled */
+	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+
+	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+		return 0;
+
+	__this_cpu_inc(irq_stat.soft_nmi_irqs);
+
+	tb = get_tb();
+	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+		/*
+		 * Taking wd_smp_lock here means it is a soft-NMI lock, which
+		 * means we can't take any regular or irqsafe spin locks while
+		 * holding this lock. This is why timers can't printk while
+		 * holding the lock.
+		 */
+		wd_smp_lock(&flags);
+		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+			wd_smp_unlock(&flags);
+			return 0;
+		}
+		if (!wd_try_report()) {
+			wd_smp_unlock(&flags);
+			/* Couldn't report, try again in 100ms */
+			mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
+			return 0;
+		}
+
+		set_cpu_stuck(cpu);
+
+		wd_smp_unlock(&flags);
+
+		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
+			 cpu, (void *)regs->nip);
+		pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
+			 cpu, tb, per_cpu(wd_timer_tb, cpu),
+			 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
+		print_modules();
+		print_irqtrace_events(current);
+		show_regs(regs);
+
+		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
+
+		if (sysctl_hardlockup_all_cpu_backtrace)
+			trigger_allbutcpu_cpu_backtrace(cpu);
+
+		if (hardlockup_panic)
+			nmi_panic(regs, "Hard LOCKUP");
+
+		wd_end_reporting();
+	}
+	/*
+	 * We are okay to change DEC in soft_nmi_interrupt because the masked
+	 * handler has marked a DEC as pending, so the timer interrupt will be
+	 * replayed as soon as local irqs are enabled again.
+	 */
+	if (wd_panic_timeout_tb < 0x7fffffff)
+		mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+	return 0;
+}
+
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
+{
+	int cpu = smp_processor_id();
+
+	/* Let the timer lapse if the detector is off or this CPU is excluded */
+	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED) ||
+	    !cpumask_test_cpu(cpu, &watchdog_cpumask))
+		return HRTIMER_NORESTART;
+
+	watchdog_timer_interrupt(cpu);
+
+	/* Push the expiry out by one heartbeat period and keep running */
+	hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+	return HRTIMER_RESTART;
+}
+
+void arch_touch_nmi_watchdog(void)
+{
+	unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
+	int cpu = smp_processor_id();
+	u64 tb;
+
+	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+		return;
+
+	tb = get_tb();
+	if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+		per_cpu(wd_timer_tb, cpu) = tb;
+		wd_smp_clear_cpu_pending(cpu);
+	}
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+static void start_watchdog(void *arg)
+{
+	struct hrtimer *timer = this_cpu_ptr(&wd_hrtimer);
+	int cpu = smp_processor_id();
+	unsigned long flags;
+
+	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Nothing to start if the detector is off or this CPU is excluded */
+	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED) ||
+	    !cpumask_test_cpu(cpu, &watchdog_cpumask))
+		return;
+
+	/* The first enabled CPU seeds the pending mask and SMP reset time */
+	wd_smp_lock(&flags);
+	cpumask_set_cpu(cpu, &wd_cpus_enabled);
+	if (cpumask_weight(&wd_cpus_enabled) == 1) {
+		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+		wd_smp_last_reset_tb = get_tb();
+	}
+	wd_smp_unlock(&flags);
+
+	*this_cpu_ptr(&wd_timer_tb) = get_tb();
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = watchdog_timer_fn;
+	hrtimer_start(timer, ms_to_ktime(wd_timer_period_ms),
+		      HRTIMER_MODE_REL_PINNED);
+}
+
+static int start_watchdog_on_cpu(unsigned int cpu)
+{
+	return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+	int cpu = smp_processor_id();
+	unsigned long flags;
+
+	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+		return; /* Can happen in CPU unplug case */
+
+	hrtimer_cancel(hrtimer);
+
+	wd_smp_lock(&flags);
+	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+	wd_smp_unlock(&flags);
+
+	wd_smp_clear_cpu_pending(cpu);
+}
+
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+	return smp_call_function_single(cpu, stop_watchdog, NULL, true);
+}
+
+static void watchdog_calc_timeouts(void)
+{
+	u64 threshold = watchdog_thresh;
+
+#ifdef CONFIG_PPC_PSERIES
+	threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100;
+#endif
+
+	wd_panic_timeout_tb = threshold * ppc_tb_freq;
+
+	/* Have the SMP detector trigger a bit later */
+	wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
+
+	/* 2/5 is the factor that the perf based detector uses */
+	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+void watchdog_hardlockup_stop(void)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &wd_cpus_enabled)
+		stop_watchdog_on_cpu(cpu);
+}
+
+void watchdog_hardlockup_start(void)
+{
+	int cpu;
+
+	watchdog_calc_timeouts();
+	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+		start_watchdog_on_cpu(cpu);
+}
+
+/*
+ * Invoked from core watchdog init. Registers the per-CPU start/stop callbacks
+ * as a dynamic cpuhp state; returns 0 on success or the negative error code.
+ */
+int __init watchdog_hardlockup_probe(void)
+{
+	int err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					    "powerpc/watchdog:online",
+					    start_watchdog_on_cpu,
+					    stop_watchdog_on_cpu);
+
+	if (err < 0) {
+		pr_warn("could not be initialized\n");
+		return err;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void watchdog_hardlockup_set_timeout_pct(u64 pct)
+{
+	pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct);
+	WRITE_ONCE(wd_timeout_pct, pct);
+	lockup_detector_reconfigure();
+}
+#endif
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
new file mode 100644
index 0000000000..0c2abe7f99
--- /dev/null
+++ b/arch/powerpc/kexec/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+obj-y				+= core.o crash.o core_$(BITS).o
+
+obj-$(CONFIG_PPC32)		+= relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE)	+= file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
+KASAN_SANITIZE_core.o := n
+KASAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
new file mode 100644
index 0000000000..005269ac32
--- /dev/null
+++ b/arch/powerpc/kexec/core.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/kdump.h>
+#include <asm/machdep.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/firmware.h>
+
+void machine_kexec_mask_interrupts(void)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+
+	for_each_irq_desc(irq, desc) {
+		struct irq_chip *chip = irq_desc_get_chip(desc);
+		struct irq_data *data = &desc->irq_data;
+
+		if (!chip)
+			continue;
+
+		/* EOI what is in progress, then mask and disable the line */
+		if (chip->irq_eoi && irqd_irq_inprogress(data))
+			chip->irq_eoi(data);
+		if (chip->irq_mask)
+			chip->irq_mask(data);
+		if (chip->irq_disable && !irqd_irq_disabled(data))
+			chip->irq_disable(data);
+	}
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	default_machine_crash_shutdown(regs);
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	/* Export node_data[] when NUMA is configured, contig_page_data if not */
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_data);
+	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#else
+	VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+	/* vmemmap and MMU page-size symbols, sizes and member offsets */
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+	VMCOREINFO_SYMBOL(vmemmap_list);
+	VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+	VMCOREINFO_SYMBOL(mmu_psize_defs);
+	VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+	VMCOREINFO_OFFSET(vmemmap_backing, list);
+	VMCOREINFO_OFFSET(vmemmap_backing, phys);
+	VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+	VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+	VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+	VMCOREINFO_SYMBOL(cur_cpu_spec);
+	VMCOREINFO_OFFSET(cpu_spec, mmu_features);
+	vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled());
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+	int save_ftrace_enabled;
+
+	save_ftrace_enabled = __ftrace_enabled_save();
+	this_cpu_disable_ftrace();
+
+	if (ppc_md.machine_kexec)
+		ppc_md.machine_kexec(image);
+	else
+		default_machine_kexec(image);
+
+	this_cpu_enable_ftrace();
+	__ftrace_enabled_restore(save_ftrace_enabled);
+
+	/* Fall back to normal restart if we're still alive. */
+	machine_restart(NULL);
+	for(;;);
+}
+
+void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_size, crash_base, total_mem_sz;
+	int ret;
+
+	total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size();
+	/* use common parsing */
+	ret = parse_crashkernel(boot_command_line, total_mem_sz,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		crashk_res.start = crash_base;
+		crashk_res.end = crash_base + crash_size - 1;
+	}
+
+	if (crashk_res.end == crashk_res.start) {
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+
+	/* We might have got these values via the command line or the
+	 * device tree, either way sanitise them now. */
+
+	crash_size = resource_size(&crashk_res);
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+	if (crashk_res.start != KDUMP_KERNELBASE)
+		printk("Crash kernel location must be 0x%x\n",
+				KDUMP_KERNELBASE);
+
+	crashk_res.start = KDUMP_KERNELBASE;
+#else
+	if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
+		/*
+		 * On LPAR platforms, place the crash kernel at the middle
+		 * of the RMA (capped at 512MB) so that the crash kernel
+		 * has enough room for itself and some stack within the
+		 * first segment, while the normal kernel also keeps
+		 * enough room to allocate memory for essential system
+		 * resources in the first segment. On other platforms,
+		 * start the crash kernel at half the RMA, capped at 128MB.
+		 */
+		if (firmware_has_feature(FW_FEATURE_LPAR))
+			crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_512M);
+		else
+			crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_128M);
+#else
+		crashk_res.start = KDUMP_KERNELBASE;
+#endif
+	}
+
+	crash_base = PAGE_ALIGN(crashk_res.start);
+	if (crash_base != crashk_res.start) {
+		printk("Crash kernel base must be aligned to 0x%lx\n",
+				PAGE_SIZE);
+		crashk_res.start = crash_base;
+	}
+
+#endif
+	crash_size = PAGE_ALIGN(crash_size);
+	crashk_res.end = crashk_res.start + crash_size - 1;
+
+	/* The crash region must not overlap the current kernel */
+	if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
+		printk(KERN_WARNING
+			"Crash kernel can not overlap current kernel\n");
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+
+	/* Crash kernel trumps memory limit */
+	if (memory_limit && memory_limit <= crashk_res.end) {
+		memory_limit = crashk_res.end + 1;
+		total_mem_sz = memory_limit;
+		printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
+		       memory_limit);
+	}
+
+	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+			"for crashkernel (System RAM: %ldMB)\n",
+			(unsigned long)(crash_size >> 20),
+			(unsigned long)(crashk_res.start >> 20),
+			(unsigned long)(total_mem_sz >> 20));
+
+	if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
+	    memblock_reserve(crashk_res.start, crash_size)) {
+		pr_err("Failed to reserve memory for crashkernel!\n");
+		crashk_res.start = crashk_res.end = 0;
+		return;
+	}
+}
+
+int __init overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+	return (start + size) > crashk_res.start && start <= crashk_res.end;
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
+static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
+
+static struct property kernel_end_prop = {
+	.name = "linux,kernel-end",
+	.length = sizeof(phys_addr_t),
+	.value = &kernel_end,
+};
+
+static struct property crashk_base_prop = {
+	.name = "linux,crashkernel-base",
+	.length = sizeof(phys_addr_t),
+	.value = &crashk_base
+};
+
+static struct property crashk_size_prop = {
+	.name = "linux,crashkernel-size",
+	.length = sizeof(phys_addr_t),
+	.value = &crashk_size,
+};
+
+static struct property memory_limit_prop = {
+	.name = "linux,memory-limit",
+	.length = sizeof(unsigned long long),
+	.value = &mem_limit,
+};
+
+#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG)
+
+static void __init export_crashk_values(struct device_node *node)
+{
+	/* There might be existing crash kernel properties, but we can't
+	 * be sure what's in them, so remove them. */
+	of_remove_property(node, of_find_property(node,
+				"linux,crashkernel-base", NULL));
+	of_remove_property(node, of_find_property(node,
+				"linux,crashkernel-size", NULL));
+
+	if (crashk_res.start != 0) {
+		crashk_base = cpu_to_be_ulong(crashk_res.start);
+		of_add_property(node, &crashk_base_prop);
+		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
+		of_add_property(node, &crashk_size_prop);
+	}
+
+	/*
+	 * memory_limit is required by the kexec-tools to limit the
+	 * crash regions to the actual memory used.
+	 */
+	mem_limit = cpu_to_be_ulong(memory_limit);
+	of_update_property(node, &memory_limit_prop);
+}
+
+static int __init kexec_setup(void)
+{
+	struct device_node *node;
+
+	node = of_find_node_by_path("/chosen");
+	if (!node)
+		return -ENOENT;
+
+	/* remove any stale properties so ours can be found */
+	of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
+
+	/* information needed by userspace when using default_machine_kexec */
+	kernel_end = cpu_to_be_ulong(__pa(_end));
+	of_add_property(node, &kernel_end_prop);
+
+	export_crashk_values(node);
+
+	of_node_put(node);
+	return 0;
+}
+late_initcall(kexec_setup);
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
new file mode 100644
index 0000000000..c95f96850c
--- /dev/null
+++ b/arch/powerpc/kexec/core_32.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC32 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/cacheflush.h>
+#include <asm/hw_irq.h>
+#include <asm/io.h>
+
+typedef void (*relocate_new_kernel_t)(
+				unsigned long indirection_page,
+				unsigned long reboot_code_buffer,
+				unsigned long start_address) __noreturn;
+
+/*
+ * This is a generic machine_kexec function suitable at least for
+ * non-OpenFirmware embedded platforms.
+ * It merely copies the image relocation code to the control page and
+ * jumps to it.
+ * A platform specific function may just call this one.
+ */
+void default_machine_kexec(struct kimage *image)
+{
+	extern const unsigned int relocate_new_kernel_size;
+	unsigned long page_list;
+	unsigned long reboot_code_buffer, reboot_code_buffer_phys;
+	relocate_new_kernel_t rnk;
+
+	/* Interrupts aren't acceptable while we reboot */
+	local_irq_disable();
+
+	/* mask each interrupt so we are in a more sane state for the
+	 * kexec kernel */
+	machine_kexec_mask_interrupts();
+
+	page_list = image->head;
+
+	/* we need both effective and real address here */
+	reboot_code_buffer =
+			(unsigned long)page_address(image->control_code_page);
+	reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
+
+	/* copy our kernel relocation code to the control code page */
+	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+						relocate_new_kernel_size);
+
+	flush_icache_range(reboot_code_buffer,
+				reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
+	printk(KERN_INFO "Bye!\n");
+
+	if (!IS_ENABLED(CONFIG_PPC_85xx) && !IS_ENABLED(CONFIG_44x))
+		relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
+	/* now call it */
+	rnk = (relocate_new_kernel_t) reboot_code_buffer;
+	(*rnk)(page_list, reboot_code_buffer_phys, image->start);
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	return 0;
+}
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
new file mode 100644
index 0000000000..a79e28c91e
--- /dev/null
+++ b/arch/powerpc/kexec/core_64.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC64 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ */
+
+
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/init_task.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+#include <linux/of.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>	/* _end */
+#include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	struct device_node *np;
+	const unsigned long *tce_base;
+	const unsigned int *tce_size;
+	unsigned long seg_start, seg_end;	/* one segment's destination */
+	unsigned long tce_start, tce_end;	/* one TCE table's extent */
+	int seg;
+
+	/*
+	 * The kernel's own fault handlers and paging code stay in use while
+	 * the image is handled in virtual mode, so refuse any segment whose
+	 * destination starts below the end of the kernel (static data, bss).
+	 */
+	for (seg = 0; seg < image->nr_segments; seg++) {
+		if (image->segment[seg].mem < __pa(_end))
+			return -ETXTBSY;
+	}
+	/* Likewise refuse any segment that would land on a TCE table. */
+	for_each_node_by_type(np, "pci") {
+		tce_base = of_get_property(np, "linux,tce-base", NULL);
+		tce_size = of_get_property(np, "linux,tce-size", NULL);
+		if (!tce_base || !tce_size)
+			continue;
+
+		tce_start = *tce_base;
+		tce_end = tce_start + *tce_size;
+
+		for (seg = 0; seg < image->nr_segments; seg++) {
+			seg_start = image->segment[seg].mem;
+			seg_end = seg_start + image->segment[seg].memsz;
+
+			if (seg_end <= tce_start || seg_start >= tce_end)
+				continue;
+			of_node_put(np);
+			return -ETXTBSY;
+		}
+	}
+
+	return 0;
+}
+
+/* Walk the entry list starting at @ind and copy each source page; called during kexec sequence with MMU off */
+static notrace void copy_segments(unsigned long ind)
+{
+	unsigned long entry;	/* current list entry: page address plus IND_* flag bits */
+	unsigned long *ptr;	/* where the next entry is read from */
+	void *dest;		/* where the next source page is copied to */
+	void *addr;		/* virtual address of the page named by the current entry */
+
+	/*
+	 * We rely on kexec_load to create a list that properly
+	 * initializes these pointers before they are used.
+	 * We will still crash if the list is wrong, but at least
+	 * the compiler will be quiet.
+	 */
+	ptr = NULL;
+	dest = NULL;
+
+	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+		addr = __va(entry & PAGE_MASK);
+
+		switch (entry & IND_FLAGS) {
+		case IND_DESTINATION:
+			dest = addr;	/* following source pages go here */
+			break;
+		case IND_INDIRECTION:
+			ptr = addr;	/* continue reading entries from this page */
+			break;
+		case IND_SOURCE:
+			copy_page(dest, addr);
+			dest += PAGE_SIZE;	/* consecutive sources fill consecutive pages */
+		}
+	}
+}
+
+/* Copy all segments into place, then flush the icache over them; called during kexec sequence with MMU off */
+notrace void kexec_copy_flush(struct kimage *image)
+{
+	long i, nr_segments = image->nr_segments;	/* read now: *image is off limits after the copy */
+	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+	/* save the ranges on the stack to efficiently flush the icache */
+	memcpy(ranges, image->segment, sizeof(ranges));
+
+	/*
+	 * After this call we may not use anything allocated in dynamic
+	 * memory, including *image.
+	 *
+	 * Only globals and the stack are allowed.
+	 */
+	copy_segments(image->head);
+
+	/*
+	 * we need to clear the icache for all dest pages sometime,
+	 * including ones that were in place on the original copy
+	 */
+	for (i = 0; i < nr_segments; i++)
+		flush_icache_range((unsigned long)__va(ranges[i].mem),
+			(unsigned long)__va(ranges[i].mem + ranges[i].memsz));
+}
+
+#ifdef CONFIG_SMP
+
+static int kexec_all_irq_disabled = 0;
+
+static void kexec_smp_down(void *arg)	/* @arg is unused; kexec_prepare_cpus() passes NULL */
+{
+	local_irq_disable();
+	hard_irq_disable();
+
+	mb(); /* make sure our irqs are disabled before we say they are */
+	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+	while(kexec_all_irq_disabled == 0)	/* set to 1 by kexec_prepare_cpus() */
+		cpu_relax();
+	mb(); /* make sure all irqs are disabled before this */
+	hw_breakpoint_disable();
+	/*
+	 * Now every CPU has IRQs off, we can clear out any pending
+	 * IPIs and be sure that no more will come in after this.
+	 */
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 1);	/* presumably (crash_shutdown=0, secondary=1) -- confirm against the hook's prototype */
+
+	reset_sprs();
+
+	kexec_smp_wait();
+	/* NOTREACHED */
+}
+
+static void kexec_prepare_cpus_wait(int wait_state)
+{
+	int my_cpu, i, notified=-1;	/* notified: last CPU a "waiting" message was printed for */
+
+	hw_breakpoint_disable();
+	my_cpu = get_cpu();	/* NOTE(review): no put_cpu() here; kexec_prepare_cpus() calls this twice and put_cpu() once */
+	/* Make sure each CPU has at least made it to the state we need.
+	 *
+	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+	 * are correctly onlined.  If somehow we start a CPU on boot with RTAS
+	 * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
+	 * time, the boot CPU will timeout.  If it does eventually execute
+	 * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+	 * written) and get into a peculiar state.
+	 * If the platform supports smp_ops->take_timebase(), the secondary CPU
+	 * will probably be spinning in there.  If not (i.e. pseries), the
+	 * secondary will continue on and try to online itself/idle/etc. If it
+	 * survives that, we need to find these
+	 * possible-but-not-online-but-should-be CPUs and chaperone them into
+	 * kexec_smp_wait().
+	 */
+	for_each_online_cpu(i) {
+		if (i == my_cpu)
+			continue;
+
+		while (paca_ptrs[i]->kexec_state < wait_state) {	/* spins with no timeout */
+			barrier();
+			if (i != notified) {
+				printk(KERN_INFO "kexec: waiting for cpu %d "
+				       "(physical %d) to enter %i state\n",
+				       i, paca_ptrs[i]->hw_cpu_id, wait_state);
+				notified = i;	/* print at most once per CPU we wait on */
+			}
+		}
+	}
+	mb();
+}
+
+/*
+ * Bring every present-but-offline CPU online before the switch.  The next
+ * kernel scans the device tree, assumes that primary threads are online and
+ * asks RTAS to online secondary threads where required, so a primary thread
+ * left offline here would be stuck.  Secondary threads are onlined as well
+ * because 'cede offline' may be in use, in which case RTAS does not see
+ * them as offline -- and they would be stuck in the same way.
+ *
+ * In short: online all CPUs that should be running, secondaries included.
+ */
+static void wake_offline_cpus(void)
+{
+	int cpu = 0;
+
+	for_each_present_cpu(cpu) {
+		if (cpu_online(cpu))
+			continue;
+		printk(KERN_INFO "kexec: Waking offline cpu %d.\n", cpu);
+		/* warn, but keep going, if add_cpu() reports a non-zero result */
+		WARN_ON(add_cpu(cpu));
+	}
+}
+
+static void kexec_prepare_cpus(void)
+{
+	wake_offline_cpus();
+	smp_call_function(kexec_smp_down, NULL, /* wait */0);
+	local_irq_disable();
+	hard_irq_disable();
+
+	mb(); /* make sure IRQs are disabled before we say they are */
+	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+	/* we are sure every CPU has IRQs off at this point */
+	kexec_all_irq_disabled = 1;	/* releases the spin loop in kexec_smp_down() */
+
+	/*
+	 * Before removing MMU mappings make sure all CPUs have entered real
+	 * mode:
+	 */
+	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+
+	/* this CPU's own down hook runs only after the others were told to go down */
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+
+	put_cpu();	/* the get_cpu() calls are in kexec_prepare_cpus_wait() */
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+	/*
+	 * move the secondaries to us so that we can copy
+	 * the new kernel 0-0x100 safely
+	 *
+	 * do this if kexec in setup.c ?
+	 *
+	 * We need to release the cpus if we are ever going from a
+	 * UP to an SMP kernel.
+	 */
+	smp_release_cpus();
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+	local_irq_disable();
+	hard_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+static union thread_union kexec_stack __init_task_data =
+	{ };
+
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */
+static struct paca_struct kexec_paca;
+
+/* Our assembly helper, in misc_64.S */
+extern void kexec_sequence(void *newstack, unsigned long start,
+			   void *image, void *control,
+			   void (*clear_all)(void),
+			   bool copy_with_mmu_off) __noreturn;
+
+/* Hand over to the new kernel through kexec_sequence(); too late to fail here */
+void default_machine_kexec(struct kimage *image)
+{
+	bool copy_with_mmu_off;
+
+	/* prepare control code if any */
+
+	/*
+	 * If this is a normal kexec boot, we need to shut down the other CPUs
+	 * into our wait loop and quiesce interrupts.
+	 * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
+	 * stopping other CPUs and collecting their pt_regs has already been
+	 * done using the debugger IPI.
+	 */
+
+	if (!kdump_in_progress())
+		kexec_prepare_cpus();
+
+	printk("kexec: Starting switchover sequence.\n");
+
+	/* switch to a statically allocated stack.  Based on irq stack code.
+	 * We setup preempt_count to avoid using VMX in memcpy.
+	 * XXX: the task struct will likely be invalid once we do the copy!
+	 */
+	current_thread_info()->flags = 0;
+	current_thread_info()->preempt_count = HARDIRQ_OFFSET;
+
+	/* We need a static PACA, too; copy this CPU's PACA over and switch to
+	 * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+	 * non-static data.
+	 */
+	memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
+	kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+#ifdef CONFIG_PPC_PSERIES
+	kexec_paca.lppaca_ptr = NULL;
+#endif
+
+	if (is_secure_guest() && !(image->preserve_context ||
+				   image->type == KEXEC_TYPE_CRASH)) {
+		uv_unshare_all_pages();
+		printk("kexec: Unshared all shared pages.\n");
+	}
+
+	paca_ptrs[kexec_paca.paca_index] = &kexec_paca;	/* this CPU's slot now points at the static copy */
+
+	setup_paca(&kexec_paca);
+
+	/*
+	 * The lppaca should be unregistered at this point so the HV won't
+	 * touch it. In the case of a crash, none of the lppacas are
+	 * unregistered so there is not much we can do about it here.
+	 */
+
+	/*
+	 * On Book3S, the copy must happen with the MMU off if we are either
+	 * using Radix page tables or we are not in an LPAR since we can
+	 * overwrite the page tables while copying.
+	 *
+	 * In an LPAR, we keep the MMU on otherwise we can't access beyond
+	 * the RMA. On BookE there is no real MMU off mode, so we have to
+	 * keep it enabled as well (but then we have bolted TLB entries).
+	 */
+#ifdef CONFIG_PPC_BOOK3E_64
+	copy_with_mmu_off = false;
+#else
+	copy_with_mmu_off = radix_enabled() ||
+		!(firmware_has_feature(FW_FEATURE_LPAR) ||
+		  firmware_has_feature(FW_FEATURE_PS3_LV1));
+#endif
+
+	/* Some things are best done in assembly.  Finding globals with
+	 * a toc is easier in C, so pass in what we can.
+	 */
+	kexec_sequence(&kexec_stack, image->start, image,
+		       page_address(image->control_code_page),
+		       mmu_cleanup_all, copy_with_mmu_off);
+	/* NOTREACHED */
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base;
+static unsigned long htab_size;
+
+static struct property htab_base_prop = {
+	.name = "linux,htab-base",
+	.length = sizeof(unsigned long),
+	.value = &htab_base,
+};
+
+static struct property htab_size_prop = {
+	.name = "linux,htab-size",
+	.length = sizeof(unsigned long),
+	.value = &htab_size,
+};
+
+static int __init export_htab_values(void)
+{
+	struct device_node *chosen;
+	struct property *stale;
+
+	/* htab_address is NULL on machines that have no htab */
+	if (htab_address == NULL)
+		return -ENODEV;
+	chosen = of_find_node_by_path("/chosen");
+	if (chosen == NULL)
+		return -ENODEV;
+
+	/* drop stale copies first so that lookups find the ones added below */
+	stale = of_find_property(chosen, htab_base_prop.name, NULL);
+	of_remove_property(chosen, stale);
+	stale = of_find_property(chosen, htab_size_prop.name, NULL);
+	of_remove_property(chosen, stale);
+	htab_base = cpu_to_be64(__pa(htab_address));
+	of_add_property(chosen, &htab_base_prop);
+	htab_size = cpu_to_be64(htab_size_bytes);
+	of_add_property(chosen, &htab_size_prop);
+	of_node_put(chosen);
+	return 0;
+}
+late_initcall(export_htab_values);
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
new file mode 100644
index 0000000000..ef5c2d25ec
--- /dev/null
+++ b/arch/powerpc/kexec/crash.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Architecture specific (PPC64) functions for kexec based crash dumps.
+ *
+ * Copyright (C) 2005, IBM Corp.
+ *
+ * Created by: Haren Myneni
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/export.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/kexec.h>
+#include <asm/smp.h>
+#include <asm/setjmp.h>
+#include <asm/debug.h>
+#include <asm/interrupt.h>
+
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (eg via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT		500
+#define SECONDARY_TIMEOUT	1000
+
+#define IPI_TIMEOUT		10000
+#define REAL_MODE_TIMEOUT	10000
+
+static int time_to_dump;
+
+/*
+ * In case of system reset, secondary CPUs enter crash_kexec_secondary without
+ * having to send an IPI explicitly. So, indicate if the crash is via
+ * system reset to avoid sending another IPI.
+ */
+static int is_via_system_reset;
+
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid things from happening when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
+
+#define CRASH_HANDLER_MAX 3
+/* List of shutdown handles */
+static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
+static DEFINE_SPINLOCK(crash_handlers_lock);
+
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
+
+static int handle_fault(struct pt_regs *regs)
+{
+	if (crash_shutdown_cpu == smp_processor_id())
+		longjmp(crash_shutdown_buf, 1);	/* back to the setjmp() on crash_shutdown_buf, which then yields 1 */
+	return 0;	/* this CPU is not the one recorded in crash_shutdown_cpu */
+}
+
+#ifdef CONFIG_SMP
+
+static atomic_t cpus_in_crash;
+void crash_ipi_callback(struct pt_regs *regs)
+{
+	static cpumask_t cpus_state_saved = CPU_MASK_NONE;	/* CPUs already saved, so a repeat entry saves only once */
+
+	int cpu = smp_processor_id();
+
+	hard_irq_disable();
+	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
+		crash_save_cpu(regs, cpu);
+		cpumask_set_cpu(cpu, &cpus_state_saved);
+	}
+
+	atomic_inc(&cpus_in_crash);	/* counted by crash_kexec_prepare_cpus() */
+	smp_mb__after_atomic();
+
+	/*
+	 * Hold this CPU here until the kdump boot starts, i.e. until
+	 * default_machine_crash_shutdown() sets time_to_dump.
+	 */
+	while (!time_to_dump)
+		cpu_relax();
+
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(1, 1);
+
+#ifdef CONFIG_PPC64
+	kexec_smp_wait();
+#else
+	for (;;);	/* FIXME */
+#endif
+
+	/* NOTREACHED */
+}
+
+static void crash_kexec_prepare_cpus(void)
+{
+	unsigned int msecs;
+	volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+	volatile int tries = 0;	/* presumably volatile because it is read after a longjmp() return -- confirm */
+	int (*old_handler)(struct pt_regs *regs);
+
+	printk(KERN_EMERG "Sending IPI to other CPUs\n");
+
+	if (crash_wake_offline)
+		ncpus = num_present_cpus() - 1;
+
+	/*
+	 * If we came in via system reset, secondaries enter via crash_kexec_secondary().
+	 * So, wait a while for the secondary CPUs to enter for that case.
+	 * Else, send IPI to all other CPUs.
+	 */
+	if (is_via_system_reset)
+		mdelay(PRIMARY_TIMEOUT);
+	else
+		crash_send_ipi(crash_ipi_callback);
+	smp_wmb();
+
+again:
+	/*
+	 * FIXME: Until we have a way to stop other CPUs reliably,
+	 * the crash CPU will send an IPI and wait for other CPUs to
+	 * respond.
+	 */
+	msecs = IPI_TIMEOUT;	/* consumed in 1 ms steps by the loop below */
+	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
+		mdelay(1);
+
+	/* Would it be better to replace the trap vector here? */
+
+	if (atomic_read(&cpus_in_crash) >= ncpus) {
+		printk(KERN_EMERG "IPI complete\n");
+		return;
+	}
+
+	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+		ncpus - atomic_read(&cpus_in_crash));
+
+	/*
+	 * If we have a panic timeout set then we can't wait indefinitely
+	 * for someone to activate system reset. We also give up on the
+	 * second time through if system reset fails to work.
+	 */
+	if ((panic_timeout > 0) || (tries > 0))
+		return;
+
+	/*
+	 * A system reset will cause all CPUs to take an 0x100 exception.
+	 * The primary CPU returns here via setjmp, and the secondary
+	 * CPUs reexecute the crash_kexec_secondary path.
+	 */
+	old_handler = __debugger;
+	__debugger = handle_fault;
+	crash_shutdown_cpu = smp_processor_id();	/* lets handle_fault() longjmp() on this CPU only */
+
+	if (setjmp(crash_shutdown_buf) == 0) {
+		printk(KERN_EMERG "Activate system reset (dumprestart) "
+				  "to stop other cpu(s)\n");
+
+		/*
+		 * A system reset will force all CPUs to execute the
+		 * crash code again. We need to reset cpus_in_crash so we
+		 * wait for everyone to do this.
+		 */
+		atomic_set(&cpus_in_crash, 0);
+		smp_mb();
+
+		while (atomic_read(&cpus_in_crash) < ncpus)	/* no timeout on this wait */
+			cpu_relax();
+	}
+
+	crash_shutdown_cpu = -1;
+	__debugger = old_handler;
+
+	tries++;
+	goto again;
+}
+
+/*
+ * Entry point for secondary cpus: wait for the primary, then join the crash.
+ */
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+	unsigned long irq_flags;
+	int budget_ms;
+
+	local_irq_save(irq_flags);
+
+	/*
+	 * Poll in 1 ms steps until the primary crash CPU has set crashing_cpu.
+	 */
+	for (budget_ms = SECONDARY_TIMEOUT; crashing_cpu < 0; mdelay(1)) {
+		if (--budget_ms >= 0)
+			continue;
+		/* No response, kdump image may not have been loaded */
+		local_irq_restore(irq_flags);
+		return;
+	}
+
+	crash_ipi_callback(regs);
+}
+
+#else	/* ! CONFIG_SMP */
+
+static void crash_kexec_prepare_cpus(void)
+{
+	/*
+	 * move the secondaries to us so that we can copy
+	 * the new kernel 0-0x100 safely
+	 *
+	 * do this if kexec in setup.c ?
+	 */
+#ifdef CONFIG_PPC64
+	smp_release_cpus();
+#else
+	/* FIXME: nothing is done in this configuration */
+#endif
+}
+
+void crash_kexec_secondary(struct pt_regs *regs)	/* !CONFIG_SMP variant: intentionally empty */
+{
+}
+#endif	/* CONFIG_SMP */
+
+/* wait for all the CPUs to hit real mode but time out if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
+{
+	unsigned int msecs;	/* one budget shared by all CPUs, not a per-CPU timeout */
+	int i;
+
+	msecs = REAL_MODE_TIMEOUT;
+	for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+		if (i == cpu)	/* skip @cpu itself; the call site passes crashing_cpu */
+			continue;
+
+		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {	/* NOTE(review): dereferenced before cpu_possible(i) is checked -- confirm */
+			barrier();
+			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
+				break;
+			msecs--;
+			mdelay(1);
+		}
+	}
+	mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif	/* CONFIG_SMP && CONFIG_PPC64 */
+
+void crash_kexec_prepare(void)
+{
+	/* Avoid hardlocking with an unresponsive CPU holding logbuf_lock */
+	printk_deferred_enter();
+
+	/*
+	 * This function is only called after the system
+	 * has panicked or is otherwise in a critical state.
+	 * The minimum amount of code to allow a kexec'd kernel
+	 * to run successfully needs to happen here.
+	 *
+	 * In practice this means stopping other cpus in
+	 * an SMP system.
+	 * The kernel is broken so disable interrupts.
+	 */
+	hard_irq_disable();
+
+	/*
+	 * Make a note of crashing cpu. Will be used in machine_kexec
+	 * such that another IPI will not be sent.
+	 */
+	crashing_cpu = smp_processor_id();	/* also the value crash_kexec_secondary() polls for */
+
+	crash_kexec_prepare_cpus();
+}
+
+/*
+ * Register a function to be called on shutdown (only use this if you can't
+ * reset your device in the second kernel).  Returns 0, or 1 if the table is full.
+ */
+int crash_shutdown_register(crash_shutdown_t handler)
+{
+	unsigned int slot = 0;
+	unsigned int rc;
+
+	spin_lock(&crash_handlers_lock);
+	/* the first empty entry, if any, is where the handler goes */
+	while (slot < CRASH_HANDLER_MAX && crash_shutdown_handles[slot])
+		slot++;
+
+	if (slot < CRASH_HANDLER_MAX) {
+		crash_shutdown_handles[slot] = handler;
+		rc = 0;
+	} else {
+		printk(KERN_ERR "Crash shutdown handles full, "
+		       "not registered.\n");
+		rc = 1;
+	}
+
+	spin_unlock(&crash_handlers_lock);
+	return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_register);
+
+int crash_shutdown_unregister(crash_shutdown_t handler)
+{
+	unsigned int pos = 0;
+	unsigned int rc = 1;
+
+	spin_lock(&crash_handlers_lock);
+	/* locate the entry holding this handler */
+	while (pos < CRASH_HANDLER_MAX && crash_shutdown_handles[pos] != handler)
+		pos++;
+
+	if (pos == CRASH_HANDLER_MAX) {
+		printk(KERN_ERR "Crash shutdown handle not found\n");
+		goto unlock;
+	}
+
+	/* close the gap: move every later entry one slot down */
+	while (pos < CRASH_HANDLER_MAX - 1) {
+		crash_shutdown_handles[pos] = crash_shutdown_handles[pos + 1];
+		pos++;
+	}
+	/* the last entry was copied down; clear it so it can be reused */
+	crash_shutdown_handles[pos] = NULL;
+	rc = 0;
+
+unlock:
+	spin_unlock(&crash_handlers_lock);
+	return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_unregister);
+
+void default_machine_crash_shutdown(struct pt_regs *regs)
+{
+	volatile unsigned int i;	/* presumably volatile so it survives a longjmp() back to setjmp() -- confirm */
+	int (*old_handler)(struct pt_regs *regs);
+
+	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
+		is_via_system_reset = 1;	/* read by crash_kexec_prepare_cpus() to skip the IPI */
+
+	crash_smp_send_stop();
+
+	crash_save_cpu(regs, crashing_cpu);
+
+	time_to_dump = 1;	/* releases the CPUs spinning in crash_ipi_callback() */
+
+	crash_kexec_wait_realmode(crashing_cpu);
+
+	machine_kexec_mask_interrupts();
+
+	/*
+	 * Call registered shutdown routines safely.  Swap out
+	 * __debugger_fault_handler, and replace on exit.
+	 */
+	old_handler = __debugger_fault_handler;
+	__debugger_fault_handler = handle_fault;
+	crash_shutdown_cpu = smp_processor_id();
+	for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
+		if (setjmp(crash_shutdown_buf) == 0) {	/* non-zero after handle_fault() longjmp()s: skip to the next handle */
+			/*
+			 * Insert syncs and delay to ensure
+			 * instructions in the dangerous region don't
+			 * leak away from this protected region.
+			 */
+			asm volatile("sync; isync");
+			/* dangerous region */
+			crash_shutdown_handles[i]();
+			asm volatile("sync; isync");
+		}
+	}
+	crash_shutdown_cpu = -1;
+	__debugger_fault_handler = old_handler;
+
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(1, 0);
+}
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
new file mode 100644
index 0000000000..eeb258002d
--- /dev/null
+++ b/arch/powerpc/kexec/elf_64.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#define pr_fmt(fmt)	"kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+			unsigned long kernel_len, char *initrd,
+			unsigned long initrd_len, char *cmdline,
+			unsigned long cmdline_len)
+{
+	int ret;
+	unsigned long kernel_load_addr;
+	unsigned long initrd_load_addr = 0, fdt_load_addr;	/* initrd address stays 0 when no initrd is given */
+	void *fdt;
+	const void *slave_code;
+	struct elfhdr ehdr;
+	char *modified_cmdline = NULL;	/* only allocated for crash images; freed at "out" */
+	struct kexec_elf_info elf_info;
+	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+				  .buf_max = ppc64_rma_size };	/* reused for kernel, crash segments, initrd and FDT */
+	struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+				  .buf_max = ppc64_rma_size, .top_down = true,
+				  .mem = KEXEC_BUF_MEM_UNKNOWN };	/* used for the purgatory only */
+
+	ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		/* min & max buffer values for kdump case */
+		kbuf.buf_min = pbuf.buf_min = crashk_res.start;
+		kbuf.buf_max = pbuf.buf_max =
+				((crashk_res.end < ppc64_rma_size) ?
+				 crashk_res.end : (ppc64_rma_size - 1));
+	}
+
+	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
+	if (ret)
+		goto out;
+
+	pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+	ret = kexec_load_purgatory(image, &pbuf);
+	if (ret) {
+		pr_err("Loading purgatory failed.\n");
+		goto out;
+	}
+
+	pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
+
+	/* Load additional segments needed for panic kernel */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = load_crashdump_segments_ppc64(image, &kbuf);
+		if (ret) {
+			pr_err("Failed to load kdump kernel segments\n");
+			goto out;
+		}
+
+		/* Setup cmdline for kdump kernel case */
+		modified_cmdline = setup_kdump_cmdline(image, cmdline,
+						       cmdline_len);
+		if (!modified_cmdline) {
+			pr_err("Setting up cmdline for kdump kernel failed\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		cmdline = modified_cmdline;	/* from here on the FDT setup sees the kdump command line */
+	}
+
+	if (initrd != NULL) {
+		kbuf.buffer = initrd;
+		kbuf.bufsz = kbuf.memsz = initrd_len;
+		kbuf.buf_align = PAGE_SIZE;
+		kbuf.top_down = false;
+		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+		ret = kexec_add_buffer(&kbuf);
+		if (ret)
+			goto out;
+		initrd_load_addr = kbuf.mem;
+
+		pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+	}
+
+	fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,
+					   initrd_len, cmdline,
+					   kexec_extra_fdt_size_ppc64(image));
+	if (!fdt) {
+		pr_err("Error setting up the new device tree.\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
+				  initrd_len, cmdline);
+	if (ret)
+		goto out_free_fdt;
+
+	fdt_pack(fdt);
+
+	kbuf.buffer = fdt;
+	kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+	kbuf.buf_align = PAGE_SIZE;
+	kbuf.top_down = true;
+	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+	ret = kexec_add_buffer(&kbuf);
+	if (ret)
+		goto out_free_fdt;
+
+	/* FDT will be freed in arch_kimage_file_post_load_cleanup */
+	image->arch.fdt = fdt;
+
+	fdt_load_addr = kbuf.mem;
+
+	pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+	slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;	/* data of the first program header */
+	ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+				    fdt_load_addr);
+	if (ret)
+		pr_err("Error setting up the purgatory.\n");
+
+	goto out;	/* skip out_free_fdt: the fdt now belongs to image->arch, even if ret is set */
+
+out_free_fdt:
+	kvfree(fdt);
+out:
+	kfree(modified_cmdline);
+	kexec_free_elf_info(&elf_info);
+
+	return ret ? ERR_PTR(ret) : NULL;	/* NULL on success, ERR_PTR(ret) on failure */
+}
+
+const struct kexec_file_ops kexec_elf64_ops = {
+	.probe = kexec_elf_probe,
+	.load = elf64_load,
+};
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
new file mode 100644
index 0000000000..4284f76cbe
--- /dev/null
+++ b/arch/powerpc/kexec/file_load.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <asm/setup.h>
+
+#define SLAVE_CODE_SIZE		256	/* First 0x100 bytes */
+
+/**
+ * setup_kdump_cmdline - Build the kdump kernel's command line by placing
+ *                       "elfcorehdr=<addr> " in front of the given one.
+ * @image:               Kexec image; its elf_load_addr supplies <addr>.
+ * @cmdline:             Command line parameters to copy after the option.
+ * @cmdline_len:         Number of bytes to copy from @cmdline.
+ *
+ * kdump segment must be setup before calling this function.
+ *
+ * Returns a new COMMAND_LINE_SIZE buffer on success, NULL otherwise.
+ */
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+			  unsigned long cmdline_len)
+{
+	char *buf;
+	int prefix_len;
+
+	buf = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+	if (buf == NULL)
+		return NULL;
+
+	/* the option goes first; the caller's parameters follow it */
+	prefix_len = sprintf(buf, "elfcorehdr=0x%lx ", image->elf_load_addr);
+
+	if (prefix_len + cmdline_len <= COMMAND_LINE_SIZE) {
+		memcpy(buf + prefix_len, cmdline, cmdline_len);
+		/* the last byte is always a NUL, whatever was copied */
+		buf[COMMAND_LINE_SIZE - 1] = '\0';
+		return buf;
+	}
+
+	pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+	kfree(buf);
+	return NULL;
+}
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image:		kexec image.
+ * @slave_code:		Slave code for the purgatory.
+ * @fdt:		Flattened device tree for the next kernel (not used here).
+ * @kernel_load_addr:	Address where the kernel is loaded.
+ * @fdt_load_addr:	Address where the flattened device tree is loaded.
+ *
+ * Return: 0 on success, or negative errno if any purgatory symbol access fails.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+		    const void *fdt, unsigned long kernel_load_addr,
+		    unsigned long fdt_load_addr)
+{
+	unsigned int *slave_code_buf, master_entry;
+	int ret;
+
+	slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+	if (!slave_code_buf)
+		return -ENOMEM;
+
+	/* Read purgatory's current first 0x100 bytes to save its first word. */
+	ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+					     slave_code_buf, SLAVE_CODE_SIZE,
+					     true);
+	if (ret)
+		goto out_free;
+
+	/* Overlay the new kernel's slave code, keeping purgatory's word 0. */
+	master_entry = slave_code_buf[0];
+	memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+	slave_code_buf[0] = master_entry;
+	ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+					     slave_code_buf, SLAVE_CODE_SIZE,
+					     false);
+out_free:
+	kfree(slave_code_buf);
+	if (ret)
+		return ret;
+
+	ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+					     sizeof(kernel_load_addr), false);
+	if (ret)
+		return ret;
+
+	/* The result of the last write is the result of the whole setup. */
+	return kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
+					      sizeof(fdt_load_addr), false);
+}
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
new file mode 100644
index 0000000000..a3de5369d2
--- /dev/null
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -0,0 +1,1354 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/of.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
+#include <asm/drmem.h>
+#include <asm/firmware.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+#include <asm/mmzone.h>
+#include <asm/iommu.h>
+#include <asm/prom.h>
+#include <asm/plpks.h>
+
+/*
+ * Scratch state used while building the values of the linux,usable-memory
+ * and linux,drconf-usable-memory properties: a growable array of u64 cells
+ * plus the list of usable ranges each memory node or LMB is compared with.
+ */
+struct umem_info {
+	u64 *buf;		/* data buffer for usable-memory property */
+	u32 size;		/* size allocated for the data buffer, in bytes */
+	u32 max_entries;	/* maximum no. of u64 entries buf can hold */
+	u32 idx;		/* index of the next entry to be written */
+
+	/* usable memory ranges to look up */
+	unsigned int nr_ranges;
+	const struct range *ranges;
+};
+
+/*
+ * NULL-terminated table of the kexec file loaders provided by this file.
+ * Only the ELF64 loader is listed.
+ */
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+	&kexec_elf64_ops,
+	NULL
+};
+
+/**
+ * get_exclude_memory_ranges - Collect the regions that kexec load segments
+ *                             must not overlap: tce-table, initrd, htab,
+ *                             kernel, rtas, opal and reserved memory.
+ * @mem_ranges:                Range list to add the memory ranges to.
+ *
+ * The resulting list is sorted before it is returned.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/* Each step runs only if every earlier one succeeded */
+	ret = add_tce_mem_ranges(mem_ranges);
+	if (!ret)
+		ret = add_initrd_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_htab_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_kernel_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_opal_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_reserved_mem_ranges(mem_ranges);
+
+	if (ret) {
+		pr_err("Failed to setup exclude memory ranges\n");
+		return ret;
+	}
+
+	/* exclude memory ranges should be sorted for easy lookup */
+	sort_memory_ranges(*mem_ranges, true);
+	return 0;
+}
+
+/**
+ * get_usable_memory_ranges - Collect the regions the kdump kernel may use:
+ *                            low memory up to the end of crashkernel,
+ *                            rtas, opal and tce-table.
+ * @mem_ranges:               Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/*
+	 * Guests were seen to fail early in boot when low memory (first
+	 * memory block?) was missing from usable memory, so the range added
+	 * is [0, crashk_res.end] rather than just the crashkernel region.
+	 * The crashed kernel's memory must in turn be put in the reserve
+	 * map so that the kdump kernel does not use it.
+	 */
+	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+	if (!ret)
+		ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_opal_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_tce_mem_ranges(mem_ranges);
+
+	if (ret)
+		pr_err("Failed to setup usable memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ *                           first/crashing kernel's memory regions that
+ *                           would be exported via an elfcore.
+ * @mem_ranges:              Range list to add the memory ranges to.
+ *
+ * Walks every memblock memory range, leaving out the backup source region
+ * (it gets its own entry at the end), removes the crashkernel region, then
+ * appends the RTAS, OPAL and backup regions and sorts the list.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+	phys_addr_t base, end;
+	struct crash_mem *tmem;
+	u64 i;
+	/* Stays 0 if the walk below finds no memory range at all */
+	int ret = 0;
+
+	for_each_mem_range(i, &base, &end) {
+		u64 size = end - base;
+
+		/* Skip backup memory region, which needs a separate entry */
+		if (base == BACKUP_SRC_START) {
+			if (size > BACKUP_SRC_SIZE) {
+				base = BACKUP_SRC_END + 1;
+				size -= BACKUP_SRC_SIZE;
+			} else
+				continue;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			goto out;
+
+		/* Try merging adjacent ranges before reallocation attempt */
+		if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+			sort_memory_ranges(*mem_ranges, true);
+	}
+
+	/* Reallocate memory ranges if there is no space to split ranges */
+	tmem = *mem_ranges;
+	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+		tmem = realloc_mem_ranges(mem_ranges);
+		if (!tmem) {
+			/* Report the failure instead of the stale ret == 0 */
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Exclude crashkernel region; an empty list has nothing to exclude */
+	if (tmem) {
+		ret = crash_exclude_mem_range(tmem, crashk_res.start,
+					      crashk_res.end);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+	 *        regions are exported to save their context at the time of
+	 *        crash, they should actually be backed up just like the
+	 *        first 64K bytes of memory.
+	 */
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* create a separate program header for the backup region */
+	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+	if (ret)
+		goto out;
+
+	sort_memory_ranges(*mem_ranges, false);
+out:
+	if (ret)
+		pr_err("Failed to setup crash memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_reserved_memory_ranges - Collect the regions (rtas, tce-table and
+ *                              reserved memory) that should be added to
+ *                              the memory reserve map to ensure they are
+ *                              protected from any mischief.
+ * @mem_ranges:                 Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_tce_mem_ranges(mem_ranges);
+	if (!ret)
+		ret = add_reserved_mem_ranges(mem_ranges);
+
+	if (ret)
+		pr_err("Failed to setup reserved memory ranges\n");
+	return ret;
+}
+
+/**
+ * __locate_mem_hole_top_down - Walk the memory regions from the highest
+ *                              address down and pick the first one that,
+ *                              clamped to [buf_min, buf_max], can hold the
+ *                              buffer. If found, sets kbuf->mem.
+ * @kbuf:                       Buffer contents and memory parameters.
+ * @buf_min:                    Minimum address for the buffer.
+ * @buf_max:                    Maximum address for the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
+				      u64 buf_min, u64 buf_max)
+{
+	phys_addr_t lo, hi;
+	u64 idx;
+
+	for_each_mem_range_rev(idx, &lo, &hi) {
+		/*
+		 * memblock reports [lo, hi) while this code works with
+		 * inclusive [lo, hi] bounds, so step hi back by one.
+		 */
+		hi -= 1;
+
+		/* Region is entirely above the window: keep going down */
+		if (lo > buf_max)
+			continue;
+
+		/* Region is entirely below the window: no hole exists */
+		if (hi < buf_min)
+			break;
+
+		/* Clamp the region to the requested window */
+		if (lo < buf_min)
+			lo = buf_min;
+		if (hi > buf_max)
+			hi = buf_max;
+
+		lo = ALIGN(lo, kbuf->buf_align);
+		if (lo >= hi || (hi - lo + 1) < kbuf->memsz)
+			continue;
+
+		/* Fits: place the buffer as high as alignment allows */
+		kbuf->mem = ALIGN_DOWN(hi - kbuf->memsz + 1, kbuf->buf_align);
+		return 0;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+/**
+ * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
+ *                                  suitable buffer with top down approach.
+ * @kbuf:                           Buffer contents and memory parameters.
+ * @buf_min:                        Minimum address for the buffer.
+ * @buf_max:                        Maximum address for the buffer.
+ * @emem:                           Exclude memory ranges.
+ *
+ * Walks @emem from its last entry to its first (the walk only makes sense
+ * if @emem is sorted by address), trying the gap above each excluded range
+ * and finally the remaining space down to @buf_min.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
+					  u64 buf_min, u64 buf_max,
+					  const struct crash_mem *emem)
+{
+	int i, ret = 0, err = -EADDRNOTAVAIL;
+	u64 start, end, tmin, tmax;
+
+	tmax = buf_max;
+	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
+		start = emem->ranges[i].start;
+		end = emem->ranges[i].end;
+
+		/* Excluded range lies entirely above the current window */
+		if (start > tmax)
+			continue;
+
+		/* There is a gap between this range and tmax: try it */
+		if (end < tmax) {
+			tmin = (end < buf_min ? buf_min : end + 1);
+			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
+			if (!ret)
+				return 0;
+		}
+
+		/*
+		 * Nothing usable is left below this excluded range. Test
+		 * before computing start - 1, so that a range starting at
+		 * address 0 cannot wrap tmax around and make the fallback
+		 * search below ignore buf_max and the exclusions.
+		 */
+		if (start <= buf_min) {
+			ret = err;
+			break;
+		}
+
+		tmax = start - 1;
+		ret = 0;
+	}
+
+	/* Last chance: whatever is left between buf_min and tmax */
+	if (!ret) {
+		tmin = buf_min;
+		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
+	}
+	return ret;
+}
+
+/**
+ * __locate_mem_hole_bottom_up - Walk the memory regions from the lowest
+ *                               address up and pick the first one that,
+ *                               clamped to [buf_min, buf_max], can hold the
+ *                               buffer. If found, sets kbuf->mem.
+ * @kbuf:                        Buffer contents and memory parameters.
+ * @buf_min:                     Minimum address for the buffer.
+ * @buf_max:                     Maximum address for the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
+				       u64 buf_min, u64 buf_max)
+{
+	phys_addr_t lo, hi;
+	u64 idx;
+
+	for_each_mem_range(idx, &lo, &hi) {
+		/*
+		 * memblock reports [lo, hi) while this code works with
+		 * inclusive [lo, hi] bounds, so step hi back by one.
+		 */
+		hi -= 1;
+
+		/* Region is entirely below the window: keep going up */
+		if (hi < buf_min)
+			continue;
+
+		/* Region is entirely above the window: no hole exists */
+		if (lo > buf_max)
+			break;
+
+		/* Clamp the region to the requested window */
+		if (lo < buf_min)
+			lo = buf_min;
+		if (hi > buf_max)
+			hi = buf_max;
+
+		lo = ALIGN(lo, kbuf->buf_align);
+		if (lo >= hi || (hi - lo + 1) < kbuf->memsz)
+			continue;
+
+		/* Fits: place the buffer at the lowest aligned address */
+		kbuf->mem = lo;
+		return 0;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+/**
+ * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
+ *                                   suitable buffer with bottom up approach.
+ * @kbuf:                            Buffer contents and memory parameters.
+ * @buf_min:                         Minimum address for the buffer.
+ * @buf_max:                         Maximum address for the buffer.
+ * @emem:                            Exclude memory ranges.
+ *
+ * Walks @emem from its first entry to its last (presumably sorted by
+ * address - confirm against the caller), trying the gap below each
+ * excluded range and finally the remaining space up to @buf_max.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
+					   u64 buf_min, u64 buf_max,
+					   const struct crash_mem *emem)
+{
+	int i, ret = 0, err = -EADDRNOTAVAIL;
+	u64 start, end, tmin, tmax;
+
+	/* tmin is the lowest address still worth searching from */
+	tmin = buf_min;
+	for (i = 0; i < emem->nr_ranges; i++) {
+		start = emem->ranges[i].start;
+		end = emem->ranges[i].end;
+
+		/* Excluded range lies entirely below the current window */
+		if (end < tmin)
+			continue;
+
+		/* There is a gap between tmin and this range: try it */
+		if (start > tmin) {
+			tmax = (start > buf_max ? buf_max : start - 1);
+			ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
+			if (!ret)
+				return 0;
+		}
+
+		/* Resume the search just above this excluded range */
+		tmin = end + 1;
+
+		/* Window exhausted: fail without the fallback search below */
+		if (tmin > buf_max) {
+			ret = err;
+			break;
+		}
+		/* Clear a failed gap lookup so the fallback search can run */
+		ret = 0;
+	}
+
+	/* Last chance: whatever is left between tmin and buf_max */
+	if (!ret) {
+		tmax = buf_max;
+		ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
+	}
+	return ret;
+}
+
+/**
+ * check_realloc_usable_mem - Make sure the buffer can take more entries,
+ *                            growing it by MEM_RANGE_CHUNK_SZ if not.
+ * @um_info:                  Usable memory buffer and ranges info.
+ * @cnt:                      No. of entries to accommodate.
+ *
+ * If the reallocation fails, @um_info is left unmodified, so um_info->buf
+ * still refers to the old buffer.
+ *
+ * Returns buffer on success, NULL on error.
+ */
+static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
+{
+	u64 *grown;
+	u32 grown_sz;
+
+	/* Enough room already: nothing to do */
+	if ((um_info->idx + cnt) <= um_info->max_entries)
+		return um_info->buf;
+
+	grown_sz = um_info->size + MEM_RANGE_CHUNK_SZ;
+	grown = krealloc(um_info->buf, grown_sz, GFP_KERNEL);
+	if (!grown)
+		return NULL;
+
+	um_info->buf = grown;
+	um_info->size = grown_sz;
+	um_info->max_entries = (um_info->size / sizeof(u64));
+	return grown;
+}
+
+/**
+ * add_usable_mem - Append to the buffer, as big-endian (base, size) pairs,
+ *                  the parts of the usable memory ranges that fall within
+ *                  the given memory range.
+ * @um_info:        Usable memory buffer and ranges info.
+ * @base:           Base address of memory range to look for.
+ * @end:            End address of memory range to look for.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
+{
+	int i;
+
+	for (i = 0; i < um_info->nr_ranges; i++) {
+		u64 lo = um_info->ranges[i].start;
+		u64 hi = um_info->ranges[i].end;
+
+		/* A range fully inside [base, end] is taken unchanged */
+		if (!(lo >= base && hi <= end)) {
+			/* No overlap with [base, end]: nothing to add */
+			if (!(base < hi && end > lo))
+				continue;
+
+			/* Partial overlap: trim to [base, end] */
+			if (lo < base)
+				lo = base;
+			if (hi > end)
+				hi = end;
+		}
+
+		if (!check_realloc_usable_mem(um_info, 2))
+			return -ENOMEM;
+
+		um_info->buf[um_info->idx++] = cpu_to_be64(lo);
+		um_info->buf[um_info->idx++] = cpu_to_be64(hi - lo + 1);
+	}
+
+	return 0;
+}
+
+/**
+ * kdump_setup_usable_lmb - Callback handed to walk_drmem_lmbs; for one LMB
+ *                          it writes a range count followed by the LMB's
+ *                          usable (base, size) pairs into the buffer.
+ * @lmb:                    LMB info.
+ * @usm:                    linux,drconf-usable-memory property value.
+ * @data:                   Pointer to usable memory buffer and ranges info.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
+				  void *data)
+{
+	struct umem_info *um_info = data;
+	int count_slot, ret;
+	u64 lmb_first, lmb_last;
+
+	/*
+	 * kdump load isn't supported on kernels already booted with
+	 * linux,drconf-usable-memory property.
+	 */
+	if (*usm) {
+		pr_err("linux,drconf-usable-memory property already exists!");
+		return -EINVAL;
+	}
+
+	/* Set aside one entry that will receive the range count */
+	count_slot = um_info->idx;
+	if (!check_realloc_usable_mem(um_info, 1))
+		return -ENOMEM;
+	um_info->idx++;
+
+	lmb_first = lmb->base_addr;
+	lmb_last = lmb_first + drmem_lmb_size() - 1;
+	ret = add_usable_mem(um_info, lmb_first, lmb_last);
+	if (ret)
+		return ret;
+
+	/*
+	 * Every range added took two entries (base & size), so halve the
+	 * number of entries written after the count slot.
+	 */
+	um_info->buf[count_slot] =
+			cpu_to_be64((um_info->idx - count_slot - 1) / 2);
+	return 0;
+}
+
+#define NODE_PATH_LEN		256
+/**
+ * add_usable_mem_property - Add usable memory property for the given
+ *                           memory node.
+ * @fdt:                     Flattened device tree for the kdump kernel.
+ * @dn:                      Memory node.
+ * @um_info:                 Usable memory buffer and ranges info.
+ *
+ * Looks up @dn's path in @fdt, intersects every (addr, size) tuple of the
+ * node's "reg" property with the usable ranges in @um_info and stores the
+ * result as "linux,usable-memory". A node without a "reg" property is left
+ * untouched. A reference on @dn is held for the duration of the call and
+ * dropped on every exit path.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem_property(void *fdt, struct device_node *dn,
+				   struct umem_info *um_info)
+{
+	int n_mem_addr_cells, n_mem_size_cells, node;
+	char path[NODE_PATH_LEN];
+	int i, len, ranges, ret;
+	const __be32 *prop;
+	u64 base, end;
+
+	of_node_get(dn);
+
+	if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
+		pr_err("Buffer (%d) too small for memory node: %pOF\n",
+		       NODE_PATH_LEN, dn);
+		/* Leave through "out" so the reference taken above is put */
+		ret = -EOVERFLOW;
+		goto out;
+	}
+	pr_debug("Memory node path: %s\n", path);
+
+	/* Now that we know the path, find its offset in kdump kernel's fdt */
+	node = fdt_path_offset(fdt, path);
+	if (node < 0) {
+		pr_err("Malformed device tree: error reading %s\n", path);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Get the address & size cells */
+	n_mem_addr_cells = of_n_addr_cells(dn);
+	n_mem_size_cells = of_n_size_cells(dn);
+	pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells,
+		 n_mem_size_cells);
+
+	/* Start a fresh property value; guarantee room for a (0,0) tuple */
+	um_info->idx  = 0;
+	if (!check_realloc_usable_mem(um_info, 2)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	prop = of_get_property(dn, "reg", &len);
+	if (!prop || len <= 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * "reg" property represents sequence of (addr,size) tuples
+	 * each representing a memory range.
+	 */
+	ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+
+	for (i = 0; i < ranges; i++) {
+		base = of_read_number(prop, n_mem_addr_cells);
+		prop += n_mem_addr_cells;
+		end = base + of_read_number(prop, n_mem_size_cells) - 1;
+		prop += n_mem_size_cells;
+
+		ret = add_usable_mem(um_info, base, end);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * No kdump kernel usable memory found in this memory node.
+	 * Write (0,0) tuple in linux,usable-memory property for
+	 * this region to be ignored.
+	 */
+	if (um_info->idx == 0) {
+		um_info->buf[0] = 0;
+		um_info->buf[1] = 0;
+		um_info->idx = 2;
+	}
+
+	ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
+			  (um_info->idx * sizeof(u64)));
+
+out:
+	of_node_put(dn);
+	return ret;
+}
+
+
+/**
+ * update_usable_mem_fdt - Restrict the kdump kernel's memory usage by
+ *                         writing linux,drconf-usable-memory (when a
+ *                         dynamic reconfiguration memory node exists) and
+ *                         linux,usable-memory (for every memory node) into
+ *                         its fdt.
+ * @fdt:                   Flattened device tree for the kdump kernel.
+ * @usable_mem:            Usable memory ranges for kdump kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
+{
+	struct umem_info um_info;
+	struct device_node *dn;
+	int node, ret = 0;
+
+	if (!usable_mem) {
+		pr_err("Usable memory ranges for kdump kernel not found\n");
+		return -ENOENT;
+	}
+
+	/* Start with an empty buffer; it grows on demand */
+	um_info.buf  = NULL;
+	um_info.size = 0;
+	um_info.max_entries = 0;
+	um_info.idx  = 0;
+	/* Memory ranges to look up */
+	um_info.nr_ranges = usable_mem->nr_ranges;
+	um_info.ranges = &(usable_mem->ranges[0]);
+
+	node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+	if (node < 0) {
+		if (node != -FDT_ERR_NOTFOUND) {
+			pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
+			return -EINVAL;
+		}
+		pr_debug("No dynamic reconfiguration memory found\n");
+	}
+
+	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (dn) {
+		ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
+		of_node_put(dn);
+
+		if (ret) {
+			pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
+			goto out;
+		}
+
+		ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
+				  um_info.buf, (um_info.idx * sizeof(u64)));
+		if (ret) {
+			pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s",
+			       fdt_strerror(ret));
+			goto out;
+		}
+	}
+
+	/*
+	 * Every memory node gets a linux,usable-memory property in the
+	 * matching node of the kdump kernel's fdt.
+	 */
+	for_each_node_by_type(dn, "memory") {
+		ret = add_usable_mem_property(fdt, dn, &um_info);
+		if (!ret)
+			continue;
+
+		pr_err("Failed to set linux,usable-memory property for %s node",
+		       dn->full_name);
+		/* Leaving the iterator early: drop the node ourselves */
+		of_node_put(dn);
+		break;
+	}
+
+out:
+	kfree(um_info.buf);
+	return ret;
+}
+
+/**
+ * load_backup_segment - Add a BACKUP_SRC_SIZE segment for the backup region
+ *                       and record where it was placed.
+ * @image:               Kexec image.
+ * @kbuf:                Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+	void *dummy_src;
+	int rc;
+
+	/*
+	 * The backup region is filled in purgatory, after a crash, from the
+	 * backup source, so a source buffer carries no meaning for it. The
+	 * load segment code does not know about such segments though, so
+	 * hand it a zeroed dummy buffer to keep it happy for now.
+	 */
+	dummy_src = vzalloc(BACKUP_SRC_SIZE);
+	if (!dummy_src)
+		return -ENOMEM;
+
+	kbuf->top_down = false;
+	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+	kbuf->memsz = BACKUP_SRC_SIZE;
+	kbuf->bufsz = BACKUP_SRC_SIZE;
+	kbuf->buffer = dummy_src;
+
+	rc = kexec_add_buffer(kbuf);
+	if (rc) {
+		vfree(dummy_src);
+		return rc;
+	}
+
+	image->arch.backup_start = kbuf->mem;
+	image->arch.backup_buf = dummy_src;
+	return 0;
+}
+
+/**
+ * update_backup_region_phdr - Update backup region's offset for the core to
+ *                             export the region appropriately.
+ * @image:                     Kexec image.
+ * @ehdr:                      ELF core header.
+ *
+ * Assumes an exclusive program header is setup for the backup region
+ * in the ELF headers, and that the program headers immediately follow
+ * @ehdr in memory. The first program header whose physical address is
+ * BACKUP_SRC_START gets its file offset pointed at image->arch.backup_start.
+ *
+ * Returns nothing.
+ */
+static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
+{
+	Elf64_Phdr *phdr;
+	unsigned int i;
+
+	phdr = (Elf64_Phdr *)(ehdr + 1);
+	/* Advance phdr with i so that every program header is examined */
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (phdr->p_paddr != BACKUP_SRC_START)
+			continue;
+
+		phdr->p_offset = image->arch.backup_start;
+		pr_debug("Backup region offset updated to 0x%lx\n",
+			 image->arch.backup_start);
+		return;
+	}
+}
+
+/**
+ * load_elfcorehdr_segment - Build the ELF core headers from the crash
+ *                           memory ranges and add them as the elfcorehdr
+ *                           segment needed to load kdump kernel.
+ * @image:                   Kexec image.
+ * @kbuf:                    Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+	struct crash_mem *crash_ranges = NULL;
+	unsigned long hdrs_sz;
+	void *hdrs = NULL;
+	int rc;
+
+	rc = get_crash_memory_ranges(&crash_ranges);
+	if (rc)
+		goto free_ranges;
+
+	/* Setup elfcorehdr segment */
+	rc = crash_prepare_elf64_headers(crash_ranges, false, &hdrs, &hdrs_sz);
+	if (rc) {
+		pr_err("Failed to prepare elf headers for the core\n");
+		goto free_ranges;
+	}
+
+	/* Fix the offset for backup region in the ELF header */
+	update_backup_region_phdr(image, hdrs);
+
+	kbuf->top_down = false;
+	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+	kbuf->memsz = hdrs_sz;
+	kbuf->bufsz = hdrs_sz;
+	kbuf->buffer = hdrs;
+
+	rc = kexec_add_buffer(kbuf);
+	if (!rc) {
+		/* The image keeps the headers from here on */
+		image->elf_headers = hdrs;
+		image->elf_headers_sz = hdrs_sz;
+		image->elf_load_addr = kbuf->mem;
+	} else {
+		vfree(hdrs);
+	}
+
+free_ranges:
+	kfree(crash_ranges);
+	return rc;
+}
+
+/**
+ * load_crashdump_segments_ppc64 - Initialize the additional segments needed
+ *                                 to load kdump kernel: the backup region
+ *                                 first, then the elfcorehdr.
+ * @image:                         Kexec image.
+ * @kbuf:                          Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int load_crashdump_segments_ppc64(struct kimage *image,
+				  struct kexec_buf *kbuf)
+{
+	int rc = load_backup_segment(image, kbuf);
+
+	/* Backup segment - first 64K bytes of the crashing kernel */
+	if (rc) {
+		pr_err("Failed to load backup segment\n");
+		return rc;
+	}
+	pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem);
+
+	/* elfcorehdr segment - to export crashing kernel's vmcore */
+	rc = load_elfcorehdr_segment(image, kbuf);
+	if (rc) {
+		pr_err("Failed to load elfcorehdr segment\n");
+		return rc;
+	}
+	pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
+		 image->elf_load_addr, kbuf->bufsz, kbuf->memsz);
+
+	return 0;
+}
+
+/**
+ * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
+ *                         variables and call setup_purgatory() to initialize
+ *                         common global variable.
+ * @image:                 kexec image.
+ * @slave_code:            Slave code for the purgatory.
+ * @fdt:                   Flattened device tree for the next kernel.
+ * @kernel_load_addr:      Address where the kernel is loaded.
+ * @fdt_load_addr:         Address where the flattened device tree is loaded.
+ *
+ * On top of the common symbols this sets "run_at_load" (crash images only),
+ * "backup_start" and, when an /ibm,opal node exists, "opal_base" and
+ * "opal_entry".
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+			  const void *fdt, unsigned long kernel_load_addr,
+			  unsigned long fdt_load_addr)
+{
+	struct device_node *dn = NULL;
+	int ret;
+
+	ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+			      fdt_load_addr);
+	if (ret)
+		goto out;
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		u32 my_run_at_load = 1;
+
+		/*
+		 * Tell relocatable kernel to run at load address
+		 * via the word meant for that at 0x5c.
+		 */
+		ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
+						     &my_run_at_load,
+						     sizeof(my_run_at_load),
+						     false);
+		if (ret)
+			goto out;
+	}
+
+	/* Tell purgatory where to look for backup region */
+	ret = kexec_purgatory_get_set_symbol(image, "backup_start",
+					     &image->arch.backup_start,
+					     sizeof(image->arch.backup_start),
+					     false);
+	if (ret)
+		goto out;
+
+	/* Setup OPAL base & entry values */
+	dn = of_find_node_by_path("/ibm,opal");
+	if (dn) {
+		/*
+		 * The read results are not checked, so start from 0: a
+		 * missing property then yields 0 in purgatory rather than
+		 * whatever happened to be on the stack.
+		 */
+		u64 val = 0;
+
+		of_property_read_u64(dn, "opal-base-address", &val);
+		ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
+						     sizeof(val), false);
+		if (ret)
+			goto out;
+
+		val = 0;
+		of_property_read_u64(dn, "opal-entry-address", &val);
+		ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
+						     sizeof(val), false);
+	}
+out:
+	if (ret)
+		pr_err("Failed to setup purgatory symbols");
+	/* dn is still NULL if we bailed out before the OPAL lookup */
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * cpu_node_size - Estimate the size of a CPU node in the FDT.
+ *
+ * The estimate is taken from the first node of type "cpu": the length of
+ * its name plus 5, plus the name length and value length of each of its
+ * properties. It is computed once and cached in a static variable.
+ *
+ * Returns the max size of a CPU node in the FDT.
+ */
+static unsigned int cpu_node_size(void)
+{
+	static unsigned int size;
+	struct device_node *cpu;
+	struct property *prop;
+
+	/*
+	 * The per CPU node size is assumed not to change during the
+	 * system's life, so a cached value is reused as is.
+	 */
+	if (size)
+		return size;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+	if (WARN_ON_ONCE(!cpu)) {
+		// Unlikely to happen
+		return 0;
+	}
+
+	/* One CPU node is measured and taken as typical for all of them */
+	size += strlen(cpu->name) + 5;
+	for_each_property_of_node(cpu, prop)
+		size += strlen(prop->name) + prop->length;
+
+	of_node_put(cpu);
+	return size;
+}
+
+/**
+ * kexec_extra_fdt_size_ppc64 - Estimate how much the FDT has to grow to be
+ *                              set up for the kexec/kdump kernel.
+ * @image:                      kexec image being loaded.
+ *
+ * Returns the estimated extra size needed for kexec/kdump kernel FDT.
+ */
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
+{
+	unsigned int extra_size = 0, cpu_nodes = 0;
+	struct device_node *dn;
+	u64 usm_entries;
+
+	// Room for the password blob; the key name already has extra space
+	if (plpks_is_available())
+		extra_size += (unsigned int)plpks_get_passwordlen();
+
+	/* Everything below concerns the kdump kernel only */
+	if (image->type != KEXEC_TYPE_CRASH)
+		return extra_size;
+
+	/*
+	 * The kdump kernel's FDT gains linux,usable-memory and
+	 * linux,drconf-usable-memory properties. Approximate the number of
+	 * usable memory entries and budget for them.
+	 */
+	if (drmem_lmb_size()) {
+		usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
+			       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
+		extra_size += (unsigned int)(usm_entries * sizeof(u64));
+	}
+
+	/*
+	 * Count the CPU nodes in the current DT so that room can be kept
+	 * for the ones added since the boot time.
+	 */
+	for_each_node_by_type(dn, "cpu")
+		cpu_nodes++;
+
+	if (cpu_nodes <= boot_cpu_node_count)
+		return extra_size;
+
+	extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+	return extra_size;
+}
+
+/**
+ * add_node_props - Copy every property of a device node into a node of
+ *                  the fdt.
+ * @fdt:            Flattened device tree of the kernel
+ * @node_offset:    offset of the node to add a property at
+ * @dn:             device node pointer
+ *
+ * Stops at the first property that cannot be set.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+	struct property *prop;
+	int rc = 0;
+
+	if (!dn)
+		return -EINVAL;
+
+	for_each_property_of_node(dn, prop) {
+		rc = fdt_setprop(fdt, node_offset, prop->name, prop->value,
+				 prop->length);
+		if (rc >= 0)
+			continue;
+
+		pr_err("Unable to add %s property: %s\n", prop->name, fdt_strerror(rc));
+		return rc;
+	}
+	return rc;
+}
+
+/**
+ * update_cpus_node - Rebuild the /cpus node of the flattened device tree
+ *                    from the live device tree: the existing node is
+ *                    deleted, then recreated with the properties of /cpus
+ *                    and one subnode per node of type "cpu".
+ * @fdt:              Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_cpus_node(void *fdt)
+{
+	struct device_node *live_cpus, *dn;
+	int cpus_off, cpu_off, ret = 0;
+
+	cpus_off = fdt_path_offset(fdt, "/cpus");
+	if (cpus_off < 0 && cpus_off != -FDT_ERR_NOTFOUND) {
+		pr_err("Malformed device tree: error reading /cpus node: %s\n",
+		       fdt_strerror(cpus_off));
+		return cpus_off;
+	}
+
+	/* Drop the existing node, if there is one, before recreating it */
+	if (cpus_off > 0) {
+		ret = fdt_del_node(fdt, cpus_off);
+		if (ret < 0) {
+			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
+			return -EINVAL;
+		}
+	}
+
+	/* Add cpus node to fdt */
+	cpus_off = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+	if (cpus_off < 0) {
+		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_off));
+		return -EINVAL;
+	}
+
+	/* Add cpus node properties */
+	live_cpus = of_find_node_by_path("/cpus");
+	ret = add_node_props(fdt, cpus_off, live_cpus);
+	of_node_put(live_cpus);
+	if (ret < 0)
+		return ret;
+
+	/* Loop through all subnodes of cpus and add them to fdt */
+	for_each_node_by_type(dn, "cpu") {
+		cpu_off = fdt_add_subnode(fdt, cpus_off, dn->full_name);
+		if (cpu_off < 0) {
+			pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+			       fdt_strerror(cpu_off));
+			ret = cpu_off;
+			break;
+		}
+
+		ret = add_node_props(fdt, cpu_off, dn);
+		if (ret < 0)
+			break;
+	}
+
+	/* Puts the node the loop stopped on (dn is NULL if it ran out) */
+	of_node_put(dn);
+	return ret;
+}
+
+/*
+ * Mirror one property of device node @dn into the fdt node at @node_offset:
+ * set it when @dn has it, delete it from the fdt when only the fdt has it.
+ * Returns the libfdt result of that operation, or -FDT_ERR_NOTFOUND when
+ * neither side has the property.
+ */
+static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
+			 const char *propname)
+{
+	int live_len = 0, fdt_len = 0;
+	const void *live_val, *fdt_val;
+
+	live_val = of_get_property(dn, propname, &live_len);
+	fdt_val = fdt_getprop(fdt, node_offset, propname, &fdt_len);
+
+	if (live_val)
+		return fdt_setprop(fdt, node_offset, propname, live_val, live_len);
+
+	if (fdt_val)
+		return fdt_delprop(fdt, node_offset, propname);
+
+	return -FDT_ERR_NOTFOUND;
+}
+
+/*
+ * For every live device node that has the @dmapropname property and a
+ * subnode of the same name under the fdt root, copy "ibm,dma-window" and
+ * @dmapropname into that fdt node. Does nothing unless the firmware
+ * reports the LPAR feature.
+ *
+ * Returns 0 on success, or the negative value returned by copy_property()
+ * for the first property that could not be copied.
+ */
+static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
+{
+	struct device_node *dn;
+	int pci_offset, root_offset, ret = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	root_offset = fdt_path_offset(fdt, "/");
+	for_each_node_with_property(dn, dmapropname) {
+		pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
+		if (pci_offset < 0)
+			continue;
+
+		ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
+		if (ret >= 0)
+			ret = copy_property(fdt, pci_offset, dn, dmapropname);
+
+		if (ret < 0) {
+			/*
+			 * Leaving the iterator early: put the node here, as
+			 * update_usable_mem_fdt() does for its own iterator.
+			 */
+			of_node_put(dn);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * setup_new_fdt_ppc64 - Update the flattened device-tree of the kernel
+ *                       being loaded.
+ * @image:               kexec image being loaded.
+ * @fdt:                 Flattened device tree for the next kernel.
+ * @initrd_load_addr:    Address where the next initrd will be loaded.
+ * @initrd_len:          Size of the next initrd, or 0 if there will be none.
+ * @cmdline:             Command line for the next kernel, or NULL if there will
+ *                       be none.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+			unsigned long initrd_load_addr,
+			unsigned long initrd_len, const char *cmdline)
+{
+	struct crash_mem *umem = NULL, *rmem = NULL;
+	int i, nr_ranges, ret;
+
+	/*
+	 * Restrict memory usage for kdump kernel by setting up
+	 * usable memory ranges and memory reserve map.
+	 */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = get_usable_memory_ranges(&umem);
+		if (ret)
+			goto out;
+
+		ret = update_usable_mem_fdt(fdt, umem);
+		if (ret) {
+			pr_err("Error setting up usable-memory property for kdump kernel\n");
+			goto out;
+		}
+
+		/*
+		 * Ensure we don't touch crashed kernel's memory except the
+		 * first 64K of RAM, which will be backed up.
+		 */
+		ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
+				      crashk_res.start - BACKUP_SRC_SIZE);
+		if (ret) {
+			pr_err("Error reserving crash memory: %s\n",
+			       fdt_strerror(ret));
+			goto out;
+		}
+
+		/* Ensure backup region is not used by kdump/capture kernel */
+		ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
+				      BACKUP_SRC_SIZE);
+		if (ret) {
+			pr_err("Error reserving memory for backup: %s\n",
+			       fdt_strerror(ret));
+			goto out;
+		}
+	}
+
+	/* Update cpus nodes information to account hotplug CPUs. */
+	ret =  update_cpus_node(fdt);
+	if (ret < 0)
+		goto out;
+
+	ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
+	if (ret < 0)
+		goto out;
+
+	ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
+	if (ret < 0)
+		goto out;
+
+	/* Update memory reserve map */
+	ret = get_reserved_memory_ranges(&rmem);
+	if (ret)
+		goto out;
+
+	nr_ranges = rmem ? rmem->nr_ranges : 0;
+	for (i = 0; i < nr_ranges; i++) {
+		u64 base, size;
+
+		base = rmem->ranges[i].start;
+		size = rmem->ranges[i].end - base + 1;
+		ret = fdt_add_mem_rsv(fdt, base, size);
+		if (ret) {
+			pr_err("Error updating memory reserve map: %s\n",
+			       fdt_strerror(ret));
+			goto out;
+		}
+	}
+
+	// If we have PLPKS active, we need to provide the password to the new kernel
+	if (plpks_is_available())
+		ret = plpks_populate_fdt(fdt);
+
+out:
+	kfree(rmem);
+	kfree(umem);
+	return ret;
+}
+
+/**
+ * arch_kexec_locate_mem_hole - Find a memory hole for a kexec segment buffer
+ *                              while avoiding the image's exclude ranges
+ *                              (rtas, opal, tce-table, reserved-ranges and
+ *                              the like). Sets kbuf->mem on success.
+ * @kbuf:                       Buffer contents and memory parameters.
+ *
+ * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+	struct crash_mem **emem = &(kbuf->image->arch.exclude_ranges);
+	u64 buf_min = kbuf->buf_min;
+	u64 buf_max = kbuf->buf_max;
+	int ret;
+
+	/* Without an exclude list there is nothing special to avoid */
+	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
+		pr_warn("No exclude range list. Using the default locate mem hole method\n");
+		return kexec_locate_mem_hole(kbuf);
+	}
+
+	/* Segments for kdump kernel should be within crashkernel region */
+	if (kbuf->image->type == KEXEC_TYPE_CRASH) {
+		if (buf_min < crashk_res.start)
+			buf_min = crashk_res.start;
+		if (buf_max > crashk_res.end)
+			buf_max = crashk_res.end;
+	}
+
+	if (buf_min > buf_max) {
+		pr_err("Invalid buffer min and/or max values\n");
+		return -EINVAL;
+	}
+
+	if (kbuf->top_down)
+		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
+						     *emem);
+	else
+		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
+						      *emem);
+
+	if (ret) {
+		pr_err("Failed to locate memory buffer of size %lu\n",
+		       kbuf->memsz);
+		return ret;
+	}
+
+	/* Add the buffer allocated to the exclude list for the next lookup */
+	add_mem_range(emem, kbuf->mem, kbuf->memsz);
+	sort_memory_ranges(*emem, true);
+	return 0;
+}
+
+/**
+ * arch_kexec_kernel_image_probe - Collects the exclude memory ranges for the
+ *                                 image, then hands over to the default
+ *                                 image probe.
+ * @image:                         kexec image being loaded.
+ * @buf:                           Buffer pointing to elf data.
+ * @buf_len:                       Length of the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len)
+{
+	/* The exclude list is consulted later when placing kexec segments */
+	int err = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+
+	if (!err)
+		return kexec_image_probe_default(image, buf, buf_len);
+
+	pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
+	return err;
+}
+
+/**
+ * arch_kimage_file_post_load_cleanup - Releases the arch specific buffers
+ *                                      attached to the image during load and
+ *                                      then runs the default cleanup.
+ * @image:                              kexec image being loaded.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	/* Release every buffer with the free routine used in the original */
+	kfree(image->arch.exclude_ranges);
+	vfree(image->arch.backup_buf);
+	vfree(image->elf_headers);
+	kvfree(image->arch.fdt);
+
+	/* Reset the now dangling pointers so a repeated cleanup is harmless */
+	image->arch.exclude_ranges = NULL;
+	image->arch.backup_buf = NULL;
+	image->elf_headers = NULL;
+	image->elf_headers_sz = 0;
+	image->arch.fdt = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
new file mode 100644
index 0000000000..fb3e12f152
--- /dev/null
+++ b/arch/powerpc/kexec/ranges.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec ranges: " fmt
+
+#include <linux/sort.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <asm/sections.h>
+#include <asm/kexec_ranges.h>
+
+/**
+ * get_max_nr_ranges - Compute how many range entries fit in a crash_mem
+ *                     allocation of the given size.
+ * @size:              Allocation size of crash_mem structure.
+ *
+ * Returns the maximum no. of ranges.
+ */
+static inline unsigned int get_max_nr_ranges(size_t size)
+{
+	/* Space left for the trailing range array once the header is paid for */
+	size_t payload = size - sizeof(struct crash_mem);
+
+	return payload / sizeof(struct range);
+}
+
+/**
+ * get_mem_rngs_size - Work out the allocated size of a range list from its
+ *                     max_nr_ranges and the allocation chunk size.
+ * @mem_rngs:          Memory ranges.
+ *
+ * Returns the allocated size of @mem_rngs, or 0 for a NULL list.
+ */
+static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
+{
+	size_t used;
+
+	if (!mem_rngs)
+		return 0;
+
+	used = sizeof(struct crash_mem);
+	used += mem_rngs->max_nr_ranges * sizeof(struct range);
+
+	/*
+	 * Allocations grow in MEM_RANGE_CHUNK_SZ steps, so round up to
+	 * recover the real length of the buffer.
+	 */
+	return ALIGN(used, MEM_RANGE_CHUNK_SZ);
+}
+
+/**
+ * __add_mem_range - Append a memory range at the end of a range list.
+ * @mem_ranges:      Range list to add the memory range to.
+ * @base:            Base address of the range to add.
+ * @size:            Size of the memory range to add.
+ *
+ * Grows the list through realloc_mem_ranges() when it is missing or full.
+ * No overlap check is done here.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *list = *mem_ranges;
+	struct range *slot;
+	u64 last = base + size - 1;
+
+	if (!list || (list->nr_ranges == list->max_nr_ranges)) {
+		list = realloc_mem_ranges(mem_ranges);
+		if (!list)
+			return -ENOMEM;
+	}
+
+	/* Range ends are stored inclusive */
+	slot = &list->ranges[list->nr_ranges];
+	slot->start = base;
+	slot->end = last;
+	pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
+		 base, last, list->nr_ranges);
+	list->nr_ranges++;
+	return 0;
+}
+
+/**
+ * __merge_memory_ranges - Merges the given memory ranges list.
+ * @mem_rngs:              Range list to merge.
+ *
+ * Assumes a range list sorted by start address. Ranges that overlap or are
+ * directly adjacent are folded into one range; the result is compacted at
+ * the front of the array and nr_ranges is updated accordingly. A NULL or
+ * empty list is left untouched.
+ *
+ * Returns nothing.
+ */
+static void __merge_memory_ranges(struct crash_mem *mem_rngs)
+{
+	struct range *ranges;
+	unsigned int i, idx;
+
+	/* An empty list must stay empty instead of ending up with 1 entry */
+	if (!mem_rngs || !mem_rngs->nr_ranges)
+		return;
+
+	idx = 0;
+	ranges = &(mem_rngs->ranges[0]);
+	for (i = 1; i < mem_rngs->nr_ranges; i++) {
+		/*
+		 * Compare against the range being built (idx), not the
+		 * previous input entry: an earlier entry may reach further
+		 * than its successor. The second test catches adjacency and
+		 * cannot wrap, as it is only evaluated when start > end >= 0.
+		 */
+		if (ranges[i].start <= ranges[idx].end ||
+		    ranges[i].start - 1 == ranges[idx].end) {
+			/* Never shrink the merged range for a contained one */
+			if (ranges[i].end > ranges[idx].end)
+				ranges[idx].end = ranges[i].end;
+			continue;
+		}
+
+		/* Disjoint range: start a new output entry */
+		idx++;
+		if (i != idx)
+			ranges[idx] = ranges[i];
+	}
+	mem_rngs->nr_ranges = idx + 1;
+}
+
+/* cmp_func_t callback for sort(): orders ranges by ascending start address */
+static int rngcmp(const void *_x, const void *_y)
+{
+	const struct range *lhs = _x;
+	const struct range *rhs = _y;
+
+	if (lhs->start == rhs->start)
+		return 0;
+
+	return (lhs->start > rhs->start) ? 1 : -1;
+}
+
+/**
+ * sort_memory_ranges - Sorts the given memory ranges list by start address.
+ * @mem_rngs:           Range list to sort.
+ * @merge:              If true, merge the list after sorting.
+ *
+ * A NULL list is ignored.
+ *
+ * Returns nothing.
+ */
+void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
+{
+	struct range *rng;
+	int idx;
+
+	if (!mem_rngs)
+		return;
+
+	/* In-place sort of the range array */
+	rng = &(mem_rngs->ranges[0]);
+	sort(rng, mem_rngs->nr_ranges, sizeof(mem_rngs->ranges[0]), rngcmp, NULL);
+
+	if (merge)
+		__merge_memory_ranges(mem_rngs);
+
+	/* Dump the resulting list when debugging is enabled */
+	pr_debug("Memory ranges:\n");
+	for (idx = 0; idx < mem_rngs->nr_ranges; idx++, rng++) {
+		pr_debug("\t[%03d][%#016llx - %#016llx]\n", idx,
+			 rng->start,
+			 rng->end);
+	}
+}
+
+/**
+ * realloc_mem_ranges - Grow a range list by MEM_RANGE_CHUNK_SZ bytes.
+ * @mem_ranges:         Memory ranges to reallocate.
+ *
+ * A NULL list is allocated from scratch with nr_ranges set to 0. If the
+ * allocation fails, the old list is freed and *@mem_ranges is set to NULL.
+ *
+ * Returns pointer to reallocated memory on success, NULL otherwise.
+ */
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct crash_mem *old = *mem_ranges;
+	struct crash_mem *grown;
+	unsigned int used = old ? old->nr_ranges : 0;
+	size_t new_size = get_mem_rngs_size(old) + MEM_RANGE_CHUNK_SZ;
+
+	grown = krealloc(old, new_size, GFP_KERNEL);
+	if (!grown) {
+		/* krealloc() left the old buffer alone; release it here */
+		kfree(old);
+		*mem_ranges = NULL;
+		return NULL;
+	}
+
+	/* Needed for a fresh allocation, where the header is uninitialised */
+	grown->nr_ranges = used;
+	grown->max_nr_ranges = get_max_nr_ranges(new_size);
+	*mem_ranges = grown;
+
+	return grown;
+}
+
+/**
+ * add_mem_range - Extends the first existing range that overlaps the new
+ *                 one; otherwise appends the new range to the list.
+ * @mem_ranges:    Range list to add the memory range to.
+ * @base:          Base address of the range to add.
+ * @size:          Size of the memory range to add.
+ *
+ * A zero @size is accepted and ignored. (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *list = *mem_ranges;
+	struct range *cur;
+	unsigned int idx;
+	u64 last;
+
+	if (!size)
+		return 0;
+
+	last = base + size - 1;
+
+	if (list) {
+		for (idx = 0; idx < list->nr_ranges; idx++) {
+			cur = &list->ranges[idx];
+
+			/* No overlap with this entry, try the next one */
+			if (base >= cur->end || last <= cur->start)
+				continue;
+
+			/* Widen the existing entry to cover the new range */
+			if (base < cur->start)
+				cur->start = base;
+			if (last > cur->end)
+				cur->end = last;
+			return 0;
+		}
+	}
+
+	/* Missing/empty list or no overlap found: append as a new entry */
+	return __add_mem_range(mem_ranges, base, size);
+}
+
+/**
+ * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
+ * @mem_ranges:         Range list to add the memory range(s) to.
+ *
+ * Walks all device-tree nodes of type "pci" and adds the range described by
+ * their "linux,tce-base" and "linux,tce-size" properties. Nodes lacking
+ * either property are skipped.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_tce_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn = NULL;
+	int ret = 0;
+
+	for_each_node_by_type(dn, "pci") {
+		u64 base;
+		u32 size;
+
+		/*
+		 * Read the properties one after the other instead of OR-ing
+		 * the two results: OR-ing two different negative errnos
+		 * yields an unrelated error code and would defeat the
+		 * -EINVAL check below.
+		 */
+		ret = of_property_read_u64(dn, "linux,tce-base", &base);
+		if (!ret)
+			ret = of_property_read_u32(dn, "linux,tce-size", &size);
+		if (ret) {
+			/*
+			 * It is ok to have pci nodes without tce. So, ignore
+			 * property does not exist error.
+			 */
+			if (ret == -EINVAL) {
+				ret = 0;
+				continue;
+			}
+			break;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			break;
+	}
+
+	/* Drops the iterator's reference after a break; no-op otherwise */
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_initrd_mem_range - Adds initrd range to the given memory ranges list,
+ *                        if the initrd was retained.
+ * @mem_ranges:           Range list to add the memory range to.
+ *
+ * The range is taken from the "linux,initrd-start" and "linux,initrd-end"
+ * properties of the chosen node, and only when "retain_initrd" appears on
+ * the saved command line.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_initrd_mem_range(struct crash_mem **mem_ranges)
+{
+	u64 base, end;
+	int ret;
+
+	/* This range means something, only if initrd was retained */
+	if (!strstr(saved_command_line, "retain_initrd"))
+		return 0;
+
+	/*
+	 * Check each read on its own: OR-ing two different negative errnos
+	 * would hand an unrelated error code back to the caller.
+	 */
+	ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
+	if (ret)
+		return ret;
+
+	ret = of_property_read_u64(of_chosen, "linux,initrd-end", &end);
+	if (ret)
+		return ret;
+
+	return add_mem_range(mem_ranges, base, end - base + 1);
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/**
+ * add_htab_mem_range - Adds the hash page table range to the given memory
+ *                      ranges list, when a hash page table address is set.
+ * @mem_ranges:         Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+	if (htab_address)
+		return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
+
+	/* No htab address recorded, so there is nothing to add */
+	return 0;
+}
+#endif
+
+/**
+ * add_kernel_mem_range - Adds the region occupied by the running kernel to
+ *                        the given memory ranges list.
+ * @mem_ranges:           Range list to add the memory range to.
+ *
+ * The range added starts at physical address 0 and is __pa(_end) bytes long.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_kernel_mem_range(struct crash_mem **mem_ranges)
+{
+	u64 kernel_end = __pa(_end);
+
+	return add_mem_range(mem_ranges, 0, kernel_end);
+}
+
+/**
+ * add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
+ * @mem_ranges:         Range list to add the memory range to.
+ *
+ * The region is described by the "linux,rtas-base" and "rtas-size"
+ * properties of the "/rtas" node. A missing "/rtas" node is not an error.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_rtas_mem_range(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn;
+	u32 base, size;
+	int ret = 0;
+
+	dn = of_find_node_by_path("/rtas");
+	if (!dn)
+		return 0;
+
+	/*
+	 * Chain the reads instead of OR-ing their results: OR-ing two
+	 * different negative errnos produces an unrelated error code.
+	 */
+	ret = of_property_read_u32(dn, "linux,rtas-base", &base);
+	if (!ret)
+		ret = of_property_read_u32(dn, "rtas-size", &size);
+	if (!ret)
+		ret = add_mem_range(mem_ranges, base, size);
+
+	/* Balance the reference taken by of_find_node_by_path() */
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_opal_mem_range - Adds OPAL region to the given memory ranges list.
+ * @mem_ranges:         Range list to add the memory range to.
+ *
+ * The region is described by the "opal-base-address" and "opal-runtime-size"
+ * properties of the "/ibm,opal" node. A missing node is not an error.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_opal_mem_range(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn;
+	u64 base, size;
+	int ret;
+
+	dn = of_find_node_by_path("/ibm,opal");
+	if (!dn)
+		return 0;
+
+	/*
+	 * Chain the reads instead of OR-ing their results: OR-ing two
+	 * different negative errnos produces an unrelated error code.
+	 */
+	ret = of_property_read_u64(dn, "opal-base-address", &base);
+	if (!ret)
+		ret = of_property_read_u64(dn, "opal-runtime-size", &size);
+	if (!ret)
+		ret = add_mem_range(mem_ranges, base, size);
+
+	/* Balance the reference taken by of_find_node_by_path() */
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_reserved_mem_ranges - Adds the regions listed in the root node's
+ *                           "reserved-ranges" property to the given memory
+ *                           ranges list.
+ * @mem_ranges:              Range list to add the memory ranges to.
+ *
+ * A missing "reserved-ranges" property is not an error.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
+{
+	int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
+	const __be32 *prop, *entry;
+	size_t nr_entries;
+
+	prop = of_get_property(of_root, "reserved-ranges", &len);
+	if (!prop)
+		return 0;
+
+	n_mem_addr_cells = of_n_addr_cells(of_root);
+	n_mem_size_cells = of_n_size_cells(of_root);
+	cells = n_mem_addr_cells + n_mem_size_cells;
+
+	/* Each reserved range is an (address,size) pair */
+	nr_entries = len / (sizeof(u32) * cells);
+	entry = prop;
+	for (i = 0; i < nr_entries; i++, entry += cells) {
+		u64 base = of_read_number(entry, n_mem_addr_cells);
+		u64 size = of_read_number(entry + n_mem_addr_cells,
+					  n_mem_size_cells);
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
new file mode 100644
index 0000000000..d9f0dd9b34
--- /dev/null
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains kexec low-level functions.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011,  IBM Corporation
+ * 		Author: Suzuki Poulose <suzuki@in.ibm.com>
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kexec.h>
+
+	.text
+
+	/*
+	 * Must be relocatable PIC code callable as a C function.
+	 */
+	.globl relocate_new_kernel
+relocate_new_kernel:
+	/* r3 = page_list   */
+	/* r4 = reboot_code_buffer */
+	/* r5 = start_address      */
+
+#ifdef CONFIG_PPC_85xx
+
+	/*
+	 * Keep the three arguments in r29-r31 across the included entry
+	 * mapping code (presumably because it uses r3-r5 as scratch; its
+	 * source is not part of this file).
+	 */
+	mr	r29, r3
+	mr	r30, r4
+	mr	r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include <kernel/85xx_entry_mapping.S>
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+	/* Put the arguments back where the common copy code expects them */
+	mr      r3, r29
+	mr      r4, r30
+	mr      r5, r31
+
+	/* r0 = 0 is stored as the stack back chain by the common code */
+	li	r0, 0
+#elif defined(CONFIG_44x)
+
+	/*
+	 * Registers that stay live across the 44x mapping code below:
+	 *   r29-r31 : saved r3-r5 arguments (restored at ppc44x_map_done)
+	 *   r23     : TLB index of the entry we are executing from
+	 *   r24     : TLB index used for the temporary mapping (1 or 2)
+	 *   r25     : saved XLAT word of the entry we are executing from
+	 *   r7      : address space (TS) value currently being targeted
+	 *   r10/r11 : page-number mask / page-offset mask of our page
+	 */
+
+	/* Save our parameters */
+	mr	r29, r3
+	mr	r30, r4
+	mr	r31, r5
+
+#ifdef CONFIG_PPC_47x
+	/* Check for 47x cores */
+	mfspr	r3,SPRN_PVR
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,PVR_476FPE@h
+	beq	setup_map_47x
+	cmplwi	cr0,r3,PVR_476@h
+	beq	setup_map_47x
+	cmplwi	cr0,r3,PVR_476_ISS@h
+	beq	setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ *    jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ */
+
+	/*
+	 * Load the PID with kernel PID (0).
+	 * Also load our MSR_IS and TID to MMUCR for TLB search.
+	 */
+	li	r3, 0
+	mtspr	SPRN_PID, r3
+	mfmsr	r4
+	andi.	r4,r4,MSR_IS@l
+	beq	wmmucr
+	oris	r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+	mtspr	SPRN_MMUCR,r3
+	sync
+
+	/*
+	 * Invalidate all the TLB entries except the current entry
+	 * where we are running from
+	 */
+	bcl	20,31,$+4			/* Find our address */
+0:	mflr	r5				/* Make it accessible */
+	tlbsx	r23,0,r5			/* Find entry we are in */
+	li	r4,0				/* Start at TLB entry 0 */
+	li	r3,0				/* Set PAGEID inval value */
+1:	cmpw	r23,r4				/* Is this our entry? */
+	beq	skip				/* If so, skip the inval */
+	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
+skip:
+	addi	r4,r4,1				/* Increment */
+	cmpwi	r4,64				/* Are we done?	*/
+	bne	1b				/* If not, repeat */
+	isync
+
+	/* Create a temp mapping and jump to it */
+	andi.	r6, r23, 1		/* Find the index to use */
+	addi	r24, r6, 1		/* r24 will contain 1 or 2 */
+
+	mfmsr	r9			/* get the MSR */
+	rlwinm	r5, r9, 27, 31, 31	/* Extract the MSR[IS] */
+	xori	r7, r5, 1		/* Use the other address space */
+
+	/* Read the current mapping entries */
+	tlbre	r3, r23, PPC44x_TLB_PAGEID
+	tlbre	r4, r23, PPC44x_TLB_XLAT
+	tlbre	r5, r23, PPC44x_TLB_ATTRIB
+
+	/* Save our current XLAT entry */
+	mr	r25, r4
+
+	/* Extract the TLB PageSize */
+	li	r10, 1 			/* r10 will hold PageSize */
+	rlwinm	r11, r3, 0, 24, 27	/* bits 24-27 */
+
+	/* XXX: As of now we use 256M, 4K pages */
+	cmpwi	r11, PPC44x_TLB_256M
+	bne	tlb_4k
+	rotlwi	r10, r10, 28		/* r10 = 256M */
+	b	write_out
+tlb_4k:
+	cmpwi	r11, PPC44x_TLB_4K
+	bne	default
+	rotlwi	r10, r10, 12		/* r10 = 4K */
+	b	write_out
+default:
+	rotlwi	r10, r10, 10		/* r10 = 1K */
+
+write_out:
+	/*
+	 * Write out the tmp 1:1 mapping for this code in other address space
+	 * Fixup  EPN = RPN , TS=other address space
+	 */
+	insrwi	r3, r7, 1, 23		/* Bit 23 is TS for PAGEID field */
+
+	/* Write out the tmp mapping entries */
+	tlbwe	r3, r24, PPC44x_TLB_PAGEID
+	tlbwe	r4, r24, PPC44x_TLB_XLAT
+	tlbwe	r5, r24, PPC44x_TLB_ATTRIB
+
+	subi	r11, r10, 1		/* PageOffset Mask = PageSize - 1 */
+	not	r10, r11		/* Mask for PageNum */
+
+	/* Switch to other address space in MSR */
+	insrwi	r9, r7, 1, 26		/* Set MSR[IS] = r7 */
+
+	bcl	20,31,$+4
+1:	mflr	r8
+	addi	r8, r8, (2f-1b)		/* Find the target offset */
+
+	/* Jump to the tmp mapping */
+	mtspr	SPRN_SRR0, r8
+	mtspr	SPRN_SRR1, r9
+	rfi
+
+2:
+	/* Invalidate the entry we were executing from */
+	li	r3, 0
+	tlbwe	r3, r23, PPC44x_TLB_PAGEID
+
+	/* attribute fields. rwx for SUPERVISOR mode */
+	li	r5, 0
+	ori	r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+	/* Create 1:1 mapping in 256M pages */
+	xori	r7, r7, 1			/* Revert back to Original TS */
+
+	li	r8, 0				/* PageNumber */
+	li	r6, 3				/* TLB Index, start at 3  */
+
+	/* Loop runs 8 times: 8 x 256M = 2GiB, written to TLB indexes 3..10 */
+next_tlb:
+	rotlwi	r3, r8, 28			/* Create EPN (bits 0-3) */
+	mr	r4, r3				/* RPN = EPN  */
+	ori	r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+	insrwi	r3, r7, 1, 23			/* Set TS from r7 */
+
+	tlbwe	r3, r6, PPC44x_TLB_PAGEID	/* PageID field : EPN, V, SIZE */
+	tlbwe	r4, r6, PPC44x_TLB_XLAT		/* Address translation : RPN   */
+	tlbwe	r5, r6, PPC44x_TLB_ATTRIB	/* Attributes */
+
+	addi	r8, r8, 1			/* Increment PN */
+	addi	r6, r6, 1			/* Increment TLB Index */
+	cmpwi	r8, 8				/* Are we done ? */
+	bne	next_tlb
+	isync
+
+	/* Jump to the new mapping 1:1 */
+	li	r9,0
+	insrwi	r9, r7, 1, 26			/* Set MSR[IS] = r7 */
+
+	bcl	20,31,$+4
+1:	mflr	r8
+	and	r8, r8, r11			/* Get our offset within page */
+	addi	r8, r8, (2f-1b)
+
+	and	r5, r25, r10			/* Get our target PageNum */
+	or	r8, r8, r5			/* Target jump address */
+
+	mtspr	SPRN_SRR0, r8
+	mtspr	SPRN_SRR1, r9
+	rfi
+2:
+	/* Invalidate the tmp entry we used */
+	li	r3, 0
+	tlbwe	r3, r24, PPC44x_TLB_PAGEID
+	sync
+	b	ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+	/* 1:1 mapping for 47x */
+
+	/*
+	 * Registers that stay live across the 47x mapping code below:
+	 *   r23     : result of the tlbsx lookup for the entry we run from
+	 *   r24-r26 : TLB words 0-2 of the entry we are executing from
+	 *   r7      : address space (TS) value currently being targeted
+	 */
+
+setup_map_47x:
+
+	/*
+	 * Load the kernel pid (0) to PID and also to MMUCR[TID].
+	 * Also set the MSR IS->MMUCR STS
+	 */
+	li	r3, 0
+	mtspr	SPRN_PID, r3			/* Set PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4, r4, MSR_IS@l		/* TS=1? */
+	beq	1f				/* If not, leave STS=0 */
+	oris	r3, r3, PPC47x_MMUCR_STS@h	/* Set STS=1 */
+1:	mtspr	SPRN_MMUCR, r3			/* Put MMUCR */
+	sync
+
+	/* Find the entry we are running from */
+	bcl	20,31,$+4
+2:	mflr	r23
+	tlbsx	r23, 0, r23
+	tlbre	r24, r23, 0			/* TLB Word 0 */
+	tlbre	r25, r23, 1			/* TLB Word 1 */
+	tlbre	r26, r23, 2			/* TLB Word 2 */
+
+
+	/*
+	 * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+	 * of 4k page size in all  4 ways (0-3 in r3).
+	 * This would invalidate the entire UTLB including the one we are
+	 * running from. However the shadow TLB entries would help us
+	 * to continue the execution, until we flush them (rfi/isync).
+	 */
+	addis	r3, 0, 0x8000			/* specify the way */
+	addi	r4, 0, 0			/* TLB Word0 = (EPN=0, VALID = 0) */
+	addi	r5, 0, 0
+	b	clear_utlb_entry
+
+	/* Align the loop to speed things up. from head_44x.S */
+	.align	6
+
+clear_utlb_entry:
+
+	tlbwe	r4, r3, 0
+	tlbwe	r5, r3, 1
+	tlbwe	r5, r3, 2
+	addis	r3, r3, 0x2000			/* Increment the way */
+	/* r3 wraps to 0 after four steps of 0x20000000 from 0x80000000 */
+	cmpwi	r3, 0
+	bne	clear_utlb_entry
+	addis	r3, 0, 0x8000
+	addis	r4, r4, 0x100			/* Increment the EPN */
+	/* r4 wraps to 0 after 256 steps of 0x01000000 */
+	cmpwi	r4, 0
+	bne	clear_utlb_entry
+
+	/* Create the entries in the other address space */
+	mfmsr	r5
+	rlwinm	r7, r5, 27, 31, 31		/* Get the TS (Bit 26) from MSR */
+	xori	r7, r7, 1			/* r7 = !TS */
+
+	insrwi	r24, r7, 1, 21			/* Change the TS in the saved TLB word 0 */
+
+	/*
+	 * write out the TLB entries for the tmp mapping
+	 * Use way '0' so that we could easily invalidate it later.
+	 */
+	lis	r3, 0x8000			/* Way '0' */
+
+	tlbwe	r24, r3, 0
+	tlbwe	r25, r3, 1
+	tlbwe	r26, r3, 2
+
+	/* Update the msr to the new TS */
+	insrwi	r5, r7, 1, 26
+
+	bcl	20,31,$+4
+1:	mflr	r6
+	addi	r6, r6, (2f-1b)
+
+	mtspr	SPRN_SRR0, r6
+	mtspr	SPRN_SRR1, r5
+	rfi
+
+	/*
+	 * Now we are in the tmp address space.
+	 * Create a 1:1 mapping for 0-2GiB in the original TS.
+	 */
+2:
+	li	r3, 0
+	li	r4, 0				/* TLB Word 0 */
+	li	r5, 0				/* TLB Word 1 */
+	li	r6, 0
+	ori	r6, r6, PPC47x_TLB2_S_RWX	/* TLB word 2 */
+
+	li	r8, 0				/* PageIndex */
+
+	xori	r7, r7, 1			/* revert back to original TS */
+
+	/* Loop runs 8 times: 8 x 256M = 2GiB */
+write_utlb:
+	rotlwi	r5, r8, 28			/* RPN = PageIndex * 256M */
+						/* ERPN = 0 as we don't use memory above 2G */
+
+	mr	r4, r5				/* EPN = RPN */
+	ori	r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+	insrwi	r4, r7, 1, 21			/* Insert the TS to Word 0 */
+
+	tlbwe	r4, r3, 0			/* Write out the entries */
+	tlbwe	r5, r3, 1
+	tlbwe	r6, r3, 2
+	addi	r8, r8, 1
+	cmpwi	r8, 8				/* Have we completed ? */
+	bne	write_utlb
+
+	/* make sure we complete the TLB write up */
+	isync
+
+	/*
+	 * Prepare to jump to the 1:1 mapping.
+	 * 1) Extract page size of the tmp mapping
+	 *    DSIZ = TLB_Word0[22:27]
+	 * 2) Calculate the physical address of the address
+	 *    to jump to.
+	 */
+	rlwinm	r10, r24, 0, 22, 27
+
+	cmpwi	r10, PPC47x_TLB0_4K
+	bne	0f
+	li	r10, 0x1000			/* r10 = 4k */
+	/*
+	 * NOTE(review): this bl leaves LR pointing at label 0 below, which is
+	 * two instructions (8 bytes) before label 1, while the code at 1
+	 * adds (2f-1b) to LR. In this 4K path r4 therefore appears to end up
+	 * 8 bytes short of 2f - confirm whether this path is ever taken.
+	 */
+	bl	1f
+
+0:
+	/* Defaults to 256M */
+	lis	r10, 0x1000
+
+	bcl	20,31,$+4
+1:	mflr	r4
+	addi	r4, r4, (2f-1b)			/* virtual address  of 2f */
+
+	subi	r11, r10, 1			/* offsetmask = Pagesize - 1 */
+	not	r10, r11			/* Pagemask = ~(offsetmask) */
+
+	and	r5, r25, r10			/* Physical page */
+	and	r6, r4, r11			/* offset within the current page */
+
+	or	r5, r5, r6			/* Physical address for 2f */
+
+	/* Switch the TS in MSR to the original one */
+	mfmsr	r8
+	insrwi	r8, r7, 1, 26
+
+	mtspr	SPRN_SRR1, r8
+	mtspr	SPRN_SRR0, r5
+	rfi
+
+2:
+	/* Invalidate the tmp mapping */
+	lis	r3, 0x8000			/* Way '0' */
+
+	clrrwi	r24, r24, 12			/* Clear the valid bit */
+	tlbwe	r24, r3, 0
+	tlbwe	r25, r3, 1
+	tlbwe	r26, r3, 2
+
+	/* Make sure we complete the TLB write and flush the shadow TLB */
+	isync
+
+#endif
+
+ppc44x_map_done:
+
+
+	/* Restore the parameters */
+	mr	r3, r29
+	mr	r4, r30
+	mr	r5, r31
+
+	/* r0 = 0 is stored as the stack back chain by the common code */
+	li	r0, 0
+#else
+	/* Neither 85xx nor 44x: the MMU can simply be switched off */
+	li	r0, 0
+
+	/*
+	 * Set Machine Status Register to a known status,
+	 * switch the MMU off and jump to 1: in a single step.
+	 */
+
+	mr	r8, r0
+	ori     r8, r8, MSR_RI|MSR_ME
+	mtspr	SPRN_SRR1, r8
+	/*
+	 * Target is label 1 at its offset from relocate_new_kernel, rebased
+	 * onto r4 (reboot_code_buffer) - presumably where this code has been
+	 * copied to; confirm against the caller.
+	 */
+	addi	r8, r4, 1f - relocate_new_kernel
+	mtspr	SPRN_SRR0, r8
+	sync
+	rfi
+
+1:
+#endif
+	/* from this point address translation is turned off */
+	/* and interrupts are disabled */
+
+	/*
+	 * Register roles in the copy loop below:
+	 *   r0 : current page-list entry (page address plus IND_* flag bits)
+	 *   r3 : pointer used to walk the current indirection page
+	 *   r8 : current destination address
+	 *   r9 : current source address
+	 *   r6 : running XOR of every word copied (not read again below)
+	 *   r5 : start_address, untouched until the final jump
+	 */
+
+	/* set a new stack at the bottom of our page... */
+	/* (not really needed now) */
+	/* r1 = reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE - 8 */
+	addi	r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+	stw	r0, 0(r1)
+
+	/* Do the copies */
+	li	r6, 0 /* checksum */
+	/* The page_list argument itself is decoded as the first entry */
+	mr	r0, r3
+	b	1f
+
+0:	/* top, read another word for the indirection page */
+	lwzu	r0, 4(r3)
+
+1:
+	/* is it a destination page? (r8) */
+	rlwinm.	r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+	beq	2f
+
+	rlwinm	r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+	b	0b
+
+2:	/* is it an indirection page? (r3) */
+	rlwinm.	r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+	beq	2f
+
+	rlwinm	r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+	/* Back up one word: the lwzu at 0: pre-increments r3 by 4 */
+	subi	r3, r3, 4
+	b	0b
+
+2:	/* are we done? */
+	rlwinm.	r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+	beq	2f
+	b	3f
+
+2:	/* is it a source page? (r9) */
+	rlwinm.	r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+	beq	0b
+
+	rlwinm	r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+	/* Copy one page, a word at a time, with CTR as the word counter */
+	li	r7, PAGE_SIZE / 4
+	mtctr   r7
+	/* Bias both pointers by -4 for the pre-incrementing lwzu/stwu */
+	subi    r9, r9, 4
+	subi    r8, r8, 4
+9:
+	lwzu    r0, 4(r9)  /* do the copy */
+	xor	r6, r6, r0
+	stwu    r0, 4(r8)
+	dcbst	0, r8
+	sync
+	icbi	0, r8
+	bdnz    9b
+
+	/* Undo the -4 bias; r8 now points at the next destination page */
+	addi    r9, r9, 4
+	addi    r8, r8, 4
+	b	0b
+
+3:
+
+	/* To be certain of avoiding problems with self-modifying code
+	 * execute a serializing instruction here.
+	 */
+	isync
+	sync
+
+	mfspr	r3, SPRN_PIR /* current core we are running on */
+	mr	r4, r5 /* load physical address of chunk called */
+
+	/* jump to the entry point, usually the setup routine */
+	mtlr	r5
+	blrl
+
+	/* Spin forever should the entry point ever return */
+1:	b	1b
+
+relocate_new_kernel_end:
+
+	/* Byte length of the code between the two labels above */
+	.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
new file mode 100644
index 0000000000..9026119542
--- /dev/null
+++ b/arch/powerpc/kvm/Kconfig
@@ -0,0 +1,253 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	help
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	bool
+	select PREEMPT_NOTIFIERS
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_VCPU_ASYNC_IOCTL
+	select KVM_VFIO
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
+	select INTERVAL_TREE
+
+config KVM_BOOK3S_HANDLER
+	bool
+
+config KVM_BOOK3S_32_HANDLER
+	bool
+	select KVM_BOOK3S_HANDLER
+	select KVM_MMIO
+
+config KVM_BOOK3S_64_HANDLER
+	bool
+	select KVM_BOOK3S_HANDLER
+
+config KVM_BOOK3S_PR_POSSIBLE
+	bool
+	select KVM_MMIO
+	select MMU_NOTIFIER
+
+config KVM_BOOK3S_HV_POSSIBLE
+	bool
+
+config KVM_BOOK3S_32
+	tristate "KVM support for PowerPC book3s_32 processors"
+	depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
+	depends on !CONTEXT_TRACKING_USER
+	select KVM
+	select KVM_BOOK3S_32_HANDLER
+	select KVM_BOOK3S_PR_POSSIBLE
+	select PPC_FPU
+	help
+	  Support running unmodified book3s_32 guest kernels
+	  in virtual machines on book3s_32 host processors.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
+config KVM_BOOK3S_64
+	tristate "KVM support for PowerPC book3s_64 processors"
+	depends on PPC_BOOK3S_64
+	select KVM_BOOK3S_64_HANDLER
+	select KVM
+	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+	select PPC_64S_HASH_MMU
+	select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
+	help
+	  Support running unmodified book3s_64 and book3s_32 guest kernels
+	  in virtual machines on book3s_64 host processors.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
+config KVM_BOOK3S_64_HV
+	tristate "KVM for POWER7 and later using hypervisor mode in host"
+	depends on KVM_BOOK3S_64 && PPC_POWERNV
+	select KVM_BOOK3S_HV_POSSIBLE
+	select MMU_NOTIFIER
+	select CMA
+	help
+	  Support running unmodified book3s_64 guest kernels in
+	  virtual machines on POWER7 and newer processors that have
+	  hypervisor mode available to the host.
+
+	  If you say Y here, KVM will use the hardware virtualization
+	  facilities of POWER7 (and later) processors, meaning that
+	  guest operating systems will run at full hardware speed
+	  using supervisor and user modes.  However, this also means
+	  that KVM is not usable under PowerVM (pHyp), is only usable
+	  on POWER7 or later processors, and cannot emulate a
+	  different processor from the host processor.
+
+	  If unsure, say N.
+
+config KVM_BOOK3S_64_PR
+	tristate "KVM support without using hypervisor mode in host"
+	depends on KVM_BOOK3S_64
+	depends on !CONTEXT_TRACKING_USER
+	select KVM_BOOK3S_PR_POSSIBLE
+	help
+	  Support running guest kernels in virtual machines on processors
+	  without using hypervisor mode in the host, by running the
+	  guest in user mode (problem state) and emulating all
+	  privileged instructions and registers.
+
+	  This is only available for hash MMU mode and only supports
+	  guests that use hash MMU mode.
+
+	  This is not as fast as using hypervisor mode, but works on
+	  machines where hypervisor mode is not available or not usable,
+	  and can emulate processors that are different from the host
+	  processor, including emulating 32-bit processors on a 64-bit
+	  host.
+
+	  Selecting this option will cause the SCV facility to be
+	  disabled when the kernel is booted on the pseries platform in
+	  hash MMU mode (regardless of PR VMs running). When any PR VMs
+	  are running, "AIL" mode is disabled which may slow interrupts
+	  and system calls on the host.
+
+config KVM_BOOK3S_HV_EXIT_TIMING
+	bool
+
+config KVM_BOOK3S_HV_P9_TIMING
+	bool "Detailed timing for the P9 entry point"
+	select KVM_BOOK3S_HV_EXIT_TIMING
+	depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
+	help
+	  Calculate time taken for each vcpu during vcpu entry and
+	  exit, time spent inside the guest and time spent handling
+	  hypercalls and page faults. The total, minimum and maximum
+	  times in nanoseconds together with the number of executions
+	  are reported in debugfs in kvm/vm#/vcpu#/timings.
+
+	  If unsure, say N.
+
+config KVM_BOOK3S_HV_P8_TIMING
+	bool "Detailed timing for hypervisor real-mode code (for POWER8)"
+	select KVM_BOOK3S_HV_EXIT_TIMING
+	depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS && !KVM_BOOK3S_HV_P9_TIMING
+	help
+	  Calculate time taken for each vcpu in the real-mode guest entry,
+	  exit, and interrupt handling code, plus time spent in the guest
+	  and in nap mode due to idle (cede) while other threads are still
+	  in the guest.  The total, minimum and maximum times in nanoseconds
+	  together with the number of executions are reported in debugfs in
+	  kvm/vm#/vcpu#/timings.  The overhead is of the order of 30 - 40
+	  ns per exit on POWER8.
+
+	  If unsure, say N.
+
+config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
+	bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT
+	depends on KVM_BOOK3S_HV_POSSIBLE
+	default !EXPERT
+	help
+	  Old nested HV capable Linux guests have a bug where they don't
+	  reflect the PMU in-use status of their L2 guest to the L0 host
+	  while the L2 PMU registers are live. This can result in loss
+	  of L2 PMU register state, causing perf to not work correctly in
+	  L2 guests.
+
+	  Selecting this option for the L0 host implements a workaround for
+	  those buggy L1s which saves the L2 state, at the cost of performance
+	  in all nested-capable guest entry/exit.
+
+config KVM_BOOKE_HV
+	bool
+
+config KVM_EXIT_TIMING
+	bool "Detailed exit timing"
+	depends on KVM_E500V2 || KVM_E500MC
+	help
+	  Calculate elapsed time for every exit/enter cycle. A per-vcpu
+	  report is available in debugfs kvm/vm#_vcpu#_timing.
+	  The overhead is relatively small, however it is not recommended for
+	  production environments.
+
+	  If unsure, say N.
+
+config KVM_E500V2
+	bool "KVM support for PowerPC E500v2 processors"
+	depends on PPC_E500 && !PPC_E500MC
+	depends on !CONTEXT_TRACKING_USER
+	select KVM
+	select KVM_MMIO
+	select MMU_NOTIFIER
+	help
+	  Support running unmodified E500 guest kernels in virtual machines on
+	  E500v2 host processors.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
+config KVM_E500MC
+	bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
+	depends on PPC_E500MC
+	depends on !CONTEXT_TRACKING_USER
+	select KVM
+	select KVM_MMIO
+	select KVM_BOOKE_HV
+	select MMU_NOTIFIER
+	help
+	  Support running unmodified E500MC/E5500/E6500 guest kernels in
+	  virtual machines on E500MC/E5500/E6500 host processors.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
+config KVM_MPIC
+	bool "KVM in-kernel MPIC emulation"
+	depends on KVM && PPC_E500
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_MSI
+	help
+	  Enable support for emulating MPIC devices inside the
+	  host kernel, rather than relying on userspace to emulate.
+	  Currently, support is limited to certain versions of
+	  Freescale's MPIC implementation.
+
+config KVM_XICS
+	bool "KVM in-kernel XICS emulation"
+	depends on KVM_BOOK3S_64 && !KVM_MPIC
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
+	default y
+	help
+	  Include support for the XICS (eXternal Interrupt Controller
+	  Specification) interrupt controller architecture used on
+	  IBM POWER (pSeries) servers.
+
+config KVM_XIVE
+	bool
+	default y
+	depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
+
+endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
new file mode 100644
index 0000000000..5319d889b1
--- /dev/null
+++ b/arch/powerpc/kvm/Makefile
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+
+include $(srctree)/virt/kvm/Makefile.kvm
+
+common-objs-y += powerpc.o emulate_loadstore.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
+obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
+
+AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
+
+kvm-e500-objs := \
+	$(common-objs-y) \
+	emulate.o \
+	booke.o \
+	booke_emulate.o \
+	booke_interrupts.o \
+	e500.o \
+	e500_mmu.o \
+	e500_mmu_host.o \
+	e500_emulate.o
+kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
+
+kvm-e500mc-objs := \
+	$(common-objs-y) \
+	emulate.o \
+	booke.o \
+	booke_emulate.o \
+	bookehv_interrupts.o \
+	e500mc.o \
+	e500_mmu.o \
+	e500_mmu_host.o \
+	e500_emulate.o
+kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
+
+kvm-pr-y := \
+	fpu.o \
+	emulate.o \
+	book3s_paired_singles.o \
+	book3s_pr.o \
+	book3s_pr_papr.o \
+	book3s_emulate.o \
+	book3s_interrupts.o \
+	book3s_mmu_hpte.o \
+	book3s_64_mmu_host.o \
+	book3s_64_mmu.o \
+	book3s_32_mmu.o
+
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	book3s_64_entry.o \
+	tm.o
+
+ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	book3s_rmhandlers.o
+endif
+
+kvm-hv-y += \
+	book3s_hv.o \
+	book3s_hv_interrupts.o \
+	book3s_64_mmu_hv.o \
+	book3s_64_mmu_radix.o \
+	book3s_hv_nested.o
+
+kvm-hv-$(CONFIG_PPC_UV) += \
+	book3s_hv_uvmem.o
+
+kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+	book3s_hv_tm.o
+
+kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
+	book3s_hv_rm_xics.o
+
+kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+	book3s_hv_tm_builtin.o
+
+ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	book3s_hv_hmi.o \
+	book3s_hv_p9_entry.o \
+	book3s_hv_rmhandlers.o \
+	book3s_hv_rm_mmu.o \
+	book3s_hv_ras.o \
+	book3s_hv_builtin.o \
+	book3s_hv_p9_perf.o \
+	$(kvm-book3s_64-builtin-tm-objs-y) \
+	$(kvm-book3s_64-builtin-xics-objs-y)
+endif
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
+	book3s_xics.o
+
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
+kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
+
+kvm-book3s_64-module-objs := \
+	$(common-objs-y) \
+	book3s.o \
+	book3s_rtas.o \
+	$(kvm-book3s_64-objs-y)
+
+kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
+
+kvm-book3s_32-objs := \
+	$(common-objs-y) \
+	emulate.o \
+	fpu.o \
+	book3s_paired_singles.o \
+	book3s.o \
+	book3s_pr.o \
+	book3s_emulate.o \
+	book3s_interrupts.o \
+	book3s_mmu_hpte.o \
+	book3s_32_mmu_host.o \
+	book3s_32_mmu.o
+kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
+
+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+
+kvm-y += $(kvm-objs-m) $(kvm-objs-y)
+
+obj-$(CONFIG_KVM_E500V2) += kvm.o
+obj-$(CONFIG_KVM_E500MC) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
+
+obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
+obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
+
+obj-y += $(kvm-book3s_64-builtin-objs-y)
+
+# KVM does a lot in real-mode, and 64-bit Book3S KASAN doesn't support that
+ifdef CONFIG_PPC_BOOK3S_64
+KASAN_SANITIZE := n
+endif
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
new file mode 100644
index 0000000000..686d8d9eda
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.c
@@ -0,0 +1,1113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *    Alexander Graf <agraf@suse.de>
+ *    Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/xive.h>
+
+#include "book3s.h"
+#include "trace.h"
+
+/* #define EXIT_DEBUG */
+
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {	/* VM-scope statistics descriptors */
+	KVM_GENERIC_VM_STATS(),	/* generic VM entries come first */
+	STATS_DESC_ICOUNTER(VM, num_2M_pages),
+	STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+
+const struct kvm_stats_header kvm_vm_stats_header = {	/* describes the layout built around kvm_vm_stats_desc */
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),	/* id region starts right after this header */
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,	/* descriptors follow KVM_STATS_NAME_SIZE bytes of id */
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),	/* data starts after the whole descriptor array */
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {	/* per-vcpu statistics descriptors */
+	KVM_GENERIC_VCPU_STATS(),	/* generic vcpu entries come first */
+	STATS_DESC_COUNTER(VCPU, sum_exits),
+	STATS_DESC_COUNTER(VCPU, mmio_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, light_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, isi_exits),
+	STATS_DESC_COUNTER(VCPU, dsi_exits),
+	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+	STATS_DESC_COUNTER(VCPU, dec_exits),
+	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+	STATS_DESC_COUNTER(VCPU, dbell_exits),
+	STATS_DESC_COUNTER(VCPU, gdbell_exits),
+	STATS_DESC_COUNTER(VCPU, ld),
+	STATS_DESC_COUNTER(VCPU, st),
+	STATS_DESC_COUNTER(VCPU, pf_storage),
+	STATS_DESC_COUNTER(VCPU, pf_instruc),
+	STATS_DESC_COUNTER(VCPU, sp_storage),
+	STATS_DESC_COUNTER(VCPU, sp_instruc),
+	STATS_DESC_COUNTER(VCPU, queue_intr),	/* incremented in kvmppc_book3s_queue_irqprio() */
+	STATS_DESC_COUNTER(VCPU, ld_slow),
+	STATS_DESC_COUNTER(VCPU, st_slow),
+	STATS_DESC_COUNTER(VCPU, pthru_all),
+	STATS_DESC_COUNTER(VCPU, pthru_host),
+	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {	/* describes the layout built around kvm_vcpu_stats_desc */
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),	/* id region starts right after this header */
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,	/* descriptors follow KVM_STATS_NAME_SIZE bytes of id */
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),	/* data starts after the whole descriptor array */
+};
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+			unsigned long pending_now, unsigned long old_pending)
+{	/* Report the pending-exception state through kvmppc_set_int_pending(). */
+	if (is_kvmppc_hv_enabled(vcpu->kvm))
+		return;	/* skipped entirely when HV is enabled for this VM */
+	if (pending_now)
+		kvmppc_set_int_pending(vcpu, 1);
+	else if (old_pending)
+		kvmppc_set_int_pending(vcpu, 0);	/* only written on a pending -> idle transition */
+}
+
+static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
+{	/* True when the guest's "critical" value equals its r1 and MSR_PR is clear. */
+	ulong crit_raw;
+	ulong crit_r1;
+	bool crit;
+
+	if (is_kvmppc_hv_enabled(vcpu->kvm))
+		return false;	/* never reported when HV is enabled for this VM */
+
+	crit_raw = kvmppc_get_critical(vcpu);
+	crit_r1 = kvmppc_get_gpr(vcpu, 1);
+
+	/* Truncate crit indicators in 32 bit mode */
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
+		crit_raw &= 0xffffffff;
+		crit_r1 &= 0xffffffff;
+	}
+
+	/* Critical section when crit == r1 */
+	crit = (crit_raw == crit_r1);
+	/* ... and we're in supervisor mode */
+	crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR);
+
+	return crit;
+}
+
+void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
+{	/* Thin dispatcher: hands @vec and @flags to the VM's kvm_ops hook. */
+	vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
+}
+
+static int kvmppc_book3s_vec2irqprio(unsigned int vec)
+{	/* Map an interrupt vector offset to its BOOK3S_IRQPRIO_* value. */
+	unsigned int prio;
+
+	switch (vec) {
+	case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET;		break;
+	case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK;	break;
+	case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE;		break;
+	case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT;		break;
+	case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;		break;
+	case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;		break;
+	case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;		break;
+	case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;		break;
+	case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;		break;
+	case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;		break;
+	case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER;		break;
+	case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL;		break;
+	case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;		break;
+	case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC;		break;
+	case 0xf40: prio = BOOK3S_IRQPRIO_VSX;			break;
+	case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL;		break;
+	default:    prio = BOOK3S_IRQPRIO_MAX;			break;	/* any vector not listed above */
+	}
+
+	return prio;
+}
+
+void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec)
+{	/* Drop the pending bit for @vec, then refresh the int-pending state. */
+	unsigned long old_pending = vcpu->arch.pending_exceptions;	/* snapshot taken before the bit is cleared */
+
+	clear_bit(kvmppc_book3s_vec2irqprio(vec),
+		  &vcpu->arch.pending_exceptions);
+
+	kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions,
+				  old_pending);
+}
+
+void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{	/* Mark the priority bit for @vec as pending; nothing is injected here. */
+	vcpu->stat.queue_intr++;	/* every queue request is counted */
+
+	set_bit(kvmppc_book3s_vec2irqprio(vec),
+		&vcpu->arch.pending_exceptions);
+#ifdef EXIT_DEBUG
+	printk(KERN_INFO "Queueing interrupt %x\n", vec);
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
+
+void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{	/* Injects BOOK3S_INTERRUPT_MACHINE_CHECK directly, bypassing pending_exceptions. */
+	/* might as well deliver this straight away */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, srr1_flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{	/* Injects BOOK3S_INTERRUPT_SYSCALL immediately, with no extra flags. */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{	/* Injects BOOK3S_INTERRUPT_PROGRAM directly, passing @srr1_flags through. */
+	/* might as well deliver this straight away */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, srr1_flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
+
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{	/* Injects BOOK3S_INTERRUPT_FP_UNAVAIL directly, passing @srr1_flags through. */
+	/* might as well deliver this straight away */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, srr1_flags);
+}
+
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{	/* Injects BOOK3S_INTERRUPT_ALTIVEC directly, passing @srr1_flags through. */
+	/* might as well deliver this straight away */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, srr1_flags);
+}
+
+void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{	/* Injects BOOK3S_INTERRUPT_VSX directly, passing @srr1_flags through. */
+	/* might as well deliver this straight away */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, srr1_flags);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{	/* Marks the decrementer interrupt pending; delivery happens later. */
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{	/* Non-zero when the decrementer bit is set in pending_exceptions. */
+	return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
+
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{	/* Withdraws a pending decrementer interrupt. */
+	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                struct kvm_interrupt *irq)
+{	/* Marks the external interrupt pending, as one-shot for KVM_INTERRUPT_SET. */
+	/*
+	 * This case (KVM_INTERRUPT_SET) should never actually arise for
+	 * a pseries guest (because pseries guests expect their interrupt
+	 * controllers to continue asserting an external interrupt request
+	 * until it is acknowledged at the interrupt controller), but is
+	 * included to avoid ABI breakage and potentially for other
+	 * sorts of guest.
+	 *
+	 * There is a subtlety here: HV KVM does not test the
+	 * external_oneshot flag in the code that synthesizes
+	 * external interrupts for the guest just before entering
+	 * the guest.  That is OK even if userspace did do a
+	 * KVM_INTERRUPT_SET on a pseries guest vcpu, because the
+	 * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
+	 * which ends up doing a smp_send_reschedule(), which will
+	 * pull the guest all the way out to the host, meaning that
+	 * we will call kvmppc_core_prepare_to_enter() before entering
+	 * the guest again, and that will handle the external_oneshot
+	 * flag correctly.
+	 */
+	if (irq->irq == KVM_INTERRUPT_SET)
+		vcpu->arch.external_oneshot = 1;	/* consumed and reset by clear_irqprio() */
+
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+}
+
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
+{	/* Withdraws a pending external interrupt. */
+	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+}
+
+void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong srr1_flags,
+				    ulong dar, ulong dsisr)
+{	/* Stores @dar and @dsisr in the vcpu, then injects a data storage interrupt. */
+	kvmppc_set_dar(vcpu, dar);
+	kvmppc_set_dsisr(vcpu, dsisr);
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, srr1_flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage);
+
+void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{	/* Injects BOOK3S_INTERRUPT_INST_STORAGE directly, passing @srr1_flags through. */
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, srr1_flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_inst_storage);
+
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+					 unsigned int priority)
+{	/* Injects the vector for @priority if allowed; returns 1 if injected, else 0. */
+	int deliver = 1;	/* default: deliverable unless a case below says otherwise */
+	int vec = 0;
+	bool crit = kvmppc_critical_section(vcpu);
+
+	switch (priority) {
+	case BOOK3S_IRQPRIO_DECREMENTER:
+		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;	/* needs MSR_EE set and no critical section */
+		vec = BOOK3S_INTERRUPT_DECREMENTER;
+		break;
+	case BOOK3S_IRQPRIO_EXTERNAL:
+		deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;	/* same gating as the decrementer */
+		vec = BOOK3S_INTERRUPT_EXTERNAL;
+		break;
+	case BOOK3S_IRQPRIO_SYSTEM_RESET:
+		vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
+		break;
+	case BOOK3S_IRQPRIO_MACHINE_CHECK:
+		vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
+		break;
+	case BOOK3S_IRQPRIO_DATA_STORAGE:
+		vec = BOOK3S_INTERRUPT_DATA_STORAGE;
+		break;
+	case BOOK3S_IRQPRIO_INST_STORAGE:
+		vec = BOOK3S_INTERRUPT_INST_STORAGE;
+		break;
+	case BOOK3S_IRQPRIO_DATA_SEGMENT:
+		vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
+		break;
+	case BOOK3S_IRQPRIO_INST_SEGMENT:
+		vec = BOOK3S_INTERRUPT_INST_SEGMENT;
+		break;
+	case BOOK3S_IRQPRIO_ALIGNMENT:
+		vec = BOOK3S_INTERRUPT_ALIGNMENT;
+		break;
+	case BOOK3S_IRQPRIO_PROGRAM:
+		vec = BOOK3S_INTERRUPT_PROGRAM;
+		break;
+	case BOOK3S_IRQPRIO_VSX:
+		vec = BOOK3S_INTERRUPT_VSX;
+		break;
+	case BOOK3S_IRQPRIO_ALTIVEC:
+		vec = BOOK3S_INTERRUPT_ALTIVEC;
+		break;
+	case BOOK3S_IRQPRIO_FP_UNAVAIL:
+		vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
+		break;
+	case BOOK3S_IRQPRIO_SYSCALL:
+		vec = BOOK3S_INTERRUPT_SYSCALL;
+		break;
+	case BOOK3S_IRQPRIO_DEBUG:
+		vec = BOOK3S_INTERRUPT_TRACE;
+		break;
+	case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
+		vec = BOOK3S_INTERRUPT_PERFMON;
+		break;
+	case BOOK3S_IRQPRIO_FAC_UNAVAIL:
+		vec = BOOK3S_INTERRUPT_FAC_UNAVAIL;
+		break;
+	default:
+		deliver = 0;	/* unrecognised priority: log it and inject nothing */
+		printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
+		break;
+	}
+
+#if 0
+	printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
+#endif
+
+	if (deliver)
+		kvmppc_inject_interrupt(vcpu, vec, 0);	/* flags are always 0 on this path */
+
+	return deliver;
+}
+
+/*
+ * This function determines if an irqprio should be cleared once issued.
+ */
+static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
+{	/* Returns true when @priority's pending bit should be cleared after delivery. */
+	switch (priority) {
+		case BOOK3S_IRQPRIO_DECREMENTER:
+			/* DEC interrupts get cleared by mtdec */
+			return false;
+		case BOOK3S_IRQPRIO_EXTERNAL:
+			/*
+			 * External interrupts get cleared by userspace
+			 * except when set by the KVM_INTERRUPT ioctl with
+			 * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
+			 */
+			if (vcpu->arch.external_oneshot) {
+				vcpu->arch.external_oneshot = 0;	/* the one-shot flag is consumed here */
+				return true;
+			}
+			return false;
+	}
+
+	return true;	/* every other priority is cleared once issued */
+}
+
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+{	/* Delivers deliverable pending exceptions, lowest bit first; always returns 0. */
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned long old_pending = vcpu->arch.pending_exceptions;	/* snapshot for the update below */
+	unsigned int priority;
+
+#ifdef EXIT_DEBUG
+	if (vcpu->arch.pending_exceptions)
+		printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
+#endif
+	priority = *pending ? __ffs(*pending) : BOOK3S_IRQPRIO_MAX;	/* __ffs(0) is undefined; MAX skips the loop */
+	while (priority < BOOK3S_IRQPRIO_MAX) {
+		if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
+		    clear_irqprio(vcpu, priority)) {
+			clear_bit(priority, &vcpu->arch.pending_exceptions);
+			break;	/* stop after the first exception that is both delivered and cleared */
+		}
+
+		priority = find_next_bit(pending,
+					 BITS_PER_BYTE * sizeof(*pending),
+					 priority + 1);
+	}
+
+	/* Tell the guest about our interrupt status */
+	kvmppc_update_int_pending(vcpu, *pending, old_pending);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
+
+kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
+			bool *writable)
+{	/* Resolves @gpa to a host pfn; the magic page resolves to vcpu->arch.shared. */
+	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM;
+	gfn_t gfn = gpa >> PAGE_SHIFT;	/* computed before @gpa is masked below */
+
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
+		mp_pa = (uint32_t)mp_pa;	/* MSR_SF clear: compare only the low 32 bits */
+
+	/* Magic page override */
+	gpa &= ~0xFFFULL;	/* drop the low 12 bits before comparing */
+	if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) {
+		ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+		kvm_pfn_t pfn;
+
+		pfn = (kvm_pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
+		get_page(pfn_to_page(pfn));	/* a page reference is taken on this path */
+		if (writable)
+			*writable = true;	/* @writable is optional; the shared page is reported writable */
+		return pfn;
+	}
+
+	return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);	/* ordinary guest memory */
+}
+EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn);
+
+int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
+		 enum xlate_readwrite xlrw, struct kvmppc_pte *pte)
+{	/* Fills @pte for @eaddr; returns the MMU hook's result, or 0 when not relocated. */
+	bool data = (xlid == XLATE_DATA);
+	bool iswrite = (xlrw == XLATE_WRITE);
+	int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));	/* MSR_DR for data, MSR_IR for instructions */
+	int r;
+
+	if (relocated) {
+		r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);
+	} else {
+		pte->eaddr = eaddr;	/* relocation off: build the pte directly from @eaddr */
+		pte->raddr = eaddr & KVM_PAM;
+		pte->vpage = VSID_REAL | eaddr >> 12;
+		pte->may_read = true;
+		pte->may_write = true;
+		pte->may_execute = true;
+		r = 0;
+
+		if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR &&
+		    !data) {	/* instruction access while only MSR_DR is set */
+			if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
+			    ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
+				pte->raddr &= ~SPLIT_HACK_MASK;	/* guarded by the inner if */
+		}
+	}
+
+	return r;
+}
+
+/*
+ * Returns prefixed instructions with the prefix in the high 32 bits
+ * of *inst and suffix in the low 32 bits.  This is the same convention
+ * as used in HEIR, vcpu->arch.last_inst and vcpu->arch.emul_inst.
+ * Like vcpu->arch.last_inst but unlike vcpu->arch.emul_inst, each
+ * half of the value needs byte-swapping if the guest endianness is
+ * different from the host endianness.
+ */
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+		enum instruction_fetch_type type, unsigned long *inst)
+{	/* Returns EMULATE_DONE with *inst filled, or EMULATE_AGAIN if a load fails. */
+	ulong pc = kvmppc_get_pc(vcpu);
+	int r;
+	u32 iw;
+
+	if (type == INST_SC)
+		pc -= 4;	/* for INST_SC, fetch the word just before the current pc */
+
+	r = kvmppc_ld(vcpu, &pc, sizeof(u32), &iw, false);
+	if (r != EMULATE_DONE)
+		return EMULATE_AGAIN;	/* any load failure is reported as "try again" */
+	/*
+	 * If [H]SRR1 indicates that the instruction that caused the
+	 * current interrupt is a prefixed instruction, get the suffix.
+	 */
+	if (kvmppc_get_msr(vcpu) & SRR1_PREFIXED) {
+		u32 suffix;
+		pc += 4;	/* the suffix word follows the prefix */
+		r = kvmppc_ld(vcpu, &pc, sizeof(u32), &suffix, false);
+		if (r != EMULATE_DONE)
+			return EMULATE_AGAIN;
+		*inst = ((u64)iw << 32) | suffix;	/* prefix in the high half, suffix in the low half */
+	} else {
+		*inst = iw;
+	}
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
+
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{	/* Nothing to set up here; always reports success. */
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{	/* Intentionally empty: kvmppc_subarch_vcpu_init() sets nothing up. */
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{	/* Returns whatever the VM's kvm_ops->get_sregs hook returns. */
+	int ret;
+
+	vcpu_load(vcpu);	/* the hook runs between vcpu_load() and vcpu_put() */
+	ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+	vcpu_put(vcpu);
+
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{	/* Returns whatever the VM's kvm_ops->set_sregs hook returns. */
+	int ret;
+
+	vcpu_load(vcpu);	/* the hook runs between vcpu_load() and vcpu_put() */
+	ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+	vcpu_put(vcpu);
+
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{	/* Copies the vcpu's register state into @regs; always returns 0. */
+	int i;
+
+	regs->pc = kvmppc_get_pc(vcpu);
+	regs->cr = kvmppc_get_cr(vcpu);
+	regs->ctr = kvmppc_get_ctr(vcpu);
+	regs->lr = kvmppc_get_lr(vcpu);
+	regs->xer = kvmppc_get_xer(vcpu);
+	regs->msr = kvmppc_get_msr(vcpu);
+	regs->srr0 = kvmppc_get_srr0(vcpu);
+	regs->srr1 = kvmppc_get_srr1(vcpu);
+	regs->pid = vcpu->arch.pid;	/* read straight from vcpu->arch rather than via an accessor */
+	regs->sprg0 = kvmppc_get_sprg0(vcpu);
+	regs->sprg1 = kvmppc_get_sprg1(vcpu);
+	regs->sprg2 = kvmppc_get_sprg2(vcpu);
+	regs->sprg3 = kvmppc_get_sprg3(vcpu);
+	regs->sprg4 = kvmppc_get_sprg4(vcpu);
+	regs->sprg5 = kvmppc_get_sprg5(vcpu);
+	regs->sprg6 = kvmppc_get_sprg6(vcpu);
+	regs->sprg7 = kvmppc_get_sprg7(vcpu);
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)	/* one GPR per slot of regs->gpr[] */
+		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{	/* Loads the vcpu's registers from @regs (regs->pid is not consumed); always returns 0. */
+	int i;
+
+	kvmppc_set_pc(vcpu, regs->pc);
+	kvmppc_set_cr(vcpu, regs->cr);
+	kvmppc_set_ctr(vcpu, regs->ctr);
+	kvmppc_set_lr(vcpu, regs->lr);
+	kvmppc_set_xer(vcpu, regs->xer);
+	kvmppc_set_msr(vcpu, regs->msr);
+	kvmppc_set_srr0(vcpu, regs->srr0);
+	kvmppc_set_srr1(vcpu, regs->srr1);
+	kvmppc_set_sprg0(vcpu, regs->sprg0);
+	kvmppc_set_sprg1(vcpu, regs->sprg1);
+	kvmppc_set_sprg2(vcpu, regs->sprg2);
+	kvmppc_set_sprg3(vcpu, regs->sprg3);
+	kvmppc_set_sprg4(vcpu, regs->sprg4);
+	kvmppc_set_sprg5(vcpu, regs->sprg5);
+	kvmppc_set_sprg6(vcpu, regs->sprg6);
+	kvmppc_set_sprg7(vcpu, regs->sprg7);
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)	/* one GPR per slot of regs->gpr[] */
+		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{	/* Not implemented here: always fails with -EOPNOTSUPP. */
+	return -EOPNOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{	/* Not implemented here: always fails with -EOPNOTSUPP. */
+	return -EOPNOTSUPP;
+}
+
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{	/* Reads register @id into @val; returns 0 or a negative errno. */
+	int r = 0;
+	long int i;
+
+	r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);	/* the kvm_ops hook gets the first try */
+	if (r == -EINVAL) {	/* hook returned -EINVAL: fall back to the registers handled here */
+		r = 0;
+		switch (id) {
+		case KVM_REG_PPC_DAR:
+			*val = get_reg_val(id, kvmppc_get_dar(vcpu));
+			break;
+		case KVM_REG_PPC_DSISR:
+			*val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
+			break;
+		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+			i = id - KVM_REG_PPC_FPR0;	/* index of the FPR within the range */
+			*val = get_reg_val(id, VCPU_FPR(vcpu, i));
+			break;
+		case KVM_REG_PPC_FPSCR:
+			*val = get_reg_val(id, vcpu->arch.fp.fpscr);
+			break;
+#ifdef CONFIG_VSX
+		case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+			if (cpu_has_feature(CPU_FTR_VSX)) {
+				i = id - KVM_REG_PPC_VSR0;
+				val->vsxval[0] = vcpu->arch.fp.fpr[i][0];	/* both halves come from the fp.fpr[] array */
+				val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
+			} else {
+				r = -ENXIO;	/* CPU lacks CPU_FTR_VSX */
+			}
+			break;
+#endif /* CONFIG_VSX */
+		case KVM_REG_PPC_DEBUG_INST:
+			*val = get_reg_val(id, INS_TW);
+			break;
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
+				r = -ENXIO;	/* neither icp nor xive_vcpu is set for this vcpu */
+				break;
+			}
+			if (xics_on_xive())
+				*val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
+			else
+				*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
+			break;
+#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+		case KVM_REG_PPC_VP_STATE:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;	/* no xive_vcpu is set for this vcpu */
+				break;
+			}
+			if (xive_enabled())
+				r = kvmppc_xive_native_get_vp(vcpu, val);
+			else
+				r = -ENXIO;
+			break;
+#endif /* CONFIG_KVM_XIVE */
+		case KVM_REG_PPC_FSCR:
+			*val = get_reg_val(id, vcpu->arch.fscr);
+			break;
+		case KVM_REG_PPC_TAR:
+			*val = get_reg_val(id, vcpu->arch.tar);
+			break;
+		case KVM_REG_PPC_EBBHR:
+			*val = get_reg_val(id, vcpu->arch.ebbhr);
+			break;
+		case KVM_REG_PPC_EBBRR:
+			*val = get_reg_val(id, vcpu->arch.ebbrr);
+			break;
+		case KVM_REG_PPC_BESCR:
+			*val = get_reg_val(id, vcpu->arch.bescr);
+			break;
+		case KVM_REG_PPC_IC:
+			*val = get_reg_val(id, vcpu->arch.ic);
+			break;
+		default:
+			r = -EINVAL;	/* not handled here either */
+			break;
+		}
+	}
+
+	return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,	/* Write register 'id' from *val: backend hook first, then the switch below. */
+			union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;	/* index derived from id for the FPR/VSR ranges */
+
+	r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);	/* any result other than -EINVAL is returned unchanged */
+	if (r == -EINVAL) {	/* hook did not accept this id: handle it here */
+		r = 0;	/* success unless a case below overrides it */
+		switch (id) {
+		case KVM_REG_PPC_DAR:
+			kvmppc_set_dar(vcpu, set_reg_val(id, *val));
+			break;
+		case KVM_REG_PPC_DSISR:
+			kvmppc_set_dsisr(vcpu, set_reg_val(id, *val));
+			break;
+		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+			i = id - KVM_REG_PPC_FPR0;
+			VCPU_FPR(vcpu, i) = set_reg_val(id, *val);
+			break;
+		case KVM_REG_PPC_FPSCR:
+			vcpu->arch.fp.fpscr = set_reg_val(id, *val);
+			break;
+#ifdef CONFIG_VSX
+		case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+			if (cpu_has_feature(CPU_FTR_VSX)) {
+				i = id - KVM_REG_PPC_VSR0;
+				vcpu->arch.fp.fpr[i][0] = val->vsxval[0];	/* both elements of fpr[i] are written */
+				vcpu->arch.fp.fpr[i][1] = val->vsxval[1];
+			} else {
+				r = -ENXIO;	/* CPU_FTR_VSX not present */
+			}
+			break;
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_KVM_XICS
+		case KVM_REG_PPC_ICP_STATE:
+			if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {	/* neither icp nor xive_vcpu is set for this vcpu */
+				r = -ENXIO;
+				break;
+			}
+			if (xics_on_xive())
+				r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
+			else
+				r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
+			break;
+#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+		case KVM_REG_PPC_VP_STATE:
+			if (!vcpu->arch.xive_vcpu) {
+				r = -ENXIO;
+				break;
+			}
+			if (xive_enabled())
+				r = kvmppc_xive_native_set_vp(vcpu, val);
+			else
+				r = -ENXIO;
+			break;
+#endif /* CONFIG_KVM_XIVE */
+		case KVM_REG_PPC_FSCR:
+			vcpu->arch.fscr = set_reg_val(id, *val);
+			break;
+		case KVM_REG_PPC_TAR:
+			vcpu->arch.tar = set_reg_val(id, *val);
+			break;
+		case KVM_REG_PPC_EBBHR:
+			vcpu->arch.ebbhr = set_reg_val(id, *val);
+			break;
+		case KVM_REG_PPC_EBBRR:
+			vcpu->arch.ebbrr = set_reg_val(id, *val);
+			break;
+		case KVM_REG_PPC_BESCR:
+			vcpu->arch.bescr = set_reg_val(id, *val);
+			break;
+		case KVM_REG_PPC_IC:
+			vcpu->arch.ic = set_reg_val(id, *val);
+			break;
+		default:
+			r = -EINVAL;	/* id is not handled here either */
+			break;
+		}
+	}
+
+	return r;
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)	/* Forwards to the vcpu_load hook in vcpu->kvm->arch.kvm_ops. */
+{
+	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)	/* Forwards to the vcpu_put hook in vcpu->kvm->arch.kvm_ops. */
+{
+	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
+}
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)	/* Forwards the new MSR value to the set_msr hook in kvm_ops. */
+{
+	vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr);	/* exported to GPL modules */
+
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)	/* Returns whatever the vcpu_run hook in kvm_ops returns. */
+{
+	return vcpu->kvm->arch.kvm_ops->vcpu_run(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,	/* Stub: neither argument is used and 0 is always returned. */
+                                  struct kvm_translation *tr)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,	/* Copies dbg->control into vcpu->guest_debug; always returns 0. */
+					struct kvm_guest_debug *dbg)
+{
+	vcpu_load(vcpu);	/* the store below is bracketed by vcpu_load()/vcpu_put() */
+	vcpu->guest_debug = dbg->control;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)	/* Calls kvmppc_core_queue_dec() and then kicks the vcpu. */
+{
+	kvmppc_core_queue_dec(vcpu);
+	kvm_vcpu_kick(vcpu);
+}
+
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)	/* Returns the result of the vcpu_create hook in kvm_ops. */
+{
+	return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)	/* Forwards to the vcpu_free hook in kvm_ops. */
+{
+	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)	/* Returns the result of the check_requests hook in kvm_ops. */
+{
+	return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)	/* Empty body: nothing is done with kvm or memslot. */
+{
+
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)	/* Returns the result of the get_dirty_log hook in kvm_ops. */
+{
+	return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
+}
+
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)	/* Forwards to the free_memslot hook; kvm only selects the ops table. */
+{
+	kvm->arch.kvm_ops->free_memslot(slot);	/* the hook takes only the slot */
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)	/* Forwards to the flush_memslot hook in kvm_ops. */
+{
+	kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,	/* Returns the result of the prepare_memory_region hook in kvm_ops. */
+				      const struct kvm_memory_slot *old,
+				      struct kvm_memory_slot *new,
+				      enum kvm_mr_change change)
+{
+	return kvm->arch.kvm_ops->prepare_memory_region(kvm, old, new, change);
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,	/* Forwards to the commit_memory_region hook in kvm_ops. */
+				struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+	kvm->arch.kvm_ops->commit_memory_region(kvm, old, new, change);
+}
+
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)	/* Returns the result of the unmap_gfn_range hook in kvm_ops. */
+{
+	return kvm->arch.kvm_ops->unmap_gfn_range(kvm, range);
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)	/* Returns the result of the age_gfn hook in kvm_ops. */
+{
+	return kvm->arch.kvm_ops->age_gfn(kvm, range);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)	/* Returns the result of the test_age_gfn hook in kvm_ops. */
+{
+	return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)	/* Returns the result of the set_spte_gfn hook in kvm_ops. */
+{
+	return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)	/* Initialises the PPC64-only lists and mutex, then returns the init_vm hook's result. */
+{
+
+#ifdef CONFIG_PPC64
+	INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
+	mutex_init(&kvm->arch.rtas_token_lock);
+#endif
+
+	return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)	/* Runs the destroy_vm hook, then frees what this file tracks in kvm->arch. */
+{
+	kvm->arch.kvm_ops->destroy_vm(kvm);	/* the backend hook runs before the cleanup below */
+
+#ifdef CONFIG_PPC64
+	kvmppc_rtas_tokens_free(kvm);
+	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));	/* warns if the TCE table list is not empty at this point */
+#endif
+
+#ifdef CONFIG_KVM_XICS
+	/*
+	 * Free the XIVE and XICS devices which are not directly freed by the
+	 * device 'release' method. Each pointer is cleared after kfree() so
+	 * it is not left dangling.
+	 */
+	kfree(kvm->arch.xive_devices.native);
+	kvm->arch.xive_devices.native = NULL;
+	kfree(kvm->arch.xive_devices.xics_on_xive);
+	kvm->arch.xive_devices.xics_on_xive = NULL;
+	kfree(kvm->arch.xics_device);
+	kvm->arch.xics_device = NULL;
+#endif /* CONFIG_KVM_XICS */
+}
+
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)	/* Reads 'size' bytes from the MMIO bus at 'addr' into GPR4; returns H_SUCCESS or H_TOO_HARD. */
+{
+	unsigned long size = kvmppc_get_gpr(vcpu, 4);	/* access width in bytes, from GPR4 */
+	unsigned long addr = kvmppc_get_gpr(vcpu, 5);	/* bus address, from GPR5 */
+	u64 buf;
+	int srcu_idx;
+	int ret;
+
+	if (!is_power_of_2(size) || (size > sizeof(buf)))	/* only 1, 2, 4 or 8 pass this check */
+		return H_TOO_HARD;
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);	/* the bus read is done under kvm->srcu */
+	ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+	if (ret != 0)
+		return H_TOO_HARD;
+
+	switch (size) {	/* multi-byte results are converted from big-endian before being stored in GPR4 */
+	case 1:
+		kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+		break;
+
+	case 2:
+		kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(__be16 *)&buf));
+		break;
+
+	case 4:
+		kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(__be32 *)&buf));
+		break;
+
+	case 8:
+		kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(__be64 *)&buf));
+		break;
+
+	default:
+		BUG();	/* not reachable: the size check above admits only 1, 2, 4, 8 */
+	}
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load);
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)	/* Writes 'size' bytes of GPR6 to the MMIO bus at 'addr'; returns H_SUCCESS or H_TOO_HARD. */
+{
+	unsigned long size = kvmppc_get_gpr(vcpu, 4);	/* access width in bytes, from GPR4 */
+	unsigned long addr = kvmppc_get_gpr(vcpu, 5);	/* bus address, from GPR5 */
+	unsigned long val = kvmppc_get_gpr(vcpu, 6);	/* value to store, from GPR6 */
+	u64 buf;
+	int srcu_idx;
+	int ret;
+
+	switch (size) {	/* doubles as size validation: anything but 1, 2, 4, 8 is rejected */
+	case 1:
+		*(u8 *)&buf = val;
+		break;
+
+	case 2:
+		*(__be16 *)&buf = cpu_to_be16(val);	/* multi-byte values are placed in buf as big-endian */
+		break;
+
+	case 4:
+		*(__be32 *)&buf = cpu_to_be32(val);
+		break;
+
+	case 8:
+		*(__be64 *)&buf = cpu_to_be64(val);
+		break;
+
+	default:
+		return H_TOO_HARD;
+	}
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);	/* the bus write is done under kvm->srcu */
+	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+	if (ret != 0)
+		return H_TOO_HARD;
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store);
+
+int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)	/* Returns the hcall_implemented hook's answer for this hcall number. */
+{
+	return kvm->arch.kvm_ops->hcall_implemented(hcall);	/* the hook takes only the hcall number */
+}
+
+#ifdef CONFIG_KVM_XICS
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	/* Route to the XICS implementation unless XICS runs on top of XIVE. */
+	if (!xics_on_xive())
+		return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
+					   line_status);
+
+	return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
+				   line_status);
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,	/* Calls kvm_set_irq() with irq_entry->gsi as the irq number. */
+			      struct kvm *kvm, int irq_source_id,
+			      int level, bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
+			   level, line_status);
+}
+static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,	/* Routing-entry 'set' callback (installed by kvm_irq_map_gsi()): calls kvm_set_irq() with e->gsi. */
+				 struct kvm *kvm, int irq_source_id, int level,
+				 bool line_status)
+{
+	return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
+}
+
+int kvm_irq_map_gsi(struct kvm *kvm,	/* Fills *entries with a single identity mapping for gsi; always returns 1. */
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
+{
+	entries->gsi = gsi;
+	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
+	entries->set = kvmppc_book3s_set_irq;
+	entries->irqchip.irqchip = 0;
+	entries->irqchip.pin = gsi;	/* pin number equals the gsi */
+	return 1;	/* exactly one entry was written */
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)	/* Identity mapping: returns pin; kvm and irqchip are unused. */
+{
+	return pin;
+}
+
+#endif /* CONFIG_KVM_XICS */
+
+/*
+ * Module init. Calls kvm_init(), then (only when
+ * CONFIG_KVM_BOOK3S_32_HANDLER is set) kvmppc_book3s_init_pr(), then
+ * registers the interrupt-controller device ops when CONFIG_KVM_XICS is set.
+ *
+ * Returns 0 on success, or the error from kvm_init() or
+ * kvmppc_book3s_init_pr().
+ */
+static int kvmppc_book3s_init(void)
+{
+	int r;
+
+	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	if (r)
+		return r;
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	r = kvmppc_book3s_init_pr();
+	if (r) {
+		/*
+		 * kvm_init() already succeeded; undo it so a failed init
+		 * does not leave the KVM core registered.
+		 */
+		kvm_exit();
+		return r;
+	}
+#endif
+
+#ifdef CONFIG_KVM_XICS
+#ifdef CONFIG_KVM_XIVE
+	if (xics_on_xive()) {
+		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+		if (kvmppc_xive_native_supported())
+			kvm_register_device_ops(&kvm_xive_native_ops,
+						KVM_DEV_TYPE_XIVE);
+	} else
+#endif
+		/* Also the body of the 'else' above when CONFIG_KVM_XIVE is set. */
+		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
+#endif
+	return r;
+}
+
+static void kvmppc_book3s_exit(void)	/* Module exit: kvmppc_book3s_exit_pr() on 32-bit handler builds, then kvm_exit(). */
+{
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	kvmppc_book3s_exit_pr();
+#endif
+	kvm_exit();
+}
+
+module_init(kvmppc_book3s_init);	/* hook up the init/exit functions defined above */
+module_exit(kvmppc_book3s_exit);
+
+/* On 32bit this is our one and only kernel module, so it carries the aliases below. */
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
new file mode 100644
index 0000000000..58391b4b32
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_BOOK3S_H__	/* include guard */
+#define __POWERPC_KVM_BOOK3S_H__
+
+extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,	/* the declarations with an _hv suffix start here */
+					 struct kvm_memory_slot *memslot);
+extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+
+extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);	/* the declarations with a _pr suffix start here */
+extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
+				     unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
+					int sprn, ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
+					int sprn, ulong *spr_val);
+extern int kvmppc_book3s_init_pr(void);
+void kvmppc_book3s_exit_pr(void);
+extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
+#else
+static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}	/* empty stub when CONFIG_PPC_TRANSACTIONAL_MEM is off */
+#endif
+
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
+#endif	/* __POWERPC_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
new file mode 100644
index 0000000000..0215f32932
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/* Debug switches, off by default; turn the line into a real #define to enable. */
+/* #define DEBUG_MMU */
+/* #define DEBUG_MMU_PTE */
+/* #define DEBUG_MMU_PTE_IP 0xfff14c40 */
+
+#ifdef DEBUG_MMU
+#define dprintk(X...) printk(KERN_INFO X)
+#else
+#define dprintk(X...) do { } while(0)	/* expands to nothing unless DEBUG_MMU is defined */
+#endif
+
+#ifdef DEBUG_MMU_PTE
+#define dprintk_pte(X...) printk(KERN_INFO X)
+#else
+#define dprintk_pte(X...) do { } while(0)	/* expands to nothing unless DEBUG_MMU_PTE is defined */
+#endif
+
+#define PTEG_FLAG_ACCESSED	0x00000100	/* tested and set in the second PTE word by the PTE walker */
+#define PTEG_FLAG_DIRTY		0x00000080	/* set in the second PTE word on a permitted write */
+#ifndef SID_SHIFT
+#define SID_SHIFT		28	/* fallback used only if no header defined SID_SHIFT */
+#endif
+
+static inline bool check_debug_ip(struct kvm_vcpu *vcpu)	/* Debug filter: true always, or only when nip == DEBUG_MMU_PTE_IP if that macro is defined. */
+{
+#ifdef DEBUG_MMU_PTE_IP
+	return vcpu->arch.regs.nip == DEBUG_MMU_PTE_IP;
+#else
+	return true;
+#endif
+}
+
+static inline u32 sr_vsid(u32 sr_raw)	/* Returns the low 28 bits of the raw segment register value. */
+{
+	return sr_raw & 0x0fffffff;
+}
+
+static inline bool sr_valid(u32 sr_raw)
+{
+	/* Valid means bit 0x80000000 of the raw value is clear. */
+	return !(sr_raw & 0x80000000);
+}
+
+static inline bool sr_ks(u32 sr_raw)
+{
+	/* True when bit 0x40000000 of the raw value is set. */
+	return (sr_raw & 0x40000000) != 0;
+}
+
+static inline bool sr_kp(u32 sr_raw)
+{
+	/* True when bit 0x20000000 of the raw value is set. */
+	return (sr_raw & 0x20000000) != 0;
+}
+
+static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,	/* forward declaration: used by ea_to_vp() before its definition */
+					  struct kvmppc_pte *pte, bool data,
+					  bool iswrite);
+static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,	/* forward declaration: used by ea_to_vp() and xlate_bat() */
+					     u64 *vsid);
+
+static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr)	/* Returns the segment register selected by bits 31..28 of eaddr. */
+{
+	return kvmppc_get_sr(vcpu, (eaddr >> 28) & 0xf);
+}
+
+static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,	/* Returns the virtual page number for eaddr: from a BAT hit if any, else built from the VSID. */
+					 bool data)
+{
+	u64 vsid;
+	struct kvmppc_pte pte;	/* scratch; only pte.vpage is read back */
+
+	if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))	/* 0 means a BAT matched; looked up as a read (iswrite = false) */
+		return pte.vpage;
+
+	kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);	/* return value ignored; the function always returns 0 */
+	return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);	/* 16 bits of page index below the VSID */
+}
+
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,	/* Returns the host virtual address of the guest PTEG for eaddr, or an error hva. */
+				      u32 sre, gva_t eaddr,
+				      bool primary)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	u32 page, hash, pteg, htabmask;
+	hva_t r;
+
+	page = (eaddr & 0x0FFFFFFF) >> 12;	/* page index within the 256MB segment */
+	htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0;	/* mask built from the low 9 bits of the guest SDR1 */
+
+	hash = ((sr_vsid(sre) ^ page) << 6);	/* << 6 scales the hash to a byte offset of 64-byte groups */
+	if (!primary)
+		hash = ~hash;	/* the secondary group uses the complemented hash */
+	hash &= htabmask;
+
+	pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;	/* guest address: table base from SDR1 plus the masked hash */
+
+	dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
+		kvmppc_get_pc(vcpu), eaddr, vcpu_book3s->sdr1, pteg,
+		sr_vsid(sre));
+
+	r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
+	if (kvm_is_error_hva(r))
+		return r;	/* caller must check with kvm_is_error_hva() */
+	return r | (pteg & ~PAGE_MASK);	/* add back the offset within the page */
+}
+
+static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
+{
+	/* Build the first PTE word that a matching entry must equal. */
+	u32 match = 0x80000000;
+
+	match |= (eaddr & 0x0fffffff) >> 22;
+	match |= sr_vsid(sre) << 7;
+	if (!primary)
+		match |= 0x40;
+
+	return match;
+}
+
+/*
+ * Try to translate eaddr through the guest BAT registers.
+ *
+ * Scans the eight data BATs (data == true) or instruction BATs. An entry
+ * is considered only if its vp bit (MSR_PR set) or vs bit (MSR_PR clear)
+ * is set and (eaddr & bepi_mask) == bepi. On a usable match, fills
+ * pte->vpage, pte->raddr and the may_* permissions and returns 0.
+ * A matching entry that is unreadable, or read-only when iswrite is set,
+ * is skipped. Returns -ENOENT when no entry is usable.
+ */
+static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
+					  struct kvmppc_pte *pte, bool data,
+					  bool iswrite)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_bat *bat;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if (data)
+			bat = &vcpu_book3s->dbat[i];
+		else
+			bat = &vcpu_book3s->ibat[i];
+
+		if (kvmppc_get_msr(vcpu) & MSR_PR) {
+			if (!bat->vp)
+				continue;
+		} else {
+			if (!bat->vs)
+				continue;
+		}
+
+		if (check_debug_ip(vcpu))
+		{
+			dprintk_pte("%cBAT %02d: 0x%lx - 0x%x (0x%x)\n",
+				    data ? 'd' : 'i', i, eaddr, bat->bepi,
+				    bat->bepi_mask);
+		}
+		if ((eaddr & bat->bepi_mask) == bat->bepi) {
+			u64 vsid;
+			kvmppc_mmu_book3s_32_esid_to_vsid(vcpu,
+				eaddr >> SID_SHIFT, &vsid);
+			vsid <<= 16;
+			pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid;
+
+			pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask);
+			pte->may_read = bat->pp;
+			pte->may_write = bat->pp > 1;
+			pte->may_execute = true;
+			if (!pte->may_read) {
+				/*
+				 * The BAT contents come from the guest, so
+				 * rate limit this message to keep a guest
+				 * from flooding the host log.
+				 */
+				printk_ratelimited(KERN_INFO
+					"BAT is not readable!\n");
+				continue;
+			}
+			if (iswrite && !pte->may_write) {
+				dprintk_pte("BAT is read-only!\n");
+				continue;
+			}
+
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Translate eaddr by searching one guest PTE group (primary or secondary).
+ *
+ * Copies the 8-entry group from guest memory and looks for an entry whose
+ * first word equals the expected match word. On a hit, fills pte->raddr
+ * and the may_* permissions, updates the accessed/dirty bits in the guest
+ * PTE, and returns 0, or -EPERM if the access is not permitted.
+ * Returns -ENOENT if the group cannot be read or holds no match.
+ */
+static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
+				     struct kvmppc_pte *pte, bool data,
+				     bool iswrite, bool primary)
+{
+	u32 sre;
+	hva_t ptegp;
+	u32 pteg[16];
+	u32 pte0, pte1;
+	u32 ptem = 0;
+	int i;
+	int found = 0;
+
+	sre = find_sr(vcpu, eaddr);
+
+	dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28,
+		    sr_vsid(sre), sre);
+
+	pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
+
+	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
+	if (kvm_is_error_hva(ptegp)) {
+		/*
+		 * The PTEG address is derived from the guest's SDR1, so rate
+		 * limit this message like the copy failure below.
+		 */
+		printk_ratelimited(KERN_INFO "KVM: Invalid PTEG!\n");
+		goto no_page_found;
+	}
+
+	ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);
+
+	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
+		printk_ratelimited(KERN_ERR
+			"KVM: Can't copy data from 0x%lx!\n", ptegp);
+		goto no_page_found;
+	}
+
+	/* Each entry is two big-endian words: pteg[i] and pteg[i + 1]. */
+	for (i=0; i<16; i+=2) {
+		pte0 = be32_to_cpu(pteg[i]);
+		pte1 = be32_to_cpu(pteg[i + 1]);
+		if (ptem == pte0) {
+			u8 pp;
+
+			pte->raddr = (pte1 & ~(0xFFFULL)) | (eaddr & 0xFFF);
+			pp = pte1 & 3;
+
+			/* Fold the applicable segment key into bit 2 of pp. */
+			if ((sr_kp(sre) &&  (kvmppc_get_msr(vcpu) & MSR_PR)) ||
+			    (sr_ks(sre) && !(kvmppc_get_msr(vcpu) & MSR_PR)))
+				pp |= 4;
+
+			pte->may_write = false;
+			pte->may_read = false;
+			pte->may_execute = true;
+			/* pp == 4 matches no case: neither read nor write. */
+			switch (pp) {
+				case 0:
+				case 1:
+				case 2:
+				case 6:
+					pte->may_write = true;
+					fallthrough;
+				case 3:
+				case 5:
+				case 7:
+					pte->may_read = true;
+					break;
+			}
+
+			dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
+				    pte0, pte1, pp);
+			found = 1;
+			break;
+		}
+	}
+
+	/* Update PTE C and A bits, so the guest's swapper knows we used the
+	   page */
+	if (found) {
+		u32 pte_r = pte1;
+		/* i still indexes the matching entry; addr is its second word. */
+		char __user *addr = (char __user *) (ptegp + (i+1) * sizeof(u32));
+
+		/*
+		 * Use single-byte writes to update the HPTE, to
+		 * conform to what real hardware does.
+		 * The put_user() results are not checked.
+		 */
+		if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+			pte_r |= PTEG_FLAG_ACCESSED;
+			put_user(pte_r >> 8, addr + 2);
+		}
+		if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+			pte_r |= PTEG_FLAG_DIRTY;
+			put_user(pte_r, addr + 3);
+		}
+		if (!pte->may_read || (iswrite && !pte->may_write))
+			return -EPERM;
+		return 0;
+	}
+
+no_page_found:
+
+	/*
+	 * Debug dump only. NOTE(review): when reached through an early goto,
+	 * pteg[] has not been filled, so a DEBUG_MMU_PTE build prints
+	 * uninitialised stack data here.
+	 */
+	if (check_debug_ip(vcpu)) {
+		dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n",
+			    to_book3s(vcpu)->sdr1, ptegp);
+		for (i=0; i<16; i+=2) {
+			dprintk_pte("   %02d: 0x%x - 0x%x (0x%x)\n",
+				    i, be32_to_cpu(pteg[i]),
+				    be32_to_cpu(pteg[i+1]), ptem);
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Translate a guest effective address (installed as mmu->xlate).
+ *
+ * Order of attempts: magic page override, BAT registers, primary PTE
+ * group, secondary PTE group. Fills *pte and returns 0 on success;
+ * otherwise returns the negative error of the last lookup tried.
+ */
+static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      struct kvmppc_pte *pte, bool data,
+				      bool iswrite)
+{
+	int r;
+	ulong mp_ea = vcpu->arch.magic_page_ea;
+
+	pte->eaddr = eaddr;
+	pte->page_size = MMU_PAGE_4K;
+
+	/* Magic page override */
+	if (unlikely(mp_ea) &&
+	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
+		/*
+		 * Take the in-page offset from eaddr, as the BAT and PTE
+		 * paths do. pte->raddr has not been written yet at this
+		 * point, so it must not be used as the source.
+		 */
+		pte->raddr = vcpu->arch.magic_page_pa | (eaddr & 0xfff);
+		pte->raddr &= KVM_PAM;
+		pte->may_execute = true;
+		pte->may_read = true;
+		pte->may_write = true;
+
+		return 0;
+	}
+
+	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);
+	if (r < 0)
+		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+						   data, iswrite, true);
+	/* Only a miss falls through to the secondary group; -EPERM is final. */
+	if (r == -ENOENT)
+		r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+						   data, iswrite, false);
+
+	return r;
+}
+
+
+static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)	/* Returns guest segment register srnum (installed as mmu->mfsrin). */
+{
+	return kvmppc_get_sr(vcpu, srnum);
+}
+
+static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,	/* Stores value in guest segment register srnum, then remaps that segment (installed as mmu->mtsrin). */
+					ulong value)
+{
+	kvmppc_set_sr(vcpu, srnum, value);
+	kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);	/* srnum << SID_SHIFT is the segment's base effective address */
+}
+
+static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)	/* Flushes cached PTEs for ea on every vcpu of the VM; 'large' is unused. */
+{
+	unsigned long i;
+	struct kvm_vcpu *v;
+
+	/* flush this VA on all cpus */
+	kvm_for_each_vcpu(i, v, vcpu->kvm)
+		kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);	/* mask 0x0FFFF000 is passed with ea to the flush helper */
+}
+
+static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,	/* Computes *vsid for segment esid from the current MSR_IR/MSR_DR/MSR_PR bits; always returns 0. */
+					     u64 *vsid)
+{
+	ulong ea = esid << SID_SHIFT;
+	u32 sr;	/* only assigned when at least one of MSR_DR/MSR_IR is set */
+	u64 gvsid = esid;	/* falls back to the esid when no valid segment register is found */
+	u64 msr = kvmppc_get_msr(vcpu);
+
+	if (msr & (MSR_DR|MSR_IR)) {
+		sr = find_sr(vcpu, ea);
+		if (sr_valid(sr))
+			gvsid = sr_vsid(sr);
+	}
+
+	/* In case we only have one of MSR_IR or MSR_DR set, let's put
+	   that in the real-mode context (and hope RM doesn't access
+	   high memory) */
+	switch (msr & (MSR_DR|MSR_IR)) {
+	case 0:
+		*vsid = VSID_REAL | esid;
+		break;
+	case MSR_IR:
+		*vsid = VSID_REAL_IR | gvsid;
+		break;
+	case MSR_DR:
+		*vsid = VSID_REAL_DR | gvsid;
+		break;
+	case MSR_DR|MSR_IR:
+		if (sr_valid(sr))	/* sr was assigned above because both bits are set here */
+			*vsid = sr_vsid(sr);
+		else
+			*vsid = VSID_BAT | gvsid;
+		break;
+	default:
+		BUG();	/* not reachable: the four cases cover every combination of the two bits */
+	}
+
+	if (msr & MSR_PR)
+		*vsid |= VSID_PR;	/* MSR_PR set gets a distinct VSID via the VSID_PR flag */
+
+	return 0;
+}
+
+static bool kvmppc_mmu_book3s_32_is_dcbz32(struct kvm_vcpu *vcpu)	/* Always true for this MMU; vcpu is unused (installed as mmu->is_dcbz32). */
+{
+	return true;
+}
+
+
+void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)	/* Installs this file's callbacks into vcpu->arch.mmu and clears the SLB hooks. */
+{
+	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+	mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
+	mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
+	mmu->xlate = kvmppc_mmu_book3s_32_xlate;
+	mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
+	mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
+	mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
+	mmu->is_dcbz32 = kvmppc_mmu_book3s_32_is_dcbz32;
+
+	mmu->slbmte = NULL;	/* this file provides no slb* handlers, so all six hooks are NULL */
+	mmu->slbmfee = NULL;
+	mmu->slbmfev = NULL;
+	mmu->slbfee = NULL;
+	mmu->slbie = NULL;
+	mmu->slbia = NULL;
+}
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
new file mode 100644
index 0000000000..4b3a8d80cf
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *     Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/book3s/32/mmu-hash.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+#include "book3s.h"
+
+/* Debug switches, off by default; turn the line into a real #define to enable. */
+/* #define DEBUG_MMU */
+/* #define DEBUG_SR */
+
+#ifdef DEBUG_MMU
+#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_mmu(a, ...) do { } while(0)	/* expands to nothing unless DEBUG_MMU is defined */
+#endif
+
+#ifdef DEBUG_SR
+#define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_sr(a, ...) do { } while(0)	/* expands to nothing unless DEBUG_SR is defined */
+#endif
+
+#if PAGE_SHIFT != 12	/* build-time guard: this file hard-codes 4K page shifts */
+#error Unknown page size
+#endif
+
+#ifdef CONFIG_SMP	/* build-time guard: the HTAB updates below take no lock */
+#error XXX need to grab mmu_hash_lock
+#endif
+
+#ifdef CONFIG_PTE_64BIT	/* build-time guard: PTEs are handled as pairs of u32 words */
+#error Only 32 bit pages are supported for now
+#endif
+
+static ulong htab;	/* host hash table base (kernel virtual address), set in kvmppc_mmu_init_pr() */
+static u32 htabmask;	/* hash mask derived from the host SDR1, set in kvmppc_mmu_init_pr() */
+
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)	/* Zeroes the first word of the host HTAB entry at pte->slot and issues tlbie for its eaddr. */
+{
+	volatile u32 *pteg;
+
+	/* Remove from host HTAB */
+	pteg = (u32*)pte->slot;
+	pteg[0] = 0;	/* only the first word of the entry is cleared */
+
+	/* And make sure it's gone from the TLB too */
+	asm volatile ("sync");
+	asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory");
+	asm volatile ("sync");
+	asm volatile ("tlbsync");
+}
+
+/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
+ * a hash, so we don't waste cycles on looping */
+static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	u64 folded = 0;
+	int chunk;
+
+	/* XOR together eight SID_MAP_BITS-wide slices of gvsid. */
+	for (chunk = 0; chunk < 8; chunk++)
+		folded ^= (gvsid >> (SID_MAP_BITS * chunk)) & SID_MAP_MASK;
+
+	return (u16)folded;
+}
+
+
+static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)	/* Looks up the sid_map entry for gvsid; returns NULL if neither candidate slot holds it. */
+{
+	struct kvmppc_sid_map *map;
+	u16 sid_map_mask;	/* despite the name, this is the hash index into sid_map[] */
+
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
+		gvsid |= VSID_PR;	/* same tagging as create_sid_map(), so lookups match */
+
+	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
+	map = &to_book3s(vcpu)->sid_map[sid_map_mask];	/* first candidate: the hash index itself */
+	if (map->guest_vsid == gvsid) {
+		dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
+			    gvsid, map->host_vsid);
+		return map;
+	}
+
+	map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask];	/* second candidate: the mirrored index used by create_sid_map() */
+	if (map->guest_vsid == gvsid) {
+		dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
+			    gvsid, map->host_vsid);
+		return map;
+	}
+
+	dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid);
+	return NULL;
+}
+
+static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,	/* Returns a pointer to the host HTAB group (primary or secondary) for vsid/eaddr. */
+				bool primary)
+{
+	u32 page, hash;
+	ulong pteg = htab;	/* start from the host hash table base */
+
+	page = (eaddr & ~ESID_MASK) >> 12;
+
+	hash = ((vsid ^ page) << 6);
+	if (!primary)
+		hash = ~hash;	/* the secondary group uses the complemented hash */
+
+	hash &= htabmask;
+
+	pteg |= hash;
+
+	dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n",
+		htab, hash, htabmask, pteg);
+
+	return (u32*)pteg;
+}
+
+extern char etext[];
+
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,	/* Installs a host HTAB entry for orig_pte; returns 0, -EINVAL (no backing page) or -EAGAIN (no cache slot). */
+			bool iswrite)
+{
+	kvm_pfn_t hpaddr;	/* first a pfn, later shifted into a physical address */
+	u64 vpn;
+	u64 vsid;
+	struct kvmppc_sid_map *map;
+	volatile u32 *pteg;
+	u32 eaddr = orig_pte->eaddr;
+	u32 pteg0, pteg1;
+	register int rr = 0;	/* word index of the candidate slot in the group (steps of 2) */
+	bool primary = false;
+	bool evict = false;
+	struct hpte_cache *pte;
+	int r = 0;
+	bool writable;
+
+	/* Get host physical address for gpa */
+	hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
+	if (is_error_noslot_pfn(hpaddr)) {
+		printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
+				 orig_pte->raddr);
+		r = -EINVAL;
+		goto out;
+	}
+	hpaddr <<= PAGE_SHIFT;
+
+	/* and write the mapping ea -> hpa into the pt */
+	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
+	map = find_sid_vsid(vcpu, vsid);
+	if (!map) {
+		kvmppc_mmu_map_segment(vcpu, eaddr);	/* creates the sid_map entry, so the retry below should succeed */
+		map = find_sid_vsid(vcpu, vsid);
+	}
+	BUG_ON(!map);
+
+	vsid = map->host_vsid;	/* from here on vsid is the host VSID */
+	vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) |
+		((eaddr & ~ESID_MASK) >> VPN_SHIFT);
+next_pteg:
+	if (rr == 16) {	/* every slot in this group was valid: switch group and evict its slot 0 */
+		primary = !primary;
+		evict = true;
+		rr = 0;
+	}
+
+	pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary);
+
+	/* not evicting yet */
+	if (!evict && (pteg[rr] & PTE_V)) {
+		rr += 2;
+		goto next_pteg;
+	}
+
+	dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[0], pteg[1]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[2], pteg[3]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[4], pteg[5]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[6], pteg[7]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[8], pteg[9]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[10], pteg[11]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[12], pteg[13]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[14], pteg[15]);
+
+	pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V |
+		(primary ? 0 : PTE_SEC);
+	pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
+
+	if (orig_pte->may_write && writable) {
+		pteg1 |= PP_RWRW;
+		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+	} else {
+		pteg1 |= PP_RWRX;
+	}
+
+	if (orig_pte->may_execute)
+		kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
+
+	local_irq_disable();	/* the HTAB update below runs with local interrupts off */
+
+	if (pteg[rr]) {
+		pteg[rr] = 0;	/* clear the old first word before rewriting the slot */
+		asm volatile ("sync");
+	}
+	pteg[rr + 1] = pteg1;	/* second word first; the first word (with PTE_V) is written last */
+	pteg[rr] = pteg0;
+	asm volatile ("sync");
+
+	local_irq_enable();
+
+	dprintk_mmu("KVM: new PTEG: %p\n", pteg);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[0], pteg[1]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[2], pteg[3]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[4], pteg[5]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[6], pteg[7]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[8], pteg[9]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[10], pteg[11]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[12], pteg[13]);
+	dprintk_mmu("KVM:   %08x - %08x\n", pteg[14], pteg[15]);
+
+
+	/* Now tell our Shadow PTE code about the new page */
+
+	pte = kvmppc_mmu_hpte_cache_next(vcpu);
+	if (!pte) {	/* NOTE(review): the HTAB slot written above is left in place on this path - confirm that is intended */
+		kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+		r = -EAGAIN;
+		goto out;
+	}
+
+	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
+		    orig_pte->may_write ? 'w' : '-',
+		    orig_pte->may_execute ? 'x' : '-',
+		    orig_pte->eaddr, (ulong)pteg, vpn,
+		    orig_pte->vpage, hpaddr);
+
+	pte->slot = (ulong)&pteg[rr];	/* kvmppc_mmu_invalidate_pte() later zeroes the word at this address */
+	pte->host_vpn = vpn;
+	pte->pte = *orig_pte;
+	pte->pfn = hpaddr >> PAGE_SHIFT;
+
+	kvmppc_mmu_hpte_cache_map(vcpu, pte);
+
+	kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+out:
+	return r;
+}
+
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)	/* Flushes cached entries matching pte->vpage under a 36-bit mask. */
+{
+	kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL);
+}
+
+static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)	/* Assigns the next host VSID from the pool to gvsid and returns the sid_map slot used. */
+{
+	struct kvmppc_sid_map *map;
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	u16 sid_map_mask;	/* despite the name, this is the index into sid_map[] */
+	static int backwards_map = 0;	/* function-static, so shared by all callers; toggles on every call */
+
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
+		gvsid |= VSID_PR;
+
+	/* We might get collisions that trap in preceding order, so let's
+	   map them differently */
+
+	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
+	if (backwards_map)
+		sid_map_mask = SID_MAP_MASK - sid_map_mask;	/* mirrored index; find_sid_vsid() probes both */
+
+	map = &to_book3s(vcpu)->sid_map[sid_map_mask];
+
+	/* Make sure we're taking the other map next time */
+	backwards_map = !backwards_map;
+
+	/* Uh-oh ... out of mappings. Let's flush! */
+	if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) {
+		vcpu_book3s->vsid_next = 0;
+		memset(vcpu_book3s->sid_map, 0,	/* 'map' stays usable: it points into this array and is refilled below */
+		       sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		kvmppc_mmu_flush_segments(vcpu);
+	}
+	map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next];
+	vcpu_book3s->vsid_next++;
+
+	map->guest_vsid = gvsid;	/* any previous occupant of this slot is overwritten */
+	map->valid = true;
+
+	return map;
+}
+
+int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)	/* Writes the shadow segment register for eaddr's segment; returns 0, or -ENOENT if esid_to_vsid fails. */
+{
+	u32 esid = eaddr >> SID_SHIFT;
+	u64 gvsid;
+	u32 sr;
+	struct kvmppc_sid_map *map;
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);	/* released by svcpu_put() on both exit paths */
+	int r = 0;
+
+	if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
+		/* Invalidate an entry */
+		svcpu->sr[esid] = SR_INVALID;
+		r = -ENOENT;
+		goto out;
+	}
+
+	map = find_sid_vsid(vcpu, gvsid);
+	if (!map)
+		map = create_sid_map(vcpu, gvsid);	/* no existing mapping: allocate one */
+
+	map->guest_esid = esid;
+	sr = map->host_vsid | SR_KP;	/* shadow value is the host VSID with SR_KP set */
+	svcpu->sr[esid] = sr;
+
+	dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
+
+out:
+	svcpu_put(svcpu);
+	return r;
+}
+
+/*
+ * Mark every shadow segment register of the vcpu invalid by writing
+ * SR_INVALID to each element of svcpu->sr[].
+ */
+void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
+	/* ARRAY_SIZE() yields a size_t, so print it with %zu. */
+	dprintk_sr("MMU: flushing all segments (%zu)\n", ARRAY_SIZE(svcpu->sr));
+	for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
+		svcpu->sr[i] = SR_INVALID;
+
+	svcpu_put(svcpu);
+}
+
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)	/* Tears down the HPTE cache and destroys the SID_CONTEXTS context ids of this vcpu. */
+{
+	int i;
+
+	kvmppc_mmu_hpte_destroy(vcpu);
+	preempt_disable();	/* the context ids are destroyed with preemption disabled */
+	for (i = 0; i < SID_CONTEXTS; i++)
+		__destroy_context(to_book3s(vcpu)->context_id[i]);
+	preempt_enable();
+}
+
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)	/* Allocates context ids, fills the VSID pool, records the host HTAB location; returns 0 or -1. */
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int err;
+	ulong sdr1;
+	int i;
+	int j;
+
+	for (i = 0; i < SID_CONTEXTS; i++) {
+		err = __init_new_context();	/* negative on failure, otherwise the new context id */
+		if (err < 0)
+			goto init_fail;
+		vcpu3s->context_id[i] = err;
+
+		/* Remember context id for this combination */
+		for (j = 0; j < 16; j++)
+			vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j);	/* 16 VSIDs per context id */
+	}
+
+	vcpu3s->vsid_next = 0;
+
+	/* Remember where the HTAB is */
+	asm ( "mfsdr1 %0" : "=r"(sdr1) );
+	htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0;	/* file-scope statics, overwritten on every call */
+	htab = (ulong)__va(sdr1 & 0xffff0000);
+
+	kvmppc_mmu_hpte_init(vcpu);
+
+	return 0;
+
+init_fail:
+	for (j = 0; j < i; j++) {	/* unwind only the contexts allocated before the failure */
+		if (!vcpu3s->context_id[j])
+			continue;
+
+		__destroy_context(to_book3s(vcpu)->context_id[j]);
+	}
+
+	return -1;	/* the original error code in err is not propagated */
+}
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
new file mode 100644
index 0000000000..6cfcd20d46
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/******************************************************************************
+ *                                                                            *
+ *                               Entry code                                   *
+ *                                                                            *
+ *****************************************************************************/
+
+.macro LOAD_GUEST_SEGMENTS
+	/* Load the 16 segment registers from the shadow vcpu, then zero every BAT. */
+	/* Required state:
+	 *
+	 * MSR = ~IR|DR
+	 * R1 = host R1
+	 * R2 = host R2
+	 * R3 = shadow vcpu
+	 * all other volatile GPRS = free except R4, R6
+	 * SVCPU[CR]  = guest CR
+	 * SVCPU[XER] = guest XER
+	 * SVCPU[CTR] = guest CTR
+	 * SVCPU[LR]  = guest LR
+	 */
+
+#define XCHG_SR(n)	lwz	r9, (SVCPU_SR+(n*4))(r3);  \
+			mtsr	n, r9
+	/* SRn <- 32-bit word at SVCPU_SR + n*4 off r3; r9 is the scratch register. */
+	XCHG_SR(0)
+	XCHG_SR(1)
+	XCHG_SR(2)
+	XCHG_SR(3)
+	XCHG_SR(4)
+	XCHG_SR(5)
+	XCHG_SR(6)
+	XCHG_SR(7)
+	XCHG_SR(8)
+	XCHG_SR(9)
+	XCHG_SR(10)
+	XCHG_SR(11)
+	XCHG_SR(12)
+	XCHG_SR(13)
+	XCHG_SR(14)
+	XCHG_SR(15)
+
+	/* Clear BATs. */
+
+#define KVM_KILL_BAT(n, reg)		\
+        mtspr   SPRN_IBAT##n##U,reg;	\
+        mtspr   SPRN_IBAT##n##L,reg;	\
+        mtspr   SPRN_DBAT##n##U,reg;	\
+        mtspr   SPRN_DBAT##n##L,reg;	\
+
+        li	r9, 0			/* r9 = 0 is written to both halves of IBAT0-3/DBAT0-3 */
+	KVM_KILL_BAT(0, r9)
+	KVM_KILL_BAT(1, r9)
+	KVM_KILL_BAT(2, r9)
+	KVM_KILL_BAT(3, r9)
+
+.endm
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.macro LOAD_HOST_SEGMENTS
+	/* Reload the host BATs from BATS and the host segment registers. */
+	/* Register usage at this point:
+	 *
+	 * R1         = host R1
+	 * R2         = host R2
+	 * R12        = exit handler id
+	 * R13        = shadow vcpu - SHADOW_VCPU_OFF
+	 * SVCPU.*    = guest *
+	 * SVCPU[CR]  = guest CR
+	 * SVCPU[XER] = guest XER
+	 * SVCPU[CTR] = guest CTR
+	 * SVCPU[LR]  = guest LR
+	 *
+	 */
+
+	/* Restore BATs */
+
+	/* We only overwrite the upper part, so we only restore
+	   the upper part. */
+#define KVM_LOAD_BAT(n, reg, RA, RB)	\
+	lwz	RA,(n*16)+0(reg);	\
+	lwz	RB,(n*16)+4(reg);	\
+	mtspr	SPRN_IBAT##n##U,RA;	\
+	mtspr	SPRN_IBAT##n##L,RB;	\
+	lwz	RA,(n*16)+8(reg);	\
+	lwz	RB,(n*16)+12(reg);	\
+	mtspr	SPRN_DBAT##n##U,RA;	\
+	mtspr	SPRN_DBAT##n##L,RB;	\
+
+	lis     r9, BATS@ha
+	addi    r9, r9, BATS@l
+	tophys(r9, r9)			/* r9 = tophys(address of BATS) */
+	KVM_LOAD_BAT(0, r9, r10, r11)	/* BAT n is read from r9 + n*16: IU, IL, DU, DL */
+	KVM_LOAD_BAT(1, r9, r10, r11)
+	KVM_LOAD_BAT(2, r9, r10, r11)
+	KVM_LOAD_BAT(3, r9, r10, r11)
+
+	/* Restore Segment Registers */
+
+	/* 0xc - 0xf */
+
+        li      r0, 4
+        mtctr   r0			/* CTR = 4: one loop iteration per segment */
+	LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc))	/* value for the first segment */
+        lis     r4, 0xc000		/* r4 = 0xc0000000, the first segment address */
+3:      mtsrin  r3, r4
+        addi    r3, r3, 0x111     /* increment VSID */
+        addis   r4, r4, 0x1000    /* address of next segment */
+        bdnz    3b
+
+	/* 0x0 - 0xb */
+
+	/* switch_mmu_context() needs paging, let's enable it */
+	mfmsr   r9
+	ori     r11, r9, MSR_DR		/* r11 = current MSR with MSR_DR set */
+	mtmsr   r11
+	sync
+
+	/* switch_mmu_context() clobbers r12, rescue it */
+	SAVE_GPR(12, r1)
+
+	/* Calling switch_mmu_context(<inv>, current->mm, <inv>); */
+	lwz	r4, MM(r2)		/* r4 = second argument, loaded from MM(r2) */
+	bl	switch_mmu_context
+
+	/* restore r12 */
+	REST_GPR(12, r1)
+
+	/* Disable paging again */
+	mfmsr   r9
+	li      r6, MSR_DR
+	andc    r9, r9, r6		/* r9 = MSR with MSR_DR cleared */
+	mtmsr	r9
+	sync
+
+.endm
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 0000000000..3b361af873
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/export.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/code-patching-asm.h>
+#include <asm/exception-64s.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/ultravisor-api.h>
+
+/*
+ * These are branched to from interrupt handlers in exception-64s.S which set
+ * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
+ */
+
+/*
+ * This is a hcall, so register convention is as
+ * Documentation/powerpc/papr_hcalls.rst.
+ *
+ * This may also be a syscall from PR-KVM userspace that is to be
+ * reflected to the PR guest kernel, so registers may be set up for
+ * a system call rather than hcall. We don't currently clobber
+ * anything here, but the 0xc00 handler has already clobbered CTR
+ * and CR0, so PR-KVM can not support a guest kernel that preserves
+ * those registers across its system calls.
+ *
+ * The state of registers is as kvmppc_interrupt, except CFAR is not
+ * saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
+ */
+.global	kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	lbz	r10,HSTATE_IN_GUEST(r13)	/* r10 = HSTATE_IN_GUEST mode byte */
+	cmpwi	r10,KVM_GUEST_MODE_HV_P9
+	beq	kvmppc_p9_exit_hcall		/* KVM_GUEST_MODE_HV_P9 has its own exit path */
+#endif
+	ld	r10,PACA_EXGEN+EX_R13(r13)
+	SET_SCRATCH0(r10)			/* copy the EX_R13 slot of EXGEN into SCRATCH0 */
+	li	r10,0xc00			/* r10 = trap vector, as kvmppc_interrupt expects */
+	/* Now we look like kvmppc_interrupt */
+	li	r11,PACA_EXGEN			/* the save area is always PACA_EXGEN here */
+	b	.Lgot_save_area
+
+/*
+ * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
+ * call convention:
+ *
+ * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
+ * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
+ * guest R13 also saved in SCRATCH0
+ * R13		= PACA
+ * R11		= (H)SRR0
+ * R12		= (H)SRR1
+ * R9		= guest CR
+ * PPR is set to medium
+ *
+ * With the addition for KVM:
+ * R10		= trap vector
+ */
+.global	kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	std	r10,HSTATE_SCRATCH0(r13)	/* park the trap vector while r10 is reused */
+	lbz	r10,HSTATE_IN_GUEST(r13)
+	cmpwi	r10,KVM_GUEST_MODE_HV_P9
+	beq	kvmppc_p9_exit_interrupt	/* trap vector stays in HSTATE_SCRATCH0 */
+	ld	r10,HSTATE_SCRATCH0(r13)	/* r10 = trap vector again */
+#endif
+	li	r11,PACA_EXGEN
+	cmpdi	r10,0x200
+	bgt+	.Lgot_save_area			/* trap > 0x200: PACA_EXGEN */
+	li	r11,PACA_EXMC
+	beq	.Lgot_save_area			/* trap == 0x200: PACA_EXMC */
+	li	r11,PACA_EXNMI			/* trap < 0x200: PACA_EXNMI */
+.Lgot_save_area:
+	add	r11,r11,r13			/* r11 = address of the chosen save area */
+BEGIN_FTR_SECTION
+	ld	r12,EX_CFAR(r11)
+	std	r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	ld	r12,EX_CTR(r11)
+	mtctr	r12				/* CTR = value saved in EX_CTR */
+BEGIN_FTR_SECTION
+	ld	r12,EX_PPR(r11)
+	std	r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	ld	r12,EX_R12(r11)
+	std	r12,HSTATE_SCRATCH0(r13)	/* saved R12 moves to HSTATE_SCRATCH0 */
+	sldi	r12,r9,32
+	or	r12,r12,r10			/* r12 = (r9 << 32) | r10 */
+	ld	r9,EX_R9(r11)
+	ld	r10,EX_R10(r11)
+	ld	r11,EX_R11(r11)			/* r11 last: it is the base register */
+
+	/*
+	 * Hcalls and other interrupts come here after normalising register
+	 * contents and save locations:
+	 *
+	 * R12		= (guest CR << 32) | interrupt vector
+	 * R13		= PACA
+	 * guest R12 saved in shadow HSTATE_SCRATCH0
+	 * guest R13 saved in SPRN_SCRATCH0
+	 */
+	std	r9,HSTATE_SCRATCH2(r13)		/* free r9 for the mode check */
+	lbz	r9,HSTATE_IN_GUEST(r13)
+	cmpwi	r9,KVM_GUEST_MODE_SKIP
+	beq-	.Lmaybe_skip
+.Lno_skip:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	cmpwi	r9,KVM_GUEST_MODE_GUEST
+	beq	kvmppc_interrupt_pr		/* KVM_GUEST_MODE_GUEST goes to the PR handler */
+#endif
+	b	kvmppc_interrupt_hv
+#else
+	b	kvmppc_interrupt_pr
+#endif
+
+/*
+ * "Skip" interrupts are part of a trick KVM uses with hash guests to load
+ * the faulting instruction in guest memory from the hypervisor without
+ * walking page tables.
+ *
+ * When the guest takes a fault that requires the hypervisor to load the
+ * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
+ * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
+ * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
+ * but loads and stores will access the guest context. This is used to load
+ * the faulting instruction using the faulting guest effective address.
+ *
+ * However the guest context may not be able to translate, or it may cause a
+ * machine check or other issue, which results in a fault in the host
+ * (even with KVM-HV).
+ *
+ * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
+ * are (or are likely) caused by that load, the instruction is skipped by
+ * just returning with the PC advanced +4, where it is noticed the load did
+ * not execute and it goes to the slow path which walks the page tables to
+ * read guest memory.
+ */
+.Lmaybe_skip:
+	cmpwi	r12,BOOK3S_INTERRUPT_MACHINE_CHECK	/* cmpwi compares the low word of r12 */
+	beq	1f
+	cmpwi	r12,BOOK3S_INTERRUPT_DATA_STORAGE
+	beq	1f
+	cmpwi	r12,BOOK3S_INTERRUPT_DATA_SEGMENT
+	beq	1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* HSRR interrupts get 2 added to interrupt number */
+	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
+	beq	2f
+#endif
+	b	.Lno_skip			/* any other vector is handled normally */
+1:	mfspr	r9,SPRN_SRR0
+	addi	r9,r9,4
+	mtspr	SPRN_SRR0,r9			/* SRR0 += 4 */
+	ld	r12,HSTATE_SCRATCH0(r13)	/* reload r12 and r9 from their scratch slots */
+	ld	r9,HSTATE_SCRATCH2(r13)
+	GET_SCRATCH0(r13)			/* r13 last: it is the base for the loads above */
+	RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2:	mfspr	r9,SPRN_HSRR0
+	addi	r9,r9,4
+	mtspr	SPRN_HSRR0,r9			/* HSRR0 += 4 */
+	ld	r12,HSTATE_SCRATCH0(r13)
+	ld	r9,HSTATE_SCRATCH2(r13)
+	GET_SCRATCH0(r13)
+	HRFI_TO_KERNEL
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS			(144 + STACK_FRAME_MIN_SIZE)
+#define STACK_SLOT_NVGPRS	(SFS - 144)	/* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
+ *
+ * Enter the guest on a ISAv3.0 or later system.
+ */
+.balign	IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+	mflr	r0
+	std	r0,PPC_LR_STKOFF(r1)		/* save LR in the caller's frame */
+	stdu	r1,-SFS(r1)			/* push an SFS-byte frame */
+
+	std	r1,HSTATE_HOST_R1(r13)		/* the exit path reloads r1 from this slot */
+
+	mfcr	r4
+	stw	r4,SFS+8(r1)			/* save CR at offset 8 of the caller's frame */
+
+	reg = 14
+	.rept	18				/* store r14-r31 in the STACK_SLOT_NVGPRS area */
+	std	reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	ld	r4,VCPU_LR(r3)
+	mtlr	r4
+	ld	r4,VCPU_CTR(r3)
+	mtctr	r4
+	ld	r4,VCPU_XER(r3)
+	mtspr	SPRN_XER,r4
+
+	ld	r1,VCPU_CR(r3)			/* CR value is held in r1 until the mtcr below */
+
+BEGIN_FTR_SECTION
+	ld	r4,VCPU_CFAR(r3)
+	mtspr	SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r4,VCPU_PPR(r3)
+	mtspr	SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	reg = 4
+	.rept	28				/* load r4-r31 from the vcpu */
+	ld	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	ld	r4,VCPU_KVM(r3)			/* r4 is borrowed for the secure-guest test */
+	lbz	r4,KVM_SECURE_GUEST(r4)
+	cmpdi	r4,0
+	ld	r4,VCPU_GPR(R4)(r3)		/* reload r4; ld does not change the CR result */
+	bne	.Lret_to_ultra
+
+	mtcr	r1
+
+	ld	r0,VCPU_GPR(R0)(r3)
+	ld	r1,VCPU_GPR(R1)(r3)
+	ld	r2,VCPU_GPR(R2)(r3)
+	ld	r3,VCPU_GPR(R3)(r3)		/* r3 last: it is the vcpu base pointer */
+
+	HRFI_TO_GUEST
+	b	.
+
+	/*
+	 * Use UV_RETURN ultracall to return control back to the Ultravisor
+	 * after processing an hypercall or interrupt that was forwarded
+	 * (a.k.a. reflected) to the Hypervisor.
+	 *
+	 * All registers have already been reloaded except the ucall requires:
+	 *   R0 = hcall result
+	 *   R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+	 *   R3 = UV_RETURN
+	 */
+.Lret_to_ultra:
+	mtcr	r1
+	ld	r1,VCPU_GPR(R1)(r3)
+
+	ld	r0,VCPU_GPR(R3)(r3)		/* R0 = the vcpu's saved r3 value */
+	mfspr	r2,SPRN_SRR1
+	LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+	sc	2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
+ * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
+ * (SPRs and FP, VEC, etc).
+ */
+.balign	IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+	mfspr	r11,SPRN_SRR0			/* r11/r12 = SRR0/SRR1, as kvmppc_interrupt has them */
+	mfspr	r12,SPRN_SRR1
+	li	r10,0xc00
+	std	r10,HSTATE_SCRATCH0(r13)	/* trap vector 0xc00; no branch, continues below */
+
+.balign	IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+	/*
+	 * If set to KVM_GUEST_MODE_HV_P9 but we're still in the
+	 * hypervisor, that means we can't return from the entry stack.
+	 */
+	rldicl. r10,r12,64-MSR_HV_LG,63		/* r10 = bit MSR_HV_LG of r12, sets CR0 */
+	bne-	kvmppc_p9_bad_interrupt
+
+	std     r1,HSTATE_SCRATCH1(r13)		/* park r1 and r3 before they are reused */
+	std     r3,HSTATE_SCRATCH2(r13)
+	ld	r1,HSTATE_HOST_R1(r13)		/* r1 = value stored by kvmppc_p9_enter_guest */
+	ld	r3,HSTATE_KVM_VCPU(r13)		/* r3 = vcpu pointer */
+
+	std	r9,VCPU_CR(r3)
+
+1:
+	std	r11,VCPU_PC(r3)
+	std	r12,VCPU_MSR(r3)
+
+	reg = 14
+	.rept	18				/* store r14-r31 into the vcpu */
+	std	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	/* r1, r3, r9-r13 are saved to vcpu by C code */
+	std	r0,VCPU_GPR(R0)(r3)
+	std	r2,VCPU_GPR(R2)(r3)
+	reg = 4
+	.rept	5				/* store r4-r8 into the vcpu */
+	std	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	LOAD_PACA_TOC()
+
+	mflr	r4
+	std	r4,VCPU_LR(r3)
+	mfspr	r4,SPRN_XER
+	std	r4,VCPU_XER(r3)
+
+	reg = 14
+	.rept	18				/* reload r14-r31 from the STACK_SLOT_NVGPRS area */
+	ld	reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	lwz	r4,SFS+8(r1)			/* CR word stored by kvmppc_p9_enter_guest */
+	mtcr	r4
+
+	/*
+	 * Flush the link stack here, before executing the first blr on the
+	 * way out of the guest.
+	 *
+	 * The link stack won't match coming out of the guest anyway so the
+	 * only cost is the flush itself. The call clobbers r0.
+	 */
+1:	nop
+	patch_site 1b patch__call_kvm_flush_link_stack_p9
+
+	addi	r1,r1,SFS			/* pop the SFS-byte frame */
+	ld	r0,PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr					/* return to the caller of kvmppc_p9_enter_guest */
+
+/*
+ * Took an interrupt somewhere right before HRFID to guest, so registers are
+ * in a bad way. Return things hopefully enough to run host virtual code and
+ * run the Linux interrupt handler (SRESET or MCE) to print something useful.
+ *
+ * We could be really clever and save all host registers in known locations
+ * before setting HSTATE_IN_GUEST, then restoring them all here, and setting
+ * return address to a fixup that sets them up again. But that's a lot of
+ * effort for a small bit of code. Lots of other things to do first.
+ */
+kvmppc_p9_bad_interrupt:
+BEGIN_MMU_FTR_SECTION
+	/*
+	 * Hash host doesn't try to recover MMU (requires host SLB reload)
+	 */
+	b	.				/* spin here when MMU_FTR_TYPE_RADIX is clear */
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+	/*
+	 * Clean up guest registers to give host a chance to run.
+	 */
+	li	r10,0				/* r10 = 0, written to each SPR below */
+	mtspr	SPRN_AMR,r10
+	mtspr	SPRN_IAMR,r10
+	mtspr	SPRN_CIABR,r10
+	mtspr	SPRN_DAWRX0,r10
+BEGIN_FTR_SECTION
+	mtspr	SPRN_DAWRX1,r10
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+
+	/*
+	 * Switch to host MMU mode (don't have the real host PID but we aren't
+	 * going back to userspace).
+	 */
+	hwsync
+	isync
+
+	mtspr	SPRN_PID,r10			/* PID = 0 (r10 is still zero) */
+
+	ld	r10, HSTATE_KVM_VCPU(r13)
+	ld	r10, VCPU_KVM(r10)
+	lwz	r10, KVM_HOST_LPID(r10)		/* vcpu -> kvm -> KVM_HOST_LPID field */
+	mtspr	SPRN_LPID,r10
+
+	ld	r10, HSTATE_KVM_VCPU(r13)
+	ld	r10, VCPU_KVM(r10)
+	ld	r10, KVM_HOST_LPCR(r10)		/* vcpu -> kvm -> KVM_HOST_LPCR field */
+	mtspr	SPRN_LPCR,r10
+
+	isync
+
+	/*
+	 * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+	 * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
+	 */
+	li	r10,KVM_GUEST_MODE_NONE
+	stb	r10,HSTATE_IN_GUEST(r13)
+	li	r10,MSR_RI
+	andc	r12,r12,r10			/* r12 &= ~MSR_RI */
+
+	/*
+	 * Go back to interrupt handler. MCE and SRESET have their specific
+	 * PACA save area so they should be used directly. They set up their
+	 * own stack. The other handlers all use EXGEN. They will use the
+	 * guest r1 if it looks like a kernel stack, so just load the
+	 * emergency stack and go to program check for all other interrupts.
+	 */
+	ld	r10,HSTATE_SCRATCH0(r13)	/* r10 = trap vector stored on entry */
+	cmpwi	r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	.Lcall_machine_check_common
+
+	cmpwi	r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+	beq	.Lcall_system_reset_common
+
+	b	.				/* any other vector: spin forever */
+
+.Lcall_machine_check_common:
+	b	machine_check_common
+
+.Lcall_system_reset_common:
+	b	system_reset_common
+#endif
+#endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
new file mode 100644
index 0000000000..61290282fd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -0,0 +1,670 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/book3s/64/mmu-hash.h>
+
+/* #define DEBUG_MMU */
+
+#ifdef DEBUG_MMU
+#define dprintk(X...) printk(KERN_INFO X)
+#else
+#define dprintk(X...) do { } while(0)
+#endif
+/* Return the valid guest SLB entry whose ESID matches @eaddr, or NULL when none does. */
+static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
+				struct kvm_vcpu *vcpu,
+				gva_t eaddr)
+{
+	int i;
+	u64 esid = GET_ESID(eaddr);
+	u64 esid_1t = GET_ESID_1T(eaddr);
+
+	for (i = 0; i < vcpu->arch.slb_nr; i++) {
+		u64 cmp_esid = esid;
+
+		if (!vcpu->arch.slb[i].valid)
+			continue;
+		/* Entries with tb set are compared against the 1T ESID instead. */
+		if (vcpu->arch.slb[i].tb)
+			cmp_esid = esid_1t;
+
+		if (vcpu->arch.slb[i].esid == cmp_esid)
+			return &vcpu->arch.slb[i];
+	}
+	/* No match: dump entries with a non-zero vsid (dprintk is a no-op without DEBUG_MMU). */
+	dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
+		eaddr, esid, esid_1t);
+	for (i = 0; i < vcpu->arch.slb_nr; i++) {
+	    if (vcpu->arch.slb[i].vsid)
+		dprintk("  %d: %c%c%c %llx %llx\n", i,
+			vcpu->arch.slb[i].valid ? 'v' : ' ',
+			vcpu->arch.slb[i].large ? 'l' : ' ',
+			vcpu->arch.slb[i].tb    ? 't' : ' ',
+			vcpu->arch.slb[i].esid,
+			vcpu->arch.slb[i].vsid);
+	}
+
+	return NULL;
+}
+/* Segment shift of @slbe: SID_SHIFT_1T when its tb flag is set, SID_SHIFT otherwise. */
+static int kvmppc_slb_sid_shift(struct kvmppc_slb *slbe)
+{
+	return slbe->tb ? SID_SHIFT_1T : SID_SHIFT;
+}
+/* Mask covering the address bits below the segment shift of @slbe. */
+static u64 kvmppc_slb_offset_mask(struct kvmppc_slb *slbe)
+{
+	return (1ul << kvmppc_slb_sid_shift(slbe)) - 1;
+}
+/* Build the VPN for @eaddr: the in-segment offset >> VPN_SHIFT with the VSID placed above it. */
+static u64 kvmppc_slb_calc_vpn(struct kvmppc_slb *slb, gva_t eaddr)
+{
+	eaddr &= kvmppc_slb_offset_mask(slb);	/* keep only the offset within the segment */
+
+	return (eaddr >> VPN_SHIFT) |
+		((slb->vsid) << (kvmppc_slb_sid_shift(slb) - VPN_SHIFT));
+}
+/* Convert @eaddr to its virtual page number through the guest SLB; 0 when no entry matches. */
+static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
+					 bool data)
+{
+	struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+
+	/* @data is not consulted; the SLB lookup only depends on @eaddr. */
+	if (slbe)
+		return kvmppc_slb_calc_vpn(slbe, eaddr);
+
+	return 0;
+}
+/* Page shift for an MMU_PAGE_* index: 16 for 64K, 24 for 16M, 12 for everything else. */
+static int mmu_pagesize(int mmu_pg)
+{
+	if (mmu_pg == MMU_PAGE_64K)
+		return 16;
+	if (mmu_pg == MMU_PAGE_16M)
+		return 24;
+
+	/* Any other index is treated as a 4K page. */
+	return 12;
+}
+/* Page shift that corresponds to the base page size recorded in @slbe. */
+static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
+{
+	return mmu_pagesize(slbe->base_page_size);
+}
+/* Page index of @eaddr inside its segment, in units of the segment's base page size. */
+static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
+{
+	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
+	/* Mask off the segment bits, then drop the in-page offset bits. */
+	return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
+}
+/* Host virtual address of the primary (or, if @second, secondary) guest PTEG for @eaddr. */
+static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
+				struct kvmppc_slb *slbe, gva_t eaddr,
+				bool second)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	u64 hash, pteg, htabsize;
+	u32 ssize;
+	hva_t r;
+	u64 vpn;
+	/* Shift in 64 bits: (sdr1 & 0x1f) + 11 can exceed 31, undefined for an int. */
+	htabsize = ((1ULL << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
+
+	vpn = kvmppc_slb_calc_vpn(slbe, eaddr);
+	ssize = slbe->tb ? MMU_SEGSIZE_1T : MMU_SEGSIZE_256M;
+	hash = hpt_hash(vpn, kvmppc_mmu_book3s_64_get_pagesize(slbe), ssize);
+	if (second)
+		hash = ~hash;	/* the secondary PTEG uses the complemented hash */
+	hash &= ((1ULL << 39ULL) - 1ULL);
+	hash &= htabsize;
+	hash <<= 7ULL;	/* scale the PTEG index by 128 bytes */
+
+	pteg = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+	pteg |= hash;
+
+	dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
+		kvmppc_mmu_book3s_64_get_page(slbe, eaddr),
+		vcpu_book3s->sdr1, pteg, slbe->vsid);
+	/* When running a PAPR guest, SDR1 contains a HVA address instead
+           of a GPA */
+	if (vcpu->arch.papr_enabled)
+		r = pteg;
+	else
+		r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
+
+	if (kvm_is_error_hva(r))
+		return r;	/* callers test the result with kvm_is_error_hva() */
+	return r | (pteg & ~PAGE_MASK);
+}
+/* AVPN for @eaddr in @slbe, shifted so it can be masked with HPTE_V_AVPN by the caller. */
+static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
+{
+	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
+	u64 avpn;
+	/* Page index within the segment, with the VSID placed above it. */
+	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
+	avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
+	/* Normalise to a 16-bit page shift: right for smaller pages, left for larger. */
+	if (p < 16)
+		avpn >>= ((80 - p) - 56) - 8;	/* 16 - p */
+	else
+		avpn <<= p - 16;
+
+	return avpn;
+}
+
+/*
+ * Decode the page size encoded in the second word @r of a HPTE.
+ * Only the encoding matching the base page size of @slbe is accepted
+ * (mixed pagesize segments are not handled yet); otherwise return -1.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+	int base = slbe->base_page_size;
+
+	/* 64K base size: bits 0xf000 of @r must read 0x1000. */
+	if (base == MMU_PAGE_64K && (r & 0xf000) == 0x1000)
+		return MMU_PAGE_64K;
+
+	/* 16M base size: bits 0xff000 of @r must all be clear. */
+	if (base == MMU_PAGE_16M && (r & 0xff000) == 0)
+		return MMU_PAGE_16M;
+
+	return -1;
+}
+/* Translate @eaddr via the guest SLB and HPT into @gpte: 0, -EPERM, -ENOENT or -EINVAL. */
+static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      struct kvmppc_pte *gpte, bool data,
+				      bool iswrite)
+{
+	struct kvmppc_slb *slbe;
+	hva_t ptegp;
+	u64 pteg[16];
+	u64 avpn = 0;
+	u64 r;
+	u64 v_val, v_mask;
+	u64 eaddr_mask;
+	int i;
+	u8 pp, key = 0;
+	bool found = false;
+	bool second = false;
+	int pgsize;
+	ulong mp_ea = vcpu->arch.magic_page_ea;
+
+	/* Magic page override; the page offset comes from eaddr, gpte->raddr is not set yet */
+	if (unlikely(mp_ea) &&
+	    unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		gpte->eaddr = eaddr;
+		gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+		gpte->raddr = vcpu->arch.magic_page_pa | (eaddr & 0xfff);
+		gpte->raddr &= KVM_PAM;
+		gpte->may_execute = true;
+		gpte->may_read = true;
+		gpte->may_write = true;
+		gpte->page_size = MMU_PAGE_4K;
+		gpte->wimg = HPTE_R_M;
+
+		return 0;
+	}
+
+	slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+	if (!slbe)
+		goto no_seg_found;
+	/* Build the value/mask pair the first HPTE doubleword has to match. */
+	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+	v_val = avpn & HPTE_V_AVPN;
+
+	if (slbe->tb)
+		v_val |= SLB_VSID_B_1T;
+	if (slbe->large)
+		v_val |= HPTE_V_LARGE;
+	v_val |= HPTE_V_VALID;
+
+	v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
+		HPTE_V_SECONDARY;
+
+	pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+	/* The primary PTEG is searched first; on a miss we come back for the secondary. */
+do_second:
+	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
+	if (kvm_is_error_hva(ptegp))
+		goto no_page_found;
+
+	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
+		printk_ratelimited(KERN_ERR
+			"KVM: Can't copy data from 0x%lx!\n", ptegp);
+		goto no_page_found;
+	}
+	/* key is ORed into pp below: 4 when the Kp/Ks bit for the current MSR_PR is set. */
+	if ((kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Kp)
+		key = 4;
+	else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && slbe->Ks)
+		key = 4;
+	/* A PTEG holds 8 HPTEs of two big-endian doublewords each. */
+	for (i=0; i<16; i+=2) {
+		u64 pte0 = be64_to_cpu(pteg[i]);
+		u64 pte1 = be64_to_cpu(pteg[i + 1]);
+
+		/* Check all relevant fields of 1st dword */
+		if ((pte0 & v_mask) == v_val) {
+			/* If large page bit is set, check pgsize encoding */
+			if (slbe->large &&
+			    (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+				pgsize = decode_pagesize(slbe, pte1);
+				if (pgsize < 0)
+					continue;
+			}
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		if (second)
+			goto no_page_found;
+		v_val |= HPTE_V_SECONDARY;
+		second = true;
+		goto do_second;
+	}
+	/* i indexes the matching HPTE; r is its second doubleword. */
+	r = be64_to_cpu(pteg[i+1]);
+	pp = (r & HPTE_R_PP) | key;
+	if (r & HPTE_R_PP0)
+		pp |= 8;
+
+	gpte->eaddr = eaddr;
+	gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
+
+	eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
+	gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+	gpte->page_size = pgsize;
+	gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+	if (unlikely(vcpu->arch.disable_kernel_nx) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR))
+		gpte->may_execute = true;
+	gpte->may_read = false;
+	gpte->may_write = false;
+	gpte->wimg = r & HPTE_R_WIMG;
+	/* pp values not listed below grant neither read nor write access. */
+	switch (pp) {
+	case 0:
+	case 1:
+	case 2:
+	case 6:
+		gpte->may_write = true;
+		fallthrough;
+	case 3:
+	case 5:
+	case 7:
+	case 10:
+		gpte->may_read = true;
+		break;
+	}
+
+	dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+		"-> 0x%lx\n",
+		eaddr, avpn, gpte->vpage, gpte->raddr);
+
+	/* Update PTE R and C bits, so the guest's swapper knows we used the
+	 * page */
+	if (gpte->may_read && !(r & HPTE_R_R)) {
+		/*
+		 * Set the accessed flag.
+		 * We have to write this back with a single byte write
+		 * because another vcpu may be accessing this on
+		 * non-PAPR platforms such as mac99, and this is
+		 * what real hardware does.
+		 */
+		char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
+		r |= HPTE_R_R;
+		put_user(r >> 8, addr + 6);
+	}
+	if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
+		/* Set the dirty flag */
+		/* Use a single byte write */
+		char __user *addr = (char __user *) (ptegp + (i + 1) * sizeof(u64));
+		r |= HPTE_R_C;
+		put_user(r, addr + 7);
+	}
+
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+
+	if (!gpte->may_read || (iswrite && !gpte->may_write))
+		return -EPERM;
+	return 0;
+
+no_page_found:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	return -ENOENT;
+
+no_seg_found:
+	dprintk("KVM MMU: Trigger segment fault\n");
+	return -EINVAL;
+}
+/* Emulate slbmte: decode @rs/@rb into guest SLB entry (rb & 0xfff) and map the segment. */
+static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
+{
+	u64 esid, esid_1t;
+	int slb_nr;
+	struct kvmppc_slb *slbe;
+
+	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
+
+	esid = GET_ESID(rb);
+	esid_1t = GET_ESID_1T(rb);
+	slb_nr = rb & 0xfff;
+	/* Valid indices are 0 .. arch.slb_nr - 1, so arch.slb_nr itself is rejected too. */
+	if (slb_nr >= vcpu->arch.slb_nr)
+		return;
+
+	slbe = &vcpu->arch.slb[slb_nr];
+
+	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
+	slbe->tb    = (rs & SLB_VSID_B_1T) ? 1 : 0;
+	slbe->esid  = slbe->tb ? esid_1t : esid;
+	slbe->vsid  = (rs & ~SLB_VSID_B) >> (kvmppc_slb_sid_shift(slbe) - 16);
+	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
+	slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
+	slbe->Kp    = (rs & SLB_VSID_KP) ? 1 : 0;
+	slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
+	slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
+	/* Base page size: 4K unless L is set; with MULTI_PGSIZE the LP field selects it. */
+	slbe->base_page_size = MMU_PAGE_4K;
+	if (slbe->large) {
+		if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+			switch (rs & SLB_VSID_LP) {
+			case SLB_VSID_LP_00:
+				slbe->base_page_size = MMU_PAGE_16M;
+				break;
+			case SLB_VSID_LP_01:
+				slbe->base_page_size = MMU_PAGE_64K;
+				break;
+			}
+		} else
+			slbe->base_page_size = MMU_PAGE_16M;
+	}
+	/* Keep the raw register images; the slbmfee/slbmfev/slbfee handlers return them. */
+	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
+	slbe->origv = rs;
+
+	/* Map the new segment */
+	kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT);
+}
+/* Look up @eaddr in the guest SLB; store the entry's origv (or 0) in @ret_slb. */
+static int kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr,
+				       ulong *ret_slb)
+{
+	struct kvmppc_slb *match = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+
+	if (!match) {
+		*ret_slb = 0;
+		return -ENOENT;
+	}
+	*ret_slb = match->origv;	/* the raw RS image kept for this entry */
+	return 0;
+}
+/* Return the orige word of guest SLB entry @slb_nr, or 0 when the index is out of range. */
+static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
+{
+	struct kvmppc_slb *slbe;
+	/* Valid indices are 0 .. arch.slb_nr - 1. */
+	if (slb_nr >= vcpu->arch.slb_nr)
+		return 0;
+
+	slbe = &vcpu->arch.slb[slb_nr];
+
+	return slbe->orige;
+}
+/* Return the origv word of guest SLB entry @slb_nr, or 0 when the index is out of range. */
+static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
+{
+	struct kvmppc_slb *slbe;
+	/* Valid indices are 0 .. arch.slb_nr - 1. */
+	if (slb_nr >= vcpu->arch.slb_nr)
+		return 0;
+
+	slbe = &vcpu->arch.slb[slb_nr];
+
+	return slbe->origv;
+}
+/* Emulate slbie: invalidate the guest SLB entry covering @ea and flush that segment. */
+static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
+{
+	struct kvmppc_slb *slbe;
+	u64 seg_size;
+
+	dprintk("KVM MMU: slbie(0x%llx)\n", ea);
+
+	slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
+	/* Nothing to do when no valid entry covers @ea. */
+	if (!slbe)
+		return;
+
+	dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
+
+	slbe->valid = false;
+	slbe->orige = 0;
+	slbe->origv = 0;
+	/* slbe->tb is left intact, so the segment size can still be derived from it. */
+	seg_size = 1ull << kvmppc_slb_sid_shift(slbe);
+	kvmppc_mmu_flush_segment(vcpu, ea & ~(seg_size - 1), seg_size);
+}
+/* Emulate slbia: invalidate guest SLB entries 1 .. slb_nr - 1 (entry 0 is not touched). */
+static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	dprintk("KVM MMU: slbia()\n");
+	/* The loop deliberately starts at index 1. */
+	for (i = 1; i < vcpu->arch.slb_nr; i++) {
+		vcpu->arch.slb[i].valid = false;
+		vcpu->arch.slb[i].orige = 0;
+		vcpu->arch.slb[i].origv = 0;
+	}
+	/* With MSR_IR set, flush all segments and map the one containing the current PC. */
+	if (kvmppc_get_msr(vcpu) & MSR_IR) {
+		kvmppc_mmu_flush_segments(vcpu);
+		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+	}
+}
+/* Emulate mtsrin by building an equivalent RS/RB pair and handing it to slbmte. */
+static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
+					ulong value)
+{
+	u64 rb = 0, rs = 0;
+
+	/*
+	 * According to Book3 2.01 mtsrin is implemented as:
+	 *
+	 * The SLB entry specified by (RB)32:35 is loaded from register
+	 * RS, as follows.
+	 *
+	 * SLBE Bit	Source			SLB Field
+	 *
+	 * 0:31		0x0000_0000		ESID-0:31
+	 * 32:35	(RB)32:35		ESID-32:35
+	 * 36		0b1			V
+	 * 37:61	0x00_0000|| 0b0		VSID-0:24
+	 * 62:88	(RS)37:63		VSID-25:51
+	 * 89:91	(RS)33:35		Ks Kp N
+	 * 92		(RS)36			L ((RS)36 must be 0b0)
+	 * 93		0b0			C
+	 */
+
+	dprintk("KVM MMU: mtsrin(0x%x, 0x%lx)\n", srnum, value);
+
+	/* ESID = srnum */
+	rb |= (srnum & 0xf) << 28;
+	/* Set the valid bit */
+	rb |= 1 << 27;
+	/* Index = ESID */
+	rb |= srnum;
+
+	/* VSID = VSID */
+	rs |= (value & 0xfffffff) << 12;
+	/* flags = flags */
+	rs |= ((value >> 28) & 0x7) << 9;
+	/* slbmte decodes the index from the low 12 bits of rb. */
+	kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb);
+}
+/* Emulate tlbie: flush the shadow PTEs matching (@va >> 12) under a size-dependent mask. */
+static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
+				       bool large)
+{
+	u64 mask = 0xFFFFFFFFFULL;	/* default mask, kept when no large page is indicated */
+	unsigned long i;
+	struct kvm_vcpu *v;
+
+	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
+
+	/*
+	 * The tlbie instruction changed behaviour starting with
+	 * POWER6.  POWER6 and later don't have the large page flag
+	 * in the instruction but in the RB value, along with bits
+	 * indicating page and segment sizes.
+	 */
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+		/* POWER6 or later */
+		if (va & 1) {		/* L bit */
+			if ((va & 0xf000) == 0x1000)
+				mask = 0xFFFFFFFF0ULL;	/* 64k page */
+			else
+				mask = 0xFFFFFF000ULL;	/* 16M page */
+		}
+	} else {
+		/* older processors, e.g. PPC970 */
+		if (large)
+			mask = 0xFFFFFF000ULL;
+	}
+	/* flush this VA on all vcpus */
+	kvm_for_each_vcpu(i, v, vcpu->kvm)
+		kvmppc_mmu_pte_vflush(v, va >> 12, mask);
+}
+/* Only built for CONFIG_PPC_64K_PAGES: is the magic page inside segment @esid (MSR_PR clear)? */
+#ifdef CONFIG_PPC_64K_PAGES
+static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
+{
+	ulong mp_ea = vcpu->arch.magic_page_ea;	/* zero means no magic page is set */
+	/* True only if a magic page is set, MSR_PR is clear and its ESID equals @esid. */
+	return mp_ea && !(kvmppc_get_msr(vcpu) & MSR_PR) &&
+		(mp_ea >> SID_SHIFT) == esid;
+}
+#endif
+/* Compute the guest VSID for @esid under the current MSR; 0 on success, -EINVAL without SLB. */
+static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
+					     u64 *vsid)
+{
+	ulong ea = esid << SID_SHIFT;
+	struct kvmppc_slb *slb;
+	u64 gvsid = esid;
+	ulong mp_ea = vcpu->arch.magic_page_ea;
+	int pagesize = MMU_PAGE_64K;
+	u64 msr = kvmppc_get_msr(vcpu);
+	/* slb is only assigned when MSR_DR or MSR_IR is set; only those paths read it. */
+	if (msr & (MSR_DR|MSR_IR)) {
+		slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
+		if (slb) {
+			gvsid = slb->vsid;
+			pagesize = slb->base_page_size;
+			if (slb->tb) {
+				gvsid <<= SID_SHIFT_1T - SID_SHIFT;
+				gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
+				gvsid |= VSID_1T;
+			}
+		}
+	}
+	/* Tag the VSID according to which of MSR_DR / MSR_IR are enabled. */
+	switch (msr & (MSR_DR|MSR_IR)) {
+	case 0:
+		gvsid = VSID_REAL | esid;
+		break;
+	case MSR_IR:
+		gvsid |= VSID_REAL_IR;
+		break;
+	case MSR_DR:
+		gvsid |= VSID_REAL_DR;
+		break;
+	case MSR_DR|MSR_IR:
+		if (!slb)
+			goto no_slb;
+
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+#ifdef CONFIG_PPC_64K_PAGES
+	/*
+	 * Mark this as a 64k segment if the host is using
+	 * 64k pages, the host MMU supports 64k pages and
+	 * the guest segment page size is >= 64k,
+	 * but not if this segment contains the magic page.
+	 */
+	if (pagesize >= MMU_PAGE_64K &&
+	    mmu_psize_defs[MMU_PAGE_64K].shift &&
+	    !segment_contains_magic_page(vcpu, esid))
+		gvsid |= VSID_64K;
+#endif
+
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
+		gvsid |= VSID_PR;
+
+	*vsid = gvsid;
+	return 0;
+
+no_slb:
+	/* Catch magic page case */
+	if (unlikely(mp_ea) &&
+	    unlikely(esid == (mp_ea >> SID_SHIFT)) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		*vsid = VSID_REAL | esid;
+		return 0;
+	}
+	/* *vsid is left unwritten on this failure path. */
+	return -EINVAL;
+}
+/* True when bit 0x80 of the vcpu's hid[5] value is set. */
+static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu)
+{
+	return (to_book3s(vcpu)->hid[5] & 0x80);
+}
+/* Install the Book3S 64-bit MMU callbacks on @vcpu and set BOOK3S_HFLAG_SLB. */
+void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+	/* mfsrin is the only callback left NULL here. */
+	mmu->mfsrin = NULL;
+	mmu->mtsrin = kvmppc_mmu_book3s_64_mtsrin;
+	mmu->slbmte = kvmppc_mmu_book3s_64_slbmte;
+	mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee;
+	mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev;
+	mmu->slbfee = kvmppc_mmu_book3s_64_slbfee;
+	mmu->slbie = kvmppc_mmu_book3s_64_slbie;
+	mmu->slbia = kvmppc_mmu_book3s_64_slbia;
+	mmu->xlate = kvmppc_mmu_book3s_64_xlate;
+	mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
+	mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
+	mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
+	mmu->is_dcbz32 = kvmppc_mmu_book3s_64_is_dcbz32;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
new file mode 100644
index 0000000000..bc6a381b53
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *     Alexander Graf <agraf@suse.de>
+ *     Kevin Wolf <mail@kevin-wolf.de>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/pkeys.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+#include "trace_pr.h"
+#include "book3s.h"
+
+#define PTE_SIZE 12
+
+/*
+ * Invalidate the host HPTE tracked by @pte by handing its slot, host VPN and
+ * page size to mmu_hash_ops.hpte_invalidate().  @vcpu is not used here.
+ */
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	/* pagesize is passed for both page-size arguments; 256M segment size */
+	mmu_hash_ops.hpte_invalidate(pte->slot, pte->host_vpn,
+				     pte->pagesize, pte->pagesize,
+				     MMU_SEGSIZE_256M, false);
+}
+
+/*
+ * Hash a guest VSID into an index for the gvsid->hvsid map (512 entries
+ * are kept): the value is cut into eight SID_MAP_BITS-wide fields that are
+ * XORed together, so a lookup is a direct array access instead of a loop
+ * over the map.  @vcpu is not used.
+ */
+static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	u64 folded = 0;
+	int field;
+
+	for (field = 0; field < 8; field++)
+		folded ^= (gvsid >> (SID_MAP_BITS * field)) & SID_MAP_MASK;
+
+	return (u16)folded;
+}
+
+
+/*
+ * Look up the gvsid->hvsid map entry for @gvsid.
+ *
+ * VSID_PR is folded into the key when the guest MSR has MSR_PR set.  Two
+ * slots are probed: the hashed index and its mirror (SID_MAP_MASK - index).
+ * Returns the matching valid entry, or NULL when neither slot matches.
+ */
+static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	struct kvmppc_sid_map *entry;
+	u16 hash;
+	int pass;
+
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
+		gvsid |= VSID_PR;
+
+	hash = kvmppc_sid_hash(vcpu, gvsid);
+
+	/* First pass probes the forward slot, second pass the mirrored one. */
+	for (pass = 0; pass < 2; pass++) {
+		if (pass == 0)
+			entry = &to_book3s(vcpu)->sid_map[hash];
+		else
+			entry = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - hash];
+
+		if (!entry->valid || entry->guest_vsid != gvsid)
+			continue;
+
+		trace_kvm_book3s_slb_found(gvsid, entry->host_vsid);
+		return entry;
+	}
+
+	trace_kvm_book3s_slb_fail(hash, gvsid);
+	return NULL;
+}
+
+/*
+ * Mirror one guest translation in the host hash page table.
+ *
+ * @vcpu:     vcpu the shadow mapping is created for
+ * @orig_pte: guest translation (eaddr, raddr, wimg and permission bits) to
+ *            install
+ * @iswrite:  forwarded to kvmppc_gpa_to_pfn() for the faulting access
+ *
+ * Returns 0 on success, -EINVAL if the guest page or the segment mapping
+ * cannot be obtained, -EAGAIN if no hpte_cache entry was available or an
+ * MMU invalidation raced with us, -EIO if hpte_insert() failed with an
+ * error other than -1, and -1 if hpte_remove() could not free a slot.
+ *
+ * The pfn obtained from kvmppc_gpa_to_pfn() is released on every path that
+ * follows a successful lookup, including the segment-map failure path.
+ */
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+			bool iswrite)
+{
+	unsigned long vpn;
+	kvm_pfn_t hpaddr;
+	ulong hash, hpteg;
+	u64 vsid;
+	int ret;
+	int rflags = 0x192;
+	int vflags = 0;
+	int attempt = 0;
+	struct kvmppc_sid_map *map;
+	int r = 0;
+	int hpsize = MMU_PAGE_4K;
+	bool writable;
+	unsigned long mmu_seq;
+	struct kvm *kvm = vcpu->kvm;
+	struct hpte_cache *cpte;
+	unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
+	unsigned long pfn;
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	/* Get host physical address for gpa */
+	pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
+	if (is_error_noslot_pfn(pfn)) {
+		printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
+		       orig_pte->raddr);
+		r = -EINVAL;
+		goto out;
+	}
+	hpaddr = pfn << PAGE_SHIFT;
+
+	/* and write the mapping ea -> hpa into the pt */
+	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
+	map = find_sid_vsid(vcpu, vsid);
+	if (!map) {
+		/* No host VSID yet: build the segment mapping and look again */
+		ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr);
+		WARN_ON(ret < 0);
+		map = find_sid_vsid(vcpu, vsid);
+	}
+	if (!map) {
+		printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
+				vsid, orig_pte->eaddr);
+		WARN_ON(true);
+		/*
+		 * We bail out before out_unlock, so release the pfn here;
+		 * otherwise the reference from kvmppc_gpa_to_pfn() leaks.
+		 */
+		kvm_release_pfn_clean(pfn);
+		r = -EINVAL;
+		goto out;
+	}
+
+	vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
+
+	kvm_set_pfn_accessed(pfn);
+	if (!orig_pte->may_write || !writable)
+		rflags |= PP_RXRX;
+	else {
+		mark_page_dirty(vcpu->kvm, gfn);
+		kvm_set_pfn_dirty(pfn);
+	}
+
+	if (!orig_pte->may_execute)
+		rflags |= HPTE_R_N;
+	else
+		kvmppc_mmu_flush_icache(pfn);
+
+	rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY);
+	rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
+
+	/*
+	 * Use 64K pages if possible; otherwise, on 64K page kernels,
+	 * we need to transfer 4 more bits from guest real to host real addr.
+	 */
+	if (vsid & VSID_64K)
+		hpsize = MMU_PAGE_64K;
+	else
+		hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+
+	hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
+
+	cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+	spin_lock(&kvm->mmu_lock);
+	if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) {
+		r = -EAGAIN;
+		goto out_unlock;
+	}
+
+map_again:
+	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+	/* In case we tried normal mapping already, let's nuke old entries */
+	if (attempt > 1)
+		if (mmu_hash_ops.hpte_remove(hpteg) < 0) {
+			r = -1;
+			goto out_unlock;
+		}
+
+	ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
+				       hpsize, hpsize, MMU_SEGSIZE_256M);
+
+	if (ret == -1) {
+		/* If we couldn't map a primary PTE, try a secondary */
+		hash = ~hash;
+		vflags ^= HPTE_V_SECONDARY;
+		attempt++;
+		goto map_again;
+	} else if (ret < 0) {
+		r = -EIO;
+		goto out_unlock;
+	} else {
+		trace_kvm_book3s_64_mmu_map(rflags, hpteg,
+					    vpn, hpaddr, orig_pte);
+
+		/*
+		 * The mmu_hash_ops code may give us a secondary entry even
+		 * though we asked for a primary. Fix up.
+		 */
+		if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) {
+			hash = ~hash;
+			hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+		}
+
+		/* Record the mapping so it can be found and flushed later */
+		cpte->slot = hpteg + (ret & 7);
+		cpte->host_vpn = vpn;
+		cpte->pte = *orig_pte;
+		cpte->pfn = pfn;
+		cpte->pagesize = hpsize;
+
+		kvmppc_mmu_hpte_cache_map(vcpu, cpte);
+		/* Ownership passed to the cache; don't free it below */
+		cpte = NULL;
+	}
+
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	if (cpte)
+		kvmppc_mmu_hpte_cache_free(cpte);
+
+out:
+	return r;
+}
+
+/*
+ * Flush the shadow PTEs that match @pte->vpage.  The flush mask ignores the
+ * low four bits of the virtual page when the segment's VSID carries
+ * VSID_64K.  The return value of esid_to_vsid() is not checked.
+ */
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+	u64 vsid;
+	u64 vp_mask;
+
+	vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid);
+
+	vp_mask = (vsid & VSID_64K) ? 0xffffffff0ULL : 0xfffffffffULL;
+	kvmppc_mmu_pte_vflush(vcpu, pte->vpage, vp_mask);
+}
+
+/*
+ * Allocate a host VSID for @gvsid and record it in the gvsid->hvsid map.
+ *
+ * The slot alternates between the hashed index and its mirror on successive
+ * calls so that colliding guest VSIDs do not keep evicting each other.  When
+ * the proto-VSID range is exhausted the whole map, the shadow PTEs and the
+ * shadow segments are flushed and allocation restarts from the first
+ * proto-VSID.  Always returns the (now valid) map entry.
+ */
+static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	/* Flips on every call; shared by all callers of this function. */
+	static int backwards_map;
+	struct kvmppc_vcpu_book3s *book3s = to_book3s(vcpu);
+	struct kvmppc_sid_map *entry;
+	unsigned long vsid_bits;
+	u16 slot;
+
+	if (kvmppc_get_msr(vcpu) & MSR_PR)
+		gvsid |= VSID_PR;
+
+	slot = kvmppc_sid_hash(vcpu, gvsid);
+	if (backwards_map)
+		slot = SID_MAP_MASK - slot;
+
+	/* Use the other slot on the next call. */
+	backwards_map = !backwards_map;
+
+	entry = &book3s->sid_map[slot];
+
+	/* Proto-VSID range used up: start over with a clean slate. */
+	if (book3s->proto_vsid_next == book3s->proto_vsid_max) {
+		book3s->proto_vsid_next = book3s->proto_vsid_first;
+		memset(book3s->sid_map, 0,
+		       sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		kvmppc_mmu_flush_segments(vcpu);
+	}
+
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		vsid_bits = VSID_BITS_256M;
+	else
+		vsid_bits = VSID_BITS_65_256M;
+
+	entry->host_vsid = vsid_scramble(book3s->proto_vsid_next++,
+					 VSID_MULTIPLIER_256M, vsid_bits);
+	entry->guest_vsid = gvsid;
+	entry->valid = true;
+
+	trace_kvm_book3s_slb_map(slot, gvsid, entry->host_vsid);
+
+	return entry;
+}
+
+/*
+ * Choose the shadow SLB slot to use for @esid.
+ *
+ * Order of preference: a valid slot that already maps @esid; otherwise the
+ * highest-numbered invalid slot below slb_max; otherwise a fresh slot at
+ * slb_max.  If slb_max has reached the limit (64, or mmu_slb_size when that
+ * is smaller) all shadow segments are flushed first.
+ */
+static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	int slb_limit = 64;
+	int free_slot = -1;
+	int slot;
+	int idx;
+
+	for (idx = 0; idx < svcpu->slb_max; idx++) {
+		if (!(svcpu->slb[idx].esid & SLB_ESID_V)) {
+			/* Remember it, but keep looking for an exact match */
+			free_slot = idx;
+			continue;
+		}
+
+		if ((svcpu->slb[idx].esid & ESID_MASK) == esid) {
+			slot = idx;
+			goto done;
+		}
+	}
+
+	/* Reuse an entry that was invalidated earlier */
+	if (free_slot >= 0) {
+		slot = free_slot;
+		goto done;
+	}
+
+	/* Nothing to reuse: grow the table, purging it first if it is full */
+	if (mmu_slb_size < 64)
+		slb_limit = mmu_slb_size;
+
+	if (svcpu->slb_max == slb_limit)
+		kvmppc_mmu_flush_segments(vcpu);
+
+	slot = svcpu->slb_max++;
+
+done:
+	svcpu_put(svcpu);
+	return slot;
+}
+
+/*
+ * Build the shadow SLB entry covering @eaddr.
+ *
+ * A slot is picked with kvmppc_mmu_next_segment(), the guest VSID is mapped
+ * to a host VSID (creating the mapping if needed) and the ESID/VSID pair is
+ * written to the shadow vcpu.
+ *
+ * Returns 0 on success, or -ENOENT when esid_to_vsid() fails; in that case
+ * the chosen slot is invalidated instead.
+ */
+int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	u64 esid = eaddr >> SID_SHIFT;
+	u64 slbe = (eaddr & ESID_MASK) | SLB_ESID_V;
+	u64 slbv = SLB_VSID_USER;
+	struct kvmppc_sid_map *map;
+	u64 gvsid;
+	int slot;
+	int rc = -ENOENT;
+
+	slot = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
+
+	if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
+		/* No guest translation: leave the slot invalid */
+		svcpu->slb[slot].esid = 0;
+	} else {
+		map = find_sid_vsid(vcpu, gvsid);
+		if (!map)
+			map = create_sid_map(vcpu, gvsid);
+
+		map->guest_esid = esid;
+
+		slbv |= (map->host_vsid << 12);
+		slbv &= ~SLB_VSID_KP;
+		slbe |= slot;
+
+#ifdef CONFIG_PPC_64K_PAGES
+		/* Use a 64K base page size for the host segment if we can */
+		if (gvsid & VSID_64K)
+			slbv |= mmu_psize_defs[MMU_PAGE_64K].sllp;
+#endif
+
+		svcpu->slb[slot].esid = slbe;
+		svcpu->slb[slot].vsid = slbv;
+
+		trace_kvm_book3s_slbmte(slbv, slbe);
+		rc = 0;
+	}
+
+	svcpu_put(svcpu);
+	return rc;
+}
+
+/*
+ * Invalidate every valid shadow SLB entry whose ESID, masked to a segment
+ * of @seg_size bytes, equals @ea.
+ */
+void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong ea, ulong seg_size)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	ulong seg_mask = -seg_size;
+	int slot;
+
+	for (slot = 0; slot < svcpu->slb_max; slot++) {
+		if (!(svcpu->slb[slot].esid & SLB_ESID_V))
+			continue;
+		if ((svcpu->slb[slot].esid & seg_mask) != ea)
+			continue;
+
+		/* Clearing the ESID word drops the valid bit as well */
+		svcpu->slb[slot].esid = 0;
+	}
+
+	svcpu_put(svcpu);
+}
+
+/*
+ * Drop all shadow SLB entries: the first entry's ESID is cleared and the
+ * entry count is reset to zero.
+ */
+void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_book3s_shadow_vcpu *shadow = svcpu_get(vcpu);
+
+	shadow->slb[0].esid = 0;
+	shadow->slb_max = 0;
+
+	svcpu_put(shadow);
+}
+
+/*
+ * Tear down the per-vcpu MMU state: calls kvmppc_mmu_hpte_destroy() and
+ * then __destroy_context() on context_id[0], the id that
+ * kvmppc_mmu_init_pr() stores there.
+ */
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
+{
+	kvmppc_mmu_hpte_destroy(vcpu);
+	__destroy_context(to_book3s(vcpu)->context_id[0]);
+}
+
+/*
+ * Set up the per-vcpu MMU state.
+ *
+ * Allocates a hash MMU context id, derives the proto-VSID window
+ * [proto_vsid_first, proto_vsid_max] from it (one ESID_BITS-wide window per
+ * context id), starts allocation at the first proto-VSID and initialises
+ * the HPTE cache.
+ *
+ * Returns 0 on success, or the negative value reported by
+ * hash__alloc_context_id() on failure.
+ */
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int err;
+
+	err = hash__alloc_context_id();
+	if (err < 0)
+		return err;	/* pass the real error up, not a bare -1 */
+	vcpu3s->context_id[0] = err;
+
+	vcpu3s->proto_vsid_max = ((u64)(vcpu3s->context_id[0] + 1)
+				  << ESID_BITS) - 1;
+	vcpu3s->proto_vsid_first = (u64)vcpu3s->context_id[0] << ESID_BITS;
+	vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first;
+
+	kvmppc_mmu_hpte_init(vcpu);
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
new file mode 100644
index 0000000000..fdfc2a62dd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -0,0 +1,2150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
+#include <linux/srcu.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/debugfs.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cputable.h>
+#include <asm/pte-walk.h>
+
+#include "book3s.h"
+#include "book3s_hv.h"
+#include "trace_hv.h"
+
+/*
+ * Debug tracing for HPT resizing.  When DEBUG_RESIZE_HPT is defined (see the
+ * commented-out line below), resize_hpt_debug() prints a
+ * "RESIZE HPT <ptr>: " prefix followed by the caller's printk arguments;
+ * otherwise it expands to an empty statement.
+ */
+//#define DEBUG_RESIZE_HPT	1
+
+#ifdef DEBUG_RESIZE_HPT
+#define resize_hpt_debug(resize, ...)				\
+	do {							\
+		printk(KERN_DEBUG "RESIZE HPT %p: ", resize);	\
+		printk(__VA_ARGS__);				\
+	} while (0)
+#else
+#define resize_hpt_debug(resize, ...)				\
+	do { } while (0)
+#endif
+
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+				long pte_index, unsigned long pteh,
+				unsigned long ptel, unsigned long *pte_idx_ret);
+
+/*
+ * Bookkeeping for an HPT resize (its users are outside this excerpt).
+ * The comment above each group of fields says what protects it.
+ */
+struct kvm_resize_hpt {
+	/* These fields read-only after init */
+	struct kvm *kvm;
+	struct work_struct work;
+	u32 order;
+
+	/* These fields protected by kvm->arch.mmu_setup_lock */
+
+	/* Possible values and their usage:
+	 *  <0     an error occurred during allocation,
+	 *  -EBUSY allocation is in progress,
+	 *  0      allocation made successfully.
+	 */
+	int error;
+
+	/* Private to the work thread, until error != -EBUSY,
+	 * then protected by kvm->arch.mmu_setup_lock.
+	 */
+	struct kvm_hpt_info hpt;
+};
+
+/*
+ * Allocate a zeroed hashed page table of 2^@order bytes together with its
+ * reverse-map array, and describe both in @info.
+ *
+ * The table is taken from the HPT CMA pool when possible and from the page
+ * allocator otherwise.  @info is written only on success.
+ *
+ * Returns 0 on success, -EINVAL if @order lies outside
+ * [PPC_MIN_HPT_ORDER, PPC_MAX_HPT_ORDER], or -ENOMEM if either allocation
+ * fails.
+ */
+int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
+{
+	unsigned long hpt = 0;
+	int cma = 0;
+	struct page *page = NULL;
+	struct revmap_entry *rev;
+	unsigned long npte;
+
+	if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
+		return -EINVAL;
+
+	page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
+	if (page) {
+		hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+		memset((void *)hpt, 0, (1ul << order));
+		cma = 1;
+	}
+
+	if (!hpt)
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
+				       |__GFP_NOWARN, order - PAGE_SHIFT);
+
+	if (!hpt)
+		return -ENOMEM;
+
+	/* HPTEs are 2**4 bytes long */
+	npte = 1ul << (order - 4);
+
+	/* Allocate reverse map array */
+	rev = vmalloc(array_size(npte, sizeof(struct revmap_entry)));
+	if (!rev) {
+		/*
+		 * Use the same unsigned long page count as the allocation
+		 * above; an int shift would overflow for large orders.
+		 */
+		if (cma)
+			kvm_free_hpt_cma(page, 1ul << (order - PAGE_SHIFT));
+		else
+			free_pages(hpt, order - PAGE_SHIFT);
+		return -ENOMEM;
+	}
+
+	info->order = order;
+	info->virt = hpt;
+	info->cma = cma;
+	info->rev = rev;
+
+	return 0;
+}
+
+/*
+ * Make @info the VM's current HPT: resets the mmio_update counter to 0,
+ * copies @info into kvm->arch.hpt and recomputes kvm->arch.sdr1 from the
+ * table's physical address and its order.
+ */
+void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
+{
+	atomic64_set(&kvm->arch.mmio_update, 0);
+	kvm->arch.hpt = *info;
+	/* low bits carry (order - 18) as the size field -- TODO confirm encoding */
+	kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);
+
+	pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n",
+		 info->virt, (long)info->order, kvm->arch.lpid);
+}
+
+/*
+ * Give the VM an empty HPT of 2^@order bytes.
+ *
+ * An existing HPT of the requested order is cleared and reused; otherwise
+ * any existing HPT is freed and a new one is allocated.  A radix guest is
+ * switched to HPT mode first.  Runs under kvm->arch.mmu_setup_lock.
+ *
+ * Returns 0 on success (every vcpu is then marked as needing a TLB flush),
+ * -EBUSY if the MMU is ready and vcpus are running, or the error returned
+ * by kvmppc_switch_mmu_to_hpt() or kvmppc_allocate_hpt().
+ */
+int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
+{
+	int err = -EBUSY;
+	struct kvm_hpt_info info;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+	if (kvm->arch.mmu_ready) {
+		kvm->arch.mmu_ready = 0;
+		/* order mmu_ready vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.mmu_ready = 1;
+			goto out;
+		}
+	}
+	if (kvm_is_radix(kvm)) {
+		err = kvmppc_switch_mmu_to_hpt(kvm);
+		if (err)
+			goto out;
+	}
+
+	/*
+	 * Only reuse a table that actually exists.  With no HPT allocated
+	 * both hpt.order and hpt.virt are 0, so a request for order 0 would
+	 * otherwise memset() address 0; such a request is now left to
+	 * kvmppc_allocate_hpt() to validate.
+	 */
+	if (kvm->arch.hpt.virt && kvm->arch.hpt.order == order) {
+		/* We already have a suitable HPT */
+
+		/* Set the entire HPT to 0, i.e. invalid HPTEs */
+		memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
+		/*
+		 * Reset all the reverse-mapping chains for all memslots
+		 */
+		kvmppc_rmap_reset(kvm);
+		err = 0;
+		goto out;
+	}
+
+	if (kvm->arch.hpt.virt) {
+		kvmppc_free_hpt(&kvm->arch.hpt);
+		kvmppc_rmap_reset(kvm);
+	}
+
+	err = kvmppc_allocate_hpt(&info, order);
+	if (err < 0)
+		goto out;
+	kvmppc_set_hpt(kvm, &info);
+
+out:
+	if (err == 0)
+		/* Ensure that each vcpu will flush its TLB on next entry. */
+		cpumask_setall(&kvm->arch.need_tlb_flush);
+
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return err;
+}
+
+/*
+ * Release the HPT and reverse-map array described by @info and reset the
+ * descriptor (rev, virt and order are cleared).  The table goes back to the
+ * CMA pool when info->cma is set, otherwise to the page allocator.
+ */
+void kvmppc_free_hpt(struct kvm_hpt_info *info)
+{
+	vfree(info->rev);
+	info->rev = NULL;
+	/* Page count is unsigned long: an int shift overflows for big orders */
+	if (info->cma)
+		kvm_free_hpt_cma(virt_to_page((void *)info->virt),
+				 1ul << (info->order - PAGE_SHIFT));
+	else if (info->virt)
+		free_pages(info->virt, info->order - PAGE_SHIFT);
+	info->virt = 0;
+	info->order = 0;
+}
+
+/*
+ * Page-size bits for the first HPTE doubleword (4k, 64k or 16M pages):
+ * HPTE_V_LARGE for anything bigger than 4k, nothing otherwise.
+ */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+	if (pgsize > 0x1000)
+		return HPTE_V_LARGE;
+
+	return 0;
+}
+
+/*
+ * Page-size bits for the second HPTE doubleword (4k, 64k or 16M pages):
+ * 0x1000 for a 64k page, nothing for the other sizes.
+ */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+	if (pgsize == 0x10000)
+		return 0x1000;
+
+	return 0;
+}
+
+/*
+ * Populate the HPT with bolted entries for the VRMA, one HPTE per page of
+ * 2^@porder bytes of @memslot, starting at address 0.
+ *
+ * The number of pages is capped at 1TB worth and at the number of HPTE
+ * groups, since each group receives at most one entry.  Insertion stops at
+ * the first failure, which is logged; nothing is returned to the caller.
+ */
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+		     unsigned long porder)
+{
+	unsigned long i;
+	unsigned long npages;
+	unsigned long hp_v, hp_r;
+	unsigned long addr, hash;
+	unsigned long psize;
+	unsigned long hp0, hp1;
+	unsigned long idx_ret;
+	long ret;
+	struct kvm *kvm = vcpu->kvm;
+
+	psize = 1ul << porder;
+	npages = memslot->npages >> (porder - PAGE_SHIFT);
+
+	/* VRMA can't be > 1TB */
+	if (npages > 1ul << (40 - porder))
+		npages = 1ul << (40 - porder);
+	/* Can't use more than 1 HPTE per HPTEG */
+	if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
+		npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;
+
+	/* Parts of the two HPTE words that are the same for every page */
+	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+	hp1 = hpte1_pgsize_encoding(psize) |
+		HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
+	for (i = 0; i < npages; ++i) {
+		addr = i << porder;
+		/* can't use hpt_hash since va > 64 bits */
+		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
+			& kvmppc_hpt_mask(&kvm->arch.hpt);
+		/*
+		 * We assume that the hash table is empty and no
+		 * vcpus are using it at this stage.  Since we create
+		 * at most one HPTE per HPTEG, we just assume entry 7
+		 * is available and use it.
+		 */
+		hash = (hash << 3) + 7;
+		hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
+		hp_r = hp1 | addr;
+		/* H_EXACT: insert at exactly the index computed above */
+		ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+						 &idx_ret);
+		if (ret != H_SUCCESS) {
+			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
+			       addr, ret);
+			break;
+		}
+	}
+}
+
+/*
+ * Work out how many LPIDs are available and pass that number to
+ * kvmppc_init_lpid().
+ *
+ * Returns 0 on success, or -EINVAL if the MMU lacks
+ * MMU_FTR_LOCKLESS_TLBIE or if, in hypervisor mode, the LPID register is
+ * not 0.
+ */
+int kvmppc_mmu_hv_init(void)
+{
+	unsigned long nr_lpids;
+
+	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
+		return -EINVAL;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (WARN_ON(mfspr(SPRN_LPID) != 0))
+			return -EINVAL;
+		nr_lpids = 1UL << mmu_lpid_bits;
+	} else {
+		/* Not in hypervisor mode: size by the nested-guest limit */
+		nr_lpids = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* POWER7 has 10-bit LPIDs, POWER8 has 12-bit LPIDs */
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			WARN_ON(nr_lpids != 1UL << 12);
+		else
+			WARN_ON(nr_lpids != 1UL << 10);
+
+		/*
+		 * Reserve the last implemented LPID for use in partition
+		 * switching on POWER7 and POWER8.
+		 */
+		nr_lpids -= 1;
+	}
+
+	kvmppc_init_lpid(nr_lpids);
+
+	return 0;
+}
+
+/*
+ * Insert an HPTE through kvmppc_do_h_enter() with preemption disabled around
+ * the call.  H_TOO_HARD is not expected here; if it is returned anyway it is
+ * logged and reported to the caller as H_RESOURCE.  Every other result is
+ * passed through unchanged.
+ */
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+				long pte_index, unsigned long pteh,
+				unsigned long ptel, unsigned long *pte_idx_ret)
+{
+	long rc;
+
+	preempt_disable();
+	rc = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+			       kvm->mm->pgd, false, pte_idx_ret);
+	preempt_enable();
+
+	if (rc != H_TOO_HARD)
+		return rc;
+
+	/* this can't happen */
+	pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
+	return H_RESOURCE;	/* or something */
+}
+
+/*
+ * Find the valid guest SLB entry that covers @eaddr.  The ESID comparison
+ * uses the 1T mask for entries whose VSID word has SLB_VSID_B_1T set and
+ * the regular mask otherwise.  Returns the first match, or NULL if none.
+ */
+static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
+							 gva_t eaddr)
+{
+	int idx;
+
+	for (idx = 0; idx < vcpu->arch.slb_nr; idx++) {
+		struct kvmppc_slb *slbe = &vcpu->arch.slb[idx];
+		u64 seg_mask;
+
+		if (!(slbe->orige & SLB_ESID_V))
+			continue;
+
+		seg_mask = (slbe->origv & SLB_VSID_B_1T) ? ESID_MASK_1T
+							 : ESID_MASK;
+
+		if (!((slbe->orige ^ eaddr) & seg_mask))
+			return slbe;
+	}
+
+	return NULL;
+}
+
+/*
+ * Combine the real page number held in HPTE word @r with the in-page offset
+ * of @ea.  The page size is derived from @v and @r by kvmppc_actual_pgsz().
+ */
+static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
+			unsigned long ea)
+{
+	unsigned long offset_mask = kvmppc_actual_pgsz(v, r) - 1;
+	unsigned long page_base = r & HPTE_R_RPN & ~offset_mask;
+	unsigned long page_offset = ea & offset_mask;
+
+	return page_base | page_offset;
+}
+
+/*
+ * Translate guest effective address @eaddr by looking it up in the guest
+ * HPT and fill in @gpte (addresses and read/write/execute permissions).
+ * Radix guests are handed to kvmppc_mmu_radix_xlate().
+ *
+ * @data is true for a data access and false for an instruction fetch; it
+ * selects which MSR bit (MSR_DR or MSR_IR) decides between translated and
+ * real-mode access.  @iswrite is only forwarded to the radix helper.
+ *
+ * Returns 0 on success, -EINVAL if no SLB entry covers @eaddr, or -ENOENT
+ * if no matching HPTE is found.
+ */
+static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, bool data, bool iswrite)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_slb *slbe;
+	unsigned long slb_v;
+	unsigned long pp, key;
+	unsigned long v, orig_v, gr;
+	__be64 *hptep;
+	long int index;
+	int virtmode = __kvmppc_get_msr_hv(vcpu) & (data ? MSR_DR : MSR_IR);
+
+	if (kvm_is_radix(vcpu->kvm))
+		return kvmppc_mmu_radix_xlate(vcpu, eaddr, gpte, data, iswrite);
+
+	/* Get SLB entry */
+	if (virtmode) {
+		slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
+		if (!slbe)
+			return -EINVAL;
+		slb_v = slbe->origv;
+	} else {
+		/* real mode access */
+		slb_v = vcpu->kvm->arch.vrma_slb_v;
+	}
+
+	preempt_disable();
+	/* Find the HPTE in the hash table */
+	index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
+					 HPTE_V_VALID | HPTE_V_ABSENT);
+	if (index < 0) {
+		preempt_enable();
+		return -ENOENT;
+	}
+	/* Each HPTE is 16 bytes, hence the shift by 4 */
+	hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+	v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
+	gr = kvm->arch.hpt.rev[index].guest_rpte;
+
+	/* Everything needed has been copied out; release the HPTE */
+	unlock_hpte(hptep, orig_v);
+	preempt_enable();
+
+	gpte->eaddr = eaddr;
+	gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
+
+	/* Get PP bits and key for permission check */
+	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+	key = (__kvmppc_get_msr_hv(vcpu) & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+	key &= slb_v;
+
+	/* Calculate permissions */
+	gpte->may_read = hpte_read_permission(pp, key);
+	gpte->may_write = hpte_write_permission(pp, key);
+	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
+
+	/* Storage key permission check for POWER7 */
+	if (data && virtmode) {
+		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
+		/* bit 0 of the field denies reads, bit 1 denies writes */
+		if (amrfield & 1)
+			gpte->may_read = 0;
+		if (amrfield & 2)
+			gpte->may_write = 0;
+	}
+
+	/* Get the guest physical address */
+	gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
+	return 0;
+}
+
+/*
+ * Cheap load-versus-store classifier.
+ *
+ * For an instruction that is a load or a store, the result says which one
+ * it is, at least on server processors (embedded processors have external
+ * PID instructions that break the rule used here).  For any other
+ * instruction the result is meaningless.
+ *
+ * The word examined is the suffix of a prefixed instruction, or the
+ * instruction itself otherwise; for non-prefixed major opcode 31 the
+ * deciding bit is 0x100 instead of 0x10000000.
+ */
+static int instruction_is_store(ppc_inst_t instr)
+{
+	unsigned int store_bit = 0x10000000;
+	unsigned int word;
+
+	if (ppc_inst_prefixed(instr)) {
+		word = ppc_inst_suffix(instr);
+	} else {
+		word = ppc_inst_val(instr);
+		if ((word & 0xfc000000) == 0x7c000000)
+			store_bit = 0x100;	/* major opcode 31 */
+	}
+
+	return (word & store_bit) ? 1 : 0;
+}
+
+/*
+ * Emulate a guest access to an MMIO address.
+ *
+ * @gpa:      guest physical address being accessed
+ * @ea:       guest effective address of the access
+ * @is_store: non-zero if the fault was caused by a store
+ *
+ * Returns RESUME_GUEST when a store was consumed by the fast MMIO bus or
+ * when the guest should retry the instruction; otherwise returns whatever
+ * kvmppc_emulate_mmio() returns.
+ */
+int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
+			   unsigned long gpa, gva_t ea, int is_store)
+{
+	ppc_inst_t last_inst;
+	bool is_prefixed = !!(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+
+	/*
+	 * Fast path - check if the guest physical address corresponds to a
+	 * device on the FAST_MMIO_BUS, if so we can avoid loading the
+	 * instruction altogether, then we can just handle it and return.
+	 */
+	if (is_store) {
+		int idx, ret;
+
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0,
+				       NULL);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		if (!ret) {
+			/* Skip the instruction: 8 bytes if prefixed, else 4 */
+			kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + (is_prefixed ? 8 : 4));
+			return RESUME_GUEST;
+		}
+	}
+
+	/*
+	 * If we fail, we just return to the guest and try executing it again.
+	 */
+	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
+		EMULATE_DONE)
+		return RESUME_GUEST;
+
+	/*
+	 * WARNING: We do not know for sure whether the instruction we just
+	 * read from memory is the same that caused the fault in the first
+	 * place.
+	 *
+	 * If the fault is prefixed but the instruction is not or vice
+	 * versa, try again so that we don't advance pc the wrong amount.
+	 */
+	if (ppc_inst_prefixed(last_inst) != is_prefixed)
+		return RESUME_GUEST;
+
+	/*
+	 * If the instruction we read is neither a load nor a store,
+	 * then it can't access memory, so we don't need to worry about
+	 * enforcing access permissions.  So, assuming it is a load or
+	 * store, we just check that its direction (load or store) is
+	 * consistent with the original fault, since that's what we
+	 * checked the access permissions against.  If there is a mismatch
+	 * we just return and retry the instruction.
+	 */
+
+	if (instruction_is_store(last_inst) != !!is_store)
+		return RESUME_GUEST;
+
+	/*
+	 * Emulated accesses are emulated by looking at the hash for
+	 * translation once, then performing the access later. The
+	 * translation could be invalidated in the meantime, at which
+	 * point performing the subsequent memory access on the old
+	 * physical address could possibly be a security hole for the
+	 * guest (but not the host).
+	 *
+	 * This is less of an issue for MMIO stores since they aren't
+	 * globally visible. It could be an issue for MMIO loads to
+	 * a certain extent but we'll ignore it for now.
+	 */
+
+	vcpu->arch.paddr_accessed = gpa;
+	vcpu->arch.vaddr_accessed = ea;
+	return kvmppc_emulate_mmio(vcpu);
+}
+
+/*
+ * Handle a guest page fault for a hash (HPT) guest; radix guests are handed
+ * to kvmppc_book3s_radix_page_fault().
+ *
+ * @vcpu:  faulting vcpu; its arch.pgfault_* fields describe the HPTE that
+ *         was found before this function was called
+ * @ea:    faulting effective address
+ * @dsisr: fault status; DSISR_ISSTORE distinguishes stores from loads
+ *
+ * Returns RESUME_GUEST when the guest should retry the access (this is also
+ * the result after the HPTE was updated successfully), the result of
+ * kvmppc_hv_emulate_mmio() for addresses without a usable memslot, or
+ * -EFAULT on failure.
+ */
+int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
+				unsigned long ea, unsigned long dsisr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long hpte[3], r;
+	unsigned long hnow_v, hnow_r;
+	__be64 *hptep;
+	unsigned long mmu_seq, psize, pte_size;
+	unsigned long gpa_base, gfn_base;
+	unsigned long gpa, gfn, hva, pfn, hpa;
+	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
+	struct revmap_entry *rev;
+	struct page *page;
+	long index, ret;
+	bool is_ci;
+	bool writing, write_ok;
+	unsigned int shift;
+	unsigned long rcbits;
+	long mmio_update;
+	pte_t pte, *ptep;
+
+	if (kvm_is_radix(kvm))
+		return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);
+
+	/*
+	 * Real-mode code has already searched the HPT and found the
+	 * entry we're interested in.  Lock the entry and check that
+	 * it hasn't changed.  If it has, just return and re-execute the
+	 * instruction.
+	 */
+	if (ea != vcpu->arch.pgfault_addr)
+		return RESUME_GUEST;
+
+	/*
+	 * A cached MMIO translation is usable as long as its mmio_update
+	 * stamp still matches the VM-wide counter.
+	 */
+	if (vcpu->arch.pgfault_cache) {
+		mmio_update = atomic64_read(&kvm->arch.mmio_update);
+		if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
+			r = vcpu->arch.pgfault_cache->rpte;
+			psize = kvmppc_actual_pgsz(vcpu->arch.pgfault_hpte[0],
+						   r);
+			gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+			gfn_base = gpa_base >> PAGE_SHIFT;
+			gpa = gpa_base | (ea & (psize - 1));
+			return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
+						dsisr & DSISR_ISSTORE);
+		}
+	}
+	index = vcpu->arch.pgfault_index;
+	hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+	rev = &kvm->arch.hpt.rev[index];
+	/* Take a consistent snapshot of the HPTE and its guest view */
+	preempt_disable();
+	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+		cpu_relax();
+	hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+	hpte[1] = be64_to_cpu(hptep[1]);
+	hpte[2] = r = rev->guest_rpte;
+	unlock_hpte(hptep, hpte[0]);
+	preempt_enable();
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
+		hpte[1] = hpte_new_to_old_r(hpte[1]);
+	}
+	/* The entry changed since the fault was taken: let the guest retry */
+	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
+	    hpte[1] != vcpu->arch.pgfault_hpte[1])
+		return RESUME_GUEST;
+
+	/* Translate the logical address and get the page */
+	psize = kvmppc_actual_pgsz(hpte[0], r);
+	gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+	gfn_base = gpa_base >> PAGE_SHIFT;
+	gpa = gpa_base | (ea & (psize - 1));
+	gfn = gpa >> PAGE_SHIFT;
+	memslot = gfn_to_memslot(kvm, gfn);
+
+	trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
+	/* No memslot means it's an emulated MMIO region */
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
+					      dsisr & DSISR_ISSTORE);
+
+	/*
+	 * This should never happen, because of the slot_is_aligned()
+	 * check in kvmppc_do_h_enter().
+	 */
+	if (gfn_base < memslot->base_gfn)
+		return -EFAULT;
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	ret = -EFAULT;
+	page = NULL;
+	writing = (dsisr & DSISR_ISSTORE) != 0;
+	/* If writing != 0, then the HPTE must allow writing, if we get here */
+	write_ok = writing;
+	hva = gfn_to_hva_memslot(memslot, gfn);
+
+	/*
+	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+	 * do it with !atomic && !async, which is how we call it.
+	 * We always ask for write permission since the common case
+	 * is that the page is writable.
+	 */
+	if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
+		write_ok = true;
+	} else {
+		/* Call KVM generic code to do the slow-path check */
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
+					   writing, &write_ok, NULL);
+		if (is_error_noslot_pfn(pfn))
+			return -EFAULT;
+		page = NULL;
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageReserved(page))
+				page = NULL;
+		}
+	}
+
+	/*
+	 * Read the PTE from the process' radix tree and use that
+	 * so we get the shift and attribute bits.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+	pte = __pte(0);
+	if (ptep)
+		pte = READ_ONCE(*ptep);
+	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * If the PTE disappeared temporarily due to a THP
+	 * collapse, just return and let the guest try again.
+	 */
+	if (!pte_present(pte)) {
+		if (page)
+			put_page(page);
+		return RESUME_GUEST;
+	}
+	hpa = pte_pfn(pte) << PAGE_SHIFT;
+	pte_size = PAGE_SIZE;
+	if (shift)
+		pte_size = 1ul << shift;
+	is_ci = pte_ci(pte);
+
+	/* The guest page must fit inside the host mapping (ret is -EFAULT) */
+	if (psize > pte_size)
+		goto out_put;
+	if (pte_size > psize)
+		hpa |= hva & (pte_size - psize);
+
+	/* Check WIMG vs. the actual page we're accessing */
+	if (!hpte_cache_flags_ok(r, is_ci)) {
+		if (is_ci)
+			goto out_put;
+		/*
+		 * Allow guest to map emulated device memory as
+		 * uncacheable, but actually make it cacheable.
+		 */
+		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+	}
+
+	/*
+	 * Set the HPTE to point to hpa.
+	 * Since the hpa is at PAGE_SIZE granularity, make sure we
+	 * don't mask out lower-order bits if psize < PAGE_SIZE.
+	 */
+	if (psize < PAGE_SIZE)
+		psize = PAGE_SIZE;
+	r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
+	if (hpte_is_writable(r) && !write_ok)
+		r = hpte_make_readonly(r);
+	ret = RESUME_GUEST;
+	preempt_disable();
+	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+		cpu_relax();
+	hnow_v = be64_to_cpu(hptep[0]);
+	hnow_r = be64_to_cpu(hptep[1]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
+		hnow_r = hpte_new_to_old_r(hnow_r);
+	}
+
+	/*
+	 * If the HPT is being resized, don't update the HPTE,
+	 * instead let the guest retry after the resize operation is complete.
+	 * The synchronization for mmu_ready test vs. set is provided
+	 * by the HPTE lock.
+	 */
+	if (!kvm->arch.mmu_ready)
+		goto out_unlock;
+
+	if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
+	    rev->guest_rpte != hpte[2])
+		/* HPTE has been changed under us; let the guest retry */
+		goto out_unlock;
+	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+	/* Always put the HPTE in the rmap chain for the page base address */
+	rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
+	lock_rmap(rmap);
+
+	/* Check if we might have been invalidated; let the guest retry if so */
+	ret = RESUME_GUEST;
+	if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) {
+		unlock_rmap(rmap);
+		goto out_unlock;
+	}
+
+	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
+	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+
+	if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) {
+		/* HPTE was previously valid, so we need to invalidate it */
+		unlock_rmap(rmap);
+		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+		kvmppc_invalidate_hpte(kvm, hptep, index);
+		/* don't lose previous R and C bits */
+		r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
+	} else {
+		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		r = hpte_old_to_new_r(hpte[0], r);
+		hpte[0] = hpte_old_to_new_v(hpte[0]);
+	}
+	/* Write the second word first, then publish and unlock the first */
+	hptep[1] = cpu_to_be64(r);
+	eieio();
+	__unlock_hpte(hptep, hpte[0]);
+	asm volatile("ptesync" : : : "memory");
+	preempt_enable();
+	if (page && hpte_is_writable(r))
+		set_page_dirty_lock(page);
+
+ out_put:
+	trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
+	if (page)
+		put_page(page);
+	return ret;
+
+ out_unlock:
+	/* Release the HPTE lock without changing the entry, then clean up */
+	__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+	preempt_enable();
+	goto out_put;
+}
+
+/*
+ * Zero the reverse-map array of every memslot of @kvm.  The memslot list is
+ * walked under the kvm->srcu read lock; each array is cleared while holding
+ * kvm->mmu_lock.
+ */
+void kvmppc_rmap_reset(struct kvm *kvm)
+{
+	struct kvm_memory_slot *slot;
+	struct kvm_memslots *memslots;
+	int bucket;
+	int idx = srcu_read_lock(&kvm->srcu);
+
+	memslots = kvm_memslots(kvm);
+	kvm_for_each_memslot(slot, bucket, memslots) {
+		/* Keep kvm_unmap_hva_range and friends out while we clear */
+		spin_lock(&kvm->mmu_lock);
+		/*
+		 * Reference and change bits recorded in the rmap are lost
+		 * here; that is assumed to be acceptable across a reset.
+		 */
+		memset(slot->arch.rmap, 0,
+		       slot->npages * sizeof(*slot->arch.rmap));
+		spin_unlock(&kvm->mmu_lock);
+	}
+
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
+/*
+ * Unlink HPTE @i from the reverse-map chain headed by @rmapp and, if the
+ * HPTE is still valid and maps @gfn, invalidate it.
+ *
+ * When the HPTE is invalidated, its R and C bits are saved into *@rmapp
+ * and into the guest view (rev[i].guest_rpte), and the dirty map of
+ * @memslot is updated if C was set and the memslot has a dirty bitmap.
+ *
+ * Must be called with both HPTE and rmap locked.
+ */
+static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
+			      struct kvm_memory_slot *memslot,
+			      unsigned long *rmapp, unsigned long gfn)
+{
+	__be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+	struct revmap_entry *rev = kvm->arch.hpt.rev;
+	unsigned long j, h;
+	unsigned long ptel, psize, rcbits;
+
+	/* The chain is a circular doubly-linked list threaded through rev[] */
+	j = rev[i].forw;
+	if (j == i) {
+		/* chain is now empty */
+		*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+	} else {
+		/* remove i from chain */
+		h = rev[i].back;
+		rev[h].forw = j;
+		rev[j].back = h;
+		rev[i].forw = rev[i].back = i;
+		/* make the successor of i the new chain head */
+		*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+	}
+
+	/* Now check and modify the HPTE */
+	ptel = rev[i].guest_rpte;
+	psize = kvmppc_actual_pgsz(be64_to_cpu(hptep[0]), ptel);
+	if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+	    hpte_rpn(ptel, psize) == gfn) {
+		/* mark absent first, then invalidate the entry */
+		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+		kvmppc_invalidate_hpte(kvm, hptep, i);
+		/* clear the key bits in the second doubleword */
+		hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+		/* Harvest R and C */
+		rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
+		*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+		if ((rcbits & HPTE_R_C) && memslot->dirty_bitmap)
+			kvmppc_update_dirty_map(memslot, gfn, psize);
+		/* only touch the guest view if it gains a new R/C bit */
+		if (rcbits & ~rev[i].guest_rpte) {
+			rev[i].guest_rpte = ptel | rcbits;
+			note_hpte_modification(kvm, &rev[i]);
+		}
+	}
+}
+
+/*
+ * Remove every HPTE from the rmap chain for @gfn in @memslot.
+ *
+ * Each pass takes the rmap lock, picks the HPTE at the head of the chain
+ * and try-locks it. If the HPTE is busy, the rmap lock is dropped while
+ * waiting and the pass is restarted. The loop ends once the rmap entry no
+ * longer has KVMPPC_RMAP_PRESENT set.
+ */
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			    unsigned long gfn)
+{
+	unsigned long i;
+	__be64 *hptep;
+	unsigned long *rmapp;
+
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	for (;;) {
+		lock_rmap(rmapp);
+		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+			unlock_rmap(rmapp);
+			break;
+		}
+
+		/*
+		 * To avoid an ABBA deadlock with the HPTE lock bit,
+		 * we can't spin on the HPTE lock while holding the
+		 * rmap chain lock.
+		 */
+		i = *rmapp & KVMPPC_RMAP_INDEX;
+		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+			/* unlock rmap before spinning on the HPTE lock */
+			unlock_rmap(rmapp);
+			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
+				cpu_relax();
+			continue;
+		}
+
+		/* both locks held: unlink the head and invalidate it */
+		kvmppc_unmap_hpte(kvm, i, memslot, rmapp, gfn);
+		unlock_rmap(rmapp);
+		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+	}
+}
+
+/*
+ * Unmap every guest frame in [range->start, range->end) from the slot
+ * given in @range, using the radix or the hashed-page-table helper
+ * depending on the MMU mode of @kvm. Always returns false.
+ */
+bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	const bool radix = kvm_is_radix(kvm);
+	gfn_t gfn = range->start;
+
+	while (gfn < range->end) {
+		if (radix)
+			kvm_unmap_radix(kvm, range->slot, gfn);
+		else
+			kvm_unmap_rmapp(kvm, range->slot, gfn);
+		gfn++;
+	}
+
+	return false;
+}
+
+/*
+ * Drop all guest mappings of pages in @memslot.
+ *
+ * Radix guests are handed to kvmppc_radix_flush_memslot(); for HPT guests
+ * every page whose rmap entry is marked present is unmapped.
+ */
+void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+				  struct kvm_memory_slot *memslot)
+{
+	const unsigned long first_gfn = memslot->base_gfn;
+	unsigned long *rmap = memslot->arch.rmap;
+	unsigned long npages, idx;
+
+	if (kvm_is_radix(kvm)) {
+		kvmppc_radix_flush_memslot(kvm, memslot);
+		return;
+	}
+
+	npages = memslot->npages;
+	for (idx = 0; idx < npages; idx++) {
+		/*
+		 * The present bit is tested without the rmap lock. That is
+		 * fine: the memslot has already been marked invalid, so no
+		 * new HPTE can reference this page and the bit can never
+		 * change from 0 to 1.
+		 */
+		if (!(rmap[idx] & KVMPPC_RMAP_PRESENT))
+			continue;
+		kvm_unmap_rmapp(kvm, memslot, first_gfn + idx);
+	}
+}
+
+/*
+ * Test and clear the "referenced" state of @gfn in @memslot.
+ *
+ * Clears KVMPPC_RMAP_REFERENCED in the rmap entry and the R bit of every
+ * valid HPTE on the rmap chain, recording R in the guest view
+ * (rev[i].guest_rpte) so that it is not lost.
+ *
+ * Returns true if the rmap flag was set or any valid HPTE had R set.
+ */
+static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			  unsigned long gfn)
+{
+	struct revmap_entry *rev = kvm->arch.hpt.rev;
+	unsigned long head, i, j;
+	__be64 *hptep;
+	bool ret = false;
+	unsigned long *rmapp;
+
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ retry:
+	lock_rmap(rmapp);
+	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
+		*rmapp &= ~KVMPPC_RMAP_REFERENCED;
+		ret = true;
+	}
+	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+		unlock_rmap(rmapp);
+		return ret;
+	}
+
+	/* walk the circular chain once, starting from its head */
+	i = head = *rmapp & KVMPPC_RMAP_INDEX;
+	do {
+		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+		j = rev[i].forw;
+
+		/* If this HPTE isn't referenced, ignore it */
+		if (!(be64_to_cpu(hptep[1]) & HPTE_R_R))
+			continue;
+
+		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+			/* unlock rmap before spinning on the HPTE lock */
+			unlock_rmap(rmapp);
+			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
+				cpu_relax();
+			/* the chain may have changed; start over */
+			goto retry;
+		}
+
+		/* Now check and modify the HPTE */
+		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+		    (be64_to_cpu(hptep[1]) & HPTE_R_R)) {
+			kvmppc_clear_ref_hpte(kvm, hptep, i);
+			if (!(rev[i].guest_rpte & HPTE_R_R)) {
+				rev[i].guest_rpte |= HPTE_R_R;
+				note_hpte_modification(kvm, &rev[i]);
+			}
+			ret = true;
+		}
+		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+	} while ((i = j) != head);
+
+	unlock_rmap(rmapp);
+	return ret;
+}
+
+/*
+ * Age every guest frame in [range->start, range->end), using the radix or
+ * the hashed-page-table helper depending on the MMU mode of @kvm.
+ *
+ * Returns true if any frame in the range was reported as referenced.
+ */
+bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	const bool radix = kvm_is_radix(kvm);
+	bool young = false;
+	gfn_t gfn;
+
+	for (gfn = range->start; gfn < range->end; gfn++) {
+		bool was_young;
+
+		if (radix)
+			was_young = kvm_age_radix(kvm, range->slot, gfn);
+		else
+			was_young = kvm_age_rmapp(kvm, range->slot, gfn);
+		young |= was_young;
+	}
+
+	return young;
+}
+
+/*
+ * Report whether @gfn in @memslot has been referenced, without clearing
+ * any state.
+ *
+ * Returns true if the rmap entry has KVMPPC_RMAP_REFERENCED set or if any
+ * HPTE on the rmap chain has its R bit set; false otherwise.
+ */
+static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			       unsigned long gfn)
+{
+	struct revmap_entry *rev = kvm->arch.hpt.rev;
+	unsigned long head, i, j;
+	/*
+	 * HPTEs are stored big-endian; use __be64 like the other rmap
+	 * walkers in this file so that be64_to_cpu() is applied to a
+	 * correctly annotated type.
+	 */
+	__be64 *hptep;
+	bool ret = true;
+	unsigned long *rmapp;
+
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	/* cheap unlocked check first */
+	if (*rmapp & KVMPPC_RMAP_REFERENCED)
+		return true;
+
+	lock_rmap(rmapp);
+	/* re-check now that the rmap entry is stable */
+	if (*rmapp & KVMPPC_RMAP_REFERENCED)
+		goto out;
+
+	if (*rmapp & KVMPPC_RMAP_PRESENT) {
+		i = head = *rmapp & KVMPPC_RMAP_INDEX;
+		do {
+			hptep = (__be64 *)(kvm->arch.hpt.virt + (i << 4));
+			j = rev[i].forw;
+			if (be64_to_cpu(hptep[1]) & HPTE_R_R)
+				goto out;
+		} while ((i = j) != head);
+	}
+	ret = false;
+
+ out:
+	unlock_rmap(rmapp);
+	return ret;
+}
+
+/*
+ * Test whether the single guest frame described by @range has been
+ * referenced. Warns if the range covers anything other than one frame.
+ */
+bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	WARN_ON(range->start + 1 != range->end);
+
+	if (!kvm_is_radix(kvm))
+		return kvm_test_age_rmapp(kvm, range->slot, range->start);
+
+	return kvm_test_age_radix(kvm, range->slot, range->start);
+}
+
+/*
+ * Handle a host PTE change for the single guest frame described by
+ * @range by unmapping that frame from the guest. Warns if the range
+ * covers anything other than one frame. Always returns false.
+ */
+bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	WARN_ON(range->start + 1 != range->end);
+
+	if (!kvm_is_radix(kvm)) {
+		kvm_unmap_rmapp(kvm, range->slot, range->start);
+		return false;
+	}
+
+	kvm_unmap_radix(kvm, range->slot, range->start);
+	return false;
+}
+
+/* Returns 1 if the vcpus_running counter of @kvm is non-zero, else 0. */
+static int vcpus_running(struct kvm *kvm)
+{
+	int running = atomic_read(&kvm->arch.vcpus_running);
+
+	return !!running;
+}
+
+/*
+ * Test and clear the C (changed) bit of every HPTE on the rmap chain
+ * headed by @rmapp, recording C in the guest view (rev[i].guest_rpte).
+ *
+ * Returns the number of system pages that are dirty.
+ * This can be more than 1 if we find a huge-page HPTE; the value is the
+ * largest page count among the dirty HPTEs found on the chain.
+ */
+static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
+{
+	struct revmap_entry *rev = kvm->arch.hpt.rev;
+	unsigned long head, i, j;
+	unsigned long n;
+	unsigned long v, r;
+	__be64 *hptep;
+	int npages_dirty = 0;
+
+ retry:
+	lock_rmap(rmapp);
+	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+		unlock_rmap(rmapp);
+		return npages_dirty;
+	}
+
+	/* walk the circular chain once, starting from its head */
+	i = head = *rmapp & KVMPPC_RMAP_INDEX;
+	do {
+		unsigned long hptep1;
+		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+		j = rev[i].forw;
+
+		/*
+		 * Checking the C (changed) bit here is racy since there
+		 * is no guarantee about when the hardware writes it back.
+		 * If the HPTE is not writable then it is stable since the
+		 * page can't be written to, and we would have done a tlbie
+		 * (which forces the hardware to complete any writeback)
+		 * when making the HPTE read-only.
+		 * If vcpus are running then this call is racy anyway
+		 * since the page could get dirtied subsequently, so we
+		 * expect there to be a further call which would pick up
+		 * any delayed C bit writeback.
+		 * Otherwise we need to do the tlbie even if C==0 in
+		 * order to pick up any delayed writeback of C.
+		 */
+		hptep1 = be64_to_cpu(hptep[1]);
+		if (!(hptep1 & HPTE_R_C) &&
+		    (!hpte_is_writable(hptep1) || vcpus_running(kvm)))
+			continue;
+
+		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+			/* unlock rmap before spinning on the HPTE lock */
+			unlock_rmap(rmapp);
+			while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK))
+				cpu_relax();
+			/* the chain may have changed; start over */
+			goto retry;
+		}
+
+		/* Now check and modify the HPTE */
+		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+			__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
+			continue;
+		}
+
+		/* need to make it temporarily absent so C is stable */
+		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+		kvmppc_invalidate_hpte(kvm, hptep, i);
+		v = be64_to_cpu(hptep[0]);
+		r = be64_to_cpu(hptep[1]);
+		if (r & HPTE_R_C) {
+			hptep[1] = cpu_to_be64(r & ~HPTE_R_C);
+			if (!(rev[i].guest_rpte & HPTE_R_C)) {
+				rev[i].guest_rpte |= HPTE_R_C;
+				note_hpte_modification(kvm, &rev[i]);
+			}
+			/* convert the HPTE page size to system pages, rounding up */
+			n = kvmppc_actual_pgsz(v, r);
+			n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
+			if (n > npages_dirty)
+				npages_dirty = n;
+			eieio();
+		}
+		/* make the HPTE valid again as it is unlocked */
+		v &= ~HPTE_V_ABSENT;
+		v |= HPTE_V_VALID;
+		__unlock_hpte(hptep, v);
+	} while ((i = j) != head);
+
+	unlock_rmap(rmapp);
+	return npages_dirty;
+}
+
+/*
+ * Transfer the dirty flag of a pinned VPA into a dirty bitmap.
+ *
+ * If @vpa is dirty and pinned, and its guest address falls inside
+ * @memslot, the flag is cleared and, when @map is non-NULL, the
+ * matching bit is set in @map.
+ */
+void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			      struct kvm_memory_slot *memslot,
+			      unsigned long *map)
+{
+	unsigned long gfn, slot_start;
+
+	if (!(vpa->dirty && vpa->pinned_addr))
+		return;
+
+	gfn = vpa->gpa >> PAGE_SHIFT;
+	slot_start = memslot->base_gfn;
+	if (gfn < slot_start)
+		return;
+	if (gfn >= slot_start + memslot->npages)
+		return;
+
+	vpa->dirty = false;
+	if (!map)
+		return;
+	__set_bit_le(gfn - slot_start, map);
+}
+
+/*
+ * Collect and clear the dirty state of every page in @memslot, setting
+ * the matching bits in @map. Runs with preemption disabled.
+ * Always returns 0.
+ */
+long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map)
+{
+	unsigned long *rmap;
+	unsigned long pg;
+
+	preempt_disable();
+	rmap = memslot->arch.rmap;
+	for (pg = 0; pg < memslot->npages; pg++) {
+		int ndirty = kvm_test_clear_dirty_npages(kvm, &rmap[pg]);
+
+		/*
+		 * A non-zero count implies pg is a multiple of that count:
+		 * huge-page HPTEs always sit in the rmap chain of their
+		 * page base address.
+		 */
+		if (ndirty != 0)
+			set_dirty_bits(map, pg, ndirty);
+	}
+	preempt_enable();
+
+	return 0;
+}
+
+/*
+ * Pin the guest page containing guest physical address @gpa for writing
+ * and return a kernel pointer to that address.
+ *
+ * If @nb_ret is non-NULL, it receives the number of bytes from @gpa to
+ * the end of the page. Returns NULL if @gpa is not in a usable memslot
+ * or the page cannot be pinned.
+ */
+void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
+			    unsigned long *nb_ret)
+{
+	unsigned long gfn = gpa >> PAGE_SHIFT;
+	struct kvm_memory_slot *memslot;
+	struct page *pages[1];
+	struct page *page = NULL;
+	unsigned long offset;
+	int srcu_idx;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
+		unsigned long hva = gfn_to_hva_memslot(memslot, gfn);
+
+		if (get_user_pages_fast(hva, 1, FOLL_WRITE, pages) >= 1)
+			page = pages[0];
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	if (!page)
+		return NULL;
+
+	offset = gpa & (PAGE_SIZE - 1);
+	if (nb_ret)
+		*nb_ret = PAGE_SIZE - offset;
+	return page_address(page) + offset;
+}
+
+/*
+ * Drop the page reference behind kernel address @va and, if @dirty is
+ * set, mark guest physical address @gpa dirty in the dirty bitmap of
+ * its memslot (when the memslot has one).
+ */
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
+			     bool dirty)
+{
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn;
+	int srcu_idx;
+
+	put_page(virt_to_page(va));
+
+	if (dirty) {
+		/* Record the write in the memslot dirty_bitmap, if any */
+		gfn = gpa >> PAGE_SHIFT;
+		srcu_idx = srcu_read_lock(&kvm->srcu);
+		memslot = gfn_to_memslot(kvm, gfn);
+		if (memslot && memslot->dirty_bitmap)
+			set_bit_le(gfn - memslot->base_gfn,
+				   memslot->dirty_bitmap);
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+	}
+}
+
+/*
+ * HPT resizing
+ */
+/*
+ * Allocate the pending hash page table for @resize at the requested
+ * order. Returns 0 on success or the negative error reported by
+ * kvmppc_allocate_hpt().
+ */
+static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
+{
+	int rc = kvmppc_allocate_hpt(&resize->hpt, resize->order);
+
+	if (rc >= 0) {
+		resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__,
+				 resize->hpt.virt);
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Move the HPTE at index @idx of the active HPT into the pending HPT
+ * held in @resize.
+ *
+ * A valid entry is first unmapped from its rmap chain. Only bolted
+ * entries are then copied to the new table; all others are simply left
+ * unmapped. The new slot is derived by reconstructing the hash from the
+ * AVPN and the old PTEG index.
+ *
+ * Returns 0 on success (including "nothing to do"), -EIO if the page
+ * size of the entry cannot be determined, or -ENOSPC if the target slot
+ * in the new table already holds a bolted entry.
+ */
+static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+					    unsigned long idx)
+{
+	struct kvm *kvm = resize->kvm;
+	struct kvm_hpt_info *old = &kvm->arch.hpt;
+	struct kvm_hpt_info *new = &resize->hpt;
+	unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
+	unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
+	__be64 *hptep, *new_hptep;
+	unsigned long vpte, rpte, guest_rpte;
+	int ret;
+	struct revmap_entry *rev;
+	unsigned long apsize, avpn, pteg, hash;
+	unsigned long new_idx, new_pteg, replace_vpte;
+	int pshift;
+
+	hptep = (__be64 *)(old->virt + (idx << 4));
+
+	/* Guest is stopped, so new HPTEs can't be added or faulted
+	 * in, only unmapped or altered by host actions.  So, it's
+	 * safe to check this before we take the HPTE lock */
+	vpte = be64_to_cpu(hptep[0]);
+	if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+		return 0; /* nothing to do */
+
+	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+		cpu_relax();
+
+	/* re-read now that the entry is locked */
+	vpte = be64_to_cpu(hptep[0]);
+
+	ret = 0;
+	if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+		/* Nothing to do */
+		goto out;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		rpte = be64_to_cpu(hptep[1]);
+		vpte = hpte_new_to_old_v(vpte, rpte);
+	}
+
+	/* Unmap */
+	rev = &old->rev[idx];
+	guest_rpte = rev->guest_rpte;
+
+	ret = -EIO;
+	apsize = kvmppc_actual_pgsz(vpte, guest_rpte);
+	if (!apsize)
+		goto out;
+
+	if (vpte & HPTE_V_VALID) {
+		unsigned long gfn = hpte_rpn(guest_rpte, apsize);
+		int srcu_idx = srcu_read_lock(&kvm->srcu);
+		struct kvm_memory_slot *memslot =
+			__gfn_to_memslot(kvm_memslots(kvm), gfn);
+
+		if (memslot) {
+			unsigned long *rmapp;
+			rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+
+			lock_rmap(rmapp);
+			kvmppc_unmap_hpte(kvm, idx, memslot, rmapp, gfn);
+			unlock_rmap(rmapp);
+		}
+
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+	}
+
+	/* Reload PTE after unmap */
+	vpte = be64_to_cpu(hptep[0]);
+	BUG_ON(vpte & HPTE_V_VALID);
+	BUG_ON(!(vpte & HPTE_V_ABSENT));
+
+	/* non-bolted entries are not carried over to the new table */
+	ret = 0;
+	if (!(vpte & HPTE_V_BOLTED))
+		goto out;
+
+	rpte = be64_to_cpu(hptep[1]);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		vpte = hpte_new_to_old_v(vpte, rpte);
+		rpte = hpte_new_to_old_r(rpte);
+	}
+
+	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
+	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
+	pteg = idx / HPTES_PER_GROUP;
+	/* secondary-hash entries live in the complemented PTEG */
+	if (vpte & HPTE_V_SECONDARY)
+		pteg = ~pteg;
+
+	if (!(vpte & HPTE_V_1TB_SEG)) {
+		unsigned long offset, vsid;
+
+		/* We only have 28 - 23 bits of offset in avpn */
+		offset = (avpn & 0x1f) << 23;
+		vsid = avpn >> 5;
+		/* We can find more bits from the pteg value */
+		if (pshift < 23)
+			offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
+
+		hash = vsid ^ (offset >> pshift);
+	} else {
+		unsigned long offset, vsid;
+
+		/* We only have 40 - 23 bits of seg_off in avpn */
+		offset = (avpn & 0x1ffff) << 23;
+		vsid = avpn >> 17;
+		if (pshift < 23)
+			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
+
+		hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
+	}
+
+	new_pteg = hash & new_hash_mask;
+	if (vpte & HPTE_V_SECONDARY)
+		new_pteg = ~hash & new_hash_mask;
+
+	/* keep the same slot number within the PTEG */
+	new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
+	new_hptep = (__be64 *)(new->virt + (new_idx << 4));
+
+	replace_vpte = be64_to_cpu(new_hptep[0]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+		replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+	}
+
+	if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+		/* an occupied target slot is only expected when shrinking */
+		BUG_ON(new->order >= old->order);
+
+		if (replace_vpte & HPTE_V_BOLTED) {
+			if (vpte & HPTE_V_BOLTED)
+				/* Bolted collision, nothing we can do */
+				ret = -ENOSPC;
+			/* Discard the new HPTE */
+			goto out;
+		}
+
+		/* Discard the previous HPTE */
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		rpte = hpte_old_to_new_r(vpte, rpte);
+		vpte = hpte_old_to_new_v(vpte);
+	}
+
+	new_hptep[1] = cpu_to_be64(rpte);
+	new->rev[new_idx].guest_rpte = guest_rpte;
+	/* No need for a barrier, since new HPT isn't active */
+	new_hptep[0] = cpu_to_be64(vpte);
+	unlock_hpte(new_hptep, vpte);
+
+out:
+	unlock_hpte(hptep, vpte);
+	return ret;
+}
+
+/*
+ * Rehash every entry of the active HPT into the pending HPT of @resize.
+ * Stops at the first entry that fails and returns its error code;
+ * returns 0 if all entries were processed.
+ */
+static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
+{
+	struct kvm *kvm = resize->kvm;
+	unsigned long idx = 0;
+	int rc = 0;
+
+	while (idx < kvmppc_hpt_npte(&kvm->arch.hpt)) {
+		rc = resize_hpt_rehash_hpte(resize, idx);
+		if (rc != 0)
+			break;
+		idx++;
+	}
+
+	return rc;
+}
+
+/*
+ * Make the pending HPT of @resize the active one.
+ *
+ * The swap is done under kvm->mmu_lock; afterwards resize->hpt holds the
+ * previously active table. SRCU readers are then waited for and, on
+ * CPU_FTR_ARCH_300 hardware, the partition table entry is set up again.
+ */
+static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
+{
+	struct kvm *kvm = resize->kvm;
+	struct kvm_hpt_info hpt_tmp;
+
+	/* Exchange the pending tables in the resize structure with
+	 * the active tables */
+
+	resize_hpt_debug(resize, "resize_hpt_pivot()\n");
+
+	spin_lock(&kvm->mmu_lock);
+	asm volatile("ptesync" : : : "memory");
+
+	hpt_tmp = kvm->arch.hpt;
+	kvmppc_set_hpt(kvm, &resize->hpt);
+	resize->hpt = hpt_tmp;
+
+	spin_unlock(&kvm->mmu_lock);
+
+	/* wait for SRCU read-side sections that began before the swap */
+	synchronize_srcu_expedited(&kvm->srcu);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		kvmppc_setup_partition_table(kvm);
+
+	resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
+}
+
+/*
+ * Detach @resize from @kvm and free it if possible.
+ *
+ * Must be called with kvm->arch.mmu_setup_lock held (warns and returns
+ * otherwise). A NULL @resize is a no-op.
+ *
+ * The structure and its HPT are not freed while resize->error is -EBUSY,
+ * the value it carries from creation until resize_hpt_prepare_work()
+ * stores a result; in that case only the kvm->arch.resize_hpt pointer
+ * is cleared here.
+ */
+static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
+{
+	if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock)))
+		return;
+
+	if (!resize)
+		return;
+
+	if (resize->error != -EBUSY) {
+		if (resize->hpt.virt)
+			kvmppc_free_hpt(&resize->hpt);
+		kfree(resize);
+	}
+
+	/* only the pointer value is compared; resize is not dereferenced */
+	if (kvm->arch.resize_hpt == resize)
+		kvm->arch.resize_hpt = NULL;
+}
+
+/*
+ * Work item that allocates the new HPT for a pending resize request.
+ *
+ * If the request is still the current one, the allocation is done with
+ * kvm->arch.mmu_setup_lock dropped. The result is then stored in
+ * resize->error, replacing the -EBUSY placeholder. If the request has
+ * been superseded in the meantime, it is released here.
+ */
+static void resize_hpt_prepare_work(struct work_struct *work)
+{
+	struct kvm_resize_hpt *resize = container_of(work,
+						     struct kvm_resize_hpt,
+						     work);
+	struct kvm *kvm = resize->kvm;
+	int err = 0;
+
+	if (WARN_ON(resize->error != -EBUSY))
+		return;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+
+	/* Request is still current? */
+	if (kvm->arch.resize_hpt == resize) {
+		/* We may request large allocations here:
+		 * do not sleep with kvm->arch.mmu_setup_lock held for a while.
+		 */
+		mutex_unlock(&kvm->arch.mmu_setup_lock);
+
+		resize_hpt_debug(resize, "%s(): order = %d\n", __func__,
+				 resize->order);
+
+		err = resize_hpt_allocate(resize);
+
+		/* We have strict assumption about -EBUSY
+		 * when preparing for HPT resize.
+		 */
+		if (WARN_ON(err == -EBUSY))
+			err = -EINPROGRESS;
+
+		mutex_lock(&kvm->arch.mmu_setup_lock);
+		/* It is possible that kvm->arch.resize_hpt != resize
+		 * after we grab kvm->arch.mmu_setup_lock again.
+		 */
+	}
+
+	/* publish the result; from here on error is no longer -EBUSY */
+	resize->error = err;
+
+	if (kvm->arch.resize_hpt != resize)
+		resize_hpt_release(kvm, resize);
+
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+}
+
+/*
+ * Start, poll or cancel preparation of an HPT resize.
+ *
+ * @rhpt->flags must be 0 and @rhpt->shift must be 0 or within 18..46.
+ * A shift of 0 cancels any pending resize.
+ *
+ * Returns a positive estimate in milliseconds while preparation is in
+ * progress, 0 when a matching prepared HPT is ready (or there is nothing
+ * to do), -EINVAL for bad arguments or a radix guest, -ENOMEM if the
+ * request cannot be allocated, or the error recorded by a failed
+ * preparation.
+ */
+int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+				    struct kvm_ppc_resize_hpt *rhpt)
+{
+	unsigned long flags = rhpt->flags;
+	unsigned long shift = rhpt->shift;
+	struct kvm_resize_hpt *resize;
+	int ret;
+
+	if (flags != 0 || kvm_is_radix(kvm))
+		return -EINVAL;
+
+	if (shift && ((shift < 18) || (shift > 46)))
+		return -EINVAL;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+
+	resize = kvm->arch.resize_hpt;
+
+	if (resize) {
+		if (resize->order == shift) {
+			/* Suitable resize in progress? */
+			ret = resize->error;
+			if (ret == -EBUSY)
+				ret = 100; /* estimated time in ms */
+			else if (ret)
+				resize_hpt_release(kvm, resize);
+
+			goto out;
+		}
+
+		/* not suitable, cancel it */
+		resize_hpt_release(kvm, resize);
+	}
+
+	ret = 0;
+	if (!shift)
+		goto out; /* nothing to do */
+
+	/* start new resize */
+
+	resize = kzalloc(sizeof(*resize), GFP_KERNEL);
+	if (!resize) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* -EBUSY marks the request as "preparation still running" */
+	resize->error = -EBUSY;
+	resize->order = shift;
+	resize->kvm = kvm;
+	INIT_WORK(&resize->work, resize_hpt_prepare_work);
+	kvm->arch.resize_hpt = resize;
+
+	schedule_work(&resize->work);
+
+	ret = 100; /* estimated time in ms */
+
+out:
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return ret;
+}
+
+/*
+ * Empty callback for on_each_cpu(); @opaque is unused.
+ */
+static void resize_hpt_boot_vcpu(void *opaque)
+{
+	/* Nothing to do, just force a KVM exit */
+}
+
+/*
+ * Switch the guest over to a previously prepared HPT.
+ *
+ * @rhpt->flags must be 0 and @rhpt->shift must be 0 or within 18..46.
+ * While the tables are exchanged, mmu_ready is cleared and all CPUs are
+ * interrupted so that vcpus leave the guest. The pending resize request
+ * is released on every path that takes the lock, successful or not.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments or a radix guest,
+ * -EIO if the MMU was unexpectedly not ready, -ENXIO if no prepared
+ * resize matches @rhpt->shift, or the error from preparation or
+ * rehashing.
+ */
+int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+				   struct kvm_ppc_resize_hpt *rhpt)
+{
+	unsigned long flags = rhpt->flags;
+	unsigned long shift = rhpt->shift;
+	struct kvm_resize_hpt *resize;
+	int ret;
+
+	if (flags != 0 || kvm_is_radix(kvm))
+		return -EINVAL;
+
+	if (shift && ((shift < 18) || (shift > 46)))
+		return -EINVAL;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+
+	resize = kvm->arch.resize_hpt;
+
+	/* This shouldn't be possible */
+	ret = -EIO;
+	if (WARN_ON(!kvm->arch.mmu_ready))
+		goto out_no_hpt;
+
+	/* Stop VCPUs from running while we mess with the HPT */
+	kvm->arch.mmu_ready = 0;
+	smp_mb();
+
+	/* Boot all CPUs out of the guest so they re-read
+	 * mmu_ready */
+	on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
+
+	ret = -ENXIO;
+	if (!resize || (resize->order != shift))
+		goto out;
+
+	/* non-zero here means preparation failed or is still running */
+	ret = resize->error;
+	if (ret)
+		goto out;
+
+	ret = resize_hpt_rehash(resize);
+	if (ret)
+		goto out;
+
+	resize_hpt_pivot(resize);
+
+out:
+	/* Let VCPUs run again */
+	kvm->arch.mmu_ready = 1;
+	smp_mb();
+out_no_hpt:
+	resize_hpt_release(kvm, resize);
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return ret;
+}
+
+/*
+ * Functions for reading and writing the hash table via reads and
+ * writes on a file descriptor.
+ *
+ * Reads return the guest view of the hash table, which has to be
+ * pieced together from the real hash table and the guest_rpte
+ * values in the revmap array.
+ *
+ * On writes, each HPTE written is considered in turn, and if it
+ * is valid, it is written to the HPT as if an H_ENTER with the
+ * exact flag set was done.  When the invalid count is non-zero
+ * in the header written to the stream, the kernel will make
+ * sure that that many HPTEs are invalid, and invalidate them
+ * if not.
+ */
+
+/* Per-open-file state of an HPT file descriptor (stored in private_data) */
+struct kvm_htab_ctx {
+	unsigned long	index;		/* next HPT index to read */
+	unsigned long	flags;		/* KVM_GET_HTAB_* flags given at creation */
+	struct kvm	*kvm;		/* VM this fd refers to; holds a reference */
+	int		first_pass;	/* non-zero until a read reaches the end of the HPT */
+};
+
+/* Size in bytes of one HPTE: two unsigned long words */
+#define HPTE_SIZE	(2 * sizeof(unsigned long))
+
+/*
+ * Tell whether an HPT entry needs to be reported again.
+ *
+ * Returns 1 if the guest view in @revp is flagged as modified, or if the
+ * real HPTE at @hptp is valid and has an R or C bit set that the guest
+ * view does not have yet. Returns 0 otherwise.
+ */
+static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp)
+{
+	unsigned long pending_rc;
+
+	if (revp->guest_rpte & HPTE_GR_MODIFIED)
+		return 1;
+
+	/* R/C bits the guest view is still missing */
+	pending_rc = (HPTE_R_R | HPTE_R_C) & ~revp->guest_rpte;
+
+	if (!(be64_to_cpu(hptp[0]) & HPTE_V_VALID))
+		return 0;
+
+	return (be64_to_cpu(hptp[1]) & pending_rc) ? 1 : 0;
+}
+
+/*
+ * Produce the guest view of one HPT entry for kvm_htab_read().
+ *
+ * @flags:      KVM_GET_HTAB_* flags of the file descriptor
+ * @hptp:       the real HPTE
+ * @hpte:       output; receives the two words of the guest view
+ * @revp:       reverse-map entry holding the guest second word
+ * @want_valid: 1 to accept only valid entries, 0 to accept only invalid
+ * @first_pass: non-zero if unmodified entries should be reported too
+ *
+ * Returns 1 if the entry is of the wanted kind and should be reported,
+ * 0 otherwise. When a reported entry was flagged as modified, the flag
+ * (HPTE_GR_MODIFIED) is cleared in @revp.
+ */
+static long record_hpte(unsigned long flags, __be64 *hptp,
+			unsigned long *hpte, struct revmap_entry *revp,
+			int want_valid, int first_pass)
+{
+	unsigned long v, r, hr;
+	unsigned long rcbits_unset;
+	int ok = 1;
+	int valid, dirty;
+
+	/* Unmodified entries are uninteresting except on the first pass */
+	dirty = hpte_dirty(revp, hptp);
+	if (!first_pass && !dirty)
+		return 0;
+
+	/* unlocked pre-check; repeated below under the HPTE lock */
+	valid = 0;
+	if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+		valid = 1;
+		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
+		    !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED))
+			valid = 0;
+	}
+	if (valid != want_valid)
+		return 0;
+
+	v = r = 0;
+	if (valid || dirty) {
+		/* lock the HPTE so it's stable and read it */
+		preempt_disable();
+		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+			cpu_relax();
+		v = be64_to_cpu(hptp[0]);
+		hr = be64_to_cpu(hptp[1]);
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			v = hpte_new_to_old_v(v, hr);
+			hr = hpte_new_to_old_r(hr);
+		}
+
+		/* re-evaluate valid and dirty from synchronized HPTE value */
+		valid = !!(v & HPTE_V_VALID);
+		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+
+		/* Harvest R and C into guest view if necessary */
+		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
+		if (valid && (rcbits_unset & hr)) {
+			revp->guest_rpte |= (hr &
+				(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
+			dirty = 1;
+		}
+
+		/* an absent entry is reported to the reader as valid */
+		if (v & HPTE_V_ABSENT) {
+			v &= ~HPTE_V_ABSENT;
+			v |= HPTE_V_VALID;
+			valid = 1;
+		}
+		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
+			valid = 0;
+
+		r = revp->guest_rpte;
+		/* only clear modified if this is the right sort of entry */
+		if (valid == want_valid && dirty) {
+			r &= ~HPTE_GR_MODIFIED;
+			revp->guest_rpte = r;
+		}
+		unlock_hpte(hptp, be64_to_cpu(hptp[0]));
+		preempt_enable();
+		if (!(valid == want_valid && (first_pass || dirty)))
+			ok = 0;
+	}
+	/* the output words are stored big-endian */
+	hpte[0] = cpu_to_be64(v);
+	hpte[1] = cpu_to_be64(r);
+	return ok;
+}
+
+/*
+ * read() handler of the HPT file descriptor.
+ *
+ * Fills @buf with a sequence of records. Each record is a
+ * struct kvm_get_htab_header followed by hdr.n_valid HPTEs; hdr.n_invalid
+ * counts the invalid entries that follow them in the table and carry no
+ * data. Reading resumes at ctx->index and stops when @buf is full or the
+ * end of the table is reached, at which point the index wraps to 0 and
+ * later reads return only modified entries.
+ *
+ * Returns the number of bytes written to @buf, 0 for a radix guest, or
+ * -EFAULT if @buf cannot be written.
+ */
+static ssize_t kvm_htab_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct kvm_htab_ctx *ctx = file->private_data;
+	struct kvm *kvm = ctx->kvm;
+	struct kvm_get_htab_header hdr;
+	__be64 *hptp;
+	struct revmap_entry *revp;
+	unsigned long i, nb, nw;
+	unsigned long __user *lbuf;
+	struct kvm_get_htab_header __user *hptr;
+	unsigned long flags;
+	int first_pass;
+	unsigned long hpte[2];
+
+	if (!access_ok(buf, count))
+		return -EFAULT;
+	if (kvm_is_radix(kvm))
+		return 0;
+
+	first_pass = ctx->first_pass;
+	flags = ctx->flags;
+
+	i = ctx->index;
+	hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+	revp = kvm->arch.hpt.rev + i;
+	lbuf = (unsigned long __user *)buf;
+
+	/* nb: bytes produced so far; nw: value of nb at the last completed record */
+	nb = 0;
+	while (nb + sizeof(hdr) + HPTE_SIZE < count) {
+		/* Initialize header */
+		hptr = (struct kvm_get_htab_header __user *)buf;
+		hdr.n_valid = 0;
+		hdr.n_invalid = 0;
+		nw = nb;
+		nb += sizeof(hdr);
+		lbuf = (unsigned long __user *)(buf + sizeof(hdr));
+
+		/* Skip uninteresting entries, i.e. clean on not-first pass */
+		if (!first_pass) {
+			while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
+			       !hpte_dirty(revp, hptp)) {
+				++i;
+				hptp += 2;
+				++revp;
+			}
+		}
+		hdr.index = i;
+
+		/* Grab a series of valid entries */
+		while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
+		       hdr.n_valid < 0xffff &&
+		       nb + HPTE_SIZE < count &&
+		       record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
+			/* valid entry, write it out */
+			++hdr.n_valid;
+			if (__put_user(hpte[0], lbuf) ||
+			    __put_user(hpte[1], lbuf + 1))
+				return -EFAULT;
+			nb += HPTE_SIZE;
+			lbuf += 2;
+			++i;
+			hptp += 2;
+			++revp;
+		}
+		/* Now skip invalid entries while we can */
+		while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
+		       hdr.n_invalid < 0xffff &&
+		       record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
+			/* found an invalid entry */
+			++hdr.n_invalid;
+			++i;
+			hptp += 2;
+			++revp;
+		}
+
+		if (hdr.n_valid || hdr.n_invalid) {
+			/* write back the header */
+			if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
+				return -EFAULT;
+			nw = nb;
+			buf = (char __user *)lbuf;
+		} else {
+			/* empty record: give back the space reserved for its header */
+			nb = nw;
+		}
+
+		/* Check if we've wrapped around the hash table */
+		if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
+			i = 0;
+			ctx->first_pass = 0;
+			break;
+		}
+	}
+
+	ctx->index = i;
+
+	return nb;
+}
+
+/*
+ * write() handler of the HPT file descriptor.
+ *
+ * @buf holds a sequence of records, each a struct kvm_get_htab_header
+ * followed by hdr.n_valid HPTEs. Every HPTE supplied is entered at its
+ * exact index, replacing whatever was there, and the hdr.n_invalid
+ * entries that follow are removed if present.
+ *
+ * Vcpus are kept out of the guest for the duration. If vcpus are already
+ * running, -EBUSY is returned.
+ *
+ * Returns the number of bytes consumed, which may be less than @count if
+ * the last record does not fit completely, or a negative error:
+ * -EFAULT (bad user memory), -EINVAL (radix guest, bad index or bad
+ * HPTE), -EIO (the entry could not be inserted), -EBUSY.
+ */
+static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct kvm_htab_ctx *ctx = file->private_data;
+	struct kvm *kvm = ctx->kvm;
+	struct kvm_get_htab_header hdr;
+	unsigned long i, j;
+	unsigned long v, r;
+	unsigned long __user *lbuf;
+	__be64 *hptp;
+	unsigned long tmp[2];
+	ssize_t nb;
+	long int err, ret;
+	int mmu_ready;
+	int pshift;
+
+	if (!access_ok(buf, count))
+		return -EFAULT;
+	if (kvm_is_radix(kvm))
+		return -EINVAL;
+
+	/* lock out vcpus from running while we're doing this */
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+	mmu_ready = kvm->arch.mmu_ready;
+	if (mmu_ready) {
+		kvm->arch.mmu_ready = 0;	/* temporarily */
+		/* order mmu_ready vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.mmu_ready = 1;
+			mutex_unlock(&kvm->arch.mmu_setup_lock);
+			return -EBUSY;
+		}
+	}
+
+	err = 0;
+	for (nb = 0; nb + sizeof(hdr) <= count; ) {
+		err = -EFAULT;
+		if (__copy_from_user(&hdr, buf, sizeof(hdr)))
+			break;
+
+		/*
+		 * Stop if the record (header plus its valid HPTEs) does
+		 * not fit in what is left of the buffer. The header size
+		 * must be counted too, otherwise the HPTE reads below
+		 * could run past the range checked by access_ok().
+		 */
+		err = 0;
+		if (nb + sizeof(hdr) + hdr.n_valid * HPTE_SIZE > count)
+			break;
+
+		nb += sizeof(hdr);
+		buf += sizeof(hdr);
+
+		err = -EINVAL;
+		i = hdr.index;
+		if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) ||
+		    i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt))
+			break;
+
+		hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+		lbuf = (unsigned long __user *)buf;
+		for (j = 0; j < hdr.n_valid; ++j) {
+			__be64 hpte_v;
+			__be64 hpte_r;
+
+			err = -EFAULT;
+			if (__get_user(hpte_v, lbuf) ||
+			    __get_user(hpte_r, lbuf + 1))
+				goto out;
+			v = be64_to_cpu(hpte_v);
+			r = be64_to_cpu(hpte_r);
+			err = -EINVAL;
+			if (!(v & HPTE_V_VALID))
+				goto out;
+			pshift = kvmppc_hpte_base_page_shift(v, r);
+			if (pshift <= 0)
+				goto out;
+			lbuf += 2;
+			nb += HPTE_SIZE;
+
+			/* make room: drop whatever occupies the slot now */
+			if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
+				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+			err = -EIO;
+			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
+							 tmp);
+			if (ret != H_SUCCESS) {
+				pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r);
+				goto out;
+			}
+			/*
+			 * First VRMA entry seen while the MMU is not yet
+			 * ready: derive the VRMA segment settings from its
+			 * page size and mark the MMU ready.
+			 */
+			if (!mmu_ready && is_vrma_hpte(v)) {
+				unsigned long senc, lpcr;
+
+				senc = slb_pgsize_encoding(1ul << pshift);
+				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+					(VRMA_VSID << SLB_VSID_SHIFT_1T);
+				if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+					lpcr = senc << (LPCR_VRMASD_SH - 4);
+					kvmppc_update_lpcr(kvm, lpcr,
+							   LPCR_VRMASD);
+				} else {
+					kvmppc_setup_partition_table(kvm);
+				}
+				mmu_ready = 1;
+			}
+			++i;
+			hptp += 2;
+		}
+
+		/* entries announced as invalid must end up empty */
+		for (j = 0; j < hdr.n_invalid; ++j) {
+			if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
+				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+			++i;
+			hptp += 2;
+		}
+		err = 0;
+	}
+
+ out:
+	/* Order HPTE updates vs. mmu_ready */
+	smp_wmb();
+	kvm->arch.mmu_ready = mmu_ready;
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+
+	if (err)
+		return err;
+	return nb;
+}
+
+/*
+ * release() handler of the HPT file descriptor.
+ *
+ * Drops the "modification interest" count taken for read-only
+ * descriptors, releases the VM reference and frees the context.
+ */
+static int kvm_htab_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_htab_ctx *ctx = filp->private_data;
+	struct kvm *kvm = ctx->kvm;
+	bool reader = !(ctx->flags & KVM_GET_HTAB_WRITE);
+
+	filp->private_data = NULL;
+	if (reader)
+		atomic_dec(&kvm->arch.hpte_mod_interest);
+	kvm_put_kvm(kvm);
+	kfree(ctx);
+
+	return 0;
+}
+
+/* File operations of the "kvm-htab" anonymous inode */
+static const struct file_operations kvm_htab_fops = {
+	.read		= kvm_htab_read,
+	.write		= kvm_htab_write,
+	.llseek		= default_llseek,
+	.release	= kvm_htab_release,
+};
+
+/*
+ * Create a file descriptor for reading or writing the guest HPT.
+ *
+ * @ghf->flags selects write mode (KVM_GET_HTAB_WRITE) and bolted-only
+ * reporting (KVM_GET_HTAB_BOLTED_ONLY); @ghf->start_index is the first
+ * HPT index to read. The descriptor holds a reference on @kvm.
+ *
+ * Returns the new descriptor, -EINVAL for unknown flags, -ENOMEM, or
+ * the error from anon_inode_getfd().
+ */
+int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
+{
+	struct kvm_htab_ctx *ctx;
+	int rwflag = O_RDONLY;
+	int fd;
+
+	/* reject flags we don't recognize */
+	if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
+		return -EINVAL;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	kvm_get_kvm(kvm);
+	ctx->kvm = kvm;
+	ctx->index = ghf->start_index;
+	ctx->flags = ghf->flags;
+	ctx->first_pass = 1;
+
+	if (ghf->flags & KVM_GET_HTAB_WRITE)
+		rwflag = O_WRONLY;
+
+	fd = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
+	if (fd < 0) {
+		kfree(ctx);
+		kvm_put_kvm_no_destroy(kvm);
+		return fd;
+	}
+
+	if (rwflag != O_RDONLY)
+		return fd;
+
+	/* readers register their interest in HPTE modifications */
+	mutex_lock(&kvm->slots_lock);
+	atomic_inc(&kvm->arch.hpte_mod_interest);
+	/* make sure kvmppc_do_h_enter etc. see the increment */
+	synchronize_srcu_expedited(&kvm->srcu);
+	mutex_unlock(&kvm->slots_lock);
+
+	return fd;
+}
+
+/* Per-open-file state of the debugfs "htab" file */
+struct debugfs_htab_state {
+	struct kvm	*kvm;		/* VM being dumped; holds a reference */
+	struct mutex	mutex;		/* serializes readers of this file */
+	unsigned long	hpt_index;	/* next HPT index to examine */
+	int		chars_left;	/* formatted bytes in buf not yet copied out */
+	int		buf_index;	/* offset in buf of the first such byte */
+	char		buf[64];	/* one formatted output line */
+};
+
+/*
+ * open() handler of the debugfs "htab" file: allocates the per-file
+ * state, takes a reference on the VM and marks the file non-seekable.
+ * Returns -ENOMEM if the state cannot be allocated.
+ */
+static int debugfs_htab_open(struct inode *inode, struct file *file)
+{
+	struct debugfs_htab_state *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	struct kvm *kvm;
+
+	if (!p)
+		return -ENOMEM;
+
+	kvm = inode->i_private;
+	kvm_get_kvm(kvm);
+	p->kvm = kvm;
+	mutex_init(&p->mutex);
+	file->private_data = p;
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * release() handler of the debugfs "htab" file: drops the VM reference
+ * and frees the per-file state.
+ */
+static int debugfs_htab_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_htab_state *state = file->private_data;
+	struct kvm *kvm = state->kvm;
+
+	kvm_put_kvm(kvm);
+	kfree(state);
+
+	return 0;
+}
+
+/*
+ * read() handler of the debugfs "htab" file.
+ *
+ * Emits one text line per valid or absent HPTE: index, first word,
+ * second word and the guest view of the second word, all in hex. A line
+ * that does not fit in @buf is kept in p->buf and finished by the next
+ * read.
+ *
+ * Returns the number of bytes copied, 0 at end of table or for a radix
+ * guest, -EFAULT if nothing could be copied, or the error from
+ * mutex_lock_interruptible().
+ */
+static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct debugfs_htab_state *p = file->private_data;
+	ssize_t ret, r;
+	unsigned long i, n;
+	unsigned long v, hr, gr;
+	struct kvm *kvm;
+	__be64 *hptp;
+
+	kvm = p->kvm;
+	if (kvm_is_radix(kvm))
+		return 0;
+
+	ret = mutex_lock_interruptible(&p->mutex);
+	if (ret)
+		return ret;
+
+	/* first hand out what is left of a partially delivered line */
+	if (p->chars_left) {
+		n = p->chars_left;
+		if (n > len)
+			n = len;
+		/* copy_to_user() returns the number of bytes NOT copied */
+		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index += n;
+		buf += n;
+		len -= n;
+		ret = n;
+		if (r) {
+			if (!n)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+
+	i = p->hpt_index;
+	hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+	for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
+	     ++i, hptp += 2) {
+		/* unlocked pre-check; repeated below on the locked snapshot */
+		if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
+			continue;
+
+		/* lock the HPTE so it's stable and read it */
+		preempt_disable();
+		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+			cpu_relax();
+		v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
+		hr = be64_to_cpu(hptp[1]);
+		gr = kvm->arch.hpt.rev[i].guest_rpte;
+		unlock_hpte(hptp, v);
+		preempt_enable();
+
+		if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+			continue;
+
+		n = scnprintf(p->buf, sizeof(p->buf),
+			      "%6lx %.16lx %.16lx %.16lx\n",
+			      i, v, hr, gr);
+		p->chars_left = n;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index = n;
+		buf += n;
+		len -= n;
+		ret += n;
+		if (r) {
+			if (!ret)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+	p->hpt_index = i;
+
+ out:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+/* write() handler of the debugfs "htab" file: always fails with -EACCES. */
+static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+			   size_t len, loff_t *ppos)
+{
+	return -EACCES;
+}
+
+/* File operations of the debugfs "htab" file */
+static const struct file_operations debugfs_htab_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_htab_open,
+	.release = debugfs_htab_release,
+	.read	 = debugfs_htab_read,
+	.write	 = debugfs_htab_write,
+	.llseek	 = generic_file_llseek,
+};
+
+/*
+ * Create the "htab" file (mode 0400) in the debugfs directory of @kvm.
+ */
+void kvmppc_mmu_debugfs_init(struct kvm *kvm)
+{
+	debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
+			    &debugfs_htab_fops);
+}
+
+/*
+ * Set up the MMU state of @vcpu for HV KVM: install the address
+ * translation callback, set the number of SLB entries and flag the vcpu
+ * as using an SLB.
+ */
+void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.mmu.xlate = kvmppc_mmu_book3s_64_hv_xlate;
+
+	/* POWER7/POWER8 */
+	vcpu->arch.slb_nr = 32;
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
new file mode 100644
index 0000000000..10aacbf924
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -0,0 +1,1492 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/debugfs.h>
+#include <linux/pgtable.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include "book3s_hv.h"
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/ultravisor.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+/*
+ * Supported radix tree geometry.
+ * Like p9, we support either 5 or 9 bits at the first (lowest) level,
+ * for a page size of 64k or 4k.
+ *
+ * The array is indexed by tree level, with index 0 being the lowest
+ * level and index 3 the level the walk starts at.  Entry 0 holds the
+ * 64k value (5); users in this file handle the 4k value (9) for
+ * level 0 separately.
+ */
+static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
+
+/*
+ * Copy @n bytes between a host buffer and the guest effective address
+ * @eaddr, translated in the context of partition @lpid and process @pid.
+ *
+ * A non-NULL @to selects a load from the guest into @to; otherwise the
+ * data at @from is stored to the guest.
+ *
+ * When running on pseries the work is delegated to the
+ * H_COPY_TOFROM_GUEST hcall and its return value is returned.  Otherwise
+ * the LPID (and, for a non-zero @pid, the PID) SPRs are temporarily
+ * switched and the guest is accessed through quadrant 1 (non-zero @pid)
+ * or quadrant 2 (@pid == 0) with preemption and page faults disabled.
+ *
+ * Returns the result of the __copy_{from,to}_user_inatomic() call, or @n
+ * without copying anything if any of the top 12 bits of @eaddr are set.
+ */
+unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+					      gva_t eaddr, void *to, void *from,
+					      unsigned long n)
+{
+	int old_pid, old_lpid;
+	unsigned long quadrant, ret = n;
+	bool is_load = !!to;
+
+	/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
+	if (kvmhv_on_pseries())
+		return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
+					  (to != NULL) ? __pa(to): 0,
+					  (from != NULL) ? __pa(from): 0, n);
+
+	/* The top 12 bits are reserved for the quadrant selection below */
+	if (eaddr & (0xFFFUL << 52))
+		return ret;
+
+	quadrant = 1;
+	if (!pid)
+		quadrant = 2;
+	/* Turn the guest side of the copy into a quadrant address */
+	if (is_load)
+		from = (void *) (eaddr | (quadrant << 62));
+	else
+		to = (void *) (eaddr | (quadrant << 62));
+
+	preempt_disable();
+
+	asm volatile("hwsync" ::: "memory");
+	isync();
+	/* switch the lpid first to avoid running host with unallocated pid */
+	old_lpid = mfspr(SPRN_LPID);
+	if (old_lpid != lpid)
+		mtspr(SPRN_LPID, lpid);
+	/* old_pid is only read and written when quadrant == 1 */
+	if (quadrant == 1) {
+		old_pid = mfspr(SPRN_PID);
+		if (old_pid != pid)
+			mtspr(SPRN_PID, pid);
+	}
+	isync();
+
+	pagefault_disable();
+	if (is_load)
+		ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
+	else
+		ret = __copy_to_user_inatomic((void __user *)to, from, n);
+	pagefault_enable();
+
+	asm volatile("hwsync" ::: "memory");
+	isync();
+	/* switch the pid first to avoid running host with unallocated pid */
+	if (quadrant == 1 && pid != old_pid)
+		mtspr(SPRN_PID, old_pid);
+	if (lpid != old_lpid)
+		mtspr(SPRN_LPID, old_lpid);
+	isync();
+
+	preempt_enable();
+
+	return ret;
+}
+
+/*
+ * Pick the lpid/pid to use for an access by @vcpu to guest effective
+ * address @eaddr and hand the copy over to
+ * __kvmhv_copy_tofrom_guest_radix().
+ *
+ * Returns -EINVAL if @eaddr has any of bits 52..61 set, otherwise the
+ * result of the low-level copy.
+ */
+static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+					  void *to, void *from, unsigned long n)
+{
+	int lpid, pid;
+
+	/* Such an address would raise a data segment interrupt: refuse it */
+	if (eaddr & (0x3FFUL << 52))
+		return -EINVAL;
+
+	/* A nested guest is accessed through its shadow lpid */
+	if (vcpu->arch.nested)
+		lpid = vcpu->arch.nested->shadow_lpid;
+	else
+		lpid = vcpu->kvm->arch.lpid;
+
+	/* Quadrant 3 accesses are expected to use pid 0 */
+	if (((eaddr >> 62) & 0x3) == 0x3)
+		pid = 0;
+	else
+		pid = vcpu->arch.pid;
+
+	/* Strip the top 12 bits before passing the address down */
+	return __kvmhv_copy_tofrom_guest_radix(lpid, pid,
+					       eaddr & ~(0xFFFUL << 52),
+					       to, from, n);
+}
+
+/*
+ * Read @n bytes from guest effective address @eaddr into @to.
+ *
+ * If the copy returns a positive count, that many bytes at the end of
+ * @to are zeroed.  The value returned by the copy is passed back to the
+ * caller unchanged.
+ */
+long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
+				 unsigned long n)
+{
+	long left = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
+
+	if (left > 0)
+		memset(to + (n - left), 0, left);
+
+	return left;
+}
+
+/*
+ * Write @n bytes from @from to guest effective address @eaddr.
+ * Returns whatever kvmhv_copy_tofrom_guest_radix() returns.
+ */
+long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
+			       unsigned long n)
+{
+	return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
+}
+
+/*
+ * Walk a radix tree held in guest memory to translate @eaddr.
+ *
+ * @root supplies the tree size, the number of index bits of the first
+ * level and the base address of the first-level table.  Each entry is
+ * read from guest memory with kvm_read_guest().
+ *
+ * On success @gpte is filled in (addresses, page size and shift,
+ * permissions and R/C bits), the leaf PTE is stored in *@pte_ret_p when
+ * that pointer is non-NULL, and 0 is returned.
+ *
+ * Returns -EINVAL for a geometry that is not supported, a misaligned
+ * table base, a misaligned leaf address or a leaf at an unsupported
+ * level; -ENOENT if an entry is not present; or the kvm_read_guest()
+ * error, in which case *@pte_ret_p (if non-NULL) receives the guest
+ * address of the entry that could not be read.
+ */
+int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+			       struct kvmppc_pte *gpte, u64 root,
+			       u64 *pte_ret_p)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int ret, level, ps;
+	unsigned long rts, bits, offset, index;
+	u64 pte, base, gpa;
+	__be64 rpte;
+
+	/* The tree size field is split over two bit ranges of the root */
+	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
+		((root & RTS2_MASK) >> RTS2_SHIFT);
+	bits = root & RPDS_MASK;
+	base = root & RPDB_MASK;
+
+	offset = rts + 31;
+
+	/* Current implementations only support 52-bit space */
+	if (offset != 52)
+		return -EINVAL;
+
+	/* Walk each level of the radix tree */
+	for (level = 3; level >= 0; --level) {
+		u64 addr;
+		/* Check a valid size */
+		if (level && bits != p9_supported_radix_bits[level])
+			return -EINVAL;
+		if (level == 0 && !(bits == 5 || bits == 9))
+			return -EINVAL;
+		/* offset becomes the shift of this level's index field */
+		offset -= bits;
+		index = (eaddr >> offset) & ((1UL << bits) - 1);
+		/* Check that low bits of page table base are zero */
+		if (base & ((1UL << (bits + 3)) - 1))
+			return -EINVAL;
+		/* Read the entry from guest memory */
+		addr = base + (index * sizeof(rpte));
+
+		kvm_vcpu_srcu_read_lock(vcpu);
+		ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+		kvm_vcpu_srcu_read_unlock(vcpu);
+		if (ret) {
+			/* Tell the caller which guest address failed */
+			if (pte_ret_p)
+				*pte_ret_p = addr;
+			return ret;
+		}
+		pte = __be64_to_cpu(rpte);
+		if (!(pte & _PAGE_PRESENT))
+			return -ENOENT;
+		/* Check if a leaf entry */
+		if (pte & _PAGE_PTE)
+			break;
+		/* Get ready to walk the next level */
+		base = pte & RPDB_MASK;
+		bits = pte & RPDS_MASK;
+	}
+
+	/* Need a leaf at lowest level; 512GB pages not supported */
+	if (level < 0 || level == 3)
+		return -EINVAL;
+
+	/* We found a valid leaf PTE */
+	/* Offset is now log base 2 of the page size */
+	gpa = pte & 0x01fffffffffff000ul;
+	if (gpa & ((1ul << offset) - 1))
+		return -EINVAL;
+	gpa |= eaddr & ((1ul << offset) - 1);
+	/* ps ends up as MMU_PAGE_COUNT if no known page size matches */
+	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
+		if (offset == mmu_psize_defs[ps].shift)
+			break;
+	gpte->page_size = ps;
+	gpte->page_shift = offset;
+
+	gpte->eaddr = eaddr;
+	gpte->raddr = gpa;
+
+	/* Work out permissions */
+	gpte->may_read = !!(pte & _PAGE_READ);
+	gpte->may_write = !!(pte & _PAGE_WRITE);
+	gpte->may_execute = !!(pte & _PAGE_EXEC);
+
+	gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
+
+	if (pte_ret_p)
+		*pte_ret_p = pte;
+
+	return 0;
+}
+
+/*
+ * Used to walk a partition or process table radix tree in guest memory
+ * Note: We exploit the fact that a partition table and a process
+ * table have the same layout, a partition-scoped page table and a
+ * process-scoped page table have the same layout, and the 2nd
+ * doubleword of a partition table entry has the same layout as
+ * the PTCR register.
+ */
+int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+				     struct kvmppc_pte *gpte, u64 table,
+				     int table_index, u64 *pte_ret_p)
+{
+	struct prtb_entry entry;
+	unsigned long entry_off, entry_addr, tbl_bytes;
+	int rc;
+
+	/* Refuse table size fields beyond what we accept */
+	if ((table & PRTS_MASK) > 24)
+		return -EINVAL;
+	tbl_bytes = 1ul << ((table & PRTS_MASK) + 12);
+
+	/* The requested entry has to lie inside the table */
+	entry_off = table_index * sizeof(entry);
+	if (entry_off >= tbl_bytes)
+		return -EINVAL;
+
+	/* Fetch the table entry from guest memory */
+	entry_addr = (table & PRTB_MASK) + entry_off;
+	kvm_vcpu_srcu_read_lock(vcpu);
+	rc = kvm_read_guest(vcpu->kvm, entry_addr, &entry, sizeof(entry));
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	if (rc)
+		return rc;
+
+	/* The first doubleword of the entry is the root of the radix tree */
+	return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte,
+					  be64_to_cpu(entry.prtb0), pte_ret_p);
+}
+
+/*
+ * Translate guest effective address @eaddr through the guest's process
+ * table and adjust the permissions in @gpte for the current privilege
+ * state.  @data and @iswrite are not used here.
+ *
+ * Returns -EINVAL for addresses outside quadrants 0 and 3, the error
+ * from the table walk if it fails, and 0 otherwise.
+ */
+int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			   struct kvmppc_pte *gpte, bool data, bool iswrite)
+{
+	unsigned long quadrant = eaddr >> 62;
+	u64 leaf;
+	u32 pid;
+	int rc;
+
+	/* Work out the effective PID from the quadrant */
+	if (quadrant == 0)
+		pid = vcpu->arch.pid;
+	else if (quadrant == 3)
+		pid = 0;
+	else
+		return -EINVAL;
+
+	rc = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
+				vcpu->kvm->arch.process_table, pid, &leaf);
+	if (rc)
+		return rc;
+
+	/* Privilege checks apply only to process scoped translations */
+	if (kvmppc_get_msr(vcpu) & MSR_PR) {
+		/* Problem state gets no access to privileged pages */
+		if (leaf & _PAGE_PRIVILEGED) {
+			gpte->may_read = 0;
+			gpte->may_write = 0;
+			gpte->may_execute = 0;
+		}
+		return 0;
+	}
+
+	if (leaf & _PAGE_PRIVILEGED)
+		return 0;
+
+	/* Check AMR/IAMR to see if strict mode is in force */
+	if (kvmppc_get_amr_hv(vcpu) & (1ul << 62))
+		gpte->may_read = 0;
+	if (kvmppc_get_amr_hv(vcpu) & (1ul << 63))
+		gpte->may_write = 0;
+	if (vcpu->arch.iamr & (1ul << 62))
+		gpte->may_execute = 0;
+
+	return 0;
+}
+
+/*
+ * Invalidate the TLB entry covering @addr for partition @lpid.
+ *
+ * @pshift is log2 of the page size; 0 selects PAGE_SIZE.  @addr is
+ * rounded down to a page-size boundary first.  When not running on
+ * pseries the flush is issued directly; otherwise it is requested by
+ * hcall, using pseries_rpt_invalidate() when FW_FEATURE_RPT_INVALIDATE
+ * is available and H_TLB_INVALIDATE when it is not.  A failing hcall is
+ * only logged.  @kvm is not used.
+ */
+void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+			     unsigned int pshift, unsigned int lpid)
+{
+	unsigned long psize = PAGE_SIZE;
+	int psi;
+	long rc;
+	unsigned long rb;
+
+	if (pshift)
+		psize = 1UL << pshift;
+	else
+		pshift = PAGE_SHIFT;
+
+	addr &= ~(psize - 1);
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_tlb_lpid_page(lpid, addr, psize);
+		return;
+	}
+
+	psi = shift_to_mmu_psize(pshift);
+
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		/* The page-size (AP) encoding is carried in the RB operand */
+		rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+					lpid, rb);
+	} else {
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB,
+					    psize_to_rpti_pgsize(psi),
+					    addr, addr + psize);
+	}
+
+	if (rc)
+		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
+}
+
+/*
+ * Flush the page walk cache for partition @lpid.
+ *
+ * When not running on pseries the flush is issued directly; otherwise it
+ * is requested by hcall, using pseries_rpt_invalidate() when
+ * FW_FEATURE_RPT_INVALIDATE is available and H_TLB_INVALIDATE when it is
+ * not.  A failing hcall is only logged.  @kvm is not used.
+ */
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
+{
+	long rc;
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_pwc_lpid(lpid);
+		return;
+	}
+
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+					    0, -1UL);
+	if (rc)
+		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
+}
+
+/*
+ * Clear the @clr bits and set the @set bits in *@ptep via
+ * __radix_pte_update() and return its result, which callers in this
+ * file treat as the previous PTE value.  @kvm, @addr and @shift are
+ * not used.
+ */
+static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+				      unsigned long clr, unsigned long set,
+				      unsigned long addr, unsigned int shift)
+{
+	return __radix_pte_update(ptep, clr, set);
+}
+
+/* Store @pte into *@ptep for address @addr, on behalf of kvm->mm. */
+static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
+			     pte_t *ptep, pte_t pte)
+{
+	radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
+}
+
+/*
+ * Slab caches from which kvmppc_pte_alloc() and kvmppc_pmd_alloc() take
+ * the PTE and PMD pages of the radix trees built in this file.
+ */
+static struct kmem_cache *kvm_pte_cache;
+static struct kmem_cache *kvm_pmd_cache;
+
+/*
+ * Allocate a PTE page from kvm_pte_cache with GFP_KERNEL.
+ * Returns the result of kmem_cache_alloc(); callers check it for NULL.
+ */
+static pte_t *kvmppc_pte_alloc(void)
+{
+	pte_t *pte;
+
+	pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
+	/* pmd_populate() will only reference _pa(pte). */
+	kmemleak_ignore(pte);
+
+	return pte;
+}
+
+/* Return a PTE page obtained from kvmppc_pte_alloc() to kvm_pte_cache. */
+static void kvmppc_pte_free(pte_t *ptep)
+{
+	kmem_cache_free(kvm_pte_cache, ptep);
+}
+
+/*
+ * Allocate a PMD page from kvm_pmd_cache with GFP_KERNEL.
+ * Returns the result of kmem_cache_alloc(); callers check it for NULL.
+ */
+static pmd_t *kvmppc_pmd_alloc(void)
+{
+	pmd_t *pmd;
+
+	pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+	/* pud_populate() will only reference _pa(pmd). */
+	kmemleak_ignore(pmd);
+
+	return pmd;
+}
+
+/* Return a PMD page obtained from kvmppc_pmd_alloc() to kvm_pmd_cache. */
+static void kvmppc_pmd_free(pmd_t *pmdp)
+{
+	kmem_cache_free(kvm_pmd_cache, pmdp);
+}
+
+/*
+ * Clear the page-table entry @pte that maps @gpa and invalidate the
+ * matching TLB entry for @lpid.
+ *
+ * @shift is log2 of the mapping size; 0 is treated as PAGE_SIZE.  For
+ * entries of the L1 guest itself (@lpid == kvm->arch.lpid) this also
+ * adjusts the large-page statistics, removes the nested rmap entries
+ * for the range and, if the old entry was dirty and the memslot has a
+ * dirty bitmap, updates the dirty map.  @memslot may be NULL, in which
+ * case it is looked up from @gpa; if none is found the extra
+ * bookkeeping is skipped.
+ *
+ * Called with kvm->mmu_lock held
+ */
+void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+		      unsigned int shift,
+		      const struct kvm_memory_slot *memslot,
+		      unsigned int lpid)
+
+{
+	unsigned long old;
+	unsigned long gfn = gpa >> PAGE_SHIFT;
+	unsigned long page_size = PAGE_SIZE;
+	unsigned long hpa;
+
+	/* Clear every bit of the entry; old receives the update's result */
+	old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
+	kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
+
+	/* The following only applies to L1 entries */
+	if (lpid != kvm->arch.lpid)
+		return;
+
+	if (!memslot) {
+		memslot = gfn_to_memslot(kvm, gfn);
+		if (!memslot)
+			return;
+	}
+	if (shift) { /* 1GB or 2MB page */
+		page_size = 1ul << shift;
+		if (shift == PMD_SHIFT)
+			kvm->stat.num_2M_pages--;
+		else if (shift == PUD_SHIFT)
+			kvm->stat.num_1G_pages--;
+	}
+
+	gpa &= ~(page_size - 1);
+	hpa = old & PTE_RPN_MASK;
+	kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
+
+	if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
+		kvmppc_update_dirty_map(memslot, gfn, page_size);
+}
+
+/*
+ * kvmppc_free_p?d are used to free existing page tables, and recursively
+ * descend and clear and free children.
+ * Callers are responsible for flushing the PWC.
+ *
+ * When page tables are being unmapped/freed as part of page fault path
+ * (full == false), valid ptes are generally not expected; however, there
+ * is one situation where they arise, which is when dirty page logging is
+ * turned off for a memslot while the VM is running.  The new memslot
+ * becomes visible to page faults before the memslot commit function
+ * gets to flush the memslot, which can lead to a 2MB page mapping being
+ * installed for a guest physical address where there are already 64kB
+ * (or 4kB) mappings (of sub-pages of the same 2MB page).
+ */
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
+				  unsigned int lpid)
+{
+	unsigned long idx;
+
+	if (!full) {
+		/* Unmap every entry that is still populated */
+		for (idx = 0; idx < PTRS_PER_PTE; idx++) {
+			pte_t *entry = pte + idx;
+
+			if (pte_val(*entry) != 0)
+				kvmppc_unmap_pte(kvm, entry,
+						 pte_pfn(*entry) << PAGE_SHIFT,
+						 PAGE_SHIFT, NULL, lpid);
+		}
+	} else {
+		/* Full teardown: just wipe the whole PTE page */
+		memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE);
+	}
+
+	kvmppc_pte_free(pte);
+}
+
+/*
+ * Tear down every present entry of the PMD page @pmd, then free the
+ * page itself.
+ *
+ * Entries that point to a PTE page have that page unmapped and freed.
+ * Leaf entries are simply cleared when @full is true; when it is false
+ * they are not expected (a one-time warning is raised) and are removed
+ * through kvmppc_unmap_pte().
+ */
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
+				  unsigned int lpid)
+{
+	unsigned long im;
+	pmd_t *p = pmd;
+
+	for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
+		if (!pmd_present(*p))
+			continue;
+		if (pmd_is_leaf(*p)) {
+			if (full) {
+				pmd_clear(p);
+			} else {
+				WARN_ON_ONCE(1);
+				kvmppc_unmap_pte(kvm, (pte_t *)p,
+					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
+					 PMD_SHIFT, NULL, lpid);
+			}
+		} else {
+			pte_t *pte;
+
+			/* Index 0 yields the start of the PTE page */
+			pte = pte_offset_kernel(p, 0);
+			kvmppc_unmap_free_pte(kvm, pte, full, lpid);
+			pmd_clear(p);
+		}
+	}
+	kvmppc_pmd_free(pmd);
+}
+
+/*
+ * Clear every present entry of the PUD page @pud, fully freeing any PMD
+ * page (and its children) found below a non-leaf entry, then free the
+ * PUD page itself.
+ */
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
+				  unsigned int lpid)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
+		pud_t *entry = pud + idx;
+
+		if (!pud_present(*entry))
+			continue;
+
+		/* A non-leaf entry owns a PMD page that must go first */
+		if (!pud_is_leaf(*entry))
+			kvmppc_unmap_free_pmd(kvm, pmd_offset(entry, 0),
+					      true, lpid);
+		pud_clear(entry);
+	}
+
+	pud_free(kvm->mm, pud);
+}
+
+/*
+ * Free everything hanging off the top-level table @pgd of the radix
+ * tree belonging to @lpid and clear the top-level entries.  The @pgd
+ * page itself is not freed here.
+ */
+void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
+{
+	unsigned long ig;
+
+	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+		p4d_t *p4d = p4d_offset(pgd, 0);
+		pud_t *pud;
+
+		if (!p4d_present(*p4d))
+			continue;
+		pud = pud_offset(p4d, 0);
+		kvmppc_unmap_free_pud(kvm, pud, lpid);
+		p4d_clear(p4d);
+	}
+}
+
+/*
+ * Release the VM's radix page table, if it has one: free all lower
+ * levels, then the top-level page, and forget the pointer.
+ */
+void kvmppc_free_radix(struct kvm *kvm)
+{
+	pgd_t *pgtable = kvm->arch.pgtable;
+
+	if (!pgtable)
+		return;
+
+	kvmppc_free_pgtable_radix(kvm, pgtable, kvm->arch.lpid);
+	pgd_free(kvm->mm, pgtable);
+	kvm->arch.pgtable = NULL;
+}
+
+/*
+ * Detach the PTE page referenced by the entry @pmd, flush the page walk
+ * cache for @lpid, then unmap and free that PTE page.  @gpa is not used.
+ */
+static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
+					unsigned long gpa, unsigned int lpid)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+
+	/*
+	 * Clearing the pmd entry then flushing the PWC ensures that the pte
+	 * page will no longer be cached by the MMU, so it can be freed
+	 * without flushing the PWC again.
+	 */
+	pmd_clear(pmd);
+	kvmppc_radix_flush_pwc(kvm, lpid);
+
+	kvmppc_unmap_free_pte(kvm, pte, false, lpid);
+}
+
+/*
+ * Detach the PMD page referenced by the entry @pud, flush the page walk
+ * cache for @lpid, then unmap and free that PMD page and its children.
+ * @gpa is not used.
+ */
+static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
+					unsigned long gpa, unsigned int lpid)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+
+	/*
+	 * Clearing the pud entry then flushing the PWC ensures that the pmd
+	 * page and any children pte pages will no longer be cached by the MMU,
+	 * so can be freed without flushing the PWC again.
+	 */
+	pud_clear(pud);
+	kvmppc_radix_flush_pwc(kvm, lpid);
+
+	kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
+}
+
+/*
+ * There are a number of bits which may differ between different faults to
+ * the same partition scope entry: the RC bits, which change in the course
+ * of cleaning and aging, and the write bit, which can change either because
+ * the access has been upgraded or because a read fault happened
+ * concurrently with a write fault that set those bits first.
+ */
+#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
+
+/*
+ * Insert @pte for guest physical address @gpa into the radix tree
+ * @pgtable belonging to @lpid.
+ *
+ * @level selects where the entry goes: 0 installs it at the PTE level,
+ * 1 as a leaf at the PMD level (2MB) and 2 as a leaf at the PUD level
+ * (1GB).  Page-table pages that may be needed are allocated before
+ * kvm->mmu_lock is taken; the tree is then walked again and modified
+ * under the lock, and allocations that turned out to be unnecessary are
+ * freed on the way out.
+ *
+ * If @rmapp and @n_rmap are both non-NULL, a nested rmap entry is
+ * inserted whenever a new leaf is written.
+ *
+ * Returns 0 on success (including when an entry is already present at
+ * the target location, in which case the new bits are merged into it),
+ * -EAGAIN if mmu_invalidate_retry() reports an invalidation since
+ * @mmu_seq or a racing CPU installed a large page first, and -ENOMEM
+ * if a required page-table page could not be allocated.
+ */
+int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+		      unsigned long gpa, unsigned int level,
+		      unsigned long mmu_seq, unsigned int lpid,
+		      unsigned long *rmapp, struct rmap_nested **n_rmap)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud, *new_pud = NULL;
+	pmd_t *pmd, *new_pmd = NULL;
+	pte_t *ptep, *new_ptep = NULL;
+	int ret;
+
+	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
+	pgd = pgtable + pgd_index(gpa);
+	p4d = p4d_offset(pgd, gpa);
+
+	pud = NULL;
+	if (p4d_present(*p4d))
+		pud = pud_offset(p4d, gpa);
+	else
+		new_pud = pud_alloc_one(kvm->mm, gpa);
+
+	pmd = NULL;
+	if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
+		pmd = pmd_offset(pud, gpa);
+	else if (level <= 1)
+		new_pmd = kvmppc_pmd_alloc();
+
+	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+		new_ptep = kvmppc_pte_alloc();
+
+	/* Check if we might have been invalidated; let the guest retry if so */
+	spin_lock(&kvm->mmu_lock);
+	ret = -EAGAIN;
+	if (mmu_invalidate_retry(kvm, mmu_seq))
+		goto out_unlock;
+
+	/* Now traverse again under the lock and change the tree */
+	ret = -ENOMEM;
+	if (p4d_none(*p4d)) {
+		if (!new_pud)
+			goto out_unlock;
+		p4d_populate(kvm->mm, p4d, new_pud);
+		/* Ownership moved into the tree; don't free it at exit */
+		new_pud = NULL;
+	}
+	pud = pud_offset(p4d, gpa);
+	if (pud_is_leaf(*pud)) {
+		unsigned long hgpa = gpa & PUD_MASK;
+
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 2) {
+			if (pud_raw(*pud) == pte_raw(pte)) {
+				ret = 0;
+				goto out_unlock;
+			}
+			/* Valid 1GB page here already, add our extra bits */
+			WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
+							PTE_BITS_MUST_MATCH);
+			kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+					      0, pte_val(pte), hgpa, PUD_SHIFT);
+			ret = 0;
+			goto out_unlock;
+		}
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 1GB pte in after we saw a pmd page, try again.
+		 */
+		if (!new_pmd) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Valid 1GB page here already, remove it */
+		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
+				 lpid);
+	}
+	if (level == 2) {
+		if (!pud_none(*pud)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.
+			 */
+			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+		if (rmapp && n_rmap)
+			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
+		ret = 0;
+		goto out_unlock;
+	}
+	if (pud_none(*pud)) {
+		if (!new_pmd)
+			goto out_unlock;
+		pud_populate(kvm->mm, pud, new_pmd);
+		/* Ownership moved into the tree; don't free it at exit */
+		new_pmd = NULL;
+	}
+	pmd = pmd_offset(pud, gpa);
+	if (pmd_is_leaf(*pmd)) {
+		unsigned long lgpa = gpa & PMD_MASK;
+
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 1) {
+			if (pmd_raw(*pmd) == pte_raw(pte)) {
+				ret = 0;
+				goto out_unlock;
+			}
+			/* Valid 2MB page here already, add our extra bits */
+			WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
+							PTE_BITS_MUST_MATCH);
+			kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+					0, pte_val(pte), lgpa, PMD_SHIFT);
+			ret = 0;
+			goto out_unlock;
+		}
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 2MB pte in after we saw a pte page, try again.
+		 */
+		if (!new_ptep) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Valid 2MB page here already, remove it */
+		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
+				 lpid);
+	}
+	if (level == 1) {
+		if (!pmd_none(*pmd)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.
+			 */
+			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+		if (rmapp && n_rmap)
+			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
+		ret = 0;
+		goto out_unlock;
+	}
+	if (pmd_none(*pmd)) {
+		if (!new_ptep)
+			goto out_unlock;
+		pmd_populate(kvm->mm, pmd, new_ptep);
+		/* Ownership moved into the tree; don't free it at exit */
+		new_ptep = NULL;
+	}
+	ptep = pte_offset_kernel(pmd, gpa);
+	if (pte_present(*ptep)) {
+		/* Check if someone else set the same thing */
+		if (pte_raw(*ptep) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* Valid page here already, add our extra bits */
+		WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
+							PTE_BITS_MUST_MATCH);
+		kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
+		ret = 0;
+		goto out_unlock;
+	}
+	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+	if (rmapp && n_rmap)
+		kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
+	ret = 0;
+
+ out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+	/* Free whatever was preallocated but not linked into the tree */
+	if (new_pud)
+		pud_free(kvm->mm, new_pud);
+	if (new_pmd)
+		kvmppc_pmd_free(new_pmd);
+	if (new_ptep)
+		kvmppc_pte_free(new_ptep);
+	return ret;
+}
+
+/*
+ * Set the accessed bit (and, for a write, the dirty bit) in the
+ * 2nd-level PTE that maps @gpa, mimicking what the hardware would do.
+ *
+ * The PTE is looked up in the nested guest's table for @lpid when
+ * @nested is true and in the VM's own table otherwise.
+ *
+ * Returns true if the bits were set, false if there is no present PTE
+ * or a write was requested on a PTE that is not writable.
+ */
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
+			     unsigned long gpa, unsigned int lpid)
+{
+	unsigned long rc_bits = _PAGE_ACCESSED;
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (writing)
+		rc_bits |= _PAGE_DIRTY;
+
+	ptep = nested ? find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift)
+		      : find_kvm_secondary_pte(kvm, gpa, &shift);
+
+	if (!ptep || !pte_present(*ptep))
+		return false;
+	if (writing && !pte_write(*ptep))
+		return false;
+
+	kvmppc_radix_update_pte(kvm, ptep, 0, rc_bits, gpa, shift);
+	return true;
+}
+
+/*
+ * Map the page backing guest physical address @gpa (inside @memslot)
+ * into the VM's partition-scoped radix tree.
+ *
+ * The backing page is pinned (fast path first, generic KVM slow path
+ * otherwise), the host PTE for the corresponding hva is read to obtain
+ * its size and attribute bits, and a guest PTE derived from it is
+ * inserted with kvmppc_create_pte().  Large mappings are only used when
+ * dirty logging is off for the memslot and @gpa and the hva are equally
+ * aligned within the large page.
+ *
+ * @writing says whether the faulting access was a store; @kvm_ro
+ * disables the fast path that asks for write permission.  If non-NULL,
+ * *@inserted_pte and *@levelp receive the PTE that was built and the
+ * level chosen for it (also when the insertion itself failed).
+ *
+ * Returns -EFAULT if no usable pfn was found, RESUME_GUEST if the host
+ * PTE is not present, and otherwise the result of kvmppc_create_pte().
+ */
+int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+				   unsigned long gpa,
+				   struct kvm_memory_slot *memslot,
+				   bool writing, bool kvm_ro,
+				   pte_t *inserted_pte, unsigned int *levelp)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct page *page = NULL;
+	unsigned long mmu_seq;
+	unsigned long hva, gfn = gpa >> PAGE_SHIFT;
+	bool upgrade_write = false;
+	bool *upgrade_p = &upgrade_write;
+	pte_t pte, *ptep;
+	unsigned int shift, level;
+	int ret;
+	bool large_enable;
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	/*
+	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+	 * do it with !atomic && !async, which is how we call it.
+	 * We always ask for write permission since the common case
+	 * is that the page is writable.
+	 */
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
+		upgrade_write = true;
+	} else {
+		unsigned long pfn;
+
+		/* Call KVM generic code to do the slow-path check */
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
+					   writing, upgrade_p, NULL);
+		if (is_error_noslot_pfn(pfn))
+			return -EFAULT;
+		/* Only keep a struct page for valid, non-reserved pfns */
+		page = NULL;
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageReserved(page))
+				page = NULL;
+		}
+	}
+
+	/*
+	 * Read the PTE from the process' radix tree and use that
+	 * so we get the shift and attribute bits.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+	pte = __pte(0);
+	if (ptep)
+		pte = READ_ONCE(*ptep);
+	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * If the PTE disappeared temporarily due to a THP
+	 * collapse, just return and let the guest try again.
+	 */
+	if (!pte_present(pte)) {
+		if (page)
+			put_page(page);
+		return RESUME_GUEST;
+	}
+
+	/* If we're logging dirty pages, always map single pages */
+	large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);
+
+	/* Get pte level from shift/size */
+	if (large_enable && shift == PUD_SHIFT &&
+	    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+	    (hva & (PUD_SIZE - PAGE_SIZE))) {
+		level = 2;
+	} else if (large_enable && shift == PMD_SHIFT &&
+		   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+		   (hva & (PMD_SIZE - PAGE_SIZE))) {
+		level = 1;
+	} else {
+		level = 0;
+		if (shift > PAGE_SHIFT) {
+			/*
+			 * If the pte maps more than one page, bring over
+			 * bits from the virtual address to get the real
+			 * address of the specific single page we want.
+			 */
+			unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & rpnmask));
+		}
+	}
+
+	pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+	if (writing || upgrade_write) {
+		if (pte_val(pte) & _PAGE_WRITE)
+			pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+	} else {
+		/* Read fault without write upgrade: map it read-only */
+		pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+	}
+
+	/* Allocate space in the tree and write the PTE */
+	ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
+				mmu_seq, kvm->arch.lpid, NULL, NULL);
+	if (inserted_pte)
+		*inserted_pte = pte;
+	if (levelp)
+		*levelp = level;
+
+	/* Drop the reference taken when the page was looked up */
+	if (page) {
+		if (!ret && (pte_val(pte) & _PAGE_WRITE))
+			set_page_dirty_lock(page);
+		put_page(page);
+	}
+
+	/* Increment number of large pages if we (successfully) inserted one */
+	if (!ret) {
+		if (level == 1)
+			kvm->stat.num_2M_pages++;
+		else if (level == 2)
+			kvm->stat.num_1G_pages++;
+	}
+
+	return ret;
+}
+
+/*
+ * Handle a hypervisor page fault taken by a radix guest.
+ *
+ * @ea is the faulting effective address and @dsisr the fault status;
+ * the guest physical address comes from vcpu->arch.fault_gpa.
+ *
+ * Depending on the cause the fault is reflected to the guest as a DSI,
+ * forwarded to the ultravisor (secure guest), emulated as MMIO (no
+ * valid memslot), satisfied by just setting the R/C bits, or resolved
+ * by instantiating a mapping for the page.
+ *
+ * Returns -EFAULT for an unsupported MMU fault, RESUME_GUEST when the
+ * guest should simply be re-entered, or the result of the helper that
+ * handled the fault.
+ */
+int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
+				   unsigned long ea, unsigned long dsisr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long gpa, gfn;
+	struct kvm_memory_slot *memslot;
+	long ret;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	bool kvm_ro = false;
+
+	/* Check for unusual errors */
+	if (dsisr & DSISR_UNSUPP_MMU) {
+		pr_err("KVM: Got unsupported MMU fault\n");
+		return -EFAULT;
+	}
+	if (dsisr & DSISR_BADACCESS) {
+		/* Reflect to the guest as DSI */
+		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
+		kvmppc_core_queue_data_storage(vcpu,
+				kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+				ea, dsisr);
+		return RESUME_GUEST;
+	}
+
+	/* Translate the logical address */
+	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
+	gpa &= ~0xF000000000000000ul;
+	gfn = gpa >> PAGE_SHIFT;
+	/* The low 12 bits of ea are not merged in for process-table faults */
+	if (!(dsisr & DSISR_PRTABLE_FAULT))
+		gpa |= ea & 0xfff;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return kvmppc_send_page_to_uv(kvm, gfn);
+
+	/* Get the corresponding memslot */
+	memslot = gfn_to_memslot(kvm, gfn);
+
+	/* No memslot means it's an emulated MMIO region */
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
+			     DSISR_SET_RC)) {
+			/*
+			 * Bad address in guest page table tree, or other
+			 * unusual error - reflect it to the guest as DSI.
+			 */
+			kvmppc_core_queue_data_storage(vcpu,
+					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+					ea, dsisr);
+			return RESUME_GUEST;
+		}
+		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
+	}
+
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* give the guest a DSI */
+			kvmppc_core_queue_data_storage(vcpu,
+					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+					ea, DSISR_ISSTORE | DSISR_PROTFAULT);
+			return RESUME_GUEST;
+		}
+		kvm_ro = true;
+	}
+
+	/* Failed to set the reference/change bits */
+	if (dsisr & DSISR_SET_RC) {
+		spin_lock(&kvm->mmu_lock);
+		if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+					    gpa, kvm->arch.lpid))
+			dsisr &= ~DSISR_SET_RC;
+		spin_unlock(&kvm->mmu_lock);
+
+		/* Nothing else left to fix up: let the guest retry */
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT | DSISR_SET_RC)))
+			return RESUME_GUEST;
+	}
+
+	/* Try to insert a pte */
+	ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
+					     kvm_ro, NULL, NULL);
+
+	/* Both success and a lost race are handled by re-entering the guest */
+	if (ret == 0 || ret == -EAGAIN)
+		ret = RESUME_GUEST;
+	return ret;
+}
+
+/*
+ * Remove the mapping of guest frame @gfn from the VM's partition-scoped
+ * table.  For a secure guest the page is instead invalidated through
+ * uv_page_inval().
+ *
+ * Called with kvm->mmu_lock held
+ */
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		     unsigned long gfn)
+{
+	pte_t *ptep;
+	unsigned long gpa = gfn << PAGE_SHIFT;
+	unsigned int shift;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
+		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
+		return;
+	}
+
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+	if (ptep && pte_present(*ptep))
+		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+				 kvm->arch.lpid);
+}
+
+/*
+ * Clear the accessed bit of the PTE mapping guest frame @gfn, and of
+ * the corresponding shadow PTEs of nested guests.
+ *
+ * Returns true if the page had been referenced, false otherwise (always
+ * false for a secure guest).
+ *
+ * Called with kvm->mmu_lock held
+ */
+bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		   unsigned long gfn)
+{
+	unsigned long gpa = gfn << PAGE_SHIFT;
+	unsigned long prev, *rmapp;
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return false;
+
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+	if (!ptep || !pte_present(*ptep) || !pte_young(*ptep))
+		return false;
+
+	prev = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
+				       gpa, shift);
+	/* XXX need to flush tlb here? */
+
+	/* Propagate the cleared bit to shadow page tables of nested guests */
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0,
+				       prev & PTE_RPN_MASK,
+				       1UL << shift);
+	return true;
+}
+
+/*
+ * Report whether the PTE mapping guest frame @gfn is present and has
+ * its accessed bit set, without modifying it.  Always false for a
+ * secure guest.
+ *
+ * Called with kvm->mmu_lock held
+ */
+bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn)
+{
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return false;
+
+	ptep = find_kvm_secondary_pte(kvm, gfn << PAGE_SHIFT, &shift);
+	if (!ptep)
+		return false;
+
+	return pte_present(*ptep) && pte_young(*ptep);
+}
+
+/*
+ * Test and clear the dirty bit of the PTE mapping page @pagenum of
+ * @memslot, flushing the TLB entry and updating the shadow PTEs of
+ * nested guests when the bit was set.
+ *
+ * The table is walked without kvm->mmu_lock; the lock is taken, and the
+ * PTE rechecked, only when the first look finds it present and dirty.
+ *
+ * Returns the number of PAGE_SIZE pages that are dirty
+ * (0 or 1; always 0 for a secure guest).
+ */
+static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+				struct kvm_memory_slot *memslot, int pagenum)
+{
+	unsigned long gfn = memslot->base_gfn + pagenum;
+	unsigned long gpa = gfn << PAGE_SHIFT;
+	pte_t *ptep, pte;
+	unsigned int shift;
+	int ret = 0;
+	unsigned long old, *rmapp;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return ret;
+
+	/*
+	 * For performance reasons we don't hold kvm->mmu_lock while walking the
+	 * partition scoped table.
+	 */
+	ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
+	if (!ptep)
+		return 0;
+
+	pte = READ_ONCE(*ptep);
+	if (pte_present(pte) && pte_dirty(pte)) {
+		spin_lock(&kvm->mmu_lock);
+		/*
+		 * Recheck the pte again
+		 */
+		if (pte_val(pte) != pte_val(*ptep)) {
+			/*
+			 * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
+			 * only find PAGE_SIZE pte entries here. We can continue
+			 * to use the pte addr returned by above page table
+			 * walk.
+			 */
+			if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
+				spin_unlock(&kvm->mmu_lock);
+				return 0;
+			}
+		}
+
+		ret = 1;
+		/* A non-zero shift here would contradict the comment above */
+		VM_BUG_ON(shift);
+		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
+					      gpa, shift);
+		kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
+		/* Also clear bit in ptes in shadow pgtable for nested guests */
+		rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+		kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0,
+					       old & PTE_RPN_MASK,
+					       1UL << shift);
+		spin_unlock(&kvm->mmu_lock);
+	}
+	return ret;
+}
+
+/*
+ * Collect (and clear) the dirty state of every page of @memslot into
+ * the bitmap @map.  Always returns 0.
+ */
+long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map)
+{
+	unsigned long page = 0;
+
+	while (page < memslot->npages) {
+		int dirty = kvm_radix_test_clear_dirty(kvm, memslot, page);
+
+		if (!dirty) {
+			page++;
+			continue;
+		}
+
+		/*
+		 * A non-zero count means page is a multiple of it, because
+		 * huge pages only back the guest at guest real addresses
+		 * that are a multiple of their size.  With at most one PTE
+		 * covering any guest real address, the whole run can be
+		 * marked and skipped in one go.
+		 */
+		set_dirty_bits(map, page, dirty);
+		page += dirty;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove every mapping of @memslot from the VM's partition-scoped table.
+ *
+ * If secure-guest initialisation has started, the memslot's pages are
+ * first dropped through kvmppc_uvmem_drop_pages(); if it has completed,
+ * nothing further is done.  Otherwise each page of the slot is unmapped
+ * under kvm->mmu_lock and the invalidate sequence number is bumped.
+ */
+void kvmppc_radix_flush_memslot(struct kvm *kvm,
+				const struct kvm_memory_slot *memslot)
+{
+	unsigned long n;
+	pte_t *ptep;
+	unsigned long gpa;
+	unsigned int shift;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
+		kvmppc_uvmem_drop_pages(memslot, kvm, true);
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return;
+
+	gpa = memslot->base_gfn << PAGE_SHIFT;
+	spin_lock(&kvm->mmu_lock);
+	/* Visit the slot one PAGE_SIZE step at a time */
+	for (n = memslot->npages; n; --n) {
+		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+		if (ptep && pte_present(*ptep))
+			kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+					 kvm->arch.lpid);
+		gpa += PAGE_SIZE;
+	}
+	/*
+	 * Increase the mmu notifier sequence number to prevent any page
+	 * fault that read the memslot earlier from writing a PTE.
+	 */
+	kvm->mmu_invalidate_seq++;
+	spin_unlock(&kvm->mmu_lock);
+}
+
+/* Append @psize's shift/AP encoding to @info; skipped if its shift is 0. */
+static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
+				 int psize, int *indexp)
+{
+	if (!mmu_psize_defs[psize].shift)
+		return;
+	info->ap_encodings[(*indexp)++] = mmu_psize_defs[psize].shift |
+		(mmu_psize_defs[psize].ap << 29);
+}
+
+/* Fill @info with the 4k/64k geometries and AP encodings; -EINVAL if !radix. */
+int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
+{
+	int lvl, nr_enc = 0;
+
+	if (!radix_enabled())
+		return -EINVAL;
+	memset(info, 0, sizeof(*info));
+
+	/* geometries[0]: 4k pages, geometries[1]: 64k pages */
+	info->geometries[0].page_shift = 12;
+	info->geometries[1].page_shift = 16;
+	for (lvl = 0; lvl < 4; ++lvl) {
+		info->geometries[1].level_bits[lvl] = p9_supported_radix_bits[lvl];
+		/* The 4k geometry differs only in its level 0 width. */
+		info->geometries[0].level_bits[lvl] =
+			lvl ? p9_supported_radix_bits[lvl] : 9;
+	}
+
+	add_rmmu_ap_encoding(info, MMU_PAGE_4K, &nr_enc);
+	add_rmmu_ap_encoding(info, MMU_PAGE_64K, &nr_enc);
+	add_rmmu_ap_encoding(info, MMU_PAGE_2M, &nr_enc);
+	add_rmmu_ap_encoding(info, MMU_PAGE_1G, &nr_enc);
+
+	return 0;
+}
+
+/* Allocate the VM's top-level page table (pgd); -ENOMEM on failure. */
+int kvmppc_init_vm_radix(struct kvm *kvm)
+{
+	kvm->arch.pgtable = pgd_alloc(kvm->mm);
+
+	return kvm->arch.pgtable ? 0 : -ENOMEM;
+}
+
+static void pte_ctor(void *addr)	/* constructor given to the "kvm-pte" cache */
+{
+	memset(addr, 0, RADIX_PTE_TABLE_SIZE);	/* start with an all-zero table */
+}
+
+static void pmd_ctor(void *addr)	/* constructor given to the "kvm-pmd" cache */
+{
+	memset(addr, 0, RADIX_PMD_TABLE_SIZE);	/* start with an all-zero table */
+}
+
+struct debugfs_radix_state {
+	struct kvm	*kvm;		/* VM being dumped; referenced from open to release */
+	struct mutex	mutex;		/* serialises read() calls on this open file */
+	unsigned long	gpa;		/* guest address where the next read() resumes */
+	int		lpid;		/* 0: kvm->arch.pgtable, > 0: nested guest, < 0: done */
+	int		chars_left;	/* bytes of buf[] not yet copied to userspace */
+	int		buf_index;	/* offset in buf[] of the first uncopied byte */
+	char		buf[128];	/* staging area for one formatted chunk */
+	u8		hdr;		/* non-zero once the header for this lpid was emitted */
+};
+
+/* Allocate the per-open dump state and take a VM reference for it. */
+static int debugfs_radix_open(struct inode *inode, struct file *file)
+{
+	struct debugfs_radix_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
+	struct kvm *kvm = inode->i_private;
+
+	if (!state)
+		return -ENOMEM;
+
+	mutex_init(&state->mutex);
+	state->kvm = kvm;
+	kvm_get_kvm(kvm);
+	file->private_data = state;
+
+	return nonseekable_open(inode, file);
+}
+
+static int debugfs_radix_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_radix_state *state = file->private_data;
+
+	kvm_put_kvm(state->kvm);	/* pairs with kvm_get_kvm() in open */
+	kfree(state);
+	return 0;
+}
+
+static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct debugfs_radix_state *p = file->private_data;
+	ssize_t ret, r;		/* r: bytes copy_to_user() could not copy */
+	unsigned long n;
+	struct kvm *kvm;
+	unsigned long gpa;
+	pgd_t *pgt;
+	struct kvm_nested_guest *nested;
+	pgd_t *pgdp;
+	p4d_t p4d, *p4dp;
+	pud_t pud, *pudp;
+	pmd_t pmd, *pmdp;
+	pte_t *ptep;
+	int shift;
+	unsigned long pte;
+
+	kvm = p->kvm;
+	if (!kvm_is_radix(kvm))
+		return 0;
+	/* One reader at a time: the cursor kept in *p belongs to this open file. */
+	ret = mutex_lock_interruptible(&p->mutex);
+	if (ret)
+		return ret;
+	/* First hand out what is still buffered from the previous call. */
+	if (p->chars_left) {
+		n = p->chars_left;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index += n;
+		buf += n;
+		len -= n;
+		ret = n;
+		if (r) {
+			if (!n)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+	/* Resume the walk where the previous call stopped. */
+	gpa = p->gpa;
+	nested = NULL;
+	pgt = NULL;
+	while (len != 0 && p->lpid >= 0) {
+		if (gpa >= RADIX_PGTABLE_RANGE) {	/* current table is finished */
+			gpa = 0;
+			pgt = NULL;
+			if (nested) {
+				kvmhv_put_nested(nested);
+				nested = NULL;
+			}
+			p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
+			p->hdr = 0;
+			if (p->lpid < 0)	/* no further lpid: the dump is complete */
+				break;
+		}
+		if (!pgt) {
+			if (p->lpid == 0) {
+				pgt = kvm->arch.pgtable;
+			} else {
+				nested = kvmhv_get_nested(kvm, p->lpid, false);
+				if (!nested) {
+					gpa = RADIX_PGTABLE_RANGE;	/* next pass moves to the next lpid */
+					continue;
+				}
+				pgt = nested->shadow_pgtable;
+			}
+		}
+		n = 0;
+		if (!p->hdr) {
+			if (p->lpid > 0)
+				n = scnprintf(p->buf, sizeof(p->buf),
+					      "\nNested LPID %d: ", p->lpid);
+			n += scnprintf(p->buf + n, sizeof(p->buf) - n,
+				      "pgdir: %lx\n", (unsigned long)pgt);
+			p->hdr = 1;
+			goto copy;
+		}
+		/* Descend from the pgd; a non-present level skips its whole range. */
+		pgdp = pgt + pgd_index(gpa);
+		p4dp = p4d_offset(pgdp, gpa);
+		p4d = READ_ONCE(*p4dp);
+		if (!(p4d_val(p4d) & _PAGE_PRESENT)) {
+			gpa = (gpa & P4D_MASK) + P4D_SIZE;
+			continue;
+		}
+
+		pudp = pud_offset(&p4d, gpa);
+		pud = READ_ONCE(*pudp);
+		if (!(pud_val(pud) & _PAGE_PRESENT)) {
+			gpa = (gpa & PUD_MASK) + PUD_SIZE;
+			continue;
+		}
+		if (pud_val(pud) & _PAGE_PTE) {	/* leaf entry at PUD level */
+			pte = pud_val(pud);
+			shift = PUD_SHIFT;
+			goto leaf;
+		}
+
+		pmdp = pmd_offset(&pud, gpa);
+		pmd = READ_ONCE(*pmdp);
+		if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
+			gpa = (gpa & PMD_MASK) + PMD_SIZE;
+			continue;
+		}
+		if (pmd_val(pmd) & _PAGE_PTE) {	/* leaf entry at PMD level */
+			pte = pmd_val(pmd);
+			shift = PMD_SHIFT;
+			goto leaf;
+		}
+
+		ptep = pte_offset_kernel(&pmd, gpa);
+		pte = pte_val(READ_ONCE(*ptep));
+		if (!(pte & _PAGE_PRESENT)) {
+			gpa += PAGE_SIZE;
+			continue;
+		}
+		shift = PAGE_SHIFT;
+	leaf:
+		n = scnprintf(p->buf, sizeof(p->buf),
+			      " %lx: %lx %d\n", gpa, pte, shift);	/* gpa, raw pte, shift */
+		gpa += 1ul << shift;
+	copy:
+		p->chars_left = n;	/* whatever is not copied now waits for the next call */
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index = n;
+		buf += n;
+		len -= n;
+		ret += n;
+		if (r) {
+			if (!ret)
+				ret = -EFAULT;
+			break;
+		}
+	}
+	p->gpa = gpa;
+	if (nested)
+		kvmhv_put_nested(nested);
+
+ out:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
+			   size_t len, loff_t *ppos)
+{
+	return -EACCES;	/* writes are always refused */
+}
+
+static const struct file_operations debugfs_radix_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_radix_open,
+	.release = debugfs_radix_release,
+	.read	 = debugfs_radix_read,
+	.write	 = debugfs_radix_write,	/* always fails with -EACCES */
+	.llseek	 = generic_file_llseek,	/* NOTE(review): open() calls nonseekable_open() - confirm */
+};
+
+/* Create the mode-0400 "radix" file in the VM's debugfs directory. */
+void kvmhv_radix_debugfs_init(struct kvm *kvm)
+{
+	debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm, &debugfs_radix_fops);
+}
+
+/* Create the "kvm-pte" and "kvm-pmd" slab caches; -ENOMEM on failure. */
+int kvmppc_radix_init(void)
+{
+	const unsigned long pte_sz = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
+	const unsigned long pmd_sz = sizeof(void *) << RADIX_PMD_INDEX_SIZE;
+
+	kvm_pte_cache = kmem_cache_create("kvm-pte", pte_sz, pte_sz, 0, pte_ctor);
+	if (!kvm_pte_cache)
+		return -ENOMEM;
+
+	kvm_pmd_cache = kmem_cache_create("kvm-pmd", pmd_sz, pmd_sz, 0, pmd_ctor);
+	if (kvm_pmd_cache)
+		return 0;
+
+	/* Second cache failed: undo the first so nothing is left behind. */
+	kmem_cache_destroy(kvm_pte_cache);
+	return -ENOMEM;
+}
+
+void kvmppc_radix_exit(void)
+{
+	kmem_cache_destroy(kvm_pte_cache);	/* undo kvmppc_radix_init() */
+	kmem_cache_destroy(kvm_pmd_cache);
+}
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
new file mode 100644
index 0000000000..4d958dd21e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#define SHADOW_SLB_ENTRY_LEN	0x10	/* bytes per shadow SLB entry */
+#define OFFSET_ESID(x)		(SHADOW_SLB_ENTRY_LEN * (x))	/* entry x, offset 0 */
+#define OFFSET_VSID(x)		((SHADOW_SLB_ENTRY_LEN * (x)) + 8)	/* entry x, offset 8 */
+
+/******************************************************************************
+ *                                                                            *
+ *                               Entry code                                   *
+ *                                                                            *
+ *****************************************************************************/
+
+.macro LOAD_GUEST_SEGMENTS
+
+	/* Required state:
+	 *
+	 * MSR = ~IR|DR
+	 * R13 = PACA
+	 * R1 = host R1
+	 * R2 = host R2
+	 * R3 = shadow vcpu
+	 * all other volatile GPRS = free except R4, R6
+	 * SVCPU[CR]  = guest CR
+	 * SVCPU[XER] = guest XER
+	 * SVCPU[CTR] = guest CTR
+	 * SVCPU[LR]  = guest LR
+	 */
+
+BEGIN_FW_FTR_SECTION
+
+	/* Declare SLB shadow as 0 entries big */
+
+	ld	r11, PACA_SLBSHADOWPTR(r13)
+	li	r8, 0
+	stb	r8, 3(r11)	/* the byte at offset 3 of the shadow is set to 0 */
+
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
+
+	/* Flush SLB */
+
+	li	r10, 0
+	slbmte	r10, r10
+	slbia
+
+	/* Fill SLB with our shadow */
+
+	lbz	r12, SVCPU_SLB_MAX(r3)	/* r12 = entry count byte */
+	mulli	r12, r12, 16		/* 16 bytes per entry */
+	addi	r12, r12, SVCPU_SLB
+	add	r12, r12, r3		/* r12 = address just past the last entry */
+
+	/* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */
+	li	r11, SVCPU_SLB
+	add	r11, r11, r3		/* r11 = address of the first entry */
+
+slb_loop_enter:	/* bound is tested at the bottom, so entry 0 is always examined */
+
+	ld	r10, 0(r11)		/* first doubleword of the entry */
+
+	andis.	r9, r10, SLB_ESID_V@h
+	beq	slb_loop_enter_skip	/* SLB_ESID_V clear: do not load this entry */
+
+	ld	r9, 8(r11)		/* second doubleword of the entry */
+	slbmte	r9, r10
+
+slb_loop_enter_skip:
+	addi	r11, r11, 16
+	cmpd	cr0, r11, r12
+	blt	slb_loop_enter
+
+slb_do_enter:
+
+.endm
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.macro LOAD_HOST_SEGMENTS
+
+	/* Register usage at this point:
+	 *
+	 * R1         = host R1
+	 * R2         = host R2
+	 * R12        = exit handler id
+	 * R13        = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
+	 * SVCPU.*    = guest *
+	 * SVCPU[CR]  = guest CR
+	 * SVCPU[XER] = guest XER
+	 * SVCPU[CTR] = guest CTR
+	 * SVCPU[LR]  = guest LR
+	 *
+	 */
+
+	/* Remove all SLB entries that are in use. */
+
+	li	r0, 0
+	slbmte	r0, r0
+	slbia
+
+	/* Restore bolted entries from the shadow */
+
+	ld	r11, PACA_SLBSHADOWPTR(r13)
+
+BEGIN_FW_FTR_SECTION
+
+	/* Declare SLB shadow as SLB_NUM_BOLTED entries big */
+
+	li	r8, SLB_NUM_BOLTED
+	stb	r8, 3(r11)	/* same byte that LOAD_GUEST_SEGMENTS sets to 0 */
+
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_LPAR)
+
+	/* Manually load all entries from shadow SLB */
+
+	li	r8, SLBSHADOW_SAVEAREA		/* r8: offset of an entry's first doubleword */
+	li	r7, SLBSHADOW_SAVEAREA + 8	/* r7: offset of its second doubleword */
+
+	.rept	SLB_NUM_BOLTED	/* body is emitted once per bolted entry */
+	LDX_BE	r10, r11, r8
+	cmpdi	r10, 0
+	beq	1f		/* first doubleword is zero: nothing to load */
+	LDX_BE	r9, r11, r7
+	slbmte	r9, r10
+1:	addi	r7, r7, SHADOW_SLB_ENTRY_LEN
+	addi	r8, r8, SHADOW_SLB_ENTRY_LEN
+	.endr
+
+	isync
+	sync
+
+slb_do_exit:
+
+.endm
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
new file mode 100644
index 0000000000..93b695b289
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+#include <linux/hugetlb.h>
+#include <linux/list.h>
+#include <linux/anon_inodes.h>
+#include <linux/iommu.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/mmu_context.h>
+
+/* Find the TCE table registered under @liobn; NULL when there is none. */
+static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
+	unsigned long liobn)
+{
+	struct kvmppc_spapr_tce_table *cur;
+
+	list_for_each_entry_lockless(cur, &kvm->arch.spapr_tce_tables, list)
+		if (cur->liobn == liobn)
+			return cur;
+	return NULL;
+}
+
+static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
+{
+	return DIV_ROUND_UP(iommu_pages * sizeof(u64), PAGE_SIZE); /* one u64 per TCE */
+}
+
+/* @tce_pages plus the pages taken by the stt header and its pages[] array. */
+static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
+{
+	return tce_pages +
+		DIV_ROUND_UP(sizeof(struct kvmppc_spapr_tce_table) +
+			     tce_pages * sizeof(struct page *), PAGE_SIZE);
+}
+
+/* RCU callback: drop the iommu table reference and free the link object. */
+static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
+{
+	struct kvmppc_spapr_tce_iommu_table *stit;
+
+	stit = container_of(head, struct kvmppc_spapr_tce_iommu_table, rcu);
+	iommu_tce_table_put(stit->tbl);
+	kfree(stit);
+}
+
+/* kref release: unlink the stit and queue its freeing through call_rcu(). */
+static void kvm_spapr_tce_liobn_put(struct kref *kref)
+{
+	struct kvmppc_spapr_tce_iommu_table *stit;
+
+	stit = container_of(kref, struct kvmppc_spapr_tce_iommu_table, kref);
+	list_del_rcu(&stit->next);
+	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
+}
+
+/* Drop the references this VM's TCE tables hold on the tables of @grp. */
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+		struct iommu_group *grp)
+{
+	struct kvmppc_spapr_tce_iommu_table *stit, *nxt;
+	struct iommu_table_group *table_group = NULL;
+	struct kvmppc_spapr_tce_table *stt;
+	int t;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+		table_group = iommu_group_get_iommudata(grp);
+		if (WARN_ON(!table_group))
+			continue;
+
+		list_for_each_entry_safe(stit, nxt, &stt->iommu_tables, next) {
+			/* One put for each slot of the group holding this table. */
+			for (t = 0; t < IOMMU_TABLE_GROUP_MAX_TABLES; ++t) {
+				if (table_group->tables[t] == stit->tbl)
+					kref_put(&stit->kref,
+						 kvm_spapr_tce_liobn_put);
+			}
+		}
+		cond_resched_rcu();
+	}
+	rcu_read_unlock();
+}
+
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+		struct iommu_group *grp)
+{
+	struct kvmppc_spapr_tce_table *stt = NULL;
+	bool found = false;
+	struct iommu_table *tbl = NULL;
+	struct iommu_table_group *table_group;
+	long i;
+	struct kvmppc_spapr_tce_iommu_table *stit;
+	struct fd f;
+
+	f = fdget(tablefd);
+	if (!f.file)
+		return -EBADF;
+	/* The fd must carry one of the TCE tables registered with this VM. */
+	rcu_read_lock();
+	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+		if (stt == f.file->private_data) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	fdput(f);
+
+	if (!found)
+		return -EINVAL;
+
+	table_group = iommu_group_get_iommudata(grp);
+	if (WARN_ON(!table_group))
+		return -EFAULT;
+	/* Pick the first hardware table of the group that can back stt. */
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbltmp = table_group->tables[i];
+
+		if (!tbltmp)
+			continue;
+		/* Make sure hardware table parameters are compatible */
+		if ((tbltmp->it_page_shift <= stt->page_shift) &&
+				(tbltmp->it_offset << tbltmp->it_page_shift ==
+				 stt->offset << stt->page_shift) &&
+				(tbltmp->it_size << tbltmp->it_page_shift >=
+				 stt->size << stt->page_shift)) {
+			/*
+			 * Reference the table to avoid races with
+			 * add/remove DMA windows.
+			 */
+			tbl = iommu_tce_table_get(tbltmp);
+			break;
+		}
+	}
+	if (!tbl)
+		return -EINVAL;
+	/* If stt already links to tbl, only another stit reference is needed. */
+	rcu_read_lock();
+	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+		if (tbl != stit->tbl)
+			continue;
+
+		if (!kref_get_unless_zero(&stit->kref)) {
+			/* stit is being destroyed */
+			iommu_tce_table_put(tbl);
+			rcu_read_unlock();
+			return -ENOTTY;
+		}
+		/*
+		 * Known table: only the stit kref is raised. NOTE(review): the
+		 * tbl reference taken above is not put here - confirm intent.
+		 */
+		rcu_read_unlock();
+		return 0;
+	}
+	rcu_read_unlock();
+	/* First attachment of tbl to stt: create the link; it owns the tbl ref. */
+	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
+	if (!stit) {
+		iommu_tce_table_put(tbl);
+		return -ENOMEM;
+	}
+
+	stit->tbl = tbl;
+	kref_init(&stit->kref);
+
+	list_add_rcu(&stit->next, &stt->iommu_tables);
+
+	return 0;
+}
+
+/* RCU callback: free every allocated TCE page, then the table itself. */
+static void release_spapr_tce_table(struct rcu_head *head)
+{
+	struct kvmppc_spapr_tce_table *stt = container_of(head, typeof(*stt), rcu);
+	unsigned long pg, nr = kvmppc_tce_pages(stt->size);
+
+	for (pg = 0; pg < nr; pg++) {
+		if (stt->pages[pg])
+			__free_page(stt->pages[pg]);
+	}
+	kfree(stt);
+}
+
+/* Return the page behind slot @sttpage, allocating it (zeroed) on first use. */
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+		unsigned long sttpage)
+{
+	struct page *pg = stt->pages[sttpage];
+
+	if (pg)
+		return pg;
+
+	/* Look again under alloc_lock before allocating a page for the slot. */
+	mutex_lock(&stt->alloc_lock);
+	pg = stt->pages[sttpage];
+	if (!pg) {
+		pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!WARN_ON_ONCE(!pg))
+			stt->pages[sttpage] = pg;
+	}
+	mutex_unlock(&stt->alloc_lock);
+	return pg;
+}
+
+/* .fault handler of the TCE table mapping: supply the page at vmf->pgoff. */
+static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
+{
+	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
+	struct page *pg;
+
+	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
+		return VM_FAULT_SIGBUS;
+
+	pg = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+	if (!pg)
+		return VM_FAULT_OOM;
+	get_page(pg);
+	vmf->page = pg;
+	return 0;
+}
+
+static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
+	.fault = kvm_spapr_tce_fault,	/* pages are supplied on demand */
+};
+
+static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &kvm_spapr_tce_vm_ops;	/* nothing is mapped up front */
+	return 0;
+}
+
+/* .release of the TCE table fd: unlink the table and free it through RCU. */
+static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
+{
+	struct kvmppc_spapr_tce_table *stt = filp->private_data;
+	struct kvmppc_spapr_tce_iommu_table *stit, *nxt;
+	struct kvm *kvm = stt->kvm;
+	unsigned long npages;
+
+	mutex_lock(&kvm->lock);
+	list_del_rcu(&stt->list);
+	mutex_unlock(&kvm->lock);
+
+	list_for_each_entry_safe(stit, nxt, &stt->iommu_tables, next) {
+		WARN_ON(!kref_read(&stit->kref));
+		/* Keep dropping references until the final put releases stit. */
+		while (!kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
+			;
+	}
+
+	npages = kvmppc_stt_pages(kvmppc_tce_pages(stt->size));
+	account_locked_vm(kvm->mm, npages, false);
+
+	kvm_put_kvm(stt->kvm);
+	call_rcu(&stt->rcu, release_spapr_tce_table);
+
+	return 0;
+}
+
+static const struct file_operations kvm_spapr_tce_fops = {
+	.mmap           = kvm_spapr_tce_mmap,
+	.release	= kvm_spapr_tce_release,	/* unlinks and frees the table */
+};
+
+int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+				  struct kvm_create_spapr_tce_64 *args)
+{
+	struct kvmppc_spapr_tce_table *stt = NULL;
+	struct kvmppc_spapr_tce_table *siter;
+	struct mm_struct *mm = kvm->mm;
+	unsigned long npages;
+	int ret;
+
+	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
+		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
+		return -EINVAL;
+	/* Account the table's pages as locked memory before allocating anything. */
+	npages = kvmppc_tce_pages(args->size);
+	ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
+	if (ret)
+		return ret;
+
+	ret = -ENOMEM;
+	stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN);
+	if (!stt)
+		goto fail_acct;
+
+	stt->liobn = args->liobn;
+	stt->page_shift = args->page_shift;
+	stt->offset = args->offset;
+	stt->size = args->size;
+	stt->kvm = kvm;
+	mutex_init(&stt->alloc_lock);
+	INIT_LIST_HEAD_RCU(&stt->iommu_tables);
+
+	mutex_lock(&kvm->lock);
+
+	/* Check this LIOBN hasn't been previously allocated */
+	ret = 0;
+	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+		if (siter->liobn == args->liobn) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+	/* Reference for the new fd; put in its .release, or just below on error. */
+	kvm_get_kvm(kvm);
+	if (!ret)
+		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				       stt, O_RDWR | O_CLOEXEC);
+
+	if (ret >= 0)
+		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+	else
+		kvm_put_kvm_no_destroy(kvm);
+
+	mutex_unlock(&kvm->lock);
+	/* A non-negative ret is the value anon_inode_getfd() returned. */
+	if (ret >= 0)
+		return ret;
+
+	kfree(stt);
+ fail_acct:
+	account_locked_vm(mm, kvmppc_stt_pages(npages), false);
+	return ret;
+}
+
+/* Convert the guest address in @tce to a userspace address; -EINVAL if no slot. */
+static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
+		unsigned long *ua)
+{
+	unsigned long gfn = tce >> PAGE_SHIFT;
+	struct kvm_memory_slot *memslot;
+
+	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+	if (!memslot)
+		return -EINVAL;
+	/* Keep the offset inside the page, without the permission bits. */
+	*ua = __gfn_to_hva_memslot(memslot, gfn);
+	*ua |= tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE);
+	return 0;
+}
+
+/* Check that @tce may be stored in @stt: H_SUCCESS or H_TOO_HARD. */
+static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
+		unsigned long tce)
+{
+	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+	struct kvmppc_spapr_tce_iommu_table *stit;
+	long status = H_SUCCESS;
+	unsigned long ua = 0;
+
+	/* Allow userspace to poison TCE table */
+	if (iommu_tce_direction(tce) == DMA_NONE)
+		return H_SUCCESS;
+
+	if (iommu_tce_check_gpa(stt->page_shift, gpa) ||
+	    kvmppc_tce_to_ua(stt->kvm, tce, &ua))
+		return H_TOO_HARD;
+
+	/* ua must resolve to an hpa for every attached hardware table. */
+	rcu_read_lock();
+	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+		struct mm_iommu_table_group_mem_t *mem;
+		long shift = stit->tbl->it_page_shift;
+		unsigned long hpa = 0;
+
+		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
+		if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
+			status = H_TOO_HARD;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return status;
+}
+
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+		unsigned long idx, unsigned long tce)
+{
+	unsigned long rel = idx - stt->offset;	/* index within this table */
+	unsigned long sttpage = rel / TCES_PER_PAGE;
+	struct page *page = stt->pages[sttpage];
+	u64 *slots;
+
+	if (!page) {
+		/*
+		 * Nothing is allocated yet.  A zero TCE needs no backing page;
+		 * any other value (not just read|write ones) gets one now.
+		 */
+		if (!tce)
+			return;
+
+		page = kvm_spapr_get_tce_page(stt, sttpage);
+		if (!page)
+			return;
+	}
+
+	slots = page_to_virt(page);
+	slots[rel % TCES_PER_PAGE] = tce;
+}
+
+/* Exchange hpa 0 / DMA_NONE into every hardware TCE behind guest @entry. */
+static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
+		struct iommu_table *tbl, unsigned long entry)
+{
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+	while (subpages--) {
+		enum dma_data_direction dir = DMA_NONE;
+		unsigned long hpa = 0;
+
+		iommu_tce_xchg_no_kill(mm, tbl, io_entry++, &hpa, &dir);
+	}
+}
+
+/* Drop the mapped count of the memory recorded for @entry and clear its ua. */
+static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+		struct iommu_table *tbl, unsigned long entry)
+{
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
+	struct mm_iommu_table_group_mem_t *mem;
+
+	/* No userspace-address slot for this entry: nothing to undo. */
+	if (!pua)
+		return H_SUCCESS;
+
+	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua),
+			      1ULL << tbl->it_page_shift);
+	if (!mem)
+		return H_TOO_HARD;
+
+	mm_iommu_mapped_dec(mem);
+	*pua = cpu_to_be64(0);
+	return H_SUCCESS;
+}
+
+/* Clear one hardware TCE and release the memory it was recorded against. */
+static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
+		struct iommu_table *tbl, unsigned long entry)
+{
+	enum dma_data_direction dir = DMA_NONE;
+	unsigned long hpa = 0;
+	long ret;
+
+	ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
+	if (WARN_ON_ONCE(ret))
+		return H_TOO_HARD;
+
+	if (dir == DMA_NONE)
+		return H_SUCCESS;
+
+	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+	if (ret != H_SUCCESS)	/* exchange the values obtained above back in */
+		iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
+	return ret;
+}
+
+/* Unmap every hardware TCE behind guest @entry, then kill the range. */
+static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry)
+{
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long first = entry * subpages;
+	unsigned long n, ret = H_SUCCESS;
+
+	/* Stop at the first subpage that fails. */
+	for (n = 0; n < subpages && ret == H_SUCCESS; ++n)
+		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, first + n);
+
+	/* The kill covers the whole range, even after a partial failure. */
+	iommu_tce_kill(tbl, first, subpages);
+
+	return ret;
+}
+
+/* Point one hardware TCE at @ua; returns 0 on success, else H_TOO_HARD. */
+static long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long ua,
+		enum dma_data_direction dir)
+{
+	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+	struct mm_iommu_table_group_mem_t *mem;
+	unsigned long hpa;
+
+	/* it_userspace allocation might be delayed */
+	if (!pua)
+		return H_TOO_HARD;
+
+	/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
+	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+	if (!mem)
+		return H_TOO_HARD;
+
+	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
+		return H_TOO_HARD;
+
+	if (mm_iommu_mapped_inc(mem))
+		return H_TOO_HARD;
+
+	if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir))) {
+		/* The exchange failed: give back the count taken just above. */
+		mm_iommu_mapped_dec(mem);
+		return H_TOO_HARD;
+	}
+
+	/* After the exchange, a dir other than DMA_NONE means an old mapping. */
+	if (dir != DMA_NONE)
+		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+	*pua = cpu_to_be64(ua);
+	return 0;
+}
+
+/* Map guest @entry: one hardware TCE per IOMMU page, starting at @ua. */
+static long kvmppc_tce_iommu_map(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry, unsigned long ua,
+		enum dma_data_direction dir)
+{
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long first = entry * subpages;
+	unsigned long n, ret = H_SUCCESS;
+
+	/* Stop at the first subpage that cannot be mapped. */
+	for (n = 0; n < subpages; ++n, ua += IOMMU_PAGE_SIZE(tbl)) {
+		ret = kvmppc_tce_iommu_do_map(kvm, tbl, first + n, ua, dir);
+		if (ret != H_SUCCESS)
+			break;
+	}
+
+	/* The kill covers the whole range, even after a partial failure. */
+	iommu_tce_kill(tbl, first, subpages);
+
+	return ret;
+}
+
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		      unsigned long ioba, unsigned long tce)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	long ret, idx;
+	struct kvmppc_spapr_tce_iommu_table *stit;
+	unsigned long entry, ua = 0;
+	enum dma_data_direction dir;
+
+	/* Validate @tce, apply it to every attached hardware table, then */
+	/* record it in the table's own pages with kvmppc_tce_put(). */
+
+	stt = kvmppc_find_table(vcpu->kvm, liobn);
+	if (!stt)
+		return H_TOO_HARD;
+
+	ret = kvmppc_ioba_validate(stt, ioba, 1);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	ret = kvmppc_tce_validate(stt, tce);
+	if (ret != H_SUCCESS)
+		goto unlock_exit;
+
+	dir = iommu_tce_direction(tce);
+
+	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+		ret = H_PARAMETER;
+		goto unlock_exit;
+	}
+
+	entry = ioba >> stt->page_shift;
+
+	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+		if (dir == DMA_NONE)	/* a TCE without a direction is an unmap */
+			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
+					stit->tbl, entry);
+		else
+			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
+					entry, ua, dir);
+
+		/* On failure clear this table's TCEs for the entry and stop. */
+		if (ret != H_SUCCESS) {
+			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
+			goto unlock_exit;
+		}
+	}
+
+	kvmppc_tce_put(stt, entry, tce);
+
+unlock_exit:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+
+long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+		unsigned long liobn, unsigned long ioba,
+		unsigned long tce_list, unsigned long npages)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	long i, ret = H_SUCCESS, idx;
+	unsigned long entry, ua = 0;
+	u64 __user *tces;
+	u64 tce;
+	struct kvmppc_spapr_tce_iommu_table *stit;
+
+	stt = kvmppc_find_table(vcpu->kvm, liobn);
+	if (!stt)
+		return H_TOO_HARD;
+
+	entry = ioba >> stt->page_shift;
+	/*
+	 * SPAPR spec says that the maximum size of the list is 512 TCEs
+	 * so the whole table fits in 4K page
+	 */
+	if (npages > 512)
+		return H_PARAMETER;
+
+	if (tce_list & (SZ_4K - 1))	/* the list must start on a 4K boundary */
+		return H_PARAMETER;
+
+	ret = kvmppc_ioba_validate(stt, ioba, npages);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
+		ret = H_TOO_HARD;
+		goto unlock_exit;
+	}
+	tces = (u64 __user *) ua;
+	/* First pass: check every TCE of the list before changing anything. */
+	for (i = 0; i < npages; ++i) {
+		if (get_user(tce, tces + i)) {
+			ret = H_TOO_HARD;
+			goto unlock_exit;
+		}
+		tce = be64_to_cpu(tce);
+
+		ret = kvmppc_tce_validate(stt, tce);
+		if (ret != H_SUCCESS)
+			goto unlock_exit;
+	}
+	/* Second pass: read each TCE again and install it. */
+	for (i = 0; i < npages; ++i) {
+		/*
+		 * This looks unsafe, because we validate, then regrab
+		 * the TCE from userspace which could have been changed by
+		 * another thread.
+		 *
+		 * But it actually is safe, because the relevant checks will be
+		 * re-executed in the following code.  If userspace tries to
+		 * change this dodgily it will result in a messier failure mode
+		 * but won't threaten the host.
+		 */
+		if (get_user(tce, tces + i)) {
+			ret = H_TOO_HARD;
+			goto unlock_exit;
+		}
+		tce = be64_to_cpu(tce);
+
+		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+			ret = H_PARAMETER;
+			goto unlock_exit;
+		}
+
+		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
+					stit->tbl, entry + i, ua,
+					iommu_tce_direction(tce));
+
+			if (ret != H_SUCCESS) {
+				kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
+						 entry + i);
+				goto unlock_exit;
+			}
+		}
+
+		kvmppc_tce_put(stt, entry + i, tce);
+	}
+
+unlock_exit:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
+
+long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+		unsigned long liobn, unsigned long ioba,
+		unsigned long tce_value, unsigned long npages)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	long i, ret;
+	struct kvmppc_spapr_tce_iommu_table *stit;
+
+	stt = kvmppc_find_table(vcpu->kvm, liobn);
+	if (!stt)
+		return H_TOO_HARD;
+
+	ret = kvmppc_ioba_validate(stt, ioba, npages);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	/* Check permission bits only to allow userspace poison TCE for debug */
+	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
+		return H_PARAMETER;
+
+	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+		unsigned long entry = ioba >> stt->page_shift;
+
+		for (i = 0; i < npages; ++i) {
+			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
+					stit->tbl, entry + i);
+
+			if (ret == H_SUCCESS)
+				continue;
+
+			if (ret == H_TOO_HARD)
+				return ret;
+
+			WARN_ON_ONCE(1);	/* any other status is unexpected here */
+			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
+		}
+	}
+	/* Finally store tce_value for every entry of the range. */
+	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+
+/* Store in guest gpr[4] the TCE recorded for @ioba in table @liobn. */
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		      unsigned long ioba)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	unsigned long idx;
+	struct page *page;
+	u64 tce = 0;
+	long ret;
+
+	stt = kvmppc_find_table(vcpu->kvm, liobn);
+	if (!stt)
+		return H_TOO_HARD;
+
+	ret = kvmppc_ioba_validate(stt, ioba, 1);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	idx = (ioba >> stt->page_shift) - stt->offset;
+	page = stt->pages[idx / TCES_PER_PAGE];
+
+	/* A page that was never allocated reads back as a zero TCE. */
+	if (page)
+		tce = ((u64 *)page_address(page))[idx % TCES_PER_PAGE];
+
+	vcpu->arch.regs.gpr[4] = tce;
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
new file mode 100644
index 0000000000..5bbfb2eed1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -0,0 +1,1072 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_book3s.h>
+#include <asm/reg.h>
+#include <asm/switch_to.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include "book3s.h"
+#include <asm/asm-prototypes.h>
+
+#define OP_19_XOP_RFID		18
+#define OP_19_XOP_RFI		50
+
+#define OP_31_XOP_MFMSR		83
+#define OP_31_XOP_MTMSR		146
+#define OP_31_XOP_MTMSRD	178
+#define OP_31_XOP_MTSR		210
+#define OP_31_XOP_MTSRIN	242
+#define OP_31_XOP_TLBIEL	274
+/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
+#define OP_31_XOP_FAKE_SC1	308
+#define OP_31_XOP_SLBMTE	402
+#define OP_31_XOP_SLBIE		434
+#define OP_31_XOP_SLBIA		498
+#define OP_31_XOP_MFSR		595
+#define OP_31_XOP_MFSRIN	659
+#define OP_31_XOP_DCBA		758
+#define OP_31_XOP_SLBMFEV	851
+#define OP_31_XOP_EIOIO		854
+#define OP_31_XOP_SLBMFEE	915
+#define OP_31_XOP_SLBFEE	979
+
+#define OP_31_XOP_TBEGIN	654
+#define OP_31_XOP_TABORT	910
+
+#define OP_31_XOP_TRECLAIM	942
+#define OP_31_XOP_TRCHKPT	1006
+
+/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
+#define OP_31_XOP_DCBZ		1010
+
+#define OP_LFS			48
+#define OP_LFD			50
+#define OP_STFS			52
+#define OP_STFD			54
+
+#define SPRN_GQR0		912
+#define SPRN_GQR1		913
+#define SPRN_GQR2		914
+#define SPRN_GQR3		915
+#define SPRN_GQR4		916
+#define SPRN_GQR5		917
+#define SPRN_GQR6		918
+#define SPRN_GQR7		919
+
+enum priv_level {	/* ordered; spr_allowed() compares levels with '>' */
+	PRIV_PROBLEM = 0,	/* never refused by spr_allowed() */
+	PRIV_SUPER = 1,		/* refused when the guest MSR has MSR_PR set */
+	PRIV_HYPER = 2,		/* additionally refused when papr_enabled is set */
+};
+
+static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
+{
+	/* PAPR guests may not access SPRs above supervisor level */
+	if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
+		return false;
+
+	/* With MSR_PR set (guest user space) only PRIV_PROBLEM SPRs are allowed */
+	if ((kvmppc_get_msr(vcpu) & MSR_PR) && level > PRIV_PROBLEM)
+		return false;
+
+	return true;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.gpr_tm[0], &vcpu->arch.regs.gpr[0],
+			sizeof(vcpu->arch.gpr_tm));	/* current GPRs -> gpr_tm */
+	memcpy(&vcpu->arch.fp_tm, &vcpu->arch.fp,
+			sizeof(struct thread_fp_state));	/* FP state -> fp_tm */
+	memcpy(&vcpu->arch.vr_tm, &vcpu->arch.vr,
+			sizeof(struct thread_vr_state));	/* VR state -> vr_tm */
+	vcpu->arch.ppr_tm = vcpu->arch.ppr;	/* each remaining value goes to its *_tm field */
+	vcpu->arch.dscr_tm = vcpu->arch.dscr;
+	vcpu->arch.amr_tm = vcpu->arch.amr;
+	vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
+	vcpu->arch.tar_tm = vcpu->arch.tar;
+	vcpu->arch.lr_tm = vcpu->arch.regs.link;
+	vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
+	vcpu->arch.xer_tm = vcpu->arch.regs.xer;
+	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+
+static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.regs.gpr[0], &vcpu->arch.gpr_tm[0],
+			sizeof(vcpu->arch.regs.gpr));	/* gpr_tm -> current GPRs */
+	memcpy(&vcpu->arch.fp, &vcpu->arch.fp_tm,
+			sizeof(struct thread_fp_state));	/* fp_tm -> FP state */
+	memcpy(&vcpu->arch.vr, &vcpu->arch.vr_tm,
+			sizeof(struct thread_vr_state));	/* vr_tm -> VR state */
+	vcpu->arch.ppr = vcpu->arch.ppr_tm;	/* each remaining value comes from its *_tm field */
+	vcpu->arch.dscr = vcpu->arch.dscr_tm;
+	vcpu->arch.amr = vcpu->arch.amr_tm;
+	vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
+	vcpu->arch.tar = vcpu->arch.tar_tm;
+	vcpu->arch.regs.link = vcpu->arch.lr_tm;
+	vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
+	vcpu->arch.regs.xer = vcpu->arch.xer_tm;
+	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
+{
+	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+	int fc_val = ra_val ? ra_val : 1;	/* failure code: ra_val, or 1 when ra_val is 0 */
+	uint64_t texasr;
+
+	/* CR0 = 0 | MSR[TS] | 0 */
+	vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
+		(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
+		 << CR0_SHIFT);
+
+	preempt_disable();
+	tm_enable();
+	texasr = mfspr(SPRN_TEXASR);	/* sampled before the TM state is saved */
+	kvmppc_save_tm_pr(vcpu);
+	kvmppc_copyfrom_vcpu_tm(vcpu);
+
+	/* Record the failure only if the Failure Summary bit is not already set */
+	if (!(texasr & TEXASR_FS)) {
+		texasr &= ~TEXASR_FC;
+		texasr |= ((u64)fc_val << TEXASR_FC_LG) | TEXASR_FS;
+
+		texasr &= ~(TEXASR_PR | TEXASR_HV);	/* then mirror the guest's MSR_PR / MSR_HV */
+		if (kvmppc_get_msr(vcpu) & MSR_PR)
+			texasr |= TEXASR_PR;
+
+		if (kvmppc_get_msr(vcpu) & MSR_HV)
+			texasr |= TEXASR_HV;
+
+		vcpu->arch.texasr = texasr;
+		vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+		mtspr(SPRN_TEXASR, texasr);
+		mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+	}
+	tm_disable();
+	/*
+	 * treclaim must leave the guest in non-transactional state.
+	 */
+	guest_msr &= ~(MSR_TS_MASK);
+	kvmppc_set_msr(vcpu, guest_msr);
+	preempt_enable();
+
+	if (vcpu->arch.shadow_fscr & FSCR_TAR)
+		mtspr(SPRN_TAR, vcpu->arch.tar);	/* reload TAR from the vcpu copy */
+}
+
+static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
+{
+	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+
+	preempt_disable();
+	/*
+	 * FP/VEC/VSX state must be flushed to the vcpu save area
+	 * before it is copied.
+	 */
+	kvmppc_giveup_ext(vcpu, MSR_VSX);
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+	kvmppc_copyto_vcpu_tm(vcpu);
+	kvmppc_save_tm_sprs(vcpu);
+
+	/*
+	 * As a result of trechkpt., TS is set to suspended.
+	 */
+	guest_msr &= ~(MSR_TS_MASK);
+	guest_msr |= MSR_TS_S;
+	kvmppc_set_msr(vcpu, guest_msr);
+	kvmppc_restore_tm_pr(vcpu);
+	preempt_enable();
+}
+
+/* Emulate tabort. at the guest's privilege state */
+void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
+{
+	/* Currently only tabort. itself is emulated; the other
+	 * tabort variants are not, since the kernel does not use
+	 * them at present.
+	 */
+	unsigned long guest_msr = kvmppc_get_msr(vcpu);
+	uint64_t org_texasr;
+
+	preempt_disable();
+	tm_enable();
+	org_texasr = mfspr(SPRN_TEXASR);	/* TEXASR as it was before the abort */
+	tm_abort(ra_val);
+
+	/* CR0 = 0 | MSR[TS] | 0 */
+	vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
+		(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
+		 << CR0_SHIFT);
+
+	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+	/* The failure is recorded only if the Failure Summary bit
+	 * was clear beforehand; tabort. is treated as a nop in
+	 * non-transactional state.
+	 */
+	if (!(org_texasr & TEXASR_FS) &&
+			MSR_TM_ACTIVE(guest_msr)) {
+		vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
+		if (guest_msr & MSR_PR)
+			vcpu->arch.texasr |= TEXASR_PR;
+
+		if (guest_msr & MSR_HV)
+			vcpu->arch.texasr |= TEXASR_HV;
+
+		vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+	}
+	tm_disable();
+	preempt_enable();
+}
+
+#endif
+
+int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
+			      unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int rt = get_rt(inst);
+	int rs = get_rs(inst);
+	int ra = get_ra(inst);
+	int rb = get_rb(inst);
+	u32 inst_sc = 0x44000002;	/* syscall instruction word, matched byte-swapped in case 0 */
+
+	switch (get_op(inst)) {
+	case 0:
+		emulated = EMULATE_FAIL;
+		if ((kvmppc_get_msr(vcpu) & MSR_LE) &&
+		    (inst == swab32(inst_sc))) {
+			/*
+			 * This is the byte reversed syscall instruction of our
+			 * hypercall handler. Early versions of LE Linux didn't
+			 * swap the instructions correctly and ended up in
+			 * illegal instructions.
+			 * Just always fail hypercalls on these broken systems.
+			 */
+			kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED);
+			kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+			emulated = EMULATE_DONE;
+		}
+		break;
+	case 19:
+		switch (get_xop(inst)) {
+		case OP_19_XOP_RFID:
+		case OP_19_XOP_RFI: {
+			unsigned long srr1 = kvmppc_get_srr1(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+			unsigned long cur_msr = kvmppc_get_msr(vcpu);
+
+			/*
+			 * Follow the ISA rule for TM state transitions: with
+			 * TM disabled and the thread suspended, a transition
+			 * to the TM inactive (00) state is suppressed, so
+			 * the suspended state is kept.
+			 */
+			if (((cur_msr & MSR_TM) == 0) &&
+				((srr1 & MSR_TM) == 0) &&
+				MSR_TM_SUSPENDED(cur_msr) &&
+				!MSR_TM_ACTIVE(srr1))
+				srr1 |= MSR_TS_S;
+#endif
+			kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
+			kvmppc_set_msr(vcpu, srr1);
+			*advance = 0;	/* the PC has already been loaded from SRR0 */
+			break;
+		}
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+	case 31:
+		switch (get_xop(inst)) {
+		case OP_31_XOP_MFMSR:
+			kvmppc_set_gpr(vcpu, rt, kvmppc_get_msr(vcpu));
+			break;
+		case OP_31_XOP_MTMSRD:
+		{
+			ulong rs_val = kvmppc_get_gpr(vcpu, rs);
+			if (inst & 0x10000) {	/* only MSR_RI and MSR_EE are taken from rs */
+				ulong new_msr = kvmppc_get_msr(vcpu);
+				new_msr &= ~(MSR_RI | MSR_EE);
+				new_msr |= rs_val & (MSR_RI | MSR_EE);
+				kvmppc_set_msr_fast(vcpu, new_msr);
+			} else
+				kvmppc_set_msr(vcpu, rs_val);
+			break;
+		}
+		case OP_31_XOP_MTMSR:
+			kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
+			break;
+		case OP_31_XOP_MFSR:
+		{
+			int srnum;
+
+			srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32);
+			if (vcpu->arch.mmu.mfsrin) {	/* without the hook, rt is left untouched */
+				u32 sr;
+				sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
+				kvmppc_set_gpr(vcpu, rt, sr);
+			}
+			break;
+		}
+		case OP_31_XOP_MFSRIN:
+		{
+			int srnum;
+
+			srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf;	/* SR number from bits 31..28 of rb */
+			if (vcpu->arch.mmu.mfsrin) {
+				u32 sr;
+				sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
+				kvmppc_set_gpr(vcpu, rt, sr);
+			}
+			break;
+		}
+		case OP_31_XOP_MTSR:
+			vcpu->arch.mmu.mtsrin(vcpu,
+				(inst >> 16) & 0xf,
+				kvmppc_get_gpr(vcpu, rs));
+			break;
+		case OP_31_XOP_MTSRIN:
+			vcpu->arch.mmu.mtsrin(vcpu,
+				(kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf,
+				kvmppc_get_gpr(vcpu, rs));
+			break;
+		case OP_31_XOP_TLBIE:
+		case OP_31_XOP_TLBIEL:
+		{
+			bool large = (inst & 0x00200000) ? true : false;
+			ulong addr = kvmppc_get_gpr(vcpu, rb);
+			vcpu->arch.mmu.tlbie(vcpu, addr, large);
+			break;
+		}
+#ifdef CONFIG_PPC_BOOK3S_64
+		case OP_31_XOP_FAKE_SC1:
+		{
+			/* SC 1 papr hypercalls */
+			ulong cmd = kvmppc_get_gpr(vcpu, 3);
+			int i;
+
+		        if ((kvmppc_get_msr(vcpu) & MSR_PR) ||
+			    !vcpu->arch.papr_enabled) {
+				emulated = EMULATE_FAIL;
+				break;
+			}
+
+			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE)
+				break;
+
+			vcpu->run->papr_hcall.nr = cmd;	/* not handled here: hand the hcall to userspace */
+			for (i = 0; i < 9; ++i) {
+				ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+				vcpu->run->papr_hcall.args[i] = gpr;
+			}
+
+			vcpu->run->exit_reason = KVM_EXIT_PAPR_HCALL;
+			vcpu->arch.hcall_needed = 1;
+			emulated = EMULATE_EXIT_USER;
+			break;
+		}
+#endif
+		case OP_31_XOP_EIOIO:
+			break;	/* nothing to emulate */
+		case OP_31_XOP_SLBMTE:
+			if (!vcpu->arch.mmu.slbmte)
+				return EMULATE_FAIL;	/* NOTE(review): direct return skips the paired-single fallback at the end */
+
+			vcpu->arch.mmu.slbmte(vcpu,
+					kvmppc_get_gpr(vcpu, rs),
+					kvmppc_get_gpr(vcpu, rb));
+			break;
+		case OP_31_XOP_SLBIE:
+			if (!vcpu->arch.mmu.slbie)
+				return EMULATE_FAIL;
+
+			vcpu->arch.mmu.slbie(vcpu,
+					kvmppc_get_gpr(vcpu, rb));
+			break;
+		case OP_31_XOP_SLBIA:
+			if (!vcpu->arch.mmu.slbia)
+				return EMULATE_FAIL;
+
+			vcpu->arch.mmu.slbia(vcpu);
+			break;
+		case OP_31_XOP_SLBFEE:
+			if (!(inst & 1) || !vcpu->arch.mmu.slbfee) {
+				return EMULATE_FAIL;
+			} else {
+				ulong b, t;
+				ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK;
+
+				b = kvmppc_get_gpr(vcpu, rb);
+				if (!vcpu->arch.mmu.slbfee(vcpu, b, &t))
+					cr |= 2 << CR0_SHIFT;
+				kvmppc_set_gpr(vcpu, rt, t);
+				/* copy XER[SO] bit to CR0[SO] */
+				cr |= (vcpu->arch.regs.xer & 0x80000000) >>
+					(31 - CR0_SHIFT);
+				kvmppc_set_cr(vcpu, cr);
+			}
+			break;
+		case OP_31_XOP_SLBMFEE:
+			if (!vcpu->arch.mmu.slbmfee) {
+				emulated = EMULATE_FAIL;
+			} else {
+				ulong t, rb_val;
+
+				rb_val = kvmppc_get_gpr(vcpu, rb);
+				t = vcpu->arch.mmu.slbmfee(vcpu, rb_val);
+				kvmppc_set_gpr(vcpu, rt, t);
+			}
+			break;
+		case OP_31_XOP_SLBMFEV:
+			if (!vcpu->arch.mmu.slbmfev) {
+				emulated = EMULATE_FAIL;
+			} else {
+				ulong t, rb_val;
+
+				rb_val = kvmppc_get_gpr(vcpu, rb);
+				t = vcpu->arch.mmu.slbmfev(vcpu, rb_val);
+				kvmppc_set_gpr(vcpu, rt, t);
+			}
+			break;
+		case OP_31_XOP_DCBA:
+			/* Gets treated as NOP */
+			break;
+		case OP_31_XOP_DCBZ:
+		{
+			ulong rb_val = kvmppc_get_gpr(vcpu, rb);
+			ulong ra_val = 0;
+			ulong addr, vaddr;
+			u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };	/* 32 bytes of zeroes to store */
+			u32 dsisr;
+			int r;
+
+			if (ra)
+				ra_val = kvmppc_get_gpr(vcpu, ra);
+
+			addr = (ra_val + rb_val) & ~31ULL;	/* align down to 32 bytes */
+			if (!(kvmppc_get_msr(vcpu) & MSR_SF))
+				addr &= 0xffffffff;	/* MSR_SF clear: keep only the low 32 bits */
+			vaddr = addr;
+
+			r = kvmppc_st(vcpu, &addr, 32, zeros, true);
+			if ((r == -ENOENT) || (r == -EPERM)) {	/* queue a data storage interrupt instead */
+				*advance = 0;
+				kvmppc_set_dar(vcpu, vaddr);
+				vcpu->arch.fault_dar = vaddr;
+
+				dsisr = DSISR_ISSTORE;
+				if (r == -ENOENT)
+					dsisr |= DSISR_NOHPTE;
+				else if (r == -EPERM)
+					dsisr |= DSISR_PROTFAULT;
+
+				kvmppc_set_dsisr(vcpu, dsisr);
+				vcpu->arch.fault_dsisr = dsisr;
+
+				kvmppc_book3s_queue_irqprio(vcpu,
+					BOOK3S_INTERRUPT_DATA_STORAGE);
+			}
+
+			break;
+		}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		case OP_31_XOP_TBEGIN:
+		{
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;	/* no TM feature: nothing is emulated, result stays EMULATE_DONE */
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {	/* report a failed tbegin. via CR0 and TEXASR */
+				preempt_disable();
+				vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
+				  (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
+
+				vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
+					(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
+						 << TEXASR_FC_LG));
+
+				if ((inst >> 21) & 0x1)
+					vcpu->arch.texasr |= TEXASR_ROT;
+
+				if (kvmppc_get_msr(vcpu) & MSR_HV)
+					vcpu->arch.texasr |= TEXASR_HV;
+
+				vcpu->arch.tfhar = kvmppc_get_pc(vcpu) + 4;
+				vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+
+				kvmppc_restore_tm_sprs(vcpu);
+				preempt_enable();
+			} else
+				emulated = EMULATE_FAIL;
+			break;
+		}
+		case OP_31_XOP_TABORT:
+		{
+			ulong guest_msr = kvmppc_get_msr(vcpu);
+			unsigned long ra_val = 0;
+
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			/* Only emulated for a privileged guest: a problem state
+			 * guest can run with TM enabled, so we do not expect
+			 * to trap here in that case.
+			 */
+			WARN_ON(guest_msr & MSR_PR);
+
+			if (ra)
+				ra_val = kvmppc_get_gpr(vcpu, ra);
+
+			kvmppc_emulate_tabort(vcpu, ra_val);
+			break;
+		}
+		case OP_31_XOP_TRECLAIM:
+		{
+			ulong guest_msr = kvmppc_get_msr(vcpu);
+			unsigned long ra_val = 0;
+
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			/* Raise interrupts in priority order */
+			if (guest_msr & MSR_PR) {
+				/* Privileged Instruction type Program Interrupt */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			if (!MSR_TM_ACTIVE(guest_msr)) {
+				/* TM bad thing interrupt */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			if (ra)
+				ra_val = kvmppc_get_gpr(vcpu, ra);
+			kvmppc_emulate_treclaim(vcpu, ra_val);
+			break;
+		}
+		case OP_31_XOP_TRCHKPT:
+		{
+			ulong guest_msr = kvmppc_get_msr(vcpu);
+			unsigned long texasr;
+
+			if (!cpu_has_feature(CPU_FTR_TM))
+				break;
+
+			if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+				kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			/* Raise interrupts in priority order */
+			if (guest_msr & MSR_PR) {
+				/* Privileged Instruction type Program Intr */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			tm_enable();
+			texasr = mfspr(SPRN_TEXASR);
+			tm_disable();
+
+			if (MSR_TM_ACTIVE(guest_msr) ||
+				!(texasr & (TEXASR_FS))) {
+				/* TM bad thing interrupt */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+				emulated = EMULATE_AGAIN;
+				break;
+			}
+
+			kvmppc_emulate_trchkpt(vcpu);
+			break;
+		}
+#endif
+		default:
+			emulated = EMULATE_FAIL;
+		}
+		break;
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	if (emulated == EMULATE_FAIL)
+		emulated = kvmppc_emulate_paired_single(vcpu);	/* last resort: try paired-single emulation */
+
+	return emulated;
+}
+
+void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper,
+                    u32 val)
+{
+	if (upper) {
+		/* Upper BAT */
+		u32 bl = (val >> 2) & 0x7ff;	/* 11-bit field at bits 12..2 of val */
+		bat->bepi_mask = (~bl << 17);
+		bat->bepi = val & 0xfffe0000;
+		bat->vs = (val & 2) ? 1 : 0;
+		bat->vp = (val & 1) ? 1 : 0;
+		bat->raw = (bat->raw & 0xffffffff00000000ULL) | val;	/* upper word is kept in raw[31:0] */
+	} else {
+		/* Lower BAT */
+		bat->brpn = val & 0xfffe0000;
+		bat->wimg = (val >> 3) & 0xf;
+		bat->pp = val & 3;
+		bat->raw = (bat->raw & 0x00000000ffffffffULL) | ((u64)val << 32);	/* lower word is kept in raw[63:32] */
+	}
+}
+
+static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_bat *bat;
+
+	switch (sprn) {
+	case SPRN_IBAT0U ... SPRN_IBAT3L:
+		bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2];	/* two SPR numbers (U, L) per BAT entry */
+		break;
+	case SPRN_IBAT4U ... SPRN_IBAT7L:
+		bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)];	/* second SPR range maps to ibat[4..] */
+		break;
+	case SPRN_DBAT0U ... SPRN_DBAT3L:
+		bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2];
+		break;
+	case SPRN_DBAT4U ... SPRN_DBAT7L:
+		bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)];	/* second SPR range maps to dbat[4..] */
+		break;
+	default:
+		BUG();	/* sprn is not one of the BAT SPR numbers handled above */
+	}
+
+	return bat;
+}
+
+int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_SDR1:
+		if (!spr_allowed(vcpu, PRIV_HYPER))
+			goto unprivileged;
+		to_book3s(vcpu)->sdr1 = spr_val;
+		break;
+	case SPRN_DSISR:
+		kvmppc_set_dsisr(vcpu, spr_val);
+		break;
+	case SPRN_DAR:
+		kvmppc_set_dar(vcpu, spr_val);
+		break;
+	case SPRN_HIOR:
+		to_book3s(vcpu)->hior = spr_val;
+		break;
+	case SPRN_IBAT0U ... SPRN_IBAT3L:
+	case SPRN_IBAT4U ... SPRN_IBAT7L:
+	case SPRN_DBAT0U ... SPRN_DBAT3L:
+	case SPRN_DBAT4U ... SPRN_DBAT7L:
+	{
+		struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
+
+		kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val);	/* even SPR numbers are the upper half */
+		/* BAT writes happen so rarely that it is fine to flush
+		 * everything here */
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		kvmppc_mmu_flush_segments(vcpu);
+		break;
+	}
+	case SPRN_HID0:
+		to_book3s(vcpu)->hid[0] = spr_val;
+		break;
+	case SPRN_HID1:
+		to_book3s(vcpu)->hid[1] = spr_val;
+		break;
+	case SPRN_HID2:
+		to_book3s(vcpu)->hid[2] = spr_val;
+		break;
+	case SPRN_HID2_GEKKO:
+		to_book3s(vcpu)->hid[2] = spr_val;
+		/* HID2.PSE controls paired single on gekko */
+		switch (vcpu->arch.pvr) {
+		case 0x00080200:	/* lonestar 2.0 */
+		case 0x00088202:	/* lonestar 2.2 */
+		case 0x70000100:	/* gekko 1.0 */
+		case 0x00080100:	/* gekko 2.0 */
+		case 0x00083203:	/* gekko 2.3a */
+		case 0x00083213:	/* gekko 2.3b */
+		case 0x00083204:	/* gekko 2.4 */
+		case 0x00083214:	/* gekko 2.4e (8SE) - retail HW2 */
+		case 0x00087200:	/* broadway */
+			if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) {
+				/* Native paired singles */
+			} else if (spr_val & (1 << 29)) { /* HID2.PSE */
+				vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE;
+				kvmppc_giveup_ext(vcpu, MSR_FP);
+			} else {
+				vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE;
+			}
+			break;
+		}
+		break;
+	case SPRN_HID4:
+	case SPRN_HID4_GEKKO:
+		to_book3s(vcpu)->hid[4] = spr_val;
+		break;
+	case SPRN_HID5:
+		to_book3s(vcpu)->hid[5] = spr_val;
+		/* a guest write to HID5 can change is_dcbz32 */
+		if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+		    (mfmsr() & MSR_HV))
+			vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+		break;
+	case SPRN_GQR0:
+	case SPRN_GQR1:
+	case SPRN_GQR2:
+	case SPRN_GQR3:
+	case SPRN_GQR4:
+	case SPRN_GQR5:
+	case SPRN_GQR6:
+	case SPRN_GQR7:
+		to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
+		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_FSCR:
+		kvmppc_set_fscr(vcpu, spr_val);
+		break;
+	case SPRN_BESCR:
+		vcpu->arch.bescr = spr_val;
+		break;
+	case SPRN_EBBHR:
+		vcpu->arch.ebbhr = spr_val;
+		break;
+	case SPRN_EBBRR:
+		vcpu->arch.ebbrr = spr_val;
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case SPRN_TFHAR:
+	case SPRN_TEXASR:
+	case SPRN_TFIAR:
+		if (!cpu_has_feature(CPU_FTR_TM))
+			break;	/* no TM feature: the write is dropped */
+
+		if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+			kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+			emulated = EMULATE_AGAIN;
+			break;
+		}
+
+		if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)) &&
+			!((MSR_TM_SUSPENDED(kvmppc_get_msr(vcpu))) &&
+					(sprn == SPRN_TFHAR))) {
+			/* mtspr() to the TM registers is only legal in
+			 * non-transactional state, except for TFHAR,
+			 * which may also be written in suspended state.
+			 */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			emulated = EMULATE_AGAIN;
+			break;
+		}
+
+		tm_enable();
+		if (sprn == SPRN_TFHAR)
+			mtspr(SPRN_TFHAR, spr_val);
+		else if (sprn == SPRN_TEXASR)
+			mtspr(SPRN_TEXASR, spr_val);
+		else
+			mtspr(SPRN_TFIAR, spr_val);
+		tm_disable();
+
+		break;
+#endif
+#endif
+	case SPRN_ICTC:
+	case SPRN_THRM1:
+	case SPRN_THRM2:
+	case SPRN_THRM3:
+	case SPRN_CTRLF:
+	case SPRN_CTRLT:
+	case SPRN_L2CR:
+	case SPRN_DSCR:
+	case SPRN_MMCR0_GEKKO:
+	case SPRN_MMCR1_GEKKO:
+	case SPRN_PMC1_GEKKO:
+	case SPRN_PMC2_GEKKO:
+	case SPRN_PMC3_GEKKO:
+	case SPRN_PMC4_GEKKO:
+	case SPRN_WPAR_GEKKO:
+	case SPRN_MSSSR0:
+	case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_MMCRS:
+	case SPRN_MMCRA:
+	case SPRN_MMCR0:
+	case SPRN_MMCR1:
+	case SPRN_MMCR2:
+	case SPRN_UMMCR2:
+	case SPRN_UAMOR:
+	case SPRN_IAMR:
+	case SPRN_AMR:
+#endif
+		break;	/* writes to the SPRs listed above are accepted and discarded */
+unprivileged:
+	default:
+		pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn);
+		if (sprn & 0x10) {
+			if (kvmppc_get_msr(vcpu) & MSR_PR) {
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+				emulated = EMULATE_AGAIN;
+			}
+		} else {
+			if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) {
+				kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+				emulated = EMULATE_AGAIN;
+			}
+		}
+		break;
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_IBAT0U ... SPRN_IBAT3L:
+	case SPRN_IBAT4U ... SPRN_IBAT7L:
+	case SPRN_DBAT0U ... SPRN_DBAT3L:
+	case SPRN_DBAT4U ... SPRN_DBAT7L:
+	{
+		struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn);
+
+		if (sprn % 2)
+			*spr_val = bat->raw >> 32;	/* odd SPR number: upper 32 bits of raw */
+		else
+			*spr_val = bat->raw;
+
+		break;
+	}
+	case SPRN_SDR1:
+		if (!spr_allowed(vcpu, PRIV_HYPER))
+			goto unprivileged;
+		*spr_val = to_book3s(vcpu)->sdr1;
+		break;
+	case SPRN_DSISR:
+		*spr_val = kvmppc_get_dsisr(vcpu);
+		break;
+	case SPRN_DAR:
+		*spr_val = kvmppc_get_dar(vcpu);
+		break;
+	case SPRN_HIOR:
+		*spr_val = to_book3s(vcpu)->hior;
+		break;
+	case SPRN_HID0:
+		*spr_val = to_book3s(vcpu)->hid[0];
+		break;
+	case SPRN_HID1:
+		*spr_val = to_book3s(vcpu)->hid[1];
+		break;
+	case SPRN_HID2:
+	case SPRN_HID2_GEKKO:
+		*spr_val = to_book3s(vcpu)->hid[2];
+		break;
+	case SPRN_HID4:
+	case SPRN_HID4_GEKKO:
+		*spr_val = to_book3s(vcpu)->hid[4];
+		break;
+	case SPRN_HID5:
+		*spr_val = to_book3s(vcpu)->hid[5];
+		break;
+	case SPRN_CFAR:
+	case SPRN_DSCR:
+		*spr_val = 0;
+		break;
+	case SPRN_PURR:
+		/*
+		 * purr would have been updated on exit
+		 */
+		*spr_val = vcpu->arch.purr;
+		break;
+	case SPRN_SPURR:
+		/*
+		 * spurr would have been updated on exit
+		 */
+		*spr_val = vcpu->arch.spurr;
+		break;
+	case SPRN_VTB:
+		*spr_val = to_book3s(vcpu)->vtb;
+		break;
+	case SPRN_IC:
+		*spr_val = vcpu->arch.ic;
+		break;
+	case SPRN_GQR0:
+	case SPRN_GQR1:
+	case SPRN_GQR2:
+	case SPRN_GQR3:
+	case SPRN_GQR4:
+	case SPRN_GQR5:
+	case SPRN_GQR6:
+	case SPRN_GQR7:
+		*spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0];
+		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_FSCR:
+		*spr_val = vcpu->arch.fscr;
+		break;
+	case SPRN_BESCR:
+		*spr_val = vcpu->arch.bescr;
+		break;
+	case SPRN_EBBHR:
+		*spr_val = vcpu->arch.ebbhr;
+		break;
+	case SPRN_EBBRR:
+		*spr_val = vcpu->arch.ebbrr;
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case SPRN_TFHAR:
+	case SPRN_TEXASR:
+	case SPRN_TFIAR:
+		if (!cpu_has_feature(CPU_FTR_TM))
+			break;	/* NOTE(review): *spr_val is left unwritten on this path */
+
+		if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+			kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+			emulated = EMULATE_AGAIN;
+			break;
+		}
+
+		tm_enable();
+		if (sprn == SPRN_TFHAR)
+			*spr_val = mfspr(SPRN_TFHAR);
+		else if (sprn == SPRN_TEXASR)
+			*spr_val = mfspr(SPRN_TEXASR);
+		else if (sprn == SPRN_TFIAR)
+			*spr_val = mfspr(SPRN_TFIAR);
+		tm_disable();
+		break;
+#endif
+#endif
+	case SPRN_THRM1:
+	case SPRN_THRM2:
+	case SPRN_THRM3:
+	case SPRN_CTRLF:
+	case SPRN_CTRLT:
+	case SPRN_L2CR:
+	case SPRN_MMCR0_GEKKO:
+	case SPRN_MMCR1_GEKKO:
+	case SPRN_PMC1_GEKKO:
+	case SPRN_PMC2_GEKKO:
+	case SPRN_PMC3_GEKKO:
+	case SPRN_PMC4_GEKKO:
+	case SPRN_WPAR_GEKKO:
+	case SPRN_MSSSR0:
+	case SPRN_DABR:
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SPRN_MMCRS:
+	case SPRN_MMCRA:
+	case SPRN_MMCR0:
+	case SPRN_MMCR1:
+	case SPRN_MMCR2:
+	case SPRN_UMMCR2:
+	case SPRN_TIR:
+	case SPRN_UAMOR:
+	case SPRN_IAMR:
+	case SPRN_AMR:
+#endif
+		*spr_val = 0;	/* the SPRs listed above always read as zero */
+		break;
+	default:
+unprivileged:
+		pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn);
+		if (sprn & 0x10) {
+			if (kvmppc_get_msr(vcpu) & MSR_PR) {
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+				emulated = EMULATE_AGAIN;
+			}
+		} else {
+			if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 ||
+			    sprn == 4 || sprn == 5 || sprn == 6) {
+				kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+				emulated = EMULATE_AGAIN;
+			}
+		}
+
+		break;
+	}
+
+	return emulated;
+}
+
+u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
+{
+	return make_dsisr(inst);	/* built from the instruction word alone; vcpu is unused */
+}
+
+ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Linux's fix_alignment() assumes that DAR is valid, so we can too
+	 */
+	return vcpu->arch.fault_dar;
+#else
+	ulong dar = 0;
+	ulong ra = get_ra(inst);
+	ulong rb = get_rb(inst);
+
+	switch (get_op(inst)) {
+	case OP_LFS:
+	case OP_LFD:
+	case OP_STFD:
+	case OP_STFS:
+		if (ra)
+			dar = kvmppc_get_gpr(vcpu, ra);	/* ra == 0 contributes 0 */
+		dar += (s32)((s16)inst);	/* sign-extended low 16 bits of inst */
+		break;
+	case 31:
+		if (ra)
+			dar = kvmppc_get_gpr(vcpu, ra);
+		dar += kvmppc_get_gpr(vcpu, rb);	/* (ra or 0) + rb */
+		break;
+	default:
+		printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
+		break;	/* unknown opcode: dar stays 0 */
+	}
+
+	return dar;
+#endif
+}
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
new file mode 100644
index 0000000000..f08565885d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/export.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
+#endif
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
+#endif
+
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
new file mode 100644
index 0000000000..0429488ba1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -0,0 +1,6360 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *    Paul Mackerras <paulus@au1.ibm.com>
+ *    Alexander Graf <agraf@suse.de>
+ *    Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description: KVM functions specific to running on Book 3S
+ * processors in hypervisor mode (specifically POWER7 and later).
+ *
+ * This file is derived from arch/powerpc/kvm/book3s.c,
+ * by Alexander Graf <agraf@suse.de>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/stat.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/page-flags.h>
+#include <linux/srcu.h>
+#include <linux/miscdevice.h>
+#include <linux/debugfs.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/smp.h>
+
+#include <asm/ftrace.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+#include <asm/asm-prototypes.h>
+#include <asm/archrandom.h>
+#include <asm/debug.h>
+#include <asm/disassemble.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/lppaca.h>
+#include <asm/pmc.h>
+#include <asm/processor.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/switch_to.h>
+#include <asm/smp.h>
+#include <asm/dbell.h>
+#include <asm/hmi.h>
+#include <asm/pnv-pci.h>
+#include <asm/mmu.h>
+#include <asm/opal.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/ultravisor.h>
+#include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
+
+#include <trace/events/ipi.h>
+
+#include "book3s.h"
+#include "book3s_hv.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
+/* #define EXIT_DEBUG */
+/* #define EXIT_DEBUG_SIMPLE */
+/* #define EXIT_DEBUG_INT */
+
+/* RESUME_GUEST tagged with ARCH1: a guest page fault still needs to be handled */
+#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* RESUME_GUEST tagged with ARCH2: a guest passthrough interrupt needs handling */
+#define RESUME_PASSTHROUGH	(RESUME_GUEST | RESUME_FLAG_ARCH2)
+
+/* All-ones u64 used as the "null" (nothing recorded) value for timebase fields */
+#define TB_NIL	(~(u64)0)
+
+static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);	/* indices 0..MAX_HCALL_OPCODE/4 */
+
+static int dynamic_mt_modes = 6;	/* default 6 = both 2 and 4 allowed */
+module_param(dynamic_mt_modes, int, 0644);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;	/* static storage, so the default is 0 (= max) */
+module_param(target_smt_mode, int, 0644);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
+
+static bool one_vm_per_core;	/* static storage, so the default is false */
+module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
+
+#ifdef CONFIG_KVM_XICS
+static const struct kernel_param_ops module_param_ops = {	/* plain int set/get for the two params below */
+	.set = param_set_int,
+	.get = param_get_int,
+};
+
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
+#endif /* CONFIG_KVM_XICS */
+
+/* If set, guests are allowed to create and control nested guests */
+static bool nested = true;
+module_param(nested, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
+
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);	/* forward declaration */
+
+/*
+ * RWMR values for POWER8.  These control the rate at which PURR and
+ * SPURR count and should be set according to the number of online
+ * threads in the vcore being run; p8_rwmr_values[n] is the n-thread value.
+ */
+#define RWMR_RPA_P8_1THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_2THREAD	0x7FFF2908450D8DA9UL
+#define RWMR_RPA_P8_3THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_4THREAD	0x199A421245058DA9UL
+#define RWMR_RPA_P8_5THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_6THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_7THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_8THREAD	0x164520C62609AECAUL
+
+static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
+	RWMR_RPA_P8_1THREAD,	/* [0]: reuses the 1-thread value */
+	RWMR_RPA_P8_1THREAD,	/* [1] */
+	RWMR_RPA_P8_2THREAD,	/* [2] */
+	RWMR_RPA_P8_3THREAD,	/* [3] */
+	RWMR_RPA_P8_4THREAD,	/* [4] */
+	RWMR_RPA_P8_5THREAD,	/* [5] */
+	RWMR_RPA_P8_6THREAD,	/* [6] */
+	RWMR_RPA_P8_7THREAD,	/* [7] */
+	RWMR_RPA_P8_8THREAD,	/* [8] */
+};
+
+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, int *ip)
+{
+	struct kvm_vcpu *found;
+	int slot;
+
+	/* Resume the scan just past *ip; *ip is only updated on a hit */
+	for (slot = *ip + 1; slot < MAX_SMT_THREADS; slot++) {
+		found = READ_ONCE(vc->runnable_threads[slot]);
+		if (!found)
+			continue;
+		*ip = slot;
+		return found;
+	}
+	return NULL;
+}
+
+/* Visit each non-NULL vc->runnable_threads[] entry: vcpu = entry, i = its index */
+#define for_each_runnable_thread(i, vcpu, vc) \
+	for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
+
+static bool kvmppc_ipi_thread(int cpu)
+{	/* Try to signal host thread @cpu directly; true means something was sent. */
+	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);	/* target id is OR-ed in below */
+
+	/* If we're a nested hypervisor, fall back to ordinary IPIs for now */
+	if (kvmhv_on_pseries())
+		return false;
+
+	/* On POWER9 we can use msgsnd to IPI any cpu */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		msg |= get_hard_smp_processor_id(cpu);
+		smp_mb();
+		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+		return true;
+	}
+
+	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		preempt_disable();	/* smp_processor_id() is read inside this section */
+		if (cpu_first_thread_sibling(cpu) ==
+		    cpu_first_thread_sibling(smp_processor_id())) {
+			msg |= cpu_thread_in_core(cpu);
+			smp_mb();
+			__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+			preempt_enable();
+			return true;
+		}
+		preempt_enable();
+	}
+
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+	if (cpu >= 0 && cpu < nr_cpu_ids) {	/* range check before indexing paca_ptrs[] */
+		if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
+			xics_wake_cpu(cpu);
+			return true;
+		}
+		opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
+		return true;
+	}
+#endif /* CONFIG_PPC_ICP_NATIVE && CONFIG_SMP */
+
+	return false;	/* nothing was sent; the caller has to fall back */
+}
+
+static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
+{	/* Wake the vcpu's waiter, then poke the host thread (or core) it is on. */
+	int cpu;
+	struct rcuwait *waitp;
+
+	/*
+	 * rcuwait_wake_up contains smp_mb() which orders prior stores that
+	 * create pending work vs below loads of cpu fields. The other side
+	 * is the barrier in vcpu run that orders setting the cpu fields vs
+	 * testing for pending work.
+	 */
+
+	waitp = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(waitp))
+		++vcpu->stat.generic.halt_wakeup;	/* count only wakeups that found a waiter */
+
+	cpu = READ_ONCE(vcpu->arch.thread_cpu);
+	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
+		return;	/* direct IPI was sent, nothing more to do */
+
+	/* CPU points to the first thread of the core */
+	cpu = vcpu->cpu;
+	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
+		smp_send_reschedule(cpu);	/* fallback: ordinary reschedule IPI */
+}
+
+/*
+ * We use the vcpu_load/put functions to measure stolen time.
+ *
+ * Stolen time is counted as time when either the vcpu is able to
+ * run as part of a virtual core, but the task running the vcore
+ * is preempted or sleeping, or when the vcpu needs something done
+ * in the kernel by the task running the vcpu, but that task is
+ * preempted or sleeping.  Those two things have to be counted
+ * separately, since one of the vcpu tasks will take on the job
+ * of running the core, and the other vcpu tasks in the vcore will
+ * sleep waiting for it to do that, but that sleep shouldn't count
+ * as stolen time.
+ *
+ * Hence we accumulate stolen time when the vcpu can run as part of
+ * a vcore using vc->stolen_tb, and the stolen time when the vcpu
+ * needs its task to do other things in the kernel (for example,
+ * service a page fault) in busy_stolen.  We don't accumulate
+ * stolen time for a vcore when it is inactive, or for a vcpu
+ * when it is in state RUNNING or NOTREADY.  NOTREADY is a bit of
+ * a misnomer; it means that the vcpu task is not executing in
+ * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
+ * the kernel.  We don't have any way of dividing up that time
+ * between time that the vcpu is genuinely stopped, time that
+ * the task is actively working on behalf of the vcpu, and time
+ * that the task is preempted, so we don't count any of it as
+ * stolen.
+ *
+ * Updates to busy_stolen are protected by arch.tbacct_lock;
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock.  The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
+ *
+ * The POWER9 path is simpler, one vcpu per virtual core so the
+ * former case does not exist. If a vcpu is preempted when it is
+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
+ * state in the P9 path.
+ */
+
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
+{	/* Record tb as the start of a stolen-time interval for the vcore. */
+	unsigned long flags;
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));	/* warns once if reached on an ARCH_300 host */
+
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	vc->preempt_tb = tb;	/* consumed by kvmppc_core_end_stolen() and vcore_stolen_time() */
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb)
+{
+	unsigned long irqflags;
+	u64 since;
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+	spin_lock_irqsave(&vc->stoltb_lock, irqflags);
+	since = vc->preempt_tb;
+	vc->preempt_tb = TB_NIL;
+	if (since != TB_NIL)
+		vc->stolen_tb += tb - since;	/* fold the closed interval in */
+	spin_unlock_irqrestore(&vc->stoltb_lock, irqflags);
+}
+
+static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+	unsigned long irqflags;
+	u64 tb;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* P9 path: a pending busy_preempt stamp is added to vc->stolen_tb */
+		if (vcpu->arch.busy_preempt == TB_NIL)
+			return;
+		WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
+		vcore->stolen_tb += mftb() - vcpu->arch.busy_preempt;
+		vcpu->arch.busy_preempt = TB_NIL;
+		return;
+	}
+
+	tb = mftb();
+
+	/*
+	 * No vcore lock needed to test vc->runner: only this task sets it
+	 * to this vcpu, and afterwards only this task resets it to NULL.
+	 */
+	if (vcore->runner == vcpu && vcore->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_end_stolen(vcore, tb);
+
+	/* Time spent preempted while BUSY_IN_HOST accrues to busy_stolen */
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, irqflags);
+	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
+	    vcpu->arch.busy_preempt != TB_NIL) {
+		vcpu->arch.busy_stolen += tb - vcpu->arch.busy_preempt;
+		vcpu->arch.busy_preempt = TB_NIL;
+	}
+	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, irqflags);
+}
+
+static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+	unsigned long irqflags;
+	u64 tb;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* In the P9 path RUNNABLE is not preemptible (nor takes host interrupts) */
+		WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
+		/*
+		 * Stamp the start of stolen time only when preempted while the
+		 * vcpu task is running in the kernel (not in qemu, which is INACTIVE).
+		 */
+		if (!task_is_running(current))
+			return;
+		if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+			vcpu->arch.busy_preempt = mftb();
+		return;
+	}
+
+	tb = mftb();
+
+	/* The vcore's runner going off-cpu opens a vcore stolen interval */
+	if (vcore->runner == vcpu && vcore->vcore_state >= VCORE_SLEEPING)
+		kvmppc_core_start_stolen(vcore, tb);
+
+	/* Remember when a BUSY_IN_HOST vcpu was put */
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, irqflags);
+	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+		vcpu->arch.busy_preempt = tb;
+	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, irqflags);
+}
+
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+{
+	vcpu->arch.pvr = pvr;	/* plain store; no validation or locking is done here */
+}
+
+/* Dummy value used in computing PCR value below */
+#define PCR_ARCH_31    (PCR_ARCH_300 << 1)
+
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+	unsigned long host_bit;
+	unsigned long guest_bit;
+
+	/* The host can offer its own architecture level and anything older */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		host_bit = PCR_ARCH_31;
+	else if (cpu_has_feature(CPU_FTR_ARCH_300))
+		host_bit = PCR_ARCH_300;
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		host_bit = PCR_ARCH_207;
+	else if (cpu_has_feature(CPU_FTR_ARCH_206))
+		host_bit = PCR_ARCH_206;
+	else
+		host_bit = PCR_ARCH_205;
+
+	/* Find the lowest PCR bit needed to run the guest at the requested level */
+	if (!arch_compat) {
+		/* Nothing requested: the guest runs at the host's own level */
+		guest_bit = host_bit;
+	} else {
+		switch (arch_compat) {
+		case PVR_ARCH_205:
+			guest_bit = PCR_ARCH_205;
+			break;
+		case PVR_ARCH_206:
+		case PVR_ARCH_206p:
+			guest_bit = PCR_ARCH_206;
+			break;
+		case PVR_ARCH_207:
+			guest_bit = PCR_ARCH_207;
+			break;
+		case PVR_ARCH_300:
+			guest_bit = PCR_ARCH_300;
+			break;
+		case PVR_ARCH_31:
+			guest_bit = PCR_ARCH_31;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* Refuse a request that exceeds what the host provides */
+	if (guest_bit > host_bit)
+		return -EINVAL;
+
+	/* Set PCR bits b with guest_bit <= b < host_bit, plus all reserved bits */
+	spin_lock(&vcore->lock);
+	vcore->arch_compat = arch_compat;
+	vcore->pcr = (host_bit - guest_bit) | PCR_MASK;
+	spin_unlock(&vcore->lock);
+
+	return 0;
+}
+
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+{	/* Log the vcpu's register state at error level (pr_err). */
+	int r;
+
+	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
+	pr_err("pc  = %.16lx  msr = %.16llx  trap = %x\n",
+	       vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap);
+	for (r = 0; r < 16; ++r)	/* two GPRs per line: r and r+16 */
+		pr_err("r%2d = %.16lx  r%d = %.16lx\n",
+		       r, kvmppc_get_gpr(vcpu, r),
+		       r+16, kvmppc_get_gpr(vcpu, r+16));
+	pr_err("ctr = %.16lx  lr  = %.16lx\n",
+	       vcpu->arch.regs.ctr, vcpu->arch.regs.link);
+	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
+	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
+	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
+	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
+	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
+	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
+	pr_err("cr = %.8lx  xer = %.16lx  dsisr = %.8x\n",
+	       vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
+	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
+	pr_err("fault dar = %.16lx dsisr = %.8x\n",
+	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
+	for (r = 0; r < vcpu->arch.slb_max; ++r)	/* one line per SLB entry below slb_max */
+		pr_err("  ESID = %.16llx VSID = %.16llx\n",
+		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
+	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.16lx\n",
+	       vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
+	       vcpu->arch.last_inst);
+}
+
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+{
+	return kvm_get_vcpu_by_id(kvm, id);	/* callers here treat NULL as "no such vcpu" */
+}
+
+static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
+{
+	vpa->__old_status |= LPPACA_OLD_SHARED_PROC;	/* flag is OR-ed in; vcpu is unused */
+	vpa->yield_count = cpu_to_be32(1);	/* field is kept big-endian */
+}
+
+static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
+		   unsigned long addr, unsigned long len)
+{
+	if (addr & (L1_CACHE_BYTES - 1))
+		return -EINVAL;		/* address must be cache-line aligned */
+	/* Queue an update only if the request differs from what is recorded */
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	if (!(v->next_gpa == addr && v->len == len)) {
+		v->next_gpa = addr;
+		v->len = addr ? len : 0;
+		v->update_pending = 1;
+	}
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return 0;
+}
+
+/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
+struct reg_vpa {
+	u32 dummy;		/* first 4 bytes; places length at offset 4 */
+	union {
+		__be16 hword;	/* 16-bit big-endian length, read for H_VPA_REG_VPA */
+		__be32 word;	/* 32-bit big-endian length, read for DTL and SLB */
+	} length;
+};
+
+static int vpa_is_registered(struct kvmppc_vpa *vpap)
+{
+	/* A queued update takes precedence over the currently pinned area */
+	return vpap->update_pending ? vpap->next_gpa != 0
+				    : vpap->pinned_addr != NULL;
+}
+
+static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
+				       unsigned long flags,
+				       unsigned long vcpuid, unsigned long vpa)
+{	/* Validate a VPA/DTL/SLB (de)registration and queue it on the target vcpu. */
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long len, nb;
+	void *va;
+	struct kvm_vcpu *tvcpu;
+	int err;
+	int subfunc;
+	struct kvmppc_vpa *vpap;
+
+	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);	/* target may differ from the calling vcpu */
+	if (!tvcpu)
+		return H_PARAMETER;
+
+	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
+	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
+	    subfunc == H_VPA_REG_SLB) {
+		/* Registering new area - address must be cache-line aligned */
+		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
+			return H_PARAMETER;
+
+		/* convert logical addr to kernel addr and read length */
+		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
+		if (va == NULL)
+			return H_PARAMETER;
+		if (subfunc == H_VPA_REG_VPA)
+			len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
+		else
+			len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
+		kvmppc_unpin_guest_page(kvm, va, vpa, false);	/* pinned only to read the length */
+
+		/* Length must not exceed nb from the pin call and must cover struct reg_vpa */
+		if (len > nb || len < sizeof(struct reg_vpa))
+			return H_PARAMETER;
+	} else {
+		vpa = 0;	/* deregistration (or unknown subfunc): no address, no length */
+		len = 0;
+	}
+
+	err = H_PARAMETER;	/* result if no case below matches or a length check fails */
+	vpap = NULL;
+	spin_lock(&tvcpu->arch.vpa_update_lock);
+
+	switch (subfunc) {
+	case H_VPA_REG_VPA:		/* register VPA */
+		/*
+		 * The size of our lppaca is 1kB because of the way we align
+		 * it for the guest to avoid crossing a 4kB boundary. We only
+		 * use 640 bytes of the structure though, so we should accept
+		 * clients that set a size of 640.
+		 */
+		BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+		if (len < sizeof(struct lppaca))
+			break;
+		vpap = &tvcpu->arch.vpa;
+		err = 0;
+		break;
+
+	case H_VPA_REG_DTL:		/* register DTL */
+		if (len < sizeof(struct dtl_entry))
+			break;
+		len -= len % sizeof(struct dtl_entry);	/* round down to whole entries */
+
+		/* Check that they have previously registered a VPA */
+		err = H_RESOURCE;
+		if (!vpa_is_registered(&tvcpu->arch.vpa))
+			break;
+
+		vpap = &tvcpu->arch.dtl;
+		err = 0;
+		break;
+
+	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
+		/* Check that they have previously registered a VPA */
+		err = H_RESOURCE;
+		if (!vpa_is_registered(&tvcpu->arch.vpa))
+			break;
+
+		vpap = &tvcpu->arch.slb_shadow;
+		err = 0;
+		break;
+
+	case H_VPA_DEREG_VPA:		/* deregister VPA */
+		/* Check they don't still have a DTL or SLB buf registered */
+		err = H_RESOURCE;
+		if (vpa_is_registered(&tvcpu->arch.dtl) ||
+		    vpa_is_registered(&tvcpu->arch.slb_shadow))
+			break;
+
+		vpap = &tvcpu->arch.vpa;
+		err = 0;
+		break;
+
+	case H_VPA_DEREG_DTL:		/* deregister DTL */
+		vpap = &tvcpu->arch.dtl;
+		err = 0;
+		break;
+
+	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
+		vpap = &tvcpu->arch.slb_shadow;
+		err = 0;
+		break;
+	}
+
+	if (vpap) {	/* only queued here; kvmppc_update_vpas() does the pinning */
+		vpap->next_gpa = vpa;
+		vpap->len = len;
+		vpap->update_pending = 1;
+	}
+
+	spin_unlock(&tvcpu->arch.vpa_update_lock);
+
+	return err;
+}
+
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
+{	/* Unlocks vpa_update_lock first and retakes it, so it is entered and left locked. */
+	struct kvm *kvm = vcpu->kvm;
+	void *va;
+	unsigned long nb;
+	unsigned long gpa;
+
+	/*
+	 * We need to pin the page pointed to by vpap->next_gpa,
+	 * but we can't call kvmppc_pin_guest_page under the lock
+	 * as it does get_user_pages() and down_read().  So we
+	 * have to drop the lock, pin the page, then get the lock
+	 * again and check that a new area didn't get registered
+	 * in the meantime.
+	 */
+	for (;;) {
+		gpa = vpap->next_gpa;	/* snapshot taken under the lock */
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		va = NULL;
+		nb = 0;
+		if (gpa)	/* a zero next_gpa is a deregistration: nothing to pin */
+			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		if (gpa == vpap->next_gpa)
+			break;
+		/* sigh... unpin that one and try again */
+		if (va)
+			kvmppc_unpin_guest_page(kvm, va, gpa, false);
+	}
+
+	vpap->update_pending = 0;
+	if (va && nb < vpap->len) {
+		/*
+		 * If it's now too short, it must be that userspace
+		 * has changed the mappings underlying guest memory,
+		 * so unregister the region.
+		 */
+		kvmppc_unpin_guest_page(kvm, va, gpa, false);
+		va = NULL;
+	}
+	if (vpap->pinned_addr)	/* release the previously pinned area, passing on its dirty flag */
+		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
+					vpap->dirty);
+	vpap->gpa = gpa;
+	vpap->pinned_addr = va;	/* NULL when nothing ended up pinned */
+	vpap->dirty = false;
+	if (va)
+		vpap->pinned_end = va + vpap->len;
+}
+
+static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vpa *vpa = &vcpu->arch.vpa, *dtl = &vcpu->arch.dtl;
+	struct kvmppc_vpa *slb = &vcpu->arch.slb_shadow;
+
+	if (!vpa->update_pending && !slb->update_pending && !dtl->update_pending)
+		return;		/* checked without the lock: nothing queued */
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	if (vpa->update_pending) {
+		kvmppc_update_vpa(vcpu, vpa);
+		if (vpa->pinned_addr)
+			init_vpa(vcpu, vpa->pinned_addr);
+	}
+	if (dtl->update_pending) {
+		kvmppc_update_vpa(vcpu, dtl);
+		vcpu->arch.dtl_ptr = dtl->pinned_addr;
+		vcpu->arch.dtl_index = 0;
+	}
+	if (slb->update_pending)
+		kvmppc_update_vpa(vcpu, slb);
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+}
+
+/*
+ * Return the accumulated stolen time for the vcore up until `now',
+ * including a still-open preemption interval unless the vcore is inactive.
+ * The caller should hold the vcore lock.
+ */
+static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
+{
+	unsigned long irqflags;
+	u64 total;
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+	spin_lock_irqsave(&vc->stoltb_lock, irqflags);
+	total = vc->stolen_tb;
+	if (vc->vcore_state != VCORE_INACTIVE && vc->preempt_tb != TB_NIL)
+		total += now - vc->preempt_tb;
+	spin_unlock_irqrestore(&vc->stoltb_lock, irqflags);
+	return total;
+}
+
+static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+					struct lppaca *vpa,
+					unsigned int pcpu, u64 now,
+					unsigned long stolen)
+{	/* Fill the next entry of the vcpu's DTL buffer, if one is set up. */
+	struct dtl_entry *dt;
+
+	dt = vcpu->arch.dtl_ptr;	/* next entry to fill */
+
+	if (!dt)
+		return;
+
+	dt->dispatch_reason = 7;	/* NOTE(review): magic code; its meaning is not visible here */
+	dt->preempt_reason = 0;
+	dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid);
+	dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);	/* stored as a 32-bit value */
+	dt->ready_to_enqueue_time = 0;
+	dt->waiting_to_ready_time = 0;
+	dt->timebase = cpu_to_be64(now);
+	dt->fault_addr = 0;
+	dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
+	dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
+
+	++dt;
+	if (dt == vcpu->arch.dtl.pinned_end)	/* reached the end: wrap back to the first entry */
+		dt = vcpu->arch.dtl.pinned_addr;
+	vcpu->arch.dtl_ptr = dt;
+	/* order writing *dt vs. writing vpa->dtl_idx */
+	smp_wmb();
+	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
+
+	/* vcpu->arch.dtl.dirty is set by the caller */
+}
+
+static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
+				       struct kvmppc_vcore *vc)
+{
+	struct lppaca *lppaca = vcpu->arch.vpa.pinned_addr;
+	unsigned long delta, vcore_total, irqflags;
+	u64 tb, accum;
+
+	if (!lppaca)
+		return;		/* no VPA pinned, nothing to update */
+
+	tb = mftb();
+	/* Vcore stolen time not yet logged for this vcpu ... */
+	vcore_total = vcore_stolen_time(vc, tb);
+	delta = vcore_total - vcpu->arch.stolen_logged;
+	vcpu->arch.stolen_logged = vcore_total;
+
+	/* ... plus whatever the vcpu itself accumulated in busy_stolen */
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, irqflags);
+	delta += vcpu->arch.busy_stolen;
+	vcpu->arch.busy_stolen = 0;
+	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, irqflags);
+
+	accum = be64_to_cpu(lppaca->enqueue_dispatch_tb) + delta;
+	lppaca->enqueue_dispatch_tb = cpu_to_be64(accum);
+
+	__kvmppc_create_dtl_entry(vcpu, lppaca, vc->pcpu, tb + vc->tb_offset, delta);
+
+	vcpu->arch.vpa.dirty = true;
+}
+
+static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu,
+				       struct kvmppc_vcore *vc,
+				       u64 now)
+{
+	struct lppaca *lppaca = vcpu->arch.vpa.pinned_addr;
+	unsigned long total, newly_stolen;
+
+	if (!lppaca)
+		return;		/* no VPA pinned, nothing to update */
+
+	/* Portion of the running total not yet logged for this vcpu */
+	total = vc->stolen_tb;
+	newly_stolen = total - vcpu->arch.stolen_logged;
+	vcpu->arch.stolen_logged = total;
+
+	/* The VPA field gets the running total, the DTL entry gets the delta */
+	lppaca->enqueue_dispatch_tb = cpu_to_be64(total);
+
+	__kvmppc_create_dtl_entry(vcpu, lppaca, vc->pcpu, now, newly_stolen);
+
+	vcpu->arch.vpa.dirty = true;
+}
+
+/* Report whether a doorbell interrupt is pending for a vcpu */
+static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vcore;
+	int thread_bit;
+
+	if (vcpu->arch.doorbell_request)
+		return true;
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return false;	/* dpdes is not consulted on this path */
+	/*
+	 * Order the read of vcore->dpdes after the read of
+	 * vcpu->doorbell_request.  This barrier matches the
+	 * smp_wmb() in kvmppc_guest_entry_inject().
+	 */
+	smp_rmb();
+	vcore = vcpu->arch.vcore;
+	thread_bit = 1 << (vcpu->vcpu_id - vcore->first_vcpuid);
+	return (vcore->dpdes & thread_bit) != 0;
+}
+
+static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* Compat level of at least PVR_ARCH_207, or none set on a 207S host */
+	if (vcore->arch_compat >= PVR_ARCH_207)
+		return true;
+	return !vcore->arch_compat && cpu_has_feature(CPU_FTR_ARCH_207S);
+}
+
+static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
+			     unsigned long resource, unsigned long value1,
+			     unsigned long value2)
+{	/* Returns an H_* status; H_TOO_HARD for resources not handled here. */
+	switch (resource) {
+	case H_SET_MODE_RESOURCE_SET_CIABR:
+		if (!kvmppc_power8_compatible(vcpu))
+			return H_P2;
+		if (value2)	/* value2 must be zero for this resource */
+			return H_P4;
+		if (mflags)	/* no flags are accepted for this resource */
+			return H_UNSUPPORTED_FLAG_START;
+		/* Guests can't breakpoint the hypervisor */
+		if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
+			return H_P3;
+		kvmppc_set_ciabr_hv(vcpu, value1);
+		return H_SUCCESS;
+	case H_SET_MODE_RESOURCE_SET_DAWR0:
+		if (!kvmppc_power8_compatible(vcpu))
+			return H_P2;
+		if (!ppc_breakpoint_available())
+			return H_P2;
+		if (mflags)
+			return H_UNSUPPORTED_FLAG_START;
+		if (value2 & DABRX_HYP)	/* reject a value2 with DABRX_HYP set */
+			return H_P4;
+		kvmppc_set_dawr0_hv(vcpu, value1);
+		kvmppc_set_dawrx0_hv(vcpu, value2);
+		return H_SUCCESS;
+	case H_SET_MODE_RESOURCE_SET_DAWR1:	/* same checks as DAWR0 plus DAWR1 availability */
+		if (!kvmppc_power8_compatible(vcpu))
+			return H_P2;
+		if (!ppc_breakpoint_available())
+			return H_P2;
+		if (!cpu_has_feature(CPU_FTR_DAWR1))
+			return H_P2;
+		if (!vcpu->kvm->arch.dawr1_enabled)	/* per-VM enable flag */
+			return H_FUNCTION;
+		if (mflags)
+			return H_UNSUPPORTED_FLAG_START;
+		if (value2 & DABRX_HYP)
+			return H_P4;
+		kvmppc_set_dawr1_hv(vcpu, value1);
+		kvmppc_set_dawrx1_hv(vcpu, value2);
+		return H_SUCCESS;
+	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+		/*
+		 * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
+		 * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
+		 */
+		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+				kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
+			return H_UNSUPPORTED_FLAG_START;
+		return H_TOO_HARD;	/* otherwise this resource is not handled here */
+	default:
+		return H_TOO_HARD;	/* unknown resource: not handled here */
+	}
+}
+
+/*
+ * Copy len bytes of guest memory from GPA 'from' to GPA 'to'.  Each range must
+ * lie within a single memslot.  Returns 0, -EINVAL if a range runs past its
+ * memslot, or -EFAULT if a lookup, translation or the copy itself fails.
+ */
+static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from, unsigned long len)
+{
+	struct kvm_memory_slot *to_memslot, *from_memslot;
+	unsigned long to_addr, from_addr;
+	int r;
+
+	/* Translate the source GPA to a host virtual address */
+	from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
+	if (!from_memslot)
+		return -EFAULT;
+	/* from + len is one past the last byte, so it may equal the slot end */
+	if ((from + len) > ((from_memslot->base_gfn + from_memslot->npages) << PAGE_SHIFT))
+		return -EINVAL;
+	from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
+	if (kvm_is_error_hva(from_addr))
+		return -EFAULT;
+	from_addr |= (from & (PAGE_SIZE - 1));
+
+	/* Same translation and bounds check for the destination GPA */
+	to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
+	if (!to_memslot)
+		return -EFAULT;
+	if ((to + len) > ((to_memslot->base_gfn + to_memslot->npages) << PAGE_SHIFT))
+		return -EINVAL;
+	to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
+	if (kvm_is_error_hva(to_addr))
+		return -EFAULT;
+	to_addr |= (to & (PAGE_SIZE - 1));
+
+	/* Both addresses are passed as __user pointers, hence the in-user copy */
+	r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr, len);
+	if (r)
+		return -EFAULT;
+	mark_page_dirty(kvm, to >> PAGE_SHIFT);
+	return 0;
+}
+
+static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+			       unsigned long dest, unsigned long src)
+{
+	/* Every flag this hcall accepts; H_PAGE_SET_LOANED covers all CMO flags */
+	const unsigned long known_flags = H_ICACHE_INVALIDATE |
+		H_ICACHE_SYNCHRONIZE | H_ZERO_PAGE | H_COPY_PAGE |
+		H_PAGE_SET_LOANED;
+	const u64 page_bytes = SZ_4K;
+	const u64 offset_mask = page_bytes - 1;
+	bool copy = flags & H_COPY_PAGE;
+	int rc = 0;
+
+	if (flags & ~known_flags)
+		return H_PARAMETER;
+
+	/* dest, and src too when copying, must be 4K aligned */
+	if ((dest & offset_mask) || (copy && (src & offset_mask)))
+		return H_PARAMETER;
+
+	/* Copy takes precedence over zero when both flags are given */
+	if (copy)
+		rc = kvmppc_copy_guest(vcpu->kvm, dest, src, page_bytes);
+	else if (flags & H_ZERO_PAGE)
+		rc = kvm_clear_guest(vcpu->kvm, dest, page_bytes);
+
+	if (rc < 0)
+		return H_PARAMETER;
+
+	/* The remaining flags can be ignored */
+	return H_SUCCESS;
+}
+
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+	struct kvmppc_vcore *vc = target->arch.vcore;
+	struct kvm_vcpu *yield_target = target;
+
+	/*
+	 * The caller is expected to be the real mode handler
+	 * (kvmppc_rm_h_confer()), which returns H_SUCCESS directly when
+	 * the source vcore isn't idle (i.e. may have useful work and
+	 * should not confer), so that is not rechecked here.
+	 *
+	 * With P9's single vcpu per vcore the real mode handler is not
+	 * called, but no other threads are in the source vcore either.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return kvm_vcpu_yield_to(target);
+
+	/* Pre-P9: redirect to the vcore's runner when the target is runnable in it */
+	spin_lock(&vc->lock);
+	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	    vc->vcore_state != VCORE_INACTIVE && vc->runner)
+		yield_target = vc->runner;
+	spin_unlock(&vc->lock);
+
+	return kvm_vcpu_yield_to(yield_target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *vpa;
+	int count;
+
+	/* Read under vpa_update_lock; 0 is reported when no VPA is pinned */
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	vpa = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+	count = vpa ? be32_to_cpu(vpa->yield_count) : 0;
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return count;
+}
+
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Only nested process-scoped invalidation requests are handled in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+	unsigned long inval_type, proc_id, size_arg, range_start, range_end;
+
+	inval_type = kvmppc_get_gpr(vcpu, 6);
+	/* The partition-scoped invalidations aren't handled here in L0 */
+	if (inval_type & H_RPTI_TYPE_NESTED)
+		return RESUME_HOST;
+
+	/* Remaining arguments are read from r4 and r7..r9 */
+	proc_id = kvmppc_get_gpr(vcpu, 4);
+	size_arg = kvmppc_get_gpr(vcpu, 7);
+	range_start = kvmppc_get_gpr(vcpu, 8);
+	range_end = kvmppc_get_gpr(vcpu, 9);
+
+	do_h_rpt_invalidate_prt(proc_id, vcpu->arch.nested->shadow_lpid,
+				inval_type, size_arg, range_start, range_end);
+
+	/* Put the hcall status in r3 and resume the guest */
+	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+	return RESUME_GUEST;
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+				    unsigned long id, unsigned long target,
+				    unsigned long type, unsigned long pg_sizes,
+				    unsigned long start, unsigned long end)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/* Only radix guests are supported, and the range must not be reversed */
+	if (!kvm_is_radix(kvm))
+		return H_UNSUPPORTED;
+
+	if (end < start)
+		return H_P5;
+
+	if (!(type & H_RPTI_TYPE_NESTED)) {
+		/* Process-scoped invalidation for L1 guests. */
+		do_h_rpt_invalidate_prt(id, kvm->arch.lpid,
+					type, pg_sizes, start, end);
+		return H_SUCCESS;
+	}
+
+	/*
+	 * Partition-scoped invalidation for nested guests: nesting must be
+	 * enabled and only cores are supported as the target.
+	 */
+	if (!nesting_enabled(kvm))
+		return H_FUNCTION;
+
+	if (target != H_RPTI_TARGET_CMMU)
+		return H_P2;
+
+	return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes, start, end);
+}
+
+/*
+ * Dispatch a PAPR hypercall made by the guest.
+ *
+ * The hcall number is read from r3 and its arguments from r4 onwards.
+ * Hcalls numbered up to MAX_HCALL_OPCODE that are not set in the VM's
+ * enabled_hcalls bitmap are not handled here.
+ *
+ * Return values:
+ *   RESUME_GUEST - the hcall was handled; unless noted otherwise the
+ *                  status has been written to r3 and
+ *                  vcpu->arch.hcall_needed has been cleared.
+ *   RESUME_HOST  - the hcall is not handled here (unknown, disabled, or a
+ *                  handler returned H_TOO_HARD).
+ *   negative     - an error code to be passed out via KVM_RUN (from the
+ *                  RTAS handler, or -EINTR from H_ENTER_NESTED).
+ */
+int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long req = kvmppc_get_gpr(vcpu, 3);
+	unsigned long target, ret = H_SUCCESS;
+	int yield_count;
+	struct kvm_vcpu *tvcpu;
+	int idx, rc;
+
+	/* enabled_hcalls is indexed by hcall number / 4 */
+	if (req <= MAX_HCALL_OPCODE &&
+	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
+		return RESUME_HOST;
+
+	switch (req) {
+	case H_REMOVE:
+		ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_ENTER:
+		ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6),
+					kvmppc_get_gpr(vcpu, 7));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_READ:
+		ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_CLEAR_MOD:
+		ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_CLEAR_REF:
+		ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_PROTECT:
+		ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_BULK_REMOVE:
+		ret = kvmppc_h_bulk_remove(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+
+	case H_CEDE:
+		/* Nothing to do here; just report H_SUCCESS. */
+		break;
+	case H_PROD:
+		target = kvmppc_get_gpr(vcpu, 4);
+		tvcpu = kvmppc_find_vcpu(kvm, target);
+		if (!tvcpu) {
+			ret = H_PARAMETER;
+			break;
+		}
+		tvcpu->arch.prodded = 1;
+		smp_mb(); /* This orders prodded store vs ceded load */
+		if (tvcpu->arch.ceded)
+			kvmppc_fast_vcpu_kick_hv(tvcpu);
+		break;
+	case H_CONFER:
+		target = kvmppc_get_gpr(vcpu, 4);
+		/* A target of -1 names no specific vcpu: nothing to do. */
+		if (target == -1)
+			break;
+		tvcpu = kvmppc_find_vcpu(kvm, target);
+		if (!tvcpu) {
+			ret = H_PARAMETER;
+			break;
+		}
+		/* Only yield if the target's yield count still matches r5. */
+		yield_count = kvmppc_get_gpr(vcpu, 5);
+		if (kvmppc_get_yield_count(tvcpu) != yield_count)
+			break;
+		kvm_arch_vcpu_yield_to(tvcpu);
+		break;
+	case H_REGISTER_VPA:
+		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_RTAS:
+		if (list_empty(&kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+
+		idx = srcu_read_lock(&kvm->srcu);
+		rc = kvmppc_rtas_hcall(vcpu);
+		srcu_read_unlock(&kvm->srcu, idx);
+
+		if (rc == -ENOENT)
+			return RESUME_HOST;
+		else if (rc == 0)
+			break;
+
+		/* Send the error out to userspace via KVM_RUN */
+		return rc;
+	case H_LOGICAL_CI_LOAD:
+		ret = kvmppc_h_logical_ci_load(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_LOGICAL_CI_STORE:
+		ret = kvmppc_h_logical_ci_store(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_SET_MODE:
+		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6),
+					kvmppc_get_gpr(vcpu, 7));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
+		if (kvmppc_xics_enabled(vcpu)) {
+			if (xics_on_xive()) {
+				/*
+				 * Leave through the common exit path so
+				 * that the status actually reaches r3 and
+				 * hcall_needed is cleared.  Returning
+				 * directly would discard ret and leave the
+				 * hcall number in r3.
+				 */
+				ret = H_NOT_AVAILABLE;
+				break;
+			}
+			ret = kvmppc_xics_hcall(vcpu, req);
+			break;
+		}
+		return RESUME_HOST;
+	case H_SET_DABR:
+		ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		break;
+	case H_SET_XDABR:
+		ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5));
+		break;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case H_GET_TCE:
+		ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_PUT_TCE:
+		ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5),
+						kvmppc_get_gpr(vcpu, 6));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_PUT_TCE_INDIRECT:
+		ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5),
+						kvmppc_get_gpr(vcpu, 6),
+						kvmppc_get_gpr(vcpu, 7));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_STUFF_TCE:
+		ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+						kvmppc_get_gpr(vcpu, 5),
+						kvmppc_get_gpr(vcpu, 6),
+						kvmppc_get_gpr(vcpu, 7));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+#endif
+	case H_RANDOM:
+		/* The random value is written straight into guest r4. */
+		if (!arch_get_random_seed_longs(&vcpu->arch.regs.gpr[4], 1))
+			ret = H_HARDWARE;
+		break;
+	case H_RPT_INVALIDATE:
+		ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+					      kvmppc_get_gpr(vcpu, 5),
+					      kvmppc_get_gpr(vcpu, 6),
+					      kvmppc_get_gpr(vcpu, 7),
+					      kvmppc_get_gpr(vcpu, 8),
+					      kvmppc_get_gpr(vcpu, 9));
+		break;
+
+	case H_SET_PARTITION_TABLE:
+		ret = H_FUNCTION;
+		if (nesting_enabled(kvm))
+			ret = kvmhv_set_partition_table(vcpu);
+		break;
+	case H_ENTER_NESTED:
+		ret = H_FUNCTION;
+		if (!nesting_enabled(kvm))
+			break;
+		ret = kvmhv_enter_nested_guest(vcpu);
+		if (ret == H_INTERRUPT) {
+			kvmppc_set_gpr(vcpu, 3, 0);
+			vcpu->arch.hcall_needed = 0;
+			return -EINTR;
+		} else if (ret == H_TOO_HARD) {
+			kvmppc_set_gpr(vcpu, 3, 0);
+			vcpu->arch.hcall_needed = 0;
+			return RESUME_HOST;
+		}
+		break;
+	case H_TLB_INVALIDATE:
+		ret = H_FUNCTION;
+		if (nesting_enabled(kvm))
+			ret = kvmhv_do_nested_tlbie(vcpu);
+		break;
+	case H_COPY_TOFROM_GUEST:
+		ret = H_FUNCTION;
+		if (nesting_enabled(kvm))
+			ret = kvmhv_copy_tofrom_guest_nested(vcpu);
+		break;
+	case H_PAGE_INIT:
+		ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
+					 kvmppc_get_gpr(vcpu, 5),
+					 kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_PAGE_IN:
+		ret = H_UNSUPPORTED;
+		if (kvmppc_get_srr1(vcpu) & MSR_S)
+			ret = kvmppc_h_svm_page_in(kvm,
+						   kvmppc_get_gpr(vcpu, 4),
+						   kvmppc_get_gpr(vcpu, 5),
+						   kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_PAGE_OUT:
+		ret = H_UNSUPPORTED;
+		if (kvmppc_get_srr1(vcpu) & MSR_S)
+			ret = kvmppc_h_svm_page_out(kvm,
+						    kvmppc_get_gpr(vcpu, 4),
+						    kvmppc_get_gpr(vcpu, 5),
+						    kvmppc_get_gpr(vcpu, 6));
+		break;
+	case H_SVM_INIT_START:
+		ret = H_UNSUPPORTED;
+		if (kvmppc_get_srr1(vcpu) & MSR_S)
+			ret = kvmppc_h_svm_init_start(kvm);
+		break;
+	case H_SVM_INIT_DONE:
+		ret = H_UNSUPPORTED;
+		if (kvmppc_get_srr1(vcpu) & MSR_S)
+			ret = kvmppc_h_svm_init_done(kvm);
+		break;
+	case H_SVM_INIT_ABORT:
+		/*
+		 * Even if that call is made by the Ultravisor, the SRR1 value
+		 * is the guest context one, with the secure bit clear as it has
+		 * not yet been secured. So we can't check it here.
+		 * Instead the kvm->arch.secure_guest flag is checked inside
+		 * kvmppc_h_svm_init_abort().
+		 */
+		ret = kvmppc_h_svm_init_abort(kvm);
+		break;
+
+	default:
+		return RESUME_HOST;
+	}
+
+	/*
+	 * Common exit: every H_TOO_HARD result should already have been
+	 * turned into RESUME_HOST above, so it must never reach the guest.
+	 */
+	WARN_ON_ONCE(ret == H_TOO_HARD);
+	kvmppc_set_gpr(vcpu, 3, ret);
+	vcpu->arch.hcall_needed = 0;
+	return RESUME_GUEST;
+}
+
+/*
+ * H_CEDE handling for the P9 path, which does not go through the
+ * real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ *
+ * It must run early rather than from kvmppc_pseries_do_hcall(), so that
+ * the cede logic in kvmppc_run_single_vcpu() works properly.
+ *
+ * Sets MSR_EE in the guest MSR and marks the vcpu ceded.  If a prod is
+ * already pending it is consumed and the cede is cancelled.
+ */
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
+{
+	u64 guest_msr = __kvmppc_get_msr_hv(vcpu);
+
+	__kvmppc_set_msr_hv(vcpu, guest_msr | MSR_EE);
+
+	vcpu->arch.ceded = 1;
+	smp_mb();
+
+	if (!vcpu->arch.prodded)
+		return;
+
+	/* A prod was pending: consume it and undo the cede. */
+	vcpu->arch.prodded = 0;
+	smp_mb();
+	vcpu->arch.ceded = 0;
+}
+
+/*
+ * Report whether hcall @cmd has an implementation in HV KVM.
+ *
+ * Returns 1 for the hcalls listed below; for any other number the answer
+ * comes from the real-mode table via kvmppc_hcall_impl_hv_realmode().
+ */
+static int kvmppc_hcall_impl_hv(unsigned long cmd)
+{
+	switch (cmd) {
+	case H_CEDE:
+	case H_PROD:
+	case H_CONFER:
+	case H_REGISTER_VPA:
+	case H_SET_MODE:
+	case H_LOGICAL_CI_LOAD:
+	case H_LOGICAL_CI_STORE:
+	case H_PAGE_INIT:
+	case H_RPT_INVALIDATE:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case H_GET_TCE:
+	case H_PUT_TCE:
+	case H_PUT_TCE_INDIRECT:
+	case H_STUFF_TCE:
+#endif
+#ifdef CONFIG_KVM_XICS
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
+#endif
+		return 1;
+	default:
+		/* See if it's in the real-mode table */
+		return kvmppc_hcall_impl_hv_realmode(cmd);
+	}
+}
+
+/*
+ * Check whether the last guest instruction is the software breakpoint.
+ *
+ * Returns RESUME_HOST with exit_reason KVM_EXIT_DEBUG and the guest PC
+ * recorded when it is; otherwise queues an illegal-instruction program
+ * interrupt and returns RESUME_GUEST.  A failed instruction fetch also
+ * returns RESUME_GUEST so that the guest executes the instruction again.
+ */
+static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu)
+{
+	ppc_inst_t insn;
+	int fetch_rc;
+
+	fetch_rc = kvmppc_get_last_inst(vcpu, INST_GENERIC, &insn);
+	if (fetch_rc != EMULATE_DONE)
+		return RESUME_GUEST;	/* fetch failed: let the guest retry */
+
+	if (ppc_inst_val(insn) != KVMPPC_INST_SW_BREAKPOINT) {
+		kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+				(kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+		return RESUME_GUEST;
+	}
+
+	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+	vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
+	return RESUME_HOST;
+}
+
+/*
+ * Deliberately empty callback.  kvmppc_read_dpdes() passes it to
+ * smp_call_function_single() purely to interrupt the target CPU.
+ */
+static void do_nothing(void *x)
+{
+}
+
+/*
+ * Build an emulated DPDES value for the virtual core containing @vcpu.
+ *
+ * Walks the emul_smt_mode vcpus that share the virtual core and sets
+ * bit N of the result when thread N has a doorbell pending.  vcpu ids
+ * with no vcpu behind them are skipped.
+ */
+static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int smt = kvm->arch.emul_smt_mode;
+	int first_id = vcpu->vcpu_id & ~(smt - 1);
+	unsigned long pending = 0;
+	int t;
+
+	for (t = 0; t < smt; t++) {
+		struct kvm_vcpu *sibling = kvmppc_find_vcpu(kvm, first_id + t);
+		int hwcpu;
+
+		if (!sibling)
+			continue;
+
+		/*
+		 * If the vcpu is currently running on a physical cpu thread,
+		 * interrupt it in order to pull it out of the guest briefly,
+		 * which will update its vcore->dpdes value.
+		 */
+		hwcpu = READ_ONCE(sibling->cpu);
+		if (hwcpu >= 0)
+			smp_call_function_single(hwcpu, do_nothing, NULL, 1);
+
+		if (kvmppc_doorbell_pending(sibling))
+			pending |= 1 << t;
+	}
+
+	return pending;
+}
+
+/*
+ * On POWER9, emulate doorbell-related instructions in order to
+ * give the guest the illusion of running on a multi-threaded core.
+ * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+ * and mfspr DPDES.
+ *
+ * Returns EMULATE_FAIL for anything else.  On success the guest PC is
+ * advanced by 4 and RESUME_GUEST is returned; a failed instruction fetch
+ * returns RESUME_GUEST without advancing the PC.
+ */
+static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *dest;
+	unsigned long val;
+	ppc_inst_t fetched;
+	u32 insn, src_reg, my_thr;
+
+	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &fetched) != EMULATE_DONE)
+		return RESUME_GUEST;
+
+	insn = ppc_inst_val(fetched);
+	if (get_op(insn) != 31)
+		return EMULATE_FAIL;
+
+	src_reg = get_rb(insn);
+	/* This vcpu's thread number within its emulated core. */
+	my_thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+
+	switch (get_xop(insn)) {
+	case OP_31_XOP_MSGSNDP:
+		val = kvmppc_get_gpr(vcpu, src_reg);
+		if (((val >> 27) & 0x1f) != PPC_DBELL_SERVER)
+			break;
+		val &= 0x7f;
+		if (val >= kvm->arch.emul_smt_mode)
+			break;
+		dest = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - my_thr + val);
+		if (dest && !dest->arch.doorbell_request) {
+			dest->arch.doorbell_request = 1;
+			kvmppc_fast_vcpu_kick_hv(dest);
+		}
+		break;
+	case OP_31_XOP_MSGCLRP:
+		val = kvmppc_get_gpr(vcpu, src_reg);
+		if (((val >> 27) & 0x1f) == PPC_DBELL_SERVER) {
+			vcpu->arch.vcore->dpdes = 0;
+			vcpu->arch.doorbell_request = 0;
+		}
+		break;
+	case OP_31_XOP_MFSPR:
+		switch (get_sprn(insn)) {
+		case SPRN_DPDES:
+			val = kvmppc_read_dpdes(vcpu);
+			break;
+		case SPRN_TIR:
+			val = my_thr;
+			break;
+		default:
+			return EMULATE_FAIL;
+		}
+		kvmppc_set_gpr(vcpu, get_rt(insn), val);
+		break;
+	default:
+		return EMULATE_FAIL;
+	}
+
+	/* Step past the emulated instruction. */
+	kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+	return RESUME_GUEST;
+}
+
+/*
+ * If the lppaca had pmcregs_in_use clear when we exited the guest, then
+ * HFSCR_PM is cleared for next entry. If the guest then tries to access
+ * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM
+ * back in the guest HFSCR will cause the next entry to load the PMU SPRs and
+ * allow the guest access to continue.
+ *
+ * Returns EMULATE_FAIL when HFSCR_PM is not in hfscr_permitted, otherwise
+ * RESUME_GUEST.
+ */
+static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.hfscr_permitted & HFSCR_PM) {
+		kvmppc_set_hfscr_hv(vcpu,
+				    kvmppc_get_hfscr_hv(vcpu) | HFSCR_PM);
+		return RESUME_GUEST;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * Facility-unavailable handler for EBB: set HFSCR_EBB in the guest HFSCR
+ * when the facility is permitted for this vcpu.
+ *
+ * Returns EMULATE_FAIL when HFSCR_EBB is not in hfscr_permitted, otherwise
+ * RESUME_GUEST.
+ */
+static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.hfscr_permitted & HFSCR_EBB) {
+		kvmppc_set_hfscr_hv(vcpu,
+				    kvmppc_get_hfscr_hv(vcpu) | HFSCR_EBB);
+		return RESUME_GUEST;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * Facility-unavailable handler for TM: set HFSCR_TM in the guest HFSCR
+ * when the facility is permitted for this vcpu.
+ *
+ * Returns EMULATE_FAIL when HFSCR_TM is not in hfscr_permitted, otherwise
+ * RESUME_GUEST.
+ */
+static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.hfscr_permitted & HFSCR_TM) {
+		kvmppc_set_hfscr_hv(vcpu,
+				    kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM);
+		return RESUME_GUEST;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * Decide what to do after a guest exit, based on vcpu->arch.trap.
+ *
+ * Depending on the trap this bumps exit statistics, queues an interrupt
+ * for the guest, or fills in vcpu->run for userspace.  The result is one
+ * of the RESUME_* codes set below, or whatever the emulation helpers
+ * called here return.
+ *
+ * NOTE(review): @tsk is not referenced anywhere in this function.
+ */
+static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
+				 struct task_struct *tsk)
+{
+	struct kvm_run *run = vcpu->run;
+	int r = RESUME_HOST;
+
+	vcpu->stat.sum_exits++;
+
+	/*
+	 * This can happen if an interrupt occurs in the last stages
+	 * of guest entry or the first stages of guest exit (i.e. after
+	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
+	 * That can happen due to a bug, or due to a machine check
+	 * occurring at just the wrong time.
+	 */
+	if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) {
+		printk(KERN_EMERG "KVM trap in HV mode!\n");
+		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			vcpu->arch.shregs.msr);
+		kvmppc_dump_regs(vcpu);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		return RESUME_HOST;
+	}
+	/* Defaults; individual cases below override exit_reason as needed. */
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+	switch (vcpu->arch.trap) {
+	/* We're good on these - the host merely wanted to get our attention */
+	case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+		WARN_ON_ONCE(1); /* Should never happen */
+		vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+		fallthrough;
+	case BOOK3S_INTERRUPT_HV_DECREMENTER:
+		vcpu->stat.dec_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_EXTERNAL:
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+	case BOOK3S_INTERRUPT_H_VIRT:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
+	case BOOK3S_INTERRUPT_HMI:
+	case BOOK3S_INTERRUPT_PERFMON:
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_MACHINE_CHECK: {
+		static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+					      DEFAULT_RATELIMIT_BURST);
+		/*
+		 * Print the MCE event to host console. Ratelimit so the guest
+		 * can't flood the host log.
+		 */
+		if (__ratelimit(&rs))
+			machine_check_print_event_info(&vcpu->arch.mce_evt,false, true);
+
+		/*
+		 * If the guest can do FWNMI, exit to userspace so it can
+		 * deliver a FWNMI to the guest.
+		 * Otherwise we synthesize a machine check for the guest
+		 * so that it knows that the machine check occurred.
+		 */
+		if (!vcpu->kvm->arch.fwnmi_enabled) {
+			ulong flags = (__kvmppc_get_msr_hv(vcpu) & 0x083c0000) |
+					(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+			kvmppc_core_queue_machine_check(vcpu, flags);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		/* Exit to userspace with KVM_EXIT_NMI as exit reason */
+		run->exit_reason = KVM_EXIT_NMI;
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		/* Clear out the old NMI status from run->flags */
+		run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+		/* Now set the NMI status */
+		if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+			run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+		else
+			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+		r = RESUME_HOST;
+		break;
+	}
+	case BOOK3S_INTERRUPT_PROGRAM:
+	{
+		ulong flags;
+		/*
+		 * Normally program interrupts are delivered directly
+		 * to the guest by the hardware, but we can get here
+		 * as a result of a hypervisor emulation interrupt
+		 * (e40) getting turned into a 700 by BML RTAS.
+		 */
+		flags = (__kvmppc_get_msr_hv(vcpu) & 0x1f0000ull) |
+			(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+		kvmppc_core_queue_program(vcpu, flags);
+		r = RESUME_GUEST;
+		break;
+	}
+	case BOOK3S_INTERRUPT_SYSCALL:
+	{
+		int i;
+
+		if (unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+			/*
+			 * Guest userspace executed sc 1. This can only be
+			 * reached by the P9 path because the old path
+			 * handles this case in realmode hcall handlers.
+			 */
+			if (!kvmhv_vcpu_is_radix(vcpu)) {
+				/*
+				 * A guest could be running PR KVM, so this
+				 * may be a PR KVM hcall. It must be reflected
+				 * to the guest kernel as a sc interrupt.
+				 */
+				kvmppc_core_queue_syscall(vcpu);
+			} else {
+				/*
+				 * Radix guests can not run PR KVM or nested HV
+				 * hash guests which might run PR KVM, so this
+				 * is always a privilege fault. Send a program
+				 * check to guest kernel.
+				 */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+			}
+			r = RESUME_GUEST;
+			break;
+		}
+
+		/*
+		 * hcall - gather args and set exit_reason. This will next be
+		 * handled by kvmppc_pseries_do_hcall which may be able to deal
+		 * with it and resume guest, or may punt to userspace.
+		 */
+		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
+		/* The nine arguments are copied from r4..r12. */
+		for (i = 0; i < 9; ++i)
+			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
+		run->exit_reason = KVM_EXIT_PAPR_HCALL;
+		vcpu->arch.hcall_needed = 1;
+		r = RESUME_HOST;
+		break;
+	}
+	/*
+	 * We get these next two if the guest accesses a page which it thinks
+	 * it has mapped but which is not actually present, either because
+	 * it is for an emulated I/O device or because the corresponding
+	 * host page has been paged out.
+	 *
+	 * Any other HDSI/HISI interrupts have been handled already for P7/8
+	 * guests. For POWER9 hash guests not using rmhandlers, basic hash
+	 * fault handling is done here.
+	 */
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+		unsigned long vsid;
+		long err;
+
+		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+		    unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
+			r = RESUME_GUEST; /* Just retry if it's the canary */
+			break;
+		}
+
+		if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/*
+			 * Radix doesn't require anything, and pre-ISAv3.0 hash
+			 * already attempted to handle this in rmhandlers. The
+			 * hash fault handling below is v3 only (it uses ASDR
+			 * via fault_gpa).
+			 */
+			r = RESUME_PAGE_FAULT;
+			break;
+		}
+
+		/* Neither a missing HPTE nor a protection fault: reflect it. */
+		if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+			kvmppc_core_queue_data_storage(vcpu,
+				kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		/* With data relocation off, use the VRMA SLB value instead. */
+		if (!(__kvmppc_get_msr_hv(vcpu) & MSR_DR))
+			vsid = vcpu->kvm->arch.vrma_slb_v;
+		else
+			vsid = vcpu->arch.fault_gpa;
+
+		err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+				vsid, vcpu->arch.fault_dsisr, true);
+		if (err == 0) {
+			r = RESUME_GUEST;
+		} else if (err == -1 || err == -2) {
+			r = RESUME_PAGE_FAULT;
+		} else {
+			/* Any other value is passed on as the DSISR flags. */
+			kvmppc_core_queue_data_storage(vcpu,
+				kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+				vcpu->arch.fault_dar, err);
+			r = RESUME_GUEST;
+		}
+		break;
+	}
+	case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+		unsigned long vsid;
+		long err;
+
+		/* For instruction faults the faulting address is the PC. */
+		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+		vcpu->arch.fault_dsisr = __kvmppc_get_msr_hv(vcpu) &
+			DSISR_SRR1_MATCH_64S;
+		if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/*
+			 * Radix doesn't require anything, and pre-ISAv3.0 hash
+			 * already attempted to handle this in rmhandlers. The
+			 * hash fault handling below is v3 only (it uses ASDR
+			 * via fault_gpa).
+			 */
+			if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE)
+				vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+			r = RESUME_PAGE_FAULT;
+			break;
+		}
+
+		if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+			kvmppc_core_queue_inst_storage(vcpu,
+				vcpu->arch.fault_dsisr |
+				(kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+			r = RESUME_GUEST;
+			break;
+		}
+
+		/* With instruction relocation off, use the VRMA SLB value. */
+		if (!(__kvmppc_get_msr_hv(vcpu) & MSR_IR))
+			vsid = vcpu->kvm->arch.vrma_slb_v;
+		else
+			vsid = vcpu->arch.fault_gpa;
+
+		err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+				vsid, vcpu->arch.fault_dsisr, false);
+		if (err == 0) {
+			r = RESUME_GUEST;
+		} else if (err == -1) {
+			r = RESUME_PAGE_FAULT;
+		} else {
+			kvmppc_core_queue_inst_storage(vcpu,
+				err | (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+			r = RESUME_GUEST;
+		}
+		break;
+	}
+
+	/*
+	 * This occurs if the guest executes an illegal instruction.
+	 * If the guest debug is disabled, generate a program interrupt
+	 * to the guest. If guest debug is enabled, we need to check
+	 * whether the instruction is a software breakpoint instruction.
+	 * Accordingly return to Guest or Host.
+	 */
+	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+				swab32(vcpu->arch.emul_inst) :
+				vcpu->arch.emul_inst;
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+			r = kvmppc_emulate_debug_inst(vcpu);
+		} else {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+				(kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+			r = RESUME_GUEST;
+		}
+		break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		/*
+		 * This occurs for various TM-related instructions that
+		 * we need to emulate on POWER9 DD2.2.  We have already
+		 * handled the cases where the guest was in real-suspend
+		 * mode and was transitioning to transactional state.
+		 */
+		r = kvmhv_p9_tm_emulation(vcpu);
+		if (r != -1)
+			break;
+		fallthrough; /* go to facility unavailable handler */
+#endif
+
+	/*
+	 * This occurs if the guest (kernel or userspace), does something that
+	 * is prohibited by HFSCR.
+	 * On POWER9, this could be a doorbell instruction that we need
+	 * to emulate.
+	 * Otherwise, we just generate a program interrupt to the guest.
+	 */
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+		/* The interrupt cause is held in the top byte of the HFSCR. */
+		u64 cause = kvmppc_get_hfscr_hv(vcpu) >> 56;
+
+		r = EMULATE_FAIL;
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			if (cause == FSCR_MSGP_LG)
+				r = kvmppc_emulate_doorbell_instr(vcpu);
+			if (cause == FSCR_PM_LG)
+				r = kvmppc_pmu_unavailable(vcpu);
+			if (cause == FSCR_EBB_LG)
+				r = kvmppc_ebb_unavailable(vcpu);
+			if (cause == FSCR_TM_LG)
+				r = kvmppc_tm_unavailable(vcpu);
+		}
+		if (r == EMULATE_FAIL) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+				(kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+			r = RESUME_GUEST;
+		}
+		break;
+	}
+
+	case BOOK3S_INTERRUPT_HV_RM_HARD:
+		r = RESUME_PASSTHROUGH;
+		break;
+	default:
+		/* Unrecognised trap: log it; exit_reason stays KVM_EXIT_UNKNOWN. */
+		kvmppc_dump_regs(vcpu);
+		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			__kvmppc_get_msr_hv(vcpu));
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		r = RESUME_HOST;
+		break;
+	}
+
+	return r;
+}
+
+/*
+ * Decide what to do after an exit taken while running a nested guest,
+ * based on vcpu->arch.trap.
+ *
+ * The result is RESUME_GUEST or RESUME_HOST as set below, or whatever
+ * kvmhv_nested_page_fault(), kvmhv_p9_tm_emulation() or
+ * kvmppc_nested_h_rpt_invalidate() return.
+ * NOTE(review): per the comments below, RESUME_HOST here appears to mean
+ * "pass the exit to the L1 guest" - confirm against the caller.
+ */
+static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+{
+	int r;
+	int srcu_idx;
+
+	vcpu->stat.sum_exits++;
+
+	/*
+	 * This can happen if an interrupt occurs in the last stages
+	 * of guest entry or the first stages of guest exit (i.e. after
+	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
+	 * That can happen due to a bug, or due to a machine check
+	 * occurring at just the wrong time.
+	 */
+	if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) {
+		pr_emerg("KVM trap in HV mode while nested!\n");
+		pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			 vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			 __kvmppc_get_msr_hv(vcpu));
+		kvmppc_dump_regs(vcpu);
+		return RESUME_HOST;
+	}
+	switch (vcpu->arch.trap) {
+	/* We're good on these - the host merely wanted to get our attention */
+	case BOOK3S_INTERRUPT_HV_DECREMENTER:
+		vcpu->stat.dec_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_HOST;
+		break;
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+	case BOOK3S_INTERRUPT_H_VIRT:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+	/* These need to go to the nested HV */
+	case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+		/* Report it under the ordinary HV decrementer trap number. */
+		vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+		vcpu->stat.dec_exits++;
+		r = RESUME_HOST;
+		break;
+	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
+	case BOOK3S_INTERRUPT_HMI:
+	case BOOK3S_INTERRUPT_PERFMON:
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+	{
+		static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+					      DEFAULT_RATELIMIT_BURST);
+		/* Pass the machine check to the L1 guest */
+		r = RESUME_HOST;
+		/* Print the MCE event to host console. */
+		if (__ratelimit(&rs))
+			machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
+		break;
+	}
+	/*
+	 * We get these next two if the guest accesses a page which it thinks
+	 * it has mapped but which is not actually present, either because
+	 * it is for an emulated I/O device or because the corresponding
+	 * host page has been paged out.
+	 */
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		/* The nested fault handler runs under the kvm->srcu read lock. */
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = kvmhv_nested_page_fault(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		break;
+	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		/* For instruction faults the faulting address is the PC. */
+		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+		vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
+					 DSISR_SRR1_MATCH_64S;
+		if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE)
+			vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = kvmhv_nested_page_fault(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		/*
+		 * This occurs for various TM-related instructions that
+		 * we need to emulate on POWER9 DD2.2.  We have already
+		 * handled the cases where the guest was in real-suspend
+		 * mode and was transitioning to transactional state.
+		 */
+		r = kvmhv_p9_tm_emulation(vcpu);
+		if (r != -1)
+			break;
+		fallthrough; /* go to facility unavailable handler */
+#endif
+
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+		/* The interrupt cause is held in the top byte of the HFSCR. */
+		u64 cause = vcpu->arch.hfscr >> 56;
+
+		/*
+		 * Only pass HFU interrupts to the L1 if the facility is
+		 * permitted but disabled by the L1's HFSCR, otherwise
+		 * the interrupt does not make sense to the L1 so turn
+		 * it into a HEAI.
+		 */
+		if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+				(vcpu->arch.nested_hfscr & (1UL << cause))) {
+			ppc_inst_t pinst;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+
+			/*
+			 * If the fetch failed, return to guest and
+			 * try executing it again.
+			 */
+			r = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+			vcpu->arch.emul_inst = ppc_inst_val(pinst);
+			if (r != EMULATE_DONE)
+				r = RESUME_GUEST;
+			else
+				r = RESUME_HOST;
+		} else {
+			r = RESUME_HOST;
+		}
+
+		break;
+	}
+
+	case BOOK3S_INTERRUPT_HV_RM_HARD:
+		vcpu->arch.trap = 0;
+		r = RESUME_GUEST;
+		if (!xics_on_xive())
+			kvmppc_xics_rm_complete(vcpu, 0);
+		break;
+	case BOOK3S_INTERRUPT_SYSCALL:
+	{
+		/* The hcall number is in r3. */
+		unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+		/*
+		 * The H_RPT_INVALIDATE hcalls issued by nested
+		 * guests for process-scoped invalidations when
+		 * GTSE=0, are handled here in L0.
+		 */
+		if (req == H_RPT_INVALIDATE) {
+			r = kvmppc_nested_h_rpt_invalidate(vcpu);
+			break;
+		}
+
+		r = RESUME_HOST;
+		break;
+	}
+	default:
+		r = RESUME_HOST;
+		break;
+	}
+
+	return r;
+}
+
+/*
+ * Fill @sregs for userspace: the structure is zeroed, then the PVR and
+ * the first slb_max SLB entries of @vcpu are copied in.  Always
+ * returns 0.
+ */
+static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
+{
+	int n = 0;
+
+	memset(sregs, 0, sizeof(*sregs));
+	sregs->pvr = vcpu->arch.pvr;
+
+	while (n < vcpu->arch.slb_max) {
+		sregs->u.s.ppc64.slb[n].slbe = vcpu->arch.slb[n].orige;
+		sregs->u.s.ppc64.slb[n].slbv = vcpu->arch.slb[n].origv;
+		n++;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the SLB of @vcpu from @sregs.
+ *
+ * Only entries with SLB_ESID_V set among the first slb_nr slots are
+ * taken; they are packed at the front of vcpu->arch.slb and slb_max is
+ * set to their count.
+ *
+ * Returns -EINVAL if the PVR in @sregs differs from the vcpu's,
+ * otherwise 0.
+ */
+static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
+{
+	int src;
+	int nvalid = 0;
+
+	/* Only accept the same PVR as the host's, since we can't spoof it */
+	if (vcpu->arch.pvr != sregs->pvr)
+		return -EINVAL;
+
+	for (src = 0; src < vcpu->arch.slb_nr; src++) {
+		if (!(sregs->u.s.ppc64.slb[src].slbe & SLB_ESID_V))
+			continue;
+
+		vcpu->arch.slb[nvalid].orige = sregs->u.s.ppc64.slb[src].slbe;
+		vcpu->arch.slb[nvalid].origv = sregs->u.s.ppc64.slb[src].slbv;
+		nvalid++;
+	}
+	vcpu->arch.slb_max = nvalid;
+
+	return 0;
+}
+
+/*
+ * Enforce limits on guest LPCR values based on hardware availability,
+ * guest configuration, and possibly hypervisor support and security
+ * concerns.
+ *
+ * Returns @lpcr with the disallowed fields cleared.
+ */
+unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
+{
+	bool radix = kvm_is_radix(kvm);
+	unsigned long drop = 0;
+
+	/* LPCR_TC only applies to HPT guests */
+	if (radix)
+		drop |= LPCR_TC;
+
+	/*
+	 * AIL is cleared unless all of the following hold:
+	 *  - the CPU is POWER8 or above (userspace can modify AIL there);
+	 *  - the requested value is AIL=3 (LPCR[AIL]=1/2 is disallowed);
+	 *  - this is not a radix guest on a POWER9 with the prefetch bug.
+	 *    On those we force AIL off to prevent executing in MSR[HV]=1
+	 *    mode with the MMU enabled and PIDR set to guest, which can
+	 *    result in Q0 translations with LPID=0 PID=PIDR to be cached,
+	 *    which the host TLB management does not expect.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S) ||
+	    (lpcr & LPCR_AIL) != LPCR_AIL_3 ||
+	    (radix && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)))
+		drop |= LPCR_AIL;
+
+	/*
+	 * On POWER9, allow userspace to enable large decrementer for the
+	 * guest, whether or not the host has it enabled.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		drop |= LPCR_LD;
+
+	return lpcr & ~drop;
+}
+
+/*
+ * Warn (once) if @lpcr would be changed by kvmppc_filter_lpcr_hv(),
+ * i.e. if it contains bits that the filter does not allow.
+ */
+static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
+{
+	unsigned long filtered = kvmppc_filter_lpcr_hv(kvm, lpcr);
+
+	WARN_ONCE(lpcr != filtered,
+		  "lpcr 0x%lx differs from filtered 0x%lx\n",
+		  lpcr, filtered);
+}
+
+/*
+ * Update the LPCR of the virtual core that @vcpu belongs to.
+ *
+ * Only the userspace-modifiable fields of @new_lpcr are merged into the
+ * current value, and the result is passed through
+ * kvmppc_filter_lpcr_hv().  When @preserve_top32 is set, only fields in
+ * the low 32 bits are taken from @new_lpcr.  The update is done under
+ * the vcore lock.
+ */
+static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
+		bool preserve_top32)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvm *kvm = vcpu->kvm;
+	u64 writable;
+	u64 merged;
+
+	/*
+	 * Userspace can only modify
+	 * DPFD (default prefetch depth), ILE (interrupt little-endian),
+	 * TC (translation control), AIL (alternate interrupt location),
+	 * LD (large decrementer).
+	 * These are subject to restrictions from kvmppc_filter_lpcr_hv().
+	 */
+	writable = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
+
+	/* Broken 32-bit version of LPCR must not clear top bits */
+	if (preserve_top32)
+		writable &= 0xFFFFFFFF;
+
+	spin_lock(&vc->lock);
+
+	merged = (vc->lpcr & ~writable) | (new_lpcr & writable);
+	merged = kvmppc_filter_lpcr_hv(kvm, merged);
+
+	/*
+	 * If ILE (interrupt little-endian) has changed, update the
+	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
+	 */
+	if ((merged ^ vc->lpcr) & LPCR_ILE) {
+		struct kvm_vcpu *sibling;
+		unsigned long n;
+
+		kvm_for_each_vcpu(n, sibling, kvm) {
+			if (sibling->arch.vcore != vc)
+				continue;
+			if (merged & LPCR_ILE)
+				sibling->arch.intr_msr |= MSR_LE;
+			else
+				sibling->arch.intr_msr &= ~MSR_LE;
+		}
+	}
+
+	vc->lpcr = merged;
+
+	spin_unlock(&vc->lock);
+}
+
+static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_DEBUG_INST:
+		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+		break;
+	case KVM_REG_PPC_HIOR:
+		*val = get_reg_val(id, 0);
+		break;
+	case KVM_REG_PPC_DABR:
+		*val = get_reg_val(id, vcpu->arch.dabr);
+		break;
+	case KVM_REG_PPC_DABRX:
+		*val = get_reg_val(id, vcpu->arch.dabrx);
+		break;
+	case KVM_REG_PPC_DSCR:
+		*val = get_reg_val(id, kvmppc_get_dscr_hv(vcpu));
+		break;
+	case KVM_REG_PPC_PURR:
+		*val = get_reg_val(id, kvmppc_get_purr_hv(vcpu));
+		break;
+	case KVM_REG_PPC_SPURR:
+		*val = get_reg_val(id, kvmppc_get_spurr_hv(vcpu));
+		break;
+	case KVM_REG_PPC_AMR:
+		*val = get_reg_val(id, kvmppc_get_amr_hv(vcpu));
+		break;
+	case KVM_REG_PPC_UAMOR:
+		*val = get_reg_val(id, kvmppc_get_uamor_hv(vcpu));
+		break;
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
+		i = id - KVM_REG_PPC_MMCR0;
+		*val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, i));
+		break;
+	case KVM_REG_PPC_MMCR2:
+		*val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 2));
+		break;
+	case KVM_REG_PPC_MMCRA:
+		*val = get_reg_val(id, kvmppc_get_mmcra_hv(vcpu));
+		break;
+	case KVM_REG_PPC_MMCRS:
+		*val = get_reg_val(id, vcpu->arch.mmcrs);
+		break;
+	case KVM_REG_PPC_MMCR3:
+		*val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 3));
+		break;
+	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+		i = id - KVM_REG_PPC_PMC1;
+		*val = get_reg_val(id, kvmppc_get_pmc_hv(vcpu, i));
+		break;
+	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+		i = id - KVM_REG_PPC_SPMC1;
+		*val = get_reg_val(id, vcpu->arch.spmc[i]);
+		break;
+	case KVM_REG_PPC_SIAR:
+		*val = get_reg_val(id, kvmppc_get_siar_hv(vcpu));
+		break;
+	case KVM_REG_PPC_SDAR:
+		*val = get_reg_val(id, kvmppc_get_sdar_hv(vcpu));
+		break;
+	case KVM_REG_PPC_SIER:
+		*val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 0));
+		break;
+	case KVM_REG_PPC_SIER2:
+		*val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 1));
+		break;
+	case KVM_REG_PPC_SIER3:
+		*val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 2));
+		break;
+	case KVM_REG_PPC_IAMR:
+		*val = get_reg_val(id, kvmppc_get_iamr_hv(vcpu));
+		break;
+	case KVM_REG_PPC_PSPB:
+		*val = get_reg_val(id, kvmppc_get_pspb_hv(vcpu));
+		break;
+	case KVM_REG_PPC_DPDES:
+		/*
+		 * On POWER9, where we are emulating msgsndp etc.,
+		 * we return 1 bit for each vcpu, which can come from
+		 * either vcore->dpdes or doorbell_request.
+		 * On POWER8, doorbell_request is 0.
+		 */
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			*val = get_reg_val(id, vcpu->arch.doorbell_request);
+		else
+			*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+		break;
+	case KVM_REG_PPC_VTB:
+		*val = get_reg_val(id, vcpu->arch.vcore->vtb);
+		break;
+	case KVM_REG_PPC_DAWR:
+		*val = get_reg_val(id, kvmppc_get_dawr0_hv(vcpu));
+		break;
+	case KVM_REG_PPC_DAWRX:
+		*val = get_reg_val(id, kvmppc_get_dawrx0_hv(vcpu));
+		break;
+	case KVM_REG_PPC_DAWR1:
+		*val = get_reg_val(id, kvmppc_get_dawr1_hv(vcpu));
+		break;
+	case KVM_REG_PPC_DAWRX1:
+		*val = get_reg_val(id, kvmppc_get_dawrx1_hv(vcpu));
+		break;
+	case KVM_REG_PPC_CIABR:
+		*val = get_reg_val(id, kvmppc_get_ciabr_hv(vcpu));
+		break;
+	case KVM_REG_PPC_CSIGR:
+		*val = get_reg_val(id, vcpu->arch.csigr);
+		break;
+	case KVM_REG_PPC_TACR:
+		*val = get_reg_val(id, vcpu->arch.tacr);
+		break;
+	case KVM_REG_PPC_TCSCR:
+		*val = get_reg_val(id, vcpu->arch.tcscr);
+		break;
+	case KVM_REG_PPC_PID:
+		*val = get_reg_val(id, vcpu->arch.pid);
+		break;
+	case KVM_REG_PPC_ACOP:
+		*val = get_reg_val(id, vcpu->arch.acop);
+		break;
+	case KVM_REG_PPC_WORT:
+		*val = get_reg_val(id, kvmppc_get_wort_hv(vcpu));
+		break;
+	case KVM_REG_PPC_TIDR:
+		*val = get_reg_val(id, vcpu->arch.tid);
+		break;
+	case KVM_REG_PPC_PSSCR:
+		*val = get_reg_val(id, vcpu->arch.psscr);
+		break;
+	case KVM_REG_PPC_VPA_ADDR:
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		break;
+	case KVM_REG_PPC_VPA_SLB:
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
+		val->vpaval.length = vcpu->arch.slb_shadow.len;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		break;
+	case KVM_REG_PPC_VPA_DTL:
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
+		val->vpaval.length = vcpu->arch.dtl.len;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		break;
+	case KVM_REG_PPC_TB_OFFSET:
+		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+		break;
+	case KVM_REG_PPC_LPCR:
+	case KVM_REG_PPC_LPCR_64:
+		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+		break;
+	case KVM_REG_PPC_PPR:
+		*val = get_reg_val(id, kvmppc_get_ppr_hv(vcpu));
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		*val = get_reg_val(id, vcpu->arch.tfhar);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		*val = get_reg_val(id, vcpu->arch.tfiar);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		*val = get_reg_val(id, vcpu->arch.texasr);
+		break;
+	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+		i = id - KVM_REG_PPC_TM_GPR0;
+		*val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
+		break;
+	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+	{
+		int j;
+		i = id - KVM_REG_PPC_TM_VSR0;
+		if (i < 32)
+			for (j = 0; j < TS_FPRWIDTH; j++)
+				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
+		else {
+			if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				val->vval = vcpu->arch.vr_tm.vr[i-32];
+			else
+				r = -ENXIO;
+		}
+		break;
+	}
+	case KVM_REG_PPC_TM_CR:
+		*val = get_reg_val(id, vcpu->arch.cr_tm);
+		break;
+	case KVM_REG_PPC_TM_XER:
+		*val = get_reg_val(id, vcpu->arch.xer_tm);
+		break;
+	case KVM_REG_PPC_TM_LR:
+		*val = get_reg_val(id, vcpu->arch.lr_tm);
+		break;
+	case KVM_REG_PPC_TM_CTR:
+		*val = get_reg_val(id, vcpu->arch.ctr_tm);
+		break;
+	case KVM_REG_PPC_TM_FPSCR:
+		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
+		break;
+	case KVM_REG_PPC_TM_AMR:
+		*val = get_reg_val(id, vcpu->arch.amr_tm);
+		break;
+	case KVM_REG_PPC_TM_PPR:
+		*val = get_reg_val(id, vcpu->arch.ppr_tm);
+		break;
+	case KVM_REG_PPC_TM_VRSAVE:
+		*val = get_reg_val(id, vcpu->arch.vrsave_tm);
+		break;
+	case KVM_REG_PPC_TM_VSCR:
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_TM_DSCR:
+		*val = get_reg_val(id, vcpu->arch.dscr_tm);
+		break;
+	case KVM_REG_PPC_TM_TAR:
+		*val = get_reg_val(id, vcpu->arch.tar_tm);
+		break;
+#endif
+	case KVM_REG_PPC_ARCH_COMPAT:
+		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+		break;
+	case KVM_REG_PPC_DEC_EXPIRY:
+		*val = get_reg_val(id, vcpu->arch.dec_expires);
+		break;
+	case KVM_REG_PPC_ONLINE:
+		*val = get_reg_val(id, vcpu->arch.online);
+		break;
+	case KVM_REG_PPC_PTCR:
+		*val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
+		break;
+	case KVM_REG_PPC_FSCR:
+		*val = get_reg_val(id, kvmppc_get_fscr_hv(vcpu));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
+{
+	int r = 0;
+	long int i;
+	unsigned long addr, len;
+
+	switch (id) {
+	case KVM_REG_PPC_HIOR:
+		/* Only allow this to be set to zero */
+		if (set_reg_val(id, *val))
+			r = -EINVAL;
+		break;
+	case KVM_REG_PPC_DABR:
+		vcpu->arch.dabr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DABRX:
+		vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
+		break;
+	case KVM_REG_PPC_DSCR:
+		kvmppc_set_dscr_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_PURR:
+		kvmppc_set_purr_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_SPURR:
+		kvmppc_set_spurr_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_AMR:
+		kvmppc_set_amr_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_UAMOR:
+		kvmppc_set_uamor_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
+		i = id - KVM_REG_PPC_MMCR0;
+		kvmppc_set_mmcr_hv(vcpu, i, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_MMCR2:
+		kvmppc_set_mmcr_hv(vcpu, 2, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_MMCRA:
+		kvmppc_set_mmcra_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_MMCRS:
+		vcpu->arch.mmcrs = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MMCR3:
+		kvmppc_set_mmcr_hv(vcpu, 3, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+		i = id - KVM_REG_PPC_PMC1;
+		kvmppc_set_pmc_hv(vcpu, i, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
+		i = id - KVM_REG_PPC_SPMC1;
+		vcpu->arch.spmc[i] = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_SIAR:
+		kvmppc_set_siar_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_SDAR:
+		kvmppc_set_sdar_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_SIER:
+		kvmppc_set_sier_hv(vcpu, 0, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_SIER2:
+		kvmppc_set_sier_hv(vcpu, 1, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_SIER3:
+		kvmppc_set_sier_hv(vcpu, 2, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_IAMR:
+		kvmppc_set_iamr_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_PSPB:
+		kvmppc_set_pspb_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_DPDES:
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1;
+		else
+			vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_VTB:
+		vcpu->arch.vcore->vtb = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DAWR:
+		kvmppc_set_dawr0_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_DAWRX:
+		kvmppc_set_dawrx0_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP);
+		break;
+	case KVM_REG_PPC_DAWR1:
+		kvmppc_set_dawr1_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_DAWRX1:
+		kvmppc_set_dawrx1_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP);
+		break;
+	case KVM_REG_PPC_CIABR:
+		kvmppc_set_ciabr_hv(vcpu, set_reg_val(id, *val));
+		/* Don't allow setting breakpoints in hypervisor code */
+		if ((kvmppc_get_ciabr_hv(vcpu) & CIABR_PRIV) == CIABR_PRIV_HYPER)
+			kvmppc_set_ciabr_hv(vcpu, kvmppc_get_ciabr_hv(vcpu) & ~CIABR_PRIV);
+		break;
+	case KVM_REG_PPC_CSIGR:
+		vcpu->arch.csigr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TACR:
+		vcpu->arch.tacr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TCSCR:
+		vcpu->arch.tcscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_PID:
+		vcpu->arch.pid = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_ACOP:
+		vcpu->arch.acop = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_WORT:
+		kvmppc_set_wort_hv(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_TIDR:
+		vcpu->arch.tid = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_PSSCR:
+		vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
+		break;
+	case KVM_REG_PPC_VPA_ADDR:
+		addr = set_reg_val(id, *val);
+		r = -EINVAL;
+		if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
+			      vcpu->arch.dtl.next_gpa))
+			break;
+		r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
+		break;
+	case KVM_REG_PPC_VPA_SLB:
+		addr = val->vpaval.addr;
+		len = val->vpaval.length;
+		r = -EINVAL;
+		if (addr && !vcpu->arch.vpa.next_gpa)
+			break;
+		r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
+		break;
+	case KVM_REG_PPC_VPA_DTL:
+		addr = val->vpaval.addr;
+		len = val->vpaval.length;
+		r = -EINVAL;
+		if (addr && (len < sizeof(struct dtl_entry) ||
+			     !vcpu->arch.vpa.next_gpa))
+			break;
+		len -= len % sizeof(struct dtl_entry);
+		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
+		break;
+	case KVM_REG_PPC_TB_OFFSET:
+	{
+		/* round up to multiple of 2^24 */
+		u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24);
+
+		/*
+		 * Now that we know the timebase offset, update the
+		 * decrementer expiry with a guest timebase value. If
+		 * the userspace does not set DEC_EXPIRY, this ensures
+		 * a migrated vcpu at least starts with an expired
+		 * decrementer, which is better than a large one that
+		 * causes a hang.
+		 */
+		if (!vcpu->arch.dec_expires && tb_offset)
+			vcpu->arch.dec_expires = get_tb() + tb_offset;
+
+		vcpu->arch.vcore->tb_offset = tb_offset;
+		break;
+	}
+	case KVM_REG_PPC_LPCR:
+		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
+		break;
+	case KVM_REG_PPC_LPCR_64:
+		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
+		break;
+	case KVM_REG_PPC_PPR:
+		kvmppc_set_ppr_hv(vcpu, set_reg_val(id, *val));
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		vcpu->arch.tfhar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		vcpu->arch.tfiar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		vcpu->arch.texasr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+		i = id - KVM_REG_PPC_TM_GPR0;
+		vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+	{
+		int j;
+		i = id - KVM_REG_PPC_TM_VSR0;
+		if (i < 32)
+			for (j = 0; j < TS_FPRWIDTH; j++)
+				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
+		else
+			if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				vcpu->arch.vr_tm.vr[i-32] = val->vval;
+			else
+				r = -ENXIO;
+		break;
+	}
+	case KVM_REG_PPC_TM_CR:
+		vcpu->arch.cr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_XER:
+		vcpu->arch.xer_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_LR:
+		vcpu->arch.lr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_CTR:
+		vcpu->arch.ctr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_FPSCR:
+		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_AMR:
+		vcpu->arch.amr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_PPR:
+		vcpu->arch.ppr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VRSAVE:
+		vcpu->arch.vrsave_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VSCR:
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_TM_DSCR:
+		vcpu->arch.dscr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_TAR:
+		vcpu->arch.tar_tm = set_reg_val(id, *val);
+		break;
+#endif
+	case KVM_REG_PPC_ARCH_COMPAT:
+		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
+		break;
+	case KVM_REG_PPC_DEC_EXPIRY:
+		vcpu->arch.dec_expires = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_ONLINE:
+		i = set_reg_val(id, *val);
+		if (i && !vcpu->arch.online)
+			atomic_inc(&vcpu->arch.vcore->online_count);
+		else if (!i && vcpu->arch.online)
+			atomic_dec(&vcpu->arch.vcore->online_count);
+		vcpu->arch.online = i;
+		break;
+	case KVM_REG_PPC_PTCR:
+		vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_FSCR:
+		kvmppc_set_fscr_hv(vcpu, set_reg_val(id, *val));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+/*
+ * On POWER9, threads are independent and can be in different partitions.
+ * Therefore we consider each thread to be a subcore.
+ * There is a restriction that all threads have to be in the same
+ * MMU mode (radix or HPT), unfortunately, but since we only support
+ * HPT guests on a HPT host so far, that isn't an impediment yet.
+ */
+static int threads_per_vcore(struct kvm *kvm)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return threads_per_subcore;	/* vcore spans a whole subcore */
+	return 1;				/* each thread is its own subcore */
+}
+
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
+{
+	struct kvmppc_vcore *vcore;
+
+	vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
+
+	if (vcore == NULL)
+		return NULL;
+
+	spin_lock_init(&vcore->lock);
+	spin_lock_init(&vcore->stoltb_lock);
+	rcuwait_init(&vcore->wait);
+	vcore->preempt_tb = TB_NIL;
+	vcore->lpcr = kvm->arch.lpcr;
+	vcore->first_vcpuid = id;
+	vcore->kvm = kvm;
+	INIT_LIST_HEAD(&vcore->preempt_list);
+
+	return vcore;
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static struct debugfs_timings_element {
+	const char *name;
+	size_t offset;
+} timings[] = {
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+	{"vcpu_entry",	offsetof(struct kvm_vcpu, arch.vcpu_entry)},
+	{"guest_entry",	offsetof(struct kvm_vcpu, arch.guest_entry)},
+	{"in_guest",	offsetof(struct kvm_vcpu, arch.in_guest)},
+	{"guest_exit",	offsetof(struct kvm_vcpu, arch.guest_exit)},
+	{"vcpu_exit",	offsetof(struct kvm_vcpu, arch.vcpu_exit)},
+	{"hypercall",	offsetof(struct kvm_vcpu, arch.hcall)},
+	{"page_fault",	offsetof(struct kvm_vcpu, arch.pg_fault)},
+#else
+	{"rm_entry",	offsetof(struct kvm_vcpu, arch.rm_entry)},
+	{"rm_intr",	offsetof(struct kvm_vcpu, arch.rm_intr)},
+	{"rm_exit",	offsetof(struct kvm_vcpu, arch.rm_exit)},
+	{"guest",	offsetof(struct kvm_vcpu, arch.guest_time)},
+	{"cede",	offsetof(struct kvm_vcpu, arch.cede_time)},
+#endif
+};
+
+#define N_TIMINGS	(ARRAY_SIZE(timings))
+
+struct debugfs_timings_state {
+	struct kvm_vcpu	*vcpu;
+	unsigned int	buflen;
+	char		buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+	struct kvm_vcpu *vcpu = inode->i_private;
+	struct debugfs_timings_state *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	kvm_get_kvm(vcpu->kvm);
+	p->vcpu = vcpu;
+	file->private_data = p;
+
+	return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_timings_state *p = file->private_data;
+
+	kvm_put_kvm(p->vcpu->kvm);
+	kfree(p);
+	return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+				    size_t len, loff_t *ppos)
+{
+	struct debugfs_timings_state *p = file->private_data;
+	struct kvm_vcpu *vcpu = p->vcpu;
+	char *s, *buf_end;
+	struct kvmhv_tb_accumulator tb;
+	u64 count;
+	loff_t pos;
+	ssize_t n;
+	int i, loops;
+	bool ok;
+
+	if (!p->buflen) {
+		s = p->buf;
+		buf_end = s + sizeof(p->buf);
+		for (i = 0; i < N_TIMINGS; ++i) {
+			struct kvmhv_tb_accumulator *acc;
+
+			acc = (struct kvmhv_tb_accumulator *)
+				((unsigned long)vcpu + timings[i].offset);
+			ok = false;
+			for (loops = 0; loops < 1000; ++loops) {
+				count = acc->seqcount;
+				if (!(count & 1)) {
+					smp_rmb();
+					tb = *acc;
+					smp_rmb();
+					if (count == acc->seqcount) {
+						ok = true;
+						break;
+					}
+				}
+				udelay(1);
+			}
+			if (!ok)
+				snprintf(s, buf_end - s, "%s: stuck\n",
+					timings[i].name);
+			else
+				snprintf(s, buf_end - s,
+					"%s: %llu %llu %llu %llu\n",
+					timings[i].name, count / 2,
+					tb_to_ns(tb.tb_total),
+					tb_to_ns(tb.tb_min),
+					tb_to_ns(tb.tb_max));
+			s += strlen(s);
+		}
+		p->buflen = s - p->buf;
+	}
+
+	pos = *ppos;
+	if (pos >= p->buflen)
+		return 0;
+	if (len > p->buflen - pos)
+		len = p->buflen - pos;
+	n = copy_to_user(buf, p->buf + pos, len);
+	if (n) {
+		if (n == len)
+			return -EFAULT;
+		len -= n;
+	}
+	*ppos = pos + len;
+	return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+				     size_t len, loff_t *ppos)
+{
+	return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_timings_open,
+	.release = debugfs_timings_release,
+	.read	 = debugfs_timings_read,
+	.write	 = debugfs_timings_write,
+	.llseek	 = generic_file_llseek,
+};
+
+/* Create a debugfs directory for the vcpu */
+static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) == IS_ENABLED(CONFIG_KVM_BOOK3S_HV_P9_TIMING))
+		debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
+				    &debugfs_timings_ops);
+	return 0;
+}
+
+#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+	return 0;
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+
+static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
+{
+	int err;
+	int core;
+	struct kvmppc_vcore *vcore;
+	struct kvm *kvm;
+	unsigned int id;
+
+	kvm = vcpu->kvm;
+	id = vcpu->vcpu_id;
+
+	vcpu->arch.shared = &vcpu->arch.shregs;
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	/*
+	 * The shared struct is never shared on HV,
+	 * so we can always use host endianness
+	 */
+#ifdef __BIG_ENDIAN__
+	vcpu->arch.shared_big_endian = true;
+#else
+	vcpu->arch.shared_big_endian = false;
+#endif
+#endif
+	kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		kvmppc_set_mmcr_hv(vcpu, 0, kvmppc_get_mmcr_hv(vcpu, 0) | MMCR0_PMCCEXT);
+		kvmppc_set_mmcra_hv(vcpu, MMCRA_BHRB_DISABLE);
+	}
+
+	kvmppc_set_ctrl_hv(vcpu, CTRL_RUNLATCH);
+	/* default to host PVR, since we can't spoof it */
+	kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
+	spin_lock_init(&vcpu->arch.vpa_update_lock);
+	spin_lock_init(&vcpu->arch.tbacct_lock);
+	vcpu->arch.busy_preempt = TB_NIL;
+	__kvmppc_set_msr_hv(vcpu, MSR_ME);
+	vcpu->arch.intr_msr = MSR_SF | MSR_ME;
+
+	/*
+	 * Set the default HFSCR for the guest from the host value.
+	 * This value is only used on POWER9 and later.
+	 * On >= POWER9, we want to virtualize the doorbell facility, so we
+	 * don't set the HFSCR_MSGP bit, and that causes those instructions
+	 * to trap and then we emulate them.
+	 */
+	kvmppc_set_hfscr_hv(vcpu, HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
+			    HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP);
+
+	/* On POWER10 and later, allow prefixed instructions */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PREFIX);
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & mfspr(SPRN_HFSCR));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+			kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM);
+#endif
+	}
+	if (cpu_has_feature(CPU_FTR_TM_COMP))
+		vcpu->arch.hfscr |= HFSCR_TM;
+
+	vcpu->arch.hfscr_permitted = kvmppc_get_hfscr_hv(vcpu);
+
+	/*
+	 * PM, EBB, TM are demand-faulted so start with it clear.
+	 */
+	kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM));
+
+	kvmppc_mmu_book3s_hv_init(vcpu);
+
+	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
+
+	init_waitqueue_head(&vcpu->arch.cpu_run);
+
+	mutex_lock(&kvm->lock);
+	vcore = NULL;
+	err = -EINVAL;
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
+			pr_devel("KVM: VCPU ID too high\n");
+			core = KVM_MAX_VCORES;
+		} else {
+			BUG_ON(kvm->arch.smt_mode != 1);
+			core = kvmppc_pack_vcpu_id(kvm, id);
+		}
+	} else {
+		core = id / kvm->arch.smt_mode;
+	}
+	if (core < KVM_MAX_VCORES) {
+		vcore = kvm->arch.vcores[core];
+		if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+			pr_devel("KVM: collision on id %u", id);
+			vcore = NULL;
+		} else if (!vcore) {
+			/*
+			 * Take mmu_setup_lock for mutual exclusion
+			 * with kvmppc_update_lpcr().
+			 */
+			err = -ENOMEM;
+			vcore = kvmppc_vcore_create(kvm,
+					id & ~(kvm->arch.smt_mode - 1));
+			mutex_lock(&kvm->arch.mmu_setup_lock);
+			kvm->arch.vcores[core] = vcore;
+			kvm->arch.online_vcores++;
+			mutex_unlock(&kvm->arch.mmu_setup_lock);
+		}
+	}
+	mutex_unlock(&kvm->lock);
+
+	if (!vcore)
+		return err;
+
+	spin_lock(&vcore->lock);
+	++vcore->num_threads;
+	spin_unlock(&vcore->lock);
+	vcpu->arch.vcore = vcore;
+	vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.prev_cpu = -1;
+
+	vcpu->arch.cpu_type = KVM_CPU_3S_64;
+	kvmppc_sanity_check(vcpu);
+
+	return 0;
+}
+
+static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+			      unsigned long flags)
+{
+	int err;
+	int esmt = 0;
+
+	if (flags)
+		return -EINVAL;
+	if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+		return -EINVAL;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/*
+		 * On POWER8 (or POWER7), the threading mode is "strict",
+		 * so we pack smt_mode vcpus per vcore.
+		 */
+		if (smt_mode > threads_per_subcore)
+			return -EINVAL;
+	} else {
+		/*
+		 * On POWER9, the threading mode is "loose",
+		 * so each vcpu gets its own vcore.
+		 */
+		esmt = smt_mode;
+		smt_mode = 1;
+	}
+	mutex_lock(&kvm->lock);
+	err = -EBUSY;
+	if (!kvm->arch.online_vcores) {
+		kvm->arch.smt_mode = smt_mode;
+		kvm->arch.emul_smt_mode = esmt;
+		err = 0;
+	}
+	mutex_unlock(&kvm->lock);
+
+	return err;
+}
+
+static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+	if (!vpa->pinned_addr)		/* pinned_addr unset: nothing to unpin */
+		return;
+	kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, vpa->dirty);
+}
+
+static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
+	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+}
+
+static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
+{
+	/* Indicate we want to get back into the guest */
+	return 1;
+}
+
+static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
+{
+	unsigned long dec_nsec, now;
+
+	now = get_tb();
+	if (now > kvmppc_dec_expires_host_tb(vcpu)) {
+		/* decrementer has already gone negative */
+		kvmppc_core_queue_dec(vcpu);
+		kvmppc_core_prepare_to_enter(vcpu);
+		return;
+	}
+	dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
+	hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
+	vcpu->arch.timer_running = 1;
+}
+
+extern int __kvmppc_vcore_entry(void);
+
+static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
+				   struct kvm_vcpu *vcpu, u64 tb)
+{
+	u64 now;
+
+	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
+	spin_lock_irq(&vcpu->arch.tbacct_lock);
+	now = tb;
+	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
+		vcpu->arch.stolen_logged;
+	vcpu->arch.busy_preempt = now;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	spin_unlock_irq(&vcpu->arch.tbacct_lock);
+	--vc->n_runnable;
+	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
+}
+
+static int kvmppc_grab_hwthread(int cpu)
+{
+	struct paca_struct *tpaca;
+	long timeout = 10000;
+
+	tpaca = paca_ptrs[cpu];
+
+	/* Ensure the thread won't go into the kernel if it wakes */
+	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
+	tpaca->kvm_hstate.napping = 0;
+	smp_wmb();
+	tpaca->kvm_hstate.hwthread_req = 1;
+
+	/*
+	 * If the thread is already executing in the kernel (e.g. handling
+	 * a stray interrupt), wait for it to get back to nap mode.
+	 * The smp_mb() is to ensure that our setting of hwthread_req
+	 * is visible before we look at hwthread_state, so if this
+	 * races with the code at system_reset_pSeries and the thread
+	 * misses our setting of hwthread_req, we are sure to see its
+	 * setting of hwthread_state, and vice versa.
+	 */
+	smp_mb();
+	while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+		if (--timeout <= 0) {
+			pr_err("KVM: couldn't grab cpu %d\n", cpu);
+			return -EBUSY;
+		}
+		udelay(1);
+	}
+	return 0;
+}
+
+static void kvmppc_release_hwthread(int cpu)
+{
+	struct paca_struct *tpaca;
+
+	tpaca = paca_ptrs[cpu];
+	tpaca->kvm_hstate.hwthread_req = 0;
+	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.kvm_vcore = NULL;
+	tpaca->kvm_hstate.kvm_split_mode = NULL;
+}
+
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
+static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
+{
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	cpumask_t *need_tlb_flush;
+	int i;
+
+	if (nested)
+		need_tlb_flush = &nested->need_tlb_flush;
+	else
+		need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+	cpu = cpu_first_tlb_thread_sibling(cpu);
+	for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+					i += cpu_tlb_thread_sibling_step())
+		cpumask_set_cpu(i, need_tlb_flush);
+
+	/*
+	 * Make sure setting of bit in need_tlb_flush precedes testing of
+	 * cpu_in_guest. The matching barrier on the other side is hwsync
+	 * when switching to guest MMU mode, which happens between
+	 * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
+	 * being tested.
+	 */
+	smp_mb();
+
+	for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+					i += cpu_tlb_thread_sibling_step()) {
+		struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
+
+		if (running == kvm)
+			smp_call_function_single(i, do_nothing, NULL, 1);
+	}
+}
+
+static void do_migrate_away_vcpu(void *arg)
+{
+	struct kvm_vcpu *vcpu = arg;
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync;
+	 * ptesync sequence on the old CPU before migrating to a new one, in
+	 * case we interrupted the guest between a tlbie ; eieio ;
+	 * tlbsync; ptesync sequence.
+	 *
+	 * Otherwise, ptesync is sufficient for ordering tlbiel sequences.
+	 */
+	if (kvm->arch.lpcr & LPCR_GTSE)
+		asm volatile("eieio; tlbsync; ptesync");
+	else
+		asm volatile("ptesync");
+}
+
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	struct kvm *kvm = vcpu->kvm;
+	int prev_cpu;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return;
+
+	if (nested)
+		prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
+	else
+		prev_cpu = vcpu->arch.prev_cpu;
+
+	/*
+	 * With radix, the guest can do TLB invalidations itself,
+	 * and it could choose to use the local form (tlbiel) if
+	 * it is invalidating a translation that has only ever been
+	 * used on one vcpu.  However, that doesn't mean it has
+	 * only ever been used on one physical cpu, since vcpus
+	 * can move around between pcpus.  To cope with this, when
+	 * a vcpu moves from one pcpu to another, we need to tell
+	 * any vcpus running on the same core as this vcpu previously
+	 * ran to flush the TLB.
+	 */
+	if (prev_cpu != pcpu) {
+		if (prev_cpu >= 0) {
+			if (cpu_first_tlb_thread_sibling(prev_cpu) !=
+			    cpu_first_tlb_thread_sibling(pcpu))
+				radix_flush_cpu(kvm, prev_cpu, vcpu);
+
+			smp_call_function_single(prev_cpu,
+					do_migrate_away_vcpu, vcpu, 1);
+		}
+		if (nested)
+			nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
+		else
+			vcpu->arch.prev_cpu = pcpu;
+	}
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
+{
+	int cpu;
+	struct paca_struct *tpaca;
+
+	cpu = vc->pcpu;
+	if (vcpu) {
+		if (vcpu->arch.timer_running) {
+			hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+			vcpu->arch.timer_running = 0;
+		}
+		cpu += vcpu->arch.ptid;
+		vcpu->cpu = vc->pcpu;
+		vcpu->arch.thread_cpu = cpu;
+	}
+	tpaca = paca_ptrs[cpu];
+	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
+	tpaca->kvm_hstate.fake_suspend = 0;
+	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
+	smp_wmb();
+	tpaca->kvm_hstate.kvm_vcore = vc;
+	if (cpu != smp_processor_id())
+		kvmppc_ipi_thread(cpu);
+}
+
+static void kvmppc_wait_for_nap(int n_threads)
+{
+	int cpu = smp_processor_id();
+	int i, loops;
+
+	if (n_threads <= 1)
+		return;
+	for (loops = 0; loops < 1000000; ++loops) {
+		/*
+		 * Check if all threads are finished.
+		 * We set the vcore pointer when starting a thread
+		 * and the thread clears it when finished, so we look
+		 * for any threads that still have a non-NULL vcore ptr.
+		 */
+		for (i = 1; i < n_threads; ++i)
+			if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
+				break;
+		if (i == n_threads) {
+			HMT_medium();
+			return;
+		}
+		HMT_low();
+	}
+	HMT_medium();
+	for (i = 1; i < n_threads; ++i)
+		if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
+			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
+}
+
+/*
+ * Check that we are on thread 0 and that any other threads in
+ * this core are off-line.  Then grab the threads so they can't
+ * enter the kernel.
+ */
+static int on_primary_thread(void)
+{
+	const int base = smp_processor_id();
+	int t;
+
+	/* Bail out unless cpu_thread_in_subcore() reports 0 for this CPU */
+	if (cpu_thread_in_subcore(base) != 0)
+		return 0;
+
+	/* Every sibling thread of this subcore has to be offline */
+	for (t = 1; t < threads_per_subcore; t++)
+		if (cpu_online(base + t))
+			return 0;
+
+	/* Claim the siblings one by one so none can enter the kernel */
+	for (t = 1; t < threads_per_subcore; t++) {
+		if (!kvmppc_grab_hwthread(base + t))
+			continue;
+
+		/* Grab failed: release this thread and all earlier ones */
+		for (; t > 0; t--)
+			kvmppc_release_hwthread(base + t);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+	struct list_head	list;
+	spinlock_t		lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+		spin_lock_init(&lp->lock);
+		INIT_LIST_HEAD(&lp->list);
+	}
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+	/* Mark vc preempted on this CPU; queue it unless it is a full vcore */
+	vc->vcore_state = VCORE_PREEMPT;
+	vc->pcpu = smp_processor_id();
+	if (vc->num_threads < threads_per_vcore(vc->kvm)) {
+		spin_lock(&lp->lock);
+		list_add_tail(&vc->preempt_list, &lp->list);
+		spin_unlock(&lp->lock);
+	}
+
+	/* Start accumulating stolen time */
+	kvmppc_core_start_stolen(vc, mftb());
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+	struct preempted_vcore_list *lp;
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+	/* Stop stolen time; unqueue vc from the list of the CPU in vc->pcpu */
+	kvmppc_core_end_stolen(vc, mftb());
+	if (!list_empty(&vc->preempt_list)) {
+		lp = &per_cpu(preempted_vcores, vc->pcpu);
+		spin_lock(&lp->lock);
+		list_del_init(&vc->preempt_list);
+		spin_unlock(&lp->lock);
+	}
+	vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+	int		n_subcores;	/* number of entries used in vc[] */
+	int		max_subcore_threads;	/* largest subcore_threads[] value */
+	int		total_threads;	/* sum of subcore_threads[] */
+	int		subcore_threads[MAX_SUBCORES];	/* num_threads of each vc[] */
+	struct kvmppc_vcore *vc[MAX_SUBCORES];	/* vc[0] is the primary vcore */
+};
+
+/*
+ * Thread offset of each subcore within the core.  Subcores 0 and 1 thus use
+ * threads 0-3 and 4-7 in 2-way micro-threading (split-core) mode on POWER8.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+	*cip = (struct core_info) {	/* members not named here become zero */
+		.n_subcores = 1,
+		.max_subcore_threads = vc->num_threads,
+		.total_threads = vc->num_threads,
+		.subcore_threads = { vc->num_threads },
+		.vc = { vc },
+	};
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+	/* POWER9 "SMT4" cores act as a fixed 4-way split, one thread each */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return n_threads == 1 && n_subcores <= 4;
+
+	if (n_subcores > MAX_SUBCORES)
+		return false;
+
+	if (n_subcores > 1) {
+		/* POWER8 can only split dynamically when currently unsplit */
+		if (threads_per_subcore < MAX_SMT_THREADS)
+			return false;
+		/* With 2-way mode disabled, any split is treated as 4-way */
+		if (!(dynamic_mt_modes & 2))
+			n_subcores = 4;
+		if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+			return false;
+	}
+
+	/* All subcores, each padded to a power of two, must fit the core */
+	return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
+{
+	vc->tb_offset_applied = 0;	/* zero the per-run vcore state */
+	vc->conferring_threads = 0;
+	vc->napping_threads = 0;
+	vc->in_guest = 0;
+	vc->entry_exit_map = 0;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+	int widest = vc->num_threads;
+	int slot;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false;
+
+	/* In one_vm_per_core mode, every vcore must belong to the same vm */
+	if (one_vm_per_core && cip->vc[0]->kvm != vc->kvm)
+		return false;
+
+	/* Check the layout using the widest subcore, including this one */
+	if (cip->max_subcore_threads > widest)
+		widest = cip->max_subcore_threads;
+	if (!subcore_config_ok(cip->n_subcores + 1, widest))
+		return false;
+	/* Accepted: record vc in the next free subcore slot */
+	slot = cip->n_subcores++;
+	cip->vc[slot] = vc;
+	cip->subcore_threads[slot] = vc->num_threads;
+	cip->total_threads += vc->num_threads;
+	cip->max_subcore_threads = widest;
+	init_vcore_to_run(vc);
+	list_del_init(&vc->preempt_list);
+
+	return true;
+}
+
+/*
+ * Decide whether vcore *pvc can run alongside the vcores already in *cip
+ * without exceeding target_threads; on success *pvc is added to *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+			  int target_threads)
+{
+	bool fits = cip->total_threads + pvc->num_threads <= target_threads;
+
+	/* can_dynamic_split() is only tried when the thread budget allows */
+	return fits && can_dynamic_split(pvc, cip);
+}
+
+static void prepare_threads(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *v;
+	int t;
+
+	for_each_runnable_thread(t, v, vc) {
+		if (signal_pending(v->arch.run_task))
+			v->arch.ret = -EINTR;
+		else if (!v->arch.vpa.update_pending &&
+			 !v->arch.slb_shadow.update_pending &&
+			 !v->arch.dtl.update_pending)
+			continue;	/* nothing pending: leave it runnable */
+		else
+			v->arch.ret = RESUME_GUEST;
+		kvmppc_remove_runnable(vc, v, mftb());
+		wake_up(&v->arch.cpu_run);
+	}
+}
+
+static void collect_piggybacks(struct core_info *cip, int target_threads)
+{
+	struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+	struct kvmppc_vcore *pvc, *vcnext;
+
+	spin_lock(&lp->lock);
+	list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+		if (!spin_trylock(&pvc->lock))
+			continue;	/* lock is contended: skip this vcore */
+		prepare_threads(pvc);
+		if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
+			list_del_init(&pvc->preempt_list);	/* drop from list */
+			if (pvc->runner == NULL) {
+				pvc->vcore_state = VCORE_INACTIVE;
+				kvmppc_core_end_stolen(pvc, mftb());
+			}
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		if (!can_piggyback(pvc, cip, target_threads)) {
+			spin_unlock(&pvc->lock);
+			continue;
+		}
+		kvmppc_core_end_stolen(pvc, mftb());
+		pvc->vcore_state = VCORE_PIGGYBACK;	/* pvc->lock stays held */
+		if (cip->total_threads >= target_threads)
+			break;
+	}
+	spin_unlock(&lp->lock);
+}
+
+static bool recheck_signals_and_mmu(struct core_info *cip)
+{
+	struct kvmppc_vcore *core;
+	struct kvm_vcpu *v;
+	int s, t;
+
+	for (s = 0; s < cip->n_subcores; s++) {
+		core = cip->vc[s];
+		if (!core->kvm->arch.mmu_ready)
+			return true;	/* this vcore's MMU is not ready */
+		for_each_runnable_thread(t, v, core)
+			if (signal_pending(v->arch.run_task))
+				return true;	/* a vcpu task has a signal */
+	}
+	return false;
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
+{
+	int still_running = 0, i;
+	u64 now;
+	long ret;
+	struct kvm_vcpu *vcpu;
+
+	spin_lock(&vc->lock);
+	now = get_tb();
+	for_each_runnable_thread(i, vcpu, vc) {
+		/*
+		 * It's safe to unlock the vcore in the loop here, because
+		 * for_each_runnable_thread() is safe against removal of
+		 * the vcpu, and the vcore state is VCORE_EXITING here,
+		 * so any vcpus becoming runnable will have their arch.trap
+		 * set to zero and can't actually run in the guest.
+		 */
+		spin_unlock(&vc->lock);
+		/* cancel pending dec exception if dec is positive */
+		if (now < kvmppc_dec_expires_host_tb(vcpu) &&
+		    kvmppc_core_pending_dec(vcpu))
+			kvmppc_core_dequeue_dec(vcpu);
+
+		trace_kvm_guest_exit(vcpu);
+		/* the exit handler is only called if a trap was recorded */
+		ret = RESUME_GUEST;
+		if (vcpu->arch.trap)
+			ret = kvmppc_handle_exit_hv(vcpu,
+						    vcpu->arch.run_task);
+
+		vcpu->arch.ret = ret;
+		vcpu->arch.trap = 0;
+		/* back under vc->lock before touching the runnable set */
+		spin_lock(&vc->lock);
+		if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			if (vcpu->arch.pending_exceptions)
+				kvmppc_core_prepare_to_enter(vcpu);
+			if (vcpu->arch.ceded)
+				kvmppc_set_timer(vcpu);
+			else
+				++still_running;	/* resumable and not ceded */
+		} else {
+			kvmppc_remove_runnable(vc, vcpu, mftb());
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+	if (!is_master) {	/* callers pass false for all but their own vcore */
+		if (still_running > 0) {
+			kvmppc_vcore_preempt(vc);
+		} else if (vc->runner) {
+			vc->vcore_state = VCORE_PREEMPT;
+			kvmppc_core_start_stolen(vc, mftb());
+		} else {
+			vc->vcore_state = VCORE_INACTIVE;
+		}
+		if (vc->n_runnable > 0 && vc->runner == NULL) {
+			/* make sure there's a candidate runner awake */
+			i = -1;
+			vcpu = next_runnable_thread(vc, &i);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+	spin_unlock(&vc->lock);
+}
+
+/*
+ * Remove this core from the set of active host cores because it is
+ * about to enter the guest.  Only done when the primary thread of the
+ * whole core (not of a subcore) is the one entering the guest.
+ */
+static inline int kvmppc_clear_host_core(unsigned int cpu)
+{
+	/*
+	 * Memory barrier can be omitted here as we will do a smp_wmb()
+	 * later in kvmppc_start_thread and we need to ensure that state
+	 * is visible to other CPUs only after we enter guest.
+	 */
+	if (kvmppc_host_rm_ops_hv && !cpu_thread_in_core(cpu)) {
+		int core = cpu >> threads_shift;
+
+		kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Mark this core as an active host core again now that it has left the
+ * guest.  Only done when the primary thread of the core is the one
+ * exiting.
+ * Always returns 0.
+ */
+static inline int kvmppc_set_host_core(unsigned int cpu)
+{
+	/*
+	 * Memory barrier can be omitted here because we do a spin_unlock
+	 * immediately after this which provides the memory barrier.
+	 */
+	if (kvmppc_host_rm_ops_hv && !cpu_thread_in_core(cpu)) {
+		int core = cpu >> threads_shift;
+
+		kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+	}
+
+	return 0;
+}
+
+static void set_irq_happened(int trap)
+{
+	/*
+	 * Map the trap number onto the matching bit of the paca's
+	 * irq_happened mask.  A system reset is not recorded as a bit;
+	 * replay_system_reset() is called for it instead.  Any other
+	 * trap value is ignored.
+	 */
+	if (trap == BOOK3S_INTERRUPT_EXTERNAL)
+		local_paca->irq_happened |= PACA_IRQ_EE;
+	else if (trap == BOOK3S_INTERRUPT_H_DOORBELL)
+		local_paca->irq_happened |= PACA_IRQ_DBELL;
+	else if (trap == BOOK3S_INTERRUPT_HMI)
+		local_paca->irq_happened |= PACA_IRQ_HMI;
+	else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
+		replay_system_reset();
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held, and returns with vc->lock held.
+ */
+static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+	int srcu_idx;
+	struct core_info core_info;
+	struct kvmppc_vcore *pvc;
+	struct kvm_split_mode split_info, *sip;
+	int split, subcore_size, active;
+	int sub;
+	bool thr0_done;
+	unsigned long cmd_bit, stat_bit;
+	int pcpu, thr;
+	int target_threads;
+	int controlled_threads;
+	int trap;
+	bool is_power8;
+
+	if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+		return;
+
+	/*
+	 * Remove from the list any threads that have a signal pending
+	 * or need a VPA update done
+	 */
+	prepare_threads(vc);
+
+	/* if the runner is no longer runnable, let the caller pick a new one */
+	if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
+
+	/*
+	 * Initialize *vc.
+	 */
+	init_vcore_to_run(vc);
+	vc->preempt_tb = TB_NIL;
+
+	/*
+	 * Number of threads that we will be controlling: the same as
+	 * the number of threads per subcore, except on POWER9,
+	 * where it's 1 because the threads are (mostly) independent.
+	 */
+	controlled_threads = threads_per_vcore(vc->kvm);
+
+	/*
+	 * Make sure we are running on primary threads, and that secondary
+	 * threads are offline.  Also check if the number of threads in this
+	 * guest is greater than the current system threads per guest.
+	 */
+	if ((controlled_threads > 1) &&
+	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
+		for_each_runnable_thread(i, vcpu, vc) {
+			vcpu->arch.ret = -EBUSY;
+			kvmppc_remove_runnable(vc, vcpu, mftb());
+			wake_up(&vcpu->arch.cpu_run);
+		}
+		goto out;
+	}
+
+	/*
+	 * See if we could run any other vcores on the physical core
+	 * along with this one.
+	 */
+	init_core_info(&core_info, vc);
+	pcpu = smp_processor_id();
+	target_threads = controlled_threads;
+	if (target_smt_mode && target_smt_mode < target_threads)
+		target_threads = target_smt_mode;
+	if (vc->num_threads < target_threads)
+		collect_piggybacks(&core_info, target_threads);
+
+	/*
+	 * Hard-disable interrupts, and check resched flag and signals.
+	 * If we need to reschedule or deliver a signal, clean up
+	 * and return without going into the guest(s).
+	 * If the mmu_ready flag has been cleared, don't go into the
+	 * guest because that means a HPT resize operation is in progress.
+	 */
+	local_irq_disable();
+	hard_irq_disable();
+	if (lazy_irq_pending() || need_resched() ||
+	    recheck_signals_and_mmu(&core_info)) {
+		local_irq_enable();
+		vc->vcore_state = VCORE_INACTIVE;
+		/* Unlock all except the primary vcore */
+		for (sub = 1; sub < core_info.n_subcores; ++sub) {
+			pvc = core_info.vc[sub];
+			/* Put back on to the preempted vcores list */
+			kvmppc_vcore_preempt(pvc);
+			spin_unlock(&pvc->lock);
+		}
+		for (i = 0; i < controlled_threads; ++i)
+			kvmppc_release_hwthread(pcpu + i);
+		return;
+	}
+
+	kvmppc_clear_host_core(pcpu);
+
+	/* Decide on micro-threading (split-core) mode */
+	subcore_size = threads_per_subcore;
+	cmd_bit = stat_bit = 0;
+	split = core_info.n_subcores;
+	sip = NULL;
+	is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
+
+	if (split > 1) {
+		sip = &split_info;
+		memset(&split_info, 0, sizeof(split_info));
+		for (sub = 0; sub < core_info.n_subcores; ++sub)
+			split_info.vc[sub] = core_info.vc[sub];
+
+		if (is_power8) {
+			if (split == 2 && (dynamic_mt_modes & 2)) {
+				cmd_bit = HID0_POWER8_1TO2LPAR;
+				stat_bit = HID0_POWER8_2LPARMODE;
+			} else {
+				split = 4;
+				cmd_bit = HID0_POWER8_1TO4LPAR;
+				stat_bit = HID0_POWER8_4LPARMODE;
+			}
+			subcore_size = MAX_SMT_THREADS / split;
+			split_info.rpr = mfspr(SPRN_RPR);
+			split_info.pmmar = mfspr(SPRN_PMMAR);
+			split_info.ldbar = mfspr(SPRN_LDBAR);
+			split_info.subcore_size = subcore_size;
+		} else {
+			split_info.subcore_size = 1;
+		}
+
+		/* order writes to split_info before kvm_split_mode pointer */
+		smp_wmb();
+	}
+
+	for (thr = 0; thr < controlled_threads; ++thr) {
+		struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+		paca->kvm_hstate.napping = 0;
+		paca->kvm_hstate.kvm_split_mode = sip;
+	}
+
+	/* Initiate micro-threading (split-core) on POWER8 if required */
+	if (cmd_bit) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+
+		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		/* spin until HID0 reports the requested split mode */
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (hid0 & stat_bit)
+				break;
+			cpu_relax();
+		}
+	}
+
+	/*
+	 * On POWER8, set RWMR register.
+	 * Since it only affects PURR and SPURR, it doesn't affect
+	 * the host, so we don't save/restore the host value.
+	 */
+	if (is_power8) {
+		unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
+		int n_online = atomic_read(&vc->online_count);
+
+		/*
+		 * Use the 8-thread value if we're doing split-core
+		 * or if the vcore's online count looks bogus.
+		 */
+		if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
+		    n_online >= 1 && n_online <= MAX_SMT_THREADS)
+			rwmr_val = p8_rwmr_values[n_online];
+		mtspr(SPRN_RWMR, rwmr_val);
+	}
+
+	/* Start all the threads */
+	active = 0;
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		thr = is_power8 ? subcore_thread_map[sub] : sub;
+		thr0_done = false;
+		active |= 1 << thr;
+		pvc = core_info.vc[sub];
+		pvc->pcpu = pcpu + thr;
+		for_each_runnable_thread(i, vcpu, pvc) {
+			/*
+			 * XXX: is kvmppc_start_thread called too late here?
+			 * It updates vcpu->cpu and vcpu->arch.thread_cpu
+			 * which are used by kvmppc_fast_vcpu_kick_hv(), but
+			 * kick is called after new exceptions become available
+			 * and exceptions are checked earlier than here, by
+			 * kvmppc_core_prepare_to_enter.
+			 */
+			kvmppc_start_thread(vcpu, pvc);
+			kvmppc_update_vpa_dispatch(vcpu, pvc);
+			trace_kvm_guest_enter(vcpu);
+			if (!vcpu->arch.ptid)
+				thr0_done = true;
+			active |= 1 << (thr + vcpu->arch.ptid);
+		}
+		/*
+		 * We need to start the first thread of each subcore
+		 * even if it doesn't have a vcpu.
+		 */
+		if (!thr0_done)
+			kvmppc_start_thread(NULL, pvc);
+	}
+
+	/*
+	 * Ensure that split_info.do_nap is set after setting
+	 * the vcore pointer in the PACA of the secondaries.
+	 */
+	smp_mb();
+
+	/*
+	 * When doing micro-threading, poke the inactive threads as well.
+	 * This gets them to the nap instruction after kvm_do_nap,
+	 * which reduces the time taken to unsplit later.
+	 */
+	if (cmd_bit) {
+		split_info.do_nap = 1;	/* ask secondaries to nap when done */
+		for (thr = 1; thr < threads_per_subcore; ++thr)
+			if (!(active & (1 << thr)))
+				kvmppc_ipi_thread(pcpu + thr);
+	}
+
+	vc->vcore_state = VCORE_RUNNING;
+	preempt_disable();
+
+	trace_kvmppc_run_core(vc, 0);
+
+	for (sub = 0; sub < core_info.n_subcores; ++sub)
+		spin_unlock(&core_info.vc[sub]->lock);
+
+	guest_timing_enter_irqoff();
+
+	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+
+	guest_state_enter_irqoff();
+	this_cpu_disable_ftrace();
+
+	trap = __kvmppc_vcore_entry();
+
+	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
+
+	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+	set_irq_happened(trap);
+
+	spin_lock(&vc->lock);
+	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
+	vc->vcore_state = VCORE_EXITING;
+
+	/* wait for secondary threads to finish writing their state to memory */
+	kvmppc_wait_for_nap(controlled_threads);
+
+	/* Return to whole-core mode if we split the core earlier */
+	if (cmd_bit) {
+		unsigned long hid0 = mfspr(SPRN_HID0);
+
+		hid0 &= ~HID0_POWER8_DYNLPARDIS;
+		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+		mb();
+		mtspr(SPRN_HID0, hid0);
+		isync();
+		/* spin until HID0 shows neither split mode any more */
+		for (;;) {
+			hid0 = mfspr(SPRN_HID0);
+			if (!(hid0 & stat_bit))
+				break;
+			cpu_relax();
+		}
+		split_info.do_nap = 0;
+	}
+
+	kvmppc_set_host_core(pcpu);
+
+	if (!vtime_accounting_enabled_this_cpu()) {
+		local_irq_enable();
+		/*
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
+		 * ticks that occurred while running the guest are accounted to
+		 * the guest. If vtime accounting is enabled, accounting uses
+		 * TB rather than ticks, so it can be done without enabling
+		 * interrupts here, which has the problem that it accounts
+		 * interrupt processing overhead to the host.
+		 */
+		local_irq_disable();
+	}
+	guest_timing_exit_irqoff();
+
+	local_irq_enable();
+
+	/* Let secondaries go back to the offline loop */
+	for (i = 0; i < controlled_threads; ++i) {
+		kvmppc_release_hwthread(pcpu + i);
+		if (sip && sip->napped[i])
+			kvmppc_ipi_thread(pcpu + i);
+	}
+
+	spin_unlock(&vc->lock);
+
+	/* make sure updates to secondary vcpu structs are visible now */
+	smp_mb();
+
+	preempt_enable();
+
+	for (sub = 0; sub < core_info.n_subcores; ++sub) {
+		pvc = core_info.vc[sub];
+		post_guest_process(pvc, pvc == vc);
+	}
+
+	spin_lock(&vc->lock);
+
+ out:
+	vc->vcore_state = VCORE_INACTIVE;
+	trace_kvmppc_run_core(vc, 1);
+}
+
+static inline bool hcall_is_xics(unsigned long req)
+{
+	return req == H_XIRR || req == H_XIRR_X || req == H_IPOLL ||
+		req == H_IPI || req == H_CPPR || req == H_EOI;
+}
+
+static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *vpa = vcpu->arch.vpa.pinned_addr;
+
+	if (!vpa)	/* no pinned VPA address: nothing to count in */
+		return;
+	vpa->yield_count = cpu_to_be32(be32_to_cpu(vpa->yield_count) + 1);
+	vcpu->arch.vpa.dirty = 1;
+}
+
+/* call our hypervisor to load up HV regs and go */
+static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	unsigned long host_psscr;
+	unsigned long msr;
+	struct hv_guest_state hvregs;
+	struct p9_host_os_sprs host_os_sprs;
+	s64 dec;
+	int trap;
+
+	msr = mfmsr();
+
+	save_p9_host_os_sprs(&host_os_sprs);
+
+	/*
+	 * We need to save and restore the guest visible part of the
+	 * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+	 * doesn't do this for us. Note only required if pseries since
+	 * this is done in kvmhv_vcpu_entry_p9() below otherwise.
+	 */
+	host_psscr = mfspr(SPRN_PSSCR_PR);
+
+	kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+	if (lazy_irq_pending())
+		return 0;	/* an interrupt is pending: don't enter */
+
+	if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+		msr = mfmsr(); /* TM restore can update msr */
+
+	if (vcpu->arch.psscr != host_psscr)
+		mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+	/* Fill in the hv_guest_state block passed to H_ENTER_NESTED */
+	kvmhv_save_hv_regs(vcpu, &hvregs);
+	hvregs.lpcr = lpcr;
+	hvregs.amor = ~0;
+	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+	hvregs.version = HV_GUEST_STATE_VERSION;
+	if (vcpu->arch.nested) {
+		hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+		hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+	} else {
+		hvregs.lpid = vcpu->kvm->arch.lpid;
+		hvregs.vcpu_token = vcpu->vcpu_id;
+	}
+	hvregs.hdec_expiry = time_limit;
+
+	/*
+	 * When setting DEC, we must always deal with irq_work_raise
+	 * via NMI vs setting DEC. The problem occurs right as we
+	 * switch into guest mode if a NMI hits and sets pending work
+	 * and sets DEC, then that will apply to the guest and not
+	 * bring us back to the host.
+	 *
+	 * irq_work_raise could check a flag (or possibly LPCR[HDICE]
+	 * for example) and set HDEC to 1? That wouldn't solve the
+	 * nested hv case which needs to abort the hcall or zero the
+	 * time limit.
+	 *
+	 * XXX: Another day's problem.
+	 */
+	mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb);
+
+	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+	switch_pmu_to_guest(vcpu, &host_os_sprs);
+	accumulate_time(vcpu, &vcpu->arch.in_guest);
+	trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
+				  __pa(&vcpu->arch.regs));
+	accumulate_time(vcpu, &vcpu->arch.guest_exit);
+	kvmhv_restore_hv_return_state(vcpu, &hvregs);
+	switch_pmu_to_host(vcpu, &host_os_sprs);
+	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+	vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+	vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+	vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+
+	store_vcpu_state(vcpu);
+	/* Recompute dec_expires from the DEC value read back after exit */
+	dec = mfspr(SPRN_DEC);
+	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+		dec = (s32) dec;
+	*tb = mftb();
+	vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
+
+	timer_rearm_host_dec(*tb);
+
+	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+	if (vcpu->arch.psscr != host_psscr)
+		mtspr(SPRN_PSSCR_PR, host_psscr);
+
+	return trap;
+}
+
+/*
+ * Guest entry for POWER9 and later CPUs.
+ */
+static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+			 unsigned long lpcr, u64 *tb)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	u64 next_timer;
+	int trap;
+
+	next_timer = timer_get_next_tb();
+	if (*tb >= next_timer)
+		return BOOK3S_INTERRUPT_HV_DECREMENTER;	/* next timer is due */
+	if (next_timer < time_limit)
+		time_limit = next_timer;
+	else if (*tb >= time_limit) /* nested time limit */
+		return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
+
+	vcpu->arch.ceded = 0;
+
+	vcpu_vpa_increment_dispatch(vcpu);	/* once before entry ... */
+	/* On pseries the guest is entered through the nested-HV hcall path */
+	if (kvmhv_on_pseries()) {
+		trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+
+		/* H_CEDE has to be handled now, not later */
+		if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested &&
+		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
+			kvmppc_cede(vcpu);
+			kvmppc_set_gpr(vcpu, 3, 0);
+			trap = 0;
+		}
+
+	} else if (nested) {
+		__this_cpu_write(cpu_in_guest, kvm);
+		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+		__this_cpu_write(cpu_in_guest, NULL);
+
+	} else {
+		kvmppc_xive_push_vcpu(vcpu);
+
+		__this_cpu_write(cpu_in_guest, kvm);
+		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+		__this_cpu_write(cpu_in_guest, NULL);
+
+		if (trap == BOOK3S_INTERRUPT_SYSCALL &&
+		    !(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+			unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+			/*
+			 * XIVE rearm and XICS hcalls must be handled
+			 * before xive context is pulled (is this
+			 * true?)
+			 */
+			if (req == H_CEDE) {
+				/* H_CEDE has to be handled now */
+				kvmppc_cede(vcpu);
+				if (!kvmppc_xive_rearm_escalation(vcpu)) {
+					/*
+					 * Pending escalation so abort
+					 * the cede.
+					 */
+					vcpu->arch.ceded = 0;
+				}
+				kvmppc_set_gpr(vcpu, 3, 0);
+				trap = 0;
+
+			} else if (req == H_ENTER_NESTED) {
+				/*
+				 * L2 should not run with the L1
+				 * context so rearm and pull it.
+				 */
+				if (!kvmppc_xive_rearm_escalation(vcpu)) {
+					/*
+					 * Pending escalation so abort
+					 * H_ENTER_NESTED.
+					 */
+					kvmppc_set_gpr(vcpu, 3, 0);
+					trap = 0;
+				}
+
+			} else if (hcall_is_xics(req)) {
+				int ret;
+
+				ret = kvmppc_xive_xics_hcall(vcpu, req);
+				if (ret != H_TOO_HARD) {
+					kvmppc_set_gpr(vcpu, 3, ret);
+					trap = 0;
+				}
+			}
+		}
+		kvmppc_xive_pull_vcpu(vcpu);
+
+		if (kvm_is_radix(kvm))
+			vcpu->arch.slb_max = 0;
+	}
+
+	vcpu_vpa_increment_dispatch(vcpu);	/* ... and once after exit */
+
+	return trap;
+}
+
+/*
+ * Wait for some other vcpu thread to execute us, and wake us up when we
+ * need to handle something in the host.  vc->lock is dropped while asleep.
+ */
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+				 struct kvm_vcpu *vcpu, int wait_state)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {	/* else don't sleep */
+		spin_unlock(&vc->lock);
+		schedule();
+		spin_lock(&vc->lock);
+	}
+	finish_wait(&vcpu->arch.cpu_run, &wait);
+}
+
+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+	if (halt_poll_ns_grow) {
+		/* Multiply, but never end up below the configured start value */
+		vc->halt_poll_ns *= halt_poll_ns_grow;
+		if (halt_poll_ns_grow_start > vc->halt_poll_ns)
+			vc->halt_poll_ns = halt_poll_ns_grow_start;
+	}
+}
+
+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+	if (halt_poll_ns_shrink)
+		vc->halt_poll_ns /= halt_poll_ns_shrink;
+	else	/* a shrink factor of 0 resets the poll time to 0 */
+		vc->halt_poll_ns = 0;
+}
+
+#ifdef CONFIG_KVM_XICS
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+	/* Pending if flagged, or if the saved PIPR is below the saved CPPR */
+	return xics_on_xive() &&
+		(vcpu->arch.irq_pending ||
+		 vcpu->arch.xive_saved_state.pipr < vcpu->arch.xive_saved_state.cppr);
+}
+#else
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+#endif /* CONFIG_KVM_XICS */
+
+static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+{
+	/* True if an exception, prod, doorbell or XIVE interrupt is pending */
+	return vcpu->arch.pending_exceptions ||
+	       vcpu->arch.prodded ||
+	       kvmppc_doorbell_pending(vcpu) ||
+	       xive_interrupt_pending(vcpu);
+}
+
+static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+	bool idle = vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu);
+
+	return !idle;	/* true: not ceded, or ceded but woken */
+}
+
+/*
+ * Scan the runnable vcpus of the vcore and return 1 as soon as one
+ * of them should not stay blocked (it has not ceded, or it has ceded
+ * but has something pending); return 0 if none does.
+ */
+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
+{
+	int t;
+	struct kvm_vcpu *v;
+
+	for_each_runnable_thread(t, v, vc)
+		if (kvmppc_vcpu_check_block(v))
+			return 1;
+
+	return 0;
+}
+
+/*
+ * All the vcpus in this vcore are idle, so wait for a decrementer
+ * or external interrupt to one of the vcpus.  vc->lock is held.
+ */
+static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
+{
+	ktime_t cur, start_poll, start_wait;
+	int do_sleep = 1;
+	u64 block_ns;
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+	/* Poll for pending exceptions and ceded state */
+	cur = start_poll = ktime_get();
+	if (vc->halt_poll_ns) {
+		ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
+		++vc->runner->stat.generic.halt_attempted_poll;
+
+		vc->vcore_state = VCORE_POLLING;
+		spin_unlock(&vc->lock);
+
+		do {
+			if (kvmppc_vcore_check_block(vc)) {
+				do_sleep = 0;
+				break;
+			}
+			cur = ktime_get();
+		} while (kvm_vcpu_can_poll(cur, stop));
+
+		spin_lock(&vc->lock);
+		vc->vcore_state = VCORE_INACTIVE;
+
+		if (!do_sleep) {
+			++vc->runner->stat.generic.halt_successful_poll;
+			goto out;
+		}
+	}
+	/* Not polling, or polling found nothing: prepare to sleep */
+	prepare_to_rcuwait(&vc->wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (kvmppc_vcore_check_block(vc)) {
+		finish_rcuwait(&vc->wait);
+		do_sleep = 0;
+		/* If we polled, count this as a successful poll */
+		if (vc->halt_poll_ns)
+			++vc->runner->stat.generic.halt_successful_poll;
+		goto out;
+	}
+
+	start_wait = ktime_get();
+
+	vc->vcore_state = VCORE_SLEEPING;
+	trace_kvmppc_vcore_blocked(vc->runner, 0);
+	spin_unlock(&vc->lock);
+	schedule();
+	finish_rcuwait(&vc->wait);
+	spin_lock(&vc->lock);
+	vc->vcore_state = VCORE_INACTIVE;
+	trace_kvmppc_vcore_blocked(vc->runner, 1);
+	++vc->runner->stat.halt_successful_wait;
+
+	cur = ktime_get();
+
+out:
+	block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
+
+	/* Attribute wait time */
+	if (do_sleep) {
+		vc->runner->stat.generic.halt_wait_ns +=
+			ktime_to_ns(cur) - ktime_to_ns(start_wait);
+		KVM_STATS_LOG_HIST_UPDATE(
+				vc->runner->stat.generic.halt_wait_hist,
+				ktime_to_ns(cur) - ktime_to_ns(start_wait));
+		/* Attribute failed poll time */
+		if (vc->halt_poll_ns) {
+			vc->runner->stat.generic.halt_poll_fail_ns +=
+				ktime_to_ns(start_wait) -
+				ktime_to_ns(start_poll);
+			KVM_STATS_LOG_HIST_UPDATE(
+				vc->runner->stat.generic.halt_poll_fail_hist,
+				ktime_to_ns(start_wait) -
+				ktime_to_ns(start_poll));
+		}
+	} else {
+		/* Attribute successful poll time */
+		if (vc->halt_poll_ns) {
+			vc->runner->stat.generic.halt_poll_success_ns +=
+				ktime_to_ns(cur) -
+				ktime_to_ns(start_poll);
+			KVM_STATS_LOG_HIST_UPDATE(
+				vc->runner->stat.generic.halt_poll_success_hist,
+				ktime_to_ns(cur) - ktime_to_ns(start_poll));
+		}
+	}
+
+	/* Adjust poll time */
+	if (halt_poll_ns) {
+		if (block_ns <= vc->halt_poll_ns)
+			;	/* blocked no longer than the poll time: no change */
+		/* We slept and blocked for longer than the max halt time */
+		else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
+			shrink_halt_poll_ns(vc);
+		/* We slept and our poll time is too small */
+		else if (vc->halt_poll_ns < halt_poll_ns &&
+				block_ns < halt_poll_ns)
+			grow_halt_poll_ns(vc);
+		if (vc->halt_poll_ns > halt_poll_ns)
+			vc->halt_poll_ns = halt_poll_ns;
+	} else
+		vc->halt_poll_ns = 0;
+
+	trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
+}
+
+/*
+ * Set up the guest MMU under mmu_setup_lock if it is not ready yet.
+ * Cannot fail for a radix guest; else returns the htab/RMA setup result.
+ */
+static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int err = 0;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+	if (!kvm->arch.mmu_ready) {
+		/* only the non-radix setup step can return an error */
+		err = kvm_is_radix(kvm) ? 0 : kvmppc_hv_setup_htab_rma(vcpu);
+		if (!err) {
+			if (cpu_has_feature(CPU_FTR_ARCH_300))
+				kvmppc_setup_partition_table(kvm);
+			kvm->arch.mmu_ready = 1;
+		}
+	}
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return err;
+}
+
+static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int n_ceded, i, r;
+	struct kvmppc_vcore *vc;
+	struct kvm_vcpu *v;
+
+	trace_kvmppc_run_vcpu_enter(vcpu);
+
+	run->exit_reason = 0;
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+	kvmppc_update_vpas(vcpu);
+
+	/*
+	 * Synchronize with other threads in this virtual core
+	 */
+	vc = vcpu->arch.vcore;
+	spin_lock(&vc->lock);
+	vcpu->arch.ceded = 0;
+	vcpu->arch.run_task = current;
+	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+	vcpu->arch.busy_preempt = TB_NIL;
+	WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
+	++vc->n_runnable;
+
+	/*
+	 * This happens the first time this is called for a vcpu.
+	 * If the vcore is already running, we may be able to start
+	 * this thread straight away and have it join in.
+	 */
+	if (!signal_pending(current)) {
+		if ((vc->vcore_state == VCORE_PIGGYBACK ||
+		     vc->vcore_state == VCORE_RUNNING) &&
+		    !VCORE_IS_EXITING(vc)) {
+			kvmppc_update_vpa_dispatch(vcpu, vc);
+			kvmppc_start_thread(vcpu, vc);
+			trace_kvm_guest_enter(vcpu);
+		} else if (vc->vcore_state == VCORE_SLEEPING) {
+			rcuwait_wake_up(&vc->wait);
+		}
+	}
+
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	       !signal_pending(current)) {
+		/* See if the MMU is ready to go */
+		if (!vcpu->kvm->arch.mmu_ready) {
+			spin_unlock(&vc->lock);
+			r = kvmhv_setup_mmu(vcpu);
+			spin_lock(&vc->lock);
+			if (r) {
+				run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+				run->fail_entry.
+					hardware_entry_failure_reason = 0;
+				vcpu->arch.ret = r;
+				break;
+			}
+		}
+
+		if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+			kvmppc_vcore_end_preempt(vc);
+
+		if (vc->vcore_state != VCORE_INACTIVE) {
+			kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
+			continue;
+		}
+		for_each_runnable_thread(i, v, vc) {
+			kvmppc_core_prepare_to_enter(v);
+			if (signal_pending(v->arch.run_task)) {
+				kvmppc_remove_runnable(vc, v, mftb());
+				v->stat.signal_exits++;
+				v->run->exit_reason = KVM_EXIT_INTR;
+				v->arch.ret = -EINTR;
+				wake_up(&v->arch.cpu_run);
+			}
+		}
+		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+			break;
+		n_ceded = 0;
+		for_each_runnable_thread(i, v, vc) {
+			if (!kvmppc_vcpu_woken(v))
+				n_ceded += v->arch.ceded;
+			else
+				v->arch.ceded = 0;
+		}
+		vc->runner = vcpu;
+		if (n_ceded == vc->n_runnable) {
+			kvmppc_vcore_blocked(vc);
+		} else if (need_resched()) {
+			kvmppc_vcore_preempt(vc);
+			/* Let something else run */
+			cond_resched_lock(&vc->lock);
+			if (vc->vcore_state == VCORE_PREEMPT)
+				kvmppc_vcore_end_preempt(vc);
+		} else {
+			kvmppc_run_core(vc);
+		}
+		vc->runner = NULL;
+	}
+
+	/* Still runnable while the vcore is busy: wait until it is done */
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	       (vc->vcore_state == VCORE_RUNNING ||
+		vc->vcore_state == VCORE_EXITING ||
+		vc->vcore_state == VCORE_PIGGYBACK))
+		kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
+
+	if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+		kvmppc_vcore_end_preempt(vc);
+
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		kvmppc_remove_runnable(vc, vcpu, mftb());
+		vcpu->stat.signal_exits++;
+		run->exit_reason = KVM_EXIT_INTR;
+		vcpu->arch.ret = -EINTR;
+	}
+
+	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
+		/* Wake up some vcpu to run the core */
+		i = -1;
+		v = next_runnable_thread(vc, &i);
+		wake_up(&v->arch.cpu_run);
+	}
+
+	trace_kvmppc_run_vcpu_exit(vcpu);
+	spin_unlock(&vc->lock);
+	return vcpu->arch.ret;
+}
+
+/*
+ * Run one vCPU through a single guest entry using kvmhv_p9_guest_entry()
+ * and process the resulting exit.
+ *
+ * @vcpu:       the vCPU to run
+ * @time_limit: handed unchanged to kvmhv_p9_guest_entry()
+ * @lpcr:       LPCR value handed to kvmhv_p9_guest_entry(); LPCR_MER is ORed
+ *              in below when an external interrupt is pending and is not
+ *              injected directly
+ *
+ * Returns vcpu->arch.ret.  That is RESUME_GUEST unless changed below: the
+ * exit handler's result after a guest exit, -EINTR when a signal is pending,
+ * RESUME_HOST for a nested vCPU that has pending work, or the error from
+ * kvmhv_setup_mmu() (that last path returns before the exit tracepoint).
+ */
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
+			  unsigned long lpcr)
+{
+	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+	struct kvm_run *run = vcpu->run;
+	int trap, r, pcpu;
+	int srcu_idx;
+	struct kvmppc_vcore *vc;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	unsigned long flags;
+	u64 tb;
+
+	trace_kvmppc_run_vcpu_enter(vcpu);
+
+	run->exit_reason = 0;
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+
+	vc = vcpu->arch.vcore;
+	vcpu->arch.ceded = 0;
+	vcpu->arch.run_task = current;
+	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+	/* See if the MMU is ready to go */
+	if (unlikely(!kvm->arch.mmu_ready)) {
+		r = kvmhv_setup_mmu(vcpu);
+		if (r) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			vcpu->arch.ret = r;
+			return r;
+		}
+	}
+
+	if (need_resched())
+		cond_resched();
+
+	kvmppc_update_vpas(vcpu);
+
+	preempt_disable();
+	pcpu = smp_processor_id();
+	if (kvm_is_radix(kvm))
+		kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+
+	/* flags save not required, but irq_pmu has no disable/enable API */
+	powerpc_local_irq_pmu_save(flags);
+
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+
+	if (signal_pending(current))
+		goto sigpend;
+	/*
+	 * Back out without entering the guest; arch.ret is still
+	 * RESUME_GUEST at this point.
+	 */
+	if (need_resched() || !kvm->arch.mmu_ready)
+		goto out;
+
+	vcpu->cpu = pcpu;
+	vcpu->arch.thread_cpu = pcpu;
+	vc->pcpu = pcpu;
+	local_paca->kvm_hstate.kvm_vcpu = vcpu;
+	local_paca->kvm_hstate.ptid = 0;
+	local_paca->kvm_hstate.fake_suspend = 0;
+
+	/*
+	 * Orders set cpu/thread_cpu vs testing for pending interrupts and
+	 * doorbells below. The other side is when these fields are set vs
+	 * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to
+	 * kick a vCPU to notice the pending interrupt.
+	 */
+	smp_mb();
+
+	if (!nested) {
+		kvmppc_core_prepare_to_enter(vcpu);
+		if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+			     &vcpu->arch.pending_exceptions) ||
+		    xive_interrupt_pending(vcpu)) {
+			/*
+			 * For nested HV, don't synthesize but always pass MER,
+			 * the L0 will be able to optimise that more
+			 * effectively than manipulating registers directly.
+			 */
+			if (!kvmhv_on_pseries() && (__kvmppc_get_msr_hv(vcpu) & MSR_EE))
+				kvmppc_inject_interrupt_hv(vcpu,
+							   BOOK3S_INTERRUPT_EXTERNAL, 0);
+			else
+				lpcr |= LPCR_MER;
+		}
+	} else if (vcpu->arch.pending_exceptions ||
+		   vcpu->arch.doorbell_request ||
+		   xive_interrupt_pending(vcpu)) {
+		/* Nested vCPU with pending work: return to the host instead */
+		vcpu->arch.ret = RESUME_HOST;
+		goto out;
+	}
+
+	/* Cancel the dec_timer hrtimer if it was left running */
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
+
+	tb = mftb();
+
+	kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset);
+
+	trace_kvm_guest_enter(vcpu);
+
+	guest_timing_enter_irqoff();
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	guest_state_enter_irqoff();
+	this_cpu_disable_ftrace();
+
+	trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
+	vcpu->arch.trap = trap;
+
+	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	set_irq_happened(trap);
+
+	vcpu->cpu = -1;
+	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
+	if (!vtime_accounting_enabled_this_cpu()) {
+		powerpc_local_irq_pmu_restore(flags);
+		/*
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
+		 * ticks that occurred while running the guest are accounted to
+		 * the guest. If vtime accounting is enabled, accounting uses
+		 * TB rather than ticks, so it can be done without enabling
+		 * interrupts here, which has the problem that it accounts
+		 * interrupt processing overhead to the host.
+		 */
+		powerpc_local_irq_pmu_save(flags);
+	}
+	guest_timing_exit_irqoff();
+
+	powerpc_local_irq_pmu_restore(flags);
+
+	preempt_enable();
+
+	/*
+	 * cancel pending decrementer exception if DEC is now positive, or if
+	 * entering a nested guest in which case the decrementer is now owned
+	 * by L2 and the L1 decrementer is provided in hdec_expires
+	 */
+	if (kvmppc_core_pending_dec(vcpu) &&
+			((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
+			 (trap == BOOK3S_INTERRUPT_SYSCALL &&
+			  kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
+		kvmppc_core_dequeue_dec(vcpu);
+
+	trace_kvm_guest_exit(vcpu);
+	/* A zero trap value skips the exit handlers and keeps RESUME_GUEST */
+	r = RESUME_GUEST;
+	if (trap) {
+		if (!nested)
+			r = kvmppc_handle_exit_hv(vcpu, current);
+		else
+			r = kvmppc_handle_nested_exit(vcpu);
+	}
+	vcpu->arch.ret = r;
+
+	/*
+	 * If the guest is to be resumed but kvmppc_vcpu_check_block() is
+	 * false, sleep interruptibly until it becomes true or a signal is
+	 * pending.
+	 */
+	if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) {
+		kvmppc_set_timer(vcpu);
+
+		prepare_to_rcuwait(wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (signal_pending(current)) {
+				vcpu->stat.signal_exits++;
+				run->exit_reason = KVM_EXIT_INTR;
+				vcpu->arch.ret = -EINTR;
+				break;
+			}
+
+			if (kvmppc_vcpu_check_block(vcpu))
+				break;
+
+			trace_kvmppc_vcore_blocked(vcpu, 0);
+			schedule();
+			trace_kvmppc_vcore_blocked(vcpu, 1);
+		}
+		finish_rcuwait(wait);
+	}
+	vcpu->arch.ceded = 0;
+
+ done:
+	trace_kvmppc_run_vcpu_exit(vcpu);
+
+	return vcpu->arch.ret;
+
+	/* Signal pending before guest entry: report KVM_EXIT_INTR / -EINTR */
+ sigpend:
+	vcpu->stat.signal_exits++;
+	run->exit_reason = KVM_EXIT_INTR;
+	vcpu->arch.ret = -EINTR;
+	/* Common back-out path: undo the state set up before guest entry */
+ out:
+	vcpu->cpu = -1;
+	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	powerpc_local_irq_pmu_restore(flags);
+	preempt_enable();
+	goto done;
+}
+
+/*
+ * Top-level run routine for a vCPU.
+ *
+ * Refuses entry (-EINVAL) if the vCPU is not marked sane or, with
+ * CONFIG_PPC_TRANSACTIONAL_MEM, if the current task has an active
+ * transaction; refuses entry (-EINTR) if a signal is already pending.
+ * Otherwise it builds an MSR facility mask (FP/VEC/VSX/TM, as available),
+ * passes it to msr_check_and_set(), and then loops running the vCPU --
+ * via kvmhv_run_single_vcpu() when CPU_FTR_ARCH_300 is present, otherwise
+ * via kvmppc_run_vcpu() -- for as long as the result is a "resume guest"
+ * code.  PAPR hcall exits, RESUME_PAGE_FAULT and RESUME_PASSTHROUGH results
+ * are processed inside the loop.
+ *
+ * Returns the last result value computed by the loop, or the error codes
+ * listed above.
+ */
+static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int r;
+	int srcu_idx;
+	struct kvm *kvm;
+	unsigned long msr;
+
+	start_timing(vcpu, &vcpu->arch.vcpu_entry);
+
+	if (!vcpu->arch.sane) {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return -EINVAL;
+	}
+
+	/* No need to go into the guest when all we'll do is come back out */
+	if (signal_pending(current)) {
+		run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Don't allow entry with a suspended transaction, because
+	 * the guest entry/exit code will lose it.
+	 */
+	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+	    (current->thread.regs->msr & MSR_TM)) {
+		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			return -EINVAL;
+		}
+	}
+#endif
+
+	/*
+	 * Force online to 1 for the sake of old userspace which doesn't
+	 * set it.
+	 */
+	if (!vcpu->arch.online) {
+		atomic_inc(&vcpu->arch.vcore->online_count);
+		vcpu->arch.online = 1;
+	}
+
+	kvmppc_core_prepare_to_enter(vcpu);
+
+	kvm = vcpu->kvm;
+	atomic_inc(&kvm->arch.vcpus_running);
+	/* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
+	smp_mb();
+
+	/* Collect the MSR facility bits to request from msr_check_and_set() */
+	msr = 0;
+	if (IS_ENABLED(CONFIG_PPC_FPU))
+		msr |= MSR_FP;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		msr |= MSR_VEC;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		msr |= MSR_VSX;
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+			(kvmppc_get_hfscr_hv(vcpu) & HFSCR_TM))
+		msr |= MSR_TM;
+	msr = msr_check_and_set(msr);
+
+	kvmppc_save_user_regs();
+
+	kvmppc_save_current_sprs();
+
+	/* Without CPU_FTR_ARCH_300 the vCPU waits on its vcore's wait object */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		vcpu->arch.waitp = &vcpu->arch.vcore->wait;
+	vcpu->arch.pgdir = kvm->mm->pgd;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
+	do {
+		accumulate_time(vcpu, &vcpu->arch.guest_entry);
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
+						  vcpu->arch.vcore->lpcr);
+		else
+			r = kvmppc_run_vcpu(vcpu);
+
+		if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+			accumulate_time(vcpu, &vcpu->arch.hcall);
+
+			if (WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+				/*
+				 * These should have been caught and reflected
+				 * into the guest by now. Final sanity check:
+				 * don't allow userspace to execute hcalls in
+				 * the hypervisor.
+				 */
+				r = RESUME_GUEST;
+				continue;
+			}
+			trace_kvm_hcall_enter(vcpu);
+			r = kvmppc_pseries_do_hcall(vcpu);
+			trace_kvm_hcall_exit(vcpu, r);
+			kvmppc_core_prepare_to_enter(vcpu);
+		} else if (r == RESUME_PAGE_FAULT) {
+			accumulate_time(vcpu, &vcpu->arch.pg_fault);
+			/* The page fault handler runs under the kvm->srcu read lock */
+			srcu_idx = srcu_read_lock(&kvm->srcu);
+			r = kvmppc_book3s_hv_page_fault(vcpu,
+				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+			srcu_read_unlock(&kvm->srcu, srcu_idx);
+		} else if (r == RESUME_PASSTHROUGH) {
+			if (WARN_ON(xics_on_xive()))
+				r = H_SUCCESS;
+			else
+				r = kvmppc_xics_rm_complete(vcpu, 0);
+		}
+	} while (is_kvmppc_resume_guest(r));
+	accumulate_time(vcpu, &vcpu->arch.vcpu_exit);
+
+	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
+	atomic_dec(&kvm->arch.vcpus_running);
+
+	srr_regs_clobbered();
+
+	end_timing(vcpu);
+
+	return r;
+}
+
+/*
+ * Fill in the segment page size entry that *sps points at, using page shift
+ * @shift and SLB encoding @sllp, then advance *sps to the following entry.
+ */
+static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+				     int shift, int sllp)
+{
+	struct kvm_ppc_one_seg_page_size *entry = *sps;
+
+	entry->page_shift = shift;
+	entry->slb_enc = sllp;
+
+	/* Slot 0 describes the base page size itself */
+	entry->enc[0].page_shift = shift;
+	entry->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
+
+	/*
+	 * Add 16MB MPSS support (may get filtered out by userspace)
+	 */
+	if (shift != 24) {
+		int mpss_enc = kvmppc_pgsize_lp_encoding(shift, 24);
+
+		if (mpss_enc != -1) {
+			entry->enc[1].page_shift = 24;
+			entry->enc[1].pte_enc = mpss_enc;
+		}
+	}
+
+	*sps = entry + 1;
+}
+
+/*
+ * Fill @info with the MMU description reported for this VM: storage key
+ * counts, flags, SLB size and the list of segment page sizes.
+ * Always returns 0.
+ */
+static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
+					 struct kvm_ppc_smmu_info *info)
+{
+	struct kvm_ppc_one_seg_page_size *next = &info->sps[0];
+
+	/* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
+	info->slb_size = 32;
+	info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
+	/* If running as a nested hypervisor, we don't support HPT guests */
+	if (kvmhv_on_pseries())
+		info->flags |= KVM_PPC_NO_HASH;
+
+	/*
+	 * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
+	 * POWER7 doesn't support keys for instruction accesses,
+	 * POWER8 and POWER9 do.
+	 */
+	info->data_keys = 32;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		info->instr_keys = 32;
+	else
+		info->instr_keys = 0;
+
+	/* We only support these sizes for now, and no multi-size segments */
+	kvmppc_add_seg_page_size(&next, 12, 0);
+	kvmppc_add_seg_page_size(&next, 16, SLB_VSID_L | SLB_VSID_LP_01);
+	kvmppc_add_seg_page_size(&next, 24, SLB_VSID_L);
+
+	return 0;
+}
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ *
+ * @kvm: the VM whose memslot is queried
+ * @log: identifies the slot (log->slot) and the user buffer
+ *       (log->dirty_bitmap) that receives the bitmap
+ *
+ * Runs with kvm->slots_lock held for its whole duration.
+ *
+ * Returns 0 on success, -EINVAL if log->slot is out of range, -ENOENT if
+ * the slot does not exist or has no dirty bitmap, -EFAULT if the copy to
+ * userspace fails, or the error returned by the radix/HPT dirty log helper.
+ */
+static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
+					 struct kvm_dirty_log *log)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int r;
+	unsigned long n, i;
+	unsigned long *buf, *p;
+	struct kvm_vcpu *vcpu;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	slots = kvm_memslots(kvm);
+	memslot = id_to_memslot(slots, log->slot);
+	r = -ENOENT;
+	if (!memslot || !memslot->dirty_bitmap)
+		goto out;
+
+	/*
+	 * Use second half of bitmap area because both HPT and radix
+	 * accumulate bits in the first half.
+	 */
+	n = kvm_dirty_bitmap_bytes(memslot);
+	buf = memslot->dirty_bitmap + n / sizeof(long);
+	memset(buf, 0, n);
+
+	if (kvm_is_radix(kvm))
+		r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
+	else
+		r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
+	if (r)
+		goto out;
+
+	/*
+	 * We accumulate dirty bits in the first half of the
+	 * memslot's dirty_bitmap area, for when pages are paged
+	 * out or modified by the host directly.  Pick up these
+	 * bits and add them to the map.
+	 */
+	p = memslot->dirty_bitmap;
+	/* xchg() reads and zeroes each word of the first half in one step */
+	for (i = 0; i < n / sizeof(long); ++i)
+		buf[i] |= xchg(&p[i], 0);
+
+	/* Harvest dirty bits from VPA and DTL updates */
+	/* Note: we never modify the SLB shadow buffer areas */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
+		kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+	}
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, buf, n))
+		goto out;
+
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+/* Free the memslot's rmap array and clear the pointer to it. */
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
+{
+	void *rmap = slot->arch.rmap;
+
+	vfree(rmap);
+	slot->arch.rmap = NULL;
+}
+
+/*
+ * Prepare the rmap array of @new for a memslot change.
+ *
+ * A newly created slot gets a zeroed rmap array with one element per page;
+ * a deleted slot is left untouched; any other change re-uses the rmap array
+ * of @old.
+ *
+ * Returns 0 on success or -ENOMEM if the array is larger than total RAM or
+ * cannot be allocated.
+ */
+static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+				const struct kvm_memory_slot *old,
+				struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+	unsigned long rmap_bytes;
+
+	switch (change) {
+	case KVM_MR_DELETE:
+		break;
+	case KVM_MR_CREATE:
+		rmap_bytes = array_size(new->npages, sizeof(*new->arch.rmap));
+		if ((rmap_bytes >> PAGE_SHIFT) > totalram_pages())
+			return -ENOMEM;
+
+		new->arch.rmap = vzalloc(rmap_bytes);
+		if (!new->arch.rmap)
+			return -ENOMEM;
+		break;
+	default:
+		new->arch.rmap = old->arch.rmap;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Finish a memslot change.
+ *
+ * Bumps kvm->arch.mmio_update for every change except deletion, flushes the
+ * radix mappings of @old when only the dirty-logging flag changed, and --
+ * only if kvm->arch.secure_guest is set -- forwards slot creation and
+ * deletion to the kvmppc_uvmem_memslot_*() helpers.
+ */
+static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
+				struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+	/*
+	 * If we are creating or modifying a memslot, it might make
+	 * some address that was previously cached as emulated
+	 * MMIO be no longer emulated MMIO, so invalidate
+	 * all the caches of emulated MMIO translations.
+	 */
+	if (change != KVM_MR_DELETE)
+		atomic64_inc(&kvm->arch.mmio_update);
+
+	/*
+	 * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels
+	 * have already called kvm_arch_flush_shadow_memslot() to
+	 * flush shadow mappings.  For KVM_MR_CREATE we have no
+	 * previous mappings.  So the only case to handle is
+	 * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit
+	 * has been changed.
+	 * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES
+	 * to get rid of any THP PTEs in the partition-scoped page tables
+	 * so we can track dirtiness at the page level; we flush when
+	 * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to
+	 * using THP PTEs.
+	 */
+	if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
+	    ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
+		kvmppc_radix_flush_memslot(kvm, old);
+	/*
+	 * If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+	 */
+	if (!kvm->arch.secure_guest)
+		return;
+
+	switch (change) {
+	case KVM_MR_CREATE:
+		/*
+		 * @TODO kvmppc_uvmem_memslot_create() can fail and
+		 * return error. Fix this.
+		 */
+		kvmppc_uvmem_memslot_create(kvm, new);
+		break;
+	case KVM_MR_DELETE:
+		kvmppc_uvmem_memslot_delete(kvm, old);
+		break;
+	default:
+		/* TODO: Handle KVM_MR_MOVE */
+		break;
+	}
+}
+
+/*
+ * Replace the bits selected by @mask with @lpcr, both in kvm->arch.lpcr and
+ * in the lpcr field of every allocated vcore.
+ * The caller is required to hold kvm->arch.mmu_setup_lock, which provides
+ * mutual exclusion for the kvm->arch.lpcr update.
+ */
+void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
+{
+	u32 updated = 0;
+	long int idx;
+
+	/* Nothing to do when the masked bits already hold the new value */
+	if ((kvm->arch.lpcr & mask) == lpcr)
+		return;
+
+	kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
+
+	for (idx = 0; idx < KVM_MAX_VCORES; idx++) {
+		struct kvmppc_vcore *vcore = kvm->arch.vcores[idx];
+
+		if (vcore == NULL)
+			continue;
+
+		spin_lock(&vcore->lock);
+		vcore->lpcr = (vcore->lpcr & ~mask) | lpcr;
+		verify_lpcr(kvm, vcore->lpcr);
+		spin_unlock(&vcore->lock);
+
+		/* Stop once as many vcores as are online have been updated */
+		updated++;
+		if (updated >= kvm->arch.online_vcores)
+			break;
+	}
+}
+
+/*
+ * Compute the two doublewords of this VM's partition table entry (radix or
+ * HPT flavour, depending on kvm_is_radix()) and install them for
+ * kvm->arch.lpid via kvmhv_set_ptbl_entry().
+ */
+void kvmppc_setup_partition_table(struct kvm *kvm)
+{
+	unsigned long dw0, dw1;
+
+	if (kvm_is_radix(kvm)) {
+		dw0 = PATB_HR | radix__get_tree_size() |
+			__pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
+		dw1 = PATB_GR | kvm->arch.process_table;
+	} else {
+		/* PS field - page size for VRMA */
+		dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
+			((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
+		/* HTABSIZE and HTABORG fields */
+		dw0 |= kvm->arch.sdr1;
+
+		/* Second dword as set by userspace */
+		dw1 = kvm->arch.process_table;
+	}
+
+	kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
+}
+
+/*
+ * Set up HPT (hashed page table) and RMA (real-mode area).
+ * Must be called with kvm->arch.mmu_setup_lock held.
+ *
+ * Allocates an HPT if none exists yet, looks up the memslot and VMA backing
+ * guest physical address 0, derives the VRMA page size from the VMA's kernel
+ * page size, and maps the VRMA.
+ *
+ * Returns 0 on success, the negative error from kvmppc_allocate_hpt() if no
+ * HPT could be allocated, or -EINVAL if there is no valid memslot at guest
+ * physical address 0 or its VMA is missing or marked VM_IO.
+ */
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
+{
+	int err = 0;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long hva;
+	struct kvm_memory_slot *memslot;
+	struct vm_area_struct *vma;
+	unsigned long lpcr = 0, senc;
+	unsigned long psize, porder;
+	int srcu_idx;
+
+	/* Allocate hashed page table (if not done already) and reset it */
+	if (!kvm->arch.hpt.virt) {
+		int order = KVM_DEFAULT_HPT_ORDER;
+		struct kvm_hpt_info info;
+
+		err = kvmppc_allocate_hpt(&info, order);
+		/* If we get here, it means userspace didn't specify a
+		 * size explicitly.  So, try successively smaller
+		 * sizes if the default failed. */
+		while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
+			err  = kvmppc_allocate_hpt(&info, order);
+
+		if (err < 0) {
+			pr_err("KVM: Couldn't alloc HPT\n");
+			goto out;
+		}
+
+		kvmppc_set_hpt(kvm, &info);
+	}
+
+	/* Look up the memslot for guest physical address 0 */
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	memslot = gfn_to_memslot(kvm, 0);
+
+	/* We must have some memory at 0 by now */
+	err = -EINVAL;
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		goto out_srcu;
+
+	/* Look up the VMA for the start of this memory slot */
+	hva = memslot->userspace_addr;
+	mmap_read_lock(kvm->mm);
+	vma = vma_lookup(kvm->mm, hva);
+	/* err is still -EINVAL on this failure path */
+	if (!vma || (vma->vm_flags & VM_IO))
+		goto up_out;
+
+	psize = vma_kernel_pagesize(vma);
+
+	mmap_read_unlock(kvm->mm);
+
+	/* We can handle 4k, 64k or 16M pages in the VRMA */
+	if (psize >= 0x1000000)
+		psize = 0x1000000;
+	else if (psize >= 0x10000)
+		psize = 0x10000;
+	else
+		psize = 0x1000;
+	porder = __ilog2(psize);
+
+	senc = slb_pgsize_encoding(psize);
+	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* Create HPTEs in the hash page table for the VRMA */
+	kvmppc_map_vrma(vcpu, memslot, porder);
+
+	/* Update VRMASD field in the LPCR */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* the -4 is to account for senc values starting at 0x10 */
+		lpcr = senc << (LPCR_VRMASD_SH - 4);
+		kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+	}
+
+	/* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
+	smp_wmb();
+	err = 0;
+ out_srcu:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+ out:
+	return err;
+
+	/* Failure with the mmap lock still held: drop it, then the SRCU lock */
+ up_out:
+	mmap_read_unlock(kvm->mm);
+	goto out_srcu;
+}
+
+/*
+ * Switch the VM's MMU mode to HPT.
+ * The caller must hold kvm->arch.mmu_setup_lock, mmu_ready must be 0 and
+ * no vcpus may be running.  Always returns 0.
+ */
+int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
+{
+	unsigned long mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+
+	if (nesting_enabled(kvm))
+		kvmhv_release_all_nested(kvm);
+	kvmppc_rmap_reset(kvm);
+	kvm->arch.process_table = 0;
+
+	/* Mutual exclusion with kvm_unmap_gfn_range etc. */
+	spin_lock(&kvm->mmu_lock);
+	kvm->arch.radix = 0;
+	spin_unlock(&kvm->mmu_lock);
+
+	kvmppc_free_radix(kvm);
+
+	/* Only LPCR_VPM1 stays set among the masked bits */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mask |= LPCR_HAIL;
+	kvmppc_update_lpcr(kvm, LPCR_VPM1, mask);
+
+	return 0;
+}
+
+/*
+ * Switch the VM's MMU mode to radix.
+ * The caller must hold kvm->arch.mmu_setup_lock, mmu_ready must be 0 and
+ * no vcpus may be running.
+ * Returns 0 on success or the error from kvmppc_init_vm_radix().
+ */
+int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
+{
+	unsigned long new_bits = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+	unsigned long mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+	int rc;
+
+	rc = kvmppc_init_vm_radix(kvm);
+	if (rc)
+		return rc;
+
+	kvmppc_rmap_reset(kvm);
+
+	/* Mutual exclusion with kvm_unmap_gfn_range etc. */
+	spin_lock(&kvm->mmu_lock);
+	kvm->arch.radix = 1;
+	spin_unlock(&kvm->mmu_lock);
+
+	kvmppc_free_hpt(&kvm->arch.hpt);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mask |= LPCR_HAIL;
+		/* Carry over HAIL only in HV mode and if the host LPCR has it */
+		if (cpu_has_feature(CPU_FTR_HVMODE) &&
+		    (kvm->arch.host_lpcr & LPCR_HAIL))
+			new_bits |= LPCR_HAIL;
+	}
+	kvmppc_update_lpcr(kvm, new_bits, mask);
+
+	return 0;
+}
+
+#ifdef CONFIG_KVM_XICS
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ *
+ * Does nothing when CPU_FTR_ARCH_300 is present or when
+ * kvmppc_host_rm_ops_hv is already set.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+	struct kvmppc_host_rm_ops *ops;
+	unsigned long l_ops;
+	int cpu, core;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	/* Not the first time here ? */
+	if (kvmppc_host_rm_ops_hv != NULL)
+		return;
+
+	ops = kzalloc(sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return;
+
+	/*
+	 * One zeroed entry per core.  kcalloc() checks the count * size
+	 * multiplication for overflow instead of open-coding it.
+	 */
+	ops->rm_core = kcalloc(cpu_nr_cores(),
+			       sizeof(struct kvmppc_host_rm_core), GFP_KERNEL);
+
+	if (!ops->rm_core) {
+		kfree(ops);
+		return;
+	}
+
+	cpus_read_lock();
+
+	/* Mark each core whose first thread is online as being in the host */
+	for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
+		if (!cpu_online(cpu))
+			continue;
+
+		core = cpu >> threads_shift;
+		ops->rm_core[core].rm_state.in_host = 1;
+	}
+
+	ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
+	/*
+	 * Make the contents of the kvmppc_host_rm_ops structure visible
+	 * to other CPUs before we assign it to the global variable.
+	 * Do an atomic assignment (no locks used here), but if someone
+	 * beats us to it, just free our copy and return.
+	 */
+	smp_wmb();
+	l_ops = (unsigned long) ops;
+
+	if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+		cpus_read_unlock();
+		kfree(ops->rm_core);
+		kfree(ops);
+		return;
+	}
+
+	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+					     "ppc/kvm_book3s:prepare",
+					     kvmppc_set_host_core,
+					     kvmppc_clear_host_core);
+	cpus_read_unlock();
+}
+
+/*
+ * Tear down the global kvmppc_host_rm_ops_hv structure, if one exists:
+ * remove the CPU hotplug state, free the per-core array and the structure,
+ * and reset the global pointer to NULL.
+ */
+void kvmppc_free_host_rm_ops(void)
+{
+	if (!kvmppc_host_rm_ops_hv)
+		return;
+
+	cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
+	kfree(kvmppc_host_rm_ops_hv->rm_core);
+	kfree(kvmppc_host_rm_ops_hv);
+	kvmppc_host_rm_ops_hv = NULL;
+}
+#endif
+
+/*
+ * Initialise the HV-specific state of a new VM: locks, LPID, default hcall
+ * set, initial LPCR value, MMU mode (radix if the host uses radix), TLB set
+ * count and SMT mode.
+ *
+ * Returns 0 on success, -ENOMEM if no LPID could be allocated, or the error
+ * from kvmppc_init_vm_radix() (in which case the LPID is released again).
+ */
+static int kvmppc_core_init_vm_hv(struct kvm *kvm)
+{
+	unsigned long lpcr, lpid;
+	int ret;
+
+	mutex_init(&kvm->arch.uvmem_lock);
+	INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
+	mutex_init(&kvm->arch.mmu_setup_lock);
+
+	/* Allocate the guest's logical partition ID */
+
+	lpid = kvmppc_alloc_lpid();
+	if ((long)lpid < 0)
+		return -ENOMEM;
+	kvm->arch.lpid = lpid;
+
+	kvmppc_alloc_host_rm_ops();
+
+	kvmhv_vm_nested_init(kvm);
+
+	/*
+	 * Since we don't flush the TLB when tearing down a VM,
+	 * and this lpid might have previously been used,
+	 * make sure we flush on each core before running the new VM.
+	 * On POWER9, the tlbie in mmu_partition_table_set_entry()
+	 * does this flush for us.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		cpumask_setall(&kvm->arch.need_tlb_flush);
+
+	/* Start out with the default set of hcalls enabled */
+	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
+	       sizeof(kvm->arch.enabled_hcalls));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+
+	/* Init LPCR for virtual RMA mode */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		kvm->arch.host_lpid = mfspr(SPRN_LPID);
+		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+		/* Keep only the PECE and LPES bits of the host LPCR */
+		lpcr &= LPCR_PECE | LPCR_LPES;
+	} else {
+		/*
+		 * The L2 LPES mode will be set by the L0 according to whether
+		 * or not it needs to take external interrupts in HV mode.
+		 */
+		lpcr = 0;
+	}
+	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+		LPCR_VPM0 | LPCR_VPM1;
+	kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* On POWER8 turn on online bit to enable PURR/SPURR */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lpcr |= LPCR_ONL;
+	/*
+	 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
+	 * Set HVICE bit to enable hypervisor virtualization interrupts.
+	 * Set HEIC to prevent OS interrupts to go to hypervisor (should
+	 * be unnecessary but better safe than sorry in case we re-enable
+	 * EE in HV mode with this LPCR still set)
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		lpcr &= ~LPCR_VPM0;
+		lpcr |= LPCR_HVICE | LPCR_HEIC;
+
+		/*
+		 * If xive is enabled, we route 0x500 interrupts directly
+		 * to the guest.
+		 */
+		if (xics_on_xive())
+			lpcr |= LPCR_LPES;
+	}
+
+	/*
+	 * If the host uses radix, the guest starts out as radix.
+	 */
+	if (radix_enabled()) {
+		kvm->arch.radix = 1;
+		kvm->arch.mmu_ready = 1;
+		lpcr &= ~LPCR_VPM1;
+		lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+		if (cpu_has_feature(CPU_FTR_HVMODE) &&
+		    cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    (kvm->arch.host_lpcr & LPCR_HAIL))
+			lpcr |= LPCR_HAIL;
+		ret = kvmppc_init_vm_radix(kvm);
+		if (ret) {
+			/* Only the LPID is released on this error path */
+			kvmppc_free_lpid(kvm->arch.lpid);
+			return ret;
+		}
+		kvmppc_setup_partition_table(kvm);
+	}
+
+	verify_lpcr(kvm, lpcr);
+	kvm->arch.lpcr = lpcr;
+
+	/* Initialization for future HPT resizes */
+	kvm->arch.resize_hpt = NULL;
+
+	/*
+	 * Work out how many sets the TLB has, for the use of
+	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/*
+		 * P10 will flush all the congruence class with a single tlbiel
+		 */
+		kvm->arch.tlb_sets = 1;
+	} else if (radix_enabled())
+		kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;	/* 128 */
+	else if (cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;	/* 256 */
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		kvm->arch.tlb_sets = POWER8_TLB_SETS;		/* 512 */
+	else
+		kvm->arch.tlb_sets = POWER7_TLB_SETS;		/* 128 */
+
+	/*
+	 * Track that we now have a HV mode VM active. This blocks secondary
+	 * CPU threads from coming online.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm_hv_vm_activated();
+
+	/*
+	 * Initialize smt_mode depending on processor.
+	 * POWER8 and earlier have to use "strict" threading, where
+	 * all vCPUs in a vcore have to run on the same (sub)core,
+	 * whereas on POWER9 the threads can each run a different
+	 * guest.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm->arch.smt_mode = threads_per_subcore;
+	else
+		kvm->arch.smt_mode = 1;
+	kvm->arch.emul_smt_mode = 1;
+
+	return 0;
+}
+
+/*
+ * Create the VM's MMU debugfs entries, plus the radix ones when the host
+ * has radix enabled.  Always returns 0.
+ */
+static int kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
+{
+	kvmppc_mmu_debugfs_init(kvm);
+
+	if (!radix_enabled())
+		return 0;
+
+	kvmhv_radix_debugfs_init(kvm);
+	return 0;
+}
+
+/* Free every vcore slot of the VM and reset its online vcore count to 0. */
+static void kvmppc_free_vcores(struct kvm *kvm)
+{
+	long int idx = 0;
+
+	while (idx < KVM_MAX_VCORES) {
+		kfree(kvm->arch.vcores[idx]);
+		idx++;
+	}
+
+	kvm->arch.online_vcores = 0;
+}
+
+/*
+ * Release the HV-specific resources of a VM: its vcores, its radix tree or
+ * HPT, (with CPU_FTR_ARCH_300) its nested guests and partition table entry,
+ * its LPID and its passthrough IRQ map.
+ */
+static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		kvm_hv_vm_deactivated();
+
+	kvmppc_free_vcores(kvm);
+
+
+	if (kvm_is_radix(kvm))
+		kvmppc_free_radix(kvm);
+	else
+		kvmppc_free_hpt(&kvm->arch.hpt);
+
+	/* Perform global invalidation and return lpid to the pool */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (nesting_enabled(kvm))
+			kvmhv_release_all_nested(kvm);
+		kvm->arch.process_table = 0;
+		if (kvm->arch.secure_guest)
+			uv_svm_terminate(kvm->arch.lpid);
+		/* Clear this LPID's partition table entry */
+		kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
+	}
+
+	kvmppc_free_lpid(kvm->arch.lpid);
+
+	kvmppc_free_pimap(kvm);
+}
+
+/*
+ * We don't need to emulate any privileged instructions or dcbz.
+ * Unconditionally returns EMULATE_FAIL; @vcpu, @inst and @advance are
+ * not used.
+ */
+static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu,
+				     unsigned int inst, int *advance)
+{
+	return EMULATE_FAIL;
+}
+
+/*
+ * mtspr emulation hook: unconditionally returns EMULATE_FAIL without
+ * looking at @sprn or @spr_val.
+ */
+static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
+					ulong spr_val)
+{
+	return EMULATE_FAIL;
+}
+
+/*
+ * mfspr emulation hook: unconditionally returns EMULATE_FAIL and never
+ * writes to *spr_val.
+ */
+static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
+					ulong *spr_val)
+{
+	return EMULATE_FAIL;
+}
+
+/*
+ * Report whether this processor can run HV KVM.
+ *
+ * Returns 0 when the CPU has both CPU_FTR_HVMODE and CPU_FTR_ARCH_206, or
+ * when it has CPU_FTR_ARCH_300 with radix enabled (POWER9 in radix mode is
+ * capable of being a nested hypervisor); returns -EIO otherwise.
+ */
+static int kvmppc_core_check_processor_compat_hv(void)
+{
+	if ((cpu_has_feature(CPU_FTR_HVMODE) &&
+	     cpu_has_feature(CPU_FTR_ARCH_206)) ||
+	    (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()))
+		return 0;
+
+	return -EIO;
+}
+
+#ifdef CONFIG_KVM_XICS
+
+/* Free the VM's passthrough IRQ map; the pointer itself is left unchanged. */
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+	struct kvmppc_passthru_irqmap *pimap = kvm->arch.pimap;
+
+	kfree(pimap);
+}
+
+/*
+ * Allocate a zeroed passthrough IRQ map.
+ * Returns the new map, or NULL if the allocation fails.
+ */
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
+{
+	struct kvmppc_passthru_irqmap *pimap;
+
+	pimap = kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
+
+	return pimap;
+}
+
+/*
+ * Record a mapping from guest interrupt @guest_gsi to host interrupt
+ * @host_irq in the VM's passthrough IRQ map, allocating the map on first
+ * use.  The map is updated under kvm->lock.
+ *
+ * Returns:
+ *   1        if kvm_irq_bypass is off
+ *   -EIO     if @host_irq has no irq_desc
+ *   -ENOMEM  if the map cannot be allocated
+ *   -ENOENT  if the interrupt's chip is missing or is not a PNV OPAL MSI chip
+ *   -EINVAL  if @guest_gsi is already mapped to a hardware IRQ
+ *   -EAGAIN  if the map is full
+ *   0        otherwise -- including when kvmppc_xive_set_mapped() returns an
+ *            error, in which case the entry's r_hwirq is reset to 0
+ */
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+	struct irq_desc *desc;
+	struct kvmppc_irq_map *irq_map;
+	struct kvmppc_passthru_irqmap *pimap;
+	struct irq_chip *chip;
+	int i, rc = 0;
+	struct irq_data *host_data;
+
+	if (!kvm_irq_bypass)
+		return 1;
+
+	desc = irq_to_desc(host_irq);
+	if (!desc)
+		return -EIO;
+
+	mutex_lock(&kvm->lock);
+
+	pimap = kvm->arch.pimap;
+	if (pimap == NULL) {
+		/* First call, allocate structure to hold IRQ map */
+		pimap = kvmppc_alloc_pimap();
+		if (pimap == NULL) {
+			mutex_unlock(&kvm->lock);
+			return -ENOMEM;
+		}
+		kvm->arch.pimap = pimap;
+	}
+
+	/*
+	 * For now, we only support interrupts for which the EOI operation
+	 * is an OPAL call followed by a write to XIRR, since that's
+	 * what our real-mode EOI code does, or a XIVE interrupt
+	 */
+	chip = irq_data_get_irq_chip(&desc->irq_data);
+	if (!chip || !is_pnv_opal_msi(chip)) {
+		pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+			host_irq, guest_gsi);
+		mutex_unlock(&kvm->lock);
+		return -ENOENT;
+	}
+
+	/*
+	 * See if we already have an entry for this guest IRQ number.
+	 * If it's mapped to a hardware IRQ number, that's an error,
+	 * otherwise re-use this entry.
+	 */
+	for (i = 0; i < pimap->n_mapped; i++) {
+		if (guest_gsi == pimap->mapped[i].v_hwirq) {
+			if (pimap->mapped[i].r_hwirq) {
+				mutex_unlock(&kvm->lock);
+				return -EINVAL;
+			}
+			break;
+		}
+	}
+
+	/* No entry was found to re-use and there is no free slot left */
+	if (i == KVMPPC_PIRQ_MAPPED) {
+		mutex_unlock(&kvm->lock);
+		return -EAGAIN;		/* table is full */
+	}
+
+	irq_map = &pimap->mapped[i];
+
+	irq_map->v_hwirq = guest_gsi;
+	irq_map->desc = desc;
+
+	/*
+	 * Order the above two stores before the next to serialize with
+	 * the KVM real mode handler.
+	 */
+	smp_wmb();
+
+	/*
+	 * The 'host_irq' number is mapped in the PCI-MSI domain but
+	 * the underlying calls, which will EOI the interrupt in real
+	 * mode, need an HW IRQ number mapped in the XICS IRQ domain.
+	 */
+	host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq);
+	irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data);
+
+	/* A brand-new entry (not a re-used one) extends the mapped count */
+	if (i == pimap->n_mapped)
+		pimap->n_mapped++;
+
+	if (xics_on_xive())
+		rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq);
+	else
+		kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq);
+	/* On failure invalidate the entry; rc itself is not returned */
+	if (rc)
+		irq_map->r_hwirq = 0;
+
+	mutex_unlock(&kvm->lock);
+
+	return 0;
+}
+
+/*
+ * Invalidate the passthrough mapping for guest interrupt @guest_gsi by
+ * clearing its r_hwirq field.  The map is updated under kvm->lock.
+ *
+ * Returns 0 if kvm_irq_bypass is off or the VM has no passthrough map,
+ * -EIO if @host_irq has no irq_desc, -ENODEV if @guest_gsi is not in the
+ * map, and otherwise the result of kvmppc_xive_clr_mapped() (0 when XIVE is
+ * not in use).
+ */
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+	struct irq_desc *desc;
+	struct kvmppc_passthru_irqmap *pimap;
+	int i, rc = 0;
+
+	if (!kvm_irq_bypass)
+		return 0;
+
+	desc = irq_to_desc(host_irq);
+	if (!desc)
+		return -EIO;
+
+	mutex_lock(&kvm->lock);
+	if (!kvm->arch.pimap)
+		goto unlock;
+
+	pimap = kvm->arch.pimap;
+
+	/* Find the entry for this guest interrupt number */
+	for (i = 0; i < pimap->n_mapped; i++) {
+		if (guest_gsi == pimap->mapped[i].v_hwirq)
+			break;
+	}
+
+	if (i == pimap->n_mapped) {
+		mutex_unlock(&kvm->lock);
+		return -ENODEV;
+	}
+
+	if (xics_on_xive())
+		rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq);
+	else
+		kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+
+	/* invalidate the entry (what to do on error from the above ?) */
+	pimap->mapped[i].r_hwirq = 0;
+
+	/*
+	 * We don't free this structure even when the count goes to
+	 * zero. The structure is freed when we destroy the VM.
+	 */
+ unlock:
+	mutex_unlock(&kvm->lock);
+	return rc;
+}
+
+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
+					     struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd;
+	int rc;
+
+	/* Recover the irqfd that embeds this consumer and record its producer. */
+	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
+	irqfd->producer = prod;
+
+	rc = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+	if (rc)
+		pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
+			prod->irq, irqfd->gsi, rc);
+	return rc;
+}
+
+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
+					      struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd;
+	int rc;
+
+	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
+	irqfd->producer = NULL;
+
+	/*
+	 * With the producer gone, fall back to the default external
+	 * interrupt handling: the KVM real-mode code hands the
+	 * interrupt back to the host again.
+	 */
+	rc = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+	if (rc)
+		pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
+			prod->irq, irqfd->gsi, rc);
+}
+#endif
+
+static int kvm_arch_vm_ioctl_hv(struct file *filp,
+				unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm __maybe_unused = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+
+	case KVM_PPC_ALLOCATE_HTAB: {
+		u32 htab_order;
+
+		/* If we're a nested hypervisor, we currently only support radix */
+		if (kvmhv_on_pseries()) {
+			r = -EOPNOTSUPP;
+			break;
+		}
+
+		r = -EFAULT;
+		if (get_user(htab_order, (u32 __user *)argp))
+			break;
+		r = kvmppc_alloc_reset_hpt(kvm, htab_order);
+		if (r)
+			break;
+		r = 0;
+		break;
+	}
+
+	case KVM_PPC_GET_HTAB_FD: {
+		struct kvm_get_htab_fd ghf;
+
+		r = -EFAULT;
+		if (copy_from_user(&ghf, argp, sizeof(ghf)))
+			break;
+		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+		break;
+	}
+
+	case KVM_PPC_RESIZE_HPT_PREPARE: {
+		struct kvm_ppc_resize_hpt rhpt;
+
+		r = -EFAULT;
+		if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+			break;
+
+		r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
+		break;
+	}
+
+	case KVM_PPC_RESIZE_HPT_COMMIT: {
+		struct kvm_ppc_resize_hpt rhpt;
+
+		r = -EFAULT;
+		if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+			break;
+
+		r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
+		break;
+	}
+
+	default:
+		r = -ENOTTY;
+	}
+
+	return r;
+}
+
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_REMOVE,
+	H_ENTER,
+	H_READ,
+	H_PROTECT,
+	H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	H_GET_TCE,
+	H_PUT_TCE,
+#endif
+	H_SET_DABR,
+	H_SET_XDABR,
+	H_CEDE,
+	H_PROD,
+	H_CONFER,
+	H_REGISTER_VPA,
+#ifdef CONFIG_KVM_XICS
+	H_EOI,
+	H_CPPR,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR,
+	H_XIRR_X,
+#endif
+	0
+};
+
+static void init_default_hcalls(void)
+{
+	const unsigned int *entry;
+	unsigned int nr;
+
+	/* Zero-terminated list; hcall nr owns bit nr / 4 of the bitmap. */
+	for (entry = default_hcall_list; (nr = *entry) != 0; entry++) {
+		WARN_ON(!kvmppc_hcall_impl_hv(nr));
+		__set_bit(nr / 4, default_enabled_hcalls);
+	}
+}
+
+static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
+{
+	unsigned long lpcr;
+	int radix;	/* 1 = cfg asks for a radix guest, 0 = HPT guest */
+	int err;
+
+	/* Needs CPU_FTR_ARCH_300 (POWER9); reject anything without it */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	/* If any unknown flags set, reject it */
+	if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
+		return -EINVAL;
+
+	/* GR (guest radix) bit in process_table field must match */
+	radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
+	if (!!(cfg->process_table & PATB_GR) != radix)
+		return -EINVAL;
+
+	/* Process table size field must be reasonable, i.e. <= 24 */
+	if ((cfg->process_table & PRTS_MASK) > 24)
+		return -EINVAL;
+
+	/* A radix guest is only accepted when the host itself uses radix */
+	if (radix && !radix_enabled())
+		return -EINVAL;
+
+	/* If we're a nested hypervisor, we currently only support radix */
+	if (kvmhv_on_pseries() && !radix)
+		return -EINVAL;
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+	if (radix != kvm_is_radix(kvm)) {	/* guest MMU type has to change */
+		if (kvm->arch.mmu_ready) {
+			kvm->arch.mmu_ready = 0;
+			/* order mmu_ready vs. vcpus_running */
+			smp_mb();
+			if (atomic_read(&kvm->arch.vcpus_running)) {
+				kvm->arch.mmu_ready = 1;	/* back out */
+				err = -EBUSY;
+				goto out_unlock;
+			}
+		}
+		if (radix)
+			err = kvmppc_switch_mmu_to_radix(kvm);
+		else
+			err = kvmppc_switch_mmu_to_hpt(kvm);
+		if (err)
+			goto out_unlock;
+	}
+
+	kvm->arch.process_table = cfg->process_table;
+	kvmppc_setup_partition_table(kvm);
+
+	lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
+	kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);	/* only GTSE is updated */
+	err = 0;
+
+ out_unlock:
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return err;
+}
+
+static int kvmhv_enable_nested(struct kvm *kvm)
+{
+	if (!nested)
+		return -EPERM;
+	/* Needs a CPU_FTR_ARCH_300 CPU and radix_enabled() on the host. */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300) || !radix_enabled())
+		return -ENODEV;
+
+	/* A NULL kvm is only a probe for the capability: change nothing. */
+	if (!kvm)
+		return 0;
+	kvm->arch.nested_enable = true;
+	return 0;
+}
+
+static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+				 int size)
+{
+	int ret;
+
+	if (!kvmhv_vcpu_is_radix(vcpu)) {
+		ret = -EINVAL;
+	} else {
+		ret = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size);
+		if (ret > 0)
+			ret = -EINVAL;	/* positive result becomes -EINVAL */
+	}
+
+	/* For now quadrants are the only way to access nested guest memory */
+	if (ret != 0 && vcpu->arch.nested)
+		ret = -EAGAIN;
+	return ret;
+}
+
+static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+				int size)
+{
+	int ret;
+
+	if (!kvmhv_vcpu_is_radix(vcpu)) {
+		ret = -EINVAL;
+	} else {
+		ret = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size);
+		if (ret > 0)
+			ret = -EINVAL;	/* positive result becomes -EINVAL */
+	}
+
+	/* For now quadrants are the only way to access nested guest memory */
+	if (ret != 0 && vcpu->arch.nested)
+		ret = -EAGAIN;
+	return ret;
+}
+
+static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+	unpin_vpa(kvm, vpa);
+	vpa->gpa = 0;
+	vpa->pinned_addr = NULL;
+	vpa->dirty = false;
+	vpa->update_pending = 0;
+}
+
+/*
+ * Enable a guest to become a secure VM, or test whether
+ * that could be enabled.
+ * Called when the KVM_CAP_PPC_SECURE_GUEST capability is
+ * tested (kvm == NULL) or enabled (kvm != NULL).
+ */
+static int kvmhv_enable_svm(struct kvm *kvm)
+{
+	if (!kvmppc_uvmem_available())
+		return -EINVAL;
+	if (kvm)
+		kvm->arch.svm_enabled = 1;
+	return 0;
+}
+
+/*
+ * IOCTL handler to turn off the secure mode of a guest
+ *
+ * - Release all device pages
+ * - Issue ucall to terminate the guest on the UV side
+ * - Unpin the VPA pages.
+ * - Reinit the partition scoped page tables
+ */
+static int kvmhv_svm_off(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int mmu_was_ready;	/* mmu_ready as found on entry, restored on success */
+	int srcu_idx;
+	int ret = 0;
+	unsigned long i;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return ret;	/* flag not set: nothing to undo, returns 0 */
+
+	mutex_lock(&kvm->arch.mmu_setup_lock);
+	mmu_was_ready = kvm->arch.mmu_ready;
+	if (kvm->arch.mmu_ready) {
+		kvm->arch.mmu_ready = 0;
+		/* order mmu_ready vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.mmu_ready = 1;	/* back out: vcpus are running */
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		struct kvm_memory_slot *memslot;
+		struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+		int bkt;
+
+		if (!slots)
+			continue;
+
+		kvm_for_each_memslot(memslot, bkt, slots) {
+			kvmppc_uvmem_drop_pages(memslot, kvm, true);
+			uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+		}
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	ret = uv_svm_terminate(kvm->arch.lpid);
+	if (ret != U_SUCCESS) {
+		ret = -EINVAL;
+		goto out;	/* NOTE(review): mmu_ready is not restored here - confirm */
+	}
+
+	/*
+	 * When secure guest is reset, all the guest pages are sent
+	 * to UV via UV_PAGE_IN before the non-boot vcpus get a
+	 * chance to run and unpin their VPA pages. Unpinning of all
+	 * VPA pages is done here explicitly so that VPA pages
+	 * can be migrated to the secure side.
+	 *
+	 * This is required for the secure SMP guest to reboot
+	 * correctly.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		unpin_vpa_reset(kvm, &vcpu->arch.dtl);
+		unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
+		unpin_vpa_reset(kvm, &vcpu->arch.vpa);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+	}
+
+	kvmppc_setup_partition_table(kvm);
+	kvm->arch.secure_guest = 0;
+	kvm->arch.mmu_ready = mmu_was_ready;
+out:
+	mutex_unlock(&kvm->arch.mmu_setup_lock);
+	return ret;
+}
+
+static int kvmhv_enable_dawr1(struct kvm *kvm)
+{
+	if (!cpu_has_feature(CPU_FTR_DAWR1))
+		return -ENODEV;
+	/* A NULL kvm is only a probe for the capability: change nothing. */
+	if (!kvm)
+		return 0;
+	kvm->arch.dawr1_enabled = true;
+	return 0;
+}
+
+static bool kvmppc_hash_v3_possible(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return false;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return false;
+
+	/*
+	 * POWER9 chips before version 2.02 can't have some threads in
+	 * HPT mode and some in radix mode on the same core.
+	 */
+	if (radix_enabled()) {
+		unsigned int pvr = mfspr(SPRN_PVR);
+		if ((pvr >> 16) == PVR_POWER9 &&
+		    (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+		     ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+			return false;
+	}
+
+	return true;
+}
+
+static struct kvmppc_ops kvm_ops_hv = {
+	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
+	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
+	.get_one_reg = kvmppc_get_one_reg_hv,
+	.set_one_reg = kvmppc_set_one_reg_hv,
+	.vcpu_load   = kvmppc_core_vcpu_load_hv,
+	.vcpu_put    = kvmppc_core_vcpu_put_hv,
+	.inject_interrupt = kvmppc_inject_interrupt_hv,
+	.set_msr     = kvmppc_set_msr_hv,
+	.vcpu_run    = kvmppc_vcpu_run_hv,
+	.vcpu_create = kvmppc_core_vcpu_create_hv,
+	.vcpu_free   = kvmppc_core_vcpu_free_hv,
+	.check_requests = kvmppc_core_check_requests_hv,
+	.get_dirty_log  = kvm_vm_ioctl_get_dirty_log_hv,
+	.flush_memslot  = kvmppc_core_flush_memslot_hv,
+	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
+	.commit_memory_region  = kvmppc_core_commit_memory_region_hv,
+	.unmap_gfn_range = kvm_unmap_gfn_range_hv,
+	.age_gfn = kvm_age_gfn_hv,
+	.test_age_gfn = kvm_test_age_gfn_hv,
+	.set_spte_gfn = kvm_set_spte_gfn_hv,
+	.free_memslot = kvmppc_core_free_memslot_hv,
+	.init_vm =  kvmppc_core_init_vm_hv,
+	.destroy_vm = kvmppc_core_destroy_vm_hv,
+	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
+	.emulate_op = kvmppc_core_emulate_op_hv,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
+	.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
+	.arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
+	.hcall_implemented = kvmppc_hcall_impl_hv,
+#ifdef CONFIG_KVM_XICS
+	.irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
+	.irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
+#endif
+	.configure_mmu = kvmhv_configure_mmu,
+	.get_rmmu_info = kvmhv_get_rmmu_info,
+	.set_smt_mode = kvmhv_set_smt_mode,
+	.enable_nested = kvmhv_enable_nested,
+	.load_from_eaddr = kvmhv_load_from_eaddr,
+	.store_to_eaddr = kvmhv_store_to_eaddr,
+	.enable_svm = kvmhv_enable_svm,
+	.svm_off = kvmhv_svm_off,
+	.enable_dawr1 = kvmhv_enable_dawr1,
+	.hash_v3_possible = kvmppc_hash_v3_possible,
+	.create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
+	.create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
+};
+
+static int kvm_init_subcore_bitmap(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	struct sibling_subcore_state *sibling_subcore_state;
+
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		/* Ignore if it is already allocated. */
+		if (paca_ptrs[first_cpu]->sibling_subcore_state)
+			continue;
+
+		sibling_subcore_state =
+			kzalloc_node(sizeof(struct sibling_subcore_state),
+							GFP_KERNEL, node);
+		if (!sibling_subcore_state)
+			return -ENOMEM;
+
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca_ptrs[cpu]->sibling_subcore_state =
+						sibling_subcore_state;
+		}
+	}
+	return 0;
+}
+
+static int kvmppc_radix_possible(void)
+{
+	return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
+}
+
+static int kvmppc_book3s_init_hv(void)
+{
+	int r;
+
+	if (!tlbie_capable) {
+		pr_err("KVM-HV: Host does not support TLBIE\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * FIXME!! Do we need to check on all cpus ?
+	 */
+	r = kvmppc_core_check_processor_compat_hv();
+	if (r < 0)
+		return -ENODEV;
+
+	r = kvmhv_nested_init();
+	if (r)
+		return r;	/* nothing set up yet, no unwind needed */
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		r = kvm_init_subcore_bitmap();
+		if (r)
+			goto err;
+	}
+
+	/*
+	 * We need a way of accessing the XICS interrupt controller,
+	 * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
+	 * indirectly, via OPAL.
+	 */
+#ifdef CONFIG_SMP
+	if (!xics_on_xive() && !kvmhv_on_pseries() &&
+	    !local_paca->kvm_hstate.xics_phys) {
+		struct device_node *np;
+
+		np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+		if (!np) {
+			pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
+			r = -ENODEV;
+			goto err;
+		}
+		/* presence of intc confirmed - node can be dropped again */
+		of_node_put(np);
+	}
+#endif
+
+	init_default_hcalls();
+
+	init_vcore_lists();
+
+	r = kvmppc_mmu_hv_init();
+	if (r)
+		goto err;
+
+	if (kvmppc_radix_possible()) {
+		r = kvmppc_radix_init();
+		if (r)
+			goto err;
+	}
+
+	r = kvmppc_uvmem_init();
+	if (r < 0) {
+		pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
+		goto err;	/* unwind nested/radix setup like every other failure */
+	}
+
+	kvm_ops_hv.owner = THIS_MODULE;
+	kvmppc_hv_ops = &kvm_ops_hv;	/* publish the HV ops only on full success */
+
+	return 0;
+
+err:
+	kvmhv_nested_exit();
+	kvmppc_radix_exit();
+
+	return r;
+}
+
+static void kvmppc_book3s_exit_hv(void)
+{
+	kvmppc_uvmem_free();
+	kvmppc_free_host_rm_ops();
+	if (kvmppc_radix_possible())
+		kvmppc_radix_exit();
+	kvmppc_hv_ops = NULL;
+	kvmhv_nested_exit();
+}
+
+module_init(kvmppc_book3s_init_hv);
+module_exit(kvmppc_book3s_exit_hv);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h
new file mode 100644
index 0000000000..95241764df
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.h
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+	unsigned long iamr;
+	unsigned long amr;
+
+	unsigned int pmc1;
+	unsigned int pmc2;
+	unsigned int pmc3;
+	unsigned int pmc4;
+	unsigned int pmc5;
+	unsigned int pmc6;
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcr2;
+	unsigned long mmcr3;
+	unsigned long mmcra;
+	unsigned long siar;
+	unsigned long sier1;
+	unsigned long sier2;
+	unsigned long sier3;
+	unsigned long sdar;
+};
+
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+	return kvm->arch.nested_enable && kvm_is_radix(kvm);
+}
+
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+			   struct p9_host_os_sprs *host_os_sprs);
+void store_vcpu_state(struct kvm_vcpu *vcpu);
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs);
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+				    struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+			    struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+			    struct p9_host_os_sprs *host_os_sprs);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next);
+#define start_timing(vcpu, next) accumulate_time(vcpu, next)
+#define end_timing(vcpu) accumulate_time(vcpu, NULL)
+#else
+#define accumulate_time(vcpu, next) do {} while (0)
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#endif
+
+static inline void __kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 val)
+{
+	vcpu->arch.shregs.msr = val;
+}
+
+static inline u64 __kvmppc_get_msr_hv(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.shregs.msr;
+}
+
+#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_SET(reg, size)			\
+static inline void kvmppc_set_##reg ##_hv(struct kvm_vcpu *vcpu, u##size val)	\
+{									\
+	vcpu->arch.reg = val;						\
+}
+
+#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_GET(reg, size)			\
+static inline u##size kvmppc_get_##reg ##_hv(struct kvm_vcpu *vcpu)	\
+{									\
+	return vcpu->arch.reg;						\
+}
+
+#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(reg, size)			\
+	KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_SET(reg, size)			\
+	KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_GET(reg, size)			\
+
+#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_SET(reg, size)		\
+static inline void kvmppc_set_##reg ##_hv(struct kvm_vcpu *vcpu, int i, u##size val)	\
+{									\
+	vcpu->arch.reg[i] = val;					\
+}
+
+#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_GET(reg, size)		\
+static inline u##size kvmppc_get_##reg ##_hv(struct kvm_vcpu *vcpu, int i)	\
+{									\
+	return vcpu->arch.reg[i];					\
+}
+
+#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(reg, size)			\
+	KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_SET(reg, size)		\
+	KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_GET(reg, size)		\
+
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(mmcra, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(hfscr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(fscr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dscr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(purr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(spurr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(amr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(uamor, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(siar, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(sdar, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(iamr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawr0, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawr1, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawrx0, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawrx1, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ciabr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(wort, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ppr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ctrl, 64)
+
+KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(mmcr, 64)
+KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(sier, 64)
+KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(pmc, 32)
+
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(pspb, 32)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
new file mode 100644
index 0000000000..663f5222f3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm_host.h>
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
+#include <linux/cma.h>
+#include <linux/bitops.h>
+
+#include <asm/cputable.h>
+#include <asm/interrupt.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/machdep.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/dbell.h>
+#include <asm/cputhreads.h>
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+
+#define KVM_CMA_CHUNK_ORDER	18
+
+#include "book3s_xics.h"
+#include "book3s_xive.h"
+#include "book3s_hv.h"
+
+/*
+ * Hash page table alignment on newer CPUs (CPU_FTR_ARCH_206)
+ * should be a power of 2.
+ */
+#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
+
+static struct cma *kvm_cma;
+
+static int __init early_parse_kvm_cma_resv(char *p)
+{
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
+	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
+}
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
+
+struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
+{
+	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
+	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
+			 false);
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
+
+void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
+{
+	cma_release(kvm_cma, page, nr_pages);
+}
+EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
+
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from the early allocator. It should be
+ * called by arch specific code once the memblock allocator
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init kvm_cma_reserve(void)
+{
+	unsigned long align_size;
+	phys_addr_t selected_size;
+
+	/*
+	 * We need CMA reservation only when we are in HV mode
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return;
+
+	selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
+	if (selected_size) {
+		pr_info("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+		cma_declare_contiguous(0, selected_size, 0, align_size,
+			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, "kvm_cma",
+			&kvm_cma);
+	}
+}
+
+/*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded.  If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+			    unsigned int yield_count)
+{
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
+	int threads_running;
+	int threads_ceded;
+	int threads_conferring;
+	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+	int rv = H_SUCCESS; /* => don't yield */
+
+	set_bit(ptid, &vc->conferring_threads);
+	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
+		threads_running = VCORE_ENTRY_MAP(vc);
+		threads_ceded = vc->napping_threads;
+		threads_conferring = vc->conferring_threads;
+		if ((threads_ceded | threads_conferring) == threads_running) {
+			rv = H_TOO_HARD; /* => do yield */
+			break;
+		}
+	}
+	clear_bit(ptid, &vc->conferring_threads);
+	return rv;
+}
+
+/*
+ * When running HV mode KVM we need to block certain operations while KVM VMs
+ * exist in the system. We use a counter of VMs to track this.
+ *
+ * One of the operations we need to block is onlining of secondaries, so we
+ * protect hv_vm_count with cpus_read_lock/unlock().
+ */
+static atomic_t hv_vm_count;
+
+void kvm_hv_vm_activated(void)
+{
+	cpus_read_lock();
+	atomic_inc(&hv_vm_count);
+	cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
+
+void kvm_hv_vm_deactivated(void)
+{
+	cpus_read_lock();
+	atomic_dec(&hv_vm_count);
+	cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
+
+bool kvm_hv_mode_active(void)
+{
+	return atomic_read(&hv_vm_count) != 0;
+}
+
+extern int hcall_real_table[], hcall_real_table_end[];
+
+int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
+{
+	unsigned long slot = cmd / 4;	/* table index is hcall number / 4 */
+
+	/* Implemented iff the slot lies inside the table and is non-zero. */
+	if (slot >= hcall_real_table_end - hcall_real_table)
+		return 0;
+	return hcall_real_table[slot] ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+int kvmppc_hwrng_present(void)
+{
+	return !!ppc_md.get_random_seed;	/* 1 if a seed hook is installed */
+}
+EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
+{
+	/* Fail if there is no seed hook or the hook returns 0. */
+	if (!ppc_md.get_random_seed ||
+	    !ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
+		return H_HARDWARE;
+	return H_SUCCESS;
+}
+
+/*
+ * Send an interrupt or message to another CPU.
+ * The caller needs to include any barrier needed to order writes
+ * to memory vs. the IPI/message.
+ */
+void kvmhv_rm_send_ipi(int cpu)
+{
+	unsigned long dbell = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+	void __iomem *icp;
+
+	/* On POWER9 msgsnd can target any cpu, by hard processor id. */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		dbell |= get_hard_smp_processor_id(cpu);
+		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (dbell));
+		return;
+	}
+
+	/* On POWER8 msgsnd is used only for threads in our own core. */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	    cpu_first_thread_sibling(raw_smp_processor_id()) ==
+	    cpu_first_thread_sibling(cpu)) {
+		dbell |= cpu_thread_in_core(cpu);
+		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (dbell));
+		return;
+	}
+
+	/* We should never reach this */
+	if (WARN_ON_ONCE(xics_on_xive()))
+		return;
+
+	/* Otherwise set the target's MFRR: directly if mapped, else via OPAL. */
+	icp = paca_ptrs[cpu]->kvm_hstate.xics_phys;
+	if (!icp)
+		opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
+	else
+		__raw_rm_writeb(IPI_PRIORITY, icp + XICS_MFRR);
+}
+
+/*
+ * The following functions are called from the assembly code
+ * in book3s_hv_rmhandlers.S.
+ */
+static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
+{
+	int cpu = vc->pcpu;
+
+	/* Order setting of exit map vs. msgsnd/IPI */
+	smp_mb();
+	for (; active; active >>= 1, ++cpu)
+		if (active & 1)
+			kvmhv_rm_send_ipi(cpu);
+}
+
+void kvmhv_commence_exit(int trap)
+{
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
+	struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+	int me, ee, i;
+
+	/* Set our bit in the threads-exiting-guest map in the 0xff00
+	   bits of vcore->entry_exit_map */
+	me = 0x100 << ptid;
+	do {
+		ee = vc->entry_exit_map;
+	} while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
+
+	/* Are we the first here? */
+	if ((ee >> 8) != 0)
+		return;
+
+	/*
+	 * Trigger the other threads in this vcore to exit the guest.
+	 * If this is a hypervisor decrementer interrupt then they
+	 * will be already on their way out of the guest.
+	 */
+	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
+		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+	/*
+	 * If we are doing dynamic micro-threading, interrupt the other
+	 * subcores to pull them out of their guests too.
+	 */
+	if (!sip)
+		return;
+
+	for (i = 0; i < MAX_SUBCORES; ++i) {
+		vc = sip->vc[i];
+		if (!vc)
+			break;
+		do {
+			ee = vc->entry_exit_map;
+			/* Already asked to exit? */
+			if ((ee >> 8) != 0)
+				break;
+		} while (cmpxchg(&vc->entry_exit_map, ee,
+				 ee | VCORE_EXIT_REQ) != ee);
+		if ((ee >> 8) == 0)
+			kvmhv_interrupt_vcore(vc, ee);
+	}
+}
+
+struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
+
+#ifdef CONFIG_KVM_XICS
+static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
+					 u32 xisr)
+{
+	int i;
+
+	/*
+	 * We access the mapped array here without a lock.  That
+	 * is safe because we never reduce the number of entries
+	 * in the array and we never change the v_hwirq field of
+	 * an entry once it is set.
+	 *
+	 * We have also carefully ordered the stores in the writer
+	 * and the loads here in the reader, so that if we find a matching
+	 * hwirq here, the associated GSI and irq_desc fields are valid.
+	 */
+	for (i = 0; i < pimap->n_mapped; i++)  {
+		if (xisr == pimap->mapped[i].r_hwirq) {
+			/*
+			 * Order subsequent reads in the caller to serialize
+			 * with the writer.
+			 */
+			smp_rmb();
+			return &pimap->mapped[i];
+		}
+	}
+	return NULL;
+}
+
+/*
+ * If we have an interrupt that's not an IPI, check if we have a
+ * passthrough adapter and if so, check if this external interrupt
+ * is for the adapter.
+ * We will attempt to deliver the IRQ directly to the target VCPU's
+ * ICP, the virtual ICP (based on affinity - the xive value in ICS).
+ *
+ * If the delivery fails or if this is not for a passthrough adapter,
+ * return to the host to handle this interrupt. We earlier
+ * saved a copy of the XIRR in the PACA, it will be picked up by
+ * the host ICP driver.
+ */
+static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
+{
+	struct kvmppc_passthru_irqmap *pimap;
+	struct kvmppc_irq_map *irq_map;
+	struct kvm_vcpu *vcpu;
+
+	vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	if (!vcpu)
+		return 1;
+	pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
+	if (!pimap)
+		return 1;
+	irq_map = get_irqmap(pimap, xisr);
+	if (!irq_map)
+		return 1;
+
+	/* We're handling this interrupt, generic code doesn't need to */
+	local_paca->kvm_hstate.saved_xirr = 0;
+
+	return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
+}
+
+#else
+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
+{
+	return 1;
+}
+#endif
+
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ *	0 if no interrupt is pending
+ *	1 if an interrupt is pending that needs to be handled by the host
+ *	2 Passthrough that needs completion in the host
+ *	-1 if there was a guest wakeup IPI (which has now been cleared)
+ *	-2 if there is PCI passthrough external interrupt that was handled
+ */
+static long kvmppc_read_one_intr(bool *again);
+
+long kvmppc_read_intr(void)
+{
+	long best = 0, cur;
+	bool again;
+
+	if (xive_enabled())
+		return 1;
+
+	/* Repeat while asked to; a result replaces 0 or any smaller one. */
+	do {
+		again = false;
+		cur = kvmppc_read_one_intr(&again);
+		if (cur != 0 && (best == 0 || cur > best))
+			best = cur;
+	} while (again);
+	return best;
+}
+
+static long kvmppc_read_one_intr(bool *again)
+{
+	void __iomem *xics_phys;
+	u32 h_xirr;
+	__be32 xirr;
+	u32 xisr;
+	u8 host_ipi;
+	int64_t rc;
+
+	if (xive_enabled())
+		return 1;
+
+	/* see if a host IPI is pending */
+	host_ipi = READ_ONCE(local_paca->kvm_hstate.host_ipi);
+	if (host_ipi)
+		return 1;
+
+	/* Now read the interrupt from the ICP */
+	xics_phys = local_paca->kvm_hstate.xics_phys;
+	rc = 0;
+	if (!xics_phys)
+		rc = opal_int_get_xirr(&xirr, false);
+	else
+		xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+	if (rc < 0)
+		return 1;
+
+	/*
+	 * Save XIRR for later. Since we get control in reverse endian
+	 * on LE systems, save it byte reversed and fetch it back in
+	 * host endian. Note that xirr is the value read from the
+	 * XIRR register, while h_xirr is the host endian version.
+	 */
+	h_xirr = be32_to_cpu(xirr);
+	local_paca->kvm_hstate.saved_xirr = h_xirr;
+	xisr = h_xirr & 0xffffff;
+	/*
+	 * Ensure that the store/load complete to guarantee all side
+	 * effects of loading from XIRR has completed
+	 */
+	smp_mb();
+
+	/* if nothing pending in the ICP */
+	if (!xisr)
+		return 0;
+
+	/* We found something in the ICP...
+	 *
+	 * If it is an IPI, clear the MFRR and EOI it.
+	 */
+	if (xisr == XICS_IPI) {
+		rc = 0;
+		if (xics_phys) {
+			__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
+			__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
+		} else {
+			opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
+			rc = opal_int_eoi(h_xirr);
+		}
+		/* If rc > 0, there is another interrupt pending */
+		*again = rc > 0;
+
+		/*
+		 * Need to ensure side effects of above stores
+		 * complete before proceeding.
+		 */
+		smp_mb();
+
+		/*
+		 * We need to re-check host IPI now in case it got set in the
+		 * meantime. If it's clear, we bounce the interrupt to the
+		 * guest
+		 */
+		host_ipi = READ_ONCE(local_paca->kvm_hstate.host_ipi);
+		if (unlikely(host_ipi != 0)) {
+			/* We raced with the host,
+			 * we need to resend that IPI, bummer
+			 */
+			if (xics_phys)
+				__raw_rm_writeb(IPI_PRIORITY,
+						xics_phys + XICS_MFRR);
+			else
+				opal_int_set_mfrr(hard_smp_processor_id(),
+						  IPI_PRIORITY);
+			/* Let side effects complete */
+			smp_mb();
+			return 1;
+		}
+
+		/* OK, it's an IPI for us */
+		local_paca->kvm_hstate.saved_xirr = 0;
+		return -1;
+	}
+
+	return kvmppc_check_passthru(xisr, xirr, again);
+}
+
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.ceded = 0;
+	if (!vcpu->arch.timer_running)
+		return;
+	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);	/* result ignored */
+	vcpu->arch.timer_running = 0;
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+	/* Guest must always run with ME enabled, HV disabled. */
+	msr = (msr | MSR_ME) & ~MSR_HV;
+
+	/*
+	 * Check for illegal transactional state bit combination
+	 * and if we find it, force the TS field to a safe state.
+	 */
+	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+		msr &= ~MSR_TS_MASK;
+	__kvmppc_set_msr_hv(vcpu, msr);
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long msr, pc, new_msr, new_pc;
+
+	msr = kvmppc_get_msr(vcpu);
+	pc = kvmppc_get_pc(vcpu);
+	new_msr = vcpu->arch.intr_msr;
+	new_pc = vec;
+
+	/* If transactional, change to suspend mode on IRQ delivery */
+	if (MSR_TM_TRANSACTIONAL(msr))
+		new_msr |= MSR_TS_S;
+	else
+		new_msr |= msr & MSR_TS_MASK;
+
+	/*
+	 * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+	 * applicable. AIL=2 is not supported.
+	 *
+	 * AIL does not apply to SRESET, MCE, or HMI (which is never
+	 * delivered to the guest), and does not apply if IR=0 or DR=0.
+	 */
+	if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+	    vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+	    (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+	    (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
+		new_msr |= MSR_IR | MSR_DR;
+		new_pc += 0xC000000000004000ULL;
+	}
+
+	kvmppc_set_srr0(vcpu, pc);
+	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, new_pc);
+	__kvmppc_set_msr_hv(vcpu, new_msr);
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	inject_interrupt(vcpu, vec, srr1_flags);
+	kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
+/*
+ * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
+ * Can we inject a Decrementer or an External interrupt?
+ */
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
+{
+	int ext;
+	unsigned long lpcr;
+
+	WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+	/* Insert EXTERNAL bit into LPCR at the MER bit position */
+	ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= ext << LPCR_MER_SH;
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+
+	if (vcpu->arch.shregs.msr & MSR_EE) {
+		if (ext) {
+			inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
+		} else {
+			long int dec = mfspr(SPRN_DEC);
+			if (!(lpcr & LPCR_LD))
+				dec = (int) dec;
+			if (dec < 0)
+				inject_interrupt(vcpu,
+					BOOK3S_INTERRUPT_DECREMENTER, 0);
+		}
+	}
+
+	if (vcpu->arch.doorbell_request) {
+		mtspr(SPRN_DPDES, 1);
+		vcpu->arch.vcore->dpdes = 1;
+		smp_wmb();
+		vcpu->arch.doorbell_request = 0;
+	}
+}
+
+static void flush_guest_tlb(struct kvm *kvm)
+{
+	unsigned long rb, set;
+
+	rb = PPC_BIT(52);	/* IS = 2 */
+	for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+		/* R=0 PRS=0 RIC=0 */
+		asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+			     : : "r" (rb), "i" (0), "i" (0), "i" (0),
+			       "r" (0) : "memory");
+		rb += PPC_BIT(51);	/* increment set number */
+	}
+	asm volatile("ptesync": : :"memory");
+}
+
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu)
+{
+	if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
+		flush_guest_tlb(kvm);
+
+		/* Clear the bit after the TLB flush */
+		cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
new file mode 100644
index 0000000000..1ec50c6967
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+#include <asm/processor.h>
+
+/*
+ * Spin until every in_guest[] entry of this CPU's sibling_subcore_state
+ * reads zero, checking the MAX_SUBCORE_PER_CORE entries in order.
+ */
+void wait_for_subcore_guest_exit(void)
+{
+	int i;
+
+	/*
+	 * NULL bitmap pointer indicates that KVM module hasn't
+	 * been loaded yet and hence no guests are running, or running
+	 * on POWER9 or newer CPU.
+	 *
+	 * If no KVM is in use, no need to co-ordinate among threads
+	 * as all of them will always be in host and no one is going
+	 * to modify TB other than the opal hmi handler.
+	 *
+	 * POWER9 and newer don't need this synchronisation.
+	 *
+	 * Hence, just return from here.
+	 */
+	if (!local_paca->sibling_subcore_state)
+		return;
+
+	for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+		while (local_paca->sibling_subcore_state->in_guest[i])
+			cpu_relax();
+}
+
+/*
+ * Spin until CORE_TB_RESYNC_REQ_BIT is clear in the flags of this CPU's
+ * sibling_subcore_state.  Returns immediately when no
+ * sibling_subcore_state has been set up.
+ */
+void wait_for_tb_resync(void)
+{
+	if (!local_paca->sibling_subcore_state)
+		return;
+
+	while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+				&local_paca->sibling_subcore_state->flags))
+		cpu_relax();
+}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
new file mode 100644
index 0000000000..c0deeea7ee
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_interrupts.S, which is:
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/linkage.h>
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/ppc-opcode.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+/*****************************************************************************
+ *                                                                           *
+ *     Guest entry / exit code that is in kernel module memory (vmalloc)     *
+ *                                                                           *
+ ****************************************************************************/
+
+/*
+ * __kvmppc_vcore_entry: save host state, program HDEC from the current
+ * DEC value and branch to kvmppc_hv_entry_trampoline.  On return the
+ * saved non-volatile registers, CR and LR are restored.
+ *
+ * Registers:
+ *  none
+ *
+ * R13 is used as the base for all HSTATE_* accesses.
+ */
+_GLOBAL(__kvmppc_vcore_entry)
+
+	/* Write correct stack frame */
+	mflr	r0
+	std	r0,PPC_LR_STKOFF(r1)
+
+	/* Save host state to the stack */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+
+	/* Save non-volatile registers (r14 - r31) and CR */
+	SAVE_NVGPRS(r1)
+	mfcr	r3
+	std	r3, _CCR(r1)
+
+	/* Save host DSCR */
+	mfspr	r3, SPRN_DSCR
+	std	r3, HSTATE_DSCR(r13)
+
+BEGIN_FTR_SECTION
+	/* Save host DABR */
+	mfspr	r3, SPRN_DABR
+	std	r3, HSTATE_DABR(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+	/* Save host PMU registers */
+	bl	kvmhv_save_host_pmu
+
+	/*
+	 * Put whatever is in the decrementer into the
+	 * hypervisor decrementer.
+	 * Because of a hardware deviation in P8,
+	 * we need to set LPCR[HDICE] before writing HDEC.
+	 *
+	 * The sign-extended DEC value plus the current timebase is also
+	 * stored in HSTATE_DECEXP.
+	 */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_KVM(r5)
+	ld	r9, KVM_HOST_LPCR(r6)
+	ori	r8, r9, LPCR_HDICE
+	mtspr	SPRN_LPCR, r8
+	isync
+	mfspr	r8,SPRN_DEC
+	mftb	r7
+	extsw	r8,r8
+	mtspr	SPRN_HDEC,r8
+	add	r8,r8,r7
+	std	r8,HSTATE_DECEXP(r13)
+
+	/* Jump to partition switch code */
+	bl	kvmppc_hv_entry_trampoline
+	nop
+
+/*
+ * We return here in virtual mode after the guest exits
+ * with something that we can't handle in real mode.
+ * Interrupts are still hard-disabled.
+ */
+
+	/*
+	 * Register usage at this point:
+	 *
+	 * R1       = host R1
+	 * R2       = host R2
+	 * R3       = trap number on this thread
+	 * R12      = exit handler id
+	 * R13      = PACA
+	 */
+
+	/* Restore non-volatile host registers (r14 - r31) and CR */
+	REST_NVGPRS(r1)
+	ld	r4, _CCR(r1)
+	mtcr	r4
+
+	/* Pop the switch frame and return to the saved LR */
+	addi    r1, r1, SWITCH_FRAME_SIZE
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * void kvmhv_save_host_pmu(void)
+ *
+ * Freeze the performance counters and clear MMCRA.  If PACA_PMCINUSE is
+ * non-zero, also store the PMU control registers, SIAR/SDAR and PMC1-6
+ * into the HSTATE_* area addressed through r13; otherwise return right
+ * after freezing.
+ *
+ * Uses r3 and r5-r10 as scratch.  r8 carries the MMCR2 value read in the
+ * first CPU_FTR_ARCH_207S section until it is stored in the second one.
+ */
+SYM_FUNC_START_LOCAL(kvmhv_save_host_pmu)
+BEGIN_FTR_SECTION
+	/* Work around P8 PMAE bug */
+	li	r3, -1
+	clrrdi	r3, r3, 10		/* all ones except the low 10 bits */
+	mfspr	r8, SPRN_MMCR2
+	mtspr	SPRN_MMCR2, r3		/* freeze all counters using MMCR2 */
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable interrupts */
+	mfspr	r6, SPRN_MMCRA
+	/* Clear MMCRA in order to disable SDAR updates */
+	li	r5, 0
+	mtspr	SPRN_MMCRA, r5
+	isync
+	lbz	r5, PACA_PMCINUSE(r13)	/* is the host using the PMU? */
+	cmpwi	r5, 0
+	beq	31f			/* skip if not */
+	mfspr	r5, SPRN_MMCR1
+	mfspr	r9, SPRN_SIAR
+	mfspr	r10, SPRN_SDAR
+	std	r7, HSTATE_MMCR0(r13)
+	std	r5, HSTATE_MMCR1(r13)
+	std	r6, HSTATE_MMCRA(r13)
+	std	r9, HSTATE_SIAR(r13)
+	std	r10, HSTATE_SDAR(r13)
+BEGIN_FTR_SECTION
+	mfspr	r9, SPRN_SIER
+	std	r8, HSTATE_MMCR2(r13)	/* MMCR2 value read before the freeze */
+	std	r9, HSTATE_SIER(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mfspr	r3, SPRN_PMC1
+	mfspr	r5, SPRN_PMC2
+	mfspr	r6, SPRN_PMC3
+	mfspr	r7, SPRN_PMC4
+	mfspr	r8, SPRN_PMC5
+	mfspr	r9, SPRN_PMC6
+	stw	r3, HSTATE_PMC1(r13)
+	stw	r5, HSTATE_PMC2(r13)
+	stw	r6, HSTATE_PMC3(r13)
+	stw	r7, HSTATE_PMC4(r13)
+	stw	r8, HSTATE_PMC5(r13)
+	stw	r9, HSTATE_PMC6(r13)
+31:	blr
+SYM_FUNC_END(kvmhv_save_host_pmu)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
new file mode 100644
index 0000000000..377d0b4a05
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -0,0 +1,1678 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2018
+ * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
+ *	   Paul Mackerras <paulus@ozlabs.org>
+ *
+ * Description: KVM functions specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/llist.h>
+#include <linux/pgtable.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+static struct patb_entry *pseries_partition_tb;
+
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
+
+/*
+ * Snapshot the hypervisor register state of @vcpu and its vcore into @hr.
+ *
+ * Only the fields assigned below are written; the remaining members of
+ * @hr are left untouched.
+ */
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* State held per virtual core */
+	hr->pcr = vcore->pcr | PCR_MASK;
+	hr->dpdes = vcore->dpdes;
+	hr->tb_offset = vcore->tb_offset;
+	hr->vtb = vcore->vtb;
+
+	/* State held per vcpu */
+	hr->hfscr = vcpu->arch.hfscr;
+	hr->ciabr = vcpu->arch.ciabr;
+	hr->purr = vcpu->arch.purr;
+	hr->spurr = vcpu->arch.spurr;
+	hr->ic = vcpu->arch.ic;
+	hr->pidr = vcpu->arch.pid;
+	hr->cfar = vcpu->arch.cfar;
+	hr->ppr = vcpu->arch.ppr;
+
+	/* Data address watchpoints */
+	hr->dawr0 = vcpu->arch.dawr0;
+	hr->dawrx0 = vcpu->arch.dawrx0;
+	hr->dawr1 = vcpu->arch.dawr1;
+	hr->dawrx1 = vcpu->arch.dawrx1;
+
+	/* Shared registers */
+	hr->srr0 = vcpu->arch.shregs.srr0;
+	hr->srr1 = vcpu->arch.shregs.srr1;
+	hr->sprg[0] = vcpu->arch.shregs.sprg0;
+	hr->sprg[1] = vcpu->arch.shregs.sprg1;
+	hr->sprg[2] = vcpu->arch.shregs.sprg2;
+	hr->sprg[3] = vcpu->arch.shregs.sprg3;
+}
+
+/*
+ * Byte-reverse every unsigned-long-sized word of @regs in place.
+ *
+ * Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610
+ */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
+{
+	unsigned long *word = (unsigned long *) regs;
+	unsigned long *const end = (unsigned long *) (regs + 1);
+
+	while (word < end) {
+		*word = swab64(*word);
+		word++;
+	}
+}
+
+/*
+ * Byte-reverse each field of @hr in place.  lpid and vcpu_token are
+ * swapped as 32-bit values, all other fields as 64-bit values.
+ */
+static void byteswap_hv_regs(struct hv_guest_state *hr)
+{
+	hr->version = swab64(hr->version);
+	hr->lpid = swab32(hr->lpid);
+	hr->vcpu_token = swab32(hr->vcpu_token);
+	hr->lpcr = swab64(hr->lpcr);
+	/* PCR_MASK is ORed in after the swap, so it is not byte-reversed */
+	hr->pcr = swab64(hr->pcr) | PCR_MASK;
+	hr->amor = swab64(hr->amor);
+	hr->dpdes = swab64(hr->dpdes);
+	hr->hfscr = swab64(hr->hfscr);
+	hr->tb_offset = swab64(hr->tb_offset);
+	hr->dawr0 = swab64(hr->dawr0);
+	hr->dawrx0 = swab64(hr->dawrx0);
+	hr->ciabr = swab64(hr->ciabr);
+	hr->hdec_expiry = swab64(hr->hdec_expiry);
+	hr->purr = swab64(hr->purr);
+	hr->spurr = swab64(hr->spurr);
+	hr->ic = swab64(hr->ic);
+	hr->vtb = swab64(hr->vtb);
+	hr->hdar = swab64(hr->hdar);
+	hr->hdsisr = swab64(hr->hdsisr);
+	hr->heir = swab64(hr->heir);
+	hr->asdr = swab64(hr->asdr);
+	hr->srr0 = swab64(hr->srr0);
+	hr->srr1 = swab64(hr->srr1);
+	hr->sprg[0] = swab64(hr->sprg[0]);
+	hr->sprg[1] = swab64(hr->sprg[1]);
+	hr->sprg[2] = swab64(hr->sprg[2]);
+	hr->sprg[3] = swab64(hr->sprg[3]);
+	hr->pidr = swab64(hr->pidr);
+	hr->cfar = swab64(hr->cfar);
+	hr->ppr = swab64(hr->ppr);
+	hr->dawr1 = swab64(hr->dawr1);
+	hr->dawrx1 = swab64(hr->dawrx1);
+}
+
+/*
+ * Copy the state handed back after an L2 run from @vcpu into @hr.
+ *
+ * A fixed set of registers is always copied; the fault-related fields
+ * (hdar, hdsisr, asdr, heir, and the cause bits of hfscr) are filled in
+ * only for the trap types handled in the switch below.
+ */
+static void save_hv_return_state(struct kvm_vcpu *vcpu,
+				 struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* State held per virtual core */
+	hr->dpdes = vcore->dpdes;
+	hr->vtb = vcore->vtb;
+
+	/* State held per vcpu */
+	hr->purr = vcpu->arch.purr;
+	hr->spurr = vcpu->arch.spurr;
+	hr->ic = vcpu->arch.ic;
+	hr->pidr = vcpu->arch.pid;
+	hr->cfar = vcpu->arch.cfar;
+	hr->ppr = vcpu->arch.ppr;
+
+	/* Shared registers */
+	hr->srr0 = vcpu->arch.shregs.srr0;
+	hr->srr1 = vcpu->arch.shregs.srr1;
+	hr->sprg[0] = vcpu->arch.shregs.sprg0;
+	hr->sprg[1] = vcpu->arch.shregs.sprg1;
+	hr->sprg[2] = vcpu->arch.shregs.sprg2;
+	hr->sprg[3] = vcpu->arch.shregs.sprg3;
+
+	/* Trap-specific fault information */
+	switch (vcpu->arch.trap) {
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		hr->hdar = vcpu->arch.fault_dar;
+		hr->hdsisr = vcpu->arch.fault_dsisr;
+		hr->asdr = vcpu->arch.fault_gpa;
+		break;
+	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		hr->asdr = vcpu->arch.fault_gpa;
+		break;
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		/* Replace only the interrupt-cause bits of hr->hfscr */
+		hr->hfscr = (hr->hfscr & ~HFSCR_INTR_CAUSE) |
+			    (vcpu->arch.hfscr & HFSCR_INTR_CAUSE);
+		break;
+	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		hr->heir = vcpu->arch.emul_inst;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Load the register state described by @hr into @vcpu and its vcore.
+ * tb_offset is not written here.
+ */
+static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* State held per virtual core */
+	vcore->pcr = hr->pcr | PCR_MASK;
+	vcore->dpdes = hr->dpdes;
+	vcore->vtb = hr->vtb;
+
+	/* State held per vcpu */
+	vcpu->arch.hfscr = hr->hfscr;
+	vcpu->arch.ciabr = hr->ciabr;
+	vcpu->arch.purr = hr->purr;
+	vcpu->arch.spurr = hr->spurr;
+	vcpu->arch.ic = hr->ic;
+	vcpu->arch.pid = hr->pidr;
+	vcpu->arch.cfar = hr->cfar;
+	vcpu->arch.ppr = hr->ppr;
+
+	/* Data address watchpoints */
+	vcpu->arch.dawr0 = hr->dawr0;
+	vcpu->arch.dawrx0 = hr->dawrx0;
+	vcpu->arch.dawr1 = hr->dawr1;
+	vcpu->arch.dawrx1 = hr->dawrx1;
+
+	/* Shared registers */
+	vcpu->arch.shregs.srr0 = hr->srr0;
+	vcpu->arch.shregs.srr1 = hr->srr1;
+	vcpu->arch.shregs.sprg0 = hr->sprg[0];
+	vcpu->arch.shregs.sprg1 = hr->sprg[1];
+	vcpu->arch.shregs.sprg2 = hr->sprg[2];
+	vcpu->arch.shregs.sprg3 = hr->sprg[3];
+}
+
+/*
+ * Load the return-state fields of @hr into @vcpu and its vcore, including
+ * the fault information (fault_dar, fault_dsisr, fault_gpa, emul_inst).
+ */
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+				   struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* State held per virtual core */
+	vcore->dpdes = hr->dpdes;
+	vcore->vtb = hr->vtb;
+
+	/* State held per vcpu */
+	vcpu->arch.hfscr = hr->hfscr;
+	vcpu->arch.purr = hr->purr;
+	vcpu->arch.spurr = hr->spurr;
+	vcpu->arch.ic = hr->ic;
+	vcpu->arch.pid = hr->pidr;
+	vcpu->arch.cfar = hr->cfar;
+	vcpu->arch.ppr = hr->ppr;
+
+	/* Fault information */
+	vcpu->arch.fault_dar = hr->hdar;
+	vcpu->arch.fault_dsisr = hr->hdsisr;
+	vcpu->arch.fault_gpa = hr->asdr;
+	vcpu->arch.emul_inst = hr->heir;
+
+	/* Shared registers */
+	vcpu->arch.shregs.srr0 = hr->srr0;
+	vcpu->arch.shregs.srr1 = hr->srr1;
+	vcpu->arch.shregs.sprg0 = hr->sprg[0];
+	vcpu->arch.shregs.sprg1 = hr->sprg[1];
+	vcpu->arch.shregs.sprg2 = hr->sprg[2];
+	vcpu->arch.shregs.sprg3 = hr->sprg[3];
+}
+
+/*
+ * Prepare @vcpu for completing an MMIO access made by a nested guest.
+ * @regs_ptr is the L1 address of the L2 pt_regs image.
+ */
+static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
+{
+	/* No need to reflect the page fault to L1, we've handled it */
+	vcpu->arch.trap = 0;
+
+	/* Only a load into a plain GPR needs to be redirected */
+	if (vcpu->mmio_is_write != 0)
+		return;
+	if ((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) != KVM_MMIO_REG_GPR)
+		return;
+
+	/*
+	 * The L2 gprs have already been written back into L1 memory by the
+	 * time the mmio completes, so remember the L1 memory location of
+	 * the target L2 gpr; kvmppc_complete_mmio_load() writes the loaded
+	 * value there.
+	 */
+	vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
+				   offsetof(struct pt_regs,
+					    gpr[vcpu->arch.io_gpr]);
+	vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
+}
+
+/*
+ * Read the hv_guest_state at @hv_ptr and the pt_regs at @regs_ptr from
+ * guest memory into @l2_hv and @l2_regs.
+ *
+ * The version word is read first (and byte-swapped if needed) so that the
+ * number of state bytes to read can be derived from it.
+ *
+ * Returns 0 on success, -1 if the version cannot be read or has no known
+ * size, and 1 if either of the two bulk reads fails.
+ */
+static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
+					   struct hv_guest_state *l2_hv,
+					   struct pt_regs *l2_regs,
+					   u64 hv_ptr, u64 regs_ptr)
+{
+	int len;
+
+	if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
+				sizeof(l2_hv->version)))
+		return -1;
+
+	if (kvmppc_need_byteswap(vcpu))
+		l2_hv->version = swab64(l2_hv->version);
+
+	len = hv_guest_state_size(l2_hv->version);
+	if (len < 0)
+		return -1;
+
+	if (kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, len))
+		return 1;
+	if (kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
+				sizeof(struct pt_regs)))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Write @l2_hv and @l2_regs back to guest memory at @hv_ptr and @regs_ptr.
+ * The number of state bytes written is derived from l2_hv->version.
+ *
+ * Returns 0 on success, -1 if the version has no known size, and 1 if
+ * either write fails.
+ */
+static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
+					    struct hv_guest_state *l2_hv,
+					    struct pt_regs *l2_regs,
+					    u64 hv_ptr, u64 regs_ptr)
+{
+	int len = hv_guest_state_size(l2_hv->version);
+
+	if (len < 0)
+		return -1;
+
+	if (kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, len))
+		return 1;
+	if (kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
+				 sizeof(struct pt_regs)))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Load the L2 register state from @l2_hv into @vcpu, then sanitise the
+ * values that L1 must not fully control.
+ *
+ * On entry *@lpcr holds the LPCR value requested by L1; on return it holds
+ * the filtered value to run the L2 with.  @l1_hv is not used here.
+ */
+static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+			    const struct hv_guest_state *l2_hv,
+			    const struct hv_guest_state *l1_hv, u64 *lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 l1_settable, merged;
+
+	restore_hv_regs(vcpu, l2_hv);
+
+	/*
+	 * Don't let L1 change LPCR bits for the L2 except these:
+	 */
+	l1_settable = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+		      LPCR_MER;
+	merged = (vc->lpcr & ~l1_settable) | (*lpcr & l1_settable);
+
+	/*
+	 * Additional filtering is required depending on hardware
+	 * and configuration.
+	 */
+	*lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, merged);
+
+	/*
+	 * Don't let L1 enable features for L2 which we don't allow for L1,
+	 * but preserve the interrupt cause field.
+	 */
+	vcpu->arch.hfscr = l2_hv->hfscr &
+			   (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);
+
+	/* Don't let data address watchpoint match in hypervisor state */
+	vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
+	vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
+
+	/* Don't let completed instruction address breakpt match in HV state */
+	if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+		vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
+}
+
+/*
+ * Run a nested (L2) guest on behalf of the L1 guest owning @vcpu.
+ *
+ * r4 = L1 address of the hv_guest_state for the L2
+ * r5 = L1 address of the pt_regs for the L2
+ *
+ * The L2 state is read from L1 memory, the current L1 state is saved, the
+ * L2 is run until kvmhv_run_single_vcpu() stops asking to resume, and the
+ * L1 state is then restored and the updated L2 state written back.
+ *
+ * Returns:
+ *   H_NOT_AVAILABLE  no partition table has been registered (l1_ptcr == 0)
+ *   H_BAD_MODE       unacceptable transactional-memory state
+ *   H_PARAMETER      state could not be read, vcpu_token out of range, or
+ *                    no nested guest could be found/created for the lpid
+ *   H_P2             unsupported hv_guest_state version
+ *   H_AUTHORITY      state could not be written back to L1
+ *   H_INTERRUPT      the run was interrupted (-EINTR)
+ *   H_TOO_HARD       an MMIO access needs to be completed
+ *   otherwise        vcpu->arch.trap
+ */
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
+{
+	long int err, r;
+	struct kvm_nested_guest *l2;
+	struct pt_regs l2_regs, saved_l1_regs;
+	struct hv_guest_state l2_hv = {0}, saved_l1_hv;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 hv_ptr, regs_ptr;
+	u64 hdec_exp, lpcr;
+	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
+
+	if (vcpu->kvm->arch.l1_ptcr == 0)
+		return H_NOT_AVAILABLE;
+
+	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+		return H_BAD_MODE;
+
+	/* copy parameters in */
+	hv_ptr = kvmppc_get_gpr(vcpu, 4);
+	regs_ptr = kvmppc_get_gpr(vcpu, 5);
+	kvm_vcpu_srcu_read_lock(vcpu);
+	err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+					      hv_ptr, regs_ptr);
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	if (err)
+		return H_PARAMETER;
+
+	if (kvmppc_need_byteswap(vcpu))
+		byteswap_hv_regs(&l2_hv);
+	if (l2_hv.version > HV_GUEST_STATE_VERSION)
+		return H_P2;
+
+	if (kvmppc_need_byteswap(vcpu))
+		byteswap_pt_regs(&l2_regs);
+	if (l2_hv.vcpu_token >= NR_CPUS)
+		return H_PARAMETER;
+
+	/*
+	 * L1 must have set up a suspended state to enter the L2 in a
+	 * transactional state, and only in that case. These have to be
+	 * filtered out here to prevent causing a TM Bad Thing in the
+	 * host HRFID. We could synthesize a TM Bad Thing back to the L1
+	 * here but there doesn't seem like much point.
+	 */
+	if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+		if (!MSR_TM_ACTIVE(l2_regs.msr))
+			return H_BAD_MODE;
+	} else {
+		if (l2_regs.msr & MSR_TS_MASK)
+			return H_BAD_MODE;
+		if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+			return H_BAD_MODE;
+	}
+
+	/*
+	 * translate lpid; this takes a reference on the nested guest which
+	 * is dropped by kvmhv_put_nested() below
+	 */
+	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
+	if (!l2)
+		return H_PARAMETER;
+	/* No cached partition table entry yet: load it from L1 memory */
+	if (!l2->l1_gr_to_hr) {
+		mutex_lock(&l2->tlb_lock);
+		kvmhv_update_ptbl_cache(l2);
+		mutex_unlock(&l2->tlb_lock);
+	}
+
+	/* save l1 values of things */
+	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+	saved_l1_regs = vcpu->arch.regs;
+	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
+
+	/* convert TB values/offsets to host (L0) values */
+	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
+	vc->tb_offset += l2_hv.tb_offset;
+	vcpu->arch.dec_expires += l2_hv.tb_offset;
+
+	/* set L1 state to L2 state */
+	vcpu->arch.nested = l2;
+	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+	vcpu->arch.nested_hfscr = l2_hv.hfscr;
+	vcpu->arch.regs = l2_regs;
+
+	/* Guest must always run with ME enabled, HV disabled. */
+	vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
+
+	lpcr = l2_hv.lpcr;
+	load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);
+
+	vcpu->arch.ret = RESUME_GUEST;
+	vcpu->arch.trap = 0;
+	do {
+		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
+	} while (is_kvmppc_resume_guest(r));
+
+	/* save L2 state for return */
+	l2_regs = vcpu->arch.regs;
+	l2_regs.msr = vcpu->arch.shregs.msr;
+	/* How far these counters advanced while the L2 was running */
+	delta_purr = vcpu->arch.purr - l2_hv.purr;
+	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
+	delta_ic = vcpu->arch.ic - l2_hv.ic;
+	delta_vtb = vc->vtb - l2_hv.vtb;
+	save_hv_return_state(vcpu, &l2_hv);
+
+	/* restore L1 state */
+	vcpu->arch.nested = NULL;
+	vcpu->arch.regs = saved_l1_regs;
+	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
+	/* set L1 MSR TS field according to L2 transaction state */
+	if (l2_regs.msr & MSR_TS_MASK)
+		vcpu->arch.shregs.msr |= MSR_TS_S;
+	vc->tb_offset = saved_l1_hv.tb_offset;
+	/* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
+	vcpu->arch.dec_expires -= l2_hv.tb_offset;
+	restore_hv_regs(vcpu, &saved_l1_hv);
+	/* Add the L2's counter advance on top of the restored L1 values */
+	vcpu->arch.purr += delta_purr;
+	vcpu->arch.spurr += delta_spurr;
+	vcpu->arch.ic += delta_ic;
+	vc->vtb += delta_vtb;
+
+	kvmhv_put_nested(l2);
+
+	/* copy l2_hv_state and regs back to guest */
+	if (kvmppc_need_byteswap(vcpu)) {
+		byteswap_hv_regs(&l2_hv);
+		byteswap_pt_regs(&l2_regs);
+	}
+	kvm_vcpu_srcu_read_lock(vcpu);
+	err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+					       hv_ptr, regs_ptr);
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	if (err)
+		return H_AUTHORITY;
+
+	if (r == -EINTR)
+		return H_INTERRUPT;
+
+	if (vcpu->mmio_needed) {
+		kvmhv_nested_mmio_needed(vcpu, regs_ptr);
+		return H_TOO_HARD;
+	}
+
+	return vcpu->arch.trap;
+}
+
+/*
+ * One-time nested-HV initialisation.
+ *
+ * When kvmhv_on_pseries() is true, allocate the partition table used for
+ * our own nested guests and register it with the parent hypervisor through
+ * the H_SET_PARTITION_TABLE hcall.
+ *
+ * Returns 0 on success or when not on pseries, -ENODEV if radix is not
+ * enabled or the hcall fails, and -ENOMEM if the table cannot be
+ * allocated.
+ */
+long kvmhv_nested_init(void)
+{
+	long int ptb_order;
+	unsigned long ptcr;
+	long rc;
+
+	if (!kvmhv_on_pseries())
+		return 0;
+	if (!radix_enabled())
+		return -ENODEV;
+
+	/* Partition table entry is 1<<4 bytes in size, hence the 4. */
+	ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4;
+	/* Minimum partition table size is 1<<12 bytes */
+	if (ptb_order < 12)
+		ptb_order = 12;
+	/*
+	 * ptb_order is already log2 of the table size in bytes (it includes
+	 * the 1<<4 entry size), so the table needs exactly 1 << ptb_order
+	 * bytes.  Shifting sizeof(struct patb_entry) by ptb_order would count
+	 * the entry size twice and over-allocate sixteen-fold.
+	 */
+	pseries_partition_tb = kmalloc(1UL << ptb_order, GFP_KERNEL);
+	if (!pseries_partition_tb) {
+		pr_err("kvm-hv: failed to allocated nested partition table\n");
+		return -ENOMEM;
+	}
+
+	/* PTCR layout: table real address | (log2(size in bytes) - 12) */
+	ptcr = __pa(pseries_partition_tb) | (ptb_order - 12);
+	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
+	if (rc != H_SUCCESS) {
+		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
+		       rc);
+		kfree(pseries_partition_tb);
+		pseries_partition_tb = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Undo kvmhv_nested_init(): unregister the partition table from the parent
+ * hypervisor (H_SET_PARTITION_TABLE with 0) and free it.  Does nothing if
+ * no table was set up.
+ */
+void kvmhv_nested_exit(void)
+{
+	/*
+	 * N.B. the kvmhv_on_pseries() test is there because it enables
+	 * the compiler to remove the call to plpar_hcall_norets()
+	 * when CONFIG_PPC_PSERIES=n.
+	 */
+	if (!kvmhv_on_pseries() || !pseries_partition_tb)
+		return;
+
+	plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
+	kfree(pseries_partition_tb);
+	pseries_partition_tb = NULL;
+}
+
+/*
+ * Invalidate all translations for @lpid.
+ *
+ * When not on pseries this is done directly with radix__flush_all_lpid().
+ * On pseries it is requested from the parent hypervisor: through
+ * pseries_rpt_invalidate() if FW_FEATURE_RPT_INVALIDATE is available,
+ * otherwise through the H_TLB_INVALIDATE hcall.  A failing hcall is only
+ * logged.
+ */
+static void kvmhv_flush_lpid(unsigned int lpid)
+{
+	long rc;
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_all_lpid(lpid);
+		return;
+	}
+
+	if (firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					    H_RPTI_TYPE_PAT,
+					    H_RPTI_PAGE_ALL, 0, -1UL);
+	} else {
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	}
+
+	if (rc)
+		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
+}
+
+/*
+ * Set the partition table entry for @lpid to (@dw0, @dw1).
+ *
+ * When not on pseries the work is delegated to
+ * mmu_partition_table_set_entry().  On pseries the entry is stored
+ * big-endian in pseries_partition_tb and the lpid is then flushed.
+ */
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
+{
+	struct patb_entry *entry;
+
+	if (!kvmhv_on_pseries()) {
+		mmu_partition_table_set_entry(lpid, dw0, dw1, true);
+		return;
+	}
+
+	entry = &pseries_partition_tb[lpid];
+	entry->patb0 = cpu_to_be64(dw0);
+	entry->patb1 = cpu_to_be64(dw1);
+	/* L0 will do the necessary barriers */
+	kvmhv_flush_lpid(lpid);
+}
+
+/*
+ * Install the partition table entry for nested guest @gp under its
+ * shadow_lpid: dword 0 is built from PATB_HR, the radix tree size, the
+ * physical address of the shadow page table and RADIX_PGD_INDEX_SIZE;
+ * dword 1 is gp->process_table.
+ */
+static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
+{
+	unsigned long dw0;
+
+	dw0 = PATB_HR | radix__get_tree_size() |
+		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
+	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
+}
+
+/*
+ * Handle the H_SET_PARTITION_TABLE hcall.
+ * r4 = guest real address of partition table + log_2(size) - 12
+ * (formatted as for the PTCR).
+ *
+ * Returns H_PARAMETER if the table is larger than
+ * KVM_MAX_NESTED_GUESTS_SHIFT allows or its base is not a visible gfn;
+ * otherwise records the value in kvm->arch.l1_ptcr and returns H_SUCCESS.
+ */
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
+	int srcu_idx;
+	bool valid;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	/* Check partition size and base address. */
+	valid = (ptcr & PRTS_MASK) + 12 - 4 <= KVM_MAX_NESTED_GUESTS_SHIFT &&
+		kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	if (!valid)
+		return H_PARAMETER;
+
+	kvm->arch.l1_ptcr = ptcr;
+	return H_SUCCESS;
+}
+
+/*
+ * Handle the H_COPY_TOFROM_GUEST hcall.
+ * r4 = L1 lpid of nested guest
+ * r5 = pid
+ * r6 = eaddr to access
+ * r7 = to buffer (L1 gpa)
+ * r8 = from buffer (L1 gpa)
+ * r9 = n bytes to copy
+ *
+ * A non-zero "to" buffer selects a load from the nested guest into L1
+ * memory; otherwise data is stored from the "from" buffer into the nested
+ * guest.  The data is staged through a temporary kernel buffer.
+ *
+ * Returns H_PARAMETER for bad arguments or an unknown lpid, H_NO_MEM if
+ * the staging buffer cannot be allocated, H_NOT_FOUND if either copy step
+ * fails, and 0 on success.
+ */
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
+{
+	int l1_lpid = kvmppc_get_gpr(vcpu, 4);
+	int pid = kvmppc_get_gpr(vcpu, 5);
+	gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
+	gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
+	gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
+	unsigned long n = kvmppc_get_gpr(vcpu, 9);
+	bool is_load = !!gp_to;
+	struct kvm_nested_guest *gp;
+	void *bounce;
+	long rc;
+
+	/* At most one buffer may be given; it determines the direction */
+	if (gp_to && gp_from)
+		return H_PARAMETER;
+
+	if (eaddr & (0xFFFUL << 52))
+		return H_PARAMETER;
+
+	bounce = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
+	if (!bounce)
+		return H_NO_MEM;
+
+	gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
+	if (!gp) {
+		rc = H_PARAMETER;
+		goto free_bounce;
+	}
+
+	mutex_lock(&gp->tlb_lock);
+
+	if (is_load) {
+		/* Load from the nested guest into our buffer */
+		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
+						     eaddr, bounce, NULL, n);
+		if (!rc) {
+			/* Write what was loaded back to the L1 guest */
+			kvm_vcpu_srcu_read_lock(vcpu);
+			rc = kvm_vcpu_write_guest(vcpu, gp_to, bounce, n);
+			kvm_vcpu_srcu_read_unlock(vcpu);
+		}
+	} else {
+		/* Load the data to be stored from the L1 guest */
+		kvm_vcpu_srcu_read_lock(vcpu);
+		rc = kvm_vcpu_read_guest(vcpu, gp_from, bounce, n);
+		kvm_vcpu_srcu_read_unlock(vcpu);
+		if (!rc) {
+			/* Store from our buffer into the nested guest */
+			rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid,
+							     pid, eaddr, NULL,
+							     bounce, n);
+		}
+	}
+
+	/* Any failure of either copy step is reported as H_NOT_FOUND */
+	if (rc)
+		rc = H_NOT_FOUND;
+
+	mutex_unlock(&gp->tlb_lock);
+	kvmhv_put_nested(gp);
+free_bounce:
+	kfree(bounce);
+	return rc;
+}
+
+/*
+ * Reload the partition table entry for a guest.
+ * Caller must hold gp->tlb_lock.
+ *
+ * The entry is read from L1 memory at the table base in l1_ptcr plus
+ * 16 bytes per lpid.  If the lpid lies outside the table or the read
+ * fails, the cached values are zeroed.  Either way the shadow partition
+ * table entry is rewritten afterwards.
+ */
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	struct patb_entry entry;
+	unsigned long entry_gpa, nr_entries;
+	int err = -EFAULT;
+
+	entry_gpa = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
+	nr_entries = 1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4);
+
+	if (gp->l1_lpid < nr_entries) {
+		int srcu_idx = srcu_read_lock(&kvm->srcu);
+
+		err = kvm_read_guest(kvm, entry_gpa, &entry, sizeof(entry));
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+	}
+
+	if (!err) {
+		gp->l1_gr_to_hr = be64_to_cpu(entry.patb0);
+		gp->process_table = be64_to_cpu(entry.patb1);
+	} else {
+		gp->l1_gr_to_hr = 0;
+		gp->process_table = 0;
+	}
+
+	kvmhv_set_nested_ptbl(gp);
+}
+
+/* Initialise the idr that maps L1 lpids to nested guests for @kvm. */
+void kvmhv_vm_nested_init(struct kvm *kvm)
+{
+	idr_init(&kvm->arch.kvm_nested_guest_idr);
+}
+
+/*
+ * Return the idr entry stored for @lpid, or NULL if there is none.
+ * Takes no lock and no reference itself.
+ */
+static struct kvm_nested_guest *__find_nested(struct kvm *kvm, int lpid)
+{
+	return idr_find(&kvm->arch.kvm_nested_guest_idr, lpid);
+}
+
+/*
+ * Reserve the idr slot for exactly @lpid with a NULL pointer, using a
+ * GFP_KERNEL allocation.  Returns true if the slot was obtained.
+ */
+static bool __prealloc_nested(struct kvm *kvm, int lpid)
+{
+	int id = idr_alloc(&kvm->arch.kvm_nested_guest_idr,
+			   NULL, lpid, lpid + 1, GFP_KERNEL);
+
+	return id == lpid;
+}
+
+/*
+ * Store @gp in the idr slot for @lpid.  Warns if idr_replace() returns a
+ * non-NULL value.
+ */
+static void __add_nested(struct kvm *kvm, int lpid, struct kvm_nested_guest *gp)
+{
+	if (idr_replace(&kvm->arch.kvm_nested_guest_idr, gp, lpid))
+		WARN_ON(1);
+}
+
+/* Remove the idr entry for @lpid; does not touch any refcount. */
+static void __remove_nested(struct kvm *kvm, int lpid)
+{
+	idr_remove(&kvm->arch.kvm_nested_guest_idr, lpid);
+}
+
+/*
+ * Allocate and initialise a nested guest structure for L1 lpid @lpid:
+ * the structure itself, a shadow page table and a shadow lpid.
+ *
+ * The new structure is not inserted into the idr and its refcnt is left at
+ * zero from kzalloc().  Returns NULL if any allocation fails, after
+ * undoing the earlier ones.
+ */
+static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
+{
+	struct kvm_nested_guest *gp;
+	long shadow_lpid;
+
+	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
+	if (!gp)
+		return NULL;
+	gp->l1_host = kvm;
+	gp->l1_lpid = lpid;
+	mutex_init(&gp->tlb_lock);
+	gp->shadow_pgtable = pgd_alloc(kvm->mm);
+	if (!gp->shadow_pgtable)
+		goto out_free;
+	shadow_lpid = kvmppc_alloc_lpid();
+	if (shadow_lpid < 0)
+		goto out_free2;
+	gp->shadow_lpid = shadow_lpid;
+	gp->radix = 1;
+
+	/* Fill every byte of prev_cpu[] with 0xff */
+	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
+
+	return gp;
+
+ out_free2:
+	pgd_free(kvm->mm, gp->shadow_pgtable);
+ out_free:
+	kfree(gp);
+	return NULL;
+}
+
+/*
+ * Free up any resources allocated for a nested guest.
+ *
+ * Tears down and frees the shadow page table (if any), clears the
+ * partition table entry for the shadow lpid, returns the shadow lpid and
+ * finally frees @gp itself.
+ */
+static void kvmhv_release_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+
+	if (gp->shadow_pgtable) {
+		/*
+		 * No vcpu is using this struct and no call to
+		 * kvmhv_get_nested can find this struct,
+		 * so we don't need to hold kvm->mmu_lock.
+		 */
+		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+					  gp->shadow_lpid);
+		pgd_free(kvm->mm, gp->shadow_pgtable);
+	}
+	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
+	kvmppc_free_lpid(gp->shadow_lpid);
+	kfree(gp);
+}
+
+/*
+ * Unpublish @gp: if it is still the idr entry for its L1 lpid, remove it
+ * from the idr and drop one reference.  If the refcnt read under mmu_lock
+ * is then zero, release @gp.
+ */
+static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	int lpid = gp->l1_lpid;
+	long ref;
+
+	spin_lock(&kvm->mmu_lock);
+	if (gp == __find_nested(kvm, lpid)) {
+		__remove_nested(kvm, lpid);
+		--gp->refcnt;
+	}
+	/* Sample the count under the lock; the release happens outside it */
+	ref = gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+	if (ref == 0)
+		kvmhv_release_nested(gp);
+}
+
+/*
+ * Free up all nested resources allocated for this guest.
+ * This is called with no vcpus of the guest running, when
+ * switching the guest to HPT mode or when destroying the
+ * guest.
+ *
+ * Every nested guest is removed from the idr under mmu_lock; those whose
+ * refcnt drops to zero are chained on a local list and released after the
+ * lock has been dropped.  Finally the nested rmaps of every memslot are
+ * freed.
+ */
+void kvmhv_release_all_nested(struct kvm *kvm)
+{
+	int lpid;
+	struct kvm_nested_guest *gp;
+	struct kvm_nested_guest *freelist = NULL;
+	struct kvm_memory_slot *memslot;
+	int srcu_idx, bkt;
+
+	spin_lock(&kvm->mmu_lock);
+	idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+		__remove_nested(kvm, lpid);
+		if (--gp->refcnt == 0) {
+			gp->next = freelist;
+			freelist = gp;
+		}
+	}
+	idr_destroy(&kvm->arch.kvm_nested_guest_idr);
+	/* idr is empty and may be reused at this point */
+	spin_unlock(&kvm->mmu_lock);
+	/* Release the collected guests outside mmu_lock */
+	while ((gp = freelist) != NULL) {
+		freelist = gp->next;
+		kvmhv_release_nested(gp);
+	}
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
+		kvmhv_free_memslot_nest_rmap(memslot);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+/*
+ * Discard the shadow page table contents of @gp, flush its shadow lpid
+ * and reload the cached partition table entry.  If the reloaded
+ * l1_gr_to_hr is zero, the nested guest is removed.
+ *
+ * caller must hold gp->tlb_lock
+ */
+static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+
+	spin_lock(&kvm->mmu_lock);
+	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
+	spin_unlock(&kvm->mmu_lock);
+	kvmhv_flush_lpid(gp->shadow_lpid);
+	kvmhv_update_ptbl_cache(gp);
+	if (gp->l1_gr_to_hr == 0)
+		kvmhv_remove_nested(gp);
+}
+
+/*
+ * Look up the nested guest registered for L1 lpid @l1_lpid and take a
+ * reference on it.
+ *
+ * If none is registered and @create is true, a new nested guest is
+ * allocated and inserted into the idr.  A newly inserted guest ends up
+ * with two references: one for the idr entry and one for the caller.
+ *
+ * Returns the nested guest with its refcnt raised, or NULL if @l1_lpid is
+ * outside the range given by l1_ptcr, if nothing is registered and
+ * @create is false, or if allocation fails.
+ */
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+					  bool create)
+{
+	struct kvm_nested_guest *gp, *newgp;
+
+	if (l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+		return NULL;
+
+	spin_lock(&kvm->mmu_lock);
+	gp = __find_nested(kvm, l1_lpid);
+	if (gp)
+		++gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+
+	if (gp || !create)
+		return gp;
+
+	/* Allocate outside mmu_lock */
+	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
+	if (!newgp)
+		return NULL;
+
+	if (!__prealloc_nested(kvm, l1_lpid)) {
+		kvmhv_release_nested(newgp);
+		return NULL;
+	}
+
+	spin_lock(&kvm->mmu_lock);
+	/* Look again: an entry may have been added while unlocked */
+	gp = __find_nested(kvm, l1_lpid);
+	if (!gp) {
+		__add_nested(kvm, l1_lpid, newgp);
+		/* Reference held by the idr entry */
+		++newgp->refcnt;
+		gp = newgp;
+		newgp = NULL;
+	}
+	/* Reference handed to the caller */
+	++gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+
+	/* An existing entry was found instead: discard our unused allocation */
+	if (newgp)
+		kvmhv_release_nested(newgp);
+
+	return gp;
+}
+
+/*
+ * Drop one reference on @gp under mmu_lock and release the nested guest
+ * if that was the last one.
+ */
+void kvmhv_put_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	long ref;
+
+	spin_lock(&kvm->mmu_lock);
+	ref = --gp->refcnt;
+	spin_unlock(&kvm->mmu_lock);
+	if (ref == 0)
+		kvmhv_release_nested(gp);
+}
+
+/*
+ * Find the pte for @ea in the shadow page table of the nested guest
+ * registered under @lpid.  The page shift of the mapping is passed back
+ * through @hshift by __find_linux_pte().
+ *
+ * Returns NULL if no nested guest is registered for @lpid; otherwise
+ * whatever __find_linux_pte() returns.  Warns (VM_WARN) if kvm->mmu_lock
+ * is not held.
+ */
+pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
+				 unsigned long ea, unsigned *hshift)
+{
+	struct kvm_nested_guest *nested = __find_nested(kvm, lpid);
+
+	if (!nested)
+		return NULL;
+
+	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+		"%s called with kvm mmu_lock not held \n", __func__);
+
+	return __find_linux_pte(nested->shadow_pgtable, ea, NULL, hshift);
+}
+
+/*
+ * Two nested rmap values are equal when their lpid and gpa fields match;
+ * all other bits are ignored.
+ */
+static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
+{
+	u64 differing = rmap_1 ^ rmap_2;
+
+	return (differing & (RMAP_NESTED_LPID_MASK |
+			     RMAP_NESTED_GPA_MASK)) == 0;
+}
+
+/*
+ * Record the nested rmap entry *@n_rmap in the rmap word at @rmapp.
+ *
+ * The rmap word is either 0 (empty), a single rmap value tagged with
+ * RMAP_NESTED_IS_SINGLE_ENTRY, or the head of an llist of struct
+ * rmap_nested nodes.
+ *
+ * *@n_rmap is set to NULL only when the node itself has been linked into
+ * the list; when the value is stored inline or an equal entry already
+ * exists, *@n_rmap is left for the caller to free.
+ */
+void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+			    struct rmap_nested **n_rmap)
+{
+	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+	struct rmap_nested *cursor;
+	u64 rmap, new_rmap = (*n_rmap)->rmap;
+
+	/* Are there any existing entries? */
+	if (!(*rmapp)) {
+		/* No -> use the rmap as a single entry */
+		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
+		return;
+	}
+
+	/* Do any entries match what we're trying to insert? */
+	for_each_nest_rmap_safe(cursor, entry, &rmap) {
+		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
+			return;
+	}
+
+	/* Do we need to create a list or just add the new entry? */
+	rmap = *rmapp;
+	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+		*rmapp = 0UL;
+	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
+	/*
+	 * When converting from a single entry, the old tagged value is kept
+	 * in the new node's next pointer.
+	 */
+	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+		(*n_rmap)->list.next = (struct llist_node *) rmap;
+
+	/* Set NULL so not freed by caller */
+	*n_rmap = NULL;
+}
+
+/*
+ * Apply the @clr / @set bit changes to the shadow pte referenced by the
+ * nested rmap value @n_rmap, provided the pte is present and still maps
+ * @hpa under @mask, then invalidate the TLB entry for that page.
+ *
+ * NOTE(review): the invalidation is issued with the lpid decoded from the
+ * rmap value, whereas kvmhv_remove_nest_rmap() passes gp->shadow_lpid to
+ * kvmppc_unmap_pte() -- confirm which lpid kvmppc_radix_tlbie_page()
+ * expects here.
+ */
+static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
+				      unsigned long clr, unsigned long set,
+				      unsigned long hpa, unsigned long mask)
+{
+	unsigned long gpa;
+	unsigned int shift, lpid;
+	pte_t *ptep;
+
+	/* Decode the nested gpa and lpid packed into the rmap value */
+	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+
+	/* Find the pte */
+	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
+	/*
+	 * If the pte is present and the pfn is still the same, update the pte.
+	 * If the pfn has changed then this is a stale rmap entry, the nested
+	 * gpa actually points somewhere else now, and there is nothing to do.
+	 * XXX A future optimisation would be to remove the rmap entry here.
+	 */
+	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
+		__radix_pte_update(ptep, clr, set);
+		kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
+	}
+}
+
+/*
+ * For a given list of rmap entries, update the rc bits in all ptes in shadow
+ * page tables for nested guests which are referenced by the rmap list.
+ *
+ * Does nothing if @clr or @set contains any bit other than _PAGE_DIRTY
+ * and _PAGE_ACCESSED.  @hpa is truncated to an @nbytes boundary before
+ * being compared against each pte.
+ */
+void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
+				    unsigned long clr, unsigned long set,
+				    unsigned long hpa, unsigned long nbytes)
+{
+	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+	struct rmap_nested *cursor;
+	unsigned long rmap, mask;
+
+	if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
+		return;
+
+	/* Keep only the real-page-number bits at or above the page size */
+	mask = PTE_RPN_MASK & ~(nbytes - 1);
+	hpa &= mask;
+
+	for_each_nest_rmap_safe(cursor, entry, &rmap)
+		kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
+}
+
+/*
+ * Invalidate the shadow pte named by one nested rmap value.
+ *
+ * Nothing happens if the nested guest no longer exists, or if the pte is
+ * absent or its address bits under @mask no longer equal @hpa.
+ */
+static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
+				   unsigned long hpa, unsigned long mask)
+{
+	unsigned long gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+	unsigned int lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >>
+			    RMAP_NESTED_LPID_SHIFT;
+	struct kvm_nested_guest *gp = __find_nested(kvm, lpid);
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (!gp)
+		return;
+
+	/* Find the pte */
+	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
+	if (!ptep || !pte_present(*ptep))
+		return;
+	/* Don't spuriously invalidate ptes if the pfn has changed */
+	if ((pte_val(*ptep) & mask) != hpa)
+		return;
+
+	kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+}
+
+/*
+ * Detach the whole rmap list from @rmapp, invalidate the shadow pte behind
+ * each entry and kfree() the cursor of each entry visited.
+ */
+static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
+					unsigned long hpa, unsigned long mask)
+{
+	struct llist_head *head = (struct llist_head *) rmapp;
+	struct llist_node *node = llist_del_all(head);
+	struct rmap_nested *pos;
+	unsigned long n_rmap;
+
+	for_each_nest_rmap_safe(pos, node, &n_rmap) {
+		kvmhv_remove_nest_rmap(kvm, n_rmap, hpa, mask);
+		kfree(pos);
+	}
+}
+
+/*
+ * Remove the nested rmap lists for every page of the @nbytes range that
+ * starts at @gpa inside @memslot.  A NULL @memslot is a no-op.
+ *
+ * Called with kvm->mmu_lock held.
+ */
+void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  unsigned long gpa, unsigned long hpa,
+				  unsigned long nbytes)
+{
+	unsigned long idx, end_idx;
+	unsigned long addr_mask;
+
+	if (!memslot)
+		return;
+
+	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
+	hpa &= addr_mask;
+
+	/* Indices are relative to the first gfn of the memslot */
+	idx = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
+	end_idx = idx + (nbytes >> PAGE_SHIFT);
+
+	while (idx < end_idx) {
+		kvmhv_remove_nest_rmap_list(kvm, &memslot->arch.rmap[idx],
+					    hpa, addr_mask);
+		idx++;
+	}
+}
+
+/*
+ * Detach the nested rmap list of every page in memslot @free and kfree()
+ * the cursor of each entry visited.
+ */
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
+{
+	struct rmap_nested *pos;
+	struct llist_node *node;
+	unsigned long n_rmap, *rmapp;
+	unsigned long i;
+
+	for (i = 0; i < free->npages; i++) {
+		rmapp = &free->arch.rmap[i];
+		node = llist_del_all((struct llist_head *) rmapp);
+		for_each_nest_rmap_safe(pos, node, &n_rmap)
+			kfree(pos);
+	}
+}
+
+/*
+ * Unmap the shadow pte that covers @gpa for nested guest @gp, taking
+ * kvm->mmu_lock around the lookup and unmap.
+ *
+ * Returns true if a present pte was found and unmapped.  When @shift_ret is
+ * non-NULL it receives the page shift reported by the lookup, or PAGE_SHIFT
+ * if the lookup reported zero.
+ */
+static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
+					struct kvm_nested_guest *gp,
+					long gpa, int *shift_ret)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool unmapped = false;
+	pte_t *ptep;
+	int shift;
+
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
+	if (shift == 0)
+		shift = PAGE_SHIFT;
+	if (ptep && pte_present(*ptep)) {
+		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+		unmapped = true;
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	if (shift_ret)
+		*shift_ret = shift;
+	return unmapped;
+}
+
+/* Extract the 2-bit RIC field, which starts at bit 18 (LSB = bit 0). */
+static inline int get_ric(unsigned int instr)
+{
+	const unsigned int field = instr >> 18;
+
+	return field & 0x3;
+}
+
+/* Extract the 1-bit PRS field at bit 17 (LSB = bit 0). */
+static inline int get_prs(unsigned int instr)
+{
+	const unsigned int field = instr >> 17;
+
+	return field & 0x1;
+}
+
+/* Extract the 1-bit R field at bit 16 (LSB = bit 0). */
+static inline int get_r(unsigned int instr)
+{
+	const unsigned int field = instr >> 16;
+
+	return field & 0x1;
+}
+
+/* Return the low 32 bits of the rS value, which carry the lpid. */
+static inline int get_lpid(unsigned long r_val)
+{
+	const unsigned long low32 = r_val & 0xffffffff;
+
+	return low32;
+}
+
+/* Extract the 2-bit IS field, which starts at bit 10 (LSB = bit 0). */
+static inline int get_is(unsigned long r_val)
+{
+	const unsigned long field = r_val >> 10;
+
+	return field & 0x3;
+}
+
+/* Extract the 3-bit AP field, which starts at bit 5 (LSB = bit 0). */
+static inline int get_ap(unsigned long r_val)
+{
+	const unsigned long field = r_val >> 5;
+
+	return field & 0x7;
+}
+
+/* Return the effective page number: the rB value with its low 12 bits dropped. */
+static inline long get_epn(unsigned long r_val)
+{
+	const unsigned long epn = r_val >> 12;
+
+	return epn;
+}
+
+/*
+ * Emulate a tlbie that targets one page of nested guest @lpid.
+ *
+ * @ap:  page size encoding, converted to a shift with ap_to_shift()
+ * @epn: effective page number, i.e. the address shifted right by 12
+ *
+ * The address is aligned down to the page size and every shadow pte backing
+ * that page is invalidated.  Returns -EINVAL for an @ap value that
+ * ap_to_shift() rejects, otherwise 0 (also when no such nested guest
+ * exists).
+ */
+static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
+					int ap, long epn)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	long npages;
+	int shift, shadow_shift;
+	unsigned long addr;
+
+	shift = ap_to_shift(ap);
+	if (shift < 0)
+		/* Invalid ap encoding */
+		return -EINVAL;
+
+	/* Shift as unsigned so a large epn cannot overflow a signed long */
+	addr = (unsigned long) epn << 12;
+	addr &= ~((1UL << shift) - 1);
+
+	/*
+	 * The guest page may be smaller than a host page, in which case
+	 * shift - PAGE_SHIFT is negative and must not be used as a shift
+	 * count.  A single pass through the loop below covers that case.
+	 */
+	if (shift > PAGE_SHIFT)
+		npages = 1UL << (shift - PAGE_SHIFT);
+	else
+		npages = 1;
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (!gp) /* No such guest -> nothing to do */
+		return 0;
+	mutex_lock(&gp->tlb_lock);
+
+	/* There may be more than one host page backing this single guest pte */
+	do {
+		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
+
+		/* Advance by however much the shadow pte actually covered */
+		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
+		addr += 1UL << shadow_shift;
+	} while (npages > 0);
+
+	mutex_unlock(&gp->tlb_lock);
+	kvmhv_put_nested(gp);
+	return 0;
+}
+
+/*
+ * Emulate a tlbie that targets the whole of nested guest @gp, with
+ * gp->tlb_lock held for the duration.  @ric selects what is invalidated:
+ *
+ *   0 - TLB: the shadow page table is freed with
+ *       kvmppc_free_pgtable_radix() and the shadow lpid is flushed, both
+ *       under kvm->mmu_lock
+ *   1 - PWC: we don't cache this, so there is nothing to do
+ *   2 - TLB, PWC and caching of partition table entries:
+ *       kvmhv_flush_nested()
+ *
+ * Any other value is ignored.
+ */
+static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
+				     struct kvm_nested_guest *gp, int ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	mutex_lock(&gp->tlb_lock);
+	if (ric == 0) {
+		spin_lock(&kvm->mmu_lock);
+		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+					  gp->shadow_lpid);
+		kvmhv_flush_lpid(gp->shadow_lpid);
+		spin_unlock(&kvm->mmu_lock);
+	} else if (ric == 2) {
+		kvmhv_flush_nested(gp);
+	}
+	mutex_unlock(&gp->tlb_lock);
+}
+
+/*
+ * Apply kvmhv_emulate_tlbie_lpid() with @ric to every nested guest found in
+ * kvm->arch.kvm_nested_guest_idr.
+ *
+ * The idr is iterated under kvm->mmu_lock, but the lock is released around
+ * each per-guest call and re-acquired before the iteration advances:
+ * kvmhv_emulate_tlbie_lpid() takes gp->tlb_lock (a mutex) and, for some
+ * @ric values, kvm->mmu_lock itself.
+ */
+static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	int lpid;
+
+	spin_lock(&kvm->mmu_lock);
+	idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+		/* Drop the spinlock for the call, which may sleep on a mutex */
+		spin_unlock(&kvm->mmu_lock);
+		kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+		spin_lock(&kvm->mmu_lock);
+	}
+	spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * Decode a tlbie request and dispatch it to the matching emulation helper.
+ *
+ * @instr: tlbie instruction code (supplies RIC, PRS and R)
+ * @rsval: rS contents (supplies the lpid)
+ * @rbval: rB contents (supplies IS and, for IS == 0, AP and EPN)
+ *
+ * Returns 0 on success, or -EINVAL for a form that is not handled.
+ */
+static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
+				    unsigned long rsval, unsigned long rbval)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	const int ric = get_ric(instr);
+	const int prs = get_prs(instr);
+	const int r = get_r(instr);
+	const int lpid = get_lpid(rsval);
+	const int is = get_is(rbval);
+
+	/*
+	 * These cases are invalid and are not handled:
+	 * r   != 1 -> Only radix supported
+	 * prs == 1 -> Not HV privileged
+	 * ric == 3 -> No cluster bombs for radix
+	 * is  == 1 -> Partition scoped translations not associated with pid
+	 */
+	if (!r || prs || ric == 3 || is == 1)
+		return -EINVAL;
+	/* (!is) && (ric == 1 || ric == 2) -> Not supported by ISA */
+	if (!is && (ric == 1 || ric == 2))
+		return -EINVAL;
+
+	switch (is) {
+	case 0:
+		/*
+		 * We know ric == 0
+		 * Invalidate TLB for a given target address
+		 */
+		return kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, get_ap(rbval),
+						    get_epn(rbval));
+	case 2:
+		/* Invalidate matching LPID */
+		gp = kvmhv_get_nested(kvm, lpid, false);
+		if (gp) {
+			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+			kvmhv_put_nested(gp);
+		}
+		return 0;
+	case 3:
+		/* Invalidate ALL LPIDs */
+		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Handler for the H_TLB_INVALIDATE hcall.
+ * The parameters are taken from the vcpu's GPRs: (r4) tlbie instruction
+ * code, (r5) rS contents, (r6) rB contents.
+ * Returns H_SUCCESS, or H_PARAMETER if the request could not be emulated.
+ */
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
+{
+	const unsigned long instr = kvmppc_get_gpr(vcpu, 4);
+	const unsigned long rs = kvmppc_get_gpr(vcpu, 5);
+	const unsigned long rb = kvmppc_get_gpr(vcpu, 6);
+
+	if (kvmhv_emulate_priv_tlbie(vcpu, instr, rs, rb))
+		return H_PARAMETER;
+
+	return H_SUCCESS;
+}
+
+/*
+ * Flush nested guest @lpid as a whole; @ric is passed through to
+ * kvmhv_emulate_tlbie_lpid() to choose the scope.  An lpid with no nested
+ * guest is ignored.  Always returns H_SUCCESS.
+ */
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+					 unsigned long lpid, unsigned long ric)
+{
+	struct kvm_nested_guest *gp;
+
+	gp = kvmhv_get_nested(vcpu->kvm, lpid, false);
+	if (!gp)
+		return H_SUCCESS;
+
+	kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+	kvmhv_put_nested(gp);
+	return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.  do_tlb_invalidate_nested_tlb() compares this
+ * against (end - start) >> shift for each requested page size.
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+/*
+ * Invalidate the shadow translations of nested guest @lpid for addresses
+ * from @start up to (not including) @end, once per page size selected in
+ * @pg_sizes.
+ *
+ * If the range holds more than tlb_range_flush_page_ceiling pages of a
+ * selected size, the whole LPID is flushed instead and that result is
+ * returned at once.  Returns H_P4 when no known page size was selected or
+ * when a page could not be invalidated.
+ */
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long pg_sizes,
+					 unsigned long start,
+					 unsigned long end)
+{
+	int ret = H_P4;
+	unsigned long psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+		unsigned long addr, step, ap;
+
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;
+
+		/* Too many pages to do one by one -> flush the whole LPID */
+		if (((end - start) >> def->shift) > tlb_range_flush_page_ceiling)
+			return do_tlb_invalidate_nested_all(vcpu, lpid,
+							RIC_FLUSH_TLB);
+
+		ap = mmu_get_ap(psize);
+		step = 1UL << def->shift;
+		addr = start;
+		do {
+			ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+							   get_epn(addr));
+			if (ret)
+				return H_P4;
+			addr += step;
+		} while (addr < end);
+	}
+	return ret;
+}
+
+/*
+ * Partition-scoped invalidation for nested guests, performed on behalf of
+ * the H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
+	 *
+	 * However, nested KVM issues a L2 lpid flush call when creating
+	 * partition table entries for L2. This happens even before the
+	 * corresponding shadow lpid is created in HV which happens in
+	 * H_ENTER_NESTED call. Since we can't differentiate this case from
+	 * the invalid case, we ignore such flush requests and return success.
+	 */
+	if (!__find_nested(vcpu->kvm, lpid))
+		return H_SUCCESS;
+
+	/* A flush all request can be handled by a full lpid flush only. */
+	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+	/*
+	 * We don't need to handle a PWC flush like process table here,
+	 * because intermediate partition scoped table in nested guest doesn't
+	 * really have PWC. Only level we have PWC is in L0 and for nested
+	 * invalidate at L0 we always do kvm_flush_lpid() which does
+	 * radix__flush_all_lpid(). For range invalidate at any level, we
+	 * are not removing the higher level page tables and hence there is
+	 * no PWC invalidate needed.
+	 *
+	 * if (type & H_RPTI_TYPE_PWC) {
+	 *	ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+	 *	if (ret)
+	 *		return H_P4;
+	 * }
+	 */
+
+	/* The whole address space was requested -> flush the LPID's TLB */
+	if (start == 0 && end == -1)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+	/* Nothing else to do unless a TLB invalidation was asked for */
+	if (!(type & H_RPTI_TYPE_TLB))
+		return H_SUCCESS;
+
+	return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes, start, end);
+}
+
+/*
+ * Used to convert a nested guest real address to a L1 guest real address.
+ *
+ * Walks L1's partition-scoped tree (gp->l1_gr_to_hr) for @n_gpa and checks
+ * the access described by @dsisr and vcpu->arch.trap against the pte found.
+ *
+ * Returns 0 with *gpte_p filled in when the access is allowed.  Returns
+ * RESUME_HOST, after storing the fault flags in the vcpu, when the fault is
+ * to be forwarded to L1.  Any walk error other than -EINVAL, -ENOENT or
+ * -EFAULT is returned unchanged.
+ */
+static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
+				       struct kvm_nested_guest *gp,
+				       unsigned long n_gpa, unsigned long dsisr,
+				       struct kvmppc_pte *gpte_p)
+{
+	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
+	int ret;
+
+	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
+					 &fault_addr);
+
+	if (ret) {
+		/* We didn't find a pte */
+		switch (ret) {
+		case -EINVAL:
+			/* Unsupported mmu config */
+			flags |= DSISR_UNSUPP_MMU;
+			break;
+		case -ENOENT:
+			/* No translation found */
+			flags |= DSISR_NOHPTE;
+			break;
+		case -EFAULT:
+			/* Couldn't access L1 real address */
+			flags |= DSISR_PRTABLE_FAULT;
+			vcpu->arch.fault_gpa = fault_addr;
+			break;
+		default:
+			/* Unknown error */
+			return ret;
+		}
+		goto forward_to_l1;
+	}
+
+	/* We found a pte -> check permissions */
+	if (dsisr & DSISR_ISSTORE) {
+		/* Can we write? */
+		if (!gpte_p->may_write) {
+			flags |= DSISR_PROTFAULT;
+			goto forward_to_l1;
+		}
+	} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		/* Can we execute? */
+		if (!gpte_p->may_execute) {
+			flags |= SRR1_ISI_N_G_OR_CIP;
+			goto forward_to_l1;
+		}
+	} else if (!gpte_p->may_read && !gpte_p->may_write) {
+		/* Can we read? */
+		flags |= DSISR_PROTFAULT;
+		goto forward_to_l1;
+	}
+
+	return 0;
+
+forward_to_l1:
+	vcpu->arch.fault_dsisr = flags;
+	/* For an instruction fault the flags are also placed in the MSR image */
+	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
+		vcpu->arch.shregs.msr |= flags;
+	}
+	return RESUME_HOST;
+}
+
+/*
+ * Handle a fault raised because the reference/change bits could not be set.
+ *
+ * If L1's partition-scoped pte (@gpte) lacks the needed rc bits the fault
+ * belongs to L1 and RESUME_HOST is returned.  Otherwise the bits are set,
+ * under kvm->mmu_lock, first in our (L0) page table for the L1 guest and
+ * then in the shadow page table for the nested guest.
+ *
+ * Returns 0 when both updates succeed, -EINVAL when either one fails.
+ */
+static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
+				       struct kvm_nested_guest *gp,
+				       unsigned long n_gpa,
+				       struct kvmppc_pte gpte,
+				       unsigned long dsisr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	u64 pgflags = writing ? (_PAGE_ACCESSED | _PAGE_DIRTY) : _PAGE_ACCESSED;
+	long ret = -EINVAL;
+
+	/* Are the rc bits set in the L1 partition scoped pte? */
+	if (pgflags & ~gpte.rc)
+		return RESUME_HOST;
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * L0 pgtable for the L1 guest first; the shadow_pgtable for the nest
+	 * guest is only attempted if that succeeded.
+	 */
+	if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+				    gpte.raddr, kvm->arch.lpid) &&
+	    kvmppc_hv_handle_set_rc(kvm, true, writing,
+				    n_gpa, gp->l1_lpid))
+		ret = 0;
+	spin_unlock(&kvm->mmu_lock);
+
+	return ret;
+}
+
+/*
+ * Map a radix tree level to the shift of the page it maps:
+ * 2 -> PUD_SHIFT, 1 -> PMD_SHIFT, anything else -> PAGE_SHIFT.
+ */
+static inline int kvmppc_radix_level_to_shift(int level)
+{
+	if (level == 2)
+		return PUD_SHIFT;
+	if (level == 1)
+		return PMD_SHIFT;
+	return PAGE_SHIFT;
+}
+
+/*
+ * Map a page shift back to its radix tree level:
+ * PUD_SHIFT -> 2, PMD_SHIFT -> 1, PAGE_SHIFT -> 0.
+ * Any other shift warns once and is treated as level 0.
+ */
+static inline int kvmppc_radix_shift_to_level(int shift)
+{
+	int level = 0;
+
+	if (shift == PUD_SHIFT)
+		level = 2;
+	else if (shift == PMD_SHIFT)
+		level = 1;
+	else if (shift != PAGE_SHIFT)
+		WARN_ON_ONCE(1);
+
+	return level;
+}
+
+/*
+ * Handle a partition-scoped page fault taken while running nested guest @gp.
+ *
+ * The nested guest real address is translated through L1's partition-scoped
+ * tree, the resulting L1 real address is resolved to a host pte (one is
+ * instantiated if no suitable pte exists), and a pte combining both sets of
+ * permissions is inserted into gp's shadow page table along with a nested
+ * rmap entry.
+ *
+ * Returns RESUME_GUEST when the guest should simply retry, RESUME_HOST when
+ * kvmhv_translate_addr_nested() or kvmhv_handle_nested_set_rc() ask for it,
+ * the result of kvmppc_hv_emulate_mmio() for an address with no usable
+ * memslot, or a negative errno on failure.
+ *
+ * Called with gp->tlb_lock held.
+ */
+static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
+					  struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *memslot;
+	struct rmap_nested *n_rmap;
+	struct kvmppc_pte gpte;
+	pte_t pte, *pte_p;
+	unsigned long mmu_seq;
+	unsigned long dsisr = vcpu->arch.fault_dsisr;
+	unsigned long ea = vcpu->arch.fault_dar;
+	unsigned long *rmapp;
+	unsigned long n_gpa, gpa, gfn, perm = 0UL;
+	unsigned int shift, l1_shift, level;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	bool kvm_ro = false;
+	long int ret;
+
+	/* Refresh the cached partition table entry if it has no tree pointer */
+	if (!gp->l1_gr_to_hr) {
+		kvmhv_update_ptbl_cache(gp);
+		if (!gp->l1_gr_to_hr)
+			return RESUME_HOST;
+	}
+
+	/* Convert the nested guest real address into a L1 guest real address */
+
+	/* Drop the top 4 and low 12 bits of the faulting gpa */
+	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
+	/* Unless this was a table-walk fault, take the page offset from ea */
+	if (!(dsisr & DSISR_PRTABLE_FAULT))
+		n_gpa |= ea & 0xFFF;
+	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
+
+	/*
+	 * If the hardware found a translation but we don't now have a usable
+	 * translation in the l1 partition-scoped tree, remove the shadow pte
+	 * and let the guest retry.
+	 */
+	if (ret == RESUME_HOST &&
+	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
+		      DSISR_BAD_COPYPASTE)))
+		goto inval;
+	if (ret)
+		return ret;
+
+	/* Failed to set the reference/change bits */
+	if (dsisr & DSISR_SET_RC) {
+		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
+		if (ret == RESUME_HOST)
+			return ret;
+		if (ret)
+			goto inval;
+		dsisr &= ~DSISR_SET_RC;
+		/* If setting rc was the only problem, we are done */
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT)))
+			return RESUME_GUEST;
+	}
+
+	/*
+	 * We took an HISI or HDSI while we were running a nested guest which
+	 * means we have no partition scoped translation for that. This means
+	 * we need to insert a pte for the mapping into our shadow_pgtable.
+	 */
+
+	l1_shift = gpte.page_shift;
+	if (l1_shift < PAGE_SHIFT) {
+		/* We don't support l1 using a page size smaller than our own */
+		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
+			l1_shift, PAGE_SHIFT);
+		return -EINVAL;
+	}
+	gpa = gpte.raddr;
+	gfn = gpa >> PAGE_SHIFT;
+
+	/* 1. Get the corresponding host memslot */
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
+			/* unusual error -> reflect to the guest as a DSI */
+			kvmppc_core_queue_data_storage(vcpu,
+					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+					ea, dsisr);
+			return RESUME_GUEST;
+		}
+
+		/* passthrough of emulated MMIO case */
+		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
+	}
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* Give the guest a DSI */
+			kvmppc_core_queue_data_storage(vcpu,
+					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+					ea, DSISR_ISSTORE | DSISR_PROTFAULT);
+			return RESUME_GUEST;
+		}
+		kvm_ro = true;
+	}
+
+	/* 2. Find the host pte for this L1 guest real address */
+
+	/*
+	 * Used to check for invalidations in progress.  The sequence number
+	 * sampled here is handed to kvmppc_create_pte() below.
+	 */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	/* See if can find translation in our partition scoped tables for L1 */
+	pte = __pte(0);
+	spin_lock(&kvm->mmu_lock);
+	pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
+	if (!shift)
+		shift = PAGE_SHIFT;
+	/* Take a copy so the pte can be examined after dropping the lock */
+	if (pte_p)
+		pte = *pte_p;
+	spin_unlock(&kvm->mmu_lock);
+
+	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
+		/* No suitable pte found -> try to insert a mapping */
+		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
+					writing, kvm_ro, &pte, &level);
+		if (ret == -EAGAIN)
+			return RESUME_GUEST;
+		else if (ret)
+			return ret;
+		shift = kvmppc_radix_level_to_shift(level);
+	}
+	/* Align gfn to the start of the page */
+	gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
+
+	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
+
+	/*
+	 * The permissions is the combination of the host and l1 guest ptes.
+	 * perm collects the bits to strip from the host pte: each bit that
+	 * the l1 guest pte does not grant.
+	 */
+	perm |= gpte.may_read ? 0UL : _PAGE_READ;
+	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
+	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
+	/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
+	perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
+	perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
+	pte = __pte(pte_val(pte) & ~perm);
+
+	/* What size pte can we insert? */
+	if (shift > l1_shift) {
+		/*
+		 * The host page is larger than the l1 page: insert a smaller
+		 * pte and fold the gpa bits between the two sizes into it.
+		 */
+		u64 mask;
+		unsigned int actual_shift = PAGE_SHIFT;
+		if (PMD_SHIFT < l1_shift)
+			actual_shift = PMD_SHIFT;
+		mask = (1UL << shift) - (1UL << actual_shift);
+		pte = __pte(pte_val(pte) | (gpa & mask));
+		shift = actual_shift;
+	}
+	level = kvmppc_radix_shift_to_level(shift);
+	n_gpa &= ~((1UL << shift) - 1);
+
+	/* 4. Insert the pte into our shadow_pgtable */
+
+	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
+	if (!n_rmap)
+		return RESUME_GUEST; /* Let the guest try again */
+	/* The rmap value packs the nested gpa together with the L1 lpid */
+	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
+		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
+				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
+	/*
+	 * NOTE(review): kvmppc_create_pte() is given &n_rmap and presumably
+	 * sets it to NULL when the entry was consumed (as
+	 * kvmhv_insert_nest_rmap() does), making this kfree() a no-op in
+	 * that case - confirm against kvmppc_create_pte().
+	 */
+	kfree(n_rmap);
+	if (ret == -EAGAIN)
+		ret = RESUME_GUEST;	/* Let the guest try again */
+
+	return ret;
+
+ inval:
+	/* Drop the stale shadow pte and let the guest take the fault again */
+	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
+	return RESUME_GUEST;
+}
+
+/*
+ * Entry point for nested guest page faults: runs
+ * __kvmhv_nested_page_fault() for the vcpu's current nested guest with that
+ * guest's tlb_lock held, and returns its result.
+ */
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
+{
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	long int rc;
+
+	mutex_lock(&nested->tlb_lock);
+	rc = __kvmhv_nested_page_fault(vcpu, nested);
+	mutex_unlock(&nested->tlb_lock);
+
+	return rc;
+}
+
+/*
+ * Look up, under kvm->mmu_lock, the next nested guest id in
+ * kvm->arch.kvm_nested_guest_idr starting the search at @lpid + 1.
+ * Returns the id found by idr_get_next(), or -1 if there is none.
+ */
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
+{
+	int next = lpid + 1;
+	void *found;
+
+	spin_lock(&kvm->mmu_lock);
+	found = idr_get_next(&kvm->arch.kvm_nested_guest_idr, &next);
+	spin_unlock(&kvm->mmu_lock);
+
+	return found ? next : -1;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
new file mode 100644
index 0000000000..34f1db2128
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -0,0 +1,930 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_hv.h"
+
+/*
+ * Write the guest's values of a set of SPRs (held in vcpu->arch) into the
+ * hardware registers.
+ *
+ * Apart from TAR, each register is written only when needed: when the guest
+ * value differs from the host value it is compared with (current->thread
+ * or @host_os_sprs), or, for UAMOR and PSPB, when the guest value is
+ * non-zero.  The EBB registers are only considered when HFSCR_EBB is set in
+ * the vcpu's hfscr.
+ */
+static void load_spr_state(struct kvm_vcpu *vcpu,
+				struct p9_host_os_sprs *host_os_sprs)
+{
+	/* TAR is very fast */
+	mtspr(SPRN_TAR, vcpu->arch.tar);
+
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+	    current->thread.vrsave != vcpu->arch.vrsave)
+		mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+
+	if (vcpu->arch.hfscr & HFSCR_EBB) {
+		if (current->thread.ebbhr != vcpu->arch.ebbhr)
+			mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+		if (current->thread.ebbrr != vcpu->arch.ebbrr)
+			mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+		if (current->thread.bescr != vcpu->arch.bescr)
+			mtspr(SPRN_BESCR, vcpu->arch.bescr);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+			current->thread.tidr != vcpu->arch.tid)
+		mtspr(SPRN_TIDR, vcpu->arch.tid);
+	if (host_os_sprs->iamr != vcpu->arch.iamr)
+		mtspr(SPRN_IAMR, vcpu->arch.iamr);
+	if (host_os_sprs->amr != vcpu->arch.amr)
+		mtspr(SPRN_AMR, vcpu->arch.amr);
+	/* restore_p9_host_os_sprs() writes UAMOR and PSPB back to 0 */
+	if (vcpu->arch.uamor != 0)
+		mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+	if (current->thread.fscr != vcpu->arch.fscr)
+		mtspr(SPRN_FSCR, vcpu->arch.fscr);
+	if (current->thread.dscr != vcpu->arch.dscr)
+		mtspr(SPRN_DSCR, vcpu->arch.dscr);
+	if (vcpu->arch.pspb != 0)
+		mtspr(SPRN_PSPB, vcpu->arch.pspb);
+
+	/*
+	 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+	 * clear (or hstate set appropriately to catch those registers
+	 * being clobbered if we take a MCE or SRESET), so those are done
+	 * later.
+	 */
+
+	/* Guest had CTRL bit 0 clear: clear it in hardware as well */
+	if (!(vcpu->arch.ctrl & 1))
+		mtspr(SPRN_CTRLT, 0);
+}
+
+/*
+ * Read the current hardware values of the SPRs handled by load_spr_state()
+ * back into vcpu->arch.  VRSAVE, the EBB registers and TIDR are only read
+ * when the corresponding CPU feature / HFSCR_EBB bit is present.
+ */
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.tar = mfspr(SPRN_TAR);
+
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+
+	if (vcpu->arch.hfscr & HFSCR_EBB) {
+		vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+		vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+		vcpu->arch.bescr = mfspr(SPRN_BESCR);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		vcpu->arch.tid = mfspr(SPRN_TIDR);
+	vcpu->arch.iamr = mfspr(SPRN_IAMR);
+	vcpu->arch.amr = mfspr(SPRN_AMR);
+	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+	vcpu->arch.fscr = mfspr(SPRN_FSCR);
+	vcpu->arch.dscr = mfspr(SPRN_DSCR);
+	vcpu->arch.pspb = mfspr(SPRN_PSPB);
+
+	/* CTRL is read through SPRN_CTRLF but written through SPRN_CTRLT */
+	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+}
+
+/*
+ * Load guest register state onto the CPU: transactional memory state (when
+ * the CPU supports it), then the SPRs, FP and, with CONFIG_ALTIVEC, vector
+ * registers.
+ *
+ * Returns true if current MSR and/or guest MSR may have changed.
+ */
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+		     struct p9_host_os_sprs *host_os_sprs)
+{
+	bool msr_changed = false;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+		const unsigned long msr = vcpu->arch.shregs.msr;
+
+		if (MSR_TM_ACTIVE(msr)) {
+			/* A transaction is active: do the full TM restore */
+			kvmppc_restore_tm_hv(vcpu, msr, true);
+			msr_changed = true;
+		} else if (vcpu->arch.hfscr & HFSCR_TM) {
+			/* Otherwise only the three TM SPRs need loading */
+			mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+			mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+			mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+		}
+	}
+#endif
+
+	load_spr_state(vcpu, host_os_sprs);
+
+	load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+	load_vr_state(&vcpu->arch.vr);
+#endif
+
+	return msr_changed;
+}
+EXPORT_SYMBOL_GPL(load_vcpu_state);
+
+/*
+ * Save guest register state from the CPU into @vcpu: SPRs, FP and, with
+ * CONFIG_ALTIVEC, vector registers, followed by transactional memory state
+ * when the CPU supports it.
+ */
+void store_vcpu_state(struct kvm_vcpu *vcpu)
+{
+	store_spr_state(vcpu);
+
+	store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+	store_vr_state(&vcpu->arch.vr);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+		const unsigned long msr = vcpu->arch.shregs.msr;
+
+		if (MSR_TM_ACTIVE(msr)) {
+			/* A transaction is active: do the full TM save */
+			kvmppc_save_tm_hv(vcpu, msr, true);
+		} else if (vcpu->arch.hfscr & HFSCR_TM) {
+			vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+			vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+			vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+
+			/*
+			 * see load_ebb comment: once the counter wraps to
+			 * zero the facility is switched off again
+			 */
+			if (!vcpu->arch.nested && !++vcpu->arch.load_tm)
+				vcpu->arch.hfscr &= ~HFSCR_TM;
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL_GPL(store_vcpu_state);
+
+/*
+ * Record the host's current IAMR and AMR values in @host_os_sprs.  These
+ * are the values that load_spr_state() and restore_p9_host_os_sprs()
+ * compare the guest values against.
+ */
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+	host_os_sprs->iamr = mfspr(SPRN_IAMR);
+	host_os_sprs->amr = mfspr(SPRN_AMR);
+}
+EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);
+
+/*
+ * Put the host's values back into the SPRs that load_spr_state() may have
+ * changed.  Each register is only rewritten when the guest value saved in
+ * vcpu->arch differs from the host value (or, for UAMOR and PSPB, when the
+ * guest value is non-zero, in which case 0 is written).
+ *
+ * vcpu guest regs must already be saved, since the comparisons use the
+ * saved guest values.
+ */
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+			     struct p9_host_os_sprs *host_os_sprs)
+{
+	/*
+	 * current->thread.xxx registers must all be restored to host
+	 * values before a potential context switch, otherwise the context
+	 * switch itself will overwrite current->thread.xxx with the values
+	 * from the guest SPRs.
+	 */
+
+	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
+
+	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+			current->thread.tidr != vcpu->arch.tid)
+		mtspr(SPRN_TIDR, current->thread.tidr);
+	if (host_os_sprs->iamr != vcpu->arch.iamr)
+		mtspr(SPRN_IAMR, host_os_sprs->iamr);
+	if (vcpu->arch.uamor != 0)
+		mtspr(SPRN_UAMOR, 0);
+	if (host_os_sprs->amr != vcpu->arch.amr)
+		mtspr(SPRN_AMR, host_os_sprs->amr);
+	if (current->thread.fscr != vcpu->arch.fscr)
+		mtspr(SPRN_FSCR, current->thread.fscr);
+	if (current->thread.dscr != vcpu->arch.dscr)
+		mtspr(SPRN_DSCR, current->thread.dscr);
+	if (vcpu->arch.pspb != 0)
+		mtspr(SPRN_PSPB, 0);
+
+	/*
+	 * If the guest ran with CTRL bit 0 (the runlatch) clear, set it back
+	 * to 1.  The guest CTRL value itself was read in store_spr_state().
+	 */
+	if (!(vcpu->arch.ctrl & 1))
+		mtspr(SPRN_CTRLT, 1);
+
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+	    vcpu->arch.vrsave != current->thread.vrsave)
+		mtspr(SPRN_VRSAVE, current->thread.vrsave);
+#endif
+	if (vcpu->arch.hfscr & HFSCR_EBB) {
+		if (vcpu->arch.bescr != current->thread.bescr)
+			mtspr(SPRN_BESCR, current->thread.bescr);
+		if (vcpu->arch.ebbhr != current->thread.ebbhr)
+			mtspr(SPRN_EBBHR, current->thread.ebbhr);
+		if (vcpu->arch.ebbrr != current->thread.ebbrr)
+			mtspr(SPRN_EBBRR, current->thread.ebbrr);
+
+		if (!vcpu->arch.nested) {
+			/*
+			 * This is like load_fp in context switching, turn off
+			 * the facility after it wraps the u8 to try avoiding
+			 * saving and restoring the registers each partition
+			 * switch.
+			 */
+			vcpu->arch.load_ebb++;
+			if (!vcpu->arch.load_ebb)
+				vcpu->arch.hfscr &= ~HFSCR_EBB;
+		}
+	}
+
+	if (vcpu->arch.tar != current->thread.tar)
+		mtspr(SPRN_TAR, current->thread.tar);
+}
+EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+/*
+ * Close the timing interval of the vcpu's current activity and open one for
+ * @next (which may be NULL).
+ *
+ * The timebase ticks elapsed since the previous call are added to the
+ * outgoing accumulator's tb_total, and its tb_min / tb_max are updated.
+ * If there was no current activity, only the bookkeeping for @next is done.
+ */
+void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmhv_tb_accumulator *curr;
+	/* Current timebase with the vcore's applied offset subtracted */
+	u64 tb = mftb() - vc->tb_offset_applied;
+	u64 prev_tb;
+	u64 delta;
+	u64 seq;
+
+	curr = vcpu->arch.cur_activity;
+	vcpu->arch.cur_activity = next;
+	prev_tb = vcpu->arch.cur_tb_start;
+	vcpu->arch.cur_tb_start = tb;
+
+	if (!curr)
+		return;
+
+	delta = tb - prev_tb;
+
+	/*
+	 * seqcount is bumped to seq + 1 before the fields change and to
+	 * seq + 2 afterwards, with write barriers in between.
+	 * NOTE(review): presumably this lets a lockless reader detect an
+	 * update in progress - confirm against the reader of these fields.
+	 */
+	seq = curr->seqcount;
+	curr->seqcount = seq + 1;
+	smp_wmb();
+	curr->tb_total += delta;
+	/* seq == 0 means this is the first sample, so it seeds tb_min */
+	if (seq == 0 || delta < curr->tb_min)
+		curr->tb_min = delta;
+	if (delta > curr->tb_max)
+		curr->tb_max = delta;
+	smp_wmb();
+	curr->seqcount = seq + 2;
+}
+EXPORT_SYMBOL_GPL(accumulate_time);
+#endif
+
+/* Execute slbmfev for SLB index @idx and return the value it produces. */
+static inline u64 mfslbv(unsigned int idx)
+{
+	u64 slbev;
+
+	asm volatile("slbmfev  %0,%1" : "=r" (slbev) : "r" (idx));
+
+	return slbev;
+}
+
+/* Execute slbmfee for SLB index @idx and return the value it produces. */
+static inline u64 mfslbe(unsigned int idx)
+{
+	u64 slbee;
+
+	asm volatile("slbmfee  %0,%1" : "=r" (slbee) : "r" (idx));
+
+	return slbee;
+}
+
+/*
+ * Execute slbmte with @slbev as the first operand and @slbee as the second.
+ * Note that this is the reverse of the C parameter order.
+ */
+static inline void mtslb(u64 slbee, u64 slbev)
+{
+	asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+/*
+ * Overwrite SLB entry @idx by writing, via mtslb(), an slbee value equal to
+ * the bare index and an slbev of 0.
+ */
+static inline void clear_slb_entry(unsigned int idx)
+{
+	mtslb(idx, 0);
+}
+
+/*
+ * Clear SLB entry 0 explicitly, then issue slbia with an operand of 6.
+ * NOTE(review): presumably entry 0 is written first because this slbia
+ * form does not invalidate it - confirm against the ISA.
+ */
+static inline void slb_clear_invalidate_partition(void)
+{
+	clear_slb_entry(0);
+	asm volatile(PPC_SLBIA(6));
+}
+
+/*
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+	int idx = 0;
+
+	while (idx < 4) {
+		clear_slb_entry(idx);
+		idx++;
+	}
+}
+
+/*
+ * Switch the MMU context registers to a radix guest: after a hwsync and
+ * isync, LPID, LPCR and PID are written in that order.  When the vcpu is
+ * running a nested guest the nested guest's shadow lpid is used instead of
+ * the VM's own lpid.
+ */
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	u32 lpid;
+	u32 pid;
+
+	lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+	pid = vcpu->arch.pid;
+
+	/*
+	 * Prior memory accesses to host PID Q3 must be completed before we
+	 * start switching, and stores must be drained to avoid not-my-LPAR
+	 * logic (see switch_mmu_to_host).
+	 */
+	asm volatile("hwsync" ::: "memory");
+	isync();
+	mtspr(SPRN_LPID, lpid);
+	mtspr(SPRN_LPCR, lpcr);
+	mtspr(SPRN_PID, pid);
+	/*
+	 * isync not required here because we are HRFID'ing to guest before
+	 * any guest context access, which is context synchronising.
+	 */
+}
+
+/*
+ * Switch the MMU context registers to a hash (HPT) guest: after a hwsync
+ * and isync, LPID, LPCR and PID are written in that order, and then the
+ * first vcpu->arch.slb_max saved guest SLB entries are loaded with slbmte.
+ */
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+	u32 lpid;
+	u32 pid;
+	int i;
+
+	lpid = kvm->arch.lpid;
+	pid = vcpu->arch.pid;
+
+	/*
+	 * See switch_mmu_to_guest_radix. ptesync should not be required here
+	 * even if the host is in HPT mode because speculative accesses would
+	 * not cause RC updates (we are in real mode).
+	 */
+	asm volatile("hwsync" ::: "memory");
+	isync();
+	mtspr(SPRN_LPID, lpid);
+	mtspr(SPRN_LPCR, lpcr);
+	mtspr(SPRN_PID, pid);
+
+	/* Reload the entries captured by save_clear_guest_mmu() */
+	for (i = 0; i < vcpu->arch.slb_max; i++)
+		mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+	/*
+	 * isync not required here, see switch_mmu_to_guest_radix.
+	 */
+}
+
+/*
+ * Switch the MMU context registers back to the host: after a hwsync and
+ * isync, PID (@pid), LPID and LPCR are written in that order, using the
+ * host lpid and lpcr stored in kvm->arch.  On a hash host the bolted SLB
+ * entries are then restored.
+ */
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
+{
+	u32 lpid = kvm->arch.host_lpid;
+	u64 lpcr = kvm->arch.host_lpcr;
+
+	/*
+	 * The guest has exited, so guest MMU context is no longer being
+	 * non-speculatively accessed, but a hwsync is needed before the
+	 * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
+	 * so the not-my-LPAR tlbie logic does not overlook them.
+	 */
+	asm volatile("hwsync" ::: "memory");
+	isync();
+	mtspr(SPRN_PID, pid);
+	mtspr(SPRN_LPID, lpid);
+	mtspr(SPRN_LPCR, lpcr);
+	/*
+	 * isync is not required after the switch, because mtmsrd with L=0
+	 * is performed after this switch, which is context synchronising.
+	 */
+
+	if (!radix_enabled())
+		slb_restore_bolted_realmode();
+}
+
+/*
+ * Prepare the host MMU state for a guest switch.  A radix host needs
+ * nothing; a hash host has its SLB cleared.
+ */
+static void save_clear_host_mmu(struct kvm *kvm)
+{
+	if (radix_enabled())
+		return;
+
+	/*
+	 * Hash host could save and restore host SLB entries to
+	 * reduce SLB fault overheads of VM exits, but for now the
+	 * existing code clears all entries and restores just the
+	 * bolted ones when switching back to host.
+	 */
+	slb_clear_invalidate_partition();
+}
+
+/*
+ * Save and clear the guest's SLB state after a guest exit.
+ *
+ * For a radix guest SLB entries 0..3 are simply cleared.  For a hash guest
+ * every valid entry among the first vcpu->arch.slb_nr is copied into
+ * vcpu->arch.slb[] (with its index ORed into the ESID word), slb_max is set
+ * to the number of entries saved, and the SLB is then cleared.
+ */
+static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	int idx, saved = 0;
+
+	if (kvm_is_radix(kvm)) {
+		radix_clear_slb();
+		return;
+	}
+
+	/*
+	 * This must run before switching to host (radix host can't
+	 * access all SLBs).
+	 */
+	for (idx = 0; idx < vcpu->arch.slb_nr; idx++) {
+		u64 slbee = mfslbe(idx);
+		u64 slbev;
+
+		if (!(slbee & SLB_ESID_V))
+			continue;
+
+		slbev = mfslbv(idx);
+		vcpu->arch.slb[saved].orige = slbee | idx;
+		vcpu->arch.slb[saved].origv = slbev;
+		saved++;
+	}
+	vcpu->arch.slb_max = saved;
+	slb_clear_invalidate_partition();
+}
+
+/*
+ * Invalidate guest translations with tlbiel, issuing one instruction per
+ * TLB set for kvm->arch.tlb_sets sets, followed by a ptesync and an ERAT
+ * invalidation.
+ *
+ * In every asm statement below the operands are: %0 = rb, %4 = a register
+ * holding 0, and %3, %2, %1 = the immediates named in the comment above the
+ * statement (RIC, PRS and R respectively).
+ * NOTE(review): this assumes PPC_TLBIEL takes (rb, rs, ric, prs, r) in that
+ * order - confirm against the macro definition.
+ */
+static void flush_guest_tlb(struct kvm *kvm)
+{
+	unsigned long rb, set;
+
+	rb = PPC_BIT(52);	/* IS = 2 */
+	if (kvm_is_radix(kvm)) {
+		/* Set 0 is flushed with RIC=2, the remaining sets with RIC=0 */
+		/* R=1 PRS=1 RIC=2 */
+		asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+			     : : "r" (rb), "i" (1), "i" (1), "i" (2),
+			       "r" (0) : "memory");
+		for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+			rb += PPC_BIT(51);	/* increment set number */
+			/* R=1 PRS=1 RIC=0 */
+			asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+				     : : "r" (rb), "i" (1), "i" (1), "i" (0),
+				       "r" (0) : "memory");
+		}
+		asm volatile("ptesync": : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+		asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+	} else {
+		/* Hash guest: every set, starting at 0, is flushed the same way */
+		for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+			/* R=0 PRS=0 RIC=0 */
+			asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+				     : : "r" (rb), "i" (0), "i" (0), "i" (0),
+				       "r" (0) : "memory");
+			rb += PPC_BIT(51);	/* increment set number */
+		}
+		asm volatile("ptesync": : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+		asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+	}
+}
+
+static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
+				 struct kvm_nested_guest *nested)
+{
+	cpumask_t *need_tlb_flush;
+	bool all_set = true;
+	int i;
+
+	if (nested)
+		need_tlb_flush = &nested->need_tlb_flush;
+	else
+		need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+	if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
+		return;
+
+	/*
+	 * Individual threads can come in here, but the TLB is shared between
+	 * the 4 threads in a core, hence invalidating on one thread
+	 * invalidates for all, so only invalidate the first time (if all bits
+	 * were set).  The others must still execute a ptesync.
+	 *
+	 * If a race occurs and two threads do the TLB flush, that is not a
+	 * problem, just sub-optimal.
+	 */
+	for (i = cpu_first_tlb_thread_sibling(pcpu);
+			i <= cpu_last_tlb_thread_sibling(pcpu);
+			i += cpu_tlb_thread_sibling_step()) {
+		if (!cpumask_test_cpu(i, need_tlb_flush)) {
+			all_set = false;
+			break;
+		}
+	}
+	if (all_set)
+		flush_guest_tlb(kvm);
+	else
+		asm volatile("ptesync" ::: "memory");
+
+	/* Clear only this CPU's bit, and only after the flush or ptesync */
+	cpumask_clear_cpu(pcpu, need_tlb_flush);
+}
+
+/*
+ * Hard-disable interrupts and make sure MSR has the FP/VEC/VSX/TM facility
+ * bits this CPU (and, for TM, the vcpu's HFSCR) calls for. Returns that MSR.
+ */
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
+{
+	unsigned long msr_needed = 0;
+
+	/* MSR bits may have been cleared by context switch so must recheck */
+	if (IS_ENABLED(CONFIG_PPC_FPU))
+		msr_needed |= MSR_FP;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		msr_needed |= MSR_VEC;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		msr_needed |= MSR_VSX;
+	if ((vcpu->arch.hfscr & HFSCR_TM) &&
+	    (cpu_has_feature(CPU_FTR_TM) || cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)))
+		msr_needed |= MSR_TM;
+
+	/*
+	 * Folding this into the MSR[RI] clearing would widen the
+	 * unrecoverable window (better to cover that with KVM bad interrupt
+	 * handling than to use MSR[RI] at all); folding it into the IR/DR
+	 * disable is much more difficult and less worthwhile.
+	 */
+	msr &= ~MSR_EE;
+	if (msr_needed & ~msr) {
+		msr |= msr_needed;
+		__mtmsrd(msr, 0);
+	} else {
+		__hard_irq_disable();
+	}
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	return msr;
+}
+EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
+{
+	struct p9_host_os_sprs host_os_sprs;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	s64 hdec, dec;
+	u64 purr, spurr;
+	u64 *exsave;
+	int trap;
+	unsigned long msr;
+	unsigned long host_hfscr;
+	unsigned long host_ciabr;
+	unsigned long host_dawr0;
+	unsigned long host_dawrx0;
+	unsigned long host_psscr;
+	unsigned long host_hpsscr;
+	unsigned long host_pidr;
+	unsigned long host_dawr1;
+	unsigned long host_dawrx1;
+	unsigned long dpdes;
+
+	hdec = time_limit - *tb;
+	if (hdec < 0)
+		return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+	vcpu->arch.ceded = 0;
+
+	/* Save MSR for restore, with EE clear. */
+	msr = mfmsr() & ~MSR_EE;
+
+	host_hfscr = mfspr(SPRN_HFSCR);
+	host_ciabr = mfspr(SPRN_CIABR);
+	host_psscr = mfspr(SPRN_PSSCR_PR);
+	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+		host_hpsscr = mfspr(SPRN_PSSCR);
+	host_pidr = mfspr(SPRN_PID);
+
+	if (dawr_enabled()) {
+		host_dawr0 = mfspr(SPRN_DAWR0);
+		host_dawrx0 = mfspr(SPRN_DAWRX0);
+		if (cpu_has_feature(CPU_FTR_DAWR1)) {
+			host_dawr1 = mfspr(SPRN_DAWR1);
+			host_dawrx1 = mfspr(SPRN_DAWRX1);
+		}
+	}
+
+	local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+	local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+
+	save_p9_host_os_sprs(&host_os_sprs);
+
+	msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+	if (lazy_irq_pending()) {
+		trap = 0;
+		goto out;
+	}
+
+	if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+		msr = mfmsr(); /* MSR may have been updated */
+
+	if (vc->tb_offset) {
+		u64 new_tb = *tb + vc->tb_offset;
+		mtspr(SPRN_TBU40, new_tb);
+		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+			new_tb += 0x1000000;
+			mtspr(SPRN_TBU40, new_tb);
+		}
+		*tb = new_tb;
+		vc->tb_offset_applied = vc->tb_offset;
+	}
+
+	mtspr(SPRN_VTB, vc->vtb);
+	mtspr(SPRN_PURR, vcpu->arch.purr);
+	mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+	if (vc->pcr)
+		mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+	if (vcpu->arch.doorbell_request) {
+		vcpu->arch.doorbell_request = 0;
+		mtspr(SPRN_DPDES, 1);
+	}
+
+	if (dawr_enabled()) {
+		if (vcpu->arch.dawr0 != host_dawr0)
+			mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+		if (vcpu->arch.dawrx0 != host_dawrx0)
+			mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+		if (cpu_has_feature(CPU_FTR_DAWR1)) {
+			if (vcpu->arch.dawr1 != host_dawr1)
+				mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+			if (vcpu->arch.dawrx1 != host_dawrx1)
+				mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+		}
+	}
+	if (vcpu->arch.ciabr != host_ciabr)
+		mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+
+
+	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+		mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+		      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+	} else {
+		if (vcpu->arch.psscr != host_psscr)
+			mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+	}
+
+	mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+	mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+	mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+	/*
+	 * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+	 * Interrupt (HDSI) the HDSISR is not updated at all.
+	 *
+	 * To work around this we put a canary value into the HDSISR before
+	 * returning to a guest and then check for this canary when we take a
+	 * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+	 * update the HDSISR. In this case we return to the guest to retake the
+	 * HDSI, which should correctly update the HDSISR on the second HDSI
+	 * entry.
+	 *
+	 * The "radix prefetch bug" test can be used to test for this bug, as
+	 * it also exists for DD2.1 and below.
+	 */
+	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+	/*
+	 * It might be preferable to load_vcpu_state here, in order to get the
+	 * GPR/FP register loads executing in parallel with the previous mtSPR
+	 * instructions, but for now that can't be done because the TM handling
+	 * in load_vcpu_state can change some SPRs and vcpu state (nip, msr).
+	 * But TM could be split out if this would be a significant benefit.
+	 */
+
+	/*
+	 * MSR[RI] does not need to be cleared (and is not, for radix guests
+	 * with no prefetch bug), because in_guest is set. If we take a SRESET
+	 * or MCE with in_guest set but still in HV mode, then
+	 * kvmppc_p9_bad_interrupt handles the interrupt, which effectively
+	 * clears MSR[RI] and doesn't return.
+	 */
+	WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
+	barrier(); /* Open in_guest critical section */
+
+	/*
+	 * Hash host, hash guest, or radix guest with prefetch bug, all have
+	 * to disable the MMU before switching to guest MMU state.
+	 */
+	if (!radix_enabled() || !kvm_is_radix(kvm) ||
+			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+	save_clear_host_mmu(kvm);
+
+	if (kvm_is_radix(kvm))
+		switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+	else
+		switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+
+	/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
+	check_need_tlb_flush(kvm, vc->pcpu, nested);
+
+	/*
+	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+	 * so set guest LPCR (with HDICE) before writing HDEC.
+	 */
+	mtspr(SPRN_HDEC, hdec);
+
+	mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
+	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+	switch_pmu_to_guest(vcpu, &host_os_sprs);
+	accumulate_time(vcpu, &vcpu->arch.in_guest);
+
+	kvmppc_p9_enter_guest(vcpu);
+
+	accumulate_time(vcpu, &vcpu->arch.guest_exit);
+	switch_pmu_to_host(vcpu, &host_os_sprs);
+
+	/* XXX: Could get these from r11/12 and paca exsave instead */
+	vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+	vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+	vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+	vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+	/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
+	trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+
+	if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK))
+		exsave = local_paca->exgen;
+	else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
+		exsave = local_paca->exnmi;
+	else /* trap == 0x200 */
+		exsave = local_paca->exmc;
+
+	vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+	vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+	/*
+	 * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
+	 * scratch (which we need to move into exsave to make re-entrant vs
+	 * SRESET/MCE), register state is protected from reentrancy. However
+	 * timebase, MMU, among other state is still set to guest, so don't
+	 * enable MSR[RI] here. It gets enabled at the end, after in_guest
+	 * is cleared.
+	 *
+	 * It is possible an NMI could come in here, which is why it is
+	 * important to save the above state early so it can be debugged.
+	 */
+
+	vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+	vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+	vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+	vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+	vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+	vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+	vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+	vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+	if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+		kvmppc_realmode_machine_check(vcpu);
+
+	} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+		kvmppc_p9_realmode_hmi_handler(vcpu);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+		vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Softpatch interrupt for transactional memory emulation cases
+	 * on POWER9 DD2.2.  This is early in the guest exit path - we
+	 * haven't saved registers or done a treclaim yet.
+	 */
+	} else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+		/*
+		 * The cases we want to handle here are those where the guest
+		 * is in real suspend mode and is trying to transition to
+		 * transactional mode.
+		 */
+		if (!local_paca->kvm_hstate.fake_suspend &&
+				(vcpu->arch.shregs.msr & MSR_TS_S)) {
+			if (kvmhv_p9_tm_emulation_early(vcpu)) {
+				/*
+				 * Go straight back into the guest with the
+				 * new NIP/MSR as set by TM emulation.
+				 */
+				mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+				mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+				goto tm_return_to_guest;
+			}
+		}
+#endif
+	}
+
+	/* Advance host PURR/SPURR by the amount used by guest */
+	purr = mfspr(SPRN_PURR);
+	spurr = mfspr(SPRN_SPURR);
+	local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
+	local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
+	vcpu->arch.purr = purr;
+	vcpu->arch.spurr = spurr;
+
+	vcpu->arch.ic = mfspr(SPRN_IC);
+	vcpu->arch.pid = mfspr(SPRN_PID);
+	vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+
+	vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+	vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+	vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+	vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+	dpdes = mfspr(SPRN_DPDES);
+	if (dpdes)
+		vcpu->arch.doorbell_request = 1;
+
+	vc->vtb = mfspr(SPRN_VTB);
+
+	dec = mfspr(SPRN_DEC);
+	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+		dec = (s32) dec;
+	*tb = mftb();
+	vcpu->arch.dec_expires = dec + *tb;
+
+	if (vc->tb_offset_applied) {
+		u64 new_tb = *tb - vc->tb_offset_applied;
+		mtspr(SPRN_TBU40, new_tb);
+		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+			new_tb += 0x1000000;
+			mtspr(SPRN_TBU40, new_tb);
+		}
+		*tb = new_tb;
+		vc->tb_offset_applied = 0;
+	}
+
+	save_clear_guest_mmu(kvm, vcpu);
+	switch_mmu_to_host(kvm, host_pidr);
+
+	/*
+	 * Enable MSR here in order to have facilities enabled to save
+	 * guest registers. This enables MMU (if we were in realmode), so
+	 * only switch MMU on after the MMU is switched to host, to avoid
+	 * the P9_RADIX_PREFETCH_BUG or hash guest context.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+			vcpu->arch.shregs.msr & MSR_TS_MASK)
+		msr |= MSR_TS_S;
+	__mtmsrd(msr, 0);
+
+	store_vcpu_state(vcpu);
+
+	mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
+	mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
+
+	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+		/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+		mtspr(SPRN_PSSCR, host_hpsscr |
+		      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+	}
+
+	mtspr(SPRN_HFSCR, host_hfscr);
+	if (vcpu->arch.ciabr != host_ciabr)
+		mtspr(SPRN_CIABR, host_ciabr);
+
+	if (dawr_enabled()) {
+		if (vcpu->arch.dawr0 != host_dawr0)
+			mtspr(SPRN_DAWR0, host_dawr0);
+		if (vcpu->arch.dawrx0 != host_dawrx0)
+			mtspr(SPRN_DAWRX0, host_dawrx0);
+		if (cpu_has_feature(CPU_FTR_DAWR1)) {
+			if (vcpu->arch.dawr1 != host_dawr1)
+				mtspr(SPRN_DAWR1, host_dawr1);
+			if (vcpu->arch.dawrx1 != host_dawrx1)
+				mtspr(SPRN_DAWRX1, host_dawrx1);
+		}
+	}
+
+	if (dpdes)
+		mtspr(SPRN_DPDES, 0);
+	if (vc->pcr)
+		mtspr(SPRN_PCR, PCR_MASK);
+
+	/* HDEC must be at least as large as DEC, so decrementer_max fits */
+	mtspr(SPRN_HDEC, decrementer_max);
+
+	timer_rearm_host_dec(*tb);
+
+	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+
+	barrier(); /* Close in_guest critical section */
+	WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
+	/* Interrupts are recoverable at this point */
+
+	/*
+	 * cp_abort is required if the processor supports local copy-paste
+	 * to clear the copy buffer that was under control of the guest.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		asm volatile(PPC_CP_ABORT);
+
+out:
+	return trap;
+}
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_perf.c b/arch/powerpc/kvm/book3s_hv_p9_perf.c
new file mode 100644
index 0000000000..44d24cca3d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_perf.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/kvm_ppc.h>
+#include <asm/pmc.h>
+
+#include "book3s_hv.h"
+
+/*
+ * Freeze the PMU unless the supplied MMCR0/MMCRA values show it is already
+ * frozen: FC set, SAMPLE_ENABLE clear and, with CPU_FTR_ARCH_31, PMCCEXT and
+ * BHRB_DISABLE both set.
+ */
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+	bool arch_31 = cpu_has_feature(CPU_FTR_ARCH_31);
+
+	/* Already frozen: leave the registers untouched. */
+	if ((mmcr0 & MMCR0_FC) && !(mmcra & MMCRA_SAMPLE_ENABLE) &&
+	    (!arch_31 || ((mmcr0 & MMCR0_PMCCEXT) && (mmcra & MMCRA_BHRB_DISABLE))))
+		return;
+
+	if (arch_31) {
+		mmcr0 = MMCR0_FC | MMCR0_PMCCEXT;
+		mmcra = MMCRA_BHRB_DISABLE;
+	} else {
+		mmcr0 = MMCR0_FC;
+		mmcra = 0;
+	}
+
+	mtspr(SPRN_MMCR0, mmcr0);
+	mtspr(SPRN_MMCRA, mmcra);
+	isync();
+}
+
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+			 struct p9_host_os_sprs *host_os_sprs)
+{
+	struct lppaca *lp;
+	int load_pmu = 1;
+
+	lp = vcpu->arch.vpa.pinned_addr;
+	if (lp)
+		load_pmu = lp->pmcregs_in_use;
+
+	/* Save host PMU state, freezing it before the counters are read */
+	if (ppc_get_pmu_inuse()) {
+		/* POWER9, POWER10 do not implement HPMC or SPMC */
+
+		host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+		host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+		host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+		host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+		host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+		host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+		host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+		host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+		host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+		host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+		host_os_sprs->sdar = mfspr(SPRN_SDAR);
+		host_os_sprs->siar = mfspr(SPRN_SIAR);
+		host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+			host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+		}
+	}
+
+#ifdef CONFIG_PPC_PSERIES
+	/* After saving PMU, before loading guest PMU, flip pmcregs_in_use */
+	if (kvmhv_on_pseries()) {
+		barrier();
+		get_lppaca()->pmcregs_in_use = load_pmu;
+		barrier();
+	}
+#endif
+
+	/*
+	 * Load guest. If the VPA said the PMCs are not in use but the guest
+	 * tried to access them anyway, HFSCR[PM] will be set by the HFAC
+	 * fault so we can make forward progress.
+	 */
+	if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) {
+		mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+		mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+		mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+		mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+		mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+		mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+		mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+		mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+		mtspr(SPRN_SDAR, vcpu->arch.sdar);
+		mtspr(SPRN_SIAR, vcpu->arch.siar);
+		mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
+			mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+			mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+		}
+
+		/* Set MMCRA then MMCR0 last */
+		mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+		mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+		/* No isync necessary because we're starting counters */
+
+		/* Non-nested guest: grant HFSCR[PM] if it is permitted */
+		if (!vcpu->arch.nested &&
+		    (vcpu->arch.hfscr_permitted & HFSCR_PM))
+			vcpu->arch.hfscr |= HFSCR_PM;
+	}
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_guest);
+
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+			struct p9_host_os_sprs *host_os_sprs)
+{
+	struct lppaca *lp;
+	int save_pmu = 1;
+
+	lp = vcpu->arch.vpa.pinned_addr;
+	if (lp)
+		save_pmu = lp->pmcregs_in_use;
+	if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) {
+		/*
+		 * Save pmu if this guest is capable of running nested guests.
+		 * This option is for old L1s that do not set their
+		 * lppaca->pmcregs_in_use properly when entering their L2.
+		 */
+		save_pmu |= nesting_enabled(vcpu->kvm);
+	}
+
+	if (save_pmu) {
+		vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+		vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+		freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+		vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+		vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+		vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+		vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+		vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+		vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+		vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+		vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+		vcpu->arch.sdar = mfspr(SPRN_SDAR);
+		vcpu->arch.siar = mfspr(SPRN_SIAR);
+		vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+		}
+
+	} else if (vcpu->arch.hfscr & HFSCR_PM) {
+		/*
+		 * The guest accessed PMC SPRs without specifying they should
+		 * be preserved, or it cleared pmcregs_in_use after the last
+		 * access. Just ensure they are frozen.
+		 */
+		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+
+		/*
+		 * Demand-fault PMU register access in the guest.
+		 *
+		 * This is used to grab the guest's VPA pmcregs_in_use value
+		 * and reflect it into the host's VPA in the case of a nested
+		 * hypervisor.
+		 *
+		 * It also avoids having to zero-out SPRs after each guest
+		 * exit to avoid side-channels.
+		 *
+		 * This is cleared here when we exit the guest, so later HFSCR
+		 * interrupt handling can add it back to run the guest with
+		 * PM enabled next time.
+		 */
+		if (!vcpu->arch.nested)
+			vcpu->arch.hfscr &= ~HFSCR_PM;
+	} /* otherwise the PMU should still be frozen */
+
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries()) {
+		barrier();
+		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+		barrier();
+	}
+#endif
+
+	if (ppc_get_pmu_inuse()) {
+		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+		mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+		mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+		mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+		mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+		mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+		mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+		mtspr(SPRN_SDAR, host_os_sprs->sdar);
+		mtspr(SPRN_SIAR, host_os_sprs->siar);
+		mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+			mtspr(SPRN_SIER2, host_os_sprs->sier2);
+			mtspr(SPRN_SIER3, host_os_sprs->sier3);
+		}
+
+		/* Set MMCRA then MMCR0 last */
+		mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+		mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+		isync();
+	}
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_host);
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 0000000000..82be6d8751
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+#include <asm/lppaca.h>
+#include <asm/opal.h>
+#include <asm/mce.h>
+#include <asm/machdep.h>
+#include <asm/cputhreads.h>
+#include <asm/hmi.h>
+#include <asm/kvm_ppc.h>
+
+/* SRR1 bits for machine check on POWER7 */
+#define SRR1_MC_LDSTERR		(1ul << (63-42))
+#define SRR1_MC_IFETCH_SH	(63-45)
+#define SRR1_MC_IFETCH_MASK	0x7
+#define SRR1_MC_IFETCH_SLBPAR		2	/* SLB parity error */
+#define SRR1_MC_IFETCH_SLBMULTI		3	/* SLB multi-hit */
+#define SRR1_MC_IFETCH_SLBPARMULTI	4	/* SLB parity + multi-hit */
+#define SRR1_MC_IFETCH_TLBMULTI		5	/* I-TLB multi-hit */
+
+/* DSISR bits for machine check on POWER7 */
+#define DSISR_MC_DERAT_MULTI	0x800		/* D-ERAT multi-hit */
+#define DSISR_MC_TLB_MULTI	0x400		/* D-TLB multi-hit */
+#define DSISR_MC_SLB_PARITY	0x100		/* SLB parity error */
+#define DSISR_MC_SLB_MULTI	0x080		/* SLB multi-hit */
+#define DSISR_MC_SLB_PARMULTI	0x040		/* SLB parity + multi-hit */
+
+/* POWER7 SLB flush and reload from the guest's registered SLB shadow buffer */
+static void reload_slb(struct kvm_vcpu *vcpu)
+{
+	struct slb_shadow *slb;
+	unsigned long i, n;
+
+	/* First clear out SLB */
+	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+	/* Do they have an SLB shadow buffer registered? */
+	slb = vcpu->arch.slb_shadow.pinned_addr;
+	if (!slb)
+		return;
+
+	/* Sanity check: the entries to load must fit in the pinned buffer */
+	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
+	if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
+		return;
+
+	/* Load up the SLB from the first n shadow save_area entries */
+	for (i = 0; i < n; ++i) {
+		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
+		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
+
+		rb = (rb & ~0xFFFul) | i;	/* insert entry number */
+		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+	}
+}
+
+/*
+ * On POWER7, try to handle a machine check taken inside the guest in real
+ * mode without switching to the host partition; returns 1 if handled, else 0.
+ */
+static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+{
+	unsigned long srr1 = vcpu->arch.shregs.msr;
+	long handled = 1;
+
+	if (srr1 & SRR1_MC_LDSTERR) {
+		/* error on load/store */
+		unsigned long dsisr = vcpu->arch.shregs.dsisr;
+
+		if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+			     DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
+			/* flush and reload SLB; flushes D-ERAT too */
+			reload_slb(vcpu);
+			dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+				   DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
+		}
+		if (dsisr & DSISR_MC_TLB_MULTI) {
+			tlbiel_all_lpid(vcpu->kvm->arch.radix);
+			dsisr &= ~DSISR_MC_TLB_MULTI;
+		}
+		/* Any low-32 DSISR bits left over are errors we don't understand */
+		if (dsisr & 0xffffffffUL)
+			handled = 0;
+	}
+
+	switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
+	case 0:
+		break;
+	case SRR1_MC_IFETCH_SLBPAR:
+	case SRR1_MC_IFETCH_SLBMULTI:
+	case SRR1_MC_IFETCH_SLBPARMULTI:
+		reload_slb(vcpu);
+		break;
+	case SRR1_MC_IFETCH_TLBMULTI:
+		tlbiel_all_lpid(vcpu->kvm->arch.radix);
+		break;
+	default:
+		handled = 0;
+	}
+
+	return handled;
+}
+
+/*
+ * Handle a machine check taken in the guest: try the POWER7 real-mode
+ * recovery unless the guest has FWNMI enabled, then stash the MCE event
+ * (marked recovered when that applies) in the vcpu.
+ */
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+{
+	struct machine_check_event mce_evt;
+	long handled = 0;
+
+	/* FWNMI guests handle their own recovery */
+	if (!vcpu->kvm->arch.fwnmi_enabled)
+		handled = kvmppc_realmode_mc_power7(vcpu);
+
+	/*
+	 * Fetch the event and stash it in the vcpu struct so the primary
+	 * thread can handle it in virtual mode.  We can't call
+	 * machine_check_queue_event() here if we are running on an
+	 * offline secondary thread.
+	 */
+	if (!get_mce_event(&mce_evt, MCE_EVENT_RELEASE))
+		memset(&mce_evt, 0, sizeof(mce_evt));
+	else if (handled && mce_evt.version == MCE_V1)
+		mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
+
+	vcpu->arch.mce_evt = mce_evt;
+}
+
+
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	long ret = 0;
+
+	/*
+	 * Unapply and clear the offset first. That way, if the TB was not
+	 * resynced then it will remain in host-offset, and if it was resynced
+	 * then it is brought into host-offset. Then the tb offset is
+	 * re-applied before continuing with the KVM exit.
+	 *
+	 * This way, we don't need to actually know whether or not OPAL resynced
+	 * the timebase or do any of the complicated dance that the P7/8
+	 * path requires.
+	 */
+	if (vc->tb_offset_applied) {
+		u64 new_tb = mftb() - vc->tb_offset_applied;
+		mtspr(SPRN_TBU40, new_tb);
+		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+			new_tb += 0x1000000;
+			mtspr(SPRN_TBU40, new_tb);
+		}
+		vc->tb_offset_applied = 0;
+	}
+
+	local_paca->hmi_irqs++;
+
+	if (hmi_handle_debugtrig(NULL) >= 0) {
+		ret = 1;
+		goto out;
+	}
+
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(NULL);
+
+out:
+	if (vc->tb_offset) {
+		u64 new_tb = mftb() + vc->tb_offset;
+		mtspr(SPRN_TBU40, new_tb);
+		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+			new_tb += 0x1000000;
+			mtspr(SPRN_TBU40, new_tb);
+		}
+		vc->tb_offset_applied = vc->tb_offset;
+	}
+
+	return ret;
+}
+
+/*
+ * The following subcore HMI handling is all only for pre-POWER9 CPUs.
+ */
+
+/* Subcore size: taken from the dynamic split mode if one is in force. */
+static inline int kvmppc_cur_subcore_size(void)
+{
+	if (!local_paca->kvm_hstate.kvm_split_mode)
+		return threads_per_subcore;
+
+	return local_paca->kvm_hstate.kvm_split_mode->subcore_size;
+}
+
+/* Mark this thread's subcore as in-guest in the sibling subcore state. */
+void kvmppc_subcore_enter_guest(void)
+{
+	int thread_id = cpu_thread_in_core(local_paca->paca_index);
+	int subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+	/* in_guest[] is indexed by subcore id */
+	local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
+}
+EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
+
+/* Mark this thread's subcore as out of the guest in the sibling subcore state. */
+void kvmppc_subcore_exit_guest(void)
+{
+	int thread_id = cpu_thread_in_core(local_paca->paca_index);
+	int subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+	/* in_guest[] is indexed by subcore id */
+	local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
+
+/* Try to claim the TB resync; true only for the thread that set the bit. */
+static bool kvmppc_tb_resync_required(void)
+{
+	int already_claimed = test_and_set_bit(CORE_TB_RESYNC_REQ_BIT,
+				&local_paca->sibling_subcore_state->flags);
+
+	return !already_claimed;
+}
+
+/* Clear the TB resync request bit once the resync has been done. */
+static void kvmppc_tb_resync_done(void)
+{
+	clear_bit(CORE_TB_RESYNC_REQ_BIT, &local_paca->sibling_subcore_state->flags);
+}
+
+/*
+ * kvmppc_realmode_hmi_handler() is called only by primary thread during
+ * guest exit path.
+ *
+ * There are multiple reasons why HMI could occur, one of them is
+ * Timebase (TB) error. If this HMI is due to TB error, then TB would
+ * have been in stopped state. The opal hmi handler will fix it and
+ * restore the TB value with host timebase value. For HMI caused due
+ * to non-TB errors, opal hmi handler will not touch/restore TB register
+ * and hence there won't be any change in TB value.
+ *
+ * Since we are not sure about the cause of this HMI, we can't be sure
+ * about the content of TB register whether it holds guest or host timebase
+ * value. Hence the idea is to resync the TB on every HMI, so that we
+ * know about the exact state of the TB value. Resync TB call will
+ * restore TB to host timebase.
+ *
+ * Things to consider:
+ * - On TB error, HMI interrupt is reported on all the threads of the core
+ *   that has encountered TB error irrespective of split-core mode.
+ * - The very first thread on the core that gets a chance to fix TB error
+ *   would resync the TB with local chipTOD value.
+ * - The resync TB is a core level action i.e. it will sync all the TBs
+ *   in that core independent of split-core mode. This means if we trigger
+ *   TB sync from a thread from one subcore, it would affect TB values of
+ *   sibling subcores of the same core.
+ *
+ * All threads need to co-ordinate before invoking the opal hmi handler.
+ * All threads will use sibling_subcore_state->in_guest[] (shared by all
+ * threads in the core) in paca which holds information about whether
+ * sibling subcores are in Guest mode or host mode. The in_guest[] array
+ * is of size MAX_SUBCORE_PER_CORE=4, indexed using subcore id to set/unset
+ * subcore status. Only the primary thread from each subcore is responsible
+ * for setting/unsetting its designated array element while entering/exiting
+ * the guest.
+ *
+ * After invoking opal hmi handler call, one of the thread (of entire core)
+ * will need to resync the TB. Bit 63 from subcore state bitmap flags
+ * (sibling_subcore_state->flags) will be used to co-ordinate between
+ * primary threads to decide who takes up the responsibility.
+ *
+ * This is what we do:
+ * - Primary thread from each subcore tries to set resync required bit[63]
+ *   of paca->sibling_subcore_state->flags.
+ * - The first primary thread that is able to set the flag takes the
+ *   responsibility of TB resync. (Let us call it as thread leader)
+ * - All other threads which are in host will call
+ *   wait_for_subcore_guest_exit() and wait for in_guest[0-3] from
+ *   paca->sibling_subcore_state to get cleared.
+ * - Each primary thread will clear its own subcore status in the subcore
+ *   state in_guest[] array.
+ * - Once all primary threads clear in_guest[0-3], all of them will invoke
+ *   opal hmi handler.
+ * - Now all threads will wait for TB resync to complete by invoking
+ *   wait_for_tb_resync() except the thread leader.
+ * - Thread leader will do a TB resync by invoking opal_resync_timebase()
+ *   call and then it will clear the resync required bit.
+ * - All other threads will now come out of resync wait loop and proceed
+ *   with individual execution.
+ * - On return of this function, primary thread will signal all
+ *   secondary threads to proceed.
+ * - All secondary threads will eventually call opal hmi handler on
+ *   their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
+ */
+
+long kvmppc_realmode_hmi_handler(void)
+{
+	bool resync_req;
+
+	local_paca->hmi_irqs++;
+
+	if (hmi_handle_debugtrig(NULL) >= 0)
+		return 1;
+
+	/*
+	 * By now the primary thread has already completed the guest->host
+	 * partition switch but hasn't signaled the secondaries yet.
+	 * All the secondary threads on this subcore are waiting
+	 * for the primary thread to signal them to go ahead.
+	 *
+	 * For threads from subcore which isn't in guest, they all will
+	 * wait until all other subcores on this core exit the guest.
+	 *
+	 * Now set the resync required bit. If you are the first to
+	 * set this bit then kvmppc_tb_resync_required() function will
+	 * return true. For all other subcores
+	 * kvmppc_tb_resync_required() will return false.
+	 *
+	 * If resync_req == true, then this thread is responsible to
+	 * initiate TB resync after hmi handler has completed.
+	 * All other threads on this core will wait until this thread
+	 * clears the resync required bit flag.
+	 */
+	resync_req = kvmppc_tb_resync_required();
+
+	/* Reset the subcore status to indicate it has exited guest */
+	kvmppc_subcore_exit_guest();
+
+	/*
+	 * Wait for other subcores on this core to exit the guest.
+	 * All the primary threads and threads from subcore that are
+	 * not in guest will wait here until all subcores are out
+	 * of guest context.
+	 */
+	wait_for_subcore_guest_exit();
+
+	/*
+	 * At this point we are sure that primary threads from each
+	 * subcore on this core have completed guest->host partition
+	 * switch. Now it is safe to call HMI handler.
+	 */
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(NULL);
+
+	/*
+	 * Check if this thread is responsible to resync TB.
+	 * All other threads will wait until this thread completes the
+	 * TB resync.
+	 */
+	if (resync_req) {
+		opal_resync_timebase();
+		/* Reset TB resync req bit */
+		kvmppc_tb_resync_done();
+	} else {
+		wait_for_tb_resync();
+	}
+
+	/*
+	 * Reset tb_offset_applied so the guest exit code won't try
+	 * to subtract the previous timebase offset from the timebase.
+	 */
+	if (local_paca->kvm_hstate.kvm_vcore)
+		local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
new file mode 100644
index 0000000000..9182324dbe
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -0,0 +1,1300 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include <asm/trace.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/hvcall.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>
+
+/* Translate the address of a vmalloc'd object to its linear-map alias (used by realmode callers) */
+static void *real_vmalloc_addr(void *addr)
+{
+	return __va(ppc_find_vmap_phys((unsigned long)addr));
+}
+
+/* Return 1 if we need to do a global tlbie, 0 if a local tlbiel is sufficient */
+static int global_invalidates(struct kvm *kvm)
+{
+	int global;
+	int cpu;
+
+	/*
+	 * If there is only one vcore, and it's currently running,
+	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
+	 * we can use tlbiel as long as we mark all other physical
+	 * cores as potentially having stale TLB entries for this lpid.
+	 * Otherwise, don't use tlbiel.
+	 */
+	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
+		global = 0;
+	else
+		global = 1;
+
+	/* In virt mode (MSR_IR or MSR_DR set) the LPID has been switched to host, so can't do local */
+	if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+		global = 1;
+
+	if (!global) {
+		/* other cores might now have stale TLB entries: flag every cpu but our own */
+		smp_wmb();
+		cpumask_setall(&kvm->arch.need_tlb_flush);
+		cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
+		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
+	}
+
+	return global;
+}
+
+/*
+ * Add HPTE pte_index (revmap entry rev) to the chain for the real page, linking
+ * it in just before the head. Must be called with the chain locked; it unlocks it.
+ */
+void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+			     unsigned long *rmap, long pte_index, int realmode)
+{
+	struct revmap_entry *head, *tail;
+	unsigned long i;
+
+	if (*rmap & KVMPPC_RMAP_PRESENT) {
+		i = *rmap & KVMPPC_RMAP_INDEX;	/* HPTE index of the current chain head */
+		head = &kvm->arch.hpt.rev[i];
+		if (realmode)
+			head = real_vmalloc_addr(head);
+		tail = &kvm->arch.hpt.rev[head->back];
+		if (realmode)
+			tail = real_vmalloc_addr(tail);
+		rev->forw = i;
+		rev->back = head->back;
+		tail->forw = pte_index;
+		head->back = pte_index;
+	} else {
+		rev->forw = rev->back = pte_index;	/* first entry: links point at itself */
+		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
+			pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
+	}
+	unlock_rmap(rmap);
+}
+EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+
+/* Set the dirty bits in memslot's bitmap for a psize-byte mapping starting at gfn */
+void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
+			     unsigned long gfn, unsigned long psize)
+{
+	unsigned long first, count;
+
+	if (psize == 0 || !memslot->dirty_bitmap)
+		return;		/* empty range, or no bitmap to update */
+	first = gfn - memslot->base_gfn;	/* bit index is relative to the slot */
+	count = (psize + PAGE_SIZE - 1) / PAGE_SIZE;	/* round up to whole pages */
+	set_dirty_bits_atomic(memslot->dirty_bitmap, first, count);
+}
+EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
+
+static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
+				unsigned long hpte_v, unsigned long hpte_gr)
+{
+	/* Work out the page size and guest frame the HPTE describes */
+	unsigned long pgsz = kvmppc_actual_pgsz(hpte_v, hpte_gr);
+	unsigned long frame = hpte_rpn(hpte_gr, pgsz);
+	struct kvm_memory_slot *slot;
+
+	slot = __gfn_to_memslot(kvm_memslots_raw(kvm), frame);
+	if (!slot || !slot->dirty_bitmap)
+		return;		/* no memslot, or no dirty bitmap to update */
+	kvmppc_update_dirty_map(slot, frame, pgsz);
+}
+
+/*
+ * Look up the rmap word for the guest page mapped by a HPTE and return its
+ * real_vmalloc_addr() translation, or NULL if no memslot covers the page.
+ * The memslot and gfn are also stored via memslotp/gfnp when those are non-NULL.
+ */
+static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
+				      unsigned long hpte_gr,
+				      struct kvm_memory_slot **memslotp,
+				      unsigned long *gfnp)
+{
+	unsigned long pgsz = kvmppc_actual_pgsz(hpte_v, hpte_gr);
+	unsigned long frame = hpte_rpn(hpte_gr, pgsz);
+	struct kvm_memory_slot *slot;
+
+	slot = __gfn_to_memslot(kvm_memslots_raw(kvm), frame);
+	if (gfnp)
+		*gfnp = frame;
+	if (memslotp)
+		*memslotp = slot;	/* reported even when it is NULL */
+	if (!slot)
+		return NULL;
+	return real_vmalloc_addr(&slot->arch.rmap[frame - slot->base_gfn]);
+}
+
+/* Remove this HPTE from the chain for a real page, saving its R/C bits in rev and *rmap */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+				struct revmap_entry *rev,
+				unsigned long hpte_v, unsigned long hpte_r)
+{
+	struct revmap_entry *next, *prev;
+	unsigned long ptel, head;
+	unsigned long *rmap;
+	unsigned long rcbits;
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn;
+
+	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
+	ptel = rev->guest_rpte |= rcbits;	/* fold the HPTE's R/C bits into the guest view */
+	rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
+	if (!rmap)
+		return;		/* no memslot for this page, so no chain to update */
+	lock_rmap(rmap);
+
+	head = *rmap & KVMPPC_RMAP_INDEX;
+	next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
+	prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
+	next->back = rev->back;
+	prev->forw = rev->forw;
+	if (head == pte_index) {
+		head = rev->forw;
+		if (head == pte_index)	/* entry pointed at itself: chain is now empty */
+			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+		else
+			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
+	}
+	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+	if (rcbits & HPTE_R_C)	/* page was changed: record it in the dirty bitmap */
+		kvmppc_update_dirty_map(memslot, gfn,
+					kvmppc_actual_pgsz(hpte_v, hpte_r));
+	unlock_rmap(rmap);
+}
+
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+		       long pte_index, unsigned long pteh, unsigned long ptel,
+		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
+{
+	unsigned long i, pa, gpa, gfn, psize;
+	unsigned long slot_fn, hva;
+	__be64 *hpte;
+	struct revmap_entry *rev;
+	unsigned long g_ptel;	/* guest's view of the second dword, saved in the revmap */
+	struct kvm_memory_slot *memslot;
+	unsigned hpage_shift;
+	bool is_ci;
+	unsigned long *rmap;
+	pte_t *ptep;
+	unsigned int writing;
+	unsigned long mmu_seq;
+	unsigned long rcbits;
+
+	if (kvm_is_radix(kvm))	/* HPT entries are not inserted for radix guests */
+		return H_FUNCTION;
+	/*
+	 * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
+	 * these functions should work together -- must ensure a guest can not
+	 * cause problems with the TLBIE that KVM executes.
+	 */
+	if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
+		/* B=0b1x is a reserved value, disallow it. */
+		return H_PARAMETER;
+	}
+	psize = kvmppc_actual_pgsz(pteh, ptel);
+	if (!psize)
+		return H_PARAMETER;
+	writing = hpte_is_writable(ptel);
+	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+	ptel &= ~HPTE_GR_RESERVED;
+	g_ptel = ptel;
+
+	/* used later to detect if we might have been invalidated */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	/* Find the memslot (if any) for this address */
+	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+	gfn = gpa >> PAGE_SHIFT;
+	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+	pa = 0;
+	is_ci = false;
+	rmap = NULL;
+	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
+		/* No usable memslot: emulated MMIO - mark this absent with key=31 */
+		pteh |= HPTE_V_ABSENT;
+		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+		goto do_insert;
+	}
+
+	/* Check if the requested page fits entirely in the memslot. */
+	if (!slot_is_aligned(memslot, psize))
+		return H_PARAMETER;
+	slot_fn = gfn - memslot->base_gfn;
+	rmap = &memslot->arch.rmap[slot_fn];
+
+	/* Translate to host virtual address */
+	hva = __gfn_to_hva_memslot(memslot, gfn);
+
+	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
+	if (ptep) {
+		pte_t pte;
+		unsigned int host_pte_size;
+
+		if (hpage_shift)
+			host_pte_size = 1ul << hpage_shift;
+		else
+			host_pte_size = PAGE_SIZE;
+		/*
+		 * The guest page size must never exceed the size of the
+		 * host mapping that backs it; refuse the request if it does.
+		 */
+		if (host_pte_size < psize) {
+			arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+			return H_PARAMETER;
+		}
+		pte = kvmppc_read_update_linux_pte(ptep, writing);
+		if (pte_present(pte) && !pte_protnone(pte)) {
+			if (writing && !pte_write(pte))
+				/* make the actual HPTE be read-only */
+				ptel = hpte_make_readonly(ptel);
+			is_ci = pte_ci(pte);
+			pa = pte_pfn(pte) << PAGE_SHIFT;
+			pa |= hva & (host_pte_size - 1);
+			pa |= gpa & ~PAGE_MASK;
+		}
+	}
+	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+
+	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
+	ptel |= pa;
+
+	if (pa)
+		pteh |= HPTE_V_VALID;
+	else {
+		pteh |= HPTE_V_ABSENT;
+		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+	}
+
+	/* If we had a host pte mapping then check WIMG */
+	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
+		if (is_ci)
+			return H_PARAMETER;
+		/*
+		 * Allow guest to map emulated device memory as
+		 * uncacheable, but actually make it cacheable.
+		 */
+		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
+		ptel |= HPTE_R_M;
+	}
+
+	/* Find and lock the HPTEG slot to use */
+ do_insert:
+	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+		return H_PARAMETER;
+	if (likely((flags & H_EXACT) == 0)) {
+		pte_index &= ~7UL;	/* start at the first of the group's 8 slots */
+		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+		for (i = 0; i < 8; ++i) {
+			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
+			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+					  HPTE_V_ABSENT))
+				break;
+			hpte += 2;
+		}
+		if (i == 8) {
+			/*
+			 * Since try_lock_hpte doesn't retry (not even stdcx.
+			 * failures), it could be that there is a free slot
+			 * but we transiently failed to lock it.  Try again,
+			 * actually locking each slot and checking it.
+			 */
+			hpte -= 16;	/* back to the first slot: 8 slots of 2 dwords */
+			for (i = 0; i < 8; ++i) {
+				u64 pte;
+				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+					cpu_relax();
+				pte = be64_to_cpu(hpte[0]);
+				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
+					break;
+				__unlock_hpte(hpte, pte);
+				hpte += 2;
+			}
+			if (i == 8)
+				return H_PTEG_FULL;
+		}
+		pte_index += i;
+	} else {
+		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+				   HPTE_V_ABSENT)) {
+			/* Lock the slot and check again */
+			u64 pte;
+
+			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+				cpu_relax();
+			pte = be64_to_cpu(hpte[0]);
+			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+				__unlock_hpte(hpte, pte);
+				return H_PTEG_FULL;
+			}
+		}
+	}
+
+	/* Save away the guest's idea of the second HPTE dword */
+	rev = &kvm->arch.hpt.rev[pte_index];
+	if (realmode)
+		rev = real_vmalloc_addr(rev);
+	if (rev) {
+		rev->guest_rpte = g_ptel;
+		note_hpte_modification(kvm, rev);
+	}
+
+	/* Link HPTE into reverse-map chain */
+	if (pteh & HPTE_V_VALID) {
+		if (realmode)
+			rmap = real_vmalloc_addr(rmap);
+		lock_rmap(rmap);
+		/* Check for pending invalidations under the rmap chain lock */
+		if (mmu_invalidate_retry(kvm, mmu_seq)) {
+			/* inval in progress, write a non-present HPTE */
+			pteh |= HPTE_V_ABSENT;
+			pteh &= ~HPTE_V_VALID;
+			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+			unlock_rmap(rmap);
+		} else {
+			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
+						realmode);
+			/* Only set R/C in real HPTE if already set in *rmap */
+			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+		}
+	}
+
+	/* Convert to new format on P9 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		ptel = hpte_old_to_new_r(pteh, ptel);
+		pteh = hpte_old_to_new_v(pteh);
+	}
+	hpte[1] = cpu_to_be64(ptel);
+
+	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
+	eieio();
+	__unlock_hpte(hpte, pteh);
+	asm volatile("ptesync" : : : "memory");
+
+	*pte_idx_ret = pte_index;	/* report the slot that was actually used */
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+		    long pte_index, unsigned long pteh, unsigned long ptel)
+{
+	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+				 vcpu->arch.pgdir, true,	/* realmode = true */
+				 &vcpu->arch.regs.gpr[4]);	/* chosen pte index goes to gpr[4] */
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+
+#ifdef __BIG_ENDIAN__
+#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
+#else	/* !__BIG_ENDIAN__: the u32 is read starting at paca_index instead */
+#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
+#endif	/* __BIG_ENDIAN__ */
+
+static inline int is_mmio_hpte(unsigned long v, unsigned long r)
+{
+	const unsigned long key_bits = HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+
+	return (v & HPTE_V_ABSENT) && (r & key_bits) == key_bits;	/* absent with all key bits set */
+}
+
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
+{
+	/* Issue the extra tlbie(s) needed on CPUs that have the P9 TLBIE bug features set */
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		/* Radix flush for a hash guest */
+
+		unsigned long rb,rs,prs,r,ric;
+
+		rb = PPC_BIT(52); /* IS = 2 */
+		rs = 0;  /* lpid = 0 */
+		prs = 0; /* partition scoped */
+		r = 1;   /* radix format */
+		ric = 0; /* RIC_FLUSH_TLB */
+
+		/*
+		 * Need the extra ptesync to make sure we don't
+		 * re-order the tlbie
+		 */
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+			     : : "r"(rb), "i"(r), "i"(prs),
+			       "i"(ric), "r"(rs) : "memory");
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :	/* tlbie again for rb_value/lpid */
+			     "r" (rb_value), "r" (lpid));
+	}
+}
+
+static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
+		      long npages, int global, bool need_sync)
+{
+	long i;
+
+	/*
+	 * Invalidate npages TLB entries (rb values in rbvalues[]): tlbie if
+	 * global, else tlbiel. We use the POWER9 5-operand forms; since RIC=0
+	 * PRS=0 R=0, and P7/P8 tlbiel ignores RS, this also works on P7 and P8.
+	 */
+	if (global) {
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i) {
+			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
+		}
+		/* NOTE(review): passes the last rb value, so this assumes npages >= 1 */
+		fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
+		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+	} else {
+		if (need_sync)
+			asm volatile("ptesync" : : : "memory");
+		for (i = 0; i < npages; ++i) {
+			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
+				     "r" (rbvalues[i]), "r" (0));
+		}
+		asm volatile("ptesync" : : : "memory");
+	}
+}
+
+long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+			unsigned long pte_index, unsigned long avpn,
+			unsigned long *hpret)
+{
+	__be64 *hpte;
+	unsigned long v, r, rb;
+	struct revmap_entry *rev;
+	u64 pte, orig_pte, pte_r;
+
+	if (kvm_is_radix(kvm))
+		return H_FUNCTION;
+	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+		return H_PARAMETER;
+	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	pte = orig_pte = be64_to_cpu(hpte[0]);
+	pte_r = be64_to_cpu(hpte[1]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		pte = hpte_new_to_old_v(pte, pte_r);
+		pte_r = hpte_new_to_old_r(pte_r);
+	}
+	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
+	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
+		__unlock_hpte(hpte, orig_pte);	/* no match: put the entry back unchanged */
+		return H_NOT_FOUND;
+	}
+
+	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+	v = pte & ~HPTE_V_HVLOCK;
+	if (v & HPTE_V_VALID) {
+		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
+		rb = compute_tlbie_rb(v, pte_r, pte_index);
+		do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
+		/*
+		 * The reference (R) and change (C) bits in a HPT
+		 * entry can be set by hardware at any time up until
+		 * the HPTE is invalidated and the TLB invalidation
+		 * sequence has completed.  This means that when
+		 * removing a HPTE, we need to re-read the HPTE after
+		 * the invalidation sequence has completed in order to
+		 * obtain reliable values of R and C.
+		 */
+		remove_revmap_chain(kvm, pte_index, rev, v,
+				    be64_to_cpu(hpte[1]));
+	}
+	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
+	note_hpte_modification(kvm, rev);
+	unlock_hpte(hpte, 0);	/* first dword becomes 0: the slot is now empty */
+
+	if (is_mmio_hpte(v, pte_r))
+		atomic64_inc(&kvm->arch.mmio_update);
+
+	if (v & HPTE_V_ABSENT)	/* an absent entry is reported to the caller as valid */
+		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+	hpret[0] = v;
+	hpret[1] = r;
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+		     unsigned long pte_index, unsigned long avpn)
+{
+	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+				  &vcpu->arch.regs.gpr[4]);	/* removed HPTE goes to gpr[4], gpr[5] */
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
+
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long *args = &vcpu->arch.regs.gpr[4];	/* up to 4 requests, 2 words each */
+	__be64 *hp, *hptes[4];
+	unsigned long tlbrb[4];
+	long int i, j, k, n, found, indexes[4];
+	unsigned long flags, req, pte_index, rcbits;
+	int global;
+	long int ret = H_SUCCESS;
+	struct revmap_entry *rev, *revs[4];
+	u64 hp0, hp1;
+
+	if (kvm_is_radix(kvm))
+		return H_FUNCTION;
+	global = global_invalidates(kvm);
+	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+		n = 0;	/* number of valid HPTEs collected in this batch */
+		for (; i < 4; ++i) {
+			j = i * 2;
+			pte_index = args[j];
+			flags = pte_index >> 56;	/* top byte: request code and flags */
+			pte_index &= ((1ul << 56) - 1);
+			req = flags >> 6;
+			flags &= 3;
+			if (req == 3) {		/* no more requests */
+				i = 4;
+				break;
+			}
+			if (req != 1 || flags == 3 ||
+			    pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
+				/* parameter error */
+				args[j] = ((0xa0 | flags) << 56) + pte_index;
+				ret = H_PARAMETER;
+				break;
+			}
+			hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
+			/* to avoid deadlock, don't spin except for first */
+			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+				if (n)
+					break;
+				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+					cpu_relax();
+			}
+			found = 0;
+			hp0 = be64_to_cpu(hp[0]);
+			hp1 = be64_to_cpu(hp[1]);
+			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+				hp0 = hpte_new_to_old_v(hp0, hp1);
+				hp1 = hpte_new_to_old_r(hp1);
+			}
+			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+				switch (flags & 3) {
+				case 0:		/* absolute */
+					found = 1;
+					break;
+				case 1:		/* andcond */
+					if (!(hp0 & args[j + 1]))
+						found = 1;
+					break;
+				case 2:		/* AVPN */
+					if ((hp0 & ~0x7fUL) == args[j + 1])
+						found = 1;
+					break;
+				}
+			}
+			if (!found) {
+				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+				args[j] = ((0x90 | flags) << 56) + pte_index;	/* not found */
+				continue;
+			}
+
+			args[j] = ((0x80 | flags) << 56) + pte_index;	/* found */
+			rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+			note_hpte_modification(kvm, rev);
+
+			if (!(hp0 & HPTE_V_VALID)) {
+				/* absent entry: insert R and C bits from the saved guest PTE */
+				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+				args[j] |= rcbits << (56 - 5);
+				hp[0] = 0;
+				if (is_mmio_hpte(hp0, hp1))
+					atomic64_inc(&kvm->arch.mmio_update);
+				continue;
+			}
+
+			/* leave it locked */
+			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
+			tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
+			indexes[n] = j;
+			hptes[n] = hp;
+			revs[n] = rev;
+			++n;
+		}
+
+		if (!n)
+			break;
+
+		/* Now that we've collected a batch, do the tlbies */
+		do_tlbies(kvm, tlbrb, n, global, true);
+
+		/* Read PTE low words after tlbie to get final R/C values */
+		for (k = 0; k < n; ++k) {
+			j = indexes[k];
+			pte_index = args[j] & ((1ul << 56) - 1);
+			hp = hptes[k];
+			rev = revs[k];
+			remove_revmap_chain(kvm, pte_index, rev,
+				be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
+			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+			args[j] |= rcbits << (56 - 5);
+			__unlock_hpte(hp, 0);	/* first dword becomes 0: slot is now empty */
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
+
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+		      unsigned long pte_index, unsigned long avpn)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	struct revmap_entry *rev;
+	unsigned long v, r, rb, mask, bits;
+	u64 pte_v, pte_r;
+
+	if (kvm_is_radix(kvm))
+		return H_FUNCTION;
+	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+		return H_PARAMETER;
+
+	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = pte_v = be64_to_cpu(hpte[0]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
+	if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+	    ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
+		__unlock_hpte(hpte, pte_v);	/* no match: put the entry back unchanged */
+		return H_NOT_FOUND;
+	}
+
+	pte_r = be64_to_cpu(hpte[1]);
+	bits = (flags << 55) & HPTE_R_PP0;	/* gather the new pp/n/key bits from flags */
+	bits |= (flags << 48) & HPTE_R_KEY_HI;
+	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+	/* Update guest view of 2nd HPTE dword */
+	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
+		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+	if (rev) {
+		r = (rev->guest_rpte & ~mask) | bits;
+		rev->guest_rpte = r;
+		note_hpte_modification(kvm, rev);
+	}
+
+	/* Update HPTE */
+	if (v & HPTE_V_VALID) {
+		/*
+		 * If the page is valid, don't let it transition from
+		 * readonly to writable.  If it should be writable, we'll
+		 * take a trap and let the page fault code sort it out.
+		 */
+		r = (pte_r & ~mask) | bits;
+		if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
+			r = hpte_make_readonly(r);
+		/* If the PTE is changing, invalidate it first */
+		if (r != pte_r) {
+			rb = compute_tlbie_rb(v, r, pte_index);
+			hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
+					      HPTE_V_ABSENT);
+			do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
+			/* Don't lose R/C bit updates done by hardware */
+			r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
+			hpte[1] = cpu_to_be64(r);
+		}
+	}
+	unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);	/* restore the original first dword */
+	asm volatile("ptesync" : : : "memory");
+	if (is_mmio_hpte(v, pte_r))
+		atomic64_inc(&kvm->arch.mmio_update);
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
+
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+		   unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r;
+	int i, n = 1;	/* n: number of HPTEs to read, 1 or 4 */
+	struct revmap_entry *rev = NULL;
+
+	if (kvm_is_radix(kvm))
+		return H_FUNCTION;
+	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+		return H_PARAMETER;
+	if (flags & H_READ_4) {
+		pte_index &= ~3;	/* start from a 4-entry aligned index */
+		n = 4;
+	}
+	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);	/* first entry; indexed by i below */
+	for (i = 0; i < n; ++i, ++pte_index) {
+		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+		r = be64_to_cpu(hpte[1]);
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			v = hpte_new_to_old_v(v, r);
+			r = hpte_new_to_old_r(r);
+		}
+		if (v & HPTE_V_ABSENT) {	/* absent entries are reported as valid */
+			v &= ~HPTE_V_ABSENT;
+			v |= HPTE_V_VALID;
+		}
+		if (v & HPTE_V_VALID) {
+			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));	/* guest view + R/C from the HPTE */
+			r &= ~HPTE_GR_RESERVED;
+		}
+		vcpu->arch.regs.gpr[4 + i * 2] = v;
+		vcpu->arch.regs.gpr[5 + i * 2] = r;
+	}
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
+
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+			unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r, gr;
+	struct revmap_entry *rev;
+	unsigned long *rmap;
+	long ret = H_NOT_FOUND;	/* returned when the slot is neither valid nor absent */
+
+	if (kvm_is_radix(kvm))
+		return H_FUNCTION;
+	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+		return H_PARAMETER;
+
+	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = be64_to_cpu(hpte[0]);
+	r = be64_to_cpu(hpte[1]);
+	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+		goto out;
+
+	gr = rev->guest_rpte;	/* value handed back: taken before R is cleared */
+	if (rev->guest_rpte & HPTE_R_R) {
+		rev->guest_rpte &= ~HPTE_R_R;
+		note_hpte_modification(kvm, rev);
+	}
+	if (v & HPTE_V_VALID) {
+		gr |= r & (HPTE_R_R | HPTE_R_C);
+		if (r & HPTE_R_R) {
+			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
+			rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
+			if (rmap) {
+				lock_rmap(rmap);
+				*rmap |= KVMPPC_RMAP_REFERENCED;	/* keep the reference in the rmap */
+				unlock_rmap(rmap);
+			}
+		}
+	}
+	vcpu->arch.regs.gpr[4] = gr;
+	ret = H_SUCCESS;
+ out:
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
+
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+			unsigned long pte_index)
+{
+	struct kvm *kvm = vcpu->kvm;
+	__be64 *hpte;
+	unsigned long v, r, gr;
+	struct revmap_entry *rev;
+	long ret = H_NOT_FOUND;	/* returned when the slot is neither valid nor absent */
+
+	if (kvm_is_radix(kvm))
+		return H_FUNCTION;
+	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+		return H_PARAMETER;
+
+	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+		cpu_relax();
+	v = be64_to_cpu(hpte[0]);
+	r = be64_to_cpu(hpte[1]);
+	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+		goto out;
+
+	gr = rev->guest_rpte;	/* value handed back: taken before C is cleared */
+	if (gr & HPTE_R_C) {
+		rev->guest_rpte &= ~HPTE_R_C;
+		note_hpte_modification(kvm, rev);
+	}
+	if (v & HPTE_V_VALID) {
+		/* need to make it temporarily absent so C is stable */
+		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
+		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
+		r = be64_to_cpu(hpte[1]);	/* re-read after the invalidation */
+		gr |= r & (HPTE_R_R | HPTE_R_C);
+		if (r & HPTE_R_C) {
+			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
+			eieio();
+			kvmppc_set_dirty_from_hpte(kvm, v, gr);
+		}
+	}
+	vcpu->arch.regs.gpr[4] = gr;
+	ret = H_SUCCESS;
+ out:
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);	/* writes back v, which restores the valid bit */
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
+
+/* Translate guest physical address gpa to a host physical address via the host pte; H_* status */
+static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+			  unsigned long gpa, int writing, unsigned long *hpa,
+			  struct kvm_memory_slot **memslot_p)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn, hva, pa, psize;
+	unsigned int shift;
+	pte_t *ptep, pte;
+
+	/* Find the memslot for this address */
+	gfn = gpa >> PAGE_SHIFT;
+	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return H_PARAMETER;	/* gpa is not covered by a valid memslot */
+
+	/* Translate to host virtual address */
+	hva = __gfn_to_hva_memslot(memslot, gfn);
+
+	/* Try to find the host pte for that virtual address */
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+	if (!ptep)
+		return H_TOO_HARD;	/* no host pte found */
+	pte = kvmppc_read_update_linux_pte(ptep, writing);
+	if (!pte_present(pte))
+		return H_TOO_HARD;	/* host page is not present */
+
+	/* Convert to a physical address; psize is the host mapping size in bytes */
+	psize = shift ? 1UL << shift : PAGE_SIZE;
+	pa = pte_pfn(pte) << PAGE_SHIFT;
+	pa |= hva & (psize - 1);	/* offset of the page within a larger host mapping */
+	pa |= gpa & ~PAGE_MASK;		/* offset within the page */
+
+	if (hpa)
+		*hpa = pa;
+	if (memslot_p)
+		*memslot_p = memslot;
+
+	return H_SUCCESS;
+}
+
+static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
+				       unsigned long dest)
+{
+	struct kvm_memory_slot *memslot;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long pa, mmu_seq;
+	long ret = H_SUCCESS;
+	int i;
+
+	/* Used later to detect if we might have been invalidated */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+
+	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);	/* writing = 1 */
+	if (ret != H_SUCCESS)
+		goto out_unlock;
+
+	/* Zero the 4K page, stepping dcbz through it L1_CACHE_BYTES at a time */
+	for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
+		dcbz((void *)pa);
+	kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
+
+out_unlock:
+	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+	return ret;
+}
+
+static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
+				       unsigned long dest, unsigned long src)
+{
+	unsigned long dest_pa, src_pa, mmu_seq;
+	struct kvm_memory_slot *dest_memslot;
+	struct kvm *kvm = vcpu->kvm;
+	long ret = H_SUCCESS;
+
+	/* Used later to detect if we might have been invalidated */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);	/* writing = 1 */
+	if (ret != H_SUCCESS)
+		goto out_unlock;
+
+	ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);	/* source is only read */
+	if (ret != H_SUCCESS)
+		goto out_unlock;
+
+	/* Copy the 4K page from src to dest */
+	memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);
+
+	kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
+
+out_unlock:
+	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+	return ret;
+}
+
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+			   unsigned long dest, unsigned long src)
+{
+	const unsigned long known_flags = H_ICACHE_INVALIDATE |
+		H_ICACHE_SYNCHRONIZE | H_ZERO_PAGE | H_COPY_PAGE |
+		H_PAGE_SET_LOANED;	/* H_PAGE_SET_LOANED covers all CMO flags */
+	const u64 offset_mask = SZ_4K - 1;	/* low bits of a 4K page */
+
+	/* Radix mode is left to the virtual mode handler */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
+	/* Reject any flag bit outside the accepted set */
+	if (flags & ~known_flags)
+		return H_PARAMETER;
+
+	/* dest must be 4K aligned, and src too when the copy flag is set */
+	if (dest & offset_mask)
+		return H_PARAMETER;
+	if ((flags & H_COPY_PAGE) && (src & offset_mask))
+		return H_PARAMETER;
+
+	/* Copy is checked before zero; the other flags can be ignored */
+	if (flags & H_COPY_PAGE)
+		return kvmppc_do_h_page_init_copy(vcpu, dest, src);
+	if (flags & H_ZERO_PAGE)
+		return kvmppc_do_h_page_init_zero(vcpu, dest);
+
+	return H_SUCCESS;
+}
+
+void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
+			unsigned long pte_index)
+{
+	unsigned long rb;
+	u64 hp0, hp1;
+
+	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);	/* clear the valid bit before the tlbie */
+	hp0 = be64_to_cpu(hptep[0]);
+	hp1 = be64_to_cpu(hptep[1]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		hp0 = hpte_new_to_old_v(hp0, hp1);
+		hp1 = hpte_new_to_old_r(hp1);
+	}
+	rb = compute_tlbie_rb(hp0, hp1, pte_index);
+	do_tlbies(kvm, &rb, 1, 1, true);	/* always a global tlbie, with a leading ptesync */
+}
+EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
+
+void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
+			   unsigned long pte_index)
+{
+	unsigned long rb;
+	unsigned char rbyte;
+	u64 hp0, hp1;
+
+	hp0 = be64_to_cpu(hptep[0]);
+	hp1 = be64_to_cpu(hptep[1]);
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		hp0 = hpte_new_to_old_v(hp0, hp1);
+		hp1 = hpte_new_to_old_r(hp1);
+	}
+	rb = compute_tlbie_rb(hp0, hp1, pte_index);
+	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;	/* that byte's value with R cleared */
+	/* modify only the second-last byte, which contains the ref bit */
+	*((char *)hptep + 14) = rbyte;
+	do_tlbies(kvm, &rb, 1, 1, false);	/* global tlbie, no leading ptesync */
+}
+EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
+
+static int slb_base_page_shift[4] = {	/* indexed by (slb_v & SLB_VSID_LP) >> 4 */
+	24,	/* 16M */
+	16,	/* 64k */
+	34,	/* 16G */
+	20,	/* 1M, unsupported */
+};
+
+static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
+		unsigned long eaddr, unsigned long slb_v, long mmio_update)
+{
+	struct mmio_hpte_cache_entry *slot = vcpu->arch.mmio_cache.entry;
+	struct mmio_hpte_cache_entry *end = slot + MMIO_HPTE_CACHE_SIZE;
+
+	for (; slot < end; slot++) {
+		unsigned int shift = slot->slb_base_pshift;
+
+		if (slot->mmio_update != mmio_update)
+			continue;	/* stamped with a different mmio_update value */
+		if (slot->slb_v != slb_v)
+			continue;
+		if ((slot->eaddr >> shift) == (eaddr >> shift))
+			return slot;	/* same page at the entry's base page size */
+	}
+	return NULL;
+}
+
+static struct mmio_hpte_cache_entry *
+			next_mmio_cache_entry(struct kvm_vcpu *vcpu)
+{
+	unsigned int slot = vcpu->arch.mmio_cache.index;
+
+	/* Entries are handed out in rotation: step the cursor, wrapping at the end */
+	if (++vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
+		vcpu->arch.mmio_cache.index = 0;
+
+	return &vcpu->arch.mmio_cache.entry[slot];
+}
+
+/* Search the HPT for the entry matching eaddr/slb_v that has a bit of 'valid' set.
+ * Returns its index with the HPTE still locked, or -1 if there is none. When
+ * called from virtmode, use preempt_disable(), else holding HPTE_V_HVLOCK can deadlock.
+ */
+long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
+			      unsigned long valid)
+{
+	unsigned int i;
+	unsigned int pshift;
+	unsigned long somask;
+	unsigned long vsid, hash;
+	unsigned long avpn;
+	__be64 *hpte;
+	unsigned long mask, val;
+	unsigned long v, r, orig_v;
+
+	/* Get page shift, work out hash and AVPN etc. */
+	mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
+	val = 0;
+	pshift = 12;
+	if (slb_v & SLB_VSID_L) {
+		mask |= HPTE_V_LARGE;
+		val |= HPTE_V_LARGE;
+		pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
+	}
+	if (slb_v & SLB_VSID_B_1T) {
+		somask = (1UL << 40) - 1;	/* offset mask for a 1T segment */
+		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
+		vsid ^= vsid << 25;
+	} else {
+		somask = (1UL << 28) - 1;	/* offset mask for a 256M segment */
+		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
+	}
+	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
+	avpn = slb_v & ~(somask >> 16);	/* also includes B */
+	avpn |= (eaddr & somask) >> 16;
+
+	if (pshift >= 24)
+		avpn &= ~((1UL << (pshift - 16)) - 1);
+	else
+		avpn &= ~0x7fUL;
+	val |= avpn;
+
+	for (;;) {
+		hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
+
+		for (i = 0; i < 16; i += 2) {	/* 8 HPTEs per group, 2 dwords each */
+			/* Read the PTE racily */
+			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+			if (cpu_has_feature(CPU_FTR_ARCH_300))
+				v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
+
+			/* Check valid/absent, hash, segment size and AVPN */
+			if (!(v & valid) || (v & mask) != val)
+				continue;
+
+			/* Lock the PTE and read it under the lock */
+			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
+				cpu_relax();
+			v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+			r = be64_to_cpu(hpte[i+1]);
+			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+				v = hpte_new_to_old_v(v, r);
+				r = hpte_new_to_old_r(r);
+			}
+
+			/*
+			 * Check the HPTE again, including base page size
+			 */
+			if ((v & valid) && (v & mask) == val &&
+			    kvmppc_hpte_base_page_shift(v, r) == pshift)
+				/* Return with the HPTE still locked */
+				return (hash << 3) + (i >> 1);
+
+			__unlock_hpte(&hpte[i], orig_v);
+		}
+
+		if (val & HPTE_V_SECONDARY)	/* both groups searched: give up */
+			break;
+		val |= HPTE_V_SECONDARY;	/* retry once in the secondary group */
+		hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
+	}
+	return -1;
+}
+EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
+
+/*
+ * Called in real mode to check whether an HPTE not found fault
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
+ * Returns a possibly modified status (DSISR) value if it is none of
+ * these (i.e. the interrupt should be passed to the guest),
+ * -1 to pass the fault up to host kernel mode code,
+ * -2 to do that and also load the instruction word (for MMIO emulation),
+ * or 0 if we should make the guest retry the access.
+ */
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+			  unsigned long slb_v, unsigned int status, bool data)
+{
+	struct kvm *kvm = vcpu->kvm;
+	long int index;
+	unsigned long v, r, gr, orig_v;
+	__be64 *hpte;
+	unsigned long valid;
+	struct revmap_entry *rev;
+	unsigned long pp, key;
+	struct mmio_hpte_cache_entry *cache_entry = NULL;
+	long mmio_update = 0;
+
+	/* For protection fault, expect to find a valid HPTE */
+	valid = HPTE_V_VALID;
+	if (status & DSISR_NOHPTE) {
+		valid |= HPTE_V_ABSENT;	/* absent entries match as well */
+		mmio_update = atomic64_read(&kvm->arch.mmio_update);
+		cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
+	}
+	if (cache_entry) {	/* cache hit: no HPT search, no lock taken */
+		index = cache_entry->pte_index;
+		v = cache_entry->hpte_v;
+		r = cache_entry->hpte_r;
+		gr = cache_entry->rpte;
+	} else {
+		index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+		if (index < 0) {
+			if (status & DSISR_NOHPTE)
+				return status;	/* there really was no HPTE */
+			return 0;	/* for prot fault, HPTE disappeared */
+		}
+		hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+		v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+		r = be64_to_cpu(hpte[1]);
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			v = hpte_new_to_old_v(v, r);
+			r = hpte_new_to_old_r(r);
+		}
+		rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
+		gr = rev->guest_rpte;
+
+		unlock_hpte(hpte, orig_v);	/* v, r and gr are now local copies */
+	}
+
+	/* For not found, if the HPTE is valid by now, retry the instruction */
+	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
+		return 0;
+
+	/* Check access permissions to the page, using the guest's view (gr) */
+	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
+	if (!data) {
+		if (gr & (HPTE_R_N | HPTE_R_G))
+			return status | SRR1_ISI_N_G_OR_CIP;
+		if (!hpte_read_permission(pp, slb_v & key))
+			return status | SRR1_ISI_PROT;
+	} else if (status & DSISR_ISSTORE) {
+		/* check write permission */
+		if (!hpte_write_permission(pp, slb_v & key))
+			return status | DSISR_PROTFAULT;
+	} else {
+		if (!hpte_read_permission(pp, slb_v & key))
+			return status | DSISR_PROTFAULT;
+	}
+
+	/* Check storage key, if applicable */
+	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
+		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
+		if (status & DSISR_ISSTORE)
+			perm >>= 1;	/* stores test bit 1, loads bit 0 */
+		if (perm & 1)
+			return status | DSISR_KEYFAULT;
+	}
+
+	/* Save HPTE info for virtual-mode handler */
+	vcpu->arch.pgfault_addr = addr;
+	vcpu->arch.pgfault_index = index;
+	vcpu->arch.pgfault_hpte[0] = v;
+	vcpu->arch.pgfault_hpte[1] = r;
+	vcpu->arch.pgfault_cache = cache_entry;
+
+	/* Check the storage key to see if it is possibly emulated MMIO */
+	if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
+		if (!cache_entry) {	/* not cached yet: remember this HPTE */
+			unsigned int pshift = 12;
+			unsigned int pshift_index;
+
+			if (slb_v & SLB_VSID_L) {
+				pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
+				pshift = slb_base_page_shift[pshift_index];
+			}
+			cache_entry = next_mmio_cache_entry(vcpu);
+			cache_entry->eaddr = addr;
+			cache_entry->slb_base_pshift = pshift;
+			cache_entry->pte_index = index;
+			cache_entry->hpte_v = v;
+			cache_entry->hpte_r = r;
+			cache_entry->rpte = gr;
+			cache_entry->slb_v = slb_v;
+			cache_entry->mmio_update = mmio_update;
+		}
+		if (data && (vcpu->arch.shregs.msr & MSR_IR))
+			return -2;	/* MMIO emulation - load instr word */
+	}
+
+	return -1;		/* send fault up to host kernel mode */
+}
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 0000000000..e165bfa842
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,924 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/kernel_stat.h>
+#include <linux/pgtable.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/synch.h>
+#include <asm/cputhreads.h>
+#include <asm/ppc-opcode.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP	/* not referenced anywhere in this file */
+
+int h_ipi_redirect = 1;	/* nonzero: VCPU kicks may go via a free host core */
+EXPORT_SYMBOL(h_ipi_redirect);
+int kvm_irq_bypass = 1;	/* only defined and exported here; not read in this file */
+EXPORT_SYMBOL(kvm_irq_bypass);
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq, bool check_resend);
+static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
+
+/* -- ICS routines -- */
+/* Retry delivery of every source in this ICS that is flagged for resend. */
+static void ics_rm_check_resend(struct kvmppc_xics *xics,
+				struct kvmppc_ics *ics, struct kvmppc_icp *icp)
+{
+	int n = 0;
+
+	while (n < KVMPPC_XICS_IRQ_PER_ICS) {
+		struct ics_irq_state *src = &ics->irq_state[n++];
+		if (src->resend)
+			icp_rm_deliver_irq(xics, icp, src->number, true);
+	}
+}
+
+/* -- ICP routines -- */
+
+#ifdef CONFIG_SMP	/* post vcpu to host core hcore and IPI that core */
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+{
+	int hcpu;
+
+	hcpu = hcore << threads_shift;	/* core number -> its first CPU number */
+	kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;	/* for the handler */
+	smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+	kvmppc_set_host_ipi(hcpu);
+	smp_mb();	/* full barrier between the stores above and the IPI */
+	kvmhv_rm_send_ipi(hcpu);
+}
+#else	/* !CONFIG_SMP: empty stub */
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
+#endif	/* CONFIG_SMP */
+
+/*
+ * Scan cores start + 1 .. max - 1 for one that is running in host context
+ * and hasn't already been targeted for another rm_host_ops, and claim it
+ * by atomically setting its rm_action.  find_available_hostcore() calls
+ * this twice to go in a circle starting after the current core.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1, if no core could be found in the host
+ * Else, returns the core Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+		struct kvmppc_host_rm_core *rm_core, int max, int action)
+{
+	bool success;
+	int core;
+	union kvmppc_rm_state old, new;
+
+	for (core = start + 1; core < max; core++)  {
+		old = new = READ_ONCE(rm_core[core].rm_state);
+
+		if (!old.in_host || old.rm_action)	/* not in host, or taken */
+			continue;
+
+		/* Try to grab this host core if not taken already. */
+		new.rm_action = action;
+
+		success = cmpxchg64(&rm_core[core].rm_state.raw,
+						old.raw, new.raw) == old.raw;
+		if (success) {
+			/*
+			 * Make sure that the store to the rm_action is made
+			 * visible before we return to caller (and the
+			 * subsequent store to rm_data) to synchronize with
+			 * the IPI handler.
+			 */
+			smp_wmb();
+			return core;
+		}
+	}
+
+	return -1;
+}
+
+/* Claim a free host core: try those after ours first, then those before. */
+static inline int find_available_hostcore(int action)
+{
+	struct kvmppc_host_rm_core *cores = kvmppc_host_rm_ops_hv->rm_core;
+	int self = smp_processor_id() >> threads_shift;
+	int got = grab_next_hostcore(self, cores, cpu_nr_cores(), action);
+
+	if (got != -1)
+		return got;
+	/* A start of -1 makes this second scan cover cores 0 .. self - 1 */
+	return grab_next_hostcore(-1, cores, self, action);
+}
+
+/* Flag an external interrupt on vcpu and arrange for it to be noticed. */
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *this_vcpu)
+{
+	struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+	int hcore = -1;
+	int cpu;
+
+	/* Record the pending external interrupt on the target VCPU */
+	vcpu->stat.queue_intr++;
+	set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
+
+	/* Target is the VCPU we are running: setting MER is all it takes */
+	if (vcpu == this_vcpu) {
+		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+		return;
+	}
+
+	/* Target has a usable thread_cpu: IPI that CPU directly */
+	cpu = vcpu->arch.thread_cpu;
+	if (cpu >= 0 && cpu < nr_cpu_ids) {
+		smp_mb();
+		kvmhv_rm_send_ipi(cpu);
+		return;
+	}
+
+	/*
+	 * Otherwise try to borrow a host core to wake the VCPU; if none can
+	 * be had, note the kick on our ICP so we eventually return too hard.
+	 */
+	if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
+		hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
+	if (hcore == -1) {
+		this_icp->rm_action |= XICS_RM_KICK_VCPU;
+		this_icp->rm_kick_target = vcpu;
+		return;
+	}
+	icp_send_hcore_msg(hcore, vcpu);
+}
+
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+	/* Only called on self: drop our pending external irq bit and MER */
+	clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
+}
+
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+				     union kvmppc_icp_state old,
+				     union kvmppc_icp_state new)
+{
+	struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	bool success;
+
+	/* Output is set when an irq is pending and more favored than CPPR */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update; fails if icp->state no longer equals old */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;
+
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee)
+		icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+	/* Expose the state change for debug purposes */
+	this_vcpu->arch.icp->rm_dbgstate = new;
+	this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+ bail:
+	return success;	/* false: caller re-reads the state and retries */
+}
+
+/* H_TOO_HARD if real_mode_dbg is set or an rm_action is queued, else H_SUCCESS */
+static inline int check_too_hard(struct kvmppc_xics *xics, struct kvmppc_icp *icp)
+{
+	return (!xics->real_mode_dbg && !icp->rm_action) ? H_SUCCESS : H_TOO_HARD;
+}
+
+static void icp_rm_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+
+	/* Order the resend_map loads with the need_resend test in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))	/* already clear */
+			continue;
+		if (!ics)	/* no ICS registered under this id */
+			continue;
+		ics_rm_check_resend(xics, ics, icp);	/* redeliver its flagged irqs */
+	}
+}
+
+static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* Must be more favored than CPPR, MFRR and the pending irq */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;	/* irq we displace, 0 if none */
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	return success;
+}
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq, bool check_resend)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+
+ again:
+	/* Look up the ICS and the source state for this interrupt */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		/* Unsafe increment, but this does not need to be accurate */
+		xics->err_noics++;
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	arch_spin_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->server_num) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			/* Unsafe increment again */
+			xics->err_noicp++;
+			goto out;
+		}
+	}
+
+	if (check_resend)
+		if (!state->resend)	/* resend no longer requested */
+			goto out;
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differentiate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		state->masked_pending = 1;
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * ics spin lock.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_rm_try_to_deliver() the target
+	 * processor may well have already consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			arch_spin_unlock(&ics->lock);
+			icp->n_reject++;
+			new_irq = reject;
+			check_resend = 0;
+			goto again;	/* now deliver the irq we displaced */
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt, so we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		state->resend = 1;
+
+		/*
+		 * Make sure when checking resend, we don't miss the resend
+		 * if resend_map bit is seen and cleared.
+		 */
+		smp_wmb();
+		set_bit(ics->icsid, icp->resend_map);
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_rm_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			state->resend = 0;
+			arch_spin_unlock(&ics->lock);
+			check_resend = 0;
+			goto again;	/* same irq, full delivery attempt */
+		}
+	}
+ out:
+	arch_spin_unlock(&ics->lock);
+}
+
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			     u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non-existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here as well.
+	 */
+	if (resend) {
+		icp->n_check_resend++;
+		icp_rm_check_resend(xics, icp);
+	}
+}
+
+unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.gpr[5] = get_tb();	/* timebase value goes in GPR5 */
+	return xics_rm_h_xirr(vcpu);	/* otherwise identical to H_XIRR */
+}
+
+unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/* First clear the interrupt (pending bit and MER) on ourselves */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) with the current CPPR
+	 * in the top byte, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)	/* nothing pending: leave state alone */
+			break;
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;
+		new_state.xisr = 0;
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Return the result in GPR4 */
+	vcpu->arch.regs.gpr[4] = xirr;
+
+	return check_too_hard(xics, icp);
+}
+
+int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		  unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	local = this_icp->server_num == server;
+	if (local)
+		icp = this_icp;
+	else
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	if (!icp)
+		return H_PARAMETER;	/* no ICP for that server number */
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be done as there can be no XISR to
+	 * reject.
+	 *
+	 * ICP state: Check_IPI
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it.
+	 *
+	 * ICP State: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
+	 */
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri) {
+				reject = new_state.xisr;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
+		}
+
+		if (mfrr > old_state.mfrr) {	/* MFRR made less favored */
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/* Handle reject in real mode */
+	if (reject && reject != XICS_IPI) {
+		this_icp->n_reject++;
+		icp_rm_deliver_irq(xics, icp, reject, false);
+	}
+
+	/* Handle resends in real mode */
+	if (resend) {
+		this_icp->n_check_resend++;
+		icp_rm_check_resend(xics, icp);
+	}
+
+	return check_too_hard(xics, this_icp);	/* judged on the caller's ICP */
+}
+
+int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	if (!xics || !xics->real_mode)
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr) {
+		icp_rm_down_cppr(xics, icp, cppr);
+		goto bail;
+	} else if (cppr == icp->state.cppr)	/* no change requested */
+		return H_SUCCESS;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	icp_rm_clr_vcpu_irq(icp->vcpu);
+
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {
+			reject = new_state.xisr;	/* 0 if nothing was pending */
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_rm_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI) {
+		icp->n_reject++;
+		icp_rm_deliver_irq(xics, icp, reject, false);
+	}
+ bail:
+	return check_too_hard(xics, icp);
+}
+
+static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+	u32 pq_old, pq_new;
+
+	/*
+	 * ICS EOI handling: For LSI, if P bit is still set, we need to
+	 * resend it.
+	 *
+	 * For MSI, we move Q bit into P (and clear Q). If it is set,
+	 * resend it.
+	 */
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		goto bail;	/* no ICS holds this irq: nothing to EOI */
+
+	state = &ics->irq_state[src];
+
+	if (state->lsi)
+		pq_new = state->pq_state;
+	else
+		do {
+			pq_old = state->pq_state;
+			pq_new = pq_old >> 1;	/* Q moves into P, Q becomes 0 */
+		} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+	if (pq_new & PQ_PRESENTED)
+		icp_rm_deliver_irq(xics, NULL, irq, false);
+
+	if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
+		icp->rm_action |= XICS_RM_NOTIFY_EOI;	/* makes us return too hard */
+		icp->rm_eoied_irq = irq;
+	}
+
+	/* Handle passthrough interrupts */
+	if (state->host_irq) {
+		++vcpu->stat.pthru_all;
+		if (state->intr_cpu != -1) {
+			int pcpu = raw_smp_processor_id();
+
+			pcpu = cpu_first_thread_sibling(pcpu);
+			++vcpu->stat.pthru_host;
+			if (state->intr_cpu != pcpu) {	/* recorded CPU differs */
+				++vcpu->stat.pthru_bad_aff;
+				xics_opal_set_server(state->host_irq, pcpu);
+			}
+			state->intr_cpu = -1;
+		}
+	}
+
+ bail:
+	return check_too_hard(xics, icp);
+}
+
+/* Real-mode H_EOI: apply the CPPR taken from xirr >> 24, then EOI the source. */
+int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 source = xirr & 0x00ffffff;
+	unsigned long cppr = xirr >> 24;
+
+	if (!(xics && xics->real_mode))
+		return H_TOO_HARD;
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * Should SW misuse EOI to make the CPPR more favored (a lower
+	 * value), no check is made for rejecting a pending interrupt:
+	 * that is a SW error which PAPR does not require us to handle.
+	 *
+	 * The ICS only gets its EOI once the CPPR has been updated.
+	 *
+	 * ICP State: Down_CPPR, carried out by the helper that is
+	 * shared with H_CPPR.
+	 */
+	icp_rm_down_cppr(xics, icp, cppr);
+
+	/* An IPI has no source to EOI, so only report the outcome */
+	if (source != XICS_IPI)
+		return ics_rm_eoi(vcpu, source);
+
+	return check_too_hard(xics, icp);
+}
+
+static unsigned long eoi_rc;	/* last nonzero rc from pnv_opal_pci_msi_eoi() */
+
+static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again)
+{
+	void __iomem *xics_phys;
+	int64_t rc;
+
+	rc = pnv_opal_pci_msi_eoi(d);
+
+	if (rc)	/* recorded only; this file never reads it back */
+		eoi_rc = rc;
+
+	iosync();
+
+	/* EOI it: write XIRR directly if xics_phys is set, else ask OPAL */
+	xics_phys = local_paca->kvm_hstate.xics_phys;
+	if (xics_phys) {
+		__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
+	} else {
+		rc = opal_int_eoi(be32_to_cpu(xirr));
+		*again = rc > 0;	/* *again is written on this path only */
+	}
+}
+
+/* Point hw_irq at server_cpu through OPAL, using the default priority. */
+static int xics_opal_set_server(unsigned int hw_irq, int server_cpu)
+{
+	unsigned int hard_id = get_hard_smp_processor_id(server_cpu);
+
+	return opal_set_xive(hw_irq, hard_id << 2, DEFAULT_PRIORITY);
+}
+
+/*
+ * Bump the calling CPU's copy of a per-CPU unsigned int counter.
+ * Usable from real mode: a copy that lives in the vmalloc region is
+ * accessed through the address vmalloc_to_phys() returns for it.
+ *
+ * ToDo: Make this work for any integral type
+ */
+
+static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
+{
+	unsigned int *slot = per_cpu_ptr(addr, smp_processor_id());
+	unsigned long where = (unsigned long)slot;
+
+	/* Anything outside the vmalloc region is incremented in place */
+	if (get_region_id(where) != VMALLOC_REGION_ID) {
+		++*slot;
+		return;
+	}
+	where = vmalloc_to_phys(slot);
+	slot = (unsigned int *)where;
+	++*slot;
+}
+
+/*
+ * We don't try to update the flags in the irq_desc 'istate' field in
+ * here as would happen in the normal IRQ handling path for several reasons:
+ *  - state flags represent internal IRQ state and are not expected to be
+ *    updated outside the IRQ subsystem
+ *  - more importantly, these are useful for edge triggered interrupts,
+ *    IRQ probing, etc., but we are only handling MSI/MSI-X interrupts here
+ *    and these states shouldn't apply to us.
+ *
+ * However, we do update the IRQ statistics - we somewhat duplicate the
+ * code in kstat_incr_irqs_this_cpu() for this since this function is
+ * defined in irq/internal.h which we don't want to include here.
+ * The only difference is that desc->kstat_irqs is an allocated per CPU
+ * variable and could have been vmalloc'ed, so we can't directly
+ * call __this_cpu_inc() on it. The kstat structure is a static
+ * per CPU variable and it should be accessible by real-mode KVM.
+ *
+ */
+static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
+{
+	this_cpu_inc_rm(desc->kstat_irqs);	/* this irq's count, real-mode safe */
+	__this_cpu_inc(kstat.irqs_sum);	/* this CPU's total across all irqs */
+}
+
+long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
+				 __be32 xirr,
+				 struct kvmppc_irq_map *irq_map,
+				 struct kvmppc_passthru_irqmap *pimap,
+				 bool *again)
+{
+	struct kvmppc_xics *xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u32 irq;
+	u16 src;
+	u32 pq_old, pq_new;
+
+	irq = irq_map->v_hwirq;	/* the irq number as delivered to the guest */
+	xics = vcpu->kvm->arch.xics;
+	icp = vcpu->arch.icp;
+
+	kvmppc_rm_handle_irq_desc(irq_map->desc);	/* account the host irq */
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return 2;	/* same value as the too-hard case below */
+
+	state = &ics->irq_state[src];
+
+	/* only MSIs register bypass producers, so it must be MSI here */
+	do {
+		pq_old = state->pq_state;
+		pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+	} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+	/* Test P=1, Q=0, this is the only case where we present */
+	if (pq_new == PQ_PRESENTED)
+		icp_rm_deliver_irq(xics, icp, irq, false);
+
+	/* EOI the interrupt at the host, using the real hwirq's irq_data */
+	icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again);
+
+	if (check_too_hard(xics, icp) == H_TOO_HARD)
+		return 2;	/* NOTE(review): 2 / -2 are caller-defined codes */
+	else
+		return -2;
+}
+
+/*  --- Non-real mode XICS-related built-in routines ---  */
+
+/*
+ * Host Operations poked by RM KVM: carry out the action that was
+ * requested from real mode.  Only VCPU kicks are understood.
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+	if (action != XICS_RM_KICK_VCPU) {
+		/* Nothing else is expected to arrive here, so complain */
+		WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+		return;
+	}
+
+	/* data is handed to the kick callback untouched */
+	kvmppc_host_rm_ops_hv->vcpu_kick(data);
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+	int core;
+	unsigned int cpu = smp_processor_id();
+	struct kvmppc_host_rm_core *rm_corep;
+
+	core = cpu >> threads_shift;	/* CPU number -> core number */
+	rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+	if (rm_corep->rm_data) {	/* a real-mode sender posted work for us */
+		rm_host_ipi_action(rm_corep->rm_state.rm_action,
+							rm_corep->rm_data);
+		/* Clear rm_data before rm_action, ordered vs. real mode KVM */
+		rm_corep->rm_data = NULL;
+		smp_wmb();
+		rm_corep->rm_state.rm_action = 0;	/* core can be claimed again */
+	}
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
new file mode 100644
index 0000000000..ea7ad200b3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -0,0 +1,3026 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * Derived from book3s_rmhandlers.S and other files, which are:
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <linux/objtool.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/hvcall.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/tm.h>
+#include <asm/opal.h>
+#include <asm/thread_info.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
+
+/* Values in HSTATE_NAPPING(r13) */
+#define NAPPING_CEDE	1
+#define NAPPING_NOVCPU	2
+#define NAPPING_UNSPLIT	3
+
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			160
+#define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_TID		(SFS-16)
+#define STACK_SLOT_PSSCR	(SFS-24)
+#define STACK_SLOT_PID		(SFS-32)
+#define STACK_SLOT_IAMR		(SFS-40)
+#define STACK_SLOT_CIABR	(SFS-48)
+#define STACK_SLOT_DAWR0	(SFS-56)
+#define STACK_SLOT_DAWRX0	(SFS-64)
+#define STACK_SLOT_HFSCR	(SFS-72)
+#define STACK_SLOT_AMR		(SFS-80)
+#define STACK_SLOT_UAMOR	(SFS-88)
+#define STACK_SLOT_FSCR		(SFS-96)
+
+/*
+ * Use the last LPID (all implemented LPID bits = 1) for partition switching.
+ * This is reserved in the LPID allocator. POWER7 only implements 0x3ff, but
+ * we write 0xfff into the LPID SPR anyway, which seems to work and just
+ * ignores the top bits.
+ */
+#define   LPID_RSVD		0xfff
+
+/*
+ * Call kvmppc_hv_entry in real mode.
+ * Must be called with interrupts hard-disabled.
+ *
+ * Input Registers:
+ *
+ * LR = return address to continue at after eventually re-enabling MMU
+ */
+_GLOBAL_TOC(kvmppc_hv_entry_trampoline)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)	/* save return address in caller's frame */
+	stdu	r1, -112(r1)		/* push a 112-byte frame */
+	mfmsr	r10
+	std	r10, HSTATE_HOST_MSR(r13)	/* reloaded on the way back out */
+	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
+	li	r0,MSR_RI
+	andc	r0,r10,r0
+	li	r6,MSR_IR | MSR_DR
+	andc	r6,r10,r6		/* r6 = host MSR with IR and DR cleared */
+	mtmsrd	r0,1		/* clear RI in MSR */
+	mtsrr0	r5		/* rfi target: kvmppc_call_hv_entry */
+	mtsrr1	r6		/* ... entered with the MSR built in r6 */
+	RFI_TO_KERNEL
+
+kvmppc_call_hv_entry:
+	ld	r4, HSTATE_KVM_VCPU(r13)	/* r4 = vcpu pointer argument for kvmppc_hv_entry */
+	bl	kvmppc_hv_entry
+
+	/* Back from guest - restore host state and return to caller */
+
+BEGIN_FTR_SECTION
+	/* Restore host DABR and DABRX */
+	ld	r5,HSTATE_DABR(r13)
+	li	r6,7
+	mtspr	SPRN_DABR,r5
+	mtspr	SPRN_DABRX,r6
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+	/* Restore SPRG3 */
+	ld	r3,PACA_SPRG_VDSO(r13)
+	mtspr	SPRN_SPRG_VDSO_WRITE,r3
+
+	/* Reload the host's PMU registers */
+	bl	kvmhv_load_host_pmu
+
+	/*
+	 * Reload DEC.  HDEC interrupts were disabled when
+	 * we reloaded the host's LPCR value.
+	 */
+	ld	r3, HSTATE_DECEXP(r13)
+	mftb	r4
+	subf	r4, r4, r3		/* r4 = HSTATE_DECEXP - current timebase */
+	mtspr	SPRN_DEC, r4
+
+	/* hwthread_req may have got set by cede or no vcpu, so clear it */
+	li	r0, 0
+	stb	r0, HSTATE_HWTHREAD_REQ(r13)
+
+	/*
+	 * Pop the trampoline frame and fetch the return address saved
+	 * by kvmppc_hv_entry_trampoline, then rfid back to it with the
+	 * host MSR that was stashed in HSTATE_HOST_MSR.
+	 * NOTE(review): earlier wording here said external interrupts
+	 * are handled by jumping to absolute address 0x500; no such
+	 * jump appears in this routine, so that text is presumably
+	 * stale -- confirm before relying on it.
+	 */
+	ld	r8, 112+PPC_LR_STKOFF(r1)	/* LR saved by kvmppc_hv_entry_trampoline */
+	addi	r1, r1, 112		/* pop the trampoline's frame */
+	ld	r7, HSTATE_HOST_MSR(r13)
+
+	/* Return the trap number on this thread as the return value */
+	mr	r3, r12
+
+	/* RFI into the highmem handler */
+	mfmsr	r6
+	li	r0, MSR_RI
+	andc	r6, r6, r0
+	mtmsrd	r6, 1			/* Clear RI in MSR */
+	mtsrr0	r8			/* return address */
+	mtsrr1	r7			/* host MSR saved on entry */
+	RFI_TO_KERNEL
+
+kvmppc_primary_no_guest:
+	/* We handle this much like a ceded vcpu */
+	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
+	/* HDEC value came from DEC in the first place, it will fit */
+	mfspr	r3, SPRN_HDEC
+	mtspr	SPRN_DEC, r3
+	/*
+	 * Make sure the primary has finished the MMU switch.
+	 * We should never get here on a secondary thread, but
+	 * check it for robustness' sake.
+	 */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+65:	lbz	r0, VCORE_IN_GUEST(r5)
+	cmpwi	r0, 0
+	beq	65b			/* spin while the vcore in_guest byte is still 0 */
+	/* Set LPCR. */
+	ld	r8,VCORE_LPCR(r5)
+	mtspr	SPRN_LPCR,r8
+	isync
+	/* set our bit in napping_threads */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lbz	r7, HSTATE_PTID(r13)
+	li	r0, 1
+	sld	r0, r0, r7		/* r0 = 1 << ptid */
+	addi	r6, r5, VCORE_NAPPING_THREADS
+1:	lwarx	r3, 0, r6
+	or	r3, r3, r0
+	stwcx.	r3, 0, r6
+	bne	1b			/* reservation lost, retry */
+	/* order napping_threads update vs testing entry_exit_map */
+	isync
+	li	r12, 0			/* trap value used if we leave via kvm_novcpu_exit */
+	lwz	r7, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r7, 0x100
+	bge	kvm_novcpu_exit	/* another thread already exiting */
+	li	r3, NAPPING_NOVCPU
+	stb	r3, HSTATE_NAPPING(r13)
+
+	li	r3, 0		/* Don't wake on privileged (OS) doorbell */
+	b	kvm_do_nap
+
+/*
+ * kvm_novcpu_wakeup
+ *	Entered from kvm_start_guest if kvm_hstate.napping is set
+ *	to NAPPING_NOVCPU
+ *		r2 = kernel TOC
+ *		r13 = paca
+ */
+kvm_novcpu_wakeup:
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	li	r0, 0
+	stb	r0, HSTATE_NAPPING(r13)	/* napping = 0 */
+
+	/* check the wake reason */
+	bl	kvmppc_check_wake_reason
+
+	/*
+	 * Restore volatile registers since we could have called
+	 * a C routine in kvmppc_check_wake_reason.
+	 *	r5 = VCORE
+	 */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+
+	/* see if any other thread is already exiting */
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100
+	bge	kvm_novcpu_exit
+
+	/* clear our bit in napping_threads */
+	lbz	r7, HSTATE_PTID(r13)
+	li	r0, 1
+	sld	r0, r0, r7		/* r0 = 1 << ptid */
+	addi	r6, r5, VCORE_NAPPING_THREADS
+4:	lwarx	r7, 0, r6
+	andc	r7, r7, r0
+	stwcx.	r7, 0, r6
+	bne	4b			/* reservation lost, retry */
+
+	/* See if the wake reason means we need to exit */
+	cmpdi	r3, 0			/* r3 is as left by kvmppc_check_wake_reason */
+	bge	kvm_novcpu_exit
+
+	/* See if our timeslice has expired (HDEC is negative) */
+	mfspr	r0, SPRN_HDEC
+	extsw	r0, r0
+	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER	/* trap used if HDEC < 0 */
+	cmpdi	r0, 0
+	blt	kvm_novcpu_exit
+
+	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	beq	kvmppc_primary_no_guest	/* still no vcpu pointer */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	addi	r3, r4, VCPU_TB_RMENTRY
+	bl	kvmhv_start_timing
+#endif
+	b	kvmppc_got_guest
+
+kvm_novcpu_exit:
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	beq	13f			/* no vcpu: skip the time accounting */
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+#endif
+13:	mr	r3, r12			/* r3 = trap number held in r12 */
+	stw	r12, STACK_SLOT_TRAP(r1)
+	bl	kvmhv_commence_exit
+	nop
+	b	kvmhv_switch_to_host
+
+/*
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
+ * r3 contains the SRR1 wakeup value, SRR1 is trashed.
+ */
+_GLOBAL(idle_kvm_start_guest)
+	mfcr	r5
+	mflr	r0
+	std	r5, 8(r1)	// Save CR in caller's frame
+	std	r0, 16(r1)	// Save LR in caller's frame
+	// Create frame on emergency stack
+	ld	r4, PACAEMERGSP(r13)
+	stdu	r1, -SWITCH_FRAME_SIZE(r4)	// r4 = new frame, back chain = old r1
+	// Switch to new frame on emergency stack
+	mr	r1, r4
+	std	r3, 32(r1)	// Save SRR1 wakeup value
+	SAVE_NVGPRS(r1)
+
+	/*
+	 * Could avoid this and pass it through in r3. For now,
+	 * code expects it to be in SRR1.
+	 */
+	mtspr	SPRN_SRR1,r3
+
+	li	r0,0
+	stb	r0,PACA_FTRACE_ENABLED(r13)	// store 0 to the paca ftrace_enabled byte
+
+	li	r0,KVM_HWTHREAD_IN_KVM
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+
+	/* kvm cede / napping does not come through here */
+	lbz	r0,HSTATE_NAPPING(r13)
+	twnei	r0,0		// trap if napping is non-zero
+
+	b	1f
+
+kvm_unsplit_wakeup:
+	li	r0, 0
+	stb	r0, HSTATE_NAPPING(r13)	// napping = 0
+
+1:
+
+	/*
+	 * We weren't napping due to cede, so this must be a secondary
+	 * thread being woken up to run a guest, or being woken up due
+	 * to a stray IPI.  (Or due to some machine check or hypervisor
+	 * maintenance interrupt while the core is in KVM.)
+	 */
+
+	/* Check the wake reason in SRR1 to see why we got here */
+	bl	kvmppc_check_wake_reason
+	/*
+	 * kvmppc_check_wake_reason could invoke a C routine, but we
+	 * have no volatile registers to restore when we return.
+	 */
+
+	cmpdi	r3, 0		// r3 is as left by kvmppc_check_wake_reason
+	bge	kvm_no_guest
+
+	/* get vcore pointer, NULL if we have nothing to run */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	cmpdi	r5,0
+	/* if we have no vcore to run, go back to sleep */
+	beq	kvm_no_guest
+
+kvm_secondary_got_guest:
+
+	// About to go to guest, clear saved SRR1
+	li	r0, 0
+	std	r0, 32(r1)	// same slot idle_kvm_start_guest saved SRR1 in
+
+	/* Set HSTATE_DSCR(r13) to something sensible */
+	ld	r6, PACA_DSCR_DEFAULT(r13)
+	std	r6, HSTATE_DSCR(r13)
+
+	/* On thread 0 of a subcore, set HDEC to max */
+	lbz	r4, HSTATE_PTID(r13)
+	cmpwi	r4, 0
+	bne	63f			/* ptid != 0: skip the thread-0 setup */
+	lis	r6,0x7fff		/* MAX_INT@h */
+	mtspr	SPRN_HDEC, r6
+	/* and set per-LPAR registers, if doing dynamic micro-threading */
+	ld	r6, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r6, 0
+	beq	63f			/* split-mode pointer is NULL */
+	ld	r0, KVM_SPLIT_RPR(r6)
+	mtspr	SPRN_RPR, r0
+	ld	r0, KVM_SPLIT_PMMAR(r6)
+	mtspr	SPRN_PMMAR, r0
+	ld	r0, KVM_SPLIT_LDBAR(r6)
+	mtspr	SPRN_LDBAR, r0
+	isync
+63:
+	/* Order load of vcpu after load of vcore */
+	lwsync
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	bl	kvmppc_hv_entry
+
+	/* Back from the guest, go back to nap */
+	/* Clear our vcpu and vcore pointers so we don't come back in early */
+	li	r0, 0
+	std	r0, HSTATE_KVM_VCPU(r13)
+	/*
+	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
+	 * kvmppc_run_core() is going to assume that all our vcpu
+	 * state is visible in memory.  This lwsync makes sure
+	 * that this is true.
+	 */
+	lwsync
+	std	r0, HSTATE_KVM_VCORE(r13)
+
+	/*
+	 * All secondaries exiting the guest fall through this path.
+	 * Before proceeding, check for an HMI interrupt and
+	 * invoke the opal hmi handler. By now we are sure that the
+	 * primary thread on this core/subcore has already made the partition
+	 * switch/TB resync and we are good to call the opal hmi handler.
+	 */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	kvm_no_guest
+
+	li	r3,0			/* NULL argument */
+	bl	CFUNC(hmi_exception_realmode)
+/*
+ * At this point we have finished executing in the guest.
+ * We need to wait for hwthread_req to become zero, since
+ * we may not turn on the MMU while hwthread_req is non-zero.
+ * While waiting we also need to check if we get given a vcpu to run.
+ */
+kvm_no_guest:
+	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r3, 0
+	bne	53f			/* hwthread_req set: poll for work at 53 */
+	HMT_MEDIUM
+	li	r0, KVM_HWTHREAD_IN_KERNEL
+	stb	r0, HSTATE_HWTHREAD_STATE(r13)
+	/* need to recheck hwthread_req after a barrier, to avoid race */
+	sync
+	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r3, 0
+	bne	54f			/* it got set meanwhile: undo the state change */
+
+	/*
+	 * Set PECE0 and clear PECE1 in LPCR, unwind our frame and blr to
+	 * the idle_kvm_start_guest caller (no jump to idle_return_gpr_loss).
+	 */
+	li	r3, LPCR_PECE0
+	mfspr	r4, SPRN_LPCR
+	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
+	mtspr	SPRN_LPCR, r4
+	// Return SRR1 wakeup value, or 0 if we went into the guest
+	ld	r3, 32(r1)
+	REST_NVGPRS(r1)
+	ld	r1, 0(r1)	// Switch back to caller stack
+	ld	r0, 16(r1)	// Reload LR
+	ld	r5, 8(r1)	// Reload CR
+	mtlr	r0
+	mtcr	r5
+	blr
+
+53:
+	HMT_LOW
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	cmpdi	r5, 0
+	bne	60f			/* vcore pointer is set: go run it */
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	cmpdi	r3, 0
+	beq	kvm_no_guest		/* no split-mode info: keep polling */
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
+	beq	kvm_no_guest		/* do_nap clear: keep polling */
+	HMT_MEDIUM
+	b	kvm_unsplit_nap
+60:	HMT_MEDIUM
+	b	kvm_secondary_got_guest
+
+54:	li	r0, KVM_HWTHREAD_IN_KVM
+	stb	r0, HSTATE_HWTHREAD_STATE(r13)	/* back to the IN_KVM state */
+	b	kvm_no_guest
+
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+	/*
+	 * When secondaries are napping in kvm_unsplit_nap() with
+	 * hwthread_req = 1, an HMI goes unhandled even though the subcores
+	 * have already exited the guest. The HMI then keeps waking the
+	 * secondaries from nap in a loop, and they always go back to nap
+	 * since no vcore is assigned to them. This makes it impossible for
+	 * the primary thread to get hold of the secondary threads, resulting
+	 * in a soft lockup in the KVM path.
+	 *
+	 * So check whether an HMI is pending and handle it before napping.
+	 */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	bne	55f
+	li	r3, 0			/* NULL argument */
+	bl	CFUNC(hmi_exception_realmode)
+55:
+	/*
+	 * Ensure that secondary doesn't nap when it has
+	 * its vcore pointer set.
+	 */
+	sync		/* matches smp_mb() before setting split_info.do_nap */
+	ld	r0, HSTATE_KVM_VCORE(r13)
+	cmpdi	r0, 0
+	bne	kvm_no_guest
+	/* clear any pending message */
+BEGIN_FTR_SECTION
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	/* Set kvm_split_mode.napped[tid] = 1 */
+	ld	r3, HSTATE_SPLIT_MODE(r13)
+	li	r0, 1
+	lhz	r4, PACAPACAINDEX(r13)
+	clrldi	r4, r4, 61	/* micro-threading => P8 => 8 threads/core */
+	addi	r4, r4, KVM_SPLIT_NAPPED	/* r4 = offset of napped[tid] */
+	stbx	r0, r3, r4
+	/* Check the do_nap flag again after setting napped[] */
+	sync
+	lbz	r0, KVM_SPLIT_DO_NAP(r3)
+	cmpwi	r0, 0
+	beq	57f
+	li	r3, NAPPING_UNSPLIT
+	stb	r3, HSTATE_NAPPING(r13)
+	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+	mfspr	r5, SPRN_LPCR
+	rlwimi	r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+	b	kvm_nap_sequence	/* r5 = LPCR with PECEDH|PECE0 set, PECEDP|PECE1 clear */
+
+57:	li	r0, 0
+	stbx	r0, r3, r4		/* do_nap is clear: undo napped[tid] = 1 */
+	b	kvm_no_guest
+
+/******************************************************************************
+ *                                                                            *
+ *                               Entry code                                   *
+ *                                                                            *
+ *****************************************************************************/
+
+SYM_CODE_START_LOCAL(kvmppc_hv_entry)
+
+	/* Required state:
+	 *
+	 * R4 = vcpu pointer (or NULL)
+	 * MSR = ~IR|DR (i.e. MSR[IR] and MSR[DR] both clear)
+	 * R13 = PACA
+	 * R1 = host R1
+	 * R2 = TOC
+	 * all other volatile GPRS = free
+	 * Does not preserve non-volatile GPRs or CR fields
+	 */
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu	r1, -SFS(r1)		/* SFS-byte frame; STACK_SLOT_* are offsets into it */
+
+	/* Save R1 in the PACA */
+	std	r1, HSTATE_HOST_R1(r13)
+
+	li	r6, KVM_GUEST_MODE_HOST_HV
+	stb	r6, HSTATE_IN_GUEST(r13)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	/* Store initial timestamp */
+	cmpdi	r4, 0
+	beq	1f
+	addi	r3, r4, VCPU_TB_RMENTRY
+	bl	kvmhv_start_timing
+1:
+#endif
+
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r9, VCORE_KVM(r5)	/* pointer to struct kvm */
+
+	/*
+	 * POWER7/POWER8 host -> guest partition switch code.
+	 * We don't have to lock against concurrent tlbies,
+	 * but we do have to coordinate across hardware threads.
+	 */
+	/* Set bit in entry map iff exit map is zero. */
+	li	r7, 1
+	lbz	r6, HSTATE_PTID(r13)
+	sld	r7, r7, r6		/* r7 = 1 << ptid */
+	addi	r8, r5, VCORE_ENTRY_EXIT
+21:	lwarx	r3, 0, r8
+	cmpwi	r3, 0x100		/* any threads starting to exit? */
+	bge	secondary_too_late	/* if so we're too late to the party */
+	or	r3, r3, r7
+	stwcx.	r3, 0, r8
+	bne	21b			/* reservation lost, retry */
+
+	/* Primary thread switches to guest partition. */
+	cmpwi	r6,0
+	bne	10f			/* ptid != 0: skip the partition switch */
+
+	lwz	r7,KVM_LPID(r9)
+	ld	r6,KVM_SDR1(r9)
+	li	r0,LPID_RSVD		/* switch to reserved LPID */
+	mtspr	SPRN_LPID,r0
+	ptesync
+	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
+	mtspr	SPRN_LPID,r7
+	isync
+
+	/* See if we need to flush the TLB. */
+	mr	r3, r9			/* kvm pointer */
+	lhz	r4, PACAPACAINDEX(r13)	/* physical cpu number */
+	li	r5, 0			/* nested vcpu pointer */
+	bl	kvmppc_check_need_tlb_flush
+	nop
+	ld	r5, HSTATE_KVM_VCORE(r13)
+
+	/* Add timebase offset onto timebase */
+22:	ld	r8,VCORE_TB_OFFSET(r5)
+	cmpdi	r8,0
+	beq	37f			/* zero offset: leave the timebase alone */
+	std	r8, VCORE_TB_OFFSET_APPL(r5)
+	mftb	r6		/* current host timebase */
+	add	r8,r8,r6
+	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
+	mftb	r7		/* check if lower 24 bits overflowed */
+	clrldi	r6,r6,40	/* low 24 bits of the first sample */
+	clrldi	r7,r7,40	/* low 24 bits of the second sample */
+	cmpld	r7,r6
+	bge	37f
+	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
+	mtspr	SPRN_TBU40,r8
+
+	/* Load guest PCR value to select appropriate compat mode */
+37:	ld	r7, VCORE_PCR(r5)
+	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+	cmpld	r7, r6
+	beq	38f			/* vcore PCR == PCR_MASK: skip the write */
+	or	r7, r7, r6
+	mtspr	SPRN_PCR, r7
+38:
+
+BEGIN_FTR_SECTION
+	/* DPDES and VTB are shared between threads */
+	ld	r8, VCORE_DPDES(r5)
+	ld	r7, VCORE_VTB(r5)
+	mtspr	SPRN_DPDES, r8
+	mtspr	SPRN_VTB, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+	/* Mark the subcore state as inside guest */
+	bl	kvmppc_subcore_enter_guest
+	nop
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	li	r0,1
+	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
+
+	/* Do we have a guest vcpu to run? */
+10:	cmpdi	r4, 0
+	beq	kvmppc_primary_no_guest
+kvmppc_got_guest:
+	/* Increment yield count if they have a VPA */
+	ld	r3, VCPU_VPA(r4)
+	cmpdi	r3, 0
+	beq	25f
+	li	r6, LPPACA_YIELDCOUNT
+	LWZX_BE	r5, r3, r6
+	addi	r5, r5, 1
+	STWX_BE	r5, r3, r6
+	li	r6, 1
+	stb	r6, VCPU_VPA_DIRTY(r4)
+25:
+
+	/* Save purr/spurr */
+	mfspr	r5,SPRN_PURR
+	mfspr	r6,SPRN_SPURR
+	std	r5,HSTATE_PURR(r13)
+	std	r6,HSTATE_SPURR(r13)
+	ld	r7,VCPU_PURR(r4)
+	ld	r8,VCPU_SPURR(r4)
+	mtspr	SPRN_PURR,r7
+	mtspr	SPRN_SPURR,r8
+
+	/* Save host values of some registers */
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_CIABR
+	mfspr	r6, SPRN_DAWR0
+	mfspr	r7, SPRN_DAWRX0
+	mfspr	r8, SPRN_IAMR
+	std	r5, STACK_SLOT_CIABR(r1)
+	std	r6, STACK_SLOT_DAWR0(r1)
+	std	r7, STACK_SLOT_DAWRX0(r1)
+	std	r8, STACK_SLOT_IAMR(r1)
+	mfspr	r5, SPRN_FSCR
+	std	r5, STACK_SLOT_FSCR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+	mfspr	r5, SPRN_AMR
+	std	r5, STACK_SLOT_AMR(r1)
+	mfspr	r6, SPRN_UAMOR
+	std	r6, STACK_SLOT_UAMOR(r1)
+
+BEGIN_FTR_SECTION
+	/* Set partition DABR */
+	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
+	lwz	r5,VCPU_DABRX(r4)
+	ld	r6,VCPU_DABR(r4)
+	mtspr	SPRN_DABRX,r5
+	mtspr	SPRN_DABR,r6
+	isync
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+	/*
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
+	 */
+	mr      r3, r4
+	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
+	bl	kvmppc_restore_tm_hv
+	nop
+	ld	r4, HSTATE_KVM_VCPU(r13)	/* reload vcpu pointer after the call */
+91:
+#endif
+
+	/* Load guest PMU registers; r4 = vcpu pointer here */
+	mr	r3, r4
+	bl	kvmhv_load_guest_pmu
+
+	/* Load up FP, VMX and VSX registers */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	bl	kvmppc_load_fp
+
+	ld	r14, VCPU_GPR(R14)(r4)
+	ld	r15, VCPU_GPR(R15)(r4)
+	ld	r16, VCPU_GPR(R16)(r4)
+	ld	r17, VCPU_GPR(R17)(r4)
+	ld	r18, VCPU_GPR(R18)(r4)
+	ld	r19, VCPU_GPR(R19)(r4)
+	ld	r20, VCPU_GPR(R20)(r4)
+	ld	r21, VCPU_GPR(R21)(r4)
+	ld	r22, VCPU_GPR(R22)(r4)
+	ld	r23, VCPU_GPR(R23)(r4)
+	ld	r24, VCPU_GPR(R24)(r4)
+	ld	r25, VCPU_GPR(R25)(r4)
+	ld	r26, VCPU_GPR(R26)(r4)
+	ld	r27, VCPU_GPR(R27)(r4)
+	ld	r28, VCPU_GPR(R28)(r4)
+	ld	r29, VCPU_GPR(R29)(r4)
+	ld	r30, VCPU_GPR(R30)(r4)
+	ld	r31, VCPU_GPR(R31)(r4)
+
+	/* Switch DSCR to guest value */
+	ld	r5, VCPU_DSCR(r4)
+	mtspr	SPRN_DSCR, r5
+
+BEGIN_FTR_SECTION
+	/* Skip next section on POWER7 */
+	b	8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+	/* Load up POWER8-specific registers */
+	ld	r5, VCPU_IAMR(r4)
+	lwz	r6, VCPU_PSPB(r4)
+	ld	r7, VCPU_FSCR(r4)
+	mtspr	SPRN_IAMR, r5
+	mtspr	SPRN_PSPB, r6
+	mtspr	SPRN_FSCR, r7
+	/*
+	 * Handle broken DAWR case by not writing it. This means we
+	 * can still store the DAWR register for migration.
+	 */
+	LOAD_REG_ADDR(r5, dawr_force_enable)
+	lbz	r5, 0(r5)
+	cmpdi	r5, 0
+	beq	1f			/* dawr_force_enable is 0: skip the DAWR writes */
+	ld	r5, VCPU_DAWR0(r4)
+	ld	r6, VCPU_DAWRX0(r4)
+	mtspr	SPRN_DAWR0, r5
+	mtspr	SPRN_DAWRX0, r6
+1:
+	ld	r7, VCPU_CIABR(r4)
+	ld	r8, VCPU_TAR(r4)
+	mtspr	SPRN_CIABR, r7
+	mtspr	SPRN_TAR, r8
+	ld	r5, VCPU_IC(r4)
+	ld	r8, VCPU_EBBHR(r4)
+	mtspr	SPRN_IC, r5
+	mtspr	SPRN_EBBHR, r8
+	ld	r5, VCPU_EBBRR(r4)
+	ld	r6, VCPU_BESCR(r4)
+	lwz	r7, VCPU_GUEST_PID(r4)
+	ld	r8, VCPU_WORT(r4)
+	mtspr	SPRN_EBBRR, r5
+	mtspr	SPRN_BESCR, r6
+	mtspr	SPRN_PID, r7
+	mtspr	SPRN_WORT, r8
+	/* POWER8-only registers */
+	ld	r5, VCPU_TCSCR(r4)
+	ld	r6, VCPU_ACOP(r4)
+	ld	r7, VCPU_CSIGR(r4)
+	ld	r8, VCPU_TACR(r4)
+	mtspr	SPRN_TCSCR, r5
+	mtspr	SPRN_ACOP, r6
+	mtspr	SPRN_CSIGR, r7
+	mtspr	SPRN_TACR, r8
+	nop
+8:
+
+	ld	r5, VCPU_SPRG0(r4)
+	ld	r6, VCPU_SPRG1(r4)
+	ld	r7, VCPU_SPRG2(r4)
+	ld	r8, VCPU_SPRG3(r4)
+	mtspr	SPRN_SPRG0, r5
+	mtspr	SPRN_SPRG1, r6
+	mtspr	SPRN_SPRG2, r7
+	mtspr	SPRN_SPRG3, r8
+
+	/* Load up DAR and DSISR */
+	ld	r5, VCPU_DAR(r4)
+	lwz	r6, VCPU_DSISR(r4)
+	mtspr	SPRN_DAR, r5
+	mtspr	SPRN_DSISR, r6
+
+	/* Load guest AMR and UAMOR (no AMOR write is done here) */
+	ld	r5,VCPU_AMR(r4)
+	ld	r6,VCPU_UAMOR(r4)
+	mtspr	SPRN_AMR,r5
+	mtspr	SPRN_UAMOR,r6
+
+	/* Restore state of CTRL run bit; the host currently has it set to 1 */
+	lwz	r5,VCPU_CTRL(r4)
+	andi.	r5,r5,1
+	bne	4f			/* saved run bit is 1: nothing to change */
+	li	r6,0
+	mtspr	SPRN_CTRLT,r6
+4:
+	/* Secondary threads wait for primary to have done partition switch */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lbz	r6, HSTATE_PTID(r13)
+	cmpwi	r6, 0
+	beq	21f			/* ptid 0 does not wait */
+	lbz	r0, VCORE_IN_GUEST(r5)
+	cmpwi	r0, 0
+	bne	21f			/* in_guest already set */
+	HMT_LOW
+20:	lwz	r3, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r3, 0x100
+	bge	no_switch_exit		/* some thread has started to exit */
+	lbz	r0, VCORE_IN_GUEST(r5)
+	cmpwi	r0, 0
+	beq	20b
+	HMT_MEDIUM
+21:
+	/* Set LPCR. */
+	ld	r8,VCORE_LPCR(r5)
+	mtspr	SPRN_LPCR,r8
+	isync
+
+	/*
+	 * Set the decrementer to the guest decrementer.
+	 */
+	ld	r8,VCPU_DEC_EXPIRES(r4)
+	mftb	r7
+	subf	r3,r7,r8		/* r3 = dec_expires - current timebase */
+	mtspr	SPRN_DEC,r3
+
+	/* Check if HDEC expires soon */
+	mfspr	r3, SPRN_HDEC
+	extsw	r3, r3
+	cmpdi	r3, 512		/* 1 microsecond */
+	blt	hdec_soon
+
+	/* Clear out and reload the SLB */
+	li	r6, 0
+	slbmte	r6, r6
+	PPC_SLBIA(6)
+	ptesync
+
+	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+	lwz	r5,VCPU_SLB_MAX(r4)
+	cmpwi	r5,0
+	beq	9f
+	mtctr	r5			/* loop count = slb_max */
+	addi	r6,r4,VCPU_SLB
+1:	ld	r8,VCPU_SLB_E(r6)
+	ld	r9,VCPU_SLB_V(r6)
+	slbmte	r9,r8
+	addi	r6,r6,VCPU_SLB_SIZE	/* advance to the next saved entry */
+	bdnz	1b
+9:
+
+deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
+	/* Check if we can deliver an external or decrementer interrupt now */
+	ld	r0, VCPU_PENDING_EXC(r4)
+	cmpdi	r0, 0
+	beq	71f			/* pending_exc is zero: skip the call */
+	mr	r3, r4
+	bl	CFUNC(kvmppc_guest_entry_inject_int)
+	ld	r4, HSTATE_KVM_VCPU(r13)	/* reload vcpu pointer after the C call */
+71:
+	ld	r6, VCPU_SRR0(r4)
+	ld	r7, VCPU_SRR1(r4)
+	mtspr	SPRN_SRR0, r6
+	mtspr	SPRN_SRR1, r7
+
+	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)
+	/* r11 = vcpu->arch.msr & ~MSR_HV */
+	rldicl	r11, r11, 63 - MSR_HV_LG, 1
+	rotldi	r11, r11, 1 + MSR_HV_LG
+	ori	r11, r11, MSR_ME	/* and force MSR_ME on */
+
+	ld	r6, VCPU_CTR(r4)
+	ld	r7, VCPU_XER(r4)
+	mtctr	r6
+	mtxer	r7
+
+/*
+ * Required state:
+ * R4 = vcpu
+ * R10: value for HSRR0
+ * R11: value for HSRR1
+ * R13 = PACA
+ */
+fast_guest_return:
+	li	r0,0
+	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
+	mtspr	SPRN_HSRR0,r10
+	mtspr	SPRN_HSRR1,r11
+
+	/* Activate guest mode, so faults get handled by KVM */
+	li	r9, KVM_GUEST_MODE_GUEST_HV
+	stb	r9, HSTATE_IN_GUEST(r13)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	/* Accumulate timing */
+	addi	r3, r4, VCPU_TB_GUEST
+	bl	kvmhv_accumulate_time
+#endif
+
+	/* Enter guest */
+
+BEGIN_FTR_SECTION
+	ld	r5, VCPU_CFAR(r4)
+	mtspr	SPRN_CFAR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r0, VCPU_PPR(r4)	/* kept in r0 until the mtspr SPRN_PPR below */
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	ld	r5, VCPU_LR(r4)
+	mtlr	r5
+
+	ld	r1, VCPU_GPR(R1)(r4)
+	ld	r5, VCPU_GPR(R5)(r4)
+	ld	r8, VCPU_GPR(R8)(r4)
+	ld	r9, VCPU_GPR(R9)(r4)
+	ld	r10, VCPU_GPR(R10)(r4)
+	ld	r11, VCPU_GPR(R11)(r4)
+	ld	r12, VCPU_GPR(R12)(r4)
+	ld	r13, VCPU_GPR(R13)(r4)
+
+BEGIN_FTR_SECTION
+	mtspr	SPRN_PPR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	ld	r6, VCPU_GPR(R6)(r4)
+	ld	r7, VCPU_GPR(R7)(r4)
+
+	ld	r0, VCPU_CR(r4)
+	mtcr	r0
+
+	ld	r0, VCPU_GPR(R0)(r4)
+	ld	r2, VCPU_GPR(R2)(r4)
+	ld	r3, VCPU_GPR(R3)(r4)
+	ld	r4, VCPU_GPR(R4)(r4)	/* r4 last: it is the base register for all loads above */
+	HRFI_TO_GUEST
+	b	.
+SYM_CODE_END(kvmppc_hv_entry)
+
+secondary_too_late:
+	li	r12, 0			/* trap = 0 */
+	stw	r12, STACK_SLOT_TRAP(r1)
+	cmpdi	r4, 0
+	beq	11f			/* r4 (vcpu pointer) is NULL: nothing more to record */
+	stw	r12, VCPU_TRAP(r4)
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+#endif
+11:	b	kvmhv_switch_to_host
+
+no_switch_exit:
+	HMT_MEDIUM
+	li	r12, 0			/* trap = 0 */
+	b	12f
+hdec_soon:
+	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER	/* report an HV decrementer trap */
+12:	stw	r12, VCPU_TRAP(r4)
+	mr	r9, r4			/* guest_bypass addresses the vcpu through r9 */
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+#endif
+	b	guest_bypass
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+/*
+ * We come here from the first-level interrupt handlers.
+ */
+	.globl	kvmppc_interrupt_hv
+kvmppc_interrupt_hv:
+	/*
+	 * Register contents:
+	 * R9		= HSTATE_IN_GUEST
+	 * R12		= (guest CR << 32) | interrupt vector
+	 * R13		= PACA
+	 * guest R12 saved in shadow VCPU SCRATCH0
+	 * guest R13 saved in SPRN_SCRATCH0
+	 * guest R9 saved in HSTATE_SCRATCH2
+	 */
+	/* We're now back in the host but in guest MMU context */
+	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
+	li	r9, KVM_GUEST_MODE_HOST_HV
+	stb	r9, HSTATE_IN_GUEST(r13)
+
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* r9 = vcpu pointer from here on */
+
+	/* Save registers */
+
+	std	r0, VCPU_GPR(R0)(r9)
+	std	r1, VCPU_GPR(R1)(r9)
+	std	r2, VCPU_GPR(R2)(r9)
+	std	r3, VCPU_GPR(R3)(r9)
+	std	r4, VCPU_GPR(R4)(r9)
+	std	r5, VCPU_GPR(R5)(r9)
+	std	r6, VCPU_GPR(R6)(r9)
+	std	r7, VCPU_GPR(R7)(r9)
+	std	r8, VCPU_GPR(R8)(r9)
+	ld	r0, HSTATE_SCRATCH2(r13)	/* guest r9 */
+	std	r0, VCPU_GPR(R9)(r9)
+	std	r10, VCPU_GPR(R10)(r9)
+	std	r11, VCPU_GPR(R11)(r9)
+	ld	r3, HSTATE_SCRATCH0(r13)	/* guest r12 */
+	std	r3, VCPU_GPR(R12)(r9)
+	/* CR is in the high half of r12 */
+	srdi	r4, r12, 32
+	std	r4, VCPU_CR(r9)
+BEGIN_FTR_SECTION
+	ld	r3, HSTATE_CFAR(r13)
+	std	r3, VCPU_CFAR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r4, HSTATE_PPR(r13)
+	std	r4, VCPU_PPR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	/* Restore R1/R2 so we can handle faults */
+	ld	r1, HSTATE_HOST_R1(r13)
+	LOAD_PACA_TOC()
+
+	mfspr	r10, SPRN_SRR0
+	mfspr	r11, SPRN_SRR1
+	std	r10, VCPU_SRR0(r9)
+	std	r11, VCPU_SRR1(r9)
+	/* trap is in the low half of r12, clear CR from the high half */
+	clrldi	r12, r12, 32
+	andi.	r0, r12, 2		/* need to read HSRR0/1? */
+	beq	1f
+	mfspr	r10, SPRN_HSRR0
+	mfspr	r11, SPRN_HSRR1
+	clrrdi	r12, r12, 2		/* clear the low two bits of the trap number */
+1:	std	r10, VCPU_PC(r9)
+	std	r11, VCPU_MSR(r9)
+
+	GET_SCRATCH0(r3)
+	mflr	r4
+	std	r3, VCPU_GPR(R13)(r9)
+	std	r4, VCPU_LR(r9)
+
+	stw	r12,VCPU_TRAP(r9)
+
+	/*
+	 * Now that we have saved away SRR0/1 and HSRR0/1,
+	 * interrupts are recoverable in principle, so set MSR_RI.
+	 * This becomes important for relocation-on interrupts from
+	 * the guest, which we can get in radix mode on POWER9.
+	 */
+	li	r0, MSR_RI
+	mtmsrd	r0, 1
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	addi	r3, r9, VCPU_TB_RMINTR
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
+	ld	r5, VCPU_GPR(R5)(r9)	/* reload r5-r8 from the values saved above */
+	ld	r6, VCPU_GPR(R6)(r9)
+	ld	r7, VCPU_GPR(R7)(r9)
+	ld	r8, VCPU_GPR(R8)(r9)
+#endif
+
+	/* VCPU_LAST_INST is reset to KVM_INST_FETCH_FAILED; VCPU_HEIR gets HEIR (HV
+	   emulation assist reg) for an HEI (HV emulation interrupt, e40), else the same */
+	li	r3,KVM_INST_FETCH_FAILED
+	std	r3,VCPU_LAST_INST(r9)
+	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
+	bne	11f
+	mfspr	r3,SPRN_HEIR
+11:	std	r3,VCPU_HEIR(r9)
+
+	/* these are volatile across C function calls */
+	mfctr	r3
+	mfxer	r4
+	std	r3, VCPU_CTR(r9)
+	std	r4, VCPU_XER(r9)
+
+	/* Save more register state  */
+	mfdar	r3
+	mfdsisr	r4
+	std	r3, VCPU_DAR(r9)
+	stw	r4, VCPU_DSISR(r9)
+
+	/* If this is a page table miss then see if it's theirs or ours */
+	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+	beq	kvmppc_hdsi
+	std	r3, VCPU_FAULT_DAR(r9)
+	stw	r4, VCPU_FAULT_DSISR(r9)
+	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+	beq	kvmppc_hisi
+
+	/* See if this is a leftover HDEC interrupt */
+	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
+	bne	2f
+	mfspr	r3,SPRN_HDEC
+	extsw	r3, r3
+	cmpdi	r3,0
+	mr	r4,r9			/* fast_guest_return requires the vcpu in r4 */
+	bge	fast_guest_return	/* HDEC not negative: go straight back in */
+2:
+	/* See if this is an hcall we can handle in real mode */
+	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
+	beq	hcall_try_real_mode
+
+	/* Hypervisor doorbell - exit only if host IPI flag set */
+	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	bne	3f
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	beq	maybe_reenter_guest	/* host_ipi is clear */
+	b	guest_exit_cont
+3:
+	/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
+	cmpwi	r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
+	bne	14f
+	mfspr	r3, SPRN_HFSCR
+	std	r3, VCPU_HFSCR(r9)
+	b	guest_exit_cont
+14:
+	/* External interrupt ? */
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	beq	kvmppc_guest_external
+	/* See if it is a machine check */
+	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	machine_check_realmode
+	/* Or a hypervisor maintenance interrupt */
+	cmpwi	r12, BOOK3S_INTERRUPT_HMI
+	beq	hmi_realmode
+
+guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	addi	r3, r9, VCPU_TB_RMEXIT
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
+#endif
+
+	/*
+	 * Possibly flush the link stack here, before we do a blr in
+	 * kvmhv_switch_to_host.
+	 */
+1:	nop
+	patch_site 1b patch__call_kvm_flush_link_stack
+
+	/* For hash guest, read the guest SLB and save it away */
+	li	r5, 0			/* r5 counts the valid entries saved */
+	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
+	mtctr	r0
+	li	r6,0			/* r6 = SLB index being read */
+	addi	r7,r9,VCPU_SLB		/* r7 = next save slot in the vcpu */
+1:	slbmfee	r8,r6
+	andis.	r0,r8,SLB_ESID_V@h
+	beq	2f			/* entry not valid: skip it */
+	add	r8,r8,r6		/* put index in */
+	slbmfev	r3,r6
+	std	r8,VCPU_SLB_E(r7)
+	std	r3,VCPU_SLB_V(r7)
+	addi	r7,r7,VCPU_SLB_SIZE
+	addi	r5,r5,1
+2:	addi	r6,r6,1
+	bdnz	1b
+	/* Finally clear out the SLB */
+	li	r0,0
+	slbmte	r0,r0
+	PPC_SLBIA(6)
+	ptesync
+	stw	r5,VCPU_SLB_MAX(r9)	/* slb_max = number of entries saved */
+
+	/* load host SLB entries */
+	ld	r8,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	li	r3, SLBSHADOW_SAVEAREA
+	LDX_BE	r5, r8, r3
+	addi	r3, r3, 8
+	LDX_BE	r6, r8, r3
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f			/* shadow entry not valid: skip it */
+	slbmte	r6,r5
+1:	addi	r8,r8,16		/* step to the next 16-byte shadow entry */
+	.endr
+
+guest_bypass:
+	stw	r12, STACK_SLOT_TRAP(r1)
+
+	/* Save DEC */
+	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
+	ld	r3, HSTATE_KVM_VCORE(r13)
+	mfspr	r5,SPRN_DEC
+	mftb	r6
+	extsw	r5,r5
+16:	add	r5,r5,r6
+	std	r5,VCPU_DEC_EXPIRES(r9)
+
+	/* Increment exit count, poke other threads to exit */
+	mr 	r3, r12
+	bl	kvmhv_commence_exit
+	nop
+	ld	r9, HSTATE_KVM_VCPU(r13)
+
+	/* Stop others sending VCPU interrupts to this physical CPU */
+	li	r0, -1
+	stw	r0, VCPU_CPU(r9)
+	stw	r0, VCPU_THREAD_CPU(r9)
+
+	/* Save guest CTRL register, set runlatch to 1 if it was clear */
+	mfspr	r6,SPRN_CTRLF
+	stw	r6,VCPU_CTRL(r9)
+	andi.	r0,r6,1
+	bne	4f
+	li	r6,1
+	mtspr	SPRN_CTRLT,r6
+4:
+	/*
+	 * Save the guest PURR/SPURR
+	 */
+	mfspr	r5,SPRN_PURR
+	mfspr	r6,SPRN_SPURR
+	ld	r7,VCPU_PURR(r9)
+	ld	r8,VCPU_SPURR(r9)
+	std	r5,VCPU_PURR(r9)
+	std	r6,VCPU_SPURR(r9)
+	subf	r5,r7,r5
+	subf	r6,r8,r6
+
+	/*
+	 * Restore host PURR/SPURR and add guest times
+	 * so that the time in the guest gets accounted.
+	 */
+	ld	r3,HSTATE_PURR(r13)
+	ld	r4,HSTATE_SPURR(r13)
+	add	r3,r3,r5
+	add	r4,r4,r6
+	mtspr	SPRN_PURR,r3
+	mtspr	SPRN_SPURR,r4
+
+BEGIN_FTR_SECTION
+	b	8f
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+	/* Save POWER8-specific registers */
+	mfspr	r5, SPRN_IAMR
+	mfspr	r6, SPRN_PSPB
+	mfspr	r7, SPRN_FSCR
+	std	r5, VCPU_IAMR(r9)
+	stw	r6, VCPU_PSPB(r9)
+	std	r7, VCPU_FSCR(r9)
+	mfspr	r5, SPRN_IC
+	mfspr	r7, SPRN_TAR
+	std	r5, VCPU_IC(r9)
+	std	r7, VCPU_TAR(r9)
+	mfspr	r8, SPRN_EBBHR
+	std	r8, VCPU_EBBHR(r9)
+	mfspr	r5, SPRN_EBBRR
+	mfspr	r6, SPRN_BESCR
+	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_WORT
+	std	r5, VCPU_EBBRR(r9)
+	std	r6, VCPU_BESCR(r9)
+	stw	r7, VCPU_GUEST_PID(r9)
+	std	r8, VCPU_WORT(r9)
+	mfspr	r5, SPRN_TCSCR
+	mfspr	r6, SPRN_ACOP
+	mfspr	r7, SPRN_CSIGR
+	mfspr	r8, SPRN_TACR
+	std	r5, VCPU_TCSCR(r9)
+	std	r6, VCPU_ACOP(r9)
+	std	r7, VCPU_CSIGR(r9)
+	std	r8, VCPU_TACR(r9)
+BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_FSCR(r1)
+	mtspr	SPRN_FSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	/*
+	 * Restore various registers to 0, where non-zero values
+	 * set by the guest could disrupt the host.
+	 */
+	li	r0, 0
+	mtspr	SPRN_PSPB, r0
+	mtspr	SPRN_WORT, r0
+	mtspr	SPRN_TCSCR, r0
+	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+	li	r0, 1
+	sldi	r0, r0, 31
+	mtspr	SPRN_MMCRS, r0
+
+	/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
+	ld	r8, STACK_SLOT_IAMR(r1)
+	mtspr	SPRN_IAMR, r8
+
+8:	/* Power7 jumps back in here */
+	mfspr	r5,SPRN_AMR
+	mfspr	r6,SPRN_UAMOR
+	std	r5,VCPU_AMR(r9)
+	std	r6,VCPU_UAMOR(r9)
+	ld	r5,STACK_SLOT_AMR(r1)
+	ld	r6,STACK_SLOT_UAMOR(r1)
+	mtspr	SPRN_AMR, r5
+	mtspr	SPRN_UAMOR, r6
+
+	/* Switch DSCR back to host value */
+	mfspr	r8, SPRN_DSCR
+	ld	r7, HSTATE_DSCR(r13)
+	std	r8, VCPU_DSCR(r9)
+	mtspr	SPRN_DSCR, r7
+
+	/* Save non-volatile GPRs */
+	std	r14, VCPU_GPR(R14)(r9)
+	std	r15, VCPU_GPR(R15)(r9)
+	std	r16, VCPU_GPR(R16)(r9)
+	std	r17, VCPU_GPR(R17)(r9)
+	std	r18, VCPU_GPR(R18)(r9)
+	std	r19, VCPU_GPR(R19)(r9)
+	std	r20, VCPU_GPR(R20)(r9)
+	std	r21, VCPU_GPR(R21)(r9)
+	std	r22, VCPU_GPR(R22)(r9)
+	std	r23, VCPU_GPR(R23)(r9)
+	std	r24, VCPU_GPR(R24)(r9)
+	std	r25, VCPU_GPR(R25)(r9)
+	std	r26, VCPU_GPR(R26)(r9)
+	std	r27, VCPU_GPR(R27)(r9)
+	std	r28, VCPU_GPR(R28)(r9)
+	std	r29, VCPU_GPR(R29)(r9)
+	std	r30, VCPU_GPR(R30)(r9)
+	std	r31, VCPU_GPR(R31)(r9)
+
+	/* Save SPRGs */
+	mfspr	r3, SPRN_SPRG0
+	mfspr	r4, SPRN_SPRG1
+	mfspr	r5, SPRN_SPRG2
+	mfspr	r6, SPRN_SPRG3
+	std	r3, VCPU_SPRG0(r9)
+	std	r4, VCPU_SPRG1(r9)
+	std	r5, VCPU_SPRG2(r9)
+	std	r6, VCPU_SPRG3(r9)
+
+	/* save FP state */
+	mr	r3, r9
+	bl	kvmppc_save_fp
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+	/*
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
+	 */
+	mr      r3, r9
+	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
+	bl	kvmppc_save_tm_hv
+	nop
+	ld	r9, HSTATE_KVM_VCPU(r13)
+91:
+#endif
+
+	/* Increment yield count if they have a VPA */
+	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
+	cmpdi	r8, 0
+	beq	25f
+	li	r4, LPPACA_YIELDCOUNT
+	LWZX_BE	r3, r8, r4
+	addi	r3, r3, 1
+	STWX_BE	r3, r8, r4
+	li	r3, 1
+	stb	r3, VCPU_VPA_DIRTY(r9)
+25:
+	/* Save PMU registers if requested */
+	/* r8 and cr0.eq are live here */
+	mr	r3, r9
+	li	r4, 1
+	beq	21f			/* if no VPA, save PMU stuff anyway */
+	lbz	r4, LPPACA_PMCINUSE(r8)
+21:	bl	kvmhv_save_guest_pmu
+	ld	r9, HSTATE_KVM_VCPU(r13)
+
+	/* Restore host values of some registers */
+BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_CIABR(r1)
+	ld	r6, STACK_SLOT_DAWR0(r1)
+	ld	r7, STACK_SLOT_DAWRX0(r1)
+	mtspr	SPRN_CIABR, r5
+	/*
+	 * If the DAWR doesn't work, it's ok to write these here as
+	 * this value should always be zero
+	*/
+	mtspr	SPRN_DAWR0, r6
+	mtspr	SPRN_DAWRX0, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+	/*
+	 * POWER7/POWER8 guest -> host partition switch code.
+	 * We don't have to lock against tlbies but we do
+	 * have to coordinate the hardware threads.
+	 * Here STACK_SLOT_TRAP(r1) contains the trap number.
+	 */
+kvmhv_switch_to_host:
+	/* Secondary threads wait for primary to do partition switch */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
+	lbz	r3,HSTATE_PTID(r13)
+	cmpwi	r3,0			/* ptid 0 takes the primary path at 15: */
+	beq	15f
+	HMT_LOW
+13:	lbz	r3,VCORE_IN_GUEST(r5)	/* spin until the primary clears in_guest */
+	cmpwi	r3,0
+	bne	13b
+	HMT_MEDIUM
+	b	16f
+
+	/* Primary thread waits for all the secondaries to exit guest */
+15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
+	rlwinm	r0,r3,32-8,0xff		/* r0 = second-lowest byte of the word */
+	clrldi	r3,r3,56		/* r3 = lowest byte of the word */
+	cmpw	r3,r0
+	bne	15b			/* loop until the two bytes are equal */
+	isync
+
+	/* Did we actually switch to the guest at all? */
+	lbz	r6, VCORE_IN_GUEST(r5)
+	cmpwi	r6, 0
+	beq	19f			/* no: skip the switch, just reset HDEC */
+
+	/* Primary thread switches back to host partition */
+	lwz	r7,KVM_HOST_LPID(r4)
+	ld	r6,KVM_HOST_SDR1(r4)
+	li	r8,LPID_RSVD		/* switch to reserved LPID */
+	mtspr	SPRN_LPID,r8
+	ptesync
+	mtspr	SPRN_SDR1,r6		/* switch to host page table */
+	mtspr	SPRN_LPID,r7		/* then switch to the host LPID */
+	isync
+
+BEGIN_FTR_SECTION
+	/* DPDES and VTB are shared between threads */
+	mfspr	r7, SPRN_DPDES
+	mfspr	r8, SPRN_VTB
+	std	r7, VCORE_DPDES(r5)
+	std	r8, VCORE_VTB(r5)
+	/* clear DPDES so we don't get guest doorbells in the host */
+	li	r8, 0
+	mtspr	SPRN_DPDES, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+	/* Subtract timebase offset from timebase */
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
+	cmpdi	r8,0
+	beq	17f			/* applied offset is zero: nothing to undo */
+	li	r0, 0
+	std	r0, VCORE_TB_OFFSET_APPL(r5)	/* record that no offset is applied now */
+	mftb	r6			/* current guest timebase */
+	subf	r8,r8,r6		/* r8 = guest timebase - offset */
+	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
+	mftb	r7			/* check if lower 24 bits overflowed */
+	clrldi	r6,r6,40		/* low 24 bits of the earlier reading */
+	clrldi	r7,r7,40		/* low 24 bits of the later reading */
+	cmpld	r7,r6
+	bge	17f			/* later >= earlier: no wrap happened */
+	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
+	mtspr	SPRN_TBU40,r8
+
+17:
+	/*
+	 * If this is an HMI, we called kvmppc_realmode_hmi_handler
+	 * above, which may or may not have already called
+	 * kvmppc_subcore_exit_guest.  Fortunately, all that
+	 * kvmppc_subcore_exit_guest does is clear a flag, so calling
+	 * it again here is benign even if kvmppc_realmode_hmi_handler
+	 * has already called it.
+	 */
+	bl	kvmppc_subcore_exit_guest
+	nop
+30:	ld	r5,HSTATE_KVM_VCORE(r13)	/* reload vcore pointer after the call */
+	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
+
+	/* Reset PCR */
+	ld	r0, VCORE_PCR(r5)
+	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+	cmpld	r0, r6
+	beq	18f			/* vcore PCR value equals PCR_MASK: skip write */
+	mtspr	SPRN_PCR, r6
+18:
+	/* Signal secondary CPUs to continue */
+	li	r0, 0
+	stb	r0,VCORE_IN_GUEST(r5)
+19:	lis	r8,0x7fff		/* MAX_INT@h */
+	mtspr	SPRN_HDEC,r8
+
+16:	ld	r8,KVM_HOST_LPCR(r4)	/* primary and secondaries both get here */
+	mtspr	SPRN_LPCR,r8
+	isync
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	/* Finish timing, if we have a vcpu */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	li	r3, 0
+	beq	2f			/* no vcpu: skip the accounting call */
+	bl	kvmhv_accumulate_time
+2:
+#endif
+	/* Unset guest mode */
+	li	r0, KVM_GUEST_MODE_NONE
+	stb	r0, HSTATE_IN_GUEST(r13)
+
+	lwz	r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */
+	ld	r0, SFS+PPC_LR_STKOFF(r1)	/* saved LR lives above our SFS-byte frame */
+	addi	r1, r1, SFS		/* pop the stack frame */
+	mtlr	r0
+	blr
+
+.balign 32
+.global kvm_flush_link_stack
+kvm_flush_link_stack:
+	/* Save LR into r0, since every bl below overwrites LR */
+	mflr	r0
+
+	/* Flush the link stack. On Power8 it's up to 32 entries in size. */
+	.rept 32
+	ANNOTATE_INTRA_FUNCTION_CALL
+	bl	.+4			/* branch-and-link to the very next instruction */
+	.endr
+
+	/* And on Power9 it's up to 64, so do 32 more when ARCH_300 is set. */
+BEGIN_FTR_SECTION
+	.rept 32
+	ANNOTATE_INTRA_FUNCTION_CALL
+	bl	.+4			/* branch-and-link to the very next instruction */
+	.endr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+	/* Restore LR from r0 and return to the caller */
+	mtlr	r0
+	blr
+
+kvmppc_guest_external:
+	/* External interrupt, first check for host_ipi. If this is
+	 * set, we know the host wants us out so let's do it now
+	 */
+	bl	CFUNC(kvmppc_read_intr)
+
+	/*
+	 * Restore the active volatile registers after returning from
+	 * a C function.
+	 */
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+
+	/*
+	 * kvmppc_read_intr return codes:
+	 *
+	 * Exit to host (r3 > 0)
+	 *   1 An interrupt is pending that needs to be handled by the host
+	 *     Exit guest and return to host by branching to guest_exit_cont
+	 *
+	 *   2 Passthrough that needs completion in the host
+	 *     Exit guest and return to host by branching to guest_exit_cont
+	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
+	 *     to indicate to the host to complete handling the interrupt
+	 *
+	 * Before returning to guest, we check if any CPU is heading out
+	 * to the host and if so, we head out also. If no CPUs are heading
+	 * out, check return values <= 0.
+	 *
+	 * Return to guest (r3 <= 0)
+	 *  0 No external interrupt is pending
+	 * -1 A guest wakeup IPI (which has now been cleared)
+	 *    In either case, we return to guest to deliver any pending
+	 *    guest interrupts.
+	 *
+	 * -2 A PCI passthrough external interrupt was handled
+	 *    (interrupt was delivered directly to guest)
+	 *    Return to guest to deliver any pending guest interrupts.
+	 */
+
+	cmpdi	r3, 1
+	ble	1f
+
+	/* Return code = 2 */
+	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
+	stw	r12, VCPU_TRAP(r9)	/* record the new trap number in the vcpu */
+	b	guest_exit_cont
+
+1:	/* Return code <= 1 */
+	cmpdi	r3, 0
+	bgt	guest_exit_cont		/* return code 1: exit to the host */
+
+	/* Return code <= 0 */
+maybe_reenter_guest:
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lwz	r0, VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0, 0x100		/* is the entry/exit word >= 0x100? */
+	mr	r4, r9			/* copy vcpu pointer into r4 */
+	blt	deliver_guest_interrupt	/* below 0x100: go back to the guest */
+	b	guest_exit_cont		/* otherwise head out to the host too */
+
+/*
+ * Check whether an HDSI is an HPTE not found fault or something else.
+ * If it is an HPTE not found fault that is due to the guest accessing
+ * a page that they have mapped but which we have paged out, then
+ * we continue on with the guest exit path.  In all other cases,
+ * reflect the HDSI to the guest as a DSI.
+ */
+kvmppc_hdsi:
+	mfspr	r4, SPRN_HDAR		/* r4 = HDAR, saved as the fault DAR below */
+	mfspr	r6, SPRN_HDSISR		/* r6 = HDSISR, saved as the fault DSISR below */
+	/* HPTE not found fault or protection fault? */
+	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
+	beq	1f			/* if not, send it to the guest */
+	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
+	beq	3f			/* if not, take the VRMA path at 3: */
+	clrrdi	r0, r4, 28		/* clear the low 28 bits of the address */
+	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
+	li	r0, BOOK3S_INTERRUPT_DATA_SEGMENT	/* vector used at 7: on a miss */
+	bne	7f			/* if no SLB entry found */
+4:	std	r4, VCPU_FAULT_DAR(r9)
+	stw	r6, VCPU_FAULT_DSISR(r9)
+
+	/* Search the hash table. */
+	mr	r3, r9			/* vcpu pointer */
+	li	r7, 1			/* data fault */
+	bl	CFUNC(kvmppc_hpte_hv_fault)
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* reload vcpu, pc and msr after the call */
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	li	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+	cmpdi	r3, 0			/* retry the instruction */
+	beq	6f
+	cmpdi	r3, -1			/* handle in kernel mode */
+	beq	guest_exit_cont
+	cmpdi	r3, -2			/* MMIO emulation; need instr word */
+	beq	2f
+
+	/* Synthesize a DSI (or DSegI) for the guest */
+	ld	r4, VCPU_FAULT_DAR(r9)
+	mr	r6, r3			/* any other return value becomes the DSISR */
+1:	li	r0, BOOK3S_INTERRUPT_DATA_STORAGE
+	mtspr	SPRN_DSISR, r6
+7:	mtspr	SPRN_DAR, r4
+	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	mr	r10, r0			/* r10 = interrupt vector selected above */
+	bl	kvmppc_msr_interrupt
+fast_interrupt_c_return:
+6:	ld	r7, VCPU_CTR(r9)	/* reload CTR and XER from the vcpu */
+	ld	r8, VCPU_XER(r9)
+	mtctr	r7
+	mtxer	r8
+	mr	r4, r9			/* copy vcpu pointer into r4 */
+	b	fast_guest_return
+
+3:	ld	r5, VCPU_KVM(r9)	/* not relocated, use VRMA */
+	ld	r5, KVM_VRMA_SLB_V(r5)
+	b	4b
+
+	/* If this is for emulated MMIO, load the instruction word */
+2:	li	r8, KVM_INST_FETCH_FAILED	/* In case lwz faults */
+
+	/* Set guest mode to 'jump over instruction' so if lwz faults
+	 * we'll just continue at the next IP. */
+	li	r0, KVM_GUEST_MODE_SKIP
+	stb	r0, HSTATE_IN_GUEST(r13)
+
+	/* Do the access with MSR:DR enabled */
+	mfmsr	r3
+	ori	r4, r3, MSR_DR		/* Enable paging for data */
+	mtmsrd	r4
+	lwz	r8, 0(r10)		/* r10 was loaded from VCPU_PC above */
+	mtmsrd	r3			/* put back the MSR value read above */
+
+	/* Store the result */
+	std	r8, VCPU_LAST_INST(r9)
+
+	/* Unset guest mode. */
+	li	r0, KVM_GUEST_MODE_HOST_HV
+	stb	r0, HSTATE_IN_GUEST(r13)
+	b	guest_exit_cont
+
+/*
+ * Similarly for an HISI, reflect it to the guest as an ISI unless
+ * it is an HPTE not found fault for a page that we have paged out.
+ */
+kvmppc_hisi:
+	andis.	r0, r11, SRR1_ISI_NOPT@h
+	beq	1f			/* NOPT bit clear: reflect as an ISI */
+	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
+	beq	3f			/* if not, take the VRMA path at 3: */
+	clrrdi	r0, r10, 28		/* clear the low 28 bits of the address */
+	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
+	li	r0, BOOK3S_INTERRUPT_INST_SEGMENT	/* vector used at 7: on a miss */
+	bne	7f			/* if no SLB entry found */
+4:
+	/* Search the hash table. */
+	mr	r3, r9			/* vcpu pointer */
+	mr	r4, r10			/* address argument = r10 */
+	mr	r6, r11			/* status argument = r11 */
+	li	r7, 0			/* instruction fault */
+	bl	CFUNC(kvmppc_hpte_hv_fault)
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* reload vcpu, pc and msr after the call */
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+	cmpdi	r3, 0			/* retry the instruction */
+	beq	fast_interrupt_c_return
+	cmpdi	r3, -1			/* handle in kernel mode */
+	beq	guest_exit_cont
+
+	/* Synthesize an ISI (or ISegI) for the guest */
+	mr	r11, r3			/* any other return value becomes SRR1 */
+1:	li	r0, BOOK3S_INTERRUPT_INST_STORAGE
+7:	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	mr	r10, r0			/* r10 = interrupt vector selected above */
+	bl	kvmppc_msr_interrupt
+	b	fast_interrupt_c_return
+
+3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
+	ld	r5, KVM_VRMA_SLB_V(r6)
+	b	4b
+
+/*
+ * Try to handle an hcall in real mode.
+ * Returns to the guest if we handle it, or continues on up to
+ * the kernel if we can't (i.e. if we don't have a handler for
+ * it, or if the handler returns H_TOO_HARD).
+ *
+ * r5 - r8 contain hcall args,
+ * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
+ */
+hcall_try_real_mode:
+	ld	r3,VCPU_GPR(R3)(r9)	/* guest r3, used below as the hcall number */
+	andi.	r0,r11,MSR_PR
+	/* sc 1 from userspace - reflect to guest syscall */
+	bne	sc_1_fast_return
+	clrrdi	r3,r3,2			/* round down to a multiple of 4 */
+	cmpldi	r3,hcall_real_table_end - hcall_real_table
+	bge	guest_exit_cont		/* past the end of the table */
+	/* See if this hcall is enabled for in-kernel handling */
+	ld	r4, VCPU_KVM(r9)
+	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
+	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
+	add	r4, r4, r0
+	ld	r0, KVM_ENABLED_HCALLS(r4)
+	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
+	srd	r0, r0, r4
+	andi.	r0, r0, 1		/* test this hcall's bit in the bitmap word */
+	beq	guest_exit_cont		/* bit clear: not enabled */
+	/* Get pointer to handler, if any, and call it */
+	LOAD_REG_ADDR(r4, hcall_real_table)
+	lwax	r3,r3,r4		/* r3 = signed 32-bit table entry */
+	cmpwi	r3,0
+	beq	guest_exit_cont		/* zero entry: no real-mode handler */
+	add	r12,r3,r4		/* handler address = table base + entry */
+	mtctr	r12
+	mr	r3,r9		/* get vcpu pointer */
+	ld	r4,VCPU_GPR(R4)(r9)
+	bctrl
+	cmpdi	r3,H_TOO_HARD
+	beq	hcall_real_fallback
+	ld	r4,HSTATE_KVM_VCPU(r13)
+	std	r3,VCPU_GPR(R3)(r4)	/* handler's return value goes in guest r3 */
+	ld	r10,VCPU_PC(r4)
+	ld	r11,VCPU_MSR(r4)
+	b	fast_guest_return
+
+sc_1_fast_return:
+	mtspr	SPRN_SRR0,r10
+	mtspr	SPRN_SRR1,r11
+	li	r10, BOOK3S_INTERRUPT_SYSCALL
+	bl	kvmppc_msr_interrupt
+	mr	r4,r9			/* copy vcpu pointer into r4 */
+	b	fast_guest_return
+
+	/* We've attempted a real mode hcall, but it has been punted back
+	 * to userspace.  We need to restore some clobbered volatiles
+	 * before resuming the pass-it-to-qemu path */
+hcall_real_fallback:
+	li	r12,BOOK3S_INTERRUPT_SYSCALL
+	ld	r9, HSTATE_KVM_VCPU(r13)
+
+	b	guest_exit_cont
+
+	.globl	hcall_real_table	/* 32-bit offsets from this label, one per hcall */
+hcall_real_table:
+	.long	0		/* 0 - unused; a zero entry means no handler */
+	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table	/* 0x04 */
+	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table	/* 0x08 */
+	.long	DOTSYM(kvmppc_h_read) - hcall_real_table	/* 0x0c */
+	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table	/* 0x10 */
+	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table	/* 0x14 */
+	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table	/* 0x18 */
+	.long	0		/* 0x1c */
+	.long	0		/* 0x20 */
+	.long	0		/* 0x24 - H_SET_SPRG0 */
+	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table	/* 0x28 */
+	.long	DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table	/* 0x2c */
+	.long	0		/* 0x30 */
+	.long	0		/* 0x34 */
+	.long	0		/* 0x38 */
+	.long	0		/* 0x3c */
+	.long	0		/* 0x40 */
+	.long	0		/* 0x44 */
+	.long	0		/* 0x48 */
+	.long	0		/* 0x4c */
+	.long	0		/* 0x50 */
+	.long	0		/* 0x54 */
+	.long	0		/* 0x58 */
+	.long	0		/* 0x5c */
+	.long	0		/* 0x60 */
+#ifdef CONFIG_KVM_XICS
+	.long	DOTSYM(xics_rm_h_eoi) - hcall_real_table	/* 0x64 - H_EOI */
+	.long	DOTSYM(xics_rm_h_cppr) - hcall_real_table	/* 0x68 - H_CPPR */
+	.long	DOTSYM(xics_rm_h_ipi) - hcall_real_table	/* 0x6c - H_IPI */
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	DOTSYM(xics_rm_h_xirr) - hcall_real_table	/* 0x74 - H_XIRR */
+#else
+	.long	0		/* 0x64 - H_EOI */
+	.long	0		/* 0x68 - H_CPPR */
+	.long	0		/* 0x6c - H_IPI */
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	0		/* 0x74 - H_XIRR */
+#endif
+	.long	0		/* 0x78 */
+	.long	0		/* 0x7c */
+	.long	0		/* 0x80 */
+	.long	0		/* 0x84 */
+	.long	0		/* 0x88 */
+	.long	0		/* 0x8c */
+	.long	0		/* 0x90 */
+	.long	0		/* 0x94 */
+	.long	0		/* 0x98 */
+	.long	0		/* 0x9c */
+	.long	0		/* 0xa0 */
+	.long	0		/* 0xa4 */
+	.long	0		/* 0xa8 */
+	.long	0		/* 0xac */
+	.long	0		/* 0xb0 */
+	.long	0		/* 0xb4 */
+	.long	0		/* 0xb8 */
+	.long	0		/* 0xbc */
+	.long	0		/* 0xc0 */
+	.long	0		/* 0xc4 */
+	.long	0		/* 0xc8 */
+	.long	0		/* 0xcc */
+	.long	0		/* 0xd0 */
+	.long	0		/* 0xd4 */
+	.long	0		/* 0xd8 */
+	.long	0		/* 0xdc */
+	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table	/* 0xe0 */
+	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table	/* 0xe4 */
+	.long	0		/* 0xe8 */
+	.long	0		/* 0xec */
+	.long	0		/* 0xf0 */
+	.long	0		/* 0xf4 */
+	.long	0		/* 0xf8 */
+	.long	0		/* 0xfc */
+	.long	0		/* 0x100 */
+	.long	0		/* 0x104 */
+	.long	0		/* 0x108 */
+	.long	0		/* 0x10c */
+	.long	0		/* 0x110 */
+	.long	0		/* 0x114 */
+	.long	0		/* 0x118 */
+	.long	0		/* 0x11c */
+	.long	0		/* 0x120 */
+	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table	/* 0x124 */
+	.long	0		/* 0x128 */
+	.long	0		/* 0x12c */
+	.long	0		/* 0x130 */
+	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table	/* 0x134 */
+	.long	0		/* 0x138 */
+	.long	0		/* 0x13c */
+	.long	0		/* 0x140 */
+	.long	0		/* 0x144 */
+	.long	0		/* 0x148 */
+	.long	0		/* 0x14c */
+	.long	0		/* 0x150 */
+	.long	0		/* 0x154 */
+	.long	0		/* 0x158 */
+	.long	0		/* 0x15c */
+	.long	0		/* 0x160 */
+	.long	0		/* 0x164 */
+	.long	0		/* 0x168 */
+	.long	0		/* 0x16c */
+	.long	0		/* 0x170 */
+	.long	0		/* 0x174 */
+	.long	0		/* 0x178 */
+	.long	0		/* 0x17c */
+	.long	0		/* 0x180 */
+	.long	0		/* 0x184 */
+	.long	0		/* 0x188 */
+	.long	0		/* 0x18c */
+	.long	0		/* 0x190 */
+	.long	0		/* 0x194 */
+	.long	0		/* 0x198 */
+	.long	0		/* 0x19c */
+	.long	0		/* 0x1a0 */
+	.long	0		/* 0x1a4 */
+	.long	0		/* 0x1a8 */
+	.long	0		/* 0x1ac */
+	.long	0		/* 0x1b0 */
+	.long	0		/* 0x1b4 */
+	.long	0		/* 0x1b8 */
+	.long	0		/* 0x1bc */
+	.long	0		/* 0x1c0 */
+	.long	0		/* 0x1c4 */
+	.long	0		/* 0x1c8 */
+	.long	0		/* 0x1cc */
+	.long	0		/* 0x1d0 */
+	.long	0		/* 0x1d4 */
+	.long	0		/* 0x1d8 */
+	.long	0		/* 0x1dc */
+	.long	0		/* 0x1e0 */
+	.long	0		/* 0x1e4 */
+	.long	0		/* 0x1e8 */
+	.long	0		/* 0x1ec */
+	.long	0		/* 0x1f0 */
+	.long	0		/* 0x1f4 */
+	.long	0		/* 0x1f8 */
+	.long	0		/* 0x1fc */
+	.long	0		/* 0x200 */
+	.long	0		/* 0x204 */
+	.long	0		/* 0x208 */
+	.long	0		/* 0x20c */
+	.long	0		/* 0x210 */
+	.long	0		/* 0x214 */
+	.long	0		/* 0x218 */
+	.long	0		/* 0x21c */
+	.long	0		/* 0x220 */
+	.long	0		/* 0x224 */
+	.long	0		/* 0x228 */
+	.long	0		/* 0x22c */
+	.long	0		/* 0x230 */
+	.long	0		/* 0x234 */
+	.long	0		/* 0x238 */
+	.long	0		/* 0x23c */
+	.long	0		/* 0x240 */
+	.long	0		/* 0x244 */
+	.long	0		/* 0x248 */
+	.long	0		/* 0x24c */
+	.long	0		/* 0x250 */
+	.long	0		/* 0x254 */
+	.long	0		/* 0x258 */
+	.long	0		/* 0x25c */
+	.long	0		/* 0x260 */
+	.long	0		/* 0x264 */
+	.long	0		/* 0x268 */
+	.long	0		/* 0x26c */
+	.long	0		/* 0x270 */
+	.long	0		/* 0x274 */
+	.long	0		/* 0x278 */
+	.long	0		/* 0x27c */
+	.long	0		/* 0x280 */
+	.long	0		/* 0x284 */
+	.long	0		/* 0x288 */
+	.long	0		/* 0x28c */
+	.long	0		/* 0x290 */
+	.long	0		/* 0x294 */
+	.long	0		/* 0x298 */
+	.long	0		/* 0x29c */
+	.long	0		/* 0x2a0 */
+	.long	0		/* 0x2a4 */
+	.long	0		/* 0x2a8 */
+	.long	0		/* 0x2ac */
+	.long	0		/* 0x2b0 */
+	.long	0		/* 0x2b4 */
+	.long	0		/* 0x2b8 */
+	.long	0		/* 0x2bc */
+	.long	0		/* 0x2c0 */
+	.long	0		/* 0x2c4 */
+	.long	0		/* 0x2c8 */
+	.long	0		/* 0x2cc */
+	.long	0		/* 0x2d0 */
+	.long	0		/* 0x2d4 */
+	.long	0		/* 0x2d8 */
+	.long	0		/* 0x2dc */
+	.long	0		/* 0x2e0 */
+	.long	0		/* 0x2e4 */
+	.long	0		/* 0x2e8 */
+	.long	0		/* 0x2ec */
+	.long	0		/* 0x2f0 */
+	.long	0		/* 0x2f4 */
+	.long	0		/* 0x2f8 */
+#ifdef CONFIG_KVM_XICS
+	.long	DOTSYM(xics_rm_h_xirr_x) - hcall_real_table	/* 0x2fc - H_XIRR_X */
+#else
+	.long	0		/* 0x2fc - H_XIRR_X*/
+#endif
+	.long	DOTSYM(kvmppc_rm_h_random) - hcall_real_table	/* 0x300 */
+	.globl	hcall_real_table_end
+hcall_real_table_end:
+
+_GLOBAL_TOC(kvmppc_h_set_xdabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
+	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
+	beq	6f			/* neither USER nor KERNEL set: reject */
+	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
+	andc.	r0, r5, r0		/* any bits outside the allowed set? */
+	beq	3f			/* none: join kvmppc_h_set_dabr at its 3: */
+6:	li	r3, H_PARAMETER
+	blr
+
+_GLOBAL_TOC(kvmppc_h_set_dabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
+	li	r5, DABRX_USER | DABRX_KERNEL	/* DABRX value used for plain H_SET_DABR */
+3:
+BEGIN_FTR_SECTION
+	b	2f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	std	r4,VCPU_DABR(r3)
+	stw	r5, VCPU_DABRX(r3)
+	mtspr	SPRN_DABRX, r5
+	/* Work around P7 bug where DABR can get corrupted on mtspr */
+1:	mtspr	SPRN_DABR,r4
+	mfspr	r5, SPRN_DABR
+	cmpd	r4, r5
+	bne	1b			/* rewrite until the value reads back intact */
+	isync
+	li	r3,0
+	blr
+
+2:
+	LOAD_REG_ADDR(r11, dawr_force_enable)
+	lbz	r11, 0(r11)
+	cmpdi	r11, 0
+	bne	3f			/* non-zero: go on to the second 3: below */
+	li	r3, H_HARDWARE		/* dawr_force_enable is zero: refuse */
+	blr
+3:
+	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
+	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
+	rlwimi	r5, r4, 2, DAWRX_WT
+	clrrdi	r4, r4, 3		/* clear the low 3 bits of the address */
+	std	r4, VCPU_DAWR0(r3)
+	std	r5, VCPU_DAWRX0(r3)
+	/*
+	 * If we came in through the real mode hcall handler then it is necessary
+	 * to write the registers since the return path won't. Otherwise it is
+	 * sufficient to store them in the vcpu struct as they will be loaded
+	 * next time the vcpu is run.
+	 */
+	mfmsr	r6
+	andi.	r6, r6, MSR_DR		/* in real mode? */
+	bne	4f			/* MSR_DR set: not real mode, skip the SPRs */
+	mtspr	SPRN_DAWR0, r4
+	mtspr	SPRN_DAWRX0, r5
+4:	li	r3, 0
+	blr
+
+_GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
+	ori	r11,r11,MSR_EE		/* set MSR_EE in the stored guest MSR */
+	std	r11,VCPU_MSR(r3)
+	li	r0,1
+	stb	r0,VCPU_CEDED(r3)
+	sync			/* order setting ceded vs. testing prodded */
+	lbz	r5,VCPU_PRODDED(r3)
+	cmpwi	r5,0
+	bne	kvm_cede_prodded
+	li	r12,0		/* set trap to 0 to say hcall is handled */
+	stw	r12,VCPU_TRAP(r3)
+	li	r0,H_SUCCESS
+	std	r0,VCPU_GPR(R3)(r3)	/* guest r3 = H_SUCCESS */
+
+	/*
+	 * Set our bit in the bitmask of napping threads unless all the
+	 * other threads are already napping, in which case we send this
+	 * up to the host.
+	 */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	lbz	r6,HSTATE_PTID(r13)
+	lwz	r8,VCORE_ENTRY_EXIT(r5)
+	clrldi	r8,r8,56		/* r8 = lowest byte of the entry/exit word */
+	li	r0,1
+	sld	r0,r0,r6		/* r0 = 1 << ptid, this thread's bit */
+	addi	r6,r5,VCORE_NAPPING_THREADS
+31:	lwarx	r4,0,r6
+	or	r4,r4,r0
+	cmpw	r4,r8
+	beq	kvm_cede_exit		/* setting our bit would make the maps equal */
+	stwcx.	r4,0,r6
+	bne	31b			/* reservation lost: retry */
+	/* order napping_threads update vs testing entry_exit_map */
+	isync
+	li	r0,NAPPING_CEDE
+	stb	r0,HSTATE_NAPPING(r13)
+	lwz	r7,VCORE_ENTRY_EXIT(r5)
+	cmpwi	r7,0x100
+	bge	33f		/* another thread already exiting */
+
+/*
+ * Although not specifically required by the architecture, POWER7
+ * preserves the following registers in nap mode, even if an SMT mode
+ * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
+ * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
+ */
+	/* Save non-volatile GPRs */
+	std	r14, VCPU_GPR(R14)(r3)
+	std	r15, VCPU_GPR(R15)(r3)
+	std	r16, VCPU_GPR(R16)(r3)
+	std	r17, VCPU_GPR(R17)(r3)
+	std	r18, VCPU_GPR(R18)(r3)
+	std	r19, VCPU_GPR(R19)(r3)
+	std	r20, VCPU_GPR(R20)(r3)
+	std	r21, VCPU_GPR(R21)(r3)
+	std	r22, VCPU_GPR(R22)(r3)
+	std	r23, VCPU_GPR(R23)(r3)
+	std	r24, VCPU_GPR(R24)(r3)
+	std	r25, VCPU_GPR(R25)(r3)
+	std	r26, VCPU_GPR(R26)(r3)
+	std	r27, VCPU_GPR(R27)(r3)
+	std	r28, VCPU_GPR(R28)(r3)
+	std	r29, VCPU_GPR(R29)(r3)
+	std	r30, VCPU_GPR(R30)(r3)
+	std	r31, VCPU_GPR(R31)(r3)
+
+	/* save FP state */
+	bl	kvmppc_save_fp
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+	/*
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
+	 */
+	ld	r3, HSTATE_KVM_VCPU(r13)
+	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
+	bl	kvmppc_save_tm_hv
+	nop
+91:
+#endif
+
+	/*
+	 * Set DEC to the smaller of DEC and HDEC, so that we wake
+	 * no later than the end of our timeslice (HDEC interrupts
+	 * don't wake us from nap).
+	 */
+	mfspr	r3, SPRN_DEC
+	mfspr	r4, SPRN_HDEC
+	mftb	r5
+	extsw	r3, r3			/* sign-extend both for the signed compare */
+	extsw	r4, r4
+	cmpd	r3, r4
+	ble	67f			/* DEC <= HDEC already: leave DEC alone */
+	mtspr	SPRN_DEC, r4
+67:
+	/* save expiry time of guest decrementer */
+	add	r3, r3, r5		/* expiry = DEC value read above + timebase */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	std	r3, VCPU_DEC_EXPIRES(r4)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	addi	r3, r4, VCPU_TB_CEDE
+	bl	kvmhv_accumulate_time
+#endif
+
+	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
+
+	/* Go back to host stack */
+	ld	r1, HSTATE_HOST_R1(r13)
+
+	/*
+	 * Take a nap until a decrementer or external or doorbell interrupt
+	 * occurs, with PECE1 and PECE0 set in LPCR.
+	 * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
+	 * Also clear the runlatch bit before napping.
+	 */
+kvm_do_nap:
+	li	r0,0
+	mtspr	SPRN_CTRLT, r0		/* write 0 to CTRLT: runlatch off */
+
+	li	r0,1
+	stb	r0,HSTATE_HWTHREAD_REQ(r13)
+	mfspr	r5,SPRN_LPCR
+	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
+BEGIN_FTR_SECTION
+	ori	r5, r5, LPCR_PECEDH
+	rlwimi	r5, r3, 0, LPCR_PECEDP	/* take the PECEDP bit from r3 as passed in */
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+kvm_nap_sequence:		/* desired LPCR value in r5 */
+	li	r3, PNV_THREAD_NAP	/* r3 is set up for the idle routine below */
+	mtspr	SPRN_LPCR,r5
+	isync
+
+	bl	isa206_idle_insn_mayloss
+
+	li	r0,1
+	mtspr	SPRN_CTRLT, r0		/* write 1 to CTRLT once we are awake */
+
+	mtspr	SPRN_SRR1, r3		/* put the idle routine's r3 into SRR1 */
+
+	li	r0, 0
+	stb	r0, PACA_FTRACE_ENABLED(r13)
+
+	li	r0, KVM_HWTHREAD_IN_KVM
+	stb	r0, HSTATE_HWTHREAD_STATE(r13)
+
+	lbz	r0, HSTATE_NAPPING(r13)	/* dispatch on the recorded nap type */
+	cmpwi	r0, NAPPING_CEDE
+	beq	kvm_end_cede
+	cmpwi	r0, NAPPING_NOVCPU
+	beq	kvm_novcpu_wakeup
+	cmpwi	r0, NAPPING_UNSPLIT
+	beq	kvm_unsplit_wakeup
+	twi	31,0,0 /* Nap state must not be zero */
+
+33:	mr	r4, r3			/* r4 = r3 as it was on arrival at 33: */
+	li	r3, 0			/* r3 = 0 for the r3 test after label 34 */
+	li	r12, 0			/* r12 = 0 is stored as the trap after 34 */
+	b	34f
+
+kvm_end_cede:
+	/* Woken by external or decrementer interrupt */
+
+	/* get vcpu pointer */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+	addi	r3, r4, VCPU_TB_RMINTR
+	bl	kvmhv_accumulate_time
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+	/*
+	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
+	 */
+	mr      r3, r4
+	ld      r4, VCPU_MSR(r3)
+	li	r5, 0			/* don't preserve non-vol regs */
+	bl	kvmppc_restore_tm_hv
+	nop
+	ld	r4, HSTATE_KVM_VCPU(r13)	/* r4 = vcpu pointer again */
+91:
+#endif
+
+	/* load up FP state */
+	bl	kvmppc_load_fp
+
+	/* Restore guest decrementer */
+	ld	r3, VCPU_DEC_EXPIRES(r4)
+	mftb	r7
+	subf	r3, r7, r3		/* DEC = saved expiry - current timebase */
+	mtspr	SPRN_DEC, r3
+
+	/* Load NV GPRS */
+	ld	r14, VCPU_GPR(R14)(r4)
+	ld	r15, VCPU_GPR(R15)(r4)
+	ld	r16, VCPU_GPR(R16)(r4)
+	ld	r17, VCPU_GPR(R17)(r4)
+	ld	r18, VCPU_GPR(R18)(r4)
+	ld	r19, VCPU_GPR(R19)(r4)
+	ld	r20, VCPU_GPR(R20)(r4)
+	ld	r21, VCPU_GPR(R21)(r4)
+	ld	r22, VCPU_GPR(R22)(r4)
+	ld	r23, VCPU_GPR(R23)(r4)
+	ld	r24, VCPU_GPR(R24)(r4)
+	ld	r25, VCPU_GPR(R25)(r4)
+	ld	r26, VCPU_GPR(R26)(r4)
+	ld	r27, VCPU_GPR(R27)(r4)
+	ld	r28, VCPU_GPR(R28)(r4)
+	ld	r29, VCPU_GPR(R29)(r4)
+	ld	r30, VCPU_GPR(R30)(r4)
+	ld	r31, VCPU_GPR(R31)(r4)
+
+	/* Check the wake reason in SRR1 to see why we got here */
+	bl	kvmppc_check_wake_reason
+
+	/*
+	 * Restore volatile registers since we could have called a
+	 * C routine in kvmppc_check_wake_reason
+	 *	r4 = VCPU
+	 * r3 tells us whether we need to return to host or not
+	 * WARNING: it gets checked further down:
+	 * should not modify r3 until this check is done.
+	 */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+
+	/* clear our bit in vcore->napping_threads */
+34:	ld	r5,HSTATE_KVM_VCORE(r13)
+	lbz	r7,HSTATE_PTID(r13)
+	li	r0,1
+	sld	r0,r0,r7		/* r0 = 1 << ptid, this thread's bit */
+	addi	r6,r5,VCORE_NAPPING_THREADS
+32:	lwarx	r7,0,r6
+	andc	r7,r7,r0
+	stwcx.	r7,0,r6
+	bne	32b			/* reservation lost: retry */
+	li	r0,0
+	stb	r0,HSTATE_NAPPING(r13)	/* nap type back to zero */
+
+	/* See if the wake reason saved in r3 means we need to exit */
+	stw	r12, VCPU_TRAP(r4)
+	mr	r9, r4			/* vcpu pointer in r9 for the paths below */
+	cmpdi	r3, 0
+	bgt	guest_exit_cont		/* r3 > 0: exit to the host */
+	b	maybe_reenter_guest
+
+	/* cede when already previously prodded case */
+kvm_cede_prodded:
+	li	r0,0
+	stb	r0,VCPU_PRODDED(r3)	/* clear the prodded flag */
+	sync			/* order testing prodded vs. clearing ceded */
+	stb	r0,VCPU_CEDED(r3)	/* clear the ceded flag */
+	li	r3,H_SUCCESS		/* return H_SUCCESS straight away */
+	blr
+
+	/* we've ceded but we want to give control to the host */
+kvm_cede_exit:
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* r9 = vcpu pointer for the exit path */
+	b	guest_exit_cont
+
+	/* Try to do machine check recovery in real mode */
+machine_check_realmode:
+	mr	r3, r9		/* get vcpu pointer */
+	bl	kvmppc_realmode_machine_check
+	nop
+	/* all machine checks go to virtual mode for further handling */
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* reload vcpu pointer after the call */
+	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK	/* set trap number in r12 again */
+	b	guest_exit_cont
+
+/*
+ * Call C code to handle a HMI in real mode.
+ * Only the primary thread does the call, secondary threads are handled
+ * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
+ * r9 points to the vcpu on entry
+ */
+hmi_realmode:
+	lbz	r0, HSTATE_PTID(r13)
+	cmpwi	r0, 0
+	bne	guest_exit_cont		/* ptid != 0: secondary, skip the C call */
+	bl	CFUNC(kvmppc_realmode_hmi_handler)
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* reload vcpu pointer after the call */
+	li	r12, BOOK3S_INTERRUPT_HMI	/* set trap number in r12 again */
+	b	guest_exit_cont
+
+/*
+ * Check the reason we woke from nap, and take appropriate action.
+ * Returns (in r3):
+ *	0 if nothing needs to be done
+ *	1 if something happened that needs to be handled by the host
+ *	-1 if there was a guest wakeup (IPI or msgsnd)
+ *	-2 if we handled a PCI passthrough interrupt (returned by
+ *		kvmppc_read_intr only)
+ *
+ * Also sets r12 to the interrupt vector for any interrupt that needs
+ * to be handled now by the host (0x500 for external interrupt), or zero.
+ * Modifies all volatile registers (since it may call a C function).
+ * This routine calls kvmppc_read_intr, a C function, if an external
+ * interrupt is pending.
+ */
+SYM_FUNC_START_LOCAL(kvmppc_check_wake_reason)
+	mfspr	r6, SPRN_SRR1
+BEGIN_FTR_SECTION
+	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
+FTR_SECTION_ELSE
+	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+	cmpwi	r6, 8			/* was it an external interrupt? */
+	beq	7f			/* if so, see what it was */
+	li	r3, 0			/* default result: nothing to do */
+	li	r12, 0
+	cmpwi	r6, 6			/* was it the decrementer? */
+	beq	0f
+BEGIN_FTR_SECTION
+	cmpwi	r6, 5			/* privileged doorbell? */
+	beq	0f
+	cmpwi	r6, 3			/* hypervisor doorbell? */
+	beq	3f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	cmpwi	r6, 0xa			/* Hypervisor maintenance ? */
+	beq	4f
+	li	r3, 1			/* anything else, return 1 */
+0:	blr
+
+	/* hypervisor doorbell */
+3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
+
+	/*
+	 * Clear the doorbell as we will invoke the handler
+	 * explicitly in the guest exit path.
+	 */
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
+	/* see if it's a host IPI */
+	li	r3, 1
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bnelr				/* host_ipi set: return 1 */
+	/* if not, return -1 */
+	li	r3, -1
+	blr
+
+	/* Woken up due to Hypervisor maintenance interrupt */
+4:	li	r12, BOOK3S_INTERRUPT_HMI
+	li	r3, 1
+	blr
+
+	/* external interrupt - create a stack frame so we can call C */
+7:	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)	/* save LR in the caller's frame */
+	stdu	r1, -PPC_MIN_STKFRM(r1)	/* push a minimal frame */
+	bl	CFUNC(kvmppc_read_intr)
+	nop
+	li	r12, BOOK3S_INTERRUPT_EXTERNAL
+	cmpdi	r3, 1
+	ble	1f			/* r3 <= 1: keep the EXTERNAL trap in r12 */
+
+	/*
+	 * Return code of 2 means PCI passthrough interrupt, but
+	 * we need to return back to host to complete handling the
+	 * interrupt. Trap reason is expected in r12 by guest
+	 * exit code.
+	 */
+	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
+1:
+	ld	r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)	/* reload the saved LR */
+	addi	r1, r1, PPC_MIN_STKFRM	/* pop the frame */
+	mtlr	r0
+	blr
+SYM_FUNC_END(kvmppc_check_wake_reason)
+
+/*
+ * Save away FP, VMX and VSX registers.
+ * r3 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
+ */
+SYM_FUNC_START_LOCAL(kvmppc_save_fp)
+	mflr	r30			/* keep the return address in r30 */
+	mr	r31,r3			/* keep the vcpu pointer in r31 */
+	mfmsr	r5
+	ori	r8,r5,MSR_FP		/* r8 = current MSR with MSR_FP set */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	oris	r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+	oris	r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+	mtmsrd	r8			/* apply the MSR with those bits set */
+	addi	r3,r3,VCPU_FPRS		/* r3 = address of the vcpu FP area */
+	bl	store_fp_state
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	addi	r3,r31,VCPU_VRS		/* r3 = address of the vcpu VR area */
+	bl	store_vr_state
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+	mfspr	r6,SPRN_VRSAVE
+	stw	r6,VCPU_VRSAVE(r31)
+	mtlr	r30			/* restore the return address */
+	blr
+SYM_FUNC_END(kvmppc_save_fp)
+
+/*
+ * Load up FP, VMX and VSX registers
+ * r4 = vcpu pointer
+ * N.B. r30 and r31 are volatile across this function,
+ * thus it is not callable from C.
+ */
+SYM_FUNC_START_LOCAL(kvmppc_load_fp)
+	mflr	r30			/* keep the return address in r30 */
+	mr	r31,r4			/* keep the vcpu pointer in r31 */
+	mfmsr	r9
+	ori	r8,r9,MSR_FP		/* r8 = current MSR with MSR_FP set */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	oris	r8,r8,MSR_VEC@h
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+	oris	r8,r8,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+	mtmsrd	r8			/* apply the MSR with those bits set */
+	addi	r3,r4,VCPU_FPRS		/* r3 = address of the vcpu FP area */
+	bl	load_fp_state
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	addi	r3,r31,VCPU_VRS		/* r3 = address of the vcpu VR area */
+	bl	load_vr_state
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+	lwz	r7,VCPU_VRSAVE(r31)
+	mtspr	SPRN_VRSAVE,r7
+	mtlr	r30			/* restore the return address */
+	mr	r4,r31			/* return with r4 = vcpu pointer again */
+	blr
+SYM_FUNC_END(kvmppc_load_fp)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Save transactional state and TM-related registers.
+ * Called with r3 pointing to the vcpu struct and r4 containing
+ * the guest MSR value.
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * If r5 == 0, this can modify all checkpointed registers, but
+ * restores r1 and r2 before exit.
+ */
+_GLOBAL_TOC(kvmppc_save_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
+	/* See if we need to handle fake suspend mode */
+BEGIN_FTR_SECTION
+	b	__kvmppc_save_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+
+	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+	cmpwi	r0, 0
+	beq	__kvmppc_save_tm	/* no: use the ordinary save routine */
+
+	/* The following code handles the fake_suspend = 1 case */
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)	/* save LR in the caller's frame */
+	stdu	r1, -TM_FRAME_SIZE(r1)	/* push a TM_FRAME_SIZE-byte frame */
+
+	/* Turn on TM. */
+	mfmsr	r8
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG	/* insert a 1 at bit MSR_TM_LG */
+	mtmsrd	r8
+
+	rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+	beq	4f			/* both TS bits zero: skip the treclaim */
+BEGIN_FTR_SECTION
+	bl	pnv_power9_force_smt4_catch
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+	nop
+
+	/*
+	 * It's possible that treclaim. may modify registers, if we have lost
+	 * track of fake-suspend state in the guest due to it using rfscv.
+	 * Save and restore registers in case this occurs.
+	 */
+	mfspr	r3, SPRN_DSCR
+	mfspr	r4, SPRN_XER
+	mfspr	r5, SPRN_AMR
+	/* SPRN_TAR would need to be saved here if the kernel ever used it */
+	mfcr	r12
+	SAVE_NVGPRS(r1)
+	SAVE_GPR(2, r1)
+	SAVE_GPR(3, r1)			/* r3 holds the DSCR value read above */
+	SAVE_GPR(4, r1)			/* r4 holds the XER value read above */
+	SAVE_GPR(5, r1)			/* r5 holds the AMR value read above */
+	stw	r12, 8(r1)		/* CR value read above */
+	std	r1, HSTATE_HOST_R1(r13)	/* r1 is reloaded from here after treclaim */
+
+	/* We have to treclaim here because that's the only way to do S->N */
+	li	r3, TM_CAUSE_KVM_RESCHED
+	TRECLAIM(R3)
+
+	GET_PACA(r13)
+	ld	r1, HSTATE_HOST_R1(r13)
+	REST_GPR(2, r1)
+	REST_GPR(3, r1)
+	REST_GPR(4, r1)
+	REST_GPR(5, r1)
+	lwz	r12, 8(r1)
+	REST_NVGPRS(r1)
+	mtspr	SPRN_DSCR, r3
+	mtspr	SPRN_XER, r4
+	mtspr	SPRN_AMR, r5
+	mtcr	r12
+	HMT_MEDIUM
+
+	/*
+	 * We were in fake suspend, so we are not going to save the
+	 * register state as the guest checkpointed state (since
+	 * we already have it), therefore we can now use any volatile GPR.
+	 * In fact treclaim in fake suspend state doesn't modify
+	 * any registers.
+	 */
+
+BEGIN_FTR_SECTION
+	bl	pnv_power9_force_smt4_release
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+	nop
+
+4:
+	mfspr	r3, SPRN_PSSCR
+	/* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
+	li	r0, PSSCR_FAKE_SUSPEND
+	andc	r3, r3, r0
+	mtspr	SPRN_PSSCR, r3
+
+	/* Don't save TEXASR, use value from last exit in real suspend state */
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+
+	addi	r1, r1, TM_FRAME_SIZE	/* pop the frame */
+	ld	r0, PPC_LR_STKOFF(r1)	/* reload the saved LR */
+	mtlr	r0
+	blr
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with r3 pointing to the vcpu struct
+ * and r4 containing the guest MSR value.
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * This potentially modifies all checkpointed registers.
+ * It restores r1 and r2 from the PACA.
+ */
+_GLOBAL_TOC(kvmppc_restore_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
+	/*
+	 * If we are doing TM emulation for the guest on a POWER9 DD2,
+	 * then we don't actually do a trechkpt -- we either set up
+	 * fake-suspend mode, or emulate a TM rollback.
+	 */
+BEGIN_FTR_SECTION
+	b	__kvmppc_restore_tm	/* CPUs without P9_TM_HV_ASSIST always go here */
+END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)	/* LR is stored relative to the caller's r1 */
+
+	li	r0, 0
+	stb	r0, HSTATE_FAKE_SUSPEND(r13)	/* start with the fake-suspend flag clear */
+
+	/* Turn on TM so we can restore TM SPRs */
+	mfmsr	r5
+	li	r0, 1
+	rldimi	r5, r0, MSR_TM_LG, 63-MSR_TM_LG	/* insert a 1 at bit MSR_TM_LG */
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r3)
+	ld	r6, VCPU_TFIAR(r3)
+	ld	r7, VCPU_TEXASR(r3)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+	rldicl. r5, r4, 64 - MSR_TS_S_LG, 62	/* r5 = two-bit TS field of guest MSR */
+	beqlr		/* TM not active in guest */
+
+	/* Make sure the failure summary is set */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	cmpwi	r5, 1		/* check for suspended state */
+	bgt	10f
+	stb	r5, HSTATE_FAKE_SUSPEND(r13)	/* r5 is 1 here: set the fake-suspend flag */
+	b	9f		/* and return */
+10:	stdu	r1, -PPC_MIN_STKFRM(r1)
+	/* guest is in transactional state, so simulate rollback */
+	bl	kvmhv_emulate_tm_rollback	/* r3 is still the vcpu pointer from entry */
+	nop
+	addi	r1, r1, PPC_MIN_STKFRM
+9:	ld	r0, PPC_LR_STKOFF(r1)	/* common exit: return via the saved LR */
+	mtlr	r0
+	blr
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+/*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * r12 is (CR << 32) | vector
+ * r13 points to our PACA
+ * r12 is saved in HSTATE_SCRATCH0(r13)
+ * r9 is saved in HSTATE_SCRATCH2(r13)
+ * r13 is saved in HSPRG1
+ * cfar is saved in HSTATE_CFAR(r13)
+ * ppr is saved in HSTATE_PPR(r13)
+ */
+kvmppc_bad_host_intr:
+	/*
+	 * Switch to the emergency stack, but start half-way down in
+	 * case we were already on it.
+	 */
+	mr	r9, r1
+	std	r1, PACAR1(r13)
+	ld	r1, PACAEMERGSP(r13)
+	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
+	std	r9, 0(r1)		/* interrupted r1 goes at offset 0 of the new frame */
+	std	r0, GPR0(r1)
+	std	r9, GPR1(r1)		/* ... and into the GPR1 slot */
+	std	r2, GPR2(r1)
+	SAVE_GPRS(3, 8, r1)
+	srdi	r0, r12, 32		/* r0 = CR half of r12 */
+	clrldi	r12, r12, 32		/* r12 = vector half of r12 */
+	std	r0, _CCR(r1)
+	std	r12, _TRAP(r1)
+	andi.	r0, r12, 2		/* vector bit 0x2 selects the HSRR/HDAR set */
+	beq	1f
+	mfspr	r3, SPRN_HSRR0
+	mfspr	r4, SPRN_HSRR1
+	mfspr	r5, SPRN_HDAR
+	mfspr	r6, SPRN_HDSISR
+	b	2f
+1:	mfspr	r3, SPRN_SRR0
+	mfspr	r4, SPRN_SRR1
+	mfspr	r5, SPRN_DAR
+	mfspr	r6, SPRN_DSISR
+2:	std	r3, _NIP(r1)
+	std	r4, _MSR(r1)
+	std	r5, _DAR(r1)
+	std	r6, _DSISR(r1)
+	ld	r9, HSTATE_SCRATCH2(r13)	/* reload the r9/r12 values stashed per the header */
+	ld	r12, HSTATE_SCRATCH0(r13)
+	GET_SCRATCH0(r0)
+	SAVE_GPRS(9, 12, r1)
+	std	r0, GPR13(r1)		/* value fetched by GET_SCRATCH0 is recorded as r13 */
+	SAVE_NVGPRS(r1)
+	ld	r5, HSTATE_CFAR(r13)
+	std	r5, ORIG_GPR3(r1)	/* the ORIG_GPR3 slot is used to hold CFAR */
+	mflr	r3
+	mfctr	r4
+	mfxer	r5
+	lbz	r6, PACAIRQSOFTMASK(r13)
+	std	r3, _LINK(r1)
+	std	r4, _CTR(r1)
+	std	r5, _XER(r1)
+	std	r6, SOFTE(r1)
+	LOAD_PACA_TOC()
+	LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER)
+	std	r3, STACK_INT_FRAME_MARKER(r1)
+
+	/*
+	 * XXX On POWER7 and POWER8, we just spin here since we don't
+	 * know what the other threads are doing (and we don't want to
+	 * coordinate with them) - but at least we now have register state
+	 * in memory that we might be able to look at from another CPU.
+	 */
+	b	.
+
+/*
+ * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
+ * from VCPU_INTR_MSR and is modified based on the required TM state changes.
+ *   r11 has the guest MSR value (in/out)
+ *   r9 has a vcpu pointer (in)
+ *   r0 is used as a scratch register
+ */
+SYM_FUNC_START_LOCAL(kvmppc_msr_interrupt)
+	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62	/* r0 = two-bit TS field of the old MSR */
+	cmpwi	r0, 2 /* Check if we are in transactional state..  */
+	ld	r11, VCPU_INTR_MSR(r9)	/* the load does not disturb the compare result */
+	bne	1f
+	/* ... if transactional, change to suspended */
+	li	r0, 1
+1:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG	/* insert r0 as the new MSR's TS */
+	blr
+SYM_FUNC_END(kvmppc_msr_interrupt)
+
+/*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
+ * Load up guest PMU state.  R3 points to the vcpu struct.
+ */
+SYM_FUNC_START_LOCAL(kvmhv_load_guest_pmu)
+	mr	r4, r3			/* vcpu pointer lives in r4 from here on */
+	mflr	r0			/* LR is parked in r0 because of the beql below */
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
+	isync
+BEGIN_FTR_SECTION
+	ld	r3, VCPU_MMCR(r4)
+	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+	cmpwi	r5, MMCR0_PMAO		/* PMAO set while PMAO_SYNC is clear? */
+	beql	kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
+	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
+	lwz	r6, VCPU_PMC + 8(r4)
+	lwz	r7, VCPU_PMC + 12(r4)
+	lwz	r8, VCPU_PMC + 16(r4)
+	lwz	r9, VCPU_PMC + 20(r4)
+	mtspr	SPRN_PMC1, r3
+	mtspr	SPRN_PMC2, r5
+	mtspr	SPRN_PMC3, r6
+	mtspr	SPRN_PMC4, r7
+	mtspr	SPRN_PMC5, r8
+	mtspr	SPRN_PMC6, r9
+	ld	r3, VCPU_MMCR(r4)	/* value for MMCR0, written last (below) */
+	ld	r5, VCPU_MMCR + 8(r4)	/* second doubleword is written to MMCR1 */
+	ld	r6, VCPU_MMCRA(r4)
+	ld	r7, VCPU_SIAR(r4)
+	ld	r8, VCPU_SDAR(r4)
+	mtspr	SPRN_MMCR1, r5
+	mtspr	SPRN_MMCRA, r6
+	mtspr	SPRN_SIAR, r7
+	mtspr	SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+	ld	r5, VCPU_MMCR + 16(r4)	/* third doubleword is written to MMCR2 */
+	ld	r6, VCPU_SIER(r4)
+	mtspr	SPRN_MMCR2, r5
+	mtspr	SPRN_SIER, r6
+	lwz	r7, VCPU_PMC + 24(r4)	/* words 6 and 7 are written to SPMC1/SPMC2 */
+	lwz	r8, VCPU_PMC + 28(r4)
+	ld	r9, VCPU_MMCRS(r4)
+	mtspr	SPRN_SPMC1, r7
+	mtspr	SPRN_SPMC2, r8
+	mtspr	SPRN_MMCRS, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mtspr	SPRN_MMCR0, r3		/* MMCR0 goes in after all other PMU registers */
+	isync
+	mtlr	r0
+	blr
+SYM_FUNC_END(kvmhv_load_guest_pmu)
+
+/*
+ * void kvmhv_load_host_pmu(void)
+ *
+ * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
+ */
+SYM_FUNC_START_LOCAL(kvmhv_load_host_pmu)
+	mflr	r0			/* LR is parked in r0 because of the beql below */
+	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
+	cmpwi	r4, 0
+	beq	23f			/* skip if not */
+BEGIN_FTR_SECTION
+	ld	r3, HSTATE_MMCR0(r13)
+	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+	cmpwi	r4, MMCR0_PMAO		/* PMAO set while PMAO_SYNC is clear? */
+	beql	kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+	lwz	r3, HSTATE_PMC1(r13)
+	lwz	r4, HSTATE_PMC2(r13)
+	lwz	r5, HSTATE_PMC3(r13)
+	lwz	r6, HSTATE_PMC4(r13)
+	lwz	r8, HSTATE_PMC5(r13)
+	lwz	r9, HSTATE_PMC6(r13)
+	mtspr	SPRN_PMC1, r3
+	mtspr	SPRN_PMC2, r4
+	mtspr	SPRN_PMC3, r5
+	mtspr	SPRN_PMC4, r6
+	mtspr	SPRN_PMC5, r8
+	mtspr	SPRN_PMC6, r9
+	ld	r3, HSTATE_MMCR0(r13)	/* value for MMCR0, written last (below) */
+	ld	r4, HSTATE_MMCR1(r13)
+	ld	r5, HSTATE_MMCRA(r13)
+	ld	r6, HSTATE_SIAR(r13)
+	ld	r7, HSTATE_SDAR(r13)
+	mtspr	SPRN_MMCR1, r4
+	mtspr	SPRN_MMCRA, r5
+	mtspr	SPRN_SIAR, r6
+	mtspr	SPRN_SDAR, r7
+BEGIN_FTR_SECTION
+	ld	r8, HSTATE_MMCR2(r13)
+	ld	r9, HSTATE_SIER(r13)
+	mtspr	SPRN_MMCR2, r8
+	mtspr	SPRN_SIER, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	mtspr	SPRN_MMCR0, r3		/* MMCR0 goes in after all other PMU registers */
+	isync
+	mtlr	r0
+23:	blr				/* the skip path gets here with LR untouched */
+SYM_FUNC_END(kvmhv_load_host_pmu)
+
+/*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
+ * Save guest PMU state into the vcpu struct.
+ * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
+ */
+SYM_FUNC_START_LOCAL(kvmhv_save_guest_pmu)
+	mr	r9, r3			/* r9 = vcpu for the rest of the function */
+	mr	r8, r4			/* r8 = full save flag, tested further down */
+BEGIN_FTR_SECTION
+	/*
+	 * POWER8 seems to have a hardware bug where setting
+	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+	 * when some counters are already negative doesn't seem
+	 * to cause a performance monitor alert (and hence interrupt).
+	 * The effect of this is that when saving the PMU state,
+	 * if there is no PMU alert pending when we read MMCR0
+	 * before freezing the counters, but one becomes pending
+	 * before we read the counters, we lose it.
+	 * To work around this, we need a way to freeze the counters
+	 * before reading MMCR0.  Normally, freezing the counters
+	 * is done by writing MMCR0 (to set MMCR0[FC]) which
+	 * unavoidably writes MMCR0[PMAO] as well.  On POWER8,
+	 * we can also freeze the counters using MMCR2, by writing
+	 * 1s to all the counter freeze condition bits (there are
+	 * 9 bits each for 6 counters).
+	 */
+	li	r3, -1			/* set all freeze bits */
+	clrrdi	r3, r3, 10		/* all ones except the low 10 bits */
+	mfspr	r10, SPRN_MMCR2		/* old MMCR2 is kept in r10 for the save below */
+	mtspr	SPRN_MMCR2, r3
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	li	r3, 1
+	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
+	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
+	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
+	mfspr	r6, SPRN_MMCRA
+	/* Clear MMCRA in order to disable SDAR updates */
+	li	r7, 0
+	mtspr	SPRN_MMCRA, r7
+	isync
+	cmpwi	r8, 0			/* did they ask for PMU stuff to be saved? */
+	bne	21f
+	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
+	b	22f
+21:	mfspr	r5, SPRN_MMCR1
+	mfspr	r7, SPRN_SIAR
+	mfspr	r8, SPRN_SDAR		/* r8 is reused; the flag is no longer needed */
+	std	r4, VCPU_MMCR(r9)	/* MMCR0 as read before the freeze write */
+	std	r5, VCPU_MMCR + 8(r9)
+	std	r6, VCPU_MMCRA(r9)	/* MMCRA as read before it was cleared */
+BEGIN_FTR_SECTION
+	std	r10, VCPU_MMCR + 16(r9)	/* MMCR2 as read before the freeze write */
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+	std	r7, VCPU_SIAR(r9)
+	std	r8, VCPU_SDAR(r9)
+	mfspr	r3, SPRN_PMC1
+	mfspr	r4, SPRN_PMC2
+	mfspr	r5, SPRN_PMC3
+	mfspr	r6, SPRN_PMC4
+	mfspr	r7, SPRN_PMC5
+	mfspr	r8, SPRN_PMC6
+	stw	r3, VCPU_PMC(r9)
+	stw	r4, VCPU_PMC + 4(r9)
+	stw	r5, VCPU_PMC + 8(r9)
+	stw	r6, VCPU_PMC + 12(r9)
+	stw	r7, VCPU_PMC + 16(r9)
+	stw	r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_SIER
+	std	r5, VCPU_SIER(r9)
+	mfspr	r6, SPRN_SPMC1
+	mfspr	r7, SPRN_SPMC2
+	mfspr	r8, SPRN_MMCRS
+	stw	r6, VCPU_PMC + 24(r9)
+	stw	r7, VCPU_PMC + 28(r9)
+	std	r8, VCPU_MMCRS(r9)
+	lis	r4, 0x8000		/* NOTE(review): presumably a freeze bit for MMCRS - confirm */
+	mtspr	SPRN_MMCRS, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22:	blr
+SYM_FUNC_END(kvmhv_save_guest_pmu)
+
+/*
+ * This works around a hardware bug on POWER8E processors, where
+ * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
+ * performance monitor interrupt.  Instead, when we need to have
+ * an interrupt pending, we have to arrange for a counter to overflow.
+ */
+kvmppc_fix_pmao:
+	li	r3, 0			/* r3 is the only GPR modified by this routine */
+	mtspr	SPRN_MMCR2, r3		/* MMCR2 = 0 */
+	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h
+	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
+	mtspr	SPRN_MMCR0, r3		/* MMCR0 = PMXE | FCECE | PMCjCE | C56RUN */
+	lis	r3, 0x7fff
+	ori	r3, r3, 0xffff
+	mtspr	SPRN_PMC6, r3		/* PMC6 = 0x7fffffff, one count below 0x80000000 */
+	isync
+	blr
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)	/* r6 = timebase offset from the vcore */
+	mftb	r5
+	subf	r5, r6, r5	/* subtract current timebase offset */
+	std	r3, VCPU_CUR_ACTIVITY(r4)	/* record which activity is now running */
+	std	r5, VCPU_ACTIVITY_START(r4)	/* and the adjusted timebase it began at */
+	blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
+	ld	r5, VCPU_CUR_ACTIVITY(r4)	/* r5 = activity being ended */
+	ld	r6, VCPU_ACTIVITY_START(r4)	/* r6 = when it started */
+	std	r3, VCPU_CUR_ACTIVITY(r4)	/* the new activity becomes current */
+	mftb	r7
+	subf	r7, r8, r7	/* subtract current timebase offset */
+	std	r7, VCPU_ACTIVITY_START(r4)
+	cmpdi	r5, 0
+	beqlr			/* no previous activity: nothing to accumulate */
+	subf	r3, r6, r7	/* r3 = elapsed time (now - start) */
+	ld	r8, TAS_SEQCOUNT(r5)
+	cmpdi	r8, 0		/* result is consumed by the beq 3f below */
+	addi	r8, r8, 1
+	std	r8, TAS_SEQCOUNT(r5)	/* first seqcount bump, before the update */
+	lwsync
+	ld	r7, TAS_TOTAL(r5)
+	add	r7, r7, r3
+	std	r7, TAS_TOTAL(r5)
+	ld	r6, TAS_MIN(r5)
+	ld	r7, TAS_MAX(r5)
+	beq	3f		/* seqcount was 0 on entry: store min unconditionally */
+	cmpd	r3, r6
+	bge	1f		/* elapsed >= min: keep the old min */
+3:	std	r3, TAS_MIN(r5)
+1:	cmpd	r3, r7
+	ble	2f		/* elapsed <= max: keep the old max */
+	std	r3, TAS_MAX(r5)
+2:	lwsync
+	addi	r8, r8, 1
+	std	r8, TAS_SEQCOUNT(r5)	/* second seqcount bump, after the update */
+	blr
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
new file mode 100644
index 0000000000..866cadd700
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/* Record a TM failure with the given cause code in the vcpu's TEXASR/TFIAR. */
+static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+{
+	const u64 guest_msr = vcpu->arch.shregs.msr;
+	u64 fail_addr = vcpu->arch.regs.nip & ~0x3ull;
+	u64 status = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
+
+	if (MSR_TM_SUSPENDED(guest_msr))
+		status |= TEXASR_SUSP;
+	if (guest_msr & MSR_PR) {
+		status |= TEXASR_PR;
+		fail_addr |= 1;		/* low TFIAR bit is set alongside TEXASR_PR */
+	}
+	/* Only the low 26 bits (ROT and TL fields) of the old TEXASR survive */
+	vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | status;
+	vcpu->arch.tfiar = fail_addr;
+}
+
+/*
+ * This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
+ * We expect to find a TM-related instruction to be emulated.  The
+ * instruction image is in vcpu->arch.emul_inst.  If the guest was in
+ * TM suspended or transactional state, the checkpointed state has been
+ * reclaimed and is in the vcpu struct.  The CPU is in virtual mode in
+ * host context.
+ *
+ * Returns RESUME_GUEST when the instruction was emulated or an interrupt
+ * was queued for the guest, or -1 to rerun the host interrupt handler.
+ */
+int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+{
+	u32 instr = vcpu->arch.emul_inst;
+	u64 msr = vcpu->arch.shregs.msr;
+	u64 newmsr, bescr;
+	int ra, rs;
+
+	/*
+	 * The TM softpatch interrupt sets NIP to the instruction following
+	 * the faulting instruction, which is not executed. Rewind nip to the
+	 * faulting instruction so it looks like a normal synchronous
+	 * interrupt, then update nip in the places where the instruction is
+	 * emulated.
+	 */
+	vcpu->arch.regs.nip -= 4;
+
+	/*
+	 * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+	 * in these instructions, so masking bit 31 out doesn't change these
+	 * instructions. For treclaim., tsr., and trechkpt. instructions if bit
+	 * 31 = 0 then they are per ISA invalid forms, however P9 UM, in section
+	 * 4.6.10 Book II Invalid Forms, informs specifically that ignoring bit
+	 * 31 is an acceptable way to handle these invalid forms that have
+	 * bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/
+	 * bit 31 set) can generate a softpatch interrupt. Hence both forms
+	 * are handled below for these instructions so they behave the same way.
+	 */
+	switch (instr & PO_XOP_OPCODE_MASK) {
+	case PPC_INST_RFID:
+		/* XXX do we need to check for PR=0 here? */
+		newmsr = vcpu->arch.shregs.srr1;
+		/* should only get here for Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		/* nip was rewound above, so cfar is the address of the rfid */
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
+		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
+		return RESUME_GUEST;
+
+	case PPC_INST_RFEBB:
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check EBB facility is available */
+		if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		bescr = vcpu->arch.bescr;
+		/* expect to see a S->T transition requested */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       ((bescr >> 30) & 3) == 2));
+		/* BESCR[GE] is taken from bit (1 << 11) of the instruction */
+		bescr &= ~BESCR_GE;
+		if (instr & (1 << 11))
+			bescr |= BESCR_GE;
+		vcpu->arch.bescr = bescr;
+		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
+		vcpu->arch.regs.nip = vcpu->arch.ebbrr;
+		return RESUME_GUEST;
+
+	case PPC_INST_MTMSRD:
+		/* XXX do we need to check for PR=0 here? */
+		rs = (instr >> 21) & 0x1f;
+		newmsr = kvmppc_get_gpr(vcpu, rs);
+		/* check this is a Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		/* mtmsrd doesn't change LE */
+		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* Set CR0 to indicate previous transactional state */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		/* L=1 => tresume, L=0 => tsuspend */
+		if (instr & (1 << 21)) {
+			if (MSR_TM_SUSPENDED(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		} else {
+			if (MSR_TM_TRANSACTIONAL(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
+		}
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If no transaction active, generate TM bad thing */
+		if (!MSR_TM_ACTIVE(msr)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+		/* If failure was not previously recorded, recompute TEXASR */
+		if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
+			/* cause = low byte of GPR[RA], or 0 when the RA field is 0 */
+			ra = (instr >> 16) & 0x1f;
+			if (ra)
+				ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
+			emulate_tx_failure(vcpu, ra);
+		}
+
+		copy_from_checkpoint(vcpu);
+
+		/* Set CR0 to indicate previous transactional state */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK):
+		/* XXX do we need to check for PR=0 here? */
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If transaction active or TEXASR[FS] = 0, bad thing */
+		if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+
+		copy_to_checkpoint(vcpu);
+
+		/* Set CR0 to indicate previous transactional state */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		/* TS was not active (checked above), so OR-ing in S yields S */
+		vcpu->arch.shregs.msr = msr | MSR_TS_S;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+	}
+
+	/* What should we do here? We didn't recognize the instruction */
+	kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+	pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation\n", instr);
+
+	return RESUME_GUEST;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
new file mode 100644
index 0000000000..fad931f224
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Handles the cases where the guest is in real suspend mode (the caller has
+ * checked MSR[TS] = S with the fake-suspend flag clear) and we want to get
+ * back to the guest without dooming the transaction.  Returns 1 if the
+ * instruction in vcpu->arch.emul_inst was handled here, 0 if it was not.
+ */
+int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+{
+	u32 instr = vcpu->arch.emul_inst;
+	u64 newmsr, msr, bescr;
+	int rs;
+
+	/*
+	 * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+	 * in these instructions, so masking bit 31 out doesn't change these
+	 * instructions. For the tsr. instruction if bit 31 = 0 then it is per
+	 * ISA an invalid form, however P9 UM, in section 4.6.10 Book II Invalid
+	 * Forms, informs specifically that ignoring bit 31 is an acceptable way
+	 * to handle TM-related invalid forms that have bit 31 = 0. Moreover,
+	 * for emulation purposes both forms (w/ and wo/ bit 31 set) can
+	 * generate a softpatch interrupt. Hence both forms are handled below
+	 * for tsr. to make them behave the same way.
+	 */
+	switch (instr & PO_XOP_OPCODE_MASK) {
+	case PPC_INST_RFID:
+		/* XXX do we need to check for PR=0 here? */
+		newmsr = vcpu->arch.shregs.srr1;
+		/* should only get here for Sx -> T1 transition */
+		if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+			return 0;
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.cfar = vcpu->arch.regs.nip - 4; /* nip is not rewound in this function */
+		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
+		return 1;
+
+	case PPC_INST_RFEBB:
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		msr = vcpu->arch.shregs.msr;
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+			return 0;
+		/* check EBB facility is available */
+		/* FSCR is read from the SPR, and only when MSR[PR] is set */
+		if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
+		    ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
+			return 0;
+		bescr = mfspr(SPRN_BESCR); /* BESCR is read from and written back to the SPR */
+		/* expect to see a S->T transition requested */
+		if (((bescr >> 30) & 3) != 2)
+			return 0;
+		/* BESCR[GE] is taken from bit (1 << 11) of the instruction */
+		bescr &= ~BESCR_GE;
+		if (instr & (1 << 11))
+			bescr |= BESCR_GE;
+		mtspr(SPRN_BESCR, bescr);
+		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.regs.nip = mfspr(SPRN_EBBRR); /* new nip comes from the EBBRR SPR */
+		return 1;
+
+	case PPC_INST_MTMSRD:
+		/* XXX do we need to check for PR=0 here? */
+		rs = (instr >> 21) & 0x1f;
+		newmsr = kvmppc_get_gpr(vcpu, rs);
+		msr = vcpu->arch.shregs.msr;
+		/* check this is a Sx -> T1 transition */
+		if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+			return 0;
+		/* mtmsrd doesn't change LE */
+		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		return 1; /* nip is left as it was on entry */
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
+		/* we know the MSR has the TS field = S (0b01) here */
+		msr = vcpu->arch.shregs.msr;
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+			return 0;
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
+			return 0;
+		/* L=1 => tresume => set TS to T (0b10) */
+		/* with L=0 the MSR is left unchanged (TS is already S) */
+		if (instr & (1 << 21))
+			vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		/* Set CR0 to 0b0010 */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			0x20000000;
+		return 1; /* nip is left as it was on entry */
+	}
+
+	return 0; /* any other instruction is not handled here */
+}
+
+/*
+ * Roll a guest that is being resumed in TM transactional state back to
+ * its checkpoint: TS becomes N and execution continues at TFHAR.
+ */
+void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.nip = vcpu->arch.tfhar;
+	vcpu->arch.shregs.msr = vcpu->arch.shregs.msr & ~MSR_TS_MASK; /* N state */
+	copy_from_checkpoint(vcpu);
+	vcpu->arch.regs.ccr = 0xa0000000 | (vcpu->arch.regs.ccr & 0x0fffffff); /* CR0 = 0b1010 */
+}
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
new file mode 100644
index 0000000000..e2d6f9327f
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -0,0 +1,1223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Secure pages management: Migration of pages between normal and secure
+ * memory of KVM guests.
+ *
+ * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+/*
+ * A pseries guest can be run as secure guest on Ultravisor-enabled
+ * POWER platforms. On such platforms, this driver will be used to manage
+ * the movement of guest pages between the normal memory managed by
+ * hypervisor (HV) and secure memory managed by Ultravisor (UV).
+ *
+ * The page-in or page-out requests from UV will come to HV as hcalls and
+ * HV will call back into UV via ultracalls to satisfy these page requests.
+ *
+ * Private ZONE_DEVICE memory equal to the amount of secure memory
+ * available in the platform for running secure guests is hotplugged.
+ * Whenever a page belonging to the guest becomes secure, a page from this
+ * private device memory is used to represent and track that secure page
+ * on the HV side. Some pages (like virtio buffers, VPA pages etc) are
+ * shared between UV and HV. However such pages aren't represented by
+ * device private memory and mappings to shared memory exist in both
+ * UV and HV page tables.
+ */
+
+/*
+ * Notes on locking
+ *
+ * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
+ * page-in and page-out requests for the same GPA. Concurrent accesses
+ * can either come via UV (guest vCPUs requesting for same page)
+ * or when HV and guest simultaneously access the same page.
+ * This mutex serializes the migration of page from HV(normal) to
+ * UV(secure) and vice versa. So the serialization points are around
+ * migrate_vma routines and page-in/out routines.
+ *
+ * Per-guest mutex comes with a cost though. Mainly it serializes the
+ * fault path as page-out can occur when HV faults on accessing secure
+ * guest pages. Currently UV issues page-in requests for all the guest
+ * PFNs one at a time during early boot (UV_ESM uvcall), so this is
+ * not a cause for concern. Also currently the number of page-outs caused
+ * by HV touching secure pages is very very low. If and when UV supports
+ * overcommitting, then we might see concurrent guest driven page-outs.
+ *
+ * Locking order
+ *
+ * 1. kvm->srcu - Protects KVM memslots
+ * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
+ *			     as sync-points for page-in/out
+ */
+
+/*
+ * Notes on page size
+ *
+ * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
+ * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
+ * secure GPAs at 64K page size and maintains one device PFN for each
+ * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
+ * for 64K page at a time.
+ *
+ * HV faulting on secure pages: When HV touches any secure page, it
+ * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
+ * UV splits and remaps the 2MB page if necessary and copies out the
+ * required 64K page contents.
+ *
+ * Shared pages: Whenever guest shares a secure page, UV will split and
+ * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
+ *
+ * HV invalidating a page: When a regular page belonging to secure
+ * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
+ * page size. Using 64K page size is correct here because any non-secure
+ * page will essentially be of 64K page size. Splitting by UV during sharing
+ * and page-out ensures this.
+ *
+ * Page fault handling: When HV handles page fault of a page belonging
+ * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
+ * Using 64K size is correct here too as UV would have split the 2MB page
+ * into 64k mappings and would have done page-outs earlier.
+ *
+ * In summary, the current secure pages handling code in HV assumes
+ * 64K page size and in fact fails any page-in/page-out requests of
+ * non-64K size upfront. If and when UV starts supporting multiple
+ * page-sizes, we need to break this assumption.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/kvm_host.h>
+#include <linux/ksm.h>
+#include <linux/of.h>
+#include <linux/memremap.h>
+#include <asm/ultravisor.h>
+#include <asm/mman.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_uvmem.h>
+
+static struct dev_pagemap kvmppc_uvmem_pgmap;
+static unsigned long *kvmppc_uvmem_bitmap;
+static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
+
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ *	a Secure VM, the contents of the GFN is not accessible
+ *	to the Hypervisor.  This GFN can be backed by a secure-PFN,
+ *	or can be backed by a normal-PFN with contents encrypted.
+ *	The former is true when the GFN is paged-in into the
+ *	ultravisor. The latter is true when the GFN is paged-out
+ *	of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with
+ *	a secure VM. The contents of the GFN is accessible to
+ *	Hypervisor. This GFN is backed by a normal-PFN and its
+ *	content is un-encrypted.
+ *
+ * (c) Normal - The GFN is normal. The GFN is associated with
+ *	a normal VM. The contents of the GFN is accessible to
+ *	the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM:  A VM whose contents are always accessible to
+ *	the hypervisor.  All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ *	hypervisor without the VM's consent.  Its GFNs are
+ *	either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ *	The transition starts on successful return of
+ *	H_SVM_INIT_START, and ends on successful return
+ *	of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ *	in any of the three states; i.e Secure-GFN, Shared-GFN,
+ *	and Normal-GFN.	The VM never executes in this state
+ *	in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ *	The state of a memory slot mirrors the state of the
+ *	VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ *  A VM always starts in Normal Mode.
+ *
+ *  H_SVM_INIT_START moves the VM into transient state. During this
+ *  time the Ultravisor may request some of its GFNs to be shared or
+ *  secured. So its GFNs can be in one of the three GFN states.
+ *
+ *  H_SVM_INIT_DONE moves the VM entirely from transient state to
+ *  secure-state. At this point any left-over normal-GFNs are
+ *  transitioned to Secure-GFN.
+ *
+ *  H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ *  All its GFNs are moved to Normal-GFNs.
+ *
+ *  UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ *  the secure-GFNs and shared-GFNs are transitioned to normal-GFNs.
+ *  Note: The contents of the normal-GFNs are undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is paged-out. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_GFN_SHARED flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * |        |     Share  |  Unshare | SVM       |H_SVM_INIT_DONE|
+ * |        |operation   |operation | abort/    |               |
+ * |        |            |          | terminate |               |
+ * -------------------------------------------------------------
+ * |        |            |          |           |               |
+ * | Secure |     Shared | Secure   |Normal     |Secure         |
+ * |        |            |          |           |               |
+ * | Shared |     Shared | Secure   |Normal     |Shared         |
+ * |        |            |          |           |               |
+ * | Normal |     Shared | Secure   |Normal     |Secure         |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * |         |  start    |  H_SVM_  |H_SVM_   |H_SVM_     |UV_SVM_    |
+ * |         |  VM       |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE  |
+ * |         |           |          |         |           |           |
+ * --------- ----------------------------------------------------------
+ * |         |           |          |         |           |           |
+ * | Normal  | Normal    | Transient|Error    |Error      |Normal     |
+ * |         |           |          |         |           |           |
+ * | Secure  |   Error   | Error    |Error    |Error      |Normal     |
+ * |         |           |          |         |           |           |
+ * |Transient|   N/A     | Error    |Secure   |Normal     |Normal     |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN	(1UL << 63)	/* secure GFN, paged-in: backed by a device PFN */
+#define KVMPPC_GFN_MEM_PFN	(1UL << 62)	/* secure GFN, paged-out: backed by a normal PFN */
+#define KVMPPC_GFN_SHARED	(1UL << 61)	/* shared GFN */
+#define KVMPPC_GFN_SECURE	(KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)	/* either secure state */
+#define KVMPPC_GFN_FLAG_MASK	(KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)	/* all state flags */
+#define KVMPPC_GFN_PFN_MASK	(~KVMPPC_GFN_FLAG_MASK)	/* PFN part of a pfns[] entry */
+
+struct kvmppc_uvmem_slot {
+	struct list_head list;		/* entry in kvm->arch.uvmem_pfns */
+	unsigned long nr_pfns;		/* number of GFNs covered (memslot npages) */
+	unsigned long base_pfn;		/* first GFN covered (memslot base_gfn) */
+	unsigned long *pfns;		/* one state word per GFN: KVMPPC_GFN_* flags | device PFN */
+};
+struct kvmppc_uvmem_page_pvt {
+	struct kvm *kvm;		/* guest the device page belongs to */
+	unsigned long gpa;		/* guest physical address the device page stands for */
+	bool skip_page_out;		/* when set, page-out skips the UV_PAGE_OUT uvcall */
+	bool remove_gfn;		/* on page free: true clears the GFN state, false marks it paged-out */
+};
+
+bool kvmppc_uvmem_available(void)
+{
+	/*
+	 * A non-NULL kvmppc_uvmem_bitmap means that there is an ultravisor
+	 * and that our data structures have been set up successfully.
+	 */
+	return kvmppc_uvmem_bitmap != NULL;
+}
+
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	struct kvmppc_uvmem_slot *uvslot = kzalloc(sizeof(*uvslot), GFP_KERNEL);
+
+	/* Allocate the tracking node first, then its per-GFN state array. */
+	if (!uvslot)
+		return -ENOMEM;
+	uvslot->pfns = vcalloc(slot->npages, sizeof(*uvslot->pfns));
+	if (!uvslot->pfns) {
+		kfree(uvslot);
+		return -ENOMEM;
+	}
+	uvslot->base_pfn = slot->base_gfn;
+	uvslot->nr_pfns = slot->npages;
+
+	mutex_lock(&kvm->arch.uvmem_lock);	/* the slot list is modified under uvmem_lock */
+	list_add(&uvslot->list, &kvm->arch.uvmem_pfns);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+
+	return 0;
+}
+
+/*
+ * Unlink and free the tracker of @slot. Its device PFNs are already released by now.
+ */
+void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	struct kvmppc_uvmem_slot *cur, *tmp;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	list_for_each_entry_safe(cur, tmp, &kvm->arch.uvmem_pfns, list) {
+		if (cur->base_pfn != slot->base_gfn)
+			continue;
+		list_del(&cur->list);
+		vfree(cur->pfns);
+		kfree(cur);
+		break;
+	}
+	mutex_unlock(&kvm->arch.uvmem_lock);
+}
+
+/* Set the tracked state of @gfn to @flag; a GFN outside every tracked slot is ignored. */
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+			unsigned long flag, unsigned long uvmem_pfn)
+{
+	struct kvmppc_uvmem_slot *uvslot;
+
+	list_for_each_entry(uvslot, &kvm->arch.uvmem_pfns, list) {
+		unsigned long first = uvslot->base_pfn;
+
+		if (gfn < first || gfn >= first + uvslot->nr_pfns)
+			continue;
+		/* Only the device-PFN state stores @uvmem_pfn; the others are bare flags. */
+		uvslot->pfns[gfn - first] =
+			(flag == KVMPPC_GFN_UVMEM_PFN) ? (uvmem_pfn | flag) : flag;
+		return;
+	}
+}
+
+/* Mark @gfn as a secure GFN that is paged-in, i.e. backed by device PFN @uvmem_pfn. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+			unsigned long uvmem_pfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
+
+/* Mark @gfn as a secure GFN that is paged-out, i.e. backed by a normal memory PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
+}
+
+/* Mark @gfn as a shared GFN; any previously recorded state is overwritten. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* Clear every state flag of @gfn, leaving it with no secure or shared state recorded. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+	kvmppc_mark_gfn(gfn, kvm, 0, 0);
+}
+
+/*
+ * Return true if @gfn is a secure GFN backed by a device PFN. When
+ * @uvmem_pfn is non-NULL, that device PFN is stored through it.
+ */
+static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
+				    unsigned long *uvmem_pfn)
+{
+	struct kvmppc_uvmem_slot *uvslot;
+
+	list_for_each_entry(uvslot, &kvm->arch.uvmem_pfns, list) {
+		unsigned long state;
+
+		if (gfn < uvslot->base_pfn || gfn >= uvslot->base_pfn + uvslot->nr_pfns)
+			continue;
+		state = uvslot->pfns[gfn - uvslot->base_pfn];
+		if (uvmem_pfn && (state & KVMPPC_GFN_UVMEM_PFN))
+			*uvmem_pfn = state & KVMPPC_GFN_PFN_MASK;
+		return !!(state & KVMPPC_GFN_UVMEM_PFN);
+	}
+	return false;
+}
+
+/*
+ * Starting from *gfn, look in the tracking slot that contains *gfn for the
+ * next GFN that has no state flag set, i.e. is neither secure nor shared yet.
+ * On success store that GFN in *gfn and return true; otherwise return false
+ * and leave *gfn untouched.
+ *
+ * Must be called with kvm->arch.uvmem_lock held.
+ */
+static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+		struct kvm *kvm, unsigned long *gfn)
+{
+	struct kvmppc_uvmem_slot *found = NULL, *cur;
+	unsigned long cand, limit;
+
+	list_for_each_entry(cur, &kvm->arch.uvmem_pfns, list) {
+		if (*gfn >= cur->base_pfn && *gfn < cur->base_pfn + cur->nr_pfns) {
+			found = cur;
+			break;
+		}
+	}
+	if (!found)
+		return false;
+
+	/*
+	 * The scan below assumes a one to one correspondence between
+	 * kvmppc_uvmem_slot and memslot.
+	 */
+	limit = found->base_pfn + found->nr_pfns;
+	for (cand = *gfn; cand < limit; cand++) {
+		if (found->pfns[cand - found->base_pfn] & KVMPPC_GFN_FLAG_MASK)
+			continue;
+		*gfn = cand;
+		return true;
+	}
+	return false;
+}
+
+/* Set (@merge) or clear KSM mergeability of every VMA backing @memslot; 0 or H_STATE. */
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot, bool merge)
+{
+	int advice = merge ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+	unsigned long hva = gfn_to_hva(kvm, memslot->base_gfn);
+	unsigned long hva_end, flags;
+	struct vm_area_struct *vma;
+	int rc = 0;
+
+	if (kvm_is_error_hva(hva))
+		return H_STATE;
+
+	hva_end = hva + (memslot->npages << PAGE_SHIFT);
+
+	/* Visit each VMA overlapping [hva, hva_end) with mmap_lock held for write. */
+	mmap_write_lock(kvm->mm);
+	do {
+		vma = find_vma_intersection(kvm->mm, hva, hva_end);
+		if (!vma) {
+			rc = H_STATE;
+			break;
+		}
+		vma_start_write(vma);
+		/* Work on a copy so that a failing ksm_madvise() leaves vma->vm_flags alone. */
+		flags = vma->vm_flags;
+		rc = ksm_madvise(vma, vma->vm_start, vma->vm_end, advice, &flags);
+		if (rc) {
+			rc = H_STATE;
+			break;
+		}
+		vm_flags_reset(vma, flags);
+		hva = vma->vm_end;
+	} while (hva_end > vma->vm_end);
+
+	mmap_write_unlock(kvm->mm);
+	return rc;
+}
+
+static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot)
+{
+	uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);	/* unregister the slot with UV */
+	kvmppc_uvmem_slot_free(kvm, memslot);	/* drop the HV-side GFN tracking */
+	kvmppc_memslot_page_merge(kvm, memslot, true);	/* make the pages KSM-mergeable again */
+}
+
+/*
+ * Prepare @memslot for secure use: make its pages unmergeable, set up the
+ * HV-side GFN tracking and register it with UV. Returns 0 or H_PARAMETER.
+ */
+static int __kvmppc_uvmem_memslot_create(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot)
+{
+	int rc;
+
+	if (kvmppc_memslot_page_merge(kvm, memslot, false))
+		return H_PARAMETER;
+
+	if (kvmppc_uvmem_slot_init(kvm, memslot))
+		goto undo_merge;
+
+	rc = uv_register_mem_slot(kvm->arch.lpid, memslot->base_gfn << PAGE_SHIFT,
+				  memslot->npages * PAGE_SIZE, 0, memslot->id);
+	if (rc >= 0)
+		return 0;
+
+	/* Only a negative uvcall result counts as failure; unwind in reverse order. */
+	kvmppc_uvmem_slot_free(kvm, memslot);
+undo_merge:
+	kvmppc_memslot_page_merge(kvm, memslot, true);
+	return H_PARAMETER;
+}
+
+/*
+ * H_SVM_INIT_START: register all memslots with UV (undone on failure); returns an H_* code.
+ */
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot, *m;
+	int srcu_idx, bkt, ret = H_SUCCESS;
+
+	kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
+
+	if (!kvmppc_uvmem_bitmap)
+		return H_UNSUPPORTED;
+
+	/* Only radix guests can be secure guests */
+	if (!kvm_is_radix(kvm))
+		return H_UNSUPPORTED;
+
+	/* NAK the transition to secure if not enabled */
+	if (!kvm->arch.svm_enabled)
+		return H_AUTHORITY;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, bkt, slots) {
+		ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
+		if (ret)
+			break;
+	}
+
+	if (ret) {	/* roll back the slots created before the failing one (@memslot) */
+		slots = kvm_memslots(kvm);
+		kvm_for_each_memslot(m, bkt, slots) {
+			if (m == memslot)
+				break;
+			__kvmppc_uvmem_memslot_delete(kvm, m);
+		}
+	}
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall. Returns non-zero on failure.
+ * Caller must hold kvm->arch.uvmem_lock.
+ */
+static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
+		unsigned long start,
+		unsigned long end, unsigned long page_shift,
+		struct kvm *kvm, unsigned long gpa, struct page *fault_page)
+{
+	unsigned long src_pfn, dst_pfn = 0;
+	struct migrate_vma mig = { 0 };
+	struct page *dpage, *spage;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn;
+	int ret = U_SUCCESS;
+
+	memset(&mig, 0, sizeof(mig));
+	mig.vma = vma;
+	mig.start = start;
+	mig.end = end;
+	mig.src = &src_pfn;
+	mig.dst = &dst_pfn;
+	mig.pgmap_owner = &kvmppc_uvmem_pgmap;	/* same owner that kvmppc_uvmem_init() sets */
+	mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+	mig.fault_page = fault_page;
+
+	/* The requested page is already paged-out, nothing to do */
+	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+		return ret;
+
+	ret = migrate_vma_setup(&mig);
+	if (ret)
+		return -1;
+
+	spage = migrate_pfn_to_page(*mig.src);
+	if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+		goto out_finalize;	/* nothing to migrate; ret is 0 on this path */
+
+	if (!is_zone_device_page(spage))
+		goto out_finalize;	/* ret is 0 on this path as well */
+
+	dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+	if (!dpage) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	lock_page(dpage);
+	pvt = spage->zone_device_data;
+	pfn = page_to_pfn(dpage);
+
+	/*
+	 * This function is used in two cases:
+	 * - When HV touches a secure page, for which we do UV_PAGE_OUT
+	 * - When a secure page is converted to shared page, we *get*
+	 *   the page to essentially unmap the device page. In this
+	 *   case we skip page-out.
+	 */
+	if (!pvt->skip_page_out)
+		ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+				  gpa, 0, page_shift);
+
+	if (ret == U_SUCCESS)
+		*mig.dst = migrate_pfn(pfn);	/* dpage becomes the migration destination */
+	else {
+		unlock_page(dpage);	/* uvcall failed: release the unused page */
+		__free_page(dpage);
+		goto out_finalize;
+	}
+
+	migrate_vma_pages(&mig);
+
+out_finalize:
+	migrate_vma_finalize(&mig);
+	return ret;
+}
+
+/* Same as __kvmppc_svm_page_out(), but takes kvm->arch.uvmem_lock around the call. */
+static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
+				      unsigned long start, unsigned long end,
+				      unsigned long page_shift,
+				      struct kvm *kvm, unsigned long gpa,
+				      struct page *fault_page)
+{
+	int rc;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	rc = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, fault_page);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+
+	return rc;
+}
+
+/*
+ * Drop the device pages that we maintain for @slot of the secure guest.
+ *
+ * For every GFN backed by a device PFN, record @skip_page_out (whether the
+ * UV_PAGE_OUT uvcall is to be skipped) and request removal of the GFN state,
+ * then migrate the page back to a newly allocated normal page. GFNs that
+ * are not device backed just get their state flags cleared.
+ */
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
+			     struct kvm *kvm, bool skip_page_out)
+{
+	int i;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	struct page *uvmem_page;
+	struct vm_area_struct *vma = NULL;
+	unsigned long uvmem_pfn, gfn;
+	unsigned long addr;
+
+	mmap_read_lock(kvm->mm);
+
+	addr = slot->userspace_addr;
+
+	gfn = slot->base_gfn;
+	for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) {	/* gfn and addr advance in step */
+
+		/* Fetch the VMA if addr is not in the latest fetched one */
+		if (!vma || addr >= vma->vm_end) {
+			vma = vma_lookup(kvm->mm, addr);
+			if (!vma) {
+				pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
+				break;	/* the remaining GFNs of the slot are left untouched */
+			}
+		}
+
+		mutex_lock(&kvm->arch.uvmem_lock);	/* taken per GFN, inside mmap_lock */
+
+		if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+			uvmem_page = pfn_to_page(uvmem_pfn);
+			pvt = uvmem_page->zone_device_data;
+			pvt->skip_page_out = skip_page_out;
+			pvt->remove_gfn = true;	/* page free will clear the GFN state */
+
+			if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
+						  PAGE_SHIFT, kvm, pvt->gpa, NULL))
+				pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
+				       pvt->gpa, addr);
+		} else {
+			/* Remove the shared flag if any */
+			kvmppc_gfn_remove(gfn, kvm);
+		}
+
+		mutex_unlock(&kvm->arch.uvmem_lock);
+	}
+
+	mmap_read_unlock(kvm->mm);
+}
+
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+	int srcu_idx, bkt;
+	struct kvm_memory_slot *memslot;
+
+	/*
+	 * Expect to be called only after INIT_START and before INIT_DONE.
+	 * If INIT_DONE was completed, use normal VM termination sequence.
+	 */
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return H_STATE;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
+		kvmppc_uvmem_drop_pages(memslot, kvm, false);	/* false: UV_PAGE_OUT is not skipped */
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	kvm->arch.secure_guest = 0;	/* no secure-guest state bit remains set */
+	uv_svm_terminate(kvm->arch.lpid);
+
+	return H_PARAMETER;	/* NOTE(review): returned even after a clean abort - presumably by design; confirm against the hcall ABI */
+}
+
+/*
+ * Get a free device PFN from the pool and mark @gpa's GFN as backed by it.
+ *
+ * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
+ * PFN will be used to keep track of the secure page on HV side. Returns the
+ * device page, or NULL if the pool is exhausted or the allocation fails.
+ * Called with kvm->arch.uvmem_lock held
+ */
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
+{
+	struct page *dpage = NULL;
+	unsigned long bit, uvmem_pfn;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn_last, pfn_first;
+
+	pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
+	pfn_last = pfn_first +
+		   (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
+
+	spin_lock(&kvmppc_uvmem_bitmap_lock);
+	bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
+				  pfn_last - pfn_first);
+	if (bit >= (pfn_last - pfn_first))
+		goto out;	/* no free device PFN left */
+	bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
+	spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);	/* done with the spinlock dropped */
+	if (!pvt)
+		goto out_clear;
+
+	uvmem_pfn = bit + pfn_first;	/* bitmap index -> absolute device PFN */
+	kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+
+	pvt->gpa = gpa;
+	pvt->kvm = kvm;
+
+	dpage = pfn_to_page(uvmem_pfn);
+	dpage->zone_device_data = pvt;
+	zone_device_page_init(dpage);
+	return dpage;
+out_clear:
+	spin_lock(&kvmppc_uvmem_bitmap_lock);	/* re-taken so that 'out' unlocks on both paths */
+	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+out:
+	spin_unlock(&kvmppc_uvmem_bitmap_lock);
+	return NULL;
+}
+
+/*
+ * Migrate the page at @start to a PFN from the private device memory pool. If @pagein
+ * is true, first copy it to secure memory using UV_PAGE_IN uvcall. Returns 0 on success.
+ */
+static int kvmppc_svm_page_in(struct vm_area_struct *vma,
+		unsigned long start,
+		unsigned long end, unsigned long gpa, struct kvm *kvm,
+		unsigned long page_shift,
+		bool pagein)
+{
+	unsigned long src_pfn, dst_pfn = 0;
+	struct migrate_vma mig = { 0 };
+	struct page *spage;
+	unsigned long pfn;
+	struct page *dpage;
+	int ret = 0;
+
+	memset(&mig, 0, sizeof(mig));
+	mig.vma = vma;
+	mig.start = start;
+	mig.end = end;
+	mig.src = &src_pfn;
+	mig.dst = &dst_pfn;
+	mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
+
+	ret = migrate_vma_setup(&mig);
+	if (ret)
+		return ret;
+
+	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+		ret = -1;	/* the source page was not selected for migration */
+		goto out_finalize;
+	}
+
+	dpage = kvmppc_uvmem_get_page(gpa, kvm);	/* also marks the GFN as device backed */
+	if (!dpage) {
+		ret = -1;
+		goto out_finalize;
+	}
+
+	if (pagein) {
+		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+		spage = migrate_pfn_to_page(*mig.src);
+		if (spage) {
+			ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+					gpa, 0, page_shift);
+			if (ret)
+				goto out_finalize;	/* NOTE(review): dpage is not released on this path - confirm */
+		}
+	}
+
+	*mig.dst = migrate_pfn(page_to_pfn(dpage));
+	migrate_vma_pages(&mig);
+out_finalize:
+	migrate_vma_finalize(&mig);
+	return ret;
+}
+
+/* Move every not-yet-transitioned GFN of @memslot to a device PFN; 0 or H_STATE. */
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot)
+{
+	unsigned long gfn = memslot->base_gfn;
+	unsigned long hva, hva_end;
+	struct vm_area_struct *vma;
+	int rc = 0;
+
+	mmap_read_lock(kvm->mm);
+	mutex_lock(&kvm->arch.uvmem_lock);
+	while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
+		rc = H_STATE;	/* assume failure until this GFN has been migrated */
+		hva = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(hva))
+			break;
+
+		hva_end = hva + (1UL << PAGE_SHIFT);
+		vma = find_vma_intersection(kvm->mm, hva, hva_end);
+		if (!vma || hva < vma->vm_start || hva_end > vma->vm_end)
+			break;
+
+		/* pagein == false: no UV_PAGE_IN uvcall is issued for the page. */
+		if (kvmppc_svm_page_in(vma, hva, hva_end, (gfn << PAGE_SHIFT),
+				       kvm, PAGE_SHIFT, false))
+			break;
+		rc = 0;
+
+		/* relinquish the cpu if needed */
+		cond_resched();
+	}
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	mmap_read_unlock(kvm->mm);
+	return rc;
+}
+
+/*
+ * H_SVM_INIT_DONE: migrate every GFN that is still normal to a device PFN and
+ * flag the guest as secure. Returns H_SUCCESS, H_UNSUPPORTED or H_STATE.
+ */
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+	struct kvm_memory_slot *memslot;
+	struct kvm_memslots *slots;
+	long rc = H_SUCCESS;
+	int srcu_idx, bkt;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, bkt, slots) {
+		rc = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+		if (!rc)
+			continue;
+		/*
+		 * The pages migrated so far remain transitioned. It is the
+		 * caller's responsibility to terminate the VM, which will
+		 * undo all of its state. Until then this VM is in an
+		 * erroneous state and KVMPPC_SECURE_INIT_DONE stays unset.
+		 */
+		rc = H_STATE;
+		goto out;
+	}
+
+	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+	pr_info("LPID %d went secure\n", kvm->arch.lpid);
+
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return rc;
+}
+
+/*
+ * Shares the page with HV, thus making it a normal page. Returns H_SUCCESS,
+ * or H_PARAMETER if @gpa has no usable PFN or the UV_PAGE_IN uvcall fails.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
+ * to unmap the device page from QEMU's page tables.
+ */
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+		unsigned long page_shift)
+{
+
+	int ret = H_PARAMETER;
+	struct page *uvmem_page;
+	struct kvmppc_uvmem_page_pvt *pvt;
+	unsigned long pfn;
+	unsigned long gfn = gpa >> page_shift;
+	int srcu_idx;
+	unsigned long uvmem_pfn;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	mutex_lock(&kvm->arch.uvmem_lock);
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+		uvmem_page = pfn_to_page(uvmem_pfn);
+		pvt = uvmem_page->zone_device_data;
+		pvt->skip_page_out = true;	/* contents are not fetched back via UV_PAGE_OUT */
+		/*
+		 * do not drop the GFN. It is a valid GFN
+		 * that is transitioned to a shared GFN.
+		 */
+		pvt->remove_gfn = false;
+	}
+
+retry:
+	mutex_unlock(&kvm->arch.uvmem_lock);	/* dropped: the migrate_to_ram handler takes uvmem_lock itself */
+	pfn = gfn_to_pfn(kvm, gfn);	/* presumably faults a device page back to normal memory - confirm */
+	if (is_error_noslot_pfn(pfn))
+		goto out;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {	/* device backed (again) while unlocked */
+		uvmem_page = pfn_to_page(uvmem_pfn);
+		pvt = uvmem_page->zone_device_data;
+		pvt->skip_page_out = true;
+		pvt->remove_gfn = false; /* it continues to be a valid GFN */
+		kvm_release_pfn_clean(pfn);
+		goto retry;
+	}
+
+	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+				page_shift)) {
+		kvmppc_gfn_shared(gfn, kvm);	/* record the shared state only once UV accepted the page */
+		ret = H_SUCCESS;
+	}
+	kvm_release_pfn_clean(pfn);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+/*
+ * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
+ *
+ * H_PAGE_IN_SHARED flag makes the page shared which means that the same
+ * memory is visible from both UV and HV.
+ * Returns H_SUCCESS, H_UNSUPPORTED, H_P3 (bad page size), H_P2 (unknown
+ * @flags) or H_PARAMETER (bad @gpa, already paged-in, migration failed).
+ */
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+		unsigned long flags, unsigned long page_shift)
+{
+	unsigned long gfn, start, end;
+	struct vm_area_struct *vma;
+	int srcu_idx, ret = H_PARAMETER;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	if (page_shift != PAGE_SHIFT)
+		return H_P3;
+
+	if (flags & ~H_PAGE_IN_SHARED)
+		return H_P2;
+
+	if (flags & H_PAGE_IN_SHARED)
+		return kvmppc_share_page(kvm, gpa, page_shift);
+
+	/* Shift only now that @page_shift is known to be PAGE_SHIFT. */
+	gfn = gpa >> page_shift;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	mmap_read_lock(kvm->mm);
+
+	start = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(start))
+		goto out;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	/* Fail the page-in request of an already paged-in page */
+	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+		goto out_unlock;
+
+	end = start + (1UL << page_shift);
+	vma = find_vma_intersection(kvm->mm, start, end);
+	if (!vma || vma->vm_start > start || vma->vm_end < end)
+		goto out_unlock;
+
+	if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+				true))
+		goto out_unlock;
+
+	ret = H_SUCCESS;
+
+out_unlock:
+	mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+	mmap_read_unlock(kvm->mm);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+
+/*
+ * dev_pagemap migrate_to_ram callback: runs when HV touches a page that
+ * has been moved to secure memory. The page is migrated back to a newly
+ * provisioned normal page (UV_PAGE_OUT uvcall unless skip_page_out is set).
+ *
+ * Returns 0 on success, VM_FAULT_SIGBUS if the page could not be
+ * paged out.
+ */
+static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
+{
+	struct kvmppc_uvmem_page_pvt *priv = vmf->page->zone_device_data;
+	unsigned long addr = vmf->address;
+	int rc;
+
+	rc = kvmppc_svm_page_out(vmf->vma, addr, addr + PAGE_SIZE, PAGE_SHIFT,
+				 priv->kvm, priv->gpa, vmf->page);
+
+	return rc ? VM_FAULT_SIGBUS : 0;
+}
+
+/*
+ * dev_pagemap page_free callback: release the device PFN back to the pool.
+ *
+ * Gets called when a secure GFN transitions from a secure-PFN
+ * to a normal PFN during H_SVM_PAGE_OUT.
+ * Gets called with kvm->arch.uvmem_lock held.
+ */
+static void kvmppc_uvmem_page_free(struct page *page)
+{
+	unsigned long pool_base = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
+	unsigned long bit = page_to_pfn(page) - pool_base;
+	struct kvmppc_uvmem_page_pvt *priv = page->zone_device_data;
+
+	spin_lock(&kvmppc_uvmem_bitmap_lock);
+	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+	spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+	/* Detach the private data, then record what becomes of the GFN. */
+	page->zone_device_data = NULL;
+	if (priv->remove_gfn)
+		kvmppc_gfn_remove(priv->gpa >> PAGE_SHIFT, priv->kvm);
+	else
+		kvmppc_gfn_secure_mem_pfn(priv->gpa >> PAGE_SHIFT, priv->kvm);
+	kfree(priv);
+}
+
+static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
+	.page_free = kvmppc_uvmem_page_free,	/* puts the device PFN back into the pool */
+	.migrate_to_ram	= kvmppc_uvmem_migrate_to_ram,	/* pages a secure page out on HV access */
+};
+
+/*
+ * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
+ * Returns H_SUCCESS, H_UNSUPPORTED (guest not going secure), H_P3 (bad page
+ * size), H_P2 (non-zero @flags) or H_PARAMETER (bad @gpa / page-out failed).
+ */
+unsigned long kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
+				    unsigned long flags, unsigned long page_shift)
+{
+	unsigned long gfn, start, end;
+	struct vm_area_struct *vma;
+	int srcu_idx, ret = H_PARAMETER;
+
+	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+		return H_UNSUPPORTED;
+
+	if (page_shift != PAGE_SHIFT)
+		return H_P3;
+
+	if (flags)
+		return H_P2;
+
+	/* Shift only now that @page_shift is known to be PAGE_SHIFT. */
+	gfn = gpa >> page_shift;
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	mmap_read_lock(kvm->mm);
+	start = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(start))
+		goto out;
+
+	end = start + (1UL << page_shift);
+	vma = find_vma_intersection(kvm->mm, start, end);
+	if (!vma || vma->vm_start > start || vma->vm_end < end)
+		goto out;
+
+	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL))
+		ret = H_SUCCESS;
+out:
+	mmap_read_unlock(kvm->mm);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+/* Send the page backing @gfn to UV (UV_PAGE_IN) unless it is already device backed. */
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+	unsigned long pfn = gfn_to_pfn(kvm, gfn);
+	int rc = U_SUCCESS;
+
+	if (is_error_noslot_pfn(pfn))
+		return -EFAULT;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	/* A GFN that already has a device PFN needs no uvcall and counts as success. */
+	if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+		rc = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT,
+				gfn << PAGE_SHIFT, 0, PAGE_SHIFT);
+	kvm_release_pfn_clean(pfn);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+	if (rc != U_SUCCESS)
+		return -EFAULT;
+	return RESUME_GUEST;
+}
+
+/* Register @new for secure use and, if that worked, migrate its GFNs to device PFNs. */
+int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new)
+{
+	int rc = __kvmppc_uvmem_memslot_create(kvm, new);
+
+	if (rc)
+		return rc;
+	return kvmppc_uv_migrate_mem_slot(kvm, new);
+}
+
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old)
+{
+	__kvmppc_uvmem_memslot_delete(kvm, old);	/* non-static wrapper; adds nothing of its own */
+}
+
+/* Sum up the secure memory sizes found in the device tree; 0 if there are none. */
+static u64 kvmppc_get_secmem_size(void)
+{
+	struct device_node *node;
+	const __be32 *cells;
+	u64 total = 0;
+	int idx, len;
+
+	/*
+	 * Prefer the ibm,secure-memory nodes: they supersede the
+	 * secure-memory-ranges property, so the deprecated property
+	 * is only consulted when no such node reports any memory.
+	 */
+	for_each_compatible_node(node, NULL, "ibm,secure-memory") {
+		cells = of_get_property(node, "reg", &len);
+		if (cells)
+			total += of_read_number(cells + 2, 2);
+	}
+	if (total)
+		return total;
+
+	node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+	if (!node)
+		return total;
+
+	cells = of_get_property(node, "secure-memory-ranges", &len);
+	if (!cells)
+		goto out_put;
+
+	/* Entries are four cells each; the size is read from the last two. */
+	for (idx = 0; idx < len / (sizeof(*cells) * 4); idx++)
+		total += of_read_number(cells + (idx * 4) + 2, 2);
+
+out_put:
+	of_node_put(node);
+	return total;
+}
+
+int kvmppc_uvmem_init(void)
+{
+	int ret = 0;
+	unsigned long size;
+	struct resource *res;
+	void *addr;
+	unsigned long pfn_last, pfn_first;
+
+	size = kvmppc_get_secmem_size();
+	if (!size) {
+		/*
+		 * Don't fail the initialization of kvm-hv module if
+		 * the platform doesn't export ibm,uv-firmware node.
+		 * Let normal guests run on such PEF-disabled platform.
+		 */
+		pr_info("KVMPPC-UVMEM: No support for secure guests\n");
+		goto out;	/* ret is still 0: not an error */
+	}
+
+	res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
+	if (IS_ERR(res)) {
+		ret = PTR_ERR(res);
+		goto out;
+	}
+
+	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
+	kvmppc_uvmem_pgmap.range.start = res->start;
+	kvmppc_uvmem_pgmap.range.end = res->end;
+	kvmppc_uvmem_pgmap.nr_range = 1;
+	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
+	/* just one global instance: */
+	kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
+	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto out_free_region;
+	}
+
+	pfn_first = res->start >> PAGE_SHIFT;
+	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
+	kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL);	/* one bit per device PFN */
+	if (!kvmppc_uvmem_bitmap) {
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
+	return ret;
+out_unmap:
+	memunmap_pages(&kvmppc_uvmem_pgmap);	/* unwind in reverse order of setup */
+out_free_region:
+	release_mem_region(res->start, size);
+out:
+	return ret;
+}
+
+void kvmppc_uvmem_free(void)
+{
+	if (!kvmppc_uvmem_bitmap)	/* nothing was set up by kvmppc_uvmem_init() */
+		return;
+
+	memunmap_pages(&kvmppc_uvmem_pgmap);
+	release_mem_region(kvmppc_uvmem_pgmap.range.start, range_len(&kvmppc_uvmem_pgmap.range));
+	bitmap_free(kvmppc_uvmem_bitmap);
+	kvmppc_uvmem_bitmap = NULL;	/* so kvmppc_uvmem_available() reports false again */
+}
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
new file mode 100644
index 0000000000..f4bec2fc51
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+#include <asm/asm-compat.h>
+
+/*
+ * FUNC(name): symbol used as a branch target in this file.  It is the
+ * plain name, except on Book3S-64 builds without the ELFv2 ABI where a
+ * '.' is glued in front of it.
+ *
+ * GET_SHADOW_VCPU(reg): put the shadow vcpu location into reg.  On
+ * Book3S-64 it is computed as r13 + PACA_SVCPU; on Book3S-32 it is
+ * loaded from offset THREAD + THREAD_KVM_SVCPU relative to r2.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define FUNC(name) 		name
+#else
+#define FUNC(name) 		GLUE(.,name)
+#endif
+#define GET_SHADOW_VCPU(reg)    addi	reg, r13, PACA_SVCPU
+
+#elif defined(CONFIG_PPC_BOOK3S_32)
+#define FUNC(name)		name
+#define GET_SHADOW_VCPU(reg)	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2)
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * Load r14 - r31 from the VCPU_GPR() slots of the vcpu structure whose
+ * address is held in the register passed as "vcpu".
+ */
+#define VCPU_LOAD_NVGPRS(vcpu) \
+	PPC_LL	r14, VCPU_GPR(R14)(vcpu); \
+	PPC_LL	r15, VCPU_GPR(R15)(vcpu); \
+	PPC_LL	r16, VCPU_GPR(R16)(vcpu); \
+	PPC_LL	r17, VCPU_GPR(R17)(vcpu); \
+	PPC_LL	r18, VCPU_GPR(R18)(vcpu); \
+	PPC_LL	r19, VCPU_GPR(R19)(vcpu); \
+	PPC_LL	r20, VCPU_GPR(R20)(vcpu); \
+	PPC_LL	r21, VCPU_GPR(R21)(vcpu); \
+	PPC_LL	r22, VCPU_GPR(R22)(vcpu); \
+	PPC_LL	r23, VCPU_GPR(R23)(vcpu); \
+	PPC_LL	r24, VCPU_GPR(R24)(vcpu); \
+	PPC_LL	r25, VCPU_GPR(R25)(vcpu); \
+	PPC_LL	r26, VCPU_GPR(R26)(vcpu); \
+	PPC_LL	r27, VCPU_GPR(R27)(vcpu); \
+	PPC_LL	r28, VCPU_GPR(R28)(vcpu); \
+	PPC_LL	r29, VCPU_GPR(R29)(vcpu); \
+	PPC_LL	r30, VCPU_GPR(R30)(vcpu); \
+	PPC_LL	r31, VCPU_GPR(R31)(vcpu); \
+
+/*****************************************************************************
+ *                                                                           *
+ *     Guest entry / exit code that is in kernel module memory (highmem)     *
+ *                                                                           *
+ ****************************************************************************/
+
+/*
+ * __kvmppc_vcpu_run: save the host's LR, CR and non-volatile GPRs in a
+ * SWITCH_FRAME_SIZE stack frame, load the guest's non-volatile GPRs and
+ * enter the guest through kvmppc_entry_trampoline.  On every exit the
+ * guest's r14 - r31 are stored back into the vcpu and
+ * kvmppc_handle_exit_pr() is called; the guest is re-entered while that
+ * returns RESUME_GUEST or RESUME_GUEST_NV, otherwise the host state is
+ * restored and the function returns with r3 still holding that result.
+ *
+ * Registers:
+ *  r3: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+
+kvm_start_entry:
+	/* Write correct stack frame */
+	mflr	r0
+	PPC_STL	r0,PPC_LR_STKOFF(r1)
+
+	/* Save host state to the stack */
+	PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
+
+	/* Save r3 (vcpu) */
+	SAVE_GPR(3, r1)
+
+	/* Save non-volatile registers (r14 - r31) */
+	SAVE_NVGPRS(r1)
+
+	/* Save CR */
+	mfcr	r14
+	stw	r14, _CCR(r1)
+
+	/* Save LR */
+	PPC_STL	r0, _LINK(r1)
+
+	/* Load non-volatile guest state from the vcpu */
+	VCPU_LOAD_NVGPRS(r3)
+
+kvm_start_lightweight:
+	/* Copy registers into shadow vcpu so we can access them in real mode */
+	bl	FUNC(kvmppc_copy_to_svcpu)
+	nop
+	/* Reload the vcpu pointer saved in the stack frame */
+	REST_GPR(3, r1)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Get the dcbz32 flag */
+	PPC_LL	r0, VCPU_HFLAGS(r3)
+	rldicl	r0, r0, 0, 63		/* r0 &= 1 */
+	stb	r0, HSTATE_RESTORE_HID5(r13)
+
+	/* Load up guest SPRG3 value, since it's user readable */
+	lbz	r4, VCPU_SHAREDBE(r3)
+	cmpwi	r4, 0
+	ld	r5, VCPU_SHARED(r3)
+	beq	sprg3_little_endian
+sprg3_big_endian:
+	/* Plain load when the host is big endian too, byte-reversed otherwise */
+#ifdef __BIG_ENDIAN__
+	ld	r4, VCPU_SHARED_SPRG3(r5)
+#else
+	addi	r5, r5, VCPU_SHARED_SPRG3
+	ldbrx	r4, 0, r5
+#endif
+	b	after_sprg3_load
+sprg3_little_endian:
+	/* Plain load when the host is little endian too, byte-reversed otherwise */
+#ifdef __LITTLE_ENDIAN__
+	ld	r4, VCPU_SHARED_SPRG3(r5)
+#else
+	addi	r5, r5, VCPU_SHARED_SPRG3
+	ldbrx	r4, 0, r5
+#endif
+
+after_sprg3_load:
+	mtspr	SPRN_SPRG3, r4
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	PPC_LL	r4, VCPU_SHADOW_MSR(r3)	/* get shadow_msr */
+
+	/* Jump to segment patching handler and into our guest */
+	bl	FUNC(kvmppc_entry_trampoline)
+	nop
+
+/*
+ * This is the handler in module memory. It gets jumped at from the
+ * lowmem trampoline code, so it's basically the guest exit code.
+ *
+ */
+
+	/*
+	 * Register usage at this point:
+	 *
+	 * R1       = host R1
+	 * R2       = host R2
+	 * R12      = exit handler id
+	 * R13      = PACA
+	 * SVCPU.*  = guest *
+	 * MSR.EE   = 1
+	 *
+	 */
+
+	PPC_LL	r3, GPR3(r1)		/* vcpu pointer */
+
+	/*
+	 * kvmppc_copy_from_svcpu can clobber volatile registers, save
+	 * the exit handler id to the vcpu and restore it from there later.
+	 */
+	stw	r12, VCPU_TRAP(r3)
+
+	/* Transfer reg values from shadow vcpu back to vcpu struct */
+
+	bl	FUNC(kvmppc_copy_from_svcpu)
+	nop
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Reload kernel SPRG3 value.
+	 * No need to save guest value as usermode can't modify SPRG3.
+	 */
+	ld	r3, PACA_SPRG_VDSO(r13)
+	mtspr	SPRN_SPRG_VDSO_WRITE, r3
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	/* R7 = vcpu */
+	PPC_LL	r7, GPR3(r1)
+
+	/* Store the guest's non-volatile registers (r14 - r31) into the vcpu */
+	PPC_STL	r14, VCPU_GPR(R14)(r7)
+	PPC_STL	r15, VCPU_GPR(R15)(r7)
+	PPC_STL	r16, VCPU_GPR(R16)(r7)
+	PPC_STL	r17, VCPU_GPR(R17)(r7)
+	PPC_STL	r18, VCPU_GPR(R18)(r7)
+	PPC_STL	r19, VCPU_GPR(R19)(r7)
+	PPC_STL	r20, VCPU_GPR(R20)(r7)
+	PPC_STL	r21, VCPU_GPR(R21)(r7)
+	PPC_STL	r22, VCPU_GPR(R22)(r7)
+	PPC_STL	r23, VCPU_GPR(R23)(r7)
+	PPC_STL	r24, VCPU_GPR(R24)(r7)
+	PPC_STL	r25, VCPU_GPR(R25)(r7)
+	PPC_STL	r26, VCPU_GPR(R26)(r7)
+	PPC_STL	r27, VCPU_GPR(R27)(r7)
+	PPC_STL	r28, VCPU_GPR(R28)(r7)
+	PPC_STL	r29, VCPU_GPR(R29)(r7)
+	PPC_STL	r30, VCPU_GPR(R30)(r7)
+	PPC_STL	r31, VCPU_GPR(R31)(r7)
+
+	/* Pass the exit number as 2nd argument to kvmppc_handle_exit_pr */
+	lwz	r4, VCPU_TRAP(r7)
+
+	/* Restore r3 (vcpu) */
+	REST_GPR(3, r1)
+	bl	FUNC(kvmppc_handle_exit_pr)
+
+	/* If RESUME_GUEST, get back in the loop */
+	cmpwi	r3, RESUME_GUEST
+	beq	kvm_loop_lightweight
+
+	/* RESUME_GUEST_NV additionally reloads the guest's r14 - r31 */
+	cmpwi	r3, RESUME_GUEST_NV
+	beq	kvm_loop_heavyweight
+
+kvm_exit_loop:
+
+	/* Restore the host LR and CR saved at entry */
+	PPC_LL	r4, _LINK(r1)
+	mtlr	r4
+
+	lwz	r14, _CCR(r1)
+	mtcr	r14
+
+	/* Restore non-volatile host registers (r14 - r31) */
+	REST_NVGPRS(r1)
+
+	addi    r1, r1, SWITCH_FRAME_SIZE
+	blr
+
+kvm_loop_heavyweight:
+
+	/* Rewrite the LR slot of the caller's frame from the saved _LINK */
+	PPC_LL	r4, _LINK(r1)
+	PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
+
+	/* Load vcpu */
+	REST_GPR(3, r1)
+
+	/* Load non-volatile guest state from the vcpu */
+	VCPU_LOAD_NVGPRS(r3)
+
+	/* Jump back into the beginning of this function */
+	b	kvm_start_lightweight
+
+kvm_loop_lightweight:
+
+	/* We'll need the vcpu pointer */
+	REST_GPR(3, r1)
+
+	/* Jump back into the beginning of this function */
+	b	kvm_start_lightweight
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
new file mode 100644
index 0000000000..ce79ac33e8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *     Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+#include "trace_pr.h"
+
+#define PTE_SIZE	12
+
+static struct kmem_cache *hpte_cache;
+
+/* Bucket index for hpte_hash_pte[]: hash of @eaddr with its low PTE_SIZE bits shifted out. */
+static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
+{
+	u64 key = eaddr >> PTE_SIZE;
+
+	return hash_64(key, HPTEG_HASH_BITS_PTE);
+}
+
+/*
+ * Bucket index for hpte_hash_pte_long[]: hash of the 0x0ffff000 bits of
+ * @eaddr, shifted right by PTE_SIZE.
+ */
+static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr)
+{
+	u64 key = (eaddr & 0x0ffff000) >> PTE_SIZE;
+
+	return hash_64(key, HPTEG_HASH_BITS_PTE_LONG);
+}
+
+/* Bucket index for hpte_hash_vpte[]: hash of the low 36 bits of @vpage. */
+static inline u64 kvmppc_mmu_hash_vpte(u64 vpage)
+{
+	u64 key = vpage & 0xfffffffffULL;
+
+	return hash_64(key, HPTEG_HASH_BITS_VPTE);
+}
+
+/*
+ * Bucket index for hpte_hash_vpte_long[]: hash of the 0xffffff000 bits of
+ * @vpage, shifted right by 12.
+ */
+static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
+{
+	u64 key = (vpage & 0xffffff000ULL) >> 12;
+
+	return hash_64(key, HPTEG_HASH_BITS_VPTE_LONG);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Bucket index for hpte_hash_vpte_64k[]: hash of the 0xffffffff0 bits of
+ * @vpage, shifted right by 4.
+ */
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+	u64 key = (vpage & 0xffffffff0ULL) >> 4;
+
+	return hash_64(key, HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
+/*
+ * Insert @pte into every lookup hash of @vcpu (by effective address, by
+ * virtual page and, on Book3S-64, by 64k virtual page) and account for it
+ * in hpte_cache_count.  All list updates happen under mmu_lock.
+ */
+void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+
+	trace_kvm_book3s_mmu_map(pte);
+
+	spin_lock(&vcpu3s->mmu_lock);
+
+	/* ePTE list */
+	bucket = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(pte->pte.eaddr)];
+	hlist_add_head_rcu(&pte->list_pte, bucket);
+
+	/* ePTE_long list */
+	bucket = &vcpu3s->hpte_hash_pte_long[
+			kvmppc_mmu_hash_pte_long(pte->pte.eaddr)];
+	hlist_add_head_rcu(&pte->list_pte_long, bucket);
+
+	/* vPTE list */
+	bucket = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(pte->pte.vpage)];
+	hlist_add_head_rcu(&pte->list_vpte, bucket);
+
+	/* vPTE_long list */
+	bucket = &vcpu3s->hpte_hash_vpte_long[
+			kvmppc_mmu_hash_vpte_long(pte->pte.vpage)];
+	hlist_add_head_rcu(&pte->list_vpte_long, bucket);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* vPTE_64k list */
+	bucket = &vcpu3s->hpte_hash_vpte_64k[
+			kvmppc_mmu_hash_vpte_64k(pte->pte.vpage)];
+	hlist_add_head_rcu(&pte->list_vpte_64k, bucket);
+#endif
+
+	vcpu3s->hpte_cache_count++;
+
+	spin_unlock(&vcpu3s->mmu_lock);
+}
+
+/* RCU callback: free the hpte_cache entry that embeds @head as rcu_head. */
+static void free_pte_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(hpte_cache,
+			container_of(head, struct hpte_cache, rcu_head));
+}
+
+/*
+ * Drop one cached entry: invalidate it through
+ * kvmppc_mmu_invalidate_pte(), unhash it from every lookup list under
+ * mmu_lock and queue it with call_rcu() so free_pte_rcu() frees it.
+ */
+static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
+	trace_kvm_book3s_mmu_invalidate(pte);
+
+	/* Different for 32 and 64 bit */
+	kvmppc_mmu_invalidate_pte(vcpu, pte);
+
+	spin_lock(&vcpu3s->mmu_lock);
+
+	/*
+	 * pte already invalidated in between?  An unhashed list_pte means the
+	 * entry was already removed below, so return without unlinking it or
+	 * queueing call_rcu() a second time.
+	 */
+	if (hlist_unhashed(&pte->list_pte)) {
+		spin_unlock(&vcpu3s->mmu_lock);
+		return;
+	}
+
+	hlist_del_init_rcu(&pte->list_pte);
+	hlist_del_init_rcu(&pte->list_pte_long);
+	hlist_del_init_rcu(&pte->list_vpte);
+	hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+	hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
+	vcpu3s->hpte_cache_count--;
+
+	spin_unlock(&vcpu3s->mmu_lock);
+
+	/* Freed via RCU; the flush loops walk these lists under rcu_read_lock() */
+	call_rcu(&pte->rcu_head, free_pte_rcu);
+}
+
+/*
+ * Invalidate every cached entry of @vcpu by walking all buckets of the
+ * vPTE_long hash under rcu_read_lock().
+ */
+static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hpte_cache *entry;
+	int bucket = 0;
+
+	rcu_read_lock();
+
+	while (bucket < HPTEG_HASH_NUM_VPTE_LONG) {
+		struct hlist_head *head = &vcpu3s->hpte_hash_vpte_long[bucket];
+
+		hlist_for_each_entry_rcu(entry, head, list_vpte_long)
+			invalidate_pte(vcpu, entry);
+
+		bucket++;
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Invalidate the cached entries whose effective address, with the low 12
+ * bits masked off, equals @guest_ea.  Only the ePTE bucket that @guest_ea
+ * hashes to is searched.
+ */
+static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, bucket, list_pte) {
+		if ((entry->pte.eaddr & ~0xfffUL) != guest_ea)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Invalidate the cached entries whose effective address, masked with
+ * 0x0ffff000, equals @guest_ea.  Only the ePTE_long bucket that @guest_ea
+ * hashes to is searched.
+ */
+static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_pte_long[
+			kvmppc_mmu_hash_pte_long(guest_ea)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, bucket, list_pte_long) {
+		if ((entry->pte.eaddr & 0x0ffff000UL) != guest_ea)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Flush cached entries by effective address.  @ea_mask selects the
+ * granularity: ~0xfff flushes one page, 0x0ffff000 uses the "long" match
+ * and 0 flushes everything.  Any other mask only triggers WARN_ON().
+ */
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
+{
+	trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask);
+	guest_ea &= ea_mask;
+
+	if (ea_mask == ~0xfffUL) {
+		kvmppc_mmu_pte_flush_page(vcpu, guest_ea);
+	} else if (ea_mask == 0x0ffff000) {
+		kvmppc_mmu_pte_flush_long(vcpu, guest_ea);
+	} else if (ea_mask == 0) {
+		/* Doing a complete flush -> start from scratch */
+		kvmppc_mmu_pte_flush_all(vcpu);
+	} else {
+		WARN_ON(1);
+	}
+}
+
+/*
+ * Flush with mask 0xfffffffff: invalidate the entries in the vPTE bucket
+ * of @guest_vp whose masked virtual page equals @guest_vp.
+ */
+static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	const u64 mask = 0xfffffffffULL;
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, bucket, list_vpte) {
+		if ((entry->pte.vpage & mask) != guest_vp)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Flush with mask 0xffffffff0: invalidate the entries in the vPTE_64k
+ * bucket of @guest_vp whose masked virtual page equals @guest_vp.
+ */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	const u64 mask = 0xffffffff0ULL;
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_vpte_64k[
+			kvmppc_mmu_hash_vpte_64k(guest_vp)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, bucket, list_vpte_64k) {
+		if ((entry->pte.vpage & mask) != guest_vp)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+#endif
+
+/*
+ * Flush with mask 0xffffff000: invalidate the entries in the vPTE_long
+ * bucket of @guest_vp whose masked virtual page equals @guest_vp.
+ */
+static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	const u64 mask = 0xffffff000ULL;
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hlist_head *bucket;
+	struct hpte_cache *entry;
+
+	bucket = &vcpu3s->hpte_hash_vpte_long[
+			kvmppc_mmu_hash_vpte_long(guest_vp)];
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(entry, bucket, list_vpte_long) {
+		if ((entry->pte.vpage & mask) != guest_vp)
+			continue;
+		invalidate_pte(vcpu, entry);
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Flush cached entries by virtual page.  @vp_mask must be one of the
+ * supported masks (0xfffffffff, 0xffffff000 and, on Book3S-64,
+ * 0xffffffff0); any other value only triggers WARN_ON().
+ */
+void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
+{
+	trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask);
+	guest_vp &= vp_mask;
+
+	if (vp_mask == 0xfffffffffULL)
+		kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
+#ifdef CONFIG_PPC_BOOK3S_64
+	else if (vp_mask == 0xffffffff0ULL)
+		kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+#endif
+	else if (vp_mask == 0xffffff000ULL)
+		kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
+	else
+		WARN_ON(1);
+}
+
+/*
+ * Invalidate every cached entry whose raddr lies in the half-open range
+ * [@pa_start, @pa_end).  All buckets of the vPTE_long hash are scanned.
+ */
+void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	struct hpte_cache *entry;
+	int bucket;
+
+	trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end);
+
+	rcu_read_lock();
+
+	for (bucket = 0; bucket < HPTEG_HASH_NUM_VPTE_LONG; bucket++) {
+		struct hlist_head *head = &vcpu3s->hpte_hash_vpte_long[bucket];
+
+		hlist_for_each_entry_rcu(entry, head, list_vpte_long) {
+			if (entry->pte.raddr < pa_start ||
+			    entry->pte.raddr >= pa_end)
+				continue;
+			invalidate_pte(vcpu, entry);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Allocate a zeroed hpte_cache entry.  When the vcpu already holds
+ * HPTEG_CACHE_NUM entries, all of them are flushed first.
+ *
+ * Returns the new entry, or NULL when the slab allocation fails.
+ */
+struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
+	if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
+		kvmppc_mmu_pte_flush_all(vcpu);
+
+	return kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+}
+
+/*
+ * Give @pte straight back to the hpte slab cache.  Unlike
+ * invalidate_pte(), this frees immediately and does not go through
+ * call_rcu().
+ */
+void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
+{
+	kmem_cache_free(hpte_cache, pte);
+}
+
+/* Drop all cached entries of @vcpu; a mask of 0 requests a complete flush. */
+void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
+{
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+}
+
+/* Initialise the first @len list heads of @hash_list as empty lists. */
+static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len)
+{
+	struct hlist_head *head = hash_list;
+
+	while (len-- > 0)
+		INIT_HLIST_HEAD(head++);
+}
+
+/*
+ * Prepare the per-vcpu shadow PTE bookkeeping: the mmu_lock spinlock and
+ * all lookup hash tables.  Always returns 0.
+ */
+int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+
+	spin_lock_init(&vcpu3s->mmu_lock);
+
+	/* effective-address hashes */
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_pte));
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_pte_long,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_pte_long));
+
+	/* virtual-page hashes */
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+	kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+				  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
+
+	return 0;
+}
+
+/*
+ * Create the slab cache that backs all hpte_cache entries.
+ *
+ * Returns 0 on success or -ENOMEM when the cache cannot be created, so
+ * that the caller does not carry on with a NULL hpte_cache.
+ */
+int kvmppc_mmu_hpte_sysinit(void)
+{
+	/* init hpte slab cache */
+	hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache),
+				       sizeof(struct hpte_cache), 0, NULL);
+	if (!hpte_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Destroy the slab cache created by kvmppc_mmu_hpte_sysinit(). */
+void kvmppc_mmu_hpte_sysexit(void)
+{
+	kmem_cache_destroy(hpte_cache);
+}
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
new file mode 100644
index 0000000000..bc39c76c9d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -0,0 +1,1263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright Novell Inc 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/kvm.h>
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_fpu.h>
+#include <asm/reg.h>
+#include <asm/cacheflush.h>
+#include <asm/switch_to.h>
+#include <linux/vmalloc.h>
+
+/* #define DEBUG */
+
+/*
+ * Debug output helper: printk() when DEBUG is defined, otherwise an
+ * empty statement.  The empty variant deliberately has no trailing ';'
+ * so that "if (x) dprintk(...); else ..." still parses.
+ */
+#ifdef DEBUG
+#define dprintk printk
+#else
+#define dprintk(...) do { } while (0)
+#endif
+
+#define OP_LFS			48
+#define OP_LFSU			49
+#define OP_LFD			50
+#define OP_LFDU			51
+#define OP_STFS			52
+#define OP_STFSU		53
+#define OP_STFD			54
+#define OP_STFDU		55
+#define OP_PSQ_L		56
+#define OP_PSQ_LU		57
+#define OP_PSQ_ST		60
+#define OP_PSQ_STU		61
+
+#define OP_31_LFSX		535
+#define OP_31_LFSUX		567
+#define OP_31_LFDX		599
+#define OP_31_LFDUX		631
+#define OP_31_STFSX		663
+#define OP_31_STFSUX		695
+#define OP_31_STFX		727
+#define OP_31_STFUX		759
+#define OP_31_LWIZX		887
+#define OP_31_STFIWX		983
+
+#define OP_59_FADDS		21
+#define OP_59_FSUBS		20
+#define OP_59_FSQRTS		22
+#define OP_59_FDIVS		18
+#define OP_59_FRES		24
+#define OP_59_FMULS		25
+#define OP_59_FRSQRTES		26
+#define OP_59_FMSUBS		28
+#define OP_59_FMADDS		29
+#define OP_59_FNMSUBS		30
+#define OP_59_FNMADDS		31
+
+#define OP_63_FCMPU		0
+#define OP_63_FCPSGN		8
+#define OP_63_FRSP		12
+#define OP_63_FCTIW		14
+#define OP_63_FCTIWZ		15
+#define OP_63_FDIV		18
+#define OP_63_FADD		21
+#define OP_63_FSQRT		22
+#define OP_63_FSEL		23
+#define OP_63_FRE		24
+#define OP_63_FMUL		25
+#define OP_63_FRSQRTE		26
+#define OP_63_FMSUB		28
+#define OP_63_FMADD		29
+#define OP_63_FNMSUB		30
+#define OP_63_FNMADD		31
+#define OP_63_FCMPO		32
+#define OP_63_MTFSB1		38 // XXX
+#define OP_63_FSUB		20
+#define OP_63_FNEG		40
+#define OP_63_MCRFS		64
+#define OP_63_MTFSB0		70
+#define OP_63_FMR		72
+#define OP_63_MTFSFI		134
+#define OP_63_FABS		264
+#define OP_63_MFFS		583
+#define OP_63_MTFSF		711
+
+#define OP_4X_PS_CMPU0		0
+#define OP_4X_PSQ_LX		6
+#define OP_4XW_PSQ_STX		7
+#define OP_4A_PS_SUM0		10
+#define OP_4A_PS_SUM1		11
+#define OP_4A_PS_MULS0		12
+#define OP_4A_PS_MULS1		13
+#define OP_4A_PS_MADDS0		14
+#define OP_4A_PS_MADDS1		15
+#define OP_4A_PS_DIV		18
+#define OP_4A_PS_SUB		20
+#define OP_4A_PS_ADD		21
+#define OP_4A_PS_SEL		23
+#define OP_4A_PS_RES		24
+#define OP_4A_PS_MUL		25
+#define OP_4A_PS_RSQRTE		26
+#define OP_4A_PS_MSUB		28
+#define OP_4A_PS_MADD		29
+#define OP_4A_PS_NMSUB		30
+#define OP_4A_PS_NMADD		31
+#define OP_4X_PS_CMPO0		32
+#define OP_4X_PSQ_LUX		38
+#define OP_4XW_PSQ_STUX		39
+#define OP_4X_PS_NEG		40
+#define OP_4X_PS_CMPU1		64
+#define OP_4X_PS_MR		72
+#define OP_4X_PS_CMPO1		96
+#define OP_4X_PS_NABS		136
+#define OP_4X_PS_ABS		264
+#define OP_4X_PS_MERGE00	528
+#define OP_4X_PS_MERGE01	560
+#define OP_4X_PS_MERGE10	592
+#define OP_4X_PS_MERGE11	624
+
+#define SCALAR_NONE		0
+#define SCALAR_HIGH		(1 << 0)
+#define SCALAR_LOW		(1 << 1)
+#define SCALAR_NO_PS0		(1 << 2)
+#define SCALAR_NO_PS1		(1 << 3)
+
+#define GQR_ST_TYPE_MASK	0x00000007
+#define GQR_ST_TYPE_SHIFT	0
+#define GQR_ST_SCALE_MASK	0x00003f00
+#define GQR_ST_SCALE_SHIFT	8
+#define GQR_LD_TYPE_MASK	0x00070000
+#define GQR_LD_TYPE_SHIFT	16
+#define GQR_LD_SCALE_MASK	0x3f000000
+#define GQR_LD_SCALE_SHIFT	24
+
+#define GQR_QUANTIZE_FLOAT	0
+#define GQR_QUANTIZE_U8		4
+#define GQR_QUANTIZE_U16	5
+#define GQR_QUANTIZE_S8		6
+#define GQR_QUANTIZE_S16	7
+
+#define FPU_LS_SINGLE		0
+#define FPU_LS_DOUBLE		1
+#define FPU_LS_SINGLE_LOW	2
+
+/* Refresh qpr[rt] from FPR @rt by passing the FPR through kvm_cvt_df(). */
+static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
+{
+	kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]);
+}
+
+/*
+ * Queue a data storage interrupt for a failed guest access to @eaddr.
+ * Clears MSR fields 33-36 and 42-47, stores @eaddr in DAR and builds
+ * DSISR with bit 33 set, plus bit 38 when @is_store is true.
+ */
+static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
+{
+	u64 guest_msr;
+	u32 fault_bits;
+
+	guest_msr = kvmppc_get_msr(vcpu);
+	guest_msr = kvmppc_set_field(guest_msr, 33, 36, 0);
+	guest_msr = kvmppc_set_field(guest_msr, 42, 47, 0);
+	kvmppc_set_msr(vcpu, guest_msr);
+
+	kvmppc_set_dar(vcpu, eaddr);
+
+	/* Page Fault */
+	fault_bits = kvmppc_set_field(0, 33, 33, 1);
+	if (is_store)
+		fault_bits = kvmppc_set_field(fault_bits, 38, 38, 1);
+	kvmppc_set_dsisr(vcpu, fault_bits);
+
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
+}
+
+/*
+ * Emulate a floating point load from guest address @addr into FPR @rs.
+ *
+ * FPU_LS_DOUBLE reads 8 bytes and stores them in the FPR unchanged; any
+ * other @ls_type reads 4 bytes.  For FPU_LS_SINGLE the word is passed
+ * through kvm_cvt_fd() into the FPR and also copied raw into qpr[rs].
+ *
+ * Returns EMULATE_DONE when the value was read from memory, the result of
+ * kvmppc_handle_load() when kvmppc_ld() reports EMULATE_DO_MMIO, and
+ * EMULATE_FAIL after a data storage interrupt was queued because
+ * kvmppc_ld() failed.
+ */
+static int kvmppc_emulate_fpr_load(struct kvm_vcpu *vcpu,
+				   int rs, ulong addr, int ls_type)
+{
+	int emulated = EMULATE_FAIL;
+	int r;
+	char tmp[8];
+	int len = sizeof(u32);
+
+	if (ls_type == FPU_LS_DOUBLE)
+		len = sizeof(u64);
+
+	/* read from memory */
+	r = kvmppc_ld(vcpu, &addr, len, tmp, true);
+	/* kvmppc_ld() received &addr; record addr as it stands after the call */
+	vcpu->arch.paddr_accessed = addr;
+
+	if (r < 0) {
+		kvmppc_inject_pf(vcpu, addr, false);
+		goto done_load;
+	} else if (r == EMULATE_DO_MMIO) {
+		emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
+					      len, 1);
+		goto done_load;
+	}
+
+	emulated = EMULATE_DONE;
+
+	/* put in registers */
+	switch (ls_type) {
+	case FPU_LS_SINGLE:
+		kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs));
+		vcpu->arch.qpr[rs] = *((u32*)tmp);
+		break;
+	case FPU_LS_DOUBLE:
+		VCPU_FPR(vcpu, rs) = *((u64*)tmp);
+		break;
+	}
+
+	dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp,
+			  addr, len);
+
+done_load:
+	return emulated;
+}
+
+/*
+ * Emulate a floating point store of FPR @rs to guest address @addr.
+ *
+ * @ls_type selects what is written:
+ *   FPU_LS_SINGLE     - 4 bytes, the FPR passed through kvm_cvt_df()
+ *   FPU_LS_SINGLE_LOW - 4 bytes, the low 32 bits of the FPR
+ *   FPU_LS_DOUBLE     - 8 bytes, the FPR unchanged
+ * Any other value results in a zero-length store.
+ *
+ * Returns EMULATE_DONE when the value was written to memory, the result
+ * of kvmppc_handle_store() when kvmppc_st() reports EMULATE_DO_MMIO, and
+ * EMULATE_FAIL after a data storage interrupt was queued because
+ * kvmppc_st() failed.
+ */
+static int kvmppc_emulate_fpr_store(struct kvm_vcpu *vcpu,
+				    int rs, ulong addr, int ls_type)
+{
+	int emulated = EMULATE_FAIL;
+	int r;
+	char tmp[8];
+	u64 val;
+	int len;
+
+	/* tmp holds the bytes for kvmppc_st(); val is the value for the MMIO path */
+	switch (ls_type) {
+	case FPU_LS_SINGLE:
+		kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp);
+		val = *((u32*)tmp);
+		len = sizeof(u32);
+		break;
+	case FPU_LS_SINGLE_LOW:
+		*((u32*)tmp) = VCPU_FPR(vcpu, rs);
+		val = VCPU_FPR(vcpu, rs) & 0xffffffff;
+		len = sizeof(u32);
+		break;
+	case FPU_LS_DOUBLE:
+		*((u64*)tmp) = VCPU_FPR(vcpu, rs);
+		val = VCPU_FPR(vcpu, rs);
+		len = sizeof(u64);
+		break;
+	default:
+		val = 0;
+		len = 0;
+	}
+
+	r = kvmppc_st(vcpu, &addr, len, tmp, true);
+	/* kvmppc_st() received &addr; record addr as it stands after the call */
+	vcpu->arch.paddr_accessed = addr;
+	if (r < 0) {
+		kvmppc_inject_pf(vcpu, addr, true);
+	} else if (r == EMULATE_DO_MMIO) {
+		emulated = kvmppc_handle_store(vcpu, val, len, 1);
+	} else {
+		emulated = EMULATE_DONE;
+	}
+
+	dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n",
+			  val, addr, len);
+
+	return emulated;
+}
+
+/*
+ * Emulate a paired single load from guest address @addr into FPR/QPR @rs.
+ *
+ * With @w set only one 32-bit word is read and the second element is
+ * filled with the bit pattern of 1.0f; otherwise two consecutive words
+ * are read.  The first word goes through kvm_cvt_fd() into the FPR, the
+ * second is stored in qpr[rs].  @i is accepted but not used here.
+ *
+ * Returns EMULATE_DONE when the data was read from memory, the result of
+ * kvmppc_handle_load() for MMIO accesses, and EMULATE_FAIL after a data
+ * storage interrupt was queued because kvmppc_ld() failed.
+ */
+static int kvmppc_emulate_psq_load(struct kvm_vcpu *vcpu,
+				   int rs, ulong addr, bool w, int i)
+{
+	int emulated = EMULATE_FAIL;
+	int r;
+	float one = 1.0;
+	u32 tmp[2];
+
+	/* read from memory */
+	if (w) {
+		r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true);
+		memcpy(&tmp[1], &one, sizeof(u32));
+	} else {
+		r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true);
+	}
+	vcpu->arch.paddr_accessed = addr;
+	if (r < 0) {
+		kvmppc_inject_pf(vcpu, addr, false);
+		goto done_load;
+	} else if ((r == EMULATE_DO_MMIO) && w) {
+		/* MMIO supplies only the FPR word; qpr[rs] gets the 1.0 pattern now */
+		emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
+					      4, 1);
+		vcpu->arch.qpr[rs] = tmp[1];
+		goto done_load;
+	} else if (r == EMULATE_DO_MMIO) {
+		emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FQPR | rs,
+					      8, 1);
+		goto done_load;
+	}
+
+	emulated = EMULATE_DONE;
+
+	/* put in registers */
+	kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, rs));
+	vcpu->arch.qpr[rs] = tmp[1];
+
+	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
+			  tmp[1], addr, w ? 4 : 8);
+
+done_load:
+	return emulated;
+}
+
+/*
+ * Emulate a paired single store of FPR/QPR @rs to guest address @addr.
+ *
+ * The first word is the FPR passed through kvm_cvt_df(), the second is
+ * qpr[rs].  With @w set only the first word (4 bytes) is written,
+ * otherwise both (8 bytes).  @i is accepted but not used here.
+ *
+ * Returns EMULATE_DONE when the data was written to memory, the result
+ * of kvmppc_handle_store() for MMIO accesses, and EMULATE_FAIL after a
+ * data storage interrupt was queued because kvmppc_st() failed.
+ */
+static int kvmppc_emulate_psq_store(struct kvm_vcpu *vcpu,
+				    int rs, ulong addr, bool w, int i)
+{
+	int emulated = EMULATE_FAIL;
+	int r;
+	u32 tmp[2];
+	int len = w ? sizeof(u32) : sizeof(u64);
+
+	kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]);
+	tmp[1] = vcpu->arch.qpr[rs];
+
+	r = kvmppc_st(vcpu, &addr, len, tmp, true);
+	vcpu->arch.paddr_accessed = addr;
+	if (r < 0) {
+		kvmppc_inject_pf(vcpu, addr, true);
+	} else if ((r == EMULATE_DO_MMIO) && w) {
+		emulated = kvmppc_handle_store(vcpu, tmp[0], 4, 1);
+	} else if (r == EMULATE_DO_MMIO) {
+		/* Combine both words, first word in the upper 32 bits */
+		u64 val = ((u64)tmp[0] << 32) | tmp[1];
+		emulated = kvmppc_handle_store(vcpu, val, 8, 1);
+	} else {
+		emulated = EMULATE_DONE;
+	}
+
+	dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n",
+			  tmp[0], tmp[1], addr, len);
+
+	return emulated;
+}
+
+/*
+ * Cuts out inst bits with ordering according to spec.
+ * That means the leftmost bit is zero. All given bits are included.
+ *
+ * Implemented by calling kvmppc_get_field() with both bit positions
+ * offset by 32.
+ */
+static inline u32 inst_get_field(u32 inst, int msb, int lsb)
+{
+	return kvmppc_get_field(inst, msb + 32, lsb + 32);
+}
+
+/*
+ * Decide whether @inst is one of the instructions listed below.
+ *
+ * Returns false right away unless BOOK3S_HFLAG_PAIRED_SINGLE is set in
+ * the vcpu's hflags.  Otherwise the primary opcode and, for opcodes 4,
+ * 31, 59 and 63, the extended opcode fields are matched against the
+ * OP_* tables; true is returned on the first match.
+ */
+static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
+{
+	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
+		return false;
+
+	switch (get_op(inst)) {
+	case OP_PSQ_L:
+	case OP_PSQ_LU:
+	case OP_PSQ_ST:
+	case OP_PSQ_STU:
+	case OP_LFS:
+	case OP_LFSU:
+	case OP_LFD:
+	case OP_LFDU:
+	case OP_STFS:
+	case OP_STFSU:
+	case OP_STFD:
+	case OP_STFDU:
+		return true;
+	case 4:
+		/* X form */
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_4X_PS_CMPU0:
+		case OP_4X_PSQ_LX:
+		case OP_4X_PS_CMPO0:
+		case OP_4X_PSQ_LUX:
+		case OP_4X_PS_NEG:
+		case OP_4X_PS_CMPU1:
+		case OP_4X_PS_MR:
+		case OP_4X_PS_CMPO1:
+		case OP_4X_PS_NABS:
+		case OP_4X_PS_ABS:
+		case OP_4X_PS_MERGE00:
+		case OP_4X_PS_MERGE01:
+		case OP_4X_PS_MERGE10:
+		case OP_4X_PS_MERGE11:
+			return true;
+		}
+		/* XW form */
+		switch (inst_get_field(inst, 25, 30)) {
+		case OP_4XW_PSQ_STX:
+		case OP_4XW_PSQ_STUX:
+			return true;
+		}
+		/* A form */
+		switch (inst_get_field(inst, 26, 30)) {
+		case OP_4A_PS_SUM1:
+		case OP_4A_PS_SUM0:
+		case OP_4A_PS_MULS0:
+		case OP_4A_PS_MULS1:
+		case OP_4A_PS_MADDS0:
+		case OP_4A_PS_MADDS1:
+		case OP_4A_PS_DIV:
+		case OP_4A_PS_SUB:
+		case OP_4A_PS_ADD:
+		case OP_4A_PS_SEL:
+		case OP_4A_PS_RES:
+		case OP_4A_PS_MUL:
+		case OP_4A_PS_RSQRTE:
+		case OP_4A_PS_MSUB:
+		case OP_4A_PS_MADD:
+		case OP_4A_PS_NMSUB:
+		case OP_4A_PS_NMADD:
+			return true;
+		}
+		break;
+	case 59:
+		/* 10-bit extended opcode (bits 21-30) first, then the 5-bit one (26-30) */
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_59_FADDS:
+		case OP_59_FSUBS:
+		case OP_59_FDIVS:
+		case OP_59_FRES:
+		case OP_59_FRSQRTES:
+			return true;
+		}
+		switch (inst_get_field(inst, 26, 30)) {
+		case OP_59_FMULS:
+		case OP_59_FMSUBS:
+		case OP_59_FMADDS:
+		case OP_59_FNMSUBS:
+		case OP_59_FNMADDS:
+			return true;
+		}
+		break;
+	case 63:
+		/* 10-bit extended opcode (bits 21-30) first, then the 5-bit one (26-30) */
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_63_MTFSB0:
+		case OP_63_MTFSB1:
+		case OP_63_MTFSF:
+		case OP_63_MTFSFI:
+		case OP_63_MCRFS:
+		case OP_63_MFFS:
+		case OP_63_FCMPU:
+		case OP_63_FCMPO:
+		case OP_63_FNEG:
+		case OP_63_FMR:
+		case OP_63_FABS:
+		case OP_63_FRSP:
+		case OP_63_FDIV:
+		case OP_63_FADD:
+		case OP_63_FSUB:
+		case OP_63_FCTIW:
+		case OP_63_FCTIWZ:
+		case OP_63_FRSQRTE:
+		case OP_63_FCPSGN:
+			return true;
+		}
+		switch (inst_get_field(inst, 26, 30)) {
+		case OP_63_FMUL:
+		case OP_63_FSEL:
+		case OP_63_FMSUB:
+		case OP_63_FMADD:
+		case OP_63_FNMSUB:
+		case OP_63_FNMADD:
+			return true;
+		}
+		break;
+	case 31:
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_31_LFSX:
+		case OP_31_LFSUX:
+		case OP_31_LFDX:
+		case OP_31_LFDUX:
+		case OP_31_STFSX:
+		case OP_31_STFSUX:
+		case OP_31_STFX:
+		case OP_31_STFUX:
+		case OP_31_STFIWX:
+			return true;
+		}
+		break;
+	}
+
+	return false;
+}
+
+/*
+ * Extract the 12-bit displacement held in the low 12 bits of @inst and
+ * sign-extend it as a two's complement value.
+ *
+ * Returns a displacement in the range -2048..2047.
+ */
+static int get_d_signext(u32 inst)
+{
+	/* Keep all 12 displacement bits, including bits 8-10 */
+	int d = inst & 0xfff;
+
+	/* Bit 11 is the sign bit of the two's complement field */
+	if (d & 0x800)
+		d -= 0x1000;
+
+	return d;
+}
+
+/*
+ * Apply the three-operand helper @func to both halves of a paired single.
+ *
+ * PS0 operands are the FPRs passed through kvm_cvt_df(); PS1 operands
+ * come from vcpu->arch.qpr[].  @scalar flags modify this:
+ *   SCALAR_LOW    - PS0 takes qpr[reg_in2] as its second operand
+ *   SCALAR_HIGH   - PS1 takes the value used as PS0's second operand
+ *   SCALAR_NO_PS0 - the PS0 result is not written to FPR @reg_out
+ *   SCALAR_NO_PS1 - @func is not called for PS1; qpr[reg_out] is untouched
+ *
+ * A set @rc is not handled and only triggers WARN_ON().
+ * Always returns EMULATE_DONE.
+ */
+static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
+				      int reg_out, int reg_in1, int reg_in2,
+				      int reg_in3, int scalar,
+				      void (*func)(u64 *fpscr,
+						 u32 *dst, u32 *src1,
+						 u32 *src2, u32 *src3))
+{
+	u32 *qpr = vcpu->arch.qpr;
+	u32 ps0_out;
+	u32 ps0_in1, ps0_in2, ps0_in3;
+	u32 ps1_in1, ps1_in2, ps1_in3;
+
+	/* RC */
+	WARN_ON(rc);
+
+	/* PS0 */
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3);
+
+	if (scalar & SCALAR_LOW)
+		ps0_in2 = qpr[reg_in2];
+
+	/* Called even with SCALAR_NO_PS0; func is also handed the fpscr */
+	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
+
+	dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
+			  ps0_in1, ps0_in2, ps0_in3, ps0_out);
+
+	if (!(scalar & SCALAR_NO_PS0))
+		kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
+
+	/* PS1 */
+	ps1_in1 = qpr[reg_in1];
+	ps1_in2 = qpr[reg_in2];
+	ps1_in3 = qpr[reg_in3];
+
+	if (scalar & SCALAR_HIGH)
+		ps1_in2 = ps0_in2;
+
+	if (!(scalar & SCALAR_NO_PS1))
+		func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
+
+	dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
+			  ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Apply the two-operand helper @func to both halves of a paired single.
+ *
+ * PS0 operands are the FPRs passed through kvm_cvt_df(); PS1 operands
+ * come from vcpu->arch.qpr[].  @scalar flags modify this:
+ *   SCALAR_LOW    - PS0 takes qpr[reg_in2] as its second operand
+ *   SCALAR_HIGH   - PS1 takes the value used as PS0's second operand
+ *   SCALAR_NO_PS0 - the PS0 result is not written to FPR @reg_out
+ *   SCALAR_NO_PS1 - the PS1 result is not written to qpr[reg_out]
+ * @func is called for both halves regardless of the NO_PS0/NO_PS1 flags.
+ *
+ * A set @rc is not handled and only triggers WARN_ON().
+ * Always returns EMULATE_DONE.
+ */
+static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
+				    int reg_out, int reg_in1, int reg_in2,
+				    int scalar,
+				    void (*func)(u64 *fpscr,
+						 u32 *dst, u32 *src1,
+						 u32 *src2))
+{
+	u32 *qpr = vcpu->arch.qpr;
+	u32 ps0_out;
+	u32 ps0_in1, ps0_in2;
+	u32 ps1_out;
+	u32 ps1_in1, ps1_in2;
+
+	/* RC */
+	WARN_ON(rc);
+
+	/* PS0 */
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1);
+
+	if (scalar & SCALAR_LOW)
+		ps0_in2 = qpr[reg_in2];
+	else
+		kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2);
+
+	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2);
+
+	if (!(scalar & SCALAR_NO_PS0)) {
+		dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
+				  ps0_in1, ps0_in2, ps0_out);
+
+		kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
+	}
+
+	/* PS1 */
+	ps1_in1 = qpr[reg_in1];
+	ps1_in2 = qpr[reg_in2];
+
+	if (scalar & SCALAR_HIGH)
+		ps1_in2 = ps0_in2;
+
+	/* Result lands in a temporary so SCALAR_NO_PS1 can discard it */
+	func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2);
+
+	if (!(scalar & SCALAR_NO_PS1)) {
+		qpr[reg_out] = ps1_out;
+
+		dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n",
+				  ps1_in1, ps1_in2, qpr[reg_out]);
+	}
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Apply the one-operand helper @func to both halves of a paired single.
+ *
+ * PS0 reads FPR @reg_in through kvm_cvt_df() and writes the result back
+ * to FPR @reg_out through kvm_cvt_fd(); PS1 reads qpr[reg_in] and writes
+ * qpr[reg_out] directly.
+ *
+ * A set @rc is not handled and only triggers WARN_ON().
+ * Always returns EMULATE_DONE.
+ */
+static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
+				    int reg_out, int reg_in,
+				    void (*func)(u64 *t,
+						 u32 *dst, u32 *src1))
+{
+	u32 *qpr = vcpu->arch.qpr;
+	u32 ps0_out, ps0_in;
+	u32 ps1_in;
+
+	/* RC */
+	WARN_ON(rc);
+
+	/* PS0 */
+	kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in);
+	func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in);
+
+	dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
+			  ps0_in, ps0_out);
+
+	kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out));
+
+	/* PS1 */
+	/* Copy the input first; reg_out may be the same register as reg_in */
+	ps1_in = qpr[reg_in];
+	func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in);
+
+	dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",
+			  ps1_in, qpr[reg_out]);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate the last guest instruction as a paired single / FPU instruction.
+ *
+ * The instruction is fetched with kvmppc_get_last_inst(); if that does not
+ * return EMULATE_DONE its result is passed straight back to the caller.
+ * Instructions rejected by kvmppc_inst_is_paired_single() yield EMULATE_FAIL.
+ * If the guest MSR has MSR_FP clear, an FP unavailable interrupt is queued
+ * and EMULATE_AGAIN is returned.
+ *
+ * Otherwise the guest FP state is given up, preemption is disabled and the
+ * kernel FPU is enabled while the instruction is dispatched on its primary
+ * opcode.  The local copy of the guest CR is written back only when the
+ * instruction's Rc bit (bit 31) is set.
+ *
+ * Returns the emulation result of the dispatched instruction.
+ */
+int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu)
+{
+	u32 inst;
+	ppc_inst_t pinst;
+	enum emulation_result emulated = EMULATE_DONE;
+	int ax_rd, ax_ra, ax_rb, ax_rc;
+	short full_d;
+	u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c;
+
+	bool rcomp;
+	u32 cr;
+#ifdef DEBUG
+	int i;
+#endif
+
+	emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+	inst = ppc_inst_val(pinst);
+	if (emulated != EMULATE_DONE)
+		return emulated;
+
+	/* Decode the register fields and the 16-bit displacement. */
+	ax_rd = inst_get_field(inst, 6, 10);
+	ax_ra = inst_get_field(inst, 11, 15);
+	ax_rb = inst_get_field(inst, 16, 20);
+	ax_rc = inst_get_field(inst, 21, 25);
+	full_d = inst_get_field(inst, 16, 31);
+
+	fpr_d = &VCPU_FPR(vcpu, ax_rd);
+	fpr_a = &VCPU_FPR(vcpu, ax_ra);
+	fpr_b = &VCPU_FPR(vcpu, ax_rb);
+	fpr_c = &VCPU_FPR(vcpu, ax_rc);
+
+	rcomp = (inst & 1) ? true : false;
+	cr = kvmppc_get_cr(vcpu);
+
+	if (!kvmppc_inst_is_paired_single(vcpu, inst))
+		return EMULATE_FAIL;
+
+	if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
+		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
+		return EMULATE_AGAIN;
+	}
+
+	/* No early returns below: every path must reach preempt_enable(). */
+	kvmppc_giveup_ext(vcpu, MSR_FP);
+	preempt_disable();
+	enable_kernel_fp();
+	/* Do we need to clear FE0 / FE1 here? Don't think so. */
+
+#ifdef DEBUG
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
+		u32 f;
+		kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
+		dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx    QPR[%d] = 0x%x\n",
+			i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]);
+	}
+#endif
+
+	switch (get_op(inst)) {
+	case OP_PSQ_L:
+	{
+		ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+		bool w = inst_get_field(inst, 16, 16) ? true : false;
+		int i = inst_get_field(inst, 17, 19);
+
+		addr += get_d_signext(inst);
+		emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
+		break;
+	}
+	case OP_PSQ_LU:
+	{
+		ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+		bool w = inst_get_field(inst, 16, 16) ? true : false;
+		int i = inst_get_field(inst, 17, 19);
+
+		addr += get_d_signext(inst);
+		emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
+
+		/* Update form: write the effective address back to rA. */
+		if (emulated == EMULATE_DONE)
+			kvmppc_set_gpr(vcpu, ax_ra, addr);
+		break;
+	}
+	case OP_PSQ_ST:
+	{
+		ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+		bool w = inst_get_field(inst, 16, 16) ? true : false;
+		int i = inst_get_field(inst, 17, 19);
+
+		addr += get_d_signext(inst);
+		emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
+		break;
+	}
+	case OP_PSQ_STU:
+	{
+		ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+		bool w = inst_get_field(inst, 16, 16) ? true : false;
+		int i = inst_get_field(inst, 17, 19);
+
+		addr += get_d_signext(inst);
+		emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
+
+		if (emulated == EMULATE_DONE)
+			kvmppc_set_gpr(vcpu, ax_ra, addr);
+		break;
+	}
+	case 4:
+		/*
+		 * Primary opcode 4 is decoded by three successive switches
+		 * on extended opcode fields of decreasing width (X, XW and
+		 * A form).
+		 */
+		/* X form */
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_4X_PS_CMPU0:
+			/* XXX */
+			emulated = EMULATE_FAIL;
+			break;
+		case OP_4X_PSQ_LX:
+		{
+			ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+			bool w = inst_get_field(inst, 21, 21) ? true : false;
+			int i = inst_get_field(inst, 22, 24);
+
+			addr += kvmppc_get_gpr(vcpu, ax_rb);
+			emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
+			break;
+		}
+		case OP_4X_PS_CMPO0:
+			/* XXX */
+			emulated = EMULATE_FAIL;
+			break;
+		case OP_4X_PSQ_LUX:
+		{
+			ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+			bool w = inst_get_field(inst, 21, 21) ? true : false;
+			int i = inst_get_field(inst, 22, 24);
+
+			addr += kvmppc_get_gpr(vcpu, ax_rb);
+			emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
+
+			if (emulated == EMULATE_DONE)
+				kvmppc_set_gpr(vcpu, ax_ra, addr);
+			break;
+		}
+		case OP_4X_PS_NEG:
+			/* Flip the sign bit of both halves. */
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL;
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+			vcpu->arch.qpr[ax_rd] ^= 0x80000000;
+			break;
+		case OP_4X_PS_CMPU1:
+			/* XXX */
+			emulated = EMULATE_FAIL;
+			break;
+		case OP_4X_PS_MR:
+			WARN_ON(rcomp);
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+			break;
+		case OP_4X_PS_CMPO1:
+			/* XXX */
+			emulated = EMULATE_FAIL;
+			break;
+		case OP_4X_PS_NABS:
+			/* Force the sign bit of both halves on. */
+			WARN_ON(rcomp);
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL;
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+			vcpu->arch.qpr[ax_rd] |= 0x80000000;
+			break;
+		case OP_4X_PS_ABS:
+			/* Clear the sign bit of both halves. */
+			WARN_ON(rcomp);
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb);
+			VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL;
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+			vcpu->arch.qpr[ax_rd] &= ~0x80000000;
+			break;
+		case OP_4X_PS_MERGE00:
+			WARN_ON(rcomp);
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
+			/* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+			kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
+				   &vcpu->arch.qpr[ax_rd]);
+			break;
+		case OP_4X_PS_MERGE01:
+			WARN_ON(rcomp);
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra);
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+			break;
+		case OP_4X_PS_MERGE10:
+			WARN_ON(rcomp);
+			/* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
+			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
+				   &VCPU_FPR(vcpu, ax_rd));
+			/* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */
+			kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb),
+				   &vcpu->arch.qpr[ax_rd]);
+			break;
+		case OP_4X_PS_MERGE11:
+			WARN_ON(rcomp);
+			/* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */
+			kvm_cvt_fd(&vcpu->arch.qpr[ax_ra],
+				   &VCPU_FPR(vcpu, ax_rd));
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
+			break;
+		}
+		/* XW form */
+		switch (inst_get_field(inst, 25, 30)) {
+		case OP_4XW_PSQ_STX:
+		{
+			ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+			bool w = inst_get_field(inst, 21, 21) ? true : false;
+			int i = inst_get_field(inst, 22, 24);
+
+			addr += kvmppc_get_gpr(vcpu, ax_rb);
+			emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
+			break;
+		}
+		case OP_4XW_PSQ_STUX:
+		{
+			ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
+			bool w = inst_get_field(inst, 21, 21) ? true : false;
+			int i = inst_get_field(inst, 22, 24);
+
+			addr += kvmppc_get_gpr(vcpu, ax_rb);
+			emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
+
+			if (emulated == EMULATE_DONE)
+				kvmppc_set_gpr(vcpu, ax_ra, addr);
+			break;
+		}
+		}
+		/* A form */
+		switch (inst_get_field(inst, 26, 30)) {
+		case OP_4A_PS_SUM1:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds);
+			VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc);
+			break;
+		case OP_4A_PS_SUM0:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds);
+			vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc];
+			break;
+		case OP_4A_PS_MULS0:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls);
+			break;
+		case OP_4A_PS_MULS1:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, SCALAR_LOW, fps_fmuls);
+			break;
+		case OP_4A_PS_MADDS0:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds);
+			break;
+		case OP_4A_PS_MADDS1:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds);
+			break;
+		case OP_4A_PS_DIV:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rb, SCALAR_NONE, fps_fdivs);
+			break;
+		case OP_4A_PS_SUB:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rb, SCALAR_NONE, fps_fsubs);
+			break;
+		case OP_4A_PS_ADD:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rb, SCALAR_NONE, fps_fadds);
+			break;
+		case OP_4A_PS_SEL:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel);
+			break;
+		case OP_4A_PS_RES:
+			emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd,
+					ax_rb, fps_fres);
+			break;
+		case OP_4A_PS_MUL:
+			emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, SCALAR_NONE, fps_fmuls);
+			break;
+		case OP_4A_PS_RSQRTE:
+			emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd,
+					ax_rb, fps_frsqrte);
+			break;
+		case OP_4A_PS_MSUB:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs);
+			break;
+		case OP_4A_PS_MADD:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds);
+			break;
+		case OP_4A_PS_NMSUB:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs);
+			break;
+		case OP_4A_PS_NMADD:
+			emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
+					ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds);
+			break;
+		}
+		break;
+
+	/* Real FPU operations */
+
+	case OP_LFS:
+	{
+		ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+		emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
+						   FPU_LS_SINGLE);
+		break;
+	}
+	case OP_LFSU:
+	{
+		ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+		emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
+						   FPU_LS_SINGLE);
+
+		if (emulated == EMULATE_DONE)
+			kvmppc_set_gpr(vcpu, ax_ra, addr);
+		break;
+	}
+	case OP_LFD:
+	{
+		ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+		emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
+						   FPU_LS_DOUBLE);
+		break;
+	}
+	case OP_LFDU:
+	{
+		ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+		emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
+						   FPU_LS_DOUBLE);
+
+		if (emulated == EMULATE_DONE)
+			kvmppc_set_gpr(vcpu, ax_ra, addr);
+		break;
+	}
+	case OP_STFS:
+	{
+		ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+		emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
+						    FPU_LS_SINGLE);
+		break;
+	}
+	case OP_STFSU:
+	{
+		ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+		emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
+						    FPU_LS_SINGLE);
+
+		if (emulated == EMULATE_DONE)
+			kvmppc_set_gpr(vcpu, ax_ra, addr);
+		break;
+	}
+	case OP_STFD:
+	{
+		ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
+
+		emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
+						    FPU_LS_DOUBLE);
+		break;
+	}
+	case OP_STFDU:
+	{
+		ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
+
+		emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
+						    FPU_LS_DOUBLE);
+
+		if (emulated == EMULATE_DONE)
+			kvmppc_set_gpr(vcpu, ax_ra, addr);
+		break;
+	}
+	case 31:
+		/* Indexed FP loads and stores: address is (rA|0) + rB. */
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_31_LFSX:
+		{
+			ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
+
+			addr += kvmppc_get_gpr(vcpu, ax_rb);
+			emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
+							   addr, FPU_LS_SINGLE);
+			break;
+		}
+		case OP_31_LFSUX:
+		{
+			ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
+							   addr, FPU_LS_SINGLE);
+
+			if (emulated == EMULATE_DONE)
+				kvmppc_set_gpr(vcpu, ax_ra, addr);
+			break;
+		}
+		case OP_31_LFDX:
+		{
+			ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
+							   addr, FPU_LS_DOUBLE);
+			break;
+		}
+		case OP_31_LFDUX:
+		{
+			ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
+							   addr, FPU_LS_DOUBLE);
+
+			if (emulated == EMULATE_DONE)
+				kvmppc_set_gpr(vcpu, ax_ra, addr);
+			break;
+		}
+		case OP_31_STFSX:
+		{
+			ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
+							    addr, FPU_LS_SINGLE);
+			break;
+		}
+		case OP_31_STFSUX:
+		{
+			ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
+							    addr, FPU_LS_SINGLE);
+
+			if (emulated == EMULATE_DONE)
+				kvmppc_set_gpr(vcpu, ax_ra, addr);
+			break;
+		}
+		case OP_31_STFX:
+		{
+			ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
+							    addr, FPU_LS_DOUBLE);
+			break;
+		}
+		case OP_31_STFUX:
+		{
+			ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
+							    addr, FPU_LS_DOUBLE);
+
+			if (emulated == EMULATE_DONE)
+				kvmppc_set_gpr(vcpu, ax_ra, addr);
+			break;
+		}
+		case OP_31_STFIWX:
+		{
+			ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
+				     kvmppc_get_gpr(vcpu, ax_rb);
+
+			emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
+							    addr,
+							    FPU_LS_SINGLE_LOW);
+			break;
+		}
+			break;
+		}
+		break;
+	case 59:
+		/*
+		 * Single precision arithmetic: each result is mirrored into
+		 * the QPR via kvmppc_sync_qpr().
+		 */
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_59_FADDS:
+			fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FSUBS:
+			fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FDIVS:
+			fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FRES:
+			fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FRSQRTES:
+			fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		}
+		switch (inst_get_field(inst, 26, 30)) {
+		case OP_59_FMULS:
+			fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FMSUBS:
+			fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FMADDS:
+			fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FNMSUBS:
+			fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_59_FNMADDS:
+			fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		}
+		break;
+	case 63:
+		switch (inst_get_field(inst, 21, 30)) {
+		case OP_63_MTFSB0:
+		case OP_63_MTFSB1:
+		case OP_63_MCRFS:
+		case OP_63_MTFSFI:
+			/* XXX need to implement */
+			break;
+		case OP_63_MFFS:
+			/* XXX missing CR */
+			*fpr_d = vcpu->arch.fp.fpscr;
+			break;
+		case OP_63_MTFSF:
+			/* XXX missing fm bits */
+			/* XXX missing CR */
+			vcpu->arch.fp.fpscr = *fpr_b;
+			break;
+		case OP_63_FCMPU:
+		{
+			u32 tmp_cr;
+			u32 cr0_mask = 0xf0000000;
+			u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
+
+			fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
+			/*
+			 * Replace the target CR field with the comparison
+			 * result.  This must read tmp_cr, not cr: merging
+			 * from cr would just copy the old guest CR0 into
+			 * the target field and drop the result.
+			 * Assumes the helper leaves the result in the CR0
+			 * field of tmp_cr (matches cr0_mask) - TODO confirm
+			 * against the fpd_fcmpu implementation.
+			 */
+			cr &= ~(cr0_mask >> cr_shift);
+			cr |= (tmp_cr & cr0_mask) >> cr_shift;
+			/*
+			 * NOTE(review): cr is only written back below when
+			 * rcomp is set; confirm whether compares need an
+			 * unconditional kvmppc_set_cr().
+			 */
+			break;
+		}
+		case OP_63_FCMPO:
+		{
+			u32 tmp_cr;
+			u32 cr0_mask = 0xf0000000;
+			u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
+
+			fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b);
+			/* Same merge as OP_63_FCMPU: take the result from tmp_cr. */
+			cr &= ~(cr0_mask >> cr_shift);
+			cr |= (tmp_cr & cr0_mask) >> cr_shift;
+			break;
+		}
+		case OP_63_FNEG:
+			fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			break;
+		case OP_63_FMR:
+			*fpr_d = *fpr_b;
+			break;
+		case OP_63_FABS:
+			fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			break;
+		case OP_63_FCPSGN:
+			fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			break;
+		case OP_63_FDIV:
+			fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			break;
+		case OP_63_FADD:
+			fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			break;
+		case OP_63_FSUB:
+			fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b);
+			break;
+		case OP_63_FCTIW:
+			fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			break;
+		case OP_63_FCTIWZ:
+			fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			break;
+		case OP_63_FRSP:
+			fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			kvmppc_sync_qpr(vcpu, ax_rd);
+			break;
+		case OP_63_FRSQRTE:
+		{
+			double one = 1.0f;
+
+			/* fD = sqrt(fB) */
+			fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b);
+			/* fD = 1.0f / fD */
+			fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
+			break;
+		}
+		}
+		switch (inst_get_field(inst, 26, 30)) {
+		case OP_63_FMUL:
+			fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c);
+			break;
+		case OP_63_FSEL:
+			fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			break;
+		case OP_63_FMSUB:
+			fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			break;
+		case OP_63_FMADD:
+			fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			break;
+		case OP_63_FNMSUB:
+			fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			break;
+		case OP_63_FNMADD:
+			fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
+			break;
+		}
+		break;
+	}
+
+#ifdef DEBUG
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) {
+		u32 f;
+		kvm_cvt_df(&VCPU_FPR(vcpu, i), &f);
+		dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
+	}
+#endif
+
+	/* Commit the local CR copy only for record-form instructions. */
+	if (rcomp)
+		kvmppc_set_cr(vcpu, cr);
+
+	disable_kernel_fp();
+	preempt_enable();
+
+	return emulated;
+}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
new file mode 100644
index 0000000000..9118242063
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -0,0 +1,2123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *    Alexander Graf <agraf@suse.de>
+ *    Kevin Wolf <mail@kevin-wolf.de>
+ *    Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ * Functions relating to running KVM on Book 3S processors where
+ * we don't have access to hypervisor mode, and we run the guest
+ * in problem state (user mode).
+ *
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
+#include <asm/firmware.h>
+#include <asm/setup.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <asm/asm-prototypes.h>
+#include <asm/tm.h>
+
+#include "book3s.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_pr.h"
+
+/* #define EXIT_DEBUG */
+/* #define DEBUG_EXT */
+
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+			     ulong msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac);
+#endif
+
+/* Some compatibility defines */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define MSR_USER32 MSR_USER
+#define MSR_USER64 MSR_USER
+#define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M   _PAGE_COHERENT
+#endif
+
+/*
+ * Report whether the guest MSR has MSR_DR set while MSR_IR is clear
+ * ("split real" mode).
+ */
+static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
+{
+	const ulong xlate_bits = MSR_IR | MSR_DR;
+
+	return (kvmppc_get_msr(vcpu) & xlate_bits) == MSR_DR;
+}
+
+/*
+ * Apply the split real hack: when the guest runs with only MSR_DR set,
+ * has not been fixed up yet, and its PC has no SPLIT_HACK_MASK bits set,
+ * flag the vcpu with BOOK3S_HFLAG_SPLIT_HACK and OR SPLIT_HACK_OFFS into
+ * the PC.  In every other case nothing is changed.
+ */
+static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
+{
+	ulong guest_msr = kvmppc_get_msr(vcpu);
+	ulong guest_pc = kvmppc_get_pc(vcpu);
+	bool dr_only = (guest_msr & (MSR_IR|MSR_DR)) == MSR_DR;
+	bool already_fixed = vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK;
+	bool pc_fixable = !(guest_pc & SPLIT_HACK_MASK);
+
+	if (!dr_only || already_fixed || !pc_fixable)
+		return;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK;
+	kvmppc_set_pc(vcpu, guest_pc | SPLIT_HACK_OFFS);
+}
+
+/*
+ * Undo the split real hack: if BOOK3S_HFLAG_SPLIT_HACK is set, strip the
+ * SPLIT_HACK_MASK bits from PC and LR wherever they equal SPLIT_HACK_OFFS,
+ * then clear the flag.
+ */
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+	ulong guest_pc, guest_lr;
+
+	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK))
+		return;
+
+	guest_pc = kvmppc_get_pc(vcpu);
+	guest_lr = kvmppc_get_lr(vcpu);
+
+	if ((guest_pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+		kvmppc_set_pc(vcpu, guest_pc & ~SPLIT_HACK_MASK);
+	if ((guest_lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+		kvmppc_set_lr(vcpu, guest_lr & ~SPLIT_HACK_MASK);
+
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+}
+
+/*
+ * Deliver interrupt vector @vec to the guest.
+ *
+ * The split real fixup is undone first.  The current PC goes to SRR0, the
+ * current MSR (masked with SRR1_MSR_BITS) plus @srr1_flags goes to SRR1,
+ * and the guest continues at hior + @vec with the vcpu's interrupt MSR.
+ */
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+	unsigned long cur_msr, cur_pc, target_msr, target_pc;
+
+	kvmppc_unfixup_split_real(vcpu);
+
+	cur_msr = kvmppc_get_msr(vcpu);
+	cur_pc = kvmppc_get_pc(vcpu);
+	target_pc = to_book3s(vcpu)->hior + vec;
+	target_msr = vcpu->arch.intr_msr;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * A transactional guest is moved to suspended state on delivery;
+	 * otherwise the current TS bits are carried over.
+	 */
+	target_msr |= MSR_TM_TRANSACTIONAL(cur_msr) ?
+			MSR_TS_S : (cur_msr & MSR_TS_MASK);
+#endif
+
+	kvmppc_set_srr0(vcpu, cur_pc);
+	kvmppc_set_srr1(vcpu, (cur_msr & SRR1_MSR_BITS) | srr1_flags);
+	kvmppc_set_pc(vcpu, target_pc);
+	kvmppc_set_msr(vcpu, target_msr);
+}
+
+/*
+ * Prepare @vcpu for running on the current CPU.
+ *
+ * On Book3S 64 the saved SLB shadow is copied into the shadow vcpu and the
+ * shadow vcpu is marked as not in use; on hosts with CPU_FTR_HVMODE the
+ * LPCR AIL bits (and FSCR_SCV, when the current thread has it set) are
+ * cleared.  The vcpu is then bound to smp_processor_id(), the split real
+ * fixup is applied if needed and the TM state is restored.
+ *
+ * Note that the @cpu argument is not used by this function.
+ */
+static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
+	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
+	svcpu->in_use = 0;
+	svcpu_put(svcpu);
+
+	/* Disable AIL if supported */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+		if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+			mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
+	}
+#endif
+
+	vcpu->cpu = smp_processor_id();
+#ifdef CONFIG_PPC_BOOK3S_32
+	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
+#endif
+
+	if (kvmppc_is_split_real(vcpu))
+		kvmppc_fixup_split_real(vcpu);
+
+	kvmppc_restore_tm_pr(vcpu);
+}
+
+/*
+ * Detach @vcpu from the current CPU; counterpart of
+ * kvmppc_core_vcpu_load_pr().
+ *
+ * On Book3S 64 any state still held in the shadow vcpu is copied back, the
+ * SLB is saved into the SLB shadow, and on hosts with CPU_FTR_HVMODE the
+ * AIL / SCV settings are switched back on.  The split real fixup is undone,
+ * guest-owned FP/VEC/VSX and TAR facilities are given up, TM state is
+ * saved, and vcpu->cpu is set to -1.
+ */
+static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	if (svcpu->in_use) {
+		kvmppc_copy_from_svcpu(vcpu);
+	}
+	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
+	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
+	svcpu_put(svcpu);
+
+	/* Enable AIL if supported */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+		if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+			mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+	}
+#endif
+
+	if (kvmppc_is_split_real(vcpu))
+		kvmppc_unfixup_split_real(vcpu);
+
+	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+	kvmppc_save_tm_pr(vcpu);
+
+	vcpu->cpu = -1;
+}
+
+/*
+ * Copy data needed by real-mode code from vcpu to shadow vcpu.
+ *
+ * GPRs 0-13, CR, XER, CTR, LR and PC (plus shadow_fscr on Book3S 64) are
+ * mirrored into the shadow vcpu, the entry time stamps are sampled, and
+ * the shadow vcpu is marked as in use.
+ */
+void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+	int reg;
+
+	/* Only r0..r13 are mirrored in the shadow vcpu. */
+	for (reg = 0; reg <= 13; reg++)
+		svcpu->gpr[reg] = vcpu->arch.regs.gpr[reg];
+
+	svcpu->cr  = vcpu->arch.regs.ccr;
+	svcpu->xer = vcpu->arch.regs.xer;
+	svcpu->ctr = vcpu->arch.regs.ctr;
+	svcpu->lr  = vcpu->arch.regs.link;
+	svcpu->pc  = vcpu->arch.regs.nip;
+#ifdef CONFIG_PPC_BOOK3S_64
+	svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
+#endif
+
+	/*
+	 * Remember the time base at guest entry; it is the reference
+	 * point for working out the guest purr and spurr values.
+	 */
+	vcpu->arch.entry_tb = get_tb();
+	vcpu->arch.entry_vtb = get_vtb();
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		vcpu->arch.entry_ic = mfspr(SPRN_IC);
+
+	svcpu->in_use = true;
+	svcpu_put(svcpu);
+}
+
+/*
+ * Recompute vcpu->arch.shadow_msr from the current guest MSR.
+ *
+ * A fixed set of guest MSR bits is passed through, the bits the guest
+ * process always runs with are forced on, and the extension bits the
+ * guest currently owns are added.
+ */
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+	ulong guest_msr = kvmppc_get_msr(vcpu);
+	/* Guest MSR values */
+	ulong passthrough = MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE |
+			    MSR_LE;
+	ulong shadow;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	passthrough |= MSR_TM | MSR_TS_MASK;
+#endif
+	shadow = guest_msr & passthrough;
+
+	/* Process MSR values */
+	shadow |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
+	/* External providers the guest reserved */
+	shadow |= guest_msr & vcpu->arch.guest_owned_ext;
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* 64-bit Process MSR values */
+	shadow |= MSR_HV;
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * All TM transactions must fail while the guest is in privileged
+	 * state, so MSR TM is dropped there to make every tbegin. trap
+	 * into the host.
+	 */
+	if (!(guest_msr & MSR_PR))
+		shadow &= ~MSR_TM;
+#endif
+
+	vcpu->arch.shadow_msr = shadow;
+}
+
+/*
+ * Copy data touched by real-mode code from shadow vcpu back to vcpu.
+ *
+ * Does nothing when the shadow vcpu is not marked in use.  Otherwise
+ * GPRs 0-13, CR, XER, CTR, LR, PC and the fault / last-instruction state
+ * are copied back, the purr/spurr/vtb/ic accounting is advanced by the
+ * time spent since kvmppc_copy_to_svcpu(), and the shadow vcpu is marked
+ * as no longer in use.
+ */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	ulong old_msr;
+#endif
+
+	/*
+	 * Maybe we were already preempted and synced the svcpu from
+	 * our preempt notifiers. Don't bother touching this svcpu then.
+	 */
+	if (!svcpu->in_use)
+		goto out;
+
+	vcpu->arch.regs.gpr[0] = svcpu->gpr[0];
+	vcpu->arch.regs.gpr[1] = svcpu->gpr[1];
+	vcpu->arch.regs.gpr[2] = svcpu->gpr[2];
+	vcpu->arch.regs.gpr[3] = svcpu->gpr[3];
+	vcpu->arch.regs.gpr[4] = svcpu->gpr[4];
+	vcpu->arch.regs.gpr[5] = svcpu->gpr[5];
+	vcpu->arch.regs.gpr[6] = svcpu->gpr[6];
+	vcpu->arch.regs.gpr[7] = svcpu->gpr[7];
+	vcpu->arch.regs.gpr[8] = svcpu->gpr[8];
+	vcpu->arch.regs.gpr[9] = svcpu->gpr[9];
+	vcpu->arch.regs.gpr[10] = svcpu->gpr[10];
+	vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
+	vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
+	vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
+	vcpu->arch.regs.ccr  = svcpu->cr;
+	vcpu->arch.regs.xer = svcpu->xer;
+	vcpu->arch.regs.ctr = svcpu->ctr;
+	vcpu->arch.regs.link  = svcpu->lr;
+	vcpu->arch.regs.nip  = svcpu->pc;
+	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+	vcpu->arch.fault_dar   = svcpu->fault_dar;
+	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+	vcpu->arch.last_inst   = svcpu->last_inst;
+#ifdef CONFIG_PPC_BOOK3S_64
+	vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
+#endif
+	/*
+	 * Update purr and spurr using time base on exit.
+	 */
+	vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
+	vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
+	to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Unlike other MSR bits, the MSR[TS] bits can be changed by the
+	 * guest without notifying the host: they are modified by
+	 * unprivileged instructions like "tbegin"/"tend"/"tresume"/
+	 * "tsuspend" in a PR KVM guest.
+	 *
+	 * It is necessary to sync here to calculate a correct shadow_msr.
+	 *
+	 * A privileged guest's tbegin currently always fails, so only a
+	 * problem state guest needs to be handled.
+	 */
+	old_msr = kvmppc_get_msr(vcpu);
+	if (unlikely((old_msr & MSR_PR) &&
+		(vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) !=
+				(old_msr & (MSR_TS_MASK)))) {
+		/* Take the TS bits from shadow_srr1 and rebuild shadow_msr. */
+		old_msr &= ~(MSR_TS_MASK);
+		old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK));
+		kvmppc_set_msr_fast(vcpu, old_msr);
+		kvmppc_recalc_shadow_msr(vcpu);
+	}
+#endif
+
+	svcpu->in_use = false;
+
+out:
+	svcpu_put(svcpu);
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Read the TFHAR, TEXASR and TFIAR SPRs into the vcpu, bracketed by
+ * tm_enable() / tm_disable().
+ */
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
+{
+	tm_enable();
+	vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+	vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+	vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+	tm_disable();
+}
+
+/*
+ * Write the vcpu's saved TFHAR, TEXASR and TFIAR values back to the SPRs,
+ * bracketed by tm_enable() / tm_disable().
+ */
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
+{
+	tm_enable();
+	mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+	mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+	mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+	tm_disable();
+}
+
+/*
+ * Load up the math extensions (FP / VEC / VSX) that are enabled in the
+ * guest MSR as seen by kvmppc_get_msr() but that the guest does not
+ * currently own in hardware.
+ */
+static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu)
+{
+	ulong enabled = kvmppc_get_msr(vcpu) & (MSR_FP | MSR_VEC | MSR_VSX);
+	ulong lost = enabled & ~vcpu->arch.guest_owned_ext;
+	ulong exit_nr;
+
+	switch (lost) {
+	case 0:
+		/* Nothing is missing. */
+		return;
+	case MSR_FP:
+		exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL;
+		break;
+	case MSR_VEC:
+		exit_nr = BOOK3S_INTERRUPT_ALTIVEC;
+		break;
+	default:
+		/* Any other combination goes through the VSX path. */
+		exit_nr = BOOK3S_INTERRUPT_VSX;
+		break;
+	}
+
+	kvmppc_handle_ext(vcpu, exit_nr, lost);
+}
+
+/*
+ * Save the guest's transactional memory state.
+ *
+ * With no transaction active only the TM SPRs are saved.  Otherwise the
+ * TAR facility and VSX are given up first and the full TM state is saved
+ * with preemption disabled.
+ */
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
+{
+	if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
+		kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+		kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+		preempt_disable();
+		_kvmppc_save_tm_pr(vcpu, mfmsr());
+		preempt_enable();
+	} else {
+		kvmppc_save_tm_sprs(vcpu);
+	}
+}
+
+/*
+ * Restore the guest's transactional memory state.
+ *
+ * With no transaction active only the TM SPRs are restored; otherwise the
+ * full TM state is restored with preemption disabled.  In both cases, if
+ * the guest MSR has MSR_TM set afterwards, the lost math extensions are
+ * loaded up and, when FSCR_TAR is set in the guest FSCR, the TAR facility
+ * is handled as well.
+ */
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
+{
+	if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
+		preempt_disable();
+		_kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
+		preempt_enable();
+	} else {
+		kvmppc_restore_tm_sprs(vcpu);
+	}
+
+	if (!(kvmppc_get_msr(vcpu) & MSR_TM))
+		return;
+
+	kvmppc_handle_lost_math_exts(vcpu);
+	if (vcpu->arch.fscr & FSCR_TAR)
+		kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+}
+#endif
+
+/*
+ * Process pending vcpu requests.  Always returns 1, which indicates that
+ * we want to get back into the guest.
+ */
+static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * TLB_FLUSH is misused here to mean "clear all shadow cache
+	 * entries".
+	 */
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+	}
+
+	return 1;
+}
+
+/************* MMU Notifiers *************/
+/*
+ * Flush the shadow PTEs covering @range on every vcpu of @kvm.
+ * Always returns false.
+ */
+static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long idx;
+
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		kvmppc_mmu_pte_pflush(vcpu, range->start << PAGE_SHIFT,
+				      range->end << PAGE_SHIFT);
+	}
+
+	return false;
+}
+
+/* Unmap a gfn range: thin wrapper around do_kvm_unmap_gfn(). */
+static bool kvm_unmap_gfn_range_pr(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	return do_kvm_unmap_gfn(kvm, range);
+}
+
+/* Page aging is not tracked here: both arguments are ignored, returns false. */
+static bool kvm_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/* XXX could be more clever ;) */
+	return false;
+}
+
+/* Page age testing is not tracked here: both arguments are ignored, returns false. */
+static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/* XXX could be more clever ;) */
+	return false;
+}
+
+/*
+ * Handle a changed host mapping for a gfn range by flushing the range
+ * through do_kvm_unmap_gfn().
+ */
+static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/* The page will get remapped properly on its next fault */
+	return do_kvm_unmap_gfn(kvm, range);
+}
+
+/*****************************************/
+
+/*
+ * Set the guest MSR to @msr and bring all derived state in line with it.
+ *
+ * The value is adjusted for PAPR guests and masked with the vcpu's
+ * msr_mask before it is stored.  Afterwards the shadow MSR is
+ * recalculated, a set MSR_POW halts the vcpu when no exceptions are
+ * pending, the split real fixup is applied or undone, segments are
+ * remapped when PR/IR/DR changed, a stale 32-bit magic page mapping is
+ * flushed on a 32 -> 64-bit kernel transition, and the FPU / lost math
+ * extensions are preloaded when enabled.
+ */
+static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
+{
+	ulong old_msr;
+
+	/* For PAPR guest, make sure MSR reflects guest mode */
+	if (vcpu->arch.papr_enabled)
+		msr = (msr & ~MSR_HV) | MSR_ME;
+
+#ifdef EXIT_DEBUG
+	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* We should never target guest MSR to TS=10 && PR=0,
+	 * since we always fail transaction for guest privilege
+	 * state.
+	 */
+	if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr))
+		kvmppc_emulate_tabort(vcpu,
+			TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT);
+#endif
+
+	/* Remember the previous MSR so mode changes can be detected below. */
+	old_msr = kvmppc_get_msr(vcpu);
+	msr &= to_book3s(vcpu)->msr_mask;
+	kvmppc_set_msr_fast(vcpu, msr);
+	kvmppc_recalc_shadow_msr(vcpu);
+
+	if (msr & MSR_POW) {
+		if (!vcpu->arch.pending_exceptions) {
+			kvm_vcpu_halt(vcpu);
+			vcpu->stat.generic.halt_wakeup++;
+
+			/* Unset POW bit after we woke up */
+			msr &= ~MSR_POW;
+			kvmppc_set_msr_fast(vcpu, msr);
+		}
+	}
+
+	if (kvmppc_is_split_real(vcpu))
+		kvmppc_fixup_split_real(vcpu);
+	else
+		kvmppc_unfixup_split_real(vcpu);
+
+	/* A change of PR, IR or DR requires the segments to be remapped. */
+	if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
+		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
+		kvmppc_mmu_flush_segments(vcpu);
+		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+		/* Preload magic page segment when in kernel mode */
+		if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+			struct kvm_vcpu_arch *a = &vcpu->arch;
+
+			if (msr & MSR_DR)
+				kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+			else
+				kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+		}
+	}
+
+	/*
+	 * When switching from 32 to 64-bit, we may have a stale 32-bit
+	 * magic page around, we need to flush it. Typically 32-bit magic
+	 * page will be instantiated when calling into RTAS. Note: We
+	 * assume that such transition only happens while in kernel mode,
+	 * ie, we never transition from user 32-bit to kernel 64-bit with
+	 * a 32-bit magic page around.
+	 */
+	if (vcpu->arch.magic_page_pa &&
+	    !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
+		/* going from RTAS to normal kernel code */
+		kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
+				     ~0xFFFUL);
+	}
+
+	/* Preload FPU if it's enabled */
+	if (kvmppc_get_msr(vcpu) & MSR_FP)
+		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (kvmppc_get_msr(vcpu) & MSR_TM)
+		kvmppc_handle_lost_math_exts(vcpu);
+#endif
+}
+
+static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+	u32 host_pvr;
+
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
+	vcpu->arch.pvr = pvr;
+#ifdef CONFIG_PPC_BOOK3S_64
+	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
+		kvmppc_mmu_book3s_64_init(vcpu);
+		if (!to_book3s(vcpu)->hior_explicit)
+			to_book3s(vcpu)->hior = 0xfff00000;
+		to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+		vcpu->arch.cpu_type = KVM_CPU_3S_64;
+	} else
+#endif
+	{
+		kvmppc_mmu_book3s_32_init(vcpu);
+		if (!to_book3s(vcpu)->hior_explicit)
+			to_book3s(vcpu)->hior = 0;
+		to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+		vcpu->arch.cpu_type = KVM_CPU_3S_32;
+	}
+
+	kvmppc_sanity_check(vcpu);
+
+	/* If we are in hypervisor level on 970, we can tell the CPU to
+	 * treat DCBZ as 32 bytes store */
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
+	if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
+	    !strcmp(cur_cpu_spec->platform, "ppc970"))
+		vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+
+	/* Cell performs badly if MSR_FEx are set. So let's hope nobody
+	   really needs them in a VM on Cell and force disable them. */
+	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
+		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+
+	/*
+	 * If they're asking for POWER6 or later, set the flag
+	 * indicating that we can do multiple large page sizes
+	 * and 1TB segments.
+	 * Also set the flag that indicates that tlbie has the large
+	 * page bit in the RB operand instead of the instruction.
+	 */
+	switch (PVR_VER(pvr)) {
+	case PVR_POWER6:
+	case PVR_POWER7:
+	case PVR_POWER7p:
+	case PVR_POWER8:
+	case PVR_POWER8E:
+	case PVR_POWER8NVL:
+	case PVR_POWER9:
+		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+			BOOK3S_HFLAG_NEW_TLBIE;
+		break;
+	}
+
+#ifdef CONFIG_PPC_BOOK3S_32
+	/* 32 bit Book3S always has 32 byte dcbz */
+	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+#endif
+
+	/* On some CPUs we can execute paired single operations natively */
+	asm ( "mfpvr %0" : "=r"(host_pvr));
+	switch (host_pvr) {
+	case 0x00080200:	/* lonestar 2.0 */
+	case 0x00088202:	/* lonestar 2.2 */
+	case 0x70000100:	/* gekko 1.0 */
+	case 0x00080100:	/* gekko 2.0 */
+	case 0x00083203:	/* gekko 2.3a */
+	case 0x00083213:	/* gekko 2.3b */
+	case 0x00083204:	/* gekko 2.4 */
+	case 0x00083214:	/* gekko 2.4e (8SE) - retail HW2 */
+	case 0x00087200:	/* broadway */
+		vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
+		/* Enable HID2.PSE - in case we need it later */
+		mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
+	}
+}
+
+/* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes.
+ * To make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz
+ * to emulate a 32-byte dcbz length.
+ *
+ * The Book3s_64 inventors also realized this case and implemented a special bit
+ * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
+ *
+ * Instead, patch the dcbz instructions in the guest page backing pte->raddr.
+ */
+static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+	struct page *hpage;
+	u64 hpage_offset;
+	u32 *page;
+	int i;
+
+	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+	if (is_error_page(hpage))
+		return;
+
+	hpage_offset = pte->raddr & ~PAGE_MASK;
+	hpage_offset &= ~0xFFFULL;
+	hpage_offset /= 4;
+
+	/* gfn_to_page() already holds a reference; put_page() below drops it */
+	page = kmap_atomic(hpage);
+
+	/* patch dcbz into reserved instruction, so we trap */
+	for (i = hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+		if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
+			page[i] &= cpu_to_be32(0xfffffff7);
+
+	kunmap_atomic(page);
+	put_page(hpage);
+}
+
+static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	ulong magic = vcpu->arch.magic_page_pa;
+
+	/* Without MSR_SF, only the low 32 bits of the magic page address count */
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
+		magic = (uint32_t)magic;
+
+	gpa &= ~0xFFFULL;
+	if (unlikely(magic) && unlikely((magic & KVM_PAM) == (gpa & KVM_PAM)))
+		return true;
+
+	return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
+}
+
+static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu,
+			    ulong eaddr, int vec)
+{
+	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+	bool iswrite = false;
+	int r = RESUME_GUEST;
+	int relocated;
+	int page_found = 0;
+	struct kvmppc_pte pte = { 0 };
+	bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
+	bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
+	u64 vsid;
+
+	relocated = data ? dr : ir;
+	if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+		iswrite = true;
+
+	/* Resolve real address if translation turned on */
+	if (relocated) {
+		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
+	} else {
+		pte.may_execute = true;
+		pte.may_read = true;
+		pte.may_write = true;
+		pte.raddr = eaddr & KVM_PAM;
+		pte.eaddr = eaddr;
+		pte.vpage = eaddr >> 12;
+		pte.page_size = MMU_PAGE_64K;
+		pte.wimg = HPTE_R_M;
+	}
+
+	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
+	case 0:
+		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
+		break;
+	case MSR_DR:
+		if (!data &&
+		    (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
+		    ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
+			pte.raddr &= ~SPLIT_HACK_MASK;
+		fallthrough;
+	case MSR_IR:
+		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+
+		if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
+			pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
+		else
+			pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
+		pte.vpage |= vsid;
+
+		if (vsid == -1)
+			page_found = -EINVAL;
+		break;
+	}
+
+	if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+	   (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+		/*
+		 * If we do the dcbz hack, we have to NX on every execution,
+		 * so we can patch the executing code. This renders our guest
+		 * NX-less.
+		 */
+		pte.may_execute = !data;
+	}
+
+	if (page_found == -ENOENT || page_found == -EPERM) {
+		/* Page not found in guest PTE entries, or protection fault */
+		u64 flags;
+
+		if (page_found == -EPERM)
+			flags = DSISR_PROTFAULT;
+		else
+			flags = DSISR_NOHPTE;
+		if (data) {
+			flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE;
+			kvmppc_core_queue_data_storage(vcpu, 0, eaddr, flags);
+		} else {
+			kvmppc_core_queue_inst_storage(vcpu, flags);
+		}
+	} else if (page_found == -EINVAL) {
+		/* Page not found in guest SLB */
+		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
+	} else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
+		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+			/*
+			 * There is already a host HPTE there, presumably
+			 * a read-only one for a page the guest thinks
+			 * is writable, so get rid of it first.
+			 */
+			kvmppc_mmu_unmap_page(vcpu, &pte);
+		}
+		/* The guest's PTE is not mapped yet. Map on the host */
+		if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
+			/* Exit KVM if mapping failed */
+			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+			return RESUME_HOST;
+		}
+		if (data)
+			vcpu->stat.sp_storage++;
+		else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+			 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+			kvmppc_patch_dcbz(vcpu, &pte);
+	} else {
+		/* MMIO */
+		vcpu->stat.mmio_exits++;
+		vcpu->arch.paddr_accessed = pte.raddr;
+		vcpu->arch.vaddr_accessed = pte.eaddr;
+		r = kvmppc_emulate_mmio(vcpu);
+		if ( r == RESUME_HOST_NV )
+			r = RESUME_HOST;
+	}
+
+	return r;
+}
+
+/* Give up external provider (FPU, Altivec, VSX) */
+void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+	struct thread_struct *t = &current->thread;
+
+	/*
+	 * VSX instructions can access FP and vector registers, so if
+	 * we are giving up VSX, make sure we give up FP and VMX as well.
+	 */
+	if (msr & MSR_VSX)
+		msr |= MSR_FP | MSR_VEC;
+
+	msr &= vcpu->arch.guest_owned_ext;
+	if (!msr)
+		return;
+
+#ifdef DEBUG_EXT
+	printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+	if (msr & MSR_FP) {
+		/*
+		 * Note that on CPUs with VSX, giveup_fpu stores
+		 * both the traditional FP registers and the added VSX
+		 * registers into thread.fp_state.fpr[].
+		 */
+		if (t->regs->msr & MSR_FP)
+			giveup_fpu(current);
+		t->fp_save_area = NULL;
+	}
+
+#ifdef CONFIG_ALTIVEC
+	if (msr & MSR_VEC) {
+		if (current->thread.regs->msr & MSR_VEC)
+			giveup_altivec(current);
+		t->vr_save_area = NULL;
+	}
+#endif
+
+	vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
+	kvmppc_recalc_shadow_msr(vcpu);
+}
+
+/* Give up facility @fac; only FSCR_TAR_LG has any state to switch back */
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
+		/* Facility not available to the guest, ignore giveup request */
+		return;
+	}
+
+	switch (fac) {
+	case FSCR_TAR_LG:
+		vcpu->arch.tar = mfspr(SPRN_TAR);
+		mtspr(SPRN_TAR, current->thread.tar);
+		vcpu->arch.shadow_fscr &= ~FSCR_TAR;
+		break;
+	}
+#endif
+}
+
+/* Handle external providers (FPU, Altivec, VSX) */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+			     ulong msr)
+{
+	struct thread_struct *t = &current->thread;
+
+	/* When we have paired singles, we emulate in software */
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
+		return RESUME_GUEST;
+
+	if (!(kvmppc_get_msr(vcpu) & msr)) {
+		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+		return RESUME_GUEST;
+	}
+
+	if (msr == MSR_VSX) {
+		/* No VSX?  Give an illegal instruction interrupt */
+#ifdef CONFIG_VSX
+		if (!cpu_has_feature(CPU_FTR_VSX))
+#endif
+		{
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+
+		/*
+		 * We have to load up all the FP and VMX registers before
+		 * we can let the guest use VSX instructions.
+		 */
+		msr = MSR_FP | MSR_VEC | MSR_VSX;
+	}
+
+	/* See if we already own all the ext(s) needed */
+	msr &= ~vcpu->arch.guest_owned_ext;
+	if (!msr)
+		return RESUME_GUEST;
+
+#ifdef DEBUG_EXT
+	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+	if (msr & MSR_FP) {
+		preempt_disable();
+		enable_kernel_fp();
+		load_fp_state(&vcpu->arch.fp);
+		disable_kernel_fp();
+		t->fp_save_area = &vcpu->arch.fp;
+		preempt_enable();
+	}
+
+	if (msr & MSR_VEC) {
+#ifdef CONFIG_ALTIVEC
+		preempt_disable();
+		enable_kernel_altivec();
+		load_vr_state(&vcpu->arch.vr);
+		disable_kernel_altivec();
+		t->vr_save_area = &vcpu->arch.vr;
+		preempt_enable();
+#endif
+	}
+
+	t->regs->msr |= msr;
+	vcpu->arch.guest_owned_ext |= msr;
+	kvmppc_recalc_shadow_msr(vcpu);
+
+	return RESUME_GUEST;
+}
+
+/*
+ * Kernel code using FP or VMX could have flushed guest state to
+ * the thread_struct; if so, get it back now.
+ */
+static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
+{
+	unsigned long lost_ext;
+
+	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
+	if (!lost_ext)
+		return;
+
+	if (lost_ext & MSR_FP) {
+		preempt_disable();
+		enable_kernel_fp();
+		load_fp_state(&vcpu->arch.fp);
+		disable_kernel_fp();
+		preempt_enable();
+	}
+#ifdef CONFIG_ALTIVEC
+	if (lost_ext & MSR_VEC) {
+		preempt_disable();
+		enable_kernel_altivec();
+		load_vr_state(&vcpu->arch.vr);
+		disable_kernel_altivec();
+		preempt_enable();
+	}
+#endif
+	current->thread.regs->msr |= lost_ext;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
+{
+	/* Inject the Interrupt Cause field and trigger a guest interrupt */
+	vcpu->arch.fscr &= ~(0xffULL << 56);
+	vcpu->arch.fscr |= (fac << 56);
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+}
+
+static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+	enum emulation_result er = EMULATE_FAIL;
+
+	if (!(kvmppc_get_msr(vcpu) & MSR_PR))
+		er = kvmppc_emulate_instruction(vcpu);
+
+	if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
+		/* Couldn't emulate, trigger interrupt in guest */
+		kvmppc_trigger_fac_interrupt(vcpu, fac);
+	}
+}
+
+/* Facility unavailable for @fac: interrupt guest, enable TAR, or emulate */
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
+{
+	bool guest_fac_enabled;
+	BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));
+
+	/*
+	 * Not every facility is enabled by FSCR bits, check whether the
+	 * guest has this facility enabled at all.
+	 */
+	switch (fac) {
+	case FSCR_TAR_LG:
+	case FSCR_EBB_LG:
+		guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
+		break;
+	case FSCR_TM_LG:
+		guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
+		break;
+	default:
+		guest_fac_enabled = false;
+		break;
+	}
+
+	if (!guest_fac_enabled) {
+		/* Facility not enabled by the guest */
+		kvmppc_trigger_fac_interrupt(vcpu, fac);
+		return RESUME_GUEST;
+	}
+
+	switch (fac) {
+	case FSCR_TAR_LG:
+		/* TAR switching isn't lazy in Linux yet */
+		current->thread.tar = mfspr(SPRN_TAR);
+		mtspr(SPRN_TAR, vcpu->arch.tar);
+		vcpu->arch.shadow_fscr |= FSCR_TAR;
+		break;
+	default:
+		kvmppc_emulate_fac(vcpu, fac);
+		break;
+	}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* Since we disabled MSR_TM at privilege state, an mfspr of a TM SPR
+	 * can trigger a TM facility unavailable interrupt. In this case, the
+	 * emulation is handled by kvmppc_emulate_fac(), which eventually
+	 * invokes kvmppc_emulate_mfspr(). Note that the mfspr's RT may be an
+	 * NV register, so return RESUME_GUEST_NV to have the NV registers
+	 * restored and reflect the update.
+	 */
+	if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR))
+		return RESUME_GUEST_NV;
+#endif
+
+	return RESUME_GUEST;
+}
+
+void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
+{
+	bool had_tar = vcpu->arch.fscr & FSCR_TAR;
+	bool want_tar;
+
+	/* SCV must not be enabled; prohibit prefixed instructions for now */
+	fscr &= ~(FSCR_SCV | FSCR_PREFIX);
+	want_tar = fscr & FSCR_TAR;
+
+	/* TAR got dropped, drop it in shadow too */
+	if (had_tar && !want_tar)
+		kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+	/* Store the new value first: kvmppc_handle_fac() reads arch.fscr */
+	vcpu->arch.fscr = fscr;
+	if (!had_tar && want_tar)
+		kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+}
+#endif
+
+static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
+{
+	/* Only touch the guest MSR when single-stepping was requested */
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))
+		return;
+
+	kvmppc_set_msr(vcpu, kvmppc_get_msr(vcpu) | MSR_SE);
+}
+
+static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
+{
+	/* MSR_SE is only cleared again if single-stepping was requested */
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))
+		return;
+
+	kvmppc_set_msr(vcpu, kvmppc_get_msr(vcpu) & ~MSR_SE);
+}
+
+static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+	enum emulation_result er;
+	ulong flags;
+	ppc_inst_t last_inst;
+	int emul, r;
+
+	/*
+	 * shadow_srr1 only contains valid flags if we came here via a program
+	 * exception. The other exceptions (emulation assist, FP unavailable,
+	 * etc.) do not provide flags in SRR1, so use an illegal-instruction
+	 * exception when injecting a program interrupt into the guest.
+	 */
+	if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
+		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+	else
+		flags = SRR1_PROGILL;
+
+	emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+	if (emul != EMULATE_DONE)
+		return RESUME_GUEST;
+
+	if (kvmppc_get_msr(vcpu) & MSR_PR) {
+#ifdef EXIT_DEBUG
+		pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
+			kvmppc_get_pc(vcpu), ppc_inst_val(last_inst));
+#endif
+		if ((ppc_inst_val(last_inst) & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) {
+			kvmppc_core_queue_program(vcpu, flags);
+			return RESUME_GUEST;
+		}
+	}
+
+	vcpu->stat.emulated_inst_exits++;
+	er = kvmppc_emulate_instruction(vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		r = RESUME_GUEST_NV;
+		break;
+	case EMULATE_AGAIN:
+		r = RESUME_GUEST;
+		break;
+	case EMULATE_FAIL:
+		pr_crit("%s: emulation at %lx failed (%08x)\n",
+			__func__, kvmppc_get_pc(vcpu), ppc_inst_val(last_inst));
+		kvmppc_core_queue_program(vcpu, flags);
+		r = RESUME_GUEST;
+		break;
+	case EMULATE_DO_MMIO:
+		vcpu->run->exit_reason = KVM_EXIT_MMIO;
+		r = RESUME_HOST_NV;
+		break;
+	case EMULATE_EXIT_USER:
+		r = RESUME_HOST_NV;
+		break;
+	default:
+		BUG();
+	}
+
+	return r;
+}
+
+int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+	struct kvm_run *run = vcpu->run;
+	int r = RESUME_HOST;
+	int s;
+
+	vcpu->stat.sum_exits++;
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	/* We get here with MSR.EE=1 */
+
+	trace_kvm_exit(exit_nr, vcpu);
+	guest_exit();
+
+	switch (exit_nr) {
+	case BOOK3S_INTERRUPT_INST_STORAGE:
+	{
+		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
+		vcpu->stat.pf_instruc++;
+
+		if (kvmppc_is_split_real(vcpu))
+			kvmppc_fixup_split_real(vcpu);
+
+#ifdef CONFIG_PPC_BOOK3S_32
+		/* We set segments as unused segments when invalidating them. So
+		 * treat the respective fault as segment fault. */
+		{
+			struct kvmppc_book3s_shadow_vcpu *svcpu;
+			u32 sr;
+
+			svcpu = svcpu_get(vcpu);
+			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
+			svcpu_put(svcpu);
+			if (sr == SR_INVALID) {
+				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+				r = RESUME_GUEST;
+				break;
+			}
+		}
+#endif
+
+		/* only care about PTEG not found errors, but leave NX alone */
+		if (shadow_srr1 & 0x40000000) {
+			int idx = srcu_read_lock(&vcpu->kvm->srcu);
+			r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr);
+			srcu_read_unlock(&vcpu->kvm->srcu, idx);
+			vcpu->stat.sp_instruc++;
+		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+			/*
+			 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
+			 *     so we can't use the NX bit inside the guest. Let's cross our fingers,
+			 *     that no guest that needs the dcbz hack does NX.
+			 */
+			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
+			r = RESUME_GUEST;
+		} else {
+			kvmppc_core_queue_inst_storage(vcpu,
+						shadow_srr1 & 0x58000000);
+			r = RESUME_GUEST;
+		}
+		break;
+	}
+	case BOOK3S_INTERRUPT_DATA_STORAGE:
+	{
+		ulong dar = kvmppc_get_fault_dar(vcpu);
+		u32 fault_dsisr = vcpu->arch.fault_dsisr;
+		vcpu->stat.pf_storage++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+		/* We set segments as unused segments when invalidating them. So
+		 * treat the respective fault as segment fault. */
+		{
+			struct kvmppc_book3s_shadow_vcpu *svcpu;
+			u32 sr;
+
+			svcpu = svcpu_get(vcpu);
+			sr = svcpu->sr[dar >> SID_SHIFT];
+			svcpu_put(svcpu);
+			if (sr == SR_INVALID) {
+				kvmppc_mmu_map_segment(vcpu, dar);
+				r = RESUME_GUEST;
+				break;
+			}
+		}
+#endif
+
+		/*
+		 * We need to handle missing shadow PTEs, and
+		 * protection faults due to us mapping a page read-only
+		 * when the guest thinks it is writable.
+		 */
+		if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+			int idx = srcu_read_lock(&vcpu->kvm->srcu);
+			r = kvmppc_handle_pagefault(vcpu, dar, exit_nr);
+			srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		} else {
+			kvmppc_core_queue_data_storage(vcpu, 0, dar, fault_dsisr);
+			r = RESUME_GUEST;
+		}
+		break;
+	}
+	case BOOK3S_INTERRUPT_DATA_SEGMENT:
+		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
+			kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
+			kvmppc_book3s_queue_irqprio(vcpu,
+				BOOK3S_INTERRUPT_DATA_SEGMENT);
+		}
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_INST_SEGMENT:
+		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
+			kvmppc_book3s_queue_irqprio(vcpu,
+				BOOK3S_INTERRUPT_INST_SEGMENT);
+		}
+		r = RESUME_GUEST;
+		break;
+	/* We're good on these - the host merely wanted to get our attention */
+	case BOOK3S_INTERRUPT_DECREMENTER:
+	case BOOK3S_INTERRUPT_HV_DECREMENTER:
+	case BOOK3S_INTERRUPT_DOORBELL:
+	case BOOK3S_INTERRUPT_H_DOORBELL:
+		vcpu->stat.dec_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_EXTERNAL:
+	case BOOK3S_INTERRUPT_EXTERNAL_HV:
+	case BOOK3S_INTERRUPT_H_VIRT:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_HMI:
+	case BOOK3S_INTERRUPT_PERFMON:
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_PROGRAM:
+	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		r = kvmppc_exit_pr_progint(vcpu, exit_nr);
+		break;
+	case BOOK3S_INTERRUPT_SYSCALL:
+	{
+		ppc_inst_t last_sc;
+		int emul;
+
+		/* Get last sc for papr */
+		if (vcpu->arch.papr_enabled) {
+			/* The sc instruction points SRR0 to the next inst */
+			emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc);
+			if (emul != EMULATE_DONE) {
+				kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4);
+				r = RESUME_GUEST;
+				break;
+			}
+		}
+
+		if (vcpu->arch.papr_enabled &&
+		    (ppc_inst_val(last_sc) == 0x44000022) &&
+		    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+			/* SC 1 papr hypercalls */
+			ulong cmd = kvmppc_get_gpr(vcpu, 3);
+			int i;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
+				r = RESUME_GUEST;
+				break;
+			}
+#endif
+
+			run->papr_hcall.nr = cmd;
+			for (i = 0; i < 9; ++i) {
+				ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+				run->papr_hcall.args[i] = gpr;
+			}
+			run->exit_reason = KVM_EXIT_PAPR_HCALL;
+			vcpu->arch.hcall_needed = 1;
+			r = RESUME_HOST;
+		} else if (vcpu->arch.osi_enabled &&
+		    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+		    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+			/* MOL hypercalls */
+			u64 *gprs = run->osi.gprs;
+			int i;
+
+			run->exit_reason = KVM_EXIT_OSI;
+			for (i = 0; i < 32; i++)
+				gprs[i] = kvmppc_get_gpr(vcpu, i);
+			vcpu->arch.osi_needed = 1;
+			r = RESUME_HOST_NV;
+		} else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
+		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+			/* KVM PV hypercalls */
+			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+			r = RESUME_GUEST;
+		} else {
+			/* Guest syscalls */
+			vcpu->stat.syscall_exits++;
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			r = RESUME_GUEST;
+		}
+		break;
+	}
+	case BOOK3S_INTERRUPT_FP_UNAVAIL:
+	case BOOK3S_INTERRUPT_ALTIVEC:
+	case BOOK3S_INTERRUPT_VSX:
+	{
+		int ext_msr = 0;
+		int emul;
+		ppc_inst_t last_inst;
+
+		if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) {
+			/* Do paired single instruction emulation */
+			emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
+						    &last_inst);
+			if (emul == EMULATE_DONE)
+				r = kvmppc_exit_pr_progint(vcpu, exit_nr);
+			else
+				r = RESUME_GUEST;
+
+			break;
+		}
+
+		/* Enable external provider */
+		switch (exit_nr) {
+		case BOOK3S_INTERRUPT_FP_UNAVAIL:
+			ext_msr = MSR_FP;
+			break;
+
+		case BOOK3S_INTERRUPT_ALTIVEC:
+			ext_msr = MSR_VEC;
+			break;
+
+		case BOOK3S_INTERRUPT_VSX:
+			ext_msr = MSR_VSX;
+			break;
+		}
+
+		r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
+		break;
+	}
+	case BOOK3S_INTERRUPT_ALIGNMENT:
+	{
+		ppc_inst_t last_inst;
+		int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+
+		if (emul == EMULATE_DONE) {
+			u32 dsisr;
+			u64 dar;
+
+			dsisr = kvmppc_alignment_dsisr(vcpu, ppc_inst_val(last_inst));
+			dar = kvmppc_alignment_dar(vcpu, ppc_inst_val(last_inst));
+
+			kvmppc_set_dsisr(vcpu, dsisr);
+			kvmppc_set_dar(vcpu, dar);
+
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+		}
+		r = RESUME_GUEST;
+		break;
+	}
+#ifdef CONFIG_PPC_BOOK3S_64
+	case BOOK3S_INTERRUPT_FAC_UNAVAIL:
+		r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
+		break;
+#endif
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_TRACE:
+		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+			run->exit_reason = KVM_EXIT_DEBUG;
+			r = RESUME_HOST;
+		} else {
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			r = RESUME_GUEST;
+		}
+		break;
+	default:
+	{
+		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
+		/* Ugh - bork here! What did we get? */
+		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
+		r = RESUME_HOST;
+		BUG();
+		break;
+	}
+	}
+
+	if (!(r & RESUME_HOST)) {
+		/* To avoid clobbering exit_reason, only check for signals if
+		 * we aren't already exiting to userspace for some other
+		 * reason. */
+
+		/*
+		 * Interrupts could be timers for the guest which we have to
+		 * inject again, so let's postpone them until we're in the guest
+		 * and if we really did time things so badly, then we just exit
+		 * again due to a host external interrupt.
+		 */
+		s = kvmppc_prepare_to_enter(vcpu);
+		if (s <= 0)
+			r = s;
+		else {
+			/* interrupts now hard-disabled */
+			kvmppc_fix_ee_before_entry();
+		}
+
+		kvmppc_handle_lost_ext(vcpu);
+	}
+
+	trace_kvm_book3s_reenter(r, vcpu);
+
+	return r;
+}
+
+static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_book3s *book3s = to_book3s(vcpu);
+	int n;
+
+	sregs->pvr = vcpu->arch.pvr;
+	sregs->u.s.sdr1 = book3s->sdr1;
+	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_SLB)) {
+		/* No SLB flag: report the 16 segment registers and the BATs */
+		for (n = 0; n < 16; n++)
+			sregs->u.s.ppc32.sr[n] = kvmppc_get_sr(vcpu, n);
+		for (n = 0; n < 8; n++) {
+			sregs->u.s.ppc32.ibat[n] = book3s->ibat[n].raw;
+			sregs->u.s.ppc32.dbat[n] = book3s->dbat[n].raw;
+		}
+	} else {
+		/* SLB flag set: report all 64 entries, index OR-ed into slbe */
+		for (n = 0; n < 64; n++) {
+			sregs->u.s.ppc64.slb[n].slbe = vcpu->arch.slb[n].orige | n;
+			sregs->u.s.ppc64.slb[n].slbv = vcpu->arch.slb[n].origv;
+		}
+	}
+
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+					    struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int i;
+
+	kvmppc_set_pvr_pr(vcpu, sregs->pvr);
+
+	vcpu3s->sdr1 = sregs->u.s.sdr1;
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+		/* Flush all SLB entries */
+		vcpu->arch.mmu.slbmte(vcpu, 0, 0);
+		vcpu->arch.mmu.slbia(vcpu);
+
+		for (i = 0; i < 64; i++) {
+			u64 rb = sregs->u.s.ppc64.slb[i].slbe;
+			u64 rs = sregs->u.s.ppc64.slb[i].slbv;
+
+			if (rb & SLB_ESID_V)
+				vcpu->arch.mmu.slbmte(vcpu, rs, rb);
+		}
+	} else
+#endif
+	{
+		for (i = 0; i < 16; i++) {
+			vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
+		}
+		for (i = 0; i < 8; i++) {
+			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
+				       (u32)sregs->u.s.ppc32.ibat[i]);
+			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
+				       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
+			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
+				       (u32)sregs->u.s.ppc32.dbat[i]);
+			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
+				       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
+		}
+	}
+
+	/* Flush the MMU after messing with the segments */
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+	return 0;
+}
+
+static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
+{
+	int r = 0;
+
+	switch (id) {
+	case KVM_REG_PPC_DEBUG_INST:
+		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+		break;
+	case KVM_REG_PPC_HIOR:
+		*val = get_reg_val(id, to_book3s(vcpu)->hior);
+		break;
+	case KVM_REG_PPC_VTB:
+		*val = get_reg_val(id, to_book3s(vcpu)->vtb);
+		break;
+	case KVM_REG_PPC_LPCR:
+	case KVM_REG_PPC_LPCR_64:
+		/*
+		 * We are only interested in the LPCR_ILE bit
+		 */
+		if (vcpu->arch.intr_msr & MSR_LE)
+			*val = get_reg_val(id, LPCR_ILE);
+		else
+			*val = get_reg_val(id, 0);
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		*val = get_reg_val(id, vcpu->arch.tfhar);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		*val = get_reg_val(id, vcpu->arch.tfiar);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		*val = get_reg_val(id, vcpu->arch.texasr);
+		break;
+	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+		*val = get_reg_val(id,
+				vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]);
+		break;
+	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+	{
+		int i, j;
+
+		i = id - KVM_REG_PPC_TM_VSR0;
+		if (i < 32)
+			for (j = 0; j < TS_FPRWIDTH; j++)
+				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
+		else {
+			if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				val->vval = vcpu->arch.vr_tm.vr[i-32];
+			else
+				r = -ENXIO;
+		}
+		break;
+	}
+	case KVM_REG_PPC_TM_CR:
+		*val = get_reg_val(id, vcpu->arch.cr_tm);
+		break;
+	case KVM_REG_PPC_TM_XER:
+		*val = get_reg_val(id, vcpu->arch.xer_tm);
+		break;
+	case KVM_REG_PPC_TM_LR:
+		*val = get_reg_val(id, vcpu->arch.lr_tm);
+		break;
+	case KVM_REG_PPC_TM_CTR:
+		*val = get_reg_val(id, vcpu->arch.ctr_tm);
+		break;
+	case KVM_REG_PPC_TM_FPSCR:
+		*val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
+		break;
+	case KVM_REG_PPC_TM_AMR:
+		*val = get_reg_val(id, vcpu->arch.amr_tm);
+		break;
+	case KVM_REG_PPC_TM_PPR:
+		*val = get_reg_val(id, vcpu->arch.ppr_tm);
+		break;
+	case KVM_REG_PPC_TM_VRSAVE:
+		*val = get_reg_val(id, vcpu->arch.vrsave_tm);
+		break;
+	case KVM_REG_PPC_TM_VSCR:
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			*val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_TM_DSCR:
+		*val = get_reg_val(id, vcpu->arch.dscr_tm);
+		break;
+	case KVM_REG_PPC_TM_TAR:
+		*val = get_reg_val(id, vcpu->arch.tar_tm);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+	if (new_lpcr & LPCR_ILE)
+		vcpu->arch.intr_msr |= MSR_LE;
+	else
+		vcpu->arch.intr_msr &= ~MSR_LE;
+}
+
+static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+				 union kvmppc_one_reg *val)
+{
+	int r = 0;	/* -EINVAL: unhandled id, -ENXIO: no Altivec */
+
+	switch (id) {
+	case KVM_REG_PPC_HIOR:
+		to_book3s(vcpu)->hior = set_reg_val(id, *val);
+		to_book3s(vcpu)->hior_explicit = true;
+		break;
+	case KVM_REG_PPC_VTB:
+		to_book3s(vcpu)->vtb = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_LPCR:
+	case KVM_REG_PPC_LPCR_64:
+		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
+		break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_REG_PPC_TFHAR:
+		vcpu->arch.tfhar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TFIAR:
+		vcpu->arch.tfiar = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TEXASR:
+		vcpu->arch.texasr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+		vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] =
+			set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+	{
+		int i, j;
+
+		i = id - KVM_REG_PPC_TM_VSR0;
+		if (i < 32)
+			for (j = 0; j < TS_FPRWIDTH; j++)
+				vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
+		else
+			if (cpu_has_feature(CPU_FTR_ALTIVEC))
+				vcpu->arch.vr_tm.vr[i-32] = val->vval;
+			else
+				r = -ENXIO;
+		break;
+	}
+	case KVM_REG_PPC_TM_CR:
+		vcpu->arch.cr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_XER:
+		vcpu->arch.xer_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_LR:
+		vcpu->arch.lr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_CTR:
+		vcpu->arch.ctr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_FPSCR:
+		vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_AMR:
+		vcpu->arch.amr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_PPR:
+		vcpu->arch.ppr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VRSAVE:
+		vcpu->arch.vrsave_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_VSCR:	/* TM copy lives in vr_tm, not vr */
+		if (cpu_has_feature(CPU_FTR_ALTIVEC))
+			vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
+		else
+			r = -ENXIO;
+		break;
+	case KVM_REG_PPC_TM_DSCR:
+		vcpu->arch.dscr_tm = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_TM_TAR:
+		vcpu->arch.tar_tm = set_reg_val(id, *val);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s;
+	unsigned long shared_page;
+	int err = -ENOMEM;
+
+	/* Allocation failures below unwind with err still set to -ENOMEM */
+	vcpu_book3s = vzalloc(sizeof(*vcpu_book3s));
+	if (!vcpu_book3s)
+		goto out;
+	vcpu->arch.book3s = vcpu_book3s;
+
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	vcpu->arch.shadow_vcpu =
+		kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+	if (!vcpu->arch.shadow_vcpu)
+		goto free_vcpu3s;
+#endif
+
+	/* One zeroed page backs vcpu->arch.shared */
+	shared_page = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!shared_page)
+		goto free_shadow_vcpu;
+	vcpu->arch.shared = (void *)shared_page;
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Always start the shared struct in native endian mode */
+#ifdef __BIG_ENDIAN__
+	vcpu->arch.shared_big_endian = true;
+#else
+	vcpu->arch.shared_big_endian = false;
+#endif
+
+	/*
+	 * Default to the same as the host if we're on sufficiently
+	 * recent machine that we have 1TB segments;
+	 * otherwise default to PPC970FX.
+	 */
+	vcpu->arch.pvr = 0x3C0301;
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		vcpu->arch.pvr = mfspr(SPRN_PVR);
+	vcpu->arch.intr_msr = MSR_SF;
+#else
+	/* default to book3s_32 (750) */
+	vcpu->arch.pvr = 0x84202;
+	vcpu->arch.intr_msr = 0;
+#endif
+	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
+	vcpu->arch.slb_nr = 64;
+
+	vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
+
+	err = kvmppc_mmu_init_pr(vcpu);
+	if (err < 0)
+		goto free_shared_page;
+
+	return 0;
+
+free_shared_page:
+	free_page((unsigned long)vcpu->arch.shared);
+free_shadow_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	kfree(vcpu->arch.shadow_vcpu);
+free_vcpu3s:
+#endif
+	vfree(vcpu_book3s);
+out:
+	return err;
+}
+
+/*
+ * Release the PR-specific parts of a vcpu: destroy the PR MMU state, then
+ * free the shared page, the shadow vcpu (CONFIG_KVM_BOOK3S_32_HANDLER
+ * builds only) and finally the book3s vcpu structure.
+ */
+static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *book3s = to_book3s(vcpu);
+	unsigned long shared_page;
+
+	kvmppc_mmu_destroy_pr(vcpu);
+
+	/* Mask the shared pointer down to its page base before freeing. */
+	shared_page = (unsigned long)vcpu->arch.shared & PAGE_MASK;
+	free_page(shared_page);
+
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+	kfree(vcpu->arch.shadow_vcpu);
+#endif
+	vfree(book3s);
+}
+
+/*
+ * Enter the guest on this vcpu and run it until the next exit.
+ *
+ * Returns -EINVAL (with exit_reason set to KVM_EXIT_INTERNAL_ERROR) if the
+ * vcpu is not marked sane, the value of kvmppc_prepare_to_enter() if that
+ * is <= 0, and otherwise the result of __kvmppc_vcpu_run().  On every path
+ * vcpu->mode is set back to OUTSIDE_GUEST_MODE before returning.
+ */
+static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	/* Check if we can run the vcpu at all */
+	if (!vcpu->arch.sane) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = -EINVAL;
+		goto out;
+	}
+
+	kvmppc_setup_debug(vcpu);
+
+	/*
+	 * Interrupts could be timers for the guest which we have to inject
+	 * again, so let's postpone them until we're in the guest and if we
+	 * really did time things so badly, then we just exit again due to
+	 * a host external interrupt.
+	 */
+	ret = kvmppc_prepare_to_enter(vcpu);
+	if (ret <= 0)
+		goto out;
+	/* interrupts now hard-disabled */
+
+	/* Save FPU, Altivec and VSX state */
+	giveup_all(current);
+
+	/* Preload FPU if it's enabled */
+	if (kvmppc_get_msr(vcpu) & MSR_FP)
+		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+	kvmppc_fix_ee_before_entry();
+
+	ret = __kvmppc_vcpu_run(vcpu);
+
+	kvmppc_clear_debug(vcpu);
+
+	/* No need for guest_exit. It's done in handle_exit.
+	   We also get here with interrupts enabled. */
+
+	/* Make sure we save the guest FPU/Altivec/VSX state */
+	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+
+	/* Make sure we save the guest TAR/EBB/DSCR state */
+	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+
+	srr_regs_clobbered();
+out:
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	return ret;
+}
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ *
+ * Runs under kvm->slots_lock.  When kvm_get_dirty_log() reports dirty
+ * pages, the slot's guest address range is flushed from every vcpu via
+ * kvmppc_mmu_pte_pflush() and the slot's dirty bitmap is zeroed.
+ *
+ * Returns 0 on success or the error from kvm_get_dirty_log().
+ */
+static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+					 struct kvm_dirty_log *log)
+{
+	struct kvm_memory_slot *memslot;
+	struct kvm_vcpu *vcpu;
+	ulong start, end;
+	unsigned long idx;
+	int is_dirty = 0;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
+
+	/* On error, or if nothing is dirty, leave the page tables alone. */
+	if (r || !is_dirty)
+		goto out;
+
+	start = memslot->base_gfn << PAGE_SHIFT;
+	end = start + (memslot->npages << PAGE_SHIFT);
+
+	kvm_for_each_vcpu(idx, vcpu, kvm)
+		kvmppc_mmu_pte_pflush(vcpu, start, end);
+
+	memset(memslot->dirty_bitmap, 0, kvm_dirty_bitmap_bytes(memslot));
+
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+/* Memslot flush hook: intentionally does nothing for PR KVM. */
+static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+					 struct kvm_memory_slot *memslot)
+{
+}
+
+/* Memory-region prepare hook: no work is needed here, always reports success. */
+static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+				const struct kvm_memory_slot *old,
+				struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+	int ret = 0;
+
+	return ret;
+}
+
+/* Memory-region commit hook: intentionally does nothing for PR KVM. */
+static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+				struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+}
+
+/* Memslot free hook: nothing is freed here. */
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot)
+{
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Fill in @info with the segment/page sizes PR KVM offers to the guest:
+ * 4k always, 64k (plus the 1T-segment flag) only if the first vcpu has
+ * BOOK3S_HFLAG_MULTI_PGSIZE set, and 16M always.  Always returns 0.
+ */
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+					 struct kvm_ppc_smmu_info *info)
+{
+	struct kvm_vcpu *first_vcpu;
+	long int slot;
+
+	info->flags = 0;
+
+	/* The SLB is reported with a fixed 64 entries. */
+	info->slb_size = 64;
+
+	/* Slot 0: standard 4k base page size segment. */
+	info->sps[0].page_shift = 12;
+	info->sps[0].slb_enc = 0;
+	info->sps[0].enc[0].page_shift = 12;
+	info->sps[0].enc[0].pte_enc = 0;
+
+	/*
+	 * 64k large pages should only be advertised when the emulated CPU
+	 * supports them, but there is no vcpu readily at hand to ask.
+	 * Use the first vcpu; if none exists yet, report the minimum
+	 * capability, i.e. no 64k pages.  1T segment support goes along
+	 * with 64k pages.
+	 */
+	slot = 1;
+	first_vcpu = kvm_get_vcpu(kvm, 0);
+	if (first_vcpu &&
+	    (first_vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+		info->flags = KVM_PPC_1T_SEGMENTS;
+		info->sps[slot].page_shift = 16;
+		info->sps[slot].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+		info->sps[slot].enc[0].page_shift = 16;
+		info->sps[slot].enc[0].pte_enc = 1;
+		slot++;
+	}
+
+	/* Next free slot: standard 16M large page size segment. */
+	info->sps[slot].page_shift = 24;
+	info->sps[slot].slb_enc = SLB_VSID_L;
+	info->sps[slot].enc[0].page_shift = 24;
+	info->sps[slot].enc[0].pte_enc = 0;
+
+	return 0;
+}
+
+/*
+ * Validate an MMU configuration request.
+ *
+ * Returns -ENODEV when the CPU lacks CPU_FTR_ARCH_300, -EINVAL when either
+ * cfg->flags or cfg->process_table is non-zero, and 0 otherwise.
+ */
+static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	/* Flags and the process table base and size must all be zero. */
+	return (cfg->flags || cfg->process_table) ? -EINVAL : 0;
+}
+
+#else
+/*
+ * Variant built when CONFIG_PPC64 is not set: reaching this is treated as
+ * a kernel bug.
+ */
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+					 struct kvm_ppc_smmu_info *info)
+{
+	/* We should not get called */
+	BUG();
+	return 0;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Count of live PR VMs on firmware with FW_FEATURE_SET_MODE; it is
+ * incremented in kvmppc_core_init_vm_pr() and decremented in
+ * kvmppc_core_destroy_vm_pr(), always under kvm_global_user_count_lock.
+ */
+static unsigned int kvm_global_user_count = 0;
+static DEFINE_SPINLOCK(kvm_global_user_count_lock);
+
+/*
+ * Per-VM initialisation for PR KVM: set up the HPT mutex, enable the
+ * default hcalls (Book3S 64 only) and, on firmware with
+ * FW_FEATURE_SET_MODE, call pseries_disable_reloc_on_exc() when the first
+ * VM comes into existence.  Always returns 0.
+ */
+static int kvmppc_core_init_vm_pr(struct kvm *kvm)
+{
+	mutex_init(&kvm->arch.hpt_mutex);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Begin with the default set of hcalls enabled. */
+	kvmppc_pr_init_default_hcalls(kvm);
+#endif
+
+	if (!firmware_has_feature(FW_FEATURE_SET_MODE))
+		return 0;
+
+	spin_lock(&kvm_global_user_count_lock);
+	kvm_global_user_count++;
+	if (kvm_global_user_count == 1)
+		pseries_disable_reloc_on_exc();
+	spin_unlock(&kvm_global_user_count_lock);
+
+	return 0;
+}
+
+/*
+ * Per-VM teardown for PR KVM.  Warns if TCE tables are still registered
+ * (PPC64 only) and, on firmware with FW_FEATURE_SET_MODE, calls
+ * pseries_enable_reloc_on_exc() once the last VM is gone.
+ */
+static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
+{
+#ifdef CONFIG_PPC64
+	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
+
+	if (!firmware_has_feature(FW_FEATURE_SET_MODE))
+		return;
+
+	spin_lock(&kvm_global_user_count_lock);
+	/* A destroy without a matching init would underflow the counter. */
+	BUG_ON(kvm_global_user_count == 0);
+	kvm_global_user_count--;
+	if (kvm_global_user_count == 0)
+		pseries_enable_reloc_on_exc();
+	spin_unlock(&kvm_global_user_count_lock);
+}
+
+/*
+ * Check whether PR KVM can be used on this host.
+ *
+ * PR KVM can work on POWER9 inside a guest partition running in HPT
+ * mode.  It cannot work with radix translation, because radix provides
+ * no way for a process to have unique translations in quadrant 3.
+ *
+ * Returns -EIO on an ISA 3.00 CPU with radix enabled, 0 otherwise.
+ */
+static int kvmppc_core_check_processor_compat_pr(void)
+{
+	return (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) ?
+		-EIO : 0;
+}
+
+/* No PR-specific VM ioctls are handled here: every request gets -ENOTTY. */
+static int kvm_arch_vm_ioctl_pr(struct file *filp,
+				unsigned int ioctl, unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+/*
+ * Operations table of the PR backend.  kvmppc_book3s_init_pr() fills in
+ * .owner and publishes the table through kvmppc_pr_ops.
+ */
+static struct kvmppc_ops kvm_ops_pr = {
+	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+	.get_one_reg = kvmppc_get_one_reg_pr,
+	.set_one_reg = kvmppc_set_one_reg_pr,
+	.vcpu_load   = kvmppc_core_vcpu_load_pr,
+	.vcpu_put    = kvmppc_core_vcpu_put_pr,
+	.inject_interrupt = kvmppc_inject_interrupt_pr,
+	.set_msr     = kvmppc_set_msr_pr,
+	.vcpu_run    = kvmppc_vcpu_run_pr,
+	.vcpu_create = kvmppc_core_vcpu_create_pr,
+	.vcpu_free   = kvmppc_core_vcpu_free_pr,
+	.check_requests = kvmppc_core_check_requests_pr,
+	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+	.flush_memslot = kvmppc_core_flush_memslot_pr,
+	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
+	.unmap_gfn_range = kvm_unmap_gfn_range_pr,
+	.age_gfn  = kvm_age_gfn_pr,
+	.test_age_gfn = kvm_test_age_gfn_pr,
+	.set_spte_gfn = kvm_set_spte_gfn_pr,
+	.free_memslot = kvmppc_core_free_memslot_pr,
+	.init_vm = kvmppc_core_init_vm_pr,
+	.destroy_vm = kvmppc_core_destroy_vm_pr,
+	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+	.emulate_op = kvmppc_core_emulate_op_pr,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+	.fast_vcpu_kick = kvm_vcpu_kick,
+	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* These two hooks are only provided on Book3S 64. */
+	.hcall_implemented = kvmppc_hcall_impl_pr,
+	.configure_mmu = kvm_configure_mmu_pr,
+#endif
+	.giveup_ext = kvmppc_giveup_ext,
+};
+
+
+/*
+ * Initialise the PR backend.
+ *
+ * Checks that the processor is usable for PR KVM, publishes the PR
+ * operations table through kvmppc_pr_ops and initialises the shadow HPTE
+ * subsystem.
+ *
+ * Returns 0 on success or a negative error code.  On failure
+ * kvmppc_pr_ops is left NULL, so no pointer to a backend that failed to
+ * initialise stays visible.
+ */
+int kvmppc_book3s_init_pr(void)
+{
+	int r;
+
+	r = kvmppc_core_check_processor_compat_pr();
+	if (r < 0)
+		return r;
+
+	kvm_ops_pr.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_pr;
+
+	r = kvmppc_mmu_hpte_sysinit();
+	if (r) {
+		/* Undo the publication above: initialisation did not complete. */
+		kvmppc_pr_ops = NULL;
+	}
+
+	return r;
+}
+
+/*
+ * Tear down the PR backend: withdraw the operations table, then shut down
+ * the shadow HPTE subsystem set up by kvmppc_book3s_init_pr().
+ */
+void kvmppc_book3s_exit_pr(void)
+{
+	kvmppc_pr_ops = NULL;
+	kvmppc_mmu_hpte_sysexit();
+}
+
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
+module_init(kvmppc_book3s_init_pr);
+module_exit(kvmppc_book3s_exit_pr);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
+#endif
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
new file mode 100644
index 0000000000..b2c89e850d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2011. Freescale Inc. All rights reserved.
+ *
+ * Authors:
+ *    Alexander Graf <agraf@suse.de>
+ *    Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ *
+ * Hypercall handling for running PAPR guests in PR KVM on Book 3S
+ * processors.
+ */
+
+#include <linux/anon_inodes.h>
+
+#include <linux/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+#define HPTE_SIZE	16		/* bytes per HPT entry */
+
+/*
+ * Translate a guest HPTE index into the address of that entry in the
+ * guest hashed page table, as described by the guest's SDR1.
+ *
+ * @vcpu:      vcpu whose SDR1 is used
+ * @pte_index: index of the HPTE (each entry is HPTE_SIZE == 16 bytes)
+ *
+ * The index is turned into a byte offset, wrapped to the table size taken
+ * from the low five bits of SDR1, and OR-ed into the table base taken from
+ * the upper SDR1 bits.
+ */
+static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	unsigned long pteg_addr;
+
+	/* 16 bytes per HPTE: index -> byte offset. */
+	pte_index <<= 4;
+	/*
+	 * The shift count can be as large as 31 + 11, so the mask must be
+	 * built in unsigned long: a plain int "1 <<" would overflow.
+	 */
+	pte_index &= ((1UL << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
+	pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+	pteg_addr |= pte_index;
+
+	return pteg_addr;
+}
+
+/*
+ * H_ENTER: insert an HPTE into the guest hashed page table.
+ *
+ * Guest registers: r4 = flags, r5 = HPTE index, r6/r7 = the two HPTE
+ * doublewords.  The whole 8-entry PTEG is read from the table; without
+ * H_EXACT the first invalid slot is used, with H_EXACT only the slot named
+ * by the low three index bits is acceptable.
+ *
+ * The hcall status is returned to the guest in r3: H_SUCCESS (r4 then holds
+ * the index actually used), H_PTEG_FULL if no suitable slot is free, or
+ * H_FUNCTION if the table could not be read or written.  Always returns
+ * EMULATE_DONE.
+ */
+static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
+{
+	long flags = kvmppc_get_gpr(vcpu, 4);
+	long pte_index = kvmppc_get_gpr(vcpu, 5);
+	__be64 pteg[2 * 8];
+	__be64 *hpte;
+	unsigned long pteg_addr, i;
+	long int ret;
+
+	/* Split the index into slot-within-PTEG and PTEG base index. */
+	i = pte_index & 7;
+	pte_index &= ~7UL;
+	pteg_addr = get_pteg_addr(vcpu, pte_index);
+
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+	ret = H_FUNCTION;
+	if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)))
+		goto done;
+	hpte = pteg;
+
+	ret = H_PTEG_FULL;
+	if (likely((flags & H_EXACT) == 0)) {
+		/* Take the first slot whose valid bit is clear. */
+		for (i = 0; ; ++i) {
+			if (i == 8)
+				goto done;
+			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0)
+				break;
+			hpte += 2;
+		}
+	} else {
+		hpte += i * 2;
+		/*
+		 * The table is stored big-endian, so convert before testing
+		 * the valid bit, exactly as the search loop above does.
+		 */
+		if (be64_to_cpu(*hpte) & HPTE_V_VALID)
+			goto done;
+	}
+
+	hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
+	hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
+	/* Write back only the one entry that changed. */
+	pteg_addr += i * HPTE_SIZE;
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE))
+		goto done;
+	kvmppc_set_gpr(vcpu, 4, pte_index | i);
+	ret = H_SUCCESS;
+
+ done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * H_REMOVE: invalidate one HPTE in the guest hashed page table.
+ *
+ * Guest registers: r4 = flags, r5 = HPTE index, r6 = AVPN / AND-condition
+ * value.  On success the first doubleword of the entry is zeroed, the
+ * translation is flushed and the old entry is handed back in r4/r5.
+ * The hcall status goes to r3: H_SUCCESS, H_NOT_FOUND (entry invalid or
+ * the H_AVPN / H_ANDCOND test failed) or H_FUNCTION (table access failed).
+ * Always returns EMULATE_DONE.
+ */
+static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+	unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+	unsigned long hpte_addr, rb;
+	unsigned long zero = 0;
+	unsigned long hpte[2];
+	long int ret;
+
+	hpte_addr = get_pteg_addr(vcpu, pte_index);
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
+	if (copy_from_user(hpte, (void __user *)hpte_addr, sizeof(hpte))) {
+		ret = H_FUNCTION;
+		goto done;
+	}
+	hpte[0] = be64_to_cpu((__force __be64)hpte[0]);
+	hpte[1] = be64_to_cpu((__force __be64)hpte[1]);
+
+	if ((hpte[0] & HPTE_V_VALID) == 0 ||
+	    ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
+	    ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
+		ret = H_NOT_FOUND;
+		goto done;
+	}
+
+	/* Clear the first doubleword of the entry in the guest table. */
+	if (copy_to_user((void __user *)hpte_addr, &zero, sizeof(zero))) {
+		ret = H_FUNCTION;
+		goto done;
+	}
+
+	rb = compute_tlbie_rb(hpte[0], hpte[1], pte_index);
+	vcpu->arch.mmu.tlbie(vcpu, rb, (rb & 1) != 0);
+
+	/* Hand the removed entry back to the guest. */
+	kvmppc_set_gpr(vcpu, 4, hpte[0]);
+	kvmppc_set_gpr(vcpu, 5, hpte[1]);
+	ret = H_SUCCESS;
+
+ done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/* Request defs for kvmppc_h_pr_bulk_remove() */
+#define H_BULK_REMOVE_TYPE             0xc000000000000000ULL
+#define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
+#define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
+#define   H_BULK_REMOVE_END            0xc000000000000000ULL
+#define H_BULK_REMOVE_CODE             0x3000000000000000ULL
+#define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
+#define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
+#define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
+#define   H_BULK_REMOVE_HW             0x3000000000000000ULL
+#define H_BULK_REMOVE_RC               0x0c00000000000000ULL
+#define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL
+#define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
+#define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
+#define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL
+#define H_BULK_REMOVE_MAX_BATCH        4
+
+/*
+ * H_BULK_REMOVE: process up to H_BULK_REMOVE_MAX_BATCH remove requests.
+ *
+ * Request i is passed in the register pair r(4 + 2i) / r(5 + 2i): the
+ * first register ("tsh") carries the request type, flags and HPTE index,
+ * the second ("tsl") the AVPN / AND-condition value.  Each processed tsh
+ * is rewritten in place as a response carrying a completion code and, on
+ * success, the entry's R and C bits in the H_BULK_REMOVE_RC field.
+ *
+ * Processing stops at an END marker, at a malformed request (H_PARAMETER)
+ * or when the guest table cannot be accessed (H_FUNCTION).  The overall
+ * status is returned in r3.  Always returns EMULATE_DONE.
+ */
+static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
+{
+	int i;
+	int paramnr = 4;
+	int ret = H_SUCCESS;
+
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+	for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+		unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
+		unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
+		unsigned long pteg, rb, flags;
+		unsigned long pte[2];
+		unsigned long v = 0;
+
+		if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+			break; /* Exit success */
+		} else if ((tsh & H_BULK_REMOVE_TYPE) !=
+			   H_BULK_REMOVE_REQUEST) {
+			ret = H_PARAMETER;
+			break; /* Exit fail */
+		}
+
+		/* Keep only index and flags, and mark this as a response. */
+		tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+		tsh |= H_BULK_REMOVE_RESPONSE;
+
+		/* ANDCOND and AVPN together are rejected. */
+		if ((tsh & H_BULK_REMOVE_ANDCOND) &&
+		    (tsh & H_BULK_REMOVE_AVPN)) {
+			tsh |= H_BULK_REMOVE_PARM;
+			kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+			ret = H_PARAMETER;
+			break; /* Exit fail */
+		}
+
+		pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
+		if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) {
+			ret = H_FUNCTION;
+			break;
+		}
+		pte[0] = be64_to_cpu((__force __be64)pte[0]);
+		pte[1] = be64_to_cpu((__force __be64)pte[1]);
+
+		/* tsl = AVPN */
+		/* Move the request flags down to the H_AVPN/H_ANDCOND bits. */
+		flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26;
+
+		if ((pte[0] & HPTE_V_VALID) == 0 ||
+		    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) ||
+		    ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) {
+			tsh |= H_BULK_REMOVE_NOT_FOUND;
+		} else {
+			/* Splat the pteg in (userland) hpt */
+			if (copy_to_user((void __user *)pteg, &v, sizeof(v))) {
+				ret = H_FUNCTION;
+				break;
+			}
+
+			rb = compute_tlbie_rb(pte[0], pte[1],
+					      tsh & H_BULK_REMOVE_PTEX);
+			vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+			tsh |= H_BULK_REMOVE_SUCCESS;
+			/*
+			 * HPTE_R_R | HPTE_R_C is 0x180; shifting by 51 puts
+			 * the two bits into H_BULK_REMOVE_RC rather than
+			 * into the PTE index field of the response.
+			 */
+			tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 51;
+		}
+		kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
+	}
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * H_PROTECT: change the protection and key bits of an existing HPTE.
+ *
+ * Guest registers: r4 = flags (carrying the new PP/N/key bits), r5 = HPTE
+ * index, r6 = AVPN to match when H_AVPN is set.  The hcall status goes to
+ * r3: H_SUCCESS, H_NOT_FOUND (entry invalid or AVPN mismatch) or
+ * H_FUNCTION (guest table access failed).  Always returns EMULATE_DONE.
+ */
+static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+	unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+	unsigned long rb, pteg, r, v;
+	unsigned long pte[2];
+	long int ret;
+
+	pteg = get_pteg_addr(vcpu, pte_index);
+	mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+	ret = H_FUNCTION;
+	if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+		goto done;
+	/* The table is big-endian; work on host-endian copies. */
+	pte[0] = be64_to_cpu((__force __be64)pte[0]);
+	pte[1] = be64_to_cpu((__force __be64)pte[1]);
+
+	ret = H_NOT_FOUND;
+	if ((pte[0] & HPTE_V_VALID) == 0 ||
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+		goto done;
+
+	v = pte[0];
+	r = pte[1];
+	/* Drop the old protection/key bits, then merge in those from flags. */
+	r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
+	       HPTE_R_KEY_LO);
+	r |= (flags << 55) & HPTE_R_PP0;
+	r |= (flags << 48) & HPTE_R_KEY_HI;
+	r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+	pte[1] = r;
+
+	rb = compute_tlbie_rb(v, r, pte_index);
+	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+	/* Convert back to big-endian before writing the entry out. */
+	pte[0] = (__force u64)cpu_to_be64(pte[0]);
+	pte[1] = (__force u64)cpu_to_be64(pte[1]);
+	ret = H_FUNCTION;
+	if (copy_to_user((void __user *)pteg, pte, sizeof(pte)))
+		goto done;
+	ret = H_SUCCESS;
+
+ done:
+	mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+	kvmppc_set_gpr(vcpu, 3, ret);
+
+	return EMULATE_DONE;
+}
+
+/*
+ * H_LOGICAL_CI_LOAD wrapper: run the common handler and place its status
+ * in r3.  H_TOO_HARD is not reported to the guest; EMULATE_FAIL is
+ * returned instead.
+ */
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+	long hret = kvmppc_h_logical_ci_load(vcpu);
+
+	if (hret != H_TOO_HARD) {
+		kvmppc_set_gpr(vcpu, 3, hret);
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * H_LOGICAL_CI_STORE wrapper: run the common handler and place its status
+ * in r3.  H_TOO_HARD is not reported to the guest; EMULATE_FAIL is
+ * returned instead.
+ */
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+	long hret = kvmppc_h_logical_ci_store(vcpu);
+
+	if (hret != H_TOO_HARD) {
+		kvmppc_set_gpr(vcpu, 3, hret);
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * H_SET_MODE: only the address-translation-mode resource is handled here.
+ *
+ * Guest registers: r4 = mflags, r5 = resource.  For that resource, mflags
+ * of 0 yields H_SUCCESS in r3 and anything else an unsupported-flag
+ * status.  Any other resource returns EMULATE_FAIL.
+ */
+static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu)
+{
+	unsigned long mflags = kvmppc_get_gpr(vcpu, 4);
+	unsigned long resource = kvmppc_get_gpr(vcpu, 5);
+
+	if (resource != H_SET_MODE_RESOURCE_ADDR_TRANS_MODE)
+		return EMULATE_FAIL;
+
+	/* KVM PR does not provide AIL!=0 to guests */
+	if (mflags != 0)
+		kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63);
+	else
+		kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+
+	return EMULATE_DONE;
+}
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+/*
+ * H_PUT_TCE wrapper: r4 = liobn, r5 = ioba, r6 = tce.  The handler's
+ * status goes to r3, except H_TOO_HARD which yields EMULATE_FAIL.
+ */
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+	unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+	long hret = kvmppc_h_put_tce(vcpu, liobn, ioba, tce);
+
+	if (hret != H_TOO_HARD) {
+		kvmppc_set_gpr(vcpu, 3, hret);
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * H_PUT_TCE_INDIRECT wrapper: r4 = liobn, r5 = ioba, r6 = tce, r7 = npages.
+ * The handler's status goes to r3, except H_TOO_HARD which yields
+ * EMULATE_FAIL.
+ */
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+	unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+	unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+	long hret = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba, tce, npages);
+
+	if (hret != H_TOO_HARD) {
+		kvmppc_set_gpr(vcpu, 3, hret);
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * H_STUFF_TCE wrapper: r4 = liobn, r5 = ioba, r6 = tce_value, r7 = npages.
+ * The handler's status goes to r3, except H_TOO_HARD which yields
+ * EMULATE_FAIL.
+ */
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+	unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+	unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+	unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
+	unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+	long hret = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
+
+	if (hret != H_TOO_HARD) {
+		kvmppc_set_gpr(vcpu, 3, hret);
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
+#else /* CONFIG_SPAPR_TCE_IOMMU */
+/* Without CONFIG_SPAPR_TCE_IOMMU, H_PUT_TCE is not handled in the kernel. */
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+
+/* Without CONFIG_SPAPR_TCE_IOMMU, H_PUT_TCE_INDIRECT is not handled in the kernel. */
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+
+/* Without CONFIG_SPAPR_TCE_IOMMU, H_STUFF_TCE is not handled in the kernel. */
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+	return EMULATE_FAIL;
+}
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
+
+/*
+ * Forward a XICS hcall to kvmppc_xics_hcall() and return its status to the
+ * guest in r3.  Always returns EMULATE_DONE.
+ */
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+	long rc = kvmppc_xics_hcall(vcpu, cmd);
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
+/*
+ * Dispatch a PAPR hypercall made by a PR guest.
+ *
+ * @vcpu: calling vcpu
+ * @cmd:  hcall number
+ *
+ * Returns EMULATE_DONE when the hcall was handled here (its status has
+ * been stored in guest r3 where applicable), or EMULATE_FAIL when it is
+ * disabled for this VM, unknown, or could not be handled in the kernel.
+ */
+int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
+{
+	int rc, idx;
+
+	/* hcall numbers are multiples of 4; the bitmap has one bit per hcall. */
+	if (cmd <= MAX_HCALL_OPCODE &&
+	    !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls))
+		return EMULATE_FAIL;
+
+	switch (cmd) {
+	case H_ENTER:
+		return kvmppc_h_pr_enter(vcpu);
+	case H_REMOVE:
+		return kvmppc_h_pr_remove(vcpu);
+	case H_PROTECT:
+		return kvmppc_h_pr_protect(vcpu);
+	case H_BULK_REMOVE:
+		return kvmppc_h_pr_bulk_remove(vcpu);
+	case H_PUT_TCE:
+		return kvmppc_h_pr_put_tce(vcpu);
+	case H_PUT_TCE_INDIRECT:
+		return kvmppc_h_pr_put_tce_indirect(vcpu);
+	case H_STUFF_TCE:
+		return kvmppc_h_pr_stuff_tce(vcpu);
+	case H_CEDE:
+		/* Set MSR_EE in the guest MSR, then halt the vcpu. */
+		kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
+		kvm_vcpu_halt(vcpu);
+		vcpu->stat.generic.halt_wakeup++;
+		return EMULATE_DONE;
+	case H_LOGICAL_CI_LOAD:
+		return kvmppc_h_pr_logical_ci_load(vcpu);
+	case H_LOGICAL_CI_STORE:
+		return kvmppc_h_pr_logical_ci_store(vcpu);
+	case H_SET_MODE:
+		return kvmppc_h_pr_set_mode(vcpu);
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
+		if (kvmppc_xics_enabled(vcpu))
+			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+		break;
+	case H_RTAS:
+		/* With no RTAS tokens defined there is nothing to handle here. */
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			break;
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		rc = kvmppc_rtas_hcall(vcpu);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		if (rc)
+			break;
+		kvmppc_set_gpr(vcpu, 3, 0);
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
+/*
+ * Report whether PR KVM counts hcall @cmd as implemented.
+ *
+ * Returns 1 for every hcall listed in the switch below (some only with the
+ * corresponding config option enabled) and 0 for anything else.
+ */
+int kvmppc_hcall_impl_pr(unsigned long cmd)
+{
+	switch (cmd) {
+	case H_ENTER:
+	case H_REMOVE:
+	case H_PROTECT:
+	case H_BULK_REMOVE:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case H_GET_TCE:
+	case H_PUT_TCE:
+	case H_PUT_TCE_INDIRECT:
+	case H_STUFF_TCE:
+#endif
+	case H_CEDE:
+	case H_LOGICAL_CI_LOAD:
+	case H_LOGICAL_CI_STORE:
+	case H_SET_MODE:
+#ifdef CONFIG_KVM_XICS
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
+#endif
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ *
+ * The list ends with a 0 entry, which is what the loop in
+ * kvmppc_pr_init_default_hcalls() stops on.
+ */
+static unsigned int default_hcall_list[] = {
+	H_ENTER,
+	H_REMOVE,
+	H_PROTECT,
+	H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	H_GET_TCE,
+	H_PUT_TCE,
+#endif
+	H_CEDE,
+	H_SET_MODE,
+#ifdef CONFIG_KVM_XICS
+	H_XIRR,
+	H_CPPR,
+	H_EOI,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR_X,
+#endif
+	0	/* terminator */
+};
+
+/*
+ * Enable every hcall from default_hcall_list[] for @kvm by setting its bit
+ * (hcall number / 4) in kvm->arch.enabled_hcalls.  Warns about any listed
+ * hcall that kvmppc_hcall_impl_pr() does not report as implemented.
+ */
+void kvmppc_pr_init_default_hcalls(struct kvm *kvm)
+{
+	unsigned int *entry;
+
+	/* The list is terminated by a 0 entry. */
+	for (entry = default_hcall_list; *entry; ++entry) {
+		WARN_ON(!kvmppc_hcall_impl_pr(*entry));
+		__set_bit(*entry / 4, kvm->arch.enabled_hcalls);
+	}
+}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
new file mode 100644
index 0000000000..0a557ffca9
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/exception-64s.h>
+#endif
+
+/*****************************************************************************
+ *                                                                           *
+ *        Real Mode handlers that need to be in low physical memory          *
+ *                                                                           *
+ ****************************************************************************/
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define FUNC(name) 		name
+#else
+#define FUNC(name) 		GLUE(.,name)
+#endif
+
+#elif defined(CONFIG_PPC_BOOK3S_32)
+
+#define FUNC(name)		name
+
+#define RFI_TO_KERNEL	rfi
+#define RFI_TO_GUEST	rfi
+
+/*
+ * INTERRUPT_TRAMPOLINE intno
+ *
+ * Emits the global entry point kvmppc_trampoline_<intno>.  It decides
+ * whether the interrupt was taken while a KVM guest was active: if not,
+ * r12, r13 and CR are restored and control goes to kvmppc_resume_<intno>;
+ * otherwise it either branches to kvmppc_handler_skip_ins or loads the
+ * interrupt number into r12 and branches to kvmppc_handler_trampoline_exit.
+ */
+.macro INTERRUPT_TRAMPOLINE intno
+
+.global kvmppc_trampoline_\intno
+kvmppc_trampoline_\intno:
+
+	mtspr	SPRN_SPRG_SCRATCH0, r13		/* Save r13 */
+
+	/*
+	 * First thing to do is to find out if we're coming
+	 * from a KVM guest or a Linux process.
+	 *
+	 * To distinguish, we check a magic byte in the PACA/current
+	 */
+	mfspr	r13, SPRN_SPRG_THREAD
+	lwz	r13, THREAD_KVM_SVCPU(r13)
+	/* PPC32 can have a NULL pointer - let's check for that */
+	mtspr   SPRN_SPRG_SCRATCH1, r12		/* Save r12 */
+	mfcr	r12				/* r12 = CR, before cmpwi changes it */
+	cmpwi	r13, 0
+	bne	1f
+	/* Not a guest: restore CR, r12 and r13, then resume the Linux handler */
+2:	mtcr	r12
+	mfspr	r12, SPRN_SPRG_SCRATCH1
+	mfspr	r13, SPRN_SPRG_SCRATCH0		/* r13 = original r13 */
+	b	kvmppc_resume_\intno		/* Get back original handler */
+
+	/* Shadow vcpu present: stash CR and the original r12 in it */
+1:	tophys(r13, r13)
+	stw	r12, HSTATE_SCRATCH1(r13)
+	mfspr	r12, SPRN_SPRG_SCRATCH1
+	stw	r12, HSTATE_SCRATCH0(r13)
+	lbz	r12, HSTATE_IN_GUEST(r13)
+	cmpwi	r12, KVM_GUEST_MODE_NONE
+	bne	..kvmppc_handler_hasmagic_\intno
+	/* No KVM guest? Then jump back to the Linux handler! */
+	lwz	r12, HSTATE_SCRATCH1(r13)	/* reload the saved CR for label 2 */
+	b	2b
+
+	/* Now we know we're handling a KVM guest */
+..kvmppc_handler_hasmagic_\intno:
+
+	/* Should we just skip the faulting instruction? */
+	cmpwi	r12, KVM_GUEST_MODE_SKIP
+	beq	kvmppc_handler_skip_ins
+
+	/* Let's store which interrupt we're handling */
+	li	r12, \intno
+
+	/* Jump into the SLB exit code that goes to the highmem handler */
+	b	kvmppc_handler_trampoline_exit
+
+.endm
+
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_SYSTEM_RESET
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_MACHINE_CHECK
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_STORAGE
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_STORAGE
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALIGNMENT
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PROGRAM
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_FP_UNAVAIL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DECREMENTER
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_SYSCALL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_TRACE
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PERFMON
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALTIVEC
+
+/*
+ * Bring us back to the faulting code, but skip the
+ * faulting instruction.
+ *
+ * This is a generic exit path from the interrupt
+ * trampolines above.
+ *
+ * Input Registers:
+ *
+ * R12            = free
+ * R13            = Shadow VCPU (PACA)
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH1 = guest CR
+ * SPRG_SCRATCH0  = guest R13
+ *
+ * On exit CR, R12 and R13 hold the saved values listed above and SRR0
+ * has been advanced by 4.
+ */
+kvmppc_handler_skip_ins:
+
+	/* Patch the IP to the next instruction */
+	/* Note that prefixed instructions are disabled in PR KVM for now */
+	mfsrr0	r12
+	addi	r12, r12, 4
+	mtsrr0	r12
+
+	/* Clean up all state */
+	lwz	r12, HSTATE_SCRATCH1(r13)
+	mtcr	r12
+	PPC_LL	r12, HSTATE_SCRATCH0(r13)
+	GET_SCRATCH0(r13)
+
+	/* And get back into the code */
+	RFI_TO_KERNEL
+#endif
+
+/*
+ * Call kvmppc_handler_trampoline_enter in real mode
+ *
+ * On entry, r4 contains the guest shadow MSR
+ * MSR.EE has to be 0 when calling this function
+ *
+ * Register use below: r5 = current MSR with EE set, r6 = current MSR with
+ * IR and DR cleared (loaded into SRR1), r7 = real address of
+ * kvmppc_handler_trampoline_enter (loaded into SRR0).
+ */
+_GLOBAL_TOC(kvmppc_entry_trampoline)
+	mfmsr	r5
+	LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
+	toreal(r7)
+
+	li	r6, MSR_IR | MSR_DR
+	andc	r6, r5, r6	/* Clear DR and IR in MSR value */
+	/*
+	 * Set EE in HOST_MSR so that it's enabled when we get into our
+	 * C exit handler function.
+	 */
+	ori	r5, r5, MSR_EE
+	/* The return-from-interrupt below jumps to r7 with MSR = r6 */
+	mtsrr0	r7
+	mtsrr1	r6
+	RFI_TO_KERNEL
+
+#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 0000000000..6808bda0db
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <linux/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+#include <asm/xive.h>
+
+#ifdef CONFIG_KVM_XICS
+/*
+ * RTAS "ibm,set-xive": args[0] = irq, args[1] = server, args[2] = priority.
+ * Requires exactly 3 arguments and 1 return value.  rets[0] receives 0 on
+ * success and -3 on any failure, including a wrong nargs/nret.
+ */
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server, priority;
+	int rc = -3;
+
+	if (be32_to_cpu(args->nargs) == 3 && be32_to_cpu(args->nret) == 1) {
+		irq = be32_to_cpu(args->args[0]);
+		server = be32_to_cpu(args->args[1]);
+		priority = be32_to_cpu(args->args[2]);
+
+		if (xics_on_xive())
+			rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server,
+						  priority);
+		else
+			rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server,
+						  priority);
+
+		/* Every backend error is reported to the guest as -3. */
+		if (rc)
+			rc = -3;
+	}
+
+	args->rets[0] = cpu_to_be32(rc);
+}
+
+/*
+ * RTAS "ibm,get-xive": args[0] = irq.  Requires exactly 1 argument and 3
+ * return values.  On success rets[0] = 0, rets[1] = server and
+ * rets[2] = priority; on any failure only rets[0] is written, with -3.
+ */
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq, server = 0, priority = 0;
+	int rc = -3;
+
+	if (be32_to_cpu(args->nargs) == 1 && be32_to_cpu(args->nret) == 3) {
+		irq = be32_to_cpu(args->args[0]);
+
+		if (xics_on_xive())
+			rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server,
+						  &priority);
+		else
+			rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server,
+						  &priority);
+
+		if (rc) {
+			/* Every backend error is reported as -3. */
+			rc = -3;
+		} else {
+			args->rets[1] = cpu_to_be32(server);
+			args->rets[2] = cpu_to_be32(priority);
+		}
+	}
+
+	args->rets[0] = cpu_to_be32(rc);
+}
+
+/*
+ * RTAS "ibm,int-off": args[0] = irq.  Requires exactly 1 argument and 1
+ * return value.  rets[0] receives 0 on success and -3 on any failure.
+ */
+static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc = -3;
+
+	if (be32_to_cpu(args->nargs) == 1 && be32_to_cpu(args->nret) == 1) {
+		irq = be32_to_cpu(args->args[0]);
+
+		if (xics_on_xive())
+			rc = kvmppc_xive_int_off(vcpu->kvm, irq);
+		else
+			rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+
+		/* Every backend error is reported to the guest as -3. */
+		if (rc)
+			rc = -3;
+	}
+
+	args->rets[0] = cpu_to_be32(rc);
+}
+
+/*
+ * RTAS "ibm,int-on": args[0] = irq.  Requires exactly 1 argument and 1
+ * return value.  rets[0] receives 0 on success and -3 on any failure.
+ */
+static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 irq;
+	int rc = -3;
+
+	if (be32_to_cpu(args->nargs) == 1 && be32_to_cpu(args->nret) == 1) {
+		irq = be32_to_cpu(args->args[0]);
+
+		if (xics_on_xive())
+			rc = kvmppc_xive_int_on(vcpu->kvm, irq);
+		else
+			rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+
+		/* Every backend error is reported to the guest as -3. */
+		if (rc)
+			rc = -3;
+	}
+
+	args->rets[0] = cpu_to_be32(rc);
+}
+#endif /* CONFIG_KVM_XICS */
+
+/* One in-kernel RTAS service: its name and the function implementing it. */
+struct rtas_handler {
+	/* Performs the call; results are written into args->rets[]. */
+	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+	/* Name compared against the one userspace passes when defining a token. */
+	char *name;
+};
+
+/*
+ * Table of RTAS services handled in the kernel.  It is empty unless
+ * CONFIG_KVM_XICS is enabled.
+ */
+static struct rtas_handler rtas_handlers[] = {
+#ifdef CONFIG_KVM_XICS
+	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+	{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
+	{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
+#endif
+};
+
+/* Binding of an RTAS token value to a handler; one node per defined token. */
+struct rtas_token_definition {
+	struct list_head list;		/* linked into kvm->arch.rtas_tokens */
+	struct rtas_handler *handler;	/* entry of rtas_handlers[] */
+	u64 token;			/* token value matched on an RTAS call */
+};
+
+/*
+ * Compare two RTAS service names over at most the size of the name field
+ * in struct kvm_rtas_token_args.  Returns non-zero when they match.
+ */
+static int rtas_name_matches(char *s1, char *s2)
+{
+	/* Only used to obtain the size of the name field. */
+	struct kvm_rtas_token_args args;
+
+	return strncmp(s1, s2, sizeof(args.name)) == 0;
+}
+
+/*
+ * Remove the first token definition whose handler name matches @name.
+ * The caller must hold kvm->arch.rtas_token_lock.  Always returns 0:
+ * undefining a token that was never defined is not an error.
+ */
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+	struct rtas_token_definition *def, *next;
+
+	lockdep_assert_held(&kvm->arch.rtas_token_lock);
+
+	list_for_each_entry_safe(def, next, &kvm->arch.rtas_tokens, list) {
+		if (!rtas_name_matches(def->handler->name, name))
+			continue;
+
+		list_del(&def->list);
+		kfree(def);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Bind @token to the in-kernel RTAS handler called @name.
+ * The caller must hold kvm->arch.rtas_token_lock.
+ *
+ * Returns 0 on success, -EEXIST if @token is already defined, -ENOENT if
+ * no handler has that name, or -ENOMEM if the allocation fails.
+ */
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+	struct rtas_token_definition *def;
+	struct rtas_handler *match = NULL;
+	int i;
+
+	lockdep_assert_held(&kvm->arch.rtas_token_lock);
+
+	/* A token value may only be bound once. */
+	list_for_each_entry(def, &kvm->arch.rtas_tokens, list) {
+		if (def->token == token)
+			return -EEXIST;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+		if (rtas_name_matches(rtas_handlers[i].name, name)) {
+			match = &rtas_handlers[i];
+			break;
+		}
+	}
+	if (!match)
+		return -ENOENT;
+
+	def = kzalloc(sizeof(*def), GFP_KERNEL);
+	if (!def)
+		return -ENOMEM;
+
+	def->handler = match;
+	def->token = token;
+	list_add_tail(&def->list, &kvm->arch.rtas_tokens);
+
+	return 0;
+}
+
+/*
+ * Define or undefine an RTAS token for @kvm from a userspace request.
+ *
+ * A non-zero token defines the named service; a zero token undefines it.
+ * Returns -EFAULT if the request cannot be copied in, otherwise the result
+ * of rtas_token_define() / rtas_token_undefine().
+ */
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_rtas_token_args req;
+	int ret;
+
+	if (copy_from_user(&req, argp, sizeof(req)))
+		return -EFAULT;
+
+	mutex_lock(&kvm->arch.rtas_token_lock);
+	ret = req.token ? rtas_token_define(kvm, req.name, req.token)
+			: rtas_token_undefine(kvm, req.name);
+	mutex_unlock(&kvm->arch.rtas_token_lock);
+
+	return ret;
+}
+
+/*
+ * Handle an RTAS call made by the guest.
+ *
+ * Reads the rtas_args block from the guest address in r4, runs the
+ * handler bound to its token and writes the whole block, now including
+ * the return values, back to the guest.
+ *
+ * Returns 0 when a handler ran and the results were written back,
+ * -ENOENT when no handler is bound to the token, -EINVAL when nargs
+ * leaves no room for a return value, or the error from
+ * kvm_read_guest() / kvm_write_guest().
+ */
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+	struct rtas_token_definition *d;
+	struct rtas_args args;
+	rtas_arg_t *orig_rets;
+	gpa_t args_phys;
+	int rc;
+
+	/*
+	 * r4 contains the guest physical address of the RTAS args
+	 * Mask off the top 4 bits since this is a guest real address
+	 */
+	args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
+
+	kvm_vcpu_srcu_read_lock(vcpu);
+	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	if (rc)
+		goto fail;
+
+	/*
+	 * args->rets is a pointer into args->args. Now that we've
+	 * copied args we need to fix it up to point into our copy,
+	 * not the guest args. We also need to save the original
+	 * value so we can restore it on the way out.
+	 */
+	orig_rets = args.rets;
+	if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+		/*
+		 * Don't overflow our args array: ensure there is room for
+		 * at least rets[0] (even if the call specifies 0 nret).
+		 *
+		 * Each handler must then check for the correct nargs and nret
+		 * values, but they may always return failure in rets[0].
+		 */
+		rc = -EINVAL;
+		goto fail;
+	}
+	args.rets = &args.args[be32_to_cpu(args.nargs)];
+
+	mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
+
+	/* Run the handler bound to this token, if there is one. */
+	rc = -ENOENT;
+	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+		if (d->token == be32_to_cpu(args.token)) {
+			d->handler->handler(vcpu, &args);
+			rc = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&vcpu->kvm->arch.rtas_token_lock);
+
+	if (rc == 0) {
+		/* Put the guest's own rets pointer back before copying out. */
+		args.rets = orig_rets;
+		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+		if (rc)
+			goto fail;
+	}
+
+	return rc;
+
+fail:
+	/*
+	 * We only get here if the guest has called RTAS with a bogus
+	 * args pointer or nargs/nret values that would overflow the
+	 * array. That means we can't get to the args, and so we can't
+	 * fail the RTAS call. So fail right out to userspace, which
+	 * should kill the guest.
+	 *
+	 * SLOF should actually pass the hcall return value from the
+	 * rtas handler call in r3, so enter_rtas could be modified to
+	 * return a failure indication in r3 and we could return such
+	 * errors to the guest rather than failing to host userspace.
+	 * However old guests that don't test for failure could then
+	 * continue silently after errors, so for now we won't do this.
+	 */
+	return rc;
+}
+EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
+
+/*
+ * Free every RTAS token definition attached to @kvm.  No lock is taken
+ * here.
+ */
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	/* _safe iteration: each node is freed while walking the list. */
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		list_del(&d->list);
+		kfree(d);
+	}
+}
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
new file mode 100644
index 0000000000..202046a83f
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+/* Real mode helpers */
+
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#if defined(CONFIG_PPC_BOOK3S_64)	/* 64-bit: the shadow vcpu base is simply r13 */
+
+#define GET_SHADOW_VCPU(reg)    \
+	mr	reg, r13
+
+#elif defined(CONFIG_PPC_BOOK3S_32)	/* 32-bit: load it from the THREAD_KVM_SVCPU slot reached via r2 */
+
+#define GET_SHADOW_VCPU(reg)    			\
+	tophys(reg, r2);       			\
+	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(reg);	\
+	tophys(reg, reg)
+
+#endif
+
+/* Disable for nested KVM */
+#define USE_QUICK_LAST_INST
+
+
+/* Get helper functions for subarch specific functionality */
+
+#if defined(CONFIG_PPC_BOOK3S_64)
+#include "book3s_64_slb.S"
+#elif defined(CONFIG_PPC_BOOK3S_32)
+#include "book3s_32_sr.S"
+#endif
+
+/******************************************************************************
+ *                                                                            *
+ *                               Entry code                                   *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_enter
+kvmppc_handler_trampoline_enter:
+
+	/* Required state:
+	 *
+	 * MSR = ~IR|DR
+	 * R1 = host R1
+	 * R2 = host R2
+	 * R4 = guest shadow MSR
+	 * R5 = normal host MSR
+	 * R6 = current host MSR (EE, IR, DR off)
+	 * LR = highmem guest exit code
+	 * all other volatile GPRS = free
+	 * SVCPU[CR] = guest CR
+	 * SVCPU[XER] = guest XER
+	 * SVCPU[CTR] = guest CTR
+	 * SVCPU[LR] = guest LR
+	 */
+
+	/* r3 = shadow vcpu */
+	GET_SHADOW_VCPU(r3)
+
+	/* Save guest exit handler address and MSR */
+	mflr	r0				/* LR = highmem guest exit code (see above) */
+	PPC_STL	r0, HSTATE_VMHANDLER(r3)
+	PPC_STL	r5, HSTATE_HOST_MSR(r3)		/* r5 = normal host MSR */
+
+	/* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
+	PPC_STL	r1, HSTATE_HOST_R1(r3)
+	PPC_STL	r2, HSTATE_HOST_R2(r3)
+
+	/* Activate guest mode, so faults get handled by KVM */
+	li	r11, KVM_GUEST_MODE_GUEST
+	stb	r11, HSTATE_IN_GUEST(r3)
+
+	/* Switch to guest segment. This is subarch specific. */
+	LOAD_GUEST_SEGMENTS
+
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+	/* Save host FSCR */
+	mfspr	r8, SPRN_FSCR
+	std	r8, HSTATE_HOST_FSCR(r13)	/* r13 == r3 on 64-bit, see GET_SHADOW_VCPU */
+	/* Set FSCR during guest execution */
+	ld	r9, SVCPU_SHADOW_FSCR(r13)
+	mtspr	SPRN_FSCR, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+	/* Some guests may need to have dcbz set to 32 byte length.
+	 *
+	 * Usually we ensure that by patching the guest's instructions
+	 * to trap on dcbz and emulate it in the hypervisor.
+	 *
+	 * If we can, we should tell the CPU to use 32 byte dcbz though,
+	 * because that's a lot faster.
+	 */
+	lbz	r0, HSTATE_RESTORE_HID5(r3)	/* non-zero => switch HID5 to 32 byte dcbz */
+	cmpwi	r0, 0
+	beq	no_dcbz32_on
+
+	mfspr   r0,SPRN_HID5
+	ori     r0, r0, 0x80		/* XXX HID5_dcbz32 = 0x80 */
+	mtspr   SPRN_HID5,r0
+no_dcbz32_on:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	/* Enter guest */
+
+	PPC_LL	r8, SVCPU_CTR(r3)
+	PPC_LL	r9, SVCPU_LR(r3)
+	lwz	r10, SVCPU_CR(r3)		/* CR is kept as a 32-bit value */
+	PPC_LL	r11, SVCPU_XER(r3)
+
+	mtctr	r8
+	mtlr	r9
+	mtcr	r10
+	mtxer	r11
+
+	/* Fetch the guest PC; it and r4 (shadow MSR) go into SRR0/SRR1 below */
+	PPC_LL  r9, SVCPU_PC(r3)
+	/* First clear RI in our current MSR value */
+	li	r0, MSR_RI
+	andc	r6, r6, r0
+
+	PPC_LL	r0, SVCPU_R0(r3)
+	PPC_LL	r1, SVCPU_R1(r3)
+	PPC_LL	r2, SVCPU_R2(r3)
+	PPC_LL	r5, SVCPU_R5(r3)
+	PPC_LL	r7, SVCPU_R7(r3)
+	PPC_LL	r8, SVCPU_R8(r3)
+	PPC_LL	r10, SVCPU_R10(r3)
+	PPC_LL	r11, SVCPU_R11(r3)
+	PPC_LL	r12, SVCPU_R12(r3)
+	PPC_LL	r13, SVCPU_R13(r3)
+
+	MTMSR_EERI(r6)			/* presumably installs r6 (RI now clear) as MSR - confirm macro */
+	mtsrr0	r9			/* SRR0 = guest PC */
+	mtsrr1	r4			/* SRR1 = guest shadow MSR */
+
+	PPC_LL	r4, SVCPU_R4(r3)	/* r4, r6, r9 were still in use as scratch above */
+	PPC_LL	r6, SVCPU_R6(r3)
+	PPC_LL	r9, SVCPU_R9(r3)
+	PPC_LL	r3, (SVCPU_R3)(r3)	/* last: r3 was the base pointer for every load */
+
+	RFI_TO_GUEST
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_interrupt_pr
+kvmppc_interrupt_pr:
+	/* 64-bit entry. Register usage at this point:
+	 *
+	 * SPRG_SCRATCH0   = guest R13
+	 * R9              = HSTATE_IN_GUEST
+	 * R12             = (guest CR << 32) | exit handler id
+	 * R13             = PACA
+	 * HSTATE.SCRATCH0 = guest R12
+	 * HSTATE.SCRATCH2 = guest R9
+	 */
+#ifdef CONFIG_PPC64
+	/* Match 32-bit entry, then fall through into the common exit path */
+	ld	r9,HSTATE_SCRATCH2(r13)	  /* reload guest R9 (see list above) */
+	rotldi	r12, r12, 32		  /* Flip R12 halves for stw */
+	stw	r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
+	srdi	r12, r12, 32		  /* shift trap into low half */
+#endif
+
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
+	/* Register usage at this point:
+	 *
+	 * SPRG_SCRATCH0   = guest R13
+	 * R12             = exit handler id
+	 * R13             = shadow vcpu (32-bit) or PACA (64-bit)
+	 * HSTATE.SCRATCH0 = guest R12
+	 * HSTATE.SCRATCH1 = guest CR
+	 */
+
+	/* Save registers */
+
+	PPC_STL	r0, SVCPU_R0(r13)
+	PPC_STL	r1, SVCPU_R1(r13)
+	PPC_STL	r2, SVCPU_R2(r13)
+	PPC_STL	r3, SVCPU_R3(r13)
+	PPC_STL	r4, SVCPU_R4(r13)
+	PPC_STL	r5, SVCPU_R5(r13)
+	PPC_STL	r6, SVCPU_R6(r13)
+	PPC_STL	r7, SVCPU_R7(r13)
+	PPC_STL	r8, SVCPU_R8(r13)
+	PPC_STL	r9, SVCPU_R9(r13)
+	PPC_STL	r10, SVCPU_R10(r13)
+	PPC_STL	r11, SVCPU_R11(r13)
+
+	/* Restore R1/R2 so we can handle faults */
+	PPC_LL	r1, HSTATE_HOST_R1(r13)
+	PPC_LL	r2, HSTATE_HOST_R2(r13)
+
+	/* Save guest PC and MSR */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	andi.	r0, r12, 0x2		/* bit 0x2 of the exit id selects HSRR0/1 over SRR0/1 */
+	cmpwi	cr1, r0, 0		/* keep the answer in cr1 for the return path below */
+	beq	1f
+	mfspr	r3,SPRN_HSRR0
+	mfspr	r4,SPRN_HSRR1
+	andi.	r12,r12,0x3ffd		/* drop the 0x2 marker from the exit id */
+	b	2f
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1:	mfsrr0	r3
+	mfsrr1	r4
+2:
+	PPC_STL	r3, SVCPU_PC(r13)
+	PPC_STL	r4, SVCPU_SHADOW_SRR1(r13)
+
+	/* Get scratch'ed off registers */
+	GET_SCRATCH0(r9)			/* guest R13 (see list above) */
+	PPC_LL	r8, HSTATE_SCRATCH0(r13)	/* guest R12 */
+	lwz	r7, HSTATE_SCRATCH1(r13)	/* guest CR */
+
+	PPC_STL	r9, SVCPU_R13(r13)
+	PPC_STL	r8, SVCPU_R12(r13)
+	stw	r7, SVCPU_CR(r13)
+
+	/* Save more register state  */
+
+	mfxer	r5
+	mfdar	r6
+	mfdsisr	r7
+	mfctr	r8
+	mflr	r9
+
+	PPC_STL	r5, SVCPU_XER(r13)
+	PPC_STL	r6, SVCPU_FAULT_DAR(r13)
+	stw	r7, SVCPU_FAULT_DSISR(r13)
+	PPC_STL	r8, SVCPU_CTR(r13)
+	PPC_STL	r9, SVCPU_LR(r13)
+
+	/*
+	 * In order for us to easily get the last instruction,
+	 * we got the #vmexit at, we exploit the fact that the
+	 * virtual layout is still the same here, so we can just
+	 * load from the guest's PC address (still held in r3)
+	 */
+
+	/* We only load the last instruction when it's safe */
+	cmpwi	r12, BOOK3S_INTERRUPT_DATA_STORAGE
+	beq	ld_last_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_PROGRAM
+	beq	ld_last_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_SYSCALL
+	beq	ld_last_prev_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_ALIGNMENT
+	beq-	ld_last_inst
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	cmpwi	r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
+	beq-	ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+BEGIN_FTR_SECTION
+	cmpwi	r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+	beq-	ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+#endif
+
+	b	no_ld_last_inst
+
+ld_last_prev_inst:
+	addi	r3, r3, -4		/* syscall exit: fetch the word just before the saved PC */
+
+ld_last_inst:
+	/* Save off the guest instruction we're at */
+
+	/* In case lwz faults */
+	li	r0, KVM_INST_FETCH_FAILED
+
+#ifdef USE_QUICK_LAST_INST
+
+	/* Set guest mode to 'jump over instruction' so if lwz faults
+	 * we'll just continue at the next IP. */
+	li	r9, KVM_GUEST_MODE_SKIP
+	stb	r9, HSTATE_IN_GUEST(r13)
+
+	/*    1) enable paging for data */
+	mfmsr	r9
+	ori	r11, r9, MSR_DR			/* Enable paging for data */
+	mtmsr	r11
+	sync
+	/*    2) fetch the instruction */
+	lwz	r0, 0(r3)
+	/*    3) disable paging again */
+	mtmsr	r9				/* r9 still holds the MSR read in step 1 */
+	sync
+
+#endif
+	stw	r0, SVCPU_LAST_INST(r13)	/* instruction word, or KVM_INST_FETCH_FAILED */
+
+no_ld_last_inst:
+
+	/* Unset guest mode */
+	li	r9, KVM_GUEST_MODE_NONE
+	stb	r9, HSTATE_IN_GUEST(r13)
+
+	/* Switch back to host MMU */
+	LOAD_HOST_SEGMENTS
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+	lbz	r5, HSTATE_RESTORE_HID5(r13)	/* same flag that gated the HID5 change on entry */
+	cmpwi	r5, 0
+	beq	no_dcbz32_off
+
+	li	r4, 0
+	mfspr   r5,SPRN_HID5
+	rldimi  r5,r4,6,56		/* insert zeros from r4: clears the HID5 field holding 0x80 */
+	mtspr   SPRN_HID5,r5
+
+no_dcbz32_off:
+
+BEGIN_FTR_SECTION
+	/* Save guest FSCR on a FAC_UNAVAIL interrupt */
+	cmpwi	r12, BOOK3S_INTERRUPT_FAC_UNAVAIL
+	bne+	no_fscr_save
+	mfspr	r7, SPRN_FSCR
+	std	r7, SVCPU_SHADOW_FSCR(r13)
+no_fscr_save:
+	/* Restore host FSCR */
+	ld	r8, HSTATE_HOST_FSCR(r13)
+	mtspr	SPRN_FSCR, r8
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	/*
+	 * For some interrupts, we need to call the real Linux
+	 * handler, so it can do work for us. This has to happen
+	 * as if the interrupt arrived from the kernel though,
+	 * so let's fake it here where most state is restored.
+	 *
+	 * Having set up SRR0/1 with the address where we want
+	 * to continue with relocation on (potentially in module
+	 * space), we either just go straight there with rfi[d],
+	 * or we jump to an interrupt handler if there is an
+	 * interrupt to be handled first.  In the latter case,
+	 * the rfi[d] at the end of the interrupt handler will
+	 * get us back to where we want to continue.
+	 */
+
+	/* Register usage at this point:
+	 *
+	 * R1       = host R1
+	 * R2       = host R2
+	 * R10      = raw exit handler id
+	 * R12      = exit handler id
+	 * R13      = shadow vcpu (32-bit) or PACA (64-bit)
+	 * SVCPU.*  = guest *
+	 * NOTE(review): no instruction visible in this routine loads R10; confirm.
+	 */
+
+	PPC_LL	r6, HSTATE_HOST_MSR(r13)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * We don't want to change MSR[TS] bits via rfi here.
+	 * The actual TM handling logic will be in host with
+	 * recovered DR/IR bits after HSTATE_VMHANDLER.
+	 * And MSR_TM can be enabled in HOST_MSR so rfid may
+	 * not suppress this change and can lead to exception.
+	 * Manually set MSR to prevent TS state change here.
+	 */
+	mfmsr   r7
+	rldicl  r7, r7, 64 - MSR_TS_S_LG, 62		/* r7 = current MSR[TS] as a 2-bit value */
+	rldimi  r6, r7, MSR_TS_S_LG, 63 - MSR_TS_T_LG	/* overwrite the TS field of r6 with it */
+#endif
+	PPC_LL	r8, HSTATE_VMHANDLER(r13)
+
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	beq	cr1, 1f			/* cr1 from the 0x2 test above: eq => SRR0/1 exit */
+	mtspr	SPRN_HSRR1, r6
+	mtspr	SPRN_HSRR0, r8
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
+1:	/* Restore host msr -> SRR1 */
+	mtsrr1	r6
+	/* Load highmem handler address */
+	mtsrr0	r8
+
+	/* RFI into the highmem handler, or jump to interrupt handler */
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	beqa	BOOK3S_INTERRUPT_EXTERNAL	/* absolute branch: the id is used as the target address */
+	cmpwi	r12, BOOK3S_INTERRUPT_DECREMENTER
+	beqa	BOOK3S_INTERRUPT_DECREMENTER
+	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON
+	beqa	BOOK3S_INTERRUPT_PERFMON
+	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL
+	beqa	BOOK3S_INTERRUPT_DOORBELL
+
+	RFI_TO_KERNEL
+kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 0000000000..589a8f2571
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1507 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/anon_inodes.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/time.h>
+
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1	/* tracing compiled out; change to 0 to route XICS_DBG() to trace_printk() */
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE	true
+#define DEBUG_REALMODE	false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a spin lock protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries of the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ *   ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ *
+ * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
+ *   locks array to improve scalability
+ */
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq, bool check_resend);
+
+/*
+ * Apply a line change (@level) to source @irq's P/Q state and present the
+ * interrupt when that yields P=1, Q=0. Returns -EINVAL for an unknown source,
+ * else 0; no callers look at it (KVM_IRQ_LINE_STATUS is not implemented).
+ */
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u16 src;
+	u32 pq_old, pq_new;
+
+	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		return -EINVAL;
+	}
+	state = &ics->irq_state[src];
+	if (!state->exists)
+		return -EINVAL;
+
+	if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
+		level = 1;
+	else if (level == KVM_INTERRUPT_UNSET)
+		level = 0;
+	/*
+	 * Take other values the same as 1, consistent with original code.
+	 * maybe WARN here?
+	 */
+
+	if (!state->lsi && level == 0) /* noop for MSI */
+		return 0;
+
+	do {	/* lockless update: retried until the cmpxchg below wins */
+		pq_old = state->pq_state;
+		if (state->lsi) {
+			if (level) {
+				if (pq_old & PQ_PRESENTED)
+					/* Setting already set LSI ... */
+					return 0;
+
+				pq_new = PQ_PRESENTED;
+			} else
+				pq_new = 0;	/* LSI deasserted: clear the state */
+		} else	/* MSI: shift old bits up one, keep two bits, set PQ_PRESENTED */
+			pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+	} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+	/* Test P=1, Q=0, this is the only case where we present */
+	if (pq_new == PQ_PRESENTED)
+		icp_deliver_irq(xics, NULL, irq, false);	/* NULL: ICP is looked up from state->server */
+
+	/* Record which CPU this arrived on for passed-through interrupts */
+	if (state->host_irq)
+		state->intr_cpu = raw_smp_processor_id();
+
+	return 0;
+}
+/* Re-run icp_deliver_irq() for every source in @ics whose ->resend is set. */
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+			     struct kvmppc_icp *icp)
+{
+	int i;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+		if (state->resend) {	/* unlocked peek; icp_deliver_irq() rechecks under ics->lock */
+			XICS_DBG("resend %#x prio %#x\n", state->number,
+				      state->priority);
+			icp_deliver_irq(xics, icp, state->number, true);
+		}
+	}
+}
+/* Store new XIVE fields under ics->lock; returns true if the caller must deliver. */
+static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+		       struct ics_irq_state *state,
+		       u32 server, u32 priority, u32 saved_priority)
+{
+	bool deliver;
+	unsigned long flags;
+
+	local_irq_save(flags);		/* arch_spin_lock() does not mask interrupts itself */
+	arch_spin_lock(&ics->lock);
+
+	state->server = server;
+	state->priority = priority;
+	state->saved_priority = saved_priority;
+	deliver = false;
+	if ((state->masked_pending || state->resend) && priority != MASKED) {
+		state->masked_pending = 0;	/* a held-back interrupt is now unmasked */
+		state->resend = 0;
+		deliver = true;			/* delivery happens in the caller, after unlock */
+	}
+
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
+
+	return deliver;
+}
+/* Point @irq at @server with @priority; -ENODEV/-EINVAL if a lookup fails. */
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;		/* no XICS device attached to this VM */
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, server);
+	if (!icp)
+		return -EINVAL;		/* unknown target server */
+
+	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
+		 irq, server, priority,
+		 state->masked_pending, state->resend);
+
+	if (write_xive(xics, ics, state, server, priority, priority))	/* saved_priority = priority */
+		icp_deliver_irq(xics, icp, irq, false);
+
+	return 0;
+}
+/* Read @irq's current server and priority into *@server and *@priority. */
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+	unsigned long flags;
+
+	if (!xics)
+		return -ENODEV;		/* no XICS device attached to this VM */
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);	/* take both fields as one consistent snapshot */
+	*server = state->server;
+	*priority = state->priority;
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
+
+	return 0;
+}
+/* Re-enable @irq by making ->saved_priority its active priority again. */
+int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, state->server);	/* read without ics->lock */
+	if (!icp)
+		return -EINVAL;
+
+	if (write_xive(xics, ics, state, state->server, state->saved_priority,
+		       state->saved_priority))	/* server unchanged, priority restored */
+		icp_deliver_irq(xics, icp, irq, false);
+
+	return 0;
+}
+/* Mask @irq: priority becomes MASKED, the old value moves to saved_priority. */
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	write_xive(xics, ics, state, state->server, MASKED, state->priority);	/* result unused: MASKED never asks for delivery */
+
+	return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+/* Swap icp->state from @old to @new in one cmpxchg64; false if @old was stale. */
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+				  union kvmppc_icp_state old,
+				  union kvmppc_icp_state new,
+				  bool change_self)
+{
+	bool success;
+
+	/* Calculate new output value */
+	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+	/* Attempt atomic update */
+	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+	if (!success)
+		goto bail;	/* state changed underneath us; callers loop and retry */
+
+	XICS_DBG("UPD [%04lx] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 icp->server_num,
+		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
+		 old.need_resend, old.out_ee);
+	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
+		 new.need_resend, new.out_ee);
+	/*
+	 * Check for output state update
+	 *
+	 * Note that this is racy since another processor could be updating
+	 * the state already. This is why we never clear the interrupt output
+	 * here, we only ever set it. The clear only happens prior to doing
+	 * an update and only by the processor itself. Currently we do it
+	 * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
+	 *
+	 * We also do not try to figure out whether the EE state has changed,
+	 * we unconditionally set it if the new state calls for it. The reason
+	 * for that is that we opportunistically remove the pending interrupt
+	 * flag when raising CPPR, so we need to set it back here if an
+	 * interrupt is still pending.
+	 */
+	if (new.out_ee) {
+		kvmppc_book3s_queue_irqprio(icp->vcpu,
+					    BOOK3S_INTERRUPT_EXTERNAL);
+		if (!change_self)	/* only kick when the caller said it is not updating itself */
+			kvmppc_fast_vcpu_kick(icp->vcpu);
+	}
+ bail:
+	return success;
+}
+/* Run ics_check_resend() for each ICS whose bit is set in icp->resend_map. */
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))
+			continue;	/* bit was cleared by someone else since the scan saw it */
+		if (!ics)
+			continue;	/* no ICS registered in this slot */
+		ics_check_resend(xics, ics, icp);
+	}
+}
+/* Try to latch @irq in @icp; on success *@reject holds the displaced XISR. */
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	XICS_DBG("try deliver %#x(P:%#x) to server %#lx\n", irq, priority,
+		 icp->server_num);
+
+	do {	/* recomputed from a fresh snapshot each time the update loses a race */
+		old_state = new_state = READ_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;	/* must be strictly below all three */
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;	/* 0 when nothing was pending */
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	return success;
+}
+/* Deliver (or re-deliver) @new_irq; @icp is only a hint and may be NULL. */
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq, bool check_resend)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;
+	unsigned long flags;
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->server_num) {	/* hint missing or stale */
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+				new_irq, state->server);
+			goto out;
+		}
+	}
+
+	if (check_resend)	/* resend pass: only act if the flag is still set under the lock */
+		if (!state->resend)
+			goto out;
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differentiate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		XICS_DBG("irq %#x masked pending\n", new_irq);
+		state->masked_pending = 1;	/* write_xive() delivers it once unmasked */
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * ics spin lock.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_try_to_deliver() the target
+	 * processor may well have already consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			arch_spin_unlock(&ics->lock);
+			local_irq_restore(flags);
+			new_irq = reject;	/* restart from the top for the displaced source */
+			check_resend = false;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		state->resend = 1;
+
+		/*
+		 * Make sure when checking resend, we don't miss the resend
+		 * if resend_map bit is seen and cleared.
+		 */
+		smp_wmb();
+		set_bit(ics->icsid, icp->resend_map);
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			state->resend = 0;
+			arch_spin_unlock(&ics->lock);
+			local_irq_restore(flags);
+			check_resend = false;
+			goto again;
+		}
+	}
+ out:
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
+}
+/* Set CPPR to @new_cppr, promote a pending IPI, then run latched resends. */
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			  u8 new_cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool resend;
+
+	/*
+	 * This handles several related states in one operation:
+	 *
+	 * ICP State: Down_CPPR
+	 *
+	 * Load CPPR with new value and if the XISR is 0
+	 * then check for resends:
+	 *
+	 * ICP State: Resend
+	 *
+	 * If MFRR is more favored than CPPR, check for IPIs
+	 * and notify ICS of a potential resend. This is done
+	 * asynchronously (when used in real mode, we will have
+	 * to exit here).
+	 *
+	 * We do not handle the complete Check_IPI as documented
+	 * here. In the PAPR, this state will be used for both
+	 * Set_MFRR and Down_CPPR. However, we know that we aren't
+	 * changing the MFRR state here so we don't need to handle
+	 * the case of an MFRR causing a reject of a pending irq,
+	 * this will have been handled when the MFRR was set in the
+	 * first place.
+	 *
+	 * Thus we don't have to handle rejects, only resends.
+	 *
+	 * When implementing real mode for HV KVM, resend will lead to
+	 * a H_TOO_HARD return and the whole transaction will be handled
+	 * in virtual mode.
+	 */
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		/* Down_CPPR */
+		new_state.cppr = new_cppr;
+
+		/*
+		 * Cut down Resend / Check_IPI / IPI
+		 *
+		 * The logic is that we cannot have a pending interrupt
+		 * trumped by an IPI at this point (see above), so we
+		 * know that either the pending interrupt is already an
+		 * IPI (in which case we don't care to override it) or
+		 * it's either more favored than us or non existent
+		 */
+		if (new_state.mfrr < new_cppr &&
+		    new_state.mfrr <= new_state.pending_pri) {
+			WARN_ON(new_state.xisr != XICS_IPI &&
+				new_state.xisr != 0);
+			new_state.pending_pri = new_state.mfrr;
+			new_state.xisr = XICS_IPI;
+		}
+
+		/* Latch/clear resend bit */
+		resend = new_state.need_resend;
+		new_state.need_resend = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Now handle resend checks. Those are asynchronous to the ICP
+	 * state update in HW (ie bus transactions) so we can handle them
+	 * separately here too
+	 */
+	if (resend)	/* value latched by the iteration whose update succeeded */
+		icp_check_resend(xics, icp);
+}
+/* H_XIRR: accept the pending interrupt; returns (old CPPR << 24) | XISR. */
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 xirr;
+
+	/* First, remove EE from the processor */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+
+	/*
+	 * ICP State: Accept_Interrupt
+	 *
+	 * Return the pending interrupt (if any) along with the
+	 * current CPPR, then clear the XISR & set CPPR to the
+	 * pending priority
+	 */
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+		if (!old_state.xisr)
+			break;	/* nothing pending: leave the ICP state untouched */
+		new_state.cppr = new_state.pending_pri;
+		new_state.pending_pri = 0xff;	/* same value h_cppr uses when it clears XISR */
+		new_state.xisr = 0;
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+	return xirr;
+}
+/* H_IPI: set @server's MFRR to @mfrr; H_PARAMETER if the server is unknown. */
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+				 unsigned long mfrr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	u32 reject;
+	bool resend;
+	bool local;
+
+	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+		 vcpu->vcpu_id, server, mfrr);
+
+	icp = vcpu->arch.icp;
+	local = icp->server_num == server;	/* targeting the calling vcpu's own ICP? */
+	if (!local) {
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+		if (!icp)
+			return H_PARAMETER;
+	}
+
+	/*
+	 * ICP state: Set_MFRR
+	 *
+	 * If the CPPR is more favored than the new MFRR, then
+	 * nothing needs to be rejected as there can be no XISR to
+	 * reject.  If the MFRR is being made less favored then
+	 * there might be a previously-rejected interrupt needing
+	 * to be resent.
+	 *
+	 * ICP state: Check_IPI
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it.
+	 *
+	 * ICP State: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
+	 */
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		/* Set_MFRR */
+		new_state.mfrr = mfrr;
+
+		/* Check_IPI */
+		reject = 0;
+		resend = false;
+		if (mfrr < new_state.cppr) {
+			/* Reject a pending interrupt if not an IPI */
+			if (mfrr <= new_state.pending_pri) {
+				reject = new_state.xisr;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
+		}
+
+		if (mfrr > old_state.mfrr) {	/* MFRR made less favored: latch the resend request */
+			resend = new_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, local));
+
+	/* Handle reject */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject, false);
+
+	/* Handle resend */
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return H_SUCCESS;
+}
+/* H_IPOLL: report @server's XIRR in r4 and MFRR in r5; ICP state is unchanged. */
+static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	union kvmppc_icp_state state;
+	struct kvmppc_icp *icp;
+
+	icp = vcpu->arch.icp;
+	if (icp->server_num != server) {	/* polling some other server's ICP */
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+		if (!icp)
+			return H_PARAMETER;
+	}
+	state = READ_ONCE(icp->state);	/* one snapshot, so both values are consistent */
+	kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr);
+	kvmppc_set_gpr(vcpu, 5, state.mfrr);
+	return H_SUCCESS;
+}
+/* H_CPPR: set this vcpu's CPPR to @cppr, rejecting or resending as needed. */
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u32 reject;
+
+	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+	/*
+	 * ICP State: Set_CPPR
+	 *
+	 * We can safely compare the new value with the current
+	 * value outside of the transaction as the CPPR is only
+	 * ever changed by the processor on itself
+	 */
+	if (cppr > icp->state.cppr)
+		icp_down_cppr(xics, icp, cppr);	/* NOTE(review): no return, so the Up_CPPR path below also runs */
+	else if (cppr == icp->state.cppr)
+		return;
+
+	/*
+	 * ICP State: Up_CPPR
+	 *
+	 * The processor is raising its priority, this can result
+	 * in a rejection of a pending interrupt:
+	 *
+	 * ICP State: Reject_Current
+	 *
+	 * We can remove EE from the current processor, the update
+	 * transaction will set it again if needed
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		reject = 0;
+		new_state.cppr = cppr;
+
+		if (cppr <= new_state.pending_pri) {	/* pending irq no longer beats CPPR */
+			reject = new_state.xisr;
+			new_state.xisr = 0;
+			new_state.pending_pri = 0xff;
+		}
+
+	} while (!icp_try_update(icp, old_state, new_state, true));
+
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_deliver_irq).
+	 */
+	if (reject && reject != XICS_IPI)
+		icp_deliver_irq(xics, icp, reject, false);
+}
+/* EOI @irq at its ICS and re-present it if P is still set afterwards. */
+static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+	u32 pq_old, pq_new;
+
+	/*
+	 * ICS EOI handling: For LSI, if P bit is still set, we need to
+	 * resend it.
+	 *
+	 * For MSI, we move Q bit into P (and clear Q). If it is set,
+	 * resend it.
+	 */
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n", irq);
+		return H_PARAMETER;
+	}
+	state = &ics->irq_state[src];
+
+	if (state->lsi)
+		pq_new = state->pq_state;	/* LSI: state is only read, not modified */
+	else
+		do {	/* MSI: lockless shift, retried until the cmpxchg wins */
+			pq_old = state->pq_state;
+			pq_new = pq_old >> 1;
+		} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+	if (pq_new & PQ_PRESENTED)
+		icp_deliver_irq(xics, icp, irq, false);
+
+	kvm_notify_acked_irq(vcpu->kvm, 0, irq);
+
+	return H_SUCCESS;
+}
+
+/*
+ * Handle the H_EOI hcall.  @xirr carries the CPPR to restore in its top
+ * byte and the interrupt source number in its low 24 bits.
+ */
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_icp *presenter = vcpu->arch.icp;
+	struct kvmppc_xics *ctrl = vcpu->kvm->arch.xics;
+	u32 source = xirr & 0x00ffffff;
+
+	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+	/*
+	 * ICP State: EOI
+	 *
+	 * If SW misuses EOI to make the CPPR more favoured we do not
+	 * look for a rejection of the pending interrupt; that is a SW
+	 * error which PAPR says we need not handle.
+	 *
+	 * ICP State: Down_CPPR is shared with H_CPPR, hence the helper.
+	 * The EOI is forwarded to the ICS only after the CPPR update.
+	 */
+	icp_down_cppr(ctrl, presenter, xirr >> 24);
+
+	/* Only real sources are EOIed at the ICS; IPIs have no EOI */
+	if (source != XICS_IPI)
+		return ics_eoi(vcpu, source);
+
+	return H_SUCCESS;
+}
+
+/*
+ * Carry out the actions recorded in the vcpu's ICP rm_action mask
+ * (kick a vcpu, check resend, notify EOI), bump the matching counters,
+ * then clear the mask.  Always returns H_SUCCESS.
+ */
+int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+	struct kvmppc_icp *self = vcpu->arch.icp;
+	struct kvmppc_xics *ctrl = vcpu->kvm->arch.xics;
+
+	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+		 hcall, self->rm_action, self->rm_dbgstate.raw, self->rm_dbgtgt);
+
+	if (self->rm_action & XICS_RM_KICK_VCPU) {
+		self->n_rm_kick_vcpu++;
+		kvmppc_fast_vcpu_kick(self->rm_kick_target);
+	}
+
+	if (self->rm_action & XICS_RM_CHECK_RESEND) {
+		self->n_rm_check_resend++;
+		icp_check_resend(ctrl, self->rm_resend_icp);
+	}
+
+	if (self->rm_action & XICS_RM_NOTIFY_EOI) {
+		self->n_rm_notify_eoi++;
+		kvm_notify_acked_irq(vcpu->kvm, 0, self->rm_eoied_irq);
+	}
+
+	/* Everything requested has been handled */
+	self->rm_action = 0;
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
+
+/*
+ * Dispatch an XICS hcall @req issued by @vcpu.
+ *
+ * Returns H_HARDWARE when the VM has no XICS or the vcpu has no ICP,
+ * otherwise the status of the individual handler (H_SUCCESS for the
+ * handlers that return nothing and for unrecognised requests).
+ */
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	unsigned long res;
+
+	/* Nothing can be done without an ICP */
+	if (!xics || !vcpu->arch.icp)
+		return H_HARDWARE;
+
+	/* These requests don't have real-mode implementations at present */
+	if (req == H_XIRR_X) {
+		res = kvmppc_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		kvmppc_set_gpr(vcpu, 5, get_tb());
+		return H_SUCCESS;
+	}
+	if (req == H_IPOLL)
+		return kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+
+	/* Check for real mode returning too hard */
+	if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
+		return kvmppc_xics_rm_complete(vcpu, req);
+
+	switch (req) {
+	case H_XIRR:
+		res = kvmppc_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		return H_SUCCESS;
+	case H_CPPR:
+		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+		return H_SUCCESS;
+	case H_EOI:
+		return kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+	case H_IPI:
+		return kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+				    kvmppc_get_gpr(vcpu, 5));
+	}
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
+
+
+/* -- Initialisation code etc. -- */
+
+/* Dump the passthrough IRQ map @pimap (if any) into seq file @m. */
+static void xics_debugfs_irqmap(struct seq_file *m,
+				struct kvmppc_passthru_irqmap *pimap)
+{
+	int n = 0;
+
+	if (pimap == NULL)
+		return;
+
+	seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
+				pimap->n_mapped);
+
+	/* One line per mapped entry: real hwirq and virtual hwirq */
+	while (n < pimap->n_mapped) {
+		seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
+			pimap->mapped[n].r_hwirq, pimap->mapped[n].v_hwirq);
+		n++;
+	}
+}
+
+/*
+ * seq_file show callback for the "xics" debugfs file: prints the
+ * passthrough map, one line per ICP with summed counters, then every
+ * source of every allocated ICS (under that ICS's lock, IRQs off).
+ */
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xics *xics = m->private;
+	struct kvm *kvm = xics->kvm;
+	unsigned long sum_rm_kick = 0, sum_rm_resend = 0, sum_rm_eoi = 0;
+	unsigned long sum_resend = 0, sum_reject = 0;
+	unsigned long irqflags, n;
+	struct kvm_vcpu *vcpu;
+	int id;
+
+	if (kvm == NULL)
+		return 0;
+
+	xics_debugfs_irqmap(m, kvm->arch.pimap);
+
+	seq_printf(m, "=========\nICP state\n=========\n");
+
+	kvm_for_each_vcpu(n, vcpu, kvm) {
+		struct kvmppc_icp *presenter = vcpu->arch.icp;
+		union kvmppc_icp_state snap;
+
+		if (presenter == NULL)
+			continue;
+
+		/* Single read of the whole state word */
+		snap.raw = READ_ONCE(presenter->state.raw);
+		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
+			   presenter->server_num, snap.xisr,
+			   snap.pending_pri, snap.cppr, snap.mfrr,
+			   snap.out_ee, snap.need_resend);
+
+		sum_rm_kick += presenter->n_rm_kick_vcpu;
+		sum_rm_eoi += presenter->n_rm_notify_eoi;
+		sum_rm_resend += presenter->n_rm_check_resend;
+		sum_resend += presenter->n_check_resend;
+		sum_reject += presenter->n_reject;
+	}
+
+	seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n",
+			sum_rm_kick, sum_rm_resend,
+			sum_rm_eoi);
+	seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
+			sum_resend, sum_reject);
+
+	for (id = 0; id <= KVMPPC_XICS_MAX_ICS_ID; id++) {
+		struct kvmppc_ics *ics = xics->ics[id];
+
+		if (ics == NULL)
+			continue;
+
+		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
+			   id);
+
+		local_irq_save(irqflags);
+		arch_spin_lock(&ics->lock);
+
+		for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++) {
+			struct ics_irq_state *src = &ics->irq_state[n];
+
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
+				   src->number, src->server, src->priority,
+				   src->saved_priority, src->pq_state,
+				   src->resend, src->masked_pending);
+		}
+
+		arch_spin_unlock(&ics->lock);
+		local_irq_restore(irqflags);
+	}
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(xics_debug);
+
+/* Create the read-only "xics" debugfs file under the VM's debugfs dir. */
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+	struct dentry *parent = xics->kvm->debugfs_dentry;
+
+	xics->dentry = debugfs_create_file("xics", 0444, parent, xics,
+					   &xics_debug_fops);
+
+	pr_debug("%s: created\n", __func__);
+}
+
+/*
+ * Allocate and publish the ICS node that covers @irq, unless one already
+ * exists.  Runs under kvm->lock.
+ *
+ * Returns whatever xics->ics[icsid] holds after the attempt: the new or
+ * pre-existing node, or NULL if the allocation failed.
+ */
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+					struct kvmppc_xics *xics, int irq)
+{
+	struct kvmppc_ics *ics;
+	int i, icsid;
+
+	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&kvm->lock);
+
+	/* ICS already exists - somebody else got here first */
+	if (xics->ics[icsid])
+		goto out;
+
+	/* Create the ICS */
+	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
+	if (!ics)
+		goto out;
+
+	ics->icsid = icsid;
+
+	/* Every source starts out masked, numbered by ICS id and index */
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
+		ics->irq_state[i].priority = MASKED;
+		ics->irq_state[i].saved_priority = MASKED;
+	}
+	/* Order the initialisation above before publishing the pointer */
+	smp_wmb();
+	xics->ics[icsid] = ics;
+
+	if (icsid > xics->max_icsid)
+		xics->max_icsid = icsid;
+
+ out:
+	mutex_unlock(&kvm->lock);
+	return xics->ics[icsid];
+}
+
+/*
+ * Allocate an ICP for @vcpu with server number @server_num.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XICS, -EEXIST if the
+ * server number is already in use, -ENOMEM on allocation failure.
+ */
+static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+{
+	struct kvmppc_icp *new_icp;
+
+	if (vcpu->kvm->arch.xics == NULL)
+		return -ENODEV;
+
+	if (kvmppc_xics_find_server(vcpu->kvm, server_num) != NULL)
+		return -EEXIST;
+
+	new_icp = kzalloc(sizeof(*new_icp), GFP_KERNEL);
+	if (new_icp == NULL)
+		return -ENOMEM;
+
+	/* Start with no IPI requested and nothing pending */
+	new_icp->state.pending_pri = MASKED;
+	new_icp->state.mfrr = MASKED;
+	new_icp->server_num = server_num;
+	new_icp->vcpu = vcpu;
+	vcpu->arch.icp = new_icp;
+
+	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+	return 0;
+}
+
+/*
+ * Encode the vcpu's ICP state (CPPR, XISR, MFRR, pending priority) in
+ * the KVM_REG_PPC_ICP_* register layout.  Returns 0 if there is no ICP.
+ */
+u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	union kvmppc_icp_state state;
+
+	if (!icp)
+		return 0;
+	/*
+	 * Take a single snapshot of the state word, like the other
+	 * readers in this file, so the fields we encode are mutually
+	 * consistent even if the state is updated concurrently.
+	 */
+	state = READ_ONCE(icp->state);
+	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
+		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
+		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
+		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
+}
+
+/*
+ * Restore the vcpu's ICP state from @icpval (KVM_REG_PPC_ICP_* layout).
+ *
+ * Returns 0 on success, -ENOENT if the vcpu has no ICP or the VM has no
+ * XICS, -EINVAL if the requested state is not internally consistent or
+ * names a source with no ICS.
+ */
+int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	union kvmppc_icp_state old_state, new_state;
+	struct kvmppc_ics *ics;
+	u8 cppr, mfrr, pending_pri;
+	u32 xisr;
+	u16 src;
+	bool resend;
+
+	if (!icp || !xics)
+		return -ENOENT;
+
+	/* Unpack the individual fields; the u8 locals truncate the shifts */
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+	/* Require the new state to be internally consistent */
+	if (xisr == 0) {
+		if (pending_pri != 0xff)
+			return -EINVAL;
+	} else if (xisr == XICS_IPI) {
+		if (pending_pri != mfrr || pending_pri >= cppr)
+			return -EINVAL;
+	} else {
+		if (pending_pri >= mfrr || pending_pri >= cppr)
+			return -EINVAL;
+		/* The lookup only validates that the source has an ICS */
+		ics = kvmppc_xics_find_ics(xics, xisr, &src);
+		if (!ics)
+			return -EINVAL;
+	}
+
+	new_state.raw = 0;
+	new_state.cppr = cppr;
+	new_state.xisr = xisr;
+	new_state.mfrr = mfrr;
+	new_state.pending_pri = pending_pri;
+
+	/*
+	 * Deassert the CPU interrupt request.
+	 * icp_try_update will reassert it if necessary.
+	 */
+	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+
+	/*
+	 * Note that if we displace an interrupt from old_state.xisr,
+	 * we don't mark it as rejected.  We expect userspace to set
+	 * the state of the interrupt sources to be consistent with
+	 * the ICP states (either before or afterwards, which doesn't
+	 * matter).  We do handle resends due to CPPR becoming less
+	 * favoured because that is necessary to end up with a
+	 * consistent state in the situation where userspace restores
+	 * the ICS states before the ICP states.
+	 */
+	/*
+	 * NOTE(review): the comment above talks about CPPR but the test
+	 * below compares the MFRR fields - confirm which is intended.
+	 */
+	do {
+		old_state = READ_ONCE(icp->state);
+
+		if (new_state.mfrr <= old_state.mfrr) {
+			resend = false;
+			new_state.need_resend = old_state.need_resend;
+		} else {
+			resend = old_state.need_resend;
+			new_state.need_resend = 0;
+		}
+	} while (!icp_try_update(icp, old_state, new_state, false));
+
+	if (resend)
+		icp_check_resend(xics, icp);
+
+	return 0;
+}
+
+/*
+ * Read the state of source @irq and store it, encoded as a KVM_XICS_*
+ * word, at the user address @addr.
+ *
+ * Returns 0 on success, -ENOENT if the source has no ICS or does not
+ * exist, -EFAULT if the user copy fails.
+ */
+static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	u64 __user *uptr = (u64 __user *) addr;
+	struct ics_irq_state *st;
+	struct kvmppc_ics *ics;
+	unsigned long irqflags;
+	u64 word, pri;
+	int rc = -ENOENT;
+	u16 slot;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &slot);
+	if (!ics)
+		return -ENOENT;
+	st = &ics->irq_state[slot];
+
+	local_irq_save(irqflags);
+	arch_spin_lock(&ics->lock);
+
+	if (!st->exists)
+		goto unlock;
+
+	word = st->server;
+	pri = st->priority;
+	if (pri == MASKED) {
+		/* Report the saved priority for a masked source */
+		word |= KVM_XICS_MASKED;
+		pri = st->saved_priority;
+	}
+	word |= pri << KVM_XICS_PRIORITY_SHIFT;
+
+	if (st->lsi) {
+		word |= KVM_XICS_LEVEL_SENSITIVE;
+		if (st->pq_state & PQ_PRESENTED)
+			word |= KVM_XICS_PENDING;
+	} else if (st->masked_pending || st->resend) {
+		word |= KVM_XICS_PENDING;
+	}
+
+	if (st->pq_state & PQ_PRESENTED)
+		word |= KVM_XICS_PRESENTED;
+	if (st->pq_state & PQ_QUEUED)
+		word |= KVM_XICS_QUEUED;
+
+	rc = 0;
+
+unlock:
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(irqflags);
+
+	if (rc)
+		return rc;
+
+	/* Copy out only after the lock has been dropped */
+	return put_user(word, uptr) ? -EFAULT : 0;
+}
+
+/*
+ * Set the state of source @irq from the KVM_XICS_* word read from the
+ * user address @addr, creating the covering ICS if needed.
+ *
+ * Returns 0 on success, -ENOENT if @irq is out of range, -ENOMEM if the
+ * ICS cannot be created, -EFAULT if the user read fails, -EINVAL if the
+ * priority is not MASKED and the destination server does not exist.
+ */
+static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 prio;
+	u32 server;
+	unsigned long flags;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	/* idx is filled in by the lookup even when no ICS exists yet */
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics) {
+		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
+		if (!ics)
+			return -ENOMEM;
+	}
+	irqp = &ics->irq_state[idx];
+	if (get_user(val, ubufp))
+		return -EFAULT;
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	prio = val >> KVM_XICS_PRIORITY_SHIFT;
+	if (prio != MASKED &&
+	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
+		return -EINVAL;
+
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
+	irqp->server = server;
+	/* Keep the requested priority even if the source is masked */
+	irqp->saved_priority = prio;
+	if (val & KVM_XICS_MASKED)
+		prio = MASKED;
+	irqp->priority = prio;
+	irqp->resend = 0;
+	irqp->masked_pending = 0;
+	irqp->lsi = 0;
+	irqp->pq_state = 0;
+	if (val & KVM_XICS_LEVEL_SENSITIVE)
+		irqp->lsi = 1;
+	/* If PENDING, set P in case P is not saved because of old code */
+	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+		irqp->pq_state |= PQ_PRESENTED;
+	if (val & KVM_XICS_QUEUED)
+		irqp->pq_state |= PQ_QUEUED;
+	irqp->exists = 1;
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
+
+	/* A pending source is delivered once the lock has been released */
+	if (val & KVM_XICS_PENDING)
+		icp_deliver_irq(xics, NULL, irqp->number, false);
+
+	return 0;
+}
+
+/*
+ * Set source @irq to @level through the VM's XICS.  @irq_source_id and
+ * @line_status are not used.  Returns -ENODEV when the VM has no XICS,
+ * otherwise the result of ics_deliver_irq().
+ */
+int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+			bool line_status)
+{
+	struct kvmppc_xics *ctrl = kvm->arch.xics;
+
+	if (ctrl == NULL)
+		return -ENODEV;
+
+	return ics_deliver_irq(ctrl, irq, level);
+}
+
+/*
+ * kvm_device set_attr hook: only KVM_DEV_XICS_GRP_SOURCES is handled,
+ * any other group yields -ENXIO.
+ */
+static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
+		return -ENXIO;
+
+	return xics_set_source(dev->private, attr->attr, attr->addr);
+}
+
+/*
+ * kvm_device get_attr hook: only KVM_DEV_XICS_GRP_SOURCES is handled,
+ * any other group yields -ENXIO.
+ */
+static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
+		return -ENXIO;
+
+	return xics_get_source(dev->private, attr->attr, attr->addr);
+}
+
+/*
+ * kvm_device has_attr hook: returns 0 for a source attribute whose
+ * number lies in [KVMPPC_XICS_FIRST_IRQ, KVMPPC_XICS_NR_IRQS), -ENXIO
+ * for anything else.
+ */
+static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	if (attr->group == KVM_DEV_XICS_GRP_SOURCES &&
+	    attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+	    attr->attr < KVMPPC_XICS_NR_IRQS)
+		return 0;
+
+	return -ENXIO;
+}
+
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+/*
+ * Tear down the XICS device: remove its debugfs file, free every vcpu's
+ * ICP, detach the device from the VM and free all ICS nodes and @dev.
+ * The kvmppc_xics structure itself is not freed here.
+ */
+static void kvmppc_xics_release(struct kvm_device *dev)
+{
+	struct kvmppc_xics *xics = dev->private;
+	unsigned long i;
+	struct kvm *kvm = xics->kvm;
+	struct kvm_vcpu *vcpu;
+
+	pr_devel("Releasing xics device\n");
+
+	/*
+	 * Since this is the device release function, we know that
+	 * userspace does not have any open fd referring to the
+	 * device.  Therefore there can not be any of the device
+	 * attribute set/get functions being executed concurrently,
+	 * and similarly, the connect_vcpu and set/clr_mapped
+	 * functions also cannot be being executed.
+	 */
+
+	debugfs_remove(xics->dentry);
+
+	/*
+	 * We should clean up the vCPU interrupt presenters first.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/*
+		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+		 * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
+		 * Holding the vcpu->mutex also means that execution is
+		 * excluded for the vcpu until the ICP was freed. When the vcpu
+		 * can execute again, vcpu->arch.icp and vcpu->arch.irq_type
+		 * have been cleared and the vcpu will not be going into the
+		 * XICS code anymore.
+		 */
+		mutex_lock(&vcpu->mutex);
+		kvmppc_xics_free_icp(vcpu);
+		mutex_unlock(&vcpu->mutex);
+	}
+
+	/*
+	 * NOTE(review): kvm has already been used by the vcpu loop above,
+	 * so this NULL check looks redundant - confirm.
+	 */
+	if (kvm)
+		kvm->arch.xics = NULL;
+
+	/* Free every ICS node up to the highest id ever created */
+	for (i = 0; i <= xics->max_icsid; i++) {
+		kfree(xics->ics[i]);
+		xics->ics[i] = NULL;
+	}
+	/*
+	 * A reference of the kvmppc_xics pointer is now kept under
+	 * the xics_device pointer of the machine for reuse. It is
+	 * freed when the VM is destroyed for now until we fix all the
+	 * execution paths.
+	 */
+	kfree(dev);
+}
+
+/*
+ * Return a zeroed kvmppc_xics for @kvm: the one cached in
+ * kvm->arch.xics_device is cleared and reused if present, otherwise a
+ * new one is allocated and cached.  Returns NULL if allocation fails.
+ */
+static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
+{
+	struct kvmppc_xics *cached = kvm->arch.xics_device;
+
+	if (cached) {
+		memset(cached, 0, sizeof(*cached));
+		return cached;
+	}
+
+	cached = kzalloc(sizeof(*cached), GFP_KERNEL);
+	kvm->arch.xics_device = cached;
+
+	return cached;
+}
+
+/*
+ * kvm_device create hook: attach a XICS instance to the VM owning @dev.
+ * @type is not used.  Returns 0 on success, -EEXIST if the VM already
+ * has a XICS, -ENOMEM if none can be obtained.
+ */
+static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
+{
+	struct kvm *kvm = dev->kvm;
+	struct kvmppc_xics *xics;
+
+	pr_devel("Creating xics for partition\n");
+
+	/* Only one XICS per VM */
+	if (kvm->arch.xics)
+		return -EEXIST;
+
+	xics = kvmppc_xics_get_device(kvm);
+	if (xics == NULL)
+		return -ENOMEM;
+
+	/* Link device, controller and VM together */
+	dev->private = xics;
+	xics->dev = dev;
+	xics->kvm = kvm;
+	kvm->arch.xics = xics;
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+	    cpu_has_feature(CPU_FTR_HVMODE)) {
+		/* Real mode support is available on this CPU */
+		xics->real_mode = ENABLE_REALMODE;
+		xics->real_mode_dbg = DEBUG_REALMODE;
+	}
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+	return 0;
+}
+
+/* kvm_device init hook: set up the debugfs file for the device's XICS. */
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+	xics_debugfs_init(dev->private);
+}
+
+/* KVM device operations table for the "kvm-xics" device */
+struct kvm_device_ops kvm_xics_ops = {
+	.name = "kvm-xics",
+	.create = kvmppc_xics_create,
+	.init = kvmppc_xics_init,
+	.release = kvmppc_xics_release,
+	.set_attr = xics_set_attr,
+	.get_attr = xics_get_attr,
+	.has_attr = xics_has_attr,
+};
+
+/*
+ * Connect @vcpu to the XICS device @dev as server @xcpu.
+ *
+ * Returns 0 on success, -EPERM if @dev is not a XICS device or belongs
+ * to another VM, -EBUSY if the vcpu already has an interrupt controller
+ * type, or the error from kvmppc_xics_create_icp().
+ */
+int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 xcpu)
+{
+	struct kvmppc_xics *xics = dev->private;
+	int err;
+
+	if (dev->ops != &kvm_xics_ops || xics->kvm != vcpu->kvm)
+		return -EPERM;
+
+	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+		return -EBUSY;
+
+	err = kvmppc_xics_create_icp(vcpu, xcpu);
+	if (err)
+		return err;
+
+	vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+	return 0;
+}
+
+/*
+ * Free @vcpu's ICP, if it has one, and reset its irq_type to
+ * KVMPPC_IRQ_DEFAULT.
+ */
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+
+	if (icp == NULL)
+		return;
+
+	kfree(icp);
+	vcpu->arch.icp = NULL;
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+}
+
+/*
+ * Record @host_irq as the host interrupt backing guest source @irq and
+ * reset its intr_cpu to -1.  Does nothing if @irq has no ICS.
+ */
+void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
+			    unsigned long host_irq)
+{
+	struct ics_irq_state *st;
+	struct kvmppc_ics *ics;
+	u16 slot;
+
+	ics = kvmppc_xics_find_ics(kvm->arch.xics, irq, &slot);
+	if (ics == NULL)
+		return;
+
+	st = &ics->irq_state[slot];
+	st->host_irq = host_irq;
+	st->intr_cpu = -1;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
+
+/*
+ * Clear the host interrupt recorded for guest source @irq.  @host_irq
+ * is not used.  Does nothing if @irq has no ICS.
+ */
+void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
+			    unsigned long host_irq)
+{
+	struct kvmppc_ics *ics;
+	u16 slot;
+
+	ics = kvmppc_xics_find_ics(kvm->arch.xics, irq, &slot);
+	if (ics == NULL)
+		return;
+
+	ics->irq_state[slot].host_irq = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 0000000000..08fb0843fa
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+#ifdef CONFIG_KVM_XICS
+/*
+ * We use a two-level tree to store interrupt source information.
+ * There are up to 1024 ICS nodes, each of which can represent
+ * 1024 sources.
+ */
+/* Highest valid ICS id; the ICS id is the irq number >> ICS_SHIFT */
+#define KVMPPC_XICS_MAX_ICS_ID	1023
+#define KVMPPC_XICS_ICS_SHIFT	10
+#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
+/* Mask extracting the source index within an ICS from an irq number */
+#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)
+
+/*
+ * Interrupt source numbers below this are reserved, for example
+ * 0 is "no interrupt", and 2 is used for IPIs.
+ */
+#define KVMPPC_XICS_FIRST_IRQ	16
+/* One past the largest representable interrupt source number */
+#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * \
+				 KVMPPC_XICS_IRQ_PER_ICS)
+
+/* Priority value to use for disabling an interrupt */
+#define MASKED	0xff
+
+/* Bits of ics_irq_state.pq_state */
+#define PQ_PRESENTED	1
+#define PQ_QUEUED	2
+
+/* State for one irq source */
+struct ics_irq_state {
+	u32 number;		/* global interrupt source number */
+	u32 server;		/* destination server number */
+	u32 pq_state;		/* PQ_PRESENTED / PQ_QUEUED bits */
+	u8  priority;		/* current priority, MASKED when masked */
+	u8  saved_priority;	/* priority kept while the source is masked */
+	u8  resend;
+	u8  masked_pending;
+	u8  lsi;		/* level-sensitive interrupt */
+	u8  exists;		/* set once the source has been configured */
+	int intr_cpu;
+	u32 host_irq;		/* mapped host interrupt, 0 when unmapped */
+};
+
+/* Atomic ICP state, updated with a single compare & swap */
+union kvmppc_icp_state {
+	unsigned long raw;	/* whole state viewed as one word */
+	struct {
+		u8 out_ee:1;
+		u8 need_resend:1;
+		u8 cppr;	/* current processor priority */
+		u8 mfrr;	/* MASKED (0xff) when the ICP is created */
+		u8 pending_pri;	/* priority of the interrupt in xisr */
+		u32 xisr;	/* pending interrupt source, 0 for none */
+	};
+};
+
+/* One bit per ICS */
+#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
+
+/* Per-vcpu interrupt presenter state */
+struct kvmppc_icp {
+	struct kvm_vcpu *vcpu;		/* vcpu this presenter belongs to */
+	unsigned long server_num;	/* server number used to address it */
+	union kvmppc_icp_state state;
+	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+	/* Real mode might find something too hard, here's the action
+	 * it might request from virtual mode
+	 */
+#define XICS_RM_KICK_VCPU	0x1
+#define XICS_RM_CHECK_RESEND	0x2
+#define XICS_RM_NOTIFY_EOI	0x8
+	u32 rm_action;			/* mask of XICS_RM_* requests */
+	struct kvm_vcpu *rm_kick_target;	/* for XICS_RM_KICK_VCPU */
+	struct kvmppc_icp *rm_resend_icp;	/* for XICS_RM_CHECK_RESEND */
+	u32  rm_reject;
+	u32  rm_eoied_irq;		/* for XICS_RM_NOTIFY_EOI */
+
+	/* Counters for each reason we exited real mode */
+	unsigned long n_rm_kick_vcpu;
+	unsigned long n_rm_check_resend;
+	unsigned long n_rm_notify_eoi;
+	/* Counters for handling ICP processing in real mode */
+	unsigned long n_check_resend;
+	unsigned long n_reject;
+
+	/* Debug stuff for real mode */
+	union kvmppc_icp_state rm_dbgstate;
+	struct kvm_vcpu *rm_dbgtgt;
+};
+
+/* One ICS node: a block of KVMPPC_XICS_IRQ_PER_ICS interrupt sources */
+struct kvmppc_ics {
+	arch_spinlock_t lock;	/* taken with interrupts disabled */
+	u16 icsid;		/* index of this node in kvmppc_xics.ics[] */
+	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+/* Per-VM XICS interrupt controller state */
+struct kvmppc_xics {
+	struct kvm *kvm;		/* owning VM */
+	struct kvm_device *dev;		/* KVM device backing this XICS */
+	struct dentry *dentry;		/* "xics" debugfs file */
+	u32 max_icsid;			/* highest ICS id created so far */
+	bool real_mode;
+	bool real_mode_dbg;
+	u32 err_noics;
+	u32 err_noicp;
+	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+};
+
+/*
+ * Look up the ICP whose server number is @nr among the vcpus of @kvm.
+ * Returns NULL when no vcpu has a matching ICP.
+ */
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+							 u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	unsigned long n;
+
+	kvm_for_each_vcpu(n, vcpu, kvm) {
+		struct kvmppc_icp *candidate = vcpu->arch.icp;
+
+		/* vcpus without an ICP cannot match */
+		if (!candidate)
+			continue;
+		if (candidate->server_num == nr)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the ICS node covering interrupt @irq.
+ *
+ * If @source is non-NULL it always receives the index of @irq within
+ * its ICS, even when NULL is returned.  Returns NULL if the ICS id is
+ * out of range or the node has not been created.
+ */
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+						      u32 irq, u16 *source)
+{
+	u32 node = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	if (source != NULL)
+		*source = irq & KVMPPC_XICS_SRC_MASK;
+
+	return node <= KVMPPC_XICS_MAX_ICS_ID ? xics->ics[node] : NULL;
+}
+
+/*
+ * Hcall handlers declared here and defined outside this header;
+ * presumably the real-mode ("rm") implementations - confirm.
+ */
+extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
+extern unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu);
+extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			 unsigned long mfrr);
+extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+#endif /* CONFIG_KVM_XICS */
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
new file mode 100644
index 0000000000..f4115819e7
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -0,0 +1,2982 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/uaccess.h>
+#include <linux/irqdomain.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/time.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+/* The eoi_mmio / trig_mmio fields of @xd viewed as void __iomem pointers */
+#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
+#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
+
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY	1
+
+/*
+ * Read the TM_SPC_ACK_OS_REG acknowledge register and, if it reports an
+ * interrupt, record the returned priority in xc->pending (priorities
+ * 0-7 only) and in xc->hw_cppr.  xc->cppr is left untouched.
+ */
+static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
+{
+	u8 cppr;
+	u16 ack;
+
+	/*
+	 * Ensure any previous store to CPPR is ordered vs.
+	 * the subsequent loads from PIPR or ACK.
+	 */
+	eieio();
+
+	/* Perform the acknowledge OS to register cycle. */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/* XXX Check grouping level */
+
+	/* Anything ? */
+	if (!((ack >> 8) & TM_QW1_NSR_EO))
+		return;
+
+	/* Grab CPPR of the most favored pending interrupt */
+	cppr = ack & 0xff;
+	if (cppr < 8)
+		xc->pending |= 1 << cppr;
+
+	/* Check consistency */
+	if (cppr >= xc->hw_cppr)
+		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+			smp_processor_id(), cppr, xc->hw_cppr);
+
+	/*
+	 * Update our image of the HW CPPR. We don't yet modify
+	 * xc->cppr, this will be done as we scan for interrupts
+	 * in the queues.
+	 */
+	xc->hw_cppr = cppr;
+}
+
+/*
+ * Do a 64-bit load at @offset in the EOI page of @xd and return the
+ * result as a u8.  A SET_PQ_10 load on a source with the store-EOI flag
+ * has XIVE_ESB_LD_ST_MO or-ed into the offset first.
+ */
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+	u64 raw;
+
+	if ((xd->flags & XIVE_IRQ_FLAG_STORE_EOI) &&
+	    offset == XIVE_ESB_SET_PQ_10)
+		offset |= XIVE_ESB_LD_ST_MO;
+
+	raw = __raw_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+	/* Raw read is not byte-swapped: take the top byte instead */
+	raw >>= 64-8;
+#endif
+	return (u8)raw;
+}
+
+
+/*
+ * EOI the source described by @xd, using store EOI, the LSI load EOI
+ * cycle, or a PQ=00 load followed by a re-trigger when Q was set.
+ * @hw_irq is not used by this function.
+ */
+static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+{
+	/* If the XIVE supports the new "store EOI" facility, use it */
+	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		__raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		/*
+		 * For LSIs the HW EOI cycle is used rather than PQ bits,
+		 * as they are automatically re-triggered in HW when still
+		 * pending.
+		 */
+		__raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+	} else {
+		uint64_t eoi_val;
+
+		/*
+		 * Otherwise for EOI, we use the special MMIO that does
+		 * a clear of both P and Q and returns the old Q,
+		 * except for LSIs where we use the "EOI cycle" special
+		 * load.
+		 *
+		 * This allows us to then do a re-trigger if Q was set
+		 * rather than synthesizing an interrupt in software
+		 */
+		eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
+
+		/* Re-trigger if needed */
+		if ((eoi_val & 1) && __x_trig_page(xd))
+			__raw_writeq(0, __x_trig_page(xd));
+	}
+}
+
+/* Scan modes accepted by xive_vm_scan_interrupts() */
+enum {
+	scan_fetch,	/* consume: update queue pointers, pending and CPPR */
+	scan_poll,	/* peek only: return the interrupt, change nothing else */
+	scan_eoi,	/* update xc->pending but leave the CPPR alone */
+};
+
+/*
+ * Scan the queues of @xc, from the most favoured priority set in
+ * @pending, for the next interrupt to present.
+ *
+ * Returns the interrupt number found, XICS_IPI when a pending MFRR
+ * wins, or 0 when nothing is deliverable below the current CPPR.
+ *
+ * @scan_type selects the side effects: scan_poll changes neither
+ * xc->pending nor the CPPR, scan_eoi only stores the updated pending
+ * mask, scan_fetch additionally advances the queue pointers, EOIs the
+ * IPI source and updates xc->cppr and the HW CPPR.
+ */
+static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
+				       u8 pending, int scan_type)
+{
+	u32 hirq = 0;
+	u8 prio = 0xff;
+
+	/* Find highest pending priority */
+	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+		struct xive_q *q;
+		u32 idx, toggle;
+		__be32 *qpage;
+
+		/*
+		 * If pending is 0 this will return 0xff which is what
+		 * we want
+		 */
+		prio = ffs(pending) - 1;
+
+		/* Don't scan past the guest cppr */
+		if (prio >= xc->cppr || prio > 7) {
+			if (xc->mfrr < xc->cppr) {
+				prio = xc->mfrr;
+				hirq = XICS_IPI;
+			}
+			break;
+		}
+
+		/* Grab queue and pointers */
+		q = &xc->queues[prio];
+		idx = q->idx;
+		toggle = q->toggle;
+
+		/*
+		 * Snapshot the queue page. The test further down for EOI
+		 * must use the same "copy" that was used by __xive_read_eq
+		 * since qpage can be set concurrently and we don't want
+		 * to miss an EOI.
+		 */
+		qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+		/*
+		 * Try to fetch from the queue. Will return 0 for a
+		 * non-queueing priority (ie, qpage = 0).
+		 */
+		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+		/*
+		 * If this was a signal for an MFRR change done by
+		 * H_IPI we skip it. Additionally, if we were fetching
+		 * we EOI it now, thus re-enabling reception of a new
+		 * such signal.
+		 *
+		 * We also need to do that if prio is 0 and we had no
+		 * page for the queue. In this case, we have non-queued
+		 * IPI that needs to be EOId.
+		 *
+		 * This is safe because if we have another pending MFRR
+		 * change that wasn't observed above, the Q bit will have
+		 * been set and another occurrence of the IPI will trigger.
+		 */
+		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+			if (scan_type == scan_fetch) {
+				xive_vm_source_eoi(xc->vp_ipi,
+						       &xc->vp_ipi_data);
+				q->idx = idx;
+				q->toggle = toggle;
+			}
+			/* Loop back on same queue with updated idx/toggle */
+			WARN_ON(hirq && hirq != XICS_IPI);
+			if (hirq)
+				goto skip_ipi;
+		}
+
+		/* If it's the dummy interrupt, continue searching */
+		if (hirq == XICS_DUMMY)
+			goto skip_ipi;
+
+		/* Clear the pending bit if the queue is now empty */
+		if (!hirq) {
+			pending &= ~(1 << prio);
+
+			/*
+			 * Check if the queue count needs adjusting due to
+			 * interrupts being moved away.
+			 */
+			if (atomic_read(&q->pending_count)) {
+				int p = atomic_xchg(&q->pending_count, 0);
+
+				if (p) {
+					WARN_ON(p > atomic_read(&q->count));
+					atomic_sub(p, &q->count);
+				}
+			}
+		}
+
+		/*
+		 * If the most favoured prio we found pending is less
+		 * favored (or equal) than a pending IPI, we return
+		 * the IPI instead.
+		 */
+		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+			prio = xc->mfrr;
+			hirq = XICS_IPI;
+			break;
+		}
+
+		/* If fetching, update queue pointers */
+		if (scan_type == scan_fetch) {
+			q->idx = idx;
+			q->toggle = toggle;
+		}
+	}
+
+	/* If we are just taking a "peek", do nothing else */
+	if (scan_type == scan_poll)
+		return hirq;
+
+	/* Update the pending bits */
+	xc->pending = pending;
+
+	/*
+	 * If this is an EOI that's it, no CPPR adjustment done here,
+	 * all we needed was cleanup the stale pending bits and check
+	 * if there's anything left.
+	 */
+	if (scan_type == scan_eoi)
+		return hirq;
+
+	/*
+	 * If we found an interrupt, adjust what the guest CPPR should
+	 * be as if we had just fetched that interrupt from HW.
+	 *
+	 * Note: This can only make xc->cppr smaller as the previous
+	 * loop will only exit with hirq != 0 if prio is lower than
+	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+	 * for pending IPIs.
+	 */
+	if (hirq)
+		xc->cppr = prio;
+	/*
+	 * If it was an IPI the HW CPPR might have been lowered too much
+	 * as the HW interrupt we use for IPIs is routed to priority 0.
+	 *
+	 * We re-sync it here.
+	 */
+	if (xc->cppr != xc->hw_cppr) {
+		xc->hw_cppr = xc->cppr;
+		__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+	}
+
+	return hirq;
+}
+
+/*
+ * Handle H_XIRR: acknowledge pending interrupts from the HW, fetch the
+ * next one for the guest and place it in GPR4 together with the guest
+ * view of the CPPR that was in effect before the fetch.
+ *
+ * Always returns H_SUCCESS.
+ */
+static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+	u32 hirq;
+
+	pr_devel("H_XIRR\n");
+
+	xc->stat_vm_h_xirr++;
+
+	/* First collect pending bits from HW */
+	xive_vm_ack_pending(xc);
+
+	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+		 xc->pending, xc->hw_cppr, xc->cppr);
+
+	/* Grab previous CPPR and reverse map it */
+	old_cppr = xive_prio_to_guest(xc->cppr);
+
+	/* Scan for actual interrupts */
+	hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
+
+	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+		 hirq, xc->hw_cppr, xc->cppr);
+
+	/* That should never hit */
+	if (hirq & 0xff000000)
+		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+
+	/*
+	 * XXX We could check if the interrupt is masked here and
+	 * filter it. If we chose to do so, we would need to do:
+	 *
+	 *    if (masked) {
+	 *        lock();
+	 *        if (masked) {
+	 *            old_Q = true;
+	 *            hirq = 0;
+	 *        }
+	 *        unlock();
+	 *    }
+	 */
+
+	/*
+	 * Return interrupt and old CPPR in GPR4.  The CPPR is widened to
+	 * u32 before shifting so that a value >= 0x80 is not shifted
+	 * into the sign bit of a promoted int.
+	 */
+	vcpu->arch.regs.gpr[4] = hirq | ((u32)old_cppr << 24);
+
+	return H_SUCCESS;
+}
+
+/*
+ * Handle H_IPOLL: peek at the interrupt state of @server without
+ * consuming anything, and return the result in the calling vcpu's GPR4.
+ *
+ * Returns H_PARAMETER if @server does not name a known server,
+ * H_SUCCESS otherwise.
+ */
+static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 pending = xc->pending;
+	u32 hirq;
+
+	pr_devel("H_IPOLL(server=%ld)\n", server);
+
+	xc->stat_vm_h_ipoll++;
+
+	/* Grab the target VCPU if not the current one */
+	if (xc->server_num != server) {
+		struct kvm_vcpu *target;
+
+		/*
+		 * Keep @vcpu pointing at the caller: the hcall result
+		 * must be written to the caller's registers, not to the
+		 * registers of the vcpu being polled.
+		 */
+		target = kvmppc_xive_find_server(vcpu->kvm, server);
+		if (!target)
+			return H_PARAMETER;
+		xc = target->arch.xive_vcpu;
+
+		/* Scan all priorities */
+		pending = 0xff;
+	} else {
+		/* Grab pending interrupt if any */
+		__be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
+		u8 pipr = be64_to_cpu(qw1) & 0xff;
+
+		if (pipr < 8)
+			pending |= 1 << pipr;
+	}
+
+	hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
+
+	/*
+	 * Return the interrupt and the polled server's CPPR in the
+	 * caller's GPR4.  The CPPR is widened to u32 before shifting so
+	 * a value >= 0x80 is not shifted into the sign bit of an int.
+	 */
+	vcpu->arch.regs.gpr[4] = hirq | ((u32)xc->cppr << 24);
+
+	return H_SUCCESS;
+}
+
+/*
+ * Tell the hardware about the most favoured priority that software knows to
+ * be pending for @xc, so that a new interrupt is presented.
+ *
+ * The candidate set is xc->pending plus the MFRR (an MFRR of 8 or more that
+ * is not 0xff is folded into bit 7). Nothing is written if the set is empty.
+ */
+static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
+{
+	u8 pending, prio, mfrr;
+
+	pending = xc->pending;
+
+	/*
+	 * xc->mfrr is updated locklessly by H_IPI on other CPUs. Take a single
+	 * snapshot so the range check and the shift below operate on the same
+	 * value; re-reading could otherwise shift by an out-of-range amount.
+	 */
+	mfrr = READ_ONCE(xc->mfrr);
+	if (mfrr != 0xff) {
+		if (mfrr < 8)
+			pending |= 1 << mfrr;
+		else
+			pending |= 0x80;
+	}
+	if (!pending)
+		return;
+
+	/* Lowest set bit == numerically lowest (most favoured) priority */
+	prio = ffs(pending) - 1;
+
+	__raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
+}
+
+/*
+ * Walk the event queues of @xc from priority xc->cppr upwards and pull out
+ * every queued interrupt whose source is no longer targeted at this server.
+ *
+ * Each such entry is overwritten in place with XICS_DUMMY (keeping its
+ * generation bit) and the source is then EOI'd; for non-LSI sources PQ is
+ * first set to 11.
+ *
+ * The walk uses private copies of the queue index and toggle, so q->idx and
+ * q->toggle are left untouched.
+ */
+static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		/* Local cursor: start where the consumer currently stands */
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		/* Queue not provisioned for this priority */
+		if (!qpage)
+			continue;
+
+		/* For each interrupt in the queue */
+		for (;;) {
+			entry = be32_to_cpup(qpage + idx);
+
+			/* No more ? (top bit equal to toggle means not valid) */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * All right, it *has* been re-routed, kill it from
+			 * the queue.
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/*
+			 * If it's not an LSI, set PQ to 11 so that the EOI
+			 * below will force a resend.
+			 */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			xive_vm_source_eoi(hw_num, xd);
+
+next:
+			/* Advance the cursor, flipping the toggle on wrap */
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1;
+		}
+	}
+}
+
+/*
+ * H_CPPR hypercall handler: set the current processor priority of the
+ * calling vCPU.
+ *
+ * @cppr is the guest value; it is converted with xive_prio_from_guest()
+ * before being stored in xc->cppr and written to the CPPR byte of the OS
+ * ring in the TIMA.
+ *
+ * If the new value is numerically greater than the old one, pending work is
+ * pushed to the hardware; otherwise the queues are scanned for interrupts
+ * that have been re-routed away from this vCPU.
+ *
+ * Always returns H_SUCCESS.
+ */
+static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u8 old_cppr;
+
+	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+	xc->stat_vm_h_cppr++;
+
+	/* Map CPPR */
+	cppr = xive_prio_from_guest(cppr);
+
+	/* Remember old and update SW state */
+	old_cppr = xc->cppr;
+	xc->cppr = cppr;
+
+	/*
+	 * Order the above update of xc->cppr with the subsequent
+	 * read of xc->mfrr inside push_pending_to_hw()
+	 */
+	smp_mb();
+
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
+		xive_vm_push_pending_to_hw(xc);
+	} else {
+		/*
+		 * We are masking more, we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		xive_vm_scan_for_rerouted_irqs(xive, xc);
+	}
+
+	/* Apply new CPPR: record it, then write it to the thread context */
+	xc->hw_cppr = cppr;
+	__raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+	return H_SUCCESS;
+}
+
+/*
+ * H_EOI hypercall handler: signal end-of-interrupt and restore the CPPR.
+ *
+ * @xirr carries the new guest CPPR in bits 31:24 and the interrupt number in
+ * bits 23:0.
+ *
+ * For a real source, state->in_eoi is raised around the EOI so that
+ * xive_lock_and_mask()/xive_lock_for_unmask() back off while it is in
+ * progress. If the source is masked by the guest only the saved P bit is
+ * cleared; otherwise the source is EOI'd (and an asserted emulated LSI is
+ * re-triggered).
+ *
+ * On every path the pending state is re-scanned, pushed to the hardware and
+ * the new CPPR is written to the thread context.
+ *
+ * Returns 0 on success or H_PARAMETER if the interrupt source is unknown.
+ */
+static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_irq_data *xd;
+	u8 new_cppr = xirr >> 24;
+	u32 irq = xirr & 0x00ffffff, hw_num;
+	u16 src;
+	int rc = 0;
+
+	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+	xc->stat_vm_h_eoi++;
+
+	xc->cppr = xive_prio_from_guest(new_cppr);
+
+	/*
+	 * IPIs are synthesized from MFRR and thus don't need
+	 * any special EOI handling. The underlying interrupt
+	 * used to signal MFRR changes is EOId when fetched from
+	 * the queue.
+	 */
+	if (irq == XICS_IPI || irq == 0) {
+		/*
+		 * This barrier orders the setting of xc->cppr vs.
+		 * subsequent test of xc->mfrr done inside
+		 * scan_interrupts and push_pending_to_hw
+		 */
+		smp_mb();
+		goto bail;
+	}
+
+	/* Find interrupt source */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb) {
+		pr_devel(" source not found !\n");
+		rc = H_PARAMETER;
+		/* Same as above */
+		smp_mb();
+		goto bail;
+	}
+	state = &sb->irq_state[src];
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	/* Tell lock_and_mask/lock_for_unmask that an EOI is in progress */
+	state->in_eoi = true;
+
+	/*
+	 * This barrier orders both setting of in_eoi above vs,
+	 * subsequent test of guest_priority, and the setting
+	 * of xc->cppr vs. subsequent test of xc->mfrr done inside
+	 * scan_interrupts and push_pending_to_hw
+	 */
+	smp_mb();
+
+again:
+	if (state->guest_priority == MASKED) {
+		/* Re-check under the lock; retry if it got unmasked meanwhile */
+		arch_spin_lock(&sb->lock);
+		if (state->guest_priority != MASKED) {
+			arch_spin_unlock(&sb->lock);
+			goto again;
+		}
+		pr_devel(" EOI on saved P...\n");
+
+		/* Clear old_p, that will cause unmask to perform an EOI */
+		state->old_p = false;
+
+		arch_spin_unlock(&sb->lock);
+	} else {
+		pr_devel(" EOI on source...\n");
+
+		/* Perform EOI on the source */
+		xive_vm_source_eoi(hw_num, xd);
+
+		/* If it's an emulated LSI, check level and resend */
+		if (state->lsi && state->asserted)
+			__raw_writeq(0, __x_trig_page(xd));
+
+	}
+
+	/*
+	 * This barrier orders the above guest_priority check
+	 * and spin_lock/unlock with clearing in_eoi below.
+	 *
+	 * It also has to be a full mb() as it must ensure
+	 * the MMIOs done in source_eoi() are completed before
+	 * state->in_eoi is visible.
+	 */
+	mb();
+	state->in_eoi = false;
+bail:
+
+	/* Re-evaluate pending IRQs and update HW */
+	xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
+	xive_vm_push_pending_to_hw(xc);
+	pr_devel(" after scan pending=%02x\n", xc->pending);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = xc->cppr;
+	__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+	return rc;
+}
+
+/*
+ * H_IPI hypercall handler: set the MFRR of @server and, if that makes an IPI
+ * deliverable, trigger the target's VP IPI.
+ *
+ * @vcpu:   the calling vCPU (only its statistics counter is touched).
+ * @server: server number of the target vCPU.
+ * @mfrr:   new MFRR value for the target.
+ *
+ * Returns H_PARAMETER if @server does not exist, H_SUCCESS otherwise.
+ */
+static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			       unsigned long mfrr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+	xc->stat_vm_h_ipi++;
+
+	/* Find target; from here on vcpu/xc refer to the target */
+	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+	if (!vcpu)
+		return H_PARAMETER;
+	xc = vcpu->arch.xive_vcpu;
+
+	/* Locklessly write over MFRR */
+	xc->mfrr = mfrr;
+
+	/*
+	 * The load of xc->cppr below and the subsequent MMIO store
+	 * to the IPI must happen after the above mfrr update is
+	 * globally visible so that:
+	 *
+	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+	 *   updating xc->cppr then reading xc->mfrr.
+	 *
+	 * - The target of the IPI sees the xc->mfrr update
+	 */
+	mb();
+
+	/* Shoot the IPI if more favored than the target's cppr */
+	if (mfrr < xc->cppr)
+		__raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+	return H_SUCCESS;
+}
+
+/*
+ * We leave a gap of a couple of interrupts in the queue to
+ * account for the IPI and additional safety guard.
+ */
+#define XIVE_Q_GAP	2
+
+/*
+ * Report whether the TM_QW1W2_HO bit is set in this vCPU's VP CAM word,
+ * i.e. whether save/restore is enabled at the VP level.
+ */
+static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.xive_vcpu->vp_cam & TM_QW1W2_HO) != 0;
+}
+
+/*
+ * Check that the vCPU's save/restore setup matches what the device expects:
+ * when the device has KVMPPC_XIVE_FLAG_SAVE_RESTORE set the vCPU must have it
+ * enabled too; otherwise there is nothing to verify.
+ */
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive *xive = vcpu->arch.xive_vcpu->xive;
+
+	if (!(xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE))
+		return true;
+
+	return kvmppc_xive_vcpu_has_save_restore(vcpu);
+}
+
+/*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on)
+ *
+ * Writes the saved words 0-1 (only when save/restore is not enabled for the
+ * vCPU) and the CAM word into the OS ring of the TIMA, marks the vCPU as
+ * pushed, clears irq_pending and, if the escalation interrupt is flagged on,
+ * masks it by setting its PQ bits to 01.
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+	u64 pq;
+
+	/*
+	 * Nothing to do if the platform doesn't have a XIVE
+	 * or this vCPU doesn't have its own XIVE context
+	 * (e.g. because it's not using an in-kernel interrupt controller).
+	 */
+	if (!tima || !vcpu->arch.xive_cam_word)
+		return;
+
+	/* The eieio() pair brackets the TIMA stores and the pushed flag */
+	eieio();
+	if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+		__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+	__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+	vcpu->arch.xive_pushed = 1;
+	eieio();
+
+	/*
+	 * We clear the irq_pending flag. There is a small chance of a
+	 * race vs. the escalation interrupt happening on another
+	 * processor setting it again, but the only consequence is to
+	 * cause a spurious wakeup on the next H_CEDE, which is not an
+	 * issue.
+	 */
+	vcpu->arch.irq_pending = 0;
+
+	/*
+	 * In single escalation mode, if the escalation interrupt is
+	 * on, we mask it.
+	 */
+	if (vcpu->arch.xive_esc_on) {
+		/* The load itself sets PQ to 01 and returns the old PQ */
+		pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+						  XIVE_ESB_SET_PQ_01));
+		mb();
+
+		/*
+		 * We have a possible subtle race here: The escalation
+		 * interrupt might have fired and be on its way to the
+		 * host queue while we mask it, and if we unmask it
+		 * early enough (re-cede right away), there is a
+		 * theoretical possibility that it fires again, thus
+		 * landing in the target queue more than once which is
+		 * a big no-no.
+		 *
+		 * Fortunately, solving this is rather easy. If the
+		 * above load setting PQ to 01 returns a previous
+		 * value where P is set, then we know the escalation
+		 * interrupt is somewhere on its way to the host. In
+		 * that case we simply don't clear the xive_esc_on
+		 * flag below. It will be eventually cleared by the
+		 * handler for the escalation interrupt.
+		 *
+		 * Then, when doing a cede, we check that flag again
+		 * before re-enabling the escalation interrupt, and if
+		 * set, we abort the cede.
+		 */
+		if (!(pq & XIVE_ESB_VAL_P))
+			/* Now P is 0, we can clear the flag */
+			vcpu->arch.xive_esc_on = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
+/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ *
+ * Does nothing unless the vCPU is currently marked as pushed. Otherwise the
+ * OS context is pulled from the TIMA, words 0-1 are saved (only when
+ * save/restore is not enabled for the vCPU), and the pushed flag is cleared.
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+	if (!vcpu->arch.xive_pushed)
+		return;
+
+	/*
+	 * Should not have been pushed if there is no tima
+	 */
+	if (WARN_ON(!tima))
+		return;
+
+	eieio();
+	/* First load to pull the context, we ignore the value */
+	__raw_readl(tima + TM_SPC_PULL_OS_CTX);
+	/* Second load to recover the context state (Words 0 and 1) */
+	if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+		vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+	/* Fixup some of the state for the next load */
+	vcpu->arch.xive_saved_state.lsmfb = 0;
+	vcpu->arch.xive_saved_state.ack = 0xff;
+	vcpu->arch.xive_pushed = 0;
+	eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+/*
+ * Re-arm the escalation interrupt of @vcpu before it cedes.
+ *
+ * Returns true if the cede may proceed: either there is no escalation ESB
+ * page recorded for the vCPU, or the escalation was re-enabled (PQ set to 00
+ * and xive_esc_on set). Returns false if xive_esc_on was still set, in which
+ * case PQ is set to 10 and the cede must be aborted.
+ */
+bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+	void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+	bool ret = true;
+
+	if (!esc_vaddr)
+		return ret;
+
+	/* we are using XIVE with single escalation */
+
+	if (vcpu->arch.xive_esc_on) {
+		/*
+		 * If we still have a pending escalation, abort the cede,
+		 * and we must set PQ to 10 rather than 00 so that we don't
+		 * potentially end up with two entries for the escalation
+		 * interrupt in the XIVE interrupt queue.  In that case
+		 * we also don't want to set xive_esc_on to 1 here in
+		 * case we race with xive_esc_irq().
+		 */
+		ret = false;
+		/*
+		 * The escalation interrupts are special as we don't EOI them.
+		 * There is no need to use the load-after-store ordering offset
+		 * to set PQ to 10 as we won't use StoreEOI.
+		 */
+		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+	} else {
+		/* Raise the flag first, then enable the source (PQ = 00) */
+		vcpu->arch.xive_esc_on = true;
+		mb();
+		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+	}
+	mb();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
+/*
+ * Fire a generic XIVE interrupt by storing to its trigger page.
+ *
+ * Only valid for MSI sources that have a trigger page: an LSI or a source
+ * without trig_mmio produces a warning and is left alone.
+ *
+ * Returns true if the trigger store was issued, false otherwise.
+ */
+static bool xive_irq_trigger(struct xive_irq_data *xd)
+{
+	/* LSIs are never triggered this way, and MSIs must have a page */
+	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI) ||
+	    WARN_ON(!xd->trig_mmio))
+		return false;
+
+	out_be64(xd->trig_mmio, 0);
+	return true;
+}
+
+/*
+ * Host handler for a vCPU escalation interrupt.
+ *
+ * @data is the struct kvm_vcpu that was registered with request_irq(). The
+ * handler flags an interrupt as pending for that vCPU, kicks it if it is
+ * ceded or nested, and clears xive_esc_on.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t xive_esc_irq(int irq, void *data)
+{
+	struct kvm_vcpu *vcpu = data;
+
+	vcpu->arch.irq_pending = 1;
+	/* Make irq_pending visible before testing ceded/nested */
+	smp_mb();
+	if (vcpu->arch.ceded || vcpu->arch.nested)
+		kvmppc_fast_vcpu_kick(vcpu);
+
+	/* Since we have the no-EOI flag, the interrupt is effectively
+	 * disabled now. Clearing xive_esc_on means we won't bother
+	 * doing so on the next entry.
+	 *
+	 * This also allows the entry code to know that if a PQ combination
+	 * of 10 is observed while xive_esc_on is true, it means the queue
+	 * contains an unprocessed escalation interrupt. We don't make use of
+	 * that knowledge today but might (see comment in book3s_hv_rmhandler.S)
+	 */
+	vcpu->arch.xive_esc_on = false;
+
+	/* This orders xive_esc_on = false vs. subsequent stale_p = true */
+	smp_wmb();	/* goes with smp_mb() in cleanup_single_escalation */
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Map and request the escalation interrupt of queue @prio for @vcpu.
+ *
+ * @single_escalation selects the naming scheme ("kvm-<lpid>-<server>" versus
+ * "kvm-<lpid>-<server>-<prio>") and, when true, additionally masks the
+ * interrupt, records its ESB page addresses in the vCPU and flags it
+ * XIVE_IRQ_FLAG_NO_EOI.
+ *
+ * Returns 0 on success or if the escalation is already attached, -EIO if the
+ * interrupt cannot be mapped, -ENOMEM if the name cannot be allocated, or the
+ * error returned by request_irq(). On failure the mapping is disposed of and
+ * xc->esc_virq[prio] is reset to 0.
+ */
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+				  bool single_escalation)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q = &xc->queues[prio];
+	char *name = NULL;
+	int rc;
+
+	/* Already there ? */
+	if (xc->esc_virq[prio])
+		return 0;
+
+	/* Hook up the escalation interrupt */
+	xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
+	if (!xc->esc_virq[prio]) {
+		pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		return -EIO;
+	}
+
+	if (single_escalation)
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num);
+	else
+		name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+				 vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (!name) {
+		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		rc = -ENOMEM;
+		goto error;
+	}
+
+	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
+	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
+			 IRQF_NO_THREAD, name, vcpu);
+	if (rc) {
+		pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		goto error;
+	}
+	/* Keep the name: it stays referenced for as long as the IRQ is held */
+	xc->esc_virq_names[prio] = name;
+
+	/* In single escalation mode, we grab the ESB MMIO of the
+	 * interrupt and mask it. Also populate the VCPU v/raddr
+	 * of the ESB page for use by asm entry/exit code. Finally
+	 * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the
+	 * core code from performing an EOI on the escalation
+	 * interrupt, thus leaving it effectively masked after
+	 * it fires once.
+	 */
+	if (single_escalation) {
+		struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+		vcpu->arch.xive_esc_raddr = xd->eoi_page;
+		vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+		xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
+	}
+
+	return 0;
+error:
+	/* Undo the mapping; name is NULL here if its allocation failed */
+	irq_dispose_mapping(xc->esc_virq[prio]);
+	xc->esc_virq[prio] = 0;
+	kfree(name);
+	return rc;
+}
+
+/*
+ * Allocate and configure the event queue of priority @prio for @vcpu.
+ *
+ * Returns 0 on success (or, with a warning, if the queue already has a
+ * page), -ENOMEM if the queue page cannot be allocated, or the error
+ * returned by xive_native_configure_queue(). On a configuration failure the
+ * freshly allocated page is released again.
+ */
+static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+	struct xive_q *q =  &xc->queues[prio];
+	void *qpage;
+	int rc;
+
+	if (WARN_ON(q->qpage))
+		return 0;
+
+	/* Allocate the queue and retrieve infos on current node for now */
+	qpage = (void *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
+	if (!qpage) {
+		pr_err("Failed to allocate queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+		return -ENOMEM;
+	}
+	memset(qpage, 0, 1 << xive->q_order);
+
+	/*
+	 * Reconfigure the queue. This will set q->qpage only once the
+	 * queue is fully configured. This is a requirement for prio 0
+	 * as we will stop doing EOIs for every IPI as soon as we observe
+	 * qpage being non-NULL, and instead will only EOI when we receive
+	 * corresponding queue 0 entries
+	 */
+	rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
+					 xive->q_order, true);
+	if (rc) {
+		pr_err("Failed to configure queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+		/*
+		 * The queue was not configured, so q->qpage does not point
+		 * at this page and nobody else will free it: release it here
+		 * instead of leaking it.
+		 */
+		free_pages((unsigned long)qpage, xive->q_page_order);
+	}
+	return rc;
+}
+
+/*
+ * Make sure priority @prio has a queue on every vCPU that has a XIVE
+ * context, provisioning the queues on first use.
+ *
+ * Must be called with xive->lock held. Returns 0 on success or the first
+ * error reported by xive_provision_queue().
+ */
+static int xive_check_provisioning(struct kvm *kvm, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+	int rc;
+
+	lockdep_assert_held(&xive->lock);
+
+	/* Nothing to do when this priority is already recorded in qmap */
+	if (xive->qmap & (1 << prio))
+		return 0;
+
+	pr_devel("Provisioning prio... %d\n", prio);
+
+	/* Give each VCPU its queue and, if needed, its escalation */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+
+		rc = xive_provision_queue(vcpu, prio);
+		if (rc)
+			return rc;
+
+		/* Per-queue escalations only exist outside single mode */
+		if (kvmppc_xive_has_single_escalation(xive))
+			continue;
+		kvmppc_xive_attach_escalation(vcpu, prio,
+					      kvmppc_xive_has_single_escalation(xive));
+	}
+
+	/* Publish the queues before flagging the priority as provisioned */
+	mb();
+	xive->qmap |= (1 << prio);
+	return 0;
+}
+
+/*
+ * Bump the pending_count of queue @prio on the vCPU identified by @server.
+ * Warns and does nothing if the server or its XIVE context cannot be found.
+ */
+static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
+{
+	struct kvm_vcpu *vcpu = kvmppc_xive_find_server(kvm, server);
+	struct kvmppc_xive_vcpu *xc;
+
+	if (!vcpu) {
+		pr_warn("%s: Can't find server %d\n", __func__, server);
+		return;
+	}
+
+	xc = vcpu->arch.xive_vcpu;
+	if (WARN_ON(!xc))
+		return;
+
+	atomic_inc(&xc->queues[prio].pending_count);
+}
+
+/*
+ * Try to reserve one slot in queue @prio of @vcpu.
+ *
+ * Returns 0 if q->count was incremented, -EBUSY if the queue already holds
+ * its maximum (queue size minus XIVE_Q_GAP), or -ENXIO if the vCPU has no
+ * valid XIVE context or the queue has no page.
+ */
+static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q;
+	u32 limit;
+
+	if (WARN_ON(!xc) || !xc->valid)
+		return -ENXIO;
+
+	q = &xc->queues[prio];
+	if (WARN_ON(!q->qpage))
+		return -ENXIO;
+
+	/* Usable capacity: every entry except the reserved gap */
+	limit = (q->msk + 1) - XIVE_Q_GAP;
+	if (!atomic_add_unless(&q->count, 1, limit))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Pick a vCPU whose queue @prio can take one more interrupt.
+ *
+ * @server is in/out: on entry the preferred server, on success the server
+ * whose queue slot was actually reserved (it only changes when the preferred
+ * one could not be used).
+ *
+ * Returns 0 on success, -EINVAL if the preferred server does not exist, or
+ * -EBUSY if no vCPU could accept the interrupt.
+ */
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	/* The requested server must exist, even if we end up elsewhere */
+	vcpu = kvmppc_xive_find_server(kvm, *server);
+	if (!vcpu) {
+		pr_devel("Can't find server %d\n", *server);
+		return -EINVAL;
+	}
+
+	pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
+
+	/* First choice: the requested server itself */
+	if (xive_try_pick_queue(vcpu, prio) == 0)
+		return 0;
+
+	pr_devel(" .. failed, looking up candidate...\n");
+
+	/* Fall back to the first VCPU that still has room */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		if (xive_try_pick_queue(vcpu, prio) != 0)
+			continue;
+
+		*server = vcpu->arch.xive_vcpu->server_num;
+		pr_devel("  found on 0x%x/%d\n", *server, prio);
+		return 0;
+	}
+	pr_devel("  no available target !\n");
+
+	/* No available target ! */
+	return -EBUSY;
+}
+
+/*
+ * Take sb->lock and mask the interrupt described by @state.
+ *
+ * Returns the guest priority the interrupt had before the call. If it was
+ * already MASKED nothing else is done; otherwise PQ is set to 10, the
+ * previous P and Q bits are saved in state->old_p / state->old_q and the
+ * source is synchronized.
+ *
+ * Returns with sb->lock held in all cases.
+ */
+static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+			     struct kvmppc_xive_src_block *sb,
+			     struct kvmppc_xive_irq_state *state)
+{
+	struct xive_irq_data *xd;
+	u32 hw_num;
+	u8 old_prio;
+	u64 val;
+
+	/*
+	 * Take the lock, set masked, try again if racing
+	 * with H_EOI
+	 */
+	for (;;) {
+		arch_spin_lock(&sb->lock);
+		old_prio = state->guest_priority;
+		state->guest_priority = MASKED;
+		/* Publish MASKED before sampling in_eoi */
+		mb();
+		if (!state->in_eoi)
+			break;
+		/* An EOI is in flight: undo, drop the lock and retry */
+		state->guest_priority = old_prio;
+		arch_spin_unlock(&sb->lock);
+	}
+
+	/* No change ? Bail */
+	if (old_prio == MASKED)
+		return old_prio;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	/* Set PQ to 10, return old P and old Q and remember them */
+	val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
+	state->old_p = !!(val & 2);
+	state->old_q = !!(val & 1);
+
+	/*
+	 * Synchronize hardware to ensure the queues are updated when
+	 * masking
+	 */
+	xive_native_sync_source(hw_num);
+
+	return old_prio;
+}
+
+/*
+ * Acquire sb->lock ahead of an unmask, making sure no H_EOI is in progress
+ * on @state at the moment the lock is held. Returns with sb->lock held.
+ */
+static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
+				 struct kvmppc_xive_irq_state *state)
+{
+	arch_spin_lock(&sb->lock);
+
+	/* Racing with H_EOI: release the lock and try again */
+	while (state->in_eoi) {
+		arch_spin_unlock(&sb->lock);
+		arch_spin_lock(&sb->lock);
+	}
+}
+
+/*
+ * Complete an unmask: restore the hardware state saved when the interrupt
+ * was masked and record @prio as the new guest priority.
+ *
+ * If the interrupt is not currently MASKED only the priority is updated.
+ */
+static void xive_finish_unmask(struct kvmppc_xive *xive,
+			       struct kvmppc_xive_src_block *sb,
+			       struct kvmppc_xive_irq_state *state,
+			       u8 prio)
+{
+	if (state->guest_priority == MASKED) {
+		struct xive_irq_data *xd;
+		u32 hw_num;
+
+		/* Get the right irq */
+		kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+		/* The saved Q was set: put it back by setting PQ to 11 */
+		if (state->old_q)
+			xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+		/*
+		 * The saved P was clear: perform an "effective" EOI on the
+		 * source, which also covers the cases where a FW EOI is
+		 * needed.
+		 */
+		if (!state->old_p)
+			xive_vm_source_eoi(hw_num, xd);
+
+		/* Order the HW accesses before marking unmasked */
+		mb();
+	}
+
+	state->guest_priority = prio;
+}
+
+/*
+ * Target an interrupt to a given server/prio, this will fallback
+ * to another server if necessary and perform the HW targetting
+ * updates as needed
+ *
+ * NOTE: Must be called with the state lock held
+ *
+ * Returns the error from kvmppc_xive_select_target() if no target could be
+ * found, otherwise the result of xive_native_configure_irq().
+ *
+ * NOTE(review): state->act_priority / state->act_server are updated before
+ * the hardware is configured and are not rolled back if that call fails.
+ */
+static int xive_target_interrupt(struct kvm *kvm,
+				 struct kvmppc_xive_irq_state *state,
+				 u32 server, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	u32 hw_num;
+	int rc;
+
+	/*
+	 * This will return a tentative server and actual
+	 * priority. The count for that new target will have
+	 * already been incremented.
+	 */
+	rc = kvmppc_xive_select_target(kvm, &server, prio);
+
+	/*
+	 * We failed to find a target ? Not much we can do
+	 * at least until we support the GIQ.
+	 */
+	if (rc)
+		return rc;
+
+	/*
+	 * Increment the old queue pending count if there
+	 * was one so that the old queue count gets adjusted later
+	 * when observed to be empty.
+	 */
+	if (state->act_priority != MASKED)
+		xive_inc_q_pending(kvm,
+				   state->act_server,
+				   state->act_priority);
+	/*
+	 * Update state and HW
+	 */
+	state->act_priority = prio;
+	state->act_server = server;
+
+	/* Get the right irq */
+	kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+	return xive_native_configure_irq(hw_num,
+					 kvmppc_xive_vp(xive, server),
+					 prio, state->number);
+}
+
+/*
+ * Targetting rules: In order to avoid losing track of
+ * pending interrupts across mask and unmask, which would
+ * allow queue overflows, we implement the following rules:
+ *
+ *  - Unless it was never enabled (or we run out of capacity)
+ *    an interrupt is always targetted at a valid server/queue
+ *    pair even when "masked" by the guest. This pair tends to
+ *    be the last one used but it can be changed under some
+ *    circumstances. That allows us to separate targetting
+ *    from masking, we only handle accounting during (re)targetting,
+ *    this also allows us to let an interrupt drain into its target
+ *    queue after masking, avoiding complex schemes to remove
+ *    interrupts out of remote processor queues.
+ *
+ *  - When masking, we set PQ to 10 and save the previous value
+ *    of P and Q.
+ *
+ *  - When unmasking, if saved Q was set, we set PQ to 11
+ *    otherwise we leave PQ to the HW state which will be either
+ *    10 if nothing happened or 11 if the interrupt fired while
+ *    masked. Effectively we are OR'ing the previous Q into the
+ *    HW Q.
+ *
+ *    Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
+ *    which will unmask the interrupt and shoot a new one if Q was
+ *    set.
+ *
+ *    Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
+ *    effectively meaning an H_EOI from the guest is still expected
+ *    for that interrupt).
+ *
+ *  - If H_EOI occurs while masked, we clear the saved P.
+ *
+ *  - When changing target, we account on the new target and
+ *    increment a separate "pending" counter on the old one.
+ *    This pending counter will be used to decrement the old
+ *    target's count when its queue has been observed empty.
+ */
+
+/*
+ * Set the target server and priority of guest interrupt @irq.
+ *
+ * A @priority of MASKED masks the interrupt and leaves its current target
+ * alone; any other value provisions the queues for that priority if needed,
+ * retargets the interrupt when the server or priority changed, and unmasks
+ * it.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XIVE device, -EINVAL if
+ * @irq is unknown, or the error from queue provisioning or retargeting.
+ */
+int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+			 u32 priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u8 new_act_prio;
+	int rc = 0;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
+		 irq, server, priority);
+
+	/* First, check provisioning of queues */
+	if (priority != MASKED) {
+		mutex_lock(&xive->lock);
+		rc = xive_check_provisioning(xive->kvm,
+			      xive_prio_from_guest(priority));
+		mutex_unlock(&xive->lock);
+	}
+	if (rc) {
+		pr_devel("  provisioning failure %d !\n", rc);
+		return rc;
+	}
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * We first handle masking/unmasking since the locking
+	 * might need to be retried due to EOIs, we'll handle
+	 * targetting changes later. These functions will return
+	 * with the SB lock held.
+	 *
+	 * xive_lock_and_mask() will also set state->guest_priority
+	 * but won't otherwise change other fields of the state.
+	 *
+	 * xive_lock_for_unmask will not actually unmask, this will
+	 * be done later by xive_finish_unmask() once the targetting
+	 * has been done, so we don't try to unmask an interrupt
+	 * that hasn't yet been targetted.
+	 */
+	if (priority == MASKED)
+		xive_lock_and_mask(xive, sb, state);
+	else
+		xive_lock_for_unmask(sb, state);
+
+
+	/*
+	 * Then we handle targetting.
+	 *
+	 * First calculate a new "actual priority"
+	 */
+	new_act_prio = state->act_priority;
+	if (priority != MASKED)
+		new_act_prio = xive_prio_from_guest(priority);
+
+	pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
+		 new_act_prio, state->act_server, state->act_priority);
+
+	/*
+	 * Then check if we actually need to change anything,
+	 *
+	 * The condition for re-targetting the interrupt is that
+	 * we have a valid new priority (new_act_prio is not 0xff)
+	 * and either the server or the priority changed.
+	 *
+	 * Note: If act_priority was ff and the new priority is
+	 *       also ff, we don't do anything and leave the interrupt
+	 *       untargetted. An attempt of doing an int_on on an
+	 *       untargetted interrupt will fail. If that is a problem
+	 *       we could initialize interrupts with a valid default
+	 *       target.
+	 */
+
+	if (new_act_prio != MASKED &&
+	    (state->act_server != server ||
+	     state->act_priority != new_act_prio))
+		rc = xive_target_interrupt(kvm, state, server, new_act_prio);
+
+	/*
+	 * Perform the final unmasking of the interrupt source
+	 * if necessary
+	 */
+	if (priority != MASKED)
+		xive_finish_unmask(xive, sb, state, priority);
+
+	/*
+	 * Finally Update saved_priority to match. Only int_on/off
+	 * set this field to a different value.
+	 */
+	state->saved_priority = priority;
+
+	/* Drop the SB lock taken by the lock_and_mask/lock_for_unmask call */
+	arch_spin_unlock(&sb->lock);
+	return rc;
+}
+
+/*
+ * Report the current target server and guest priority of interrupt @irq.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XIVE device, or -EINVAL if
+ * @irq is unknown. @server and @priority are only written on success.
+ */
+int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+			 u32 *priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+
+	/* Sample both fields under the source block lock */
+	state = &sb->irq_state[idx];
+	arch_spin_lock(&sb->lock);
+	*server = state->act_server;
+	*priority = state->guest_priority;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+/*
+ * Re-enable interrupt @irq at the priority saved in state->saved_priority.
+ *
+ * Returns 0 on success (including when the saved priority is MASKED and
+ * there is nothing to do), -ENODEV if the VM has no XIVE device, or -EINVAL
+ * if @irq is unknown or has never been targeted.
+ */
+int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_on(irq=0x%x)\n", irq);
+
+	/* An interrupt that was never targeted cannot be turned on */
+	if (state->act_priority == MASKED) {
+		pr_devel("int_on on untargetted interrupt\n");
+		return -EINVAL;
+	}
+
+	/* Only unmask when there is a real (non-0xff) priority to restore */
+	if (state->saved_priority != MASKED) {
+		xive_lock_for_unmask(sb, state);
+		xive_finish_unmask(xive, sb, state, state->saved_priority);
+		arch_spin_unlock(&sb->lock);
+	}
+
+	return 0;
+}
+
+/*
+ * Mask interrupt @irq, remembering its previous guest priority in
+ * state->saved_priority.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XIVE device, or -EINVAL if
+ * @irq is unknown.
+ */
+int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u8 old_prio;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_off(irq=0x%x)\n", irq);
+
+	/* Mask; the helper returns with sb->lock held */
+	old_prio = xive_lock_and_mask(xive, sb, state);
+	state->saved_priority = old_prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+/*
+ * Re-trigger guest interrupt @irq through its IPI.
+ *
+ * Returns false if the source does not exist or is not valid yet, true once
+ * the trigger has been attempted.
+ */
+static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return false;
+
+	state = &sb->irq_state[idx];
+	if (!state->valid)
+		return false;
+
+	/*
+	 * Fire the IPI. We rely on never having to restore a pass-through
+	 * interrupt, which should be safe enough.
+	 */
+	xive_irq_trigger(&state->ipi_data);
+
+	return true;
+}
+
+/*
+ * Build the KVM_REG_PPC_ICP register image for @vcpu, used for state
+ * saving/migration: CPPR and MFRR come from the XIVE vCPU state and the
+ * pending-priority field is always 0xff. Returns 0 if the vCPU has no XIVE
+ * context.
+ */
+u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u64 icp;
+
+	if (!xc)
+		return 0;
+
+	icp = (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT;
+	icp |= (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+	icp |= (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+	return icp;
+}
+
+/*
+ * Restore the per-vCPU interrupt presentation state from a
+ * KVM_REG_PPC_ICP register image.
+ *
+ * CPPR and MFRR are loaded into the XIVE vCPU state; a saved XISR greater
+ * than XICS_IPI is re-triggered, or remembered in xc->delayed_irq if its
+ * source cannot be triggered yet.
+ *
+ * Returns 0 on success, -ENOENT if the vCPU or the VM has no XIVE state, or
+ * -EIO if the vCPU context is currently pushed.
+ */
+int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u8 cppr, mfrr;
+	u32 xisr;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	/* Grab individual state fields. We don't use pending_pri */
+	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+		KVM_REG_PPC_ICP_XISR_MASK;
+	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+
+	pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
+		 xc->server_num, cppr, mfrr, xisr);
+
+	/*
+	 * We can't update the state of a "pushed" VCPU, but that
+	 * shouldn't happen because the vcpu->mutex makes running a
+	 * vcpu mutually exclusive with doing one_reg get/set on it.
+	 */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EIO;
+
+	/* Update VCPU HW saved state */
+	vcpu->arch.xive_saved_state.cppr = cppr;
+	xc->hw_cppr = xc->cppr = cppr;
+
+	/*
+	 * Update MFRR state. If the restored MFRR is more favoured than the
+	 * restored CPPR, trigger the VP IPI so that the target gets
+	 * re-evaluated. The VCPU will thus potentially get a spurious
+	 * interrupt but that's not a big deal.
+	 */
+	xc->mfrr = mfrr;
+	if (mfrr < cppr)
+		xive_irq_trigger(&xc->vp_ipi_data);
+
+	/*
+	 * Now saved XIRR is "interesting". It means there's something in
+	 * the legacy "1 element" queue... for an IPI we simply ignore it,
+	 * as the MFRR restore will handle that. For anything else we need
+	 * to force a resend of the source.
+	 * However the source may not have been setup yet. If that's the
+	 * case, we keep that info and increment a counter in the xive to
+	 * tell subsequent xive_set_source() to go look.
+	 */
+	if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
+		xc->delayed_irq = xisr;
+		xive->delayed_irqs++;
+		pr_devel("  xisr restore delayed\n");
+	}
+
+	return 0;
+}
+
+/*
+ * Switch guest interrupt @guest_irq over to the passed-through host
+ * interrupt @host_irq.
+ *
+ * The source's IPI is masked and turned off, the HW interrupt is
+ * configured with the server/priority currently recorded for the source,
+ * and it is EOIed when the guest had the source unmasked and the IPI's
+ * P bit was not set.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XIVE device, -EINVAL
+ * if no source block exists for @guest_irq, or the error returned by
+ * irq_set_vcpu_affinity().
+ */
+int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   unsigned long host_irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct irq_data *host_data =
+		irq_domain_get_irq_data(irq_get_default_host(), host_irq);
+	/*
+	 * NOTE(review): host_data is handed to irqd_to_hwirq() without a
+	 * NULL check - confirm irq_domain_get_irq_data() cannot fail for
+	 * the host_irq values callers pass in.
+	 */
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
+		 __func__, guest_irq, host_irq, hw_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mark the passed-through interrupt as going to a VCPU,
+	 * this will prevent further EOIs and similar operations
+	 * from the XIVE code. It will also mask the interrupt
+	 * to either PQ=10 or 11 state, the latter if the interrupt
+	 * is pending. This will allow us to unmask or retrigger it
+	 * after routing it to the guest with a simple EOI.
+	 *
+	 * The "state" argument is a "token", all it needs is to be
+	 * non-NULL to switch to passed-through or NULL for the
+	 * other way around. We may not yet have an actual VCPU
+	 * target here and we don't really care.
+	 */
+	rc = irq_set_vcpu_affinity(host_irq, state);
+	if (rc) {
+		pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
+		return rc;
+	}
+
+	/*
+	 * Mask and read state of IPI. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target. sb->lock is held from here until
+	 * the arch_spin_unlock() at the end of the function.
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/* Turn the IPI hard off */
+	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped)
+		xive->ops->reset_mapped(kvm, guest_irq);
+
+	/* Grab info about irq */
+	state->pt_number = hw_irq;
+	state->pt_data = irq_data_get_irq_handler_data(host_data);
+
+	/*
+	 * Configure the IRQ to match the existing configuration of
+	 * the IPI if it was already targetted. Otherwise this will
+	 * mask the interrupt in a lossy way (act_priority is 0xff)
+	 * which is fine for a never started interrupt.
+	 */
+	xive_native_configure_irq(hw_irq,
+				  kvmppc_xive_vp(xive, state->act_server),
+				  state->act_priority, state->number);
+
+	/*
+	 * We do an EOI to enable the interrupt (and retrigger if needed)
+	 * if the guest has the interrupt unmasked and the P bit was *not*
+	 * set in the IPI. If it was set, we know a slot may still be in
+	 * use in the target queue thus we have to wait for a guest
+	 * originated EOI
+	 */
+	if (prio != MASKED && !state->old_p)
+		xive_vm_source_eoi(hw_irq, state->pt_data);
+
+	/* Clear old_p/old_q as they are no longer relevant */
+	state->old_p = state->old_q = false;
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
+
+/*
+ * Undo kvmppc_xive_set_mapped(): hand the passed-through host interrupt
+ * @host_irq back to the host and make guest interrupt @guest_irq use its
+ * IPI again.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XIVE device, -EINVAL
+ * if no source block exists for @guest_irq, or the error returned by
+ * irq_set_vcpu_affinity(). On that last failure the masking done here is
+ * undone and sb->lock is released before returning.
+ */
+int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   unsigned long host_irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mask and read state of IRQ. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target. sb->lock is held from here on.
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/*
+	 * If old_p is set, the interrupt is pending, we switch it to
+	 * PQ=11. This will force a resend in the host so the interrupt
+	 * isn't lost to whatever host driver may pick it up
+	 */
+	if (state->old_p)
+		xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
+
+	/* Release the passed-through interrupt to the host */
+	rc = irq_set_vcpu_affinity(host_irq, NULL);
+	if (rc) {
+		pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
+		/*
+		 * The source is still passed-through. Don't leave it
+		 * masked with sb->lock held: restore the mask/prio if it
+		 * wasn't masked, then drop the lock.
+		 */
+		if (prio != MASKED)
+			xive_finish_unmask(xive, sb, state, prio);
+		arch_spin_unlock(&sb->lock);
+		return rc;
+	}
+
+	/* Forget about the IRQ */
+	state->pt_number = 0;
+	state->pt_data = NULL;
+
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped)
+		xive->ops->reset_mapped(kvm, guest_irq);
+
+	/* Reconfigure the IPI */
+	xive_native_configure_irq(state->ipi_number,
+				  kvmppc_xive_vp(xive, state->act_server),
+				  state->act_priority, state->number);
+
+	/*
+	 * If old_p is set (we have a queue entry potentially
+	 * occupied) or the interrupt is masked, we set the IPI
+	 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
+	 */
+	if (prio == MASKED || state->old_p)
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
+	else
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
+
+/*
+ * Mask every valid source currently targeting @vcpu's server number and
+ * turn off the vcpu's escalation interrupt.
+ *
+ * For each matching source, under sb->lock: act_priority is set to
+ * MASKED, the IPI ESB is loaded with XIVE_ESB_SET_PQ_01 and the IPI is
+ * reconfigured as MASKED; the same is done for the passed-through
+ * interrupt when pt_number is set.
+ *
+ * NOTE(review): the valid/act_priority/act_server filters run before
+ * sb->lock is taken - presumably safe in the callers' context; confirm.
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	int i, j;
+
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+		if (!sb)
+			continue;
+		for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
+			struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
+
+			/* Only touch live sources routed to this vcpu */
+			if (!state->valid)
+				continue;
+			if (state->act_priority == MASKED)
+				continue;
+			if (state->act_server != xc->server_num)
+				continue;
+
+			/* Clean it up */
+			arch_spin_lock(&sb->lock);
+			state->act_priority = MASKED;
+			xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+			xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+			if (state->pt_number) {
+				xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+				xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
+			}
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+
+	/* Disable vcpu's escalation interrupt */
+	if (vcpu->arch.xive_esc_on) {
+		__raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+					     XIVE_ESB_SET_PQ_01));
+		vcpu->arch.xive_esc_on = false;
+	}
+
+	/*
+	 * Clear pointers to escalation interrupt ESB.
+	 * This is safe because the vcpu->mutex is held, preventing
+	 * any other CPU from concurrently executing a KVM_RUN ioctl.
+	 */
+	vcpu->arch.xive_esc_vaddr = 0;
+	vcpu->arch.xive_esc_raddr = 0;
+}
+
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with.  However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it).  Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ *
+ * @vcpu: vcpu owning the escalation interrupt
+ * @irq:  Linux interrupt number of the escalation interrupt; its handler
+ *        data is the struct xive_irq_data whose stale_p gets adjusted
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	/*
+	 * This slightly odd sequence gives the right result
+	 * (i.e. stale_p set if xive_esc_on is false) even if
+	 * we race with xive_esc_irq() and xive_irq_eoi().
+	 */
+	xd->stale_p = false;
+	smp_mb();		/* paired with smp_wmb in xive_esc_irq */
+	if (!vcpu->arch.xive_esc_on)
+		xd->stale_p = true;
+}
+
+/*
+ * Tear down the XIVE state attached to @vcpu: mask its sources and VP
+ * IPI, free the escalation interrupts, disable the VP and its queues,
+ * free the queue pages and the VP IPI, then free the kvmppc_xive_vcpu
+ * and reset vcpu->arch.irq_type to KVMPPC_IRQ_DEFAULT.
+ *
+ * Does nothing if kvmppc_xics_enabled(vcpu) is false or the vcpu has no
+ * XIVE state.
+ */
+void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	int i;
+
+	if (!kvmppc_xics_enabled(vcpu))
+		return;
+
+	if (!xc)
+		return;
+
+	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+	/* Ensure no interrupt is still routed to that VP */
+	xc->valid = false;
+	kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+	/* Mask the VP IPI */
+	xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/* Free escalations */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		if (xc->esc_virq[i]) {
+			/* stale_p must be fixed up before the irq is freed */
+			if (kvmppc_xive_has_single_escalation(xc->xive))
+				xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
+			free_irq(xc->esc_virq[i], vcpu);
+			irq_dispose_mapping(xc->esc_virq[i]);
+			kfree(xc->esc_virq_names[i]);
+		}
+	}
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Clear the cam word so guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
+
+	/* Free the queues */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
+		xive_native_disable_queue(xc->vp_id, q, i);
+		if (q->qpage) {
+			free_pages((unsigned long)q->qpage,
+				   xive->q_page_order);
+			q->qpage = NULL;
+		}
+	}
+
+	/* Free the IPI */
+	if (xc->vp_ipi) {
+		xive_cleanup_irq_data(&xc->vp_ipi_data);
+		xive_native_free_irq(xc->vp_ipi);
+	}
+	/* Free the VP */
+	kfree(xc);
+
+	/* Cleanup the vcpu */
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+	vcpu->arch.xive_vcpu = NULL;
+}
+
+/*
+ * Tell whether vCPU id @cpu fits in this device's block of
+ * xive->nr_servers VPs: its packed id must be strictly below that count.
+ */
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+	if (kvmppc_pack_vcpu_id(xive->kvm, cpu) >= xive->nr_servers)
+		return false;
+
+	return true;
+}
+
+/*
+ * Compute the VP id backing vCPU id @cpu and store it in @vp.
+ *
+ * The VP block is allocated here on first use. Returns 0 on success,
+ * -EINVAL if @cpu does not fit in the VP block, -ENOSPC if the VP block
+ * cannot be allocated, and -EEXIST if the VP id is already in use in
+ * this VM. @vp is only written on success.
+ */
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+	u32 candidate;
+
+	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+
+	/* Lazily allocate the VP block */
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+		pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+		if (xive->vp_base == XIVE_INVALID_VP)
+			return -ENOSPC;
+	}
+
+	candidate = kvmppc_xive_vp(xive, cpu);
+	if (!kvmppc_xive_vp_in_use(xive->kvm, candidate)) {
+		*vp = candidate;
+		return 0;
+	}
+
+	pr_devel("Duplicate !\n");
+	return -EEXIST;
+}
+
+/*
+ * Attach @vcpu to the XICS-on-XIVE device @dev as server number @cpu.
+ *
+ * Allocates the per-vcpu XIVE state, the VP IPI, enables the VP,
+ * sets up the queues (provisioning those already recorded in
+ * xive->qmap), attaches the priority 0 escalation and routes the IPI.
+ * On success vcpu->arch.irq_type becomes KVMPPC_IRQ_XICS.
+ *
+ * Returns 0 on success, -EPERM if @dev is not this device type or
+ * belongs to another VM, -EBUSY if the vcpu already has an interrupt
+ * controller type set, or the error of the step that failed.
+ */
+int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+			     struct kvm_vcpu *vcpu, u32 cpu)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvmppc_xive_vcpu *xc;
+	int i, r = -EBUSY;
+	u32 vp_id;
+
+	pr_devel("connect_vcpu(cpu=%d)\n", cpu);
+
+	if (dev->ops != &kvm_xive_ops) {
+		pr_devel("Wrong ops !\n");
+		return -EPERM;
+	}
+	if (xive->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+		return -EBUSY;
+
+	/* We need to synchronize with queue provisioning */
+	mutex_lock(&xive->lock);
+
+	r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+	if (r)
+		goto bail;
+
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (!xc) {
+		r = -ENOMEM;
+		goto bail;
+	}
+
+	vcpu->arch.xive_vcpu = xc;
+	xc->xive = xive;
+	xc->vcpu = vcpu;
+	xc->server_num = cpu;
+	xc->vp_id = vp_id;
+	xc->mfrr = 0xff;
+	xc->valid = true;
+
+	r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+	if (r)
+		goto bail;
+
+	if (!kvmppc_xive_check_save_restore(vcpu)) {
+		pr_err("inconsistent save-restore setup for VCPU %d\n", cpu);
+		r = -EIO;
+		goto bail;
+	}
+
+	/* Configure VCPU fields for use by assembly push/pull */
+	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+	/* Allocate IPI */
+	xc->vp_ipi = xive_native_alloc_irq();
+	if (!xc->vp_ipi) {
+		pr_err("Failed to allocate xive irq for VCPU IPI\n");
+		r = -EIO;
+		goto bail;
+	}
+	pr_devel(" IPI=0x%x\n", xc->vp_ipi);
+
+	r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
+	if (r)
+		goto bail;
+
+	/*
+	 * Enable the VP first as the single escalation mode will
+	 * affect escalation interrupts numbering
+	 */
+	r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
+	if (r) {
+		pr_err("Failed to enable VP in OPAL, err %d\n", r);
+		goto bail;
+	}
+
+	/*
+	 * Initialize queues. Initially we set them all for no queueing
+	 * and we enable escalation for queue 0 only which we'll use for
+	 * our mfrr change notifications. If the VCPU is hot-plugged, we
+	 * do handle provisioning however based on the existing "map"
+	 * of enabled queues.
+	 */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		struct xive_q *q = &xc->queues[i];
+
+		/* Single escalation, no queue 7 */
+		if (i == 7 && kvmppc_xive_has_single_escalation(xive))
+			break;
+
+		/* Is queue already enabled ? Provision it */
+		if (xive->qmap & (1 << i)) {
+			r = xive_provision_queue(vcpu, i);
+			/*
+			 * NOTE(review): the result of
+			 * kvmppc_xive_attach_escalation() is not checked
+			 * here, unlike the priority 0 call below - confirm
+			 * this is intentional best-effort.
+			 */
+			if (r == 0 && !kvmppc_xive_has_single_escalation(xive))
+				kvmppc_xive_attach_escalation(
+					vcpu, i, kvmppc_xive_has_single_escalation(xive));
+			if (r)
+				goto bail;
+		} else {
+			r = xive_native_configure_queue(xc->vp_id,
+							q, i, NULL, 0, true);
+			if (r) {
+				pr_err("Failed to configure queue %d for VCPU %d\n",
+				       i, cpu);
+				goto bail;
+			}
+		}
+	}
+
+	/* If not done above, attach priority 0 escalation */
+	r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
+	if (r)
+		goto bail;
+
+	/* Route the IPI */
+	r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
+	if (!r)
+		xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
+
+bail:
+	mutex_unlock(&xive->lock);
+	if (r) {
+		/*
+		 * NOTE(review): kvmppc_xive_cleanup_vcpu() returns early
+		 * when kvmppc_xics_enabled(vcpu) is false, and irq_type is
+		 * only set to KVMPPC_IRQ_XICS below - confirm this failure
+		 * path really releases xc and the resources set up above.
+		 */
+		kvmppc_xive_cleanup_vcpu(vcpu);
+		return r;
+	}
+
+	vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+	return 0;
+}
+
+/*
+ * Scanning of queues before/after migration save
+ */
+/*
+ * Flag source @irq as sitting in a vcpu queue (in_queue = true).
+ *
+ * Unknown sources are silently skipped; a source that is not valid is
+ * reported and left untouched. A queued source without saved_p set is
+ * reported as well but still gets flagged.
+ */
+static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_src_block *blk;
+	struct kvmppc_xive_irq_state *src;
+	u16 slot;
+
+	blk = kvmppc_xive_find_source(xive, irq, &slot);
+	if (!blk)
+		return;
+
+	src = &blk->irq_state[slot];
+
+	if (src->valid) {
+		/*
+		 * An interrupt found in a queue is expected to have P set.
+		 * A plain message is enough, a backtrace would not help.
+		 */
+		if (!src->saved_p)
+			pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
+
+		src->in_queue = true;
+		return;
+	}
+
+	/* Sanity check failed */
+	pr_err("invalid irq 0x%x in cpu queue!\n", irq);
+}
+
+/*
+ * Mask entry @irq of source block @sb for the migration scan and record
+ * what it looked like: the previous priority goes to saved_scan_prio and
+ * the old P/Q bits to saved_p/saved_q. Invalid entries are skipped.
+ */
+static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
+				   struct kvmppc_xive_src_block *sb,
+				   u32 irq)
+{
+	struct kvmppc_xive_irq_state *src = &sb->irq_state[irq];
+
+	if (src->valid) {
+		/* Masking also syncs the HW queues; returns with sb->lock held */
+		src->saved_scan_prio = xive_lock_and_mask(xive, sb, src);
+
+		/* Keep a copy of P and Q */
+		src->saved_p = src->old_p;
+		src->saved_q = src->old_q;
+
+		arch_spin_unlock(&sb->lock);
+	}
+}
+
+/*
+ * Counterpart of xive_pre_save_mask_irq(): put entry @irq of source
+ * block @sb back to the priority recorded in saved_scan_prio, unless it
+ * was masked to begin with. Invalid entries are skipped.
+ */
+static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
+				     struct kvmppc_xive_src_block *sb,
+				     u32 irq)
+{
+	struct kvmppc_xive_irq_state *src = &sb->irq_state[irq];
+
+	if (src->valid) {
+		/*
+		 * Lock / exclude EOI. Not technically necessary if the
+		 * guest isn't running concurrently; the lock can probably
+		 * go if this ever becomes a performance issue.
+		 */
+		xive_lock_for_unmask(sb, src);
+
+		/* Only sources that were unmasked get their prio back */
+		if (src->saved_scan_prio != MASKED)
+			xive_finish_unmask(xive, sb, src, src->saved_scan_prio);
+
+		arch_spin_unlock(&sb->lock);
+	}
+}
+
+/*
+ * Walk event queue @q from its current index/toggle, using private
+ * copies so the queue's own position is left alone, and flag every
+ * entry above XICS_IPI as queued. The walk stops at the first 0 entry.
+ */
+static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
+{
+	u32 pos = q->idx;
+	u32 gen = q->toggle;
+	u32 irq;
+
+	for (;;) {
+		irq = __xive_read_eq(q->qpage, q->msk, &pos, &gen);
+		if (irq > XICS_IPI)
+			xive_pre_save_set_queued(xive, irq);
+		if (!irq)
+			break;
+	}
+}
+
+/*
+ * Collect a stable state for all interrupts before saving them; see the
+ * comment in xive_get_source() for the rationale.
+ *
+ * Three passes: mask every source (recording its state), scan every
+ * vcpu queue to set the "in_queue" flags, then restore the sources.
+ */
+static void xive_pre_save_scan(struct kvmppc_xive *xive)
+{
+	struct kvmppc_xive_src_block *blk;
+	struct kvmppc_xive_vcpu *xc;
+	struct kvm_vcpu *vcpu = NULL;
+	unsigned long n;
+	int k;
+
+	/* Pass 1: mask all sources and record their state */
+	for (n = 0; n <= xive->max_sbid; n++) {
+		blk = xive->src_blocks[n];
+		if (blk) {
+			for (k = 0; k < KVMPPC_XICS_IRQ_PER_ICS; k++)
+				xive_pre_save_mask_irq(xive, blk, k);
+		}
+	}
+
+	/* Pass 2: scan the queues and update the "in_queue" flag */
+	kvm_for_each_vcpu(n, vcpu, xive->kvm) {
+		xc = vcpu->arch.xive_vcpu;
+		if (!xc)
+			continue;
+		for (k = 0; k < KVMPPC_XIVE_Q_COUNT; k++) {
+			if (!xc->queues[k].qpage)
+				continue;
+			xive_pre_save_queue(xive, &xc->queues[k]);
+		}
+	}
+
+	/* Pass 3: restore interrupt states */
+	for (n = 0; n <= xive->max_sbid; n++) {
+		blk = xive->src_blocks[n];
+		if (blk) {
+			for (k = 0; k < KVMPPC_XICS_IRQ_PER_ICS; k++)
+				xive_pre_save_unmask_irq(xive, blk, k);
+		}
+	}
+}
+
+/*
+ * End of a save pass: drop every in_queue flag and reset
+ * saved_src_count so that the next get_source() starts a new scan.
+ */
+static void xive_post_save_scan(struct kvmppc_xive *xive)
+{
+	struct kvmppc_xive_src_block *blk;
+	u32 bid, slot;
+
+	for (bid = 0; bid <= xive->max_sbid; bid++) {
+		blk = xive->src_blocks[bid];
+		if (blk) {
+			for (slot = 0; slot < KVMPPC_XICS_IRQ_PER_ICS; slot++)
+				blk->irq_state[slot].in_queue = false;
+		}
+	}
+
+	/* Next get_source() will do a new scan */
+	xive->saved_src_count = 0;
+}
+
+/*
+ * This returns the source configuration and state to user space.
+ *
+ * @irq is the guest interrupt number and @addr the user address that
+ * receives the 64-bit XICS-format state word.
+ *
+ * Returns 0 on success, -ENOENT if the source does not exist or is not
+ * valid, -EFAULT if the result cannot be written to @addr.
+ */
+static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u64 val, prio;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[idx];
+
+	if (!state->valid)
+		return -ENOENT;
+
+	pr_devel("get_source(%ld)...\n", irq);
+
+	/*
+	 * So to properly save the state into something that looks like a
+	 * XICS migration stream we cannot treat interrupts individually.
+	 *
+	 * We need, instead, mask them all (& save their previous PQ state)
+	 * to get a stable state in the HW, then sync them to ensure that
+	 * any interrupt that had already fired hits its queue, and finally
+	 * scan all the queues to collect which interrupts are still present
+	 * in the queues, so we can set the "pending" flag on them and
+	 * they can be resent on restore.
+	 *
+	 * So we do it all when the "first" interrupt gets saved, all the
+	 * state is collected at that point, the rest of xive_get_source()
+	 * will merely collect and convert that state to the expected
+	 * userspace bit mask.
+	 */
+	if (xive->saved_src_count == 0)
+		xive_pre_save_scan(xive);
+	xive->saved_src_count++;
+
+	/* Convert saved state into something compatible with xics */
+	val = state->act_server;
+	prio = state->saved_scan_prio;
+
+	/* A masked source reports the priority saved when it was masked */
+	if (prio == MASKED) {
+		val |= KVM_XICS_MASKED;
+		prio = state->saved_priority;
+	}
+	val |= prio << KVM_XICS_PRIORITY_SHIFT;
+	if (state->lsi) {
+		val |= KVM_XICS_LEVEL_SENSITIVE;
+		if (state->saved_p)
+			val |= KVM_XICS_PENDING;
+	} else {
+		if (state->saved_p)
+			val |= KVM_XICS_PRESENTED;
+
+		if (state->saved_q)
+			val |= KVM_XICS_QUEUED;
+
+		/*
+		 * We mark it pending (which will attempt a re-delivery)
+		 * if we are in a queue *or* we were masked and had
+		 * Q set which is equivalent to the XICS "masked pending"
+		 * state
+		 *
+		 * NOTE(review): prio was overwritten with saved_priority
+		 * above when the scan priority was MASKED, so this test
+		 * compares saved_priority rather than saved_scan_prio -
+		 * confirm that matches the "were masked" intent.
+		 */
+		if (state->in_queue || (prio == MASKED && state->saved_q))
+			val |= KVM_XICS_PENDING;
+	}
+
+	/*
+	 * If that was the last interrupt saved, reset the
+	 * in_queue flags
+	 */
+	if (xive->saved_src_count == xive->src_count)
+		xive_post_save_scan(xive);
+
+	/* Copy the result to userspace */
+	if (put_user(val, ubufp))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Return the source block covering guest interrupt @irq, creating it
+ * under xive->lock if it does not exist yet.
+ *
+ * A new block has every entry numbered and its guest, saved and actual
+ * priorities set to MASKED. Returns NULL if the block does not exist
+ * and cannot be allocated.
+ *
+ * NOTE(review): the block index derived from @irq is not range-checked
+ * here - callers presumably validate @irq first; confirm.
+ */
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+	struct kvmppc_xive *xive, int irq)
+{
+	struct kvmppc_xive_src_block *sb;
+	int i, bid;
+
+	bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	mutex_lock(&xive->lock);
+
+	/* block already exists - somebody else got here first */
+	if (xive->src_blocks[bid])
+		goto out;
+
+	/* Create the ICS */
+	sb = kzalloc(sizeof(*sb), GFP_KERNEL);
+	if (!sb)
+		goto out;
+
+	sb->id = bid;
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+		sb->irq_state[i].eisn = 0;
+		sb->irq_state[i].guest_priority = MASKED;
+		sb->irq_state[i].saved_priority = MASKED;
+		sb->irq_state[i].act_priority = MASKED;
+	}
+	/* Order the entry initialisation before the block is published */
+	smp_wmb();
+	xive->src_blocks[bid] = sb;
+
+	if (bid > xive->max_sbid)
+		xive->max_sbid = bid;
+
+out:
+	mutex_unlock(&xive->lock);
+	/* Re-read the slot: it is NULL only if the allocation failed */
+	return xive->src_blocks[bid];
+}
+
+/*
+ * Look for a vcpu that has @irq stashed as its delayed interrupt. The
+ * first match is consumed: its delayed_irq is cleared and the device's
+ * delayed_irqs counter decremented.
+ *
+ * Returns true if a match was found, false otherwise.
+ */
+static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_vcpu *xc;
+	struct kvm_vcpu *vcpu = NULL;
+	unsigned long n;
+
+	kvm_for_each_vcpu(n, vcpu, xive->kvm) {
+		xc = vcpu->arch.xive_vcpu;
+		if (xc && xc->delayed_irq == irq) {
+			xc->delayed_irq = 0;
+			xive->delayed_irqs--;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Restore the configuration and state of source @irq from the XICS
+ * format state word stored at user address @addr.
+ *
+ * The source block and the backing IPI are created on first use. The
+ * source is then masked, retargeted when its priority is not MASKED,
+ * and its P/Q/LSI state is rebuilt from the KVM_XICS_* flags before it
+ * is unmasked again if the state word says so.
+ *
+ * Returns 0 on success, -ENOENT if @irq is out of range, -ENOMEM if the
+ * source block or the IPI cannot be allocated, -EFAULT if @addr cannot
+ * be read, or the error from xive_native_populate_irq_data().
+ * Provisioning or targeting failures are not reported: the source is
+ * simply left masked.
+ */
+static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 act_prio, guest_prio;
+	u32 server;
+	int rc = 0;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	pr_devel("set_source(irq=0x%lx)\n", irq);
+
+	/* Find the source */
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb) {
+		pr_devel("No source, creating source block...\n");
+		sb = kvmppc_xive_create_src_block(xive, irq);
+		if (!sb) {
+			pr_devel("Failed to create block...\n");
+			return -ENOMEM;
+		}
+	}
+	state = &sb->irq_state[idx];
+
+	/* Read user passed data */
+	if (get_user(val, ubufp)) {
+		pr_devel("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
+
+	pr_devel("  val=0x%016llx (server=0x%x, guest_prio=%d)\n",
+		 val, server, guest_prio);
+
+	/*
+	 * If the source doesn't already have an IPI, allocate
+	 * one and get the corresponding data
+	 */
+	if (!state->ipi_number) {
+		state->ipi_number = xive_native_alloc_irq();
+		if (state->ipi_number == 0) {
+			pr_devel("Failed to allocate IPI !\n");
+			return -ENOMEM;
+		}
+		rc = xive_native_populate_irq_data(state->ipi_number,
+						   &state->ipi_data);
+		if (rc) {
+			/*
+			 * Without its data the IPI cannot be used by the
+			 * masking code below: give it back and let a
+			 * later call start over.
+			 */
+			pr_devel("Failed to populate IPI data !\n");
+			xive_native_free_irq(state->ipi_number);
+			state->ipi_number = 0;
+			return rc;
+		}
+		pr_devel(" src_ipi=0x%x\n", state->ipi_number);
+	}
+
+	/*
+	 * We use lock_and_mask() to set us in the right masked
+	 * state. We will override that state from the saved state
+	 * further down, but this will handle the cases of interrupts
+	 * that need FW masking. We set the initial guest_priority to
+	 * 0 before calling it to ensure it actually performs the masking.
+	 */
+	state->guest_priority = 0;
+	xive_lock_and_mask(xive, sb, state);
+
+	/*
+	 * Now, we select a target if we have one. If we don't we
+	 * leave the interrupt untargetted. It means that an interrupt
+	 * can become "untargetted" across migration if it was masked
+	 * by set_xive() but there is little we can do about it.
+	 */
+
+	/* First convert prio and mark interrupt as untargetted */
+	act_prio = xive_prio_from_guest(guest_prio);
+	state->act_priority = MASKED;
+
+	/*
+	 * We need to drop the lock due to the mutex below. Hopefully
+	 * nothing is touching that interrupt yet since it hasn't been
+	 * advertized to a running guest yet
+	 */
+	arch_spin_unlock(&sb->lock);
+
+	/* If we have a priority target the interrupt */
+	if (act_prio != MASKED) {
+		/* First, check provisioning of queues */
+		mutex_lock(&xive->lock);
+		rc = xive_check_provisioning(xive->kvm, act_prio);
+		mutex_unlock(&xive->lock);
+
+		/* Target interrupt */
+		if (rc == 0)
+			rc = xive_target_interrupt(xive->kvm, state,
+						   server, act_prio);
+		/*
+		 * If provisioning or targetting failed, leave it
+		 * alone and masked. It will remain disabled until
+		 * the guest re-targets it.
+		 */
+	}
+
+	/*
+	 * Find out if this was a delayed irq stashed in an ICP,
+	 * in which case, treat it as pending
+	 */
+	if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
+		val |= KVM_XICS_PENDING;
+		pr_devel("  Found delayed ! forcing PENDING !\n");
+	}
+
+	/* Cleanup the SW state */
+	state->old_p = false;
+	state->old_q = false;
+	state->lsi = false;
+	state->asserted = false;
+
+	/* Restore LSI state */
+	if (val & KVM_XICS_LEVEL_SENSITIVE) {
+		state->lsi = true;
+		if (val & KVM_XICS_PENDING)
+			state->asserted = true;
+		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
+	}
+
+	/*
+	 * Restore P and Q. If the interrupt was pending, we
+	 * force Q and !P, which will trigger a resend.
+	 *
+	 * That means that a guest that had both an interrupt
+	 * pending (queued) and Q set will restore with only
+	 * one instance of that interrupt instead of 2, but that
+	 * is perfectly fine as coalescing interrupts that haven't
+	 * been presented yet is always allowed.
+	 */
+	if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
+		state->old_p = true;
+	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
+		state->old_q = true;
+
+	pr_devel("  P=%d, Q=%d\n", state->old_p, state->old_q);
+
+	/*
+	 * If the interrupt was unmasked, update guest priority and
+	 * perform the appropriate state transition and do a
+	 * re-trigger if necessary.
+	 */
+	if (val & KVM_XICS_MASKED) {
+		pr_devel("  masked, saving prio\n");
+		state->guest_priority = MASKED;
+		state->saved_priority = guest_prio;
+	} else {
+		pr_devel("  unmasked, restoring to prio %d\n", guest_prio);
+		xive_finish_unmask(xive, sb, state, guest_prio);
+		state->saved_priority = guest_prio;
+	}
+
+	/* Increment the number of valid sources and mark this one valid */
+	if (!state->valid)
+		xive->src_count++;
+	state->valid = true;
+
+	return 0;
+}
+
+/*
+ * Change the level of guest interrupt @irq and trigger its IPI when the
+ * interrupt is being raised.
+ *
+ * Lowering the line (@level 0 or KVM_INTERRUPT_UNSET) only clears the
+ * asserted flag. @irq_source_id and @line_status are not used.
+ *
+ * Returns 0 on success, -ENODEV if the VM has no XIVE device, -EINVAL
+ * if the source is unknown, not valid, or passed-through.
+ */
+int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+			bool line_status)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *blk;
+	struct kvmppc_xive_irq_state *src;
+	u16 slot;
+
+	if (!xive)
+		return -ENODEV;
+
+	blk = kvmppc_xive_find_source(xive, irq, &slot);
+	if (!blk)
+		return -EINVAL;
+
+	/* Perform locklessly .... (we need to do some RCUisms here...) */
+	src = &blk->irq_state[slot];
+
+	/* Reject invalid sources and triggers on passed-through interrupts */
+	if (!src->valid || src->pt_number)
+		return -EINVAL;
+
+	if ((level == 1 && src->lsi) || level == KVM_INTERRUPT_SET_LEVEL) {
+		src->asserted = true;
+	} else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
+		/* Line going down: nothing to trigger */
+		src->asserted = false;
+		return 0;
+	}
+
+	/* Trigger the IPI */
+	xive_irq_trigger(&src->ipi_data);
+
+	return 0;
+}
+
+/*
+ * Set the number of servers (VPs) of the device from the u32 stored at
+ * user address @addr. Values above KVM_MAX_VCPUS are clamped to it.
+ *
+ * Returns 0 on success, -EFAULT if @addr cannot be read, -EINVAL if the
+ * value is 0 or above KVM_MAX_VCPU_IDS, -EBUSY if the VP block has
+ * already been allocated.
+ */
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+{
+	u32 __user *ubufp = (u32 __user *) addr;
+	u32 nr_servers;
+	int rc = 0;
+
+	if (get_user(nr_servers, ubufp))
+		return -EFAULT;
+
+	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+	if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS)
+		return -EINVAL;
+
+	mutex_lock(&xive->lock);
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		/*
+		 * We don't need more than KVM_MAX_VCPUS servers: higher
+		 * vCPU ids get packed down below KVM_MAX_VCPUS by
+		 * kvmppc_pack_vcpu_id().
+		 */
+		xive->nr_servers = nr_servers > KVM_MAX_VCPUS ?
+			KVM_MAX_VCPUS : nr_servers;
+	} else {
+		/*
+		 * The VP block is allocated once and freed when the device
+		 * is released. Its size must not change afterwards since
+		 * connect_vcpu relies on it to validate vCPU ids (e.g.
+		 * raising it again could let connect_vcpu come up with a
+		 * VP id beyond the VP block, which is likely to cause a
+		 * crash in OPAL).
+		 */
+		rc = -EBUSY;
+	}
+	mutex_unlock(&xive->lock);
+
+	return rc;
+}
+
+/*
+ * Device attribute "set" handler. The existing XICS ioctl layout is
+ * honored: source state goes to xive_set_source() and the NR_SERVERS
+ * control to kvmppc_xive_set_nr_servers(). Anything else is -ENXIO.
+ */
+static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	if (attr->group == KVM_DEV_XICS_GRP_SOURCES)
+		return xive_set_source(xive, attr->attr, attr->addr);
+
+	if (attr->group == KVM_DEV_XICS_GRP_CTRL &&
+	    attr->attr == KVM_DEV_XICS_NR_SERVERS)
+		return kvmppc_xive_set_nr_servers(xive, attr->addr);
+
+	return -ENXIO;
+}
+
+/*
+ * Device attribute "get" handler. The existing XICS ioctl layout is
+ * honored: only the sources group is readable, through
+ * xive_get_source(). Anything else is -ENXIO.
+ */
+static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
+		return -ENXIO;
+
+	return xive_get_source(xive, attr->attr, attr->addr);
+}
+
+/*
+ * Device attribute probe. Same limits as XICS, at least for now: a
+ * source attribute must lie in [KVMPPC_XICS_FIRST_IRQ,
+ * KVMPPC_XICS_NR_IRQS) and the only control attribute is NR_SERVERS.
+ *
+ * Returns 0 if the attribute is supported, -ENXIO otherwise.
+ */
+static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	if (attr->group == KVM_DEV_XICS_GRP_SOURCES) {
+		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XICS_NR_IRQS)
+			return 0;
+	} else if (attr->group == KVM_DEV_XICS_GRP_CTRL) {
+		if (attr->attr == KVM_DEV_XICS_NR_SERVERS)
+			return 0;
+	}
+
+	return -ENXIO;
+}
+
+/*
+ * Turn off HW interrupt @hw_num: load XIVE_ESB_SET_PQ_01 on its ESB
+ * through @xd, then reconfigure it with priority MASKED.
+ */
+static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
+{
+	xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+	xive_native_configure_irq(hw_num, 0, MASKED, 0);
+}
+
+/*
+ * Release every valid source of block @sb: its IPI is turned off, its
+ * irq data cleaned up and the IPI freed. A passed-through interrupt is
+ * turned off too, but its HW irq data is kept. Each source is marked
+ * invalid afterwards; the block itself is not freed here.
+ */
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+{
+	struct kvmppc_xive_irq_state *src;
+	int slot;
+
+	for (slot = 0; slot < KVMPPC_XICS_IRQ_PER_ICS; slot++) {
+		src = &sb->irq_state[slot];
+
+		if (src->valid) {
+			kvmppc_xive_cleanup_irq(src->ipi_number, &src->ipi_data);
+			xive_cleanup_irq_data(&src->ipi_data);
+			xive_native_free_irq(src->ipi_number);
+
+			/* Pass-through, cleanup too but keep IRQ hw data */
+			if (src->pt_number)
+				kvmppc_xive_cleanup_irq(src->pt_number, src->pt_data);
+
+			src->valid = false;
+		}
+	}
+}
+
+/*
+ * Called when device fd is closed.  kvm->lock is held.
+ *
+ * Order of teardown: debugfs entry, per-vcpu presenters (each under its
+ * vcpu->mutex), kvm->arch.xive pointer, source blocks, VP block, and
+ * finally the kvm_device itself. The kvmppc_xive structure is not freed
+ * here (see the comment before kfree(dev)).
+ */
+static void kvmppc_xive_release(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	pr_devel("Releasing xive device\n");
+
+	/*
+	 * Since this is the device release function, we know that
+	 * userspace does not have any open fd referring to the
+	 * device.  Therefore there can not be any of the device
+	 * attribute set/get functions being executed concurrently,
+	 * and similarly, the connect_vcpu and set/clr_mapped
+	 * functions also cannot be being executed.
+	 */
+
+	debugfs_remove(xive->dentry);
+
+	/*
+	 * We should clean up the vCPU interrupt presenters first.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/*
+		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+		 * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+		 * Holding the vcpu->mutex also means that the vcpu cannot
+		 * be executing the KVM_RUN ioctl, and therefore it cannot
+		 * be executing the XIVE push or pull code or accessing
+		 * the XIVE MMIO regions.
+		 */
+		mutex_lock(&vcpu->mutex);
+		kvmppc_xive_cleanup_vcpu(vcpu);
+		mutex_unlock(&vcpu->mutex);
+	}
+
+	/*
+	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+	 * against xive code getting called during vcpu execution or
+	 * set/get one_reg operations.
+	 */
+	kvm->arch.xive = NULL;
+
+	/* Mask and free interrupts */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		if (xive->src_blocks[i])
+			kvmppc_xive_free_sources(xive->src_blocks[i]);
+		/* kfree() is called unconditionally, also on empty slots */
+		kfree(xive->src_blocks[i]);
+		xive->src_blocks[i] = NULL;
+	}
+
+	if (xive->vp_base != XIVE_INVALID_VP)
+		xive_native_free_vp_block(xive->vp_base);
+
+	/*
+	 * A reference of the kvmppc_xive pointer is now kept under
+	 * the xive_devices struct of the machine for reuse. It is
+	 * freed when the VM is destroyed for now until we fix all the
+	 * execution paths.
+	 */
+
+	kfree(dev);
+}
+
+/*
+ * When the guest chooses the interrupt mode (XICS legacy or XIVE
+ * native), the VM will switch KVM devices. The previous device will
+ * be "released" before the new one is created.
+ *
+ * Until we are sure all execution paths are well protected, provide a
+ * fail safe (transitional) method for device destruction, in which
+ * the XIVE device pointer is recycled and not directly freed.
+ */
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
+{
+	struct kvmppc_xive **slot;
+
+	/* One recycling slot per device flavour */
+	if (type == KVM_DEV_TYPE_XIVE)
+		slot = &kvm->arch.xive_devices.native;
+	else
+		slot = &kvm->arch.xive_devices.xics_on_xive;
+
+	/* A previously released device is wiped and handed out again */
+	if (*slot) {
+		memset(*slot, 0, sizeof(**slot));
+		return *slot;
+	}
+
+	/* First use: allocate and remember it (NULL on allocation failure) */
+	*slot = kzalloc(sizeof(**slot), GFP_KERNEL);
+	return *slot;
+}
+
+/*
+ * Create a XICS device with XIVE backend.  kvm->lock is held.
+ */
+static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
+{
+	struct kvm *kvm = dev->kvm;
+	struct kvmppc_xive *xive;
+
+	pr_devel("Creating xive for partition\n");
+
+	/* Refuse a second XIVE device on the same VM */
+	if (kvm->arch.xive)
+		return -EEXIST;
+
+	xive = kvmppc_xive_get_device(kvm, type);
+	if (!xive)
+		return -ENOMEM;
+
+	/* Cross-link the KVM device and the XIVE state */
+	xive->kvm = kvm;
+	xive->dev = dev;
+	dev->private = xive;
+	mutex_init(&xive->lock);
+
+	/*
+	 * We use the default queue size set by the host. q_page_order is
+	 * the same size as a page order, 0 when it is below one page.
+	 */
+	xive->q_order = xive_native_default_eq_shift();
+	xive->q_page_order = xive->q_order < PAGE_SHIFT ?
+		0 : xive->q_order - PAGE_SHIFT;
+
+	/* VP allocation is delayed to the first call to connect_vcpu */
+	xive->vp_base = XIVE_INVALID_VP;
+	/*
+	 * KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
+	 * on a POWER9 system.
+	 */
+	xive->nr_servers = KVM_MAX_VCPUS;
+
+	/* Record which optional host features are available */
+	if (xive_native_has_single_escalation())
+		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+	if (xive_native_has_save_restore())
+		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
+
+	kvm->arch.xive = xive;
+	return 0;
+}
+
+/*
+ * Dispatch one XICS hcall @req issued by @vcpu to its XIVE-backed handler.
+ * Returns the handler's status, H_TOO_HARD when XICS mode is not enabled on
+ * the vCPU, or H_UNSUPPORTED for any other hcall number.
+ */
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	/* The VM should have configured XICS mode before doing XICS hcalls. */
+	if (!kvmppc_xics_enabled(vcpu))
+		return H_TOO_HARD;
+
+	switch (req) {
+	case H_XIRR:
+		return xive_vm_h_xirr(vcpu);
+	case H_XIRR_X:
+		/* As H_XIRR, plus the timebase offset by the vcore in r5 */
+		xive_vm_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 5,
+			       get_tb() + vcpu->arch.vcore->tb_offset);
+		return H_SUCCESS;
+	case H_CPPR:
+		return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+	case H_EOI:
+		return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+	case H_IPI:
+		return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+					  kvmppc_get_gpr(vcpu, 5));
+	case H_IPOLL:
+		return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+	default:
+		return H_UNSUPPORTED;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
+/*
+ * Print, for each priority of @vcpu, the head of its event queue (if a queue
+ * page is set) and the state of its escalation interrupt (if one is set).
+ * The caller must pass a vCPU whose xive_vcpu is not NULL. Always returns 0.
+ */
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	unsigned int prio;
+
+	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+
+		/* Event queue: toggle bit plus the entry at idx and the next */
+		if (q->qpage) {
+			u32 pos = q->idx;
+			u32 first, second;
+
+			seq_printf(m, "    q[%d]: ", prio);
+			first = be32_to_cpup(q->qpage + pos);
+			pos = (pos + 1) & q->msk;
+			second = be32_to_cpup(q->qpage + pos);
+			seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+				   first, second);
+		}
+
+		/* Escalation interrupt: PQ bits read from its ESB, EOI page */
+		if (xc->esc_virq[prio]) {
+			struct irq_data *d =
+				irq_get_irq_data(xc->esc_virq[prio]);
+			struct xive_irq_data *xd =
+				irq_data_get_irq_handler_data(d);
+			u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+			seq_printf(m, "    ESC %d %c%c EOI @%llx",
+				   xc->esc_virq[prio],
+				   (pq & XIVE_ESB_VAL_P) ? 'P' : '-',
+				   (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-',
+				   xd->eoi_page);
+			seq_puts(m, "\n");
+		}
+	}
+	return 0;
+}
+
+/*
+ * Print a header line followed by one line per valid source of block @sb.
+ * xive_debug_show() calls this with sb->lock held.
+ */
+void kvmppc_xive_debug_show_sources(struct seq_file *m,
+				    struct kvmppc_xive_src_block *sb)
+{
+	struct kvmppc_xive_irq_state *state;
+	int n;
+
+	seq_puts(m, "    LISN      HW/CHIP   TYPE    PQ      EISN    CPU/PRIO\n");
+	for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++) {
+		struct xive_irq_data *xd;
+		u32 hw_num;
+		u64 pq;
+
+		state = &sb->irq_state[n];
+		if (!state->valid)
+			continue;
+
+		/* Pass-through interrupt if one is set, the IPI otherwise */
+		kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+		pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+		seq_printf(m, "%08x  %08x/%02x", state->number, hw_num,
+			   xd->src_chip);
+
+		if (!state->lsi)
+			seq_puts(m, "  MSI");
+		else
+			seq_printf(m, " %cLSI", state->asserted ? '^' : ' ');
+
+		seq_printf(m, " %s  %c%c  %08x   % 4d/%d",
+			   state->ipi_number == hw_num ? "IPI" : " PT",
+			   pq & XIVE_ESB_VAL_P ? 'P' : '-',
+			   pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+			   state->eisn, state->act_server,
+			   state->act_priority);
+
+		seq_puts(m, "\n");
+	}
+}
+
+/*
+ * Show callback of the "xive" debugfs file: dumps the state of every vCPU
+ * that has a XIVE presenter, the hcall counters summed over those vCPUs,
+ * and all valid sources of the device.
+ *
+ * m->private is the kvmppc_xive passed to debugfs_create_file().
+ * Always returns 0.
+ */
+static int xive_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xive *xive = m->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	u64 t_rm_h_xirr = 0;
+	u64 t_rm_h_ipoll = 0;
+	u64 t_rm_h_cppr = 0;
+	u64 t_rm_h_eoi = 0;
+	u64 t_rm_h_ipi = 0;
+	u64 t_vm_h_xirr = 0;
+	u64 t_vm_h_ipoll = 0;
+	u64 t_vm_h_cppr = 0;
+	u64 t_vm_h_eoi = 0;
+	u64 t_vm_h_ipi = 0;
+	unsigned long i;
+
+	/* No VM behind this device: nothing to dump */
+	if (!kvm)
+		return 0;
+
+	seq_puts(m, "=========\nVCPU state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		/* Skip vCPUs that have no XIVE presenter */
+		if (!xc)
+			continue;
+
+		/* The hcall counters are u64, hence the unsigned %llu */
+		seq_printf(m, "VCPU %d: VP:%#x/%02x\n"
+			 "    CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%llu V=%llu\n",
+			 xc->server_num, xc->vp_id, xc->vp_chip_id,
+			 xc->cppr, xc->hw_cppr,
+			 xc->mfrr, xc->pending,
+			 xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+
+		kvmppc_xive_debug_show_queues(m, vcpu);
+
+		t_rm_h_xirr += xc->stat_rm_h_xirr;
+		t_rm_h_ipoll += xc->stat_rm_h_ipoll;
+		t_rm_h_cppr += xc->stat_rm_h_cppr;
+		t_rm_h_eoi += xc->stat_rm_h_eoi;
+		t_rm_h_ipi += xc->stat_rm_h_ipi;
+		t_vm_h_xirr += xc->stat_vm_h_xirr;
+		t_vm_h_ipoll += xc->stat_vm_h_ipoll;
+		t_vm_h_cppr += xc->stat_vm_h_cppr;
+		t_vm_h_eoi += xc->stat_vm_h_eoi;
+		t_vm_h_ipi += xc->stat_vm_h_ipi;
+	}
+
+	seq_puts(m, "Hcalls totals\n");
+	seq_printf(m, " H_XIRR  R=%10llu V=%10llu\n", t_rm_h_xirr, t_vm_h_xirr);
+	seq_printf(m, " H_IPOLL R=%10llu V=%10llu\n", t_rm_h_ipoll, t_vm_h_ipoll);
+	seq_printf(m, " H_CPPR  R=%10llu V=%10llu\n", t_rm_h_cppr, t_vm_h_cppr);
+	seq_printf(m, " H_EOI   R=%10llu V=%10llu\n", t_rm_h_eoi, t_vm_h_eoi);
+	seq_printf(m, " H_IPI   R=%10llu V=%10llu\n", t_rm_h_ipi, t_vm_h_ipi);
+
+	seq_puts(m, "=========\nSources\n=========\n");
+
+	/* Each source block is dumped under its own lock */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+		if (sb) {
+			arch_spin_lock(&sb->lock);
+			kvmppc_xive_debug_show_sources(m, sb);
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+
+	return 0;
+}
+
+/* Provides xive_debug_fops (used by xive_debugfs_init) around xive_debug_show() */
+DEFINE_SHOW_ATTRIBUTE(xive_debug);
+
+/*
+ * Create the read-only "xive" debugfs file in the VM's debugfs directory and
+ * remember its dentry in @xive so it can be removed later.
+ */
+static void xive_debugfs_init(struct kvmppc_xive *xive)
+{
+	struct dentry *parent = xive->kvm->debugfs_dentry;
+
+	xive->dentry = debugfs_create_file("xive", S_IRUGO, parent, xive,
+					   &xive_debug_fops);
+
+	pr_debug("%s: created\n", __func__);
+}
+
+/* .init device callback: registers the debug interfaces of the XIVE device */
+static void kvmppc_xive_init(struct kvm_device *dev)
+{
+	xive_debugfs_init(dev->private);
+}
+
+/*
+ * KVM device callbacks of the "kvm-xive" device (XICS device with XIVE
+ * backend). Not static, so presumably registered from outside this file.
+ */
+struct kvm_device_ops kvm_xive_ops = {
+	.name = "kvm-xive",
+	.create = kvmppc_xive_create,
+	.init = kvmppc_xive_init,
+	.release = kvmppc_xive_release,
+	.set_attr = xive_set_attr,
+	.get_attr = xive_get_attr,
+	.has_attr = xive_has_attr,
+};
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
new file mode 100644
index 0000000000..62bf39f537
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XIVE_H
+#define _KVM_PPC_BOOK3S_XIVE_H
+
+#ifdef CONFIG_KVM_XICS
+#include "book3s_xics.h"
+
+/*
+ * The XIVE Interrupt source numbers are within the range 0 to
+ * KVMPPC_XICS_NR_IRQS.
+ */
+#define KVMPPC_XIVE_FIRST_IRQ	0
+#define KVMPPC_XIVE_NR_IRQS	KVMPPC_XICS_NR_IRQS
+
+/*
+ * State for one guest irq source.
+ *
+ * For each guest source we allocate a HW interrupt in the XIVE
+ * which we use for all SW triggers. It will be unused for
+ * pass-through but it's easier to keep around as the same
+ * guest interrupt can alternatively be emulated or pass-through
+ * if a physical device is hot unplugged and replaced with an
+ * emulated one.
+ *
+ * This state structure is very similar to the XICS one with
+ * additional XIVE specific tracking.
+ */
+struct kvmppc_xive_irq_state {
+	bool valid;			/* Interrupt entry is valid */
+
+	u32 number;			/* Guest IRQ number */
+	u32 ipi_number;			/* XIVE IPI HW number */
+	struct xive_irq_data ipi_data;	/* XIVE IPI associated data */
+	u32 pt_number;			/* XIVE Pass-through number if any (0 if none) */
+	struct xive_irq_data *pt_data;	/* XIVE Pass-through associated data */
+
+	/* Targeting as set by guest */
+	u8 guest_priority;		/* Guest set priority */
+	u8 saved_priority;		/* Saved priority when masking */
+
+	/* Actual targeting */
+	u32 act_server;			/* Actual server */
+	u8 act_priority;		/* Actual priority */
+
+	/* Various state bits */
+	bool in_eoi;			/* Synchronize with H_EOI */
+	bool old_p;			/* P bit state when masking */
+	bool old_q;			/* Q bit state when masking */
+	bool lsi;			/* level-sensitive interrupt */
+	bool asserted;			/* Only for emulated LSI: current state */
+
+	/* Saved for migration state */
+	bool in_queue;
+	bool saved_p;
+	bool saved_q;
+	u8 saved_scan_prio;
+
+	/* Xive native */
+	u32 eisn;			/* Guest Effective IRQ number */
+};
+
+/* Select the "right" interrupt (IPI vs. passthrough) */
+static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
+					  u32 *out_hw_irq,
+					  struct xive_irq_data **out_xd)
+{
+	/* A non-zero pass-through number takes precedence over the IPI */
+	bool passthrough = state->pt_number != 0;
+
+	/* Either output pointer may be NULL when the caller does not need it */
+	if (out_hw_irq)
+		*out_hw_irq = passthrough ? state->pt_number :
+					    state->ipi_number;
+	if (out_xd)
+		*out_xd = passthrough ? state->pt_data : &state->ipi_data;
+}
+
+/*
+ * This corresponds to an "ICS" in XICS terminology; we use it
+ * as a means to break up source information into multiple structures.
+ */
+struct kvmppc_xive_src_block {
+	arch_spinlock_t lock;	/* Taken around accesses to irq_state[] entries */
+	u16 id;
+	/* One entry per source of the block, indexed by the low bits of the IRQ */
+	struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+};
+
+struct kvmppc_xive;
+
+/* Callbacks a XIVE device flavour can provide through kvmppc_xive.ops */
+struct kvmppc_xive_ops {
+	/* Called with the guest IRQ number; returns 0 or a negative errno */
+	int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq);
+};
+
+/* Bits of kvmppc_xive.flags */
+#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1
+#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2
+
+/* Per-VM state of a XIVE KVM device */
+struct kvmppc_xive {
+	struct kvm *kvm;		/* Owning VM */
+	struct kvm_device *dev;		/* KVM device this state belongs to */
+	struct dentry *dentry;		/* debugfs file of the device */
+
+	/* VP block associated with the VM */
+	u32	vp_base;
+
+	/* Blocks of sources */
+	struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
+	u32	max_sbid;
+
+	/*
+	 * For state save, we lazily scan the queues on the first interrupt
+	 * being migrated. We don't have a clean way to reset that flag
+	 * so we keep track of the number of valid sources and how many of
+	 * them were migrated so we can reset when all of them have been
+	 * processed.
+	 */
+	u32	src_count;
+	u32	saved_src_count;
+
+	/*
+	 * Some irqs are delayed on restore until the source is created,
+	 * keep track here of how many of them
+	 */
+	u32	delayed_irqs;
+
+	/* Which queues (priorities) are in use by the guest */
+	u8	qmap;
+
+	/* Queue orders */
+	u32	q_order;
+	u32	q_page_order;
+
+	/* Flags (KVMPPC_XIVE_FLAG_*) */
+	u8	flags;
+
+	/* Number of entries in the VP block */
+	u32	nr_servers;
+
+	struct kvmppc_xive_ops *ops;
+	/* Address space of the device file, used to unmap ESB pages */
+	struct address_space   *mapping;
+	struct mutex mapping_lock;
+	struct mutex lock;
+};
+
+#define KVMPPC_XIVE_Q_COUNT	8
+
+/* Per-vCPU XIVE state (interrupt presenter), hung off vcpu->arch.xive_vcpu */
+struct kvmppc_xive_vcpu {
+	struct kvmppc_xive	*xive;	/* Device this vCPU is connected to */
+	struct kvm_vcpu		*vcpu;	/* Back pointer to the owning vCPU */
+	bool			valid;
+
+	/* Server number. This is the HW CPU ID from a guest perspective */
+	u32			server_num;
+
+	/*
+	 * HW VP corresponding to this VCPU. This is the base of the VP
+	 * block plus the server number.
+	 */
+	u32			vp_id;
+	u32			vp_chip_id;
+	u32			vp_cam;
+
+	/* IPI used for sending ... IPIs */
+	u32			vp_ipi;
+	struct xive_irq_data	vp_ipi_data;
+
+	/* Local emulation state */
+	uint8_t			cppr;	/* guest CPPR */
+	uint8_t			hw_cppr;/* Hardware CPPR */
+	uint8_t			mfrr;
+	uint8_t			pending;
+
+	/* Each VP has 8 queues though we only provision some */
+	struct xive_q		queues[KVMPPC_XIVE_Q_COUNT];
+	/* Escalation interrupt of each queue, 0 when there is none */
+	u32			esc_virq[KVMPPC_XIVE_Q_COUNT];
+	char			*esc_virq_names[KVMPPC_XIVE_Q_COUNT];
+
+	/* Stash a delayed irq on restore from migration (see set_icp) */
+	u32			delayed_irq;
+
+	/* Stats: hcall counters reported through the debugfs file */
+	u64			stat_rm_h_xirr;
+	u64			stat_rm_h_ipoll;
+	u64			stat_rm_h_cppr;
+	u64			stat_rm_h_eoi;
+	u64			stat_rm_h_ipi;
+	u64			stat_vm_h_xirr;
+	u64			stat_vm_h_ipoll;
+	u64			stat_vm_h_cppr;
+	u64			stat_vm_h_eoi;
+	u64			stat_vm_h_ipi;
+};
+
+/*
+ * Return the vCPU of @kvm whose XIVE server number is @nr, or NULL if no
+ * vCPU with XIVE state uses that number.
+ */
+static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	unsigned long idx;
+
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+		if (xc->server_num == nr)
+			return vcpu;
+	}
+	return NULL;
+}
+
+/*
+ * Split guest IRQ number @irq into a block id and an index within the block.
+ * The index is stored in @source when it is not NULL, even if the lookup
+ * fails. Returns the source block, or NULL when the block id is out of range
+ * or the block does not exist.
+ */
+static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
+		u32 irq, u16 *source)
+{
+	u32 block = irq >> KVMPPC_XICS_ICS_SHIFT;
+
+	if (source)
+		*source = irq & KVMPPC_XICS_SRC_MASK;
+
+	return block <= KVMPPC_XICS_MAX_ICS_ID ? xive->src_blocks[block] : NULL;
+}
+
+/*
+ * When the XIVE resources are allocated at the HW level, the VP
+ * structures describing the vCPUs of a guest are distributed among
+ * the chips to optimize the PowerBUS usage. For best performance, the
+ * guest vCPUs can be pinned to match the VP structure distribution.
+ *
+ * Currently, the VP identifiers are deduced from the vCPU id using
+ * the kvmppc_pack_vcpu_id() routine which is not incorrect but not
+ * optimal either. If VSMT is used, the result is not continuous and
+ * the constraints on HW resources described above can not be met.
+ */
+static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+	/* Offset of the server within the VP block of the VM */
+	u32 packed = kvmppc_pack_vcpu_id(xive->kvm, server);
+
+	return xive->vp_base + packed;
+}
+
+/* Tell whether a vCPU of @kvm with XIVE state already uses VP @vp_id */
+static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	unsigned long idx;
+
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+		if (xc->vp_id == vp_id)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Mapping between guest priorities and host priorities
+ * is as follow.
+ *
+ * Guest request for 0...6 are honored. Guest request for anything
+ * higher results in a priority of 6 being applied.
+ *
+ * Similar mapping is done for CPPR values
+ */
+static inline u8 xive_prio_from_guest(u8 prio)
+{
+	/* 0xff and 0..5 are kept as is; everything else becomes 6 */
+	if (prio != 0xff && prio >= 6)
+		return 6;
+	return prio;
+}
+
+/* Host to guest priority: currently the identity */
+static inline u8 xive_prio_to_guest(u8 prio)
+{
+	return prio;
+}
+
+/*
+ * Read the entry at *@idx of the event queue page @qpage.
+ *
+ * Returns 0 when there is no queue page or when the top bit of the entry
+ * equals *@toggle. Otherwise returns the low 31 bits of the entry and
+ * advances *@idx modulo @msk + 1, flipping *@toggle when it wraps to 0.
+ */
+static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
+{
+	u32 entry, next;
+
+	if (!qpage)
+		return 0;
+
+	entry = be32_to_cpup(qpage + *idx);
+	if ((entry >> 31) == *toggle)
+		return 0;
+
+	next = (*idx + 1) & msk;
+	*idx = next;
+	if (!next)
+		*toggle ^= 1;
+
+	return entry & 0x7fffffff;
+}
+
+/*
+ * Common Xive routines for XICS-over-XIVE and XIVE native
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu);
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu);
+void kvmppc_xive_debug_show_sources(struct seq_file *m,
+				    struct kvmppc_xive_src_block *sb);
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+	struct kvmppc_xive *xive, int irq);
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+				  bool single_escalation);
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu);
+
+/* True when the device was created with the single escalation flag set */
+static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive)
+{
+	return !!(xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION);
+}
+
+#endif /* CONFIG_KVM_XICS */
+#endif /* _KVM_PPC_BOOK3S_XIVE_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
new file mode 100644
index 0000000000..712ab91ced
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -0,0 +1,1284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/irqdomain.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+/*
+ * Perform a load at @offset in the EOI/management ESB page of @xd and
+ * return the low byte of the 64-bit value read.
+ */
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+	/*
+	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
+	 * load operation, so there is no need to enforce load-after-store
+	 * ordering.
+	 */
+	return (u8)in_be64(xd->eoi_mmio + offset);
+}
+
+/*
+ * Disable the queue of priority @prio of @vcpu and drop the reference held
+ * on its queue page, if any.
+ */
+static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *queue = &xc->queues[prio];
+
+	xive_native_disable_queue(xc->vp_id, queue, prio);
+
+	if (!queue->qpage)
+		return;
+
+	put_page(virt_to_page(queue->qpage));
+	queue->qpage = NULL;
+}
+
+/*
+ * Configure queue @q through xive_native_configure_queue() and, on success
+ * only, drop the reference on the queue page that was in use before the
+ * call. Returns 0 or the error of xive_native_configure_queue().
+ */
+static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
+					      u8 prio, __be32 *qpage,
+					      u32 order, bool can_escalate)
+{
+	/* Sample the current page before the queue is reconfigured */
+	__be32 *old_qpage = q->qpage;
+	int err;
+
+	err = xive_native_configure_queue(vp_id, q, prio, qpage, order,
+					  can_escalate);
+	if (err)
+		return err;
+
+	if (old_qpage)
+		put_page(virt_to_page(old_qpage));
+
+	return 0;
+}
+
+/*
+ * Tear down the XIVE native state of @vcpu: stop routing interrupts to it,
+ * release its escalation interrupts, disable the VP, release the queue
+ * pages and free the kvmppc_xive_vcpu. Does nothing when
+ * kvmppc_xive_enabled() is false for the vCPU or when it has no XIVE state.
+ */
+void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	int i;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return;
+
+	if (!xc)
+		return;
+
+	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+	/* Ensure no interrupt is still routed to that VP */
+	xc->valid = false;
+	kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+	/* Free escalations */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		/* Free the escalation irq */
+		if (xc->esc_virq[i]) {
+			if (kvmppc_xive_has_single_escalation(xc->xive))
+				xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
+			free_irq(xc->esc_virq[i], vcpu);
+			irq_dispose_mapping(xc->esc_virq[i]);
+			kfree(xc->esc_virq_names[i]);
+			xc->esc_virq[i] = 0;
+		}
+	}
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* Clear the cam word so guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
+
+	/* Free the queues */
+	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+		kvmppc_xive_native_cleanup_queue(vcpu, i);
+	}
+
+	/* Free the VP */
+	kfree(xc);
+
+	/* Cleanup the vcpu */
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+	vcpu->arch.xive_vcpu = NULL;
+}
+
+/*
+ * Connect @vcpu to the XIVE native device @dev as server @server_num.
+ *
+ * Returns 0 on success; -EPERM when @dev does not use the XIVE native ops or
+ * belongs to another VM; -EBUSY when the vCPU irq_type is no longer the
+ * default; -ENOMEM when the per-vCPU state cannot be allocated; -EIO on an
+ * inconsistent save-restore setup; otherwise the error of the failing helper.
+ * Any failure after xive->lock is taken goes through
+ * kvmppc_xive_native_cleanup_vcpu() once the lock is dropped.
+ */
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+				    struct kvm_vcpu *vcpu, u32 server_num)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvmppc_xive_vcpu *xc = NULL;
+	int rc;
+	u32 vp_id;
+
+	pr_devel("native_connect_vcpu(server=%d)\n", server_num);
+
+	if (dev->ops != &kvm_xive_native_ops) {
+		pr_devel("Wrong ops !\n");
+		return -EPERM;
+	}
+	if (xive->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+		return -EBUSY;
+
+	mutex_lock(&xive->lock);
+
+	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+	if (rc)
+		goto bail;
+
+	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+	if (!xc) {
+		rc = -ENOMEM;
+		goto bail;
+	}
+
+	/* Publish the state on the vCPU before the steps that can fail */
+	vcpu->arch.xive_vcpu = xc;
+	xc->xive = xive;
+	xc->vcpu = vcpu;
+	xc->server_num = server_num;
+
+	xc->vp_id = vp_id;
+	xc->valid = true;
+	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+
+	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+	if (rc) {
+		pr_err("Failed to get VP info from OPAL: %d\n", rc);
+		goto bail;
+	}
+
+	if (!kvmppc_xive_check_save_restore(vcpu)) {
+		pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
+		rc = -EIO;
+		goto bail;
+	}
+
+	/*
+	 * Enable the VP first as the single escalation mode will
+	 * affect escalation interrupts numbering
+	 */
+	rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
+	if (rc) {
+		pr_err("Failed to enable VP in OPAL: %d\n", rc);
+		goto bail;
+	}
+
+	/* Configure VCPU fields for use by assembly push/pull */
+	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+	/* TODO: reset all queues to a clean state ? */
+bail:
+	mutex_unlock(&xive->lock);
+	/* Undo the partial setup, outside of xive->lock */
+	if (rc)
+		kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+	return rc;
+}
+
+/*
+ * Device passthrough support
+ */
+static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	pgoff_t first_page;
+
+	if (irq >= KVMPPC_XIVE_NR_IRQS)
+		return -EINVAL;
+
+	/* Each source has two consecutive ESB pages in the device mapping */
+	first_page = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;
+
+	/*
+	 * Clear the ESB pages of the IRQ number being mapped (or
+	 * unmapped) into the guest and let the VM fault handler
+	 * repopulate with the appropriate ESB pages (device or IC)
+	 */
+	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
+
+	mutex_lock(&xive->mapping_lock);
+	/* Nothing to clear until the device has been mmap'ed */
+	if (xive->mapping)
+		unmap_mapping_range(xive->mapping,
+				    first_page << PAGE_SHIFT,
+				    2ull << PAGE_SHIFT, 1);
+	mutex_unlock(&xive->mapping_lock);
+
+	return 0;
+}
+
+/* XIVE native implementation of the kvmppc_xive_ops callbacks */
+static struct kvmppc_xive_ops kvmppc_xive_native_ops =  {
+	.reset_mapped = kvmppc_xive_native_reset_mapped,
+};
+
+/*
+ * Fault handler of the ESB mapping: insert the trigger or EOI page of the
+ * source that owns the faulting page.
+ *
+ * Returns VM_FAULT_SIGBUS when the source does not exist, is not valid or
+ * has no such ESB page; otherwise the result of vmf_insert_pfn(), so that
+ * an insertion failure is reported instead of being retried silently.
+ */
+static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct kvm_device *dev = vma->vm_file->private_data;
+	struct kvmppc_xive *xive = dev->private;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct xive_irq_data *xd;
+	u16 src;
+	u64 page;
+	unsigned long irq;
+	u64 page_offset;
+
+	/*
+	 * Linux/KVM uses a two pages ESB setting, one for trigger and
+	 * one for EOI
+	 */
+	page_offset = vmf->pgoff - vma->vm_pgoff;
+	irq = page_offset / 2;
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb) {
+		pr_devel("%s: source %lx not found !\n", __func__, irq);
+		return VM_FAULT_SIGBUS;
+	}
+
+	state = &sb->irq_state[src];
+
+	/* Some sanity checking */
+	if (!state->valid) {
+		pr_devel("%s: source %lx invalid !\n", __func__, irq);
+		return VM_FAULT_SIGBUS;
+	}
+
+	arch_spin_lock(&sb->lock);
+
+	/*
+	 * Pick the IPI or pass-through data under the block lock so the
+	 * selection and the page read below see the same source state.
+	 */
+	kvmppc_xive_select_irq(state, NULL, &xd);
+
+	/*
+	 * first/even page is for trigger
+	 * second/odd page is for EOI and management.
+	 */
+	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
+	arch_spin_unlock(&sb->lock);
+
+	if (WARN_ON(!page)) {
+		pr_err("%s: accessing invalid ESB page for source %lx !\n",
+		       __func__, irq);
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
+}
+
+/* VMA operations installed by kvmppc_xive_native_mmap() on the ESB mapping */
+static const struct vm_operations_struct xive_native_esb_vmops = {
+	.fault = xive_native_esb_fault,
+};
+
+/*
+ * Fault handler of the TIMA mapping. Only the third page (OS view) can be
+ * mapped; it is backed by xive_tima_os. Every other page gets
+ * VM_FAULT_SIGBUS. For the OS page the result of vmf_insert_pfn() is
+ * returned, so that an insertion failure is reported to the caller.
+ */
+static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+
+	switch (vmf->pgoff - vma->vm_pgoff) {
+	case 0: /* HW - forbid access */
+	case 1: /* HV - forbid access */
+		return VM_FAULT_SIGBUS;
+	case 2: /* OS */
+		return vmf_insert_pfn(vma, vmf->address,
+				      xive_tima_os >> PAGE_SHIFT);
+	case 3: /* USER - TODO */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/* VMA operations installed by kvmppc_xive_native_mmap() on the TIMA mapping */
+static const struct vm_operations_struct xive_native_tima_vmops = {
+	.fault = xive_native_tima_fault,
+};
+
+/*
+ * mmap handler of the XIVE native device. Two regions are accepted, each
+ * at its fixed page offset: the TIMA (at most 4 pages) and the source ESB
+ * pages (at most two pages per source). Returns 0, or -EINVAL for any other
+ * offset or for a mapping larger than the region.
+ */
+static int kvmppc_xive_native_mmap(struct kvm_device *dev,
+				   struct vm_area_struct *vma)
+{
+	struct kvmppc_xive *xive = dev->private;
+	const struct vm_operations_struct *fault_ops;
+	unsigned long max_pages;
+
+	/* We only allow mappings at fixed offset for now */
+	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
+		max_pages = 4;
+		fault_ops = &xive_native_tima_vmops;
+	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
+		max_pages = KVMPPC_XIVE_NR_IRQS * 2;
+		fault_ops = &xive_native_esb_vmops;
+	} else {
+		return -EINVAL;
+	}
+
+	if (vma_pages(vma) > max_pages)
+		return -EINVAL;
+	vma->vm_ops = fault_ops;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+	/*
+	 * Grab the KVM device file address_space to be able to clear
+	 * the ESB pages mapping when a device is passed-through into
+	 * the guest.
+	 */
+	xive->mapping = vma->vm_file->f_mapping;
+	return 0;
+}
+
+/*
+ * Create or re-initialise guest source @irq.
+ *
+ * @addr is a user pointer to a u64 holding KVM_XIVE_LEVEL_* flags. The
+ * source block is created on demand, an IPI is allocated for the source if
+ * it has none yet, and the source is left masked and marked valid.
+ *
+ * Returns 0 on success, -E2BIG when @irq is out of range, -ENOMEM when the
+ * source block cannot be created, -EFAULT when @addr cannot be read and
+ * -ENXIO when no HW interrupt can be allocated.
+ */
+static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
+					 u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u64 val;
+	u16 idx;
+	int rc;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
+		return -E2BIG;
+
+	/* idx is set by the lookup even when the block does not exist yet */
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb) {
+		pr_debug("No source, creating source block...\n");
+		sb = kvmppc_xive_create_src_block(xive, irq);
+		if (!sb) {
+			pr_err("Failed to create block...\n");
+			return -ENOMEM;
+		}
+	}
+	state = &sb->irq_state[idx];
+
+	/* Read the user value before taking the block spinlock */
+	if (get_user(val, ubufp)) {
+		pr_err("fault getting user info !\n");
+		return -EFAULT;
+	}
+
+	arch_spin_lock(&sb->lock);
+
+	/*
+	 * If the source doesn't already have an IPI, allocate
+	 * one and get the corresponding data
+	 */
+	if (!state->ipi_number) {
+		state->ipi_number = xive_native_alloc_irq();
+		if (state->ipi_number == 0) {
+			pr_err("Failed to allocate IRQ !\n");
+			rc = -ENXIO;
+			goto unlock;
+		}
+		xive_native_populate_irq_data(state->ipi_number,
+					      &state->ipi_data);
+		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
+			 state->ipi_number, irq);
+	}
+
+	/* Restore LSI state */
+	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
+		state->lsi = true;
+		if (val & KVM_XIVE_LEVEL_ASSERTED)
+			state->asserted = true;
+		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
+	}
+
+	/* Mask IRQ to start with */
+	state->act_server = 0;
+	state->act_priority = MASKED;
+	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+
+	/* Increment the number of valid sources and mark this one valid */
+	if (!state->valid)
+		xive->src_count++;
+	state->valid = true;
+
+	rc = 0;
+
+unlock:
+	arch_spin_unlock(&sb->lock);
+
+	return rc;
+}
+
+/*
+ * Apply a new targeting (@server, @priority, @eisn) to source @state of
+ * block @sb, under the block lock. Nothing is done when the three values
+ * match the current ones. A MASKED priority or a set @masked flag masks the
+ * source instead of routing it.
+ *
+ * Returns 0, or the error of the target selection or of the HW
+ * configuration call.
+ */
+static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
+					struct kvmppc_xive_src_block *sb,
+					struct kvmppc_xive_irq_state *state,
+					u32 server, u8 priority, bool masked,
+					u32 eisn)
+{
+	struct kvm *kvm = xive->kvm;
+	u32 hw_num;
+	int rc = 0;
+
+	arch_spin_lock(&sb->lock);
+
+	if (state->act_server == server && state->act_priority == priority &&
+	    state->eisn == eisn)
+		goto unlock;
+
+	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
+		 priority, server, masked, state->act_server,
+		 state->act_priority);
+
+	/* Only the HW number is needed here, not the irq data */
+	kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+	if (priority != MASKED && !masked) {
+		/* May update server; the state is left untouched on failure */
+		rc = kvmppc_xive_select_target(kvm, &server, priority);
+		if (rc)
+			goto unlock;
+
+		state->act_priority = priority;
+		state->act_server = server;
+		state->eisn = eisn;
+
+		rc = xive_native_configure_irq(hw_num,
+					       kvmppc_xive_vp(xive, server),
+					       priority, eisn);
+	} else {
+		state->act_priority = MASKED;
+		state->act_server = 0;
+		state->eisn = 0;
+
+		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
+	}
+
+unlock:
+	arch_spin_unlock(&sb->lock);
+	return rc;
+}
+
+/*
+ * Set the targeting of guest source @irq from the 64-bit configuration word
+ * found at user address @addr (priority, server, masked flag and EISN
+ * fields, see the KVM_XIVE_SOURCE_* masks).
+ *
+ * Returns -ENOENT when no source block covers @irq, -EINVAL when the source
+ * is not valid or the priority is not one the host honours, -EFAULT when
+ * @addr cannot be read, otherwise the result of
+ * kvmppc_xive_native_update_source_config().
+ */
+static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
+						long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 src;
+	u64 kvm_cfg;
+	u32 server;
+	u8 priority;
+	bool masked;
+	u32 eisn;
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[src];
+
+	if (!state->valid)
+		return -EINVAL;
+
+	if (get_user(kvm_cfg, ubufp))
+		return -EFAULT;
+
+	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
+
+	/* Unpack the fields of the configuration word */
+	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
+		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
+	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
+		KVM_XIVE_SOURCE_SERVER_SHIFT;
+	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
+		KVM_XIVE_SOURCE_MASKED_SHIFT;
+	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
+		KVM_XIVE_SOURCE_EISN_SHIFT;
+
+	/* Reject priorities that xive_prio_from_guest() would alter */
+	if (priority != xive_prio_from_guest(priority)) {
+		pr_err("invalid priority for queue %d for VCPU %d\n",
+		       priority, server);
+		return -EINVAL;
+	}
+
+	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
+						       priority, masked, eisn);
+}
+
+/*
+ * Sync guest source @irq at the XIVE level: the pass-through HW interrupt
+ * when one is set, the IPI otherwise. @addr is not used.
+ *
+ * Returns 0 on success, -ENOENT when no source block covers @irq and
+ * -EINVAL when the source is not valid.
+ */
+static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
+					  long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u32 hw_num;
+	u16 src;
+	int rc = -EINVAL;
+
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[src];
+
+	arch_spin_lock(&sb->lock);
+
+	if (state->valid) {
+		/* Only the HW number is needed, not the irq data */
+		kvmppc_xive_select_irq(state, &hw_num, NULL);
+		xive_native_sync_source(hw_num);
+		rc = 0;
+	}
+
+	arch_spin_unlock(&sb->lock);
+	return rc;
+}
+
+/* Returns 0 when @qshift is an accepted queue size shift, -EINVAL otherwise */
+static int xive_native_validate_queue_size(u32 qshift)
+{
+	/*
+	 * We only support 64K pages for the moment. This is also
+	 * advertised in the DT property "ibm,xive-eq-sizes".
+	 * A shift of 0 is the EQ reset; shifts 12, 21 and 24 are
+	 * rejected like any other value.
+	 */
+	if (qshift == 0 || qshift == 16)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Handle a KVM_DEV_XIVE_GRP_EQ_CONFIG "set" request: configure, or
+ * reset, the event queue of one (server, priority) pair.
+ *
+ * @xive:   XIVE native device the request was issued on
+ * @eq_idx: EQ identifier holding the priority and the server number
+ * @addr:   userspace address of the struct kvm_ppc_xive_eq to apply
+ *
+ * A zero qshift resets the queue and disables queueing. Otherwise the
+ * flags, the size and the alignment of the queue are validated, the
+ * guest page is looked up and handed to the queue, the toggle bit and
+ * index are restored when they differ from (1, 0), and the escalation
+ * interrupt is attached.
+ *
+ * Returns 0 on success, -EFAULT when the descriptor cannot be copied
+ * from userspace, -ENOENT when the server is unknown, -EINVAL for an
+ * invalid priority, flag, size, alignment or page, or the error of the
+ * failing configuration helper.
+ */
+static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
+					       long eq_idx, u64 addr)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	struct kvmppc_xive_vcpu *xc;
+	void __user *ubufp = (void __user *) addr;
+	u32 server;
+	u8 priority;
+	struct kvm_ppc_xive_eq kvm_eq;
+	int rc;
+	__be32 *qaddr = NULL;
+	struct page *page;
+	struct xive_q *q;
+	gfn_t gfn;
+	unsigned long page_size;
+	int srcu_idx;
+
+	/*
+	 * Demangle priority/server tuple from the EQ identifier
+	 */
+	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+		KVM_XIVE_EQ_PRIORITY_SHIFT;
+	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+		KVM_XIVE_EQ_SERVER_SHIFT;
+
+	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
+		return -EFAULT;
+
+	vcpu = kvmppc_xive_find_server(kvm, server);
+	if (!vcpu) {
+		pr_err("Can't find server %d\n", server);
+		return -ENOENT;
+	}
+	xc = vcpu->arch.xive_vcpu;
+
+	if (priority != xive_prio_from_guest(priority)) {
+		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
+		       priority, server);
+		return -EINVAL;
+	}
+	q = &xc->queues[priority];
+
+	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+		 __func__, server, priority, kvm_eq.flags,
+		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+	/* reset queue and disable queueing */
+	if (!kvm_eq.qshift) {
+		q->guest_qaddr  = 0;
+		q->guest_qshift = 0;
+
+		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+							NULL, 0, true);
+		if (rc) {
+			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
+			       priority, xc->server_num, rc);
+			return rc;
+		}
+
+		return 0;
+	}
+
+	/*
+	 * sPAPR specifies a "Unconditional Notify (n) flag" for the
+	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
+	 * without using the coalescing mechanisms provided by the
+	 * XIVE END ESBs. This is required on KVM as notification
+	 * using the END ESBs is not supported.
+	 */
+	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
+		pr_err("invalid flags %d\n", kvm_eq.flags);
+		return -EINVAL;
+	}
+
+	rc = xive_native_validate_queue_size(kvm_eq.qshift);
+	if (rc) {
+		pr_err("invalid queue size %d\n", kvm_eq.qshift);
+		return rc;
+	}
+
+	/* The queue must be naturally aligned on its own size */
+	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
+		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
+		       1ull << kvm_eq.qshift);
+		return -EINVAL;
+	}
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	gfn = gpa_to_gfn(kvm_eq.qaddr);
+
+	/* The whole queue has to fit in the host page backing it */
+	page_size = kvm_host_page_size(vcpu, gfn);
+	if (1ull << kvm_eq.qshift > page_size) {
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+		pr_warn("Incompatible host page size %lx!\n", page_size);
+		return -EINVAL;
+	}
+
+	page = gfn_to_page(kvm, gfn);
+	if (is_error_page(page)) {
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+		return -EINVAL;
+	}
+
+	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	/*
+	 * Back up the guest address of the queue page so that the EQ
+	 * page can be marked dirty for migration.
+	 */
+	q->guest_qaddr  = kvm_eq.qaddr;
+	q->guest_qshift = kvm_eq.qshift;
+
+	 /*
+	  * Unconditional Notification is forced by default at the
+	  * OPAL level because the use of END ESBs is not supported by
+	  * Linux.
+	  */
+	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+						qaddr, kvm_eq.qshift, true);
+	if (rc) {
+		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
+		       priority, xc->server_num, rc);
+		/* Drop the page obtained from gfn_to_page() above */
+		put_page(page);
+		return rc;
+	}
+
+	/*
+	 * Only restore the queue state when needed. When doing the
+	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
+	 */
+	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
+		rc = xive_native_set_queue_state(xc->vp_id, priority,
+						 kvm_eq.qtoggle,
+						 kvm_eq.qindex);
+		if (rc)
+			goto error;
+	}
+
+	rc = kvmppc_xive_attach_escalation(vcpu, priority,
+					   kvmppc_xive_has_single_escalation(xive));
+error:
+	/* Reached on success too: the queue is only cleaned up if rc != 0 */
+	if (rc)
+		kvmppc_xive_native_cleanup_queue(vcpu, priority);
+	return rc;
+}
+
+/*
+ * Handle a KVM_DEV_XIVE_GRP_EQ_CONFIG "get" request: report the
+ * configuration and the toggle/index state of one event queue.
+ *
+ * @eq_idx holds the priority and the server number of the queue, @addr
+ * is the userspace address of the struct kvm_ppc_xive_eq to fill.
+ *
+ * When the queue has no page configured, 0 is returned and the user
+ * buffer is left untouched.
+ */
+static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
+					       long eq_idx, u64 addr)
+{
+	void __user *ubufp = (u64 __user *) addr;
+	struct kvm_ppc_xive_eq kvm_eq;
+	struct kvmppc_xive_vcpu *xc;
+	struct kvm_vcpu *vcpu;
+	struct xive_q *q;
+	u64 hw_qaddr, hw_qshift, hw_eoi_page, hw_flags;
+	u32 hw_esc_irq;
+	u32 server;
+	u8 priority;
+	int rc;
+
+	/* Split the EQ identifier into its server and priority fields */
+	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+		KVM_XIVE_EQ_SERVER_SHIFT;
+	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+		KVM_XIVE_EQ_PRIORITY_SHIFT;
+
+	vcpu = kvmppc_xive_find_server(xive->kvm, server);
+	if (!vcpu) {
+		pr_err("Can't find server %d\n", server);
+		return -ENOENT;
+	}
+	xc = vcpu->arch.xive_vcpu;
+
+	if (priority != xive_prio_from_guest(priority)) {
+		pr_err("invalid priority for queue %d for VCPU %d\n",
+		       priority, server);
+		return -EINVAL;
+	}
+	q = &xc->queues[priority];
+
+	/* Nothing to report for a queue that was never configured */
+	if (!q->qpage)
+		return 0;
+
+	rc = xive_native_get_queue_info(xc->vp_id, priority, &hw_qaddr,
+					&hw_qshift, &hw_eoi_page,
+					&hw_esc_irq, &hw_flags);
+	if (rc)
+		return rc;
+
+	/* Start from an all-zero descriptor */
+	memset(&kvm_eq, 0, sizeof(kvm_eq));
+	if (hw_flags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
+		kvm_eq.flags = KVM_XIVE_EQ_ALWAYS_NOTIFY;
+
+	/* Address and size are reported as the guest configured them */
+	kvm_eq.qaddr  = q->guest_qaddr;
+	kvm_eq.qshift = q->guest_qshift;
+
+	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
+					 &kvm_eq.qindex);
+	if (rc)
+		return rc;
+
+	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+		 __func__, server, priority, kvm_eq.flags,
+		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+	return copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)) ? -EFAULT : 0;
+}
+
+/*
+ * Clear the routing of every valid, not yet masked, source of a source
+ * block and mask it. The caller in this file holds sb->lock.
+ */
+static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
+{
+	struct kvmppc_xive_irq_state *state;
+	int idx;
+
+	for (idx = 0; idx < KVMPPC_XICS_IRQ_PER_ICS; idx++) {
+		state = &sb->irq_state[idx];
+
+		/* Skip unused sources and those which are masked already */
+		if (!state->valid || state->act_priority == MASKED)
+			continue;
+
+		state->eisn = 0;
+		state->act_server = 0;
+		state->act_priority = MASKED;
+
+		/* ESB load at XIVE_ESB_SET_PQ_01, then target MASKED */
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+
+		/* Same treatment for the pt_* interrupt when one is set */
+		if (!state->pt_number)
+			continue;
+
+		xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+		xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
+	}
+}
+
+/*
+ * KVM_DEV_XIVE_RESET handler.
+ *
+ * With xive->lock held, for every vCPU that has a XIVE context: disable
+ * its interrupts, free its escalation interrupts and clean up its
+ * queues. The sources of every allocated source block are then reset.
+ *
+ * Always returns 0.
+ */
+static int kvmppc_xive_reset(struct kvmppc_xive *xive)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	pr_devel("%s\n", __func__);
+
+	mutex_lock(&xive->lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+		unsigned int prio;
+
+		/* vCPU without a XIVE context: nothing to reset */
+		if (!xc)
+			continue;
+
+		kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+
+			/* Single escalation, no queue 7 */
+			if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
+				break;
+
+			/* Release the escalation interrupt and its name */
+			if (xc->esc_virq[prio]) {
+				free_irq(xc->esc_virq[prio], vcpu);
+				irq_dispose_mapping(xc->esc_virq[prio]);
+				kfree(xc->esc_virq_names[prio]);
+				xc->esc_virq[prio] = 0;
+			}
+
+			kvmppc_xive_native_cleanup_queue(vcpu, prio);
+		}
+	}
+
+	/* Reset the sources, one source block at a time under its lock */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+		if (sb) {
+			arch_spin_lock(&sb->lock);
+			kvmppc_xive_reset_sources(sb);
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+
+	mutex_unlock(&xive->lock);
+
+	return 0;
+}
+
+/*
+ * Sync the XIVE source and queue of every configured interrupt of a
+ * source block. The caller in this file holds sb->lock.
+ */
+static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
+{
+	struct kvmppc_xive_irq_state *state;
+	struct xive_irq_data *xd;
+	u32 hw_num;
+	int idx;
+
+	for (idx = 0; idx < KVMPPC_XICS_IRQ_PER_ICS; idx++) {
+		state = &sb->irq_state[idx];
+
+		/*
+		 * Besides unused entries, MASKED sources are ignored.
+		 *
+		 * The struct kvmppc_xive_irq_state reflects the state of
+		 * the EAS configuration, not the state of the source.
+		 * The source is masked by setting the PQ bits to '-Q',
+		 * which is done before the KVM_DEV_XIVE_EQ_SYNC control
+		 * is called.
+		 *
+		 * When a source EAS is configured, OPAL syncs the XIVE IC
+		 * of the source and the XIVE IC of the previous target,
+		 * if any, so MASKED sources have been synced already.
+		 */
+		if (!state->valid || state->act_priority == MASKED)
+			continue;
+
+		kvmppc_xive_select_irq(state, &hw_num, &xd);
+		xive_native_sync_source(hw_num);
+		xive_native_sync_queue(hw_num);
+	}
+}
+
+/*
+ * Mark the page of every configured event queue of @vcpu dirty so that
+ * it is picked up by migration.
+ *
+ * Returns 0, or -ENOENT when the vCPU has no XIVE context.
+ */
+static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned int prio;
+
+	if (!xc)
+		return -ENOENT;
+
+	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		int srcu_idx;
+
+		/* Only queues with a page attached have something to mark */
+		if (q->qpage) {
+			srcu_idx = srcu_read_lock(&kvm->srcu);
+			mark_page_dirty(kvm, gpa_to_gfn(q->guest_qaddr));
+			srcu_read_unlock(&kvm->srcu, srcu_idx);
+		}
+	}
+	return 0;
+}
+
+/*
+ * KVM_DEV_XIVE_EQ_SYNC handler: with xive->lock held, sync the sources
+ * of every allocated source block, then mark the EQ pages of every vCPU
+ * dirty. The per-vCPU result is ignored.
+ *
+ * Always returns 0.
+ */
+static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
+{
+	struct kvm *kvm = xive->kvm;
+	struct kvmppc_xive_src_block *sb;
+	struct kvm_vcpu *vcpu;
+	unsigned long sbid, vidx;
+
+	pr_devel("%s\n", __func__);
+
+	mutex_lock(&xive->lock);
+
+	for (sbid = 0; sbid <= xive->max_sbid; sbid++) {
+		sb = xive->src_blocks[sbid];
+		if (!sb)
+			continue;
+
+		arch_spin_lock(&sb->lock);
+		kvmppc_xive_native_sync_sources(sb);
+		arch_spin_unlock(&sb->lock);
+	}
+
+	kvm_for_each_vcpu(vidx, vcpu, kvm)
+		kvmppc_xive_native_vcpu_eq_sync(vcpu);
+
+	mutex_unlock(&xive->lock);
+
+	return 0;
+}
+
+/*
+ * set_attr callback of the device: dispatch on the attribute group (and,
+ * for the control group, on the attribute itself) to the matching
+ * handler and return its result.
+ *
+ * Returns -ENXIO for an unknown group or control attribute.
+ */
+static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XIVE_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XIVE_RESET:
+			return kvmppc_xive_reset(xive);
+		case KVM_DEV_XIVE_EQ_SYNC:
+			return kvmppc_xive_native_eq_sync(xive);
+		case KVM_DEV_XIVE_NR_SERVERS:
+			return kvmppc_xive_set_nr_servers(xive, attr->addr);
+		}
+		/* Unknown control: fall out to the -ENXIO below */
+		break;
+	case KVM_DEV_XIVE_GRP_SOURCE:
+		return kvmppc_xive_native_set_source(xive, attr->attr,
+						     attr->addr);
+	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+		return kvmppc_xive_native_set_source_config(xive, attr->attr,
+							    attr->addr);
+	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
+							   attr->addr);
+	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+		return kvmppc_xive_native_sync_source(xive, attr->attr,
+						      attr->addr);
+	}
+	return -ENXIO;
+}
+
+/*
+ * get_attr callback of the device. Only the EQ configuration group can
+ * be read back; any other group yields -ENXIO.
+ */
+static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	if (attr->group != KVM_DEV_XIVE_GRP_EQ_CONFIG)
+		return -ENXIO;
+
+	return kvmppc_xive_native_get_queue_config(xive, attr->attr,
+						   attr->addr);
+}
+
+/*
+ * has_attr callback of the device.
+ *
+ * Returns 0 when the (group, attribute) pair is supported, -ENXIO
+ * otherwise. For the source groups, the attribute must lie in
+ * [KVMPPC_XIVE_FIRST_IRQ, KVMPPC_XIVE_NR_IRQS). Any attribute of the EQ
+ * configuration group is reported as supported.
+ */
+static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_XIVE_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_XIVE_RESET:
+		case KVM_DEV_XIVE_EQ_SYNC:
+		case KVM_DEV_XIVE_NR_SERVERS:
+			return 0;
+		}
+		break;
+	case KVM_DEV_XIVE_GRP_SOURCE:
+	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XIVE_NR_IRQS)
+			return 0;
+		break;
+	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+		return 0;
+	}
+	return -ENXIO;
+}
+
+/*
+ * Called when device fd is closed.  kvm->lock is held.
+ *
+ * Tears the device down: clears the mapping, removes the debugfs entry,
+ * cleans up the XIVE state of every vCPU, detaches the device from the
+ * VM, frees the source blocks and the VP block, then frees @dev. The
+ * kvmppc_xive structure itself is not freed here.
+ */
+static void kvmppc_xive_native_release(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = dev->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	pr_devel("Releasing xive native device\n");
+
+	/*
+	 * Clear the KVM device file address_space which is used to
+	 * unmap the ESB pages when a device is passed-through.
+	 */
+	mutex_lock(&xive->mapping_lock);
+	xive->mapping = NULL;
+	mutex_unlock(&xive->mapping_lock);
+
+	/*
+	 * Since this is the device release function, we know that
+	 * userspace does not have any open fd or mmap referring to
+	 * the device.  Therefore there can not be any of the
+	 * device attribute set/get, mmap, or page fault functions
+	 * being executed concurrently, and similarly, the
+	 * connect_vcpu and set/clr_mapped functions also cannot
+	 * be being executed.
+	 */
+
+	debugfs_remove(xive->dentry);
+
+	/*
+	 * We should clean up the vCPU interrupt presenters first.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/*
+		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
+		 * Holding the vcpu->mutex also means that the vcpu cannot
+		 * be executing the KVM_RUN ioctl, and therefore it cannot
+		 * be executing the XIVE push or pull code or accessing
+		 * the XIVE MMIO regions.
+		 */
+		mutex_lock(&vcpu->mutex);
+		kvmppc_xive_native_cleanup_vcpu(vcpu);
+		mutex_unlock(&vcpu->mutex);
+	}
+
+	/*
+	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+	 * against xive code getting called during vcpu execution or
+	 * set/get one_reg operations.
+	 */
+	kvm->arch.xive = NULL;
+
+	/* Free every source block; kfree() copes with the NULL entries */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		if (xive->src_blocks[i])
+			kvmppc_xive_free_sources(xive->src_blocks[i]);
+		kfree(xive->src_blocks[i]);
+		xive->src_blocks[i] = NULL;
+	}
+
+	/* The VP block only exists once it has been allocated */
+	if (xive->vp_base != XIVE_INVALID_VP)
+		xive_native_free_vp_block(xive->vp_base);
+
+	/*
+	 * A reference of the kvmppc_xive pointer is now kept under
+	 * the xive_devices struct of the machine for reuse. It is
+	 * freed when the VM is destroyed for now until we fix all the
+	 * execution paths.
+	 */
+
+	kfree(dev);
+}
+
+/*
+ * Create a XIVE device.  kvm->lock is held.
+ *
+ * Obtains the kvmppc_xive structure through kvmppc_xive_get_device(),
+ * links it with @dev and the VM, initialises its locks and defaults and
+ * records the escalation/save-restore capabilities in its flags.
+ *
+ * Returns 0 on success, -EEXIST if the VM already has a XIVE device or
+ * -ENOMEM if no kvmppc_xive structure could be obtained.
+ */
+static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
+{
+	struct kvmppc_xive *xive;
+	struct kvm *kvm = dev->kvm;
+
+	pr_devel("Creating xive native device\n");
+
+	if (kvm->arch.xive)
+		return -EEXIST;
+
+	xive = kvmppc_xive_get_device(kvm, type);
+	if (!xive)
+		return -ENOMEM;
+
+	dev->private = xive;
+	xive->dev = dev;
+	xive->kvm = kvm;
+	mutex_init(&xive->mapping_lock);
+	mutex_init(&xive->lock);
+
+	/* VP allocation is delayed to the first call to connect_vcpu */
+	xive->vp_base = XIVE_INVALID_VP;
+	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
+	 * on a POWER9 system.
+	 */
+	xive->nr_servers = KVM_MAX_VCPUS;
+
+	if (xive_native_has_single_escalation())
+		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+	if (xive_native_has_save_restore())
+		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
+
+	xive->ops = &kvmppc_xive_native_ops;
+
+	/* Publish the device to the VM only once it is fully set up */
+	kvm->arch.xive = xive;
+	return 0;
+}
+
+/*
+ * Interrupt Pending Buffer (IPB) offset
+ */
+#define TM_IPB_SHIFT 40
+#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
+
+/*
+ * Read the XIVE VP state of @vcpu into @val->xive_timaval[0]: the saved
+ * thread context word w01 merged with the IPB backup reported by OPAL.
+ *
+ * Returns 0 on success, -EPERM if XIVE is not enabled on the vCPU,
+ * -ENOENT if the vCPU has no XIVE context, or the error returned by
+ * xive_native_get_vp_state().
+ */
+int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u64 opal_state;
+	int rc;
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc)
+		return -ENOENT;
+
+	/* Thread context registers. We only care about IPB and CPPR */
+	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
+
+	/* Get the VP state from OPAL */
+	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
+	if (rc)
+		return rc;
+
+	/*
+	 * Capture the backup of IPB register in the NVT structure and
+	 * merge it in our KVM VP state.
+	 */
+	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
+
+	pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+		 __func__,
+		 vcpu->arch.xive_saved_state.nsr,
+		 vcpu->arch.xive_saved_state.cppr,
+		 vcpu->arch.xive_saved_state.ipb,
+		 vcpu->arch.xive_saved_state.pipr,
+		 vcpu->arch.xive_saved_state.w01,
+		 (u32) vcpu->arch.xive_cam_word, opal_state);
+
+	return 0;
+}
+
+/*
+ * Restore the saved thread context word w01 of @vcpu from
+ * @val->xive_timaval[0].
+ *
+ * Returns 0 on success, -EPERM if XIVE is not enabled on the vCPU,
+ * -ENOENT if the vCPU or the VM has no XIVE state, or -EBUSY (with a
+ * warning) if the vCPU context is currently pushed.
+ */
+int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+
+	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
+		 val->xive_timaval[0], val->xive_timaval[1]);
+
+	if (!kvmppc_xive_enabled(vcpu))
+		return -EPERM;
+
+	if (!xc || !xive)
+		return -ENOENT;
+
+	/* We can't update the state of a "pushed" VCPU	 */
+	if (WARN_ON(vcpu->arch.xive_pushed))
+		return -EBUSY;
+
+	/*
+	 * Restore the thread context registers. IPB and CPPR should
+	 * be the only ones that matter.
+	 */
+	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
+
+	/*
+	 * There is no need to restore the XIVE internal state (IPB
+	 * stored in the NVT) as the IPB register was merged in KVM VP
+	 * state when captured.
+	 */
+	return 0;
+}
+
+/*
+ * Tell whether the XIVE native device can be used: this is the case
+ * when xive_native_has_queue_state_support() reports true.
+ */
+bool kvmppc_xive_native_supported(void)
+{
+	return xive_native_has_queue_state_support();
+}
+
+/*
+ * seq_file show callback of the "xive" debugfs file.
+ *
+ * Prints, for every vCPU with a XIVE context, its VP, its saved thread
+ * context and its queues, then the sources of every allocated source
+ * block. Prints nothing when the device has no VM attached.
+ *
+ * Always returns 0.
+ */
+static int xive_native_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xive *xive = m->private;
+	struct kvm *kvm = xive->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	if (!kvm)
+		return 0;
+
+	seq_puts(m, "=========\nVCPU state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+		if (!xc)
+			continue;
+
+		seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
+			   "    NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+			   xc->server_num, xc->vp_id, xc->vp_chip_id,
+			   vcpu->arch.xive_saved_state.nsr,
+			   vcpu->arch.xive_saved_state.cppr,
+			   vcpu->arch.xive_saved_state.ipb,
+			   vcpu->arch.xive_saved_state.pipr,
+			   be64_to_cpu(vcpu->arch.xive_saved_state.w01),
+			   be32_to_cpu(vcpu->arch.xive_cam_word));
+
+		kvmppc_xive_debug_show_queues(m, vcpu);
+	}
+
+	seq_puts(m, "=========\nSources\n=========\n");
+
+	/* Each source block is dumped with its own lock held */
+	for (i = 0; i <= xive->max_sbid; i++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+		if (sb) {
+			arch_spin_lock(&sb->lock);
+			kvmppc_xive_debug_show_sources(m, sb);
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(xive_native_debug);
+
+/*
+ * Create the read-only "xive" debugfs file in the debugfs directory of
+ * the VM and remember its dentry in xive->dentry.
+ */
+static void xive_native_debugfs_init(struct kvmppc_xive *xive)
+{
+	xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry,
+					   xive, &xive_native_debug_fops);
+
+	pr_debug("%s: created\n", __func__);
+}
+
+/* init callback of the device: only the debugfs file is set up here. */
+static void kvmppc_xive_native_init(struct kvm_device *dev)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/* Register some debug interfaces */
+	xive_native_debugfs_init(xive);
+}
+
+/* KVM device operations of the "kvm-xive-native" device. */
+struct kvm_device_ops kvm_xive_native_ops = {
+	.name = "kvm-xive-native",
+	.create = kvmppc_xive_native_create,
+	.init = kvmppc_xive_native_init,
+	.release = kvmppc_xive_native_release,
+	.set_attr = kvmppc_xive_native_set_attr,
+	.get_attr = kvmppc_xive_native_get_attr,
+	.has_attr = kvmppc_xive_native_has_attr,
+	.mmap = kvmppc_xive_native_mmap,
+};
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
new file mode 100644
index 0000000000..6a5be025a8
--- /dev/null
+++ b/arch/powerpc/kvm/booke.c
@@ -0,0 +1,2242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ *          Scott Wood <scottwood@freescale.com>
+ *          Varun Sethi <varun.sethi@freescale.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+
+#include <asm/cputable.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/kvm_ppc.h>
+#include <asm/cacheflush.h>
+#include <asm/dbell.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+
+#include "timing.h"
+#include "booke.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_booke.h"
+
+unsigned long kvmppc_booke_handlers;
+
+/*
+ * Descriptors of the per-VM statistics: the generic KVM entries followed
+ * by the num_2M_pages and num_1G_pages counters.
+ */
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_ICOUNTER(VM, num_2M_pages),
+	STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+
+/*
+ * Header of the per-VM statistics. The offsets describe the layout that
+ * follows the header: the id (KVM_STATS_NAME_SIZE bytes), then the
+ * descriptors, then the data.
+ */
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
+/*
+ * Descriptors of the per-vCPU statistics: the generic KVM entries
+ * followed by the counters listed below.
+ */
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, sum_exits),
+	STATS_DESC_COUNTER(VCPU, mmio_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, light_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, isi_exits),
+	STATS_DESC_COUNTER(VCPU, dsi_exits),
+	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+	STATS_DESC_COUNTER(VCPU, dec_exits),
+	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+	STATS_DESC_COUNTER(VCPU, dbell_exits),
+	STATS_DESC_COUNTER(VCPU, gdbell_exits),
+	STATS_DESC_COUNTER(VCPU, ld),
+	STATS_DESC_COUNTER(VCPU, st),
+	STATS_DESC_COUNTER(VCPU, pthru_all),
+	STATS_DESC_COUNTER(VCPU, pthru_host),
+	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+
+/*
+ * Header of the per-vCPU statistics. The offsets describe the layout
+ * that follows the header: the id (KVM_STATS_NAME_SIZE bytes), then the
+ * descriptors, then the data.
+ */
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
+/*
+ * Print the main registers of @vcpu to the kernel log: pc, msr, lr, ctr,
+ * srr0/srr1, the pending exceptions bitmap and the 32 GPRs, four per
+ * line.
+ *
+ * TODO: use vcpu_printf()
+ */
+void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	printk("pc:   %08lx msr:  %08llx\n", vcpu->arch.regs.nip,
+			vcpu->arch.shared->msr);
+	printk("lr:   %08lx ctr:  %08lx\n", vcpu->arch.regs.link,
+			vcpu->arch.regs.ctr);
+	printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
+					    vcpu->arch.shared->srr1);
+
+	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
+
+	for (i = 0; i < 32; i += 4) {
+		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
+		       kvmppc_get_gpr(vcpu, i),
+		       kvmppc_get_gpr(vcpu, i+1),
+		       kvmppc_get_gpr(vcpu, i+2),
+		       kvmppc_get_gpr(vcpu, i+3));
+	}
+}
+
+#ifdef CONFIG_SPE
+/*
+ * Save the guest SPE state and clear MSR_SPE in the shadow MSR.
+ * Preemption is disabled, and SPE enabled for the kernel, around the
+ * save.
+ */
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	enable_kernel_spe();
+	kvmppc_save_guest_spe(vcpu);
+	disable_kernel_spe();
+	vcpu->arch.shadow_msr &= ~MSR_SPE;
+	preempt_enable();
+}
+
+/*
+ * Load the guest SPE state and set MSR_SPE in the shadow MSR.
+ * Preemption is disabled, and SPE enabled for the kernel, around the
+ * load.
+ */
+static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	enable_kernel_spe();
+	kvmppc_load_guest_spe(vcpu);
+	disable_kernel_spe();
+	vcpu->arch.shadow_msr |= MSR_SPE;
+	preempt_enable();
+}
+
+/*
+ * Make the SPE bit of the shadow MSR follow the guest MSR: load the
+ * guest SPE state when the guest enabled SPE and the shadow MSR does not
+ * have it yet, save it when the guest disabled SPE while the shadow MSR
+ * still has it.
+ */
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+	bool guest_spe = vcpu->arch.shared->msr & MSR_SPE;
+	bool shadow_spe = vcpu->arch.shadow_msr & MSR_SPE;
+
+	if (guest_spe && !shadow_spe)
+		kvmppc_vcpu_enable_spe(vcpu);
+	else if (!guest_spe && shadow_spe)
+		kvmppc_vcpu_disable_spe(vcpu);
+}
+#else
+/* Without CONFIG_SPE there is no SPE state to synchronize. */
+static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+/*
+ * Load the guest vcpu FP state if it is needed.
+ * MSR_FP is also set in the thread so that the host knows we are
+ * holding the FPU and can save the guest vcpu FP state if another
+ * thread needs the FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * Must be called with preemption disabled.
+ * Does nothing without CONFIG_PPC_FPU.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+	/* Only load when the thread does not have MSR_FP set yet */
+	if (!(current->thread.regs->msr & MSR_FP)) {
+		enable_kernel_fp();
+		load_fp_state(&vcpu->arch.fp);
+		disable_kernel_fp();
+		current->thread.fp_save_area = &vcpu->arch.fp;
+		current->thread.regs->msr |= MSR_FP;
+	}
+#endif
+}
+
+/*
+ * Save the guest vcpu FP state into the thread and reset the thread's
+ * FP save area pointer.
+ *
+ * Must be called with preemption disabled.
+ * Does nothing without CONFIG_PPC_FPU.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+	if (current->thread.regs->msr & MSR_FP)
+		giveup_fpu(current);
+	current->thread.fp_save_area = NULL;
+#endif
+}
+
+/*
+ * Copy the MSR_FP bit of the guest MSR into the shadow MSR. Only done
+ * with CONFIG_PPC_FPU and without CONFIG_KVM_BOOKE_HV; a no-op otherwise.
+ */
+static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
+	/* We always treat the FP bit as enabled from the host
+	   perspective, so only need to adjust the shadow MSR */
+	vcpu->arch.shadow_msr &= ~MSR_FP;
+	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
+#endif
+}
+
+/*
+ * Simulate an AltiVec unavailable fault to load the guest state
+ * from the thread into the AltiVec unit, and set MSR_VEC in the thread.
+ *
+ * Must be called with preemption disabled.
+ * Does nothing without CONFIG_ALTIVEC or when the CPU lacks
+ * CPU_FTR_ALTIVEC.
+ */
+static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+		/* Only load when the thread does not have MSR_VEC set yet */
+		if (!(current->thread.regs->msr & MSR_VEC)) {
+			enable_kernel_altivec();
+			load_vr_state(&vcpu->arch.vr);
+			disable_kernel_altivec();
+			current->thread.vr_save_area = &vcpu->arch.vr;
+			current->thread.regs->msr |= MSR_VEC;
+		}
+	}
+#endif
+}
+
+/*
+ * Save the guest vcpu AltiVec state into the thread and reset the
+ * thread's vector save area pointer.
+ *
+ * Must be called with preemption disabled.
+ * Does nothing without CONFIG_ALTIVEC or when the CPU lacks
+ * CPU_FTR_ALTIVEC.
+ */
+static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+		if (current->thread.regs->msr & MSR_VEC)
+			giveup_altivec(current);
+		current->thread.vr_save_area = NULL;
+	}
+#endif
+}
+
+/*
+ * Synchronize the debug enable bit (MSR_DE) after a guest MSR change.
+ *
+ * Without CONFIG_KVM_BOOKE_HV the shadow MSR follows the guest MSR_DE
+ * bit. When userspace debugs the guest, MSR_DE is forced on: in the
+ * guest visible MSR with CONFIG_KVM_BOOKE_HV, otherwise in the shadow
+ * MSR while it is cleared from the guest visible MSR.
+ */
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+	/* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+	vcpu->arch.shadow_msr &= ~MSR_DE;
+	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+	/* Force enable debug interrupts when user space wants to debug */
+	if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+		/*
+		 * Since there is no shadow MSR, sync MSR_DE into the guest
+		 * visible MSR.
+		 */
+		vcpu->arch.shared->msr |= MSR_DE;
+#else
+		vcpu->arch.shadow_msr |= MSR_DE;
+		vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+	}
+}
+
+/*
+ * Helper function for "full" MSR writes.  No need to call this if only
+ * EE/CE/ME/DE/RI are changing.
+ *
+ * Stores @new_msr (with MSR_GS forced on under CONFIG_KVM_BOOKE_HV) as
+ * the guest MSR, then notifies the MMU code of the previous value and
+ * resynchronizes the SPE, FPU and debug state.
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+	u32 old_msr = vcpu->arch.shared->msr;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	new_msr |= MSR_GS;
+#endif
+
+	vcpu->arch.shared->msr = new_msr;
+
+	kvmppc_mmu_msr_notify(vcpu, old_msr);
+	kvmppc_vcpu_sync_spe(vcpu);
+	kvmppc_vcpu_sync_fpu(vcpu);
+	kvmppc_vcpu_sync_debug(vcpu);
+}
+
+/*
+ * Mark the exception of bit number @priority as pending on @vcpu, after
+ * emitting the matching trace event.
+ */
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+                                       unsigned int priority)
+{
+	trace_kvm_booke_queue_irqprio(vcpu, priority);
+	set_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+/*
+ * Queue a DTLB miss exception, recording the DEAR and ESR values to use
+ * at delivery.
+ */
+void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+				 ulong dear_flags, ulong esr_flags)
+{
+	vcpu->arch.queued_dear = dear_flags;
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+}
+
+/*
+ * Queue a data storage exception, recording the DEAR and ESR values to
+ * use at delivery. @srr1_flags is not used here: a non-zero value only
+ * triggers a one-time warning.
+ */
+void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong srr1_flags,
+				    ulong dear_flags, ulong esr_flags)
+{
+	WARN_ON_ONCE(srr1_flags);
+	vcpu->arch.queued_dear = dear_flags;
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+}
+
+/* Queue an ITLB miss exception on @vcpu. */
+void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+}
+
+/*
+ * Queue an instruction storage exception, recording the ESR value to
+ * use at delivery.
+ */
+void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags)
+{
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+}
+
+/*
+ * Queue an alignment exception, recording the DEAR and ESR values to use
+ * at delivery.
+ */
+static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags,
+					ulong esr_flags)
+{
+	vcpu->arch.queued_dear = dear_flags;
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT);
+}
+
+/* Queue a program exception, recording the ESR value to use at delivery. */
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
+{
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+}
+
+/*
+ * Queue an FP unavailable exception. @srr1_flags is not used here: a
+ * non-zero value only triggers a one-time warning.
+ */
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{
+	WARN_ON_ONCE(srr1_flags);
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+}
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Queue an AltiVec unavailable exception. @srr1_flags is not used here:
+ * a non-zero value only triggers a one-time warning.
+ */
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{
+	WARN_ON_ONCE(srr1_flags);
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+}
+#endif
+
+/* Queue a decrementer exception on @vcpu. */
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+}
+
+/* Return non-zero if a decrementer exception is pending on @vcpu. */
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+	return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+/* Clear a pending decrementer exception on @vcpu. */
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+/*
+ * Queue an external interrupt on @vcpu. A KVM_INTERRUPT_SET_LEVEL
+ * request is queued with the level variant of the priority, any other
+ * request with the plain external one.
+ */
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+				struct kvm_interrupt *irq)
+{
+	bool level = irq->irq == KVM_INTERRUPT_SET_LEVEL;
+
+	kvmppc_booke_queue_irqprio(vcpu, level ? BOOKE_IRQPRIO_EXTERNAL_LEVEL :
+						 BOOKE_IRQPRIO_EXTERNAL);
+}
+
+/* Clear a pending external interrupt on @vcpu, level variant included. */
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
+	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+}
+
+/* Queue a watchdog exception on @vcpu. */
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+/* Clear a pending watchdog exception on @vcpu. */
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
+/* Queue a debug exception on @vcpu. */
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG);
+}
+
+/* Clear a pending debug exception on @vcpu. */
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions);
+}
+
+/* Set the guest SRR0/SRR1 pair through the kvmppc_set_srr[01]() helpers. */
+static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+	kvmppc_set_srr0(vcpu, srr0);
+	kvmppc_set_srr1(vcpu, srr1);
+}
+
+/* Set the guest CSRR0/CSRR1 pair kept in vcpu->arch. */
+static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+	vcpu->arch.csrr0 = srr0;
+	vcpu->arch.csrr1 = srr1;
+}
+
+/*
+ * Set the guest DSRR0/DSRR1 pair. CPUs without CPU_FTR_DEBUG_LVL_EXC
+ * get the values in CSRR0/CSRR1 instead.
+ */
+static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+	if (!cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) {
+		set_guest_csrr(vcpu, srr0, srr1);
+		return;
+	}
+
+	vcpu->arch.dsrr0 = srr0;
+	vcpu->arch.dsrr1 = srr1;
+}
+
+/* Set the guest MCSRR0/MCSRR1 pair kept in vcpu->arch. */
+static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
+{
+	vcpu->arch.mcsrr0 = srr0;
+	vcpu->arch.mcsrr1 = srr1;
+}
+
+/*
+ * Deliver the interrupt of the corresponding priority, if possible.
+ *
+ * @vcpu:     vCPU to deliver the interrupt to
+ * @priority: BOOKE_IRQPRIO_* bit number of the pending exception
+ *
+ * Delivery is allowed depending on the interrupt: always for the
+ * synchronous exceptions, otherwise only when the matching guest MSR
+ * enable bit (CE, ME, EE or DE) is set and the guest is not in a
+ * critical section. On delivery the current nip/msr are saved in the
+ * save/restore pair of the interrupt class, nip is pointed at the IVOR
+ * vector of the priority, ESR/DEAR/EPR are updated as needed, the MSR is
+ * reduced to the bits kept for that interrupt and the pending bit is
+ * cleared unless the interrupt has to stay pending.
+ *
+ * Returns non-zero if the interrupt was delivered, 0 otherwise.
+ */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+                                        unsigned int priority)
+{
+	int allowed = 0;
+	ulong msr_mask = 0;
+	bool update_esr = false, update_dear = false, update_epr = false;
+	ulong crit_raw = vcpu->arch.shared->critical;
+	ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+	bool crit;
+	bool keep_irq = false;
+	enum int_class int_class;
+	ulong new_msr = vcpu->arch.shared->msr;
+
+	/* Truncate crit indicators in 32 bit mode */
+	if (!(vcpu->arch.shared->msr & MSR_SF)) {
+		crit_raw &= 0xffffffff;
+		crit_r1 &= 0xffffffff;
+	}
+
+	/* Critical section when crit == r1 */
+	crit = (crit_raw == crit_r1);
+	/* ... and we're in supervisor mode */
+	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+
+	/* A level interrupt is delivered as external but stays pending */
+	if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) {
+		priority = BOOKE_IRQPRIO_EXTERNAL;
+		keep_irq = true;
+	}
+
+	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
+		update_epr = true;
+
+	switch (priority) {
+	case BOOKE_IRQPRIO_DTLB_MISS:
+	case BOOKE_IRQPRIO_DATA_STORAGE:
+	case BOOKE_IRQPRIO_ALIGNMENT:
+		update_dear = true;
+		fallthrough;
+	case BOOKE_IRQPRIO_INST_STORAGE:
+	case BOOKE_IRQPRIO_PROGRAM:
+		update_esr = true;
+		fallthrough;
+	case BOOKE_IRQPRIO_ITLB_MISS:
+	case BOOKE_IRQPRIO_SYSCALL:
+	case BOOKE_IRQPRIO_FP_UNAVAIL:
+#ifdef CONFIG_SPE_POSSIBLE
+	case BOOKE_IRQPRIO_SPE_UNAVAIL:
+	case BOOKE_IRQPRIO_SPE_FP_DATA:
+	case BOOKE_IRQPRIO_SPE_FP_ROUND:
+#endif
+#ifdef CONFIG_ALTIVEC
+	case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
+	case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
+#endif
+	case BOOKE_IRQPRIO_AP_UNAVAIL:
+		allowed = 1;
+		msr_mask = MSR_CE | MSR_ME | MSR_DE;
+		int_class = INT_CLASS_NONCRIT;
+		break;
+	case BOOKE_IRQPRIO_WATCHDOG:
+	case BOOKE_IRQPRIO_CRITICAL:
+	case BOOKE_IRQPRIO_DBELL_CRIT:
+		allowed = vcpu->arch.shared->msr & MSR_CE;
+		allowed = allowed && !crit;
+		msr_mask = MSR_ME;
+		int_class = INT_CLASS_CRIT;
+		break;
+	case BOOKE_IRQPRIO_MACHINE_CHECK:
+		allowed = vcpu->arch.shared->msr & MSR_ME;
+		allowed = allowed && !crit;
+		int_class = INT_CLASS_MC;
+		break;
+	case BOOKE_IRQPRIO_DECREMENTER:
+	case BOOKE_IRQPRIO_FIT:
+		keep_irq = true;
+		fallthrough;
+	case BOOKE_IRQPRIO_EXTERNAL:
+	case BOOKE_IRQPRIO_DBELL:
+		allowed = vcpu->arch.shared->msr & MSR_EE;
+		allowed = allowed && !crit;
+		msr_mask = MSR_CE | MSR_ME | MSR_DE;
+		int_class = INT_CLASS_NONCRIT;
+		break;
+	case BOOKE_IRQPRIO_DEBUG:
+		allowed = vcpu->arch.shared->msr & MSR_DE;
+		allowed = allowed && !crit;
+		msr_mask = MSR_ME;
+		if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+			int_class = INT_CLASS_DBG;
+		else
+			int_class = INT_CLASS_CRIT;
+
+		break;
+	}
+
+	if (allowed) {
+		/* Save nip/msr in the register pair of the interrupt class */
+		switch (int_class) {
+		case INT_CLASS_NONCRIT:
+			set_guest_srr(vcpu, vcpu->arch.regs.nip,
+				      vcpu->arch.shared->msr);
+			break;
+		case INT_CLASS_CRIT:
+			set_guest_csrr(vcpu, vcpu->arch.regs.nip,
+				       vcpu->arch.shared->msr);
+			break;
+		case INT_CLASS_DBG:
+			set_guest_dsrr(vcpu, vcpu->arch.regs.nip,
+				       vcpu->arch.shared->msr);
+			break;
+		case INT_CLASS_MC:
+			set_guest_mcsrr(vcpu, vcpu->arch.regs.nip,
+					vcpu->arch.shared->msr);
+			break;
+		}
+
+		vcpu->arch.regs.nip = vcpu->arch.ivpr |
+					vcpu->arch.ivor[priority];
+		if (update_esr)
+			kvmppc_set_esr(vcpu, vcpu->arch.queued_esr);
+		if (update_dear)
+			kvmppc_set_dar(vcpu, vcpu->arch.queued_dear);
+		if (update_epr) {
+			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+			else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+				BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+				kvmppc_mpic_set_epr(vcpu);
+			}
+		}
+
+		/* Only the bits of msr_mask survive the interrupt */
+		new_msr &= msr_mask;
+#if defined(CONFIG_64BIT)
+		if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+			new_msr |= MSR_CM;
+#endif
+		kvmppc_set_msr(vcpu, new_msr);
+
+		if (!keep_irq)
+			clear_bit(priority, &vcpu->arch.pending_exceptions);
+	}
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	/*
+	 * If an interrupt is pending but masked, raise a guest doorbell
+	 * so that we are notified when the guest enables the relevant
+	 * MSR bit.
+	 */
+	if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE)
+		kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT);
+	if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE)
+		kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT);
+	/* BOOKE_IRQPRIO_MACHINE_CHECK is a bit number, not a bit mask */
+	if (test_bit(BOOKE_IRQPRIO_MACHINE_CHECK,
+		     &vcpu->arch.pending_exceptions))
+		kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC);
+#endif
+
+	return allowed;
+}
+
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+	u64 tb, wdt_tb, wdt_ticks = 0;
+	u64 nr_jiffies = 0;
+	u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+	wdt_tb = 1ULL << (63 - period);	/* timebase bit selected by the period */
+	tb = get_tb();
+	/*
+	 * The watchdog timeout will happen when the TB bit corresponding
+	 * to the watchdog toggles from 0 to 1.
+	 */
+	if (tb & wdt_tb)
+		wdt_ticks = wdt_tb;	/* bit is 1 now: it must fall to 0 first */
+
+	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));	/* ticks until the bit flips */
+
+	/* Convert timebase ticks to jiffies */
+	nr_jiffies = wdt_ticks;
+
+	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+		nr_jiffies++;	/* nonzero remainder: round up */
+
+	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{	/* Re-arm wdt_timer for the next watchdog timeout, or stop it. */
+	unsigned long nr_jiffies;
+	unsigned long flags;
+
+	/*
+	 * Unless both TSR_ENW and TSR_WIS are set there is no need to exit
+	 * to userspace, so clear the KVM_REQ_WATCHDOG request.
+	 */
+	if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
+		kvm_clear_request(KVM_REQ_WATCHDOG, vcpu);
+
+	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);	/* covers timer update */
+	nr_jiffies = watchdog_next_timeout(vcpu);
+	/*
+	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+	 * then do not run the watchdog timer as this can break timer APIs.
+	 */
+	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+	else
+		del_timer(&vcpu->arch.wdt_timer);
+	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+static void kvmppc_watchdog_func(struct timer_list *t)
+{	/* wdt_timer callback: advance the watchdog state kept in TSR. */
+	struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.wdt_timer);
+	u32 tsr, new_tsr;
+	int final;
+
+	do {	/* retry if TSR changed between the read and the cmpxchg */
+		new_tsr = tsr = vcpu->arch.tsr;
+		final = 0;
+
+		/* Time out event */
+		if (tsr & TSR_ENW) {
+			if (tsr & TSR_WIS)
+				final = 1;	/* ENW and WIS were already set */
+			else
+				new_tsr = tsr | TSR_WIS;	/* ENW set: raise WIS */
+		} else {
+			new_tsr = tsr | TSR_ENW;	/* nothing set yet: set ENW */
+		}
+	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+	if (new_tsr & TSR_WIS) {	/* make the vcpu re-check timer interrupts */
+		smp_wmb();
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * If this is final watchdog expiry and some action is required
+	 * then exit to userspace.
+	 */
+	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+	    vcpu->arch.watchdog_enabled) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * Stop running the watchdog timer after final expiration to
+	 * prevent the host from being flooded with timers if the
+	 * guest sets a short period.
+	 * Timers will resume when TSR/TCR is updated next time.
+	 */
+	if (!final)
+		arm_next_watchdog(vcpu);
+}
+
+static void update_timer_ints(struct kvm_vcpu *vcpu)
+{	/* Queue or dequeue timer interrupts to match the TCR/TSR state. */
+	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
+		kvmppc_core_queue_dec(vcpu);	/* TCR_DIE and TSR_DIS both set */
+	else
+		kvmppc_core_dequeue_dec(vcpu);
+
+	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+		kvmppc_core_queue_watchdog(vcpu);	/* TCR_WIE and TSR_WIS both set */
+	else
+		kvmppc_core_dequeue_watchdog(vcpu);
+}
+
+static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
+{	/* Try to deliver one pending exception, then publish int_pending. */
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	const unsigned long nbits = BITS_PER_BYTE * sizeof(*pending);
+	unsigned int priority;
+
+	/* Unlike __ffs(), find_first_bit() is defined for an empty mask. */
+	priority = find_first_bit(pending, nbits);
+	while (priority < BOOKE_IRQPRIO_MAX) {
+		/* Lowest bit number first; stop once one was delivered. */
+		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
+			break;
+		priority = find_next_bit(pending, nbits, priority + 1);
+	}
+
+	/* Tell the guest about our interrupt status */
+	vcpu->arch.shared->int_pending = !!*pending;
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+	int r = 0;	/* 0 unless a request is pending or the vcpu was halted */
+	WARN_ON_ONCE(!irqs_disabled());	/* expected to run with irqs off */
+
+	kvmppc_core_check_exceptions(vcpu);
+
+	if (kvm_request_pending(vcpu)) {
+		/* Exception delivery raised request; start over */
+		return 1;
+	}
+
+	if (vcpu->arch.shared->msr & MSR_WE) {	/* guest MSR has WE set */
+		local_irq_enable();	/* irqs are on while the vcpu is halted */
+		kvm_vcpu_halt(vcpu);
+		hard_irq_disable();
+
+		kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+		r = 1;
+	}
+
+	return r;
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{	/* Service pending vcpu requests; returns 0 when userspace must run. */
+	int r = 1; /* Indicate we want to get back into the guest */
+
+	if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+		update_timer_ints(vcpu);
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+		kvmppc_core_flush_tlb(vcpu);
+#endif
+
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+		vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
+		r = 0;	/* exit to userspace */
+	}
+
+	if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) {
+		vcpu->run->epr.epr = 0;
+		vcpu->arch.epr_needed = true;
+		vcpu->run->exit_reason = KVM_EXIT_EPR;	/* overrides WATCHDOG above */
+		r = 0;	/* exit to userspace */
+	}
+
+	return r;
+}
+
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
+{	/* Enter the guest, switching FP/AltiVec/debug state around the run. */
+	int ret, s;
+	struct debug_reg debug;	/* scratch copy, then the saved host debug state */
+
+	if (!vcpu->arch.sane) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return -EINVAL;
+	}
+
+	s = kvmppc_prepare_to_enter(vcpu);
+	if (s <= 0) {	/* not entering the guest: return s as the result */
+		ret = s;
+		goto out;
+	}
+	/* interrupts now hard-disabled */
+
+#ifdef CONFIG_PPC_FPU
+	/* Save userspace FPU state in stack */
+	enable_kernel_fp();
+
+	/*
+	 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
+	 * as always using the FPU.
+	 */
+	kvmppc_load_guest_fp(vcpu);
+#endif
+
+#ifdef CONFIG_ALTIVEC
+	/* Save userspace AltiVec state in stack */
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		enable_kernel_altivec();
+	/*
+	 * Since we can't trap on MSR_VEC in GS-mode, we consider the guest
+	 * as always using the AltiVec.
+	 */
+	kvmppc_load_guest_altivec(vcpu);
+#endif
+
+	/* Switch to guest debug context */
+	debug = vcpu->arch.dbg_reg;
+	switch_booke_debug_regs(&debug);
+	debug = current->thread.debug;	/* keep the thread's values for restore */
+	current->thread.debug = vcpu->arch.dbg_reg;
+
+	vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
+	kvmppc_fix_ee_before_entry();
+
+	ret = __kvmppc_vcpu_run(vcpu);
+
+	/* No need for guest_exit. It's done in handle_exit.
+	   We also get here with interrupts enabled. */
+
+	/* Switch back to user space debug context */
+	switch_booke_debug_regs(&debug);
+	current->thread.debug = debug;
+
+#ifdef CONFIG_PPC_FPU
+	kvmppc_save_guest_fp(vcpu);
+#endif
+
+#ifdef CONFIG_ALTIVEC
+	kvmppc_save_guest_altivec(vcpu);
+#endif
+
+out:
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	return ret;
+}
+
+static int emulation_exit(struct kvm_vcpu *vcpu)
+{	/* Emulate one instruction and map the result to a RESUME_* code. */
+	enum emulation_result er;
+
+	er = kvmppc_emulate_instruction(vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		/* don't overwrite subtypes, just account kvm_stats */
+		kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
+		/* Future optimization: only reload non-volatiles if
+		 * they were actually modified by emulation. */
+		return RESUME_GUEST_NV;
+
+	case EMULATE_AGAIN:
+		return RESUME_GUEST;
+
+	case EMULATE_FAIL:
+		printk(KERN_CRIT "%s: emulation at %lx failed (%08lx)\n",
+		       __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst);
+		/* For debugging, encode the failing instruction and
+		 * report it to userspace. */
+		vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;	/* upper half ones */
+		vcpu->run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+		kvmppc_core_queue_program(vcpu, ESR_PIL);
+		return RESUME_HOST;
+
+	case EMULATE_EXIT_USER:
+		return RESUME_HOST;
+
+	default:
+		BUG();	/* any other emulation result is unexpected */
+	}
+}
+
+static int kvmppc_handle_debug(struct kvm_vcpu *vcpu)
+{	/* Route a debug event to the guest or report it to userspace. */
+	struct kvm_run *run = vcpu->run;
+	struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
+	u32 dbsr = vcpu->arch.dbsr;	/* local copy; IDE may be masked below */
+
+	if (vcpu->guest_debug == 0) {
+		/*
+		 * Debug resources belong to Guest.
+		 * Imprecise debug event is not injected
+		 */
+		if (dbsr & DBSR_IDE) {
+			dbsr &= ~DBSR_IDE;
+			if (!dbsr)
+				return RESUME_GUEST;	/* IDE was the only event */
+		}
+
+		if (dbsr && (vcpu->arch.shared->msr & MSR_DE) &&
+			    (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM))
+			kvmppc_core_queue_debug(vcpu);
+
+		/* Inject a program interrupt if trap debug is not allowed */
+		if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE))
+			kvmppc_core_queue_program(vcpu, ESR_PTR);
+
+		return RESUME_GUEST;
+	}
+
+	/*
+	 * Debug resource owned by userspace.
+	 * Clear guest dbsr (vcpu->arch.dbsr)
+	 */
+	vcpu->arch.dbsr = 0;
+	run->debug.arch.status = 0;
+	run->debug.arch.address = vcpu->arch.regs.nip;	/* default: current nip */
+
+	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+	} else {
+		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))	/* DAC1 wins over DAC2 */
+			run->debug.arch.address = dbg_reg->dac1;
+		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+			run->debug.arch.address = dbg_reg->dac2;
+	}
+
+	return RESUME_HOST;
+}
+
+static void kvmppc_fill_pt_regs(struct pt_regs *regs)
+{	/* Fill *regs with a minimal snapshot of the current host context. */
+	ulong r1, msr, lr;
+
+	asm("mr %0, 1" : "=r"(r1));	/* copy of GPR 1 */
+	asm("mflr %0" : "=r"(lr));	/* link register */
+	asm("mfmsr %0" : "=r"(msr));	/* machine state register */
+
+	memset(regs, 0, sizeof(*regs));	/* every other field reads as zero */
+	regs->gpr[1] = r1;
+	regs->nip = _THIS_IP_;
+	regs->msr = msr;
+	regs->link = lr;
+}
+
+/*
+ * For interrupts that need to be handled by host interrupt handlers,
+ * the corresponding host handlers are called from here in a similar
+ * (but not identical) way to how the low-level handlers call them
+ * (such as from arch/powerpc/kernel/head_fsl_booke.S).
+ */
+static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
+				     unsigned int exit_nr)
+{
+	struct pt_regs regs;	/* filled by kvmppc_fill_pt_regs() per case */
+
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_EXTERNAL:
+		kvmppc_fill_pt_regs(&regs);
+		do_IRQ(&regs);
+		break;
+	case BOOKE_INTERRUPT_DECREMENTER:
+		kvmppc_fill_pt_regs(&regs);
+		timer_interrupt(&regs);
+		break;
+#if defined(CONFIG_PPC_DOORBELL)
+	case BOOKE_INTERRUPT_DOORBELL:
+		kvmppc_fill_pt_regs(&regs);
+		doorbell_exception(&regs);
+		break;
+#endif
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		/* FIXME */
+		break;
+	case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+		kvmppc_fill_pt_regs(&regs);
+		performance_monitor_exception(&regs);
+		break;
+	case BOOKE_INTERRUPT_WATCHDOG:
+		kvmppc_fill_pt_regs(&regs);
+#ifdef CONFIG_BOOKE_WDT
+		WatchdogException(&regs);
+#else
+		unknown_exception(&regs);
+#endif
+		break;
+	case BOOKE_INTERRUPT_CRITICAL:
+		kvmppc_fill_pt_regs(&regs);
+		unknown_exception(&regs);
+		break;
+	case BOOKE_INTERRUPT_DEBUG:
+		/* Save DBSR before preemption is enabled */
+		vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+		kvmppc_clear_dbsr();
+		break;
+	}	/* all other exit numbers need no host handler here */
+}
+
+static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu,
+				  enum emulation_result emulated, u32 last_inst)
+{	/* Map a non-DONE instruction-load result to a RESUME_* code. */
+	switch (emulated) {
+	case EMULATE_AGAIN:
+		return RESUME_GUEST;
+
+	case EMULATE_FAIL:
+		pr_debug("%s: load instruction from guest address %lx failed\n",
+		       __func__, vcpu->arch.regs.nip);
+		/* For debugging, encode the failing instruction and
+		 * report it to userspace. */
+		vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;	/* upper half ones */
+		vcpu->run->hw.hardware_exit_reason |= last_inst;
+		kvmppc_core_queue_program(vcpu, ESR_PIL);
+		return RESUME_HOST;
+
+	default:
+		BUG();	/* only EMULATE_AGAIN and EMULATE_FAIL are handled */
+	}
+}
+
+/*
+ * kvmppc_handle_exit - handle the guest exit identified by exit_nr
+ *
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+	struct kvm_run *run = vcpu->run;
+	int r = RESUME_HOST;
+	int s;
+	int idx;	/* SRCU read-side index for the TLB miss cases */
+	u32 last_inst = KVM_INST_FETCH_FAILED;
+	ppc_inst_t pinst;
+	enum emulation_result emulated = EMULATE_DONE;
+
+	/* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */
+	kvmppc_fix_ee_after_exit();
+
+	/* update before a new last_exit_type is rewritten */
+	kvmppc_update_timing_stats(vcpu);
+
+	/* restart interrupts if they were meant for the host */
+	kvmppc_restart_interrupt(vcpu, exit_nr);
+
+	/*
+	 * get last instruction before being preempted
+	 * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA
+	 */
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+	case BOOKE_INTERRUPT_DTLB_MISS:
+	case BOOKE_INTERRUPT_HV_PRIV:
+		emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+		last_inst = ppc_inst_val(pinst);
+		break;
+	case BOOKE_INTERRUPT_PROGRAM:
+		/* SW breakpoints arrive as illegal instructions on HV */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+			emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+			last_inst = ppc_inst_val(pinst);
+		}
+		break;
+	default:
+		break;
+	}
+
+	trace_kvm_exit(exit_nr, vcpu);
+
+	context_tracking_guest_exit();
+	if (!vtime_accounting_enabled_this_cpu()) {
+		local_irq_enable();
+		/*
+		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * ticks that occurred while running the guest are accounted to
+		 * the guest. If vtime accounting is enabled, accounting uses
+		 * TB rather than ticks, so it can be done without enabling
+		 * interrupts here, which has the problem that it accounts
+		 * interrupt processing overhead to the host.
+		 */
+		local_irq_disable();
+	}
+	vtime_account_guest_exit();
+
+	local_irq_enable();
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	if (emulated != EMULATE_DONE) {	/* the instruction fetch above did not complete */
+		r = kvmppc_resume_inst_load(vcpu, emulated, last_inst);
+		goto out;
+	}
+
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
+		kvmppc_dump_vcpu(vcpu);
+		/* For debugging, send invalid exit reason to user space */
+		run->hw.hardware_exit_reason = ~1ULL << 32;
+		run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR);
+		r = RESUME_HOST;
+		break;
+
+	case BOOKE_INTERRUPT_EXTERNAL:
+		kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DECREMENTER:
+		kvmppc_account_exit(vcpu, DEC_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_WATCHDOG:
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DOORBELL:
+		kvmppc_account_exit(vcpu, DBELL_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_GUEST_DBELL_CRIT:
+		kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+		/*
+		 * We are here because there is a pending guest interrupt
+		 * which could not be delivered as MSR_CE or MSR_ME was not
+		 * set.  Once we break from here we will retry delivery.
+		 */
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_GUEST_DBELL:
+		kvmppc_account_exit(vcpu, GDBELL_EXITS);
+
+		/*
+		 * We are here because there is a pending guest interrupt
+		 * which could not be delivered as MSR_EE was not set.  Once
+		 * we break from here we will retry delivery.
+		 */
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_PERFORMANCE_MONITOR:
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_HV_PRIV:
+		r = emulation_exit(vcpu);
+		break;
+
+	case BOOKE_INTERRUPT_PROGRAM:
+		if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
+			(last_inst == KVMPPC_INST_SW_BREAKPOINT)) {
+			/*
+			 * We are here because of an SW breakpoint instr,
+			 * so lets return to host to handle.
+			 */
+			r = kvmppc_handle_debug(vcpu);
+			run->exit_reason = KVM_EXIT_DEBUG;
+			kvmppc_account_exit(vcpu, DEBUG_EXITS);
+			break;
+		}
+
+		if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
+			/*
+			 * Program traps generated by user-level software must
+			 * be handled by the guest kernel.
+			 *
+			 * In GS mode, hypervisor privileged instructions trap
+			 * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are
+			 * actual program interrupts, handled by the guest.
+			 */
+			kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
+			r = RESUME_GUEST;
+			kvmppc_account_exit(vcpu, USR_PR_INST);
+			break;
+		}
+
+		r = emulation_exit(vcpu);
+		break;
+
+	case BOOKE_INTERRUPT_FP_UNAVAIL:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+		kvmppc_account_exit(vcpu, FP_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+
+#ifdef CONFIG_SPE
+	case BOOKE_INTERRUPT_SPE_UNAVAIL: {
+		if (vcpu->arch.shared->msr & MSR_SPE)
+			kvmppc_vcpu_enable_spe(vcpu);
+		else
+			kvmppc_booke_queue_irqprio(vcpu,
+						   BOOKE_IRQPRIO_SPE_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+	}
+
+	case BOOKE_INTERRUPT_SPE_FP_DATA:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SPE_FP_ROUND:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+		r = RESUME_GUEST;
+		break;
+#elif defined(CONFIG_SPE_POSSIBLE)
+	case BOOKE_INTERRUPT_SPE_UNAVAIL:
+		/*
+		 * Guest wants SPE, but host kernel doesn't support it.  Send
+		 * an "unimplemented operation" program check to the guest.
+		 */
+		kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
+		r = RESUME_GUEST;
+		break;
+
+	/*
+	 * These really should never happen without CONFIG_SPE,
+	 * as we should never enable the real MSR[SPE] in the guest.
+	 */
+	case BOOKE_INTERRUPT_SPE_FP_DATA:
+	case BOOKE_INTERRUPT_SPE_FP_ROUND:
+		printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
+		       __func__, exit_nr, vcpu->arch.regs.nip);
+		run->hw.hardware_exit_reason = exit_nr;
+		r = RESUME_HOST;
+		break;
+#endif /* CONFIG_SPE_POSSIBLE */
+
+/*
+ * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC,
+ * see kvmppc_e500mc_check_processor_compat().
+ */
+#ifdef CONFIG_ALTIVEC
+	case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
+		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
+		r = RESUME_GUEST;
+		break;
+#endif
+
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+		kvmppc_core_queue_data_storage(vcpu, 0, vcpu->arch.fault_dear,
+		                               vcpu->arch.fault_esr);
+		kvmppc_account_exit(vcpu, DSI_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_INST_STORAGE:
+		kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
+		kvmppc_account_exit(vcpu, ISI_EXITS);
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_ALIGNMENT:
+		kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear,
+		                            vcpu->arch.fault_esr);
+		r = RESUME_GUEST;
+		break;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	case BOOKE_INTERRUPT_HV_SYSCALL:
+		if (!(vcpu->arch.shared->msr & MSR_PR)) {
+			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+		} else {
+			/*
+			 * hcall from guest userspace -- send privileged
+			 * instruction program check.
+			 */
+			kvmppc_core_queue_program(vcpu, ESR_PPR);
+		}
+
+		r = RESUME_GUEST;
+		break;
+#else
+	case BOOKE_INTERRUPT_SYSCALL:
+		if (!(vcpu->arch.shared->msr & MSR_PR) &&
+		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+			/* KVM PV hypercalls */
+			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+			r = RESUME_GUEST;	/* also set unconditionally below */
+		} else {
+			/* Guest syscalls */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+		}
+		kvmppc_account_exit(vcpu, SYSCALL_EXITS);
+		r = RESUME_GUEST;
+		break;
+#endif
+
+	case BOOKE_INTERRUPT_DTLB_MISS: {
+		unsigned long eaddr = vcpu->arch.fault_dear;
+		int gtlb_index;
+		gpa_t gpaddr;
+		gfn_t gfn;
+
+#ifdef CONFIG_KVM_E500V2
+		if (!(vcpu->arch.shared->msr & MSR_PR) &&
+		    (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
+			kvmppc_map_magic(vcpu);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+			r = RESUME_GUEST;
+
+			break;
+		}
+#endif
+
+		/* Check the guest TLB. */
+		gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_core_queue_dtlb_miss(vcpu,
+			                            vcpu->arch.fault_dear,
+			                            vcpu->arch.fault_esr);
+			kvmppc_mmu_dtlb_miss(vcpu);
+			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+		gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't, and it is RAM. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
+			kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+			r = RESUME_GUEST;
+		} else {
+			/* Guest has mapped and accessed a page which is not
+			 * actually RAM. */
+			vcpu->arch.paddr_accessed = gpaddr;
+			vcpu->arch.vaddr_accessed = eaddr;
+			r = kvmppc_emulate_mmio(vcpu);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);
+		}
+
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		break;
+	}
+
+	case BOOKE_INTERRUPT_ITLB_MISS: {
+		unsigned long eaddr = vcpu->arch.regs.nip;
+		gpa_t gpaddr;
+		gfn_t gfn;
+		int gtlb_index;
+
+		r = RESUME_GUEST;
+
+		/* Check the guest TLB. */
+		gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+			kvmppc_mmu_itlb_miss(vcpu);
+			kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
+			break;
+		}
+
+		kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+		gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+		gfn = gpaddr >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
+		} else {
+			/* Guest mapped and leaped at non-RAM! */
+			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
+		}
+
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		break;
+	}
+
+	case BOOKE_INTERRUPT_DEBUG: {
+		r = kvmppc_handle_debug(vcpu);
+		if (r == RESUME_HOST)
+			run->exit_reason = KVM_EXIT_DEBUG;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
+		break;
+	}
+
+	default:
+		printk(KERN_EMERG "exit_nr %d\n", exit_nr);
+		BUG();
+	}
+
+out:
+	/*
+	 * To avoid clobbering exit_reason, only check for signals if we
+	 * aren't already exiting to userspace for some other reason.
+	 */
+	if (!(r & RESUME_HOST)) {
+		s = kvmppc_prepare_to_enter(vcpu);
+		if (s <= 0)	/* cannot re-enter: fold s into the return value */
+			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+		else {
+			/* interrupts now hard-disabled */
+			kvmppc_fix_ee_before_entry();
+			kvmppc_load_guest_fp(vcpu);
+			kvmppc_load_guest_altivec(vcpu);
+		}
+	}
+
+	return r;
+}
+
+static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
+{	/* Install new_tsr as the vcpu's TSR and propagate the change. */
+	u32 old_tsr = vcpu->arch.tsr;
+
+	vcpu->arch.tsr = new_tsr;
+
+	if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))	/* ENW/WIS changed */
+		arm_next_watchdog(vcpu);
+
+	update_timer_ints(vcpu);
+}
+
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{	/* Set up the vcpu's watchdog lock and timer; always returns 0. */
+	/* setup watchdog timer once */
+	spin_lock_init(&vcpu->arch.wdt_lock);
+	timer_setup(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, 0);
+
+	/*
+	 * Clear DBSR.MRR to avoid guest debug interrupt as
+	 * this is of host interest
+	 */
+	mtspr(SPRN_DBSR, DBSR_MRR);
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{	/* Tear down what kvmppc_subarch_vcpu_init() set up. */
+	del_timer_sync(&vcpu->arch.wdt_timer);	/* stop the watchdog timer */
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{	/* Copy the vcpu's register state into *regs; always returns 0. */
+	int i;
+
+	vcpu_load(vcpu);	/* paired with vcpu_put() below */
+
+	regs->pc = vcpu->arch.regs.nip;
+	regs->cr = kvmppc_get_cr(vcpu);
+	regs->ctr = vcpu->arch.regs.ctr;
+	regs->lr = vcpu->arch.regs.link;
+	regs->xer = kvmppc_get_xer(vcpu);
+	regs->msr = vcpu->arch.shared->msr;
+	regs->srr0 = kvmppc_get_srr0(vcpu);
+	regs->srr1 = kvmppc_get_srr1(vcpu);
+	regs->pid = vcpu->arch.pid;
+	regs->sprg0 = kvmppc_get_sprg0(vcpu);
+	regs->sprg1 = kvmppc_get_sprg1(vcpu);
+	regs->sprg2 = kvmppc_get_sprg2(vcpu);
+	regs->sprg3 = kvmppc_get_sprg3(vcpu);
+	regs->sprg4 = kvmppc_get_sprg4(vcpu);
+	regs->sprg5 = kvmppc_get_sprg5(vcpu);
+	regs->sprg6 = kvmppc_get_sprg6(vcpu);
+	regs->sprg7 = kvmppc_get_sprg7(vcpu);
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)	/* every GPR slot in *regs */
+		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{	/* Load the vcpu's register state from *regs; always returns 0. */
+	int i;
+
+	vcpu_load(vcpu);	/* paired with vcpu_put() below */
+
+	vcpu->arch.regs.nip = regs->pc;
+	kvmppc_set_cr(vcpu, regs->cr);
+	vcpu->arch.regs.ctr = regs->ctr;
+	vcpu->arch.regs.link = regs->lr;
+	kvmppc_set_xer(vcpu, regs->xer);
+	kvmppc_set_msr(vcpu, regs->msr);	/* goes through the MSR setter */
+	kvmppc_set_srr0(vcpu, regs->srr0);
+	kvmppc_set_srr1(vcpu, regs->srr1);
+	kvmppc_set_pid(vcpu, regs->pid);
+	kvmppc_set_sprg0(vcpu, regs->sprg0);
+	kvmppc_set_sprg1(vcpu, regs->sprg1);
+	kvmppc_set_sprg2(vcpu, regs->sprg2);
+	kvmppc_set_sprg3(vcpu, regs->sprg3);
+	kvmppc_set_sprg4(vcpu, regs->sprg4);
+	kvmppc_set_sprg5(vcpu, regs->sprg5);
+	kvmppc_set_sprg6(vcpu, regs->sprg6);
+	kvmppc_set_sprg7(vcpu, regs->sprg7);
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)	/* every GPR slot in *regs */
+		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+static void get_sregs_base(struct kvm_vcpu *vcpu,
+                           struct kvm_sregs *sregs)
+{	/* Fill the KVM_SREGS_E_BASE part of *sregs from the vcpu. */
+	u64 tb = get_tb();	/* one timebase sample, used for dec and tb */
+
+	sregs->u.e.features |= KVM_SREGS_E_BASE;	/* mark the group as valid */
+
+	sregs->u.e.csrr0 = vcpu->arch.csrr0;
+	sregs->u.e.csrr1 = vcpu->arch.csrr1;
+	sregs->u.e.mcsr = vcpu->arch.mcsr;
+	sregs->u.e.esr = kvmppc_get_esr(vcpu);
+	sregs->u.e.dear = kvmppc_get_dar(vcpu);
+	sregs->u.e.tsr = vcpu->arch.tsr;
+	sregs->u.e.tcr = vcpu->arch.tcr;
+	sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
+	sregs->u.e.tb = tb;
+	sregs->u.e.vrsave = vcpu->arch.vrsave;
+}
+
+static int set_sregs_base(struct kvm_vcpu *vcpu,
+                          struct kvm_sregs *sregs)
+{	/* Apply the KVM_SREGS_E_BASE part of *sregs; always returns 0. */
+	if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
+		return 0;	/* group not provided: nothing to do */
+
+	vcpu->arch.csrr0 = sregs->u.e.csrr0;
+	vcpu->arch.csrr1 = sregs->u.e.csrr1;
+	vcpu->arch.mcsr = sregs->u.e.mcsr;
+	kvmppc_set_esr(vcpu, sregs->u.e.esr);
+	kvmppc_set_dar(vcpu, sregs->u.e.dear);
+	vcpu->arch.vrsave = sregs->u.e.vrsave;
+	kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
+
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {	/* DEC is opt-in */
+		vcpu->arch.dec = sregs->u.e.dec;
+		kvmppc_emulate_dec(vcpu);
+	}
+
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR)	/* TSR is opt-in */
+		kvmppc_set_tsr(vcpu, sregs->u.e.tsr);
+
+	return 0;
+}
+
+static void get_sregs_arch206(struct kvm_vcpu *vcpu,
+                              struct kvm_sregs *sregs)
+{	/* Fill the KVM_SREGS_E_ARCH206 part of *sregs from the vcpu. */
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206;	/* mark the group as valid */
+
+	sregs->u.e.pir = vcpu->vcpu_id;	/* PIR is reported as the vcpu id */
+	sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
+	sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
+	sregs->u.e.decar = vcpu->arch.decar;
+	sregs->u.e.ivpr = vcpu->arch.ivpr;
+}
+
+static int set_sregs_arch206(struct kvm_vcpu *vcpu,
+                             struct kvm_sregs *sregs)
+{	/* Apply the KVM_SREGS_E_ARCH206 part of *sregs; 0 or -EINVAL. */
+	if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
+		return 0;	/* group not provided: nothing to do */
+
+	if (sregs->u.e.pir != vcpu->vcpu_id)	/* PIR must match the vcpu id */
+		return -EINVAL;
+
+	vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
+	vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
+	vcpu->arch.decar = sregs->u.e.decar;
+	vcpu->arch.ivpr = sregs->u.e.ivpr;
+
+	return 0;
+}
+
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{	/* Copy the 16 base IVOR values into ivor_low[]; always returns 0. */
+	sregs->u.e.features |= KVM_SREGS_E_IVOR;	/* mark the group as valid */
+
+	sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+	sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+	sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+	sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+	sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+	sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+	sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+	sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+	sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+	sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+	sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+	sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+	sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+	sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+	sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+	sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+	return 0;
+}
+
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{	/* Load the 16 base IVOR values from ivor_low[]; always returns 0. */
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;	/* group not provided: nothing to do */
+
+	vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{	/* Fill *sregs: PVR, base and arch206 groups, then the kvm_ops hook. */
+	int ret;
+
+	vcpu_load(vcpu);	/* paired with vcpu_put() below */
+
+	sregs->pvr = vcpu->arch.pvr;
+
+	get_sregs_base(vcpu, sregs);
+	get_sregs_arch206(vcpu, sregs);
+	ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);	/* hook's result is returned */
+
+	vcpu_put(vcpu);
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{	/* Apply *sregs to the vcpu; stops at the first step that fails. */
+	int ret = -EINVAL;	/* returned as-is when the PVR does not match */
+
+	vcpu_load(vcpu);
+	if (vcpu->arch.pvr != sregs->pvr)
+		goto out;
+
+	ret = set_sregs_base(vcpu, sregs);
+	if (ret < 0)
+		goto out;
+
+	ret = set_sregs_arch206(vcpu, sregs);
+	if (ret < 0)
+		goto out;
+
+	ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);	/* kvm_ops hook runs last */
+
+out:
+	vcpu_put(vcpu);
+	return ret;
+}
+
+/*
+ * Read one register, selected by @id, into *@val.
+ *
+ * The registers known here (IACs, DACs, EPR, EPCR on 64-bit, TCR, TSR, the
+ * debug instruction and VRSAVE) are answered directly and yield 0. Any other
+ * id is passed to the backend's get_one_reg hook, whose status is returned.
+ */
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	switch (id) {
+	case KVM_REG_PPC_IAC1:
+		*val = get_reg_val(id, vcpu->arch.dbg_reg.iac1);
+		return 0;
+	case KVM_REG_PPC_IAC2:
+		*val = get_reg_val(id, vcpu->arch.dbg_reg.iac2);
+		return 0;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case KVM_REG_PPC_IAC3:
+		*val = get_reg_val(id, vcpu->arch.dbg_reg.iac3);
+		return 0;
+	case KVM_REG_PPC_IAC4:
+		*val = get_reg_val(id, vcpu->arch.dbg_reg.iac4);
+		return 0;
+#endif
+	case KVM_REG_PPC_DAC1:
+		*val = get_reg_val(id, vcpu->arch.dbg_reg.dac1);
+		return 0;
+	case KVM_REG_PPC_DAC2:
+		*val = get_reg_val(id, vcpu->arch.dbg_reg.dac2);
+		return 0;
+	case KVM_REG_PPC_EPR: {
+		u32 cur_epr = kvmppc_get_epr(vcpu);
+
+		*val = get_reg_val(id, cur_epr);
+		return 0;
+	}
+#if defined(CONFIG_64BIT)
+	case KVM_REG_PPC_EPCR:
+		*val = get_reg_val(id, vcpu->arch.epcr);
+		return 0;
+#endif
+	case KVM_REG_PPC_TCR:
+		*val = get_reg_val(id, vcpu->arch.tcr);
+		return 0;
+	case KVM_REG_PPC_TSR:
+		*val = get_reg_val(id, vcpu->arch.tsr);
+		return 0;
+	case KVM_REG_PPC_DEBUG_INST:
+		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+		return 0;
+	case KVM_REG_PPC_VRSAVE:
+		*val = get_reg_val(id, vcpu->arch.vrsave);
+		return 0;
+	default:
+		break;
+	}
+
+	/* Not handled above: let the backend decide. */
+	return vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
+}
+
+/*
+ * Write one register, selected by @id, from *@val.
+ *
+ * Debug address registers and VRSAVE are stored directly. EPR, EPCR (64-bit
+ * only), TSR (set, OR-in and clear variants) and TCR go through their setter
+ * helpers. All of these yield 0. Any other id is passed to the backend's
+ * set_one_reg hook, whose status is returned.
+ */
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+			union kvmppc_one_reg *val)
+{
+	switch (id) {
+	case KVM_REG_PPC_IAC1:
+		vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val);
+		return 0;
+	case KVM_REG_PPC_IAC2:
+		vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val);
+		return 0;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case KVM_REG_PPC_IAC3:
+		vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val);
+		return 0;
+	case KVM_REG_PPC_IAC4:
+		vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val);
+		return 0;
+#endif
+	case KVM_REG_PPC_DAC1:
+		vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val);
+		return 0;
+	case KVM_REG_PPC_DAC2:
+		vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val);
+		return 0;
+	case KVM_REG_PPC_EPR: {
+		u32 epr = set_reg_val(id, *val);
+
+		kvmppc_set_epr(vcpu, epr);
+		return 0;
+	}
+#if defined(CONFIG_64BIT)
+	case KVM_REG_PPC_EPCR: {
+		u32 epcr = set_reg_val(id, *val);
+
+		kvmppc_set_epcr(vcpu, epcr);
+		return 0;
+	}
+#endif
+	case KVM_REG_PPC_OR_TSR: {
+		u32 to_set = set_reg_val(id, *val);
+
+		kvmppc_set_tsr_bits(vcpu, to_set);
+		return 0;
+	}
+	case KVM_REG_PPC_CLEAR_TSR: {
+		u32 to_clear = set_reg_val(id, *val);
+
+		kvmppc_clr_tsr_bits(vcpu, to_clear);
+		return 0;
+	}
+	case KVM_REG_PPC_TSR: {
+		u32 whole_tsr = set_reg_val(id, *val);
+
+		kvmppc_set_tsr(vcpu, whole_tsr);
+		return 0;
+	}
+	case KVM_REG_PPC_TCR: {
+		u32 whole_tcr = set_reg_val(id, *val);
+
+		kvmppc_set_tcr(vcpu, whole_tcr);
+		return 0;
+	}
+	case KVM_REG_PPC_VRSAVE:
+		vcpu->arch.vrsave = set_reg_val(id, *val);
+		return 0;
+	default:
+		break;
+	}
+
+	/* Not handled above: let the backend decide. */
+	return vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
+}
+
+/* Reading FPU state through this entry point is not implemented: always -EOPNOTSUPP. */
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EOPNOTSUPP;
+}
+
+/* Writing FPU state through this entry point is not implemented: always -EOPNOTSUPP. */
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Run kvmppc_core_vcpu_translate() on @tr with the vcpu loaded and return
+ * its status.
+ */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                  struct kvm_translation *tr)
+{
+	int status;
+
+	vcpu_load(vcpu);
+	status = kvmppc_core_vcpu_translate(vcpu, tr);
+	vcpu_put(vcpu);
+
+	return status;
+}
+
+/* No-op in this implementation. */
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+
+}
+
+/* Dirty-log retrieval is not implemented here: always -EOPNOTSUPP. */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	return -EOPNOTSUPP;
+}
+
+/* No-op in this implementation. */
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+}
+
+/* Performs no preparation work; unconditionally reports success (0). */
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				      const struct kvm_memory_slot *old,
+				      struct kvm_memory_slot *new,
+				      enum kvm_mr_change change)
+{
+	return 0;
+}
+
+/* No-op in this implementation. */
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+}
+
+/* No-op in this implementation. */
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+/*
+ * Record a new guest EPCR value. Compiled to an empty body unless
+ * CONFIG_64BIT is defined.
+ *
+ * With CONFIG_KVM_BOOKE_HV, the GICM bit of the shadow EPCR is additionally
+ * made to follow the ICM bit of the value just stored.
+ */
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
+{
+#if defined(CONFIG_64BIT)
+	vcpu->arch.epcr = new_epcr;
+#ifdef CONFIG_KVM_BOOKE_HV
+	if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+		vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
+	else
+		vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
+#endif
+#endif
+}
+
+/*
+ * Store a new TCR value, then re-arm the watchdog and re-evaluate the timer
+ * interrupts so both see the updated control bits.
+ */
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
+{
+	vcpu->arch.tcr = new_tcr;
+	arm_next_watchdog(vcpu);
+	update_timer_ints(vcpu);
+}
+
+/*
+ * Set @tsr_bits in the vcpu's TSR, raise KVM_REQ_PENDING_TIMER and kick the
+ * vcpu.
+ */
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+	set_bits(tsr_bits, &vcpu->arch.tsr);
+	/* Order the TSR store ahead of the request that follows. */
+	smp_wmb();
+	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	kvm_vcpu_kick(vcpu);
+}
+
+/*
+ * Clear @tsr_bits in the vcpu's TSR and re-evaluate the timer interrupts.
+ * Clearing either watchdog status bit (ENW or WIS) also re-arms the watchdog.
+ */
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+	const u32 watchdog_bits = TSR_ENW | TSR_WIS;
+
+	clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+	/*
+	 * The watchdog may have been stopped because it was stuck on its
+	 * final expiration; clearing its status lets it run again.
+	 */
+	if ((tsr_bits & watchdog_bits) != 0)
+		arm_next_watchdog(vcpu);
+
+	update_timer_ints(vcpu);
+}
+
+/*
+ * Decrementer expiry handling: when TCR[ARE] is set, reload DEC from DECAR
+ * and restart the emulated decrementer; in all cases flag TSR[DIS].
+ */
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.tcr & TCR_ARE) {
+		vcpu->arch.dec = vcpu->arch.decar;
+		kvmppc_emulate_dec(vcpu);
+	}
+
+	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
+}
+
+/*
+ * Program instruction address compare slot @index with @addr.
+ *
+ * Slots 0 and 1 are always available; slots 2 and 3 only when
+ * CONFIG_PPC_ADV_DEBUG_IACS is greater than 2. On success the slot's enable
+ * bit and DBCR0_IDM are set in dbcr0 and 0 is returned. Any other index
+ * returns -EINVAL and leaves @dbg_reg untouched.
+ */
+static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
+				       uint64_t addr, int index)
+{
+	if (index == 0) {
+		dbg_reg->dbcr0 |= DBCR0_IAC1;
+		dbg_reg->iac1 = addr;
+	} else if (index == 1) {
+		dbg_reg->dbcr0 |= DBCR0_IAC2;
+		dbg_reg->iac2 = addr;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	} else if (index == 2) {
+		dbg_reg->dbcr0 |= DBCR0_IAC3;
+		dbg_reg->iac3 = addr;
+	} else if (index == 3) {
+		dbg_reg->dbcr0 |= DBCR0_IAC4;
+		dbg_reg->iac4 = addr;
+#endif
+	} else {
+		return -EINVAL;
+	}
+
+	dbg_reg->dbcr0 |= DBCR0_IDM;
+	return 0;
+}
+
+/*
+ * Program data address compare slot @index (0 or 1) with @addr.
+ *
+ * The read and/or write enable for the slot is set in dbcr0 according to the
+ * KVMPPC_DEBUG_WATCH_READ / KVMPPC_DEBUG_WATCH_WRITE bits of @type, then
+ * DBCR0_IDM is set and 0 is returned. Any other index returns -EINVAL and
+ * leaves @dbg_reg untouched.
+ */
+static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
+				       int type, int index)
+{
+	const bool on_read = type & KVMPPC_DEBUG_WATCH_READ;
+	const bool on_write = type & KVMPPC_DEBUG_WATCH_WRITE;
+
+	if (index == 0) {
+		if (on_read)
+			dbg_reg->dbcr0 |= DBCR0_DAC1R;
+		if (on_write)
+			dbg_reg->dbcr0 |= DBCR0_DAC1W;
+		dbg_reg->dac1 = addr;
+	} else if (index == 1) {
+		if (on_read)
+			dbg_reg->dbcr0 |= DBCR0_DAC2R;
+		if (on_write)
+			dbg_reg->dbcr0 |= DBCR0_DAC2W;
+		dbg_reg->dac2 = addr;
+	} else {
+		return -EINVAL;
+	}
+
+	dbg_reg->dbcr0 |= DBCR0_IDM;
+	return 0;
+}
+/*
+ * Set (@set == true) or clear (@set == false) the shadow MSRP bits that
+ * correspond to the MSR bits named in @prot_bitmap (UCLE, DE, PMM).
+ *
+ * Only does anything with CONFIG_KVM_BOOKE_HV; bits outside the three
+ * supported ones trigger BUG_ON().
+ */
+static void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap,
+				  bool set)
+{
+	/* XXX: Add similar MSR protection for BookE-PR */
+#ifdef CONFIG_KVM_BOOKE_HV
+	ulong msrp_bits = 0;
+
+	BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
+
+	/* Translate the requested MSR bits into their MSRP counterparts. */
+	if (prot_bitmap & MSR_UCLE)
+		msrp_bits |= MSRP_UCLEP;
+	if (prot_bitmap & MSR_DE)
+		msrp_bits |= MSRP_DEP;
+	if (prot_bitmap & MSR_PMM)
+		msrp_bits |= MSRP_PMMP;
+
+	if (set)
+		vcpu->arch.shadow_msrp |= msrp_bits;
+	else
+		vcpu->arch.shadow_msrp &= ~msrp_bits;
+#endif
+}
+
+/*
+ * Translate guest effective address @eaddr and describe the result in @pte.
+ *
+ * With CONFIG_KVM_E500V2, a supervisor-mode (MSR_PR clear) access to the
+ * magic page is answered from magic_page_pa without consulting the TLB.
+ * Otherwise the guest instruction or data TLB (chosen by @xlid) is searched.
+ *
+ * Returns 0 with @pte filled in (all permissions granted), or -ENOENT when
+ * the guest TLB holds no matching entry. An @xlid other than XLATE_INST or
+ * XLATE_DATA is a BUG(). @xlrw is not consulted.
+ */
+int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
+		 enum xlate_readwrite xlrw, struct kvmppc_pte *pte)
+{
+	int tlb_slot;
+	gpa_t guest_pa;
+
+#ifdef CONFIG_KVM_E500V2
+	if (!(vcpu->arch.shared->msr & MSR_PR) &&
+	    (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) {
+		pte->may_read = true;
+		pte->may_write = true;
+		pte->may_execute = true;
+		pte->eaddr = eaddr;
+		pte->vpage = eaddr >> PAGE_SHIFT;
+		pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) |
+			     (eaddr & ~PAGE_MASK);
+
+		return 0;
+	}
+#endif
+
+	/* Look the address up in the matching guest TLB. */
+	if (xlid == XLATE_INST)
+		tlb_slot = kvmppc_mmu_itlb_index(vcpu, eaddr);
+	else if (xlid == XLATE_DATA)
+		tlb_slot = kvmppc_mmu_dtlb_index(vcpu, eaddr);
+	else
+		BUG();
+
+	/* A negative slot means the guest has no mapping for this address. */
+	if (tlb_slot < 0)
+		return -ENOENT;
+
+	guest_pa = kvmppc_mmu_xlate(vcpu, tlb_slot, eaddr);
+
+	pte->eaddr = eaddr;
+	pte->vpage = eaddr >> PAGE_SHIFT;
+	pte->raddr = (guest_pa & PAGE_MASK) | (eaddr & ~PAGE_MASK);
+
+	/* XXX read permissions from the guest TLB */
+	pte->may_read = true;
+	pte->may_write = true;
+	pte->may_execute = true;
+
+	return 0;
+}
+
+/*
+ * Configure userspace-driven debugging of the guest from @dbg.
+ *
+ * Without KVM_GUESTDBG_ENABLE, debugging is switched off: dbcr0 and
+ * guest_debug are cleared and MSR_DE protection is dropped.
+ *
+ * Otherwise MSR_DE protection is enabled, dbcr0 is rebuilt from scratch
+ * (single-step if requested) and, with KVM_GUESTDBG_USE_HW_BP, every entry
+ * of dbg->arch.bp[] that is not KVMPPC_DEBUG_NONE is programmed as a hardware
+ * breakpoint or watchpoint.
+ *
+ * Returns 0 on success, or -EINVAL when an entry carries unknown type bits
+ * or more breakpoints/watchpoints are requested than slots exist. Entries
+ * processed before the failing one remain programmed.
+ */
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	struct debug_reg *dbg_reg;
+	/* b / w count the breakpoint / watchpoint slots handed out so far */
+	int n, b = 0, w = 0;
+	int ret = 0;
+
+	vcpu_load(vcpu);
+
+	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+		vcpu->arch.dbg_reg.dbcr0 = 0;
+		vcpu->guest_debug = 0;
+		kvm_guest_protect_msr(vcpu, MSR_DE, false);
+		goto out;
+	}
+
+	kvm_guest_protect_msr(vcpu, MSR_DE, true);
+	vcpu->guest_debug = dbg->control;
+	vcpu->arch.dbg_reg.dbcr0 = 0;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+	/* Code below handles only HW breakpoints */
+	dbg_reg = &(vcpu->arch.dbg_reg);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	/*
+	 * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1
+	 * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0
+	 */
+	dbg_reg->dbcr1 = 0;
+	dbg_reg->dbcr2 = 0;
+#else
+	/*
+	 * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
+	 * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
+	 * is set.
+	 */
+	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+			  DBCR1_IAC4US;
+	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+		goto out;
+
+	/* Assume failure until every requested entry has been programmed. */
+	ret = -EINVAL;
+	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+		uint64_t addr = dbg->arch.bp[n].addr;
+		uint32_t type = dbg->arch.bp[n].type;
+
+		if (type == KVMPPC_DEBUG_NONE)
+			continue;
+
+		/* Reject any type bit we do not understand. */
+		if (type & ~(KVMPPC_DEBUG_WATCH_READ |
+			     KVMPPC_DEBUG_WATCH_WRITE |
+			     KVMPPC_DEBUG_BREAKPOINT))
+			goto out;
+
+		if (type & KVMPPC_DEBUG_BREAKPOINT) {
+			/* Setting H/W breakpoint */
+			if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
+				goto out;
+		} else {
+			/* Setting H/W watchpoint */
+			if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
+							type, w++))
+				goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	vcpu_put(vcpu);
+	return ret;
+}
+
+/*
+ * Bind the vcpu to the executing CPU and to the current thread. The CPU
+ * number recorded comes from smp_processor_id(); @cpu is not used.
+ */
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	vcpu->cpu = smp_processor_id();
+	current->thread.kvm_vcpu = vcpu;
+}
+
+/*
+ * Undo kvmppc_booke_vcpu_load(): detach the vcpu from the current thread,
+ * mark it as running on no CPU (-1) and clear DBSR.
+ */
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	current->thread.kvm_vcpu = NULL;
+	vcpu->cpu = -1;
+
+	/* Clear pending debug event in DBSR */
+	kvmppc_clear_dbsr();
+}
+
+/* Forward VM initialisation to the backend's init_vm hook and return its status. */
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+	return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+/*
+ * Create a vcpu through the backend and give it its initial register state.
+ *
+ * Returns the backend's vcpu_create status if that fails. Otherwise returns
+ * the status of kvmppc_core_vcpu_setup(); when setup fails, the backend's
+ * vcpu_free hook is invoked before returning.
+ */
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	int prio;
+	int err;
+
+	err = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
+	if (err)
+		return err;
+
+	/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+	vcpu->arch.regs.nip = 0;
+	vcpu->arch.shared->pir = vcpu->vcpu_id;
+	kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+	kvmppc_set_msr(vcpu, 0);
+
+#ifndef CONFIG_KVM_BOOKE_HV
+	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
+	vcpu->arch.shadow_pid = 1;
+	vcpu->arch.shared->msr = 0;
+#endif
+
+	/*
+	 * Recognisable placeholder vectors, so it is obvious when the guest
+	 * takes an interrupt before it has programmed its own IVPR/IVORs.
+	 */
+	vcpu->arch.ivpr = 0x55550000;
+	for (prio = 0; prio < BOOKE_IRQPRIO_MAX; prio++)
+		vcpu->arch.ivor[prio] = 0x7700 | (prio * 4);
+
+	kvmppc_init_timing_stats(vcpu);
+
+	err = kvmppc_core_vcpu_setup(vcpu);
+	if (err)
+		vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+	kvmppc_sanity_check(vcpu);
+
+	return err;
+}
+
+/* Forward vcpu teardown to the backend's vcpu_free hook. */
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+/* Forward VM teardown to the backend's destroy_vm hook. */
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+	kvm->arch.kvm_ops->destroy_vm(kvm);
+}
+
+/* Forward to the backend's vcpu_load hook, passing @cpu through unchanged. */
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+/* Forward to the backend's vcpu_put hook. */
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
+}
+
+/*
+ * One-time initialisation of the BookE KVM core.
+ *
+ * Without CONFIG_KVM_BOOKE_HV this allocates the handler area, copies each
+ * of the 16 KVM interrupt handlers to the offset the host uses for the
+ * corresponding IVOR, and flushes the icache over everything copied. With
+ * CONFIG_KVM_BOOKE_HV there is nothing to do.
+ *
+ * Returns 0 on success, -ENOMEM if the handler area cannot be allocated.
+ */
+int __init kvmppc_booke_init(void)
+{
+#ifndef CONFIG_KVM_BOOKE_HV
+	unsigned long ivor[16];
+	unsigned long *handler = kvmppc_booke_handler_addr;
+	unsigned long max_ivor = 0;	/* index of the IVOR with the highest offset */
+	unsigned long handler_len;
+	int i;
+
+	/* We install our own exception handlers by hijacking IVPR. IVPR must
+	 * be 16-bit aligned, so we need a 64KB allocation. */
+	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	                                         VCPU_SIZE_ORDER);
+	if (!kvmppc_booke_handlers)
+		return -ENOMEM;
+
+	/* XXX make sure our handlers are smaller than Linux's */
+
+	/* Copy our interrupt handlers to match host IVORs. That way we don't
+	 * have to swap the IVORs on every guest/host transition. */
+	ivor[0] = mfspr(SPRN_IVOR0);
+	ivor[1] = mfspr(SPRN_IVOR1);
+	ivor[2] = mfspr(SPRN_IVOR2);
+	ivor[3] = mfspr(SPRN_IVOR3);
+	ivor[4] = mfspr(SPRN_IVOR4);
+	ivor[5] = mfspr(SPRN_IVOR5);
+	ivor[6] = mfspr(SPRN_IVOR6);
+	ivor[7] = mfspr(SPRN_IVOR7);
+	ivor[8] = mfspr(SPRN_IVOR8);
+	ivor[9] = mfspr(SPRN_IVOR9);
+	ivor[10] = mfspr(SPRN_IVOR10);
+	ivor[11] = mfspr(SPRN_IVOR11);
+	ivor[12] = mfspr(SPRN_IVOR12);
+	ivor[13] = mfspr(SPRN_IVOR13);
+	ivor[14] = mfspr(SPRN_IVOR14);
+	ivor[15] = mfspr(SPRN_IVOR15);
+
+	for (i = 0; i < 16; i++) {
+		/*
+		 * Track which vector sits at the highest offset. max_ivor is
+		 * an index, so the comparison must be offset against offset;
+		 * comparing an offset with the index would leave the flush
+		 * below short of some of the handlers copied here.
+		 */
+		if (ivor[i] > ivor[max_ivor])
+			max_ivor = i;
+
+		/* Each handler ends where the next table entry begins. */
+		handler_len = handler[i + 1] - handler[i];
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       (void *)handler[i], handler_len);
+	}
+
+	/* Flush from the start of the area to the end of the last handler. */
+	handler_len = handler[max_ivor + 1] - handler[max_ivor];
+	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
+			   ivor[max_ivor] + handler_len);
+#endif /* !BOOKE_HV */
+	return 0;
+}
+
+/* Module-exit hook: release the handler pages and shut the KVM core down. */
+void __exit kvmppc_booke_exit(void)
+{
+	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+	kvm_exit();
+}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 0000000000..9c5b8e7601
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include <asm/switch_to.h>
+#include "timing.h"
+
+/* interrupt priority ordering */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#ifdef CONFIG_SPE_POSSIBLE
+#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
+#define BOOKE_IRQPRIO_SPE_FP_DATA 6
+#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#endif
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5
+#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6
+#endif
+#define BOOKE_IRQPRIO_SYSCALL 8
+#define BOOKE_IRQPRIO_AP_UNAVAIL 9
+#define BOOKE_IRQPRIO_DTLB_MISS 10
+#define BOOKE_IRQPRIO_ITLB_MISS 11
+#define BOOKE_IRQPRIO_MACHINE_CHECK 12
+#define BOOKE_IRQPRIO_DEBUG 13
+#define BOOKE_IRQPRIO_CRITICAL 14
+#define BOOKE_IRQPRIO_WATCHDOG 15
+#define BOOKE_IRQPRIO_EXTERNAL 16
+#define BOOKE_IRQPRIO_FIT 17
+#define BOOKE_IRQPRIO_DECREMENTER 18
+#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
+/* Internal pseudo-irqprio for level triggered externals */
+#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20
+#define BOOKE_IRQPRIO_DBELL 21
+#define BOOKE_IRQPRIO_DBELL_CRIT 22
+#define BOOKE_IRQPRIO_MAX 23
+
+#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \
+			  (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \
+			  (1 << BOOKE_IRQPRIO_DBELL) | \
+			  (1 << BOOKE_IRQPRIO_DECREMENTER) | \
+			  (1 << BOOKE_IRQPRIO_FIT) | \
+			  (1 << BOOKE_IRQPRIO_EXTERNAL))
+
+#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \
+			  (1 << BOOKE_IRQPRIO_WATCHDOG) | \
+			  (1 << BOOKE_IRQPRIO_CRITICAL))
+
+extern unsigned long kvmppc_booke_handlers;
+extern unsigned long kvmppc_booke_handler_addr[];
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
+
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
+                            unsigned int inst, int *advance);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
+
+/* low-level asm code to transfer guest state */
+void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
+void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
+
+/* high-level function, manages flags, host state */
+void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
+
+void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
+
+/* Interrupt class selector passed to kvmppc_set_pending_interrupt(). */
+enum int_class {
+	INT_CLASS_NONCRIT,
+	INT_CLASS_CRIT,
+	INT_CLASS_MC,
+	INT_CLASS_DBG,
+};
+
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
+
+/* e500-specific instruction and SPR emulation entry points. */
+extern int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
+				       unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+					  ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+					  ulong *spr_val);
+
+/*
+ * Read DBSR and write the same value straight back to it. Callers use this
+ * to clear pending debug events.
+ */
+static inline void kvmppc_clear_dbsr(void)
+{
+	unsigned long pending = mfspr(SPRN_DBSR);
+
+	mtspr(SPRN_DBSR, pending);
+}
+
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr);
+
+#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644
index 0000000000..d8d38aca71
--- /dev/null
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright IBM Corp. 2008
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+
+#define OP_19_XOP_RFI     50
+#define OP_19_XOP_RFCI    51
+#define OP_19_XOP_RFDI    39
+
+#define OP_31_XOP_MFMSR   83
+#define OP_31_XOP_WRTEE   131
+#define OP_31_XOP_MTMSR   146
+#define OP_31_XOP_WRTEEI  163
+
+/* Emulate rfi: resume at SRR0 with the MSR taken from SRR1. */
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.nip = vcpu->arch.shared->srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
+}
+
+/* Emulate rfdi: resume at DSRR0 with the MSR taken from DSRR1. */
+static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.nip = vcpu->arch.dsrr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
+}
+
+/* Emulate rfci: resume at CSRR0 with the MSR taken from CSRR1. */
+static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.nip = vcpu->arch.csrr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
+}
+
+/*
+ * Emulate the BookE instructions handled here: rfi, rfci and rfdi
+ * (primary opcode 19) and mfmsr, mtmsr, wrtee and wrteei (primary opcode 31).
+ *
+ * The return-from-interrupt instructions set *@advance to 0, since they load
+ * the next instruction pointer themselves; the other cases leave *@advance
+ * alone.
+ *
+ * Returns EMULATE_DONE when the instruction was handled, EMULATE_FAIL for
+ * anything else.
+ */
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
+                            unsigned int inst, int *advance)
+{
+	const int rs = get_rs(inst);
+	const int rt = get_rt(inst);
+
+	switch (get_op(inst)) {
+	case 19:
+		switch (get_xop(inst)) {
+		case OP_19_XOP_RFI:
+			kvmppc_emul_rfi(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+			*advance = 0;
+			return EMULATE_DONE;
+
+		case OP_19_XOP_RFCI:
+			kvmppc_emul_rfci(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFCI_EXITS);
+			*advance = 0;
+			return EMULATE_DONE;
+
+		case OP_19_XOP_RFDI:
+			kvmppc_emul_rfdi(vcpu);
+			kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS);
+			*advance = 0;
+			return EMULATE_DONE;
+
+		default:
+			return EMULATE_FAIL;
+		}
+
+	case 31:
+		switch (get_xop(inst)) {
+		case OP_31_XOP_MFMSR:
+			kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr);
+			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+			return EMULATE_DONE;
+
+		case OP_31_XOP_MTMSR:
+			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+			kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
+			return EMULATE_DONE;
+
+		case OP_31_XOP_WRTEE:
+			/* Only MSR[EE] is replaced, taken from the source GPR. */
+			vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
+					| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			return EMULATE_DONE;
+
+		case OP_31_XOP_WRTEEI:
+			/* Only MSR[EE] is replaced, taken from the instruction word. */
+			vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE)
+							 | (inst & MSR_EE);
+			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+			return EMULATE_DONE;
+
+		default:
+			return EMULATE_FAIL;
+		}
+
+	default:
+		return EMULATE_FAIL;
+	}
+}
+
+/*
+ * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode).
+ * Their backing store is in real registers, and these functions
+ * will return the wrong result if called for them in another context
+ * (such as debugging).
+ */
+
+/*
+ * Emulate a guest mtspr of @spr_val to SPR number @sprn.
+ *
+ * Writes to the debug registers (IACx, DACx, DBCRx, DBSR) are silently
+ * dropped while userspace is debugging the guest (vcpu->guest_debug set);
+ * the instruction still counts as emulated. When a debug register other
+ * than DBSR is actually written, the updated set is copied to
+ * current->thread.debug and passed to switch_booke_debug_regs().
+ *
+ * Returns EMULATE_DONE for every SPR handled here, EMULATE_FAIL otherwise.
+ */
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+{
+	int emulated = EMULATE_DONE;
+	/* set when a debug register was changed and must be propagated */
+	bool debug_inst = false;
+
+	switch (sprn) {
+	case SPRN_DEAR:
+		vcpu->arch.shared->dar = spr_val;
+		break;
+	case SPRN_ESR:
+		vcpu->arch.shared->esr = spr_val;
+		break;
+	case SPRN_CSRR0:
+		vcpu->arch.csrr0 = spr_val;
+		break;
+	case SPRN_CSRR1:
+		vcpu->arch.csrr1 = spr_val;
+		break;
+	case SPRN_DSRR0:
+		vcpu->arch.dsrr0 = spr_val;
+		break;
+	case SPRN_DSRR1:
+		vcpu->arch.dsrr1 = spr_val;
+		break;
+	case SPRN_IAC1:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.iac1 = spr_val;
+		break;
+	case SPRN_IAC2:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.iac2 = spr_val;
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case SPRN_IAC3:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.iac3 = spr_val;
+		break;
+	case SPRN_IAC4:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.iac4 = spr_val;
+		break;
+#endif
+	case SPRN_DAC1:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.dac1 = spr_val;
+		break;
+	case SPRN_DAC2:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.dac2 = spr_val;
+		break;
+	case SPRN_DBCR0:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		/* Keep only the DBCR0 bits listed here; all others are dropped. */
+		spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE |
+			DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4  |
+			DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W);
+
+		vcpu->arch.dbg_reg.dbcr0 = spr_val;
+		break;
+	case SPRN_DBCR1:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.dbcr1 = spr_val;
+		break;
+	case SPRN_DBCR2:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		debug_inst = true;
+		vcpu->arch.dbg_reg.dbcr2 = spr_val;
+		break;
+	case SPRN_DBSR:
+		/*
+		 * If userspace is debugging guest then guest
+		 * can not access debug registers.
+		 */
+		if (vcpu->guest_debug)
+			break;
+
+		/* Bits written as 1 are cleared in the emulated DBSR. */
+		vcpu->arch.dbsr &= ~spr_val;
+		/* Nothing but (possibly) IDE left: drop the queued debug interrupt. */
+		if (!(vcpu->arch.dbsr & ~DBSR_IDE))
+			kvmppc_core_dequeue_debug(vcpu);
+		break;
+	case SPRN_TSR:
+		kvmppc_clr_tsr_bits(vcpu, spr_val);
+		break;
+	case SPRN_TCR:
+		/*
+		 * WRC is a 2-bit field that is supposed to preserve its
+		 * value once written to non-zero.
+		 */
+		if (vcpu->arch.tcr & TCR_WRC_MASK) {
+			spr_val &= ~TCR_WRC_MASK;
+			spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+		}
+		kvmppc_set_tcr(vcpu, spr_val);
+		break;
+
+	case SPRN_DECAR:
+		vcpu->arch.decar = spr_val;
+		break;
+	/*
+	 * Note: SPRG4-7 are user-readable.
+	 * These values are loaded into the real SPRGs when resuming the
+	 * guest (PR-mode only).
+	 */
+	case SPRN_SPRG4:
+		kvmppc_set_sprg4(vcpu, spr_val);
+		break;
+	case SPRN_SPRG5:
+		kvmppc_set_sprg5(vcpu, spr_val);
+		break;
+	case SPRN_SPRG6:
+		kvmppc_set_sprg6(vcpu, spr_val);
+		break;
+	case SPRN_SPRG7:
+		kvmppc_set_sprg7(vcpu, spr_val);
+		break;
+
+	case SPRN_IVPR:
+		vcpu->arch.ivpr = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+		/* On HV builds the value is also written to the GIVPR register. */
+		mtspr(SPRN_GIVPR, spr_val);
+#endif
+		break;
+	case SPRN_IVOR0:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
+		break;
+	case SPRN_IVOR1:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
+		break;
+	case SPRN_IVOR2:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+		mtspr(SPRN_GIVOR2, spr_val);
+#endif
+		break;
+	case SPRN_IVOR3:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
+		break;
+	case SPRN_IVOR4:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
+		break;
+	case SPRN_IVOR5:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
+		break;
+	case SPRN_IVOR6:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
+		break;
+	case SPRN_IVOR7:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
+		break;
+	case SPRN_IVOR8:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
+#ifdef CONFIG_KVM_BOOKE_HV
+		mtspr(SPRN_GIVOR8, spr_val);
+#endif
+		break;
+	case SPRN_IVOR9:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
+		break;
+	case SPRN_IVOR10:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
+		break;
+	case SPRN_IVOR11:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
+		break;
+	case SPRN_IVOR12:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
+		break;
+	case SPRN_IVOR13:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
+		break;
+	case SPRN_IVOR14:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
+		break;
+	case SPRN_IVOR15:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
+		break;
+	case SPRN_MCSR:
+		/* Bits written as 1 are cleared in the emulated MCSR. */
+		vcpu->arch.mcsr &= ~spr_val;
+		break;
+#if defined(CONFIG_64BIT)
+	case SPRN_EPCR:
+		kvmppc_set_epcr(vcpu, spr_val);
+#ifdef CONFIG_KVM_BOOKE_HV
+		/* Load the shadow value into EPCR, not the guest's own value. */
+		mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+#endif
+		break;
+#endif
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	if (debug_inst) {
+		current->thread.debug = vcpu->arch.dbg_reg;
+		switch_booke_debug_regs(&vcpu->arch.dbg_reg);
+	}
+	return emulated;
+}
+
+/*
+ * Emulate a guest mfspr from SPR number @sprn.
+ *
+ * For every SPR handled here the emulated value is stored in *@spr_val and
+ * EMULATE_DONE is returned. A DBCR0 read has DBCR0_EDM OR-ed in while
+ * userspace is debugging the guest. Unknown SPRs return EMULATE_FAIL and
+ * leave *@spr_val untouched.
+ */
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+{
+	ulong value;
+
+	switch (sprn) {
+	case SPRN_IVPR:
+		value = vcpu->arch.ivpr;
+		break;
+	case SPRN_DEAR:
+		value = vcpu->arch.shared->dar;
+		break;
+	case SPRN_ESR:
+		value = vcpu->arch.shared->esr;
+		break;
+	case SPRN_EPR:
+		value = vcpu->arch.epr;
+		break;
+	case SPRN_CSRR0:
+		value = vcpu->arch.csrr0;
+		break;
+	case SPRN_CSRR1:
+		value = vcpu->arch.csrr1;
+		break;
+	case SPRN_DSRR0:
+		value = vcpu->arch.dsrr0;
+		break;
+	case SPRN_DSRR1:
+		value = vcpu->arch.dsrr1;
+		break;
+	case SPRN_IAC1:
+		value = vcpu->arch.dbg_reg.iac1;
+		break;
+	case SPRN_IAC2:
+		value = vcpu->arch.dbg_reg.iac2;
+		break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	case SPRN_IAC3:
+		value = vcpu->arch.dbg_reg.iac3;
+		break;
+	case SPRN_IAC4:
+		value = vcpu->arch.dbg_reg.iac4;
+		break;
+#endif
+	case SPRN_DAC1:
+		value = vcpu->arch.dbg_reg.dac1;
+		break;
+	case SPRN_DAC2:
+		value = vcpu->arch.dbg_reg.dac2;
+		break;
+	case SPRN_DBCR0:
+		value = vcpu->arch.dbg_reg.dbcr0;
+		/* Report EDM while userspace is debugging the guest. */
+		if (vcpu->guest_debug)
+			value |= DBCR0_EDM;
+		break;
+	case SPRN_DBCR1:
+		value = vcpu->arch.dbg_reg.dbcr1;
+		break;
+	case SPRN_DBCR2:
+		value = vcpu->arch.dbg_reg.dbcr2;
+		break;
+	case SPRN_DBSR:
+		value = vcpu->arch.dbsr;
+		break;
+	case SPRN_TSR:
+		value = vcpu->arch.tsr;
+		break;
+	case SPRN_TCR:
+		value = vcpu->arch.tcr;
+		break;
+
+	case SPRN_IVOR0:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+		break;
+	case SPRN_IVOR1:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+		break;
+	case SPRN_IVOR2:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+		break;
+	case SPRN_IVOR3:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+		break;
+	case SPRN_IVOR4:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+		break;
+	case SPRN_IVOR5:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+		break;
+	case SPRN_IVOR6:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+		break;
+	case SPRN_IVOR7:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+		break;
+	case SPRN_IVOR8:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+		break;
+	case SPRN_IVOR9:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+		break;
+	case SPRN_IVOR10:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+		break;
+	case SPRN_IVOR11:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+		break;
+	case SPRN_IVOR12:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+		break;
+	case SPRN_IVOR13:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+		break;
+	case SPRN_IVOR14:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+		break;
+	case SPRN_IVOR15:
+		value = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+		break;
+	case SPRN_MCSR:
+		value = vcpu->arch.mcsr;
+		break;
+#if defined(CONFIG_64BIT)
+	case SPRN_EPCR:
+		value = vcpu->arch.epcr;
+		break;
+#endif
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	*spr_val = value;
+	return EMULATE_DONE;
+}
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
new file mode 100644
index 0000000000..205545d820
--- /dev/null
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -0,0 +1,535 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+/* The host stack layout: */
+#define HOST_R1         0 /* Implied by stwu. */
+#define HOST_CALLEE_LR  4
+#define HOST_RUN        8
+/* r2 is special: it holds 'current', and it is made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option. */
+#define HOST_R2         12
+#define HOST_CR         16
+#define HOST_NV_GPRS    20
+#define __HOST_NV_GPR(n)  (HOST_NV_GPRS + ((n - 14) * 4))
+#define HOST_NV_GPR(n)  __HOST_NV_GPR(__REG_##n)
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
+#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
+#define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
+
+#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
+                        (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
+                        (1<<BOOKE_INTERRUPT_DEBUG))
+
+#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+                        (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
+                        (1<<BOOKE_INTERRUPT_ALIGNMENT))
+
+#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+                       (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
+                       (1<<BOOKE_INTERRUPT_PROGRAM) | \
+                       (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
+                       (1<<BOOKE_INTERRUPT_ALIGNMENT))
+
+.macro __KVM_HANDLER ivor_nr scratch srr0
+	/* Get pointer to vcpu and record exit number. */
+	mtspr	\scratch , r4		/* stash guest r4 in the scratch SPRG */
+	mfspr   r4, SPRN_SPRG_THREAD
+	lwz     r4, THREAD_KVM_VCPU(r4)	/* r4 = vcpu pointer from here on */
+	stw	r3, VCPU_GPR(R3)(r4)
+	stw	r5, VCPU_GPR(R5)(r4)
+	stw	r6, VCPU_GPR(R6)(r4)
+	mfspr	r3, \scratch		/* r3 = guest r4 stashed above */
+	mfctr	r5
+	stw	r3, VCPU_GPR(R4)(r4)
+	stw	r5, VCPU_CTR(r4)	/* guest CTR is saved before CTR is reused below */
+	mfspr	r3, \srr0		/* the value of \srr0 becomes the saved guest PC */
+	lis	r6, kvmppc_resume_host@h
+	stw	r3, VCPU_PC(r4)
+	li	r5, \ivor_nr		/* r5 = exit number, as kvmppc_resume_host expects */
+	ori	r6, r6, kvmppc_resume_host@l
+	mtctr	r6
+	bctr				/* jump to kvmppc_resume_host through CTR */
+.endm
+
+.macro KVM_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)	/* one global entry symbol per interrupt number */
+	__KVM_HANDLER \ivor_nr \scratch \srr0	/* shared save-state-and-exit sequence */
+.endm
+
+.macro KVM_DBG_HANDLER ivor_nr scratch srr0
+_GLOBAL(kvmppc_handler_\ivor_nr)
+	mtspr   \scratch, r4		/* stash guest r4 in the scratch SPRG */
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	stw	r3, VCPU_CRIT_SAVE(r4)	/* free r3 so it can hold CR */
+	mfcr	r3			/* the andi. below overwrites CR0 */
+	mfspr	r4, SPRN_CSRR1
+	andi.	r4, r4, MSR_PR		/* test the PR bit of the MSR image in CSRR1 */
+	bne	1f
+	/* debug interrupt happened in enter/exit path */
+	mfspr   r4, SPRN_CSRR1
+	rlwinm  r4, r4, 0, ~MSR_DE	/* return with MSR[DE] cleared */
+	mtspr   SPRN_CSRR1, r4
+	lis	r4, 0xffff
+	ori	r4, r4, 0xffff
+	mtspr	SPRN_DBSR, r4		/* write all ones to DBSR */
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	mtcr	r3			/* restore the CR saved above */
+	lwz     r3, VCPU_CRIT_SAVE(r4)
+	mfspr   r4, \scratch
+	rfci				/* r3, r4 and CR restored: resume the interrupted code */
+1:	/* debug interrupt happened in guest */
+	mtcr	r3
+	mfspr	r4, SPRN_SPRG_THREAD
+	lwz	r4, THREAD_KVM_VCPU(r4)
+	lwz     r3, VCPU_CRIT_SAVE(r4)
+	mfspr   r4, \scratch		/* r3, r4 and CR now hold their values at entry */
+	__KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
+.macro KVM_HANDLER_ADDR ivor_nr
+	.long	kvmppc_handler_\ivor_nr	/* emit the address of that interrupt's handler entry */
+.endm
+
+.macro KVM_HANDLER_END
+	.long	kvmppc_handlers_end	/* emit the address of the label placed after the last handler */
+.endm
+
+_GLOBAL(kvmppc_handlers_start)
+KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK  SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
+KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_PROGRAM SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_SYSCALL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
+_GLOBAL(kvmppc_handlers_end)
+
+/* Registers:
+ *  SPRG_SCRATCH0: guest r4
+ *  r4: vcpu pointer
+ *  r5: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+	mfcr	r3
+	stw	r3, VCPU_CR(r4)
+	stw	r7, VCPU_GPR(R7)(r4)
+	stw	r8, VCPU_GPR(R8)(r4)
+	stw	r9, VCPU_GPR(R9)(r4)
+
+	li	r6, 1
+	slw	r6, r6, r5		/* r6 = 1 << exit number, for the NEED_*_MASK tests */
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save exit time */
+1:
+	mfspr	r7, SPRN_TBRU
+	mfspr	r8, SPRN_TBRL
+	mfspr	r9, SPRN_TBRU
+	cmpw	r9, r7
+	bne	1b			/* upper half changed between the two reads: retry */
+	stw	r8, VCPU_TIMING_EXIT_TBL(r4)
+	stw	r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
+	/* Save the faulting instruction and all GPRs for emulation. */
+	andi.	r7, r6, NEED_INST_MASK
+	beq	..skip_inst_copy
+	mfspr	r9, SPRN_SRR0
+	mfmsr	r8
+	ori	r7, r8, MSR_DS
+	mtmsr	r7			/* run the load below with MSR[DS] set */
+	isync
+	lwz	r9, 0(r9)		/* read the instruction word at the SRR0 address */
+	mtmsr	r8			/* back to the previous MSR */
+	isync
+	stw	r9, VCPU_LAST_INST(r4)
+
+	stw	r15, VCPU_GPR(R15)(r4)
+	stw	r16, VCPU_GPR(R16)(r4)
+	stw	r17, VCPU_GPR(R17)(r4)
+	stw	r18, VCPU_GPR(R18)(r4)
+	stw	r19, VCPU_GPR(R19)(r4)
+	stw	r20, VCPU_GPR(R20)(r4)
+	stw	r21, VCPU_GPR(R21)(r4)
+	stw	r22, VCPU_GPR(R22)(r4)
+	stw	r23, VCPU_GPR(R23)(r4)
+	stw	r24, VCPU_GPR(R24)(r4)
+	stw	r25, VCPU_GPR(R25)(r4)
+	stw	r26, VCPU_GPR(R26)(r4)
+	stw	r27, VCPU_GPR(R27)(r4)
+	stw	r28, VCPU_GPR(R28)(r4)
+	stw	r29, VCPU_GPR(R29)(r4)
+	stw	r30, VCPU_GPR(R30)(r4)
+	stw	r31, VCPU_GPR(R31)(r4)
+..skip_inst_copy:
+
+	/* Also grab DEAR and ESR before the host can clobber them. */
+
+	andi.	r7, r6, NEED_DEAR_MASK
+	beq	..skip_dear
+	mfspr	r9, SPRN_DEAR
+	stw	r9, VCPU_FAULT_DEAR(r4)
+..skip_dear:
+
+	andi.	r7, r6, NEED_ESR_MASK
+	beq	..skip_esr
+	mfspr	r9, SPRN_ESR
+	stw	r9, VCPU_FAULT_ESR(r4)
+..skip_esr:
+
+	/* Save remaining volatile guest register state to vcpu. */
+	stw	r0, VCPU_GPR(R0)(r4)
+	stw	r1, VCPU_GPR(R1)(r4)
+	stw	r2, VCPU_GPR(R2)(r4)
+	stw	r10, VCPU_GPR(R10)(r4)
+	stw	r11, VCPU_GPR(R11)(r4)
+	stw	r12, VCPU_GPR(R12)(r4)
+	stw	r13, VCPU_GPR(R13)(r4)
+	stw	r14, VCPU_GPR(R14)(r4) /* We need a NV GPR below. */
+	mflr	r3
+	stw	r3, VCPU_LR(r4)
+	mfxer	r3
+	stw	r3, VCPU_XER(r4)
+
+	/* Restore host stack pointer and PID before IVPR, since the host
+	 * exception handlers use them. */
+	lwz	r1, VCPU_HOST_STACK(r4)
+	lwz	r3, VCPU_HOST_PID(r4)
+	mtspr	SPRN_PID, r3
+
+#ifdef CONFIG_PPC_85xx
+	/* we cheat and know that Linux doesn't use PID1 which is always 0 */
+	lis	r3, 0
+	mtspr	SPRN_PID1, r3
+#endif
+
+	/* Restore host IVPR before re-enabling interrupts. We cheat and know
+	 * that Linux IVPR is always 0xc0000000. */
+	lis	r3, 0xc000
+	mtspr	SPRN_IVPR, r3
+
+	/* Switch to kernel stack and jump to handler. */
+	LOAD_REG_ADDR(r3, kvmppc_handle_exit)
+	mtctr	r3
+	mr	r3, r4			/* first argument = vcpu pointer */
+	lwz	r2, HOST_R2(r1)		/* host r2, saved at lightweight_exit */
+	mr	r14, r4 /* Save vcpu pointer. */
+
+	bctrl	/* kvmppc_handle_exit() */
+
+	/* Restore vcpu pointer and the nonvolatiles we used. */
+	mr	r4, r14
+	lwz	r14, VCPU_GPR(R14)(r4)
+
+	/* Sometimes instruction emulation must restore complete GPR state. */
+	andi.	r5, r3, RESUME_FLAG_NV	/* r3 = return value of kvmppc_handle_exit() */
+	beq	..skip_nv_load
+	lwz	r15, VCPU_GPR(R15)(r4)
+	lwz	r16, VCPU_GPR(R16)(r4)
+	lwz	r17, VCPU_GPR(R17)(r4)
+	lwz	r18, VCPU_GPR(R18)(r4)
+	lwz	r19, VCPU_GPR(R19)(r4)
+	lwz	r20, VCPU_GPR(R20)(r4)
+	lwz	r21, VCPU_GPR(R21)(r4)
+	lwz	r22, VCPU_GPR(R22)(r4)
+	lwz	r23, VCPU_GPR(R23)(r4)
+	lwz	r24, VCPU_GPR(R24)(r4)
+	lwz	r25, VCPU_GPR(R25)(r4)
+	lwz	r26, VCPU_GPR(R26)(r4)
+	lwz	r27, VCPU_GPR(R27)(r4)
+	lwz	r28, VCPU_GPR(R28)(r4)
+	lwz	r29, VCPU_GPR(R29)(r4)
+	lwz	r30, VCPU_GPR(R30)(r4)
+	lwz	r31, VCPU_GPR(R31)(r4)
+..skip_nv_load:
+
+	/* Should we return to the guest? */
+	andi.	r5, r3, RESUME_FLAG_HOST
+	beq	lightweight_exit	/* flag clear: re-enter the guest */
+
+	srawi	r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+	/* Not returning to guest. */
+
+#ifdef CONFIG_SPE
+	/* save guest SPEFSCR and load host SPEFSCR */
+	mfspr	r9, SPRN_SPEFSCR
+	stw	r9, VCPU_SPEFSCR(r4)
+	lwz	r9, VCPU_HOST_SPEFSCR(r4)
+	mtspr	SPRN_SPEFSCR, r9
+#endif
+
+	/* We already saved guest volatile register state; now save the
+	 * non-volatiles. */
+	stw	r15, VCPU_GPR(R15)(r4)
+	stw	r16, VCPU_GPR(R16)(r4)
+	stw	r17, VCPU_GPR(R17)(r4)
+	stw	r18, VCPU_GPR(R18)(r4)
+	stw	r19, VCPU_GPR(R19)(r4)
+	stw	r20, VCPU_GPR(R20)(r4)
+	stw	r21, VCPU_GPR(R21)(r4)
+	stw	r22, VCPU_GPR(R22)(r4)
+	stw	r23, VCPU_GPR(R23)(r4)
+	stw	r24, VCPU_GPR(R24)(r4)
+	stw	r25, VCPU_GPR(R25)(r4)
+	stw	r26, VCPU_GPR(R26)(r4)
+	stw	r27, VCPU_GPR(R27)(r4)
+	stw	r28, VCPU_GPR(R28)(r4)
+	stw	r29, VCPU_GPR(R29)(r4)
+	stw	r30, VCPU_GPR(R30)(r4)
+	stw	r31, VCPU_GPR(R31)(r4)
+
+	/* Load host non-volatile register state from host stack. */
+	lwz	r14, HOST_NV_GPR(R14)(r1)
+	lwz	r15, HOST_NV_GPR(R15)(r1)
+	lwz	r16, HOST_NV_GPR(R16)(r1)
+	lwz	r17, HOST_NV_GPR(R17)(r1)
+	lwz	r18, HOST_NV_GPR(R18)(r1)
+	lwz	r19, HOST_NV_GPR(R19)(r1)
+	lwz	r20, HOST_NV_GPR(R20)(r1)
+	lwz	r21, HOST_NV_GPR(R21)(r1)
+	lwz	r22, HOST_NV_GPR(R22)(r1)
+	lwz	r23, HOST_NV_GPR(R23)(r1)
+	lwz	r24, HOST_NV_GPR(R24)(r1)
+	lwz	r25, HOST_NV_GPR(R25)(r1)
+	lwz	r26, HOST_NV_GPR(R26)(r1)
+	lwz	r27, HOST_NV_GPR(R27)(r1)
+	lwz	r28, HOST_NV_GPR(R28)(r1)
+	lwz	r29, HOST_NV_GPR(R29)(r1)
+	lwz	r30, HOST_NV_GPR(R30)(r1)
+	lwz	r31, HOST_NV_GPR(R31)(r1)
+
+	/* Return to kvm_vcpu_run(). */
+	lwz	r4, HOST_STACK_LR(r1)	/* LR saved by __kvmppc_vcpu_run */
+	lwz	r5, HOST_CR(r1)		/* CR saved by __kvmppc_vcpu_run */
+	addi	r1, r1, HOST_STACK_SIZE	/* pop the frame pushed by __kvmppc_vcpu_run */
+	mtlr	r4
+	mtcr	r5
+	/* r3 still contains the return code from kvmppc_handle_exit(). */
+	blr
+
+
+/* Registers:
+ *  r3: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+	stwu	r1, -HOST_STACK_SIZE(r1)	/* push a HOST_STACK_SIZE-byte frame */
+	stw	r1, VCPU_HOST_STACK(r3)	/* Save stack pointer to vcpu. */
+
+	/* Save host state to stack. */
+	mr	r4, r3			/* r4 = vcpu pointer from here on */
+	mflr	r3
+	stw	r3, HOST_STACK_LR(r1)
+	mfcr	r5
+	stw	r5, HOST_CR(r1)
+
+	/* Save host non-volatile register state to stack. */
+	stw	r14, HOST_NV_GPR(R14)(r1)
+	stw	r15, HOST_NV_GPR(R15)(r1)
+	stw	r16, HOST_NV_GPR(R16)(r1)
+	stw	r17, HOST_NV_GPR(R17)(r1)
+	stw	r18, HOST_NV_GPR(R18)(r1)
+	stw	r19, HOST_NV_GPR(R19)(r1)
+	stw	r20, HOST_NV_GPR(R20)(r1)
+	stw	r21, HOST_NV_GPR(R21)(r1)
+	stw	r22, HOST_NV_GPR(R22)(r1)
+	stw	r23, HOST_NV_GPR(R23)(r1)
+	stw	r24, HOST_NV_GPR(R24)(r1)
+	stw	r25, HOST_NV_GPR(R25)(r1)
+	stw	r26, HOST_NV_GPR(R26)(r1)
+	stw	r27, HOST_NV_GPR(R27)(r1)
+	stw	r28, HOST_NV_GPR(R28)(r1)
+	stw	r29, HOST_NV_GPR(R29)(r1)
+	stw	r30, HOST_NV_GPR(R30)(r1)
+	stw	r31, HOST_NV_GPR(R31)(r1)
+
+	/* Load guest non-volatiles. */
+	lwz	r14, VCPU_GPR(R14)(r4)
+	lwz	r15, VCPU_GPR(R15)(r4)
+	lwz	r16, VCPU_GPR(R16)(r4)
+	lwz	r17, VCPU_GPR(R17)(r4)
+	lwz	r18, VCPU_GPR(R18)(r4)
+	lwz	r19, VCPU_GPR(R19)(r4)
+	lwz	r20, VCPU_GPR(R20)(r4)
+	lwz	r21, VCPU_GPR(R21)(r4)
+	lwz	r22, VCPU_GPR(R22)(r4)
+	lwz	r23, VCPU_GPR(R23)(r4)
+	lwz	r24, VCPU_GPR(R24)(r4)
+	lwz	r25, VCPU_GPR(R25)(r4)
+	lwz	r26, VCPU_GPR(R26)(r4)
+	lwz	r27, VCPU_GPR(R27)(r4)
+	lwz	r28, VCPU_GPR(R28)(r4)
+	lwz	r29, VCPU_GPR(R29)(r4)
+	lwz	r30, VCPU_GPR(R30)(r4)
+	lwz	r31, VCPU_GPR(R31)(r4)
+
+#ifdef CONFIG_SPE
+	/* save host SPEFSCR and load guest SPEFSCR */
+	mfspr	r3, SPRN_SPEFSCR
+	stw	r3, VCPU_HOST_SPEFSCR(r4)
+	lwz	r3, VCPU_SPEFSCR(r4)
+	mtspr	SPRN_SPEFSCR, r3
+#endif
+
+lightweight_exit:
+	stw	r2, HOST_R2(r1)		/* reloaded by kvmppc_resume_host on the next exit */
+
+	mfspr	r3, SPRN_PID
+	stw	r3, VCPU_HOST_PID(r4)	/* remember the host PID */
+	lwz	r3, VCPU_SHADOW_PID(r4)
+	mtspr	SPRN_PID, r3		/* switch to the vcpu's shadow PID */
+
+#ifdef CONFIG_PPC_85xx
+	lwz	r3, VCPU_SHADOW_PID1(r4)
+	mtspr	SPRN_PID1, r3
+#endif
+
+	/* Load some guest volatiles. */
+	lwz	r0, VCPU_GPR(R0)(r4)
+	lwz	r2, VCPU_GPR(R2)(r4)
+	lwz	r9, VCPU_GPR(R9)(r4)
+	lwz	r10, VCPU_GPR(R10)(r4)
+	lwz	r11, VCPU_GPR(R11)(r4)
+	lwz	r12, VCPU_GPR(R12)(r4)
+	lwz	r13, VCPU_GPR(R13)(r4)
+	lwz	r3, VCPU_LR(r4)
+	mtlr	r3
+	lwz	r3, VCPU_XER(r4)
+	mtxer	r3
+
+	/* Switch the IVPR. XXX If we take a TLB miss after this we're screwed,
+	 * so how do we make sure vcpu won't fault? */
+	lis	r8, kvmppc_booke_handlers@ha
+	lwz	r8, kvmppc_booke_handlers@l(r8)	/* r8 = value stored in kvmppc_booke_handlers */
+	mtspr	SPRN_IVPR, r8
+
+	lwz	r5, VCPU_SHARED(r4)	/* r5 = base for the VCPU_SHARED_* loads below */
+
+	/* Can't switch the stack pointer until after IVPR is switched,
+	 * because host interrupt handlers would get confused. */
+	lwz	r1, VCPU_GPR(R1)(r4)
+
+	/*
+	 * Host interrupt handlers may have clobbered these
+	 * guest-readable SPRGs, or the guest kernel may have
+	 * written directly to the shared area, so we
+	 * need to reload them here with the guest's values.
+	 */
+	PPC_LD(r3, VCPU_SHARED_SPRG4, r5)
+	mtspr	SPRN_SPRG4W, r3
+	PPC_LD(r3, VCPU_SHARED_SPRG5, r5)
+	mtspr	SPRN_SPRG5W, r3
+	PPC_LD(r3, VCPU_SHARED_SPRG6, r5)
+	mtspr	SPRN_SPRG6W, r3
+	PPC_LD(r3, VCPU_SHARED_SPRG7, r5)
+	mtspr	SPRN_SPRG7W, r3
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save enter time */
+1:
+	mfspr	r6, SPRN_TBRU
+	mfspr	r7, SPRN_TBRL
+	mfspr	r8, SPRN_TBRU
+	cmpw	r8, r6
+	bne	1b			/* upper half changed between the two reads: retry */
+	stw	r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+	stw	r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
+	/* Finish loading guest volatiles and jump to guest. */
+	lwz	r3, VCPU_CTR(r4)
+	lwz	r5, VCPU_CR(r4)
+	lwz	r6, VCPU_PC(r4)
+	lwz	r7, VCPU_SHADOW_MSR(r4)
+	mtctr	r3
+	mtcr	r5
+	mtsrr0	r6			/* rfi below resumes at the saved guest PC */
+	mtsrr1	r7			/* ... with the vcpu's shadow MSR */
+	lwz	r5, VCPU_GPR(R5)(r4)
+	lwz	r6, VCPU_GPR(R6)(r4)
+	lwz	r7, VCPU_GPR(R7)(r4)
+	lwz	r8, VCPU_GPR(R8)(r4)
+
+	/* Clear any debug events which occurred since we disabled MSR[DE].
+	 * XXX This gives us a 3-instruction window in which a breakpoint
+	 * intended for guest context could fire in the host instead. */
+	lis	r3, 0xffff
+	ori	r3, r3, 0xffff
+	mtspr	SPRN_DBSR, r3
+
+	lwz	r3, VCPU_GPR(R3)(r4)
+	lwz	r4, VCPU_GPR(R4)(r4)	/* loaded last: r4 was the vcpu base pointer */
+	rfi
+
+	.data
+	.align	4
+	.globl	kvmppc_booke_handler_addr
+kvmppc_booke_handler_addr:
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_FIT
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_WATCHDOG
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DTLB_MISS
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_ITLB_MISS
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_UNAVAIL
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_ROUND
+KVM_HANDLER_END /* Always keep this as the last entry. */
+
+#ifdef CONFIG_SPE
+_GLOBAL(kvmppc_save_guest_spe)
+	cmpi	0,r3,0
+	beqlr-				/* return immediately when r3 is zero */
+	SAVE_32EVRS(0, r4, r3, VCPU_EVR)
+	evxor   evr6, evr6, evr6	/* evr6 = 0 */
+	evmwumiaa evr6, evr6, evr6	/* adds 0 * 0 to the accumulator and copies the sum into evr6 */
+	li	r4,VCPU_ACC
+	evstddx evr6, r4, r3		/* save acc */
+	blr
+
+_GLOBAL(kvmppc_load_guest_spe)
+	cmpi	0,r3,0
+	beqlr-				/* return immediately when r3 is zero */
+	li      r4,VCPU_ACC
+	evlddx  evr6,r4,r3		/* evr6 = value stored at r3 + VCPU_ACC */
+	evmra   evr6,evr6		/* load acc */
+	REST_32EVRS(0, r4, r3, VCPU_EVR)	/* done after the acc load, which used evr6 as a temporary */
+	blr
+#endif
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
new file mode 100644
index 0000000000..8b4a402217
--- /dev/null
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -0,0 +1,673 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ * Author: Scott Wood <scotwood@freescale.com>
+ * Author: Mihai Caraman <mihai.caraman@freescale.com>
+ *
+ * This file is derived from arch/powerpc/kvm/booke_interrupts.S
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/bitsperlong.h>
+
+#ifdef CONFIG_64BIT
+#include <asm/exception-64e.h>
+#include <asm/hw_irq.h>
+#include <asm/irqflags.h>
+#else
+#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
+#endif
+
+#define LONGBYTES		(BITS_PER_LONG / 8)
+
+#define VCPU_GUEST_SPRG(n)	(VCPU_GUEST_SPRGS + (n * LONGBYTES))
+
+/* The host stack layout: */
+#define HOST_R1         0 /* Implied by stwu. */
+#define HOST_CALLEE_LR  PPC_LR_STKOFF
+#define HOST_RUN        (HOST_CALLEE_LR + LONGBYTES)
+/*
+ * r2 is special: it holds 'current', and it is made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option.
+ */
+#define HOST_R2         (HOST_RUN + LONGBYTES)
+#define HOST_CR         (HOST_R2 + LONGBYTES)
+#define HOST_NV_GPRS    (HOST_CR + LONGBYTES)
+#define __HOST_NV_GPR(n)  (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
+#define HOST_NV_GPR(n)  __HOST_NV_GPR(__REG_##n)
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
+#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
+/* LR in caller stack frame. */
+#define HOST_STACK_LR	(HOST_STACK_SIZE + PPC_LR_STKOFF)
+
+#define NEED_EMU		0x00000001 /* emulation -- save nv regs */
+#define NEED_DEAR		0x00000002 /* save faulting DEAR */
+#define NEED_ESR		0x00000004 /* save faulting ESR */
+
+/*
+ * On entry:
+ * r4 = vcpu, r5 = srr0, r6 = srr1
+ * saved in vcpu: cr, ctr, r3-r13
+ */
+.macro kvm_handler_common intno, srr0, flags
+	/* Save guest r1/r2, then restore host stack pointer and r2 */
+	PPC_STL	r1, VCPU_GPR(R1)(r4)
+	PPC_STL	r2, VCPU_GPR(R2)(r4)
+	PPC_LL	r1, VCPU_HOST_STACK(r4)
+	PPC_LL	r2, HOST_R2(r1)
+
+START_BTB_FLUSH_SECTION
+	BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+
+	mfspr	r10, SPRN_PID
+	lwz	r8, VCPU_HOST_PID(r4)
+	PPC_LL	r11, VCPU_SHARED(r4)	/* r11 = base for the VCPU_SHARED_* accesses */
+	PPC_STL	r14, VCPU_GPR(R14)(r4) /* We need a non-volatile GPR. */
+	li	r14, \intno		/* r14 = exit number, consumed by kvmppc_resume_host */
+
+	stw	r10, VCPU_GUEST_PID(r4)	/* remember the guest PID */
+	mtspr	SPRN_PID, r8		/* switch back to the host PID */
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save exit time */
+1:	mfspr	r7, SPRN_TBRU
+	mfspr	r8, SPRN_TBRL
+	mfspr	r9, SPRN_TBRU
+	cmpw	r9, r7
+	stw	r8, VCPU_TIMING_EXIT_TBL(r4)
+	bne-	1b			/* upper half changed between the two reads: retry */
+	stw	r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
+	oris	r8, r6, MSR_CE@h
+	PPC_STD(r6, VCPU_SHARED_MSR, r11)	/* r6 = srr1 value passed in by the caller */
+	ori	r8, r8, MSR_ME | MSR_RI	/* r8 = r6 with CE, ME and RI forced on */
+	PPC_STL	r5, VCPU_PC(r4)		/* r5 = srr0 value passed in by the caller */
+
+	/*
+	 * Make sure CE/ME/RI are set (if appropriate for exception type)
+	 * whether or not the guest had it set.  Since mfmsr/mtmsr are
+	 * somewhat expensive, skip in the common case where the guest
+	 * had all these bits set (and thus they're still set if
+	 * appropriate for the exception type).
+	 */
+	cmpw	r6, r8
+	beq	1f
+	mfmsr	r7
+	.if	\srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0
+	oris	r7, r7, MSR_CE@h
+	.endif
+	.if	\srr0 != SPRN_MCSRR0
+	ori	r7, r7, MSR_ME | MSR_RI
+	.endif
+	mtmsr	r7
+1:
+
+	.if	\flags & NEED_EMU
+	PPC_STL	r15, VCPU_GPR(R15)(r4)
+	PPC_STL	r16, VCPU_GPR(R16)(r4)
+	PPC_STL	r17, VCPU_GPR(R17)(r4)
+	PPC_STL	r18, VCPU_GPR(R18)(r4)
+	PPC_STL	r19, VCPU_GPR(R19)(r4)
+	PPC_STL	r20, VCPU_GPR(R20)(r4)
+	PPC_STL	r21, VCPU_GPR(R21)(r4)
+	PPC_STL	r22, VCPU_GPR(R22)(r4)
+	PPC_STL	r23, VCPU_GPR(R23)(r4)
+	PPC_STL	r24, VCPU_GPR(R24)(r4)
+	PPC_STL	r25, VCPU_GPR(R25)(r4)
+	PPC_STL	r26, VCPU_GPR(R26)(r4)
+	PPC_STL	r27, VCPU_GPR(R27)(r4)
+	PPC_STL	r28, VCPU_GPR(R28)(r4)
+	PPC_STL	r29, VCPU_GPR(R29)(r4)
+	PPC_STL	r30, VCPU_GPR(R30)(r4)
+	PPC_STL	r31, VCPU_GPR(R31)(r4)
+
+	/*
+	 * We don't use external PID support. lwepx faults would need to be
+	 * handled by KVM and this implies additional code in DO_KVM (for
+	 * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which
+	 * is too intrusive for the host. Get last instruction in
+	 * kvmppc_get_last_inst().
+	 */
+	li	r9, KVM_INST_FETCH_FAILED
+	PPC_STL	r9, VCPU_LAST_INST(r4)
+	.endif
+
+	.if	\flags & NEED_ESR
+	mfspr	r8, SPRN_ESR
+	PPC_STL	r8, VCPU_FAULT_ESR(r4)
+	.endif
+
+	.if	\flags & NEED_DEAR
+	mfspr	r9, SPRN_DEAR
+	PPC_STL	r9, VCPU_FAULT_DEAR(r4)
+	.endif
+
+	b	kvmppc_resume_host
+.endm
+
+#ifdef CONFIG_64BIT
+/* Exception types */
+#define EX_GEN			1
+#define EX_GDBELL		2
+#define EX_DBG			3
+#define EX_MC			4
+#define EX_CRIT			5
+#define EX_TLB			6
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
+ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
+	mr	r11, r4			/* keep the incoming r4 while r4 becomes the vcpu pointer */
+	/*
+	 * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
+	 */
+	PPC_LL	r4, PACACURRENT(r13)
+	PPC_LL	r4, (THREAD + THREAD_KVM_VCPU)(r4)
+	PPC_STL	r10, VCPU_CR(r4)	/* the incoming r10 is recorded as the guest CR */
+	PPC_STL r11, VCPU_GPR(R4)(r4)
+	PPC_STL	r5, VCPU_GPR(R5)(r4)
+	PPC_STL	r6, VCPU_GPR(R6)(r4)
+	PPC_STL	r8, VCPU_GPR(R8)(r4)
+	PPC_STL	r9, VCPU_GPR(R9)(r4)
+	.if \type == EX_TLB
+	PPC_LL	r5, EX_TLB_R13(r12)	/* values stored below as guest r13, r10, r11 */
+	PPC_LL	r6, EX_TLB_R10(r12)
+	PPC_LL	r8, EX_TLB_R11(r12)
+	mfspr	r12, \scratch		/* value stored below as guest r12 */
+	.else
+	mfspr	r5, \scratch		/* value stored below as guest r13 */
+	PPC_LL	r6, (\paca_ex + \ex_r10)(r13)
+	PPC_LL	r8, (\paca_ex + \ex_r11)(r13)
+	.endif
+	PPC_STL r5, VCPU_GPR(R13)(r4)
+	PPC_STL r3, VCPU_GPR(R3)(r4)
+	PPC_STL r7, VCPU_GPR(R7)(r4)
+	PPC_STL r12, VCPU_GPR(R12)(r4)
+	PPC_STL r6, VCPU_GPR(R10)(r4)
+	PPC_STL r8, VCPU_GPR(R11)(r4)
+	mfctr	r5
+	PPC_STL	r5, VCPU_CTR(r4)
+	mfspr	r5, \srr0		/* r5/r6 = srr0/srr1, as kvm_handler_common expects */
+	mfspr	r6, \srr1
+	kvm_handler_common \intno, \srr0, \flags
+.endm
+
+#define EX_PARAMS(type) \
+	EX_##type, \
+	SPRN_SPRG_##type##_SCRATCH, \
+	PACA_EX##type, \
+	EX_R10, \
+	EX_R11
+
+#define EX_PARAMS_TLB \
+	EX_TLB, \
+	SPRN_SPRG_GEN_SCRATCH, \
+	PACA_EXTLB, \
+	EX_TLB_R10, \
+	EX_TLB_R11
+
+kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
+	SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
+	SPRN_CSRR0, SPRN_CSRR1, 0
+/*
+ * Only bolted TLB miss exception handlers are supported for now
+ */
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
+	SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
+	SPRN_DSRR0, SPRN_DSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+#else
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+	PPC_LL	r11, THREAD_KVM_VCPU(r10)	/* r11 = vcpu; r10 is used as the thread pointer here */
+	PPC_STL r3, VCPU_GPR(R3)(r11)
+	mfspr	r3, SPRN_SPRG_RSCRATCH0	/* value stored below as guest r10 */
+	PPC_STL	r4, VCPU_GPR(R4)(r11)
+	PPC_LL	r4, THREAD_NORMSAVE(0)(r10)	/* value stored below as guest r11 */
+	PPC_STL	r5, VCPU_GPR(R5)(r11)
+	PPC_STL	r13, VCPU_CR(r11)	/* the incoming r13 is recorded as the guest CR */
+	mfspr	r5, \srr0
+	PPC_STL	r3, VCPU_GPR(R10)(r11)
+	PPC_LL	r3, THREAD_NORMSAVE(2)(r10)	/* value stored below as guest r13 */
+	PPC_STL	r6, VCPU_GPR(R6)(r11)
+	PPC_STL	r4, VCPU_GPR(R11)(r11)
+	mfspr	r6, \srr1
+	PPC_STL	r7, VCPU_GPR(R7)(r11)
+	PPC_STL	r8, VCPU_GPR(R8)(r11)
+	PPC_STL	r9, VCPU_GPR(R9)(r11)
+	PPC_STL r3, VCPU_GPR(R13)(r11)
+	mfctr	r7
+	PPC_STL	r12, VCPU_GPR(R12)(r11)
+	PPC_STL	r7, VCPU_CTR(r11)
+	mr	r4, r11			/* kvm_handler_common expects r4 = vcpu, r5 = srr0, r6 = srr1 */
+	kvm_handler_common \intno, \srr0, \flags
+.endm
+
+.macro kvm_lvl_handler intno scratch srr0, srr1, flags
+_GLOBAL(kvmppc_handler_\intno\()_\srr1)
+	mfspr	r10, SPRN_SPRG_THREAD
+	PPC_LL	r11, THREAD_KVM_VCPU(r10)	/* r11 = vcpu pointer */
+	PPC_STL r3, VCPU_GPR(R3)(r11)
+	mfspr	r3, \scratch		/* value stored below as guest r8 */
+	PPC_STL	r4, VCPU_GPR(R4)(r11)
+	PPC_LL	r4, GPR9(r8)		/* value stored below as guest r9 */
+	PPC_STL	r5, VCPU_GPR(R5)(r11)
+	PPC_STL	r9, VCPU_CR(r11)	/* the incoming r9 is recorded as the guest CR */
+	mfspr	r5, \srr0
+	PPC_STL	r3, VCPU_GPR(R8)(r11)
+	PPC_LL	r3, GPR10(r8)		/* value stored below as guest r10 */
+	PPC_STL	r6, VCPU_GPR(R6)(r11)
+	PPC_STL	r4, VCPU_GPR(R9)(r11)
+	mfspr	r6, \srr1
+	PPC_LL	r4, GPR11(r8)		/* value stored below as guest r11 */
+	PPC_STL	r7, VCPU_GPR(R7)(r11)
+	PPC_STL r3, VCPU_GPR(R10)(r11)
+	mfctr	r7
+	PPC_STL	r12, VCPU_GPR(R12)(r11)
+	PPC_STL r13, VCPU_GPR(R13)(r11)
+	PPC_STL	r4, VCPU_GPR(R11)(r11)
+	PPC_STL	r7, VCPU_CTR(r11)
+	mr	r4, r11			/* kvm_handler_common expects r4 = vcpu, r5 = srr0, r6 = srr1 */
+	kvm_handler_common \intno, \srr0, \flags
+.endm
+
+kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
+	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
+	SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
+	SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
+	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
+	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
+	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
+	SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
+#endif
+
+/* Registers:
+ *  SPRG_SCRATCH0: guest r10
+ *  r4: vcpu pointer
+ *  r11: vcpu->arch.shared
+ *  r14: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+	/* Save remaining volatile guest register state to vcpu. */
+	mfspr	r3, SPRN_VRSAVE
+	PPC_STL	r0, VCPU_GPR(R0)(r4)
+	mflr	r5
+	mfspr	r6, SPRN_SPRG4
+	PPC_STL	r5, VCPU_LR(r4)
+	mfspr	r7, SPRN_SPRG5
+	stw	r3, VCPU_VRSAVE(r4)
+#ifdef CONFIG_64BIT
+	PPC_LL	r3, PACA_SPRG_VDSO(r13)	/* value written to SPRN_SPRG_VDSO_WRITE below */
+#endif
+	mfspr	r5, SPRN_SPRG9
+	PPC_STD(r6, VCPU_SHARED_SPRG4, r11)
+	mfspr	r8, SPRN_SPRG6
+	PPC_STD(r7, VCPU_SHARED_SPRG5, r11)
+	mfspr	r9, SPRN_SPRG7
+#ifdef CONFIG_64BIT
+	mtspr	SPRN_SPRG_VDSO_WRITE, r3
+#endif
+	PPC_STD(r5, VCPU_SPRG9, r4)
+	PPC_STD(r8, VCPU_SHARED_SPRG6, r11)
+	mfxer	r3
+	PPC_STD(r9, VCPU_SHARED_SPRG7, r11)
+
+	/* save guest MAS registers and restore host mas4 & mas6 */
+	mfspr	r5, SPRN_MAS0
+	PPC_STL	r3, VCPU_XER(r4)	/* r3 = XER read by mfxer above */
+	mfspr	r6, SPRN_MAS1
+	stw	r5, VCPU_SHARED_MAS0(r11)
+	mfspr	r7, SPRN_MAS2
+	stw	r6, VCPU_SHARED_MAS1(r11)
+	PPC_STD(r7, VCPU_SHARED_MAS2, r11)
+	mfspr	r5, SPRN_MAS3
+	mfspr	r6, SPRN_MAS4
+	stw	r5, VCPU_SHARED_MAS7_3+4(r11)	/* MAS3 goes at offset 4 of the MAS7_3 field */
+	mfspr	r7, SPRN_MAS6
+	stw	r6, VCPU_SHARED_MAS4(r11)
+	mfspr	r5, SPRN_MAS7
+	lwz	r6, VCPU_HOST_MAS4(r4)
+	stw	r7, VCPU_SHARED_MAS6(r11)
+	lwz	r8, VCPU_HOST_MAS6(r4)
+	mtspr	SPRN_MAS4, r6
+	stw	r5, VCPU_SHARED_MAS7_3+0(r11)	/* MAS7 goes at offset 0 of the MAS7_3 field */
+	mtspr	SPRN_MAS6, r8
+	/* Enable MAS register updates via exception */
+	mfspr	r3, SPRN_EPCR
+	rlwinm	r3, r3, 0, ~SPRN_EPCR_DMIUH
+	mtspr	SPRN_EPCR, r3
+	isync
+
+	/* Switch to kernel stack and jump to handler. */
+	mr	r3, r4
+	mr	r5, r14 /* intno */
+	mr	r14, r4 /* Save vcpu pointer. */
+	mr	r4, r5
+	bl	kvmppc_handle_exit	/* called with r3 = vcpu, r4 = exit number */
+
+	/* Restore vcpu pointer and the nonvolatiles we used. */
+	mr	r4, r14
+	PPC_LL	r14, VCPU_GPR(R14)(r4)
+
+	andi.	r5, r3, RESUME_FLAG_NV	/* r3 = return value of kvmppc_handle_exit() */
+	beq	skip_nv_load
+	PPC_LL	r15, VCPU_GPR(R15)(r4)
+	PPC_LL	r16, VCPU_GPR(R16)(r4)
+	PPC_LL	r17, VCPU_GPR(R17)(r4)
+	PPC_LL	r18, VCPU_GPR(R18)(r4)
+	PPC_LL	r19, VCPU_GPR(R19)(r4)
+	PPC_LL	r20, VCPU_GPR(R20)(r4)
+	PPC_LL	r21, VCPU_GPR(R21)(r4)
+	PPC_LL	r22, VCPU_GPR(R22)(r4)
+	PPC_LL	r23, VCPU_GPR(R23)(r4)
+	PPC_LL	r24, VCPU_GPR(R24)(r4)
+	PPC_LL	r25, VCPU_GPR(R25)(r4)
+	PPC_LL	r26, VCPU_GPR(R26)(r4)
+	PPC_LL	r27, VCPU_GPR(R27)(r4)
+	PPC_LL	r28, VCPU_GPR(R28)(r4)
+	PPC_LL	r29, VCPU_GPR(R29)(r4)
+	PPC_LL	r30, VCPU_GPR(R30)(r4)
+	PPC_LL	r31, VCPU_GPR(R31)(r4)
+skip_nv_load:
+	/* Should we return to the guest? */
+	andi.	r5, r3, RESUME_FLAG_HOST
+	beq	lightweight_exit	/* flag clear: re-enter the guest */
+
+	srawi	r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+	/* Not returning to guest. */
+	PPC_LL	r5, HOST_STACK_LR(r1)	/* LR saved by __kvmppc_vcpu_run */
+	lwz	r6, HOST_CR(r1)		/* CR saved by __kvmppc_vcpu_run */
+
+	/*
+	 * We already saved guest volatile register state; now save the
+	 * non-volatiles.
+	 */
+
+	PPC_STL	r15, VCPU_GPR(R15)(r4)
+	PPC_STL	r16, VCPU_GPR(R16)(r4)
+	PPC_STL	r17, VCPU_GPR(R17)(r4)
+	PPC_STL	r18, VCPU_GPR(R18)(r4)
+	PPC_STL	r19, VCPU_GPR(R19)(r4)
+	PPC_STL	r20, VCPU_GPR(R20)(r4)
+	PPC_STL	r21, VCPU_GPR(R21)(r4)
+	PPC_STL	r22, VCPU_GPR(R22)(r4)
+	PPC_STL	r23, VCPU_GPR(R23)(r4)
+	PPC_STL	r24, VCPU_GPR(R24)(r4)
+	PPC_STL	r25, VCPU_GPR(R25)(r4)
+	PPC_STL	r26, VCPU_GPR(R26)(r4)
+	PPC_STL	r27, VCPU_GPR(R27)(r4)
+	PPC_STL	r28, VCPU_GPR(R28)(r4)
+	PPC_STL	r29, VCPU_GPR(R29)(r4)
+	PPC_STL	r30, VCPU_GPR(R30)(r4)
+	PPC_STL	r31, VCPU_GPR(R31)(r4)
+
+	/* Load host non-volatile register state from host stack. */
+	PPC_LL	r14, HOST_NV_GPR(R14)(r1)
+	PPC_LL	r15, HOST_NV_GPR(R15)(r1)
+	PPC_LL	r16, HOST_NV_GPR(R16)(r1)
+	PPC_LL	r17, HOST_NV_GPR(R17)(r1)
+	PPC_LL	r18, HOST_NV_GPR(R18)(r1)
+	PPC_LL	r19, HOST_NV_GPR(R19)(r1)
+	PPC_LL	r20, HOST_NV_GPR(R20)(r1)
+	PPC_LL	r21, HOST_NV_GPR(R21)(r1)
+	PPC_LL	r22, HOST_NV_GPR(R22)(r1)
+	PPC_LL	r23, HOST_NV_GPR(R23)(r1)
+	PPC_LL	r24, HOST_NV_GPR(R24)(r1)
+	PPC_LL	r25, HOST_NV_GPR(R25)(r1)
+	PPC_LL	r26, HOST_NV_GPR(R26)(r1)
+	PPC_LL	r27, HOST_NV_GPR(R27)(r1)
+	PPC_LL	r28, HOST_NV_GPR(R28)(r1)
+	PPC_LL	r29, HOST_NV_GPR(R29)(r1)
+	PPC_LL	r30, HOST_NV_GPR(R30)(r1)
+	PPC_LL	r31, HOST_NV_GPR(R31)(r1)
+
+	/* Return to kvm_vcpu_run(). */
+	mtlr	r5
+	mtcr	r6
+	addi	r1, r1, HOST_STACK_SIZE	/* pop the frame pushed by __kvmppc_vcpu_run */
+	/* r3 still contains the return code from kvmppc_handle_exit(). */
+	blr
+
+/* Registers:
+ *  r3: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+	stwu	r1, -HOST_STACK_SIZE(r1)	/* push a HOST_STACK_SIZE-byte frame */
+	PPC_STL	r1, VCPU_HOST_STACK(r3)	/* Save stack pointer to vcpu. */
+
+	/* Save host state to stack. */
+	mr	r4, r3			/* r4 = vcpu pointer from here on */
+	mflr	r3
+	mfcr	r5
+	PPC_STL	r3, HOST_STACK_LR(r1)
+
+	stw	r5, HOST_CR(r1)
+
+	/* Save host non-volatile register state to stack. */
+	PPC_STL	r14, HOST_NV_GPR(R14)(r1)
+	PPC_STL	r15, HOST_NV_GPR(R15)(r1)
+	PPC_STL	r16, HOST_NV_GPR(R16)(r1)
+	PPC_STL	r17, HOST_NV_GPR(R17)(r1)
+	PPC_STL	r18, HOST_NV_GPR(R18)(r1)
+	PPC_STL	r19, HOST_NV_GPR(R19)(r1)
+	PPC_STL	r20, HOST_NV_GPR(R20)(r1)
+	PPC_STL	r21, HOST_NV_GPR(R21)(r1)
+	PPC_STL	r22, HOST_NV_GPR(R22)(r1)
+	PPC_STL	r23, HOST_NV_GPR(R23)(r1)
+	PPC_STL	r24, HOST_NV_GPR(R24)(r1)
+	PPC_STL	r25, HOST_NV_GPR(R25)(r1)
+	PPC_STL	r26, HOST_NV_GPR(R26)(r1)
+	PPC_STL	r27, HOST_NV_GPR(R27)(r1)
+	PPC_STL	r28, HOST_NV_GPR(R28)(r1)
+	PPC_STL	r29, HOST_NV_GPR(R29)(r1)
+	PPC_STL	r30, HOST_NV_GPR(R30)(r1)
+	PPC_STL	r31, HOST_NV_GPR(R31)(r1)
+
+	/* Load guest non-volatiles. */
+	PPC_LL	r14, VCPU_GPR(R14)(r4)
+	PPC_LL	r15, VCPU_GPR(R15)(r4)
+	PPC_LL	r16, VCPU_GPR(R16)(r4)
+	PPC_LL	r17, VCPU_GPR(R17)(r4)
+	PPC_LL	r18, VCPU_GPR(R18)(r4)
+	PPC_LL	r19, VCPU_GPR(R19)(r4)
+	PPC_LL	r20, VCPU_GPR(R20)(r4)
+	PPC_LL	r21, VCPU_GPR(R21)(r4)
+	PPC_LL	r22, VCPU_GPR(R22)(r4)
+	PPC_LL	r23, VCPU_GPR(R23)(r4)
+	PPC_LL	r24, VCPU_GPR(R24)(r4)
+	PPC_LL	r25, VCPU_GPR(R25)(r4)
+	PPC_LL	r26, VCPU_GPR(R26)(r4)
+	PPC_LL	r27, VCPU_GPR(R27)(r4)
+	PPC_LL	r28, VCPU_GPR(R28)(r4)
+	PPC_LL	r29, VCPU_GPR(R29)(r4)
+	PPC_LL	r30, VCPU_GPR(R30)(r4)
+	PPC_LL	r31, VCPU_GPR(R31)(r4)
+
+
+lightweight_exit:
+	PPC_STL	r2, HOST_R2(r1)		/* reloaded by kvm_handler_common on the next exit */
+
+	mfspr	r3, SPRN_PID
+	stw	r3, VCPU_HOST_PID(r4)	/* remember the host PID */
+	lwz	r3, VCPU_GUEST_PID(r4)
+	mtspr	SPRN_PID, r3		/* switch to the guest PID */
+
+	PPC_LL	r11, VCPU_SHARED(r4)	/* r11 = base for the VCPU_SHARED_* loads below */
+	/* Disable MAS register updates via exception */
+	mfspr	r3, SPRN_EPCR
+	oris	r3, r3, SPRN_EPCR_DMIUH@h
+	mtspr	SPRN_EPCR, r3
+	isync
+	/* Save host mas4 and mas6 and load guest MAS registers */
+	mfspr	r3, SPRN_MAS4
+	stw	r3, VCPU_HOST_MAS4(r4)
+	mfspr	r3, SPRN_MAS6
+	stw	r3, VCPU_HOST_MAS6(r4)
+	lwz	r3, VCPU_SHARED_MAS0(r11)
+	lwz	r5, VCPU_SHARED_MAS1(r11)
+	PPC_LD(r6, VCPU_SHARED_MAS2, r11)
+	lwz	r7, VCPU_SHARED_MAS7_3+4(r11)	/* offset 4 of the MAS7_3 field is loaded into MAS3 */
+	lwz	r8, VCPU_SHARED_MAS4(r11)
+	mtspr	SPRN_MAS0, r3
+	mtspr	SPRN_MAS1, r5
+	mtspr	SPRN_MAS2, r6
+	mtspr	SPRN_MAS3, r7
+	mtspr	SPRN_MAS4, r8
+	lwz	r3, VCPU_SHARED_MAS6(r11)
+	lwz	r5, VCPU_SHARED_MAS7_3+0(r11)	/* offset 0 of the MAS7_3 field is loaded into MAS7 */
+	mtspr	SPRN_MAS6, r3
+	mtspr	SPRN_MAS7, r5
+
+	/*
+	 * Host interrupt handlers may have clobbered these guest-readable
+	 * SPRGs, so we need to reload them here with the guest's values.
+	 */
+	lwz	r3, VCPU_VRSAVE(r4)
+	PPC_LD(r5, VCPU_SHARED_SPRG4, r11)
+	mtspr	SPRN_VRSAVE, r3
+	PPC_LD(r6, VCPU_SHARED_SPRG5, r11)
+	mtspr	SPRN_SPRG4W, r5
+	PPC_LD(r7, VCPU_SHARED_SPRG6, r11)
+	mtspr	SPRN_SPRG5W, r6
+	PPC_LD(r8, VCPU_SHARED_SPRG7, r11)
+	mtspr	SPRN_SPRG6W, r7
+	PPC_LD(r5, VCPU_SPRG9, r4)
+	mtspr	SPRN_SPRG7W, r8
+	mtspr	SPRN_SPRG9, r5
+
+	/* Load some guest volatiles. */
+	PPC_LL	r3, VCPU_LR(r4)
+	PPC_LL	r5, VCPU_XER(r4)
+	PPC_LL	r6, VCPU_CTR(r4)
+	PPC_LL	r7, VCPU_CR(r4)
+	PPC_LL	r8, VCPU_PC(r4)
+	PPC_LD(r9, VCPU_SHARED_MSR, r11)	/* last use of r11 as the shared-area base */
+	PPC_LL	r0, VCPU_GPR(R0)(r4)
+	PPC_LL	r1, VCPU_GPR(R1)(r4)
+	PPC_LL	r2, VCPU_GPR(R2)(r4)
+	PPC_LL	r10, VCPU_GPR(R10)(r4)
+	PPC_LL	r11, VCPU_GPR(R11)(r4)
+	PPC_LL	r12, VCPU_GPR(R12)(r4)
+	PPC_LL	r13, VCPU_GPR(R13)(r4)
+	mtlr	r3
+	mtxer	r5
+	mtctr	r6
+	mtsrr0	r8			/* rfi below resumes at the saved guest PC */
+	mtsrr1	r9			/* ... with the MSR value from the shared area */
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	/* save enter time */
+1:
+	mfspr	r6, SPRN_TBRU
+	mfspr	r9, SPRN_TBRL
+	mfspr	r8, SPRN_TBRU
+	cmpw	r8, r6
+	stw	r9, VCPU_TIMING_LAST_ENTER_TBL(r4)
+	bne	1b			/* upper half changed between the two reads: retry */
+	stw	r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
+	/*
+	 * Don't execute any instruction which can change CR after
+	 * below instruction.
+	 */
+	mtcr	r7
+
+	/* Finish loading guest volatiles and jump to guest. */
+	PPC_LL	r5, VCPU_GPR(R5)(r4)
+	PPC_LL	r6, VCPU_GPR(R6)(r4)
+	PPC_LL	r7, VCPU_GPR(R7)(r4)
+	PPC_LL	r8, VCPU_GPR(R8)(r4)
+	PPC_LL	r9, VCPU_GPR(R9)(r4)
+
+	PPC_LL	r3, VCPU_GPR(R3)(r4)
+	PPC_LL	r4, VCPU_GPR(R4)(r4)	/* loaded last: r4 was the vcpu base pointer */
+	rfi
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644
index 0000000000..b0f6954287
--- /dev/null
+++ b/arch/powerpc/kvm/e500.c
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/kvm_ppc.h>
+
+#include "../mm/mmu_decl.h"
+#include "booke.h"
+#include "e500.h"
+
+struct id {
+	unsigned long val;	/* shadow ID; 0 means "no mapping" */
+	struct id **pentry;	/* pcpu_id_table slot expected to point back here */
+};
+
+#define NUM_TIDS 256
+
+/*
+ * This table provides mappings from:
+ * (guestAS,guestTID,guestPR) --> ID of physical cpu
+ * guestAS	[0..1]
+ * guestTID	[0..255]
+ * guestPR	[0..1]
+ * ID		[1..255]
+ * Each vcpu keeps one vcpu_id_table.
+ */
+struct vcpu_id_table {
+	struct id id[2][NUM_TIDS][2];
+};
+
+/*
+ * This table provides the reverse mapping of vcpu_id_table:
+ * ID --> address of vcpu_id_table item.
+ * Each physical core has one pcpu_id_table.
+ */
+struct pcpu_id_table {
+	struct id *entry[NUM_TIDS];
+};
+
+static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
+
+/* This variable keeps last used shadow ID on local core.
+ * The valid range of shadow ID is [1..255] */
+static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
+
+/*
+ * Allocate a free shadow id on this CPU and set up a valid sid mapping in
+ * the given entry; returns the new id, or -1 when no ids are left.
+ * A mapping is only valid when vcpu_id_table and pcpu_id_table match.
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_setup_one(struct id *entry)
+{
+	unsigned long sid;
+	int ret = -1;
+
+	sid = __this_cpu_inc_return(pcpu_last_used_sid);
+	if (sid < NUM_TIDS) {
+		__this_cpu_write(pcpu_sids.entry[sid], entry);
+		entry->val = sid;
+		entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
+		ret = sid;
+	}
+
+	/*
+	 * If sid == NUM_TIDS, we've run out of sids.  We return -1, and
+	 * the caller will invalidate everything and start over.
+	 *
+	 * sid > NUM_TIDS indicates a race, which we disable preemption to
+	 * avoid.
+	 */
+	WARN_ON(sid > NUM_TIDS);
+
+	return ret;
+}
+
+/*
+ * Check if given entry contains a valid shadow id mapping and return the
+ * shadow id, or -1 if it does not.  An ID mapping is considered valid
+ * only if both vcpu and pcpu know this mapping.
+ *
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+static inline int local_sid_lookup(struct id *entry)
+{
+	if (entry && entry->val != 0 &&
+	    __this_cpu_read(pcpu_sids.entry[entry->val]) == entry &&
+	    entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val]))
+		return entry->val;
+	return -1;
+}
+
+/* Drop every shadow id mapping held by this core; preemption must be off */
+static inline void local_sid_destroy_all(void)
+{
+	memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(struct pcpu_id_table));
+	__this_cpu_write(pcpu_last_used_sid, 0);
+}
+
+static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	vcpu_e500->idt = kzalloc(sizeof(*vcpu_e500->idt), GFP_KERNEL);
+	return vcpu_e500->idt;	/* NULL when the allocation failed */
+}
+
+static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	kfree(vcpu_e500->idt);
+	vcpu_e500->idt = NULL;	/* leave no dangling pointer behind */
+}
+
+/*
+ * Refresh the shadow PIDs: shadow_pid shadows the guest's current PID and
+ * shadow_pid1 shadows guest PID 0, so guest tlbes with TID=0 stay reachable.
+ */
+static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
+
+	/* shadow ids are per-CPU state, so stay on this CPU while using them */
+	preempt_disable();
+	vcpu->arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, get_cur_as(vcpu),
+			get_cur_pid(vcpu), get_cur_pr(vcpu), 1);
+	vcpu->arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, get_cur_as(vcpu),
+			0, get_cur_pr(vcpu), 1);
+	preempt_enable();
+}
+
+/* Forget every guest-to-shadow id mapping this vcpu holds */
+static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct vcpu_id_table *table = vcpu_e500->idt;
+
+	memset(table, 0, sizeof(*table));
+	kvmppc_e500_recalc_shadow_pid(vcpu_e500);	/* mappings changed */
+}
+
+/* Drop the mapping for a single (as, pid, pr) triple on this vcpu */
+static inline void kvmppc_e500_id_table_reset_one(
+			       struct kvmppc_vcpu_e500 *vcpu_e500,
+			       int as, int pid, int pr)
+{
+	struct id *slot;
+
+	BUG_ON(as >= 2);
+	BUG_ON(pid >= NUM_TIDS);
+	BUG_ON(pr >= 2);
+
+	slot = &vcpu_e500->idt->id[as][pid][pr];
+	slot->val = 0;
+	slot->pentry = NULL;
+
+	kvmppc_e500_recalc_shadow_pid(vcpu_e500);	/* mappings changed */
+}
+
+/*
+ * Map guest (vcpu,AS,ID,PR) to physical core shadow id.
+ * This function first looks up whether a valid mapping exists and, if
+ * not, creates a new one.  A non-zero avoid_recursion skips the shadow
+ * pid recalculation, which itself calls back into this function.
+ * The caller must have preemption disabled, and keep it that way until
+ * it has finished with the returned shadow id (either written into the
+ * TLB or arch.shadow_pid, or discarded).
+ */
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+				 unsigned int as, unsigned int gid,
+				 unsigned int pr, int avoid_recursion)
+{
+	struct vcpu_id_table *idt = vcpu_e500->idt;
+	int sid;
+
+	BUG_ON(as >= 2);
+	BUG_ON(gid >= NUM_TIDS);
+	BUG_ON(pr >= 2);
+
+	sid = local_sid_lookup(&idt->id[as][gid][pr]);
+
+	while (sid <= 0) {
+		/* No mapping yet */
+		sid = local_sid_setup_one(&idt->id[as][gid][pr]);
+		if (sid <= 0) {
+			_tlbil_all();
+			local_sid_destroy_all();
+		}
+
+		/* Update shadow pid when mappings are changed */
+		if (!avoid_recursion)
+			kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+	}
+
+	return sid;
+}
+
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+				      struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+	return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe),
+				   get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0); /* 0: recalc shadow pids */
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	if (vcpu->arch.pid == pid)
+		return;	/* same PID as before: nothing to recalculate */
+	vcpu_e500->pid[0] = vcpu->arch.pid = pid;
+	kvmppc_e500_recalc_shadow_pid(vcpu_e500);
+}
+
+/* gtlbe must not be mapped by more than one host tlbe */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+                           struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+	struct vcpu_id_table *idt = vcpu_e500->idt;
+	unsigned int pr, tid, ts;
+	int pid;
+	u32 val, eaddr;
+	unsigned long flags;
+
+	ts = get_tlb_ts(gtlbe);
+	tid = get_tlb_tid(gtlbe);
+
+	preempt_disable();
+
+	/* One guest ID may be mapped to two shadow IDs */
+	for (pr = 0; pr < 2; pr++) {
+		/*
+		 * The shadow PID can have a valid mapping on at most one
+		 * host CPU.  In the common case, it will be valid on this
+		 * CPU, in which case we do a local invalidation of the
+		 * specific address.
+		 *
+		 * If the shadow PID is not valid on the current host CPU,
+		 * we invalidate the entire shadow PID.
+		 */
+		pid = local_sid_lookup(&idt->id[ts][tid][pr]);
+		if (pid <= 0) {
+			kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr);
+			continue;
+		}
+
+		/*
+		 * The guest is invalidating a 4K entry which is in a PID
+		 * that has a valid shadow mapping on this host CPU.  We
+		 * search the host TLB to invalidate its shadow TLB entry,
+		 * similar to __tlbil_va except that we need to look in AS1.
+		 */
+		val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS;
+		eaddr = get_tlb_eaddr(gtlbe);
+
+		local_irq_save(flags);
+
+		mtspr(SPRN_MAS6, val);
+		asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr));
+		val = mfspr(SPRN_MAS1);
+		if (val & MAS1_VALID) {
+			mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+			asm volatile("tlbwe");
+		}
+
+		local_irq_restore(flags);
+	}
+
+	preempt_enable();
+}
+
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	kvmppc_e500_id_table_reset_all(vcpu_e500);	/* drops every sid mapping */
+}
+
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+	/* Shadow pids are derived from MSR bits (AS, PR): recalc on change */
+	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
+}
+
+static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvmppc_booke_vcpu_load(vcpu, cpu);
+
+	/* Shadow PIDs are per-core and may have expired on the local core */
+	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
+}
+
+static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_SPE
+	if (vcpu->arch.shadow_msr & MSR_SPE)	/* SPE is on in the shadow MSR */
+		kvmppc_vcpu_disable_spe(vcpu);
+#endif
+
+	kvmppc_booke_vcpu_put(vcpu);
+}
+
+static int kvmppc_e500_check_processor_compat(void)
+{
+	/*
+	 * Only a CPU whose cputable name is exactly "e500v2" is accepted;
+	 * anything else is rejected with -ENOTSUPP.
+	 */
+	if (strcmp(cur_cpu_spec->cpu_name, "e500v2") != 0)
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct kvm_book3e_206_tlb_entry *tlbe;
+
+	/* Insert large initial mapping for guest. */
+	tlbe = get_entry(vcpu_e500, 1, 0);	/* guest TLB1, entry 0 */
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
+	tlbe->mas2 = 0;
+	tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
+
+	/* 4K map for serial output. Used by kernel wrapper. */
+	tlbe = get_entry(vcpu_e500, 1, 1);	/* guest TLB1, entry 1 */
+	tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+	tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+	tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	kvmppc_e500_tlb_setup(vcpu_e500);
+
+	/* Registers init: start from the host's own PVR and SVR values */
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
+	vcpu_e500->svr = mfspr(SPRN_SVR);
+
+	vcpu->arch.cpu_type = KVM_CPU_E500V2;
+
+	return 0;
+}
+
+static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu,
+				      struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
+	                       KVM_SREGS_E_PM;
+	sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+	sregs->u.e.impl.fsl.features = 0;
+	sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+	sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+	sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+	/* ivor_high[0..3]: SPE unavailable, SPE FP data, SPE FP round, perfmon */
+	sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+	sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+	sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+	sregs->u.e.ivor_high[3] =
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+
+	kvmppc_get_sregs_ivor(vcpu, sregs);
+	kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+	return 0;
+}
+
+static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu,
+				      struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int ret;
+
+	if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+		vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+		vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+		vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+	}
+
+	ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+	if (ret < 0)
+		return ret;
+
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;	/* IVOR feature flag not set: leave the ivors alone */
+
+	if (sregs->u.e.features & KVM_SREGS_E_SPE) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
+			sregs->u.e.ivor_high[0];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
+			sregs->u.e.ivor_high[1];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
+			sregs->u.e.ivor_high[2];
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_PM) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+			sregs->u.e.ivor_high[3];
+	}
+
+	return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+				   union kvmppc_one_reg *val)
+{
+	/* Every id this backend handles is delegated to the e500 TLB code */
+	return kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+}
+
+static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+				   union kvmppc_one_reg *val)
+{
+	/* Writes go to the TLB code's set handler, not to its get handler */
+	return kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+}
+
+static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500;
+	int err;
+
+	BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+	vcpu_e500 = to_e500(vcpu);
+
+	if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+		return -ENOMEM;
+
+	err = kvmppc_e500_tlb_init(vcpu_e500);
+	if (err)
+		goto uninit_id;
+	/* One zeroed page backs vcpu->arch.shared */
+	vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+	if (!vcpu->arch.shared) {
+		err = -ENOMEM;
+		goto uninit_tlb;
+	}
+
+	return 0;
+	/* Error unwinding: undo the steps above in reverse order */
+uninit_tlb:
+	kvmppc_e500_tlb_uninit(vcpu_e500);
+uninit_id:
+	kvmppc_e500_id_table_free(vcpu_e500);
+	return err;
+}
+
+static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	/* Release in the reverse order of kvmppc_core_vcpu_create_e500() */
+	free_page((unsigned long)vcpu->arch.shared);
+	kvmppc_e500_tlb_uninit(vcpu_e500);
+	kvmppc_e500_id_table_free(vcpu_e500);
+}
+
+static int kvmppc_core_init_vm_e500(struct kvm *kvm)
+{
+	return 0;
+}
+
+static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)
+{
+}
+
+static struct kvmppc_ops kvm_ops_e500 = {
+	.get_sregs = kvmppc_core_get_sregs_e500,
+	.set_sregs = kvmppc_core_set_sregs_e500,
+	.get_one_reg = kvmppc_get_one_reg_e500,
+	.set_one_reg = kvmppc_set_one_reg_e500,
+	.vcpu_load   = kvmppc_core_vcpu_load_e500,
+	.vcpu_put    = kvmppc_core_vcpu_put_e500,
+	.vcpu_create = kvmppc_core_vcpu_create_e500,
+	.vcpu_free   = kvmppc_core_vcpu_free_e500,
+	.init_vm = kvmppc_core_init_vm_e500,
+	.destroy_vm = kvmppc_core_destroy_vm_e500,
+	.emulate_op = kvmppc_core_emulate_op_e500,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+	.create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500,
+};
+
+static int __init kvmppc_e500_init(void)
+{
+	int r, i;
+	unsigned long ivor[3];
+	/* Process remaining handlers above the generic first 16 */
+	unsigned long *handler = &kvmppc_booke_handler_addr[16];
+	unsigned long handler_len;
+	unsigned long max_ivor = 0;	/* index into ivor[] of the largest offset */
+
+	r = kvmppc_e500_check_processor_compat();
+	if (r)
+		goto err_out;
+
+	r = kvmppc_booke_init();
+	if (r)
+		goto err_out;
+
+	/* copy extra E500 exception handlers to their IVOR32..34 offsets */
+	ivor[0] = mfspr(SPRN_IVOR32);
+	ivor[1] = mfspr(SPRN_IVOR33);
+	ivor[2] = mfspr(SPRN_IVOR34);
+	for (i = 0; i < 3; i++) {
+		if (ivor[i] > ivor[max_ivor])
+			max_ivor = i;
+
+		handler_len = handler[i + 1] - handler[i];
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       (void *)handler[i], handler_len);
+	}
+	handler_len = handler[max_ivor + 1] - handler[max_ivor];
+	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
+			   ivor[max_ivor] + handler_len);
+	/* NOTE(review): a kvm_init() failure does not undo kvmppc_booke_init() */
+	r = kvm_init(sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	if (r)
+		goto err_out;
+	kvm_ops_e500.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_e500;
+
+err_out:
+	return r;
+}
+
+static void __exit kvmppc_e500_exit(void)
+{
+	kvmppc_pr_ops = NULL;
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500_init);
+module_exit(kvmppc_e500_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
new file mode 100644
index 0000000000..6d0d329cbb
--- /dev/null
+++ b/arch/powerpc/kvm/e500.h
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu <yu.liu@freescale.com>
+ *         Scott Wood <scottwood@freescale.com>
+ *         Ashish Kalra <ashish.kalra@freescale.com>
+ *         Varun Sethi <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h and
+ * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
+ * Copyright IBM Corp. 2007-2008
+ */
+
+#ifndef KVM_E500_H
+#define KVM_E500_H
+
+#include <linux/kvm_host.h>
+#include <asm/nohash/mmu-e500.h>
+#include <asm/tlb.h>
+#include <asm/cputhreads.h>
+
+enum vcpu_ftr {
+	VCPU_FTR_MMU_V2
+};
+
+#define E500_PID_NUM   3
+#define E500_TLB_NUM   2
+
+/* entry is mapped somewhere in host TLB */
+#define E500_TLB_VALID		(1 << 31)
+/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
+#define E500_TLB_BITMAP		(1 << 30)
+/* TLB1 entry is mapped by host TLB0 */
+#define E500_TLB_TLB0		(1 << 29)
+/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
+#define E500_TLB_MAS2_ATTR	(0x7f)
+
+struct tlbe_ref {
+	kvm_pfn_t pfn;		/* valid only for TLB0, except briefly */
+	unsigned int flags;	/* E500_TLB_* */
+};
+
+struct tlbe_priv {
+	struct tlbe_ref ref;
+};
+
+#ifdef CONFIG_KVM_E500V2
+struct vcpu_id_table;
+#endif
+
+struct kvmppc_e500_tlb_params {
+	int entries, ways, sets;
+};
+
+struct kvmppc_vcpu_e500 {
+	struct kvm_vcpu vcpu;
+
+	/* Unmodified copy of the guest's TLB -- shared with host userspace. */
+	struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+	/* Starting entry number in gtlb_arch[] */
+	int gtlb_offset[E500_TLB_NUM];
+
+	/* KVM internal information associated with each guest TLB entry */
+	struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
+
+	struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
+
+	unsigned int gtlb_nv[E500_TLB_NUM];
+
+	unsigned int host_tlb1_nv;
+
+	u32 svr;
+	u32 l1csr0;
+	u32 l1csr1;
+	u32 hid0;
+	u32 hid1;
+	u64 mcar;
+
+	struct page **shared_tlb_pages;
+	int num_shared_tlb_pages;
+
+	u64 *g2h_tlb1_map;
+	unsigned int *h2g_tlb1_rmap;
+
+	/* Minimum and maximum address mapped by TLB1 */
+	unsigned long tlb1_min_eaddr;
+	unsigned long tlb1_max_eaddr;
+
+#ifdef CONFIG_KVM_E500V2
+	u32 pid[E500_PID_NUM];
+
+	/* vcpu id table */
+	struct vcpu_id_table *idt;
+#endif
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE		128
+#define KVM_E500_TLB0_WAY_NUM		2
+
+#define KVM_E500_TLB0_SIZE  (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE  16
+
+#define index_of(tlbsel, esel)	(((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index)	((index) >> 16)
+#define esel_of(index)		((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+	  (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)
+#define MAS3_ATTRIB_MASK \
+	  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+	   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
+				ulong value);
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val);
+
+#ifdef CONFIG_KVM_E500V2
+unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
+				 unsigned int as, unsigned int gid,
+				 unsigned int pr, int avoid_recursion);
+#endif
+
+/* TLB helper functions */
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 >> 7) & 0x1f;	/* MAS1 bits 7-11 (LSB = 0) */
+}
+
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return tlbe->mas2 & MAS2_EPN;
+}
+
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	/* 1K scaled by two to the power of the 5-bit size field */
+	return 1ULL << (10 + get_tlb_size(tlbe));
+}
+
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	/* last byte covered: start address plus mapping length, minus one */
+	return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return tlbe->mas7_3 & ~0xfffULL;
+}
+
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 >> 16) & 0xff;	/* MAS1 bits 16-23 (LSB = 0) */
+}
+
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 >> 12) & 0x1;	/* MAS1 bit 12 (LSB = 0) */
+}
+
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 >> 31) & 0x1;	/* MAS1 bit 31 (LSB = 0) */
+}
+
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 >> 30) & 0x1;	/* MAS1 bit 30 (LSB = 0) */
+}
+
+static inline unsigned int
+get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu)
+{
+	return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS));
+}
+
+static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
+{
+	return !!(vcpu->arch.shared->msr & MSR_PR);
+}
+
+static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.shared->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.shared->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
+{
+	/*
+	 * The manual says that tlbsel is 2 bits wide.
+	 * Since we only have two TLBs, only the lower bit is used.
+	 */
+	return (vcpu->arch.shared->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.shared->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+			const struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;	/* entry is not valid */
+
+#ifndef CONFIG_KVM_BOOKE_HV
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS))
+		return 0;
+#endif
+
+	gpa = get_tlb_raddr(tlbe);	/* guest physical address of the mapping */
+	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+		/* Mapping is not for RAM. */
+		return 0;
+
+	return 1;
+}
+
+static inline struct kvm_book3e_206_tlb_entry *get_entry(
+	struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+	/* gtlb_arch[] is flat; gtlb_offset[] says where each TLB starts in it */
+	return vcpu_e500->gtlb_arch + vcpu_e500->gtlb_offset[tlbsel] + entry;
+}
+
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+			   struct kvm_book3e_206_tlb_entry *gtlbe);
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe)       get_tlb_tid(gtlbe)
+#define get_tlbmiss_tid(vcpu)           get_cur_pid(vcpu)
+#define get_tlb_sts(gtlbe)              (gtlbe->mas1 & MAS1_TS)
+
+/*
+ * These functions should be called with preemption disabled; the value
+ * depends on smp_processor_id() and is valid only in that context.
+ */
+static inline int get_thread_specific_lpid(int vm_lpid)
+{
+	int vcpu_lpid = vm_lpid;
+
+	if (threads_per_core == 2)
+		vcpu_lpid |= smp_processor_id() & 1;	/* low bit of the CPU number */
+
+	return vcpu_lpid;
+}
+
+static inline int get_lpid(struct kvm_vcpu *vcpu)
+{
+	return get_thread_specific_lpid(vcpu->kvm->arch.lpid);
+}
+#else
+unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
+				      struct kvm_book3e_206_tlb_entry *gtlbe);
+
+static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
+{
+	unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf;
+
+	/* MAS4 is guest-written: never index past the E500_PID_NUM pids */
+	return tidseld < E500_PID_NUM ? to_e500(vcpu)->pid[tidseld] : 0;
+}
+
+/* Force TS=1 for all guest mappings. */
+#define get_tlb_sts(gtlbe)              (MAS1_TS)
+#endif /* !BOOKE_HV */
+
+/*
+ * Report whether the vcpu's configuration registers advertise @ftr.
+ */
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+			       enum vcpu_ftr ftr)
+{
+	switch (ftr) {
+	case VCPU_FTR_MMU_V2:
+		return (vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2;
+	default:
+		/* unknown feature: report it as absent */
+		return false;
+	}
+}
+
+#endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644
index 0000000000..051102d50c
--- /dev/null
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x_emulate.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/dbell.h>
+#include <asm/reg_booke.h>
+
+#include "booke.h"
+#include "e500.h"
+
+#define XOP_DCBTLS  166
+#define XOP_MSGSND  206
+#define XOP_MSGCLR  238
+#define XOP_MFTMR   366
+#define XOP_TLBIVAX 786
+#define XOP_TLBSX   914
+#define XOP_TLBRE   946
+#define XOP_TLBWE   978
+#define XOP_TLBILX  18
+#define XOP_EHPRIV  270
+
+#ifdef CONFIG_KVM_E500MC
+/*
+ * Translate the doorbell type encoded in a message word into the booke
+ * exception priority number; -1 means the type is not handled here.
+ */
+static int dbell2prio(ulong param)
+{
+	int msg = param & PPC_DBELL_TYPE_MASK;
+
+	switch (msg) {
+	case PPC_DBELL_TYPE(PPC_DBELL):
+		return BOOKE_IRQPRIO_DBELL;
+	case PPC_DBELL_TYPE(PPC_DBELL_CRIT):
+		return BOOKE_IRQPRIO_DBELL_CRIT;
+	default:
+		/* any other doorbell type */
+		return -1;
+	}
+}
+
+/* Emulate msgclr: withdraw a pending doorbell on the executing vcpu */
+static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
+{
+	int prio = dbell2prio(vcpu->arch.regs.gpr[rb]);
+
+	if (prio >= 0) {
+		clear_bit(prio, &vcpu->arch.pending_exceptions);
+		return EMULATE_DONE;
+	}
+	return EMULATE_FAIL;
+}
+
+/* Emulate msgsnd: GPR rb holds the message; post it to the addressed vcpu(s) */
+static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
+{
+	ulong param = vcpu->arch.regs.gpr[rb];
+	int prio = dbell2prio(param);	/* decode the message, not the GPR index */
+	int pir = param & PPC_DBELL_PIR_MASK;
+	unsigned long i;
+	struct kvm_vcpu *cvcpu;
+
+	if (prio < 0)
+		return EMULATE_FAIL;
+
+	kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) {
+		int cpir = cvcpu->arch.shared->pir;
+		if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) {
+			set_bit(prio, &cvcpu->arch.pending_exceptions);
+			kvm_vcpu_kick(cvcpu);
+		}
+	}
+	return EMULATE_DONE;
+}
+#endif
+
+static int kvmppc_e500_emul_ehpriv(struct kvm_vcpu *vcpu,
+				   unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (get_oc(inst)) {
+	case EHPRIV_OC_DEBUG:
+		vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+		vcpu->run->debug.arch.address = vcpu->arch.regs.nip;
+		vcpu->run->debug.arch.status = 0;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
+		emulated = EMULATE_EXIT_USER;	/* report the debug exit to userspace */
+		*advance = 0;	/* NOTE(review): presumably keeps the PC on this insn */
+		break;
+	default:
+		emulated = EMULATE_FAIL;	/* other ehpriv opcodes are not handled */
+	}
+	return emulated;
+}
+
+static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	/* Always fail to lock the cache: only flag CUL in the guest's L1CSR0 */
+	vcpu_e500->l1csr0 |= L1CSR0_CUL;
+	return EMULATE_DONE;
+}
+
+static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst,
+				  int rt)
+{
+	/* TMCFG0 is the only TMR emulated here; any other number fails. */
+	/* Expose one thread per vcpu */
+	if (get_tmrn(inst) != TMRN_TMCFG0)
+		return EMULATE_FAIL;
+
+	kvmppc_set_gpr(vcpu, rt,
+		       1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT));
+	return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
+				unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+	int ra = get_ra(inst);
+	int rb = get_rb(inst);
+	int rt = get_rt(inst);
+	gva_t ea;
+
+	switch (get_op(inst)) {
+	case 31:
+		switch (get_xop(inst)) {
+		/* e500-specific extended opcodes under primary opcode 31 */
+		case XOP_DCBTLS:
+			emulated = kvmppc_e500_emul_dcbtls(vcpu);
+			break;
+
+#ifdef CONFIG_KVM_E500MC
+		case XOP_MSGSND:
+			emulated = kvmppc_e500_emul_msgsnd(vcpu, rb);
+			break;
+
+		case XOP_MSGCLR:
+			emulated = kvmppc_e500_emul_msgclr(vcpu, rb);
+			break;
+#endif
+
+		case XOP_TLBRE:
+			emulated = kvmppc_e500_emul_tlbre(vcpu);
+			break;
+
+		case XOP_TLBWE:
+			emulated = kvmppc_e500_emul_tlbwe(vcpu);
+			break;
+
+		case XOP_TLBSX:
+			ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+			emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
+			break;
+
+		case XOP_TLBILX: {
+			int type = rt & 0x3;	/* type is taken from the RT field */
+			ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+			emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
+			break;
+		}
+
+		case XOP_TLBIVAX:
+			ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
+			break;
+
+		case XOP_MFTMR:
+			emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt);
+			break;
+
+		case XOP_EHPRIV:
+			emulated = kvmppc_e500_emul_ehpriv(vcpu, inst, advance);
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+		}
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+	/* Anything that failed above is offered to the generic booke code */
+	if (emulated == EMULATE_FAIL)
+		emulated = kvmppc_booke_emulate_op(vcpu, inst, advance);
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
+	case SPRN_PID:
+		kvmppc_set_pid(vcpu, spr_val);
+		break;
+	case SPRN_PID1:
+		if (spr_val != 0)
+			return EMULATE_FAIL;	/* only PID1 == 0 is accepted */
+		vcpu_e500->pid[1] = spr_val;
+		break;
+	case SPRN_PID2:
+		if (spr_val != 0)
+			return EMULATE_FAIL;	/* only PID2 == 0 is accepted */
+		vcpu_e500->pid[2] = spr_val;
+		break;
+	case SPRN_MAS0:
+		vcpu->arch.shared->mas0 = spr_val;
+		break;
+	case SPRN_MAS1:
+		vcpu->arch.shared->mas1 = spr_val;
+		break;
+	case SPRN_MAS2:
+		vcpu->arch.shared->mas2 = spr_val;
+		break;
+	case SPRN_MAS3:
+		vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;	/* MAS3: low word */
+		vcpu->arch.shared->mas7_3 |= spr_val;
+		break;
+	case SPRN_MAS4:
+		vcpu->arch.shared->mas4 = spr_val;
+		break;
+	case SPRN_MAS6:
+		vcpu->arch.shared->mas6 = spr_val;
+		break;
+	case SPRN_MAS7:
+		vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;	/* MAS7: high word */
+		vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
+		break;
+#endif
+	case SPRN_L1CSR0:
+		vcpu_e500->l1csr0 = spr_val;
+		vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);	/* never kept set */
+		break;
+	case SPRN_L1CSR1:
+		vcpu_e500->l1csr1 = spr_val;
+		vcpu_e500->l1csr1 &= ~(L1CSR1_ICFI | L1CSR1_ICLFR);	/* never kept set */
+		break;
+	case SPRN_HID0:
+		vcpu_e500->hid0 = spr_val;
+		break;
+	case SPRN_HID1:
+		vcpu_e500->hid1 = spr_val;
+		break;
+
+	case SPRN_MMUCSR0:
+		emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
+				spr_val);
+		break;
+
+	case SPRN_PWRMGTCR0:
+		/*
+		 * Guest relies on host power management configurations
+		 * Treat the request as a general store
+		 */
+		vcpu->arch.pwrmgtcr0 = spr_val;
+		break;
+
+	case SPRN_BUCSR:
+		/*
+		 * If we are here, it means that we have already flushed the
+		 * branch predictor, so just return to guest.
+		 */
+		break;
+
+	/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
+	case SPRN_IVOR32:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
+		break;
+	case SPRN_IVOR33:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
+		break;
+	case SPRN_IVOR34:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
+		break;
+#endif
+#ifdef CONFIG_ALTIVEC
+	case SPRN_IVOR32:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val;
+		break;
+	case SPRN_IVOR33:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val;
+		break;
+#endif
+	case SPRN_IVOR35:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
+		break;
+#ifdef CONFIG_KVM_BOOKE_HV
+	case SPRN_IVOR36:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val;
+		break;
+	case SPRN_IVOR37:
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val;
+		break;
+#endif
+	default:
+		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val);
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+#ifndef CONFIG_KVM_BOOKE_HV
+	case SPRN_PID:
+		*spr_val = vcpu_e500->pid[0];
+		break;
+	case SPRN_PID1:
+		*spr_val = vcpu_e500->pid[1];
+		break;
+	case SPRN_PID2:
+		*spr_val = vcpu_e500->pid[2];
+		break;
+	case SPRN_MAS0:
+		*spr_val = vcpu->arch.shared->mas0;
+		break;
+	case SPRN_MAS1:
+		*spr_val = vcpu->arch.shared->mas1;
+		break;
+	case SPRN_MAS2:
+		*spr_val = vcpu->arch.shared->mas2;
+		break;
+	case SPRN_MAS3:
+		*spr_val = (u32)vcpu->arch.shared->mas7_3;	/* low word */
+		break;
+	case SPRN_MAS4:
+		*spr_val = vcpu->arch.shared->mas4;
+		break;
+	case SPRN_MAS6:
+		*spr_val = vcpu->arch.shared->mas6;
+		break;
+	case SPRN_MAS7:
+		*spr_val = vcpu->arch.shared->mas7_3 >> 32;	/* high word */
+		break;
+#endif
+	case SPRN_DECAR:
+		*spr_val = vcpu->arch.decar;
+		break;
+	case SPRN_TLB0CFG:
+		*spr_val = vcpu->arch.tlbcfg[0];
+		break;
+	case SPRN_TLB1CFG:
+		*spr_val = vcpu->arch.tlbcfg[1];
+		break;
+	case SPRN_TLB0PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[0];
+		break;
+	case SPRN_TLB1PS:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		*spr_val = vcpu->arch.tlbps[1];
+		break;
+	case SPRN_L1CSR0:
+		*spr_val = vcpu_e500->l1csr0;
+		break;
+	case SPRN_L1CSR1:
+		*spr_val = vcpu_e500->l1csr1;
+		break;
+	case SPRN_HID0:
+		*spr_val = vcpu_e500->hid0;
+		break;
+	case SPRN_HID1:
+		*spr_val = vcpu_e500->hid1;
+		break;
+	case SPRN_SVR:
+		*spr_val = vcpu_e500->svr;
+		break;
+
+	case SPRN_MMUCSR0:
+		*spr_val = 0;	/* always reads back as zero */
+		break;
+
+	case SPRN_MMUCFG:
+		*spr_val = vcpu->arch.mmucfg;
+		break;
+	case SPRN_EPTCFG:
+		if (!has_feature(vcpu, VCPU_FTR_MMU_V2))
+			return EMULATE_FAIL;
+		/*
+		 * Legacy Linux guests access EPTCFG register even if the E.PT
+		 * category is disabled in the VM. Give them a chance to live.
+		 */
+		*spr_val = vcpu->arch.eptcfg;
+		break;
+
+	case SPRN_PWRMGTCR0:
+		*spr_val = vcpu->arch.pwrmgtcr0;
+		break;
+
+	/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
+	case SPRN_IVOR32:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+		break;
+	case SPRN_IVOR33:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+		break;
+	case SPRN_IVOR34:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+		break;
+#endif
+#ifdef CONFIG_ALTIVEC
+	case SPRN_IVOR32:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL];
+		break;
+	case SPRN_IVOR33:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST];
+		break;
+#endif
+	case SPRN_IVOR35:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+		break;
+#ifdef CONFIG_KVM_BOOKE_HV
+	case SPRN_IVOR36:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+		break;
+	case SPRN_IVOR37:
+		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
+		break;
+#endif
+	default:
+		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val);
+	}
+
+	return emulated;
+}
+
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
new file mode 100644
index 0000000000..e131fbecdc
--- /dev/null
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -0,0 +1,956 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *         Scott Wood, scottwood@freescale.com
+ *         Ashish Kalra, ashish.kalra@freescale.com
+ *         Varun Sethi, varun.sethi@freescale.com
+ *         Alexander Graf, agraf@suse.de
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
+#include <asm/kvm_ppc.h>
+
+#include "e500.h"
+#include "trace_booke.h"
+#include "timing.h"
+#include "e500_mmu_host.h"
+
+static inline unsigned int gtlb0_get_next_victim(
+		struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	/* Hand out the current next-victim way of guest TLB0, then advance it. */
+	unsigned int cur = vcpu_e500->gtlb_nv[0];
+
+	vcpu_e500->gtlb_nv[0]++;
+	if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways))
+		vcpu_e500->gtlb_nv[0] = 0;	/* wrap back to the first way */
+	return cur;
+}
+
+static int tlb0_set_base(gva_t addr, int sets, int ways)
+{
+	/* The page number masked with (sets - 1) selects the set. */
+	int set_idx = (addr >> PAGE_SHIFT) & (sets - 1);
+
+	/* Scale by the way count to get the first entry index of that set. */
+	set_idx *= ways;
+	return set_idx;
+}
+
+static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+	return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets,	/* guest TLB0 geometry */
+			     vcpu_e500->gtlb_params[0].ways);
+}
+
+static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int esel = get_tlb_esel_bit(vcpu);
+
+	if (tlbsel == 0) {
+		esel &= vcpu_e500->gtlb_params[0].ways - 1;	/* way inside the set */
+		esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2);	/* + set base */
+	} else {
+		esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1;	/* mask to the TLB size */
+	}
+
+	return esel;
+}
+
+/* Look up eaddr in guest TLB tlbsel; return the entry index or -1 (as == -1: any TS). */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+		gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+	int size = vcpu_e500->gtlb_params[tlbsel].entries;
+	unsigned int set_base, offset;
+	int i;
+
+	if (tlbsel == 0) {
+		set_base = gtlb0_set_base(vcpu_e500, eaddr);
+		size = vcpu_e500->gtlb_params[0].ways;	/* scan a single set only */
+	} else {
+		if (eaddr < vcpu_e500->tlb1_min_eaddr ||
+				eaddr > vcpu_e500->tlb1_max_eaddr)
+			return -1;	/* outside the cached TLB1 address range */
+		set_base = 0;
+	}
+
+	offset = vcpu_e500->gtlb_offset[tlbsel];	/* start of this TLB in gtlb_arch */
+
+	for (i = 0; i < size; i++) {
+		struct kvm_book3e_206_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + set_base + i];
+		unsigned int tid;
+
+		if (eaddr < get_tlb_eaddr(tlbe))
+			continue;
+
+		if (eaddr > get_tlb_end(tlbe))
+			continue;
+
+		tid = get_tlb_tid(tlbe);
+		if (tid && (tid != pid))	/* TID 0 matches every pid */
+			continue;
+
+		if (!get_tlb_v(tlbe))
+			continue;
+
+		if (get_tlb_ts(tlbe) != as && as != -1)
+			continue;
+
+		return set_base + i;	/* relative to this TLB; gtlb_offset is not added */
+	}
+
+	return -1;
+}
+
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+		gva_t eaddr, int as)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned int victim, tsized;
+	int tlbsel;
+
+	/* TLB select comes from MAS4; with only two TLBs, only the low bit is used. */
+	tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
+	victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;	/* TLB1: always entry 0 */
+	tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;	/* size field read from MAS4 */
+
+	vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+		| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+	vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+		| MAS1_TID(get_tlbmiss_tid(vcpu))
+		| MAS1_TSIZE(tsized);
+	vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
+		| (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);	/* attributes copied from MAS4 */
+	vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;	/* keep U0-U3 */
+	vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1)
+		| (get_cur_pid(vcpu) << 16)
+		| (as ? MAS6_SAS : 0);
+}
+
+static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int size = vcpu_e500->gtlb_params[1].entries;
+	unsigned int offset;
+	gva_t eaddr;
+	int i;
+
+	vcpu_e500->tlb1_min_eaddr = ~0UL;	/* start from an empty range: min above max */
+	vcpu_e500->tlb1_max_eaddr = 0;
+	offset = vcpu_e500->gtlb_offset[1];
+
+	for (i = 0; i < size; i++) {
+		struct kvm_book3e_206_tlb_entry *tlbe =
+			&vcpu_e500->gtlb_arch[offset + i];
+
+		if (!get_tlb_v(tlbe))
+			continue;	/* invalid entries do not widen the range */
+
+		eaddr = get_tlb_eaddr(tlbe);
+		vcpu_e500->tlb1_min_eaddr =
+				min(vcpu_e500->tlb1_min_eaddr, eaddr);
+
+		eaddr = get_tlb_end(tlbe);
+		vcpu_e500->tlb1_max_eaddr =
+				max(vcpu_e500->tlb1_max_eaddr, eaddr);
+	}
+}
+
+static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500,
+				struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+	unsigned long bytes = get_tlb_bytes(gtlbe);
+	unsigned long first = get_tlb_eaddr(gtlbe) & ~(bytes - 1);
+	unsigned long last = first + bytes - 1;
+
+	/* Only an entry sitting on one of the cached bounds can change the range. */
+	if (vcpu_e500->tlb1_min_eaddr == first)
+		return 1;
+	return vcpu_e500->tlb1_max_eaddr == last;
+}
+
+/* Widen the cached TLB1 range to include a newly written entry; invalid entries are ignored. */
+static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu,
+				struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned long bytes, first, last;
+
+	if (get_tlb_v(gtlbe)) {
+		bytes = get_tlb_bytes(gtlbe);
+		first = get_tlb_eaddr(gtlbe) & ~(bytes - 1);
+		last = first + bytes - 1;
+
+		/* This path can only grow the range, never shrink it. */
+		vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, first);
+		vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, last);
+	}
+}
+
+static inline int kvmppc_e500_gtlbe_invalidate(
+				struct kvmppc_vcpu_e500 *vcpu_e500,
+				int tlbsel, int esel)
+{
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
+	int recal;	/* set when this entry sits on a cached TLB1 range bound */
+
+	if (unlikely(get_tlb_iprot(gtlbe)))
+		return -1;	/* IPROT entries are refused and left untouched */
+
+	recal = tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe);
+	gtlbe->mas1 = 0;	/* clear before the rescan so the range can shrink */
+	if (recal)
+		kvmppc_recalc_tlb1map_range(vcpu_e500);
+	return 0;
+}
+
+int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
+{
+	int esel;
+
+	if (value & MMUCSR0_TLB0FI)	/* invalidate all of guest TLB0 (IPROT entries survive) */
+		for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
+	if (value & MMUCSR0_TLB1FI)	/* likewise for guest TLB1 */
+		for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
+
+	/* Invalidate all host shadow mappings, whichever bits were set */
+	kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	unsigned int ia;
+	int esel, tlbsel;
+
+	ia = (ea >> 2) & 0x1;	/* flag carried in ea: invalidate the whole TLB */
+
+	/* since we only have two TLBs, only lower bit is used. */
+	tlbsel = (ea >> 3) & 0x1;
+
+	if (ia) {
+		/* invalidate all entries */
+		for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries;
+		     esel++)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	} else {
+		ea &= 0xfffff000;	/* 4K page address within the low 32 bits */
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+				get_cur_pid(vcpu), -1);	/* -1: either address space */
+		if (esel >= 0)
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+	}
+
+	/* Invalidate all host shadow mappings */
+	kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
+
+	return EMULATE_DONE;
+}
+
+static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+		       int pid, int type)
+{
+	struct kvm_book3e_206_tlb_entry *tlbe;
+	int tid, esel;
+
+	/* type 0 invalidates every entry; otherwise only entries whose TID equals pid */
+	for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
+		tlbe = get_entry(vcpu_e500, tlbsel, esel);
+		tid = get_tlb_tid(tlbe);
+		if (type == 0 || tid == pid) {
+			inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);	/* host shadow first */
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+		}
+	}
+}
+
+static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
+		       gva_t ea)
+{
+	int tlbsel, esel;
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);	/* any TS */
+		if (esel >= 0) {
+			inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+			break;	/* stop at the first match; TLB0 is probed before TLB1 */
+		}
+	}
+}
+
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int pid = get_cur_spid(vcpu);
+
+	if (type == 0 || type == 1) {	/* 0: every entry, 1: entries whose TID equals pid */
+		tlbilx_all(vcpu_e500, 0, pid, type);
+		tlbilx_all(vcpu_e500, 1, pid, type);
+	} else if (type == 3) {	/* 3: the single entry translating ea */
+		tlbilx_one(vcpu_e500, pid, ea);
+	}
+
+	return EMULATE_DONE;	/* any other type value is accepted but does nothing */
+}
+
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int tlbsel, esel;
+	struct kvm_book3e_206_tlb_entry *gtlbe;
+
+	tlbsel = get_tlb_tlbsel(vcpu);
+	esel = get_tlb_esel(vcpu, tlbsel);
+
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+	vcpu->arch.shared->mas0 &= ~MAS0_NV(~0);	/* only the NV field of MAS0 is refreshed */
+	vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+	vcpu->arch.shared->mas1 = gtlbe->mas1;	/* copy the selected guest entry out */
+	vcpu->arch.shared->mas2 = gtlbe->mas2;
+	vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	int as = !!get_cur_sas(vcpu);
+	unsigned int pid = get_cur_spid(vcpu);
+	int esel, tlbsel;
+	struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
+
+	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+		if (esel >= 0) {
+			gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+			break;	/* tlbsel and esel keep identifying the hit below */
+		}
+	}
+
+	if (gtlbe) {	/* hit: report the matching entry through the MAS registers */
+		esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1;	/* way number only */
+
+		vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+		vcpu->arch.shared->mas1 = gtlbe->mas1;
+		vcpu->arch.shared->mas2 = gtlbe->mas2;
+		vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
+	} else {	/* miss: preload values derived from MAS4 and MAS6 */
+		int victim;
+
+		/* since we only have two TLBs, only lower bit is used. */
+		tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1;
+		victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
+
+		vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel)
+			| MAS0_ESEL(victim)
+			| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+		vcpu->arch.shared->mas1 =
+			  (vcpu->arch.shared->mas6 & MAS6_SPID0)
+			| ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
+			| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));	/* valid bit is not set */
+		vcpu->arch.shared->mas2 &= MAS2_EPN;
+		vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
+					   MAS2_ATTRIB_MASK;
+		vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 |
+					     MAS3_U2 | MAS3_U3;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+	return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_entry *gtlbe;
+	int tlbsel, esel;
+	int recal = 0;
+	int idx;
+
+	tlbsel = get_tlb_tlbsel(vcpu);
+	esel = get_tlb_esel(vcpu, tlbsel);
+
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+
+	if (get_tlb_v(gtlbe)) {	/* a valid entry is about to be overwritten */
+		inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
+		if ((tlbsel == 1) &&
+			kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe))
+			recal = 1;	/* old entry sat on a range bound: rescan after the write */
+	}
+
+	gtlbe->mas1 = vcpu->arch.shared->mas1;
+	gtlbe->mas2 = vcpu->arch.shared->mas2;
+	if (!(vcpu->arch.shared->msr & MSR_CM))
+		gtlbe->mas2 &= 0xffffffffUL;	/* MSR[CM] clear: keep the low 32 bits only */
+	gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
+
+	trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
+	                              gtlbe->mas2, gtlbe->mas7_3);
+
+	if (tlbsel == 1) {
+		/*
+		 * If a valid tlb1 entry is overwritten then recalculate the
+		 * min/max TLB1 map address range otherwise no need to look
+		 * in tlb1 array.
+		 */
+		if (recal)
+			kvmppc_recalc_tlb1map_range(vcpu_e500);
+		else
+			kvmppc_set_tlb1map_range(vcpu, gtlbe);
+	}
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	/* Shadow the freshly written entry on the host if tlbe_is_host_safe() allows it. */
+	if (tlbe_is_host_safe(vcpu, gtlbe)) {
+		u64 eaddr = get_tlb_eaddr(gtlbe);
+		u64 raddr = get_tlb_raddr(gtlbe);
+
+		if (tlbsel == 0) {	/* force the stored TLB0 page size to 4K */
+			gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+			gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+		}
+
+		/* Premap the faulting page */
+		kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel));
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+	return EMULATE_DONE;
+}
+
+static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+				  gva_t eaddr, unsigned int pid, int as)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+	int which = 0, hit;
+
+	/* Probe TLB0 first, then TLB1; the first match wins. */
+	do {
+		hit = kvmppc_e500_tlb_index(e500, eaddr, which, pid, as);
+		if (hit >= 0)
+			return index_of(which, hit);
+	} while (++which < 2);
+	return -1;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR. */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+                               struct kvm_translation *tr)
+{
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;	/* eight bits starting at bit 32 */
+	as = (tr->linear_address >> 40) & 0x1;	/* single bit at position 40 */
+
+	index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+	if (index < 0) {
+		tr->valid = 0;
+		return 0;	/* a miss is reported through tr->valid, not the return value */
+	}
+
+	tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+	/* XXX what does "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
+
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	/* Instruction lookups use the address space given by MSR_IS. */
+	return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu),
+				      !!(vcpu->arch.shared->msr & MSR_IS));
+}
+
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	/* Data lookups use the address space given by MSR_DS. */
+	return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu),
+				      !!(vcpu->arch.shared->msr & MSR_DS));
+}
+
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+	/* An instruction miss is reported at the guest's current NIP. */
+	kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.regs.nip,
+				     !!(vcpu->arch.shared->msr & MSR_IS));
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+	/* A data miss is reported at the address held in fault_dear. */
+	kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear,
+				     !!(vcpu->arch.shared->msr & MSR_DS));
+}
+
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+			gva_t eaddr)
+{
+	struct kvm_book3e_206_tlb_entry *entry =
+		get_entry(to_e500(vcpu), tlbsel_of(index), esel_of(index));
+	/* Offset of eaddr inside the region mapped by this entry. */
+	u64 offset = eaddr & (u64)(get_tlb_bytes(entry) - 1);
+
+	/* The entry's real address supplies the remaining upper bits. */
+	return get_tlb_raddr(entry) | offset;
+}
+
+/*****************************************/
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int i;
+
+	kvmppc_core_flush_tlb(&vcpu_e500->vcpu);
+	kfree(vcpu_e500->g2h_tlb1_map);
+	kfree(vcpu_e500->gtlb_priv[0]);
+	kfree(vcpu_e500->gtlb_priv[1]);
+
+	if (vcpu_e500->shared_tlb_pages) {	/* array is a mapping of pinned user pages */
+		vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+					  PAGE_SIZE)));	/* gtlb_arch may start mid-page */
+
+		for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
+			set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
+			put_page(vcpu_e500->shared_tlb_pages[i]);	/* drop the pin */
+		}
+
+		vcpu_e500->num_shared_tlb_pages = 0;
+
+		kfree(vcpu_e500->shared_tlb_pages);
+		vcpu_e500->shared_tlb_pages = NULL;
+	} else {	/* array is the slab allocation made by kvmppc_e500_tlb_init() */
+		kfree(vcpu_e500->gtlb_arch);
+	}
+
+	vcpu_e500->gtlb_arch = NULL;
+}
+
+void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	sregs->u.e.mas0 = vcpu->arch.shared->mas0;	/* MAS values come from the shared area */
+	sregs->u.e.mas1 = vcpu->arch.shared->mas1;
+	sregs->u.e.mas2 = vcpu->arch.shared->mas2;
+	sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
+	sregs->u.e.mas4 = vcpu->arch.shared->mas4;
+	sregs->u.e.mas6 = vcpu->arch.shared->mas6;
+
+	sregs->u.e.mmucfg = vcpu->arch.mmucfg;
+	sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0];
+	sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1];
+	sregs->u.e.tlbcfg[2] = 0;	/* TLB2 and TLB3 are always reported as zero */
+	sregs->u.e.tlbcfg[3] = 0;
+}
+
+int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {	/* MAS values taken only if flagged */
+		vcpu->arch.shared->mas0 = sregs->u.e.mas0;
+		vcpu->arch.shared->mas1 = sregs->u.e.mas1;
+		vcpu->arch.shared->mas2 = sregs->u.e.mas2;
+		vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
+		vcpu->arch.shared->mas4 = sregs->u.e.mas4;
+		vcpu->arch.shared->mas6 = sregs->u.e.mas6;
+	}
+
+	return 0;	/* mmucfg and tlbcfg from sregs are never applied here */
+}
+
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+				union kvmppc_one_reg *val)
+{
+	int r = 0;	/* 0 on success, -EINVAL for an id not handled here */
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		*val = get_reg_val(id, vcpu->arch.shared->mas0);
+		break;
+	case KVM_REG_PPC_MAS1:
+		*val = get_reg_val(id, vcpu->arch.shared->mas1);
+		break;
+	case KVM_REG_PPC_MAS2:
+		*val = get_reg_val(id, vcpu->arch.shared->mas2);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		*val = get_reg_val(id, vcpu->arch.shared->mas7_3);
+		break;
+	case KVM_REG_PPC_MAS4:
+		*val = get_reg_val(id, vcpu->arch.shared->mas4);
+		break;
+	case KVM_REG_PPC_MAS6:
+		*val = get_reg_val(id, vcpu->arch.shared->mas6);
+		break;
+	case KVM_REG_PPC_MMUCFG:
+		*val = get_reg_val(id, vcpu->arch.mmucfg);
+		break;
+	case KVM_REG_PPC_EPTCFG:
+		*val = get_reg_val(id, vcpu->arch.eptcfg);
+		break;
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG:
+		i = id - KVM_REG_PPC_TLB0CFG;	/* assumes the four TLBnCFG ids are consecutive */
+		*val = get_reg_val(id, vcpu->arch.tlbcfg[i]);
+		break;
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS:
+		i = id - KVM_REG_PPC_TLB0PS;	/* same assumption for the TLBnPS ids */
+		*val = get_reg_val(id, vcpu->arch.tlbps[i]);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+			       union kvmppc_one_reg *val)
+{
+	int r = 0;	/* 0 on success, -EINVAL for a rejected value or unknown id */
+	long int i;
+
+	switch (id) {
+	case KVM_REG_PPC_MAS0:
+		vcpu->arch.shared->mas0 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS1:
+		vcpu->arch.shared->mas1 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS2:
+		vcpu->arch.shared->mas2 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS7_3:
+		vcpu->arch.shared->mas7_3 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS4:
+		vcpu->arch.shared->mas4 = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MAS6:
+		vcpu->arch.shared->mas6 = set_reg_val(id, *val);
+		break;
+	/* Only allow MMU registers to be set to the config supported by KVM */
+	case KVM_REG_PPC_MMUCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.mmucfg)	/* nothing is stored; the value must already match */
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_EPTCFG: {
+		u32 reg = set_reg_val(id, *val);
+		if (reg != vcpu->arch.eptcfg)
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0CFG:
+	case KVM_REG_PPC_TLB1CFG:
+	case KVM_REG_PPC_TLB2CFG:
+	case KVM_REG_PPC_TLB3CFG: {
+		/* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0CFG;	/* assumes the four TLBnCFG ids are consecutive */
+		if (reg != vcpu->arch.tlbcfg[i])
+			r = -EINVAL;
+		break;
+	}
+	case KVM_REG_PPC_TLB0PS:
+	case KVM_REG_PPC_TLB1PS:
+	case KVM_REG_PPC_TLB2PS:
+	case KVM_REG_PPC_TLB3PS: {
+		u32 reg = set_reg_val(id, *val);
+		i = id - KVM_REG_PPC_TLB0PS;	/* same assumption for the TLBnPS ids */
+		if (reg != vcpu->arch.tlbps[i])
+			r = -EINVAL;
+		break;
+	}
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu,
+		struct kvm_book3e_206_tlb_params *params)
+{
+	vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	if (params->tlb_sizes[0] <= 2048)	/* larger sizes leave N_ENTRY at zero */
+		vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0];
+	vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1];	/* no size cap applied here for TLB1 */
+	vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+	return 0;	/* always succeeds */
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+			      struct kvm_config_tlb *cfg)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_params params;
+	char *virt;
+	struct page **pages;
+	struct tlbe_priv *privs[2] = {};
+	u64 *g2h_bitmap;
+	size_t array_len;
+	u32 sets;
+	int num_pages, ret, i;
+
+	if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+		return -EINVAL;
+
+	if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+			   sizeof(params)))
+		return -EFAULT;
+
+	if (params.tlb_sizes[1] > 64)
+		return -EINVAL;
+	if (params.tlb_ways[1] != params.tlb_sizes[1])	/* TLB1: ways must equal entries */
+		return -EINVAL;
+	if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)	/* no TLB2/TLB3 */
+		return -EINVAL;
+	if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
+		return -EINVAL;
+
+	if (!is_power_of_2(params.tlb_ways[0]))
+		return -EINVAL;
+
+	sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+	if (!is_power_of_2(sets))	/* tlb0_set_base() masks with sets - 1 */
+		return -EINVAL;
+
+	array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+	array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
+
+	if (cfg->array_len < array_len)
+		return -EINVAL;
+
+	num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+		    cfg->array / PAGE_SIZE;
+	pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages);
+	if (ret < 0)
+		goto free_pages;
+
+	if (ret != num_pages) {
+		num_pages = ret;	/* release only the pages actually pinned */
+		ret = -EFAULT;
+		goto put_pages;
+	}
+
+	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+	if (!virt) {
+		ret = -ENOMEM;
+		goto put_pages;
+	}
+
+	/* From here on, every failure must also undo the vmap() above. */
+	privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
+	if (!privs[0]) {
+		ret = -ENOMEM;
+		goto free_privs;
+	}
+
+	privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
+	if (!privs[1]) {
+		ret = -ENOMEM;
+		goto free_privs;
+	}
+
+	g2h_bitmap = kcalloc(params.tlb_sizes[1], sizeof(*g2h_bitmap),
+			     GFP_KERNEL);
+	if (!g2h_bitmap) {
+		ret = -ENOMEM;
+		goto free_privs;
+	}
+
+	free_gtlb(vcpu_e500);	/* nothing can fail any more: drop the previous arrays */
+
+	vcpu_e500->gtlb_priv[0] = privs[0];
+	vcpu_e500->gtlb_priv[1] = privs[1];
+	vcpu_e500->g2h_tlb1_map = g2h_bitmap;
+
+	vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
+		(virt + (cfg->array & (PAGE_SIZE - 1)));	/* keep the in-page offset */
+
+	vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
+	vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];	/* TLB1 follows TLB0 in the array */
+
+	/* Update vcpu's MMU geometry based on SW_TLB input */
+	vcpu_mmu_geometry_update(vcpu, &params);
+
+	vcpu_e500->shared_tlb_pages = pages;
+	vcpu_e500->num_shared_tlb_pages = num_pages;
+
+	vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
+	vcpu_e500->gtlb_params[0].sets = sets;
+
+	vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
+	vcpu_e500->gtlb_params[1].sets = 1;
+
+	kvmppc_recalc_tlb1map_range(vcpu_e500);
+	return 0;
+ free_privs:
+	kfree(privs[1]);	/* privs[] starts out NULL and kfree(NULL) is a no-op */
+	kfree(privs[0]);
+	vunmap(virt);
+ put_pages:
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+ free_pages:
+	kfree(pages);
+	return ret;
+}
+
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+			     struct kvm_dirty_tlb *dirty)
+{
+	/* 'dirty' is not consulted: rescan the TLB1 range, then flush the TLB. */
+	kvmppc_recalc_tlb1map_range(to_e500(vcpu));
+	kvmppc_core_flush_tlb(vcpu);
+	return 0;
+}
+
+/* Build the vcpu's default MMU config from host SPRs and 'params'; always returns 0. */
+static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
+		       struct kvmppc_e500_tlb_params *params)
+{
+	/* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values */
+	vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE;
+
+	/* Initialize TLBnCFG fields with host values and SW_TLB geometry */
+	vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[0] |= params[0].entries;
+	vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+	vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) &
+			     ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+	vcpu->arch.tlbcfg[1] |= params[1].entries;
+	vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT;
+
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2)) {
+		vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS);	/* TLBnPS copied from the host */
+		vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS);
+
+		vcpu->arch.mmucfg &= ~MMUCFG_LRAT;	/* LRAT bit is hidden from the guest */
+
+		/* Guest mmu emulation currently doesn't handle E.PT */
+		vcpu->arch.eptcfg = 0;
+		vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT;
+		vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND;
+	}
+
+	return 0;
+}
+
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
+
+	if (e500_mmu_host_init(vcpu_e500))
+		goto free_vcpu;
+
+	vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
+	vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
+
+	vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
+	vcpu_e500->gtlb_params[0].sets =
+		KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+	vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;	/* one set holding every entry */
+	vcpu_e500->gtlb_params[1].sets = 1;
+	/* Zeroed so that no entry looks valid before it has been written. */
+	vcpu_e500->gtlb_arch = kcalloc(KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE,
+				       sizeof(*vcpu_e500->gtlb_arch),
+				       GFP_KERNEL);
+	if (!vcpu_e500->gtlb_arch)
+		return -ENOMEM;	/* NOTE(review): unlike later failures, skips free_vcpu */
+
+	vcpu_e500->gtlb_offset[0] = 0;
+	vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;	/* TLB1 follows TLB0 in gtlb_arch */
+
+	vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
+					  sizeof(*vcpu_e500->gtlb_priv[0]),
+					  GFP_KERNEL);
+	if (!vcpu_e500->gtlb_priv[0])
+		goto free_vcpu;
+
+	vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
+					  sizeof(*vcpu_e500->gtlb_priv[1]),
+					  GFP_KERNEL);
+	if (!vcpu_e500->gtlb_priv[1])
+		goto free_vcpu;
+
+	vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
+					  sizeof(*vcpu_e500->g2h_tlb1_map),
+					  GFP_KERNEL);
+	if (!vcpu_e500->g2h_tlb1_map)
+		goto free_vcpu;
+
+	vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
+
+	kvmppc_recalc_tlb1map_range(vcpu_e500);	/* reads the valid bits of the new array */
+	return 0;
+ free_vcpu:
+	free_gtlb(vcpu_e500);
+	return -1;
+}
+
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	free_gtlb(vcpu_e500);	/* guest TLB arrays first */
+	e500_mmu_host_uninit(vcpu_e500);	/* then whatever e500_mmu_host_init() set up */
+}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
new file mode 100644
index 0000000000..ccb8f16ffe
--- /dev/null
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *         Scott Wood, scottwood@freescale.com
+ *         Ashish Kalra, ashish.kalra@freescale.com
+ *         Varun Sethi, varun.sethi@freescale.com
+ *         Alexander Graf, agraf@suse.de
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched/mm.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
+#include <asm/kvm_ppc.h>
+#include <asm/pte-walk.h>
+
+#include "e500.h"
+#include "timing.h"
+#include "e500_mmu_host.h"
+
+#include "trace_booke.h"
+
+/*
+ * Convert a shadow TLB1 index into a host TLB1 entry number by counting
+ * down from the last host entry (index 0 maps to entries - 1).
+ */
+#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
+
+/* Host TLB geometry, filled in by e500_mmu_host_init(). */
+static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
+
+/*
+ * Number of host TLB1 entries available for shadow mappings: the host TLB1
+ * size less tlbcam_index, less one entry kept back for the magic page.
+ */
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+	return host_tlb_params[1].entries - (tlbcam_index + 1);
+}
+
+/*
+ * Derive the MAS3 attribute/permission bits for a shadow entry from the
+ * guest's MAS3 value.
+ *
+ * Without CONFIG_KVM_BOOKE_HV, a guest running in supervisor mode
+ * (usermode == 0) has its supervisor permissions copied into the user
+ * permission bits, and the supervisor permission bits are always set.
+ */
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+	/* Keep only the attribute bits; everything else is reserved. */
+	u32 attrib = mas3 & MAS3_ATTRIB_MASK;
+
+#ifndef CONFIG_KVM_BOOKE_HV
+	if (!usermode) {
+		/*
+		 * Guest supervisor mode: replace the user permissions with
+		 * the guest's supervisor permissions, shifted into place.
+		 */
+		attrib &= ~E500_TLB_USER_PERM_MASK;
+		attrib |= (attrib & E500_TLB_SUPER_PERM_MASK) << 1;
+	}
+	attrib |= E500_TLB_SUPER_PERM_MASK;
+#endif
+	return attrib;
+}
+
+/*
+ * Write one shadow TLB entry into the host TLB.
+ *
+ * @stlbe: shadow entry supplying MAS1, MAS2 and MAS7/MAS3
+ * @mas0:  MAS0 value selecting the TLB array and entry to write
+ * @lpid:  used only with CONFIG_KVM_BOOKE_HV, to build MAS8
+ *
+ * The MAS registers are programmed and tlbwe is issued with interrupts
+ * disabled.
+ */
+static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
+				     uint32_t mas0,
+				     uint32_t lpid)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	mtspr(SPRN_MAS0, mas0);
+	mtspr(SPRN_MAS1, stlbe->mas1);
+	mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+	/* mas7_3 holds MAS7 in its upper 32 bits and MAS3 in its lower 32 */
+	mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+	mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
+#ifdef CONFIG_KVM_BOOKE_HV
+	mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid));
+#endif
+	asm volatile("isync; tlbwe" : : : "memory");
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	/* Must clear mas8 for other host tlbwe's */
+	mtspr(SPRN_MAS8, 0);
+	isync();
+#endif
+	local_irq_restore(flags);
+
+	/* the trace reports stlbe->mas8, not the value written to MAS8 above */
+	trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
+	                              stlbe->mas2, stlbe->mas7_3);
+}
+
+/*
+ * Acquire a mas0 with victim hint, as if we just took a TLB miss.
+ *
+ * We don't care about the address we're searching for, other than that it's
+ * in the right set and is not present in the TLB.  Using a zero PID and a
+ * userspace address means we don't have to set and then restore MAS5, or
+ * calculate a proper MAS6 value.
+ *
+ * Returns the MAS0 value read back after the tlbsx search.  MAS4 is
+ * restored before returning; MAS6 is left at zero.
+ */
+static u32 get_host_mas0(unsigned long eaddr)
+{
+	unsigned long flags;
+	u32 mas0;
+	u32 mas4;
+
+	local_irq_save(flags);
+	mtspr(SPRN_MAS6, 0);
+	/* temporarily clear the TLBSEL field of MAS4 */
+	mas4 = mfspr(SPRN_MAS4);
+	mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK);
+	/* search with the CONFIG_PAGE_OFFSET bits masked out of the address */
+	asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
+	mas0 = mfspr(SPRN_MAS0);
+	mtspr(SPRN_MAS4, mas4);
+	local_irq_restore(flags);
+
+	return mas0;
+}
+
+/*
+ * Install a shadow entry in the host TLB array chosen by tlbsel.
+ *
+ * For TLB0 the MAS0 value comes from get_host_mas0(); for any other tlbsel
+ * the entry goes into host TLB1 at the slot derived from sesel.  sesel is
+ * only used in the TLB1 case.
+ */
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+		int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
+{
+	u32 mas0;
+
+	if (tlbsel == 0)
+		mas0 = get_host_mas0(stlbe->mas2);
+	else
+		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(sesel));
+
+	__write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid);
+}
+
+/*
+ * Stamp the shadow TID into the shadow entry and write it to the host TLB.
+ * The TID lookup and the TLB write happen with preemption disabled.
+ * sesel is only meaningful when stlbsel selects TLB1.
+ */
+static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+			struct kvm_book3e_206_tlb_entry *gtlbe,
+			struct kvm_book3e_206_tlb_entry *stlbe,
+			int stlbsel, int sesel)
+{
+	int shadow_tid;
+
+	preempt_disable();
+
+	shadow_tid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe);
+	stlbe->mas1 |= MAS1_TID(shadow_tid);
+
+	write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
+
+	preempt_enable();
+}
+
+#ifdef CONFIG_KVM_E500V2
+/*
+ * Map the page containing vcpu->arch.shared at the guest's magic page
+ * address, using host TLB1 entry tlbcam_index.
+ *
+ * XXX should be a hook in the gva2hpa translation
+ */
+void kvmppc_map_magic(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_entry magic;
+	ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+	unsigned int stid;
+	kvm_pfn_t pfn;
+
+	/* take a reference on the page that holds the shared area */
+	pfn = (kvm_pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
+	get_page(pfn_to_page(pfn));
+
+	preempt_disable();
+	stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0);
+
+	/* valid 4K entry with TS set, readable and writable for user and supervisor */
+	magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
+		     MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+	magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
+	magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+		       MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
+	magic.mas8 = 0;
+
+	__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0);
+	preempt_enable();
+}
+#endif
+
+/*
+ * Drop the host TLB mappings that back guest TLB entry (tlbsel, esel) and
+ * mark its reference as no longer backed by the host.
+ *
+ * Three cases are handled, keyed off ref->flags:
+ *  - TLB1 entry tracked in the bitmap: each host TLB1 slot recorded in
+ *    g2h_tlb1_map[esel] is invalidated individually;
+ *  - TLB1 entry backed through TLB0: everything is invalidated;
+ *  - entry still valid after that: a single-entry invalidate is issued.
+ */
+void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+			 int esel)
+{
+	struct kvm_book3e_206_tlb_entry *gtlbe =
+		get_entry(vcpu_e500, tlbsel, esel);
+	struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref;
+
+	/*
+	 * Don't bother with unmapped entries.
+	 * Note that this block only warns about inconsistent state; it does
+	 * not return, so the checks below still run.
+	 */
+	if (!(ref->flags & E500_TLB_VALID)) {
+		WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0),
+		     "%s: flags %x\n", __func__, ref->flags);
+		WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]);
+	}
+
+	if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) {
+		u64 tmp = vcpu_e500->g2h_tlb1_map[esel];
+		int hw_tlb_indx;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		/* visit every set bit of the bitmap, lowest bit first */
+		while (tmp) {
+			/* tmp & -tmp isolates the lowest set bit */
+			hw_tlb_indx = __ilog2_u64(tmp & -tmp);
+			mtspr(SPRN_MAS0,
+			      MAS0_TLBSEL(1) |
+			      MAS0_ESEL(to_htlb1_esel(hw_tlb_indx)));
+			/* writing MAS1 = 0 leaves the entry without its valid bit */
+			mtspr(SPRN_MAS1, 0);
+			asm volatile("tlbwe");
+			vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0;
+			/* clear the bit just handled */
+			tmp &= tmp - 1;
+		}
+		mb();
+		vcpu_e500->g2h_tlb1_map[esel] = 0;
+		ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID);
+		local_irq_restore(flags);
+	}
+
+	if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) {
+		/*
+		 * TLB1 entry is backed by 4k pages. This should happen
+		 * rarely and is not worth optimizing. Invalidate everything.
+		 */
+		kvmppc_e500_tlbil_all(vcpu_e500);
+		ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID);
+	}
+
+	/*
+	 * If TLB entry is still valid then it's a TLB0 entry, and thus
+	 * backed by at most one host tlbe per shadow pid
+	 */
+	if (ref->flags & E500_TLB_VALID)
+		kvmppc_e500_tlbil_one(vcpu_e500, gtlbe);
+
+	/* Mark the TLB as not backed by the host anymore */
+	ref->flags = 0;
+}
+
+/*
+ * Non-zero when the entry grants write permission to supervisor or user
+ * mode (MAS3_SW or MAS3_UW set in mas7_3).
+ */
+static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
+{
+	return tlbe->mas7_3 & (MAS3_UW | MAS3_SW);
+}
+
+/*
+ * Record that a guest TLB entry is now backed by host page frame @pfn.
+ *
+ * The reference is marked valid and carries the guest's MAS2 attribute bits
+ * together with the host @wimg bits.  The pfn is marked accessed, and also
+ * dirty when the guest entry grants write permission.
+ */
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+					 struct kvm_book3e_206_tlb_entry *gtlbe,
+					 kvm_pfn_t pfn, unsigned int wimg)
+{
+	ref->pfn = pfn;
+
+	/* Valid, plus guest supplied MAS2_G and MAS2_E, plus host WIMG */
+	ref->flags = E500_TLB_VALID |
+		     (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
+
+	kvm_set_pfn_accessed(pfn);
+	if (tlbe_is_writable(gtlbe))
+		kvm_set_pfn_dirty(pfn);
+}
+
+/*
+ * Invalidate a reference: if it is currently valid, emit the release trace
+ * event and clear all of its flags.  Invalid references are left untouched.
+ */
+static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
+{
+	if (!(ref->flags & E500_TLB_VALID))
+		return;
+
+	/* FIXME: don't log bogus pfn for TLB1 */
+	trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
+	ref->flags = 0;
+}
+
+/*
+ * Zero the guest-to-host TLB1 bitmap and the host-to-guest reverse map.
+ * Either array may be unallocated (NULL), in which case it is skipped.
+ *
+ * The sizes are derived from the element types of the arrays themselves,
+ * matching the way both arrays are allocated, so the memset can never
+ * disagree with the allocation.
+ */
+static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	if (vcpu_e500->g2h_tlb1_map)
+		memset(vcpu_e500->g2h_tlb1_map, 0,
+		       sizeof(*vcpu_e500->g2h_tlb1_map) *
+		       vcpu_e500->gtlb_params[1].entries);
+	if (vcpu_e500->h2g_tlb1_rmap)
+		memset(vcpu_e500->h2g_tlb1_rmap, 0,
+		       sizeof(*vcpu_e500->h2g_tlb1_rmap) *
+		       host_tlb_params[1].entries);
+}
+
+/*
+ * Release the host reference of every guest TLB0 and TLB1 entry.
+ *
+ * A per-TLB private array that was never allocated is skipped, in the same
+ * way clear_tlb1_bitmap() skips its arrays: kvmppc_e500_tlb_init() sets the
+ * entry counts before it allocates gtlb_priv[], so a failed init can leave
+ * a non-zero count alongside a NULL array.
+ * NOTE(review): this matters if the init error path reaches this function
+ * via free_gtlb() -- confirm against free_gtlb().
+ */
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	int tlbsel;
+	int i;
+
+	for (tlbsel = 0; tlbsel <= 1; tlbsel++) {
+		struct tlbe_priv *privs = vcpu_e500->gtlb_priv[tlbsel];
+
+		if (!privs)
+			continue;
+
+		for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++)
+			kvmppc_e500_ref_release(&privs[i].ref);
+	}
+}
+
+/*
+ * Flush all shadow state for a vcpu: invalidate its host TLB entries, then
+ * release every per-entry reference and clear the TLB1 bitmaps.
+ */
+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	kvmppc_e500_tlbil_all(vcpu_e500);
+	clear_tlb_privs(vcpu_e500);
+	clear_tlb1_bitmap(vcpu_e500);
+}
+
+/*
+ * Fill in a shadow TLB entry from a guest entry and its host reference.
+ *
+ * @tsize selects the page size, @gvaddr the effective page, and @ref
+ * supplies the host pfn and the MAS2 attribute bits.  The reference must be
+ * valid.  MAS1 is written without a TID; the caller must supply it.
+ */
+static void kvmppc_e500_setup_stlbe(
+	struct kvm_vcpu *vcpu,
+	struct kvm_book3e_206_tlb_entry *gtlbe,
+	int tsize, struct tlbe_ref *ref, u64 gvaddr,
+	struct kvm_book3e_206_tlb_entry *stlbe)
+{
+	u32 guest_pr = vcpu->arch.shared->msr & MSR_PR;
+	u64 host_paddr;
+	u32 perms;
+
+	BUG_ON(!(ref->flags & E500_TLB_VALID));
+
+	host_paddr = (u64)ref->pfn << PAGE_SHIFT;
+	perms = e500_shadow_mas3_attrib(gtlbe->mas7_3, guest_pr);
+
+	/* IPROT is never set here, so it is 0 for all guest mappings. */
+	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
+	stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
+	stlbe->mas7_3 = host_paddr | perms;
+}
+
+/*
+ * Build the shadow TLB entry that maps guest frame @gfn at @gvaddr.
+ *
+ * Resolves @gfn to a host pfn, chooses a page size (larger than 4K only for
+ * TLB1 entries backed by a VM_PFNMAP or hugetlb VMA), reads the WIMG bits
+ * from the host page table entry, and fills in @ref and @stlbe.
+ *
+ * Returns 0 on success, -EINVAL if no host page is found or its PTE is not
+ * present, and -EAGAIN if an MMU invalidation raced with the lookup.
+ */
+static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+	u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+	int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
+	struct tlbe_ref *ref)
+{
+	struct kvm_memory_slot *slot;
+	unsigned long pfn = 0; /* silence GCC warning */
+	unsigned long hva;
+	int pfnmap = 0;
+	int tsize = BOOK3E_PAGESZ_4K;
+	int ret = 0;
+	unsigned long mmu_seq;
+	struct kvm *kvm = vcpu_e500->vcpu.kvm;
+	unsigned long tsize_pages = 0;
+	pte_t *ptep;
+	unsigned int wimg = 0;
+	pgd_t *pgdir;
+	unsigned long flags;
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	/*
+	 * Translate guest physical to true physical, acquiring
+	 * a page reference if it is normal, non-reserved memory.
+	 *
+	 * gfn_to_memslot() must succeed because otherwise we wouldn't
+	 * have gotten this far.  Eventually we should just pass the slot
+	 * pointer through from the first lookup.
+	 */
+	slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
+	hva = gfn_to_hva_memslot(slot, gfn);
+
+	if (tlbsel == 1) {
+		struct vm_area_struct *vma;
+		mmap_read_lock(kvm->mm);
+
+		vma = find_vma(kvm->mm, hva);
+		if (vma && hva >= vma->vm_start &&
+		    (vma->vm_flags & VM_PFNMAP)) {
+			/*
+			 * This VMA is a physically contiguous region (e.g.
+			 * /dev/mem) that bypasses normal Linux page
+			 * management.  Find the overlap between the
+			 * vma and the memslot.
+			 */
+
+			unsigned long start, end;
+			unsigned long slot_start, slot_end;
+
+			pfnmap = 1;
+
+			start = vma->vm_pgoff;
+			end = start +
+			      vma_pages(vma);
+
+			pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
+
+			slot_start = pfn - (gfn - slot->base_gfn);
+			slot_end = slot_start + slot->npages;
+
+			if (start < slot_start)
+				start = slot_start;
+			if (end > slot_end)
+				end = slot_end;
+
+			tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+				MAS1_TSIZE_SHIFT;
+
+			/*
+			 * e500 doesn't implement the lowest tsize bit,
+			 * or 1K pages.
+			 */
+			tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+			/*
+			 * Now find the largest tsize (up to what the guest
+			 * requested) that will cover gfn, stay within the
+			 * range, and for which gfn and pfn are mutually
+			 * aligned.
+			 */
+
+			for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+				unsigned long gfn_start, gfn_end;
+				tsize_pages = 1UL << (tsize - 2);
+
+				gfn_start = gfn & ~(tsize_pages - 1);
+				gfn_end = gfn_start + tsize_pages;
+
+				if (gfn_start + pfn - gfn < start)
+					continue;
+				if (gfn_end + pfn - gfn > end)
+					continue;
+				if ((gfn & (tsize_pages - 1)) !=
+				    (pfn & (tsize_pages - 1)))
+					continue;
+
+				gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+				pfn &= ~(tsize_pages - 1);
+				break;
+			}
+		} else if (vma && hva >= vma->vm_start &&
+			   is_vm_hugetlb_page(vma)) {
+			unsigned long psize = vma_kernel_pagesize(vma);
+
+			tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+				MAS1_TSIZE_SHIFT;
+
+			/*
+			 * Take the largest page size that satisfies both host
+			 * and guest mapping
+			 */
+			tsize = min(__ilog2(psize) - 10, tsize);
+
+			/*
+			 * e500 doesn't implement the lowest tsize bit,
+			 * or 1K pages.
+			 */
+			tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+		}
+
+		mmap_read_unlock(kvm->mm);
+	}
+
+	if (likely(!pfnmap)) {
+		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
+		pfn = gfn_to_pfn_memslot(slot, gfn);
+		if (is_error_noslot_pfn(pfn)) {
+			if (printk_ratelimit())
+				pr_err("%s: real page not found for gfn %lx\n",
+				       __func__, (long)gfn);
+			return -EINVAL;
+		}
+
+		/* Align guest and physical address to page map boundaries */
+		pfn &= ~(tsize_pages - 1);
+		gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+	}
+
+	spin_lock(&kvm->mmu_lock);
+	if (mmu_invalidate_retry(kvm, mmu_seq)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
+
+	pgdir = vcpu_e500->vcpu.arch.pgdir;
+	/*
+	 * We are just looking at the wimg bits, so we don't
+	 * care much about the trans splitting bit.
+	 * We are holding kvm->mmu_lock so a notifier invalidate
+	 * can't run hence pfn won't change.
+	 */
+	local_irq_save(flags);
+	ptep = find_linux_pte(pgdir, hva, NULL, NULL);
+	if (ptep) {
+		pte_t pte = READ_ONCE(*ptep);
+
+		if (pte_present(pte)) {
+			wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+				MAS2_WIMGE_MASK;
+		} else {
+			local_irq_restore(flags);
+			pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+					   __func__, (long)gfn, pfn);
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+	/*
+	 * Re-enable interrupts on every path that continues, including the
+	 * one where no PTE was found (wimg stays 0 in that case).
+	 */
+	local_irq_restore(flags);
+
+	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
+
+	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
+				ref, gvaddr, stlbe);
+
+	/* Clear i-cache for new pages */
+	kvmppc_mmu_flush_icache(pfn);
+
+out:
+	spin_unlock(&kvm->mmu_lock);
+
+	/*
+	 * Drop refcount on page, so that mmu notifiers can clear it.
+	 * NOTE(review): this also runs for the VM_PFNMAP case, where the pfn
+	 * was computed from the VMA rather than obtained through
+	 * gfn_to_pfn_memslot() -- confirm the release is harmless there.
+	 */
+	kvm_release_pfn_clean(pfn);
+
+	return ret;
+}
+
+/*
+ * Shadow-map guest TLB0 entry @esel and write the result into host TLB0.
+ * Returns 0 on success or the error from kvmppc_e500_shadow_map().
+ *
+ * XXX only map the one-one case, for now use TLB0
+ */
+static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel,
+				struct kvm_book3e_206_tlb_entry *stlbe)
+{
+	struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, 0, esel);
+	struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[0][esel].ref;
+	int err;
+
+	err = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+			get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+			gtlbe, 0, stlbe, ref);
+	if (err)
+		return err;
+
+	/* host TLB0; the TLB1 slot argument is unused in that case */
+	write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0);
+
+	return 0;
+}
+
+/*
+ * Pick the next shadow TLB1 slot in round-robin order for guest TLB1 entry
+ * @esel and update the guest<->host maps accordingly.
+ *
+ * Returns the chosen shadow slot index.
+ */
+static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500,
+				     struct tlbe_ref *ref,
+				     int esel)
+{
+	unsigned int sesel = vcpu_e500->host_tlb1_nv;
+	unsigned int prev_owner;
+
+	/* advance the victim pointer, wrapping at the shadow size */
+	vcpu_e500->host_tlb1_nv++;
+	if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
+		vcpu_e500->host_tlb1_nv = 0;
+
+	/* the reverse map stores guest index + 1; 0 means the slot is free */
+	prev_owner = vcpu_e500->h2g_tlb1_rmap[sesel];
+	if (prev_owner)
+		vcpu_e500->g2h_tlb1_map[prev_owner - 1] &= ~(1ULL << sesel);
+
+	vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP;
+	vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel;
+	vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1;
+	WARN_ON(!(ref->flags & E500_TLB_VALID));
+
+	return sesel;
+}
+
+/*
+ * Shadow-map guest TLB1 entry @esel, for both one-one and one-to-many.
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert
+ * into the shadow TLB.  When the resulting shadow entry is only 4K it is
+ * placed in host TLB0; otherwise a host TLB1 slot is allocated for it.
+ *
+ * Returns 0 on success or the error from kvmppc_e500_shadow_map().
+ */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+		u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+		struct kvm_book3e_206_tlb_entry *stlbe, int esel)
+{
+	struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref;
+	int err;
+
+	err = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe,
+				     ref);
+	if (err)
+		return err;
+
+	if (get_tlb_tsize(stlbe) != BOOK3E_PAGESZ_4K) {
+		/* Larger than 4k: give it a slot in host TLB1 */
+		int sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel);
+
+		write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel);
+		return 0;
+	}
+
+	/* Use TLB0 when we can only map a page with 4k */
+	ref->flags |= E500_TLB_TLB0;
+	write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0);
+
+	return 0;
+}
+
+/*
+ * Create (or refresh) the host mapping for the guest TLB entry identified
+ * by @index, for guest effective address @eaddr and guest physical address
+ * @gpaddr.  Errors from the mapping helpers are not reported to the caller.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+		    unsigned int index)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
+	int tlbsel = tlbsel_of(index);
+	int esel = esel_of(index);
+
+	gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+
+	if (tlbsel == 0) {
+		struct tlbe_priv *priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
+
+		if (priv->ref.flags & E500_TLB_VALID) {
+			/* reference still valid: rebuild and rewrite the entry */
+			kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
+						&priv->ref, eaddr, &stlbe);
+			write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0);
+		} else {
+			/* Triggers after clear_tlb_privs or on initial mapping */
+			kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+		}
+	} else if (tlbsel == 1) {
+		gfn_t gfn = gpaddr >> PAGE_SHIFT;
+
+		kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe,
+				     esel);
+	} else {
+		BUG();
+	}
+}
+
+#ifdef CONFIG_KVM_BOOKE_HV
+/*
+ * Fetch the instruction at the guest's current PC.
+ *
+ * Searches the TLB for the guest PC with tlbsx, validates the resulting
+ * entry, then maps the backing page and reads a 32-bit instruction from it
+ * into *instr.  @type is not used by this implementation.
+ *
+ * Returns EMULATE_DONE when *instr has been filled in, EMULATE_AGAIN when
+ * the entry is missing or fails one of the checks below.
+ */
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+		enum instruction_fetch_type type, unsigned long *instr)
+{
+	gva_t geaddr;
+	hpa_t addr;
+	hfn_t pfn;
+	hva_t eaddr;
+	u32 mas1, mas2, mas3;
+	u64 mas7_mas3;
+	struct page *page;
+	unsigned int addr_space, psize_shift;
+	bool pr;
+	unsigned long flags;
+
+	/* Search TLB for guest pc to get the real address */
+	geaddr = kvmppc_get_pc(vcpu);
+
+	addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG;
+
+	/* the MAS setup, the search and the read-back run with IRQs off */
+	local_irq_save(flags);
+	mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
+	mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu));
+	asm volatile("tlbsx 0, %[geaddr]\n" : :
+		     [geaddr] "r" (geaddr));
+	mtspr(SPRN_MAS5, 0);
+	mtspr(SPRN_MAS8, 0);
+	mas1 = mfspr(SPRN_MAS1);
+	mas2 = mfspr(SPRN_MAS2);
+	mas3 = mfspr(SPRN_MAS3);
+#ifdef CONFIG_64BIT
+	mas7_mas3 = mfspr(SPRN_MAS7_MAS3);
+#else
+	mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3;
+#endif
+	local_irq_restore(flags);
+
+	/*
+	 * If the TLB entry for guest pc was evicted, return to the guest.
+	 * There are high chances to find a valid TLB entry next time.
+	 */
+	if (!(mas1 & MAS1_VALID))
+		return EMULATE_AGAIN;
+
+	/*
+	 * Another thread may rewrite the TLB entry in parallel, don't
+	 * execute from the address if the execute permission is not set
+	 */
+	pr = vcpu->arch.shared->msr & MSR_PR;
+	if (unlikely((pr && !(mas3 & MAS3_UX)) ||
+		     (!pr && !(mas3 & MAS3_SX)))) {
+		pr_err_ratelimited(
+			"%s: Instruction emulation from guest address %08lx without execute permission\n",
+			__func__, geaddr);
+		return EMULATE_AGAIN;
+	}
+
+	/*
+	 * The real address will be mapped by a cacheable, memory coherent,
+	 * write-back page. Check for mismatches when LRAT is used.
+	 */
+	if (has_feature(vcpu, VCPU_FTR_MMU_V2) &&
+	    unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) {
+		pr_err_ratelimited(
+			"%s: Instruction emulation from guest address %08lx mismatches storage attributes\n",
+			__func__, geaddr);
+		return EMULATE_AGAIN;
+	}
+
+	/*
+	 * Get pfn: page-frame bits come from MAS7/MAS3, the offset within
+	 * the (possibly large) page from the guest address.
+	 */
+	psize_shift = MAS1_GET_TSIZE(mas1) + 10;
+	addr = (mas7_mas3 & (~0ULL << psize_shift)) |
+	       (geaddr & ((1ULL << psize_shift) - 1ULL));
+	pfn = addr >> PAGE_SHIFT;
+
+	/* Guard against emulation from devices area */
+	if (unlikely(!page_is_ram(pfn))) {
+		pr_err_ratelimited("%s: Instruction emulation from non-RAM host address %08llx is not supported\n",
+			 __func__, addr);
+		return EMULATE_AGAIN;
+	}
+
+	/* Map a page and get guest's instruction */
+	page = pfn_to_page(pfn);
+	eaddr = (unsigned long)kmap_atomic(page);
+	*instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK));
+	kunmap_atomic((u32 *)eaddr);
+
+	return EMULATE_DONE;
+}
+#else
+/*
+ * Variant built without CONFIG_KVM_BOOKE_HV: no instruction is fetched and
+ * EMULATE_AGAIN is always returned.
+ */
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+		enum instruction_fetch_type type, unsigned long *instr)
+{
+	return EMULATE_AGAIN;
+}
+#endif
+
+/************* MMU Notifiers *************/
+
+/*
+ * Common unmap handler for the MMU notifiers below.  It performs no work
+ * itself and always returns true, ignoring @range.
+ */
+static bool kvm_e500_mmu_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/*
+	 * Flush all shadow tlb entries everywhere. This is slow, but
+	 * we are 100% sure that we catch the to be unmapped page
+	 *
+	 * NOTE(review): the flush itself is not done here; presumably the
+	 * true return value makes the generic code flush -- confirm.
+	 */
+	return true;
+}
+
+/* Unmap notifier: defers to kvm_e500_mmu_unmap_gfn(). */
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	return kvm_e500_mmu_unmap_gfn(kvm, range);
+}
+
+/* Aging notifier: no access tracking is done, always returns false. */
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/* XXX could be more clever ;) */
+	return false;
+}
+
+/* Test-age notifier: no access tracking is done, always returns false. */
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/* XXX could be more clever ;) */
+	return false;
+}
+
+/* PTE-change notifier: handled exactly like an unmap. */
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	/* The page will get remapped properly on its next fault */
+	return kvm_e500_mmu_unmap_gfn(kvm, range);
+}
+
+/*****************************************/
+
+/*
+ * Read the host TLB geometry from TLB0CFG/TLB1CFG into host_tlb_params and
+ * allocate the per-vcpu host-to-guest TLB1 reverse map.
+ *
+ * Returns 0 on success, -ENODEV when the host TLB configuration is missing
+ * or unusable, and -ENOMEM when the reverse map cannot be allocated.
+ */
+int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
+	host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+	/*
+	 * This should never happen on real e500 hardware, but is
+	 * architecturally possible -- e.g. in some weird nested
+	 * virtualization case.
+	 */
+	if (host_tlb_params[0].entries == 0 ||
+	    host_tlb_params[1].entries == 0) {
+		pr_err("%s: need to know host tlb size\n", __func__);
+		return -ENODEV;
+	}
+
+	host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
+				  TLBnCFG_ASSOC_SHIFT;
+	/* host TLB1 is described as one set with one way per entry */
+	host_tlb_params[1].ways = host_tlb_params[1].entries;
+
+	if (!is_power_of_2(host_tlb_params[0].entries) ||
+	    !is_power_of_2(host_tlb_params[0].ways) ||
+	    host_tlb_params[0].entries < host_tlb_params[0].ways ||
+	    host_tlb_params[0].ways == 0) {
+		pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
+		       __func__, host_tlb_params[0].entries,
+		       host_tlb_params[0].ways);
+		return -ENODEV;
+	}
+
+	host_tlb_params[0].sets =
+		host_tlb_params[0].entries / host_tlb_params[0].ways;
+	host_tlb_params[1].sets = 1;
+	vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries,
+					   sizeof(*vcpu_e500->h2g_tlb1_rmap),
+					   GFP_KERNEL);
+	/* an allocation failure is out-of-memory, not an invalid argument */
+	if (!vcpu_e500->h2g_tlb1_rmap)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Free the host-to-guest TLB1 reverse map allocated by
+ * e500_mmu_host_init().  The pointer is not cleared afterwards.
+ */
+void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	kfree(vcpu_e500->h2g_tlb1_rmap);
+}
diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h
new file mode 100644
index 0000000000..d8178cc86b
--- /dev/null
+++ b/arch/powerpc/kvm/e500_mmu_host.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
+ */
+
+#ifndef KVM_E500_MMU_HOST_H
+#define KVM_E500_MMU_HOST_H
+
+/* Drop the host TLB mappings backing guest TLB entry (tlbsel, esel). */
+void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
+			 int esel);
+
+/* Set up / release the host-side MMU state of a vcpu. */
+int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500);
+void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
+
+#endif /* KVM_E500_MMU_HOST_H */
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
new file mode 100644
index 0000000000..e476e107a9
--- /dev/null
+++ b/arch/powerpc/kvm/e500mc.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Varun Sethi, <varun.sethi@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/e500.c,
+ * by Yu Liu <yu.liu@freescale.com>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+#include <asm/ppc-opcode.h>
+
+#include "booke.h"
+#include "e500.h"
+
+/*
+ * Send the guest doorbell message that corresponds to interrupt class
+ * @type to @vcpu.  An unknown class triggers a one-time warning and sends
+ * nothing.
+ */
+void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
+{
+	enum ppc_dbell msg;
+	unsigned long target;
+
+	if (type == INT_CLASS_NONCRIT) {
+		msg = PPC_G_DBELL;
+	} else if (type == INT_CLASS_CRIT) {
+		msg = PPC_G_DBELL_CRIT;
+	} else if (type == INT_CLASS_MC) {
+		msg = PPC_G_DBELL_MC;
+	} else {
+		WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type);
+		return;
+	}
+
+	/* the tag is built and the message sent with preemption disabled */
+	preempt_disable();
+	target = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id;
+	mb();
+	ppc_msgsnd(msg, 0, target);
+	preempt_enable();
+}
+
+/*
+ * Invalidate the host TLB entry that shadows @gtlbe, if one is found.
+ * gtlbe must not be mapped by more than one host tlb entry.
+ */
+void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
+			   struct kvm_book3e_206_tlb_entry *gtlbe)
+{
+	unsigned int tid, ts;
+	gva_t eaddr;
+	u32 val;
+	unsigned long flags;
+
+	ts = get_tlb_ts(gtlbe);
+	tid = get_tlb_tid(gtlbe);
+
+	/* We search the host TLB to invalidate its shadow TLB entry */
+	val = (tid << 16) | ts;
+	eaddr = get_tlb_eaddr(gtlbe);
+
+	local_irq_save(flags);
+
+	mtspr(SPRN_MAS6, val);
+	mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
+
+	asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
+	/* if the search hit, write the entry back with its valid bit cleared */
+	val = mfspr(SPRN_MAS1);
+	if (val & MAS1_VALID) {
+		mtspr(SPRN_MAS1, val & ~MAS1_VALID);
+		asm volatile("tlbwe");
+	}
+	mtspr(SPRN_MAS5, 0);
+	/* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */
+	mtspr(SPRN_MAS8, 0);
+	isync();
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Invalidate the host TLB entries of this vcpu's LPID: MAS5 is loaded with
+ * the vcpu's lpid, the PPC_TLBILX_LPID instruction is issued, and MAS5 is
+ * cleared again, all with interrupts disabled.
+ */
+void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
+	/*
+	 * clang-17 and older could not assemble tlbilxlpid.
+	 * https://github.com/ClangBuiltLinux/linux/issues/1891
+	 */
+	asm volatile (PPC_TLBILX_LPID);
+	mtspr(SPRN_MAS5, 0);
+	local_irq_restore(flags);
+}
+
+/* Record the guest PID; only the software copy in vcpu->arch is updated. */
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+	vcpu->arch.pid = pid;
+}
+
+/* MSR change hook: intentionally empty in this file. */
+void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
+{
+}
+
+/* We use two lpids per VM */
+static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid);
+
+/*
+ * Load a vcpu onto the current cpu: after the common booke load, program
+ * the LPID, EPCR, GPIR, MSRP and external-PID registers and copy the guest
+ * SPR state (IVPR/IVORs, SPRGs, SRRs, EPR, DEAR, ESR) from the vcpu into
+ * the corresponding guest SPRs.
+ */
+static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	kvmppc_booke_vcpu_load(vcpu, cpu);
+
+	mtspr(SPRN_LPID, get_lpid(vcpu));
+	mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+	mtspr(SPRN_GPIR, vcpu->vcpu_id);
+	mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+	/* EPLC and EPSC start out identical on every load */
+	vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT);
+	vcpu->arch.epsc = vcpu->arch.eplc;
+	mtspr(SPRN_EPLC, vcpu->arch.eplc);
+	mtspr(SPRN_EPSC, vcpu->arch.epsc);
+
+	mtspr(SPRN_GIVPR, vcpu->arch.ivpr);
+	mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+	mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
+	mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0);
+	mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1);
+	mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2);
+	mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3);
+
+	mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0);
+	mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1);
+
+	mtspr(SPRN_GEPR, vcpu->arch.epr);
+	mtspr(SPRN_GDEAR, vcpu->arch.shared->dar);
+	mtspr(SPRN_GESR, vcpu->arch.shared->esr);
+
+	/*
+	 * Invalidate this LPID's TLB entries when the vcpu last ran with a
+	 * different PIR, or when another vcpu was the last one to run under
+	 * this lpid on this cpu.
+	 */
+	if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
+	    __this_cpu_read(last_vcpu_of_lpid[get_lpid(vcpu)]) != vcpu) {
+		kvmppc_e500_tlbil_all(vcpu_e500);
+		__this_cpu_write(last_vcpu_of_lpid[get_lpid(vcpu)], vcpu);
+	}
+}
+
+/*
+ * Save the guest SPR state back into the vcpu (the counterpart of
+ * kvmppc_core_vcpu_load_e500mc()), remember the current PIR in oldpir, and
+ * finish with the common booke put.
+ */
+static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.eplc = mfspr(SPRN_EPLC);
+	vcpu->arch.epsc = mfspr(SPRN_EPSC);
+
+	vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0);
+	vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1);
+	vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2);
+	vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3);
+
+	vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0);
+	vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1);
+
+	vcpu->arch.epr = mfspr(SPRN_GEPR);
+	vcpu->arch.shared->dar = mfspr(SPRN_GDEAR);
+	vcpu->arch.shared->esr = mfspr(SPRN_GESR);
+
+	/* compared against the current PIR on the next load */
+	vcpu->arch.oldpir = mfspr(SPRN_PIR);
+
+	kvmppc_booke_vcpu_put(vcpu);
+}
+
+/*
+ * Check that the host cpu is one this module supports, judged by its
+ * cpu_name: "e500mc" and "e5500" always, "e6500" only with CONFIG_ALTIVEC.
+ *
+ * Returns 0 when supported, -ENOTSUPP otherwise.
+ */
+static int kvmppc_e500mc_check_processor_compat(void)
+{
+	if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0 ||
+	    strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
+		return 0;
+
+#ifdef CONFIG_ALTIVEC
+	/*
+	 * Since guests have the privilege to enable AltiVec, we need AltiVec
+	 * support in the host to save/restore their context.
+	 * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
+	 * because it's cleared in the absence of CONFIG_ALTIVEC!
+	 */
+	if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+		return 0;
+#endif
+
+	return -ENOTSUPP;
+}
+
+/*
+ * Give a new vcpu its initial shadow EPCR/MSRP values, copy the host PVR
+ * and SVR into it, and mark it as an e500mc cpu.  Always returns 0.
+ */
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+
+	vcpu->arch.cpu_type = KVM_CPU_E500MC;
+
+	/* the guest sees the host's processor and system version */
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
+	e500->svr = mfspr(SPRN_SVR);
+
+	vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP;
+	vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI |
+				 SPRN_EPCR_DUVD;
+#ifdef CONFIG_64BIT
+	vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
+#endif
+
+	return 0;
+}
+
+/*
+ * Fill @sregs with the e500mc-specific special register state: feature
+ * flags, the FSL implementation block, the TLB configuration and the
+ * high IVORs.  Returns the result of kvmppc_get_sregs_ivor().
+ */
+static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu,
+					struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM |
+			       KVM_SREGS_E_PC;
+
+	/* Freescale implementation-specific registers */
+	sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+	sregs->u.e.impl.fsl.features = 0;
+	sregs->u.e.impl.fsl.svr = e500->svr;
+	sregs->u.e.impl.fsl.hid0 = e500->hid0;
+	sregs->u.e.impl.fsl.mcar = e500->mcar;
+
+	kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+
+	/* performance monitor and doorbell vectors live in ivor_high[3..5] */
+	sregs->u.e.ivor_high[3] =
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+	sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
+	sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
+
+	return kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+/*
+ * Apply the e500mc-specific special register state from @sregs.
+ *
+ * The FSL block is taken only when impl_id says so.  The IVORs are only
+ * touched when KVM_SREGS_E_IVOR is set, and the performance monitor and
+ * doorbell vectors additionally need their own feature bits.
+ *
+ * Returns a negative value if the TLB state is rejected, 0 if no IVORs
+ * were requested, otherwise the result of kvmppc_set_sregs_ivor().
+ */
+static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
+					struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *e500 = to_e500(vcpu);
+	int err;
+
+	if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+		e500->svr = sregs->u.e.impl.fsl.svr;
+		e500->hid0 = sregs->u.e.impl.fsl.hid0;
+		e500->mcar = sregs->u.e.impl.fsl.mcar;
+	}
+
+	err = kvmppc_set_sregs_e500_tlb(vcpu, sregs);
+	if (err < 0)
+		return err;
+
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;
+
+	if (sregs->u.e.features & KVM_SREGS_E_PM)
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+			sregs->u.e.ivor_high[3];
+
+	if (sregs->u.e.features & KVM_SREGS_E_PC) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] =
+			sregs->u.e.ivor_high[4];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] =
+			sregs->u.e.ivor_high[5];
+	}
+
+	return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
+/*
+ * ONE_REG read: SPRG9 is handled here, every other id is passed on to
+ * kvmppc_get_one_reg_e500_tlb().
+ */
+static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+			      union kvmppc_one_reg *val)
+{
+	if (id == KVM_REG_PPC_SPRG9) {
+		*val = get_reg_val(id, vcpu->arch.sprg9);
+		return 0;
+	}
+
+	return kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+}
+
+/*
+ * ONE_REG write: SPRG9 is handled here, every other id is passed on to
+ * kvmppc_set_one_reg_e500_tlb().
+ */
+static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+			      union kvmppc_one_reg *val)
+{
+	if (id == KVM_REG_PPC_SPRG9) {
+		vcpu->arch.sprg9 = set_reg_val(id, *val);
+		return 0;
+	}
+
+	return kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+}
+
+/*
+ * Create the e500mc-specific part of a vcpu: initialise its TLB state and
+ * allocate the zeroed page used for vcpu->arch.shared.
+ *
+ * Returns 0 on success, the error from kvmppc_e500_tlb_init(), or -ENOMEM
+ * when the shared page cannot be allocated (the TLB state is then torn
+ * down again).
+ */
+static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *e500;
+	int rc;
+
+	/* to_e500() relies on the vcpu being the first member */
+	BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+	e500 = to_e500(vcpu);
+
+	/* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
+	vcpu->arch.oldpir = 0xffffffff;
+
+	rc = kvmppc_e500_tlb_init(e500);
+	if (rc)
+		return rc;
+
+	vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (vcpu->arch.shared)
+		return 0;
+
+	kvmppc_e500_tlb_uninit(e500);
+	return -ENOMEM;
+}
+
+/*
+ * Undo kvmppc_core_vcpu_create_e500mc(): free the shared page, then tear
+ * down the TLB state.
+ */
+static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	free_page((unsigned long)vcpu->arch.shared);
+	kvmppc_e500_tlb_uninit(vcpu_e500);
+}
+
+/*
+ * Allocate an lpid for a new VM and store it in kvm->arch.lpid.
+ * Returns 0 on success or the negative value from kvmppc_alloc_lpid().
+ */
+static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
+{
+	int lpid = kvmppc_alloc_lpid();
+
+	if (lpid < 0)
+		return lpid;
+
+	/*
+	 * Use two lpids per VM on cores with two threads like e6500. Use
+	 * even numbers to speedup vcpu lpid computation with consecutive lpids
+	 * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on.
+	 */
+	kvm->arch.lpid = (threads_per_core == 2) ? lpid << 1 : lpid;
+
+	return 0;
+}
+
+/*
+ * Return the VM's lpid to the allocator, undoing the doubling that
+ * kvmppc_core_init_vm_e500mc() applies on two-thread cores.
+ */
+static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
+{
+	int lpid = kvm->arch.lpid;
+
+	kvmppc_free_lpid(threads_per_core == 2 ? lpid >> 1 : lpid);
+}
+
+/*
+ * Operations table for e500mc; published through kvmppc_pr_ops by
+ * kvmppc_e500mc_init(), which also fills in .owner.
+ */
+static struct kvmppc_ops kvm_ops_e500mc = {
+	.get_sregs = kvmppc_core_get_sregs_e500mc,
+	.set_sregs = kvmppc_core_set_sregs_e500mc,
+	.get_one_reg = kvmppc_get_one_reg_e500mc,
+	.set_one_reg = kvmppc_set_one_reg_e500mc,
+	.vcpu_load   = kvmppc_core_vcpu_load_e500mc,
+	.vcpu_put    = kvmppc_core_vcpu_put_e500mc,
+	.vcpu_create = kvmppc_core_vcpu_create_e500mc,
+	.vcpu_free   = kvmppc_core_vcpu_free_e500mc,
+	.init_vm = kvmppc_core_init_vm_e500mc,
+	.destroy_vm = kvmppc_core_destroy_vm_e500mc,
+	.emulate_op = kvmppc_core_emulate_op_e500,
+	.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+	.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+	.create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500,
+};
+
+/*
+ * Module init: check the host cpu, run the common booke init, size the lpid
+ * allocator, register with KVM and publish the e500mc operations table.
+ * Returns 0 on success or the first error encountered.
+ */
+static int __init kvmppc_e500mc_init(void)
+{
+	int r;
+
+	r = kvmppc_e500mc_check_processor_compat();
+	if (r)
+		goto err_out;
+
+	r = kvmppc_booke_init();
+	if (r)
+		goto err_out;
+
+	/*
+	 * Use two lpids per VM on dual threaded processors like e6500
+	 * to workarround the lack of tlb write conditional instruction.
+	 * Expose half the number of available hardware lpids to the lpid
+	 * allocator.
+	 */
+	kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
+
+	/*
+	 * NOTE(review): if kvm_init() fails, nothing here undoes
+	 * kvmppc_booke_init() -- confirm whether that needs unwinding.
+	 */
+	r = kvm_init(sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	if (r)
+		goto err_out;
+	kvm_ops_e500mc.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_e500mc;
+
+err_out:
+	return r;
+}
+
+/* Module exit: withdraw the operations table, then run the booke exit. */
+static void __exit kvmppc_e500mc_exit(void)
+{
+	kvmppc_pr_ops = NULL;
+	kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500mc_init);
+module_exit(kvmppc_e500mc_exit);
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
new file mode 100644
index 0000000000..355d5206e8
--- /dev/null
+++ b/arch/powerpc/kvm/emulate.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+#include <linux/clockchips.h>
+
+#include <asm/reg.h>
+#include <asm/time.h>
+#include <asm/byteorder.h>
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include "timing.h"
+#include "trace.h"
+
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+{
+	unsigned long long span;	/* ticks, then ns, then whole seconds */
+	unsigned long rem_ns;
+
+	pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
+	/* Drop any timer still armed for the previous DEC value. */
+	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
+#ifdef CONFIG_PPC_BOOK3S
+	/* mtdec lowers the interrupt line when positive. */
+	kvmppc_core_dequeue_dec(vcpu);
+#endif
+
+#ifdef CONFIG_BOOKE
+	/* On BOOKE, DEC = 0 is as good as decrementer not enabled */
+	if (!vcpu->arch.dec)
+		return;
+#endif
+
+	/*
+	 * The decrementer ticks at the same rate as the timebase, and the
+	 * guest timebase ticks at the same frequency as the host timebase,
+	 * so the host's tb_to_ns() converts the guest DEC value directly.
+	 */
+	span = vcpu->arch.dec;
+	span = tb_to_ns(span);
+	/* do_div() leaves the quotient in place and returns the remainder. */
+	rem_ns = do_div(span, NSEC_PER_SEC);
+
+	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(span, rem_ns),
+		      HRTIMER_MODE_REL);
+
+	/* Remember the timebase at arm time; kvmppc_get_dec() subtracts it. */
+	vcpu->arch.dec_jiffies = get_tb();
+}
+
+u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+{
+	/* Timebase ticks that passed since dec_jiffies was recorded. */
+	u64 elapsed = tb - vcpu->arch.dec_jiffies;
+
+#ifdef CONFIG_BOOKE
+	if (elapsed > vcpu->arch.dec)	/* count already ran out: read as 0 */
+		return 0;
+#endif
+	return vcpu->arch.dec - elapsed;
+}
+
+static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	enum emulation_result ret = EMULATE_DONE;
+	ulong val = kvmppc_get_gpr(vcpu, rs);	/* value the guest is writing */
+
+	switch (sprn) {
+	case SPRN_SRR0:
+		kvmppc_set_srr0(vcpu, val);
+		break;
+	case SPRN_SRR1:
+		kvmppc_set_srr1(vcpu, val);
+		break;
+
+	/*
+	 * XXX We need to context-switch the timebase for watchdog and FIT.
+	 * PIR can legally be written, but we ignore it.
+	 */
+	case SPRN_TBWL:
+	case SPRN_TBWU:
+	case SPRN_PIR:
+		break;
+
+	case SPRN_DEC:
+		/* Only the low 32 bits are kept; then re-arm the timer. */
+		vcpu->arch.dec = (u32) val;
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	case SPRN_SPRG0:
+		kvmppc_set_sprg0(vcpu, val);
+		break;
+	case SPRN_SPRG1:
+		kvmppc_set_sprg1(vcpu, val);
+		break;
+	case SPRN_SPRG2:
+		kvmppc_set_sprg2(vcpu, val);
+		break;
+	case SPRN_SPRG3:
+		kvmppc_set_sprg3(vcpu, val);
+		break;
+
+	default:
+		/* Not a generic SPR: let the core-specific backend try. */
+		ret = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, val);
+		if (ret == EMULATE_FAIL)
+			printk(KERN_INFO "mtspr: unknown spr 0x%x\n", sprn);
+		break;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+	return ret;
+}
+
+static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	enum emulation_result ret = EMULATE_DONE;
+	ulong val = 0;	/* value handed back to the guest in GPR rt */
+
+	switch (sprn) {
+	case SPRN_SRR0:
+		val = kvmppc_get_srr0(vcpu);
+		break;
+	case SPRN_SRR1:
+		val = kvmppc_get_srr1(vcpu);
+		break;
+	case SPRN_PVR:
+		val = vcpu->arch.pvr;
+		break;
+	case SPRN_PIR:
+		val = vcpu->vcpu_id;
+		break;
+
+	/* Note: mftb and TBRL/TBWL are user-accessible, so
+	 * the guest can always access the real TB anyways.
+	 * In fact, we probably will never see these traps.
+	 * NOTE(review): TBWL returns the upper TB half and TBWU the
+	 * unshifted value, which looks swapped -- confirm intent. */
+	case SPRN_TBWL:
+		val = get_tb() >> 32;
+		break;
+	case SPRN_TBWU:
+		val = get_tb();
+		break;
+
+	case SPRN_SPRG0:
+		val = kvmppc_get_sprg0(vcpu);
+		break;
+	case SPRN_SPRG1:
+		val = kvmppc_get_sprg1(vcpu);
+		break;
+	case SPRN_SPRG2:
+		val = kvmppc_get_sprg2(vcpu);
+		break;
+	case SPRN_SPRG3:
+		val = kvmppc_get_sprg3(vcpu);
+		break;
+	/* Note: SPRG4-7 are user-readable, so we don't get
+	 * a trap. */
+
+	case SPRN_DEC:
+		val = kvmppc_get_dec(vcpu, get_tb());
+		break;
+	default:
+		ret = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, &val);
+		if (unlikely(ret == EMULATE_FAIL))
+			printk(KERN_INFO "mfspr: unknown spr 0x%x\n", sprn);
+		break;
+	}
+
+	/* Only a fully emulated read may update the destination register. */
+	if (ret == EMULATE_DONE)
+		kvmppc_set_gpr(vcpu, rt, val);
+	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+
+	return ret;
+}
+
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
+int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu)
+{
+	u32 inst;
+	ppc_inst_t pinst;
+	int rs, rt, sprn;
+	enum emulation_result emulated;
+	int advance = 1;	/* 0 = leave the guest PC on this instruction */
+
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
+	emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+	inst = ppc_inst_val(pinst);
+	if (emulated != EMULATE_DONE)
+		return emulated;
+
+	pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
+
+	rs = get_rs(inst);
+	rt = get_rt(inst);
+	sprn = get_sprn(inst);
+
+	switch (get_op(inst)) {
+	case OP_TRAP:
+#ifdef CONFIG_PPC_BOOK3S
+	case OP_TRAP_64:
+		kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
+#else
+		kvmppc_core_queue_program(vcpu,
+					  vcpu->arch.shared->esr | ESR_PTR);
+#endif
+		advance = 0;
+		break;
+
+	case 31:	/* primary opcode 31: dispatch on the extended opcode */
+		switch (get_xop(inst)) {
+
+		case OP_31_XOP_TRAP:
+#ifdef CONFIG_64BIT
+		case OP_31_XOP_TRAP_64:
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
+#else
+			kvmppc_core_queue_program(vcpu,
+					vcpu->arch.shared->esr | ESR_PTR);
+#endif
+			advance = 0;
+			break;
+
+		case OP_31_XOP_MFSPR:
+			emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
+			if (emulated == EMULATE_AGAIN) {
+				emulated = EMULATE_DONE;	/* report success ... */
+				advance = 0;	/* ... but stay on the instruction */
+			}
+			break;
+
+		case OP_31_XOP_MTSPR:
+			emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
+			if (emulated == EMULATE_AGAIN) {
+				emulated = EMULATE_DONE;	/* report success ... */
+				advance = 0;	/* ... but stay on the instruction */
+			}
+			break;
+
+		case OP_31_XOP_TLBSYNC:
+			break;	/* nothing to do here; just step past it */
+
+		default:
+			/* Attempt core-specific emulation below. */
+			emulated = EMULATE_FAIL;
+		}
+		break;
+
+	case 0:
+		/*
+		 * Instruction with primary opcode 0. Based on PowerISA
+		 * these are illegal instructions.
+		 */
+		if (inst == KVMPPC_INST_SW_BREAKPOINT) {
+			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+			vcpu->run->debug.arch.status = 0;
+			vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
+			emulated = EMULATE_EXIT_USER;
+			advance = 0;
+		} else
+			emulated = EMULATE_FAIL;
+
+		break;
+
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	if (emulated == EMULATE_FAIL) {	/* generic decode gave up: ask the core */
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_op(vcpu, inst,
+							       &advance);
+		if (emulated == EMULATE_AGAIN) {
+			advance = 0;
+		} else if (emulated == EMULATE_FAIL) {
+			advance = 0;
+			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+		}
+	}
+
+	trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated);
+
+	/* Advance past emulated instruction. */
+	/*
+	 * If this ever handles prefixed instructions, the 4
+	 * will need to become ppc_inst_len(pinst) instead.
+	 */
+	if (advance)
+		kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+
+	return emulated;
+}
+EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
new file mode 100644
index 0000000000..059c08ae03
--- /dev/null
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+#include <linux/clockchips.h>
+
+#include <asm/reg.h>
+#include <asm/time.h>
+#include <asm/byteorder.h>
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include <asm/sstep.h>
+#include "timing.h"
+#include "trace.h"
+
+#ifdef CONFIG_PPC_FPU
+static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu)
+{
+	/* MSR[FP] is set: the guest may use FP, nothing to queue. */
+	if (kvmppc_get_msr(vcpu) & MSR_FP)
+		return false;
+
+	kvmppc_core_queue_fpunavail(vcpu, kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+	return true;
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_VSX
+static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
+{
+	/* MSR[VSX] is set: the guest may use VSX, nothing to queue. */
+	if (kvmppc_get_msr(vcpu) & MSR_VSX)
+		return false;
+
+	kvmppc_core_queue_vsx_unavail(vcpu, kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+	return true;
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
+{
+	/* MSR[VEC] is set: the guest may use AltiVec, nothing to queue. */
+	if (kvmppc_get_msr(vcpu) & MSR_VEC)
+		return false;
+
+	kvmppc_core_queue_vec_unavail(vcpu, kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+	return true;
+}
+#endif /* CONFIG_ALTIVEC */
+
+/*
+ * XXX to do:
+ * lfiwax, lfiwzx
+ * vector loads and stores
+ *
+ * Instructions that trap when used on cache-inhibited mappings
+ * are not emulated here: multiple and string instructions,
+ * lq/stq, and the load-reserve/store-conditional instructions.
+ */
+int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
+{
+	ppc_inst_t inst;
+	enum emulation_result emulated = EMULATE_FAIL;
+	struct instruction_op op;
+
+	/* this default type might be overwritten by subcategories */
+	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
+	emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
+	if (emulated != EMULATE_DONE)
+		return emulated;
+
+	vcpu->arch.mmio_vsx_copy_nums = 0;	/* reset per-access MMIO state */
+	vcpu->arch.mmio_vsx_offset = 0;
+	vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
+	vcpu->arch.mmio_sp64_extend = 0;
+	vcpu->arch.mmio_sign_extend = 0;
+	vcpu->arch.mmio_vmx_copy_nums = 0;
+	vcpu->arch.mmio_vmx_offset = 0;
+	vcpu->arch.mmio_host_swabbed = 0;
+
+	emulated = EMULATE_FAIL;	/* stays so unless a case below handles it */
+	vcpu->arch.regs.msr = vcpu->arch.shared->msr;
+	if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
+		int type = op.type & INSTR_TYPE_MASK;
+		int size = GETSIZE(op.type);
+
+		vcpu->mmio_is_write = OP_IS_STORE(type);
+
+		switch (type) {
+		case LOAD:  {
+			int instr_byte_swap = op.type & BYTEREV;
+
+			if (op.type & SIGNEXT)
+				emulated = kvmppc_handle_loads(vcpu,
+						op.reg, size, !instr_byte_swap);
+			else
+				emulated = kvmppc_handle_load(vcpu,
+						op.reg, size, !instr_byte_swap);
+
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+
+			break;
+		}
+#ifdef CONFIG_PPC_FPU
+		case LOAD_FP:
+			if (kvmppc_check_fp_disabled(vcpu))
+				return EMULATE_DONE;	/* unavailable interrupt queued */
+
+			if (op.type & FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			if (op.type & SIGNEXT)
+				emulated = kvmppc_handle_loads(vcpu,
+					     KVM_MMIO_REG_FPR|op.reg, size, 1);
+			else
+				emulated = kvmppc_handle_load(vcpu,
+					     KVM_MMIO_REG_FPR|op.reg, size, 1);
+
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+
+			break;
+#endif
+#ifdef CONFIG_ALTIVEC
+		case LOAD_VMX:
+			if (kvmppc_check_altivec_disabled(vcpu))
+				return EMULATE_DONE;
+
+			/* Hardware enforces alignment of VMX accesses */
+			vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1);
+			vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1);
+
+			if (size == 16) { /* lvx */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_DWORD;
+			} else if (size == 4) { /* lvewx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_WORD;
+			} else if (size == 2) { /* lvehx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_HWORD;
+			} else if (size == 1) { /* lvebx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_BYTE;
+			} else
+				break;
+
+			vcpu->arch.mmio_vmx_offset =
+				(vcpu->arch.vaddr_accessed & 0xf)/size;
+
+			if (size == 16) {
+				vcpu->arch.mmio_vmx_copy_nums = 2;	/* two 8-byte halves */
+				emulated = kvmppc_handle_vmx_load(vcpu,
+						KVM_MMIO_REG_VMX|op.reg,
+						8, 1);
+			} else {
+				vcpu->arch.mmio_vmx_copy_nums = 1;
+				emulated = kvmppc_handle_vmx_load(vcpu,
+						KVM_MMIO_REG_VMX|op.reg,
+						size, 1);
+			}
+			break;
+#endif
+#ifdef CONFIG_VSX
+		case LOAD_VSX: {
+			int io_size_each;
+
+			if (op.vsx_flags & VSX_CHECK_VEC) {
+				if (kvmppc_check_altivec_disabled(vcpu))
+					return EMULATE_DONE;
+			} else {
+				if (kvmppc_check_vsx_disabled(vcpu))
+					return EMULATE_DONE;
+			}
+
+			if (op.vsx_flags & VSX_FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			if (op.element_size == 8)  {
+				if (op.vsx_flags & VSX_SPLAT)
+					vcpu->arch.mmio_copy_type =
+						KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
+				else
+					vcpu->arch.mmio_copy_type =
+						KVMPPC_VSX_COPY_DWORD;
+			} else if (op.element_size == 4) {
+				if (op.vsx_flags & VSX_SPLAT)
+					vcpu->arch.mmio_copy_type =
+						KVMPPC_VSX_COPY_WORD_LOAD_DUMP;
+				else
+					vcpu->arch.mmio_copy_type =
+						KVMPPC_VSX_COPY_WORD;
+			} else
+				break;
+
+			if (size < op.element_size) {
+				/* precision convert case: lxsspx, etc */
+				vcpu->arch.mmio_vsx_copy_nums = 1;
+				io_size_each = size;
+			} else { /* lxvw4x, lxvd2x, etc */
+				vcpu->arch.mmio_vsx_copy_nums =
+					size/op.element_size;
+				io_size_each = op.element_size;
+			}
+
+			emulated = kvmppc_handle_vsx_load(vcpu,
+					KVM_MMIO_REG_VSX|op.reg, io_size_each,
+					1, op.type & SIGNEXT);
+			break;
+		}
+#endif
+		case STORE:
+			/* if need byte reverse, op.val has been reversed by
+			 * analyse_instr().
+			 */
+			emulated = kvmppc_handle_store(vcpu, op.val, size, 1);
+
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+
+			break;
+#ifdef CONFIG_PPC_FPU
+		case STORE_FP:
+			if (kvmppc_check_fp_disabled(vcpu))
+				return EMULATE_DONE;
+
+			/* The FP registers need to be flushed so that
+			 * kvmppc_handle_store() can read actual FP vals
+			 * from vcpu->arch.
+			 */
+			if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+				vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+						MSR_FP);
+
+			if (op.type & FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			emulated = kvmppc_handle_store(vcpu,
+					VCPU_FPR(vcpu, op.reg), size, 1);
+
+			if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+				kvmppc_set_gpr(vcpu, op.update_reg, op.ea);
+
+			break;
+#endif
+#ifdef CONFIG_ALTIVEC
+		case STORE_VMX:
+			if (kvmppc_check_altivec_disabled(vcpu))
+				return EMULATE_DONE;
+
+			/* Hardware enforces alignment of VMX accesses. */
+			vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1);
+			vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1);
+
+			if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+				vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+						MSR_VEC);
+			if (size == 16) { /* stvx */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_DWORD;
+			} else if (size == 4) { /* stvewx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_WORD;
+			} else if (size == 2) { /* stvehx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_HWORD;
+			} else if (size == 1) { /* stvebx  */
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VMX_COPY_BYTE;
+			} else
+				break;
+
+			vcpu->arch.mmio_vmx_offset =
+				(vcpu->arch.vaddr_accessed & 0xf)/size;
+
+			if (size == 16) {
+				vcpu->arch.mmio_vmx_copy_nums = 2;	/* two 8-byte halves */
+				emulated = kvmppc_handle_vmx_store(vcpu,
+						op.reg, 8, 1);
+			} else {
+				vcpu->arch.mmio_vmx_copy_nums = 1;
+				emulated = kvmppc_handle_vmx_store(vcpu,
+						op.reg, size, 1);
+			}
+
+			break;
+#endif
+#ifdef CONFIG_VSX
+		case STORE_VSX: {
+			int io_size_each;
+
+			if (op.vsx_flags & VSX_CHECK_VEC) {
+				if (kvmppc_check_altivec_disabled(vcpu))
+					return EMULATE_DONE;
+			} else {
+				if (kvmppc_check_vsx_disabled(vcpu))
+					return EMULATE_DONE;
+			}
+
+			if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+				vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+						MSR_VSX);
+
+			if (op.vsx_flags & VSX_FPCONV)
+				vcpu->arch.mmio_sp64_extend = 1;
+
+			if (op.element_size == 8)
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VSX_COPY_DWORD;
+			else if (op.element_size == 4)
+				vcpu->arch.mmio_copy_type =
+						KVMPPC_VSX_COPY_WORD;
+			else
+				break;
+
+			if (size < op.element_size) {
+				/* precision convert case, like stxsspx */
+				vcpu->arch.mmio_vsx_copy_nums = 1;
+				io_size_each = size;
+			} else { /* stxvw4x, stxvd2x, etc */
+				vcpu->arch.mmio_vsx_copy_nums =
+						size/op.element_size;
+				io_size_each = op.element_size;
+			}
+
+			emulated = kvmppc_handle_vsx_store(vcpu,
+					op.reg, io_size_each, 1);
+			break;
+		}
+#endif
+		case CACHEOP:
+			/* Do nothing. The guest is performing dcbi because
+			 * hardware DMA is not snooped by the dcache, but
+			 * emulated DMA either goes through the dcache as
+			 * normal writes, or the host kernel has handled dcache
+			 * coherence.
+			 */
+			emulated = EMULATE_DONE;
+			break;
+		default:
+			break;
+		}
+	}
+
+	trace_kvm_ppc_instr(ppc_inst_val(inst), kvmppc_get_pc(vcpu), emulated);
+
+	/* Advance past emulated instruction (prefixed ones are longer). */
+	if (emulated != EMULATE_FAIL)
+		kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + ppc_inst_len(inst));
+
+	return emulated;
+}
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
new file mode 100644
index 0000000000..b68e7f26a8
--- /dev/null
+++ b/arch/powerpc/kvm/fpu.S
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  FPU helper code to use FPU operations from inside the kernel
+ *
+ *    Copyright (C) 2010 Alexander Graf (agraf@suse.de)
+ */
+
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Instructions operating on single parameters */
+
+/*
+ * Single operation with one input operand
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (short*)&result
+ * R5 = (short*)&param1
+ */
+#define FPS_ONE_IN(name) 					\
+_GLOBAL(fps_ ## name);							\
+	lfd	0,0(r3);		/* load up fpscr value */	\
+	MTFSF_L(0);							\
+	lfs	0,0(r5);		/* f0 = param1 */		\
+									\
+	name	0,0;							\
+									\
+	stfs	0,0(r4);		/* store result */		\
+	mffs	0;			/* f0 = updated fpscr */	\
+	stfd	0,0(r3);	/* save new fpscr value */	\
+	blr
+
+/*
+ * Single operation with two input operands
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (short*)&result
+ * R5 = (short*)&param1
+ * R6 = (short*)&param2
+ */
+#define FPS_TWO_IN(name) 					\
+_GLOBAL(fps_ ## name);							\
+	lfd	0,0(r3);		/* load up fpscr value */	\
+	MTFSF_L(0);							\
+	lfs	0,0(r5);		/* f0 = param1 */		\
+	lfs	1,0(r6);		/* f1 = param2 */		\
+									\
+	name	0,0,1;							\
+									\
+	stfs	0,0(r4);		/* store result */		\
+	mffs	0;			/* f0 = updated fpscr */	\
+	stfd	0,0(r3);		/* save new fpscr value */	\
+	blr
+
+/*
+ * Single operation with three input operands
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (short*)&result
+ * R5 = (short*)&param1
+ * R6 = (short*)&param2
+ * R7 = (short*)&param3
+ */
+#define FPS_THREE_IN(name) 					\
+_GLOBAL(fps_ ## name);							\
+	lfd	0,0(r3);		/* load up fpscr value */	\
+	MTFSF_L(0);							\
+	lfs	0,0(r5);		/* f0 = param1 */		\
+	lfs	1,0(r6);		/* f1 = param2 */		\
+	lfs	2,0(r7);		/* f2 = param3 */		\
+									\
+	name	0,0,1,2;						\
+									\
+	stfs	0,0(r4);		/* store result */		\
+	mffs	0;			/* f0 = updated fpscr */	\
+	stfd	0,0(r3);		/* save new fpscr value */	\
+	blr
+
+FPS_ONE_IN(fres)
+FPS_ONE_IN(frsqrte)
+FPS_ONE_IN(fsqrts)
+FPS_TWO_IN(fadds)
+FPS_TWO_IN(fdivs)
+FPS_TWO_IN(fmuls)
+FPS_TWO_IN(fsubs)
+FPS_THREE_IN(fmadds)
+FPS_THREE_IN(fmsubs)
+FPS_THREE_IN(fnmadds)
+FPS_THREE_IN(fnmsubs)
+FPS_THREE_IN(fsel)
+
+
+/* Instructions operating on double parameters */
+
+/*
+ * Beginning of double instruction processing
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ * R7 = (double*)&param2 [load_two]
+ * R8 = (double*)&param3 [load_three]
+ * LR = instruction call function
+ */
+SYM_FUNC_START_LOCAL(fpd_load_three)
+	lfd	2,0(r8)			/* load param3, fall through */
+SYM_FUNC_START_LOCAL(fpd_load_two)
+	lfd	1,0(r7)			/* load param2, fall through */
+SYM_FUNC_START_LOCAL(fpd_load_one)
+	lfd	0,0(r6)			/* load param1, fall through */
+SYM_FUNC_START_LOCAL(fpd_load_none)
+	lfd	3,0(r3)			/* load up fpscr value */
+	MTFSF_L(3)
+	lwz	r6, 0(r4)		/* load cr (r6 reused as scratch) */
+	mtcr	r6
+	blr
+SYM_FUNC_END(fpd_load_none)
+SYM_FUNC_END(fpd_load_one)
+SYM_FUNC_END(fpd_load_two)
+SYM_FUNC_END(fpd_load_three)
+
+/*
+ * End of double instruction processing
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * LR = caller of instruction call function
+ */
+SYM_FUNC_START_LOCAL(fpd_return)
+	mfcr	r6			/* r6 = cr after the operation */
+	stfd	0,0(r5)			/* save result */
+	mffs	0			/* f0 = updated fpscr */
+	stfd	0,0(r3)			/* save new fpscr value */
+	stw	r6,0(r4)		/* save new cr value */
+	blr
+SYM_FUNC_END(fpd_return)
+
+/*
+ * Double operation with no input operand
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ */
+#define FPD_NONE_IN(name) 						\
+_GLOBAL(fpd_ ## name);							\
+	mflr	r12;			/* bl below overwrites LR */	\
+	bl	fpd_load_none;						\
+	mtlr	r12;			/* restore caller's LR */	\
+									\
+	name.	0;			/* call instruction */		\
+	b	fpd_return
+
+/*
+ * Double operation with one input operand
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ */
+#define FPD_ONE_IN(name) 						\
+_GLOBAL(fpd_ ## name);							\
+	mflr	r12;			/* bl below overwrites LR */	\
+	bl	fpd_load_one;						\
+	mtlr	r12;			/* restore caller's LR */	\
+									\
+	name.	0,0;			/* call instruction */		\
+	b	fpd_return
+
+/*
+ * Double operation with two input operands
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ * R7 = (double*)&param2
+ * R8 = unused (fpd_load_two reads only R6 and R7)
+ */
+#define FPD_TWO_IN(name) 						\
+_GLOBAL(fpd_ ## name);							\
+	mflr	r12;			/* bl below overwrites LR */	\
+	bl	fpd_load_two;						\
+	mtlr	r12;			/* restore caller's LR */	\
+									\
+	name.	0,0,1;			/* call instruction */		\
+	b	fpd_return
+
+/*
+ * CR Double operation with two input operands
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&param1
+ * R6 = (double*)&param2
+ * R7 = unused (only R5 and R6 are loaded)
+ */
+#define FPD_TWO_IN_CR(name)						\
+_GLOBAL(fpd_ ## name);							\
+	lfd	1,0(r6);		/* load param2 */		\
+	lfd	0,0(r5);		/* load param1 */		\
+	lfd	3,0(r3);		/* load up fpscr value */	\
+	MTFSF_L(3);							\
+	lwz	r6, 0(r4);		/* load cr */			\
+	mtcr	r6;							\
+									\
+	name	0,0,1;			/* call instruction */		\
+	mfcr	r6;			/* r6 = cr after the op */	\
+	mffs	0;			/* f0 = updated fpscr */	\
+	stfd	0,0(r3);		/* save new fpscr value */	\
+	stw	r6,0(r4);		/* save new cr value */		\
+	blr
+
+/*
+ * Double operation with three input operands
+ *
+ * R3 = (double*)&fpscr
+ * R4 = (u32*)&cr
+ * R5 = (double*)&result
+ * R6 = (double*)&param1
+ * R7 = (double*)&param2
+ * R8 = (double*)&param3
+ */
+#define FPD_THREE_IN(name) 						\
+_GLOBAL(fpd_ ## name);							\
+	mflr	r12;			/* bl below overwrites LR */	\
+	bl	fpd_load_three;						\
+	mtlr	r12;			/* restore caller's LR */	\
+									\
+	name.	0,0,1,2;		/* call instruction */		\
+	b	fpd_return
+
+FPD_ONE_IN(fsqrts)
+FPD_ONE_IN(frsqrtes)
+FPD_ONE_IN(fres)
+FPD_ONE_IN(frsp)
+FPD_ONE_IN(fctiw)
+FPD_ONE_IN(fctiwz)
+FPD_ONE_IN(fsqrt)
+FPD_ONE_IN(fre)
+FPD_ONE_IN(frsqrte)
+FPD_ONE_IN(fneg)
+FPD_ONE_IN(fabs)
+FPD_TWO_IN(fadds)
+FPD_TWO_IN(fsubs)
+FPD_TWO_IN(fdivs)
+FPD_TWO_IN(fmuls)
+FPD_TWO_IN_CR(fcmpu)
+FPD_TWO_IN(fcpsgn)
+FPD_TWO_IN(fdiv)
+FPD_TWO_IN(fadd)
+FPD_TWO_IN(fmul)
+FPD_TWO_IN_CR(fcmpo)
+FPD_TWO_IN(fsub)
+FPD_THREE_IN(fmsubs)
+FPD_THREE_IN(fmadds)
+FPD_THREE_IN(fnmsubs)
+FPD_THREE_IN(fnmadds)
+FPD_THREE_IN(fsel)
+FPD_THREE_IN(fmsub)
+FPD_THREE_IN(fmadd)
+FPD_THREE_IN(fnmsub)
+FPD_THREE_IN(fnmadd)
+
+_GLOBAL(kvm_cvt_fd)
+	lfs	0,0(r3)			/* load single from *r3 */
+	stfd	0,0(r4)			/* store it as double to *r4 */
+	blr
+
+_GLOBAL(kvm_cvt_df)
+	lfd	0,0(r3)			/* load double from *r3 */
+	stfs	0,0(r4)			/* store it as single to *r4 */
+	blr
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
new file mode 100644
index 0000000000..23e9c2bd9f
--- /dev/null
+++ b/arch/powerpc/kvm/mpic.c
@@ -0,0 +1,1852 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ *               2011 Alexander Graf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_ppc.h>
+#include <kvm/iodev.h>
+
+#define MAX_CPU     32
+#define MAX_SRC     256
+#define MAX_TMR     4
+#define MAX_IPI     4
+#define MAX_MSI     8
+#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
+#define VID         0x03	/* MPIC version ID */
+
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
+#define OPENPIC_FLAG_ILR          (2 << 0)
+
+/* OpenPIC address map */
+#define OPENPIC_REG_SIZE             0x40000
+#define OPENPIC_GLB_REG_START        0x0
+#define OPENPIC_GLB_REG_SIZE         0x10F0
+#define OPENPIC_TMR_REG_START        0x10F0
+#define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_MSI_REG_START        0x1600
+#define OPENPIC_MSI_REG_SIZE         0x200
+#define OPENPIC_SUMMARY_REG_START    0x3800
+#define OPENPIC_SUMMARY_REG_SIZE     0x800
+#define OPENPIC_SRC_REG_START        0x10000
+#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START        0x20000
+#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+struct fsl_mpic_info {
+	int max_ext;
+};
+
+static struct fsl_mpic_info fsl_mpic_20 = {
+	.max_ext = 12,
+};
+
+static struct fsl_mpic_info fsl_mpic_42 = {
+	.max_ext = 12,
+};
+
+#define FRR_NIRQ_SHIFT    16
+#define FRR_NCPU_SHIFT     8
+#define FRR_VID_SHIFT      0
+
+#define VID_REVISION_1_2   2
+#define VID_REVISION_1_3   3
+
+#define VIR_GENERIC      0x00000000	/* Generic Vendor ID */
+
+#define GCR_RESET        0x80000000
+#define GCR_MODE_PASS    0x00000000
+#define GCR_MODE_MIXED   0x20000000
+#define GCR_MODE_PROXY   0x60000000
+
+#define TBCR_CI           0x80000000	/* count inhibit */
+#define TCCR_TOG          0x80000000	/* toggles when decrement to zero */
+
+#define IDR_EP_SHIFT      31
+#define IDR_EP_MASK       (1 << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT     30
+#define IDR_CI1_SHIFT     29
+#define IDR_P1_SHIFT      1
+#define IDR_P0_SHIFT      0
+
+#define ILR_INTTGT_MASK   0x000000ff
+#define ILR_INTTGT_INT    0x00
+#define ILR_INTTGT_CINT   0x01	/* critical */
+#define ILR_INTTGT_MCP    0x02	/* machine check */
+#define NUM_OUTPUTS       3
+
+#define MSIIR_OFFSET       0x140
+#define MSIIR_SRS_SHIFT    29
+#define MSIIR_SRS_MASK     (0x7 << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT    24
+#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
+
+static int get_current_cpu(void)
+{
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+	return vcpu ? vcpu->arch.irq_cpu_id : -1;	/* -1: task has no vcpu */
+#else
+	/* XXX: no vcpu lookup in this configuration, always report -1 */
+	return -1;
+#endif
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx);
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+				    uint32_t val);
+
+enum irq_type {
+	IRQ_TYPE_NORMAL = 0,
+	IRQ_TYPE_FSLINT,	/* FSL internal interrupt -- level only */
+	IRQ_TYPE_FSLSPECIAL,	/* FSL timer/IPI interrupt, edge, no polarity */
+};
+
+struct irq_queue {
+	/* Round up to the nearest 64 IRQs so that the queue length
+	 * won't change when moving between 32 and 64 bit hosts.
+	 */
+	unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
+	int next;	/* best IRQ found by the last IRQ_check(), -1 if none */
+	int priority;	/* priority of that IRQ, -1 if none */
+};
+
+struct irq_source {
+	uint32_t ivpr;		/* IRQ vector/priority register */
+	uint32_t idr;		/* IRQ destination register */
+	uint32_t destmask;	/* bitmap of CPU destinations */
+	int last_cpu;
+	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
+	int pending;		/* TRUE if IRQ is pending */
+	enum irq_type type;
+	bool level:1;		/* level-triggered */
+	bool nomask:1;	/* critical interrupts ignore mask on some FSL MPICs */
+};
+
+#define IVPR_MASK_SHIFT       31
+#define IVPR_MASK_MASK        (1 << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT   30
+#define IVPR_ACTIVITY_MASK    (1 << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT       29
+#define IVPR_MODE_MASK        (1 << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT   23
+#define IVPR_POLARITY_MASK    (1 << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT      22
+#define IVPR_SENSE_MASK       (1 << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK     (0xF << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP      0x80000000	/* external pin */
+#define IDR_CI      0x40000000	/* critical interrupt */
+
+struct irq_dest {
+	struct kvm_vcpu *vcpu;
+
+	int32_t ctpr;		/* CPU current task priority */
+	struct irq_queue raised;
+	struct irq_queue servicing;
+
+	/* Count of IRQ sources asserting on non-INT outputs */
+	uint32_t outputs_active[NUM_OUTPUTS];
+};
+
+#define MAX_MMIO_REGIONS 10
+
+struct openpic {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct kvm_io_device mmio;
+	const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
+	int num_mmio_regions;
+
+	gpa_t reg_base;
+	spinlock_t lock;
+
+	/* Behavior control */
+	struct fsl_mpic_info *fsl;
+	uint32_t model;
+	uint32_t flags;
+	uint32_t nb_irqs;
+	uint32_t vid;
+	uint32_t vir;		/* Vendor identification register */
+	uint32_t vector_mask;
+	uint32_t tfrr_reset;
+	uint32_t ivpr_reset;
+	uint32_t idr_reset;
+	uint32_t brr1;
+	uint32_t mpic_mode_mask;
+
+	/* Global registers */
+	uint32_t frr;		/* Feature reporting register */
+	uint32_t gcr;		/* Global configuration register  */
+	uint32_t pir;		/* Processor initialization register */
+	uint32_t spve;		/* Spurious vector register */
+	uint32_t tfrr;		/* Timer frequency reporting register */
+	/* Source registers */
+	struct irq_source src[MAX_IRQ];
+	/* Local registers per output pin */
+	struct irq_dest dst[MAX_CPU];
+	uint32_t nb_cpus;
+	/* Timer registers */
+	struct {
+		uint32_t tccr;	/* Global timer current count register */
+		uint32_t tbcr;	/* Global timer base count register */
+	} timers[MAX_TMR];
+	/* Shared MSI registers */
+	struct {
+		uint32_t msir;	/* Shared Message Signaled Interrupt Register */
+	} msi[MAX_MSI];
+	uint32_t max_irq;
+	uint32_t irq_ipi0;
+	uint32_t irq_tim0;
+	uint32_t irq_msi;
+};
+
+
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	struct kvm_vcpu *vcpu = dst->vcpu;
+	struct kvm_interrupt irq = {
+		.irq = KVM_INTERRUPT_SET_LEVEL,
+	};
+
+	if (vcpu == NULL) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, vcpu->arch.irq_cpu_id,
+		output);
+
+	/* TODO: only the normal INT output is delivered to the vcpu. */
+	if (output == ILR_INTTGT_INT)
+		kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	struct kvm_vcpu *vcpu = dst->vcpu;
+
+	if (vcpu == NULL) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, vcpu->arch.irq_cpu_id,
+		output);
+
+	if (output == ILR_INTTGT_INT)	/* TODO: other outputs are ignored */
+		kvmppc_core_dequeue_external(vcpu);
+}
+
+/* Mark interrupt @n_IRQ as present in the queue bitmap of @q. */
+static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
+{
+	set_bit(n_IRQ, q->queue);
+}
+
+/* Remove interrupt @n_IRQ from the queue bitmap of @q. */
+static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
+{
+	clear_bit(n_IRQ, q->queue);
+}
+
+/*
+ * Scan every interrupt queued in @q and cache the one with the highest
+ * IVPR priority in q->next / q->priority.  Both are set to -1 when the
+ * queue is empty.  On equal priority the lowest-numbered interrupt wins.
+ */
+static void IRQ_check(struct openpic *opp, struct irq_queue *q)
+{
+	int best_irq = -1;
+	int best_prio = -1;
+	int cur;
+
+	for (cur = find_next_bit(q->queue, opp->max_irq, 0);
+	     cur != opp->max_irq;
+	     cur = find_next_bit(q->queue, opp->max_irq, cur + 1)) {
+		pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
+			cur, IVPR_PRIORITY(opp->src[cur].ivpr), best_prio);
+
+		if (IVPR_PRIORITY(opp->src[cur].ivpr) > best_prio) {
+			best_irq = cur;
+			best_prio = IVPR_PRIORITY(opp->src[cur].ivpr);
+		}
+	}
+
+	q->priority = best_prio;
+	q->next = best_irq;
+}
+
+/*
+ * Recompute the highest-priority entry of @q and return its interrupt
+ * number, or -1 if the queue is empty.  q->priority is refreshed as a
+ * side effect of the IRQ_check() call.
+ */
+static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
+{
+	/* XXX: optimize */
+	IRQ_check(opp, q);
+
+	return q->next;
+}
+
+/*
+ * Propagate a state change of source @n_IRQ to destination CPU @n_CPU.
+ *
+ * @active is the new (mask-adjusted) state of the source, @was_active the
+ * state previously recorded in its IVPR activity bit.
+ *
+ * Sources routed to a non-INT output are reference counted per output in
+ * dst->outputs_active[] and bypass the priority logic.  INT sources are
+ * added to or removed from dst->raised, and the INT line is then raised
+ * or lowered depending on ctpr and on the interrupt currently in service.
+ */
+static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
+			   bool active, bool was_active)
+{
+	struct irq_dest *dst;
+	struct irq_source *src;
+	int priority;
+
+	dst = &opp->dst[n_CPU];
+	src = &opp->src[n_IRQ];
+
+	pr_debug("%s: IRQ %d active %d was %d\n",
+		__func__, n_IRQ, active, was_active);
+
+	if (src->output != ILR_INTTGT_INT) {
+		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
+			__func__, src->output, n_IRQ, active, was_active,
+			dst->outputs_active[src->output]);
+
+		/* On Freescale MPIC, critical interrupts ignore priority,
+		 * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
+		 * masking.
+		 */
+		if (active) {
+			/* Raise only on the 0 -> 1 transition of the count */
+			if (!was_active &&
+			    dst->outputs_active[src->output]++ == 0) {
+				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_raise(opp, dst, src->output);
+			}
+		} else {
+			/* Lower only when the last active source goes away */
+			if (was_active &&
+			    --dst->outputs_active[src->output] == 0) {
+				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
+					__func__, src->output, n_CPU, n_IRQ);
+				mpic_irq_lower(opp, dst, src->output);
+			}
+		}
+
+		return;
+	}
+
+	priority = IVPR_PRIORITY(src->ivpr);
+
+	/* Even if the interrupt doesn't have enough priority,
+	 * it is still raised, in case ctpr is lowered later.
+	 */
+	if (active)
+		IRQ_setbit(&dst->raised, n_IRQ);
+	else
+		IRQ_resetbit(&dst->raised, n_IRQ);
+
+	/* Refresh dst->raised.next/priority after the bitmap change */
+	IRQ_check(opp, &dst->raised);
+
+	if (active && priority <= dst->ctpr) {
+		pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
+			__func__, n_IRQ, priority, dst->ctpr, n_CPU);
+		active = 0;
+	}
+
+	if (active) {
+		if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
+		    priority <= dst->servicing.priority) {
+			pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
+				__func__, n_IRQ, dst->servicing.next, n_CPU);
+		} else {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
+				__func__, n_CPU, n_IRQ, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+	} else {
+		/* Called for its side effect of updating servicing.priority */
+		IRQ_get_next(opp, &dst->servicing);
+		if (dst->raised.priority > dst->ctpr &&
+		    dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->raised.next,
+				dst->raised.priority, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			/* IRQ line stays asserted */
+		} else {
+			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
+				__func__, n_IRQ, dst->ctpr,
+				dst->servicing.priority, n_CPU);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		}
+	}
+}
+
+/*
+ * Update pic state because registers for n_IRQ have changed value.
+ *
+ * The new activity state is derived from src->pending and the IVPR mask
+ * bit (ignored when src->nomask is set), recorded in the IVPR activity
+ * bit, and then forwarded via IRQ_local_pipe() to the destination CPU(s)
+ * selected by src->destmask and the IVPR delivery mode.
+ */
+static void openpic_update_irq(struct openpic *opp, int n_IRQ)
+{
+	struct irq_source *src;
+	bool active, was_active;
+	int i;
+
+	src = &opp->src[n_IRQ];
+	active = src->pending;
+
+	if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
+		/* Interrupt source is disabled */
+		pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
+		active = false;
+	}
+
+	was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
+
+	/*
+	 * We don't have a similar check for already-active because
+	 * ctpr may have changed and we need to withdraw the interrupt.
+	 */
+	if (!active && !was_active) {
+		pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (active)
+		src->ivpr |= IVPR_ACTIVITY_MASK;
+	else
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+
+	if (src->destmask == 0) {
+		/* No target */
+		pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
+		return;
+	}
+
+	if (src->destmask == (1 << src->last_cpu)) {
+		/* Only one CPU is allowed to receive this IRQ */
+		IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
+	} else if (!(src->ivpr & IVPR_MODE_MASK)) {
+		/* Directed delivery mode */
+		for (i = 0; i < opp->nb_cpus; i++) {
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+			}
+		}
+	} else {
+		/*
+		 * Distributed delivery mode: deliver to the first CPU in
+		 * destmask after last_cpu, wrapping at nb_cpus, and
+		 * remember it as the new last_cpu.
+		 */
+		for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
+			if (i == opp->nb_cpus)
+				i = 0;
+
+			if (src->destmask & (1 << i)) {
+				IRQ_local_pipe(opp, i, n_IRQ, active,
+					       was_active);
+				src->last_cpu = i;
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Change the input level of source @n_IRQ on the openpic passed in @opaque.
+ *
+ * Level-sensitive sources track @level directly in src->pending.  Edge
+ * sources latch pending on a non-zero @level and ignore a zero @level.
+ * An out-of-range @n_IRQ triggers a one-time warning and is ignored.
+ */
+static void openpic_set_irq(void *opaque, int n_IRQ, int level)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+
+	if (n_IRQ >= MAX_IRQ) {
+		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+		return;
+	}
+
+	src = &opp->src[n_IRQ];
+	pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
+		n_IRQ, level, src->ivpr);
+	if (src->level) {
+		/* level-sensitive irq */
+		src->pending = level;
+		openpic_update_irq(opp, n_IRQ);
+	} else {
+		/* edge-sensitive irq */
+		if (level) {
+			src->pending = 1;
+			openpic_update_irq(opp, n_IRQ);
+		}
+
+		if (src->output != ILR_INTTGT_INT) {
+			/* Edge-triggered interrupts shouldn't be used
+			 * with non-INT delivery, but just in case,
+			 * try to make it do something sane rather than
+			 * cause an interrupt storm.  This is close to
+			 * what you'd probably see happen in real hardware.
+			 */
+			src->pending = 0;
+			openpic_update_irq(opp, n_IRQ);
+		}
+	}
+}
+
+/*
+ * Put the controller into its reset state: global registers, every
+ * source's IVPR/IDR, every destination's ctpr and queues, and the timers.
+ * GCR holds GCR_RESET while the reset runs and is cleared at the end.
+ */
+static void openpic_reset(struct openpic *opp)
+{
+	int i;
+
+	opp->gcr = GCR_RESET;
+	/* Initialise controller registers */
+	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
+	    (opp->vid << FRR_VID_SHIFT);
+
+	opp->pir = 0;
+	opp->spve = -1 & opp->vector_mask;
+	opp->tfrr = opp->tfrr_reset;
+	/* Initialise IRQ sources */
+	for (i = 0; i < opp->max_irq; i++) {
+		opp->src[i].ivpr = opp->ivpr_reset;
+
+		switch (opp->src[i].type) {
+		case IRQ_TYPE_NORMAL:
+			opp->src[i].level =
+			    !!(opp->ivpr_reset & IVPR_SENSE_MASK);
+			break;
+
+		case IRQ_TYPE_FSLINT:
+			opp->src[i].ivpr |= IVPR_POLARITY_MASK;
+			break;
+
+		case IRQ_TYPE_FSLSPECIAL:
+			break;
+		}
+
+		write_IRQreg_idr(opp, i, opp->idr_reset);
+	}
+	/* Initialise IRQ destinations */
+	for (i = 0; i < MAX_CPU; i++) {
+		/* ctpr 15 is the highest value the CTPR write path can store */
+		opp->dst[i].ctpr = 15;
+		memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
+		opp->dst[i].raised.next = -1;
+		memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
+		opp->dst[i].servicing.next = -1;
+	}
+	/* Initialise timers */
+	for (i = 0; i < MAX_TMR; i++) {
+		opp->timers[i].tccr = 0;
+		opp->timers[i].tbcr = TBCR_CI;
+	}
+	/* Go out of RESET state */
+	opp->gcr = 0;
+}
+
+/* Return the stored IDR value of source @n_IRQ. */
+static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].idr;
+}
+
+/*
+ * Return the output selected for source @n_IRQ, or all-ones when this
+ * model does not set OPENPIC_FLAG_ILR.
+ */
+static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
+{
+	if (!(opp->flags & OPENPIC_FLAG_ILR))
+		return 0xffffffff;
+
+	return opp->src[n_IRQ].output;
+}
+
+/* Return the stored IVPR value of source @n_IRQ. */
+static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
+{
+	return opp->src[n_IRQ].ivpr;
+}
+
+/*
+ * Store @val into the IDR of source @n_IRQ and derive the source's
+ * destination mask from it.
+ *
+ * Only the low nb_cpus bits are kept, plus the critical-interrupt bits
+ * and IDR_EP when OPENPIC_FLAG_IDR_CRIT is set.  With that flag, any
+ * critical bit switches the source to the CINT output with masking
+ * disabled; otherwise the source uses the INT output.
+ */
+static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	struct irq_source *src = &opp->src[n_IRQ];
+	uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
+	uint32_t crit_mask = 0;
+	uint32_t mask = normal_mask;
+	int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
+	int i;
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		crit_mask = mask << crit_shift;
+		mask |= crit_mask | IDR_EP;
+	}
+
+	src->idr = val & mask;
+	pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
+
+	if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
+		if (src->idr & crit_mask) {
+			if (src->idr & normal_mask) {
+				pr_debug("%s: IRQ configured for multiple output types, using critical\n",
+					__func__);
+			}
+
+			src->output = ILR_INTTGT_CINT;
+			src->nomask = true;
+			src->destmask = 0;
+
+			/* CPU i's critical bit sits at IDR_CI0_SHIFT - i */
+			for (i = 0; i < opp->nb_cpus; i++) {
+				int n_ci = IDR_CI0_SHIFT - i;
+
+				if (src->idr & (1UL << n_ci))
+					src->destmask |= 1UL << i;
+			}
+		} else {
+			src->output = ILR_INTTGT_INT;
+			src->nomask = false;
+			src->destmask = src->idr & normal_mask;
+		}
+	} else {
+		src->destmask = src->idr;
+	}
+}
+
+/*
+ * Handle a write of @val to the ILR of source @n_IRQ.  The write only
+ * has an effect when the model sets OPENPIC_FLAG_ILR; the source's
+ * output is then taken from the ILR_INTTGT_MASK bits of @val.
+ */
+static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
+				    uint32_t val)
+{
+	if (opp->flags & OPENPIC_FLAG_ILR) {
+		struct irq_source *src = &opp->src[n_IRQ];
+
+		src->output = val & ILR_INTTGT_MASK;
+		/* Log the value written to ILR rather than the source's IDR */
+		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, val,
+			src->output);
+
+		/* TODO: on MPIC v4.0 only, set nomask for non-INT */
+	}
+}
+
+/*
+ * Store @val into the IVPR of source @n_IRQ, keeping the read-only
+ * activity bit and dropping bits outside the writable mask, then
+ * re-evaluate the source with openpic_update_irq().
+ */
+static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
+				     uint32_t val)
+{
+	uint32_t mask;
+
+	/* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+	 * the polarity bit is read-only on internal interrupts.
+	 */
+	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+	    IVPR_POLARITY_MASK | opp->vector_mask;
+
+	/* ACTIVITY bit is read-only */
+	opp->src[n_IRQ].ivpr =
+	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+	/* For FSL internal interrupts, The sense bit is reserved and zero,
+	 * and the interrupt is always level-triggered.  Timers and IPIs
+	 * have no sense or polarity bits, and are edge-triggered.
+	 */
+	switch (opp->src[n_IRQ].type) {
+	case IRQ_TYPE_NORMAL:
+		opp->src[n_IRQ].level =
+		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+		break;
+
+	case IRQ_TYPE_FSLINT:
+		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+		break;
+
+	case IRQ_TYPE_FSLSPECIAL:
+		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+		break;
+	}
+
+	openpic_update_irq(opp, n_IRQ);
+	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+		opp->src[n_IRQ].ivpr);
+}
+
+/*
+ * Handle a write to GCR.  A write with GCR_RESET set resets the whole
+ * controller; any other write only replaces the mode bits selected by
+ * opp->mpic_mode_mask.
+ */
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+	uint32_t mode_bits;
+
+	if (val & GCR_RESET) {
+		openpic_reset(opp);
+		return;
+	}
+
+	mode_bits = opp->mpic_mode_mask;
+	opp->gcr = (opp->gcr & ~mode_bits) | (val & mode_bits);
+}
+
+/*
+ * MMIO write handler for the global register block; @addr is relative
+ * to the start of the block.  Unaligned and unknown offsets are ignored.
+ *
+ * Returns 0, the result of the per-CPU handler for offsets 0x40-0xB0,
+ * or -ENXIO for a PIR write.
+ */
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case 0x00:	/* Block Revision Register1 (BRR1) is Readonly */
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		/* Per-CPU registers, routed using get_current_cpu() */
+		err = openpic_cpu_write_internal(opp, addr, val,
+						 get_current_cpu());
+		break;
+	case 0x1000:		/* FRR */
+		break;
+	case 0x1020:		/* GCR */
+		openpic_gcr_write(opp, val);
+		break;
+	case 0x1080:		/* VIR */
+		break;
+	case 0x1090:		/* PIR */
+		/*
+		 * This register is used to reset a CPU core --
+		 * let userspace handle it.
+		 */
+		err = -ENXIO;
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0: {
+		int idx;
+		idx = (addr - 0x10A0) >> 4;
+		write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
+		break;
+	}
+	case 0x10E0:		/* SPVE */
+		opp->spve = val & opp->vector_mask;
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * MMIO read handler for the global register block; @addr is relative to
+ * the start of the block.  Unaligned and unknown offsets read as
+ * all-ones.  *@ptr is always written.
+ *
+ * Returns 0, or the result of the per-CPU handler for offsets 0x40-0xB0.
+ */
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	u32 retval;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+	if (addr & 0xF)
+		goto out;
+
+	switch (addr) {
+	case 0x1000:		/* FRR */
+		retval = opp->frr;
+		/* The CPU count field is filled in from the current nb_cpus */
+		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
+		break;
+	case 0x1020:		/* GCR */
+		retval = opp->gcr;
+		break;
+	case 0x1080:		/* VIR */
+		retval = opp->vir;
+		break;
+	case 0x1090:		/* PIR */
+		retval = 0x00000000;
+		break;
+	case 0x00:		/* Block Revision Register1 (BRR1) */
+		retval = opp->brr1;
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		/* Per-CPU registers, routed using get_current_cpu() */
+		err = openpic_cpu_read_internal(opp, addr,
+			&retval, get_current_cpu());
+		break;
+	case 0x10A0:		/* IPI_IVPR */
+	case 0x10B0:
+	case 0x10C0:
+	case 0x10D0:
+		{
+			int idx;
+			idx = (addr - 0x10A0) >> 4;
+			retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
+		}
+		break;
+	case 0x10E0:		/* SPVE */
+		retval = opp->spve;
+		break;
+	default:
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return err;
+}
+
+/*
+ * MMIO write handler for the timer register block; @addr is relative to
+ * the start of the block and is rebased by 0x10f0 before decoding.
+ * The first register is TFRR; for the others, bits 7:6 of the rebased
+ * address select the timer and bits 5:4 the register.  Unaligned writes
+ * and TCCR writes are ignored.  Always returns 0.
+ */
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	addr += 0x10f0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	if (addr == 0x10f0) {
+		/* TFRR */
+		opp->tfrr = val;
+		return 0;
+	}
+
+	idx = (addr >> 6) & 0x3;
+	addr = addr & 0x30;
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		break;
+	case 0x10:		/* TBCR */
+		/* Clear TCCR_TOG when TBCR_CI goes from set to clear */
+		if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
+		    (val & TBCR_CI) == 0 &&
+		    (opp->timers[idx].tbcr & TBCR_CI) != 0)
+			opp->timers[idx].tccr &= ~TCCR_TOG;
+
+		opp->timers[idx].tbcr = val;
+		break;
+	case 0x20:		/* TVPR */
+		write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
+		break;
+	case 0x30:		/* TDR */
+		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * MMIO read handler for the timer register block; @addr is relative to
+ * the start of the block.  Offset 0 is TFRR.  Unaligned offsets read as
+ * all-ones.  *@ptr is always written and the return value is always 0.
+ *
+ * The per-timer registers are decoded the same way openpic_tmr_write()
+ * decodes them: the offset is rebased by 0x10f0 before the timer index
+ * and register are extracted, so that a read and a write of the same
+ * offset address the same register.
+ */
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval = -1;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		goto out;
+
+	if (addr == 0x0) {
+		/* TFRR */
+		retval = opp->tfrr;
+		goto out;
+	}
+
+	/* Rebase like the write path before splitting timer/register */
+	addr += 0x10f0;
+	idx = (addr >> 6) & 0x3;
+
+	switch (addr & 0x30) {
+	case 0x00:		/* TCCR */
+		retval = opp->timers[idx].tccr;
+		break;
+	case 0x10:		/* TBCR */
+		retval = opp->timers[idx].tbcr;
+		break;
+	case 0x20:		/* TIPV */
+		retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
+		break;
+	case 0x30:		/* TIDE (TIDR) */
+		retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
+		break;
+	}
+
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+/*
+ * MMIO write handler for the interrupt source block.  Each source
+ * occupies 0x20 bytes: IVPR at +0x00, IDR at +0x10 and ILR at +0x18.
+ * Other offsets are ignored.  Always returns 0.
+ */
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+
+	addr = addr & 0xffff;
+	/* Source index: one source per 0x20 bytes */
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		write_IRQreg_ivpr(opp, idx, val);
+		break;
+	case 0x10:
+		write_IRQreg_idr(opp, idx, val);
+		break;
+	case 0x18:
+		write_IRQreg_ilr(opp, idx, val);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * MMIO read handler for the interrupt source block.  Each source
+ * occupies 0x20 bytes: IVPR at +0x00, IDR at +0x10 and ILR at +0x18.
+ * Other offsets read as all-ones.  Always returns 0.
+ */
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+
+	addr = addr & 0xffff;
+	/* Source index: one source per 0x20 bytes */
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		retval = read_IRQreg_ivpr(opp, idx);
+		break;
+	case 0x10:
+		retval = read_IRQreg_idr(opp, idx);
+		break;
+	case 0x18:
+		retval = read_IRQreg_ilr(opp, idx);
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+/*
+ * MMIO write handler for the MSI block.  Only MSIIR is writable: the
+ * value selects a shared MSI register (SRS field) and a bit inside it
+ * (IBS field); that bit is set and the matching MSI source is raised.
+ * Unaligned writes and all other offsets are ignored.  Always returns 0.
+ */
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx = opp->irq_msi;
+	int srs, ibs;
+
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case MSIIR_OFFSET:
+		srs = val >> MSIIR_SRS_SHIFT;
+		idx += srs;
+		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+		opp->msi[srs].msir |= 1 << ibs;
+		openpic_set_irq(opp, idx, 1);
+		break;
+	default:
+		/* most registers are read-only, thus ignored */
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * MMIO read handler for the MSI block.  Offsets 0x00-0x70 return one
+ * MSIR each and clear it (lowering the matching MSI source); 0x120
+ * returns one summary bit per non-zero MSIR.  Other aligned offsets
+ * read as 0.
+ *
+ * Returns 0, or -ENXIO for an unaligned offset, in which case *@ptr is
+ * not written.
+ */
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t r = 0;
+	int i, srs;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		return -ENXIO;
+
+	/* Only used as an index for the MSIR cases below */
+	srs = addr >> 4;
+
+	switch (addr) {
+	case 0x00:
+	case 0x10:
+	case 0x20:
+	case 0x30:
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:		/* MSIRs */
+		r = opp->msi[srs].msir;
+		/* Clear on read */
+		opp->msi[srs].msir = 0;
+		openpic_set_irq(opp, opp->irq_msi + srs, 0);
+		break;
+	case 0x120:		/* MSISR */
+		for (i = 0; i < MAX_MSI; i++)
+			r |= (opp->msi[i].msir ? 1 : 0) << i;
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, r);
+	*ptr = r;
+	return 0;
+}
+
+/*
+ * MMIO read handler for the summary block.  Nothing is implemented yet,
+ * so every offset reads as 0.  Always returns 0.
+ */
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+
+	/* TODO: EISR/EIMR */
+	*ptr = 0;
+
+	return 0;
+}
+
+/*
+ * MMIO write handler for the summary block.  Nothing is implemented
+ * yet, so every write is discarded.  Always returns 0.
+ */
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+	/* TODO: EISR/EIMR */
+	return 0;
+}
+
+/*
+ * Handle a write to the per-CPU register at @addr on behalf of CPU @idx.
+ * Writes with a negative @idx, an unaligned @addr or an unknown register
+ * are ignored.  Always returns 0.
+ *
+ * The EOI path releases and re-takes opp->lock around
+ * kvm_notify_acked_irq().
+ */
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+	struct irq_dest *dst;
+	int s_IRQ, n_IRQ;
+
+	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+		addr, val);
+
+	if (idx < 0)
+		return 0;
+
+	if (addr & 0xF)
+		return 0;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x40:		/* IPIDR */
+	case 0x50:
+	case 0x60:
+	case 0x70:
+		/* From here on idx is the IPI number, not the CPU number */
+		idx = (addr - 0x40) >> 4;
+		/* we use IDE as mask which CPUs to deliver the IPI to still. */
+		opp->src[opp->irq_ipi0 + idx].destmask |= val;
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
+		openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
+		break;
+	case 0x80:		/* CTPR */
+		dst->ctpr = val & 0x0000000F;
+
+		pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
+			__func__, idx, dst->ctpr, dst->raised.priority,
+			dst->servicing.priority);
+
+		if (dst->raised.priority <= dst->ctpr) {
+			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
+				__func__, idx);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+		} else if (dst->raised.priority > dst->servicing.priority) {
+			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
+				__func__, idx, dst->raised.next);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		break;
+	case 0x90:		/* WHOAMI */
+		/* Read-only register */
+		break;
+	case 0xA0:		/* IACK */
+		/* Read-only register */
+		break;
+	case 0xB0: {		/* EOI */
+		int notify_eoi;
+
+		pr_debug("EOI\n");
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+
+		if (s_IRQ < 0) {
+			pr_debug("%s: EOI with no interrupt in service\n",
+				__func__);
+			break;
+		}
+
+		IRQ_resetbit(&dst->servicing, s_IRQ);
+		/* Notify listeners that the IRQ is over */
+		notify_eoi = s_IRQ;
+		/* Set up next servicing IRQ */
+		s_IRQ = IRQ_get_next(opp, &dst->servicing);
+		/* Check queued interrupts. */
+		n_IRQ = IRQ_get_next(opp, &dst->raised);
+		/* src is only dereferenced below when n_IRQ != -1 */
+		src = &opp->src[n_IRQ];
+		if (n_IRQ != -1 &&
+		    (s_IRQ == -1 ||
+		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
+			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
+				idx, n_IRQ);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
+		}
+
+		/* The ack notification is issued with opp->lock dropped */
+		spin_unlock(&opp->lock);
+		kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
+		spin_lock(&opp->lock);
+
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * MMIO write handler for the per-CPU register block.  The target CPU
+ * index is taken from bits 16:12 of @addr.
+ */
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_write_internal(opp, addr, val,
+					 (addr & 0x1f000) >> 12);
+}
+
+/*
+ * Acknowledge the highest-priority raised interrupt of @dst on behalf
+ * of CPU @cpu.  The INT output is lowered first.
+ *
+ * Returns the vector of the acknowledged interrupt, which then enters
+ * the servicing queue, or the spurious vector (opp->spve) when nothing
+ * is raised or the raised interrupt is inactive or not above ctpr.
+ */
+static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
+			     int cpu)
+{
+	struct irq_source *src;
+	int retval, irq;
+
+	pr_debug("Lower OpenPIC INT output\n");
+	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
+
+	irq = IRQ_get_next(opp, &dst->raised);
+	pr_debug("IACK: irq=%d\n", irq);
+
+	if (irq == -1)
+		/* No more interrupt pending */
+		return opp->spve;
+
+	src = &opp->src[irq];
+	if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
+	    !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
+		pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
+			__func__, irq, dst->ctpr, src->ivpr);
+		openpic_update_irq(opp, irq);
+		retval = opp->spve;
+	} else {
+		/* IRQ enter servicing state */
+		IRQ_setbit(&dst->servicing, irq);
+		retval = IVPR_VECTOR(opp, src->ivpr);
+	}
+
+	if (!src->level) {
+		/* edge-sensitive IRQ */
+		src->ivpr &= ~IVPR_ACTIVITY_MASK;
+		src->pending = 0;
+		IRQ_resetbit(&dst->raised, irq);
+	}
+
+	/* IPIs: drop this CPU from the mask and re-trigger for the rest */
+	if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
+		src->destmask &= ~(1 << cpu);
+		if (src->destmask && !src->level) {
+			/* trigger on CPUs that didn't know about it yet */
+			openpic_set_irq(opp, irq, 1);
+			openpic_set_irq(opp, irq, 0);
+			/* if all CPUs knew about it, set active bit again */
+			src->ivpr |= IVPR_ACTIVITY_MASK;
+		}
+	}
+
+	return retval;
+}
+
+/*
+ * When the MPIC of @vcpu is in proxy mode, perform an interrupt
+ * acknowledge for that vcpu and pass the resulting vector to
+ * kvmppc_set_epr().  In any other mode this does nothing.
+ */
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+	struct openpic *opp = vcpu->arch.mpic;
+	int cpu = vcpu->arch.irq_cpu_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+	spin_unlock_irqrestore(&opp->lock, flags);
+}
+
+/*
+ * Handle a read of the per-CPU register at @addr on behalf of CPU @idx.
+ * A negative @idx, an unaligned @addr or an unknown register reads as
+ * all-ones.  Reading IACK acknowledges an interrupt via openpic_iack().
+ * *@ptr is always written and the return value is always 0.
+ */
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_dest *dst;
+	uint32_t retval;
+
+	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
+	retval = 0xFFFFFFFF;
+
+	if (idx < 0)
+		goto out;
+
+	if (addr & 0xF)
+		goto out;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x80:		/* CTPR */
+		retval = dst->ctpr;
+		break;
+	case 0x90:		/* WHOAMI */
+		retval = idx;
+		break;
+	case 0xA0:		/* IACK */
+		retval = openpic_iack(opp, dst, idx);
+		break;
+	case 0xB0:		/* EOI */
+		retval = 0;
+		break;
+	default:
+		break;
+	}
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+
+out:
+	*ptr = retval;
+	return 0;
+}
+
+/*
+ * MMIO read handler for the per-CPU register block.  The target CPU
+ * index is taken from bits 16:12 of @addr.
+ */
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_read_internal(opp, addr, ptr,
+					 (addr & 0x1f000) >> 12);
+}
+
+/*
+ * One register sub-region of the MPIC.  The handlers are called with an
+ * address relative to @start_addr, which itself is relative to the
+ * MPIC register base.
+ */
+struct mem_reg {
+	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+	int (*write)(void *opaque, gpa_t addr, u32 val);
+	gpa_t start_addr;	/* offset of the region from the MPIC base */
+	int size;		/* length of the region in bytes */
+};
+
+/* Global register block */
+static const struct mem_reg openpic_gbl_mmio = {
+	.write = openpic_gbl_write,
+	.read = openpic_gbl_read,
+	.start_addr = OPENPIC_GLB_REG_START,
+	.size = OPENPIC_GLB_REG_SIZE,
+};
+
+/* Timer register block */
+static const struct mem_reg openpic_tmr_mmio = {
+	.write = openpic_tmr_write,
+	.read = openpic_tmr_read,
+	.start_addr = OPENPIC_TMR_REG_START,
+	.size = OPENPIC_TMR_REG_SIZE,
+};
+
+/* Per-CPU register block */
+static const struct mem_reg openpic_cpu_mmio = {
+	.write = openpic_cpu_write,
+	.read = openpic_cpu_read,
+	.start_addr = OPENPIC_CPU_REG_START,
+	.size = OPENPIC_CPU_REG_SIZE,
+};
+
+/* Interrupt source register block */
+static const struct mem_reg openpic_src_mmio = {
+	.write = openpic_src_write,
+	.read = openpic_src_read,
+	.start_addr = OPENPIC_SRC_REG_START,
+	.size = OPENPIC_SRC_REG_SIZE,
+};
+
+/* MSI register block (registered by fsl_common_init()) */
+static const struct mem_reg openpic_msi_mmio = {
+	.read = openpic_msi_read,
+	.write = openpic_msi_write,
+	.start_addr = OPENPIC_MSI_REG_START,
+	.size = OPENPIC_MSI_REG_SIZE,
+};
+
+/* Summary register block (registered by fsl_common_init()) */
+static const struct mem_reg openpic_summary_mmio = {
+	.read = openpic_summary_read,
+	.write = openpic_summary_write,
+	.start_addr = OPENPIC_SUMMARY_REG_START,
+	.size = OPENPIC_SUMMARY_REG_SIZE,
+};
+
+/*
+ * Append @mr to the list of register regions of @opp.  If the table is
+ * already full a warning is emitted and the region is dropped.
+ */
+static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
+{
+	int slot = opp->num_mmio_regions;
+
+	if (slot >= MAX_MMIO_REGIONS) {
+		WARN(1, "kvm mpic: too many mmio regions\n");
+		return;
+	}
+
+	opp->mmio_regions[slot] = mr;
+	opp->num_mmio_regions = slot + 1;
+}
+
+/*
+ * Initialisation shared by the Freescale MPIC models: registers the MSI
+ * and summary regions, sets the reset values, lays out the IPI and
+ * timer sources after the MAX_SRC regular sources, and assigns a type
+ * and trigger mode to every source.  opp->fsl must already be set.
+ */
+static void fsl_common_init(struct openpic *opp)
+{
+	int i;
+	int virq = MAX_SRC;
+
+	add_mmio_region(opp, &openpic_msi_mmio);
+	add_mmio_region(opp, &openpic_summary_mmio);
+
+	opp->vid = VID_REVISION_1_2;
+	opp->vir = VIR_GENERIC;
+	opp->vector_mask = 0xFFFF;
+	opp->tfrr_reset = 0;
+	opp->ivpr_reset = IVPR_MASK_MASK;
+	opp->idr_reset = 1 << 0;
+	opp->max_irq = MAX_IRQ;
+
+	/* IPIs first, then timers, directly after the regular sources */
+	opp->irq_ipi0 = virq;
+	virq += MAX_IPI;
+	opp->irq_tim0 = virq;
+	virq += MAX_TMR;
+
+	BUG_ON(virq > MAX_IRQ);
+
+	opp->irq_msi = 224;
+
+	/* The first max_ext sources start out edge-triggered */
+	for (i = 0; i < opp->fsl->max_ext; i++)
+		opp->src[i].level = false;
+
+	/* Internal interrupts, including message and MSI */
+	for (i = 16; i < MAX_SRC; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLINT;
+		opp->src[i].level = true;
+	}
+
+	/* timers and IPIs */
+	for (i = MAX_SRC; i < virq; i++) {
+		opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
+		opp->src[i].level = false;
+	}
+}
+
+/*
+ * Dispatch a register read at @addr (relative to the MPIC base) to the
+ * first registered region containing it.  Returns the handler's result,
+ * or -ENXIO when no region matches, in which case *@ptr is untouched.
+ */
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
+{
+	const struct mem_reg *mr;
+	int i;
+
+	for (i = 0; i < opp->num_mmio_regions; i++) {
+		mr = opp->mmio_regions[i];
+
+		if (addr >= mr->start_addr && addr < mr->start_addr + mr->size)
+			return mr->read(opp, addr - mr->start_addr, ptr);
+	}
+
+	return -ENXIO;
+}
+
+/*
+ * Dispatch a register write at @addr (relative to the MPIC base) to the
+ * first registered region containing it.  Returns the handler's result,
+ * or -ENXIO when no region matches.
+ */
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
+{
+	const struct mem_reg *mr;
+	int i;
+
+	for (i = 0; i < opp->num_mmio_regions; i++) {
+		mr = opp->mmio_regions[i];
+
+		if (addr >= mr->start_addr && addr < mr->start_addr + mr->size)
+			return mr->write(opp, addr - mr->start_addr, val);
+	}
+
+	return -ENXIO;
+}
+
+/*
+ * kvm_io_device read callback for the MPIC register window.
+ *
+ * @addr is a guest physical address and must be aligned to @len.  The
+ * register is always read as a 32-bit value under opp->lock; @len may
+ * be 4, or 1 to extract a single byte of that value.
+ *
+ * Returns the register handler's result, or -EINVAL for a bad alignment
+ * or length.
+ */
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+			 struct kvm_io_device *this,
+			 gpa_t addr, int len, void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+	/*
+	 * Zero-initialised: a failing handler may leave the value
+	 * unwritten, and it is copied to @ptr below regardless of ret.
+	 */
+	union {
+		u32 val;
+		u8 bytes[4];
+	} u = { .val = 0 };
+
+	if (addr & (len - 1)) {
+		pr_debug("%s: bad alignment %llx/%d\n",
+			 __func__, addr, len);
+		return -EINVAL;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+	spin_unlock_irq(&opp->lock);
+
+	/*
+	 * Technically only 32-bit accesses are allowed, but be nice to
+	 * people dumping registers a byte at a time -- it works in real
+	 * hardware (reads only, not writes).
+	 */
+	if (len == 4) {
+		*(u32 *)ptr = u.val;
+		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+			 __func__, addr, ret, u.val);
+	} else if (len == 1) {
+		*(u8 *)ptr = u.bytes[addr & 3];
+		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+			 __func__, addr, ret, u.bytes[addr & 3]);
+	} else {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * kvm_io_device write callback for the MPIC register window.  Only
+ * aligned 32-bit writes are accepted; anything else returns
+ * -EOPNOTSUPP.  The register write itself runs under opp->lock and its
+ * result is returned.
+ */
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+			  struct kvm_io_device *this,
+			  gpa_t addr, int len, const void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+
+	if (len != 4) {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EOPNOTSUPP;
+	}
+	if (addr & 3) {
+		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+		return -EOPNOTSUPP;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+				      *(const u32 *)ptr);
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: addr %llx ret %d val %x\n",
+		 __func__, addr, ret, *(const u32 *)ptr);
+
+	return ret;
+}
+
+/* I/O device callbacks installed on opp->mmio by map_mmio() */
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+	.read = kvm_mpic_read,
+	.write = kvm_mpic_write,
+};
+
+/*
+ * Register the MPIC register window (OPENPIC_REG_SIZE bytes starting at
+ * opp->reg_base) on the VM's MMIO bus.  The registration result is not
+ * checked.
+ */
+static void map_mmio(struct openpic *opp)
+{
+	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+				opp->reg_base, OPENPIC_REG_SIZE,
+				&opp->mmio);
+}
+
+/* Remove the MPIC register window from the VM's MMIO bus. */
+static void unmap_mmio(struct openpic *opp)
+{
+	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+/*
+ * Handle KVM_DEV_MPIC_BASE_ADDR: read a u64 base address from user
+ * space and move the register window there.  A base of 0 leaves the
+ * window unmapped.
+ *
+ * Returns 0 on success, -EFAULT if the user pointer cannot be read, or
+ * -EINVAL if the base is not aligned to 0x40000.
+ */
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+	u64 base;
+
+	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+		return -EFAULT;
+
+	if (base & 0x3ffff) {
+		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+			 __func__, base);
+		return -EINVAL;
+	}
+
+	if (base == opp->reg_base)
+		return 0;
+
+	/* Unmap and remap under slots_lock */
+	mutex_lock(&opp->kvm->slots_lock);
+
+	unmap_mmio(opp);
+	opp->reg_base = base;
+
+	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+		 __func__, base);
+
+	if (base == 0)
+		goto out;
+
+	map_mmio(opp);
+
+out:
+	mutex_unlock(&opp->kvm->slots_lock);
+	return 0;
+}
+
+#define ATTR_SET		0
+#define ATTR_GET		1
+
+/*
+ * Read or write one MPIC register for the device attribute interface.
+ * @addr is relative to the MPIC base and must be 4-byte aligned; @type
+ * is ATTR_SET to write *@val or ATTR_GET to read into *@val.  The
+ * access runs under opp->lock.
+ *
+ * Returns the register handler's result, or -ENXIO if @addr is
+ * unaligned.
+ */
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+	int ret;
+
+	if (addr & 3)
+		return -ENXIO;
+
+	spin_lock_irq(&opp->lock);
+
+	if (type == ATTR_SET)
+		ret = kvm_mpic_write_internal(opp, addr, *val);
+	else
+		ret = kvm_mpic_read_internal(opp, addr, val);
+
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+	return ret;
+}
+
+/*
+ * kvm_device set_attr callback.  Supported groups:
+ *  - KVM_DEV_MPIC_GRP_MISC / KVM_DEV_MPIC_BASE_ADDR: set the base address
+ *  - KVM_DEV_MPIC_GRP_REGISTER: write the register at offset attr->attr
+ *  - KVM_DEV_MPIC_GRP_IRQ_ACTIVE: set input attr->attr to 0 or 1
+ *
+ * Returns 0 or a negative errno; -ENXIO for an unknown group/attribute.
+ */
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u32 attr32;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return set_base_addr(opp, attr);
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		/* NOTE(review): '>' also admits attr == MAX_SRC -- confirm */
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		if (attr32 != 0 && attr32 != 1)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		openpic_set_irq(opp, attr->attr, attr32);
+		spin_unlock_irq(&opp->lock);
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+/*
+ * kvm_device get_attr callback.  Supported groups:
+ *  - KVM_DEV_MPIC_GRP_MISC / KVM_DEV_MPIC_BASE_ADDR: return the base address
+ *  - KVM_DEV_MPIC_GRP_REGISTER: read the register at offset attr->attr
+ *  - KVM_DEV_MPIC_GRP_IRQ_ACTIVE: return the pending state of attr->attr
+ *
+ * The result is stored at the user address in attr->addr.  Returns 0 or
+ * a negative errno; -ENXIO for an unknown group/attribute.
+ */
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u64 attr64;
+	u32 attr32;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			mutex_lock(&opp->kvm->slots_lock);
+			attr64 = opp->reg_base;
+			mutex_unlock(&opp->kvm->slots_lock);
+
+			if (copy_to_user((u64 __user *)(long)attr->addr,
+					 &attr64, sizeof(u64)))
+				return -EFAULT;
+
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+		if (ret)
+			return ret;
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		/* NOTE(review): '>' also admits attr == MAX_SRC -- confirm */
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		attr32 = opp->src[attr->attr].pending;
+		spin_unlock_irq(&opp->lock);
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+/*
+ * kvm_device has_attr callback: returns 0 when the group/attribute pair
+ * in @attr is supported and -ENXIO otherwise.
+ */
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		if (attr->attr == KVM_DEV_MPIC_BASE_ADDR)
+			return 0;
+
+		return -ENXIO;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr <= MAX_SRC)
+			return 0;
+
+		return -ENXIO;
+
+	default:
+		return -ENXIO;
+	}
+}
+
+/*
+ * kvm_device destroy callback: clears the VM's MPIC pointer and frees
+ * both the openpic state and the kvm_device itself.
+ */
+static void mpic_destroy(struct kvm_device *dev)
+{
+	struct openpic *opp = dev->private;
+
+	dev->kvm->arch.mpic = NULL;
+	kfree(opp);
+	kfree(dev);
+}
+
+/*
+ * Install an empty IRQ routing table for the VM of @opp.
+ *
+ * Returns 0 on success, -ENOMEM if the scratch entry cannot be
+ * allocated, or the error reported by kvm_set_irq_routing().
+ */
+static int mpic_set_default_irq_routing(struct openpic *opp)
+{
+	struct kvm_irq_routing_entry *routing;
+	int ret;
+
+	/* Create a nop default map, so that dereferencing it still works */
+	routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
+	if (!routing)
+		return -ENOMEM;
+
+	/* Propagate a failure instead of reporting success unconditionally */
+	ret = kvm_set_irq_routing(opp->kvm, routing, 0, 0);
+
+	kfree(routing);
+	return ret;
+}
+
+/*
+ * kvm_device create callback: allocate and initialise the MPIC state
+ * for model @type and publish it in dev->kvm->arch.mpic.
+ *
+ * Returns 0 on success, -EINVAL if the VM already has an MPIC, -ENOMEM
+ * on allocation failure, -ENODEV for an unknown @type, or the error
+ * from setting up the default IRQ routing.
+ */
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+	struct openpic *opp;
+	int ret;
+
+	/* We only support one MPIC at a time for now */
+	if (dev->kvm->arch.mpic)
+		return -EINVAL;
+
+	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+	if (!opp)
+		return -ENOMEM;
+
+	dev->private = opp;
+	opp->kvm = dev->kvm;
+	opp->dev = dev;
+	opp->model = type;
+	spin_lock_init(&opp->lock);
+
+	add_mmio_region(opp, &openpic_gbl_mmio);
+	add_mmio_region(opp, &openpic_tmr_mmio);
+	add_mmio_region(opp, &openpic_src_mmio);
+	add_mmio_region(opp, &openpic_cpu_mmio);
+
+	switch (opp->model) {
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+		opp->fsl = &fsl_mpic_20;
+		opp->brr1 = 0x00400200;
+		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
+		opp->nb_irqs = 80;
+		opp->mpic_mode_mask = GCR_MODE_MIXED;
+
+		fsl_common_init(opp);
+
+		break;
+
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		opp->fsl = &fsl_mpic_42;
+		opp->brr1 = 0x00400402;
+		opp->flags |= OPENPIC_FLAG_ILR;
+		opp->nb_irqs = 196;
+		opp->mpic_mode_mask = GCR_MODE_PROXY;
+
+		fsl_common_init(opp);
+
+		break;
+
+	default:
+		ret = -ENODEV;
+		goto err;
+	}
+
+	ret = mpic_set_default_irq_routing(opp);
+	if (ret)
+		goto err;
+
+	openpic_reset(opp);
+
+	/* Order the initialisation above before publishing the pointer */
+	smp_wmb();
+	dev->kvm->arch.mpic = opp;
+
+	return 0;
+
+err:
+	kfree(opp);
+	return ret;
+}
+
+/* Device callbacks for the "kvm-mpic" KVM device. */
+struct kvm_device_ops kvm_mpic_ops = {
+	.name = "kvm-mpic",
+	.create = mpic_create,
+	.destroy = mpic_destroy,
+	.set_attr = mpic_set_attr,
+	.get_attr = mpic_get_attr,
+	.has_attr = mpic_has_attr,
+};
+
+/*
+ * Attach @vcpu to the MPIC behind @dev as destination CPU number @cpu.
+ *
+ * Returns 0 on success, -EPERM if @dev is not an MPIC device, belongs
+ * to a different VM than @vcpu, or @cpu is not below MAX_CPU, -EEXIST
+ * if that destination slot already has a vcpu, and -EBUSY if the vcpu
+ * already has an interrupt controller type assigned.
+ */
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu)
+{
+	struct openpic *opp = dev->private;
+	int ret = 0;
+
+	if (dev->ops != &kvm_mpic_ops)
+		return -EPERM;
+	if (opp->kvm != vcpu->kvm)
+		return -EPERM;
+	/* @cpu is unsigned, so only the upper bound needs checking. */
+	if (cpu >= MAX_CPU)
+		return -EPERM;
+
+	spin_lock_irq(&opp->lock);
+
+	if (opp->dst[cpu].vcpu) {
+		ret = -EEXIST;
+		goto out;
+	}
+	if (vcpu->arch.irq_type) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	opp->dst[cpu].vcpu = vcpu;
+	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+	vcpu->arch.mpic = opp;
+	vcpu->arch.irq_cpu_id = cpu;
+	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+	/* This might need to be changed if GCR gets extended */
+	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+out:
+	spin_unlock_irq(&opp->lock);
+	return ret;
+}
+
+/*
+ * Detach @vcpu from @opp by clearing the destination slot selected by
+ * vcpu->arch.irq_cpu_id.  It is a BUG if that slot is already empty.
+ * No lock is taken here.
+ *
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+}
+
+/*
+ * Routing-entry set handler for MPIC pins: forwards @level for pin
+ * e->irqchip.pin to openpic_set_irq() while holding opp->lock.
+ * @irq_source_id and @line_status are not used.
+ *
+ * Return value (as documented for this handler):
+ *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
+ *  = 0   Interrupt was coalesced (previous irq is still pending)
+ *  > 0   Number of CPUs interrupt was delivered to
+ *
+ * Note that this implementation does not distinguish those cases and
+ * always returns 0.
+ */
+static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id, int level,
+			bool line_status)
+{
+	u32 irq = e->irqchip.pin;
+	struct openpic *opp = kvm->arch.mpic;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+	openpic_set_irq(opp, irq, level);
+	spin_unlock_irqrestore(&opp->lock, flags);
+
+	/* All code paths we care about don't check for the return value */
+	return 0;
+}
+
+/*
+ * Routing-entry set handler for MSIs: writes e->msi.data to the MPIC's
+ * MSIIR register under the controller lock.  @irq_source_id, @level and
+ * @line_status are not used.  Always returns 0.
+ */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+	struct openpic *mpic = kvm->arch.mpic;
+	unsigned long irqflags;
+
+	/*
+	 * XXX We ignore the target address for now, as we only support
+	 *     a single MSI bank.
+	 */
+	spin_lock_irqsave(&mpic->lock, irqflags);
+	openpic_msi_write(mpic, MSIIR_OFFSET, e->msi.data);
+	spin_unlock_irqrestore(&mpic->lock, irqflags);
+
+	/* All code paths we care about don't check for the return value */
+	return 0;
+}
+
+/*
+ * Translate the userspace routing entry @ue into the kernel entry @e.
+ *
+ * IRQCHIP entries are wired to mpic_set_irq() and MSI entries to
+ * kvm_set_msi().  Returns 0 on success and -EINVAL for an unknown
+ * entry type or an irqchip pin >= KVM_IRQCHIP_NUM_PINS.
+ */
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	if (ue->type == KVM_IRQ_ROUTING_IRQCHIP) {
+		e->set = mpic_set_irq;
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+
+		/* @e has already been filled in when the pin is rejected. */
+		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+			return -EINVAL;
+
+		return 0;
+	}
+
+	if (ue->type == KVM_IRQ_ROUTING_MSI) {
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
new file mode 100644
index 0000000000..7197c82566
--- /dev/null
+++ b/arch/powerpc/kvm/powerpc.c
@@ -0,0 +1,2552 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/vmalloc.h>
+#include <linux/hrtimer.h>
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/irqbypass.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/of.h>
+#include <asm/cputable.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/cputhreads.h>
+#include <asm/irqflags.h>
+#include <asm/iommu.h>
+#include <asm/switch_to.h>
+#include <asm/xive.h>
+#ifdef CONFIG_PPC_PSERIES
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#endif
+#include <asm/ultravisor.h>
+#include <asm/setup.h>
+
+#include "timing.h"
+#include "../mm/mmu_decl.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+/*
+ * Operation tables for the HV and PR implementations.  Either pointer
+ * may be NULL; kvm_arch_init_vm() selects one of them for each VM.
+ */
+struct kvmppc_ops *kvmppc_hv_ops;
+EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
+struct kvmppc_ops *kvmppc_pr_ops;
+EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
+
+
+/*
+ * A vcpu is runnable when it has pending exceptions or an outstanding
+ * KVM request.  Returns 1 if runnable, 0 otherwise.
+ */
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+	if (v->arch.pending_exceptions)
+		return 1;
+
+	return kvm_request_pending(v);
+}
+
+/* Same test as kvm_arch_vcpu_runnable(); no separate fast path exists. */
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+	return kvm_arch_vcpu_runnable(vcpu);
+}
+
+/* Guest privilege state is not reported here: always returns false. */
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
+/* Unconditionally answers "yes" (1) to the should-kick query. */
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
+/*
+ * Common checks before entering the guest world.  Call with interrupts
+ * enabled: the WARN_ON() below fires if they are already disabled, and
+ * this function hard-disables them itself.  On the return-1 path no
+ * local_irq_enable() follows the last hard_irq_disable(); every other
+ * return goes through local_irq_enable() first.
+ *
+ * returns:
+ *
+ * == 1 if we're ready to go into guest state
+ * <= 0 if we need to go back to the host with return value
+ */
+int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+	int r;
+
+	WARN_ON(irqs_disabled());
+	hard_irq_disable();
+
+	while (true) {
+		/* Yield with interrupts on, then start the checks over. */
+		if (need_resched()) {
+			local_irq_enable();
+			cond_resched();
+			hard_irq_disable();
+			continue;
+		}
+
+		if (signal_pending(current)) {
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+			vcpu->run->exit_reason = KVM_EXIT_INTR;
+			r = -EINTR;
+			break;
+		}
+
+		vcpu->mode = IN_GUEST_MODE;
+
+		/*
+		 * Reading vcpu->requests must happen after setting vcpu->mode,
+		 * so we don't miss a request because the requester sees
+		 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
+		 * before next entering the guest (and thus doesn't IPI).
+		 * This also orders the write to mode from any reads
+		 * to the page tables done while the VCPU is running.
+		 * Please see the comment in kvm_flush_remote_tlbs.
+		 */
+		smp_mb();
+
+		if (kvm_request_pending(vcpu)) {
+			/* Make sure we process requests preemptable */
+			local_irq_enable();
+			trace_kvm_check_requests(vcpu);
+			r = kvmppc_core_check_requests(vcpu);
+			hard_irq_disable();
+			/* r > 0: retry the loop; otherwise hand r to the host */
+			if (r > 0)
+				continue;
+			break;
+		}
+
+		if (kvmppc_core_prepare_to_enter(vcpu)) {
+			/* interrupts got enabled in between, so we
+			   are back at square 1 */
+			continue;
+		}
+
+		guest_enter_irqoff();
+		return 1;
+	}
+
+	/* return to host */
+	local_irq_enable();
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+/*
+ * Byte-swap in place the fields of the vcpu's shared area listed
+ * below, using swab64() or swab32() according to each field's width.
+ */
+static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_arch_shared *sh = vcpu->arch.shared;
+	int n;
+
+	/* Fields swapped as 64-bit quantities */
+	sh->sprg0 = swab64(sh->sprg0);
+	sh->sprg1 = swab64(sh->sprg1);
+	sh->sprg2 = swab64(sh->sprg2);
+	sh->sprg3 = swab64(sh->sprg3);
+	sh->srr0 = swab64(sh->srr0);
+	sh->srr1 = swab64(sh->srr1);
+	sh->dar = swab64(sh->dar);
+	sh->msr = swab64(sh->msr);
+
+	/* Fields swapped as 32-bit quantities */
+	sh->dsisr = swab32(sh->dsisr);
+	sh->int_pending = swab32(sh->int_pending);
+	for (n = 0; n < ARRAY_SIZE(sh->sr); n++)
+		sh->sr[n] = swab32(sh->sr[n]);
+}
+#endif
+
+/*
+ * Handle a KVM paravirtual hypercall issued by the guest.
+ *
+ * The hypercall token is read from GPR 11 and up to four parameters
+ * from GPRs 3-6 (truncated to 32 bits when MSR_SF is clear).  The
+ * second return value is written to GPR 4; the primary status
+ * (EV_SUCCESS or EV_UNIMPLEMENTED) is returned to the caller.
+ */
+int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
+{
+	int nr = kvmppc_get_gpr(vcpu, 11);
+	int r;
+	unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3);
+	unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4);
+	unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5);
+	unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
+	unsigned long r2 = 0;
+
+	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
+		/* 32 bit mode */
+		param1 &= 0xffffffff;
+		param2 &= 0xffffffff;
+		param3 &= 0xffffffff;
+		param4 &= 0xffffffff;
+	}
+
+	switch (nr) {
+	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
+	{
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+		/* Book3S can be little endian, find it out here */
+		int shared_big_endian = true;
+		if (vcpu->arch.intr_msr & MSR_LE)
+			shared_big_endian = false;
+		/* Re-swab the shared area only when the endianness flips. */
+		if (shared_big_endian != vcpu->arch.shared_big_endian)
+			kvmppc_swab_shared(vcpu);
+		vcpu->arch.shared_big_endian = shared_big_endian;
+#endif
+
+		if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
+			/*
+			 * Older versions of the Linux magic page code had
+			 * a bug where they would map their trampoline code
+			 * NX. If that's the case, remove !PR NX capability.
+			 */
+			vcpu->arch.disable_kernel_nx = true;
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+		}
+
+		/* param1/param2 carry the page addresses; low 12 bits dropped */
+		vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
+		vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
+
+#ifdef CONFIG_PPC_64K_PAGES
+		/*
+		 * Make sure our 4k magic page is in the same window of a 64k
+		 * page within the guest and within the host's page.
+		 */
+		if ((vcpu->arch.magic_page_pa & 0xf000) !=
+		    ((ulong)vcpu->arch.shared & 0xf000)) {
+			void *old_shared = vcpu->arch.shared;
+			ulong shared = (ulong)vcpu->arch.shared;
+			void *new_shared;
+
+			shared &= PAGE_MASK;
+			shared |= vcpu->arch.magic_page_pa & 0xf000;
+			new_shared = (void*)shared;
+			memcpy(new_shared, old_shared, 0x1000);
+			vcpu->arch.shared = new_shared;
+		}
+#endif
+
+		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
+
+		r = EV_SUCCESS;
+		break;
+	}
+	case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
+		r = EV_SUCCESS;
+#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
+		r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
+#endif
+
+		/* Second return value is in r4 */
+		break;
+	case EV_HCALL_TOKEN(EV_IDLE):
+		r = EV_SUCCESS;
+		kvm_vcpu_halt(vcpu);
+		break;
+	default:
+		r = EV_UNIMPLEMENTED;
+		break;
+	}
+
+	/* r2 stays 0 for hypercalls that have no second return value. */
+	kvmppc_set_gpr(vcpu, 4, r2);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
+
+/*
+ * Check that the vcpu's configuration is one that can be run and
+ * record the verdict in vcpu->arch.sane.
+ *
+ * Returns 0 when the configuration is sane, -EINVAL otherwise.
+ */
+int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
+{
+	int ok = true;
+
+	if (!vcpu->arch.pvr)
+		/* We have to know what CPU to virtualize */
+		ok = false;
+	else if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) &&
+		 vcpu->arch.papr_enabled)
+		/* PAPR only works with book3s_64 */
+		ok = false;
+	else if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
+		/* HV KVM can only do PAPR mode for now */
+		ok = false;
+#ifdef CONFIG_KVM_BOOKE_HV
+	else if (!cpu_has_feature(CPU_FTR_EMB_HV))
+		ok = false;
+#endif
+
+	vcpu->arch.sane = ok;
+	return ok ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
+
+/*
+ * Emulate the load/store instruction that triggered an MMIO exit and
+ * map the emulation result onto a RESUME_* code for the caller.
+ *
+ * When emulation fails, an interrupt is queued for the guest (a data
+ * storage interrupt, or a program interrupt when CONFIG_BOOKE is
+ * enabled) and the guest is resumed.
+ */
+int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu)
+{
+	enum emulation_result er;
+	int r;
+
+	er = kvmppc_emulate_loadstore(vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		/* Future optimization: only reload non-volatiles if they were
+		 * actually modified. */
+		r = RESUME_GUEST_NV;
+		break;
+	case EMULATE_AGAIN:
+		r = RESUME_GUEST;
+		break;
+	case EMULATE_DO_MMIO:
+		vcpu->run->exit_reason = KVM_EXIT_MMIO;
+		/* We must reload nonvolatiles because "update" load/store
+		 * instructions modify register state. */
+		/* Future optimization: only reload non-volatiles if they were
+		 * actually modified. */
+		r = RESUME_HOST_NV;
+		break;
+	case EMULATE_FAIL:
+	{
+		ppc_inst_t last_inst;
+
+		/* Fetched only so the opcode can be reported below. */
+		kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+		kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n",
+				      ppc_inst_val(last_inst));
+
+		/*
+		 * Injecting a Data Storage here is a bit more
+		 * accurate since the instruction that caused the
+		 * access could still be a valid one.
+		 */
+		if (!IS_ENABLED(CONFIG_BOOKE)) {
+			ulong dsisr = DSISR_BADACCESS;
+
+			if (vcpu->mmio_is_write)
+				dsisr |= DSISR_ISSTORE;
+
+			kvmppc_core_queue_data_storage(vcpu,
+					kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+					vcpu->arch.vaddr_accessed, dsisr);
+		} else {
+			/*
+			 * BookE does not send a SIGBUS on a bad
+			 * fault, so use a Program interrupt instead
+			 * to avoid a fault loop.
+			 */
+			kvmppc_core_queue_program(vcpu, 0);
+		}
+
+		r = RESUME_GUEST;
+		break;
+	}
+	default:
+		/* Unknown emulation result: warn and resume the guest. */
+		WARN_ON(1);
+		r = RESUME_GUEST;
+	}
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
+
+/*
+ * Store @size bytes from @ptr to the guest effective address *@eaddr.
+ *
+ * The backend's store_to_eaddr hook is tried first; its result is
+ * returned directly when it is 0 or -EAGAIN.  Otherwise the address is
+ * translated with kvmppc_xlate() (as data or instruction access,
+ * depending on @data), *@eaddr is overwritten with the translated real
+ * address, and the data is written either to the magic page or through
+ * kvm_write_guest().
+ *
+ * Returns a negative kvmppc_xlate() error, -EPERM if the mapping is not
+ * writable, EMULATE_DONE on success, or EMULATE_DO_MMIO if
+ * kvm_write_guest() fails.
+ */
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+	      bool data)
+{
+	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
+	struct kvmppc_pte pte;
+	int r = -EINVAL;
+
+	vcpu->stat.st++;
+
+	if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr)
+		r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr,
+							    size);
+
+	/* r is still -EINVAL here when no hook exists, so we fall through. */
+	if ((!r) || (r == -EAGAIN))
+		return r;
+
+	r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
+			 XLATE_WRITE, &pte);
+	if (r < 0)
+		return r;
+
+	*eaddr = pte.raddr;
+
+	if (!pte.may_write)
+		return -EPERM;
+
+	/* Magic page override */
+	if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
+	    ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		void *magic = vcpu->arch.shared;
+		magic += pte.eaddr & 0xfff;
+		memcpy(magic, ptr, size);
+		return EMULATE_DONE;
+	}
+
+	if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
+		return EMULATE_DO_MMIO;
+
+	return EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvmppc_st);
+
+/*
+ * Load @size bytes into @ptr from the guest effective address *@eaddr.
+ *
+ * The backend's load_from_eaddr hook is tried first; its result is
+ * returned directly when it is 0 or -EAGAIN.  Otherwise the address is
+ * translated with kvmppc_xlate() (as data or instruction access,
+ * depending on @data), *@eaddr is overwritten with the translated real
+ * address, and the data is read either from the magic page or through
+ * kvm_read_guest() under the vcpu's SRCU read lock.
+ *
+ * Returns any non-zero kvmppc_xlate() result, -EPERM if the mapping is
+ * not readable, -ENOEXEC for an instruction fetch from a non-executable
+ * mapping, EMULATE_DONE on success, or EMULATE_DO_MMIO if
+ * kvm_read_guest() fails.
+ */
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+		      bool data)
+{
+	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
+	struct kvmppc_pte pte;
+	int rc = -EINVAL;
+
+	vcpu->stat.ld++;
+
+	if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr)
+		rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr,
+							      size);
+
+	/* rc is still -EINVAL here when no hook exists, so we fall through. */
+	if ((!rc) || (rc == -EAGAIN))
+		return rc;
+
+	rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
+			  XLATE_READ, &pte);
+	if (rc)
+		return rc;
+
+	*eaddr = pte.raddr;
+
+	if (!pte.may_read)
+		return -EPERM;
+
+	if (!data && !pte.may_execute)
+		return -ENOEXEC;
+
+	/* Magic page override */
+	if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
+	    ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
+	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
+		void *magic = vcpu->arch.shared;
+		magic += pte.eaddr & 0xfff;
+		memcpy(ptr, magic, size);
+		return EMULATE_DONE;
+	}
+
+	kvm_vcpu_srcu_read_lock(vcpu);
+	rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	if (rc)
+		return EMULATE_DO_MMIO;
+
+	return EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvmppc_ld);
+
+/*
+ * Bind a new VM to the HV or PR operation table selected by @type and
+ * run the core VM initialisation.
+ *
+ * @type 0 picks HV when it is registered and PR otherwise;
+ * KVM_VM_PPC_HV and KVM_VM_PPC_PR request one backend explicitly.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @type or an
+ * unregistered backend, -ENOENT if the backend module cannot be
+ * pinned, or the error from kvmppc_core_init_vm() (in which case the
+ * module reference is dropped again).
+ */
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	struct kvmppc_ops *kvm_ops;
+	int r;
+
+	switch (type) {
+	case 0:
+		/* if we have both HV and PR enabled, default is HV */
+		kvm_ops = kvmppc_hv_ops ? kvmppc_hv_ops : kvmppc_pr_ops;
+		break;
+	case KVM_VM_PPC_HV:
+		kvm_ops = kvmppc_hv_ops;
+		break;
+	case KVM_VM_PPC_PR:
+		kvm_ops = kvmppc_pr_ops;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!kvm_ops)
+		return -EINVAL;
+
+	if (!try_module_get(kvm_ops->owner))
+		return -ENOENT;
+
+	kvm->arch.kvm_ops = kvm_ops;
+	r = kvmppc_core_init_vm(kvm);
+	if (r)
+		module_put(kvm_ops->owner);
+	return r;
+}
+
+/*
+ * Tear down a VM: destroy its vcpus, run the core VM teardown under
+ * kvm->lock and release the backend module reference taken in
+ * kvm_arch_init_vm().
+ */
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_XICS
+	/*
+	 * We call kick_all_cpus_sync() to ensure that all
+	 * CPUs have executed any pending IPIs before we
+	 * continue and free VCPUs structures below.
+	 */
+	if (is_kvmppc_hv_enabled(kvm))
+		kick_all_cpus_sync();
+#endif
+
+	kvm_destroy_vcpus(kvm);
+
+	mutex_lock(&kvm->lock);
+
+	kvmppc_core_destroy_vm(kvm);
+
+	mutex_unlock(&kvm->lock);
+
+	/* drop the module reference */
+	module_put(kvm->arch.kvm_ops->owner);
+}
+
+/*
+ * Report the value of capability @ext.
+ *
+ * @kvm may be NULL; in that case HV mode is assumed whenever the HV
+ * operation table is registered.  With a VM, the VM's own backend
+ * decides.  Which cases exist depends on the kernel configuration.
+ *
+ * Returns 0 for unsupported or unknown capabilities, otherwise a
+ * capability-specific positive value (often just 1).
+ */
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+	int r;
+	/* Assume we're using HV mode when the HV module is loaded */
+	int hv_enabled = kvmppc_hv_ops ? 1 : 0;
+
+	if (kvm) {
+		/*
+		 * Hooray - we know which VM type we're running on. Depend on
+		 * that rather than the guess above.
+		 */
+		hv_enabled = is_kvmppc_hv_enabled(kvm);
+	}
+
+	switch (ext) {
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_SREGS:
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+	case KVM_CAP_PPC_EPR:
+#else
+	case KVM_CAP_PPC_SEGSTATE:
+	case KVM_CAP_PPC_HIOR:
+	case KVM_CAP_PPC_PAPR:
+#endif
+	case KVM_CAP_PPC_UNSET_IRQ:
+	case KVM_CAP_PPC_IRQ_LEVEL:
+	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_ONE_REG:
+	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
+	case KVM_CAP_IMMEDIATE_EXIT:
+	case KVM_CAP_SET_GUEST_DEBUG:
+		r = 1;
+		break;
+	case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+	case KVM_CAP_PPC_PAIRED_SINGLES:
+	case KVM_CAP_PPC_OSI:
+	case KVM_CAP_PPC_GET_PVINFO:
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+	case KVM_CAP_SW_TLB:
+#endif
+		/* We support this only for PR */
+		r = !hv_enabled;
+		break;
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
+		r = 1;
+		break;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	case KVM_CAP_SPAPR_TCE:
+	case KVM_CAP_SPAPR_TCE_64:
+		r = 1;
+		break;
+	case KVM_CAP_SPAPR_TCE_VFIO:
+		r = !!cpu_has_feature(CPU_FTR_HVMODE);
+		break;
+	case KVM_CAP_PPC_RTAS:
+	case KVM_CAP_PPC_FIXUP_HCALL:
+	case KVM_CAP_PPC_ENABLE_HCALL:
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS:
+#endif
+	case KVM_CAP_PPC_GET_CPU_CHAR:
+		r = 1;
+		break;
+#ifdef CONFIG_KVM_XIVE
+	case KVM_CAP_PPC_IRQ_XIVE:
+		/*
+		 * We need XIVE to be enabled on the platform (implies
+		 * a POWER9 processor) and the PowerNV platform, as
+		 * nested is not yet supported.
+		 */
+		r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
+			kvmppc_xive_native_supported();
+		break;
+#endif
+
+#ifdef CONFIG_HAVE_KVM_IRQFD
+	case KVM_CAP_IRQFD_RESAMPLE:
+		r = !xive_enabled();
+		break;
+#endif
+
+	case KVM_CAP_PPC_ALLOC_HTAB:
+		r = hv_enabled;
+		break;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_CAP_PPC_SMT:
+		r = 0;
+		if (kvm) {
+			if (kvm->arch.emul_smt_mode > 1)
+				r = kvm->arch.emul_smt_mode;
+			else
+				r = kvm->arch.smt_mode;
+		} else if (hv_enabled) {
+			if (cpu_has_feature(CPU_FTR_ARCH_300))
+				r = 1;
+			else
+				r = threads_per_subcore;
+		}
+		break;
+	case KVM_CAP_PPC_SMT_POSSIBLE:
+		r = 1;
+		if (hv_enabled) {
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				r = ((threads_per_subcore << 1) - 1);
+			else
+				/* P9 can emulate dbells, so allow any mode */
+				r = 8 | 4 | 2 | 1;
+		}
+		break;
+	case KVM_CAP_PPC_RMA:
+		r = 0;
+		break;
+	case KVM_CAP_PPC_HWRNG:
+		r = kvmppc_hwrng_present();
+		break;
+	case KVM_CAP_PPC_MMU_RADIX:
+		r = !!(hv_enabled && radix_enabled());
+		break;
+	case KVM_CAP_PPC_MMU_HASH_V3:
+		r = !!(hv_enabled && kvmppc_hv_ops->hash_v3_possible &&
+		       kvmppc_hv_ops->hash_v3_possible());
+		break;
+	case KVM_CAP_PPC_NESTED_HV:
+		r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
+		       !kvmppc_hv_ops->enable_nested(NULL));
+		break;
+#endif
+	case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+		r = hv_enabled;
+#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+		r = 1;
+#else
+		r = 0;
+#endif
+		break;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_CAP_PPC_HTAB_FD:
+		r = hv_enabled;
+		break;
+#endif
+	case KVM_CAP_NR_VCPUS:
+		/*
+		 * Recommending a number of CPUs is somewhat arbitrary; we
+		 * return the number of present CPUs for -HV (since a host
+		 * will have secondary threads "offline"), and for other KVM
+		 * implementations just count online CPUs.
+		 */
+		if (hv_enabled)
+			r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS);
+		else
+			r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
+		break;
+	case KVM_CAP_MAX_VCPUS:
+		r = KVM_MAX_VCPUS;
+		break;
+	case KVM_CAP_MAX_VCPU_ID:
+		r = KVM_MAX_VCPU_IDS;
+		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case KVM_CAP_PPC_GET_SMMU_INFO:
+		r = 1;
+		break;
+	case KVM_CAP_SPAPR_MULTITCE:
+		r = 1;
+		break;
+	case KVM_CAP_SPAPR_RESIZE_HPT:
+		r = !!hv_enabled;
+		break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_CAP_PPC_FWNMI:
+		r = hv_enabled;
+		break;
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case KVM_CAP_PPC_HTM:
+		r = !!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+		     (hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
+		break;
+#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+	case KVM_CAP_PPC_SECURE_GUEST:
+		r = hv_enabled && kvmppc_hv_ops->enable_svm &&
+			!kvmppc_hv_ops->enable_svm(NULL);
+		break;
+	case KVM_CAP_PPC_DAWR1:
+		r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
+		       !kvmppc_hv_ops->enable_dawr1(NULL));
+		break;
+	case KVM_CAP_PPC_RPT_INVALIDATE:
+		r = 1;
+		break;
+#endif
+	case KVM_CAP_PPC_AIL_MODE_3:
+		r = 0;
+		/*
+		 * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode.
+		 * The POWER9s can support it if the guest runs in hash mode,
+		 * but QEMU doesn't necessarily query the capability in time.
+		 */
+		if (hv_enabled) {
+			if (kvmhv_on_pseries()) {
+				if (pseries_reloc_on_exception())
+					r = 1;
+			} else if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+				  !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+				r = 1;
+			}
+		}
+		break;
+	default:
+		r = 0;
+		break;
+	}
+	return r;
+
+}
+
+/* Arch hook for device ioctls: none are handled, always -EINVAL. */
+long kvm_arch_dev_ioctl(struct file *filp,
+                        unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+/* Forward memslot freeing to kvmppc_core_free_memslot(). */
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	kvmppc_core_free_memslot(kvm, slot);
+}
+
+/*
+ * Forward to kvmppc_core_prepare_memory_region() and return its result
+ * unchanged.
+ */
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   const struct kvm_memory_slot *old,
+				   struct kvm_memory_slot *new,
+				   enum kvm_mr_change change)
+{
+	return kvmppc_core_prepare_memory_region(kvm, old, new, change);
+}
+
+/* Forward to kvmppc_core_commit_memory_region(). */
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *old,
+				   const struct kvm_memory_slot *new,
+				   enum kvm_mr_change change)
+{
+	kvmppc_core_commit_memory_region(kvm, old, new, change);
+}
+
+/* Forward to kvmppc_core_flush_memslot(). */
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvmppc_core_flush_memslot(kvm, slot);
+}
+
+/* No pre-creation checks are performed: always returns 0. */
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+	return 0;
+}
+
+/*
+ * hrtimer callback for a vcpu's dec_timer: recover the owning vcpu from
+ * the embedded timer and hand it to kvmppc_decrementer_func().
+ */
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
+{
+	kvmppc_decrementer_func(container_of(timer, struct kvm_vcpu,
+					     arch.dec_timer));
+
+	/* The timer is not re-armed from the callback. */
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Architecture part of vcpu creation: set up the decrementer hrtimer,
+ * run the sub-architecture and core initialisers, and initialise the
+ * vcpu's rcuwait.
+ *
+ * Returns 0 on success or the error from kvmppc_subarch_vcpu_init() /
+ * kvmppc_core_vcpu_create(); in the latter case the sub-architecture
+ * state is torn down again before returning.
+ */
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	int rc;
+
+	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+	mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+	rc = kvmppc_subarch_vcpu_init(vcpu);
+	if (rc)
+		return rc;
+
+	rc = kvmppc_core_vcpu_create(vcpu);
+	if (rc) {
+		kvmppc_subarch_vcpu_uninit(vcpu);
+		return rc;
+	}
+
+	rcuwait_init(&vcpu->arch.wait);
+	vcpu->arch.waitp = &vcpu->arch.wait;
+	return 0;
+}
+
+/* Nothing to do after vcpu creation. */
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+}
+
+/*
+ * Architecture part of vcpu destruction: stop the decrementer timer,
+ * detach the vcpu from whichever interrupt controller type it was
+ * connected to, then free the core and sub-architecture state.
+ */
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	/* Make sure we're not using the vcpu anymore */
+	hrtimer_cancel(&vcpu->arch.dec_timer);
+
+	/* Other irq_type values need no controller-specific cleanup here. */
+	switch (vcpu->arch.irq_type) {
+	case KVMPPC_IRQ_MPIC:
+		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+		break;
+	case KVMPPC_IRQ_XICS:
+		if (xics_on_xive())
+			kvmppc_xive_cleanup_vcpu(vcpu);
+		else
+			kvmppc_xics_free_icp(vcpu);
+		break;
+	case KVMPPC_IRQ_XIVE:
+		kvmppc_xive_native_cleanup_vcpu(vcpu);
+		break;
+	}
+
+	kvmppc_core_vcpu_free(vcpu);
+
+	kvmppc_subarch_vcpu_uninit(vcpu);
+}
+
+/* Returns the result of kvmppc_core_pending_dec() for this vcpu. */
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return kvmppc_core_pending_dec(vcpu);
+}
+
+/*
+ * Load vcpu state onto @cpu.  With CONFIG_BOOKE the guest's VRSAVE is
+ * written to the SPR first; the rest is done by kvmppc_core_vcpu_load().
+ */
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+#ifdef CONFIG_BOOKE
+	/*
+	 * vrsave (formerly usprg0) isn't used by Linux, but may
+	 * be used by the guest.
+	 *
+	 * On non-booke this is associated with Altivec and
+	 * is handled by code in book3s.c.
+	 */
+	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+	kvmppc_core_vcpu_load(vcpu, cpu);
+}
+
+/*
+ * Counterpart of kvm_arch_vcpu_load(): run kvmppc_core_vcpu_put() and,
+ * with CONFIG_BOOKE, save the current VRSAVE SPR back into the vcpu.
+ */
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	kvmppc_core_vcpu_put(vcpu);
+#ifdef CONFIG_BOOKE
+	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+}
+
+/*
+ * irq_bypass_add_producer and irq_bypass_del_producer are only
+ * useful if the architecture supports PCI passthrough.
+ * irq_bypass_stop and irq_bypass_start are not needed and so
+ * kvm_ops are not defined for them.
+ */
+/* True when either registered backend provides irq_bypass_add_producer. */
+bool kvm_arch_has_irq_bypass(void)
+{
+	if (kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer)
+		return true;
+
+	if (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer)
+		return true;
+
+	return false;
+}
+
+/*
+ * Forward a producer registration to the backend of the VM that owns
+ * the irqfd embedding @cons.  Returns the backend's result, or 0 when
+ * the backend has no irq_bypass_add_producer hook.
+ */
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+				     struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd;
+	struct kvmppc_ops *ops;
+
+	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
+	ops = irqfd->kvm->arch.kvm_ops;
+
+	if (!ops->irq_bypass_add_producer)
+		return 0;
+
+	return ops->irq_bypass_add_producer(cons, prod);
+}
+
+/*
+ * Forward a producer removal to the backend of the VM that owns the
+ * irqfd embedding @cons; a no-op when the backend has no
+ * irq_bypass_del_producer hook.
+ */
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd;
+	struct kvmppc_ops *ops;
+
+	irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer);
+	ops = irqfd->kvm->arch.kvm_ops;
+
+	if (!ops->irq_bypass_del_producer)
+		return;
+
+	ops->irq_bypass_del_producer(cons, prod);
+}
+
+#ifdef CONFIG_VSX
+/*
+ * Map doubleword @index (0 or 1) of a VSX register to its host array
+ * position: unchanged when __BIG_ENDIAN is defined, mirrored otherwise.
+ * Returns -1 for any other @index.
+ */
+static inline int kvmppc_get_vsr_dword_offset(int index)
+{
+	if (index < 0 || index > 1)
+		return -1;
+
+#ifdef __BIG_ENDIAN
+	return index;
+#else
+	return 1 - index;
+#endif
+}
+
+/*
+ * Map word @index (0..3) of a VSX register to its host array position:
+ * unchanged when __BIG_ENDIAN is defined, mirrored otherwise.
+ * Returns -1 for an out-of-range @index.
+ */
+static inline int kvmppc_get_vsr_word_offset(int index)
+{
+	if (index < 0 || index > 3)
+		return -1;
+
+#ifdef __BIG_ENDIAN
+	return index;
+#else
+	return 3 - index;
+#endif
+}
+
+/*
+ * Store @gpr into one doubleword of the VSX register selected by
+ * io_gpr.  The doubleword is chosen by mmio_vsx_offset; nothing is
+ * written when that offset is invalid.  Register numbers below 32 are
+ * written through VCPU_VSX_FPR(), the rest through VCPU_VSX_VR().
+ */
+static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
+	u64 gpr)
+{
+	union kvmppc_one_reg tmp;
+	int dw = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+	int reg = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (dw == -1)
+		return;
+
+	if (reg < 32) {
+		VCPU_VSX_FPR(vcpu, reg, dw) = gpr;
+		return;
+	}
+
+	/* Read-modify-write so the other doubleword is kept. */
+	tmp.vval = VCPU_VSX_VR(vcpu, reg - 32);
+	tmp.vsxval[dw] = gpr;
+	VCPU_VSX_VR(vcpu, reg - 32) = tmp.vval;
+}
+
+/*
+ * Store @gpr into both doublewords of the VSX register selected by
+ * io_gpr (register numbers below 32 via VCPU_VSX_FPR(), the rest via
+ * VCPU_VSX_VR()).
+ */
+static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
+	u64 gpr)
+{
+	union kvmppc_one_reg tmp;
+	int reg = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (reg < 32) {
+		VCPU_VSX_FPR(vcpu, reg, 0) = gpr;
+		VCPU_VSX_FPR(vcpu, reg, 1) = gpr;
+		return;
+	}
+
+	tmp.vval = VCPU_VSX_VR(vcpu, reg - 32);
+	tmp.vsxval[0] = gpr;
+	tmp.vsxval[1] = gpr;
+	VCPU_VSX_VR(vcpu, reg - 32) = tmp.vval;
+}
+
+/*
+ * Replicate the 32-bit value @gpr into every word of the VSX register
+ * selected by io_gpr (register numbers below 32 via VCPU_VSX_FPR(), the
+ * rest via VCPU_VSX_VR()).
+ */
+static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
+	u32 gpr)
+{
+	union kvmppc_one_reg val;
+	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (index >= 32) {
+		val.vsx32val[0] = gpr;
+		val.vsx32val[1] = gpr;
+		val.vsx32val[2] = gpr;
+		val.vsx32val[3] = gpr;
+		VCPU_VSX_VR(vcpu, index - 32) = val.vval;
+	} else {
+		/* Build one doubleword from two copies, then store it twice. */
+		val.vsx32val[0] = gpr;
+		val.vsx32val[1] = gpr;
+		VCPU_VSX_FPR(vcpu, index, 0) = val.vsxval[0];
+		VCPU_VSX_FPR(vcpu, index, 1) = val.vsxval[0];
+	}
+}
+
+/*
+ * Store the 32-bit value @gpr32 into one word of the VSX register
+ * selected by io_gpr.  The word is chosen by mmio_vsx_offset; nothing
+ * is written when that offset is invalid.  The other words of the
+ * register are preserved.
+ */
+static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
+	u32 gpr32)
+{
+	union kvmppc_one_reg val;
+	int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+	int dword_offset, word_offset;
+
+	if (offset == -1)
+		return;
+
+	if (index >= 32) {
+		val.vval = VCPU_VSX_VR(vcpu, index - 32);
+		val.vsx32val[offset] = gpr32;
+		VCPU_VSX_VR(vcpu, index - 32) = val.vval;
+	} else {
+		/* Split the word offset into doubleword and word-within-it. */
+		dword_offset = offset / 2;
+		word_offset = offset % 2;
+		val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset);
+		val.vsx32val[word_offset] = gpr32;
+		VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0];
+	}
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Map element @index of a vector128 made of @element_size-byte
+ * elements to its host array position.  The index is mirrored when
+ * kvmppc_need_byteswap() reports that a byte swap is needed.
+ * Returns -1 for an out-of-range @index.
+ */
+static inline int kvmppc_get_vmx_offset_generic(struct kvm_vcpu *vcpu,
+		int index, int element_size)
+{
+	int nelem = sizeof(vector128)/element_size;
+
+	if (index < 0 || index >= nelem)
+		return -1;
+
+	return kvmppc_need_byteswap(vcpu) ? nelem - index - 1 : index;
+}
+
+/* Host position of 8-byte element @index, or -1 if out of range. */
+static inline int kvmppc_get_vmx_dword_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 8);
+}
+
+/* Host position of 4-byte element @index, or -1 if out of range. */
+static inline int kvmppc_get_vmx_word_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 4);
+}
+
+/* Host position of 2-byte element @index, or -1 if out of range. */
+static inline int kvmppc_get_vmx_hword_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 2);
+}
+
+/* Host position of 1-byte element @index, or -1 if out of range. */
+static inline int kvmppc_get_vmx_byte_offset(struct kvm_vcpu *vcpu,
+		int index)
+{
+	return kvmppc_get_vmx_offset_generic(vcpu, index, 1);
+}
+
+
+/*
+ * Store @gpr into the doubleword selected by mmio_vmx_offset of the
+ * vector register selected by io_gpr, keeping the other doubleword.
+ * Nothing is written when the offset is invalid.
+ */
+static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
+	u64 gpr)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_dword_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int vr = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (slot != -1) {
+		reg.vval = VCPU_VSX_VR(vcpu, vr);
+		reg.vsxval[slot] = gpr;
+		VCPU_VSX_VR(vcpu, vr) = reg.vval;
+	}
+}
+
+/*
+ * Store @gpr32 into the word selected by mmio_vmx_offset of the vector
+ * register selected by io_gpr, keeping the other words.  Nothing is
+ * written when the offset is invalid.
+ */
+static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu,
+	u32 gpr32)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_word_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int vr = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (slot != -1) {
+		reg.vval = VCPU_VSX_VR(vcpu, vr);
+		reg.vsx32val[slot] = gpr32;
+		VCPU_VSX_VR(vcpu, vr) = reg.vval;
+	}
+}
+
+/*
+ * Store @gpr16 into the halfword selected by mmio_vmx_offset of the
+ * vector register selected by io_gpr, keeping the other halfwords.
+ * Nothing is written when the offset is invalid.
+ */
+static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu,
+	u16 gpr16)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_hword_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int vr = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (slot != -1) {
+		reg.vval = VCPU_VSX_VR(vcpu, vr);
+		reg.vsx16val[slot] = gpr16;
+		VCPU_VSX_VR(vcpu, vr) = reg.vval;
+	}
+}
+
+/*
+ * Store @gpr8 into the byte selected by mmio_vmx_offset of the vector
+ * register selected by io_gpr, keeping the other bytes.  Nothing is
+ * written when the offset is invalid.
+ */
+static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu,
+	u8 gpr8)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_byte_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+	int vr = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+	if (slot != -1) {
+		reg.vval = VCPU_VSX_VR(vcpu, vr);
+		reg.vsx8val[slot] = gpr8;
+		VCPU_VSX_VR(vcpu, vr) = reg.vval;
+	}
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Convert the 32-bit value @fprs to a 64-bit one by loading it into
+ * FPR0 with lfs and storing it back with stfd.  Runs with preemption
+ * disabled and after enable_kernel_fp().
+ */
+static inline u64 sp_to_dp(u32 fprs)
+{
+	u64 fprd;
+
+	preempt_disable();
+	enable_kernel_fp();
+	asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m<>" (fprd) : "m<>" (fprs)
+	     : "fr0");
+	preempt_enable();
+	return fprd;
+}
+
+/*
+ * Convert the 64-bit value @fprd to a 32-bit one by loading it into
+ * FPR0 with lfd and storing it back with stfs.  Runs with preemption
+ * disabled and after enable_kernel_fp().
+ */
+static inline u32 dp_to_sp(u64 fprd)
+{
+	u32 fprs;
+
+	preempt_disable();
+	enable_kernel_fp();
+	asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m<>" (fprs) : "m<>" (fprd)
+	     : "fr0");
+	preempt_enable();
+	return fprs;
+}
+
+#else
+#define sp_to_dp(x)	(x)
+#define dp_to_sp(x)	(x)
+#endif /* CONFIG_PPC_FPU */
+
+/*
+ * Finish an emulated MMIO load: take the bytes found in run->mmio.data,
+ * undo the host byte swap if one was recorded, apply the optional
+ * single->double conversion and sign extension, and deposit the result
+ * in the destination encoded in vcpu->arch.io_gpr (GPR, FPR, QPR, VSX,
+ * VMX or a nested guest's GPR, depending on the kernel configuration).
+ *
+ * run->mmio.len is read from the kvm_run structure, so it is validated
+ * here: only lengths of 1, 2, 4 and 8 bytes are accepted.  Any other
+ * length makes the function return without touching guest state, which
+ * also guarantees that 'gpr' is never consumed uninitialised.
+ *
+ * An io_gpr value with an unknown register class triggers BUG().
+ */
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	u64 gpr;
+
+	if (run->mmio.len > sizeof(gpr))
+		return;
+
+	if (!vcpu->arch.mmio_host_swabbed) {
+		switch (run->mmio.len) {
+		case 8: gpr = *(u64 *)run->mmio.data; break;
+		case 4: gpr = *(u32 *)run->mmio.data; break;
+		case 2: gpr = *(u16 *)run->mmio.data; break;
+		case 1: gpr = *(u8 *)run->mmio.data; break;
+		default: return;	/* unsupported access width */
+		}
+	} else {
+		switch (run->mmio.len) {
+		case 8: gpr = swab64(*(u64 *)run->mmio.data); break;
+		case 4: gpr = swab32(*(u32 *)run->mmio.data); break;
+		case 2: gpr = swab16(*(u16 *)run->mmio.data); break;
+		case 1: gpr = *(u8 *)run->mmio.data; break;
+		default: return;	/* unsupported access width */
+		}
+	}
+
+	/* conversion between single and double precision */
+	if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4))
+		gpr = sp_to_dp(gpr);
+
+	if (vcpu->arch.mmio_sign_extend) {
+		switch (run->mmio.len) {
+#ifdef CONFIG_PPC64
+		case 4:
+			gpr = (s64)(s32)gpr;
+			break;
+#endif
+		case 2:
+			gpr = (s64)(s16)gpr;
+			break;
+		case 1:
+			gpr = (s64)(s8)gpr;
+			break;
+		}
+	}
+
+	/* The upper bits of io_gpr select the register class to update. */
+	switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
+	case KVM_MMIO_REG_GPR:
+		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
+		break;
+	case KVM_MMIO_REG_FPR:
+		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_FP);
+
+		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
+		break;
+#ifdef CONFIG_PPC_BOOK3S
+	case KVM_MMIO_REG_QPR:
+		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		break;
+	case KVM_MMIO_REG_FQPR:
+		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
+		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+		break;
+#endif
+#ifdef CONFIG_VSX
+	case KVM_MMIO_REG_VSX:
+		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX);
+
+		if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD)
+			kvmppc_set_vsr_dword(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_WORD)
+			kvmppc_set_vsr_word(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type ==
+				KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
+			kvmppc_set_vsr_dword_dump(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type ==
+				KVMPPC_VSX_COPY_WORD_LOAD_DUMP)
+			kvmppc_set_vsr_word_dump(vcpu, gpr);
+		break;
+#endif
+#ifdef CONFIG_ALTIVEC
+	case KVM_MMIO_REG_VMX:
+		if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+			vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC);
+
+		if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_DWORD)
+			kvmppc_set_vmx_dword(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_WORD)
+			kvmppc_set_vmx_word(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type ==
+				KVMPPC_VMX_COPY_HWORD)
+			kvmppc_set_vmx_hword(vcpu, gpr);
+		else if (vcpu->arch.mmio_copy_type ==
+				KVMPPC_VMX_COPY_BYTE)
+			kvmppc_set_vmx_byte(vcpu, gpr);
+		break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_MMIO_REG_NESTED_GPR:
+		if (kvmppc_need_byteswap(vcpu))
+			gpr = swab64(gpr);
+		kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr,
+				     sizeof(gpr));
+		break;
+#endif
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Common worker for emulated MMIO loads.
+ *
+ * Records the pending access in both the vcpu and the kvm_run area, then
+ * tries to satisfy it from an in-kernel device on the MMIO bus.
+ *
+ * Returns EMULATE_FAIL if @bytes exceeds the run->mmio.data buffer,
+ * EMULATE_DONE if an in-kernel device answered (the destination register
+ * has then already been updated), or EMULATE_DO_MMIO if the bus read did
+ * not succeed and the access is left pending (mmio_needed stays set).
+ */
+static int __kvmppc_handle_load(struct kvm_vcpu *vcpu,
+				unsigned int rt, unsigned int bytes,
+				int is_default_endian, int sign_extend)
+{
+	struct kvm_run *run = vcpu->run;
+	bool host_swabbed;
+	int srcu_idx, bus_ret;
+
+	/* Swab on the host when "needs byteswap" and "default endian" agree */
+	host_swabbed = kvmppc_need_byteswap(vcpu) ? is_default_endian
+						  : !is_default_endian;
+
+	if (bytes > sizeof(run->mmio.data))
+		return EMULATE_FAIL;
+
+	/* What userspace (or the bus device) needs to know */
+	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+	run->mmio.len = bytes;
+	run->mmio.is_write = 0;
+
+	/* What we need to know to complete the load later */
+	vcpu->arch.io_gpr = rt;
+	vcpu->arch.mmio_host_swabbed = host_swabbed;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_is_write = 0;
+	vcpu->arch.mmio_sign_extend = sign_extend;
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	bus_ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
+				  bytes, &run->mmio.data);
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+	if (bus_ret)
+		return EMULATE_DO_MMIO;
+
+	/* Handled in the kernel: finish the load right away */
+	kvmppc_complete_mmio_load(vcpu);
+	vcpu->mmio_needed = 0;
+	return EMULATE_DONE;
+}
+
+/*
+ * Emulate an MMIO load of @bytes bytes into register @rt without sign
+ * extension.  Thin wrapper around __kvmppc_handle_load(); see there for
+ * the possible EMULATE_* return values.
+ */
+int kvmppc_handle_load(struct kvm_vcpu *vcpu,
+		       unsigned int rt, unsigned int bytes,
+		       int is_default_endian)
+{
+	return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0);
+}
+EXPORT_SYMBOL_GPL(kvmppc_handle_load);
+
+/*
+ * Same as kvmppc_handle_load(), but requests sign extension of the
+ * loaded value (passes 1 as the sign_extend argument).
+ */
+int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
+			unsigned int rt, unsigned int bytes,
+			int is_default_endian)
+{
+	return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 1);
+}
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate a VSX load as a series of element-sized MMIO loads.
+ *
+ * mmio_vsx_copy_nums holds the number of elements still to transfer and
+ * must not exceed 4.  After every element that completes in the kernel
+ * the accessed address, the remaining count and the element offset are
+ * advanced.  The loop stops at the first element that does not return
+ * EMULATE_DONE and hands that result back to the caller.
+ */
+int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
+			unsigned int rt, unsigned int bytes,
+			int is_default_endian, int mmio_sign_extend)
+{
+	enum emulation_result rc = EMULATE_DONE;
+
+	/* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+	if (vcpu->arch.mmio_vsx_copy_nums > 4)
+		return EMULATE_FAIL;
+
+	while (vcpu->arch.mmio_vsx_copy_nums != 0) {
+		rc = __kvmppc_handle_load(vcpu, rt, bytes,
+					  is_default_endian, mmio_sign_extend);
+		if (rc != EMULATE_DONE)
+			return rc;
+
+		/* element done: step to the next one */
+		vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+		vcpu->arch.mmio_vsx_copy_nums--;
+		vcpu->arch.mmio_vsx_offset++;
+	}
+
+	return rc;
+}
+#endif /* CONFIG_VSX */
+
+/*
+ * Emulate an MMIO store of the low @bytes bytes of @val.
+ *
+ * The value is placed (byte-swapped if required) at the start of
+ * run->mmio.data and offered to the in-kernel MMIO bus.
+ *
+ * Returns EMULATE_FAIL if @bytes exceeds the run->mmio.data buffer,
+ * EMULATE_DONE if an in-kernel device accepted the write, or
+ * EMULATE_DO_MMIO if the bus write did not succeed and the access is
+ * left pending (mmio_needed stays set).
+ */
+int kvmppc_handle_store(struct kvm_vcpu *vcpu,
+			u64 val, unsigned int bytes, int is_default_endian)
+{
+	struct kvm_run *run = vcpu->run;
+	void *data = run->mmio.data;
+	int idx, ret;
+	bool host_swabbed;
+
+	/* Pity C doesn't have a logical XOR operator */
+	if (kvmppc_need_byteswap(vcpu)) {
+		host_swabbed = is_default_endian;
+	} else {
+		host_swabbed = !is_default_endian;
+	}
+
+	if (bytes > sizeof(run->mmio.data))
+		return EMULATE_FAIL;
+
+	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+	run->mmio.len = bytes;
+	run->mmio.is_write = 1;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_is_write = 1;
+
+	/* conversion between double and single precision */
+	if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4))
+		val = dp_to_sp(val);
+
+	/* Store the value at the lowest bytes in 'data'. */
+	if (!host_swabbed) {
+		switch (bytes) {
+		case 8: *(u64 *)data = val; break;
+		case 4: *(u32 *)data = val; break;
+		case 2: *(u16 *)data = val; break;
+		case 1: *(u8  *)data = val; break;
+		}
+	} else {
+		switch (bytes) {
+		case 8: *(u64 *)data = swab64(val); break;
+		case 4: *(u32 *)data = swab32(val); break;
+		case 2: *(u16 *)data = swab16(val); break;
+		case 1: *(u8  *)data = val; break;
+		}
+	}
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
+			       bytes, &run->mmio.data);
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	/* a zero return means an in-kernel device consumed the write */
+	if (!ret) {
+		vcpu->mmio_needed = 0;
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_DO_MMIO;
+}
+EXPORT_SYMBOL_GPL(kvmppc_handle_store);
+
+#ifdef CONFIG_VSX
+/*
+ * Fetch the next element of VSX register @rs that an emulated store has
+ * to write, according to mmio_copy_type and mmio_vsx_offset.
+ *
+ * Registers 0-31 are read through VCPU_VSX_FPR(), registers 32 and up
+ * through VCPU_VSX_VR() with 32 subtracted from the index.
+ *
+ * Returns 0 with *val filled in, or -1 if the copy type is not a
+ * (double)word copy or the offset helper rejects mmio_vsx_offset.
+ */
+static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	u32 dword_idx, word_idx;
+	int off;
+
+	switch (vcpu->arch.mmio_copy_type) {
+	case KVMPPC_VSX_COPY_DWORD:
+		off = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+		if (off == -1)
+			return -1;
+
+		if (rs >= 32) {
+			reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
+			*val = reg.vsxval[off];
+		} else {
+			*val = VCPU_VSX_FPR(vcpu, rs, off);
+		}
+		return 0;
+
+	case KVMPPC_VSX_COPY_WORD:
+		off = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+		if (off == -1)
+			return -1;
+
+		if (rs >= 32) {
+			reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
+			*val = reg.vsx32val[off];
+		} else {
+			/* pick the doubleword first, then the word inside it */
+			dword_idx = off / 2;
+			word_idx = off % 2;
+			reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_idx);
+			*val = reg.vsx32val[word_idx];
+		}
+		return 0;
+
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Emulate a VSX store as a series of element-sized MMIO stores.
+ *
+ * io_gpr is set to @rs before anything else.  mmio_vsx_copy_nums holds
+ * the number of elements still to transfer and must not exceed 4.  Each
+ * element is fetched with kvmppc_get_vsr_data() and written with
+ * kvmppc_handle_store(); bookkeeping advances only after an element
+ * completed in the kernel.  The first result other than EMULATE_DONE
+ * ends the loop and is returned.
+ */
+int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
+			int rs, unsigned int bytes, int is_default_endian)
+{
+	enum emulation_result rc = EMULATE_DONE;
+	u64 element;
+
+	vcpu->arch.io_gpr = rs;
+
+	/* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+	if (vcpu->arch.mmio_vsx_copy_nums > 4)
+		return EMULATE_FAIL;
+
+	while (vcpu->arch.mmio_vsx_copy_nums != 0) {
+		if (kvmppc_get_vsr_data(vcpu, rs, &element) == -1)
+			return EMULATE_FAIL;
+
+		rc = kvmppc_handle_store(vcpu, element, bytes,
+					 is_default_endian);
+		if (rc != EMULATE_DONE)
+			return rc;
+
+		/* element done: step to the next one */
+		vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+		vcpu->arch.mmio_vsx_copy_nums--;
+		vcpu->arch.mmio_vsx_offset++;
+	}
+
+	return rc;
+}
+
+/*
+ * Resume a multi-element VSX MMIO access after one element has been
+ * completed: advance the accessed address by the size of that element
+ * and re-run the VSX load or store handler for the rest.
+ *
+ * Returns RESUME_HOST when another exit to userspace is required (with
+ * run->exit_reason set to KVM_EXIT_MMIO or KVM_EXIT_INTERNAL_ERROR), or
+ * RESUME_GUEST otherwise.
+ */
+static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	enum emulation_result rc;
+
+	vcpu->arch.paddr_accessed += run->mmio.len;
+
+	if (vcpu->mmio_is_write)
+		rc = kvmppc_handle_vsx_store(vcpu, vcpu->arch.io_gpr,
+					     run->mmio.len, 1);
+	else
+		rc = kvmppc_handle_vsx_load(vcpu, vcpu->arch.io_gpr,
+					    run->mmio.len, 1,
+					    vcpu->arch.mmio_sign_extend);
+
+	if (rc == EMULATE_DO_MMIO) {
+		run->exit_reason = KVM_EXIT_MMIO;
+		return RESUME_HOST;
+	}
+
+	if (rc == EMULATE_FAIL) {
+		pr_info("KVM: MMIO emulation failed (VSX repeat)\n");
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		return RESUME_HOST;
+	}
+
+	return RESUME_GUEST;
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Emulate a VMX load as a series of element-sized MMIO loads without
+ * sign extension.
+ *
+ * mmio_vmx_copy_nums holds the number of elements still to transfer and
+ * must not exceed 2.  Bookkeeping advances only after an element
+ * completed in the kernel; the first result other than EMULATE_DONE
+ * ends the loop and is returned.
+ */
+int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
+		unsigned int rt, unsigned int bytes, int is_default_endian)
+{
+	enum emulation_result rc = EMULATE_DONE;
+
+	if (vcpu->arch.mmio_vmx_copy_nums > 2)
+		return EMULATE_FAIL;
+
+	while (vcpu->arch.mmio_vmx_copy_nums != 0) {
+		rc = __kvmppc_handle_load(vcpu, rt, bytes,
+					  is_default_endian, 0);
+		if (rc != EMULATE_DONE)
+			return rc;
+
+		/* element done: step to the next one */
+		vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+		vcpu->arch.mmio_vmx_copy_nums--;
+		vcpu->arch.mmio_vmx_offset++;
+	}
+
+	return rc;
+}
+
+/*
+ * Read the doubleword of vector register @index that corresponds to the
+ * current mmio_vmx_offset.  Returns 0 with *val set, or -1 if the offset
+ * lookup fails.
+ */
+static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_dword_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+
+	if (slot == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsxval[slot];
+	return 0;
+}
+
+/*
+ * Read the word of vector register @index that corresponds to the
+ * current mmio_vmx_offset.  Returns 0 with *val set, or -1 if the offset
+ * lookup fails.
+ */
+static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_word_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+
+	if (slot == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsx32val[slot];
+	return 0;
+}
+
+/*
+ * Read the halfword of vector register @index that corresponds to the
+ * current mmio_vmx_offset.  Returns 0 with *val set, or -1 if the offset
+ * lookup fails.
+ */
+static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_hword_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+
+	if (slot == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsx16val[slot];
+	return 0;
+}
+
+/*
+ * Read the byte of vector register @index that corresponds to the
+ * current mmio_vmx_offset.  Returns 0 with *val set, or -1 if the offset
+ * lookup fails.
+ */
+static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+	union kvmppc_one_reg reg;
+	int slot = kvmppc_get_vmx_byte_offset(vcpu,
+			vcpu->arch.mmio_vmx_offset);
+
+	if (slot == -1)
+		return -1;
+
+	reg.vval = VCPU_VSX_VR(vcpu, index);
+	*val = reg.vsx8val[slot];
+	return 0;
+}
+
+/*
+ * Emulate a VMX store as a series of element-sized MMIO stores.
+ *
+ * mmio_vmx_copy_nums holds the number of elements still to transfer and
+ * must not exceed 2; io_gpr is set to @rs once that check has passed.
+ * The element width is chosen by mmio_copy_type.  Bookkeeping advances
+ * only after an element completed in the kernel; the first result other
+ * than EMULATE_DONE ends the loop and is returned.  An unknown copy type
+ * or a failed element fetch yields EMULATE_FAIL.
+ */
+int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
+		unsigned int rs, unsigned int bytes, int is_default_endian)
+{
+	enum emulation_result rc = EMULATE_DONE;
+	unsigned int vr = rs & KVM_MMIO_REG_MASK;
+	u64 element = 0;
+	int fetched;
+
+	if (vcpu->arch.mmio_vmx_copy_nums > 2)
+		return EMULATE_FAIL;
+
+	vcpu->arch.io_gpr = rs;
+
+	while (vcpu->arch.mmio_vmx_copy_nums != 0) {
+		switch (vcpu->arch.mmio_copy_type) {
+		case KVMPPC_VMX_COPY_DWORD:
+			fetched = kvmppc_get_vmx_dword(vcpu, vr, &element);
+			break;
+		case KVMPPC_VMX_COPY_WORD:
+			fetched = kvmppc_get_vmx_word(vcpu, vr, &element);
+			break;
+		case KVMPPC_VMX_COPY_HWORD:
+			fetched = kvmppc_get_vmx_hword(vcpu, vr, &element);
+			break;
+		case KVMPPC_VMX_COPY_BYTE:
+			fetched = kvmppc_get_vmx_byte(vcpu, vr, &element);
+			break;
+		default:
+			return EMULATE_FAIL;
+		}
+
+		if (fetched == -1)
+			return EMULATE_FAIL;
+
+		rc = kvmppc_handle_store(vcpu, element, bytes,
+					 is_default_endian);
+		if (rc != EMULATE_DONE)
+			return rc;
+
+		/* element done: step to the next one */
+		vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+		vcpu->arch.mmio_vmx_copy_nums--;
+		vcpu->arch.mmio_vmx_offset++;
+	}
+
+	return rc;
+}
+
+/*
+ * Resume a multi-element VMX MMIO access after one element has been
+ * completed: advance the accessed address by the size of that element
+ * and re-run the VMX load or store handler for the rest.
+ *
+ * Returns RESUME_HOST when another exit to userspace is required (with
+ * run->exit_reason set to KVM_EXIT_MMIO or KVM_EXIT_INTERNAL_ERROR), or
+ * RESUME_GUEST otherwise.
+ */
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	enum emulation_result rc;
+
+	vcpu->arch.paddr_accessed += run->mmio.len;
+
+	if (vcpu->mmio_is_write)
+		rc = kvmppc_handle_vmx_store(vcpu, vcpu->arch.io_gpr,
+					     run->mmio.len, 1);
+	else
+		rc = kvmppc_handle_vmx_load(vcpu, vcpu->arch.io_gpr,
+					    run->mmio.len, 1);
+
+	if (rc == EMULATE_DO_MMIO) {
+		run->exit_reason = KVM_EXIT_MMIO;
+		return RESUME_HOST;
+	}
+
+	if (rc == EMULATE_FAIL) {
+		pr_info("KVM: MMIO emulation failed (VMX repeat)\n");
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		return RESUME_HOST;
+	}
+
+	return RESUME_GUEST;
+}
+#endif /* CONFIG_ALTIVEC */
+
+/*
+ * Backend for KVM_GET_ONE_REG.
+ *
+ * The register is first looked up through kvmppc_get_one_reg(); only if
+ * that reports -EINVAL are the AltiVec registers (VR0-VR31, VSCR, VRSAVE)
+ * handled here.  On success one_reg_size(reg->id) bytes of the value are
+ * copied to the user buffer at reg->addr.
+ *
+ * Returns 0 on success, -EINVAL for an oversized or unknown register,
+ * -ENXIO when a vector register is requested on a CPU without AltiVec,
+ * -EFAULT if the copy to userspace fails, or any other error reported by
+ * kvmppc_get_one_reg().
+ */
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = 0;
+	union kvmppc_one_reg val;
+	int size;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+	/* -EINVAL from the first lookup means "not handled there" */
+	if (r == -EINVAL) {
+		r = 0;
+		switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+			break;
+		case KVM_REG_PPC_VSCR:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+			break;
+		case KVM_REG_PPC_VRSAVE:
+			/* NOTE(review): no CPU_FTR_ALTIVEC check here, unlike the set path */
+			val = get_reg_val(reg->id, vcpu->arch.vrsave);
+			break;
+#endif /* CONFIG_ALTIVEC */
+		default:
+			r = -EINVAL;
+			break;
+		}
+	}
+
+	if (r)
+		return r;
+
+	if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+		r = -EFAULT;
+
+	return r;
+}
+
+/*
+ * Backend for KVM_SET_ONE_REG.
+ *
+ * one_reg_size(reg->id) bytes are copied in from the user buffer at
+ * reg->addr and offered to kvmppc_set_one_reg(); only if that reports
+ * -EINVAL are the AltiVec registers (VR0-VR31, VSCR, VRSAVE) handled
+ * here.
+ *
+ * Returns 0 on success, -EINVAL for an oversized or unknown register,
+ * -EFAULT if the copy from userspace fails, -ENXIO when a vector
+ * register is written on a CPU without AltiVec, or any other error
+ * reported by kvmppc_set_one_reg().
+ */
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r;
+	union kvmppc_one_reg val;
+	int size;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+		return -EFAULT;
+
+	r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+	/* -EINVAL from the first attempt means "not handled there" */
+	if (r == -EINVAL) {
+		r = 0;
+		switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+			break;
+		case KVM_REG_PPC_VSCR:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_VRSAVE:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vrsave = set_reg_val(reg->id, val);
+			break;
+#endif /* CONFIG_ALTIVEC */
+		default:
+			r = -EINVAL;
+			break;
+		}
+	}
+
+	return r;
+}
+
+/*
+ * Architecture hook for the KVM_RUN ioctl.
+ *
+ * Before entering the guest, whatever the previous exit left pending is
+ * completed: an MMIO access (including the remaining elements of a
+ * multi-element VSX/VMX access), an OSI call, a PAPR hypercall, or (on
+ * BookE) an EPR read.  The guest is then run through kvmppc_vcpu_run()
+ * unless run->immediate_exit is set, in which case -EINTR is returned.
+ *
+ * Positive results (RESUME_* codes) are reported to the caller as 0.
+ */
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int r;
+
+	vcpu_load(vcpu);
+
+	if (vcpu->mmio_needed) {
+		vcpu->mmio_needed = 0;
+		if (!vcpu->mmio_is_write)
+			kvmppc_complete_mmio_load(vcpu);
+#ifdef CONFIG_VSX
+		/* account for the VSX element that has just been completed */
+		if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+			vcpu->arch.mmio_vsx_copy_nums--;
+			vcpu->arch.mmio_vsx_offset++;
+		}
+
+		/* more elements left: continue the access before running */
+		if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+			r = kvmppc_emulate_mmio_vsx_loadstore(vcpu);
+			if (r == RESUME_HOST) {
+				vcpu->mmio_needed = 1;
+				goto out;
+			}
+		}
+#endif
+#ifdef CONFIG_ALTIVEC
+		/* account for the VMX element that has just been completed */
+		if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+			vcpu->arch.mmio_vmx_copy_nums--;
+			vcpu->arch.mmio_vmx_offset++;
+		}
+
+		/* more elements left: continue the access before running */
+		if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+			r = kvmppc_emulate_mmio_vmx_loadstore(vcpu);
+			if (r == RESUME_HOST) {
+				vcpu->mmio_needed = 1;
+				goto out;
+			}
+		}
+#endif
+	} else if (vcpu->arch.osi_needed) {
+		u64 *gprs = run->osi.gprs;
+		int i;
+
+		/* take over all 32 GPRs as left in run->osi.gprs */
+		for (i = 0; i < 32; i++)
+			kvmppc_set_gpr(vcpu, i, gprs[i]);
+		vcpu->arch.osi_needed = 0;
+	} else if (vcpu->arch.hcall_needed) {
+		int i;
+
+		/* hcall result goes to r3, the nine args to r4..r12 */
+		kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
+		for (i = 0; i < 9; ++i)
+			kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
+		vcpu->arch.hcall_needed = 0;
+#ifdef CONFIG_BOOKE
+	} else if (vcpu->arch.epr_needed) {
+		kvmppc_set_epr(vcpu, run->epr.epr);
+		vcpu->arch.epr_needed = 0;
+#endif
+	}
+
+	kvm_sigset_activate(vcpu);
+
+	if (run->immediate_exit)
+		r = -EINTR;
+	else
+		r = kvmppc_vcpu_run(vcpu);
+
+	kvm_sigset_deactivate(vcpu);
+
+	/*
+	 * The label is only compiled in with CONFIG_ALTIVEC although the
+	 * CONFIG_VSX section above jumps to it as well.
+	 * NOTE(review): relies on VSX never being enabled without ALTIVEC.
+	 */
+#ifdef CONFIG_ALTIVEC
+out:
+#endif
+
+	/*
+	 * We're already returning to userspace, don't pass the
+	 * RESUME_HOST flags along.
+	 */
+	if (r > 0)
+		r = 0;
+
+	vcpu_put(vcpu);
+	return r;
+}
+
+/*
+ * Backend for KVM_INTERRUPT: KVM_INTERRUPT_UNSET withdraws a queued
+ * external interrupt, any other value queues one and kicks the vcpu.
+ * Always returns 0.
+ */
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+{
+	if (irq->irq != KVM_INTERRUPT_UNSET) {
+		kvmppc_core_queue_external(vcpu, irq);
+		kvm_vcpu_kick(vcpu);
+	} else {
+		kvmppc_core_dequeue_external(vcpu);
+	}
+
+	return 0;
+}
+
+/*
+ * Backend for the per-vcpu KVM_ENABLE_CAP ioctl.
+ *
+ * cap->flags must be zero.  Depending on cap->cap this switches on a
+ * vcpu feature (OSI, PAPR, EPR, BookE watchdog, FWNMI), configures the
+ * software TLB, or connects the vcpu to an in-kernel interrupt
+ * controller device whose file descriptor is passed in cap->args[0].
+ *
+ * Every path that obtained a file reference with fdget() releases it
+ * with fdput() before leaving its case, including the early exit taken
+ * when XIVE is not enabled.
+ *
+ * Returns 0 on success (after kvmppc_sanity_check() has passed) or a
+ * negative errno; unknown capabilities yield -EINVAL.
+ */
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_PPC_OSI:
+		r = 0;
+		vcpu->arch.osi_enabled = true;
+		break;
+	case KVM_CAP_PPC_PAPR:
+		r = 0;
+		vcpu->arch.papr_enabled = true;
+		break;
+	case KVM_CAP_PPC_EPR:
+		r = 0;
+		if (cap->args[0])
+			vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+		else
+			vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
+		break;
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+		r = 0;
+		vcpu->arch.watchdog_enabled = true;
+		break;
+#endif
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+	case KVM_CAP_SW_TLB: {
+		struct kvm_config_tlb cfg;
+		void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
+
+		r = -EFAULT;
+		if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
+			break;
+
+		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+		break;
+	}
+#endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC: {
+		struct fd f;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		f = fdget(cap->args[0]);
+		if (!f.file)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(f.file);
+		if (dev)
+			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fdput(f);
+		break;
+	}
+#endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS: {
+		struct fd f;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		f = fdget(cap->args[0]);
+		if (!f.file)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(f.file);
+		if (dev) {
+			if (xics_on_xive())
+				r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
+			else
+				r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+		}
+
+		fdput(f);
+		break;
+	}
+#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+	case KVM_CAP_PPC_IRQ_XIVE: {
+		struct fd f;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		f = fdget(cap->args[0]);
+		if (!f.file)
+			break;
+
+		r = -ENXIO;
+		if (!xive_enabled()) {
+			/* drop the reference taken by fdget() above */
+			fdput(f);
+			break;
+		}
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(f.file);
+		if (dev)
+			r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+							    cap->args[1]);
+
+		fdput(f);
+		break;
+	}
+#endif /* CONFIG_KVM_XIVE */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	case KVM_CAP_PPC_FWNMI:
+		r = -EINVAL;
+		if (!is_kvmppc_hv_enabled(vcpu->kvm))
+			break;
+		r = 0;
+		vcpu->kvm->arch.fwnmi_enabled = true;
+		break;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	/* a successfully changed configuration must still be consistent */
+	if (!r)
+		r = kvmppc_sanity_check(vcpu);
+
+	return r;
+}
+
+/*
+ * Report whether the VM has an in-kernel interrupt controller attached:
+ * an MPIC (CONFIG_KVM_MPIC) or a XICS/XIVE device (CONFIG_KVM_XICS).
+ */
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+	bool ready = false;
+
+#ifdef CONFIG_KVM_MPIC
+	ready = kvm->arch.mpic != NULL;
+#endif
+#ifdef CONFIG_KVM_XICS
+	if (!ready)
+		ready = kvm->arch.xics || kvm->arch.xive;
+#endif
+	return ready;
+}
+
+/* Reading the MP state is not implemented here: always returns -EINVAL. */
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+                                    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+/* Setting the MP state is not implemented here: always returns -EINVAL. */
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+                                    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+/*
+ * Handler for the asynchronous vcpu ioctls.  Only KVM_INTERRUPT is
+ * served here; every other command returns -ENOIOCTLCMD.  Returns
+ * -EFAULT if the struct kvm_interrupt argument cannot be copied in.
+ */
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+			       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *uarg = (void __user *)arg;
+	struct kvm_interrupt irq;
+
+	if (ioctl != KVM_INTERRUPT)
+		return -ENOIOCTLCMD;
+
+	if (copy_from_user(&irq, uarg, sizeof(irq)))
+		return -EFAULT;
+
+	return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+}
+
+/*
+ * Dispatcher for the architecture-specific vcpu ioctls: KVM_ENABLE_CAP,
+ * KVM_GET_ONE_REG / KVM_SET_ONE_REG and, on e500 configurations,
+ * KVM_DIRTY_TLB.
+ *
+ * Returns the handler's result, -EFAULT if the argument structure cannot
+ * be copied from userspace, or -EINVAL for any other command.
+ */
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *uarg = (void __user *)arg;
+	long ret;
+
+	switch (ioctl) {
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+
+		if (copy_from_user(&cap, uarg, sizeof(cap)))
+			return -EFAULT;
+
+		vcpu_load(vcpu);
+		ret = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		vcpu_put(vcpu);
+		return ret;
+	}
+
+	case KVM_SET_ONE_REG:
+	case KVM_GET_ONE_REG: {
+		struct kvm_one_reg reg;
+
+		if (copy_from_user(&reg, uarg, sizeof(reg)))
+			return -EFAULT;
+
+		if (ioctl == KVM_GET_ONE_REG)
+			return kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
+		return kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
+	}
+
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+	case KVM_DIRTY_TLB: {
+		struct kvm_dirty_tlb dirty;
+
+		if (copy_from_user(&dirty, uarg, sizeof(dirty)))
+			return -EFAULT;
+
+		vcpu_load(vcpu);
+		ret = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+		vcpu_put(vcpu);
+		return ret;
+	}
+#endif
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Page-fault hook for the vcpu mapping: always answers VM_FAULT_SIGBUS. */
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+/*
+ * Fill @pvinfo with the four-instruction hypercall sequence a guest
+ * should use, stored as big-endian words, and advertise
+ * KVM_PPC_PVINFO_FLAGS_EV_IDLE.  Always returns 0.
+ */
+static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
+{
+	u32 inst_nop = 0x60000000;
+#ifdef CONFIG_KVM_BOOKE_HV
+	/* a single hypercall instruction padded with three nops */
+	u32 inst_sc1 = 0x44000022;
+	pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
+	pvinfo->hcall[1] = cpu_to_be32(inst_nop);
+	pvinfo->hcall[2] = cpu_to_be32(inst_nop);
+	pvinfo->hcall[3] = cpu_to_be32(inst_nop);
+#else
+	u32 inst_lis = 0x3c000000;
+	u32 inst_ori = 0x60000000;
+	u32 inst_sc = 0x44000002;
+	u32 inst_imm_mask = 0xffff;
+
+	/*
+	 * The hypercall to get into KVM from within guest context is as
+	 * follows:
+	 *
+	 *    lis r0, KVM_SC_MAGIC_R0@h
+	 *    ori r0, r0, KVM_SC_MAGIC_R0@l
+	 *    sc
+	 *    nop
+	 */
+	pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
+	pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
+	pvinfo->hcall[2] = cpu_to_be32(inst_sc);
+	pvinfo->hcall[3] = cpu_to_be32(inst_nop);
+#endif
+
+	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
+
+	return 0;
+}
+
+/*
+ * Report whether the VM uses an in-kernel interrupt controller (MPIC,
+ * XICS or XIVE, as far as configured).  A read barrier is issued after
+ * the pointers have been inspected.
+ */
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	bool present = false;
+
+#ifdef CONFIG_KVM_MPIC
+	if (kvm->arch.mpic != NULL)
+		present = true;
+#endif
+#ifdef CONFIG_KVM_XICS
+	if (!present && kvm->arch.xics != NULL)
+		present = true;
+	if (!present && kvm->arch.xive != NULL)
+		present = true;
+#endif
+	smp_rmb();
+	return present;
+}
+
+/*
+ * Backend for the IRQ-line VM ioctl: forwards the requested level to
+ * kvm_set_irq() on behalf of userspace and stores that call's result in
+ * irq_event->status.
+ *
+ * Returns -ENXIO if the VM has no in-kernel interrupt controller,
+ * otherwise 0.
+ */
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
+			  bool line_status)
+{
+	if (!kvm_arch_irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level,
+					line_status);
+	return 0;
+}
+
+
+/*
+ * Backend for the VM-wide KVM_ENABLE_CAP ioctl.
+ *
+ * cap->flags must be zero.  Which capabilities are recognised depends on
+ * the kernel configuration; most of them are delegated to an optional
+ * callback in kvm->arch.kvm_ops and fail with -EINVAL when that callback
+ * is absent.  Unknown capabilities also yield -EINVAL.
+ */
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+			    struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	case KVM_CAP_PPC_ENABLE_HCALL: {
+		unsigned long hcall = cap->args[0];
+
+		r = -EINVAL;
+		/* hcall numbers are multiples of 4; args[1] is a 0/1 switch */
+		if (hcall > MAX_HCALL_OPCODE || (hcall & 3) ||
+		    cap->args[1] > 1)
+			break;
+		if (!kvmppc_book3s_hcall_implemented(kvm, hcall))
+			break;
+		if (cap->args[1])
+			set_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		else
+			clear_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		r = 0;
+		break;
+	}
+	case KVM_CAP_PPC_SMT: {
+		unsigned long mode = cap->args[0];
+		unsigned long flags = cap->args[1];
+
+		r = -EINVAL;
+		if (kvm->arch.kvm_ops->set_smt_mode)
+			r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
+		break;
+	}
+
+	case KVM_CAP_PPC_NESTED_HV:
+		r = -EINVAL;
+		if (!is_kvmppc_hv_enabled(kvm) ||
+		    !kvm->arch.kvm_ops->enable_nested)
+			break;
+		r = kvm->arch.kvm_ops->enable_nested(kvm);
+		break;
+#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+	case KVM_CAP_PPC_SECURE_GUEST:
+		r = -EINVAL;
+		if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_svm)
+			break;
+		r = kvm->arch.kvm_ops->enable_svm(kvm);
+		break;
+	case KVM_CAP_PPC_DAWR1:
+		r = -EINVAL;
+		if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1)
+			break;
+		r = kvm->arch.kvm_ops->enable_dawr1(kvm);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * These functions check whether the underlying hardware is safe
+ * against attacks based on observing the effects of speculatively
+ * executed instructions, and whether it supplies instructions for
+ * use in workarounds.  The information comes from firmware, either
+ * via the device tree on powernv platforms or from an hcall on
+ * pseries platforms.
+ */
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Obtain the CPU characteristics from the hypervisor on pseries.
+ *
+ * Returns -ENOTTY when not running on a pseries machine, otherwise 0.
+ * @cp is only written when the hcall reports H_SUCCESS; on any other
+ * hcall result it is left as the caller passed it in.
+ */
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+	struct h_cpu_char_result res;
+	unsigned long hrc;
+
+	if (!machine_is(pseries))
+		return -ENOTTY;
+
+	hrc = plpar_get_cpu_characteristics(&res);
+	if (hrc != H_SUCCESS)
+		return 0;
+
+	cp->character = res.character;
+	cp->behaviour = res.behaviour;
+
+	/* the bits of each word that carry meaning */
+	cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+		KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+		KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+		KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+		KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+		KVM_PPC_CPU_CHAR_BR_HINT_HONOURED |
+		KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF |
+		KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS |
+		KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
+	cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+		KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+		KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR |
+		KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
+
+	return 0;
+}
+#else
+/* Stub used when CONFIG_PPC_PSERIES is off: always returns -ENOTTY. */
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+	return -ENOTTY;
+}
+#endif
+
+/*
+ * Check whether the child node @name of @fw_features has the boolean
+ * property @state.  A missing child node counts as "no".
+ */
+static inline bool have_fw_feat(struct device_node *fw_features,
+				const char *state, const char *name)
+{
+	struct device_node *child = of_get_child_by_name(fw_features, name);
+	bool present;
+
+	if (!child)
+		return false;
+
+	present = of_property_read_bool(child, state);
+	of_node_put(child);
+	return present;
+}
+
+/*
+ * Collect the speculation-related CPU characteristics for
+ * KVM_PPC_GET_CPU_CHAR.
+ *
+ * @cp is cleared first.  The pseries hcall is tried and its result is
+ * final unless it returns -ENOTTY; in that case the "fw-features" child
+ * of the "ibm,opal" device-tree node is consulted.  If either node is
+ * missing @cp stays all-zero.  Returns 0 or the pseries helper's result.
+ */
+static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+	struct device_node *opal, *fw_features;
+	int rc;
+
+	memset(cp, 0, sizeof(*cp));
+
+	rc = pseries_get_cpu_char(cp);
+	if (rc != -ENOTTY)
+		return rc;
+
+	opal = of_find_node_by_name(NULL, "ibm,opal");
+	if (!opal)
+		return 0;
+
+	fw_features = of_get_child_by_name(opal, "fw-features");
+	of_node_put(opal);
+	if (!fw_features)
+		return 0;
+
+	/* the bits of each word that carry meaning */
+	cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+		KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+		KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+		KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+		KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+		KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS |
+		KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
+	cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+		KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+		KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR |
+		KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
+
+	/* characteristics: set when firmware marks the feature "enabled" */
+	if (have_fw_feat(fw_features, "enabled",
+			 "inst-spec-barrier-ori31,31,0"))
+		cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31;
+	if (have_fw_feat(fw_features, "enabled",
+			 "fw-bcctrl-serialized"))
+		cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED;
+	if (have_fw_feat(fw_features, "enabled",
+			 "inst-l1d-flush-ori30,30,0"))
+		cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30;
+	if (have_fw_feat(fw_features, "enabled",
+			 "inst-l1d-flush-trig2"))
+		cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2;
+	if (have_fw_feat(fw_features, "enabled",
+			 "fw-l1d-thread-split"))
+		cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV;
+	if (have_fw_feat(fw_features, "enabled",
+			 "fw-count-cache-disabled"))
+		cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+	if (have_fw_feat(fw_features, "enabled",
+			 "fw-count-cache-flush-bcctr2,0,0"))
+		cp->character |= KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
+
+	/*
+	 * behaviours: two of them are assumed unless firmware explicitly
+	 * marks the corresponding feature "disabled"
+	 */
+	if (have_fw_feat(fw_features, "enabled",
+			 "speculation-policy-favor-security"))
+		cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY;
+	if (!have_fw_feat(fw_features, "disabled",
+			  "needs-l1d-flush-msr-pr-0-to-1"))
+		cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR;
+	if (!have_fw_feat(fw_features, "disabled",
+			  "needs-spec-barrier-for-bound-checks"))
+		cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+	if (have_fw_feat(fw_features, "enabled",
+			 "needs-count-cache-flush-on-context-switch"))
+		cp->behaviour |= KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
+
+	of_node_put(fw_features);
+	return 0;
+}
+#endif
+
+/*
+ * Dispatcher for the architecture-specific VM ioctls.
+ *
+ * KVM_PPC_GET_PVINFO is always available; the TCE table ioctls need
+ * CONFIG_SPAPR_TCE_IOMMU and the MMU/RTAS/CPU-characteristics/SVM ones
+ * need CONFIG_PPC_BOOK3S_64.  On Book3S-64 any other command is passed
+ * on to kvm_ops->arch_vm_ioctl(); elsewhere it fails with -ENOTTY.
+ *
+ * Every structure that is copied out to userspace is zeroed before the
+ * backend fills it, so stack contents cannot leak through fields or
+ * padding that a backend leaves untouched.
+ *
+ * Returns the backend's result or -EFAULT if a user copy fails.
+ */
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm __maybe_unused = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+	case KVM_PPC_GET_PVINFO: {
+		struct kvm_ppc_pvinfo pvinfo;
+		memset(&pvinfo, 0, sizeof(pvinfo));
+		r = kvm_vm_ioctl_get_pvinfo(&pvinfo);
+		if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		break;
+	}
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case KVM_CREATE_SPAPR_TCE_64: {
+		struct kvm_create_spapr_tce_64 create_tce_64;
+
+		r = -EFAULT;
+		if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
+			goto out;
+		if (create_tce_64.flags) {
+			r = -EINVAL;
+			goto out;
+		}
+		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
+		goto out;
+	}
+	case KVM_CREATE_SPAPR_TCE: {
+		struct kvm_create_spapr_tce create_tce;
+		struct kvm_create_spapr_tce_64 create_tce_64;
+
+		r = -EFAULT;
+		if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
+			goto out;
+
+		/* express the legacy request as a 64-bit one with 4K pages */
+		create_tce_64.liobn = create_tce.liobn;
+		create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
+		create_tce_64.offset = 0;
+		create_tce_64.size = create_tce.window_size >>
+				IOMMU_PAGE_SHIFT_4K;
+		create_tce_64.flags = 0;
+		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
+		goto out;
+	}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	case KVM_PPC_GET_SMMU_INFO: {
+		struct kvm_ppc_smmu_info info;
+
+		memset(&info, 0, sizeof(info));
+		r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
+		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+			r = -EFAULT;
+		break;
+	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN:
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
+	case KVM_PPC_CONFIGURE_V3_MMU: {
+		struct kvm_ppc_mmuv3_cfg cfg;
+
+		r = -EINVAL;
+		if (!kvm->arch.kvm_ops->configure_mmu)
+			goto out;
+		r = -EFAULT;
+		if (copy_from_user(&cfg, argp, sizeof(cfg)))
+			goto out;
+		r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg);
+		break;
+	}
+	case KVM_PPC_GET_RMMU_INFO: {
+		struct kvm_ppc_rmmu_info info;
+
+		r = -EINVAL;
+		if (!kvm->arch.kvm_ops->get_rmmu_info)
+			goto out;
+		/* same precaution as for KVM_PPC_GET_SMMU_INFO above */
+		memset(&info, 0, sizeof(info));
+		r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info);
+		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+			r = -EFAULT;
+		break;
+	}
+	case KVM_PPC_GET_CPU_CHAR: {
+		struct kvm_ppc_cpu_char cpuchar;
+
+		r = kvmppc_get_cpu_char(&cpuchar);
+		if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar)))
+			r = -EFAULT;
+		break;
+	}
+	case KVM_PPC_SVM_OFF:
+		/* without an svm_off callback there is nothing to do */
+		r = 0;
+		if (!kvm->arch.kvm_ops->svm_off)
+			goto out;
+
+		r = kvm->arch.kvm_ops->svm_off(kvm);
+		break;
+	default:
+		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
+#else /* CONFIG_PPC_BOOK3S_64 */
+	default:
+		r = -ENOTTY;
+#endif
+	}
+out:
+	return r;
+}
+
+/* Allocator for LPIDs and the number of LPIDs set by kvmppc_init_lpid() */
+static DEFINE_IDA(lpid_inuse);
+static unsigned long nr_lpids;
+
+/*
+ * Allocate an unused LPID in the range 1 .. nr_lpids - 1.
+ *
+ * Returns the LPID, or -ENOMEM if the allocation fails for any reason
+ * (the log message tells memory exhaustion and LPID exhaustion apart).
+ */
+long kvmppc_alloc_lpid(void)
+{
+	int id;
+
+	/* LPID 0 is reserved for the host, so guests start at 1 */
+	id = ida_alloc_range(&lpid_inuse, 1, nr_lpids - 1, GFP_KERNEL);
+	if (id >= 0)
+		return id;
+
+	if (id == -ENOMEM)
+		pr_err("%s: Out of memory\n", __func__);
+	else
+		pr_err("%s: No LPIDs free\n", __func__);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
+
+/* Return @lpid to the lpid_inuse allocator. */
+void kvmppc_free_lpid(long lpid)
+{
+	ida_free(&lpid_inuse, lpid);
+}
+EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
+
+/*
+ * Record how many LPIDs exist.  nr_lpids_param includes the host LPID;
+ * kvmppc_alloc_lpid() hands out values from 1 up to nr_lpids_param - 1.
+ */
+void kvmppc_init_lpid(unsigned long nr_lpids_param)
+{
+	nr_lpids = nr_lpids_param;
+}
+EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
+
+/*
+ * Let the active KVM backend add its per-vcpu debugfs entries, if it
+ * provides a create_vcpu_debugfs callback.
+ */
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+	if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
+		vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
+}
+
+/*
+ * Let the active KVM backend add its per-VM debugfs entries, if it
+ * provides a create_vm_debugfs callback.  The callback's return value
+ * is ignored; this function always returns 0.
+ */
+int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+	if (kvm->arch.kvm_ops->create_vm_debugfs)
+		kvm->arch.kvm_ops->create_vm_debugfs(kvm);
+	return 0;
+}
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 0000000000..25071331f8
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+#include "timing.h"
+
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+	int type;
+
+	/* Reset everything under the lock that add_exit_timing() also takes */
+	mutex_lock(&vcpu->arch.exit_timing_lock);
+
+	vcpu->arch.last_exit_type = 0xDEAD;
+	vcpu->arch.timing_last_exit = 0;
+	vcpu->arch.timing_exit.tv64 = 0;
+	vcpu->arch.timing_last_enter.tv64 = 0;
+	for (type = 0; type < __NUMBER_OF_KVM_EXIT_TYPES; type++) {
+		vcpu->arch.timing_sum_quad_duration[type] = 0;
+		vcpu->arch.timing_sum_duration[type] = 0;
+		vcpu->arch.timing_min_duration[type] = 0xFFFFFFFF;
+		vcpu->arch.timing_max_duration[type] = 0;
+		vcpu->arch.timing_count_type[type] = 0;
+	}
+
+	mutex_unlock(&vcpu->arch.exit_timing_lock);
+}
+
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
+{
+	u64 old;	/* accumulator before the add, used to detect wrap */
+
+	/* fold @duration into the count/sum/square sum/min/max of @type */
+	mutex_lock(&vcpu->arch.exit_timing_lock);
+	vcpu->arch.timing_count_type[type]++;
+
+	/* sum; on wrap, log old/new as unsigned (old is u64) */
+	old = vcpu->arch.timing_sum_duration[type];
+	vcpu->arch.timing_sum_duration[type] += duration;
+	if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of durations"
+			" old %llu new %llu type %d exit # of type %d\n",
+			__func__, old, vcpu->arch.timing_sum_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* square sum; same wrap check and unsigned logging as above */
+	old = vcpu->arch.timing_sum_quad_duration[type];
+	vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+	if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+		printk(KERN_ERR"%s - wrap adding sum of squared durations"
+			" old %llu new %llu type %d exit # of type %d\n",
+			__func__, old,
+			vcpu->arch.timing_sum_quad_duration[type],
+			type, vcpu->arch.timing_count_type[type]);
+	}
+
+	/* set min/max */
+	if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+		vcpu->arch.timing_min_duration[type] = duration;
+	if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+		vcpu->arch.timing_max_duration[type] = duration;
+
+	mutex_unlock(&vcpu->arch.exit_timing_lock);
+}
+
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+	u64 prev_exit = vcpu->arch.timing_last_exit;
+	u64 reenter = vcpu->arch.timing_last_enter.tv64;
+
+	/* remember this exit; it is consumed once the next re-enter time is known */
+	vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+	/* skip incomplete cycle (e.g. after reset) */
+	if (unlikely(prev_exit == 0 || vcpu->arch.last_exit_type == 0xDEAD))
+		return;
+
+	/* previous exit -> re-enter is booked to last_exit_type */
+	add_exit_timing(vcpu, reenter - prev_exit, vcpu->arch.last_exit_type);
+	/* re-enter -> this exit was spent in the guest */
+	add_exit_timing(vcpu, vcpu->arch.timing_last_exit - reenter, TIMEINGUEST);
+}
+
+static const char * const kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+	[MMIO_EXITS] =              "MMIO",
+	[SIGNAL_EXITS] =            "SIGNAL",
+	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
+	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",
+	[DTLB_REAL_MISS_EXITS] =    "DTLBREAL",
+	[DTLB_VIRT_MISS_EXITS] =    "DTLBVIRT",
+	[SYSCALL_EXITS] =           "SYSCALL",
+	[ISI_EXITS] =               "ISI",
+	[DSI_EXITS] =               "DSI",
+	[EMULATED_INST_EXITS] =     "EMULINST",
+	[EMULATED_MTMSRWE_EXITS] =  "EMUL_WAIT",
+	[EMULATED_WRTEE_EXITS] =    "EMUL_WRTEE",
+	[EMULATED_MTSPR_EXITS] =    "EMUL_MTSPR",
+	[EMULATED_MFSPR_EXITS] =    "EMUL_MFSPR",
+	[EMULATED_MTMSR_EXITS] =    "EMUL_MTMSR",
+	[EMULATED_MFMSR_EXITS] =    "EMUL_MFMSR",
+	[EMULATED_TLBSX_EXITS] =    "EMUL_TLBSX",
+	[EMULATED_TLBWE_EXITS] =    "EMUL_TLBWE",
+	[EMULATED_RFI_EXITS] =      "EMUL_RFI",
+	[DEC_EXITS] =               "DEC",
+	[EXT_INTR_EXITS] =          "EXTINT",
+	[HALT_WAKEUP] =             "HALT",
+	[USR_PR_INST] =             "USR_PR_INST",
+	[FP_UNAVAIL] =              "FP_UNAVAIL",
+	[DEBUG_EXITS] =             "DEBUG",
+	[TIMEINGUEST] =             "TIMEINGUEST"
+};	/* read-only; exit types without an entry here stay NULL */
+
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+	struct kvm_vcpu *vcpu = m->private;
+	int i;
+	u64 min, max, sum, sum_quad;
+
+	/* One row per exit type: name, count, then min/max/sum/sum of squares */
+	seq_puts(m, "type	count	min	max	sum	sum_squared\n");
+
+	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+		/* divide each value by tb_ticks_per_usec (do_div works in place) */
+		min = vcpu->arch.timing_min_duration[i];
+		do_div(min, tb_ticks_per_usec);
+		max = vcpu->arch.timing_max_duration[i];
+		do_div(max, tb_ticks_per_usec);
+		sum = vcpu->arch.timing_sum_duration[i];
+		do_div(sum, tb_ticks_per_usec);
+		sum_quad = vcpu->arch.timing_sum_quad_duration[i];
+		do_div(sum_quad, tb_ticks_per_usec);
+		/* NOTE(review): sum_quad is scaled like the linear values - confirm */
+		seq_printf(m, "%12s	%10d	%10llu	%10llu	%20llu	%20llu\n",
+			kvm_exit_names[i],
+			vcpu->arch.timing_count_type[i],
+			min,
+			max,
+			sum,
+			sum_quad);	/* the u64 values are printed unsigned */
+	}
+	return 0;
+}
+
+/* Write a single 'c' to clear the timing statistics; other input is -EINVAL. */
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+				       const char __user *user_buf,
+				       size_t count, loff_t *ppos)
+{
+	struct seq_file *seqf = file->private_data;
+	struct kvm_vcpu *vcpu = seqf->private;
+	char c;
+
+	/* An empty write is a no-op: never read a byte the caller did not supply */
+	if (count == 0)
+		return 0;
+	if (count > 1)
+		return -EINVAL;
+
+	if (get_user(c, user_buf))
+		return -EFAULT;
+
+	if (c != 'c')
+		return -EINVAL;
+
+	/*
+	 * Write does not affect our buffers previously generated with
+	 * show. seq_file is locked here to prevent races of init with
+	 * a show call.
+	 */
+	mutex_lock(&seqf->lock);
+	kvmppc_init_timing_stats(vcpu);
+	mutex_unlock(&seqf->lock);
+
+	return count;
+}
+
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{	/* inode->i_private becomes the seq_file's ->private, read by show/write */
+	return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+static const struct file_operations kvmppc_exit_timing_fops = {
+	.owner   = THIS_MODULE,
+	.open    = kvmppc_exit_timing_open,
+	.read    = seq_read,
+	.write   = kvmppc_exit_timing_write,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu,
+				    struct dentry *debugfs_dentry)
+{
+	debugfs_create_file("timing", 0666, debugfs_dentry,
+			    vcpu, &kvmppc_exit_timing_fops);	/* vcpu is the file's private data */
+	return 0;	/* the debugfs_create_file() result is not checked */
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 0000000000..45817ab82b
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu,
+				    struct dentry *debugfs_dentry);
+
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+	vcpu->arch.last_exit_type = type;	/* read back by the exit timing code */
+}
+
+#else
+/* if exit timing is not configured there is no need to build the c file */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu,
+						  struct dentry *debugfs_dentry)
+{
+	return 0;
+}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/* Bump the vcpu->stat counter matching exit @type; other types are ignored */
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+	/* type has to be known at build time for optimization */
+
+	/* The BUILD_BUG_ON below breaks in funny ways, commented out
+	 * for now ... -BenH
+	BUILD_BUG_ON(!__builtin_constant_p(type));
+	*/
+	switch (type) {
+	case EXT_INTR_EXITS:
+		vcpu->stat.ext_intr_exits++;
+		break;
+	case DEC_EXITS:
+		vcpu->stat.dec_exits++;
+		break;
+	case EMULATED_INST_EXITS:
+		vcpu->stat.emulated_inst_exits++;
+		break;
+	case DSI_EXITS:
+		vcpu->stat.dsi_exits++;
+		break;
+	case ISI_EXITS:
+		vcpu->stat.isi_exits++;
+		break;
+	case SYSCALL_EXITS:
+		vcpu->stat.syscall_exits++;
+		break;
+	case DTLB_REAL_MISS_EXITS:
+		vcpu->stat.dtlb_real_miss_exits++;
+		break;
+	case DTLB_VIRT_MISS_EXITS:
+		vcpu->stat.dtlb_virt_miss_exits++;
+		break;
+	case MMIO_EXITS:
+		vcpu->stat.mmio_exits++;
+		break;
+	case ITLB_REAL_MISS_EXITS:
+		vcpu->stat.itlb_real_miss_exits++;
+		break;
+	case ITLB_VIRT_MISS_EXITS:
+		vcpu->stat.itlb_virt_miss_exits++;
+		break;
+	case SIGNAL_EXITS:
+		vcpu->stat.signal_exits++;
+		break;
+	case DBELL_EXITS:
+		vcpu->stat.dbell_exits++;
+		break;
+	case GDBELL_EXITS:
+		vcpu->stat.gdbell_exits++;
+		break;
+	}	/* no default: exit types without a counter are not accounted */
+}
+
+/* wrapper to record the exit type and account for it in kvm_stats */
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
+{
+	kvmppc_set_exit_type(vcpu, type);
+	kvmppc_account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
new file mode 100644
index 0000000000..b506c4d9a8
--- /dev/null
+++ b/arch/powerpc/kvm/tm.S
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Derived from book3s_hv_rmhandlers.S, which is:
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/tm.h>
+#include <asm/cputable.h>
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
+
+/*
+ * Save transactional state and TM-related registers.
+ * Called with:
+ * - r3 pointing to the vcpu struct
+ * - r4 containing the MSR with current TS bits:
+ * 	(For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
+ * - r5 containing a flag indicating that non-volatile registers
+ *	must be preserved.
+ * If r5 == 0, this can modify all checkpointed registers, but
+ * restores r1, r2 before exit.  If r5 != 0, this restores the
+ * MSR TM/FP/VEC/VSX bits to their state on entry.
+ */
+_GLOBAL(__kvmppc_save_tm)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu    r1, -SWITCH_FRAME_SIZE(r1)
+
+	mr	r9, r3			/* r9 = vcpu; r3 is reused below */
+	cmpdi	cr7, r5, 0		/* cr7 remembers whether r5 == 0 */
+
+	/* Turn on TM, plus FP/VEC/VSX. */
+	mfmsr	r8
+	mr	r10, r8			/* r10 = MSR as found on entry */
+	li	r0, 1
+	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+	ori     r8, r8, MSR_FP
+	oris    r8, r8, (MSR_VEC | MSR_VSX)@h
+	mtmsrd	r8
+
+	rldicl. r4, r4, 64 - MSR_TS_S_LG, 62	/* isolate the two MSR[TS] bits */
+	beq	1f	/* TM not active in guest. */
+
+	std	r1, HSTATE_SCRATCH2(r13)	/* reloaded into r1 after treclaim */
+	std	r3, HSTATE_SCRATCH1(r13)	/* reloaded into r9 after treclaim */
+
+	/* Save CR on the stack - even if r5 == 0 we need to get cr7 back. */
+	mfcr	r6
+	SAVE_GPR(6, r1)
+
+	/* Save DSCR so we can restore it to avoid running with user value */
+	mfspr	r7, SPRN_DSCR
+	SAVE_GPR(7, r1)
+
+	/*
+	 * We are going to do treclaim., which will modify all checkpointed
+	 * registers.  Save the non-volatile registers on the stack if
+	 * preservation of non-volatile state has been requested.
+	 */
+	beq	cr7, 3f
+	SAVE_NVGPRS(r1)
+
+	/* MSR[TS] will be 0 (non-transactional) once we do treclaim. */
+	li	r0, 0
+	rldimi	r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	SAVE_GPR(10, r1)	/* final MSR value */
+3:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
+	mfspr	r6, SPRN_TEXASR
+	std	r6, VCPU_ORIG_TEXASR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+#endif
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	li	r3, TM_CAUSE_KVM_RESCHED
+
+	/* All GPRs are volatile at this point. */
+	TRECLAIM(R3)
+
+	/* Temporarily store r13 and r9 so we have some regs to play with */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
+	std	r9, PACATMSCRATCH(r13)
+	ld	r9, HSTATE_SCRATCH1(r13)
+
+	/* Save away PPR soon so we don't run with user value. */
+	std	r0, VCPU_GPRS_TM(0)(r9)
+	mfspr	r0, SPRN_PPR
+	HMT_MEDIUM
+
+	/* Reload stack pointer. */
+	std	r1, VCPU_GPRS_TM(1)(r9)
+	ld	r1, HSTATE_SCRATCH2(r13)
+
+	/* Set MSR RI now we have r1 and r13 back. */
+	std	r2, VCPU_GPRS_TM(2)(r9)
+	li	r2, MSR_RI
+	mtmsrd	r2, 1
+
+	/* Reload TOC pointer. */
+	LOAD_PACA_TOC()
+
+	/* Save all but r0-r2, r9 & r13 */
+	reg = 3
+	.rept	29
+	.if (reg != 9) && (reg != 13)
+	std	reg, VCPU_GPRS_TM(reg)(r9)
+	.endif
+	reg = reg + 1
+	.endr
+	/* ... now save r13 */
+	GET_SCRATCH0(r4)
+	std	r4, VCPU_GPRS_TM(13)(r9)
+	/* ... and save r9 */
+	ld	r4, PACATMSCRATCH(r13)
+	std	r4, VCPU_GPRS_TM(9)(r9)
+
+	/* Restore host DSCR and CR values, after saving guest values */
+	mfcr	r6
+	mfspr	r7, SPRN_DSCR
+	stw	r6, VCPU_CR_TM(r9)
+	std	r7, VCPU_DSCR_TM(r9)
+	REST_GPR(6, r1)
+	REST_GPR(7, r1)
+	mtcr	r6
+	mtspr	SPRN_DSCR, r7
+
+	/* Save away checkpointed SPRs. */
+	std	r0, VCPU_PPR_TM(r9)
+	mflr	r5
+	mfctr	r7
+	mfspr	r8, SPRN_AMR
+	mfspr	r10, SPRN_TAR
+	mfxer	r11
+	std	r5, VCPU_LR_TM(r9)
+	std	r7, VCPU_CTR_TM(r9)
+	std	r8, VCPU_AMR_TM(r9)
+	std	r10, VCPU_TAR_TM(r9)
+	std	r11, VCPU_XER_TM(r9)
+
+	/* Save FP/VSX. */
+	addi	r3, r9, VCPU_FPRS_TM
+	bl	store_fp_state
+	addi	r3, r9, VCPU_VRS_TM
+	bl	store_vr_state
+	mfspr	r6, SPRN_VRSAVE
+	stw	r6, VCPU_VRSAVE_TM(r9)
+
+	/* Restore non-volatile registers if requested to */
+	beq	cr7, 1f
+	REST_NVGPRS(r1)
+	REST_GPR(10, r1)
+1:
+	/*
+	 * We need to save these SPRs after the treclaim so that the software
+	 * error code is recorded correctly in the TEXASR.  Also the user may
+	 * change these outside of a transaction, so they must always be
+	 * context switched.
+	 */
+	mfspr	r7, SPRN_TEXASR
+	std	r7, VCPU_TEXASR(r9)
+	mfspr	r5, SPRN_TFHAR
+	mfspr	r6, SPRN_TFIAR
+	std	r5, VCPU_TFHAR(r9)
+	std	r6, VCPU_TFIAR(r9)
+
+	/* Restore MSR state if requested */
+	beq	cr7, 2f
+	mtmsrd	r10, 0
+2:
+	addi	r1, r1, SWITCH_FRAME_SIZE
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * _kvmppc_save_tm_pr() is a wrapper around __kvmppc_save_tm(), so that it can
+ * be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_save_tm_pr)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu    r1, -PPC_MIN_STKFRM(r1)
+
+	mfspr   r8, SPRN_TAR		/* save TAR so that it can be recovered later */
+	std	r8, PPC_MIN_STKFRM-8(r1)
+
+	li	r5, 1		/* preserve non-volatile registers */
+	bl	__kvmppc_save_tm
+
+	ld	r8, PPC_MIN_STKFRM-8(r1)	/* put the saved TAR back */
+	mtspr   SPRN_TAR, r8
+
+	addi    r1, r1, PPC_MIN_STKFRM
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with:
+ *  - r3 pointing to the vcpu struct.
+ *  - r4 is the guest MSR with desired TS bits:
+ * 	For HV KVM, it is VCPU_MSR
+ * 	For PR KVM, it is provided by caller
+ * - r5 containing a flag indicating that non-volatile registers
+ *	must be preserved.
+ * If r5 == 0, this potentially modifies all checkpointed registers, but
+ * restores r1, r2 from the PACA before exit.
+ * If r5 != 0, this restores the MSR TM/FP/VEC/VSX bits to their state on entry.
+ */
+_GLOBAL(__kvmppc_restore_tm)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+
+	cmpdi	cr7, r5, 0		/* cr7 remembers whether r5 == 0 */
+
+	/* Turn on TM/FP/VSX/VMX so we can restore them. */
+	mfmsr	r5
+	mr	r10, r5			/* r10 = MSR as found on entry */
+	li	r6, MSR_TM >> 32
+	sldi	r6, r6, 32
+	or	r5, r5, r6
+	ori	r5, r5, MSR_FP
+	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
+	mtmsrd	r5
+
+	/*
+	 * The user may change these outside of a transaction, so they must
+	 * always be context switched.
+	 */
+	ld	r5, VCPU_TFHAR(r3)
+	ld	r6, VCPU_TFIAR(r3)
+	ld	r7, VCPU_TEXASR(r3)
+	mtspr	SPRN_TFHAR, r5
+	mtspr	SPRN_TFIAR, r6
+	mtspr	SPRN_TEXASR, r7
+
+	mr	r5, r4
+	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62	/* isolate the two MSR[TS] bits */
+	beq	9f		/* TM not active in guest */
+
+	/* Make sure the failure summary is set, otherwise we'll program check
+	 * when we trechkpt.  It's possible that this might have been not set
+	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+	 * host.
+	 */
+	oris	r7, r7, (TEXASR_FS)@h
+	mtspr	SPRN_TEXASR, r7
+
+	/*
+	 * Make a stack frame and save non-volatile registers if requested.
+	 */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+	std	r1, HSTATE_SCRATCH2(r13)	/* reloaded into r1 after trechkpt */
+
+	mfcr	r6
+	mfspr	r7, SPRN_DSCR
+	SAVE_GPR(2, r1)
+	SAVE_GPR(6, r1)
+	SAVE_GPR(7, r1)
+
+	beq	cr7, 4f
+	SAVE_NVGPRS(r1)
+
+	/* MSR[TS] will be 1 (suspended) once we do trechkpt */
+	li	r0, 1
+	rldimi	r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+	SAVE_GPR(10, r1)	/* final MSR value */
+4:
+	/*
+	 * We need to load up the checkpointed state for the guest.
+	 * We need to do this early as it will blow away any GPRs, VSRs and
+	 * some SPRs.
+	 */
+
+	mr	r31, r3			/* keep vcpu in r31 across the calls below */
+	addi	r3, r31, VCPU_FPRS_TM
+	bl	load_fp_state
+	addi	r3, r31, VCPU_VRS_TM
+	bl	load_vr_state
+	mr	r3, r31
+	lwz	r7, VCPU_VRSAVE_TM(r3)
+	mtspr	SPRN_VRSAVE, r7
+
+	ld	r5, VCPU_LR_TM(r3)
+	lwz	r6, VCPU_CR_TM(r3)
+	ld	r7, VCPU_CTR_TM(r3)
+	ld	r8, VCPU_AMR_TM(r3)
+	ld	r9, VCPU_TAR_TM(r3)
+	ld	r10, VCPU_XER_TM(r3)
+	mtlr	r5
+	mtcr	r6
+	mtctr	r7
+	mtspr	SPRN_AMR, r8
+	mtspr	SPRN_TAR, r9
+	mtxer	r10
+
+	/*
+	 * Load up PPR and DSCR values but don't put them in the actual SPRs
+	 * till the last moment to avoid running with userspace PPR and DSCR for
+	 * too long.
+	 */
+	ld	r29, VCPU_DSCR_TM(r3)
+	ld	r30, VCPU_PPR_TM(r3)
+
+	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/* Load GPRs r0-r28 */
+	reg = 0
+	.rept	29
+	ld	reg, VCPU_GPRS_TM(reg)(r31)
+	reg = reg + 1
+	.endr
+
+	mtspr	SPRN_DSCR, r29
+	mtspr	SPRN_PPR, r30
+
+	/* Load final GPRs */
+	ld	29, VCPU_GPRS_TM(29)(r31)
+	ld	30, VCPU_GPRS_TM(30)(r31)
+	ld	31, VCPU_GPRS_TM(31)(r31)
+
+	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
+	TRECHKPT
+
+	/* Now let's get back the state we need. */
+	HMT_MEDIUM
+	GET_PACA(r13)
+	ld	r1, HSTATE_SCRATCH2(r13)
+	REST_GPR(7, r1)
+	mtspr	SPRN_DSCR, r7
+
+	/* Set the MSR RI since we have our registers back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+
+	/* Restore TOC pointer and CR */
+	REST_GPR(2, r1)
+	REST_GPR(6, r1)
+	mtcr	r6
+
+	/* Restore non-volatile registers if requested to. */
+	beq	cr7, 5f
+	REST_GPR(10, r1)
+	REST_NVGPRS(r1)
+
+5:	addi	r1, r1, SWITCH_FRAME_SIZE
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+
+9:	/* Restore MSR bits if requested */
+	beqlr	cr7
+	mtmsrd	r10, 0
+	blr
+
+/*
+ * _kvmppc_restore_tm_pr() is a wrapper around __kvmppc_restore_tm(), so that it
+ * can be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_restore_tm_pr)
+	mflr	r0
+	std	r0, PPC_LR_STKOFF(r1)
+	stdu    r1, -PPC_MIN_STKFRM(r1)
+
+	/* save TAR so that it can be recovered later */
+	mfspr   r8, SPRN_TAR
+	std	r8, PPC_MIN_STKFRM-8(r1)
+
+	li	r5, 1		/* preserve non-volatile registers */
+	bl	__kvmppc_restore_tm
+
+	ld	r8, PPC_MIN_STKFRM-8(r1)	/* put the saved TAR back */
+	mtspr   SPRN_TAR, r8
+
+	addi    r1, r1, PPC_MIN_STKFRM
+	ld	r0, PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
new file mode 100644
index 0000000000..ea1d7c8083
--- /dev/null
+++ b/arch/powerpc/kvm/trace.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Instruction word, PC and emulate value (no "\n": tracing ends the line).
+ */
+TRACE_EVENT(kvm_ppc_instr,
+	TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate),
+	TP_ARGS(inst, _pc, emulate),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	inst		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned int,	emulate		)
+	),
+
+	TP_fast_assign(
+		__entry->inst		= inst;
+		__entry->pc		= _pc;
+		__entry->emulate	= emulate;
+	),
+
+	TP_printk("inst %u pc 0x%lx emulate %u",
+		  __entry->inst, __entry->pc, __entry->emulate)
+);
+
+TRACE_EVENT(kvm_stlb_inval,
+	TP_PROTO(unsigned int stlb_index),
+	TP_ARGS(stlb_index),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	stlb_index	)
+	),
+
+	TP_fast_assign(
+		__entry->stlb_index	= stlb_index;
+	),
+
+	TP_printk("stlb_index %u", __entry->stlb_index)
+);
+
+TRACE_EVENT(kvm_stlb_write,
+	TP_PROTO(unsigned int victim, unsigned int tid, unsigned int word0,
+		 unsigned int word1, unsigned int word2),
+	TP_ARGS(victim, tid, word0, word1, word2),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	victim		)
+		__field(	unsigned int,	tid		)
+		__field(	unsigned int,	word0		)
+		__field(	unsigned int,	word1		)
+		__field(	unsigned int,	word2		)
+	),
+
+	TP_fast_assign(
+		__entry->victim		= victim;
+		__entry->tid		= tid;
+		__entry->word0		= word0;
+		__entry->word1		= word1;
+		__entry->word2		= word2;
+	),
+
+	TP_printk("victim %u tid %u w0 %u w1 %u w2 %u",
+		__entry->victim, __entry->tid, __entry->word0,
+		__entry->word1, __entry->word2)
+);
+
+TRACE_EVENT(kvm_gtlb_write,
+	TP_PROTO(unsigned int gtlb_index, unsigned int tid, unsigned int word0,
+		 unsigned int word1, unsigned int word2),
+	TP_ARGS(gtlb_index, tid, word0, word1, word2),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	gtlb_index	)
+		__field(	unsigned int,	tid		)
+		__field(	unsigned int,	word0		)
+		__field(	unsigned int,	word1		)
+		__field(	unsigned int,	word2		)
+	),
+
+	TP_fast_assign(
+		__entry->gtlb_index	= gtlb_index;
+		__entry->tid		= tid;
+		__entry->word0		= word0;
+		__entry->word1		= word1;
+		__entry->word2		= word2;
+	),
+
+	TP_printk("gtlb_index %u tid %u w0 %u w1 %u w2 %u",
+		__entry->gtlb_index, __entry->tid, __entry->word0,
+		__entry->word1, __entry->word2)
+);
+
+TRACE_EVENT(kvm_check_requests,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	cpu_nr		)
+		__field(	__u32,	requests	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_nr		= vcpu->vcpu_id;
+		__entry->requests	= vcpu->requests;
+	),
+
+	TP_printk("vcpu=%x requests=%x",
+		__entry->cpu_nr, __entry->requests)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 0000000000..372a82fa2d
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
new file mode 100644
index 0000000000..eff6e82dbc
--- /dev/null
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_BOOKE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_booke
+
+#define kvm_trace_symbol_exit \
+	{0, "CRITICAL"}, \
+	{1, "MACHINE_CHECK"}, \
+	{2, "DATA_STORAGE"}, \
+	{3, "INST_STORAGE"}, \
+	{4, "EXTERNAL"}, \
+	{5, "ALIGNMENT"}, \
+	{6, "PROGRAM"}, \
+	{7, "FP_UNAVAIL"}, \
+	{8, "SYSCALL"}, \
+	{9, "AP_UNAVAIL"}, \
+	{10, "DECREMENTER"}, \
+	{11, "FIT"}, \
+	{12, "WATCHDOG"}, \
+	{13, "DTLB_MISS"}, \
+	{14, "ITLB_MISS"}, \
+	{15, "DEBUG"}, \
+	{32, "SPE_UNAVAIL"}, \
+	{33, "SPE_FP_DATA"}, \
+	{34, "SPE_FP_ROUND"}, \
+	{35, "PERFORMANCE_MONITOR"}, \
+	{36, "DOORBELL"}, \
+	{37, "DOORBELL_CRITICAL"}, \
+	{38, "GUEST_DBELL"}, \
+	{39, "GUEST_DBELL_CRIT"}, \
+	{40, "HV_SYSCALL"}, \
+	{41, "HV_PRIV"}
+
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+	TP_ARGS(exit_nr, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_nr		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned long,	msr		)
+		__field(	unsigned long,	dar		)
+		__field(	unsigned long,	last_inst	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_nr	= exit_nr;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->dar		= kvmppc_get_fault_dar(vcpu);
+		__entry->msr		= vcpu->arch.shared->msr;
+		__entry->last_inst	= vcpu->arch.last_inst;
+	),
+
+	TP_printk("exit=%s"
+		" | pc=0x%lx"
+		" | msr=0x%lx"
+		" | dar=0x%lx"
+		" | last_inst=0x%lx"
+		,
+		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+		__entry->pc,
+		__entry->msr,
+		__entry->dar,
+		__entry->last_inst
+		)
+);
+
+TRACE_EVENT(kvm_booke206_stlb_write,
+	TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+	TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	mas0		)
+		__field(	__u32,	mas8		)
+		__field(	__u32,	mas1		)
+		__field(	__u64,	mas2		)
+		__field(	__u64,	mas7_3		)
+	),
+
+	TP_fast_assign(
+		__entry->mas0		= mas0;
+		__entry->mas8		= mas8;
+		__entry->mas1		= mas1;
+		__entry->mas2		= mas2;
+		__entry->mas7_3		= mas7_3;
+	),
+
+	TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+		__entry->mas0, __entry->mas8, __entry->mas1,
+		__entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_gtlb_write,
+	TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+	TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	mas0		)
+		__field(	__u32,	mas1		)
+		__field(	__u64,	mas2		)
+		__field(	__u64,	mas7_3		)
+	),
+
+	TP_fast_assign(
+		__entry->mas0		= mas0;
+		__entry->mas1		= mas1;
+		__entry->mas2		= mas2;
+		__entry->mas7_3		= mas7_3;
+	),
+
+	TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+		__entry->mas0, __entry->mas1,
+		__entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_ref_release,
+	TP_PROTO(__u64 pfn, __u32 flags),
+	TP_ARGS(pfn, flags),
+
+	TP_STRUCT__entry(
+		__field(	__u64,	pfn		)
+		__field(	__u32,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->pfn		= pfn;
+		__entry->flags		= flags;
+	),
+
+	TP_printk("pfn=%llx flags=%x",
+		__entry->pfn, __entry->flags)
+);
+
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+	{BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+	{BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio /* value/name pairs for __print_symbolic() */ \
+	kvm_trace_symbol_irqprio_spe \
+	kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+	{BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+	{BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+	{BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+	{BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+	{BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+	{BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+	{BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+	{BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+	{BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+	{BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+	{BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+	{BOOKE_IRQPRIO_FIT, "FIT"}, \
+	{BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+	{BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+	{BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+	{BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+	{BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"}
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+	TP_ARGS(vcpu, priority),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	cpu_nr		)
+		__field(	__u32,	priority		)
+		__field(	unsigned long,	pending		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_nr		= vcpu->vcpu_id;
+		__entry->priority	= priority;
+		__entry->pending	= vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("vcpu=%x prio=%s pending=%lx",
+		__entry->cpu_nr,
+		__print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+		__entry->pending)
+);
+
+#endif
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 0000000000..8d57c84285
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,525 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+
+#define kvm_trace_symbol_hcall \
+	{H_REMOVE,			"H_REMOVE"}, \
+	{H_ENTER,			"H_ENTER"}, \
+	{H_READ,			"H_READ"}, \
+	{H_CLEAR_MOD,			"H_CLEAR_MOD"}, \
+	{H_CLEAR_REF,			"H_CLEAR_REF"}, \
+	{H_PROTECT,			"H_PROTECT"}, \
+	{H_GET_TCE,			"H_GET_TCE"}, \
+	{H_PUT_TCE,			"H_PUT_TCE"}, \
+	{H_SET_SPRG0,			"H_SET_SPRG0"}, \
+	{H_SET_DABR,			"H_SET_DABR"}, \
+	{H_PAGE_INIT,			"H_PAGE_INIT"}, \
+	{H_SET_ASR,			"H_SET_ASR"}, \
+	{H_ASR_ON,			"H_ASR_ON"}, \
+	{H_ASR_OFF,			"H_ASR_OFF"}, \
+	{H_LOGICAL_CI_LOAD,		"H_LOGICAL_CI_LOAD"}, \
+	{H_LOGICAL_CI_STORE,		"H_LOGICAL_CI_STORE"}, \
+	{H_LOGICAL_CACHE_LOAD,		"H_LOGICAL_CACHE_LOAD"}, \
+	{H_LOGICAL_CACHE_STORE,		"H_LOGICAL_CACHE_STORE"}, \
+	{H_LOGICAL_ICBI,		"H_LOGICAL_ICBI"}, \
+	{H_LOGICAL_DCBF,		"H_LOGICAL_DCBF"}, \
+	{H_GET_TERM_CHAR,		"H_GET_TERM_CHAR"}, \
+	{H_PUT_TERM_CHAR,		"H_PUT_TERM_CHAR"}, \
+	{H_REAL_TO_LOGICAL,		"H_REAL_TO_LOGICAL"}, \
+	{H_HYPERVISOR_DATA,		"H_HYPERVISOR_DATA"}, \
+	{H_EOI,				"H_EOI"}, \
+	{H_CPPR,			"H_CPPR"}, \
+	{H_IPI,				"H_IPI"}, \
+	{H_IPOLL,			"H_IPOLL"}, \
+	{H_XIRR,			"H_XIRR"}, \
+	{H_PERFMON,			"H_PERFMON"}, \
+	{H_MIGRATE_DMA,			"H_MIGRATE_DMA"}, \
+	{H_REGISTER_VPA,		"H_REGISTER_VPA"}, \
+	{H_CEDE,			"H_CEDE"}, \
+	{H_CONFER,			"H_CONFER"}, \
+	{H_PROD,			"H_PROD"}, \
+	{H_GET_PPP,			"H_GET_PPP"}, \
+	{H_SET_PPP,			"H_SET_PPP"}, \
+	{H_PURR,			"H_PURR"}, \
+	{H_PIC,				"H_PIC"}, \
+	{H_REG_CRQ,			"H_REG_CRQ"}, \
+	{H_FREE_CRQ,			"H_FREE_CRQ"}, \
+	{H_VIO_SIGNAL,			"H_VIO_SIGNAL"}, \
+	{H_SEND_CRQ,			"H_SEND_CRQ"}, \
+	{H_COPY_RDMA,			"H_COPY_RDMA"}, \
+	{H_REGISTER_LOGICAL_LAN,	"H_REGISTER_LOGICAL_LAN"}, \
+	{H_FREE_LOGICAL_LAN,		"H_FREE_LOGICAL_LAN"}, \
+	{H_ADD_LOGICAL_LAN_BUFFER,	"H_ADD_LOGICAL_LAN_BUFFER"}, \
+	{H_SEND_LOGICAL_LAN,		"H_SEND_LOGICAL_LAN"}, \
+	{H_BULK_REMOVE,			"H_BULK_REMOVE"}, \
+	{H_MULTICAST_CTRL,		"H_MULTICAST_CTRL"}, \
+	{H_SET_XDABR,			"H_SET_XDABR"}, \
+	{H_STUFF_TCE,			"H_STUFF_TCE"}, \
+	{H_PUT_TCE_INDIRECT,		"H_PUT_TCE_INDIRECT"}, \
+	{H_CHANGE_LOGICAL_LAN_MAC,	"H_CHANGE_LOGICAL_LAN_MAC"}, \
+	{H_VTERM_PARTNER_INFO,		"H_VTERM_PARTNER_INFO"}, \
+	{H_REGISTER_VTERM,		"H_REGISTER_VTERM"}, \
+	{H_FREE_VTERM,			"H_FREE_VTERM"}, \
+	{H_RESET_EVENTS,		"H_RESET_EVENTS"}, \
+	{H_ALLOC_RESOURCE,		"H_ALLOC_RESOURCE"}, \
+	{H_FREE_RESOURCE,		"H_FREE_RESOURCE"}, \
+	{H_MODIFY_QP,			"H_MODIFY_QP"}, \
+	{H_QUERY_QP,			"H_QUERY_QP"}, \
+	{H_REREGISTER_PMR,		"H_REREGISTER_PMR"}, \
+	{H_REGISTER_SMR,		"H_REGISTER_SMR"}, \
+	{H_QUERY_MR,			"H_QUERY_MR"}, \
+	{H_QUERY_MW,			"H_QUERY_MW"}, \
+	{H_QUERY_HCA,			"H_QUERY_HCA"}, \
+	{H_QUERY_PORT,			"H_QUERY_PORT"}, \
+	{H_MODIFY_PORT,			"H_MODIFY_PORT"}, \
+	{H_DEFINE_AQP1,			"H_DEFINE_AQP1"}, \
+	{H_GET_TRACE_BUFFER,		"H_GET_TRACE_BUFFER"}, \
+	{H_DEFINE_AQP0,			"H_DEFINE_AQP0"}, \
+	{H_RESIZE_MR,			"H_RESIZE_MR"}, \
+	{H_ATTACH_MCQP,			"H_ATTACH_MCQP"}, \
+	{H_DETACH_MCQP,			"H_DETACH_MCQP"}, \
+	{H_CREATE_RPT,			"H_CREATE_RPT"}, \
+	{H_REMOVE_RPT,			"H_REMOVE_RPT"}, \
+	{H_REGISTER_RPAGES,		"H_REGISTER_RPAGES"}, \
+	{H_DISABLE_AND_GET,		"H_DISABLE_AND_GET"}, \
+	{H_ERROR_DATA,			"H_ERROR_DATA"}, \
+	{H_GET_HCA_INFO,		"H_GET_HCA_INFO"}, \
+	{H_GET_PERF_COUNT,		"H_GET_PERF_COUNT"}, \
+	{H_MANAGE_TRACE,		"H_MANAGE_TRACE"}, \
+	{H_GET_CPU_CHARACTERISTICS,	"H_GET_CPU_CHARACTERISTICS"}, \
+	{H_FREE_LOGICAL_LAN_BUFFER,	"H_FREE_LOGICAL_LAN_BUFFER"}, \
+	{H_QUERY_INT_STATE,		"H_QUERY_INT_STATE"}, \
+	{H_POLL_PENDING,		"H_POLL_PENDING"}, \
+	{H_ILLAN_ATTRIBUTES,		"H_ILLAN_ATTRIBUTES"}, \
+	{H_MODIFY_HEA_QP,		"H_MODIFY_HEA_QP"}, \
+	{H_QUERY_HEA_QP,		"H_QUERY_HEA_QP"}, \
+	{H_QUERY_HEA,			"H_QUERY_HEA"}, \
+	{H_QUERY_HEA_PORT,		"H_QUERY_HEA_PORT"}, \
+	{H_MODIFY_HEA_PORT,		"H_MODIFY_HEA_PORT"}, \
+	{H_REG_BCMC,			"H_REG_BCMC"}, \
+	{H_DEREG_BCMC,			"H_DEREG_BCMC"}, \
+	{H_REGISTER_HEA_RPAGES,		"H_REGISTER_HEA_RPAGES"}, \
+	{H_DISABLE_AND_GET_HEA,		"H_DISABLE_AND_GET_HEA"}, \
+	{H_GET_HEA_INFO,		"H_GET_HEA_INFO"}, \
+	{H_ALLOC_HEA_RESOURCE,		"H_ALLOC_HEA_RESOURCE"}, \
+	{H_ADD_CONN,			"H_ADD_CONN"}, \
+	{H_DEL_CONN,			"H_DEL_CONN"}, \
+	{H_JOIN,			"H_JOIN"}, \
+	{H_VASI_STATE,			"H_VASI_STATE"}, \
+	{H_ENABLE_CRQ,			"H_ENABLE_CRQ"}, \
+	{H_GET_EM_PARMS,		"H_GET_EM_PARMS"}, \
+	{H_GET_ENERGY_SCALE_INFO,	"H_GET_ENERGY_SCALE_INFO"}, \
+	{H_SET_MPP,			"H_SET_MPP"}, \
+	{H_GET_MPP,			"H_GET_MPP"}, \
+	{H_HOME_NODE_ASSOCIATIVITY,	"H_HOME_NODE_ASSOCIATIVITY"}, \
+	{H_BEST_ENERGY,			"H_BEST_ENERGY"}, \
+	{H_XIRR_X,			"H_XIRR_X"}, \
+	{H_RANDOM,			"H_RANDOM"}, \
+	{H_COP,				"H_COP"}, \
+	{H_GET_MPP_X,			"H_GET_MPP_X"}, \
+	{H_SET_MODE,			"H_SET_MODE"}, \
+	{H_REGISTER_PROC_TBL,		"H_REGISTER_PROC_TBL"}, \
+	{H_QUERY_VAS_CAPABILITIES,	"H_QUERY_VAS_CAPABILITIES"}, \
+	{H_INT_GET_SOURCE_INFO,		"H_INT_GET_SOURCE_INFO"}, \
+	{H_INT_SET_SOURCE_CONFIG,	"H_INT_SET_SOURCE_CONFIG"}, \
+	{H_INT_GET_QUEUE_INFO,		"H_INT_GET_QUEUE_INFO"}, \
+	{H_INT_SET_QUEUE_CONFIG,	"H_INT_SET_QUEUE_CONFIG"}, \
+	{H_INT_ESB,			"H_INT_ESB"}, \
+	{H_INT_RESET,			"H_INT_RESET"}, \
+	{H_RPT_INVALIDATE,		"H_RPT_INVALIDATE"}, \
+	{H_RTAS,			"H_RTAS"}, \
+	{H_LOGICAL_MEMOP,		"H_LOGICAL_MEMOP"}, \
+	{H_CAS,				"H_CAS"}, \
+	{H_UPDATE_DT,			"H_UPDATE_DT"}, \
+	{H_GET_PERF_COUNTER_INFO,	"H_GET_PERF_COUNTER_INFO"}, \
+	{H_SET_PARTITION_TABLE,		"H_SET_PARTITION_TABLE"}, \
+	{H_ENTER_NESTED,		"H_ENTER_NESTED"}, \
+	{H_TLB_INVALIDATE,		"H_TLB_INVALIDATE"}, \
+	{H_COPY_TOFROM_GUEST,		"H_COPY_TOFROM_GUEST"}
+
+
+#define kvm_trace_symbol_kvmret \
+	{RESUME_GUEST,			"RESUME_GUEST"}, \
+	{RESUME_GUEST_NV,		"RESUME_GUEST_NV"}, \
+	{RESUME_HOST,			"RESUME_HOST"}, \
+	{RESUME_HOST_NV,		"RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+	{H_SUCCESS,			"H_SUCCESS"}, \
+	{H_BUSY,			"H_BUSY"}, \
+	{H_CLOSED,			"H_CLOSED"}, \
+	{H_NOT_AVAILABLE,		"H_NOT_AVAILABLE"}, \
+	{H_CONSTRAINED,			"H_CONSTRAINED"}, \
+	{H_PARTIAL,			"H_PARTIAL"}, \
+	{H_IN_PROGRESS,			"H_IN_PROGRESS"}, \
+	{H_PAGE_REGISTERED,		"H_PAGE_REGISTERED"}, \
+	{H_PARTIAL_STORE,		"H_PARTIAL_STORE"}, \
+	{H_PENDING,			"H_PENDING"}, \
+	{H_CONTINUE,			"H_CONTINUE"}, \
+	{H_LONG_BUSY_START_RANGE,	"H_LONG_BUSY_START_RANGE"}, \
+	{H_LONG_BUSY_ORDER_1_MSEC,	"H_LONG_BUSY_ORDER_1_MSEC"}, \
+	{H_LONG_BUSY_ORDER_10_MSEC,	"H_LONG_BUSY_ORDER_10_MSEC"}, \
+	{H_LONG_BUSY_ORDER_100_MSEC,	"H_LONG_BUSY_ORDER_100_MSEC"}, \
+	{H_LONG_BUSY_ORDER_1_SEC,	"H_LONG_BUSY_ORDER_1_SEC"}, \
+	{H_LONG_BUSY_ORDER_10_SEC,	"H_LONG_BUSY_ORDER_10_SEC"}, \
+	{H_LONG_BUSY_ORDER_100_SEC,	"H_LONG_BUSY_ORDER_100_SEC"}, \
+	{H_LONG_BUSY_END_RANGE,		"H_LONG_BUSY_END_RANGE"}, \
+	{H_TOO_HARD,			"H_TOO_HARD"}, \
+	{H_HARDWARE,			"H_HARDWARE"}, \
+	{H_FUNCTION,			"H_FUNCTION"}, \
+	{H_PRIVILEGE,			"H_PRIVILEGE"}, \
+	{H_PARAMETER,			"H_PARAMETER"}, \
+	{H_BAD_MODE,			"H_BAD_MODE"}, \
+	{H_PTEG_FULL,			"H_PTEG_FULL"}, \
+	{H_NOT_FOUND,			"H_NOT_FOUND"}, \
+	{H_RESERVED_DABR,		"H_RESERVED_DABR"}, \
+	{H_NO_MEM,			"H_NO_MEM"}, \
+	{H_AUTHORITY,			"H_AUTHORITY"}, \
+	{H_PERMISSION,			"H_PERMISSION"}, \
+	{H_DROPPED,			"H_DROPPED"}, \
+	{H_SOURCE_PARM,			"H_SOURCE_PARM"}, \
+	{H_DEST_PARM,			"H_DEST_PARM"}, \
+	{H_REMOTE_PARM,			"H_REMOTE_PARM"}, \
+	{H_RESOURCE,			"H_RESOURCE"}, \
+	{H_ADAPTER_PARM,		"H_ADAPTER_PARM"}, \
+	{H_RH_PARM,			"H_RH_PARM"}, \
+	{H_RCQ_PARM,			"H_RCQ_PARM"}, \
+	{H_SCQ_PARM,			"H_SCQ_PARM"}, \
+	{H_EQ_PARM,			"H_EQ_PARM"}, \
+	{H_RT_PARM,			"H_RT_PARM"}, \
+	{H_ST_PARM,			"H_ST_PARM"}, \
+	{H_SIGT_PARM,			"H_SIGT_PARM"}, \
+	{H_TOKEN_PARM,			"H_TOKEN_PARM"}, \
+	{H_MLENGTH_PARM,		"H_MLENGTH_PARM"}, \
+	{H_MEM_PARM,			"H_MEM_PARM"}, \
+	{H_MEM_ACCESS_PARM,		"H_MEM_ACCESS_PARM"}, \
+	{H_ATTR_PARM,			"H_ATTR_PARM"}, \
+	{H_PORT_PARM,			"H_PORT_PARM"}, \
+	{H_MCG_PARM,			"H_MCG_PARM"}, \
+	{H_VL_PARM,			"H_VL_PARM"}, \
+	{H_TSIZE_PARM,			"H_TSIZE_PARM"}, \
+	{H_TRACE_PARM,			"H_TRACE_PARM"}, \
+	{H_MASK_PARM,			"H_MASK_PARM"}, \
+	{H_MCG_FULL,			"H_MCG_FULL"}, \
+	{H_ALIAS_EXIST,			"H_ALIAS_EXIST"}, \
+	{H_P_COUNTER,			"H_P_COUNTER"}, \
+	{H_TABLE_FULL,			"H_TABLE_FULL"}, \
+	{H_ALT_TABLE,			"H_ALT_TABLE"}, \
+	{H_MR_CONDITION,		"H_MR_CONDITION"}, \
+	{H_NOT_ENOUGH_RESOURCES,	"H_NOT_ENOUGH_RESOURCES"}, \
+	{H_R_STATE,			"H_R_STATE"}, \
+	{H_RESCINDED,			"H_RESCINDED"}, \
+	{H_P2,				"H_P2"}, \
+	{H_P3,				"H_P3"}, \
+	{H_P4,				"H_P4"}, \
+	{H_P5,				"H_P5"}, \
+	{H_P6,				"H_P6"}, \
+	{H_P7,				"H_P7"}, \
+	{H_P8,				"H_P8"}, \
+	{H_P9,				"H_P9"}, \
+	{H_TOO_BIG,			"H_TOO_BIG"}, \
+	{H_OVERLAP,			"H_OVERLAP"}, \
+	{H_INTERRUPT,			"H_INTERRUPT"}, \
+	{H_BAD_DATA,			"H_BAD_DATA"}, \
+	{H_NOT_ACTIVE,			"H_NOT_ACTIVE"}, \
+	{H_SG_LIST,			"H_SG_LIST"}, \
+	{H_OP_MODE,			"H_OP_MODE"}, \
+	{H_COP_HW,			"H_COP_HW"}, \
+	{H_UNSUPPORTED_FLAG_START,	"H_UNSUPPORTED_FLAG_START"}, \
+	{H_UNSUPPORTED_FLAG_END,	"H_UNSUPPORTED_FLAG_END"}, \
+	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
+	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	pc)
+		__field(unsigned long,  pending_exceptions)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu->vcpu_id;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->ceded		= vcpu->arch.ceded;
+		__entry->pending_exceptions  = vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+			__entry->vcpu_id,
+			__entry->pc,
+			__entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		trap)
+		__field(unsigned long,	pc)
+		__field(unsigned long,	msr)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu->vcpu_id;
+		__entry->trap	 = vcpu->arch.trap;
+		__entry->ceded	 = vcpu->arch.ceded;
+		__entry->pc	 = kvmppc_get_pc(vcpu);
+		__entry->msr	 = vcpu->arch.shregs.msr;
+	),
+
+	TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+		__entry->vcpu_id,
+		__print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+		__entry->pc, __entry->msr, __entry->ceded
+	)
+);
+
+TRACE_EVENT(kvm_page_fault_enter,	/* records three words read via hptep plus ea, dsisr and memslot info */
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+		 struct kvm_memory_slot *memslot, unsigned long ea,
+		 unsigned long dsisr),
+
+	TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(unsigned long,	gpte_r)
+		__field(unsigned long,	ea)
+		__field(u64,		base_gfn)
+		__field(u32,		slot_flags)
+		__field(u32,		dsisr)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	  = hptep[0];
+		__entry->hpte_r	  = hptep[1];
+		__entry->gpte_r	  = hptep[2];	/* hptep must point at >= 3 words; printed as "guest=" */
+		__entry->ea	  = ea;
+		__entry->dsisr	  = dsisr;	/* unsigned long argument narrowed to the u32 field */
+		__entry->base_gfn = memslot ? memslot->base_gfn : -1UL;	/* -1UL when no memslot was passed */
+		__entry->slot_flags = memslot ? memslot->flags : 0;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+		   __entry->ea, __entry->dsisr,
+		   __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+	TP_ARGS(vcpu, hptep, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(long,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	= hptep[0];
+		__entry->hpte_r	= hptep[1];
+		__entry->ret = ret;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	req)
+		__field(unsigned long,	gpr4)
+		__field(unsigned long,	gpr5)
+		__field(unsigned long,	gpr6)
+		__field(unsigned long,	gpr7)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->req   = kvmppc_get_gpr(vcpu, 3);
+		__entry->gpr4  = kvmppc_get_gpr(vcpu, 4);
+		__entry->gpr5  = kvmppc_get_gpr(vcpu, 5);
+		__entry->gpr6  = kvmppc_get_gpr(vcpu, 6);
+		__entry->gpr7  = kvmppc_get_gpr(vcpu, 7);
+	),
+
+	TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+		   __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit,	/* hcall_rc prints as H_TOO_HARD whenever ret has RESUME_FLAG_HOST set */
+	TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+	TP_ARGS(vcpu, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	ret)
+		__field(unsigned long,	hcall_rc)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->ret	  = ret;	/* decoded with kvm_trace_symbol_kvmret when printed */
+		__entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);	/* guest GPR3 at trace time */
+	),
+
+	TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+		   __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+					H_TOO_HARD : __entry->hcall_rc,
+					kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core,	/* records a vcore's runner vcpu id, runnable count and the current tgid */
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu	= vc->runner->vcpu_id;	/* vc->runner is dereferenced unconditionally */
+		__entry->n_runnable	= vc->n_runnable;
+		__entry->where		= where;	/* zero prints "Enter", non-zero prints "Exit" */
+		__entry->tgid		= current->tgid;
+	),
+
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		    __entry->where ? "Exit" : "Enter",
+		    __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,	/* records the vcpu id, its vcore's runnable count and the current tgid */
+	TP_PROTO(struct kvm_vcpu *vcpu, int where),
+
+	TP_ARGS(vcpu, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu = vcpu->vcpu_id;	/* id of the vcpu passed in, not vcore->runner */
+		__entry->n_runnable  = vcpu->arch.vcore->n_runnable;
+		__entry->where       = where;	/* zero prints "Enter", non-zero prints "Exit" */
+		__entry->tgid	     = current->tgid;
+	),
+
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		   __entry->where ? "Exit" : "Enter",
+		   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_wakeup,	/* records a duration in ns, a wait/poll flag and the current tgid */
+	TP_PROTO(int do_sleep, __u64 ns),
+
+	TP_ARGS(do_sleep, ns),
+
+	TP_STRUCT__entry(
+		__field(__u64,  ns)
+		__field(int,    waited)
+		__field(pid_t,  tgid)
+	),
+
+	TP_fast_assign(
+		__entry->ns     = ns;
+		__entry->waited = do_sleep;	/* non-zero prints "wait", zero prints "poll" */
+		__entry->tgid   = current->tgid;
+	),
+
+	TP_printk("%s time %llu ns, tgid=%d",
+		__entry->waited ? "wait" : "poll",
+		__entry->ns, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(pid_t,		tgid)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->tgid	  = current->tgid;
+	),
+
+	TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		exit)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->exit     = vcpu->run->exit_reason;
+		__entry->ret      = vcpu->arch.ret;
+	),
+
+	TP_printk("VCPU %d: exit=%d, ret=%d",
+			__entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
new file mode 100644
index 0000000000..46a46d328f
--- /dev/null
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_PR_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_pr
+
+TRACE_EVENT(kvm_book3s_reenter,	/* records the value r and the guest pc from kvmppc_get_pc() */
+	TP_PROTO(int r, struct kvm_vcpu *vcpu),
+	TP_ARGS(r, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	r		)	/* NOTE(review): unsigned here, but passed as int and printed with %d */
+		__field(	unsigned long,	pc		)
+	),
+
+	TP_fast_assign(
+		__entry->r		= r;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+	),
+
+	TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+TRACE_EVENT(kvm_book3s_64_mmu_map,	/* records one mapping: w/x flag chars, eaddr, hpteg, va, vpage, hpaddr */
+	TP_PROTO(int rflags, ulong hpteg, ulong va, kvm_pfn_t hpaddr,
+		 struct kvmppc_pte *orig_pte),
+	TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+	TP_STRUCT__entry(
+		__field(	unsigned char,		flag_w		)	/* 'w', or '-' when (rflags & HPTE_R_PP) == 3 */
+		__field(	unsigned char,		flag_x		)	/* 'x', or '-' when HPTE_R_N is set in rflags */
+		__field(	unsigned long,		eaddr		)
+		__field(	unsigned long,		hpteg		)
+		__field(	unsigned long,		va		)
+		__field(	unsigned long long,	vpage		)
+		__field(	unsigned long,		hpaddr		)
+	),
+
+	TP_fast_assign(
+		__entry->flag_w	= ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+		__entry->flag_x	= (rflags & HPTE_R_N) ? '-' : 'x';
+		__entry->eaddr	= orig_pte->eaddr;	/* eaddr and vpage come from orig_pte, the rest from the arguments */
+		__entry->hpteg	= hpteg;
+		__entry->va	= va;
+		__entry->vpage	= orig_pte->vpage;
+		__entry->hpaddr	= hpaddr;
+	),
+
+	TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+		  __entry->flag_w, __entry->flag_x, __entry->eaddr,
+		  __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+TRACE_EVENT(kvm_book3s_mmu_map,	/* copies the addressing fields of an hpte_cache entry, printed with a "Map:" prefix */
+	TP_PROTO(struct hpte_cache *pte),
+	TP_ARGS(pte),
+
+	TP_STRUCT__entry(
+		__field(	u64,		host_vpn	)
+		__field(	u64,		pfn		)
+		__field(	ulong,		eaddr		)
+		__field(	u64,		vpage		)
+		__field(	ulong,		raddr		)
+		__field(	int,		flags		)	/* bitmask: 0x4 may_read, 0x2 may_write, 0x1 may_execute */
+	),
+
+	TP_fast_assign(
+		__entry->host_vpn	= pte->host_vpn;
+		__entry->pfn		= pte->pfn;
+		__entry->eaddr		= pte->pte.eaddr;
+		__entry->vpage		= pte->pte.vpage;
+		__entry->raddr		= pte->pte.raddr;
+		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) |
+					  (pte->pte.may_write ? 0x2 : 0) |
+					  (pte->pte.may_execute ? 0x1 : 0);
+	),
+
+	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
+		  __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_invalidate,	/* NOTE(review): prints host_vpn as "hva=", while kvm_book3s_mmu_map labels it "hvpn=" */
+	TP_PROTO(struct hpte_cache *pte),
+	TP_ARGS(pte),
+
+	TP_STRUCT__entry(
+		__field(	u64,		host_vpn	)
+		__field(	u64,		pfn		)
+		__field(	ulong,		eaddr		)
+		__field(	u64,		vpage		)
+		__field(	ulong,		raddr		)
+		__field(	int,		flags		)	/* bitmask: 0x4 may_read, 0x2 may_write, 0x1 may_execute */
+	),
+
+	TP_fast_assign(
+		__entry->host_vpn	= pte->host_vpn;
+		__entry->pfn		= pte->pfn;
+		__entry->eaddr		= pte->pte.eaddr;
+		__entry->vpage		= pte->pte.vpage;
+		__entry->raddr		= pte->pte.raddr;
+		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) |
+					  (pte->pte.may_write ? 0x2 : 0) |
+					  (pte->pte.may_execute ? 0x1 : 0);
+	),
+
+	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
+		  __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_flush,	/* records the hpte cache count, a type string and two caller-supplied values */
+	TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+		 unsigned long long p2),
+	TP_ARGS(type, vcpu, p1, p2),
+
+	TP_STRUCT__entry(
+		__field(	int,			count		)
+		__field(	unsigned long long,	p1		)
+		__field(	unsigned long long,	p2		)
+		__field(	const char *,		type		)	/* NOTE(review): pointer only, not a copy - confirm callers pass static strings */
+	),
+
+	TP_fast_assign(
+		__entry->count		= to_book3s(vcpu)->hpte_cache_count;
+		__entry->p1		= p1;
+		__entry->p2		= p2;
+		__entry->type		= type;
+	),
+
+	TP_printk("Flush %d %sPTEs: %llx - %llx",
+		  __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+TRACE_EVENT(kvm_book3s_slb_found,
+	TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+	TP_ARGS(gvsid, hvsid),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long long,	gvsid		)
+		__field(	unsigned long long,	hvsid		)
+	),
+
+	TP_fast_assign(
+		__entry->gvsid		= gvsid;
+		__entry->hvsid		= hvsid;
+	),
+
+	TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_fail,	/* prints sid_map_mask, then SID_MAP_MASK - sid_map_mask, then gvsid */
+	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+	TP_ARGS(sid_map_mask, gvsid),
+
+	TP_STRUCT__entry(
+		__field(	unsigned short,		sid_map_mask	)
+		__field(	unsigned long long,	gvsid		)
+	),
+
+	TP_fast_assign(
+		__entry->sid_map_mask	= sid_map_mask;
+		__entry->gvsid		= gvsid;
+	),
+
+	TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+		  SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_map,
+	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+		 unsigned long long hvsid),
+	TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+	TP_STRUCT__entry(
+		__field(	unsigned short,		sid_map_mask	)
+		__field(	unsigned long long,	guest_vsid	)
+		__field(	unsigned long long,	host_vsid	)
+	),
+
+	TP_fast_assign(
+		__entry->sid_map_mask	= sid_map_mask;
+		__entry->guest_vsid	= gvsid;
+		__entry->host_vsid	= hvsid;
+	),
+
+	TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+		  __entry->guest_vsid, __entry->host_vsid)
+);
+
+TRACE_EVENT(kvm_book3s_slbmte,
+	TP_PROTO(u64 slb_vsid, u64 slb_esid),
+	TP_ARGS(slb_vsid, slb_esid),
+
+	TP_STRUCT__entry(
+		__field(	u64,	slb_vsid	)
+		__field(	u64,	slb_esid	)
+	),
+
+	TP_fast_assign(
+		__entry->slb_vsid	= slb_vsid;
+		__entry->slb_esid	= slb_esid;
+	),
+
+	TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+	TP_ARGS(exit_nr, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_nr		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned long,	msr		)
+		__field(	unsigned long,	dar		)
+		__field(	unsigned long,	srr1		)
+		__field(	unsigned long,	last_inst	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_nr	= exit_nr;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->dar		= kvmppc_get_fault_dar(vcpu);
+		__entry->msr		= kvmppc_get_msr(vcpu);
+		__entry->srr1		= vcpu->arch.shadow_srr1;
+		__entry->last_inst	= vcpu->arch.last_inst;
+	),
+
+	TP_printk("exit=%s"
+		" | pc=0x%lx"
+		" | msr=0x%lx"
+		" | dar=0x%lx"
+		" | srr1=0x%lx"
+		" | last_inst=0x%lx"
+		,
+		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+		__entry->pc,
+		__entry->msr,
+		__entry->dar,
+		__entry->srr1,
+		__entry->last_inst
+		)
+);
+
+#endif /* _TRACE_KVM_PR_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
new file mode 100644
index 0000000000..6eac63e79a
--- /dev/null
+++ b/arch/powerpc/lib/Makefile
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for ppc-specific library files.
+#
+
+ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
+
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
+
+CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
+
+KASAN_SANITIZE_code-patching.o := n
+KASAN_SANITIZE_feature-fixups.o := n
+# restart_table.o contains functions called in the NMI interrupt path
+# which can be in real mode. Disable KASAN.
+KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+obj-y += code-patching.o feature-fixups.o pmem.o
+
+obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
+
+ifndef CONFIG_KASAN
+obj-y	+=	string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32)	+= strlen_32.o
+endif
+
+obj-$(CONFIG_PPC32)	+= div64.o copy_32.o crtsavres.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
+
+# See corresponding test in arch/powerpc/Makefile
+# 64-bit linker creates .sfpr on demand for final link (vmlinux),
+# so it is only needed for modules, and only for older linkers which
+# do not support --save-restore-funcs
+ifndef CONFIG_LD_IS_BFD
+always-$(CONFIG_PPC64)	+= crtsavres.o
+endif
+
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+			       memcpy_power7.o restart_table.o
+
+obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
+	   memcpy_64.o copy_mc_64.o
+
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP)	+= qspinlock.o
+else
+obj64-$(CONFIG_SMP)	+= locks.o
+endif
+
+obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST)	+= test_emulate_step.o \
+					   test_emulate_step_exec_instr.o
+
+obj-y			+= checksum_$(BITS).o checksum_wrappers.o \
+			   string_$(BITS).o
+
+obj-y			+= sstep.o
+obj-$(CONFIG_PPC_FPU)	+= ldstfp.o
+obj64-y			+= quad.o
+
+obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
+
+obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+
+obj-$(CONFIG_ALTIVEC)	+= xor_vmx.o xor_vmx_glue.o
+CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
+# Enable <altivec.h>
+CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
new file mode 100644
index 0000000000..cd00b9bdd7
--- /dev/null
+++ b/arch/powerpc/lib/checksum_32.S
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/export.h>
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * __csum_partial(buff, len, sum)
+ */
+_GLOBAL(__csum_partial)
+	subi	r3,r3,4
+	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r0,r3,2		/* Align buffer to longword boundary */
+	beq+	1f
+	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
+	subi	r4,r4,2
+	addi	r3,r3,2
+	srwi.	r6,r4,2		/* # words to do */
+	adde	r5,r5,r0
+	beq	3f
+1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
+	beq	21f
+	mtctr	r6
+2:	lwzu	r0,4(r3)
+	adde	r5,r5,r0
+	bdnz	2b
+21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
+	beq	3f
+	lwz	r0,4(r3)
+	mtctr	r6
+	lwz	r6,8(r3)
+	adde	r5,r5,r0
+	lwz	r7,12(r3)
+	adde	r5,r5,r6
+	lwzu	r8,16(r3)
+	adde	r5,r5,r7
+	bdz	23f
+22:	lwz	r0,4(r3)
+	adde	r5,r5,r8
+	lwz	r6,8(r3)
+	adde	r5,r5,r0
+	lwz	r7,12(r3)
+	adde	r5,r5,r6
+	lwzu	r8,16(r3)
+	adde	r5,r5,r7
+	bdnz	22b
+23:	adde	r5,r5,r8
+3:	andi.	r0,r4,2
+	beq+	4f
+	lhz	r0,4(r3)
+	addi	r3,r3,2
+	adde	r5,r5,r0
+4:	andi.	r0,r4,1
+	beq+	5f
+	lbz	r0,4(r3)
+	slwi	r0,r0,8		/* Upper byte of word */
+	adde	r5,r5,r0
+5:	addze	r3,r5		/* add in final carry */
+	blr
+EXPORT_SYMBOL(__csum_partial)
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in 0xffffffff, while copying the block to dst.
+ * If an access exception occurs it returns zero.
+ *
+ * csum_partial_copy_generic(src, dst, len)
+ */
+#define CSUM_COPY_16_BYTES_WITHEX(n)	\
+8 ## n ## 0:			\
+	lwz	r7,4(r4);	\
+8 ## n ## 1:			\
+	lwz	r8,8(r4);	\
+8 ## n ## 2:			\
+	lwz	r9,12(r4);	\
+8 ## n ## 3:			\
+	lwzu	r10,16(r4);	\
+8 ## n ## 4:			\
+	stw	r7,4(r6);	\
+	adde	r12,r12,r7;	\
+8 ## n ## 5:			\
+	stw	r8,8(r6);	\
+	adde	r12,r12,r8;	\
+8 ## n ## 6:			\
+	stw	r9,12(r6);	\
+	adde	r12,r12,r9;	\
+8 ## n ## 7:			\
+	stwu	r10,16(r6);	\
+	adde	r12,r12,r10
+
+#define CSUM_COPY_16_BYTES_EXCODE(n)		\
+	EX_TABLE(8 ## n ## 0b, fault);	\
+	EX_TABLE(8 ## n ## 1b, fault);	\
+	EX_TABLE(8 ## n ## 2b, fault);	\
+	EX_TABLE(8 ## n ## 3b, fault);	\
+	EX_TABLE(8 ## n ## 4b, fault);	\
+	EX_TABLE(8 ## n ## 5b, fault);	\
+	EX_TABLE(8 ## n ## 6b, fault);	\
+	EX_TABLE(8 ## n ## 7b, fault);
+
+	.text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(csum_partial_copy_generic)
+	li	r12,-1
+	addic	r0,r0,0			/* clear carry */
+	addi	r6,r4,-4
+	neg	r0,r4
+	addi	r4,r3,-4
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	crset	4*cr7+eq
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	rlwinm	r7,r6,3,0x8
+	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
+	cmplwi	cr7,r7,0	/* is destination address even ? */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	mtctr	r8
+	beq+	61f
+	li	r3,0
+70:	lbz	r9,4(r4)		/* do some bytes */
+	addi	r4,r4,1
+	slwi	r3,r3,8
+	rlwimi	r3,r9,0,24,31
+71:	stb	r9,4(r6)
+	addi	r6,r6,1
+	bdnz	70b
+	adde	r12,r12,r3
+61:	subf	r5,r0,r5
+	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+	adde	r12,r12,r9
+73:	stwu	r9,4(r6)
+	bdnz	72b
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	beq	63f
+
+	/* Here we decide how far ahead to prefetch the source */
+	li	r3,4
+	cmpwi	r0,1
+	li	r7,0
+	ble	114f
+	li	r7,1
+#if MAX_COPY_PREFETCH > 1
+	/* Heuristically, for large transfers we prefetch
+	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
+	   we prefetch 1 cacheline ahead. */
+	cmpwi	r0,MAX_COPY_PREFETCH
+	ble	112f
+	li	r7,MAX_COPY_PREFETCH
+112:	mtctr	r7
+111:	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+	bdnz	111b
+#else
+	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114:	subf	r8,r7,r0
+	mr	r0,r7
+	mtctr	r8
+
+53:	dcbt	r3,r4
+54:	dcbz	r11,r6
+/* the main body of the cacheline loop */
+	CSUM_COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_BYTES >= 32
+	CSUM_COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_BYTES >= 64
+	CSUM_COPY_16_BYTES_WITHEX(2)
+	CSUM_COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_BYTES >= 128
+	CSUM_COPY_16_BYTES_WITHEX(4)
+	CSUM_COPY_16_BYTES_WITHEX(5)
+	CSUM_COPY_16_BYTES_WITHEX(6)
+	CSUM_COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+	bdnz	53b
+	cmpwi	r0,0
+	li	r3,4
+	li	r7,0
+	bne	114b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+	adde	r12,r12,r0
+31:	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,2
+	beq+	65f
+40:	lhz	r0,4(r4)
+	addi	r4,r4,2
+41:	sth	r0,4(r6)
+	adde	r12,r12,r0
+	addi	r6,r6,2
+65:	andi.	r0,r5,1
+	beq+	66f
+50:	lbz	r0,4(r4)
+51:	stb	r0,4(r6)
+	slwi	r0,r0,8
+	adde	r12,r12,r0
+66:	addze	r3,r12
+	beqlr+	cr7
+	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
+	blr
+
+fault:
+	li	r3,0
+	blr
+
+	EX_TABLE(70b, fault);
+	EX_TABLE(71b, fault);
+	EX_TABLE(72b, fault);
+	EX_TABLE(73b, fault);
+	EX_TABLE(54b, fault);
+
+/*
+ * this stuff handles faults in the cacheline loop: faults in the read
+ * part and in the write part both branch to the common "fault" label
+ */
+	CSUM_COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_BYTES >= 32
+	CSUM_COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_BYTES >= 64
+	CSUM_COPY_16_BYTES_EXCODE(2)
+	CSUM_COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_BYTES >= 128
+	CSUM_COPY_16_BYTES_EXCODE(4)
+	CSUM_COPY_16_BYTES_EXCODE(5)
+	CSUM_COPY_16_BYTES_EXCODE(6)
+	CSUM_COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+	EX_TABLE(30b, fault);
+	EX_TABLE(31b, fault);
+	EX_TABLE(40b, fault);
+	EX_TABLE(41b, fault);
+	EX_TABLE(50b, fault);
+	EX_TABLE(51b, fault);
+
+EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ *			   const struct in6_addr *daddr,
+ *			   __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+	lwz	r8, 0(r3)	/* first word of saddr (r3, first argument) */
+	lwz	r9, 4(r3)
+	addc	r0, r7, r8	/* r0 = sum (r7, fifth argument) + word; starts the carry chain */
+	lwz	r10, 8(r3)
+	adde	r0, r0, r9	/* each adde adds the next word plus the previous carry */
+	lwz	r11, 12(r3)
+	adde	r0, r0, r10
+	lwz	r8, 0(r4)	/* first word of daddr (r4, second argument) */
+	adde	r0, r0, r11
+	lwz	r9, 4(r4)
+	adde	r0, r0, r8
+	lwz	r10, 8(r4)
+	adde	r0, r0, r9
+	lwz	r11, 12(r4)
+	adde	r0, r0, r10
+	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
+	adde	r0, r0, r11
+	adde	r0, r0, r5
+	addze	r0, r0		/* add the final carry back in */
+	rotlwi	r3, r0, 16	/* r3 = r0 with its halfwords swapped */
+	add	r3, r0, r3	/* upper halfword = hi + lo, plus the carry out of the lower halfword */
+	not	r3, r3		/* one's complement of the folded sum */
+	rlwinm	r3, r3, 16, 16, 31	/* rotate the upper halfword down and keep only the low 16 bits */
+	blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
new file mode 100644
index 0000000000..d53d8f09a2
--- /dev/null
+++ b/arch/powerpc/lib/checksum_64.S
@@ -0,0 +1,443 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/export.h>
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * __csum_partial(r3=buff, r4=len, r5=sum)
+ */
+_GLOBAL(__csum_partial)
+	addic	r0,r5,0			/* clear carry */
+
+	srdi.	r6,r4,3			/* less than 8 bytes? */
+	beq	.Lcsum_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
+	beq	.Lcsum_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+	lhz	r6,0(r3)		/* align to doubleword */
+	subi	r4,r4,2
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	bdnz	1b
+
+.Lcsum_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r4,7
+	beq	.Lcsum_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r4,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
+	 * because of the XER dependency. This means the fastest this loop can
+	 * go is 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+
+	adde	r0,r0,r11
+
+	adde	r0,r0,r12
+
+	adde	r0,r0,r14
+
+	adde	r0,r0,r15
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+	bdnz	2b
+
+
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+	adde	r0,r0,r11
+	adde	r0,r0,r12
+	adde	r0,r0,r14
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r4,r4,63
+
+.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r4,3
+	beq	.Lcsum_tail_word
+
+	mtctr	r6
+3:
+	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+	bdnz	3b
+
+	andi.	r4,r4,7
+
+.Lcsum_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r4,2
+	beq	.Lcsum_tail_halfword
+
+	lwz	r6,0(r3)
+	addi	r3,r3,4
+	adde	r0,r0,r6
+	subi	r4,r4,4
+
+.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r4,1
+	beq	.Lcsum_tail_byte
+
+	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	subi	r4,r4,2
+
+.Lcsum_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r4,1
+	beq	.Lcsum_finish
+
+	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
+
+.Lcsum_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+EXPORT_SYMBOL(__csum_partial)
+
+
+/*
+ * Exception table annotations. Each macro places a local label on the
+ * instruction that follows it and adds an exception table entry for
+ * that label.
+ *
+ * "source" and "dest" route to .Lerror, which reloads r14-r16 and pops
+ * the stack frame before returning 0. "srcnr" and "dstnr" ("no
+ * restore") route to .Lerror_nr, which returns 0 directly.
+ */
+	.macro srcnr
+100:
+	EX_TABLE(100b,.Lerror_nr)
+	.endm
+
+	.macro source
+150:
+	EX_TABLE(150b,.Lerror)
+	.endm
+
+	.macro dstnr
+200:
+	EX_TABLE(200b,.Lerror_nr)
+	.endm
+
+	.macro dest
+250:
+	EX_TABLE(250b,.Lerror)
+	.endm
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in 0xffffffff (32-bit), while copying the block to dst.
+ * If an access exception occurs, it returns 0.
+ *
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	li	r6,-1
+	addic	r0,r6,0			/* clear carry */
+
+	srdi.	r6,r5,3			/* less than 8 bytes? */
+	beq	.Lcopy_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 *
+	 * If the source and destination are relatively unaligned we only
+	 * align the source. This keeps things simple.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
+	beq	.Lcopy_aligned
+
+	li	r9,4
+	sub	r6,r9,r6
+	mtctr	r6
+
+1:
+srcnr;	lhz	r6,0(r3)		/* align to doubleword */
+	subi	r5,r5,2
+	addi	r3,r3,2
+	adde	r0,r0,r6
+dstnr;	sth	r6,0(r4)
+	addi	r4,r4,2
+	bdnz	1b
+
+.Lcopy_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r5,7
+	beq	.Lcopy_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r5,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
+	 * because of the XER dependency. This means the fastest this loop can
+	 * go is 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
+	adde	r0,r0,r6
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+	bdnz	2b
+
+
+	adde	r0,r0,r6
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r5,r5,63
+
+.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r5,3
+	beq	.Lcopy_tail_word
+
+	mtctr	r6
+3:
+srcnr;	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+dstnr;	std	r6,0(r4)
+	addi	r4,r4,8
+	bdnz	3b
+
+	andi.	r5,r5,7
+
+.Lcopy_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r5,2
+	beq	.Lcopy_tail_halfword
+
+srcnr;	lwz	r6,0(r3)
+	addi	r3,r3,4
+	adde	r0,r0,r6
+dstnr;	stw	r6,0(r4)
+	addi	r4,r4,4
+	subi	r5,r5,4
+
+.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r5,1
+	beq	.Lcopy_tail_byte
+
+srcnr;	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+dstnr;	sth	r6,0(r4)
+	addi	r4,r4,2
+	subi	r5,r5,2
+
+.Lcopy_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r5,1
+	beq	.Lcopy_finish
+
+srcnr;	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
+dstnr;	stb	r6,0(r4)
+
+.Lcopy_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+.Lerror:
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+.Lerror_nr:
+	li	r3,0
+	blr
+
+EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ *			   const struct in6_addr *daddr,
+ *			   __u32 len, __u8 proto, __wsum sum)
+ */
+
+/*
+ * Register use, following the prototype above: r3 = saddr, r4 = daddr,
+ * r5 = len, r6 = proto, r7 = sum. The result is returned in r3.
+ *
+ * Each 16-byte address is read as two doublewords and accumulated into
+ * r0 with carry.
+ */
+_GLOBAL(csum_ipv6_magic)
+	ld	r8, 0(r3)
+	ld	r9, 8(r3)
+	/* r5 = len + proto */
+	add	r5, r5, r6
+	/* start the carry chain with the two halves of saddr */
+	addc	r0, r8, r9
+	ld	r10, 0(r4)
+	ld	r11, 8(r4)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+	/* on little-endian, (len + proto) is rotated left by 8 bits first */
+	rotldi	r5, r5, 8
+#endif
+	adde	r0, r0, r10
+	/* plain add: fold the caller's sum into r5 outside the carry chain */
+	add	r5, r5, r7
+	adde	r0, r0, r11
+	adde	r0, r0, r5
+	/* add in the final carry */
+	addze	r0, r0
+	rotldi  r3, r0, 32		/* fold two 32 bit halves together */
+	add	r3, r0, r3
+	/* the folded 32-bit sum is in the upper word; move it down */
+	srdi	r0, r3, 32
+	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
+	add	r3, r0, r3
+	/* complement, then move the upper halfword down to bits 16-31 */
+	not	r3, r3
+	rlwinm	r3, r3, 16, 16, 31
+	blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
new file mode 100644
index 0000000000..1a14c87802
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/export.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <linux/uaccess.h>
+
+/*
+ * Copy len bytes from the user buffer src to dst, checksumming as we go.
+ *
+ * Returns whatever csum_partial_copy_generic() produced, or 0 if read
+ * access to the user range could not be opened.
+ */
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+			       int len)
+{
+	__wsum sum = 0;
+
+	if (likely(user_read_access_begin(src, len))) {
+		sum = csum_partial_copy_generic((void __force *)src, dst, len);
+		user_read_access_end();
+	}
+
+	return sum;
+}
+
+/*
+ * Copy len bytes from src to the user buffer dst, checksumming as we go.
+ *
+ * Returns whatever csum_partial_copy_generic() produced, or 0 if write
+ * access to the user range could not be opened.
+ */
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
+{
+	__wsum sum = 0;
+
+	if (likely(user_write_access_begin(dst, len))) {
+		sum = csum_partial_copy_generic(src, (void __force *)dst, len);
+		user_write_access_end();
+	}
+
+	return sum;
+}
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
new file mode 100644
index 0000000000..b00112d7ad
--- /dev/null
+++ b/arch/powerpc/lib/code-patching.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/cpuhotplug.h>
+#include <linux/uaccess.h>
+#include <linux/jump_label.h>
+
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include <asm/code-patching.h>
+#include <asm/inst.h>
+
+/*
+ * Store instr through patch_addr, then run the cache maintenance
+ * sequence so the new instruction is seen at exec_addr. The two
+ * addresses are the same for raw patching and differ when the caller
+ * writes through a temporary mapping of the target page.
+ *
+ * Returns 0 on success, or -EPERM when control reaches the "failed"
+ * label that is handed to __put_kernel_nofault().
+ */
+static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
+{
+	if (!ppc_inst_prefixed(instr)) {
+		/* Non-prefixed instruction: one 32-bit store */
+		u32 val = ppc_inst_val(instr);
+
+		__put_kernel_nofault(patch_addr, &val, u32, failed);
+	} else {
+		/* Prefixed instruction: written with one 64-bit store */
+		u64 val = ppc_inst_as_ulong(instr);
+
+		__put_kernel_nofault(patch_addr, &val, u64, failed);
+	}
+
+	/* dcbst on the address written, icbi on the address executed from */
+	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+							    "r" (exec_addr));
+
+	return 0;
+
+failed:
+	return -EPERM;
+}
+
+/*
+ * Patch instr at addr by storing directly through addr itself, with no
+ * temporary mapping involved.
+ */
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	u32 *target = addr;
+
+	return __patch_instruction(target, instr, target);
+}
+
+/*
+ * Per-CPU state used to patch text through a temporary mapping. Which
+ * union member is in use depends on the hotplug callback that filled it
+ * in: text_area_cpu_up() stores a vm area, text_area_cpu_up_mm() stores
+ * an mm.
+ */
+struct patch_context {
+	union {
+		/* set by text_area_cpu_up() */
+		struct vm_struct *area;
+		/* set by text_area_cpu_up_mm() */
+		struct mm_struct *mm;
+	};
+	/* address at which the page being patched gets mapped */
+	unsigned long addr;
+	/* PTE for addr; only written by text_area_cpu_up()/_down() */
+	pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
+
+static int map_patch_area(void *addr, unsigned long text_poke_addr);
+static void unmap_patch_area(unsigned long addr);
+
+/*
+ * The temporary-mm patching scheme is only used on SMP kernels running
+ * with the radix MMU.
+ */
+static bool mm_patch_enabled(void)
+{
+	if (!IS_ENABLED(CONFIG_SMP))
+		return false;
+
+	return radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ *
+ * start_using_temp_mm() switches this CPU from current->active_mm to
+ * temp_mm and suspends breakpoints. It must be called with IRQs disabled
+ * and returns the mm that was active, for stop_using_temp_mm().
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+	struct mm_struct *orig_mm = current->active_mm;
+
+	lockdep_assert_irqs_disabled();
+	switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+	/* the temporary mm is expected to be in use on this CPU only */
+	WARN_ON(!mm_is_thread_local(temp_mm));
+
+	suspend_breakpoints();
+	return orig_mm;
+}
+
+/*
+ * Undo start_using_temp_mm(): switch this CPU from temp_mm back to
+ * orig_mm and restore breakpoints. Must be called with IRQs disabled.
+ */
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+			       struct mm_struct *orig_mm)
+{
+	lockdep_assert_irqs_disabled();
+	switch_mm_irqs_off(temp_mm, orig_mm, current);
+	restore_breakpoints();
+}
+
+/*
+ * CPU hotplug "online" callback for the vmalloc-area patching scheme.
+ *
+ * Reserves one page of vmalloc address space for this CPU, maps and
+ * unmaps it once so that the page tables covering it exist, and records
+ * the area, its address and its PTE in cpu_patching_context.
+ *
+ * Returns 0 on success, -1 if no vm area could be reserved, or the
+ * error from map_patch_area(). The vm area is released again on the
+ * mapping failure path so it is not leaked.
+ */
+static int text_area_cpu_up(unsigned int cpu)
+{
+	struct vm_struct *area;
+	unsigned long addr;
+	int err;
+
+	area = get_vm_area(PAGE_SIZE, VM_ALLOC);
+	if (!area) {
+		WARN_ONCE(1, "Failed to create text area for cpu %d\n",
+			cpu);
+		return -1;
+	}
+
+	// Map/unmap the area to ensure all page tables are pre-allocated
+	addr = (unsigned long)area->addr;
+	err = map_patch_area(empty_zero_page, addr);
+	if (err) {
+		/* nothing has been published yet; give the area back */
+		free_vm_area(area);
+		return err;
+	}
+
+	unmap_patch_area(addr);
+
+	this_cpu_write(cpu_patching_context.area, area);
+	this_cpu_write(cpu_patching_context.addr, addr);
+	this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
+
+	return 0;
+}
+
+/*
+ * CPU hotplug teardown callback for the vmalloc-area patching scheme:
+ * release this CPU's vm area and clear its cpu_patching_context fields.
+ */
+static int text_area_cpu_down(unsigned int cpu)
+{
+	struct vm_struct *area = this_cpu_read(cpu_patching_context.area);
+
+	free_vm_area(area);
+
+	this_cpu_write(cpu_patching_context.area, NULL);
+	this_cpu_write(cpu_patching_context.addr, 0);
+	this_cpu_write(cpu_patching_context.pte, NULL);
+
+	return 0;
+}
+
+/*
+ * Free the page tables covering the single page at patching_addr in mm,
+ * then drop a reference on mm.
+ *
+ * NOTE(review): tlb_gather_mmu() is not paired with a tlb_finish_mmu()
+ * in this function - confirm that this is intentional.
+ */
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+	struct mmu_gather tlb;
+
+	tlb_gather_mmu(&tlb, mm);
+	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+	mmput(mm);
+}
+
+/*
+ * CPU hotplug "online" callback for the temporary-mm patching scheme.
+ *
+ * Allocates an mm for this CPU, picks a random page-aligned address in
+ * it, pre-allocates the page tables for that address and records both
+ * in cpu_patching_context. Returns 0 on success, or -ENOMEM if the mm
+ * or the PTE could not be allocated.
+ */
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+	struct mm_struct *patching_mm;
+	unsigned long patching_addr;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	patching_mm = mm_alloc();
+	if (WARN_ON(!patching_mm))
+		return -ENOMEM;
+
+	/*
+	 * Choose a random page-aligned address from the interval
+	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
+	 */
+	patching_addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+	/*
+	 * PTE allocation uses GFP_KERNEL, so it has to happen here
+	 * rather than at patching time, when IRQs are disabled.
+	 *
+	 * get_locked_pte() is used only to avoid open coding the walk;
+	 * the lock itself is not needed.
+	 */
+	ptep = get_locked_pte(patching_mm, patching_addr, &ptl);
+	if (!ptep) {
+		put_patching_mm(patching_mm, patching_addr);
+		return -ENOMEM;
+	}
+	pte_unmap_unlock(ptep, ptl);
+
+	this_cpu_write(cpu_patching_context.mm, patching_mm);
+	this_cpu_write(cpu_patching_context.addr, patching_addr);
+
+	return 0;
+}
+
+/*
+ * CPU hotplug teardown callback for the temporary-mm patching scheme:
+ * release this CPU's patching mm and clear its cpu_patching_context
+ * fields.
+ */
+static int text_area_cpu_down_mm(unsigned int cpu)
+{
+	struct mm_struct *patching_mm = this_cpu_read(cpu_patching_context.mm);
+	unsigned long patching_addr = this_cpu_read(cpu_patching_context.addr);
+
+	put_patching_mm(patching_mm, patching_addr);
+
+	this_cpu_write(cpu_patching_context.mm, NULL);
+	this_cpu_write(cpu_patching_context.addr, 0);
+
+	return 0;
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
+
+/*
+ * Register the per-CPU hotplug callbacks that set up the patching
+ * context, choosing the temporary-mm or the vmalloc-area variant, and
+ * enable poking_init_done once registration has succeeded. Does nothing
+ * unless CONFIG_STRICT_KERNEL_RWX is enabled.
+ */
+void __init poking_init(void)
+{
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+		return;
+
+	if (mm_patch_enabled()) {
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					"powerpc/text_poke_mm:online",
+					text_area_cpu_up_mm,
+					text_area_cpu_down_mm);
+	} else {
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					"powerpc/text_poke:online",
+					text_area_cpu_up,
+					text_area_cpu_down);
+	}
+
+	/* cpuhp_setup_state returns >= 0 on success */
+	if (!WARN_ON(ret < 0))
+		static_branch_enable(&poking_init_done);
+}
+
+/*
+ * Return the page frame number backing addr. Addresses in the vmalloc
+ * or module range are looked up with vmalloc_to_pfn() (only when
+ * CONFIG_MODULES is enabled); everything else goes through
+ * __pa_symbol().
+ */
+static unsigned long get_patch_pfn(void *addr)
+{
+	if (!IS_ENABLED(CONFIG_MODULES) || !is_vmalloc_or_module_addr(addr))
+		return __pa_symbol(addr) >> PAGE_SHIFT;
+
+	return vmalloc_to_pfn(addr);
+}
+
+/*
+ * Map the page that backs addr at text_poke_addr with PAGE_KERNEL
+ * protection. addr may be kernel text or a module address.
+ */
+static int map_patch_area(void *addr, unsigned long text_poke_addr)
+{
+	unsigned long phys = get_patch_pfn(addr) << PAGE_SHIFT;
+
+	return map_kernel_page(text_poke_addr, phys, PAGE_KERNEL);
+}
+
+/*
+ * Clear the kernel (init_mm) PTE that maps addr and flush the kernel
+ * TLB for that page. The page tables are walked one level at a time;
+ * if any level is empty the function warns and returns without
+ * touching anything.
+ */
+static void unmap_patch_area(unsigned long addr)
+{
+	pte_t *ptep;
+	pmd_t *pmdp;
+	pud_t *pudp;
+	p4d_t *p4dp;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset_k(addr);
+	if (WARN_ON(pgd_none(*pgdp)))
+		return;
+
+	p4dp = p4d_offset(pgdp, addr);
+	if (WARN_ON(p4d_none(*p4dp)))
+		return;
+
+	pudp = pud_offset(p4dp, addr);
+	if (WARN_ON(pud_none(*pudp)))
+		return;
+
+	pmdp = pmd_offset(pudp, addr);
+	if (WARN_ON(pmd_none(*pmdp)))
+		return;
+
+	ptep = pte_offset_kernel(pmdp, addr);
+	if (WARN_ON(pte_none(*ptep)))
+		return;
+
+	/*
+	 * In hash, pte_clear flushes the tlb, in radix, we have to
+	 */
+	pte_clear(&init_mm, addr, ptep);
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+}
+
+/*
+ * Patch one instruction using this CPU's temporary mm.
+ *
+ * The page containing addr is mapped at the per-CPU patching address
+ * inside the patching mm, the CPU switches to that mm for the store,
+ * switches back, and the mapping is cleared and flushed again.
+ * patch_instruction() calls this with IRQs disabled.
+ *
+ * Returns -ENOMEM if the PTE cannot be obtained, otherwise the result
+ * of __patch_instruction().
+ */
+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+	int err;
+	u32 *patch_addr;
+	unsigned long text_poke_addr;
+	pte_t *pte;
+	unsigned long pfn = get_patch_pfn(addr);
+	struct mm_struct *patching_mm;
+	struct mm_struct *orig_mm;
+	spinlock_t *ptl;
+
+	patching_mm = __this_cpu_read(cpu_patching_context.mm);
+	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+	/* same offset within the page, but inside the temporary mapping */
+	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+	if (!pte)
+		return -ENOMEM;
+
+	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+	/* order PTE update before use, also serves as the hwsync */
+	asm volatile("ptesync": : :"memory");
+
+	/* order context switch after arbitrary prior code */
+	isync();
+
+	orig_mm = start_using_temp_mm(patching_mm);
+
+	err = __patch_instruction(addr, instr, patch_addr);
+
+	/* hwsync performed by __patch_instruction (sync) if successful */
+	if (err)
+		mb();  /* sync */
+
+	/* context synchronisation performed by __patch_instruction (isync or exception) */
+	stop_using_temp_mm(patching_mm, orig_mm);
+
+	pte_clear(patching_mm, text_poke_addr, pte);
+	/*
+	 * ptesync to order PTE update before TLB invalidation done
+	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+	 */
+	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+	pte_unmap_unlock(pte, ptl);
+
+	return err;
+}
+
+/*
+ * Patch one instruction using this CPU's vmalloc text-poke area.
+ *
+ * The page containing addr is mapped into init_mm at the per-CPU
+ * patching address through the PTE saved by text_area_cpu_up(), the
+ * store is done through that mapping, and the PTE is then cleared and
+ * the kernel TLB range flushed. patch_instruction() calls this with
+ * IRQs disabled.
+ *
+ * Returns the result of __patch_instruction().
+ */
+static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	int err;
+	u32 *patch_addr;
+	unsigned long text_poke_addr;
+	pte_t *pte;
+	unsigned long pfn = get_patch_pfn(addr);
+
+	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+	/* same offset within the page, but inside the temporary mapping */
+	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+	pte = __this_cpu_read(cpu_patching_context.pte);
+	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+	/* See ptesync comment in radix__set_pte_at() */
+	if (radix_enabled())
+		asm volatile("ptesync": : :"memory");
+
+	err = __patch_instruction(addr, instr, patch_addr);
+
+	/* tear the mapping down again whether or not the store succeeded */
+	pte_clear(&init_mm, text_poke_addr, pte);
+	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+	return err;
+}
+
+/*
+ * Replace the instruction at addr with instr.
+ *
+ * Without CONFIG_STRICT_KERNEL_RWX, or before poking_init() has
+ * completed, the instruction is written in place. Otherwise it is
+ * written through a temporary per-CPU mapping with IRQs disabled.
+ * Returns 0 on success or a negative errno.
+ */
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	unsigned long flags;
+	int err;
+
+	/*
+	 * During early boot patch_instruction is called when
+	 * text_poke_area is not ready, but we still need to allow
+	 * patching. We just do the plain old patching.
+	 */
+	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+	    !static_branch_likely(&poking_init_done))
+		return raw_patch_instruction(addr, instr);
+
+	local_irq_save(flags);
+	err = mm_patch_enabled() ? __do_patch_instruction_mm(addr, instr)
+				 : __do_patch_instruction(addr, instr);
+	local_irq_restore(flags);
+
+	return err;
+}
+NOKPROBE_SYMBOL(patch_instruction);
+
+/*
+ * Build a branch from addr to target with the given flags and patch it
+ * in at addr. Returns -ERANGE if create_branch() rejects the request,
+ * otherwise the result of patch_instruction().
+ */
+int patch_branch(u32 *addr, unsigned long target, int flags)
+{
+	ppc_inst_t instr;
+	int rc = create_branch(&instr, addr, target, flags);
+
+	return rc ? -ERANGE : patch_instruction(addr, instr);
+}
+
+/*
+ * Report whether instr is a conditional branch: primary opcode 16, or
+ * primary opcode 19 with one of the conditional-branch extended opcodes.
+ * Derived from the conditional checks in analyse_instr().
+ */
+bool is_conditional_branch(ppc_inst_t instr)
+{
+	unsigned int xo;
+
+	switch (ppc_inst_primary_opcode(instr)) {
+	case 16:	/* bc, bca, bcl, bcla */
+		return true;
+	case 19:
+		xo = (ppc_inst_val(instr) >> 1) & 0x3ff;
+		return xo == 16 ||	/* bclr, bclrl */
+		       xo == 528 ||	/* bcctr, bcctrl */
+		       xo == 560;	/* bctar, bctarl */
+	default:
+		return false;
+	}
+}
+NOKPROBE_SYMBOL(is_conditional_branch);
+
+/*
+ * Encode a conditional branch located at addr that goes to target.
+ * flags supplies the condition fields and the AA/LK bits. Unless
+ * BRANCH_ABSOLUTE is set, the encoded displacement is relative to addr.
+ *
+ * Returns 0 and stores the instruction in *instr on success, or 1 if
+ * the displacement does not fit a conditional branch.
+ */
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+		       unsigned long target, int flags)
+{
+	long offset = target;
+
+	if (!(flags & BRANCH_ABSOLUTE))
+		offset -= (unsigned long)addr;
+
+	/* Check we can represent the target in the instruction format */
+	if (!is_offset_in_cond_branch_range(offset))
+		return 1;
+
+	/* Mask out the flags and target, so they don't step on each other. */
+	*instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));
+
+	return 0;
+}
+
+/*
+ * Non-zero if instr is an I-form or B-form branch whose BRANCH_ABSOLUTE
+ * bit is clear.
+ */
+int instr_is_relative_branch(ppc_inst_t instr)
+{
+	bool absolute = ppc_inst_val(instr) & BRANCH_ABSOLUTE;
+
+	if (absolute)
+		return 0;
+
+	return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
+}
+
+/*
+ * Non-zero if instr is a relative branch that also has BRANCH_SET_LINK
+ * set.
+ */
+int instr_is_relative_link_branch(ppc_inst_t instr)
+{
+	if (!instr_is_relative_branch(instr))
+		return 0;
+
+	return (ppc_inst_val(instr) & BRANCH_SET_LINK) != 0;
+}
+
+/*
+ * Decode the target of the I-form branch stored at instr: the 26-bit
+ * displacement is sign-extended and, unless the branch is absolute,
+ * added to the address of the instruction.
+ */
+static unsigned long branch_iform_target(const u32 *instr)
+{
+	signed long disp = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
+
+	/* 0x2000000 is the sign bit of the displacement field */
+	if (disp & 0x2000000)
+		disp -= 0x4000000;
+
+	/* an absolute branch uses the displacement as the target itself */
+	if (ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE)
+		return (unsigned long)disp;
+
+	return (unsigned long)disp + (unsigned long)instr;
+}
+
+/*
+ * Decode the target of the B-form branch stored at instr: the 16-bit
+ * displacement is sign-extended and, unless the branch is absolute,
+ * added to the address of the instruction.
+ */
+static unsigned long branch_bform_target(const u32 *instr)
+{
+	signed long disp = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
+
+	/* 0x8000 is the sign bit of the displacement field */
+	if (disp & 0x8000)
+		disp -= 0x10000;
+
+	/* an absolute branch uses the displacement as the target itself */
+	if (ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE)
+		return (unsigned long)disp;
+
+	return (unsigned long)disp + (unsigned long)instr;
+}
+
+/*
+ * Return the target address of the branch stored at instr, or 0 if the
+ * instruction is neither an I-form nor a B-form branch.
+ */
+unsigned long branch_target(const u32 *instr)
+{
+	if (instr_is_branch_iform(ppc_inst_read(instr)))
+		return branch_iform_target(instr);
+
+	if (instr_is_branch_bform(ppc_inst_read(instr)))
+		return branch_bform_target(instr);
+
+	return 0;
+}
+
+/*
+ * Re-encode the branch stored at src so that it reaches the same target
+ * when placed at dest. The source instruction word is passed through as
+ * the flags of the new branch.
+ *
+ * Returns the result of create_branch()/create_cond_branch(), or 1 if
+ * src is neither an I-form nor a B-form branch.
+ */
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
+{
+	unsigned long target = branch_target(src);
+
+	if (instr_is_branch_iform(ppc_inst_read(src)))
+		return create_branch(instr, dest, target,
+				     ppc_inst_val(ppc_inst_read(src)));
+
+	if (instr_is_branch_bform(ppc_inst_read(src)))
+		return create_cond_branch(instr, dest, target,
+					  ppc_inst_val(ppc_inst_read(src)));
+
+	return 1;
+}
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
new file mode 100644
index 0000000000..933b685e7a
--- /dev/null
+++ b/arch/powerpc/lib/copy_32.S
@@ -0,0 +1,515 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Memory copy functions for 32-bit PowerPC.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
+
+#define COPY_16_BYTES		\
+	lwz	r7,4(r4);	\
+	lwz	r8,8(r4);	\
+	lwz	r9,12(r4);	\
+	lwzu	r10,16(r4);	\
+	stw	r7,4(r6);	\
+	stw	r8,8(r6);	\
+	stw	r9,12(r6);	\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n)	\
+8 ## n ## 0:			\
+	lwz	r7,4(r4);	\
+8 ## n ## 1:			\
+	lwz	r8,8(r4);	\
+8 ## n ## 2:			\
+	lwz	r9,12(r4);	\
+8 ## n ## 3:			\
+	lwzu	r10,16(r4);	\
+8 ## n ## 4:			\
+	stw	r7,4(r6);	\
+8 ## n ## 5:			\
+	stw	r8,8(r6);	\
+8 ## n ## 6:			\
+	stw	r9,12(r6);	\
+8 ## n ## 7:			\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n)			\
+9 ## n ## 0:					\
+	addi	r5,r5,-(16 * n);		\
+	b	104f;				\
+9 ## n ## 1:					\
+	addi	r5,r5,-(16 * n);		\
+	b	105f;				\
+	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
+	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
+	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
+	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
+	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
+	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
+	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
+	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
+
+	.text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+#ifndef CONFIG_KASAN
+/*
+ * Fill memory with a 16-bit value.
+ * Register use as inferred from the code: r3 = destination, r4 = value
+ * (low halfword), r5 = number of halfwords to store.
+ */
+_GLOBAL(memset16)
+	/* r0 = r5 >> 1: number of whole words to store */
+	rlwinm.	r0 ,r5, 31, 1, 31
+	/* bias the pointer for the update-form stores below */
+	addi	r6, r3, -4
+	beq-	2f
+	/* copy the low halfword of r4 into its upper halfword */
+	rlwimi	r4 ,r4 ,16 ,0 ,15
+	mtctr	r0
+1:	stwu	r4, 4(r6)
+	bdnz	1b
+	/* an odd count leaves one trailing halfword */
+2:	andi.	r0, r5, 1
+	beqlr
+	sth	r4, 4(r6)
+	blr
+EXPORT_SYMBOL(memset16)
+#endif
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ *
+ * During early init, cache might not be active yet, so dcbz cannot be used.
+ * We therefore skip the optimised block that uses dcbz. The skipping jump
+ * is replaced by a 'bne' once cache is active, so that only non-zero fill
+ * values bypass the dcbz path. This is done in machine_init()
+ */
+_GLOBAL_KASAN(memset)
+	cmplwi	0,r5,4
+	blt	7f
+
+	rlwimi	r4,r4,8,16,23
+	rlwimi	r4,r4,16,0,15
+
+	stw	r4,0(r3)
+	beqlr
+	andi.	r0,r3,3
+	add	r5,r0,r5
+	subf	r6,r0,r3
+	cmplwi	0,r4,0
+	/*
+	 * Skip the optimised block until cache is enabled. Will be replaced
+	 * by 'bne' during boot to use the normal procedure if r4 is not zero
+	 */
+5:	b	2f
+	patch_site	5b, patch__memset_nocache
+
+	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
+	add	r8,r7,r5
+	srwi	r9,r8,LG_CACHELINE_BYTES
+	addic.	r9,r9,-1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0,r7,CACHELINE_MASK & ~3
+	srwi.	r0,r0,2
+	beq	3f
+	mtctr	r0
+4:	stwu	r4,4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7,4
+10:	dcbz	r7,r6
+	addi	r6,r6,CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
+	addi	r5,r5,4
+
+2:	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r3,-1
+9:	stbu	r4,1(r6)
+	bdnz	9b
+	blr
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ *
+ * During early init, cache might not be active yet, so dcbz cannot be used.
+ * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
+ * replaced by a nop once cache is active. This is done in machine_init()
+ */
+_GLOBAL_KASAN(memmove)
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy
+	/* fall through */
+
+_GLOBAL_KASAN(memcpy)
+1:	b	generic_memcpy
+	patch_site	1b, patch__memcpy_nocache
+
+	add	r7,r3,r5		/* test if the src & dst overlap */
+	add	r8,r4,r5
+	cmplw	0,r4,r7
+	cmplw	1,r3,r8
+	crand	0,0,4			/* cr0.lt &= cr1.lt */
+	blt	generic_memcpy		/* if regions overlap */
+
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	subf	r5,r0,r5
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+	addi	r4,r4,1
+	addi	r6,r6,1
+	stb	r9,3(r6)
+	bdnz	70b
+61:	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+	stwu	r9,4(r6)
+	bdnz	72b
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	mtctr	r0
+	beq	63f
+53:
+	dcbz	r11,r6
+	COPY_16_BYTES
+#if L1_CACHE_BYTES >= 32
+	COPY_16_BYTES
+#if L1_CACHE_BYTES >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_BYTES >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	53b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+	addi	r4,r4,3
+	addi	r6,r6,3
+40:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	40b
+65:	blr
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
+
+generic_memcpy:
+	srwi.	r7,r5,3
+	addi	r6,r3,-4
+	addi	r4,r4,-4
+	beq	2f			/* if less than 8 bytes to do */
+	andi.	r0,r6,3			/* get dest word aligned */
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,4(r4)
+	lwzu	r8,8(r4)
+	stw	r7,4(r6)
+	stwu	r8,8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,4(r4)
+	addi	r5,r5,-4
+	stwu	r0,4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r4,r4,3
+	addi	r6,r6,3
+4:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	4b
+	blr
+5:	subfic	r0,r0,4
+	mtctr	r0
+6:	lbz	r7,4(r4)
+	addi	r4,r4,1
+	stb	r7,4(r6)
+	addi	r6,r6,1
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+/*
+ * Copy r5 bytes from r4 to r3, starting at the end of both buffers and
+ * working towards the start. memmove branches here when r3 > r4.
+ */
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
+	/* r6 and r4 point one past the end of dest and src */
+	add	r6,r3,r5
+	add	r4,r4,r5
+	beq	2f
+	/* is the end of the destination word aligned? */
+	andi.	r0,r6,3
+	mtctr	r7
+	bne	5f
+	/* main loop: two words (8 bytes) per iteration */
+1:	lwz	r7,-4(r4)
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+	/* fewer than 8 bytes left: one more word if there is room */
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+	/* then any remaining bytes, one at a time */
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+	/* copy r0 = (dest end & 3) bytes so the dest pointer is word aligned */
+5:	mtctr	r0
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	/* take those bytes off the count and recompute the 8-byte chunks */
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(__copy_tofrom_user)
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+71:	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	subf	r5,r0,r5
+	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+73:	stwu	r9,4(r6)
+	bdnz	72b
+
+	EX_TABLE(70b,100f)
+	EX_TABLE(71b,101f)
+	EX_TABLE(72b,102f)
+	EX_TABLE(73b,103f)
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	beq	63f
+
+	/* Here we decide how far ahead to prefetch the source */
+	li	r3,4
+	cmpwi	r0,1
+	li	r7,0
+	ble	114f
+	li	r7,1
+#if MAX_COPY_PREFETCH > 1
+	/* Heuristically, for large transfers we prefetch
+	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
+	   we prefetch 1 cacheline ahead. */
+	cmpwi	r0,MAX_COPY_PREFETCH
+	ble	112f
+	li	r7,MAX_COPY_PREFETCH
+112:	mtctr	r7
+111:	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+	bdnz	111b
+#else
+	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114:	subf	r8,r7,r0
+	mr	r0,r7
+	mtctr	r8
+
+53:	dcbt	r3,r4
+54:	dcbz	r11,r6
+	EX_TABLE(54b,105f)
+/* the main body of the cacheline loop */
+	COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_BYTES >= 32
+	COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_BYTES >= 64
+	COPY_16_BYTES_WITHEX(2)
+	COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_BYTES >= 128
+	COPY_16_BYTES_WITHEX(4)
+	COPY_16_BYTES_WITHEX(5)
+	COPY_16_BYTES_WITHEX(6)
+	COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+	bdnz	53b
+	cmpwi	r0,0
+	li	r3,4
+	li	r7,0
+	bne	114b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+31:	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+41:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	li	r3,0
+	blr
+
+/* read fault, initial single-byte copy */
+100:	li	r9,0
+	b	90f
+/* write fault, initial single-byte copy */
+101:	li	r9,1
+90:	subf	r5,r8,r5
+	li	r3,0
+	b	99f
+/* read fault, initial word copy */
+102:	li	r9,0
+	b	91f
+/* write fault, initial word copy */
+103:	li	r9,1
+91:	li	r3,2
+	b	99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+	COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_BYTES >= 32
+	COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_BYTES >= 64
+	COPY_16_BYTES_EXCODE(2)
+	COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_BYTES >= 128
+	COPY_16_BYTES_EXCODE(4)
+	COPY_16_BYTES_EXCODE(5)
+	COPY_16_BYTES_EXCODE(6)
+	COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104:	li	r9,0
+	b	92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105:	li	r9,1
+92:	li	r3,LG_CACHELINE_BYTES
+	mfctr	r8
+	add	r0,r0,r8
+	b	106f
+/* read fault in final word loop */
+108:	li	r9,0
+	b	93f
+/* write fault in final word loop */
+109:	li	r9,1
+93:	andi.	r5,r5,3
+	li	r3,2
+	b	99f
+/* read fault in final byte loop */
+110:	li	r9,0
+	b	94f
+/* write fault in final byte loop */
+111:	li	r9,1
+94:	li	r5,0
+	li	r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99:	mfctr	r0
+106:	slw	r3,r0,r3
+	add.	r3,r3,r5
+	beq	120f			/* shouldn't happen */
+	cmpwi	0,r9,0
+	bne	120f
+/* for a read fault, first try to continue the copy one byte at a time */
+	mtctr	r3
+130:	lbz	r0,4(r4)
+131:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132:	mfctr	r3
+120:	blr
+
+	EX_TABLE(30b,108b)
+	EX_TABLE(31b,109b)
+	EX_TABLE(40b,110b)
+	EX_TABLE(41b,111b)
+	EX_TABLE(130b,132b)
+	EX_TABLE(131b,120b)
+
+EXPORT_SYMBOL(__copy_tofrom_user)
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
new file mode 100644
index 0000000000..bf1014b28f
--- /dev/null
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+
+	.macro err1	/* tag the next instruction: a fault there resumes at .Ldo_err1 */
+100:
+	EX_TABLE(100b,.Ldo_err1)
+	.endm
+
+	.macro err2	/* as err1, but resumes at .Ldo_err2 (stack frame is live) */
+200:
+	EX_TABLE(200b,.Ldo_err2)
+	.endm
+
+	.macro err3	/* used inside the byte-by-byte retry loop; resumes at .Ldone */
+300:	EX_TABLE(300b,.Ldone)
+	.endm
+
+.Ldo_err2:	/* restore r14-r22 and pop the frame, then fall into .Ldo_err1 */
+	ld	r22,STK_REG(R22)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+	/* Do a byte by byte copy to get the exact remaining size */
+	mtctr	r7		/* r7 = bytes still outstanding, maintained by copy_mc_generic */
+46:
+err3;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	46b
+	li	r3,0		/* the retry copied everything: return 0 */
+	blr
+
+.Ldone:	/* the retry loop faulted: return the count left in ctr */
+	mfctr	r3
+	blr
+
+
+_GLOBAL(copy_mc_generic)	/* r3 = dest, r4 = src, r5 = byte count; r3 returns bytes not copied */
+	mr	r7,r5		/* r7 = bytes remaining, kept up to date for .Ldo_err1 */
+	cmpldi	r5,16
+	blt	.Lshort_copy
+
+.Lcopy:
+	/* Get the source 8B aligned */
+	neg	r6,r4
+	mtocrf	0x01,r6		/* cr7 = low 4 bits of -src: which 1/2/4 byte steps are needed */
+	clrldi	r6,r6,(64-3)	/* r6 = bytes needed to reach 8B alignment */
+
+	bf	cr7*4+3,1f
+err1;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err1;	stb	r0,0(r3)
+	addi	r3,r3,1
+	subi	r7,r7,1
+
+1:	bf	cr7*4+2,2f
+err1;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+	subi	r7,r7,2
+
+2:	bf	cr7*4+1,3f
+err1;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+	subi	r7,r7,4
+
+3:	sub	r5,r5,r6	/* r5 = bytes left after the alignment steps */
+	cmpldi	r5,128		/* cr0 is consumed by the blt 5f after the frame is set up */
+
+	mflr	r0
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+	std	r21,STK_REG(R21)(r1)
+	std	r22,STK_REG(R22)(r1)
+	std	r0,STACKFRAMESIZE+16(r1)
+
+	blt	5f		/* under 128 bytes: skip the cacheline loop, frame stays for the 64B step */
+	srdi	r6,r5,7
+	mtctr	r6		/* ctr = number of whole 128B chunks */
+
+	/* Now do cacheline (128B) sized loads and stores. */
+	.align	5
+4:
+err2;	ld	r0,0(r4)
+err2;	ld	r6,8(r4)
+err2;	ld	r8,16(r4)
+err2;	ld	r9,24(r4)
+err2;	ld	r10,32(r4)
+err2;	ld	r11,40(r4)
+err2;	ld	r12,48(r4)
+err2;	ld	r14,56(r4)
+err2;	ld	r15,64(r4)
+err2;	ld	r16,72(r4)
+err2;	ld	r17,80(r4)
+err2;	ld	r18,88(r4)
+err2;	ld	r19,96(r4)
+err2;	ld	r20,104(r4)
+err2;	ld	r21,112(r4)
+err2;	ld	r22,120(r4)
+	addi	r4,r4,128
+err2;	std	r0,0(r3)
+err2;	std	r6,8(r3)
+err2;	std	r8,16(r3)
+err2;	std	r9,24(r3)
+err2;	std	r10,32(r3)
+err2;	std	r11,40(r3)
+err2;	std	r12,48(r3)
+err2;	std	r14,56(r3)
+err2;	std	r15,64(r3)
+err2;	std	r16,72(r3)
+err2;	std	r17,80(r3)
+err2;	std	r18,88(r3)
+err2;	std	r19,96(r3)
+err2;	std	r20,104(r3)
+err2;	std	r21,112(r3)
+err2;	std	r22,120(r3)
+	addi	r3,r3,128
+	subi	r7,r7,128
+	bdnz	4b
+
+	clrldi	r5,r5,(64-7)	/* r5 = bytes left after the 128B loop */
+
+	/* Up to 127B to go */
+5:	srdi	r6,r5,4
+	mtocrf	0x01,r6		/* cr7 = bits 4-7 of r5: selects the 64/32/16 byte steps */
+
+6:	bf	cr7*4+1,7f
+err2;	ld	r0,0(r4)
+err2;	ld	r6,8(r4)
+err2;	ld	r8,16(r4)
+err2;	ld	r9,24(r4)
+err2;	ld	r10,32(r4)
+err2;	ld	r11,40(r4)
+err2;	ld	r12,48(r4)
+err2;	ld	r14,56(r4)
+	addi	r4,r4,64
+err2;	std	r0,0(r3)
+err2;	std	r6,8(r3)
+err2;	std	r8,16(r3)
+err2;	std	r9,24(r3)
+err2;	std	r10,32(r3)
+err2;	std	r11,40(r3)
+err2;	std	r12,48(r3)
+err2;	std	r14,56(r3)
+	addi	r3,r3,64
+	subi	r7,r7,64
+
+7:	ld	r14,STK_REG(R14)(r1)	/* done with r14-r22: restore them and pop the frame */
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r22,STK_REG(R22)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	/* Up to 63B to go */
+	bf	cr7*4+2,8f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+err1;	ld	r8,16(r4)
+err1;	ld	r9,24(r4)
+	addi	r4,r4,32
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+err1;	std	r8,16(r3)
+err1;	std	r9,24(r3)
+	addi	r3,r3,32
+	subi	r7,r7,32
+
+	/* Up to 31B to go */
+8:	bf	cr7*4+3,9f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+	addi	r4,r4,16
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+	addi	r3,r3,16
+	subi	r7,r7,16
+
+9:	clrldi	r5,r5,(64-4)	/* r5 = final 0-15 bytes */
+
+	/* Up to 15B to go */
+.Lshort_copy:
+	mtocrf	0x01,r5		/* cr7 = low 4 bits of r5: selects the 8/4/2/1 byte steps */
+	bf	cr7*4+0,12f
+err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+err1;	lwz	r6,4(r4)
+	addi	r4,r4,8
+err1;	stw	r0,0(r3)
+err1;	stw	r6,4(r3)
+	addi	r3,r3,8
+	subi	r7,r7,8
+
+12:	bf	cr7*4+1,13f
+err1;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+	subi	r7,r7,4
+
+13:	bf	cr7*4+2,14f
+err1;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+	subi	r7,r7,2
+
+14:	bf	cr7*4+3,15f
+err1;	lbz	r0,0(r4)
+err1;	stb	r0,0(r3)
+
+15:	li	r3,0		/* success: nothing left uncopied */
+	blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
new file mode 100644
index 0000000000..f33a2e6088
--- /dev/null
+++ b/arch/powerpc/lib/copypage_64.S
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2008 Mark Nelson, IBM Corp.
+ */
+#include <linux/export.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+
+_GLOBAL_TOC(copy_page)	/* r3 = destination page, r4 = source page */
+BEGIN_FTR_SECTION
+	lis	r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
+	b	copypage_power7
+#endif
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+	ori	r5,r5,PAGE_SIZE@l	/* r5 = PAGE_SIZE (high half set by the lis above) */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+	/*
+	 * Hack for toolchain - prefixed instructions cause label difference to
+	 * be non-constant even if 8 byte alignment is known, so they can not
+	 * be put in FTR sections.
+	 */
+	LOAD_REG_ADDR(r10, ppc64_caches)
+BEGIN_FTR_SECTION
+#else
+BEGIN_FTR_SECTION
+	LOAD_REG_ADDR(r10, ppc64_caches)
+#endif
+	lwz	r11,DCACHEL1LOGBLOCKSIZE(r10)	/* log2 of cache block size */
+	lwz     r12,DCACHEL1BLOCKSIZE(r10)	/* get cache block size */
+	li	r9,0
+	srd	r8,r5,r11		/* r8 = number of cache blocks in the page */
+
+	mtctr	r8
+.Lsetup:	/* per cache block: touch the source, zero the destination */
+	dcbt	r9,r4
+	dcbz	r9,r3
+	add	r9,r9,r12
+	bdnz	.Lsetup
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
+	addi	r3,r3,-8	/* bias dest by -8: the stores below use offsets 8..128 */
+	srdi    r8,r5,7		/* page is copied in 128 byte strides */
+	addi	r8,r8,-1	/* one stride copied outside loop */
+
+	mtctr	r8
+
+	ld	r5,0(r4)	/* preload the first four doublewords of the first stride */
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ldu	r8,24(r4)
+1:	std	r5,8(r3)
+	std	r6,16(r3)
+	ld	r9,8(r4)
+	ld	r10,16(r4)
+	std	r7,24(r3)
+	std	r8,32(r3)
+	ld	r11,24(r4)
+	ld	r12,32(r4)
+	std	r9,40(r3)
+	std	r10,48(r3)
+	ld	r5,40(r4)
+	ld	r6,48(r4)
+	std	r11,56(r3)
+	std	r12,64(r3)
+	ld	r7,56(r4)
+	ld	r8,64(r4)
+	std	r5,72(r3)
+	std	r6,80(r3)
+	ld	r9,72(r4)
+	ld	r10,80(r4)
+	std	r7,88(r3)
+	std	r8,96(r3)
+	ld	r11,88(r4)
+	ld	r12,96(r4)
+	std	r9,104(r3)
+	std	r10,112(r3)
+	ld	r5,104(r4)	/* from here the loads fetch the start of the next stride */
+	ld	r6,112(r4)
+	std	r11,120(r3)
+	stdu	r12,128(r3)	/* last store of the stride also advances r3 by 128 */
+	ld	r7,120(r4)
+	ldu	r8,128(r4)	/* last load also advances r4 by 128 */
+	bdnz	1b
+
+	std	r5,8(r3)	/* final stride: loop body without the loads for a next stride */
+	std	r6,16(r3)
+	ld	r9,8(r4)
+	ld	r10,16(r4)
+	std	r7,24(r3)
+	std	r8,32(r3)
+	ld	r11,24(r4)
+	ld	r12,32(r4)
+	std	r9,40(r3)
+	std	r10,48(r3)
+	ld	r5,40(r4)
+	ld	r6,48(r4)
+	std	r11,56(r3)
+	std	r12,64(r3)
+	ld	r7,56(r4)
+	ld	r8,64(r4)
+	std	r5,72(r3)
+	std	r6,80(r3)
+	ld	r9,72(r4)
+	ld	r10,80(r4)
+	std	r7,88(r3)
+	std	r8,96(r3)
+	ld	r11,88(r4)
+	ld	r12,96(r4)
+	std	r9,104(r3)
+	std	r10,112(r3)
+	std	r11,120(r3)
+	std	r12,128(r3)
+	blr
+EXPORT_SYMBOL(copy_page)
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
new file mode 100644
index 0000000000..a783973f12
--- /dev/null
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copypage_power7)	/* r3 = destination page, r4 = source page */
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side. Since source and destination are page
+	 * aligned we don't need to clear the bottom 7 bits of either
+	 * address.
+	 */
+	ori	r9,r3,1		/* stream=1 => to */
+
+#ifdef CONFIG_PPC_64K_PAGES
+	lis	r7,0x0E01	/* depth=7
+				 * units/cachelines=512 */
+#else
+	lis	r7,0x0E00	/* depth=7 */
+	ori	r7,r7,0x1000	/* units/cachelines=32 */
+#endif
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+	/* setup read stream 0  */
+	dcbt	0,r4,0b01000  	/* addr from */
+	dcbt	0,r7,0b01010   /* length and depth from */
+	/* setup write stream 1 */
+	dcbtst	0,r9,0b01000   /* addr to */
+	dcbtst	0,r10,0b01010  /* length and depth to */
+	eieio
+	dcbt	0,r8,0b01010	/* all streams GO */
+
+#ifdef CONFIG_ALTIVEC
+	mflr	r0
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* keep dest/src across the call below */
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	bl	CFUNC(enter_vmx_ops)
+	cmpwi	r3,0		/* cr0.eq if enter_vmx_ops returned 0; tested by the beq below */
+	ld	r0,STACKFRAMESIZE+16(r1)
+	ld	r3,STK_REG(R31)(r1)
+	ld	r4,STK_REG(R30)(r1)
+	mtlr	r0
+
+	li	r0,(PAGE_SIZE/128)
+	mtctr	r0		/* ctr = number of 128-byte chunks in the page */
+
+	beq	.Lnonvmx_copy	/* returned 0: use the GPR loop, the frame stays allocated */
+
+	addi	r1,r1,STACKFRAMESIZE
+
+	li	r6,16
+	li	r7,32
+	li	r8,48
+	li	r9,64
+	li	r10,80
+	li	r11,96
+	li	r12,112
+
+	.align	5
+1:	lvx	v7,0,r4
+	lvx	v6,r4,r6
+	lvx	v5,r4,r7
+	lvx	v4,r4,r8
+	lvx	v3,r4,r9
+	lvx	v2,r4,r10
+	lvx	v1,r4,r11
+	lvx	v0,r4,r12
+	addi	r4,r4,128
+	stvx	v7,0,r3
+	stvx	v6,r3,r6
+	stvx	v5,r3,r7
+	stvx	v4,r3,r8
+	stvx	v3,r3,r9
+	stvx	v2,r3,r10
+	stvx	v1,r3,r11
+	stvx	v0,r3,r12
+	addi	r3,r3,128
+	bdnz	1b
+
+	b	CFUNC(exit_vmx_ops)		/* tail call optimise */
+
+#else
+	li	r0,(PAGE_SIZE/128)
+	mtctr	r0
+
+	stdu	r1,-STACKFRAMESIZE(r1)	/* frame for the r14-r20 saves in .Lnonvmx_copy */
+#endif
+
+.Lnonvmx_copy:	/* GPR copy, 128 bytes per iteration; entered with a frame allocated */
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+
+1:	ld	r0,0(r4)
+	ld	r5,8(r4)
+	ld	r6,16(r4)
+	ld	r7,24(r4)
+	ld	r8,32(r4)
+	ld	r9,40(r4)
+	ld	r10,48(r4)
+	ld	r11,56(r4)
+	ld	r12,64(r4)
+	ld	r14,72(r4)
+	ld	r15,80(r4)
+	ld	r16,88(r4)
+	ld	r17,96(r4)
+	ld	r18,104(r4)
+	ld	r19,112(r4)
+	ld	r20,120(r4)
+	addi	r4,r4,128
+	std	r0,0(r3)
+	std	r5,8(r3)
+	std	r6,16(r3)
+	std	r7,24(r3)
+	std	r8,32(r3)
+	std	r9,40(r3)
+	std	r10,48(r3)
+	std	r11,56(r3)
+	std	r12,64(r3)
+	std	r14,72(r3)
+	std	r15,80(r3)
+	std	r16,88(r3)
+	std	r17,96(r3)
+	std	r18,104(r3)
+	std	r19,112(r3)
+	std	r20,120(r3)
+	addi	r3,r3,128
+	bdnz	1b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+	blr
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
new file mode 100644
index 0000000000..9af969d2cc
--- /dev/null
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -0,0 +1,564 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
+#define SELFTEST_CASE	0
+#endif
+
+#ifdef __BIG_ENDIAN__
+#define sLd sld		/* Shift towards low-numbered address. */
+#define sHd srd		/* Shift towards high-numbered address. */
+#else
+#define sLd srd		/* Shift towards low-numbered address. */
+#define sHd sld		/* Shift towards high-numbered address. */
+#endif
+
+/*
+ * These macros are used to generate exception table entries.
+ * The exception handlers below use the original arguments
+ * (stored on the stack) and the point where we're up to in
+ * the destination buffer, i.e. the address of the first
+ * unmodified byte.  Generally r3 points into the destination
+ * buffer, but the first unmodified byte is at a variable
+ * offset from r3.  In the code below, the symbol r3_offset
+ * is set to indicate the current offset at each point in
+ * the code.  This offset is then used as a negative offset
+ * from the exception handler code, and those instructions
+ * before the exception handlers are addi instructions that
+ * adjust r3 to point to the correct place.
+ */
+	.macro	lex		/* tag the next load: a fault resumes at .Lld_exc - r3_offset */
+100:	EX_TABLE(100b, .Lld_exc - r3_offset)
+	.endm
+
+	.macro	stex		/* tag the next store: a fault resumes at .Lst_exc - r3_offset */
+100:	EX_TABLE(100b, .Lst_exc - r3_offset)
+	.endm
+
+	.align	7
+_GLOBAL_TOC(__copy_tofrom_user)	/* r3 = dest, r4 = src, r5 = byte count */
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+	nop
+FTR_SECTION_ELSE
+	b	__copy_tofrom_user_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
+_GLOBAL(__copy_tofrom_user_base)
+	/* first check for a 4kB copy on a 4kB boundary */
+	cmpldi	cr1,r5,16	/* cr1: length compared with 16 */
+	cmpdi	cr6,r5,4096	/* cr6.eq if length == 4096 */
+	or	r0,r3,r4
+	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
+	andi.	r0,r0,4095	/* cr0.eq if both pointers are 4kB aligned */
+	std	r3,-24(r1)	/* original dest/src/length, reloaded by the handlers */
+	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq = aligned and length == 4096 */
+	std	r4,-16(r1)
+	std	r5,-8(r1)
+	dcbt	0,r4
+	beq	.Lcopy_page_4K
+	andi.	r6,r6,7		/* r6 = bytes to 8-byte dest boundary; cr0 for the bne below */
+	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the length */
+	blt	cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared.
+ * At the time of writing the only CPU that has this combination of bits
+ * set is Power6.
+ */
+test_feature = (SELFTEST_CASE == 1)
+BEGIN_FTR_SECTION
+	nop
+FTR_SECTION_ELSE
+	bne	.Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+		    CPU_FTR_UNALIGNED_LD_STD)
+.Ldst_aligned:
+	addi	r3,r3,-16
+r3_offset = 16
+test_feature = (SELFTEST_CASE == 0)
+BEGIN_FTR_SECTION
+	andi.	r0,r4,7		/* r0 = source misalignment, used by .Lsrc_unaligned */
+	bne	.Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
+	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
+	srdi	r0,r5,5		/* r0 = number of 32-byte chunks */
+	cmpdi	cr1,r0,0
+lex;	ld	r7,0(r4)
+lex;	ld	r6,8(r4)
+	addi	r4,r4,16
+	mtctr	r0
+	andi.	r0,r5,0x10
+	beq	22f		/* no odd 16-byte piece: enter the loop at its midpoint */
+	addi	r3,r3,16
+r3_offset = 0
+	addi	r4,r4,-16
+	mr	r9,r7
+	mr	r8,r6
+	beq	cr1,72f		/* no 32-byte chunks at all: just store the 16 bytes loaded */
+21:
+lex;	ld	r7,16(r4)
+lex;	ld	r6,24(r4)
+	addi	r4,r4,32
+stex;	std	r9,0(r3)
+r3_offset = 8
+stex;	std	r8,8(r3)
+r3_offset = 16
+22:
+lex;	ld	r9,0(r4)
+lex;	ld	r8,8(r4)
+stex;	std	r7,16(r3)
+r3_offset = 24
+stex;	std	r6,24(r3)
+	addi	r3,r3,32
+r3_offset = 0
+	bdnz	21b
+72:
+stex;	std	r9,0(r3)
+r3_offset = 8
+stex;	std	r8,8(r3)
+r3_offset = 16
+	andi.	r5,r5,0xf	/* r5 = trailing 0-15 bytes */
+	beq+	3f
+	addi	r4,r4,16
+.Ldo_tail:	/* copy the last 8/4/2/1 bytes as selected by the cr7 bits */
+	addi	r3,r3,16
+r3_offset = 0
+	bf	cr7*4+0,246f
+lex;	ld	r9,0(r4)
+	addi	r4,r4,8
+stex;	std	r9,0(r3)
+	addi	r3,r3,8
+246:	bf	cr7*4+1,1f
+lex;	lwz	r9,0(r4)
+	addi	r4,r4,4
+stex;	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+lex;	lhz	r9,0(r4)
+	addi	r4,r4,2
+stex;	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+lex;	lbz	r9,0(r4)
+stex;	stb	r9,0(r3)
+3:	li	r3,0		/* success: 0 bytes not copied */
+	blr
+
+.Lsrc_unaligned:	/* entered with r0 = src & 7; builds each output doubleword from two loads */
+r3_offset = 16
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4	/* round the source pointer down to an 8-byte boundary */
+	srdi	r7,r5,4
+	sldi	r10,r0,3	/* r10 = source misalignment in bits */
+	cmpldi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64	/* r11 = 64 - r10, the complementary shift count */
+	add	r5,r5,r0
+	bt	cr7*4+0,28f
+
+lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
+lex;	ld	r0,8(r4)
+	sLd	r6,r9,r10
+lex;	ldu	r9,16(r4)
+	sHd	r7,r0,r11
+	sLd	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,79f
+lex;	ld	r0,8(r4)
+	b	2f
+
+28:
+lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
+lex;	ldu	r9,8(r4)
+	sLd	r8,r0,r10
+	addi	r3,r3,-8
+r3_offset = 24
+	blt	cr6,5f
+lex;	ld	r0,8(r4)
+	sHd	r12,r9,r11
+	sLd	r6,r9,r10
+lex;	ldu	r9,16(r4)
+	or	r12,r8,r12
+	sHd	r7,r0,r11
+	sLd	r8,r0,r10
+	addi	r3,r3,16
+r3_offset = 8
+	beq	cr6,78f
+
+1:	or	r7,r7,r6
+lex;	ld	r0,8(r4)
+stex;	std	r12,8(r3)
+r3_offset = 16
+2:	sHd	r12,r9,r11
+	sLd	r6,r9,r10
+lex;	ldu	r9,16(r4)
+	or	r12,r8,r12
+stex;	stdu	r7,16(r3)
+r3_offset = 8
+	sHd	r7,r0,r11
+	sLd	r8,r0,r10
+	bdnz	1b
+
+78:
+stex;	std	r12,8(r3)
+r3_offset = 16
+	or	r7,r7,r6
+79:
+stex;	std	r7,16(r3)
+r3_offset = 24
+5:	sHd	r12,r9,r11
+	or	r12,r8,r12
+stex;	std	r12,24(r3)
+r3_offset = 32
+	bne	6f		/* cr0 from the andi. above: trailing bytes remain */
+	li	r3,0
+	blr
+6:	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+r3_offset = 0
+	sLd	r9,r9,r10
+	ble	cr1,7f
+lex;	ld	r0,8(r4)
+	sHd	r7,r0,r11
+	or	r9,r7,r9
+7:	/* store the trailing 4/2/1 bytes out of r9, rotating it between stores */
+	bf	cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
+	rotldi	r9,r9,32
+#endif
+stex;	stw	r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+	rotrdi	r9,r9,32
+#endif
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
+	rotldi	r9,r9,16
+#endif
+stex;	sth	r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+	rotrdi	r9,r9,16
+#endif
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
+	rotldi	r9,r9,8
+#endif
+stex;	stb	r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+	rotrdi	r9,r9,8
+#endif
+3:	li	r3,0
+	blr
+
+.Ldst_unaligned:	/* copy r6 (1-7) bytes so the destination becomes 8-byte aligned */
+r3_offset = 0
+	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
+	subf	r5,r6,r5	/* r5 = bytes left after the alignment copy */
+	li	r7,0		/* r7 = bytes copied so far, read by the *_exc_r7 handlers */
+	cmpldi	cr1,r5,16
+	bf	cr7*4+3,1f
+100:	EX_TABLE(100b, .Lld_exc_r7)
+	lbz	r0,0(r4)
+100:	EX_TABLE(100b, .Lst_exc_r7)
+	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+100:	EX_TABLE(100b, .Lld_exc_r7)
+	lhzx	r0,r7,r4
+100:	EX_TABLE(100b, .Lst_exc_r7)
+	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+100:	EX_TABLE(100b, .Lld_exc_r7)
+	lwzx	r0,r7,r4
+100:	EX_TABLE(100b, .Lst_exc_r7)
+	stwx	r0,r7,r3
+3:	PPC_MTOCRF(0x01,r5)	/* cr7 = low 4 bits of the remaining length */
+	add	r4,r6,r4	/* only now advance both pointers past the copied bytes */
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:	/* under 16 bytes: cr7 bits select the 8/4/2/1 byte steps */
+r3_offset = 0
+	bf	cr7*4+0,1f
+lex;	lwz	r0,0(r4)
+lex;	lwz	r9,4(r4)
+	addi	r4,r4,8
+stex;	stw	r0,0(r3)
+stex;	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+lex;	lwz	r0,0(r4)
+	addi	r4,r4,4
+stex;	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+lex;	lhz	r0,0(r4)
+	addi	r4,r4,2
+stex;	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+lex;	lbz	r0,0(r4)
+stex;	stb	r0,0(r3)
+4:	li	r3,0		/* success: 0 bytes not copied */
+	blr
+
+/*
+ * exception handlers follow
+ * we have to return the number of bytes not copied
+ * for an exception on a load, we set the rest of the destination to 0
+ * Note that the number of bytes of instructions for adjusting r3 needs
+ * to equal the amount of the adjustment, due to the trick of using
+ * .Lld_exc - r3_offset as the handler address.
+ */
+
+.Lld_exc_r7:
+	add	r3,r3,r7	/* r7 = bytes .Ldst_unaligned had copied before the fault */
+	b	.Lld_exc
+
+	/* adjust by 24 */
+	addi	r3,r3,8
+	nop
+	/* adjust by 16 */
+	addi	r3,r3,8
+	nop
+	/* adjust by 8 */
+	addi	r3,r3,8
+	nop
+
+/*
+ * Here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination.  We use the original arguments
+ * and r3 to work out how much wasn't copied.  Since we load some
+ * distance ahead of the stores, we continue copying byte-by-byte until
+ * we hit the load fault again in order to copy as much as possible.
+ */
+.Lld_exc:
+	ld	r6,-24(r1)	/* original destination pointer */
+	ld	r4,-16(r1)	/* original source pointer */
+	ld	r5,-8(r1)	/* original number of bytes */
+	subf	r6,r6,r3	/* r6 = bytes already copied */
+	add	r4,r4,r6	/* r4 = source address of the first uncopied byte */
+	subf	r5,r6,r5	/* #bytes left to go */
+
+/*
+ * first see if we can copy any more bytes before hitting another exception
+ */
+	mtctr	r5
+r3_offset = 0
+100:	EX_TABLE(100b, .Ldone)
+43:	lbz	r0,0(r4)
+	addi	r4,r4,1
+stex;	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	43b
+	li	r3,0		/* huh? all copied successfully this time? */
+	blr
+
+/*
+ * here we have trapped again, amount remaining is in ctr.
+ */
+.Ldone:
+	mfctr	r3
+	blr
+
+/*
+ * exception handlers for stores: we need to work out how many bytes
+ * weren't copied, and we may need to copy some more.
+ * Note that the number of bytes of instructions for adjusting r3 needs
+ * to equal the amount of the adjustment, due to the trick of using
+ * .Lst_exc - r3_offset as the handler address.
+ */
+.Lst_exc_r7:
+	add	r3,r3,r7	/* r7 = bytes .Ldst_unaligned had copied before the fault */
+	b	.Lst_exc
+
+	/* adjust by 24 */
+	addi	r3,r3,8
+	nop
+	/* adjust by 16 */
+	addi	r3,r3,8
+	nop
+	/* adjust by 8 */
+	addi	r3,r3,4
+	/* adjust by 4 */
+	addi	r3,r3,4
+.Lst_exc:
+	ld	r6,-24(r1)	/* original destination pointer */
+	ld	r4,-16(r1)	/* original source pointer */
+	ld	r5,-8(r1)	/* original number of bytes */
+	add	r7,r6,r5	/* r7 = one past the end of the destination */
+	/*
+	 * If the destination pointer isn't 8-byte aligned,
+	 * we may have got the exception as a result of a
+	 * store that overlapped a page boundary, so we may be
+	 * able to copy a few more bytes.
+	 */
+17:	andi.	r0,r3,7
+	beq	19f
+	subf	r8,r6,r3	/* #bytes copied */
+100:	EX_TABLE(100b,19f)
+	lbzx	r0,r8,r4
+100:	EX_TABLE(100b,19f)
+	stb	r0,0(r3)
+	addi	r3,r3,1
+	cmpld	r3,r7
+	blt	17b
+19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
+	blr
+
+/*
+ * Routine to copy a whole page of data, optimized for POWER4.
+ * On POWER4 it is more than 50% faster than the simple loop
+ * above (following the .Ldst_aligned label).
+ */
+	.macro	exc		/* tag the next instruction: a fault resumes at .Labort */
+100:	EX_TABLE(100b, .Labort)
+	.endm
+.Lcopy_page_4K:
+	std	r31,-32(1)	/* save r20-r31 below the stack pointer; no frame is allocated */
+	std	r30,-40(1)
+	std	r29,-48(1)
+	std	r28,-56(1)
+	std	r27,-64(1)
+	std	r26,-72(1)
+	std	r25,-80(1)
+	std	r24,-88(1)
+	std	r23,-96(1)
+	std	r22,-104(1)
+	std	r21,-112(1)
+	std	r20,-120(1)
+	li	r5,4096/32 - 1	/* r5 = 32-byte units to copy, not counting the last one at 4: */
+	addi	r3,r3,-8	/* bias dest by -8: stores use offset 8 and up */
+	li	r0,5
+0:	addi	r5,r5,-24	/* each outer pass copies 24 units: six 128-byte lines */
+	mtctr	r0
+exc;	ld	r22,640(4)
+exc;	ld	r21,512(4)
+exc;	ld	r20,384(4)
+exc;	ld	r11,256(4)
+exc;	ld	r9,128(4)
+exc;	ld	r7,0(4)
+exc;	ld	r25,648(4)
+exc;	ld	r24,520(4)
+exc;	ld	r23,392(4)
+exc;	ld	r10,264(4)
+exc;	ld	r8,136(4)
+exc;	ldu	r6,8(4)
+	cmpwi	r5,24		/* cr0 is consumed by the bge 0b after the inner loop */
+1:
+exc;	std	r22,648(3)
+exc;	std	r21,520(3)
+exc;	std	r20,392(3)
+exc;	std	r11,264(3)
+exc;	std	r9,136(3)
+exc;	std	r7,8(3)
+exc;	ld	r28,648(4)
+exc;	ld	r27,520(4)
+exc;	ld	r26,392(4)
+exc;	ld	r31,264(4)
+exc;	ld	r30,136(4)
+exc;	ld	r29,8(4)
+exc;	std	r25,656(3)
+exc;	std	r24,528(3)
+exc;	std	r23,400(3)
+exc;	std	r10,272(3)
+exc;	std	r8,144(3)
+exc;	std	r6,16(3)
+exc;	ld	r22,656(4)
+exc;	ld	r21,528(4)
+exc;	ld	r20,400(4)
+exc;	ld	r11,272(4)
+exc;	ld	r9,144(4)
+exc;	ld	r7,16(4)
+exc;	std	r28,664(3)
+exc;	std	r27,536(3)
+exc;	std	r26,408(3)
+exc;	std	r31,280(3)
+exc;	std	r30,152(3)
+exc;	stdu	r29,24(3)
+exc;	ld	r25,664(4)
+exc;	ld	r24,536(4)
+exc;	ld	r23,408(4)
+exc;	ld	r10,280(4)
+exc;	ld	r8,152(4)
+exc;	ldu	r6,24(4)
+	bdnz	1b
+exc;	std	r22,648(3)
+exc;	std	r21,520(3)
+exc;	std	r20,392(3)
+exc;	std	r11,264(3)
+exc;	std	r9,136(3)
+exc;	std	r7,8(3)
+	addi	r4,r4,640
+	addi	r3,r3,648
+	bge	0b		/* at least 24 units left: run another outer pass */
+	mtctr	r5		/* remaining units are copied 32 bytes per iteration */
+exc;	ld	r7,0(4)
+exc;	ld	r8,8(4)
+exc;	ldu	r9,16(4)
+3:
+exc;	ld	r10,8(4)
+exc;	std	r7,8(3)
+exc;	ld	r7,16(4)
+exc;	std	r8,16(3)
+exc;	ld	r8,24(4)
+exc;	std	r9,24(3)
+exc;	ldu	r9,32(4)
+exc;	stdu	r10,32(3)
+	bdnz	3b
+4:
+exc;	ld	r10,8(4)
+exc;	std	r7,8(3)
+exc;	std	r8,16(3)
+exc;	std	r9,24(3)
+exc;	std	r10,32(3)
+9:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	li	r3,0		/* success: 0 bytes not copied */
+	blr
+
+/*
+ * on an exception, reset to the beginning and jump back into the
+ * standard __copy_tofrom_user
+ */
+.Labort:
+	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	ld	r3,-24(r1)	/* original destination pointer */
+	ld	r4,-16(r1)	/* original source pointer */
+	li	r5,4096		/* this path is only entered for a 4096-byte copy */
+	b	.Ldst_aligned
+EXPORT_SYMBOL(__copy_tofrom_user)
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
new file mode 100644
index 0000000000..ac41053c3a
--- /dev/null
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -0,0 +1,695 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == don't use VMX, 1 == use VMX */
+#define SELFTEST_CASE	0
+#endif
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
+	.macro err1	/* fault with no frame and no VMX state: resume at .Ldo_err1 */
+100:
+	EX_TABLE(100b,.Ldo_err1)
+	.endm
+
+	.macro err2	/* fault in the 128B GPR loop (frame live, r14-r22 saved): .Ldo_err2 */
+200:
+	EX_TABLE(200b,.Ldo_err2)
+	.endm
+
+#ifdef CONFIG_ALTIVEC
+	.macro err3	/* fault on the VMX path: resume at .Ldo_err3 */
+300:
+	EX_TABLE(300b,.Ldo_err3)
+	.endm
+
+	.macro err4	/* fault in a VMX cacheline loop (r14-r16 saved): .Ldo_err4 */
+400:
+	EX_TABLE(400b,.Ldo_err4)
+	.endm
+
+
+.Ldo_err4:	/* restore r14-r16, then fall into .Ldo_err3 */
+	ld	r16,STK_REG(R16)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r14,STK_REG(R14)(r1)
+.Ldo_err3:
+	bl	CFUNC(exit_vmx_usercopy)
+	ld	r0,STACKFRAMESIZE+16(r1)	/* reload the return address saved at .Lvmx_copy */
+	mtlr	r0
+	b	.Lexit
+#endif /* CONFIG_ALTIVEC */
+
+.Ldo_err2:
+	ld	r22,STK_REG(R22)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r14,STK_REG(R14)(r1)
+.Lexit:
+	addi	r1,r1,STACKFRAMESIZE
+.Ldo_err1:	/* reload the original arguments and redo the copy with the base routine */
+	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+	b	__copy_tofrom_user_base
+
+_GLOBAL(__copy_tofrom_user_power7)	/* r3 = dest, r4 = src, r5 = byte count */
+	cmpldi	r5,16
+	cmpldi	cr1,r5,3328	/* cr1: length compared with the VMX threshold */
+
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* original arguments, reloaded by .Ldo_err1 */
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+
+	blt	.Lshort_copy
+
+#ifdef CONFIG_ALTIVEC
+test_feature = SELFTEST_CASE
+BEGIN_FTR_SECTION
+	bgt	cr1,.Lvmx_copy
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+
+.Lnonvmx_copy:
+	/* Get the source 8B aligned */
+	neg	r6,r4
+	mtocrf	0x01,r6		/* cr7 = low 4 bits of -src: which 1/2/4 byte steps are needed */
+	clrldi	r6,r6,(64-3)	/* r6 = bytes needed to reach 8B alignment */
+
+	bf	cr7*4+3,1f
+err1;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err1;	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+err1;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+err1;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	sub	r5,r5,r6	/* r5 = bytes left after the alignment steps */
+	cmpldi	r5,128
+	blt	5f		/* under 128 bytes: no frame needed, skip the cacheline loop */
+
+	mflr	r0
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+	std	r21,STK_REG(R21)(r1)
+	std	r22,STK_REG(R22)(r1)
+	std	r0,STACKFRAMESIZE+16(r1)
+
+	srdi	r6,r5,7
+	mtctr	r6		/* ctr = number of whole 128B chunks */
+
+	/* Now do cacheline (128B) sized loads and stores. */
+	.align	5
+4:
+err2;	ld	r0,0(r4)
+err2;	ld	r6,8(r4)
+err2;	ld	r7,16(r4)
+err2;	ld	r8,24(r4)
+err2;	ld	r9,32(r4)
+err2;	ld	r10,40(r4)
+err2;	ld	r11,48(r4)
+err2;	ld	r12,56(r4)
+err2;	ld	r14,64(r4)
+err2;	ld	r15,72(r4)
+err2;	ld	r16,80(r4)
+err2;	ld	r17,88(r4)
+err2;	ld	r18,96(r4)
+err2;	ld	r19,104(r4)
+err2;	ld	r20,112(r4)
+err2;	ld	r21,120(r4)
+	addi	r4,r4,128
+err2;	std	r0,0(r3)
+err2;	std	r6,8(r3)
+err2;	std	r7,16(r3)
+err2;	std	r8,24(r3)
+err2;	std	r9,32(r3)
+err2;	std	r10,40(r3)
+err2;	std	r11,48(r3)
+err2;	std	r12,56(r3)
+err2;	std	r14,64(r3)
+err2;	std	r15,72(r3)
+err2;	std	r16,80(r3)
+err2;	std	r17,88(r3)
+err2;	std	r18,96(r3)
+err2;	std	r19,104(r3)
+err2;	std	r20,112(r3)
+err2;	std	r21,120(r3)
+	addi	r3,r3,128
+	bdnz	4b
+
+	clrldi	r5,r5,(64-7)	/* r5 = bytes left after the 128B loop */
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r22,STK_REG(R22)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	/* Up to 127B to go */
+5:	srdi	r6,r5,4
+	mtocrf	0x01,r6		/* cr7 = bits 4-7 of r5: selects the 64/32/16 byte steps */
+
+6:	bf	cr7*4+1,7f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+err1;	ld	r7,16(r4)
+err1;	ld	r8,24(r4)
+err1;	ld	r9,32(r4)
+err1;	ld	r10,40(r4)
+err1;	ld	r11,48(r4)
+err1;	ld	r12,56(r4)
+	addi	r4,r4,64
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+err1;	std	r7,16(r3)
+err1;	std	r8,24(r3)
+err1;	std	r9,32(r3)
+err1;	std	r10,40(r3)
+err1;	std	r11,48(r3)
+err1;	std	r12,56(r3)
+	addi	r3,r3,64
+
+	/* Up to 63B to go */
+7:	bf	cr7*4+2,8f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+err1;	ld	r7,16(r4)
+err1;	ld	r8,24(r4)
+	addi	r4,r4,32
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+err1;	std	r7,16(r3)
+err1;	std	r8,24(r3)
+	addi	r3,r3,32
+
+	/* Up to 31B to go */
+8:	bf	cr7*4+3,9f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+	addi	r4,r4,16
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+	addi	r3,r3,16
+
+9:	clrldi	r5,r5,(64-4)	/* r5 = final 0-15 bytes */
+
+	/* Up to 15B to go */
+.Lshort_copy:
+	mtocrf	0x01,r5		/* cr7 = low 4 bits of r5: selects the 8/4/2/1 byte steps */
+	bf	cr7*4+0,12f
+err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+err1;	lwz	r6,4(r4)
+	addi	r4,r4,8
+err1;	stw	r0,0(r3)
+err1;	stw	r6,4(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+err1;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+err1;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+err1;	lbz	r0,0(r4)
+err1;	stb	r0,0(r3)
+
+15:	li	r3,0		/* success: 0 bytes not copied */
+	blr
+
+.Lunwind_stack_nonvmx_copy:	/* pop the frame made at .Lvmx_copy, then use the GPR path */
+	addi	r1,r1,STACKFRAMESIZE
+	b	.Lnonvmx_copy
+
+.Lvmx_copy:
+#ifdef CONFIG_ALTIVEC
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	bl	CFUNC(enter_vmx_usercopy)
+	cmpwi	cr1,r3,0	/* cr1.eq if enter_vmx_usercopy returned 0; tested after the prefetch setup */
+	ld	r0,STACKFRAMESIZE+16(r1)
+	ld	r3,STK_REG(R31)(r1)	/* reload dest/src/length saved at function entry */
+	ld	r4,STK_REG(R30)(r1)
+	ld	r5,STK_REG(R29)(r1)
+	mtlr	r0
+
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side.
+	 */
+	clrrdi	r6,r4,7
+	clrrdi	r9,r3,7
+	ori	r9,r9,1		/* stream=1 */
+
+	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
+	cmpldi	r7,0x3FF
+	ble	1f
+	li	r7,0x3FF
+1:	lis	r0,0x0E00	/* depth=7 */
+	sldi	r7,r7,7
+	or	r7,r7,r0
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+	/* setup read stream 0 */
+	dcbt	0,r6,0b01000   /* addr from */
+	dcbt	0,r7,0b01010   /* length and depth from */
+	/* setup write stream 1 */
+	dcbtst	0,r9,0b01000   /* addr to */
+	dcbtst	0,r10,0b01010  /* length and depth to */
+	eieio
+	dcbt	0,r8,0b01010	/* all streams GO */
+
+	beq	cr1,.Lunwind_stack_nonvmx_copy	/* returned 0: fall back to the GPR copy */
+
+	/*
+	 * If source and destination are not relatively aligned we use a
+	 * slower permute loop.
+	 */
+	xor	r6,r4,r3
+	rldicl.	r6,r6,0,(64-4)	/* non-zero if src and dest differ in their low 4 bits */
+	bne	.Lvmx_unaligned_copy
+
+	/* Get the destination 16B aligned */
+	neg	r6,r3
+	mtocrf	0x01,r6		/* cr7 = low 4 bits of -dest: which 1/2/4/8 byte steps are needed */
+	clrldi	r6,r6,(64-4)	/* r6 = bytes needed to reach 16B alignment */
+
+	bf	cr7*4+3,1f
+err3;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+err3;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err3;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+err3;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err3;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	bf	cr7*4+0,4f
+err3;	ld	r0,0(r4)
+	addi	r4,r4,8
+err3;	std	r0,0(r3)
+	addi	r3,r3,8
+
+4:	sub	r5,r5,r6
+
+	/* Get the destination 128B aligned */
+	neg	r6,r3
+	srdi	r7,r6,4
+	mtocrf	0x01,r7		/* cr7 = bits 4-7 of -dest: selects the 16/32/64 byte steps */
+	clrldi	r6,r6,(64-7)	/* r6 = bytes needed to reach 128B alignment */
+
+	li	r9,16
+	li	r10,32
+	li	r11,48
+
+	bf	cr7*4+3,5f
+err3;	lvx	v1,0,r4
+	addi	r4,r4,16
+err3;	stvx	v1,0,r3
+	addi	r3,r3,16
+
+5:	bf	cr7*4+2,6f
+err3;	lvx	v1,0,r4
+err3;	lvx	v0,r4,r9
+	addi	r4,r4,32
+err3;	stvx	v1,0,r3
+err3;	stvx	v0,r3,r9
+	addi	r3,r3,32
+
+6:	bf	cr7*4+1,7f
+err3;	lvx	v3,0,r4
+err3;	lvx	v2,r4,r9
+err3;	lvx	v1,r4,r10
+err3;	lvx	v0,r4,r11
+	addi	r4,r4,64
+err3;	stvx	v3,0,r3
+err3;	stvx	v2,r3,r9
+err3;	stvx	v1,r3,r10
+err3;	stvx	v0,r3,r11
+	addi	r3,r3,64
+
+7:	sub	r5,r5,r6
+	srdi	r6,r5,7		/* r6 = number of whole 128B chunks */
+
+	std	r14,STK_REG(R14)(r1)	/* r14-r16 hold extra index offsets; .Ldo_err4 restores them */
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	li	r12,64
+	li	r14,80
+	li	r15,96
+	li	r16,112
+
+	mtctr	r6
+
+	/*
+	 * Now do cacheline sized loads and stores. By this stage the
+	 * cacheline stores are also cacheline aligned.
+	 */
+	.align	5
+8:
+err4;	lvx	v7,0,r4
+err4;	lvx	v6,r4,r9
+err4;	lvx	v5,r4,r10
+err4;	lvx	v4,r4,r11
+err4;	lvx	v3,r4,r12
+err4;	lvx	v2,r4,r14
+err4;	lvx	v1,r4,r15
+err4;	lvx	v0,r4,r16
+	addi	r4,r4,128
+err4;	stvx	v7,0,r3
+err4;	stvx	v6,r3,r9
+err4;	stvx	v5,r3,r10
+err4;	stvx	v4,r3,r11
+err4;	stvx	v3,r3,r12
+err4;	stvx	v2,r3,r14
+err4;	stvx	v1,r3,r15
+err4;	stvx	v0,r3,r16
+	addi	r3,r3,128
+	bdnz	8b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+
+	/* Up to 127B to go */
+	clrldi	r5,r5,(64-7)
+	srdi	r6,r5,4
+	mtocrf	0x01,r6		/* cr7 = bits 4-7 of r5: selects the 64/32/16 byte steps */
+
+	bf	cr7*4+1,9f
+err3;	lvx	v3,0,r4
+err3;	lvx	v2,r4,r9
+err3;	lvx	v1,r4,r10
+err3;	lvx	v0,r4,r11
+	addi	r4,r4,64
+err3;	stvx	v3,0,r3
+err3;	stvx	v2,r3,r9
+err3;	stvx	v1,r3,r10
+err3;	stvx	v0,r3,r11
+	addi	r3,r3,64
+
+9:	bf	cr7*4+2,10f
+err3;	lvx	v1,0,r4
+err3;	lvx	v0,r4,r9
+	addi	r4,r4,32
+err3;	stvx	v1,0,r3
+err3;	stvx	v0,r3,r9
+	addi	r3,r3,32
+
+10:	bf	cr7*4+3,11f
+err3;	lvx	v1,0,r4
+	addi	r4,r4,16
+err3;	stvx	v1,0,r3
+	addi	r3,r3,16
+
+	/* Up to 15B to go */
+11:	clrldi	r5,r5,(64-4)
+	mtocrf	0x01,r5		/* cr7 = low 4 bits of r5: selects the 8/4/2/1 byte steps */
+	bf	cr7*4+0,12f
+err3;	ld	r0,0(r4)
+	addi	r4,r4,8
+err3;	std	r0,0(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+err3;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err3;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+err3;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err3;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+err3;	lbz	r0,0(r4)
+err3;	stb	r0,0(r3)
+
+15:	addi	r1,r1,STACKFRAMESIZE
+	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
+
+.Lvmx_unaligned_copy:	/* src and dest differ in their low 4 bits: realign with VPERM */
+	/* Get the destination 16B aligned */
+	neg	r6,r3
+	mtocrf	0x01,r6		/* cr7 = low 4 bits of -dest: which 1/2/4/8 byte steps are needed */
+	clrldi	r6,r6,(64-4)	/* r6 = bytes needed to reach 16B alignment */
+
+	bf	cr7*4+3,1f
+err3;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+err3;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err3;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+err3;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err3;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	bf	cr7*4+0,4f
+err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+err3;	lwz	r7,4(r4)
+	addi	r4,r4,8
+err3;	stw	r0,0(r3)
+err3;	stw	r7,4(r3)
+	addi	r3,r3,8
+
+4:	sub	r5,r5,r6
+
+	/* Get the destination 128B aligned */
+	neg	r6,r3
+	srdi	r7,r6,4
+	mtocrf	0x01,r7		/* cr7 = bits 4-7 of -dest: selects the 16/32/64 byte steps */
+	clrldi	r6,r6,(64-7)	/* r6 = bytes needed to reach 128B alignment */
+
+	li	r9,16
+	li	r10,32
+	li	r11,48
+
+	LVS(v16,0,r4)		/* Setup permute control vector */
+err3;	lvx	v0,0,r4		/* preload: v0 always holds the previously loaded vector */
+	addi	r4,r4,16	/* r4 now runs 16 bytes ahead; undone at label 11 */
+
+	bf	cr7*4+3,5f
+err3;	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+	addi	r4,r4,16
+err3;	stvx	v8,0,r3
+	addi	r3,r3,16
+	vor	v0,v1,v1	/* v0 = v1: carry the last loaded vector forward */
+
+5:	bf	cr7*4+2,6f
+err3;	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+err3;	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
+	addi	r4,r4,32
+err3;	stvx	v8,0,r3
+err3;	stvx	v9,r3,r9
+	addi	r3,r3,32
+
+6:	bf	cr7*4+1,7f
+err3;	lvx	v3,0,r4
+	VPERM(v8,v0,v3,v16)
+err3;	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+err3;	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+err3;	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
+	addi	r4,r4,64
+err3;	stvx	v8,0,r3
+err3;	stvx	v9,r3,r9
+err3;	stvx	v10,r3,r10
+err3;	stvx	v11,r3,r11
+	addi	r3,r3,64
+
+7:	sub	r5,r5,r6
+	srdi	r6,r5,7		/* r6 = number of whole 128B chunks */
+
+	std	r14,STK_REG(R14)(r1)	/* r14-r16 hold extra index offsets; .Ldo_err4 restores them */
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	li	r12,64
+	li	r14,80
+	li	r15,96
+	li	r16,112
+
+	mtctr	r6
+
+	/*
+	 * Now do cacheline sized loads and stores. By this stage the
+	 * cacheline stores are also cacheline aligned.
+	 */
+	.align	5
+8:
+err4;	lvx	v7,0,r4
+	VPERM(v8,v0,v7,v16)
+err4;	lvx	v6,r4,r9
+	VPERM(v9,v7,v6,v16)
+err4;	lvx	v5,r4,r10
+	VPERM(v10,v6,v5,v16)
+err4;	lvx	v4,r4,r11
+	VPERM(v11,v5,v4,v16)
+err4;	lvx	v3,r4,r12
+	VPERM(v12,v4,v3,v16)
+err4;	lvx	v2,r4,r14
+	VPERM(v13,v3,v2,v16)
+err4;	lvx	v1,r4,r15
+	VPERM(v14,v2,v1,v16)
+err4;	lvx	v0,r4,r16
+	VPERM(v15,v1,v0,v16)
+	addi	r4,r4,128
+err4;	stvx	v8,0,r3
+err4;	stvx	v9,r3,r9
+err4;	stvx	v10,r3,r10
+err4;	stvx	v11,r3,r11
+err4;	stvx	v12,r3,r12
+err4;	stvx	v13,r3,r14
+err4;	stvx	v14,r3,r15
+err4;	stvx	v15,r3,r16
+	addi	r3,r3,128
+	bdnz	8b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+
+	/* Up to 127B to go */
+	clrldi	r5,r5,(64-7)
+	srdi	r6,r5,4
+	mtocrf	0x01,r6		/* cr7 = bits 4-7 of r5: selects the 64/32/16 byte steps */
+
+	bf	cr7*4+1,9f
+err3;	lvx	v3,0,r4
+	VPERM(v8,v0,v3,v16)
+err3;	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+err3;	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+err3;	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
+	addi	r4,r4,64
+err3;	stvx	v8,0,r3
+err3;	stvx	v9,r3,r9
+err3;	stvx	v10,r3,r10
+err3;	stvx	v11,r3,r11
+	addi	r3,r3,64
+
+9:	bf	cr7*4+2,10f
+err3;	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+err3;	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
+	addi	r4,r4,32
+err3;	stvx	v8,0,r3
+err3;	stvx	v9,r3,r9
+	addi	r3,r3,32
+
+10:	bf	cr7*4+3,11f
+err3;	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+	addi	r4,r4,16
+err3;	stvx	v8,0,r3
+	addi	r3,r3,16
+
+	/* Up to 15B to go */
+11:	clrldi	r5,r5,(64-4)
+	addi	r4,r4,-16	/* Unwind the +16 load offset */
+	mtocrf	0x01,r5		/* cr7 = low 4 bits of r5: selects the 8/4/2/1 byte steps */
+	bf	cr7*4+0,12f
+err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+err3;	lwz	r6,4(r4)
+	addi	r4,r4,8
+err3;	stw	r0,0(r3)
+err3;	stw	r6,4(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+err3;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err3;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+err3;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err3;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+err3;	lbz	r0,0(r4)
+err3;	stb	r0,0(r3)
+
+15:	addi	r1,r1,STACKFRAMESIZE
+	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
new file mode 100644
index 0000000000..7e5e1c28e5
--- /dev/null
+++ b/arch/powerpc/lib/crtsavres.S
@@ -0,0 +1,545 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ *   Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
+ *   Copyright 2008 Freescale Semiconductor, Inc.
+ *   Written By Michael Meissner
+ *
+ * Based on gcc/config/rs6000/crtsavres.asm from gcc
+ * 64 bit additions from reading the PPC elf64abi document.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file.  (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ *    As a special exception, if you link this library with files
+ *    compiled with GCC to produce an executable, this does not cause
+ *    the resulting executable to be covered by the GNU General Public License.
+ *    This exception does not however invalidate any other reasons why
+ *    the executable file might be covered by the GNU General Public License.
+ */
+
+#include <asm/ppc_asm.h>
+
+	.file	"crtsavres.S"
+
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+
+	.section ".text"
+
+#ifndef CONFIG_PPC64
+
+/* Routines for saving integer registers, called by the compiler.  */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer save area.  */
+
+_GLOBAL(_savegpr_14)
+_GLOBAL(_save32gpr_14)
+	stw	14,-72(11)	/* save gp registers; each entry falls through to the next */
+_GLOBAL(_savegpr_15)
+_GLOBAL(_save32gpr_15)
+	stw	15,-68(11)
+_GLOBAL(_savegpr_16)
+_GLOBAL(_save32gpr_16)
+	stw	16,-64(11)
+_GLOBAL(_savegpr_17)
+_GLOBAL(_save32gpr_17)
+	stw	17,-60(11)
+_GLOBAL(_savegpr_18)
+_GLOBAL(_save32gpr_18)
+	stw	18,-56(11)
+_GLOBAL(_savegpr_19)
+_GLOBAL(_save32gpr_19)
+	stw	19,-52(11)
+_GLOBAL(_savegpr_20)
+_GLOBAL(_save32gpr_20)
+	stw	20,-48(11)
+_GLOBAL(_savegpr_21)
+_GLOBAL(_save32gpr_21)
+	stw	21,-44(11)
+_GLOBAL(_savegpr_22)
+_GLOBAL(_save32gpr_22)
+	stw	22,-40(11)
+_GLOBAL(_savegpr_23)
+_GLOBAL(_save32gpr_23)
+	stw	23,-36(11)
+_GLOBAL(_savegpr_24)
+_GLOBAL(_save32gpr_24)
+	stw	24,-32(11)
+_GLOBAL(_savegpr_25)
+_GLOBAL(_save32gpr_25)
+	stw	25,-28(11)
+_GLOBAL(_savegpr_26)
+_GLOBAL(_save32gpr_26)
+	stw	26,-24(11)
+_GLOBAL(_savegpr_27)
+_GLOBAL(_save32gpr_27)
+	stw	27,-20(11)
+_GLOBAL(_savegpr_28)
+_GLOBAL(_save32gpr_28)
+	stw	28,-16(11)
+_GLOBAL(_savegpr_29)
+_GLOBAL(_save32gpr_29)
+	stw	29,-12(11)
+_GLOBAL(_savegpr_30)
+_GLOBAL(_save32gpr_30)
+	stw	30,-8(11)
+_GLOBAL(_savegpr_31)
+_GLOBAL(_save32gpr_31)
+	stw	31,-4(11)
+	blr			/* single return shared by every entry point above */
+
+/* Routines for restoring integer registers, called by the compiler.  */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area.  */
+
+_GLOBAL(_restgpr_14)
+_GLOBAL(_rest32gpr_14)
+	lwz	14,-72(11)	/* restore gp registers; each entry falls through to the next */
+_GLOBAL(_restgpr_15)
+_GLOBAL(_rest32gpr_15)
+	lwz	15,-68(11)
+_GLOBAL(_restgpr_16)
+_GLOBAL(_rest32gpr_16)
+	lwz	16,-64(11)
+_GLOBAL(_restgpr_17)
+_GLOBAL(_rest32gpr_17)
+	lwz	17,-60(11)
+_GLOBAL(_restgpr_18)
+_GLOBAL(_rest32gpr_18)
+	lwz	18,-56(11)
+_GLOBAL(_restgpr_19)
+_GLOBAL(_rest32gpr_19)
+	lwz	19,-52(11)
+_GLOBAL(_restgpr_20)
+_GLOBAL(_rest32gpr_20)
+	lwz	20,-48(11)
+_GLOBAL(_restgpr_21)
+_GLOBAL(_rest32gpr_21)
+	lwz	21,-44(11)
+_GLOBAL(_restgpr_22)
+_GLOBAL(_rest32gpr_22)
+	lwz	22,-40(11)
+_GLOBAL(_restgpr_23)
+_GLOBAL(_rest32gpr_23)
+	lwz	23,-36(11)
+_GLOBAL(_restgpr_24)
+_GLOBAL(_rest32gpr_24)
+	lwz	24,-32(11)
+_GLOBAL(_restgpr_25)
+_GLOBAL(_rest32gpr_25)
+	lwz	25,-28(11)
+_GLOBAL(_restgpr_26)
+_GLOBAL(_rest32gpr_26)
+	lwz	26,-24(11)
+_GLOBAL(_restgpr_27)
+_GLOBAL(_rest32gpr_27)
+	lwz	27,-20(11)
+_GLOBAL(_restgpr_28)
+_GLOBAL(_rest32gpr_28)
+	lwz	28,-16(11)
+_GLOBAL(_restgpr_29)
+_GLOBAL(_rest32gpr_29)
+	lwz	29,-12(11)
+_GLOBAL(_restgpr_30)
+_GLOBAL(_rest32gpr_30)
+	lwz	30,-8(11)
+_GLOBAL(_restgpr_31)
+_GLOBAL(_rest32gpr_31)
+	lwz	31,-4(11)
+	blr			/* plain return: LR and r1 are left untouched here */
+
+/* Restore integer registers, then return ("_x"); called by the compiler.  */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area.  */
+
+_GLOBAL(_restgpr_14_x)
+_GLOBAL(_rest32gpr_14_x)
+	lwz	14,-72(11)	/* restore gp registers; each entry falls through to the next */
+_GLOBAL(_restgpr_15_x)
+_GLOBAL(_rest32gpr_15_x)
+	lwz	15,-68(11)
+_GLOBAL(_restgpr_16_x)
+_GLOBAL(_rest32gpr_16_x)
+	lwz	16,-64(11)
+_GLOBAL(_restgpr_17_x)
+_GLOBAL(_rest32gpr_17_x)
+	lwz	17,-60(11)
+_GLOBAL(_restgpr_18_x)
+_GLOBAL(_rest32gpr_18_x)
+	lwz	18,-56(11)
+_GLOBAL(_restgpr_19_x)
+_GLOBAL(_rest32gpr_19_x)
+	lwz	19,-52(11)
+_GLOBAL(_restgpr_20_x)
+_GLOBAL(_rest32gpr_20_x)
+	lwz	20,-48(11)
+_GLOBAL(_restgpr_21_x)
+_GLOBAL(_rest32gpr_21_x)
+	lwz	21,-44(11)
+_GLOBAL(_restgpr_22_x)
+_GLOBAL(_rest32gpr_22_x)
+	lwz	22,-40(11)
+_GLOBAL(_restgpr_23_x)
+_GLOBAL(_rest32gpr_23_x)
+	lwz	23,-36(11)
+_GLOBAL(_restgpr_24_x)
+_GLOBAL(_rest32gpr_24_x)
+	lwz	24,-32(11)
+_GLOBAL(_restgpr_25_x)
+_GLOBAL(_rest32gpr_25_x)
+	lwz	25,-28(11)
+_GLOBAL(_restgpr_26_x)
+_GLOBAL(_rest32gpr_26_x)
+	lwz	26,-24(11)
+_GLOBAL(_restgpr_27_x)
+_GLOBAL(_rest32gpr_27_x)
+	lwz	27,-20(11)
+_GLOBAL(_restgpr_28_x)
+_GLOBAL(_rest32gpr_28_x)
+	lwz	28,-16(11)
+_GLOBAL(_restgpr_29_x)
+_GLOBAL(_rest32gpr_29_x)
+	lwz	29,-12(11)
+_GLOBAL(_restgpr_30_x)
+_GLOBAL(_rest32gpr_30_x)
+	lwz	30,-8(11)
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+	lwz	0,4(11)		/* word at r11+4 (presumably the saved LR slot - confirm against the ABI) */
+	lwz	31,-4(11)
+	mtlr	0		/* the blr below branches to the value just loaded */
+	mr	1,11		/* r1 = r11 */
+	blr
+
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area.  */
+
+_GLOBAL(_savevr_20)
+	li	r11,-192	/* r11 is overwritten with the slot offset; slots are 16 bytes apart */
+	stvx	v20,r11,r0	/* store at r11 + r0, then fall through to the next entry */
+_GLOBAL(_savevr_21)
+	li	r11,-176
+	stvx	v21,r11,r0
+_GLOBAL(_savevr_22)
+	li	r11,-160
+	stvx	v22,r11,r0
+_GLOBAL(_savevr_23)
+	li	r11,-144
+	stvx	v23,r11,r0
+_GLOBAL(_savevr_24)
+	li	r11,-128
+	stvx	v24,r11,r0
+_GLOBAL(_savevr_25)
+	li	r11,-112
+	stvx	v25,r11,r0
+_GLOBAL(_savevr_26)
+	li	r11,-96
+	stvx	v26,r11,r0
+_GLOBAL(_savevr_27)
+	li	r11,-80
+	stvx	v27,r11,r0
+_GLOBAL(_savevr_28)
+	li	r11,-64
+	stvx	v28,r11,r0
+_GLOBAL(_savevr_29)
+	li	r11,-48
+	stvx	v29,r11,r0
+_GLOBAL(_savevr_30)
+	li	r11,-32
+	stvx	v30,r11,r0
+_GLOBAL(_savevr_31)
+	li	r11,-16
+	stvx	v31,r11,r0
+	blr			/* single return shared by every _savevr_N entry */
+
+_GLOBAL(_restvr_20)
+	li	r11,-192	/* r11 is overwritten with the slot offset; same layout as _savevr_N */
+	lvx	v20,r11,r0	/* load from r11 + r0, then fall through to the next entry */
+_GLOBAL(_restvr_21)
+	li	r11,-176
+	lvx	v21,r11,r0
+_GLOBAL(_restvr_22)
+	li	r11,-160
+	lvx	v22,r11,r0
+_GLOBAL(_restvr_23)
+	li	r11,-144
+	lvx	v23,r11,r0
+_GLOBAL(_restvr_24)
+	li	r11,-128
+	lvx	v24,r11,r0
+_GLOBAL(_restvr_25)
+	li	r11,-112
+	lvx	v25,r11,r0
+_GLOBAL(_restvr_26)
+	li	r11,-96
+	lvx	v26,r11,r0
+_GLOBAL(_restvr_27)
+	li	r11,-80
+	lvx	v27,r11,r0
+_GLOBAL(_restvr_28)
+	li	r11,-64
+	lvx	v28,r11,r0
+_GLOBAL(_restvr_29)
+	li	r11,-48
+	lvx	v29,r11,r0
+_GLOBAL(_restvr_30)
+	li	r11,-32
+	lvx	v30,r11,r0
+_GLOBAL(_restvr_31)
+	li	r11,-16
+	lvx	v31,r11,r0
+	blr			/* single return shared by every _restvr_N entry */
+
+#endif /* CONFIG_ALTIVEC */
+
+#else /* CONFIG_PPC64 */
+
+.globl	_savegpr0_14
+_savegpr0_14:
+	std	r14,-144(r1)	/* slots sit at negative offsets from r1; each entry falls through */
+.globl	_savegpr0_15
+_savegpr0_15:
+	std	r15,-136(r1)
+.globl	_savegpr0_16
+_savegpr0_16:
+	std	r16,-128(r1)
+.globl	_savegpr0_17
+_savegpr0_17:
+	std	r17,-120(r1)
+.globl	_savegpr0_18
+_savegpr0_18:
+	std	r18,-112(r1)
+.globl	_savegpr0_19
+_savegpr0_19:
+	std	r19,-104(r1)
+.globl	_savegpr0_20
+_savegpr0_20:
+	std	r20,-96(r1)
+.globl	_savegpr0_21
+_savegpr0_21:
+	std	r21,-88(r1)
+.globl	_savegpr0_22
+_savegpr0_22:
+	std	r22,-80(r1)
+.globl	_savegpr0_23
+_savegpr0_23:
+	std	r23,-72(r1)
+.globl	_savegpr0_24
+_savegpr0_24:
+	std	r24,-64(r1)
+.globl	_savegpr0_25
+_savegpr0_25:
+	std	r25,-56(r1)
+.globl	_savegpr0_26
+_savegpr0_26:
+	std	r26,-48(r1)
+.globl	_savegpr0_27
+_savegpr0_27:
+	std	r27,-40(r1)
+.globl	_savegpr0_28
+_savegpr0_28:
+	std	r28,-32(r1)
+.globl	_savegpr0_29
+_savegpr0_29:
+	std	r29,-24(r1)
+.globl	_savegpr0_30
+_savegpr0_30:
+	std	r30,-16(r1)
+.globl	_savegpr0_31
+_savegpr0_31:
+	std	r31,-8(r1)
+	std	r0,16(r1)	/* also stores r0 at r1+16 (presumably the LR copy placed in r0 by the caller - confirm) */
+	blr
+
+.globl	_restgpr0_14
+_restgpr0_14:
+	ld	r14,-144(r1)	/* entries 14..28 fall through into the _restgpr0_29 tail */
+.globl	_restgpr0_15
+_restgpr0_15:
+	ld	r15,-136(r1)
+.globl	_restgpr0_16
+_restgpr0_16:
+	ld	r16,-128(r1)
+.globl	_restgpr0_17
+_restgpr0_17:
+	ld	r17,-120(r1)
+.globl	_restgpr0_18
+_restgpr0_18:
+	ld	r18,-112(r1)
+.globl	_restgpr0_19
+_restgpr0_19:
+	ld	r19,-104(r1)
+.globl	_restgpr0_20
+_restgpr0_20:
+	ld	r20,-96(r1)
+.globl	_restgpr0_21
+_restgpr0_21:
+	ld	r21,-88(r1)
+.globl	_restgpr0_22
+_restgpr0_22:
+	ld	r22,-80(r1)
+.globl	_restgpr0_23
+_restgpr0_23:
+	ld	r23,-72(r1)
+.globl	_restgpr0_24
+_restgpr0_24:
+	ld	r24,-64(r1)
+.globl	_restgpr0_25
+_restgpr0_25:
+	ld	r25,-56(r1)
+.globl	_restgpr0_26
+_restgpr0_26:
+	ld	r26,-48(r1)
+.globl	_restgpr0_27
+_restgpr0_27:
+	ld	r27,-40(r1)
+.globl	_restgpr0_28
+_restgpr0_28:
+	ld	r28,-32(r1)
+.globl	_restgpr0_29
+_restgpr0_29:
+	ld	r0,16(r1)	/* reload the doubleword at r1+16 (the slot _savegpr0_31 writes r0 to) */
+	ld	r29,-24(r1)
+	mtlr	r0		/* the blr below branches to the reloaded value */
+	ld	r30,-16(r1)
+	ld	r31,-8(r1)
+	blr
+
+.globl	_restgpr0_30
+_restgpr0_30:
+	ld	r30,-16(r1)	/* separate short tail used only by the 30 and 31 entries */
+.globl	_restgpr0_31
+_restgpr0_31:
+	ld	r0,16(r1)
+	ld	r31,-8(r1)
+	mtlr	r0		/* as above: return goes to the value loaded from r1+16 */
+	blr
+
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area.  */
+
+.globl	_savevr_20
+_savevr_20:
+	li	r12,-192	/* r12 is overwritten with the slot offset; slots are 16 bytes apart */
+	stvx	v20,r12,r0	/* store at r12 + r0, then fall through to the next entry */
+.globl	_savevr_21
+_savevr_21:
+	li	r12,-176
+	stvx	v21,r12,r0
+.globl	_savevr_22
+_savevr_22:
+	li	r12,-160
+	stvx	v22,r12,r0
+.globl	_savevr_23
+_savevr_23:
+	li	r12,-144
+	stvx	v23,r12,r0
+.globl	_savevr_24
+_savevr_24:
+	li	r12,-128
+	stvx	v24,r12,r0
+.globl	_savevr_25
+_savevr_25:
+	li	r12,-112
+	stvx	v25,r12,r0
+.globl	_savevr_26
+_savevr_26:
+	li	r12,-96
+	stvx	v26,r12,r0
+.globl	_savevr_27
+_savevr_27:
+	li	r12,-80
+	stvx	v27,r12,r0
+.globl	_savevr_28
+_savevr_28:
+	li	r12,-64
+	stvx	v28,r12,r0
+.globl	_savevr_29
+_savevr_29:
+	li	r12,-48
+	stvx	v29,r12,r0
+.globl	_savevr_30
+_savevr_30:
+	li	r12,-32
+	stvx	v30,r12,r0
+.globl	_savevr_31
+_savevr_31:
+	li	r12,-16
+	stvx	v31,r12,r0
+	blr			/* single return shared by every _savevr_N entry */
+
+.globl	_restvr_20
+_restvr_20:
+	li	r12,-192	/* r12 is overwritten with the slot offset; same layout as _savevr_N */
+	lvx	v20,r12,r0	/* load from r12 + r0, then fall through to the next entry */
+.globl	_restvr_21
+_restvr_21:
+	li	r12,-176
+	lvx	v21,r12,r0
+.globl	_restvr_22
+_restvr_22:
+	li	r12,-160
+	lvx	v22,r12,r0
+.globl	_restvr_23
+_restvr_23:
+	li	r12,-144
+	lvx	v23,r12,r0
+.globl	_restvr_24
+_restvr_24:
+	li	r12,-128
+	lvx	v24,r12,r0
+.globl	_restvr_25
+_restvr_25:
+	li	r12,-112
+	lvx	v25,r12,r0
+.globl	_restvr_26
+_restvr_26:
+	li	r12,-96
+	lvx	v26,r12,r0
+.globl	_restvr_27
+_restvr_27:
+	li	r12,-80
+	lvx	v27,r12,r0
+.globl	_restvr_28
+_restvr_28:
+	li	r12,-64
+	lvx	v28,r12,r0
+.globl	_restvr_29
+_restvr_29:
+	li	r12,-48
+	lvx	v29,r12,r0
+.globl	_restvr_30
+_restvr_30:
+	li	r12,-32
+	lvx	v30,r12,r0
+.globl	_restvr_31
+_restvr_31:
+	li	r12,-16
+	lvx	v31,r12,r0
+	blr			/* single return shared by every _restvr_N entry */
+
+#endif /* CONFIG_ALTIVEC */
+
+#endif /* CONFIG_PPC64 */
+
+#endif
diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
new file mode 100644
index 0000000000..3d5426e7dc
--- /dev/null
+++ b/arch/powerpc/lib/div64.S
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which gets overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+_GLOBAL(__div64_32)
+	lwz	r5,0(r3)	# get the dividend into r5/r6
+	lwz	r6,4(r3)
+	cmplw	r5,r4
+	li	r7,0		# r7 = quotient.hi, stored to 0(r3) at 4:
+	li	r8,0		# r8 = quotient.lo accumulator, stored to 4(r3) at 4:
+	blt	1f		# dividend.hi < divisor: quotient.hi stays 0
+	divwu	r7,r5,r4	# if dividend.hi >= divisor,
+	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
+	subf.	r5,r0,r5	# dividend.hi %= divisor
+	beq	3f		# dividend.hi is now 0: skip the estimate loop
+1:	mr	r11,r5		# here dividend.hi != 0
+	andis.	r0,r5,0xc000
+	bne	2f		# a top-two bit of dividend.hi is set: use r11 = dividend.hi as is
+	cntlzw	r0,r5		# we are shifting the dividend right
+	li	r10,-1		# to make it < 2^32, and shifting
+	srw	r10,r10,r0	# the divisor right the same amount,
+	addc	r9,r4,r10	# rounding up (so the estimate cannot
+	andc	r11,r6,r10	# ever be too large, only too small)
+	andc	r9,r9,r10
+	addze	r9,r9
+	or	r11,r5,r11
+	rotlw	r9,r9,r0
+	rotlw	r11,r11,r0
+	divwu	r11,r11,r9	# then we divide the shifted quantities
+2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
+	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
+	subfc	r6,r10,r6	# take the product from the dividend,
+	add	r8,r8,r11	# and add the estimate to the accumulated
+	subfe.	r5,r9,r5	# quotient
+	bne	1b		# repeat while dividend.hi is still non-zero
+3:	cmplw	r6,r4		# dividend.hi is 0 here; r6 holds what is left
+	blt	4f		# already below the divisor: r6 is the remainder
+	divwu	r0,r6,r4	# perform the remaining 32-bit division
+	mullw	r10,r0,r4	# and get the remainder
+	add	r8,r8,r0
+	subf	r6,r10,r6
+4:	stw	r7,0(r3)	# return the quotient in *r3
+	stw	r8,4(r3)
+	mr	r3,r6		# return the remainder in r3
+	blr
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c
new file mode 100644
index 0000000000..e834079d2b
--- /dev/null
+++ b/arch/powerpc/lib/error-inject.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+	/*
+	 * Emulate 'blr'. 'regs' represents the state on entry of a predefined
+	 * function in the kernel/module, captured on a kprobe. We don't need
+	 * to worry about 32-bit userspace on a 64-bit kernel.
+	 */
+	regs_set_return_ip(regs, regs->link);	/* return IP := saved link register value */
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
new file mode 100644
index 0000000000..480172fbd0
--- /dev/null
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -0,0 +1,862 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <asm/feature-fixups.h>
+#include <asm/ppc_asm.h>
+#include <asm/synch.h>
+#include <asm/asm-compat.h>
+#include <asm/ppc-opcode.h>
+
+	.text
+
+#define globl(x)		/* declare x global and define it as a label here */	\
+	.globl x;	\
+x:
+
+globl(ftr_fixup_test1)	/* code region the fixup is applied to */
+	or	1,1,1
+	or	2,2,2	/* fixup will nop out this instruction */
+	or	3,3,3
+
+globl(end_ftr_fixup_test1)	/* marks the end of the region above */
+
+globl(ftr_fixup_test1_orig)	/* same three instructions as ftr_fixup_test1 before patching */
+	or	1,1,1
+	or	2,2,2
+	or	3,3,3
+
+globl(ftr_fixup_test1_expected)	/* ftr_fixup_test1 with the middle instruction nop'ed */
+	or	1,1,1
+	nop
+	or	3,3,3
+
+globl(ftr_fixup_test2)	/* code region the fixup is applied to */
+	or	1,1,1
+	or	2,2,2	/* fixup will replace this with ftr_fixup_test2_alt */
+	or	3,3,3
+
+globl(end_ftr_fixup_test2)	/* marks the end of the region above */
+
+globl(ftr_fixup_test2_orig)	/* same three instructions as ftr_fixup_test2 before patching */
+	or	1,1,1
+	or	2,2,2
+	or	3,3,3
+
+globl(ftr_fixup_test2_alt)	/* one-instruction alternative */
+	or	31,31,31
+
+globl(ftr_fixup_test2_expected)	/* ftr_fixup_test2 with the middle instruction replaced by the alt */
+	or	1,1,1
+	or	31,31,31
+	or	3,3,3
+
+globl(ftr_fixup_test3)	/* code region the fixup is applied to */
+	or	1,1,1
+	or	2,2,2	/* fixup will fail to replace this */
+	or	3,3,3
+
+globl(end_ftr_fixup_test3)	/* marks the end of the region above */
+
+globl(ftr_fixup_test3_orig)	/* same three instructions as ftr_fixup_test3; no _expected copy exists for this test */
+	or	1,1,1
+	or	2,2,2
+	or	3,3,3
+
+globl(ftr_fixup_test3_alt)	/* two instructions (presumably too large for the one-insn slot - confirm in the C test) */
+	or	31,31,31
+	or	31,31,31
+
+globl(ftr_fixup_test4)	/* code region the fixup is applied to */
+	or	1,1,1
+	or	2,2,2	/* four-instruction run that the expected copy shows patched */
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	3,3,3
+
+globl(end_ftr_fixup_test4)	/* marks the end of the region above */
+
+globl(ftr_fixup_test4_expected)	/* the two alt instructions followed by two nops of padding */
+	or	1,1,1
+	or	31,31,31
+	or	31,31,31
+	nop
+	nop
+	or	3,3,3
+
+globl(ftr_fixup_test4_orig)	/* same six instructions as ftr_fixup_test4 before patching */
+	or	1,1,1
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	3,3,3
+
+globl(ftr_fixup_test4_alt)	/* two-instruction alternative, shorter than the four-insn run */
+	or	31,31,31
+	or	31,31,31
+
+
+globl(ftr_fixup_test5)	/* alt section whose else body contains only local branches */
+	or	1,1,1
+BEGIN_FTR_SECTION
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+FTR_SECTION_ELSE
+2:	b	3f
+3:	or	5,5,5
+	beq	3b
+	b	1f
+	or	6,6,6
+	b	2b
+1:	bdnz	3b	/* every branch target (1, 2, 3) lies inside the else body */
+ALT_FTR_SECTION_END(0, 1)
+	or	1,1,1
+
+globl(end_ftr_fixup_test5)	/* marks the end of the region above */
+
+globl(ftr_fixup_test5_expected)	/* the else body above, framed by the two or 1,1,1 */
+	or	1,1,1
+2:	b	3f
+3:	or	5,5,5
+	beq	3b
+	b	1f
+	or	6,6,6
+	b	2b
+1:	bdnz	3b
+	or	1,1,1
+
+globl(ftr_fixup_test6)	/* alt section whose else body branches to labels outside it (1 and 3) */
+1:	or	1,1,1
+BEGIN_FTR_SECTION
+	or	5,5,5
+2:	PPC_LCMPI	r3,0
+	beq	4f
+	blt	2b
+	b	1b
+	b	4f
+FTR_SECTION_ELSE
+2:	or	2,2,2
+	PPC_LCMPI	r3,1
+	beq	3f	/* target 3 is after the section */
+	blt	2b	/* target 2 is inside the else body */
+	b	3f
+	b	1b	/* target 1 is before the section */
+ALT_FTR_SECTION_END(0, 1)
+3:	or	1,1,1
+	or	2,2,2
+4:	or	3,3,3
+
+globl(end_ftr_fixup_test6)	/* marks the end of the region above */
+
+globl(ftr_fixup_test6_expected)	/* the else body in place, with labels 1 and 3 around it */
+1:	or	1,1,1
+2:	or	2,2,2
+	PPC_LCMPI	r3,1
+	beq	3f
+	blt	2b
+	b	3f
+	b	1b
+3:	or	1,1,1
+	or	2,2,2
+	or	3,3,3
+
+globl(ftr_fixup_test7)	/* like test5, but label 1 sits at the very end of the else body */
+	or	1,1,1
+BEGIN_FTR_SECTION
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+FTR_SECTION_ELSE
+2:	b	3f
+3:	or	5,5,5
+	beq	3b
+	b	1f	/* target is the label placed after the last else instruction */
+	or	6,6,6
+	b	2b
+	bdnz	3b
+1:
+ALT_FTR_SECTION_END(0, 1)
+	or	1,1,1
+	or	1,1,1
+
+globl(end_ftr_fixup_test7)	/* marks the end of the region above */
+	nop
+
+globl(ftr_fixup_test7_expected)	/* else body in place; label 1 is on the instruction that follows it */
+	or	1,1,1
+2:	b	3f
+3:	or	5,5,5
+	beq	3b
+	b	1f
+	or	6,6,6
+	b	2b
+	bdnz	3b
+1:	or	1,1,1
+
+#if 0
+/* Test that if we have a larger else case the assembler spots it and
+ * reports an error. #if 0'ed so as not to break the build normally.
+ */
+ftr_fixup_test_too_big:
+	or	1,1,1
+BEGIN_FTR_SECTION
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+FTR_SECTION_ELSE
+	or	3,3,3
+	or	3,3,3
+	or	3,3,3
+	or	3,3,3
+ALT_FTR_SECTION_END(0, 1)
+	or	1,1,1
+#endif
+
+#define	MAKE_MACRO_TEST(TYPE)						\
+globl(ftr_fixup_test_ ##TYPE##_macros)					\
+	or	1,1,1;							\
+	/* Basic test, this section should all be nop'ed */		\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Basic test, this section should NOT be nop'ed */		\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nesting test, inner section should be nop'ed */		\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(80)					\
+	or	3,3,3;							\
+	or	3,3,3;							\
+END_##TYPE##_SECTION_NESTED(0, 1, 80)					\
+	or	2,2,2;							\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nesting test, whole section should be nop'ed */		\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(80)					\
+	or	3,3,3;							\
+	or	3,3,3;							\
+END_##TYPE##_SECTION_NESTED(0, 0, 80)					\
+	or	2,2,2;							\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nesting test, none should be nop'ed */			\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(80)					\
+	or	3,3,3;							\
+	or	3,3,3;							\
+END_##TYPE##_SECTION_NESTED(0, 0, 80)					\
+	or	2,2,2;							\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Basic alt section test, default case should be taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+##TYPE##_SECTION_ELSE							\
+	or	5,5,5;							\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Basic alt section test, else case should be taken */		\
+BEGIN_##TYPE##_SECTION							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+##TYPE##_SECTION_ELSE							\
+	or	31,31,31;						\
+	or	31,31,31;						\
+	or	31,31,31;						\
+ALT_##TYPE##_SECTION_END(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt with smaller else case, should be padded with nops */	\
+BEGIN_##TYPE##_SECTION							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+##TYPE##_SECTION_ELSE							\
+	or	31,31,31;						\
+ALT_##TYPE##_SECTION_END(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt section with nested section in default case */		\
+	/* Default case should be taken, with nop'ed inner section */	\
+BEGIN_##TYPE##_SECTION							\
+	or	3,3,3;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	3,3,3;							\
+	or	3,3,3;							\
+END_##TYPE##_SECTION_NESTED(0, 1, 95)					\
+	or	3,3,3;							\
+##TYPE##_SECTION_ELSE							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+ALT_##TYPE##_SECTION_END(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt section with nested section in else, default taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+##TYPE##_SECTION_ELSE							\
+	or	5,5,5;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	3,3,3;							\
+END_##TYPE##_SECTION_NESTED(0, 1, 95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt section with nested section in else, else taken & nop */	\
+BEGIN_##TYPE##_SECTION							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+##TYPE##_SECTION_ELSE							\
+	or	5,5,5;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	3,3,3;							\
+END_##TYPE##_SECTION_NESTED(0, 1, 95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Feature section with nested alt section, default taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95)				\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Feature section with nested alt section, else taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)				\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Feature section with nested alt section, all nop'ed */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95)				\
+	or	2,2,2;							\
+END_##TYPE##_SECTION(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, default with inner default taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95)				\
+	or	2,2,2;							\
+##TYPE##_SECTION_ELSE							\
+	or	31,31,31;						\
+BEGIN_##TYPE##_SECTION_NESTED(94)					\
+	or	5,5,5;							\
+##TYPE##_SECTION_ELSE_NESTED(94)					\
+	or	1,1,1;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94)				\
+	or	31,31,31;						\
+ALT_##TYPE##_SECTION_END(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, default with inner else taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)				\
+	or	2,2,2;							\
+##TYPE##_SECTION_ELSE							\
+	or	31,31,31;						\
+BEGIN_##TYPE##_SECTION_NESTED(94)					\
+	or	5,5,5;							\
+##TYPE##_SECTION_ELSE_NESTED(94)					\
+	or	1,1,1;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94)				\
+	or	31,31,31;						\
+ALT_##TYPE##_SECTION_END(0, 0)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, else with inner default taken */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)				\
+	or	2,2,2;							\
+##TYPE##_SECTION_ELSE							\
+	or	31,31,31;						\
+BEGIN_##TYPE##_SECTION_NESTED(94)					\
+	or	5,5,5;							\
+##TYPE##_SECTION_ELSE_NESTED(94)					\
+	or	1,1,1;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94)				\
+	or	31,31,31;						\
+ALT_##TYPE##_SECTION_END(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, else with inner else taken */		\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+BEGIN_##TYPE##_SECTION_NESTED(95)					\
+	or	1,1,1;							\
+##TYPE##_SECTION_ELSE_NESTED(95)					\
+	or	5,5,5;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95)				\
+	or	2,2,2;							\
+##TYPE##_SECTION_ELSE							\
+	or	31,31,31;						\
+BEGIN_##TYPE##_SECTION_NESTED(94)					\
+	or	5,5,5;							\
+##TYPE##_SECTION_ELSE_NESTED(94)					\
+	or	1,1,1;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94)				\
+	or	31,31,31;						\
+ALT_##TYPE##_SECTION_END(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, else can have large else case */	\
+BEGIN_##TYPE##_SECTION							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+##TYPE##_SECTION_ELSE 							\
+BEGIN_##TYPE##_SECTION_NESTED(94) 					\
+	or	5,5,5;							\
+	or	5,5,5;							\
+	or	5,5,5;							\
+	or	5,5,5;							\
+##TYPE##_SECTION_ELSE_NESTED(94) 					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	or	1,1,1;							\
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94)				\
+ALT_##TYPE##_SECTION_END(0, 1)						\
+	or	1,1,1;							\
+	or	1,1,1;
+
+#define	MAKE_MACRO_TEST_EXPECTED(TYPE)					\
+globl(ftr_fixup_test_ ##TYPE##_macros_expected)				\
+	or	1,1,1;							\
+	/* Basic test, this section should all be nop'ed */		\
+/* BEGIN_##TYPE##_SECTION */						\
+	nop;								\
+	nop;								\
+	nop;								\
+/* END_##TYPE##_SECTION(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Basic test, this section should NOT be nop'ed */		\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+	or	2,2,2;							\
+	or	2,2,2;							\
+/* END_##TYPE##_SECTION(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nesting test, inner section should be nop'ed */		\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+	or	2,2,2;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */					\
+	nop;								\
+	nop;								\
+/* END_##TYPE##_SECTION_NESTED(0, 1, 80) */				\
+	or	2,2,2;							\
+	or	2,2,2;							\
+/* END_##TYPE##_SECTION(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nesting test, whole section should be nop'ed */		\
+	/* NB. inner section is not nop'ed, but then entire outer is */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	nop;								\
+	nop;								\
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */					\
+	nop;								\
+	nop;								\
+/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */				\
+	nop;								\
+	nop;								\
+/* END_##TYPE##_SECTION(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nesting test, none should be nop'ed */			\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+	or	2,2,2;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */					\
+	or	3,3,3;							\
+	or	3,3,3;							\
+/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */				\
+	or	2,2,2;							\
+	or	2,2,2;							\
+/* END_##TYPE##_SECTION(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Basic alt section test, default case should be taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+/* ##TYPE##_SECTION_ELSE */						\
+	/* or	5,5,5; */						\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Basic alt section test, else case should be taken */		\
+/* BEGIN_##TYPE##_SECTION */						\
+	/* or	3,3,3; */						\
+	/* or	3,3,3; */						\
+	/* or	3,3,3; */						\
+/* ##TYPE##_SECTION_ELSE */						\
+	or	31,31,31;						\
+	or	31,31,31;						\
+	or	31,31,31;						\
+/* ALT_##TYPE##_SECTION_END(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt with smaller else case, should be padded with nops */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	/* or	3,3,3; */						\
+	/* or	3,3,3; */						\
+	/* or	3,3,3; */						\
+/* ##TYPE##_SECTION_ELSE */						\
+	or	31,31,31;						\
+	nop;								\
+	nop;								\
+/* ALT_##TYPE##_SECTION_END(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt section with nested section in default case */		\
+	/* Default case should be taken, with nop'ed inner section */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	3,3,3;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	nop;								\
+	nop;								\
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */				\
+	or	3,3,3;							\
+/* ##TYPE##_SECTION_ELSE */						\
+	/* or	2,2,2; */						\
+	/* or	2,2,2; */						\
+/* ALT_##TYPE##_SECTION_END(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt section with nested section in else, default taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	3,3,3;							\
+	or	3,3,3;							\
+	or	3,3,3;							\
+/* ##TYPE##_SECTION_ELSE */						\
+	/* or	5,5,5; */						\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	/* or	3,3,3; */						\
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */				\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Alt section with nested section in else, else taken & nop */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	/* or	3,3,3; */						\
+	/* or	3,3,3; */						\
+	/* or	3,3,3; */						\
+/* ##TYPE##_SECTION_ELSE */						\
+	or	5,5,5;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	nop;								\
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */				\
+	or	5,5,5;							\
+/* ALT_##TYPE##_SECTION_END(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Feature section with nested alt section, default taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	or	1,1,1;							\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */				\
+	or	2,2,2;							\
+/* END_##TYPE##_SECTION(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Feature section with nested alt section, else taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	/* or	1,1,1; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	or	5,5,5;							\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */				\
+	or	2,2,2;							\
+/* END_##TYPE##_SECTION(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Feature section with nested alt section, all nop'ed */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	nop;								\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	nop;								\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */				\
+	nop;								\
+/* END_##TYPE##_SECTION(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, default with inner default taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	or	1,1,1;							\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */				\
+	or	2,2,2;							\
+/* ##TYPE##_SECTION_ELSE */						\
+	/* or	31,31,31; */						\
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */					\
+	/* or	5,5,5; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */					\
+	/* or	1,1,1; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */				\
+	/* or	31,31,31; */						\
+/* ALT_##TYPE##_SECTION_END(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, default with inner else taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	or	2,2,2;							\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	/* or	1,1,1; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	or	5,5,5;							\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */				\
+	or	2,2,2;							\
+/* ##TYPE##_SECTION_ELSE */						\
+	/* or	31,31,31; */						\
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */					\
+	/* or	5,5,5; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */					\
+	/* or	1,1,1; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */				\
+	/* or	31,31,31; */						\
+/* ALT_##TYPE##_SECTION_END(0, 0) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, else with inner default taken */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	/* or	2,2,2; */						\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	/* or	1,1,1; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */				\
+	/* or	2,2,2; */						\
+/* ##TYPE##_SECTION_ELSE */						\
+	or	31,31,31;						\
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */					\
+	or	5,5,5;							\
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */					\
+	/* or	1,1,1; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */				\
+	or	31,31,31;						\
+/* ALT_##TYPE##_SECTION_END(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, else with inner else taken */		\
+/* BEGIN_##TYPE##_SECTION */						\
+	/* or	2,2,2; */						\
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */					\
+	/* or	1,1,1; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */					\
+	/* or	5,5,5; */						\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */				\
+	/* or	2,2,2; */						\
+/* ##TYPE##_SECTION_ELSE */						\
+	or	31,31,31;						\
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */					\
+	/* or	5,5,5; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */					\
+	or	1,1,1;							\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */				\
+	or	31,31,31;						\
+/* ALT_##TYPE##_SECTION_END(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	/* Nested alt sections, else can have large else case */	\
+/* BEGIN_##TYPE##_SECTION */						\
+	/* or	2,2,2; */						\
+	/* or	2,2,2; */						\
+	/* or	2,2,2; */						\
+	/* or	2,2,2; */						\
+/* ##TYPE##_SECTION_ELSE */						\
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */					\
+	/* or	5,5,5; */						\
+	/* or	5,5,5; */						\
+	/* or	5,5,5; */						\
+	/* or	5,5,5; */						\
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */					\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	or	1,1,1;							\
+	or	1,1,1;							\
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */				\
+/* ALT_##TYPE##_SECTION_END(0, 1) */					\
+	or	1,1,1;							\
+	or	1,1,1;
+
+MAKE_MACRO_TEST(FTR);			/* NOTE(review): presumably emits ftr_fixup_test_FTR_macros - confirm against the macro */
+MAKE_MACRO_TEST_EXPECTED(FTR);		/* NOTE(review): presumably the image test_cpu_macros() compares with - confirm */
+
+#ifdef CONFIG_PPC64
+MAKE_MACRO_TEST(FW_FTR);		/* FW_FTR variants are only built for CONFIG_PPC64 */
+MAKE_MACRO_TEST_EXPECTED(FW_FTR);
+#endif
+
+globl(lwsync_fixup_test)		/* code compared by test_lwsync_macros() */
+1:	or	1,1,1
+	LWSYNC				/* NOTE(review): presumably records an lwsync fixup site - confirm against the macro */
+globl(end_lwsync_fixup_test)
+
+globl(lwsync_fixup_test_expected_LWSYNC)	/* expected image when CPU_FTR_LWSYNC is set */
+1:	or	1,1,1
+	lwsync
+
+globl(lwsync_fixup_test_expected_SYNC)	/* expected image when CPU_FTR_LWSYNC is clear */
+1:	or	1,1,1
+	sync
+
+globl(ftr_fixup_prefix1)		/* patched in place by test_prefix_patching() */
+	or	1,1,1
+	.long OP_PREFIX << 26		/* word 1: start of the region the test patches */
+	.long 0x0000000			/* word 2: end of that region */
+	or	2,2,2
+globl(end_ftr_fixup_prefix1)
+
+globl(ftr_fixup_prefix1_orig)		/* untouched copy of the code above */
+	or	1,1,1
+	.long OP_PREFIX << 26
+	.long 0x0000000
+	or	2,2,2
+
+globl(ftr_fixup_prefix1_expected)	/* words 1-2 replaced by nops */
+	or	1,1,1
+	nop
+	nop
+	or	2,2,2
+
+globl(ftr_fixup_prefix2)		/* patched in place by test_prefix_alt_patching() */
+	or	1,1,1
+	.long OP_PREFIX << 26		/* word 1: start of the region the test patches */
+	.long 0x0000000			/* word 2: end of that region */
+	or	2,2,2
+globl(end_ftr_fixup_prefix2)
+
+globl(ftr_fixup_prefix2_orig)		/* untouched copy of the code above */
+	or	1,1,1
+	.long OP_PREFIX << 26
+	.long 0x0000000
+	or	2,2,2
+
+globl(ftr_fixup_prefix2_alt)		/* two-word alternative used by the test */
+	.long OP_PREFIX << 26
+	.long 0x0000001
+
+globl(ftr_fixup_prefix2_expected)	/* words 1-2 replaced by the alternative */
+	or	1,1,1
+	.long OP_PREFIX << 26
+	.long 0x0000001
+	or	2,2,2
+
+globl(ftr_fixup_prefix3)		/* code under test; five words long */
+	or	1,1,1
+	.long OP_PREFIX << 26
+	.long 0x0000000
+	or	2,2,2
+	or	3,3,3
+globl(end_ftr_fixup_prefix3)
+
+globl(ftr_fixup_prefix3_orig)		/* untouched copy of the code above */
+	or	1,1,1
+	.long OP_PREFIX << 26
+	.long 0x0000000
+	or	2,2,2
+	or	3,3,3
+
+globl(ftr_fixup_prefix3_alt)		/* three-word alternative: a two-word pair plus a nop */
+	.long OP_PREFIX << 26
+	.long 0x0000001
+	nop
+
+globl(ftr_fixup_prefix3_expected)	/* words 1-3 replaced by the alternative, last word kept */
+	or	1,1,1
+	.long OP_PREFIX << 26
+	.long 0x0000001
+	nop
+	or	3,3,3
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
new file mode 100644
index 0000000000..4f82581ca2
--- /dev/null
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ *  Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
+#include <asm/cputable.h>
+#include <asm/code-patching.h>
+#include <asm/interrupt.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/security_features.h>
+#include <asm/firmware.h>
+#include <asm/inst.h>
+
+struct fixup_entry {
+	unsigned long	mask;		/* feature bits this entry looks at */
+	unsigned long	value;		/* state of those bits for which the code is left unpatched */
+	long		start_off;	/* start of the patchable code; all offsets are relative to this entry */
+	long		end_off;	/* end of the patchable code */
+	long		alt_start_off;	/* start of the alternative code */
+	long		alt_end_off;	/* end of the alternative; same as alt_start_off when there is none */
+};
+
+static u32 *calc_addr(struct fixup_entry *fcur, long offset)
+{
+	unsigned long entry_addr = (unsigned long)fcur;
+
+	/*
+	 * Offsets are stored relative to the entry (for the VDSO); rebase them.
+	 */
+	return (u32 *)(entry_addr + offset);
+}
+
+/*
+ * Copy the instruction at @src to @dest. A relative branch whose target
+ * lies outside [alt_start, alt_end] is first passed to translate_branch().
+ * Returns 0 on success, 1 if translate_branch() reports an error.
+ */
+static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
+{
+	ppc_inst_t instr = ppc_inst_read(src);
+
+	if (instr_is_relative_branch(instr)) {
+		u32 *target = (u32 *)branch_target(src);
+
+		/* Branch within the section doesn't need translating */
+		if ((target < alt_start || target > alt_end) &&
+		    translate_branch(&instr, dest, src))
+			return 1;
+	}
+
+	raw_patch_instruction(dest, instr);
+
+	return 0;
+}
+
+/* Apply one fixup entry; returns 0 on success, 1 if it cannot be patched. */
+static int patch_feature_section_mask(unsigned long value, unsigned long mask,
+				      struct fixup_entry *fcur)
+{
+	u32 *start = calc_addr(fcur, fcur->start_off);
+	u32 *end = calc_addr(fcur, fcur->end_off);
+	u32 *alt_start = calc_addr(fcur, fcur->alt_start_off);
+	u32 *alt_end = calc_addr(fcur, fcur->alt_end_off);
+	u32 *from = alt_start, *to = start;
+
+	if ((alt_end - alt_start) > (end - start))
+		return 1;
+
+	/* The masked feature bits match: leave the code as it is */
+	if ((value & fcur->mask & mask) == (fcur->value & mask))
+		return 0;
+
+	/* Copy in the alternative, then nop out the rest of the region */
+	while (from < alt_end) {
+		if (patch_alt_instruction(from, to, alt_start, alt_end))
+			return 1;
+		from = ppc_inst_next(from, from);
+		to = ppc_inst_next(to, to);
+	}
+
+	for (; to < end; to++)
+		raw_patch_instruction(to, ppc_inst(PPC_RAW_NOP()));
+
+	return 0;
+}
+
+/* Apply every entry of the table [fixup_start, fixup_end), warning about failures. */
+static void do_feature_fixups_mask(unsigned long value, unsigned long mask,
+				   void *fixup_start, void *fixup_end)
+{
+	struct fixup_entry *entry = fixup_start;
+	struct fixup_entry *limit = fixup_end;
+
+	while (entry < limit) {
+		if (patch_feature_section_mask(value, mask, entry)) {
+			WARN_ON(1);
+			printk("Unable to patch feature section at %p - %p"
+				" with %p - %p\n",
+				calc_addr(entry, entry->start_off),
+				calc_addr(entry, entry->end_off),
+				calc_addr(entry, entry->alt_start_off),
+				calc_addr(entry, entry->alt_end_off));
+		}
+		entry++;
+	}
+}
+
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+	do_feature_fixups_mask(value, ~0, fixup_start, fixup_end);	/* ~0: test every feature bit */
+}
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+	/* Only init-section code at or after SYSTEM_FREEING_INITMEM is off limits */
+	return !(system_state >= SYSTEM_FREEING_INITMEM && init_section_contains(dest, size));
+}
+
+/* Patch instrs[0..num) at each site listed in [start, end); returns the number of entries walked. */
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+	int count = 0;
+
+	for (; start < end; start++) {
+		unsigned int *site = (void *)start + *start;
+		int k;
+
+		if (is_fixup_addr_valid(site, sizeof(*instrs) * num)) {
+			pr_devel("patching dest %lx\n", (unsigned long)site);
+			for (k = 0; k < num; k++)
+				patch_instruction(site + k, ppc_inst(instrs[k]));
+		}
+		count++;
+	}
+	return count;
+}
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+				 bool do_fallback, void *fallback)
+{
+	int i;
+
+	for (i = 0; start < end; start++, i++) {
+		unsigned int *dest = (void *)start + *start;	/* entry holds an offset relative to itself */
+
+		if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+			continue;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		// Patch order matters here: see the comment in __do_entry_flush_fixups()
+		if (do_fallback) {
+			patch_instruction(dest, ppc_inst(instrs[0]));
+			patch_instruction(dest + 2, ppc_inst(instrs[2]));
+			patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);	/* branch goes in last; instrs[1] is unused */
+		} else {
+			patch_instruction(dest + 1, ppc_inst(instrs[1]));	/* middle slot is replaced first */
+			patch_instruction(dest + 2, ppc_inst(instrs[2]));
+			patch_instruction(dest, ppc_inst(instrs[0]));
+		}
+	}
+	return i;	/* counts every entry, including the skipped ones */
+}
+
+/* Patch every stf entry barrier site with the sequence selected by @types. */
+static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+	long *start = PTRRELOC(&__start___stf_entry_barrier_fixup);
+	long *end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+	unsigned int instrs[3] = { PPC_RAW_NOP(), PPC_RAW_NOP(), PPC_RAW_NOP() };
+	const char *name;
+	int nr;
+
+	if (types & STF_BARRIER_FALLBACK) {
+		instrs[0] = PPC_RAW_MFLR(_R10);
+		instrs[1] = PPC_RAW_NOP(); /* branch patched below */
+		instrs[2] = PPC_RAW_MTLR(_R10);
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[0] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
+	} else if (types & STF_BARRIER_SYNC_ORI) {
+		instrs[0] = PPC_RAW_SYNC();
+		instrs[1] = PPC_RAW_LD(_R10, _R13, 0);
+		instrs[2] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+	}
+
+	nr = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+				   &stf_barrier_fallback);
+
+	if (types == STF_BARRIER_NONE)
+		name = "no";
+	else if (types == STF_BARRIER_FALLBACK)
+		name = "fallback";
+	else if (types == STF_BARRIER_EIEIO)
+		name = "eieio";
+	else if (types == STF_BARRIER_SYNC_ORI)
+		name = "hwsync";
+	else
+		name = "unknown";
+	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", nr, name);
+}
+
+/* Patch every stf exit barrier site with the sequence selected by @types. */
+static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+	long *start = PTRRELOC(&__start___stf_exit_barrier_fixup);
+	long *end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+	unsigned int instrs[6];
+	const char *name;
+	int i;
+
+	/* Any slot the selected barrier leaves unused remains a nop */
+	for (i = 0; i < ARRAY_SIZE(instrs); i++)
+		instrs[i] = PPC_RAW_NOP();
+
+	if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI)) {
+		bool hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+		/* r13 is written to HSPRG1/SPRG2 first and read back from it last */
+		instrs[0] = hv ? PPC_RAW_MTSPR(SPRN_HSPRG1, _R13)
+			       : PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
+		instrs[1] = hv ? PPC_RAW_MFSPR(_R13, SPRN_HSPRG0)
+			       : PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
+		instrs[2] = PPC_RAW_SYNC();
+		instrs[3] = PPC_RAW_LD(_R13, _R13, 0);
+		instrs[4] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+		instrs[5] = hv ? PPC_RAW_MFSPR(_R13, SPRN_HSPRG1)
+			       : PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[0] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
+	}
+
+	i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+	if (types == STF_BARRIER_NONE)
+		name = "no";
+	else if (types == STF_BARRIER_FALLBACK)
+		name = "fallback";
+	else if (types == STF_BARRIER_EIEIO)
+		name = "eieio";
+	else if (types == STF_BARRIER_SYNC_ORI)
+		name = "hwsync";
+	else
+		name = "unknown";
+
+	printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, name);
+}
+
+static bool stf_exit_reentrant = false;	/* written by do_stf_barrier_fixups() */
+static bool rfi_exit_reentrant = false;	/* written by do_rfi_flush_fixups() */
+static DEFINE_MUTEX(exit_flush_lock);	/* held while either function updates its flag and the static key */
+
+/* stop_machine() callback: @data points at the enum stf_barrier_type to apply. */
+static int __do_stf_barrier_fixups(void *data)
+{
+	enum stf_barrier_type types = *(enum stf_barrier_type *)data;
+
+	do_stf_entry_barrier_fixups(types);
+	do_stf_exit_barrier_fixups(types);
+	return 0;
+}
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+	/*
+	 * The call to the fallback entry flush, and the fallback/sync-ori exit
+	 * flush can not be safely patched in/out while other CPUs are
+	 * executing them. So call __do_stf_barrier_fixups() on one CPU while
+	 * all other CPUs spin in the stop machine core with interrupts hard
+	 * disabled.
+	 *
+	 * The branch to mark interrupt exits non-reentrant is enabled first,
+	 * then stop_machine runs which will ensure all CPUs are out of the
+	 * low level interrupt exit code before patching. After the patching,
+	 * if allowed, then flip the branch to allow fast exits.
+	 */
+
+	// Prevent static key update races with do_rfi_flush_fixups()
+	mutex_lock(&exit_flush_lock);
+	static_branch_enable(&interrupt_exit_not_reentrant);
+
+	stop_machine(__do_stf_barrier_fixups, &types, NULL);	/* the callback reads types through this pointer */
+
+	if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
+		stf_exit_reentrant = false;	/* leaves the static key enabled below */
+	else
+		stf_exit_reentrant = true;
+
+	if (stf_exit_reentrant && rfi_exit_reentrant)	/* the key is cleared only if the rfi side agrees */
+		static_branch_disable(&interrupt_exit_not_reentrant);
+
+	mutex_unlock(&exit_flush_lock);
+}
+
+/* Patch every uaccess flush site with the L1D flush sequence selected by @types. */
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
+{
+	long *start = PTRRELOC(&__start___uaccess_flush_fixup);
+	long *end = PTRRELOC(&__stop___uaccess_flush_fixup);
+	unsigned int instrs[4] = { PPC_RAW_NOP(), PPC_RAW_NOP(), PPC_RAW_NOP(),
+				   PPC_RAW_BLR() };
+	const char *name;
+	int i = 0;
+
+	/* Exactly the fallback type: drop the blr and fall through to the fallback flush */
+	if (types == L1D_FLUSH_FALLBACK)
+		instrs[3] = PPC_RAW_NOP();
+
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+		instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+	i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+	if (types == L1D_FLUSH_NONE)
+		name = "no";
+	else if (types == L1D_FLUSH_FALLBACK)
+		name = "fallback displacement";
+	else if ((types & L1D_FLUSH_ORI) && (types & L1D_FLUSH_MTTRIG))
+		name = "ori+mttrig type";
+	else if (types & L1D_FLUSH_ORI)
+		name = "ori type";
+	else if (types & L1D_FLUSH_MTTRIG)
+		name = "mttrig type";
+	else
+		name = "unknown";
+
+	printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, name);
+}
+
+static int __do_entry_flush_fixups(void *data)
+{
+	enum l1d_flush_type types = *(enum l1d_flush_type *)data;	/* passed in by do_entry_flush_fixups() */
+	unsigned int instrs[3];
+	long *start, *end;
+	int i;
+
+	instrs[0] = PPC_RAW_NOP();
+	instrs[1] = PPC_RAW_NOP();
+	instrs[2] = PPC_RAW_NOP();
+
+	i = 0;
+	if (types == L1D_FLUSH_FALLBACK) {
+		instrs[i++] = PPC_RAW_MFLR(_R10);
+		instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+		instrs[i++] = PPC_RAW_MTLR(_R10);
+	}
+
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+		instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+	/*
+	 * If we're patching in or out the fallback flush we need to be careful about the
+	 * order in which we patch instructions. That's because it's possible we could
+	 * take a page fault after patching one instruction, so the sequence of
+	 * instructions must be safe even in a half patched state.
+	 *
+	 * To make that work, when patching in the fallback flush we patch in this order:
+	 *  - the mflr		(dest)
+	 *  - the mtlr		(dest + 2)
+	 *  - the branch	(dest + 1)
+	 *
+	 * That ensures the sequence is safe to execute at any point. In contrast if we
+	 * patch the mtlr last, it's possible we could return from the branch and not
+	 * restore LR, leading to a crash later.
+	 *
+	 * When patching out the fallback flush (either with nops or another flush type),
+	 * we patch in this order:
+	 *  - the branch	(dest + 1)
+	 *  - the mtlr		(dest + 2)
+	 *  - the mflr		(dest)
+	 *
+	 * Note we are protected by stop_machine() from other CPUs executing the code in a
+	 * semi-patched state.
+	 */
+
+	start = PTRRELOC(&__start___entry_flush_fixup);
+	end = PTRRELOC(&__stop___entry_flush_fixup);
+	i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+				  &entry_flush_fallback);
+
+	start = PTRRELOC(&__start___scv_entry_flush_fixup);
+	end = PTRRELOC(&__stop___scv_entry_flush_fixup);
+	i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+				   &scv_entry_flush_fallback);	/* same instrs, second table, its own fallback */
+
+	printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+		(types == L1D_FLUSH_NONE)       ? "no" :
+		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+							? "ori+mttrig type"
+							: "ori type" :
+		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+						: "unknown");
+
+	return 0;	/* always reports success to stop_machine() */
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+	/*
+	 * The call to the fallback flush can not be safely patched in/out while
+	 * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+	 * CPU while all other CPUs spin in the stop machine core with interrupts
+	 * hard disabled.
+	 */
+	stop_machine(__do_entry_flush_fixups, &types, NULL);	/* the callback reads types through this pointer */
+}
+
+/* stop_machine() callback: patch every rfi flush site for the flush type in *@data. */
+static int __do_rfi_flush_fixups(void *data)
+{
+	enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+	long *start = PTRRELOC(&__start___rfi_flush_fixup);
+	long *end = PTRRELOC(&__stop___rfi_flush_fixup);
+	unsigned int instrs[3] = { PPC_RAW_NOP(), PPC_RAW_NOP(), PPC_RAW_NOP() };
+	const char *name;
+	int i = 0;
+
+	/* b .+16 to fallback flush */
+	if (types & L1D_FLUSH_FALLBACK)
+		instrs[0] = PPC_RAW_BRANCH(16);
+
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+		instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+	i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+	if (types == L1D_FLUSH_NONE)
+		name = "no";
+	else if (types == L1D_FLUSH_FALLBACK)
+		name = "fallback displacement";
+	else if ((types & L1D_FLUSH_ORI) && (types & L1D_FLUSH_MTTRIG))
+		name = "ori+mttrig type";
+	else if (types & L1D_FLUSH_ORI)
+		name = "ori type";
+	else if (types & L1D_FLUSH_MTTRIG)
+		name = "mttrig type";
+	else
+		name = "unknown";
+
+	printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, name);
+	return 0;
+}
+
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+	/*
+	 * stop_machine gets all CPUs out of the interrupt exit handler same
+	 * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
+	 * without stop_machine, so this could be achieved with a broadcast
+	 * IPI instead, but this matches the stf sequence.
+	 */
+
+	// Prevent static key update races with do_stf_barrier_fixups()
+	mutex_lock(&exit_flush_lock);
+	static_branch_enable(&interrupt_exit_not_reentrant);
+
+	stop_machine(__do_rfi_flush_fixups, &types, NULL);	/* the callback reads types through this pointer */
+
+	if (types & L1D_FLUSH_FALLBACK)
+		rfi_exit_reentrant = false;	/* leaves the static key enabled below */
+	else
+		rfi_exit_reentrant = true;
+
+	if (stf_exit_reentrant && rfi_exit_reentrant)	/* the key is cleared only if the stf side agrees */
+		static_branch_disable(&interrupt_exit_not_reentrant);
+
+	mutex_unlock(&exit_flush_lock);
+}
+
+/*
+ * Patch each site in the table [fixup_start, fixup_end) with the ORI
+ * speculation barrier when @enable is true, or with a nop when it is false.
+ */
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+	unsigned int instr = PPC_RAW_NOP();
+	long *start = fixup_start;
+	long *end = fixup_end;
+	int patched;
+
+	if (enable) {
+		pr_info("barrier-nospec: using ORI speculation barrier\n");
+		instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+	}
+
+	patched = do_patch_fixups(start, end, &instr, 1);
+
+	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", patched);
+}
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+// Run do_barrier_nospec_fixups_range() over the table delimited by
+// __start___barrier_nospec_fixup and __stop___barrier_nospec_fixup.
+void do_barrier_nospec_fixups(bool enable)
+{
+	void *start = PTRRELOC(&__start___barrier_nospec_fixup);
+	void *end = PTRRELOC(&__stop___barrier_nospec_fixup);
+
+	do_barrier_nospec_fixups_range(enable, start, end);
+}
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+#ifdef CONFIG_PPC_E500
+/*
+ * CONFIG_PPC_E500 variant: each site in [fixup_start, fixup_end) takes two
+ * instructions, "isync; sync" when @enable is true and two nops when it
+ * is false.
+ */
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+	unsigned int instr[2] = { PPC_RAW_NOP(), PPC_RAW_NOP() };
+	long *start = fixup_start;
+	long *end = fixup_end;
+	int patched;
+
+	if (enable) {
+		pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
+		instr[0] = PPC_RAW_ISYNC();
+		instr[1] = PPC_RAW_SYNC();
+	}
+
+	patched = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
+
+	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", patched);
+}
+
+/* Nop out one region; curr[0] and curr[1] give its start and end as offsets from curr. */
+static void __init patch_btb_flush_section(long *curr)
+{
+	unsigned int *insn = (void *)curr + curr[0];
+	unsigned int *stop = (void *)curr + curr[1];
+
+	for (; insn < stop; insn++) {
+		pr_devel("patching dest %lx\n", (unsigned long)insn);
+		patch_instruction(insn, ppc_inst(PPC_RAW_NOP()));
+	}
+}
+
+// Walk the btb_flush fixup table two longs at a time, handing each pair
+// to patch_btb_flush_section().
+void __init do_btb_flush_fixups(void)
+{
+	long *entry = PTRRELOC(&__start__btb_flush_fixup);
+	long *limit = PTRRELOC(&__stop__btb_flush_fixup);
+
+	for (; entry < limit; entry += 2)
+		patch_btb_flush_section(entry);
+}
+#endif /* CONFIG_PPC_E500 */
+
+/* Write PPC_INST_LWSYNC at each site in the table, but only if @value has CPU_FTR_LWSYNC. */
+void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+	long *entry = fixup_start;
+	long *limit = fixup_end;
+
+	if (!(value & CPU_FTR_LWSYNC))
+		return;
+
+	/* Each entry stores the site as an offset from the entry itself */
+	for (; entry < limit; entry++) {
+		u32 *site = (void *)entry + *entry;
+
+		raw_patch_instruction(site, ppc_inst(PPC_INST_LWSYNC));
+	}
+}
+
+/*
+ * Copy the instructions found at KERNELBASE + PHYSICAL_START, for the
+ * length of [_stext, __end_interrupts), down to KERNELBASE.
+ */
+static void __init do_final_fixups(void)
+{
+#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
+	u32 *from = (u32 *)(KERNELBASE + PHYSICAL_START);
+	u32 *to = (u32 *)KERNELBASE;
+	u32 *limit = (void *)from + (__end_interrupts - _stext);
+
+	/* With PHYSICAL_START == 0 source and destination coincide */
+	if (PHYSICAL_START == 0)
+		return;
+
+	for (; from < limit; from = ppc_inst_next(from, from),
+			     to = ppc_inst_next(to, to)) {
+		raw_patch_instruction(to, ppc_inst_read(from));
+	}
+#endif
+}
+
+static unsigned long __initdata saved_cpu_features;	/* snapshot taken in apply_feature_fixups() */
+static unsigned int __initdata saved_mmu_features;	/* also refreshed by update_mmu_feature_fixups() */
+#ifdef CONFIG_PPC64
+static unsigned long __initdata saved_firmware_features;	/* compared again in check_features() */
+#endif
+
+void __init apply_feature_fixups(void)
+{
+	struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec));
+
+	*PTRRELOC(&saved_cpu_features) = spec->cpu_features;	/* snapshots, compared again in check_features() */
+	*PTRRELOC(&saved_mmu_features) = spec->mmu_features;
+
+	/*
+	 * Apply the CPU-specific and firmware-specific fixups to kernel text
+	 * (nop out sections not relevant to this CPU or this firmware).
+	 */
+	do_feature_fixups(spec->cpu_features,
+			  PTRRELOC(&__start___ftr_fixup),
+			  PTRRELOC(&__stop___ftr_fixup));
+
+	do_feature_fixups(spec->mmu_features,
+			  PTRRELOC(&__start___mmu_ftr_fixup),
+			  PTRRELOC(&__stop___mmu_ftr_fixup));
+
+	do_lwsync_fixups(spec->cpu_features,
+			 PTRRELOC(&__start___lwsync_fixup),
+			 PTRRELOC(&__stop___lwsync_fixup));
+
+#ifdef CONFIG_PPC64
+	saved_firmware_features = powerpc_firmware_features;
+	do_feature_fixups(powerpc_firmware_features,
+			  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);	/* NOTE(review): no PTRRELOC here, unlike the tables above - confirm intended */
+#endif
+	do_final_fixups();	/* runs after all the tables have been applied */
+}
+
+void __init update_mmu_feature_fixups(unsigned long mask)
+{
+	saved_mmu_features &= ~mask;	/* refresh only the bits selected by @mask ... */
+	saved_mmu_features |= cur_cpu_spec->mmu_features & mask;	/* ... from the current MMU features */
+
+	do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask,
+			       PTRRELOC(&__start___mmu_ftr_fixup),
+			       PTRRELOC(&__stop___mmu_ftr_fixup));
+	mmu_feature_keys_init();
+}
+
+void __init setup_feature_keys(void)
+{
+	/*
+	 * Initialise jump label. This causes all the cpu/mmu_has_feature()
+	 * checks to take on their correct polarity based on the current set of
+	 * CPU/MMU features.
+	 */
+	jump_label_init();	/* called before the two feature key initialisers */
+	cpu_feature_keys_init();
+	mmu_feature_keys_init();
+}
+
+static int __init check_features(void)
+{
+	WARN(saved_cpu_features != cur_cpu_spec->cpu_features,	/* compare with the saved snapshot */
+	     "CPU features changed after feature patching!\n");
+	WARN(saved_mmu_features != cur_cpu_spec->mmu_features,
+	     "MMU features changed after feature patching!\n");
+#ifdef CONFIG_PPC64
+	WARN(saved_firmware_features != powerpc_firmware_features,
+	     "Firmware features changed after feature patching!\n");
+#endif
+
+	return 0;	/* only warns; the initcall itself never fails */
+}
+late_initcall(check_features);
+
+#ifdef CONFIG_FTR_FIXUP_SELFTEST
+
+#define check(x)	\
+	if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
+
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+	return patch_feature_section_mask(value, ~0, fcur);	/* selftest helper: every mask bit set */
+}
+
+/* This must be placed after the text it fixes up; vmlinux.lds.S currently enforces that. */
+static struct fixup_entry fixup;	/* scratch entry filled in by the selftests in this file */
+
+static long __init calc_offset(struct fixup_entry *entry, unsigned int *p)
+{
+	return (long)((unsigned long)p - (unsigned long)entry); /* distance of @p from @entry */
+}
+
+/* No alternative: word 1 of ftr_fixup_test1 is kept or patched depending on the value. */
+static void __init test_basic_patching(void)
+{
+	extern unsigned int ftr_fixup_test1[];
+	extern unsigned int end_ftr_fixup_test1[];
+	extern unsigned int ftr_fixup_test1_orig[];
+	extern unsigned int ftr_fixup_test1_expected[];
+	int len = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1);
+
+	fixup.mask = fixup.value = 8;
+	fixup.alt_end_off = fixup.alt_start_off = 0;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
+
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, len) == 0);
+
+	/* The value has the bit: nothing may change */
+	patch_feature_section(8, &fixup);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, len) == 0);
+
+	/* The value lacks the bit: the section gets patched */
+	patch_feature_section(0, &fixup);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, len) == 0);
+
+	/* Restore the code; a value with every bit but ours must patch too */
+	memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, len);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, len) == 0);
+	patch_feature_section(~8, &fixup);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, len) == 0);
+}
+
+/* A one-word alternative standing in for word 1 of ftr_fixup_test2. */
+static void __init test_alternative_patching(void)
+{
+	extern unsigned int ftr_fixup_test2[];
+	extern unsigned int end_ftr_fixup_test2[];
+	extern unsigned int ftr_fixup_test2_orig[];
+	extern unsigned int ftr_fixup_test2_alt[];
+	extern unsigned int ftr_fixup_test2_expected[];
+	int len = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2);
+
+	fixup.mask = fixup.value = 0xF;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
+
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, len) == 0);
+
+	/* The value has all the bits: nothing may change */
+	patch_feature_section(0xF, &fixup);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, len) == 0);
+
+	/* The value lacks the bits: the alternative is copied in */
+	patch_feature_section(0, &fixup);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, len) == 0);
+
+	/* Restore the code; a value with every bit but ours must patch too */
+	memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, len);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, len) == 0);
+	patch_feature_section(~0xF, &fixup);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, len) == 0);
+}
+
+/* A two-word alternative for a one-word region: every attempt must fail and change nothing. */
+static void __init test_alternative_case_too_big(void)
+{
+	extern unsigned int ftr_fixup_test3[];
+	extern unsigned int end_ftr_fixup_test3[];
+	extern unsigned int ftr_fixup_test3_orig[];
+	extern unsigned int ftr_fixup_test3_alt[];
+	int len = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3);
+
+	fixup.mask = fixup.value = 0xC;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
+
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, len) == 0);
+
+	/* Whatever the value, the error comes back and the code is untouched */
+	check(patch_feature_section(0xF, &fixup) == 1);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, len) == 0);
+	check(patch_feature_section(0, &fixup) == 1);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, len) == 0);
+	check(patch_feature_section(~0xF, &fixup) == 1);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, len) == 0);
+}
+
+/* A two-word alternative inside a four-word region, keyed on a high feature bit. */
+static void __init test_alternative_case_too_small(void)
+{
+	extern unsigned int ftr_fixup_test4[];
+	extern unsigned int end_ftr_fixup_test4[];
+	extern unsigned int ftr_fixup_test4_orig[];
+	extern unsigned int ftr_fixup_test4_alt[];
+	extern unsigned int ftr_fixup_test4_expected[];
+	int len = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4);
+	unsigned long flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
+
+	/* flag is the lowest bit of the top byte of an unsigned long */
+	fixup.mask = fixup.value = flag;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
+
+	/* The code must start out equal to its pristine copy */
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, len) == 0);
+
+	/* The value has the flag: nothing may change */
+	patch_feature_section(flag, &fixup);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, len) == 0);
+
+	/* The value lacks the flag: the section gets patched */
+	patch_feature_section(0, &fixup);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, len) == 0);
+
+	/* Restore the code; a value with every bit but the flag must patch too */
+	memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, len);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, len) == 0);
+	patch_feature_section(~flag, &fixup);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, len) == 0);
+}
+
+static void __init test_alternative_case_with_branch(void)
+{
+	extern unsigned int ftr_fixup_test5[], end_ftr_fixup_test5[];
+	extern unsigned int ftr_fixup_test5_expected[];
+	int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5);
+
+	/* No patching is done here; the code is compared as it stands */
+	check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
+}
+
+static void __init test_alternative_case_with_external_branch(void)
+{
+	extern unsigned int ftr_fixup_test6[], end_ftr_fixup_test6[];
+	extern unsigned int ftr_fixup_test6_expected[];
+	int len = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6);
+
+	/* No patching is done here; the code is compared as it stands */
+	check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, len) == 0);
+}
+
+static void __init test_alternative_case_with_branch_to_end(void)
+{
+	extern unsigned int ftr_fixup_test7[], end_ftr_fixup_test7[];
+	extern unsigned int ftr_fixup_test7_expected[];
+	int len = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7);
+
+	/* No patching is done here; the code is compared as it stands */
+	check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, len) == 0);
+}
+
+/* Compare the FTR macro test code with its expected image; the fixups ran during boot. */
+static void __init test_cpu_macros(void)
+{
+	extern u8 ftr_fixup_test_FTR_macros[];
+	extern u8 ftr_fixup_test_FTR_macros_expected[];
+	u8 *code = ftr_fixup_test_FTR_macros;
+	u8 *want = ftr_fixup_test_FTR_macros_expected;
+
+	/* The compared size is the distance from the code to its expected image */
+	check(memcmp(code, want, want - code) == 0);
+}
+
+/* Compare the FW_FTR macro test code with its expected image (CONFIG_PPC64 only). */
+static void __init test_fw_macros(void)
+{
+#ifdef CONFIG_PPC64
+	extern u8 ftr_fixup_test_FW_FTR_macros[];
+	extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+	u8 *code = ftr_fixup_test_FW_FTR_macros;
+	u8 *want = ftr_fixup_test_FW_FTR_macros_expected;
+
+	/* The fixups ran during boot; the size is the distance between the symbols */
+	check(memcmp(code, want, want - code) == 0);
+#endif
+}
+
+/* Compare the lwsync test code with the image matching the current CPU_FTR_LWSYNC state. */
+static void __init test_lwsync_macros(void)
+{
+	extern u8 lwsync_fixup_test[];
+	extern u8 end_lwsync_fixup_test[];
+	extern u8 lwsync_fixup_test_expected_LWSYNC[];
+	extern u8 lwsync_fixup_test_expected_SYNC[];
+	unsigned long len = end_lwsync_fixup_test - lwsync_fixup_test;
+
+	/* The fixups were applied during boot, so only the result is checked */
+	if (!(cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC)) {
+		check(memcmp(lwsync_fixup_test,
+			     lwsync_fixup_test_expected_SYNC, len) == 0);
+	} else {
+		check(memcmp(lwsync_fixup_test,
+			     lwsync_fixup_test_expected_LWSYNC, len) == 0);
+	}
+}
+
+#ifdef CONFIG_PPC64
+static void __init test_prefix_patching(void)
+{
+	extern unsigned int ftr_fixup_prefix1[], end_ftr_fixup_prefix1[];
+	extern unsigned int ftr_fixup_prefix1_orig[], ftr_fixup_prefix1_expected[];
+	int size = (end_ftr_fixup_prefix1 - ftr_fixup_prefix1) * sizeof(unsigned int);
+
+	/* Section covers words 1..2 of ftr_fixup_prefix1; no alternative code. */
+	fixup.mask = 8;
+	fixup.value = 8;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
+	fixup.alt_end_off = 0;
+	fixup.alt_start_off = 0;
+	/* Sanity check: before patching, the code equals ftr_fixup_prefix1_orig. */
+	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);
+
+	patch_feature_section(0, &fixup);	/* patch with a feature value of 0 */
+	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
+	check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
+}
+
+static void __init test_prefix_alt_patching(void)
+{
+	extern unsigned int ftr_fixup_prefix2[], end_ftr_fixup_prefix2[];
+	extern unsigned int ftr_fixup_prefix2_orig[], ftr_fixup_prefix2_expected[];
+	extern unsigned int ftr_fixup_prefix2_alt[];
+	int size = (end_ftr_fixup_prefix2 - ftr_fixup_prefix2) * sizeof(unsigned int);
+
+	/* Section: words 1..2 of ftr_fixup_prefix2; alternative: 2 words at _alt. */
+	fixup.mask = 8;
+	fixup.value = 8;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);
+	/* Sanity check: before patching, the code equals ftr_fixup_prefix2_orig. */
+	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);
+
+	patch_feature_section(0, &fixup);	/* patch with a feature value of 0 */
+	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
+	check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
+}
+
+static void __init test_prefix_word_alt_patching(void)
+{
+	extern unsigned int ftr_fixup_prefix3[], end_ftr_fixup_prefix3[];
+	extern unsigned int ftr_fixup_prefix3_orig[], ftr_fixup_prefix3_expected[];
+	extern unsigned int ftr_fixup_prefix3_alt[];
+	int size = (end_ftr_fixup_prefix3 - ftr_fixup_prefix3) * sizeof(unsigned int);
+
+	/* Section: words 1..3 of ftr_fixup_prefix3; alternative: 3 words at _alt. */
+	fixup.mask = 8;
+	fixup.value = 8;
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);
+	/* Sanity check: before patching, the code equals ftr_fixup_prefix3_orig. */
+	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0);
+
+	patch_feature_section(0, &fixup);	/* patch with a feature value of 0 */
+	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0);
+	patch_feature_section(0, &fixup);	/* patched once more before the last check */
+	check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0);
+}
+#else
+static inline void test_prefix_patching(void) {}	/* empty stubs: used when CONFIG_PPC64 is not set */
+static inline void test_prefix_alt_patching(void) {}
+static inline void test_prefix_word_alt_patching(void) {}
+#endif /* CONFIG_PPC64 */
+
+static int __init test_feature_fixups(void)
+{
+	printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
+	/* The self-tests run in this fixed order; no result is propagated. */
+	test_basic_patching();
+	test_alternative_patching();
+	test_alternative_case_too_big();
+	test_alternative_case_too_small();
+	test_alternative_case_with_branch();
+	test_alternative_case_with_external_branch();
+	test_alternative_case_with_branch_to_end();
+	test_cpu_macros();
+	test_fw_macros();	/* empty body unless CONFIG_PPC64 */
+	test_lwsync_macros();
+	test_prefix_patching();	/* the three prefix tests are empty stubs unless CONFIG_PPC64 */
+	test_prefix_alt_patching();
+	test_prefix_word_alt_patching();
+
+	return 0;	/* unconditionally 0 */
+}
+late_initcall(test_feature_fixups);	/* registered as a late initcall */
+
+#endif /* CONFIG_FTR_FIXUP_SELFTEST */
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
new file mode 100644
index 0000000000..151875050d
--- /dev/null
+++ b/arch/powerpc/lib/hweight_64.S
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/feature-fixups.h>
+
+/* Note: This code relies on -mminimal-toc */
+
+_GLOBAL(__arch_hweight8)		/* value is in r3; the inline variant returns in r3 */
+BEGIN_FTR_SECTION			/* first variant; presumably kept when CPU_FTR_POPCNTB is clear (_IFCLR below) */
+	b CFUNC(__sw_hweight8)		/* branch without link: __sw_hweight8 returns to our caller */
+	nop				/* padding: 3 instructions here, matching the 3 lines of the alternative */
+	nop
+FTR_SECTION_ELSE			/* alternative variant */
+	PPC_POPCNTB(R3,R3)		/* macro; presumably a per-byte population count of r3 */
+	clrldi	r3,r3,64-8		/* clear the upper 56 bits: only the low byte is kept */
+	blr
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight8)
+
+_GLOBAL(__arch_hweight16)		/* value is in r3; the inline variants return in r3 */
+BEGIN_FTR_SECTION			/* first variant; presumably kept when CPU_FTR_POPCNTB is clear */
+	b CFUNC(__sw_hweight16)		/* branch without link: __sw_hweight16 returns to our caller */
+	nop				/* padding: 5 instructions here, matching the longer nested variant */
+	nop
+	nop
+	nop
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(50)		/* nested choice on CPU_FTR_POPCNTD, label 50 */
+	PPC_POPCNTB(R3,R3)		/* macro; presumably leaves a bit count in every byte of r3 */
+	srdi	r4,r3,8			/* r4 = r3 >> 8 */
+	add	r3,r4,r3		/* low byte of r3 = byte 0 + byte 1 */
+	clrldi	r3,r3,64-8		/* keep the low byte only */
+	blr
+  FTR_SECTION_ELSE_NESTED(50)
+	clrlwi  r3,r3,16		/* clear the upper 16 bits of the low word: keep 16 bits */
+	PPC_POPCNTW(R3,R3)		/* macro; presumably a per-word population count */
+	clrldi	r3,r3,64-8		/* keep the low byte only */
+	blr
+  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight16)
+
+_GLOBAL(__arch_hweight32)		/* value is in r3; the inline variants return in r3 */
+BEGIN_FTR_SECTION			/* first variant; presumably kept when CPU_FTR_POPCNTB is clear */
+	b CFUNC(__sw_hweight32)		/* branch without link: __sw_hweight32 returns to our caller */
+	nop				/* padding: 7 instructions here, matching the longer nested variant */
+	nop
+	nop
+	nop
+	nop
+	nop
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(51)		/* nested choice on CPU_FTR_POPCNTD, label 51 */
+	PPC_POPCNTB(R3,R3)		/* macro; presumably leaves a bit count in every byte of r3 */
+	srdi	r4,r3,16		/* fold: bytes 2,3 onto bytes 0,1 */
+	add	r3,r4,r3
+	srdi	r4,r3,8			/* fold: byte 1 onto byte 0 */
+	add	r3,r4,r3
+	clrldi	r3,r3,64-8		/* keep the low byte only */
+	blr
+  FTR_SECTION_ELSE_NESTED(51)
+	PPC_POPCNTW(R3,R3)		/* macro; presumably a per-word population count */
+	clrldi	r3,r3,64-8		/* keep the low byte only */
+	blr
+  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight32)
+
+_GLOBAL(__arch_hweight64)		/* value is in r3; the inline variants return in r3 */
+BEGIN_FTR_SECTION			/* first variant; presumably kept when CPU_FTR_POPCNTB is clear */
+	b CFUNC(__sw_hweight64)		/* branch without link: __sw_hweight64 returns to our caller */
+	nop				/* padding: 9 instructions here, matching the longer nested variant */
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(52)		/* nested choice on CPU_FTR_POPCNTD, label 52 */
+	PPC_POPCNTB(R3,R3)		/* macro; presumably leaves a bit count in every byte of r3 */
+	srdi	r4,r3,32		/* fold: upper word onto lower word */
+	add	r3,r4,r3
+	srdi	r4,r3,16		/* fold: bytes 2,3 onto bytes 0,1 */
+	add	r3,r4,r3
+	srdi	r4,r3,8			/* fold: byte 1 onto byte 0 */
+	add	r3,r4,r3
+	clrldi	r3,r3,64-8		/* keep the low byte only */
+	blr
+  FTR_SECTION_ELSE_NESTED(52)
+	PPC_POPCNTD(R3,R3)		/* macro; presumably a doubleword population count */
+	clrldi	r3,r3,64-8		/* keep the low byte only */
+	blr
+  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
new file mode 100644
index 0000000000..e00abeabc5
--- /dev/null
+++ b/arch/powerpc/lib/ldstfp.S
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Floating-point, VMX/Altivec and VSX loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/errno.h>
+
+#define STKFRM	(PPC_MIN_STKFRM + 16)
+
+/* Get the contents of frN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_fpr)
+	mflr	r0			/* keep the caller's LR: bcl below overwrites LR */
+	mfmsr	r6			/* r6 = MSR on entry, written back at 2: */
+	ori	r7, r6, MSR_FP		/* r7 = entry MSR with MSR_FP or'ed in */
+	MTMSRD(r7)
+	isync
+	rlwinm	r3,r3,3,0xf8		/* r3 = (N << 3) & 0xf8: offset of entry N (2 instructions each) */
+	bcl	20,31,1f		/* LR = address of the first table entry below */
+reg = 0
+	.rept	32			/* table: one store-and-branch pair per register number */
+	stfd	reg, 0(r4)
+	b	2f
+reg = reg + 1
+	.endr
+1:	mflr	r5			/* r5 = table base */
+	add	r5,r3,r5		/* r5 = address of entry N */
+	mtctr	r5
+	mtlr	r0			/* caller's LR is back in place */
+	bctr				/* execute entry N; it continues at 2: */
+2:	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	blr
+
+/* Put the contents of *p into frN; N is in r3 and p is in r4. */
+_GLOBAL(put_fpr)
+	mflr	r0			/* keep the caller's LR: bcl below overwrites LR */
+	mfmsr	r6			/* r6 = MSR on entry, written back at 2: */
+	ori	r7, r6, MSR_FP		/* r7 = entry MSR with MSR_FP or'ed in */
+	MTMSRD(r7)
+	isync
+	rlwinm	r3,r3,3,0xf8		/* r3 = (N << 3) & 0xf8: offset of entry N (2 instructions each) */
+	bcl	20,31,1f		/* LR = address of the first table entry below */
+reg = 0
+	.rept	32			/* table: one load-and-branch pair per register number */
+	lfd	reg, 0(r4)
+	b	2f
+reg = reg + 1
+	.endr
+1:	mflr	r5			/* r5 = table base */
+	add	r5,r3,r5		/* r5 = address of entry N */
+	mtctr	r5
+	mtlr	r0			/* caller's LR is back in place */
+	bctr				/* execute entry N; it continues at 2: */
+2:	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	blr
+
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_vr)
+	mflr	r0			/* keep the caller's LR: bcl below overwrites LR */
+	mfmsr	r6			/* r6 = MSR on entry, written back at 2: */
+	oris	r7, r6, MSR_VEC@h	/* r7 = entry MSR with MSR_VEC or'ed in */
+	MTMSRD(r7)
+	isync
+	rlwinm	r3,r3,3,0xf8		/* r3 = (N << 3) & 0xf8: offset of entry N (2 instructions each) */
+	bcl	20,31,1f		/* LR = address of the first table entry below */
+reg = 0
+	.rept	32			/* table: one store-and-branch pair per register number */
+	stvx	reg, 0, r4
+	b	2f
+reg = reg + 1
+	.endr
+1:	mflr	r5			/* r5 = table base */
+	add	r5,r3,r5		/* r5 = address of entry N */
+	mtctr	r5
+	mtlr	r0			/* caller's LR is back in place */
+	bctr				/* execute entry N; it continues at 2: */
+2:	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	blr
+
+/* Put the contents of *p into vrN; N is in r3 and p is in r4. */
+_GLOBAL(put_vr)
+	mflr	r0			/* keep the caller's LR: bcl below overwrites LR */
+	mfmsr	r6			/* r6 = MSR on entry, written back at 2: */
+	oris	r7, r6, MSR_VEC@h	/* r7 = entry MSR with MSR_VEC or'ed in */
+	MTMSRD(r7)
+	isync
+	rlwinm	r3,r3,3,0xf8		/* r3 = (N << 3) & 0xf8: offset of entry N (2 instructions each) */
+	bcl	20,31,1f		/* LR = address of the first table entry below */
+reg = 0
+	.rept	32			/* table: one load-and-branch pair per register number */
+	lvx	reg, 0, r4
+	b	2f
+reg = reg + 1
+	.endr
+1:	mflr	r5			/* r5 = table base */
+	add	r5,r3,r5		/* r5 = address of entry N */
+	mtctr	r5
+	mtlr	r0			/* caller's LR is back in place */
+	bctr				/* execute entry N; it continues at 2: */
+2:	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	blr
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/* Get the contents of vsN into vs0; N is in r3.  The MSR is not touched here. */
+_GLOBAL(get_vsr)
+	mflr	r0			/* keep the caller's LR: bcl below overwrites LR */
+	rlwinm	r3,r3,3,0x1f8		/* r3 = (N << 3) & 0x1f8: offset of entry N in the table */
+	bcl	20,31,1f		/* LR = address of the first table entry below */
+	blr			/* vs0 is already in vs0 */
+	nop				/* entry 0 is padded to two instructions */
+reg = 1
+	.rept	63			/* entries 1..63 (8 bytes each, assuming XXLOR() emits one instruction) */
+	XXLOR(0,reg,reg)
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5			/* r5 = table base */
+	add	r5,r3,r5		/* r5 = address of entry N */
+	mtctr	r5
+	mtlr	r0			/* LR restored, so the entry's blr returns to our caller */
+	bctr
+
+/* Put the contents of vs0 into vsN; N is in r3.  The MSR is not touched here. */
+_GLOBAL(put_vsr)
+	mflr	r0			/* keep the caller's LR: bcl below overwrites LR */
+	rlwinm	r3,r3,3,0x1f8		/* r3 = (N << 3) & 0x1f8: offset of entry N in the table */
+	bcl	20,31,1f		/* LR = address of the first table entry below */
+	blr			/* vs0 is already in vs0 */
+	nop				/* entry 0 is padded to two instructions */
+reg = 1
+	.rept	63			/* entries 1..63 (8 bytes each, assuming XXLOR() emits one instruction) */
+	XXLOR(reg,0,0)
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5			/* r5 = table base */
+	add	r5,r3,r5		/* r5 = address of entry N */
+	mtctr	r5
+	mtlr	r0			/* LR restored, so the entry's blr returns to our caller */
+	bctr
+
+/* Load VSX reg N from vector doubleword *p.  N is in r3, p in r4. */
+_GLOBAL(load_vsrn)
+	PPC_STLU r1,-STKFRM(r1)		/* macro; presumably store-with-update that opens a STKFRM byte frame */
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)	/* LR is saved because put_vsr is called below */
+	mfmsr	r6			/* r6 = MSR on entry */
+	oris	r7,r6,MSR_VSX@h		/* r7 = entry MSR with MSR_VSX or'ed in */
+	cmpwi	cr7,r3,0		/* cr7.eq <=> N == 0 */
+	li	r8,STKFRM-16		/* r8 = offset of the top 16 bytes of the frame: vs0 save slot */
+	MTMSRD(r7)
+	isync
+	beq	cr7,1f			/* N == 0: vs0 is the destination, so it is not saved */
+	STXVD2X(0,R1,R8)		/* save vs0 at r1 + r8 */
+1:	LXVD2X(0,R0,R4)			/* vs0 = *p */
+#ifdef __LITTLE_ENDIAN__
+	XXSWAPD(0,0)
+#endif
+	beq	cr7,4f			/* N == 0: the value is already where it belongs */
+	bl	put_vsr			/* vsN = vs0 (N is still in r3) */
+	LXVD2X(0,R1,R8)			/* reload the saved vs0 */
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	addi	r1,r1,STKFRM		/* drop the frame */
+	blr
+
+/* Store VSX reg N to vector doubleword *p.  N is in r3, p in r4. */
+_GLOBAL(store_vsrn)
+	PPC_STLU r1,-STKFRM(r1)		/* macro; presumably store-with-update that opens a STKFRM byte frame */
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)	/* LR is saved because get_vsr is called below */
+	mfmsr	r6			/* r6 = MSR on entry */
+	oris	r7,r6,MSR_VSX@h		/* r7 = entry MSR with MSR_VSX or'ed in */
+	li	r8,STKFRM-16		/* r8 = offset of the top 16 bytes of the frame: vs0 save slot */
+	MTMSRD(r7)
+	isync
+	STXVD2X(0,R1,R8)		/* save vs0 at r1 + r8 (unconditionally, unlike load_vsrn) */
+	bl	get_vsr			/* vs0 = vsN */
+#ifdef __LITTLE_ENDIAN__
+	XXSWAPD(0,0)
+#endif
+	STXVD2X(0,R0,R4)		/* *p = vs0 */
+	LXVD2X(0,R1,R8)			/* reload the saved vs0 */
+	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	mr	r3,r9			/* NOTE(review): no visible write to r9 here or in get_vsr - leftover? confirm */
+	addi	r1,r1,STKFRM		/* drop the frame */
+	blr
+#endif /* CONFIG_VSX */
+
+/* Convert single-precision to double, without disturbing FPRs. */
+/* conv_sp_to_dp(float *sp, double *dp): sp is in r3, dp is in r4 */
+_GLOBAL(conv_sp_to_dp)
+	mfmsr	r6			/* r6 = MSR on entry */
+	ori	r7, r6, MSR_FP		/* r7 = entry MSR with MSR_FP or'ed in */
+	MTMSRD(r7)
+	isync
+	stfd	fr0, -16(r1)		/* park fr0 in scratch space below the stack pointer */
+	lfs	fr0, 0(r3)		/* load *sp as a single */
+	stfd	fr0, 0(r4)		/* store it to *dp as a double */
+	lfd	fr0, -16(r1)		/* fr0 gets its previous contents back */
+	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	blr
+
+/* Convert double-precision to single, without disturbing FPRs. */
+/* conv_dp_to_sp(double *dp, float *sp): dp is in r3, sp is in r4 */
+_GLOBAL(conv_dp_to_sp)
+	mfmsr	r6			/* r6 = MSR on entry */
+	ori	r7, r6, MSR_FP		/* r7 = entry MSR with MSR_FP or'ed in */
+	MTMSRD(r7)
+	isync
+	stfd	fr0, -16(r1)		/* park fr0 in scratch space below the stack pointer */
+	lfd	fr0, 0(r3)		/* load *dp as a double */
+	stfs	fr0, 0(r4)		/* store it to *sp as a single */
+	lfd	fr0, -16(r1)		/* fr0 gets its previous contents back */
+	MTMSRD(r6)			/* write the entry MSR back */
+	isync
+	blr
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
new file mode 100644
index 0000000000..04165b7a16
--- /dev/null
+++ b/arch/powerpc/lib/locks.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *   Rework to support virtual processors
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/smp.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_PPC_SPLPAR)
+#include <asm/hvcall.h>
+#include <asm/smp.h>
+
+void splpar_spin_yield(arch_spinlock_t *lock)
+{
+	unsigned int seen = lock->slock;	/* snapshot of the lock word */
+	unsigned int holder_cpu, count;
+
+	if (!seen)
+		return;		/* lock word is zero: nothing to yield to */
+	holder_cpu = seen & 0xffff;	/* holder is taken from the low 16 bits */
+	BUG_ON(holder_cpu >= NR_CPUS);
+
+	count = yield_count_of(holder_cpu);
+	if (!(count & 1))
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (seen == lock->slock)
+		yield_to_preempted(holder_cpu, count);
+	/* else: something has changed since the snapshot, so don't yield */
+}
+EXPORT_SYMBOL_GPL(splpar_spin_yield);
+
+/*
+ * Yield while waiting for a rwlock, whether for reading or for writing.
+ * Both cases are handled identically, because the holder can only be
+ * identified when the lock is write-locked.
+ */
+void splpar_rw_yield(arch_rwlock_t *rw)
+{
+	unsigned int holder_cpu, count;
+	int seen = rw->lock;		/* signed snapshot of the lock word */
+
+	if (seen >= 0)
+		return;		/* no write lock at present */
+	/* Negative lock word: the holder is taken from its low 16 bits. */
+	holder_cpu = seen & 0xffff;
+	BUG_ON(holder_cpu >= NR_CPUS);
+
+	count = yield_count_of(holder_cpu);
+	if (!(count & 1))
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (seen == rw->lock)
+		yield_to_preempted(holder_cpu, count);
+	/* else: something has changed since the snapshot, so don't yield */
+}
+#endif
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
new file mode 100644
index 0000000000..6fd06cd20f
--- /dev/null
+++ b/arch/powerpc/lib/mem_64.S
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+#include <asm/kasan.h>
+
+#ifndef CONFIG_KASAN
+_GLOBAL(__memset16)
+	rlwimi	r4,r4,16,0,15		/* copy the low halfword of r4 into the upper half of its low word */
+	/* fall through */
+
+_GLOBAL(__memset32)
+	rldimi	r4,r4,32,0		/* copy the low word of r4 into its upper word */
+	/* fall through */
+
+_GLOBAL(__memset64)
+	neg	r0,r3
+	andi.	r0,r0,7			/* r0 = # bytes until r3 is 8-byte aligned; cr0.eq if none */
+	cmplw	cr1,r5,r0		/* cr1: length (low 32 bits of r5) vs. that byte count */
+	b	.Lms			/* continue in memset's common code, r4 already holds the pattern */
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+#endif
+
+_GLOBAL_KASAN(memset)			/* r3 = dest, r4 = fill byte, r5 = length; r3 is left unchanged */
+	neg	r0,r3
+	rlwimi	r4,r4,8,16,23		/* replicate the fill byte into the second-lowest byte */
+	andi.	r0,r0,7			/* # bytes to be 8-byte aligned */
+	rlwimi	r4,r4,16,0,15		/* ... then the low halfword into the upper halfword */
+	cmplw	cr1,r5,r0		/* do we get that far? */
+	rldimi	r4,r4,32,0		/* ... then the low word into the upper word */
+.Lms:	PPC_MTOCRF(1,r0)		/* low 4 bits of r0 go to the CR bits tested by bf 29/30/31 below */
+	mr	r6,r3			/* r6 = running destination pointer */
+	blt	cr1,8f			/* shorter than the alignment gap: only the tail code runs */
+	beq	3f			/* if already 8-byte aligned */
+	subf	r5,r0,r5		/* length -= alignment bytes */
+	bf	31,1f			/* bit for 1 byte */
+	stb	r4,0(r6)
+	addi	r6,r6,1
+1:	bf	30,2f			/* bit for 2 bytes */
+	sth	r4,0(r6)
+	addi	r6,r6,2
+2:	bf	29,3f			/* bit for 4 bytes */
+	stw	r4,0(r6)
+	addi	r6,r6,4
+3:	srdi.	r0,r5,6			/* r0 = number of 64-byte blocks */
+	clrldi	r5,r5,58		/* r5 = remaining length modulo 64 */
+	mtctr	r0
+	beq	5f			/* no full 64-byte block */
+	.balign 16
+4:	std	r4,0(r6)		/* 64 bytes per iteration */
+	std	r4,8(r6)
+	std	r4,16(r6)
+	std	r4,24(r6)
+	std	r4,32(r6)
+	std	r4,40(r6)
+	std	r4,48(r6)
+	std	r4,56(r6)
+	addi	r6,r6,64
+	bdnz	4b
+5:	srwi.	r0,r5,3			/* r0 = remaining doublewords (0..7) */
+	clrlwi	r5,r5,29		/* r5 = remaining bytes (0..7) */
+	PPC_MTOCRF(1,r0)		/* doubleword count bits drive the three bf tests below */
+	beq	8f			/* no doubleword left */
+	bf	29,6f			/* bit for 4 doublewords */
+	std	r4,0(r6)
+	std	r4,8(r6)
+	std	r4,16(r6)
+	std	r4,24(r6)
+	addi	r6,r6,32
+6:	bf	30,7f			/* bit for 2 doublewords */
+	std	r4,0(r6)
+	std	r4,8(r6)
+	addi	r6,r6,16
+7:	bf	31,8f			/* bit for 1 doubleword */
+	std	r4,0(r6)
+	addi	r6,r6,8
+8:	cmpwi	r5,0			/* tail: fewer than 8 bytes left in r5 */
+	PPC_MTOCRF(1,r5)
+	beqlr				/* nothing left */
+	bf	29,9f			/* bit for 4 bytes */
+	stw	r4,0(r6)
+	addi	r6,r6,4
+9:	bf	30,10f			/* bit for 2 bytes */
+	sth	r4,0(r6)
+	addi	r6,r6,2
+10:	bflr	31			/* return unless one last byte is due */
+	stb	r4,0(r6)
+	blr
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
+
+_GLOBAL_TOC_KASAN(memmove)		/* r3 = dest, r4 = src, r5 = length (passed on untouched) */
+	cmpld	0,r3,r4			/* full 64-bit unsigned compare: cmplw looked at the low 32 bits only */
+	bgt	backwards_memcpy	/* dest above src: copy from the end downwards */
+	b	memcpy			/* otherwise hand over to memcpy */
+
+_GLOBAL(backwards_memcpy)		/* r3 = dest, r4 = src, r5 = length; copies from the end down */
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 (taken from the low 32 bits of r5) */
+	add	r6,r3,r5		/* r6 = one past the end of dest */
+	add	r4,r4,r5		/* r4 = one past the end of src */
+	beq	2f			/* fewer than 8 bytes in total */
+	andi.	r0,r6,3			/* r0 = bytes by which the dest end is off a 4-byte boundary */
+	mtctr	r7
+	bne	5f			/* dest end not word aligned: fix that first */
+	.balign 16
+1:	lwz	r7,-4(r4)		/* two words (8 bytes) per iteration */
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7			/* r5 = bytes still to copy (0..7) */
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)		/* one more word if at least 4 bytes remain */
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0
+	beqlr				/* done */
+	mtctr	r5
+4:	lbzu	r0,-1(r4)		/* remaining bytes, one at a time */
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	mtctr	r0			/* copy r0 bytes so that the dest end becomes word aligned */
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5		/* length -= bytes just copied */
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = new number of 8-byte chunks */
+	beq	2b			/* none: finish with the tail code */
+	mtctr	r7
+	b	1b
+EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
new file mode 100644
index 0000000000..f6fca5664e
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * memcmp for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+_GLOBAL(memcmp)				/* r3 = s1, r4 = s2, r5 = length; result in r3 */
+	srawi.	r7, r5, 2		/* Divide len by 4 */
+	mr	r6, r3			/* r6 = s1, since r3 is reused for loaded data and the result */
+	beq-	3f			/* no full word: go to the tail (r7 is 0 and serves as offset) */
+	mtctr	r7
+	li	r7, 0			/* r7 = running byte offset */
+1:	lwzx	r3, r6, r7
+	lwzx	r0, r4, r7
+	addi	r7, r7, 4
+	cmplw	cr0, r3, r0		/* unsigned word compare */
+	bdnzt	eq, 1b			/* loop while words are equal and the count is not exhausted */
+	bne	5f			/* last compared words differ */
+3:	andi.	r3, r5, 3		/* r3 = leftover bytes (0..3) */
+	beqlr				/* none: return 0 (r3 is 0 here) */
+	cmplwi	cr1, r3, 2
+	blt-	cr1, 4f			/* exactly 1 byte left */
+	lhzx	r3, r6, r7
+	lhzx	r0, r4, r7
+	addi	r7, r7, 2
+	subf.	r3, r0, r3		/* r3 = halfword difference */
+	beqlr	cr1			/* exactly 2 bytes were left: return that difference */
+	bnelr				/* 3 bytes left but the halfwords differ: return the difference */
+4:	lbzx	r3, r6, r7
+	lbzx	r0, r4, r7
+	subf.	r3, r0, r3		/* r3 = difference of the last byte */
+	blr
+5:	li	r3, 1			/* words differ: 1 if the s1 word is greater (cr0 from cmplw) ... */
+	bgtlr
+	li	r3, -1			/* ... else -1 */
+	blr
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
new file mode 100644
index 0000000000..142c666d38
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -0,0 +1,638 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ * Copyright 2015 IBM Corporation.
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+#define off8	r6	/* off8/off16/off24: index registers, loaded with 8/16/24 before use */
+#define off16	r7
+#define off24	r8
+
+#define rA	r9	/* rA..rH hold the doublewords being compared */
+#define rB	r10
+#define rC	r11
+#define rD	r27	/* rD..rH (r27-r31) are saved to the stack before the .Llong loop uses them */
+#define rE	r28
+#define rF	r29
+#define rG	r30
+#define rH	r31
+
+#ifdef __LITTLE_ENDIAN__
+#define LH	lhbrx	/* little-endian: byte-reversed indexed loads */
+#define LW	lwbrx
+#define LD	ldbrx
+#define LVS	lvsr
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+	vperm _VRT,_VRB,_VRA,_VRC	/* note: _VRA and _VRB are swapped relative to big-endian */
+#else
+#define LH	lhzx	/* big-endian: plain indexed loads */
+#define LW	lwzx
+#define LD	ldx
+#define LVS	lvsl
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+	vperm _VRT,_VRA,_VRB,_VRC
+#endif
+/* ENTER_VMX_OPS: call enter_vmx_ops() keeping r3-r5 and LR; cr1 = its result compared with 0. */
+#define VMX_THRESH 4096	/* lengths of at least this many bytes take the VMX paths */
+#define ENTER_VMX_OPS	\
+	mflr    r0;	\
+	std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+	std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+	std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+	std     r0,16(r1); \
+	stdu    r1,-STACKFRAMESIZE(r1); \
+	bl      CFUNC(enter_vmx_ops); \
+	cmpwi   cr1,r3,0; \
+	ld      r0,STACKFRAMESIZE+16(r1); \
+	ld      r3,STK_REG(R31)(r1); \
+	ld      r4,STK_REG(R30)(r1); \
+	ld      r5,STK_REG(R29)(r1); \
+	addi	r1,r1,STACKFRAMESIZE; \
+	mtlr    r0
+/* EXIT_VMX_OPS: call exit_vmx_ops() keeping r3-r5 and LR; its result is not looked at. */
+#define EXIT_VMX_OPS \
+	mflr    r0; \
+	std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+	std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+	std     r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+	std     r0,16(r1); \
+	stdu    r1,-STACKFRAMESIZE(r1); \
+	bl      CFUNC(exit_vmx_ops); \
+	ld      r0,STACKFRAMESIZE+16(r1); \
+	ld      r3,STK_REG(R31)(r1); \
+	ld      r4,STK_REG(R30)(r1); \
+	ld      r5,STK_REG(R29)(r1); \
+	addi	r1,r1,STACKFRAMESIZE; \
+	mtlr    r0
+
+/*
+ * LD_VSR_CROSS16B loads the 2nd 16 bytes for _vaddr, which is not aligned to
+ * a 16 byte boundary, and permutes the result with the 1st 16 bytes.
+ *
+ *    |  y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z |
+ *    ^                                  ^                                 ^
+ * 0xbbbb10                          0xbbbb20                         0xbbbb30
+ *                                 ^
+ *                                _vaddr
+ *
+ *
+ * _vmask is the mask generated by LVS
+ * _v1st_qw is the 1st aligned QW of current addr which is already loaded.
+ *   for example: 0xyyyyyyyyyyyyy012 for big endian
+ * _v2nd_qw is the 2nd aligned QW of cur _vaddr to be loaded.
+ *   for example: 0x3456789abcdefzzz for big endian
+ * The permute result is saved in _v_res.
+ *   for example: 0x0123456789abcdef for big endian.
+ */
+#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \
+        lvx     _v2nd_qw,_vaddr,off16; \
+        VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask)
+
+/*
+ * There are 2 categories for memcmp:
+ * 1) src/dst have the same offset to the 8 byte boundary. The handlers
+ * are named like .Lsameoffset_xxxx
+ * 2) src/dst have different offsets to the 8 byte boundary. The handlers
+ * are named like .Ldiffoffset_xxxx
+ */
+_GLOBAL_TOC(memcmp)
+	cmpdi	cr1,r5,0		/* cr1.eq <=> length is 0 */
+
+	/* cr0.eq is set when the src/dst addresses have the same offset
+	 * within 8 bytes; it is consumed by the bne at .Lno_short.
+	 */
+	xor	r6,r3,r4
+	andi.	r6,r6,7
+
+	/* Use the short (byte) loop when fewer than 8 bytes
+	 * are to be compared.
+	 */
+	cmpdi   cr6,r5,7
+
+	beq	cr1,.Lzero
+	bgt	cr6,.Lno_short
+
+.Lshort:
+	mtctr	r5			/* byte loop, unrolled 4 times; CTR counts the bytes */
+1:	lbz	rA,0(r3)
+	lbz	rB,0(r4)
+	subf.	rC,rB,rA		/* rC = byte of s1 - byte of s2 */
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,1(r3)
+	lbz	rB,1(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,2(r3)
+	lbz	rB,2(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,3(r3)
+	lbz	rB,3(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+
+	addi	r3,r3,4
+	addi	r4,r4,4
+
+	bdnz	1b
+
+.Lzero:
+	li	r3,0			/* everything compared equal */
+	blr
+
+.Lno_short:
+	dcbt	0,r3
+	dcbt	0,r4
+	bne	.Ldiffoffset_8bytes_make_align_start	/* cr0 still holds the offset test from entry */
+
+
+.Lsameoffset_8bytes_make_align_start:
+	/* attempt to compare bytes not aligned with 8 bytes so that
+	 * rest comparison can run based on 8 bytes alignment.
+	 */
+	andi.   r6,r3,7
+
+	/* Try to compare the first double word which is not 8 bytes aligned:
+	 * load the first double word at (src & ~7UL) and shift left appropriate
+	 * bits before comparison.
+	 */
+	rlwinm  r6,r3,3,26,28		/* r6 = (r3 & 7) * 8: misalignment in bits */
+	beq     .Lsameoffset_8bytes_aligned
+	clrrdi	r3,r3,3			/* round both pointers down to 8 bytes */
+	clrrdi	r4,r4,3
+	LD	rA,0,r3
+	LD	rB,0,r4
+	sld	rA,rA,r6		/* shift out the bytes in front of the buffers */
+	sld	rB,rB,r6
+	cmpld	cr0,rA,rB
+	srwi	r6,r6,3			/* r6 = misalignment in bytes again */
+	bne	cr0,.LcmpAB_lightweight
+	subfic  r6,r6,8			/* r6 = bytes just compared */
+	subf.	r5,r6,r5
+	addi	r3,r3,8
+	addi	r4,r4,8
+	beq	.Lzero
+
+.Lsameoffset_8bytes_aligned:
+	/* now we are aligned with 8 bytes.
+	 * Use .Llong loop if left cmp bytes are equal or greater than 32B.
+	 */
+	cmpdi   cr6,r5,31
+	bgt	cr6,.Llong
+
+.Lcmp_lt32bytes:
+	/* compare 1 ~ 31 bytes, at least r3 addr is 8 bytes aligned now */
+	cmpdi   cr5,r5,7
+	srdi    r0,r5,3			/* r0 = number of whole doublewords */
+	ble	cr5,.Lcmp_rest_lt8bytes
+
+	/* handle 8 ~ 31 bytes */
+	clrldi  r5,r5,61		/* r5 = length modulo 8 */
+	mtctr   r0
+2:
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	bne	cr0,.LcmpAB_lightweight
+	bdnz	2b
+
+	cmpwi   r5,0
+	beq	.Lzero
+
+.Lcmp_rest_lt8bytes:
+	/*
+	 * Here we have less than 8 bytes to compare. At least s1 is aligned to
+	 * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
+	 * page boundary, otherwise we might read past the end of the buffer and
+	 * trigger a page fault. We use 4K as the conservative minimum page
+	 * size. If we detect that case we go to the byte-by-byte loop.
+	 *
+	 * Otherwise the next double word is loaded from s1 and s2, and shifted
+	 * right to compare the appropriate bits.
+	 */
+	clrldi	r6,r4,(64-12)	// r6 = r4 & 0xfff
+	cmpdi	r6,0xff8
+	bgt	.Lshort
+
+	subfic  r6,r5,8			/* r6 = 8 - remaining bytes */
+	slwi	r6,r6,3			/* ... converted to bits */
+	LD	rA,0,r3
+	LD	rB,0,r4
+	srd	rA,rA,r6		/* shift out the bytes past the end of the buffers */
+	srd	rB,rB,r6
+	cmpld	cr0,rA,rB
+	bne	cr0,.LcmpAB_lightweight
+	b	.Lzero
+
+.Lnon_zero:
+	mr	r3,rC			/* return the byte difference computed in .Lshort */
+	blr
+
+.Llong:
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	/* Try to use vmx loop if length is equal or greater than 4K */
+	cmpldi  cr6,r5,VMX_THRESH
+	bge	cr6,.Lsameoffset_vmx_cmp
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+.Llong_novmx_cmp:
+#endif
+	/* At least s1 addr is aligned with 8 bytes */
+	li	off8,8
+	li	off16,16
+	li	off24,24
+
+	std	r31,-8(r1)		/* save r27-r31 (rD..rH) below the stack pointer */
+	std	r30,-16(r1)
+	std	r29,-24(r1)
+	std	r28,-32(r1)
+	std	r27,-40(r1)
+
+	srdi	r0,r5,5			/* r0 = number of 32-byte blocks */
+	mtctr	r0
+	andi.	r5,r5,31		/* r5 = length modulo 32 */
+
+	LD	rA,0,r3
+	LD	rB,0,r4
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdz	.Lfirst32		/* only one 32-byte block */
+
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr1,rC,rD
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+	cmpld	cr6,rE,rF
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+	bne	cr1,.LcmpCD
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdz	.Lsecond32		/* exactly two 32-byte blocks */
+
+	.balign	16
+
+1:	LD	rA,0,r3			/* loads of the next block are interleaved with compares of the previous one */
+	LD	rB,0,r4
+	cmpld	cr1,rC,rD
+	bne	cr6,.LcmpEF
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+	cmpld	cr6,rE,rF
+	bne	cr7,.LcmpGH
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+	bne	cr1,.LcmpCD
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdnz	1b
+
+.Lsecond32:
+	cmpld	cr1,rC,rD
+	bne	cr6,.LcmpEF
+
+	cmpld	cr6,rE,rF
+	bne	cr7,.LcmpGH
+
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	bne	cr1,.LcmpCD
+	bne	cr6,.LcmpEF
+	bne	cr7,.LcmpGH
+
+.Ltail:
+	ld	r31,-8(r1)		/* restore r27-r31 */
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lshort			/* fewer than 32 bytes left: byte loop */
+
+.Lfirst32:
+	cmpld	cr1,rC,rD
+	cmpld	cr6,rE,rF
+	cmpld	cr7,rG,rH
+
+	bne	cr0,.LcmpAB
+	bne	cr1,.LcmpCD
+	bne	cr6,.LcmpEF
+	bne	cr7,.LcmpGH
+
+	b	.Ltail
+
+.LcmpAB:
+	li	r3,1			/* 1 if the s1 doubleword is greater, else -1 */
+	bgt	cr0,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpCD:
+	li	r3,1
+	bgt	cr1,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpEF:
+	li	r3,1
+	bgt	cr6,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpGH:
+	li	r3,1
+	bgt	cr7,.Lout
+	li	r3,-1
+
+.Lout:
+	ld	r31,-8(r1)		/* restore r27-r31 before returning */
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+	blr
+
+.LcmpAB_lightweight:   /* skip NV GPRS restore */
+	li	r3,1
+	bgtlr
+	li	r3,-1
+	blr
+
+#ifdef CONFIG_ALTIVEC
+.Lsameoffset_vmx_cmp:
+	/* Enter with src/dst addrs has the same offset with 8 bytes
+	 * align boundary.
+	 *
+	 * There is an optimization based on the following fact: memcmp()
+	 * is prone to fail early, within the first 32 bytes.
+	 * Before applying VMX instructions which will lead to 32x128bits
+	 * VMX regs load/restore penalty, we compare the first 32 bytes
+	 * so that we can catch the ~80% fail cases.
+	 */
+
+	li	r0,4			/* 4 doublewords = 32 bytes */
+	mtctr	r0
+.Lsameoffset_prechk_32B_loop:
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	bne     cr0,.LcmpAB_lightweight
+	addi	r5,r5,-8
+	bdnz	.Lsameoffset_prechk_32B_loop
+
+	ENTER_VMX_OPS
+	beq     cr1,.Llong_novmx_cmp	/* enter_vmx_ops() returned 0: use the integer loop */
+
+3:
+	/* need to check whether r4 has the same offset with r3
+	 * for 16 bytes boundary.
+	 */
+	xor	r0,r3,r4
+	andi.	r0,r0,0xf
+	bne	.Ldiffoffset_vmx_cmp_start
+
+	/* len is no less than 4KB. Need to align with 16 bytes further.
+	 */
+	andi.	rA,r3,8
+	LD	rA,0,r3
+	beq	4f			/* already 16-byte aligned */
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	addi	r5,r5,-8
+
+	beq	cr0,4f
+	/* save and restore cr0 */
+	mfocrf  r5,128
+	EXIT_VMX_OPS
+	mtocrf  128,r5
+	b	.LcmpAB_lightweight
+
+4:
+	/* compare 32 bytes for each loop */
+	srdi	r0,r5,5
+	mtctr	r0
+	clrldi  r5,r5,59		/* r5 = length modulo 32 */
+	li	off16,16
+
+.balign 16
+5:
+	lvx 	v0,0,r3
+	lvx 	v1,0,r4
+	VCMPEQUD_RC(v0,v0,v1)
+	bnl	cr6,7f			/* presumably: not all elements equal, difference is in these 16 bytes */
+	lvx 	v0,off16,r3
+	lvx 	v1,off16,r4
+	VCMPEQUD_RC(v0,v0,v1)
+	bnl	cr6,6f			/* as above, for the second 16 bytes */
+	addi	r3,r3,32
+	addi	r4,r4,32
+	bdnz	5b
+
+	EXIT_VMX_OPS
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lcmp_lt32bytes
+
+6:
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+7:
+	/* diff the last 16 bytes */
+	EXIT_VMX_OPS
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	li	off8,8
+	bne	cr0,.LcmpAB_lightweight
+
+	LD	rA,off8,r3
+	LD	rB,off8,r4
+	cmpld	cr0,rA,rB
+	bne	cr0,.LcmpAB_lightweight
+	b	.Lzero
+#endif
+
+.Ldiffoffset_8bytes_make_align_start:
+	/* now try to align s1 with 8 bytes */
+	rlwinm  r6,r3,3,26,28		/* r6 = (r3 & 7) * 8: misalignment of s1 in bits */
+	beq     .Ldiffoffset_align_s1_8bytes
+
+	clrrdi	r3,r3,3
+	LD	rA,0,r3
+	LD	rB,0,r4  /* unaligned load */
+	sld	rA,rA,r6		/* clear the bytes of s1 in front of the buffer ... */
+	srd	rA,rA,r6		/* ... and move the rest back down */
+	srd	rB,rB,r6		/* bring the s2 bytes to the same position */
+	cmpld	cr0,rA,rB
+	srwi	r6,r6,3
+	bne	cr0,.LcmpAB_lightweight
+
+	subfic  r6,r6,8			/* r6 = bytes just compared */
+	subf.	r5,r6,r5
+	addi	r3,r3,8
+	add	r4,r4,r6
+
+	beq	.Lzero
+
+.Ldiffoffset_align_s1_8bytes:
+	/* now s1 is aligned with 8 bytes. */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	/* only do vmx ops when the size equal or greater than 4K bytes */
+	cmpdi	cr5,r5,VMX_THRESH
+	bge	cr5,.Ldiffoffset_vmx_cmp
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+.Ldiffoffset_novmx_cmp:
+#endif
+
+
+	cmpdi   cr5,r5,31
+	ble	cr5,.Lcmp_lt32bytes
+
+#ifdef CONFIG_ALTIVEC
+	b	.Llong_novmx_cmp
+#else
+	b	.Llong
+#endif
+
+#ifdef CONFIG_ALTIVEC
+.Ldiffoffset_vmx_cmp:
+	/* perform a 32 bytes pre-checking before
+	 * enable VMX operations.
+	 */
+	li	r0,4			/* 4 doublewords = 32 bytes */
+	mtctr	r0
+.Ldiffoffset_prechk_32B_loop:
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr0,rA,rB
+	addi	r3,r3,8
+	addi	r4,r4,8
+	bne     cr0,.LcmpAB_lightweight
+	addi	r5,r5,-8
+	bdnz	.Ldiffoffset_prechk_32B_loop
+
+	ENTER_VMX_OPS
+	beq     cr1,.Ldiffoffset_novmx_cmp	/* enter_vmx_ops() returned 0: no VMX */
+
+.Ldiffoffset_vmx_cmp_start:
+	/* Firstly try to align r3 with 16 bytes */
+	andi.   r6,r3,0xf
+	li	off16,16
+	beq     .Ldiffoffset_vmx_s1_16bytes_align
+
+	LVS	v3,0,r3
+	LVS	v4,0,r4
+
+	lvx     v5,0,r3
+	lvx     v6,0,r4
+	LD_VSR_CROSS16B(r3,v3,v5,v7,v9)
+	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+
+	VCMPEQUB_RC(v7,v9,v10)
+	bnl	cr6,.Ldiffoffset_vmx_diff_found
+
+	subfic  r6,r6,16		/* r6 = bytes needed to reach 16-byte alignment of r3 */
+	subf    r5,r6,r5
+	add     r3,r3,r6
+	add     r4,r4,r6
+
+.Ldiffoffset_vmx_s1_16bytes_align:
+	/* now s1 is aligned with 16 bytes */
+	lvx     v6,0,r4
+	LVS	v4,0,r4
+	srdi	r6,r5,5  /* loop for 32 bytes each */
+	clrldi  r5,r5,59
+	mtctr	r6
+
+.balign	16
+.Ldiffoffset_vmx_32bytesloop:
+	/* the first qw of r4 was saved in v6 */
+	lvx	v9,0,r3
+	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+	VCMPEQUB_RC(v7,v9,v10)
+	vor	v6,v8,v8		/* v6 = v8: the 2nd quadword becomes the next 1st one */
+	bnl	cr6,.Ldiffoffset_vmx_diff_found
+
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+	lvx	v9,0,r3
+	LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+	VCMPEQUB_RC(v7,v9,v10)
+	vor	v6,v8,v8
+	bnl	cr6,.Ldiffoffset_vmx_diff_found
+
+	addi	r3,r3,16
+	addi	r4,r4,16
+
+	bdnz	.Ldiffoffset_vmx_32bytesloop
+
+	EXIT_VMX_OPS
+
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lcmp_lt32bytes
+
+.Ldiffoffset_vmx_diff_found:
+	EXIT_VMX_OPS
+	/* anyway, the diff will appear in next 16 bytes */
+	li	r5,16
+	b	.Lcmp_lt32bytes
+
+#endif
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
new file mode 100644
index 0000000000..b5a67e2014
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <asm/kasan.h>
+
+#ifndef SELFTEST_CASE
+/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
+#define SELFTEST_CASE	0
+#endif
+
+	.align	7
+_GLOBAL_TOC_KASAN(memcpy)		/* r3 = dest, r4 = src, r5 = length */
+BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+	cmpdi	cr7,r5,0		/* cr7.eq <=> length is 0, tested by beqlr below */
+#else
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
+#endif
+FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
+	b	memcpy_power7
+#endif
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+	/* dumb little-endian memcpy that will get replaced at runtime */
+	addi r9,r3,-1			/* r9 = dest - 1; r3 itself is left untouched */
+	addi r4,r4,-1
+	beqlr cr7
+	mtctr r5
+1:	lbzu r10,1(r4)			/* one byte per iteration */
+	stbu r10,1(r9)
+	bdnz 1b
+	blr
+#else
+	PPC_MTOCRF(0x01,r5)		/* low 4 bits of the length go to cr7 */
+	cmpldi	cr1,r5,16
+	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
+	andi.	r6,r6,7
+	dcbt	0,r4
+	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
+/* Below we want to nop out the bne if we're on a CPU that has the
+   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+   cleared.
+   At the time of writing the only CPU that has this combination of bits
+   set is Power6. */
+test_feature = (SELFTEST_CASE == 1)
+BEGIN_FTR_SECTION
+	nop
+FTR_SECTION_ELSE
+	bne	.Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+                    CPU_FTR_UNALIGNED_LD_STD)
+.Ldst_aligned:
+	addi	r3,r3,-16		/* bias dest so the 8(r3)/16(r3) stores below hit the right place */
+test_feature = (SELFTEST_CASE == 0)
+BEGIN_FTR_SECTION
+	andi.	r0,r4,7			/* r0 = misalignment of src */
+	bne	.Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
+	srdi	r7,r5,4			/* r7 = number of 16-byte chunks */
+	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7			/* r5 = length modulo 8; cr0.eq if no tail */
+	bf	cr7*4+0,2f		/* length bit 3 clear: even number of doublewords */
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,3f
+1:	ld	r9,8(r4)		/* 16 bytes per iteration */
+	std	r8,8(r3)
+2:	ldu	r8,16(r4)
+	stdu	r9,16(r3)
+	bdnz	1b
+3:	std	r8,8(r3)
+	beq	3f			/* no tail bytes: return */
+	addi	r3,r3,16
+.Ldo_tail:
+	bf	cr7*4+1,1f		/* length bit 2: 4 bytes */
+	lwz	r9,8(r4)
+	addi	r4,r4,4
+	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f		/* length bit 1: 2 bytes */
+	lhz	r9,8(r4)
+	addi	r4,r4,2
+	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f		/* length bit 0: 1 byte */
+	lbz	r9,8(r4)
+	stb	r9,0(r3)
+3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
+	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4		/* round src down to 8 bytes */
+	srdi	r7,r5,4
+	sldi	r10,r0,3		/* r10 = src misalignment in bits */
+	cmpdi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64		/* r11 = 64 - r10: the complementary shift */
+	add	r5,r5,r0
+
+	bt	cr7*4+0,0f
+
+	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
+	ld	r0,8(r4)
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,4f
+	ld	r0,8(r4)
+	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
+	b	2f
+
+0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
+	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,3f
+
+	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
+1:	or	r7,r7,r6
+	ld	r0,8(r4)
+	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+3:	std	r12,8(r3)
+	or	r7,r7,r6
+4:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+	std	r12,24(r3)
+	beq	4f			/* cr0 from the andi. above: no tail bytes */
+	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,6f
+	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+6:
+	bf	cr7*4+1,1f		/* tail bytes are taken from the top of r9 */
+	rotldi	r9,r9,32
+	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+	stb	r9,0(r3)
+3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
+	blr
+
+.Ldst_unaligned:
+	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
+	subf	r5,r6,r5		/* length -= alignment bytes */
+	li	r7,0			/* r7 = running offset */
+	cmpldi	cr1,r5,16
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+	lhzx	r0,r7,r4
+	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+	lwzx	r0,r7,r4
+	stwx	r0,r7,r3
+3:	PPC_MTOCRF(0x01,r5)		/* cr7 = low 4 bits of the remaining length */
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f		/* length bit 3: 8 bytes, as two words */
+	lwz	r0,0(r4)
+	lwz	r9,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f		/* length bit 2: 4 bytes */
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f		/* length bit 1: 2 bytes */
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f		/* length bit 0: 1 byte */
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
+	blr
+#endif
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
new file mode 100644
index 0000000000..9398b2b746
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -0,0 +1,641 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == don't use VMX, 1 == use VMX */
+#define SELFTEST_CASE	0
+#endif
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
+/*
+ * memcpy_power7: r3 = destination, r4 = source, r5 = byte count.
+ *
+ * The destination pointer is stashed below the stack pointer on entry and
+ * reloaded into r3 on the way out, so it is what the routine returns.
+ * Counts below 16 bytes go straight to the short-copy tail. Counts above
+ * 4096 bytes take the VMX path when the CPU has CPU_FTR_ALTIVEC. Everything
+ * else falls through to the scalar path that first 8B-aligns the source.
+ */
+_GLOBAL(memcpy_power7)
+	cmpldi	r5,16
+	cmpldi	cr1,r5,4096
+	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	blt	.Lshort_copy
+
+#ifdef CONFIG_ALTIVEC
+test_feature = SELFTEST_CASE
+BEGIN_FTR_SECTION
+	bgt	cr1, .Lvmx_copy
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+
+.Lnonvmx_copy:
+	/* Get the source 8B aligned */
+	neg	r6,r4
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-3)
+
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	addi	r4,r4,1
+	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	sub	r5,r5,r6
+	cmpldi	r5,128
+	blt	5f
+
+	mflr	r0
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+	std	r21,STK_REG(R21)(r1)
+	std	r22,STK_REG(R22)(r1)
+	std	r0,STACKFRAMESIZE+16(r1)
+
+	srdi	r6,r5,7
+	mtctr	r6
+
+	/* Now do cacheline (128B) sized loads and stores. */
+	.align	5
+4:
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ld	r8,24(r4)
+	ld	r9,32(r4)
+	ld	r10,40(r4)
+	ld	r11,48(r4)
+	ld	r12,56(r4)
+	ld	r14,64(r4)
+	ld	r15,72(r4)
+	ld	r16,80(r4)
+	ld	r17,88(r4)
+	ld	r18,96(r4)
+	ld	r19,104(r4)
+	ld	r20,112(r4)
+	ld	r21,120(r4)
+	addi	r4,r4,128
+	std	r0,0(r3)
+	std	r6,8(r3)
+	std	r7,16(r3)
+	std	r8,24(r3)
+	std	r9,32(r3)
+	std	r10,40(r3)
+	std	r11,48(r3)
+	std	r12,56(r3)
+	std	r14,64(r3)
+	std	r15,72(r3)
+	std	r16,80(r3)
+	std	r17,88(r3)
+	std	r18,96(r3)
+	std	r19,104(r3)
+	std	r20,112(r3)
+	std	r21,120(r3)
+	addi	r3,r3,128
+	bdnz	4b
+
+	clrldi	r5,r5,(64-7)
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r22,STK_REG(R22)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	/* Up to 127B to go */
+5:	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+6:	bf	cr7*4+1,7f
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ld	r8,24(r4)
+	ld	r9,32(r4)
+	ld	r10,40(r4)
+	ld	r11,48(r4)
+	ld	r12,56(r4)
+	addi	r4,r4,64
+	std	r0,0(r3)
+	std	r6,8(r3)
+	std	r7,16(r3)
+	std	r8,24(r3)
+	std	r9,32(r3)
+	std	r10,40(r3)
+	std	r11,48(r3)
+	std	r12,56(r3)
+	addi	r3,r3,64
+
+	/* Up to 63B to go */
+7:	bf	cr7*4+2,8f
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ld	r8,24(r4)
+	addi	r4,r4,32
+	std	r0,0(r3)
+	std	r6,8(r3)
+	std	r7,16(r3)
+	std	r8,24(r3)
+	addi	r3,r3,32
+
+	/* Up to 31B to go */
+8:	bf	cr7*4+3,9f
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	addi	r4,r4,16
+	std	r0,0(r3)
+	std	r6,8(r3)
+	addi	r3,r3,16
+
+9:	clrldi	r5,r5,(64-4)
+
+	/* Up to 15B to go */
+.Lshort_copy:
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+	lwz	r6,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r6,4(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+
+15:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	blr
+
+.Lunwind_stack_nonvmx_copy:
+	addi	r1,r1,STACKFRAMESIZE
+	b	.Lnonvmx_copy
+
+.Lvmx_copy:
+#ifdef CONFIG_ALTIVEC
+	mflr	r0
+	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	bl	CFUNC(enter_vmx_ops)
+	cmpwi	cr1,r3,0
+	ld	r0,STACKFRAMESIZE+16(r1)
+	ld	r3,STK_REG(R31)(r1)
+	ld	r4,STK_REG(R30)(r1)
+	ld	r5,STK_REG(R29)(r1)
+	mtlr	r0
+
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side.
+	 */
+	clrrdi	r6,r4,7
+	clrrdi	r9,r3,7
+	ori	r9,r9,1		/* stream=1 */
+
+	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
+	cmpldi	r7,0x3FF
+	ble	1f
+	li	r7,0x3FF
+1:	lis	r0,0x0E00	/* depth=7 */
+	sldi	r7,r7,7
+	or	r7,r7,r0
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+	dcbt	0,r6,0b01000
+	dcbt	0,r7,0b01010
+	dcbtst	0,r9,0b01000
+	dcbtst	0,r10,0b01010
+	eieio
+	dcbt	0,r8,0b01010	/* GO */
+
+	beq	cr1,.Lunwind_stack_nonvmx_copy
+
+	/*
+	 * If source and destination are not relatively aligned we use a
+	 * slower permute loop.
+	 */
+	xor	r6,r4,r3
+	rldicl.	r6,r6,0,(64-4)
+	bne	.Lvmx_unaligned_copy
+
+	/* Get the destination 16B aligned */
+	neg	r6,r3
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-4)
+
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	addi	r4,r4,1
+	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	bf	cr7*4+0,4f
+	ld	r0,0(r4)
+	addi	r4,r4,8
+	std	r0,0(r3)
+	addi	r3,r3,8
+
+4:	sub	r5,r5,r6
+
+	/* Get the destination 128B aligned */
+	neg	r6,r3
+	srdi	r7,r6,4
+	mtocrf	0x01,r7
+	clrldi	r6,r6,(64-7)
+
+	li	r9,16
+	li	r10,32
+	li	r11,48
+
+	bf	cr7*4+3,5f
+	lvx	v1,0,r4
+	addi	r4,r4,16
+	stvx	v1,0,r3
+	addi	r3,r3,16
+
+5:	bf	cr7*4+2,6f
+	lvx	v1,0,r4
+	lvx	v0,r4,r9
+	addi	r4,r4,32
+	stvx	v1,0,r3
+	stvx	v0,r3,r9
+	addi	r3,r3,32
+
+6:	bf	cr7*4+1,7f
+	lvx	v3,0,r4
+	lvx	v2,r4,r9
+	lvx	v1,r4,r10
+	lvx	v0,r4,r11
+	addi	r4,r4,64
+	stvx	v3,0,r3
+	stvx	v2,r3,r9
+	stvx	v1,r3,r10
+	stvx	v0,r3,r11
+	addi	r3,r3,64
+
+7:	sub	r5,r5,r6
+	srdi	r6,r5,7
+
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	li	r12,64
+	li	r14,80
+	li	r15,96
+	li	r16,112
+
+	mtctr	r6
+
+	/*
+	 * Now do cacheline sized loads and stores. By this stage the
+	 * cacheline stores are also cacheline aligned.
+	 */
+	.align	5
+8:
+	lvx	v7,0,r4
+	lvx	v6,r4,r9
+	lvx	v5,r4,r10
+	lvx	v4,r4,r11
+	lvx	v3,r4,r12
+	lvx	v2,r4,r14
+	lvx	v1,r4,r15
+	lvx	v0,r4,r16
+	addi	r4,r4,128
+	stvx	v7,0,r3
+	stvx	v6,r3,r9
+	stvx	v5,r3,r10
+	stvx	v4,r3,r11
+	stvx	v3,r3,r12
+	stvx	v2,r3,r14
+	stvx	v1,r3,r15
+	stvx	v0,r3,r16
+	addi	r3,r3,128
+	bdnz	8b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+
+	/* Up to 127B to go */
+	clrldi	r5,r5,(64-7)
+	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+	bf	cr7*4+1,9f
+	lvx	v3,0,r4
+	lvx	v2,r4,r9
+	lvx	v1,r4,r10
+	lvx	v0,r4,r11
+	addi	r4,r4,64
+	stvx	v3,0,r3
+	stvx	v2,r3,r9
+	stvx	v1,r3,r10
+	stvx	v0,r3,r11
+	addi	r3,r3,64
+
+9:	bf	cr7*4+2,10f
+	lvx	v1,0,r4
+	lvx	v0,r4,r9
+	addi	r4,r4,32
+	stvx	v1,0,r3
+	stvx	v0,r3,r9
+	addi	r3,r3,32
+
+10:	bf	cr7*4+3,11f
+	lvx	v1,0,r4
+	addi	r4,r4,16
+	stvx	v1,0,r3
+	addi	r3,r3,16
+
+	/* Up to 15B to go */
+11:	clrldi	r5,r5,(64-4)
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+	ld	r0,0(r4)
+	addi	r4,r4,8
+	std	r0,0(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+
+15:	addi	r1,r1,STACKFRAMESIZE
+	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	b	CFUNC(exit_vmx_ops)		/* tail call optimise */
+
+.Lvmx_unaligned_copy:
+	/* Get the destination 16B aligned */
+	neg	r6,r3
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-4)
+
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	addi	r4,r4,1
+	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	bf	cr7*4+0,4f
+	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+	lwz	r7,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r7,4(r3)
+	addi	r3,r3,8
+
+4:	sub	r5,r5,r6
+
+	/* Get the destination 128B aligned */
+	neg	r6,r3
+	srdi	r7,r6,4
+	mtocrf	0x01,r7
+	clrldi	r6,r6,(64-7)
+
+	li	r9,16
+	li	r10,32
+	li	r11,48
+
+	LVS(v16,0,r4)		/* Setup permute control vector */
+	lvx	v0,0,r4
+	addi	r4,r4,16
+
+	bf	cr7*4+3,5f
+	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+	addi	r4,r4,16
+	stvx	v8,0,r3
+	addi	r3,r3,16
+	vor	v0,v1,v1
+
+5:	bf	cr7*4+2,6f
+	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
+	addi	r4,r4,32
+	stvx	v8,0,r3
+	stvx	v9,r3,r9
+	addi	r3,r3,32
+
+6:	bf	cr7*4+1,7f
+	lvx	v3,0,r4
+	VPERM(v8,v0,v3,v16)
+	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
+	addi	r4,r4,64
+	stvx	v8,0,r3
+	stvx	v9,r3,r9
+	stvx	v10,r3,r10
+	stvx	v11,r3,r11
+	addi	r3,r3,64
+
+7:	sub	r5,r5,r6
+	srdi	r6,r5,7
+
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	li	r12,64
+	li	r14,80
+	li	r15,96
+	li	r16,112
+
+	mtctr	r6
+
+	/*
+	 * Now do cacheline sized loads and stores. By this stage the
+	 * cacheline stores are also cacheline aligned.
+	 */
+	.align	5
+8:
+	lvx	v7,0,r4
+	VPERM(v8,v0,v7,v16)
+	lvx	v6,r4,r9
+	VPERM(v9,v7,v6,v16)
+	lvx	v5,r4,r10
+	VPERM(v10,v6,v5,v16)
+	lvx	v4,r4,r11
+	VPERM(v11,v5,v4,v16)
+	lvx	v3,r4,r12
+	VPERM(v12,v4,v3,v16)
+	lvx	v2,r4,r14
+	VPERM(v13,v3,v2,v16)
+	lvx	v1,r4,r15
+	VPERM(v14,v2,v1,v16)
+	lvx	v0,r4,r16
+	VPERM(v15,v1,v0,v16)
+	addi	r4,r4,128
+	stvx	v8,0,r3
+	stvx	v9,r3,r9
+	stvx	v10,r3,r10
+	stvx	v11,r3,r11
+	stvx	v12,r3,r12
+	stvx	v13,r3,r14
+	stvx	v14,r3,r15
+	stvx	v15,r3,r16
+	addi	r3,r3,128
+	bdnz	8b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+
+	/* Up to 127B to go */
+	clrldi	r5,r5,(64-7)
+	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+	bf	cr7*4+1,9f
+	lvx	v3,0,r4
+	VPERM(v8,v0,v3,v16)
+	lvx	v2,r4,r9
+	VPERM(v9,v3,v2,v16)
+	lvx	v1,r4,r10
+	VPERM(v10,v2,v1,v16)
+	lvx	v0,r4,r11
+	VPERM(v11,v1,v0,v16)
+	addi	r4,r4,64
+	stvx	v8,0,r3
+	stvx	v9,r3,r9
+	stvx	v10,r3,r10
+	stvx	v11,r3,r11
+	addi	r3,r3,64
+
+9:	bf	cr7*4+2,10f
+	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+	lvx	v0,r4,r9
+	VPERM(v9,v1,v0,v16)
+	addi	r4,r4,32
+	stvx	v8,0,r3
+	stvx	v9,r3,r9
+	addi	r3,r3,32
+
+10:	bf	cr7*4+3,11f
+	lvx	v1,0,r4
+	VPERM(v8,v0,v1,v16)
+	addi	r4,r4,16
+	stvx	v8,0,r3
+	addi	r3,r3,16
+
+	/* Up to 15B to go */
+11:	clrldi	r5,r5,(64-4)
+	addi	r4,r4,-16	/* Unwind the +16 load offset */
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+	lwz	r6,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r6,4(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+
+15:	addi	r1,r1,STACKFRAMESIZE
+	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+	b	CFUNC(exit_vmx_ops)		/* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
new file mode 100644
index 0000000000..4e724c4c01
--- /dev/null
+++ b/arch/powerpc/lib/pmem.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2017 IBM Corporation. All rights reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/libnvdimm.h>
+
+#include <asm/cacheflush.h>
+
+/*
+ * Apply PPC_DCBSTPS to every L1 data cache block that overlaps
+ * [start, stop). start is rounded down to a cache block boundary first.
+ */
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+	const unsigned long line_shift = l1_dcache_shift();
+	const unsigned long line_size = l1_dcache_bytes();
+	void *block = (void *)(start & ~(line_size - 1));
+	unsigned long span = stop - (unsigned long)block + (line_size - 1);
+	unsigned long remaining = span >> line_shift;
+
+	while (remaining--) {
+		asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(block): "memory");
+		block += line_size;
+	}
+}
+
+/*
+ * Apply PPC_DCBFPS to every L1 data cache block that overlaps
+ * [start, stop). start is rounded down to a cache block boundary first.
+ */
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+	const unsigned long line_shift = l1_dcache_shift();
+	const unsigned long line_size = l1_dcache_bytes();
+	void *block = (void *)(start & ~(line_size - 1));
+	unsigned long span = stop - (unsigned long)block + (line_size - 1);
+	unsigned long remaining = span >> line_shift;
+
+	while (remaining--) {
+		asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(block): "memory");
+		block += line_size;
+	}
+}
+
+/* Clean [start, stop) when the CPU has CPU_FTR_ARCH_207S; otherwise do nothing. */
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return;
+
+	__clean_pmem_range(start, stop);
+}
+
+/* Flush [start, stop) when the CPU has CPU_FTR_ARCH_207S; otherwise do nothing. */
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return;
+
+	__flush_pmem_range(start, stop);
+}
+
+/*
+ * CONFIG_ARCH_HAS_PMEM_API symbols
+ */
+/* Clean (write back) the cache blocks covering @size bytes at @addr. */
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+	const unsigned long first = (unsigned long)addr;
+	const unsigned long end = first + size;
+
+	clean_pmem_range(first, end);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+/* Flush the cache blocks covering @size bytes at @addr. */
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+	const unsigned long first = (unsigned long)addr;
+	const unsigned long end = first + size;
+
+	flush_pmem_range(first, end);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+/*
+ * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
+ */
+/*
+ * Copy @size bytes from user memory at @src to @dest, then clean the cache
+ * blocks covering the whole destination range.
+ *
+ * Returns the value of __copy_from_user() unchanged.
+ * NOTE(review): presumably that is the number of bytes left uncopied
+ * (0 on success) -- confirm against the uaccess API.
+ */
+long __copy_from_user_flushcache(void *dest, const void __user *src,
+		unsigned size)
+{
+	const unsigned long dst_start = (unsigned long)dest;
+	unsigned long ret;
+
+	ret = __copy_from_user(dest, src, size);
+	/* The full range is cleaned regardless of what the copy returned. */
+	clean_pmem_range(dst_start, dst_start + size);
+
+	return ret;
+}
+
+/* memcpy() @size bytes to @dest, then clean the cache blocks covering it. */
+void memcpy_flushcache(void *dest, const void *src, size_t size)
+{
+	const unsigned long dst_start = (unsigned long)dest;
+	const unsigned long dst_end = dst_start + size;
+
+	memcpy(dest, src, size);
+	clean_pmem_range(dst_start, dst_end);
+}
+EXPORT_SYMBOL(memcpy_flushcache);
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 0000000000..6dd2f46bd3
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,1007 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <asm/qspinlock.h>
+#include <asm/paravirt.h>
+
+#define MAX_NODES	4
+
+/*
+ * One waiter's entry in a lock's MCS queue. Every field is initialised in
+ * queued_spin_lock_mcs_queue() before the owning CPU is published as the
+ * lock tail.
+ */
+struct qnode {
+	struct qnode	*next;	/* successor; NULL until that waiter links itself in */
+	struct qspinlock *lock;	/* lock being queued for; matched by get_tail_qnode() */
+	int		cpu;	/* CPU that set up this node (smp_processor_id()) */
+	int		yield_cpu; /* lock owner CPU propagated down the queue, or -1 */
+	u8		locked; /* set to 1 by the previous queue head to hand over the head position */
+};
+
+/*
+ * Per-CPU pool of queue nodes. count is the number of entries of nodes[]
+ * currently in use on this CPU: it is incremented when a node is taken and
+ * decremented when it is released.
+ */
+struct qnodes {
+	int		count;
+	struct qnode nodes[MAX_NODES];
+};
+
+/* Tuning parameters */
+/* Iteration budget for try_to_steal_lock(); 0 makes it return without trying. */
+static int steal_spins __read_mostly = (1 << 5);
+/* Budget steal_break() applies when the lock owner is on another NUMA node. */
+static int remote_steal_spins __read_mostly = (1 << 2);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
+/* Updated by steal_spins_set() when steal_spins moves to or from zero. */
+static bool maybe_stealers __read_mostly = true;
+#endif
+/* Iterations the queue head spins before it sets _Q_MUST_Q_VAL. */
+static int head_spins __read_mostly = (1 << 8);
+
+/* Knobs for the paravirt yield and "sleepy lock" heuristics. */
+static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+/* Multiplier applied to the spin budgets above for a sleepy paravirt lock. */
+static int pv_sleepy_lock_factor __read_mostly = 256;
+static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_prod_head __read_mostly = false;
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+/* sched_clock() value of the last sleepy observation on this CPU; 0 = none. */
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
+
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
+/*
+ * Report whether this CPU recorded a sleepy lock within the last
+ * pv_sleepy_lock_interval_ns. An expired record is reset to 0.
+ */
+static __always_inline bool recently_sleepy(void)
+{
+	u64 stamp;
+
+	/* pv_sleepy_lock is true when this is called */
+	if (!pv_sleepy_lock_interval_ns)
+		return false;
+
+	stamp = this_cpu_read(sleepy_lock_seen_clock);
+	if (!stamp)
+		return false;
+
+	if (sched_clock() - stamp < pv_sleepy_lock_interval_ns)
+		return true;
+
+	/* Record has expired; forget it. */
+	this_cpu_write(sleepy_lock_seen_clock, 0);
+	return false;
+}
+
+/* steal_spins, scaled by pv_sleepy_lock_factor for a sleepy paravirt lock. */
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
+{
+	const bool scale = paravirt && sleepy;
+
+	return scale ? steal_spins * pv_sleepy_lock_factor : steal_spins;
+}
+
+/* remote_steal_spins, scaled by pv_sleepy_lock_factor for a sleepy paravirt lock. */
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
+{
+	const bool scale = paravirt && sleepy;
+
+	return scale ? remote_steal_spins * pv_sleepy_lock_factor : remote_steal_spins;
+}
+
+/* head_spins, scaled by pv_sleepy_lock_factor for a sleepy paravirt lock. */
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+	const bool scale = paravirt && sleepy;
+
+	return scale ? head_spins * pv_sleepy_lock_factor : head_spins;
+}
+
+/* Build the tail field for @cpu. The CPU is stored biased by one, so 0 means "no tail". */
+static inline u32 encode_tail_cpu(int cpu)
+{
+	int biased = cpu + 1;
+
+	return biased << _Q_TAIL_CPU_OFFSET;
+}
+
+/* Inverse of encode_tail_cpu(): recover the CPU number from a lock value's tail field. */
+static inline int decode_tail_cpu(u32 val)
+{
+	u32 biased = val >> _Q_TAIL_CPU_OFFSET;
+
+	return biased - 1;
+}
+
+/* Extract the owner CPU field from a lock value. */
+static inline int get_owner_cpu(u32 val)
+{
+	u32 field = val & _Q_OWNER_CPU_MASK;
+
+	return field >> _Q_OWNER_CPU_OFFSET;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchanged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ *
+ * If the returned value has _Q_LOCKED_VAL set, the asm branched past the
+ * store-conditional: nothing was written and the lock was not taken.
+ *
+ * Asm operands: %0 = prev, %1 = tmp, %2 = &lock->val, %3 = tail,
+ * %4 = newval, %5 = _Q_LOCKED_VAL, %6 = _Q_TAIL_CPU_MASK,
+ * %7 = _Q_SPIN_EH_HINT.
+ */
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
+{
+	u32 newval = queued_spin_encode_locked_val();
+	u32 prev, tmp;
+
+	asm volatile(
+"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
+	/* This test is necessary if there could be stealers */
+"	andi.	%1,%0,%5						\n"
+"	bne	3f							\n"
+	/* Test whether the lock tail == mytail */
+"	and	%1,%0,%6						\n"
+"	cmpw	0,%1,%3							\n"
+	/* Merge the new locked value */
+"	or	%1,%1,%4						\n"
+"	bne	2f							\n"
+	/* If the lock tail matched, then clear it, otherwise leave it. */
+"	andc	%1,%1,%6						\n"
+"2:	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"3:									\n"
+	: "=&r" (prev), "=&r" (tmp)
+	: "r" (&lock->val), "r"(tail), "r" (newval),
+	  "i" (_Q_LOCKED_VAL),
+	  "r" (_Q_TAIL_CPU_MASK),
+	  "i" (_Q_SPIN_EH_HINT)
+	: "cr0", "memory");
+
+	return prev;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing node, this pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ *
+ * Only the _Q_TAIL_CPU_MASK bits of the lock word are replaced; all other
+ * bits are written back as loaded.
+ *
+ * Asm operands: %0 = prev, %1 = tmp, %2 = &lock->val, %3 = tail,
+ * %4 = _Q_TAIL_CPU_MASK.
+ */
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
+{
+	u32 prev, tmp;
+
+	kcsan_release();
+
+	asm volatile(
+"\t"	PPC_RELEASE_BARRIER "						\n"
+"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
+"	andc	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev), "=&r"(tmp)
+	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+	: "cr0", "memory");
+
+	return prev;
+}
+
+/*
+ * Atomically OR _Q_MUST_Q_VAL into the lock word.
+ *
+ * Note: the OR is done in place on the loaded register, so despite its name
+ * "prev" holds the value that was stored (bit set), not the value that was
+ * there before. Callers in this file ignore the return value.
+ */
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# set_mustq				\n"
+"	or	%0,%0,%2						\n"
+"	stwcx.	%0,0,%1							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+	: "cr0", "memory");
+
+	return prev;
+}
+
+/*
+ * Atomically clear _Q_MUST_Q_VAL in the lock word.
+ *
+ * Note: the ANDC is done in place on the loaded register, so despite its
+ * name "prev" holds the value that was stored (bit cleared), not the value
+ * that was there before. Callers in this file ignore the return value.
+ */
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# clear_mustq				\n"
+"	andc	%0,%0,%2						\n"
+"	stwcx.	%0,0,%1							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+	: "cr0", "memory");
+
+	return prev;
+}
+
+/*
+ * Compare-and-swap the lock word from @old to @old | _Q_SLEEPY_VAL.
+ *
+ * @old must have _Q_LOCKED_VAL set and _Q_SLEEPY_VAL clear (BUG otherwise).
+ * Returns true if the lock word still equalled @old and the new value was
+ * stored. Returns false, with nothing stored, if it had changed.
+ */
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+	u32 prev;
+	u32 new = old | _Q_SLEEPY_VAL;
+
+	BUG_ON(!(old & _Q_LOCKED_VAL));
+	BUG_ON(old & _Q_SLEEPY_VAL);
+
+	/* %0 = prev, %1 = &lock->val, %2 = old, %3 = new */
+	asm volatile(
+"1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
+"	cmpw	0,%0,%2							\n"
+"	bne-	2f							\n"
+"	stwcx.	%3,0,%1							\n"
+"	bne-	1b							\n"
+"2:									\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r"(old), "r" (new)
+	: "cr0", "memory");
+
+	return likely(prev == old);
+}
+
+/*
+ * Record that a preempted lock owner was observed: refresh this CPU's
+ * sleepy timestamp (when an interval is configured) and try to mark the
+ * lock word sleepy if @val does not already carry the bit.
+ */
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+	if (!pv_sleepy_lock)
+		return;
+
+	if (pv_sleepy_lock_interval_ns)
+		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+
+	if (val & _Q_SLEEPY_VAL)
+		return;
+
+	try_set_sleepy(lock, val);
+}
+
+/* Refresh this CPU's sleepy timestamp when sleepy locks and an interval are both enabled. */
+static __always_inline void seen_sleepy_lock(void)
+{
+	if (!pv_sleepy_lock || !pv_sleepy_lock_interval_ns)
+		return;
+
+	this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+/*
+ * Like seen_sleepy_owner(), but only attempts to mark the lock word sleepy
+ * when @val shows the lock held (_Q_LOCKED_VAL set) and not yet sleepy.
+ */
+static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+{
+	if (!pv_sleepy_lock)
+		return;
+
+	if (pv_sleepy_lock_interval_ns)
+		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+
+	if ((val & _Q_LOCKED_VAL) && !(val & _Q_SLEEPY_VAL))
+		try_set_sleepy(lock, val);
+}
+
+/*
+ * Find the qnode that the CPU encoded in @val's tail field has queued for
+ * @lock, by scanning that CPU's node pool for a matching lock pointer.
+ * BUGs if no node matches.
+ */
+static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
+{
+	struct qnodes *pool = per_cpu_ptr(&qnodes, decode_tail_cpu(val));
+	struct qnode *cand;
+
+	/*
+	 * After publishing the new tail and finding a previous tail in the
+	 * previous val (which is the control dependency), this barrier
+	 * orders the release barrier in publish_tail_cpu performed by the
+	 * last CPU, with subsequently looking at its qnode structures
+	 * after the barrier.
+	 */
+	smp_acquire__after_ctrl_dep();
+
+	for (cand = &pool->nodes[0]; cand < &pool->nodes[MAX_NODES]; cand++) {
+		if (cand->lock == lock)
+			return cand;
+	}
+
+	BUG();
+}
+
+/*
+ * Called inside spin_begin(). Returns whether or not the vCPU was preempted.
+ *
+ * @val is a sampled lock word with _Q_LOCKED_VAL set (BUG otherwise). When
+ * not paravirt, or pv_yield_owner is off, or the owner's yield count is even
+ * (owner running), this just does spin_cpu_relax() and returns false.
+ * Otherwise it yields to the owner, provided the lock word still equals @val.
+ *
+ * @mustq: when true, _Q_MUST_Q_VAL is cleared across the yield and set again
+ * afterwards.
+ */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+{
+	int owner;
+	u32 yield_count;
+	bool preempted = false;
+
+	BUG_ON(!(val & _Q_LOCKED_VAL));
+
+	if (!paravirt)
+		goto relax;
+
+	if (!pv_yield_owner)
+		goto relax;
+
+	owner = get_owner_cpu(val);
+	yield_count = yield_count_of(owner);
+
+	if ((yield_count & 1) == 0)
+		goto relax; /* owner vcpu is running */
+
+	spin_end();
+
+	seen_sleepy_owner(lock, val);
+	preempted = true;
+
+	/*
+	 * Read the lock word after sampling the yield count. On the other side
+	 * there may be a wmb because the yield count update is done by the
+	 * hypervisor preemption and the value update by the OS, however this
+	 * ordering might reduce the chance of out of order accesses and
+	 * improve the heuristic.
+	 */
+	smp_rmb();
+
+	if (READ_ONCE(lock->val) == val) {
+		if (mustq)
+			clear_mustq(lock);
+		yield_to_preempted(owner, yield_count);
+		if (mustq)
+			set_mustq(lock);
+		spin_begin();
+
+		/* Don't relax if we yielded. Maybe we should? */
+		return preempted;
+	}
+	/* Lock word changed since it was sampled: skip the yield, just relax. */
+	spin_begin();
+relax:
+	spin_cpu_relax();
+
+	return preempted;
+}
+
+/*
+ * Called inside spin_begin(). Returns whether or not the vCPU was preempted.
+ * Variant for lock stealers, which never touch the must-queue bit.
+ */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	const bool mustq = false;
+
+	return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
+/*
+ * Called inside spin_begin(). Returns whether or not the vCPU was preempted.
+ * Variant for the queue head: asks for the must-queue bit to be dropped
+ * across the yield when @val has it set and pv_yield_allow_steal is on.
+ */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	bool mustq = (val & _Q_MUST_Q_VAL) && pv_yield_allow_steal;
+
+	return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
+/*
+ * Queue-head helper: pass the current lock owner CPU (from @val) to the next
+ * waiter's yield_cpu. *@set_yield_cpu remembers the last value propagated so
+ * an unchanged owner is not written again. Nothing is written unless the
+ * owner vCPU is preempted or a value has been propagated before.
+ */
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+	struct qnode *succ;
+	int owner_cpu;
+
+	if (!paravirt || !pv_yield_propagate_owner)
+		return;
+
+	owner_cpu = get_owner_cpu(val);
+	if (owner_cpu == *set_yield_cpu)
+		return;
+
+	succ = READ_ONCE(node->next);
+	if (!succ)
+		return;
+
+	if (vcpu_is_preempted(owner_cpu) || *set_yield_cpu != -1) {
+		succ->yield_cpu = owner_cpu;
+		*set_yield_cpu = owner_cpu;
+	}
+}
+
+/*
+ * Called inside spin_begin()
+ *
+ * One iteration of a queued (non-head) waiter's wait. @val is the lock word
+ * returned when this waiter published its tail, so its tail field names the
+ * previous waiter's CPU. Returns true if a preempted vCPU was observed.
+ *
+ * Two yield targets are tried in order:
+ *  1. the lock owner propagated into node->yield_cpu, if it is preempted
+ *     and still owns the lock;
+ *  2. the previous waiter's CPU, if pv_yield_prev is set and it is preempted.
+ * If neither applies, or not paravirt, this just does spin_cpu_relax().
+ */
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+{
+	int prev_cpu = decode_tail_cpu(val);
+	u32 yield_count;
+	int yield_cpu;
+	bool preempted = false;
+
+	if (!paravirt)
+		goto relax;
+
+	if (!pv_yield_propagate_owner)
+		goto yield_prev;
+
+	yield_cpu = READ_ONCE(node->yield_cpu);
+	if (yield_cpu == -1) {
+		/* Propagate back the -1 CPU */
+		if (node->next && node->next->yield_cpu != -1)
+			node->next->yield_cpu = yield_cpu;
+		goto yield_prev;
+	}
+
+	yield_count = yield_count_of(yield_cpu);
+	if ((yield_count & 1) == 0)
+		goto yield_prev; /* owner vcpu is running */
+
+	if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
+		goto yield_prev; /* re-sample lock owner */
+
+	spin_end();
+
+	preempted = true;
+	seen_sleepy_node(lock, val);
+
+	smp_rmb();
+
+	/* Yield only if the propagated owner has not changed meanwhile. */
+	if (yield_cpu == node->yield_cpu) {
+		/* Pass the owner on to our successor before yielding. */
+		if (node->next && node->next->yield_cpu != yield_cpu)
+			node->next->yield_cpu = yield_cpu;
+		yield_to_preempted(yield_cpu, yield_count);
+		spin_begin();
+		return preempted;
+	}
+	spin_begin();
+
+yield_prev:
+	if (!pv_yield_prev)
+		goto relax;
+
+	yield_count = yield_count_of(prev_cpu);
+	if ((yield_count & 1) == 0)
+		goto relax; /* owner vcpu is running */
+
+	spin_end();
+
+	preempted = true;
+	seen_sleepy_node(lock, val);
+
+	smp_rmb(); /* See __yield_to_locked_owner comment */
+
+	/* Yield only if we have not been handed the queue head meanwhile. */
+	if (!READ_ONCE(node->locked)) {
+		yield_to_preempted(prev_cpu, yield_count);
+		spin_begin();
+		return preempted;
+	}
+	spin_begin();
+
+relax:
+	spin_cpu_relax();
+
+	return preempted;
+}
+
+/*
+ * Decide whether a stealer should give up after @iters attempts: always once
+ * the steal budget is used up, and (with CONFIG_NUMA) already at the smaller
+ * remote budget when the lock owner in @val is on a different node.
+ */
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
+{
+	if (iters >= get_steal_spins(paravirt, sleepy))
+		return true;
+
+	if (!IS_ENABLED(CONFIG_NUMA))
+		return false;
+
+	if (iters < get_remote_steal_spins(paravirt, sleepy))
+		return false;
+
+	return numa_node_id() != cpu_to_node(get_owner_cpu(val));
+}
+
+/*
+ * Spin for a bounded number of iterations trying to take @lock without
+ * queueing. Returns true if the lock was acquired, false if the caller
+ * should fall back to the MCS queue.
+ *
+ * Gives up immediately if steal_spins is 0, and stops as soon as the lock
+ * word has _Q_MUST_Q_VAL set or steal_break() says the budget is spent.
+ */
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
+{
+	bool seen_preempted = false;
+	bool sleepy = false;
+	int iters = 0;
+	u32 val;
+
+	if (!steal_spins) {
+		/* XXX: should spin_on_preempted_owner do anything here? */
+		return false;
+	}
+
+	/* Attempt to steal the lock */
+	spin_begin();
+	do {
+		bool preempted = false;
+
+		val = READ_ONCE(lock->val);
+		if (val & _Q_MUST_Q_VAL)
+			break;
+		spec_barrier();
+
+		if (unlikely(!(val & _Q_LOCKED_VAL))) {
+			/* Lock looks free: try to grab it. */
+			spin_end();
+			if (__queued_spin_trylock_steal(lock))
+				return true;
+			spin_begin();
+		} else {
+			preempted = yield_to_locked_owner(lock, val, paravirt);
+		}
+
+		if (paravirt && pv_sleepy_lock) {
+			/* Once sleepy is set it stays set for the rest of this call. */
+			if (!sleepy) {
+				if (val & _Q_SLEEPY_VAL) {
+					seen_sleepy_lock();
+					sleepy = true;
+				} else if (recently_sleepy()) {
+					sleepy = true;
+				}
+			}
+			if (pv_sleepy_lock_sticky && seen_preempted &&
+			    !(val & _Q_SLEEPY_VAL)) {
+				if (try_set_sleepy(lock, val))
+					val |= _Q_SLEEPY_VAL;
+			}
+		}
+
+		if (preempted) {
+			seen_preempted = true;
+			sleepy = true;
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+			/*
+			 * pv_spin_on_preempted_owner don't increase iters
+			 * while the owner is preempted -- we won't interfere
+			 * with it by definition. This could introduce some
+			 * latency issue if we continually observe preempted
+			 * owners, but hopefully that's a rare corner case of
+			 * a badly oversubscribed system.
+			 */
+		} else {
+			iters++;
+		}
+	} while (!steal_break(val, iters, paravirt, sleepy));
+
+	spin_end();
+
+	return false;
+}
+
+/*
+ * Acquire @lock by queueing as an MCS waiter.
+ *
+ * Takes one of this CPU's qnodes, publishes this CPU as the lock tail, waits
+ * (if there was a previous tail) until the previous waiter hands over the
+ * queue head, then spins on the lock word until it can be taken with
+ * trylock_clean_tail(). Finally hands the head position to the next waiter,
+ * if any, and releases the qnode.
+ *
+ * If all MAX_NODES qnodes of this CPU are already in use, falls back to
+ * spinning on queued_spin_trylock() without queueing.
+ *
+ * @paravirt selects the shared-processor heuristics (yielding to preempted
+ * vCPUs, sleepy-lock spin scaling, prodding the next waiter).
+ */
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
+{
+	struct qnodes *qnodesp;
+	struct qnode *next, *node;
+	u32 val, old, tail;
+	bool seen_preempted = false;
+	bool sleepy = false;
+	bool mustq = false;
+	int idx;
+	int set_yield_cpu = -1;
+	int iters = 0;
+
+	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+	qnodesp = this_cpu_ptr(&qnodes);
+	if (unlikely(qnodesp->count >= MAX_NODES)) {
+		spec_barrier();
+		while (!queued_spin_trylock(lock))
+			cpu_relax();
+		return;
+	}
+
+	idx = qnodesp->count++;
+	/*
+	 * Ensure that we increment the head node->count before initialising
+	 * the actual node. If the compiler is kind enough to reorder these
+	 * stores, then an IRQ could overwrite our assignments.
+	 */
+	barrier();
+	node = &qnodesp->nodes[idx];
+	node->next = NULL;
+	node->lock = lock;
+	node->cpu = smp_processor_id();
+	node->yield_cpu = -1;
+	node->locked = 0;
+
+	tail = encode_tail_cpu(node->cpu);
+
+	/*
+	 * Assign all attributes of a node before it can be published.
+	 * Issues an lwsync, serving as a release barrier, as well as a
+	 * compiler barrier.
+	 */
+	old = publish_tail_cpu(lock, tail);
+
+	/*
+	 * If there was a previous node; link it and wait until reaching the
+	 * head of the waitqueue.
+	 */
+	if (old & _Q_TAIL_CPU_MASK) {
+		struct qnode *prev = get_tail_qnode(lock, old);
+
+		/* Link @node into the waitqueue. */
+		WRITE_ONCE(prev->next, node);
+
+		/* Wait for mcs node lock to be released */
+		spin_begin();
+		while (!READ_ONCE(node->locked)) {
+			spec_barrier();
+
+			if (yield_to_prev(lock, node, old, paravirt))
+				seen_preempted = true;
+		}
+		spec_barrier();
+		spin_end();
+
+		/* Clear out stale propagated yield_cpu */
+		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+			node->yield_cpu = -1;
+
+		smp_rmb(); /* acquire barrier for the mcs lock */
+
+		/*
+		 * Generic qspinlocks have this prefetch here, but it seems
+		 * like it could cause additional line transitions because
+		 * the waiter will keep loading from it.
+		 */
+		if (_Q_SPIN_PREFETCH_NEXT) {
+			next = READ_ONCE(node->next);
+			if (next)
+				prefetchw(next);
+		}
+	}
+
+	/* We're at the head of the waitqueue, wait for the lock. */
+again:
+	spin_begin();
+	for (;;) {
+		bool preempted;
+
+		val = READ_ONCE(lock->val);
+		if (!(val & _Q_LOCKED_VAL))
+			break;
+		spec_barrier();
+
+		if (paravirt && pv_sleepy_lock && maybe_stealers) {
+			if (!sleepy) {
+				if (val & _Q_SLEEPY_VAL) {
+					seen_sleepy_lock();
+					sleepy = true;
+				} else if (recently_sleepy()) {
+					sleepy = true;
+				}
+			}
+			if (pv_sleepy_lock_sticky && seen_preempted &&
+			    !(val & _Q_SLEEPY_VAL)) {
+				if (try_set_sleepy(lock, val))
+					val |= _Q_SLEEPY_VAL;
+			}
+		}
+
+		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+		preempted = yield_head_to_locked_owner(lock, val, paravirt);
+		if (!maybe_stealers)
+			continue;
+
+		if (preempted)
+			seen_preempted = true;
+
+		if (paravirt && preempted) {
+			sleepy = true;
+
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+		} else {
+			iters++;
+		}
+
+		/* Spun too long as head: force would-be stealers to queue. */
+		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
+			mustq = true;
+			set_mustq(lock);
+			val |= _Q_MUST_Q_VAL;
+		}
+	}
+	spec_barrier();
+	spin_end();
+
+	/* If we're the last queued, must clean up the tail. */
+	old = trylock_clean_tail(lock, tail);
+	if (unlikely(old & _Q_LOCKED_VAL)) {
+		BUG_ON(!maybe_stealers);
+		goto again; /* Can only be true if maybe_stealers. */
+	}
+
+	if ((old & _Q_TAIL_CPU_MASK) == tail)
+		goto release; /* We were the tail, no next. */
+
+	/* There is a next, must wait for node->next != NULL (MCS protocol) */
+	next = READ_ONCE(node->next);
+	if (!next) {
+		spin_begin();
+		while (!(next = READ_ONCE(node->next)))
+			cpu_relax();
+		spin_end();
+	}
+	spec_barrier();
+
+	/*
+	 * Unlock the next mcs waiter node. Release barrier is not required
+	 * here because the acquirer is only accessing the lock word, and
+	 * the acquire barrier we took the lock with orders that update vs
+	 * this store to locked. The corresponding barrier is the smp_rmb()
+	 * acquire barrier for mcs lock, above.
+	 */
+	if (paravirt && pv_prod_head) {
+		int next_cpu = next->cpu;
+		WRITE_ONCE(next->locked, 1);
+		if (_Q_SPIN_MISO)
+			asm volatile("miso" ::: "memory");
+		if (vcpu_is_preempted(next_cpu))
+			prod_cpu(next_cpu);
+	} else {
+		WRITE_ONCE(next->locked, 1);
+		if (_Q_SPIN_MISO)
+			asm volatile("miso" ::: "memory");
+	}
+
+release:
+	/*
+	 * Clear the lock pointer before releasing the node. Otherwise the
+	 * slot keeps a stale lock value, and if an interrupt arrives after a
+	 * later count++ but before node->lock is initialised, a remote CPU
+	 * scanning this CPU's nodes in get_tail_qnode() can match the stale
+	 * entry and link itself to the wrong qnode, deadlocking the queue.
+	 * The barrier keeps the compiler from moving the store past the
+	 * count decrement that releases the node.
+	 */
+	node->lock = NULL;
+	barrier();
+	qnodesp->count--; /* release the node */
+}
+
+/*
+ * Contended-path entry point for taking @lock: first try to steal the lock,
+ * and if that fails, queue as an MCS waiter. The paravirt variants are used
+ * when CONFIG_PARAVIRT_SPINLOCKS is enabled and is_shared_processor() is true.
+ */
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+	/*
+	 * This looks funny, but it induces the compiler to inline both
+	 * sides of the branch rather than share code as when the condition
+	 * is passed as the paravirt argument to the functions.
+	 */
+	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+		if (try_to_steal_lock(lock, true)) {
+			spec_barrier();
+			return;
+		}
+		queued_spin_lock_mcs_queue(lock, true);
+	} else {
+		if (try_to_steal_lock(lock, false)) {
+			spec_barrier();
+			return;
+		}
+		queued_spin_lock_mcs_queue(lock, false);
+	}
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+/* Paravirt spinlock init hook; this implementation has nothing to set up. */
+void pv_spinlocks_init(void)
+{
+}
+#endif
+
+#include <linux/debugfs.h>
+/*
+ * debugfs write handler for steal_spins (@data is unused). Always
+ * returns 0.
+ *
+ * When _Q_SPIN_TRY_LOCK_STEAL == 1 the value is simply stored. Otherwise
+ * writers are serialised by a function-local mutex and maybe_stealers is
+ * kept in step with steal_spins, with a synchronize_rcu() between the two
+ * updates on every zero <-> non-zero transition.
+ */
+static int steal_spins_set(void *data, u64 val)
+{
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+	/* MAYBE_STEAL remains true */
+	steal_spins = val;
+#else
+	static DEFINE_MUTEX(lock);
+
+	/*
+	 * The lock slow path has a !maybe_stealers case that can assume
+	 * the head of queue will not see concurrent waiters. That waiter
+	 * is unsafe in the presence of stealers, so must keep them away
+	 * from one another.
+	 *
+	 * NOTE(review): this relies on synchronize_rcu() waiting out any
+	 * slow path already in flight, presumably because the slow path
+	 * runs with preemption disabled; confirm.
+	 */
+
+	mutex_lock(&lock);
+	if (val && !steal_spins) {
+		/* Enabling: announce stealers before any can appear. */
+		maybe_stealers = true;
+		/* wait for queue head waiter to go away */
+		synchronize_rcu();
+		steal_spins = val;
+	} else if (!val && steal_spins) {
+		/* Disabling: stop new stealers before clearing the flag. */
+		steal_spins = val;
+		/* wait for all possible stealers to go away */
+		synchronize_rcu();
+		maybe_stealers = false;
+	} else {
+		/* No zero <-> non-zero transition: just update the count. */
+		steal_spins = val;
+	}
+	mutex_unlock(&lock);
+#endif
+
+	return 0;
+}
+
+/* debugfs read handler: report the current steal_spins value. */
+static int steal_spins_get(void *data, u64 *val)
+{
+	*val = steal_spins;
+
+	return 0;
+}
+
+/* File operations pairing the steal_spins handlers; values print as "%llu". */
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
+
+/* debugfs write handler: store @val in remote_steal_spins (@data unused). */
+static int remote_steal_spins_set(void *data, u64 val)
+{
+	remote_steal_spins = val;
+
+	return 0;
+}
+
+/* debugfs read handler: report the current remote_steal_spins value. */
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+	*val = remote_steal_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
+
+/* debugfs write handler: store @val in head_spins (@data unused). */
+static int head_spins_set(void *data, u64 val)
+{
+	head_spins = val;
+
+	return 0;
+}
+
+/* debugfs read handler: report the current head_spins value. */
+static int head_spins_get(void *data, u64 *val)
+{
+	*val = head_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
+
+/* debugfs write handler: any non-zero value turns pv_yield_owner on. */
+static int pv_yield_owner_set(void *data, u64 val)
+{
+	pv_yield_owner = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_yield_owner setting. */
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_owner;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
+
+/* debugfs write handler: any non-zero value turns pv_yield_allow_steal on. */
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+	pv_yield_allow_steal = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_yield_allow_steal setting. */
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+	*val = pv_yield_allow_steal;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
+
+/*
+ * debugfs write handler: any non-zero value turns
+ * pv_spin_on_preempted_owner on.
+ */
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+	pv_spin_on_preempted_owner = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_spin_on_preempted_owner setting. */
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+	*val = pv_spin_on_preempted_owner;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
+/* debugfs write handler: any non-zero value turns pv_sleepy_lock on. */
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+	pv_sleepy_lock = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_sleepy_lock setting. */
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
+
+/* debugfs write handler: any non-zero value turns pv_sleepy_lock_sticky on. */
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+	pv_sleepy_lock_sticky = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_sleepy_lock_sticky setting. */
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_sticky;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
+
+/*
+ * debugfs write handler: store @val in pv_sleepy_lock_interval_ns
+ * (@data unused).
+ */
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+	pv_sleepy_lock_interval_ns = val;
+
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_sleepy_lock_interval_ns value. */
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_interval_ns;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+/* debugfs write handler: store @val in pv_sleepy_lock_factor (@data unused). */
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+	pv_sleepy_lock_factor = val;
+
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_sleepy_lock_factor value. */
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_factor;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
+
+/* debugfs write handler: any non-zero value turns pv_yield_prev on. */
+static int pv_yield_prev_set(void *data, u64 val)
+{
+	pv_yield_prev = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_yield_prev setting. */
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+	*val = pv_yield_prev;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
+/*
+ * debugfs write handler: any non-zero value turns
+ * pv_yield_propagate_owner on.
+ */
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+	pv_yield_propagate_owner = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_yield_propagate_owner setting. */
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_propagate_owner;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
+/* debugfs write handler: any non-zero value turns pv_prod_head on. */
+static int pv_prod_head_set(void *data, u64 val)
+{
+	pv_prod_head = (val != 0);
+	return 0;
+}
+
+/* debugfs read handler: report the current pv_prod_head setting. */
+static int pv_prod_head_get(void *data, u64 *val)
+{
+	*val = pv_prod_head;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
+
+/*
+ * Register the qspinlock tunables under arch_debugfs_dir, each with mode
+ * 0600. The three spin-count files are always created; the qspl_pv_*
+ * files are created only when is_shared_processor() is true.
+ * Always returns 0.
+ */
+static __init int spinlock_debugfs_init(void)
+{
+	/* Tunables that apply regardless of paravirt. */
+	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
+	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
+
+	if (!is_shared_processor())
+		return 0;
+
+	/* Paravirt-only tunables. */
+	debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+	debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+	debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
+	debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
+	debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
+	debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
+	debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
+	debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+	debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
+	debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
+
+	return 0;
+}
+device_initcall(spinlock_debugfs_init);
diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S
new file mode 100644
index 0000000000..da71760e50
--- /dev/null
+++ b/arch/powerpc/lib/quad.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Quadword loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+/*
+ * do_lq(unsigned long ea, unsigned long *regs)
+ *
+ * Load the quadword at ea (r3) with lq into r6/r7 and store those two
+ * doublewords to regs[0] and regs[1] (r4 + 0, r4 + 8).
+ * Returns 0 in r3, or -EFAULT when the access at 1: faults and the
+ * exception table entry redirects to 2:.
+ */
+_GLOBAL(do_lq)
+1:	lq	r6, 0(r3)
+	std	r6, 0(r4)
+	std	r7, 8(r4)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/*
+ * do_stq(unsigned long ea, unsigned long val0, unsigned long val1)
+ *
+ * Store a quadword to ea (r3) with stq, sourced from r4.
+ * NOTE(review): presumably val0/val1 arrive in r4/r5 and stq stores that
+ * register pair -- confirm against the ISA and calling convention.
+ * Returns 0 in r3, or -EFAULT when the access at 1: faults and the
+ * exception table entry redirects to 2:.
+ */
+_GLOBAL(do_stq)
+1:	stq	r4, 0(r3)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/*
+ * do_lqarx(unsigned long ea, unsigned long *regs)
+ *
+ * Same shape as do_lq, but the load is issued through the PPC_LQARX
+ * macro: the result in r6/r7 is stored to regs[0] and regs[1]
+ * (r4 + 0, r4 + 8).
+ * Returns 0 in r3, or -EFAULT when the access at 1: faults and the
+ * exception table entry redirects to 2:.
+ */
+_GLOBAL(do_lqarx)
+1:	PPC_LQARX(6, 0, 3, 0)
+	std	r6, 0(r4)
+	std	r7, 8(r4)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
+
+/*
+ * do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ *	    unsigned int *crp)
+ *
+ * Issue the store through the PPC_STQCX macro (source r4, address r3),
+ * then copy the condition register to *crp (r6) so the caller can see
+ * the outcome recorded there.
+ * Returns 0 in r3, or -EFAULT when the access at 1: faults and the
+ * exception table entry redirects to 2:; *crp is not written in that
+ * case.
+ */
+
+_GLOBAL(do_stqcx)
+1:	PPC_STQCX(4, 0, 3)
+	mfcr	r5
+	stw	r5, 0(r6)
+	li	r3, 0
+	blr
+2:	li	r3, -EFAULT
+	blr
+	EX_TABLE(1b, 2b)
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
new file mode 100644
index 0000000000..bccb662c1b
--- /dev/null
+++ b/arch/powerpc/lib/restart_table.c
@@ -0,0 +1,56 @@
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+
+/* One soft mask table range: covers addresses in [start, end). */
+struct soft_mask_table_entry {
+	unsigned long start;	/* first address in the range (inclusive) */
+	unsigned long end;	/* end of the range (exclusive) */
+};
+
+/* One restart table range: addresses in [start, end) map to fixup. */
+struct restart_table_entry {
+	unsigned long start;	/* first address in the range (inclusive) */
+	unsigned long end;	/* end of the range (exclusive) */
+	unsigned long fixup;	/* value returned for an address in the range */
+};
+
+/*
+ * Start/stop markers bounding each table; the searches below walk from
+ * __start_* up to (not including) __stop_*.
+ * NOTE(review): presumably defined by the linker script -- confirm.
+ */
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
+extern struct restart_table_entry __start___restart_table[];
+extern struct restart_table_entry __stop___restart_table[];
+
+/*
+ * Linear scan of the soft mask table.
+ * Returns true if addr lies in some entry's [start, end) range.
+ */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+	struct soft_mask_table_entry *entry;
+
+	for (entry = __start___soft_mask_table;
+	     entry < __stop___soft_mask_table; entry++) {
+		if (addr < entry->start || addr >= entry->end)
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
+/*
+ * Linear scan of the kernel restart table.
+ * Returns the fixup of the first entry whose [start, end) range contains
+ * addr, or 0 if no entry matches.
+ */
+unsigned long search_kernel_restart_table(unsigned long addr)
+{
+	struct restart_table_entry *entry;
+
+	for (entry = __start___restart_table;
+	     entry < __stop___restart_table; entry++) {
+		if (addr < entry->start || addr >= entry->end)
+			continue;
+
+		return entry->fixup;
+	}
+
+	return 0;
+}
+NOKPROBE_SYMBOL(search_kernel_restart_table);
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
new file mode 100644
index 0000000000..6aa774aa5b
--- /dev/null
+++ b/arch/powerpc/lib/rheap.c
@@ -0,0 +1,747 @@
+/*
+ * A Remote Heap.  Remote means that we don't touch the memory that the
+ * heap points to. Normal heap implementations use the memory they manage
+ * to place their list. We cannot do that because the memory we manage may
+ * have special properties, for example it is uncacheable or of different
+ * endianness.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/rheap.h>
+
+/*
+ * Fixup a list_head, needed when copying lists.  If the pointers fall
+ * between s (inclusive) and e (exclusive), apply the delta d.  This
+ * assumes that sizeof(struct list_head *) == sizeof(unsigned long *).
+ *
+ * d is a long rather than an int so that the distance between two
+ * allocations is not truncated on 64-bit kernels.
+ */
+static inline void fixup(unsigned long s, unsigned long e, long d,
+			 struct list_head *l)
+{
+	unsigned long *pp;
+
+	pp = (unsigned long *)&l->next;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+
+	pp = (unsigned long *)&l->prev;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+}
+
+/*
+ * Grow the block array to max_blocks entries.
+ *
+ * Allocates a new array with GFP_ATOMIC, copies the existing entries
+ * across, rebases every list pointer that referred to the old array and
+ * then puts the additional entries on the empty list.  The old array is
+ * freed unless it was static (RHIF_STATIC_BLOCK).
+ *
+ * Returns 0 on success, -EINVAL if max_blocks would not enlarge the
+ * array, or -ENOMEM if the allocation fails.
+ */
+static int grow(rh_info_t * info, int max_blocks)
+{
+	rh_block_t *block, *blk;
+	int i, new_blocks;
+	long delta;	/* byte distance old array -> new array */
+	unsigned long blks, blke;
+
+	if (max_blocks <= info->max_blocks)
+		return -EINVAL;
+
+	new_blocks = max_blocks - info->max_blocks;
+
+	block = kmalloc_array(max_blocks, sizeof(rh_block_t), GFP_ATOMIC);
+	if (block == NULL)
+		return -ENOMEM;
+
+	if (info->max_blocks > 0) {
+
+		/* copy old block area */
+		memcpy(block, info->block,
+		       sizeof(rh_block_t) * info->max_blocks);
+
+		delta = (char *)block - (char *)info->block;
+
+		/* and fixup list pointers */
+		blks = (unsigned long)info->block;
+		blke = (unsigned long)(info->block + info->max_blocks);
+
+		for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
+			fixup(blks, blke, delta, &blk->list);
+
+		fixup(blks, blke, delta, &info->empty_list);
+		fixup(blks, blke, delta, &info->free_list);
+		fixup(blks, blke, delta, &info->taken_list);
+
+		/* free the old allocated memory */
+		if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+			kfree(info->block);
+	}
+
+	info->block = block;
+	info->empty_slots += new_blocks;
+	info->max_blocks = max_blocks;
+	info->flags &= ~RHIF_STATIC_BLOCK;
+
+	/* add all new blocks to the empty list */
+	blk = block + info->max_blocks - new_blocks;
+	for (i = 0; i < new_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+
+	return 0;
+}
+
+/*
+ * Make sure at least @slots empty slots are available, growing the block
+ * array if necessary.  If this function grows the block area, every
+ * pointer previously kept into that area becomes invalid!
+ *
+ * Returns 0 when enough slots exist, -EINVAL when slots >= 4, otherwise
+ * the result of grow().
+ */
+static int assure_empty(rh_info_t * info, int slots)
+{
+	/* This function is not meant to be used to grow uncontrollably */
+	if (slots >= 4)
+		return -EINVAL;
+
+	if (slots > info->empty_slots) {
+		/* Round the new capacity up to the next multiple of 16 */
+		int wanted = (info->max_blocks + slots + 15) & ~15;
+
+		return grow(info, wanted);
+	}
+
+	/* Enough space already */
+	return 0;
+}
+
+/*
+ * Take one entry off the empty list and hand it back zero-initialised
+ * (start 0, size 0, no owner).  Returns NULL, after logging an error,
+ * when no empty slot is left.
+ */
+static rh_block_t *get_slot(rh_info_t * info)
+{
+	rh_block_t *slot;
+
+	/* XXX: You should have called assure_empty before */
+	if (!info->empty_slots) {
+		printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
+		return NULL;
+	}
+
+	/* Unlink the first empty slot */
+	slot = list_entry(info->empty_list.next, rh_block_t, list);
+	list_del_init(&slot->list);
+	info->empty_slots--;
+
+	/* Hand it out in a clean state */
+	slot->owner = NULL;
+	slot->size = 0;
+	slot->start = 0;
+
+	return slot;
+}
+
+/* Return @blk to the empty list and account for the extra empty slot. */
+static inline void release_slot(rh_info_t * info, rh_block_t * blk)
+{
+	info->empty_slots++;
+	list_add(&blk->list, &info->empty_list);
+}
+
+/*
+ * Put @blkn (already filled in with start/size) onto the free list,
+ * merging it with a free block that ends exactly where it starts and/or
+ * one that starts exactly where it ends.  When a merge happens, @blkn's
+ * slot is released back to the empty list.
+ */
+static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	rh_block_t *before;
+	rh_block_t *after;
+	rh_block_t *next;
+	int size;
+	unsigned long s, e, bs, be;
+	struct list_head *l;
+
+	/* We assume that they are aligned properly */
+	size = blkn->size;
+	s = blkn->start;
+	e = s + size;
+
+	/* Find the blocks immediately before and after the given one
+	 * (if any) */
+	before = NULL;
+	after = NULL;
+	next = NULL;
+
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+
+		bs = blk->start;
+		be = bs + blk->size;
+
+		/*
+		 * Remember the first block (in list order) that starts at
+		 * or below s; it is the insertion point when no merge
+		 * happens.
+		 * NOTE(review): this is the first such block, not
+		 * necessarily the closest one below s -- confirm the list
+		 * ordering this is meant to maintain.
+		 */
+		if (next == NULL && s >= bs)
+			next = blk;
+
+		if (be == s)
+			before = blk;
+
+		if (e == bs)
+			after = blk;
+
+		/* If both are not null, break now */
+		if (before != NULL && after != NULL)
+			break;
+	}
+
+	/* Now check if they are really adjacent */
+	if (before && s != (before->start + before->size))
+		before = NULL;
+
+	if (after && e != after->start)
+		after = NULL;
+
+	/* No coalescing; list insert and return */
+	if (before == NULL && after == NULL) {
+
+		/* list_add() links blkn right after the given entry */
+		if (next != NULL)
+			list_add(&blkn->list, &next->list);
+		else
+			list_add(&blkn->list, &info->free_list);
+
+		return;
+	}
+
+	/* We don't need it anymore */
+	release_slot(info, blkn);
+
+	/* Grow the before block */
+	if (before != NULL && after == NULL) {
+		before->size += size;
+		return;
+	}
+
+	/* Grow the after block backwards */
+	if (before == NULL && after != NULL) {
+		after->start -= size;
+		after->size += size;
+		return;
+	}
+
+	/* Grow the before block, and release the after block */
+	before->size += size + after->size;
+	list_del(&after->list);
+	release_slot(info, after);
+}
+
+/*
+ * Link @blkn into the taken list in front of the first entry whose start
+ * is greater than blkn->start, or at the tail if there is none.
+ */
+static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
+{
+	struct list_head *where = &info->taken_list;
+	struct list_head *pos;
+
+	list_for_each(pos, &info->taken_list) {
+		rh_block_t *cur = list_entry(pos, rh_block_t, list);
+
+		if (cur->start > blkn->start) {
+			where = &cur->list;
+			break;
+		}
+	}
+
+	/* list_add_tail() inserts immediately before @where */
+	list_add_tail(&blkn->list, where);
+}
+
+/*
+ * Create a remote heap dynamically.  No memory for the blocks is
+ * allocated here; the block array starts out empty (NULL, zero entries).
+ *
+ * Returns the new heap, ERR_PTR(-EINVAL) if alignment fails the
+ * power-of-two test, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+rh_info_t *rh_create(unsigned int alignment)
+{
+	rh_info_t *heap;
+
+	/* Alignment must be a power of two */
+	if (alignment & (alignment - 1))
+		return ERR_PTR(-EINVAL);
+
+	heap = kmalloc(sizeof(*heap), GFP_ATOMIC);
+	if (!heap)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&heap->empty_list);
+	INIT_LIST_HEAD(&heap->free_list);
+	INIT_LIST_HEAD(&heap->taken_list);
+
+	/* Nothing allocated yet: no block array, no slots, no flags */
+	heap->alignment = alignment;
+	heap->flags = 0;
+	heap->block = NULL;
+	heap->max_blocks = 0;
+	heap->empty_slots = 0;
+
+	return heap;
+}
+EXPORT_SYMBOL_GPL(rh_create);
+
+/*
+ * Destroy a remote heap.  The block array and the info structure are
+ * each freed only if the corresponding RHIF_STATIC_* flag is clear.
+ */
+void rh_destroy(rh_info_t * info)
+{
+	if (!(info->flags & RHIF_STATIC_BLOCK))
+		kfree(info->block);
+
+	if (!(info->flags & RHIF_STATIC_INFO))
+		kfree(info);
+}
+EXPORT_SYMBOL_GPL(rh_destroy);
+
+/*
+ * Initialize in place a remote heap info block.  This is needed to support
+ * operation very early in the startup of the kernel, when it is not yet safe
+ * to call kmalloc.
+ *
+ * Both @info and @block are marked static, and all @max_blocks entries of
+ * @block start out on the empty list.  If @alignment fails the
+ * power-of-two test the function returns without touching @info.
+ */
+void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+	     rh_block_t * block)
+{
+	int idx;
+
+	/* Alignment must be a power of two */
+	if (alignment & (alignment - 1))
+		return;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	info->alignment = alignment;
+	info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;
+
+	/* Initially every slot is empty */
+	info->block = block;
+	info->max_blocks = max_blocks;
+	info->empty_slots = max_blocks;
+
+	/* Add all blocks to the empty list */
+	for (idx = 0; idx < max_blocks; idx++)
+		list_add(&block[idx].list, &info->empty_list);
+}
+EXPORT_SYMBOL_GPL(rh_init);
+
+/*
+ * Attach a free memory region, coalescing with adjacent free regions.
+ *
+ * The start is rounded up and the end rounded down to the heap
+ * alignment before the region is added.  Returns 0 on success, -ERANGE
+ * if the rounded end is an error value or lies below the rounded start,
+ * or the error from assure_empty().
+ */
+int rh_attach_region(rh_info_t * info, unsigned long start, int size)
+{
+	const unsigned long mask = info->alignment - 1;
+	unsigned long lo, hi;
+	rh_block_t *slot;
+	int err;
+
+	/* Round start up, end down */
+	lo = (start + mask) & ~mask;
+	hi = (start + size) & ~mask;
+
+	if (IS_ERR_VALUE(hi) || hi < lo)
+		return -ERANGE;
+
+	/* Grow the blocks, if needed */
+	err = assure_empty(info, 1);
+	if (err < 0)
+		return err;
+
+	/* Aligned length of the region */
+	size = hi - lo;
+
+	slot = get_slot(info);
+	slot->start = lo;
+	slot->size = size;
+	slot->owner = NULL;
+
+	attach_free_block(info, slot);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rh_attach_region);
+
+/*
+ * Detach the given address range from the free space, splitting a free
+ * block if needed.
+ *
+ * start is rounded up and the end (start + size) is rounded down to the
+ * heap alignment; the rounded range must lie entirely inside one free
+ * block.
+ *
+ * Returns the rounded start on success, or a negative errno cast to
+ * unsigned long: -EINVAL if size is not positive or the rounded range is
+ * empty, -ENOMEM if a spare slot cannot be assured or no free block
+ * contains the range.
+ */
+unsigned long rh_detach_region(rh_info_t * info, unsigned long start, int size)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return (unsigned long) -EINVAL;
+
+	/* The region must be aligned */
+	s = start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	/* Rounding may leave nothing to detach */
+	if (e <= s)
+		return (unsigned long) -EINVAL;
+
+	/*
+	 * From here on use the rounded length: the free block below must
+	 * shrink by exactly the range [s, e) being removed, not by the
+	 * caller's unrounded size.
+	 */
+	size = e - s;
+
+	if (assure_empty(info, 1) < 0)
+		return (unsigned long) -ENOMEM;
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = blk->start;
+		be = blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return (unsigned long) -ENOMEM;
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Delete from free list, release slot */
+		list_del(&blk->list);
+		release_slot(info, blk);
+		return s;
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start += size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* the back free fragment */
+		newblk = get_slot(info);
+		newblk->start = e;
+		newblk->size = be - e;
+
+		list_add(&newblk->list, &blk->list);
+	}
+
+	return s;
+}
+EXPORT_SYMBOL_GPL(rh_detach_region);
+
+/* Allocate a block of memory at the specified alignment.  The value returned
+ * is an offset into the buffer initialized by rh_init(), or a negative errno
+ * cast to unsigned long if there is an error: -EINVAL if size is not
+ * positive or alignment is not a positive power of two, -ENOMEM if spare
+ * slots cannot be assured or no free block can hold the request.
+ *
+ * size is rounded up to the heap's configured alignment.  The first free
+ * block (in list order) that can hold the aligned request is used and is
+ * split as needed; the allocated part moves to the taken list.
+ */
+unsigned long rh_alloc_align(rh_info_t * info, int size, int alignment, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk;
+	rh_block_t *newblk;
+	unsigned long start, sp_size;
+
+	/*
+	 * Validate size, and alignment must be a positive power of two.
+	 * Zero and INT_MIN both pass the bit test alone, but would make the
+	 * rounding below produce a start outside the chosen block.
+	 */
+	if (size <= 0 || alignment <= 0 || (alignment & (alignment - 1)) != 0)
+		return (unsigned long) -EINVAL;
+
+	/* Align to configured alignment */
+	size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
+
+	/* Up to two new slots are taken below (front fragment + allocation) */
+	if (assure_empty(info, 2) < 0)
+		return (unsigned long) -ENOMEM;
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (size <= blk->size) {
+			start = (blk->start + alignment - 1) & ~(alignment - 1);
+			if (start + size <= blk->start + blk->size)
+				break;
+		}
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return (unsigned long) -ENOMEM;
+
+	/* Just fits */
+	if (blk->size == size) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		newblk = blk;
+	} else {
+		/* Fragment caused, split if needed */
+		/* Create block for fragment in the beginning */
+		sp_size = start - blk->start;
+		if (sp_size) {
+			rh_block_t *spblk;
+
+			spblk = get_slot(info);
+			spblk->start = blk->start;
+			spblk->size = sp_size;
+			/* add before the blk */
+			list_add(&spblk->list, blk->list.prev);
+		}
+		newblk = get_slot(info);
+		newblk->start = start;
+		newblk->size = size;
+
+		/* blk still in free list, with updated start and size
+		 * for fragment in the end */
+		blk->start = start + size;
+		blk->size -= sp_size + size;
+		/* No fragment in the end, remove blk */
+		if (blk->size == 0) {
+			list_del(&blk->list);
+			release_slot(info, blk);
+		}
+	}
+
+	newblk->owner = owner;
+	attach_taken_block(info, newblk);
+
+	return start;
+}
+EXPORT_SYMBOL_GPL(rh_alloc_align);
+
+/* Allocate a block of memory at the default alignment (info->alignment).
+ * The value returned is an offset into the buffer initialized by rh_init(),
+ * or a negative errno cast to unsigned long if there is an error.  This is
+ * a thin wrapper around rh_alloc_align().
+ */
+unsigned long rh_alloc(rh_info_t * info, int size, const char *owner)
+{
+	return rh_alloc_align(info, size, info->alignment, owner);
+}
+EXPORT_SYMBOL_GPL(rh_alloc);
+
+/* Allocate a block of memory at the given offset, rounded up to the default
+ * alignment.  The value returned is an offset into the buffer initialized by
+ * rh_init(), or a negative errno cast to unsigned long if there is an error:
+ * -EINVAL if size is not positive or the rounded range is empty, -ENOMEM if
+ * spare slots cannot be assured or no free block contains the range.
+ *
+ * The end (start + size) is rounded down to the alignment, and the rounded
+ * range must lie entirely inside one free block.
+ */
+unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk1, *newblk2;
+	unsigned long s, e, m, bs = 0, be = 0;
+
+	/* Validate size */
+	if (size <= 0)
+		return (unsigned long) -EINVAL;
+
+	/* The region must be aligned */
+	s = start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	/* Rounding may leave nothing to allocate */
+	if (e <= s)
+		return (unsigned long) -EINVAL;
+
+	/*
+	 * From here on use the rounded length: the free block below must
+	 * shrink by exactly the range [s, e) that becomes taken, not by the
+	 * caller's unrounded size.
+	 */
+	size = e - s;
+
+	/* Up to two new slots are taken below (back fragment + allocation) */
+	if (assure_empty(info, 2) < 0)
+		return (unsigned long) -ENOMEM;
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = blk->start;
+		be = blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return (unsigned long) -ENOMEM;
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+
+		start = blk->start;
+		attach_taken_block(info, blk);
+
+		return start;
+
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start += size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* The back free fragment */
+		newblk2 = get_slot(info);
+		newblk2->start = e;
+		newblk2->size = be - e;
+
+		list_add(&newblk2->list, &blk->list);
+	}
+
+	newblk1 = get_slot(info);
+	newblk1->start = s;
+	newblk1->size = e - s;
+	newblk1->owner = owner;
+
+	start = newblk1->start;
+	attach_taken_block(info, newblk1);
+
+	return start;
+}
+EXPORT_SYMBOL_GPL(rh_alloc_fixed);
+
+/* Deallocate the memory previously allocated by one of the rh_alloc functions.
+ * The return value is the size of the deallocated block, or -EINVAL if start
+ * does not fall inside any taken block.
+ */
+int rh_free(rh_info_t * info, unsigned long start)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/*
+	 * Linear search for the last taken block that starts at or below
+	 * start; the loop stops at the first block starting above it.
+	 */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	/*
+	 * The block covers [blk->start, blk->start + blk->size): an address
+	 * equal to the end is already outside it and must not free it.
+	 */
+	if (blk == NULL || start >= (blk->start + blk->size))
+		return -EINVAL;
+
+	/* Remove from taken list */
+	list_del(&blk->list);
+
+	/* Get size of freed block */
+	size = blk->size;
+	attach_free_block(info, blk);
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(rh_free);
+
+/*
+ * Report the blocks on the free list (RHGS_FREE) or the taken list
+ * (RHGS_TAKEN).  At most @max_stats entries are copied into @stats
+ * (which may be NULL to only count).  Returns the total number of blocks
+ * on the list, which can exceed @max_stats, or -EINVAL for an unknown
+ * @what.
+ */
+int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
+{
+	struct list_head *head;
+	struct list_head *pos;
+	int count = 0;
+
+	if (what == RHGS_FREE)
+		head = &info->free_list;
+	else if (what == RHGS_TAKEN)
+		head = &info->taken_list;
+	else
+		return -EINVAL;
+
+	list_for_each(pos, head) {
+		rh_block_t *cur = list_entry(pos, rh_block_t, list);
+
+		/* Record only while there is room; keep counting regardless */
+		if (stats != NULL && count < max_stats) {
+			stats[count].start = cur->start;
+			stats[count].size = cur->size;
+			stats[count].owner = cur->owner;
+		}
+		count++;
+	}
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(rh_get_stats);
+
+/*
+ * Change the owner string of the taken block containing @start.
+ * Returns the size of that block, or -EINVAL if start does not fall
+ * inside any taken block.
+ */
+int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/*
+	 * Linear search for the last taken block that starts at or below
+	 * start; the loop stops at the first block starting above it.
+	 */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	/*
+	 * The block covers [blk->start, blk->start + blk->size): an address
+	 * equal to the end is already outside it.
+	 */
+	if (blk == NULL || start >= (blk->start + blk->size))
+		return -EINVAL;
+
+	blk->owner = owner;
+	size = blk->size;
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(rh_set_owner);
+
+/*
+ * Print the heap's slot counts followed by its free and taken blocks.
+ * At most ARRAY_SIZE(st) blocks of each kind are shown; the snapshot is
+ * kept in a function-static buffer.
+ */
+void rh_dump(rh_info_t * info)
+{
+	static rh_stats_t st[32];	/* XXX maximum 32 blocks */
+	const int cap = ARRAY_SIZE(st);
+	int count, idx;
+
+	printk(KERN_INFO
+	       "info @0x%p (%d slots empty / %d max)\n",
+	       info, info->empty_slots, info->max_blocks);
+
+	printk(KERN_INFO "  Free:\n");
+	count = rh_get_stats(info, RHGS_FREE, cap, st);
+	count = count > cap ? cap : count;
+	for (idx = 0; idx < count; idx++) {
+		printk(KERN_INFO
+		       "    0x%lx-0x%lx (%u)\n",
+		       st[idx].start, st[idx].start + st[idx].size,
+		       st[idx].size);
+	}
+	printk(KERN_INFO "\n");
+
+	printk(KERN_INFO "  Taken:\n");
+	count = rh_get_stats(info, RHGS_TAKEN, cap, st);
+	count = count > cap ? cap : count;
+	for (idx = 0; idx < count; idx++) {
+		printk(KERN_INFO
+		       "    0x%lx-0x%lx (%u) %s\n",
+		       st[idx].start, st[idx].start + st[idx].size,
+		       st[idx].size, st[idx].owner != NULL ? st[idx].owner : "");
+	}
+	printk(KERN_INFO "\n");
+}
+EXPORT_SYMBOL_GPL(rh_dump);
+
+/* Print one block's address, its start-end range and its size; @info is unused. */
+void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
+{
+	printk(KERN_INFO
+	       "blk @0x%p: 0x%lx-0x%lx (%u)\n",
+	       blk, blk->start, blk->start + blk->size, blk->size);
+}
+EXPORT_SYMBOL_GPL(rh_dump_blk);
+
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
new file mode 100644
index 0000000000..a4ab862506
--- /dev/null
+++ b/arch/powerpc/lib/sstep.c
@@ -0,0 +1,3666 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Single-step support.
+ *
+ * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ */
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/prefetch.h>
+#include <asm/sstep.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/cputable.h>
+#include <asm/disassemble.h>
+
+#ifdef CONFIG_PPC64
+/* Bits in SRR1 that are copied from MSR */
+#define MSR_MASK	0xffffffff87c0ffffUL
+#else	/* 32-bit build: the same low-word mask, without the upper word */
+#define MSR_MASK	0x87c0ffff
+#endif
+
+/* Bits in XER */
+#define XER_SO		0x80000000U	/* summary overflow */
+#define XER_OV		0x40000000U	/* overflow */
+#define XER_CA		0x20000000U	/* carry */
+#define XER_OV32	0x00080000U	/* 32-bit overflow */
+#define XER_CA32	0x00040000U	/* 32-bit carry; set_ca32() only touches it with CPU_FTR_ARCH_300 */
+
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd)   ((((rd) & 1) << 5) | ((rd) & 0xfe))	/* bit 0 of rd moves up to bit 5 */
+#endif
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Functions in ldstfp.S (register access and single/double conversion helpers)
+ */
+extern void get_fpr(int rn, double *p);	/* used below to copy FP register rn to *p */
+extern void put_fpr(int rn, const double *p);	/* used below to load FP register rn from *p */
+extern void get_vr(int rn, __vector128 *p);	/* as get_fpr, for vector register rn */
+extern void put_vr(int rn, __vector128 *p);	/* as put_fpr, for vector register rn */
+extern void load_vsrn(int vsr, const void *p);	/* used below to load VSX register vsr from *p */
+extern void store_vsrn(int vsr, void *p);	/* used below to copy VSX register vsr to *p */
+extern void conv_sp_to_dp(const float *sp, double *dp);	/* single -> double precision */
+extern void conv_dp_to_sp(const double *dp, float *sp);	/* double -> single precision */
+#endif
+
+#ifdef __powerpc64__
+/*
+ * Functions in quad.S (16-byte accesses; callers here treat the result as an error code)
+ */
+extern int do_lq(unsigned long ea, unsigned long *regs);
+extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1);
+extern int do_lqarx(unsigned long ea, unsigned long *regs);
+extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+		    unsigned int *crp);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define IS_LE	1	/* little-endian build: usable as a 0/1 array index */
+#define IS_BE	0
+#else
+#define IS_LE	0
+#define IS_BE	1	/* big-endian build */
+#endif
+
+/*
+ * Emulate the truncation of 64 bit values in 32-bit mode: only the low
+ * 32 bits of val survive unless the MSR has MSR_64BIT set.
+ */
+static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr,
+							unsigned long val)
+{
+	/* MSR_64BIT clear means 32-bit mode */
+	return (msr & MSR_64BIT) ? val : (val & 0xffffffffUL);
+}
+
+/*
+ * Would this conditional branch be taken?  Sets DECCTR if CTR is used.
+ */
+static nokprobe_inline int branch_taken(unsigned int instr,
+					const struct pt_regs *regs,
+					struct instruction_op *op)
+{
+	const unsigned int bo = (instr >> 21) & 0x1f;
+
+	if (!(bo & 4)) {
+		/* decrement counter: CTR == 1 now means it reaches zero */
+		op->type |= DECCTR;
+		if (((bo >> 1) & 1) != (regs->ctr == 1))
+			return 0;
+	}
+	if (!(bo & 0x10)) {
+		unsigned int bi = (instr >> 16) & 0x1f;
+		unsigned int cr_bit = (regs->ccr >> (31 - bi)) & 1;
+
+		if (cr_bit != ((bo >> 3) & 1))
+			return 0;
+	}
+	return 1;
+}
+
+/* Returns 1 if nb bytes at ea may be accessed; otherwise stores a fault
+ * address in regs->dar and returns 0.  Kernel-mode regs always pass. */
+static nokprobe_inline long address_ok(struct pt_regs *regs,
+				       unsigned long ea, int nb)
+{
+	void __user *uaddr = (void __user *)ea;
+
+	if (!user_mode(regs) || access_ok(uaddr, nb))
+		return 1;
+	/* First byte OK: the access overlaps the end of the user region */
+	regs->dar = access_ok(uaddr, 1) ? TASK_SIZE_MAX - 1 : ea;
+	return 0;
+}
+
+/*
+ * D-form effective address: the sign-extended low 16 bits of the
+ * instruction, plus GPR[RA] unless the RA field is 0.
+ */
+static nokprobe_inline unsigned long dform_ea(unsigned int instr,
+					      const struct pt_regs *regs)
+{
+	const unsigned int base_reg = (instr >> 16) & 0x1f;
+	unsigned long addr = (signed short) instr;	/* sign-extend */
+
+	/* RA == 0 means there is no base register to add */
+	if (base_reg != 0)
+		addr += regs->gpr[base_reg];
+
+	return addr;
+}
+
+#ifdef __powerpc64__
+/*
+ * DS-form effective address: the low 16 bits of the instruction with the
+ * bottom 2 bits cleared, sign-extended, plus GPR[RA] unless RA is 0.
+ */
+static nokprobe_inline unsigned long dsform_ea(unsigned int instr,
+					       const struct pt_regs *regs)
+{
+	const unsigned int base_reg = (instr >> 16) & 0x1f;
+	unsigned long addr = (signed short) (instr & ~3);	/* sign-extend */
+
+	/* RA == 0 means there is no base register to add */
+	if (base_reg != 0)
+		addr += regs->gpr[base_reg];
+
+	return addr;
+}
+
+/*
+ * DQ-form effective address: the low 16 bits of the instruction with the
+ * bottom 4 bits cleared, sign-extended, plus GPR[RA] unless RA is 0.
+ */
+static nokprobe_inline unsigned long dqform_ea(unsigned int instr,
+					       const struct pt_regs *regs)
+{
+	const unsigned int base_reg = (instr >> 16) & 0x1f;
+	unsigned long addr = (signed short) (instr & ~0xf);	/* sign-extend */
+
+	/* RA == 0 means there is no base register to add */
+	if (base_reg != 0)
+		addr += regs->gpr[base_reg];
+
+	return addr;
+}
+#endif /* __powerpc64__ */
+
+/*
+ * X-form effective address: GPR[RB], plus GPR[RA] unless the RA field
+ * of the instruction is 0.
+ */
+static nokprobe_inline unsigned long xform_ea(unsigned int instr,
+					      const struct pt_regs *regs)
+{
+	const unsigned int base_reg = (instr >> 16) & 0x1f;
+	const unsigned int index_reg = (instr >> 11) & 0x1f;
+	unsigned long addr = regs->gpr[index_reg];
+
+	/* RA == 0 means there is no base register to add */
+	if (base_reg != 0)
+		addr += regs->gpr[base_reg];
+
+	return addr;
+}
+
+/*
+ * Calculate effective address for a MLS:D-form / 8LS:D-form
+ * prefixed instruction.  The 34-bit displacement is the low 18 bits
+ * of the prefix word (instr) followed by the low 16 bits of the
+ * suffix word.
+ */
+static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr,
+						  unsigned int suffix,
+						  const struct pt_regs *regs)
+{
+	const int pcrel = GET_PREFIX_R(instr);
+	const int base_reg = GET_PREFIX_RA(suffix);
+	unsigned long disp_hi = instr & 0x3ffff;
+	unsigned long disp_lo = suffix & 0xffff;
+	unsigned long disp = (disp_hi << 16) | disp_lo;
+	unsigned int top32;
+	unsigned long addr;
+
+	/*
+	 * Sign-extend the 34-bit displacement: its upper 32 bits are
+	 * sign-extended as an int, then the low 2 bits are put back.
+	 */
+	top32 = (unsigned int)(disp >> 2);
+	addr = (signed int)top32;
+	addr = (addr << 2) | (disp & 0x3);
+
+	/*
+	 * R set: relative to regs->nip.
+	 * R clear: relative to GPR[RA], or absolute when RA is 0.
+	 *
+	 * (R set with RA != 0) is an invalid form. Should already be
+	 * checked for by caller!
+	 */
+	if (pcrel)
+		addr += regs->nip;
+	else if (base_reg)
+		addr += regs->gpr[base_reg];
+
+	return addr;
+}
+
+/*
+ * Largest power of 2 that divides x, capped at sizeof(unsigned long).
+ * ORing in the cap guarantees a set bit at or below that position.
+ */
+static nokprobe_inline unsigned long max_align(unsigned long x)
+{
+	const unsigned long capped = x | sizeof(unsigned long);
+	return capped & (~capped + 1);	/* lowest set bit */
+}
+
+static nokprobe_inline unsigned long byterev_2(unsigned long x)
+{
+	return ((x & 0xff) << 8) | ((x >> 8) & 0xff);	/* swap low 2 bytes */
+}
+
+static nokprobe_inline unsigned long byterev_4(unsigned long x)
+{
+	return ((x & 0xff) << 24) | ((x & 0xff00) << 8) |	/* bytes 0,1 move up */
+		((x >> 8) & 0xff00) | ((x >> 24) & 0xff);	/* bytes 2,3 move down */
+}
+
+#ifdef __powerpc64__
+static nokprobe_inline unsigned long byterev_8(unsigned long x)
+{
+	return byterev_4(x >> 32) | (byterev_4(x) << 32);	/* swap the reversed halves */
+}
+#endif
+
+static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
+{
+	u16 *p16 = ptr;
+	u32 *p32 = ptr;
+#ifdef __powerpc64__
+	unsigned long *p64 = ptr;
+	unsigned long hold;
+#endif
+
+	/* Reverse the nb bytes at ptr in place */
+	switch (nb) {
+	case 2:
+		*p16 = byterev_2(*p16);
+		break;
+	case 4:
+		*p32 = byterev_4(*p32);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		*p64 = byterev_8(*p64);
+		break;
+	case 16:	/* swap the two doublewords, reversing each */
+		hold = byterev_8(p64[0]);
+		p64[0] = byterev_8(p64[1]);
+		p64[1] = hold;
+		break;
+	case 32:	/* same, across four doublewords */
+		hold = byterev_8(p64[0]);
+		p64[0] = byterev_8(p64[3]);
+		p64[3] = hold;
+		hold = byterev_8(p64[2]);
+		p64[2] = byterev_8(p64[1]);
+		p64[1] = hold;
+		break;
+#endif
+	default:	/* unsupported size */
+		WARN_ON_ONCE(1);
+	}
+}
+
+/* Read nb bytes at ea with a single load.  Returns 0, or -EFAULT with regs->dar = ea. */
+static __always_inline int
+__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	unsigned long val = 0;	/* result stays 0 if nb matches no case */
+
+	switch (nb) {
+	case 1:
+		unsafe_get_user(val, (unsigned char __user *)ea, Efault);
+		break;
+	case 2:
+		unsafe_get_user(val, (unsigned short __user *)ea, Efault);
+		break;
+	case 4:
+		unsafe_get_user(val, (unsigned int __user *)ea, Efault);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		unsafe_get_user(val, (unsigned long __user *)ea, Efault);
+		break;
+#endif
+	}
+	*dest = val;
+	return 0;
+Efault:
+	regs->dar = ea;
+	return -EFAULT;
+}
+
+/* Aligned read of nb bytes at ea; non-kernel addresses go through user_read_access_begin/end. */
+static nokprobe_inline int
+read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	int ret;
+
+	if (is_kernel_addr(ea))
+		return __read_mem_aligned(dest, ea, nb, regs);
+
+	if (!user_read_access_begin((void __user *)ea, nb)) {
+		regs->dar = ea;
+		return -EFAULT;
+	}
+
+	ret = __read_mem_aligned(dest, ea, nb, regs);
+	user_read_access_end();
+	return ret;
+}
+
+/*
+ * Copy nb bytes from ea to dest with the largest aligned accesses possible
+ * (up to sizeof(long)); a fault sets regs->dar and returns -EFAULT.
+ */
+static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	int chunk;
+
+	while (nb > 0) {
+		chunk = max_align(ea);
+		if (chunk > nb)
+			chunk = max_align(nb);
+		switch (chunk) {
+		case 1:
+			unsafe_get_user(*dest, (u8 __user *)ea, Efault);
+			break;
+		case 2:
+			unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault);
+			break;
+		case 4:
+			unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault);
+			break;
+#ifdef __powerpc64__
+		case 8:
+			unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault);
+			break;
+#endif
+		}
+		dest += chunk;
+		ea += chunk;
+		nb -= chunk;
+	}
+	return 0;
+Efault:
+	regs->dar = ea;
+	return -EFAULT;
+}
+
+/* Copy nb bytes from ea into dest; non-kernel addresses go through user_read_access_begin/end. */
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	int ret;
+
+	if (is_kernel_addr(ea))
+		return __copy_mem_in(dest, ea, nb, regs);
+
+	if (!user_read_access_begin((void __user *)ea, nb)) {
+		regs->dar = ea;
+		return -EFAULT;
+	}
+
+	ret = __copy_mem_in(dest, ea, nb, regs);
+	user_read_access_end();
+	return ret;
+}
+
+/* Read nb bytes at ea into *dest, zero-filled; the bytes are right-justified on BE. */
+static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
+					      unsigned long ea, int nb,
+					      struct pt_regs *regs)
+{
+	union {
+		unsigned long ul;
+		u8 b[sizeof(unsigned long)];
+	} buf = { .ul = 0 };
+	const int offset = IS_BE ? sizeof(unsigned long) - nb : 0;
+	int ret;
+
+	ret = copy_mem_in(&buf.b[offset], ea, nb, regs);
+	if (ret)
+		return ret;
+	*dest = buf.ul;
+	return 0;
+}
+
+/*
+ * Load nb bytes (nb must be 1, 2, 4 or 8) from address ea into *dest.
+ * Returns 0 for success or -EFAULT if an error occurred.
+ * If nb < sizeof(long), the result is right-justified on BE systems.
+ */
+static int read_mem(unsigned long *dest, unsigned long ea, int nb,
+			      struct pt_regs *regs)
+{
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	if (ea & (nb - 1))	/* not naturally aligned */
+		return read_mem_unaligned(dest, ea, nb, regs);
+	return read_mem_aligned(dest, ea, nb, regs);
+}
+NOKPROBE_SYMBOL(read_mem);
+
+static __always_inline int
+__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	switch (nb) {	/* one store of exactly nb bytes; any other nb stores nothing */
+	case 1:
+		unsafe_put_user(val, (unsigned char __user *)ea, Efault);
+		break;
+	case 2:
+		unsafe_put_user(val, (unsigned short __user *)ea, Efault);
+		break;
+	case 4:
+		unsafe_put_user(val, (unsigned int __user *)ea, Efault);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		unsafe_put_user(val, (unsigned long __user *)ea, Efault);
+		break;
+#endif
+	}
+	return 0;
+
+Efault:	/* label handed to unsafe_put_user() above */
+	regs->dar = ea;	/* report the address that was being written */
+	return -EFAULT;
+}
+
+/* Aligned write of nb bytes at ea; non-kernel addresses go through user_write_access_begin/end. */
+static nokprobe_inline int
+write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	int ret;
+
+	if (is_kernel_addr(ea))
+		return __write_mem_aligned(val, ea, nb, regs);
+
+	if (!user_write_access_begin((void __user *)ea, nb)) {
+		regs->dar = ea;
+		return -EFAULT;
+	}
+
+	ret = __write_mem_aligned(val, ea, nb, regs);
+	user_write_access_end();
+	return ret;
+}
+
+/*
+ * Copy nb bytes from the buffer 'dest' (the source here, despite its name)
+ * out to ea, using the largest aligned accesses up to sizeof(long).
+ */
+static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	int chunk;
+
+	while (nb > 0) {
+		chunk = max_align(ea);
+		if (chunk > nb)
+			chunk = max_align(nb);
+		switch (chunk) {
+		case 1:
+			unsafe_put_user(*dest, (u8 __user *)ea, Efault);
+			break;
+		case 2:
+			unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault);
+			break;
+		case 4:
+			unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault);
+			break;
+#ifdef __powerpc64__
+		case 8:
+			unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault);
+			break;
+#endif
+		}
+		dest += chunk;
+		ea += chunk;
+		nb -= chunk;
+	}
+	return 0;
+Efault:
+	regs->dar = ea;
+	return -EFAULT;
+}
+
+/* Copy nb bytes from the buffer to ea; non-kernel addresses go through user_write_access_begin/end. */
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+	int ret;
+
+	if (is_kernel_addr(ea))
+		return __copy_mem_out(dest, ea, nb, regs);
+
+	if (!user_write_access_begin((void __user *)ea, nb)) {
+		regs->dar = ea;
+		return -EFAULT;
+	}
+
+	ret = __copy_mem_out(dest, ea, nb, regs);
+	user_write_access_end();
+	return ret;
+}
+
+/* Store the low-order nb bytes of val at ea, which need not be aligned. */
+static nokprobe_inline int write_mem_unaligned(unsigned long val,
+					       unsigned long ea, int nb,
+					       struct pt_regs *regs)
+{
+	union {
+		unsigned long ul;
+		u8 b[sizeof(unsigned long)];
+	} buf = { .ul = val };
+	/* On BE the low-order bytes sit at the end of the long */
+	const int offset = IS_BE ? sizeof(unsigned long) - nb : 0;
+
+	return copy_mem_out(&buf.b[offset], ea, nb, regs);
+}
+
+/*
+ * Store nb bytes (nb must be 1, 2, 4 or 8) of val at address ea.
+ * Returns 0 for success or -EFAULT if an error occurred.
+ */
+static int write_mem(unsigned long val, unsigned long ea, int nb,
+			       struct pt_regs *regs)
+{
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	if (ea & (nb - 1))	/* not naturally aligned */
+		return write_mem_unaligned(val, ea, nb, regs);
+	return write_mem_aligned(val, ea, nb, regs);
+}
+NOKPROBE_SYMBOL(write_mem);
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * These access either the real FP register or the image in the
+ * thread_struct, depending on regs->msr & MSR_FP.
+ */
+static int do_fp_load(struct instruction_op *op, unsigned long ea,
+		      struct pt_regs *regs, bool cross_endian)
+{
+	int err, rn, nb;
+	union {
+		int i;
+		unsigned int u;
+		float f;
+		double d[2];
+		unsigned long l[2];
+		u8 b[2 * sizeof(double)];
+	} u;	/* staging buffer: the loaded bytes viewed as int, float, double or raw */
+
+	nb = GETSIZE(op->type);	/* access size in bytes, taken from op->type */
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	rn = op->reg;	/* target FP register number */
+	err = copy_mem_in(u.b, ea, nb, regs);
+	if (err)
+		return err;
+	if (unlikely(cross_endian)) {
+		do_byte_reverse(u.b, min(nb, 8));	/* at most 8 bytes are reversed as one unit */
+		if (nb == 16)
+			do_byte_reverse(&u.b[8], 8);	/* second 8-byte half reversed separately */
+	}
+	preempt_disable();	/* NOTE(review): presumably keeps the FP state stable during the update */
+	if (nb == 4) {	/* 4-byte loads: convert (FPCONV), sign-extend (SIGNEXT) or zero-extend */
+		if (op->type & FPCONV)
+			conv_sp_to_dp(&u.f, &u.d[0]);
+		else if (op->type & SIGNEXT)
+			u.l[0] = u.i;
+		else
+			u.l[0] = u.u;
+	}
+	if (regs->msr & MSR_FP)
+		put_fpr(rn, &u.d[0]);	/* MSR_FP set: write the real register */
+	else
+		current->thread.TS_FPR(rn) = u.l[0];	/* otherwise update the saved image */
+	if (nb == 16) {
+		/* lfdp */
+		rn |= 1;	/* second value goes to the odd register of the pair */
+		if (regs->msr & MSR_FP)
+			put_fpr(rn, &u.d[1]);
+		else
+			current->thread.TS_FPR(rn) = u.l[1];
+	}
+	preempt_enable();
+	return 0;
+}
+NOKPROBE_SYMBOL(do_fp_load);
+
+static int do_fp_store(struct instruction_op *op, unsigned long ea,
+		       struct pt_regs *regs, bool cross_endian)
+{
+	int rn, nb;
+	union {
+		unsigned int u;
+		float f;
+		double d[2];
+		unsigned long l[2];
+		u8 b[2 * sizeof(double)];
+	} u;	/* staging buffer for the register value(s) to be stored */
+
+	nb = GETSIZE(op->type);	/* access size in bytes, taken from op->type */
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	rn = op->reg;	/* source FP register number */
+	preempt_disable();	/* NOTE(review): presumably keeps the FP state stable while it is read */
+	if (regs->msr & MSR_FP)
+		get_fpr(rn, &u.d[0]);	/* MSR_FP set: read the real register */
+	else
+		u.l[0] = current->thread.TS_FPR(rn);	/* otherwise read the saved image */
+	if (nb == 4) {	/* 4-byte stores: convert to single (FPCONV) or keep the low 32 bits */
+		if (op->type & FPCONV)
+			conv_dp_to_sp(&u.d[0], &u.f);
+		else
+			u.u = u.l[0];
+	}
+	if (nb == 16) {	/* 16-byte store: also fetch the odd register of the pair */
+		rn |= 1;
+		if (regs->msr & MSR_FP)
+			get_fpr(rn, &u.d[1]);
+		else
+			u.l[1] = current->thread.TS_FPR(rn);
+	}
+	preempt_enable();
+	if (unlikely(cross_endian)) {
+		do_byte_reverse(u.b, min(nb, 8));	/* at most 8 bytes are reversed as one unit */
+		if (nb == 16)
+			do_byte_reverse(&u.b[8], 8);
+	}
+	return copy_mem_out(u.b, ea, nb, regs);
+}
+NOKPROBE_SYMBOL(do_fp_store);
+#endif
+
+#ifdef CONFIG_ALTIVEC
+/* For Altivec/VMX, no need to worry about alignment */
+static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
+				       int size, struct pt_regs *regs,
+				       bool cross_endian)
+{
+	union {
+		__vector128 v;
+		u8 b[sizeof(__vector128)];
+	} vec = {};
+	u8 *slot;
+
+	if (!address_ok(regs, ea & ~0xfUL, 16))
+		return -EFAULT;
+	/* align to multiple of size; data lands at the same offset in vec */
+	ea &= ~(size - 1);
+	slot = &vec.b[ea & 0xf];
+	if (copy_mem_in(slot, ea, size, regs))
+		return -EFAULT;	/* the only error copy_mem_in() returns */
+	if (unlikely(cross_endian))
+		do_byte_reverse(slot, size);
+	preempt_disable();
+	if (regs->msr & MSR_VEC)
+		put_vr(rn, &vec.v);
+	else
+		current->thread.vr_state.vr[rn] = vec.v;
+	preempt_enable();
+	return 0;
+}
+
+static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
+					int size, struct pt_regs *regs,
+					bool cross_endian)
+{
+	union {
+		__vector128 v;
+		u8 b[sizeof(__vector128)];
+	} vec;
+	u8 *slot;
+
+	if (!address_ok(regs, ea & ~0xfUL, 16))
+		return -EFAULT;
+	ea &= ~(size - 1);	/* align to multiple of size */
+	slot = &vec.b[ea & 0xf];	/* element at the same offset within the vector */
+	preempt_disable();
+	if (regs->msr & MSR_VEC)
+		get_vr(rn, &vec.v);
+	else
+		vec.v = current->thread.vr_state.vr[rn];
+	preempt_enable();
+	if (unlikely(cross_endian))
+		do_byte_reverse(slot, size);
+	return copy_mem_out(slot, ea, size, regs);
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef __powerpc64__
+static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea,
+				      int reg, bool cross_endian)
+{
+	unsigned long *pair = &regs->gpr[reg];	/* GPRs reg and reg + 1 */
+	int ret;
+
+	if (!address_ok(regs, ea, 16))
+		return -EFAULT;
+	if (ea & 0xf) {		/* misaligned: two 8-byte reads */
+		ret = read_mem(&pair[IS_LE], ea, 8, regs);
+		if (!ret)
+			ret = read_mem(&pair[IS_BE], ea + 8, 8, regs);
+	} else {		/* if aligned, should be atomic */
+		ret = do_lq(ea, pair);
+	}
+	if (!ret && unlikely(cross_endian))
+		do_byte_reverse(pair, 16);
+	return ret;
+}
+
+static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea,
+				       int reg, bool cross_endian)
+{
+	unsigned long pair[2];	/* copy of GPRs reg and reg + 1 */
+	int ret;
+
+	if (!address_ok(regs, ea, 16))
+		return -EFAULT;
+	pair[0] = regs->gpr[reg];
+	pair[1] = regs->gpr[reg + 1];
+	if (unlikely(cross_endian))
+		do_byte_reverse(pair, 16);
+
+	/* if aligned, should be atomic */
+	if (!(ea & 0xf))
+		return do_stq(ea, pair[0], pair[1]);
+
+	ret = write_mem(pair[IS_LE], ea, 8, regs);
+	if (ret)
+		return ret;
+	return write_mem(pair[IS_BE], ea + 8, 8, regs);
+}
+#endif /* __powerpc64__ */
+
+#ifdef CONFIG_VSX
+void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+		      const void *mem, bool rev)
+{
+	int size, read_size;
+	int i, j;
+	const unsigned int *wp;
+	const unsigned short *hp;
+	const unsigned char *bp;
+
+	size = GETSIZE(op->type);	/* number of bytes of 'mem' to consume */
+	reg->d[0] = reg->d[1] = 0;	/* start from an all-zero image of the first register */
+
+	switch (op->element_size) {	/* element width in bytes selects the layout */
+	case 32:
+		/* [p]lxvp[x] */
+	case 16:
+		/* whole vector; lxv[x] or lxvl[l] */
+		if (size == 0)
+			break;
+		memcpy(reg, mem, size);
+		if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+			rev = !rev;	/* on LE, VSX_LDLEFT flips the reversal decision */
+		if (rev)
+			do_byte_reverse(reg, size);
+		break;
+	case 8:
+		/* scalar loads, lxvd2x, lxvdsx */
+		read_size = (size >= 8) ? 8 : size;
+		i = IS_LE ? 8 : 8 - read_size;	/* byte offset of the data inside reg->b[] */
+		memcpy(&reg->b[i], mem, read_size);
+		if (rev)
+			do_byte_reverse(&reg->b[i], 8);
+		if (size < 8) {
+			if (op->type & SIGNEXT) {
+				/* size == 4 is the only case here */
+				reg->d[IS_LE] = (signed int) reg->d[IS_LE];
+			} else if (op->vsx_flags & VSX_FPCONV) {
+				preempt_disable();
+				conv_sp_to_dp(&reg->fp[1 + IS_LE],
+					      &reg->dp[IS_LE]);
+				preempt_enable();
+			}
+		} else {
+			if (size == 16) {
+				unsigned long v = *(unsigned long *)(mem + 8);	/* second doubleword */
+				reg->d[IS_BE] = !rev ? v : byterev_8(v);
+			} else if (op->vsx_flags & VSX_SPLAT)
+				reg->d[IS_BE] = reg->d[IS_LE];	/* copy the loaded doubleword into the other half */
+		}
+		break;
+	case 4:
+		/* lxvw4x, lxvwsx */
+		wp = mem;
+		for (j = 0; j < size / 4; ++j) {
+			i = IS_LE ? 3 - j : j;	/* on LE the words are filled from index 3 downwards */
+			reg->w[i] = !rev ? *wp++ : byterev_4(*wp++);
+		}
+		if (op->vsx_flags & VSX_SPLAT) {
+			u32 val = reg->w[IS_LE ? 3 : 0];	/* the first word that was loaded */
+			for (; j < 4; ++j) {
+				i = IS_LE ? 3 - j : j;
+				reg->w[i] = val;
+			}
+		}
+		break;
+	case 2:
+		/* lxvh8x */
+		hp = mem;
+		for (j = 0; j < size / 2; ++j) {
+			i = IS_LE ? 7 - j : j;	/* on LE the halfwords are filled from index 7 downwards */
+			reg->h[i] = !rev ? *hp++ : byterev_2(*hp++);
+		}
+		break;
+	case 1:
+		/* lxvb16x */
+		bp = mem;
+		for (j = 0; j < size; ++j) {
+			i = IS_LE ? 15 - j : j;	/* on LE the bytes are filled from index 15 downwards */
+			reg->b[i] = *bp++;
+		}
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_load);
+NOKPROBE_SYMBOL(emulate_vsx_load);
+
+void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
+		       void *mem, bool rev)
+{
+	int size, write_size;
+	int i, j;
+	union vsx_reg buf;	/* scratch copy used when the register image must be transformed */
+	unsigned int *wp;
+	unsigned short *hp;
+	unsigned char *bp;
+
+	size = GETSIZE(op->type);	/* number of bytes to produce in 'mem' */
+
+	switch (op->element_size) {	/* element width in bytes selects the layout */
+	case 32:
+		/* [p]stxvp[x] */
+		if (size == 0)
+			break;
+		if (rev) {
+			/* reverse 32 bytes */
+			union vsx_reg buf32[2];
+			buf32[0].d[0] = byterev_8(reg[1].d[1]);
+			buf32[0].d[1] = byterev_8(reg[1].d[0]);
+			buf32[1].d[0] = byterev_8(reg[0].d[1]);
+			buf32[1].d[1] = byterev_8(reg[0].d[0]);
+			memcpy(mem, buf32, size);
+		} else {
+			memcpy(mem, reg, size);
+		}
+		break;
+	case 16:
+		/* stxv, stxvx, stxvl, stxvll */
+		if (size == 0)
+			break;
+		if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+			rev = !rev;	/* on LE, VSX_LDLEFT flips the reversal decision */
+		if (rev) {
+			/* reverse 16 bytes */
+			buf.d[0] = byterev_8(reg->d[1]);
+			buf.d[1] = byterev_8(reg->d[0]);
+			reg = &buf;
+		}
+		memcpy(mem, reg, size);
+		break;
+	case 8:
+		/* scalar stores, stxvd2x */
+		write_size = (size >= 8) ? 8 : size;
+		i = IS_LE ? 8 : 8 - write_size;	/* byte offset of the data inside reg->b[] */
+		if (size < 8 && op->vsx_flags & VSX_FPCONV) {
+			buf.d[0] = buf.d[1] = 0;
+			preempt_disable();
+			conv_dp_to_sp(&reg->dp[IS_LE], &buf.fp[1 + IS_LE]);
+			preempt_enable();
+			reg = &buf;	/* store the converted single-precision value instead */
+		}
+		memcpy(mem, &reg->b[i], write_size);
+		if (size == 16)
+			memcpy(mem + 8, &reg->d[IS_BE], 8);	/* second doubleword */
+		if (unlikely(rev)) {
+			do_byte_reverse(mem, write_size);
+			if (size == 16)
+				do_byte_reverse(mem + 8, 8);
+		}
+		break;
+	case 4:
+		/* stxvw4x */
+		wp = mem;
+		for (j = 0; j < size / 4; ++j) {
+			i = IS_LE ? 3 - j : j;	/* on LE the words are taken from index 3 downwards */
+			*wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]);
+		}
+		break;
+	case 2:
+		/* stxvh8x */
+		hp = mem;
+		for (j = 0; j < size / 2; ++j) {
+			i = IS_LE ? 7 - j : j;	/* on LE the halfwords are taken from index 7 downwards */
+			*hp++ = !rev ? reg->h[i] : byterev_2(reg->h[i]);
+		}
+		break;
+	case 1:
+		/* stxvb16x */
+		bp = mem;
+		for (j = 0; j < size; ++j) {
+			i = IS_LE ? 15 - j : j;	/* on LE the bytes are taken from index 15 downwards */
+			*bp++ = reg->b[i];
+		}
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_store);
+NOKPROBE_SYMBOL(emulate_vsx_store);
+
+static nokprobe_inline int do_vsx_load(struct instruction_op *op,
+				       unsigned long ea, struct pt_regs *regs,
+				       bool cross_endian)
+{
+	int reg = op->reg;	/* first VSX register number to write */
+	int i, j, nr_vsx_regs;
+	u8 mem[32];	/* raw bytes read from memory */
+	union vsx_reg buf[2];	/* register images built by emulate_vsx_load() */
+	int size = GETSIZE(op->type);
+
+	if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
+		return -EFAULT;
+
+	nr_vsx_regs = max(1ul, size / sizeof(__vector128));	/* always at least one register */
+	emulate_vsx_load(op, buf, mem, cross_endian);
+	preempt_disable();	/* NOTE(review): presumably keeps the register state stable during the update */
+	if (reg < 32) {
+		/* FP regs + extensions */
+		if (regs->msr & MSR_FP) {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;	/* on LE the images are used in reverse order */
+				load_vsrn(reg + i, &buf[j].v);
+			}
+		} else {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0];
+				current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1];
+			}
+		}
+	} else {	/* registers 32 and up: checked against MSR_VEC, saved image is vr_state */
+		if (regs->msr & MSR_VEC) {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				load_vsrn(reg + i, &buf[j].v);
+			}
+		} else {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				current->thread.vr_state.vr[reg - 32 + i] = buf[j].v;
+			}
+		}
+	}
+	preempt_enable();
+	return 0;
+}
+
+static nokprobe_inline int do_vsx_store(struct instruction_op *op,
+					unsigned long ea, struct pt_regs *regs,
+					bool cross_endian)
+{
+	int reg = op->reg;	/* first VSX register number to read */
+	int i, j, nr_vsx_regs;
+	u8 mem[32];	/* bytes to be written to memory */
+	union vsx_reg buf[2];	/* register images handed to emulate_vsx_store() */
+	int size = GETSIZE(op->type);
+
+	if (!address_ok(regs, ea, size))
+		return -EFAULT;
+
+	nr_vsx_regs = max(1ul, size / sizeof(__vector128));	/* always at least one register */
+	preempt_disable();	/* NOTE(review): presumably keeps the register state stable while it is read */
+	if (reg < 32) {
+		/* FP regs + extensions */
+		if (regs->msr & MSR_FP) {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;	/* on LE the images are filled in reverse order */
+				store_vsrn(reg + i, &buf[j].v);
+			}
+		} else {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0];
+				buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1];
+			}
+		}
+	} else {	/* registers 32 and up: checked against MSR_VEC, saved image is vr_state */
+		if (regs->msr & MSR_VEC) {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				store_vsrn(reg + i, &buf[j].v);
+			}
+		} else {
+			for (i = 0; i < nr_vsx_regs; i++) {
+				j = IS_LE ? nr_vsx_regs - i - 1 : i;
+				buf[j].v = current->thread.vr_state.vr[reg - 32 + i];
+			}
+		}
+	}
+	preempt_enable();
+	emulate_vsx_store(op, buf, mem, cross_endian);
+	return  copy_mem_out(mem, ea, size, regs);
+}
+#endif /* CONFIG_VSX */
+
+/* Zero l1_dcache_bytes() bytes starting at ea, one long at a time. */
+static __always_inline int __emulate_dcbz(unsigned long ea)
+{
+	const unsigned long block_bytes = l1_dcache_bytes();
+	unsigned long off;
+
+	for (off = 0; off < block_bytes; off += sizeof(long))
+		unsafe_put_user(0, (unsigned long __user *)(ea + off), Efault);
+	return 0;
+
+Efault:
+	return -EFAULT;
+}
+
+/*
+ * Emulate dcbz: zero the l1_dcache_bytes()-sized block containing ea.
+ * Returns 0 on success, or -EFAULT with the fault address in regs->dar.
+ */
+int emulate_dcbz(unsigned long ea, struct pt_regs *regs)
+{
+	const unsigned long block_bytes = l1_dcache_bytes();
+	int ret = -EFAULT;
+
+	/* Truncate for 32-bit mode, then round down to the block start */
+	ea = truncate_if_32bit(regs->msr, ea) & ~(block_bytes - 1);
+	if (!address_ok(regs, ea, block_bytes))
+		return -EFAULT;
+
+	if (is_kernel_addr(ea)) {
+		ret = __emulate_dcbz(ea);
+	} else if (user_write_access_begin((void __user *)ea, block_bytes)) {
+		ret = __emulate_dcbz(ea);
+		user_write_access_end();
+	}
+
+	if (ret)
+		regs->dar = ea;
+	return ret;
+}
+NOKPROBE_SYMBOL(emulate_dcbz);
+
+#define __put_user_asmx(x, addr, err, op, cr)	/* store x at addr using instruction 'op' */ \
+	__asm__ __volatile__(				\
+		".machine push\n"			\
+		".machine power8\n"			\
+		"1:	" op " %2,0,%3\n"		\
+		".machine pop\n"			\
+		"	mfcr	%1\n"	/* copy CR into cr after the store */ \
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li	%0,%4\n"	/* fault fixup: err = -EFAULT */ \
+		"	b	2b\n"			\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err), "=r" (cr)			\
+		: "r" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __get_user_asmx(x, addr, err, op)	/* load x from addr using instruction 'op' */ \
+	__asm__ __volatile__(				\
+		".machine push\n"			\
+		".machine power8\n"			\
+		"1:	"op" %1,0,%2\n"			\
+		".machine pop\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li	%0,%3\n"	/* fault fixup: err = -EFAULT */ \
+		"	b	2b\n"			\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err), "=r" (x)			\
+		: "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __cacheop_user_asmx(addr, err, op)	/* run cache instruction 'op' on addr */ \
+	__asm__ __volatile__(				\
+		"1:	"op" 0,%1\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li	%0,%2\n"	/* %2 is the -EFAULT immediate; %3 is err itself */ \
+		"	b	2b\n"			\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err)				\
+		: "r" (addr), "i" (-EFAULT), "0" (err))
+
+/* Compute the CR0 update for op->val (LT/GT/EQ plus SO from XER) into op->ccval. */
+static nokprobe_inline void set_cr0(const struct pt_regs *regs,
+				    struct instruction_op *op)
+{
+	long result = op->val;
+	unsigned long cr0;
+
+	if (!(regs->msr & MSR_64BIT))
+		result = (int) result;	/* compare as a 32-bit value */
+	cr0 = (result < 0) ? 0x80000000 : (result > 0) ? 0x40000000 : 0x20000000;
+
+	/* Keep the other CR fields, copy XER_SO down into CR0, add LT/GT/EQ */
+	op->type |= SETCC;
+	op->ccval = (regs->ccr & 0x0fffffff) |
+		    ((regs->xer >> 3) & 0x10000000) | cr0;
+}
+
+/* Set or clear XER_CA32 in op->xerval; does nothing without CPU_FTR_ARCH_300. */
+static nokprobe_inline void set_ca32(struct instruction_op *op, bool val)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	op->xerval = val ? (op->xerval | XER_CA32) :
+			   (op->xerval & ~XER_CA32);
+}
+
+/* Emulate rd = val1 + val2 + carry_in, recording CA (and CA32) in op->xerval. */
+static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
+				     struct instruction_op *op, int rd,
+				     unsigned long val1, unsigned long val2,
+				     unsigned long carry_in)
+{
+	const unsigned long sum = val1 + val2 + (carry_in ? 1 : 0);
+	const unsigned long s = truncate_if_32bit(regs->msr, sum);
+	const unsigned long a = truncate_if_32bit(regs->msr, val1);
+	const unsigned int s32 = s, a32 = a;
+
+	op->type = COMPUTE | SETREG | SETXER;
+	op->reg = rd;
+	op->val = sum;
+	op->xerval = regs->xer;
+	/* Carry out: the sum wrapped below val1, or only reached it with a carry in */
+	if (s < a || (carry_in && s == a))
+		op->xerval |= XER_CA;
+	else
+		op->xerval &= ~XER_CA;
+
+	set_ca32(op, s32 < a32 || (carry_in && s32 == a32));
+}
+
+/* Signed compare of v1 with v2: the result goes into CR field crfld of op->ccval. */
+static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
+					  struct instruction_op *op,
+					  long v1, long v2, int crfld)
+{
+	const unsigned int shift = (7 - crfld) * 4;
+	unsigned int field = (regs->xer >> 31) & 1;	/* get SO bit */
+
+	/* exactly one of "less" (8), "greater" (4), "equal" (2) is added */
+	if (v1 < v2)
+		field |= 8;
+	else
+		field |= (v1 > v2) ? 4 : 2;
+
+	op->type = COMPUTE | SETCC;
+	op->ccval = (regs->ccr & ~(0xf << shift)) | (field << shift);
+}
+
+/* Unsigned compare of v1 with v2: the result goes into CR field crfld of op->ccval. */
+static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
+					    struct instruction_op *op,
+					    unsigned long v1,
+					    unsigned long v2, int crfld)
+{
+	const unsigned int shift = (7 - crfld) * 4;
+	unsigned int field = (regs->xer >> 31) & 1;	/* get SO bit */
+
+	/* exactly one of "less" (8), "greater" (4), "equal" (2) is added */
+	if (v1 < v2)
+		field |= 8;
+	else
+		field |= (v1 > v2) ? 4 : 2;
+
+	op->type = COMPUTE | SETCC;
+	op->ccval = (regs->ccr & ~(0xf << shift)) | (field << shift);
+}
+
+/* cmpb: each byte of op->val is 0xff where v1 and v2 hold equal bytes, else 0x00. */
+static nokprobe_inline void do_cmpb(const struct pt_regs *regs,
+				    struct instruction_op *op,
+				    unsigned long v1, unsigned long v2)
+{
+	unsigned long out_val = 0, mask;
+	unsigned int i;
+
+	for (i = 0; i < sizeof(unsigned long); i++) {
+		mask = 0xffUL << (i * 8);	/* shift count stays below the type width */
+		if ((v1 & mask) == (v2 & mask))
+			out_val |= mask;
+	}
+	op->val = out_val;
+}
+
+/*
+ * Population count of v1 into op->val.  The size parameter selects the
+ * instruction being emulated: popcntb = 8, popcntw = 32, popcntd = 64.
+ */
+static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
+				      struct instruction_op *op,
+				      unsigned long v1, int size)
+{
+	const unsigned long long m1 = 0x5555555555555555ULL;
+	const unsigned long long m2 = 0x3333333333333333ULL;
+	const unsigned long long m4 = 0x0f0f0f0f0f0f0f0fULL;
+	unsigned long long cnt = v1;
+
+	/* Parallel bit count: sums per 2 bits, per 4 bits, then per byte */
+	cnt -= (cnt >> 1) & m1;
+	cnt = (cnt & m2) + ((cnt >> 2) & m2);
+	cnt = (cnt + (cnt >> 4)) & m4;
+	if (size != 8) {
+		/* size 8 is popcntb and stops here; otherwise fold the bytes */
+		cnt += cnt >> 8;
+		cnt += cnt >> 16;
+		if (size == 32)		/* popcntw */
+			cnt &= 0x0000003f0000003fULL;
+		else			/* popcntd */
+			cnt = (cnt + (cnt >> 32)) & 0x7f;
+	}
+
+	op->val = cnt;
+}
+
+#ifdef CONFIG_PPC64
+/* bpermd: bit i of op->val is set when v2 has bit PPC_BIT(n) set, n being byte i of v1. */
+static nokprobe_inline void do_bpermd(const struct pt_regs *regs,
+				      struct instruction_op *op,
+				      unsigned long v1, unsigned long v2)
+{
+	unsigned char result = 0;
+	unsigned int i;
+
+	for (i = 0; i < 8; i++) {
+		const unsigned char sel = (v1 >> (i * 8)) & 0xff;
+		/* selectors of 64 and above contribute a 0 bit */
+		if (sel < 64 && (v2 & PPC_BIT(sel)))
+			result |= 1 << i;
+	}
+	op->val = result;
+}
+#endif /* CONFIG_PPC64 */
+/*
+ * XOR together the low bits of the bytes of v.  The size parameter selects
+ * the instruction: prtyw = 32 (one result per 32-bit word), prtyd = 64.
+ */
+static nokprobe_inline void do_prty(const struct pt_regs *regs,
+				    struct instruction_op *op,
+				    unsigned long v, int size)
+{
+	unsigned long long par = v ^ (v >> 8);
+
+	par ^= par >> 16;
+	if (size == 32) {		/* prtyw */
+		par &= 0x0000000100000001ULL;
+	} else {			/* prtyd */
+		par ^= par >> 32;
+		par &= 1;
+	}
+	op->val = par;
+}
+
+/* Build a 5-bit comparison mask: signed <, >, == (0x10, 0x08, 0x04), unsigned <, > (0x02, 0x01). */
+static nokprobe_inline int trap_compare(long v1, long v2)
+{
+	const unsigned long u1 = v1, u2 = v2;
+	int cond;
+
+	/* exactly one of the three signed bits is set */
+	cond = (v1 < v2) ? 0x10 : (v1 > v2) ? 0x08 : 0x04;
+
+	/* unsigned bits; neither is set when the values are equal */
+	if (u1 < u2)
+		cond |= 0x02;
+	else if (u1 > u2)
+		cond |= 0x01;
+	return cond;
+}
+
+/*
+ * Elements of 32-bit rotate and mask instructions (plus their 64-bit variants).
+ */
+#define MASK32(mb, me)	((0xffffffffUL >> (mb)) + \
+			 ((signed long)-0x80000000L >> (me)) + ((me) >= (mb)))	/* NOTE(review): appears to give bits mb..me, wrapping if me < mb */
+#ifdef __powerpc64__
+#define MASK64_L(mb)	(~0UL >> (mb))	/* all ones shifted right by mb */
+#define MASK64_R(me)	((signed long)-0x8000000000000000L >> (me))	/* top bit arithmetically shifted right by me */
+#define MASK64(mb, me)	(MASK64_L(mb) + MASK64_R(me) + ((me) >= (mb)))	/* same construction as MASK32, 64 bits wide */
+#define DATA32(x)	(((x) & 0xffffffffUL) | (((x) & 0xffffffffUL) << 32))	/* low word copied into both halves */
+#else
+#define DATA32(x)	(x)	/* 32-bit build: value used as is */
+#endif
+#define ROTATE(x, n)	((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))	/* rotate left by n; n == 0 avoids a full-width shift */
+
+/*
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ * Integer arithmetic and logical instructions, branches, and barrier
+ * instructions can be emulated just using the information in *op.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
+ */
+int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+		  ppc_inst_t instr)
+{
+#ifdef CONFIG_PPC64
+	unsigned int suffixopcode, prefixtype, prefix_r;
+#endif
+	unsigned int opcode, ra, rb, rc, rd, spr, u;
+	unsigned long int imm;
+	unsigned long int val, val2;
+	unsigned int mb, me, sh;
+	unsigned int word, suffix;
+	long ival;
+
+	word = ppc_inst_val(instr);
+	suffix = ppc_inst_suffix(instr);
+
+	op->type = COMPUTE;
+
+	opcode = ppc_inst_primary_opcode(instr);
+	switch (opcode) {
+	case 16:	/* bc */
+		op->type = BRANCH;
+		imm = (signed short)(word & 0xfffc);
+		if ((word & 2) == 0)
+			imm += regs->nip;
+		op->val = truncate_if_32bit(regs->msr, imm);
+		if (word & 1)
+			op->type |= SETLK;
+		if (branch_taken(word, regs, op))
+			op->type |= BRTAKEN;
+		return 1;
+	case 17:	/* sc */
+		if ((word & 0xfe2) == 2)
+			op->type = SYSCALL;
+		else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+				(word & 0xfe3) == 1) {	/* scv */
+			op->type = SYSCALL_VECTORED_0;
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+		} else
+			op->type = UNKNOWN;
+		return 0;
+	case 18:	/* b */
+		op->type = BRANCH | BRTAKEN;
+		imm = word & 0x03fffffc;
+		if (imm & 0x02000000)
+			imm -= 0x04000000;
+		if ((word & 2) == 0)
+			imm += regs->nip;
+		op->val = truncate_if_32bit(regs->msr, imm);
+		if (word & 1)
+			op->type |= SETLK;
+		return 1;
+	case 19:
+		switch ((word >> 1) & 0x3ff) {
+		case 0:		/* mcrf */
+			op->type = COMPUTE + SETCC;
+			rd = 7 - ((word >> 23) & 0x7);
+			ra = 7 - ((word >> 18) & 0x7);
+			rd *= 4;
+			ra *= 4;
+			val = (regs->ccr >> ra) & 0xf;
+			op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
+			return 1;
+
+		case 16:	/* bclr */
+		case 528:	/* bcctr */
+			op->type = BRANCH;
+			imm = (word & 0x400)? regs->ctr: regs->link;
+			op->val = truncate_if_32bit(regs->msr, imm);
+			if (word & 1)
+				op->type |= SETLK;
+			if (branch_taken(word, regs, op))
+				op->type |= BRTAKEN;
+			return 1;
+
+		case 18:	/* rfid, scary */
+			if (regs->msr & MSR_PR)
+				goto priv;
+			op->type = RFI;
+			return 0;
+
+		case 150:	/* isync */
+			op->type = BARRIER | BARRIER_ISYNC;
+			return 1;
+
+		case 33:	/* crnor */
+		case 129:	/* crandc */
+		case 193:	/* crxor */
+		case 225:	/* crnand */
+		case 257:	/* crand */
+		case 289:	/* creqv */
+		case 417:	/* crorc */
+		case 449:	/* cror */
+			op->type = COMPUTE + SETCC;
+			ra = (word >> 16) & 0x1f;
+			rb = (word >> 11) & 0x1f;
+			rd = (word >> 21) & 0x1f;
+			ra = (regs->ccr >> (31 - ra)) & 1;
+			rb = (regs->ccr >> (31 - rb)) & 1;
+			val = (word >> (6 + ra * 2 + rb)) & 1;
+			op->ccval = (regs->ccr & ~(1UL << (31 - rd))) |
+				(val << (31 - rd));
+			return 1;
+		}
+		break;
+	case 31:
+		switch ((word >> 1) & 0x3ff) {
+		case 598:	/* sync */
+			op->type = BARRIER + BARRIER_SYNC;
+#ifdef __powerpc64__
+			switch ((word >> 21) & 3) {
+			case 1:		/* lwsync */
+				op->type = BARRIER + BARRIER_LWSYNC;
+				break;
+			case 2:		/* ptesync */
+				op->type = BARRIER + BARRIER_PTESYNC;
+				break;
+			}
+#endif
+			return 1;
+
+		case 854:	/* eieio */
+			op->type = BARRIER + BARRIER_EIEIO;
+			return 1;
+		}
+		break;
+	}
+
+	rd = (word >> 21) & 0x1f;
+	ra = (word >> 16) & 0x1f;
+	rb = (word >> 11) & 0x1f;
+	rc = (word >> 6) & 0x1f;
+
+	switch (opcode) {
+#ifdef __powerpc64__
+	case 1:
+		if (!cpu_has_feature(CPU_FTR_ARCH_31))
+			goto unknown_opcode;
+
+		prefix_r = GET_PREFIX_R(word);
+		ra = GET_PREFIX_RA(suffix);
+		rd = (suffix >> 21) & 0x1f;
+		op->reg = rd;
+		op->val = regs->gpr[rd];
+		suffixopcode = get_op(suffix);
+		prefixtype = (word >> 24) & 0x3;
+		switch (prefixtype) {
+		case 2:
+			if (prefix_r && ra)
+				return 0;
+			switch (suffixopcode) {
+			case 14:	/* paddi */
+				op->type = COMPUTE | PREFIXED;
+				op->val = mlsd_8lsd_ea(word, suffix, regs);
+				goto compute_done;
+			}
+		}
+		break;
+	case 2:		/* tdi */
+		if (rd & trap_compare(regs->gpr[ra], (short) word))
+			goto trap;
+		return 1;
+#endif
+	case 3:		/* twi */
+		if (rd & trap_compare((int)regs->gpr[ra], (short) word))
+			goto trap;
+		return 1;
+
+#ifdef __powerpc64__
+	case 4:
+		/*
+		 * There are very many instructions with this primary opcode
+		 * introduced in the ISA as early as v2.03. However, the ones
+		 * we currently emulate were all introduced with ISA 3.0
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+			goto unknown_opcode;
+
+		switch (word & 0x3f) {
+		case 48:	/* maddhd */
+			asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
+				     "=r" (op->val) : "r" (regs->gpr[ra]),
+				     "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+			goto compute_done;
+
+		case 49:	/* maddhdu */
+			asm volatile(PPC_MADDHDU(%0, %1, %2, %3) :
+				     "=r" (op->val) : "r" (regs->gpr[ra]),
+				     "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+			goto compute_done;
+
+		case 51:	/* maddld */
+			asm volatile(PPC_MADDLD(%0, %1, %2, %3) :
+				     "=r" (op->val) : "r" (regs->gpr[ra]),
+				     "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+			goto compute_done;
+		}
+
+		/*
+		 * There are other instructions from ISA 3.0 with the same
+		 * primary opcode which do not have emulation support yet.
+		 */
+		goto unknown_opcode;
+#endif
+
+	case 7:		/* mulli */
+		op->val = regs->gpr[ra] * (short) word;
+		goto compute_done;
+
+	case 8:		/* subfic */
+		imm = (short) word;
+		add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1);
+		return 1;
+
+	case 10:	/* cmpli */
+		imm = (unsigned short) word;
+		val = regs->gpr[ra];
+#ifdef __powerpc64__
+		if ((rd & 1) == 0)
+			val = (unsigned int) val;
+#endif
+		do_cmp_unsigned(regs, op, val, imm, rd >> 2);
+		return 1;
+
+	case 11:	/* cmpi */
+		imm = (short) word;
+		val = regs->gpr[ra];
+#ifdef __powerpc64__
+		if ((rd & 1) == 0)
+			val = (int) val;
+#endif
+		do_cmp_signed(regs, op, val, imm, rd >> 2);
+		return 1;
+
+	case 12:	/* addic */
+		imm = (short) word;
+		add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+		return 1;
+
+	case 13:	/* addic. */
+		imm = (short) word;
+		add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+		set_cr0(regs, op);
+		return 1;
+
+	case 14:	/* addi */
+		imm = (short) word;
+		if (ra)
+			imm += regs->gpr[ra];
+		op->val = imm;
+		goto compute_done;
+
+	case 15:	/* addis */
+		imm = ((short) word) << 16;
+		if (ra)
+			imm += regs->gpr[ra];
+		op->val = imm;
+		goto compute_done;
+
+	case 19:
+		if (((word >> 1) & 0x1f) == 2) {
+			/* addpcis */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			imm = (short) (word & 0xffc1);	/* d0 + d2 fields */
+			imm |= (word >> 15) & 0x3e;	/* d1 field */
+			op->val = regs->nip + (imm << 16) + 4;
+			goto compute_done;
+		}
+		op->type = UNKNOWN;
+		return 0;
+
+	case 20:	/* rlwimi */
+		mb = (word >> 6) & 0x1f;
+		me = (word >> 1) & 0x1f;
+		val = DATA32(regs->gpr[rd]);
+		imm = MASK32(mb, me);
+		op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+		goto logical_done;
+
+	case 21:	/* rlwinm */
+		mb = (word >> 6) & 0x1f;
+		me = (word >> 1) & 0x1f;
+		val = DATA32(regs->gpr[rd]);
+		op->val = ROTATE(val, rb) & MASK32(mb, me);
+		goto logical_done;
+
+	case 23:	/* rlwnm */
+		mb = (word >> 6) & 0x1f;
+		me = (word >> 1) & 0x1f;
+		rb = regs->gpr[rb] & 0x1f;
+		val = DATA32(regs->gpr[rd]);
+		op->val = ROTATE(val, rb) & MASK32(mb, me);
+		goto logical_done;
+
+	case 24:	/* ori */
+		op->val = regs->gpr[rd] | (unsigned short) word;
+		goto logical_done_nocc;
+
+	case 25:	/* oris */
+		imm = (unsigned short) word;
+		op->val = regs->gpr[rd] | (imm << 16);
+		goto logical_done_nocc;
+
+	case 26:	/* xori */
+		op->val = regs->gpr[rd] ^ (unsigned short) word;
+		goto logical_done_nocc;
+
+	case 27:	/* xoris */
+		imm = (unsigned short) word;
+		op->val = regs->gpr[rd] ^ (imm << 16);
+		goto logical_done_nocc;
+
+	case 28:	/* andi. */
+		op->val = regs->gpr[rd] & (unsigned short) word;
+		set_cr0(regs, op);
+		goto logical_done_nocc;
+
+	case 29:	/* andis. */
+		imm = (unsigned short) word;
+		op->val = regs->gpr[rd] & (imm << 16);
+		set_cr0(regs, op);
+		goto logical_done_nocc;
+
+#ifdef __powerpc64__
+	case 30:	/* rld* */
+		mb = ((word >> 6) & 0x1f) | (word & 0x20);
+		val = regs->gpr[rd];
+		if ((word & 0x10) == 0) {
+			sh = rb | ((word & 2) << 4);
+			val = ROTATE(val, sh);
+			switch ((word >> 2) & 3) {
+			case 0:		/* rldicl */
+				val &= MASK64_L(mb);
+				break;
+			case 1:		/* rldicr */
+				val &= MASK64_R(mb);
+				break;
+			case 2:		/* rldic */
+				val &= MASK64(mb, 63 - sh);
+				break;
+			case 3:		/* rldimi */
+				imm = MASK64(mb, 63 - sh);
+				val = (regs->gpr[ra] & ~imm) |
+					(val & imm);
+			}
+			op->val = val;
+			goto logical_done;
+		} else {
+			sh = regs->gpr[rb] & 0x3f;
+			val = ROTATE(val, sh);
+			switch ((word >> 1) & 7) {
+			case 0:		/* rldcl */
+				op->val = val & MASK64_L(mb);
+				goto logical_done;
+			case 1:		/* rldcr */
+				op->val = val & MASK64_R(mb);
+				goto logical_done;
+			}
+		}
+#endif
+		op->type = UNKNOWN;	/* illegal instruction */
+		return 0;
+
+	case 31:
+		/* isel occupies 32 minor opcodes */
+		if (((word >> 1) & 0x1f) == 15) {
+			mb = (word >> 6) & 0x1f; /* bc field */
+			val = (regs->ccr >> (31 - mb)) & 1;
+			val2 = (ra) ? regs->gpr[ra] : 0;
+
+			op->val = (val) ? val2 : regs->gpr[rb];
+			goto compute_done;
+		}
+
+		switch ((word >> 1) & 0x3ff) {
+		case 4:		/* tw */
+			if (rd == 0x1f ||
+			    (rd & trap_compare((int)regs->gpr[ra],
+					       (int)regs->gpr[rb])))
+				goto trap;
+			return 1;
+#ifdef __powerpc64__
+		case 68:	/* td */
+			if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb]))
+				goto trap;
+			return 1;
+#endif
+		case 83:	/* mfmsr */
+			if (regs->msr & MSR_PR)
+				goto priv;
+			op->type = MFMSR;
+			op->reg = rd;
+			return 0;
+		case 146:	/* mtmsr */
+			if (regs->msr & MSR_PR)
+				goto priv;
+			op->type = MTMSR;
+			op->reg = rd;
+			op->val = 0xffffffff & ~(MSR_ME | MSR_LE);
+			return 0;
+#ifdef CONFIG_PPC64
+		case 178:	/* mtmsrd */
+			if (regs->msr & MSR_PR)
+				goto priv;
+			op->type = MTMSR;
+			op->reg = rd;
+			/* only MSR_EE and MSR_RI get changed if bit 15 set */
+			/* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */
+			imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL;
+			op->val = imm;
+			return 0;
+#endif
+
+		case 19:	/* mfcr */
+			imm = 0xffffffffUL;
+			if ((word >> 20) & 1) {
+				imm = 0xf0000000UL;
+				for (sh = 0; sh < 8; ++sh) {
+					if (word & (0x80000 >> sh))
+						break;
+					imm >>= 4;
+				}
+			}
+			op->val = regs->ccr & imm;
+			goto compute_done;
+
+		case 128:	/* setb */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			/*
+			 * 'ra' encodes the CR field number (bfa) in the top 3 bits.
+			 * Since each CR field is 4 bits,
+			 * we can simply mask off the bottom two bits (bfa * 4)
+			 * to yield the first bit in the CR field.
+			 */
+			ra = ra & ~0x3;
+			/* 'val' stores bits of the CR field (bfa) */
+			val = regs->ccr >> (CR0_SHIFT - ra);
+			/* checks if the LT bit of CR field (bfa) is set */
+			if (val & 8)
+				op->val = -1;
+			/* checks if the GT bit of CR field (bfa) is set */
+			else if (val & 4)
+				op->val = 1;
+			else
+				op->val = 0;
+			goto compute_done;
+
+		case 144:	/* mtcrf */
+			op->type = COMPUTE + SETCC;
+			imm = 0xf0000000UL;
+			val = regs->gpr[rd];
+			op->ccval = regs->ccr;
+			for (sh = 0; sh < 8; ++sh) {
+				if (word & (0x80000 >> sh))
+					op->ccval = (op->ccval & ~imm) |
+						(val & imm);
+				imm >>= 4;
+			}
+			return 1;
+
+		case 339:	/* mfspr */
+			spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
+			op->type = MFSPR;
+			op->reg = rd;
+			op->spr = spr;
+			if (spr == SPRN_XER || spr == SPRN_LR ||
+			    spr == SPRN_CTR)
+				return 1;
+			return 0;
+
+		case 467:	/* mtspr */
+			spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
+			op->type = MTSPR;
+			op->val = regs->gpr[rd];
+			op->spr = spr;
+			if (spr == SPRN_XER || spr == SPRN_LR ||
+			    spr == SPRN_CTR)
+				return 1;
+			return 0;
+
+/*
+ * Compare instructions
+ */
+		case 0:	/* cmp */
+			val = regs->gpr[ra];
+			val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+			if ((rd & 1) == 0) {
+				/* word (32-bit) compare */
+				val = (int) val;
+				val2 = (int) val2;
+			}
+#endif
+			do_cmp_signed(regs, op, val, val2, rd >> 2);
+			return 1;
+
+		case 32:	/* cmpl */
+			val = regs->gpr[ra];
+			val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+			if ((rd & 1) == 0) {
+				/* word (32-bit) compare */
+				val = (unsigned int) val;
+				val2 = (unsigned int) val2;
+			}
+#endif
+			do_cmp_unsigned(regs, op, val, val2, rd >> 2);
+			return 1;
+
+		case 508: /* cmpb */
+			do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]);
+			goto logical_done_nocc;
+
+/*
+ * Arithmetic instructions
+ */
+		case 8:	/* subfc */
+			add_with_carry(regs, op, rd, ~regs->gpr[ra],
+				       regs->gpr[rb], 1);
+			goto arith_done;
+#ifdef __powerpc64__
+		case 9:	/* mulhdu */
+			asm("mulhdu %0,%1,%2" : "=r" (op->val) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+#endif
+		case 10:	/* addc */
+			add_with_carry(regs, op, rd, regs->gpr[ra],
+				       regs->gpr[rb], 0);
+			goto arith_done;
+
+		case 11:	/* mulhwu */
+			asm("mulhwu %0,%1,%2" : "=r" (op->val) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+
+		case 40:	/* subf */
+			op->val = regs->gpr[rb] - regs->gpr[ra];
+			goto arith_done;
+#ifdef __powerpc64__
+		case 73:	/* mulhd */
+			asm("mulhd %0,%1,%2" : "=r" (op->val) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+#endif
+		case 75:	/* mulhw */
+			asm("mulhw %0,%1,%2" : "=r" (op->val) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+
+		case 104:	/* neg */
+			op->val = -regs->gpr[ra];
+			goto arith_done;
+
+		case 136:	/* subfe */
+			add_with_carry(regs, op, rd, ~regs->gpr[ra],
+				       regs->gpr[rb], regs->xer & XER_CA);
+			goto arith_done;
+
+		case 138:	/* adde */
+			add_with_carry(regs, op, rd, regs->gpr[ra],
+				       regs->gpr[rb], regs->xer & XER_CA);
+			goto arith_done;
+
+		case 200:	/* subfze */
+			add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 202:	/* addze */
+			add_with_carry(regs, op, rd, regs->gpr[ra], 0L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 232:	/* subfme */
+			add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+#ifdef __powerpc64__
+		case 233:	/* mulld */
+			op->val = regs->gpr[ra] * regs->gpr[rb];
+			goto arith_done;
+#endif
+		case 234:	/* addme */
+			add_with_carry(regs, op, rd, regs->gpr[ra], -1L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 235:	/* mullw */
+			op->val = (long)(int) regs->gpr[ra] *
+				(int) regs->gpr[rb];
+
+			goto arith_done;
+#ifdef __powerpc64__
+		case 265:	/* modud */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->val = regs->gpr[ra] % regs->gpr[rb];
+			goto compute_done;
+#endif
+		case 266:	/* add */
+			op->val = regs->gpr[ra] + regs->gpr[rb];
+			goto arith_done;
+
+		case 267:	/* moduw */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->val = (unsigned int) regs->gpr[ra] %
+				(unsigned int) regs->gpr[rb];
+			goto compute_done;
+#ifdef __powerpc64__
+		case 457:	/* divdu */
+			op->val = regs->gpr[ra] / regs->gpr[rb];
+			goto arith_done;
+#endif
+		case 459:	/* divwu */
+			op->val = (unsigned int) regs->gpr[ra] /
+				(unsigned int) regs->gpr[rb];
+			goto arith_done;
+#ifdef __powerpc64__
+		case 489:	/* divd */
+			op->val = (long int) regs->gpr[ra] /
+				(long int) regs->gpr[rb];
+			goto arith_done;
+#endif
+		case 491:	/* divw */
+			op->val = (int) regs->gpr[ra] /
+				(int) regs->gpr[rb];
+			goto arith_done;
+#ifdef __powerpc64__
+		case 425:	/* divde[.] */
+			asm volatile(PPC_DIVDE(%0, %1, %2) :
+				"=r" (op->val) : "r" (regs->gpr[ra]),
+				"r" (regs->gpr[rb]));
+			goto arith_done;
+		case 393:	/* divdeu[.] */
+			asm volatile(PPC_DIVDEU(%0, %1, %2) :
+				"=r" (op->val) : "r" (regs->gpr[ra]),
+				"r" (regs->gpr[rb]));
+			goto arith_done;
+#endif
+		case 755:	/* darn */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			switch (ra & 0x3) {
+			case 0:
+				/* 32-bit conditioned */
+				asm volatile(PPC_DARN(%0, 0) : "=r" (op->val));
+				goto compute_done;
+
+			case 1:
+				/* 64-bit conditioned */
+				asm volatile(PPC_DARN(%0, 1) : "=r" (op->val));
+				goto compute_done;
+
+			case 2:
+				/* 64-bit raw */
+				asm volatile(PPC_DARN(%0, 2) : "=r" (op->val));
+				goto compute_done;
+			}
+
+			goto unknown_opcode;
+#ifdef __powerpc64__
+		case 777:	/* modsd */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->val = (long int) regs->gpr[ra] %
+				(long int) regs->gpr[rb];
+			goto compute_done;
+#endif
+		case 779:	/* modsw */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->val = (int) regs->gpr[ra] %
+				(int) regs->gpr[rb];
+			goto compute_done;
+
+
+/*
+ * Logical instructions
+ */
+		case 26:	/* cntlzw */
+			val = (unsigned int) regs->gpr[rd];
+			op->val = ( val ? __builtin_clz(val) : 32 );
+			goto logical_done;
+#ifdef __powerpc64__
+		case 58:	/* cntlzd */
+			val = regs->gpr[rd];
+			op->val = ( val ? __builtin_clzl(val) : 64 );
+			goto logical_done;
+#endif
+		case 28:	/* and */
+			op->val = regs->gpr[rd] & regs->gpr[rb];
+			goto logical_done;
+
+		case 60:	/* andc */
+			op->val = regs->gpr[rd] & ~regs->gpr[rb];
+			goto logical_done;
+
+		case 122:	/* popcntb */
+			do_popcnt(regs, op, regs->gpr[rd], 8);
+			goto logical_done_nocc;
+
+		case 124:	/* nor */
+			op->val = ~(regs->gpr[rd] | regs->gpr[rb]);
+			goto logical_done;
+
+		case 154:	/* prtyw */
+			do_prty(regs, op, regs->gpr[rd], 32);
+			goto logical_done_nocc;
+
+		case 186:	/* prtyd */
+			do_prty(regs, op, regs->gpr[rd], 64);
+			goto logical_done_nocc;
+#ifdef CONFIG_PPC64
+		case 252:	/* bpermd */
+			do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]);
+			goto logical_done_nocc;
+#endif
+		case 284:	/* xor */
+			op->val = ~(regs->gpr[rd] ^ regs->gpr[rb]);
+			goto logical_done;
+
+		case 316:	/* xor */
+			op->val = regs->gpr[rd] ^ regs->gpr[rb];
+			goto logical_done;
+
+		case 378:	/* popcntw */
+			do_popcnt(regs, op, regs->gpr[rd], 32);
+			goto logical_done_nocc;
+
+		case 412:	/* orc */
+			op->val = regs->gpr[rd] | ~regs->gpr[rb];
+			goto logical_done;
+
+		case 444:	/* or */
+			op->val = regs->gpr[rd] | regs->gpr[rb];
+			goto logical_done;
+
+		case 476:	/* nand */
+			op->val = ~(regs->gpr[rd] & regs->gpr[rb]);
+			goto logical_done;
+#ifdef CONFIG_PPC64
+		case 506:	/* popcntd */
+			do_popcnt(regs, op, regs->gpr[rd], 64);
+			goto logical_done_nocc;
+#endif
+		case 538:	/* cnttzw */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			val = (unsigned int) regs->gpr[rd];
+			op->val = (val ? __builtin_ctz(val) : 32);
+			goto logical_done;
+#ifdef __powerpc64__
+		case 570:	/* cnttzd */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			val = regs->gpr[rd];
+			op->val = (val ? __builtin_ctzl(val) : 64);
+			goto logical_done;
+#endif
+		case 922:	/* extsh */
+			op->val = (signed short) regs->gpr[rd];
+			goto logical_done;
+
+		case 954:	/* extsb */
+			op->val = (signed char) regs->gpr[rd];
+			goto logical_done;
+#ifdef __powerpc64__
+		case 986:	/* extsw */
+			op->val = (signed int) regs->gpr[rd];
+			goto logical_done;
+#endif
+
+/*
+ * Shift instructions
+ */
+		case 24:	/* slw */
+			sh = regs->gpr[rb] & 0x3f;
+			if (sh < 32)
+				op->val = (regs->gpr[rd] << sh) & 0xffffffffUL;
+			else
+				op->val = 0;
+			goto logical_done;
+
+		case 536:	/* srw */
+			sh = regs->gpr[rb] & 0x3f;
+			if (sh < 32)
+				op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+			else
+				op->val = 0;
+			goto logical_done;
+
+		case 792:	/* sraw */
+			op->type = COMPUTE + SETREG + SETXER;
+			sh = regs->gpr[rb] & 0x3f;
+			ival = (signed int) regs->gpr[rd];
+			op->val = ival >> (sh < 32 ? sh : 31);
+			op->xerval = regs->xer;
+			if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
+				op->xerval |= XER_CA;
+			else
+				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
+			goto logical_done;
+
+		case 824:	/* srawi */
+			op->type = COMPUTE + SETREG + SETXER;
+			sh = rb;
+			ival = (signed int) regs->gpr[rd];
+			op->val = ival >> sh;
+			op->xerval = regs->xer;
+			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+				op->xerval |= XER_CA;
+			else
+				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
+			goto logical_done;
+
+#ifdef __powerpc64__
+		case 27:	/* sld */
+			sh = regs->gpr[rb] & 0x7f;
+			if (sh < 64)
+				op->val = regs->gpr[rd] << sh;
+			else
+				op->val = 0;
+			goto logical_done;
+
+		case 539:	/* srd */
+			sh = regs->gpr[rb] & 0x7f;
+			if (sh < 64)
+				op->val = regs->gpr[rd] >> sh;
+			else
+				op->val = 0;
+			goto logical_done;
+
+		case 794:	/* srad */
+			op->type = COMPUTE + SETREG + SETXER;
+			sh = regs->gpr[rb] & 0x7f;
+			ival = (signed long int) regs->gpr[rd];
+			op->val = ival >> (sh < 64 ? sh : 63);
+			op->xerval = regs->xer;
+			if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
+				op->xerval |= XER_CA;
+			else
+				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
+			goto logical_done;
+
+		case 826:	/* sradi with sh_5 = 0 */
+		case 827:	/* sradi with sh_5 = 1 */
+			op->type = COMPUTE + SETREG + SETXER;
+			sh = rb | ((word & 2) << 4);
+			ival = (signed long int) regs->gpr[rd];
+			op->val = ival >> sh;
+			op->xerval = regs->xer;
+			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+				op->xerval |= XER_CA;
+			else
+				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
+			goto logical_done;
+
+		case 890:	/* extswsli with sh_5 = 0 */
+		case 891:	/* extswsli with sh_5 = 1 */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->type = COMPUTE + SETREG;
+			sh = rb | ((word & 2) << 4);
+			val = (signed int) regs->gpr[rd];
+			if (sh)
+				op->val = ROTATE(val, sh) & MASK64(0, 63 - sh);
+			else
+				op->val = val;
+			goto logical_done;
+
+#endif /* __powerpc64__ */
+
+/*
+ * Cache instructions
+ */
+		case 54:	/* dcbst */
+			op->type = MKOP(CACHEOP, DCBST, 0);
+			op->ea = xform_ea(word, regs);
+			return 0;
+
+		case 86:	/* dcbf */
+			op->type = MKOP(CACHEOP, DCBF, 0);
+			op->ea = xform_ea(word, regs);
+			return 0;
+
+		case 246:	/* dcbtst */
+			op->type = MKOP(CACHEOP, DCBTST, 0);
+			op->ea = xform_ea(word, regs);
+			op->reg = rd;
+			return 0;
+
+		case 278:	/* dcbt */
+			op->type = MKOP(CACHEOP, DCBTST, 0);
+			op->ea = xform_ea(word, regs);
+			op->reg = rd;
+			return 0;
+
+		case 982:	/* icbi */
+			op->type = MKOP(CACHEOP, ICBI, 0);
+			op->ea = xform_ea(word, regs);
+			return 0;
+
+		case 1014:	/* dcbz */
+			op->type = MKOP(CACHEOP, DCBZ, 0);
+			op->ea = xform_ea(word, regs);
+			return 0;
+		}
+		break;
+	}
+
+/*
+ * Loads and stores.
+ */
+	op->type = UNKNOWN;
+	op->update_reg = ra;
+	op->reg = rd;
+	op->val = regs->gpr[rd];
+	u = (word >> 20) & UPDATE;
+	op->vsx_flags = 0;
+
+	switch (opcode) {
+	case 31:
+		u = word & UPDATE;
+		op->ea = xform_ea(word, regs);
+		switch ((word >> 1) & 0x3ff) {
+		case 20:	/* lwarx */
+			op->type = MKOP(LARX, 0, 4);
+			break;
+
+		case 150:	/* stwcx. */
+			op->type = MKOP(STCX, 0, 4);
+			break;
+
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
+		case 52:	/* lbarx */
+			op->type = MKOP(LARX, 0, 1);
+			break;
+
+		case 694:	/* stbcx. */
+			op->type = MKOP(STCX, 0, 1);
+			break;
+
+		case 116:	/* lharx */
+			op->type = MKOP(LARX, 0, 2);
+			break;
+
+		case 726:	/* sthcx. */
+			op->type = MKOP(STCX, 0, 2);
+			break;
+#endif
+#ifdef __powerpc64__
+		case 84:	/* ldarx */
+			op->type = MKOP(LARX, 0, 8);
+			break;
+
+		case 214:	/* stdcx. */
+			op->type = MKOP(STCX, 0, 8);
+			break;
+
+		case 276:	/* lqarx */
+			if (!((rd & 1) || rd == ra || rd == rb))
+				op->type = MKOP(LARX, 0, 16);
+			break;
+
+		case 182:	/* stqcx. */
+			if (!(rd & 1))
+				op->type = MKOP(STCX, 0, 16);
+			break;
+#endif
+
+		case 23:	/* lwzx */
+		case 55:	/* lwzux */
+			op->type = MKOP(LOAD, u, 4);
+			break;
+
+		case 87:	/* lbzx */
+		case 119:	/* lbzux */
+			op->type = MKOP(LOAD, u, 1);
+			break;
+
+#ifdef CONFIG_ALTIVEC
+		/*
+		 * Note: for the load/store vector element instructions,
+		 * bits of the EA say which field of the VMX register to use.
+		 */
+		case 7:		/* lvebx */
+			op->type = MKOP(LOAD_VMX, 0, 1);
+			op->element_size = 1;
+			break;
+
+		case 39:	/* lvehx */
+			op->type = MKOP(LOAD_VMX, 0, 2);
+			op->element_size = 2;
+			break;
+
+		case 71:	/* lvewx */
+			op->type = MKOP(LOAD_VMX, 0, 4);
+			op->element_size = 4;
+			break;
+
+		case 103:	/* lvx */
+		case 359:	/* lvxl */
+			op->type = MKOP(LOAD_VMX, 0, 16);
+			op->element_size = 16;
+			break;
+
+		case 135:	/* stvebx */
+			op->type = MKOP(STORE_VMX, 0, 1);
+			op->element_size = 1;
+			break;
+
+		case 167:	/* stvehx */
+			op->type = MKOP(STORE_VMX, 0, 2);
+			op->element_size = 2;
+			break;
+
+		case 199:	/* stvewx */
+			op->type = MKOP(STORE_VMX, 0, 4);
+			op->element_size = 4;
+			break;
+
+		case 231:	/* stvx */
+		case 487:	/* stvxl */
+			op->type = MKOP(STORE_VMX, 0, 16);
+			break;
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef __powerpc64__
+		case 21:	/* ldx */
+		case 53:	/* ldux */
+			op->type = MKOP(LOAD, u, 8);
+			break;
+
+		case 149:	/* stdx */
+		case 181:	/* stdux */
+			op->type = MKOP(STORE, u, 8);
+			break;
+#endif
+
+		case 151:	/* stwx */
+		case 183:	/* stwux */
+			op->type = MKOP(STORE, u, 4);
+			break;
+
+		case 215:	/* stbx */
+		case 247:	/* stbux */
+			op->type = MKOP(STORE, u, 1);
+			break;
+
+		case 279:	/* lhzx */
+		case 311:	/* lhzux */
+			op->type = MKOP(LOAD, u, 2);
+			break;
+
+#ifdef __powerpc64__
+		case 341:	/* lwax */
+		case 373:	/* lwaux */
+			op->type = MKOP(LOAD, SIGNEXT | u, 4);
+			break;
+#endif
+
+		case 343:	/* lhax */
+		case 375:	/* lhaux */
+			op->type = MKOP(LOAD, SIGNEXT | u, 2);
+			break;
+
+		case 407:	/* sthx */
+		case 439:	/* sthux */
+			op->type = MKOP(STORE, u, 2);
+			break;
+
+#ifdef __powerpc64__
+		case 532:	/* ldbrx */
+			op->type = MKOP(LOAD, BYTEREV, 8);
+			break;
+
+#endif
+		case 533:	/* lswx */
+			op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f);
+			break;
+
+		case 534:	/* lwbrx */
+			op->type = MKOP(LOAD, BYTEREV, 4);
+			break;
+
+		case 597:	/* lswi */
+			if (rb == 0)
+				rb = 32;	/* # bytes to load */
+			op->type = MKOP(LOAD_MULTI, 0, rb);
+			op->ea = ra ? regs->gpr[ra] : 0;
+			break;
+
+#ifdef CONFIG_PPC_FPU
+		case 535:	/* lfsx */
+		case 567:	/* lfsux */
+			op->type = MKOP(LOAD_FP, u | FPCONV, 4);
+			break;
+
+		case 599:	/* lfdx */
+		case 631:	/* lfdux */
+			op->type = MKOP(LOAD_FP, u, 8);
+			break;
+
+		case 663:	/* stfsx */
+		case 695:	/* stfsux */
+			op->type = MKOP(STORE_FP, u | FPCONV, 4);
+			break;
+
+		case 727:	/* stfdx */
+		case 759:	/* stfdux */
+			op->type = MKOP(STORE_FP, u, 8);
+			break;
+
+#ifdef __powerpc64__
+		case 791:	/* lfdpx */
+			op->type = MKOP(LOAD_FP, 0, 16);
+			break;
+
+		case 855:	/* lfiwax */
+			op->type = MKOP(LOAD_FP, SIGNEXT, 4);
+			break;
+
+		case 887:	/* lfiwzx */
+			op->type = MKOP(LOAD_FP, 0, 4);
+			break;
+
+		case 919:	/* stfdpx */
+			op->type = MKOP(STORE_FP, 0, 16);
+			break;
+
+		case 983:	/* stfiwx */
+			op->type = MKOP(STORE_FP, 0, 4);
+			break;
+#endif /* __powerpc64 */
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef __powerpc64__
+		case 660:	/* stdbrx */
+			op->type = MKOP(STORE, BYTEREV, 8);
+			op->val = byterev_8(regs->gpr[rd]);
+			break;
+
+#endif
+		case 661:	/* stswx */
+			op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f);
+			break;
+
+		case 662:	/* stwbrx */
+			op->type = MKOP(STORE, BYTEREV, 4);
+			op->val = byterev_4(regs->gpr[rd]);
+			break;
+
+		case 725:	/* stswi */
+			if (rb == 0)
+				rb = 32;	/* # bytes to store */
+			op->type = MKOP(STORE_MULTI, 0, rb);
+			op->ea = ra ? regs->gpr[ra] : 0;
+			break;
+
+		case 790:	/* lhbrx */
+			op->type = MKOP(LOAD, BYTEREV, 2);
+			break;
+
+		case 918:	/* sthbrx */
+			op->type = MKOP(STORE, BYTEREV, 2);
+			op->val = byterev_2(regs->gpr[rd]);
+			break;
+
+#ifdef CONFIG_VSX
+		case 12:	/* lxsiwzx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			break;
+
+		case 76:	/* lxsiwax */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, SIGNEXT, 4);
+			op->element_size = 8;
+			break;
+
+		case 140:	/* stxsiwx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			break;
+
+		case 268:	/* lxvx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 269:	/* lxvl */
+		case 301: {	/* lxvll */
+			int nb;
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->ea = ra ? regs->gpr[ra] : 0;
+			nb = regs->gpr[rb] & 0xff;
+			if (nb > 16)
+				nb = 16;
+			op->type = MKOP(LOAD_VSX, 0, nb);
+			op->element_size = 16;
+			op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
+				VSX_CHECK_VEC;
+			break;
+		}
+		case 332:	/* lxvdsx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_SPLAT;
+			break;
+
+		case 333:       /* lxvpx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_31))
+				goto unknown_opcode;
+			op->reg = VSX_REGISTER_XTP(rd);
+			op->type = MKOP(LOAD_VSX, 0, 32);
+			op->element_size = 32;
+			break;
+
+		case 364:	/* lxvwsx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 4;
+			op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC;
+			break;
+
+		case 396:	/* stxvx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 397:	/* stxvl */
+		case 429: {	/* stxvll */
+			int nb;
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->ea = ra ? regs->gpr[ra] : 0;
+			nb = regs->gpr[rb] & 0xff;
+			if (nb > 16)
+				nb = 16;
+			op->type = MKOP(STORE_VSX, 0, nb);
+			op->element_size = 16;
+			op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
+				VSX_CHECK_VEC;
+			break;
+		}
+		case 461:       /* stxvpx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_31))
+				goto unknown_opcode;
+			op->reg = VSX_REGISTER_XTP(rd);
+			op->type = MKOP(STORE_VSX, 0, 32);
+			op->element_size = 32;
+			break;
+		case 524:	/* lxsspx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV;
+			break;
+
+		case 588:	/* lxsdx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			break;
+
+		case 652:	/* stxsspx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV;
+			break;
+
+		case 716:	/* stxsdx */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 8);
+			op->element_size = 8;
+			break;
+
+		case 780:	/* lxvw4x */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 4;
+			break;
+
+		case 781:	/* lxsibzx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 1);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 812:	/* lxvh8x */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 2;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 813:	/* lxsihzx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 2);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 844:	/* lxvd2x */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 8;
+			break;
+
+		case 876:	/* lxvb16x */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 1;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 908:	/* stxvw4x */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 4;
+			break;
+
+		case 909:	/* stxsibx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 1);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 940:	/* stxvh8x */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 2;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 941:	/* stxsihx */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 2);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 972:	/* stxvd2x */
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 8;
+			break;
+
+		case 1004:	/* stxvb16x */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd | ((word & 1) << 5);
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 1;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+#endif /* CONFIG_VSX */
+		}
+		break;
+
+	case 32:	/* lwz */
+	case 33:	/* lwzu */
+		op->type = MKOP(LOAD, u, 4);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 34:	/* lbz */
+	case 35:	/* lbzu */
+		op->type = MKOP(LOAD, u, 1);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 36:	/* stw */
+	case 37:	/* stwu */
+		op->type = MKOP(STORE, u, 4);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 38:	/* stb */
+	case 39:	/* stbu */
+		op->type = MKOP(STORE, u, 1);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 40:	/* lhz */
+	case 41:	/* lhzu */
+		op->type = MKOP(LOAD, u, 2);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 42:	/* lha */
+	case 43:	/* lhau */
+		op->type = MKOP(LOAD, SIGNEXT | u, 2);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 44:	/* sth */
+	case 45:	/* sthu */
+		op->type = MKOP(STORE, u, 2);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 46:	/* lmw */
+		if (ra >= rd)
+			break;		/* invalid form, ra in range to load */
+		op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd));
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 47:	/* stmw */
+		op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd));
+		op->ea = dform_ea(word, regs);
+		break;
+
+#ifdef CONFIG_PPC_FPU
+	case 48:	/* lfs */
+	case 49:	/* lfsu */
+		op->type = MKOP(LOAD_FP, u | FPCONV, 4);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 50:	/* lfd */
+	case 51:	/* lfdu */
+		op->type = MKOP(LOAD_FP, u, 8);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 52:	/* stfs */
+	case 53:	/* stfsu */
+		op->type = MKOP(STORE_FP, u | FPCONV, 4);
+		op->ea = dform_ea(word, regs);
+		break;
+
+	case 54:	/* stfd */
+	case 55:	/* stfdu */
+		op->type = MKOP(STORE_FP, u, 8);
+		op->ea = dform_ea(word, regs);
+		break;
+#endif
+
+#ifdef __powerpc64__
+	case 56:	/* lq */
+		if (!((rd & 1) || (rd == ra)))
+			op->type = MKOP(LOAD, 0, 16);
+		op->ea = dqform_ea(word, regs);
+		break;
+#endif
+
+#ifdef CONFIG_VSX
+	case 57:	/* lfdp, lxsd, lxssp */
+		op->ea = dsform_ea(word, regs);
+		switch (word & 3) {
+		case 0:		/* lfdp */
+			if (rd & 1)
+				break;		/* reg must be even */
+			op->type = MKOP(LOAD_FP, 0, 16);
+			break;
+		case 2:		/* lxsd */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+		case 3:		/* lxssp */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+			break;
+		}
+		break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
+	case 58:	/* ld[u], lwa */
+		op->ea = dsform_ea(word, regs);
+		switch (word & 3) {
+		case 0:		/* ld */
+			op->type = MKOP(LOAD, 0, 8);
+			break;
+		case 1:		/* ldu */
+			op->type = MKOP(LOAD, UPDATE, 8);
+			break;
+		case 2:		/* lwa */
+			op->type = MKOP(LOAD, SIGNEXT, 4);
+			break;
+		}
+		break;
+#endif
+
+#ifdef CONFIG_VSX
+	case 6:
+		if (!cpu_has_feature(CPU_FTR_ARCH_31))
+			goto unknown_opcode;
+		op->ea = dqform_ea(word, regs);
+		op->reg = VSX_REGISTER_XTP(rd);
+		op->element_size = 32;
+		switch (word & 0xf) {
+		case 0:         /* lxvp */
+			op->type = MKOP(LOAD_VSX, 0, 32);
+			break;
+		case 1:         /* stxvp */
+			op->type = MKOP(STORE_VSX, 0, 32);
+			break;
+		}
+		break;
+
+	case 61:	/* stfdp, lxv, stxsd, stxssp, stxv */
+		switch (word & 7) {
+		case 0:		/* stfdp with LSB of DS field = 0 */
+		case 4:		/* stfdp with LSB of DS field = 1 */
+			op->ea = dsform_ea(word, regs);
+			op->type = MKOP(STORE_FP, 0, 16);
+			break;
+
+		case 1:		/* lxv */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->ea = dqform_ea(word, regs);
+			if (word & 8)
+				op->reg = rd + 32;
+			op->type = MKOP(LOAD_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 2:		/* stxsd with LSB of DS field = 0 */
+		case 6:		/* stxsd with LSB of DS field = 1 */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->ea = dsform_ea(word, regs);
+			op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 8);
+			op->element_size = 8;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+
+		case 3:		/* stxssp with LSB of DS field = 0 */
+		case 7:		/* stxssp with LSB of DS field = 1 */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->ea = dsform_ea(word, regs);
+			op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 4);
+			op->element_size = 8;
+			op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+			break;
+
+		case 5:		/* stxv */
+			if (!cpu_has_feature(CPU_FTR_ARCH_300))
+				goto unknown_opcode;
+			op->ea = dqform_ea(word, regs);
+			if (word & 8)
+				op->reg = rd + 32;
+			op->type = MKOP(STORE_VSX, 0, 16);
+			op->element_size = 16;
+			op->vsx_flags = VSX_CHECK_VEC;
+			break;
+		}
+		break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
+	case 62:	/* std[u] */
+		op->ea = dsform_ea(word, regs);
+		switch (word & 3) {
+		case 0:		/* std */
+			op->type = MKOP(STORE, 0, 8);
+			break;
+		case 1:		/* stdu */
+			op->type = MKOP(STORE, UPDATE, 8);
+			break;
+		case 2:		/* stq */
+			if (!(rd & 1))
+				op->type = MKOP(STORE, 0, 16);
+			break;
+		}
+		break;
+	case 1: /* Prefixed instructions */
+		if (!cpu_has_feature(CPU_FTR_ARCH_31))
+			goto unknown_opcode;
+
+		prefix_r = GET_PREFIX_R(word);
+		ra = GET_PREFIX_RA(suffix);
+		op->update_reg = ra;
+		rd = (suffix >> 21) & 0x1f;
+		op->reg = rd;
+		op->val = regs->gpr[rd];
+
+		suffixopcode = get_op(suffix);
+		prefixtype = (word >> 24) & 0x3;
+		switch (prefixtype) {
+		case 0: /* Type 00  Eight-Byte Load/Store */
+			if (prefix_r && ra)
+				break;
+			op->ea = mlsd_8lsd_ea(word, suffix, regs);
+			switch (suffixopcode) {
+			case 41:	/* plwa */
+				op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4);
+				break;
+#ifdef CONFIG_VSX
+			case 42:        /* plxsd */
+				op->reg = rd + 32;
+				op->type = MKOP(LOAD_VSX, PREFIXED, 8);
+				op->element_size = 8;
+				op->vsx_flags = VSX_CHECK_VEC;
+				break;
+			case 43:	/* plxssp */
+				op->reg = rd + 32;
+				op->type = MKOP(LOAD_VSX, PREFIXED, 4);
+				op->element_size = 8;
+				op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+				break;
+			case 46:	/* pstxsd */
+				op->reg = rd + 32;
+				op->type = MKOP(STORE_VSX, PREFIXED, 8);
+				op->element_size = 8;
+				op->vsx_flags = VSX_CHECK_VEC;
+				break;
+			case 47:	/* pstxssp */
+				op->reg = rd + 32;
+				op->type = MKOP(STORE_VSX, PREFIXED, 4);
+				op->element_size = 8;
+				op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+				break;
+			case 51:	/* plxv1 */
+				op->reg += 32;
+				fallthrough;
+			case 50:	/* plxv0 */
+				op->type = MKOP(LOAD_VSX, PREFIXED, 16);
+				op->element_size = 16;
+				op->vsx_flags = VSX_CHECK_VEC;
+				break;
+			case 55:	/* pstxv1 */
+				op->reg = rd + 32;
+				fallthrough;
+			case 54:	/* pstxv0 */
+				op->type = MKOP(STORE_VSX, PREFIXED, 16);
+				op->element_size = 16;
+				op->vsx_flags = VSX_CHECK_VEC;
+				break;
+#endif /* CONFIG_VSX */
+			case 56:        /* plq */
+				op->type = MKOP(LOAD, PREFIXED, 16);
+				break;
+			case 57:	/* pld */
+				op->type = MKOP(LOAD, PREFIXED, 8);
+				break;
+#ifdef CONFIG_VSX
+			case 58:        /* plxvp */
+				op->reg = VSX_REGISTER_XTP(rd);
+				op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+				op->element_size = 32;
+				break;
+#endif /* CONFIG_VSX */
+			case 60:        /* pstq */
+				op->type = MKOP(STORE, PREFIXED, 16);
+				break;
+			case 61:	/* pstd */
+				op->type = MKOP(STORE, PREFIXED, 8);
+				break;
+#ifdef CONFIG_VSX
+			case 62:        /* pstxvp */
+				op->reg = VSX_REGISTER_XTP(rd);
+				op->type = MKOP(STORE_VSX, PREFIXED, 32);
+				op->element_size = 32;
+				break;
+#endif /* CONFIG_VSX */
+			}
+			break;
+		case 1: /* Type 01 Eight-Byte Register-to-Register */
+			break;
+		case 2: /* Type 10 Modified Load/Store */
+			if (prefix_r && ra)
+				break;
+			op->ea = mlsd_8lsd_ea(word, suffix, regs);
+			switch (suffixopcode) {
+			case 32:	/* plwz */
+				op->type = MKOP(LOAD, PREFIXED, 4);
+				break;
+			case 34:	/* plbz */
+				op->type = MKOP(LOAD, PREFIXED, 1);
+				break;
+			case 36:	/* pstw */
+				op->type = MKOP(STORE, PREFIXED, 4);
+				break;
+			case 38:	/* pstb */
+				op->type = MKOP(STORE, PREFIXED, 1);
+				break;
+			case 40:	/* plhz */
+				op->type = MKOP(LOAD, PREFIXED, 2);
+				break;
+			case 42:	/* plha */
+				op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2);
+				break;
+			case 44:	/* psth */
+				op->type = MKOP(STORE, PREFIXED, 2);
+				break;
+			case 48:        /* plfs */
+				op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4);
+				break;
+			case 50:        /* plfd */
+				op->type = MKOP(LOAD_FP, PREFIXED, 8);
+				break;
+			case 52:        /* pstfs */
+				op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4);
+				break;
+			case 54:        /* pstfd */
+				op->type = MKOP(STORE_FP, PREFIXED, 8);
+				break;
+			}
+			break;
+		case 3: /* Type 11 Modified Register-to-Register */
+			break;
+		}
+#endif /* __powerpc64__ */
+
+	}
+
+	if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) {
+		switch (GETTYPE(op->type)) {
+		case LOAD:
+			if (ra == rd)
+				goto unknown_opcode;
+			fallthrough;
+		case STORE:
+		case LOAD_FP:
+		case STORE_FP:
+			if (ra == 0)
+				goto unknown_opcode;
+		}
+	}
+
+#ifdef CONFIG_VSX
+	if ((GETTYPE(op->type) == LOAD_VSX ||
+	     GETTYPE(op->type) == STORE_VSX) &&
+	    !cpu_has_feature(CPU_FTR_VSX)) {
+		return -1;
+	}
+#endif /* CONFIG_VSX */
+
+	return 0;
+
+ unknown_opcode:
+	op->type = UNKNOWN;
+	return 0;
+
+ logical_done:
+	if (word & 1)
+		set_cr0(regs, op);
+ logical_done_nocc:
+	op->reg = ra;
+	op->type |= SETREG;
+	return 1;
+
+ arith_done:
+	if (word & 1)
+		set_cr0(regs, op);
+ compute_done:
+	op->reg = rd;
+	op->type |= SETREG;
+	return 1;
+
+ priv:
+	op->type = INTERRUPT | 0x700;
+	op->val = SRR1_PROGPRIV;
+	return 0;
+
+ trap:
+	op->type = INTERRUPT | 0x700;
+	op->val = SRR1_PROGTRAP;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(analyse_instr);
+NOKPROBE_SYMBOL(analyse_instr);
+
+/*
+ * On PPC32 the stack pointer is always changed with stwu on r1, so
+ * emulating that store directly could overwrite the exception frame.
+ * Instead of performing the store here, only flag it as pending with
+ * TIF_EMULATE_STACK_STORE; the caller still updates gpr[1], and the
+ * exception return code is expected to do the real store once it is
+ * safe.  ea and regs are unused.  Always returns 0.
+ */
+static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs)
+{
+	/* An already-set flag means an earlier pending store is being lost. */
+	bool already_pending = test_thread_flag(TIF_EMULATE_STACK_STORE);
+
+	WARN_ON(already_pending);
+	set_thread_flag(TIF_EMULATE_STACK_STORE);
+
+	return 0;
+}
+
+static nokprobe_inline void do_signext(unsigned long *valp, int size)
+{
+	/*
+	 * Sign-extend the low 2 or 4 bytes of *valp in place.
+	 * Any other size leaves the value untouched.
+	 */
+	if (size == 2)
+		*valp = (signed short) *valp;
+	else if (size == 4)
+		*valp = (signed int) *valp;
+}
+
+static nokprobe_inline void do_byterev(unsigned long *valp, int size)
+{
+	/*
+	 * Replace *valp with byterev_2(), byterev_4() or byterev_8() of
+	 * itself, chosen by size.  The 8-byte form exists only on
+	 * __powerpc64__ builds; any other size leaves *valp untouched.
+	 */
+	if (size == 2)
+		*valp = byterev_2(*valp);
+	else if (size == 4)
+		*valp = byterev_4(*valp);
+#ifdef __powerpc64__
+	else if (size == 8)
+		*valp = byterev_8(*valp);
+#endif
+}
+
+/*
+ * Emulate an analysed COMPUTE, BRANCH, BARRIER, MFSPR or MTSPR op by updating
+ * *regs, then set the return IP past it (or to op->val for a taken branch).
+ */
+void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
+{
+	unsigned long next_pc;	/* address execution resumes at */
+
+	next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type));
+	switch (GETTYPE(op->type)) {
+	case COMPUTE:
+		if (op->type & SETREG)
+			regs->gpr[op->reg] = op->val;
+		if (op->type & SETCC)
+			regs->ccr = op->ccval;
+		if (op->type & SETXER)
+			regs->xer = op->xerval;
+		break;
+
+	case BRANCH:
+		if (op->type & SETLK)
+			regs->link = next_pc;	/* still the fall-through address here */
+		if (op->type & BRTAKEN)
+			next_pc = op->val;
+		if (op->type & DECCTR)
+			--regs->ctr;
+		break;
+
+	case BARRIER:
+		switch (op->type & BARRIER_MASK) {	/* run the real barrier */
+		case BARRIER_SYNC:
+			mb();
+			break;
+		case BARRIER_ISYNC:
+			isync();
+			break;
+		case BARRIER_EIEIO:
+			eieio();
+			break;
+#ifdef CONFIG_PPC64
+		case BARRIER_LWSYNC:
+			asm volatile("lwsync" : : : "memory");
+			break;
+		case BARRIER_PTESYNC:
+			asm volatile("ptesync" : : : "memory");
+			break;
+#endif
+		}
+		break;
+
+	case MFSPR:
+		switch (op->spr) {
+		case SPRN_XER:
+			regs->gpr[op->reg] = regs->xer & 0xffffffffUL;	/* low 32 bits only */
+			break;
+		case SPRN_LR:
+			regs->gpr[op->reg] = regs->link;
+			break;
+		case SPRN_CTR:
+			regs->gpr[op->reg] = regs->ctr;
+			break;
+		default:
+			WARN_ON_ONCE(1);	/* only XER, LR and CTR are handled */
+		}
+		break;
+
+	case MTSPR:
+		switch (op->spr) {
+		case SPRN_XER:
+			regs->xer = op->val & 0xffffffffUL;	/* low 32 bits only */
+			break;
+		case SPRN_LR:
+			regs->link = op->val;
+			break;
+		case SPRN_CTR:
+			regs->ctr = op->val;
+			break;
+		default:
+			WARN_ON_ONCE(1);	/* only XER, LR and CTR are handled */
+		}
+		break;
+
+	default:
+		WARN_ON_ONCE(1);	/* op type not handled by this function */
+	}
+	regs_set_return_ip(regs, next_pc);
+}
+NOKPROBE_SYMBOL(emulate_update_regs);
+
+/*
+ * Emulate a previously-analysed load or store instruction.
+ * Return values are:
+ * 0 = instruction emulated successfully
+ * -EFAULT = address out of range or access faulted (regs->dar
+ *	     contains the faulting address)
+ * -EACCES = misaligned access, instruction requires alignment
+ * -EINVAL = unknown operation in *op
+ */
+int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
+{
+	int err, size, type;
+	int i, rd, nb;
+	unsigned int cr;
+	unsigned long val;
+	unsigned long ea;
+	bool cross_endian;	/* regs' MSR_LE differs from the kernel's */
+
+	err = 0;
+	size = GETSIZE(op->type);
+	type = GETTYPE(op->type);
+	cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+	ea = truncate_if_32bit(regs->msr, op->ea);
+
+	switch (type) {
+	case LARX:
+		if (ea & (size - 1))
+			return -EACCES;		/* can't handle misaligned */
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
+		err = 0;
+		val = 0;
+		switch (size) {
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
+		case 1:
+			__get_user_asmx(val, ea, err, "lbarx");
+			break;
+		case 2:
+			__get_user_asmx(val, ea, err, "lharx");
+			break;
+#endif
+		case 4:
+			__get_user_asmx(val, ea, err, "lwarx");
+			break;
+#ifdef __powerpc64__
+		case 8:
+			__get_user_asmx(val, ea, err, "ldarx");
+			break;
+		case 16:
+			err = do_lqarx(ea, &regs->gpr[op->reg]);
+			break;
+#endif
+		default:
+			return -EINVAL;
+		}
+		if (err) {
+			regs->dar = ea;
+			break;
+		}
+		if (size < 16)	/* the 16-byte case handed &regs->gpr[op->reg] to do_lqarx() */
+			regs->gpr[op->reg] = val;
+		break;
+
+	case STCX:
+		if (ea & (size - 1))
+			return -EACCES;		/* can't handle misaligned */
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
+		err = 0;
+		switch (size) {
+#ifdef __powerpc64__
+		case 1:
+			__put_user_asmx(op->val, ea, err, "stbcx.", cr);
+			break;
+		case 2:
+			__put_user_asmx(op->val, ea, err, "sthcx.", cr);
+			break;
+#endif
+		case 4:
+			__put_user_asmx(op->val, ea, err, "stwcx.", cr);
+			break;
+#ifdef __powerpc64__
+		case 8:
+			__put_user_asmx(op->val, ea, err, "stdcx.", cr);
+			break;
+		case 16:
+			err = do_stqcx(ea, regs->gpr[op->reg],
+				       regs->gpr[op->reg + 1], &cr);
+			break;
+#endif
+		default:
+			return -EINVAL;
+		}
+		if (!err)	/* new top nibble of ccr: three bits from cr, one from XER bit 0x80000000 */
+			regs->ccr = (regs->ccr & 0x0fffffff) |
+				(cr & 0xe0000000) |
+				((regs->xer >> 3) & 0x10000000);
+		else
+			regs->dar = ea;
+		break;
+
+	case LOAD:
+#ifdef __powerpc64__
+		if (size == 16) {
+			err = emulate_lq(regs, ea, op->reg, cross_endian);
+			break;
+		}
+#endif
+		err = read_mem(&regs->gpr[op->reg], ea, size, regs);
+		if (!err) {
+			if (op->type & SIGNEXT)
+				do_signext(&regs->gpr[op->reg], size);
+			if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV))
+				do_byterev(&regs->gpr[op->reg], size);
+		}
+		break;
+
+#ifdef CONFIG_PPC_FPU
+	case LOAD_FP:
+		/*
+		 * If the instruction is in userspace, we can emulate it even
+		 * if the FP state is not live, because we have the state
+		 * stored in the thread_struct.  If the instruction is in
+		 * the kernel, we must not touch the state in the thread_struct.
+		 */
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+			return 0;	/* kernel mode with MSR_FP clear: nothing is emulated */
+		err = do_fp_load(op, ea, regs, cross_endian);
+		break;
+#endif
+#ifdef CONFIG_ALTIVEC
+	case LOAD_VMX:
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+			return 0;	/* kernel mode with MSR_VEC clear: nothing is emulated */
+		err = do_vec_load(op->reg, ea, size, regs, cross_endian);
+		break;
+#endif
+#ifdef CONFIG_VSX
+	case LOAD_VSX: {
+		unsigned long msrbit = MSR_VSX;
+
+		/*
+		 * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+		 * when the target of the instruction is a vector register.
+		 */
+		if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+			msrbit = MSR_VEC;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+			return 0;	/* kernel mode with the facility off: nothing is emulated */
+		err = do_vsx_load(op, ea, regs, cross_endian);
+		break;
+	}
+#endif
+	case LOAD_MULTI:
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
+		rd = op->reg;
+		for (i = 0; i < size; i += 4) {
+			unsigned int v32 = 0;	/* zeroed so a short final chunk is padded */
+
+			nb = size - i;
+			if (nb > 4)
+				nb = 4;
+			err = copy_mem_in((u8 *) &v32, ea, nb, regs);
+			if (err)
+				break;
+			if (unlikely(cross_endian))
+				v32 = byterev_4(v32);
+			regs->gpr[rd] = v32;
+			ea += 4;
+			/* reg number wraps from 31 to 0 for lsw[ix] */
+			rd = (rd + 1) & 0x1f;
+		}
+		break;
+
+	case STORE:
+#ifdef __powerpc64__
+		if (size == 16) {
+			err = emulate_stq(regs, ea, op->reg, cross_endian);
+			break;
+		}
+#endif
+		if ((op->type & UPDATE) && size == sizeof(long) &&
+		    op->reg == 1 && op->update_reg == 1 &&
+		    !(regs->msr & MSR_PR) &&
+		    ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
+			err = handle_stack_update(ea, regs);	/* store is deferred, not done here */
+			break;
+		}
+		if (unlikely(cross_endian))
+			do_byterev(&op->val, size);
+		err = write_mem(op->val, ea, size, regs);
+		break;
+
+#ifdef CONFIG_PPC_FPU
+	case STORE_FP:
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+			return 0;	/* kernel mode with MSR_FP clear: nothing is emulated */
+		err = do_fp_store(op, ea, regs, cross_endian);
+		break;
+#endif
+#ifdef CONFIG_ALTIVEC
+	case STORE_VMX:
+		if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+			return 0;	/* kernel mode with MSR_VEC clear: nothing is emulated */
+		err = do_vec_store(op->reg, ea, size, regs, cross_endian);
+		break;
+#endif
+#ifdef CONFIG_VSX
+	case STORE_VSX: {
+		unsigned long msrbit = MSR_VSX;
+
+		/*
+		 * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+		 * when the target of the instruction is a vector register.
+		 */
+		if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+			msrbit = MSR_VEC;
+		if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+			return 0;	/* kernel mode with the facility off: nothing is emulated */
+		err = do_vsx_store(op, ea, regs, cross_endian);
+		break;
+	}
+#endif
+	case STORE_MULTI:
+		if (!address_ok(regs, ea, size))
+			return -EFAULT;
+		rd = op->reg;
+		for (i = 0; i < size; i += 4) {
+			unsigned int v32 = regs->gpr[rd];	/* low 32 bits of the register */
+
+			nb = size - i;
+			if (nb > 4)
+				nb = 4;
+			if (unlikely(cross_endian))
+				v32 = byterev_4(v32);
+			err = copy_mem_out((u8 *) &v32, ea, nb, regs);
+			if (err)
+				break;
+			ea += 4;
+			/* reg number wraps from 31 to 0 for stsw[ix] */
+			rd = (rd + 1) & 0x1f;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (err)
+		return err;
+
+	if (op->type & UPDATE)
+		regs->gpr[op->update_reg] = op->ea;	/* op->ea itself, not the truncated local ea */
+
+	return 0;
+}
+NOKPROBE_SYMBOL(emulate_loadstore);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
+{
+	struct instruction_op op;
+	int r, err, type;
+	unsigned long val;
+	unsigned long ea;
+
+	r = analyse_instr(&op, regs, instr);
+	if (r < 0)	/* negative results from analyse_instr() are passed through */
+		return r;
+	if (r > 0) {	/* only register state needs updating */
+		emulate_update_regs(regs, &op);
+		return 1;
+	}
+
+	err = 0;
+	type = GETTYPE(op.type);
+
+	if (OP_IS_LOAD_STORE(type)) {
+		err = emulate_loadstore(regs, &op);
+		if (err)	/* any load/store error is reported as "not emulated" */
+			return 0;
+		goto instr_done;
+	}
+
+	switch (type) {
+	case CACHEOP:
+		ea = truncate_if_32bit(regs->msr, op.ea);
+		if (!address_ok(regs, ea, 8))
+			return 0;
+		switch (op.type & CACHEOP_MASK) {
+		case DCBST:
+			__cacheop_user_asmx(ea, err, "dcbst");
+			break;
+		case DCBF:
+			__cacheop_user_asmx(ea, err, "dcbf");
+			break;
+		case DCBTST:
+			if (op.reg == 0)	/* other op.reg values are treated as a no-op */
+				prefetchw((void *) ea);
+			break;
+		case DCBT:
+			if (op.reg == 0)	/* other op.reg values are treated as a no-op */
+				prefetch((void *) ea);
+			break;
+		case ICBI:
+			__cacheop_user_asmx(ea, err, "icbi");
+			break;
+		case DCBZ:
+			err = emulate_dcbz(ea, regs);
+			break;
+		}
+		if (err) {
+			regs->dar = ea;
+			return 0;
+		}
+		goto instr_done;
+
+	case MFMSR:
+		regs->gpr[op.reg] = regs->msr & MSR_MASK;
+		goto instr_done;
+
+	case MTMSR:
+		val = regs->gpr[op.reg];
+		if ((val & MSR_RI) == 0)
+			/* can't step mtmsr[d] that would clear MSR_RI */
+			return -1;
+		/* here op.val is the mask of bits to change */
+		regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val));
+		goto instr_done;
+
+	case SYSCALL:	/* sc */
+		/*
+		 * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't
+		 * single step a system call instruction:
+		 *
+		 *   Successful completion for an instruction means that the
+		 *   instruction caused no other interrupt. Thus a Trace
+		 *   interrupt never occurs for a System Call or System Call
+		 *   Vectored instruction, or for a Trap instruction that
+		 *   traps.
+		 */
+		return -1;
+	case SYSCALL_VECTORED_0:	/* scv 0 */
+		return -1;
+	case RFI:
+		return -1;
+	}
+	return 0;	/* op type not handled here: not emulated */
+
+ instr_done:
+	regs_set_return_ip(regs,
+		truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)));
+	return 1;
+}
+NOKPROBE_SYMBOL(emulate_step);
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
new file mode 100644
index 0000000000..daa72061dc
--- /dev/null
+++ b/arch/powerpc/lib/string.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+	.text
+	
+/* This clears out any unused part of the destination buffer,
+   just as the libc version does.  -- paulus */
+_GLOBAL(strncpy)
+	PPC_LCMPI 0,r5,0	/* count in r5 is zero: return at once */
+	beqlr
+	mtctr	r5		/* ctr = number of bytes to write */
+	addi	r6,r3,-1	/* r6 = write pointer; r3 is left untouched */
+	addi	r4,r4,-1	/* r4 = read pointer, pre-decremented for lbzu */
+	.balign IFETCH_ALIGN_BYTES
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	bnelr			/* if we didn't hit a null char, we're done */
+	mfctr	r5
+	PPC_LCMPI 0,r5,0	/* any space left in destination buffer? */
+	beqlr			/* we know r0 == 0 here */
+2:	stbu	r0,1(r6)	/* clear it out if so */
+	bdnz	2b
+	blr
+EXPORT_SYMBOL(strncpy)
+
+_GLOBAL(strncmp)
+	PPC_LCMPI 0,r5,0
+	beq-	2f		/* count in r5 is zero: return 0 */
+	mtctr	r5		/* ctr = maximum number of bytes to compare */
+	addi	r5,r3,-1	/* r5 = read pointer for the string passed in r3 */
+	addi	r4,r4,-1	/* r4 = read pointer for the other string */
+	.balign IFETCH_ALIGN_BYTES
+1:	lbzu	r3,1(r5)
+	cmpwi	1,r3,0		/* cr1.eq = the byte just loaded via r5 is NUL */
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3	/* r3 = r3 - r0, the byte difference; sets cr0 */
+	beqlr	1		/* NUL reached: return the difference */
+	bdnzt	eq,1b		/* dec ctr, loop while ctr != 0 && bytes equal */
+	blr
+2:	li	r3,0
+	blr
+EXPORT_SYMBOL(strncmp)
+
+_GLOBAL(memchr)
+	PPC_LCMPI 0,r5,0
+	beq-	2f		/* count in r5 is zero: return 0 */
+	mtctr	r5		/* ctr = number of bytes to scan */
+	addi	r3,r3,-1	/* pre-decrement the pointer for lbzu */
+	.balign IFETCH_ALIGN_BYTES
+1:	lbzu	r0,1(r3)
+	cmpw	0,r0,r4		/* compare the loaded byte with the value in r4 */
+	bdnzf	2,1b		/* dec ctr, loop while ctr != 0 && !cr0.eq */
+	beqlr			/* match: r3 already points at the byte */
+2:	li	r3,0		/* no match: return 0 */
+	blr
+EXPORT_SYMBOL(memchr)
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
new file mode 100644
index 0000000000..3ee45619a3
--- /dev/null
+++ b/arch/powerpc/lib/string_32.S
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * String handling functions for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+	.text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(__arch_clear_user)
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.
+ */
+	cmplwi	cr0, r4, 4
+	mr	r10, r3		/* r10 = start of the area */
+	li	r3, 0		/* r3 = 0: the value stored, and the success return */
+	blt	7f		/* fewer than 4 bytes: byte loop at 7 */
+
+11:	stw	r3, 0(r10)	/* zero the first word, whatever its alignment */
+	beqlr			/* length was exactly 4: done */
+	andi.	r0, r10, 3
+	add	r11, r0, r4
+	subf	r6, r0, r10	/* r6 = start rounded down to a word boundary */
+
+	clrlwi	r7, r6, 32 - LG_CACHELINE_BYTES
+	add	r8, r7, r11
+	srwi	r9, r8, LG_CACHELINE_BYTES
+	addic.	r9, r9, -1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0, r7, CACHELINE_MASK & ~3
+	srwi.	r0, r0, 2
+	beq	3f
+	mtctr	r0
+4:	stwu	r3, 4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7, 4
+10:	dcbz	r7, r6
+	addi	r6, r6, CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r11, r8, 32 - LG_CACHELINE_BYTES
+	addi	r11, r11, 4
+
+2:	srwi	r0 ,r11 ,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r3, 4(r6)
+	bdnz	1b
+6:	andi.	r11, r11, 3	/* bytes left over after the word stores */
+	beqlr
+	mtctr	r11
+	addi	r6, r6, 3
+8:	stbu	r3, 1(r6)
+	bdnz	8b
+	blr
+
+7:	cmpwi	cr0, r4, 0
+	beqlr			/* zero length: return 0 */
+	mtctr	r4
+	addi	r6, r10, -1
+9:	stbu	r3, 1(r6)
+	bdnz	9b
+	blr
+
+90:	mr	r3, r4		/* fixup for the store at 11: return the full length */
+	blr
+91:	add	r3, r10, r4	/* fixup for the other stores: r3 = end of the area */
+	subf	r3, r6, r3	/* return end minus the current pointer r6 */
+	blr
+
+	EX_TABLE(11b, 90b)
+	EX_TABLE(4b, 91b)
+	EX_TABLE(10b, 91b)
+	EX_TABLE(1b, 91b)
+	EX_TABLE(8b, 91b)
+	EX_TABLE(9b, 91b)
+
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
new file mode 100644
index 0000000000..a25eb85884
--- /dev/null
+++ b/arch/powerpc/lib/string_64.S
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/linkage.h>
+#include <asm/asm-offsets.h>
+
+/**
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+
+	.macro err1	/* tag the next instruction: a fault there goes to .Ldo_err1 */
+100:
+	EX_TABLE(100b,.Ldo_err1)
+	.endm
+
+	.macro err2	/* tag the next instruction: a fault there goes to .Ldo_err2 */
+200:
+	EX_TABLE(200b,.Ldo_err2)
+	.endm
+
+	.macro err3	/* tag the next instruction: a fault there goes to .Ldo_err3 */
+300:
+	EX_TABLE(300b,.Ldo_err3)
+	.endm
+
+.Ldo_err1:
+	mr	r3,r8		/* go back to the address saved in r8 */
+
+.Ldo_err2:
+	mtctr	r4		/* retry the r4 remaining bytes one at a time */
+1:
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+	addi	r4,r4,-1
+	bdnz	1b
+
+.Ldo_err3:
+	mr	r3,r4		/* return the count still left in r4 */
+	blr
+
+_GLOBAL_TOC(__arch_clear_user)
+	cmpdi	r4,32
+	neg	r6,r3
+	li	r0,0		/* r0 = the zero value that every store writes */
+	blt	.Lshort_clear
+	mr	r8,r3		/* save the start address for .Ldo_err1 */
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-3)	/* r6 = low three bits of -r3 */
+
+	/* Get the destination 8 byte aligned */
+	bf	cr7*4+3,1f
+err1;	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	sub	r4,r4,r6	/* take the alignment bytes off the count */
+
+	cmpdi	r4,32
+	cmpdi	cr1,r4,512
+	blt	.Lshort_clear
+	bgt	cr1,.Llong_clear
+
+.Lmedium_clear:
+	srdi	r6,r4,5		/* number of 32 byte chunks */
+	mtctr	r6
+
+	/* Do 32 byte chunks */
+4:
+err2;	std	r0,0(r3)
+err2;	std	r0,8(r3)
+err2;	std	r0,16(r3)
+err2;	std	r0,24(r3)
+	addi	r3,r3,32
+	addi	r4,r4,-32
+	bdnz	4b
+
+.Lshort_clear:
+	/* up to 31 bytes to go */
+	cmpdi	r4,16
+	blt	6f
+err2;	std	r0,0(r3)
+err2;	std	r0,8(r3)
+	addi	r3,r3,16
+	addi	r4,r4,-16
+
+	/* Up to 15 bytes to go */
+6:	mr	r8,r3		/* save the restart address for .Ldo_err1 */
+	clrldi	r4,r4,(64-4)
+	mtocrf	0x01,r4		/* cr7 = low four bits of the remaining count */
+	bf	cr7*4+0,7f
+err1;	std	r0,0(r3)
+	addi	r3,r3,8
+
+7:	bf	cr7*4+1,8f
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+8:	bf	cr7*4+2,9f
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+9:	bf	cr7*4+3,10f
+err1;	stb	r0,0(r3)
+
+10:	li	r3,0		/* return 0 */
+	blr
+
+.Llong_clear:
+	LOAD_REG_ADDR(r5, ppc64_caches)
+
+	bf	cr7*4+0,11f
+err2;	std	r0,0(r3)
+	addi	r3,r3,8
+	addi	r4,r4,-8
+
+	/* Destination is 16 byte aligned, need to get it cache block aligned */
+11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)
+	lwz	r9,DCACHEL1BLOCKSIZE(r5)
+
+	/*
+	 * With worst case alignment the long clear loop takes a minimum
+	 * of 1 byte less than 2 cachelines.
+	 */
+	sldi	r10,r9,2
+	cmpd	r4,r10
+	blt	.Lmedium_clear
+
+	neg	r6,r3
+	addi	r10,r9,-1	/* r10 = block size - 1, used as a mask */
+	and.	r5,r6,r10	/* r5 = bytes up to the next block boundary */
+	beq	13f
+
+	srdi	r6,r5,4		/* number of 16 byte steps to the boundary */
+	mtctr	r6
+	mr	r8,r3		/* save the restart address for .Ldo_err1 */
+12:
+err1;	std	r0,0(r3)
+err1;	std	r0,8(r3)
+	addi	r3,r3,16
+	bdnz	12b
+
+	sub	r4,r4,r5
+
+13:	srd	r6,r4,r7	/* number of whole cache blocks left */
+	mtctr	r6
+	mr	r8,r3		/* save the restart address for .Ldo_err1 */
+14:
+err1;	dcbz	0,r3
+	add	r3,r3,r9	/* step forward by one block size */
+	bdnz	14b
+
+	and	r4,r4,r10	/* what is left after the whole blocks */
+
+	cmpdi	r4,32
+	blt	.Lshort_clear
+	b	.Lmedium_clear
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S
new file mode 100644
index 0000000000..bbd24feb23
--- /dev/null
+++ b/arch/powerpc/lib/strlen_32.S
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * strlen() for PPC32
+ *
+ * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
+ *
+ * Inspired from glibc implementation
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+	.text
+
+/*
+ * Algorithm:
+ *
+ * 1) Given a word 'x', we can test to see if it contains any 0 bytes
+ *    by subtracting 0x01010101, and seeing if any of the high bits of each
+ *    byte changed from 0 to 1. This works because the least significant
+ *    0 byte must have had no incoming carry (otherwise it's not the least
+ *    significant), so it is 0x00 - 0x01 == 0xff. For all other
+ *    byte values, either they have the high bit set initially, or when
+ *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
+ *    have their high bit set. The expression here is
+ *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
+ *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
+ *    match, but possibly false 0x80 matches in the next more significant
+ *    byte to a true match due to carries.  For little-endian this is
+ *    of no consequence since the least significant match is the one
+ *    we're interested in, but big-endian needs method 2 to find which
+ *    byte matches.
+ * 2) Given a word 'x', we can test to see _which_ byte was zero by
+ *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
+ *    This produces 0x80 in each byte that was zero, and 0x00 in all
+ *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
+ *    byte, and the '| x' part ensures that bytes with the high bit set
+ *    produce 0x00. The addition will carry into the high bit of each byte
+ *    iff that byte had one of its low 7 bits set. We can then just see
+ *    which was the most significant bit set and divide by 8 to find how
+ *    many to add to the index.
+ *    This is from the book 'The PowerPC Compiler Writer's Guide',
+ *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+ */
+
+_GLOBAL(strlen)
+	andi.   r0, r3, 3	/* r0 = offset of the string within its word */
+	lis	r7, 0x0101
+	addi	r10, r3, -4	/* pre-decremented for the lwzu below */
+	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
+	rotlwi	r6, r7, 31 	/* r6 = 0x80808080 (himagic) */
+	bne-	3f		/* string does not start on a word boundary */
+	.balign IFETCH_ALIGN_BYTES
+1:	lwzu	r9, 4(r10)
+2:	subf	r8, r7, r9	/* r8 = word - lomagic */
+	and.	r8, r8, r6
+	beq+	1b		/* no candidate zero byte: next word */
+	andc.	r8, r8, r9
+	beq+	1b		/* candidates all had their high bit set: next word */
+	andc	r8, r9, r6
+	orc	r9, r9, r6
+	subfe	r8, r6, r8
+	nor	r8, r8, r9
+	cntlzw	r8, r8		/* count of leading zero bits in the result */
+	subf	r3, r3, r10	/* r3 = offset of this word from the start */
+	srwi	r8, r8, 3	/* bit count / 8 = byte index within the word */
+	add	r3, r3, r8
+	blr
+
+	/* Misaligned string: make sure bytes before string are seen not 0 */
+3:	xor	r10, r10, r0	/* clear the low two address bits */
+	orc	r8, r8, r8	/* r8 = all ones */
+	lwzu	r9, 4(r10)
+	slwi	r0, r0, 3	/* byte offset -> bit count */
+	srw	r8, r8, r0
+	orc	r9, r9, r8	/* set every bit of the bytes before the string */
+	b	2b
+EXPORT_SYMBOL(strlen)
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
new file mode 100644
index 0000000000..c44823292f
--- /dev/null
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/code-patching.h>
+
+static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+	/* Non-zero only for an I-form or B-form branch whose target is addr. */
+	if (!instr_is_branch_iform(ppc_inst_read(instr)) &&
+	    !instr_is_branch_bform(ppc_inst_read(instr)))
+		return 0;
+	return branch_target(instr) == addr;
+}
+
+static void __init test_trampoline(void)
+{
+	asm ("nop;nop;\n");	/* filler; this function's entry is a patch_instruction() target in the tests below */
+}
+
+#define check(x)	do {	/* log the failing line; never aborts the run */ \
+	if (!(x))		\
+		pr_err("code-patching: test failed at line %d\n", __LINE__); \
+} while (0)
+
+static void __init test_branch_iform(void)
+{	/* Self-test of I-form branch recognition, target decoding and create_branch(). */
+	int err;
+	ppc_inst_t instr;
+	u32 tmp[2];	/* on-stack scratch slot the instructions are written to */
+	u32 *iptr = tmp;
+	unsigned long addr = (unsigned long)tmp;
+
+	/* The simplest case, branch to self, no flags */
+	check(instr_is_branch_iform(ppc_inst(0x48000000)));
+	/* All bits of target set, and flags */
+	check(instr_is_branch_iform(ppc_inst(0x4bffffff)));
+	/* High bit of opcode set, which is wrong */
+	check(!instr_is_branch_iform(ppc_inst(0xcbffffff)));
+	/* Middle bits of opcode set, which is wrong */
+	check(!instr_is_branch_iform(ppc_inst(0x7bffffff)));
+
+	/* Simplest case, branch to self with link */
+	check(instr_is_branch_iform(ppc_inst(0x48000001)));
+	/* All bits of targets set */
+	check(instr_is_branch_iform(ppc_inst(0x4bfffffd)));
+	/* Some bits of targets set */
+	check(instr_is_branch_iform(ppc_inst(0x4bff00fd)));
+	/* Must be a valid branch to start with */
+	check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
+
+	/* Absolute branch to 0x100 */
+	ppc_inst_write(iptr, ppc_inst(0x48000103));
+	check(instr_is_branch_to_addr(iptr, 0x100));
+	/* Absolute branch to 0x420fc */
+	ppc_inst_write(iptr, ppc_inst(0x480420ff));
+	check(instr_is_branch_to_addr(iptr, 0x420fc));
+	/* Maximum positive relative branch, + 32MB - 4B */
+	ppc_inst_write(iptr, ppc_inst(0x49fffffc));
+	check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
+	/* Smallest negative relative branch, - 4B */
+	ppc_inst_write(iptr, ppc_inst(0x4bfffffc));
+	check(instr_is_branch_to_addr(iptr, addr - 4));
+	/* Largest negative relative branch, - 32 MB */
+	ppc_inst_write(iptr, ppc_inst(0x4a000000));
+	check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+	/* Branch to self, with link */
+	err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr));
+
+	/* Branch to self - 0x100, with link */
+	err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+	/* Branch to self + 0x100, no link */
+	err = create_branch(&instr, iptr, addr + 0x100, 0);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+	/* Maximum relative negative offset, - 32 MB */
+	err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+	/* Out of range relative negative offset, - (32 MB + 4 B) */
+	err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
+	check(err);
+
+	/* Out of range relative positive offset, + 32 MB */
+	err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
+	check(err);
+
+	/* Unaligned target */
+	err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
+	check(err);
+
+	/* Check flags are masked correctly */
+	err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr));
+	check(ppc_inst_equal(instr, ppc_inst(0x48000000)));	/* none of the 0xFFFFFFFC bits may leak in */
+}
+
+static void __init test_create_function_call(void)
+{
+	ppc_inst_t call;
+	unsigned long target = ppc_function_entry(test_create_function_call);
+	u32 *site = (u32 *)ppc_function_entry(test_trampoline);
+
+	/*
+	 * Check we can create a function call (trampoline -> this function).
+	 */
+	create_branch(&call, site, target, BRANCH_SET_LINK);
+	patch_instruction(site, call);
+	check(instr_is_branch_to_addr(site, target));
+}
+
+static void __init test_branch_bform(void)
+{	/* Self-test of B-form branch recognition, target decoding and create_cond_branch(). */
+	int err;
+	unsigned long addr;
+	ppc_inst_t instr;
+	u32 tmp[2];	/* on-stack scratch slot the instructions are written to */
+	u32 *iptr = tmp;
+	unsigned int flags;
+
+	addr = (unsigned long)iptr;
+
+	/* The simplest case, branch to self, no flags */
+	check(instr_is_branch_bform(ppc_inst(0x40000000)));
+	/* All bits of target set, and flags */
+	check(instr_is_branch_bform(ppc_inst(0x43ffffff)));
+	/* High bit of opcode set, which is wrong */
+	check(!instr_is_branch_bform(ppc_inst(0xc3ffffff)));
+	/* Middle bits of opcode set, which is wrong */
+	check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
+
+	/* Absolute conditional branch to 0x100 */
+	ppc_inst_write(iptr, ppc_inst(0x43ff0103));
+	check(instr_is_branch_to_addr(iptr, 0x100));
+	/* Absolute conditional branch to 0x20fc */
+	ppc_inst_write(iptr, ppc_inst(0x43ff20ff));
+	check(instr_is_branch_to_addr(iptr, 0x20fc));
+	/* Maximum positive relative conditional branch, + 32 KB - 4B */
+	ppc_inst_write(iptr, ppc_inst(0x43ff7ffc));
+	check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
+	/* Smallest negative relative conditional branch, - 4B */
+	ppc_inst_write(iptr, ppc_inst(0x43fffffc));
+	check(instr_is_branch_to_addr(iptr, addr - 4));
+	/* Largest negative relative conditional branch, - 32 KB */
+	ppc_inst_write(iptr, ppc_inst(0x43ff8000));
+	check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+	/* All condition code bits set & link */
+	flags = 0x3ff000 | BRANCH_SET_LINK;
+
+	/* Branch to self */
+	err = create_cond_branch(&instr, iptr, addr, flags);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr));
+
+	/* Branch to self - 0x100 */
+	err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+	/* Branch to self + 0x100 */
+	err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+	/* Maximum relative negative offset, - 32 KB */
+	err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+	/* Out of range relative negative offset, - (32 KB + 4 B) */
+	err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
+	check(err);
+
+	/* Out of range relative positive offset, + 32 KB */
+	err = create_cond_branch(&instr, iptr, addr + 0x8000, flags);
+	check(err);
+
+	/* Unaligned target */
+	err = create_cond_branch(&instr, iptr, addr + 3, flags);
+	check(err);
+
+	/* Check flags are masked correctly */
+	err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
+	ppc_inst_write(iptr, instr);
+	check(instr_is_branch_to_addr(iptr, addr));
+	check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));	/* only the 0x03FF0000 part of the flags is kept */
+}
+
+static void __init test_translate_branch(void)
+{	/* Checks that translate_branch() keeps a branch's target when the branch is moved from p to q. */
+	unsigned long addr;
+	void *p, *q;	/* p: where the branch is created, q: where it is moved to */
+	ppc_inst_t instr;
+	void *buf;
+
+	buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));	/* just over 32 MB so q = buf + 0x2000000 stays inside */
+	check(buf);
+	if (!buf)
+		return;
+
+	/* Simple case, branch to self moved a little */
+	p = buf;
+	addr = (unsigned long)p;
+	create_branch(&instr, p, addr, 0);
+	ppc_inst_write(p, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	q = p + 4;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(q, addr));
+
+	/* Maximum negative case, move b . to addr + 32 MB */
+	p = buf;
+	addr = (unsigned long)p;
+	create_branch(&instr, p, addr, 0);
+	ppc_inst_write(p, instr);
+	q = buf + 0x2000000;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+	check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000)));
+
+	/* Maximum positive case, move x to x - 32 MB + 4 */
+	p = buf + 0x2000000;
+	addr = (unsigned long)p;
+	create_branch(&instr, p, addr, 0);
+	ppc_inst_write(p, instr);
+	q = buf + 4;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+	check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc)));
+
+	/* Jump to x + 16 MB moved to x + 20 MB */
+	p = buf;
+	addr = 0x1000000 + (unsigned long)buf;
+	create_branch(&instr, p, addr, BRANCH_SET_LINK);
+	ppc_inst_write(p, instr);
+	q = buf + 0x1400000;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+
+	/* Jump to x + 16 MB moved to x - 16 MB + 4 */
+	p = buf + 0x1000000;
+	addr = 0x2000000 + (unsigned long)buf;
+	create_branch(&instr, p, addr, 0);
+	ppc_inst_write(p, instr);
+	q = buf + 4;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+
+
+	/* Conditional branch tests */
+
+	/* Simple case, branch to self moved a little */
+	p = buf;
+	addr = (unsigned long)p;
+	create_cond_branch(&instr, p, addr, 0);
+	ppc_inst_write(p, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	q = buf + 4;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(q, addr));
+
+	/* Maximum negative case, move b . to addr + 32 KB */
+	p = buf;
+	addr = (unsigned long)p;
+	create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+	ppc_inst_write(p, instr);
+	q = buf + 0x8000;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+	check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000)));
+
+	/* Maximum positive case, move x to x - 32 KB + 4 */
+	p = buf + 0x8000;
+	addr = (unsigned long)p;
+	create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+	ppc_inst_write(p, instr);
+	q = buf + 4;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+	check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc)));
+
+	/* Jump to x + 12 KB moved to x + 20 KB */
+	p = buf;
+	addr = 0x3000 + (unsigned long)buf;
+	create_cond_branch(&instr, p, addr, BRANCH_SET_LINK);
+	ppc_inst_write(p, instr);
+	q = buf + 0x5000;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+
+	/* Jump to x + 8 KB moved to x - 8 KB + 4 */
+	p = buf + 0x2000;
+	addr = 0x4000 + (unsigned long)buf;
+	create_cond_branch(&instr, p, addr, 0);
+	ppc_inst_write(p, instr);
+	q = buf + 4;
+	translate_branch(&instr, q, p);
+	ppc_inst_write(q, instr);
+	check(instr_is_branch_to_addr(p, addr));
+	check(instr_is_branch_to_addr(q, addr));
+
+	/* Free the buffer we were using */
+	vfree(buf);
+}
+
+static void __init test_prefixed_patching(void)
+{	/* Patches a two-word prefixed instruction over the trampoline and compares both words. */
+	u32 *iptr = (u32 *)ppc_function_entry(test_trampoline);
+	u32 expected[2] = {OP_PREFIX << 26, 0};	/* prefix word, then suffix word */
+	ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0);
+
+	if (!IS_ENABLED(CONFIG_PPC64))	/* nothing is patched or checked on non-PPC64 builds */
+		return;
+
+	patch_instruction(iptr, inst);
+
+	check(!memcmp(iptr, expected, sizeof(expected)));
+}
+
+static int __init test_code_patching(void)
+{	/* late_initcall entry point: runs each self-test in this file; always returns 0. */
+	pr_info("Running code patching self-tests ...\n");
+
+	test_branch_iform();
+	test_branch_bform();
+	test_create_function_call();	/* patches test_trampoline's entry */
+	test_translate_branch();
+	test_prefixed_patching();	/* patches test_trampoline's entry again */
+
+	return 0;	/* failures are only logged by check(), never returned */
+}
+late_initcall(test_code_patching);
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 0000000000..23c7805fb7
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,1741 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Simple sanity tests for instruction emulation infrastructure.
+ *
+ * Copyright IBM Corp. 2016
+ */
+
+#define pr_fmt(fmt) "emulate_step_test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+#include <asm/code-patching.h>
+#include <asm/inst.h>
+
+#define MAX_SUBTESTS	16	/* sizes compute_test.subtests[] (which has one extra slot) */
+
+#define IGNORE_GPR(n)	(0x1UL << (n))	/* bit n: one bit per GPR number */
+#define IGNORE_XER	(0x1UL << 32)	/* bit 32 */
+#define IGNORE_CCR	(0x1UL << 33)	/* bit 33 */
+#define NEGATIVE_TEST	(0x1UL << 63)	/* bit 63; NOTE(review): these look like subtest .flags bits - confirm */
+
+#define TEST_PLD(r, base, i, pr) /* pld: 8LS prefix; i is split by IMM_H/IMM_L, pr feeds the prefix R field */ \
+	ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLWZ(r, base, i, pr) /* plwz: MLS prefix in front of a plain lwz encoding */ \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_RAW_LWZ(r, base, i))
+
+#define TEST_PSTD(r, base, i, pr) /* pstd: 8LS prefix, PSTD suffix */ \
+	ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFS(r, base, i, pr) /* plfs: MLS prefix, LFS suffix */ \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFS(r, base, i, pr) /* pstfs: MLS prefix, STFS suffix */ \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFD(r, base, i, pr) /* plfd: MLS prefix, LFD suffix */ \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFD(r, base, i, pr) /* pstfd: MLS prefix, STFD suffix */ \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PADDI(t, a, i, pr) /* paddi: MLS prefix in front of a plain addi encoding */ \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_RAW_ADDI(t, a, i))
+
+static void __init init_pt_regs(struct pt_regs *regs)
+{
+	static bool have_msr;
+	static unsigned long saved_msr;
+
+	memset(regs, 0, sizeof(*regs));
+
+	/*
+	 * Read the MSR only on the first call; later calls reuse the cached
+	 * value, which has the FP, VEC and VSX bits forced on.
+	 */
+	if (unlikely(!have_msr)) {
+		unsigned long cur;
+
+		asm volatile("mfmsr %0" : "=r"(cur));
+		saved_msr = cur | MSR_FP | MSR_VEC | MSR_VSX;
+		have_msr = true;
+	}
+
+	regs->msr = saved_msr;
+}
+
+static void __init show_result(char *mnemonic, char *result)
+{	/* One log line: mnemonic left-justified in 14 columns, then the verdict string. */
+	pr_info("%-14s : %s\n", mnemonic, result);
+}
+
+static void __init show_result_with_descr(char *mnemonic, char *descr,
+					  char *result)
+{	/* Like show_result(), with a 50-column description between name and verdict. */
+	pr_info("%-14s : %-50s %s\n", mnemonic, descr, result);
+}
+
+static void __init test_ld(void)
+{
+	unsigned long mem = 0x23;
+	struct pt_regs regs;
+	bool ok;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&mem;
+
+	/* ld r5, 0(r3): r5 must end up holding the doubleword at mem */
+	ok = emulate_step(&regs, ppc_inst(PPC_RAW_LD(5, 3, 0))) == 1 &&
+	     regs.gpr[5] == mem;
+	if (!ok)
+		show_result("ld", "FAIL");
+	else
+		show_result("ld", "PASS");
+}
+
+static void __init test_pld(void)
+{
+	unsigned long mem = 0x23;
+	struct pt_regs regs;
+	int rc;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&mem;
+
+	/* pld r5, 0(r3), 0 */
+	rc = emulate_step(&regs, TEST_PLD(5, 3, 0, 0));
+	if (rc != 1 || regs.gpr[5] != mem) {
+		show_result("pld", "FAIL");
+		return;
+	}
+	show_result("pld", "PASS");
+}
+
+static void __init test_lwz(void)
+{
+	unsigned int word = 0x4545;
+	struct pt_regs regs;
+	char *verdict = "FAIL";
+	int rc;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&word;
+
+	/* lwz r5, 0(r3) */
+	rc = emulate_step(&regs, ppc_inst(PPC_RAW_LWZ(5, 3, 0)));
+	if (rc == 1 && regs.gpr[5] == word)
+		verdict = "PASS";
+
+	show_result("lwz", verdict);
+}
+
+static void __init test_plwz(void)
+{
+	unsigned int word = 0x4545;
+	struct pt_regs regs;
+	bool loaded;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&word;
+
+	/*
+	 * plwz r5, 0(r3), 0
+	 * Passes when the step is emulated and r5 equals the word at r3.
+	 */
+	loaded = emulate_step(&regs, TEST_PLWZ(5, 3, 0, 0)) == 1 &&
+		 regs.gpr[5] == word;
+
+	show_result("plwz", loaded ? "PASS" : "FAIL");
+}
+
+static void __init test_lwzx(void)
+{
+	unsigned int words[3] = {0x0, 0x0, 0x1234};
+	struct pt_regs regs;
+	int rc;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)words;	/* base address */
+	regs.gpr[4] = 8;			/* index register: byte offset 8 */
+	regs.gpr[5] = 0x8765;			/* stale value the load must replace */
+
+	/* lwzx r5, r3, r4 */
+	rc = emulate_step(&regs, ppc_inst(PPC_RAW_LWZX(5, 3, 4)));
+	if (rc != 1 || regs.gpr[5] != words[2])
+		show_result("lwzx", "FAIL");
+	else
+		show_result("lwzx", "PASS");
+}
+
+static void __init test_std(void)
+{
+	unsigned long mem = 0x1234;
+	struct pt_regs regs;
+	bool stored;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&mem;
+	regs.gpr[5] = 0x5678;
+
+	/* std r5, 0(r3) */
+	stored = emulate_step(&regs, ppc_inst(PPC_RAW_STD(5, 3, 0))) == 1;
+	/* The doubleword at r3 must now equal r5. */
+	stored = stored && regs.gpr[5] == mem;
+
+	show_result("std", stored ? "PASS" : "FAIL");
+}
+
+static void __init test_pstd(void)
+{	/* Emulates a prefixed store doubleword and verifies the value reached memory. */
+	struct pt_regs regs;
+	unsigned long a = 0x1234;	/* store target; must become 0x5678 */
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+	regs.gpr[5] = 0x5678;
+
+	/* pstd r5, 0(r3), 0: pass needs BOTH an emulated step and r5 stored to a */
+	stepped = emulate_step(&regs, TEST_PSTD(5, 3, 0, 0));
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("pstd", "PASS");
+	else
+		show_result("pstd", "FAIL");
+}
+
+static void __init test_ldarx_stdcx(void)
+{	/* Emulates an ldarx followed by a stdcx. on the same doubleword. */
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+	unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */
+
+	init_pt_regs(&regs);
+	asm volatile("mfcr %0" : "=r"(regs.ccr));	/* seed regs.ccr from the live CR */
+
+
+	/*** ldarx ***/
+
+	regs.gpr[3] = (unsigned long) &a;
+	regs.gpr[4] = 0;
+	regs.gpr[5] = 0x5678;
+
+	/* ldarx r5, r3, r4, 0 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0)));
+
+	/*
+	 * Don't touch 'a' here. Touching 'a' can do Load/store
+	 * of 'a' which result in failure of subsequent stdcx.
+	 * Instead, use hardcoded value for comparison.
+	 */
+	if (stepped <= 0 || regs.gpr[5] != 0x1234) {
+		show_result("ldarx / stdcx.", "FAIL (ldarx)");
+		return;
+	}
+
+
+	/*** stdcx. ***/
+
+	regs.gpr[5] = 0x9ABC;	/* new value the conditional store may write */
+
+	/* stdcx. r5, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STDCX(5, 3, 4)));
+
+	/*
+	 * Two possible scenarios that indicates successful emulation
+	 * of stdcx. :
+	 *  1. Reservation is active and store is performed. In this
+	 *     case cr0.eq bit will be set to 1.
+	 *  2. Reservation is not active and store is not performed.
+	 *     In this case cr0.eq bit will be set to 0.
+	 */
+	if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq))
+			|| (regs.gpr[5] != a && !(regs.ccr & cr0_eq))))
+		show_result("ldarx / stdcx.", "PASS");
+	else
+		show_result("ldarx / stdcx.", "FAIL (stdcx.)");
+}
+
+#ifdef CONFIG_PPC_FPU
+static void __init test_lfsx_stfsx(void)
+{
+	union {
+		float a;
+		int b;
+	} val;
+	struct pt_regs regs;
+	int loaded_bits;
+	int rc;
+
+	init_pt_regs(&regs);
+
+	/*
+	 * lfsx: load a known single-precision value into FPR 10, keeping a
+	 * copy of its bit pattern for the store check below.
+	 */
+	val.a = 123.45;
+	loaded_bits = val.b;
+
+	regs.gpr[3] = (unsigned long)&val.a;
+	regs.gpr[4] = 0;
+
+	/* lfsx frt10, r3, r4 */
+	rc = emulate_step(&regs, ppc_inst(PPC_RAW_LFSX(10, 3, 4)));
+	if (rc != 1)
+		show_result("lfsx", "FAIL");
+	else
+		show_result("lfsx", "PASS");
+
+	/*
+	 * stfsx: overwrite the memory operand, then store FPR 10 back; the
+	 * remembered bit pattern must reappear in memory.
+	 */
+	val.a = 678.91;
+
+	/* stfsx frs10, r3, r4 */
+	rc = emulate_step(&regs, ppc_inst(PPC_RAW_STFSX(10, 3, 4)));
+	if (rc != 1 || val.b != loaded_bits)
+		show_result("stfsx", "FAIL");
+	else
+		show_result("stfsx", "PASS");
+}
+
+static void __init test_plfs_pstfs(void)
+{	/* Emulates plfs then pstfs through FPR 10 and checks the stored bit pattern. */
+	struct pt_regs regs;
+	union {
+		float a;
+		int b;
+	} c;
+	int cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {	/* name the two tests actually skipped */
+		show_result("plfs", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfs", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** plfs ***/
+
+	c.a = 123.45;
+	cached_b = c.b;	/* bit pattern the later pstfs must write back */
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfs frt10, 0(r3), 0  */
+	stepped = emulate_step(&regs, TEST_PLFS(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfs", "PASS");
+	else
+		show_result("plfs", "FAIL");
+
+
+	/*** pstfs ***/
+
+	c.a = 678.91;	/* clobber memory so a store that never happens is noticed */
+
+	/* pstfs frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFS(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfs", "PASS");
+	else
+		show_result("pstfs", "FAIL");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+	union {
+		double a;
+		long b;
+	} val;
+	struct pt_regs regs;
+	long loaded_bits;
+	bool ok;
+
+	init_pt_regs(&regs);
+
+	/*
+	 * lfdx: load a known double-precision value into FPR 10, keeping a
+	 * copy of its bit pattern for the store check below.
+	 */
+	val.a = 123456.78;
+	loaded_bits = val.b;
+
+	regs.gpr[3] = (unsigned long)&val.a;
+	regs.gpr[4] = 0;
+
+	/* lfdx frt10, r3, r4 */
+	ok = emulate_step(&regs, ppc_inst(PPC_RAW_LFDX(10, 3, 4))) == 1;
+
+	show_result("lfdx", ok ? "PASS" : "FAIL");
+
+	/*
+	 * stfdx: overwrite the memory operand, then store FPR 10 back; the
+	 * remembered bit pattern must reappear in memory.
+	 */
+	val.a = 987654.32;
+
+	/* stfdx frs10, r3, r4 */
+	ok = emulate_step(&regs, ppc_inst(PPC_RAW_STFDX(10, 3, 4))) == 1;
+
+	/* Memory is compared only after the emulated store has run. */
+	if (ok && val.b == loaded_bits)
+		show_result("stfdx", "PASS");
+	else
+		show_result("stfdx", "FAIL");
+}
+
+static void __init test_plfd_pstfd(void)
+{	/* Emulates plfd then pstfd through FPR 10 and checks the stored bit pattern. */
+	struct pt_regs regs;
+	union {
+		double a;
+		long b;
+	} c;
+	long cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {	/* name the two tests actually skipped */
+		show_result("plfd", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** plfd ***/
+
+	c.a = 123456.78;
+	cached_b = c.b;	/* bit pattern the later pstfd must write back */
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfd frt10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLFD(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfd", "PASS");
+	else
+		show_result("plfd", "FAIL");
+
+
+	/*** pstfd ***/
+
+	c.a = 987654.32;	/* clobber memory so a store that never happens is noticed */
+
+	/* pstfd frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFD(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfd", "PASS");
+	else
+		show_result("pstfd", "FAIL");
+}
+#else
+static void __init test_lfsx_stfsx(void)
+{	/* !CONFIG_PPC_FPU build: only reports both mnemonics as skipped. */
+	show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_plfs_pstfs(void)
+{	/* !CONFIG_PPC_FPU build: only reports both mnemonics as skipped. */
+	show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_lfdx_stfdx(void)
+{	/* !CONFIG_PPC_FPU build: only reports both mnemonics as skipped. */
+	show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_plfd_pstfd(void)
+{	/* !CONFIG_PPC_FPU build: only reports both mnemonics as skipped. */
+	show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static void __init test_lvx_stvx(void)
+{	/* Emulates lvx then stvx through vector register 10 and checks the stored words. */
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];	/* copy of the words given to lvx; stvx must reproduce them */
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lvx ***/
+
+	cached_b[0] = c.b[0] = 923745;
+	cached_b[1] = c.b[1] = 2139478;
+	cached_b[2] = c.b[2] = 9012;
+	cached_b[3] = c.b[3] = 982134;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lvx vrt10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LVX(10, 3, 4)));
+
+	if (stepped == 1)	/* only the step result is checked for the load */
+		show_result("lvx", "PASS");
+	else
+		show_result("lvx", "FAIL");
+
+
+	/*** stvx ***/
+
+	c.b[0] = 4987513;	/* overwrite memory so a store that never happens is noticed */
+	c.b[1] = 84313948;
+	c.b[2] = 71;
+	c.b[3] = 498532;
+
+	/* stvx vrs10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STVX(10, 3, 4)));
+
+	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+		show_result("stvx", "PASS");
+	else
+		show_result("stvx", "FAIL");
+}
+#else
+static void __init test_lvx_stvx(void)
+{	/* !CONFIG_ALTIVEC build: only reports both mnemonics as skipped. */
+	show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)");
+	show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)");
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvd2x_stxvd2x(void)
+{	/* Emulates lxvd2x then stxvd2x through VSR 39 and checks the stored words. */
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];	/* copy of the words given to the load; the store must reproduce them */
+	int stepped = -1;
+
+	init_pt_regs(&regs);
+
+
+	/*** lxvd2x ***/
+
+	cached_b[0] = c.b[0] = 18233;
+	cached_b[1] = c.b[1] = 34863571;
+	cached_b[2] = c.b[2] = 834;
+	cached_b[3] = c.b[3] = 6138911;
+
+	regs.gpr[3] = (unsigned long) &c.a;
+	regs.gpr[4] = 0;
+
+	/* lxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4)));
+
+	if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("lxvd2x", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any step result is reported as a pass */
+			show_result("lxvd2x", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("lxvd2x", "FAIL");
+	}
+
+
+	/*** stxvd2x ***/
+
+	c.b[0] = 21379463;	/* overwrite memory so a store that never happens is noticed */
+	c.b[1] = 87;
+	c.b[2] = 374234;
+	c.b[3] = 4;
+
+	/* stxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4)));
+
+	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3] &&
+	    cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("stxvd2x", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any outcome is reported as a pass */
+			show_result("stxvd2x", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("stxvd2x", "FAIL");
+	}
+}
+#else
+static void __init test_lxvd2x_stxvd2x(void)
+{	/* !CONFIG_VSX build: only reports both mnemonics as skipped. */
+	show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvp_stxvp(void)
+{	/* Emulates lxvp then stxvp on a two-vector (32-byte) buffer and checks the stored words. */
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];	/* copy of all eight words given to the load */
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** lxvp ***/
+
+	cached_b[0] = c[0].b[0] = 18233;
+	cached_b[1] = c[0].b[1] = 34863571;
+	cached_b[2] = c[0].b[2] = 834;
+	cached_b[3] = c[0].b[3] = 6138911;
+	cached_b[4] = c[1].b[0] = 1234;
+	cached_b[5] = c[1].b[1] = 5678;
+	cached_b[6] = c[1].b[2] = 91011;
+	cached_b[7] = c[1].b[3] = 121314;
+
+	regs.gpr[4] = (unsigned long)&c[0].a;
+
+	/*
+	 * lxvp XTp,DQ(RA)
+	 * XTp = 32xTX + 2xTp
+	 * let TX=1 Tp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVP(34, 4, 0)));
+
+	if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("lxvp", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any step result is reported as a pass */
+			show_result("lxvp", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("lxvp", "FAIL");
+	}
+
+	/*** stxvp ***/
+
+	c[0].b[0] = 21379463;	/* overwrite memory so a store that never happens is noticed */
+	c[0].b[1] = 87;
+	c[0].b[2] = 374234;
+	c[0].b[3] = 4;
+	c[1].b[0] = 90;
+	c[1].b[1] = 122;
+	c[1].b[2] = 555;
+	c[1].b[3] = 32144;
+
+	/*
+	 * stxvp XSp,DQ(RA)
+	 * XSp = 32xSX + 2xSp
+	 * let SX=1 Sp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVP(34, 4, 0)));
+
+	if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] &&
+	    cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] &&
+	    cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] &&
+	    cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] &&
+	    cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("stxvp", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any outcome is reported as a pass */
+			show_result("stxvp", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("stxvp", "FAIL");
+	}
+}
+#else
+static void __init test_lxvp_stxvp(void)
+{	/* !CONFIG_VSX build: only reports both mnemonics as skipped. */
+	show_result("lxvp", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvpx_stxvpx(void)
+{	/* Indexed-form twin of the lxvp/stxvp test: address comes from r3 + r4. */
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];	/* copy of all eight words given to the load */
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** lxvpx ***/
+
+	cached_b[0] = c[0].b[0] = 18233;
+	cached_b[1] = c[0].b[1] = 34863571;
+	cached_b[2] = c[0].b[2] = 834;
+	cached_b[3] = c[0].b[3] = 6138911;
+	cached_b[4] = c[1].b[0] = 1234;
+	cached_b[5] = c[1].b[1] = 5678;
+	cached_b[6] = c[1].b[2] = 91011;
+	cached_b[7] = c[1].b[3] = 121314;
+
+	regs.gpr[3] = (unsigned long)&c[0].a;
+	regs.gpr[4] = 0;
+
+	/*
+	 * lxvpx XTp,RA,RB
+	 * XTp = 32xTX + 2xTp
+	 * let TX=1 Tp=1 RA=3 RB=4
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVPX(34, 3, 4)));
+
+	if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("lxvpx", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any step result is reported as a pass */
+			show_result("lxvpx", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("lxvpx", "FAIL");
+	}
+
+	/*** stxvpx ***/
+
+	c[0].b[0] = 21379463;	/* overwrite memory so a store that never happens is noticed */
+	c[0].b[1] = 87;
+	c[0].b[2] = 374234;
+	c[0].b[3] = 4;
+	c[1].b[0] = 90;
+	c[1].b[1] = 122;
+	c[1].b[2] = 555;
+	c[1].b[3] = 32144;
+
+	/*
+	 * stxvpx XSp,RA,RB
+	 * XSp = 32xSX + 2xSp
+	 * let SX=1 Sp=1 RA=3 RB=4
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVPX(34, 3, 4)));
+
+	if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] &&
+	    cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] &&
+	    cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] &&
+	    cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] &&
+	    cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("stxvpx", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any outcome is reported as a pass */
+			show_result("stxvpx", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("stxvpx", "FAIL");
+	}
+}
+#else
+static void __init test_lxvpx_stxvpx(void)
+{	/* !CONFIG_VSX build: only reports both mnemonics as skipped. */
+	show_result("lxvpx", "SKIP (CONFIG_VSX is not set)");
+	show_result("stxvpx", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+static void __init test_plxvp_pstxvp(void)
+{	/* Prefixed twin of the lxvp/stxvp test; each instruction is built from a _P and an _S word. */
+	ppc_inst_t instr;
+	struct pt_regs regs;
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];	/* copy of all eight words given to the load */
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	/*** plxvp ***/
+
+	cached_b[0] = c[0].b[0] = 18233;
+	cached_b[1] = c[0].b[1] = 34863571;
+	cached_b[2] = c[0].b[2] = 834;
+	cached_b[3] = c[0].b[3] = 6138911;
+	cached_b[4] = c[1].b[0] = 1234;
+	cached_b[5] = c[1].b[1] = 5678;
+	cached_b[6] = c[1].b[2] = 91011;
+	cached_b[7] = c[1].b[3] = 121314;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&c[0].a;
+
+	/*
+	 * plxvp XTp,D(RA),R
+	 * XTp = 32xTX + 2xTp
+	 * let RA=3 D=d0||d1=0 R=0 Tp=1 TX=1
+	 */
+	instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0));
+
+	stepped = emulate_step(&regs, instr);
+	if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("plxvp", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any step result is reported as a pass */
+			show_result("plxvp", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("plxvp", "FAIL");
+	}
+
+	/*** pstxvp ***/
+
+	c[0].b[0] = 21379463;	/* overwrite memory so a store that never happens is noticed */
+	c[0].b[1] = 87;
+	c[0].b[2] = 374234;
+	c[0].b[3] = 4;
+	c[1].b[0] = 90;
+	c[1].b[1] = 122;
+	c[1].b[2] = 555;
+	c[1].b[3] = 32144;
+
+	/*
+	 * pstxvp XSp,D(RA),R
+	 * XSp = 32xSX + 2xSp
+	 * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1
+	 */
+	instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0));
+
+	stepped = emulate_step(&regs, instr);
+
+	if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] &&
+	    cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] &&
+	    cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] &&
+	    cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] &&
+	    cpu_has_feature(CPU_FTR_VSX)) {
+		show_result("pstxvp", "PASS");
+	} else {
+		if (!cpu_has_feature(CPU_FTR_VSX))	/* without VSX any outcome is reported as a pass */
+			show_result("pstxvp", "PASS (!CPU_FTR_VSX)");
+		else
+			show_result("pstxvp", "FAIL");
+	}
+}
+#else
+static void __init test_plxvp_pstxvp(void)
+{	/* !CONFIG_VSX build: only reports both mnemonics as skipped. */
+	show_result("plxvp", "SKIP (CONFIG_VSX is not set)");
+	show_result("pstxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+static void __init run_tests_load_store(void)
+{	/* Runs every load/store emulation test in turn; each one logs its own result line. */
+	test_ld();
+	test_pld();
+	test_lwz();
+	test_plwz();
+	test_lwzx();
+	test_std();
+	test_pstd();
+	test_ldarx_stdcx();
+	test_lfsx_stfsx();	/* FP tests: reduced to SKIP stubs when CONFIG_PPC_FPU is off */
+	test_plfs_pstfs();
+	test_lfdx_stfdx();
+	test_plfd_pstfd();
+	test_lvx_stvx();	/* SKIP stub when CONFIG_ALTIVEC is off */
+	test_lxvd2x_stxvd2x();	/* VSX tests: SKIP stubs when CONFIG_VSX is off */
+	test_lxvp_stxvp();
+	test_lxvpx_stxvpx();
+	test_plxvp_pstxvp();
+}
+
+struct compute_test {
+	char *mnemonic;			/* name reported through show_result*() */
+	unsigned long cpu_feature;	/* if non-zero, feature required or the test is skipped */
+	struct {
+		char *descr;		/* subtest label; a NULL descr ends the list */
+		unsigned long flags;	/* NEGATIVE_TEST and/or IGNORE_* bits */
+		ppc_inst_t instr;	/* instruction that is emulated and executed */
+		struct pt_regs regs;	/* initial register state for the subtest */
+	} subtests[MAX_SUBTESTS + 1];
+};
+
+/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */
+#define SI_MIN BIT(33)
+#define SI_MAX (BIT(33) - 1)
+#define SI_UMAX (BIT(34) - 1)
+
+static struct compute_test compute_tests[] = {
+	{
+		.mnemonic = "nop",
+		.subtests = {
+			{
+				.descr = "R0 = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_NOP()),
+				.regs = {
+					.gpr[0] = LONG_MAX,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "setb",
+		.cpu_feature = CPU_FTR_ARCH_300,
+		.subtests = {
+			{
+				.descr = "BFA = 1, CR = GT",
+				.instr = ppc_inst(PPC_RAW_SETB(20, 1)),
+				.regs = {
+					.ccr = 0x4000000,
+				}
+			},
+			{
+				.descr = "BFA = 4, CR = LT",
+				.instr = ppc_inst(PPC_RAW_SETB(20, 4)),
+				.regs = {
+					.ccr = 0x8000,
+				}
+			},
+			{
+				.descr = "BFA = 5, CR = EQ",
+				.instr = ppc_inst(PPC_RAW_SETB(20, 5)),
+				.regs = {
+					.ccr = 0x200,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "add",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = 0x1,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MIN",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = INT_MAX, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MAX,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = UINT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = 0x1,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "add.",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.flags = IGNORE_CCR,
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX, RB = LONG_MAX",
+				.flags = IGNORE_CCR,
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = 0x1,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MIN",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = INT_MAX, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MAX,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = UINT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = 0x1,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "addc",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = 0x1,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MIN",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = INT_MAX, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MAX,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = UINT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = 0x1,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN | (uint)INT_MIN,
+					.gpr[22] = LONG_MIN | (uint)INT_MIN,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "addc.",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.flags = IGNORE_CCR,
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX, RB = LONG_MAX",
+				.flags = IGNORE_CCR,
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = ULONG_MAX,
+					.gpr[22] = 0x1,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MIN",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MIN,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = INT_MAX, RB = INT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = INT_MAX,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = UINT_MAX",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, RB = 0x1",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = UINT_MAX,
+					.gpr[22] = 0x1,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN | (uint)INT_MIN,
+					.gpr[22] = LONG_MIN | (uint)INT_MIN,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divde",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divde.",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divdeu",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX - 1,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN + 1,
+					.gpr[22] = LONG_MIN,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divdeu.",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX - 1,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN + 1,
+					.gpr[22] = LONG_MIN,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "paddi",
+		.cpu_feature = CPU_FTR_ARCH_31,
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_UMAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, SI = 0x1, R = 0",
+				.instr = TEST_PADDI(21, 22, 0x1, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MAX, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, SI = 0x1, R = 0",
+				.instr = TEST_PADDI(21, 22, 0x1, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA is r0, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 0, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0x0,
+				}
+			},
+			{
+				.descr = "RA = 0, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0x0,
+					.gpr[22] = 0x0,
+				}
+			},
+			{
+				.descr = "RA is r0, SI = 0, R = 1",
+				.instr = TEST_PADDI(21, 0, 0, 1),
+				.regs = {
+					.gpr[21] = 0,
+				}
+			},
+			{
+				.descr = "RA is r0, SI = SI_MIN, R = 1",
+				.instr = TEST_PADDI(21, 0, SI_MIN, 1),
+				.regs = {
+					.gpr[21] = 0,
+				}
+			},
+			/* Invalid instruction form with R = 1 and RA != 0 */
+			{
+				.descr = "RA = R22(0), SI = 0, R = 1",
+				.instr = TEST_PADDI(21, 22, 0, 1),
+				.flags = NEGATIVE_TEST,
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = 0,
+				}
+			}
+		}
+	}
+};
+
+static int __init emulate_compute_instr(struct pt_regs *regs,
+					ppc_inst_t instr,
+					bool negative)
+{
+	struct instruction_op op;
+	int analysed;
+
+	if (!regs || !ppc_inst_val(instr))
+		return -EINVAL;
+
+	/* Not a return frame: NIP is set to the patch site of the test instruction */
+	regs->nip = patch_site_addr(&patch__exec_instr);
+
+	analysed = analyse_instr(&op, regs, instr);
+	if (analysed != 1 || GETTYPE(op.type) != COMPUTE) {
+		if (!negative)
+			pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+		return -EFAULT;
+	}
+	/* Analysis succeeded, which is the failing outcome for a negative test */
+	if (negative)
+		pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+	else
+		emulate_update_regs(regs, &op);
+	return 0;
+}
+
+static int __init execute_compute_instr(struct pt_regs *regs,
+					ppc_inst_t instr)
+{
+	extern int exec_instr(struct pt_regs *regs);
+
+	if (!regs || !ppc_inst_val(instr))
+		return -EINVAL;
+
+	/* Replace the placeholder NOP with the instruction under test */
+	patch_instruction_site(&patch__exec_instr, instr);
+	if (!exec_instr(regs))
+		return 0;
+
+	/* exec_instr() returned a non-zero status */
+	pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+	return -EFAULT;
+}
+
+#define gpr_mismatch(gprn, exp, got)	\
+	pr_info("GPR%u mismatch, exp = 0x%016lx, got = 0x%016lx\n",	\
+		gprn, exp, got)
+
+#define reg_mismatch(name, exp, got)	\
+	pr_info("%s mismatch, exp = 0x%016lx, got = 0x%016lx\n",	\
+		name, exp, got)
+
+static void __init run_tests_compute(void)
+{
+	unsigned long flags;
+	struct compute_test *test;
+	struct pt_regs *regs, exp, got;	/* exp: after real execution, got: after emulation */
+	unsigned int i, j, k;
+	ppc_inst_t instr;
+	bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
+
+	for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
+		test = &compute_tests[i];
+
+		if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) {
+			show_result(test->mnemonic, "SKIP (!CPU_FTR)");
+			continue;
+		}
+
+		for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) {
+			instr = test->subtests[j].instr;
+			flags = test->subtests[j].flags;
+			regs = &test->subtests[j].regs;
+			negative = flags & NEGATIVE_TEST;
+			ignore_xer = flags & IGNORE_XER;
+			ignore_ccr = flags & IGNORE_CCR;
+			passed = true;
+
+			memcpy(&exp, regs, sizeof(struct pt_regs));
+			memcpy(&got, regs, sizeof(struct pt_regs));
+
+			/*
+			 * Set a compatible MSR value explicitly to ensure
+			 * that XER and CR bits are updated appropriately
+			 */
+			exp.msr = MSR_KERNEL;
+			got.msr = MSR_KERNEL;
+
+			rc = emulate_compute_instr(&got, instr, negative) != 0;	/* true if emulation failed */
+			if (negative) {
+				/* never executed: passes only if emulation was rejected */
+				passed = rc;
+				goto print;
+			} else if (rc || execute_compute_instr(&exp, instr)) {
+				passed = false;
+				goto print;
+			}
+
+			/* Verify GPR values, skipping any GPR masked by IGNORE_GPR() */
+			for (k = 0; k < 32; k++) {
+				ignore_gpr = flags & IGNORE_GPR(k);
+				if (!ignore_gpr && exp.gpr[k] != got.gpr[k]) {
+					passed = false;
+					gpr_mismatch(k, exp.gpr[k], got.gpr[k]);
+				}
+			}
+
+			/* Verify LR value (no flag exists to ignore it) */
+			if (exp.link != got.link) {
+				passed = false;
+				reg_mismatch("LR", exp.link, got.link);
+			}
+
+			/* Verify XER value unless IGNORE_XER is set */
+			if (!ignore_xer && exp.xer != got.xer) {
+				passed = false;
+				reg_mismatch("XER", exp.xer, got.xer);
+			}
+
+			/* Verify CR value unless IGNORE_CCR is set */
+			if (!ignore_ccr && exp.ccr != got.ccr) {
+				passed = false;
+				reg_mismatch("CR", exp.ccr, got.ccr);
+			}
+
+print:
+			show_result_with_descr(test->mnemonic,
+					       test->subtests[j].descr,
+					       passed ? "PASS" : "FAIL");
+		}
+	}
+}
+
+static int __init test_emulate_step(void)
+{
+	printk(KERN_INFO "Running instruction emulation self-tests ...\n");
+	run_tests_load_store();	/* load/store tests first, then the compute table */
+	run_tests_compute();
+
+	return 0;	/* results are only logged; this function always returns 0 */
+}
+late_initcall(test_emulate_step);
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
new file mode 100644
index 0000000000..e2b646a4f7
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Non-emulated single-stepping support (currently limited to basic integer
+ * computations) used to validate the instruction emulation infrastructure.
+ *
+ * Copyright (C) 2019 IBM Corporation
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <linux/errno.h>
+
+/* int exec_instr(struct pt_regs *regs): returns 0, or -EFAULT via the exception table fixup */
+_GLOBAL(exec_instr)
+
+	/*
+	 * Stack frame layout (INT_FRAME_SIZE bytes)
+	 *   In-memory pt_regs	(SP + STACK_INT_FRAME_REGS)
+	 *   Scratch space	(SP + 8)
+	 *   Back chain		(SP + 0)
+	 */
+
+	/*
+	 * Allocate a new stack frame with enough space to hold the register
+	 * states in an in-memory pt_regs and also create the back chain to
+	 * the caller's stack frame.
+	 */
+	stdu	r1, -INT_FRAME_SIZE(r1)
+
+	/*
+	 * Save non-volatile GPRs on stack. This includes TOC pointer (GPR2)
+	 * and local variables (GPR14 to GPR31). The register for the pt_regs
+	 * parameter (GPR3) is saved additionally to ensure that the resulting
+	 * register state can still be saved even if GPR3 gets overwritten
+	 * when loading the initial register state for the test instruction.
+	 * The stack pointer (GPR1) and the thread pointer (GPR13) are not
+	 * saved as these should not be modified anyway.
+	 */
+	SAVE_GPRS(2, 3, r1)
+	SAVE_NVGPRS(r1)
+
+	/*
+	 * Save LR on stack to ensure that the return address is available
+	 * even if it gets overwritten by the test instruction.
+	 */
+	mflr	r0
+	std	r0, _LINK(r1)
+
+	/*
+	 * Save CR on stack. For simplicity, the entire register is saved
+	 * even though only fields 2 to 4 are non-volatile.
+	 */
+	mfcr	r0
+	std	r0, _CCR(r1)
+
+	/*
+	 * Load register state for the test instruction without touching the
+	 * critical non-volatile registers. r31 is the pt_regs pointer biased
+	 * by -GPR0 so that the frame-relative offsets used below apply to it.
+	 */
+	subi	r31, r3, GPR0
+
+	/* Load LR from pt_regs */
+	ld	r0, _LINK(r31)
+	mtlr	r0
+
+	/* Load CR from pt_regs */
+	ld	r0, _CCR(r31)
+	mtcr	r0
+
+	/* Load XER from pt_regs */
+	ld	r0, _XER(r31)
+	mtxer	r0
+
+	/* Load GPRs from pt_regs (GPR1 and GPR13 are left alone) */
+	REST_GPR(0, r31)
+	REST_GPRS(2, 12, r31)
+	REST_NVGPRS(r31)
+
+	/* Placeholder for the test instruction (two words, patched at 1b) */
+	.balign 64
+1:	nop
+	nop
+	patch_site 1b patch__exec_instr
+
+	/*
+	 * Since GPR3 is overwritten, temporarily restore it back to its
+	 * original state, i.e. the pointer to pt_regs, to ensure that the
+	 * resulting register state can be saved. Before doing this, a copy
+	 * of it is created in the scratch space which is used later on to
+	 * save it to pt_regs.
+	 */
+	std	r3, 8(r1)
+	REST_GPR(3, r1)
+
+	/* Save resulting GPR state to pt_regs (r3 biased by -GPR0, GPR3 saved later) */
+	subi	r3, r3, GPR0
+	SAVE_GPR(0, r3)
+	SAVE_GPR(2, r3)
+	SAVE_GPRS(4, 12, r3)
+	SAVE_NVGPRS(r3)
+
+	/* Save resulting LR to pt_regs */
+	mflr	r0
+	std	r0, _LINK(r3)
+
+	/* Save resulting CR to pt_regs */
+	mfcr	r0
+	std	r0, _CCR(r3)
+
+	/* Save resulting XER to pt_regs */
+	mfxer	r0
+	std	r0, _XER(r3)
+
+	/* Reload the resulting GPR3 from scratch space (into r0) and save it to pt_regs */
+	ld	r0, 8(r1)
+	std	r0, GPR3(r3)
+
+	/* Set return value to denote execution success */
+	li	r3, 0
+
+	/* Continue */
+	b	3f
+
+	/* Set return value to denote execution failure (exception table target) */
+2:	li	r3, -EFAULT
+
+	/* Restore the non-volatile GPRs from stack */
+3:	REST_GPR(2, r1)
+	REST_NVGPRS(r1)
+
+	/* Restore LR from stack to be able to return */
+	ld	r0, _LINK(r1)
+	mtlr	r0
+
+	/* Restore CR from stack */
+	ld	r0, _CCR(r1)
+	mtcr	r0
+
+	/* Tear down stack frame */
+	addi	r1, r1, INT_FRAME_SIZE
+
+	/* Return */
+	blr
+
+	/* Setup exception table: an exception at 1b resumes at 2b */
+	EX_TABLE(1b, 2b)
+
+_ASM_NOKPROBE_SYMBOL(exec_instr)
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
new file mode 100644
index 0000000000..d491da8d18
--- /dev/null
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
+ *          Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+
+int enter_vmx_usercopy(void)
+{
+	/* The VMX copy path is refused in interrupt context (returns 0) */
+	if (!in_interrupt()) {
+		preempt_disable();
+
+		/*
+		 * A page fault can call schedule and so lose the VMX context.
+		 * With faults disabled the access just fails, which causes a
+		 * fallback to the normal non-vmx copy.
+		 */
+		pagefault_disable();
+		enable_kernel_altivec();
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * This function must return 0 because we tail call optimise when calling
+ * from __copy_tofrom_user_power7 which returns 0 on success.
+ */
+int exit_vmx_usercopy(void)
+{
+	disable_kernel_altivec();
+	pagefault_enable();
+	preempt_enable_no_resched();	/* the no-resched variant is deliberate, see below */
+	/*
+	 * Must never explicitly call schedule (including preempt_enable())
+	 * while in a kuap-unlocked user copy, because the AMR register will
+	 * not be saved and restored across context switch. However preempt
+	 * kernels need to be preempted as soon as possible if need_resched is
+	 * set and we are preemptible. The hack here is to schedule a
+	 * decrementer to fire here and reschedule for us if necessary.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
+		set_dec(1);
+	return 0;	/* always 0 */
+}
+
+int enter_vmx_ops(void)
+{
+	/* VMX is refused in interrupt context (returns 0) */
+	if (!in_interrupt()) {
+		/* Preemption stays off until exit_vmx_ops() */
+		preempt_disable();
+		enable_kernel_altivec();
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination which we return as required by a
+ * memcpy implementation.
+ */
+void *exit_vmx_ops(void *dest)
+{
+	disable_kernel_altivec();
+	preempt_enable();	/* plain variant, unlike exit_vmx_usercopy() */
+	return dest;		/* handed back untouched */
+}
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
new file mode 100644
index 0000000000..aab49d056d
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+/*
+ * Sparse (as of v0.5.0) gets very, very confused by this file.
+ * Make it a bit simpler for it: plain C stand-ins for the altivec names.
+ */
+#if !defined(__CHECKER__)
+#include <altivec.h>
+#else
+#define vec_xor(a, b) ((a) ^ (b))	/* parenthesised so it nests safely in expressions */
+#define vector __attribute__((vector_size(16)))
+#endif
+
+#include "xor_vmx.h"
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V)				\
+	unative_t *V = (unative_t *)V##_in;	\
+	unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V)			\
+	do {			\
+		V##_0 = V[0];	\
+		V##_1 = V[1];	\
+		V##_2 = V[2];	\
+		V##_3 = V[3];	\
+	} while (0)
+
+#define STORE(V)		\
+	do {			\
+		V[0] = V##_0;	\
+		V[1] = V##_1;	\
+		V[2] = V##_2;	\
+		V[3] = V##_3;	\
+	} while (0)
+
+#define XOR(V1, V2)					\
+	do {						\
+		V1##_0 = vec_xor(V1##_0, V2##_0);	\
+		V1##_1 = vec_xor(V1##_1, V2##_1);	\
+		V1##_2 = vec_xor(V1##_2, V2##_2);	\
+		V1##_3 = vec_xor(V1##_3, V2##_3);	\
+	} while (0)
+
+void __xor_altivec_2(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;	/* groups of four vectors */
+
+	do {	/* count is tested after the body: lines == 0 would wrap around */
+		LOAD(v1);
+		LOAD(v2);
+		XOR(v1, v2);
+		STORE(v1);	/* v1 ^ v2 is written back over the v1_in buffer */
+
+		v1 += 4;
+		v2 += 4;
+	} while (--lines > 0);
+}
+
+void __xor_altivec_3(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;	/* groups of four vectors */
+
+	do {	/* count is tested after the body: lines == 0 would wrap around */
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		XOR(v1, v2);
+		XOR(v1, v3);
+		STORE(v1);	/* v1 ^ v2 ^ v3 is written back over the v1_in buffer */
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+	} while (--lines > 0);
+}
+
+void __xor_altivec_4(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in,
+		     const unsigned long * __restrict v4_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	DEFINE(v4);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;	/* groups of four vectors */
+
+	do {	/* count is tested after the body: lines == 0 would wrap around */
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		LOAD(v4);
+		XOR(v1, v2);
+		XOR(v3, v4);	/* combined in local copies only; v3 is never stored */
+		XOR(v1, v3);
+		STORE(v1);	/* v1 ^ v2 ^ v3 ^ v4 is written back over the v1_in buffer */
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+		v4 += 4;
+	} while (--lines > 0);
+}
+
+void __xor_altivec_5(unsigned long bytes,
+		     unsigned long * __restrict v1_in,
+		     const unsigned long * __restrict v2_in,
+		     const unsigned long * __restrict v3_in,
+		     const unsigned long * __restrict v4_in,
+		     const unsigned long * __restrict v5_in)
+{
+	DEFINE(v1);
+	DEFINE(v2);
+	DEFINE(v3);
+	DEFINE(v4);
+	DEFINE(v5);
+	unsigned long lines = bytes / (sizeof(unative_t)) / 4;	/* groups of four vectors */
+
+	do {	/* count is tested after the body: lines == 0 would wrap around */
+		LOAD(v1);
+		LOAD(v2);
+		LOAD(v3);
+		LOAD(v4);
+		LOAD(v5);
+		XOR(v1, v2);
+		XOR(v3, v4);	/* combined in local copies only; v3 is never stored */
+		XOR(v1, v5);
+		XOR(v1, v3);
+		STORE(v1);	/* v1 ^ v2 ^ v3 ^ v4 ^ v5 is written back over the v1_in buffer */
+
+		v1 += 4;
+		v2 += 4;
+		v3 += 4;
+		v4 += 4;
+		v5 += 4;
+	} while (--lines > 0);
+}
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
new file mode 100644
index 0000000000..573c41d90d
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple interface to link xor_vmx.c and xor_vmx_glue.c
+ *
+ * Separating these files ensures that no altivec instructions are run
+ * outside of the enable/disable altivec block.
+ */
+
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
new file mode 100644
index 0000000000..35d917ece4
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Altivec XOR operations
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+#include <asm/xor_altivec.h>
+#include "xor_vmx.h"
+
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2)
+{
+	preempt_disable();		/* preemption stays off for the whole altivec section */
+	enable_kernel_altivec();
+	__xor_altivec_2(bytes, p1, p2);	/* p1 ^= p2; the altivec code itself is in xor_vmx.c */
+	disable_kernel_altivec();
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3)
+{
+	preempt_disable();		/* preemption stays off for the whole altivec section */
+	enable_kernel_altivec();
+	__xor_altivec_3(bytes, p1, p2, p3);	/* p1 ^= p2 ^ p3; altivec code is in xor_vmx.c */
+	disable_kernel_altivec();
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4)
+{
+	preempt_disable();		/* preemption stays off for the whole altivec section */
+	enable_kernel_altivec();
+	__xor_altivec_4(bytes, p1, p2, p3, p4);	/* p1 ^= p2 ^ p3 ^ p4; altivec code is in xor_vmx.c */
+	disable_kernel_altivec();
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+		   const unsigned long * __restrict p2,
+		   const unsigned long * __restrict p3,
+		   const unsigned long * __restrict p4,
+		   const unsigned long * __restrict p5)
+{
+	preempt_disable();		/* preemption stays off for the whole altivec section */
+	enable_kernel_altivec();
+	__xor_altivec_5(bytes, p1, p2, p3, p4, p5);	/* p1 ^= p2 ^ p3 ^ p4 ^ p5; altivec code is in xor_vmx.c */
+	disable_kernel_altivec();
+	preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
new file mode 100644
index 0000000000..603e59c3db
--- /dev/null
+++ b/arch/powerpc/math-emu/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+math-emu-common-objs = math.o fre.o fsqrt.o fsqrts.o frsqrtes.o mtfsf.o mtfsfi.o
+obj-$(CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED) += $(math-emu-common-objs)
+obj-$(CONFIG_MATH_EMULATION_FULL) += $(math-emu-common-objs) fabs.o fadd.o \
+					fadds.o fcmpo.o fcmpu.o fctiw.o \
+					fctiwz.o fdiv.o fdivs.o  fmadd.o \
+					fmadds.o fmsub.o fmsubs.o fmul.o \
+					fmuls.o fnabs.o fneg.o fnmadd.o \
+					fnmadds.o fnmsub.o fnmsubs.o fres.o \
+					frsp.o fsel.o lfs.o frsqrte.o fsub.o \
+					fsubs.o  mcrfs.o mffs.o mtfsb0.o \
+					mtfsb1.o stfiwx.o stfs.o fmr.o lfd.o stfd.o
+
+obj-$(CONFIG_SPE)		+= math_efp.o
+
+# fabs.c defines a non-libm fabs(); -fno-builtin-fabs stops the compiler assuming the builtin.
+CFLAGS_fabs.o = -fno-builtin-fabs
+CFLAGS_math.o = -fno-builtin-fabs
+# These warnings are removed for every object in this directory ...
+ccflags-remove-y = -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+# ... and added back for math.o and math_efp.o only when KBUILD_EXTRA_WARN is defined.
+ifdef KBUILD_EXTRA_WARN
+CFLAGS_math.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+CFLAGS_math_efp.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+endif
diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c
new file mode 100644
index 0000000000..3b62fd70b7
--- /dev/null
+++ b/arch/powerpc/math-emu/fabs.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/* Emulate fabs: copy the double at frB to frD with bit 31 of word 0 cleared. */
+int fabs(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0] & ~0x80000000U;	/* presumably the sign bit - TODO confirm */
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __func__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c
new file mode 100644
index 0000000000..727e49ad55
--- /dev/null
+++ b/arch/powerpc/math-emu/fadd.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fadd: frD = frA + frB in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fadd(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* NOTE(review): no explicit EFLAG_VXISI check for Inf + -Inf, unlike fsub() - confirm. */
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c
new file mode 100644
index 0000000000..45254be056
--- /dev/null
+++ b/arch/powerpc/math-emu/fadds.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fadds: frA + frB, packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fadds(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* NOTE(review): no explicit EFLAG_VXISI check for Inf + -Inf, unlike fsubs() - confirm. */
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c
new file mode 100644
index 0000000000..f437d0896c
--- /dev/null
+++ b/arch/powerpc/math-emu/fcmpo.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+/* Emulate fcmpo: compare frA with frB; the code goes to __FPU_FPSCR and CR field crfD. */
+int fcmpo(u32 *ccr, int crfD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_EX;
+	static const int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) };
+	long cmp;
+
+#ifdef DEBUG
+	printk("%s: %p (%08x) %d %p %p\n", __func__, ccr, *ccr, crfD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* Ordered compare: a NaN in either operand raises EFLAG_VXVC. */
+	if (A_c == FP_CLS_NAN || B_c == FP_CLS_NAN)
+		FP_SET_EXCEPTION(EFLAG_VXVC);
+	/* Presumably cmp is -1/0/1 for less/equal/greater and 2 for unordered - TODO confirm. */
+	FP_CMP_D(cmp, A, B, 2);
+	cmp = code[(cmp + 1) & 3];	/* one-hot 4-bit condition code */
+
+	__FPU_FPSCR &= ~(0x1f000);
+	__FPU_FPSCR |= (cmp << 12);
+	/* Unsigned shifts: crfD == 0 shifts by 28, which overflows a signed int/32-bit long. */
+	*ccr &= ~(15U << ((7 - crfD) << 2));
+	*ccr |= ((u32)cmp << ((7 - crfD) << 2));
+
+#ifdef DEBUG
+	printk("CR: %08x\n", *ccr);
+#endif
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c
new file mode 100644
index 0000000000..65631fa5dc
--- /dev/null
+++ b/arch/powerpc/math-emu/fcmpu.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+/* Emulate fcmpu: compare frA with frB; the code goes to __FPU_FPSCR and CR field crfD. */
+int fcmpu(u32 *ccr, int crfD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_EX;
+	static const int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) };
+	long cmp;
+
+#ifdef DEBUG
+	printk("%s: %p (%08x) %d %p %p\n", __func__, ccr, *ccr, crfD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* Presumably cmp is -1/0/1 for less/equal/greater and 2 for unordered - TODO confirm. */
+	FP_CMP_D(cmp, A, B, 2);
+	cmp = code[(cmp + 1) & 3];	/* one-hot 4-bit condition code */
+
+	__FPU_FPSCR &= ~(0x1f000);
+	__FPU_FPSCR |= (cmp << 12);
+	/* Unsigned shifts: crfD == 0 shifts by 28, which overflows a signed int/32-bit long. */
+	*ccr &= ~(15U << ((7 - crfD) << 2));
+	*ccr |= ((u32)cmp << ((7 - crfD) << 2));
+
+#ifdef DEBUG
+	printk("CR: %08x\n", *ccr);
+#endif
+
+	return 0;	/* NOTE(review): FP_CUR_EXCEPTIONS is not returned, unlike fcmpo() */
+}
diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c
new file mode 100644
index 0000000000..ebb0f11e73
--- /dev/null
+++ b/arch/powerpc/math-emu/fctiw.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fctiw: convert the double at frB to a 32-bit integer stored in frD[1]. */
+int
+fctiw(u32 *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_EX;
+	unsigned int r;
+
+	FP_UNPACK_DP(B, frB);
+	FP_TO_INT_D(r, B, 32, 1);
+	frD[1] = r;	/* only word 1 is written; frD[0] keeps its old contents */
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __func__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;	/* NOTE(review): FP_CUR_EXCEPTIONS is not returned - confirm intended */
+}
diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c
new file mode 100644
index 0000000000..426271c4f0
--- /dev/null
+++ b/arch/powerpc/math-emu/fctiwz.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fctiwz: double at frB to 32-bit integer in frD[1], converted under FP_RND_ZERO. */
+int
+fctiwz(u32 *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_EX;
+	u32 fpscr;
+	unsigned int r;
+
+	fpscr = __FPU_FPSCR;	/* saved so the original value can be put back below */
+	__FPU_FPSCR &= ~(3);	/* presumably the rounding-mode field - TODO confirm */
+	__FPU_FPSCR |= FP_RND_ZERO;
+
+	FP_UNPACK_DP(B, frB);
+	FP_TO_INT_D(r, B, 32, 1);
+	frD[1] = r;	/* only word 1 is written; frD[0] keeps its old contents */
+
+	__FPU_FPSCR = fpscr;
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __func__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;	/* NOTE(review): FP_CUR_EXCEPTIONS is not returned - confirm intended */
+}
diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c
new file mode 100644
index 0000000000..6e64ece2d3
--- /dev/null
+++ b/arch/powerpc/math-emu/fdiv.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fdiv: frD = frA / frB in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fdiv(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* 0/0 and Inf/Inf each raise their own exception flag. */
+	if (A_c == FP_CLS_ZERO && B_c == FP_CLS_ZERO) {
+		FP_SET_EXCEPTION(EFLAG_VXZDZ);
+#ifdef DEBUG
+		printk("%s: FPSCR_VXZDZ raised\n", __func__);
+#endif
+	}
+	if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) {
+		FP_SET_EXCEPTION(EFLAG_VXIDI);
+#ifdef DEBUG
+		printk("%s: FPSCR_VXIDI raised\n", __func__);
+#endif
+	}
+	/* B zero, A not: flag it and, if __FPU_TRAP_P() says so, return before frD is written. */
+	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) {
+		FP_SET_EXCEPTION(EFLAG_DIVZERO);
+		if (__FPU_TRAP_P(EFLAG_DIVZERO))
+			return FP_CUR_EXCEPTIONS;
+	}
+	FP_DIV_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c
new file mode 100644
index 0000000000..f9f7adf462
--- /dev/null
+++ b/arch/powerpc/math-emu/fdivs.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fdivs: frA / frB, packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fdivs(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* 0/0 and Inf/Inf each raise their own exception flag. */
+	if (A_c == FP_CLS_ZERO && B_c == FP_CLS_ZERO) {
+		FP_SET_EXCEPTION(EFLAG_VXZDZ);
+#ifdef DEBUG
+		printk("%s: FPSCR_VXZDZ raised\n", __func__);
+#endif
+	}
+	if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) {
+		FP_SET_EXCEPTION(EFLAG_VXIDI);
+#ifdef DEBUG
+		printk("%s: FPSCR_VXIDI raised\n", __func__);
+#endif
+	}
+	/* B zero, A not: flag it and, if __FPU_TRAP_P() says so, return before frD is written. */
+	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) {
+		FP_SET_EXCEPTION(EFLAG_DIVZERO);
+		if (__FPU_TRAP_P(EFLAG_DIVZERO))
+			return FP_CUR_EXCEPTIONS;
+	}
+
+	FP_DIV_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c
new file mode 100644
index 0000000000..e8458aed5e
--- /dev/null
+++ b/arch/powerpc/math-emu/fmadd.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fmadd: frD = (frA * frC) + frB in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fmadd(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c
new file mode 100644
index 0000000000..a6d3f9842f
--- /dev/null
+++ b/arch/powerpc/math-emu/fmadds.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fmadds: (frA * frC) + frB, packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fmadds(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c
new file mode 100644
index 0000000000..48c6437428
--- /dev/null
+++ b/arch/powerpc/math-emu/fmr.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/* Emulate fmr: copy both 32-bit words of the value at frB to frD unchanged. */
+int fmr(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0];	/* word 0 first, then word 1 */
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __func__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c
new file mode 100644
index 0000000000..605cda49e7
--- /dev/null
+++ b/arch/powerpc/math-emu/fmsub.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fmsub: frD = (frA * frC) - frB in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fmsub(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;	/* subtract by adding -B; a NaN keeps its sign */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c
new file mode 100644
index 0000000000..f26ec0acf0
--- /dev/null
+++ b/arch/powerpc/math-emu/fmsubs.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fmsubs: (frA * frC) - frB, packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fmsubs(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;	/* subtract by adding -B; a NaN keeps its sign */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c
new file mode 100644
index 0000000000..d114f7acdb
--- /dev/null
+++ b/arch/powerpc/math-emu/fmul.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fmul: frD = frA * frB in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fmul(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       A_s, A_f1, A_f0, A_e, A_c, A_f1, A_f0, A_e + 1023);
+	printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       B_s, B_f1, B_f0, B_e, B_c, B_f1, B_f0, B_e + 1023);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && B_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && B_c == FP_CLS_INF))
+		FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       R_s, R_f1, R_f0, R_e, R_c, R_f1, R_f0, R_e + 1023);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c
new file mode 100644
index 0000000000..aaeba0acb4
--- /dev/null
+++ b/arch/powerpc/math-emu/fmuls.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fmuls: frA * frB, packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fmuls(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       A_s, A_f1, A_f0, A_e, A_c, A_f1, A_f0, A_e + 1023);
+	printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       B_s, B_f1, B_f0, B_e, B_c, B_f1, B_f0, B_e + 1023);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && B_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && B_c == FP_CLS_INF))
+		FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+	       R_s, R_f1, R_f0, R_e, R_c, R_f1, R_f0, R_e + 1023);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c
new file mode 100644
index 0000000000..6c439e6c2c
--- /dev/null
+++ b/arch/powerpc/math-emu/fnabs.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/* Emulate fnabs: copy the double at frB to frD with bit 31 of word 0 set. */
+int fnabs(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0] | 0x80000000U;	/* presumably the sign bit - TODO confirm */
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __func__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c
new file mode 100644
index 0000000000..791e724f71
--- /dev/null
+++ b/arch/powerpc/math-emu/fneg.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/* Emulate fneg: copy the double at frB to frD with bit 31 of word 0 inverted. */
+int fneg(u32 *frD, u32 *frB)
+{
+	frD[0] = frB[0] ^ 0x80000000U;	/* presumably the sign bit - TODO confirm */
+	frD[1] = frB[1];
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p: ", __func__, frD, frB);
+	dump_double(frD);
+	printk("\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c
new file mode 100644
index 0000000000..02a7099b26
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmadd.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fnmadd: frD = -((frA * frC) + frB) in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fnmadd(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;	/* negate the sum; a NaN result keeps its sign */
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c
new file mode 100644
index 0000000000..ce42a7a44d
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmadds.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fnmadds: -((frA * frC) + frB), packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fnmadds(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+                FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;	/* negate the sum; a NaN result keeps its sign */
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c
new file mode 100644
index 0000000000..eade699c51
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmsub.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+/* Emulate fnmsub: frD = -((frA * frC) - frB) in double precision; returns FP_CUR_EXCEPTIONS. */
+int
+fnmsub(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;	/* subtract by adding -B; a NaN keeps its sign */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;	/* negate the difference; a NaN result keeps its sign */
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c
new file mode 100644
index 0000000000..4e1f6c2b7c
--- /dev/null
+++ b/arch/powerpc/math-emu/fnmsubs.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate fnmsubs: -((frA * frC) - frB), packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+fnmsubs(void *frD, void *frA, void *frB, void *frC)
+{
+	FP_DECL_D(R);
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(C);
+	FP_DECL_D(T);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+	FP_UNPACK_DP(C, frC);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+	printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+	/* Inf * 0, in either operand order, raises EFLAG_VXIMZ. */
+	if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) ||
+	    (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF))
+		FP_SET_EXCEPTION(EFLAG_VXIMZ);
+
+	FP_MUL_D(T, A, C);	/* T = A * C, kept in unpacked form */
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;	/* subtract by adding -B; a NaN keeps its sign */
+	/* Adding infinities of opposite sign raises EFLAG_VXISI. */
+	if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, T, B);
+
+	if (R_c != FP_CLS_NAN)
+		R_s ^= 1;	/* negate the difference; a NaN result keeps its sign */
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c
new file mode 100644
index 0000000000..584b16f533
--- /dev/null
+++ b/arch/powerpc/math-emu/fre.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+/* fre is not emulated: log the call under DEBUG and fail with -ENOSYS. */
+int fre(void *frD, void *frB)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+	return -ENOSYS;
+}
diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c
new file mode 100644
index 0000000000..f7d5654ce7
--- /dev/null
+++ b/arch/powerpc/math-emu/fres.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/* fres is not emulated: log the call under DEBUG and fail with -ENOSYS. */
+int fres(void *frD, void *frB)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+	return -ENOSYS;	/* frD is never written */
+}
diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c
new file mode 100644
index 0000000000..cb33e3d9bb
--- /dev/null
+++ b/arch/powerpc/math-emu/frsp.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+/* Emulate frsp: repack the double at frB into frD via __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int
+frsp(void *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: D %p, B %p\n", __func__, frD, frB);
+#endif
+
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+	__FP_PACK_DS(frD, B);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c
new file mode 100644
index 0000000000..72955b27c3
--- /dev/null
+++ b/arch/powerpc/math-emu/frsqrte.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/* frsqrte stub: logs the call under DEBUG, leaves frD untouched and returns 0. */
+int frsqrte(void *frD, void *frB)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+	return 0;	/* NOTE(review): fre()/fres() return -ENOSYS instead - confirm */
+}
diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c
new file mode 100644
index 0000000000..a036f7b714
--- /dev/null
+++ b/arch/powerpc/math-emu/frsqrtes.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+/* frsqrtes stub: logs the call under DEBUG, leaves frD untouched and returns 0. */
+int frsqrtes(void *frD, void *frB)
+{
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+	return 0;	/* NOTE(review): fre()/fres() return -ENOSYS instead - confirm */
+}
diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c
new file mode 100644
index 0000000000..b0d15e15a5
--- /dev/null
+++ b/arch/powerpc/math-emu/fsel.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+/* Emulate fsel: copy frB to frD if frA is NaN or negative non-zero, otherwise copy frC. */
+int fsel(u32 *frD, void *frA, u32 *frB, u32 *frC)
+{
+	FP_DECL_D(A);
+	FP_DECL_EX;
+	u32 *src;
+
+#ifdef DEBUG
+	printk("%s: %p %p %p %p\n", __func__, frD, frA, frB, frC);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %08x %08x\n", frB[0], frB[1]);
+	printk("C: %08x %08x\n", frC[0], frC[1]);
+#endif
+
+	/* A zero of either sign and any positive value pick frC. */
+	src = (A_c != FP_CLS_NAN && (A_c == FP_CLS_ZERO || !A_s)) ?
+		frC : frB;
+
+	frD[0] = src[0];
+	frD[1] = src[1];
+
+#ifdef DEBUG
+	printk("D: %08x.%08x\n", frD[0], frD[1]);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c
new file mode 100644
index 0000000000..0543859038
--- /dev/null
+++ b/arch/powerpc/math-emu/fsqrt.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+/* Emulate fsqrt: frD = sqrt(frB) in double precision; returns FP_CUR_EXCEPTIONS. */
+int fsqrt(void *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* NOTE(review): VXSNAN is raised for every FP_CLS_NAN input, quiet or not - confirm. */
+	if (B_s && B_c != FP_CLS_ZERO)
+		FP_SET_EXCEPTION(EFLAG_VXSQRT);	/* sign set and not a zero */
+	if (B_c == FP_CLS_NAN)
+		FP_SET_EXCEPTION(EFLAG_VXSNAN);
+
+	FP_SQRT_D(R, B);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);	/* store the packed result through frD */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c
new file mode 100644
index 0000000000..1624f97c69
--- /dev/null
+++ b/arch/powerpc/math-emu/fsqrts.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+
+/* Emulate fsqrts: sqrt(frB), packed into frD by __FP_PACK_DS; returns FP_CUR_EXCEPTIONS. */
+int fsqrts(void *frD, void *frB)
+{
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* NOTE(review): VXSNAN is raised for every FP_CLS_NAN input, quiet or not - confirm. */
+	if (B_s && B_c != FP_CLS_ZERO)
+		FP_SET_EXCEPTION(EFLAG_VXSQRT);	/* sign set and not a zero */
+	if (B_c == FP_CLS_NAN)
+		FP_SET_EXCEPTION(EFLAG_VXSNAN);
+
+	FP_SQRT_D(R, B);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);	/* presumably rounds to single precision - TODO confirm */
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c
new file mode 100644
index 0000000000..47a8f847b4
--- /dev/null
+++ b/arch/powerpc/math-emu/fsub.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+int
+fsub(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+	/* Emulates frD = frA - frB on doubles; returns the raised exception flags. */
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* Subtraction is done as A + (-B); a NaN B keeps its sign untouched. */
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+	/* Adding infinities of opposite sign (after the flip) flags VXISI. */
+	if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_D(frD, R);
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c
new file mode 100644
index 0000000000..fa1b3b18c3
--- /dev/null
+++ b/arch/powerpc/math-emu/fsubs.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+
+int
+fsubs(void *frD, void *frA, void *frB)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+	/* Emulates frD = frA - frB, packed via __FP_PACK_DS; returns exception flags. */
+#ifdef DEBUG
+	printk("%s: %p %p %p\n", __func__, frD, frA, frB);
+#endif
+
+	FP_UNPACK_DP(A, frA);
+	FP_UNPACK_DP(B, frB);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+	printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+	/* Subtraction is done as A + (-B); a NaN B keeps its sign untouched. */
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+	/* Adding infinities of opposite sign (after the flip) flags VXISI. */
+	if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF)
+		FP_SET_EXCEPTION(EFLAG_VXISI);
+
+	FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+	printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+	__FP_PACK_DS(frD, R);
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c
new file mode 100644
index 0000000000..3a6b03d999
--- /dev/null
+++ b/arch/powerpc/math-emu/lfd.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/double.h>
+
+int
+lfd(void *frD, void *ea)	/* load double: copy from user address ea into frD */
+{
+	if (copy_from_user(frD, ea, sizeof(double)))	/* copied verbatim, no conversion */
+		return -EFAULT;
+#ifdef DEBUG
+	printk("%s: D %p, ea %p: ", __func__, frD, ea);
+	dump_double(frD);
+	printk("\n");
+#endif
+	return 0;	/* success; no exception flags are reported */
+}
diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c
new file mode 100644
index 0000000000..7fd3d0854c
--- /dev/null
+++ b/arch/powerpc/math-emu/lfs.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+
+int
+lfs(void *frD, void *ea)
+{
+	FP_DECL_D(R);
+	FP_DECL_S(A);
+	FP_DECL_EX;
+	float f;
+	/* Loads a single from user address ea into frD as a double; 0 or -EFAULT. */
+#ifdef DEBUG
+	printk("%s: D %p, ea %p\n", __func__, frD, ea);
+#endif
+
+	if (copy_from_user(&f, ea, sizeof(float)))
+		return -EFAULT;
+
+	FP_UNPACK_S(A, f);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %ld (%ld) [%08x]\n", A_s, A_f, A_e, A_c,
+	       *(unsigned int *)&f);
+#endif
+
+	FP_CONV(D, S, 2, 1, R, A);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+	/* NaNs are packed raw with the maximum exponent instead of via __FP_PACK_D. */
+	if (R_c == FP_CLS_NAN) {
+		R_e = _FP_EXPMAX_D;
+		_FP_PACK_RAW_2_P(D, frD, R);
+	} else {
+		__FP_PACK_D(frD, R);
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
new file mode 100644
index 0000000000..936a9a1490
--- /dev/null
+++ b/arch/powerpc/math-emu/math.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+
+#include <linux/uaccess.h>
+#include <asm/reg.h>
+#include <asm/switch_to.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/double.h>
+
+#define FLOATFUNC(x)	extern int x(void *, void *, void *, void *)
+
+/* Instructions that a hardware FPU may leave unimplemented */
+FLOATFUNC(fre);
+FLOATFUNC(frsqrtes);
+FLOATFUNC(fsqrt);
+FLOATFUNC(fsqrts);
+FLOATFUNC(mtfsf);
+FLOATFUNC(mtfsfi);
+
+#ifdef CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED
+#undef FLOATFUNC
+#define FLOATFUNC(x)	static inline int x(void *op1, void *op2, void *op3, \
+						 void *op4) { return 0; }
+#endif
+
+FLOATFUNC(fadd);
+FLOATFUNC(fadds);
+FLOATFUNC(fdiv);
+FLOATFUNC(fdivs);
+FLOATFUNC(fmul);
+FLOATFUNC(fmuls);
+FLOATFUNC(fsub);
+FLOATFUNC(fsubs);
+
+FLOATFUNC(fmadd);
+FLOATFUNC(fmadds);
+FLOATFUNC(fmsub);
+FLOATFUNC(fmsubs);
+FLOATFUNC(fnmadd);
+FLOATFUNC(fnmadds);
+FLOATFUNC(fnmsub);
+FLOATFUNC(fnmsubs);
+
+FLOATFUNC(fctiw);
+FLOATFUNC(fctiwz);
+FLOATFUNC(frsp);
+
+FLOATFUNC(fcmpo);
+FLOATFUNC(fcmpu);
+
+FLOATFUNC(mcrfs);
+FLOATFUNC(mffs);
+FLOATFUNC(mtfsb0);
+FLOATFUNC(mtfsb1);
+
+FLOATFUNC(lfd);
+FLOATFUNC(lfs);
+
+FLOATFUNC(stfd);
+FLOATFUNC(stfs);
+FLOATFUNC(stfiwx);
+
+FLOATFUNC(fabs);
+FLOATFUNC(fmr);
+FLOATFUNC(fnabs);
+FLOATFUNC(fneg);
+
+/* Optional */
+FLOATFUNC(fres);
+FLOATFUNC(frsqrte);
+FLOATFUNC(fsel);
+
+
+#define OP31		0x1f		/*   31 */
+#define LFS		0x30		/*   48 */
+#define LFSU		0x31		/*   49 */
+#define LFD		0x32		/*   50 */
+#define LFDU		0x33		/*   51 */
+#define STFS		0x34		/*   52 */
+#define STFSU		0x35		/*   53 */
+#define STFD		0x36		/*   54 */
+#define STFDU		0x37		/*   55 */
+#define OP59		0x3b		/*   59 */
+#define OP63		0x3f		/*   63 */
+
+/* Opcode 31: */
+/* X-Form: */
+#define LFSX		0x217		/*  535 */
+#define LFSUX		0x237		/*  567 */
+#define LFDX		0x257		/*  599 */
+#define LFDUX		0x277		/*  631 */
+#define STFSX		0x297		/*  663 */
+#define STFSUX		0x2b7		/*  695 */
+#define STFDX		0x2d7		/*  727 */
+#define STFDUX		0x2f7		/*  759 */
+#define STFIWX		0x3d7		/*  983 */
+
+/* Opcode 59: */
+/* A-Form: */
+#define FDIVS		0x012		/*   18 */
+#define FSUBS		0x014		/*   20 */
+#define FADDS		0x015		/*   21 */
+#define FSQRTS		0x016		/*   22 */
+#define FRES		0x018		/*   24 */
+#define FMULS		0x019		/*   25 */
+#define FRSQRTES	0x01a		/*   26 */
+#define FMSUBS		0x01c		/*   28 */
+#define FMADDS		0x01d		/*   29 */
+#define FNMSUBS		0x01e		/*   30 */
+#define FNMADDS		0x01f		/*   31 */
+
+/* Opcode 63: */
+/* A-Form: */
+#define FDIV		0x012		/*   18 */
+#define FSUB		0x014		/*   20 */
+#define FADD		0x015		/*   21 */
+#define FSQRT		0x016		/*   22 */
+#define FSEL		0x017		/*   23 */
+#define FRE		0x018		/*   24 */
+#define FMUL		0x019		/*   25 */
+#define FRSQRTE		0x01a		/*   26 */
+#define FMSUB		0x01c		/*   28 */
+#define FMADD		0x01d		/*   29 */
+#define FNMSUB		0x01e		/*   30 */
+#define FNMADD		0x01f		/*   31 */
+
+/* X-Form: */
+#define FCMPU		0x000		/*    0	*/
+#define FRSP		0x00c		/*   12 */
+#define FCTIW		0x00e		/*   14 */
+#define FCTIWZ		0x00f		/*   15 */
+#define FCMPO		0x020		/*   32 */
+#define MTFSB1		0x026		/*   38 */
+#define FNEG		0x028		/*   40 */
+#define MCRFS		0x040		/*   64 */
+#define MTFSB0		0x046		/*   70 */
+#define FMR		0x048		/*   72 */
+#define MTFSFI		0x086		/*  134 */
+#define FNABS		0x088		/*  136 */
+#define FABS		0x108		/*  264 */
+#define MFFS		0x247		/*  583 */
+#define MTFSF		0x2c7		/*  711 */
+
+
+#define AB	2	/* op0..op2 = FPRs from insn >>21, >>16, >>11 */
+#define AC	3	/* op0..op2 = FPRs from insn >>21, >>16, >>6 */
+#define ABC	4	/* op0..op3 = FPRs from insn >>21, >>16, >>11, >>6 */
+#define D	5	/* op0 = FPR, op1 = (base GPR or 0) + 16-bit displacement */
+#define DU	6	/* as D, but base GPR is mandatory and written back */
+#define X	7	/* op0 = FPR only */
+#define XA	8	/* op0, op1 = FPRs from insn >>21, >>16 */
+#define XB	9	/* op0, op1 = FPRs from insn >>21, >>11 */
+#define XCR	11	/* op0 = &ccr, op1 = 3-bit field (>>23), op2/op3 = FPRs */
+#define XCRB	12	/* op0 = 5-bit immediate (>>21) */
+#define XCRI	13	/* op0 = 3-bit field (>>23), op1 = 4-bit immediate (>>12) */
+#define XCRL	16	/* op0 = &ccr, op1/op2 = 3-bit fields (>>23, >>18) */
+#define XE	14	/* op0 = FPR, op1 = (base GPR or 0) + index GPR */
+#define XEU	15	/* as XE, but base GPR is mandatory and written back */
+#define XFLB	10	/* op0 = 8-bit field (>>17), op1 = FPR (>>11) */
+
+static int
+record_exception(struct pt_regs *regs, int eflag)
+{
+	/* Emulator exception flag -> FPSCR status bit recorded for it. */
+	static const struct {
+		int eflag;
+		u32 status;
+	} raised[] = {
+		{ EFLAG_OVERFLOW,	FPSCR_OX },
+		{ EFLAG_UNDERFLOW,	FPSCR_UX },
+		{ EFLAG_DIVZERO,	FPSCR_ZX },
+		{ EFLAG_INEXACT,	FPSCR_XX },
+		{ EFLAG_INVALID,	FPSCR_VX },
+		{ EFLAG_VXSNAN,		FPSCR_VXSNAN },
+		{ EFLAG_VXISI,		FPSCR_VXISI },
+		{ EFLAG_VXIDI,		FPSCR_VXIDI },
+		{ EFLAG_VXZDZ,		FPSCR_VXZDZ },
+		{ EFLAG_VXIMZ,		FPSCR_VXIMZ },
+		{ EFLAG_VXVC,		FPSCR_VXVC },
+		{ EFLAG_VXSOFT,		FPSCR_VXSOFT },
+		{ EFLAG_VXSQRT,		FPSCR_VXSQRT },
+		{ EFLAG_VXCVI,		FPSCR_VXCVI },
+	};
+	/* FPSCR status bit -> enable bit that turns it into a trap. */
+	static const struct {
+		u32 status;
+		u32 enable;
+	} trapping[] = {
+		{ FPSCR_VX, FPSCR_VE },
+		{ FPSCR_OX, FPSCR_OE },
+		{ FPSCR_UX, FPSCR_UE },
+		{ FPSCR_ZX, FPSCR_ZE },
+		{ FPSCR_XX, FPSCR_XE },
+	};
+	u32 fpscr = __FPU_FPSCR;
+	unsigned int i;
+
+	/* Any reported exception sets FX plus its own status bit. */
+	if (eflag)
+		fpscr |= FPSCR_FX;
+	for (i = 0; i < sizeof(raised) / sizeof(raised[0]); i++)
+		if (eflag & raised[i].eflag)
+			fpscr |= raised[i].status;
+	/* VX summarises the individual invalid-operation bits. */
+	if (fpscr & (FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI |
+		     FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC |
+		     FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI))
+		fpscr |= FPSCR_VX;
+	/* FEX is recomputed: set iff some raised status bit is enabled. */
+	fpscr &= ~(FPSCR_FEX);
+	for (i = 0; i < sizeof(trapping) / sizeof(trapping[0]); i++)
+		if ((fpscr & trapping[i].status) && (fpscr & trapping[i].enable))
+			fpscr |= FPSCR_FEX;
+
+	__FPU_FPSCR = fpscr;
+	return !!(fpscr & FPSCR_FEX);
+}
+
+int
+do_mathemu(struct pt_regs *regs)
+{
+	void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL;
+	unsigned long pc = regs->nip;
+	signed short sdisp;
+	u32 insn = 0;
+	int idx = 0;
+	int (*func)(void *, void *, void *, void *);
+	int type = 0;
+	int eflag, trap;
+	/* Emulate the FP insn at regs->nip: 0 = done, 1 = trap, <0 = error. */
+	if (get_user(insn, (u32 __user *)pc))
+		return -EFAULT;
+
+	switch (insn >> 26) {
+	case LFS:	func = lfs;	type = D;	break;
+	case LFSU:	func = lfs;	type = DU;	break;
+	case LFD:	func = lfd;	type = D;	break;
+	case LFDU:	func = lfd;	type = DU;	break;
+	case STFS:	func = stfs;	type = D;	break;
+	case STFSU:	func = stfs;	type = DU;	break;
+	case STFD:	func = stfd;	type = D;	break;
+	case STFDU:	func = stfd;	type = DU;	break;
+
+	case OP31:
+		switch ((insn >> 1) & 0x3ff) {
+		case LFSX:	func = lfs;	type = XE;	break;
+		case LFSUX:	func = lfs;	type = XEU;	break;
+		case LFDX:	func = lfd;	type = XE;	break;
+		case LFDUX:	func = lfd;	type = XEU;	break;
+		case STFSX:	func = stfs;	type = XE;	break;
+		case STFSUX:	func = stfs;	type = XEU;	break;
+		case STFDX:	func = stfd;	type = XE;	break;
+		case STFDUX:	func = stfd;	type = XEU;	break;
+		case STFIWX:	func = stfiwx;	type = XE;	break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case OP59:
+		switch ((insn >> 1) & 0x1f) {
+		case FDIVS:	func = fdivs;	type = AB;	break;
+		case FSUBS:	func = fsubs;	type = AB;	break;
+		case FADDS:	func = fadds;	type = AB;	break;
+		case FSQRTS:	func = fsqrts;	type = XB;	break;
+		case FRES:	func = fres;	type = XB;	break;
+		case FMULS:	func = fmuls;	type = AC;	break;
+		case FRSQRTES:	func = frsqrtes;type = XB;	break;
+		case FMSUBS:	func = fmsubs;	type = ABC;	break;
+		case FMADDS:	func = fmadds;	type = ABC;	break;
+		case FNMSUBS:	func = fnmsubs;	type = ABC;	break;
+		case FNMADDS:	func = fnmadds;	type = ABC;	break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case OP63:
+		if (insn & 0x20) {
+			switch ((insn >> 1) & 0x1f) {
+			case FDIV:	func = fdiv;	type = AB;	break;
+			case FSUB:	func = fsub;	type = AB;	break;
+			case FADD:	func = fadd;	type = AB;	break;
+			case FSQRT:	func = fsqrt;	type = XB;	break;
+			case FRE:	func = fre;	type = XB;	break;
+			case FSEL:	func = fsel;	type = ABC;	break;
+			case FMUL:	func = fmul;	type = AC;	break;
+			case FRSQRTE:	func = frsqrte;	type = XB;	break;
+			case FMSUB:	func = fmsub;	type = ABC;	break;
+			case FMADD:	func = fmadd;	type = ABC;	break;
+			case FNMSUB:	func = fnmsub;	type = ABC;	break;
+			case FNMADD:	func = fnmadd;	type = ABC;	break;
+			default:
+				goto illegal;
+			}
+			break;
+		}
+
+		switch ((insn >> 1) & 0x3ff) {
+		case FCMPU:	func = fcmpu;	type = XCR;	break;
+		case FRSP:	func = frsp;	type = XB;	break;
+		case FCTIW:	func = fctiw;	type = XB;	break;
+		case FCTIWZ:	func = fctiwz;	type = XB;	break;
+		case FCMPO:	func = fcmpo;	type = XCR;	break;
+		case MTFSB1:	func = mtfsb1;	type = XCRB;	break;
+		case FNEG:	func = fneg;	type = XB;	break;
+		case MCRFS:	func = mcrfs;	type = XCRL;	break;
+		case MTFSB0:	func = mtfsb0;	type = XCRB;	break;
+		case FMR:	func = fmr;	type = XB;	break;
+		case MTFSFI:	func = mtfsfi;	type = XCRI;	break;
+		case FNABS:	func = fnabs;	type = XB;	break;
+		case FABS:	func = fabs;	type = XB;	break;
+		case MFFS:	func = mffs;	type = X;	break;
+		case MTFSF:	func = mtfsf;	type = XFLB;	break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	default:
+		goto illegal;
+	}
+	/* Resolve the operand pointers/immediates for the decoded form. */
+	switch (type) {
+	case AB:
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
+		op2 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
+		break;
+
+	case AC:
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
+		op2 = (void *)&current->thread.TS_FPR((insn >>  6) & 0x1f);
+		break;
+
+	case ABC:
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
+		op2 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
+		op3 = (void *)&current->thread.TS_FPR((insn >>  6) & 0x1f);
+		break;
+
+	case D:
+		idx = (insn >> 16) & 0x1f;
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+		break;
+
+	case DU:
+		idx = (insn >> 16) & 0x1f;
+		if (!idx)
+			goto illegal;
+
+		sdisp = (insn & 0xffff);
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)(regs->gpr[idx] + sdisp);
+		break;
+
+	case X:
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		break;
+
+	case XA:
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
+		break;
+
+	case XB:
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
+		break;
+
+	case XE:
+		idx = (insn >> 16) & 0x1f;
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)((idx ? regs->gpr[idx] : 0)
+				+ regs->gpr[(insn >> 11) & 0x1f]);
+		break;
+
+	case XEU:
+		idx = (insn >> 16) & 0x1f;
+		if (!idx)
+			goto illegal;
+		op0 = (void *)&current->thread.TS_FPR((insn >> 21) & 0x1f);
+		op1 = (void *)(regs->gpr[idx]
+				+ regs->gpr[(insn >> 11) & 0x1f]);
+		break;
+
+	case XCR:
+		op0 = (void *)&regs->ccr;
+		op1 = (void *)(long)((insn >> 23) & 0x7);
+		op2 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
+		op3 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
+		break;
+
+	case XCRL:
+		op0 = (void *)&regs->ccr;
+		op1 = (void *)(long)((insn >> 23) & 0x7);
+		op2 = (void *)(long)((insn >> 18) & 0x7);
+		break;
+
+	case XCRB:
+		op0 = (void *)(long)((insn >> 21) & 0x1f);
+		break;
+
+	case XCRI:
+		op0 = (void *)(long)((insn >> 23) & 0x7);
+		op1 = (void *)(long)((insn >> 12) & 0xf);
+		break;
+
+	case XFLB:
+		op0 = (void *)(long)((insn >> 17) & 0xff);
+		op1 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
+		break;
+
+	default:
+		goto illegal;
+	}
+
+	/*
+	 * With a HW FPU, the FP state must be flushed into the
+	 * thread_struct before attempting emulation.
+	 */
+	flush_fp_to_thread(current);
+	eflag = func(op0, op1, op2, op3);
+	if (eflag < 0)
+		return eflag;	/* helper error such as -EFAULT, not a flag mask */
+
+	if ((insn & 1) && type != D && type != DU) { /* D-form bit 0 is displacement */
+		regs->ccr &= ~(0x0f000000);
+		regs->ccr |= (__FPU_FPSCR >> 4) & 0x0f000000;
+	}
+
+	trap = record_exception(regs, eflag);
+	if (trap)
+		return 1;
+	/* Update forms write the computed effective address back to the base GPR. */
+	switch (type) {
+	case DU:
+	case XEU:
+		regs->gpr[idx] = (unsigned long)op1;
+		break;
+
+	default:
+		break;
+	}
+
+	regs_add_return_ip(regs, 4);
+	return 0;
+
+illegal:
+	return -ENOSYS;
+}
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
new file mode 100644
index 0000000000..34f62aafe7
--- /dev/null
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -0,0 +1,931 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/math-emu/math_efp.c
+ *
+ * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc.
+ *
+ * Author: Ebony Zhu,	<ebony.zhu@freescale.com>
+ *         Yu Liu,	<yu.liu@freescale.com>
+ *
+ * Derived from arch/alpha/math-emu/math.c
+ *              arch/powerpc/math-emu/math.c
+ *
+ * Description:
+ * This file is the exception handler to make E500 SPE instructions
+ * fully comply with IEEE-754 floating point standard.
+ */
+
+#include <linux/types.h>
+#include <linux/prctl.h>
+#include <linux/module.h>
+
+#include <linux/uaccess.h>
+#include <asm/reg.h>
+
+#define FP_EX_BOOKE_E500_SPE
+#include <asm/sfp-machine.h>
+
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+#include <math-emu/double.h>
+
+#define EFAPU		0x4
+
+#define VCT		0x4
+#define SPFP		0x6
+#define DPFP		0x7
+
+#define EFSADD		0x2c0
+#define EFSSUB		0x2c1
+#define EFSABS		0x2c4
+#define EFSNABS		0x2c5
+#define EFSNEG		0x2c6
+#define EFSMUL		0x2c8
+#define EFSDIV		0x2c9
+#define EFSCMPGT	0x2cc
+#define EFSCMPLT	0x2cd
+#define EFSCMPEQ	0x2ce
+#define EFSCFD		0x2cf
+#define EFSCFSI		0x2d1
+#define EFSCTUI		0x2d4
+#define EFSCTSI		0x2d5
+#define EFSCTUF		0x2d6
+#define EFSCTSF		0x2d7
+#define EFSCTUIZ	0x2d8
+#define EFSCTSIZ	0x2da
+
+#define EVFSADD		0x280
+#define EVFSSUB		0x281
+#define EVFSABS		0x284
+#define EVFSNABS	0x285
+#define EVFSNEG		0x286
+#define EVFSMUL		0x288
+#define EVFSDIV		0x289
+#define EVFSCMPGT	0x28c
+#define EVFSCMPLT	0x28d
+#define EVFSCMPEQ	0x28e
+#define EVFSCTUI	0x294
+#define EVFSCTSI	0x295
+#define EVFSCTUF	0x296
+#define EVFSCTSF	0x297
+#define EVFSCTUIZ	0x298
+#define EVFSCTSIZ	0x29a
+
+#define EFDADD		0x2e0
+#define EFDSUB		0x2e1
+#define EFDABS		0x2e4
+#define EFDNABS		0x2e5
+#define EFDNEG		0x2e6
+#define EFDMUL		0x2e8
+#define EFDDIV		0x2e9
+#define EFDCTUIDZ	0x2ea
+#define EFDCTSIDZ	0x2eb
+#define EFDCMPGT	0x2ec
+#define EFDCMPLT	0x2ed
+#define EFDCMPEQ	0x2ee
+#define EFDCFS		0x2ef
+#define EFDCTUI		0x2f4
+#define EFDCTSI		0x2f5
+#define EFDCTUF		0x2f6
+#define EFDCTSF		0x2f7
+#define EFDCTUIZ	0x2f8
+#define EFDCTSIZ	0x2fa
+
+#define AB	2	/* two sources: va and vb are both unpacked */
+#define XA	3	/* one source: only va is unpacked */
+#define XB	4	/* one source: only vb is unpacked */
+#define XCR	5	/* compare: va and vb unpacked, result goes to a CR field */
+#define NOTYPE	0	/* opcode not recognised by insn_type() */
+
+#define SIGN_BIT_S	(1UL << 31)
+#define SIGN_BIT_D	(1ULL << 63)
+#define FP_EX_MASK	(FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
+			FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
+
+static int have_e500_cpu_a005_erratum;
+
+union dw_union {	/* one 64-bit operand, viewable whole or as two words */
+	u64 dp[1];	/* whole value, used for double-precision operands */
+	u32 wp[2];	/* do_spe_mathemu fills wp[0] from evr[], wp[1] from gpr[] */
+};
+
+static unsigned long insn_type(unsigned long speinsn)
+{
+	/* Classify by the low 11 opcode bits; unknown opcodes yield NOTYPE. */
+	switch (speinsn & 0x7ff) {
+	case EFSABS:
+	case EFSNABS:
+	case EFSNEG:
+	case EVFSABS:
+	case EVFSNABS:
+	case EVFSNEG:
+	case EFDABS:
+	case EFDNABS:
+	case EFDNEG:
+		return XA;
+	case EFSADD:
+	case EFSSUB:
+	case EFSMUL:
+	case EFSDIV:
+	case EVFSADD:
+	case EVFSSUB:
+	case EVFSMUL:
+	case EVFSDIV:
+	case EFDADD:
+	case EFDSUB:
+	case EFDMUL:
+	case EFDDIV:
+		return AB;
+	case EFSCMPEQ:
+	case EFSCMPGT:
+	case EFSCMPLT:
+	case EVFSCMPEQ:
+	case EVFSCMPGT:
+	case EVFSCMPLT:
+	case EFDCMPEQ:
+	case EFDCMPGT:
+	case EFDCMPLT:
+		return XCR;
+	case EFSCFD:
+	case EFSCFSI:
+	case EFSCTSF:
+	case EFSCTSI:
+	case EFSCTSIZ:
+	case EFSCTUF:
+	case EFSCTUI:
+	case EFSCTUIZ:
+	case EVFSCTSF:
+	case EVFSCTSI:
+	case EVFSCTSIZ:
+	case EVFSCTUF:
+	case EVFSCTUI:
+	case EVFSCTUIZ:
+	case EFDCFS:
+	case EFDCTSF:
+	case EFDCTSI:
+	case EFDCTSIDZ:
+	case EFDCTSIZ:
+	case EFDCTUF:
+	case EFDCTUI:
+	case EFDCTUIDZ:
+	case EFDCTUIZ:
+		return XB;
+	}
+	return NOTYPE;
+}
+
+int do_spe_mathemu(struct pt_regs *regs)
+{
+	FP_DECL_EX;
+	int IR, cmp;
+
+	unsigned long type, func, fc, fa, fb, src, speinsn;
+	union dw_union vc, va, vb;
+	/* Fetch the instruction at regs->nip; only primary opcode EFAPU is handled. */
+	if (get_user(speinsn, (unsigned int __user *) regs->nip))
+		return -EFAULT;
+	if ((speinsn >> 26) != EFAPU)
+		return -EINVAL;         /* not an spe instruction */
+
+	type = insn_type(speinsn);
+	if (type == NOTYPE)
+		goto illegal;
+
+	func = speinsn & 0x7ff;
+	fc = (speinsn >> 21) & 0x1f;
+	fa = (speinsn >> 16) & 0x1f;
+	fb = (speinsn >> 11) & 0x1f;
+	src = (speinsn >> 5) & 0x7;
+	/* Each operand is assembled as wp[0] = evr[n], wp[1] = gpr[n]. */
+	vc.wp[0] = current->thread.evr[fc];
+	vc.wp[1] = regs->gpr[fc];
+	va.wp[0] = current->thread.evr[fa];
+	va.wp[1] = regs->gpr[fa];
+	vb.wp[0] = current->thread.evr[fb];
+	vb.wp[1] = regs->gpr[fb];
+
+	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
+
+	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
+	pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
+	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
+	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
+	/* Opcode bits 5-7 select scalar single (SPFP), double (DPFP) or vector (VCT). */
+	switch (src) {
+	case SPFP: {
+		FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+
+		switch (type) {
+		case AB:
+		case XCR:
+			FP_UNPACK_SP(SA, va.wp + 1);
+			fallthrough;
+		case XB:
+			FP_UNPACK_SP(SB, vb.wp + 1);
+			break;
+		case XA:
+			FP_UNPACK_SP(SA, va.wp + 1);
+			break;
+		}
+
+		pr_debug("SA: %d %08x %d (%d)\n", SA_s, SA_f, SA_e, SA_c);
+		pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c);
+
+		switch (func) {
+		case EFSABS:
+			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
+			goto update_regs;
+
+		case EFSNABS:
+			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
+			goto update_regs;
+
+		case EFSNEG:
+			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
+			goto update_regs;
+
+		case EFSADD:
+			FP_ADD_S(SR, SA, SB);
+			goto pack_s;
+
+		case EFSSUB:
+			FP_SUB_S(SR, SA, SB);
+			goto pack_s;
+
+		case EFSMUL:
+			FP_MUL_S(SR, SA, SB);
+			goto pack_s;
+
+		case EFSDIV:
+			FP_DIV_S(SR, SA, SB);
+			goto pack_s;
+
+		case EFSCMPEQ:
+			cmp = 0;
+			goto cmp_s;
+
+		case EFSCMPGT:
+			cmp = 1;
+			goto cmp_s;
+
+		case EFSCMPLT:
+			cmp = -1;
+			goto cmp_s;
+
+		case EFSCTSF:
+		case EFSCTUF:
+			if (SB_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				SB_e += (func == EFSCTSF ? 31 : 32);
+				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
+						(func == EFSCTSF) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EFSCFD: {
+			FP_DECL_D(DB);
+			FP_CLEAR_EXCEPTIONS;
+			FP_UNPACK_DP(DB, vb.dp);
+
+			pr_debug("DB: %d %08x %08x %d (%d)\n",
+					DB_s, DB_f1, DB_f0, DB_e, DB_c);
+
+			FP_CONV(S, D, 1, 2, SR, DB);
+			goto pack_s;
+		}
+
+		case EFSCTSI:
+		case EFSCTUI:
+			if (SB_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EFSCTSIZ:
+		case EFSCTUIZ:
+			if (SB_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_S(vc.wp[1], SB, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		default:
+			goto illegal;
+		}
+		break;
+
+pack_s:
+		pr_debug("SR: %d %08x %d (%d)\n", SR_s, SR_f, SR_e, SR_c);
+
+		FP_PACK_SP(vc.wp + 1, SR);
+		goto update_regs;
+
+cmp_s:
+		FP_CMP_S(IR, SA, SB, 3);
+		if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB)))
+			FP_SET_EXCEPTION(FP_EX_INVALID);
+		if (IR == cmp) {
+			IR = 0x4;
+		} else {
+			IR = 0;
+		}
+		goto update_ccr;
+	}
+
+	case DPFP: {
+		FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
+
+		switch (type) {
+		case AB:
+		case XCR:
+			FP_UNPACK_DP(DA, va.dp);
+			fallthrough;
+		case XB:
+			FP_UNPACK_DP(DB, vb.dp);
+			break;
+		case XA:
+			FP_UNPACK_DP(DA, va.dp);
+			break;
+		}
+
+		pr_debug("DA: %d %08x %08x %d (%d)\n",
+				DA_s, DA_f1, DA_f0, DA_e, DA_c);
+		pr_debug("DB: %d %08x %08x %d (%d)\n",
+				DB_s, DB_f1, DB_f0, DB_e, DB_c);
+
+		switch (func) {
+		case EFDABS:
+			vc.dp[0] = va.dp[0] & ~SIGN_BIT_D;
+			goto update_regs;
+
+		case EFDNABS:
+			vc.dp[0] = va.dp[0] | SIGN_BIT_D;
+			goto update_regs;
+
+		case EFDNEG:
+			vc.dp[0] = va.dp[0] ^ SIGN_BIT_D;
+			goto update_regs;
+
+		case EFDADD:
+			FP_ADD_D(DR, DA, DB);
+			goto pack_d;
+
+		case EFDSUB:
+			FP_SUB_D(DR, DA, DB);
+			goto pack_d;
+
+		case EFDMUL:
+			FP_MUL_D(DR, DA, DB);
+			goto pack_d;
+
+		case EFDDIV:
+			FP_DIV_D(DR, DA, DB);
+			goto pack_d;
+
+		case EFDCMPEQ:
+			cmp = 0;
+			goto cmp_d;
+
+		case EFDCMPGT:
+			cmp = 1;
+			goto cmp_d;
+
+		case EFDCMPLT:
+			cmp = -1;
+			goto cmp_d;
+
+		case EFDCTSF:
+		case EFDCTUF:
+			if (DB_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				DB_e += (func == EFDCTSF ? 31 : 32);
+				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
+						(func == EFDCTSF) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EFDCFS: {
+			FP_DECL_S(SB);
+			FP_CLEAR_EXCEPTIONS;
+			FP_UNPACK_SP(SB, vb.wp + 1);
+
+			pr_debug("SB: %d %08x %d (%d)\n",
+					SB_s, SB_f, SB_e, SB_c);
+
+			FP_CONV(D, S, 2, 1, DR, SB);
+			goto pack_d;
+		}
+
+		case EFDCTUIDZ:
+		case EFDCTSIDZ:	/* NOTE(review): flag below is 1 for the U variant, unlike other cases; confirm */
+			if (DB_c == FP_CLS_NAN) {
+				vc.dp[0] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_D(vc.dp[0], DB, 64,
+						((func & 0x1) == 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EFDCTUI:
+		case EFDCTSI:
+			if (DB_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EFDCTUIZ:
+		case EFDCTSIZ:
+			if (DB_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_D(vc.wp[1], DB, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		default:
+			goto illegal;
+		}
+		break;
+
+pack_d:
+		pr_debug("DR: %d %08x %08x %d (%d)\n",
+				DR_s, DR_f1, DR_f0, DR_e, DR_c);
+
+		FP_PACK_DP(vc.dp, DR);
+		goto update_regs;
+
+cmp_d:
+		FP_CMP_D(IR, DA, DB, 3);
+		if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB)))
+			FP_SET_EXCEPTION(FP_EX_INVALID);
+		if (IR == cmp) {
+			IR = 0x4;
+		} else {
+			IR = 0;
+		}
+		goto update_ccr;
+
+	}
+
+	case VCT: {
+		FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0);
+		FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1);
+		int IR0, IR1;
+
+		switch (type) {
+		case AB:
+		case XCR:
+			FP_UNPACK_SP(SA0, va.wp);
+			FP_UNPACK_SP(SA1, va.wp + 1);
+			fallthrough;
+		case XB:
+			FP_UNPACK_SP(SB0, vb.wp);
+			FP_UNPACK_SP(SB1, vb.wp + 1);
+			break;
+		case XA:
+			FP_UNPACK_SP(SA0, va.wp);
+			FP_UNPACK_SP(SA1, va.wp + 1);
+			break;
+		}
+
+		pr_debug("SA0: %d %08x %d (%d)\n",
+				SA0_s, SA0_f, SA0_e, SA0_c);
+		pr_debug("SA1: %d %08x %d (%d)\n",
+				SA1_s, SA1_f, SA1_e, SA1_c);
+		pr_debug("SB0: %d %08x %d (%d)\n",
+				SB0_s, SB0_f, SB0_e, SB0_c);
+		pr_debug("SB1: %d %08x %d (%d)\n",
+				SB1_s, SB1_f, SB1_e, SB1_c);
+
+		switch (func) {
+		case EVFSABS:
+			vc.wp[0] = va.wp[0] & ~SIGN_BIT_S;
+			vc.wp[1] = va.wp[1] & ~SIGN_BIT_S;
+			goto update_regs;
+
+		case EVFSNABS:
+			vc.wp[0] = va.wp[0] | SIGN_BIT_S;
+			vc.wp[1] = va.wp[1] | SIGN_BIT_S;
+			goto update_regs;
+
+		case EVFSNEG:
+			vc.wp[0] = va.wp[0] ^ SIGN_BIT_S;
+			vc.wp[1] = va.wp[1] ^ SIGN_BIT_S;
+			goto update_regs;
+
+		case EVFSADD:
+			FP_ADD_S(SR0, SA0, SB0);
+			FP_ADD_S(SR1, SA1, SB1);
+			goto pack_vs;
+
+		case EVFSSUB:
+			FP_SUB_S(SR0, SA0, SB0);
+			FP_SUB_S(SR1, SA1, SB1);
+			goto pack_vs;
+
+		case EVFSMUL:
+			FP_MUL_S(SR0, SA0, SB0);
+			FP_MUL_S(SR1, SA1, SB1);
+			goto pack_vs;
+
+		case EVFSDIV:
+			FP_DIV_S(SR0, SA0, SB0);
+			FP_DIV_S(SR1, SA1, SB1);
+			goto pack_vs;
+
+		case EVFSCMPEQ:
+			cmp = 0;
+			goto cmp_vs;
+
+		case EVFSCMPGT:
+			cmp = 1;
+			goto cmp_vs;
+
+		case EVFSCMPLT:
+			cmp = -1;
+			goto cmp_vs;
+
+		case EVFSCTUF:
+		case EVFSCTSF:
+			if (SB0_c == FP_CLS_NAN) {
+				vc.wp[0] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				SB0_e += (func == EVFSCTSF ? 31 : 32);
+				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
+						(func == EVFSCTSF) ? 1 : 0);
+			}
+			if (SB1_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				SB1_e += (func == EVFSCTSF ? 31 : 32);
+				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
+						(func == EVFSCTSF) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EVFSCTUI:
+		case EVFSCTSI:
+			if (SB0_c == FP_CLS_NAN) {
+				vc.wp[0] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			if (SB1_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		case EVFSCTUIZ:
+		case EVFSCTSIZ:
+			if (SB0_c == FP_CLS_NAN) {
+				vc.wp[0] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_S(vc.wp[0], SB0, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			if (SB1_c == FP_CLS_NAN) {
+				vc.wp[1] = 0;
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			} else {
+				FP_TO_INT_S(vc.wp[1], SB1, 32,
+						((func & 0x3) != 0) ? 1 : 0);
+			}
+			goto update_regs;
+
+		default:
+			goto illegal;
+		}
+		break;
+
+pack_vs:
+		pr_debug("SR0: %d %08x %d (%d)\n",
+				SR0_s, SR0_f, SR0_e, SR0_c);
+		pr_debug("SR1: %d %08x %d (%d)\n",
+				SR1_s, SR1_f, SR1_e, SR1_c);
+
+		FP_PACK_SP(vc.wp, SR0);
+		FP_PACK_SP(vc.wp + 1, SR1);
+		goto update_regs;
+
+cmp_vs:
+		{
+			int ch, cl;
+
+			FP_CMP_S(IR0, SA0, SB0, 3);
+			FP_CMP_S(IR1, SA1, SB1, 3);
+			if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0)))
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1)))
+				FP_SET_EXCEPTION(FP_EX_INVALID);
+			ch = (IR0 == cmp) ? 1 : 0;
+			cl = (IR1 == cmp) ? 1 : 0;
+			IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) |
+				((ch & cl) << 0);
+			goto update_ccr;
+		}
+	}
+	default:
+		return -EINVAL;
+	}
+	/* Store the 4-bit result IR into the CR field selected by insn bits 23-25. */
+update_ccr:
+	regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2));
+	regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2));
+
+update_regs:
+	/*
+	 * If the "invalid" exception sticky bit was set by the
+	 * processor for non-finite input, but was not set before the
+	 * instruction being emulated, clear it.  Likewise for the
+	 * "underflow" bit, which may have been set by the processor
+	 * for exact underflow, not just inexact underflow when the
+	 * flag should be set for IEEE 754 semantics.  Other sticky
+	 * exceptions will only be set by the processor when they are
+	 * correct according to IEEE 754 semantics, and we must not
+	 * clear sticky bits that were already set before the emulated
+	 * instruction as they represent the user-visible sticky
+	 * exception status.  "inexact" traps to kernel are not
+	 * required for IEEE semantics and are not enabled by default,
+	 * so the "inexact" sticky bit may have been set by a previous
+	 * instruction without the kernel being aware of it.
+	 */
+	__FPU_FPSCR
+	  &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last;
+	__FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK);
+	mtspr(SPRN_SPEFSCR, __FPU_FPSCR);
+	current->thread.spefscr_last = __FPU_FPSCR;
+
+	current->thread.evr[fc] = vc.wp[0];
+	regs->gpr[fc] = vc.wp[1];
+
+	pr_debug("ccr = %08lx\n", regs->ccr);
+	pr_debug("cur exceptions = %08x spefscr = %08lx\n",
+			FP_CUR_EXCEPTIONS, __FPU_FPSCR);
+	pr_debug("vc: %08x  %08x\n", vc.wp[0], vc.wp[1]);
+	pr_debug("va: %08x  %08x\n", va.wp[0], va.wp[1]);
+	pr_debug("vb: %08x  %08x\n", vb.wp[0], vb.wp[1]);
+	/* Return 1 when a raised exception is enabled in fpexc_mode, else 0. */
+	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
+		if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO)
+		    && (current->thread.fpexc_mode & PR_FP_EXC_DIV))
+			return 1;
+		if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW)
+		    && (current->thread.fpexc_mode & PR_FP_EXC_OVF))
+			return 1;
+		if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW)
+		    && (current->thread.fpexc_mode & PR_FP_EXC_UND))
+			return 1;
+		if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)
+		    && (current->thread.fpexc_mode & PR_FP_EXC_RES))
+			return 1;
+		if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID)
+		    && (current->thread.fpexc_mode & PR_FP_EXC_INV))
+			return 1;
+	}
+	return 0;
+
+illegal:
+	if (have_e500_cpu_a005_erratum) {
+		/* according to e500 cpu a005 erratum, reissue efp inst */
+		regs_add_return_ip(regs, -4);
+		pr_debug("re-issue efp inst: %08lx\n", speinsn);
+		return 0;
+	}
+
+	printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn);
+	return -ENOSYS;
+}
+
+/*
+ * speround_handler() - software rounding fix-up for an SPE instruction.
+ *
+ * Re-reads the instruction at regs->nip and, when SPEFSCR reports that
+ * its result was inexact, adjusts the result already held in the
+ * destination register by one unit in the last place.  Only
+ * round-toward-+Inf is told apart from the other rounding modes here
+ * (see the NOTE(review) ahead of the second switch).
+ *
+ * The 64-bit destination is assembled from current->thread.evr[fc]
+ * (fgpr.wp[0], the "high" word) and regs->gpr[fc] (fgpr.wp[1], the
+ * "low" word) and written back the same way.
+ *
+ * Return: 0 when no adjustment was needed or the adjustment was made;
+ * 1 when the adjustment was made and the task has both
+ * PR_FP_EXC_SW_ENABLE and PR_FP_EXC_RES set in fpexc_mode; -EFAULT if
+ * the instruction cannot be read; -EINVAL if the primary opcode is not
+ * 4 or the operand-type field is not SPFP/DPFP/VCT; -ENOSYS for
+ * instructions that insn_type() classifies as XCR.
+ */
+int speround_handler(struct pt_regs *regs)
+{
+	union dw_union fgpr;
+	int s_lo, s_hi;
+	int lo_inexact, hi_inexact;
+	int fp_result;
+	unsigned long speinsn, type, fb, fc, fptype, func;
+
+	if (get_user(speinsn, (unsigned int __user *) regs->nip))
+		return -EFAULT;
+	if ((speinsn >> 26) != 4)
+		return -EINVAL;         /* not an spe instruction */
+
+	/* The low 11 bits of the instruction select the operation. */
+	func = speinsn & 0x7ff;
+	type = insn_type(func);
+	if (type == XCR) return -ENOSYS;
+
+	__FPU_FPSCR = mfspr(SPRN_SPEFSCR);
+	pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR);
+
+	/* Bits 5..7 of the instruction give the operand type (SPFP/DPFP/VCT). */
+	fptype = (speinsn >> 5) & 0x7;
+
+	/* No need to round if the result is exact */
+	lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX);
+	hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH);
+	/* The high-word flags only matter for vector instructions. */
+	if (!(lo_inexact || (hi_inexact && fptype == VCT)))
+		return 0;
+
+	/* fc: destination register number; snapshot its sign bits and value. */
+	fc = (speinsn >> 21) & 0x1f;
+	s_lo = regs->gpr[fc] & SIGN_BIT_S;
+	s_hi = current->thread.evr[fc] & SIGN_BIT_S;
+	fgpr.wp[0] = current->thread.evr[fc];
+	fgpr.wp[1] = regs->gpr[fc];
+
+	/* fb: source register number, used below to recover a lost sign. */
+	fb = (speinsn >> 11) & 0x1f;
+	/*
+	 * Classify the instruction.  fp_result is 0 when the destination
+	 * holds a converted integer/fraction and 1 when it holds a
+	 * floating-point value; the two need different adjustments below.
+	 */
+	switch (func) {
+	case EFSCTUIZ:
+	case EFSCTSIZ:
+	case EVFSCTUIZ:
+	case EVFSCTSIZ:
+	case EFDCTUIDZ:
+	case EFDCTSIDZ:
+	case EFDCTUIZ:
+	case EFDCTSIZ:
+		/*
+		 * These instructions always round to zero,
+		 * independent of the rounding mode.
+		 */
+		return 0;
+
+	case EFSCTUI:
+	case EFSCTUF:
+	case EVFSCTUI:
+	case EVFSCTUF:
+	case EFDCTUI:
+	case EFDCTUF:
+		/* Unsigned conversions: the result is never treated as negative. */
+		fp_result = 0;
+		s_lo = 0;
+		s_hi = 0;
+		break;
+
+	case EFSCTSI:
+	case EFSCTSF:
+		fp_result = 0;
+		/* Recover the sign of a zero result if possible.  */
+		if (fgpr.wp[1] == 0)
+			s_lo = regs->gpr[fb] & SIGN_BIT_S;
+		break;
+
+	case EVFSCTSI:
+	case EVFSCTSF:
+		fp_result = 0;
+		/* Recover the sign of a zero result if possible.  */
+		if (fgpr.wp[1] == 0)
+			s_lo = regs->gpr[fb] & SIGN_BIT_S;
+		if (fgpr.wp[0] == 0)
+			s_hi = current->thread.evr[fb] & SIGN_BIT_S;
+		break;
+
+	case EFDCTSI:
+	case EFDCTSF:
+		fp_result = 0;
+		/*
+		 * The converted value lives in the low word, but the DPFP
+		 * code below tests s_hi, so carry the low-word sign over.
+		 */
+		s_hi = s_lo;
+		/* Recover the sign of a zero result if possible.  */
+		if (fgpr.wp[1] == 0)
+			s_hi = current->thread.evr[fb] & SIGN_BIT_S;
+		break;
+
+	default:
+		/* Everything else produces a floating-point result. */
+		fp_result = 1;
+		break;
+	}
+
+	pr_debug("round fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
+
+	/*
+	 * NOTE(review): the "round to -Inf" else-branches below run for
+	 * every rounding mode other than FP_RND_PINF.  Presumably this
+	 * handler is only reached in the two directed modes -- confirm
+	 * against the caller.
+	 *
+	 * For a floating-point result the raw bit pattern is incremented
+	 * in both directions; for an integer result the value is
+	 * incremented when non-negative and decremented when negative.
+	 */
+	switch (fptype) {
+	/* Since SPE instructions on E500 core can handle round to nearest
+	 * and round toward zero in an IEEE-754 compliant way, we just need
+	 * to handle round toward +Inf and round toward -Inf by software.
+	 */
+	case SPFP:
+		if ((FP_ROUNDMODE) == FP_RND_PINF) {
+			if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */
+		} else { /* round to -Inf */
+			if (s_lo) {
+				if (fp_result)
+					fgpr.wp[1]++; /* Z < 0, choose Z2 */
+				else
+					fgpr.wp[1]--; /* Z < 0, choose Z2 */
+			}
+		}
+		break;
+
+	case DPFP:
+		/*
+		 * A double result is adjusted as one 64-bit quantity
+		 * (fgpr.dp[0]); a converted integer only occupies wp[1].
+		 */
+		if (FP_ROUNDMODE == FP_RND_PINF) {
+			if (!s_hi) {
+				if (fp_result)
+					fgpr.dp[0]++; /* Z > 0, choose Z1 */
+				else
+					fgpr.wp[1]++; /* Z > 0, choose Z1 */
+			}
+		} else { /* round to -Inf */
+			if (s_hi) {
+				if (fp_result)
+					fgpr.dp[0]++; /* Z < 0, choose Z2 */
+				else
+					fgpr.wp[1]--; /* Z < 0, choose Z2 */
+			}
+		}
+		break;
+
+	case VCT:
+		/* Each vector element is adjusted only if it was inexact. */
+		if (FP_ROUNDMODE == FP_RND_PINF) {
+			if (lo_inexact && !s_lo)
+				fgpr.wp[1]++; /* Z_low > 0, choose Z1 */
+			if (hi_inexact && !s_hi)
+				fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */
+		} else { /* round to -Inf */
+			if (lo_inexact && s_lo) {
+				if (fp_result)
+					fgpr.wp[1]++; /* Z_low < 0, choose Z2 */
+				else
+					fgpr.wp[1]--; /* Z_low < 0, choose Z2 */
+			}
+			if (hi_inexact && s_hi) {
+				if (fp_result)
+					fgpr.wp[0]++; /* Z_high < 0, choose Z2 */
+				else
+					fgpr.wp[0]--; /* Z_high < 0, choose Z2 */
+			}
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Write the adjusted value back to the destination register. */
+	current->thread.evr[fc] = fgpr.wp[0];
+	regs->gpr[fc] = fgpr.wp[1];
+
+	pr_debug("  to fgpr: %08x  %08x\n", fgpr.wp[0], fgpr.wp[1]);
+
+	/* Report 1 only when the task enabled the software inexact exception. */
+	if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
+		return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0;
+	return 0;
+}
+
+/*
+ * Decide at init time whether this CPU needs the e500 "cpu a005"
+ * erratum workaround and record the answer in
+ * have_e500_cpu_a005_erratum.
+ *
+ * Only e500v1/e500v2 cores are considered.  Affected revisions are
+ * those below 1.1, 2.3, 3.1, 4.1 and 5.1.  Always returns 0.
+ */
+static int __init spe_mathemu_init(void)
+{
+	const u32 pvr = mfspr(SPRN_PVR);
+	u32 first_fixed_min;
+
+	if (PVR_VER(pvr) != PVR_VER_E500V1 &&
+	    PVR_VER(pvr) != PVR_VER_E500V2)
+		return 0;
+
+	/* Lowest minor revision of each major revision that is unaffected. */
+	switch (PVR_MAJ(pvr)) {
+	case 2:
+		first_fixed_min = 3;
+		break;
+	case 1:
+	case 3:
+	case 4:
+	case 5:
+		first_fixed_min = 1;
+		break;
+	default:
+		/* Other major revisions never get the workaround. */
+		return 0;
+	}
+
+	if (PVR_MIN(pvr) < first_fixed_min)
+		have_e500_cpu_a005_erratum = 1;
+
+	return 0;
+}
+
+module_init(spe_mathemu_init);
diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c
new file mode 100644
index 0000000000..9c4fdaace4
--- /dev/null
+++ b/arch/powerpc/math-emu/mcrfs.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+
+/*
+ * mcrfs - copy one 4-bit field of the emulated FPSCR into a CR field.
+ *
+ * @ccr:  condition register image to update
+ * @crfD: destination CR field number; field 0 is the most significant
+ *        nibble
+ * @crfS: source FPSCR field number, numbered the same way
+ *
+ * The source field is cleared in the FPSCR after it has been read,
+ * except that for field 0 only the bits in 0x90000000 are cleared.
+ * Always returns 0.
+ */
+int
+mcrfs(u32 *ccr, u32 crfD, u32 crfS)
+{
+	u32 value, clear;
+
+#ifdef DEBUG
+	printk("%s: %p (%08x) %d %d\n", __func__, ccr, *ccr, crfD, crfS);
+#endif
+
+	/* Field n occupies bits starting at (7 - n) * 4 from the LSB. */
+	clear = 15 << ((7 - crfS) << 2);
+	if (!crfS)
+		clear = 0x90000000;	/* leave the two middle bits of field 0 alone */
+
+	value = (__FPU_FPSCR >> ((7 - crfS) << 2)) & 15;
+	__FPU_FPSCR &= ~(clear);
+
+	*ccr &= ~(15 << ((7 - crfD) << 2));
+	*ccr |= (value << ((7 - crfD) << 2));
+
+#ifdef DEBUG
+	/* The format needs a %s for __func__, else *ccr is misprinted. */
+	printk("%s: CR: %08x\n", __func__, *ccr);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c
new file mode 100644
index 0000000000..d42f1278e9
--- /dev/null
+++ b/arch/powerpc/math-emu/mffs.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+
+/*
+ * mffs - store the emulated FPSCR into the second 32-bit word of the
+ * destination register image.  frD[0] is not modified.  Always
+ * returns 0.
+ */
+int mffs(u32 *frD)
+{
+	u32 *dst = frD + 1;
+
+	*dst = __FPU_FPSCR;
+
+#ifdef DEBUG
+	printk("%s: frD %p: %08x.%08x\n", __func__, frD, frD[0], frD[1]);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c
new file mode 100644
index 0000000000..5753170b5d
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsb0.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+
+/*
+ * mtfsb0 - clear bit crbD of the emulated FPSCR, where bit 0 is the
+ * most significant bit.  Requests for bits 1 and 2 are ignored
+ * (presumably the FEX and VX summary bits -- confirm against the ISA).
+ * Always returns 0.
+ */
+int mtfsb0(int crbD)
+{
+	const int ignored = (crbD == 1) || (crbD == 2);
+
+	if (!ignored)
+		__FPU_FPSCR &= ~(1 << (31 - crbD));
+
+#ifdef DEBUG
+	printk("%s: %d %08lx\n", __func__, crbD, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c
new file mode 100644
index 0000000000..8162c3bfd1
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsb1.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+
+/*
+ * mtfsb1 - set bit crbD of the emulated FPSCR, where bit 0 is the
+ * most significant bit.  Requests for bits 1 and 2 are ignored
+ * (presumably the FEX and VX summary bits -- confirm against the ISA).
+ * Always returns 0.
+ */
+int mtfsb1(int crbD)
+{
+	const int ignored = (crbD == 1) || (crbD == 2);
+
+	if (!ignored)
+		__FPU_FPSCR |= (1 << (31 - crbD));
+
+#ifdef DEBUG
+	printk("%s: %d %08lx\n", __func__, crbD, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c
new file mode 100644
index 0000000000..7ae990f6b5
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsf.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+
+/*
+ * mtfsf - replace selected 4-bit fields of the emulated FPSCR.
+ *
+ * @FM:  field mask; bit i (counting from the least significant bit)
+ *       selects the nibble at bits 4*i..4*i+3 of the FPSCR
+ * @frB: source register image; the new field values come from frB[1]
+ *
+ * FPSCR_VX and FPSCR_FEX are never copied from the source: they are
+ * cleared and then recomputed from the resulting status and enable
+ * bits.  Bit 0x800 is always cleared.  Always returns 0.
+ */
+int mtfsf(unsigned int FM, u32 *frB)
+{
+	u32 mask, fpscr, vx_causes;
+	unsigned int field;
+
+	if (likely(FM == 1)) {
+		mask = 0x0f;
+	} else if (likely(FM == 0xff)) {
+		mask = ~0;
+	} else {
+		/* Widen every selected FM bit into its 4-bit field. */
+		mask = 0;
+		for (field = 0; field < 8; field++) {
+			if (FM & (1u << field))
+				mask |= 0xfu << (field * 4);
+		}
+	}
+
+	/* Take the selected fields from frB[1], the rest from the FPSCR. */
+	fpscr = (__FPU_FPSCR & ~mask) | (frB[1] & mask);
+	fpscr &= ~(FPSCR_VX | FPSCR_FEX | 0x800);
+
+	/* VX summarises the individual invalid-operation cause bits. */
+	vx_causes = FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI |
+		    FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC |
+		    FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI;
+	if (fpscr & vx_causes)
+		fpscr |= FPSCR_VX;
+
+	/* The bit order of exception enables and exception status
+	 * is the same. Simply shift and mask to check for enabled
+	 * exceptions.
+	 */
+	if (fpscr & (fpscr >> 22) &  0xf8)
+		fpscr |= FPSCR_FEX;
+
+	__FPU_FPSCR = fpscr;
+
+#ifdef DEBUG
+	printk("%s: %02x %p: %08lx\n", __func__, FM, frB, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c
new file mode 100644
index 0000000000..45f1edbda3
--- /dev/null
+++ b/arch/powerpc/math-emu/mtfsfi.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+
+/*
+ * mtfsfi - write a 4-bit immediate into one field of the emulated FPSCR.
+ *
+ * @crfD: destination field number; field 0 is the most significant
+ *        nibble
+ * @IMM:  immediate value; only its low four bits are used
+ *
+ * For field 0 only the bits selected by mask 9 (the outer two bits of
+ * the nibble) may be written.  The two middle bits are neither cleared
+ * nor set from IMM.  The same mask is therefore applied to IMM, so the
+ * immediate cannot set bits that this function refuses to clear.
+ *
+ * NOTE(review): unlike mtfsf(), the summary bits are not recomputed
+ * here after the update.
+ *
+ * Always returns 0.
+ */
+int
+mtfsfi(unsigned int crfD, unsigned int IMM)
+{
+	u32 mask = 0xf;
+
+	if (!crfD)
+		mask = 9;
+
+	__FPU_FPSCR &= ~(mask << ((7 - crfD) << 2));
+	__FPU_FPSCR |= (IMM & mask) << ((7 - crfD) << 2);
+
+#ifdef DEBUG
+	printk("%s: %d %x: %08lx\n", __func__, crfD, IMM, __FPU_FPSCR);
+#endif
+
+	return 0;
+}
diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c
new file mode 100644
index 0000000000..463d2f0832
--- /dev/null
+++ b/arch/powerpc/math-emu/stfd.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/*
+ * stfd - copy the sizeof(double) bytes at frS to the user address ea.
+ *
+ * Returns 0 on success or -EFAULT if the copy to user space fails.
+ */
+int stfd(void *frS, void *ea)
+{
+#if 0
+#ifdef DEBUG
+	printk("%s: S %p, ea %p: ", __func__, frS, ea);
+	dump_double(frS);
+	printk("\n");
+#endif
+#endif
+
+	return copy_to_user(ea, frS, sizeof(double)) ? -EFAULT : 0;
+}
diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c
new file mode 100644
index 0000000000..24ae9622fe
--- /dev/null
+++ b/arch/powerpc/math-emu/stfiwx.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/*
+ * stfiwx - copy the second 32-bit word of the register image frS to
+ * the user address ea.
+ *
+ * Returns 0 on success or -EFAULT if the copy to user space fails.
+ */
+int stfiwx(u32 *frS, void *ea)
+{
+	const u32 *word = frS + 1;
+
+#ifdef DEBUG
+	printk("%s: %p %p\n", __func__, frS, ea);
+#endif
+
+	return copy_to_user(ea, word, sizeof(*word)) ? -EFAULT : 0;
+}
diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c
new file mode 100644
index 0000000000..ddf9bbdb5b
--- /dev/null
+++ b/arch/powerpc/math-emu/stfs.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+
+/*
+ * stfs - convert the double-precision value at frS to single precision
+ * and store it at the user address ea.
+ *
+ * The store is skipped when the conversion raised an exception for
+ * which __FPU_TRAP_P() is true.
+ *
+ * Returns -EFAULT if the copy to user space fails, otherwise the
+ * exception flags raised by the conversion (0 when there were none).
+ */
+int
+stfs(void *frS, void *ea)
+{
+	FP_DECL_D(A);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+	float f;
+
+#ifdef DEBUG
+	printk("%s: S %p, ea %p\n", __func__, frS, ea);
+#endif
+
+	/* Unpack the double-precision source into A. */
+	FP_UNPACK_DP(A, frS);
+
+#ifdef DEBUG
+	printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+#endif
+
+	/* Narrow A (double) into R (single). */
+	FP_CONV(S, D, 1, 2, R, A);
+
+#ifdef DEBUG
+	printk("R: %ld %lu %ld (%ld)\n", R_s, R_f, R_e, R_c);
+#endif
+
+	_FP_PACK_CANONICAL(S, 1, R);
+	/* Only write the result when no trapping exception was raised. */
+	if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS)) {
+		_FP_PACK_RAW_1_P(S, &f, R);
+		if (copy_to_user(ea, &f, sizeof(float)))
+			return -EFAULT;
+	}
+
+	return FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/powerpc/math-emu/udivmodti4.c b/arch/powerpc/math-emu/udivmodti4.c
new file mode 100644
index 0000000000..1e52633dcb
--- /dev/null
+++ b/arch/powerpc/math-emu/udivmodti4.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* This has so very few changes over libgcc2's __udivmoddi4 it isn't funny.  */
+
+#include <math-emu/soft-fp.h>
+
+#undef count_leading_zeros
+#define count_leading_zeros  __FP_CLZ
+
+/* Divide the two-word unsigned value N1:N0 by the two-word unsigned
+   value D1:D0.
+
+   Q receives the quotient and R the remainder; in both arrays element
+   0 is the low word and element 1 the high word.  N1/D1 are the high
+   words and N0/D0 the low words of dividend and divisor.
+
+   A zero divisor deliberately executes an integer division by zero.  */
+void
+_fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],
+	       _FP_W_TYPE n1, _FP_W_TYPE n0,
+	       _FP_W_TYPE d1, _FP_W_TYPE d0)
+{
+  _FP_W_TYPE q0, q1, r0, r1;
+  _FP_I_TYPE b, bm;
+
+  /* Single-word divisor: the remainder always fits in one word.  */
+  if (d1 == 0)
+    {
+#if !UDIV_NEEDS_NORMALIZATION
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  udiv_qrnnd (q1, n1, 0, n1, d0);
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0.  */
+	}
+
+      r0 = n0;
+      r1 = 0;
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm != 0)
+	    {
+	      /* Normalize, i.e. make the most significant bit of the
+		 denominator set.  */
+
+	      d0 = d0 << bm;
+	      n1 = (n1 << bm) | (n0 >> (_FP_W_TYPE_SIZE - bm));
+	      n0 = n0 << bm;
+	    }
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0 >> bm.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 leading quotient digit q1 = 1).
+
+		 This special case is necessary, not an optimization.
+		 (Shifts counts of SI_TYPE_SIZE are undefined.)  */
+
+	      n1 -= d0;
+	      q1 = 1;
+	    }
+	  else
+	    {
+	      _FP_W_TYPE n2;
+
+	      /* Normalize.  */
+
+	      b = _FP_W_TYPE_SIZE - bm;
+
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q1, n1, n2, n1, d0);
+	    }
+
+	  /* n1 != d0...  */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0 >> bm.  */
+	}
+
+      r0 = n0 >> bm;
+      r1 = 0;
+#endif /* UDIV_NEEDS_NORMALIZATION */
+    }
+  /* Two-word divisor: the quotient always fits in one word (q1 = 0).  */
+  else
+    {
+      if (d1 > n1)
+	{
+	  /* 00 = nn / DD */
+
+	  q0 = 0;
+	  q1 = 0;
+
+	  /* Remainder in n1n0.  */
+	  r0 = n0;
+	  r1 = n1;
+	}
+      else
+	{
+	  /* 0q = NN / dd */
+
+	  count_leading_zeros (bm, d1);
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 quotient digit q0 = 0 or 1).
+
+		 This special case is necessary, not an optimization.  */
+
+	      /* The condition on the next line takes advantage of that
+		 n1 >= d1 (true due to program flow).  */
+	      if (n1 > d1 || n0 >= d0)
+		{
+		  q0 = 1;
+		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+		}
+	      else
+		q0 = 0;
+
+	      q1 = 0;
+
+	      r0 = n0;
+	      r1 = n1;
+	    }
+	  else
+	    {
+	      _FP_W_TYPE m1, m0, n2;
+
+	      /* Normalize.  */
+
+	      b = _FP_W_TYPE_SIZE - bm;
+
+	      d1 = (d1 << bm) | (d0 >> b);
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      /* Estimate q0 from the high words, then correct it by one
+		 if q0 * d0 shows the estimate was too large.  */
+	      udiv_qrnnd (q0, n1, n2, n1, d1);
+	      umul_ppmm (m1, m0, q0, d0);
+
+	      if (m1 > n1 || (m1 == n1 && m0 > n0))
+		{
+		  q0--;
+		  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+		}
+
+	      q1 = 0;
+
+	      /* Remainder in (n1n0 - m1m0) >> bm.  */
+	      sub_ddmmss (n1, n0, n1, n0, m1, m0);
+	      r0 = (n1 << b) | (n0 >> bm);
+	      r1 = n1 >> bm;
+	    }
+	}
+    }
+
+  /* Hand both results back to the caller, low word first.  */
+  q[0] = q0; q[1] = q1;
+  r[0] = r0, r[1] = r1;
+}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
new file mode 100644
index 0000000000..503a6e2499
--- /dev/null
+++ b/arch/powerpc/mm/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux ppc-specific parts of the memory manager.
+#
+
+# On 64-bit builds, compile everything in this directory with the flags
+# held in $(NO_MINIMAL_TOC).
+ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
+
+# Objects that are always built.  The *_$(BITS) entries select the
+# variant named after the value of $(BITS), which is set outside this
+# Makefile.
+obj-y				:= fault.o mem.o pgtable.o maccess.o pageattr.o \
+				   init_$(BITS).o pgtable_$(BITS).o \
+				   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
+				   init-common.o mmu_context.o drmem.o \
+				   cacheflush.o
+# Sub-directories and objects built only for the matching config option.
+obj-$(CONFIG_PPC_MMU_NOHASH)	+= nohash/
+obj-$(CONFIG_PPC_BOOK3S_32)	+= book3s32/
+obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64/
+obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
+obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
+obj-$(CONFIG_PTDUMP_CORE)	+= ptdump/
+obj-$(CONFIG_KASAN)		+= kasan/
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
new file mode 100644
index 0000000000..50dd8f6bdf
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Build mmu.o without KASAN instrumentation.
+KASAN_SANITIZE_mmu.o := n
+
+# When KASAN is enabled, also define DISABLE_BRANCH_PROFILING for mmu.o.
+ifdef CONFIG_KASAN
+CFLAGS_mmu.o  		+= -DDISABLE_BRANCH_PROFILING
+endif
+
+# mmu.o and mmu_context.o are always built; the rest depend on the
+# selected CPU family and on KUAP support.
+obj-y += mmu.o mmu_context.o
+obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
+obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
+obj-$(CONFIG_PPC_KUAP) += kuap.o
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
new file mode 100644
index 0000000000..8b804e1a9f
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -0,0 +1,598 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *
+ *  This file contains low-level assembler routines for managing
+ *  the PowerPC MMU hash table.  (PPC 8xx processors don't use a
+ *  hash table, so this file is not used on them.)
+ */
+
+#include <linux/export.h>
+#include <linux/pgtable.h>
+#include <linux/init.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+#include <asm/code-patching-asm.h>
+
+#ifdef CONFIG_PTE_64BIT
+#define PTE_T_SIZE		8
+#define PTE_FLAGS_OFFSET	4	/* offset of PTE flags, in bytes */
+#else
+#define PTE_T_SIZE		4
+#define PTE_FLAGS_OFFSET	0
+#endif
+
+/*
+ * Load a PTE into the hash table, if possible.
+ * The address is in r4, and r3 contains an access flag:
+ * _PAGE_RW (0x400) if a write.
+ * r9 contains the SRR1 value, from which we use the MSR_PR bit.
+ * SPRG_THREAD contains the physical address of the current task's thread.
+ *
+ * Returns to the caller if the access is illegal or there is no
+ * mapping for the address.  Otherwise it places an appropriate PTE
+ * in the hash table and returns from the exception.
+ * Uses r0, r3 - r6, r8, r10, ctr, lr.
+ */
+	.text
+_GLOBAL(hash_page)
+#ifdef CONFIG_SMP
+	/*
+	 * Acquire mmu_hash_lock, addressed as (mmu_hash_lock - PAGE_OFFSET):
+	 * poll with plain loads at 11: while the lock word is non-zero,
+	 * then try to claim it at 10: by storing 0x0fff0000 with
+	 * lwarx/stwcx.
+	 */
+	lis	r8, (mmu_hash_lock - PAGE_OFFSET)@h
+	ori	r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l
+	lis	r0,0x0fff
+	b	10f
+11:	lwz	r6,0(r8)
+	cmpwi	0,r6,0
+	bne	11b
+10:	lwarx	r6,0,r8
+	cmpwi	0,r6,0
+	bne-	11b
+	stwcx.	r0,0,r8
+	bne-	10b
+	isync
+#endif
+	/* Get PTE (linux-style) and check access */
+	lis	r0, TASK_SIZE@h		/* check if kernel address */
+	cmplw	0,r4,r0
+	mfspr	r8,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
+	ori	r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
+	lwz	r5,PGDIR(r8)		/* virt page-table root */
+	blt+	112f			/* assume user more likely */
+	lis	r5,swapper_pg_dir@ha	/* if kernel address, use */
+	addi	r5,r5,swapper_pg_dir@l	/* kernel page table */
+	rlwimi	r3,r9,32-12,29,29	/* MSR_PR -> _PAGE_USER */
+	/* r5 = page-table root, converted to a physical address */
+112:	tophys(r5, r5)
+#ifndef CONFIG_PTE_64BIT
+	rlwimi	r5,r4,12,20,29		/* insert top 10 bits of address */
+	lwz	r8,0(r5)		/* get pmd entry */
+	rlwinm.	r8,r8,0,0,19		/* extract address of pte page */
+#else
+	rlwinm	r8,r4,13,19,29		/* Compute pgdir/pmd offset */
+	lwzx	r8,r8,r5		/* Get L1 entry */
+	rlwinm.	r8,r8,0,0,20		/* extract pt base address */
+#endif
+#ifdef CONFIG_SMP
+	beq-	.Lhash_page_out		/* return if no mapping */
+#else
+	/* XXX it seems like the 601 will give a machine fault on the
+	   rfi if its alignment is wrong (bottom 4 bits of address are
+	   8 or 0xc) and we have had a not-taken conditional branch
+	   to the address following the rfi. */
+	beqlr-
+#endif
+#ifndef CONFIG_PTE_64BIT
+	rlwimi	r8,r4,22,20,29		/* insert next 10 bits of address */
+#else
+	rlwimi	r8,r4,23,20,28		/* compute pte address */
+	/*
+	 * If PTE_64BIT is set, the low word is the flags word; use that
+	 * word for locking since it contains all the interesting bits.
+	 */
+	addi	r8,r8,PTE_FLAGS_OFFSET
+#endif
+
+	/*
+	 * Update the linux PTE atomically.  We do the lwarx up-front
+	 * because almost always, there won't be a permission violation
+	 * and there won't already be an HPTE, and thus we will have
+	 * to update the PTE to set _PAGE_HASHPTE.  -- paulus.
+	 */
+.Lretry:
+	lwarx	r6,0,r8			/* get linux-style pte, flag word */
+#ifdef CONFIG_PPC_KUAP
+	mfsrin	r5,r4
+	rlwinm	r0,r9,28,_PAGE_RW	/* MSR[PR] => _PAGE_RW */
+	rlwinm	r5,r5,12,_PAGE_RW	/* Ks => _PAGE_RW */
+	andc	r5,r5,r0		/* Ks & ~MSR[PR] */
+	andc	r5,r6,r5		/* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */
+	andc.	r5,r3,r5		/* check access & ~permission */
+#else
+	andc.	r5,r3,r6		/* check access & ~permission */
+#endif
+	/*
+	 * The two instructions below do not touch cr0, so the branch that
+	 * follows them still tests the result of the andc. above.
+	 */
+	rlwinm	r0,r3,32-3,24,24	/* _PAGE_RW access -> _PAGE_DIRTY */
+	ori	r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
+#ifdef CONFIG_SMP
+	bne-	.Lhash_page_out		/* return if access not permitted */
+#else
+	bnelr-
+#endif
+	or	r5,r0,r6		/* set accessed/dirty bits */
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+	subf	r10,r6,r8		/* create false data dependency */
+	subi	r10,r10,PTE_FLAGS_OFFSET
+	lwzx	r10,r6,r10		/* Get upper PTE word */
+#else
+	lwz	r10,-PTE_FLAGS_OFFSET(r8)
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_PTE_64BIT */
+	stwcx.	r5,0,r8			/* attempt to update PTE */
+	bne-	.Lretry			/* retry if someone got there first */
+
+	mfsrin	r3,r4			/* get segment reg for segment */
+	bl	create_hpte		/* add the hash table entry */
+
+#ifdef CONFIG_SMP
+	/* Release mmu_hash_lock by storing 0 to it after an eieio. */
+	eieio
+	lis	r8, (mmu_hash_lock - PAGE_OFFSET)@ha
+	li	r0,0
+	stw	r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
+#endif
+	b	fast_hash_page_return
+
+#ifdef CONFIG_SMP
+	/* Failure exit on SMP: release mmu_hash_lock, return to the caller. */
+.Lhash_page_out:
+	eieio
+	lis	r8, (mmu_hash_lock - PAGE_OFFSET)@ha
+	li	r0,0
+	stw	r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
+	blr
+#endif /* CONFIG_SMP */
+_ASM_NOKPROBE_SYMBOL(hash_page)
+
+/*
+ * Add an entry for a particular page to the hash table.
+ *
+ * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval)
+ *
+ * We assume any necessary modifications to the pte (e.g. setting
+ * the accessed bit) have already been done and that there is actually
+ * a hash table in use (i.e. we're not on a 603).
+ */
+_GLOBAL(add_hash_page)
+	/* Save the return address at 4(r1); create_hpte is called with bl. */
+	mflr	r0
+	stw	r0,4(r1)
+
+#ifdef CONFIG_SMP
+	/* r8 = lock token: the word at TASK_CPU(r2) with 12 or-ed into the upper half */
+	lwz	r8,TASK_CPU(r2)		/* to go in mmu_hash_lock */
+	oris	r8,r8,12
+#endif /* CONFIG_SMP */
+
+	/*
+	 * We disable interrupts here, even on UP, because we don't
+	 * want to race with hash_page, and because we want the
+	 * _PAGE_HASHPTE bit to be a reliable indication of whether
+	 * the HPTE exists (or at least whether one did once).
+	 * We also turn off the MMU for data accesses so that
+	 * we can't take a hash table miss (assuming the code is
+	 * covered by a BAT).  -- paulus
+	 */
+	mfmsr	r9			/* r9 keeps the original MSR for the exit path */
+	rlwinm	r0,r9,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	isync
+
+#ifdef CONFIG_SMP
+	/*
+	 * Spin lock: try to store the token with lwarx/stwcx. at 10:;
+	 * while the lock is held, poll it with plain loads at 11:.
+	 */
+	lis	r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+	addi	r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
+10:	lwarx	r0,0,r6			/* take the mmu_hash_lock */
+	cmpwi	0,r0,0
+	bne-	11f
+	stwcx.	r8,0,r6
+	beq+	12f
+11:	lwz	r0,0(r6)
+	cmpwi	0,r0,0
+	beq	10b
+	b	11b
+12:	isync
+#endif
+
+	/*
+	 * Fetch the linux pte and test and set _PAGE_HASHPTE atomically.
+	 * If _PAGE_HASHPTE was already set, we don't replace the existing
+	 * HPTE, so we just unlock and return.
+	 */
+	mr	r8,r5			/* start from r5 (presumably the pmdval argument) */
+#ifndef CONFIG_PTE_64BIT
+	rlwimi	r8,r4,22,20,29
+#else
+	rlwimi	r8,r4,23,20,28
+	addi	r8,r8,PTE_FLAGS_OFFSET
+#endif
+1:	lwarx	r6,0,r8
+	andi.	r0,r6,_PAGE_HASHPTE
+	bne	9f			/* if HASHPTE already set, done */
+#ifdef CONFIG_PTE_64BIT
+#ifdef CONFIG_SMP
+	subf	r10,r6,r8		/* create false data dependency */
+	subi	r10,r10,PTE_FLAGS_OFFSET
+	lwzx	r10,r6,r10		/* Get upper PTE word */
+#else
+	lwz	r10,-PTE_FLAGS_OFFSET(r8)
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_PTE_64BIT */
+	ori	r5,r6,_PAGE_HASHPTE
+	stwcx.	r5,0,r8
+	bne-	1b			/* lost the reservation: reload and retry */
+
+	/* Convert context and va to VSID */
+	mulli	r3,r3,897*16		/* multiply context by context skew */
+	rlwinm	r0,r4,4,28,31		/* get ESID (top 4 bits of va) */
+	mulli	r0,r0,0x111		/* multiply by ESID skew */
+	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
+
+	bl	create_hpte
+
+9:
+#ifdef CONFIG_SMP
+	lis	r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+	addi	r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
+	eieio
+	li	r0,0
+	stw	r0,0(r6)		/* clear mmu_hash_lock */
+#endif
+
+	/* reenable interrupts and DR */
+	mtmsr	r9
+	isync
+
+	/* Reload the return address saved on entry and return. */
+	lwz	r0,4(r1)
+	mtlr	r0
+	blr
+_ASM_NOKPROBE_SYMBOL(add_hash_page)
+
+/*
+ * This routine adds a hardware PTE to the hash table.
+ * It is designed to be called with the MMU either on or off.
+ * r3 contains the VSID, r4 contains the virtual address,
+ * r5 contains the linux PTE, r6 contains the old value of the
+ * linux PTE (before setting _PAGE_HASHPTE). r10 contains the
+ * upper half of the PTE if CONFIG_PTE_64BIT.
+ * On SMP, the caller should have the mmu_hash_lock held.
+ * We assume that the caller has (or will) set the _PAGE_HASHPTE
+ * bit in the linux PTE in memory.  The value passed in r6 should
+ * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set
+ * this routine will skip the search for an existing HPTE.
+ * This procedure modifies r0, r3 - r6, r8, cr0.
+ *  -- paulus.
+ *
+ * For speed, 4 of the instructions get patched once the size and
+ * physical address of the hash table are known.  These definitions
+ * of Hash_base and Hash_bits below are for the early hash table.
+ */
+Hash_base = early_hash
+Hash_bits = 12				/* e.g. 256kB hash table */
+Hash_msk = (((1 << Hash_bits) - 1) * 64)
+
+/* defines for the PTE format for 32-bit PPCs */
+#define HPTE_SIZE	8
+#define PTEG_SIZE	64
+#define LG_PTEG_SIZE	6
+#define LDPTEu		lwzu
+#define LDPTE		lwz
+#define STPTE		stw
+#define CMPPTE		cmpw
+#define PTE_H		0x40
+#define PTE_V		0x80000000
+#define TST_V(r)	rlwinm. r,r,0,0,0
+#define SET_V(r)	oris r,r,PTE_V@h
+#define CLR_V(r,t)	rlwinm r,r,0,1,31
+
+#define HASH_LEFT	31-(LG_PTEG_SIZE+Hash_bits-1)
+#define HASH_RIGHT	31-LG_PTEG_SIZE
+
+__REF
+_GLOBAL(create_hpte)
+	/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
+	rlwinm	r8,r5,32-9,30,30	/* _PAGE_RW -> PP msb */
+	rlwinm	r0,r5,32-6,30,30	/* _PAGE_DIRTY -> PP msb */
+	and	r8,r8,r0		/* writable if _RW & _DIRTY */
+	rlwimi	r5,r5,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r5,r5,32-2,31,31	/* _PAGE_USER -> PP lsb */
+	ori	r8,r8,0xe04		/* clear out reserved bits */
+	andc	r8,r5,r8		/* PP = user? (rw&dirty? 1: 3): 0 */
+BEGIN_FTR_SECTION
+	rlwinm	r8,r8,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
+#ifdef CONFIG_PTE_64BIT
+	/* Put the XPN bits into the PTE */
+	rlwimi	r8,r10,8,20,22
+	rlwimi	r8,r10,2,29,29
+#endif
+
+	/* Construct the high word of the PPC-style PTE (r5) */
+	rlwinm	r5,r3,7,1,24		/* put VSID in 0x7fffff80 bits */
+	rlwimi	r5,r4,10,26,31		/* put in API (abbrev page index) */
+	SET_V(r5)			/* set V (valid) bit */
+
+	/* Labels 0:, 1: and 2: below are recorded as patch sites. */
+	patch_site	0f, patch__hash_page_A0
+	patch_site	1f, patch__hash_page_A1
+	patch_site	2f, patch__hash_page_A2
+	/* Get the address of the primary PTE group in the hash table (r3) */
+0:	lis	r0, (Hash_base - PAGE_OFFSET)@h	/* base address of hash table */
+1:	rlwimi	r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+2:	rlwinm	r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r3,r3,r0		/* make primary hash */
+	li	r0,8			/* PTEs/group */
+
+	/*
+	 * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search
+	 * if it is clear, meaning that the HPTE isn't there already...
+	 */
+	andi.	r6,r6,_PAGE_HASHPTE
+	beq+	10f			/* no PTE: go look for an empty slot */
+	tlbie	r4
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
+	mtctr	r0
+	addi	r4,r3,-HPTE_SIZE	/* pre-bias: LDPTEu advances before loading */
+1:	LDPTEu	r6,HPTE_SIZE(r4)	/* get next PTE */
+	CMPPTE	0,r6,r5
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	.Lfound_slot
+
+	patch_site	0f, patch__hash_page_B
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r5,r5,PTE_H		/* set H (secondary hash) bit */
+0:	xoris	r4,r3,Hash_msk>>16	/* compute secondary hash */
+	xori	r4,r4,(-PTEG_SIZE & 0xffff)
+	addi	r4,r4,-HPTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r6,HPTE_SIZE(r4)
+	CMPPTE	0,r6,r5
+	bdnzf	2,2b
+	beq+	.Lfound_slot
+	xori	r5,r5,PTE_H		/* clear H bit again */
+
+	/* Search the primary PTEG for an empty slot */
+10:	mtctr	r0
+	addi	r4,r3,-HPTE_SIZE	/* search primary PTEG */
+1:	LDPTEu	r6,HPTE_SIZE(r4)	/* get next PTE */
+	TST_V(r6)			/* test valid bit */
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	.Lfound_empty
+
+	patch_site	0f, patch__hash_page_C
+	/* Search the secondary PTEG for an empty slot */
+	ori	r5,r5,PTE_H		/* set H (secondary hash) bit */
+0:	xoris	r4,r3,Hash_msk>>16	/* compute secondary hash */
+	xori	r4,r4,(-PTEG_SIZE & 0xffff)
+	addi	r4,r4,-HPTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r6,HPTE_SIZE(r4)
+	TST_V(r6)
+	bdnzf	2,2b
+	beq+	.Lfound_empty
+	xori	r5,r5,PTE_H		/* clear H bit again */
+
+	/*
+	 * Choose an arbitrary slot in the primary PTEG to overwrite.
+	 * Since both the primary and secondary PTEGs are full, and we
+	 * have no information that the PTEs in the primary PTEG are
+	 * more important or useful than those in the secondary PTEG,
+	 * and we know there is a definite (although small) speed
+	 * advantage to putting the PTE in the primary PTEG, we always
+	 * put the PTE in the primary PTEG.
+	 *
+	 * next_slot holds a byte offset into the PTEG; it is advanced by
+	 * HPTE_SIZE and wrapped with the 7*HPTE_SIZE mask on every
+	 * eviction, so victims are picked round-robin.
+	 *
+	 * NOTE(review): the load uses (next_slot - PAGE_OFFSET)@l but the
+	 * store uses next_slot@l.  These agree only while the low 16 bits
+	 * of PAGE_OFFSET are zero -- confirm.
+	 */
+
+	lis	r4, (next_slot - PAGE_OFFSET)@ha	/* get next evict slot */
+	lwz	r6, (next_slot - PAGE_OFFSET)@l(r4)
+	addi	r6,r6,HPTE_SIZE			/* search for candidate */
+	andi.	r6,r6,7*HPTE_SIZE
+	stw	r6,next_slot@l(r4)
+	add	r4,r3,r6
+
+#ifndef CONFIG_SMP
+	/*
+	 * Store PTE in PTEG.  An empty or evicted slot gets both words;
+	 * a matching slot already has the right first word, so only the
+	 * second word is rewritten.
+	 */
+.Lfound_empty:
+	STPTE	r5,0(r4)
+.Lfound_slot:
+	STPTE	r8,HPTE_SIZE/2(r4)
+
+#else /* CONFIG_SMP */
+/*
+ * Between the tlbie above and updating the hash table entry below,
+ * another CPU could read the hash table entry and put it in its TLB.
+ * There are 3 cases:
+ * 1. using an empty slot
+ * 2. updating an earlier entry to change permissions (i.e. enable write)
+ * 3. taking over the PTE for an unrelated address
+ *
+ * In each case it doesn't really matter if the other CPUs have the old
+ * PTE in their TLB.  So we don't need to bother with another tlbie here,
+ * which is convenient as we've overwritten the register that had the
+ * address. :-)  The tlbie above is mainly to make sure that this CPU comes
+ * and gets the new PTE from the hash table.
+ *
+ * We do however have to make sure that the PTE is never in an invalid
+ * state with the V bit set.
+ */
+.Lfound_empty:
+.Lfound_slot:
+	CLR_V(r5,r0)		/* clear V (valid) bit in PTE */
+	STPTE	r5,0(r4)
+	sync
+	TLBSYNC
+	STPTE	r8,HPTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
+	sync
+	SET_V(r5)
+	STPTE	r5,0(r4)	/* finally set V bit in PTE */
+#endif /* CONFIG_SMP */
+
+	sync		/* make sure pte updates get to memory */
+	blr
+	.previous
+_ASM_NOKPROBE_SYMBOL(create_hpte)
+
+	.section .bss
+	.align	2
+next_slot:	/* byte offset, within a PTEG, of the slot create_hpte evicted last */
+	.space	4	/* one 32-bit word, accessed with lwz/stw */
+	.previous
+
+/*
+ * Flush the HPTEs for a run of consecutive pages from the hash table.
+ *
+ * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
+ *		    int count)
+ *
+ * We assume that there is a hash table in use (Hash != 0).
+ */
+__REF
+_GLOBAL(flush_hash_pages)
+	/*
+	 * We disable interrupts here, even on UP, because we want
+	 * the _PAGE_HASHPTE bit to be a reliable indication of
+	 * whether the HPTE exists (or at least whether one did once).
+	 * We also turn off the MMU for data accesses so that we
+	 * can't take a hash table miss (assuming the code is
+	 * covered by a BAT).  -- paulus
+	 */
+	mfmsr	r10			/* r10 = original MSR, restored at 19: */
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	isync
+
+	/* First find a PTE in the range that has _PAGE_HASHPTE set */
+#ifndef CONFIG_PTE_64BIT
+	rlwimi	r5,r4,22,20,29
+#else
+	rlwimi	r5,r4,23,20,28
+	addi	r5,r5,PTE_FLAGS_OFFSET
+#endif
+1:	lwz	r0,0(r5)		/* linux pte flags word */
+	cmpwi	cr1,r6,1		/* cr1: is this the last pte (count <= 1)? */
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	2f			/* found one */
+	ble	cr1,19f			/* none in the whole range: just restore MSR */
+	addi	r4,r4,0x1000		/* next page */
+	addi	r5,r5,PTE_T_SIZE	/* next pte */
+	addi	r6,r6,-1		/* one fewer left */
+	b	1b
+
+	/* Convert context and va to VSID */
+2:	mulli	r3,r3,897*16		/* multiply context by context skew */
+	rlwinm	r0,r4,4,28,31		/* get ESID (top 4 bits of va) */
+	mulli	r0,r0,0x111		/* multiply by ESID skew */
+	add	r3,r3,r0		/* note code below trims to 24 bits */
+
+	/* Construct the high word of the PPC-style PTE (r11) */
+	rlwinm	r11,r3,7,1,24		/* put VSID in 0x7fffff80 bits */
+	rlwimi	r11,r4,10,26,31		/* put in API (abbrev page index) */
+	SET_V(r11)			/* set V (valid) bit */
+
+#ifdef CONFIG_SMP
+	lis	r9, (mmu_hash_lock - PAGE_OFFSET)@ha	/* r9 = &mmu_hash_lock - PAGE_OFFSET */
+	addi	r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l
+	tophys	(r8, r2)
+	lwz	r8, TASK_CPU(r8)
+	oris	r8,r8,9			/* lock value: cpu number | (9 << 16) */
+10:	lwarx	r0,0,r9			/* try to take the lock if it reads as 0 */
+	cmpwi	0,r0,0
+	bne-	11f
+	stwcx.	r8,0,r9
+	beq+	12f
+11:	lwz	r0,0(r9)		/* spin with plain loads until it reads as 0 */
+	cmpwi	0,r0,0
+	beq	10b
+	b	11b
+12:	isync
+#endif
+
+	/*
+	 * Check the _PAGE_HASHPTE bit in the linux PTE.  If it is
+	 * already clear, we're done (for this pte).  If not,
+	 * clear it (atomically) and proceed.  -- paulus.
+	 */
+33:	lwarx	r8,0,r5			/* fetch the pte flags word */
+	andi.	r0,r8,_PAGE_HASHPTE
+	beq	8f			/* done if HASHPTE is already clear */
+	rlwinm	r8,r8,0,31,29		/* clear HASHPTE bit */
+	stwcx.	r8,0,r5			/* update the pte */
+	bne-	33b
+
+	patch_site	0f, patch__flush_hash_A0
+	patch_site	1f, patch__flush_hash_A1
+	patch_site	2f, patch__flush_hash_A2
+	/* Get the address of the primary PTE group in the hash table (r8) */
+0:	lis	r8, (Hash_base - PAGE_OFFSET)@h	/* base address of hash table */
+1:	rlwimi	r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+2:	rlwinm	r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r8,r0,r8		/* make primary hash */
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r11 */
+	li	r0,8			/* PTEs/group */
+	mtctr	r0
+	addi	r12,r8,-HPTE_SIZE
+1:	LDPTEu	r0,HPTE_SIZE(r12)	/* get next PTE */
+	CMPPTE	0,r0,r11
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	3f
+
+	patch_site	0f, patch__flush_hash_B
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r11,r11,PTE_H		/* set H (secondary hash) bit */
+	li	r0,8			/* PTEs/group */
+0:	xoris	r12,r8,Hash_msk>>16	/* compute secondary hash */
+	xori	r12,r12,(-PTEG_SIZE & 0xffff)
+	addi	r12,r12,-HPTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r0,HPTE_SIZE(r12)
+	CMPPTE	0,r0,r11
+	bdnzf	2,2b
+	xori	r11,r11,PTE_H		/* clear H again */
+	bne-	4f			/* should rarely fail to find it */
+
+3:	li	r0,0
+	STPTE	r0,0(r12)		/* invalidate entry */
+4:	sync
+	tlbie	r4			/* in hw tlb too */
+	sync
+
+8:	ble	cr1,9f			/* if all ptes checked */
+81:	addi	r6,r6,-1
+	addi	r5,r5,PTE_T_SIZE
+	addi	r4,r4,0x1000
+	lwz	r0,0(r5)		/* check next pte */
+	cmpwi	cr1,r6,1
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	33b			/* HASHPTE set: flush this one too */
+	bgt	cr1,81b			/* otherwise keep scanning while ptes remain */
+
+9:
+#ifdef CONFIG_SMP
+	TLBSYNC
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+#endif
+
+19:	mtmsr	r10
+	isync
+	blr
+	.previous
+EXPORT_SYMBOL(flush_hash_pages)
+_ASM_NOKPROBE_SYMBOL(flush_hash_pages)
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
new file mode 100644
index 0000000000..3a8815555a
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+void setup_kuap(bool disabled)
+{
+	if (!disabled) {	/* KUAP in use: add SR_KS to the user segments */
+		update_user_segments(mfsr(0) | SR_KS);
+		isync();        /* Context sync required after mtsr() */
+		init_mm.context.sr0 |= SR_KS;
+		current->thread.sr0 |= SR_KS;
+	}
+	/* The feature-mask update and the banner are for the boot CPU only */
+	if (smp_processor_id() != boot_cpuid)
+		return;
+
+	if (disabled)
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
+	else
+		pr_info("Activating Kernel Userspace Access Protection\n");
+}
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
new file mode 100644
index 0000000000..850783cfa9
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+ * and 8260 implementations but excludes the 8xx and 4xx.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+#include <asm/code-patching.h>
+#include <asm/sections.h>
+
+#include <mm/mmu_decl.h>
+
+u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0};
+/* Hash points at early_hash until MMU_init_hw() allocates the real table */
+static struct hash_pte __initdata *Hash = (struct hash_pte *)early_hash;
+static unsigned long __initdata Hash_size, Hash_mask;
+static unsigned int __initdata hash_mb, hash_mb2;
+unsigned long __initdata _SDR1;
+
+struct ppc_bat BATS[8][2];	/* 8 pairs of IBAT, DBAT */
+
+static struct batrange {	/* stores address ranges mapped by BATs */
+	unsigned long start;	/* first virtual address of the block */
+	unsigned long limit;	/* last virtual address of the block, inclusive */
+	phys_addr_t phys;	/* physical address that start maps to */
+} bat_addrs[8];
+/* Lock word used by the hash/TLB flush assembly; 0 means unlocked */
+#ifdef CONFIG_SMP
+unsigned long mmu_hash_lock;
+#endif
+
+/*
+ * Return PA for this VA if it is mapped by a BAT (limit is inclusive), or 0
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+	int b;
+	for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+		if (va >= bat_addrs[b].start && va <= bat_addrs[b].limit)
+			return bat_addrs[b].phys + (va - bat_addrs[b].start);
+	return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped (bat_addrs[].limit is inclusive)
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+	int b;
+	for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+		if (pa >= bat_addrs[b].phys
+		    && pa <= (bat_addrs[b].limit - bat_addrs[b].start)
+			     + bat_addrs[b].phys)
+			return bat_addrs[b].start + (pa - bat_addrs[b].phys);
+	return 0;
+}
+
+int __init find_free_bat(void)
+{
+	/* A BAT pair is free when neither Vs nor Vp is set in its DBAT upper word */
+	int nr_bats = 4;
+	int idx;
+
+	if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS))
+		nr_bats = 8;
+	for (idx = 0; idx < nr_bats; idx++)
+		if ((BATS[idx][1].batu & 3) == 0)
+			return idx;
+	return -1;
+}
+
+/*
+ * Return the size of the largest block usable to map the beginning of the
+ * area [base, top). The result is the smallest of:
+ * - 256M, the maximum block size on 6xx;
+ * - the alignment of base, i.e. its lowest bit set to 1 (for instance
+ *   if base is 0x16000000, that is 0x02000000), since the base address
+ *   must be aligned to the block size;
+ * - the highest bit set to 1 in (top - base), since the block size has
+ *   to be a power of two.
+ */
+unsigned int bat_block_size(unsigned long base, unsigned long top)
+{
+	unsigned int by_align = 1U << ((ffs(base) - 1) & 31);
+	unsigned int by_span = 1U << ((fls(top - base) - 1) & 31);
+	unsigned int cap = SZ_256M;
+
+	return min3(cap, by_align, by_span);
+}
+
+/*
+ * Set up one of the IBAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ */
+static void setibat(int index, unsigned long virt, phys_addr_t phys,
+		    unsigned int size, pgprot_t prot)
+{
+	unsigned int bl = (size >> 17) - 1;
+	int wimgxpp;
+	struct ppc_bat *bat = BATS[index];
+	unsigned long flags = pgprot_val(prot);
+
+	if (!cpu_has_feature(CPU_FTR_NEED_COHERENT))
+		flags &= ~_PAGE_COHERENT;
+	/* PP bits: BPP_RX for executable mappings, BPP_XX otherwise */
+	wimgxpp = (flags & _PAGE_COHERENT) | ((flags & _PAGE_EXEC) ? BPP_RX : BPP_XX);
+	bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+	bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+	if (flags & _PAGE_USER)
+		bat[0].batu |= 1;	/* Vp = 1 */
+}
+
+static void clearibat(int index)
+{
+	struct ppc_bat *ibat = &BATS[index][0];	/* [0] is the IBAT of the pair */
+
+	ibat->batu = 0;
+	ibat->batl = 0;
+}
+
+static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	int bat;
+
+	/* One BAT per pass, until all is mapped, no BAT is free, or block < 128k */
+	for (bat = find_free_bat(); bat != -1 && base != top; bat = find_free_bat()) {
+		unsigned int len = bat_block_size(base, top);
+
+		if (len < SZ_128K)
+			break;
+		setbat(bat, PAGE_OFFSET + base, base, len, PAGE_KERNEL_X);
+		base += len;
+	}
+	return base;
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	unsigned long done;
+	unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET;
+	unsigned long size;
+
+	size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET);
+	setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X);
+	/* With debug_pagealloc or KFENCE, BAT-map nothing at or above border */
+	if (debug_pagealloc_enabled_or_kfence()) {
+		pr_debug_once("Read-Write memory mapped without BATs\n");
+		if (base >= border)
+			return base;
+		if (top >= border)
+			top = border;
+	}
+	/* One pass is enough unless strict RWX needs a split at border */
+	if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
+		return __mmu_mapin_ram(base, top);
+	/* Map each side of border separately so that no BAT straddles it */
+	done = __mmu_mapin_ram(base, border);
+	if (done != border)
+		return done;
+
+	return __mmu_mapin_ram(border, top);
+}
+
+static bool is_module_segment(unsigned long addr)
+{
+	unsigned long first = ALIGN_DOWN(MODULES_VADDR, SZ_256M);
+	unsigned long last = ALIGN(MODULES_END, SZ_256M) - 1;
+
+	/* No segment counts as a module segment when modules are disabled */
+	if (!IS_ENABLED(CONFIG_MODULES))
+		return false;
+	return addr >= first && addr <= last;
+}
+
+void mmu_mark_initmem_nx(void)
+{
+	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+	int i;
+	unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+	unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K);
+	unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+	unsigned long size;
+	/* Cover [_stext, _etext) with IBATs; the last BAT is kept for the tail */
+	for (i = 0; i < nb - 1 && base < top;) {
+		size = bat_block_size(base, top);
+		setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+		base += size;
+	}
+	if (base < top) {
+		size = bat_block_size(base, top);
+		if ((top - base) > size) {	/* one block does not reach top */
+			size <<= 1;		/* double it, possibly mapping past top */
+			if (strict_kernel_rwx_enabled() && base + size > border)
+				pr_warn("Some RW data is getting mapped X. "
+					"Adjust CONFIG_DATA_SHIFT to avoid that.\n");
+		}
+		setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+		base += size;
+	}
+	for (; i < nb; i++)		/* zero the remaining IBAT entries */
+		clearibat(i);
+
+	update_bats();
+	/* Set 0x10000000 (NX, per the comment below) in segments >= TASK_SIZE */
+	for (i = TASK_SIZE >> 28; i < 16; i++) {
+		/* Do not set NX on VM space for modules */
+		if (is_module_segment(i << 28))
+			continue;
+
+		mtsr(mfsr(i << 28) | 0x10000000, i << 28);
+	}
+}
+
+void mmu_mark_rodata_ro(void)
+{
+	const unsigned long rodata_end = (unsigned long)__end_rodata;
+	int nr_bats = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+	int idx;
+
+	/* Switch every DBAT that starts below the end of rodata to BPP_RX */
+	for (idx = 0; idx < nr_bats; idx++) {
+		if (bat_addrs[idx].start >= rodata_end)
+			continue;
+		BATS[idx][1].batl = (BATS[idx][1].batl & ~BPP_RW) | BPP_RX;
+	}
+	update_bats();
+}
+
+/*
+ * Set up one of the D BAT (block address translation) register pairs;
+ * index -1 means "use the first free BAT". The other parameters are not
+ * checked; in particular size must be a power of 2 between 128k and 256M.
+ */
+void __init setbat(int index, unsigned long virt, phys_addr_t phys,
+		   unsigned int size, pgprot_t prot)
+{
+	unsigned int bl;
+	int wimgxpp;
+	struct ppc_bat *bat;
+	unsigned long flags = pgprot_val(prot);
+
+	if (index == -1)
+		index = find_free_bat();
+	if (index == -1) {
+		pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+		       (unsigned long long)phys);
+		return;
+	}
+	bat = BATS[index];
+	/* Coherence is dropped for uncached mappings and on CPUs not needing it */
+	if ((flags & _PAGE_NO_CACHE) ||
+	    (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
+		flags &= ~_PAGE_COHERENT;
+
+	bl = (size >> 17) - 1;
+	/* Only the DBAT (bat[1]) is programmed here; see setibat() for IBATs */
+	wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+			   | _PAGE_COHERENT | _PAGE_GUARDED);
+	wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+	bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+	bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+	if (flags & _PAGE_USER)
+		bat[1].batu |= 1; 	/* Vp = 1 */
+	if (flags & _PAGE_GUARDED) {
+		/* G bit must be zero in IBATs */
+		flags &= ~_PAGE_EXEC;	/* NOTE(review): flags is not read after this */
+	}
+	/* Record the range for v_block_mapped()/p_block_mapped(); limit is inclusive */
+	bat_addrs[index].start = virt;
+	bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+	bat_addrs[index].phys = phys;
+}
+
+/*
+ * Preload the translation for ea into the hash table, if there is one
+ */
+static void hash_preload(struct mm_struct *mm, unsigned long ea)
+{
+	pmd_t *pmdp;
+
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+		pmdp = pmd_off(mm, ea);
+		if (!pmd_none(*pmdp))
+			add_hash_page(mm->context.id, ea, pmd_val(*pmdp));
+	}
+}
+
+/*
+ * Called at the end of handling a user page fault, after the fault has
+ * been resolved by updating a PTE in the linux page tables. Preloads
+ * into the hash table the HPTE corresponding to that updated linux
+ * PTE.
+ *
+ * Must always be called with the pte lock held.
+ */
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+		      pte_t *ptep)
+{
+	struct pt_regs *regs;
+
+	/*
+	 * _PAGE_PRESENT needs no check here: we are called with either
+	 * mm->page_table_lock or the ptl lock held.
+	 *
+	 * Only addresses below TASK_SIZE whose linux PTE has
+	 * _PAGE_ACCESSED set get an HPTE.
+	 */
+	if (!pte_young(*ptep) || address >= TASK_SIZE)
+		return;
+	/* regs is NULL when init gets here first thing at boot */
+	regs = current->thread.regs;
+	if (!regs)
+		return;
+	/* Fill the hash only when coming from a 0x300 or 0x400 trap */
+	if (TRAP(regs) == 0x300 || TRAP(regs) == 0x400)
+		hash_preload(vma->vm_mm, address);
+}
+
+/*
+ * Size and allocate the hash table; MMU_init_hw_patch() patches hashtable.S.
+ */
+void __init MMU_init_hw(void)
+{
+	unsigned int n_hpteg, lg_n_hpteg;
+
+	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return;
+
+	if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
+
+#define LG_HPTEG_SIZE	6		/* 64 bytes per HPTEG */
+#define SDR1_LOW_BITS	((n_hpteg - 1) >> 10)
+#define MIN_N_HPTEG	1024		/* min 64kB hash table */
+
+	/*
+	 * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
+	 * This is less than the recommended amount, but then
+	 * Linux ain't AIX.
+	 */
+	n_hpteg = total_memory / (PAGE_SIZE * 8);
+	if (n_hpteg < MIN_N_HPTEG)
+		n_hpteg = MIN_N_HPTEG;
+	lg_n_hpteg = __ilog2(n_hpteg);
+	if (n_hpteg & (n_hpteg - 1)) {
+		++lg_n_hpteg;		/* round up if not power of 2 */
+		n_hpteg = 1 << lg_n_hpteg;
+	}
+	Hash_size = n_hpteg << LG_HPTEG_SIZE;	/* in bytes */
+
+	/*
+	 * Allocate the hash table, aligned to its own size.
+	 */
+	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
+	Hash = memblock_alloc(Hash_size, Hash_size);
+	if (!Hash)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, Hash_size, Hash_size);
+	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+
+	pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+		(unsigned long long)(total_memory >> 20), Hash_size >> 10);
+
+	/* Hash_mask, hash_mb and hash_mb2 are consumed by MMU_init_hw_patch() */
+	Hash_mask = n_hpteg - 1;
+	hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+	if (lg_n_hpteg > 16)
+		hash_mb2 = 16 - LG_HPTEG_SIZE;
+}
+
+void __init MMU_init_hw_patch(void)
+{
+	unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+	unsigned int hash = (unsigned int)Hash - PAGE_OFFSET;
+	/* hmask is (Hash_mask << LG_HPTEG_SIZE) >> 16: high half of the byte mask */
+	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return;
+
+	if (ppc_md.progress)
+		ppc_md.progress("hash:patch", 0x345);
+	if (ppc_md.progress)
+		ppc_md.progress("hash:done", 0x205);
+
+	/* WARNING: Make sure nothing can trigger a KASAN check past this point */
+
+	/*
+	 * Patch up the instructions in hashtable.S:create_hpte
+	 */
+	modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
+	modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+	modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
+	modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
+	modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
+
+	/*
+	 * Patch up the instructions in hashtable.S:flush_hash_pages
+	 */
+	modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16);
+	modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+	modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
+	modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	u64 limit = min_t(u64, first_memblock_size, SZ_256M);
+
+	/* The first MEMBLOCK must map physical address 0 on these processors */
+	BUG_ON(first_memblock_base != 0);
+	/* Cap the memblock current limit at the first block's size, at most 256M */
+	memblock_set_current_limit(limit);
+}
+
+void __init print_system_hash_info(void)
+{
+	pr_info("Hash_size         = 0x%lx\n", Hash_size);
+	if (Hash_mask)	/* non-zero only once MMU_init_hw() has sized the table */
+		pr_info("Hash_mask         = 0x%lx\n", Hash_mask);
+}
+
+void __init early_init_mmu(void)
+{	/* empty body: nothing is set up at this point */
+}
diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c
new file mode 100644
index 0000000000..1922f9a6b0
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+ * and 8260 implementations but excludes the 8xx and 4xx.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/export.h>
+
+#include <asm/mmu_context.h>
+
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table ([1] is set in switch_mmu_context()).
+ */
+void *abatron_pteptrs[2];
+
+/*
+ * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
+ * (virtual segment identifiers) for each context.  Although the
+ * hardware supports 24-bit VSIDs, and thus >1 million contexts,
+ * we only use 32,768 of them.  That is ample, since there can be
+ * at most around 30,000 tasks in the system anyway, and it means
+ * that we can use a bitmap to indicate which contexts are in use.
+ * Using a bitmap means that we entirely avoid all of the problems
+ * that we used to have when the context number overflowed,
+ * particularly on SMP systems.
+ *  -- paulus.
+ */
+#define NO_CONTEXT      	((unsigned long) -1)	/* id of an mm without a context */
+#define LAST_CONTEXT    	32767
+#define FIRST_CONTEXT    	1
+/* Search hint for __init_new_context(), and one in-use bit per context ID */
+static unsigned long next_mmu_context;
+static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
+
+unsigned long __init_new_context(void)
+{
+	unsigned long id;
+
+	/* Claim the first clear bit at or after the hint, wrapping to bit 0 */
+	for (id = next_mmu_context; test_and_set_bit(id, context_map); ) {
+		id = find_next_zero_bit(context_map, LAST_CONTEXT + 1, id);
+		if (id > LAST_CONTEXT)
+			id = 0;
+	}
+	next_mmu_context = (id + 1) & LAST_CONTEXT;
+	return id;
+}
+EXPORT_SYMBOL_GPL(__init_new_context);
+
+/*
+ * Set up the context for a new address space: allocate an ID, derive sr0.
+ */
+int init_new_context(struct task_struct *t, struct mm_struct *mm)
+{
+	mm->context.id = __init_new_context();
+	mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
+	/* Fold the protection bits that are in use into the saved sr0 value */
+	if (IS_ENABLED(CONFIG_PPC_KUEP))
+		mm->context.sr0 |= SR_NX;
+	if (!kuap_is_disabled())
+		mm->context.sr0 |= SR_KS;
+
+	return 0;	/* the only return value: this never reports failure */
+}
+
+/*
+ * Free a context ID. Make sure to call this with preempt disabled!
+ */
+void __destroy_context(unsigned long ctx)
+{
+	clear_bit(ctx, context_map);	/* __init_new_context() can hand it out again */
+}
+EXPORT_SYMBOL_GPL(__destroy_context);
+
+/*
+ * We're finished using the context for an address space.
+ */
+void destroy_context(struct mm_struct *mm)
+{
+	preempt_disable();	/* __destroy_context() asks for preempt disabled */
+	if (mm->context.id != NO_CONTEXT) {
+		__destroy_context(mm->context.id);
+		mm->context.id = NO_CONTEXT;	/* a repeated call frees nothing */
+	}
+	preempt_enable();
+}
+
+/*
+ * Initialize the context management stuff.
+ */
+void __init mmu_context_init(void)
+{
+	/* Reserve the IDs below FIRST_CONTEXT (i.e. context 0) for kernel use */
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_mmu_context = FIRST_CONTEXT;	/* first ID handed out */
+}
+
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+	long id = next->context.id;	/* signed: NO_CONTEXT reads as negative */
+
+	if (id < 0)
+		panic("mm_struct %p has no context ID", next);
+
+	isync();
+	/* presumably reloads the user segment registers -- confirm in update_user_segments() */
+	update_user_segments(next->context.sr0);
+
+	if (IS_ENABLED(CONFIG_BDI_SWITCH))
+		abatron_pteptrs[1] = next->pgd;
+	/* No hash table: SDR1 instead carries a value derived from next->pgd */
+	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff);
+
+	mb();	/* sync */
+	isync();
+}
+EXPORT_SYMBOL(switch_mmu_context);
diff --git a/arch/powerpc/mm/book3s32/nohash_low.S b/arch/powerpc/mm/book3s32/nohash_low.S
new file mode 100644
index 0000000000..19f418b0ed
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/nohash_low.S
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  This file contains low-level assembler routines for managing
+ *  the PowerPC 603 tlb invalidation.
+ */
+
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * Flush the TLB entry for the address in r3 (this variant is SMP-only)
+ */
+#ifdef CONFIG_SMP
+_GLOBAL(_tlbie)
+	lwz	r8,TASK_CPU(r2)
+	oris	r8,r8,11		/* lock value: cpu number | (11 << 16) */
+	mfmsr	r10			/* r10 = original MSR, restored before blr */
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear DR */
+	mtmsr	r0
+	isync
+	lis	r9,mmu_hash_lock@h
+	ori	r9,r9,mmu_hash_lock@l
+	tophys(r9,r9)
+10:	lwarx	r7,0,r9			/* spin until the lock word reads as 0 ... */
+	cmpwi	0,r7,0
+	bne-	10b
+	stwcx.	r8,0,r9			/* ... and our store of the lock value succeeds */
+	bne-	10b
+	eieio
+	tlbie	r3
+	sync
+	TLBSYNC
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+	mtmsr	r10
+	isync
+	blr
+_ASM_NOKPROBE_SYMBOL(_tlbie)
+#endif /* CONFIG_SMP */
+
+/*
+ * Flush the entire TLB. 603/603e only
+ */
+_GLOBAL(_tlbia)
+#if defined(CONFIG_SMP)
+	lwz	r8,TASK_CPU(r2)
+	oris	r8,r8,10		/* lock value: cpu number | (10 << 16) */
+	mfmsr	r10			/* r10 = original MSR, restored before blr */
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear DR */
+	mtmsr	r0
+	isync
+	lis	r9,mmu_hash_lock@h
+	ori	r9,r9,mmu_hash_lock@l
+	tophys(r9,r9)
+10:	lwarx	r7,0,r9			/* spin until the lock word reads as 0 ... */
+	cmpwi	0,r7,0
+	bne-	10b
+	stwcx.	r8,0,r9			/* ... and our store of the lock value succeeds */
+	bne-	10b
+#endif /* CONFIG_SMP */
+	li	r5, 32			/* 32 tlbie operations ... */
+	lis	r4, KERNELBASE@h	/* ... starting at KERNELBASE@h << 16 */
+	mtctr	r5
+	sync
+0:	tlbie	r4
+	addi	r4, r4, 0x1000		/* ... 0x1000 apart */
+	bdnz	0b
+	sync
+#ifdef CONFIG_SMP
+	TLBSYNC
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+	mtmsr	r10
+	isync
+#endif /* CONFIG_SMP */
+	blr
+_ASM_NOKPROBE_SYMBOL(_tlbia)
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
new file mode 100644
index 0000000000..9ad6b56bfe
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU uses a hash table to store virtual to
+ * physical translations, these routines flush entries from the
+ * hash table also.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/export.h>
+
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * since the hardware hash table functions as an extension of the
+ * tlb as far as the linux tables are concerned, flush it too.
+ *    -- Cort
+ */
+
+/*
+ * Flush [start, end) for mm: each populated pmd entry in the range is
+ * handed to flush_hash_pages(), which destroys the HPTE of every pte
+ * that has _PAGE_HASHPTE set.
+ */
+void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	unsigned int ctx = mm->context.id;
+	unsigned long chunk_end;
+	pmd_t *pmdp;
+
+	start &= PAGE_MASK;
+	if (start >= end)
+		return;
+	/* Make end inclusive: the last byte of the last page to flush */
+	end = (end - 1) | ~PAGE_MASK;
+	/* Walk the range one pmd entry (PGDIR_SIZE of address space) at a time */
+	for (pmdp = pmd_off(mm, start); ; start = chunk_end + 1, ++pmdp) {
+		int count;
+
+		chunk_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
+		if (chunk_end > end)
+			chunk_end = end;
+		if (!pmd_none(*pmdp)) {
+			count = ((chunk_end - start) >> PAGE_SHIFT) + 1;
+			flush_hash_pages(ctx, start, pmd_val(*pmdp), count);
+		}
+		if (chunk_end == end)
+			break;
+	}
+}
+EXPORT_SYMBOL(hash__flush_range);
+
+/*
+ * Flush every vma range of the address space described by mm.
+ */
+void hash__flush_tlb_mm(struct mm_struct *mm)
+{
+	VMA_ITERATOR(vmi, mm, 0);
+	struct vm_area_struct *vma;
+
+	/*
+	 * Iterating the vmas is safe when called from dup_mmap, which
+	 * holds mmap_lock.  It would also be safe from unmap_region or
+	 * exit_mmap, but not from vmtruncate on SMP - but dup_mmap
+	 * seems to be the only SMP case which gets here.
+	 */
+	for_each_vma(vmi, vma)
+		hash__flush_range(vma->vm_mm, vma->vm_start, vma->vm_end);
+}
+EXPORT_SYMBOL(hash__flush_tlb_mm);
+
+void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	/* Addresses at or above TASK_SIZE are looked up in init_mm */
+	struct mm_struct *mm = (vmaddr < TASK_SIZE) ? vma->vm_mm : &init_mm;
+	pmd_t *pmdp = pmd_off(mm, vmaddr);
+
+	if (pmd_none(*pmdp))
+		return;
+	flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmdp), 1);
+}
+EXPORT_SYMBOL(hash__flush_tlb_page);
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
new file mode 100644
index 0000000000..cad2abc173
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y	:= $(NO_MINIMAL_TOC)
+
+obj-y				+= mmu_context.o pgtable.o trace.o
+ifdef CONFIG_PPC_64S_HASH_MMU
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+obj-y				+= hash_pgtable.o hash_utils.o hash_tlb.o slb.o slice.o
+obj-$(CONFIG_PPC_HASH_MMU_NATIVE)	+= hash_native.o
+obj-$(CONFIG_PPC_4K_PAGES)	+= hash_4k.o
+obj-$(CONFIG_PPC_64K_PAGES)	+= hash_64k.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage_prot.o
+endif
+
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+
+obj-$(CONFIG_PPC_RADIX_MMU)	+= radix_pgtable.o radix_tlb.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_RADIX_MMU)	+= radix_hugetlbpage.o
+endif
+obj-$(CONFIG_SPAPR_TCE_IOMMU)	+= iommu_api.o
+obj-$(CONFIG_PPC_PKEY)	+= pkeys.o
+
+# Instrumenting the SLB fault path can lead to duplicate SLB entries
+KCOV_INSTRUMENT_slb.o := n
+
+# Parts of these can run in real mode and therefore are
+# not safe with the current outline KASAN implementation
+KASAN_SANITIZE_mmu_context.o := n
+KASAN_SANITIZE_pgtable.o := n
+KASAN_SANITIZE_radix_pgtable.o := n
+KASAN_SANITIZE_radix_tlb.o := n
+KASAN_SANITIZE_slb.o := n
+KASAN_SANITIZE_pkeys.o := n
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
new file mode 100644
index 0000000000..02acbfd05b
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+
+#include "internal.h"
+
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+		   pte_t *ptep, unsigned long trap, unsigned long flags,
+		   int ssize, int subpg_prot)
+{
+	real_pte_t rpte;
+	unsigned long hpte_group;
+	unsigned long rflags, pa;
+	unsigned long old_pte, new_pte;
+	unsigned long vpn, hash, slot;
+	unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+
+	/*
+	 * atomically mark the linux PTE busy, accessed and (on write) dirty
+	 */
+	do {
+		pte_t pte = READ_ONCE(*ptep);
+
+		old_pte = pte_val(pte);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & H_PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(!check_pte_access(access, old_pte)))
+			return 1;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access. No H_PAGE_COMBO is added here: this is
+		 * a 4K insert for a 4K page size.
+		 */
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+		if (access & _PAGE_WRITE)
+			new_pte |= _PAGE_DIRTY;
+	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+	/*
+	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
+	 * need to add in 0x1 if it's a read-only user page
+	 */
+	rflags = htab_convert_pte_flags(new_pte, flags);
+	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
+
+	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+	vpn  = hpt_vpn(ea, vsid, ssize);
+	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+		/*
+		 * There MIGHT be an HPTE for this pte
+		 */
+		unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize,
+							 rpte, 0);
+
+		if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K,
+					       MMU_PAGE_4K, ssize, flags) == -1)
+			old_pte &= ~_PAGE_HPTEFLAGS;
+	}
+
+	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+		hash = hpt_hash(vpn, shift, ssize);
+
+repeat:
+		hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+		/* Insert into the hash table, primary slot */
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+						MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+		/*
+		 * Primary is full, try the secondary
+		 */
+		if (unlikely(slot == -1)) {
+			hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+			slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+							rflags,
+							HPTE_V_SECONDARY,
+							MMU_PAGE_4K,
+							MMU_PAGE_4K, ssize);
+			if (slot == -1) {
+				if (mftb() & 0x1)
+					hpte_group = (hash & htab_hash_mask) *
+							HPTES_PER_GROUP;
+				mmu_hash_ops.hpte_remove(hpte_group);
+				/*
+				 * FIXME!! Should we try the group from which we removed ?
+				 */
+				goto repeat;
+			}
+		}
+		/*
+		 * Hypervisor failure. Restore old pte and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*ptep = __pte(old_pte);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
+			return -1;
+		}
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
+	}
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+	return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
new file mode 100644
index 0000000000..954af420f3
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+
+#include "internal.h"
+
+/*
+ * Tell whether a hash-index value carries the software "invalid slot"
+ * marker, i.e. whether all four of its low-order bits are set.
+ */
+static inline bool hpte_soft_invalid(unsigned long hidx)
+{
+	const unsigned long marker = 0xfUL;
+
+	if ((hidx & marker) != marker)
+		return false;
+	return true;
+}
+
+/*
+ * Report whether sub page @index (0 - 15) of @rpte holds a usable slot,
+ * that is, whether its stored hash index is not the soft-invalid marker.
+ */
+bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+{
+	unsigned long sub_hidx = __rpte_to_hidx(rpte, index);
+
+	if (hpte_soft_invalid(sub_hidx))
+		return false;
+	return true;
+}
+
+/*
+ * Hash one 4K sub page of a 64K Linux PTE into the hash page table.
+ *
+ * @ea:         effective address being hashed
+ * @access:     access bits checked against the PTE by check_pte_access()
+ * @vsid:       virtual segment id used to build the VPN
+ * @ptep:       Linux PTE that is marked busy, updated and released here
+ * @trap:       trap value, passed on to the lazy icache and failure helpers
+ * @flags:      update flags handed to the hash ops
+ * @ssize:      segment size
+ * @subpg_prot: PTE bits cleared before the HPTE protection flags are derived
+ *
+ * Returns 0 when the PTE was found busy or the HPTE was updated/inserted,
+ * 1 when check_pte_access() rejects the access, and -1 when hpte_insert()
+ * reports -2 (the PTE is restored to its old value in that case).
+ */
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+		   pte_t *ptep, unsigned long trap, unsigned long flags,
+		   int ssize, int subpg_prot)
+{
+	real_pte_t rpte;
+	unsigned long hpte_group;
+	unsigned int subpg_index;
+	unsigned long rflags, pa;
+	unsigned long old_pte, new_pte, subpg_pte;
+	unsigned long vpn, hash, slot, gslot;
+	unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+
+	/*
+	 * atomically mark the linux large page PTE busy and dirty
+	 */
+	do {
+		pte_t pte = READ_ONCE(*ptep);
+
+		old_pte = pte_val(pte);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & H_PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(!check_pte_access(access, old_pte)))
+			return 1;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access. Since this is 4K insert of 64K page size
+		 * also add H_PAGE_COMBO
+		 */
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO;
+		if (access & _PAGE_WRITE)
+			new_pte |= _PAGE_DIRTY;
+	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+	/*
+	 * Handle the subpage protection bits
+	 */
+	subpg_pte = new_pte & ~subpg_prot;
+	rflags = htab_convert_pte_flags(subpg_pte, flags);
+
+	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+		/*
+		 * No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case
+		 */
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+	}
+
+	/* Index of the 4K sub page within the 64K page that contains ea */
+	subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
+	vpn  = hpt_vpn(ea, vsid, ssize);
+	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
+	/*
+	 * None of the sub 4k pages are hashed
+	 */
+	if (!(old_pte & H_PAGE_HASHPTE))
+		goto htab_insert_hpte;
+	/*
+	 * Check if the pte was already inserted into the hash table
+	 * as a 64k HW page, and invalidate the 64k HPTE if so.
+	 */
+	if (!(old_pte & H_PAGE_COMBO)) {
+		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
+		/*
+		 * clear the old slot details from the old and new pte.
+		 * On hash insert failure we use old pte value and we don't
+		 * want slot information there if we have an insert failure.
+		 */
+		old_pte &= ~H_PAGE_HASHPTE;
+		new_pte &= ~H_PAGE_HASHPTE;
+		goto htab_insert_hpte;
+	}
+	/*
+	 * Check for sub page valid and update
+	 */
+	if (__rpte_sub_valid(rpte, subpg_index)) {
+		int ret;
+
+		gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
+					   subpg_index);
+		ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
+						 MMU_PAGE_4K, MMU_PAGE_4K,
+						 ssize, flags);
+
+		/*
+		 * If we failed because typically the HPTE wasn't really here
+		 * we try an insertion.
+		 */
+		if (ret == -1)
+			goto htab_insert_hpte;
+
+		/* Update succeeded: publish the new PTE and drop the busy bit */
+		*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+		return 0;
+	}
+
+htab_insert_hpte:
+
+	/*
+	 * Initialize all hidx entries to invalid value, the first time
+	 * the PTE is about to allocate a 4K HPTE.
+	 */
+	if (!(old_pte & H_PAGE_COMBO))
+		rpte.hidx = INVALID_RPTE_HIDX;
+
+	/*
+	 * handle H_PAGE_4K_PFN case
+	 */
+	if (old_pte & H_PAGE_4K_PFN) {
+		/*
+		 * All the sub 4k page have the same
+		 * physical address.
+		 */
+		pa = pte_pfn(__pte(old_pte)) << HW_PAGE_SHIFT;
+	} else {
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+		pa += (subpg_index << shift);
+	}
+	hash = hpt_hash(vpn, shift, ssize);
+repeat:
+	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+	/* Insert into the hash table, primary slot */
+	slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+					MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+	/*
+	 * Primary is full, try the secondary
+	 */
+	if (unlikely(slot == -1)) {
+		bool soft_invalid;
+
+		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+						rflags, HPTE_V_SECONDARY,
+						MMU_PAGE_4K, MMU_PAGE_4K,
+						ssize);
+
+		/*
+		 * NOTE(review): slot is an unsigned long, so the "group full"
+		 * value -1 also has its low four bits set and is reported as
+		 * soft-invalid here; the invalidate below then runs for that
+		 * case as well. Confirm this is intended.
+		 */
+		soft_invalid = hpte_soft_invalid(slot);
+		if (unlikely(soft_invalid)) {
+			/*
+			 * We got a valid slot from a hardware point of view.
+			 * but we cannot use it, because we use this special
+			 * value; as defined by hpte_soft_invalid(), to track
+			 * invalid slots. We cannot use it. So invalidate it.
+			 */
+			gslot = slot & _PTEIDX_GROUP_IX;
+			mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
+						     MMU_PAGE_4K, MMU_PAGE_4K,
+						     ssize, 0);
+		}
+
+		if (unlikely(slot == -1 || soft_invalid)) {
+			/*
+			 * For soft invalid slot, let's ensure that we release a
+			 * slot from the primary, with the hope that we will
+			 * acquire that slot next time we try. This will ensure
+			 * that we do not get the same soft-invalid slot.
+			 */
+			if (soft_invalid || (mftb() & 0x1))
+				hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+			mmu_hash_ops.hpte_remove(hpte_group);
+			/*
+			 * FIXME!! Should we try the group from which we removed ?
+			 */
+			goto repeat;
+		}
+	}
+	/*
+	 * Hypervisor failure. Restore old pte and return -1
+	 * similar to __hash_page_*
+	 */
+	if (unlikely(slot == -2)) {
+		*ptep = __pte(old_pte);
+		hash_failure_debug(ea, access, vsid, trap, ssize,
+				   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
+		return -1;
+	}
+
+	/* Record the slot for this sub page and mark the PTE as hashed */
+	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
+	new_pte |= H_PAGE_HASHPTE;
+
+	if (stress_hpt())
+		hpt_do_stress(ea, hpte_group);
+
+	/* Publish the new PTE and drop the busy bit */
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+	return 0;
+}
+
+/*
+ * Hash a 64K Linux PTE into the hash page table as a single 64K HPTE.
+ *
+ * @ea:     effective address being hashed
+ * @access: access bits checked against the PTE by check_pte_access()
+ * @vsid:   virtual segment id used to build the VPN
+ * @ptep:   Linux PTE that is marked busy, updated and released here
+ * @trap:   trap value, passed on to the lazy icache and failure helpers
+ * @flags:  update flags handed to the hash ops
+ * @ssize:  segment size
+ *
+ * Returns 0 when the PTE was found busy, when a cache-inhibited PTE is
+ * seen without MMU_FTR_CI_LARGE_PAGE, or when the HPTE was updated or
+ * inserted; 1 when check_pte_access() rejects the access; and -1 when
+ * hpte_insert() reports -2 (the PTE is restored to its old value).
+ */
+int __hash_page_64K(unsigned long ea, unsigned long access,
+		    unsigned long vsid, pte_t *ptep, unsigned long trap,
+		    unsigned long flags, int ssize)
+{
+	real_pte_t rpte;
+	unsigned long hpte_group;
+	unsigned long rflags, pa;
+	unsigned long old_pte, new_pte;
+	unsigned long vpn, hash, slot;
+	unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;
+
+	/*
+	 * atomically mark the linux large page PTE busy and dirty
+	 */
+	do {
+		pte_t pte = READ_ONCE(*ptep);
+
+		old_pte = pte_val(pte);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & H_PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(!check_pte_access(access, old_pte)))
+			return 1;
+		/*
+		 * Check if PTE has the cache-inhibit bit set
+		 * If so, bail out and refault as a 4k page
+		 */
+		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
+		    unlikely(pte_ci(pte)))
+			return 0;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access.
+		 */
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+		if (access & _PAGE_WRITE)
+			new_pte |= _PAGE_DIRTY;
+	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+	rflags = htab_convert_pte_flags(new_pte, flags);
+	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
+
+	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+	vpn  = hpt_vpn(ea, vsid, ssize);
+	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+		unsigned long gslot;
+
+		/*
+		 * There MIGHT be an HPTE for this pte
+		 */
+		gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+		/*
+		 * On an update miss, clear the HPTE flags in old_pte so the
+		 * insert path below is taken.
+		 */
+		if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
+					       MMU_PAGE_64K, ssize,
+					       flags) == -1)
+			old_pte &= ~_PAGE_HPTEFLAGS;
+	}
+
+	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+		hash = hpt_hash(vpn, shift, ssize);
+
+repeat:
+		hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+		/* Insert into the hash table, primary slot */
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+						MMU_PAGE_64K, MMU_PAGE_64K,
+						ssize);
+		/*
+		 * Primary is full, try the secondary
+		 */
+		if (unlikely(slot == -1)) {
+			hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+			slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+							rflags,
+							HPTE_V_SECONDARY,
+							MMU_PAGE_64K,
+							MMU_PAGE_64K, ssize);
+			if (slot == -1) {
+				/*
+				 * Both groups are full: evict from the primary
+				 * or the secondary group, chosen by the low
+				 * timebase bit, then retry.
+				 */
+				if (mftb() & 0x1)
+					hpte_group = (hash & htab_hash_mask) *
+							HPTES_PER_GROUP;
+				mmu_hash_ops.hpte_remove(hpte_group);
+				/*
+				 * FIXME!! Should we try the group from which we removed ?
+				 */
+				goto repeat;
+			}
+		}
+		/*
+		 * Hypervisor failure. Restore old pte and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*ptep = __pte(old_pte);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
+			return -1;
+		}
+
+		/* Replace any stale HPTE flags with the freshly inserted slot */
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
+	}
+
+	/* Publish the new PTE and drop the busy bit */
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
+	return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/hash_hugepage.c b/arch/powerpc/mm/book3s64/hash_hugepage.c
new file mode 100644
index 0000000000..c0fabe6c5a
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_hugepage.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+#include <linux/mm.h>
+#include <asm/machdep.h>
+
+/*
+ * Hash one base-page-sized piece of a transparent huge page PMD into the
+ * hash page table.
+ *
+ * @ea:     effective address being hashed
+ * @access: access bits checked against the PMD by check_pte_access()
+ * @vsid:   virtual segment id used to build the VPN
+ * @pmdp:   huge PMD that is marked busy, updated and released here
+ * @trap:   trap value, passed on to hash_failure_debug()
+ * @flags:  update flags handed to the hash ops
+ * @ssize:  segment size
+ * @psize:  base page size index used for the HPTE (actual size is 16M)
+ *
+ * Returns 0 when the PMD was found busy, is not a THP/devmap entry, or the
+ * HPTE was updated/inserted; 1 when check_pte_access() rejects the access;
+ * and -1 when hpte_insert() reports -2 (the PMD is restored in that case).
+ */
+int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
+		    pmd_t *pmdp, unsigned long trap, unsigned long flags,
+		    int ssize, unsigned int psize)
+{
+	unsigned int index, valid;
+	unsigned char *hpte_slot_array;
+	unsigned long rflags, pa, hidx;
+	unsigned long old_pmd, new_pmd;
+	int ret, lpsize = MMU_PAGE_16M;
+	unsigned long vpn, hash, shift, slot;
+
+	/*
+	 * atomically mark the linux large page PMD busy and dirty
+	 */
+	do {
+		pmd_t pmd = READ_ONCE(*pmdp);
+
+		old_pmd = pmd_val(pmd);
+		/* If PMD busy, retry the access */
+		if (unlikely(old_pmd & H_PAGE_BUSY))
+			return 0;
+		/* If PMD permissions don't match, take page fault */
+		if (unlikely(!check_pte_access(access, old_pmd)))
+			return 1;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access
+		 */
+		new_pmd = old_pmd | H_PAGE_BUSY | _PAGE_ACCESSED;
+		if (access & _PAGE_WRITE)
+			new_pmd |= _PAGE_DIRTY;
+	} while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
+
+	/*
+	 * Make sure this is thp or devmap entry
+	 *
+	 * NOTE(review): this returns after H_PAGE_BUSY was set by the loop
+	 * above and nothing on this path clears it again - confirm that
+	 * callers never reach here with a non-THP, non-devmap PMD.
+	 */
+	if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)))
+		return 0;
+
+	rflags = htab_convert_pte_flags(new_pmd, flags);
+
+	/*
+	 * Disabled code. Note that it refers to old_pte, which is not
+	 * declared in this function.
+	 */
+#if 0
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+		/*
+		 * No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case
+		 */
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+	}
+#endif
+	/*
+	 * Find the slot index details for this ea, using base page size.
+	 */
+	shift = mmu_psize_defs[psize].shift;
+	index = (ea & ~HPAGE_PMD_MASK) >> shift;
+	BUG_ON(index >= PTE_FRAG_SIZE);
+
+	vpn = hpt_vpn(ea, vsid, ssize);
+	hpte_slot_array = get_hpte_slot_array(pmdp);
+	if (psize == MMU_PAGE_4K) {
+		/*
+		 * invalidate the old hpte entry if we have that mapped via 64K
+		 * base page size. This is because demote_segment won't flush
+		 * hash page table entries.
+		 */
+		if ((old_pmd & H_PAGE_HASHPTE) && !(old_pmd & H_PAGE_COMBO)) {
+			flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
+					    ssize, flags);
+			/*
+			 * With THP, we also clear the slot information with
+			 * respect to all the 64K hash pte mapping the 16MB
+			 * page. They are all invalid now. This makes sure we
+			 * don't find the slot valid when we fault with 4k
+			 * base page size.
+			 *
+			 */
+			memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+		}
+	}
+
+	valid = hpte_valid(hpte_slot_array, index);
+	if (valid) {
+		/* update the hpte bits */
+		hash = hpt_hash(vpn, shift, ssize);
+		hidx =  hpte_hash_index(hpte_slot_array, index);
+		/* Entries recorded as secondary live in the ~hash group */
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+						 psize, lpsize, ssize, flags);
+		/*
+		 * We failed to update, try to insert a new entry.
+		 */
+		if (ret == -1) {
+			/*
+			 * large pte is marked busy, so we can be sure
+			 * nobody is looking at hpte_slot_array. hence we can
+			 * safely update this here.
+			 */
+			valid = 0;
+			hpte_slot_array[index] = 0;
+		}
+	}
+
+	if (!valid) {
+		unsigned long hpte_group;
+
+		hash = hpt_hash(vpn, shift, ssize);
+		/* insert new entry */
+		pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
+		new_pmd |= H_PAGE_HASHPTE;
+
+repeat:
+		hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+		/* Insert into the hash table, primary slot */
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+						psize, lpsize, ssize);
+		/*
+		 * Primary is full, try the secondary
+		 */
+		if (unlikely(slot == -1)) {
+			hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+			slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+							rflags,
+							HPTE_V_SECONDARY,
+							psize, lpsize, ssize);
+			if (slot == -1) {
+				/*
+				 * Both groups are full: evict from the primary
+				 * or the secondary group, chosen by the low
+				 * timebase bit, then retry.
+				 */
+				if (mftb() & 0x1)
+					hpte_group = (hash & htab_hash_mask) *
+							HPTES_PER_GROUP;
+
+				mmu_hash_ops.hpte_remove(hpte_group);
+				goto repeat;
+			}
+		}
+		/*
+		 * Hypervisor failure. Restore old pmd and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*pmdp = __pmd(old_pmd);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   psize, lpsize, old_pmd);
+			return -1;
+		}
+		/*
+		 * large pte is marked busy, so we can be sure
+		 * nobody is looking at hpte_slot_array. hence we can
+		 * safely update this here.
+		 */
+		mark_hpte_slot_valid(hpte_slot_array, index, slot);
+	}
+	/*
+	 * Mark the pte with H_PAGE_COMBO, if we are trying to hash it with
+	 * base page size 4k.
+	 */
+	if (psize == MMU_PAGE_4K)
+		new_pmd |= H_PAGE_COMBO;
+	/*
+	 * The hpte valid is stored in the pgtable whose address is in the
+	 * second half of the PMD. Order this against clearing of the busy bit in
+	 * huge pmd.
+	 */
+	smp_wmb();
+	*pmdp = __pmd(new_pmd & ~H_PAGE_BUSY);
+	return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
new file mode 100644
index 0000000000..430d1d935a
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * native hashtable management.
+ *
+ * SMP scalability work:
+ *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ */
+
+#undef DEBUG_LOW
+
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <linux/of.h>
+#include <linux/processor.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/pgtable.h>
+
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/trace.h>
+#include <asm/tlb.h>
+#include <asm/cputable.h>
+#include <asm/udbg.h>
+#include <asm/kexec.h>
+#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
+
+#include <misc/cxl-base.h>
+
+/*
+ * Low-level debug output: DBG_LOW() forwards to udbg_printf() when
+ * DEBUG_LOW is defined and expands to nothing otherwise.
+ */
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+/*
+ * Bit number, within the first HPTE doubleword viewed as a native
+ * unsigned long, that native_lock_hpte()/native_unlock_hpte() use as the
+ * per-entry lock. The number differs by build endianness; presumably this
+ * keeps the lock in the same stored byte, since the HPTE words are accessed
+ * through be64_to_cpu()/cpu_to_be64() - confirm.
+ */
+#ifdef __BIG_ENDIAN__
+#define HPTE_LOCK_BIT 3
+#else
+#define HPTE_LOCK_BIT (56+3)
+#endif
+
+/*
+ * Taken by tlbie() around a non-local invalidation when the MMU does not
+ * advertise MMU_FTR_LOCKLESS_TLBIE.
+ */
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+
+/*
+ * Lockdep annotations for the HPTE lock bit. With CONFIG_LOCKDEP the
+ * helpers record acquire/release on a single static lockdep map; without
+ * it they are empty.
+ */
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map hpte_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
+
+/* Tell lockdep that the HPTE lock is being taken. */
+static void acquire_hpte_lock(void)
+{
+	lock_map_acquire(&hpte_lock_map);
+}
+
+/* Tell lockdep that the HPTE lock is being dropped. */
+static void release_hpte_lock(void)
+{
+	lock_map_release(&hpte_lock_map);
+}
+#else
+/* No lockdep: nothing to record. */
+static void acquire_hpte_lock(void)
+{
+}
+
+/* No lockdep: nothing to record. */
+static void release_hpte_lock(void)
+{
+}
+#endif
+
+/*
+ * Build the tlbie operand for @vpn and issue the instruction.
+ *
+ * @vpn:    virtual page number to invalidate
+ * @psize:  base page size index; MMU_PAGE_4K takes the first switch arm,
+ *          every other size the second
+ * @apsize: actual page size index, used for the SLLP / penc encoding
+ * @ssize:  segment size, placed at bit 8 of the operand
+ *
+ * Returns the operand value that was passed to tlbie. No barriers are
+ * issued here.
+ */
+static inline unsigned long  ___tlbie(unsigned long vpn, int psize,
+						int apsize, int ssize)
+{
+	unsigned long va;
+	unsigned int penc;
+	unsigned long sllp;
+
+	/*
+	 * We need 14 to 65 bits of va for a tlbie of 4K page
+	 * With vpn we ignore the lower VPN_SHIFT bits already.
+	 * And top two bits are already ignored because we can
+	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
+	 * of 12.
+	 */
+	va = vpn << VPN_SHIFT;
+	/*
+	 * clear top 16 bits of 64bit va, non SLS segment
+	 * Older versions of the architecture (2.02 and earlier) require the
+	 * masking of the top 16 bits.
+	 */
+	if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+		va &= ~(0xffffULL << 48);
+
+	switch (psize) {
+	case MMU_PAGE_4K:
+		/* clear out bits after (52) [0....52.....63] */
+		va &= ~((1ul << (64 - 52)) - 1);
+		va |= ssize << 8;
+		sllp = get_sllp_encoding(apsize);
+		va |= sllp << 5;
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
+			     : "memory");
+		break;
+	default:
+		/* We need 14 to 14 + i bits of va */
+		penc = mmu_psize_defs[psize].penc[apsize];
+		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
+		va |= penc << 12;
+		va |= ssize << 8;
+		/*
+		 * AVAL bits:
+		 * We don't need all the bits, but rest of the bits
+		 * must be ignored by the processor.
+		 * vpn cover upto 65 bits of va. (0...65) and we need
+		 * 58..64 bits of va.
+		 */
+		va |= (vpn & 0xfe); /* AVAL */
+		va |= 1; /* L */
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
+			     : "memory");
+		break;
+	}
+	return va;
+}
+
+/*
+ * Issue the additional invalidations required after a tlbie on CPUs that
+ * report CPU_FTR_P9_TLBIE_ERAT_BUG and/or CPU_FTR_P9_TLBIE_STQ_BUG. Does
+ * nothing when neither feature bit is set. The arguments are the same ones
+ * given to the preceding ___tlbie() call and are only used for the STQ
+ * case, which repeats that tlbie.
+ */
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+				   int apsize, int ssize)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		/* Radix flush for a hash guest */
+
+		unsigned long rb,rs,prs,r,ric;
+
+		rb = PPC_BIT(52); /* IS = 2 */
+		rs = 0;  /* lpid = 0 */
+		prs = 0; /* partition scoped */
+		r = 1;   /* radix format */
+		ric = 0; /* RIC_FLUSH_TLB */
+
+		/*
+		 * Need the extra ptesync to make sure we don't
+		 * re-order the tlbie
+		 */
+		asm volatile("ptesync": : :"memory");
+		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+			     : : "r"(rb), "i"(r), "i"(prs),
+			       "i"(ric), "r"(rs) : "memory");
+	}
+
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		/* Need the extra ptesync to ensure we don't reorder tlbie*/
+		asm volatile("ptesync": : :"memory");
+		___tlbie(vpn, psize, apsize, ssize);
+	}
+}
+
+/*
+ * Issue a tlbie for @vpn through ___tlbie() and report the operand that
+ * was used to the tlbie tracepoint.
+ */
+static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+{
+	unsigned long rb;
+
+	rb = ___tlbie(vpn, psize, apsize, ssize);
+	trace_tlbie(0, 0, rb, 0, 0, 0, 0);
+}
+
+/*
+ * Build the tlbiel operand for @vpn, issue the instruction and report the
+ * operand to the tlbie tracepoint. The operand is encoded the same way as
+ * in ___tlbie(). No barriers are issued here.
+ *
+ * @vpn:    virtual page number to invalidate
+ * @psize:  base page size index; MMU_PAGE_4K takes the first switch arm,
+ *          every other size the second
+ * @apsize: actual page size index, used for the SLLP / penc encoding
+ * @ssize:  segment size, placed at bit 8 of the operand
+ */
+static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
+{
+	unsigned long va;
+	unsigned int penc;
+	unsigned long sllp;
+
+	/* VPN_SHIFT can be at most 12 */
+	va = vpn << VPN_SHIFT;
+	/*
+	 * clear top 16 bits of 64 bit va, non SLS segment
+	 * Older versions of the architecture (2.02 and earlier) require the
+	 * masking of the top 16 bits.
+	 */
+	if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+		va &= ~(0xffffULL << 48);
+
+	switch (psize) {
+	case MMU_PAGE_4K:
+		/* clear out bits after(52) [0....52.....63] */
+		va &= ~((1ul << (64 - 52)) - 1);
+		va |= ssize << 8;
+		sllp = get_sllp_encoding(apsize);
+		va |= sllp << 5;
+		asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1)
+			     : : "r" (va), "i" (CPU_FTR_ARCH_206)
+			     : "memory");
+		break;
+	default:
+		/* We need 14 to 14 + i bits of va */
+		penc = mmu_psize_defs[psize].penc[apsize];
+		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
+		va |= penc << 12;
+		va |= ssize << 8;
+		/*
+		 * AVAL bits:
+		 * We don't need all the bits, but rest of the bits
+		 * must be ignored by the processor.
+		 * vpn cover upto 65 bits of va. (0...65) and we need
+		 * 58..64 bits of va.
+		 */
+		va |= (vpn & 0xfe);
+		va |= 1; /* L */
+		asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1)
+			     : : "r" (va), "i" (CPU_FTR_ARCH_206)
+			     : "memory");
+		break;
+	}
+	trace_tlbie(0, 1, va, 0, 0, 0, 0);
+
+}
+
+/*
+ * Invalidate the TLB entry for @vpn, with the required barriers.
+ *
+ * The local form (tlbiel) is used only when @local is set, the MMU has
+ * MMU_FTR_TLBIEL, cxl_ctx_in_use() is false and the page size's
+ * mmu_psize_defs[].tlbiel field is non-zero. Otherwise a tlbie is issued,
+ * followed by the CPU-specific fixups and "eieio; tlbsync; ptesync".
+ * native_tlbie_lock is held around the tlbie form when the MMU lacks
+ * MMU_FTR_LOCKLESS_TLBIE.
+ */
+static inline void tlbie(unsigned long vpn, int psize, int apsize,
+			 int ssize, int local)
+{
+	unsigned int use_local;
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();
+
+	if (use_local)
+		use_local = mmu_psize_defs[psize].tlbiel;
+	if (lock_tlbie && !use_local)
+		raw_spin_lock(&native_tlbie_lock);
+	asm volatile("ptesync": : :"memory");
+	if (use_local) {
+		__tlbiel(vpn, psize, apsize, ssize);
+		ppc_after_tlbiel_barrier();
+	} else {
+		__tlbie(vpn, psize, apsize, ssize);
+		fixup_tlbie_vpn(vpn, psize, apsize, ssize);
+		asm volatile("eieio; tlbsync; ptesync": : :"memory");
+	}
+	if (lock_tlbie && !use_local)
+		raw_spin_unlock(&native_tlbie_lock);
+}
+
+/*
+ * Take the per-entry lock bit in the first doubleword of @hptep, spinning
+ * on plain reads of the bit between acquisition attempts.
+ */
+static inline void native_lock_hpte(struct hash_pte *hptep)
+{
+	unsigned long *lock_word = (unsigned long *)&hptep->v;
+
+	acquire_hpte_lock();
+	while (test_and_set_bit_lock(HPTE_LOCK_BIT, lock_word)) {
+		/* Contended: wait until the bit reads clear, then retry */
+		spin_begin();
+		while (test_bit(HPTE_LOCK_BIT, lock_word))
+			spin_cpu_relax();
+		spin_end();
+	}
+}
+
+/*
+ * Drop the per-entry lock bit in the first doubleword of @hptep, after
+ * recording the release with the lockdep helper.
+ */
+static inline void native_unlock_hpte(struct hash_pte *hptep)
+{
+	unsigned long *word = (unsigned long *)&hptep->v;
+
+	release_hpte_lock();
+	clear_bit_unlock(HPTE_LOCK_BIT, word);
+}
+
+/*
+ * Insert an HPTE into the first free entry of a hash group.
+ *
+ * @hpte_group: index of the group's first entry, relative to htab_address
+ * @vpn:        virtual page number being mapped
+ * @pa:         physical address encoded into the second doubleword
+ * @rflags:     flags OR-ed into the second doubleword
+ * @vflags:     flags OR-ed into the first doubleword (e.g. HPTE_V_BOLTED,
+ *              HPTE_V_SECONDARY)
+ * @psize:      base page size index
+ * @apsize:     actual page size index
+ * @ssize:      segment size
+ *
+ * Returns -1 when all HPTES_PER_GROUP entries are valid. Otherwise returns
+ * the index of the entry used within the group, with bit 3 set when
+ * @vflags contains HPTE_V_SECONDARY. Runs with interrupts disabled.
+ */
+static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
+			unsigned long pa, unsigned long rflags,
+			unsigned long vflags, int psize, int apsize, int ssize)
+{
+	struct hash_pte *hptep = htab_address + hpte_group;
+	unsigned long hpte_v, hpte_r;
+	unsigned long flags;
+	int i;
+
+	local_irq_save(flags);
+
+	if (!(vflags & HPTE_V_BOLTED)) {
+		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
+			" rflags=%lx, vflags=%lx, psize=%d)\n",
+			hpte_group, vpn, pa, rflags, vflags, psize);
+	}
+
+	/*
+	 * Look for an invalid entry. Leaving the loop through the break
+	 * keeps that entry locked.
+	 */
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
+		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
+			/* retry with lock held */
+			native_lock_hpte(hptep);
+			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
+				break;
+			native_unlock_hpte(hptep);
+		}
+
+		hptep++;
+	}
+
+	/* Group is full */
+	if (i == HPTES_PER_GROUP) {
+		local_irq_restore(flags);
+		return -1;
+	}
+
+	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
+	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+
+	if (!(vflags & HPTE_V_BOLTED)) {
+		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+			i, hpte_v, hpte_r);
+	}
+
+	/* Convert to the new HPTE layout on CPU_FTR_ARCH_300 CPUs */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
+		hpte_v = hpte_old_to_new_v(hpte_v);
+	}
+
+	hptep->r = cpu_to_be64(hpte_r);
+	/* Guarantee the second dword is visible before the valid bit */
+	eieio();
+	/*
+	 * Now set the first dword including the valid bit
+	 * NOTE: this also unlocks the hpte
+	 */
+	release_hpte_lock();
+	hptep->v = cpu_to_be64(hpte_v);
+
+	__asm__ __volatile__ ("ptesync" : : : "memory");
+
+	local_irq_restore(flags);
+
+	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
+}
+
+/*
+ * Evict one valid, non-bolted HPTE from a hash group.
+ *
+ * @hpte_group: index of the group's first entry, relative to htab_address
+ *
+ * The scan starts at an offset taken from the low three timebase bits and
+ * wraps around the group. The chosen entry is invalidated by clearing its
+ * first doubleword; no TLB invalidation is done here.
+ *
+ * Returns -1 when no entry could be evicted. Otherwise returns the loop
+ * counter at the point an entry was found, i.e. the number of entries
+ * skipped, which is not the entry's position within the group.
+ */
+static long native_hpte_remove(unsigned long hpte_group)
+{
+	unsigned long hpte_v, flags;
+	struct hash_pte *hptep;
+	int i;
+	int slot_offset;
+
+	local_irq_save(flags);
+
+	DBG_LOW("    remove(group=%lx)\n", hpte_group);
+
+	/* pick a random entry to start at */
+	slot_offset = mftb() & 0x7;
+
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
+		hptep = htab_address + hpte_group + slot_offset;
+		hpte_v = be64_to_cpu(hptep->v);
+
+		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
+			/* retry with lock held */
+			native_lock_hpte(hptep);
+			hpte_v = be64_to_cpu(hptep->v);
+			if ((hpte_v & HPTE_V_VALID)
+			    && !(hpte_v & HPTE_V_BOLTED))
+				break;
+			native_unlock_hpte(hptep);
+		}
+
+		slot_offset++;
+		slot_offset &= 0x7;
+	}
+
+	/* Nothing evictable in this group */
+	if (i == HPTES_PER_GROUP) {
+		i = -1;
+		goto out;
+	}
+
+	/* Invalidate the hpte. NOTE: this also unlocks it */
+	release_hpte_lock();
+	hptep->v = 0;
+out:
+	local_irq_restore(flags);
+	return i;
+}
+
+/*
+ * Update the protection bits of the HPTE at @slot if it still maps @vpn.
+ *
+ * @slot:   entry index relative to htab_address
+ * @newpp:  new flags; only HPTE_R_PPP, HPTE_R_N and HPTE_R_C are taken
+ * @vpn:    virtual page number the entry is expected to map
+ * @bpsize: base page size index
+ * @apsize: actual page size index
+ * @ssize:  segment size
+ * @flags:  HPTE_LOCAL_UPDATE requests a local TLB invalidate;
+ *          HPTE_NOHPTE_UPDATE suppresses the TLB invalidate
+ *
+ * Returns 0 when the entry matched and was updated, -1 when it did not
+ * match or was not valid. Unless suppressed by @flags, the TLB is
+ * invalidated in both cases.
+ */
+static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+				 unsigned long vpn, int bpsize,
+				 int apsize, int ssize, unsigned long flags)
+{
+	struct hash_pte *hptep = htab_address + slot;
+	unsigned long hpte_v, want_v;
+	int ret = 0, local = 0;
+	unsigned long irqflags;
+
+	local_irq_save(irqflags);
+
+	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
+
+	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
+		vpn, want_v & HPTE_V_AVPN, slot, newpp);
+
+	hpte_v = hpte_get_old_v(hptep);
+	/*
+	 * We need to invalidate the TLB always because hpte_remove doesn't do
+	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
+	 * random entry from it. When we do that we don't invalidate the TLB
+	 * (hpte_remove) because we assume the old translation is still
+	 * technically "valid".
+	 */
+	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+		DBG_LOW(" -> miss\n");
+		ret = -1;
+	} else {
+		native_lock_hpte(hptep);
+		/* recheck with locks held */
+		hpte_v = hpte_get_old_v(hptep);
+		if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
+			     !(hpte_v & HPTE_V_VALID))) {
+			ret = -1;
+		} else {
+			DBG_LOW(" -> hit\n");
+			/* Update the HPTE */
+			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+						~(HPTE_R_PPP | HPTE_R_N)) |
+					       (newpp & (HPTE_R_PPP | HPTE_R_N |
+							 HPTE_R_C)));
+		}
+		native_unlock_hpte(hptep);
+	}
+
+	if (flags & HPTE_LOCAL_UPDATE)
+		local = 1;
+	/*
+	 * Ensure it is out of the tlb too if it is not a nohpte fault
+	 */
+	if (!(flags & HPTE_NOHPTE_UPDATE))
+		tlbie(vpn, bpsize, apsize, ssize, local);
+
+	local_irq_restore(irqflags);
+
+	return ret;
+}
+
+/*
+ * Scan the HPTES_PER_GROUP entries beginning at @slot for a valid HPTE
+ * whose first doubleword matches @want_v. Returns the index of the first
+ * such entry (relative to htab_address), or -1 when there is none.
+ */
+static long __native_hpte_find(unsigned long want_v, unsigned long slot)
+{
+	unsigned long remaining = HPTES_PER_GROUP;
+
+	while (remaining--) {
+		struct hash_pte *entry = htab_address + slot;
+		unsigned long entry_v = hpte_get_old_v(entry);
+
+		/* Skip entries that do not match or are not valid */
+		if (!HPTE_V_COMPARE(entry_v, want_v) ||
+		    !(entry_v & HPTE_V_VALID)) {
+			slot++;
+			continue;
+		}
+
+		return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Locate the HPTE that maps @vpn for the given page and segment size.
+ * Returns its index relative to htab_address, or -1 when neither hash
+ * group contains a matching valid entry.
+ */
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+	unsigned long hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	unsigned long want_v = hpte_encode_avpn(vpn, psize, ssize);
+	unsigned long primary_group, secondary_group;
+	long found;
+
+	/*
+	 * Bolted entries are normally kept in the primary hash group,
+	 * so that is searched first ...
+	 */
+	primary_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	found = __native_hpte_find(want_v, primary_group);
+	if (found >= 0)
+		return found;
+
+	/* ... but occasionally they end up in the secondary group. */
+	secondary_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+	found = __native_hpte_find(want_v, secondary_group);
+
+	return found < 0 ? -1 : found;
+}
+
+/*
+ * Update the page protection bits. Intended to be used to create
+ * guard pages for kernel data structures on pages which are bolted
+ * in the HPT. Assumes pages being operated on will not be stolen.
+ *
+ * No need to lock here because we should be the only user.
+ *
+ * @newpp: new flags; only HPTE_R_PPP and HPTE_R_N are taken from it
+ * @ea:    kernel effective address of the page
+ * @psize: page size index, used as both base and actual page size
+ * @ssize: segment size
+ *
+ * Panics when no HPTE for @ea can be found. Runs with interrupts disabled.
+ */
+static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+				       int psize, int ssize)
+{
+	unsigned long vpn;
+	unsigned long vsid;
+	long slot;
+	struct hash_pte *hptep;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	vsid = get_kernel_vsid(ea, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
+
+	slot = native_hpte_find(vpn, psize, ssize);
+	if (slot == -1)
+		panic("could not find page to bolt\n");
+	hptep = htab_address + slot;
+
+	/* Update the HPTE */
+	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+				~(HPTE_R_PPP | HPTE_R_N)) |
+			       (newpp & (HPTE_R_PPP | HPTE_R_N)));
+	/*
+	 * Ensure it is out of the tlb too. Bolted entries base and
+	 * actual page size will be same.
+	 */
+	tlbie(vpn, psize, psize, ssize, 0);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Remove a bolted kernel entry. Memory hotplug uses this.
+ *
+ * No need to lock here because we should be the only user.
+ *
+ * @ea:    kernel effective address whose HPTE is removed
+ * @psize: page size index, used as both base and actual page size
+ * @ssize: segment size
+ *
+ * Returns 0 after invalidating the HPTE and the TLB entry, or -ENOENT when
+ * no HPTE for @ea is found. Interrupts are disabled while the hash table
+ * is examined and are restored on every exit path, including the
+ * -ENOENT one.
+ */
+static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
+{
+	unsigned long vpn;
+	unsigned long vsid;
+	long slot;
+	struct hash_pte *hptep;
+	unsigned long flags;
+	int ret = 0;
+
+	local_irq_save(flags);
+
+	vsid = get_kernel_vsid(ea, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
+
+	slot = native_hpte_find(vpn, psize, ssize);
+	if (slot == -1) {
+		/* Leave through the common exit so interrupts are restored */
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hptep = htab_address + slot;
+
+	VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));
+
+	/* Invalidate the hpte */
+	hptep->v = 0;
+
+	/* Invalidate the TLB */
+	tlbie(vpn, psize, psize, ssize, 0);
+
+out:
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+
+/*
+ * Invalidate the HPTE at @slot if it still maps @vpn, then invalidate the
+ * TLB entry regardless of whether the HPTE matched.
+ *
+ * @slot:   entry index relative to htab_address
+ * @vpn:    virtual page number the entry is expected to map
+ * @bpsize: base page size index
+ * @apsize: actual page size index
+ * @ssize:  segment size
+ * @local:  non-zero to request a local TLB invalidate
+ */
+static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
+				   int bpsize, int apsize, int ssize, int local)
+{
+	struct hash_pte *hptep = htab_address + slot;
+	unsigned long hpte_v;
+	unsigned long want_v;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
+
+	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
+	hpte_v = hpte_get_old_v(hptep);
+
+	if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+		native_lock_hpte(hptep);
+		/* recheck with locks held */
+		hpte_v = hpte_get_old_v(hptep);
+
+		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+			/* Invalidate the hpte. NOTE: this also unlocks it */
+			release_hpte_lock();
+			hptep->v = 0;
+		} else
+			native_unlock_hpte(hptep);
+	}
+	/*
+	 * We need to invalidate the TLB always because hpte_remove doesn't do
+	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
+	 * random entry from it. When we do that we don't invalidate the TLB
+	 * (hpte_remove) because we assume the old translation is still
+	 * technically "valid".
+	 */
+	tlbie(vpn, bpsize, apsize, ssize, local);
+
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Invalidate every HPTE recorded in @hpte_slot_array for a huge page and
+ * the matching TLB entries.
+ *
+ * @vsid:            virtual segment id of the mapping
+ * @addr:            start address of the huge page
+ * @hpte_slot_array: per-base-page slot information for the huge page
+ * @psize:           base page size index the page was hashed with
+ * @ssize:           segment size
+ * @local:           non-zero to request local TLB invalidates
+ *
+ * Entries not marked valid in @hpte_slot_array are skipped entirely; for
+ * the others the TLB is invalidated even when the HPTE no longer matches.
+ */
+static void native_hugepage_invalidate(unsigned long vsid,
+				       unsigned long addr,
+				       unsigned char *hpte_slot_array,
+				       int psize, int ssize, int local)
+{
+	int i;
+	struct hash_pte *hptep;
+	int actual_psize = MMU_PAGE_16M;
+	unsigned int max_hpte_count, valid;
+	unsigned long flags, s_addr = addr;
+	unsigned long hpte_v, want_v, shift;
+	unsigned long hidx, vpn = 0, hash, slot;
+
+	shift = mmu_psize_defs[psize].shift;
+	/* Number of base pages of this size covered by one PMD */
+	max_hpte_count = 1U << (PMD_SHIFT - shift);
+
+	local_irq_save(flags);
+	for (i = 0; i < max_hpte_count; i++) {
+		valid = hpte_valid(hpte_slot_array, i);
+		if (!valid)
+			continue;
+		hidx =  hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		/* Entries recorded as secondary live in the ~hash group */
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		hptep = htab_address + slot;
+		want_v = hpte_encode_avpn(vpn, psize, ssize);
+		hpte_v = hpte_get_old_v(hptep);
+
+		/* Even if we miss, we need to invalidate the TLB */
+		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+			/* recheck with locks held */
+			native_lock_hpte(hptep);
+			hpte_v = hpte_get_old_v(hptep);
+
+			if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+				/* Invalidate the hpte. NOTE: this also unlocks it */
+				release_hpte_lock();
+				hptep->v = 0;
+			} else
+				native_unlock_hpte(hptep);
+		}
+		/*
+		 * We need to do tlb invalidate for all the address, tlbie
+		 * instruction compares entry_VA in tlb with the VA specified
+		 * here
+		 */
+		tlbie(vpn, psize, actual_psize, ssize, local);
+	}
+	local_irq_restore(flags);
+}
+#else
+/*
+ * Variant built without CONFIG_TRANSPARENT_HUGEPAGE: only emits a warning.
+ */
+static void native_hugepage_invalidate(unsigned long vsid,
+				       unsigned long addr,
+				       unsigned char *hpte_slot_array,
+				       int psize, int ssize, int local)
+{
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
+
+/*
+ * Decode a hash PTE back into its page sizes, segment size and VPN.
+ *
+ * @hpte:   entry to decode; both doublewords are read as big endian
+ * @slot:   index of @hpte in the hash table, used to recover the hash
+ *          bits that are not stored in the entry itself
+ * @psize:  out: base page size index
+ * @apsize: out: actual page size index
+ * @ssize:  out: segment size, taken from the bits at and above
+ *          HPTE_V_SSIZE_SHIFT in the first doubleword
+ * @vpn:    out: reconstructed virtual page number
+ *
+ * For a segment size other than MMU_SEGSIZE_256M or MMU_SEGSIZE_1T both
+ * *vpn and *psize are set to 0.
+ */
+static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
+			int *psize, int *apsize, int *ssize, unsigned long *vpn)
+{
+	unsigned long avpn, pteg, vpi;
+	unsigned long hpte_v = be64_to_cpu(hpte->v);
+	unsigned long hpte_r = be64_to_cpu(hpte->r);
+	unsigned long vsid, seg_off;
+	int size, a_size, shift;
+	/* Look at the 8 bit LP value */
+	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+
+	/* On CPU_FTR_ARCH_300 convert both doublewords to the old layout */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
+		hpte_r = hpte_new_to_old_r(hpte_r);
+	}
+	if (!(hpte_v & HPTE_V_LARGE)) {
+		size   = MMU_PAGE_4K;
+		a_size = MMU_PAGE_4K;
+	} else {
+		/* Low nibble: base page size, high nibble: actual page size */
+		size = hpte_page_sizes[lp] & 0xf;
+		a_size = hpte_page_sizes[lp] >> 4;
+	}
+	/* This works for all page sizes, and for 256M and 1T segments */
+	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+	shift = mmu_psize_defs[size].shift;
+
+	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
+	/* Entries in a secondary group were placed with the inverted hash */
+	pteg = slot / HPTES_PER_GROUP;
+	if (hpte_v & HPTE_V_SECONDARY)
+		pteg = ~pteg;
+
+	switch (*ssize) {
+	case MMU_SEGSIZE_256M:
+		/* We only have 28 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1f) << 23;
+		vsid    =  avpn >> 5;
+		/* We can find more bits from the pteg value */
+		if (shift < 23) {
+			vpi = (vsid ^ pteg) & htab_hash_mask;
+			seg_off |= vpi << shift;
+		}
+		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		break;
+	case MMU_SEGSIZE_1T:
+		/* We only have 40 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1ffff) << 23;
+		vsid    = avpn >> 17;
+		/* As above, recover the low bits from the pteg value */
+		if (shift < 23) {
+			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
+			seg_off |= vpi << shift;
+		}
+		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		break;
+	default:
+		/* Unknown segment size: report vpn 0 and page size index 0 */
+		*vpn = size = 0;
+	}
+	*psize  = size;
+	*apsize = a_size;
+}
+
+/*
+ * clear all mappings on kexec.  All cpus are in real mode (or they will
+ * be when they isi), and we are the only one left.  We rely on our kernel
+ * mapping being 0xC0's and the hardware ignoring those two real bits.
+ *
+ * This must be called with interrupts disabled.
+ *
+ * Taking the native_tlbie_lock is unsafe here due to the possibility of
+ * lockdep being on. On pre POWER5 hardware, not taking the lock could
+ * cause deadlock. POWER5 and newer not taking the lock is fine. This only
+ * gets called during boot before secondary CPUs have come up and during
+ * crashdump and all bets are off anyway.
+ *
+ * Every slot of the hash table is visited; each valid entry is decoded,
+ * zeroed and its translation flushed. One eieio/tlbsync/ptesync sequence
+ * is issued after the whole table has been walked.
+ *
+ * TODO: add batching support when enabled.  remember, no dynamic memory here,
+ * although there is the control page available...
+ */
+static notrace void native_hpte_clear(void)
+{
+	unsigned long vpn = 0;
+	unsigned long slot, slots;
+	struct hash_pte *hptep = htab_address;
+	unsigned long hpte_v;
+	unsigned long pteg_count;
+	int psize, apsize, ssize;
+
+	/* htab_hash_mask is the group count minus one */
+	pteg_count = htab_hash_mask + 1;
+
+	slots = pteg_count * HPTES_PER_GROUP;
+
+	for (slot = 0; slot < slots; slot++, hptep++) {
+		/*
+		 * we could lock the pte here, but we are the only cpu
+		 * running,  right?  and for crash dump, we probably
+		 * don't want to wait for a maybe bad cpu.
+		 */
+		hpte_v = be64_to_cpu(hptep->v);
+
+		/*
+		 * Call ___tlbie() here rather than tlbie() since we can't take the
+		 * native_tlbie_lock.
+		 */
+		if (hpte_v & HPTE_V_VALID) {
+			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
+			hptep->v = 0;
+			___tlbie(vpn, psize, apsize, ssize);
+		}
+	}
+
+	asm volatile("eieio; tlbsync; ptesync":::"memory");
+}
+
+/*
+ * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
+ * the lock all the time
+ *
+ * @number: how many entries of this CPU's ppc64_tlb_batch to process
+ * @local:  non-zero to request a local flush; tlbiel is only used when
+ *          MMU_FTR_TLBIEL is present, the batch page size supports tlbiel
+ *          and cxl_ctx_in_use() is false
+ *
+ * Works in two passes with interrupts disabled: first every matching HPTE
+ * of the batch is invalidated, then the TLB is flushed for the same set of
+ * pages with either tlbiel or tlbie.
+ */
+static void native_flush_hash_range(unsigned long number, int local)
+{
+	unsigned long vpn = 0;
+	unsigned long hash, index, hidx, shift, slot;
+	struct hash_pte *hptep;
+	unsigned long hpte_v;
+	unsigned long want_v;
+	unsigned long flags;
+	real_pte_t pte;
+	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+	unsigned long psize = batch->psize;
+	int ssize = batch->ssize;
+	int i;
+	unsigned int use_local;
+
+	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
+		mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();
+
+	local_irq_save(flags);
+
+	/* Pass 1: invalidate the hash table entries of the batch */
+	for (i = 0; i < number; i++) {
+		vpn = batch->vpn[i];
+		pte = batch->pte[i];
+
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			hash = hpt_hash(vpn, shift, ssize);
+			hidx = __rpte_to_hidx(pte, index);
+			/* Secondary group entries use the complemented hash */
+			if (hidx & _PTEIDX_SECONDARY)
+				hash = ~hash;
+			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+			slot += hidx & _PTEIDX_GROUP_IX;
+			hptep = htab_address + slot;
+			want_v = hpte_encode_avpn(vpn, psize, ssize);
+			hpte_v = hpte_get_old_v(hptep);
+
+			/* Unlocked pre-check: nothing to do if it is not ours */
+			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+				continue;
+			/* lock and try again */
+			native_lock_hpte(hptep);
+			hpte_v = hpte_get_old_v(hptep);
+
+			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+				native_unlock_hpte(hptep);
+			else {
+				/* Writing 0 invalidates the entry and drops its lock bit */
+				release_hpte_lock();
+				hptep->v = 0;
+			}
+
+		} pte_iterate_hashed_end();
+	}
+
+	/* Pass 2: flush the TLB for every page of the batch */
+	if (use_local) {
+		asm volatile("ptesync":::"memory");
+		for (i = 0; i < number; i++) {
+			vpn = batch->vpn[i];
+			pte = batch->pte[i];
+
+			pte_iterate_hashed_subpages(pte, psize,
+						    vpn, index, shift) {
+				__tlbiel(vpn, psize, psize, ssize);
+			} pte_iterate_hashed_end();
+		}
+		ppc_after_tlbiel_barrier();
+	} else {
+		/* The tlbie lock is only taken without MMU_FTR_LOCKLESS_TLBIE */
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+		if (lock_tlbie)
+			raw_spin_lock(&native_tlbie_lock);
+
+		asm volatile("ptesync":::"memory");
+		for (i = 0; i < number; i++) {
+			vpn = batch->vpn[i];
+			pte = batch->pte[i];
+
+			pte_iterate_hashed_subpages(pte, psize,
+						    vpn, index, shift) {
+				__tlbie(vpn, psize, psize, ssize);
+			} pte_iterate_hashed_end();
+		}
+		/*
+		 * Just do one more with the last used values.
+		 */
+		fixup_tlbie_vpn(vpn, psize, psize, ssize);
+		asm volatile("eieio; tlbsync; ptesync":::"memory");
+
+		if (lock_tlbie)
+			raw_spin_unlock(&native_tlbie_lock);
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Point the mmu_hash_ops callbacks at the native_* implementations.
+ */
+void __init hpte_init_native(void)
+{
+	/* Single entry insert/remove/update */
+	mmu_hash_ops.hpte_insert = native_hpte_insert;
+	mmu_hash_ops.hpte_remove = native_hpte_remove;
+	mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
+	mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
+
+	/* Bolted entries */
+	mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+	mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
+
+	/* Bulk operations */
+	mmu_hash_ops.hpte_clear_all = native_hpte_clear;
+	mmu_hash_ops.flush_hash_range = native_flush_hash_range;
+	mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
+}
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
new file mode 100644
index 0000000000..988948d69b
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/mm.h>
+#include <linux/stop_machine.h>
+
+#include <asm/sections.h>
+#include <asm/mmu.h>
+#include <asm/tlb.h>
+#include <asm/firmware.h>
+
+#include <mm/mmu_decl.h>
+
+#include <trace/events/thp.h>
+
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * vmemmap is the starting address of the virtual address space where
+ * struct pages are allocated for all possible PFNs present on the system
+ * including holes and bad memory (hence sparse). These virtual struct
+ * pages are stored in sequence in this virtual address space irrespective
+ * of the fact whether the corresponding PFN is valid or not. This achieves
+ * constant relationship between address of struct page and its PFN.
+ *
+ * During boot or memory hotplug operation when a new memory section is
+ * added, physical memory allocation (including hash table bolting) will
+ * be performed for the set of struct pages which are part of the memory
+ * section. This saves memory by not allocating struct pages for PFNs
+ * which are not valid.
+ *
+ *		----------------------------------------------
+ *		| PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES|
+ *		----------------------------------------------
+ *
+ *	   f000000000000000                  c000000000000000
+ * vmemmap +--------------+                  +--------------+
+ *  +      |  page struct | +--------------> |  page struct |
+ *  |      +--------------+                  +--------------+
+ *  |      |  page struct | +--------------> |  page struct |
+ *  |      +--------------+ |                +--------------+
+ *  |      |  page struct | +       +------> |  page struct |
+ *  |      +--------------+         |        +--------------+
+ *  |      |  page struct |         |   +--> |  page struct |
+ *  |      +--------------+         |   |    +--------------+
+ *  |      |  page struct |         |   |
+ *  |      +--------------+         |   |
+ *  |      |  page struct |         |   |
+ *  |      +--------------+         |   |
+ *  |      |  page struct |         |   |
+ *  |      +--------------+         |   |
+ *  |      |  page struct |         |   |
+ *  |      +--------------+         |   |
+ *  |      |  page struct | +-------+   |
+ *  |      +--------------+             |
+ *  |      |  page struct | +-----------+
+ *  |      +--------------+
+ *  |      |  page struct | No mapping
+ *  |      +--------------+
+ *  |      |  page struct | No mapping
+ *  v      +--------------+
+ *
+ *		-----------------------------------------
+ *		| RELATION BETWEEN STRUCT PAGES AND PFNS|
+ *		-----------------------------------------
+ *
+ * vmemmap +--------------+                 +---------------+
+ *  +      |  page struct | +-------------> |      PFN      |
+ *  |      +--------------+                 +---------------+
+ *  |      |  page struct | +-------------> |      PFN      |
+ *  |      +--------------+                 +---------------+
+ *  |      |  page struct | +-------------> |      PFN      |
+ *  |      +--------------+                 +---------------+
+ *  |      |  page struct | +-------------> |      PFN      |
+ *  |      +--------------+                 +---------------+
+ *  |      |              |
+ *  |      +--------------+
+ *  |      |              |
+ *  |      +--------------+
+ *  |      |              |
+ *  |      +--------------+                 +---------------+
+ *  |      |  page struct | +-------------> |      PFN      |
+ *  |      +--------------+                 +---------------+
+ *  |      |              |
+ *  |      +--------------+
+ *  |      |              |
+ *  |      +--------------+                 +---------------+
+ *  |      |  page struct | +-------------> |      PFN      |
+ *  |      +--------------+                 +---------------+
+ *  |      |  page struct | +-------------> |      PFN      |
+ *  v      +--------------+                 +---------------+
+ */
+/*
+ * On hash-based CPUs, the vmemmap is bolted in the hash table.
+ *
+ * Bolt [start, start + page_size) onto phys with PAGE_KERNEL protection.
+ * When bolting fails, the mapping for the range is removed again before
+ * the error is handed back.
+ *
+ * Returns -1 if the range does not end below H_VMEMMAP_END, otherwise the
+ * result of htab_bolt_mapping().
+ */
+int __meminit hash__vmemmap_create_mapping(unsigned long start,
+				       unsigned long page_size,
+				       unsigned long phys)
+{
+	unsigned long end = start + page_size;
+	int rc, undo_rc;
+
+	if (end >= H_VMEMMAP_END) {
+		pr_warn("Outside the supported range\n");
+		return -1;
+	}
+
+	rc = htab_bolt_mapping(start, end, phys, pgprot_val(PAGE_KERNEL),
+			       mmu_vmemmap_psize, mmu_kernel_ssize);
+	if (rc >= 0)
+		return rc;
+
+	/* Bolting failed: drop whatever may have been inserted for the range */
+	undo_rc = htab_remove_mapping(start, end, mmu_vmemmap_psize,
+				      mmu_kernel_ssize);
+	BUG_ON(undo_rc && (undo_rc != -ENOENT));
+
+	return rc;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Remove the bolted hash table entries for [start, start + page_size) of
+ * the vmemmap. A missing mapping (-ENOENT) only triggers a warning; any
+ * other negative result from htab_remove_mapping() is fatal.
+ */
+void hash__vmemmap_remove_mapping(unsigned long start,
+			      unsigned long page_size)
+{
+	unsigned long end = start + page_size;
+	int rc;
+
+	rc = htab_remove_mapping(start, end, mmu_vmemmap_psize,
+				 mmu_kernel_ssize);
+
+	BUG_ON(rc < 0 && rc != -ENOENT);
+	WARN_ON(rc == -ENOENT);
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ *
+ * Maps one page at @ea onto @pa with protection @prot. Once the slab
+ * allocator is available an entry is added to the init_mm page table;
+ * before that an entry is bolted straight into the HPT.
+ *
+ * Returns 0 on success and -ENOMEM when a page table level cannot be
+ * allocated or the bolted mapping fails.
+ */
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+	BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
+
+	if (!slab_is_available()) {
+		/*
+		 * If the mm subsystem is not fully up, we cannot create a
+		 * linux page table entry for this mapping.  Simply bolt an
+		 * entry in the hardware page table.
+		 */
+		int rc = htab_bolt_mapping(ea, ea + PAGE_SIZE, pa,
+					   pgprot_val(prot), mmu_io_psize,
+					   mmu_kernel_ssize);
+
+		if (rc) {
+			printk(KERN_ERR "Failed to do bolted mapping IO "
+			       "memory at %016lx !\n", pa);
+			return -ENOMEM;
+		}
+	} else {
+		pgd_t *pgdp = pgd_offset_k(ea);
+		p4d_t *p4dp = p4d_offset(pgdp, ea);
+		pud_t *pudp;
+		pmd_t *pmdp;
+		pte_t *ptep;
+
+		/* Walk down, allocating each missing level on the way */
+		pudp = pud_alloc(&init_mm, p4dp, ea);
+		if (!pudp)
+			return -ENOMEM;
+
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+
+		ptep = pte_alloc_kernel(pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
+	}
+
+	smp_wmb();
+	return 0;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * Atomically clear the @clr bits and set the @set bits in the hugepage PMD
+ * at @pmdp.
+ *
+ * @mm:   address space the PMD belongs to; used for the debug lock check
+ *        and passed on to the flush
+ * @addr: address covered by the PMD; passed to the tracepoint and the flush
+ * @pmdp: PMD entry to update
+ * @clr:  bits to clear (CPU endian)
+ * @set:  bits to set (CPU endian)
+ *
+ * Returns the previous PMD value converted to CPU endian. If that value
+ * had H_PAGE_HASHPTE set, hpte_do_hugepage_flush() is called for it.
+ */
+unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				    pmd_t *pmdp, unsigned long clr,
+				    unsigned long set)
+{
+	__be64 old_be, tmp;
+	unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+#endif
+
+	/*
+	 * ldarx/stdcx. loop on the big-endian PMD word:
+	 *   - load the old value with a reservation,
+	 *   - start over while the H_PAGE_BUSY bit (%6) is set in it,
+	 *   - clear the clr bits (%4), or in the set bits (%7),
+	 *   - store conditionally and start over if the reservation was lost.
+	 * All masks are converted with cpu_to_be64() before use.
+	 */
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3\n\
+		and.	%1,%0,%6\n\
+		bne-	1b \n\
+		andc	%1,%0,%4 \n\
+		or	%1,%1,%7\n\
+		stdcx.	%1,0,%3 \n\
+		bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
+	: "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
+	  "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
+	: "cc" );
+
+	old = be64_to_cpu(old_be);
+
+	trace_hugepage_update_pmd(addr, old, clr, set);
+	/* The old PMD had hash entries: flush them */
+	if (old & H_PAGE_HASHPTE)
+		hpte_do_hugepage_flush(mm, addr, pmdp, old);
+	return old;
+}
+
+/*
+ * Clear the (non-huge) PMD at @pmdp ahead of a collapse and flush the hash
+ * entries of the range it covered.
+ *
+ * @vma:     VMA whose mm owns the PMD
+ * @address: address covered by the PMD; must be HPAGE_PMD aligned
+ * @pmdp:    PMD entry to clear; must not be a trans-huge or devmap PMD
+ *
+ * Returns the PMD value that was in place before it was cleared.
+ */
+pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp)
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(pmd_trans_huge(*pmdp));
+	VM_BUG_ON(pmd_devmap(*pmdp));
+
+	/* Keep a copy: the flush below walks the PTEs the old PMD points to */
+	pmd = *pmdp;
+	pmd_clear(pmdp);
+	/*
+	 * Wait for all pending hash_page to finish. This is needed
+	 * in case of subpage collapse. When we collapse normal pages
+	 * to hugepage, we first clear the pmd, then invalidate all
+	 * the PTE entries. The assumption here is that any low level
+	 * page fault will see a none pmd and take the slow path that
+	 * will wait on mmap_lock. But we could very well be in a
+	 * hash_page with local ptep pointer value. Such a hash page
+	 * can result in adding new HPTE entries for normal subpages.
+	 * That means we could be modifying the page content as we
+	 * copy them to a huge page. So wait for parallel hash_page
+	 * to finish before invalidating HPTE entries. We can do this
+	 * by sending an IPI to all the cpus and executing a dummy
+	 * function there.
+	 */
+	serialize_against_pte_lookup(vma->vm_mm);
+	/*
+	 * Now invalidate the hpte entries in the range
+	 * covered by pmd. This make sure we take a
+	 * fault and will find the pmd as none, which will
+	 * result in a major fault which takes mmap_lock and
+	 * hence wait for collapse to complete. Without this
+	 * the __collapse_huge_page_copy can result in copying
+	 * the old content.
+	 */
+	flush_hash_table_pmd_range(vma->vm_mm, &pmd, address);
+	return pmd;
+}
+
+/*
+ * Deposit @pgtable next to the PMD so that it can be used for tracking
+ * the base page size hptes of the hugepage.
+ *
+ * The caller must hold the PMD lock.
+ */
+void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				  pgtable_t pgtable)
+{
+	pgtable_t *slot;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* The deposit slot lives in the second half of the PMD */
+	slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	*slot = pgtable;
+
+	/*
+	 * Expose the deposited pgtable to other cpus before the hugepage
+	 * PTE is set at pmd level: the hash fault code looks at the
+	 * deposited pgtable to store hash index values.
+	 */
+	smp_wmb();
+}
+
+/*
+ * Take back the pgtable stored in the deposit slot of @pmdp.
+ *
+ * The slot is reset to NULL and the returned PTE fragment is zeroed,
+ * since it was used to hold HPTE information while deposited. The caller
+ * must hold the PMD lock.
+ */
+pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pgtable_t *slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable_t withdrawn;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* Once withdrawn, the slot is marked empty */
+	withdrawn = *slot;
+	*slot = NULL;
+
+	/* Wipe the HPTE information kept in the fragment */
+	memset(withdrawn, 0, PTE_FRAG_SIZE);
+
+	return withdrawn;
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entries
+ * need to be flushed.
+ *
+ * @mm:      address space the PMD belongs to
+ * @addr:    address covered by the PMD
+ * @pmdp:    the PMD entry, handed on to flush_hash_hugepage()
+ * @old_pmd: PMD value before the change; H_PAGE_COMBO in it selects a 4K
+ *           base page size, otherwise 64K is used
+ *
+ * HPTE_LOCAL_UPDATE is added to the flush flags when the mm is thread local.
+ */
+void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+			    pmd_t *pmdp, unsigned long old_pmd)
+{
+	int ssize;
+	unsigned int psize;
+	unsigned long vsid;
+	unsigned long flags = 0;
+
+	/* get the base page size,vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
+	/* Debug check only: the value read here is overwritten just below */
+	psize = get_slice_psize(mm, addr);
+	BUG_ON(psize == MMU_PAGE_16M);
+#endif
+	if (old_pmd & H_PAGE_COMBO)
+		psize = MMU_PAGE_4K;
+	else
+		psize = MMU_PAGE_64K;
+
+	if (!is_kernel_addr(addr)) {
+		ssize = user_segment_size(addr);
+		vsid = get_user_vsid(&mm->context, addr, ssize);
+		WARN_ON(vsid == 0);
+	} else {
+		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+		ssize = mmu_kernel_ssize;
+	}
+
+	if (mm_is_thread_local(mm))
+		flags |= HPTE_LOCAL_UPDATE;
+
+	/*
+	 * Plain call: a return statement with an expression is not allowed
+	 * in a function returning void.
+	 */
+	flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
+}
+
+/*
+ * Clear the hugepage PMD at @pmdp and return the value it held.
+ *
+ * The valid and hash index details kept in the deposited pgtable are
+ * zeroed as well, because the hash fault path looks at them.
+ */
+pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+				unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old_val;
+	pgtable_t *slot;
+
+	/* Clear every bit of the PMD, set none */
+	old_val = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+
+	/*
+	 * We have pmd == none and we are holding page_table_lock, so the
+	 * pgtable hash index info can safely be cleared now.
+	 */
+	slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	memset(*slot, 0, PTE_FRAG_SIZE);
+
+	return __pmd(old_val);
+}
+
+/*
+ * Report whether transparent hugepages can be used with the hash MMU.
+ *
+ * Returns 1 when all of the following hold, 0 otherwise:
+ *  - the MMU has the 16M page feature,
+ *  - a 16M page is exactly PMD sized (THP is only supported then),
+ *  - if 64K HPTEs exist (they are used by default), a 16M page can be
+ *    encoded in a 64K base page size segment,
+ *  - a 16M page can be encoded in a 4K base page size segment.
+ */
+int hash__has_transparent_hugepage(void)
+{
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE) ||
+	    mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
+		return 0;
+
+	/* 64K HPTE available but no 16M encoding for it */
+	if (mmu_psize_defs[MMU_PAGE_64K].shift != 0 &&
+	    mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1)
+		return 0;
+
+	/* Finally the 4K HPTE must be able to encode a 16M page too */
+	return mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] != -1;
+}
+EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+
+/*
+ * Arguments and rendezvous state shared by all CPUs while the protection
+ * of a linear mapping range is changed under stop_machine.
+ */
+struct change_memory_parms {
+	/* Range [start, end) and new protection bits to apply */
+	unsigned long start, end, newpp;
+	/* step: stride between updated addresses; nr_cpus is not used by
+	 * the code below */
+	unsigned int step, nr_cpus;
+	/* Set to 1 by the first CPU to arrive, which then does the update */
+	atomic_t master_cpu;
+	/*
+	 * Starts at the number of online CPUs. Every secondary decrements
+	 * it on arrival, and the master decrements it once more when done,
+	 * which brings it to zero and releases the secondaries.
+	 */
+	atomic_t cpu_counter;
+};
+
+// We'd rather this was on the stack but it has to be in the RMO
+// NOTE(review): RMO presumably means the region addressable in real mode,
+// which the secondaries need while spinning with translation off - confirm.
+static struct change_memory_parms chmem_parms;
+
+// And therefore we need a lock to protect it from concurrent use
+static DEFINE_MUTEX(chmem_lock);
+
+/*
+ * Apply the protection bits @newpp to the bolted HPTE of every address in
+ * [start, end), walking the range in strides of @step bytes.
+ */
+static void change_memory_range(unsigned long start, unsigned long end,
+				unsigned int step, unsigned long newpp)
+{
+	unsigned long ea = start;
+
+	pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+		 start, end, newpp, step);
+
+	while (ea < end) {
+		/* Not sure if we can do much with the return value */
+		mmu_hash_ops.hpte_updateboltedpp(newpp, ea, mmu_linear_psize,
+						 mmu_kernel_ssize);
+		ea += step;
+	}
+}
+
+/*
+ * Parking loop run by every CPU that did not become the master in
+ * change_memory_range_fn().
+ *
+ * With interrupts disabled, the CPU clears MSR_IR and MSR_DR, atomically
+ * decrements parms->cpu_counter to report in, spins until the counter
+ * reads zero and then restores the MSR value it started with.
+ *
+ * Always returns 0.
+ */
+static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
+{
+	unsigned long msr, tmp, flags;
+	int *p;
+
+	/* The asm below operates directly on the raw counter word */
+	p = &parms->cpu_counter.counter;
+
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	asm volatile (
+	// Switch to real mode and leave interrupts off
+	"mfmsr	%[msr]			;"
+	"li	%[tmp], %[MSR_IR_DR]	;"
+	"andc	%[tmp], %[msr], %[tmp]	;"
+	"mtmsrd %[tmp]			;"
+
+	// Tell the master we are in real mode
+	"1:				"
+	"lwarx	%[tmp], 0, %[p]		;"
+	"addic	%[tmp], %[tmp], -1	;"
+	"stwcx.	%[tmp], 0, %[p]		;"
+	"bne-	1b			;"
+
+	// Spin until the counter goes to zero
+	"2:				;"
+	"lwz	%[tmp], 0(%[p])		;"
+	"cmpwi	%[tmp], 0		;"
+	"bne-	2b			;"
+
+	// Switch back to virtual mode
+	"mtmsrd %[msr]			;"
+
+	: // outputs
+	  [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
+	: // inputs
+	  [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
+	: // clobbers
+	  "cc", "xer"
+	);
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+/*
+ * stop_machine callback used by hash__change_memory_range().
+ *
+ * @data: pointer to the shared struct change_memory_parms
+ *
+ * The first CPU to flip master_cpu becomes the master: it waits until
+ * cpu_counter has dropped to 1 (every other CPU has reported in), performs
+ * the protection change and then decrements the counter itself. All other
+ * CPUs park in chmem_secondary_loop() until the counter reaches zero.
+ *
+ * Returns 0 on the master, and the result of chmem_secondary_loop()
+ * elsewhere.
+ */
+static int change_memory_range_fn(void *data)
+{
+	struct change_memory_parms *parms = data;
+
+	// First CPU goes through, all others wait.
+	if (atomic_xchg(&parms->master_cpu, 1) == 1)
+		return chmem_secondary_loop(parms);
+
+	// Wait for all but one CPU (this one) to call-in
+	while (atomic_read(&parms->cpu_counter) > 1)
+		barrier();
+
+	change_memory_range(parms->start, parms->end, parms->step, parms->newpp);
+
+	// Full barrier between the update and the release of the other CPUs
+	mb();
+
+	// Signal the other CPUs that we're done
+	atomic_dec(&parms->cpu_counter);
+
+	return 0;
+}
+
+/*
+ * Change the protection of the linear mapping for [start, end).
+ *
+ * The range is first widened to the linear mapping page size. Without
+ * FW_FEATURE_LPAR the HPTEs are updated directly; with it, the update runs
+ * under stop_machine on all online CPUs, using the shared chmem_parms
+ * which chmem_lock protects.
+ *
+ * Returns false if the aligned range is empty, true otherwise.
+ */
+static bool hash__change_memory_range(unsigned long start, unsigned long end,
+				      unsigned long newpp)
+{
+	unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+	unsigned int step = 1 << shift;
+
+	start = ALIGN_DOWN(start, step);
+	end = ALIGN(end, step); // aligns up
+
+	if (start >= end)
+		return false;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		change_memory_range(start, end, step, newpp);
+		return true;
+	}
+
+	mutex_lock(&chmem_lock);
+
+	atomic_set(&chmem_parms.master_cpu, 0);
+	chmem_parms.newpp = newpp;
+	chmem_parms.step = step;
+	chmem_parms.start = start;
+	chmem_parms.end = end;
+
+	cpus_read_lock();
+
+	/* One count per CPU that will enter the callback */
+	atomic_set(&chmem_parms.cpu_counter, num_online_cpus());
+
+	// Ensure state is consistent before we call the other CPUs
+	mb();
+
+	stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
+				cpu_online_mask);
+
+	cpus_read_unlock();
+	mutex_unlock(&chmem_lock);
+
+	return true;
+}
+
+/*
+ * Switch the linear mapping of [_stext, __end_rodata) to the HPTE
+ * protection derived from PAGE_KERNEL_ROX. Warns if the range turns out
+ * to be empty.
+ */
+void hash__mark_rodata_ro(void)
+{
+	const unsigned long text_start = (unsigned long)_stext;
+	const unsigned long rodata_end = (unsigned long)__end_rodata;
+	unsigned long pp;
+	bool changed;
+
+	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
+	changed = hash__change_memory_range(text_start, rodata_end, pp);
+
+	WARN_ON(!changed);
+}
+
+/*
+ * Switch the linear mapping of [__init_begin, __init_end) to the HPTE
+ * protection derived from PAGE_KERNEL. Warns if the range turns out to be
+ * empty.
+ */
+void hash__mark_initmem_nx(void)
+{
+	const unsigned long init_start = (unsigned long)__init_begin;
+	const unsigned long init_end = (unsigned long)__init_end;
+	unsigned long pp;
+	bool changed;
+
+	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
+	changed = hash__change_memory_range(init_start, init_end, pp);
+
+	WARN_ON(!changed);
+}
+#endif
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
new file mode 100644
index 0000000000..21fcad97ae
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for flushing entries from the
+ * TLB and MMU hash table.
+ *
+ *  Derived from arch/ppc64/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/bug.h>
+#include <asm/pte-walk.h>
+
+
+#include <trace/events/thp.h>
+
+DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
+
+/*
+ * A linux PTE was changed and the corresponding hash table entry
+ * needs to be flushed. This function will either perform the flush
+ * immediately or will batch it up if the current CPU has an active
+ * batch on it.
+ *
+ * @mm:   address space the PTE belongs to
+ * @addr: address mapped by the PTE; masked down to its page boundary here
+ * @ptep: pointer to the PTE, handed to __real_pte()
+ * @pte:  PTE value to build the flush from
+ * @huge: non-zero for a hugetlb mapping, whose page size comes from the
+ *        slice
+ *
+ * The per-cpu batch is held through get_cpu_var()/put_cpu_var() for the
+ * whole function.
+ */
+void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, unsigned long pte, int huge)
+{
+	unsigned long vpn;
+	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
+	unsigned long vsid;
+	unsigned int psize;
+	int ssize;
+	real_pte_t rpte;
+	int i, offset;
+
+	i = batch->index;
+
+	/*
+	 * Get page size (maybe move back to caller).
+	 *
+	 * NOTE: when using special 64K mappings in 4K environment like
+	 * for SPEs, we obtain the page size from the slice, which thus
+	 * must still exist (and thus the VMA not reused) at the time
+	 * of this call
+	 */
+	if (huge) {
+#ifdef CONFIG_HUGETLB_PAGE
+		psize = get_slice_psize(mm, addr);
+		/* Mask the address for the correct page size */
+		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+		/* offset is handed to __real_pte() below */
+		if (unlikely(psize == MMU_PAGE_16G))
+			offset = PTRS_PER_PUD;
+		else
+			offset = PTRS_PER_PMD;
+#else
+		BUG();
+		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
+#endif
+	} else {
+		psize = pte_pagesize_index(mm, addr, pte);
+		/*
+		 * Mask the address for the standard page size.  If we
+		 * have a 64k page kernel, but the hardware does not
+		 * support 64k pages, this might be different from the
+		 * hardware page size encoded in the slice table.
+		 */
+		addr &= PAGE_MASK;
+		offset = PTRS_PER_PTE;
+	}
+
+
+	/* Build full vaddr */
+	if (!is_kernel_addr(addr)) {
+		ssize = user_segment_size(addr);
+		vsid = get_user_vsid(&mm->context, addr, ssize);
+	} else {
+		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+		ssize = mmu_kernel_ssize;
+	}
+	WARN_ON(vsid == 0);
+	vpn = hpt_vpn(addr, vsid, ssize);
+	rpte = __real_pte(__pte(pte), ptep, offset);
+
+	/*
+	 * Check if we have an active batch on this CPU. If not, just
+	 * flush now and return.
+	 */
+	if (!batch->active) {
+		flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
+		put_cpu_var(ppc64_tlb_batch);
+		return;
+	}
+
+	/*
+	 * This can happen when we are in the middle of a TLB batch and
+	 * we encounter memory pressure (eg copy_page_range when it tries
+	 * to allocate a new pte). If we have to reclaim memory and end
+	 * up scanning and resetting referenced bits then our batch context
+	 * will change mid stream.
+	 *
+	 * We also need to ensure only one page size is present in a given
+	 * batch
+	 */
+	if (i != 0 && (mm != batch->mm || batch->psize != psize ||
+		       batch->ssize != ssize)) {
+		__flush_tlb_pending(batch);
+		i = 0;
+	}
+	/* First entry of a batch records the mm, page and segment size */
+	if (i == 0) {
+		batch->mm = mm;
+		batch->psize = psize;
+		batch->ssize = ssize;
+	}
+	batch->pte[i] = rpte;
+	batch->vpn[i] = vpn;
+	batch->index = ++i;
+	/* Batch is full: flush it right away */
+	if (i >= PPC64_TLB_BATCH_NR)
+		__flush_tlb_pending(batch);
+	put_cpu_var(ppc64_tlb_batch);
+}
+
+/*
+ * Flush every entry currently stored in @batch and mark the batch empty.
+ * Called when an mmu batch is terminated or has filled up.
+ *
+ * A batch holding exactly one entry goes through flush_hash_page(); any
+ * other count is handed to flush_hash_range().
+ *
+ * Must be called from within some kind of spinlock/non-preempt region...
+ */
+void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
+{
+	const int pending = batch->index;
+	const int local = mm_is_thread_local(batch->mm);
+
+	if (pending != 1) {
+		flush_hash_range(pending, local);
+	} else {
+		flush_hash_page(batch->vpn[0], batch->pte[0],
+				batch->psize, batch->ssize, local);
+	}
+
+	batch->index = 0;
+}
+
+/*
+ * Flush this CPU's pending TLB batch, if it holds any entry. @tlb itself
+ * is not consulted.
+ */
+void hash__tlb_flush(struct mmu_gather *tlb)
+{
+	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
+
+	/*
+	 * A pending batch must be flushed here: the pages are about to be
+	 * freed, and no CPU may reach a freed page through a stale TLB
+	 * entry.
+	 */
+	if (batch->index != 0)
+		__flush_tlb_pending(batch);
+
+	put_cpu_var(ppc64_tlb_batch);
+}
+
+/**
+ * __flush_hash_table_range - Flush all HPTEs for a given address range
+ *                            from the hash table (and the TLB). But keeps
+ *                            the linux PTEs intact.
+ *
+ * @start	: starting address
+ * @end         : ending address (not included in the flush)
+ *
+ * This function is mostly to be used by some IO hotplug code in order
+ * to remove all hash entries from a given address range used to map IO
+ * space on a removed PCI-PCI bridge without tearing down the full mapping
+ * since 64K pages may overlap with other bridges when using 64K pages
+ * with 4K HW pages on IO space.
+ *
+ * Because of that usage pattern, it is implemented for small size rather
+ * than speed.
+ */
+void __flush_hash_table_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags, ea, limit;
+	int hugepage_shift;
+
+	/* Widen the range to whole pages */
+	ea = ALIGN_DOWN(start, PAGE_SIZE);
+	limit = ALIGN(end, PAGE_SIZE);
+
+	/*
+	 * Note: Normally, we should only ever use a batch within a
+	 * PTE locked section. This violates the rule, but will work
+	 * since we don't actually modify the PTEs, we just flush the
+	 * hash while leaving the PTEs intact (including their reference
+	 * to being hashed). This is not the most performance oriented
+	 * way to do things but is fine for our needs here.
+	 */
+	local_irq_save(flags);
+	arch_enter_lazy_mmu_mode();
+	for (; ea < limit; ea += PAGE_SIZE) {
+		pte_t *ptep = find_init_mm_pte(ea, &hugepage_shift);
+		unsigned long pteval;
+
+		if (!ptep)
+			continue;
+
+		/* Only PTEs that have been hashed need a flush */
+		pteval = pte_val(*ptep);
+		if (pteval & H_PAGE_HASHPTE)
+			hpte_need_flush(&init_mm, ea, ptep, pteval,
+					hugepage_shift);
+	}
+	arch_leave_lazy_mmu_mode();
+	local_irq_restore(flags);
+}
+
+/*
+ * Flush the hash entries of every hashed PTE in the page table that @pmd
+ * points to, leaving the linux PTEs themselves untouched.
+ *
+ * @mm:   address space the PTEs belong to
+ * @pmd:  PMD entry whose PTE page is walked
+ * @addr: any address inside the PMD; it is aligned down to PMD_SIZE
+ */
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+{
+	unsigned long flags;
+	unsigned long i;
+	pte_t *first_pte;
+
+	addr = ALIGN_DOWN(addr, PMD_SIZE);
+	/*
+	 * Note: Normally, we should only ever use a batch within a
+	 * PTE locked section. This violates the rule, but will work
+	 * since we don't actually modify the PTEs, we just flush the
+	 * hash while leaving the PTEs intact (including their reference
+	 * to being hashed). This is not the most performance oriented
+	 * way to do things but is fine for our needs here.
+	 */
+	local_irq_save(flags);
+	arch_enter_lazy_mmu_mode();
+
+	first_pte = pte_offset_map(pmd, addr);
+	if (first_pte) {
+		for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
+			pte_t *ptep = first_pte + i;
+			unsigned long pteval = pte_val(*ptep);
+
+			if (pteval & H_PAGE_HASHPTE)
+				hpte_need_flush(mm, addr, ptep, pteval, 0);
+		}
+		pte_unmap(first_pte);
+	}
+
+	arch_leave_lazy_mmu_mode();
+	local_irq_restore(flags);
+}
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
new file mode 100644
index 0000000000..ad2afa08e6
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -0,0 +1,2282 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC64 port by Mike Corrigan and Dave Engebretsen
+ *   {mikejc|engebret}@us.ibm.com
+ *
+ *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * SMP scalability work:
+ *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ *    Module name: htab.c
+ *
+ *    Description:
+ *      PowerPC Hashed Page Table functions
+ */
+
+#undef DEBUG
+#undef DEBUG_LOW
+
+#define pr_fmt(fmt) "hash-mmu: " fmt
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/sched/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/sysctl.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/memblock.h>
+#include <linux/context_tracking.h>
+#include <linux/libfdt.h>
+#include <linux/pkeys.h>
+#include <linux/hugetlb.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+#include <linux/random.h>
+#include <linux/elf-randomize.h>
+#include <linux/of_fdt.h>
+
+#include <asm/interrupt.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/eeh.h>
+#include <asm/tlb.h>
+#include <asm/cacheflush.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/copro.h>
+#include <asm/udbg.h>
+#include <asm/code-patching.h>
+#include <asm/fadump.h>
+#include <asm/firmware.h>
+#include <asm/tm.h>
+#include <asm/trace.h>
+#include <asm/ps3.h>
+#include <asm/pte-walk.h>
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor.h>
+
+#include <mm/mmu_decl.h>
+
+#include "internal.h"
+
+
+/* DBG(): forwards to udbg_printf() when DEBUG is defined, otherwise expands to nothing. */
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* DBG_LOW(): same as DBG() but gated by DEBUG_LOW instead of DEBUG. */
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+/*
+ * Byte-size helpers. GB is built with a long multiplier so that multiples
+ * such as (16 * GB) are evaluated in long arithmetic.
+ */
+#define KB (1024)
+#define MB (1024*KB)
+#define GB (1024L*MB)
+
+/*
+ * Note:  pte   --> Linux PTE
+ *        HPTE  --> PowerPC Hashed Page Table Entry
+ *
+ * Execution context:
+ *   htab_initialize is called with the MMU off (of course), but
+ *   the kernel has been copied down to zero so it can directly
+ *   reference global data.  At this point it is very difficult
+ *   to print debug info.
+ *
+ */
+
+/*
+ * Value computed for SPRN_SDR1 in htab_initialize(): table address plus
+ * encoded size, or 0 when a hypervisor owns the hash table.
+ */
+static unsigned long _SDR1;
+
+/* Filled in by init_hpte_page_sizes(): each entry holds (actual psize << 4) | base psize. */
+u8 hpte_page_sizes[1 << LP_BITS];
+EXPORT_SYMBOL_GPL(hpte_page_sizes);
+
+/* Virtual address of the hash table; NULL when a hypervisor owns it (see htab_initialize()). */
+struct hash_pte *htab_address;
+/* Size of the hash table in bytes. */
+unsigned long htab_size_bytes;
+/* Number of PTEGs minus one; used to mask a hash value down to a group index. */
+unsigned long htab_hash_mask;
+EXPORT_SYMBOL_GPL(htab_hash_mask);
+/*
+ * Page size indices (MMU_PAGE_*) used for the various kinds of mapping.
+ * They all start at 4K and may be raised by htab_init_page_sizes().
+ */
+int mmu_linear_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_linear_psize);
+int mmu_virtual_psize = MMU_PAGE_4K;
+int mmu_vmalloc_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
+int mmu_io_psize = MMU_PAGE_4K;
+/* Segment sizes; switched to MMU_SEGSIZE_1T in htab_initialize() when 1T segments are available. */
+int mmu_kernel_ssize = MMU_SEGSIZE_256M;
+EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
+int mmu_highuser_ssize = MMU_SEGSIZE_256M;
+u16 mmu_slb_size = 64;
+EXPORT_SYMBOL_GPL(mmu_slb_size);
+#ifdef CONFIG_PPC_64K_PAGES
+/* Set to 1 by htab_init_page_sizes() when cache-inhibited large pages are not supported. */
+int mmu_ci_restrictions;
+#endif
+/*
+ * One byte per linear-map page, indexed by (paddr >> PAGE_SHIFT); only
+ * allocated when debug_pagealloc or kfence is enabled. htab_bolt_mapping()
+ * stores the insert result with bit 0x80 set.
+ */
+static u8 *linear_map_hash_slots;
+static unsigned long linear_map_hash_count;
+/* Table of hash-MMU backend operations (hpte_insert, hpte_remove, ...). */
+struct mmu_hash_ops mmu_hash_ops;
+EXPORT_SYMBOL(mmu_hash_ops);
+
+/*
+ * These are definitions of page sizes arrays to be used when none
+ * is provided by the firmware.
+ */
+
+/*
+ * Fallback (4k pages only)
+ */
+static struct mmu_psize_def mmu_psize_defaults[] = {
+	[MMU_PAGE_4K] = {
+		.shift	= 12,	/* 2^12 bytes = 4K */
+		.sllp	= 0,
+		/*
+		 * 4K base / 4K actual is encoding 0; a penc of -1 marks a
+		 * combination as unsupported (presumably MMU_PAGE_4K is
+		 * index 0, so the range covers every other size -- confirm).
+		 */
+		.penc   = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
+		.avpnm	= 0,
+		.tlbiel = 0,
+	},
+};
+
+/*
+ * POWER4, GPUL, POWER5
+ *
+ * Support for 16MB large pages
+ */
+static struct mmu_psize_def mmu_psize_defaults_gp[] = {
+	/* 4K base pages: same as the plain fallback table, but with tlbiel enabled. */
+	[MMU_PAGE_4K] = {
+		.shift	= 12,
+		.sllp	= 0,
+		.penc   = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
+		.avpnm	= 0,
+		.tlbiel = 1,
+	},
+	/* 16M large pages (2^24 bytes), selected through the SLB L bit. */
+	[MMU_PAGE_16M] = {
+		.shift	= 24,
+		.sllp	= SLB_VSID_L,
+		/* only 16M base / 16M actual is valid; every other slot is -1 */
+		.penc   = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
+			    [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
+		.avpnm	= 0x1UL,
+		.tlbiel = 0,
+	},
+};
+
+/*
+ * Issue one single-operand tlbiel whose RB operand is assembled from @set
+ * and @is. Used by tlbiel_all_isa206() to walk the TLB set by set.
+ *
+ * @set: placed in RB at PPC_BITLSHIFT(51)
+ * @is:  placed in RB at PPC_BITLSHIFT(53)
+ */
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+	unsigned long rb;
+
+	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+	asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=0 (hash format) and is=01 or is=10 or is=11
+ *
+ * @set: placed in RB at PPC_BITLSHIFT(51)
+ * @is:  placed in RB at PPC_BITLSHIFT(53)
+ * @pid: placed in RS at PPC_BITLSHIFT(31)
+ * @ric: passed as an immediate operand, so it must be a compile-time constant
+ * @prs: passed as an immediate operand, so it must be a compile-time constant
+ *
+ * The function is __always_inline so that @ric and @prs can satisfy the
+ * "i" asm constraints.
+ */
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+					unsigned int pid,
+					unsigned int ric, unsigned int prs)
+{
+	unsigned long rb;
+	unsigned long rs;
+	unsigned int r = 0; /* hash format */
+
+	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
+		     : "memory");
+}
+
+
+/*
+ * Walk @num_sets TLB sets, issuing one ISA 2.06 style tlbiel per set with
+ * invalidation selector @is. A ptesync is executed before the walk and
+ * ppc_after_tlbiel_barrier() after it.
+ */
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+	unsigned int idx = 0;
+
+	asm volatile("ptesync": : :"memory");
+
+	while (idx < num_sets) {
+		tlbiel_hash_set_isa206(idx, is);
+		idx++;
+	}
+
+	ppc_after_tlbiel_barrier();
+}
+
+/*
+ * ISA 3.0 flavour of the full tlbiel walk. In order:
+ *   1. ptesync
+ *   2. in HV mode only, flush the partition table cache (RIC=2, PRS=0)
+ *   3. flush the process table cache (RIC=2, PRS=1)
+ *   4. invalidate each of the @num_sets TLB sets (RIC=0, PRS=0)
+ *   5. ppc_after_tlbiel_barrier(), then invalidate the ERAT and isync
+ *
+ * @is is the invalidation selector forwarded to every tlbiel.
+ */
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+	unsigned int set;
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the partition table cache if this is HV mode.
+	 */
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+
+	/*
+	 * Now invalidate the process table cache. UPRT=0 HPT modes (what
+	 * current hardware implements) do not use the process table, but
+	 * add the flushes anyway.
+	 *
+	 * From ISA v3.0B p. 1078:
+	 *     The following forms are invalid.
+	 *      * PRS=1, R=0, and RIC!=2 (The only process-scoped
+	 *        HPT caching is of the Process Table.)
+	 */
+	tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+	/*
+	 * Then flush the sets of the TLB proper. Hash mode uses
+	 * partition scoped TLB translations, which may be flushed
+	 * in !HV mode.
+	 */
+	for (set = 0; set < num_sets; set++)
+		tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+	ppc_after_tlbiel_barrier();
+
+	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+/*
+ * Invalidate the whole TLB with tlbiel, picking the instruction form and
+ * the number of sets from the CPU feature bits.
+ *
+ * @action: TLB_INVAL_SCOPE_GLOBAL (IS=3) or TLB_INVAL_SCOPE_LPID (IS=2);
+ *          any other value is a BUG().
+ *
+ * CPUs without CPU_FTR_ARCH_206 only get a warning; nothing is flushed.
+ */
+void hash__tlbiel_all(unsigned int action)
+{
+	unsigned int is;
+
+	if (action == TLB_INVAL_SCOPE_GLOBAL)
+		is = 3;
+	else if (action == TLB_INVAL_SCOPE_LPID)
+		is = 2;
+	else
+		BUG();
+
+	/* Newest architecture level first. */
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+		tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+		return;
+	}
+	if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		tlbiel_all_isa206(POWER8_TLB_SETS, is);
+		return;
+	}
+	if (early_cpu_has_feature(CPU_FTR_ARCH_206)) {
+		tlbiel_all_isa206(POWER7_TLB_SETS, is);
+		return;
+	}
+
+	WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+}
+
+/*
+ * 'R' and 'C' update notes:
+ *  - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
+ *     create writeable HPTEs without C set, because the hcall H_PROTECT
+ *     that we use in that case will not update C
+ *  - The above is however not a problem, because we also don't do that
+ *     fancy "no flush" variant of eviction and we use H_REMOVE which will
+ *     do the right thing and thus we don't have the race I described earlier
+ *
+ *    - Under bare metal,  we do have the race, so we need R and C set
+ *    - We make sure R is always set and never lost
+ *    - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping
+ */
+/*
+ * Translate Linux PTE flag bits into the low ("R") doubleword flag bits of
+ * a hash PTE.
+ *
+ * @pteflags: Linux PTE flags (_PAGE_*)
+ * @flags:    passed through to pte_to_hpte_pkey_bits() to pick the key bits
+ *
+ * Returns the HPTE_R_* bits: no-execute, PP protection, R/C, WIMG and key.
+ */
+unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags)
+{
+	const unsigned long cache_ctl = pteflags & _PAGE_CACHE_CTL;
+	/*
+	 * Hardware must not be left to update HPTE bits, so 'R' is always
+	 * set up front; 'C' is added below when the PTE is dirty.
+	 */
+	unsigned long hpte_r = HPTE_R_R;
+
+	/* Absence of _PAGE_EXEC becomes the no-execute bit. */
+	if (!(pteflags & _PAGE_EXEC))
+		hpte_r |= HPTE_R_N;
+
+	/*
+	 * PPP bits:
+	 * Linux uses slb key 0 for kernel and 1 for user.
+	 * Kernel RW areas keep PPP=0b000.
+	 * User areas get PPP=0b010 for read/write, or PPP=0b011 for
+	 * read-only (which includes writeable but still clean pages).
+	 */
+	if (!(pteflags & _PAGE_PRIVILEGED)) {
+		if (pteflags & _PAGE_RWX)
+			hpte_r |= 0x2;
+		if (!(pteflags & _PAGE_WRITE) || !(pteflags & _PAGE_DIRTY))
+			hpte_r |= 0x1;
+	} else if (!(pteflags & _PAGE_WRITE)) {
+		/* Kernel read-only: PPP=0b110 when supported, else 0b011. */
+		if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+			hpte_r |= (HPTE_R_PP0 | 0x2);
+		else
+			hpte_r |= 0x3;
+	}
+
+	if (pteflags & _PAGE_DIRTY)
+		hpte_r |= HPTE_R_C;
+
+	/* WIMG bits, derived from the cache-control field. */
+	if (cache_ctl == _PAGE_TOLERANT) {
+		hpte_r |= HPTE_R_I;
+	} else if (cache_ctl == _PAGE_NON_IDEMPOTENT) {
+		hpte_r |= (HPTE_R_I | HPTE_R_G);
+	} else if (cache_ctl == _PAGE_SAO) {
+		hpte_r |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
+	} else {
+		/* Cacheable mapping: request memory coherence. */
+		hpte_r |= HPTE_R_M;
+	}
+
+	return hpte_r | pte_to_hpte_pkey_bits(pteflags, flags);
+}
+
+/*
+ * Insert bolted hash PTEs mapping [vstart, vend) onto physical memory
+ * starting at @pstart.
+ *
+ * @vstart: start of the virtual range (rounded up to the page size)
+ * @vend:   end of the virtual range (rounded down to the page size)
+ * @pstart: physical address backing @vstart (rounded up to the page size)
+ * @prot:   Linux PTE flags; converted with htab_convert_pte_flags()
+ * @psize:  MMU_PAGE_* index into mmu_psize_defs[] giving the page size
+ * @ssize:  segment size used for the VSID / VPN / hash computation
+ *
+ * Each page is first tried in its primary hash group; if that group is
+ * full, one entry is evicted with hpte_remove() and the insert retried,
+ * and as a last resort the same is attempted once in the secondary group.
+ *
+ * Returns 0 on success, -1 if an address has no kernel VSID, or the
+ * negative value returned by the failing hpte_insert()/hpte_remove().
+ */
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+		      unsigned long pstart, unsigned long prot,
+		      int psize, int ssize)
+{
+	unsigned long vaddr, paddr;
+	unsigned long step;
+	unsigned int shift;
+	int ret = 0;
+
+	shift = mmu_psize_defs[psize].shift;
+	/*
+	 * Compute the page size in unsigned long: an int "1 << shift" is
+	 * undefined for shift >= 31 and cannot represent e.g. a 16G page.
+	 */
+	step = 1UL << shift;
+
+	prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY);
+
+	DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
+	    vstart, vend, pstart, prot, psize, ssize);
+
+	/* Carefully map only the possible range */
+	vaddr = ALIGN(vstart, step);
+	paddr = ALIGN(pstart, step);
+	vend  = ALIGN_DOWN(vend, step);
+
+	for (; vaddr < vend; vaddr += step, paddr += step) {
+		unsigned long hash, hpteg;
+		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
+		unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
+		unsigned long tprot = prot;
+		bool secondary_hash = false;
+
+		/*
+		 * If we hit a bad address return error.
+		 */
+		if (!vsid)
+			return -1;
+		/* Make kernel text executable */
+		if (overlaps_kernel_text(vaddr, vaddr + step))
+			tprot &= ~HPTE_R_N;
+
+		/*
+		 * If relocatable, check if it overlaps interrupt vectors that
+		 * are copied down to real 0. For relocatable kernel
+		 * (e.g. kdump case) we copy interrupt vectors down to real
+		 * address 0. Mark that region as executable. This is
+		 * because on p8 system with relocation on exception feature
+		 * enabled, exceptions are raised with MMU (IR=DR=1) ON. Hence
+		 * in order to execute the interrupt handlers in virtual
+		 * mode the vector region need to be marked as executable.
+		 */
+		if ((PHYSICAL_START > MEMORY_START) &&
+			overlaps_interrupt_vector_text(vaddr, vaddr + step))
+				tprot &= ~HPTE_R_N;
+
+		hash = hpt_hash(vpn, shift, ssize);
+		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+		BUG_ON(!mmu_hash_ops.hpte_insert);
+repeat:
+		ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+					       HPTE_V_BOLTED, psize, psize,
+					       ssize);
+		if (ret == -1) {
+			/*
+			 * Try to keep bolted entries in primary.
+			 * Remove non bolted entries and try insert again
+			 */
+			ret = mmu_hash_ops.hpte_remove(hpteg);
+			if (ret != -1)
+				ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+							       HPTE_V_BOLTED, psize, psize,
+							       ssize);
+			/* Still no room: fall back to the secondary group, once. */
+			if (ret == -1 && !secondary_hash) {
+				secondary_hash = true;
+				hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
+				goto repeat;
+			}
+		}
+
+		if (ret < 0)
+			break;
+
+		cond_resched();
+		/* Remember the slot (with bit 0x80 set) for the debug_pagealloc/kfence code. */
+		if (debug_pagealloc_enabled_or_kfence() &&
+			(paddr >> PAGE_SHIFT) < linear_map_hash_count)
+			linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
+	}
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Remove the bolted hash PTEs covering [vstart, vend).
+ *
+ * @vstart: start of the virtual range (rounded down to the page size)
+ * @vend:   end of the virtual range (exclusive)
+ * @psize:  MMU_PAGE_* index into mmu_psize_defs[] giving the page size
+ * @ssize:  segment size handed to hpte_removebolted()
+ *
+ * Returns -ENODEV when the platform has no hpte_removebolted() hook,
+ * -ENOENT if at least one page had no bolted entry (the walk continues
+ * past those), the first other negative error from hpte_removebolted()
+ * (which aborts the walk), or 0 on complete success.
+ */
+int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+		      int psize, int ssize)
+{
+	unsigned long vaddr, time_limit;
+	unsigned long step;
+	unsigned int shift;
+	int rc;
+	int ret = 0;
+
+	shift = mmu_psize_defs[psize].shift;
+	/*
+	 * Compute the page size in unsigned long: an int "1 << shift" is
+	 * undefined for shift >= 31 and cannot represent e.g. a 16G page.
+	 */
+	step = 1UL << shift;
+
+	if (!mmu_hash_ops.hpte_removebolted)
+		return -ENODEV;
+
+	/* Unmap the full range specified */
+	vaddr = ALIGN_DOWN(vstart, step);
+	time_limit = jiffies + HZ;
+
+	for (;vaddr < vend; vaddr += step) {
+		rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
+
+		/*
+		 * For large number of mappings introduce a cond_resched()
+		 * to prevent softlockup warnings.
+		 */
+		if (time_after(jiffies, time_limit)) {
+			cond_resched();
+			time_limit = jiffies + HZ;
+		}
+		/* A missing entry is remembered but does not stop the walk. */
+		if (rc == -ENOENT) {
+			ret = -ENOENT;
+			continue;
+		}
+		if (rc < 0)
+			return rc;
+	}
+
+	return ret;
+}
+
+/*
+ * Set by the "disable_1tb_segments" early parameter; checked in
+ * htab_dt_scan_seg_sizes() before MMU_FTR_1T_SEGMENT is enabled.
+ */
+static bool disable_1tb_segments __ro_after_init;
+
+/* early_param handler: the mere presence of the option sets the flag; @p is ignored. */
+static int __init parse_disable_1tb_segments(char *p)
+{
+	disable_1tb_segments = true;
+	return 0;
+}
+early_param("disable_1tb_segments", parse_disable_1tb_segments);
+
+/*
+ * Set by the "stress_hpt" early parameter; htab_initialize() reads it to
+ * decide whether to enable stress_hpt_key and set up the stress state.
+ */
+bool stress_hpt_enabled __initdata;
+
+/* early_param handler: the mere presence of the option sets the flag; @p is ignored. */
+static int __init parse_stress_hpt(char *p)
+{
+	stress_hpt_enabled = true;
+	return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+/* Static key, off by default; enabled in htab_initialize() when stress_hpt was requested. */
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+	/* up to STRESS_MAX_GROUPS remembered groups; htab_initialize() fills the array with 0xff bytes */
+	unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+/* Number of last_group[] slots the stress_hpt code should use on this platform. */
+static inline int stress_nr_groups(void)
+{
+	/*
+	 * Under an LPAR, H_REMOVE flushes the TLB, so more than one entry
+	 * is needed for any practical forward progress. Bare metal uses a
+	 * single entry, which seems to help uncover more bugs.
+	 */
+	return firmware_has_feature(FW_FEATURE_LPAR) ? STRESS_MAX_GROUPS : 1;
+}
+
+/* Array of NR_CPUS entries, allocated in htab_initialize() only when stress_hpt is enabled. */
+static struct stress_hpt_struct *stress_hpt_struct;
+
+/*
+ * of_scan_flat_dt() callback: look at a "cpu" node's
+ * "ibm,processor-segment-sizes" property for a 2^40 (1T) segment size.
+ *
+ * Returns 1 (stop scanning) once 1T segments have been enabled, 0 otherwise.
+ * When the property exists but 1T segments end up not being enabled,
+ * MMU_FTR_NO_SLBIE_B is cleared as well.
+ */
+static int __init htab_dt_scan_seg_sizes(unsigned long node,
+					 const char *uname, int depth,
+					 void *data)
+{
+	const char *dev_type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *sizes;
+	int len = 0;
+	int ncells, i;
+
+	/* Only "cpu" nodes are of interest */
+	if (!dev_type || strcmp(dev_type, "cpu"))
+		return 0;
+
+	sizes = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &len);
+	if (!sizes)
+		return 0;
+
+	/* The property is a list of 32-bit cells; ignore any partial cell. */
+	ncells = len / 4;
+	for (i = 0; i < ncells; i++) {
+		if (be32_to_cpu(sizes[i]) != 40)
+			continue;
+
+		DBG("1T segment support detected\n");
+
+		if (disable_1tb_segments) {
+			DBG("1T segments disabled by command line\n");
+			break;
+		}
+
+		cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
+		return 1;
+	}
+
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
+	return 0;
+}
+
+/*
+ * Map a page shift (log2 of the page size in bytes) to its MMU_PAGE_*
+ * index. Returns -1 for a shift this code does not know about.
+ */
+static int __init get_idx_from_shift(unsigned int shift)
+{
+	switch (shift) {
+	case 0xc:	/* 4K */
+		return MMU_PAGE_4K;
+	case 0x10:	/* 64K */
+		return MMU_PAGE_64K;
+	case 0x14:	/* 1M */
+		return MMU_PAGE_1M;
+	case 0x18:	/* 16M */
+		return MMU_PAGE_16M;
+	case 0x22:	/* 16G */
+		return MMU_PAGE_16G;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * of_scan_flat_dt() callback: fill mmu_psize_defs[] from a "cpu" node's
+ * "ibm,segment-page-sizes" property.
+ *
+ * The property is a sequence of records made of 32-bit cells:
+ *   <base_shift> <slb encoding> <lpnum> followed by lpnum pairs of
+ *   <actual shift> <penc>.
+ *
+ * Records or pairs that are cut short by the end of the property are
+ * ignored rather than read past the end of the property data.
+ *
+ * Returns 1 (stop scanning) once a node carrying the property has been
+ * processed, 0 if this node is not a cpu node or lacks the property.
+ */
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+					  const char *uname, int depth,
+					  void *data)
+{
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *prop;
+	int size = 0;
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
+	if (!prop)
+		return 0;
+
+	pr_info("Page sizes from device-tree:\n");
+	size /= 4;	/* from bytes to 32-bit cells */
+	cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
+	/* A record header is three cells; never read a partial one. */
+	while (size >= 3) {
+		unsigned int base_shift = be32_to_cpu(prop[0]);
+		unsigned int slbenc = be32_to_cpu(prop[1]);
+		unsigned int lpnum = be32_to_cpu(prop[2]);
+		struct mmu_psize_def *def;
+		int idx, base_idx;
+
+		size -= 3; prop += 3;
+		base_idx = get_idx_from_shift(base_shift);
+		if (base_idx < 0) {
+			/*
+			 * Unknown base size: skip the pte encoding also, but
+			 * stop if the record claims more pairs than remain.
+			 */
+			if (lpnum > (unsigned int)size / 2)
+				break;
+			prop += lpnum * 2; size -= lpnum * 2;
+			continue;
+		}
+		def = &mmu_psize_defs[base_idx];
+		if (base_idx == MMU_PAGE_16M)
+			cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
+
+		def->shift = base_shift;
+		if (base_shift <= 23)
+			def->avpnm = 0;
+		else
+			def->avpnm = (1 << (base_shift - 23)) - 1;
+		def->sllp = slbenc;
+		/*
+		 * We don't know for sure what's up with tlbiel, so
+		 * for now we only set it for 4K and 64K pages
+		 */
+		if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
+			def->tlbiel = 1;
+		else
+			def->tlbiel = 0;
+
+		/* Each (shift, penc) pair is two cells; never read a partial one. */
+		while (size >= 2 && lpnum) {
+			unsigned int shift = be32_to_cpu(prop[0]);
+			int penc  = be32_to_cpu(prop[1]);
+
+			prop += 2; size -= 2;
+			lpnum--;
+
+			idx = get_idx_from_shift(shift);
+			if (idx < 0)
+				continue;
+
+			/* -1 is the "unsupported" marker, so it is reported but still stored. */
+			if (penc == -1)
+				pr_err("Invalid penc for base_shift=%d "
+				       "shift=%d\n", base_shift, shift);
+
+			def->penc[idx] = penc;
+			pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
+				" avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
+				base_shift, shift, def->sllp,
+				def->avpnm, def->tlbiel, def->penc[idx]);
+		}
+	}
+
+	return 1;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Scan for 16G memory blocks that have been set aside for huge pages
+ * and reserve those blocks for 16G huge pages.
+ *
+ * of_scan_flat_dt() callback; always returns 0 so that every "memory"
+ * node is visited.
+ */
+static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
+					const char *uname, int depth,
+					void *data)
+{
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be64 *addr_prop;
+	const __be32 *page_count_prop;
+	unsigned int expected_pages;
+	long unsigned int phys_addr;
+	long unsigned int block_size;
+
+	/* We are scanning "memory" nodes only */
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	/*
+	 * This property is the log base 2 of the number of virtual pages that
+	 * will represent this memory block.
+	 */
+	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
+	if (page_count_prop == NULL)
+		return 0;
+	/* Shift an unsigned 1 so that an exponent of 31 is well defined. */
+	expected_pages = (1U << be32_to_cpu(page_count_prop[0]));
+	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
+	if (addr_prop == NULL)
+		return 0;
+	phys_addr = be64_to_cpu(addr_prop[0]);
+	block_size = be64_to_cpu(addr_prop[1]);
+	if (block_size != (16 * GB))
+		return 0;
+	printk(KERN_INFO "Huge page(16GB) memory: "
+			"addr = 0x%lX size = 0x%lX pages = %u\n",
+			phys_addr, block_size, expected_pages);
+	/* Only reserve the block when all of it lies within DRAM. */
+	if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) {
+		memblock_reserve(phys_addr, block_size * expected_pages);
+		pseries_add_gpage(phys_addr, block_size, expected_pages);
+	}
+	return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+/* Mark every base/actual page size combination in mmu_psize_defs[] as unsupported (penc = -1). */
+static void __init mmu_psize_set_default_penc(void)
+{
+	struct mmu_psize_def *def;
+	int actual;
+
+	for (def = mmu_psize_defs; def < mmu_psize_defs + MMU_PAGE_COUNT; def++) {
+		for (actual = 0; actual < MMU_PAGE_COUNT; actual++)
+			def->penc[actual] = -1;
+	}
+}
+
+#ifdef CONFIG_PPC_64K_PAGES
+
+/*
+ * Tell whether this system could possibly contain an HEA ethernet adapter.
+ * Always false when the kernel is built without CONFIG_IBMEBUS.
+ */
+static bool __init might_have_hea(void)
+{
+	/*
+	 * The HEA ethernet adapter needs GX bus awareness. Without that
+	 * awareness it is safe to assume an HEA device will never show up.
+	 */
+#ifdef CONFIG_IBMEBUS
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false;
+	return firmware_has_feature(FW_FEATURE_SPLPAR);
+#else
+	return false;
+#endif
+}
+
+#endif /* #ifdef CONFIG_PPC_64K_PAGES */
+
+/*
+ * Populate mmu_psize_defs[]: start from the 4K-only fallback, prefer what
+ * the device tree advertises, and fall back to the built-in 4K + 16M table
+ * when the device tree says nothing but the CPU has MMU_FTR_16M_PAGE.
+ * With hugetlb enabled on a hash MMU, also reserve the 16G huge page blocks.
+ */
+static void __init htab_scan_page_sizes(void)
+{
+	/* Set every penc to the invalid marker (-1) */
+	mmu_psize_set_default_penc();
+
+	/* Start from the 4K-only defaults */
+	memcpy(mmu_psize_defs, mmu_psize_defaults,
+	       sizeof(mmu_psize_defaults));
+
+	/*
+	 * Look for the supported page sizes in the device-tree. If there is
+	 * nothing there but the CPU supports 16M pages, use the known size
+	 * list for 16M capable CPUs instead.
+	 */
+	if (!of_scan_flat_dt(htab_dt_scan_page_sizes, NULL) &&
+	    early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
+		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+		       sizeof(mmu_psize_defaults_gp));
+	}
+
+#ifdef CONFIG_HUGETLB_PAGE
+	if (!(hugetlb_disabled || early_radix_enabled())) {
+		/* Reserve 16G huge page memory sections for huge pages */
+		of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+	}
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+/*
+ * Fill in the hpte_page_sizes[] array.
+ * We go through the mmu_psize_defs[] array looking for all the
+ * supported base/actual page size combinations.  Each combination
+ * has a unique pagesize encoding (penc) value in the low bits of
+ * the LP field of the HPTE.  For actual page sizes less than 1MB,
+ * some of the upper LP bits are used for RPN bits, meaning that
+ * we need to fill in several entries in hpte_page_sizes[].
+ *
+ * In diagrammatic form, with r = RPN bits and z = page size bits:
+ *        PTE LP     actual page size
+ *    rrrr rrrz		>=8KB
+ *    rrrr rrzz		>=16KB
+ *    rrrr rzzz		>=32KB
+ *    rrrr zzzz		>=64KB
+ *    ...
+ *
+ * The zzzz bits are implementation-specific but are chosen so that
+ * no encoding for a larger page size uses the same value in its
+ * low-order N bits as the encoding for the 2^(12+N) byte page size
+ * (if it exists).
+ */
+static void __init init_hpte_page_sizes(void)
+{
+	long int actual, base;
+	long int lp_shift, enc;
+
+	for (base = 0; base < MMU_PAGE_COUNT; ++base) {
+		/* a zero shift means this base page size is not supported */
+		if (mmu_psize_defs[base].shift == 0)
+			continue;
+
+		for (actual = base; actual < MMU_PAGE_COUNT; ++actual) {
+			/* skip combinations with no encoding or no actual size */
+			if (mmu_psize_defs[base].penc[actual] == -1)
+				continue;
+			if (mmu_psize_defs[actual].shift == 0)
+				continue;
+
+			lp_shift = mmu_psize_defs[actual].shift - LP_SHIFT;
+			if (lp_shift <= 0)
+				continue;	/* should never happen */
+
+			/*
+			 * For page sizes below 1MB the upper LP bits carry
+			 * RPN bits, so the entry is replicated for every
+			 * possible value of those rrrr bits.
+			 */
+			for (enc = mmu_psize_defs[base].penc[actual];
+			     enc < (1 << LP_BITS);
+			     enc += 1 << lp_shift)
+				hpte_page_sizes[enc] = (actual << 4) | base;
+		}
+	}
+}
+
+/*
+ * Choose the page sizes used for the linear mapping, ordinary (virtual and
+ * vmalloc) pages, I/O mappings and, with SPARSEMEM_VMEMMAP, the vmemmap,
+ * based on what mmu_psize_defs[] reports as supported. Also fills in
+ * hpte_page_sizes[] and logs the chosen page orders.
+ */
+static void __init htab_init_page_sizes(void)
+{
+	bool aligned = true;
+	init_hpte_page_sizes();
+
+	/* With debug_pagealloc or kfence the linear mapping keeps the default size set earlier. */
+	if (!debug_pagealloc_enabled_or_kfence()) {
+		/*
+		 * Pick a size for the linear mapping. Currently, we only
+		 * support 16M, 1M and 4K which is the default
+		 */
+		/* 0x1000000 is 16M: strict RWX needs the kernel text 16M aligned to use 16M pages. */
+		if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
+		    (unsigned long)_stext % 0x1000000) {
+			if (mmu_psize_defs[MMU_PAGE_16M].shift)
+				pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n");
+			aligned = false;
+		}
+
+		if (mmu_psize_defs[MMU_PAGE_16M].shift && aligned)
+			mmu_linear_psize = MMU_PAGE_16M;
+		else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+			mmu_linear_psize = MMU_PAGE_1M;
+	}
+
+#ifdef CONFIG_PPC_64K_PAGES
+	/*
+	 * Pick a size for the ordinary pages. Default is 4K, we support
+	 * 64K for user mappings and vmalloc if supported by the processor.
+	 * We only use 64k for ioremap if the processor
+	 * (and firmware) support cache-inhibited large pages.
+	 * If not, we use 4k and set mmu_ci_restrictions so that
+	 * hash_page knows to switch processes that use cache-inhibited
+	 * mappings to 4k pages.
+	 */
+	if (mmu_psize_defs[MMU_PAGE_64K].shift) {
+		mmu_virtual_psize = MMU_PAGE_64K;
+		mmu_vmalloc_psize = MMU_PAGE_64K;
+		/* Nothing larger was picked above, so at least move the linear map up to 64K. */
+		if (mmu_linear_psize == MMU_PAGE_4K)
+			mmu_linear_psize = MMU_PAGE_64K;
+		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
+			/*
+			 * When running on pSeries using 64k pages for ioremap
+			 * would stop us accessing the HEA ethernet. So if we
+			 * have the chance of ever seeing one, stay at 4k.
+			 */
+			if (!might_have_hea())
+				mmu_io_psize = MMU_PAGE_64K;
+		} else
+			mmu_ci_restrictions = 1;
+	}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/*
+	 * We try to use 16M pages for vmemmap if that is supported
+	 * and we have at least 1G of RAM at boot
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift &&
+	    memblock_phys_mem_size() >= 0x40000000)
+		mmu_vmemmap_psize = MMU_PAGE_16M;
+	else
+		mmu_vmemmap_psize = mmu_virtual_psize;
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+	/* The vmemmap column is spliced into both the format string and the argument list. */
+	printk(KERN_DEBUG "Page orders: linear mapping = %d, "
+	       "virtual = %d, io = %d"
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	       ", vmemmap = %d"
+#endif
+	       "\n",
+	       mmu_psize_defs[mmu_linear_psize].shift,
+	       mmu_psize_defs[mmu_virtual_psize].shift,
+	       mmu_psize_defs[mmu_io_psize].shift
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	       ,mmu_psize_defs[mmu_vmemmap_psize].shift
+#endif
+	       );
+}
+
+/*
+ * of_scan_flat_dt() callback: take the hash table size (as a shift) from a
+ * "cpu" node's "ibm,pft-size" property and store it in ppc64_pft_size.
+ *
+ * Returns 1 (stop scanning) once the property has been found, 0 otherwise.
+ */
+static int __init htab_dt_scan_pftsize(unsigned long node,
+				       const char *uname, int depth,
+				       void *data)
+{
+	const char *dev_type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *pft;
+
+	/* Only "cpu" nodes are of interest */
+	if (!dev_type || strcmp(dev_type, "cpu"))
+		return 0;
+
+	pft = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+	if (!pft)
+		return 0;
+
+	/* cell 0 is the NUMA CEC cookie, cell 1 the size */
+	ppc64_pft_size = be32_to_cpu(pft[1]);
+	return 1;
+}
+
+/*
+ * Compute the hash table size, as a log2 number of bytes, suited to
+ * @mem_size bytes of memory. The result is never below 18.
+ */
+unsigned htab_shift_for_mem_size(unsigned long mem_size)
+{
+	unsigned page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+	unsigned mem_shift = __ilog2(mem_size);
+
+	/* round mem_size up to the next power of 2 */
+	if (mem_size > (1UL << mem_shift))
+		mem_shift++;
+
+	/*
+	 * Aim for 2 pages per PTEG, i.e. 2^(mem_shift - page_shift - 1)
+	 * PTEGs, each 128 (2^7) bytes. 2^11 PTEGs of 128 bytes, ie. 2^18
+	 * bytes, is the minimum htab size permitted by the architecture.
+	 */
+	return max(mem_shift - (page_shift + 1) + 7, 18U);
+}
+
+/* Return the hash table size in bytes. */
+static unsigned long __init htab_get_table_size(void)
+{
+	/*
+	 * If the platform has not already provided the hash size, try to
+	 * retrieve it from the device-tree. If it is not there either,
+	 * compute it from the total RAM size.
+	 */
+	if (!ppc64_pft_size)
+		of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
+
+	if (!ppc64_pft_size)
+		return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size());
+
+	return 1UL << ppc64_pft_size;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Ask the platform to resize the hash table to suit @new_mem_size bytes
+ * of memory, if the platform can do that and the size is off by enough.
+ *
+ * Returns 0 when no resize is attempted, otherwise whatever the
+ * platform's resize_hpt() hook returns.
+ */
+static int resize_hpt_for_hotplug(unsigned long new_mem_size)
+{
+	unsigned wanted_shift;
+
+	if (mmu_hash_ops.resize_hpt == NULL)
+		return 0;
+
+	wanted_shift = htab_shift_for_mem_size(new_mem_size);
+
+	/*
+	 * To avoid a flurry of HPT resizes while the memory size hovers
+	 * around a boundary, some hysteresis is applied on purpose: the
+	 * HPT is grown as soon as the target shift exceeds the current
+	 * one, but it is only shrunk once the target shift is at least
+	 * 2 below the current shift. Anything in between is left alone.
+	 */
+	if (wanted_shift <= ppc64_pft_size &&
+	    wanted_shift >= ppc64_pft_size - 1)
+		return 0;
+
+	return mmu_hash_ops.resize_hpt(wanted_shift);
+}
+
+/*
+ * Bolt a linear mapping for the hot-added range [start, end).
+ *
+ * @start, @end: virtual range to map
+ * @nid:         not used here
+ * @prot:        page protection for the mapping
+ *
+ * Returns -1 if @end reaches H_VMALLOC_START, otherwise the result of
+ * htab_bolt_mapping(). On failure whatever was already bolted is removed
+ * again; a failure of that cleanup other than -ENOENT is a BUG.
+ */
+int hash__create_section_mapping(unsigned long start, unsigned long end,
+				 int nid, pgprot_t prot)
+{
+	int err, undo;
+
+	if (end >= H_VMALLOC_START) {
+		pr_warn("Outside the supported range\n");
+		return -1;
+	}
+
+	/* Best effort: the outcome of the resize does not affect the mapping attempt. */
+	resize_hpt_for_hotplug(memblock_phys_mem_size());
+
+	err = htab_bolt_mapping(start, end, __pa(start),
+				pgprot_val(prot), mmu_linear_psize,
+				mmu_kernel_ssize);
+	if (err >= 0)
+		return err;
+
+	/* Roll back the partially created mapping. */
+	undo = htab_remove_mapping(start, end, mmu_linear_psize,
+				   mmu_kernel_ssize);
+	BUG_ON(undo && (undo != -ENOENT));
+
+	return err;
+}
+
+/*
+ * Remove the bolted linear mapping for [start, end), then give the
+ * platform a chance to resize the hash table for the new memory size.
+ *
+ * Returns the result of htab_remove_mapping(); a resize failure is only
+ * logged (and only for -ENOSPC).
+ */
+int hash__remove_section_mapping(unsigned long start, unsigned long end)
+{
+	int rc, resize_rc;
+
+	rc = htab_remove_mapping(start, end, mmu_linear_psize,
+				 mmu_kernel_ssize);
+
+	resize_rc = resize_hpt_for_hotplug(memblock_phys_mem_size());
+	if (resize_rc == -ENOSPC)
+		pr_warn("Hash collision while resizing HPT\n");
+
+	return rc;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+/*
+ * Initialise the partition table and install the entry for LPID 0 that
+ * points at the hash table.
+ *
+ * @hash_table: physical address of the hash table
+ * @htab_size:  size of the hash table in bytes
+ */
+static void __init hash_init_partition_table(phys_addr_t hash_table,
+					     unsigned long htab_size)
+{
+	unsigned long size_enc;
+
+	mmu_partition_table_init();
+
+	/*
+	 * The PS field (VRMA page size) is unused for LPID 0, so it stays 0.
+	 * For now UPRT is 0 and there is no segment table.
+	 * The size is encoded as log2(bytes) - 18.
+	 */
+	size_enc = __ilog2(htab_size) - 18;
+	mmu_partition_table_set_entry(0, hash_table | size_enc, 0, false);
+	pr_info("Partition table %p\n", partition_tb);
+}
+
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+
+/*
+ * Timer callback for the stress_hpt mode: clear the stress state, flush
+ * the TLB when not running under an LPAR, then re-arm the timer 10ms out
+ * on the next online CPU (wrapping around to the first one).
+ */
+static void stress_hpt_timer_fn(struct timer_list *timer)
+{
+	int cpu;
+
+	hpt_clear_stress();
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		tlbiel_all();
+
+	/* Hop from CPU to CPU, round robin. */
+	cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	if (!(cpu < nr_cpu_ids))
+		cpu = cpumask_first(cpu_online_mask);
+
+	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+	add_timer_on(&stress_hpt_timer, cpu);
+}
+
+/*
+ * Boot-time setup of the hash page table:
+ *   - select 1T segments when available and enable the stress modes
+ *     requested on the command line;
+ *   - size the table, and allocate, clear and install it unless a
+ *     hypervisor owns it;
+ *   - bolt the linear mapping for every memblock memory range;
+ *   - bolt the TCE area if one was allocated.
+ *
+ * Any failure to bolt a mapping is fatal (BUG_ON).
+ */
+static void __init htab_initialize(void)
+{
+	unsigned long table;
+	unsigned long pteg_count;
+	unsigned long prot;
+	phys_addr_t base = 0, size = 0, end;
+	u64 i;
+
+	DBG(" -> htab_initialize()\n");
+
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
+		mmu_kernel_ssize = MMU_SEGSIZE_1T;
+		mmu_highuser_ssize = MMU_SEGSIZE_1T;
+		printk(KERN_INFO "Using 1TB segments\n");
+	}
+
+	if (stress_slb_enabled)
+		static_branch_enable(&stress_slb_key);
+
+	if (stress_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&stress_hpt_key);
+		// Too early to use nr_cpu_ids, so use NR_CPUS
+		/*
+		 * NOTE(review): the allocation result is used without a
+		 * check for failure (0) -- confirm that is acceptable here.
+		 */
+		tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+						__alignof__(struct stress_hpt_struct),
+						0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+		stress_hpt_struct = __va(tmp);
+
+		timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+		stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&stress_hpt_timer);
+	}
+
+	/*
+	 * Calculate the required size of the htab.  We want the number of
+	 * PTEGs to equal one half the number of real pages.
+	 */
+	htab_size_bytes = htab_get_table_size();
+	/* a PTEG is 128 (2^7) bytes */
+	pteg_count = htab_size_bytes >> 7;
+
+	htab_hash_mask = pteg_count - 1;
+
+	if (firmware_has_feature(FW_FEATURE_LPAR) ||
+	    firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+		/* Using a hypervisor which owns the htab */
+		htab_address = NULL;
+		_SDR1 = 0;
+#ifdef CONFIG_FA_DUMP
+		/*
+		 * If firmware assisted dump is active firmware preserves
+		 * the contents of htab along with entire partition memory.
+		 * Clear the htab if firmware assisted dump is active so
+		 * that we dont end up using old mappings.
+		 */
+		if (is_fadump_active() && mmu_hash_ops.hpte_clear_all)
+			mmu_hash_ops.hpte_clear_all();
+#endif
+	} else {
+		unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE;
+
+#ifdef CONFIG_PPC_CELL
+		/*
+		 * Cell may require the hash table down low when using the
+		 * Axon IOMMU in order to fit the dynamic region over it, see
+		 * comments in cell/iommu.c
+		 */
+		if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) {
+			limit = 0x80000000;
+			pr_info("Hash table forced below 2G for Axon IOMMU\n");
+		}
+#endif /* CONFIG_PPC_CELL */
+
+		/* The table is allocated aligned to its own size. */
+		table = memblock_phys_alloc_range(htab_size_bytes,
+						  htab_size_bytes,
+						  0, limit);
+		if (!table)
+			panic("ERROR: Failed to allocate %pa bytes below %pa\n",
+			      &htab_size_bytes, &limit);
+
+		DBG("Hash table allocated at %lx, size: %lx\n", table,
+		    htab_size_bytes);
+
+		htab_address = __va(table);
+
+		/* htab absolute addr + encoded htabsize */
+		_SDR1 = table + __ilog2(htab_size_bytes) - 18;
+
+		/* Initialize the HPT with no entries */
+		memset((void *)table, 0, htab_size_bytes);
+
+		/* Pre-ISA 3.0 CPUs take the table from SDR1; later ones from the partition table. */
+		if (!cpu_has_feature(CPU_FTR_ARCH_300))
+			/* Set SDR1 */
+			mtspr(SPRN_SDR1, _SDR1);
+		else
+			hash_init_partition_table(table, htab_size_bytes);
+	}
+
+	prot = pgprot_val(PAGE_KERNEL);
+
+	/* One slot byte per page of DRAM, recorded by htab_bolt_mapping(). */
+	if (debug_pagealloc_enabled_or_kfence()) {
+		linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+		linear_map_hash_slots = memblock_alloc_try_nid(
+				linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
+				ppc64_rma_size,	NUMA_NO_NODE);
+		if (!linear_map_hash_slots)
+			panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
+			      __func__, linear_map_hash_count, &ppc64_rma_size);
+	}
+
+	/* create bolted the linear mapping in the hash table */
+	for_each_mem_range(i, &base, &end) {
+		size = end - base;
+		/* from here on, base holds the linear-map virtual address */
+		base = (unsigned long)__va(base);
+
+		DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
+		    base, size, prot);
+
+		if ((base + size) >= H_VMALLOC_START) {
+			pr_warn("Outside the supported range\n");
+			continue;
+		}
+
+		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
+				prot, mmu_linear_psize, mmu_kernel_ssize));
+	}
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
+	/*
+	 * If we have a memory_limit and we've allocated TCEs then we need to
+	 * explicitly map the TCE area at the top of RAM. We also cope with the
+	 * case that the TCEs start below memory_limit.
+	 * tce_alloc_start/end are 16MB aligned so the mapping should work
+	 * for either 4K or 16MB pages.
+	 */
+	if (tce_alloc_start) {
+		tce_alloc_start = (unsigned long)__va(tce_alloc_start);
+		tce_alloc_end = (unsigned long)__va(tce_alloc_end);
+
+		/* base/size still describe the last memory range walked above. */
+		if (base + size >= tce_alloc_start)
+			tce_alloc_start = base + size + 1;
+
+		BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+					 __pa(tce_alloc_start), prot,
+					 mmu_linear_psize, mmu_kernel_ssize));
+	}
+
+
+	DBG(" <- htab_initialize()\n");
+}
+#undef KB
+#undef MB
+
+void __init hash__early_init_devtree(void)
+{
+	/* Initialize segment sizes by scanning the flat device tree */
+	of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
+
+	/* Initialize page sizes */
+	htab_scan_page_sizes();
+}
+
+static struct hash_mm_context init_hash_mm_context;
+
+/* Hash MMU early init: set geometry, pick HPTE backend, build HPT, init SLB. */
+void __init hash__early_init_mmu(void)
+{
+#ifndef CONFIG_PPC_64K_PAGES
+	/*
+	 * We have code in __hash_page_4K() and elsewhere, which assumes it can
+	 * do the following:
+	 *   new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+	 *
+	 * Where the slot number is between 0-15, and values of 8-15 indicate
+	 * the secondary bucket. For that code to work H_PAGE_F_SECOND and
+	 * H_PAGE_F_GIX must occupy four contiguous bits in the PTE, and
+	 * H_PAGE_F_SECOND must be placed above H_PAGE_F_GIX. Assert that here
+	 * with a BUILD_BUG_ON().
+	 */
+	BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul  << (H_PAGE_F_GIX_SHIFT + 3)));
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	htab_init_page_sizes();
+
+	/* Initialize the page table geometry from the hash (H_*) constants. */
+	__pte_frag_nr = H_PTE_FRAG_NR;
+	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+	__pmd_frag_nr = H_PMD_FRAG_NR;
+	__pmd_frag_size_shift = H_PMD_FRAG_SIZE_SHIFT;
+
+	__pte_index_size = H_PTE_INDEX_SIZE;
+	__pmd_index_size = H_PMD_INDEX_SIZE;
+	__pud_index_size = H_PUD_INDEX_SIZE;
+	__pgd_index_size = H_PGD_INDEX_SIZE;
+	__pud_cache_index = H_PUD_CACHE_INDEX;
+	__pte_table_size = H_PTE_TABLE_SIZE;
+	__pmd_table_size = H_PMD_TABLE_SIZE;
+	__pud_table_size = H_PUD_TABLE_SIZE;
+	__pgd_table_size = H_PGD_TABLE_SIZE;
+	/*
+	 * 4k use hugepd format, so for hash set them to
+	 * zero
+	 */
+	__pmd_val_bits = HASH_PMD_VAL_BITS;
+	__pud_val_bits = HASH_PUD_VAL_BITS;
+	__pgd_val_bits = HASH_PGD_VAL_BITS;
+
+	__kernel_virt_start = H_KERN_VIRT_START;
+	__vmalloc_start = H_VMALLOC_START;
+	__vmalloc_end = H_VMALLOC_END;
+	__kernel_io_start = H_KERN_IO_START;
+	__kernel_io_end = H_KERN_IO_END;
+	vmemmap = (struct page *)H_VMEMMAP_START;
+	ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+	pci_io_base = ISA_IO_BASE;
+#endif
+
+	/* Select appropriate backend */
+	if (firmware_has_feature(FW_FEATURE_PS3_LV1))
+		ps3_early_mm_init();
+	else if (firmware_has_feature(FW_FEATURE_LPAR))
+		hpte_init_pseries();
+	else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE))
+		hpte_init_native();
+
+	if (!mmu_hash_ops.hpte_insert)
+		panic("hash__early_init_mmu: No MMU hash ops defined!\n");
+
+	/*
+	 * Initialize the MMU Hash table and create the linear mapping
+	 * of memory. Has to be done before SLB initialization as this is
+	 * currently where the page size encoding is obtained.
+	 */
+	htab_initialize();
+
+	init_mm.context.hash_context = &init_hash_mm_context;
+	mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
+
+	pr_info("Initializing hash mmu with SLB\n");
+	/* Initialize SLB management */
+	slb_initialize();
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206)
+			&& cpu_has_feature(CPU_FTR_HVMODE))
+		tlbiel_all();
+}
+
+#ifdef CONFIG_SMP
+void hash__early_init_mmu_secondary(void)
+{
+	/* Initialize hash table for this CPU, unless running as an LPAR */
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			set_ptcr_when_no_uv(__pa(partition_tb) |
+					    (PATB_SIZE_SHIFT - 12));
+		else
+			mtspr(SPRN_SDR1, _SDR1);
+	}
+
+	/* Initialize this CPU's SLB */
+	slb_initialize();
+
+	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+	    cpu_has_feature(CPU_FTR_HVMODE))
+		tlbiel_all();
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	if (mmu_has_feature(MMU_FTR_PKEY))
+		mtspr(SPRN_UAMOR, default_uamor);
+#endif
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Lazy icache flush: returns pp, with HPTE_R_N added when the folio is not
+ * marked dcache-clean and the fault is not an instruction storage fault.
+ */
+unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
+{
+	struct folio *folio;
+
+	if (!pfn_valid(pte_pfn(pte)))
+		return pp;
+
+	folio = page_folio(pte_page(pte));
+	/* Folios already marked clean, and reserved ones, need no work */
+	if (test_bit(PG_dcache_clean, &folio->flags) || folio_test_reserved(folio))
+		return pp;
+
+	/* Flush only on an instruction fault; otherwise just set HPTE_R_N */
+	if (trap != INTERRUPT_INST_STORAGE)
+		return pp | HPTE_R_N;
+	flush_dcache_icache_folio(folio);
+	set_bit(PG_dcache_clean, &folio->flags);
+	return pp;
+}
+
+/* Page size code cached in the paca for addr's slice (one nibble per slice) */
+static unsigned int get_paca_psize(unsigned long addr)
+{
+	unsigned long slice;
+	unsigned char *map;
+
+	if (addr >= SLICE_LOW_TOP) {
+		slice = GET_HIGH_SLICE_INDEX(addr);
+		map = get_paca()->mm_ctx_high_slices_psize;
+	} else {
+		slice = GET_LOW_SLICE_INDEX(addr);
+		map = get_paca()->mm_ctx_low_slices_psize;
+	}
+	return (map[slice / 2] >> ((slice % 2) * 4)) & 0xF;
+}
+
+
+/*
+ * Demote a segment to using 4k pages.
+ * For now this makes the whole process use 4k pages.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
+	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
+		return;
+	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
+	copro_flush_all_slbs(mm);
+	/* Resync paca and SLB only if current uses mm and the paca is stale */
+	if ((get_paca_psize(addr) == MMU_PAGE_4K) || (current->mm != mm))
+		return;
+	copy_mm_to_paca(mm);
+	slb_flush_and_restore_bolted();
+}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * Look up the 2-bit protection code of the 4k subpage of a 64k page that
+ * holds ea. Userspace sets these codes using the subpage_prot system call.
+ *
+ * The result is the set of PTE permission bits to deny for that subpage:
+ * 0 for full permissions, _PAGE_WRITE for read-only, _PAGE_RWX for no
+ * access.
+ */
+static int subpage_protection(struct mm_struct *mm, unsigned long ea)
+{
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
+	u32 **l2_tbl, *l1_tbl;
+	u32 code = 0;
+
+	/* No table for this mm, or ea at/above the table's maxaddr */
+	if (!spt || ea >= spt->maxaddr)
+		return 0;
+
+	if (ea >= 0x100000000UL) {
+		l2_tbl = spt->protptrs[ea >> SBP_L3_SHIFT];
+		if (!l2_tbl)
+			return 0;
+	} else {
+		/* addresses below 4GB use spt->low_prot */
+		l2_tbl = spt->low_prot;
+	}
+
+	l1_tbl = l2_tbl[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
+	if (!l1_tbl)
+		return 0;
+
+	/* fetch the page's word, then shift this 4k subpage's 2 bits down */
+	code = l1_tbl[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];
+	code >>= 30 - 2 * ((ea >> 12) & 0xf);
+
+	/*
+	 * Translate into the flags that need to be cleared:
+	 * bit 1 set -> no access, bit 0 set -> read only, neither -> full.
+	 */
+	code = ((code & 2) ? _PAGE_RWX : 0) | ((code & 1) ? _PAGE_WRITE : 0);
+	return code;
+}
+
+#else /* CONFIG_PPC_SUBPAGE_PROT */
+/* Subpage protection not configured: no permission bit is ever denied */
+static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
+{
+	return 0;
+}
+#endif
+
+void hash_failure_debug(unsigned long ea, unsigned long access, unsigned long vsid,
+			unsigned long trap, int ssize, int psize, int lpsize, unsigned long pte)
+{
+	/* Rate-limited report of the parameters of a failed hash insertion */
+	if (printk_ratelimit()) {
+		pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
+			ea, access, current->comm);
+		pr_info("    trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
+			trap, vsid, ssize, psize, lpsize, pte);
+	}
+}
+
+static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+			     int psize, bool user_region)
+{
+	/* User address whose paca page size already matches: nothing to do */
+	if (user_region && psize == get_paca_psize(ea))
+		return;
+
+	if (user_region) {
+		copy_mm_to_paca(mm);
+		slb_flush_and_restore_bolted();
+	} else if (get_paca()->vmalloc_sllp != mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+		get_paca()->vmalloc_sllp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
+		slb_vmalloc_update();
+	}
+}
+
+/*
+ * Hash in the Linux PTE that maps ea in mm. Result code is:
+ *  0 - handled
+ *  1 - normal page fault
+ * -1 - critical hash insertion error
+ * -2 - access not permitted by subpage protection mechanism
+ */
+int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+		 unsigned long access, unsigned long trap,
+		 unsigned long flags)
+{
+	bool is_thp;
+	pgd_t *pgdir;
+	unsigned long vsid;
+	pte_t *ptep;
+	unsigned hugeshift;
+	int rc, user_region = 0;
+	int psize, ssize;
+
+	DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+		ea, access, trap);
+	trace_hash_fault(ea, access, trap);
+
+	/* Get region & vsid */
+	switch (get_region_id(ea)) {
+	case USER_REGION_ID:
+		user_region = 1;
+		if (! mm) {
+			DBG_LOW(" user region with no mm !\n");
+			rc = 1;
+			goto bail;
+		}
+		psize = get_slice_psize(mm, ea);
+		ssize = user_segment_size(ea);
+		vsid = get_user_vsid(&mm->context, ea, ssize);
+		break;
+	case VMALLOC_REGION_ID:
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		psize = mmu_vmalloc_psize;
+		ssize = mmu_kernel_ssize;
+		flags |= HPTE_USE_KERNEL_KEY;
+		break;
+
+	case IO_REGION_ID:
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		psize = mmu_io_psize;
+		ssize = mmu_kernel_ssize;
+		flags |= HPTE_USE_KERNEL_KEY;
+		break;
+	default:
+		/*
+		 * Not a valid range
+		 * Send the problem up to do_page_fault()
+		 */
+		rc = 1;
+		goto bail;
+	}
+	DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+
+	/* Bad address. */
+	if (!vsid) {
+		DBG_LOW("Bad address!\n");
+		rc = 1;
+		goto bail;
+	}
+	/* Get pgdir */
+	pgdir = mm->pgd;
+	if (pgdir == NULL) {
+		rc = 1;
+		goto bail;
+	}
+
+	/* Check CPU locality */
+	if (user_region && mm_is_thread_local(mm))
+		flags |= HPTE_LOCAL_UPDATE;
+
+#ifndef CONFIG_PPC_64K_PAGES
+	/*
+	 * If we use 4K pages and our psize is not 4K, then we might
+	 * be hitting a special driver mapping, and need to align the
+	 * address before we fetch the PTE.
+	 *
+	 * It could also be a hugepage mapping, in which case this is
+	 * not necessary, but it's not harmful, either.
+	 */
+	if (psize != MMU_PAGE_4K)
+		ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	/* Get PTE and page size from page tables */
+	ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
+	if (ptep == NULL || !pte_present(*ptep)) {
+		DBG_LOW(" no PTE !\n");
+		rc = 1;
+		goto bail;
+	}
+
+	/*
+	 * Add _PAGE_PRESENT to the required access perm. If there are parallel
+	 * updates to the pte that can possibly clear _PAGE_PTE, catch that too.
+	 *
+	 * We can safely use the return pte address in rest of the function
+	 * because we do set H_PAGE_BUSY which prevents further updates to pte
+	 * from generic code.
+	 */
+	access |= _PAGE_PRESENT | _PAGE_PTE;
+
+	/*
+	 * Pre-check access permissions (will be re-checked atomically
+	 * in __hash_page_XX but this pre-check is a fast path)
+	 */
+	if (!check_pte_access(access, pte_val(*ptep))) {
+		DBG_LOW(" no access !\n");
+		rc = 1;
+		goto bail;
+	}
+
+	if (hugeshift) {
+		if (is_thp)
+			rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
+					     trap, flags, ssize, psize);
+#ifdef CONFIG_HUGETLB_PAGE
+		else
+			rc = __hash_page_huge(ea, access, vsid, ptep, trap,
+					      flags, ssize, hugeshift, psize);
+#else
+		else {
+			/*
+			 * if we have hugeshift, and is not transhuge with
+			 * hugetlb disabled, something is really wrong.
+			 */
+			rc = 1;
+			WARN_ON(1);
+		}
+#endif
+		if (current->mm == mm)
+			check_paca_psize(ea, mm, psize, user_region);
+
+		goto bail;
+	}
+
+#ifndef CONFIG_PPC_64K_PAGES
+	DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+	DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+		pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+	/* Do actual hashing */
+#ifdef CONFIG_PPC_64K_PAGES
+	/* If H_PAGE_4K_PFN is set, make sure this is a 4k segment */
+	if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
+		demote_segment_4k(mm, ea);
+		psize = MMU_PAGE_4K;
+	}
+
+	/*
+	 * If this PTE is non-cacheable and we have restrictions on
+	 * using non cacheable large pages, then we switch to 4k
+	 */
+	if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
+		if (user_region) {
+			demote_segment_4k(mm, ea);
+			psize = MMU_PAGE_4K;
+		} else if (ea < VMALLOC_END) {
+			/*
+			 * some driver did a non-cacheable mapping
+			 * in vmalloc space, so switch vmalloc
+			 * to 4k pages
+			 */
+			printk(KERN_ALERT "Reducing vmalloc segment "
+			       "to 4kB pages because of "
+			       "non-cacheable mapping\n");
+			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
+			copro_flush_all_slbs(mm);
+		}
+	}
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	if (current->mm == mm)
+		check_paca_psize(ea, mm, psize, user_region);
+
+#ifdef CONFIG_PPC_64K_PAGES
+	if (psize == MMU_PAGE_64K)
+		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+				     flags, ssize);
+	else
+#endif /* CONFIG_PPC_64K_PAGES */
+	{
+		int spp = subpage_protection(mm, ea);
+		if (access & spp)
+			rc = -2;
+		else
+			rc = __hash_page_4K(ea, access, vsid, ptep, trap,
+					    flags, ssize, spp);
+	}
+
+	/*
+	 * Dump some info in case of hash insertion failure, they should
+	 * never happen so it is really useful to know if/when they do
+	 */
+	if (rc == -1)
+		hash_failure_debug(ea, access, vsid, trap, ssize, psize,
+				   psize, pte_val(*ptep));
+#ifndef CONFIG_PPC_64K_PAGES
+	DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+	DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+		pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+	DBG_LOW(" -> rc=%d\n", rc);
+
+bail:
+	return rc;
+}
+EXPORT_SYMBOL_GPL(hash_page_mm);
+
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+	      unsigned long dsisr)
+{
+	unsigned int region_id = get_region_id(ea);
+	unsigned long flags = (dsisr & DSISR_NOHPTE) ? HPTE_NOHPTE_UPDATE : 0;
+	struct mm_struct *mm;
+
+	/* vmalloc and I/O region addresses are resolved through init_mm */
+	if (region_id == VMALLOC_REGION_ID || region_id == IO_REGION_ID)
+		mm = &init_mm;
+	else
+		mm = current->mm;
+
+	return hash_page_mm(mm, ea, access, trap, flags);
+}
+EXPORT_SYMBOL_GPL(hash_page);
+
+/* Hash fault handler: try hash_page_mm(), else hand over to the page fault path */
+DEFINE_INTERRUPT_HANDLER(do_hash_fault)
+{
+	unsigned long ea = regs->dar;
+	unsigned long dsisr = regs->dsisr;
+	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
+	unsigned long flags = 0;
+	struct mm_struct *mm;
+	unsigned int region_id;
+	long err;
+
+	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+		hash__do_page_fault(regs);
+		return;
+	}
+
+	region_id = get_region_id(ea);
+	if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
+		mm = &init_mm;
+	else
+		mm = current->mm;
+
+	if (dsisr & DSISR_NOHPTE)
+		flags |= HPTE_NOHPTE_UPDATE;
+
+	if (dsisr & DSISR_ISSTORE)
+		access |= _PAGE_WRITE;
+	/*
+	 * We set _PAGE_PRIVILEGED only when
+	 * kernel mode access kernel space.
+	 *
+	 * _PAGE_PRIVILEGED is NOT set
+	 * 1) when kernel mode access user space
+	 * 2) user space access kernel space.
+	 */
+	if (!user_mode(regs) && (region_id != USER_REGION_ID))
+		access |= _PAGE_PRIVILEGED;
+
+	if (TRAP(regs) == INTERRUPT_INST_STORAGE)
+		access |= _PAGE_EXEC;
+
+	err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
+	if (unlikely(err < 0)) {
+		// -1: failed to insert a hash PTE (e.g. a hypervisor error),
+		// -2: access denied by subpage protection. Report either one
+		// here rather than through hash__do_page_fault().
+		if (user_mode(regs)) {
+			if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
+				_exception(SIGSEGV, regs, SEGV_ACCERR, ea);
+			else
+				_exception(SIGBUS, regs, BUS_ADRERR, ea);
+		} else {
+			bad_page_fault(regs, SIGBUS);
+		}
+	} else if (err) {
+		hash__do_page_fault(regs);
+	}
+}
+
+static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
+{
+	int slice_psize = get_slice_psize(mm, ea);
+
+	/*
+	 * Preload only when the slice uses the mm's standard user page size
+	 * (we only prefault standard pages for now) and, for 4K slices, when
+	 * no subpage protection is enabled for the EA.
+	 */
+	if (unlikely(slice_psize != mm_ctx_user_psize(&mm->context)))
+		return false;
+
+	if (slice_psize != MMU_PAGE_4K)
+		return true;
+	return !subpage_protection(mm, ea);
+}
+
+static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
+			 bool is_exec, unsigned long trap)
+{
+	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
+	unsigned long vsid, irq_flags;
+	int rc, ssize, hpte_flags = 0;
+
+	if (is_exec)
+		access |= _PAGE_EXEC;
+
+	BUG_ON(get_region_id(ea) != USER_REGION_ID);
+
+	if (!should_hash_preload(mm, ea))
+		return;
+
+	DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+		" trap=%lx\n", mm, mm->pgd, ea, access, trap);
+
+	/* Nothing to preload for an mm without a page directory */
+	if (mm->pgd == NULL)
+		return;
+
+	/* Get VSID; give up when there is none for this EA */
+	ssize = user_segment_size(ea);
+	vsid = get_user_vsid(&mm->context, ea, ssize);
+	if (!vsid)
+		return;
+
+#ifdef CONFIG_PPC_64K_PAGES
+	/*
+	 * On a 64K kernel, PTEs with H_PAGE_4K_PFN or cache inhibited set are
+	 * not preloaded: hash_page() takes care of them once the page is
+	 * actually accessed, so the segment page size demotion logic is not
+	 * duplicated here. Called with PTL held, hence the value won't
+	 * change in between.
+	 */
+	if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep))
+		return;
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	/*
+	 * __hash_page_* must run with interrupts off, including PMI interrupts
+	 * off, as it sets the H_PAGE_BUSY bit.
+	 *
+	 * It's otherwise possible for perf interrupts to hit at any time and
+	 * may take a hash fault reading the user stack, which could take a
+	 * hash miss and deadlock on the same H_PAGE_BUSY bit.
+	 *
+	 * Interrupts must also be off for the duration of the
+	 * mm_is_thread_local test and update, to prevent preempt running the
+	 * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
+	 */
+	powerpc_local_irq_pmu_save(irq_flags);
+
+	/* Is that local to this CPU ? */
+	if (mm_is_thread_local(mm))
+		hpte_flags |= HPTE_LOCAL_UPDATE;
+
+	/* Hash it in */
+#ifdef CONFIG_PPC_64K_PAGES
+	if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
+		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+				     hpte_flags, ssize);
+	else
+#endif /* CONFIG_PPC_64K_PAGES */
+		rc = __hash_page_4K(ea, access, vsid, ptep, trap, hpte_flags,
+				    ssize, subpage_protection(mm, ea));
+
+	/*
+	 * Dump some info in case of hash insertion failure, they should
+	 * never happen so it is really useful to know if/when they do
+	 */
+	if (rc == -1)
+		hash_failure_debug(ea, access, vsid, trap, ssize,
+				   mm_ctx_user_psize(&mm->context),
+				   mm_ctx_user_psize(&mm->context),
+				   pte_val(*ptep));
+
+	powerpc_local_irq_pmu_restore(irq_flags);
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+		      pte_t *ptep)
+{
+	/*
+	 * We don't need to worry about _PAGE_PRESENT here because we are
+	 * called with either mm->page_table_lock held or ptl lock held
+	 */
+	unsigned long trap;
+	bool is_exec;
+
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(*ptep) || address >= TASK_SIZE)
+		return;
+
+	/*
+	 * We try to figure out if we are coming from an instruction
+	 * access fault and pass that down to __hash_page so we avoid
+	 * double-faulting on execution of fresh text. We have to test
+	 * for regs NULL since init will get here first thing at boot.
+	 *
+	 * We also avoid filling the hash if not coming from a fault.
+	 */
+
+	trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
+	switch (trap) {
+	case INTERRUPT_DATA_STORAGE:
+		is_exec = false;
+		break;
+	case INTERRUPT_INST_STORAGE:
+		is_exec = true;
+		break;
+	default:
+		return;
+	}
+
+	hash_preload(vma->vm_mm, ptep, address, is_exec, trap);
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void tm_flush_hash_page(int local)
+{
+	/*
+	 * Transactions are not aborted by tlbiel, only tlbie. Without an
+	 * abort, syncing a page back to a block device w/PIO could pick up
+	 * transactional data (bad!), so we force one here. Before the sync
+	 * the page will be made read-only, which will flush_hash_page.
+	 * BIG ISSUE here: if the kernel uses a page from userspace without
+	 * unmapping it first, it may see the speculated version.
+	 */
+	if (!local || !cpu_has_feature(CPU_FTR_TM))
+		return;
+	if (!current->thread.regs || !MSR_TM_ACTIVE(current->thread.regs->msr))
+		return;
+	tm_enable();
+	tm_abort(TM_CAUSE_TLBI);
+}
+#else
+/* CONFIG_PPC_TRANSACTIONAL_MEM is off: there is nothing to abort */
+static inline void tm_flush_hash_page(int local) { }
+#endif
+
+/*
+ * Return the global hash slot, i.e. the slot number counted over the whole
+ * hash table, of the HPTE that backs the given subpage of a Linux PTE.
+ */
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+		int ssize, real_pte_t rpte, unsigned int subpg_index)
+{
+	unsigned long hash = hpt_hash(vpn, shift, ssize);
+	unsigned long hidx = __rpte_to_hidx(rpte, subpg_index);
+	unsigned long group;
+
+	/* Entries in the secondary group are found through the inverted hash */
+	if (hidx & _PTEIDX_SECONDARY)
+		hash = ~hash;
+	group = hash & htab_hash_mask;
+	return group * HPTES_PER_GROUP + (hidx & _PTEIDX_GROUP_IX);
+}
+
+/*
+ * Invalidate the HPTE of every hashed subpage of one Linux PTE.
+ */
+void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
+		     unsigned long flags)
+{
+	unsigned long index, shift, gslot;
+	int local = flags & HPTE_LOCAL_UPDATE;
+
+	DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
+	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+		gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
+		DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
+		/* Not used for hugepages: base and actual page size are equal */
+		mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
+					     ssize, local);
+	} pte_iterate_hashed_end();
+
+	tm_flush_hash_page(local);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+			 pmd_t *pmdp, unsigned int psize, int ssize,
+			 unsigned long flags)
+{
+	unsigned long base = addr & HPAGE_PMD_MASK;
+	unsigned char *slots = get_hpte_slot_array(pmdp);
+	int local = flags & HPTE_LOCAL_UPDATE;
+	unsigned long hidx, shift, vpn, hash, slot;
+	int i, nr_hptes;
+
+	/*
+	 * If we try to do a huge PTE update after a withdraw is done, we
+	 * will find the slot array NULL. This happens when we do
+	 * split_huge_pmd.
+	 */
+	if (!slots)
+		return;
+
+	/* Use the backend's hugepage invalidation when it provides one */
+	if (mmu_hash_ops.hugepage_invalidate) {
+		mmu_hash_ops.hugepage_invalidate(vsid, base, slots,
+						 psize, ssize, local);
+		tm_flush_hash_page(local);
+		return;
+	}
+
+	/*
+	 * No bulk hpte removal support, invalidate each entry
+	 */
+	shift = mmu_psize_defs[psize].shift;
+	nr_hptes = HPAGE_PMD_SIZE >> shift;
+	for (i = 0; i < nr_hptes; i++) {
+		/*
+		 * 8 bits per each hpte entries
+		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+		 */
+		if (!hpte_valid(slots, i))
+			continue;
+		hidx = hpte_hash_index(slots, i);
+
+		/* get the vpn of the i-th base page, then its hash slot */
+		addr = base + (i * (1ul << shift));
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP +
+		       (hidx & _PTEIDX_GROUP_IX);
+
+		mmu_hash_ops.hpte_invalidate(slot, vpn, psize,
+					     MMU_PAGE_16M, ssize, local);
+	}
+
+	tm_flush_hash_page(local);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+void flush_hash_range(unsigned long number, int local)
+{
+	struct ppc64_tlb_batch *batch;
+	int i;
+
+	/* Hand the whole batch to the backend when it can flush ranges */
+	if (mmu_hash_ops.flush_hash_range) {
+		mmu_hash_ops.flush_hash_range(number, local);
+		return;
+	}
+	batch = this_cpu_ptr(&ppc64_tlb_batch);
+	for (i = 0; i < number; i++)
+		flush_hash_page(batch->vpn[i], batch->pte[i], batch->psize, batch->ssize, local);
+}
+
+long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
+			   unsigned long pa, unsigned long rflags,
+			   unsigned long vflags, int psize, int ssize)
+{
+	unsigned long hpte_group;
+	long slot;
+
+	for (;;) {
+		/* Insert into the hash table, primary slot */
+		hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
+						vflags, psize, psize, ssize);
+		if (likely(slot != -1))
+			break;
+
+		/* Primary is full, try the secondary */
+		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
+						vflags | HPTE_V_SECONDARY,
+						psize, psize, ssize);
+		if (slot != -1)
+			break;
+
+		/*
+		 * Both full: remove from one group (timebase picks), retry
+		 */
+		if (mftb() & 0x1)
+			hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		mmu_hash_ops.hpte_remove(hpte_group);
+	}
+	return slot;
+}
+
+void hpt_clear_stress(void)
+{
+	int cpu = raw_smp_processor_id();
+	int g, i;
+
+	/* Walk every group remembered for this CPU and forget it again */
+	for (g = 0; g < stress_nr_groups(); g++) {
+		unsigned long group = stress_hpt_struct[cpu].last_group[g];
+
+		if (group == -1UL)
+			continue;
+		/* At most HPTES_PER_GROUP removals, fewer if one returns -1 */
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			if (mmu_hash_ops.hpte_remove(group) == -1)
+				break;
+		}
+		stress_hpt_struct[cpu].last_group[g] = -1;
+	}
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+	int cpu = raw_smp_processor_id();
+	unsigned long oldest = stress_nr_groups() - 1;
+	unsigned long victim = stress_hpt_struct[cpu].last_group[oldest];
+	int i;
+
+	if (victim == hpte_group)
+		return;
+
+	if (victim != -1UL) {
+		/*
+		 * Concurrent CPUs might be inserting into this group, so
+		 * give up after a number of iterations, to prevent a live
+		 * lock.
+		 */
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			if (mmu_hash_ops.hpte_remove(victim) == -1)
+				break;
+		}
+		stress_hpt_struct[cpu].last_group[oldest] = -1;
+	}
+
+	if (ea < PAGE_OFFSET)
+		return;
+
+	/*
+	 * We would really like to prefetch to get the TLB loaded, then
+	 * remove the PTE before returning from fault interrupt, to
+	 * increase the hash fault rate.
+	 *
+	 * Unfortunately QEMU TCG does not model the TLB in a way that
+	 * makes this possible, and systemsim (mambo) emulator does not
+	 * bring in TLBs with prefetches (although loads/stores do
+	 * work for non-CI PTEs).
+	 *
+	 * So remember this PTE and clear it on the next hash fault.
+	 */
+	memmove(&stress_hpt_struct[cpu].last_group[1], &stress_hpt_struct[cpu].last_group[0],
+		oldest * sizeof(unsigned long));
+	stress_hpt_struct[cpu].last_group[0] = hpte_group;
+}
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
+
+static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
+{
+	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
+	unsigned long hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
+	long slot;
+
+	/*
+	 * Don't create HPTE entries for a bad address (no VSID), nor for a
+	 * page whose slot entry is already flagged as mapped (0x80).
+	 */
+	if (!vsid || (linear_map_hash_slots[lmi] & 0x80))
+		return;
+
+	slot = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
+				     HPTE_V_BOLTED,
+				     mmu_linear_psize, mmu_kernel_ssize);
+
+	BUG_ON(slot < 0);
+
+	/* Record the slot, flagged with 0x80, under linear_map_hash_lock */
+	raw_spin_lock(&linear_map_hash_lock);
+	BUG_ON(linear_map_hash_slots[lmi] & 0x80);
+	linear_map_hash_slots[lmi] = slot | 0x80;
+	raw_spin_unlock(&linear_map_hash_lock);
+}
+
+static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
+{
+	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
+	unsigned long entry, hidx, slot;
+
+	/* Fetch the recorded slot and clear it under linear_map_hash_lock */
+	raw_spin_lock(&linear_map_hash_lock);
+	entry = linear_map_hash_slots[lmi];
+	if (entry & 0x80)
+		linear_map_hash_slots[lmi] = 0;
+	raw_spin_unlock(&linear_map_hash_lock);
+	/* 0x80 clear: no HPTE was recorded for this page, nothing to undo */
+	if (!(entry & 0x80))
+		return;
+	hidx = entry & 0x7f;
+	if (hidx & _PTEIDX_SECONDARY)
+		hash = ~hash;
+	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP + (hidx & _PTEIDX_GROUP_IX);
+	mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
+				     mmu_kernel_ssize, 0);
+}
+
+void hash__kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long irq_flags, vaddr, lmi;
+	int n;
+
+	local_irq_save(irq_flags);
+	for (n = 0; n < numpages; n++) {
+		vaddr = (unsigned long)page_address(page + n);
+		lmi = __pa(vaddr) >> PAGE_SHIFT;
+		if (lmi >= linear_map_hash_count)
+			continue;	/* no slot entry for this page */
+		if (!enable)
+			kernel_unmap_linear_page(vaddr, lmi);
+		else
+			kernel_map_linear_page(vaddr, lmi);
+	}
+	local_irq_restore(irq_flags);
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
+
+void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	u64 cap;
+
+	/*
+	 * We don't currently support the first MEMBLOCK not mapping 0
+	 * physical on those processors
+	 */
+	BUG_ON(first_memblock_base != 0);
+
+	/*
+	 * On virtualized systems the first entry is our RMA region aka VRMA,
+	 * non-virtualized 64-bit hash MMU systems don't have a limitation
+	 * on real mode access.
+	 */
+	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+		ppc64_rma_size = ULONG_MAX;
+		return;
+	}
+
+	/*
+	 * For guests on platforms before POWER9, we clamp the limit to 1G
+	 * to avoid some funky things such as RTAS bugs etc...
+	 *
+	 * On POWER9 we limit to 1TB in case the host erroneously told us that
+	 * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+	 * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+	 * for virtual real mode addressing and so it doesn't make sense to
+	 * have an area larger than 1TB as it can't be addressed.
+	 */
+	cap = early_cpu_has_feature(CPU_FTR_ARCH_300) ? 1UL << SID_SHIFT_1T : 0x40000000;
+	ppc64_rma_size = min_t(u64, first_memblock_size, cap);
+
+	/* Finally limit subsequent allocations */
+	memblock_set_current_limit(ppc64_rma_size);
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+static int hpt_order_get(void *data, u64 *val)
+{
+	*val = ppc64_pft_size;	/* "hpt_order" read: report ppc64_pft_size */
+	return 0;
+}
+
+static int hpt_order_set(void *data, u64 val)
+{
+	int ret = -ENODEV;
+
+	/* Only possible when the backend provides a resize_hpt operation */
+	if (mmu_hash_ops.resize_hpt) {
+		cpus_read_lock();
+		ret = mmu_hash_ops.resize_hpt(val);
+		cpus_read_unlock();
+	}
+
+	return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+
+static int __init hash64_debugfs(void)
+{
+	/* Mode 0600 file "hpt_order" in arch_debugfs_dir, using fops_hpt_order */
+	debugfs_create_file("hpt_order", 0600, arch_debugfs_dir, NULL, &fops_hpt_order);
+	return 0;
+}
+machine_device_initcall(pseries, hash64_debugfs);
+#endif /* CONFIG_DEBUG_FS */
+
+void __init print_system_hash_info(void)
+{
+	pr_info("ppc64_pft_size    = 0x%llx\n", ppc64_pft_size);
+	/* The hash mask is only printed when it is non-zero */
+	if (htab_hash_mask)
+		pr_info("htab_hash_mask    = 0x%lx\n", htab_hash_mask);
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long base = mm->brk;
+
+	if (is_32bit_task())
+		return randomize_page(base, SZ_32M);
+
+	/*
+	 * With 1TB segments the heap is moved above 1TB so that it is backed
+	 * by a 1TB segment; the bottom 1TB always uses 256MB segments, which
+	 * may result in a performance penalty.
+	 */
+	if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T)
+		base = max_t(unsigned long, base, SZ_1T);
+	return randomize_page(base, SZ_1G);
+}
diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c
new file mode 100644
index 0000000000..5a2e512e96
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * Based on the IA-32 version:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+
+unsigned int hpage_shift;
+EXPORT_SYMBOL(hpage_shift);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+		     pte_t *ptep, unsigned long trap, unsigned long flags,
+		     int ssize, unsigned int shift, unsigned int mmu_psize)
+{
+	real_pte_t rpte;
+	unsigned long vpn;
+	unsigned long old_pte, new_pte;
+	unsigned long rflags, pa;
+	long slot, offset;
+
+	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
+
+	/* Search the Linux page table for a match with va */
+	vpn = hpt_vpn(ea, vsid, ssize);
+
+	/*
+	 * At this point, we have a pte (old_pte) which can be used to build
+	 * or update an HPTE. There are 2 cases:
+	 *
+	 * 1. There is a valid (present) pte with no associated HPTE (this is
+	 *	the most common case)
+	 * 2. There is a valid (present) pte with an associated HPTE. The
+	 *	current values of the pp bits in the HPTE prevent access
+	 *	because we are doing software DIRTY bit management and the
+	 *	page is currently not DIRTY.
+	 */
+
+
+	do {
+		old_pte = pte_val(*ptep);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & H_PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(!check_pte_access(access, old_pte)))
+			return 1;
+
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access
+		 */
+		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+		if (access & _PAGE_WRITE)
+			new_pte |= _PAGE_DIRTY;
+	} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+	/* Make sure this is a hugetlb entry */
+	if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
+		return 0;
+
+	rflags = htab_convert_pte_flags(new_pte, flags);
+	if (unlikely(mmu_psize == MMU_PAGE_16G))
+		offset = PTRS_PER_PUD;
+	else
+		offset = PTRS_PER_PMD;
+	rpte = __real_pte(__pte(old_pte), ptep, offset);
+
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		/*
+		 * No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case
+		 */
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+	/* Check if pte already has an hpte (case 2) */
+	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+		/* There MIGHT be an HPTE for this pte */
+		unsigned long gslot;
+
+		gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+		if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
+					       mmu_psize, ssize, flags) == -1)
+			old_pte &= ~_PAGE_HPTEFLAGS;
+	}
+
+	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+		unsigned long hash = hpt_hash(vpn, shift, ssize);
+
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+
+		/* clear HPTE slot informations in new PTE */
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+
+		slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
+					     mmu_psize, ssize);
+
+		/*
+		 * Hypervisor failure. Restore old pte and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*ptep = __pte(old_pte);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   mmu_psize, mmu_psize, old_pte);
+			return -1;
+		}
+
+		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
+	}
+
+	/*
+	 * No need to use ldarx/stdcx here
+	 */
+	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+	return 0;
+}
+#endif
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+				  unsigned long addr, pte_t *ptep)
+{
+	unsigned long old_val;
+	/*
+	 * Trade _PAGE_PRESENT for _PAGE_INVALID so that hardware can no
+	 * longer update the entry in parallel, while pte_present() stays
+	 * true and we don't take a wrong fault.
+	 */
+	old_val = pte_update(vma->vm_mm, addr, ptep,
+			     _PAGE_PRESENT, _PAGE_INVALID, 1);
+
+	return __pte(old_val);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+				  pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	/* Radix has a dedicated commit helper; the rest is the hash path */
+	if (radix_enabled()) {
+		radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+						    old_pte, pte);
+		return;
+	}
+	set_huge_pte_at(vma->vm_mm, addr, ptep, pte,
+			huge_page_size(hstate_vma(vma)));
+}
+
+void __init hugetlbpage_init_defaultsize(void)
+{
+	/* Set default large page size. Currently, we pick the first
+	 * available of 16M, 1M and 2M, in that order
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift)
+		hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+		hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
+	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+		hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
+}
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
new file mode 100644
index 0000000000..a57a25f06a
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H
+#define ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H
+
+#include <linux/jump_label.h>
+
+extern bool stress_slb_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_slb_key);
+
+static inline bool stress_slb(void)
+{
+	return static_branch_unlikely(&stress_slb_key);
+}
+
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+	return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
+void slb_setup_new_exec(void);
+
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
+
+#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
new file mode 100644
index 0000000000..d19fb1f300
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  IOMMU helpers in MMU context.
+ *
+ *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+ */
+
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <linux/migrate.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/sizes.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
+#include <linux/mm_inline.h>
+
+static DEFINE_MUTEX(mem_list_mutex);
+
+#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
+#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)
+
+struct mm_iommu_table_group_mem_t {
+	struct list_head next;	/* entry in mm->context.iommu_group_mem_list */
+	struct rcu_head rcu;	/* used to free the region via call_rcu() */
+	unsigned long used;	/* get/put reference count */
+	atomic64_t mapped;	/* starts at 1; 0 once the region is released */
+	unsigned int pageshift;	/* largest page shift accepted by ua_to_hpa */
+	u64 ua;			/* userspace address */
+	u64 entries;		/* number of entries in hpas/hpages[] */
+	/*
+	 * mm_iommu_do_alloc() temporarily uses this to store struct page
+	 * pointers (hpages) while pinning.
+	 *
+	 * We need to convert ua to hpa in real mode. Make it
+	 * simpler by storing physical address.
+	 */
+	union {
+		struct page **hpages;	/* vmalloc'ed */
+		phys_addr_t *hpas;	/* left NULL for device memory */
+	};
+#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
+	u64 dev_hpa;		/* Device memory base address */
+};
+
+bool mm_iommu_preregistered(struct mm_struct *mm)
+{
+	return !list_empty(&mm->context.iommu_group_mem_list);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+
+/*
+ * Preregister @entries pages starting at @ua: either pinned user memory or,
+ * when @dev_hpa is valid, device memory. Adds the region to the mm's list
+ * and stores it in *@pmem. Returns 0 on success or a negative errno.
+ */
+static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+			      unsigned long entries, unsigned long dev_hpa,
+			      struct mm_iommu_table_group_mem_t **pmem)
+{
+	struct mm_iommu_table_group_mem_t *mem, *mem2;
+	long i, ret, locked_entries = 0, pinned = 0;
+	unsigned int pageshift;
+	unsigned long entry, chunk;
+
+	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+		ret = account_locked_vm(mm, entries, true);
+		if (ret)
+			return ret;
+
+		locked_entries = entries;
+	}
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem) {
+		ret = -ENOMEM;
+		goto unlock_exit;
+	}
+
+	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+		mem->dev_hpa = dev_hpa;
+		goto good_exit;
+	}
+	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
+	/*
+	 * For a starting point for a maximum page size calculation
+	 * we use @ua and @entries natural alignment to allow IOMMU pages
+	 * smaller than huge pages but still bigger than PAGE_SIZE.
+	 */
+	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
+	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
+	if (!mem->hpas) {
+		kfree(mem);
+		ret = -ENOMEM;
+		goto unlock_exit;
+	}
+
+	mmap_read_lock(mm);
+	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER)) /
+			sizeof(struct vm_area_struct *);
+	chunk = min(chunk, entries);
+	for (entry = 0; entry < entries; entry += chunk) {
+		unsigned long n = min(entries - entry, chunk);
+
+		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
+				FOLL_WRITE | FOLL_LONGTERM,
+				mem->hpages + entry);
+		if (ret == n) {
+			pinned += n;
+			continue;
+		}
+		if (ret > 0)
+			pinned += ret;
+		break;
+	}
+	mmap_read_unlock(mm);
+	if (pinned != entries) {
+		/* A partial pin leaves ret > 0; never return that as the error */
+		if (ret >= 0)
+			ret = -EFAULT;
+		goto free_exit;
+	}
+
+good_exit:
+	atomic64_set(&mem->mapped, 1);
+	mem->used = 1;
+	mem->ua = ua;
+	mem->entries = entries;
+
+	mutex_lock(&mem_list_mutex);
+
+	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
+				lockdep_is_held(&mem_list_mutex)) {
+		/* Overlap? */
+		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+				(ua < (mem2->ua +
+				       (mem2->entries << PAGE_SHIFT)))) {
+			ret = -EINVAL;
+			mutex_unlock(&mem_list_mutex);
+			goto free_exit;
+		}
+	}
+
+	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+		/*
+		 * Allow IOMMU pages larger than 64k, but only when backed by
+		 * hugetlb. Device memory is skipped: it has no page structs.
+		 */
+		pageshift = PAGE_SHIFT;
+		for (i = 0; i < entries; ++i) {
+			struct page *page = mem->hpages[i];
+
+			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+				pageshift = page_shift(compound_head(page));
+			mem->pageshift = min(mem->pageshift, pageshift);
+			/* struct page no longer needed: store the hpa */
+			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+		}
+	}
+
+	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+	mutex_unlock(&mem_list_mutex);
+
+	*pmem = mem;
+	return 0;
+
+free_exit:
+	/* free the references taken */
+	unpin_user_pages(mem->hpages, pinned);
+
+	vfree(mem->hpas);
+	kfree(mem);
+
+unlock_exit:
+	account_locked_vm(mm, locked_entries, false);
+
+	return ret;
+}
+
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+			pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_new);
+
+long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+
+static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+{
+	long idx;
+
+	/* No hpas[] array (device memory region): nothing was pinned */
+	if (!mem->hpas)
+		return;
+
+	for (idx = 0; idx < mem->entries; ++idx) {
+		struct page *pg;
+
+		if (!mem->hpas[idx])
+			continue;
+
+		pg = pfn_to_page(mem->hpas[idx] >> PAGE_SHIFT);
+		if (!pg)
+			continue;
+		/* Bit 0 of the stored address flags the page as dirtied */
+		if (mem->hpas[idx] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
+			SetPageDirty(pg);
+		unpin_user_page(pg);
+		mem->hpas[idx] = 0;
+	}
+}
+
+static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
+{
+
+	mm_iommu_unpin(mem);
+	vfree(mem->hpas);
+	kfree(mem);
+}
+
+static void mm_iommu_free(struct rcu_head *head)
+{
+	struct mm_iommu_table_group_mem_t *mem = container_of(head,
+			struct mm_iommu_table_group_mem_t, rcu);
+
+	mm_iommu_do_free(mem);
+}
+
+static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
+{
+	list_del_rcu(&mem->next);
+	call_rcu(&mem->rcu, mm_iommu_free);
+}
+
+long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
+{
+	long ret = 0;
+	unsigned long unlock_entries = 0;	/* stays 0 unless released */
+
+	mutex_lock(&mem_list_mutex);
+
+	if (mem->used == 0) {	/* no reference left to drop */
+		ret = -ENOENT;
+		goto unlock_exit;
+	}
+
+	--mem->used;
+	/* There are still users, exit */
+	if (mem->used)
+		goto unlock_exit;
+
+	/* Are there still mappings? */
+	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
+		++mem->used;	/* undo the decrement above, region stays */
+		ret = -EBUSY;
+		goto unlock_exit;
+	}
+
+	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+		unlock_entries = mem->entries;	/* only pinned memory was accounted */
+
+	/* @mapped became 0 so now mappings are disabled, release the region */
+	mm_iommu_release(mem);
+
+unlock_exit:
+	mutex_unlock(&mem_list_mutex);
+
+	account_locked_vm(mm, unlock_entries, false);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_put);
+
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+		unsigned long ua, unsigned long size)
+{
+	struct mm_iommu_table_group_mem_t *cur, *found = NULL;
+
+	/* Find the first region that fully contains [ua, ua + size) */
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &mm->context.iommu_group_mem_list, next) {
+		if (cur->ua > ua)
+			continue;
+		if (ua + size > cur->ua + (cur->entries << PAGE_SHIFT))
+			continue;
+		found = cur;
+		break;
+	}
+	rcu_read_unlock();
+	return found;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+
+struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
+		unsigned long ua, unsigned long entries)
+{
+	struct mm_iommu_table_group_mem_t *cur, *found = NULL;
+
+	mutex_lock(&mem_list_mutex);
+
+	/* Only an exact (ua, entries) match counts; bump its use count */
+	list_for_each_entry_rcu(cur, &mm->context.iommu_group_mem_list, next,
+				lockdep_is_held(&mem_list_mutex)) {
+		if (cur->ua != ua || cur->entries != entries)
+			continue;
+		++cur->used;
+		found = cur;
+		break;
+	}
+
+	mutex_unlock(&mem_list_mutex);
+	return found;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
+{
+	const long idx = (ua - mem->ua) >> PAGE_SHIFT;
+
+	/* Reject entries past the region and too-large IOMMU pages */
+	if (idx >= mem->entries || pageshift > mem->pageshift)
+		return -EFAULT;
+
+	if (mem->hpas) {
+		/* Pinned memory: mask off the low bits, add in-page offset */
+		u64 *slot = &mem->hpas[idx];
+
+		*hpa = (*slot & MM_IOMMU_TABLE_GROUP_PAGE_MASK) |
+		       (ua & ~PAGE_MASK);
+	} else {
+		/* Device memory: plain offset from dev_hpa */
+		*hpa = mem->dev_hpa + (ua - mem->ua);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+
+/* Is @hpa inside preregistered device memory? If so, also set *@size. */
+bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+	unsigned long end;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+			continue;
+
+		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+			/*
+			 * IOMMU pages may exceed PAGE_SIZE: clamp *size so the
+			 * caller can tell when less than 1<<pageshift is left.
+			 */
+			*size = min(1UL << pageshift, end - hpa);
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
+long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+{
+	if (atomic64_inc_not_zero(&mem->mapped))
+		return 0;
+
+	/* Last mm_iommu_put() has been called, no more mappings allowed() */
+	return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
+
+void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
+{
+	atomic64_add_unless(&mem->mapped, -1, 1);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
+
+void mm_iommu_init(struct mm_struct *mm)
+{
+	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
+}
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
new file mode 100644
index 0000000000..1715b07c63
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  MMU context allocation for 64-bit kernels.
+ *
+ *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/pkeys.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#include "internal.h"
+
+static DEFINE_IDA(mmu_context_ida);
+
+static int alloc_context_id(int min_id, int max_id)
+{
+	return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void __init hash__reserve_context_id(int id)
+{
+	int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);
+
+	WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
+}
+
+/* Allocate a user context id; the upper bound depends on MMU_FTR_68_BIT_VA */
+int hash__alloc_context_id(void)
+{
+	unsigned long limit = MAX_USER_CONTEXT_65BIT_VA;
+
+	/* 68-bit VA capable MMUs use the MAX_USER_CONTEXT limit instead */
+	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+		limit = MAX_USER_CONTEXT;
+
+	return alloc_context_id(MIN_USER_CONTEXT, limit);
+}
+EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+#endif
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static int realloc_context_ids(mm_context_t *ctx)
+{
+	int i, id;
+
+	/*
+	 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+	 * there wasn't one allocated previously (which happens in the exec
+	 * case where ctx is newly allocated).
+	 *
+	 * We have to be a bit careful here. We must keep the existing ids in
+	 * the array, so that we can test if they're non-zero to decide if we
+	 * need to allocate a new one. However in case of error we must free the
+	 * ids we've allocated but *not* any of the existing ones (or risk a
+	 * UAF). That's why we decrement i at the start of the error handling
+	 * loop, to skip the id that we just tested but couldn't reallocate.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+		if (i == 0 || ctx->extended_id[i]) {
+			id = hash__alloc_context_id();
+			if (id < 0)
+				goto error;
+
+			ctx->extended_id[i] = id;
+		}
+	}
+
+	/* The caller expects us to return id */
+	return ctx->id;
+
+error:
+	for (i--; i >= 0; i--) {
+		if (ctx->extended_id[i])
+			ida_free(&mmu_context_ida, ctx->extended_id[i]);
+	}
+
+	return id;
+}
+
+static int hash__init_new_context(struct mm_struct *mm)
+{
+	int index;
+
+	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
+					   GFP_KERNEL);
+	if (!mm->context.hash_context)
+		return -ENOMEM;
+
+	/*
+	 * The old code would re-promote on fork, we don't do that when using
+	 * slices as it could cause problem promoting slices that have been
+	 * forced down to 4K.
+	 *
+	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+	 * explicitly against context.id == 0. This ensures that we properly
+	 * initialize context slice details for newly allocated mm's (which will
+	 * have id == 0) and don't alter context slice inherited via fork (which
+	 * will have id != 0).
+	 *
+	 * We should not be calling init_new_context() on init_mm. Hence a
+	 * check against 0 is OK.
+	 */
+	if (mm->context.id == 0) {
+		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
+		slice_init_new_context_exec(mm);
+	} else {
+		/* This is fork. Copy hash_context details from current->mm */
+		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+		/* inherit subpage prot details if we have one. */
+		if (current->mm->context.hash_context->spt) {
+			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
+								GFP_KERNEL);
+			if (!mm->context.hash_context->spt) {
+				kfree(mm->context.hash_context);
+				return -ENOMEM;
+			}
+		}
+#endif
+	}
+
+	index = realloc_context_ids(&mm->context);
+	if (index < 0) {
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+		kfree(mm->context.hash_context->spt);
+#endif
+		kfree(mm->context.hash_context);
+		return index;
+	}
+
+	pkey_mm_init(mm);
+	return index;
+}
+
+void hash__setup_new_exec(void)
+{
+	slice_setup_new_exec();
+
+	slb_setup_new_exec();
+}
+#else
+static inline int hash__init_new_context(struct mm_struct *mm)
+{
+	BUILD_BUG();
+	return 0;
+}
+#endif
+
+static int radix__init_new_context(struct mm_struct *mm)
+{
+	unsigned long rts_field;
+	int index, max_id;
+
+	max_id = (1 << mmu_pid_bits) - 1;
+	index = alloc_context_id(mmu_base_pid, max_id);
+	if (index < 0)
+		return index;
+
+	/*
+	 * set the process table entry,
+	 */
+	rts_field = radix__get_tree_size();
+	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
+
+	/*
+	 * Order the above store with subsequent update of the PID
+	 * register (at which point HW can start loading/caching
+	 * the entry) and the corresponding load by the MMU from
+	 * the L2 cache.
+	 */
+	asm volatile("ptesync;isync" : : : "memory");
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	mm->context.hash_context = NULL;
+#endif
+
+	return index;
+}
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int index;
+
+	if (radix_enabled())
+		index = radix__init_new_context(mm);
+	else
+		index = hash__init_new_context(mm);
+
+	if (index < 0)
+		return index;
+
+	mm->context.id = index;
+
+	mm->context.pte_frag = NULL;
+	mm->context.pmd_frag = NULL;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	mm_iommu_init(mm);
+#endif
+	atomic_set(&mm->context.active_cpus, 0);
+	atomic_set(&mm->context.copros, 0);
+
+	return 0;
+}
+
+void __destroy_context(int context_id)
+{
+	ida_free(&mmu_context_ida, context_id);
+}
+EXPORT_SYMBOL_GPL(__destroy_context);
+
+static void destroy_contexts(mm_context_t *ctx)
+{
+	if (radix_enabled()) {
+		ida_free(&mmu_context_ida, ctx->id);
+	} else {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+		int index, context_id;
+
+		for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+			context_id = ctx->extended_id[index];
+			if (context_id)
+				ida_free(&mmu_context_ida, context_id);
+		}
+		kfree(ctx->hash_context);
+#else
+		BUILD_BUG(); // radix_enabled() should be constant true
+#endif
+	}
+}
+
+static void pmd_frag_destroy(void *pmd_frag)
+{
+	int count;
+	struct ptdesc *ptdesc;
+
+	ptdesc = virt_to_ptdesc(pmd_frag);
+	/* drop all the pending references */
+	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
+	/* We allow PMD_FRAG_NR fragments from a PMD page */
+	if (atomic_sub_and_test(PMD_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+		pagetable_pmd_dtor(ptdesc);
+		pagetable_free(ptdesc);
+	}
+}
+
+/*
+ * Destroy the PTE and PMD fragments cached in @mm's context, if any.
+ */
+static void destroy_pagetable_cache(struct mm_struct *mm)
+{
+	void *pte_frag = mm->context.pte_frag;
+	void *pmd_frag = mm->context.pmd_frag;
+
+	if (pte_frag)
+		pte_frag_destroy(pte_frag);
+	if (pmd_frag)
+		pmd_frag_destroy(pmd_frag);
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
+#endif
+	/*
+	 * For tasks which were successfully initialized we end up calling
+	 * arch_exit_mmap() which clears the process table entry. And
+	 * arch_exit_mmap() is called before the required fullmm TLB flush
+	 * which does a RIC=2 flush. Hence for an initialized task, we do clear
+	 * any cached process table entries.
+	 *
+	 * The condition below handles the error case during task init. We have
+	 * set the process table entry early and if we fail a task
+	 * initialization, we need to ensure the process table entry is zeroed.
+	 * We need not worry about process table entry caches because the task
+	 * never ran with the PID value.
+	 */
+	if (radix_enabled())
+		process_tb[mm->context.id].prtb0 = 0;
+	else
+		subpage_prot_free(mm);
+	destroy_contexts(&mm->context);
+	mm->context.id = MMU_NO_CONTEXT;
+}
+
+void arch_exit_mmap(struct mm_struct *mm)
+{
+	destroy_pagetable_cache(mm);
+
+	if (radix_enabled()) {
+		/*
+		 * Radix doesn't have a valid bit in the process table
+		 * entries. However we know that at least P9 implementation
+		 * will avoid caching an entry with an invalid RTS field,
+		 * and 0 is invalid. So this will do.
+		 *
+		 * This runs before the "fullmm" tlb flush in exit_mmap,
+		 * which does a RIC=2 tlbie to clear the process table
+		 * entry. See the "fullmm" comments in tlb-radix.c.
+		 *
+		 * No barrier required here after the store because
+		 * this process will do the invalidate, which starts with
+		 * ptesync.
+		 */
+		process_tb[mm->context.id].prtb0 = 0;
+	}
+}
+
+#ifdef CONFIG_PPC_RADIX_MMU
+void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+{
+	mtspr(SPRN_PID, next->context.id);
+	isync();
+}
+#endif
+
+/**
+ * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
+ *
+ * This clears the CPU from mm_cpumask for all processes, and then flushes the
+ * local TLB to ensure TLB coherency in case the CPU is onlined again.
+ *
+ * KVM guest translations are not necessarily flushed here. If KVM started
+ * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+void cleanup_cpu_mmu_context(void)
+{
+	int cpu = smp_processor_id();
+
+	clear_tasks_mm_cpumask(cpu);
+	tlbiel_all();
+}
+#endif
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
new file mode 100644
index 0000000000..8f8a62d3ff
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/memblock.h>
+#include <linux/memremap.h>
+#include <linux/pkeys.h>
+#include <linux/debugfs.h>
+#include <linux/proc_fs.h>
+#include <misc/cxl-base.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/trace.h>
+#include <asm/powernv.h>
+#include <asm/firmware.h>
+#include <asm/ultravisor.h>
+#include <asm/kexec.h>
+
+#include <mm/mmu_decl.h>
+#include <trace/events/thp.h>
+
+#include "internal.h"
+
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
+
+unsigned long __pmd_frag_nr;
+EXPORT_SYMBOL(__pmd_frag_nr);
+unsigned long __pmd_frag_size_shift;
+EXPORT_SYMBOL(__pmd_frag_size_shift);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp, pmd_t entry, int dirty)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!(pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp)));
+	assert_spin_locked(pmd_lockptr(vma->vm_mm, pmdp));
+#endif
+	/* Nothing to do when the installed pmd already equals @entry. */
+	if (pmd_same(*pmdp, entry))
+		return 0;
+
+	/*
+	 * MMU_PAGE_2M can be passed unconditionally, because only
+	 * the radix path looks at the psize. @dirty is not used here.
+	 */
+	__ptep_set_access_flags(vma, pmdp_ptep(pmdp),
+				pmd_pte(entry), address, MMU_PAGE_2M);
+	return 1;
+}
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pud_t *pudp, pud_t entry, int dirty)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pud_devmap(*pudp));
+	assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
+#endif
+	/* Nothing to do when the installed pud already equals @entry. */
+	if (pud_same(*pudp, entry))
+		return 0;
+
+	/*
+	 * MMU_PAGE_1G can be passed unconditionally, because only
+	 * the radix path looks at the psize. @dirty is not used here.
+	 */
+	__ptep_set_access_flags(vma, pudp_ptep(pudp),
+				pud_pte(entry), address, MMU_PAGE_1G);
+	return 1;
+}
+
+
+/* VMA front end: forwards to __pmdp_test_and_clear_young() on vma->vm_mm. */
+int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp)
+{
+	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/* VMA front end: forwards to __pudp_test_and_clear_young() on vma->vm_mm. */
+int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pud_t *pudp)
+{
+	return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
+}
+
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Make sure hardware valid bit is not set (a pte_protnone()
+	 * entry is tolerated). We don't do tlb flush for this update.
+	 * The new value is expected to be a large pmd.
+	 */
+	WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+	WARN_ON(!(pmd_large(pmd)));
+#endif
+	trace_hugepage_set_pmd(addr, pmd_val(pmd));
+	set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+}
+
+void set_pud_at(struct mm_struct *mm, unsigned long addr,
+		pud_t *pudp, pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Make sure hardware valid bit is not set. We don't do
+	 * tlb flush for this update. The new value is expected to
+	 * be a large pud.
+	 */
+	WARN_ON(pte_hw_valid(pud_pte(*pudp)));
+	assert_spin_locked(pud_lockptr(mm, pudp));
+	WARN_ON(!(pud_large(pud)));
+#endif
+	trace_hugepage_set_pud(addr, pud_val(pud));
+	set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
+}
+
+static void do_serialize(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	/* We've taken the IPI, so try to trim the mask while here */
+	if (radix_enabled())
+		exit_lazy_flush_tlb(mm, false);
+}
+
+/*
+ * Serialize against __find_linux_pte(), which does a lock-less lookup in
+ * the page tables with local interrupts disabled. For huge pages it casts
+ * pmd_t to pte_t. Since the format of pte_t differs from pmd_t, we must
+ * prevent a pmd from changing between "points to a page table" and "points
+ * to a huge page" (either way) while such a lookup has interrupts disabled.
+ * Other code paths clear the pmd so that it can be replaced by a page table
+ * pointer, so wait here for any parallel __find_linux_pte() to finish: the
+ * IPI goes to the CPUs in mm_cpumask(mm) and we wait for it to complete.
+ */
+void serialize_against_pte_lookup(struct mm_struct *mm)
+{
+	smp_mb();
+	smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
+}
+
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
+pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+		     pmd_t *pmdp)
+{
+	unsigned long prev, end = address + HPAGE_PMD_SIZE;
+
+	prev = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
+	flush_pmd_tlb_range(vma, address, end);
+	return __pmd(prev);
+}
+
+pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+				   unsigned long addr, pmd_t *pmdp, int full)
+{
+	pmd_t old;
+	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(!pmd_present(*pmdp) ||
+		  !(pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp)));
+	old = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+	/*
+	 * Unless this is a fullmm flush, a parallel page fault can end up
+	 * converting this PMD pte entry to a regular level 0 PTE, so make
+	 * sure the tlb is flushed in that case.
+	 */
+	if (!full)
+		flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
+	return old;
+}
+
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+				   unsigned long addr, pud_t *pudp, int full)
+{
+	pud_t pud;
+
+	VM_BUG_ON(addr & ~HPAGE_PUD_MASK);
+	VM_BUG_ON((pud_present(*pudp) && !pud_devmap(*pudp)) ||
+		  !pud_present(*pudp));
+	pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
+	/*
+	 * if it is not a fullmm flush, then we can possibly end up converting
+	 * this PUD entry to a lower level page table by a parallel page fault.
+	 * Make sure we flush the tlb in this case.
+	 */
+	if (!full)
+		flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
+	return pud;
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+	return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); /* OR-only: never clears bits of @pmd */
+}
+
+static pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot)
+{
+	return __pud(pud_val(pud) | pgprot_val(pgprot)); /* OR-only: never clears bits of @pud */
+}
+
+/*
+ * At some point we should be able to get rid of
+ * pmd_mkhuge() and mk_huge_pmd() when we update all the
+ * other archs to mark the pmd huge in pfn_pmd()
+ */
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	pmd_t entry = __pmd((pfn << PAGE_SHIFT) & PTE_RPN_MASK);
+
+	/* Add the protection bits first, then mark the entry huge. */
+	entry = pmd_set_protbits(entry, pgprot);
+	return __pmd_mkhuge(entry);
+}
+
+pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot)
+{
+	pud_t entry = __pud((pfn << PAGE_SHIFT) & PTE_RPN_MASK);
+
+	/* Add the protection bits first, then mark the entry huge. */
+	entry = pud_set_protbits(entry, pgprot);
+	return __pud_mkhuge(entry);
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+	return pfn_pmd(page_to_pfn(page), pgprot); /* pfn_pmd() on the pfn of @page */
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	unsigned long kept;
+
+	/* Only the bits in _HPAGE_CHG_MASK carry over; @newprot is ORed on top. */
+	kept = pmd_val(pmd) & _HPAGE_CHG_MASK;
+	return pmd_set_protbits(__pmd(kept), newprot);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/* For use by kexec, called with MMU off */
+notrace void mmu_cleanup_all(void)
+{
+	if (radix_enabled())
+		radix__mmu_cleanup_all();
+	else if (mmu_hash_ops.hpte_clear_all)	/* hash: the hook may be unset */
+		mmu_hash_ops.hpte_clear_all();
+
+	reset_sprs();	/* done for both MMU modes */
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int __meminit create_section_mapping(unsigned long start, unsigned long end,
+				     int nid, pgprot_t prot)
+{
+	/* Dispatch on the MMU mode in use. */
+	if (!radix_enabled())
+		return hash__create_section_mapping(start, end, nid, prot);
+	return radix__create_section_mapping(start, end, nid, prot);
+}
+
+int __meminit remove_section_mapping(unsigned long start, unsigned long end)
+{
+	/* Dispatch on the MMU mode in use. */
+	if (!radix_enabled())
+		return hash__remove_section_mapping(start, end);
+	return radix__remove_section_mapping(start, end);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+void __init mmu_partition_table_init(void)
+{
+	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+	unsigned long ptcr;
+
+	/* Initialize the Partition Table with no entries; it is aligned to its own size */
+	partition_tb = memblock_alloc(patb_size, patb_size);
+	if (!partition_tb)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, patb_size, patb_size);
+
+	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); /* table PA | (log2(size) - 12) */
+	set_ptcr_when_no_uv(ptcr);
+	powernv_set_nmmu_ptcr(ptcr);
+}
+
+static void flush_partition(unsigned int lpid, bool radix)
+{
+	if (radix) {	/* flush @lpid as a radix partition */
+		radix__flush_all_lpid(lpid);
+		radix__flush_all_lpid_guest(lpid);
+	} else {	/* otherwise issue the tlbie sequence by hand */
+		asm volatile("ptesync" : : : "memory");
+		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+		/* NOTE: open question left by the author: is a fixup needed here? */
+		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+	}
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+				  unsigned long dw1, bool flush)
+{
+	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+
+	/*
+	 * When ultravisor is enabled, the partition table is stored in secure
+	 * memory and can only be accessed by doing an ultravisor call. However,
+	 * we maintain a copy of the partition table in normal memory to allow
+	 * Nest MMU translations to occur (for normal VMs).
+	 *
+	 * Therefore, here we always update partition_tb, regardless of whether
+	 * we are running under an ultravisor or not. (old was read above.)
+	 */
+	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+	partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+	/*
+	 * If ultravisor is enabled, we do an ultravisor call to register the
+	 * partition table entry (PATE), which also does a global flush of TLBs
+	 * and partition table caches for the lpid. Otherwise, just do the
+	 * flush. The type of flush (hash or radix) depends on what the previous
+	 * use of the partition ID was, not the new use.
+	 */
+	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+		uv_register_pate(lpid, dw0, dw1);
+		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+			dw0, dw1);
+	} else if (flush) {
+		/*
+		 * Boot does not need to flush, because MMU is off and each
+		 * CPU does a tlbiel_all() before switching them on, which
+		 * flushes everything.
+		 */
+		flush_partition(lpid, (old & PATB_HR));
+	}
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+
+static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
+{
+	void *pmd_frag, *ret;
+
+	if (PMD_FRAG_NR == 1)
+		return NULL;
+
+	spin_lock(&mm->page_table_lock);
+	ret = mm->context.pmd_frag;
+	if (ret) {
+		pmd_frag = ret + PMD_FRAG_SIZE;
+		/*
+		 * If the next fragment would start on a page boundary, all
+		 * fragments are taken: mark the PMD fragment cache NULL.
+		 */
+		if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0)
+			pmd_frag = NULL;
+		mm->context.pmd_frag = pmd_frag;
+	}
+	spin_unlock(&mm->page_table_lock);
+	return (pmd_t *)ret;
+}
+
+static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
+{
+	void *ret = NULL;
+	struct ptdesc *ptdesc;
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+
+	if (mm == &init_mm)
+		gfp &= ~__GFP_ACCOUNT;
+	ptdesc = pagetable_alloc(gfp, 0);
+	if (!ptdesc)
+		return NULL;
+	if (!pagetable_pmd_ctor(ptdesc)) {
+		pagetable_free(ptdesc);
+		return NULL;
+	}
+
+	atomic_set(&ptdesc->pt_frag_refcount, 1);
+
+	ret = ptdesc_address(ptdesc);
+	/*
+	 * If we support only one fragment, just return the
+	 * allocated page.
+	 */
+	if (PMD_FRAG_NR == 1)
+		return ret;
+
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * If mm->context.pmd_frag is already set (checked under the
+	 * lock), leave it alone and return the new page with the
+	 * single fragment count set above.
+	 */
+	if (likely(!mm->context.pmd_frag)) {
+		atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR);
+		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	return (pmd_t *)ret;
+}
+
+pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr)
+{
+	pmd_t *cached = get_pmd_from_cache(mm);
+
+	/*
+	 * Prefer a fragment left over in the mm's cache; only when there
+	 * is none, allocate a fresh backing page. @vmaddr is not used.
+	 */
+	return cached ? cached : __alloc_for_pmdcache(mm);
+}
+
+void pmd_fragment_free(unsigned long *pmd)
+{
+	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
+
+	if (pagetable_is_reserved(ptdesc))	/* reserved pages skip the fragment refcount */
+		return free_reserved_ptdesc(ptdesc);
+
+	BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
+	if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {	/* last reference dropped */
+		pagetable_pmd_dtor(ptdesc);
+		pagetable_free(ptdesc);
+	}
+}
+
+static inline void pgtable_free(void *table, int index)
+{
+	switch (index) {	/* @index says which kind of table @table is */
+	case PTE_INDEX:
+		pte_fragment_free(table, 0);
+		break;
+	case PMD_INDEX:
+		pmd_fragment_free(table);
+		break;
+	case PUD_INDEX:
+		__pud_free(table);
+		break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+		/* 16M hugepd directory at pud level */
+	case HTLB_16M_INDEX:
+		BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+		kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+		break;
+		/* 16G hugepd directory at the pgd level */
+	case HTLB_16G_INDEX:
+		BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+		kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+		break;
+#endif
+		/* We don't free pgd table via RCU callback, so any other index is a bug */
+	default:
+		BUG();
+	}
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index)
+{
+	unsigned long tagged = (unsigned long)table | index;
+
+	BUG_ON(index > MAX_PGTABLE_INDEX_SIZE);
+	/* @index rides in the low pointer bits; __tlb_remove_table() splits it off. */
+	tlb_remove_table(tlb, (void *)tagged);
+}
+
+void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); /* pointer part */
+	unsigned int index = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; /* low-bit tag */
+
+	pgtable_free(table, index);
+}
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
+
+void arch_report_meminfo(struct seq_file *m)
+{
+	/*
+	 * Hash maps the memory with one size, mmu_linear_psize, so only
+	 * radix prints these. Each shift converts a page count to kB.
+	 */
+	if (!radix_enabled())
+		return;
+	seq_printf(m, "DirectMap4k:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2); /* x 4 */
+	seq_printf(m, "DirectMap64k:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6); /* x 64 */
+	seq_printf(m, "DirectMap2M:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11); /* x 2048 */
+	seq_printf(m, "DirectMap1G:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20); /* x 1048576 */
+}
+#endif /* CONFIG_PROC_FS */
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep)
+{
+	unsigned long val;
+
+	/*
+	 * Swap _PAGE_PRESENT for _PAGE_INVALID so that no parallel hardware
+	 * update is possible, while pte_present stays true and we don't
+	 * take a wrong fault.
+	 */
+	val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
+
+	/* Wrap pte_update()'s return value as a pte_t. */
+	return __pte(val);
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	if (radix_enabled())	/* radix has its own commit helper */
+		radix__ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+	else			/* otherwise just install the new pte */
+		set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+
+/*
+ * For hash translation mode, we use the deposited table to store hash slot
+ * information and they are stored at PTRS_PER_PMD offset from related pmd
+ * location. Hence a pmd move requires deposit and withdraw.
+ *
+ * For radix translation with split pmd ptl, we store the deposited table in the
+ * pmd page. Hence if we have different pmd page we need to withdraw during pmd
+ * move.
+ *
+ * With hash we use deposited table always irrespective of anon or not.
+ * With radix we use deposited table only for anonymous mapping.
+ */
+int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+			   struct spinlock *old_pmd_ptl,
+			   struct vm_area_struct *vma)
+{
+	/* Hash always uses the deposited table, so it always withdraws. */
+	if (!radix_enabled())
+		return true;
+	return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
+}
+
+/*
+ * Does the CPU support tlbie?
+ */
+bool tlbie_capable __read_mostly = true;
+EXPORT_SYMBOL(tlbie_capable);
+
+/*
+ * Should tlbie be used for management of CPU TLBs, for kernel and process
+ * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
+ * guest address spaces.
+ */
+bool tlbie_enabled __read_mostly = true;
+
+static int __init setup_disable_tlbie(char *str)
+{
+	if (!radix_enabled()) {
+		pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
+		return 1;
+	}
+
+	tlbie_capable = false;
+	tlbie_enabled = false;
+
+	return 1;	/* 1 in both cases: the option has been handled */
+}
+__setup("disable_tlbie", setup_disable_tlbie);
+
+static int __init pgtable_debugfs_setup(void)
+{
+	/*
+	 * The knob is only created when tlbie_capable is set.
+	 *
+	 * There is no locking vs tlb flushing when changing this value.
+	 * The tlb flushers will see one value or another, and use either
+	 * tlbie or tlbiel with IPIs. In both cases the TLBs will be
+	 * invalidated as expected.
+	 */
+	if (tlbie_capable)
+		debugfs_create_bool("tlbie_enabled", 0600,
+				arch_debugfs_dir,
+				&tlbie_enabled);
+
+	return 0;
+}
+arch_initcall(pgtable_debugfs_setup);
+
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN)
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+ */
+unsigned long memremap_compat_align(void)
+{
+	unsigned int shift;
+
+	if (radix_enabled())
+		return SUBSECTION_SIZE;
+	shift = mmu_psize_defs[mmu_linear_psize].shift;
+	return max(SUBSECTION_SIZE, 1UL << shift);
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+	unsigned long access = vm_flags & VM_ACCESS_FLAGS;
+	unsigned long prot;
+
+	/* Radix supports execute-only, but protection_map maps X -> RX */
+	if (radix_enabled() && access == VM_EXEC)
+		prot = pgprot_val(PAGE_EXECONLY);
+	else
+		prot = pgprot_val(protection_map[vm_flags &
+						 (VM_ACCESS_FLAGS | VM_SHARED)]);
+
+	if (vm_flags & VM_SAO)
+		prot |= _PAGE_SAO;
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	prot |= vmflag_to_pte_pkey_bits(vm_flags);
+#endif
+
+	return __pgprot(prot);
+}
+EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
new file mode 100644
index 0000000000..1257339620
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#include <asm/mman.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/firmware.h>
+
+#include <linux/pkeys.h>
+#include <linux/of_fdt.h>
+
+
+int  num_pkey;		/* Max number of pkeys supported */
+/*
+ *  Keys marked in the reservation list cannot be allocated by  userspace
+ */
+u32 reserved_allocation_mask __ro_after_init;
+
+/* Bits set for the initially allocated keys */
+static u32 initial_allocation_mask __ro_after_init;
+
+/*
+ * Even if we allocate keys with sys_pkey_alloc(), we need to make sure
+ * other thread still find the access denied using the same keys.
+ */
+u64 default_amr __ro_after_init  = ~0x0UL;
+u64 default_iamr __ro_after_init = 0x5555555555555555UL;
+u64 default_uamor __ro_after_init;
+EXPORT_SYMBOL(default_amr);
+/*
+ * Key used to implement PROT_EXEC mmap. Denies READ/WRITE
+ * We pick key 2 because 0 is special key and 1 is reserved as per ISA.
+ */
+static int execute_only_key = 2;
+static bool pkey_execute_disable_supported;
+
+
+#define AMR_BITS_PER_PKEY 2
+#define AMR_RD_BIT 0x1UL
+#define AMR_WR_BIT 0x2UL
+#define IAMR_EX_BIT 0x1UL
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+
+static int __init dt_scan_storage_keys(unsigned long node,
+				       const char *uname, int depth,
+				       void *data)
+{
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	int *pkeys_total = data;
+	const __be32 *prop;
+
+	/* Only "cpu" nodes are of interest; returning 0 lets the scan go on */
+	if (!type || strcmp(type, "cpu"))
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL);
+	if (!prop)
+		return 0;
+	*pkeys_total = be32_to_cpu(prop[0]);
+	return 1;
+}
+
+static int __init scan_pkey_feature(void)
+{
+	int pkeys_total = 0;
+
+	/*
+	 * Pkey is not supported with Radix translation.
+	 */
+	if (early_radix_enabled())
+		return 0;
+
+	/*
+	 * Some versions of skiboot forgot to expose the property on
+	 * power8/9, so if the device tree does not define it, assume
+	 * 32 pkeys on P8/P9 bare metal.
+	 */
+	if (!of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total) &&
+	    !firmware_has_feature(FW_FEATURE_LPAR)) {
+		switch (PVR_VER(mfspr(SPRN_PVR))) {
+		case PVR_POWER8:
+		case PVR_POWER8E:
+		case PVR_POWER8NVL:
+		case PVR_POWER9:
+			pkeys_total = 32;
+			break;
+		}
+	}
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	/*
+	 * Cap the count at the number of keys that the pkey bits
+	 * supported by arch-neutral code can represent.
+	 */
+	pkeys_total = min_t(int, pkeys_total,
+			    ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1));
+#endif
+	return pkeys_total;
+}
+
+/* Early boot: size num_pkey and derive the default AMR/IAMR/UAMOR values and key masks. */
+void __init pkey_early_init_devtree(void)
+{
+	int pkeys_total, i;
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	/*
+	 * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
+	 * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
+	 * Ensure that the bits are distinct.
+	 */
+	BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
+		     (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+	/*
+	 * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
+	 * in the vmaflag. Make sure that is really the case.
+	 */
+	BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
+		     __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
+				!= (sizeof(u64) * BITS_PER_BYTE));
+#endif
+	/*
+	 * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+	 */
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+		return;
+
+	/* scan the device tree for pkey feature */
+	pkeys_total = scan_pkey_feature();
+	if (!pkeys_total)
+		goto out;
+
+	/* Allow all keys to be modified by default */
+	default_uamor = ~0x0UL;
+
+	cur_cpu_spec->mmu_features |= MMU_FTR_PKEY;
+
+	/*
+	 * The device tree cannot be relied to indicate support for
+	 * execute_disable support. Instead we use a PVR check.
+	 */
+	if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
+		pkey_execute_disable_supported = false;
+	else
+		pkey_execute_disable_supported = true;
+
+#ifdef CONFIG_PPC_4K_PAGES
+	/*
+	 * The OS can manage only 8 pkeys due to its inability to represent them
+	 * in the Linux 4K PTE. Mark all other keys reserved.
+	 */
+	num_pkey = min(8, pkeys_total);
+#else
+	num_pkey = pkeys_total;
+#endif
+
+	if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) {
+		/*
+		 * Insufficient number of keys to support
+		 * execute only key. Mark it unavailable.
+		 */
+		execute_only_key = -1;
+	} else {
+		/*
+		 * Mark the execute_only_pkey as not available for
+		 * user allocation via pkey_alloc.
+		 */
+		reserved_allocation_mask |= (0x1U << execute_only_key);
+
+		/*
+		 * Deny READ/WRITE for execute_only_key.
+		 * Allow execute in IAMR.
+		 */
+		default_amr  |= (0x3ul << pkeyshift(execute_only_key));
+		default_iamr &= ~(0x1ul << pkeyshift(execute_only_key));
+
+		/*
+		 * Clear the uamor bits for this key.
+		 */
+		default_uamor &= ~(0x3ul << pkeyshift(execute_only_key));
+	}
+
+	if (unlikely(num_pkey <= 3)) {
+		/*
+		 * Insufficient number of keys to support
+		 * KUAP/KUEP feature.
+		 */
+		disable_kuep = true;
+		disable_kuap = true;
+		WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey);
+	} else {
+		/* handle key which is used by kernel for KUAP */
+		reserved_allocation_mask |= (0x1U << 3);
+		/*
+		 * Mark access for kup_key in default amr so that
+		 * we continue to operate with that AMR in
+		 * copy_to/from_user().
+		 */
+		default_amr   &= ~(0x3ul << pkeyshift(3));
+		default_iamr  &= ~(0x1ul << pkeyshift(3));
+		default_uamor &= ~(0x3ul << pkeyshift(3));
+	}
+
+	/*
+	 * Allow access for only key 0. And prevent any other modification.
+	 */
+	default_amr   &= ~(0x3ul << pkeyshift(0));
+	default_iamr  &= ~(0x1ul << pkeyshift(0));
+	default_uamor &= ~(0x3ul << pkeyshift(0));
+	/*
+	 * key 0 is special in that we want to consider it an allocated
+	 * key which is preallocated. We don't allow changing AMR bits
+	 * w.r.t key 0. But one can pkey_free(key0)
+	 */
+	initial_allocation_mask |= (0x1U << 0);
+
+	/*
+	 * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+	 * programming note.
+	 */
+	reserved_allocation_mask |= (0x1U << 1);
+	default_uamor &= ~(0x3ul << pkeyshift(1));
+
+	/*
+	 * Prevent the usage of OS reserved keys. Update UAMOR
+	 * for those keys. Also mark the rest of the bits in the
+	 * 32 bit mask as reserved. The mask bit is shifted as an
+	 * unsigned value because i runs up to 31.
+	 */
+	for (i = num_pkey; i < 32 ; i++) {
+		reserved_allocation_mask |= (0x1U << i);
+		default_uamor &= ~(0x3ul << pkeyshift(i));
+	}
+	/*
+	 * Prevent the allocation of reserved keys too.
+	 */
+	initial_allocation_mask |= reserved_allocation_mask;
+
+	pr_info("Enabling pkeys with max key count %d\n", num_pkey);
+out:
+	/*
+	 * Setup uamor on boot cpu
+	 */
+	mtspr(SPRN_UAMOR, default_uamor);
+}
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
+{
+	if (disabled)
+		return;
+	/*
+	 * On hash if PKEY feature is not enabled, disable KUEP too.
+	 */
+	if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY))
+		return;
+
+	if (smp_processor_id() == boot_cpuid) {
+		pr_info("Activating Kernel Userspace Execution Prevention\n");
+		cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP;
+	}
+
+	/*
+	 * Radix always uses key0 of the IAMR to determine if an access is
+	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
+	 * fetch.
+	 */
+	mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+	isync();
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled)
+{
+	/*
+	 * Nothing to do when the caller asked for KUAP to be disabled.
+	 * On hash, KUAP also stays off if the PKEY feature is not
+	 * enabled.
+	 */
+	if (disabled ||
+	    (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY)))
+		return;
+
+	/* Only the boot CPU logs the message and sets the MMU feature bit. */
+	if (smp_processor_id() == boot_cpuid) {
+		pr_info("Activating Kernel Userspace Access Prevention\n");
+		cur_cpu_spec->mmu_features |= MMU_FTR_KUAP;
+	}
+
+	/* Set the default kernel AMR values on all cpus. */
+	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+	isync();
+}
+#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+void pkey_mm_init(struct mm_struct *mm)
+{
+	if (mmu_has_feature(MMU_FTR_PKEY)) {
+		mm_pkey_allocation_map(mm) = initial_allocation_mask;
+		mm->context.execute_only_pkey = execute_only_key;
+	}
+}
+
+static inline void init_amr(int pkey, u8 init_bits)
+{
+	u64 field = (u64)(0x3ul) << pkeyshift(pkey);	/* the two bits of @pkey */
+	u64 bits = ((u64)init_bits & 0x3UL) << pkeyshift(pkey);
+
+	current->thread.regs->amr = (current_thread_amr() & ~field) | bits;
+}
+
+static inline void init_iamr(int pkey, u8 init_bits)
+{
+	u64 field = (u64)(0x1ul) << pkeyshift(pkey);	/* low bit of @pkey's field */
+	u64 bits = ((u64)init_bits & 0x1UL) << pkeyshift(pkey);
+	u64 kept = current_thread_iamr() & ~field;
+
+	if (!likely(pkey_execute_disable_supported))
+		return;	/* IAMR is left untouched in that case */
+	current->thread.regs->iamr = kept | bits;
+}
+
+/*
+ * Set the access rights in the current thread's AMR and IAMR for @pkey to
+ * those given in @init_val; default_uamor is only consulted. @tsk is unused.
+ */
+int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+				unsigned long init_val)
+{
+	u64 new_amr_bits = 0x0ul;
+	u64 new_iamr_bits = 0x0ul;
+	u64 pkey_bits, uamor_pkey_bits;
+
+	/*
+	 * Check whether the key is disabled by UAMOR.
+	 */
+	pkey_bits = 0x3ul << pkeyshift(pkey);
+	uamor_pkey_bits = (default_uamor & pkey_bits);
+
+	/*
+	 * Both the bits in UAMOR corresponding to the key should be set
+	 */
+	if (uamor_pkey_bits != pkey_bits)
+		return -EINVAL;
+
+	if (init_val & PKEY_DISABLE_EXECUTE) {
+		if (!pkey_execute_disable_supported)
+			return -EINVAL;
+		new_iamr_bits |= IAMR_EX_BIT;
+	}
+	init_iamr(pkey, new_iamr_bits);
+
+	/* Set the bits we need in AMR; PKEY_DISABLE_ACCESS wins over _WRITE */
+	if (init_val & PKEY_DISABLE_ACCESS)
+		new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
+	else if (init_val & PKEY_DISABLE_WRITE)
+		new_amr_bits |= AMR_WR_BIT;
+
+	init_amr(pkey, new_amr_bits);
+	return 0;
+}
+
+int execute_only_pkey(struct mm_struct *mm)
+{
+	return mm->context.execute_only_pkey; /* per-mm value, set in pkey_mm_init()/arch_dup_pkeys() */
+}
+
+static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
+{
+	bool exec_only = (vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC;
+
+	/* The vm_flags test comes first since the vm_flags should be hot */
+	return exec_only &&
+	       vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey;
+}
+
+/*
+ * This should only be called for *plain* mprotect calls.
+ */
+int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
+				  int pkey)
+{
+	bool want_exec_only = (prot == PROT_EXEC);
+
+	/* Leaving an execute-only pkey: move back to the default pkey. */
+	if (vma_is_pkey_exec_only(vma) && !want_exec_only)
+		return 0;
+
+	/*
+	 * The requested protection is execute-only. Hence let's use an
+	 * execute-only pkey, provided the mm has a usable (positive)
+	 * one; otherwise fall through.
+	 */
+	if (want_exec_only) {
+		pkey = execute_only_pkey(vma->vm_mm);
+		if (pkey > 0)
+			return pkey;
+	}
+
+	/* Nothing to override. */
+	return vma_pkey(vma);
+}
+
+static bool pkey_access_permitted(int pkey, bool write, bool execute)
+{
+	int shift = pkeyshift(pkey);
+	u64 deny_bit;
+
+	/*
+	 * Execute is checked against the IAMR and takes precedence over
+	 * @write; reads and writes are checked against the AMR.
+	 */
+	if (execute)
+		return !(current_thread_iamr() & (IAMR_EX_BIT << shift));
+
+	deny_bit = (write ? AMR_WR_BIT : AMR_RD_BIT) << shift;
+	return !(current_thread_amr() & deny_bit);
+}
+
+bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+	/* Without the pkey MMU feature every access is permitted. */
+	if (mmu_has_feature(MMU_FTR_PKEY))
+		return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
+	return true;
+}
+
+/*
+ * We only want to enforce protection keys on the current thread because we
+ * effectively have no access to AMR/IAMR for other threads or any way to tell
+ * which AMR/IAMR in a threaded process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current mm, or if we are
+ * in a kernel thread.
+ */
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+			       bool execute, bool foreign)
+{
+	/*
+	 * Key permissions are enforced only when the pkey feature is
+	 * present, the access is not flagged @foreign and the vma is
+	 * not a foreign one.
+	 */
+	if (!mmu_has_feature(MMU_FTR_PKEY) || foreign || vma_is_foreign(vma))
+		return true;
+
+	return pkey_access_permitted(vma_pkey(vma), write, execute);
+}
+
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+	/* Duplicate the oldmm pkey state in mm, but only with the pkey feature. */
+	if (mmu_has_feature(MMU_FTR_PKEY)) {
+		mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm);
+		mm->context.execute_only_pkey =
+			oldmm->context.execute_only_pkey;
+	}
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
new file mode 100644
index 0000000000..35fd2a95be
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/security.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+
+/*
+ * Flush the TLB entry for the huge page at @vmaddr in @vma's mm, using the
+ * page size index derived from the hstate of the backing file.
+ */
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	struct hstate *h = hstate_file(vma->vm_file);
+
+	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, hstate_get_psize(h));
+}
+
+/*
+ * Same as radix__flush_hugetlb_page() but through the local-flush variant,
+ * radix__local_flush_tlb_page_psize().
+ */
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	struct hstate *h = hstate_file(vma->vm_file);
+
+	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, hstate_get_psize(h));
+}
+
+/*
+ * Flush the huge-page TLB entries of @vma's mm in [start, end), then notify
+ * secondary TLBs of the invalidation.
+ */
+void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				   unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int psize = hstate_get_psize(hstate_file(vma->vm_file));
+
+	/*
+	 * To keep things simple the page walk cache is flushed for any range
+	 * of at least PUD_SIZE, even for a PUD_SIZE hugetlb invalidate.
+	 */
+	if (end - start < PUD_SIZE)
+		radix__flush_tlb_range_psize(mm, start, end, psize);
+	else
+		radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
+
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+}
+
+/*
+ * Install @pte at @ptep for the huge page at @addr, replacing @old_pte.
+ * The huge page size passed to set_huge_pte_at() comes from the vma's hstate.
+ */
+void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep,
+					 pte_t old_pte, pte_t pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long psize = huge_page_size(hstate_vma(vma));
+	bool flush_first;
+
+	/*
+	 * POWER9 NMMU must flush the TLB after clearing the PTE before
+	 * installing a PTE with more relaxed access permissions, see
+	 * radix__ptep_set_access_flags.
+	 */
+	flush_first = !cpu_has_feature(CPU_FTR_ARCH_31) &&
+		      is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+		      atomic_read(&mm->context.copros) > 0;
+	if (flush_first)
+		radix__flush_hugetlb_page(vma, addr);
+
+	set_huge_pte_at(mm, addr, ptep, pte, psize);
+}
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
new file mode 100644
index 0000000000..c6a4ac766b
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -0,0 +1,1624 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Page table handling routines for radix page table.
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "radix-mmu: " fmt
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/string_helpers.h>
+#include <linux/memory.h>
+
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+#include <asm/powernv.h>
+#include <asm/sections.h>
+#include <asm/smp.h>
+#include <asm/trace.h>
+#include <asm/uaccess.h>
+#include <asm/ultravisor.h>
+#include <asm/set_memory.h>
+
+#include <trace/events/thp.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * Set to 1 by radix_init_pgtable(), which hands that value to init_mm as its
+ * guard PID and then increments this variable past it.
+ */
+unsigned int mmu_base_pid;
+
+/*
+ * Allocate @size bytes, aligned to @size, from memblock for use as a page
+ * table. @nid is passed through to memblock; non-zero @region_start and
+ * @region_end replace the default lower and upper bounds of the search.
+ * Panics if the allocation fails, so the result is never NULL.
+ */
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+			unsigned long region_start, unsigned long region_end)
+{
+	phys_addr_t min_addr = region_start ? region_start : MEMBLOCK_LOW_LIMIT;
+	phys_addr_t max_addr = region_end ? region_end : MEMBLOCK_ALLOC_ANYWHERE;
+	void *table;
+
+	table = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);
+	if (!table)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
+		      __func__, size, size, nid, &min_addr, &max_addr);
+
+	return table;
+}
+
+/*
+ * Map one page of @map_page_size at effective address @ea to physical
+ * address @pa in init_mm, allocating any missing page-table level with
+ * early_alloc_pgtable(). __map_kernel_page() takes this path while slab is
+ * not yet available. @nid, @region_start and @region_end are forwarded to
+ * the allocator. Always returns 0.
+ *
+ * When allocating pud or pmd pointers, we allocate a complete page
+ * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
+ * is to ensure that the page obtained from the memblock allocator
+ * can be completely used as page table page and can be freed
+ * correctly when the page table entries are removed.
+ */
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
+{
+	unsigned long pfn = pa >> PAGE_SHIFT;
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	pgdp = pgd_offset_k(ea);
+	p4dp = p4d_offset(pgdp, ea);
+	if (p4d_none(*p4dp)) {
+		pudp = early_alloc_pgtable(PAGE_SIZE, nid,
+					   region_start, region_end);
+		p4d_populate(&init_mm, p4dp, pudp);
+	}
+	pudp = pud_offset(p4dp, ea);
+	/* PUD_SIZE mapping: the pud entry itself is written as the leaf. */
+	if (map_page_size == PUD_SIZE) {
+		ptep = (pte_t *)pudp;
+		goto set_the_pte;
+	}
+	if (pud_none(*pudp)) {
+		pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
+					   region_end);
+		pud_populate(&init_mm, pudp, pmdp);
+	}
+	pmdp = pmd_offset(pudp, ea);
+	/* PMD_SIZE mapping: the pmd entry itself is written as the leaf. */
+	if (map_page_size == PMD_SIZE) {
+		ptep = pmdp_ptep(pmdp);
+		goto set_the_pte;
+	}
+	if (!pmd_present(*pmdp)) {
+		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+						region_start, region_end);
+		pmd_populate_kernel(&init_mm, pmdp, ptep);
+	}
+	ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+	/* ptesync is issued after every entry update in this file. */
+	asm volatile("ptesync": : :"memory");
+	return 0;
+}
+
+/*
+ * Map one page of @map_page_size at effective address @ea to physical
+ * address @pa in init_mm with protection @flags.
+ *
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region. They are only consumed on the
+ * early path (slab not yet available), which is delegated to
+ * early_map_kernel_page().
+ *
+ * Returns 0 on success, or -ENOMEM if a page-table level cannot be allocated.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
+{
+	unsigned long pfn = pa >> PAGE_SHIFT;
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	/*
+	 * Make sure task size is correct as per the max addr
+	 */
+	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+
+#ifdef CONFIG_PPC_64K_PAGES
+	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
+#endif
+
+	if (unlikely(!slab_is_available()))
+		return early_map_kernel_page(ea, pa, flags, map_page_size,
+						nid, region_start, region_end);
+
+	/*
+	 * Should make page table allocation functions be able to take a
+	 * node, so we can place kernel page tables on the right nodes after
+	 * boot.
+	 */
+	pgdp = pgd_offset_k(ea);
+	p4dp = p4d_offset(pgdp, ea);
+	pudp = pud_alloc(&init_mm, p4dp, ea);
+	if (!pudp)
+		return -ENOMEM;
+	/* PUD_SIZE mapping: the pud entry itself is written as the leaf. */
+	if (map_page_size == PUD_SIZE) {
+		ptep = (pte_t *)pudp;
+		goto set_the_pte;
+	}
+	pmdp = pmd_alloc(&init_mm, pudp, ea);
+	if (!pmdp)
+		return -ENOMEM;
+	/* PMD_SIZE mapping: the pmd entry itself is written as the leaf. */
+	if (map_page_size == PMD_SIZE) {
+		ptep = pmdp_ptep(pmdp);
+		goto set_the_pte;
+	}
+	ptep = pte_alloc_kernel(pmdp, ea);
+	if (!ptep)
+		return -ENOMEM;
+
+set_the_pte:
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+	asm volatile("ptesync": : :"memory");
+	return 0;
+}
+
+/*
+ * Map one kernel page of @map_page_size at @ea to @pa with protection
+ * @flags, passing a nid of -1 and no region bounds as placement hints.
+ * Returns the result of __map_kernel_page().
+ */
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size)
+{
+	const int nid_hint = -1;
+
+	return __map_kernel_page(ea, pa, flags, map_page_size, nid_hint, 0, 0);
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+/*
+ * Clear the PTE bits in @clear for every page of the kernel mapping that
+ * covers [start, end), then flush the kernel TLB for that range.
+ *
+ * @start is rounded down and @end rounded up to PAGE_SIZE. The walk stops at
+ * a leaf pud or pmd when one is found and updates that entry instead of a
+ * PTE. A page whose table lookup returns NULL is skipped.
+ */
+static void radix__change_memory_range(unsigned long start, unsigned long end,
+				       unsigned long clear)
+{
+	unsigned long idx;
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	start = ALIGN_DOWN(start, PAGE_SIZE);
+	end = PAGE_ALIGN(end); // aligns up
+
+	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
+		 start, end, clear);
+
+	for (idx = start; idx < end; idx += PAGE_SIZE) {
+		pgdp = pgd_offset_k(idx);
+		p4dp = p4d_offset(pgdp, idx);
+		pudp = pud_alloc(&init_mm, p4dp, idx);
+		if (!pudp)
+			continue;
+		if (pud_is_leaf(*pudp)) {
+			ptep = (pte_t *)pudp;
+			goto update_the_pte;
+		}
+		pmdp = pmd_alloc(&init_mm, pudp, idx);
+		if (!pmdp)
+			continue;
+		if (pmd_is_leaf(*pmdp)) {
+			ptep = pmdp_ptep(pmdp);
+			goto update_the_pte;
+		}
+		ptep = pte_alloc_kernel(pmdp, idx);
+		if (!ptep)
+			continue;
+update_the_pte:
+		/* Only bits are cleared here; the "set" argument is 0. */
+		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
+	}
+
+	radix__flush_tlb_kernel_range(start, end);
+}
+
+/*
+ * Remove _PAGE_WRITE from the kernel image between _stext and __end_rodata,
+ * and from the leading pages at PAGE_OFFSET that overlap the interrupt
+ * vector text.
+ */
+void radix__mark_rodata_ro(void)
+{
+	const unsigned long text_start = (unsigned long)_stext;
+	unsigned long page;
+
+	radix__change_memory_range(text_start, (unsigned long)__end_rodata,
+				   _PAGE_WRITE);
+
+	/* Walk up from PAGE_OFFSET; stop at the first page with no overlap. */
+	for (page = PAGE_OFFSET; page < text_start; page += PAGE_SIZE) {
+		if (!overlaps_interrupt_vector_text(page, page + PAGE_SIZE))
+			break;
+
+		radix__change_memory_range(page, page + PAGE_SIZE, _PAGE_WRITE);
+	}
+}
+
+/* Remove _PAGE_EXEC from the range between __init_begin and __init_end. */
+void radix__mark_initmem_nx(void)
+{
+	radix__change_memory_range((unsigned long)__init_begin,
+				   (unsigned long)__init_end, _PAGE_EXEC);
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
+/*
+ * Log one run of the linear mapping: the range [start, end), the page size
+ * used for it and whether it was mapped executable. Empty ranges are not
+ * reported.
+ */
+static inline void __meminit
+print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
+{
+	char size_str[10];
+	const char *exec_tag = "";
+
+	if (start >= end)
+		return;
+
+	if (exec)
+		exec_tag = " (exec)";
+
+	string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str));
+
+	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, size_str,
+		exec_tag);
+}
+
+/*
+ * Return the next physical address above @addr at which the linear mapping
+ * has to be split, or @end when there is none.
+ *
+ * Boundaries only exist with CONFIG_STRICT_KERNEL_RWX: the end of the
+ * interrupt code and the start of the kernel text (both only when the kernel
+ * runs at a non-zero real address), and __srwx_boundary. A boundary is
+ * returned as-is; it is not clamped to @end.
+ */
+static unsigned long next_boundary(unsigned long addr, unsigned long end)
+{
+#ifdef CONFIG_STRICT_KERNEL_RWX
+	unsigned long stext_phys;
+
+	stext_phys = __pa_symbol(_stext);
+
+	// Relocatable kernel running at non-zero real address
+	if (stext_phys != 0) {
+		// The end of interrupts code at zero is a rodata boundary
+		unsigned long end_intr = __pa_symbol(__end_interrupts) - stext_phys;
+		if (addr < end_intr)
+			return end_intr;
+
+		// Start of relocated kernel text is a rodata boundary
+		if (addr < stext_phys)
+			return stext_phys;
+	}
+
+	if (addr < __pa_symbol(__srwx_boundary))
+		return __pa_symbol(__srwx_boundary);
+#endif
+	return end;
+}
+
+/*
+ * Create the kernel linear mapping for the physical range [start, end).
+ *
+ * @start is rounded up and @end rounded down to PAGE_SIZE. Each step uses
+ * the largest of PUD_SIZE, PMD_SIZE and PAGE_SIZE that is aligned at the
+ * current address, fits in the gap up to next_boundary() (capped at
+ * max_mapping_size) and, for the two huge sizes, has a non-zero shift in
+ * mmu_psize_defs[]. Chunks overlapping kernel text or interrupt vector text
+ * are mapped PAGE_KERNEL_X, everything else with @_prot.
+ *
+ * Returns 0 on success or the first error from __map_kernel_page().
+ */
+static int __meminit create_physical_mapping(unsigned long start,
+					     unsigned long end,
+					     int nid, pgprot_t _prot)
+{
+	unsigned long vaddr, addr, mapping_size = 0;
+	bool prev_exec, exec = false;
+	pgprot_t prot;
+	int psize;
+	unsigned long max_mapping_size = memory_block_size;
+
+	/* With debug_pagealloc or kfence only PAGE_SIZE mappings are used. */
+	if (debug_pagealloc_enabled_or_kfence())
+		max_mapping_size = PAGE_SIZE;
+
+	start = ALIGN(start, PAGE_SIZE);
+	end   = ALIGN_DOWN(end, PAGE_SIZE);
+	for (addr = start; addr < end; addr += mapping_size) {
+		unsigned long gap, previous_size;
+		int rc;
+
+		gap = next_boundary(addr, end) - addr;
+		if (gap > max_mapping_size)
+			gap = max_mapping_size;
+		previous_size = mapping_size;
+		prev_exec = exec;
+
+		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
+		    mmu_psize_defs[MMU_PAGE_1G].shift) {
+			mapping_size = PUD_SIZE;
+			psize = MMU_PAGE_1G;
+		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
+			   mmu_psize_defs[MMU_PAGE_2M].shift) {
+			mapping_size = PMD_SIZE;
+			psize = MMU_PAGE_2M;
+		} else {
+			mapping_size = PAGE_SIZE;
+			psize = mmu_virtual_psize;
+		}
+
+		vaddr = (unsigned long)__va(addr);
+
+		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
+		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
+			prot = PAGE_KERNEL_X;
+			exec = true;
+		} else {
+			prot = _prot;
+			exec = false;
+		}
+
+		/*
+		 * A change of page size or exec attribute ends the current
+		 * run: report it and restart the run at addr. On the first
+		 * iteration the run is empty, so print_mapping() is silent.
+		 */
+		if (mapping_size != previous_size || exec != prev_exec) {
+			print_mapping(start, addr, previous_size, prev_exec);
+			start = addr;
+		}
+
+		/* start/end also act as the region hint for table allocation. */
+		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
+		if (rc)
+			return rc;
+
+		update_page_count(psize, 1);
+	}
+
+	/* Report the final run. */
+	print_mapping(start, addr, mapping_size, exec);
+	return 0;
+}
+
+/*
+ * Build the boot-time radix page tables: create the linear mapping for every
+ * memblock memory range, allocate and fill the process table, and assign the
+ * guard PID to init_mm.
+ */
+static void __init radix_init_pgtable(void)
+{
+	unsigned long rts_field;
+	phys_addr_t start, end;
+	u64 i;
+
+	/* We don't support slb for radix */
+	slb_set_size(0);
+
+	/*
+	 * Create the linear mapping
+	 */
+	for_each_mem_range(i, &start, &end) {
+		/*
+		 * The memblock allocator is up at this point, so the
+		 * page tables will be allocated within the range. No
+		 * need for a node (which we don't have yet).
+		 */
+
+		if (end >= RADIX_VMALLOC_START) {
+			pr_warn("Outside the supported range\n");
+			continue;
+		}
+
+		WARN_ON(create_physical_mapping(start, end,
+						-1, PAGE_KERNEL));
+	}
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+		/*
+		 * Older versions of KVM on these machines prefer if the
+		 * guest only uses the low 19 PID bits.
+		 */
+		mmu_pid_bits = 19;
+	}
+	mmu_base_pid = 1;
+
+	/*
+	 * Allocate Partition table and process table for the
+	 * host.
+	 */
+	BUG_ON(PRTB_SIZE_SHIFT > 36);
+	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
+	/*
+	 * Fill in the process table.
+	 */
+	rts_field = radix__get_tree_size();
+	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
+
+	/*
+	 * The init_mm context is given the first available (non-zero) PID,
+	 * which is the "guard PID" and contains no page table. PIDR should
+	 * never be set to zero because that duplicates the kernel address
+	 * space at the 0x0... offset (quadrant 0)!
+	 *
+	 * An arbitrary PID that may later be allocated by the PID allocator
+	 * for userspace processes must not be used either, because that
+	 * would cause stale user mappings for that PID on CPUs outside of
+	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
+	 *
+	 * So permanently carve out one PID for the purpose of a guard PID.
+	 */
+	init_mm.context.id = mmu_base_pid;
+	mmu_base_pid++;
+}
+
+/*
+ * Initialise the partition table and install entry 0. Its first doubleword
+ * combines the tree size, the physical address of init_mm's pgd, the pgd
+ * index size and PATB_HR; the second combines the physical address of the
+ * process table, its size field (PRTB_SIZE_SHIFT - 12) and PATB_GR.
+ */
+static void __init radix_init_partition_table(void)
+{
+	unsigned long rts_field, dw0, dw1;
+
+	mmu_partition_table_init();
+	rts_field = radix__get_tree_size();
+	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
+	dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
+	mmu_partition_table_set_entry(0, dw0, dw1, false);
+
+	pr_info("Initializing Radix MMU\n");
+}
+
+/*
+ * Translate a page size shift (12, 16, 21 or 30) into the matching
+ * MMU_PAGE_* index. Returns -1 for any other shift.
+ */
+static int __init get_idx_from_shift(unsigned int shift)
+{
+	switch (shift) {
+	case 0xc:
+		return MMU_PAGE_4K;
+	case 0x10:
+		return MMU_PAGE_64K;
+	case 0x15:
+		return MMU_PAGE_2M;
+	case 0x1e:
+		return MMU_PAGE_1G;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * of_scan_flat_dt() callback: read the radix page size encodings from a
+ * "cpu" node and record them in mmu_psize_defs[].
+ *
+ * Each 32-bit cell of "ibm,processor-radix-AP-encodings" carries the AP
+ * encoding in its top 3 bits and the page size shift in the remaining bits.
+ * Cells whose shift has no MMU_PAGE_* index are ignored.
+ *
+ * Returns 0 to continue the scan (node is not a cpu node, or lacks the
+ * property) and 1 once a cpu node with the property has been processed.
+ */
+static int __init radix_dt_scan_page_sizes(unsigned long node,
+					   const char *uname, int depth,
+					   void *data)
+{
+	int size = 0;
+	int shift, idx;
+	unsigned int ap;
+	const __be32 *prop;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	/* Grab page size encodings */
+	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
+	if (!prop)
+		return 0;
+
+	pr_info("Page sizes from device-tree:\n");
+	for (; size >= 4; size -= 4, ++prop) {
+
+		struct mmu_psize_def *def;
+		u32 encoding = be32_to_cpu(prop[0]);
+
+		/*
+		 * top 3 bit is AP encoding. The mask constant is unsigned:
+		 * shifting a signed 0xe left by 28 overflows int, which is
+		 * undefined behaviour.
+		 */
+		shift = encoding & ~(0xeU << 28);
+		ap = encoding >> 29;
+		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);
+
+		idx = get_idx_from_shift(shift);
+		if (idx < 0)
+			continue;
+
+		def = &mmu_psize_defs[idx];
+		def->shift = shift;
+		def->ap  = ap;
+		def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
+	}
+
+	/* needed ? */
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
+	return 1;
+}
+
+/*
+ * Look up the supported radix page sizes in the flattened device tree. If
+ * the scan finds none, assume 4K and 64K page support.
+ */
+void __init radix__early_init_devtree(void)
+{
+	struct mmu_psize_def *def;
+
+	/*
+	 * Try to find the available page sizes in the device-tree; a
+	 * non-zero scan result means they were found.
+	 */
+	if (of_scan_flat_dt(radix_dt_scan_page_sizes, NULL))
+		return;
+
+	/*
+	 * No page size details found in device tree.
+	 * Let's assume we have page 4k and 64k support
+	 */
+	def = &mmu_psize_defs[MMU_PAGE_4K];
+	def->shift = 12;
+	def->ap = 0x0;
+	def->h_rpt_pgsize = psize_to_rpti_pgsize(MMU_PAGE_4K);
+
+	def = &mmu_psize_defs[MMU_PAGE_64K];
+	def->shift = 16;
+	def->ap = 0x5;
+	def->h_rpt_pgsize = psize_to_rpti_pgsize(MMU_PAGE_64K);
+}
+
+/*
+ * Radix MMU setup at early boot: publish the radix page-table geometry and
+ * kernel address-space layout through the __* variables, build the page
+ * tables with radix_init_pgtable(), set up either the partition table (when
+ * FW_FEATURE_LPAR is absent) or the pseries equivalent, then switch to the
+ * init_mm context and flush the TLB.
+ */
+void __init radix__early_init_mmu(void)
+{
+	unsigned long lpcr;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+#ifdef CONFIG_PPC_64K_PAGES
+	/* PAGE_SIZE mappings */
+	mmu_virtual_psize = MMU_PAGE_64K;
+#else
+	mmu_virtual_psize = MMU_PAGE_4K;
+#endif
+#endif
+	/*
+	 * initialize page table size
+	 */
+	__pte_index_size = RADIX_PTE_INDEX_SIZE;
+	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
+	__pud_index_size = RADIX_PUD_INDEX_SIZE;
+	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
+	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
+	__pte_table_size = RADIX_PTE_TABLE_SIZE;
+	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
+	__pud_table_size = RADIX_PUD_TABLE_SIZE;
+	__pgd_table_size = RADIX_PGD_TABLE_SIZE;
+
+	__pmd_val_bits = RADIX_PMD_VAL_BITS;
+	__pud_val_bits = RADIX_PUD_VAL_BITS;
+	__pgd_val_bits = RADIX_PGD_VAL_BITS;
+
+	/* Kernel virtual address space layout. */
+	__kernel_virt_start = RADIX_KERN_VIRT_START;
+	__vmalloc_start = RADIX_VMALLOC_START;
+	__vmalloc_end = RADIX_VMALLOC_END;
+	__kernel_io_start = RADIX_KERN_IO_START;
+	__kernel_io_end = RADIX_KERN_IO_END;
+	vmemmap = (struct page *)RADIX_VMEMMAP_START;
+	ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+	pci_io_base = ISA_IO_BASE;
+#endif
+	__pte_frag_nr = RADIX_PTE_FRAG_NR;
+	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
+	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
+	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
+
+	radix_init_pgtable();
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		/* Set LPCR_UPRT and LPCR_HR before installing the table. */
+		lpcr = mfspr(SPRN_LPCR);
+		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+		radix_init_partition_table();
+	} else {
+		radix_init_pseries();
+	}
+
+	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
+	/* Switch to the guard PID before turning on MMU */
+	radix__switch_mmu_context(NULL, &init_mm);
+	tlbiel_all();
+}
+
+/*
+ * Radix MMU setup for a secondary CPU: when FW_FEATURE_LPAR is absent, set
+ * LPCR_UPRT/LPCR_HR and point the PTCR at the existing partition table; then
+ * switch to the init_mm context, flush the TLB and clear UAMOR.
+ */
+void radix__early_init_mmu_secondary(void)
+{
+	unsigned long lpcr;
+	/*
+	 * update partition table control register and UPRT
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		lpcr = mfspr(SPRN_LPCR);
+		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+
+		set_ptcr_when_no_uv(__pa(partition_tb) |
+				    (PATB_SIZE_SHIFT - 12));
+	}
+
+	radix__switch_mmu_context(NULL, &init_mm);
+	tlbiel_all();
+
+	/* Make sure userspace can't change the AMR */
+	mtspr(SPRN_UAMOR, 0);
+}
+
+/* Called during kexec sequence with MMU off */
+notrace void radix__mmu_cleanup_all(void)
+{
+	unsigned long lpcr;
+
+	/* Nothing to tear down when FW_FEATURE_LPAR is set. */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	/* Clear LPCR_UPRT, zero both PTCRs, then flush the whole TLB. */
+	lpcr = mfspr(SPRN_LPCR);
+	mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
+	set_ptcr_when_no_uv(0);
+	powernv_set_nmmu_ptcr(0);
+	radix__flush_tlb_all();
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Free the PTE page at @pte_start and clear @pmd, but only if every one of
+ * its PTRS_PER_PTE entries is none. Otherwise leave everything untouched.
+ */
+static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *entry;
+	pte_t *limit = pte_start + PTRS_PER_PTE;
+
+	for (entry = pte_start; entry < limit; entry++) {
+		if (!pte_none(*entry))
+			return;
+	}
+
+	pte_free_kernel(&init_mm, pte_start);
+	pmd_clear(pmd);
+}
+
+/*
+ * Free the PMD page at @pmd_start and clear @pud, but only if every one of
+ * its PTRS_PER_PMD entries is none. Otherwise leave everything untouched.
+ */
+static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *entry;
+	pmd_t *limit = pmd_start + PTRS_PER_PMD;
+
+	for (entry = pmd_start; entry < limit; entry++) {
+		if (!pmd_none(*entry))
+			return;
+	}
+
+	pmd_free(&init_mm, pmd_start);
+	pud_clear(pud);
+}
+
+/*
+ * Free the PUD page at @pud_start and clear @p4d, but only if every one of
+ * its PTRS_PER_PUD entries is none. Otherwise leave everything untouched.
+ */
+static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *entry;
+	pud_t *limit = pud_start + PTRS_PER_PUD;
+
+	for (entry = pud_start; entry < limit; entry++) {
+		if (!pud_none(*entry))
+			return;
+	}
+
+	pud_free(&init_mm, pud_start);
+	p4d_clear(p4d);
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * True when vmemmap_populated() reports nothing in the PMD_SIZE-aligned
+ * block containing @addr. @end is not consulted.
+ */
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
+{
+	return !vmemmap_populated(ALIGN_DOWN(addr, PMD_SIZE), PMD_SIZE);
+}
+
+/*
+ * True when vmemmap_populated() reports nothing in the PAGE_SIZE-aligned
+ * page containing @addr. @end is not consulted.
+ */
+static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end)
+{
+	return !vmemmap_populated(ALIGN_DOWN(addr, PAGE_SIZE), PAGE_SIZE);
+}
+#endif
+
+/*
+ * Release the 2^@order pages starting at @page that backed a vmemmap mapping.
+ *
+ * If @altmap is given and @page lies inside its pfn range, the pages are
+ * handed back with vmem_altmap_free(). Otherwise reserved pages are released
+ * one by one with free_reserved_page() and all others with free_pages().
+ */
+static void __meminit free_vmemmap_pages(struct page *page,
+					 struct vmem_altmap *altmap,
+					 int order)
+{
+	unsigned int nr_pages = 1 << order;
+
+	if (altmap) {
+		unsigned long alt_start, alt_end;
+		unsigned long base_pfn = page_to_pfn(page);
+
+		/*
+		 * With 2M vmemmap mapping we can have things set up
+		 * such that even though an altmap is specified we never
+		 * used the altmap.
+		 */
+		alt_start = altmap->base_pfn;
+		alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+
+		if (base_pfn >= alt_start && base_pfn < alt_end) {
+			vmem_altmap_free(altmap, nr_pages);
+			return;
+		}
+	}
+
+	if (PageReserved(page)) {
+		/* allocated from memblock */
+		while (nr_pages--)
+			free_reserved_page(page++);
+	} else
+		free_pages((unsigned long)page_address(page), order);
+}
+
+/*
+ * Clear the present PTEs that map [addr, end) in the PTE page @pte_start.
+ *
+ * With @direct set, the entries are only cleared and the number of cleared
+ * pages is subtracted from the mmu_virtual_psize page count. Otherwise the
+ * backing page is also released through free_vmemmap_pages(). A page that is
+ * only partly covered by the range is left alone unless it is a vmemmap page
+ * that vmemmap_page_is_unused() reports as unused.
+ */
+static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr,
+				       unsigned long end, bool direct,
+				       struct vmem_altmap *altmap)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte;
+
+	pte = pte_start + pte_index(addr);
+	for (; addr < end; addr = next, pte++) {
+		/* Start of the next page, clamped to the end of the range. */
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+			if (!direct)
+				free_vmemmap_pages(pte_page(*pte), altmap, 0);
+			pte_clear(&init_mm, addr, pte);
+			pages++;
+		}
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		else if (!direct && vmemmap_page_is_unused(addr, next)) {
+			free_vmemmap_pages(pte_page(*pte), altmap, 0);
+			pte_clear(&init_mm, addr, pte);
+		}
+#endif
+	}
+	if (direct)
+		update_page_count(mmu_virtual_psize, -pages);
+}
+
+/*
+ * Remove the mappings for [addr, end) below the PMD page @pmd_start.
+ *
+ * A leaf PMD is cleared when the range covers it completely, or, for vmemmap
+ * (@direct false), when vmemmap_pmd_is_unused() reports it unused; in the
+ * vmemmap case its backing pages are freed as well. For a non-leaf PMD the
+ * walk descends into the PTE page, which free_pte_table() releases once it
+ * is empty. With @direct set, the number of fully covered leaf PMDs is
+ * subtracted from the MMU_PAGE_2M page count.
+ */
+static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+				       unsigned long end, bool direct,
+				       struct vmem_altmap *altmap)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte_base;
+	pmd_t *pmd;
+
+	pmd = pmd_start + pmd_index(addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (pmd_is_leaf(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE)) {
+				if (!direct)
+					free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+				pte_clear(&init_mm, addr, (pte_t *)pmd);
+				pages++;
+			}
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+			else if (!direct && vmemmap_pmd_is_unused(addr, next)) {
+				free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+				pte_clear(&init_mm, addr, (pte_t *)pmd);
+			}
+#endif
+			continue;
+		}
+
+		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+		remove_pte_table(pte_base, addr, next, direct, altmap);
+		free_pte_table(pte_base, pmd);
+	}
+	if (direct)
+		update_page_count(MMU_PAGE_2M, -pages);
+}
+
+/*
+ * Remove the mappings for [addr, end) below the PUD page @pud_start.
+ *
+ * A leaf PUD is cleared only when the range covers it completely; a partial
+ * range triggers a one-time warning and the entry is left in place. For a
+ * non-leaf PUD the walk descends into the PMD page, which free_pmd_table()
+ * releases once it is empty. With @direct set, the number of cleared leaf
+ * PUDs is subtracted from the MMU_PAGE_1G page count.
+ */
+static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
+				       unsigned long end, bool direct,
+				       struct vmem_altmap *altmap)
+{
+	unsigned long next, pages = 0;
+	pmd_t *pmd_base;
+	pud_t *pud;
+
+	pud = pud_start + pud_index(addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (pud_is_leaf(*pud)) {
+			if (!IS_ALIGNED(addr, PUD_SIZE) ||
+			    !IS_ALIGNED(next, PUD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+			pte_clear(&init_mm, addr, (pte_t *)pud);
+			pages++;
+			continue;
+		}
+
+		pmd_base = pud_pgtable(*pud);
+		remove_pmd_table(pmd_base, addr, next, direct, altmap);
+		free_pmd_table(pmd_base, pud);
+	}
+	if (direct)
+		update_page_count(MMU_PAGE_1G, -pages);
+}
+
+/*
+ * Tear down the kernel mappings for [start, end) in init_mm and flush the
+ * kernel TLB for that range. The walk runs under init_mm.page_table_lock.
+ *
+ * @direct and @altmap are passed down the walk: @direct selects page-count
+ * accounting instead of freeing backing pages, @altmap is where vmemmap
+ * pages that came from an altmap are returned.
+ */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+		 struct vmem_altmap *altmap)
+{
+	unsigned long addr, next;
+	pud_t *pud_base;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	spin_lock(&init_mm.page_table_lock);
+
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+
+		pgd = pgd_offset_k(addr);
+		p4d = p4d_offset(pgd, addr);
+		if (!p4d_present(*p4d))
+			continue;
+
+		if (p4d_is_leaf(*p4d)) {
+			if (!IS_ALIGNED(addr, P4D_SIZE) ||
+			    !IS_ALIGNED(next, P4D_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+
+			/*
+			 * NOTE(review): the entry is cleared through the pgd
+			 * pointer rather than p4d; presumably both refer to
+			 * the same entry here - confirm.
+			 */
+			pte_clear(&init_mm, addr, (pte_t *)pgd);
+			continue;
+		}
+
+		pud_base = p4d_pgtable(*p4d);
+		remove_pud_table(pud_base, addr, next, direct, altmap);
+		free_pud_table(pud_base, p4d);
+	}
+
+	spin_unlock(&init_mm.page_table_lock);
+	radix__flush_tlb_kernel_range(start, end);
+}
+
+/*
+ * Create the linear mapping for [start, end), translating both bounds with
+ * __pa() first. Returns -1 if @end reaches RADIX_VMALLOC_START, otherwise the
+ * result of create_physical_mapping().
+ */
+int __meminit radix__create_section_mapping(unsigned long start,
+					    unsigned long end, int nid,
+					    pgprot_t prot)
+{
+	if (end < RADIX_VMALLOC_START)
+		return create_physical_mapping(__pa(start), __pa(end),
+					       nid, prot);
+
+	pr_warn("Outside the supported range\n");
+	return -1;
+}
+
+/*
+ * Remove the mapping for [start, end) by calling remove_pagetable() with
+ * direct=true and no altmap. Always returns 0.
+ */
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
+{
+	remove_pagetable(start, end, true, NULL);
+	return 0;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * __map_kernel_page() with a node hint only: both region bounds are passed
+ * as 0.
+ */
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+				 pgprot_t flags, unsigned int map_page_size,
+				 int nid)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
+/*
+ * Map one vmemmap page of @page_size at virtual address @start to physical
+ * address @phys with PAGE_KERNEL protection.
+ *
+ * Returns -1 when the mapping would reach RADIX_VMEMMAP_END, 0 otherwise.
+ * A failure of the mapping itself is fatal (BUG_ON).
+ */
+int __meminit radix__vmemmap_create_mapping(unsigned long start,
+				      unsigned long page_size,
+				      unsigned long phys)
+{
+	/* Node of the backing memory, used as a hint for the page tables. */
+	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+	int ret;
+
+	if ((start + page_size) >= RADIX_VMEMMAP_END) {
+		pr_warn("Outside the supported range\n");
+		return -1;
+	}
+
+	ret = __map_kernel_page_nid(start, phys, PAGE_KERNEL, page_size, nid);
+	BUG_ON(ret);
+
+	return 0;
+}
+
+
+/*
+ * Defers to __vmemmap_can_optimize() when radix is enabled; otherwise the
+ * answer is always false.
+ */
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
+{
+	if (!radix_enabled())
+		return false;
+
+	return __vmemmap_can_optimize(altmap, pgmap);
+}
+
+/*
+ * Report whether @pmdp holds a large mapping. When it does, the mapping is
+ * also passed to vmemmap_verify() for [addr, next) on @node. Returns the
+ * value of pmd_large().
+ */
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+				unsigned long addr, unsigned long next)
+{
+	const int huge = pmd_large(*pmdp);
+
+	if (!huge)
+		return 0;
+
+	vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);
+	return huge;
+}
+
+/*
+ * Install a PAGE_KERNEL leaf entry at @pmdp that maps @addr to the memory
+ * block @p, then run vmemmap_verify() on it for [addr, next) on @node.
+ * @addr must be PMD_SIZE aligned (checked with VM_BUG_ON).
+ */
+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+			       unsigned long addr, unsigned long next)
+{
+	pte_t entry;
+	pte_t *ptep = pmdp_ptep(pmdp);
+
+	VM_BUG_ON(!IS_ALIGNED(addr, PMD_SIZE));
+	entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+	set_pte_at(&init_mm, addr, ptep, entry);
+	asm volatile("ptesync": : :"memory");
+
+	vmemmap_verify(ptep, node, addr, next);
+}
+
+/*
+ * Make sure the PTE for vmemmap address @addr under @pmdp is populated.
+ *
+ * An existing entry is left as is. An empty one is filled with either a
+ * freshly allocated page (from @altmap when possible, falling back to
+ * regular memory) or, when @reuse is given, with @reuse itself after taking
+ * a reference on it.
+ *
+ * Returns the PTE pointer, or NULL if no backing page could be allocated.
+ */
+static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long addr,
+						     int node,
+						     struct vmem_altmap *altmap,
+						     struct page *reuse)
+{
+	pte_t *pte = pte_offset_kernel(pmdp, addr);
+
+	if (pte_none(*pte)) {
+		pte_t entry;
+		void *p;
+
+		if (!reuse) {
+			/*
+			 * make sure we don't create altmap mappings
+			 * covering things outside the device.
+			 */
+			if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
+				altmap = NULL;
+
+			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
+			/* The altmap allocation failed: retry without it. */
+			if (!p && altmap)
+				p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
+			if (!p)
+				return NULL;
+			pr_debug("PAGE_SIZE vmemmap mapping\n");
+		} else {
+			/*
+			 * When a PTE/PMD entry is freed from the init_mm
+			 * there's a free_pages() call to this page allocated
+			 * above. Thus this get_page() is paired with the
+			 * put_page_testzero() on the freeing path.
+			 * This can only called by certain ZONE_DEVICE path,
+			 * and through vmemmap_populate_compound_pages() when
+			 * slab is available.
+			 */
+			get_page(reuse);
+			p = page_to_virt(reuse);
+			pr_debug("Tail page reuse vmemmap mapping\n");
+		}
+
+		VM_BUG_ON(!PAGE_ALIGNED(addr));
+		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+		set_pte_at(&init_mm, addr, pte, entry);
+		asm volatile("ptesync": : :"memory");
+	}
+	return pte;
+}
+
+/*
+ * Return the pud entry for @address below @p4dp, allocating the PUD page if
+ * the p4d entry is empty: from memblock while slab is unavailable, through
+ * pud_alloc() afterwards. The pud_alloc() result is returned unchanged.
+ */
+static inline pud_t *vmemmap_pud_alloc(p4d_t *p4dp, int node,
+				       unsigned long address)
+{
+	pud_t *new_table;
+
+	if (likely(!p4d_none(*p4dp)))
+		return pud_offset(p4dp, address);
+
+	if (likely(slab_is_available()))
+		return pud_alloc(&init_mm, p4dp, address);
+
+	/* All early vmemmap mapping to keep simple do it at PAGE_SIZE */
+	new_table = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+	p4d_populate(&init_mm, p4dp, new_table);
+
+	return pud_offset(p4dp, address);
+}
+
+/*
+ * Return the pmd entry for @address below @pudp, allocating the PMD page if
+ * the pud entry is empty: from memblock while slab is unavailable, through
+ * pmd_alloc() afterwards. The pmd_alloc() result is returned unchanged.
+ */
+static inline pmd_t *vmemmap_pmd_alloc(pud_t *pudp, int node,
+				       unsigned long address)
+{
+	pmd_t *new_table;
+
+	if (likely(!pud_none(*pudp)))
+		return pmd_offset(pudp, address);
+
+	if (likely(slab_is_available()))
+		return pmd_alloc(&init_mm, pudp, address);
+
+	/* All early vmemmap mapping to keep simple do it at PAGE_SIZE */
+	new_table = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+	pud_populate(&init_mm, pudp, new_table);
+
+	return pmd_offset(pudp, address);
+}
+
+/*
+ * Return the pte entry for @address below @pmdp, allocating the PTE page if
+ * the pmd entry is empty: from memblock while slab is unavailable, through
+ * pte_alloc_kernel() afterwards. The pte_alloc_kernel() result is returned
+ * unchanged.
+ */
+static inline pte_t *vmemmap_pte_alloc(pmd_t *pmdp, int node,
+				       unsigned long address)
+{
+	pte_t *new_table;
+
+	if (likely(!pmd_none(*pmdp)))
+		return pte_offset_kernel(pmdp, address);
+
+	if (likely(slab_is_available()))
+		return pte_alloc_kernel(pmdp, address);
+
+	/* All early vmemmap mapping to keep simple do it at PAGE_SIZE */
+	new_table = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+	pmd_populate(&init_mm, pmdp, new_table);
+
+	return pte_offset_kernel(pmdp, address);
+}
+
+
+
+/*
+ * Populate the vmemmap for the virtual range [start, end) on @node.
+ *
+ * For each empty PMD a PMD_SIZE mapping is attempted first; if that is not
+ * possible the current address is mapped with a PAGE_SIZE page instead and
+ * the walk advances by one page. Backing memory comes from @altmap when one
+ * is given and usable, otherwise from regular memory.
+ *
+ * Returns 0 on success or -ENOMEM when a table or backing page cannot be
+ * allocated.
+ */
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
+				      struct vmem_altmap *altmap)
+{
+	unsigned long addr;
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	for (addr = start; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+
+		pgd = pgd_offset_k(addr);
+		p4d = p4d_offset(pgd, addr);
+		pud = vmemmap_pud_alloc(p4d, node, addr);
+		if (!pud)
+			return -ENOMEM;
+		pmd = vmemmap_pmd_alloc(pud, node, addr);
+		if (!pmd)
+			return -ENOMEM;
+
+		if (pmd_none(READ_ONCE(*pmd))) {
+			void *p;
+
+			/*
+			 * keep it simple by checking addr PMD_SIZE alignment
+			 * and verifying the device boundary condition.
+			 * For us to use a pmd mapping, both addr and pfn should
+			 * be aligned. We skip if addr is not aligned and for
+			 * pfn we hope we have extra area in the altmap that
+			 * can help to find an aligned block. This can result
+			 * in altmap block allocation failures, in which case
+			 * we fallback to RAM for vmemmap allocation.
+			 */
+			if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
+				       altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
+				/*
+				 * make sure we don't create altmap mappings
+				 * covering things outside the device.
+				 */
+				goto base_mapping;
+			}
+
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+			if (p) {
+				vmemmap_set_pmd(pmd, p, node, addr, next);
+				pr_debug("PMD_SIZE vmemmap mapping\n");
+				continue;
+			} else if (altmap) {
+				/*
+				 * A vmemmap block allocation can fail due to
+				 * alignment requirements and we trying to align
+				 * things aggressively there by running out of
+				 * space. Try base mapping on failure.
+				 */
+				goto base_mapping;
+			}
+			/* No altmap and no PMD_SIZE block: fall through. */
+		} else if (vmemmap_check_pmd(pmd, node, addr, next)) {
+			/*
+			 * If a huge mapping exist due to early call to
+			 * vmemmap_populate, let's try to use that.
+			 */
+			continue;
+		}
+base_mapping:
+		/*
+		 * Not able allocate higher order memory to back memmap
+		 * or we found a pointer to pte page. Allocate base page
+		 * size vmemmap
+		 */
+		pte = vmemmap_pte_alloc(pmd, node, addr);
+		if (!pte)
+			return -ENOMEM;
+
+		pte = radix__vmemmap_pte_populate(pmd, addr, node, altmap, NULL);
+		if (!pte)
+			return -ENOMEM;
+
+		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+		/* Only one base page was mapped, so advance by PAGE_SIZE. */
+		next = addr + PAGE_SIZE;
+	}
+	return 0;
+}
+
+/*
+ * Map the single vmemmap page at @addr with a PAGE_SIZE mapping, allocating
+ * intermediate page-table levels as needed.
+ *
+ * Returns the PTE for @addr, or NULL when @addr is already covered by a huge
+ * PMD mapping, or when a page table or the backing page cannot be allocated.
+ *
+ * NOTE(review): @altmap and @reuse are accepted but not forwarded; the
+ * backing page is always requested with neither. Confirm that is intended.
+ */
+static pte_t * __meminit radix__vmemmap_populate_address(unsigned long addr, int node,
+							 struct vmem_altmap *altmap,
+							 struct page *reuse)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset_k(addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = vmemmap_pud_alloc(p4d, node, addr);
+	if (!pud)
+		return NULL;
+	pmd = vmemmap_pmd_alloc(pud, node, addr);
+	if (!pmd)
+		return NULL;
+	if (pmd_leaf(*pmd))
+		/*
+		 * This address is already mapped as a hugepage due to a
+		 * nearby request, so no PAGE_SIZE mapping can be set up here.
+		 */
+		return NULL;
+	pte = vmemmap_pte_alloc(pmd, node, addr);
+	if (!pte)
+		return NULL;
+	/*
+	 * Populating can fail when no backing page is available. Report that
+	 * to the caller instead of verifying and returning an empty PTE.
+	 */
+	pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+	if (!pte)
+		return NULL;
+	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+	return pte;
+}
+
+/*
+ * Find (and create if necessary) the PTE of the second vmemmap page of the
+ * compound page that contains @addr. That page is the one whose backing
+ * memory is reused for the remaining tail vmemmap pages.
+ *
+ * @addr:       vmemmap address currently being populated
+ * @pfn_offset: offset, in pfns, of @addr's struct page from the compound head
+ * @node:       NUMA node used for allocations
+ *
+ * Returns the PTE of the second vmemmap page, or NULL if it cannot be mapped
+ * at base page size (allocation failure or an existing PMD leaf mapping).
+ */
+static pte_t * __meminit vmemmap_compound_tail_page(unsigned long addr,
+						    unsigned long pfn_offset, int node)
+{
+	unsigned long tail_addr;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *tail_pte;
+
+	/* Step back to the head's vmemmap page, then forward one page. */
+	tail_addr = addr - pfn_offset * sizeof(struct page) + PAGE_SIZE;
+
+	pgd = pgd_offset_k(tail_addr);
+	p4d = p4d_offset(pgd, tail_addr);
+
+	pud = vmemmap_pud_alloc(p4d, node, tail_addr);
+	if (!pud)
+		return NULL;
+
+	pmd = vmemmap_pmd_alloc(pud, node, tail_addr);
+	if (!pmd)
+		return NULL;
+
+	/*
+	 * A nearby request already mapped the second page with a hugepage;
+	 * the caller must fall back to page-sized mappings without
+	 * deduplication.
+	 */
+	if (pmd_leaf(*pmd))
+		return NULL;
+
+	tail_pte = vmemmap_pte_alloc(pmd, node, tail_addr);
+	if (!tail_pte)
+		return NULL;
+
+	/* Already mapped by an earlier request: nothing more to build. */
+	if (!pte_none(*tail_pte))
+		return tail_pte;
+
+	/*
+	 * Nothing is mapped yet. Populate the head vmemmap page first; it may
+	 * live under a different pmd, hence the full-walk helper.
+	 */
+	if (!radix__vmemmap_populate_address(tail_addr - PAGE_SIZE, node, NULL, NULL))
+		return NULL;
+
+	/* Now populate the tail vmemmap page itself. */
+	tail_pte = radix__vmemmap_pte_populate(pmd, tail_addr, node, NULL, NULL);
+	if (!tail_pte)
+		return NULL;
+
+	vmemmap_verify(tail_pte, node, tail_addr, tail_addr + PAGE_SIZE);
+	return tail_pte;
+}
+
+/*
+ * Populate the vmemmap range [@start, @end) with base-page-sized mappings
+ * for a device pagemap that uses compound pages, reusing the backing page of
+ * the second vmemmap page for the remaining tail vmemmap pages where possible.
+ *
+ * @start_pfn: pfn tracked alongside @start; used to find the offset within a
+ *             compound page of pgmap_vmemmap_nr(@pgmap) pages
+ * @start:     first vmemmap address to populate
+ * @end:       end of the vmemmap range (exclusive)
+ * @node:      NUMA node used for allocations
+ * @pgmap:     device pagemap describing the compound page geometry
+ *
+ * Returns 0 on success or -ENOMEM if any page-table level or PTE could not be
+ * populated.
+ *
+ * NOTE(review): addr_pfn is advanced by 1 (or PMD_SIZE >> PAGE_SHIFT) for
+ * each PAGE_SIZE (or PMD_SIZE) step of the vmemmap address, while
+ * vmemmap_compound_tail_page() scales pfn_offset by sizeof(struct page).
+ * The units look inconsistent - confirm against the intended semantics.
+ */
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+					      unsigned long start,
+					      unsigned long end, int node,
+					      struct dev_pagemap *pgmap)
+{
+	/*
+	 * we want to map things as base page size mapping so that
+	 * we can save space in vmemmap. We could have huge mapping
+	 * covering out both edges.
+	 */
+	unsigned long addr;
+	unsigned long addr_pfn = start_pfn;
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Every path through the loop body sets 'next' before continuing. */
+	for (addr = start; addr < end; addr = next) {
+
+		pgd = pgd_offset_k(addr);
+		p4d = p4d_offset(pgd, addr);
+		pud = vmemmap_pud_alloc(p4d, node, addr);
+		if (!pud)
+			return -ENOMEM;
+		pmd = vmemmap_pmd_alloc(pud, node, addr);
+		if (!pmd)
+			return -ENOMEM;
+
+		if (pmd_leaf(READ_ONCE(*pmd))) {
+			/* existing huge mapping. Skip the range */
+			addr_pfn += (PMD_SIZE >> PAGE_SHIFT);
+			next = pmd_addr_end(addr, end);
+			continue;
+		}
+		pte = vmemmap_pte_alloc(pmd, node, addr);
+		if (!pte)
+			return -ENOMEM;
+		if (!pte_none(*pte)) {
+			/*
+			 * This could be because we already have a compound
+			 * page whose VMEMMAP_RESERVE_NR pages were mapped and
+			 * this request fall in those pages.
+			 */
+			addr_pfn += 1;
+			next = addr + PAGE_SIZE;
+			continue;
+		} else {
+			unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+			unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
+			pte_t *tail_page_pte;
+
+			/*
+			 * if the address is aligned to huge page size it is the
+			 * head mapping.
+			 */
+			if (pfn_offset == 0) {
+				/* Populate the head page vmemmap page */
+				pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+				if (!pte)
+					return -ENOMEM;
+				vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+				/*
+				 * Populate the tail pages vmemmap page
+				 * It can fall in different pmd, hence
+				 * vmemmap_populate_address()
+				 */
+				pte = radix__vmemmap_populate_address(addr + PAGE_SIZE, node, NULL, NULL);
+				if (!pte)
+					return -ENOMEM;
+
+				/* Head and first tail vmemmap pages are both done. */
+				addr_pfn += 2;
+				next = addr + 2 * PAGE_SIZE;
+				continue;
+			}
+			/*
+			 * get the 2nd mapping details
+			 * Also create it if that doesn't exist
+			 */
+			tail_page_pte = vmemmap_compound_tail_page(addr, pfn_offset, node);
+			if (!tail_page_pte) {
+
+				/*
+				 * No reusable tail page is available: back this
+				 * vmemmap page with its own freshly allocated page.
+				 */
+				pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+				if (!pte)
+					return -ENOMEM;
+				vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+				addr_pfn += 1;
+				next = addr + PAGE_SIZE;
+				continue;
+			}
+
+			/* Map this vmemmap page onto the page backing the 2nd mapping. */
+			pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, pte_page(*tail_page_pte));
+			if (!pte)
+				return -ENOMEM;
+			vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+			addr_pfn += 1;
+			next = addr + PAGE_SIZE;
+			continue;
+		}
+	}
+	return 0;
+}
+
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Tear down the page tables for [@start, @start + @page_size) via
+ * remove_pagetable(), passing 'true' as its third argument and no altmap.
+ */
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+{
+	remove_pagetable(start, start + page_size, true, NULL);
+}
+
+/*
+ * Tear down the page tables for the vmemmap range [@start, @end) via
+ * remove_pagetable(), passing 'false' as its third argument and forwarding
+ * @altmap.
+ */
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+			       struct vmem_altmap *altmap)
+{
+	remove_pagetable(start, end, false, altmap);
+}
+#endif
+#endif
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+/*
+ * Switch the kernel mapping of @numpages pages starting at @page on or off:
+ * a non-zero @enable selects set_memory_p(), zero selects set_memory_np().
+ */
+void radix__kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long vaddr = (unsigned long)page_address(page);
+
+	if (!enable) {
+		set_memory_np(vaddr, numpages);
+		return;
+	}
+
+	set_memory_p(vaddr, numpages);
+}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * Update a huge PMD entry in place: clear the bits in @clr and set the bits
+ * in @set through radix__pte_update(), then emit the hugepage_update_pmd
+ * tracepoint.
+ *
+ * Returns the value reported by radix__pte_update() (the old entry).
+ */
+unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				  pmd_t *pmdp, unsigned long clr,
+				  unsigned long set)
+{
+	unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+	/* Debug builds: entry must be a THP or devmap PMD and the PMD lock held. */
+	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+#endif
+
+	old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1);
+	trace_hugepage_update_pmd(addr, old, clr, set);
+
+	return old;
+}
+
+/*
+ * Update a huge PUD entry in place: clear the bits in @clr and set the bits
+ * in @set through radix__pte_update(), then emit the hugepage_update_pud
+ * tracepoint.
+ *
+ * Returns the value reported by radix__pte_update() (the old entry).
+ */
+unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+					 pud_t *pudp, unsigned long clr,
+					 unsigned long set)
+{
+	unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+	/* Debug builds: entry must be a devmap PUD and the PUD lock held. */
+	WARN_ON(!pud_devmap(*pudp));
+	assert_spin_locked(pud_lockptr(mm, pudp));
+#endif
+
+	old = radix__pte_update(mm, addr, pudp_ptep(pudp), clr, set, 1);
+	trace_hugepage_update_pud(addr, old, clr, set);
+
+	return old;
+}
+
+/*
+ * Clear a normal (non-huge, non-devmap) PMD at a huge-page-aligned @address
+ * and flush the translations for the collapsed range.
+ *
+ * Returns the PMD value that was in place before it was cleared.
+ */
+pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			pmd_t *pmdp)
+
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+	VM_BUG_ON(pmd_devmap(*pmdp));
+	/*
+	 * khugepaged calls this for normal pmd
+	 */
+	pmd = *pmdp;
+	pmd_clear(pmdp);
+
+	/* Flush only after the entry has been cleared. */
+	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
+
+	return pmd;
+}
+
+/*
+ * For us pgtable_t is pte_t *. In order to save the deposited
+ * page table, we consider the allocated page table as a list
+ * head. On withdraw we need to make sure we zero out the used
+ * list_head memory area.
+ *
+ * The caller must hold the PMD lock for @pmdp.
+ */
+void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				 pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	/* The newly deposited table becomes the one recorded for this pmd. */
+	pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+/*
+ * Take back the page table currently recorded for @pmdp. The next entry on
+ * the deposit list (if any) becomes the recorded one, and the two PTE slots
+ * that were used as a list_head are zeroed before the table is returned.
+ *
+ * The caller must hold the PMD lock for @pmdp.
+ */
+pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pgtable_t withdrawn;
+	struct list_head *node;
+	pte_t *slots;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	withdrawn = pmd_huge_pte(mm, pmdp);
+	node = (struct list_head *) withdrawn;
+	if (!list_empty(node)) {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) node->next;
+		list_del(node);
+	} else {
+		pmd_huge_pte(mm, pmdp) = NULL;
+	}
+
+	/* Wipe the list_head that was overlaid on the first two PTEs. */
+	slots = (pte_t *) withdrawn;
+	slots[0] = __pte(0);
+	slots[1] = __pte(0);
+
+	return withdrawn;
+}
+
+/*
+ * Clear every bit of the huge PMD at @pmdp and return its previous value.
+ */
+pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long prev = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+
+	return __pmd(prev);
+}
+
+/*
+ * Clear every bit of the huge PUD at @pudp and return its previous value.
+ */
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pud_t *pudp)
+{
+	unsigned long prev = radix__pud_hugepage_update(mm, addr, pudp, ~0UL, 0);
+
+	return __pud(prev);
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * Apply the dirty, soft-dirty, accessed, RW and exec bits of @entry to the
+ * PTE at @ptep. @psize is the page size used if a TLB flush is required.
+ */
+void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+				  pte_t entry, unsigned long address, int psize)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_SOFT_DIRTY |
+					      _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+	/* Bits that differ between the requested entry and the current PTE. */
+	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
+	/*
+	 * On POWER9, the NMMU is not able to relax PTE access permissions
+	 * for a translation with a TLB. The PTE must be invalidated, TLB
+	 * flushed before the new PTE is installed.
+	 *
+	 * This only needs to be done for radix, because hash translation does
+	 * flush when updating the linux pte (and we don't support NMMU
+	 * accelerators on HPT on POWER9 anyway XXX: do we?).
+	 *
+	 * POWER10 (and P9P) NMMU does behave as per ISA.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) &&
+	    atomic_read(&mm->context.copros) > 0) {
+		unsigned long old_pte, new_pte;
+
+		/* Invalidate, flush, then install the relaxed PTE. */
+		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
+		new_pte = old_pte | set;
+		radix__flush_tlb_page_psize(mm, address, psize);
+		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
+	} else {
+		__radix_pte_update(ptep, 0, set);
+		/*
+		 * Book3S does not require a TLB flush when relaxing access
+		 * restrictions when the address space (modulo the POWER9 nest
+		 * MMU issue above) because the MMU will reload the PTE after
+		 * taking an access fault, as defined by the architecture. See
+		 * "Setting a Reference or Change Bit or Upgrading Access
+		 *  Authority (PTE Subject to Atomic Hardware Updates)" in
+		 *  Power ISA Version 3.1B.
+		 */
+	}
+	/* See ptesync comment in radix__set_pte_at */
+}
+
+/*
+ * Install @pte at @ptep for @addr. @old_pte is the previous PTE value and is
+ * used only to decide whether a TLB flush is needed before the install.
+ */
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep,
+				    pte_t old_pte, pte_t pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	/*
+	 * POWER9 NMMU must flush the TLB after clearing the PTE before
+	 * installing a PTE with more relaxed access permissions, see
+	 * radix__ptep_set_access_flags.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+	    is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+	    (atomic_read(&mm->context.copros) > 0))
+		radix__flush_tlb_page(vma, addr);
+
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+/*
+ * Install a leaf mapping of physical address @addr with protection @prot
+ * directly in the PUD entry @pud (in init_mm).
+ *
+ * Returns 1 on success, or 0 without touching the entry when radix is not
+ * enabled.
+ */
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+	pte_t *ptep = (pte_t *)pud;
+	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
+
+	if (!radix_enabled())
+		return 0;
+
+	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
+
+	return 1;
+}
+
+/*
+ * Clear @pud if it currently holds a leaf mapping.
+ *
+ * Returns 1 if a leaf entry was cleared, 0 if the entry was left untouched.
+ */
+int pud_clear_huge(pud_t *pud)
+{
+	if (!pud_is_leaf(*pud))
+		return 0;
+
+	pud_clear(pud);
+	return 1;
+}
+
+/*
+ * Detach and free the PMD table referenced by @pud: clear the PUD entry,
+ * flush the kernel TLB for the PUD_SIZE range at @addr, free every PTE page
+ * referenced by a non-empty PMD slot, then free the PMD table itself.
+ *
+ * Always returns 1.
+ */
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+	pmd_t *pmd;
+	int i;
+
+	/* Remember the table before the entry that points at it is cleared. */
+	pmd = pud_pgtable(*pud);
+	pud_clear(pud);
+
+	flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd[i])) {
+			pte_t *pte;
+			pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+			pte_free_kernel(&init_mm, pte);
+		}
+	}
+
+	pmd_free(&init_mm, pmd);
+
+	return 1;
+}
+
+/*
+ * Install a leaf mapping of physical address @addr with protection @prot
+ * directly in the PMD entry @pmd (in init_mm).
+ *
+ * Returns 1 on success, or 0 without touching the entry when radix is not
+ * enabled.
+ */
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+	pte_t *ptep = (pte_t *)pmd;
+	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
+
+	if (!radix_enabled())
+		return 0;
+
+	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
+
+	return 1;
+}
+
+/*
+ * Clear @pmd if it currently holds a leaf mapping.
+ *
+ * Returns 1 if a leaf entry was cleared, 0 if the entry was left untouched.
+ */
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (!pmd_is_leaf(*pmd))
+		return 0;
+
+	pmd_clear(pmd);
+	return 1;
+}
+
+/*
+ * Detach and free the PTE page referenced by @pmd: clear the PMD entry,
+ * flush the kernel TLB for the PMD_SIZE range at @addr, then free the PTE
+ * page.
+ *
+ * Always returns 1.
+ */
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte;
+
+	/* Remember the PTE page before the entry that points at it is cleared. */
+	pte = (pte_t *)pmd_page_vaddr(*pmd);
+	pmd_clear(pmd);
+
+	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+
+	pte_free_kernel(&init_mm, pte);
+
+	return 1;
+}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
new file mode 100644
index 0000000000..9e1f6558d0
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -0,0 +1,1587 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TLB flush routines for radix kernels.
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/memblock.h>
+#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
+#include <linux/debugfs.h>
+
+#include <asm/ppc-opcode.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/trace.h>
+#include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
+
+#include "internal.h"
+
+/*
+ * tlbiel instruction for radix, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ *
+ * @set and @is are encoded into RB, @pid into RS. @ric and @prs are passed
+ * through "i" asm constraints and therefore must be compile-time constants.
+ * No barriers are issued here.
+ */
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+					unsigned int pid,
+					unsigned int ric, unsigned int prs)
+{
+	unsigned long rb;
+	unsigned long rs;
+
+	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
+		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
+		     : "memory");
+}
+
+/*
+ * Invalidate the local TLB with tlbiel for invalidation selector @is.
+ * Partition-scoped entries are flushed first (only when running with
+ * CPU_FTR_HVMODE), then process-scoped entries. Sets 1..@num_sets-1 are
+ * walked only on CPUs without CPU_FTR_ARCH_31.
+ */
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+	unsigned int set;
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and the entire Page Walk Cache
+	 * and partition table entries. Then flush the remaining sets of the
+	 * TLB.
+	 */
+
+	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+		/* MSR[HV] should flush partition scope translations first. */
+		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+
+		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
+			for (set = 1; set < num_sets; set++)
+				tlbiel_radix_set_isa300(set, is, 0,
+							RIC_FLUSH_TLB, 0);
+		}
+	}
+
+	/* Flush process scoped entries. */
+	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
+
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
+		for (set = 1; set < num_sets; set++)
+			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
+	}
+
+	ppc_after_tlbiel_barrier();
+}
+
+/*
+ * Invalidate all local TLB entries for the scope selected by @action:
+ * TLB_INVAL_SCOPE_GLOBAL uses is=3 and TLB_INVAL_SCOPE_LPID uses is=2; any
+ * other value hits BUG(). On CPUs without CPU_FTR_ARCH_300 only a warning is
+ * printed instead of the tlbiel sequence. The ERAT invalidate and isync at
+ * the end are executed in either case.
+ */
+void radix__tlbiel_all(unsigned int action)
+{
+	unsigned int is;
+
+	switch (action) {
+	case TLB_INVAL_SCOPE_GLOBAL:
+		is = 3;
+		break;
+	case TLB_INVAL_SCOPE_LPID:
+		is = 2;
+		break;
+	default:
+		BUG();
+	}
+
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
+	else
+		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
+
+	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+/*
+ * Issue one radix, process-scoped tlbiel (IS = 1) for @pid on TLB set @set
+ * and emit the tlbie tracepoint. @ric is passed through an "i" asm
+ * constraint, so it must be a compile-time constant. No barriers are issued
+ * here.
+ */
+static __always_inline void __tlbiel_pid(unsigned long pid, int set,
+				unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 1, rb, rs, ric, prs, r);
+}
+
+/*
+ * Issue one radix, process-scoped tlbie (IS = 1) for @pid and emit the tlbie
+ * tracepoint. @ric must be a compile-time constant ("i" asm constraint).
+ * No barriers are issued here.
+ */
+static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rs = pid << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+/*
+ * Issue one radix, partition-scoped tlbie (IS = 2) for @lpid and emit the
+ * tlbie tracepoint. @ric must be a compile-time constant ("i" asm
+ * constraint). No barriers are issued here.
+ */
+static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+/*
+ * Issue one radix, process-scoped tlbie (IS = 2) for @lpid and emit the
+ * tlbie tracepoint. Identical to __tlbie_lpid() except that prs is 1.
+ * @ric must be a compile-time constant ("i" asm constraint). No barriers are
+ * issued here.
+ */
+static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rs = lpid;
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+/*
+ * Issue one radix, process-scoped tlbiel for virtual address @va in @pid and
+ * emit the tlbie tracepoint. The low bits of @va (PPC bits 52-63) are masked
+ * off and @ap is encoded into RB. @ric must be a compile-time constant
+ * ("i" asm constraint). No barriers are issued here.
+ */
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+					unsigned long ap, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = pid << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 1, rb, rs, ric, prs, r);
+}
+
+/*
+ * Issue one radix, process-scoped tlbie for virtual address @va in @pid and
+ * emit the tlbie tracepoint. The low bits of @va (PPC bits 52-63) are masked
+ * off and @ap is encoded into RB. @ric must be a compile-time constant
+ * ("i" asm constraint). No barriers are issued here.
+ */
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+				       unsigned long ap, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = pid << PPC_BITLSHIFT(31);
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+/*
+ * Issue one radix, partition-scoped tlbie for address @va in @lpid and emit
+ * the tlbie tracepoint. The low bits of @va (PPC bits 52-63) are masked off
+ * and @ap is encoded into RB. @ric must be a compile-time constant
+ * ("i" asm constraint). No barriers are issued here.
+ */
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+					    unsigned long ap, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+
+/*
+ * Extra invalidations issued after a tlbie by VA on CPUs with TLBIE errata.
+ * With CPU_FTR_P9_TLBIE_ERAT_BUG: ptesync, then a tlbie of @va for PID 0.
+ * With CPU_FTR_P9_TLBIE_STQ_BUG: ptesync, then a repeat tlbie of @va for
+ * @pid.
+ */
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+				  unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+/*
+ * Extra invalidations issued after a run of tlbie-by-VA on CPUs with TLBIE
+ * errata. With CPU_FTR_P9_TLBIE_ERAT_BUG: ptesync, then a tlbie for PID 0.
+ * With CPU_FTR_P9_TLBIE_STQ_BUG: ptesync, then a repeat tlbie of @va for
+ * @pid.
+ */
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+					unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+/*
+ * Extra invalidations issued after a tlbie by PID on CPUs with TLBIE errata.
+ * With CPU_FTR_P9_TLBIE_ERAT_BUG: ptesync, then a tlbie for PID 0.
+ * With CPU_FTR_P9_TLBIE_STQ_BUG: ptesync, then a tlbie of a dummy 64K-page
+ * address for @pid.
+ */
+static inline void fixup_tlbie_pid(unsigned long pid)
+{
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_pid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+	}
+}
+
+/*
+ * Extra invalidations issued after a partition-scoped tlbie by VA on CPUs
+ * with TLBIE errata. With CPU_FTR_P9_TLBIE_ERAT_BUG: ptesync, then a tlbie
+ * of @va for LPID 0. With CPU_FTR_P9_TLBIE_STQ_BUG: ptesync, then a repeat
+ * tlbie of @va for @lpid.
+ */
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+				       unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+	}
+}
+
+/*
+ * Extra invalidations issued after a tlbie by LPID on CPUs with TLBIE
+ * errata. With CPU_FTR_P9_TLBIE_ERAT_BUG: ptesync, then a tlbie for LPID 0.
+ * With CPU_FTR_P9_TLBIE_STQ_BUG: ptesync, then a tlbie of a dummy 64K-page
+ * address for @lpid.
+ */
+static inline void fixup_tlbie_lpid(unsigned long lpid)
+{
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid(0, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+	}
+}
+
+/*
+ * We use 128 set in radix mode and 256 set in hpt mode.
+ *
+ * Local (tlbiel) invalidation of all translations for @pid. @ric selects
+ * what is flushed by the first tlbiel: RIC_FLUSH_PWC flushes only the page
+ * walk cache and returns early; RIC_FLUSH_TLB and RIC_FLUSH_ALL go on to
+ * walk the remaining TLB sets on CPUs without CPU_FTR_ARCH_31, then
+ * invalidate the user ERAT.
+ */
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+{
+	int set;
+
+	asm volatile("ptesync": : :"memory");
+
+	switch (ric) {
+	case RIC_FLUSH_PWC:
+
+		/* For PWC, only one flush is needed */
+		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+		ppc_after_tlbiel_barrier();
+		return;
+	case RIC_FLUSH_TLB:
+		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		/*
+		 * Flush the first set of the TLB, and if
+		 * we're doing a RIC_FLUSH_ALL, also flush
+		 * the entire Page Walk Cache.
+		 */
+		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
+	}
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/* For the remaining sets, just flush the TLB */
+		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
+			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
+	}
+
+	ppc_after_tlbiel_barrier();
+	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
+}
+
+/*
+ * Broadcast (tlbie) invalidation for @pid, bracketed by a leading ptesync
+ * and a trailing "eieio; tlbsync; ptesync". The errata fixup is applied for
+ * RIC_FLUSH_TLB and RIC_FLUSH_ALL, but not for RIC_FLUSH_PWC.
+ */
+static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_pid
+	 * must be a compile-time constraint to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_pid(pid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid(pid);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_pid(pid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_pid(pid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid(pid);
+	}
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/* Argument bundle handed to do_tlbiel_pid() through on_each_cpu_mask(). */
+struct tlbiel_pid {
+	unsigned long pid;	/* PID whose translations are flushed */
+	unsigned long ric;	/* RIC_FLUSH_* selector */
+};
+
+/*
+ * Cross-call handler: run _tlbiel_pid() on this CPU for the request in
+ * @info (a struct tlbiel_pid). The RIC value is re-dispatched as a literal
+ * constant; anything other than TLB or PWC is treated as RIC_FLUSH_ALL.
+ */
+static void do_tlbiel_pid(void *info)
+{
+	struct tlbiel_pid *req = info;
+
+	switch (req->ric) {
+	case RIC_FLUSH_TLB:
+		_tlbiel_pid(req->pid, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_PWC:
+		_tlbiel_pid(req->pid, RIC_FLUSH_PWC);
+		break;
+	default:
+		_tlbiel_pid(req->pid, RIC_FLUSH_ALL);
+		break;
+	}
+}
+
+/*
+ * Flush @pid with tlbiel on every CPU in mm_cpumask(@mm), waiting for
+ * completion, and additionally issue a full (RIC_FLUSH_ALL) tlbie when the
+ * mm has coprocessors attached.
+ */
+static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
+				unsigned long pid, unsigned long ric)
+{
+	struct cpumask *cpus = mm_cpumask(mm);
+	struct tlbiel_pid t = { .pid = pid, .ric = ric };
+
+	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
+	/*
+	 * Always want the CPU translations to be invalidated with tlbiel in
+	 * these paths, so while coprocessors must use tlbie, we can not
+	 * optimise away the tlbiel component.
+	 */
+	if (atomic_read(&mm->context.copros) > 0)
+		_tlbie_pid(pid, RIC_FLUSH_ALL);
+}
+
+/*
+ * Broadcast (tlbie) partition-scoped invalidation for @lpid, bracketed by a
+ * leading ptesync and a trailing "eieio; tlbsync; ptesync". The errata fixup
+ * is applied for RIC_FLUSH_TLB and RIC_FLUSH_ALL, but not for RIC_FLUSH_PWC.
+ */
+static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_lpid
+	 * must be a compile-time constant to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_lpid(lpid);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_lpid(lpid);
+	}
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/*
+ * Broadcast (tlbie) process-scoped invalidation for @lpid. The errata fixup
+ * is applied for every @ric value, followed by "eieio; tlbsync; ptesync".
+ * Unlike _tlbie_lpid(), no leading ptesync is issued here.
+ */
+static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
+	 * must be a compile-time constant to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
+	}
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/*
+ * Issue a tlbiel (RIC_FLUSH_TLB) for every @page_size step in
+ * [@start, @end) for @pid. The AP encoding is derived from @psize. No
+ * barriers are issued here.
+ */
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+				    unsigned long pid, unsigned long page_size,
+				    unsigned long psize)
+{
+	unsigned long ap = mmu_get_ap(psize);
+	unsigned long va = start;
+
+	while (va < end) {
+		__tlbiel_va(va, pid, ap, RIC_FLUSH_TLB);
+		va += page_size;
+	}
+}
+
+/*
+ * Local (tlbiel) invalidation of one address @va in @pid for page size
+ * @psize, bracketed by a leading ptesync and ppc_after_tlbiel_barrier().
+ * @ric is forwarded to __tlbiel_va(), which needs a compile-time constant.
+ */
+static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
+				       unsigned long psize, unsigned long ric)
+{
+	unsigned long ap = mmu_get_ap(psize);
+
+	asm volatile("ptesync": : :"memory");
+	__tlbiel_va(va, pid, ap, ric);
+	ppc_after_tlbiel_barrier();
+}
+
+/*
+ * Local (tlbiel) invalidation of [@start, @end) in @pid in @page_size
+ * steps, bracketed by a leading ptesync and ppc_after_tlbiel_barrier().
+ * When @also_pwc is set, the page walk cache for @pid is flushed first.
+ */
+static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+				    unsigned long pid, unsigned long page_size,
+				    unsigned long psize, bool also_pwc)
+{
+	asm volatile("ptesync": : :"memory");
+	if (also_pwc)
+		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+	__tlbiel_va_range(start, end, pid, page_size, psize);
+	ppc_after_tlbiel_barrier();
+}
+
+/*
+ * Issue a tlbie (RIC_FLUSH_TLB) for every @page_size step in
+ * [@start, @end) for @pid, then apply the errata fixup using the last
+ * address that was stepped over. No barriers are issued here.
+ */
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+				    unsigned long pid, unsigned long page_size,
+				    unsigned long psize)
+{
+	unsigned long addr;
+	unsigned long ap = mmu_get_ap(psize);
+
+	for (addr = start; addr < end; addr += page_size)
+		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+	/* addr is one step past the last flushed address at this point. */
+	fixup_tlbie_va_range(addr - page_size, pid, ap);
+}
+
+/*
+ * Broadcast (tlbie) invalidation of one address @va in @pid for page size
+ * @psize, including the errata fixup, bracketed by a leading ptesync and a
+ * trailing "eieio; tlbsync; ptesync".
+ */
+static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+				      unsigned long psize, unsigned long ric)
+{
+	unsigned long ap = mmu_get_ap(psize);
+
+	asm volatile("ptesync": : :"memory");
+	__tlbie_va(va, pid, ap, ric);
+	fixup_tlbie_va(va, pid, ap);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/* Argument bundle handed to do_tlbiel_va() through on_each_cpu_mask(). */
+struct tlbiel_va {
+	unsigned long pid;	/* PID owning the translation */
+	unsigned long va;	/* address to invalidate */
+	unsigned long psize;	/* page size index, converted with mmu_get_ap() */
+	unsigned long ric;	/* RIC_FLUSH_* selector */
+};
+
+/*
+ * Cross-call handler: run _tlbiel_va() on this CPU for the request in @info
+ * (a struct tlbiel_va). The RIC value is re-dispatched as a literal
+ * constant; anything other than TLB or PWC is treated as RIC_FLUSH_ALL.
+ */
+static void do_tlbiel_va(void *info)
+{
+	struct tlbiel_va *req = info;
+
+	switch (req->ric) {
+	case RIC_FLUSH_TLB:
+		_tlbiel_va(req->va, req->pid, req->psize, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_PWC:
+		_tlbiel_va(req->va, req->pid, req->psize, RIC_FLUSH_PWC);
+		break;
+	default:
+		_tlbiel_va(req->va, req->pid, req->psize, RIC_FLUSH_ALL);
+		break;
+	}
+}
+
+/*
+ * Flush @va in @pid with tlbiel on every CPU in mm_cpumask(@mm), waiting for
+ * completion, and additionally issue a tlbie when the mm has coprocessors
+ * attached. Note the tlbie always uses RIC_FLUSH_TLB regardless of @ric.
+ */
+static inline void _tlbiel_va_multicast(struct mm_struct *mm,
+				unsigned long va, unsigned long pid,
+				unsigned long psize, unsigned long ric)
+{
+	struct cpumask *cpus = mm_cpumask(mm);
+	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+	if (atomic_read(&mm->context.copros) > 0)
+		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
+}
+
+/* Argument bundle handed to do_tlbiel_va_range() through on_each_cpu_mask(). */
+struct tlbiel_va_range {
+	unsigned long pid;		/* PID owning the translations */
+	unsigned long start;		/* first address to invalidate */
+	unsigned long end;		/* end of the range (exclusive) */
+	unsigned long page_size;	/* step between invalidations, in bytes */
+	unsigned long psize;		/* page size index, converted with mmu_get_ap() */
+	bool also_pwc;			/* also flush the page walk cache */
+};
+
+/*
+ * Cross-call handler: run _tlbiel_va_range() on this CPU for the request in
+ * @info (a struct tlbiel_va_range).
+ */
+static void do_tlbiel_va_range(void *info)
+{
+	struct tlbiel_va_range *t = info;
+
+	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+				    t->psize, t->also_pwc);
+}
+
+/*
+ * Broadcast (tlbie) partition-scoped invalidation of one address @va in
+ * @lpid for page size @psize, including the errata fixup, bracketed by a
+ * leading ptesync and a trailing "eieio; tlbsync; ptesync".
+ */
+static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+			      unsigned long psize, unsigned long ric)
+{
+	unsigned long ap = mmu_get_ap(psize);
+
+	asm volatile("ptesync": : :"memory");
+	__tlbie_lpid_va(va, lpid, ap, ric);
+	fixup_tlbie_lpid_va(va, lpid, ap);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/*
+ * Broadcast (tlbie) invalidation of [@start, @end) in @pid in @page_size
+ * steps, bracketed by a leading ptesync and a trailing
+ * "eieio; tlbsync; ptesync". When @also_pwc is set, the page walk cache for
+ * @pid is flushed first.
+ */
+static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+				    unsigned long pid, unsigned long page_size,
+				    unsigned long psize, bool also_pwc)
+{
+	asm volatile("ptesync": : :"memory");
+	if (also_pwc)
+		__tlbie_pid(pid, RIC_FLUSH_PWC);
+	__tlbie_va_range(start, end, pid, page_size, psize);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+/*
+ * Flush [@start, @end) in @pid with tlbiel on every CPU in
+ * mm_cpumask(@mm), waiting for completion, and additionally issue the
+ * equivalent tlbie range flush when the mm has coprocessors attached.
+ */
+static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+				unsigned long start, unsigned long end,
+				unsigned long pid, unsigned long page_size,
+				unsigned long psize, bool also_pwc)
+{
+	struct cpumask *cpus = mm_cpumask(mm);
+	struct tlbiel_va_range t = { .start = start, .end = end,
+				.pid = pid, .page_size = page_size,
+				.psize = psize, .also_pwc = also_pwc };
+
+	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+	if (atomic_read(&mm->context.copros) > 0)
+		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+}
+
+/*
+ * Base TLB flushing operations:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ *  - local_* variants of page and mm only apply to the current
+ *    processor
+ */
+/*
+ * Flush the TLB entries (RIC_FLUSH_TLB) of @mm on the current CPU only.
+ * Warns once and does nothing if the mm has no context ID.
+ */
+void radix__local_flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long pid = mm->context.id;
+
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	/* Keep the flush on one CPU for its whole duration. */
+	preempt_disable();
+	_tlbiel_pid(pid, RIC_FLUSH_TLB);
+	preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_mm);
+
+#ifndef CONFIG_SMP
+/*
+ * Flush everything (RIC_FLUSH_ALL) cached for @mm on the current CPU only.
+ * Warns once and does nothing if the mm has no context ID.
+ */
+void radix__local_flush_all_mm(struct mm_struct *mm)
+{
+	unsigned long pid = mm->context.id;
+
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	/* Keep the flush on one CPU for its whole duration. */
+	preempt_disable();
+	_tlbiel_pid(pid, RIC_FLUSH_ALL);
+	preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_all_mm);
+
+/*
+ * !CONFIG_SMP variant: a full flush of @mm is just the local flush.
+ * @fullmm is not used here.
+ */
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
+{
+	radix__local_flush_all_mm(mm);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Flush the TLB entry for @vmaddr in @mm on the current CPU only, using page
+ * size @psize. Warns once and does nothing if the mm has no context ID.
+ */
+void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+				       int psize)
+{
+	unsigned long pid = mm->context.id;
+
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	/* Keep the flush on one CPU for its whole duration. */
+	preempt_disable();
+	_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+	preempt_enable();
+}
+
+/*
+ * Flush the TLB entry for @vmaddr in @vma's mm on the current CPU only.
+ * hugetlb VMAs are handed to radix__local_flush_hugetlb_page(); everything
+ * else is flushed at mmu_virtual_psize.
+ */
+void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	/* need the return fix for nohash.c */
+	if (is_vm_hugetlb_page(vma))
+		return radix__local_flush_hugetlb_page(vma, vmaddr);
+#endif
+	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_page);
+
+/*
+ * Report whether flushes for @mm must be escalated: true only on CPUs
+ * without CPU_FTR_ARCH_31 when the mm has at least one coprocessor attached.
+ */
+static bool mm_needs_flush_escalation(struct mm_struct *mm)
+{
+	/*
+	 * The P9 nest MMU has issues with the page walk cache caching PTEs
+	 * and not flushing them when RIC = 0 for a PID/LPID invalidate.
+	 *
+	 * This may have been fixed in shipping firmware (by disabling PWC
+	 * or preventing it from caching PTEs), but until that is confirmed,
+	 * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
+	 * to RIC=2.
+	 *
+	 * POWER10 (and P9P) does not have this problem.
+	 */
+	return !cpu_has_feature(CPU_FTR_ARCH_31) &&
+	       atomic_read(&mm->context.copros) > 0;
+}
+
+/*
+ * Stop this CPU from lazily using @mm and flush its local translations.
+ *
+ * If the current task is a kernel thread borrowing @mm as its active_mm, it
+ * is switched to init_mm. If this CPU is set in mm_cpumask(@mm), it is
+ * removed, the active CPU count is dropped and a flush is forced.
+ *
+ * If always_flush is true, then flush even if this CPU can't be removed
+ * from mm_cpumask.
+ */
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
+{
+	unsigned long pid = mm->context.id;
+	int cpu = smp_processor_id();
+
+	/*
+	 * A kthread could have done a mmget_not_zero() after the flushing CPU
+	 * checked mm_cpumask, and be in the process of kthread_use_mm when
+	 * interrupted here. In that case, current->mm will be set to mm,
+	 * because kthread_use_mm() setting ->mm and switching to the mm is
+	 * done with interrupts off.
+	 */
+	if (current->mm == mm)
+		goto out;
+
+	if (current->active_mm == mm) {
+		unsigned long flags;
+
+		WARN_ON_ONCE(current->mm != NULL);
+		/*
+		 * It is a kernel thread and is using mm as the lazy tlb, so
+		 * switch it to init_mm. This is not always called from IPI
+		 * (e.g., flush_type_needed), so must disable irqs.
+		 */
+		local_irq_save(flags);
+		mmgrab_lazy_tlb(&init_mm);
+		current->active_mm = &init_mm;
+		switch_mm_irqs_off(mm, &init_mm, current);
+		mmdrop_lazy_tlb(mm);
+		local_irq_restore(flags);
+	}
+
+	/*
+	 * This IPI may be initiated from any source including those not
+	 * running the mm, so there may be a racing IPI that comes after
+	 * this one which finds the cpumask already clear. Check and avoid
+	 * underflowing the active_cpus count in that case. The race should
+	 * not otherwise be a problem, but the TLB must be flushed because
+	 * that's what the caller expects.
+	 */
+	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+		dec_mm_active_cpus(mm);
+		cpumask_clear_cpu(cpu, mm_cpumask(mm));
+		always_flush = true;
+	}
+
+out:
+	if (always_flush)
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Cross-call handler: @arg is the mm to drop from this CPU; always flushes.
+ */
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+	struct mm_struct *mm = arg;
+	exit_lazy_flush_tlb(mm, true);
+}
+
+/*
+ * Run do_exit_flush_lazy_tlb() via smp_call_function_many() on the CPUs in
+ * mm_cpumask(@mm) and wait for all of them to finish.
+ */
+static void exit_flush_lazy_tlbs(struct mm_struct *mm)
+{
+	/*
+	 * Would be nice if this was async so it could be run in
+	 * parallel with our local flush, but generic code does not
+	 * give a good API for it. Could extend the generic code or
+	 * make a special powerpc IPI for flushing TLBs.
+	 * For now it's not too performance critical.
+	 */
+	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
+				(void *)mm, 1);
+}
+
+#else /* CONFIG_SMP */
+static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }	/* !CONFIG_SMP: empty stub */
+#endif /* CONFIG_SMP */
+
+static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);	/* ticked by tick_and_test_trim_clock() */
+
+/*
+ * Interval between flushes at which we send out IPIs to check whether the
+ * mm_cpumask can be trimmed for the case where it's not a single-threaded
+ * process flushing its own mm. The intent is to reduce the cost of later
+ * flushes. Don't want this to be so low that it adds noticeable cost to TLB
+ * flushing, or so high that it doesn't help reduce global TLBIEs.
+ */
+static unsigned long tlb_mm_cpumask_trim_timer = 1073;
+
+static bool tick_and_test_trim_clock(void)
+{
+	unsigned int ticks = __this_cpu_inc_return(mm_cpumask_trim_clock);
+
+	if (ticks != tlb_mm_cpumask_trim_timer)
+		return false;	/* interval not reached yet on this CPU */
+	__this_cpu_write(mm_cpumask_trim_clock, 0);	/* restart the count */
+	return true;
+}
+
+enum tlb_flush_type {	/* verdict returned by flush_type_needed() */
+	FLUSH_TYPE_NONE,	/* callers issue no invalidation */
+	FLUSH_TYPE_LOCAL,	/* callers use the tlbiel variants */
+	FLUSH_TYPE_GLOBAL,	/* callers use the hcall, tlbie or tlbiel-multicast variants */
+};
+
+static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
+{
+	int active_cpus = atomic_read(&mm->context.active_cpus);
+	int cpu = smp_processor_id();
+
+	if (active_cpus == 0)
+		return FLUSH_TYPE_NONE;
+	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+		if (current->mm != mm) {
+			/*
+			 * Asynchronous flush sources may trim down to nothing
+			 * if the process is not running, so occasionally try
+			 * to trim.
+			 */
+			if (tick_and_test_trim_clock()) {
+				exit_lazy_flush_tlb(mm, true);	/* flushes locally itself */
+				return FLUSH_TYPE_NONE;
+			}
+		}
+		return FLUSH_TYPE_LOCAL;	/* single active CPU and it is this one */
+	}
+
+	/* Coprocessors require TLBIE to invalidate nMMU. */
+	if (atomic_read(&mm->context.copros) > 0)
+		return FLUSH_TYPE_GLOBAL;
+
+	/*
+	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
+	 * because the mm is being taken down anyway, and a TLBIE tends to
+	 * be faster than an IPI+TLBIEL.
+	 */
+	if (fullmm)
+		return FLUSH_TYPE_GLOBAL;
+
+	/*
+	 * If we are running the only thread of a single-threaded process,
+	 * then we should almost always be able to trim off the rest of the
+	 * CPU mask (except in the case of kthread_use_mm() races), so always
+	 * try trimming the mask.
+	 */
+	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
+		exit_flush_lazy_tlbs(mm);
+		/*
+		 * A kthread_use_mm() race could prevent IPIs from clearing
+		 * the cpumask here, however those users are established
+		 * after our first check (and so after the PTEs are removed),
+		 * and the TLB still gets flushed by the IPI, so this CPU
+		 * will only require a local flush.
+		 */
+		return FLUSH_TYPE_LOCAL;
+	}
+
+	/*
+	 * Occasionally try to trim down the cpumask. It's possible this can
+	 * bring the mask to zero, which results in no flush.
+	 */
+	if (tick_and_test_trim_clock()) {
+		exit_flush_lazy_tlbs(mm);	/* the IPI handler flushes each target CPU */
+		if (current->mm == mm)
+			return FLUSH_TYPE_LOCAL;
+		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+			exit_lazy_flush_tlb(mm, true);	/* trim and flush this CPU too */
+		return FLUSH_TYPE_NONE;
+	}
+
+	return FLUSH_TYPE_GLOBAL;
+}
+
+#ifdef CONFIG_SMP
+void radix__flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long pid;
+	enum tlb_flush_type type;
+
+	pid = mm->context.id;
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
+	 * stores to clear ptes before the invalidate. See barrier in
+	 * switch_mm_irqs_off
+	 */
+	smp_mb();
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_pid(pid, RIC_FLUSH_TLB);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {	/* no GTSE: go through pseries_rpt_invalidate() */
+			unsigned long tgt = H_RPTI_TARGET_CMMU;
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;	/* coprocessors attached */
+			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+					       H_RPTI_PAGE_ALL, 0, -1UL);
+		} else if (cputlb_use_tlbie()) {
+			if (mm_needs_flush_escalation(mm))
+				_tlbie_pid(pid, RIC_FLUSH_ALL);	/* escalated from TLB-only to ALL */
+			else
+				_tlbie_pid(pid, RIC_FLUSH_TLB);
+		} else {
+			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+		}
+	}	/* FLUSH_TYPE_NONE: nothing to invalidate on the CPU side */
+	preempt_enable();
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);	/* runs for every flush type */
+}
+EXPORT_SYMBOL(radix__flush_tlb_mm);
+
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
+{
+	unsigned long pid;
+	enum tlb_flush_type type;
+
+	pid = mm->context.id;
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
+	type = flush_type_needed(mm, fullmm);	/* @fullmm is only forwarded to this decision */
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {	/* rpti_type: kept distinct from the enum 'type' */
+			unsigned long tgt = H_RPTI_TARGET_CMMU;
+			unsigned long rpti_type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+						  H_RPTI_TYPE_PRT;
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, rpti_type,
+					       H_RPTI_PAGE_ALL, 0, -1UL);
+		} else if (cputlb_use_tlbie())
+			_tlbie_pid(pid, RIC_FLUSH_ALL);
+		else
+			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
+	}
+	preempt_enable();
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);	/* runs for every flush type */
+}
+
+void radix__flush_all_mm(struct mm_struct *mm)
+{
+	__flush_all_mm(mm, false);	/* fullmm == false */
+}
+EXPORT_SYMBOL(radix__flush_all_mm);
+
+void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+				 int psize)
+{
+	unsigned long pid;
+	enum tlb_flush_type type;
+
+	pid = mm->context.id;
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long tgt, pg_sizes, size;
+
+			tgt = H_RPTI_TARGET_CMMU;
+			pg_sizes = psize_to_rpti_pgsize(psize);
+			size = 1UL << mmu_psize_to_shift(psize);	/* bytes covered by one page of @psize */
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+					       pg_sizes, vmaddr,
+					       vmaddr + size);	/* range is exactly that one page */
+		} else if (cputlb_use_tlbie())
+			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+		else
+			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
+	}
+	preempt_enable();	/* note: no mmu notifier call on this path */
+}
+
+void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	if (is_vm_hugetlb_page(vma))	/* hugetlb VMAs are delegated to the hugetlb flush */
+		return radix__flush_hugetlb_page(vma, vmaddr);
+#endif
+	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);	/* base page size */
+}
+EXPORT_SYMBOL(radix__flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+static void do_tlbiel_kernel(void *info)	/* on_each_cpu() callback; @info is unused */
+{
+	_tlbiel_pid(0, RIC_FLUSH_ALL);	/* PID 0 is what the kernel-range flush in this file targets */
+}
+
+static inline void _tlbiel_kernel_broadcast(void)
+{
+	on_each_cpu(do_tlbiel_kernel, NULL, 1);	/* final argument 1 == wait for completion */
+	if (tlbie_capable) {
+		/*
+		 * Coherent accelerators don't refcount kernel memory mappings,
+		 * so we always have to issue a tlbie for them. This is quite a
+		 * slow path anyway.
+		 */
+		_tlbie_pid(0, RIC_FLUSH_ALL);
+	}
+}
+
+/*
+ * If kernel TLBIs ever become local rather than global, then
+ * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
+ * assumes kernel TLBIs are global.
+ */
+void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (!mmu_has_feature(MMU_FTR_GTSE)) {	/* only this path honours @start/@end */
+		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
+		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+				     H_RPTI_TYPE_PRT;
+
+		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
+				       start, end);
+	} else if (cputlb_use_tlbie())
+		_tlbie_pid(0, RIC_FLUSH_ALL);	/* whole of PID 0, regardless of the range */
+	else
+		_tlbiel_kernel_broadcast();	/* likewise a full PID 0 flush on every CPU */
+}
+EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+
+/*
+ * Still referenced: the range flushers below WARN_ON_ONCE() if 'end' equals it.
+ */
+#define TLB_FLUSH_ALL -1UL
+
+/*
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages: first for global flushes, then for local ones.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
+ */
+static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
+static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+					    unsigned long start, unsigned long end)
+{
+	unsigned long pid;
+	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+	unsigned long page_size = 1UL << page_shift;
+	unsigned long nr_pages = (end - start) >> page_shift;	/* counted in base pages */
+	bool flush_pid, flush_pwc = false;
+	enum tlb_flush_type type;
+
+	pid = mm->context.id;
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	WARN_ON_ONCE(end == TLB_FLUSH_ALL);
+
+	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_NONE)
+		goto out;
+
+	if (type == FLUSH_TYPE_GLOBAL)
+		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+	else
+		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+	/*
+	 * A full PID flush already does the PWC flush. If it is not a full PID
+	 * flush, check whether the range is at least PMD_SIZE and force a PWC
+	 * flush; mremap() depends on this behaviour.
+	 */
+	if (!flush_pid && (end - start) >= PMD_SIZE)
+		flush_pwc = true;
+
+	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+		unsigned long rpti_type = H_RPTI_TYPE_TLB;	/* distinct from the enum 'type' */
+		unsigned long tgt = H_RPTI_TARGET_CMMU;
+		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
+		if (atomic_read(&mm->context.copros) > 0)
+			tgt |= H_RPTI_TARGET_NMMU;
+		if (flush_pwc)
+			rpti_type |= H_RPTI_TYPE_PWC;
+		pseries_rpt_invalidate(pid, tgt, rpti_type, pg_sizes, start, end);
+	} else if (flush_pid) {
+		/*
+		 * Page count is above the ceiling chosen above: flush the whole PID with RIC_FLUSH_ALL.
+		 */
+		if (type == FLUSH_TYPE_LOCAL) {
+			_tlbiel_pid(pid, RIC_FLUSH_ALL);
+		} else {
+			if (cputlb_use_tlbie()) {
+				_tlbie_pid(pid, RIC_FLUSH_ALL);
+			} else {
+				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
+			}
+		}
+	} else {
+		bool hflush;
+		unsigned long hstart, hend;
+
+		hstart = (start + PMD_SIZE - 1) & PMD_MASK;	/* round start up to a PMD boundary */
+		hend = end & PMD_MASK;				/* round end down to a PMD boundary */
+		hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend;
+
+		if (type == FLUSH_TYPE_LOCAL) {
+			asm volatile("ptesync": : :"memory");
+			if (flush_pwc)
+				/* For PWC, only one flush is needed */
+				__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+			if (hflush)
+				__tlbiel_va_range(hstart, hend, pid,
+						PMD_SIZE, MMU_PAGE_2M);
+			ppc_after_tlbiel_barrier();
+		} else if (cputlb_use_tlbie()) {
+			asm volatile("ptesync": : :"memory");
+			if (flush_pwc)
+				__tlbie_pid(pid, RIC_FLUSH_PWC);
+			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+			if (hflush)
+				__tlbie_va_range(hstart, hend, pid,
+						PMD_SIZE, MMU_PAGE_2M);
+			asm volatile("eieio; tlbsync; ptesync": : :"memory");
+		} else {
+			_tlbiel_va_range_multicast(mm,
+					start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
+			if (hflush)
+				_tlbiel_va_range_multicast(mm,
+					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
+		}
+	}
+out:
+	preempt_enable();
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);	/* also reached for FLUSH_TYPE_NONE */
+}
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	if (is_vm_hugetlb_page(vma))	/* hugetlb VMAs are delegated to the hugetlb range flush */
+		return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+	__radix__flush_tlb_range(vma->vm_mm, start, end);
+}
+EXPORT_SYMBOL(radix__flush_tlb_range);
+
+static int radix_get_mmu_psize(int page_size)
+{
+	/* Map a page size in bytes to its mmu_psize_defs[] index, or -1 if unknown. */
+	const unsigned long bytes = page_size;
+
+	if (bytes == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
+		return mmu_virtual_psize;
+	if (bytes == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
+		return MMU_PAGE_2M;
+	if (bytes == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
+		return MMU_PAGE_1G;
+
+	return -1;
+}
+
+/*
+ * Flush partition scoped LPID address translation for all CPUs.
+ */
+void radix__flush_tlb_lpid_page(unsigned int lpid,
+					unsigned long addr,
+					unsigned long page_size)
+{
+	int psize = radix_get_mmu_psize(page_size);	/* NOTE(review): -1 for an unknown size is not checked */
+
+	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
+
+/*
+ * Flush the partition scoped PWC of @lpid on all CPUs (RIC_FLUSH_PWC via _tlbie_lpid()).
+ */
+void radix__flush_pwc_lpid(unsigned int lpid)
+{
+	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
+}
+EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR), using RIC_FLUSH_ALL.
+ */
+void radix__flush_all_lpid(unsigned int lpid)
+{
+	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
+
+/*
+ * Flush process scoped translations from LPID (=LPIDR), using RIC_FLUSH_ALL.
+ */
+void radix__flush_all_lpid_guest(unsigned int lpid)
+{
+	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
+}
+
+void radix__tlb_flush(struct mmu_gather *tlb)
+{
+	int psize = 0;
+	struct mm_struct *mm = tlb->mm;
+	int page_size = tlb->page_size;
+	unsigned long start = tlb->start;
+	unsigned long end = tlb->end;
+
+	/*
+	 * If the page size is not one we understand, do a full mm flush.
+	 *
+	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
+	 * that flushes the process table entry cache upon process teardown.
+	 * See the comment for radix in arch_exit_mmap().
+	 */
+	if (tlb->fullmm) {
+		if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+			/*
+			 * Shootdown based lazy tlb mm refcounting means we
+			 * have to IPI everyone in the mm_cpumask anyway soon
+			 * when the mm goes away, so might as well do it as
+			 * part of the final flush now.
+			 *
+			 * If lazy shootdown was improved to reduce IPIs (e.g.,
+			 * by batching), then it may end up being better to use
+			 * tlbies here instead.
+			 */
+			preempt_disable();
+
+			smp_mb(); /* see radix__flush_tlb_mm */
+			exit_flush_lazy_tlbs(mm);	/* IPI the CPUs in mm_cpumask first */
+			__flush_all_mm(mm, true);
+
+			preempt_enable();
+		} else {
+			__flush_all_mm(mm, true);
+		}
+
+	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {	/* unrecognised page size */
+		if (!tlb->freed_tables)
+			radix__flush_tlb_mm(mm);	/* whole-PID flush using RIC_FLUSH_TLB */
+		else
+			radix__flush_all_mm(mm);	/* whole-PID flush using RIC_FLUSH_ALL */
+	} else {
+		if (!tlb->freed_tables)
+			radix__flush_tlb_range_psize(mm, start, end, psize);
+		else
+			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);	/* also_pwc variant */
+	}
+}
+
+static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+				unsigned long start, unsigned long end,
+				int psize, bool also_pwc)
+{
+	unsigned long pid;
+	unsigned int page_shift = mmu_psize_defs[psize].shift;
+	unsigned long page_size = 1UL << page_shift;
+	unsigned long nr_pages = (end - start) >> page_shift;	/* counted in pages of @psize */
+	bool flush_pid;
+	enum tlb_flush_type type;
+
+	pid = mm->context.id;
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	WARN_ON_ONCE(end == TLB_FLUSH_ALL);
+
+	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_NONE)
+		goto out;
+
+	if (type == FLUSH_TYPE_GLOBAL)
+		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+	else
+		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+
+	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+		unsigned long tgt = H_RPTI_TARGET_CMMU;
+		unsigned long rpti_type = H_RPTI_TYPE_TLB;	/* distinct from the enum 'type' */
+		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
+
+		if (also_pwc)
+			rpti_type |= H_RPTI_TYPE_PWC;
+		if (atomic_read(&mm->context.copros) > 0)
+			tgt |= H_RPTI_TARGET_NMMU;
+		pseries_rpt_invalidate(pid, tgt, rpti_type, pg_sizes, start, end);
+	} else if (flush_pid) {	/* above the ceiling: flush the whole PID instead of page by page */
+		if (type == FLUSH_TYPE_LOCAL) {
+			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+		} else {
+			if (cputlb_use_tlbie()) {
+				if (mm_needs_flush_escalation(mm))
+					also_pwc = true;	/* escalate to RIC_FLUSH_ALL */
+
+				_tlbie_pid(pid,
+					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+			} else {
+				_tlbiel_pid_multicast(mm, pid,
+					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+			}
+
+		}
+	} else {
+		if (type == FLUSH_TYPE_LOCAL)
+			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
+		else if (cputlb_use_tlbie())
+			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+		else
+			_tlbiel_va_range_multicast(mm,
+					start, end, pid, page_size, psize, also_pwc);
+	}
+out:
+	preempt_enable();
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);	/* also reached for FLUSH_TYPE_NONE */
+}
+
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+				  unsigned long end, int psize)
+{
+	__radix__flush_tlb_range_psize(mm, start, end, psize, false);	/* also_pwc == false */
+}
+
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+				      unsigned long end, int psize)
+{
+	__radix__flush_tlb_range_psize(mm, start, end, psize, true);	/* also_pwc == true */
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long pid, end;
+	enum tlb_flush_type type;
+
+	pid = mm->context.id;
+	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+		return;
+
+	/* 4k page size, just blow the world */
+	if (PAGE_SIZE == 0x1000) {
+		radix__flush_all_mm(mm);
+		return;
+	}
+
+	end = addr + HPAGE_PMD_SIZE;	/* flush covers [addr, addr + HPAGE_PMD_SIZE) */
+
+	/* Otherwise first do the PWC, then iterate the pages. */
+	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {	/* rpti_type: kept distinct from the enum 'type' */
+			unsigned long tgt, rpti_type, pg_sizes;
+
+			tgt = H_RPTI_TARGET_CMMU;
+			rpti_type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+				    H_RPTI_TYPE_PRT;
+			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, rpti_type, pg_sizes,
+					       addr, end);
+		} else if (cputlb_use_tlbie())
+			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		else
+			_tlbiel_va_range_multicast(mm,
+					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+	}
+
+	preempt_enable();
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end)
+{
+	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);	/* PMD mappings flushed as 2M pages */
+}
+EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+
+void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end)
+{
+	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);	/* PUD mappings flushed as 1G pages */
+}
+EXPORT_SYMBOL(radix__flush_pud_tlb_range);
+
+void radix__flush_tlb_all(void)
+{
+	unsigned long rb,prs,r,rs;
+	unsigned long ric = RIC_FLUSH_ALL;
+
+	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
+
+	asm volatile("ptesync": : :"memory");
+	/*
+	 * First flush guest entries by passing PRS = 1 and LPID != 0.
+	 */
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+	/*
+	 * Then flush host entries by passing PRS = 0 and LPID == 0.
+	 */
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));	/* pid and lpid packed into RS */
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);	/* no barriers here: callers add ptesync/tlbsync */
+}
+
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+					    unsigned long lpid,
+					    unsigned long ap, unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));	/* keep the address, clear PPC bits 52-63 */
+	rb |= ap << PPC_BITLSHIFT(58);		/* then insert @ap into that cleared field */
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));	/* pid and lpid packed into RS */
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);	/* no barriers here: callers add ptesync/tlbsync */
+}
+
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);	/* extra tlbie issued with PID 0 */
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+				RIC_FLUSH_TLB);	/* extra tlbie on the dummy address chosen above */
+	}
+}
+
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+				   unsigned long ric)
+{
+	asm volatile("ptesync" : : : "memory");
+
+	/*
+	 * Work around the fact that the "ric" argument to __tlbie_pid_lpid
+	 * must be a compile-time constant to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid_lpid(pid, lpid);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);	/* no fixup call on the PWC-only path */
+		break;
+	case RIC_FLUSH_ALL:
+	default:	/* any other value is treated as RIC_FLUSH_ALL */
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid_lpid(pid, lpid);
+	}
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+					     unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);	/* extra tlbie issued with PID 0 */
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);	/* extra tlbie on the caller's @va */
+	}
+}
+
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					 unsigned long pid, unsigned long lpid,
+					 unsigned long page_size,
+					 unsigned long psize)
+{
+	unsigned long addr;
+	unsigned long ap = mmu_get_ap(psize);
+
+	for (addr = start; addr < end; addr += page_size)	/* one tlbie per page in [start, end) */
+		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);	/* NOTE(review): assumes start < end */
+}
+
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					unsigned long pid, unsigned long lpid,
+					unsigned long page_size,
+					unsigned long psize, bool also_pwc)
+{
+	asm volatile("ptesync" : : : "memory");
+	if (also_pwc)
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);	/* PWC flush precedes the per-page flushes */
+	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end)
+{
+	unsigned long psize, nr_pages;
+	struct mmu_psize_def *def;
+	bool flush_pid;
+
+	/*
+	 * A H_RPTI_TYPE_ALL request is served with RIC_FLUSH_ALL, hence
+	 * do a single IS=1 based flush.
+	 */
+	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		return;
+	}
+
+	if (type & H_RPTI_TYPE_PWC)
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);	/* PWC first, then fall through to the TLB part */
+
+	/* Full PID flush */
+	if (start == 0 && end == -1)
+		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+	/* Do range invalidation for all the valid page sizes */
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		def = &mmu_psize_defs[psize];
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;	/* this page size was not requested */
+
+		nr_pages = (end - start) >> def->shift;
+		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+		/*
+		 * If the number of pages spanning the range is above
+		 * the ceiling, convert the request into a full PID flush.
+		 * And since PID flush takes out all the page sizes, there
+		 * is no need to consider remaining page sizes.
+		 */
+		if (flush_pid) {
+			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+			return;
+		}
+		_tlbie_va_range_lpid(start, end, pid, lpid,
+				     (1UL << def->shift), psize, false);	/* also_pwc == false */
+	}
+}
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+static int __init create_tlb_single_page_flush_ceiling(void)
+{
+	debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
+			   arch_debugfs_dir, &tlb_single_page_flush_ceiling);	/* mode 0600 */
+	debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
+			   arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
+	return 0;	/* always reports success */
+}
+late_initcall(create_tlb_single_page_flush_ceiling);
+
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
new file mode 100644
index 0000000000..f2708c8629
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC64 SLB support.
+ *
+ * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
+ * Based on earlier code written by:
+ * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
+ *    Copyright (c) 2001 Dave Engebretsen
+ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
+ */
+
+#include <asm/interrupt.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <linux/compiler.h>
+#include <linux/context_tracking.h>
+#include <linux/mm_types.h>
+#include <linux/pgtable.h>
+
+#include <asm/udbg.h>
+#include <asm/code-patching.h>
+
+#include "internal.h"
+
+
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
+
+bool stress_slb_enabled __initdata;	/* set when "stress_slb" is on the command line */
+
+static int __init parse_stress_slb(char *p)	/* @p (the option's value) is ignored */
+{
+	stress_slb_enabled = true;
+	return 0;
+}
+early_param("stress_slb", parse_stress_slb);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
+
+static void assert_slb_presence(bool present, unsigned long ea)
+{
+#ifdef CONFIG_DEBUG_VM
+	unsigned long tmp;
+
+	WARN_ON_ONCE(mfmsr() & MSR_EE);	/* expected to run with MSR[EE] clear */
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_206))
+		return;	/* check is skipped without CPU_FTR_ARCH_206 */
+
+	/*
+	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
+	 * ignores all other bits from 0-27, so just clear them all.
+	 */
+	ea &= ~((1UL << SID_SHIFT) - 1);
+	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
+
+	WARN_ON(present == (tmp == 0));	/* warns when @present and a zero result coincide */
+#endif
+}
+
+static inline void slb_shadow_update(unsigned long ea, int ssize,
+				     unsigned long flags,
+				     enum slb_index index)
+{
+	struct slb_shadow *p = get_slb_shadow();
+
+	/*
+	 * Clear the ESID first so the entry is not valid while we are
+	 * updating it.  No write barriers are needed here, provided
+	 * we only update the current CPU's SLB shadow buffer.
+	 */
+	WRITE_ONCE(p->save_area[index].esid, 0);	/* step 1: invalidate the slot */
+	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));	/* step 2 */
+	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));	/* step 3 */
+}
+
+static inline void slb_shadow_clear(enum slb_index index)
+{
+	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));	/* esid := bare index, vsid untouched */
+}
+
+static inline void create_shadowed_slbe(unsigned long ea, int ssize,
+					unsigned long flags,
+					enum slb_index index)
+{
+	/*
+	 * Updating the shadow buffer before writing the SLB ensures
+	 * we don't get a stale entry here if we get preempted by PHYP
+	 * between these two statements.
+	 */
+	slb_shadow_update(ea, ssize, flags, index);
+
+	assert_slb_presence(false, ea);	/* debug check: @ea must not be in the SLB yet */
+	asm volatile("slbmte  %0,%1" :
+		     : "r" (mk_vsid_data(ea, ssize, flags)),
+		       "r" (mk_esid_data(ea, ssize, index))
+		     : "memory" );
+}
+
+/*
+ * Insert bolted entries into SLB (which may not be empty, so don't clear
+ * slb_cache_ptr).
+ */
+void __slb_restore_bolted_realmode(void)
+{
+	struct slb_shadow *p = get_slb_shadow();
+	enum slb_index index;
+
+	 /* No isync needed because realmode. */
+	for (index = 0; index < SLB_NUM_BOLTED; index++) {	/* replay each bolted slot from the shadow buffer */
+		asm volatile("slbmte  %0,%1" :
+		     : "r" (be64_to_cpu(p->save_area[index].vsid)),
+		       "r" (be64_to_cpu(p->save_area[index].esid)));
+	}
+
+	assert_slb_presence(true, local_paca->kstack);	/* debug check: kernel stack must now be mapped */
+}
+
+/*
+ * Insert the bolted entries into an empty SLB.
+ */
+void slb_restore_bolted_realmode(void)
+{
+	__slb_restore_bolted_realmode();
+	get_paca()->slb_cache_ptr = 0;
+
+	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;	/* low SLB_NUM_BOLTED bits set */
+	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;	/* used == bolted entries only */
+}
+
+/*
+ * This flushes all SLB entries including 0, so it must be realmode.
+ */
+void slb_flush_all_realmode(void)
+{
+	asm volatile("slbmte %0,%0; slbia" : : "r" (0));	/* slbmte with zero operands, then slbia */
+}
+
+static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
+{
+	struct slb_shadow *p = get_slb_shadow();
+	unsigned long ksp_esid_data, ksp_vsid_data;
+	u32 ih;
+
+	/*
+	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
+	 * information created with Class=0 entries, which we use for kernel
+	 * SLB entries (the SLB entries themselves are still invalidated).
+	 *
+	 * Older processors will ignore this optimisation. Over-invalidation
+	 * is fine because we never rely on lookaside information existing.
+	 */
+	if (preserve_kernel_lookaside)
+		ih = 1;
+	else
+		ih = 0;
+
+	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);	/* kernel stack entry, from the shadow */
+	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+	asm volatile(PPC_SLBIA(%0)"	\n"
+		     "slbmte	%1, %2	\n"
+		     :: "i" (ih),
+			"r" (ksp_vsid_data),
+			"r" (ksp_esid_data)
+		     : "memory");	/* slbia, then re-insert the kernel stack entry */
+}
+
+/*
+ * This flushes non-bolted entries, it can be run in virtual mode. Must
+ * be called with interrupts disabled.
+ */
+void slb_flush_and_restore_bolted(void)
+{
+	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);	/* build breaks if the bolted count changes */
+
+	WARN_ON(!irqs_disabled());
+
+	/*
+	 * We can't take a PMU exception in the following code, so hard
+	 * disable interrupts.
+	 */
+	hard_irq_disable();
+
+	isync();
+	__slb_flush_and_restore_bolted(false);	/* preserve_kernel_lookaside == false */
+	isync();
+
+	assert_slb_presence(true, get_paca()->kstack);
+
+	get_paca()->slb_cache_ptr = 0;
+
+	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;	/* low SLB_NUM_BOLTED bits set */
+	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;	/* used == bolted entries only */
+}
+
+void slb_save_contents(struct slb_entry *slb_ptr)
+{
+	unsigned long esid, vsid;
+	int slot;
+
+	/* The cache pointer is snapshotted even when no buffer is supplied. */
+	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
+
+	if (!slb_ptr)
+		return;
+
+	/* Read back all mmu_slb_size slots into consecutive slb_ptr entries. */
+	for (slot = 0; slot < mmu_slb_size; slot++, slb_ptr++) {
+		asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (slot));
+		asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (slot));
+		slb_ptr->esid = esid;
+		slb_ptr->vsid = vsid;
+	}
+}
+
+void slb_dump_contents(struct slb_entry *slb_ptr)
+{
+	int i, n;
+	unsigned long e, v;
+	unsigned long llp;
+
+	if (!slb_ptr)
+		return;
+
+	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
+
+	for (i = 0; i < mmu_slb_size; i++) {
+		e = slb_ptr->esid;
+		v = slb_ptr->vsid;
+		slb_ptr++;
+
+		if (!e && !v)
+			continue;	/* all-zero slot: nothing to print */
+
+		pr_err("%02d %016lx %016lx %s\n", i, e, v,
+				(e & SLB_ESID_V) ? "VALID" : "NOT VALID");
+
+		if (!(e & SLB_ESID_V))
+			continue;	/* only valid entries get the decoded line below */
+
+		llp = v & SLB_VSID_LLP;
+		if (v & SLB_VSID_B_1T) {
+			pr_err("     1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
+			       GET_ESID_1T(e),
+			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
+		} else {
+			pr_err("   256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
+			       GET_ESID(e),
+			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
+		}
+	}
+
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* RR is not so useful as it's often not used for allocation */
+		pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);
+
+		/* Dump slb cache entries as well. */
+		pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
+		pr_err("Valid SLB cache entries:\n");
+		n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);	/* clamp to the array size */
+		for (i = 0; i < n; i++)
+			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+		pr_err("Rest of SLB cache entries:\n");
+		for (i = n; i < SLB_CACHE_ENTRIES; i++)
+			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+	}
+}
+
+void slb_vmalloc_update(void)
+{
+	/*
+	 * vmalloc is not bolted, so we just have to flush the non-bolted entries.
+	 */
+	slb_flush_and_restore_bolted();
+}
+
+static bool preload_hit(struct thread_info *ti, unsigned long esid)
+{
+	unsigned char n;
+
+	/* Walk the preload cache from the tail over every live entry. */
+	for (n = 0; n < ti->slb_preload_nr; n++) {
+		unsigned char slot = (ti->slb_preload_tail + n) % SLB_PRELOAD_NR;
+
+		if (ti->slb_preload_esid[slot] == esid)
+			return true;
+	}
+	return false;
+}
+
+static bool preload_add(struct thread_info *ti, unsigned long ea)
+{
+	unsigned long esid;
+	unsigned char slot;
+
+	/* EAs are stored >> 28 so 256MB segments don't need clearing */
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT) && (ea & ESID_MASK_1T))
+		ea &= ESID_MASK_1T;
+
+	esid = ea >> SID_SHIFT;
+
+	if (preload_hit(ti, esid))
+		return false;
+
+	/* Append after the newest entry; when full this replaces the oldest. */
+	slot = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
+	ti->slb_preload_esid[slot] = esid;
+
+	if (ti->slb_preload_nr != SLB_PRELOAD_NR)
+		ti->slb_preload_nr++;
+	else
+		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+
+	return true;
+}
+
+static void preload_age(struct thread_info *ti)
+{
+	if (ti->slb_preload_nr == 0)
+		return;	/* nothing to age out */
+	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+	ti->slb_preload_nr--;
+}
+
+void slb_setup_new_exec(void)
+{
+	struct thread_info *ti = current_thread_info();
+	struct mm_struct *mm = current->mm;
+	unsigned long exec = 0x10000000;
+
+	WARN_ON(irqs_disabled());
+
+	/*
+	 * The preload cache can only be used to determine whether an SLB
+	 * entry exists if it does not start to overflow.
+	 */
+	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
+		return;
+
+	hard_irq_disable();
+
+	/*
+	 * We have no good place to clear the slb preload cache on exec,
+	 * flush_thread is about the earliest arch hook but that happens
+	 * after we switch to the mm and have already preloaded the SLBEs.
+	 *
+	 * For the most part that's probably okay to use entries from the
+	 * previous exec, they will age out if unused. It may turn out to
+	 * be an advantage to clear the cache before switching to it,
+	 * however.
+	 */
+
+	/*
+	 * Preload some userspace segments into the SLB.
+	 * Almost all 32 and 64bit PowerPC executables are linked at
+	 * 0x10000000 so it makes sense to preload this segment.
+	 */
+	if (!is_kernel_addr(exec)) {
+		if (preload_add(ti, exec))
+			slb_allocate_user(mm, exec);
+	}
+
+	/* Libraries and mmaps. */
+	if (!is_kernel_addr(mm->mmap_base)) {
+		if (preload_add(ti, mm->mmap_base))
+			slb_allocate_user(mm, mm->mmap_base);
+	}
+
+	/* Synchronize the slbmte preloads above; see switch_slb */
+	asm volatile("isync" : : : "memory");
+
+	local_irq_enable();
+}
+
+void preload_new_slb_context(unsigned long start, unsigned long sp)
+{
+	struct thread_info *ti = current_thread_info();
+	struct mm_struct *mm = current->mm;
+	unsigned long heap = mm->start_brk;
+
+	WARN_ON(irqs_disabled());
+
+	/* As in slb_setup_new_exec: the preload cache must not overflow */
+	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
+		return;
+
+	hard_irq_disable();
+
+	/* Userspace entry address. */
+	if (!is_kernel_addr(start)) {
+		if (preload_add(ti, start))
+			slb_allocate_user(mm, start);
+	}
+
+	/* Top of stack, grows down. */
+	if (!is_kernel_addr(sp)) {
+		if (preload_add(ti, sp))
+			slb_allocate_user(mm, sp);
+	}
+
+	/* Bottom of heap, grows up. */
+	if (heap && !is_kernel_addr(heap)) {
+		if (preload_add(ti, heap))
+			slb_allocate_user(mm, heap);
+	}
+
+	/* Synchronize the slbmte preloads above; see switch_slb */
+	asm volatile("isync" : : : "memory");
+
+	local_irq_enable();
+}
+
+static void slb_cache_slbie_kernel(unsigned int index)
+{
+	unsigned long kstack = get_paca()->kstack;
+	unsigned long ea = get_paca()->slb_cache[index];
+
+	/* Rebuild the kernel EA from the cached ESID bits. */
+	ea = (ea << SID_SHIFT) | 0xc000000000000000ULL;
+	if (ea == (kstack & slb_esid_mask(mmu_kernel_ssize)))
+		return;	/* leave the kernel stack segment alone */
+	ea |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
+
+	asm volatile("slbie %0" : : "r" (ea));
+}
+
+static void slb_cache_slbie_user(unsigned int index)
+{
+	unsigned long ea = get_paca()->slb_cache[index];
+
+	ea <<= SID_SHIFT;
+	/* user slbs have C=1 */
+	ea |= (user_segment_size(ea) << SLBIE_SSIZE_SHIFT) | SLBIE_C;
+
+	asm volatile("slbie %0" : : "r" (ea));
+}
+
+/* Flush all user entries from the segment table of the current processor. */
+void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned char i;
+
+	/*
+	 * We need interrupts hard-disabled here, not just soft-disabled,
+	 * so that a PMU interrupt can't occur, which might try to access
+	 * user memory (to get a stack trace) and possibly cause an SLB miss
+	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
+	 */
+	hard_irq_disable();
+	isync();
+	if (stress_slb()) {
+		__slb_flush_and_restore_bolted(false);
+		isync();
+		get_paca()->slb_cache_ptr = 0;
+		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+
+	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/*
+		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
+		 * associated lookaside structures, which matches what
+		 * switch_slb wants. So ARCH_300 does not use the slb
+		 * cache.
+		 */
+		asm volatile(PPC_SLBIA(3));
+
+	} else {
+		unsigned long offset = get_paca()->slb_cache_ptr;
+
+		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
+		    offset <= SLB_CACHE_ENTRIES) {
+			/*
+			 * Could assert_slb_presence(true) here, but
+			 * hypervisor or machine check could have come
+			 * in and removed the entry at this point.
+			 */
+
+			for (i = 0; i < offset; i++)
+				slb_cache_slbie_user(i);
+
+			/* Workaround POWER5 < DD2.1 issue */
+			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
+				slb_cache_slbie_user(0);
+
+		} else {
+			/* Flush but retain kernel lookaside information */
+			__slb_flush_and_restore_bolted(true);
+			isync();
+
+			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+		}
+
+		get_paca()->slb_cache_ptr = 0;
+	}
+	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+
+	copy_mm_to_paca(mm);
+
+	/*
+	 * We gradually age out SLBs after a number of context switches to
+	 * reduce reload overhead of unused entries (like we do with FP/VEC
+	 * reload). Each time we wrap 256 switches, take an entry out of the
+	 * SLB preload cache.
+	 */
+	tsk->thread.load_slb++;
+	if (!tsk->thread.load_slb) {
+		unsigned long pc = KSTK_EIP(tsk);
+
+		preload_age(ti);
+		preload_add(ti, pc);
+	}
+
+	for (i = 0; i < ti->slb_preload_nr; i++) {
+		unsigned char idx;
+		unsigned long ea;
+
+		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
+
+		slb_allocate_user(mm, ea);
+	}
+
+	/*
+	 * Synchronize slbmte preloads with possible subsequent user memory
+	 * address accesses by the kernel (user mode won't happen until
+	 * rfid, which is safe).
+	 */
+	isync();
+}
+
+void slb_set_size(u16 size)
+{
+	mmu_slb_size = size;	/* bound used when walking/allocating SLB entries */
+}
+
+void slb_initialize(void)
+{
+	unsigned long linear_llp, vmalloc_llp, io_llp;
+	unsigned long lflags;
+	static int slb_encoding_inited;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	unsigned long vmemmap_llp;
+#endif
+
+	/* Prepare our SLB miss handler based on our page size */
+	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
+	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
+	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
+	if (!slb_encoding_inited) {
+		slb_encoding_inited = 1;
+		pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
+		pr_devel("SLB: io      LLP = %04lx\n", io_llp);
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
+#endif
+	}
+
+	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
+	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+
+	lflags = SLB_VSID_KERNEL | linear_llp;
+
+	/* Invalidate the entire SLB (even entry 0) and all the ERATs */
+	asm volatile("isync":::"memory");
+	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
+	asm volatile("isync; slbia; isync":::"memory");
+	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
+
+	/*
+	 * For the boot cpu, we're running on the stack in init_thread_union,
+	 * which is in the first segment of the linear mapping, and also
+	 * get_paca()->kstack hasn't been initialized yet.
+	 * For secondary cpus, we need to bolt the kernel stack entry now.
+	 */
+	slb_shadow_clear(KSTACK_INDEX);
+	if (raw_smp_processor_id() != boot_cpuid &&
+	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
+		create_shadowed_slbe(get_paca()->kstack,
+				     mmu_kernel_ssize, lflags, KSTACK_INDEX);
+
+	asm volatile("isync":::"memory");
+}
+
+static void slb_cache_update(unsigned long esid_data)
+{
+	int slot;
+
+	/*
+	 * ISAv3.0B and later does not use slb_cache, and neither does
+	 * stress_slb() mode.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) || stress_slb())
+		return;
+
+	slot = local_paca->slb_cache_ptr;
+	if (slot >= SLB_CACHE_ENTRIES) {
+		/*
+		 * Our cache is full and the current cache content strictly
+		 * doesn't indicate the active SLB contents. Bump the ptr
+		 * so that switch_slb() will ignore the cache.
+		 */
+		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+		return;
+	}
+
+	/*
+	 * We have space in slb cache for optimized switch_slb().
+	 * Record the top 36 bits from esid_data as per ISA and advance
+	 * the cache pointer.
+	 */
+	local_paca->slb_cache[slot] = esid_data >> SID_SHIFT;
+	local_paca->slb_cache_ptr++;
+}
+
+static enum slb_index alloc_slb_index(bool kernel)
+{
+	enum slb_index index;
+
+	/*
+	 * The allocation bitmaps can become out of sync with the SLB
+	 * when the _switch code does slbie when bolting a new stack
+	 * segment and it must not be anywhere else in the SLB. This leaves
+	 * a kernel allocated entry that is unused in the SLB. With very
+	 * large systems or small segment sizes, the bitmaps could slowly
+	 * fill with these entries. They will eventually be cleared out
+	 * by the round robin allocator in that case, so it's probably not
+	 * worth accounting for.
+	 */
+
+	/*
+	 * SLBs beyond 32 entries are allocated with stab_rr only.
+	 * POWER7/8/9 have 32 SLB entries; this could be expanded if a
+	 * future CPU has more.
+	 */
+	if (local_paca->slb_used_bitmap != U32_MAX) {
+		index = ffz(local_paca->slb_used_bitmap);
+		local_paca->slb_used_bitmap |= 1U << index;
+		if (kernel)
+			local_paca->slb_kern_bitmap |= 1U << index;
+	} else {
+		/* Round-robin replacement of slb starting at SLB_NUM_BOLTED. */
+		index = local_paca->stab_rr;
+		if (index < (mmu_slb_size - 1))
+			index++;
+		else
+			index = SLB_NUM_BOLTED;
+		local_paca->stab_rr = index;
+		if (index < 32) {
+			if (kernel)
+				local_paca->slb_kern_bitmap |= 1U << index;
+			else
+				local_paca->slb_kern_bitmap &= ~(1U << index);
+		}
+	}
+	BUG_ON(index < SLB_NUM_BOLTED);
+
+	return index;
+}
+
+static long slb_insert_entry(unsigned long ea, unsigned long context,
+				unsigned long flags, int ssize, bool kernel)
+{
+	unsigned long vsid;
+	unsigned long vsid_data, esid_data;
+	enum slb_index index;
+
+	vsid = get_vsid(context, ea, ssize);
+	if (!vsid)
+		return -EFAULT;
+
+	/*
+	 * There must not be a kernel SLB fault in alloc_slb_index or before
+	 * slbmte here or the allocation bitmaps could get out of whack with
+	 * the SLB.
+	 *
+	 * User SLB faults or preloads take this path which might get inlined
+	 * into the caller, so add compiler barriers here to ensure unsafe
+	 * memory accesses do not come between.
+	 */
+	barrier();
+
+	index = alloc_slb_index(kernel);
+
+	vsid_data = __mk_vsid_data(vsid, ssize, flags);
+	esid_data = mk_esid_data(ea, ssize, index);
+
+	/*
+	 * No need for an isync before or after this slbmte. The exception
+	 * we enter with and the rfid we exit with are context synchronizing.
+	 * User preloads should add isync afterwards in case the kernel
+	 * accesses user memory before it returns to userspace with rfid.
+	 */
+	assert_slb_presence(false, ea);
+	if (stress_slb()) {
+		int slb_cache_index = local_paca->slb_cache_ptr;
+
+		/*
+		 * stress_slb() does not use slb cache, repurpose as a
+		 * cache of inserted (non-bolted) kernel SLB entries. All
+		 * non-bolted kernel entries are flushed on any user fault,
+		 * or if there are already 3 non-bolted kernel entries.
+		 */
+		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
+		if (!kernel || slb_cache_index == 3) {
+			int i;
+
+			for (i = 0; i < slb_cache_index; i++)
+				slb_cache_slbie_kernel(i);
+			slb_cache_index = 0;
+		}
+
+		if (kernel)
+			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
+		local_paca->slb_cache_ptr = slb_cache_index;
+	}
+	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
+
+	barrier();
+
+	if (!kernel)
+		slb_cache_update(esid_data);
+
+	return 0;
+}
+
+static long slb_allocate_kernel(unsigned long ea, unsigned long id)
+{
+	unsigned long context;
+	unsigned long flags;
+	int ssize;
+
+	if (id == LINEAR_MAP_REGION_ID) {
+
+		/* We only support up to H_MAX_PHYSMEM_BITS */
+		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
+			return -EFAULT;
+
+		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	} else if (id == VMEMMAP_REGION_ID) {
+
+		if (ea >= H_VMEMMAP_END)
+			return -EFAULT;
+
+		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
+	} else if (id == VMALLOC_REGION_ID) {
+
+		if (ea >= H_VMALLOC_END)
+			return -EFAULT;
+
+		flags = local_paca->vmalloc_sllp;
+
+	} else if (id == IO_REGION_ID) {
+
+		if (ea >= H_KERN_IO_END)
+			return -EFAULT;
+
+		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+
+	} else {
+		return -EFAULT;
+	}
+
+	ssize = MMU_SEGSIZE_1T;
+	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		ssize = MMU_SEGSIZE_256M;
+
+	context = get_kernel_context(ea);
+
+	return slb_insert_entry(ea, context, flags, ssize, true);
+}
+
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
+{
+	unsigned long context;
+	unsigned long flags;
+	int bpsize;
+	int ssize;
+
+	/*
+	 * Treat an SLB miss on an address at or above the
+	 * addr limit as a bad access.
+	 */
+	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
+		return -EFAULT;
+
+	context = get_user_context(&mm->context, ea);
+	if (!context)
+		return -EFAULT;
+
+	if (unlikely(ea >= H_PGTABLE_RANGE)) {
+		WARN_ON(1);
+		return -EFAULT;
+	}
+
+	ssize = user_segment_size(ea);
+
+	bpsize = get_slice_psize(mm, ea);
+	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+
+	return slb_insert_entry(ea, context, flags, ssize, false);
+}
+
+DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
+{
+	unsigned long ea = regs->dar;
+	unsigned long id = get_region_id(ea);
+
+	/* IRQs are not reconciled here, so test MSR[EE], not irqs_disabled() */
+	VM_WARN_ON(mfmsr() & MSR_EE);
+
+	if (regs_is_unrecoverable(regs))
+		return -EINVAL;
+
+	/*
+	 * SLB kernel faults must be very careful not to touch anything that is
+	 * not bolted. E.g., PACA and global variables are okay, mm->context
+	 * stuff is not. SLB user faults may access all of memory (and induce
+	 * one recursive SLB kernel fault), so the kernel fault must not
+	 * trample on the user fault state at those points.
+	 */
+
+	/*
+	 * This is a raw interrupt handler, for performance, so that
+	 * fast_interrupt_return can be used. The handler must not touch local
+	 * irq state, or schedule. We could test for usermode and upgrade to a
+	 * normal process context (synchronous) interrupt for those, which
+	 * would make them first-class kernel code and able to be traced and
+	 * instrumented, although performance would suffer a bit, it would
+	 * probably be a good tradeoff.
+	 */
+	if (id >= LINEAR_MAP_REGION_ID) {
+		long err;
+#ifdef CONFIG_DEBUG_VM
+		/* Catch recursive kernel SLB faults. */
+		BUG_ON(local_paca->in_kernel_slb_handler);
+		local_paca->in_kernel_slb_handler = 1;
+#endif
+		err = slb_allocate_kernel(ea, id);
+#ifdef CONFIG_DEBUG_VM
+		local_paca->in_kernel_slb_handler = 0;
+#endif
+		return err;
+	} else {
+		struct mm_struct *mm = current->mm;
+		long err;
+
+		if (unlikely(!mm))
+			return -EFAULT;
+
+		err = slb_allocate_user(mm, ea);
+		if (!err)
+			preload_add(current_thread_info(), ea);
+
+		return err;
+	}
+}
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
new file mode 100644
index 0000000000..c0b58afb9a
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * address space "slices" (meta-segments) support
+ *
+ * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * Based on hugetlb implementation
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>
+#include <asm/mman.h>
+#include <asm/mmu.h>
+#include <asm/copro.h>
+#include <asm/hugetlb.h>
+#include <asm/mmu_context.h>
+
+static DEFINE_SPINLOCK(slice_convert_lock);
+
+#ifdef DEBUG
+int _slice_debug = 1;
+
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
+{
+	if (!_slice_debug)
+		return;
+	pr_devel("%s low_slice: %*pbl\n", label,
+			(int)SLICE_NUM_LOW, &mask->low_slices);
+	pr_devel("%s high_slice: %*pbl\n", label,
+			(int)SLICE_NUM_HIGH, mask->high_slices);
+}
+
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
+
+#else /* !DEBUG */
+
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
+#define slice_dbg(fmt...)
+
+#endif /* DEBUG */
+
+static inline notrace bool slice_addr_is_low(unsigned long addr)
+{
+	const u64 ea = addr;
+
+	return ea < SLICE_LOW_TOP;
+}
+
+static void slice_range_to_mask(unsigned long start, unsigned long len,
+				struct slice_mask *ret)
+{
+	unsigned long last = start + len - 1;
+
+	/* Begin with an empty mask, then set the slices the range touches. */
+	ret->low_slices = 0;
+	if (SLICE_NUM_HIGH)
+		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+
+	if (slice_addr_is_low(start)) {
+		unsigned long low_last = min(last,
+					     (unsigned long)(SLICE_LOW_TOP - 1));
+
+		ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(low_last) + 1))
+			- (1u << GET_LOW_SLICE_INDEX(start));
+	}
+
+	if (SLICE_NUM_HIGH && !slice_addr_is_low(last)) {
+		unsigned long first = GET_HIGH_SLICE_INDEX(start);
+		unsigned long top = ALIGN(last, (1UL << SLICE_HIGH_SHIFT));
+
+		bitmap_set(ret->high_slices, first, GET_HIGH_SLICE_INDEX(top) - first);
+	}
+}
+
+static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+			      unsigned long len)
+{
+	struct vm_area_struct *next;
+
+	if (addr > (mm_ctx_slb_addr_limit(&mm->context) - len))
+		return 0;
+	next = find_vma(mm, addr);
+	return (next == NULL) || (vm_start_gap(next) >= (addr + len));
+}
+
+static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+	/* True whenever slice_area_is_free() rejects the slice's whole span. */
+	return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT, 1ul << SLICE_LOW_SHIFT);
+}
+
+static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+	unsigned long start = slice << SLICE_HIGH_SHIFT;
+	unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+
+	/* Hack, so that each address is controlled by exactly one
+	 * of the high or low area bitmaps, the first high area starts
+	 * at SLICE_LOW_TOP (4GB), not 0 */
+	if (start == 0)
+		start = (unsigned long)SLICE_LOW_TOP;
+
+	return !slice_area_is_free(mm, start, end - start);
+}
+
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
+				unsigned long high_limit)
+{
+	unsigned long slice;
+
+	ret->low_slices = 0;
+	if (SLICE_NUM_HIGH)
+		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+
+	for (slice = 0; slice < SLICE_NUM_LOW; slice++)
+		if (!slice_low_has_vma(mm, slice))
+			ret->low_slices |= 1u << slice;
+
+	/* Nothing more to do when the limit lies within the low area. */
+	if (slice_addr_is_low(high_limit - 1))
+		return;
+	for (slice = 0; slice < GET_HIGH_SLICE_INDEX(high_limit); slice++)
+		if (!slice_high_has_vma(mm, slice))
+			__set_bit(slice, ret->high_slices);
+}
+
+static bool slice_check_range_fits(struct mm_struct *mm,
+			   const struct slice_mask *available,
+			   unsigned long start, unsigned long len)
+{
+	unsigned long last = start + len - 1;
+	u64 wanted_low = 0;
+
+	if (slice_addr_is_low(start)) {
+		unsigned long low_last = min(last,
+					     (unsigned long)(SLICE_LOW_TOP - 1));
+
+		wanted_low = (1u << (GET_LOW_SLICE_INDEX(low_last) + 1))
+				- (1u << GET_LOW_SLICE_INDEX(start));
+	}
+	/* Every low slice the range touches must be available ... */
+	if (wanted_low & ~available->low_slices)
+		return false;
+
+	if (SLICE_NUM_HIGH && !slice_addr_is_low(last)) {
+		unsigned long top = ALIGN(last, (1UL << SLICE_HIGH_SHIFT));
+		unsigned long stop = GET_HIGH_SLICE_INDEX(top);
+		unsigned long slice;
+
+		/* ... and so must every high slice it touches. */
+		for (slice = GET_HIGH_SLICE_INDEX(start); slice < stop; slice++)
+			if (!test_bit(slice, available->high_slices))
+				return false;
+	}
+
+	return true;
+}
+
+static void slice_flush_segments(void *parm)
+{
+#ifdef CONFIG_PPC64
+	struct mm_struct *mm = parm;
+	unsigned long flags;
+
+	if (mm != current->active_mm)
+		return;	/* mm is not the active mm here, nothing to flush */
+
+	copy_mm_to_paca(current->active_mm);
+
+	local_irq_save(flags);
+	slb_flush_and_restore_bolted();
+	local_irq_restore(flags);
+#endif /* CONFIG_PPC64 */
+}
+
+static void slice_convert(struct mm_struct *mm,
+				const struct slice_mask *mask, int psize)
+{
+	int index, mask_index;
+	/* Byte arrays holding the per-slice psize, packed as 4-bit fields */
+	unsigned char *hpsizes, *lpsizes;
+	struct slice_mask *psize_mask, *old_mask;
+	unsigned long i, flags;
+	int old_psize;
+
+	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
+	slice_print_mask(" mask", mask);
+
+	psize_mask = slice_mask_for_size(&mm->context, psize);
+
+	/* We need to use a spinlock here to protect against
+	 * concurrent 64k -> 4k demotion ...
+	 */
+	spin_lock_irqsave(&slice_convert_lock, flags);
+
+	lpsizes = mm_ctx_low_slices(&mm->context);
+	for (i = 0; i < SLICE_NUM_LOW; i++) {
+		if (!(mask->low_slices & (1u << i)))
+			continue;
+
+		mask_index = i & 0x1;
+		index = i >> 1;
+
+		/* Move the slice from its old psize mask to the new one */
+		old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+		old_mask = slice_mask_for_size(&mm->context, old_psize);
+		old_mask->low_slices &= ~(1u << i);
+		psize_mask->low_slices |= 1u << i;
+
+		/* Update the sizes array: write psize into this slice's nibble */
+		lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
+
+	hpsizes = mm_ctx_high_slices(&mm->context);
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
+		if (!test_bit(i, mask->high_slices))
+			continue;
+
+		mask_index = i & 0x1;
+		index = i >> 1;
+
+		/* Move the slice from its old psize mask to the new one */
+		old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+		old_mask = slice_mask_for_size(&mm->context, old_psize);
+		__clear_bit(i, old_mask->high_slices);
+		__set_bit(i, psize_mask->high_slices);
+
+		/* Update the sizes array: write psize into this slice's nibble */
+		hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
+
+	slice_dbg(" lsps=%lx, hsps=%lx\n",
+		  (unsigned long)mm_ctx_low_slices(&mm->context),
+		  (unsigned long)mm_ctx_high_slices(&mm->context));
+
+	spin_unlock_irqrestore(&slice_convert_lock, flags);
+
+	copro_flush_all_slbs(mm);
+}
+
+/*
+ * Compute which slice addr is part of;
+ * set *boundary_addr to the start or end boundary of that slice
+ * (depending on the 'end' parameter: 0 for start, 1 for end);
+ * return boolean indicating if the slice is marked as available in the
+ * 'available' slice_mask.
+ */
+static bool slice_scan_available(unsigned long addr,
+				 const struct slice_mask *available,
+				 int end, unsigned long *boundary_addr)
+{
+	unsigned long slice;
+	if (slice_addr_is_low(addr)) {
+		slice = GET_LOW_SLICE_INDEX(addr);
+		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
+		return !!(available->low_slices & (1u << slice));
+	} else {
+		slice = GET_HIGH_SLICE_INDEX(addr);
+		*boundary_addr = (slice + end) ?
+			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
+		return !!test_bit(slice, available->high_slices);
+	}
+}
+
+static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
+					      unsigned long addr, unsigned long len,
+					      const struct slice_mask *available,
+					      int psize, unsigned long high_limit)
+{
+	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+	unsigned long found, next_end;
+	struct vm_unmapped_area_info info;
+
+	info.flags = 0;
+	info.length = len;
+	info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+	info.align_offset = 0;
+	/*
+	 * Search up to the maximum address allowed for this mmap request
+	 */
+	while (addr < high_limit) {
+		info.low_limit = addr;
+		if (!slice_scan_available(addr, available, 1, &addr))
+			continue;
+
+ next_slice:
+		/*
+		 * At this point [info.low_limit; addr) covers
+		 * available slices only and ends at a slice boundary.
+		 * Check if we need to reduce the range, or if we can
+		 * extend it to cover the next available slice.
+		 */
+		if (addr >= high_limit)
+			addr = high_limit;
+		else if (slice_scan_available(addr, available, 1, &next_end)) {
+			addr = next_end;
+			goto next_slice;
+		}
+		info.high_limit = addr;
+
+		found = vm_unmapped_area(&info);
+		if (!(found & ~PAGE_MASK))
+			return found;
+	}
+
+	return -ENOMEM;
+}
+
+static unsigned long slice_find_area_topdown(struct mm_struct *mm,
+					     unsigned long addr, unsigned long len,
+					     const struct slice_mask *available,
+					     int psize, unsigned long high_limit)
+{
+	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+	unsigned long found, prev;
+	struct vm_unmapped_area_info info;
+	unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+	info.align_offset = 0;
+	/*
+	 * If we are trying to allocate above DEFAULT_MAP_WINDOW,
+	 * add the difference to the mmap_base.
+	 * This should only be applied to requests for which high_limit
+	 * is above DEFAULT_MAP_WINDOW.
+	 */
+	if (high_limit > DEFAULT_MAP_WINDOW)
+		addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
+
+	while (addr > min_addr) {
+		info.high_limit = addr;
+		if (!slice_scan_available(addr - 1, available, 0, &addr))
+			continue;
+
+ prev_slice:
+		/*
+		 * At this point [addr; info.high_limit) covers
+		 * available slices only and starts at a slice boundary.
+		 * Check if we need to reduce the range, or if we can
+		 * extend it to cover the previous available slice.
+		 */
+		if (addr < min_addr)
+			addr = min_addr;
+		else if (slice_scan_available(addr - 1, available, 0, &prev)) {
+			addr = prev;
+			goto prev_slice;
+		}
+		info.low_limit = addr;
+
+		found = vm_unmapped_area(&info);
+		if (!(found & ~PAGE_MASK))
+			return found;
+	}
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, psize, high_limit);
+}
+
+
+static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
+				     const struct slice_mask *mask, int psize,
+				     int topdown, unsigned long high_limit)
+{
+	unsigned long base = mm->mmap_base;
+
+	return topdown ? slice_find_area_topdown(mm, base, len, mask, psize, high_limit)
+		       : slice_find_area_bottomup(mm, base, len, mask, psize, high_limit);
+}
+
+static inline void slice_copy_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
+{
+	/* Low slices are a plain word; high slices are a bitmap, if any. */
+	dst->low_slices = src->low_slices;
+	if (SLICE_NUM_HIGH)
+		bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+}
+
+static inline void slice_or_mask(struct slice_mask *dst,
+					const struct slice_mask *src1,
+					const struct slice_mask *src2)
+{
+	/* dst = src1 | src2, for the low word and (if any) the high bitmap. */
+	dst->low_slices = src1->low_slices | src2->low_slices;
+	if (SLICE_NUM_HIGH)
+		bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
+
+static inline void slice_andnot_mask(struct slice_mask *dst,
+					const struct slice_mask *src1,
+					const struct slice_mask *src2)
+{
+	/* dst = src1 & ~src2, for the low word and (if any) the high bitmap. */
+	dst->low_slices = src1->low_slices & ~src2->low_slices;
+	if (SLICE_NUM_HIGH)
+		bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define MMU_PAGE_BASE	MMU_PAGE_64K
+#else
+#define MMU_PAGE_BASE	MMU_PAGE_4K
+#endif
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+				      unsigned long flags, unsigned int psize,
+				      int topdown)
+{
+	struct slice_mask good_mask;
+	struct slice_mask potential_mask;
+	const struct slice_mask *maskp;
+	const struct slice_mask *compat_maskp = NULL;
+	int fixed = (flags & MAP_FIXED);
+	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+	unsigned long page_size = 1UL << pshift;
+	struct mm_struct *mm = current->mm;
+	unsigned long newaddr;
+	unsigned long high_limit;
+
+	high_limit = DEFAULT_MAP_WINDOW;
+	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+		high_limit = TASK_SIZE;
+
+	if (len > high_limit)
+		return -ENOMEM;
+	if (len & (page_size - 1))
+		return -EINVAL;
+	if (fixed) {
+		if (addr & (page_size - 1))
+			return -EINVAL;
+		if (addr > high_limit - len)
+			return -ENOMEM;
+	}
+
+	if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
+		/*
+		 * Increasing the slb_addr_limit does not require
+		 * slice mask cache to be recalculated because it should
+		 * be already initialised beyond the old address limit.
+		 */
+		mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
+
+		on_each_cpu(slice_flush_segments, mm, 1);
+	}
+
+	/* Sanity checks */
+	BUG_ON(mm->task_size == 0);
+	BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
+	VM_BUG_ON(radix_enabled());
+
+	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
+	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
+		  addr, len, flags, topdown);
+
+	/* If hint, make sure it matches our alignment restrictions */
+	if (!fixed && addr) {
+		addr = ALIGN(addr, page_size);
+		slice_dbg(" aligned addr=%lx\n", addr);
+		/* Ignore hint if it's too large or overlaps a VMA */
+		if (addr > high_limit - len || addr < mmap_min_addr ||
+		    !slice_area_is_free(mm, addr, len))
+			addr = 0;
+	}
+
+	/* First make up a "good" mask of slices that have the right size
+	 * already
+	 */
+	maskp = slice_mask_for_size(&mm->context, psize);
+
+	/*
+	 * Here "good" means slices that are already the right page size,
+	 * "compat" means slices that have a compatible page size (i.e.
+	 * 4k in a 64k pagesize kernel), and "free" means slices without
+	 * any VMAs.
+	 *
+	 * If MAP_FIXED:
+	 *	check if fits in good | compat => OK
+	 *	check if fits in good | compat | free => convert free
+	 *	else bad
+	 * If have hint:
+	 *	check if hint fits in good => OK
+	 *	check if hint fits in good | free => convert free
+	 * Otherwise:
+	 *	search in good, found => OK
+	 *	search in good | free, found => convert free
+	 *	search in good | compat | free, found => convert free.
+	 */
+
+	/*
+	 * If we support combo pages, we can allow 64k pages in 4k slices
+	 * The mask copies could be avoided in most cases here if we had
+	 * a pointer to good mask for the next code to use.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+		compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
+		if (fixed)
+			slice_or_mask(&good_mask, maskp, compat_maskp);
+		else
+			slice_copy_mask(&good_mask, maskp);
+	} else {
+		slice_copy_mask(&good_mask, maskp);
+	}
+
+	slice_print_mask(" good_mask", &good_mask);
+	if (compat_maskp)
+		slice_print_mask(" compat_mask", compat_maskp);
+
+	/* First check hint if it's valid or if we have MAP_FIXED */
+	if (addr != 0 || fixed) {
+		/* Check if we fit in the good mask. If we do, we just return,
+		 * nothing else to do
+		 */
+		if (slice_check_range_fits(mm, &good_mask, addr, len)) {
+			slice_dbg(" fits good !\n");
+			newaddr = addr;
+			goto return_addr;
+		}
+	} else {
+		/* Now let's see if we can find something in the existing
+		 * slices for that size
+		 */
+		newaddr = slice_find_area(mm, len, &good_mask,
+					  psize, topdown, high_limit);
+		if (newaddr != -ENOMEM) {
+			/* Found within the good mask, we don't have to setup,
+			 * we thus return directly
+			 */
+			slice_dbg(" found area at 0x%lx\n", newaddr);
+			goto return_addr;
+		}
+	}
+	/*
+	 * We don't fit in the good mask, check what other slices are
+	 * empty and thus can be converted
+	 */
+	slice_mask_for_free(mm, &potential_mask, high_limit);
+	slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+	slice_print_mask(" potential", &potential_mask);
+
+	if (addr != 0 || fixed) {
+		if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+			slice_dbg(" fits potential !\n");
+			newaddr = addr;
+			goto convert;
+		}
+	}
+
+	/* If we have MAP_FIXED and failed the above steps, then error out */
+	if (fixed)
+		return -EBUSY;
+
+	slice_dbg(" search...\n");
+
+	/* If we had a hint that didn't work out, see if we can fit
+	 * anywhere in the good area.
+	 */
+	if (addr) {
+		newaddr = slice_find_area(mm, len, &good_mask,
+					  psize, topdown, high_limit);
+		if (newaddr != -ENOMEM) {
+			slice_dbg(" found area at 0x%lx\n", newaddr);
+			goto return_addr;
+		}
+	}
+
+	/* Now let's see if we can find something in the existing slices
+	 * for that size plus free slices
+	 */
+	newaddr = slice_find_area(mm, len, &potential_mask,
+				  psize, topdown, high_limit);
+
+	if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM &&
+	    psize == MMU_PAGE_64K) {
+		/* retry the search with 4k-page slices included */
+		slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+		newaddr = slice_find_area(mm, len, &potential_mask,
+					  psize, topdown, high_limit);
+	}
+
+	if (newaddr == -ENOMEM)
+		return -ENOMEM;
+
+	slice_range_to_mask(newaddr, len, &potential_mask);
+	slice_dbg(" found potential area at 0x%lx\n", newaddr);
+	slice_print_mask(" mask", &potential_mask);
+
+ convert:
+	/*
+	 * Try to allocate the context before we do slice convert
+	 * so that we handle the context allocation failure gracefully.
+	 */
+	if (need_extra_context(mm, newaddr)) {
+		if (alloc_extended_context(mm, newaddr) < 0)
+			return -ENOMEM;
+	}
+
+	slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+	if (compat_maskp && !fixed)
+		slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+	if (potential_mask.low_slices ||
+		(SLICE_NUM_HIGH &&
+		 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+		slice_convert(mm, &potential_mask, psize);
+		if (psize > MMU_PAGE_BASE)
+			on_each_cpu(slice_flush_segments, mm, 1);
+	}
+	return newaddr;
+
+return_addr:
+	if (need_extra_context(mm, newaddr)) {
+		if (alloc_extended_context(mm, newaddr) < 0)
+			return -ENOMEM;
+	}
+	return newaddr;
+}
+EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
+
+/* Bottom-up mmap search; uses the slice allocator unless radix is enabled. */
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+				     unsigned long len, unsigned long pgoff,
+				     unsigned long flags)
+{
+	unsigned int psize;
+
+	if (radix_enabled())
+		return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
+	psize = mm_ctx_user_psize(&current->mm->context);
+	return slice_get_unmapped_area(addr, len, flags, psize, 0);
+}
+
+/* Top-down mmap search; uses the slice allocator unless radix is enabled. */
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+		const unsigned long addr0, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	unsigned int psize;
+
+	if (radix_enabled())
+		return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags);
+	psize = mm_ctx_user_psize(&current->mm->context);
+	return slice_get_unmapped_area(addr0, len, flags, psize, 1);
+}
+
+/* Look up the 4-bit page-size index recorded for the slice containing @addr. */
+unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+	unsigned char *tbl;
+	int slice;
+
+	VM_BUG_ON(radix_enabled());
+	if (slice_addr_is_low(addr)) {
+		tbl = mm_ctx_low_slices(&mm->context);
+		slice = GET_LOW_SLICE_INDEX(addr);
+	} else {
+		tbl = mm_ctx_high_slices(&mm->context);
+		slice = GET_HIGH_SLICE_INDEX(addr);
+	}
+	/* two slices per byte: even slice in the low nibble, odd in the high */
+	return (tbl[slice >> 1] >> ((slice & 0x1) * 4)) & 0xf;
+}
+EXPORT_SYMBOL_GPL(get_slice_psize);
+
+/*
+ * Initialise the slice state of @mm for exec: default SLB address
+ * limit, default user page size, and every slice set to that page size.
+ */
+void slice_init_new_context_exec(struct mm_struct *mm)
+{
+	unsigned int psize = mmu_virtual_psize;
+	/* each byte of the psize arrays packs two 4-bit slice entries */
+	int fill = (psize << 4) | psize;
+	struct slice_mask *mask;
+
+	slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
+
+	/*
+	 * exec starts from the default limit; on fork the limit is simply
+	 * inherited from the mm being duplicated.
+	 */
+	mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
+	mm_ctx_set_user_psize(&mm->context, psize);
+
+	/* Set all slice psizes to the default. */
+	memset(mm_ctx_low_slices(&mm->context), fill, SLICE_NUM_LOW >> 1);
+	memset(mm_ctx_high_slices(&mm->context), fill, SLICE_NUM_HIGH >> 1);
+
+	/*
+	 * The slice mask cache starts zeroed, so only the cache for the
+	 * default size has to be filled in.
+	 */
+	mask = slice_mask_for_size(&mm->context, psize);
+	mask->low_slices = ~0UL;
+	if (SLICE_NUM_HIGH)
+		bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
+}
+
+/* For a 32-bit task, set the SLB address limit to DEFAULT_MAP_WINDOW. */
+void slice_setup_new_exec(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
+
+	/* tasks that are not 32-bit keep the limit they already have */
+	if (is_32bit_task())
+		mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
+}
+
+/* Build the slice mask for (start, len) and convert those slices to @psize. */
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+			   unsigned long len, unsigned int psize)
+{
+	struct slice_mask range_mask;
+
+	VM_BUG_ON(radix_enabled());
+	slice_range_to_mask(start, len, &range_mask);
+	slice_convert(mm, &range_mask, psize);
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * is_hugepage_only_range() is used by generic code to verify whether
+ * a normal mmap mapping (non hugetlbfs) is valid on a given area.
+ *
+ * Until the generic code provides a more generic hook and/or starts
+ * calling arch get_unmapped_area for MAP_FIXED (which our implementation
+ * here knows how to deal with), we hijack it to keep standard mappings
+ * away from us.
+ *
+ * Because of that generic code limitation, a MAP_FIXED mapping cannot
+ * "convert" back a slice with no VMAs to the standard page size, only
+ * get_unmapped_area() can. It would be possible to fix it here but I
+ * prefer working on fixing the generic code instead.
+ *
+ * WARNING: This will not work if hugetlbfs isn't enabled since the
+ * generic code will redefine that function as 0 in that case. This is
+ * ok for now as we only use slices with hugetlbfs enabled. This should
+ * be fixed as the generic code gets fixed.
+ */
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+			   unsigned long len)
+{
+	unsigned int psize = mm_ctx_user_psize(&mm->context);
+	const struct slice_mask *user_maskp, *compat_maskp;
+	struct slice_mask combined;
+
+	VM_BUG_ON(radix_enabled());
+
+	user_maskp = slice_mask_for_size(&mm->context, psize);
+
+	/* Only a 64K user page size on a 64K-page kernel has compat slices */
+	if (!IS_ENABLED(CONFIG_PPC_64K_PAGES) || psize != MMU_PAGE_64K)
+		return !slice_check_range_fits(mm, user_maskp, addr, len);
+
+	/*
+	 * We need to account for 4k slices too, so test against both masks.
+	 */
+	compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
+	slice_or_mask(&combined, user_maskp, compat_maskp);
+	return !slice_check_range_fits(mm, &combined, addr, len);
+}
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	/* With radix we don't use slices, so derive it from the vma */
+	if (radix_enabled())
+		return vma_kernel_pagesize(vma);
+	/* otherwise: page size of the slice that holds the start of the vma */
+	return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
+}
+
+/* Map the huge page shift of @file's hstate to an MMU page-size index. */
+static int file_to_psize(struct file *file)
+{
+	return shift_to_mmu_psize(huge_page_shift(hstate_file(file)));
+}
+
+/* hugetlbfs placement: generic helper on radix, else a top-down slice search. */
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+					unsigned long len, unsigned long pgoff,
+					unsigned long flags)
+{
+	if (!radix_enabled())
+		return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
+	return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+}
+#endif
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
new file mode 100644
index 0000000000..ec98e52616
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2007-2008 Paul Mackerras, IBM Corp.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/pagewalk.h>
+#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
+
+#include <linux/pgtable.h>
+#include <linux/uaccess.h>
+
+/*
+ * Free all pages allocated for subpage protection maps and pointers,
+ * clearing each table slot and maxaddr on the way, then free the
+ * subpage_prot_table structure itself.
+ */
+void subpage_prot_free(struct mm_struct *mm)
+{
+	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
+	unsigned long i, j, addr;
+	u32 **p;
+
+	if (!spt)
+		return;		/* no table attached to this mm, nothing to free */
+
+	for (i = 0; i < 4; ++i) {
+		if (spt->low_prot[i]) {
+			free_page((unsigned long)spt->low_prot[i]);
+			spt->low_prot[i] = NULL;
+		}
+	}
+	addr = 0;	/* running value compared against maxaddr in the inner loop */
+	for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
+		p = spt->protptrs[i];
+		if (!p)
+			continue;
+		spt->protptrs[i] = NULL;
+		for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr;
+		     ++j, addr += PAGE_SIZE)
+			if (p[j])
+				free_page((unsigned long)p[j]);
+		free_page((unsigned long)p);	/* then the pointer page itself */
+	}
+	spt->maxaddr = 0;
+	kfree(spt);
+}
+
+/*
+ * Call pte_update(mm, addr, pte, 0, 0, 0) on @npages PTEs starting at @addr.
+ * Does nothing if a page-table level covering @addr is not populated.
+ */
+static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, int npages)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+
+	if (p4d_none(*p4d))
+		return;
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud))
+		return;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return;
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!ptep)
+		return;
+
+	arch_enter_lazy_mmu_mode();
+	for (; npages > 0; --npages, ++ptep, addr += PAGE_SIZE)
+		pte_update(mm, addr, ptep, 0, 0, 0);
+	arch_leave_lazy_mmu_mode();
+	/* ptep now points one past the last PTE updated; unmap via that one */
+	pte_unmap_unlock(ptep - 1, ptl);
+}
+
+/*
+ * Clear the subpage protection map of current->mm for [addr, addr + len),
+ * allowing all accesses that are allowed by the pte permissions.
+ */
+static void subpage_prot_clear(unsigned long addr, unsigned long len)
+{
+	struct mm_struct *mm = current->mm;
+	struct subpage_prot_table *spt;
+	u32 **spm, *spp;
+	unsigned long i;
+	size_t nw;
+	unsigned long next, limit;
+
+	mmap_write_lock(mm);
+
+	spt = mm_ctx_subpage_prot(&mm->context);
+	if (!spt)
+		goto err_out;	/* no table, so nothing to clear */
+
+	limit = addr + len;
+	if (limit > spt->maxaddr)
+		limit = spt->maxaddr;	/* clamp to the table's recorded maxaddr */
+	for (; addr < limit; addr = next) {
+		next = pmd_addr_end(addr, limit);
+		if (addr < 0x100000000UL) {	/* below 4GB: use low_prot[] */
+			spm = spt->low_prot;
+		} else {
+			spm = spt->protptrs[addr >> SBP_L3_SHIFT];
+			if (!spm)
+				continue;
+		}
+		spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
+		if (!spp)
+			continue;	/* no map page here, nothing to clear */
+		spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
+
+		i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+		nw = PTRS_PER_PTE - i;	/* entries left in this PTE page */
+		if (addr + (nw << PAGE_SHIFT) > next)
+			nw = (next - addr) >> PAGE_SHIFT;
+
+		memset(spp, 0, nw * sizeof(u32));
+
+		/* now flush any existing HPTEs for the range */
+		hpte_flush_range(mm, addr, nw);
+	}
+
+err_out:
+	mmap_write_unlock(mm);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* pagewalk pmd_entry callback: hand each PMD visited to split_huge_pmd(). */
+static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, struct mm_walk *walk)
+{
+	split_huge_pmd(walk->vma, pmd, addr);
+	return 0;
+}
+
+static const struct mm_walk_ops subpage_walk_ops = {
+	.pmd_entry	= subpage_walk_pmd_entry,
+	.walk_lock	= PGWALK_WRLOCK_VERIFY,
+};
+
+/* Mark the vmas between @addr and @addr + @len VM_NOHUGEPAGE and walk them. */
+static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
+				    unsigned long len)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, addr);
+
+	/*
+	 * We don't try too hard: mark and split every vma in that range.
+	 */
+	for_each_vma_range(vmi, vma, addr + len) {
+		vm_flags_set(vma, VM_NOHUGEPAGE);
+		walk_page_vma(vma, &subpage_walk_ops, NULL);
+	}
+}
+#else
+static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
+				    unsigned long len)
+{
+	/* Without transparent hugepages there is nothing to mark or split. */
+}
+#endif
+
+/*
+ * Copy in a subpage protection map for an address range; a NULL @map
+ * clears the range instead. Returns 0 or a negative errno.
+ * The map has 2 bits per 4k subpage, so 32 bits per 64k page.
+ * Each 2-bit field is 0 to allow any access, 1 to prevent writes,
+ * 2 or 3 to prevent all accesses. The normal page protections also
+ * apply; the subpage protection mechanism is an additional constraint,
+ * so putting 0 in a 2-bit field won't allow writes to a page that is
+ * otherwise write-protected.
+ */
+SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
+		unsigned long, len, u32 __user *, map)
+{
+	struct mm_struct *mm = current->mm;
+	struct subpage_prot_table *spt;
+	u32 **spm, *spp;
+	unsigned long i;
+	size_t nw;
+	unsigned long next, limit;
+	int err;
+
+	if (radix_enabled())
+		return -ENOENT;	/* refused when the radix MMU is in use */
+
+	/* Check parameters: page-aligned and within the task's address space */
+	if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
+	    addr >= mm->task_size || len >= mm->task_size ||
+	    addr + len > mm->task_size)
+		return -EINVAL;
+
+	if (is_hugepage_only_range(mm, addr, len))
+		return -EINVAL;
+
+	if (!map) {
+		/* Clear out the protection map for the address range */
+		subpage_prot_clear(addr, len);
+		return 0;
+	}
+
+	if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32)))
+		return -EFAULT;	/* one u32 of map is consumed per page */
+
+	mmap_write_lock(mm);
+
+	spt = mm_ctx_subpage_prot(&mm->context);
+	if (!spt) {
+		/*
+		 * Allocate subpage prot table if not already done.
+		 * Do this with mmap_lock held
+		 */
+		spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
+		if (!spt) {
+			err = -ENOMEM;
+			goto out;
+		}
+		mm->context.hash_context->spt = spt;
+	}
+
+	subpage_mark_vma_nohuge(mm, addr, len);
+	for (limit = addr + len; addr < limit; addr = next) {
+		next = pmd_addr_end(addr, limit);
+		err = -ENOMEM;	/* reported if either allocation below fails */
+		if (addr < 0x100000000UL) {	/* below 4GB: use low_prot[] */
+			spm = spt->low_prot;
+		} else {
+			spm = spt->protptrs[addr >> SBP_L3_SHIFT];
+			if (!spm) {
+				spm = (u32 **)get_zeroed_page(GFP_KERNEL);
+				if (!spm)
+					goto out;
+				spt->protptrs[addr >> SBP_L3_SHIFT] = spm;
+			}
+		}
+		spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1);
+		spp = *spm;
+		if (!spp) {
+			spp = (u32 *)get_zeroed_page(GFP_KERNEL);
+			if (!spp)
+				goto out;
+			*spm = spp;
+		}
+		spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
+
+		local_irq_disable();
+		demote_segment_4k(mm, addr);
+		local_irq_enable();
+
+		i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+		nw = PTRS_PER_PTE - i;	/* entries left in this PTE page */
+		if (addr + (nw << PAGE_SHIFT) > next)
+			nw = (next - addr) >> PAGE_SHIFT;
+
+		mmap_write_unlock(mm);	/* lock is dropped around the user copy */
+		if (__copy_from_user(spp, map, nw * sizeof(u32)))
+			return -EFAULT;	/* mmap_lock is not held at this point */
+		map += nw;
+		mmap_write_lock(mm);
+
+		/* now flush any existing HPTEs for the range */
+		hpte_flush_range(mm, addr, nw);
+	}
+	if (limit > spt->maxaddr)
+		spt->maxaddr = limit;	/* remember the highest end address set */
+	err = 0;
+ out:
+	mmap_write_unlock(mm);
+	return err;
+}
diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c
new file mode 100644
index 0000000000..ccd64b5e6c
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/trace.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file is for defining trace points and trace related helpers.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <trace/events/thp.h>
+#endif
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
new file mode 100644
index 0000000000..15189592da
--- /dev/null
+++ b/arch/powerpc/mm/cacheflush.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/highmem.h>
+#include <linux/kprobes.h>
+
+/**
+ * flush_coherent_icache() - flush the icache if the CPU keeps it coherent
+ *
+ * Return: true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(void)
+{
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return false;
+
+	/*
+	 * For a snooping icache, we still need a dummy icbi to purge all the
+	 * prefetched instructions from the ifetch buffers. We also need a sync
+	 * before the icbi to order the actual stores to memory that might
+	 * have modified instructions with the icbi.
+	 */
+	mb(); /* sync */
+	icbi((void *)PAGE_OFFSET);
+	mb(); /* sync */
+	isync();
+	return true;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+	unsigned long line = l1_icache_bytes();
+	unsigned long first = start & ~(line - 1);
+	/* icbi count: from the block holding @start up to @stop, rounded up */
+	unsigned long nr = (stop - first + (line - 1)) >> l1_icache_shift();
+	char *p = (char *)first;
+
+	for (; nr; nr--, p += line)
+		icbi(p);
+
+	mb(); /* sync */
+	isync();
+}
+
+/**
+ * flush_icache_range() - Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+	if (flush_coherent_icache())
+		return;
+
+	clean_dcache_range(start, stop);
+	if (!IS_ENABLED(CONFIG_44x)) {
+		invalidate_icache_range(start, stop);
+		return;
+	}
+
+	/*
+	 * Flash invalidate on 44x because we are passed kmapped
+	 * addresses and this doesn't work for userspace pages due to
+	 * the virtually tagged icache.
+	 */
+	iccci((void *)start);
+	mb(); /* sync */
+	isync();
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * flush_dcache_icache_phys() - Flush a page by its physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+	unsigned long bytes = l1_dcache_bytes();
+	unsigned long nb = PAGE_SIZE / bytes;	/* loop count for both asm loops */
+	unsigned long addr = physaddr & PAGE_MASK;
+	unsigned long msr, msr0;
+	unsigned long loop1 = addr, loop2 = addr;
+
+	msr0 = mfmsr();
+	msr = msr0 & ~MSR_DR;	/* msr0 is put back by the final mtmsr */
+	/*
+	 * This must remain as ASM to prevent potential memory accesses
+	 * while the data MMU is disabled
+	 */
+	asm volatile(
+		"   mtctr %2;\n"
+		"   mtmsr %3;\n"
+		"   isync;\n"
+		"0: dcbst   0, %0;\n"
+		"   addi    %0, %0, %4;\n"
+		"   bdnz    0b;\n"
+		"   sync;\n"
+		"   mtctr %2;\n"
+		"1: icbi    0, %1;\n"
+		"   addi    %1, %1, %4;\n"
+		"   bdnz    1b;\n"
+		"   sync;\n"
+		"   mtmsr %5;\n"
+		"   isync;\n"
+		: "+&r" (loop1), "+&r" (loop2)
+		: "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+		: "ctr", "memory");
+}
+NOKPROBE_SYMBOL(flush_dcache_icache_phys)
+#else /* !CONFIG_HIGHMEM */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * __flush_dcache_icache() - Flush a particular page from the data cache to RAM.
+ * @p: the address of the page to flush
+ *
+ * Cleans the dcache range of the page containing @p and then, except on
+ * 44x, invalidates the same range in the icache.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ */
+static void __flush_dcache_icache(void *p)
+{
+	unsigned long start = (unsigned long)p & PAGE_MASK;
+	unsigned long stop = start + PAGE_SIZE;
+
+	clean_dcache_range(start, stop);
+
+	/*
+	 * We don't flush the icache on 44x. Those have a virtual icache and we
+	 * don't have access to the virtual address here (it's not the page
+	 * vaddr but where it's mapped in user space). The flushing of the
+	 * icache on these is handled elsewhere, when a change in the address
+	 * space occurs, before returning to user space.
+	 */
+	if (!mmu_has_feature(MMU_FTR_TYPE_44x))
+		invalidate_icache_range(start, stop);
+}
+
+/* Flush each page of @folio unless the coherent icache flush sufficed. */
+void flush_dcache_icache_folio(struct folio *folio)
+{
+	unsigned int idx, nr_pages = folio_nr_pages(folio);
+
+	if (flush_coherent_icache())
+		return;
+
+	if (!folio_test_highmem(folio)) {
+		void *kaddr = folio_address(folio);
+		for (idx = 0; idx < nr_pages; idx++, kaddr += PAGE_SIZE)
+			__flush_dcache_icache(kaddr);
+	} else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
+		for (idx = 0; idx < nr_pages; idx++) {
+			void *kaddr = kmap_local_folio(folio, idx * PAGE_SIZE);
+			__flush_dcache_icache(kaddr);
+			kunmap_local(kaddr);
+		}
+	} else {
+		unsigned long paddr = folio_pfn(folio) * PAGE_SIZE;
+		for (idx = 0; idx < nr_pages; idx++, paddr += PAGE_SIZE)
+			flush_dcache_icache_phys(paddr);
+	}
+}
+EXPORT_SYMBOL(flush_dcache_icache_folio);
+
+/* clear_page() on @page, then flush_dcache_page() on @pg; @vaddr is unused. */
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+	/*
+	 * We shouldn't have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+	flush_dcache_page(pg);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+/* copy_page() from @vfrom to @vto, then flush_dcache_page() on @pg. */
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	flush_dcache_page(pg);
+}
+
+/* Flush @len bytes of @page, starting at the in-page offset of @addr. */
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	void *maddr = kmap_local_page(page) + (addr & ~PAGE_MASK);
+
+	flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len);
+	kunmap_local(maddr);
+}
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
new file mode 100644
index 0000000000..f49fd873df
--- /dev/null
+++ b/arch/powerpc/mm/copro_fault.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CoProcessor (SPU/AFU) mm fault handler
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <asm/reg.h>
+#include <asm/copro.h>
+#include <asm/spu.h>
+#include <misc/cxl-base.h>
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+		unsigned long dsisr, vm_fault_t *flt)
+{
+	struct vm_area_struct *vma;
+	unsigned long is_write;
+	int ret;
+
+	if (mm == NULL)
+		return -EFAULT;
+
+	if (mm->pgd == NULL)
+		return -EFAULT;
+
+	vma = lock_mm_and_find_vma(mm, ea, NULL);
+	if (!vma)
+		return -EFAULT;	/* returns without unlocking on this path */
+
+	ret = -EFAULT;	/* result if the access checks below fail */
+	is_write = dsisr & DSISR_ISSTORE;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto out_unlock;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto out_unlock;
+		/*
+		 * PROT_NONE is covered by the VMA check above.
+		 * and hash should get a NOHPTE fault instead of
+		 * a PROTFAULT in case fixup is needed for things
+		 * like autonuma.
+		 */
+		if (!radix_enabled())
+			WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
+	}
+
+	ret = 0;	/* access checks passed */
+	*flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
+
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (*flt & VM_FAULT_COMPLETED)
+		return 0;
+
+	if (unlikely(*flt & VM_FAULT_ERROR)) {
+		if (*flt & VM_FAULT_OOM) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		} else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+		BUG();	/* an error bit other than OOM, SIGBUS or SIGSEGV */
+	}
+
+out_unlock:
+	mmap_read_unlock(mm);	/* presumably pairs with lock_mm_and_find_vma() */
+	return ret;
+}
+EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
+{
+	u64 vsid, vsidkey;
+	int psize, ssize;
+
+	switch (get_region_id(ea)) {	/* picks psize, ssize, vsid and key */
+	case USER_REGION_ID:
+		pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+		if (mm == NULL)
+			return 1;	/* user address but no mm to resolve it with */
+		psize = get_slice_psize(mm, ea);
+		ssize = user_segment_size(ea);
+		vsid = get_user_vsid(&mm->context, ea, ssize);
+		vsidkey = SLB_VSID_USER;
+		break;
+	case VMALLOC_REGION_ID:
+		pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
+		psize = mmu_vmalloc_psize;
+		ssize = mmu_kernel_ssize;
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		vsidkey = SLB_VSID_KERNEL;
+		break;
+	case IO_REGION_ID:
+		pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
+		psize = mmu_io_psize;
+		ssize = mmu_kernel_ssize;
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		vsidkey = SLB_VSID_KERNEL;
+		break;
+	case LINEAR_MAP_REGION_ID:
+		pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
+		psize = mmu_linear_psize;
+		ssize = mmu_kernel_ssize;
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		vsidkey = SLB_VSID_KERNEL;
+		break;
+	default:
+		pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
+		return 1;
+	}
+	/* Bad address */
+	if (!vsid)
+		return 1;
+
+	vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey;
+
+	vsid |= mmu_psize_defs[psize].sllp |
+		((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
+
+	slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V;
+	slb->vsid = vsid;
+
+	return 0;	/* @slb filled in; every failure path returns 1 */
+}
+EXPORT_SYMBOL_GPL(copro_calculate_slb);
+
+void copro_flush_all_slbs(struct mm_struct *mm)
+{
+#ifdef CONFIG_SPU_BASE
+	spu_flush_all_slbs(mm);
+#endif /* CONFIG_SPU_BASE */
+	cxl_slbia(mm);	/* called unconditionally, unlike the SPU hook above */
+}
+EXPORT_SYMBOL_GPL(copro_flush_all_slbs);
+#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
new file mode 100644
index 0000000000..30260b5d14
--- /dev/null
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  Copyright (C) 2000 Russell King
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
+
+#include <asm/tlbflush.h>
+#include <asm/dma.h>
+
+/* Make the @size bytes at @vaddr consistent for a transfer in @direction. */
+static void __dma_sync(void *vaddr, size_t size, int direction)
+{
+	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
+	/* non-zero when either end of the area is not cache-line aligned */
+	unsigned long misaligned = (start | end) & (L1_CACHE_BYTES - 1);
+
+	switch (direction) {
+	case DMA_TO_DEVICE:		/* writeback only */
+		clean_dcache_range(start, end);
+		break;
+	case DMA_BIDIRECTIONAL:	/* writeback and invalidate */
+		flush_dcache_range(start, end);
+		break;
+	case DMA_NONE:
+		BUG();
+	case DMA_FROM_DEVICE:
+		/*
+		 * invalidate only when cache-line aligned otherwise there is
+		 * the potential for discarding uncommitted data from the cache
+		 */
+		if (misaligned)
+			flush_dcache_range(start, end);
+		else
+			invalidate_dcache_range(start, end);
+		break;
+	}
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * __dma_sync_page() implementation for systems using highmem.
+ * In this case, each page of a buffer must be kmapped/kunmapped
+ * in order to have a virtual address for __dma_sync(). This must
+ * not sleep so kmap_atomic()/kunmap_atomic() are used.
+ *
+ * Note: yes, it is possible and correct to have a buffer extend
+ * beyond the first page.
+ */
+static inline void __dma_sync_page_highmem(struct page *page,
+		unsigned long offset, size_t size, int direction)
+{
+	size_t seg_size = min((size_t)(PAGE_SIZE - offset), size);	/* bytes in the first page */
+	size_t cur_size = seg_size;
+	unsigned long flags, start, seg_offset = offset;
+	int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE;	/* first page + the rest, rounded up */
+	int seg_nr = 0;
+
+	local_irq_save(flags);
+
+	do {
+		start = (unsigned long)kmap_atomic(page + seg_nr) + seg_offset;
+
+		/* Sync this buffer segment */
+		__dma_sync((void *)start, seg_size, direction);
+		kunmap_atomic((void *)start);
+		seg_nr++;
+
+		/* Calculate next buffer segment size */
+		seg_size = min((size_t)PAGE_SIZE, size - cur_size);
+
+		/* Add the segment size to our running total */
+		cur_size += seg_size;
+		seg_offset = 0;	/* every later segment starts at offset 0 of its page */
+	} while (seg_nr < nr_segs);
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * __dma_sync_page makes memory consistent. Identical to __dma_sync, but takes
+ * a physical address, resolved to its struct page, instead of a virtual one.
+ */
+static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
+{
+	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+	unsigned offset = paddr & ~PAGE_MASK;
+
+#ifdef CONFIG_HIGHMEM
+	__dma_sync_page_highmem(page, offset, size, dir);
+#else /* !CONFIG_HIGHMEM */
+	unsigned long start = (unsigned long)page_address(page) + offset;
+	__dma_sync((void *)start, size, dir);
+#endif /* CONFIG_HIGHMEM */
+}
+
+/* arch hook: apply __dma_sync_page() to @size bytes at @paddr for @dir. */
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+{
+	__dma_sync_page(paddr, size, dir);
+}
+
+/* arch hook: apply __dma_sync_page() to @size bytes at @paddr for @dir. */
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+{
+	__dma_sync_page(paddr, size, dir);
+}
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+	unsigned long kaddr = (unsigned long)page_address(page);
+
+	flush_dcache_range(kaddr, kaddr + size);	/* @size bytes from the page's kernel address */
+}
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
new file mode 100644
index 0000000000..2369d1bf24
--- /dev/null
+++ b/arch/powerpc/mm/drmem.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Dynamic reconfiguration memory support
+ *
+ * Copyright 2017 IBM Corporation
+ */
+
+#define pr_fmt(fmt) "drmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <asm/drmem.h>
+
+/*
+ * Cell counts of the device tree root, cached by walk_drmem_lmbs_early()
+ * and walk_drmem_lmbs() and used when decoding LMB property cells.
+ */
+static int n_root_addr_cells, n_root_size_cells;
+
+/* Backing storage for the LMB table; the rest of the file uses drmem_info */
+static struct drmem_lmb_info __drmem_info;
+struct drmem_lmb_info *drmem_info = &__drmem_info;
+/* Set only while drmem_update_dt() is itself rewriting the DT property */
+static bool in_drmem_update;
+
+/*
+ * Return the address just past the last LMB in the table, i.e. the base
+ * address of the final LMB plus the LMB size.
+ *
+ * Returns 0 when the LMB table is empty or was never allocated, instead
+ * of indexing element -1 of a missing array.
+ */
+u64 drmem_lmb_memory_max(void)
+{
+	struct drmem_lmb *last_lmb;
+
+	if (!drmem_info->lmbs || !drmem_info->n_lmbs)
+		return 0;
+
+	last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+	return last_lmb->base_addr + drmem_lmb_size();
+}
+
+/* Flags of @lmb as they should appear in the device tree property. */
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+	/*
+	 * Return the value of the lmb flags field minus the reserved
+	 * bit used internally for hotplug processing.
+	 */
+	return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+/*
+ * Allocate a new property carrying a copy of @prop's name and a zeroed
+ * value buffer of @prop_sz bytes. The value of @prop is NOT copied; the
+ * caller fills the buffer in.
+ *
+ * Returns the new property, or NULL if any allocation fails (nothing is
+ * leaked in that case).
+ */
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+	struct property *copy = kzalloc(sizeof(*copy), GFP_KERNEL);
+
+	if (!copy)
+		return NULL;
+
+	copy->name = kstrdup(prop->name, GFP_KERNEL);
+	copy->value = kzalloc(prop_sz, GFP_KERNEL);
+
+	if (copy->name && copy->value) {
+		copy->length = prop_sz;
+#if defined(CONFIG_OF_DYNAMIC)
+		of_property_set_flag(copy, OF_DYNAMIC);
+#endif
+		return copy;
+	}
+
+	/* kfree() accepts NULL, so whichever allocation failed is harmless */
+	kfree(copy->name);
+	kfree(copy->value);
+	kfree(copy);
+	return NULL;
+}
+
+/*
+ * Rebuild a v1 dynamic-memory property from the in-kernel LMB table and
+ * install it on @memory. The new property has the same length as @prop:
+ * one big-endian LMB count followed by one cell per LMB.
+ *
+ * Returns 0 on success, -1 if the replacement property cannot be allocated.
+ */
+static int drmem_update_dt_v1(struct device_node *memory,
+			      struct property *prop)
+{
+	struct property *new_prop = clone_property(prop, prop->length);
+	struct of_drconf_cell_v1 *cell;
+	struct drmem_lmb *lmb;
+	u32 *count;
+
+	if (!new_prop)
+		return -1;
+
+	/* The first word holds the number of LMB cells that follow */
+	count = new_prop->value;
+	*count = cpu_to_be32(drmem_info->n_lmbs);
+
+	cell = (struct of_drconf_cell_v1 *)(count + 1);
+
+	for_each_drmem_lmb(lmb) {
+		cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+		cell->aa_index = cpu_to_be32(lmb->aa_index);
+		cell->drc_index = cpu_to_be32(lmb->drc_index);
+		cell->base_addr = cpu_to_be64(lmb->base_addr);
+
+		cell++;
+	}
+
+	of_update_property(memory, new_prop);
+	return 0;
+}
+
+/*
+ * Fill the per-set fields of a v2 cell from @lmb in big-endian form.
+ * seq_lmbs is left for the caller to set once the run length is known.
+ */
+static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+				struct drmem_lmb *lmb)
+{
+	u32 dt_flags = drmem_lmb_flags(lmb);
+
+	dr_cell->flags = cpu_to_be32(dt_flags);
+	dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+	dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+	dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+}
+
+/*
+ * Rebuild a v2 dynamic-memory property from the in-kernel LMB table and
+ * install it on @memory. Consecutive LMBs that share aa_index and flags
+ * are collapsed into one "LMB set" cell carrying a run length (seq_lmbs).
+ *
+ * Returns 0 on success, -1 if the replacement property cannot be allocated.
+ *
+ * NOTE(review): a new set is started only when aa_index or flags change;
+ * base_addr/drc_index continuity is not checked here -- presumably the
+ * table is always contiguous in both, confirm against the table builders.
+ */
+static int drmem_update_dt_v2(struct device_node *memory,
+			      struct property *prop)
+{
+	struct property *new_prop;
+	struct of_drconf_cell_v2 *dr_cell;
+	struct drmem_lmb *lmb, *prev_lmb;
+	u32 lmb_sets, prop_sz, seq_lmbs;
+	u32 *p;
+
+	/* First pass, determine how many LMB sets are needed. */
+	lmb_sets = 0;
+	prev_lmb = NULL;
+	for_each_drmem_lmb(lmb) {
+		if (!prev_lmb) {
+			prev_lmb = lmb;
+			lmb_sets++;
+			continue;
+		}
+
+		if (prev_lmb->aa_index != lmb->aa_index ||
+		    drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
+			lmb_sets++;
+
+		prev_lmb = lmb;
+	}
+
+	/* One leading count word plus one cell per set */
+	prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
+	new_prop = clone_property(prop, prop_sz);
+	if (!new_prop)
+		return -1;
+
+	p = new_prop->value;
+	*p++ = cpu_to_be32(lmb_sets);
+
+	dr_cell = (struct of_drconf_cell_v2 *)p;
+
+	/* Second pass, populate the LMB set data */
+	prev_lmb = NULL;
+	seq_lmbs = 0;
+	for_each_drmem_lmb(lmb) {
+		if (prev_lmb == NULL) {
+			/* Start of first LMB set */
+			prev_lmb = lmb;
+			init_drconf_v2_cell(dr_cell, lmb);
+			seq_lmbs++;
+			continue;
+		}
+
+		if (prev_lmb->aa_index != lmb->aa_index ||
+		    drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
+			/* end of one set, start of another */
+			dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+			dr_cell++;
+
+			init_drconf_v2_cell(dr_cell, lmb);
+			seq_lmbs = 1;
+		} else {
+			seq_lmbs++;
+		}
+
+		prev_lmb = lmb;
+	}
+
+	/* close out last LMB set */
+	dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+	of_update_property(memory, new_prop);
+	return 0;
+}
+
+/*
+ * Write the in-kernel LMB table back to the device tree, using whichever
+ * of the "ibm,dynamic-memory" (v1) or "ibm,dynamic-memory-v2" properties
+ * the dynamic-reconfiguration-memory node carries.
+ *
+ * Returns the result of the v1/v2 update helper, or -1 when the node or
+ * both properties are missing.
+ */
+int drmem_update_dt(void)
+{
+	struct device_node *memory;
+	struct property *prop;
+	int rc = -1;
+
+	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!memory)
+		return -1;
+
+	/*
+	 * The change originates from the LMB tree, so flag it: the notifier
+	 * callback must not feed the DT property back into the LMBs.
+	 */
+	in_drmem_update = true;
+
+	prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+	if (prop) {
+		rc = drmem_update_dt_v1(memory, prop);
+		goto out;
+	}
+
+	prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
+	if (prop)
+		rc = drmem_update_dt_v2(memory, prop);
+
+out:
+	in_drmem_update = false;
+
+	of_node_put(memory);
+	return rc;
+}
+
+/*
+ * Decode one v1 LMB cell at *prop into @lmb and advance *prop past it.
+ * Layout: base address (n_root_addr_cells words), drc index, one
+ * reserved word, associativity index, flags.
+ */
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
+				       const __be32 **prop)
+{
+	const __be32 *cell = *prop;
+
+	lmb->base_addr = of_read_number(cell, n_root_addr_cells);
+	cell += n_root_addr_cells;
+
+	lmb->drc_index = of_read_number(&cell[0], 1);
+	/* cell[1] is the reserved field and is skipped */
+	lmb->aa_index = of_read_number(&cell[2], 1);
+	lmb->flags = of_read_number(&cell[3], 1);
+
+	*prop = cell + 4;
+}
+
+/*
+ * Call @func once for every LMB described by a v1 property, passing
+ * @data and a pointer to the @usm cursor through. The walk stops at the
+ * first non-zero return from @func, which is then returned; otherwise 0.
+ */
+static int
+__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+		     int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+	struct drmem_lmb lmb;
+	u32 remaining;
+	int ret;
+
+	/* The property starts with the number of LMB cells */
+	remaining = of_read_number(prop++, 1);
+
+	while (remaining--) {
+		read_drconf_v1_cell(&lmb, &prop);
+
+		ret = func(&lmb, &usm, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Decode one v2 LMB-set cell at *prop into @dr_cell and advance *prop
+ * past it. Layout: sequence count, base address (n_root_addr_cells
+ * words), drc index, associativity index, flags.
+ */
+static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+				       const __be32 **prop)
+{
+	const __be32 *head = *prop;
+	/* Words that follow the variable-width base address */
+	const __be32 *tail = head + 1 + n_root_addr_cells;
+
+	dr_cell->seq_lmbs = of_read_number(head, 1);
+	dr_cell->base_addr = of_read_number(head + 1, n_root_addr_cells);
+	dr_cell->drc_index = of_read_number(&tail[0], 1);
+	dr_cell->aa_index = of_read_number(&tail[1], 1);
+	dr_cell->flags = of_read_number(&tail[2], 1);
+
+	*prop = tail + 3;
+}
+
+/*
+ * Call @func once for every LMB described by a v2 property. Each v2 cell
+ * describes a set of seq_lmbs consecutive LMBs; the individual LMBs are
+ * synthesised by stepping the base address by the LMB size and the drc
+ * index by one, while aa_index and flags are shared by the whole set.
+ *
+ * The walk stops at the first non-zero return from @func and that value
+ * is returned, matching __walk_drmem_v1_lmbs(). Previously the error only
+ * ended the current set: later sets were still walked and could
+ * overwrite the error with 0.
+ *
+ * Returns 0 when every LMB was visited successfully (or there were none).
+ */
+static int
+__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+		     int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+	struct of_drconf_cell_v2 dr_cell;
+	struct drmem_lmb lmb;
+	u32 i, j, lmb_sets;
+	int ret;
+
+	/* The property starts with the number of LMB sets */
+	lmb_sets = of_read_number(prop++, 1);
+	for (i = 0; i < lmb_sets; i++) {
+		read_drconf_v2_cell(&dr_cell, &prop);
+
+		for (j = 0; j < dr_cell.seq_lmbs; j++) {
+			lmb.base_addr = dr_cell.base_addr;
+			dr_cell.base_addr += drmem_lmb_size();
+
+			lmb.drc_index = dr_cell.drc_index;
+			dr_cell.drc_index++;
+
+			lmb.aa_index = dr_cell.aa_index;
+			lmb.flags = dr_cell.flags;
+
+			ret = func(&lmb, &usm, data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Early-boot LMB walk over the flattened device tree node @node.
+ *
+ * Caches the root cell counts, records the LMB size from "ibm,lmb-size",
+ * then calls @func for every LMB found in the v1 or v2 dynamic-memory
+ * property. Returns -ENODEV if the LMB size or both properties are
+ * missing, otherwise the result of the walk.
+ */
+int __init walk_drmem_lmbs_early(unsigned long node, void *data,
+		int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+	const __be32 *lmb_size, *usm, *dyn_mem;
+	int len, ret = -ENODEV;
+
+	lmb_size = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+	if (!lmb_size || len < dt_root_size_cells * sizeof(__be32))
+		return ret;
+
+	/* Get the address & size cells */
+	n_root_addr_cells = dt_root_addr_cells;
+	n_root_size_cells = dt_root_size_cells;
+
+	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &lmb_size);
+
+	usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
+
+	/* Prefer the v1 property; fall back to v2 only when v1 is absent */
+	dyn_mem = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
+	if (!dyn_mem) {
+		dyn_mem = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
+					      &len);
+		if (dyn_mem)
+			ret = __walk_drmem_v2_lmbs(dyn_mem, usm, data, func);
+	} else {
+		ret = __walk_drmem_v1_lmbs(dyn_mem, usm, data, func);
+	}
+
+	memblock_dump_all();
+	return ret;
+}
+
+/*
+ * Update the LMB associativity index.
+ */
+/*
+ * Walk callback: copy the associativity index of @updated_lmb into the
+ * first table LMB with the same drc_index. @usm and @data are unused.
+ * Always returns 0, whether or not a matching LMB was found.
+ */
+static int update_lmb(struct drmem_lmb *updated_lmb,
+		      __maybe_unused const __be32 **usm,
+		      __maybe_unused void *data)
+{
+	struct drmem_lmb *cur;
+
+	for_each_drmem_lmb(cur) {
+		if (cur->drc_index == updated_lmb->drc_index) {
+			cur->aa_index = updated_lmb->aa_index;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Update the LMB associativity index.
+ *
+ * This needs to be called when the hypervisor is updating the
+ * dynamic-reconfiguration-memory node property.
+ */
+void drmem_update_lmbs(struct property *prop)
+{
+	const char *name;
+
+	/*
+	 * Nothing to do when the property change was made by
+	 * drmem_update_dt(): in that case the DT property was generated
+	 * from the LMB values, so the two already agree.
+	 */
+	if (in_drmem_update)
+		return;
+
+	name = prop->name;
+
+	if (strcmp(name, "ibm,dynamic-memory") == 0) {
+		__walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
+		return;
+	}
+
+	if (strcmp(name, "ibm,dynamic-memory-v2") == 0)
+		__walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
+}
+#endif
+
+/*
+ * Make sure drmem_info->lmb_size is set, reading "ibm,lmb-size" from @dn
+ * if it has not been recorded yet.
+ *
+ * Returns 0 when the size is known, -1 when the property is missing or
+ * shorter than n_root_size_cells cells.
+ */
+static int init_drmem_lmb_size(struct device_node *dn)
+{
+	const __be32 *prop;
+	int len;
+
+	/* Already initialised: keep the existing value */
+	if (drmem_info->lmb_size)
+		return 0;
+
+	prop = of_get_property(dn, "ibm,lmb-size", &len);
+	if (prop && len >= n_root_size_cells * sizeof(__be32)) {
+		drmem_info->lmb_size = of_read_number(prop, n_root_size_cells);
+		return 0;
+	}
+
+	pr_info("Could not determine LMB size\n");
+	return -1;
+}
+
+/*
+ * Returns the property linux,drconf-usable-memory if
+ * it exists (the property exists only in kexec/kdump kernels,
+ * added by kexec-tools)
+ */
+static const __be32 *of_get_usable_memory(struct device_node *dn)
+{
+	const __be32 *usm;
+	u32 len;
+
+	usm = of_get_property(dn, "linux,drconf-usable-memory", &len);
+
+	/* A usable property must hold at least one 32-bit word */
+	if (usm && len >= sizeof(unsigned int))
+		return usm;
+
+	return NULL;
+}
+
+/*
+ * Call @func for every LMB described under node @dn (v1 property if
+ * present, otherwise v2), after refreshing the cached root cell counts
+ * and making sure the LMB size is known.
+ *
+ * Returns -ENODEV when there is no root node, the LMB size cannot be
+ * determined, or neither property exists; otherwise the walk result.
+ */
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+		    int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+	const __be32 *prop, *usm;
+
+	if (!of_root)
+		return -ENODEV;
+
+	/* Get the address & size cells */
+	of_node_get(of_root);
+	n_root_addr_cells = of_n_addr_cells(of_root);
+	n_root_size_cells = of_n_size_cells(of_root);
+	of_node_put(of_root);
+
+	if (init_drmem_lmb_size(dn))
+		return -ENODEV;
+
+	usm = of_get_usable_memory(dn);
+
+	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+	if (prop)
+		return __walk_drmem_v1_lmbs(prop, usm, data, func);
+
+	prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+	if (prop)
+		return __walk_drmem_v2_lmbs(prop, usm, data, func);
+
+	return -ENODEV;
+}
+
+/*
+ * Build the in-kernel LMB table from a v1 "ibm,dynamic-memory" property:
+ * a leading LMB count followed by one cell per LMB.
+ *
+ * If the table cannot be allocated, n_lmbs is reset to 0 so that the
+ * table never advertises entries without backing storage (table walkers
+ * would otherwise start from a NULL array).
+ */
+static void __init init_drmem_v1_lmbs(const __be32 *prop)
+{
+	struct drmem_lmb *lmb;
+
+	drmem_info->n_lmbs = of_read_number(prop++, 1);
+	if (drmem_info->n_lmbs == 0)
+		return;
+
+	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+				   GFP_KERNEL);
+	if (!drmem_info->lmbs) {
+		drmem_info->n_lmbs = 0;
+		return;
+	}
+
+	for_each_drmem_lmb(lmb)
+		read_drconf_v1_cell(lmb, &prop);
+}
+
+/*
+ * Build the in-kernel LMB table from a v2 "ibm,dynamic-memory-v2"
+ * property: a leading set count followed by one cell per LMB set. Each
+ * set expands to seq_lmbs table entries whose base address steps by the
+ * LMB size and whose drc index steps by one.
+ *
+ * If the table cannot be allocated, n_lmbs is reset to 0 so that the
+ * table never advertises entries without backing storage (table walkers
+ * would otherwise start from a NULL array).
+ */
+static void __init init_drmem_v2_lmbs(const __be32 *prop)
+{
+	struct drmem_lmb *lmb;
+	struct of_drconf_cell_v2 dr_cell;
+	const __be32 *p;
+	u32 i, j, lmb_sets;
+	int lmb_index;
+
+	lmb_sets = of_read_number(prop++, 1);
+	if (lmb_sets == 0)
+		return;
+
+	/* first pass, calculate the number of LMBs */
+	p = prop;
+	for (i = 0; i < lmb_sets; i++) {
+		read_drconf_v2_cell(&dr_cell, &p);
+		drmem_info->n_lmbs += dr_cell.seq_lmbs;
+	}
+
+	drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+				   GFP_KERNEL);
+	if (!drmem_info->lmbs) {
+		drmem_info->n_lmbs = 0;
+		return;
+	}
+
+	/* second pass, read in the LMB information */
+	lmb_index = 0;
+	p = prop;
+
+	for (i = 0; i < lmb_sets; i++) {
+		read_drconf_v2_cell(&dr_cell, &p);
+
+		for (j = 0; j < dr_cell.seq_lmbs; j++) {
+			lmb = &drmem_info->lmbs[lmb_index++];
+
+			lmb->base_addr = dr_cell.base_addr;
+			dr_cell.base_addr += drmem_info->lmb_size;
+
+			lmb->drc_index = dr_cell.drc_index;
+			dr_cell.drc_index++;
+
+			lmb->aa_index = dr_cell.aa_index;
+			lmb->flags = dr_cell.flags;
+		}
+	}
+}
+
+/*
+ * Late initcall: populate the LMB table from the
+ * dynamic-reconfiguration-memory node, using the v1 property if present
+ * and the v2 property otherwise. Always returns 0; a missing node or an
+ * unknown LMB size just leaves the table unpopulated.
+ */
+static int __init drmem_init(void)
+{
+	struct device_node *dn;
+	const __be32 *prop;
+
+	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!dn) {
+		pr_info("No dynamic reconfiguration memory found\n");
+		return 0;
+	}
+
+	if (init_drmem_lmb_size(dn))
+		goto out;
+
+	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+	if (prop) {
+		init_drmem_v1_lmbs(prop);
+		goto out;
+	}
+
+	prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+	if (prop)
+		init_drmem_v2_lmbs(prop);
+
+out:
+	of_node_put(dn);
+	return 0;
+}
+late_initcall(drmem_init);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
new file mode 100644
index 0000000000..b1723094d4
--- /dev/null
+++ b/arch/powerpc/mm/fault.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/pagemap.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/extable.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/perf_event.h>
+#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
+#include <linux/hugetlb.h>
+#include <linux/uaccess.h>
+#include <linux/kfence.h>
+#include <linux/pkeys.h>
+
+#include <asm/firmware.h>
+#include <asm/interrupt.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/siginfo.h>
+#include <asm/debug.h>
+#include <asm/kup.h>
+#include <asm/inst.h>
+
+
+/*
+ * do_page_fault error handling helpers
+ */
+
+/*
+ * Report a bad access without touching mmap_lock: user-mode faults get
+ * SIGSEGV with @si_code delivered and 0 is returned; kernel-mode faults
+ * return SIGSEGV to the caller instead.
+ */
+static int
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+{
+	if (user_mode(regs)) {
+		_exception(SIGSEGV, regs, si_code, address);
+		return 0;
+	}
+
+	/*
+	 * Kernel mode: bail out with a SEGV. The assembly catches this and
+	 * restores the non-volatile registers before calling
+	 * bad_page_fault().
+	 */
+	return SIGSEGV;
+}
+
+/* __bad_area_nosemaphore() with the si_code fixed to SEGV_MAPERR. */
+static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
+{
+	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+}
+
+/*
+ * Bad-area handling for callers that hold the mmap read lock of
+ * current->mm: the lock is dropped first, then the fault is reported via
+ * __bad_area_nosemaphore() with the given @si_code.
+ */
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+{
+	struct mm_struct *mm = current->mm;
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+	mmap_read_unlock(mm);
+
+	return __bad_area_nosemaphore(regs, address, si_code);
+}
+
+/*
+ * Report a protection-key access error. The pkey is sampled from @vma
+ * while the mmap read lock is still held, the lock is then dropped, and
+ * a user-mode fault is reported through _exception_pkey() (returning 0).
+ * Kernel-mode faults return SIGSEGV.
+ */
+static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+				    struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = current->mm;
+	int pkey;
+
+	/*
+	 * We don't try to fetch the pkey from page table because reading
+	 * page table without locking doesn't guarantee stable pte value.
+	 * Hence the pkey value that we return to userspace can be different
+	 * from the pkey that actually caused access error.
+	 *
+	 * It does *not* guarantee that the VMA we find here
+	 * was the one that we faulted on.
+	 *
+	 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+	 * 2. T1   : set AMR to deny access to pkey=4, touches, page
+	 * 3. T1   : faults...
+	 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+	 * 5. T1   : enters fault handler, takes mmap_lock, etc...
+	 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+	 *	     faulted on a pte with its pkey=4.
+	 */
+	pkey = vma_pkey(vma);
+
+	/* vma must not be used past this point: the lock protecting it is gone */
+	mmap_read_unlock(mm);
+
+	/*
+	 * If we are in kernel mode, bail out with a SEGV, this will
+	 * be caught by the assembly which will restore the non-volatile
+	 * registers before calling bad_page_fault()
+	 */
+	if (!user_mode(regs))
+		return SIGSEGV;
+
+	_exception_pkey(regs, address, pkey);
+
+	return 0;
+}
+
+/* __bad_area() (which drops the mmap read lock) with si_code SEGV_ACCERR. */
+static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+{
+	return __bad_area(regs, address, SEGV_ACCERR);
+}
+
+/*
+ * Handle a fault that must end in SIGBUS. Kernel-mode faults return
+ * SIGBUS to the caller. User-mode faults get a signal forced and 0 is
+ * returned: a machine-check style SIGBUS (BUS_MCEERR_AR) for hardware
+ * poison when CONFIG_MEMORY_FAILURE is enabled, BUS_ADRERR otherwise.
+ */
+static int do_sigbus(struct pt_regs *regs, unsigned long address,
+		     vm_fault_t fault)
+{
+	if (!user_mode(regs))
+		return SIGBUS;
+
+	current->thread.trap_nr = BUS_ADRERR;
+#ifdef CONFIG_MEMORY_FAILURE
+	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+		unsigned int lsb = 0; /* shutup gcc */
+
+		pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+			current->comm, current->pid, address);
+
+		/*
+		 * lsb is the shift of the poisoned range: the huge page
+		 * shift for a large fault; if VM_FAULT_HWPOISON is also set
+		 * the second test overrides it with PAGE_SHIFT.
+		 */
+		if (fault & VM_FAULT_HWPOISON_LARGE)
+			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+		if (fault & VM_FAULT_HWPOISON)
+			lsb = PAGE_SHIFT;
+
+		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
+		return 0;
+	}
+
+#endif
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+	return 0;
+}
+
+/*
+ * Translate a VM_FAULT_ERROR result from handle_mm_fault() into either a
+ * signal number for the caller (kernel-mode faults) or a delivered
+ * signal / OOM handling with a 0 return (user-mode faults). Any error
+ * bit not handled here is a BUG().
+ */
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
+				vm_fault_t fault)
+{
+	/*
+	 * A kernel page fault that was interrupted by SIGKILL: there is no
+	 * point in processing it any further.
+	 */
+	if (fatal_signal_pending(current) && !user_mode(regs))
+		return SIGKILL;
+
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * Out of memory, or something else kept us from handling
+		 * the page fault gracefully.
+		 */
+		if (!user_mode(regs))
+			return SIGSEGV;
+		pagefault_out_of_memory();
+		return 0;
+	}
+
+	if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+		     VM_FAULT_HWPOISON_LARGE))
+		return do_sigbus(regs, addr, fault);
+
+	if (fault & VM_FAULT_SIGSEGV)
+		return bad_area_nosemaphore(regs, addr);
+
+	BUG();
+	return 0;
+}
+
+/* Is this a bad kernel fault ? */
+/*
+ * Decide whether a fault taken in kernel mode can never be resolved.
+ * Returns true for instruction fetch faults, faults on addresses at or
+ * above TASK_SIZE, and user accesses blocked by KUAP; returns false for
+ * the remaining case, a user-address access permitted by KUAP.
+ * @error_code is not used by this function.
+ */
+static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
+			     unsigned long address, bool is_write)
+{
+	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
+
+	if (is_exec) {
+		pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
+				    address >= TASK_SIZE ? "exec-protected" : "user",
+				    address,
+				    from_kuid(&init_user_ns, current_uid()));
+
+		// Kernel exec fault is always bad
+		return true;
+	}
+
+	// Kernel fault on kernel address is bad
+	if (address >= TASK_SIZE)
+		return true;
+
+	// Read/write fault blocked by KUAP is bad, it can never succeed.
+	if (bad_kuap_fault(regs, address, is_write)) {
+		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
+				    is_write ? "write" : "read", address,
+				    from_kuid(&init_user_ns, current_uid()));
+
+		// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
+		if (!search_exception_tables(regs->nip))
+			return true;
+
+		// Read/write fault in a valid region (the exception table search passed
+		// above), but blocked by KUAP is bad, it can never succeed.
+		// WARN() is given a constant-true condition, so this warns and
+		// the function reports the fault as bad.
+		return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read");
+	}
+
+	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
+	return false;
+}
+
+/*
+ * Return true when the architecture-level VMA check rejects this access
+ * for @vma. @is_pkey is accepted but not consulted.
+ */
+static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
+			      struct vm_area_struct *vma)
+{
+	/*
+	 * Checking the VMA up front avoids servicing a fault only to take a
+	 * pkey fault as soon as the page is filled in. This is only called
+	 * for the current mm, hence foreign == 0.
+	 */
+	return !arch_vma_access_permitted(vma, is_write, is_exec, 0);
+}
+
+/*
+ * Return true when the VMA permission flags forbid the faulting access
+ * (execute, write or read). Protection-key checks are not done here.
+ */
+static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	/*
+	 * Execution from readable areas is allowed if the MMU does not
+	 * provide separate controls over reading and executing.
+	 *
+	 * Note: this used to be disabled for 4xx/BookE. It is enabled now
+	 * because I/D cache coherency for these is done at set_pte_at()
+	 * time and there is no apparent reason for the test below to be
+	 * invalid on those processors. This -may- break programs compiled
+	 * with a really old ABI though.
+	 */
+	if (is_exec) {
+		if (vm_flags & VM_EXEC)
+			return false;
+
+		return cpu_has_feature(CPU_FTR_NOEXECUTE) ||
+		       !(vm_flags & (VM_READ | VM_WRITE));
+	}
+
+	if (is_write)
+		return !(vm_flags & VM_WRITE);
+
+	/*
+	 * VM_READ, VM_WRITE and VM_EXEC all imply read permissions, as
+	 * defined in protection_map[]. A read fault can therefore only come
+	 * from a PROT_NONE mapping, or from a PROT_EXEC-only mapping on
+	 * Radix.
+	 */
+	if (unlikely(!vma_is_accessible(vma)))
+		return true;
+
+	if (unlikely(radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)))
+		return true;
+
+	/*
+	 * The vma pkey access check would ideally happen here, but
+	 * handle_mm_fault() repeats it in the fault path. To avoid doing it
+	 * twice it is skipped here and pkey access errors are handled later.
+	 */
+	return false;
+}
+
+#ifdef CONFIG_PPC_SMLPAR
+/*
+ * When the CMO firmware feature is present, add (1 << PAGE_FACTOR) to
+ * the big-endian page_ins counter in this CPU's lppaca, with preemption
+ * disabled around the read-modify-write.
+ */
+static inline void cmo_account_page_fault(void)
+{
+	u32 count;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	preempt_disable();
+	count = be32_to_cpu(get_lppaca()->page_ins);
+	count += 1 << PAGE_FACTOR;
+	get_lppaca()->page_ins = cpu_to_be32(count);
+	preempt_enable();
+}
+#else
+/* No accounting without CONFIG_PPC_SMLPAR */
+static inline void cmo_account_page_fault(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+
+/*
+ * Diagnostic-only checks on a fault: logs user accesses to kernel
+ * addresses and, on Book3S hash, warns once about an unexpected
+ * DSISR_PROTFAULT on a non-write fault. It never changes how the fault
+ * is handled.
+ */
+static void sanity_check_fault(bool is_write, bool is_user,
+			       unsigned long error_code, unsigned long address)
+{
+	/*
+	 * Userspace trying to access kernel address, we get PROTFAULT for that.
+	 */
+	if (is_user && address >= TASK_SIZE) {
+		/* An address of all ones is deliberately not reported */
+		if ((long)address == -1)
+			return;
+
+		pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
+				   current->comm, current->pid, address,
+				   from_kuid(&init_user_ns, current_uid()));
+		return;
+	}
+
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
+		return;
+
+	/*
+	 * For hash translation mode, we should never get a
+	 * PROTFAULT. Any update to a pte that reduces access results in us
+	 * removing the hash page table entry, thus resulting in a
+	 * DSISR_NOHPTE fault instead of DSISR_PROTFAULT.
+	 *
+	 * A pte update that relaxes access does not invalidate the hash page
+	 * table entry and hence can result in DSISR_PROTFAULT.
+	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+	 * the special !is_write in the below conditional.
+	 *
+	 * For platforms that don't support a coherent icache but do support
+	 * a per-page noexec bit, we set things up such that the D/I cache
+	 * sync is done via a fault. But that is handled by the low level
+	 * hash fault code (hash_page_do_lazy_icache()) and we should not
+	 * reach here in such a case.
+	 *
+	 * For a wrong access that can result in PROTFAULT, the vma->vm_flags
+	 * check should handle it and hence we should fall through to the
+	 * bad_area handling correctly.
+	 *
+	 * For embedded with per-page exec support that doesn't have a
+	 * coherent icache we do get PROTFAULT and we handle the D/I cache
+	 * sync in set_pte_at while taking the noexec/prot fault. Hence this
+	 * WARN_ON is conditional on the server MMU.
+	 *
+	 * For radix, we can get a prot fault in the autonuma case, because
+	 * the radix page table will have the pages marked noaccess for user.
+	 */
+	if (radix_enabled() || is_write)
+		return;
+
+	WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+}
+
+/*
+ * Define the correct "is_write" bit in error_code based
+ * on the processor family
+ */
+#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#define page_fault_is_write(__err)	((__err) & ESR_DST)
+#else
+#define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
+#endif
+
+/*
+ * page_fault_is_bad(): non-zero when error_code carries a bit that marks
+ * the fault as unrecoverable for this processor family. It is always 0
+ * on 4xx/BookE; the other families test a family-specific DSISR mask.
+ */
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+#define page_fault_is_bad(__err)	(0)
+#elif defined(CONFIG_PPC_8xx)
+#define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
+#elif defined(CONFIG_PPC64)
+static int page_fault_is_bad(unsigned long err)
+{
+	unsigned long flag = DSISR_BAD_FAULT_64S;
+
+	/*
+	 * PAPR+ v2.11 § 14.15.3.4.1 (unreleased)
+	 * If byte 0, bit 3 of pi-attribute-specifier-type in
+	 * ibm,pi-features property is defined, ignore the DSI error
+	 * which is caused by the paste instruction on the
+	 * suspended NX window.
+	 */
+	if (mmu_has_feature(MMU_FTR_NX_DSI))
+		flag &= ~DSISR_BAD_COPYPASTE;
+
+	/* NOTE(review): the unsigned long result is narrowed to int here */
+	return err & flag;
+}
+#else
+#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
+#endif
+
+/*
+ * For 600- and 800-family processors, the error_code parameter is DSISR
+ * for a data fault, SRR1 for an instruction fault.
+ * For 400-family processors the error_code parameter is ESR for a data fault,
+ * 0 for an instruction fault.
+ * For 64-bit processors, the error_code parameter is DSISR for a data access
+ * fault, SRR1 & 0x08000000 for an instruction access fault.
+ *
+ * The return value is 0 if the fault was handled, or the signal
+ * number if this is a kernel fault that can't be handled here.
+ */
+static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
+			   unsigned long error_code)
+{
+	struct vm_area_struct * vma;
+	struct mm_struct *mm = current->mm;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
+	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
+	int is_user = user_mode(regs);
+	int is_write = page_fault_is_write(error_code);
+	/* Only VM_FAULT_MAJOR from the mmap_lock path is accumulated in major */
+	vm_fault_t fault, major = 0;
+	bool kprobe_fault = kprobe_page_fault(regs, 11);
+
+	/* A debugger or kprobe handler claimed the fault: nothing left to do */
+	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
+		return 0;
+
+	/* Faults flagged as unrecoverable by the hardware error code */
+	if (unlikely(page_fault_is_bad(error_code))) {
+		if (is_user) {
+			_exception(SIGBUS, regs, BUS_OBJERR, address);
+			return 0;
+		}
+		return SIGBUS;
+	}
+
+	/* Additional sanity check(s) */
+	sanity_check_fault(is_write, is_user, error_code, address);
+
+	/*
+	 * The kernel should never take an execute fault nor should it
+	 * take a page fault to a kernel address or a page fault to a user
+	 * address outside of dedicated places
+	 */
+	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) {
+		/* KFENCE gets a chance to claim the fault before it is fatal */
+		if (kfence_handle_page_fault(address, is_write, regs))
+			return 0;
+
+		return SIGSEGV;
+	}
+
+	/*
+	 * If we're in an interrupt, have no user context or are running
+	 * in a region with pagefaults disabled then we must not take the fault
+	 */
+	if (unlikely(faulthandler_disabled() || !mm)) {
+		if (is_user)
+			printk_ratelimited(KERN_ERR "Page fault in user mode"
+					   " with faulthandler_disabled()=%d"
+					   " mm=%p\n",
+					   faulthandler_disabled(), mm);
+		return bad_area_nosemaphore(regs, address);
+	}
+
+	interrupt_cond_local_irq_enable(regs);
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+	/*
+	 * We want to do this outside mmap_lock, because reading code around nip
+	 * can result in fault, which will cause a deadlock when called with
+	 * mmap_lock held
+	 *
+	 * NOTE(review): the statements below only build the fault flags and
+	 * do not read code around nip; this comment looks stale -- confirm.
+	 */
+	if (is_user)
+		flags |= FAULT_FLAG_USER;
+	if (is_write)
+		flags |= FAULT_FLAG_WRITE;
+	if (is_exec)
+		flags |= FAULT_FLAG_INSTRUCTION;
+
+	/* The per-VMA lock fast path is attempted for user-mode faults only */
+	if (!(flags & FAULT_FLAG_USER))
+		goto lock_mmap;
+
+	vma = lock_vma_under_rcu(mm, address);
+	if (!vma)
+		goto lock_mmap;
+
+	/*
+	 * Permission errors are not reported from the fast path: drop the
+	 * VMA lock and let the mmap_lock path below re-check and report.
+	 */
+	if (unlikely(access_pkey_error(is_write, is_exec,
+				       (error_code & DSISR_KEYFAULT), vma))) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+
+	if (unlikely(access_error(is_write, is_exec, vma))) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+
+	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
+	/* vma_end_read() is skipped when RETRY or COMPLETED is reported */
+	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+		vma_end_read(vma);
+
+	if (!(fault & VM_FAULT_RETRY)) {
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		goto done;
+	}
+	count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+	if (fault_signal_pending(fault, regs))
+		return user_mode(regs) ? 0 : SIGBUS;
+
+lock_mmap:
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
+	 * erroneous fault occurring in a code path which already holds mmap_lock
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table. lock_mm_and_find_vma() handles that logic.
+	 */
+retry:
+	vma = lock_mm_and_find_vma(mm, address, regs);
+	if (unlikely(!vma))
+		return bad_area_nosemaphore(regs, address);
+
+	/* Both error helpers below release the mmap read lock themselves */
+	if (unlikely(access_pkey_error(is_write, is_exec,
+				       (error_code & DSISR_KEYFAULT), vma)))
+		return bad_access_pkey(regs, address, vma);
+
+	if (unlikely(access_error(is_write, is_exec, vma)))
+		return bad_access(regs, address);
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(vma, address, flags, regs);
+
+	major |= fault & VM_FAULT_MAJOR;
+
+	if (fault_signal_pending(fault, regs))
+		return user_mode(regs) ? 0 : SIGBUS;
+
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED)
+		goto out;
+
+	/*
+	 * Handle the retry right now, the mmap_lock has been released in that
+	 * case.
+	 */
+	if (unlikely(fault & VM_FAULT_RETRY)) {
+		flags |= FAULT_FLAG_TRIED;
+		goto retry;
+	}
+
+	mmap_read_unlock(current->mm);
+
+done:
+	/* Reached from both the per-VMA lock path and the mmap_lock path */
+	if (unlikely(fault & VM_FAULT_ERROR))
+		return mm_fault_error(regs, address, fault);
+
+out:
+	/*
+	 * Major/minor page fault accounting.
+	 */
+	if (major)
+		cmo_account_page_fault();
+
+	return 0;
+}
+NOKPROBE_SYMBOL(___do_page_fault);
+
+/*
+ * Run ___do_page_fault() on the faulting address (regs->dar) and error
+ * code (regs->dsisr); a non-zero result is a signal number that is
+ * handed to bad_page_fault().
+ */
+static __always_inline void __do_page_fault(struct pt_regs *regs)
+{
+	long err;
+
+	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
+	if (unlikely(err))
+		bad_page_fault(regs, err);
+}
+
+/* Page fault interrupt handler: a thin wrapper around __do_page_fault(). */
+DEFINE_INTERRUPT_HANDLER(do_page_fault)
+{
+	__do_page_fault(regs);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Same as do_page_fault but interrupt entry has already run in do_hash_fault,
+ * so this is a plain function rather than a DEFINE_INTERRUPT_HANDLER.
+ */
+void hash__do_page_fault(struct pt_regs *regs)
+{
+	__do_page_fault(regs);
+}
+NOKPROBE_SYMBOL(hash__do_page_fault);
+#endif
+
+/*
+ * bad_page_fault is called when we have a bad access from the kernel.
+ * It is called from the DSI and ISI handlers in head.S and from some
+ * of the procedures in traps.c.
+ */
+/*
+ * Print a description of an unhandled kernel fault, chosen by trap type,
+ * followed by the faulting instruction address, then call die() with
+ * signal @sig.
+ */
+static void __bad_page_fault(struct pt_regs *regs, int sig)
+{
+	int is_write = page_fault_is_write(regs->dsisr);
+	const char *msg;
+
+	/* kernel has accessed a bad area */
+
+	/* An address within the first page is reported as a NULL dereference */
+	if (regs->dar < PAGE_SIZE)
+		msg = "Kernel NULL pointer dereference";
+	else
+		msg = "Unable to handle kernel data access";
+
+	switch (TRAP(regs)) {
+	case INTERRUPT_DATA_STORAGE:
+	case INTERRUPT_H_DATA_STORAGE:
+		pr_alert("BUG: %s on %s at 0x%08lx\n", msg,
+			 is_write ? "write" : "read", regs->dar);
+		break;
+	case INTERRUPT_DATA_SEGMENT:
+		pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar);
+		break;
+	case INTERRUPT_INST_STORAGE:
+	case INTERRUPT_INST_SEGMENT:
+		/* The trailing newline is supplied through the %s argument */
+		pr_alert("BUG: Unable to handle kernel instruction fetch%s",
+			 regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
+		break;
+	case INTERRUPT_ALIGNMENT:
+		pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
+			 regs->dar);
+		break;
+	default:
+		pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
+			 regs->dar);
+		break;
+	}
+	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
+		regs->nip);
+
+	if (task_stack_end_corrupted(current))
+		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+
+	die("Kernel access of bad area", regs, sig);
+}
+
+/*
+ * Handle a kernel fault that could not be resolved: if the faulting
+ * instruction has an exception table entry, resume at its fixup address;
+ * otherwise report the fault and die via __bad_page_fault().
+ */
+void bad_page_fault(struct pt_regs *regs, int sig)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(instruction_pointer(regs));
+	if (!fixup) {
+		/* Nobody is prepared to handle this fault */
+		__bad_page_fault(regs, sig);
+		return;
+	}
+
+	instruction_pointer_set(regs, extable_fixup(fixup));
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Interrupt handler that goes straight to bad_page_fault() with SIGSEGV. */
+DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
+{
+	bad_page_fault(regs, SIGSEGV);
+}
+
+/*
+ * In radix, segment interrupts indicate the EA is not addressable by the
+ * page table geometry, so they are always sent here.
+ *
+ * In hash, this is called if do_slb_fault returns error. Typically it is
+ * because the EA was outside the region allowed by software.
+ */
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+	int err = regs->result;
+
+	if (err == -EFAULT) {
+		if (user_mode(regs))
+			_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+		else
+			bad_page_fault(regs, SIGSEGV);
+	} else if (err == -EINVAL) {
+		unrecoverable_exception(regs);
+	} else {
+		BUG();
+	}
+}
+#endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
new file mode 100644
index 0000000000..f7c683b672
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -0,0 +1,621 @@
+/*
+ * PPC Huge TLB Page Support for Kernel.
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
+ *
+ * Based on the IA-32 version:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/export.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <linux/moduleparam.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/kmemleak.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/setup.h>
+#include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+#include <asm/firmware.h>
+
+bool hugetlb_disabled = false;
+
+#define hugepd_none(hpd)	(hpd_val(hpd) == 0)
+
+#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_basic_t)) - \
+			 __builtin_ffs(sizeof(void *)))
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+	/*
+	 * Only called for hugetlbfs pages, hence can ignore THP and the
+	 * irq disabled walk.  @sz is unused: the walk starts at mm->pgd.
+	 */
+	return __find_linux_pte(mm->pgd, addr, NULL, NULL);
+}
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+			   unsigned long address, unsigned int pdshift,
+			   unsigned int pshift, spinlock_t *ptl)
+{
+	struct kmem_cache *cachep;
+	pte_t *new;
+	int i;
+	int num_hugepd;
+	/* Allocate one hugepte table and install it in num_hugepd entries under ptl. */
+	if (pshift >= pdshift) {
+		cachep = PGT_CACHE(PTE_T_ORDER);
+		num_hugepd = 1 << (pshift - pdshift);	/* entries sharing "new" */
+	} else {
+		cachep = PGT_CACHE(pdshift - pshift);
+		num_hugepd = 1;
+	}
+
+	if (!cachep) {
+		WARN_ONCE(1, "No page table cache created for hugetlb tables");
+		return -ENOMEM;
+	}
+
+	new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+
+	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
+	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);	/* also passes for NULL */
+
+	if (!new)
+		return -ENOMEM;
+
+	/*
+	 * Make sure other cpus find the hugepd set only after a
+	 * properly initialized page table is visible to them.
+	 * For more details look for comment in __pte_alloc().
+	 */
+	smp_wmb();
+
+	spin_lock(ptl);
+	/*
+	 * We have multiple higher-level entries that point to the same
+	 * actual pte location.  Fill in each as we go and backtrack on error.
+	 * We need all of these so the DTLB pgtable walk code can find the
+	 * right higher-level entry without knowing if it's a hugepage or not.
+	 */
+	for (i = 0; i < num_hugepd; i++, hpdp++) {
+		if (unlikely(!hugepd_none(*hpdp)))
+			break;
+		hugepd_populate(hpdp, new, pshift);
+	}
+	/* If we bailed from the for loop early, an error occurred, clean up */
+	if (i < num_hugepd) {
+		for (i = i - 1 ; i >= 0; i--, hpdp--)
+			*hpdp = __hugepd(0);
+		kmem_cache_free(cachep, new);
+	} else {
+		kmemleak_ignore(new);
+	}
+	spin_unlock(ptl);
+	return 0;	/* also when an entry was already set and "new" was freed */
+}
+
+/*
+ * At this point we do the placement change only for BOOK3S 64. This would
+ * possibly work on other subarchs.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+		      unsigned long addr, unsigned long sz)
+{
+	pgd_t *pg;
+	p4d_t *p4;
+	pud_t *pu;
+	pmd_t *pm;
+	hugepd_t *hpdp = NULL;
+	unsigned pshift = __ffs(sz);	/* index of the lowest set bit of sz */
+	unsigned pdshift = PGDIR_SHIFT;	/* lowered below as the walk descends */
+	spinlock_t *ptl;
+	/* Find or allocate the slot for a huge page of size sz at addr; NULL on failure. */
+	addr &= ~(sz-1);
+	pg = pgd_offset(mm, addr);
+	p4 = p4d_offset(pg, addr);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (pshift == PGDIR_SHIFT)
+		/* 16GB huge page */
+		return (pte_t *) p4;
+	else if (pshift > PUD_SHIFT) {
+		/*
+		 * We need to use hugepd table
+		 */
+		ptl = &mm->page_table_lock;
+		hpdp = (hugepd_t *)p4;
+	} else {
+		pdshift = PUD_SHIFT;
+		pu = pud_alloc(mm, p4, addr);
+		if (!pu)
+			return NULL;
+		if (pshift == PUD_SHIFT)
+			return (pte_t *)pu;
+		else if (pshift > PMD_SHIFT) {
+			ptl = pud_lockptr(mm, pu);
+			hpdp = (hugepd_t *)pu;
+		} else {
+			pdshift = PMD_SHIFT;
+			pm = pmd_alloc(mm, pu, addr);
+			if (!pm)
+				return NULL;
+			if (pshift == PMD_SHIFT)
+				/* 16MB hugepage */
+				return (pte_t *)pm;
+			else {
+				ptl = pmd_lockptr(mm, pm);
+				hpdp = (hugepd_t *)pm;
+			}
+		}
+	}
+#else
+	if (pshift >= PGDIR_SHIFT) {
+		ptl = &mm->page_table_lock;
+		hpdp = (hugepd_t *)p4;
+	} else {
+		pdshift = PUD_SHIFT;
+		pu = pud_alloc(mm, p4, addr);
+		if (!pu)
+			return NULL;
+		if (pshift >= PUD_SHIFT) {
+			ptl = pud_lockptr(mm, pu);
+			hpdp = (hugepd_t *)pu;
+		} else {
+			pdshift = PMD_SHIFT;
+			pm = pmd_alloc(mm, pu, addr);
+			if (!pm)
+				return NULL;
+			ptl = pmd_lockptr(mm, pm);
+			hpdp = (hugepd_t *)pm;
+		}
+	}
+#endif
+	if (!hpdp)
+		return NULL;
+
+	if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
+		return pte_alloc_huge(mm, (pmd_t *)hpdp, addr);
+
+	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+	/* Populate an empty entry; an allocation error fails the whole call. */
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
+						  pdshift, pshift, ptl))
+		return NULL;
+
+	return hugepte_offset(*hpdp, addr, pdshift);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready on pseries.
+ */
+#define MAX_NUMBER_GPAGES	1024
+__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
+__initdata static unsigned nr_gpages;
+
+/*
+ * Build list of addresses of gigantic pages during early boot, before the
+ * buddy allocator is set up.  Entries beyond MAX_NUMBER_GPAGES are dropped.
+ */
+void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+{
+	if (!addr)
+		return;
+	for (; number_of_pages && nr_gpages < MAX_NUMBER_GPAGES; number_of_pages--) {
+		gpage_freearray[nr_gpages++] = addr;
+		addr += page_size;
+	}
+	if (number_of_pages)	/* gpage_freearray[] is full: do not write past it */
+		pr_warn("hugetlb: gpage array full, dropping %lu pages\n", number_of_pages);
+}
+
+static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
+{
+	struct huge_bootmem_page *page;
+	if (!nr_gpages)
+		return 0;	/* no gigantic page left to hand out */
+	page = phys_to_virt(gpage_freearray[--nr_gpages]);	/* pop last entry */
+	gpage_freearray[nr_gpages] = 0;
+	page->hstate = hstate;
+	list_add(&page->list, &huge_boot_pages);
+	return 1;
+}
+
+bool __init hugetlb_node_alloc_supported(void)
+{
+	return false;	/* unconditionally, whenever CONFIG_PPC_BOOK3S_64 is set */
+}
+#endif
+
+
+int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+{
+	/* Non-radix LPARs take pages from gpage_freearray[]; all others go generic. */
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
+		return pseries_alloc_bootmem_huge_page(h);
+#endif
+	return __alloc_bootmem_huge_page(h, nid);
+}
+
+#ifndef CONFIG_PPC_BOOK3S_64
+#define HUGEPD_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
+
+struct hugepd_freelist {
+	struct rcu_head	rcu;
+	unsigned int index;	/* number of entries used in ptes[] */
+	void *ptes[];		/* hugepte tables queued by hugepd_free() */
+};
+
+static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
+
+static void hugepd_free_rcu_callback(struct rcu_head *head)
+{
+	struct hugepd_freelist *fl = container_of(head, struct hugepd_freelist, rcu);
+	void **slot = fl->ptes;
+	void **stop = slot + fl->index;
+
+	/* Release every queued hugepte table, then the batch page itself. */
+	while (slot < stop)
+		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), *slot++);
+	free_page((unsigned long)fl);
+}
+
+static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+	struct hugepd_freelist **batchp;
+
+	batchp = &get_cpu_var(hugepd_freelist_cur);
+	/* Free right away when mm has < 2 users or is thread-local; else batch. */
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    mm_is_thread_local(tlb->mm)) {
+		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
+		put_cpu_var(hugepd_freelist_cur);
+		return;
+	}
+	/* NOTE(review): the __get_free_page() result is used without a NULL check. */
+	if (*batchp == NULL) {
+		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
+		(*batchp)->index = 0;
+	}
+	/* Queue the table; a full batch is passed to call_rcu() and detached. */
+	(*batchp)->ptes[(*batchp)->index++] = hugepte;
+	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
+		call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback);
+		*batchp = NULL;
+	}
+	put_cpu_var(hugepd_freelist_cur);
+}
+#else
+static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}	/* no-op when CONFIG_PPC_BOOK3S_64 is set */
+#endif
+
+/* Return true when the entry to be freed maps more than the area being freed */
+static bool range_is_outside_limits(unsigned long start, unsigned long end,
+				    unsigned long floor, unsigned long ceiling,
+				    unsigned long mask)
+{
+	unsigned long top = ceiling & mask;	/* stays 0 when ceiling is 0 */
+
+	/* The entry begins below floor, or a non-zero ceiling aligns down to 0. */
+	if ((start & mask) < floor || (ceiling && !top))
+		return true;
+
+	/* "- 1" makes a zero ceiling wrap to ULONG_MAX, i.e. no upper limit. */
+	return end - 1 > top - 1;
+}
+
+static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
+			      unsigned long start, unsigned long end,
+			      unsigned long floor, unsigned long ceiling)
+{
+	/* Read the table pointer and shift before the entries are wiped below. */
+	pte_t *table = hugepd_page(*hpdp);
+	unsigned int shift = hugepd_shift(*hpdp);
+	unsigned long pdmask = ~((1UL << pdshift) - 1);
+	/* Note: On fsl the hpdp may be the first of several */
+	unsigned int count = shift > pdshift ? 1 << (shift - pdshift) : 1;
+
+	if (range_is_outside_limits(start, end, floor, ceiling, pdmask))
+		return;
+
+	/* Clear every directory entry that refers to this hugepte table. */
+	while (count--)
+		*hpdp++ = __hugepd(0);
+
+	/*
+	 * shift >= pdshift: hand to hugepd_free(); else use pgtable_free_tlb().
+	 */
+	if (shift < pdshift)
+		pgtable_free_tlb(tlb, table,
+				 get_hugepd_cache_index(pdshift - shift));
+	else
+		hugepd_free(tlb, table);
+}
+
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pgtable_t token = pmd_pgtable(*pmd);	/* fetched before pmd_clear() below */
+
+	if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK))
+		return;
+	/* Detach the PTE table from the pmd, queue it for freeing, fix the count. */
+	pmd_clear(pmd);
+	pte_free_tlb(tlb, token, addr);
+	mm_dec_nr_ptes(tlb->mm);
+}
+
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	do {
+		unsigned long more;
+
+		pmd = pmd_offset(pud, addr);
+		next = pmd_addr_end(addr, end);
+		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
+			if (pmd_none_or_clear_bad(pmd))
+				continue;
+
+			/*
+			 * if it is not hugepd pointer, we should already find
+			 * it cleared.
+			 */
+			WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
+			/* NOTE(review): passes end rather than next - confirm intended. */
+			hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);
+
+			continue;
+		}
+		/*
+		 * Increment next by the size of the huge mapping since
+		 * there may be more than one entry at this level for a
+		 * single hugepage, but all of them point to
+		 * the same kmem cache that holds the hugepte.
+		 */
+		more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd));
+		if (more > next)
+			next = more;
+
+		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
+				  addr, next, floor, ceiling);
+	} while (addr = next, addr != end);	/* "continue" lands here, so addr still advances */
+	/* Free the pmd table itself only if its PUD-aligned span is within limits. */
+	if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK))
+		return;
+
+	pmd = pmd_offset(pud, start & PUD_MASK);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd, start & PUD_MASK);
+	mm_dec_nr_pmds(tlb->mm);
+}
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	do {
+		pud = pud_offset(p4d, addr);
+		next = pud_addr_end(addr, end);
+		if (!is_hugepd(__hugepd(pud_val(*pud)))) {
+			if (pud_none_or_clear_bad(pud))
+				continue;
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+					       ceiling);
+		} else {
+			unsigned long more;
+			/*
+			 * Increment next by the size of the huge mapping since
+			 * there may be more than one entry at this level for a
+			 * single hugepage, but all of them point to
+			 * the same kmem cache that holds the hugepte.
+			 */
+			more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud));
+			if (more > next)
+				next = more;
+
+			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
+					  addr, next, floor, ceiling);
+		}
+	} while (addr = next, addr != end);	/* "continue" lands here, so addr still advances */
+	/* Free the pud table itself only if its PGDIR-aligned span is within limits. */
+	if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK))
+		return;
+
+	pud = pud_offset(p4d, start & PGDIR_MASK);
+	p4d_clear(p4d);
+	pud_free_tlb(tlb, pud, start & PGDIR_MASK);
+	mm_dec_nr_puds(tlb->mm);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ */
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+			    unsigned long addr, unsigned long end,
+			    unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	unsigned long next;
+
+	/*
+	 * Because there are a number of different possible pagetable
+	 * layouts for hugepage ranges, we limit knowledge of how
+	 * things should be laid out to the allocation path
+	 * (huge_pte_alloc(), above).  Everything else works out the
+	 * structure as it goes from information in the hugepd
+	 * pointers.  That means that we can't here use the
+	 * optimization used in the normal page free_pgd_range(), of
+	 * checking whether we're actually covering a large enough
+	 * range to have to do anything at the top level of the walk
+	 * instead of at the bottom.
+	 *
+	 * To make sense of this, you should probably go read the big
+	 * block comment at the top of the normal free_pgd_range(),
+	 * too.
+	 */
+	/* Each pass handles one pgd entry: a hugepd there, or a walk through its puds. */
+	do {
+		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset(tlb->mm, addr);
+		p4d = p4d_offset(pgd, addr);
+		if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
+			if (p4d_none_or_clear_bad(p4d))
+				continue;
+			hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+		} else {
+			unsigned long more;
+			/*
+			 * Increment next by the size of the huge mapping since
+			 * there may be more than one entry at the pgd level
+			 * for a single hugepage, but all of them point to the
+			 * same kmem cache that holds the hugepte.
+			 */
+			more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd));
+			if (more > next)
+				next = more;
+
+			free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT,
+					  addr, next, floor, ceiling);
+		}
+	} while (addr = next, addr != end);	/* "continue" lands here, so addr still advances */
+}
+
+bool __init arch_hugetlb_valid_size(unsigned long size)
+{
+	int shift, mmu_psize;
+
+	/* Check that it is a page size supported by the hardware and
+	 * that it fits within pagetable and slice limits. */
+	if (size <= PAGE_SIZE || !is_power_of_2(size))
+		return false;
+
+	/* __ffs(0) is undefined, so only take it once size is validated. */
+	shift = __ffs(size);
+	mmu_psize = check_and_get_huge_psize(shift);
+	if (mmu_psize < 0)
+		return false;
+
+	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
+	return true;
+}
+
+static int __init add_huge_page_size(unsigned long long size)
+{
+	/* Validate before __ffs(), which is undefined for a zero argument. */
+	if (!arch_hugetlb_valid_size((unsigned long)size))
+		return -EINVAL;
+
+	/* The value registered is the page shift relative to PAGE_SHIFT. */
+	hugetlb_add_hstate((int)__ffs(size) - PAGE_SHIFT);
+	return 0;
+}
+
+static int __init hugetlbpage_init(void)
+{
+	bool configured = false;
+	int psize;
+
+	if (hugetlb_disabled) {
+		pr_info("HugeTLB support is disabled!\n");
+		return 0;
+	}
+	/* Book3S 64 without radix needs MMU_FTR_16M_PAGE, else give up with -ENODEV. */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
+	    !mmu_has_feature(MMU_FTR_16M_PAGE))
+		return -ENODEV;
+	/* Register an hstate, plus any hugepd cache it needs, per usable page size. */
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		unsigned shift;
+		unsigned pdshift;
+
+		if (!mmu_psize_defs[psize].shift)
+			continue;
+
+		shift = mmu_psize_to_shift(psize);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+		if (shift > PGDIR_SHIFT)
+			continue;
+		else if (shift > PUD_SHIFT)
+			pdshift = PGDIR_SHIFT;
+		else if (shift > PMD_SHIFT)
+			pdshift = PUD_SHIFT;
+		else
+			pdshift = PMD_SHIFT;
+#else
+		if (shift < PUD_SHIFT)
+			pdshift = PMD_SHIFT;
+		else if (shift < PGDIR_SHIFT)
+			pdshift = PUD_SHIFT;
+		else
+			pdshift = PGDIR_SHIFT;
+#endif
+
+		if (add_huge_page_size(1ULL << shift) < 0)
+			continue;
+		/*
+		 * if we have pdshift and shift value same, we don't
+		 * use pgt cache for hugepd.
+		 */
+		if (pdshift > shift) {
+			if (!IS_ENABLED(CONFIG_PPC_8xx))
+				pgtable_cache_add(pdshift - shift);
+		} else if (IS_ENABLED(CONFIG_PPC_E500) ||
+			   IS_ENABLED(CONFIG_PPC_8xx)) {
+			pgtable_cache_add(PTE_T_ORDER);
+		}
+
+		configured = true;
+	}
+	/* The message is newline-terminated so it does not merge with the next one. */
+	if (!configured)
+		pr_info("Failed to initialize. Disabling HugeTLB\n");
+
+	return 0;
+}
+
+arch_initcall(hugetlbpage_init);
+
+void __init gigantic_hugetlb_cma_reserve(void)
+{
+	unsigned long order;
+
+	/* For pseries we do use ibm,expected#pages for reserving 16G pages. */
+	if (radix_enabled())
+		order = PUD_SHIFT - PAGE_SHIFT;
+	else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
+		order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
+	else
+		return;
+
+	if (!order)
+		return;
+	VM_WARN_ON(order <= MAX_ORDER);
+	hugetlb_cma_reserve(order);
+}
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
new file mode 100644
index 0000000000..119ef491f7
--- /dev/null
+++ b/arch/powerpc/mm/init-common.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ */
+
+#undef DEBUG
+
+#include <linux/string.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
+
+bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
+bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+
+static int __init parse_nosmep(char *p)
+{
+	/* "nosmep" only takes effect with CONFIG_PPC_BOOK3S_64; else it is ignored. */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+		disable_kuep = true;
+		pr_warn("Disabling Kernel Userspace Execution Prevention\n");
+	}
+	return 0;
+}
+early_param("nosmep", parse_nosmep);
+
+static int __init parse_nosmap(char *p)
+{
+	disable_kuap = true;	/* later handed to setup_kuap() by setup_kup() */
+	pr_warn("Disabling Kernel Userspace Access Protection\n");
+	return 0;
+}
+early_param("nosmap", parse_nosmap);
+
+/*
+ * Weak default for KUEP setup: does nothing beyond logging, and only when
+ * KUEP is built in, not disabled, and we are running on the boot CPU.
+ */
+void __weak setup_kuep(bool disabled)
+{
+	if (IS_ENABLED(CONFIG_PPC_KUEP) && !disabled &&
+	    smp_processor_id() == boot_cpuid)
+		pr_info("Activating Kernel Userspace Execution Prevention\n");
+}
+
+void setup_kup(void)
+{
+	setup_kuap(disable_kuap);	/* true via "nosmap" or when CONFIG_PPC_KUAP is off */
+	setup_kuep(disable_kuep);	/* true via "nosmep" or when CONFIG_PPC_KUEP is off */
+}
+
+#define CTOR(shift) static void ctor_##shift(void *addr) \
+{							\
+	memset(addr, 0, sizeof(void *) << (shift));	\
+}
+
+CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7);
+CTOR(8); CTOR(9); CTOR(10); CTOR(11); CTOR(12); CTOR(13); CTOR(14); CTOR(15);	/* ctor_N zeroes sizeof(void *) << N bytes */
+
+/*
+ * Pick the constructor that zeroes a table of (sizeof(void *) << shift)
+ * bytes, i.e. the ctor_<shift> generated by CTOR().  Returns NULL when
+ * shift is outside the range 0..15.
+ */
+static inline void (*ctor(int shift))(void *)
+{
+	/* Indexed by shift; one slot per CTOR() instance. */
+	static void (* const by_shift[])(void *) = {
+		ctor_0,  ctor_1,  ctor_2,  ctor_3,
+		ctor_4,  ctor_5,  ctor_6,  ctor_7,
+		ctor_8,  ctor_9,  ctor_10, ctor_11,
+		ctor_12, ctor_13, ctor_14, ctor_15,
+	};
+
+	/* The table above must grow together with MAX_PGTABLE_INDEX_SIZE. */
+	BUILD_BUG_ON(MAX_PGTABLE_INDEX_SIZE != 15);
+
+	/* Shifts without a constructor yield NULL. */
+	if (shift < 0 || shift > 15)
+		return NULL;
+
+	return by_shift[shift];
+}
+
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE + 1];
+EXPORT_SYMBOL_GPL(pgtable_cache);	/* used by kvm_hv module */
+
+/*
+ * Create a kmem_cache() for pagetables.  This is not used for PTE
+ * pages - they're linked to struct page, come from the normal free
+ * pages pool and have a different entry size (see real_pte_t) to
+ * everything else.  Caches created by this function are used for all
+ * the higher level pagetables, and for hugepage pagetables.
+ */
+void pgtable_cache_add(unsigned int shift)
+{
+	char *name;
+	unsigned long table_size = sizeof(void *) << shift;
+	unsigned long align = table_size;
+
+	/* When batching pgtable pointers for RCU freeing, we store
+	 * the index size in the low bits.  Table alignment must be
+	 * big enough to fit it.
+	 *
+	 * Likewise, hugepage pagetable pointers contain a (different)
+	 * shift value in the low bits.  All tables must be aligned so
+	 * as to leave enough 0 bits in the address to contain it. */
+	unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
+				     HUGEPD_SHIFT_MASK + 1);
+	struct kmem_cache *new;
+
+	/* It would be nice if this was a BUILD_BUG_ON(), but at the
+	 * moment, gcc doesn't seem to recognize is_power_of_2 as a
+	 * constant expression, so so much for that. */
+	BUG_ON(!is_power_of_2(minalign));
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+
+	if (PGT_CACHE(shift))
+		return; /* Already have a cache of this size */
+
+	align = max_t(unsigned long, align, minalign);
+	name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);	/* NOTE(review): result not NULL-checked */
+	new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
+	if (!new)
+		panic("Could not allocate pgtable cache for order %d", shift);
+	/* NOTE(review): presumably kmem_cache_create() keeps its own copy of name - confirm. */
+	kfree(name);
+	pgtable_cache[shift] = new;
+
+	pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
+EXPORT_SYMBOL_GPL(pgtable_cache_add);	/* used by kvm_hv module */
+
+void pgtable_cache_init(void)
+{
+	pgtable_cache_add(PGD_INDEX_SIZE);
+	/* The PMD and PUD caches are only created when their index is non-zero. */
+	if (PMD_CACHE_INDEX)
+		pgtable_cache_add(PMD_CACHE_INDEX);
+	/*
+	 * In all current configs, when the PUD index exists it's the
+	 * same size as either the pgd or pmd index except with THP enabled
+	 * on book3s 64
+	 */
+	if (PUD_CACHE_INDEX)
+		pgtable_cache_add(PUD_CACHE_INDEX);
+}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
new file mode 100644
index 0000000000..d8adc452f4
--- /dev/null
+++ b/arch/powerpc/mm/init_32.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/initrd.h>
+#include <linux/pagemap.h>
+#include <linux/memblock.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+
+#include <asm/io.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/btext.h>
+#include <asm/tlb.h>
+#include <asm/sections.h>
+#include <asm/hugetlb.h>
+#include <asm/kup.h>
+#include <asm/kasan.h>
+
+#include <mm/mmu_decl.h>
+
+#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
+/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
+#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET))
+#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_KERNEL_START"
+#endif
+#endif
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+phys_addr_t total_memory;
+phys_addr_t total_lowmem;
+
+#ifdef CONFIG_RELOCATABLE
+/* Used in __va()/__pa() */
+long long virt_phys_offset;
+EXPORT_SYMBOL(virt_phys_offset);
+#endif
+
+phys_addr_t lowmem_end_addr;
+
+int boot_mapsize;
+#ifdef CONFIG_PPC_PMAC
+unsigned long agp_special_page;
+EXPORT_SYMBOL(agp_special_page);
+#endif
+
+void MMU_init(void);
+
+/* max amount of low RAM to map in */
+unsigned long __max_low_memory = MAX_LOW_MEM;
+
+/*
+ * MMU_init sets up the basic memory mappings for the kernel,
+ * including both RAM and possibly some I/O regions,
+ * and sets up the page tables and the MMU hardware ready to go.
+ */
+void __init MMU_init(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:enter", 0x111);
+	/* Start with all of DRAM counted as lowmem; it is trimmed further down. */
+	total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
+	lowmem_end_addr = memstart_addr + total_lowmem;
+
+#ifdef CONFIG_PPC_85xx
+	/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
+	 * entries, so we need to adjust lowmem to match the amount we can map
+	 * in the fixed entries */
+	adjust_total_lowmem();
+#endif /* CONFIG_PPC_85xx */
+	/* Cap lowmem at __max_low_memory; without HIGHMEM total memory is capped too. */
+	if (total_lowmem > __max_low_memory) {
+		total_lowmem = __max_low_memory;
+		lowmem_end_addr = memstart_addr + total_lowmem;
+#ifndef CONFIG_HIGHMEM
+		total_memory = total_lowmem;
+		memblock_enforce_memory_limit(total_lowmem);
+#endif /* CONFIG_HIGHMEM */
+	}
+
+	/* Initialize the MMU hardware */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:hw init", 0x300);
+	MMU_init_hw();
+
+	/* Map in all of RAM starting at KERNELBASE */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:mapin", 0x301);
+	mapin_ram();
+
+	/* Initialize early top-down ioremap allocator */
+	ioremap_bot = IOREMAP_TOP;
+
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:exit", 0x211);
+
+	/* From now on, btext is no longer BAT mapped if it was at all */
+#ifdef CONFIG_BOOTX_TEXT
+	btext_unmap();
+#endif
+
+	kasan_mmu_init();
+	/* setup_kup() runs setup_kuap() and setup_kuep() with the disable flags. */
+	setup_kup();
+
+	update_mmu_feature_fixups(MMU_FTR_KUAP);
+
+	/* Shortly after that, the entire linear mapping will be available */
+	memblock_set_current_limit(lowmem_end_addr);
+}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
new file mode 100644
index 0000000000..d96bbc001e
--- /dev/null
+++ b/arch/powerpc/mm/init_64.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ */
+
+#undef DEBUG
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+#include <linux/poison.h>
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+#include <linux/slab.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/memremap.h>
+#include <linux/memory.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/iommu.h>
+#include <asm/vdso.h>
+#include <asm/hugetlb.h>
+
+#include <mm/mmu_decl.h>
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within.  Note that we have to
+ * do this by hand as the proffered address may not be correctly aligned.
+ * Subtraction of non-aligned pointers produces undefined results.
+ */
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
+{
+	unsigned long start_pfn;
+	unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));	/* byte offset into the vmemmap array */
+
+	/* Turn the byte offset into a pfn and round it down to its subsection start. */
+	start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+	return pfn_to_page(start_pfn);
+}
+
+/*
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because kernel can map
+ * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
+ * a range covers multiple subsections (2M)
+ *
+ * Returns 1 if any subsection whose struct pages lie below vmemmap_addr +
+ * vmemmap_map_size is pfn_valid() (we then consider the vmemmap populated),
+ * else 0. We can't do a page table lookup here because with the hash
+ * translation we don't keep vmemmap details in linux page table.
+ */
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
+{
+	struct page *start;
+	unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+	start = vmemmap_subsection_start(vmemmap_addr);	/* may lie below vmemmap_addr */
+
+	for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)	/* one probe per subsection */
+		/*
+		 * pfn valid check here is intended to really check
+		 * whether we have any subsection already initialized
+		 * in this range.
+		 */
+		if (pfn_valid(page_to_pfn(start)))
+			return 1;
+
+	return 0;
+}
+
+/*
+ * vmemmap virtual address space management does not have a traditional page
+ * table to track which virtual struct pages are backed by physical mapping.
+ * The virtual to physical mappings are tracked in a simple linked list
+ * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
+ * all times, whereas the 'next' list maintains the available
+ * vmemmap_backing structures which have been deleted from the
+ * 'vmemmap_list' list during system runtime (memory hotplug remove
+ * operation). The freed 'vmemmap_backing' structures are reused later when
+ * new requests come in without allocating fresh memory. This pointer also
+ * tracks the allocated 'vmemmap_backing' structures as we allocate one
+ * full page of memory at a time when we don't have any.
+ */
+struct vmemmap_backing *vmemmap_list;
+static struct vmemmap_backing *next;
+
+/*
+ * The same pointer 'next' tracks individual chunks inside the allocated
+ * full page during the boot time and again tracks the freed nodes during
+ * runtime. It is racy, but that does not matter as the two uses are
+ * separated by the boot process. It would cause problems if we somehow had
+ * a memory hotplug operation during boot!
+ */
+static int num_left;	/* unused vmemmap_backing slots left in the current page */
+static int num_freed;	/* entries on the freed list headed by 'next' */
+
+static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
+{
+	struct vmemmap_backing *vmem_back;
+	/* Hand out one node: pop from the freed list (linked via ->list) first */
+	if (num_freed) {
+		num_freed--;
+		vmem_back = next;
+		next = next->list;
+
+		return vmem_back;
+	}
+
+	/* otherwise carve nodes out of a page, allocating a new page when empty */
+	if (!num_left) {
+		next = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (unlikely(!next)) {
+			WARN_ON(1);
+			return NULL;	/* caller must handle allocation failure */
+		}
+		num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
+	}
+
+	num_left--;
+
+	return next++;	/* return the current slot, advance to the following one */
+}
+
+static __meminit int vmemmap_list_populate(unsigned long phys,
+					   unsigned long start,
+					   int node)
+{
+	/* Record that vmemmap address 'start' is backed by physical 'phys'. */
+	struct vmemmap_backing *entry = vmemmap_list_alloc(node);
+
+	if (likely(entry)) {
+		/* Fill the node in, then push it on the head of vmemmap_list. */
+		entry->list = vmemmap_list;
+		entry->virt_addr = start;
+		entry->phys = phys;
+		vmemmap_list = entry;
+		return 0;
+	}
+
+	pr_debug("vmemap list allocation failed\n");
+	return -ENOMEM;
+}
+
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+			   unsigned long page_size)
+{
+	/*
+	 * 'start' is a vmemmap address; page_size bytes of struct page
+	 * beginning there describe the pfn range [first_pfn, last_pfn].
+	 */
+	unsigned long first_pfn = page_to_pfn((struct page *)start);
+	unsigned long last_pfn = first_pfn + page_size / sizeof(struct page) - 1;
+
+	/* True when that range sticks out of the altmap at either end. */
+	return last_pfn > altmap->end_pfn ||
+	       first_pfn < altmap->base_pfn;
+}
+
+static int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node,
+					struct vmem_altmap *altmap)
+{
+	bool altmap_alloc;	/* true when the current block came from the altmap */
+	unsigned long page_size = 1UL << mmu_psize_defs[mmu_vmemmap_psize].shift;	/* shift in unsigned long, not int */
+
+	/* Populate [start, end) one vmemmap page at a time; align start down first. */
+	start = ALIGN_DOWN(start, page_size);
+
+	pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
+
+	for (; start < end; start += page_size) {
+		void *p = NULL;
+		int rc;
+
+		/*
+		 * This vmemmap range is backing different subsections. If any
+		 * of that subsection is marked valid, that means we already
+		 * have initialized a page table covering this range and hence
+		 * the vmemmap range is populated.
+		 */
+		if (vmemmap_populated(start, page_size))
+			continue;
+
+		/*
+		 * Allocate from the altmap first if we have one. This may
+		 * fail due to alignment issues when using 16MB hugepages, so
+		 * fall back to system memory if the altmap allocation fail.
+		 */
+		if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
+			p = vmemmap_alloc_block_buf(page_size, node, altmap);
+			if (!p)
+				pr_debug("altmap block allocation failed, falling back to system memory\n");
+			else
+				altmap_alloc = true;
+		}
+		if (!p) {
+			p = vmemmap_alloc_block_buf(page_size, node, NULL);
+			altmap_alloc = false;
+		}
+		if (!p)
+			return -ENOMEM;	/* neither altmap nor system memory could back this block */
+
+		if (vmemmap_list_populate(__pa(p), start, node)) {
+			/*
+			 * If we don't populate vmemap list, we don't have
+			 * the ability to free the allocated vmemmap
+			 * pages in section_deactivate. Hence free them
+			 * here.
+			 */
+			int nr_pfns = page_size >> PAGE_SHIFT;
+			unsigned long page_order = get_order(page_size);
+
+			if (altmap_alloc)
+				vmem_altmap_free(altmap, nr_pfns);
+			else
+				free_pages((unsigned long)p, page_order);
+			return -ENOMEM;
+		}
+
+		pr_debug("      * %016lx..%016lx allocated at %p\n",
+			 start, start + page_size, p);
+
+		rc = vmemmap_create_mapping(start, page_size, __pa(p));
+		if (rc < 0) {
+			pr_warn("%s: Unable to create vmemmap mapping: %d\n",
+				__func__, rc);
+			return -EFAULT;	/* NOTE(review): p and its list entry are not released on this path */
+		}
+	}
+
+	return 0;
+}
+
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+			       struct vmem_altmap *altmap)
+{
+	/* Dispatch: radix has its own populate routine, otherwise use the list-based one. */
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (radix_enabled())
+		return radix__vmemmap_populate(start, end, node, altmap);
+#endif
+
+	return __vmemmap_populate(start, end, node, altmap);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long vmemmap_list_free(unsigned long start)
+{
+	struct vmemmap_backing *vmem_back, *vmem_back_prev;
+
+	vmem_back_prev = vmem_back = vmemmap_list;
+
+	/* find the entry whose virt_addr is 'start', remembering its predecessor */
+	for (; vmem_back; vmem_back = vmem_back->list) {
+		if (vmem_back->virt_addr == start)
+			break;
+		vmem_back_prev = vmem_back;
+	}
+
+	if (unlikely(!vmem_back))
+		return 0;	/* nothing recorded for this address */
+
+	/* remove it from vmemmap_list */
+	if (vmem_back == vmemmap_list) /* remove head */
+		vmemmap_list = vmem_back->list;
+	else
+		vmem_back_prev->list = vmem_back->list;
+
+	/* push the entry on the freed list headed by 'next' for later reuse */
+	vmem_back->list = next;
+	next = vmem_back;
+	num_freed++;
+
+	return vmem_back->phys;	/* physical address that backed 'start' */
+}
+
+static void __ref __vmemmap_free(unsigned long start, unsigned long end,
+				 struct vmem_altmap *altmap)
+{
+	unsigned long page_size = 1UL << mmu_psize_defs[mmu_vmemmap_psize].shift;	/* shift in unsigned long, not int */
+	unsigned long page_order = get_order(page_size);
+	unsigned long alt_start = ~0, alt_end = ~0;	/* empty pfn window unless an altmap is given */
+	unsigned long base_pfn;
+
+	start = ALIGN_DOWN(start, page_size);
+	if (altmap) {
+		alt_start = altmap->base_pfn;
+		alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+	}
+
+	pr_debug("vmemmap_free %lx...%lx\n", start, end);
+
+	for (; start < end; start += page_size) {
+		unsigned long nr_pages, addr;
+		struct page *page;
+
+		/*
+		 * We have already marked the subsection we are trying to remove
+		 * invalid. So if we want to remove the vmemmap range, we
+		 * need to make sure there is no subsection marked valid
+		 * in this range.
+		 */
+		if (vmemmap_populated(start, page_size))
+			continue;
+
+		addr = vmemmap_list_free(start);	/* physical backing, 0 if none was recorded */
+		if (!addr)
+			continue;
+
+		page = pfn_to_page(addr >> PAGE_SHIFT);
+		nr_pages = 1 << page_order;
+		base_pfn = PHYS_PFN(addr);
+
+		if (base_pfn >= alt_start && base_pfn < alt_end) {	/* backing came from the altmap */
+			vmem_altmap_free(altmap, nr_pages);
+		} else if (PageReserved(page)) {
+			/* allocated from bootmem */
+			if (page_size < PAGE_SIZE) {
+				/*
+				 * this shouldn't happen, but if it is
+				 * the case, leave the memory there
+				 */
+				WARN_ON_ONCE(1);
+			} else {
+				while (nr_pages--)
+					free_reserved_page(page++);
+			}
+		} else {
+			free_pages((unsigned long)(__va(addr)), page_order);
+		}
+
+		vmemmap_remove_mapping(start, page_size);
+	}
+}
+
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+			struct vmem_altmap *altmap)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (radix_enabled())	/* radix has its own free routine */
+		return radix__vmemmap_free(start, end, altmap);
+#endif
+	return __vmemmap_free(start, end, altmap);	/* list-based path */
+}
+
+#endif
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{	/* no-op in this implementation: all arguments are ignored */
+}
+
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+unsigned int mmu_lpid_bits;	/* from "ibm,mmu-lpid-bits", else defaulted in mmu_early_init_devtree() */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+EXPORT_SYMBOL_GPL(mmu_lpid_bits);
+#endif
+unsigned int mmu_pid_bits;	/* from "ibm,mmu-pid-bits", else defaulted in mmu_early_init_devtree() */
+
+static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);	/* overridable via the "disable_radix" early param */
+
+static int __init parse_disable_radix(char *p)
+{
+	/* A bare "disable_radix" (no value) means: disable. */
+	bool disable = true;
+
+	/* An explicit value must parse as a boolean, otherwise reject it. */
+	if (p && kstrtobool(p, &disable))
+		return -EINVAL;
+
+	disable_radix = disable;
+
+	return 0;
+}
+early_param("disable_radix", parse_disable_radix);
+
+/*
+ * Read /chosen/ibm,architecture-vec-5 to learn which MMU modes the hypervisor
+ * offers.  A missing or too-short property, or a hash-only offer, clears the
+ * radix feature bit so we fall back to hash; a radix-only offer forces radix
+ * and takes GTSE from the vector (a vector too short for that byte: no GTSE).
+ */
+static void __init early_check_vec5(void)
+{
+	unsigned long root, chosen;
+	int size;
+	const u8 *vec5;
+	u8 mmu_supported;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND) {
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		return;
+	}
+	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+	if (!vec5) {
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		return;
+	}
+	if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {	/* vector too short to hold the MMU byte */
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		return;
+	}
+
+	/* Check for supported configuration */
+	mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
+			OV5_FEAT(OV5_MMU_SUPPORT);
+	if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
+		/* Hypervisor only supports radix - check enabled && GTSE */
+		if (!early_radix_enabled()) {
+			pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+		}
+		if (size > OV5_INDX(OV5_RADIX_GTSE) &&	/* only the MMU byte was length-checked above */
+		    (vec5[OV5_INDX(OV5_RADIX_GTSE)] & OV5_FEAT(OV5_RADIX_GTSE)))
+			cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
+		else
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+		/* Do radix anyway - the hypervisor said we had to */
+		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
+		/* Hypervisor only supports hash - disable radix */
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+	}
+}
+
+static int __init dt_scan_mmu_pid_width(unsigned long node,
+					   const char *uname, int depth,
+					   void *data)
+{
+	int size = 0;
+	const __be32 *prop;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+	/* of_scan_flat_dt() callback: only "cpu" nodes are of interest */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	/* Find MMU LPID, PID register size; each must be a single 32-bit cell */
+	prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size);
+	if (prop && size == 4)
+		mmu_lpid_bits = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+	if (prop && size == 4)
+		mmu_pid_bits = be32_to_cpup(prop);
+
+	if (!mmu_pid_bits && !mmu_lpid_bits)
+		return 0;	/* neither width known yet */
+
+	return 1;	/* at least one width found; non-zero presumably ends the scan - confirm */
+}
+
+/*
+ * Outside hotplug the kernel uses this value to map the kernel direct map
+ * with radix. To be compatible with older kernels, let's keep this value
+ * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map
+ * things with 1GB size in the case where we don't support hotplug.
+ */
+#ifndef CONFIG_MEMORY_HOTPLUG
+#define DEFAULT_MEMORY_BLOCK_SIZE	SZ_16M
+#else
+#define DEFAULT_MEMORY_BLOCK_SIZE	MIN_MEMORY_BLOCK_SIZE
+#endif
+
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+	unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+	/* Shrink *block_size (not below the minimum) until mem_size is a multiple of it. */
+	for (; *block_size > min_memory_block_size; *block_size >>= 2) {
+		if ((mem_size & (*block_size - 1)) == 0)	/* assumes *block_size is a power of two */
+			break;
+	}
+}
+
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+					  depth, void *data)
+{
+	const char *type;
+	unsigned long *block_size = (unsigned long *)data;	/* in/out: current candidate block size */
+	const __be32 *reg, *endp;
+	int l;
+
+	if (depth != 1)	/* only nodes directly below the root are examined */
+		return 0;
+	/*
+	 * If we have dynamic-reconfiguration-memory node, use the
+	 * lmb value.
+	 */
+	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+		const __be32 *prop;
+
+		prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+		if (!prop || l < dt_root_size_cells * sizeof(__be32))
+			/*
+			 * Nothing (or too little) in the device tree
+			 */
+			*block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+		else
+			*block_size = of_read_number(prop, dt_root_size_cells);
+		/*
+		 * We have found the final value. Don't probe further.
+		 */
+		return 1;
+	}
+	/*
+	 * Find all the device tree nodes of memory type and make sure
+	 * the area can be mapped using the memory block size value
+	 * we end up using. We start with 1G value and keep reducing
+	 * it such that we can map the entire area using memory_block_size.
+	 * This will be used on powernv and older pseries that don't
+	 * have the ibm,lmb-size property.
+	 * For ex: with P5 we can end up with
+	 * memory@0 -> 128MB
+	 * memory@128M -> 64M
+	 * This will end up using 64MB  memory block size value.
+	 */
+	type = of_get_flat_dt_prop(node, "device_type", NULL);
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);	/* preferred over "reg" when present */
+	if (!reg)
+		reg = of_get_flat_dt_prop(node, "reg", &l);
+	if (!reg)
+		return 0;
+
+	endp = reg + (l / sizeof(__be32));
+	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {	/* one (address, size) pair per pass */
+		const char *compatible;
+		u64 size;
+
+		dt_mem_next_cell(dt_root_addr_cells, &reg);	/* skip the address, only the size matters */
+		size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+		if (size) {
+			update_memory_block_size(block_size, size);
+			continue;
+		}
+		/*
+		 * ibm,coherent-device-memory with linux,usable-memory = 0
+		 * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+		 * linux,usable-memory == 0 implies driver managed memory and
+		 * we can't use large memory block size due to hotplug/unplug
+		 * limitations.
+		 */
+		compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+		if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+			if (*block_size > SZ_256M)
+				*block_size = SZ_256M;
+			/*
+			 * We keep 256M as the upper limit with GPU present.
+			 */
+			return 0;
+		}
+	}
+	/* continue looking for other memory device types */
+	return 0;
+}
+
+/*
+ * Start with a 1G memory block size. early_init_memory_block_size()
+ * replaces it with the value probed from the flat device tree.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+static void __init early_init_memory_block_size(void)
+{
+	/*
+	 * We need to do memory_block_size probe early so that
+	 * radix__early_init_mmu() can use this as limit for
+	 * mapping page size.
+	 */
+	of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+}
+
+void __init mmu_early_init_devtree(void)
+{
+	bool hvmode = !!(mfmsr() & MSR_HV);	/* MSR_HV set in the current MSR */
+
+	/* Disable radix mode based on kernel command line. */
+	if (disable_radix) {
+		if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		else	/* hash MMU support is not built in, so the request cannot be honoured */
+			pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+	}
+
+	of_scan_flat_dt(dt_scan_mmu_pid_width, NULL);	/* may set mmu_lpid_bits / mmu_pid_bits */
+	if (hvmode && !mmu_lpid_bits) {
+		if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+			mmu_lpid_bits = 12; /* POWER8-10 */
+		else
+			mmu_lpid_bits = 10; /* POWER7 */
+	}
+	if (!mmu_pid_bits) {
+		if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+			mmu_pid_bits = 20; /* POWER9-10 */
+	}
+
+	/*
+	 * Check /chosen/ibm,architecture-vec-5 if running as a guest.
+	 * When running bare-metal, we can use radix if we like
+	 * even though the ibm,architecture-vec-5 property created by
+	 * skiboot doesn't have the necessary bits set.
+	 */
+	if (!hvmode)
+		early_check_vec5();
+
+	early_init_memory_block_size();
+
+	if (early_radix_enabled()) {
+		radix__early_init_devtree();
+
+		/*
+		 * We have finalized the translation we are going to use by now.
+		 * Radix mode is not limited by RMA / VRMA addressing.
+		 * Hence don't limit memblock allocations.
+		 */
+		ppc64_rma_size = ULONG_MAX;
+		memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+	} else
+		hash__early_init_devtree();
+
+	if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+		hugetlbpage_init_defaultsize();
+
+	if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) &&	/* neither hash nor radix left enabled */
+	    !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX))
+		panic("kernel does not support any MMU type offered by platform");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
new file mode 100644
index 0000000000..705e8e8ffd
--- /dev/null
+++ b/arch/powerpc/mm/ioremap.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/mmzone.h>
+#include <linux/vmalloc.h>
+#include <asm/io-workarounds.h>
+
+unsigned long ioremap_bot;	/* cursor for the early (pre-slab) ioremap address allocator */
+EXPORT_SYMBOL(ioremap_bot);
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+	/* pgprot_noncached() kernel mapping, attributed to our own caller. */
+	void *ra = __builtin_return_address(0);
+	pgprot_t flags = pgprot_noncached(PAGE_KERNEL);
+
+	return iowa_is_active() ? iowa_ioremap(addr, size, flags, ra)
+				: __ioremap_caller(addr, size, flags, ra);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	/* pgprot_noncached_wc() kernel mapping, attributed to our own caller. */
+	void *ra = __builtin_return_address(0);
+	pgprot_t flags = pgprot_noncached_wc(PAGE_KERNEL);
+
+	return iowa_is_active() ? iowa_ioremap(addr, size, flags, ra)
+				: __ioremap_caller(addr, size, flags, ra);
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+	/* pgprot_cached() kernel mapping, attributed to our own caller. */
+	void *ra = __builtin_return_address(0);
+	pgprot_t flags = pgprot_cached(PAGE_KERNEL);
+
+	return iowa_is_active() ? iowa_ioremap(addr, size, flags, ra)
+				: __ioremap_caller(addr, size, flags, ra);
+}
+
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags)
+{
+	pte_t pte = __pte(flags);	/* treat the raw flags as a PTE so the pte_*() helpers apply */
+	void *caller = __builtin_return_address(0);
+
+	/* writeable implies dirty for kernel addresses */
+	if (pte_write(pte))
+		pte = pte_mkdirty(pte);
+
+	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+	pte = pte_exprotect(pte);
+	pte = pte_mkprivileged(pte);
+
+	if (iowa_is_active())	/* I/O workarounds supply their own ioremap */
+		return iowa_ioremap(addr, size, pte_pgprot(pte), caller);
+	return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+			unsigned long size, pgprot_t prot)
+{
+	unsigned long done = 0;
+
+	/* Map page by page; stop at the first failure without undoing earlier pages. */
+	while (done < size) {
+		int rc = map_kernel_page(ea + done, pa + done, prot);
+		if (WARN_ON_ONCE(rc))  /* Should clean up */
+			return rc;
+		done += PAGE_SIZE;
+	}
+	return 0;
+}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
new file mode 100644
index 0000000000..ca5bc6be3e
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <mm/mmu_decl.h>
+
+void __iomem *ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+	/* pgprot_cached_wthru() kernel mapping, attributed to our own caller. */
+	return __ioremap_caller(addr, size, pgprot_cached_wthru(PAGE_KERNEL),
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
+{
+	unsigned long v;
+	phys_addr_t p, offset;	/* page-aligned physical base and offset of addr within that page */
+	int err;
+
+	/*
+	 * If the address lies within the first 16 MB, assume it's in ISA
+	 * memory space
+	 */
+	if (addr < SZ_16M)
+		addr += _ISA_MEM_BASE;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the vmalloc system is running, we use it.
+	 * Before then, we use space going down from IOREMAP_TOP
+	 * (ioremap_bot records where we're up to).
+	 */
+	p = addr & PAGE_MASK;
+	offset = addr & ~PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - p;	/* whole pages covering [addr, addr + size) */
+
+#ifndef CONFIG_CRASH_DUMP
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using.
+	 * mem_init() sets high_memory so only do the check after that.
+	 * Report the 'caller' we were handed, not our own return address.
+	 */
+	if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
+	    page_is_ram(__phys_to_pfn(p))) {
+		pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__,
+			(unsigned long long)p, caller);
+		return NULL;
+	}
+#endif
+
+	if (size == 0)
+		return NULL;
+
+	/*
+	 * Is it already mapped?  Perhaps overlapped by a previous
+	 * mapping.
+	 */
+	v = p_block_mapped(p);
+	if (v)
+		return (void __iomem *)v + offset;
+
+	if (slab_is_available())
+		return generic_ioremap_prot(addr, size, prot);
+
+	/*
+	 * Should check if it is a candidate for a BAT mapping
+	 */
+	pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
+	err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot);	/* one extra page is left unmapped above */
+	if (err)
+		return NULL;
+	ioremap_bot -= size + PAGE_SIZE;	/* commit the new bottom only once mapping succeeded */
+
+	return (void __iomem *)ioremap_bot + offset;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	/*
+	 * If mapped by BATs then there is nothing to do.
+	 * Calling vfree() generates a benign warning.
+	 */
+	if (v_block_mapped((unsigned long)addr))
+		return;
+
+	generic_iounmap(addr);	/* everything else goes through the generic unmap path */
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
new file mode 100644
index 0000000000..d24e5f1667
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
+			       pgprot_t prot, void *caller)
+{
+	phys_addr_t paligned, offset;	/* page-aligned physical base and offset of addr within that page */
+	void __iomem *ret;
+	int err;
+
+	/* We don't support the 4K PFN hack with ioremap */
+	if (pgprot_val(prot) & H_PAGE_4K_PFN)
+		return NULL;
+
+	/*
+	 * Choose an address to map it to. Once the vmalloc system is running,
+	 * we use it. Before that, we map using addresses going up from
+	 * ioremap_bot.  vmalloc will use the addresses from IOREMAP_BASE
+	 * through ioremap_bot.
+	 */
+	paligned = addr & PAGE_MASK;
+	offset = addr & ~PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - paligned;	/* whole pages covering [addr, addr + size) */
+
+	if (size == 0 || paligned == 0)	/* empty request, or one that starts in physical page 0 */
+		return NULL;
+
+	if (slab_is_available())
+		return generic_ioremap_prot(addr, size, prot);
+
+	pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
+	err = early_ioremap_range(ioremap_bot, paligned, size, prot);
+	if (err)
+		return NULL;
+
+	ret = (void __iomem *)ioremap_bot + offset;
+	ioremap_bot += size + PAGE_SIZE;	/* skip one extra page after each early mapping */
+
+	return ret;
+}
+
+/*
+ * Unmap an IO region and remove it from vmalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ */
+void iounmap(volatile void __iomem *token)
+{
+	if (!slab_is_available())	/* before slab is up nothing is unmapped */
+		return;
+
+	generic_iounmap(PCI_FIX_ADDR(token));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
new file mode 100644
index 0000000000..2784224054
--- /dev/null
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
+static int __init
+kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
+{
+	pmd_t *pmd = pmd_off_k(k_start);
+	unsigned long k_cur, k_next;
+	/* Each pass covers two PGD entries, backed by the next 8M of 'block'. */
+	for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) {
+		pte_basic_t *new;
+
+		k_next = pgd_addr_end(k_cur, k_end);
+		k_next = pgd_addr_end(k_next, k_end);	/* advance by a second PGD entry */
+		if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+			continue;	/* entry no longer points at the early shadow PTE table */
+
+		new = memblock_alloc(sizeof(pte_basic_t), SZ_4K);
+		if (!new)
+			return -ENOMEM;
+
+		*new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL)));
+
+		hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M);	/* both entries share the one PTE */
+		hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M);
+	}
+	return 0;
+}
+
+int __init kasan_init_region(void *start, size_t size)
+{
+	unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+	unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+	unsigned long k_cur = k_start;	/* first shadow address still to be mapped page by page */
+	int ret;
+	void *block;
+
+	block = memblock_alloc(k_end - k_start, SZ_8M);	/* backing memory for the whole shadow range */
+	if (!block)
+		return -ENOMEM;
+
+	if (IS_ALIGNED(k_start, SZ_8M)) {
+		k_cur = ALIGN_DOWN(k_end, SZ_8M);	/* the 8M-aligned head goes through kasan_init_shadow_8M() */
+		ret = kasan_init_shadow_8M(k_start, k_cur, block);
+		if (ret)	/* propagate -ENOMEM instead of ignoring it */
+			return ret;
+		if (k_cur == k_end)
+			goto finish;
+	}
+
+	ret = kasan_init_shadow_page_tables(k_start, k_end);
+	if (ret)
+		return ret;
+
+	for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+		pmd_t *pmd = pmd_off_k(k_cur);
+		void *va = block + k_cur - k_start;
+		pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+		if (k_cur < ALIGN_DOWN(k_end, SZ_512K))	/* only below the last 512K boundary */
+			pte = pte_mkhuge(pte);
+
+		__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+	}
+finish:
+	flush_tlb_kernel_range(k_start, k_end);
+	return 0;
+}
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
new file mode 100644
index 0000000000..f9522fd70b
--- /dev/null
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+obj-$(CONFIG_PPC32)		+= init_32.o
+obj-$(CONFIG_PPC_8xx)		+= 8xx.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= book3s_32.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= init_book3s_64.o
+obj-$(CONFIG_PPC_BOOK3E_64)	+= init_book3e_64.o
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
new file mode 100644
index 0000000000..450a67ef0b
--- /dev/null
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <mm/mmu_decl.h>
+
+int __init kasan_init_region(void *start, size_t size)
+{
+	unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+	unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+	unsigned long k_nobat = k_start;	/* first shadow address not covered by a BAT */
+	unsigned long k_cur;
+	phys_addr_t phys;
+	int ret;
+
+	while (k_nobat < k_end) {	/* cover as much as possible with BATs */
+		unsigned int k_size = bat_block_size(k_nobat, k_end);
+		int idx = find_free_bat();
+
+		if (idx == -1)	/* no BAT left */
+			break;
+		if (k_size < SZ_128K)	/* remaining blocks this small are left to page tables */
+			break;
+		phys = memblock_phys_alloc_range(k_size, k_size, 0,
+						 MEMBLOCK_ALLOC_ANYWHERE);
+		if (!phys)
+			break;
+
+		setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
+		k_nobat += k_size;
+	}
+	if (k_nobat != k_start)	/* at least one BAT was set up */
+		update_bats();
+
+	if (k_nobat < k_end) {	/* the remainder is backed by one page-aligned allocation */
+		phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
+						 MEMBLOCK_ALLOC_ANYWHERE);
+		if (!phys)
+			return -ENOMEM;
+	}
+
+	ret = kasan_init_shadow_page_tables(k_start, k_end);
+	if (ret)
+		return ret;
+
+	kasan_update_early_region(k_start, k_nobat, __pte(0));	/* write empty PTEs over the BAT-covered part */
+
+	for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
+		pmd_t *pmd = pmd_off_k(k_cur);
+		pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
+
+		__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+	}
+	flush_tlb_kernel_range(k_start, k_end);
+	memset(kasan_mem_to_shadow(start), 0, k_end - k_start);	/* zero the new shadow */
+
+	return 0;
+}
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
new file mode 100644
index 0000000000..a70828a6d9
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <asm/pgalloc.h>
+#include <asm/code-patching.h>
+#include <mm/mmu_decl.h>
+
+static pgprot_t __init kasan_prot_ro(void)
+{
+	/* MMUs with the HPTE_TABLE feature use PAGE_READONLY, the rest PAGE_KERNEL_RO. */
+	bool has_hpte = early_mmu_has_feature(MMU_FTR_HPTE_TABLE);
+
+	return has_hpte ? PAGE_READONLY : PAGE_KERNEL_RO;
+}
+
+static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+{
+	/* Point every slot of the PTE table at the early shadow page, using @prot. */
+	pte_t entry = pfn_pte(PHYS_PFN(__pa(kasan_early_shadow_page)), prot);
+	pte_t *last = ptep + PTRS_PER_PTE;
+
+	for (; ptep != last; ptep++)
+		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, ptep, entry, 1);
+}
+
+int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+{
+	pmd_t *pmd;
+	unsigned long k_cur, k_next;
+
+	pmd = pmd_off_k(k_start);
+	/* Give every PMD that still uses the shared early PTE table a table of its own. */
+	for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+		pte_t *new;
+
+		k_next = pgd_addr_end(k_cur, k_end);
+		if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+			continue;	/* not on the shared early table: leave it alone */
+
+		new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+
+		if (!new)
+			return -ENOMEM;
+		kasan_populate_pte(new, PAGE_KERNEL);	/* all entries -> early shadow page */
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+	return 0;
+}
+
+int __init __weak kasan_init_region(void *start, size_t size)
+{
+	unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+	unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+	unsigned long k_cur;
+	void *block;
+	int ret;
+
+	ret = kasan_init_shadow_page_tables(k_start, k_end);
+	if (ret)
+		return ret;
+	/* Align down before allocating: whole pages are mapped, starting at block itself. */
+	k_start &= PAGE_MASK;
+	block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+	if (!block)
+		return -ENOMEM;
+	/* Map each shadow page onto the page of block at the same offset. */
+	for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+		pmd_t *pmd = pmd_off_k(k_cur);
+		void *va = block + k_cur - k_start;
+		pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+		__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+	}
+	flush_tlb_kernel_range(k_start, k_end);
+	return 0;
+}
+
+void __init
+kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
+{
+	struct page *early = virt_to_page(lm_alias(kasan_early_shadow_page));
+	unsigned long addr;
+
+	/* Only PTEs that still map the early shadow page are replaced by @pte. */
+	for (addr = k_start; addr != k_end; addr += PAGE_SIZE) {
+		pmd_t *pmd = pmd_off_k(addr);
+		pte_t *ptep = pte_offset_kernel(pmd, addr);
+
+		if (pte_page(*ptep) == early)
+			__set_pte_at(&init_mm, addr, ptep, pte, 0);
+	}
+
+	flush_tlb_kernel_range(k_start, k_end);
+}
+
+static void __init kasan_remap_early_shadow_ro(void)
+{
+	/* Switch the shared PTE table and every early mapping in the shadow to read-only. */
+	pgprot_t ro = kasan_prot_ro();
+	pte_t ro_pte = pfn_pte(PHYS_PFN(__pa(kasan_early_shadow_page)), ro);
+
+	kasan_populate_pte(kasan_early_shadow_pte, ro);
+
+	kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END, ro_pte);
+}
+
+static void __init kasan_unmap_early_shadow_vmalloc(void)
+{
+	unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
+	unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
+	/* Clear the early-shadow PTEs covering the shadow of the vmalloc area ... */
+	kasan_update_early_region(k_start, k_end, __pte(0));
+
+#ifdef MODULES_VADDR	/* ... and of the module area, where one is defined */
+	k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+	k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+	kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
+}
+
+void __init kasan_mmu_init(void)
+{
+	int err;
+
+	/* Private shadow page tables are only set up here with the HPTE_TABLE feature. */
+	if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return;
+	err = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	if (err)
+		panic("kasan: kasan_init_shadow_page_tables() failed");
+}
+
+void __init kasan_init(void)
+{
+	phys_addr_t base, end;
+	u64 i;
+	int ret;
+
+	for_each_mem_range(i, &base, &end) {
+		phys_addr_t top = min(end, total_lowmem);	/* clamp the range to lowmem */
+
+		if (base >= top)
+			continue;
+
+		ret = kasan_init_region(__va(base), top - base);
+		if (ret)
+			panic("kasan: kasan_init_region() failed");
+	}
+	/* With KASAN_VMALLOC, no PMD of the shadow area may keep the shared early PTE table. */
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+		if (ret)
+			panic("kasan: kasan_init_shadow_page_tables() failed");
+	}
+	/* Remap the early shadow read-only, then zero the early shadow page itself. */
+	kasan_remap_early_shadow_ro();
+
+	clear_page(kasan_early_shadow_page);
+
+	/* At this point kasan is fully initialized. Enable error messages */
+	init_task.kasan_depth = 0;
+	pr_info("KASAN init done\n");
+}
+
+void __init kasan_late_init(void)
+{
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))	/* otherwise there is nothing to do */
+		kasan_unmap_early_shadow_vmalloc();
+}
+
+void __init kasan_early_init(void)
+{
+	unsigned long addr = KASAN_SHADOW_START;
+	unsigned long end = KASAN_SHADOW_END;
+	unsigned long next;
+	pmd_t *pmd = pmd_off_k(addr);
+
+	BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);	/* must sit on a PGDIR boundary */
+	/* Fill the shared early PTE table, then hang it under every PMD of the shadow area. */
+	kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
+
+	do {
+		next = pgd_addr_end(addr, end);
+		pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+	} while (pmd++, addr = next, addr != end);
+}
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
new file mode 100644
index 0000000000..11519e88dc
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KASAN for 64-bit Book3e powerpc
+ *
+ * Copyright 2022, Christophe Leroy, CS GROUP France
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/set_memory.h>
+
+#include <asm/pgalloc.h>
+
+static inline bool kasan_pud_table(p4d_t p4d)	/* true if @p4d points at the early PUD table */
+{
+	return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud));
+}
+
+static inline bool kasan_pmd_table(pud_t pud)	/* true if @pud points at the early PMD table */
+{
+	return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd));
+}
+
+static inline bool kasan_pte_table(pmd_t pmd)	/* true if @pmd points at the early PTE table */
+{
+	return pmd_page(pmd) == virt_to_page(lm_alias(kasan_early_shadow_pte));
+}
+
+static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+	p4d_t *p4dp = p4d_offset(pgd_offset_k(ea), ea);
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	if (kasan_pud_table(*p4dp)) {	/* still the shared early PUD table: use a private copy */
+		pudp = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+		if (!pudp)
+			panic("kasan: %s: PUD table allocation failed", __func__);
+		memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE);
+		p4d_populate(&init_mm, p4dp, pudp);
+	}
+	pudp = pud_offset(p4dp, ea);
+	if (kasan_pmd_table(*pudp)) {	/* same at the PMD level */
+		pmdp = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+		if (!pmdp)
+			panic("kasan: %s: PMD table allocation failed", __func__);
+		memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE);
+		pud_populate(&init_mm, pudp, pmdp);
+	}
+	pmdp = pmd_offset(pudp, ea);
+	if (kasan_pte_table(*pmdp)) {	/* and at the PTE level */
+		ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+		if (!ptep)
+			panic("kasan: %s: PTE table allocation failed", __func__);
+		memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE);
+		pmd_populate_kernel(&init_mm, pmdp, ptep);
+	}
+	__set_pte_at(&init_mm, ea, pte_offset_kernel(pmdp, ea), pfn_pte(pa >> PAGE_SHIFT, prot), 0);
+	return 0;
+}
+
+static void __init kasan_init_phys_region(void *start, void *end)
+{
+	unsigned long k_start, k_end, k_cur;
+	void *va;
+
+	if (start >= end)
+		return;
+	k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE);
+	k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE);
+	va = memblock_alloc(k_end - k_start, PAGE_SIZE);	/* backs the whole shadow range */
+	if (!va)
+		panic("kasan: %s: shadow allocation failed", __func__);
+	for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE)
+		kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL);
+}
+
+void __init kasan_early_init(void)
+{
+	int i;
+	unsigned long addr;
+	pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START);
+	pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	/* Chain the early tables: PTEs -> shadow page, PMDs -> PTE table, PUDs -> PMD table. */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+			     &kasan_early_shadow_pte[i], zero_pte, 0);
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
+				    kasan_early_shadow_pte);
+
+	for (i = 0; i < PTRS_PER_PUD; i++)
+		pud_populate(&init_mm, &kasan_early_shadow_pud[i],
+			     kasan_early_shadow_pmd);
+	/* Finally hang the early PUD table under every P4D entry of the shadow area. */
+	for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE)
+		p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud);
+}
+
+void __init kasan_init(void)
+{
+	phys_addr_t start, end;
+	u64 i;
+	pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
+	/* NOTE(review): start/end are physical here; init_32.c passes __va(base) - confirm. */
+	for_each_mem_range(i, &start, &end)
+		kasan_init_phys_region((void *)start, (void *)end);
+
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+		kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE);
+	/* Rewrite every early PTE so the early shadow page is mapped read-only. */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+			     &kasan_early_shadow_pte[i], zero_pte, 0);
+
+	flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+
+	/* Enable error messages */
+	init_task.kasan_depth = 0;
+	pr_info("KASAN init done\n");
+}
+
+void __init kasan_late_init(void) { }	/* intentionally empty in this file */
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
new file mode 100644
index 0000000000..9300d641cf
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KASAN for 64-bit Book3S powerpc
+ *
+ * Copyright 2019-2022, Daniel Axtens, IBM Corporation.
+ */
+
+/*
+ * ppc64 turns on virtual memory late in boot, after calling into generic code
+ * like the device-tree parser, so it uses this in conjunction with a hook in
+ * outline mode to avoid invalid access early in boot.
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/sched/task.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+
+DEFINE_STATIC_KEY_FALSE(powerpc_kasan_enabled_key);
+
+static void __init kasan_init_phys_region(void *start, void *end)
+{
+	unsigned long k_start, k_end, k_cur;
+	void *va;
+
+	if (start >= end)
+		return;
+	k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE);
+	k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE);
+	va = memblock_alloc(k_end - k_start, PAGE_SIZE);	/* backs the whole shadow range */
+	if (!va)
+		panic("kasan: %s: shadow allocation failed", __func__);
+	for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE)
+		map_kernel_page(k_cur, __pa(va), PAGE_KERNEL);
+}
+
+void __init kasan_init(void)
+{
+	/*
+	 * We want to do the following things:
+	 *  1) Map real memory into the shadow for all physical memblocks
+	 *     This takes us from c000... to c008...
+	 *  2) Leave a hole over the shadow of vmalloc space. KASAN_VMALLOC
+	 *     will manage this for us.
+	 *     This takes us from c008... to c00a...
+	 *  3) Map the 'early shadow'/zero page over iomap and vmemmap space.
+	 *     This takes us up to where we start at c00e...
+	 */
+
+	void *k_start = kasan_mem_to_shadow((void *)RADIX_VMALLOC_END);
+	void *k_end = kasan_mem_to_shadow((void *)RADIX_VMEMMAP_END);
+	phys_addr_t start, end;
+	u64 i;
+	pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+
+	if (!early_radix_enabled()) {
+		pr_warn("KASAN not enabled as it requires radix!\n");
+		return;
+	}
+	/* NOTE(review): start/end are physical addresses cast to pointers - confirm intent. */
+	for_each_mem_range(i, &start, &end)
+		kasan_init_phys_region((void *)start, (void *)end);
+	/* Chain the early tables: PTEs -> shadow page, PMDs -> PTE table, PUDs -> PMD table. */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+			     &kasan_early_shadow_pte[i], zero_pte, 0);
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
+				    kasan_early_shadow_pte);
+
+	for (i = 0; i < PTRS_PER_PUD; i++)
+		pud_populate(&init_mm, &kasan_early_shadow_pud[i],
+			     kasan_early_shadow_pmd);
+
+	/* map the early shadow over the iomap and vmemmap space */
+	kasan_populate_early_shadow(k_start, k_end);
+
+	/* mark early shadow region as RO and wipe it */
+	zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+			     &kasan_early_shadow_pte[i], zero_pte, 0);
+
+	/*
+	 * clear_page relies on some cache info that hasn't been set up yet.
+	 * It ends up looping ~forever and blows up other data.
+	 * Use memset instead.
+	 */
+	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+
+	static_branch_inc(&powerpc_kasan_enabled_key);
+
+	/* Enable error messages */
+	init_task.kasan_depth = 0;
+	pr_info("KASAN init done\n");
+}
+
+void __init kasan_early_init(void) { }	/* intentionally empty in this file */
+
+void __init kasan_late_init(void) { }	/* intentionally empty in this file */
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
new file mode 100644
index 0000000000..ea821d0ffe
--- /dev/null
+++ b/arch/powerpc/mm/maccess.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#include <asm/disassemble.h>
+#include <asm/inst.h>
+#include <asm/ppc-opcode.h>
+
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+	return is_kernel_addr((unsigned long)unsafe_src);	/* @size is not consulted */
+}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
new file mode 100644
index 0000000000..07e8f4f1e0
--- /dev/null
+++ b/arch/powerpc/mm/mem.c
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/memblock.h>
+#include <linux/highmem.h>
+#include <linux/suspend.h>
+#include <linux/dma-direct.h>
+
+#include <asm/swiotlb.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/kasan.h>
+#include <asm/svm.h>
+#include <asm/mmzone.h>
+#include <asm/ftrace.h>
+#include <asm/code-patching.h>
+#include <asm/setup.h>
+
+#include <mm/mmu_decl.h>
+
+unsigned long long memory_limit;
+
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	/* A platform hook, when present, decides; otherwise only non-RAM becomes uncached. */
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot);
+
+	if (page_is_ram(pfn))
+		return vma_prot;
+	return pgprot_noncached(vma_prot);
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static DEFINE_MUTEX(linear_mapping_mutex);
+
+#ifdef CONFIG_NUMA
+int memory_add_physaddr_to_nid(u64 start)
+{
+	return hot_add_scn_to_nid(start);	/* plain forwarder, no extra checks here */
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
+
+int __weak create_section_mapping(unsigned long start, unsigned long end,
+				  int nid, pgprot_t prot)
+{
+	return -ENODEV;	/* weak default: used when no other definition is linked in */
+}
+
+int __weak remove_section_mapping(unsigned long start, unsigned long end)
+{
+	return -ENODEV;	/* weak default: used when no other definition is linked in */
+}
+
+int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
+				     struct mhp_params *params)
+{
+	int err;
+
+	/* From here on, start holds the __va() address of the range. */
+	start = (unsigned long)__va(start);
+	mutex_lock(&linear_mapping_mutex);
+	err = create_section_mapping(start, start + size, nid, params->pgprot);
+	mutex_unlock(&linear_mapping_mutex);
+	if (!err)
+		return 0;
+
+	pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
+		start, start + size, err);
+	return -EFAULT;
+}
+
+void __ref arch_remove_linear_mapping(u64 start, u64 size)
+{
+	int ret;
+
+	/* Remove the mappings for this section of memory, addressed by its __va() address */
+	start = (unsigned long)__va(start);
+
+	mutex_lock(&linear_mapping_mutex);
+	ret = remove_section_mapping(start, start + size);
+	mutex_unlock(&linear_mapping_mutex);
+	if (ret)
+		pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n",
+			start, start + size, ret);
+
+	/* Ensure all vmalloc mappings are flushed in case they also
+	 * hit that section of memory (done even when the removal above failed)
+	 */
+	vm_unmap_aliases();
+}
+
+/*
+ * Memory hotplug can raise the end of memory: bring max_pfn, max_low_pfn and
+ * high_memory up to date when the added range ends beyond the current max_pfn.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+	unsigned long last_pfn = PFN_UP(start + size);
+
+	if (last_pfn <= max_pfn)
+		return;
+	max_pfn = last_pfn;
+	max_low_pfn = last_pfn;
+	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+}
+
+int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+		    struct mhp_params *params)
+{
+	int ret = __add_pages(nid, start_pfn, nr_pages, params);
+
+	if (ret)
+		return ret;
+
+	/* The pages are in: refresh max_pfn, max_low_pfn and high_memory */
+	/* (the helper takes byte addresses, hence the shifts). */
+	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+				  nr_pages << PAGE_SHIFT);
+
+	return 0;
+}
+
+int __ref arch_add_memory(int nid, u64 start, u64 size,
+			  struct mhp_params *params)
+{
+	int rc;
+
+	/* Map the range first; undo the mapping again if add_pages() fails. */
+	rc = arch_create_linear_mapping(nid, start, size, params);
+	if (rc)
+		return rc;
+	rc = add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
+	if (!rc)
+		return 0;
+	arch_remove_linear_mapping(start, size);
+	return rc;
+}
+
+void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+{
+	/* Reverse order of arch_add_memory(): pages first, then the linear mapping. */
+	unsigned long first_pfn = start >> PAGE_SHIFT;
+
+	__remove_pages(first_pfn, size >> PAGE_SHIFT, altmap);
+	arch_remove_linear_mapping(start, size);
+}
+#endif
+
+#ifndef CONFIG_NUMA
+void __init mem_topology_setup(void)
+{
+	/* !NUMA: take the pfn limits straight from memblock. */
+	max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	max_low_pfn = max_pfn;
+	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
+#ifdef CONFIG_HIGHMEM
+	max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
+#endif
+
+	/* Put every memblock region in node 0, merging contiguous regions. */
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
+}
+
+void __init initmem_init(void)
+{
+	sparse_init();	/* the only step in this !CONFIG_NUMA variant */
+}
+
+/* Register every gap between consecutive memory pfn ranges as a nosave region. */
+static int __init mark_nonram_nosave(void)
+{
+	unsigned long start_pfn, end_pfn, last_end = 0;
+	int idx;
+
+	for_each_mem_pfn_range(idx, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+		if (last_end != 0 && start_pfn > last_end)
+			register_nosave_region(last_end, start_pfn);
+
+		last_end = end_pfn;
+	}
+
+	return 0;
+}
+#else /* CONFIG_NUMA */
+static int __init mark_nonram_nosave(void)
+{
+	return 0;	/* CONFIG_NUMA variant: registers nothing */
+}
+#endif
+
+/*
+ * Zones usage:
+ *
+ * We setup ZONE_DMA to be 31-bits on 64-bit platforms and 30-bits on 32-bit
+ * ones (see paging_init()), and ZONE_NORMAL to be everything else. GFP_DMA32
+ * page allocations automatically fall back to ZONE_DMA.
+ *
+ * By always using such a small ZONE_DMA, we can exploit zone_dma_bits to inform
+ * the generic DMA mapping code.  32-bit only devices (if not handled by an
+ * IOMMU anyway) will take a first dip into ZONE_NORMAL and get otherwise served
+ * by ZONE_DMA.
+ */
+static unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+/*
+ * Page tables already exist here: paging_init() sizes the zones and calls free_area_init().
+ */
+void __init paging_init(void)
+{
+	unsigned long long total_ram = memblock_phys_mem_size();
+	phys_addr_t top_of_ram = memblock_end_of_DRAM();
+
+#ifdef CONFIG_HIGHMEM
+	unsigned long v = __fix_to_virt(FIX_KMAP_END);
+	unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
+	/* Placeholder entries (pa 0, empty prot); presumably to get the PTE pages allocated. */
+	for (; v < end; v += PAGE_SIZE)
+		map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
+
+	map_kernel_page(PKMAP_BASE, 0, __pgprot(0));	/* XXX gross */
+	pkmap_page_table = virt_to_kpte(PKMAP_BASE);
+#endif /* CONFIG_HIGHMEM */
+
+	printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n",
+	       (unsigned long long)top_of_ram, total_ram);
+	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
+	       (long int)((top_of_ram - total_ram) >> 20));
+
+	/*
+	 * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+	 * powerbooks.
+	 */
+	if (IS_ENABLED(CONFIG_PPC32))
+		zone_dma_bits = 30;
+	else
+		zone_dma_bits = 31;
+
+#ifdef CONFIG_ZONE_DMA
+	max_zone_pfns[ZONE_DMA]	= min(max_low_pfn,
+				      1UL << (zone_dma_bits - PAGE_SHIFT));
+#endif
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
+#endif
+
+	free_area_init(max_zone_pfns);
+
+	mark_nonram_nosave();
+}
+
+void __init mem_init(void)
+{
+	/*
+	 * book3s is limited to 16 page sizes due to encoding this in
+	 * a 4-bit field for slices.
+	 */
+	BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
+
+#ifdef CONFIG_SWIOTLB
+	/*
+	 * Some platforms (e.g. 85xx) limit DMA-able memory way below
+	 * 4G. We force memblock to bottom-up mode to ensure that the
+	 * memory allocated in swiotlb_init() is DMA-able.
+	 * As it's the last memblock allocation, no need to reset it
+	 * back to top-down.
+	 */
+	memblock_set_bottom_up(true);
+	swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags);
+#endif
+
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+	kasan_late_init();
+
+	memblock_free_all();
+
+#ifdef CONFIG_HIGHMEM
+	{
+		unsigned long pfn, highmem_mapnr;
+		/* Free each highmem page that is memory and not reserved in memblock. */
+		highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
+		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
+			phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
+			struct page *page = pfn_to_page(pfn);
+			if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr))
+				free_highmem_page(page);
+		}
+	}
+#endif /* CONFIG_HIGHMEM */
+
+#if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP)
+	/*
+	 * If smp is enabled, next_tlbcam_idx is initialized in the cpu up
+	 * functions.... do it here for the non-smp case.
+	 */
+	per_cpu(next_tlbcam_idx, smp_processor_id()) =
+		(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
+#endif
+
+#ifdef CONFIG_PPC32
+	pr_info("Kernel virtual memory layout:\n");
+#ifdef CONFIG_KASAN
+	pr_info("  * 0x%08lx..0x%08lx  : kasan shadow mem\n",
+		KASAN_SHADOW_START, KASAN_SHADOW_END);
+#endif
+	pr_info("  * 0x%08lx..0x%08lx  : fixmap\n", FIXADDR_START, FIXADDR_TOP);
+#ifdef CONFIG_HIGHMEM
+	pr_info("  * 0x%08lx..0x%08lx  : highmem PTEs\n",
+		PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
+#endif /* CONFIG_HIGHMEM */
+	if (ioremap_bot != IOREMAP_TOP)
+		pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
+			ioremap_bot, IOREMAP_TOP);
+	pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
+		VMALLOC_START, VMALLOC_END);
+#ifdef MODULES_VADDR
+	pr_info("  * 0x%08lx..0x%08lx  : modules\n",
+		MODULES_VADDR, MODULES_END);
+#endif
+#endif /* CONFIG_PPC32 */
+}
+
+void free_initmem(void)
+{
+	ppc_md.progress = ppc_printk_progress;	/* swapped before init memory is freed below */
+	mark_initmem_nx();
+	free_initmem_default(POISON_FREE_INITMEM);
+	ftrace_free_init_tramp();
+}
+
+/*
+ * System memory should not be in /proc/iomem but various tools expect it
+ * (eg kdump).
+ */
+static int __init add_system_ram_resources(void)
+{
+	phys_addr_t start, end;
+	u64 i;
+
+	for_each_mem_range(i, &start, &end) {
+		struct resource *res;
+
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (WARN_ON(!res))
+			continue;
+
+		res->name = "System RAM";
+		res->start = start;
+		/*
+		 * memblock's end is the first byte after the range, whereas a
+		 * resource's end is the last byte inside it.
+		 */
+		res->end = end - 1;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+		/* A rejected resource is never referenced again: don't leak it. */
+		if (WARN_ON(request_resource(&iomem_resource, res) < 0))
+			kfree(res);
+	}
+
+	return 0;
+}
+subsys_initcall(add_system_ram_resources);
+
+#ifdef CONFIG_STRICT_DEVMEM
+/*
+ * devmem_is_allowed(): check to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ * Access has to be given to non-kernel-ram areas as well, these contain the
+ * PCI mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pfn)
+{
+	/* The RTAS user buffer is always allowed. */
+	if (page_is_rtas_user_buf(pfn))
+		return 1;
+	/* Exclusively claimed iomem never is; beyond that, only non-RAM is allowed. */
+	if (iomem_is_exclusive(PFN_PHYS(pfn)))
+		return 0;
+	return !page_is_ram(pfn);
+}
+#endif /* CONFIG_STRICT_DEVMEM */
+
+/*
+ * This is defined in kernel/resource.c but only powerpc needs to export it, for
+ * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
+ */
+EXPORT_SYMBOL_GPL(walk_system_ram_range);
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 0000000000..b24c19078e
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Common implementation of switch_mm_irqs_off
+ *
+ *  Copyright IBM Corp. 2017
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/sched/mm.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#if defined(CONFIG_PPC32)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	/* 32-bit keeps track of the current PGDIR in the thread struct */
+	tsk->thread.pgdir = mm->pgd;
+#ifdef CONFIG_PPC_BOOK3S_32
+	tsk->thread.sr0 = mm->context.sr0;
+#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+	tsk->thread.pid = mm->context.id;	/* context id copied only with KUAP */
+#endif
+}
+#elif defined(CONFIG_PPC_BOOK3E_64)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	/* 64-bit Book3E keeps track of current PGD in the PACA */
+	get_paca()->pgd = mm->pgd;
+#ifdef CONFIG_PPC_KUAP
+	tsk->thread.pid = mm->context.id;	/* context id copied only with KUAP */
+#endif
+}
+#else
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+				   struct mm_struct *mm) { }	/* nothing tracked otherwise */
+#endif
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			struct task_struct *tsk)
+{
+	int cpu = smp_processor_id();
+	bool new_on_cpu = false;
+
+	/* Mark this context as having been used on this CPU */
+	if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+		VM_WARN_ON_ONCE(next == &init_mm);
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		inc_mm_active_cpus(next);
+
+		/*
+		 * This full barrier orders the store to the cpumask above vs
+		 * a subsequent load which allows this CPU/MMU to begin loading
+		 * translations for 'next' from page table PTEs into the TLB.
+		 *
+		 * When using the radix MMU, that operation is the load of the
+		 * MMU context id, which is then moved to SPRN_PID.
+		 *
+		 * For the hash MMU it is either the first load from slb_cache
+		 * in switch_slb() to preload the SLBs, or the load of
+		 * get_user_context which loads the context for the VSID hash
+		 * to insert a new SLB, in the SLB fault handler.
+		 *
+		 * On the other side, the barrier is in mm/tlb-radix.c for
+		 * radix which orders earlier stores to clear the PTEs before
+		 * the load of mm_cpumask to check which CPU TLBs should be
+		 * flushed. For hash, pte_xchg to clear the PTE includes the
+		 * barrier.
+		 *
+		 * This full barrier is also needed by membarrier when
+		 * switching between processes after store to rq->curr, before
+		 * user-space memory accesses.
+		 */
+		smp_mb();
+
+		new_on_cpu = true;
+	}
+
+	/* Some subarchs need to track the PGD elsewhere */
+	switch_mm_pgdir(tsk, next);
+
+	/* Nothing else to do if we aren't actually switching */
+	if (prev == next)
+		return;
+
+	/*
+	 * We must stop all altivec streams before changing the HW
+	 * context
+	 */
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		asm volatile (PPC_DSSALL);
+	/* When new on this CPU, the smp_mb() above already served membarrier. */
+	if (!new_on_cpu)
+		membarrier_arch_switch_mm(prev, next, tsk);
+
+	/*
+	 * The actual HW switching method differs between the various
+	 * sub architectures. Out of line for now
+	 */
+	switch_mmu_context(prev, next, tsk);
+
+	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));
+}
+
+#ifndef CONFIG_PPC_BOOK3S_64
+void arch_exit_mmap(struct mm_struct *mm)
+{
+	void *pending = pte_frag_get(&mm->context);
+
+	if (pending != NULL)	/* destroy the PTE fragment the context still holds */
+		pte_frag_destroy(pending);
+}
+#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
new file mode 100644
index 0000000000..7f9ff06401
--- /dev/null
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Declarations of procedures and variables shared between files
+ * in arch/powerpc/mm/.
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+#include <linux/mm.h>
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC_MMU_NOHASH
+#include <asm/trace.h>
+
+/*
+ * On 40x and 8xx, we directly inline tlbia and tlbivax
+ */
+#if defined(CONFIG_40x) || defined(CONFIG_PPC_8xx)
+static inline void _tlbil_all(void)
+{
+	asm volatile ("sync; tlbia; isync" : : : "memory");
+	trace_tlbia(MMU_NO_CONTEXT);	/* traced without a specific context */
+}
+static inline void _tlbil_pid(unsigned int pid)
+{
+	asm volatile ("sync; tlbia; isync" : : : "memory");	/* same as _tlbil_all() */
+	trace_tlbia(pid);	/* @pid only reaches the tracepoint */
+}
+#define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
+
+#else /* CONFIG_40x || CONFIG_PPC_8xx */
+extern void _tlbil_all(void);
+extern void _tlbil_pid(unsigned int pid);
+#ifdef CONFIG_PPC_BOOK3E_64
+extern void _tlbil_pid_noind(unsigned int pid);
+#else
+#define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
+#endif
+#endif /* !(CONFIG_40x || CONFIG_PPC_8xx) */
+
+/*
+ * On 8xx, we directly inline tlbie, on others, it's extern
+ */
+#ifdef CONFIG_PPC_8xx
+static inline void _tlbil_va(unsigned long address, unsigned int pid,
+			     unsigned int tsize, unsigned int ind)
+{
+	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+	trace_tlbie(0, 0, address, pid, 0, 0, 0);	/* @tsize and @ind are unused here */
+}
+#elif defined(CONFIG_PPC_BOOK3E_64)
+extern void _tlbil_va(unsigned long address, unsigned int pid,
+		      unsigned int tsize, unsigned int ind);
+#else
+extern void __tlbil_va(unsigned long address, unsigned int pid);
+static inline void _tlbil_va(unsigned long address, unsigned int pid,
+			     unsigned int tsize, unsigned int ind)
+{
+	__tlbil_va(address, pid);	/* @tsize and @ind are dropped by this variant */
+}
+#endif /* CONFIG_PPC_8xx */
+
+#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_47x)
+extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
+			   unsigned int tsize, unsigned int ind);
+#else
+static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
+				   unsigned int tsize, unsigned int ind)
+{
+	BUG();	/* stub for configs other than BOOK3E_64/47x: must never be called */
+}
+#endif
+
+static inline void print_system_hash_info(void) {}
+
+#else /* CONFIG_PPC_MMU_NOHASH */
+
+void print_system_hash_info(void);
+
+#endif /* CONFIG_PPC_MMU_NOHASH */
+
+#ifdef CONFIG_PPC32
+
+extern void mapin_ram(void);
+extern void setbat(int index, unsigned long virt, phys_addr_t phys,
+		   unsigned int size, pgprot_t prot);
+
+extern u8 early_hash[];
+
+#endif /* CONFIG_PPC32 */
+
+extern unsigned long __max_low_memory;
+extern phys_addr_t total_memory;
+extern phys_addr_t total_lowmem;
+extern phys_addr_t memstart_addr;
+extern phys_addr_t lowmem_end_addr;
+
+/* ...and now those things that may be slightly different between processor
+ * architectures.  -- Dan
+ */
+#ifdef CONFIG_PPC32
+extern void MMU_init_hw(void);
+void MMU_init_hw_patch(void);
+unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
+#endif
+void mmu_init_secondary(int cpu);
+
+#ifdef CONFIG_PPC_E500
+extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
+				     bool dryrun, bool init);
+#ifdef CONFIG_PPC32
+extern void adjust_total_lowmem(void);
+extern int switch_to_as1(void);
+extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
+void reloc_kernel_entry(void *fdt, int addr);
+void relocate_init(u64 dt_ptr, phys_addr_t start);
+extern int is_second_reloc;
+#endif
+extern void loadcam_entry(unsigned int index);
+extern void loadcam_multi(int first_idx, int num, int tmp_idx);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
+#else
+static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
+#endif
+
+struct tlbcam {	/* presumably one image of the MAS registers per CAM entry - TODO confirm */
+	u32	MAS0;
+	u32	MAS1;
+	unsigned long	MAS2;	/* the only field that is not a u32 */
+	u32	MAS3;
+	u32	MAS7;
+};
+
+#define NUM_TLBCAMS	64
+
+extern struct tlbcam TLBCAM[NUM_TLBCAMS];
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_8xx)
+/* 6xx have BATS */
+/* PPC_85xx have TLBCAM */
+/* 8xx have LTLB */
+phys_addr_t v_block_mapped(unsigned long va);
+unsigned long p_block_mapped(phys_addr_t pa);
+#else
+static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; }
+static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500)
+void mmu_mark_initmem_nx(void);
+void mmu_mark_rodata_ro(void);
+#else
+static inline void mmu_mark_initmem_nx(void) { }
+static inline void mmu_mark_rodata_ro(void) { }
+#endif
+
+#ifdef CONFIG_PPC_8xx
+void __init mmu_mapin_immr(void);
+#endif
+
+#ifdef CONFIG_DEBUG_WX
+void ptdump_check_wx(void);
+#else
+static inline void ptdump_check_wx(void) { }
+#endif
+
+static inline bool debug_pagealloc_enabled_or_kfence(void)
+{
+	return IS_ENABLED(CONFIG_KFENCE) ? true : debug_pagealloc_enabled();
+}
diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c
new file mode 100644
index 0000000000..3684d6e570
--- /dev/null
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * MMU_init_hw - chip-specific initialization of the MMU hardware.
+ *
+ * Programs the zone protection register, flushes the instruction cache and
+ * then sets the real-mode cache control registers.
+ */
+void __init MMU_init_hw(void)
+{
+	/*
+	 * The Zone Protection Register (ZPR) defines how protection is
+	 * applied to every page belonging to a given zone. Only two of the
+	 * 4xx's zones are used at present.
+	 * The zone index bits (of ZSEL) in the PTE serve as software
+	 * indicators, except for the LSB. User access uses zone 1 and
+	 * kernel access uses zone 0. Every zone but zone 1 is set to zero,
+	 * allowing only kernel access as indicated in the PTE.
+	 * Zone 1 is set to binary 01 (a value of 10 will not work) to allow
+	 * user access as indicated in the PTE. This also allows kernel
+	 * access as indicated in the PTE.
+	 */
+	mtspr(SPRN_ZPR, 0x10000000);
+
+	flush_instruction_cache();
+
+	/*
+	 * Real-mode cache parameters for the exception vector handlers
+	 * (which are run in real-mode).
+	 */
+	mtspr(SPRN_DCWR, 0x00000000);	/* All caching is write-back */
+
+	/*
+	 * Cache instruction and data space where the exception
+	 * vectors and the kernel live in real-mode.
+	 */
+	mtspr(SPRN_DCCR, 0xFFFF0000);	/* 2GByte of data space at 0x0. */
+	mtspr(SPRN_ICCR, 0xFFFF0000);	/* 2GByte of instr. space at 0x0. */
+}
+
+#define LARGE_PAGE_SIZE_16M	(1<<24)
+#define LARGE_PAGE_SIZE_4M	(1<<22)
+
+/*
+ * Map lowmem with large pages by writing size-tagged entries directly into
+ * the kernel PMDs: as many 16M pages as fit first, then 4M pages for what
+ * is left.
+ *
+ * Nothing is mapped and 0 is returned when KFENCE is built in, or when
+ * debug_pagealloc or strict kernel RWX is enabled.
+ *
+ * @base and @top are not used: the mapping always starts at KERNELBASE /
+ * physical 0 and is sized from total_lowmem.
+ *
+ * Returns the number of bytes mapped, which also becomes the current
+ * memblock allocation limit.
+ */
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	unsigned long va = KERNELBASE;
+	unsigned long left = total_lowmem;
+	unsigned long mapped;
+	phys_addr_t pa = 0;
+
+	if (IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled() ||
+	    strict_kernel_rwx_enabled())
+		return 0;
+
+	for (; left >= LARGE_PAGE_SIZE_16M; left -= LARGE_PAGE_SIZE_16M) {
+		unsigned long val = pa | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_RW;
+		pmd_t *pmdp = pmd_off_k(va);
+		int i;
+
+		/* A 16M page fills four consecutive PMD slots with the same value */
+		for (i = 0; i < 4; i++)
+			pmdp[i] = __pmd(val);
+
+		va += LARGE_PAGE_SIZE_16M;
+		pa += LARGE_PAGE_SIZE_16M;
+	}
+
+	for (; left >= LARGE_PAGE_SIZE_4M; left -= LARGE_PAGE_SIZE_4M) {
+		unsigned long val = pa | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_RW;
+
+		*pmd_off_k(va) = __pmd(val);
+
+		va += LARGE_PAGE_SIZE_4M;
+		pa += LARGE_PAGE_SIZE_4M;
+	}
+
+	mapped = total_lowmem - left;
+
+	/*
+	 * If the size of RAM is not an exact power of two, RAM may not be
+	 * covered in its entirety by 16 and 4 MiB pages. Restrict the top
+	 * end of RAM currently allocable so that MEMBLOCK allocations of
+	 * PTEs for "tail" coverage with normal-sized pages (or other
+	 * reasons) do not land outside the mapped range.
+	 */
+	memblock_set_current_limit(mapped);
+
+	return mapped;
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	/* We don't currently support the first MEMBLOCK not mapping 0
+	 * physical on those processors
+	 */
+	BUG_ON(first_memblock_base != 0);
+
+	/* 40x can only access 16MB at the moment (see head_40x.S).
+	 * NOTE(review): the limit applied below is 0x00800000, i.e. 8MB,
+	 * not the 16MB stated above -- confirm which one is intended.
+	 */
+	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
+}
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
new file mode 100644
index 0000000000..1beae802bb
--- /dev/null
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Modifications by Matt Porter (mporter@mvista.com) to support
+ * PPC44x Book E processors.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/init.h>
+#include <linux/memblock.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/smp.h>
+
+#include <mm/mmu_decl.h>
+
+/* Used by the 44x TLB replacement exception handler.
+ * Just needed it declared someplace.
+ */
+unsigned int tlb_44x_index; /* = 0 */
+unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;	/* decremented by ppc44x_pin_tlb() for every entry it pins */
+int icache_44x_need_flush;
+
+unsigned long tlb_47x_boltmap[1024/8];	/* bitmap of bolted 47x TLB indices, filled by ppc47x_update_boltmap() */
+
+static void __init ppc44x_update_tlb_hwater(void)
+{
+	/* The TLB miss handlers hard code the watermark in a cmpli
+	 * instruction to improve performance rather than loading it
+	 * from the global variable. Thus, we patch the low 16 bits
+	 * (mask 0xffff) of the instruction at both patch sites (_D and
+	 * _I) with the current tlb_44x_hwater when updating the value
+	 */
+	modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater);
+	modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater);
+}
+
+/*
+ * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU
+ *
+ * The entry goes into the TLB slot numbered by the current tlb_44x_hwater.
+ * The watermark is decremented and re-patched into the TLB miss handlers
+ * before the three tlbwe instructions write the PAGEID, XLAT and ATTRIB
+ * words of that slot.
+ */
+static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
+{
+	unsigned int entry = tlb_44x_hwater--;	/* take the slot, then lower the watermark */
+
+	ppc44x_update_tlb_hwater();
+
+	mtspr(SPRN_MMUCR, 0);
+
+	__asm__ __volatile__(
+		"tlbwe	%2,%3,%4\n"
+		"tlbwe	%1,%3,%5\n"
+		"tlbwe	%0,%3,%6\n"
+	:
+	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),	/* %0: written to the ATTRIB word */
+	  "r" (phys),	/* %1: written to the XLAT word */
+	  "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),	/* %2: written to the PAGEID word */
+	  "r" (entry),	/* %3: TLB slot being written */
+	  "i" (PPC44x_TLB_PAGEID),
+	  "i" (PPC44x_TLB_XLAT),
+	  "i" (PPC44x_TLB_ATTRIB));
+}
+
+/*
+ * Return the number (0-5) of the first bolted-entry slot whose valid bit is
+ * clear in MMUBE0/MMUBE1, or -1 when all six slots are marked valid.
+ */
+static int __init ppc47x_find_free_bolted(void)
+{
+	const unsigned int mmube0 = mfspr(SPRN_MMUBE0);
+	const unsigned int mmube1 = mfspr(SPRN_MMUBE1);
+	/* Non-zero when the corresponding slot is already in use */
+	const unsigned int in_use[] = {
+		mmube0 & MMUBE0_VBE0,
+		mmube0 & MMUBE0_VBE1,
+		mmube0 & MMUBE0_VBE2,
+		mmube1 & MMUBE1_VBE3,
+		mmube1 & MMUBE1_VBE4,
+		mmube1 & MMUBE1_VBE5,
+	};
+	int slot;
+
+	for (slot = 0; slot < (int)ARRAY_SIZE(in_use); slot++) {
+		if (!in_use[slot])
+			return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Record every valid bolted slot found in MMUBE0/MMUBE1 in tlb_47x_boltmap:
+ * for each slot whose valid bit is set, the slot's 8-bit index field is
+ * extracted and the matching bit is set in the bitmap.
+ */
+static void __init ppc47x_update_boltmap(void)
+{
+	const unsigned int mmube0 = mfspr(SPRN_MMUBE0);
+	const unsigned int mmube1 = mfspr(SPRN_MMUBE1);
+	const struct {
+		unsigned int reg;	/* register value describing the slot */
+		unsigned int valid;	/* valid bit of the slot within reg */
+		unsigned int shift;	/* position of the slot's index field */
+	} slots[] = {
+		{ mmube0, MMUBE0_VBE0, MMUBE0_IBE0_SHIFT },
+		{ mmube0, MMUBE0_VBE1, MMUBE0_IBE1_SHIFT },
+		{ mmube0, MMUBE0_VBE2, MMUBE0_IBE2_SHIFT },
+		{ mmube1, MMUBE1_VBE3, MMUBE1_IBE3_SHIFT },
+		{ mmube1, MMUBE1_VBE4, MMUBE1_IBE4_SHIFT },
+		{ mmube1, MMUBE1_VBE5, MMUBE1_IBE5_SHIFT },
+	};
+	unsigned int n;
+
+	for (n = 0; n < ARRAY_SIZE(slots); n++) {
+		if (!(slots[n].reg & slots[n].valid))
+			continue;
+
+		__set_bit((slots[n].reg >> slots[n].shift) & 0xff,
+			  tlb_47x_boltmap);
+	}
+}
+
+/*
+ * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
+ *
+ * Uses the first free bolted slot reported by ppc47x_find_free_bolted()
+ * and BUGs when there is none. The three tlbwe instructions write words
+ * 0, 1 and 2 of the entry selected through rA.
+ */
+static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
+{
+	unsigned int rA;
+	int bolted;
+
+	/* Base rA is HW way select, way 0, bolted bit set */
+	rA = 0x88000000;
+
+	/* Look for a bolted entry slot */
+	bolted = ppc47x_find_free_bolted();
+	BUG_ON(bolted < 0);
+
+	/* Insert bolted slot number */
+	rA |= bolted << 24;
+
+	pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n",
+		 virt, phys, bolted);
+
+	mtspr(SPRN_MMUCR, 0);
+
+	__asm__ __volatile__(
+		"tlbwe	%2,%3,0\n"
+		"tlbwe	%1,%3,1\n"
+		"tlbwe	%0,%3,2\n"
+		:
+		: "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR |
+		       PPC47x_TLB2_SX
+#ifdef CONFIG_SMP
+		       | PPC47x_TLB2_M
+#endif
+		       ),	/* %0: written to word 2; the M bit is only added on SMP builds */
+		  "r" (phys),	/* %1: written to word 1 */
+		  "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M),	/* %2: written to word 0 */
+		  "r" (rA));	/* %3: way/bolted-slot selector built above */
+}
+
+void __init MMU_init_hw(void)
+{
+	/* Patch the current TLB watermark into the 44x TLB miss handlers.
+	 * This is not useful on 47x but won't hurt either
+	 */
+	ppc44x_update_tlb_hwater();
+
+	flush_instruction_cache();
+}
+
+/*
+ * Pin one TLB entry per PPC_PIN_SIZE step of lowmem that lies beyond the
+ * initial mapping set up in head_44x.S, using the 47x or the 44x pinning
+ * routine depending on the MMU type. On 47x the bolted-entry bitmap is
+ * refreshed afterwards.
+ *
+ * @base and @top are not used. Returns total_lowmem.
+ */
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	unsigned long addr;
+	unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
+
+	/* Pin in enough TLBs to cover any lowmem not covered by the
+	 * initial 256M mapping established in head_44x.S */
+	for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
+	     addr += PPC_PIN_SIZE) {
+		if (mmu_has_feature(MMU_FTR_TYPE_47x))
+			ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
+		else
+			ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
+	}
+	if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
+		ppc47x_update_boltmap();
+
+#ifdef DEBUG
+		{
+			int i;
+
+			printk(KERN_DEBUG "bolted entries: ");
+			/* Bolted indices are masked with 0xff, so 0..255 inclusive */
+			for (i = 0; i < 256; i++) {
+				if (test_bit(i, tlb_47x_boltmap))
+					printk(KERN_CONT "%d ", i);
+			}
+			printk(KERN_CONT "\n");
+		}
+#endif /* DEBUG */
+	}
+	return total_lowmem;
+}
+
+/*
+ * Cap early memblock allocations to the part of the first memblock that
+ * lies within PPC_PIN_SIZE of its base.
+ */
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+#ifndef CONFIG_NONSTATIC_KERNEL
+	/* A first MEMBLOCK that does not start at physical 0 is not
+	 * currently supported on those processors
+	 */
+	BUG_ON(first_memblock_base != 0);
+#endif
+
+	/* 44x has a 256M TLB entry pinned at boot */
+	memblock_set_current_limit(first_memblock_base +
+				   min_t(u64, first_memblock_size, PPC_PIN_SIZE));
+}
+
+#ifdef CONFIG_SMP
+void __init mmu_init_secondary(int cpu)
+{
+	unsigned long addr;
+	unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
+
+	/* Pin in enough TLBs to cover any lowmem not covered by the
+	 * initial 256M mapping established in head_44x.S
+	 *
+	 * This is the same pinning loop as in mmu_mapin_ram(); the cpu
+	 * argument is not used.
+	 *
+	 * WARNING: This is called with only the first 256M of the
+	 * linear mapping in the TLB and we can't take faults yet
+	 * so beware of what this code uses. It runs off a temporary
+	 * stack. current (r2) isn't initialized, smp_processor_id()
+	 * will not work, current thread info isn't accessible, ...
+	 */
+	for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr;
+	     addr += PPC_PIN_SIZE) {
+		if (mmu_has_feature(MMU_FTR_TYPE_47x))
+			ppc47x_pin_tlb(addr + PAGE_OFFSET, addr);
+		else
+			ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
+	}
+}
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
new file mode 100644
index 0000000000..a642a79298
--- /dev/null
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 8xx series of chips.
+ *  -- christophe
+ *
+ *  Derived from arch/powerpc/mm/40x_mmu.c:
+ */
+
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
+#include <mm/mmu_decl.h>
+
+#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
+
+static unsigned long block_mapped_ram;
+
+/*
+ * Translate a virtual address lying in an area mapped with LTLBs or fixmap.
+ *
+ * Returns the physical address when @va is inside the IMMR window or inside
+ * the block-mapped part of RAM, and 0 otherwise.
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+	unsigned long immr_pa = PHYS_IMMR_BASE;
+	bool in_immr = va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE;
+
+	if (in_immr)
+		return immr_pa + va - VIRT_IMMR_BASE;
+
+	if (va < PAGE_OFFSET || va >= PAGE_OFFSET + block_mapped_ram)
+		return 0;
+
+	return __pa(va);
+}
+
+/*
+ * Translate a physical address mapped with LTLBs or fixmap.
+ *
+ * Returns the virtual address when @pa is inside the IMMR area or below
+ * the end of block-mapped RAM, and 0 when it is not mapped.
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+	unsigned long immr_pa = PHYS_IMMR_BASE;
+
+	if (pa >= immr_pa && pa < immr_pa + IMMR_SIZE)
+		return VIRT_IMMR_BASE + pa - immr_pa;
+
+	if (pa >= block_mapped_ram)
+		return 0;
+
+	return (unsigned long)__va(pa);
+}
+
+/*
+ * Return the huge PTE slot for @va under @pmdp, allocating it from memblock
+ * first when the directory entry is still empty. A newly allocated PTE is
+ * installed in two consecutive directory entries (@pmdp and @pmdp + 1).
+ *
+ * Returns NULL when the allocation fails.
+ */
+static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va)
+{
+	pte_t *ptep;
+
+	if (hpd_val(*pmdp) != 0)
+		return hugepte_offset(*pmdp, va, PGDIR_SHIFT);
+
+	ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K);
+	if (!ptep)
+		return NULL;
+
+	hugepd_populate_kernel(pmdp, ptep, PAGE_SHIFT_8M);
+	hugepd_populate_kernel(pmdp + 1, ptep, PAGE_SHIFT_8M);
+
+	return hugepte_offset(*pmdp, va, PGDIR_SHIFT);
+}
+
+/*
+ * Install one 512k or 8M kernel huge page mapping @va to @pa with @prot.
+ *
+ * @new selects between creating the mapping (page table memory is allocated
+ * as needed, only allowed before the slab allocator is available, and the
+ * PTE must not already be present) and rewriting an existing one.
+ *
+ * Returns 0 on success, -EINVAL (with a warning) for an unsupported @psize,
+ * a late @new request or an already present PTE, and -ENOMEM (with a
+ * warning) when no PTE could be obtained.
+ */
+static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
+					     pgprot_t prot, int psize, bool new)
+{
+	pmd_t *pmdp = pmd_off_k(va);
+	const bool is_512k = psize == MMU_PAGE_512K;
+	pte_t *ptep;
+
+	if (WARN_ON(!is_512k && psize != MMU_PAGE_8M))
+		return -EINVAL;
+
+	if (new && WARN_ON(slab_is_available()))
+		return -EINVAL;
+
+	if (is_512k) {
+		if (new)
+			ptep = early_pte_alloc_kernel(pmdp, va);
+		else
+			ptep = pte_offset_kernel(pmdp, va);
+	} else {
+		if (new)
+			ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va);
+		else
+			ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT);
+	}
+
+	if (WARN_ON(!ptep))
+		return -ENOMEM;
+
+	/* The PTE should never be already present */
+	if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
+		return -EINVAL;
+
+	set_huge_pte_at(&init_mm, va, ptep,
+			pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize);
+
+	return 0;
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ * The 8xx version has an empty body.
+ */
+void __init MMU_init_hw(void)
+{
+}
+
+static bool immr_is_mapped __initdata;
+
+/*
+ * Map the IMMR area at VIRT_IMMR_BASE with a single non-cached guarded 512k
+ * page. Only the first call does anything; later calls return immediately.
+ */
+void __init mmu_mapin_immr(void)
+{
+	if (!immr_is_mapped) {
+		immr_is_mapped = true;
+
+		__early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE,
+					    PAGE_KERNEL_NCG, MMU_PAGE_512K,
+					    true);
+	}
+}
+
+/*
+ * Map (or remap) the physical range [@offset, @top) of RAM at
+ * PAGE_OFFSET + @offset with protection @prot: 512k pages up to the first
+ * 8M boundary, 8M pages for the 8M-aligned middle part, and 512k pages for
+ * whatever remains below @top.
+ *
+ * @offset and @top are expected to be 512k aligned; a warning is emitted
+ * otherwise. With @new false the existing entries are rewritten and the
+ * kernel TLB is flushed for the range afterwards.
+ */
+static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
+				pgprot_t prot, bool new)
+{
+	unsigned long v = PAGE_OFFSET + offset;
+	unsigned long p = offset;
+
+	WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K));
+
+	for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K)
+		__early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+	for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M)
+		__early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
+	for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K)
+		__early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+
+	/*
+	 * The loops leave v at the end of the chunk and v already includes
+	 * PAGE_OFFSET, so the flush is expressed from the chunk's own start
+	 * offset to cover exactly the virtual range that was rewritten.
+	 */
+	if (!new)
+		flush_tlb_kernel_range(PAGE_OFFSET + offset, PAGE_OFFSET + top);
+}
+
+/*
+ * Block-map RAM for the kernel linear mapping.
+ *
+ * The IMMR area is mapped first. RAM from 0 up to a boundary is then mapped
+ * as kernel text; the boundary is the start of init text when strict kernel
+ * RWX, debug_pagealloc or KFENCE is in effect, and the end of text rounded
+ * up to 8M otherwise. Unless debug_pagealloc/KFENCE is in effect, the rest
+ * up to the 8M-rounded end of init text is mapped as text too and
+ * everything above it up to @top as plain kernel memory.
+ *
+ * @base is not used. Returns the amount of RAM that was block-mapped, which
+ * is also stored in block_mapped_ram.
+ */
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	unsigned long text_end_8m = ALIGN(__pa(_etext), SZ_8M);
+	unsigned long init_start = __pa(_sinittext);
+	bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence();
+	unsigned long boundary = strict_boundary ? init_start : text_end_8m;
+	unsigned long init_end_8m = ALIGN(__pa(_einittext), SZ_8M);
+
+	WARN_ON(top < init_end_8m);
+
+	mmu_mapin_immr();
+
+	mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true);
+	if (!debug_pagealloc_enabled_or_kfence()) {
+		mmu_mapin_ram_chunk(boundary, init_end_8m, PAGE_KERNEL_TEXT, true);
+		mmu_mapin_ram_chunk(init_end_8m, top, PAGE_KERNEL, true);
+	} else {
+		/* Only the part below the boundary is block-mapped */
+		top = boundary;
+	}
+
+	if (top > SZ_32M)
+		memblock_set_current_limit(top);
+
+	block_mapped_ram = top;
+
+	return top;
+}
+
+/*
+ * Remap the area between the text boundary and the 8M-rounded end of init
+ * text with PAGE_KERNEL (skipped when debug_pagealloc or KFENCE is in
+ * effect), then call mmu_pin_tlb() for the block-mapped RAM.
+ */
+void mmu_mark_initmem_nx(void)
+{
+	unsigned long text_end_8m = ALIGN(__pa(_etext), SZ_8M);
+	unsigned long init_start = __pa(_sinittext);
+	unsigned long init_end_8m = ALIGN(__pa(_einittext), SZ_8M);
+	unsigned long boundary;
+
+	if (strict_kernel_rwx_enabled())
+		boundary = init_start;
+	else
+		boundary = text_end_8m;
+
+	if (!debug_pagealloc_enabled_or_kfence())
+		mmu_mapin_ram_chunk(boundary, init_end_8m, PAGE_KERNEL, false);
+
+	mmu_pin_tlb(block_mapped_ram, false);
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+/*
+ * Remap RAM from 0 up to the start of init text with PAGE_KERNEL_ROX, then
+ * call mmu_pin_tlb() again when CONFIG_PIN_TLB_DATA is set.
+ */
+void mmu_mark_rodata_ro(void)
+{
+	unsigned long init_start = __pa(_sinittext);
+
+	mmu_mapin_ram_chunk(0, init_start, PAGE_KERNEL_ROX, false);
+
+	if (!IS_ENABLED(CONFIG_PIN_TLB_DATA))
+		return;
+
+	mmu_pin_tlb(block_mapped_ram, true);
+}
+#endif
+
+/*
+ * Cap early memblock allocations to the first 32M of the first memblock
+ * (or to its size when it is smaller).
+ */
+void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				       phys_addr_t first_memblock_size)
+{
+	u64 limit;
+
+	/* A first MEMBLOCK that does not start at physical 0 is not
+	 * currently supported on those processors
+	 */
+	BUG_ON(first_memblock_base != 0);
+
+	/* 8xx can only access 32MB at the moment */
+	limit = min_t(u64, first_memblock_size, SZ_32M);
+	memblock_set_current_limit(limit);
+}
+
+/* Does nothing with @pud and always returns 0. */
+int pud_clear_huge(pud_t *pud)
+{
+	return 0;
+}
+
+/* Does nothing with @pmd and always returns 0. */
+int pmd_clear_huge(pmd_t *pmd)
+{
+	return 0;
+}
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
new file mode 100644
index 0000000000..f3894e79d5
--- /dev/null
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
+
+obj-y				+= mmu_context.o tlb.o tlb_low.o kup.o
+obj-$(CONFIG_PPC_BOOK3E_64)  	+= tlb_low_64e.o book3e_pgtable.o
+obj-$(CONFIG_40x)		+= 40x.o
+obj-$(CONFIG_44x)		+= 44x.o
+obj-$(CONFIG_PPC_8xx)		+= 8xx.o
+obj-$(CONFIG_PPC_E500)		+= e500.o
+obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr_booke.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_E500)	+= e500_hugetlbpage.o
+endif
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_e500.o := n
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
new file mode 100644
index 0000000000..b80fc4a91a
--- /dev/null
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/dma.h>
+#include <asm/code-patching.h>
+
+#include <mm/mmu_decl.h>
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different.
+ *
+ * Maps @page_size bytes starting at @start, one PAGE_SIZE PTE at a time,
+ * all pointing at @phys. BUGs when the vmemmap page size encoding does not
+ * fit in a PTE or when a PTE cannot be installed. Always returns 0.
+ */
+int __meminit vmemmap_create_mapping(unsigned long start,
+				     unsigned long page_size,
+				     unsigned long phys)
+{
+	/* Start from a PTE encoding without page size */
+	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+	unsigned long off;
+
+	/* PTEs only contain page size encodings up to 32M */
+	BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+
+	/* Encode the size in the PTE */
+	flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+
+	/* phys is deliberately not incremented: all PTEs are of the large
+	 * size and thus must have the low bits clear
+	 */
+	for (off = 0; off < page_size; off += PAGE_SIZE) {
+		int rc = map_kernel_page(start + off, phys, __pgprot(flags));
+
+		BUG_ON(rc);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+			    unsigned long page_size)
+{	/* empty body: nothing is unmapped here */
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+/*
+ * Allocate @size bytes, aligned on @size, from memblock below
+ * __pa(MAX_DMA_ADDRESS) for use as a page table. Panics instead of
+ * returning NULL when the allocation fails.
+ */
+static void __init *early_alloc_pgtable(unsigned long size)
+{
+	void *table = memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT,
+					     __pa(MAX_DMA_ADDRESS),
+					     NUMA_NO_NODE);
+
+	if (table)
+		return table;
+
+	panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%lx\n",
+	      __func__, size, size, __pa(MAX_DMA_ADDRESS));
+}
+
+/*
+ * map_kernel_page installs a kernel PTE in init_mm mapping @ea to @pa with
+ * protection @prot.
+ *
+ * Once the slab allocator is available the intermediate page table levels
+ * are obtained with pud_alloc()/pmd_alloc()/pte_alloc_kernel() and -ENOMEM
+ * is returned when one of them fails. Before that, missing levels come from
+ * early_alloc_pgtable(), which panics on failure. Returns 0 on success.
+ *
+ * NOTE(review): this comment used to mention __ioremap as the only caller
+ * and an HPT entry being added; neither is visible in this function.
+ */
+int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
+	if (slab_is_available()) {
+		pgdp = pgd_offset_k(ea);
+		p4dp = p4d_offset(pgdp, ea);
+		pudp = pud_alloc(&init_mm, p4dp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		ptep = pte_alloc_kernel(pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+	} else {
+		pgdp = pgd_offset_k(ea);
+		p4dp = p4d_offset(pgdp, ea);
+		if (p4d_none(*p4dp)) {
+			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+			p4d_populate(&init_mm, p4dp, pudp);
+		}
+		pudp = pud_offset(p4dp, ea);
+		if (pud_none(*pudp)) {
+			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+			pud_populate(&init_mm, pudp, pmdp);
+		}
+		pmdp = pmd_offset(pudp, ea);
+		if (!pmd_present(*pmdp)) {
+			ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
+			pmd_populate_kernel(&init_mm, pmdp, ptep);
+		}
+		ptep = pte_offset_kernel(pmdp, ea);
+	}
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
+
+	smp_wmb();
+	return 0;
+}
+
+/*
+ * Patch the exception vector at byte offset @exc from
+ * interrupt_base_book3e so that it branches to @addr.
+ */
+void __patch_exception(int exc, unsigned long addr)
+{
+	unsigned int *ibase = &interrupt_base_book3e;
+	unsigned int *site;
+
+	/*
+	 * Exception vectors start with a NOP and -then- a branch, to deal
+	 * with single stepping from userspace which stops on the second
+	 * instruction. The instruction to patch is therefore the second
+	 * one of the exception, not the first one.
+	 */
+	site = ibase + (exc / 4) + 1;
+
+	patch_branch(site, addr, 0);
+}
diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c
new file mode 100644
index 0000000000..40a4e69ae1
--- /dev/null
+++ b/arch/powerpc/mm/nohash/e500.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
+ * E500 Book E processors.
+ *
+ * Copyright 2004,2010 Freescale Semiconductor, Inc.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include <asm/paca.h>
+
+#include <mm/mmu_decl.h>
+
+unsigned int tlbcam_index;	/* set by map_mem_in_cams_addr() to the number of CAM entries it loaded with init=true */
+
+struct tlbcam TLBCAM[NUM_TLBCAMS];
+
+static struct {
+	unsigned long start;	/* first virtual address covered by the entry */
+	unsigned long limit;	/* last virtual address covered (inclusive) */
+	phys_addr_t phys;	/* physical address mapped at start */
+} tlbcam_addrs[NUM_TLBCAMS];
+
+#ifdef CONFIG_PPC_85xx
+/*
+ * Return PA for this VA if it is mapped by a CAM, or 0
+ *
+ * tlbcam_addrs[].limit holds the last address covered by an entry
+ * (settlbcam() stores virt + size - 1), so the upper bound is inclusive.
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+	int b;
+	for (b = 0; b < tlbcam_index; ++b)
+		if (va >= tlbcam_addrs[b].start && va <= tlbcam_addrs[b].limit)
+			return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
+	return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ *
+ * tlbcam_addrs[].limit is the last virtual address covered by an entry, so
+ * (limit - start) + phys is the last physical address covered and the
+ * upper bound is inclusive.
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+	int b;
+	for (b = 0; b < tlbcam_index; ++b)
+		if (pa >= tlbcam_addrs[b].phys
+			&& pa <= (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
+		              +tlbcam_addrs[b].phys)
+			return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
+	return 0;
+}
+#endif
+
+/*
+ * Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
+ * in particular size must be a power of 4 between 4k and the max supported by
+ * an implementation; max may further be limited by what can be represented in
+ * an unsigned long (for example, 32-bit implementations cannot support a 4GB
+ * size).
+ *
+ * Only the in-memory TLBCAM[index] image and tlbcam_addrs[index] are filled
+ * in here; nothing in this function writes the hardware TLB.
+ */
+static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+		unsigned long size, unsigned long flags, unsigned int pid)
+{
+	unsigned int tsize;
+
+	tsize = __ilog2(size) - 10;	/* log2 of the size expressed in KB */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+	if ((flags & _PAGE_NO_CACHE) == 0)
+		flags |= _PAGE_COHERENT;	/* cacheable mappings are forced coherent on these configs */
+#endif
+
+	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
+	TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
+	TLBCAM[index].MAS2 = virt & PAGE_MASK;
+
+	TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
+
+	TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR;
+	TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0;
+	if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+		TLBCAM[index].MAS7 = (u64)phys >> 32;	/* physical address bits above bit 31 */
+
+	/* Below is unlikely -- only for large user pages or similar */
+	if (pte_user(__pte(flags))) {
+		TLBCAM[index].MAS3 |= MAS3_UR;
+		TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0;
+		TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0;
+	} else {
+		TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0;
+	}
+
+	tlbcam_addrs[index].start = virt;
+	tlbcam_addrs[index].limit = virt + size - 1;	/* inclusive end address */
+	tlbcam_addrs[index].phys = phys;
+}
+
+/*
+ * Compute the size of the largest CAM entry that can be used to map @ram
+ * bytes at @virt/@phys: a power of two no larger than @ram, no larger than
+ * the common alignment of @virt and @phys, and no larger than the maximum
+ * derived from the TLB1 configuration registers. On MMU architecture
+ * version 1 the exponent is additionally forced to be even.
+ */
+static unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+				 phys_addr_t phys)
+{
+	unsigned int shift = __ilog2(ram);
+	unsigned int align = __ffs(virt | phys);
+	unsigned long max_shift;
+
+	if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) != MMUCFG_MAVN_V1) {
+		/* Convert (2^max) kB to (2^max) bytes */
+		max_shift = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
+	} else {
+		/* Convert (4^max) kB to (2^max) bytes */
+		max_shift = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
+		shift &= ~1U;
+		align &= ~1U;
+	}
+
+	if (align < shift)
+		shift = align;
+	if (max_shift < shift)
+		shift = max_shift;
+
+	return 1UL << shift;
+}
+
+static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
+					unsigned long ram, int max_cam_idx,
+					bool dryrun, bool init)
+{	/* Covers up to ram bytes at virt/phys with at most max_cam_idx CAM entries and returns the number of bytes covered */
+	int i;
+	unsigned long amount_mapped = 0;
+	unsigned long boundary;
+
+	if (strict_kernel_rwx_enabled())
+		boundary = (unsigned long)(_sinittext - _stext);	/* first pass stops at the start of init text */
+	else
+		boundary = ram;	/* first pass covers everything */
+
+	/* Calculate CAM values */
+	for (i = 0; boundary && i < max_cam_idx; i++) {
+		unsigned long cam_sz;
+		pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL_ROX;
+
+		cam_sz = calc_cam_sz(boundary, virt, phys);
+		if (!dryrun)
+			settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
+
+		boundary -= cam_sz;
+		amount_mapped += cam_sz;
+		virt += cam_sz;
+		phys += cam_sz;
+	}
+	for (ram -= amount_mapped; ram && i < max_cam_idx; i++) {	/* second pass: whatever the first pass left */
+		unsigned long cam_sz;
+		pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL;
+
+		cam_sz = calc_cam_sz(ram, virt, phys);
+		if (!dryrun)
+			settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
+
+		ram -= cam_sz;
+		amount_mapped += cam_sz;
+		virt += cam_sz;
+		phys += cam_sz;
+	}
+
+	if (dryrun)
+		return amount_mapped;	/* dry run: nothing was recorded or loaded */
+
+	if (init) {
+		loadcam_multi(0, i, max_cam_idx);
+		tlbcam_index = i;
+	} else {
+		loadcam_multi(0, i, 0);
+		WARN_ON(i > tlbcam_index);	/* a remap is not expected to use more entries than the initial mapping */
+	}
+
+#ifdef CONFIG_PPC64
+	get_paca()->tcd.esel_next = i;
+	get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+	get_paca()->tcd.esel_first = i;
+#endif
+
+	return amount_mapped;
+}
+
+/*
+ * Same as map_mem_in_cams_addr(), with the mapping fixed to start at
+ * virtual PAGE_OFFSET and physical memstart_addr.
+ */
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun, bool init)
+{
+	return map_mem_in_cams_addr(memstart_addr, PAGE_OFFSET, ram,
+				    max_cam_idx, dryrun, init);
+}
+
+#ifdef CONFIG_PPC32
+
+#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
+#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
+#endif
+
+/*
+ * Report how much lowmem the CAM entries cover: the offset from PAGE_OFFSET
+ * of the byte just past the last entry's inclusive limit. @base and @top
+ * are not used.
+ */
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	unsigned long last_limit = tlbcam_addrs[tlbcam_index - 1].limit;
+
+	return last_limit - PAGE_OFFSET + 1;
+}
+
+/*
+ * Set the ICFI and ICLFR bits in L1CSR1 (read-modify-write), then isync.
+ */
+void flush_instruction_cache(void)
+{
+	unsigned long l1csr1 = mfspr(SPRN_L1CSR1);
+
+	mtspr(SPRN_L1CSR1, l1csr1 | L1CSR1_ICFI | L1CSR1_ICLFR);
+	isync();
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ * Here that only consists of flushing the instruction cache.
+ */
+void __init MMU_init_hw(void)
+{
+	flush_instruction_cache();
+}
+
+/* Size in bytes of CAM entry @idx, computed from its inclusive limit. */
+static unsigned long __init tlbcam_sz(int idx)
+{
+	unsigned long span = tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start;
+
+	return span + 1;
+}
+
+void __init adjust_total_lowmem(void)
+{
+	unsigned long ram;
+	int i;
+
+	/* adjust lowmem size to __max_low_memory */
+	ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
+
+	i = switch_to_as1();	/* the CAM entries are rewritten between switch_to_as1() and restore_to_as0() */
+	__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false, true);	/* becomes the amount actually mapped */
+	restore_to_as0(i, 0, NULL, 1);
+
+	pr_info("Memory CAM mapping: ");
+	for (i = 0; i < tlbcam_index - 1; i++)
+		pr_cont("%lu/", tlbcam_sz(i) >> 20);	/* size of each entry in MB */
+	pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
+	        (unsigned int)((total_lowmem - __max_low_memory) >> 20));
+
+	memblock_set_current_limit(memstart_addr + __max_low_memory);	/* memblock limit follows the CAM-mapped area */
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+/*
+ * Redo the CAM mapping of lowmem with init=false and warn when the amount
+ * remapped differs from __max_low_memory.
+ */
+void mmu_mark_rodata_ro(void)
+{
+	unsigned long remapped = map_mem_in_cams(__max_low_memory,
+						 CONFIG_LOWMEM_CAM_NUM,
+						 false, false);
+
+	WARN_ON(__max_low_memory != remapped);
+}
+#endif
+
+void mmu_mark_initmem_nx(void)
+{
+	/* Intentionally empty: everything is done in mmu_mark_rodata_ro() */
+}
+
+/*
+ * Cap early memblock allocations to the end of the first memblock, or to
+ * 64M (0x04000000) when the first memblock ends above that.
+ */
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	phys_addr_t first_end = first_memblock_base + first_memblock_size;
+
+	/* 64M mapped initially according to head_fsl_booke.S */
+	memblock_set_current_limit(min_t(u64, first_end, 0x04000000));
+}
+
+#ifdef CONFIG_RELOCATABLE
+int __initdata is_second_reloc;	/* set to 1 just before the second relocation is started */
+notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
+{
+	unsigned long base = kernstart_virt_addr;
+	phys_addr_t size;
+
+	kernstart_addr = start;
+	if (is_second_reloc) {	/* second pass: fix up the offset, run the late KASLR hook and stop */
+		virt_phys_offset = PAGE_OFFSET - memstart_addr;
+		kaslr_late_init();
+		return;
+	}
+
+	/*
+	 * Relocatable kernel support based on processing of dynamic
+	 * relocation entries. Before we get the real memstart_addr,
+	 * We will compute the virt_phys_offset like this:
+	 * virt_phys_offset = stext.run - kernstart_addr
+	 *
+	 * stext.run = (KERNELBASE & ~0x3ffffff) +
+	 *				(kernstart_addr & 0x3ffffff)
+	 * When we relocate, we have :
+	 *
+	 *	(kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
+	 *
+	 * hence:
+	 *  virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
+	 *                              (kernstart_addr & ~0x3ffffff)
+	 *
+	 */
+	start &= ~0x3ffffff;	/* round down to a 64M boundary */
+	base &= ~0x3ffffff;
+	virt_phys_offset = base - start;
+	early_get_first_memblock_info(__va(dt_ptr), &size);
+	/*
+	 * We now get the memstart_addr, then we should check if this
+	 * address is the same as what the PAGE_OFFSET map to now. If
+	 * not we have to change the map of PAGE_OFFSET to memstart_addr
+	 * and do a second relocation.
+	 */
+	if (start != memstart_addr) {
+		int n;
+		long offset = start - memstart_addr;
+
+		is_second_reloc = 1;
+		n = switch_to_as1();
+		/* map a 64M area for the second relocation */
+		if (memstart_addr > start)
+			map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM,
+					false, true);
+		else
+			map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
+					0x4000000, CONFIG_LOWMEM_CAM_NUM,
+					false, true);
+		restore_to_as0(n, offset, __va(dt_ptr), 1);
+		/* We should never reach here */
+		panic("Relocation error");
+	}
+
+	kaslr_early_init(__va(dt_ptr), size);	/* no second relocation needed: continue with early KASLR setup */
+}
+#endif
+#endif
diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
new file mode 100644
index 0000000000..6b30e40d45
--- /dev/null
+++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PPC Huge TLB Page Support for Book3E MMU
+ *
+ * Copyright (C) 2009 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
+ *
+ */
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
+/*
+ * Return the TLB1 entry-select value to use now and advance the per-core
+ * cursor, wrapping to esel_first once the successor reaches esel_max.
+ */
+static inline int tlb1_next(void)
+{
+	struct paca_struct *paca = get_paca();
+	struct tlb_core_data *core = paca->tcd_ptr;
+	int cur = core->esel_next;
+	int succ = cur + 1;
+
+	/* Round-robin: run off the end -> restart at the first usable entry */
+	if (succ >= core->esel_max)
+		succ = core->esel_first;
+	core->esel_next = succ;
+
+	return cur;
+}
+
+/*
+ * Take the byte lock in this CPU's tlb_core_data (paca->tcd_ptr->lock).
+ *
+ * Does nothing when the CPU lacks CPU_FTR_SMT. Otherwise it spins until the
+ * lock byte reads as zero and then stores a non-zero token
+ * (smp_processor_id() + 1) into it with a reserved load / conditional store
+ * pair.
+ */
+static inline void book3e_tlb_lock(void)
+{
+	struct paca_struct *paca = get_paca();
+	unsigned long tmp;
+	int token = smp_processor_id() + 1;
+
+	/*
+	 * Besides being unnecessary in the absence of SMT, this
+	 * check prevents trying to do lbarx/stbcx. on e5500 which
+	 * doesn't implement either feature.
+	 */
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
+	/*
+	 * Label 1: reserved load of the lock byte; if it is non-zero go to 2,
+	 *          otherwise try the conditional store of the token and retry
+	 *          from 1 if that store fails, else exit via 3.
+	 * Label 2: poll the byte with a plain load until it reads zero, then
+	 *          go back to 1 for another attempt.
+	 */
+	asm volatile(".machine push;"
+		     ".machine e6500;"
+		     "1: lbarx %0, 0, %1;"
+		     "cmpwi %0, 0;"
+		     "bne 2f;"
+		     "stbcx. %2, 0, %1;"
+		     "bne 1b;"
+		     "b 3f;"
+		     "2: lbzx %0, 0, %1;"
+		     "cmpwi %0, 0;"
+		     "bne 2b;"
+		     "b 1b;"
+		     "3:"
+		     ".machine pop;"
+		     : "=&r" (tmp)
+		     : "r" (&paca->tcd_ptr->lock), "r" (token)
+		     : "memory");
+}
+
+/*
+ * Release the lock taken by book3e_tlb_lock() by clearing the lock byte.
+ * Does nothing when the CPU lacks CPU_FTR_SMT, mirroring the lock side.
+ */
+static inline void book3e_tlb_unlock(void)
+{
+	struct paca_struct *paca = get_paca();
+
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
+	/* isync() is issued before the lock byte is cleared */
+	isync();
+	paca->tcd_ptr->lock = 0;
+}
+#else
+/*
+ * Return this CPU's current TLB1 (CAM) slot and advance the per-CPU cursor.
+ * After the last entry reported by TLB1CFG the cursor restarts at
+ * tlbcam_index.
+ */
+static inline int tlb1_next(void)
+{
+	int ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+	int cur = this_cpu_read(next_tlbcam_idx);
+
+	/* Plain round-robin; only the final slot needs the wrap-around */
+	if (likely(cur != ncams - 1))
+		__this_cpu_inc(next_tlbcam_idx);
+	else
+		__this_cpu_write(next_tlbcam_idx, tlbcam_index);
+
+	return cur;
+}
+
+/* No-op variant used when CONFIG_PPC64 is not set. */
+static inline void book3e_tlb_lock(void)
+{
+}
+
+/* No-op variant used when CONFIG_PPC64 is not set. */
+static inline void book3e_tlb_unlock(void)
+{
+}
+#endif
+
+/*
+ * Search the TLB for a translation of @ea under @pid.
+ *
+ * MAS6 is loaded with @pid shifted left by 16, a tlbsx search is issued for
+ * @ea, and the top bit of SPR 0x271 is returned.
+ * NOTE(review): SPR 0x271 is presumably MAS1 and bit 31 its valid bit --
+ * confirm against the register definitions.
+ *
+ * Returns non-zero when the searched entry is reported as present.
+ */
+static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
+{
+	int found = 0;
+
+	mtspr(SPRN_MAS6, pid << 16);
+	asm volatile(
+		"tlbsx	0,%1\n"
+		"mfspr	%0,0x271\n"
+		"srwi	%0,%0,31\n"
+		: "=&r"(found) : "r"(ea));
+
+	return found;
+}
+
+/*
+ * Write a TLB1 entry for the huge page at @ea described by @pte, unless the
+ * address is a kernel address or a matching entry already exists.
+ *
+ * @vma: mapping the address belongs to; supplies the mm and the page size
+ * @ea:  effective address being mapped
+ * @pte: huge page PTE supplying the pfn, WIMGE and permission bits
+ *
+ * Runs with local interrupts disabled and the book3e TLB lock held from the
+ * existence check until after the tlbwe.
+ */
+static void
+book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
+{
+	unsigned long mas1, mas2;
+	u64 mas7_3;
+	unsigned long psize, tsize, shift;
+	unsigned long flags;
+	struct mm_struct *mm;
+	int index;
+
+	if (unlikely(is_kernel_addr(ea)))
+		return;
+
+	mm = vma->vm_mm;
+
+	psize = vma_mmu_pagesize(vma);
+	shift = __ilog2(psize);
+	/* value handed to MAS1_TSIZE() below: log2(page size) - 10 */
+	tsize = shift - 10;
+	/*
+	 * We can't be interrupted while we're setting up the MAS
+	 * registers or after we've confirmed that no tlb exists.
+	 */
+	local_irq_save(flags);
+
+	book3e_tlb_lock();
+
+	/* Nothing to do if a translation for this address/context is already there */
+	if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
+		book3e_tlb_unlock();
+		local_irq_restore(flags);
+		return;
+	}
+
+	/* We have to use the CAM(TLB1) on FSL parts for hugepages */
+	index = tlb1_next();
+	mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
+
+	mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
+	/* page-aligned effective address plus the WIMGE bits taken from the PTE */
+	mas2 = ea & ~((1UL << shift) - 1);
+	mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
+	/* physical address in the upper part, BAP permission bits in the lower */
+	mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT;
+	mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK;
+	/* A clean page gets no write permission in the entry */
+	if (!pte_dirty(pte))
+		mas7_3 &= ~(MAS3_SW|MAS3_UW);
+
+	mtspr(SPRN_MAS1, mas1);
+	mtspr(SPRN_MAS2, mas2);
+
+	/* MAS7 (upper 32 bits) is only written when the MMU has BIG_PHYS */
+	if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+		mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+	mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
+
+	asm volatile ("tlbwe");
+
+	book3e_tlb_unlock();
+	local_irq_restore(flags);
+}
+
+/*
+ * Called at the end of handling a user page fault, once the fault has been
+ * resolved by updating a PTE in the linux page tables.
+ *
+ * Only hugetlb mappings are acted upon: their translation is preloaded into
+ * the TLB. @vmf and @nr are not used.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+		unsigned long address, pte_t *ptep, unsigned int nr)
+{
+	if (!is_vm_hugetlb_page(vma))
+		return;
+
+	book3e_hugetlb_preload(vma, address, *ptep);
+}
+
+/*
+ * Flush the TLB entry for the huge page at @vmaddr in @vma's mm. The tsize
+ * argument is derived from the huge page shift of the file's hstate, minus 10.
+ */
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	unsigned long tsize;
+
+	tsize = huge_page_shift(hstate_file(vma->vm_file)) - 10;
+
+	__flush_tlb_page(vma->vm_mm, vmaddr, tsize, 0);
+}
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
new file mode 100644
index 0000000000..2fb3edafe9
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+#include <linux/crash_core.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdump.h>
+#include <mm/mmu_decl.h>
+
+/*
+ * Address ranges collected once in kaslr_choose_location() and consulted
+ * while searching for a location for the randomized kernel.
+ */
+struct regions {
+	unsigned long pa_start;		/* memstart_addr */
+	unsigned long pa_end;		/* memstart_addr + usable linear size */
+	unsigned long kernel_size;	/* size of the kernel image to place */
+	unsigned long dtb_start;	/* __pa() of the device tree blob */
+	unsigned long dtb_end;		/* dtb_start + fdt_totalsize() */
+	unsigned long initrd_start;	/* from /chosen linux,initrd-start */
+	unsigned long initrd_end;	/* from /chosen linux,initrd-end */
+	unsigned long crash_start;	/* crashkernel base */
+	unsigned long crash_end;	/* crashkernel base + size */
+	int reserved_mem;		/* offset of /reserved-memory, < 0 if absent */
+	int reserved_mem_addr_cells;	/* #address-cells of /reserved-memory */
+	int reserved_mem_size_cells;	/* #size-cells of /reserved-memory */
+};
+
+/* Single instance, only valid during init (__initdata) */
+struct regions __initdata regions;
+
+/*
+ * Fill boot_command_line via early_init_dt_scan_chosen() so that
+ * command-line options can be inspected this early. @fdt is not used here.
+ */
+static __init void kaslr_get_cmdline(void *fdt)
+{
+	early_init_dt_scan_chosen(boot_command_line);
+}
+
+/*
+ * Fold @size bytes at @area into @hash, one unsigned long at a time: rotate
+ * the running hash right by 7 bits, then XOR in the next word. Trailing bytes
+ * that do not fill a whole unsigned long are ignored.
+ *
+ * Returns the updated hash.
+ */
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+				       size_t size)
+{
+	const unsigned long *word = area;
+	const unsigned long *stop = word + size / sizeof(hash);
+
+	while (word < stop) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash >> 7) | (hash << ((sizeof(hash) * 8) - 7));
+		hash ^= *word++;
+	}
+
+	return hash;
+}
+
+/* Attempt to create a simple starting entropy. This can make it different for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ *
+ * The seed is a rotate_xor() hash over linux_banner followed by the whole
+ * device tree blob (fdt_totalsize() bytes at @fdt).
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+	unsigned long hash = 0;
+
+	/* build-specific string for starting entropy. */
+	hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+	hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+	return hash;
+}
+
+/*
+ * Read the 64-bit "kaslr-seed" property from /chosen in @fdt and then
+ * overwrite it with zero in the blob.
+ *
+ * Returns the seed, or 0 when /chosen or the property is missing or the
+ * property is not exactly 8 bytes long.
+ */
+static __init u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	fdt64_t *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	/* writable accessor: the property is wiped below */
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+/*
+ * Return true when [s1, e1] and [s2, e2] intersect. Both end points are
+ * treated as inclusive, so ranges that merely touch count as overlapping.
+ */
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+	/* Disjoint only if one range ends before the other begins */
+	if (e1 < s2 || e2 < s1)
+		return false;
+
+	return true;
+}
+
+/*
+ * Check whether [@start, @end] collides with memory reserved in the device
+ * tree: first the /memreserve/ entries, then the "reg" ranges of every child
+ * of /reserved-memory (when regions.reserved_mem is a valid node offset).
+ *
+ * Reservation addresses are 64-bit while the candidate range is 32-bit, so
+ * each reservation is clamped to the 32-bit range before comparing instead
+ * of being silently truncated.
+ *
+ * Returns true on overlap, false otherwise.
+ */
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+					    u32 end)
+{
+	int subnode, len, i;
+	int entry_len;
+	u64 base, size, rsv_end;
+
+	/* check for overlap with /memreserve/ entries */
+	for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+		if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+			continue;
+
+		/* a reservation starting above 4G cannot hit a u32 range */
+		if (base > U32_MAX)
+			continue;
+
+		rsv_end = min(base + size, (u64)U32_MAX);
+
+		if (regions_overlap(start, end, base, rsv_end))
+			return true;
+	}
+
+	if (regions.reserved_mem < 0)
+		return false;
+
+	/* size in bytes of one <address size> tuple of a "reg" property */
+	entry_len = 4 * (regions.reserved_mem_addr_cells +
+			 regions.reserved_mem_size_cells);
+
+	/* check for overlap with static reservations in /reserved-memory */
+	for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+	     subnode >= 0;
+	     subnode = fdt_next_subnode(fdt, subnode)) {
+		const fdt32_t *reg;
+
+		len = 0;
+		reg = fdt_getprop(fdt, subnode, "reg", &len);
+		/*
+		 * len is in bytes: only consume complete tuples, and never
+		 * loop on a degenerate zero-sized tuple.
+		 */
+		while (entry_len > 0 && len >= entry_len) {
+			base = fdt32_to_cpu(reg[0]);
+			if (regions.reserved_mem_addr_cells == 2)
+				base = (base << 32) | fdt32_to_cpu(reg[1]);
+
+			reg += regions.reserved_mem_addr_cells;
+			len -= 4 * regions.reserved_mem_addr_cells;
+
+			size = fdt32_to_cpu(reg[0]);
+			if (regions.reserved_mem_size_cells == 2)
+				size = (size << 32) | fdt32_to_cpu(reg[1]);
+
+			reg += regions.reserved_mem_size_cells;
+			len -= 4 * regions.reserved_mem_size_cells;
+
+			/* reservations beyond the usable range are irrelevant */
+			if (base >= regions.pa_end)
+				continue;
+
+			rsv_end = min(base + size, (u64)U32_MAX);
+
+			if (regions_overlap(start, end, base, rsv_end))
+				return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * Return true when [@start, @end] collides with anything that must be left
+ * alone: the running kernel image, the device tree blob, the initrd, the
+ * crash kernel area, or a reservation described in the device tree. The
+ * checks run in that order and stop at the first hit.
+ */
+static __init bool overlaps_region(const void *fdt, u32 start,
+				   u32 end)
+{
+	return regions_overlap(start, end, __pa(_stext), __pa(_end)) ||
+	       regions_overlap(start, end, regions.dtb_start,
+			       regions.dtb_end) ||
+	       regions_overlap(start, end, regions.initrd_start,
+			       regions.initrd_end) ||
+	       regions_overlap(start, end, regions.crash_start,
+			       regions.crash_end) ||
+	       overlaps_reserved_region(fdt, start, end);
+}
+
+/*
+ * Record the crashkernel reservation requested on the command line in
+ * regions.crash_start/crash_end. Compiled to an empty function without
+ * CONFIG_CRASH_CORE.
+ *
+ * @fdt:  unused
+ * @size: system RAM size handed to parse_crashkernel()
+ */
+static void __init get_crash_kernel(void *fdt, unsigned long size)
+{
+#ifdef CONFIG_CRASH_CORE
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, size, &crash_size,
+				&crash_base);
+	if (ret != 0 || crash_size == 0)
+		return;
+	/* No explicit base given: fall back to KDUMP_KERNELBASE */
+	if (crash_base == 0)
+		crash_base = KDUMP_KERNELBASE;
+
+	regions.crash_start = (unsigned long)crash_base;
+	regions.crash_end = (unsigned long)(crash_base + crash_size);
+
+	pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size);
+#endif
+}
+
+/*
+ * Read linux,initrd-start and linux,initrd-end from /chosen in @fdt and
+ * store them in regions.initrd_start/initrd_end. The regions fields are left
+ * untouched if /chosen or either property is missing.
+ */
+static void __init get_initrd_range(void *fdt)
+{
+	u64 start, end;
+	int node, len;
+	const __be32 *prop;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return;
+
+	prop = fdt_getprop(fdt, node, "linux,initrd-start", &len);
+	if (!prop)
+		return;
+	/* len is in bytes; of_read_number() takes a count of 32-bit cells */
+	start = of_read_number(prop, len / 4);
+
+	prop = fdt_getprop(fdt, node, "linux,initrd-end", &len);
+	if (!prop)
+		return;
+	end = of_read_number(prop, len / 4);
+
+	regions.initrd_start = (unsigned long)start;
+	regions.initrd_end = (unsigned long)end;
+
+	pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n", start, end);
+}
+
+/*
+ * Walk downwards from @offset in 16K steps, stopping once the candidate is
+ * no longer above @start, and return the first candidate address at which a
+ * kernel of regions.kernel_size bytes overlaps nothing that must be
+ * preserved.
+ *
+ * Returns the chosen address, or 0 if no candidate fits.
+ */
+static __init unsigned long get_usable_address(const void *fdt,
+					       unsigned long start,
+					       unsigned long offset)
+{
+	unsigned long cand = offset;
+
+	while ((long)cand > (long)start) {
+		if (!overlaps_region(fdt, cand, cand + regions.kernel_size))
+			return cand;
+
+		cand -= SZ_16K;
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch the #address-cells and #size-cells values of @node.
+ *
+ * @addr_cells: receives #address-cells, 1 if the property is not 4 bytes long
+ * @size_cells: receives #size-cells, 1 if the property is not 4 bytes long
+ */
+static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells,
+				  int *size_cells)
+{
+	const int *prop;
+	int len;
+
+	/*
+	 * Retrieve the #address-cells and #size-cells properties
+	 * from the 'node', or use the default if not provided.
+	 */
+	*addr_cells = *size_cells = 1;
+
+	/* prop is only dereferenced when the reported length is exactly 4 bytes */
+	prop = fdt_getprop(fdt, node, "#address-cells", &len);
+	if (len == 4)
+		*addr_cells = fdt32_to_cpu(*prop);
+	prop = fdt_getprop(fdt, node, "#size-cells", &len);
+	if (len == 4)
+		*size_cells = fdt32_to_cpu(*prop);
+}
+
+/*
+ * Find a usable location for the kernel, starting with 64M chunk @index and
+ * falling back to lower chunks if nothing fits.
+ *
+ * @dt_ptr: device tree blob, used for the reservation checks
+ * @index:  64M chunk (counted from memstart_addr) to try first
+ * @offset: offset inside a 64M chunk at which the downward search starts
+ *
+ * Returns the chosen location as an offset from memstart_addr, or 0 when no
+ * chunk has room.
+ */
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index,
+					       unsigned long offset)
+{
+	unsigned long koffset = 0;
+	unsigned long start, top;
+
+	while ((long)index >= 0) {
+		start = memstart_addr + index * SZ_64M;
+		/*
+		 * Keep @offset chunk-relative: folding the absolute address
+		 * back into it would make every retry start its search above
+		 * the chunk being tried (and possibly above the linear map).
+		 */
+		top = start + offset;
+		koffset = get_usable_address(dt_ptr, start, top);
+		if (koffset)
+			break;
+		index--;
+	}
+
+	/* get_usable_address() returns an address; callers want an offset */
+	if (koffset != 0)
+		koffset -= memstart_addr;
+
+	return koffset;
+}
+
+/* True when the string "nokaslr" appears anywhere in boot_command_line. */
+static inline __init bool kaslr_disabled(void)
+{
+	if (strstr(boot_command_line, "nokaslr"))
+		return true;
+
+	return false;
+}
+
+/*
+ * Pick a random location for the kernel inside the linear mapping.
+ *
+ * @dt_ptr:    device tree blob
+ * @size:      size of the first memory block
+ * @kernel_sz: size of the kernel image
+ *
+ * Returns the chosen offset from memstart_addr, or 0 when randomization is
+ * disabled ("nokaslr"), the linear mapping is smaller than 64M, or no usable
+ * location was found.
+ */
+static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size,
+						  unsigned long kernel_sz)
+{
+	unsigned long offset, random;
+	unsigned long ram, linear_sz;
+	u64 seed;
+	unsigned long index;
+
+	kaslr_get_cmdline(dt_ptr);
+	if (kaslr_disabled())
+		return 0;
+
+	/* Start from the build/device-tree derived hash ... */
+	random = get_boot_seed(dt_ptr);
+
+	/* ... mix in two timebase reads ... */
+	seed = get_tb() << 32;
+	seed ^= get_tb();
+	random = rotate_xor(random, &seed, sizeof(seed));
+
+	/*
+	 * Retrieve (and wipe) the seed from the FDT
+	 */
+	seed = get_kaslr_seed(dt_ptr);
+	if (seed)
+		random = rotate_xor(random, &seed, sizeof(seed));
+	else
+		pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
+
+	/* Only the first 512M of what the CAMs can map is considered */
+	ram = min_t(phys_addr_t, __max_low_memory, size);
+	ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true);
+	linear_sz = min_t(unsigned long, ram, SZ_512M);
+
+	/* If the linear size is smaller than 64M, do not randomize */
+	if (linear_sz < SZ_64M)
+		return 0;
+
+	/* check for a reserved-memory node and record its cell sizes */
+	regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory");
+	if (regions.reserved_mem >= 0)
+		get_cell_sizes(dt_ptr, regions.reserved_mem,
+			       &regions.reserved_mem_addr_cells,
+			       &regions.reserved_mem_size_cells);
+
+	regions.pa_start = memstart_addr;
+	regions.pa_end = memstart_addr + linear_sz;
+	regions.dtb_start = __pa(dt_ptr);
+	regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
+	regions.kernel_size = kernel_sz;
+
+	get_initrd_range(dt_ptr);
+	get_crash_kernel(dt_ptr, ram);
+
+	/*
+	 * Decide which 64M we want to start
+	 * Only use the low 8 bits of the random seed
+	 */
+	index = random & 0xFF;
+	index %= linear_sz / SZ_64M;
+
+	/* Decide offset inside 64M */
+	/* NOTE(review): assumes kernel_sz < SZ_64M, otherwise the modulus is 0 or wraps */
+	offset = random % (SZ_64M - kernel_sz);
+	offset = round_down(offset, SZ_16K);
+
+	return kaslr_legal_offset(dt_ptr, index, offset);
+}
+
+/*
+ * To see if we need to relocate the kernel to a random offset
+ * void *dt_ptr - address of the device tree
+ * phys_addr_t size - size of the first memory block
+ *
+ * Returns normally only when no randomization is performed (offset 0);
+ * otherwise the kernel image is copied to the new location and control is
+ * handed to reloc_kernel_entry().
+ */
+notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
+{
+	unsigned long tlb_virt;
+	phys_addr_t tlb_phys;
+	unsigned long offset;
+	unsigned long kernel_sz;
+
+	kernel_sz = (unsigned long)_end - (unsigned long)_stext;
+
+	offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
+	if (offset == 0)
+		return;
+
+	kernstart_virt_addr += offset;
+	kernstart_addr += offset;
+
+	/* Seen by relocate_init() the next time it runs */
+	is_second_reloc = 1;
+
+	/* A target at or beyond the first 64M gets its own 64M-aligned mapping */
+	if (offset >= SZ_64M) {
+		tlb_virt = round_down(kernstart_virt_addr, SZ_64M);
+		tlb_phys = round_down(kernstart_addr, SZ_64M);
+
+		/* Create kernel map to relocate in */
+		create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
+	}
+
+	/* Copy the kernel to its new location and run */
+	memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
+	flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
+
+	reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
+}
+
+/*
+ * After a KASLR relocation, wipe the copy of the kernel left at KERNELBASE.
+ * Does nothing if the kernel still runs at KERNELBASE.
+ */
+void __init kaslr_late_init(void)
+{
+	/* If randomized, clear the original kernel */
+	if (kernstart_virt_addr != KERNELBASE) {
+		unsigned long kernel_sz;
+
+		/* size of the running image, measured from its relocated start */
+		kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+		memzero_explicit((void *)KERNELBASE, kernel_sz);
+	}
+}
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
new file mode 100644
index 0000000000..e1f7de2e54
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing kernel userspace protection
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_PPC_KUAP
+/*
+ * Enable or disable Kernel Userspace Access Protection on this CPU.
+ *
+ * @disabled: when true, KUAP is turned off: on 40x disable_kuep is set as
+ *            well, and on the boot CPU MMU_FTR_KUAP is removed from the MMU
+ *            features. When false, user access is blocked for both read and
+ *            write.
+ */
+void setup_kuap(bool disabled)
+{
+	if (!disabled) {
+		pr_info("Activating Kernel Userspace Access Protection\n");
+
+		prevent_user_access(KUAP_READ_WRITE);
+		return;
+	}
+
+	if (IS_ENABLED(CONFIG_40x))
+		disable_kuep = true;
+
+	/* The feature bit is only cleared once, from the boot CPU */
+	if (smp_processor_id() == boot_cpuid)
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
new file mode 100644
index 0000000000..ccd5819b1b
--- /dev/null
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU is not using the hash
+ * table, such as 8xx, 4xx, BookE's etc...
+ *
+ * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
+ *                IBM Corp.
+ *
+ *  Derived from previous arch/powerpc/mm/mmu_context.c
+ *  and arch/powerpc/include/asm/mmu_context.h
+ *
+ * TODO:
+ *
+ *   - The global context lock will not scale very well
+ *   - The maps should be dynamically allocated to allow for processors
+ *     that support more PID bits at runtime
+ *   - Implement flush_tlb_mm() by making the context stale and picking
+ *     a new one
+ *   - More aggressively clear stale map bits and maybe find some way to
+ *     also clear mm->cpu_vm_mask bits when processes are migrated
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/memblock.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/smp.h>
+#include <asm/kup.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * Room for two PTE table pointers, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
+ *
+ * Slot 1 is updated with the incoming mm's pgd by switch_mmu_context() when
+ * CONFIG_BDI_SWITCH is enabled.
+ */
+void *abatron_pteptrs[2];
+
+/*
+ * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
+ * A better way would be to keep track of tasks that own contexts, and implement
+ * an LRU usage. That way very active tasks don't always have to pay the TLB
+ * reload overhead. The kernel pages are mapped shared, so the kernel can run on
+ * behalf of any task that makes a kernel entry. Shared does not mean they are
+ * not protected, just that the ASID comparison is not performed. -- Dan
+ *
+ * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
+ * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
+ * is disabled, so we can use a TID of zero to represent all kernel pages as
+ * shared among all contexts. -- Dan
+ *
+ * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
+ * normally never have to steal though the facility is present if needed.
+ * -- BenH
+ */
+/* Context ids handed out to user mms range from FIRST_CONTEXT to LAST_CONTEXT */
+#define FIRST_CONTEXT 1
+#if defined(CONFIG_PPC_8xx)
+#define LAST_CONTEXT 16
+#elif defined(CONFIG_PPC_47x)
+#define LAST_CONTEXT 65535
+#else
+#define LAST_CONTEXT 255
+#endif
+
+/*
+ * Context allocator state, modified with context_lock held:
+ *  next_context     - id at which the next search for a context starts
+ *  nr_free_contexts - number of ids currently unassigned
+ *  context_map      - bitmap of assigned ids
+ *  stale_map[cpu]   - per-CPU bitmap of ids whose TLB entries on that CPU
+ *                     must be flushed before the id is used again
+ *  context_mm       - mm currently owning each id
+ */
+static unsigned int next_context, nr_free_contexts;
+static unsigned long *context_map;
+static unsigned long *stale_map[NR_CPUS];
+static struct mm_struct **context_mm;
+static DEFINE_RAW_SPINLOCK(context_lock);
+
+/* Size in bytes of a bitmap with one bit per context id, 0..LAST_CONTEXT */
+#define CTX_MAP_SIZE	\
+	(sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
+
+
+/* Steal a context from a task that has one at the moment.
+ *
+ * This is used when we are running out of available PID numbers
+ * on the processors.
+ *
+ * This isn't an LRU system, it just frees up each context in
+ * turn (sort-of pseudo-random replacement :).  This would be the
+ * place to implement an LRU scheme if anyone was motivated to do it.
+ *  -- paulus
+ *
+ * For context stealing, we use a slightly different approach for
+ * SMP and UP. Basically, the UP one is simpler and doesn't use
+ * the stale map as we can just flush the local CPU
+ *  -- benh
+ *
+ * @id: context id to try first; later candidates follow in increasing
+ *      order, wrapping from LAST_CONTEXT to FIRST_CONTEXT.
+ *
+ * Returns the stolen id, or MMU_NO_CONTEXT if every candidate examined was
+ * active. In the latter case context_lock is dropped and re-taken before
+ * returning, so the caller must re-evaluate its state.
+ */
+static unsigned int steal_context_smp(unsigned int id)
+{
+	struct mm_struct *mm;
+	unsigned int cpu, max, i;
+
+	max = LAST_CONTEXT - FIRST_CONTEXT;
+
+	/* Attempt to free next_context first and then loop until we manage */
+	while (max--) {
+		/* Pick up the victim mm */
+		mm = context_mm[id];
+
+		/* We have a candidate victim, check if it's active, on SMP
+		 * we cannot steal active contexts
+		 */
+		if (mm->context.active) {
+			id++;
+			if (id > LAST_CONTEXT)
+				id = FIRST_CONTEXT;
+			continue;
+		}
+
+		/* Mark this mm has having no context anymore */
+		mm->context.id = MMU_NO_CONTEXT;
+
+		/* Mark it stale on all CPUs that used this mm. For threaded
+		 * implementations, we set it on all threads on each core
+		 * represented in the mask. A future implementation will use
+		 * a core map instead but this will do for now.
+		 */
+		for_each_cpu(cpu, mm_cpumask(mm)) {
+			for (i = cpu_first_thread_sibling(cpu);
+			     i <= cpu_last_thread_sibling(cpu); i++) {
+				if (stale_map[i])
+					__set_bit(id, stale_map[i]);
+			}
+			/* skip the siblings just handled in the outer iteration */
+			cpu = i - 1;
+		}
+		return id;
+	}
+
+	/* This will happen if you have more CPUs than available contexts,
+	 * all we can do here is wait a bit and try again
+	 */
+	raw_spin_unlock(&context_lock);
+	cpu_relax();
+	raw_spin_lock(&context_lock);
+
+	/* This will cause the caller to try again */
+	return MMU_NO_CONTEXT;
+}
+
+/*
+ * Take every context id away from its current owner, flush the whole TLB
+ * and hand FIRST_CONTEXT to the caller.
+ *
+ * FIRST_CONTEXT stays marked as allocated in context_map (the caller is
+ * about to own it); all other ids become free again.
+ *
+ * Returns FIRST_CONTEXT.
+ */
+static unsigned int steal_all_contexts(void)
+{
+	struct mm_struct *mm;
+	int cpu = smp_processor_id();
+	unsigned int id;
+
+	for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+		/* Pick up the victim mm */
+		mm = context_mm[id];
+
+		/* Mark this mm as having no context anymore */
+		mm->context.id = MMU_NO_CONTEXT;
+		if (id != FIRST_CONTEXT) {
+			context_mm[id] = NULL;
+			__clear_bit(id, context_map);
+		}
+		if (IS_ENABLED(CONFIG_SMP))
+			__clear_bit(id, stale_map[cpu]);
+	}
+
+	/* Flush the TLB for all contexts (not to be used on SMP) */
+	_tlbil_all();
+
+	/* every id except the one being returned is free now */
+	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
+
+	return FIRST_CONTEXT;
+}
+
+/* Note that this will also be called on SMP if all other CPUs are
+ * offlined, which means that it may be called for cpu != 0. For
+ * this to work, we somewhat assume that CPUs that are onlined
+ * come up with a fully clean TLB (or are cleaned when offlined)
+ *
+ * @id: context id to take over; its current owner loses it after that
+ *      owner's TLB entries have been flushed on the local CPU.
+ *
+ * Returns @id.
+ */
+static unsigned int steal_context_up(unsigned int id)
+{
+	struct mm_struct *mm;
+	int cpu = smp_processor_id();
+
+	/* Pick up the victim mm */
+	mm = context_mm[id];
+
+	/* Flush the TLB for that context */
+	local_flush_tlb_mm(mm);
+
+	/* Mark this mm has having no context anymore */
+	mm->context.id = MMU_NO_CONTEXT;
+
+	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
+	if (IS_ENABLED(CONFIG_SMP))
+		__clear_bit(id, stale_map[cpu]);
+
+	return id;
+}
+
+/*
+ * Program the MMU with context @id and page directory @pgd.
+ *
+ * On 8xx, M_TWB and M_CASID are written (M_CASID receives @id - 1). On other
+ * platforms the PID register is written, but only when KUAP is disabled.
+ * NOTE(review): with KUAP enabled the PID is presumably loaded elsewhere
+ * from thread.pid -- confirm against the KUAP code.
+ */
+static void set_context(unsigned long id, pgd_t *pgd)
+{
+	if (IS_ENABLED(CONFIG_PPC_8xx)) {
+		s16 offset = (s16)(__pa(swapper_pg_dir));
+
+		/*
+		 * Register M_TWB will contain base address of level 1 table minus the
+		 * lower part of the kernel PGDIR base address, so that all accesses to
+		 * level 1 table are done relative to lower part of kernel PGDIR base
+		 * address.
+		 */
+		mtspr(SPRN_M_TWB, __pa(pgd) - offset);
+
+		/* Update context */
+		mtspr(SPRN_M_CASID, id - 1);
+
+		/* sync */
+		mb();
+	} else if (kuap_is_disabled()) {
+		if (IS_ENABLED(CONFIG_40x))
+			mb();	/* sync */
+
+		mtspr(SPRN_PID, id);
+		isync();
+	}
+}
+
+/*
+ * Make @next's address space current on this CPU, assigning it a context id
+ * first if it has none (stealing one when no id is free).
+ *
+ * @prev: mm being switched away from, may be NULL
+ * @next: mm being switched to
+ * @tsk:  task being switched to; only used to record the id in
+ *        tsk->thread.pid when CONFIG_BOOKE_OR_40x and CONFIG_PPC_KUAP are set
+ *
+ * The whole operation runs under context_lock.
+ */
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+			struct task_struct *tsk)
+{
+	unsigned int id;
+	unsigned int i, cpu = smp_processor_id();
+	unsigned long *map;
+
+	/* No lockless fast path .. yet */
+	raw_spin_lock(&context_lock);
+
+	if (IS_ENABLED(CONFIG_SMP)) {
+		/* Mark us active and the previous one not anymore */
+		next->context.active++;
+		if (prev) {
+			WARN_ON(prev->context.active < 1);
+			prev->context.active--;
+		}
+	}
+
+ again:
+
+	/* If we already have a valid assigned context, skip all that */
+	id = next->context.id;
+	if (likely(id != MMU_NO_CONTEXT))
+		goto ctxt_ok;
+
+	/* We really don't have a context, let's try to acquire one */
+	id = next_context;
+	if (id > LAST_CONTEXT)
+		id = FIRST_CONTEXT;
+	map = context_map;
+
+	/* No more free contexts, let's try to steal one */
+	if (nr_free_contexts == 0) {
+		if (num_online_cpus() > 1) {
+			id = steal_context_smp(id);
+			/* the lock was dropped in between: start over */
+			if (id == MMU_NO_CONTEXT)
+				goto again;
+			goto stolen;
+		}
+		if (IS_ENABLED(CONFIG_PPC_8xx))
+			id = steal_all_contexts();
+		else
+			id = steal_context_up(id);
+		goto stolen;
+	}
+	nr_free_contexts--;
+
+	/* We know there's at least one free context, try to find it */
+	while (__test_and_set_bit(id, map)) {
+		id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
+		if (id > LAST_CONTEXT)
+			id = FIRST_CONTEXT;
+	}
+ stolen:
+	/* Record the new owner and where the next search should begin */
+	next_context = id + 1;
+	context_mm[id] = next;
+	next->context.id = id;
+
+ ctxt_ok:
+
+	/* If that context got marked stale on this CPU, then flush the
+	 * local TLB for it and unmark it before we use it
+	 */
+	if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
+		local_flush_tlb_mm(next);
+
+		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
+		for (i = cpu_first_thread_sibling(cpu);
+		     i <= cpu_last_thread_sibling(cpu); i++) {
+			if (stale_map[i])
+				__clear_bit(id, stale_map[i]);
+		}
+	}
+
+	/* Flick the MMU and release lock */
+	if (IS_ENABLED(CONFIG_BDI_SWITCH))
+		abatron_pteptrs[1] = next->pgd;
+	set_context(id, next->pgd);
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+	tsk->thread.pid = id;
+#endif
+	raw_spin_unlock(&context_lock);
+}
+
+/*
+ * Initialise the MMU context of a freshly created address space: not active
+ * anywhere, no context id assigned yet, no PTE fragment. @t is not used.
+ *
+ * Always returns 0.
+ */
+int init_new_context(struct task_struct *t, struct mm_struct *mm)
+{
+	mm->context.active = 0;
+	mm->context.id = MMU_NO_CONTEXT;
+
+	pte_frag_set(&mm->context, NULL);
+
+	return 0;
+}
+
+/*
+ * We're finished using the context for an address space.
+ *
+ * Gives the mm's context id, if it has one, back to the allocator.
+ */
+void destroy_context(struct mm_struct *mm)
+{
+	unsigned long flags;
+	unsigned int id;
+
+	/* Unlocked quick check; the id is re-read under the lock below */
+	if (mm->context.id == MMU_NO_CONTEXT)
+		return;
+
+	WARN_ON(mm->context.active != 0);
+
+	raw_spin_lock_irqsave(&context_lock, flags);
+	id = mm->context.id;
+	if (id != MMU_NO_CONTEXT) {
+		__clear_bit(id, context_map);
+		mm->context.id = MMU_NO_CONTEXT;
+		context_mm[id] = NULL;
+		nr_free_contexts++;
+	}
+	raw_spin_unlock_irqrestore(&context_lock, flags);
+}
+
+/*
+ * CPU hotplug "prepare" callback: allocate the stale-context bitmap of @cpu.
+ *
+ * Returns 0 on success or -ENOMEM when the bitmap cannot be allocated, so
+ * that a CPU is not brought online without one: switch_mmu_context() tests
+ * bits in stale_map[cpu] without checking the pointer.
+ */
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
+{
+	/* We don't touch the boot CPU map, it's allocated at boot and kept
+	 * around forever
+	 */
+	if (cpu == boot_cpuid)
+		return 0;
+
+	stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+	if (!stale_map[cpu])
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * CPU hotplug teardown callback: free the stale-context bitmap of @cpu and
+ * clear that CPU from the mm cpumasks. The boot CPU's map is never freed.
+ * Without CONFIG_HOTPLUG_CPU this does nothing.
+ *
+ * Always returns 0.
+ */
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	if (cpu == boot_cpuid)
+		return 0;
+
+	kfree(stale_map[cpu]);
+	stale_map[cpu] = NULL;
+
+	/* We also clear the cpu_vm_mask bits of CPUs going away */
+	clear_tasks_mm_cpumask(cpu);
+#endif
+	return 0;
+}
+
+/*
+ * Initialize the context management stuff.
+ *
+ * Allocates the context bitmap, the id -> mm table and, on SMP, the boot
+ * CPU's stale map (panicking if any allocation fails), registers the CPU
+ * hotplug callbacks that manage the other CPUs' stale maps, and marks the
+ * ids below FIRST_CONTEXT as permanently in use.
+ */
+void __init mmu_context_init(void)
+{
+	/* Mark init_mm as being active on all possible CPUs since
+	 * we'll get called with prev == init_mm the first time
+	 * we schedule on a given CPU
+	 */
+	init_mm.context.active = NR_CPUS;
+
+	/*
+	 * Allocate the maps used by context management
+	 */
+	context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+	if (!context_map)
+		panic("%s: Failed to allocate %zu bytes\n", __func__,
+		      CTX_MAP_SIZE);
+	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
+				    SMP_CACHE_BYTES);
+	if (!context_mm)
+		panic("%s: Failed to allocate %zu bytes\n", __func__,
+		      sizeof(void *) * (LAST_CONTEXT + 1));
+	if (IS_ENABLED(CONFIG_SMP)) {
+		stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+		if (!stale_map[boot_cpuid])
+			panic("%s: Failed to allocate %zu bytes\n", __func__,
+			      CTX_MAP_SIZE);
+
+		cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+					  "powerpc/mmu/ctx:prepare",
+					  mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
+	}
+
+	/* The reported size counts two bitmaps even when only one was allocated (!SMP) */
+	printk(KERN_INFO
+	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
+	       2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
+	       LAST_CONTEXT - FIRST_CONTEXT + 1);
+
+	/*
+	 * Some processors have too few contexts to reserve one for
+	 * init_mm, and require using context 0 for a normal task.
+	 * Other processors reserve the use of context zero for the kernel.
+	 * This code assumes FIRST_CONTEXT < 32.
+	 */
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_context = FIRST_CONTEXT;
+	nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
+}
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
new file mode 100644
index 0000000000..5ffa0af432
--- /dev/null
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU does not use a hash table to store virtual to
+ * physical translations (ie, SW loaded TLBs or Book3E compliant processors,
+ * this does -not- include 603 however which shares the implementation with
+ * hash based processors)
+ *
+ *  -- BenH
+ *
+ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
+ *                     IBM Corp.
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/hugetlb.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/code-patching.h>
+#include <asm/cputhreads.h>
+#include <asm/hugetlb.h>
+#include <asm/paca.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * This struct lists the sw-supported page sizes.  The hardware MMU may support
+ * other sizes not listed here.   The .ind field is only used on MMUs that have
+ * indirect page table entries.
+ */
+#ifdef CONFIG_PPC_E500
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {	/* indexed by MMU_PAGE_* */
+	[MMU_PAGE_4K] = {
+		.shift	= 12,	/* log2 of the page size in bytes */
+		.enc	= BOOK3E_PAGESZ_4K,	/* value handed out by mmu_get_tsize() */
+	},
+	[MMU_PAGE_2M] = {
+		.shift	= 21,
+		.enc	= BOOK3E_PAGESZ_2M,
+	},
+	[MMU_PAGE_4M] = {
+		.shift	= 22,
+		.enc	= BOOK3E_PAGESZ_4M,
+	},
+	[MMU_PAGE_16M] = {
+		.shift	= 24,
+		.enc	= BOOK3E_PAGESZ_16M,
+	},
+	[MMU_PAGE_64M] = {
+		.shift	= 26,
+		.enc	= BOOK3E_PAGESZ_64M,
+	},
+	[MMU_PAGE_256M] = {
+		.shift	= 28,
+		.enc	= BOOK3E_PAGESZ_256M,
+	},
+	[MMU_PAGE_1G] = {
+		.shift	= 30,
+		.enc	= BOOK3E_PAGESZ_1GB,
+	},
+};	/* slots not listed stay zeroed, i.e. shift == 0 */
+
+static inline int mmu_get_tsize(int psize)	/* psize: index into mmu_psize_defs[] */
+{
+	return mmu_psize_defs[psize].enc;	/* no bounds check on psize */
+}
+#else
+static inline int mmu_get_tsize(int psize)	/* !CONFIG_PPC_E500 stub */
+{
+	/* This isn't used on !Book3E for now, so psize is ignored */
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC_8xx
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {	/* 8xx: only .shift is set */
+	[MMU_PAGE_4K] = {
+		.shift	= 12,	/* log2 of the page size in bytes */
+	},
+	[MMU_PAGE_16K] = {
+		.shift	= 14,
+	},
+	[MMU_PAGE_512K] = {
+		.shift	= 19,
+	},
+	[MMU_PAGE_8M] = {
+		.shift	= 23,
+	},
+};
+#endif
+
+/* The variables below are currently only used on 64-bit Book3E
+ * though this will probably be made common with other nohash
+ * implementations at some point
+ */
+#ifdef CONFIG_PPC64
+
+int mmu_pte_psize;		/* MMU_PAGE_* size used for PTE pages */
+int mmu_vmemmap_psize;		/* MMU_PAGE_* size used for the virtual mem map */
+int book3e_htw_mode;		/* HW tablewalk?  Value is PPC_HTW_* (see setup_page_sizes) */
+unsigned long linear_map_top;	/* Top of linear mapping */
+
+
+/*
+ * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
+ * exceptions.  This is used for bolted and e6500 TLB miss handlers which
+ * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
+ * this is set to zero.
+ */
+int extlb_level_exc;	/* set to EX_TLB_SIZE for the bolted and e6500 handlers */
+
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_E500
+/* next_tlbcam_idx is a per-CPU cursor used to round-robin tlbcam entry assignment */
+DEFINE_PER_CPU(int, next_tlbcam_idx);
+EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
+#endif
+
+/*
+ * Base TLB flushing operations:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ *  - local_* variants of page and mm only apply to the current
+ *    processor
+ */
+
+#ifndef CONFIG_PPC_8xx
+/*
+ * These are the base non-SMP variants of page and mm flushing
+ */
+void local_flush_tlb_mm(struct mm_struct *mm)	/* flush mm's context on this CPU only */
+{
+	unsigned int pid;
+
+	preempt_disable();	/* context id is read and used without being preempted */
+	pid = mm->context.id;
+	if (pid != MMU_NO_CONTEXT)	/* no context assigned: nothing to flush */
+		_tlbil_pid(pid);
+	preempt_enable();
+}
+EXPORT_SYMBOL(local_flush_tlb_mm);
+
+void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+			    int tsize, int ind)	/* tsize/ind are passed through to _tlbil_va() */
+{
+	unsigned int pid;
+
+	preempt_disable();
+	pid = mm ? mm->context.id : 0;	/* a NULL mm falls back to PID 0 */
+	if (pid != MMU_NO_CONTEXT)	/* no context assigned: nothing to flush */
+		_tlbil_va(vmaddr, pid, tsize, ind);
+	preempt_enable();
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	__local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,	/* NULL vma -> NULL mm */
+			       mmu_get_tsize(mmu_virtual_psize), 0);	/* base page size, ind = 0 */
+}
+EXPORT_SYMBOL(local_flush_tlb_page);
+
+void local_flush_tlb_page_psize(struct mm_struct *mm,
+				unsigned long vmaddr, int psize)	/* psize: MMU_PAGE_* index */
+{
+	__local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0);	/* ind = 0 */
+}
+EXPORT_SYMBOL(local_flush_tlb_page_psize);
+
+#endif
+
+/*
+ * And here are the SMP non-local implementations
+ */
+#ifdef CONFIG_SMP
+
+static DEFINE_RAW_SPINLOCK(tlbivax_lock);	/* held around _tlbivax_bcast() when MMU_FTR_LOCK_BCAST_INVAL */
+
+struct tlb_flush_param {	/* argument block handed to the flush IPI handlers */
+	unsigned long addr;	/* virtual address to invalidate (page flush only) */
+	unsigned int pid;	/* context id snapshot taken by the sender */
+	unsigned int tsize;	/* forwarded to _tlbil_va() */
+	unsigned int ind;	/* forwarded to _tlbil_va() */
+};
+
+static void do_flush_tlb_mm_ipi(void *param)	/* IPI handler: flush one PID locally */
+{
+	struct tlb_flush_param *p = param;
+
+	_tlbil_pid(p ? p->pid : 0);	/* NULL param (flush_tlb_kernel_range) means PID 0 */
+}
+
+static void do_flush_tlb_page_ipi(void *param)	/* IPI handler: flush one page locally */
+{
+	struct tlb_flush_param *p = param;	/* must be non-NULL, unlike the mm handler */
+
+	_tlbil_va(p->addr, p->pid, p->tsize, p->ind);
+}
+
+
+/* Note on invalidations and PID:
+ *
+ * We snapshot the PID with preempt disabled. At this point, it can still
+ * change either because:
+ * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
+ * - we are invalidating some target that isn't currently running here
+ *   and is concurrently acquiring a new PID on another CPU
+ * - some other CPU is re-acquiring a lost PID for this mm
+ * etc...
+ *
+ * However, this shouldn't be a problem as we only guarantee
+ * invalidation of TLB entries present prior to this call, so we
+ * don't care about the PID changing, and invalidating a stale PID
+ * is generally harmless.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned int ctx;
+
+	preempt_disable();
+	ctx = mm->context.id;
+	if (likely(ctx != MMU_NO_CONTEXT)) {
+		/* Remote CPUs first (the IPI never targets ourselves), then us */
+		if (!mm_is_core_local(mm)) {
+			struct tlb_flush_param arg = { .pid = ctx };
+
+			smp_call_function_many(mm_cpumask(mm),
+					       do_flush_tlb_mm_ipi, &arg, 1);
+		}
+		_tlbil_pid(ctx);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_mm);
+
+void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+		      int tsize, int ind)	/* tsize/ind are forwarded to the low-level flush */
+{
+	struct cpumask *cpu_mask;
+	unsigned int pid;
+
+	/*
+	 * This function as well as __local_flush_tlb_page() must only be called
+	 * for user contexts.
+	 */
+	if (WARN_ON(!mm))
+		return;
+
+	preempt_disable();
+	pid = mm->context.id;
+	if (unlikely(pid == MMU_NO_CONTEXT))	/* no context assigned: nothing to flush */
+		goto bail;
+	cpu_mask = mm_cpumask(mm);
+	if (!mm_is_core_local(mm)) {
+		/* If broadcast tlbivax is supported, use it */
+		if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
+			int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
+			if (lock)	/* broadcasts are serialised only when the feature asks for it */
+				raw_spin_lock(&tlbivax_lock);
+			_tlbivax_bcast(vmaddr, pid, tsize, ind);
+			if (lock)
+				raw_spin_unlock(&tlbivax_lock);
+			goto bail;	/* broadcast path skips the local _tlbil_va() below */
+		} else {
+			struct tlb_flush_param p = {
+				.pid = pid,
+				.addr = vmaddr,
+				.tsize = tsize,
+				.ind = ind,
+			};
+			/* Ignores smp_processor_id() even if set in cpu_mask */
+			smp_call_function_many(cpu_mask,
+					       do_flush_tlb_page_ipi, &p, 1);
+		}
+	}
+	_tlbil_va(vmaddr, pid, tsize, ind);	/* local flush (core-local mm or after the IPIs) */
+ bail:
+	preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	if (vma && is_vm_hugetlb_page(vma))
+		flush_hugetlb_page(vma, vmaddr);	/* NOTE(review): no return, the base-size flush below still runs */
+#endif
+
+	__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,	/* NULL vma -> NULL mm, which WARNs in the callee */
+			 mmu_get_tsize(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Flush kernel TLB entries in the given range
+ */
+#ifndef CONFIG_PPC_8xx
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)	/* start/end are not used */
+{
+#ifdef CONFIG_SMP
+	preempt_disable();
+	smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);	/* NULL param -> handler flushes PID 0 */
+	_tlbil_pid(0);	/* then the same flush on this CPU */
+	preempt_enable();
+#else
+	_tlbil_pid(0);	/* UP: flush everything tagged with PID 0 */
+#endif
+}
+EXPORT_SYMBOL(flush_tlb_kernel_range);
+#endif
+
+/*
+ * Currently, for range flushing, we just do a full mm flush. This should
+ * be optimized based on a threshold on the size of the range, since
+ * some implementation can stack multiple tlbivax before a tlbsync but
+ * for now, we keep it that way
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	/* Only an aligned range of exactly one page gets a single-page flush */
+	if (end - start != PAGE_SIZE || (start & ~PAGE_MASK))
+		flush_tlb_mm(vma->vm_mm);
+	else
+		flush_tlb_page(vma, start);
+}
+EXPORT_SYMBOL(flush_tlb_range);
+
+void tlb_flush(struct mmu_gather *tlb)	/* flushes the whole mm the gather refers to */
+{
+	flush_tlb_mm(tlb->mm);	/* no per-range tracking is consulted here */
+}
+
+/*
+ * Below are functions specific to the 64-bit variant of Book3E though that
+ * may change in the future
+ */
+
+#ifdef CONFIG_PPC64
+
+/*
+ * Handling of virtual linear page tables or indirect TLB entries
+ * flushing when PTE pages are freed (address: a VA covered by the freed PTE page)
+ */
+void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+	int tsize = mmu_psize_defs[mmu_pte_psize].enc;
+
+	if (book3e_htw_mode != PPC_HTW_NONE) {	/* HW tablewalk: flush with ind = 1 */
+		unsigned long start = address & PMD_MASK;
+		unsigned long end = address + PMD_SIZE;	/* NOTE(review): based on the unaligned address, unlike start */
+		unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+
+		/* This isn't the most optimal, ideally we would factor out the
+		 * while preempt & CPU mask mucking around, or even the IPI but
+		 * it will do for now
+		 */
+		while (start < end) {
+			__flush_tlb_page(tlb->mm, start, tsize, 1);
+			start += size;
+		}
+	} else {	/* no HW tablewalk: flush one computed virtual-page-table address */
+		unsigned long rmask = 0xf000000000000000ul;	/* top four address bits */
+		unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+		unsigned long vpte = address & ~rmask;
+
+		vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;	/* scale down, then clear the low 12 bits */
+		vpte |= rid;
+		__flush_tlb_page(tlb->mm, vpte, tsize, 0);
+	}
+}
+
+/* Probe the TLB config SPRs; flag supported sizes in mmu_psize_defs[], set book3e_htw_mode. */
+static void __init setup_page_sizes(void)
+{
+	unsigned int tlb0cfg, tlb0ps, eptcfg;
+	int i, psize;
+
+#ifdef CONFIG_PPC_E500
+	unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+	int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
+
+	if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+		unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
+		unsigned int min_pg, max_pg;
+
+		min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
+		max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
+
+		for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+			struct mmu_psize_def *def;
+			unsigned int shift;
+
+			def = &mmu_psize_defs[psize];
+			shift = def->shift;
+
+			if (shift == 0 || shift & 1)	/* unused slot or odd power of two */
+				continue;
+
+			/* adjust to be in terms of 4^shift Kb */
+			shift = (shift - 10) >> 1;
+
+			if ((shift >= min_pg) && (shift <= max_pg))
+				def->flags |= MMU_PAGE_SIZE_DIRECT;
+		}
+
+		goto out;
+	}
+
+	if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
+		u32 tlb1cfg, tlb1ps;
+
+		tlb0cfg = mfspr(SPRN_TLB0CFG);
+		tlb1cfg = mfspr(SPRN_TLB1CFG);
+		tlb1ps = mfspr(SPRN_TLB1PS);
+		eptcfg = mfspr(SPRN_EPTCFG);
+
+		if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
+			book3e_htw_mode = PPC_HTW_E6500;
+
+		/*
+		 * We expect 4K subpage size and unrestricted indirect size.
+		 * The lack of a restriction on indirect size is a Freescale
+		 * extension, indicated by PSn = 0 but SPSn != 0.
+		 */
+		if (eptcfg != 2)
+			book3e_htw_mode = PPC_HTW_NONE;
+
+		for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+			struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+			if (!def->shift)	/* unused slot */
+				continue;
+
+			if (tlb1ps & (1U << (def->shift - 10))) {
+				def->flags |= MMU_PAGE_SIZE_DIRECT;
+
+				if (book3e_htw_mode && psize == MMU_PAGE_2M)
+					def->flags |= MMU_PAGE_SIZE_INDIRECT;
+			}
+		}
+
+		goto out;
+	}
+#endif
+
+	tlb0cfg = mfspr(SPRN_TLB0CFG);
+	tlb0ps = mfspr(SPRN_TLB0PS);
+	eptcfg = mfspr(SPRN_EPTCFG);
+
+	/* Look for supported direct sizes */
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+		/* Unused slots have shift == 0; shifting by (0 - 10) is undefined */
+		if (def->shift && (tlb0ps & (1U << (def->shift - 10))))
+			def->flags |= MMU_PAGE_SIZE_DIRECT;
+	}
+
+	/* Indirect page sizes supported ? */
+	if ((tlb0cfg & TLBnCFG_IND) == 0 ||
+	    (tlb0cfg & TLBnCFG_PT) == 0)
+		goto out;
+
+	book3e_htw_mode = PPC_HTW_IBM;
+
+	/* Now, we only deal with one IND page size for each
+	 * direct size. Hopefully all implementations today are
+	 * unambiguous, but we might want to be careful in the
+	 * future.
+	 */
+	for (i = 0; i < 3; i++) {	/* EPTCFG is consumed as three (sps, ps) 5-bit pairs */
+		unsigned int ps, sps;
+
+		sps = eptcfg & 0x1f;
+		eptcfg >>= 5;
+		ps = eptcfg & 0x1f;
+		eptcfg >>= 5;
+		if (!ps || !sps)
+			continue;
+		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+			struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+			if (ps == (def->shift - 10))
+				def->flags |= MMU_PAGE_SIZE_INDIRECT;
+			if (sps == (def->shift - 10))
+				def->ind = ps + 10;
+		}
+	}
+
+out:
+	/* Cleanup array and print summary */
+	pr_info("MMU: Supported page sizes\n");
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		struct mmu_psize_def *def = &mmu_psize_defs[psize];
+		const char *__page_type_names[] = {
+			"unsupported",
+			"direct",
+			"indirect",
+			"direct & indirect"
+		};
+		if (def->flags == 0) {	/* not supported: mark the slot unused */
+			def->shift = 0;
+			continue;
+		}
+		pr_info("  %8ld KB as %s\n", 1ul << (def->shift - 10),
+			__page_type_names[def->flags & 0x3]);
+	}
+}
+
+static void __init setup_mmu_htw(void)	/* reads book3e_htw_mode, so runs after setup_page_sizes() */
+{
+	/*
+	 * If we want to use HW tablewalk, enable it by patching the TLB miss
+	 * handlers to branch to the one dedicated to it.
+	 */
+
+	switch (book3e_htw_mode) {	/* any other mode: nothing is patched here */
+	case PPC_HTW_IBM:
+		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
+		break;
+#ifdef CONFIG_PPC_E500
+	case PPC_HTW_E6500:
+		extlb_level_exc = EX_TLB_SIZE;	/* see the comment at extlb_level_exc */
+		patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
+		break;
+#endif
+	}
+	pr_info("MMU: Book3E HW tablewalk %s\n",
+		book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
+}
+
+/*
+ * Per-CPU early MMU setup: program MAS4 and, on FSL, bolt the linear map
+ */
+static void early_init_this_mmu(void)
+{
+	unsigned int mas4;
+
+	/* Set MAS4 based on page table setting */
+
+	mas4 = 0x4 << MAS4_WIMGED_SHIFT;	/* NOTE(review): 0x4 presumably selects M (coherence); confirm */
+	switch (book3e_htw_mode) {
+	case PPC_HTW_E6500:
+		mas4 |= MAS4_INDD;
+		mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
+		mas4 |= MAS4_TLBSELD(1);
+		mmu_pte_psize = MMU_PAGE_2M;
+		break;
+
+	case PPC_HTW_IBM:
+		mas4 |= MAS4_INDD;
+		mas4 |=	BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
+		mmu_pte_psize = MMU_PAGE_1M;
+		break;
+
+	case PPC_HTW_NONE:
+		mas4 |=	BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+		mmu_pte_psize = mmu_virtual_psize;
+		break;
+	}
+	mtspr(SPRN_MAS4, mas4);
+
+#ifdef CONFIG_PPC_E500
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+		unsigned int num_cams;
+		bool map = true;
+
+		/* use a quarter of the TLBCAM for bolted linear map */
+		num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+		/*
+		 * Only do the mapping once per core, or else the
+		 * transient mapping would cause problems.
+		 */
+#ifdef CONFIG_SMP
+		if (hweight32(get_tensr()) > 1)	/* more than one bit set in TENSR: skip the mapping */
+			map = false;
+#endif
+
+		if (map)
+			linear_map_top = map_mem_in_cams(linear_map_top,
+							 num_cams, false, true);	/* result becomes the new top */
+	}
+#endif
+
+	/* A sync won't hurt us after mucking around with
+	 * the MMU configuration
+	 */
+	mb();
+}
+
+static void __init early_init_mmu_global(void)	/* one-time part, called from early_init_mmu() */
+{
+	/* XXX This should be decided at runtime based on supported
+	 * page sizes in the TLB, but for now let's assume 16M is
+	 * always there and a good fit (which it probably is)
+	 *
+	 * Freescale booke only supports 4K pages in TLB0, so use that.
+	 */
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		mmu_vmemmap_psize = MMU_PAGE_4K;
+	else
+		mmu_vmemmap_psize = MMU_PAGE_16M;
+
+	/* XXX This code only checks for TLB 0 capabilities and doesn't
+	 *     check what page size combos are supported by the HW. It
+	 *     also doesn't handle the case where a separate array holds
+	 *     the IND entries from the array loaded by the PT.
+	 */
+	/* Look for supported page sizes (this also decides book3e_htw_mode) */
+	setup_page_sizes();
+
+	/* Look for HW tablewalk support */
+	setup_mmu_htw();
+
+#ifdef CONFIG_PPC_E500
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+		if (book3e_htw_mode == PPC_HTW_NONE) {	/* FSL without HW tablewalk: bolted handlers */
+			extlb_level_exc = EX_TLB_SIZE;
+			patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+			patch_exception(0x1e0,
+				exc_instruction_tlb_miss_bolted_book3e);
+		}
+	}
+#endif
+
+	/* Set the global containing the top of the linear mapping
+	 * for use by the TLB miss code
+	 */
+	linear_map_top = memblock_end_of_DRAM();
+
+	ioremap_bot = IOREMAP_BASE;
+}
+
+static void __init early_mmu_set_memory_limit(void)	/* uses linear_map_top as the limit */
+{
+#ifdef CONFIG_PPC_E500
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+		/*
+		 * Limit memory so we dont have linear faults.
+		 * Unlike memblock_set_current_limit, which limits
+		 * memory available during early boot, this permanently
+		 * reduces the memory available to Linux.  We need to
+		 * do this because highmem is not supported on 64-bit.
+		 */
+		memblock_enforce_memory_limit(linear_map_top);
+	}
+#endif
+
+	memblock_set_current_limit(linear_map_top);	/* done on every platform, FSL or not */
+}
+
+/* boot cpu only: global setup, then this CPU's MMU, then the memblock limits */
+void __init early_init_mmu(void)
+{
+	early_init_mmu_global();
+	early_init_this_mmu();	/* may update linear_map_top, which the next call reads */
+	early_mmu_set_memory_limit();
+}
+
+void early_init_mmu_secondary(void)	/* per-CPU part only; no global setup is repeated */
+{
+	early_init_this_mmu();
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+				phys_addr_t first_memblock_size)
+{
+	/*
+	 * Amount of the first memblock that may serve as RMA, before the
+	 * 1G clamp applied further down.  Without FSL CAMs this is simply
+	 * the size of the first memblock.
+	 */
+	u64 rma_candidate = first_memblock_size;
+
+	/*
+	 * Non-FSL Embedded 64-bit: the RMA size is matched to the bolted
+	 * TLB entry, and for now only 1G entries are supported (this may
+	 * eventually change).
+	 *
+	 * FSL Embedded 64-bit: usually all RAM is bolted, but unusual
+	 * memory sizes can leave some RAM unmapped (Linux never uses such
+	 * RAM since highmem is not supported on 64-bit).  Take what would
+	 * be mappable if this memblock were the only one; additional
+	 * memblocks can only increase the mapped amount.  The 1G maximum
+	 * still applies because of what the early init code is set up to do.
+	 */
+#ifdef CONFIG_PPC_E500
+	if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+		/* a quarter of the TLBCAM goes to the bolted linear map */
+		unsigned int cam_budget =
+			(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+		rma_candidate = map_mem_in_cams(first_memblock_size,
+						cam_budget, true, true);
+	}
+#endif
+
+	/* Crop to 1G so we never run past the memory actually available */
+	ppc64_rma_size = min_t(u64, rma_candidate, 0x40000000);
+
+	/* Finally limit subsequent allocations */
+	memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
+}
+#else /* ! CONFIG_PPC64 */
+void __init early_init_mmu(void)	/* 32-bit (!CONFIG_PPC64) variant */
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (IS_ENABLED(CONFIG_PPC_47x) && IS_ENABLED(CONFIG_SMP) &&
+	    of_get_flat_dt_prop(root, "cooperative-partition", NULL))	/* property on the flat DT root */
+		mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);	/* __flush_tlb_page() then uses IPIs */
+}
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
new file mode 100644
index 0000000000..e1199608ff
--- /dev/null
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -0,0 +1,472 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low-level functions for performing various
+ * types of TLB invalidations on various processors with no hash
+ * table.
+ *
+ * This file implements the following functions for all no-hash
+ * processors. Some aren't implemented for some variants. Some
+ * are inline in tlbflush.h
+ *
+ *	- tlbil_va
+ *	- tlbil_pid
+ *	- tlbil_all
+ *	- tlbivax_bcast
+ *
+ * Code mostly moved over from misc_32.S
+ *
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Partially rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt.
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor.h>
+#include <asm/bug.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#if defined(CONFIG_40x)
+
+/*
+ * 40x implementation needs only tlbil_va
+ */
+_GLOBAL(__tlbil_va)
+	/* We run the search with interrupts disabled because we have to change
+	 * the PID and I don't want to preempt when that happens.
+	 */
+	mfmsr	r5		/* save MSR for the wrtee below */
+	mfspr	r6,SPRN_PID	/* save the current PID */
+	wrteei	0
+	mtspr	SPRN_PID,r4	/* search under the PID passed in r4 */
+	tlbsx.	r3, 0, r3	/* look up the address in r3; index returned in r3 */
+	mtspr	SPRN_PID,r6	/* restore the PID */
+	wrtee	r5
+	bne	1f		/* no matching entry: nothing to invalidate */
+	sync
+	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
+	 * clear. Since 25 is the V bit in the TLB_TAG, loading this value
+	 * will invalidate the TLB entry. */
+	tlbwe	r3, r3, TLB_TAG
+	isync
+1:	blr
+
+#elif defined(CONFIG_PPC_8xx)
+
+/*
+ * Nothing to do for 8xx, everything is inline
+ */
+
+#elif defined(CONFIG_44x) /* Includes 47x */
+
+/*
+ * 440 implementation uses tlbsx/we for tlbil_va and a full sweep
+ * of the TLB for everything else.
+ */
+_GLOBAL(__tlbil_va)
+	mfspr	r5,SPRN_MMUCR
+	mfmsr   r10
+
+	/*
+	 * We write 16 bits of STID since 47x supports that much, we
+	 * will never be passed out of bounds values on 440 (hopefully)
+	 */
+	rlwimi  r5,r4,0,16,31
+
+	/* We have to run the search with interrupts disabled, otherwise
+	 * an interrupt which causes a TLB miss can clobber the MMUCR
+	 * between the mtspr and the tlbsx.
+	 *
+	 * Critical and Machine Check interrupts take care of saving
+	 * and restoring MMUCR, so only normal interrupts have to be
+	 * taken care of.
+	 */
+	wrteei	0
+	mtspr	SPRN_MMUCR,r5
+	tlbsx.	r6,0,r3
+	bne	10f
+	sync
+#ifndef CONFIG_PPC_47x
+	/* On 440 There are only 64 TLB entries, so r6 < 64, which means bit
+	 * 22, is clear.  Since 22 is the V bit in the TLB_PAGEID, loading this
+	 * value will invalidate the TLB entry.
+	 */
+	tlbwe	r6,r6,PPC44x_TLB_PAGEID
+#else
+	oris	r7,r6,0x8000	/* specify way explicitly */
+	clrrwi	r4,r3,12	/* get an EPN for the hashing with V = 0 */
+	ori	r4,r4,PPC47x_TLBE_SIZE
+	tlbwe   r4,r7,0		/* write it */
+#endif /* !CONFIG_PPC_47x */
+	isync
+10:	wrtee	r10
+	blr
+
+_GLOBAL(_tlbil_all)
+_GLOBAL(_tlbil_pid)		/* same code: r3 is overwritten, so the PID is not used */
+#ifndef CONFIG_PPC_47x
+	li	r3,0		/* entry index, starting at 0 */
+	sync
+
+	/* Load high watermark */
+	lis	r4,tlb_44x_hwater@ha
+	lwz	r5,tlb_44x_hwater@l(r4)
+
+1:	tlbwe	r3,r3,PPC44x_TLB_PAGEID
+	addi	r3,r3,1
+	cmpw	0,r3,r5
+	ble	1b		/* loop while index <= watermark */
+
+	isync
+	blr
+#else
+	/* 476 variant. There's no simple way to do this, hopefully we'll
+	 * try to limit the amount of such full invalidates
+	 */
+	mfmsr	r11		/* Interrupts off */
+	wrteei	0
+	li	r3,-1		/* Current set */
+	lis	r10,tlb_47x_boltmap@h
+	ori	r10,r10,tlb_47x_boltmap@l
+	lis	r7,0x8000	/* Specify way explicitly */
+
+	b	9f		/* For each set */
+
+1:	li	r9,4		/* Number of ways */
+	li	r4,0		/* Current way */
+	li	r6,0		/* Default entry value 0 */
+	andi.	r0,r8,1		/* Check if way 0 is bolted */
+	mtctr	r9		/* Load way counter */
+	bne-	3f		/* Bolted, skip loading it */
+
+2:	/* For each way */
+	or	r5,r3,r4	/* Make way|index for tlbre */
+	rlwimi	r5,r5,16,8,15	/* Copy index into position */
+	tlbre	r6,r5,0		/* Read entry */
+3:	addis	r4,r4,0x2000	/* Next way */
+	andi.	r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */
+	beq	4f		/* Nope, skip it */
+	rlwimi	r7,r5,0,1,2	/* Insert way number */
+	rlwinm	r6,r6,0,21,19	/* Clear V */
+	tlbwe   r6,r7,0		/* Write it */
+4:	bdnz	2b		/* Loop for each way */
+	srwi	r8,r8,1		/* Next boltmap bit */
+9:	cmpwi	cr1,r3,255	/* Last set done ? */
+	addi	r3,r3,1		/* Next set */
+	beq	cr1,1f		/* End of loop */
+	andi.	r0,r3,0x1f	/* Need to load a new boltmap word ? */
+	bne	1b		/* No, loop */
+	lwz	r8,0(r10)	/* Load boltmap entry */
+	addi	r10,r10,4	/* Next word */
+	b	1b		/* Then loop */
+1:	isync			/* Sync shadows */
+	wrtee	r11
+	blr
+#endif /* !CONFIG_PPC_47x */
+
+#ifdef CONFIG_PPC_47x
+
+/*
+ * _tlbivax_bcast is only on 47x. We don't bother doing a runtime
+ * check though, it will blow up soon enough if we mistakenly try
+ * to use it on a 440.
+ */
+_GLOBAL(_tlbivax_bcast)
+	mfspr	r5,SPRN_MMUCR
+	mfmsr	r10		/* saved MSR, restored by wrtee on both exit paths */
+	rlwimi	r5,r4,0,16,31	/* insert the low 16 bits of r4 into MMUCR */
+	wrteei	0
+	mtspr	SPRN_MMUCR,r5
+	isync
+	PPC_TLBIVAX(0, R3)
+	isync
+	mbar
+	tlbsync
+BEGIN_FTR_SECTION
+	b	1f		/* 476 DD2 takes the workaround path below */
+END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
+	sync
+	wrtee	r10
+	blr
+/*
+ * DD2 HW could hang if an instruction fetch happens before msync completes.
+ * Touch enough instruction cache lines to ensure cache hits
+ */
+1:	mflr	r9		/* save LR, clobbered by the bcl below */
+	bcl	20,31,$+4	/* puts the address of label 2 into LR */
+2:	mflr	r6
+	li	r7,32
+	PPC_ICBT(0,R6,R7)		/* touch next cache line */
+	add	r6,r6,r7
+	PPC_ICBT(0,R6,R7)		/* touch next cache line */
+	add	r6,r6,r7
+	PPC_ICBT(0,R6,R7)		/* touch next cache line */
+	sync
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	mtlr	r9		/* restore the caller's LR */
+	wrtee	r10
+	blr
+#endif /* CONFIG_PPC_47x */
+
+#elif defined(CONFIG_PPC_85xx)
+/*
+ * FSL BookE implementations.
+ *
+ * Since feature sections are using _SECTION_ELSE we need
+ * to have the larger code path before the _SECTION_ELSE
+ */
+
+/*
+ * Flush MMU TLB on the local processor
+ */
+_GLOBAL(_tlbil_all)
+BEGIN_MMU_FTR_SECTION		/* used when MMU_FTR_USE_TLBILX is clear */
+	li	r3,(MMUCSR0_TLBFI)@l
+	mtspr	SPRN_MMUCSR0, r3	/* request the flash invalidate */
+1:
+	mfspr	r3,SPRN_MMUCSR0
+	andi.	r3,r3,MMUCSR0_TLBFI@l
+	bne	1b		/* spin until the TLBFI bits read back as zero */
+MMU_FTR_SECTION_ELSE
+	PPC_TLBILX_ALL(0,R0)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_pid)
+BEGIN_MMU_FTR_SECTION		/* used when MMU_FTR_USE_TLBILX is set */
+	slwi	r3,r3,16	/* move the PID (r3) into position for MAS6 */
+	mfmsr	r10
+	wrteei	0
+	mfspr	r4,SPRN_MAS6	/* save MAS6 */
+	mtspr	SPRN_MAS6,r3
+	PPC_TLBILX_PID(0,R0)
+	mtspr	SPRN_MAS6,r4	/* restore MAS6 */
+	wrtee	r10
+MMU_FTR_SECTION_ELSE		/* no tlbilx: flash-invalidate, regardless of PID */
+	li	r3,(MMUCSR0_TLBFI)@l
+	mtspr	SPRN_MMUCSR0, r3
+1:
+	mfspr	r3,SPRN_MMUCSR0
+	andi.	r3,r3,MMUCSR0_TLBFI@l
+	bne	1b		/* spin until the TLBFI bits read back as zero */
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBILX)
+	msync
+	isync
+	blr
+
+/*
+ * Flush MMU TLB for a particular address, but only on the local processor
+ * (no broadcast)
+ */
+_GLOBAL(__tlbil_va)
+	mfmsr	r10		/* saved MSR, restored by wrtee at 1: */
+	wrteei	0
+	slwi	r4,r4,16	/* move the PID (r4) into position for MAS6 */
+	ori	r4,r4,(MAS6_ISIZE(BOOK3E_PAGESZ_4K))@l
+	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
+BEGIN_MMU_FTR_SECTION		/* used when MMU_FTR_USE_TLBILX is clear */
+	tlbsx	0,r3
+	mfspr	r4,SPRN_MAS1		/* check valid */
+	andis.	r3,r4,MAS1_VALID@h
+	beq	1f		/* not valid: skip the write and the syncs */
+	rlwinm	r4,r4,0,1,31	/* clear bit 0, presumably MAS1_VALID (confirm) */
+	mtspr	SPRN_MAS1,r4
+	tlbwe
+MMU_FTR_SECTION_ELSE
+	PPC_TLBILX_VA(0,R3)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
+	msync
+	isync
+1:	wrtee	r10
+	blr
+#elif defined(CONFIG_PPC_BOOK3E_64)
+/*
+ * New Book3E (>= 2.06) implementation
+ *
+ * Note: We may be able to get away without the interrupt masking stuff
+ * if we save/restore MAS6 on exceptions that might modify it
+ */
+_GLOBAL(_tlbil_pid)
+	slwi	r4,r3,MAS6_SPID_SHIFT	/* PID (r3) into the MAS6 SPID position */
+	mfmsr	r10
+	wrteei	0
+	mtspr	SPRN_MAS6,r4
+	PPC_TLBILX_PID(0,R0)
+	wrtee	r10		/* MAS6 is not restored, unlike the 85xx variant */
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_pid_noind)
+	slwi	r4,r3,MAS6_SPID_SHIFT	/* PID (r3) into the MAS6 SPID position */
+	mfmsr	r10
+	ori	r4,r4,MAS6_SIND	/* only difference from _tlbil_pid: SIND is set */
+	wrteei	0
+	mtspr	SPRN_MAS6,r4
+	PPC_TLBILX_PID(0,R0)
+	wrtee	r10
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_all)		/* takes no arguments; MAS6 is not touched */
+	PPC_TLBILX_ALL(0,R0)
+	msync
+	isync
+	blr
+
+_GLOBAL(_tlbil_va)		/* r3 = address, r4 = pid, r5 = tsize, r6 = ind */
+	mfmsr	r10
+	wrteei	0
+	cmpwi	cr0,r6,0	/* ind == 0 ? */
+	slwi	r4,r4,MAS6_SPID_SHIFT
+	rlwimi	r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
+	beq	1f		/* ind == 0: leave SIND clear */
+	rlwimi	r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
+1:	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
+	PPC_TLBILX_VA(0,R3)
+	msync
+	isync
+	wrtee	r10
+	blr
+
+_GLOBAL(_tlbivax_bcast)		/* r3 = address, r4 = pid, r5 = tsize, r6 = ind */
+	mfmsr	r10
+	wrteei	0
+	cmpwi	cr0,r6,0	/* ind == 0 ? */
+	slwi	r4,r4,MAS6_SPID_SHIFT
+	rlwimi	r4,r5,MAS6_ISIZE_SHIFT,MAS6_ISIZE_MASK
+	beq	1f		/* ind == 0: leave SIND clear */
+	rlwimi	r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
+1:	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */
+	PPC_TLBIVAX(0,R3)
+	mbar
+	tlbsync
+	sync
+	wrtee	r10
+	blr
+#else
+#error Unsupported processor type !
+#endif
+
+#if defined(CONFIG_PPC_E500)
+/*
+ * extern void loadcam_entry(unsigned int index)
+ *
+ * Load TLBCAM[index] entry in to the L2 CAM MMU
+ * Must preserve r7, r8, r9, r10, r11, r12
+ */
+_GLOBAL(loadcam_entry)
+	mflr	r5		/* keep LR in r5 across LOAD_REG_ADDR_PIC */
+	LOAD_REG_ADDR_PIC(r4, TLBCAM)
+	mtlr	r5
+	mulli	r5,r3,TLBCAM_SIZE	/* byte offset of entry 'index' */
+	add	r3,r5,r4	/* r3 = &TLBCAM[index] */
+	lwz	r4,TLBCAM_MAS0(r3)
+	mtspr	SPRN_MAS0,r4
+	lwz	r4,TLBCAM_MAS1(r3)
+	mtspr	SPRN_MAS1,r4
+	PPC_LL	r4,TLBCAM_MAS2(r3)
+	mtspr	SPRN_MAS2,r4
+	lwz	r4,TLBCAM_MAS3(r3)
+	mtspr	SPRN_MAS3,r4
+BEGIN_MMU_FTR_SECTION		/* MAS7 is only loaded with MMU_FTR_BIG_PHYS */
+	lwz	r4,TLBCAM_MAS7(r3)
+	mtspr	SPRN_MAS7,r4
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
+	isync
+	tlbwe
+	isync
+	blr
+
+/*
+ * Load multiple TLB entries at once, using an alternate-space
+ * trampoline so that we don't have to care about whether the same
+ * TLB entry maps us before and after.
+ *
+ * r3 = first entry to write
+ * r4 = number of entries to write
+ * r5 = temporary tlb entry (0 means no switch to AS1)
+ */
+_GLOBAL(loadcam_multi)
+	mflr	r8		/* save LR; loadcam_entry preserves r7-r12 */
+	/* Don't switch to AS=1 if already there */
+	mfmsr	r11
+	andi.	r11,r11,MSR_IS
+	bne	10f
+	mr.	r12, r5		/* remember r5; zero means no switch to AS1 */
+	beq	10f
+
+	/*
+	 * Set up temporary TLB entry that is the same as what we're
+	 * running from, but in AS=1.
+	 */
+	bcl	20,31,$+4
+1:	mflr	r6		/* NOTE(review): r6 is overwritten below; tlbsx searches r8 */
+	tlbsx	0,r8
+	mfspr	r6,SPRN_MAS1
+	ori	r6,r6,MAS1_TS
+	mtspr	SPRN_MAS1,r6
+	mfspr	r6,SPRN_MAS0
+	rlwimi	r6,r5,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
+	mr	r7,r5		/* keep the temporary entry number for the cleanup */
+	mtspr	SPRN_MAS0,r6
+	isync
+	tlbwe
+	isync
+
+	/* Switch to AS=1 */
+	mfmsr	r6
+	ori	r6,r6,MSR_IS|MSR_DS
+	mtmsr	r6
+	isync
+
+10:
+	mr	r9,r3		/* r9 = current entry index */
+	add	r10,r3,r4	/* r10 = first index past the range */
+2:	bl	loadcam_entry
+	addi	r9,r9,1
+	cmpw	r9,r10
+	mr	r3,r9
+	blt	2b		/* the body runs at least once, even if r4 is 0 */
+
+	/* Don't return to AS=0 if we were in AS=1 at function start */
+	andi.	r11,r11,MSR_IS
+	bne	3f
+	cmpwi	r12, 0
+	beq	3f
+
+	/* Return to AS=0 and clear the temporary entry */
+	mfmsr	r6
+	rlwinm.	r6,r6,0,~(MSR_IS|MSR_DS)
+	mtmsr	r6
+	isync
+
+	li	r6,0
+	mtspr	SPRN_MAS1,r6	/* MAS1 = 0, so the entry is written invalid */
+	rlwinm	r6,r7,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
+	oris	r6,r6,MAS0_TLBSEL(1)@h
+	mtspr	SPRN_MAS0,r6
+	isync
+	tlbwe
+	isync
+
+3:
+	mtlr	r8		/* restore the caller's LR */
+	blr
+#endif
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
new file mode 100644
index 0000000000..7e0b8fe1c2
--- /dev/null
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -0,0 +1,1169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Low level TLB miss handlers for Book3E
+ *
+ *  Copyright (C) 2008-2009
+ *      Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ */
+
+#include <linux/pgtable.h>
+#include <asm/processor.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/exception-64e.h>
+#include <asm/ppc-opcode.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+#include <asm/feature-fixups.h>
+
+#define VPTE_PMD_SHIFT	(PTE_INDEX_SIZE)	/* each level adds the index size below it */
+#define VPTE_PUD_SHIFT	(VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
+#define VPTE_PGD_SHIFT	(VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
+#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)	/* sum of all four index sizes */
+
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with a bolted linear mapping          *
+ * No virtual page table, no nested TLB misses                        *
+ *                                                                    *
+ **********************************************************************/
+
+/*
+ * Note that, unlike non-bolted handlers, TLB_EXFRAME is not
+ * modified by the TLB miss handlers themselves, since the TLB miss
+ * handler code will not itself cause a recursive TLB miss.
+ *
+ * TLB_EXFRAME will be modified when crit/mc/debug exceptions are
+ * entered/exited.
+ */
+.macro tlb_prolog_bolted intnum addr
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r12	/* stash r12 in the scratch SPRG */
+	mfspr	r12,SPRN_SPRG_TLB_EXFRAME	/* r12 = TLB exception frame */
+	std	r13,EX_TLB_R13(r12)
+	std	r10,EX_TLB_R10(r12)
+	mfspr	r13,SPRN_SPRG_PACA
+
+	mfcr	r10			/* CR is stored to the frame further down */
+	std	r11,EX_TLB_R11(r12)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+	mfspr	r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+	DO_KVM	\intnum, SPRN_SRR1
+	std	r16,EX_TLB_R16(r12)
+	mfspr	r16,\addr		/* get faulting address */
+	std	r14,EX_TLB_R14(r12)
+	ld	r14,PACAPGD(r13)	/* r14 = pgd pointer read from the PACA */
+	std	r15,EX_TLB_R15(r12)
+	std	r10,EX_TLB_CR(r12)	/* r10 still holds the CR image from mfcr */
+START_BTB_FLUSH_SECTION
+	mfspr r11, SPRN_SRR1
+	andi. r10,r11,MSR_PR
+	beq 1f			/* skip the flush when MSR_PR is clear in SRR1 */
+	BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+	std	r7,EX_TLB_R7(r12)
+.endm
+
+.macro tlb_epilog_bolted
+	ld	r14,EX_TLB_CR(r12)	/* saved CR, moved back by mtcr below */
+	ld	r7,EX_TLB_R7(r12)
+	ld	r10,EX_TLB_R10(r12)
+	ld	r11,EX_TLB_R11(r12)
+	ld	r13,EX_TLB_R13(r12)
+	mtcr	r14
+	ld	r14,EX_TLB_R14(r12)	/* r14 gets its own saved value after mtcr */
+	ld	r15,EX_TLB_R15(r12)
+	ld	r16,EX_TLB_R16(r12)
+	mfspr	r12,SPRN_SPRG_GEN_SCRATCH	/* r12 last: it was the frame pointer */
+.endm
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss_bolted)
+	tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
+
+	/* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	/* We pre-test some combination of permissions to avoid double
+	 * faults:
+	 *
+	 * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+	 * ESR_ST   is 0x00800000
+	 * _PAGE_BAP_SW is 0x00000010
+	 * So the shift is >> 19. This tests for supervisor writeability.
+	 * If the page happens to be supervisor writeable and not user
+	 * writeable, we will take a new fault later, but that should be
+	 * a rare enough case.
+	 *
+	 * We also move ESR_ST in _PAGE_DIRTY position
+	 * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+	 *
+	 * MAS1 is preset for all we need except for TID that needs to
+	 * be cleared for kernel translations
+	 */
+
+	mfspr	r11,SPRN_ESR
+
+	srdi	r15,r16,60		/* get region */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	dtlb_miss_fault_bolted	/* Bail if fault addr is invalid */
+
+	rlwinm	r10,r11,32-19,27,27	/* r10 = ESR rotated right 19, one bit kept */
+	rlwimi	r10,r11,32-16,19,19	/* NOTE(review): rotates by 16, text above says >> 11 - confirm */
+	cmpwi	r15,0			/* user vs kernel check */
+	ori	r10,r10,_PAGE_PRESENT
+	oris	r11,r10,_PAGE_ACCESSED@h	/* r11 = PTE permission mask */
+
+	bne	tlb_miss_kernel_bolted	/* region != 0; else fall into the user path */
+
+tlb_miss_user_bolted:
+#ifdef CONFIG_PPC_KUAP
+	mfspr	r10,SPRN_MAS1
+	rlwinm.	r10,r10,0,0x3fff0000	/* keep only the MAS1 TID field */
+	beq-	tlb_miss_fault_bolted /* KUAP fault */
+#endif
+
+tlb_miss_common_bolted:
+/*
+ * This is the guts of the TLB miss handler for bolted-linear.
+ * We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+	cmpldi	cr0,r14,0
+	clrrdi	r15,r15,3		/* r15 = byte offset of the pgd entry */
+	beq	tlb_miss_fault_bolted	/* No PGDIR, bail */
+
+	ldx	r14,r14,r15		/* grab pgd entry */
+
+	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+	cmpdi	cr0,r14,0		/* signed: only a negative entry is walked */
+	bge	tlb_miss_fault_bolted	/* Bad pgd entry or hugepage; bail */
+	ldx	r14,r14,r15		/* grab pud entry */
+
+	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_fault_bolted
+	ldx	r14,r14,r15		/* Grab pmd entry */
+
+	rldicl	r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_fault_bolted
+	ldx	r14,r14,r15		/* Grab PTE, normal (!huge) page */
+
+	/* Check if required permissions are met */
+	andc.	r15,r11,r14		/* non-zero if a required bit is missing */
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT	/* no CR update */
+	bne-	tlb_miss_fault_bolted	/* still testing the andc. result */
+
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE need change if !base page size, not
+	 *                 yet implemented for now
+	 * MAS 2   :	Defaults not useful, need to be redone
+	 * MAS 3+7 :	Needs to be done
+	 */
+	clrrdi	r11,r16,12		/* Clear low crap in EA */
+	clrldi	r15,r15,12		/* Clear crap at the top */
+	rlwimi	r11,r14,32-19,27,31	/* Insert WIMGE */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r11
+	andi.	r11,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r11,MAS3_SW|MAS3_UW
+	andc	r15,r15,r11
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+	tlbwe
+
+tlb_miss_done_bolted:
+	tlb_epilog_bolted
+	rfi
+
+itlb_miss_kernel_bolted:
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_SX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+tlb_miss_kernel_bolted:
+	mfspr	r10,SPRN_MAS1
+	ld	r14,PACA_KERNELPGD(r13)	/* walk the kernel pgd, not the one from the prolog */
+	srdi	r15,r16,44		/* get kernel region */
+	andi.	r15,r15,1		/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	bne+	tlb_miss_common_bolted	/* tested bit set: do the walk */
+
+tlb_miss_fault_bolted:
+	/* We need to check if it was an instruction miss */
+	andi.	r10,r11,_PAGE_BAP_UX|_PAGE_BAP_SX	/* only the itlb paths load UX/SX into r11 */
+	bne	itlb_miss_fault_bolted
+dtlb_miss_fault_bolted:
+	tlb_epilog_bolted
+	b	exc_data_storage_book3e
+itlb_miss_fault_bolted:
+	tlb_epilog_bolted
+	b	exc_instruction_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss_bolted)
+	tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
+
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	srdi	r15,r16,60		/* get region */
+	bne-	itlb_miss_fault_bolted	/* tests the rldicl. result: addr out of range */
+
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_UX	/* Base perm */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+
+	cmpldi	cr0,r15,0			/* Check for user region */
+	oris	r11,r11,_PAGE_ACCESSED@h
+	beq	tlb_miss_user_bolted
+	b	itlb_miss_kernel_bolted		/* kernel: r11 is rebuilt there with SX */
+
+/*
+ * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
+ *
+ * Linear mapping is bolted: no virtual page table or nested TLB misses
+ * Indirect entries in TLB1, hardware loads resulting direct entries
+ *    into TLB0
+ * No HES or NV hint on TLB1, so we need to do software round-robin
+ * No tlbsrx. so we need a spinlock, and we have to deal
+ *    with MAS-damage caused by tlbsx
+ * 4K pages only
+ */
+
+	START_EXCEPTION(instruction_tlb_miss_e6500)
+	tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
+
+	ld	r11,PACA_TCD_PTR(r13)	/* r11 = tlb_per_core ptr */
+	srdi.	r15,r16,60		/* get region */
+	ori	r16,r16,1		/* low bit 1 marks an instruction miss */
+
+	bne	tlb_miss_kernel_e6500	/* user/kernel test */
+
+	b	tlb_miss_common_e6500
+
+	START_EXCEPTION(data_tlb_miss_e6500)
+	tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
+
+	ld	r11,PACA_TCD_PTR(r13)	/* r11 = tlb_per_core ptr */
+	srdi.	r15,r16,60		/* get region */
+	rldicr	r16,r16,0,62		/* low bit 0 marks a data miss */
+
+	bne	tlb_miss_kernel_e6500	/* user vs kernel check */
+
+/*
+ * This is the guts of the TLB miss handler for e6500 and derivatives.
+ * We are entered with:
+ *
+ * r16 = page of faulting address (low bit 0 if data, 1 if instruction)
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = tlb_per_core ptr
+ * r10 = crap (free to use)
+ * r7  = esel_next
+ */
+tlb_miss_common_e6500:
+	crmove	cr2*4+2,cr0*4+2		/* cr2.eq = 1 if user address, 0 if kernel */
+
+BEGIN_FTR_SECTION		/* CPU_FTR_SMT */
+	/*
+	 * Search if we already have an indirect entry for that virtual
+	 * address, and if we do, bail out.
+	 *
+	 * MAS6:IND should be already set based on MAS4
+	 */
+	lhz	r10,PACAPACAINDEX(r13)
+	addi	r10,r10,1		/* lock value = paca index + 1, never 0 */
+	crclr	cr1*4+eq	/* set cr1.eq = 0 for non-recursive */
+1:	lbarx	r15,0,r11		/* lock byte is at offset 0 of the tcd */
+	cmpdi	r15,0
+	bne	2f			/* already held: handled out of line */
+	stbcx.	r10,0,r11
+	bne	1b			/* reservation lost, retry */
+3:
+	.subsection 1
+2:	cmpd	cr1,r15,r10	/* recursive lock due to mcheck/crit/etc? */
+	beq	cr1,3b		/* unlock will happen if cr1.eq = 0 */
+10:	lbz	r15,0(r11)		/* spin with plain loads until it reads 0 */
+	cmpdi	r15,0
+	bne	10b
+	b	1b
+	.previous
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
+
+	lbz	r7,TCD_ESEL_NEXT(r11)	/* r7 = esel_next */
+
+BEGIN_FTR_SECTION		/* CPU_FTR_SMT */
+	/*
+	 * Erratum A-008139 says that we can't use tlbwe to change
+	 * an indirect entry in any way (including replacing or
+	 * invalidating) if the other thread could be in the process
+	 * of a lookup.  The workaround is to invalidate the entry
+	 * with tlbilx before overwriting.
+	 */
+
+	rlwinm	r10,r7,16,0xff0000
+	oris	r10,r10,MAS0_TLBSEL(1)@h
+	mtspr	SPRN_MAS0,r10
+	isync
+	tlbre				/* read the entry about to be replaced */
+	mfspr	r15,SPRN_MAS1
+	andis.	r15,r15,MAS1_VALID@h
+	beq	5f			/* not valid: nothing to invalidate */
+
+BEGIN_FTR_SECTION_NESTED(532)
+	mfspr	r10,SPRN_MAS8
+	rlwinm	r10,r10,0,0x80000fff  /* tgs,tlpid -> sgs,slpid */
+	mtspr	SPRN_MAS5,r10
+END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
+
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r15,r10,0,0x3fff0000  /* tid -> spid */
+	rlwimi	r15,r10,20,0x00000003 /* ind,ts -> sind,sas */
+	mfspr	r10,SPRN_MAS6		/* keep the old MAS6 in r10 */
+	mtspr	SPRN_MAS6,r15
+
+	mfspr	r15,SPRN_MAS2
+	isync
+	PPC_TLBILX_VA(0,R15)
+	isync
+
+	mtspr	SPRN_MAS6,r10		/* put the old MAS6 back */
+
+5:
+BEGIN_FTR_SECTION_NESTED(532)
+	li	r10,0
+	mtspr	SPRN_MAS8,r10
+	mtspr	SPRN_MAS5,r10
+END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
+
+	tlbsx	0,r16
+	mfspr	r10,SPRN_MAS1
+	andis.	r15,r10,MAS1_VALID@h
+	bne	tlb_miss_done_e6500	/* a valid entry was found: done */
+FTR_SECTION_ELSE
+	mfspr	r10,SPRN_MAS1
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
+
+	oris	r10,r10,MAS1_VALID@h
+	beq	cr2,4f			/* user address: leave TID alone */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+4:	mtspr	SPRN_MAS1,r10
+
+	/* Now, we need to walk the page tables. First check if we are in
+	 * range.
+	 */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	tlb_miss_fault_e6500
+
+	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+	cmpldi	cr0,r14,0
+	clrrdi	r15,r15,3
+	beq-	tlb_miss_fault_e6500 /* No PGDIR, bail */
+	ldx	r14,r14,r15		/* grab pgd entry */
+
+	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_huge_e6500	/* Bad pgd entry or hugepage; bail */
+	ldx	r14,r14,r15		/* grab pud entry */
+
+	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_huge_e6500
+	ldx	r14,r14,r15		/* Grab pmd entry */
+
+	mfspr	r10,SPRN_MAS0		/* r10 = MAS0, completed at tlb_miss_huge_done_e6500 */
+	cmpdi	cr0,r14,0
+	bge	tlb_miss_huge_e6500
+
+	/* Now we build the MAS for a 2M indirect page:
+	 *
+	 * MAS 0   :	ESEL needs to be filled by software round-robin
+	 * MAS 1   :	Fully set up
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE for now is base ind page size always
+	 *               - TID already cleared if necessary
+	 * MAS 2   :	Default not 2M-aligned, need to be redone
+	 * MAS 3+7 :	Needs to be done
+	 */
+
+	ori	r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+	mtspr	SPRN_MAS7_MAS3,r14
+
+	clrrdi	r15,r16,21		/* make EA 2M-aligned */
+	mtspr	SPRN_MAS2,r15
+
+tlb_miss_huge_done_e6500:
+	lbz	r16,TCD_ESEL_MAX(r11)
+	lbz	r14,TCD_ESEL_FIRST(r11)
+	rlwimi	r10,r7,16,0x00ff0000	/* insert esel_next into MAS0 */
+	addi	r7,r7,1			/* increment esel_next */
+	mtspr	SPRN_MAS0,r10
+	cmpw	r7,r16
+	iseleq	r7,r14,r7		/* if next == last use first */
+	stb	r7,TCD_ESEL_NEXT(r11)	/* publish the new esel_next */
+
+	tlbwe
+
+tlb_miss_done_e6500:
+	.macro	tlb_unlock_e6500
+BEGIN_FTR_SECTION
+	beq	cr1,1f		/* no unlock if lock was recursively grabbed */
+	li	r15,0
+	isync
+	stb	r15,0(r11)		/* writing 0 to the lock byte releases it */
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
+	.endm
+
+	tlb_unlock_e6500
+	tlb_epilog_bolted
+	rfi
+
+tlb_miss_huge_e6500:
+	beq	tlb_miss_fault_e6500	/* entry was zero (cr0 from the caller's cmpdi) */
+	li	r10,1
+	andi.	r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
+	rldimi	r14,r10,63,0		/* Set PD_HUGE */
+	xor	r14,r14,r15		/* Clear size bits */
+	ldx	r14,0,r14		/* r14 = PTE loaded through the adjusted pointer */
+
+	/*
+	 * Now we build the MAS for a huge page.
+	 *
+	 * MAS 0   :	ESEL needs to be filled by software round-robin
+	 *		 - can be handled by indirect code
+	 * MAS 1   :	Need to clear IND and set TSIZE
+	 * MAS 2,3+7:	Needs to be redone similar to non-tablewalk handler
+	 */
+
+	subi	r15,r15,10		/* Convert psize to tsize */
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,~MAS1_IND
+	rlwimi	r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
+	mtspr	SPRN_MAS1,r10
+
+	li	r10,-0x400
+	sld	r15,r10,r15		/* Generate mask based on size */
+	and	r10,r16,r15		/* r10 = EA aligned down to the page size */
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	rlwimi	r10,r14,32-19,27,31	/* Insert WIMGE */
+	clrldi	r15,r15,PAGE_SHIFT	/* Clear crap at the top */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r10
+	andi.	r10,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r10,MAS3_SW|MAS3_UW
+	andc	r15,r15,r10
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+
+	mfspr	r10,SPRN_MAS0		/* r10 = MAS0, as the shared tail expects */
+	b	tlb_miss_huge_done_e6500
+
+tlb_miss_kernel_e6500:
+	ld	r14,PACA_KERNELPGD(r13)	/* walk the kernel pgd */
+	srdi	r15,r16,44		/* get kernel region */
+	xoris	r15,r15,0xc		/* Check for vmalloc region */
+	cmplwi	cr1,r15,1		/* cr1 is used so cr0 keeps the region test */
+	beq+	cr1,tlb_miss_common_e6500
+
+tlb_miss_fault_e6500:
+	tlb_unlock_e6500
+	/* We need to check if it was an instruction miss */
+	andi.	r16,r16,1		/* low bit of r16 was set by the itlb entry point */
+	bne	itlb_miss_fault_e6500
+dtlb_miss_fault_e6500:
+	tlb_epilog_bolted
+	b	exc_data_storage_book3e
+itlb_miss_fault_e6500:
+	tlb_epilog_bolted
+	b	exc_instruction_storage_book3e
+
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with TLB reservation and HES support  *
+ *                                                                    *
+ **********************************************************************/
+
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss)
+	TLB_MISS_PROLOG
+
+	/* Now we handle the fault proper. We only save DEAR in normal
+	 * fault case since that's the only interesting values here.
+	 * We could probably also optimize by not saving SRR0/1 in the
+	 * linear mapping case but I'll leave that for later
+	 */
+	mfspr	r14,SPRN_ESR
+	mfspr	r16,SPRN_DEAR		/* get faulting address */
+	srdi	r15,r16,44		/* get region */
+	xoris	r15,r15,0xc
+	cmpldi	cr0,r15,0		/* linear mapping ? */
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+	cmpldi	cr1,r15,1		/* vmalloc mapping ? */
+
+	/* The page tables are mapped virtually linear. At this point, though,
+	 * we don't know whether we are trying to fault in a first level
+	 * virtual address or a virtual page table address. We can get that
+	 * from bit 0x1 of the region ID which we have set for a page table
+	 */
+	andis.	r10,r15,0x1
+	bne-	virt_page_table_tlb_miss
+
+	std	r14,EX_TLB_ESR(r12);	/* save ESR */
+	std	r16,EX_TLB_DEAR(r12);	/* save DEAR */
+
+	 /* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
+	li	r11,_PAGE_PRESENT
+	oris	r11,r11,_PAGE_ACCESSED@h
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	srdi.	r15,r16,60		/* Check for user region */
+
+	/* We pre-test some combination of permissions to avoid double
+	 * faults:
+	 *
+	 * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+	 * ESR_ST   is 0x00800000
+	 * _PAGE_BAP_SW is 0x00000010
+	 * So the shift is >> 19. This tests for supervisor writeability.
+	 * If the page happens to be supervisor writeable and not user
+	 * writeable, we will take a new fault later, but that should be
+	 * a rare enough case.
+	 *
+	 * We also move ESR_ST in _PAGE_DIRTY position
+	 * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+	 *
+	 * MAS1 is preset for all we need except for TID that needs to
+	 * be cleared for kernel translations
+	 */
+	rlwimi	r11,r14,32-19,27,27
+	rlwimi	r11,r14,32-16,19,19	/* NOTE(review): rotates by 16, text above says >> 11 - confirm */
+	beq	normal_tlb_miss_user	/* cr0 from srdi.: region 0 */
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	cr1,normal_tlb_miss	/* cr1 holds the vmalloc test from above */
+
+	/* We got a crappy address, just fault with whatever DEAR and ESR
+	 * are here
+	 */
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss)
+	TLB_MISS_PROLOG
+
+	/* If we take a recursive fault, the second level handler may need
+	 * to know whether we are handling a data or instruction fault in
+	 * order to get to the right store fault handler. We provide that
+	 * info by writing a crazy value in ESR in our exception frame
+	 */
+	li	r14,-1	/* store to exception frame is done later */
+
+	/* Now we handle the fault proper. We only save DEAR in the non
+	 * linear mapping case since we know the linear mapping case will
+	 * not re-enter. We could indeed optimize and also not save SRR0/1
+	 * in the linear mapping case but I'll leave that for later
+	 *
+	 * Faulting address is SRR0 which is already in r16
+	 */
+	srdi	r15,r16,44		/* get region */
+	xoris	r15,r15,0xc
+	cmpldi	cr0,r15,0		/* linear mapping ? */
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+	cmpldi	cr1,r15,1		/* vmalloc mapping ? */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_UX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+
+	srdi.	r15,r16,60			/* Check for user region */
+	std	r14,EX_TLB_ESR(r12)		/* write crazy -1 to frame */
+	beq	normal_tlb_miss_user
+
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_SX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+	/* XXX replace the RMW cycles with immediate loads + writes */
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,16,1			/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	cr1,normal_tlb_miss		/* cr1 holds the vmalloc test from above */
+
+	/* We got a crappy address, just fault */
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+/*
+ * This is the guts of the first-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = region ID
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+normal_tlb_miss_user:
+#ifdef CONFIG_PPC_KUAP
+	mfspr	r14,SPRN_MAS1
+	rlwinm.	r14,r14,0,0x3fff0000	/* keep only the MAS1 TID field */
+	beq-	normal_tlb_miss_access_fault /* KUAP fault */
+#endif
+normal_tlb_miss:
+	/* So we first construct the page table address. We do that by
+	 * shifting the bottom of the address (not the region ID) by
+	 * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
+	 * or'ing the fourth high bit.
+	 *
+	 * NOTE: For 64K pages, we do things slightly differently in
+	 * order to handle the weird page table format used by linux
+	 */
+	srdi	r15,r16,44
+	oris	r10,r15,0x1		/* same bit data_tlb_miss tests with andis. */
+	rldicl	r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
+	sldi	r15,r10,44
+	clrrdi	r14,r14,19
+	or	r10,r15,r14		/* r10 = address the PTE is loaded from */
+
+	ld	r14,0(r10)		/* r14 = PTE */
+
+finish_normal_tlb_miss:
+	/* Check if required permissions are met */
+	andc.	r15,r11,r14		/* non-zero if a required bit is missing */
+	bne-	normal_tlb_miss_access_fault
+
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE need change if !base page size, not
+	 *                 yet implemented for now
+	 * MAS 2   :	Defaults not useful, need to be redone
+	 * MAS 3+7 :	Needs to be done
+	 *
+	 * TODO: mix up code below for better scheduling
+	 */
+	clrrdi	r10,r16,12		/* Clear low crap in EA */
+	rlwimi	r10,r14,32-19,27,31	/* Insert WIMGE */
+	mtspr	SPRN_MAS2,r10
+
+	/* Check page size, if not standard, update MAS1 */
+	rldicl	r10,r14,64-8,64-8
+	cmpldi	cr0,r10,BOOK3E_PAGESZ_4K
+	beq-	1f
+	mfspr	r11,SPRN_MAS1
+	rlwimi	r11,r14,31,21,24
+	rlwinm	r11,r11,0,21,19
+	mtspr	SPRN_MAS1,r11
+1:
+	/* Move RPN in position */
+	rldicr	r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	clrldi	r15,r11,12		/* Clear crap at the top */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	andi.	r11,r14,_PAGE_DIRTY
+	bne	1f
+	li	r11,MAS3_SW|MAS3_UW
+	andc	r15,r15,r11
+1:
+	srdi	r16,r15,32		/* upper 32 bits go to MAS7 */
+	mtspr	SPRN_MAS3,r15
+	mtspr	SPRN_MAS7,r16
+
+	tlbwe
+
+normal_tlb_miss_done:
+	/* We don't bother with restoring DEAR or ESR since we know we are
+	 * level 0 and just going back to userland. They are only needed
+	 * if you are going to take an access fault
+	 */
+	TLB_MISS_EPILOG_SUCCESS
+	rfi
+
+normal_tlb_miss_access_fault:
+	/* We need to check if it was an instruction miss */
+	andi.	r10,r11,_PAGE_BAP_UX	/* NOTE(review): kernel itlb misses use BAP_SX in r11 - confirm */
+	bne	1f
+	ld	r14,EX_TLB_DEAR(r12)	/* values saved by data_tlb_miss */
+	ld	r15,EX_TLB_ESR(r12)
+	mtspr	SPRN_DEAR,r14
+	mtspr	SPRN_ESR,r15
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+1:	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the second-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = virtual page table faulting address
+ * r15 = region (top 4 bits of address)
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * Note that this should only ever be called as a second level handler
+ * with the current scheme when using SW load.
+ * That means we can always get the original fault DEAR at
+ * EX_TLB_DEAR-EX_TLB_SIZE(r12)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will restart the whole fault at level
+ * 0 so we don't care too much about clobbers
+ *
+ * XXX That code was written back when we couldn't clobber r14. We can now,
+ * so we could probably optimize things a bit
+ */
+virt_page_table_tlb_miss:
+	/* Are we hitting a kernel page table ? */
+	srdi	r15,r16,60
+	andi.	r10,r15,0x8
+
+	/* The cool thing now is that r10 contains 0 for user and 8 for kernel,
+	 * and we happen to have the swapper_pg_dir at offset 8 from the user
+	 * pgdir in the PACA :-).
+	 */
+	add	r11,r10,r13		/* r11 = PACA, or PACA + 8 for kernel */
+
+	/* If kernel, we need to clear MAS1 TID */
+	beq	1f
+	/* XXX replace the RMW cycles with immediate loads + writes */
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,16,1			/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+#ifdef CONFIG_PPC_KUAP
+	b	2f				/* kernel: skip the user-only KUAP test */
+1:
+	mfspr	r10,SPRN_MAS1
+	rlwinm.	r10,r10,0,0x3fff0000
+	beq-	virt_page_table_tlb_miss_fault /* KUAP fault */
+2:
+#else
+1:
+#endif
+
+	/* Now, we need to walk the page tables. First check if we are in
+	 * range.
+	 */
+	rldicl	r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+	cmpldi	r10,0x80
+	bne-	virt_page_table_tlb_miss_fault
+
+	/* Get the PGD pointer */
+	ld	r15,PACAPGD(r11)	/* r11 was biased above to pick user or kernel pgd */
+	cmpldi	cr0,r15,0
+	beq-	virt_page_table_tlb_miss_fault
+
+	/* Get to PGD entry */
+	rldicl	r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpdi	cr0,r15,0		/* signed: only a negative entry is walked */
+	bge	virt_page_table_tlb_miss_fault
+
+	/* Get to PUD entry */
+	rldicl	r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpdi	cr0,r15,0
+	bge	virt_page_table_tlb_miss_fault
+
+	/* Get to PMD entry */
+	rldicl	r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpdi	cr0,r15,0
+	bge	virt_page_table_tlb_miss_fault
+
+	/* Ok, we're all right, we can now create a kernel translation for
+	 * a 4K or 64K page from r16 -> r15.
+	 */
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE for now is base page size always
+	 * MAS 2   :	Use defaults
+	 * MAS 3+7 :	Needs to be done
+	 *
+	 * So we only do MAS 2 and 3 for now...
+	 */
+	clrldi	r11,r15,4		/* remove region ID from RPN */
+	ori	r10,r11,1		/* Or-in SR */
+
+	srdi	r16,r10,32		/* upper 32 bits go to MAS7 */
+	mtspr	SPRN_MAS3,r10
+	mtspr	SPRN_MAS7,r16
+
+	tlbwe
+
+	/* Return to caller, normal case */
+	TLB_MISS_EPILOG_SUCCESS
+	rfi
+
+virt_page_table_tlb_miss_fault:
+	/* If we fault here, things are a little bit tricky. We need to call
+	 * either data or instruction store fault, and we need to retrieve
+	 * the original fault address and ESR (for data).
+	 *
+	 * The thing is, we know that in normal circumstances, this is
+	 * always called as a second level tlb miss for SW load or as a first
+	 * level TLB miss for HW load, so we should be able to peek at the
+	 * relevant information in the first exception frame in the PACA.
+	 *
+	 * However, we do need to double check that, because we may just hit
+	 * a stray kernel pointer or a userland attack trying to hit those
+	 * areas. If that is the case, we do a data fault. (We can't get here
+	 * from an instruction tlb miss anyway).
+	 *
+	 * Note also that when going to a fault, we must unwind the previous
+	 * level as well. Since we are doing that, we don't need to clear or
+	 * restore the TLB reservation either.
+	 */
+	subf	r10,r13,r12		/* r10 = offset of our frame within the PACA */
+	cmpldi	cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+	bne-	virt_page_table_tlb_miss_whacko_fault	/* not in the second frame slot */
+
+	/* We dig the original DEAR and ESR from slot 0 */
+	ld	r15,EX_TLB_DEAR+PACA_EXTLB(r13)
+	ld	r16,EX_TLB_ESR+PACA_EXTLB(r13)
+
+	/* We check for the "special" ESR value for instruction faults */
+	cmpdi	cr0,r16,-1
+	beq	1f
+	mtspr	SPRN_DEAR,r15
+	mtspr	SPRN_ESR,r16
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+1:	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+virt_page_table_tlb_miss_whacko_fault:
+	/* The linear fault will restart everything so ESR and DEAR will
+	 * not have been clobbered, let's just fault with what we have
+	 */
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+
+
+/**************************************************************
+ *                                                            *
+ * TLB miss handling for Book3E with hw page table support    *
+ *                                                            *
+ **************************************************************/
+
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss_htw)
+	TLB_MISS_PROLOG
+
+	/* Now we handle the fault proper. We only save DEAR in normal
+	 * fault case since that's the only interesting values here.
+	 * We could probably also optimize by not saving SRR0/1 in the
+	 * linear mapping case but I'll leave that for later
+	 */
+	mfspr	r14,SPRN_ESR		/* r14 = ESR, read by htw_tlb_miss_fault */
+	mfspr	r16,SPRN_DEAR		/* get faulting address */
+	srdi	r11,r16,44		/* get region */
+	xoris	r11,r11,0xc
+	cmpldi	cr0,r11,0		/* linear mapping ? */
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+	cmpldi	cr1,r11,1		/* vmalloc mapping ? */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	srdi.	r11,r16,60		/* Check for user region */
+	ld	r15,PACAPGD(r13)	/* Load user pgdir */
+	beq	htw_tlb_miss
+
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	ld	r15,PACA_KERNELPGD(r13)	/* Load kernel pgdir */
+	beq+	cr1,htw_tlb_miss	/* cr1 holds the vmalloc test from above */
+
+	/* We got a crappy address, just fault with whatever DEAR and ESR
+	 * are here
+	 */
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss_htw)
+	TLB_MISS_PROLOG
+
+	/* If we take a recursive fault, the second level handler may need
+	 * to know whether we are handling a data or instruction fault in
+	 * order to get to the right store fault handler. We provide that
+	 * info by keeping a crazy value for ESR in r14
+	 */
+	li	r14,-1	/* compared against -1 by htw_tlb_miss_fault */
+
+	/* Now we handle the fault proper. We only save DEAR in the non
+	 * linear mapping case since we know the linear mapping case will
+	 * not re-enter. We could indeed optimize and also not save SRR0/1
+	 * in the linear mapping case but I'll leave that for later
+	 *
+	 * Faulting address is SRR0 which is already in r16
+	 */
+	srdi	r11,r16,44		/* get region */
+	xoris	r11,r11,0xc
+	cmpldi	cr0,r11,0		/* linear mapping ? */
+	beq	tlb_load_linear		/* yes -> go to linear map load */
+	cmpldi	cr1,r11,1		/* vmalloc mapping ? */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	srdi.	r11,r16,60		/* Check for user region */
+	ld	r15,PACAPGD(r13)		/* Load user pgdir */
+	beq	htw_tlb_miss
+
+	/* XXX replace the RMW cycles with immediate loads + writes */
+1:	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,16,1			/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	ld	r15,PACA_KERNELPGD(r13)		/* Load kernel pgdir */
+	beq+	cr1,htw_tlb_miss		/* vmalloc test is in cr1; cr0.eq is always clear here */
+
+	/* We got a crappy address, just fault */
+	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the TLB miss handler used with hardware
+ * tablewalk. We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = PGD pointer
+ * r14 = ESR
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will save/restore things for us
+ */
+htw_tlb_miss:
+#ifdef CONFIG_PPC_KUAP
+	mfspr	r10,SPRN_MAS1		/* NOTE(review): kernel callers clear TID before */
+	rlwinm.	r10,r10,0,0x3fff0000	/* branching here, so this test also trips for them */
+	beq-	htw_tlb_miss_fault /* KUAP fault */
+#endif
+	/* Search if we already have a TLB entry for that virtual address, and
+	 * if we do, bail out.
+	 *
+	 * MAS1:IND should be already set based on MAS4
+	 */
+	PPC_TLBSRX_DOT(0,R16)
+	beq	htw_tlb_miss_done
+
+	/* Now, we need to walk the page tables. First check if we are in
+	 * range.
+	 */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	htw_tlb_miss_fault
+
+	/* Get the PGD pointer */
+	cmpldi	cr0,r15,0		/* r15 was loaded by the entry point */
+	beq-	htw_tlb_miss_fault
+
+	/* Get to PGD entry */
+	rldicl	r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpdi	cr0,r15,0		/* signed: only a negative entry is walked */
+	bge	htw_tlb_miss_fault
+
+	/* Get to PUD entry */
+	rldicl	r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpdi	cr0,r15,0
+	bge	htw_tlb_miss_fault
+
+	/* Get to PMD entry */
+	rldicl	r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
+	clrrdi	r10,r11,3
+	ldx	r15,r10,r15
+	cmpdi	cr0,r15,0
+	bge	htw_tlb_miss_fault
+
+	/* Ok, we're all right, we can now create an indirect entry for
+	 * a 1M or 256M page.
+	 *
+	 * The last trick is now that because we use "half" pages for
+	 * the HTW (1M IND is 2K and 256M IND is 32K) we need to account
+	 * for an added LSB bit to the RPN. For 64K pages, there is no
+	 * problem as we already use 32K arrays (half PTE pages), but for
+	 * 4K page we need to extract a bit from the virtual address and
+	 * insert it into the "PA52" bit of the RPN.
+	 */
+	rlwimi	r15,r16,32-9,20,20
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   :	Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   :	Almost fully setup
+	 *               - PID already updated by caller if necessary
+	 *               - TSIZE for now is base ind page size always
+	 * MAS 2   :	Use defaults
+	 * MAS 3+7 :	Needs to be done
+	 */
+	ori	r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
+
+	srdi	r16,r10,32		/* upper 32 bits go to MAS7 */
+	mtspr	SPRN_MAS3,r10
+	mtspr	SPRN_MAS7,r16
+
+	tlbwe
+
+htw_tlb_miss_done:
+	/* We don't bother with restoring DEAR or ESR since we know we are
+	 * level 0 and just going back to userland. They are only needed
+	 * if you are going to take an access fault
+	 */
+	TLB_MISS_EPILOG_SUCCESS
+	rfi
+
+htw_tlb_miss_fault:
+	/* We need to check if it was an instruction miss. We know this
+	 * though because r14 would contain -1
+	 */
+	cmpdi	cr0,r14,-1
+	beq	1f
+	mtspr	SPRN_DEAR,r16		/* data miss: write r16 and r14 back to DEAR/ESR */
+	mtspr	SPRN_ESR,r14
+	TLB_MISS_EPILOG_ERROR
+	b	exc_data_storage_book3e
+1:	TLB_MISS_EPILOG_ERROR
+	b	exc_instruction_storage_book3e
+
+/*
+ * This is the guts of "any" level TLB miss handler for kernel linear
+ * mapping misses. We are entered with:
+ *
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = ESR (data) or -1 (instruction)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * In addition we know that we will not re-enter, so in theory, we could
+ * use a simpler epilog not restoring SRR0/1 etc.. but we'll do that later.
+ *
+ * We also need to be careful about MAS registers here & TLB reservation,
+ * as we know we'll have clobbered them if we interrupt the main TLB miss
+ * handlers in which case we probably want to do a full restart at level
+ * 0 rather than saving / restoring the MAS.
+ *
+ * Note: If we care about performance of that core, we can easily shuffle
+ *       a few things around
+ */
+tlb_load_linear:
+	/* For now, we assume the linear mapping is contiguous and stops at
+	 * linear_map_top. We also assume the size is a multiple of 1G, thus
+	 * we only use 1G pages for now. That might have to be changed in a
+	 * final implementation, especially when dealing with hypervisors
+	 */
+	__LOAD_PACA_TOC(r11)
+	LOAD_REG_ADDR_ALTTOC(r11, r11, linear_map_top)
+	ld	r10,0(r11)		/* r10 = value of linear_map_top */
+	tovirt(10,10)
+	cmpld	cr0,r16,r10
+	bge	tlb_load_linear_fault	/* at or above the top: not mapped */
+
+	/* MAS1 need whole new setup. */
+	li	r15,(BOOK3E_PAGESZ_1GB<<MAS1_TSIZE_SHIFT)
+	oris	r15,r15,MAS1_VALID@h	/* MAS1 needs V and TSIZE */
+	mtspr	SPRN_MAS1,r15
+
+	/* Already somebody there ? */
+	PPC_TLBSRX_DOT(0,R16)
+	beq	tlb_load_linear_done
+
+	/* Now we build the remaining MAS. MAS0 and 2 should be fine
+	 * with their defaults, which leaves us with MAS 3 and 7. The
+	 * mapping is linear, so we just take the address, clear the
+	 * region bits, and or in the permission bits which are currently
+	 * hard wired
+	 */
+	clrrdi	r10,r16,30		/* 1G page index */
+	clrldi	r10,r10,4		/* clear region bits */
+	ori	r10,r10,MAS3_SR|MAS3_SW|MAS3_SX
+
+	srdi	r16,r10,32		/* upper 32 bits go to MAS7 */
+	mtspr	SPRN_MAS3,r10
+	mtspr	SPRN_MAS7,r16
+
+	tlbwe
+
+tlb_load_linear_done:
+	/* We use the "error" epilog for success as we do want to
+	 * restore to the initial faulting context, whatever it was.
+	 * We do that because we can't resume a fault within a TLB
+	 * miss handler, due to MAS and TLB reservation being clobbered.
+	 */
+	TLB_MISS_EPILOG_ERROR
+	rfi
+
+tlb_load_linear_fault:
+	/* We keep the DEAR and ESR around, this shouldn't have happened */
+	cmpdi	cr0,r14,-1		/* r14 = -1 marks an instruction miss */
+	beq	1f
+	TLB_MISS_EPILOG_ERROR_SPECIAL
+	b	exc_data_storage_book3e
+1:	TLB_MISS_EPILOG_ERROR_SPECIAL
+	b	exc_instruction_storage_book3e
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
new file mode 100644
index 0000000000..f6c4ace3b2
--- /dev/null
+++ b/arch/powerpc/mm/numa.c
@@ -0,0 +1,1473 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pSeries NUMA support
+ *
+ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
+ */
+#define pr_fmt(fmt) "numa: " fmt
+
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/export.h>
+#include <linux/nodemask.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/pfn.h>
+#include <linux/cpuset.h>
+#include <linux/node.h>
+#include <linux/stop_machine.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <asm/cputhreads.h>
+#include <asm/sparsemem.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/hvcall.h>
+#include <asm/setup.h>
+#include <asm/vdso.h>
+#include <asm/vphn.h>
+#include <asm/drmem.h>
+
+/*
+ * Non-zero while NUMA handling is active. Cleared by "numa=off" on the
+ * command line (early_numa()) or when the primary domain index cannot be
+ * parsed from the device tree (parse_numa_properties()).
+ */
+static int numa_enabled = 1;
+
+/*
+ * Remaining text of the "numa=fake=" argument; consumed piece by piece by
+ * fake_numa_create_new_node().
+ */
+static char *cmdline __initdata;
+
+/* CPU -> node id; entries are -1 until a CPU has been mapped to a node */
+int numa_cpu_lookup_table[NR_CPUS];
+/* Node -> cpumask; allocated by setup_node_to_cpumask_map() */
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+/* Node -> pglist_data; filled in by setup_node_data() */
+struct pglist_data *node_data[MAX_NUMNODES];
+
+EXPORT_SYMBOL(numa_cpu_lookup_table);
+EXPORT_SYMBOL(node_to_cpumask_map);
+EXPORT_SYMBOL(node_data);
+
+/*
+ * 1-based index into an associativity array of the entry that identifies
+ * the NUMA node; set from find_primary_domain_index().
+ */
+static int primary_domain_index;
+/* #address-cells / #size-cells of the memory nodes, see get_n_mem_cells() */
+static int n_mem_addr_cells, n_mem_size_cells;
+
+/* Associativity encoding in use, selected in find_primary_domain_index() */
+#define FORM0_AFFINITY 0
+#define FORM1_AFFINITY 1
+#define FORM2_AFFINITY 2
+static int affinity_form;
+
+/* Upper bound on the number of reference points that are tracked */
+#define MAX_DISTANCE_REF_POINTS 4
+/* Number of usable entries in distance_ref_points (capped at the max above) */
+static int distance_ref_points_depth;
+/* Contents of the "ibm,associativity-reference-points" property */
+static const __be32 *distance_ref_points;
+/* FORM1: per node, the domain id found at each reference point */
+static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+/* FORM2: node-to-node distances; -1 means "not provided by firmware" */
+static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
+	[0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
+};
+/* FORM2: lookup-table index -> domain id, NUMA_NO_NODE when unused */
+static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
+
+/*
+ * Allocate one cpumask per possible node so that cpumask_of_node() becomes
+ * usable. node_possible_map must already be valid when this is called.
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int nid;
+
+	/* nr_node_ids has not been computed yet: do it now */
+	if (nr_node_ids == MAX_NUMNODES)
+		setup_nr_node_ids();
+
+	for_each_node(nid) {
+		alloc_bootmem_cpumask_var(&node_to_cpumask_map[nid]);
+	}
+
+	/* From here on cpumask_of_node() can be used */
+	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
+}
+
+/*
+ * Fake-NUMA helper driven by the "numa=fake=" boundaries kept in cmdline.
+ *
+ * @end_pfn: last pfn of the memory range being placed
+ * @nid:     in/out node id; overwritten with the current fake node id once
+ *           at least one fake node has been created
+ *
+ * Returns 1 when @end_pfn lies beyond the next boundary and a new fake node
+ * was started, 0 otherwise.
+ */
+static int __init fake_numa_create_new_node(unsigned long end_pfn,
+						unsigned int *nid)
+{
+	static unsigned long long curr_boundary;
+	static unsigned int fake_nid;
+	unsigned long long boundary;
+	char *cursor = cmdline;
+
+	/* Once fake nodes exist, keep handing out the most recent one */
+	if (fake_nid)
+		*nid = fake_nid;
+
+	/* No boundaries left to parse: the id set above is final */
+	if (!cursor)
+		return 0;
+
+	boundary = memparse(cursor, &cursor);
+	if (!boundary || boundary < curr_boundary)
+		return 0;
+
+	curr_boundary = boundary;
+
+	if ((end_pfn << PAGE_SHIFT) <= boundary)
+		return 0;
+
+	/* Step over the separators in front of the next boundary */
+	while (*cursor == ',' || *cursor == ' ' || *cursor == '\t')
+		cursor++;
+
+	cmdline = cursor;
+	*nid = ++fake_nid;
+	pr_debug("created new fake_node with id %d\n", fake_nid);
+	return 1;
+}
+
+/* Mark every possible CPU as not yet assigned to any node. */
+static void __init reset_numa_cpu_lookup_table(void)
+{
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		numa_cpu_lookup_table[i] = NUMA_NO_NODE;
+	}
+}
+
+/*
+ * Record @node as the home of @cpu in the lookup table and make sure the
+ * CPU is set in that node's cpumask.
+ */
+void map_cpu_to_node(int cpu, int node)
+{
+	update_numa_cpu_lookup_table(cpu, node);
+
+	/* Already part of the node's mask: nothing left to do */
+	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node]))
+		return;
+
+	pr_debug("adding cpu %d to node %d\n", cpu, node);
+	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+}
+
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
+/*
+ * Remove @cpu from the cpumask of the node recorded for it in
+ * numa_cpu_lookup_table. A warning is printed when the CPU has no node
+ * recorded or is not set in that node's mask.
+ */
+void unmap_cpu_from_node(unsigned long cpu)
+{
+	int node = numa_cpu_lookup_table[cpu];
+
+	/*
+	 * Lookup table entries are -1 until the CPU has been mapped; such a
+	 * value must never be used as an index into node_to_cpumask_map.
+	 */
+	if (node < 0) {
+		pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node);
+		return;
+	}
+
+	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
+		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
+		pr_debug("removing cpu %lu from node %d\n", cpu, node);
+	} else {
+		pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
+
+/*
+ * Translate an associativity array into a node id.
+ *
+ * @associativity: array of domain ids WITHOUT a leading length cell
+ * @max_array_sz:  number of entries in @associativity
+ *
+ * Returns the node id found at the primary domain index, or NUMA_NO_NODE
+ * when NUMA is disabled, the index lies outside the array, or the value is
+ * not a usable node id.
+ */
+static int __associativity_to_nid(const __be32 *associativity,
+				  int max_array_sz)
+{
+	int nid;
+	/*
+	 * primary_domain_index is 1 based array index.
+	 */
+	int index = primary_domain_index  - 1;
+
+	/* A primary_domain_index of 0 would otherwise read in front of the array */
+	if (!numa_enabled || index < 0 || index >= max_array_sz)
+		return NUMA_NO_NODE;
+
+	nid = of_read_number(&associativity[index], 1);
+
+	/* POWER4 LPAR uses 0xffff as invalid node */
+	if (nid == 0xffff || nid >= nr_node_ids)
+		nid = NUMA_NO_NODE;
+	return nid;
+}
+/*
+ * Map a length-prefixed associativity array to a node id.
+ *
+ * Returns a nid in the range [0..nr_node_ids - 1], or NUMA_NO_NODE (-1) if
+ * no useful NUMA info is found.
+ */
+static int associativity_to_nid(const __be32 *associativity)
+{
+	/* Cell 0 is the number of entries; the domain ids start at cell 1 */
+	const __be32 *entries = associativity + 1;
+	int nr_entries = of_read_number(associativity, 1);
+
+	return __associativity_to_nid(entries, nr_entries);
+}
+
+/*
+ * FORM2: classify the distance between the nodes of two CPUs, given their
+ * length-prefixed associativity arrays.
+ *
+ * Returns 0 when the distance is at most LOCAL_DISTANCE, 1 when it is at
+ * most REMOTE_DISTANCE and 2 otherwise. CPUs whose node cannot be
+ * determined are reported as 2 (furthest).
+ */
+static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	int dist;
+	int node1, node2;
+
+	node1 = associativity_to_nid(cpu1_assoc);
+	node2 = associativity_to_nid(cpu2_assoc);
+
+	/*
+	 * associativity_to_nid() may return NUMA_NO_NODE, which must not be
+	 * used as an index into numa_distance_table.
+	 */
+	if (node1 == NUMA_NO_NODE || node2 == NUMA_NO_NODE)
+		return 2;
+
+	dist = numa_distance_table[node1][node2];
+	if (dist <= LOCAL_DISTANCE)
+		return 0;
+	else if (dist <= REMOTE_DISTANCE)
+		return 1;
+	else
+		return 2;
+}
+
+/*
+ * FORM1: count how many reference points have to be walked before the two
+ * associativity arrays agree. Returns distance_ref_points_depth when they
+ * never do.
+ */
+static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	int level;
+
+	for (level = 0; level < distance_ref_points_depth; level++) {
+		int pos = be32_to_cpu(distance_ref_points[level]);
+
+		/* First matching domain: the level number is the distance */
+		if (cpu1_assoc[pos] == cpu2_assoc[pos])
+			return level;
+	}
+
+	return level;
+}
+
+/*
+ * Relative distance between two CPUs described by their associativity
+ * arrays, using the FORM1 or FORM2 helper depending on affinity_form.
+ */
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+	/* Callers are not expected to use this with FORM0 */
+	VM_WARN_ON(affinity_form == FORM0_AFFINITY);
+
+	return (affinity_form == FORM1_AFFINITY) ?
+		__cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc) :
+		__cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
+}
+
+/*
+ * Return the "ibm,associativity" property of @dev, or NULL if it has none.
+ * The caller must hold a reference to the node during the call.
+ */
+static const __be32 *of_get_associativity(struct device_node *dev)
+{
+	const __be32 *assoc;
+
+	assoc = of_get_property(dev, "ibm,associativity", NULL);
+	return assoc;
+}
+
+/*
+ * Distance between nodes @a and @b.
+ *
+ * FORM2 reads the firmware provided table, FORM0 only distinguishes local
+ * from remote, and FORM1 doubles LOCAL_DISTANCE for every reference point
+ * at which the two nodes differ.
+ */
+int __node_distance(int a, int b)
+{
+	int level;
+	int distance = LOCAL_DISTANCE;
+
+	switch (affinity_form) {
+	case FORM2_AFFINITY:
+		return numa_distance_table[a][b];
+	case FORM0_AFFINITY:
+		return (a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	default:
+		break;
+	}
+
+	for (level = 0; level < distance_ref_points_depth; level++) {
+		if (distance_lookup_table[a][level] == distance_lookup_table[b][level])
+			break;
+
+		/* One more NUMA level apart: double the distance */
+		distance *= 2;
+	}
+
+	return distance;
+}
+EXPORT_SYMBOL(__node_distance);
+
+/*
+ * Return the nid derived from the associativity property of @device itself,
+ * or NUMA_NO_NODE (-1) when the node has no such property or it yields no
+ * usable node id.
+ */
+static int of_node_to_nid_single(struct device_node *device)
+{
+	const __be32 *assoc = of_get_associativity(device);
+
+	return assoc ? associativity_to_nid(assoc) : NUMA_NO_NODE;
+}
+
+/*
+ * Starting at @device, walk up the device tree until a node yields a valid
+ * nid. Returns that nid, or NUMA_NO_NODE if the root is passed without
+ * finding one.
+ */
+int of_node_to_nid(struct device_node *device)
+{
+	int nid = NUMA_NO_NODE;
+
+	/* of_get_next_parent() drops the child reference as it moves up */
+	for (of_node_get(device); device; device = of_get_next_parent(device)) {
+		nid = of_node_to_nid_single(device);
+		if (nid != NUMA_NO_NODE)
+			break;
+	}
+	of_node_put(device);
+
+	return nid;
+}
+EXPORT_SYMBOL(of_node_to_nid);
+
+/*
+ * FORM1 only: record, for the node described by @associativity, the domain
+ * id found at every reference point in distance_lookup_table.
+ *
+ * @associativity: array of domain ids WITHOUT a leading length cell
+ * @max_array_sz:  number of entries in @associativity
+ */
+static void __initialize_form1_numa_distance(const __be32 *associativity,
+					     int max_array_sz)
+{
+	int level, nid;
+
+	if (affinity_form != FORM1_AFFINITY)
+		return;
+
+	nid = __associativity_to_nid(associativity, max_array_sz);
+	if (nid == NUMA_NO_NODE)
+		return;
+
+	for (level = 0; level < distance_ref_points_depth; level++) {
+		int pos = be32_to_cpu(distance_ref_points[level]) - 1;
+
+		/* Broken hierarchy: stop and leave the table partially filled */
+		if (WARN(pos >= max_array_sz, "Broken ibm,associativity property"))
+			return;
+
+		distance_lookup_table[nid][level] =
+			of_read_number(&associativity[pos], 1);
+	}
+}
+
+/*
+ * Same as __initialize_form1_numa_distance() for an associativity array
+ * whose first cell holds the number of entries that follow.
+ */
+static void initialize_form1_numa_distance(const __be32 *associativity)
+{
+	int nr_entries = of_read_number(associativity, 1);
+
+	/* The domain ids start right behind the length cell */
+	__initialize_form1_numa_distance(&associativity[1], nr_entries);
+}
+
+/*
+ * Update the distance information for a newly added device tree @node.
+ *
+ * FORM0 has nothing to update, FORM1 refreshes distance_lookup_table from
+ * the node's associativity, FORM2 only checks that firmware supplied a
+ * distance for the node at boot.
+ */
+void update_numa_distance(struct device_node *node)
+{
+	const __be32 *assoc;
+	int nid;
+
+	switch (affinity_form) {
+	case FORM0_AFFINITY:
+		return;
+	case FORM1_AFFINITY:
+		assoc = of_get_associativity(node);
+		if (assoc)
+			initialize_form1_numa_distance(assoc);
+		return;
+	default:
+		break;
+	}
+
+	/* FORM2 affinity */
+	nid = of_node_to_nid_single(node);
+	if (nid == NUMA_NO_NODE)
+		return;
+
+	/*
+	 * FORM2 expects the distances of all possible nodes to have been
+	 * provided during boot.
+	 */
+	WARN(numa_distance_table[nid][nid] == -1,
+	     "NUMA distance details for node %d not provided\n", nid);
+}
+EXPORT_SYMBOL_GPL(update_numa_distance);
+
+/*
+ * Populate numa_id_index_table and numa_distance_table for FORM2 affinity.
+ *
+ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+ * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
+ *
+ * The properties are read from /ibm,opal or /rtas (falling back to /).
+ * A missing, short or oversized lookup index table leaves both tables
+ * untouched. A missing or inconsistent distance table triggers a warning
+ * and LOCAL_DISTANCE/REMOTE_DISTANCE are used instead.
+ */
+static void __init initialize_form2_numa_distance_lookup_table(void)
+{
+	int i, j;
+	struct device_node *root;
+	const __u8 *form2_distances;
+	const __be32 *numa_lookup_index;
+	int form2_distances_length = 0;
+	int max_numa_index, distance_index;
+	int lookup_prop_len = 0, dist_prop_len = 0;
+
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		root = of_find_node_by_path("/ibm,opal");
+	else
+		root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
+
+	numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table",
+					    &lookup_prop_len);
+	if (!numa_lookup_index || lookup_prop_len < (int)sizeof(__be32)) {
+		WARN(1, "Missing ibm,numa-lookup-index-table\n");
+		goto out;
+	}
+	max_numa_index = of_read_number(&numa_lookup_index[0], 1);
+
+	/*
+	 * The count comes from firmware: it must fit numa_id_index_table and
+	 * be backed by that many cells after the count itself.
+	 */
+	if (max_numa_index < 0 || max_numa_index > MAX_NUMNODES ||
+	    lookup_prop_len / (int)sizeof(__be32) - 1 < max_numa_index) {
+		WARN(1, "Invalid ibm,numa-lookup-index-table\n");
+		goto out;
+	}
+
+	/* first element of the array is the size and is encode-int */
+	form2_distances = of_get_property(root, "ibm,numa-distance-table",
+					  &dist_prop_len);
+	if (form2_distances && dist_prop_len >= (int)sizeof(__be32)) {
+		form2_distances_length =
+			of_read_number((const __be32 *)&form2_distances[0], 1);
+		/* Skip the size which is encoded int */
+		form2_distances += sizeof(__be32);
+		dist_prop_len -= sizeof(__be32);
+	} else {
+		form2_distances = NULL;
+	}
+
+	pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n",
+		 form2_distances_length, max_numa_index);
+
+	for (i = 0; i < max_numa_index; i++)
+		/* +1 skip the max_numa_index in the property */
+		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
+
+	/* Need exactly one distance byte per (index, index) pair */
+	if (!form2_distances ||
+	    form2_distances_length != max_numa_index * max_numa_index ||
+	    dist_prop_len < form2_distances_length) {
+		WARN(1, "Wrong NUMA distance information\n");
+		form2_distances = NULL; // don't use it
+	}
+	distance_index = 0;
+	for (i = 0;  i < max_numa_index; i++) {
+		for (j = 0; j < max_numa_index; j++) {
+			int nodeA = numa_id_index_table[i];
+			int nodeB = numa_id_index_table[j];
+			int dist;
+
+			if (form2_distances)
+				dist = form2_distances[distance_index++];
+			else if (nodeA == nodeB)
+				dist = LOCAL_DISTANCE;
+			else
+				dist = REMOTE_DISTANCE;
+
+			/* Domain ids index the table: drop ones that don't fit */
+			if (nodeA < 0 || nodeA >= MAX_NUMNODES ||
+			    nodeB < 0 || nodeB >= MAX_NUMNODES)
+				continue;
+
+			numa_distance_table[nodeA][nodeB] = dist;
+			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist);
+		}
+	}
+
+out:
+	of_node_put(root);
+}
+
+/*
+ * Select the affinity form from the firmware features and read
+ * "ibm,associativity-reference-points" from /ibm,opal or /rtas (falling
+ * back to /).
+ *
+ * Side effects: sets affinity_form, distance_ref_points and
+ * distance_ref_points_depth (capped at MAX_DISTANCE_REF_POINTS).
+ *
+ * Returns the primary domain index, or -1 when the property is missing or
+ * too short for the selected form.
+ */
+static int __init find_primary_domain_index(void)
+{
+	int index;
+	int min_depth;
+	struct device_node *root;
+
+	/*
+	 * Check for which form of affinity.
+	 */
+	if (firmware_has_feature(FW_FEATURE_OPAL)) {
+		affinity_form = FORM1_AFFINITY;
+	} else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
+		pr_debug("Using form 2 affinity\n");
+		affinity_form = FORM2_AFFINITY;
+	} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
+		pr_debug("Using form 1 affinity\n");
+		affinity_form = FORM1_AFFINITY;
+	} else
+		affinity_form = FORM0_AFFINITY;
+
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		root = of_find_node_by_path("/ibm,opal");
+	else
+		root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
+
+	/*
+	 * This property is a set of 32-bit integers, each representing
+	 * an index into the ibm,associativity nodes.
+	 *
+	 * With form 0 affinity the first integer is for an SMP configuration
+	 * (should be all 0's) and the second is for a normal NUMA
+	 * configuration. We have only one level of NUMA.
+	 *
+	 * With form 1 affinity the first integer is the most significant
+	 * NUMA boundary and the following are progressively less significant
+	 * boundaries. There can be more than one level of NUMA.
+	 */
+	distance_ref_points = of_get_property(root,
+					"ibm,associativity-reference-points",
+					&distance_ref_points_depth);
+
+	if (!distance_ref_points) {
+		pr_debug("ibm,associativity-reference-points not found.\n");
+		goto err;
+	}
+
+	distance_ref_points_depth /= sizeof(int);
+
+	/*
+	 * FORM0 uses the second cell, FORM1 and FORM2 both find the primary
+	 * domain details in the first one. Make sure the cell that is about
+	 * to be read is really part of the property.
+	 */
+	min_depth = (affinity_form == FORM0_AFFINITY) ? 2 : 1;
+	if (distance_ref_points_depth < min_depth) {
+		pr_warn("short ibm,associativity-reference-points\n");
+		goto err;
+	}
+	index = of_read_number(&distance_ref_points[min_depth - 1], 1);
+
+	/*
+	 * Warn and cap if the hardware supports more than
+	 * MAX_DISTANCE_REF_POINTS domains.
+	 */
+	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
+		pr_warn("distance array capped at %d entries\n",
+			MAX_DISTANCE_REF_POINTS);
+		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
+	}
+
+	of_node_put(root);
+	return index;
+
+err:
+	of_node_put(root);
+	return -1;
+}
+
+/*
+ * Report the #address-cells / #size-cells that apply to the first node of
+ * type "memory". Panics when the device tree has no such node.
+ */
+static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
+{
+	struct device_node *mem_node = of_find_node_by_type(NULL, "memory");
+
+	if (!mem_node)
+		panic("numa.c: No memory nodes found!");
+
+	*n_addr_cells = of_n_addr_cells(mem_node);
+	*n_size_cells = of_n_size_cells(mem_node);
+
+	of_node_put(mem_node);
+}
+
+/*
+ * Read @n consecutive 32-bit cells from *@buf, most significant first, and
+ * advance *@buf past them. Returns the combined value.
+ */
+static unsigned long read_n_cells(int n, const __be32 **buf)
+{
+	const __be32 *cell = *buf;
+	unsigned long value = 0;
+
+	for (; n != 0; n--)
+		value = (value << 32) | of_read_number(cell++, 1);
+
+	*buf = cell;
+	return value;
+}
+
+/*
+ * Decoded header of the ibm,associativity-lookup-arrays property, as filled
+ * in by of_get_assoc_arrays().
+ */
+struct assoc_arrays {
+	u32	n_arrays;	/* number of associativity arrays */
+	u32	array_sz;	/* number of cells in each array */
+	const __be32 *arrays;	/* first cell of the first array */
+};
+
+/*
+ * Retrieve and validate the list of associativity arrays for drconf
+ * memory from the ibm,associativity-lookup-arrays property of the
+ * device tree.
+ *
+ * The layout of the ibm,associativity-lookup-arrays property is a number N
+ * indicating the number of associativity arrays, followed by a number M
+ * indicating the size of each associativity array, followed by a list
+ * of N associativity arrays.
+ *
+ * On success @aa is filled in and 0 is returned. Returns -1 when the node
+ * or property is missing or the property is too short for the N and M it
+ * announces.
+ */
+static int of_get_assoc_arrays(struct assoc_arrays *aa)
+{
+	struct device_node *memory;
+	const __be32 *prop;
+	u32 nr_cells;
+	int len;
+
+	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!memory)
+		return -1;
+
+	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
+	if (!prop || len < (int)(2 * sizeof(unsigned int))) {
+		of_node_put(memory);
+		return -1;
+	}
+	nr_cells = len / sizeof(unsigned int);
+
+	aa->n_arrays = of_read_number(prop++, 1);
+	aa->array_sz = of_read_number(prop++, 1);
+
+	of_node_put(memory);
+
+	/* Now that we know the number of arrays and size of each array,
+	 * revalidate the size of the property read in. The product is
+	 * computed in 64 bits so that large firmware supplied values cannot
+	 * wrap around and slip past the check.
+	 */
+	if (nr_cells < (u64)aa->n_arrays * aa->array_sz + 2)
+		return -1;
+
+	aa->arrays = prop;
+	return 0;
+}
+
+/*
+ * Look up the node id of a drconf LMB via the associativity lookup arrays
+ * and, for FORM1 affinity, record the distance information of that node.
+ *
+ * Returns the node id, or NUMA_NO_NODE when NUMA is disabled, the lookup
+ * arrays are unavailable, or the LMB carries no valid associativity index.
+ */
+static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb)
+{
+	struct assoc_arrays aa = { .arrays = NULL };
+	int default_nid = NUMA_NO_NODE;
+	int nid = default_nid;
+	int rc, index;
+
+	if ((primary_domain_index < 0) || !numa_enabled)
+		return default_nid;
+
+	rc = of_get_assoc_arrays(&aa);
+	if (rc)
+		return default_nid;
+
+	if (primary_domain_index <= aa.array_sz &&
+	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+		const __be32 *associativity;
+
+		index = lmb->aa_index * aa.array_sz;
+		associativity = &aa.arrays[index];
+		nid = __associativity_to_nid(associativity, aa.array_sz);
+		/* Node 0 is a valid node: only NUMA_NO_NODE is skipped */
+		if (nid >= 0 && affinity_form == FORM1_AFFINITY) {
+			/*
+			 * lookup array associativity entries have
+			 * no length of the array as the first element.
+			 */
+			__initialize_form1_numa_distance(associativity, aa.array_sz);
+		}
+	}
+	return nid;
+}
+
+/*
+ * Counterpart of of_node_to_nid_single() for memory that is described by
+ * the ibm,dynamic-reconfiguration-memory node.
+ *
+ * Returns the node id of @lmb, or NUMA_NO_NODE when it cannot be determined.
+ */
+int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+{
+	struct assoc_arrays aa = { .arrays = NULL };
+	int index;
+
+	if (!numa_enabled || primary_domain_index < 0)
+		return NUMA_NO_NODE;
+
+	if (of_get_assoc_arrays(&aa))
+		return NUMA_NO_NODE;
+
+	/* The primary domain entry has to exist in every lookup array */
+	if (primary_domain_index > aa.array_sz)
+		return NUMA_NO_NODE;
+
+	/* The LMB has to reference one of the arrays */
+	if (lmb->flags & DRCONF_MEM_AI_INVALID)
+		return NUMA_NO_NODE;
+	if (lmb->aa_index >= aa.n_arrays)
+		return NUMA_NO_NODE;
+
+	index = lmb->aa_index * aa.array_sz;
+	return __associativity_to_nid(&aa.arrays[index], aa.array_sz);
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+
+/*
+ * Ask the hypervisor (VPHN) for the associativity of logical CPU @lcpu and
+ * store it in @associativity. Returns 0 on success, -1 when VPHN is not
+ * available or the hcall fails.
+ */
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+	long hwid;
+
+	/*
+	 * The device tree of a shared lpar carries no node associativity, and
+	 * at this point lppaca (or its __old_status field) may not have been
+	 * updated, so the kernel cannot yet tell whether it runs on a shared
+	 * lpar. Always ask for an explicit associativity, shared or
+	 * dedicated; callers use the device tree property as a fallback.
+	 * cpu_to_phys_id is only valid between smp_setup_cpu_maps() and
+	 * smp_setup_pacas().
+	 */
+	if (!firmware_has_feature(FW_FEATURE_VPHN))
+		return -1;
+
+	if (cpu_to_phys_id)
+		hwid = cpu_to_phys_id[lcpu];
+	else
+		hwid = get_hard_smp_processor_id(lcpu);
+
+	if (hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity) != H_SUCCESS)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Node id of logical CPU @lcpu according to VPHN, or NUMA_NO_NODE when the
+ * associativity cannot be obtained.
+ */
+static int vphn_get_nid(long lcpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+
+	if (__vphn_get_associativity(lcpu, associativity) != 0)
+		return NUMA_NO_NODE;
+
+	return associativity_to_nid(associativity);
+}
+#else
+
+/* Stub for builds without CONFIG_PPC_SPLPAR: always reports failure (-1). */
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+	return -1;
+}
+
+/* Stub for builds without CONFIG_PPC_SPLPAR: no node id is available. */
+static int vphn_get_nid(long unused)
+{
+	return NUMA_NO_NODE;
+}
+#endif  /* CONFIG_PPC_SPLPAR */
+
+/*
+ * Figure out to which domain a cpu belongs and stick it there.
+ * Return the id of the domain used.
+ *
+ * Lookup order: the mapping already recorded for the first thread of the
+ * core, then VPHN, then the CPU's device tree node. A result that is
+ * negative or not a possible node is replaced by first_online_node.
+ */
+static int numa_setup_cpu(unsigned long lcpu)
+{
+	struct device_node *cpu;
+	int fcpu = cpu_first_thread_sibling(lcpu);
+	int nid = NUMA_NO_NODE;
+
+	/* CPUs that are not present are simply parked on the first online node */
+	if (!cpu_present(lcpu)) {
+		set_cpu_numa_node(lcpu, first_online_node);
+		return first_online_node;
+	}
+
+	/*
+	 * If a valid cpu-to-node mapping is already available, use it
+	 * directly instead of querying the firmware, since it represents
+	 * the most recent mapping notified to us by the platform (eg: VPHN).
+	 * Since cpu_to_node binding remains the same for all threads in the
+	 * core. If a valid cpu-to-node mapping is already available, for
+	 * the first thread in the core, use it.
+	 */
+	nid = numa_cpu_lookup_table[fcpu];
+	if (nid >= 0) {
+		map_cpu_to_node(lcpu, nid);
+		return nid;
+	}
+
+	nid = vphn_get_nid(lcpu);
+	if (nid != NUMA_NO_NODE)
+		goto out_present;
+
+	/* VPHN gave nothing: fall back to the CPU's device tree node */
+	cpu = of_get_cpu_node(lcpu, NULL);
+
+	if (!cpu) {
+		WARN_ON(1);
+		if (cpu_present(lcpu))
+			goto out_present;
+		else
+			goto out;
+	}
+
+	nid = of_node_to_nid_single(cpu);
+	of_node_put(cpu);
+
+out_present:
+	if (nid < 0 || !node_possible(nid))
+		nid = first_online_node;
+
+	/*
+	 * Update for the first thread of the core. All threads of a core
+	 * have to be part of the same node. This not only avoids querying
+	 * for every other thread in the core, but also avoids a case
+	 * where virtual node associativity change causes subsequent threads
+	 * of a core to be associated with different nid. However if first
+	 * thread is already online, expect it to have a valid mapping.
+	 */
+	if (fcpu != lcpu) {
+		WARN_ON(cpu_online(fcpu));
+		map_cpu_to_node(fcpu, nid);
+	}
+
+	map_cpu_to_node(lcpu, nid);
+out:
+	return nid;
+}
+
+/*
+ * Warn (once per call) if an online thread sibling of @cpu is mapped to a
+ * node other than @node.
+ */
+static void verify_cpu_node_mapping(int cpu, int node)
+{
+	int first = cpu_first_thread_sibling(cpu);
+	int thread;
+
+	/* All threads of a core are expected to sit on the same node */
+	for (thread = first; thread < first + threads_per_core; thread++) {
+		if (thread == cpu || cpu_is_offline(thread))
+			continue;
+
+		if (cpu_to_node(thread) == node)
+			continue;
+
+		WARN(1, "CPU thread siblings %d and %d don't belong"
+			" to the same node!\n", cpu, thread);
+		break;
+	}
+}
+
+/*
+ * CPU hotplug "prepare" callback: assign @cpu to its node and check the
+ * result against its thread siblings. Must run before the sched domains
+ * notifier. Always returns 0.
+ */
+static int ppc_numa_cpu_prepare(unsigned int cpu)
+{
+	verify_cpu_node_mapping(cpu, numa_setup_cpu(cpu));
+
+	return 0;
+}
+
+/*
+ * CPU hotplug teardown callback paired with ppc_numa_cpu_prepare(). It does
+ * nothing and always returns 0.
+ */
+static int ppc_numa_cpu_dead(unsigned int cpu)
+{
+	return 0;
+}
+
+/*
+ * Clip the region [@start, @start + @size) against the end of DRAM.
+ *
+ * Returns the size the region should have: @size when it fits, the part
+ * below the end of DRAM when it straddles it, and 0 when it lies wholly
+ * above it (the region should then be discarded).
+ */
+static unsigned long __init numa_enforce_memory_limit(unsigned long start,
+						      unsigned long size)
+{
+	/*
+	 * memblock_end_of_DRAM() is used rather than memory_limit: it has
+	 * already been adjusted for the limit and copes with memory holes
+	 * below it. In the iommu_is_off case memory_limit is not set but is
+	 * implicitly enforced.
+	 */
+	phys_addr_t dram_end = memblock_end_of_DRAM();
+
+	if (start >= dram_end)
+		return 0;
+
+	if (start + size > dram_end)
+		return dram_end - start;
+
+	return size;
+}
+
+/*
+ * Read the range counter of one linux,drconf-usable-memory entry.
+ *
+ * Every lmb in ibm,dynamic-memory has a matching entry in that property:
+ * a counter followed by that many (base, size) pairs. *@usm is advanced
+ * past the counter.
+ */
+static inline int __init read_usm_ranges(const __be32 **usm)
+{
+	int nr_ranges = read_n_cells(n_mem_size_cells, usm);
+
+	return nr_ranges;
+}
+
+/*
+ * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
+ * node.  This assumes n_mem_{addr,size}_cells have been set.
+ *
+ * @lmb:  the LMB being walked
+ * @usm:  cursor into linux,drconf-usable-memory; *@usm is non-NULL only in
+ *        the kexec/kdump case and is advanced as ranges are consumed
+ * @data: unused
+ *
+ * Every usable range of the LMB is assigned to a node in memblock and that
+ * node is marked online. Always returns 0.
+ */
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+					const __be32 **usm,
+					void *data)
+{
+	unsigned int ranges, is_kexec_kdump = 0;
+	unsigned long base, size, sz;
+	int nid;
+
+	/*
+	 * Skip this block if the reserved bit is set in flags (0x80)
+	 * or if the block is not assigned to this partition (0x8)
+	 */
+	if ((lmb->flags & DRCONF_MEM_RESERVED)
+	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
+		return 0;
+
+	if (*usm)
+		is_kexec_kdump = 1;
+
+	base = lmb->base_addr;
+	size = drmem_lmb_size();
+	ranges = 1;
+
+	if (is_kexec_kdump) {
+		ranges = read_usm_ranges(usm);
+		if (!ranges) /* there are no (base, size) duple */
+			return 0;
+	}
+
+	do {
+		if (is_kexec_kdump) {
+			base = read_n_cells(n_mem_addr_cells, usm);
+			size = read_n_cells(n_mem_size_cells, usm);
+		}
+
+		nid = get_nid_and_numa_distance(lmb);
+		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
+					  &nid);
+		/*
+		 * The lookup may yield NUMA_NO_NODE, and node_set_online() is
+		 * UB for a negative nid. Fall back to node 0, the same default
+		 * that memory nodes without associativity get.
+		 */
+		if (nid < 0)
+			nid = 0;
+		node_set_online(nid);
+		sz = numa_enforce_memory_limit(base, size);
+		if (sz)
+			memblock_set_node(base, sz, &memblock.memory, nid);
+	} while (--ranges);
+
+	return 0;
+}
+
+/*
+ * Discover the NUMA topology at boot: determine the primary domain index
+ * and affinity form, online the nodes of all present CPUs, then assign
+ * every memory range (memory nodes and drconf LMBs) to a node in memblock.
+ *
+ * Returns 0 on success, or a negative value when NUMA was disabled by the
+ * user or the primary domain index cannot be determined; the caller then
+ * falls back to the non-NUMA setup.
+ */
+static int __init parse_numa_properties(void)
+{
+	struct device_node *memory;
+	int default_nid = 0;
+	unsigned long i;
+	const __be32 *associativity;
+
+	if (numa_enabled == 0) {
+		pr_warn("disabled by user\n");
+		return -1;
+	}
+
+	primary_domain_index = find_primary_domain_index();
+
+	if (primary_domain_index < 0) {
+		/*
+		 * if we fail to parse primary_domain_index from device tree
+		 * mark the numa disabled, boot with numa disabled.
+		 */
+		numa_enabled = false;
+		return primary_domain_index;
+	}
+
+	pr_debug("associativity depth for CPU/Memory: %d\n", primary_domain_index);
+
+	/*
+	 * If it is FORM2 initialize the distance table here.
+	 */
+	if (affinity_form == FORM2_AFFINITY)
+		initialize_form2_numa_distance_lookup_table();
+
+	/*
+	 * Even though we connect cpus to numa domains later in SMP
+	 * init, we need to know the node ids now. This is because
+	 * each node to be onlined must have NODE_DATA etc backing it.
+	 */
+	for_each_present_cpu(i) {
+		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
+		struct device_node *cpu;
+		int nid = NUMA_NO_NODE;
+
+		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
+
+		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
+			nid = associativity_to_nid(vphn_assoc);
+			initialize_form1_numa_distance(vphn_assoc);
+		} else {
+
+			/*
+			 * Don't fall back to default_nid yet -- we will plug
+			 * cpus into nodes once the memory scan has discovered
+			 * the topology.
+			 */
+			cpu = of_get_cpu_node(i, NULL);
+			BUG_ON(!cpu);
+
+			associativity = of_get_associativity(cpu);
+			if (associativity) {
+				nid = associativity_to_nid(associativity);
+				initialize_form1_numa_distance(associativity);
+			}
+			of_node_put(cpu);
+		}
+
+		/* node_set_online() is an UB if 'nid' is negative */
+		if (likely(nid >= 0))
+			node_set_online(nid);
+	}
+
+	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
+
+	for_each_node_by_type(memory, "memory") {
+		unsigned long start;
+		unsigned long size;
+		int nid;
+		int ranges;
+		const __be32 *memcell_buf;
+		unsigned int len;
+
+		memcell_buf = of_get_property(memory,
+			"linux,usable-memory", &len);
+		if (!memcell_buf || len <= 0)
+			memcell_buf = of_get_property(memory, "reg", &len);
+		if (!memcell_buf || len <= 0)
+			continue;
+
+		/* ranges in cell */
+		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+
+		/*
+		 * A property too short to hold one (address, size) pair gives
+		 * zero ranges and is skipped instead of being read past its
+		 * end.
+		 */
+		for (; ranges > 0; ranges--) {
+			/* these are order-sensitive, and modify the buffer pointer */
+			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
+			size = read_n_cells(n_mem_size_cells, &memcell_buf);
+
+			/*
+			 * Assumption: either all memory nodes or none will
+			 * have associativity properties.  If none, then
+			 * everything goes to default_nid.
+			 */
+			associativity = of_get_associativity(memory);
+			if (associativity) {
+				nid = associativity_to_nid(associativity);
+				initialize_form1_numa_distance(associativity);
+			} else
+				nid = default_nid;
+
+			/*
+			 * An associativity that maps to no usable node must
+			 * not reach node_set_online() as a negative nid.
+			 */
+			if (nid < 0)
+				nid = default_nid;
+
+			fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+			node_set_online(nid);
+
+			size = numa_enforce_memory_limit(start, size);
+			if (size)
+				memblock_set_node(start, size, &memblock.memory, nid);
+		}
+	}
+
+	/*
+	 * Now do the same thing for each MEMBLOCK listed in the
+	 * ibm,dynamic-memory property in the
+	 * ibm,dynamic-reconfiguration-memory node.
+	 */
+	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (memory) {
+		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
+		of_node_put(memory);
+	}
+
+	return 0;
+}
+
+/*
+ * Non-NUMA fallback: put every memblock pfn range on node 0, or on the
+ * fake nodes requested with "numa=fake=", and mark those nodes online.
+ */
+static void __init setup_nonnuma(void)
+{
+	unsigned long ram_top = memblock_end_of_DRAM();
+	unsigned long ram_total = memblock_phys_mem_size();
+	unsigned long first_pfn, last_pfn;
+	unsigned int nid = 0;
+	int idx;
+
+	pr_debug("Top of RAM: 0x%lx, Total RAM: 0x%lx\n", ram_top, ram_total);
+	pr_debug("Memory hole size: %ldMB\n", (ram_top - ram_total) >> 20);
+
+	for_each_mem_pfn_range(idx, MAX_NUMNODES, &first_pfn, &last_pfn, NULL) {
+		/* May bump nid when a fake node boundary has been crossed */
+		fake_numa_create_new_node(last_pfn, &nid);
+		memblock_set_node(PFN_PHYS(first_pfn),
+				  PFN_PHYS(last_pfn - first_pfn),
+				  &memblock.memory, nid);
+		node_set_online(nid);
+	}
+}
+
+/*
+ * Print, for every online node, the CPUs in its cpumask as a list of
+ * ranges ("first-last"). Does nothing when NUMA is disabled.
+ */
+void __init dump_numa_cpu_topology(void)
+{
+	unsigned int node;
+	unsigned int cpu;
+
+	if (!numa_enabled)
+		return;
+
+	for_each_online_node(node) {
+		/* Length of the run of consecutive CPUs currently being printed */
+		unsigned int run = 0;
+
+		pr_info("Node %d CPUs:", node);
+
+		/*
+		 * Walk every CPU number rather than using a CPU iterator,
+		 * otherwise the holes in the cpumap would go unnoticed.
+		 */
+		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+			if (!cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
+				/* A run just ended: close it if it had > 1 CPU */
+				if (run > 1)
+					pr_cont("-%u", cpu - 1);
+				run = 0;
+				continue;
+			}
+
+			if (run == 0)
+				pr_cont(" %u", cpu);
+			++run;
+		}
+
+		if (run > 1)
+			pr_cont("-%u", nr_cpu_ids - 1);
+		pr_cont("\n");
+	}
+}
+
+/*
+ * Allocate and initialise NODE_DATA for node @nid, preferably from the
+ * node's own memory. @start_pfn/@end_pfn give the pfn span of the node.
+ * Panics if the allocation fails.
+ */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
+{
+	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+	u64 nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	int home_nid;
+
+	if (!nd_pa)
+		panic("Cannot allocate %zu bytes for node %d data\n",
+		      nd_size, nid);
+
+	/* Report where the structure ended up */
+	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
+		nd_pa, nd_pa + nd_size - 1);
+	home_nid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (home_nid != nid)
+		pr_info("    NODE_DATA(%d) on node %d\n", nid, home_nid);
+
+	/* Publish it, then start from a clean slate */
+	node_data[nid] = __va(nd_pa);
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+/*
+ * Extend node_possible_map with the number of NUMA nodes that /rtas
+ * advertises for the primary domain, and enable coregroup support when
+ * the property has more than primary_domain_index + 2 cells.
+ *
+ * Does nothing when NUMA is disabled, /rtas is missing, or the property is
+ * absent or too short to contain the primary domain entry.
+ */
+static void __init find_possible_nodes(void)
+{
+	struct device_node *rtas;
+	const __be32 *domains = NULL;
+	int prop_length, max_nodes;
+	u32 i;
+
+	if (!numa_enabled)
+		return;
+
+	rtas = of_find_node_by_path("/rtas");
+	if (!rtas)
+		return;
+
+	/*
+	 * ibm,current-associativity-domains is a fairly recent property. If
+	 * it doesn't exist, then fallback on ibm,max-associativity-domains.
+	 * Current denotes what the platform can support compared to max
+	 * which denotes what the Hypervisor can support.
+	 *
+	 * If the LPAR is migratable, new nodes might be activated after a LPM,
+	 * so we should consider the max number in that case.
+	 */
+	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
+		domains = of_get_property(rtas,
+					  "ibm,current-associativity-domains",
+					  &prop_length);
+	if (!domains) {
+		domains = of_get_property(rtas, "ibm,max-associativity-domains",
+					&prop_length);
+		if (!domains)
+			goto out;
+	}
+
+	/* Make sure the cell about to be read is part of the property */
+	prop_length /= sizeof(int);
+	if (primary_domain_index < 0 || prop_length <= primary_domain_index)
+		goto out;
+
+	max_nodes = of_read_number(&domains[primary_domain_index], 1);
+	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
+
+	/* node_possible_map only has room for MAX_NUMNODES bits */
+	if (max_nodes < 0 || max_nodes > MAX_NUMNODES) {
+		pr_warn("possible nodes capped at %d\n", MAX_NUMNODES);
+		max_nodes = MAX_NUMNODES;
+	}
+
+	for (i = 0; i < max_nodes; i++) {
+		if (!node_possible(i))
+			node_set(i, node_possible_map);
+	}
+
+	if (prop_length > primary_domain_index + 2)
+		coregroup_enabled = 1;
+
+out:
+	of_node_put(rtas);
+}
+
+/*
+ * Boot-time entry point for the memory topology: set the pfn limits, parse
+ * the NUMA properties (or fall back to the non-NUMA layout), settle the
+ * set of possible nodes and give every possible CPU a node.
+ */
+void __init mem_topology_setup(void)
+{
+	int cpu;
+
+	max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	max_low_pfn = max_pfn;
+	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
+
+	/*
+	 * Linux/mm expects node 0 to be online at boot. On PowerPC node 0 is
+	 * no different from any other node and may well be cpuless and
+	 * memoryless, so take it offline for now to keep such a dummy node
+	 * from showing up as online. Any node that really has cpus or memory
+	 * gets marked online anyway.
+	 */
+	node_set_offline(0);
+
+	if (parse_numa_properties() != 0)
+		setup_nonnuma();
+
+	/*
+	 * Restrict the possible nodes to the ones found online, then let
+	 * find_possible_nodes() add those the platform expects to use for
+	 * its affinity calculations.
+	 */
+	nodes_and(node_possible_map, node_possible_map, node_online_map);
+
+	find_possible_nodes();
+
+	setup_node_to_cpumask_map();
+
+	reset_numa_cpu_lookup_table();
+
+	/*
+	 * Powerpc with CONFIG_NUMA always used to have a node 0, even if it
+	 * was memoryless or cpuless, and cpu_to_node() of every possible but
+	 * not present cpu pointed at it. To get rid of that dummy node, all
+	 * such cpus must be given a proper node here.
+	 */
+	for_each_possible_cpu(cpu) {
+		numa_setup_cpu(cpu);
+	}
+}
+
+void __init initmem_init(void)
+{
+	unsigned long start_pfn, end_pfn;
+	int nid;
+
+	memblock_dump_all();
+
+	/* Set up node data for each online node from its pfn range. */
+	for_each_online_node(nid) {
+		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+		setup_node_data(nid, start_pfn, end_pfn);
+	}
+
+	sparse_init();
+
+	/*
+	 * The numa_cpu_lookup_table must be accurate for every CPU, even
+	 * ones not yet onlined, so that cpu_to_{node,mem} can be used
+	 * early in boot, cf. smp_prepare_cpus().
+	 * _nocalls() + manual invocation is used because cpuhp is not yet
+	 * initialized for the boot CPU.
+	 */
+	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
+				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
+}
+
+/* Handle "numa=": "off" disables NUMA, "fake=" records the text after it. */
+static int __init early_numa(char *p)
+{
+	char *fake;
+
+	if (!p)
+		return 0;
+	if (strstr(p, "off"))
+		numa_enabled = 0;
+	fake = strstr(p, "fake=");
+	if (fake)
+		cmdline = fake + strlen("fake=");
+	return 0;
+}
+early_param("numa", early_numa);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Find the node associated with a hot added memory section for
+ * memory represented in the device tree by the property
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
+ */
+static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
+{
+	unsigned long lmb_size = drmem_lmb_size();
+	struct drmem_lmb *lmb;
+	int nid = NUMA_NO_NODE;
+
+	for_each_drmem_lmb(lmb) {
+		/*
+		 * Only LMBs that are assigned to this partition and not
+		 * reserved are candidates.
+		 */
+		if (!(lmb->flags & DRCONF_MEM_ASSIGNED) ||
+		    (lmb->flags & DRCONF_MEM_RESERVED))
+			continue;
+
+		/* The LMB covers [base_addr, base_addr + lmb_size). */
+		if (scn_addr >= lmb->base_addr &&
+		    scn_addr < lmb->base_addr + lmb_size) {
+			nid = of_drconf_to_nid_single(lmb);
+			break;
+		}
+	}
+
+	return nid;
+}
+
+/*
+ * Find the node associated with a hot added memory section for memory
+ * represented in the device tree as a node (i.e. memory@XXXX) for
+ * each memblock.
+ */
+static int hot_add_node_scn_to_nid(unsigned long scn_addr)
+{
+	struct device_node *memory;
+	int nid = NUMA_NO_NODE;
+
+	for_each_node_by_type(memory, "memory") {
+		struct resource res;
+		int idx;
+
+		/* Walk this node's address ranges until one holds scn_addr. */
+		for (idx = 0; !of_address_to_resource(memory, idx, &res); idx++) {
+			if (scn_addr >= res.start && scn_addr <= res.end) {
+				nid = of_node_to_nid_single(memory);
+				break;
+			}
+		}
+
+		/* A resolved node id ends the search. */
+		if (nid >= 0)
+			break;
+	}
+
+	/*
+	 * Release the node left in memory by the iterator; presumably this
+	 * is only non-NULL when the loop above was left early.
+	 */
+	of_node_put(memory);
+	return nid;
+}
+
+/*
+ * Find the node associated with a hot added memory section.  Section
+ * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
+ * sections are fully contained within a single MEMBLOCK.
+ */
+int hot_add_scn_to_nid(unsigned long scn_addr)
+{
+	struct device_node *drconf;
+	int nid;
+
+	if (!numa_enabled)
+		return first_online_node;
+
+	/* The presence of the drconf node selects which lookup is used. */
+	drconf = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!drconf) {
+		nid = hot_add_node_scn_to_nid(scn_addr);
+	} else {
+		nid = hot_add_drconf_scn_to_nid(scn_addr);
+		of_node_put(drconf);
+	}
+
+	if (nid >= 0 && node_possible(nid))
+		return nid;
+	return first_online_node;
+}
+
+static u64 hot_add_drconf_memory_max(void)
+{
+	const __be64 *lrdr = NULL;
+	struct device_node *np;
+
+	/* Prefer the capacity advertised under /rtas when it is present. */
+	np = of_find_node_by_path("/rtas");
+	if (np) {
+		lrdr = of_get_property(np, "ibm,lrdr-capacity", NULL);
+		of_node_put(np);
+	}
+	if (lrdr)
+		return be64_to_cpup(lrdr);
+
+	/* Otherwise use the drmem LMB maximum if the drconf node exists. */
+	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!np)
+		return 0;
+	of_node_put(np);
+	return drmem_lmb_memory_max();
+}
+
+/*
+ * memory_hotplug_max - return max address of memory that may be added
+ *
+ * Returns the larger of the drconf hot-add limit and the current end of
+ * DRAM. Currently only used on systems that support drconfig memory hotplug.
+ */
+u64 memory_hotplug_max(void)
+{
+	return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+/* Virtual Processor Home Node (VPHN) support */
+#ifdef CONFIG_PPC_SPLPAR
+static int topology_inited;
+
+/*
+ * Retrieve the new associativity information for a virtual processor's
+ * home node.
+ *
+ * @cpu: logical CPU number, translated to its hardware id for the hcall
+ * @associativity: buffer handed to hcall_vphn() to receive the result
+ *
+ * Returns the hcall_vphn() status unchanged; anything other than
+ * H_SUCCESS is logged, rate limited.
+ */
+static long vphn_get_associativity(unsigned long cpu,
+					__be32 *associativity)
+{
+	long status;
+
+	status = hcall_vphn(get_hard_smp_processor_id(cpu), VPHN_FLAG_VCPU,
+			    associativity);
+
+	/* Logging only: every path returns the raw hcall status. */
+	if (status == H_SUCCESS) {
+		pr_debug("VPHN hcall succeeded. Reset polling...\n");
+	} else if (status == H_FUNCTION) {
+		pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
+	} else if (status == H_HARDWARE) {
+		pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
+				   "preventing VPHN. Disabling polling...\n");
+	} else if (status == H_PARAMETER) {
+		pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
+				   "Disabling polling...\n");
+	} else {
+		pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n",
+				   status);
+	}
+
+	return status;
+}
+
+void find_and_update_cpu_nid(int cpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int new_nid;
+
+	/* Use associativity from first thread for all siblings */
+	if (vphn_get_associativity(cpu, associativity))
+		return;
+
+	/* Do not have previous associativity, so find it now. */
+	new_nid = associativity_to_nid(associativity);
+
+	if (new_nid < 0 || !node_possible(new_nid))
+		new_nid = first_online_node;
+	else
+		// Associate node <-> cpu, so cpu_up() calls
+		// try_online_node() on the right node.
+		set_cpu_numa_node(cpu, new_nid);
+
+	pr_debug("%s:%d cpu %d nid %d\n", __func__, __LINE__, cpu, new_nid);
+}
+
+int cpu_to_coregroup_id(int cpu)
+{
+	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int index;
+
+	/* Valid CPU numbers are 0 .. nr_cpu_ids - 1; reject anything else. */
+	if (cpu < 0 || cpu >= nr_cpu_ids)
+		return -1;
+
+	/* Without coregroup or VPHN support, fall back to the core id. */
+	if (!coregroup_enabled || !firmware_has_feature(FW_FEATURE_VPHN))
+		goto out;
+
+	if (vphn_get_associativity(cpu, associativity))
+		goto out;
+
+	/* Cell 0 is read as an entry count; the id is entry index - 1. */
+	index = of_read_number(associativity, 1);
+	if (index > primary_domain_index + 1)
+		return of_read_number(&associativity[index - 1], 1);
+
+out:
+	return cpu_to_core_id(cpu);
+}
+
+static int topology_update_init(void)
+{
+	topology_inited = 1;
+	return 0;
+}
+device_initcall(topology_update_init);
+#endif /* CONFIG_PPC_SPLPAR */
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644
index 0000000000..6163e484bc
--- /dev/null
+++ b/arch/powerpc/mm/pageattr.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019-2021, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+/* Pass pte_update() the bits only in @old and the bits only in @new. */
+static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
+				    unsigned long old, unsigned long new)
+{
+	return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0);
+}
+
+/*
+ * Updates the attributes of a page atomically; @data carries the
+ * SET_MEMORY_* action. Always returns 0, even for an unknown action.
+ * This sequence is safe against concurrent updates, and also allows updating
+ * the attributes of a page currently being executed or accessed.
+ */
+static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+	long action = (long)data;
+
+	addr &= PAGE_MASK;
+	/* modify the PTE bits according to the requested action */
+	switch (action) {
+	case SET_MEMORY_RO:
+		/* Don't clear DIRTY bit */
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
+		break;
+	case SET_MEMORY_RW:
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
+		break;
+	case SET_MEMORY_NX:
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO);
+		break;
+	case SET_MEMORY_X:
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
+		break;
+	case SET_MEMORY_NP:
+		pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0);
+		break;
+	case SET_MEMORY_P:
+		pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	/* See ptesync comment in radix__set_pte_at() */
+	if (radix_enabled())
+		asm volatile("ptesync": : :"memory");
+
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+	return 0;
+}
+
+int change_memory_attr(unsigned long addr, int numpages, long action)
+{
+	unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+	unsigned long size = numpages * PAGE_SIZE;
+
+	if (!numpages)
+		return 0;
+
+	if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) &&
+			 is_vm_area_hugepages((void *)addr)))
+		return -EINVAL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * On hash, the linear mapping is not in the Linux page table so
+	 * apply_to_existing_page_range() will have no effect. If in the future
+	 * the set_memory_* functions are used on the linear map this will need
+	 * to be updated.
+	 */
+	if (!radix_enabled()) {
+		int region = get_region_id(addr);
+
+		if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
+			return -EINVAL;
+	}
+#endif
+
+	return apply_to_existing_page_range(&init_mm, start, size,
+					    change_page_attr, (void *)action);
+}
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
new file mode 100644
index 0000000000..8c31802f97
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ *  Handling Page Tables through page fragments
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+void pte_frag_destroy(void *pte_frag)
+{
+	struct ptdesc *ptdesc = virt_to_ptdesc(pte_frag);
+	unsigned long offset = (unsigned long)pte_frag & ~PAGE_MASK;
+	int used = offset >> PTE_FRAG_SIZE_SHIFT;
+
+	/*
+	 * Drop all the pending references; a PTE page holds PTE_FRAG_NR frags.
+	 */
+	if (!atomic_sub_and_test(PTE_FRAG_NR - used, &ptdesc->pt_frag_refcount))
+		return;
+	pagetable_pte_dtor(ptdesc);
+	pagetable_free(ptdesc);
+}
+
+static pte_t *get_pte_from_cache(struct mm_struct *mm)
+{
+	void *frag, *next;
+
+	/* With a single fragment per page there is nothing to cache. */
+	if (PTE_FRAG_NR == 1)
+		return NULL;
+
+	spin_lock(&mm->page_table_lock);
+	frag = pte_frag_get(&mm->context);
+	if (frag) {
+		next = frag + PTE_FRAG_SIZE;
+		/* Reaching the page boundary means every fragment is taken. */
+		if (!((unsigned long)next & ~PAGE_MASK))
+			next = NULL;
+		pte_frag_set(&mm->context, next);
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	return (pte_t *)frag;
+}
+
+static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
+{
+	void *ret = NULL;
+	struct ptdesc *ptdesc;
+
+	if (!kernel) {
+		ptdesc = pagetable_alloc(PGALLOC_GFP | __GFP_ACCOUNT, 0);
+		if (!ptdesc)
+			return NULL;
+		if (!pagetable_pte_ctor(ptdesc)) {
+			pagetable_free(ptdesc);
+			return NULL;
+		}
+	} else {
+		ptdesc = pagetable_alloc(PGALLOC_GFP, 0);
+		if (!ptdesc)
+			return NULL;
+	}
+
+	atomic_set(&ptdesc->pt_frag_refcount, 1);
+
+	ret = ptdesc_address(ptdesc);
+	/*
+	 * if we support only one fragment just return the
+	 * allocated page.
+	 */
+	if (PTE_FRAG_NR == 1)
+		return ret;
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * If a fragment is already cached in the context, leave it
+	 * alone and return the allocated page with a single fragment
+	 * count.
+	 */
+	if (likely(!pte_frag_get(&mm->context))) {
+		atomic_set(&ptdesc->pt_frag_refcount, PTE_FRAG_NR);
+		pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	return (pte_t *)ret;
+}
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
+{
+	pte_t *cached = get_pte_from_cache(mm);
+
+	/* Only allocate a fresh page when the cache has nothing to give. */
+	if (!cached)
+		return __alloc_for_ptecache(mm, kernel);
+
+	return cached;
+}
+
+/* Free a user PTE page; also used as the call_rcu() callback. */
+static void pte_free_now(struct rcu_head *head)
+{
+	struct ptdesc *pt = container_of(head, struct ptdesc, pt_rcu_head);
+
+	pagetable_pte_dtor(pt);
+	pagetable_free(pt);
+}
+
+void pte_fragment_free(unsigned long *table, int kernel)
+{
+	struct ptdesc *pt = virt_to_ptdesc(table);
+
+	if (pagetable_is_reserved(pt))
+		return free_reserved_ptdesc(pt);
+	BUG_ON(atomic_read(&pt->pt_frag_refcount) <= 0);
+	if (!atomic_dec_and_test(&pt->pt_frag_refcount))
+		return;
+	/* Last reference: user pages flagged active are freed via RCU. */
+	if (kernel)
+		pagetable_free(pt);
+	else if (folio_test_clear_active(ptdesc_folio(pt)))
+		call_rcu(&pt->pt_rcu_head, pte_free_now);
+	else
+		pte_free_now(&pt->pt_rcu_head);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* Flag the page active so its final pte_fragment_free() goes through RCU. */
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+	struct page *pt_page = virt_to_page(pgtable);
+
+	SetPageActive(pt_page);
+	pte_fragment_free((unsigned long *)pgtable, 0);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
new file mode 100644
index 0000000000..4d69bfb9bc
--- /dev/null
+++ b/arch/powerpc/mm/pgtable.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains common routines for dealing with free of page tables
+ * Along with common page table handling code
+ *
+ *  Derived from arch/powerpc/mm/tlb_64.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ */
+
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
+
+static inline int is_exec_fault(void)
+{
+	return current->thread.regs && TRAP(current->thread.regs) == 0x400;
+}
+
+/* We only try to do i/d cache coherency on stuff that looks like
+ * reasonably "normal" PTEs. We currently require a PTE to be present
+ * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
+ * on userspace PTEs
+ */
+static inline int pte_looks_normal(pte_t pte)
+{
+	/* Must be present, and neither special nor cache inhibited. */
+	if (!pte_present(pte) || pte_special(pte))
+		return 0;
+	if (pte_ci(pte))
+		return 0;
+
+	/* Of those, only userspace PTEs qualify. */
+	return pte_user(pte) ? 1 : 0;
+}
+
+/* Folio behind @pte, or NULL if its pfn is invalid or the page reserved. */
+static struct folio *maybe_pte_to_folio(pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
+	struct page *pg;
+
+	if (unlikely(!pfn_valid(pfn)))
+		return NULL;
+
+	pg = pfn_to_page(pfn);
+	return PageReserved(pg) ? NULL : page_folio(pg);
+}
+
+#ifdef CONFIG_PPC_BOOK3S
+
+/* Server-style MMU handles coherency when hashing if HW exec permission
+ * is supported per page (currently 64-bit only). If not, we always
+ * flush the cache for valid PTEs in set_pte. Embedded CPUs without HW exec
+ * support fall into the same category.
+ */
+
+static pte_t set_pte_filter_hash(pte_t pte)
+{
+	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
+				       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
+		struct folio *folio = maybe_pte_to_folio(pte);
+		if (!folio)
+			return pte;
+		if (!test_bit(PG_dcache_clean, &folio->flags)) {
+			flush_dcache_icache_folio(folio);
+			set_bit(PG_dcache_clean, &folio->flags);
+		}
+	}
+	return pte;
+}
+
+#else /* CONFIG_PPC_BOOK3S */
+
+static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+/* Embedded type MMU with HW exec support. This is a bit more complicated
+ * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
+ * instead we "filter out" the exec permission for non clean pages.
+ *
+ * This is also called once for the folio. So only work with folio->flags here.
+ */
+static inline pte_t set_pte_filter(pte_t pte)
+{
+	struct folio *folio;
+
+	if (radix_enabled())
+		return pte;
+
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return set_pte_filter_hash(pte);
+
+	/* No exec permission in the first place, move on */
+	if (!pte_exec(pte) || !pte_looks_normal(pte))
+		return pte;
+
+	/* If you set _PAGE_EXEC on weird pages you're on your own */
+	folio = maybe_pte_to_folio(pte);
+	if (unlikely(!folio))
+		return pte;
+
+	/* If the page is already clean, we move on */
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		return pte;
+
+	/* If it's an exec fault, we flush the cache and mark it clean */
+	if (is_exec_fault()) {
+		flush_dcache_icache_folio(folio);
+		set_bit(PG_dcache_clean, &folio->flags);
+		return pte;
+	}
+
+	/* Else, we filter out _PAGE_EXEC */
+	return pte_exprotect(pte);
+}
+
+static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
+				     int dirty)
+{
+	struct folio *folio;
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+		return pte;
+
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return pte;
+
+	/* So here, we only care about exec faults, as we use them
+	 * to recover lost _PAGE_EXEC and perform I$/D$ coherency
+	 * if necessary. Also if _PAGE_EXEC is already set, same deal,
+	 * we just bail out
+	 */
+	if (dirty || pte_exec(pte) || !is_exec_fault())
+		return pte;
+
+#ifdef CONFIG_DEBUG_VM
+	/* So this is an exec fault, _PAGE_EXEC is not set. If it was
+	 * an error we would have bailed out earlier in do_page_fault()
+	 * but let's make sure of it
+	 */
+	if (WARN_ON(!(vma->vm_flags & VM_EXEC)))
+		return pte;
+#endif /* CONFIG_DEBUG_VM */
+
+	/* If you set _PAGE_EXEC on weird pages you're on your own */
+	folio = maybe_pte_to_folio(pte);
+	if (unlikely(!folio))
+		goto bail;
+
+	/* If the page is already clean, we move on */
+	if (test_bit(PG_dcache_clean, &folio->flags))
+		goto bail;
+
+	/* Clean the page and set PG_dcache_clean */
+	flush_dcache_icache_folio(folio);
+	set_bit(PG_dcache_clean, &folio->flags);
+
+ bail:
+	return pte_mkexec(pte);
+}
+
+/*
+ * set_ptes stores nr consecutive linux PTEs into the linux page table.
+ */
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		pte_t pte, unsigned int nr)
+{
+
+	/* Note: mm->context.id might not yet have been assigned as
+	 * this context might not have been activated yet when this
+	 * is called. Filter the pte value and use the filtered value
+	 * to set up all the ptes in the range.
+	 */
+	pte = set_pte_filter(pte);
+
+	/*
+	 * We don't need to call arch_enter/leave_lazy_mmu_mode()
+	 * because we expect set_ptes to only be used on not present
+	 * and not hw_valid ptes. Hence there is no translation cache flush
+	 * involved that needs to be batched.
+	 */
+	for (;;) {
+
+		/*
+		 * Make sure hardware valid bit is not set. We don't do
+		 * tlb flush for this update.
+		 */
+		VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+		/* Perform the setting of the PTE */
+		__set_pte_at(mm, addr, ptep, pte, 0);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+		/*
+		 * Increment the pfn, keeping the same protection bits.
+		 */
+		pte = pfn_pte(pte_pfn(pte) + 1, pte_pgprot((pte)));
+	}
+}
+
+/* Clear the kernel PTE mapping @va and flush that page from the TLB. */
+void unmap_kernel_page(unsigned long va)
+{
+	pte_t *kpte = pte_offset_kernel(pmd_off_k(va), va);
+
+	pte_clear(&init_mm, va, kpte);
+	flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
+/*
+ * This is called when relaxing access to a PTE. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pte_t *ptep, pte_t entry, int dirty)
+{
+	entry = set_access_flags_filter(entry, vma, dirty);
+
+	/* Nothing to write back if the filtered entry is already in place. */
+	if (pte_same(*(ptep), entry))
+		return 0;
+
+	assert_pte_locked(vma->vm_mm, address);
+	__ptep_set_access_flags(vma, ptep, entry, address, mmu_virtual_psize);
+	return 1;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+			       unsigned long addr, pte_t *ptep,
+			       pte_t pte, int dirty)
+{
+#ifdef HUGETLB_NEED_PRELOAD
+	/*
+	 * The "return 1" forces a call of update_mmu_cache, which will write a
+	 * TLB entry.  Without this, platforms that don't do a write of the TLB
+	 * entry in the TLB miss handler asm will fault ad infinitum.
+	 */
+	ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	return 1;
+#else
+	int changed, psize;
+
+	pte = set_access_flags_filter(pte, vma, dirty);
+	changed = !pte_same(*(ptep), pte);
+	if (changed) {
+
+#ifdef CONFIG_PPC_BOOK3S_64
+		struct hstate *h = hstate_vma(vma);
+
+		psize = hstate_get_psize(h);
+#ifdef CONFIG_DEBUG_VM
+		assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep));
+#endif
+
+#else
+		/*
+		 * Not used on non book3s64 platforms.
+		 * 8xx compares it with mmu_virtual_psize to
+		 * know if it is a huge page or not.
+		 */
+		psize = MMU_PAGE_COUNT;
+#endif
+		__ptep_set_access_flags(vma, ptep, pte, addr, psize);
+	}
+	return changed;
+#endif
+}
+
+#if defined(CONFIG_PPC_8xx)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		     pte_t pte, unsigned long sz)
+{
+	pmd_t *pmd = pmd_off(mm, addr);
+	pte_basic_t val;
+	pte_basic_t *entry = (pte_basic_t *)ptep;
+	int num, i;
+
+	/*
+	 * Make sure hardware valid bit is not set. We don't do
+	 * tlb flush for this update.
+	 */
+	VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+	pte = set_pte_filter(pte);
+
+	val = pte_val(pte);
+
+	num = number_of_cells_per_pte(pmd, val, 1);
+
+	for (i = 0; i < num; i++, entry++, val += SZ_4K)
+		*entry = val;
+}
+#endif
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifdef CONFIG_DEBUG_VM
+void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	if (mm == &init_mm)
+		return;
+	pgd = mm->pgd + pgd_index(addr);
+	BUG_ON(pgd_none(*pgd));
+	p4d = p4d_offset(pgd, addr);
+	BUG_ON(p4d_none(*p4d));
+	pud = pud_offset(p4d, addr);
+	BUG_ON(pud_none(*pud));
+	pmd = pmd_offset(pud, addr);
+	/*
+	 * To collapse normal pages into a hugepage, khugepaged first sets
+	 * the pmd to none to force page fault/gup to take mmap_lock. After
+	 * the pmd is set to none, we do a pte_clear which runs this
+	 * assertion, so if we find the pmd none, return.
+	 */
+	if (pmd_none(*pmd))
+		return;
+	pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
+	BUG_ON(!pte);
+	assert_spin_locked(ptl);
+	pte_unmap(pte);
+}
+#endif /* CONFIG_DEBUG_VM */
+
+unsigned long vmalloc_to_phys(void *va)
+{
+	unsigned long pfn = vmalloc_to_pfn(va);
+
+	BUG_ON(!pfn);
+	return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
+}
+EXPORT_SYMBOL_GPL(vmalloc_to_phys);
+
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page, _PAGE_PTE set
+ * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
+ *
+ * So long as we atomically load page table pointers we are safe against
+ * teardown; we can follow the address down to the page and take a ref on it.
+ * This function needs to be called with interrupts disabled. We use this
+ * variant when we have MSR[EE] = 0 but paca->irq_soft_mask = IRQS_ENABLED.
+ */
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+			bool *is_thp, unsigned *hpage_shift)
+{
+	pgd_t *pgdp;
+	p4d_t p4d, *p4dp;
+	pud_t pud, *pudp;
+	pmd_t pmd, *pmdp;
+	pte_t *ret_pte;
+	hugepd_t *hpdp = NULL;
+	unsigned pdshift;
+
+	if (hpage_shift)
+		*hpage_shift = 0;
+
+	if (is_thp)
+		*is_thp = false;
+
+	/*
+	 * Always operate on the local stack value. This makes sure the
+	 * value doesn't get updated by a parallel THP split/collapse,
+	 * page fault or a page unmap. The returned pte_t * is still not
+	 * stable, so the caller should check it for the above conditions.
+	 * Top level is an exception because it is folded into p4d.
+	 */
+	pgdp = pgdir + pgd_index(ea);
+	p4dp = p4d_offset(pgdp, ea);
+	p4d  = READ_ONCE(*p4dp);
+	pdshift = P4D_SHIFT;
+
+	if (p4d_none(p4d))
+		return NULL;
+
+	if (p4d_is_leaf(p4d)) {
+		ret_pte = (pte_t *)p4dp;
+		goto out;
+	}
+
+	if (is_hugepd(__hugepd(p4d_val(p4d)))) {
+		hpdp = (hugepd_t *)&p4d;
+		goto out_huge;
+	}
+
+	/*
+	 * Even if we end up with an unmap, the pgtable will not
+	 * be freed, because we do an rcu free and here we are
+	 * irq disabled
+	 */
+	pdshift = PUD_SHIFT;
+	pudp = pud_offset(&p4d, ea);
+	pud  = READ_ONCE(*pudp);
+
+	if (pud_none(pud))
+		return NULL;
+
+	if (pud_is_leaf(pud)) {
+		ret_pte = (pte_t *)pudp;
+		goto out;
+	}
+
+	if (is_hugepd(__hugepd(pud_val(pud)))) {
+		hpdp = (hugepd_t *)&pud;
+		goto out_huge;
+	}
+
+	pdshift = PMD_SHIFT;
+	pmdp = pmd_offset(&pud, ea);
+	pmd  = READ_ONCE(*pmdp);
+
+	/*
+	 * A hugepage collapse is captured by this condition, see
+	 * pmdp_collapse_flush.
+	 */
+	if (pmd_none(pmd))
+		return NULL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * A hugepage split is captured by this condition, see
+	 * pmdp_invalidate.
+	 *
+	 * Huge page modification can be caught here too.
+	 */
+	if (pmd_is_serializing(pmd))
+		return NULL;
+#endif
+
+	if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+		if (is_thp)
+			*is_thp = true;
+		ret_pte = (pte_t *)pmdp;
+		goto out;
+	}
+
+	if (pmd_is_leaf(pmd)) {
+		ret_pte = (pte_t *)pmdp;
+		goto out;
+	}
+
+	if (is_hugepd(__hugepd(pmd_val(pmd)))) {
+		hpdp = (hugepd_t *)&pmd;
+		goto out_huge;
+	}
+
+	return pte_offset_kernel(&pmd, ea);
+
+out_huge:
+	if (!hpdp)
+		return NULL;
+
+	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+	pdshift = hugepd_shift(*hpdp);
+out:
+	if (hpage_shift)
+		*hpage_shift = pdshift;
+	return ret_pte;
+}
+EXPORT_SYMBOL_GPL(__find_linux_pte);
+
+/* Note due to the way vm flags are laid out, the bits are XWR */
+const pgprot_t protection_map[16] = {
+	[VM_NONE]					= PAGE_NONE,
+	[VM_READ]					= PAGE_READONLY,
+	[VM_WRITE]					= PAGE_COPY,
+	[VM_WRITE | VM_READ]				= PAGE_COPY,
+	[VM_EXEC]					= PAGE_READONLY_X,
+	[VM_EXEC | VM_READ]				= PAGE_READONLY_X,
+	[VM_EXEC | VM_WRITE]				= PAGE_COPY_X,
+	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY_X,
+	[VM_SHARED]					= PAGE_NONE,
+	[VM_SHARED | VM_READ]				= PAGE_READONLY,
+	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
+	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
+	[VM_SHARED | VM_EXEC]				= PAGE_READONLY_X,
+	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_READONLY_X,
+	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_SHARED_X,
+	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_SHARED_X
+};
+
+#ifndef CONFIG_PPC_BOOK3S_64
+DECLARE_VM_GET_PAGE_PROT
+#endif
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
new file mode 100644
index 0000000000..5c02fd08d6
--- /dev/null
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines setting up the linux page tables.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/set_memory.h>
+
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/early_ioremap.h>
+
+#include <mm/mmu_decl.h>
+
+static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data;
+
+notrace void __init early_ioremap_init(void)
+{
+	unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE);
+	pte_t *ptep = (pte_t *)early_fixmap_pagetable;
+	pmd_t *pmdp = pmd_off_k(addr);
+
+	for (; (s32)(FIXADDR_TOP - addr) > 0;
+	     addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++)
+		pmd_populate_kernel(&init_mm, pmdp, ptep);
+
+	early_ioremap_setup();
+}
+
+/* Allocate @size bytes aligned to @size from memblock; panics on failure. */
+static void __init *early_alloc_pgtable(unsigned long size)
+{
+	void *table = memblock_alloc(size, size);
+
+	if (table)
+		return table;
+	panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+	      __func__, size, size);
+}
+
+pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
+{
+	if (pmd_none(*pmdp)) {
+		pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+
+		pmd_populate_kernel(&init_mm, pmdp, ptep);
+	}
+	return pte_offset_kernel(pmdp, va);
+}
+
+
+int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
+{
+	/* Use upper 10 bits of VA to index the first level map */
+	pmd_t *pmd = pmd_off_k(va);
+	pte_t *pte;
+	int ret = 0;
+
+	/* Use middle 10 bits of VA to index the second-level map */
+	if (unlikely(!slab_is_available()))
+		pte = early_pte_alloc_kernel(pmd, va);
+	else
+		pte = pte_alloc_kernel(pmd, va);
+	if (!pte) {
+		ret = -ENOMEM;
+	} else {
+		/* The PTE should never be already set nor present in the
+		 * hash table
+		 */
+		BUG_ON((pte_present(*pte) | pte_hashpte(*pte)) && pgprot_val(prot));
+		set_pte_at(&init_mm, va, pte, pfn_pte(pa >> PAGE_SHIFT, prot));
+	}
+	smp_wmb();
+	return ret;
+}
+
+/*
+ * Map the RAM offsets [offset, top) page by page, using PAGE_KERNEL_TEXT
+ * for core kernel text and PAGE_KERNEL otherwise.
+ */
+static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
+{
+	unsigned long va = PAGE_OFFSET + offset;
+	phys_addr_t pa = memstart_addr + offset;
+	unsigned long off;
+
+	for (off = offset; off < top; off += PAGE_SIZE) {
+		pgprot_t prot = core_kernel_text(va) ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
+
+		/* Any map_kernel_page() error is ignored here. */
+		map_kernel_page(va, pa, prot);
+		va += PAGE_SIZE;
+		pa += PAGE_SIZE;
+	}
+}
+
+void __init mapin_ram(void)
+{
+	phys_addr_t base, end, top;
+	u64 i;
+
+	for_each_mem_range(i, &base, &end) {
+		/* Clamp each range to lowmem; skip ranges entirely above it. */
+		top = min(end, total_lowmem);
+		if (base < top) {
+			base = mmu_mapin_ram(base, top);
+			__mapin_ram_chunk(base, top);
+		}
+	}
+}
+
+void mark_initmem_nx(void)
+{
+	unsigned long start = (unsigned long)_sinittext;
+	unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN(start);
+
+	mmu_mark_initmem_nx();
+	/* Block-mapped init text is left to mmu_mark_initmem_nx() alone. */
+	if (v_block_mapped(start))
+		return;
+	set_memory_nx(start, numpages);
+	set_memory_rw(start, numpages);
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
+{
+	unsigned long numpages;
+
+	if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n");
+
+	if (v_block_mapped((unsigned long)_stext + 1)) {
+		mmu_mark_rodata_ro();
+		ptdump_check_wx();
+		return;
+	}
+
+	/*
+	 * mark text and rodata as read only. __end_rodata is set by
+	 * powerpc's linker script and includes tables and data
+	 * requiring relocation which are not put in RO_DATA.
+	 */
+	numpages = PFN_UP((unsigned long)__end_rodata) -
+		   PFN_DOWN((unsigned long)_stext);
+
+	set_memory_ro((unsigned long)_stext, numpages);
+
+	// mark_initmem_nx() should have already run by now
+	ptdump_check_wx();
+}
+#endif
+
+#if defined(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && defined(CONFIG_DEBUG_PAGEALLOC)
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+
+	if (PageHighMem(page))
+		return;
+
+	if (enable)
+		set_memory_p(addr, numpages);
+	else
+		set_memory_np(addr, numpages);
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
new file mode 100644
index 0000000000..5ac1fd3034
--- /dev/null
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  This file contains pgtable related functions for 64-bit machines.
+ *
+ *  Derived from arch/ppc64/mm/init.c
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/hugetlb.h>
+
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/firmware.h>
+#include <asm/dma.h>
+
+#include <mm/mmu_decl.h>
+
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * partition table and process table for ISA 3.0
+ */
+struct prtb_entry *process_tb;
+struct patb_entry *partition_tb;
+/*
+ * page table size
+ */
+unsigned long __pte_index_size;
+EXPORT_SYMBOL(__pte_index_size);
+unsigned long __pmd_index_size;
+EXPORT_SYMBOL(__pmd_index_size);
+unsigned long __pud_index_size;
+EXPORT_SYMBOL(__pud_index_size);
+unsigned long __pgd_index_size;
+EXPORT_SYMBOL(__pgd_index_size);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
+unsigned long __pte_table_size;
+EXPORT_SYMBOL(__pte_table_size);
+unsigned long __pmd_table_size;
+EXPORT_SYMBOL(__pmd_table_size);
+unsigned long __pud_table_size;
+EXPORT_SYMBOL(__pud_table_size);
+unsigned long __pgd_table_size;
+EXPORT_SYMBOL(__pgd_table_size);
+unsigned long __pmd_val_bits;
+EXPORT_SYMBOL(__pmd_val_bits);
+unsigned long __pud_val_bits;
+EXPORT_SYMBOL(__pud_val_bits);
+unsigned long __pgd_val_bits;
+EXPORT_SYMBOL(__pgd_val_bits);
+unsigned long __kernel_virt_start;
+EXPORT_SYMBOL(__kernel_virt_start);
+unsigned long __vmalloc_start;
+EXPORT_SYMBOL(__vmalloc_start);
+unsigned long __vmalloc_end;
+EXPORT_SYMBOL(__vmalloc_end);
+unsigned long __kernel_io_start;
+EXPORT_SYMBOL(__kernel_io_start);
+unsigned long __kernel_io_end;
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+unsigned long __pte_frag_nr;
+EXPORT_SYMBOL(__pte_frag_nr);
+unsigned long __pte_frag_size_shift;
+EXPORT_SYMBOL(__pte_frag_size_shift);
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+/* 4 level page table */
+struct page *p4d_page(p4d_t p4d)
+{
+	if (!p4d_is_leaf(p4d))	/* page of the table given by p4d_pgtable() */
+		return virt_to_page(p4d_pgtable(p4d));
+
+	/* leaf entry: the check only runs when huge vmap is not configured */
+	VM_WARN_ON(!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP) && !p4d_huge(p4d));
+	return pte_page(p4d_pte(p4d));
+}
+#endif
+
+struct page *pud_page(pud_t pud)
+{
+	if (!pud_is_leaf(pud))	/* page of the table given by pud_pgtable() */
+		return virt_to_page(pud_pgtable(pud));
+
+	/* leaf entry: the check only runs when huge vmap is not configured */
+	VM_WARN_ON(!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP) && !pud_huge(pud));
+	return pte_page(pud_pte(pud));
+}
+
+/*
+ * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
+ * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
+ */
+struct page *pmd_page(pmd_t pmd)
+{
+	if (!pmd_is_leaf(pmd))
+		return virt_to_page(pmd_page_vaddr(pmd));
+
+	/*
+	 * vmalloc_to_page may be called on any vmap address (not only
+	 * vmalloc), and it uses pmd_page() etc., when huge vmap is
+	 * enabled so these checks can't be used.
+	 */
+	VM_WARN_ON(!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP) &&
+		   !pmd_large(pmd) && !pmd_huge(pmd));
+	return pte_page(pmd_pte(pmd));
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+/* Mark rodata read-only through the radix or hash backend, then check W+X. */
+void mark_rodata_ro(void)
+{
+	if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) {
+		pr_warn("Warning: Unable to mark rodata read only on this CPU.\n");
+		return;
+	}
+
+	if (!radix_enabled())
+		hash__mark_rodata_ro();
+	else
+		radix__mark_rodata_ro();
+	// mark_initmem_nx() should have already run by now
+	ptdump_check_wx();
+}
+
+void mark_initmem_nx(void)
+{
+	if (!radix_enabled())	/* not radix: use the hash variant */
+		hash__mark_initmem_nx();
+	else
+		radix__mark_initmem_nx();
+}
+#endif
diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
new file mode 100644
index 0000000000..fac932eb8f
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include "ptdump.h"
+
+static const struct flag_info flag_array[] = {
+	{
+#ifdef CONFIG_PPC_16K_PAGES
+		.mask	= _PAGE_HUGE,
+		.val	= _PAGE_HUGE,
+#else
+		.mask	= _PAGE_SPS,
+		.val	= _PAGE_SPS,
+#endif
+		.set	= "huge",
+		.clear	= "    ",
+	}, {
+		.mask	= _PAGE_SH,
+		.val	= 0,
+		.set	= "user",
+		.clear	= "    ",
+	}, {
+		.mask	= _PAGE_RO | _PAGE_NA,
+		.val	= 0,
+		.set	= "rw",
+	}, {
+		.mask	= _PAGE_RO | _PAGE_NA,
+		.val	= _PAGE_RO,
+		.set	= "r ",
+	}, {
+		.mask	= _PAGE_RO | _PAGE_NA,
+		.val	= _PAGE_NA,
+		.set	= "  ",
+	}, {
+		.mask	= _PAGE_EXEC,
+		.val	= _PAGE_EXEC,
+		.set	= " X ",
+		.clear	= "   ",
+	}, {
+		.mask	= _PAGE_PRESENT,
+		.val	= _PAGE_PRESENT,
+		.set	= "present",
+		.clear	= "       ",
+	}, {
+		.mask	= _PAGE_GUARDED,
+		.val	= _PAGE_GUARDED,
+		.set	= "guarded",
+		.clear	= "       ",
+	}, {
+		.mask	= _PAGE_DIRTY,
+		.val	= _PAGE_DIRTY,
+		.set	= "dirty",
+		.clear	= "     ",
+	}, {
+		.mask	= _PAGE_ACCESSED,
+		.val	= _PAGE_ACCESSED,
+		.set	= "accessed",
+		.clear	= "        ",
+	}, {
+		.mask	= _PAGE_NO_CACHE,
+		.val	= _PAGE_NO_CACHE,
+		.set	= "no cache",
+		.clear	= "        ",
+	}, {
+		.mask	= _PAGE_SPECIAL,
+		.val	= _PAGE_SPECIAL,
+		.set	= "special",
+	}
+};
+
+struct pgtable_level pg_level[5] = {
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pud */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pmd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pte */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	},
+};
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
new file mode 100644
index 0000000000..dc896d2874
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y	+= ptdump.o
+
+obj-$(CONFIG_4xx)		+= shared.o
+obj-$(CONFIG_PPC_8xx)		+= 8xx.o
+obj-$(CONFIG_PPC_E500)		+= shared.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o
+
+ifdef CONFIG_PTDUMP_DEBUGFS
+obj-$(CONFIG_PPC_BOOK3S_32)	+= bats.o segment_regs.o
+obj-$(CONFIG_PPC_64S_HASH_MMU)	+= hashpagetable.o
+endif
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
new file mode 100644
index 0000000000..820c119013
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Christophe Leroy CS S.I.
+ * <christophe.leroy@c-s.fr>
+ *
+ * This dumps the content of BATS
+ */
+
+#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+#include <asm/cpu_has_feature.h>
+
+#include "ptdump.h"
+
+/* Emit one line describing BAT idx, decoded from its lower/upper register values. */
+static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d)
+{
+	u32 valid = upper & 3;
+	u32 start = upper & 0xfffe0000;
+	u32 len = (((upper >> 2) & 0x7ff) + 1) << 17;
+	phys_addr_t phys = PHYS_BAT_ADDR(lower);
+	const char *prot = "    ";
+
+	seq_printf(m, "%d: ", idx);
+	if (!valid) {
+		/* both low bits of upper clear: entry is shown as unused */
+		seq_puts(m, "        -\n");
+		return;
+	}
+
+	seq_printf(m, "0x%08x-0x%08x ", start, start + len - 1);
+#ifdef CONFIG_PHYS_64BIT
+	seq_printf(m, "0x%016llx ", phys);
+#else
+	seq_printf(m, "0x%08x ", phys);
+#endif
+	pt_dump_size(m, len);
+
+	if (valid == 1)
+		seq_puts(m, "User ");
+	else
+		seq_puts(m, valid == 2 ? "Kernel " : "Kernel/User ");
+
+	/* data BATs report r/rw, instruction BATs report x */
+	if (lower & BPP_RX)
+		prot = is_d ? "r   " : "  x ";
+	else if (lower & BPP_RW)
+		prot = is_d ? "rw  " : "  x ";
+	seq_puts(m, prot);
+
+	seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : "  ");
+	seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : "  ");
+	seq_puts(m, lower & _PAGE_COHERENT ? "m " : "  ");
+	seq_puts(m, lower & _PAGE_GUARDED ? "g " : "  ");
+	seq_puts(m, "\n");
+}
+
+#define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
+
+static int bats_show(struct seq_file *m, void *v)	/* seq_file show routine; v is unused */
+{
+	seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
+
+	BAT_SHOW_603(m, 0, SPRN_IBAT0L, SPRN_IBAT0U, false);	/* false: instruction BAT */
+	BAT_SHOW_603(m, 1, SPRN_IBAT1L, SPRN_IBAT1U, false);
+	BAT_SHOW_603(m, 2, SPRN_IBAT2L, SPRN_IBAT2U, false);
+	BAT_SHOW_603(m, 3, SPRN_IBAT3L, SPRN_IBAT3U, false);
+	if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {	/* BATs 4-7 are only dumped with this feature */
+		BAT_SHOW_603(m, 4, SPRN_IBAT4L, SPRN_IBAT4U, false);
+		BAT_SHOW_603(m, 5, SPRN_IBAT5L, SPRN_IBAT5U, false);
+		BAT_SHOW_603(m, 6, SPRN_IBAT6L, SPRN_IBAT6U, false);
+		BAT_SHOW_603(m, 7, SPRN_IBAT7L, SPRN_IBAT7U, false);
+	}
+
+	seq_puts(m, "\n---[ Data Block Address Translation ]---\n");
+
+	BAT_SHOW_603(m, 0, SPRN_DBAT0L, SPRN_DBAT0U, true);	/* true: data BAT */
+	BAT_SHOW_603(m, 1, SPRN_DBAT1L, SPRN_DBAT1U, true);
+	BAT_SHOW_603(m, 2, SPRN_DBAT2L, SPRN_DBAT2U, true);
+	BAT_SHOW_603(m, 3, SPRN_DBAT3L, SPRN_DBAT3U, true);
+	if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {	/* same gating as the instruction BATs */
+		BAT_SHOW_603(m, 4, SPRN_DBAT4L, SPRN_DBAT4U, true);
+		BAT_SHOW_603(m, 5, SPRN_DBAT5L, SPRN_DBAT5U, true);
+		BAT_SHOW_603(m, 6, SPRN_DBAT6L, SPRN_DBAT6U, true);
+		BAT_SHOW_603(m, 7, SPRN_DBAT7L, SPRN_DBAT7U, true);
+	}
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(bats);
+
+static int __init bats_init(void)	/* registers the BAT dump file under arch_debugfs_dir */
+{
+	debugfs_create_file("block_address_translation", 0400,
+			    arch_debugfs_dir, NULL, &bats_fops);
+	return 0;	/* the debugfs_create_file() result is not checked */
+}
+device_initcall(bats_init);
diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
new file mode 100644
index 0000000000..5ad92d9dc5
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include "ptdump.h"
+
+static const struct flag_info flag_array[] = {
+	{
+		.mask	= _PAGE_PRIVILEGED,
+		.val	= 0,
+		.set	= "user",
+		.clear	= "    ",
+	}, {
+		.mask	= _PAGE_READ,
+		.val	= _PAGE_READ,
+		.set	= "r",
+		.clear	= " ",
+	}, {
+		.mask	= _PAGE_WRITE,
+		.val	= _PAGE_WRITE,
+		.set	= "w",
+		.clear	= " ",
+	}, {
+		.mask	= _PAGE_EXEC,
+		.val	= _PAGE_EXEC,
+		.set	= " X ",
+		.clear	= "   ",
+	}, {
+		.mask	= _PAGE_PTE,
+		.val	= _PAGE_PTE,
+		.set	= "pte",
+		.clear	= "   ",
+	}, {
+		.mask	= _PAGE_PRESENT,
+		.val	= _PAGE_PRESENT,
+		.set	= "valid",
+		.clear	= "     ",
+	}, {
+		.mask	= _PAGE_PRESENT | _PAGE_INVALID,
+		.val	= 0,
+		.set	= "       ",
+		.clear	= "present",
+	}, {
+		.mask	= H_PAGE_HASHPTE,
+		.val	= H_PAGE_HASHPTE,
+		.set	= "hpte",
+		.clear	= "    ",
+	}, {
+		.mask	= _PAGE_DIRTY,
+		.val	= _PAGE_DIRTY,
+		.set	= "dirty",
+		.clear	= "     ",
+	}, {
+		.mask	= _PAGE_ACCESSED,
+		.val	= _PAGE_ACCESSED,
+		.set	= "accessed",
+		.clear	= "        ",
+	}, {
+		.mask	= _PAGE_NON_IDEMPOTENT,
+		.val	= _PAGE_NON_IDEMPOTENT,
+		.set	= "non-idempotent",
+		.clear	= "              ",
+	}, {
+		.mask	= _PAGE_TOLERANT,
+		.val	= _PAGE_TOLERANT,
+		.set	= "tolerant",
+		.clear	= "        ",
+	}, {
+		.mask	= H_PAGE_BUSY,
+		.val	= H_PAGE_BUSY,
+		.set	= "busy",
+	}, {
+#ifdef CONFIG_PPC_64K_PAGES
+		.mask	= H_PAGE_COMBO,
+		.val	= H_PAGE_COMBO,
+		.set	= "combo",
+	}, {
+		.mask	= H_PAGE_4K_PFN,
+		.val	= H_PAGE_4K_PFN,
+		.set	= "4K_pfn",
+	}, {
+#else /* CONFIG_PPC_64K_PAGES */
+		.mask	= H_PAGE_F_GIX,
+		.val	= H_PAGE_F_GIX,
+		.set	= "f_gix",
+		.is_val	= true,
+		.shift	= H_PAGE_F_GIX_SHIFT,
+	}, {
+		.mask	= H_PAGE_F_SECOND,
+		.val	= H_PAGE_F_SECOND,
+		.set	= "f_second",
+	}, {
+#endif /* CONFIG_PPC_64K_PAGES */
+		.mask	= _PAGE_SPECIAL,
+		.val	= _PAGE_SPECIAL,
+		.set	= "special",
+	}
+};
+
+struct pgtable_level pg_level[5] = {
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pud */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pmd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pte */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	},
+};
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
new file mode 100644
index 0000000000..9a60158783
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ * This traverses the kernel virtual memory and dumps the pages that are in
+ * the hash pagetable, along with their flags to
+ * /sys/kernel/debug/kernel_hash_pagetable.
+ *
+ * If radix is enabled then there is no hash page table and so no debugfs file
+ * is generated.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/plpar_wrappers.h>
+#include <linux/memblock.h>
+#include <asm/firmware.h>
+#include <asm/pgalloc.h>
+
+struct pg_state {
+	struct seq_file *seq;
+	const struct addr_marker *marker;
+	unsigned long start_address;
+	unsigned int level;
+	u64 current_flags;
+};
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+static struct addr_marker address_markers[] = {
+	{ 0,	"Start of kernel VM" },
+	{ 0,	"vmalloc() Area" },
+	{ 0,	"vmalloc() End" },
+	{ 0,	"isa I/O start" },
+	{ 0,	"isa I/O end" },
+	{ 0,	"phb I/O start" },
+	{ 0,	"phb I/O end" },
+	{ 0,	"I/O remap start" },
+	{ 0,	"I/O remap end" },
+	{ 0,	"vmemmap start" },
+	{ -1,	NULL },
+};
+
+struct flag_info {
+	u64		mask;	/* bits this entry covers; an entry with mask 0 is skipped */
+	u64		val;	/* masked value meaning "set"; ANDed with the PTE when is_val */
+	const char	*set;	/* printed when set, or used as the label when is_val */
+	const char	*clear;	/* printed otherwise; NULL prints nothing */
+	bool		is_val;	/* print a numeric value instead of set/clear */
+	int		shift;	/* right shift applied to that value when non-zero */
+};
+
+static const struct flag_info v_flag_array[] = {
+	{
+		.mask   = SLB_VSID_B,
+		.val    = SLB_VSID_B_256M,
+		.set    = "ssize: 256M",
+		.clear  = "ssize: 1T  ",
+	}, {
+		.mask	= HPTE_V_SECONDARY,
+		.val	= HPTE_V_SECONDARY,
+		.set	= "secondary",
+		.clear	= "primary  ",
+	}, {
+		.mask	= HPTE_V_VALID,
+		.val	= HPTE_V_VALID,
+		.set	= "valid  ",
+		.clear	= "invalid",
+	}, {
+		.mask	= HPTE_V_BOLTED,
+		.val	= HPTE_V_BOLTED,
+		.set	= "bolted",
+		.clear	= "",
+	}
+};
+
+static const struct flag_info r_flag_array[] = {
+	{
+		.mask	= HPTE_R_PP0 | HPTE_R_PP,
+		.val	= PP_RWXX,
+		.set	= "prot:RW--",
+	}, {
+		.mask	= HPTE_R_PP0 | HPTE_R_PP,
+		.val	= PP_RWRX,
+		.set	= "prot:RWR-",
+	}, {
+		.mask	= HPTE_R_PP0 | HPTE_R_PP,
+		.val	= PP_RWRW,
+		.set	= "prot:RWRW",
+	}, {
+		.mask	= HPTE_R_PP0 | HPTE_R_PP,
+		.val	= PP_RXRX,
+		.set	= "prot:R-R-",
+	}, {
+		.mask	= HPTE_R_PP0 | HPTE_R_PP,
+		.val	= PP_RXXX,
+		.set	= "prot:R---",
+	}, {
+		.mask	= HPTE_R_KEY_HI | HPTE_R_KEY_LO,
+		.val	= HPTE_R_KEY_HI | HPTE_R_KEY_LO,
+		.set	= "key",
+		.clear	= "",
+		.is_val = true,
+	}, {
+		.mask	= HPTE_R_R,
+		.val	= HPTE_R_R,
+		.set	= "ref",
+		.clear	= "   ",
+	}, {
+		.mask	= HPTE_R_C,
+		.val	= HPTE_R_C,
+		.set	= "changed",
+		.clear	= "       ",
+	}, {
+		.mask	= HPTE_R_N,
+		.val	= HPTE_R_N,
+		.set	= "no execute",
+	}, {
+		.mask	= HPTE_R_WIMG,
+		.val	= HPTE_R_W,
+		.set	= "writethru",
+	}, {
+		.mask	= HPTE_R_WIMG,
+		.val	= HPTE_R_I,
+		.set	= "no cache",
+	}, {
+		.mask	= HPTE_R_WIMG,
+		.val	= HPTE_R_G,
+		.set	= "guarded",
+	}
+};
+
+/* Print 2^ps bytes as "  <s>_ps: <n><unit>\t"; return the shift left after unit scaling. */
+static int calculate_pagesize(struct pg_state *st, int ps, char s[])
+{
+	static const char units[] = "BKMGTPE";
+	int u = 0;
+
+	/* each unit step is worth 10 bits; stop at the last unit */
+	for (; ps > 9 && units[u + 1]; u++)
+		ps -= 10;
+	seq_printf(st->seq, "  %s_ps: %i%c\t", s, 1<<ps, units[u]);
+	return ps;
+}
+
+/* Print each of the num entries of the flag table as it applies to pte. */
+static void dump_flag_info(struct pg_state *st, const struct flag_info
+		*flag, u64 pte, int num)
+{
+	const struct flag_info *f;
+	unsigned int i;
+
+	for (i = 0, f = flag; i < num; i++, f++) {
+		const char *s;
+
+		/* flag not defined so don't check it */
+		if (!f->mask)
+			continue;
+
+		/* Some 'flags' are actually values */
+		if (f->is_val) {
+			u64 val = pte & f->val;
+
+			seq_printf(st->seq, "  %s:%llx", f->set,
+				   f->shift ? val >> f->shift : val);
+			continue;
+		}
+
+		s = (pte & f->mask) == f->val ? f->set : f->clear;
+		if (s)
+			seq_printf(st->seq, "  %s", s);
+	}
+}
+
+/* Print one hash PTE: address, AVPN, V and R flag words, RPN and page sizes. */
+static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
+		unsigned long rpn, int bps, int aps, unsigned long lp)
+{
+	/* emit the header of every region whose start address ea has reached */
+	for (; ea >= st->marker[1].start_address; st->marker++)
+		seq_printf(st->seq, "---[ %s ]---\n", st->marker[1].name);
+
+	seq_printf(st->seq, "0x%lx:\t", ea);
+	seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v));
+	dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array));
+	seq_printf(st->seq, "  rpn: %lx\t", rpn);
+	dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array));
+
+	calculate_pagesize(st, bps, "base");
+	/* a leftover shift of 2 (e.g. 4K = 2^12) is printed without the LP encoding */
+	if (calculate_pagesize(st, aps, "actual") != 2)
+		seq_printf(st->seq, "LP enc: %lx", lp);
+
+	seq_putc(st->seq, '\n');
+}
+
+
+/*
+ * Search one HPTE group of the native hash table for a valid entry matching
+ * ea; on success store its two words in *v and *r and return 0, else -1.
+ */
+static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
+		*r)
+{
+	int slot, ssize = mmu_kernel_ssize;
+	unsigned long shift = mmu_psize_defs[psize].shift;
+	unsigned long vsid = get_kernel_vsid(ea, ssize);
+	unsigned long vpn = hpt_vpn(ea, vsid, ssize);
+	unsigned long hash = hpt_hash(vpn, shift, ssize);
+	unsigned long want_v = hpte_encode_avpn(vpn, psize, ssize);
+	struct hash_pte *hptep;
+
+	/* to check in the secondary hash table, we invert the hash */
+	if (!primary)
+		hash = ~hash;
+	hptep = htab_address + (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+	for (slot = 0; slot < HPTES_PER_GROUP; slot++, hptep++) {
+		unsigned long hpte_v = be64_to_cpu(hptep->v);
+
+		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+			continue;
+		/* HPTE matches */
+		*v = be64_to_cpu(hptep->v);
+		*r = be64_to_cpu(hptep->r);
+		return 0;
+	}
+	return -1;
+}
+
+/* Lookup via plpar_pte_read_4(): 0 with *v and *r set on a match, else -1. */
+static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
+{
+	struct {
+		unsigned long v;
+		unsigned long r;
+	} ptes[4];
+	int ssize = mmu_kernel_ssize;
+	int i, j;
+	unsigned long shift = mmu_psize_defs[psize].shift;
+	unsigned long vsid = get_kernel_vsid(ea, ssize);
+	unsigned long vpn = hpt_vpn(ea, vsid, ssize);
+	unsigned long hash = hpt_hash(vpn, shift, ssize);
+	unsigned long want_v = hpte_encode_avpn(vpn, psize, ssize);
+	unsigned long hpte_group;
+
+	/* to check in the secondary hash table, we invert the hash */
+	if (!primary)
+		hash = ~hash;
+	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+	/* see if we can find an entry in the hpte with this hash */
+	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
+		/* a non-zero return from the read just skips these four slots */
+		if (plpar_pte_read_4(0, hpte_group, (void *)ptes))
+			continue;
+
+		for (j = 0; j < 4; j++) {
+			if (!HPTE_V_COMPARE(ptes[j].v, want_v) ||
+			    !(ptes[j].v & HPTE_V_VALID))
+				continue;
+			/* HPTE matches */
+			*v = ptes[j].v;
+			*r = ptes[j].r;
+			return 0;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Decode r of a large-page HPTE with base page size index bps into *rpn,
+ * *aps (actual page shift) and *lp_bits. If no page-size encoding matches,
+ * *aps is set to -1 so the caller can reject the entry.
+ */
+static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
+		unsigned long *lp_bits)
+{
+	struct mmu_psize_def entry = mmu_psize_defs[bps];
+	unsigned long arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
+	unsigned long mask, lp = arpn & 0xff;
+	int penc, idx, shift;
+
+	/*
+	 * The LP field has 8 bits. Depending on the actual page size, some of
+	 * these bits are concatenated with the ARPN to get the RPN. The rest
+	 * of the bits in the LP field is the LP value and is an encoding for
+	 * the base page size and the actual page size. Go through all page
+	 * encodings of the base page size and use the associated mask to find
+	 * one that matches the LP field.
+	 */
+	for (idx = 0; idx < MMU_PAGE_COUNT; idx++) {
+		penc = entry.penc[idx];
+		if (penc == -1 || !mmu_psize_defs[idx].shift)
+			continue;
+		shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT;
+		mask = (1UL << shift) - 1;
+		if ((lp & mask) != penc)
+			continue;
+		*aps = mmu_psize_to_shift(idx);
+		*lp_bits = lp & mask;
+		*rpn = arpn >> shift;
+		return;
+	}
+	*aps = -1;	/* no encoding matched */
+}
+
+/* Use pseries_find() when built for pseries and running as an LPAR, else native_find(). */
+static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
+			  u64 *r)
+{
+	if (!IS_ENABLED(CONFIG_PPC_PSERIES) || !firmware_has_feature(FW_FEATURE_LPAR))
+		return native_find(ea, psize, primary, v, r);
+	return pseries_find(ea, psize, primary, v, r);
+}
+
+/*
+ * Look ea up in the primary, then the secondary hash table and dump the
+ * matching HPTE. Returns 0 if an entry was dumped, -1 (as unsigned long)
+ * otherwise.
+ */
+static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
+{
+	unsigned long slot;
+	u64 v  = 0, r = 0;
+	unsigned long rpn = 0, lp_bits = 0;
+	/*
+	 * Start from "no match" so the check after decoding also rejects the
+	 * entry when decode_r() returns without writing its outputs.
+	 */
+	int base_psize, actual_psize = -1;
+
+	if (ea < PAGE_OFFSET)
+		return -1;
+
+	/* Look in primary table, then in secondary table */
+	slot = base_hpte_find(ea, psize, true, &v, &r);
+	if (slot == -1)
+		slot = base_hpte_find(ea, psize, false, &v, &r);
+
+	/* No entry found */
+	if (slot == -1)
+		return -1;
+
+	/* We found an entry: work out the actual page size and the RPN */
+	base_psize = mmu_psize_to_shift(psize);
+
+	if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) {
+		decode_r(psize, r, &rpn, &actual_psize, &lp_bits);
+	} else {
+		/* 4K actual page size */
+		actual_psize = 12;
+		rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
+		/* In this case there are no LP bits */
+		lp_bits = -1;
+	}
+
+	/* No matching encoding: the PTE we found isn't for this address */
+	if (actual_psize == -1)
+		return -1;
+
+	dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits);
+	return 0;
+}
+
+/* Check every PTE slot under pmd against the hash table; start is the VA of slot 0. */
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+	unsigned long addr = start;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++, addr += PAGE_SIZE) {
+		unsigned long pteval = pte_val(*pte);
+		unsigned long psize;
+		int status;
+
+		/* check for secret 4K mappings */
+		if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
+		    ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO ||
+		     (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+			psize = mmu_io_psize;
+		else if (addr < VMALLOC_END)
+			psize = mmu_vmalloc_psize;
+		else
+			psize = mmu_io_psize;
+
+		/* check for hashpte */
+		status = hpte_find(st, addr, psize);
+		if (status == -1 || (pteval & H_PAGE_HASHPTE) == H_PAGE_HASHPTE)
+			continue;
+
+		/* found a hpte that is not in the linux page tables */
+		seq_printf(st->seq, "page probably bolted before linux"
+			" pagetables were set: addr:%lx, pteval:%lx\n",
+			addr, pteval);
+	}
+}
+
+/* Descend into every populated pmd entry below pud; start is the VA of entry 0. */
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+	unsigned int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++, start += PMD_SIZE) {
+		if (pmd_none(*pmd))
+			continue;
+		/* pmd exists */
+		walk_pte(st, pmd, start);
+	}
+}
+
+/* Descend into every populated pud entry below p4d; start is the VA of entry 0. */
+static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
+{
+	pud_t *pud = pud_offset(p4d, 0);
+	unsigned int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++, start += PUD_SIZE) {
+		if (pud_none(*pud))
+			continue;
+		/* pud exists */
+		walk_pmd(st, pud, start);
+	}
+}
+
+/* Descend into every populated p4d entry below pgd; start is the VA of entry 0. */
+static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+	p4d_t *p4d = p4d_offset(pgd, 0);
+	unsigned int i;
+
+	for (i = 0; i < PTRS_PER_P4D; i++, p4d++, start += P4D_SIZE) {
+		if (p4d_none(*p4d))
+			continue;
+		/* p4d exists */
+		walk_pud(st, p4d, start);
+	}
+}
+
+/*
+ * Traverse the linux pagetable structure and dump pages that are in
+ * the hash pagetable. Entry i of the kernel pgd is taken to start at
+ * KERN_VIRT_START + i * PGDIR_SIZE.
+ */
+static void walk_pagetables(struct pg_state *st)
+{
+	pgd_t *pgd = pgd_offset_k(0UL);
+	unsigned long addr = KERN_VIRT_START;
+	unsigned int i;
+
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
+		if (pgd_none(*pgd))
+			continue;
+		walk_p4d(st, pgd, addr);
+	}
+}
+
+
+/*
+ * Traverse the linear mapping section of virtual memory and dump pages
+ * that are in the hash pagetable.
+ */
+static void walk_linearmapping(struct pg_state *st)
+{
+	/* 1UL: do the shift in unsigned long, not in int */
+	unsigned long psize = 1UL << mmu_psize_defs[mmu_linear_psize].shift;
+	unsigned long end = PAGE_OFFSET + memblock_end_of_DRAM();
+	unsigned long addr;
+
+	for (addr = PAGE_OFFSET; addr < end; addr += psize)
+		hpte_find(st, addr, mmu_linear_psize);
+}
+
+/*
+ * Traverse the vmemmaped memory and dump pages that are in the hash
+ * pagetable.
+ */
+static void walk_vmemmap(struct pg_state *st)
+{
+	struct vmemmap_backing *ptr;
+
+	if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+		return;
+
+	/* Visit every node, including the one whose ->list is NULL; an empty list is fine. */
+	for (ptr = vmemmap_list; ptr; ptr = ptr->list)
+		hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize);
+	seq_puts(st->seq, "---[ vmemmap end ]---\n");
+}
+
+static void populate_markers(void)	/* fills start_address of address_markers[0..9] */
+{
+	address_markers[0].start_address = PAGE_OFFSET;	/* indices follow the order of the address_markers[] table */
+	address_markers[1].start_address = VMALLOC_START;
+	address_markers[2].start_address = VMALLOC_END;
+	address_markers[3].start_address = ISA_IO_BASE;
+	address_markers[4].start_address = ISA_IO_END;
+	address_markers[5].start_address = PHB_IO_BASE;
+	address_markers[6].start_address = PHB_IO_END;
+	address_markers[7].start_address = IOREMAP_BASE;
+	address_markers[8].start_address = IOREMAP_END;
+	address_markers[9].start_address =  H_VMEMMAP_START;	/* last real entry; the { -1, NULL } terminator is left as is */
+}
+
+/* seq_file show routine: dump the linear mapping, kernel page tables and vmemmap. */
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	struct pg_state st = {
+		.seq = m,
+		.marker = address_markers,
+		.start_address = PAGE_OFFSET,
+	};
+
+	/* Traverse the 0xc, 0xd and 0xf areas and dump pages in the hash pagetable. */
+	walk_linearmapping(&st);
+	walk_pagetables(&st);
+	walk_vmemmap(&st);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+/* Initcall: create the debugfs file, but only when radix is not enabled. */
+static int __init ptdump_init(void)
+{
+	if (radix_enabled())
+		return 0;
+	populate_markers();
+	debugfs_create_file("kernel_hash_pagetable", 0400, NULL, NULL, &ptdump_fops);
+	return 0;
+}
+device_initcall(ptdump_init);
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
new file mode 100644
index 0000000000..2313053fe6
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ * This traverses the kernel pagetables and dumps the
+ * information about the used sections of memory to
+ * /sys/kernel/debug/kernel_pagetables.
+ *
+ * Derived from the arm64 implementation:
+ * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
+ * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/hugetlb.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/ptdump.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <asm/fixmap.h>
+#include <linux/const.h>
+#include <linux/kasan.h>
+#include <asm/page.h>
+#include <asm/hugetlb.h>
+
+#include <mm/mmu_decl.h>
+
+#include "ptdump.h"
+
+/*
+ * To visualise what is happening,
+ *
+ *  - PTRS_PER_P** = how many entries there are in the corresponding P**
+ *  - P**_SHIFT = how many bits of the address we use to index into the
+ * corresponding P**
+ *  - P**_SIZE is how much memory we can access through the table - not the
+ * size of the table itself.
+ * P**={PGD, PUD, PMD, PTE}
+ *
+ *
+ * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
+ * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
+ * a page.
+ *
+ * In the case where there are only 3 levels, the PUD is folded into the
+ * PGD: every PUD has only one entry which points to the PMD.
+ *
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the PTE entries. When the continuity is broken it then
+ * dumps out a description of the range - ie PTEs that are virtually contiguous
+ * with the same PTE flags are chunked together. This is to make it clear how
+ * different areas of the kernel virtual memory are used.
+ *
+ */
+struct pg_state {
+	struct ptdump_state ptdump;
+	struct seq_file *seq;
+	const struct addr_marker *marker;
+	unsigned long start_address;
+	unsigned long start_pa;
+	int level;
+	u64 current_flags;
+	bool check_wx;
+	unsigned long wx_pages;
+};
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+static struct addr_marker address_markers[] = {
+	{ 0,	"Start of kernel VM" },
+#ifdef MODULES_VADDR
+	{ 0,	"modules start" },
+	{ 0,	"modules end" },
+#endif
+	{ 0,	"vmalloc() Area" },
+	{ 0,	"vmalloc() End" },
+#ifdef CONFIG_PPC64
+	{ 0,	"isa I/O start" },
+	{ 0,	"isa I/O end" },
+	{ 0,	"phb I/O start" },
+	{ 0,	"phb I/O end" },
+	{ 0,	"I/O remap start" },
+	{ 0,	"I/O remap end" },
+	{ 0,	"vmemmap start" },
+#else
+	{ 0,	"Early I/O remap start" },
+	{ 0,	"Early I/O remap end" },
+#ifdef CONFIG_HIGHMEM
+	{ 0,	"Highmem PTEs start" },
+	{ 0,	"Highmem PTEs end" },
+#endif
+	{ 0,	"Fixmap start" },
+	{ 0,	"Fixmap end" },
+#endif
+#ifdef CONFIG_KASAN
+	{ 0,	"kasan shadow mem start" },
+	{ 0,	"kasan shadow mem end" },
+#endif
+	{ -1,	NULL },
+};
+
+static struct ptdump_range ptdump_range[] __ro_after_init = {
+	{TASK_SIZE_MAX, ~0UL},
+	{0, 0}
+};
+
+#define pt_dump_seq_printf(m, fmt, args...)	\
+({						\
+	if (m)					\
+		seq_printf(m, fmt, ##args);	\
+})
+
+#define pt_dump_seq_putc(m, c)		\
+({					\
+	if (m)				\
+		seq_putc(m, c);		\
+})
+
+/* Print size in the largest unit that divides it exactly, as "%9lu<unit> ". */
+void pt_dump_size(struct seq_file *m, unsigned long size)
+{
+	static const char units[] = " KMGTPE";
+	int u = 0;
+
+	/* Work out what appropriate unit to use */
+	for (; !(size & 1023) && units[u + 1]; u++)
+		size >>= 10;
+
+	pt_dump_seq_printf(m, "%9lu%c ", size, units[u]);
+}
+
+/* Print the table entries as they apply to pte, then any bits no entry covers. */
+static void dump_flag_info(struct pg_state *st, const struct flag_info
+		*flag, u64 pte, int num)
+{
+	const struct flag_info *f;
+	unsigned int i;
+
+	for (i = 0, f = flag; i < num; i++, f++) {
+		const char *s;
+
+		/* flag not defined so don't check it */
+		if (!f->mask)
+			continue;
+
+		if (f->is_val) {
+			/* Some 'flags' are actually values */
+			u64 val = pte & f->val;
+
+			pt_dump_seq_printf(st->seq, "  %s:%llx", f->set,
+					   f->shift ? val >> f->shift : val);
+		} else {
+			s = (pte & f->mask) == f->val ? f->set : f->clear;
+			if (s)
+				pt_dump_seq_printf(st->seq, "  %s", s);
+		}
+		/* this entry's bits are now accounted for */
+		st->current_flags &= ~f->mask;
+	}
+	if (st->current_flags)
+		pt_dump_seq_printf(st->seq, "  unknown flags:%llx", st->current_flags);
+}
+
+static void dump_addr(struct pg_state *st, unsigned long addr)	/* addr: first address past the range */
+{
+#ifdef CONFIG_PPC64
+#define REG		"0x%016lx"	/* 16 hex digits */
+#else
+#define REG		"0x%08lx"	/* 8 hex digits */
+#endif
+
+	pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);	/* inclusive range */
+	pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);	/* PA recorded at range start */
+	pt_dump_size(st->seq, addr - st->start_address);	/* length of the range */
+}
+
+/* Warn once and count pages if the range ending at addr is both writable and executable. */
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+	pte_t pte = __pte(st->current_flags);
+
+	if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx)
+		return;
+	if (!(pte_write(pte) && pte_exec(pte)))
+		return;
+
+	WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
+		  (void *)st->start_address, (void *)st->start_address);
+
+	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
+/*
+ * Start tracking a new range at addr and print the header of every region
+ * whose start address addr has reached.
+ */
+static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
+{
+	st->level = level;
+	st->current_flags = level >= 0 ? val & pg_level[level].mask : 0;
+	st->start_address = addr;
+	st->start_pa = val & PTE_RPN_MASK;
+
+	for (; addr >= st->marker[1].start_address; st->marker++)
+		pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
+				   st->marker[1].name);
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
+{
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
+	const struct pgtable_level *lvl;
+
+	/* Very first call: no range is open yet, just print the first marker */
+	if (st->level == -1) {
+		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+		note_page_update_state(st, addr, level, val);
+		return;
+	}
+
+	/*
+	 * The open range continues unless one of these holds:
+	 *   - the PTE flags differ from those of the previous entry,
+	 *   - the level in the page table tree changed,
+	 *   - addr lies in the next section of memory, which serves a
+	 *     different purpose regardless of the flags.
+	 */
+	if (flag == st->current_flags && level == st->level &&
+	    addr < st->marker[1].start_address)
+		return;
+
+	/* Ranges without any flag set are not printed */
+	if (st->current_flags) {
+		note_prot_wx(st, addr);
+		dump_addr(st, addr);
+
+		/* Decode the flags using the table of the range's level */
+		lvl = &pg_level[st->level];
+		if (lvl->flag)
+			dump_flag_info(st, lvl->flag, st->current_flags, lvl->num);
+
+		pt_dump_seq_putc(st->seq, '\n');
+	}
+
+	/* addr starts the next range */
+	note_page_update_state(st, addr, level, val);
+}
+
+static void populate_markers(void)
+{
+	int i = 0;	/* next slot; NOTE(review): order presumably must match address_markers[] - confirm */
+
+#ifdef CONFIG_PPC64
+	address_markers[i++].start_address = PAGE_OFFSET;
+#else
+	address_markers[i++].start_address = TASK_SIZE;
+#endif
+#ifdef MODULES_VADDR
+	address_markers[i++].start_address = MODULES_VADDR;
+	address_markers[i++].start_address = MODULES_END;
+#endif
+	address_markers[i++].start_address = VMALLOC_START;
+	address_markers[i++].start_address = VMALLOC_END;
+#ifdef CONFIG_PPC64
+	address_markers[i++].start_address = ISA_IO_BASE;
+	address_markers[i++].start_address = ISA_IO_END;
+	address_markers[i++].start_address = PHB_IO_BASE;
+	address_markers[i++].start_address = PHB_IO_END;
+	address_markers[i++].start_address = IOREMAP_BASE;
+	address_markers[i++].start_address = IOREMAP_END;
+	/* The vmemmap start is H_VMEMMAP_START on Book3S 64, VMEMMAP_BASE otherwise */
+#ifdef CONFIG_PPC_BOOK3S_64
+	address_markers[i++].start_address =  H_VMEMMAP_START;
+#else
+	address_markers[i++].start_address =  VMEMMAP_BASE;
+#endif
+#else /* !CONFIG_PPC64 */
+	address_markers[i++].start_address = ioremap_bot;
+	address_markers[i++].start_address = IOREMAP_TOP;
+#ifdef CONFIG_HIGHMEM
+	address_markers[i++].start_address = PKMAP_BASE;
+	address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
+#endif
+	address_markers[i++].start_address = FIXADDR_START;
+	address_markers[i++].start_address = FIXADDR_TOP;
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_KASAN
+	address_markers[i++].start_address = KASAN_SHADOW_START;
+	address_markers[i++].start_address = KASAN_SHADOW_END;
+#endif
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	/* Fresh walker state; level -1 tells note_page() no range is open */
+	struct pg_state st = {
+		.ptdump = {
+			.range = ptdump_range,
+			.note_page = note_page,
+		},
+		.marker = address_markers,
+		.level = -1,
+		.seq = m,
+	};
+
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);	/* kernel page tables */
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+static void __init build_pgtable_complete_mask(void)
+{
+	struct pgtable_level *lvl = pg_level;
+	unsigned int n;
+
+	for (; lvl < pg_level + ARRAY_SIZE(pg_level); lvl++)
+		for (n = 0; lvl->flag && n < lvl->num; n++)
+			lvl->mask |= lvl->flag[n].mask;	/* OR of all known flags */
+}
+
+#ifdef CONFIG_DEBUG_WX
+void ptdump_check_wx(void)
+{
+	/* Two nameless markers at 0 and -1; with seq == NULL nothing is printed */
+	struct addr_marker whole_space[] = {
+		{ 0, NULL},
+		{ -1, NULL},
+	};
+	struct pg_state st = {
+		.ptdump = {
+			.range = ptdump_range,
+			.note_page = note_page,
+		},
+		.marker = whole_space,
+		.check_wx = true,
+		.level = -1,
+		.seq = NULL,
+	};
+
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	if (!st.wx_pages)
+		pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+	else
+		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
+}
+#endif
+
+static int __init ptdump_init(void)
+{
+#ifdef CONFIG_PPC64
+	/* The walk starts at PAGE_OFFSET with radix, else at KERN_VIRT_START */
+	ptdump_range[0].start = radix_enabled() ? PAGE_OFFSET : KERN_VIRT_START;
+	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);
+#endif
+
+	/* Fill in the marker addresses and the per-level flag masks */
+	populate_markers();
+	build_pgtable_complete_mask();
+
+	/* The debugfs file is optional; the set-up above always happens */
+	if (!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
+		return 0;
+
+	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+	return 0;
+}
+device_initcall(ptdump_init);
diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h
new file mode 100644
index 0000000000..154efae96a
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/ptdump.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+#include <linux/seq_file.h>
+
+struct flag_info {
+	u64		mask;	/* bits to test; 0 means the entry is skipped */
+	u64		val;	/* "set" when (pte & mask) == val */
+	const char	*set;	/* text for the set case, or the name of a value */
+	const char	*clear;	/* text for the clear case; NULL prints nothing */
+	bool		is_val;	/* print (pte & val) >> shift instead of a label */
+	int		shift;	/* right shift applied when is_val is true */
+};
+
+struct pgtable_level {
+	const struct flag_info *flag;	/* flag table for this level, may be NULL */
+	size_t num;			/* number of entries in flag[] */
+	u64 mask;			/* OR of the masks of all flag[] entries */
+};
+
+extern struct pgtable_level pg_level[5];
+/* Print a size using the largest binary suffix that keeps it exact */
+void pt_dump_size(struct seq_file *m, unsigned long delta);
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
new file mode 100644
index 0000000000..9df3af8d48
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Christophe Leroy CS S.I.
+ * <christophe.leroy@c-s.fr>
+ *
+ * This dumps the content of Segment Registers
+ */
+
+#include <linux/debugfs.h>
+
+static void seg_show(struct seq_file *m, int i)
+{
+	u32 sr = mfsr(i << 28);	/* presumably the segment register covering i << 28 */
+
+	seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i);
+	seq_printf(m, "Kern key %d ", (sr >> 30) & 1);
+	seq_printf(m, "User key %d ", (sr >> 29) & 1);
+	if (!(sr & 0x80000000)) {
+		if (sr & 0x10000000)
+			seq_puts(m, "No Exec ");
+		seq_printf(m, "VSID 0x%06x", sr & 0xffffff);
+	} else {
+		seq_printf(m, "Device 0x%03x", (sr >> 20) & 0x1ff);
+		seq_printf(m, "-0x%05x", sr & 0xfffff);
+	}
+	seq_puts(m, "\n");
+}
+
+static int sr_show(struct seq_file *m, void *v)
+{
+	int seg = 0;
+
+	/* Segments below TASK_SIZE are listed as user, the rest of the 16 as kernel */
+	seq_puts(m, "---[ User Segments ]---\n");
+	while (seg < TASK_SIZE >> 28)
+		seg_show(m, seg++);
+	seq_puts(m, "\n---[ Kernel Segments ]---\n");
+	while (seg < 16)
+		seg_show(m, seg++);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sr);
+
+static int __init sr_init(void)
+{
+	debugfs_create_file("segment_registers", 0400, arch_debugfs_dir,
+			    NULL, &sr_fops);
+	return 0;	/* the result of debugfs_create_file() is not checked */
+}
+device_initcall(sr_init);
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
new file mode 100644
index 0000000000..f884760ca5
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include "ptdump.h"
+
+static const struct flag_info flag_array[] = {	/* "clear" texts are blanks as wide as "set" */
+	{
+		.mask	= _PAGE_USER,
+		.val	= _PAGE_USER,
+		.set	= "user",
+		.clear	= "    ",
+	}, {
+		.mask	= _PAGE_RW,
+		.val	= 0,	/* inverted test: "set" means the RW bit is clear */
+		.set	= "r ",
+		.clear	= "rw",
+	}, {
+		.mask	= _PAGE_EXEC,
+		.val	= _PAGE_EXEC,
+		.set	= " X ",
+		.clear	= "   ",
+	}, {
+		.mask	= _PAGE_PRESENT,
+		.val	= _PAGE_PRESENT,
+		.set	= "present",
+		.clear	= "       ",
+	}, {
+		.mask	= _PAGE_COHERENT,
+		.val	= _PAGE_COHERENT,
+		.set	= "coherent",
+		.clear	= "        ",
+	}, {
+		.mask	= _PAGE_GUARDED,
+		.val	= _PAGE_GUARDED,
+		.set	= "guarded",
+		.clear	= "       ",
+	}, {
+		.mask	= _PAGE_DIRTY,
+		.val	= _PAGE_DIRTY,
+		.set	= "dirty",
+		.clear	= "     ",
+	}, {
+		.mask	= _PAGE_ACCESSED,
+		.val	= _PAGE_ACCESSED,
+		.set	= "accessed",
+		.clear	= "        ",
+	}, {
+		.mask	= _PAGE_WRITETHRU,
+		.val	= _PAGE_WRITETHRU,
+		.set	= "write through",
+		.clear	= "             ",
+	}, {
+		.mask	= _PAGE_NO_CACHE,
+		.val	= _PAGE_NO_CACHE,
+		.set	= "no cache",
+		.clear	= "        ",
+	}, {
+		.mask	= _PAGE_SPECIAL,
+		.val	= _PAGE_SPECIAL,
+		.set	= "special",	/* no "clear" text for this one */
+	}
+};
+
+struct pgtable_level pg_level[5] = {	/* every level uses the same flag table; .mask is not set here */
+	{ /* pgd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* p4d */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pud */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pmd */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	}, { /* pte */
+		.flag	= flag_array,
+		.num	= ARRAY_SIZE(flag_array),
+	},
+};
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
new file mode 100644
index 0000000000..8e60af32e5
--- /dev/null
+++ b/arch/powerpc/net/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
new file mode 100644
index 0000000000..72b7bb34fa
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * bpf_jit.h: BPF JIT compiler for PPC
+ *
+ * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ * 	     2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+#define FUNCTION_DESCR_SIZE	24	/* bytes reserved ahead of the code */
+#else
+#define FUNCTION_DESCR_SIZE	0
+#endif
+
+#define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4)	/* idx as a byte offset */
+/* Write instr at d[idx] only if a buffer is given; idx advances either way */
+#define PLANT_INSTR(d, idx, instr)					      \
+	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
+#define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)	/* uses 'image' and 'ctx' from the caller's scope */
+
+/* Long jump (unconditional 'branch'); makes the calling function return -ERANGE if dest is non-zero and out of range */
+#define PPC_JMP(dest)							      \
+	do {								      \
+		long offset = (long)(dest) - CTX_NIA(ctx);		      \
+		if ((dest) != 0 && !is_offset_in_branch_range(offset)) {		      \
+			pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx);			\
+			return -ERANGE;					      \
+		}							      \
+		EMIT(PPC_RAW_BRANCH(offset));				      \
+	} while (0)
+
+/* bl (unconditional 'branch' with link); unlike PPC_JMP this does no range check */
+#define PPC_BL(dest)	EMIT(PPC_RAW_BL((dest) - (unsigned long)(image + ctx->idx)))
+
+/* "cond" here covers BO:BI fields. Makes the calling function return -ERANGE if dest is non-zero and out of range. */
+#define PPC_BCC_SHORT(cond, dest)					      \
+	do {								      \
+		long offset = (long)(dest) - CTX_NIA(ctx);		      \
+		if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) {		      \
+			pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx);		\
+			return -ERANGE;					      \
+		}							      \
+		EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc));					\
+	} while (0)
+
+/* Sign-extended 32-bit immediate load: one li if i fits 16 bits, else lis plus an ori when the low half is non-zero */
+#define PPC_LI32(d, i)		do {					      \
+		if ((int)(uintptr_t)(i) >= -32768 &&			      \
+				(int)(uintptr_t)(i) < 32768)		      \
+			EMIT(PPC_RAW_LI(d, i));				      \
+		else {							      \
+			EMIT(PPC_RAW_LIS(d, IMM_H(i)));			      \
+			if (IMM_L(i))					      \
+				EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));	      \
+		} } while(0)
+
+#ifdef CONFIG_PPC64
+#define PPC_LI64(d, i)		do {					      \
+		if ((long)(i) >= -2147483648 &&				      \
+				(long)(i) < 2147483648)			      \
+			PPC_LI32(d, i);					      \
+		else {							      \
+			if (!((uintptr_t)(i) & 0xffff800000000000ULL))	      \
+				EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) &   \
+						0xffff));		      \
+			else {						      \
+				EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \
+				if ((uintptr_t)(i) & 0x0000ffff00000000ULL)   \
+					EMIT(PPC_RAW_ORI(d, d,		      \
+					  ((uintptr_t)(i) >> 32) & 0xffff));  \
+			}						      \
+			EMIT(PPC_RAW_SLDI(d, d, 32));			      \
+			if ((uintptr_t)(i) & 0x00000000ffff0000ULL)	      \
+				EMIT(PPC_RAW_ORIS(d, d,			      \
+					 ((uintptr_t)(i) >> 16) & 0xffff));   \
+			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
+				EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &       \
+							0xffff));             \
+		} } while (0)
+#endif /* CONFIG_PPC64: PPC_LI64 loads a 64-bit immediate, via PPC_LI32 when i fits 32 bits */
+
+/*
+ * The fly in the ointment of code size changing from pass to pass is
+ * avoided by padding the short branch case with a NOP, so that both cases
+ * take two instructions. If code size differed with different branch
+ * reaches, code would move from one pass to the next and a few passes
+ * would be needed to converge on a stable state.
+ */
+#define PPC_BCC(cond, dest)	do {					      \
+		if (is_offset_in_cond_branch_range((long)(dest) - CTX_NIA(ctx))) {	\
+			PPC_BCC_SHORT(cond, dest);			      \
+			EMIT(PPC_RAW_NOP());				      \
+		} else {						      \
+			/* Flip the 'T or F' bit to invert comparison */      \
+			PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, CTX_NIA(ctx) + 2*4);  \
+			PPC_JMP(dest);					      \
+		} } while(0)
+
+/* To create a branch condition, select a bit of cr0... */
+#define CR0_LT		0
+#define CR0_GT		1
+#define CR0_EQ		2
+/* ...and modify BO[3] */
+#define COND_CMP_TRUE	0x100
+#define COND_CMP_FALSE	0x000
+/* Together, they make all required comparisons: */
+#define COND_GT		(CR0_GT | COND_CMP_TRUE)
+#define COND_GE		(CR0_LT | COND_CMP_FALSE)	/* i.e. not LT */
+#define COND_EQ		(CR0_EQ | COND_CMP_TRUE)
+#define COND_NE		(CR0_EQ | COND_CMP_FALSE)	/* i.e. not EQ */
+#define COND_LT		(CR0_LT | COND_CMP_TRUE)
+#define COND_LE		(CR0_GT | COND_CMP_FALSE)	/* i.e. not GT */
+/* Flags kept in codegen_context.seen next to the per-register bits */
+#define SEEN_FUNC	0x20000000 /* might call external helpers */
+#define SEEN_TAILCALL	0x40000000 /* uses tail calls */
+
+struct codegen_context {
+	/*
+	 * This is used to track register usage as well
+	 * as calls to external helpers.
+	 * - register usage is tracked with corresponding
+	 *   bits (r3-r31)
+	 * - rest of the bits can be used to track other
+	 *   things -- for now, we use bits 0 to 2
+	 *   encoded in SEEN_* macros above
+	 */
+	unsigned int seen;
+	unsigned int idx;		/* next instruction slot, in 32-bit words */
+	unsigned int stack_size;	/* BPF stack size used to lay out the frame */
+	int b2p[MAX_BPF_JIT_REG + 2];	/* BPF register -> PPC register, see bpf_to_ppc() */
+	unsigned int exentry_idx;	/* next exception table entry to fill */
+	unsigned int alt_exit_addr;	/* byte offset of an extra epilogue copy, 0 if none */
+};
+
+#define bpf_to_ppc(r)	(ctx->b2p[r])	/* uses 'ctx' from the caller's scope */
+/* Number of instructions in one exception fixup stub */
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN	3 /* Three instructions => 12 bytes */
+#else
+#define BPF_FIXUP_LEN	2 /* Two instructions => 8 bytes */
+#endif
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	smp_wmb();	/* smp write barrier, issued ahead of the flush of [start, end) */
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+	return ctx->seen & (0x80000000U >> i);	/* rN sits N bits below the MSB */
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen |= 0x80000000U >> i;	/* rN sits N bits below the MSB */
+}
+
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen &= ~(0x80000000U >> i);	/* rN sits N bits below the MSB */
+}
+
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
+int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+		       u32 *addrs, int pass, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
+/* Fills one exception table entry and its fixup stub; only acts when pass == 2 */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+			  int insn_idx, int jmp_off, int dst_reg);
+
+#endif
+
+#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
new file mode 100644
index 0000000000..37043dfc1a
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler
+ *
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ *		  IBM Corporation
+ *
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
+{
+	memset32(area, BREAKPOINT_INSTRUCTION, size / sizeof(u32));	/* whole words only */
+}
+
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
+{
+	if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
+		PPC_JMP(exit_addr);	/* the macro itself may return -ERANGE */
+	} else if (ctx->alt_exit_addr) {
+		if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4))))
+			return -1;
+		PPC_JMP(ctx->alt_exit_addr);	/* branch to the epilogue copy emitted earlier */
+	} else {
+		ctx->alt_exit_addr = ctx->idx * 4;	/* remember where this epilogue copy starts */
+		bpf_jit_build_epilogue(image, ctx);
+	}
+	/* tmp_reg is not used by this implementation */
+	return 0;
+}
+
+struct powerpc64_jit_data {	/* state bpf_int_jit_compile() keeps for its extra pass */
+	struct bpf_binary_header *header;
+	u32 *addrs;	/* non-NULL marks that saved state exists */
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
+{
+	return true;	/* unconditionally, for both the 32-bit and 64-bit JIT */
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+{
+	u32 proglen;
+	u32 alloclen;
+	u8 *image = NULL;
+	u32 *code_base;
+	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
+	struct codegen_context cgctx;
+	int pass;
+	int flen;
+	struct bpf_binary_header *bpf_hdr;
+	struct bpf_prog *org_fp = fp;
+	struct bpf_prog *tmp_fp;
+	bool bpf_blinded = false;
+	bool extra_pass = false;
+	u32 extable_len;
+	u32 fixup_len;
+
+	if (!fp->jit_requested)
+		return org_fp;
+
+	tmp_fp = bpf_jit_blind_constants(org_fp);
+	if (IS_ERR(tmp_fp))
+		return org_fp;	/* blinding failed: hand back the program unmodified */
+
+	if (tmp_fp != org_fp) {
+		bpf_blinded = true;
+		fp = tmp_fp;
+	}
+
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+
+	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {	/* state saved by an earlier call: this is the extra pass */
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		extra_pass = true;
+		/* During extra pass, ensure index is reset before repopulating extable entries */
+		cgctx.exentry_idx = 0;
+		goto skip_init_ctx;
+	}
+
+	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL) {
+		fp = org_fp;
+		goto out_addrs;
+	}
+
+	memset(&cgctx, 0, sizeof(struct codegen_context));
+	bpf_jit_init_reg_mapping(&cgctx);
+
+	/* Make sure that the stack is quadword aligned. */
+	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
+
+	/* Scouting faux-generate pass 0 */
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+		/* We hit something illegal or unsupported. */
+		fp = org_fp;
+		goto out_addrs;
+	}
+
+	/*
+	 * If we have seen a tail call, we need a second pass.
+	 * This is because bpf_jit_emit_common_epilogue() is called
+	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+	 * We also need a second pass if we ended up with too large
+	 * a program so as to ensure BPF_EXIT branches are in range.
+	 */
+	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
+		cgctx.idx = 0;
+		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+			fp = org_fp;
+			goto out_addrs;
+		}
+	}
+
+	bpf_jit_realloc_regs(&cgctx);
+	/*
+	 * Pretend to build prologue, given the features we've seen.  This will
+	 * update cgctx.idx as it pretends to output instructions, then we can
+	 * calculate total size from idx.
+	 */
+	bpf_jit_build_prologue(0, &cgctx);
+	addrs[fp->len] = cgctx.idx * 4;	/* the epilogue is sized starting from here */
+	bpf_jit_build_epilogue(0, &cgctx);
+
+	fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
+	extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
+
+	proglen = cgctx.idx * 4;
+	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
+
+	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
+	if (!bpf_hdr) {
+		fp = org_fp;
+		goto out_addrs;
+	}
+	/* Layout of the image: descriptor, code, fixup stubs, exception table */
+	if (extable_len)
+		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+
+skip_init_ctx:
+	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+
+	/* Code generation passes 1-2 */
+	for (pass = 1; pass < 3; pass++) {
+		/* Now build the prologue, body code & epilogue for real. */
+		cgctx.idx = 0;
+		cgctx.alt_exit_addr = 0;
+		bpf_jit_build_prologue(code_base, &cgctx);
+		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
+			bpf_jit_binary_free(bpf_hdr);
+			fp = org_fp;
+			goto out_addrs;
+		}
+		bpf_jit_build_epilogue(code_base, &cgctx);
+
+		if (bpf_jit_enable > 1)
+			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
+				proglen - (cgctx.idx * 4), cgctx.seen);
+	}
+
+	if (bpf_jit_enable > 1)
+		/*
+		 * Note that we output the base address of the code_base
+		 * rather than image, since opcodes are in code_base.
+		 */
+		bpf_jit_dump(flen, proglen, pass, code_base);
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	/* Function descriptor nastiness: Address + TOC */
+	((u64 *)image)[0] = (u64)code_base;
+	((u64 *)image)[1] = local_paca->kernel_toc;
+#endif
+
+	fp->bpf_func = (void *)image;
+	fp->jited = 1;
+	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
+
+	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
+	if (!fp->is_func || extra_pass) {
+		bpf_jit_binary_lock_ro(bpf_hdr);
+		bpf_prog_fill_jited_linfo(fp, addrs);
+out_addrs:	/* error paths jump in here to share this cleanup */
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {	/* is_func and not yet the extra pass: save the state for it */
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
+
+out:
+	if (bpf_blinded)
+		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
+	return fp;
+}
+
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
+			  int insn_idx, int jmp_off, int dst_reg)
+{
+	off_t offset;
+	unsigned long pc;
+	struct exception_table_entry *ex;
+	u32 *fixup;
+
+	/* Populate extable entries only in the last pass */
+	if (pass != 2)
+		return 0;
+
+	if (!fp->aux->extable ||
+	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+		return -EINVAL;
+
+	pc = (unsigned long)&image[insn_idx];
+	/* Fixup stubs sit right below the extable, BPF_FIXUP_LEN words each */
+	fixup = (void *)fp->aux->extable -
+		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
+		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
+	/* Stub: zero the destination register(s), then branch to pc + jmp_off */
+	fixup[0] = PPC_RAW_LI(dst_reg, 0);
+	if (IS_ENABLED(CONFIG_PPC32))
+		fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
+
+	fixup[BPF_FIXUP_LEN - 1] =
+		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
+
+	ex = &fp->aux->extable[ctx->exentry_idx];
+	/* Both fields hold offsets relative to their own address; each must be negative and >= INT_MIN */
+	offset = pc - (long)&ex->insn;
+	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+		return -ERANGE;
+	ex->insn = offset;
+
+	offset = (long)fixup - (long)&ex->fixup;
+	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+		return -ERANGE;
+	ex->fixup = offset;
+
+	ctx->exentry_idx++;
+	return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
new file mode 100644
index 0000000000..7f91ea064c
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -0,0 +1,1301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler for PPC32
+ *
+ * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu>
+ *		  CS GROUP France
+ *
+ * Based on PPC64 eBPF JIT compiler by Naveen N. Rao
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ *		[	prev sp		] <-------------
+ *		[   nv gpr save area	] 16 * 4	|
+ * fp (r31) -->	[   ebpf stack space	] upto 512	|
+ *		[     frame header	] 16		|
+ * sp (r1) --->	[    stack pointer	] --------------
+ */
+
+/* for gpr non volatile registers r17 to r31 (15) + tail call */
+#define BPF_PPC_STACK_SAVE	(15 * 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx)	(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size)
+/* Load r with the sign extension (0 or -1) of the immediate i */
+#define PPC_EX32(r, i)		EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+
+/* PPC NVR range -- update this if we ever use NVRs below r17 */
+#define BPF_PPC_NVR_MIN		_R17
+#define BPF_PPC_TC		_R16	/* names the stack slot of the tail call counter */
+
+/* BPF register usage */
+#define TMP_REG			(MAX_BPF_JIT_REG + 0)
+
+/* BPF to ppc register mappings: b2p[] names the low word register, the high word uses the register just below it */
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
+{
+	/* function return value */
+	ctx->b2p[BPF_REG_0] = _R12;
+	/* function arguments */
+	ctx->b2p[BPF_REG_1] = _R4;
+	ctx->b2p[BPF_REG_2] = _R6;
+	ctx->b2p[BPF_REG_3] = _R8;
+	ctx->b2p[BPF_REG_4] = _R10;
+	ctx->b2p[BPF_REG_5] = _R22;
+	/* non volatile registers */
+	ctx->b2p[BPF_REG_6] = _R24;
+	ctx->b2p[BPF_REG_7] = _R26;
+	ctx->b2p[BPF_REG_8] = _R28;
+	ctx->b2p[BPF_REG_9] = _R30;
+	/* frame pointer aka BPF_REG_10 */
+	ctx->b2p[BPF_REG_FP] = _R18;
+	/* eBPF jit internal registers */
+	ctx->b2p[BPF_REG_AX] = _R20;
+	ctx->b2p[TMP_REG] = _R31;		/* 32 bits, so no high word register */
+}
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+	bool known = reg == BPF_PPC_TC || (reg >= BPF_PPC_NVR_MIN && reg < 32);
+
+	/* An unknown register warns and gets the hole left for alignment */
+	WARN(!known, "BPF JIT is asking about unknown registers, will crash the stack");
+
+	return BPF_PPC_STACKFRAME(ctx) - 4 * (known ? 32 - reg : 1);
+}
+
+#define SEEN_VREG_MASK		0x1ff80000 /* Volatile registers r3-r12 (bits 28..19) */
+#define SEEN_NVREG_FULL_MASK	0x0003ffff /* Non volatile registers r14-r31 (bits 17..0) */
+#define SEEN_NVREG_TEMP_MASK	0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG: r19-r22 and r31 */
+
+static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
+{
+	unsigned int frame_users = SEEN_FUNC | SEEN_TAILCALL | SEEN_NVREG_FULL_MASK;
+
+	/*
+	 * A frame is required as soon as the program calls kernel helpers,
+	 * uses a non volatile register or keeps a tail call counter ...
+	 */
+	if (ctx->seen & frame_users)
+		return true;
+	/* ... or uses its own stack area, which shows as a use of BPF_REG_FP */
+	return bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP));
+}
+
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+	unsigned int nvreg_mask;
+
+	if (ctx->seen & SEEN_FUNC)
+		nvreg_mask = SEEN_NVREG_TEMP_MASK;
+	else
+		nvreg_mask = SEEN_NVREG_FULL_MASK;
+	/* Move used non volatile registers to free volatile ones while both exist */
+	while (ctx->seen & nvreg_mask &&
+	      (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab));	/* lowest seen one among even registers and r31 */
+		int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));	/* lowest unseen even volatile register */
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			if (i != TMP_REG) {	/* all others also own the register just below */
+				bpf_set_seen_register(ctx, new - 1);
+				bpf_clear_seen_register(ctx, old - 1);
+			}
+			break;
+		}
+	}
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	/* Initialize tail_call_cnt, to be skipped if we do tail calls. */
+	if (ctx->seen & SEEN_TAILCALL)
+		EMIT(PPC_RAW_LI(_R4, 0));
+	else
+		EMIT(PPC_RAW_NOP());
+
+#define BPF_TAILCALL_PROLOGUE_SIZE	4	/* bytes a tail call skips at the target's entry */
+
+	if (bpf_has_stack_frame(ctx))
+		EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
+
+	if (ctx->seen & SEEN_TAILCALL)
+		EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+
+	/* First arg comes in as a 32 bits pointer. */
+	EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+	EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));	/* so its high word is zeroed */
+
+	/*
+	 * We need a stack frame, but we don't necessarily need to
+	 * save/restore LR unless we call other functions
+	 */
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MFLR(_R0));
+
+	/*
+	 * Back up non-volatile regs -- registers r17-r31
+	 */
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
+
+	/* Setup frame pointer to point to the bpf stack area */
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
+		EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0));
+		EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
+				  STACK_FRAME_MIN_SIZE + ctx->stack_size));
+	}
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	int i;
+
+	/* Restore NVRs */
+	for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+		if (bpf_is_seen_register(ctx, i))
+			EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+	/* Tear down our stack frame */
+	if (bpf_has_stack_frame(ctx))
+		EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
+
+	if (ctx->seen & SEEN_FUNC)
+		EMIT(PPC_RAW_MTLR(_R0));
+	/* No final branch is emitted here: each caller adds its own */
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));	/* only the low word of BPF_REG_0 goes to r3 */
+
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	EMIT(PPC_RAW_BLR());
+}
+
+int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
+{
+	s32 rel = (s32)func - (s32)(image + ctx->idx);
+	/* A single bl is used only with a real image and a target within +/-32MB */
+	if (image && rel < 0x2000000 && rel >= -0x2000000) {
+		PPC_BL(func);
+	} else {
+		/* Load function address into r0 */
+		EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
+		EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func)));
+		EMIT(PPC_RAW_MTCTR(_R0));
+		EMIT(PPC_RAW_BCTRL());
+	}
+
+	return 0;
+}
+
+static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+	/*
+	 * By now, the eBPF program has already setup parameters in r3-r8
+	 * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+	 * r5-r6/BPF_REG_2 - pointer to bpf_array
+	 * r7-r8/BPF_REG_3 - index in bpf_array
+	 */
+	int b2p_bpf_array = bpf_to_ppc(BPF_REG_2);
+	int b2p_index = bpf_to_ppc(BPF_REG_3);
+
+	/*
+	 * if (index >= array->map.max_entries)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+	EMIT(PPC_RAW_CMPLW(b2p_index, _R0));
+	EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+	PPC_BCC_SHORT(COND_GE, out);
+
+	/*
+	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT));
+	/* tail_call_cnt++; */
+	EMIT(PPC_RAW_ADDIC(_R0, _R0, 1));
+	PPC_BCC_SHORT(COND_GE, out);
+
+	/* prog = array->ptrs[index]; */
+	EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
+	EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
+	EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
+
+	/*
+	 * if (prog == NULL)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_CMPLWI(_R3, 0));
+	PPC_BCC_SHORT(COND_EQ, out);
+
+	/* goto *(prog->bpf_func + prologue_size); */
+	EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
+	EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
+	EMIT(PPC_RAW_MTCTR(_R3));
+
+	EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
+
+	/* Put tail_call_cnt in r4 */
+	EMIT(PPC_RAW_MR(_R4, _R0));
+
+	/* tear down stack, restore NVRs, ... */
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	EMIT(PPC_RAW_BCTR());
+
+	/* out: (the address of this label is supplied by the caller) */
+	return 0;
+}
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+		       u32 *addrs, int pass, bool extra_pass)
+{
+	const struct bpf_insn *insn = fp->insnsi;
+	int flen = fp->len;
+	int i, ret;
+
+	/* Start of epilogue code - will only be valid 2nd pass onwards */
+	u32 exit_addr = addrs[flen];
+
+	for (i = 0; i < flen; i++) {
+		u32 code = insn[i].code;
+		u32 prevcode = i ? insn[i - 1].code : 0;
+		u32 dst_reg = bpf_to_ppc(insn[i].dst_reg);
+		u32 dst_reg_h = dst_reg - 1;
+		u32 src_reg = bpf_to_ppc(insn[i].src_reg);
+		u32 src_reg_h = src_reg - 1;
+		u32 src2_reg = dst_reg;
+		u32 src2_reg_h = dst_reg_h;
+		u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
+		u32 tmp_reg = bpf_to_ppc(TMP_REG);
+		u32 size = BPF_SIZE(code);
+		u32 save_reg, ret_reg;
+		s16 off = insn[i].off;
+		s32 imm = insn[i].imm;
+		bool func_addr_fixed;
+		u64 func_addr;
+		u32 true_cond;
+		u32 tmp_idx;
+		int j;
+
+		if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) &&
+		    (BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) &&
+		    BPF_OP(prevcode) == BPF_MOV && BPF_SRC(prevcode) == BPF_X &&
+		    insn[i - 1].dst_reg == insn[i].dst_reg && insn[i - 1].imm != 1) {
+			src2_reg = bpf_to_ppc(insn[i - 1].src_reg);
+			src2_reg_h = src2_reg - 1;
+			ctx->idx = addrs[i - 1] / 4;
+		}
+
+		/*
+		 * addrs[] maps a BPF bytecode address into a real offset from
+		 * the start of the body code.
+		 */
+		addrs[i] = ctx->idx * 4;
+
+		/*
+		 * As an optimization, we note down which registers
+		 * are used so that we can only save/restore those in our
+		 * prologue and epilogue. We do this here regardless of whether
+		 * the actual BPF instruction uses src/dst registers or not
+		 * (for instance, BPF_CALL does not use them). The expectation
+		 * is that those instructions will have src_reg/dst_reg set to
+		 * 0. Even otherwise, we just lose some prologue/epilogue
+		 * optimization but everything else should work without
+		 * any issues.
+		 */
+		if (dst_reg >= 3 && dst_reg < 32) {
+			bpf_set_seen_register(ctx, dst_reg);
+			bpf_set_seen_register(ctx, dst_reg_h);
+		}
+
+		if (src_reg >= 3 && src_reg < 32) {
+			bpf_set_seen_register(ctx, src_reg);
+			bpf_set_seen_register(ctx, src_reg_h);
+		}
+
+		switch (code) {
+		/*
+		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+		 */
+		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+			EMIT(PPC_RAW_ADD(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+			EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_ADDE(dst_reg_h, src2_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+			EMIT(PPC_RAW_SUB(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+			EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, src2_reg));
+			EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, src2_reg_h));
+			break;
+		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+			imm = -imm;
+			fallthrough;
+		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (IMM_HA(imm) & 0xffff) {
+				EMIT(PPC_RAW_ADDIS(dst_reg, src2_reg, IMM_HA(imm)));
+				src2_reg = dst_reg;
+			}
+			if (IMM_L(imm))
+				EMIT(PPC_RAW_ADDI(dst_reg, src2_reg, IMM_L(imm)));
+			break;
+		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+			imm = -imm;
+			fallthrough;
+		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+				break;
+			}
+			if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_ADDIC(dst_reg, src2_reg, imm));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, _R0));
+			}
+			if (imm >= 0 || (BPF_OP(code) == BPF_SUB && imm == 0x80000000))
+				EMIT(PPC_RAW_ADDZE(dst_reg_h, src2_reg_h));
+			else
+				EMIT(PPC_RAW_ADDME(dst_reg_h, src2_reg_h));
+			break;
+		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_MULW(_R0, src2_reg, src_reg_h));
+			EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_MULHWU(tmp_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
+			break;
+		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+			EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+			if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (imm == -1) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+			} else if (is_power_of_2((u32)imm)) {
+				EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, ilog2(imm)));
+			} else if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_MULI(dst_reg, src2_reg, imm));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_MULW(dst_reg, src2_reg, _R0));
+			}
+			break;
+		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+			if (!imm) {
+				PPC_LI32(dst_reg, 0);
+				PPC_LI32(dst_reg_h, 0);
+			} else if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else if (imm == -1) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+				EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+			} else if (imm > 0 && is_power_of_2(imm)) {
+				imm = ilog2(imm);
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31));
+				EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm));
+			} else {
+				bpf_set_seen_register(ctx, tmp_reg);
+				PPC_LI32(tmp_reg, imm);
+				EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, tmp_reg));
+				if (imm < 0)
+					EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, src2_reg));
+				EMIT(PPC_RAW_MULHWU(_R0, src2_reg, tmp_reg));
+				EMIT(PPC_RAW_MULW(dst_reg, src2_reg, tmp_reg));
+				EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
+			}
+			break;
+		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+			EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+			EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg));
+			EMIT(PPC_RAW_MULW(_R0, src_reg, _R0));
+			EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+			break;
+		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+			return -EOPNOTSUPP;
+		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+			return -EOPNOTSUPP;
+		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+			if (!imm)
+				return -EINVAL;
+			if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (is_power_of_2((u32)imm)) {
+				EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm)));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0));
+			}
+			break;
+		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+			if (!imm)
+				return -EINVAL;
+
+			if (!is_power_of_2((u32)imm)) {
+				bpf_set_seen_register(ctx, tmp_reg);
+				PPC_LI32(tmp_reg, imm);
+				EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg));
+				EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0));
+				EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+			} else if (imm == 1) {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			} else {
+				imm = ilog2((u32)imm);
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - imm, 31));
+			}
+			break;
+		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+			if (!imm)
+				return -EINVAL;
+			if (imm < 0)
+				imm = -imm;
+			if (!is_power_of_2(imm))
+				return -EOPNOTSUPP;
+			if (imm == 1)
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			else
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - ilog2(imm), 31));
+			EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+			if (!imm)
+				return -EINVAL;
+			if (!is_power_of_2(abs(imm)))
+				return -EOPNOTSUPP;
+
+			if (imm < 0) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+				EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+				imm = -imm;
+				src2_reg = dst_reg;
+			}
+			if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else {
+				imm = ilog2(imm);
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm));
+			}
+			break;
+		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+			EMIT(PPC_RAW_NEG(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+			EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+			EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+			break;
+
+		/*
+		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+		 */
+		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_AND(dst_reg_h, src2_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+			if (imm >= 0)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			fallthrough;
+		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+			if (!IMM_H(imm)) {
+				EMIT(PPC_RAW_ANDI(dst_reg, src2_reg, IMM_L(imm)));
+			} else if (!IMM_L(imm)) {
+				EMIT(PPC_RAW_ANDIS(dst_reg, src2_reg, IMM_H(imm)));
+			} else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0,
+						    32 - fls(imm), 32 - ffs(imm)));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_AND(dst_reg, src2_reg, _R0));
+			}
+			break;
+		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+			EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, src2_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+			EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+			/* Sign-extended */
+			if (imm < 0)
+				EMIT(PPC_RAW_LI(dst_reg_h, -1));
+			fallthrough;
+		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+			if (IMM_L(imm)) {
+				EMIT(PPC_RAW_ORI(dst_reg, src2_reg, IMM_L(imm)));
+				src2_reg = dst_reg;
+			}
+			if (IMM_H(imm))
+				EMIT(PPC_RAW_ORIS(dst_reg, src2_reg, IMM_H(imm)));
+			break;
+		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+			if (dst_reg == src_reg) {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			} else {
+				EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg));
+				EMIT(PPC_RAW_XOR(dst_reg_h, src2_reg_h, src_reg_h));
+			}
+			break;
+		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+			if (dst_reg == src_reg)
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			else
+				EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+			if (imm < 0)
+				EMIT(PPC_RAW_NOR(dst_reg_h, src2_reg_h, src2_reg_h));
+			fallthrough;
+		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+			if (IMM_L(imm)) {
+				EMIT(PPC_RAW_XORI(dst_reg, src2_reg, IMM_L(imm)));
+				src2_reg = dst_reg;
+			}
+			if (IMM_H(imm))
+				EMIT(PPC_RAW_XORIS(dst_reg, src2_reg, IMM_H(imm)));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+			EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+			EMIT(PPC_RAW_SLW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_SRW(_R0, src2_reg, _R0));
+			EMIT(PPC_RAW_SLW(tmp_reg, src2_reg, tmp_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0));
+			EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
+			if (imm)
+				EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm));
+			else
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_RLWIMI(dst_reg_h, src2_reg, imm, 32 - imm, 31));
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, imm, 0, 31 - imm));
+			} else if (imm < 64) {
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			} else {
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			}
+			break;
+		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+			EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+			EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0));
+			EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+			EMIT(PPC_RAW_SRW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+			break;
+		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+			if (imm)
+				EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, imm));
+			else
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, 32 - imm, imm, 31));
+			} else if (imm < 64) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg_h, 64 - imm, imm - 32, 31));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			} else {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			}
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+			EMIT(PPC_RAW_SRAW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+			EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+			EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26));
+			EMIT(PPC_RAW_SRAW(tmp_reg, src2_reg_h, tmp_reg));
+			EMIT(PPC_RAW_SRAW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+			if (imm)
+				EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, imm));
+			else
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm));
+			} else if (imm < 64) {
+				EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, imm - 32));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+			} else {
+				EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, 31));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+			}
+			break;
+
+		/*
+		 * MOV
+		 */
+		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+			if (dst_reg == src_reg)
+				break;
+			EMIT(PPC_RAW_MR(dst_reg, src_reg));
+			EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+			/* special mov32 for zext */
+			if (imm == 1)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			else if (dst_reg != src_reg)
+				EMIT(PPC_RAW_MR(dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+			PPC_LI32(dst_reg, imm);
+			PPC_EX32(dst_reg_h, imm);
+			break;
+		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+			PPC_LI32(dst_reg, imm);
+			break;
+
+		/*
+		 * BPF_FROM_BE/LE
+		 */
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+			switch (imm) {
+			case 16:
+				/* Copy 16 bits to upper part */
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg, 16, 0, 15));
+				/* Rotate 8 bits right & mask */
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31));
+				break;
+			case 32:
+				/*
+				 * Rotate word left by 8 bits:
+				 * 2 bytes are already in their final position
+				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+				 */
+				EMIT(PPC_RAW_RLWINM(_R0, src2_reg, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, _R0));
+				break;
+			case 64:
+				bpf_set_seen_register(ctx, tmp_reg);
+				EMIT(PPC_RAW_RLWINM(tmp_reg, src2_reg, 8, 0, 31));
+				EMIT(PPC_RAW_RLWINM(_R0, src2_reg_h, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 0, 7));
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 16, 23));
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, _R0));
+				EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
+				break;
+			}
+			break;
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+			switch (imm) {
+			case 16:
+				/* zero-extend 16 bits into 32 bits */
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 16, 31));
+				break;
+			case 32:
+			case 64:
+				/* nop */
+				break;
+			}
+			break;
+
+		/*
+		 * BPF_ST NOSPEC (speculation barrier)
+		 */
+		case BPF_ST | BPF_NOSPEC:
+			break;
+
+		/*
+		 * BPF_ST(X)
+		 */
+		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STB(_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STH(_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STW(_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+			EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
+			break;
+		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4));
+			PPC_EX32(_R0, imm);
+			EMIT(PPC_RAW_STW(_R0, dst_reg, off));
+			break;
+
+		/*
+		 * BPF_STX ATOMIC (atomic ops)
+		 */
+		case BPF_STX | BPF_ATOMIC | BPF_W:
+			save_reg = _R0;
+			ret_reg = src_reg;
+
+			bpf_set_seen_register(ctx, tmp_reg);
+			bpf_set_seen_register(ctx, ax_reg);
+
+			/* Get offset into TMP_REG */
+			EMIT(PPC_RAW_LI(tmp_reg, off));
+			tmp_idx = ctx->idx * 4;
+			/* load value from memory into r0 */
+			EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
+
+			/* Save old value in BPF_REG_AX */
+			if (imm & BPF_FETCH)
+				EMIT(PPC_RAW_MR(ax_reg, _R0));
+
+			switch (imm) {
+			case BPF_ADD:
+			case BPF_ADD | BPF_FETCH:
+				EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
+				break;
+			case BPF_AND:
+			case BPF_AND | BPF_FETCH:
+				EMIT(PPC_RAW_AND(_R0, _R0, src_reg));
+				break;
+			case BPF_OR:
+			case BPF_OR | BPF_FETCH:
+				EMIT(PPC_RAW_OR(_R0, _R0, src_reg));
+				break;
+			case BPF_XOR:
+			case BPF_XOR | BPF_FETCH:
+				EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
+				break;
+			case BPF_CMPXCHG:
+				/*
+				 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+				 * in src_reg for other cases.
+				 */
+				ret_reg = bpf_to_ppc(BPF_REG_0);
+
+				/* Compare with old value in BPF_REG_0 */
+				EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
+				/* Don't set if different from old value */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+				fallthrough;
+			case BPF_XCHG:
+				save_reg = src_reg;
+				break;
+			default:
+				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
+						   code, i);
+				return -EOPNOTSUPP;
+			}
+
+			/* store new value */
+			EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
+			/* we're done if this succeeded */
+			PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+			/* For the BPF_FETCH variant, get old data into src_reg */
+			if (imm & BPF_FETCH) {
+				EMIT(PPC_RAW_MR(ret_reg, ax_reg));
+				if (!fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
+			}
+			break;
+
+		case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */
+			return -EOPNOTSUPP;
+
+		/*
+		 * BPF_LDX
+		 */
+		case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+		case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+		case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			/*
+			 * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+			 * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+			 * load only if addr is kernel address (see is_kernel_addr()), otherwise
+			 * set dst_reg=0 and move on.
+			 */
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				PPC_LI32(_R0, TASK_SIZE - off);
+				EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+				PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4);
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				/*
+				 * For BPF_DW case, "li reg_h,0" would be needed when
+				 * !fp->aux->verifier_zext. Emit NOP otherwise.
+				 *
+				 * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
+				 * if necessary. So, jump there insted of emitting an
+				 * additional "li reg_h,0" instruction.
+				 */
+				if (size == BPF_DW && !fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(dst_reg_h, 0));
+				else
+					EMIT(PPC_RAW_NOP());
+				/*
+				 * Need to jump two instructions instead of one for BPF_DW case
+				 * as there are two load instructions for dst_reg_h & dst_reg
+				 * respectively.
+				 */
+				if (size == BPF_DW)
+					PPC_JMP((ctx->idx + 3) * 4);
+				else
+					PPC_JMP((ctx->idx + 2) * 4);
+			}
+
+			switch (size) {
+			case BPF_B:
+				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+				break;
+			case BPF_H:
+				EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+				break;
+			case BPF_W:
+				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+				break;
+			case BPF_DW:
+				EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+				break;
+			}
+
+			if (size != BPF_DW && !fp->aux->verifier_zext)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				int insn_idx = ctx->idx - 1;
+				int jmp_off = 4;
+
+				/*
+				 * In case of BPF_DW, two lwz instructions are emitted, one
+				 * for higher 32-bit and another for lower 32-bit. So, set
+				 * ex->insn to the first of the two and jump over both
+				 * instructions in fixup.
+				 *
+				 * Similarly, with !verifier_zext, two instructions are
+				 * emitted for BPF_B/H/W case. So, set ex->insn to the
+				 * instruction that could fault and skip over both
+				 * instructions.
+				 */
+				if (size == BPF_DW || !fp->aux->verifier_zext) {
+					insn_idx -= 1;
+					jmp_off += 4;
+				}
+
+				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+							    jmp_off, dst_reg);
+				if (ret)
+					return ret;
+			}
+			break;
+
+		/*
+		 * Doubleword load
+		 * 16 byte instruction that uses two 'struct bpf_insn'
+		 */
+		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+			tmp_idx = ctx->idx;
+			PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+			PPC_LI32(dst_reg, (u32)insn[i].imm);
+			/* padding to allow full 4 instructions for later patching */
+			if (!image)
+				for (j = ctx->idx - tmp_idx; j < 4; j++)
+					EMIT(PPC_RAW_NOP());
+			/* Adjust for two bpf instructions */
+			addrs[++i] = ctx->idx * 4;
+			break;
+
+		/*
+		 * Return/Exit
+		 */
+		case BPF_JMP | BPF_EXIT:
+			/*
+			 * If this isn't the very last instruction, branch to
+			 * the epilogue. If we _are_ the last instruction,
+			 * we'll just fall through to the epilogue.
+			 */
+			if (i != flen - 1) {
+				ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr);
+				if (ret)
+					return ret;
+			}
+			/* else fall through to the epilogue */
+			break;
+
+		/*
+		 * Call kernel helper or bpf function
+		 */
+		case BPF_JMP | BPF_CALL:
+			ctx->seen |= SEEN_FUNC;
+
+			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+						    &func_addr, &func_addr_fixed);
+			if (ret < 0)
+				return ret;
+
+			if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) {
+				EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8));
+				EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12));
+			}
+
+			ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+			if (ret)
+				return ret;
+
+			EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3));
+			EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4));
+			break;
+
+		/*
+		 * Jumps and branches
+		 */
+		case BPF_JMP | BPF_JA:
+			PPC_JMP(addrs[i + 1 + off]);
+			break;
+
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP32 | BPF_JGT | BPF_K:
+		case BPF_JMP32 | BPF_JGT | BPF_X:
+		case BPF_JMP32 | BPF_JSGT | BPF_K:
+		case BPF_JMP32 | BPF_JSGT | BPF_X:
+			true_cond = COND_GT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLT | BPF_K:
+		case BPF_JMP | BPF_JLT | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_X:
+		case BPF_JMP32 | BPF_JLT | BPF_K:
+		case BPF_JMP32 | BPF_JLT | BPF_X:
+		case BPF_JMP32 | BPF_JSLT | BPF_K:
+		case BPF_JMP32 | BPF_JSLT | BPF_X:
+			true_cond = COND_LT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+		case BPF_JMP32 | BPF_JGE | BPF_K:
+		case BPF_JMP32 | BPF_JGE | BPF_X:
+		case BPF_JMP32 | BPF_JSGE | BPF_K:
+		case BPF_JMP32 | BPF_JSGE | BPF_X:
+			true_cond = COND_GE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLE | BPF_K:
+		case BPF_JMP | BPF_JLE | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_X:
+		case BPF_JMP32 | BPF_JLE | BPF_K:
+		case BPF_JMP32 | BPF_JLE | BPF_X:
+		case BPF_JMP32 | BPF_JSLE | BPF_K:
+		case BPF_JMP32 | BPF_JSLE | BPF_X:
+			true_cond = COND_LE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP32 | BPF_JEQ | BPF_K:
+		case BPF_JMP32 | BPF_JEQ | BPF_X:
+			true_cond = COND_EQ;
+			goto cond_branch;
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP32 | BPF_JNE | BPF_K:
+		case BPF_JMP32 | BPF_JNE | BPF_X:
+			true_cond = COND_NE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+		case BPF_JMP32 | BPF_JSET | BPF_K:
+		case BPF_JMP32 | BPF_JSET | BPF_X:
+			true_cond = COND_NE;
+			/* fallthrough; */
+
+cond_branch:
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JLT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JLE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
+			case BPF_JMP | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JGT | BPF_X:
+			case BPF_JMP32 | BPF_JLT | BPF_X:
+			case BPF_JMP32 | BPF_JGE | BPF_X:
+			case BPF_JMP32 | BPF_JLE | BPF_X:
+			case BPF_JMP32 | BPF_JEQ | BPF_X:
+			case BPF_JMP32 | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSGT | BPF_X:
+			case BPF_JMP | BPF_JSLT | BPF_X:
+			case BPF_JMP | BPF_JSGE | BPF_X:
+			case BPF_JMP | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JSGT | BPF_X:
+			case BPF_JMP32 | BPF_JSLT | BPF_X:
+			case BPF_JMP32 | BPF_JSGE | BPF_X:
+			case BPF_JMP32 | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSET | BPF_X:
+				EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JSET | BPF_X: {
+				EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JNE | BPF_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JLT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
+			case BPF_JMP | BPF_JLE | BPF_K:
+				/*
+				 * Need sign-extended load, so only positive
+				 * values can be used as imm in cmplwi
+				 */
+				if (imm >= 0 && imm < 32768) {
+					EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0));
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					/* sign-extending load ... but unsigned comparison */
+					PPC_EX32(_R0, imm);
+					EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0));
+					PPC_LI32(_R0, imm);
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JNE | BPF_K:
+			case BPF_JMP32 | BPF_JEQ | BPF_K:
+			case BPF_JMP32 | BPF_JGT | BPF_K:
+			case BPF_JMP32 | BPF_JLT | BPF_K:
+			case BPF_JMP32 | BPF_JGE | BPF_K:
+			case BPF_JMP32 | BPF_JLE | BPF_K:
+				if (imm >= 0 && imm < 65536) {
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					PPC_LI32(_R0, imm);
+					EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+				}
+				break;
+			}
+			case BPF_JMP | BPF_JSGT | BPF_K:
+			case BPF_JMP | BPF_JSLT | BPF_K:
+			case BPF_JMP | BPF_JSGE | BPF_K:
+			case BPF_JMP | BPF_JSLE | BPF_K:
+				if (imm >= 0 && imm < 65536) {
+					EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+					PPC_LI32(_R0, imm);
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JSGT | BPF_K:
+			case BPF_JMP32 | BPF_JSLT | BPF_K:
+			case BPF_JMP32 | BPF_JSGE | BPF_K:
+			case BPF_JMP32 | BPF_JSLE | BPF_K:
+				/*
+				 * signed comparison, so any 16-bit value
+				 * can be used in cmpwi
+				 */
+				if (imm >= -32768 && imm < 32768) {
+					EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					PPC_LI32(_R0, imm);
+					EMIT(PPC_RAW_CMPW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768) {
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
+				} else {
+					PPC_LI32(_R0, imm);
+					if (imm < 0) {
+						EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
+						PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					}
+					EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768) {
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
+				} else {
+					PPC_LI32(_R0, imm);
+					EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
+				}
+				break;
+			}
+			PPC_BCC(true_cond, addrs[i + 1 + off]);
+			break;
+
+		/*
+		 * Tail call
+		 */
+		case BPF_JMP | BPF_TAIL_CALL:
+			ctx->seen |= SEEN_TAILCALL;
+			ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+			if (ret < 0)
+				return ret;
+			break;
+
+		default:
+			/*
+			 * The filter contains something cruel & unusual.
+			 * We don't handle it, but also there shouldn't be
+			 * anything missing from our list.
+			 */
+			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i);
+			return -EOPNOTSUPP;
+		}
+		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext &&
+		    !insn_is_zext(&insn[i + 1]) && !(BPF_OP(code) == BPF_END && imm == 64))
+			EMIT(PPC_RAW_LI(dst_reg_h, 0));
+	}
+
+	/* Set end-of-body-code address for exit. */
+	addrs[i] = ctx->idx * 4;
+
+	return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
new file mode 100644
index 0000000000..0f8048f6da
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -0,0 +1,1228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * bpf_jit_comp64.c: eBPF JIT compiler
+ *
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ *		  IBM Corporation
+ *
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+#include <asm/security_features.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ * Ensure the top half (up to local_tmp_var) stays consistent
+ * with our redzone usage.
+ *
+ *		[	prev sp		] <-------------
+ *		[   nv gpr save area	] 5*8		|
+ *		[    tail_call_cnt	] 8		|
+ *		[    local_tmp_var	] 16		|
+ * fp (r31) -->	[   ebpf stack space	] upto 512	|
+ *		[     frame header	] 32/112	|
+ * sp (r1) --->	[    stack pointer	] --------------
+ */
+
+/* for gpr non volatile registers BPF_REG_6 to 10 (five 8-byte save slots) */
+#define BPF_PPC_STACK_SAVE	(5*8)
+/* for bpf JIT code internal usage: tail_call_cnt (8) + local_tmp_var (16) */
+#define BPF_PPC_STACK_LOCALS	24
+/* stack frame excluding BPF stack, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME	(STACK_FRAME_MIN_SIZE + \
+				 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
+
+/*
+ * BPF register usage: two JIT-internal temporaries, indexed in the b2p[]
+ * mapping starting at MAX_BPF_JIT_REG.
+ */
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
+
+/*
+ * Fill in ctx->b2p[], the table that translates a BPF register number (or
+ * one of the JIT's own temporaries) into the PowerPC GPR backing it.
+ */
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
+{
+	/* scratch registers private to the JIT, plus the auxiliary register */
+	ctx->b2p[TMP_REG_2] = _R10;
+	ctx->b2p[TMP_REG_1] = _R9;
+	ctx->b2p[BPF_REG_AX] = _R12;
+
+	/* BPF_REG_10, the frame pointer */
+	ctx->b2p[BPF_REG_FP] = _R31;
+
+	/* BPF_REG_6..9 are kept in non-volatile GPRs */
+	ctx->b2p[BPF_REG_9] = _R30;
+	ctx->b2p[BPF_REG_8] = _R29;
+	ctx->b2p[BPF_REG_7] = _R28;
+	ctx->b2p[BPF_REG_6] = _R27;
+
+	/* BPF_REG_1..5 carry function arguments */
+	ctx->b2p[BPF_REG_5] = _R7;
+	ctx->b2p[BPF_REG_4] = _R6;
+	ctx->b2p[BPF_REG_3] = _R5;
+	ctx->b2p[BPF_REG_2] = _R4;
+	ctx->b2p[BPF_REG_1] = _R3;
+
+	/* BPF_REG_0 holds the function return value */
+	ctx->b2p[BPF_REG_0] = _R8;
+}
+
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN		_R27
+
+/*
+ * Decide whether the JITed program needs a stack frame of its own.
+ *
+ * A frame is required when the program calls out to other functions
+ * (SEEN_FUNC) or when it touches its BPF stack area, which is detected
+ * through use of the register backing BPF_REG_FP.
+ */
+static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
+{
+	/* Calls to kernel helpers / other functions always need a frame */
+	if (ctx->seen & SEEN_FUNC)
+		return true;
+
+	/* Otherwise only if the frame pointer register has been used */
+	return bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP));
+}
+
+/*
+ * Redzone layout used when the program runs without a frame of its own:
+ *
+ *		[	prev sp		] <-------------
+ *		[	  ...       	] 		|
+ * sp (r1) --->	[    stack pointer	] --------------
+ *		[   nv gpr save area	] 5*8
+ *		[    tail_call_cnt	] 8
+ *		[    local_tmp_var	] 16
+ *		[   unused red zone	] 208 bytes protected
+ *
+ * Returns the r1-relative offset of local_tmp_var for whichever layout
+ * (own stack frame or redzone) is in effect.
+ */
+static int bpf_jit_stack_local(struct codegen_context *ctx)
+{
+	/*
+	 * No frame: the locals live in the redzone, below the NVR save area.
+	 * 24 = tail_call_cnt (8) + local_tmp_var (16).
+	 */
+	if (!bpf_has_stack_frame(ctx))
+		return -(BPF_PPC_STACK_SAVE + 24);
+
+	/* Own frame: the locals sit directly above the BPF stack area */
+	return STACK_FRAME_MIN_SIZE + ctx->stack_size;
+}
+
+/* Return the r1-relative offset of the tail_call_cnt slot. */
+static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
+{
+	int local_off = bpf_jit_stack_local(ctx);
+
+	/* tail_call_cnt is stored just above the 16-byte local_tmp_var */
+	return local_off + 16;
+}
+
+/*
+ * Return the r1-relative offset of the save slot for non-volatile GPR @reg.
+ *
+ * Slots are laid out downwards from the top of the save area, 8 bytes per
+ * register, with r31 highest. With our own stack frame the top of the area
+ * is BPF_PPC_STACKFRAME + ctx->stack_size above r1; without one it is r1
+ * itself, so the slots fall into the redzone (negative offsets).
+ *
+ * @reg must be a PowerPC GPR number in [BPF_PPC_NVR_MIN, 31]; anything else
+ * is a JIT bug and ends in BUG().
+ */
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
+		return (bpf_has_stack_frame(ctx) ?
+			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
+				- (8 * (32 - reg));
+
+	/* Terminate the message with a newline so the log line is complete */
+	pr_err("BPF JIT is asking about unknown registers\n");
+	BUG();
+}
+
+/* No-op in this file: the body is empty and @ctx is left untouched. */
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
+/*
+ * Emit the program prologue: optional TOC reload, tail_call_cnt
+ * initialisation (or two NOPs of the same size), optional stack frame
+ * creation, saving of the non-volatile registers that were seen in use,
+ * and frame pointer setup.
+ */
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+	int bpf_reg;
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) {
+		/* r2 <- kernel_toc field of struct paca_struct, addressed off r13 */
+		EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)));
+	}
+#endif
+
+	/*
+	 * The next two instructions are skipped when this program is
+	 * entered through a tail call, so both variants must be exactly
+	 * two instructions long: either zero tail_call_cnt, or pad with
+	 * NOPs when the program performs no tail calls itself.
+	 */
+	if (!(ctx->seen & SEEN_TAILCALL)) {
+		EMIT(PPC_RAW_NOP());
+		EMIT(PPC_RAW_NOP());
+	} else {
+		EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0));
+		/* tail_call_cnt slot, still in the redzone at this point */
+		EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8)));
+	}
+
+	if (bpf_has_stack_frame(ctx)) {
+		/*
+		 * A frame is needed; LR only has to be preserved when the
+		 * program actually calls other functions.
+		 */
+		if (ctx->seen & SEEN_FUNC) {
+			EMIT(PPC_RAW_MFLR(_R0));
+			EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF));
+		}
+
+		EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size)));
+	}
+
+	/*
+	 * Save the non-volatile GPRs backing BPF registers 6-10, but only
+	 * those seen in use. Without a frame of our own these land in the
+	 * protected zone below the previous stack frame.
+	 */
+	for (bpf_reg = BPF_REG_6; bpf_reg <= BPF_REG_10; bpf_reg++) {
+		if (!bpf_is_seen_register(ctx, bpf_to_ppc(bpf_reg)))
+			continue;
+
+		EMIT(PPC_RAW_STD(bpf_to_ppc(bpf_reg), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(bpf_reg))));
+	}
+
+	/* Point the BPF frame pointer at the bpf stack area, if it is used */
+	if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
+		EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
+				STACK_FRAME_MIN_SIZE + ctx->stack_size));
+	}
+}
+
+/*
+ * Emit the part of the epilogue shared by normal exit and tail calls:
+ * reload the saved non-volatile registers, pop our stack frame if we made
+ * one, and restore LR if it was saved.
+ */
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	int bpf_reg;
+
+	/* Reload every non-volatile GPR that the prologue saved */
+	for (bpf_reg = BPF_REG_6; bpf_reg <= BPF_REG_10; bpf_reg++) {
+		if (!bpf_is_seen_register(ctx, bpf_to_ppc(bpf_reg)))
+			continue;
+
+		EMIT(PPC_RAW_LD(bpf_to_ppc(bpf_reg), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(bpf_reg))));
+	}
+
+	/* Nothing more to undo when no frame was created */
+	if (!bpf_has_stack_frame(ctx))
+		return;
+
+	/* Pop our stack frame */
+	EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size));
+
+	/* LR was only saved if the program calls other functions */
+	if (!(ctx->seen & SEEN_FUNC))
+		return;
+
+	EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
+	EMIT(PPC_RAW_MTLR(_R0));
+}
+
+/*
+ * Emit the program epilogue: the common teardown (NVR restore, stack frame
+ * pop, LR restore), then copy the BPF return value into r3 and return.
+ */
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	/* Move result to r3: BPF_REG_0 is mapped to a different GPR */
+	EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+	EMIT(PPC_RAW_BLR());
+}
+
+/*
+ * Emit a call to a kernel helper whose address is fixed at JIT time.
+ *
+ * @image: instruction buffer passed through to EMIT()
+ * @ctx:   codegen context; ctx->idx advances by the instructions emitted
+ * @func:  helper address; its entry point must lie in core kernel text
+ *
+ * The target is materialised in r12 and called through CTR. With
+ * CONFIG_PPC_KERNEL_PCREL the address is formed relative to the current
+ * instruction address (CTX_NIA) using a prefixed pla; otherwise it is formed
+ * relative to the kernel TOC in r2 using addis/addi.
+ *
+ * Returns 0 on success, -EINVAL if the target is not core kernel text, or
+ * -ERANGE if the target cannot be reached with the chosen addressing mode.
+ */
+static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func)
+{
+	unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
+	long reladdr;
+
+	if (WARN_ON_ONCE(!core_kernel_text(func_addr)))
+		return -EINVAL;
+
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+		reladdr = func_addr - CTX_NIA(ctx);
+
+		if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+			pr_err("eBPF: address of %ps out of range of pcrel address.\n",
+				(void *)func);
+			return -ERANGE;
+		}
+		/* pla r12,addr */
+		EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
+		EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
+		EMIT(PPC_RAW_MTCTR(_R12));
+		/*
+		 * Branch *and link*: the helper has to return to the JITed
+		 * code, which then moves r3 into BPF_REG_0. A plain bctr
+		 * would leave LR untouched and the helper would never
+		 * return here.
+		 */
+		EMIT(PPC_RAW_BCTRL());
+
+	} else {
+		reladdr = func_addr - kernel_toc_addr();
+		if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+			pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func);
+			return -ERANGE;
+		}
+
+		/* r12 = r2 (TOC) + reladdr, split into high-adjusted and low halves */
+		EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr)));
+		EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
+		EMIT(PPC_RAW_MTCTR(_R12));
+		EMIT(PPC_RAW_BCTRL());
+	}
+
+	return 0;
+}
+
+/*
+ * Emit a call to a function whose address may still change between passes
+ * (bpf-to-bpf calls). The address is loaded into r12 with PPC_LI64() and
+ * called through CTR.
+ *
+ * Returns 0 on success, or -EINVAL if @func is a module text address.
+ */
+int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
+{
+	unsigned int start_idx = ctx->idx;
+	unsigned int emitted;
+
+	if (WARN_ON_ONCE(func && is_module_text_address(func)))
+		return -EINVAL;
+
+	/* For ELF v1, step over the function descriptor */
+	func += FUNCTION_DESCR_SIZE;
+
+	/* r12 <- function address */
+	PPC_LI64(_R12, func);
+
+	/*
+	 * The callee address of a bpf-to-bpf call is not known until the
+	 * last extra pass, and PPC_LI64() emits a variable number of
+	 * instructions depending on the value being loaded. So that the
+	 * sequence never grows later on, pad it with NOPs up to five
+	 * instructions, the most PPC_LI64() can emit.
+	 */
+	if (!image) {
+		emitted = ctx->idx - start_idx;
+		while (emitted < 5) {
+			EMIT(PPC_RAW_NOP());
+			emitted++;
+		}
+	}
+
+	EMIT(PPC_RAW_MTCTR(_R12));
+	EMIT(PPC_RAW_BCTRL());
+
+	return 0;
+}
+
+/*
+ * Emit the code sequence for a BPF tail call.
+ *
+ * @image: instruction buffer passed through to EMIT()
+ * @ctx:   codegen context
+ * @out:   branch target used whenever the tail call is not taken (index out
+ *         of range, tail call limit reached, or empty array slot)
+ *
+ * The target program is entered past the start of its prologue so that its
+ * tail_call_cnt initialisation (and TOC load, when one is emitted) is
+ * skipped and the running count is preserved.
+ *
+ * Returns 0.
+ * NOTE(review): the PPC_BCC_SHORT() macro may contain an early error return
+ * of its own -- confirm in bpf_jit.h.
+ */
+static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+	/*
+	 * By now, the eBPF program has already setup parameters in r3, r4 and r5
+	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+	 * r4/BPF_REG_2 - pointer to bpf_array
+	 * r5/BPF_REG_3 - index in bpf_array
+	 */
+	int b2p_bpf_array = bpf_to_ppc(BPF_REG_2);
+	int b2p_index = bpf_to_ppc(BPF_REG_3);
+	/* the two tail_call_cnt init instructions (or NOPs) in the prologue */
+	int bpf_tailcall_prologue_size = 8;
+
+	/*
+	 * The prologue only emits the TOC load for ELFv2 kernels built
+	 * without PCREL addressing, so only skip it in that configuration.
+	 * Skipping it unconditionally on ELFv2 would jump one instruction
+	 * too far into the target's prologue on PCREL kernels.
+	 */
+	if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+		bpf_tailcall_prologue_size += 4; /* skip past the toc load */
+
+	/*
+	 * if (index >= array->map.max_entries)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+	/* treat the index as a 32-bit value: clear the upper word first */
+	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
+	EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1)));
+	PPC_BCC_SHORT(COND_GE, out);
+
+	/*
+	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx)));
+	EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+	PPC_BCC_SHORT(COND_GE, out);
+
+	/*
+	 * tail_call_cnt++;
+	 */
+	EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1));
+	EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx)));
+
+	/* prog = array->ptrs[index]; */
+	EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8));
+	EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array));
+	EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs)));
+
+	/*
+	 * if (prog == NULL)
+	 *   goto out;
+	 */
+	EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0));
+	PPC_BCC_SHORT(COND_EQ, out);
+
+	/* goto *(prog->bpf_func + prologue_size); */
+	EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func)));
+	EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1),
+			FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size));
+	EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1)));
+
+	/* tear down stack, restore NVRs, ... */
+	bpf_jit_emit_common_epilogue(image, ctx);
+
+	/* plain branch (no link): control does not come back here */
+	EMIT(PPC_RAW_BCTR());
+
+	/* out: */
+	return 0;
+}
+
+/*
+ * bpf_stf_barrier: out-of-line barrier routine, called from JITed code for
+ * BPF_ST | BPF_NOSPEC when the barrier type is STF_BARRIER_FALLBACK.
+ *
+ * We spill into the redzone always, even if the bpf program has its own stackframe.
+ * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local():
+ * -64(r1) and -56(r1) are the two doublewords of local_tmp_var in the
+ * redzone layout.
+ *
+ * The sequence stores r21/r22 to those slots, issues a sync, reloads them,
+ * executes "ori 31,31,0", then runs 14 branches that each target the very
+ * next instruction before returning with blr.
+ */
+void bpf_stf_barrier(void);
+
+asm (
+"		.global bpf_stf_barrier		;"
+"	bpf_stf_barrier:			;"
+"		std	21,-64(1)		;"
+"		std	22,-56(1)		;"
+"		sync				;"
+"		ld	21,-64(1)		;"
+"		ld	22,-56(1)		;"
+"		ori	31,31,0			;"
+"		.rept 14			;"
+"		b	1f			;"
+"	1:					;"
+"		.endr				;"
+"		blr				;"
+);
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+		       u32 *addrs, int pass, bool extra_pass)
+{
+	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
+	const struct bpf_insn *insn = fp->insnsi;
+	int flen = fp->len;
+	int i, ret;
+
+	/* Start of epilogue code - will only be valid 2nd pass onwards */
+	u32 exit_addr = addrs[flen];
+
+	for (i = 0; i < flen; i++) {
+		u32 code = insn[i].code;
+		u32 dst_reg = bpf_to_ppc(insn[i].dst_reg);
+		u32 src_reg = bpf_to_ppc(insn[i].src_reg);
+		u32 size = BPF_SIZE(code);
+		u32 tmp1_reg = bpf_to_ppc(TMP_REG_1);
+		u32 tmp2_reg = bpf_to_ppc(TMP_REG_2);
+		u32 save_reg, ret_reg;
+		s16 off = insn[i].off;
+		s32 imm = insn[i].imm;
+		bool func_addr_fixed;
+		u64 func_addr;
+		u64 imm64;
+		u32 true_cond;
+		u32 tmp_idx;
+		int j;
+
+		/*
+		 * addrs[] maps a BPF bytecode address into a real offset from
+		 * the start of the body code.
+		 */
+		addrs[i] = ctx->idx * 4;
+
+		/*
+		 * As an optimization, we note down which non-volatile registers
+		 * are used so that we can only save/restore those in our
+		 * prologue and epilogue. We do this here regardless of whether
+		 * the actual BPF instruction uses src/dst registers or not
+		 * (for instance, BPF_CALL does not use them). The expectation
+		 * is that those instructions will have src_reg/dst_reg set to
+		 * 0. Even otherwise, we just lose some prologue/epilogue
+		 * optimization but everything else should work without
+		 * any issues.
+		 */
+		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
+			bpf_set_seen_register(ctx, dst_reg);
+		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
+			bpf_set_seen_register(ctx, src_reg);
+
+		switch (code) {
+		/*
+		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+		 */
+		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+			if (!imm) {
+				goto bpf_alu32_trunc;
+			} else if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
+			} else {
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+			if (!imm) {
+				goto bpf_alu32_trunc;
+			} else if (imm > -32768 && imm <= 32768) {
+				EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm)));
+			} else {
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+			if (BPF_CLASS(code) == BPF_ALU)
+				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+			else
+				EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+			if (imm >= -32768 && imm < 32768)
+				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
+			else {
+				PPC_LI32(tmp1_reg, imm);
+				if (BPF_CLASS(code) == BPF_ALU)
+					EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
+				else
+					EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg));
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+			if (BPF_OP(code) == BPF_MOD) {
+				EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
+				EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+			} else
+				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+			if (BPF_OP(code) == BPF_MOD) {
+				EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
+				EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+			} else
+				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+			if (imm == 0)
+				return -EINVAL;
+			if (imm == 1) {
+				if (BPF_OP(code) == BPF_DIV) {
+					goto bpf_alu32_trunc;
+				} else {
+					EMIT(PPC_RAW_LI(dst_reg, 0));
+					break;
+				}
+			}
+
+			PPC_LI32(tmp1_reg, imm);
+			switch (BPF_CLASS(code)) {
+			case BPF_ALU:
+				if (BPF_OP(code) == BPF_MOD) {
+					EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
+					EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
+					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+				} else
+					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg));
+				break;
+			case BPF_ALU64:
+				if (BPF_OP(code) == BPF_MOD) {
+					EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
+					EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
+					EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+				} else
+					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg));
+				break;
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
+			goto bpf_alu32_trunc;
+
+		/*
+		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+		 */
+		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+			if (!IMM_H(imm))
+				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
+			else {
+				/* Sign-extended */
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
+				/* Sign-extended */
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
+			} else {
+				if (IMM_L(imm))
+					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
+				if (IMM_H(imm))
+					EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+			EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
+				/* Sign-extended */
+				PPC_LI32(tmp1_reg, imm);
+				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
+			} else {
+				if (IMM_L(imm))
+					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
+				if (IMM_H(imm))
+					EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
+			}
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+			/* slw clears top 32 bits */
+			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+			/* skip zero extension move, but set address map. */
+			if (insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+			EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
+			/* with imm 0, we still need to clear top 32 bits */
+			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
+			if (insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
+			if (imm != 0)
+				EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
+			break;
+		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+			if (insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+			EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
+			if (insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+			if (imm != 0)
+				EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+			EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
+			goto bpf_alu32_trunc;
+		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+			if (imm != 0)
+				EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
+			break;
+
+		/*
+		 * MOV
+		 */
+		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+			if (imm == 1) {
+				/* special mov32 for zext */
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
+				break;
+			}
+			EMIT(PPC_RAW_MR(dst_reg, src_reg));
+			goto bpf_alu32_trunc;
+		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+			PPC_LI32(dst_reg, imm);
+			if (imm < 0)
+				goto bpf_alu32_trunc;
+			else if (insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+			break;
+
+bpf_alu32_trunc:
+		/* Truncate to 32-bits */
+		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
+			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
+		break;
+
+		/*
+		 * BPF_FROM_BE/LE
+		 */
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+#ifdef __BIG_ENDIAN__
+			if (BPF_SRC(code) == BPF_FROM_BE)
+				goto emit_clear;
+#else /* !__BIG_ENDIAN__ */
+			if (BPF_SRC(code) == BPF_FROM_LE)
+				goto emit_clear;
+#endif
+			switch (imm) {
+			case 16:
+				/* Rotate 8 bits left & mask with 0x0000ff00 */
+				EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23));
+				/* Rotate 8 bits right & insert LSB to reg */
+				EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31));
+				/* Move result back to dst_reg */
+				EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
+				break;
+			case 32:
+				/*
+				 * Rotate word left by 8 bits:
+				 * 2 bytes are already in their final position
+				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+				 */
+				EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
+				break;
+			case 64:
+				/* Store the value to stack and then use byte-reverse loads */
+				EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx)));
+				EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx)));
+				if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+					EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg));
+				} else {
+					EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg));
+					if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+						EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
+					EMIT(PPC_RAW_LI(tmp2_reg, 4));
+					EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg));
+					if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+						EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32));
+					EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg));
+				}
+				break;
+			}
+			break;
+
+emit_clear:
+			switch (imm) {
+			case 16:
+				/* zero-extend 16 bits into 64 bits */
+				EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
+				if (insn_is_zext(&insn[i + 1]))
+					addrs[++i] = ctx->idx * 4;
+				break;
+			case 32:
+				if (!fp->aux->verifier_zext)
+					/* zero-extend 32 bits into 64 bits */
+					EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
+				break;
+			case 64:
+				/* nop */
+				break;
+			}
+			break;
+
+		/*
+		 * BPF_ST NOSPEC (speculation barrier)
+		 */
+		case BPF_ST | BPF_NOSPEC:
+			if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) ||
+					!security_ftr_enabled(SEC_FTR_STF_BARRIER))
+				break;
+
+			switch (stf_barrier) {
+			case STF_BARRIER_EIEIO:
+				EMIT(PPC_RAW_EIEIO() | 0x02000000);
+				break;
+			case STF_BARRIER_SYNC_ORI:
+				EMIT(PPC_RAW_SYNC());
+				EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0));
+				EMIT(PPC_RAW_ORI(_R31, _R31, 0));
+				break;
+			case STF_BARRIER_FALLBACK:
+				ctx->seen |= SEEN_FUNC;
+				PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier));
+				EMIT(PPC_RAW_MTCTR(_R12));
+				EMIT(PPC_RAW_BCTRL());
+				break;
+			case STF_BARRIER_NONE:
+				break;
+			}
+			break;
+
+		/*
+		 * BPF_ST(X)
+		 */
+		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+			if (BPF_CLASS(code) == BPF_ST) {
+				EMIT(PPC_RAW_LI(tmp1_reg, imm));
+				src_reg = tmp1_reg;
+			}
+			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+			if (BPF_CLASS(code) == BPF_ST) {
+				EMIT(PPC_RAW_LI(tmp1_reg, imm));
+				src_reg = tmp1_reg;
+			}
+			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+			if (BPF_CLASS(code) == BPF_ST) {
+				PPC_LI32(tmp1_reg, imm);
+				src_reg = tmp1_reg;
+			}
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+			if (BPF_CLASS(code) == BPF_ST) {
+				PPC_LI32(tmp1_reg, imm);
+				src_reg = tmp1_reg;
+			}
+			if (off % 4) {
+				EMIT(PPC_RAW_LI(tmp2_reg, off));
+				EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg));
+			} else {
+				EMIT(PPC_RAW_STD(src_reg, dst_reg, off));
+			}
+			break;
+
+		/*
+		 * BPF_STX ATOMIC (atomic ops)
+		 */
+		case BPF_STX | BPF_ATOMIC | BPF_W:
+		case BPF_STX | BPF_ATOMIC | BPF_DW:
+			save_reg = tmp2_reg;
+			ret_reg = src_reg;
+
+			/* Get offset into TMP_REG_1 */
+			EMIT(PPC_RAW_LI(tmp1_reg, off));
+			tmp_idx = ctx->idx * 4;
+			/* load value from memory into TMP_REG_2 */
+			if (size == BPF_DW)
+				EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0));
+			else
+				EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0));
+
+			/* Save old value in _R0 */
+			if (imm & BPF_FETCH)
+				EMIT(PPC_RAW_MR(_R0, tmp2_reg));
+
+			switch (imm) {
+			case BPF_ADD:
+			case BPF_ADD | BPF_FETCH:
+				EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg));
+				break;
+			case BPF_AND:
+			case BPF_AND | BPF_FETCH:
+				EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg));
+				break;
+			case BPF_OR:
+			case BPF_OR | BPF_FETCH:
+				EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg));
+				break;
+			case BPF_XOR:
+			case BPF_XOR | BPF_FETCH:
+				EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg));
+				break;
+			case BPF_CMPXCHG:
+				/*
+				 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+				 * in src_reg for other cases.
+				 */
+				ret_reg = bpf_to_ppc(BPF_REG_0);
+
+				/* Compare with old value in BPF_R0 */
+				if (size == BPF_DW)
+					EMIT(PPC_RAW_CMPD(bpf_to_ppc(BPF_REG_0), tmp2_reg));
+				else
+					EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), tmp2_reg));
+				/* Don't set if different from old value */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+				fallthrough;
+			case BPF_XCHG:
+				save_reg = src_reg;
+				break;
+			default:
+				pr_err_ratelimited(
+					"eBPF filter atomic op code %02x (@%d) unsupported\n",
+					code, i);
+				return -EOPNOTSUPP;
+			}
+
+			/* store new value */
+			if (size == BPF_DW)
+				EMIT(PPC_RAW_STDCX(save_reg, tmp1_reg, dst_reg));
+			else
+				EMIT(PPC_RAW_STWCX(save_reg, tmp1_reg, dst_reg));
+			/* we're done if this succeeded */
+			PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+			if (imm & BPF_FETCH) {
+				EMIT(PPC_RAW_MR(ret_reg, _R0));
+				/*
+				 * Skip unnecessary zero-extension for 32-bit cmpxchg.
+				 * For context, see commit 39491867ace5.
+				 */
+				if (size != BPF_DW && imm == BPF_CMPXCHG &&
+				    insn_is_zext(&insn[i + 1]))
+					addrs[++i] = ctx->idx * 4;
+			}
+			break;
+
+		/*
+		 * BPF_LDX
+		 */
+		/* dst = *(u8 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEM | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+		/* dst = *(u16 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEM | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+		/* dst = *(u32 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEM | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+		/* dst = *(u64 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEM | BPF_DW:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			/*
+			 * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+			 * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+			 * load only if addr is kernel address (see is_kernel_addr()), otherwise
+			 * set dst_reg=0 and move on.
+			 */
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off));
+				if (IS_ENABLED(CONFIG_PPC_BOOK3E_64))
+					PPC_LI64(tmp2_reg, 0x8000000000000000ul);
+				else /* BOOK3S_64 */
+					PPC_LI64(tmp2_reg, PAGE_OFFSET);
+				EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg));
+				PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4);
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				/*
+				 * Check if 'off' is word aligned for BPF_DW, because
+				 * we might generate two instructions.
+				 */
+				if (BPF_SIZE(code) == BPF_DW && (off & 3))
+					PPC_JMP((ctx->idx + 3) * 4);
+				else
+					PPC_JMP((ctx->idx + 2) * 4);
+			}
+
+			switch (size) {
+			case BPF_B:
+				EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+				break;
+			case BPF_H:
+				EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+				break;
+			case BPF_W:
+				EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+				break;
+			case BPF_DW:
+				if (off % 4) {
+					EMIT(PPC_RAW_LI(tmp1_reg, off));
+					EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg));
+				} else {
+					EMIT(PPC_RAW_LD(dst_reg, src_reg, off));
+				}
+				break;
+			}
+
+			if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+				addrs[++i] = ctx->idx * 4;
+
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
+							    4, dst_reg);
+				if (ret)
+					return ret;
+			}
+			break;
+
+		/*
+		 * Doubleword load
+		 * 16 byte instruction that uses two 'struct bpf_insn'
+		 */
+		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+			imm64 = ((u64)(u32) insn[i].imm) |
+				    (((u64)(u32) insn[i+1].imm) << 32);
+			tmp_idx = ctx->idx;
+			PPC_LI64(dst_reg, imm64);
+			/* padding to allow full 5 instructions for later patching */
+			if (!image)
+				for (j = ctx->idx - tmp_idx; j < 5; j++)
+					EMIT(PPC_RAW_NOP());
+			/* Adjust for two bpf instructions */
+			addrs[++i] = ctx->idx * 4;
+			break;
+
+		/*
+		 * Return/Exit
+		 */
+		case BPF_JMP | BPF_EXIT:
+			/*
+			 * If this isn't the very last instruction, branch to
+			 * the epilogue. If we _are_ the last instruction,
+			 * we'll just fall through to the epilogue.
+			 */
+			if (i != flen - 1) {
+				ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr);
+				if (ret)
+					return ret;
+			}
+			/* else fall through to the epilogue */
+			break;
+
+		/*
+		 * Call kernel helper or bpf function
+		 */
+		case BPF_JMP | BPF_CALL:
+			ctx->seen |= SEEN_FUNC;
+
+			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+						    &func_addr, &func_addr_fixed);
+			if (ret < 0)
+				return ret;
+
+			if (func_addr_fixed)
+				ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+			else
+				ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
+			if (ret)
+				return ret;
+
+			/* move return value from r3 to BPF_REG_0 */
+			EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3));
+			break;
+
+		/*
+		 * Jumps and branches
+		 */
+		case BPF_JMP | BPF_JA:
+			PPC_JMP(addrs[i + 1 + off]);
+			break;
+
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP32 | BPF_JGT | BPF_K:
+		case BPF_JMP32 | BPF_JGT | BPF_X:
+		case BPF_JMP32 | BPF_JSGT | BPF_K:
+		case BPF_JMP32 | BPF_JSGT | BPF_X:
+			true_cond = COND_GT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLT | BPF_K:
+		case BPF_JMP | BPF_JLT | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_X:
+		case BPF_JMP32 | BPF_JLT | BPF_K:
+		case BPF_JMP32 | BPF_JLT | BPF_X:
+		case BPF_JMP32 | BPF_JSLT | BPF_K:
+		case BPF_JMP32 | BPF_JSLT | BPF_X:
+			true_cond = COND_LT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+		case BPF_JMP32 | BPF_JGE | BPF_K:
+		case BPF_JMP32 | BPF_JGE | BPF_X:
+		case BPF_JMP32 | BPF_JSGE | BPF_K:
+		case BPF_JMP32 | BPF_JSGE | BPF_X:
+			true_cond = COND_GE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLE | BPF_K:
+		case BPF_JMP | BPF_JLE | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_X:
+		case BPF_JMP32 | BPF_JLE | BPF_K:
+		case BPF_JMP32 | BPF_JLE | BPF_X:
+		case BPF_JMP32 | BPF_JSLE | BPF_K:
+		case BPF_JMP32 | BPF_JSLE | BPF_X:
+			true_cond = COND_LE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP32 | BPF_JEQ | BPF_K:
+		case BPF_JMP32 | BPF_JEQ | BPF_X:
+			true_cond = COND_EQ;
+			goto cond_branch;
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP32 | BPF_JNE | BPF_K:
+		case BPF_JMP32 | BPF_JNE | BPF_X:
+			true_cond = COND_NE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+		case BPF_JMP32 | BPF_JSET | BPF_K:
+		case BPF_JMP32 | BPF_JSET | BPF_X:
+			true_cond = COND_NE;
+			/* Fall through */
+
+cond_branch:
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JLT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JLE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
+			case BPF_JMP | BPF_JNE | BPF_X:
+			case BPF_JMP32 | BPF_JGT | BPF_X:
+			case BPF_JMP32 | BPF_JLT | BPF_X:
+			case BPF_JMP32 | BPF_JGE | BPF_X:
+			case BPF_JMP32 | BPF_JLE | BPF_X:
+			case BPF_JMP32 | BPF_JEQ | BPF_X:
+			case BPF_JMP32 | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				if (BPF_CLASS(code) == BPF_JMP32)
+					EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				else
+					EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSGT | BPF_X:
+			case BPF_JMP | BPF_JSLT | BPF_X:
+			case BPF_JMP | BPF_JSGE | BPF_X:
+			case BPF_JMP | BPF_JSLE | BPF_X:
+			case BPF_JMP32 | BPF_JSGT | BPF_X:
+			case BPF_JMP32 | BPF_JSLT | BPF_X:
+			case BPF_JMP32 | BPF_JSGE | BPF_X:
+			case BPF_JMP32 | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				if (BPF_CLASS(code) == BPF_JMP32)
+					EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+				else
+					EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSET | BPF_X:
+			case BPF_JMP32 | BPF_JSET | BPF_X:
+				if (BPF_CLASS(code) == BPF_JMP) {
+					EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
+				} else {
+					EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg));
+					EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31));
+				}
+				break;
+			case BPF_JMP | BPF_JNE | BPF_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JLT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
+			case BPF_JMP | BPF_JLE | BPF_K:
+			case BPF_JMP32 | BPF_JNE | BPF_K:
+			case BPF_JMP32 | BPF_JEQ | BPF_K:
+			case BPF_JMP32 | BPF_JGT | BPF_K:
+			case BPF_JMP32 | BPF_JLT | BPF_K:
+			case BPF_JMP32 | BPF_JGE | BPF_K:
+			case BPF_JMP32 | BPF_JLE | BPF_K:
+			{
+				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
+				/*
+				 * Need sign-extended load, so only positive
+				 * values can be used as imm in cmpldi
+				 */
+				if (imm >= 0 && imm < 32768) {
+					if (is_jmp32)
+						EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+					else
+						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					PPC_LI32(tmp1_reg, imm);
+					/* ... but unsigned comparison */
+					if (is_jmp32)
+						EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg));
+					else
+						EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg));
+				}
+				break;
+			}
+			case BPF_JMP | BPF_JSGT | BPF_K:
+			case BPF_JMP | BPF_JSLT | BPF_K:
+			case BPF_JMP | BPF_JSGE | BPF_K:
+			case BPF_JMP | BPF_JSLE | BPF_K:
+			case BPF_JMP32 | BPF_JSGT | BPF_K:
+			case BPF_JMP32 | BPF_JSLT | BPF_K:
+			case BPF_JMP32 | BPF_JSGE | BPF_K:
+			case BPF_JMP32 | BPF_JSLE | BPF_K:
+			{
+				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
+				/*
+				 * signed comparison, so any 16-bit value
+				 * can be used in cmpdi
+				 */
+				if (imm >= -32768 && imm < 32768) {
+					if (is_jmp32)
+						EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+					else
+						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
+				} else {
+					PPC_LI32(tmp1_reg, imm);
+					if (is_jmp32)
+						EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
+					else
+						EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg));
+				}
+				break;
+			}
+			case BPF_JMP | BPF_JSET | BPF_K:
+			case BPF_JMP32 | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768)
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));
+				else {
+					PPC_LI32(tmp1_reg, imm);
+					if (BPF_CLASS(code) == BPF_JMP) {
+						EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg,
+								     tmp1_reg));
+					} else {
+						EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg));
+						EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg,
+									0, 0, 31));
+					}
+				}
+				break;
+			}
+			PPC_BCC(true_cond, addrs[i + 1 + off]);
+			break;
+
+		/*
+		 * Tail call
+		 */
+		case BPF_JMP | BPF_TAIL_CALL:
+			ctx->seen |= SEEN_TAILCALL;
+			ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+			if (ret < 0)
+				return ret;
+			break;
+
+		default:
+			/*
+			 * The filter contains something cruel & unusual.
+			 * We don't handle it, but also there shouldn't be
+			 * anything missing from our list.
+			 */
+			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
+					code, i);
+			return -ENOTSUPP;
+		}
+	}
+
+	/* Set end-of-body-code address for exit. */
+	addrs[i] = ctx->idx * 4;
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
new file mode 100644
index 0000000000..308a2e40d7
--- /dev/null
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance event support - PPC 8xx
+ *
+ * Copyright 2016 Christophe Leroy, CS Systemes d'Information
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+#include <asm/code-patching.h>
+#include <asm/inst.h>
+
+#define PERF_8xx_ID_CPU_CYCLES		1
+#define PERF_8xx_ID_HW_INSTRUCTIONS	2
+#define PERF_8xx_ID_ITLB_LOAD_MISS	3
+#define PERF_8xx_ID_DTLB_LOAD_MISS	4
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+#define DTLB_LOAD_MISS	(C(DTLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16))
+#define ITLB_LOAD_MISS	(C(ITLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16))
+
+extern unsigned long itlb_miss_counter, dtlb_miss_counter;
+extern atomic_t instruction_counter;
+
+static atomic_t insn_ctr_ref;
+static atomic_t itlb_miss_ref;
+static atomic_t dtlb_miss_ref;
+
+/* Merge instruction_counter with COUNTA >> 16 into a single s64 value. */
+static s64 get_insn_ctr(void)
+{
+	unsigned long hw;
+	int sw;
+
+	do {	/* retry if instruction_counter changed around the SPR read */
+		sw = atomic_read(&instruction_counter);
+		hw = mfspr(SPRN_COUNTA);
+	} while (atomic_read(&instruction_counter) != sw);
+	return ((s64)sw << 16) | (hw >> 16);
+}
+
+static int event_type(struct perf_event *event)
+{
+	u64 config = event->attr.config;	/* event selector within the type */
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (config == PERF_COUNT_HW_CPU_CYCLES)
+			return PERF_8xx_ID_CPU_CYCLES;
+		if (config == PERF_COUNT_HW_INSTRUCTIONS)
+			return PERF_8xx_ID_HW_INSTRUCTIONS;
+		return -EOPNOTSUPP;
+	case PERF_TYPE_HW_CACHE:
+		if (config == ITLB_LOAD_MISS)
+			return PERF_8xx_ID_ITLB_LOAD_MISS;
+		if (config == DTLB_LOAD_MISS)
+			return PERF_8xx_ID_DTLB_LOAD_MISS;
+		return -EOPNOTSUPP;
+	case PERF_TYPE_RAW:		/* known type, but no config is mapped */
+		return -EOPNOTSUPP;
+	}
+	return -ENOENT;			/* any other attr.type */
+}
+
+/* Accept only events that event_type() can map to a PERF_8xx_ID_* value. */
+static int mpc8xx_pmu_event_init(struct perf_event *event)
+{
+	int id = event_type(event);
+
+	/* A negative id is the errno from event_type(); hand it back as is. */
+	return id < 0 ? id : 0;
+}
+
+static int mpc8xx_pmu_add(struct perf_event *event, int flags)
+{
+	int type = event_type(event);	/* PERF_8xx_ID_* or a negative errno */
+	s64 val = 0;			/* counter snapshot stored as prev_count */
+
+	if (type < 0)
+		return type;
+
+	switch (type) {
+	case PERF_8xx_ID_CPU_CYCLES:
+		val = get_tb();
+		break;
+	case PERF_8xx_ID_HW_INSTRUCTIONS:
+		if (atomic_inc_return(&insn_ctr_ref) == 1)	/* first user only */
+			mtspr(SPRN_ICTRL, 0xc0080007);
+		val = get_insn_ctr();
+		break;
+	case PERF_8xx_ID_ITLB_LOAD_MISS:
+		if (atomic_inc_return(&itlb_miss_ref) == 1) {	/* first user: patch in a branch */
+			unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
+
+			patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
+		}
+		val = itlb_miss_counter;
+		break;
+	case PERF_8xx_ID_DTLB_LOAD_MISS:
+		if (atomic_inc_return(&dtlb_miss_ref) == 1) {	/* first user: patch in a branch */
+			unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
+
+			patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
+		}
+		val = dtlb_miss_counter;
+		break;
+	}
+	local64_set(&event->hw.prev_count, val);	/* baseline read back by mpc8xx_pmu_read() */
+	return 0;
+}
+
+static void mpc8xx_pmu_read(struct perf_event *event)
+{
+	int type = event_type(event);	/* PERF_8xx_ID_* or a negative errno */
+	s64 prev, val = 0, delta = 0;
+
+	if (type < 0)
+		return;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		switch (type) {
+		case PERF_8xx_ID_CPU_CYCLES:
+			val = get_tb();
+			delta = 16 * (val - prev);	/* timebase delta scaled by 16 */
+			break;
+		case PERF_8xx_ID_HW_INSTRUCTIONS:
+			val = get_insn_ctr();
+			delta = prev - val;	/* NOTE(review): implies a down-counting value - confirm */
+			if (delta < 0)
+				delta += 0x1000000000000LL;	/* add 2^48 when negative */
+			break;
+		case PERF_8xx_ID_ITLB_LOAD_MISS:
+			val = itlb_miss_counter;
+			delta = (s64)((s32)val - (s32)prev);	/* 32-bit difference, then widened */
+			break;
+		case PERF_8xx_ID_DTLB_LOAD_MISS:
+			val = dtlb_miss_counter;
+			delta = (s64)((s32)val - (s32)prev);	/* 32-bit difference, then widened */
+			break;
+		}
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);	/* redo if prev_count moved */
+
+	local64_add(delta, &event->count);
+}
+
+static void mpc8xx_pmu_del(struct perf_event *event, int flags)
+{
+	ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));	/* written over the patched sites */
+
+	mpc8xx_pmu_read(event);		/* fold the final delta into event->count */
+
+	/* If it was the last user, stop counting to avoid useless overhead */
+	switch (event_type(event)) {
+	case PERF_8xx_ID_CPU_CYCLES:
+		break;
+	case PERF_8xx_ID_HW_INSTRUCTIONS:
+		if (atomic_dec_return(&insn_ctr_ref) == 0)
+			mtspr(SPRN_ICTRL, 7);	/* same value init_mpc8xx_pmu() programs */
+		break;
+	case PERF_8xx_ID_ITLB_LOAD_MISS:
+		if (atomic_dec_return(&itlb_miss_ref) == 0)
+			patch_instruction_site(&patch__itlbmiss_exit_1, insn);	/* replaces the branch from add */
+		break;
+	case PERF_8xx_ID_DTLB_LOAD_MISS:
+		if (atomic_dec_return(&dtlb_miss_ref) == 0)
+			patch_instruction_site(&patch__dtlbmiss_exit_1, insn);	/* replaces the branch from add */
+		break;
+	}
+}
+
+/* PMU callbacks for the 8xx; the capability flags declare no interrupt and no NMI. */
+static struct pmu mpc8xx_pmu = {
+	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_NMI,
+	.event_init	= mpc8xx_pmu_event_init,
+	.add		= mpc8xx_pmu_add,
+	.del		= mpc8xx_pmu_del,
+	.read		= mpc8xx_pmu_read,
+};
+
+static int init_mpc8xx_pmu(void)
+{
+	mtspr(SPRN_ICTRL, 7);		/* same value mpc8xx_pmu_del() writes for the last user */
+	mtspr(SPRN_CMPA, 0);
+	mtspr(SPRN_COUNTA, 0xffff);
+
+	return perf_pmu_register(&mpc8xx_pmu, "cpu", PERF_TYPE_RAW);	/* result becomes the initcall status */
+}
+
+early_initcall(init_mpc8xx_pmu);
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
new file mode 100644
index 0000000000..4f53d0b975
--- /dev/null
+++ b/arch/powerpc/perf/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y				+= callchain.o callchain_$(BITS).o perf_regs.o
+obj-$(CONFIG_COMPAT)		+= callchain_32.o
+
+obj-$(CONFIG_PPC_PERF_CTRS)	+= core-book3s.o
+obj64-$(CONFIG_PPC_PERF_CTRS)	+= ppc970-pmu.o power5-pmu.o \
+				   power5+-pmu.o power6-pmu.o power7-pmu.o \
+				   isa207-common.o power8-pmu.o power9-pmu.o \
+				   generic-compat-pmu.o power10-pmu.o bhrb.o
+obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
+
+obj-$(CONFIG_PPC_POWERNV)	+= imc-pmu.o
+obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
+obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
+
+obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
+
+obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
+
+obj-$(CONFIG_PPC64)		+= $(obj64-y)
+obj-$(CONFIG_PPC32)		+= $(obj32-y)
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
new file mode 100644
index 0000000000..47ba05d5ae
--- /dev/null
+++ b/arch/powerpc/perf/bhrb.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Basic assembly code to read BHRB entries
+ *
+ * Copyright 2013 Anshuman Khandual, IBM Corporation.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+	.text
+
+.balign 8
+
+/* r3 = n  (where n = [0-31])
+ * The maximum number of BHRB entries supported with PPC_MFBHRBE instruction
+ * is 1024. We have limited number of table entries here as POWER8 implements
+ * 32 BHRB entries.
+ */
+
+/* .global read_bhrb */
+_GLOBAL(read_bhrb)
+	cmpldi	r3,31			/* unsigned compare of n against 31 */
+	bgt	1f			/* n > 31: take the "return 0" path */
+	LOAD_REG_ADDR(r4, bhrb_table)
+	sldi	r3,r3,3			/* n * 8: each table entry is two instructions */
+	add	r3,r4,r3		/* address of entry n */
+	mtctr	r3
+	bctr				/* the entry loads r3 and does the blr itself */
+1:	li	r3,0
+	blr
+
+#define MFBHRB_TABLE1(n) PPC_MFBHRBE(R3,n); blr
+#define MFBHRB_TABLE2(n) MFBHRB_TABLE1(n); MFBHRB_TABLE1(n+1)
+#define MFBHRB_TABLE4(n) MFBHRB_TABLE2(n); MFBHRB_TABLE2(n+2)
+#define MFBHRB_TABLE8(n) MFBHRB_TABLE4(n); MFBHRB_TABLE4(n+4)
+#define MFBHRB_TABLE16(n) MFBHRB_TABLE8(n); MFBHRB_TABLE8(n+8)
+#define MFBHRB_TABLE32(n) MFBHRB_TABLE16(n); MFBHRB_TABLE16(n+16)
+
+bhrb_table:				/* indexed by read_bhrb as bhrb_table + n * 8 */
+	MFBHRB_TABLE32(0)		/* expands to entries 0..31 of "PPC_MFBHRBE(R3,n); blr" */
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
new file mode 100644
index 0000000000..6b4434dd0f
--- /dev/null
+++ b/arch/powerpc/perf/callchain.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+/*
+ * Is sp valid as the address of the next kernel stack frame after prev_sp?
+ * The next frame may be in a different stack area but should not go
+ * back down in the same stack area.
+ */
+static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
+{
+	const unsigned long area_mask = ~(THREAD_SIZE - 1);
+
+	/* Reject anything not 16-byte aligned or failing validate_sp(). */
+	if ((sp & 0xf) || !validate_sp(sp, current))
+		return 0;
+	/* The usual case: the next frame sits above the previous one. */
+	if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
+		return 1;
+	/*
+	 * Otherwise sp is only acceptable if it lies in a different stack
+	 * area, e.g. when leaving an interrupt stack for the process stack.
+	 */
+	return (sp & area_mask) != (prev_sp & area_mask);
+}
+
+void __no_sanitize_address
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+{
+	unsigned long sp, next_sp;
+	unsigned long next_ip;
+	unsigned long lr;
+	long level = 0;		/* ordinary frames walked since the last interrupt frame */
+	unsigned long *fp;
+
+	lr = regs->link;
+	sp = regs->gpr[1];
+	perf_callchain_store(entry, perf_instruction_pointer(regs));
+
+	if (!validate_sp(sp, current))
+		return;
+
+	for (;;) {
+		fp = (unsigned long *) sp;
+		next_sp = fp[0];	/* first word of the frame is used as the next sp */
+
+		if (next_sp == sp + STACK_INT_FRAME_SIZE &&
+		    validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) &&
+		    fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
+			/*
+			 * This looks like an interrupt frame for an
+			 * interrupt that occurred in the kernel
+			 */
+			regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
+			next_ip = regs->nip;
+			lr = regs->link;
+			level = 0;	/* restart the lr handling with the saved regs */
+			perf_callchain_store_context(entry, PERF_CONTEXT_KERNEL);
+
+		} else {
+			if (level == 0)
+				next_ip = lr;
+			else
+				next_ip = fp[STACK_FRAME_LR_SAVE];
+
+			/*
+			 * We can't tell which of the first two addresses
+			 * we get are valid, but we can filter out the
+			 * obviously bogus ones here.  We replace them
+			 * with 0 rather than removing them entirely so
+			 * that userspace can tell which is which.
+			 */
+			if ((level == 1 && next_ip == lr) ||
+			    (level <= 1 && !kernel_text_address(next_ip)))
+				next_ip = 0;
+
+			++level;
+		}
+
+		perf_callchain_store(entry, next_ip);
+		if (!valid_next_sp(next_sp, sp))	/* only exit from the loop */
+			return;
+		sp = next_sp;
+	}
+}
+
+/* Dispatch the user-space unwind according to is_32bit_task(). */
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+{
+	if (is_32bit_task())
+		perf_callchain_user_32(entry, regs);
+	else
+		perf_callchain_user_64(entry, regs);
+}
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
new file mode 100644
index 0000000000..19a8d051dd
--- /dev/null
+++ b/arch/powerpc/perf/callchain.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_PERF_CALLCHAIN_H
+#define _POWERPC_PERF_CALLCHAIN_H
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+			    struct pt_regs *regs);
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+			    struct pt_regs *regs);
+
+/* True if sp is zero, misaligned (4/8 bytes), or above STACK_TOP - 16/32. */
+static inline bool invalid_user_sp(unsigned long sp)
+{
+	unsigned long limit = STACK_TOP - (is_32bit_task() ? 16 : 32);
+
+	return !sp || (sp & (is_32bit_task() ? 3 : 7)) || sp > limit;
+}
+
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables.  Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static inline int __read_user_stack(const void __user *ptr, void *ret,
+				    size_t size)
+{
+	unsigned long uaddr = (unsigned long)ptr;
+
+	/* Only size-aligned accesses that end at or below TASK_SIZE are tried. */
+	if ((uaddr & (size - 1)) || uaddr > TASK_SIZE - size)
+		return -EFAULT;
+	return copy_from_user_nofault(ret, ptr, size);
+}
+
+#endif /* _POWERPC_PERF_CALLCHAIN_H */
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
new file mode 100644
index 0000000000..ea8cfe3806
--- /dev/null
+++ b/arch/powerpc/perf/callchain_32.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#else  /* CONFIG_PPC64 */
+
+#define __SIGNAL_FRAMESIZE32	__SIGNAL_FRAMESIZE
+#define sigcontext32		sigcontext
+#define mcontext32		mcontext
+#define ucontext32		ucontext
+#define compat_siginfo_t	struct siginfo
+
+#endif /* CONFIG_PPC64 */
+
+static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret)
+{
+	return __read_user_stack(ptr, ret, sizeof(*ret));	/* reads one unsigned int; result passed through */
+}
+
+/*
+ * Layout for non-RT signal frames
+ */
+struct signal_frame_32 {
+	char			dummy[__SIGNAL_FRAMESIZE32];
+	struct sigcontext32	sctx;	/* sctx.regs is checked against &mctx */
+	struct mcontext32	mctx;	/* mc_gregs is what the unwinder reads back */
+	int			abigap[56];
+};
+
+/*
+ * Layout for RT signal frames
+ */
+struct rt_signal_frame_32 {
+	char			dummy[__SIGNAL_FRAMESIZE32 + 16];
+	compat_siginfo_t	info;
+	struct ucontext32	uc;	/* uc.uc_regs is checked against &uc.uc_mcontext */
+	int			abigap[56];
+};
+
+static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+	/* nip at the address of mctx.mc_pad inside the frame located at fp? */
+	if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
+		return 1;
+	/* Otherwise it must be sigtramp32 in the vDSO, if context.vdso is set. */
+	return current->mm->context.vdso &&
+	       nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32);
+}
+
+static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+	/* nip at uc.uc_mcontext.mc_pad inside the RT frame located at fp? */
+	if (nip == fp + offsetof(struct rt_signal_frame_32,
+				 uc.uc_mcontext.mc_pad))
+		return 1;
+	/* Otherwise it must be sigtramp_rt32 in the vDSO, if context.vdso is set. */
+	return current->mm->context.vdso &&
+	       nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32);
+}
+
+/* A frame at sp is plausible only if sctx.regs holds the address of its mctx. */
+static int sane_signal_32_frame(unsigned int sp)
+{
+	struct signal_frame_32 __user *frame = (void __user *)(unsigned long)sp;
+	unsigned int regs_ptr;
+
+	if (read_user_stack_32((unsigned int __user *) &frame->sctx.regs, &regs_ptr))
+		return 0;
+	return regs_ptr == (unsigned long) &frame->mctx;
+}
+
+/* An RT frame at sp is plausible only if uc.uc_regs points at its uc_mcontext. */
+static int sane_rt_signal_32_frame(unsigned int sp)
+{
+	struct rt_signal_frame_32 __user *frame = (void __user *)(unsigned long)sp;
+	unsigned int regs_ptr;
+
+	if (read_user_stack_32((unsigned int __user *) &frame->uc.uc_regs, &regs_ptr))
+		return 0;
+	return regs_ptr == (unsigned long) &frame->uc.uc_mcontext;
+}
+
+static unsigned int __user *signal_frame_32_regs(unsigned int sp,
+				unsigned int next_sp, unsigned int next_ip)
+{
+	struct rt_signal_frame_32 __user *rt_frame;
+	struct signal_frame_32 __user *frame;
+	unsigned int gap = next_sp - sp;
+
+	/*
+	 * gap is computed in unsigned arithmetic, so it is also large
+	 * when next_sp < sp, e.g. when unwinding from an alternate
+	 * signal stack back onto the normal stack. The size checks
+	 * below therefore pass in that situation as well.
+	 */
+	/* Non-RT frame: size, return address and frame contents must match. */
+	if (gap >= sizeof(struct signal_frame_32) &&
+	    is_sigreturn_32_address(next_ip, sp) &&
+	    sane_signal_32_frame(sp)) {
+		frame = (struct signal_frame_32 __user *) (unsigned long) sp;
+		return frame->mctx.mc_gregs;
+	}
+
+	/* RT frame: the same three tests against the RT layout. */
+	if (gap >= sizeof(struct rt_signal_frame_32) &&
+	    is_rt_sigreturn_32_address(next_ip, sp) &&
+	    sane_rt_signal_32_frame(sp)) {
+		rt_frame = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+		return rt_frame->uc.uc_mcontext.mc_gregs;
+	}
+
+	return NULL;
+}
+
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+			    struct pt_regs *regs)
+{
+	unsigned int sp, next_sp;
+	unsigned int next_ip;
+	unsigned int lr;
+	long level = 0;		/* frames walked since the start or the last signal frame */
+	unsigned int __user *fp, *uregs;
+
+	next_ip = perf_instruction_pointer(regs);
+	lr = regs->link;
+	sp = regs->gpr[1];
+	perf_callchain_store(entry, next_ip);
+
+	while (entry->nr < entry->max_stack) {
+		fp = (unsigned int __user *) (unsigned long) sp;
+		if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp))	/* fp[0] is the next sp */
+			return;
+		if (level > 0 && read_user_stack_32(&fp[1], &next_ip))	/* fp[1] is used as the return address */
+			return;
+
+		uregs = signal_frame_32_regs(sp, next_sp, next_ip);
+		if (!uregs && level <= 1)	/* near the top, lr may be the trampoline address */
+			uregs = signal_frame_32_regs(sp, next_sp, lr);
+		if (uregs) {
+			/*
+			 * This looks like a signal frame, so restart
+			 * the stack trace with the values in it.
+			 */
+			if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
+			    read_user_stack_32(&uregs[PT_LNK], &lr) ||
+			    read_user_stack_32(&uregs[PT_R1], &sp))
+				return;
+			level = 0;
+			perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+			perf_callchain_store(entry, next_ip);
+			continue;
+		}
+
+		if (level == 0)
+			next_ip = lr;	/* first frame: report lr instead of a stack slot */
+		perf_callchain_store(entry, next_ip);
+		++level;
+		sp = next_sp;
+	}
+}
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
new file mode 100644
index 0000000000..488e8a21a1
--- /dev/null
+++ b/arch/powerpc/perf/callchain_64.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret)
+{
+	return __read_user_stack(ptr, ret, sizeof(*ret));	/* reads one unsigned long; result passed through */
+}
+
+/*
+ * 64-bit user processes use the same stack frame for RT and non-RT signals.
+ */
+struct signal_frame_64 {
+	char		dummy[__SIGNAL_FRAMESIZE];
+	struct ucontext	uc;		/* uc.uc_mcontext.gp_regs is read by the unwinder */
+	unsigned long	unused[2];
+	unsigned int	tramp[6];	/* its address is compared against nip/lr */
+	struct siginfo	*pinfo;		/* sanity check expects this to equal &info */
+	void		*puc;		/* sanity check expects this to equal &uc */
+	struct siginfo	info;
+	char		abigap[288];
+};
+
+static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
+{
+	/* nip at the tramp[] array inside the signal frame located at fp? */
+	if (nip == fp + offsetof(struct signal_frame_64, tramp))
+		return 1;
+	/* Otherwise it must be sigtramp_rt64 in the vDSO, if context.vdso is set. */
+	return current->mm->context.vdso &&
+	       nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64);
+}
+
+/*
+ * Do some sanity checking on the signal frame pointed to by sp.
+ * We check the pinfo and puc pointers in the frame.
+ */
+static int sane_signal_64_frame(unsigned long sp)
+{
+	struct signal_frame_64 __user *frame = (struct signal_frame_64 __user *) sp;
+	unsigned long info_ptr, uc_ptr;
+
+	if (read_user_stack_64((unsigned long __user *) &frame->pinfo, &info_ptr))
+		return 0;
+	if (read_user_stack_64((unsigned long __user *) &frame->puc, &uc_ptr))
+		return 0;
+	/* Both must point back at the info and uc members of this frame. */
+	return info_ptr == (unsigned long) &frame->info && uc_ptr == (unsigned long) &frame->uc;
+}
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+			    struct pt_regs *regs)
+{
+	unsigned long sp, next_sp;
+	unsigned long next_ip;
+	unsigned long lr;
+	long level = 0;		/* frames walked since the start or the last signal frame */
+	struct signal_frame_64 __user *sigframe;
+	unsigned long __user *fp, *uregs;
+
+	next_ip = perf_instruction_pointer(regs);
+	lr = regs->link;
+	sp = regs->gpr[1];
+	perf_callchain_store(entry, next_ip);
+
+	while (entry->nr < entry->max_stack) {
+		fp = (unsigned long __user *) sp;
+		if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp))	/* fp[0] is the next sp */
+			return;
+		if (level > 0 && read_user_stack_64(&fp[2], &next_ip))	/* fp[2] is used as the return address */
+			return;
+
+		/*
+		 * Note: the next_sp - sp >= signal frame size check
+		 * is true when next_sp < sp, which can happen when
+		 * transitioning from an alternate signal stack to the
+		 * normal stack.
+		 */
+		if (next_sp - sp >= sizeof(struct signal_frame_64) &&
+		    (is_sigreturn_64_address(next_ip, sp) ||
+		     (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
+		    sane_signal_64_frame(sp)) {
+			/*
+			 * This looks like a signal frame
+			 */
+			sigframe = (struct signal_frame_64 __user *) sp;
+			uregs = sigframe->uc.uc_mcontext.gp_regs;
+			if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
+			    read_user_stack_64(&uregs[PT_LNK], &lr) ||
+			    read_user_stack_64(&uregs[PT_R1], &sp))
+				return;
+			level = 0;	/* restart the walk from the saved registers */
+			perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+			perf_callchain_store(entry, next_ip);
+			continue;
+		}
+
+		if (level == 0)
+			next_ip = lr;	/* first frame: report lr instead of a stack slot */
+		perf_callchain_store(entry, next_ip);
+		++level;
+		sp = next_sp;
+	}
+}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
new file mode 100644
index 0000000000..10b946e9c6
--- /dev/null
+++ b/arch/powerpc/perf/core-book3s.c
@@ -0,0 +1,2621 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance event support - powerpc architecture code
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <asm/reg.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+#include <asm/code-patching.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
+
+#ifdef CONFIG_PPC64
+#include "internal.h"
+#endif
+
+#define BHRB_MAX_ENTRIES	32	/* size of cpu_hw_events.bhrb_entries[] */
+#define BHRB_TARGET		0x0000000000000002	/* entry holds a branch "to" address */
+#define BHRB_PREDICTION		0x0000000000000001	/* copied into the entry's mispred flag */
+#define BHRB_EA			0xFFFFFFFFFFFFFFFCUL	/* address bits of an entry */
+
+struct cpu_hw_events {
+	int n_events;	/* number of entries in use in event[] */
+	int n_percpu;
+	int disabled;
+	int n_added;
+	int n_limited;
+	u8  pmcs_enabled;
+	struct perf_event *event[MAX_HWEVENTS];
+	u64 events[MAX_HWEVENTS];
+	unsigned int flags[MAX_HWEVENTS];
+	struct mmcr_regs mmcr;
+	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
+	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
+	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];	/* filled in by power_check_constraints() */
+	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];	/* constraint mask per alternative */
+	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];	/* constraint value per alternative */
+
+	unsigned int txn_flags;
+	int n_txn_start;
+
+	/* BHRB bits */
+	u64				bhrb_filter;	/* BHRB HW branch filter */
+	unsigned int			bhrb_users;	/* inc/dec by power_pmu_bhrb_enable()/disable() */
+	void				*bhrb_context;	/* event->ctx the BHRB was last reset for */
+	struct	perf_branch_stack	bhrb_stack;
+	struct	perf_branch_entry	bhrb_entries[BHRB_MAX_ENTRIES];
+	u64				ic_init;
+
+	/* Store the PMC values */
+	unsigned long pmcs[MAX_HWEVENTS];	/* read back through get_pmcs_ext_regs() */
+};
+
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+static struct power_pmu *ppmu;
+
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_events_kernel = MMCR0_FCS;
+
+/*
+ * 32-bit doesn't have MMCRA but does have an MMCR2,
+ * and a few other names are different.
+ * Also 32-bit doesn't have MMCR3, SIER2 and SIER3.
+ * Define them as zero, knowing that every code path that accesses
+ * these registers (via mtspr/mfspr) is guarded by a ppmu flag
+ * check for PPMU_ARCH_31, so that code path is never entered
+ * on 32-bit.
+ */
+#ifdef CONFIG_PPC32
+
+#define MMCR0_FCHV		0
+#define MMCR0_PMCjCE		MMCR0_PMCnCE
+#define MMCR0_FC56		0
+#define MMCR0_PMAO		0
+#define MMCR0_EBE		0
+#define MMCR0_BHRBA		0
+#define MMCR0_PMCC		0
+#define MMCR0_PMCC_U6		0
+
+#define SPRN_MMCRA		SPRN_MMCR2	/* 32-bit has MMCR2 where 64-bit has MMCRA */
+#define SPRN_MMCR3		0
+#define SPRN_SIER2		0
+#define SPRN_SIER3		0
+#define MMCRA_SAMPLE_ENABLE	0
+#define MMCRA_BHRB_DISABLE     0
+#define MMCR0_PMCCEXT		0
+/* No-op stand-ins for the helpers that the CONFIG_PPC64 section below implements. */
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_read_regs(struct pt_regs *regs)
+{
+	regs->result = 0;	/* zero makes regs_use_siar() report false */
+}
+
+static inline int siar_valid(struct pt_regs *regs)
+{
+	return 1;
+}
+
+static bool is_ebb_event(struct perf_event *event) { return false; }
+static int ebb_event_check(struct perf_event *event) { return 0; }
+static void ebb_event_add(struct perf_event *event) { }
+static void ebb_switch_out(unsigned long mmcr0) { }
+static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
+{
+	return cpuhw->mmcr.mmcr0;	/* kernel MMCR0 value, unmodified */
+}
+
+static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
+static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
+static void pmao_restore_workaround(bool ebb) { }
+#endif /* CONFIG_PPC32 */
+
+/*
+ * Tell whether the registered PMU advertises a SIER register
+ * (PPMU_HAS_SIER). Also false when no PMU has been registered yet,
+ * i.e. ppmu is still NULL.
+ */
+bool is_sier_available(void)
+{
+	/* the && short-circuit keeps a NULL ppmu from being dereferenced */
+	return ppmu && (ppmu->flags & PPMU_HAS_SIER);
+}
+
+/*
+ * Fetch entry @idx of the pmcs[] array in this CPU's cpu_hw_events.
+ * @idx is used as-is with no range check, so the caller must keep it
+ * below MAX_HWEVENTS.
+ */
+unsigned long get_pmcs_ext_regs(int idx)
+{
+	/* this_cpu_ptr() selects the copy belonging to the executing CPU */
+	return this_cpu_ptr(&cpu_hw_events)->pmcs[idx];
+}
+
+static bool regs_use_siar(struct pt_regs *regs)
+{
+	/*
+	 * Only regs captured by a performance monitor exception went through
+	 * perf_read_regs(), which overloads regs->result to say whether SIAR
+	 * should be used. For any other exception (eg. a syscall)
+	 * regs->result holds an unrelated value and must be ignored.
+	 */
+	if (TRAP(regs) != INTERRUPT_PERFMON)
+		return false;
+
+	return regs->result != 0;
+}
+
+/*
+ * Things that are specific to 64-bit implementations.
+ */
+#ifdef CONFIG_PPC64
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;	/* MMCRA, stashed by perf_read_regs() */
+	unsigned long slot;
+
+	if (!(ppmu->flags & PPMU_HAS_SSLOT) || !(mmcra & MMCRA_SAMPLE_ENABLE))
+		return 0;
+
+	slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+	/* slot values above 1 give an offset of 4 bytes per slot past the first */
+	return slot > 1 ? 4 * (slot - 1) : 0;
+}
+
+/*
+ * The user wants a data address recorded in *addrp.
+ * When not instruction sampling, report the SDAR (sampled data address).
+ * When instruction sampling, report it only if it matches the instruction
+ * in SIAR, as flagged by [POWER6_]MMCRA_SDSYNC, [POWER7P_]MMCRA_SDAR_VALID
+ * in MMCRA, or SDAR_VALID in SIER; otherwise *addrp is left untouched.
+ * A kernel SDAR is reported as 0 when the event sets exclude_kernel.
+ */
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
+{
+	unsigned long mmcra = regs->dsisr;	/* MMCRA, stashed by perf_read_regs() */
+	bool sdar_valid;
+
+	if (ppmu->flags & PPMU_HAS_SIER)
+		sdar_valid = regs->dar & SIER_SDAR_VALID;	/* regs->dar carries SIER */
+	else {
+		unsigned long sdsync;	/* MMCRA bit that marks SDAR as usable on this PMU */
+
+		if (ppmu->flags & PPMU_SIAR_VALID)
+			sdsync = POWER7P_MMCRA_SDAR_VALID;
+		else if (ppmu->flags & PPMU_ALT_SIPR)
+			sdsync = POWER6_MMCRA_SDSYNC;
+		else if (ppmu->flags & PPMU_NO_SIAR)
+			sdsync = MMCRA_SAMPLE_ENABLE;
+		else
+			sdsync = MMCRA_SDSYNC;
+
+		sdar_valid = mmcra & sdsync;
+	}
+
+	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
+		*addrp = mfspr(SPRN_SDAR);
+
+	if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)	/* SDAR is read a second time here */
+		*addrp = 0;
+}
+
+/* Is SIHV set? Looks in SIER (regs->dar) if present, else in MMCRA (regs->dsisr). */
+static bool regs_sihv(struct pt_regs *regs)
+{
+	unsigned long mask;
+
+	if (ppmu->flags & PPMU_HAS_SIER)
+		return (regs->dar & SIER_SIHV) != 0;
+
+	/* PPMU_ALT_SIPR PMUs use the POWER6 bit position within MMCRA */
+	mask = (ppmu->flags & PPMU_ALT_SIPR) ? POWER6_MMCRA_SIHV : MMCRA_SIHV;
+	return (regs->dsisr & mask) != 0;
+}
+
+/* Is SIPR set? Looks in SIER (regs->dar) if present, else in MMCRA (regs->dsisr). */
+static bool regs_sipr(struct pt_regs *regs)
+{
+	unsigned long mask;
+
+	if (ppmu->flags & PPMU_HAS_SIER)
+		return (regs->dar & SIER_SIPR) != 0;
+
+	/* PPMU_ALT_SIPR PMUs use the POWER6 bit position within MMCRA */
+	mask = (ppmu->flags & PPMU_ALT_SIPR) ? POWER6_MMCRA_SIPR : MMCRA_SIPR;
+	return (regs->dsisr & mask) != 0;
+}
+
+static inline u32 perf_flags_from_msr(struct pt_regs *regs)
+{
+	if (regs->msr & MSR_PR)	/* PR is tested first, so it wins over HV */
+		return PERF_RECORD_MISC_USER;
+	if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
+		return PERF_RECORD_MISC_HYPERVISOR;
+	return PERF_RECORD_MISC_KERNEL;	/* also HV samples when FCHV is the kernel freeze bit */
+}
+
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	bool use_siar = regs_use_siar(regs);
+	unsigned long mmcra = regs->dsisr;	/* MMCRA, stashed by perf_read_regs() */
+	int marked = mmcra & MMCRA_SAMPLE_ENABLE;	/* non-zero for a marked event */
+
+	if (!use_siar)
+		return perf_flags_from_msr(regs);	/* classify from the MSR bits instead */
+
+	/*
+	 * Check the address in SIAR to identify the
+	 * privilege levels since the SIER[MSR_HV, MSR_PR]
+	 * bits are not set for marked events in power10
+	 * DD1.
+	 */
+	if (marked && (ppmu->flags & PPMU_P10_DD1)) {
+		unsigned long siar = mfspr(SPRN_SIAR);
+		if (siar) {
+			if (is_kernel_addr(siar))
+				return PERF_RECORD_MISC_KERNEL;
+			return PERF_RECORD_MISC_USER;
+		} else {	/* SIAR reads as 0: classify by regs->nip instead */
+			if (is_kernel_addr(regs->nip))
+				return PERF_RECORD_MISC_KERNEL;
+			return PERF_RECORD_MISC_USER;
+		}
+	}
+
+	/*
+	 * If we don't have flags in MMCRA, rather than using
+	 * the MSR, we intuit the flags from the address in
+	 * SIAR which should give slightly more reliable
+	 * results
+	 */
+	if (ppmu->flags & PPMU_NO_SIPR) {
+		unsigned long siar = mfspr(SPRN_SIAR);
+		if (is_kernel_addr(siar))
+			return PERF_RECORD_MISC_KERNEL;
+		return PERF_RECORD_MISC_USER;
+	}
+
+	/* PR has priority over HV, so order below is important */
+	if (regs_sipr(regs))
+		return PERF_RECORD_MISC_USER;
+
+	if (regs_sihv(regs) && (freeze_events_kernel != MMCR0_FCHV))
+		return PERF_RECORD_MISC_HYPERVISOR;
+
+	return PERF_RECORD_MISC_KERNEL;
+}
+
+/*
+ * Overload regs->dsisr to store MMCRA so we only need to read it once
+ * on each interrupt.
+ * Overload regs->dar to store SIER if we have it.
+ * Overload regs->result to specify whether we should use the MSR (result
+ * is zero) or the SIAR (result is non zero).
+ */
+static inline void perf_read_regs(struct pt_regs *regs)
+{
+	unsigned long mmcra = mfspr(SPRN_MMCRA);
+	int marked = mmcra & MMCRA_SAMPLE_ENABLE;	/* non-zero for a marked event */
+	int use_siar;
+
+	regs->dsisr = mmcra;
+
+	if (ppmu->flags & PPMU_HAS_SIER)
+		regs->dar = mfspr(SPRN_SIER);
+
+	/*
+	 * If this isn't a PMU exception (eg a software event) the SIAR is
+	 * not valid. Use pt_regs.
+	 *
+	 * If it is a marked event use the SIAR.
+	 *
+	 * If the PMU doesn't update the SIAR for non marked events use
+	 * pt_regs.
+	 *
+	 * If regs is a kernel interrupt, always use SIAR. Some PMUs have an
+	 * issue with regs_sipr not being in sync with SIAR in interrupt entry
+	 * and return sequences, which can result in regs_sipr being true for
+	 * kernel interrupts and SIAR, which has the effect of causing samples
+	 * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around
+	 * interrupt entry/exit.
+	 *
+	 * If the PMU has HV/PR flags then check to see if they
+	 * place the exception in userspace. If so, use pt_regs. In
+	 * continuous sampling mode the SIAR and the PMU exception are
+	 * not synchronised, so they may be many instructions apart.
+	 * This can result in confusing backtraces. We still want
+	 * hypervisor samples as well as samples in the kernel with
+	 * interrupts off hence the userspace check.
+	 */
+	if (TRAP(regs) != INTERRUPT_PERFMON)
+		use_siar = 0;
+	else if ((ppmu->flags & PPMU_NO_SIAR))
+		use_siar = 0;
+	else if (marked)
+		use_siar = 1;
+	else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
+		use_siar = 0;
+	else if (!user_mode(regs))
+		use_siar = 1;
+	else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
+		use_siar = 0;
+	else
+		use_siar = 1;
+
+	regs->result = use_siar;	/* read back later by regs_use_siar() */
+}
+
+/*
+ * On processors like P7+ that have the SIAR-Valid bit, marked instructions
+ * must be sampled only if the SIAR-valid bit is set.
+ *
+ * For unmarked instructions and for processors that don't have the SIAR-Valid
+ * bit, assume that SIAR is valid.
+ */
+static inline int siar_valid(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;	/* MMCRA, stashed by perf_read_regs() */
+	int marked = mmcra & MMCRA_SAMPLE_ENABLE;
+
+	if (marked) {
+		/*
+		 * SIER[SIAR_VALID] is not set for some
+		 * marked events on power10 DD1, so drop
+		 * the check for SIER[SIAR_VALID] and return true.
+		 */
+		if (ppmu->flags & PPMU_P10_DD1)
+			return 0x1;
+		else if (ppmu->flags & PPMU_HAS_SIER)
+			return regs->dar & SIER_SIAR_VALID;	/* regs->dar carries SIER */
+
+		if (ppmu->flags & PPMU_SIAR_VALID)
+			return mmcra & POWER7P_MMCRA_SIAR_VALID;
+	}
+
+	return 1;	/* unmarked sample, or no validity bit to consult */
+}
+
+
+/* Reset all possible BHRB entries by executing the PPC_CLRBHRB instruction */
+static void power_pmu_bhrb_reset(void)
+{
+	asm volatile(PPC_CLRBHRB);
+}
+
+static void power_pmu_bhrb_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	if (!ppmu->bhrb_nr)	/* this PMU reports no BHRB entries */
+		return;
+
+	/* Clear BHRB if we changed task context to avoid data leaks */
+	if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
+		power_pmu_bhrb_reset();
+		cpuhw->bhrb_context = event->ctx;
+	}
+	cpuhw->bhrb_users++;	/* balanced by power_pmu_bhrb_disable() */
+	perf_sched_cb_inc(event->pmu);	/* presumably arms power_pmu_sched_task() - TODO confirm */
+}
+
+static void power_pmu_bhrb_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	if (!ppmu->bhrb_nr)	/* this PMU reports no BHRB entries */
+		return;
+
+	WARN_ON_ONCE(!cpuhw->bhrb_users);	/* disable without a matching enable */
+	cpuhw->bhrb_users--;
+	perf_sched_cb_dec(event->pmu);
+
+	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
+		/* BHRB cannot be turned off when other
+		 * events are active on the PMU.
+		 */
+
+		/* last user is gone: drop the context pointer so it cannot go stale */
+		cpuhw->bhrb_context = NULL;
+	}
+}
+
+/* Called on context switch to keep one process's branch entries from
+ * mingling with another's: the BHRB is cleared on every sched-in.
+ */
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+	if (!ppmu->bhrb_nr)	/* this PMU reports no BHRB entries */
+		return;
+
+	if (sched_in)	/* nothing is done on sched-out; pmu_ctx is unused */
+		power_pmu_bhrb_reset();
+}
+/* Calculate the "to" address of the branch at addr; 0 if it cannot be read or decoded */
+static __u64 power_pmu_bhrb_to(u64 addr)
+{
+	unsigned int instr;	/* local copy of the branch instruction */
+	__u64 target;
+
+	if (is_kernel_addr(addr)) {
+		if (copy_from_kernel_nofault(&instr, (void *)addr,
+				sizeof(instr)))
+			return 0;
+
+		return branch_target(&instr);	/* NOTE(review): no rebasing on this path, unlike below */
+	}
+
+	/* Userspace: need copy instruction here then translate it */
+	if (copy_from_user_nofault(&instr, (unsigned int __user *)addr,
+			sizeof(instr)))
+		return 0;
+
+	target = branch_target(&instr);
+	if ((!target) || (instr & BRANCH_ABSOLUTE))
+		return target;
+
+	/* Rebase the relative target from the local copy's address (&instr) to addr */
+	return target - (unsigned long)&instr + addr;
+}
+
+/* Decode the hardware BHRB into cpuhw->bhrb_entries[] and set cpuhw->bhrb_stack.nr */
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
+{
+	u64 val;
+	u64 addr;
+	int r_index, u_index, pred;	/* r_index: next BHRB slot to read, u_index: next output slot */
+
+	r_index = 0;
+	u_index = 0;
+	while (r_index < ppmu->bhrb_nr) {
+		/* Assembly read function */
+		val = read_bhrb(r_index++);
+		if (!val)
+			/* Terminal marker: End of valid BHRB entries */
+			break;
+		else {
+			addr = val & BHRB_EA;
+			pred = val & BHRB_PREDICTION;
+
+			if (!addr)
+				/* invalid entry */
+				continue;
+
+			/*
+			 * BHRB rolling buffer could very much contain the kernel
+			 * addresses at this point. Check the privileges before
+			 * exporting it to userspace (avoid exposure of regions
+			 * where we could have speculative execution)
+			 * In case of ISA v3.1, BHRB will capture only user-space
+			 * addresses, hence include a check before filtering code
+			 */
+			if (!(ppmu->flags & PPMU_ARCH_31) &&
+			    is_kernel_addr(addr) && event->attr.exclude_kernel)
+				continue;
+
+			/* Branches are read most recent first (ie. mfbhrb 0 is
+			 * the most recent branch).
+			 * There are two types of valid entries:
+			 * 1) a target entry which is the to address of a
+			 *    computed goto like a blr,bctr,btar.  The next
+			 *    entry read from the bhrb will be branch
+			 *    corresponding to this target (ie. the actual
+			 *    blr/bctr/btar instruction).
+			 * 2) a from address which is an actual branch.  If a
+			 *    target entry precedes this, then this is the
+			 *    matching branch for that target.  If this is not
+			 *    following a target entry, then this is a branch
+			 *    where the target is given as an immediate field
+			 *    in the instruction (ie. an i or b form branch).
+			 *    In this case we need to read the instruction from
+			 *    memory to determine the target/to address.
+			 */
+
+			if (val & BHRB_TARGET) {
+				/* Target branches use two entries
+				 * (ie. computed gotos/XL form)
+				 */
+				cpuhw->bhrb_entries[u_index].to = addr;
+				cpuhw->bhrb_entries[u_index].mispred = pred;
+				cpuhw->bhrb_entries[u_index].predicted = ~pred;
+
+				/* Get from address in next entry */
+				val = read_bhrb(r_index++);	/* NOTE(review): not re-checked against bhrb_nr */
+				addr = val & BHRB_EA;
+				if (val & BHRB_TARGET) {
+					/* Shouldn't have two targets in a
+					   row.. Reset index and try again */
+					r_index--;
+					addr = 0;	/* this entry's from address is recorded as 0 */
+				}
+				cpuhw->bhrb_entries[u_index].from = addr;
+			} else {
+				/* Branches to immediate field
+				   (ie I or B form) */
+				cpuhw->bhrb_entries[u_index].from = addr;
+				cpuhw->bhrb_entries[u_index].to =
+					power_pmu_bhrb_to(addr);
+				cpuhw->bhrb_entries[u_index].mispred = pred;
+				cpuhw->bhrb_entries[u_index].predicted = ~pred;
+			}
+			u_index++;
+
+		}
+	}
+	cpuhw->bhrb_stack.nr = u_index;	/* number of entries filled in above */
+	cpuhw->bhrb_stack.hw_idx = -1ULL;
+	return;
+}
+
+static bool is_ebb_event(struct perf_event *event)
+{
+	/*
+	 * This could be a per-PMU callback, but we'd rather avoid the cost. We
+	 * check that the PMU supports EBB (PPMU_ARCH_207S), so those that don't
+	 * can still use bit 63 of the event code for something else if they wish.
+	 */
+	return (ppmu->flags & PPMU_ARCH_207S) &&
+	       ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1);
+}
+
+static int ebb_event_check(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+
+	/* Event and group leader must agree on EBB */
+	if (is_ebb_event(leader) != is_ebb_event(event))
+		return -EINVAL;
+
+	if (is_ebb_event(event)) {
+		if (!(event->attach_state & PERF_ATTACH_TASK))
+			return -EINVAL;	/* EBB events must be attached to a task */
+
+		if (!leader->attr.pinned || !leader->attr.exclusive)
+			return -EINVAL;	/* the group leader must be pinned and exclusive */
+
+		if (event->attr.freq ||
+		    event->attr.inherit ||
+		    event->attr.sample_type ||
+		    event->attr.sample_period ||
+		    event->attr.enable_on_exec)
+			return -EINVAL;	/* none of these attributes may be set on an EBB event */
+	}
+
+	return 0;	/* non-EBB event, or EBB event that passed every check */
+}
+
+static void ebb_event_add(struct perf_event *event)
+{
+	if (!is_ebb_event(event) || current->thread.used_ebb)
+		return;	/* not EBB, or this task has already been set up */
+
+	/*
+	 * IFF this is the first time we've added an EBB event, set
+	 * PMXE in the user MMCR0 so we can detect when it's cleared by
+	 * userspace. We need this so that we can context switch while
+	 * userspace is in the EBB handler (where PMXE is 0).
+	 */
+	current->thread.used_ebb = 1;
+	current->thread.mmcr0 |= MMCR0_PMXE;
+}
+
+static void ebb_switch_out(unsigned long mmcr0)
+{
+	if (!(mmcr0 & MMCR0_EBE))	/* EBE clear in the given MMCR0: nothing to save */
+		return;
+
+	current->thread.siar  = mfspr(SPRN_SIAR);
+	current->thread.sier  = mfspr(SPRN_SIER);
+	current->thread.sdar  = mfspr(SPRN_SDAR);
+	current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;	/* keep only the bits in MMCR0_USER_MASK */
+	current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+	if (ppmu->flags & PPMU_ARCH_31) {	/* these registers are only touched on PPMU_ARCH_31 PMUs */
+		current->thread.mmcr3 = mfspr(SPRN_MMCR3);
+		current->thread.sier2 = mfspr(SPRN_SIER2);
+		current->thread.sier3 = mfspr(SPRN_SIER3);
+	}
+}
+
+static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
+{
+	unsigned long mmcr0 = cpuhw->mmcr.mmcr0;	/* start from the kernel's MMCR0 value */
+
+	if (!ebb)
+		goto out;	/* non-EBB: return the kernel value untouched */
+
+	/* Enable EBB and read/write to all 6 PMCs and BHRB for userspace */
+	mmcr0 |= MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC_U6;
+
+	/*
+	 * Add any bits from the user MMCR0, FC or PMAO. This is compatible
+	 * with pmao_restore_workaround() because we may add PMAO but we never
+	 * clear it here.
+	 */
+	mmcr0 |= current->thread.mmcr0;
+
+	/*
+	 * Be careful not to set PMXE if userspace had it cleared. This is also
+	 * compatible with pmao_restore_workaround() because it has already
+	 * cleared PMXE and we leave PMAO alone.
+	 */
+	if (!(current->thread.mmcr0 & MMCR0_PMXE))
+		mmcr0 &= ~MMCR0_PMXE;
+
+	mtspr(SPRN_SIAR, current->thread.siar);
+	mtspr(SPRN_SIER, current->thread.sier);
+	mtspr(SPRN_SDAR, current->thread.sdar);
+
+	/*
+	 * Merge the kernel & user values of MMCR2. The semantics we implement
+	 * are that the user MMCR2 can set bits, ie. cause counters to freeze,
+	 * but not clear bits. If a task wants to be able to clear bits, ie.
+	 * unfreeze counters, it should not set exclude_xxx in its events and
+	 * instead manage the MMCR2 entirely by itself.
+	 */
+	mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2);
+
+	if (ppmu->flags & PPMU_ARCH_31) {
+		mtspr(SPRN_MMCR3, current->thread.mmcr3);
+		mtspr(SPRN_SIER2, current->thread.sier2);
+		mtspr(SPRN_SIER3, current->thread.sier3);
+	}
+out:
+	return mmcr0;	/* returned to the caller; MMCR0 itself is not written here */
+}
+
+static void pmao_restore_workaround(bool ebb)
+{
+	unsigned pmcs[6];	/* PMC1..PMC6 saved across the forced overflow */
+
+	if (!cpu_has_feature(CPU_FTR_PMAO_BUG))
+		return;
+
+	/*
+	 * On POWER8E there is a hardware defect which affects the PMU context
+	 * switch logic, ie. power_pmu_disable/enable().
+	 *
+	 * When a counter overflows PMXE is cleared and FC/PMAO is set in MMCR0
+	 * by the hardware. Sometime later the actual PMU exception is
+	 * delivered.
+	 *
+	 * If we context switch, or simply disable/enable, the PMU prior to the
+	 * exception arriving, the exception will be lost when we clear PMAO.
+	 *
+	 * When we reenable the PMU, we will write the saved MMCR0 with PMAO
+	 * set, and this _should_ generate an exception. However because of the
+	 * defect no exception is generated when we write PMAO, and we get
+	 * stuck with no counters counting but no exception delivered.
+	 *
+	 * The workaround is to detect this case and tweak the hardware to
+	 * create another pending PMU exception.
+	 *
+	 * We do that by setting up PMC6 (cycles) for an imminent overflow and
+	 * enabling the PMU. That causes a new exception to be generated in the
+	 * chip, but we don't take it yet because we have interrupts hard
+	 * disabled. We then write back the PMU state as we want it to be seen
+	 * by the exception handler. When we reenable interrupts the exception
+	 * handler will be called and see the correct state.
+	 *
+	 * The logic is the same for EBB, except that the exception is gated by
+	 * us having interrupts hard disabled as well as the fact that we are
+	 * not in userspace. The exception is finally delivered when we return
+	 * to userspace.
+	 */
+
+	/* Only if PMAO is set and PMAO_SYNC is clear */
+	if ((current->thread.mmcr0 & (MMCR0_PMAO | MMCR0_PMAO_SYNC)) != MMCR0_PMAO)
+		return;
+
+	/* If we're doing EBB, only if BESCR[GE] is set */
+	if (ebb && !(current->thread.bescr & BESCR_GE))
+		return;
+
+	/*
+	 * We are already soft-disabled in power_pmu_enable(). We need to hard
+	 * disable to actually prevent the PMU exception from firing.
+	 */
+	hard_irq_disable();
+
+	/*
+	 * This is a bit gross, but we know we're on POWER8E and have 6 PMCs.
+	 * Using read/write_pmc() in a for loop adds 12 function calls and
+	 * almost doubles our code size.
+	 */
+	pmcs[0] = mfspr(SPRN_PMC1);
+	pmcs[1] = mfspr(SPRN_PMC2);
+	pmcs[2] = mfspr(SPRN_PMC3);
+	pmcs[3] = mfspr(SPRN_PMC4);
+	pmcs[4] = mfspr(SPRN_PMC5);
+	pmcs[5] = mfspr(SPRN_PMC6);
+
+	/* Ensure all freeze bits are unset */
+	mtspr(SPRN_MMCR2, 0);
+
+	/* Set up PMC6 to overflow in one cycle */
+	mtspr(SPRN_PMC6, 0x7FFFFFFE);
+
+	/* Enable exceptions and unfreeze PMC6 */
+	mtspr(SPRN_MMCR0, MMCR0_PMXE | MMCR0_PMCjCE | MMCR0_PMAO);
+
+	/* Now we need to refreeze and restore the PMCs */
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMAO);
+
+	mtspr(SPRN_PMC1, pmcs[0]);
+	mtspr(SPRN_PMC2, pmcs[1]);
+	mtspr(SPRN_PMC3, pmcs[2]);
+	mtspr(SPRN_PMC4, pmcs[3]);
+	mtspr(SPRN_PMC5, pmcs[4]);
+	mtspr(SPRN_PMC6, pmcs[5]);	/* replaces the 0x7FFFFFFE written above */
+}
+
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+	struct cpu_hw_events *cpuhw;
+
+	/*
+	 * This could simply test local_paca->pmcregs_in_use if that were not
+	 * under ifdef KVM.
+	 */
+	if (!ppmu)	/* no PMU registered */
+		return false;
+
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	return cpuhw->n_events;	/* any non-zero event count converts to true */
+}
+#endif /* CONFIG_PPC64 */
+
+static void perf_event_interrupt(struct pt_regs *regs);
+
+/*
+ * Read PMC @idx (1-based); an unsupported index logs an error and reads as 0.
+ */
+static unsigned long read_pmc(int idx)
+{
+	unsigned long val;
+
+	switch (idx) {
+	case 1:
+		val = mfspr(SPRN_PMC1);
+		break;
+	case 2:
+		val = mfspr(SPRN_PMC2);
+		break;
+	case 3:
+		val = mfspr(SPRN_PMC3);
+		break;
+	case 4:
+		val = mfspr(SPRN_PMC4);
+		break;
+	case 5:
+		val = mfspr(SPRN_PMC5);
+		break;
+	case 6:
+		val = mfspr(SPRN_PMC6);
+		break;
+#ifdef CONFIG_PPC64
+	case 7:
+		val = mfspr(SPRN_PMC7);
+		break;
+	case 8:
+		val = mfspr(SPRN_PMC8);
+		break;
+#endif /* CONFIG_PPC64 */
+	default:
+		pr_err("oops trying to read PMC%d\n", idx);	/* same text and level as before */
+		val = 0;
+	}
+	return val;
+}
+
+/*
+ * Write @val to PMC @idx (1-based); an unsupported index only logs an error.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 1:
+		mtspr(SPRN_PMC1, val);
+		break;
+	case 2:
+		mtspr(SPRN_PMC2, val);
+		break;
+	case 3:
+		mtspr(SPRN_PMC3, val);
+		break;
+	case 4:
+		mtspr(SPRN_PMC4, val);
+		break;
+	case 5:
+		mtspr(SPRN_PMC5, val);
+		break;
+	case 6:
+		mtspr(SPRN_PMC6, val);
+		break;
+#ifdef CONFIG_PPC64
+	case 7:
+		mtspr(SPRN_PMC7, val);
+		break;
+	case 8:
+		mtspr(SPRN_PMC8, val);
+		break;
+#endif /* CONFIG_PPC64 */
+	default:
+		pr_err("oops trying to write PMC%d\n", idx);	/* same text and level as before */
+	}
+}
+
+static int any_pmc_overflown(struct cpu_hw_events *cpuhw)
+{
+	int i, idx;
+
+	for (i = 0; i < cpuhw->n_events; i++) {
+		idx = cpuhw->event[i]->hw.idx;	/* PMC number; 0 is skipped below */
+		if ((idx) && ((int)read_pmc(idx) < 0))	/* negative as int: bit 31 of the counter is set */
+			return idx;	/* first such PMC, in event[] order */
+	}
+
+	return 0;	/* no event's PMC has bit 31 set */
+}
+
+/* Called from sysrq_handle_showregs(): logs this CPU's PMU registers with local irqs off */
+void perf_event_print_debug(void)
+{
+	unsigned long sdar, sier, flags;
+	u32 pmcs[MAX_HWEVENTS];
+	int i;
+
+	if (!ppmu) {
+		pr_info("Performance monitor hardware not registered.\n");
+		return;
+	}
+
+	if (!ppmu->n_counter)
+		return;
+
+	local_irq_save(flags);
+
+	pr_info("CPU: %d PMU registers, ppmu = %s n_counters = %d\n",
+		 smp_processor_id(), ppmu->name, ppmu->n_counter);
+
+	for (i = 0; i < ppmu->n_counter; i++)
+		pmcs[i] = read_pmc(i + 1);	/* PMC numbers are 1-based */
+
+	for (; i < MAX_HWEVENTS; i++)
+		pmcs[i] = 0xdeadbeef;	/* filler for counters this PMU does not have */
+
+	pr_info("PMC1:  %08x PMC2: %08x PMC3: %08x PMC4: %08x\n",
+		 pmcs[0], pmcs[1], pmcs[2], pmcs[3]);
+
+	if (ppmu->n_counter > 4)
+		pr_info("PMC5:  %08x PMC6: %08x PMC7: %08x PMC8: %08x\n",
+			 pmcs[4], pmcs[5], pmcs[6], pmcs[7]);
+
+	pr_info("MMCR0: %016lx MMCR1: %016lx MMCRA: %016lx\n",
+		mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCRA));
+
+	sdar = sier = 0;	/* printed as zero when not read below */
+#ifdef CONFIG_PPC64
+	sdar = mfspr(SPRN_SDAR);
+
+	if (ppmu->flags & PPMU_HAS_SIER)
+		sier = mfspr(SPRN_SIER);
+
+	if (ppmu->flags & PPMU_ARCH_207S) {
+		pr_info("MMCR2: %016lx EBBHR: %016lx\n",
+			mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR));
+		pr_info("EBBRR: %016lx BESCR: %016lx\n",
+			mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
+	}
+
+	if (ppmu->flags & PPMU_ARCH_31) {
+		pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n",
+			mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+	}
+#endif
+	pr_info("SIAR:  %016lx SDAR:  %016lx SIER:  %016lx\n",
+		mfspr(SPRN_SIAR), sdar, sier);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Check if a set of events can all go on the PMU at once.
+ * If they can't, this will look at alternative codes for the events
+ * and see if any combination of alternative codes is feasible.
+ * The feasible set is returned in event_id[]; returns 0 if found, else -1.
+ */
+static int power_check_constraints(struct cpu_hw_events *cpuhw,
+				   u64 event_id[], unsigned int cflags[],
+				   int n_ev, struct perf_event **event)
+{
+	unsigned long mask, value, nv;
+	unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];	/* saved state per event for backtracking */
+	int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];	/* alternative count / chosen alternative per event */
+	int i, j;
+	unsigned long addf = ppmu->add_fields;
+	unsigned long tadd = ppmu->test_adder;
+	unsigned long grp_mask = ppmu->group_constraint_mask;
+	unsigned long grp_val = ppmu->group_constraint_val;
+
+	if (n_ev > ppmu->n_counter)
+		return -1;
+
+	/* First see if the events will go on as-is */
+	for (i = 0; i < n_ev; ++i) {
+		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
+		    && !ppmu->limited_pmc_event(event_id[i])) {
+			ppmu->get_alternatives(event_id[i], cflags[i],
+					       cpuhw->alternatives[i]);
+			event_id[i] = cpuhw->alternatives[i][0];
+		}
+		if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
+					 &cpuhw->avalues[i][0], event[i]->attr.config1))
+			return -1;
+	}
+	value = mask = 0;
+	for (i = 0; i < n_ev; ++i) {
+		nv = (value | cpuhw->avalues[i][0]) +
+			(value & cpuhw->avalues[i][0] & addf);
+
+		if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+			break;
+
+		if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+			& (~grp_mask)) != 0)
+			break;
+
+		value = nv;
+		mask |= cpuhw->amasks[i][0];
+	}
+	if (i == n_ev) {
+		if ((value & mask & grp_mask) != (mask & grp_val))
+			return -1;
+		else
+			return 0;	/* all OK */
+	}
+
+	/* doesn't work, gather alternatives... */
+	if (!ppmu->get_alternatives)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {
+		choice[i] = 0;
+		n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
+						  cpuhw->alternatives[i]);
+		for (j = 1; j < n_alt[i]; ++j)
+			ppmu->get_constraint(cpuhw->alternatives[i][j],
+					     &cpuhw->amasks[i][j],
+					     &cpuhw->avalues[i][j],
+					     event[i]->attr.config1);	/* NOTE(review): return value ignored here */
+	}
+
+	/* enumerate all possibilities and see if any will work (grp_mask/grp_val are not applied here) */
+	i = 0;
+	j = -1;
+	value = mask = nv = 0;
+	while (i < n_ev) {
+		if (j >= 0) {
+			/* we're backtracking, restore context */
+			value = svalues[i];
+			mask = smasks[i];
+			j = choice[i];
+		}
+		/*
+		 * See if any alternative k for event_id i,
+		 * where k > j, will satisfy the constraints.
+		 */
+		while (++j < n_alt[i]) {
+			nv = (value | cpuhw->avalues[i][j]) +
+				(value & cpuhw->avalues[i][j] & addf);
+			if ((((nv + tadd) ^ value) & mask) == 0 &&
+			    (((nv + tadd) ^ cpuhw->avalues[i][j])
+			     & cpuhw->amasks[i][j]) == 0)
+				break;
+		}
+		if (j >= n_alt[i]) {
+			/*
+			 * No feasible alternative, backtrack
+			 * to event_id i-1 and continue enumerating its
+			 * alternatives from where we got up to.
+			 */
+			if (--i < 0)
+				return -1;
+		} else {
+			/*
+			 * Found a feasible alternative for event_id i,
+			 * remember where we got up to with this event_id,
+			 * go on to the next event_id, and start with
+			 * the first alternative for it.
+			 */
+			choice[i] = j;
+			svalues[i] = value;
+			smasks[i] = mask;
+			value = nv;
+			mask |= cpuhw->amasks[i][j];
+			++i;
+			j = -1;
+		}
+	}
+
+	/* OK, we have a feasible combination, tell the caller the solution */
+	for (i = 0; i < n_ev; ++i)
+		event_id[i] = cpuhw->alternatives[i][choice[i]];
+	return 0;
+}
+
+/*
+ * Check if newly-added events have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added events. Returns 0 if consistent, -EAGAIN on a mismatch.
+ */
+static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
+			  int n_prev, int n_new)
+{
+	int eu = 0, ek = 0, eh = 0;	/* exclude settings of the first non-limited-OK event */
+	int i, n, first;
+	struct perf_event *event;
+
+	/*
+	 * If the PMU we're on supports per event exclude settings then we
+	 * don't need to do any of this logic. NB. This assumes no PMU has both
+	 * per event exclude and limited PMCs.
+	 */
+	if (ppmu->flags & PPMU_ARCH_207S)
+		return 0;
+
+	n = n_prev + n_new;
+	if (n <= 1)	/* zero or one event cannot conflict */
+		return 0;
+
+	first = 1;
+	for (i = 0; i < n; ++i) {
+		if (cflags[i] & PPMU_LIMITED_PMC_OK) {	/* left out of the comparison */
+			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
+			continue;
+		}
+		event = ctrs[i];
+		if (first) {
+			eu = event->attr.exclude_user;
+			ek = event->attr.exclude_kernel;
+			eh = event->attr.exclude_hv;
+			first = 0;
+		} else if (event->attr.exclude_user != eu ||
+			   event->attr.exclude_kernel != ek ||
+			   event->attr.exclude_hv != eh) {
+			return -EAGAIN;
+		}
+	}
+
+	if (eu || ek || eh)	/* some exclusion is in force: limited-OK events become limited-required */
+		for (i = 0; i < n; ++i)
+			if (cflags[i] & PPMU_LIMITED_PMC_OK)
+				cflags[i] |= PPMU_LIMITED_PMC_REQD;
+
+	return 0;
+}
+
+static u64 check_and_compute_delta(u64 prev, u64 val)
+{
+	/*
+	 * POWER7 can roll counter values back. A rolled-back reading is
+	 * smaller than the previous one, but by less than 256, the largest
+	 * number of events rolled back at once; a bigger drop means the
+	 * counter really wrapped. Treating the rollback as an advance would
+	 * put bogus values in the delta and the counter, so report no
+	 * progress instead, at the cost of a small loss of precision.
+	 */
+	if (prev > val && (prev - val) < 256)
+		return 0;
+
+	/*
+	 * Otherwise the advance is the difference modulo 2^32.
+	 */
+	return (val - prev) & 0xfffffffful;
+}
+
+/*
+ * Bring event->count up to date with the hardware counter backing @event.
+ *
+ * Does nothing for a stopped event or for one that has no PMC assigned
+ * (hw.idx == 0).  For an EBB event only hw.prev_count is refreshed from
+ * the PMC.  Otherwise the delta since hw.prev_count is added to
+ * event->count and subtracted from hw.period_left (clamped to >= 1).
+ */
+static void power_pmu_read(struct perf_event *event)
+{
+	s64 val, delta, prev;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	if (!event->hw.idx)
+		return;
+
+	if (is_ebb_event(event)) {
+		val = read_pmc(event->hw.idx);
+		local64_set(&event->hw.prev_count, val);
+		return;
+	}
+
+	/*
+	 * Performance monitor interrupts come even when interrupts
+	 * are soft-disabled, as long as interrupts are hard-enabled.
+	 * Therefore we treat them like NMIs.
+	 *
+	 * Retry until prev_count is advanced from the value the delta was
+	 * computed against; a zero delta leaves everything untouched.
+	 */
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		barrier();
+		val = read_pmc(event->hw.idx);
+		delta = check_and_compute_delta(prev, val);
+		if (!delta)
+			return;
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+
+	local64_add(delta, &event->count);
+
+	/*
+	 * A number of places program the PMC with (0x80000000 - period_left).
+	 * We never want period_left to be less than 1 because we will program
+	 * the PMC with a value >= 0x80000000 and an edge detected PMC will
+	 * roll around to 0 before taking an exception. We have seen this
+	 * on POWER8.
+	 *
+	 * To fix this, clamp the minimum value of period_left to 1.
+	 */
+	do {
+		prev = local64_read(&event->hw.period_left);
+		val = prev - delta;
+		if (val < 1)
+			val = 1;
+	} while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev);
+}
+
+/*
+ * On some machines, PMC5 and PMC6 can't be written, don't respect
+ * the freeze conditions, and don't generate interrupts.  Returns
+ * non-zero if PMC number @pmcnum is such a limited PMC on this PMU.
+ */
+static int is_limited_pmc(int pmcnum)
+{
+	if (!(ppmu->flags & PPMU_LIMITED_PMC5_6))
+		return 0;
+
+	return pmcnum == 5 || pmcnum == 6;
+}
+
+/*
+ * Account the supplied PMC5/PMC6 readings to the limited events and
+ * detach them from their PMCs.
+ *
+ * @cpuhw: per-cpu PMU state holding the limited_counter[] list
+ * @pmc5:  value read from PMC5 by the caller
+ * @pmc6:  value read from PMC6 by the caller
+ *
+ * Events whose hw.idx is already 0 are skipped.  For the others the delta
+ * since hw.prev_count is added to event->count and hw.idx is set to 0.
+ */
+static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
+				    unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_event *event;
+	u64 val, prev, delta;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		event = cpuhw->limited_counter[i];
+		if (!event->hw.idx)
+			continue;
+		/* Any limited index other than 5 is taken to be PMC6. */
+		val = (event->hw.idx == 5) ? pmc5 : pmc6;
+		prev = local64_read(&event->hw.prev_count);
+		event->hw.idx = 0;
+		delta = check_and_compute_delta(prev, val);
+		if (delta)
+			local64_add(delta, &event->count);
+	}
+}
+
+/*
+ * Reattach the limited events to their PMCs and rebase their counts.
+ *
+ * @cpuhw: per-cpu PMU state holding limited_counter[] and limited_hwidx[]
+ * @pmc5:  value read from PMC5 by the caller
+ * @pmc6:  value read from PMC6 by the caller
+ *
+ * Each event gets its hw.idx back from limited_hwidx[].  hw.prev_count is
+ * replaced by the supplied reading only when check_and_compute_delta()
+ * reports a non-zero delta against the old prev_count.
+ */
+static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
+				  unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_event *event;
+	u64 val, prev;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		event = cpuhw->limited_counter[i];
+		event->hw.idx = cpuhw->limited_hwidx[i];
+		val = (event->hw.idx == 5) ? pmc5 : pmc6;
+		prev = local64_read(&event->hw.prev_count);
+		if (check_and_compute_delta(prev, val))
+			local64_set(&event->hw.prev_count, val);
+		perf_event_update_userpage(event);
+	}
+}
+
+/*
+ * Write @mmcr0 to MMCR0, freezing or thawing the limited events as needed.
+ *
+ * Since limited events don't respect the freeze conditions, we
+ * have to read them immediately after freezing or unfreezing the
+ * other events.  We try to keep the values from the limited
+ * events as consistent as possible by keeping the delay (in
+ * cycles and instructions) between freezing/unfreezing and reading
+ * the limited events as small and consistent as possible.
+ * Therefore, if any limited events are in use, we read them
+ * both, and always in the same order, to minimize variability,
+ * and do it inside the same asm that writes MMCR0.
+ *
+ * With no limited events in use this is a plain mtspr of MMCR0.
+ */
+static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
+{
+	unsigned long pmc5, pmc6;
+
+	if (!cpuhw->n_limited) {
+		mtspr(SPRN_MMCR0, mmcr0);
+		return;
+	}
+
+	/*
+	 * Write MMCR0, then read PMC5 and PMC6 immediately.
+	 * To ensure we don't get a performance monitor interrupt
+	 * between writing MMCR0 and freezing/thawing the limited
+	 * events, we first write MMCR0 with the event overflow
+	 * interrupt enable bits turned off.
+	 */
+	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
+		     : "=&r" (pmc5), "=&r" (pmc6)
+		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
+		       "i" (SPRN_MMCR0),
+		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
+
+	/* MMCR0_FC set means the caller is freezing, otherwise thawing. */
+	if (mmcr0 & MMCR0_FC)
+		freeze_limited_counters(cpuhw, pmc5, pmc6);
+	else
+		thaw_limited_counters(cpuhw, pmc5, pmc6);
+
+	/*
+	 * Write the full MMCR0 including the event overflow interrupt
+	 * enable bits, if necessary.
+	 */
+	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
+		mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/*
+ * Disable all events to prevent PMU interrupts and to allow
+ * events to be added or removed.
+ *
+ * Installed as the pmu_disable callback of power_pmu.  Does nothing when
+ * no PMU driver is registered (ppmu == NULL) or when this CPU's PMU is
+ * already marked disabled.  Runs with local interrupts saved/disabled.
+ */
+static void power_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags, mmcr0, val, mmcra;
+
+	if (!ppmu)
+		return;
+	local_irq_save(flags);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	if (!cpuhw->disabled) {
+		/*
+		 * Check if we ever enabled the PMU on this cpu.
+		 */
+		if (!cpuhw->pmcs_enabled) {
+			ppc_enable_pmcs();
+			cpuhw->pmcs_enabled = 1;
+		}
+
+		/*
+		 * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
+		 * Also clear PMXE to disable PMI's getting triggered in some
+		 * corner cases during PMU disable.
+		 *
+		 * mmcr0 keeps the value read from the register; it is handed
+		 * to ebb_switch_out() further down.
+		 */
+		val  = mmcr0 = mfspr(SPRN_MMCR0);
+		val |= MMCR0_FC;
+		val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
+			 MMCR0_PMXE | MMCR0_FC56);
+		/* Set mmcr0 PMCCEXT for p10 */
+		if (ppmu->flags & PPMU_ARCH_31)
+			val |= MMCR0_PMCCEXT;
+
+		/*
+		 * The barrier is to make sure the mtspr has been
+		 * executed and the PMU has frozen the events etc.
+		 * before we return.
+		 */
+		write_mmcr0(cpuhw, val);
+		mb();
+		isync();
+
+		/*
+		 * Some corner cases could clear the PMU counter overflow
+		 * while a masked PMI is pending. One such case is when
+		 * a PMI happens during interrupt replay and perf counter
+		 * values are cleared by PMU callbacks before replay.
+		 *
+		 * Disable the interrupt by clearing the paca bit for PMI
+		 * since we are disabling the PMU now. Otherwise provide a
+		 * warning if there is PMI pending, but no counter is found
+		 * overflown.
+		 *
+		 * Since power_pmu_disable runs under local_irq_save, it
+		 * could happen that code hits a PMC overflow without PMI
+		 * pending in paca. Hence only clear PMI pending if it was
+		 * set.
+		 *
+		 * If a PMI is pending, then MSR[EE] must be disabled (because
+		 * the masked PMI handler disabling EE). So it is safe to
+		 * call clear_pmi_irq_pending().
+		 */
+		if (pmi_irq_pending())
+			clear_pmi_irq_pending();
+
+		/* Start from the cached MMCRA value, not the live register. */
+		val = mmcra = cpuhw->mmcr.mmcra;
+
+		/*
+		 * Disable instruction sampling if it was enabled
+		 */
+		val &= ~MMCRA_SAMPLE_ENABLE;
+
+		/* Disable BHRB via mmcra (BHRBRD) for p10 */
+		if (ppmu->flags & PPMU_ARCH_31)
+			val |= MMCRA_BHRB_DISABLE;
+
+		/*
+		 * Write SPRN_MMCRA if mmcra has either disabled
+		 * instruction sampling or BHRB.
+		 */
+		if (val != mmcra) {
+			mtspr(SPRN_MMCRA, val);
+			mb();
+			isync();
+		}
+
+		cpuhw->disabled = 1;
+		cpuhw->n_added = 0;
+
+		ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+		/*
+		 * These are readable by userspace, may contain kernel
+		 * addresses and are not switched by context switch, so clear
+		 * them now to avoid leaking anything to userspace in general
+		 * including to another process.
+		 */
+		if (ppmu->flags & PPMU_ARCH_207S) {
+			mtspr(SPRN_SDAR, 0);
+			mtspr(SPRN_SIAR, 0);
+		}
+#endif
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Re-enable the PMU on this CPU if it is currently marked disabled.
+ * If we were previously disabled and events were added, then
+ * put the new config on the PMU.
+ *
+ * Installed as the pmu_enable callback of power_pmu.  Does nothing when
+ * no PMU driver is registered or when the PMU is not marked disabled.
+ * With no events scheduled the PMU is marked not in use and stays
+ * disabled.
+ */
+static void power_pmu_enable(struct pmu *pmu)
+{
+	struct perf_event *event;
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+	long i;
+	unsigned long val, mmcr0;
+	s64 left;
+	unsigned int hwc_index[MAX_HWEVENTS];
+	int n_lim;
+	int idx;
+	bool ebb;
+
+	if (!ppmu)
+		return;
+	local_irq_save(flags);
+
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	if (!cpuhw->disabled)
+		goto out;
+
+	if (cpuhw->n_events == 0) {
+		ppc_set_pmu_inuse(0);
+		goto out;
+	}
+
+	cpuhw->disabled = 0;
+
+	/*
+	 * EBB requires an exclusive group and all events must have the EBB
+	 * flag set, or not set, so we can just check a single event. Also we
+	 * know we have at least one event.
+	 */
+	ebb = is_ebb_event(cpuhw->event[0]);
+
+	/*
+	 * If we didn't change anything, or only removed events,
+	 * no need to recalculate MMCR* settings and reset the PMCs.
+	 * Just reenable the PMU with the current MMCR* settings
+	 * (possibly updated for removal of events).
+	 */
+	if (!cpuhw->n_added) {
+		/*
+		 * If there is any active event with an overflown PMC
+		 * value, set back PACA_IRQ_PMI which would have been
+		 * cleared in power_pmu_disable().
+		 */
+		hard_irq_disable();
+		if (any_pmc_overflown(cpuhw))
+			set_pmi_irq_pending();
+
+		mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+		mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+		if (ppmu->flags & PPMU_ARCH_31)
+			mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
+		goto out_enable;
+	}
+
+	/*
+	 * Clear all MMCR settings and recompute them for the new set of events.
+	 */
+	memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+
+	/*
+	 * hwc_index[] is filled in by compute_mmcr(); below, hwc_index[i] + 1
+	 * is used as the PMC number for event i.
+	 * NOTE(review): on failure we leave with cpuhw->disabled already
+	 * cleared but without unfreezing the PMU - confirm this is intended.
+	 */
+	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
+			       &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
+		/* shouldn't ever get here */
+		printk(KERN_ERR "oops compute_mmcr failed\n");
+		goto out;
+	}
+
+	if (!(ppmu->flags & PPMU_ARCH_207S)) {
+		/*
+		 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
+		 * bits for the first event. We have already checked that all
+		 * events have the same value for these bits as the first event.
+		 */
+		event = cpuhw->event[0];
+		if (event->attr.exclude_user)
+			cpuhw->mmcr.mmcr0 |= MMCR0_FCP;
+		if (event->attr.exclude_kernel)
+			cpuhw->mmcr.mmcr0 |= freeze_events_kernel;
+		if (event->attr.exclude_hv)
+			cpuhw->mmcr.mmcr0 |= MMCR0_FCHV;
+	}
+
+	/*
+	 * Write the new configuration to MMCR* with the freeze
+	 * bit set and set the hardware events to their initial values.
+	 * Then unfreeze the events.
+	 */
+	ppc_set_pmu_inuse(1);
+	mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+	mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+	mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+				| MMCR0_FC);
+	if (ppmu->flags & PPMU_ARCH_207S)
+		mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2);
+
+	if (ppmu->flags & PPMU_ARCH_31)
+		mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
+
+	/*
+	 * Read off any pre-existing events that need to move
+	 * to another PMC.
+	 */
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		event = cpuhw->event[i];
+		if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
+			power_pmu_read(event);
+			write_pmc(event->hw.idx, 0);
+			event->hw.idx = 0;
+		}
+	}
+
+	/*
+	 * Initialize the PMCs for all the new and moved events.
+	 * Events that land on a limited PMC are only recorded in
+	 * limited_counter[]/limited_hwidx[] here; their hw.idx is left at 0.
+	 */
+	cpuhw->n_limited = n_lim = 0;
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		event = cpuhw->event[i];
+		if (event->hw.idx)
+			continue;
+		idx = hwc_index[i] + 1;
+		if (is_limited_pmc(idx)) {
+			cpuhw->limited_counter[n_lim] = event;
+			cpuhw->limited_hwidx[n_lim] = idx;
+			++n_lim;
+			continue;
+		}
+
+		if (ebb)
+			val = local64_read(&event->hw.prev_count);
+		else {
+			val = 0;
+			/*
+			 * Sampling events start at 0x80000000 - period_left;
+			 * larger periods start counting from 0.
+			 */
+			if (event->hw.sample_period) {
+				left = local64_read(&event->hw.period_left);
+				if (left < 0x80000000L)
+					val = 0x80000000L - left;
+			}
+			local64_set(&event->hw.prev_count, val);
+		}
+
+		event->hw.idx = idx;
+		/* A stopped event gets its PMC programmed with 0. */
+		if (event->hw.state & PERF_HES_STOPPED)
+			val = 0;
+		write_pmc(idx, val);
+
+		perf_event_update_userpage(event);
+	}
+	cpuhw->n_limited = n_lim;
+	cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+	pmao_restore_workaround(ebb);
+
+	mmcr0 = ebb_switch_in(ebb, cpuhw);
+
+	mb();
+	if (cpuhw->bhrb_users)
+		ppmu->config_bhrb(cpuhw->bhrb_filter);
+
+	write_mmcr0(cpuhw, mmcr0);
+
+	/*
+	 * Enable instruction sampling if necessary
+	 */
+	if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) {
+		mb();
+		mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra);
+	}
+
+ out:
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Gather the hardware events of a group into three parallel arrays.
+ *
+ * @group:     group leader
+ * @max_count: capacity of the output arrays
+ * @ctrs:      receives the perf_event pointers
+ * @events:    receives each event's hw.config
+ * @flags:     receives each event's hw.event_base
+ *
+ * The leader is taken if its pmu uses perf_hw_context; a sibling is taken
+ * if its pmu uses perf_hw_context and it is not in PERF_EVENT_STATE_OFF.
+ * Returns the number of events collected, or -1 if they do not fit in
+ * @max_count entries.
+ */
+static int collect_events(struct perf_event *group, int max_count,
+			  struct perf_event *ctrs[], u64 *events,
+			  unsigned int *flags)
+{
+	struct perf_event *sib;
+	int count = 0;
+
+	if (group->pmu->task_ctx_nr == perf_hw_context) {
+		if (count >= max_count)
+			return -1;
+		ctrs[count] = group;
+		flags[count] = group->hw.event_base;
+		events[count] = group->hw.config;
+		count++;
+	}
+
+	for_each_sibling_event(sib, group) {
+		/* Skip siblings of other PMU types and switched-off ones. */
+		if (sib->pmu->task_ctx_nr != perf_hw_context ||
+		    sib->state == PERF_EVENT_STATE_OFF)
+			continue;
+
+		if (count >= max_count)
+			return -1;
+		ctrs[count] = sib;
+		flags[count] = sib->hw.event_base;
+		events[count] = sib->hw.config;
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Add an event to the PMU.
+ * The PMU is disabled around the update and re-enabled afterwards via
+ * perf_pmu_disable()/perf_pmu_enable(); the re-enable is what puts the
+ * new configuration on the hardware (n_added is bumped for that purpose).
+ *
+ * @event:    event to schedule on this CPU
+ * @ef_flags: PERF_EF_* flags; without PERF_EF_START the event is added in
+ *            the stopped state
+ *
+ * Returns 0 on success, -EAGAIN if there is no free counter or the
+ * resulting set of events is not schedulable.
+ */
+static int power_pmu_add(struct perf_event *event, int ef_flags)
+{
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+	int n0;
+	int ret = -EAGAIN;
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	/*
+	 * Add the event to the list (if there is room)
+	 * and check whether the total set is still feasible.
+	 * The slot at n0 is only committed once n_events is incremented
+	 * below; on failure it is simply left behind unused.
+	 */
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	n0 = cpuhw->n_events;
+	if (n0 >= ppmu->n_counter)
+		goto out;
+	cpuhw->event[n0] = event;
+	cpuhw->events[n0] = event->hw.config;
+	cpuhw->flags[n0] = event->hw.event_base;
+
+	/*
+	 * This event may have been disabled/stopped in record_and_restart()
+	 * because we exceeded the ->event_limit. If re-starting the event,
+	 * clear the ->hw.state (STOPPED and UPTODATE flags), so the user
+	 * notification is re-enabled.
+	 */
+	if (!(ef_flags & PERF_EF_START))
+		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	else
+		event->hw.state = 0;
+
+	/*
+	 * If group events scheduling transaction was started,
+	 * skip the schedulability test here, it will be performed
+	 * at commit time(->commit_txn) as a whole
+	 */
+	if (cpuhw->txn_flags & PERF_PMU_TXN_ADD)
+		goto nocheck;
+
+	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
+		goto out;
+	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
+		goto out;
+	/* Pick up the event code stored in events[n0] after the check. */
+	event->hw.config = cpuhw->events[n0];
+
+nocheck:
+	ebb_event_add(event);
+
+	++cpuhw->n_events;
+	++cpuhw->n_added;
+
+	ret = 0;
+ out:
+	/*
+	 * NOTE(review): this label is shared by the success and failure
+	 * paths, so the BHRB filter is installed and BHRB enabled even when
+	 * ret is -EAGAIN - confirm that is intended.
+	 */
+	if (has_branch_stack(event)) {
+		u64 bhrb_filter = -1;
+
+		if (ppmu->bhrb_filter_map)
+			bhrb_filter = ppmu->bhrb_filter_map(
+				event->attr.branch_sample_type);
+
+		if (bhrb_filter != -1) {
+			cpuhw->bhrb_filter = bhrb_filter;
+			power_pmu_bhrb_enable(event);
+		}
+	}
+
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Remove an event from the PMU.
+ *
+ * The event's count is brought up to date first, then the event is taken
+ * out of the per-cpu event arrays (and the limited-counter list if it is
+ * on it), its PMC is cleared, and BHRB use is dropped for branch-stack
+ * events.  @ef_flags is not used.
+ */
+static void power_pmu_del(struct perf_event *event, int ef_flags)
+{
+	struct cpu_hw_events *cpuhw;
+	long i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	power_pmu_read(event);
+
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	for (i = 0; i < cpuhw->n_events; ++i) {
+		if (event == cpuhw->event[i]) {
+			/* Close the gap by shifting the later entries down. */
+			while (++i < cpuhw->n_events) {
+				cpuhw->event[i-1] = cpuhw->event[i];
+				cpuhw->events[i-1] = cpuhw->events[i];
+				cpuhw->flags[i-1] = cpuhw->flags[i];
+			}
+			--cpuhw->n_events;
+			/*
+			 * NOTE(review): disable_pmc() is called before the
+			 * hw.idx check, so it receives hw.idx - 1 even when
+			 * hw.idx is 0 - confirm the callbacks tolerate that.
+			 */
+			ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr);
+			if (event->hw.idx) {
+				write_pmc(event->hw.idx, 0);
+				event->hw.idx = 0;
+			}
+			perf_event_update_userpage(event);
+			break;
+		}
+	}
+	/* Drop the event from the limited-counter list as well, if present. */
+	for (i = 0; i < cpuhw->n_limited; ++i)
+		if (event == cpuhw->limited_counter[i])
+			break;
+	if (i < cpuhw->n_limited) {
+		while (++i < cpuhw->n_limited) {
+			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
+			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+		}
+		--cpuhw->n_limited;
+	}
+	if (cpuhw->n_events == 0) {
+		/* disable exceptions if no events are running */
+		cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE);
+	}
+
+	if (has_branch_stack(event))
+		power_pmu_bhrb_disable(event);
+
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+/*
+ * POWER-PMU does not support disabling individual counters, hence
+ * program their cycle counter to their max value and ignore the interrupts.
+ */
+
+/*
+ * Restart a stopped sampling event.
+ *
+ * Only acts on events that have a PMC assigned, have a sample period and
+ * are currently in the PERF_HES_STOPPED state.  The PMC is reprogrammed
+ * from hw.period_left and hw.state is cleared.
+ */
+static void power_pmu_start(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+	s64 left;
+	unsigned long val;
+
+	if (!event->hw.idx || !event->hw.sample_period)
+		return;
+
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	/* A reload is only expected for an event whose count is up to date. */
+	if (ef_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = 0;
+	left = local64_read(&event->hw.period_left);
+
+	/* Same initial value rule as used when enabling the PMU. */
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+
+	write_pmc(event->hw.idx, val);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+/*
+ * Stop a running sampling event.
+ *
+ * Only acts on events that have a PMC assigned, have a sample period and
+ * are not already stopped.  The count is brought up to date, the event is
+ * marked PERF_HES_STOPPED | PERF_HES_UPTODATE and its PMC is written
+ * with 0.  @ef_flags is not used.
+ */
+static void power_pmu_stop(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+
+	if (!event->hw.idx || !event->hw.sample_period)
+		return;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	/* Must read before setting STOPPED: power_pmu_read() skips stopped events. */
+	power_pmu_read(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	write_pmc(event->hw.idx, 0);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+/*
+ * Start a group events scheduling transaction.
+ *
+ * The transaction flags are saved in the per-cpu state.  While
+ * PERF_PMU_TXN_ADD is set there, power_pmu_add() skips its
+ * schedulability test; the test is performed once for the whole group
+ * at commit time.
+ *
+ * We only support PERF_PMU_TXN_ADD transactions.  For any other kind the
+ * flags are saved but nothing else is done.
+ */
+static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	WARN_ON_ONCE(cpuhw->txn_flags);		/* txn already in flight */
+	cpuhw->txn_flags = txn_flags;
+
+	if (!(txn_flags & ~PERF_PMU_TXN_ADD)) {
+		perf_pmu_disable(pmu);
+		/* Remember where this transaction's events begin. */
+		cpuhw->n_txn_start = cpuhw->n_events;
+	}
+}
+
+/*
+ * Abandon the group events scheduling transaction in flight.
+ *
+ * The saved transaction flags are cleared, so power_pmu_add() goes back
+ * to running its own schedulability test.  For a PERF_PMU_TXN_ADD
+ * transaction the PMU disable taken in power_pmu_start_txn() is undone.
+ */
+static void power_pmu_cancel_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	unsigned int saved_flags;
+
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	saved_flags = cpuhw->txn_flags;
+	cpuhw->txn_flags = 0;
+
+	/* Only PERF_PMU_TXN_ADD transactions disabled the PMU. */
+	if (!(saved_flags & ~PERF_PMU_TXN_ADD))
+		perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit the group events scheduling transaction in flight.
+ *
+ * Runs the exclude and constraint checks over all events currently on
+ * this CPU as a whole.  On success the event codes chosen by the
+ * constraint check are stored into the events added since
+ * power_pmu_start_txn(), the transaction flags are cleared and the PMU is
+ * re-enabled.
+ *
+ * Returns 0 on success (or for a transaction type other than
+ * PERF_PMU_TXN_ADD) and -EAGAIN if no PMU is registered or the set of
+ * events is not schedulable; on such a failure the transaction flags are
+ * left set.
+ */
+static int power_pmu_commit_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw;
+	long idx, total;
+
+	if (!ppmu)
+		return -EAGAIN;
+
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	/* Nothing to verify for transaction types we do not handle. */
+	if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuhw->txn_flags = 0;
+		return 0;
+	}
+
+	total = cpuhw->n_events;
+
+	if (check_excludes(cpuhw->event, cpuhw->flags, 0, total))
+		return -EAGAIN;
+
+	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, total,
+				    cpuhw->event) < 0)
+		return -EAGAIN;
+
+	idx = cpuhw->n_txn_start;
+	while (idx < total) {
+		cpuhw->event[idx]->hw.config = cpuhw->events[idx];
+		++idx;
+	}
+
+	cpuhw->txn_flags = 0;
+	perf_pmu_enable(pmu);
+	return 0;
+}
+
+/*
+ * Return 1 if we might be able to put event on a limited PMC,
+ * or 0 if not.
+ *
+ * An event qualifies only if it excludes no processor mode and has no
+ * sample period, and either its event_id @ev is itself a limited-PMC
+ * event or the PMU reports at least one alternative code when asked with
+ * the limited-PMC flags added to @flags.
+ */
+static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
+				 unsigned int flags)
+{
+	u64 alt[MAX_EVENT_ALTERNATIVES];
+	int n_alt;
+
+	if (event->attr.exclude_user)
+		return 0;
+	if (event->attr.exclude_kernel)
+		return 0;
+	if (event->attr.exclude_hv)
+		return 0;
+	if (event->attr.sample_period)
+		return 0;
+
+	if (ppmu->limited_pmc_event(ev))
+		return 1;
+
+	/*
+	 * The requested event_id isn't on a limited PMC already;
+	 * see if any alternative code goes on a limited PMC.
+	 */
+	if (!ppmu->get_alternatives)
+		return 0;
+
+	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+	n_alt = ppmu->get_alternatives(ev, flags, alt);
+	if (n_alt > 0)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Find an alternative event_id that goes on a normal PMC, if possible,
+ * and return the event_id code, or 0 if there is no such alternative.
+ * (Note: event_id code 0 is "don't count" on all machines.)
+ *
+ * @ev:    event_id code to find alternatives for
+ * @flags: PPMU_* flags for the event; the limited-PMC bits are stripped
+ *         before the PMU is asked for alternatives
+ *
+ * A PMU without a get_alternatives hook has no alternatives to offer, so
+ * 0 is returned, matching the check in can_go_on_limited_pmc().
+ */
+static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
+{
+	u64 alt[MAX_EVENT_ALTERNATIVES];
+	int n;
+
+	if (!ppmu->get_alternatives)
+		return 0;
+
+	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+	n = ppmu->get_alternatives(ev, flags, alt);
+	/* Never read alt[0] unless a positive count was returned. */
+	if (n <= 0)
+		return 0;
+	return alt[0];
+}
+
+/*
+ * Number of perf_events counting hardware events.  Incremented in
+ * power_pmu_event_init() and decremented in hw_perf_event_destroy().
+ */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_event.
+ *
+ * Installed as event->destroy by power_pmu_event_init().
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/*
+	 * Fast path: drop our reference without the mutex as long as the
+	 * count is not 1.  Only when we may be the last user (the add was
+	 * refused because the count was 1) do we take pmc_reserve_mutex and
+	 * do the final decrement under it, releasing the hardware if the
+	 * count really reached 0.
+	 */
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+/*
+ * Translate a generic cache event_id config to a raw event_id code.
+ *
+ * @config: packed generic cache event: cache type in bits 0-7, operation
+ *          in bits 8-15, result in bits 16-23
+ * @eventp: receives the raw event code on success
+ *
+ * Returns 0 on success, -EINVAL if the PMU has no cache event table, a
+ * field is out of range or the table entry is -1, and -EOPNOTSUPP if the
+ * table entry is 0.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+	unsigned long cache_type, cache_op, cache_result;
+	u64 code;
+
+	if (!ppmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	cache_type = config & 0xff;
+	cache_op = (config >> 8) & 0xff;
+	cache_result = (config >> 16) & 0xff;
+
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	code = (*ppmu->cache_events)[cache_type][cache_op][cache_result];
+	if (code == 0)
+		return -EOPNOTSUPP;
+	if (code == -1)
+		return -EINVAL;
+
+	*eventp = code;
+	return 0;
+}
+
+/*
+ * Return true if @ev appears in the PMU's blacklist_ev[] table.
+ * Callers check that ppmu->blacklist_ev is non-NULL first.
+ */
+static bool is_event_blacklisted(u64 ev)
+{
+	int idx = 0;
+
+	while (idx < ppmu->n_blacklist_ev) {
+		if (ppmu->blacklist_ev[idx] == ev)
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Validate a new event and set up its hardware state.
+ *
+ * Translates attr.type/attr.config into a raw event code, checks it
+ * against the PMU's blacklist and attr-config checks, decides whether it
+ * may use a limited PMC, verifies that it can be scheduled together with
+ * the rest of its group, and finally reserves the PMC hardware if this is
+ * the first hardware event.
+ *
+ * Returns 0 on success, or:
+ *   -ENOENT     no PMU driver registered, or attr.type not handled here
+ *   -EOPNOTSUPP unsupported generic/cache event, or unsupported branch
+ *               stack request
+ *   -EINVAL     blacklisted or invalid event, or the group is not
+ *               schedulable
+ *   -EBUSY      the PMC hardware could not be reserved
+ * Errors from hw_perf_cache_event() and ebb_event_check() are passed on.
+ */
+static int power_pmu_event_init(struct perf_event *event)
+{
+	u64 ev;
+	unsigned long flags, irq_flags;
+	struct perf_event *ctrs[MAX_HWEVENTS];
+	u64 events[MAX_HWEVENTS];
+	unsigned int cflags[MAX_HWEVENTS];
+	int n;
+	int err;
+	struct cpu_hw_events *cpuhw;
+
+	if (!ppmu)
+		return -ENOENT;
+
+	if (has_branch_stack(event)) {
+	        /* PMU has BHRB enabled */
+		if (!(ppmu->flags & PPMU_ARCH_207S))
+			return -EOPNOTSUPP;
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		ev = event->attr.config;
+		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+			return -EOPNOTSUPP;
+
+		/*
+		 * NOTE(review): at this point ev is still the generic event
+		 * index, not the translated code, unlike the other two cases
+		 * below - confirm which value the blacklist is meant to see.
+		 */
+		if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+			return -EINVAL;
+		ev = ppmu->generic_events[ev];
+		break;
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(event->attr.config, &ev);
+		if (err)
+			return err;
+
+		if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+			return -EINVAL;
+		break;
+	case PERF_TYPE_RAW:
+		ev = event->attr.config;
+
+		if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+			return -EINVAL;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	/*
+	 * PMU config registers have fields that are
+	 * reserved and some specific values for bit fields are reserved.
+	 * For ex., MMCRA[61:62] is Random Sampling Mode (SM)
+	 * and value of 0b11 to this field is reserved.
+	 * Check for invalid values in attr.config.
+	 */
+	if (ppmu->check_attr_config &&
+	    ppmu->check_attr_config(event))
+		return -EINVAL;
+
+	event->hw.config_base = ev;
+	event->hw.idx = 0;
+
+	/*
+	 * If we are not running on a hypervisor, force the
+	 * exclude_hv bit to 0 so that we don't care what
+	 * the user set it to.
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		event->attr.exclude_hv = 0;
+
+	/*
+	 * If this is a per-task event, then we can use
+	 * PM_RUN_* events interchangeably with their non RUN_*
+	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+	 * XXX we should check if the task is an idle task.
+	 */
+	flags = 0;
+	if (event->attach_state & PERF_ATTACH_TASK)
+		flags |= PPMU_ONLY_COUNT_RUN;
+
+	/*
+	 * If this machine has limited events, check whether this
+	 * event_id could go on a limited event.
+	 */
+	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
+		if (can_go_on_limited_pmc(event, ev, flags)) {
+			flags |= PPMU_LIMITED_PMC_OK;
+		} else if (ppmu->limited_pmc_event(ev)) {
+			/*
+			 * The requested event_id is on a limited PMC,
+			 * but we can't use a limited PMC; see if any
+			 * alternative goes on a normal PMC.
+			 */
+			ev = normal_pmc_alternative(ev, flags);
+			if (!ev)
+				return -EINVAL;
+		}
+	}
+
+	/* Extra checks for EBB */
+	err = ebb_event_check(event);
+	if (err)
+		return err;
+
+	/*
+	 * If this is in a group, check if it can go on with all the
+	 * other hardware events in the group.  We assume the event
+	 * hasn't been linked into its leader's sibling list at this point.
+	 * One slot is kept free for this event, hence n_counter - 1.
+	 */
+	n = 0;
+	if (event->group_leader != event) {
+		n = collect_events(event->group_leader, ppmu->n_counter - 1,
+				   ctrs, events, cflags);
+		if (n < 0)
+			return -EINVAL;
+	}
+	events[n] = ev;
+	ctrs[n] = event;
+	cflags[n] = flags;
+	if (check_excludes(ctrs, cflags, n, 1))
+		return -EINVAL;
+
+	local_irq_save(irq_flags);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	/* The result is only acted on after the branch stack checks below. */
+	err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
+
+	if (has_branch_stack(event)) {
+		u64 bhrb_filter = -1;
+
+		/*
+		 * Currently no PMU supports having multiple branch filters
+		 * at the same time. Branch filters are set via MMCRA IFM[32:33]
+		 * bits for Power8 and above. Return EOPNOTSUPP when multiple
+		 * branch filters are requested in the event attr.
+		 *
+		 * When opening event via perf_event_open(), branch_sample_type
+		 * gets adjusted in perf_copy_attr(). Kernel will automatically
+		 * adjust the branch_sample_type based on the event modifier
+		 * settings to include PERF_SAMPLE_BRANCH_PLM_ALL. Hence drop
+		 * the check for PERF_SAMPLE_BRANCH_PLM_ALL.
+		 */
+		if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) {
+			local_irq_restore(irq_flags);
+			return -EOPNOTSUPP;
+		}
+
+		if (ppmu->bhrb_filter_map)
+			bhrb_filter = ppmu->bhrb_filter_map(
+					event->attr.branch_sample_type);
+
+		if (bhrb_filter == -1) {
+			local_irq_restore(irq_flags);
+			return -EOPNOTSUPP;
+		}
+		cpuhw->bhrb_filter = bhrb_filter;
+	}
+
+	local_irq_restore(irq_flags);
+	if (err)
+		return -EINVAL;
+
+	/* events[n] holds the event code to use after the constraint check. */
+	event->hw.config = events[n];
+	event->hw.event_base = cflags[n];
+	event->hw.last_period = event->hw.sample_period;
+	local64_set(&event->hw.period_left, event->hw.last_period);
+
+	/*
+	 * For EBB events we just context switch the PMC value, we don't do any
+	 * of the sample_period logic. We use hw.prev_count for this.
+	 */
+	if (is_ebb_event(event))
+		local64_set(&event->hw.prev_count, 0);
+
+	/*
+	 * See if we need to reserve the PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 &&
+		    reserve_pmc_hardware(perf_event_interrupt))
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	event->destroy = hw_perf_event_destroy;
+
+	return err;
+}
+
+/*
+ * Report the PMC number currently assigned to @event; 0 means none.
+ * Installed as the event_idx callback of power_pmu.
+ */
+static int power_pmu_event_idx(struct perf_event *event)
+{
+	int pmc = event->hw.idx;
+
+	return pmc;
+}
+
+/*
+ * sysfs show routine for a PMU event attribute: prints the event id of
+ * the perf_pmu_events_attr containing @attr as "event=0x<hex>\n" into
+ * @page and returns the number of characters written.  @dev is not used.
+ */
+ssize_t power_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *ev_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%02llx\n", ev_attr->id);
+}
+
+/*
+ * Callback table tying the power_pmu_*() routines of this file to the
+ * generic struct pmu operations.
+ */
+static struct pmu power_pmu = {
+	.pmu_enable	= power_pmu_enable,
+	.pmu_disable	= power_pmu_disable,
+	.event_init	= power_pmu_event_init,
+	.add		= power_pmu_add,
+	.del		= power_pmu_del,
+	.start		= power_pmu_start,
+	.stop		= power_pmu_stop,
+	.read		= power_pmu_read,
+	.start_txn	= power_pmu_start_txn,
+	.cancel_txn	= power_pmu_cancel_txn,
+	.commit_txn	= power_pmu_commit_txn,
+	.event_idx	= power_pmu_event_idx,
+	.sched_task	= power_pmu_sched_task,
+};
+
+/*
+ * Sample types for which record_and_restart() fetches a data address
+ * via perf_get_data_addr().
+ */
+#define PERF_SAMPLE_ADDR_TYPE  (PERF_SAMPLE_ADDR |		\
+				PERF_SAMPLE_PHYS_ADDR |		\
+				PERF_SAMPLE_DATA_PAGE_SIZE)
+/*
+ * A counter has overflowed; update its count and record
+ * things if requested.  Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ *
+ * @event: event whose PMC overflowed
+ * @val:   PMC value read by the caller
+ * @regs:  register state at the time of the interrupt
+ *
+ * A stopped event only has its PMC reset to 0.  Otherwise the PMC is
+ * reprogrammed for the remaining (or next) period and, if the period
+ * expired, a sample is emitted or the interrupt is accounted.
+ */
+static void record_and_restart(struct perf_event *event, unsigned long val,
+			       struct pt_regs *regs)
+{
+	u64 period = event->hw.sample_period;
+	s64 prev, delta, left;
+	int record = 0;
+
+	if (event->hw.state & PERF_HES_STOPPED) {
+		write_pmc(event->hw.idx, 0);
+		return;
+	}
+
+	/* we don't have to worry about interrupts here */
+	prev = local64_read(&event->hw.prev_count);
+	delta = check_and_compute_delta(prev, val);
+	local64_add(delta, &event->count);
+
+	/*
+	 * See if the total period for this event has expired,
+	 * and update for the next period.
+	 */
+	val = 0;
+	left = local64_read(&event->hw.period_left) - delta;
+	/* A zero delta (e.g. a detected rollback) must not shrink the period. */
+	if (delta == 0)
+		left++;
+	if (period) {
+		if (left <= 0) {
+			left += period;
+			/* Overshot by more than a whole period: start afresh. */
+			if (left <= 0)
+				left = period;
+
+			/*
+			 * If address is not requested in the sample via
+			 * PERF_SAMPLE_IP, just record that sample irrespective
+			 * of SIAR valid check.
+			 */
+			if (event->attr.sample_type & PERF_SAMPLE_IP)
+				record = siar_valid(regs);
+			else
+				record = 1;
+
+			event->hw.last_period = event->hw.sample_period;
+		}
+		if (left < 0x80000000LL)
+			val = 0x80000000LL - left;
+	}
+
+	write_pmc(event->hw.idx, val);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+
+	/*
+	 * Due to hardware limitation, sometimes SIAR could sample a kernel
+	 * address even when freeze on supervisor state (kernel) is set in
+	 * MMCR2. Check attr.exclude_kernel and address to drop the sample in
+	 * these cases.
+	 */
+	if (event->attr.exclude_kernel &&
+	    (event->attr.sample_type & PERF_SAMPLE_IP) &&
+	    is_kernel_addr(mfspr(SPRN_SIAR)))
+		record = 0;
+
+	/*
+	 * Finally record data if requested.
+	 */
+	if (record) {
+		struct perf_sample_data data;
+
+		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
+
+		if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
+			perf_get_data_addr(event, regs, &data.addr);
+
+		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
+			struct cpu_hw_events *cpuhw;
+			cpuhw = this_cpu_ptr(&cpu_hw_events);
+			power_pmu_bhrb_read(event, cpuhw);
+			perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+		}
+
+		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+						ppmu->get_mem_data_src) {
+			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
+			data.sample_flags |= PERF_SAMPLE_DATA_SRC;
+		}
+
+		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
+						ppmu->get_mem_weight) {
+			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
+			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
+		/* A non-zero return asks us to stop the event. */
+		if (perf_event_overflow(event, &data, regs))
+			power_pmu_stop(event, 0);
+	} else if (period) {
+		/* Account for interrupt in case of invalid SIAR */
+		if (perf_event_account_interrupt(event))
+			power_pmu_stop(event, 0);
+	}
+}
+
+/*
+ * Called from generic code to get the misc flags (i.e. processor mode)
+ * for an event_id.
+ *
+ * Whatever perf_get_misc_flags() reports is used if non-zero; otherwise
+ * the mode is taken from @regs.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	u32 misc = perf_get_misc_flags(regs);
+
+	if (!misc) {
+		if (user_mode(regs))
+			return PERF_RECORD_MISC_USER;
+		return PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}
+
+/*
+ * Instruction pointer hook for generic perf code: report the address
+ * sampled in SIAR when it is usable, otherwise the NIP held in @regs.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	unsigned long sampled = mfspr(SPRN_SIAR);
+
+	/* SIAR is only used when selected, flagged valid and non-zero. */
+	if (!regs_use_siar(regs) || !siar_valid(regs) || !sampled)
+		return regs->nip;
+	return sampled + perf_ip_adjust(regs);
+}
+
+static bool pmc_overflow_power7(unsigned long val)
+{
+	/*
+	 * Events on POWER7 can roll back if a speculative event doesn't
+	 * eventually complete, yet in rare cases the PMC still raises a
+	 * performance monitor exception. That PMC has to be caught so it
+	 * can be reset; in every such case it is 256 or fewer counts
+	 * short of the 0x80000000 overflow point.
+	 *
+	 * The caller only applies this check when the normal pass found no
+	 * overflowed PMC, because a user may legitimately request a period
+	 * shorter than 256 and those counters must not be reset by mistake.
+	 *
+	 * The subtraction is unsigned, so a value already past 0x80000000
+	 * wraps to a large number and is not reported here.
+	 */
+	return (0x80000000 - val) <= 256;
+}
+
+static bool pmc_overflow(unsigned long val)
+{
+	/*
+	 * Overflowed once bit 31 is set: the low 32 bits read as negative.
+	 */
+	return (int)val < 0;
+}
+
+/*
+ * Performance monitor interrupt: find overflowed PMCs and restart them
+ */
+static void __perf_event_interrupt(struct pt_regs *regs)
+{
+	int i, j;
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	struct perf_event *event;
+	int found, active;
+
+	if (cpuhw->n_limited)
+		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
+					mfspr(SPRN_PMC6));
+
+	perf_read_regs(regs);
+
+	/* Read all the PMCs since we'll need them a bunch of times */
+	for (i = 0; i < ppmu->n_counter; ++i)
+		cpuhw->pmcs[i] = read_pmc(i + 1);
+
+	/* Try to find what caused the IRQ */
+	found = 0;
+	for (i = 0; i < ppmu->n_counter; ++i) {
+		if (!pmc_overflow(cpuhw->pmcs[i]))
+			continue;
+		if (is_limited_pmc(i + 1))
+			continue; /* these won't generate IRQs */
+		/*
+		 * We've found one that's overflowed.  For active
+		 * counters we need to log this.  For inactive
+		 * counters, we need to reset it anyway
+		 */
+		found = 1;
+		active = 0;
+		for (j = 0; j < cpuhw->n_events; ++j) {
+			event = cpuhw->event[j];
+			if (event->hw.idx == (i + 1)) {
+				active = 1;
+				record_and_restart(event, cpuhw->pmcs[i], regs);
+				break;
+			}
+		}
+
+		/*
+		 * Clear PACA_IRQ_PMI in case it was set by
+		 * set_pmi_irq_pending() when PMU was enabled
+		 * after accounting for interrupts.
+		 */
+		clear_pmi_irq_pending();
+
+		if (!active)
+			/* reset non active counters that have overflowed */
+			write_pmc(i + 1, 0);
+	}
+	if (!found && pvr_version_is(PVR_POWER7)) {
+		/* check active counters for special buggy p7 overflow */
+		for (i = 0; i < cpuhw->n_events; ++i) {
+			event = cpuhw->event[i];
+			if (!event->hw.idx || is_limited_pmc(event->hw.idx))
+				continue;
+			if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) {
+				/* event has overflowed in a buggy way*/
+				found = 1;
+				record_and_restart(event,
+						   cpuhw->pmcs[event->hw.idx - 1],
+						   regs);
+			}
+		}
+	}
+
+	/*
+	 * During system-wide profiling, or while a specific CPU is monitored for
+	 * an event, corner cases can make a PMC overflow in the idle path and
+	 * raise a PMI after wakeup. Because counter values are _not_ saved and
+	 * restored across idle, that can lead to the "Can't find PMC" message below.
+	 */
+	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
+
+	/*
+	 * Reset MMCR0 to its normal value.  This will set PMXE and
+	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
+	 * and thus allow interrupts to occur again.
+	 * XXX might want to use MSR.PM to keep the events frozen until
+	 * we get back out of this interrupt.
+	 */
+	write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
+
+	/* Clear the cpuhw->pmcs */
+	memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
+
+}
+
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+	u64 start_clock = sched_clock();
+
+	__perf_event_interrupt(regs);
+	perf_sample_event_took(sched_clock() - start_clock);
+}
+
+static int power_pmu_prepare_cpu(unsigned int cpu)
+{
+	struct cpu_hw_events *hw = &per_cpu(cpu_hw_events, cpu);
+
+	if (!ppmu)
+		return 0;	/* no PMU backend registered */
+	memset(hw, 0, sizeof(*hw));
+	hw->mmcr.mmcr0 = MMCR0_FC;	/* FC: freeze counters */
+	return 0;
+}
+
+static ssize_t pmu_name_show(struct device *cdev,
+			     struct device_attribute *attr, char *buf)
+{
+	/* "pmu_name" attribute read: emit the registered PMU's name, if any. */
+	if (!ppmu)
+		return 0;
+
+	return sysfs_emit(buf, "%s", ppmu->name);
+}
+
+static DEVICE_ATTR_RO(pmu_name);
+
+static struct attribute *pmu_caps_attrs[] = {
+	&dev_attr_pmu_name.attr,
+	NULL
+};
+
+static const struct attribute_group pmu_caps_group = {
+	.name  = "caps",
+	.attrs = pmu_caps_attrs,
+};
+
+static const struct attribute_group *pmu_caps_groups[] = {
+	&pmu_caps_group,
+	NULL,
+};
+
+int __init register_power_pmu(struct power_pmu *pmu)
+{
+	if (ppmu)
+		return -EBUSY;		/* something's already registered */
+
+	ppmu = pmu;
+	pr_info("%s performance monitor hardware support registered\n",
+		pmu->name);
+
+	power_pmu.attr_groups = ppmu->attr_groups;
+
+	if (ppmu->flags & PPMU_ARCH_207S)
+		power_pmu.attr_update = pmu_caps_groups;
+
+	power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
+
+#ifdef MSR_HV
+	/*
+	 * Use FCHV to ignore kernel events if MSR.HV is set.
+	 */
+	if (mfmsr() & MSR_HV)
+		freeze_events_kernel = MMCR0_FCHV;
+#endif /* MSR_HV */
+
+	perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
+	cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+			  power_pmu_prepare_cpu, NULL);
+	return 0;
+}
+
+#ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+	ppc_set_pmu_inuse(1);
+	if (pmu_override_val)
+		mtspr(SPRN_MMCR1, pmu_override_val);
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
+static int __init init_ppc64_pmu(void)
+{
+	if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+		pr_warn("disabling perf due to pmu_override= command line option.\n");
+		on_each_cpu(do_pmu_override, NULL, 1);
+		return 0;
+	}
+
+	/* Try each PMU driver in turn; stop at the first that returns 0. */
+	if (!init_power5_pmu())
+		return 0;
+	if (!init_power5p_pmu())
+		return 0;
+	if (!init_power6_pmu())
+		return 0;
+	if (!init_power7_pmu())
+		return 0;
+	if (!init_power8_pmu())
+		return 0;
+	if (!init_power9_pmu())
+		return 0;
+	if (!init_power10_pmu())
+		return 0;
+	if (!init_ppc970_pmu())
+		return 0;
+
+	return init_generic_compat_pmu();
+}
+early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+	unsigned long mmcr1;
+
+	/* Only honoured when the CPU has CPU_FTR_HVMODE. */
+	if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+		return 0;
+
+	/*
+	 * A value that fails to parse is stored as 0, which
+	 * do_pmu_override() treats as "do not write MMCR1".
+	 */
+	pmu_override = true;
+	pmu_override_val = kstrtoul(str, 0, &mmcr1) ? 0 : mmcr1;
+	return 1;
+}
+__setup("pmu_override=", pmu_setup);
+
+#endif
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
new file mode 100644
index 0000000000..1a53ab0844
--- /dev/null
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -0,0 +1,696 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance event support - Freescale Embedded Performance Monitor
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/reg_fsl_emb.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+
+struct cpu_hw_events {
+	int n_events;
+	int disabled;
+	u8  pmcs_enabled;
+	struct perf_event *event[MAX_HWEVENTS];
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+static struct fsl_emb_pmu *ppmu;
+
+/* Number of perf_events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+static void perf_event_interrupt(struct pt_regs *regs);
+
+/*
+ * Read PMC @idx (0-5); any other index is logged and reads back as 0.
+ */
+static unsigned long read_pmc(int idx)
+{
+	unsigned long val;
+
+	switch (idx) {
+	case 0:
+		val = mfpmr(PMRN_PMC0);
+		break;
+	case 1:
+		val = mfpmr(PMRN_PMC1);
+		break;
+	case 2:
+		val = mfpmr(PMRN_PMC2);
+		break;
+	case 3:
+		val = mfpmr(PMRN_PMC3);
+		break;
+	case 4:
+		val = mfpmr(PMRN_PMC4);
+		break;
+	case 5:
+		val = mfpmr(PMRN_PMC5);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
+		val = 0;
+	}
+	return val;
+}
+
+/*
+ * Write PMC @idx (0-5), then isync(); a bad index is logged, not written.
+ */
+static void write_pmc(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 0:
+		mtpmr(PMRN_PMC0, val);
+		break;
+	case 1:
+		mtpmr(PMRN_PMC1, val);
+		break;
+	case 2:
+		mtpmr(PMRN_PMC2, val);
+		break;
+	case 3:
+		mtpmr(PMRN_PMC3, val);
+		break;
+	case 4:
+		mtpmr(PMRN_PMC4, val);
+		break;
+	case 5:
+		mtpmr(PMRN_PMC5, val);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
+	}
+
+	isync();
+}
+
+/*
+ * Write local control A register (PMLCA) @idx; a bad index is only logged.
+ */
+static void write_pmlca(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 0:
+		mtpmr(PMRN_PMLCA0, val);
+		break;
+	case 1:
+		mtpmr(PMRN_PMLCA1, val);
+		break;
+	case 2:
+		mtpmr(PMRN_PMLCA2, val);
+		break;
+	case 3:
+		mtpmr(PMRN_PMLCA3, val);
+		break;
+	case 4:
+		mtpmr(PMRN_PMLCA4, val);
+		break;
+	case 5:
+		mtpmr(PMRN_PMLCA5, val);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
+	}
+
+	isync();
+}
+
+/*
+ * Write local control B register (PMLCB) @idx; a bad index is only logged.
+ */
+static void write_pmlcb(int idx, unsigned long val)
+{
+	switch (idx) {
+	case 0:
+		mtpmr(PMRN_PMLCB0, val);
+		break;
+	case 1:
+		mtpmr(PMRN_PMLCB1, val);
+		break;
+	case 2:
+		mtpmr(PMRN_PMLCB2, val);
+		break;
+	case 3:
+		mtpmr(PMRN_PMLCB3, val);
+		break;
+	case 4:
+		mtpmr(PMRN_PMLCB4, val);
+		break;
+	case 5:
+		mtpmr(PMRN_PMLCB5, val);
+		break;
+	default:
+		printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
+	}
+
+	isync();
+}
+
+static void fsl_emb_pmu_read(struct perf_event *event)
+{
+	s64 val, delta, prev;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	/*
+	 * Performance monitor interrupts come even when interrupts
+	 * are soft-disabled, as long as interrupts are hard-enabled.
+	 * Therefore we treat them like NMIs.
+	 */
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		barrier();
+		val = read_pmc(event->hw.idx);
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+
+	/* The counters are only 32 bits wide */
+	delta = (val - prev) & 0xfffffffful;
+	local64_add(delta, &event->count);
+	local64_sub(delta, &event->hw.period_left);
+}
+
+/*
+ * Disable all events to prevent PMU interrupts and to allow
+ * events to be added or removed.
+ */
+static void fsl_emb_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	if (!cpuhw->disabled) {
+		cpuhw->disabled = 1;
+
+		/*
+		 * Check if we ever enabled the PMU on this cpu.
+		 */
+		if (!cpuhw->pmcs_enabled) {
+			ppc_enable_pmcs();
+			cpuhw->pmcs_enabled = 1;
+		}
+
+		if (atomic_read(&num_events)) {
+			/*
+			 * Set the 'freeze all counters' bit, and disable
+			 * interrupts.  The barrier is to make sure the
+			 * mtpmr has been executed and the PMU has frozen
+			 * the events before we return.
+			 */
+
+			mtpmr(PMRN_PMGC0, PMGC0_FAC);
+			isync();
+		}
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Re-enable the PMU after fsl_emb_pmu_disable(); a no-op if it was not
+ * disabled. If this CPU has any events, PMGC0 is rewritten so that it
+ * no longer carries the "freeze all counters" setting.
+ */
+static void fsl_emb_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	if (!cpuhw->disabled)
+		goto out;
+
+	cpuhw->disabled = 0;
+	ppc_set_pmu_inuse(cpuhw->n_events != 0);
+
+	if (cpuhw->n_events > 0) {
+		mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
+		isync();
+	}
+
+ out:
+	local_irq_restore(flags);
+}
+
+static int collect_events(struct perf_event *group, int max_count,
+			  struct perf_event *ctrs[])
+{
+	struct perf_event *sibling;
+	int count = 0;
+
+	/* Collect the group's non-software events; -1 if over max_count. */
+	if (!is_software_event(group)) {
+		if (count >= max_count)
+			return -1;
+		ctrs[count++] = group;
+	}
+	for_each_sibling_event(sibling, group) {
+		/* Software events and siblings in state OFF are not collected. */
+		if (is_software_event(sibling) ||
+		    sibling->state == PERF_EVENT_STATE_OFF)
+			continue;
+		if (count >= max_count)
+			return -1;
+		ctrs[count++] = sibling;
+	}
+	return count;
+}
+
+/* context locked on entry */
+static int fsl_emb_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw;
+	int ret = -EAGAIN;
+	int num_counters = ppmu->n_counter;
+	u64 val;
+	int i;
+
+	perf_pmu_disable(event->pmu);
+	cpuhw = &get_cpu_var(cpu_hw_events);
+
+	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
+		num_counters = ppmu->n_restricted;
+
+	/*
+	 * Allocate counters from top-down, so that restricted-capable
+	 * counters are kept free as long as possible.
+	 */
+	for (i = num_counters - 1; i >= 0; i--) {
+		if (cpuhw->event[i])
+			continue;
+
+		break;
+	}
+
+	if (i < 0)
+		goto out;
+
+	event->hw.idx = i;
+	cpuhw->event[i] = event;
+	++cpuhw->n_events;
+
+	val = 0;
+	if (event->hw.sample_period) {
+		s64 left = local64_read(&event->hw.period_left);
+		if (left < 0x80000000L)
+			val = 0x80000000L - left;
+	}
+	local64_set(&event->hw.prev_count, val);
+
+	if (unlikely(!(flags & PERF_EF_START))) {
+		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+		val = 0;
+	} else {
+		event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE);
+	}
+
+	write_pmc(i, val);
+	perf_event_update_userpage(event);
+
+	write_pmlcb(i, event->hw.config >> 32);
+	write_pmlca(i, event->hw.config_base);
+
+	ret = 0;
+ out:
+	put_cpu_var(cpu_hw_events);
+	perf_pmu_enable(event->pmu);
+	return ret;
+}
+
+/* context locked on entry; does nothing for an event holding no counter */
+static void fsl_emb_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw;
+	int i = event->hw.idx;
+
+	perf_pmu_disable(event->pmu);
+	if (i < 0)
+		goto out;	/* get_cpu_var() has not been taken yet */
+
+	fsl_emb_pmu_read(event);
+
+	cpuhw = &get_cpu_var(cpu_hw_events);
+
+	WARN_ON(event != cpuhw->event[event->hw.idx]);
+
+	write_pmlca(i, 0);
+	write_pmlcb(i, 0);
+	write_pmc(i, 0);
+
+	cpuhw->event[i] = NULL;
+	event->hw.idx = -1;
+
+	/*
+	 * TODO: if at least one restricted event exists, and we
+	 * just freed up a non-restricted-capable counter, and
+	 * there is a restricted-capable counter occupied by
+	 * a non-restricted event, migrate that event to the
+	 * vacated counter.
+	 */
+
+	cpuhw->n_events--;
+	put_cpu_var(cpu_hw_events);	/* pairs with get_cpu_var() above */
+
+ out:
+	perf_pmu_enable(event->pmu);
+}
+
+static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+	unsigned long val;
+	s64 left;
+
+	if (event->hw.idx < 0 || !event->hw.sample_period)
+		return;
+
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	if (ef_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = 0;
+	left = local64_read(&event->hw.period_left);
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+	write_pmc(event->hw.idx, val);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+
+	if (event->hw.idx < 0 || !event->hw.sample_period)
+		return;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	fsl_emb_pmu_read(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	write_pmc(event->hw.idx, 0);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+/*
+ * Release the PMU if this is the last perf_event.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+/*
+ * Translate a generic cache event_id config to a raw event_id code.
+ */
+static int hw_perf_cache_event(u64 config, u64 *eventp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!ppmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*ppmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*eventp = ev;
+	return 0;
+}
+
+static int fsl_emb_pmu_event_init(struct perf_event *event)
+{
+	u64 ev;
+	struct perf_event *events[MAX_HWEVENTS];
+	int n;
+	int err;
+	int num_restricted;
+	int i;
+
+	if (ppmu->n_counter > MAX_HWEVENTS) {
+		WARN(1, "No. of perf counters (%d) is higher than max array size(%d)\n",
+			ppmu->n_counter, MAX_HWEVENTS);
+		ppmu->n_counter = MAX_HWEVENTS;
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		ev = event->attr.config;
+		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+			return -EOPNOTSUPP;
+		ev = ppmu->generic_events[ev];
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(event->attr.config, &ev);
+		if (err)
+			return err;
+		break;
+
+	case PERF_TYPE_RAW:
+		ev = event->attr.config;
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	event->hw.config = ppmu->xlate_event(ev);
+	if (!(event->hw.config & FSL_EMB_EVENT_VALID))
+		return -EINVAL;
+
+	/*
+	 * If this is in a group, check if it can go on with all the
+	 * other hardware events in the group.  We assume the event
+	 * hasn't been linked into its leader's sibling list at this point.
+	 */
+	n = 0;
+	if (event->group_leader != event) {
+		n = collect_events(event->group_leader,
+				   ppmu->n_counter - 1, events);
+		if (n < 0)
+			return -EINVAL;
+	}
+
+	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
+		num_restricted = 0;
+		for (i = 0; i < n; i++) {
+			if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
+				num_restricted++;
+		}
+
+		if (num_restricted >= ppmu->n_restricted)
+			return -EINVAL;
+	}
+
+	event->hw.idx = -1;
+
+	event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
+				(u32)((ev << 16) & PMLCA_EVENT_MASK);
+
+	if (event->attr.exclude_user)
+		event->hw.config_base |= PMLCA_FCU;
+	if (event->attr.exclude_kernel)
+		event->hw.config_base |= PMLCA_FCS;
+	if (event->attr.exclude_idle)
+		return -EOPNOTSUPP;	/* ENOTSUPP (524) must not reach userspace */
+
+	event->hw.last_period = event->hw.sample_period;
+	local64_set(&event->hw.period_left, event->hw.last_period);
+
+	/*
+	 * See if we need to reserve the PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 &&
+		    reserve_pmc_hardware(perf_event_interrupt))
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+
+		mtpmr(PMRN_PMGC0, PMGC0_FAC);
+		isync();
+	}
+	event->destroy = hw_perf_event_destroy;
+
+	return err;
+}
+
+static struct pmu fsl_emb_pmu = {
+	.pmu_enable	= fsl_emb_pmu_enable,
+	.pmu_disable	= fsl_emb_pmu_disable,
+	.event_init	= fsl_emb_pmu_event_init,
+	.add		= fsl_emb_pmu_add,
+	.del		= fsl_emb_pmu_del,
+	.start		= fsl_emb_pmu_start,
+	.stop		= fsl_emb_pmu_stop,
+	.read		= fsl_emb_pmu_read,
+};
+
+/*
+ * A counter has overflowed; update its count and record
+ * things if requested.  Note that interrupts are hard-disabled
+ * here so there is no possibility of being interrupted.
+ */
+static void record_and_restart(struct perf_event *event, unsigned long val,
+			       struct pt_regs *regs)
+{
+	u64 period = event->hw.sample_period;
+	s64 prev, delta, left;
+	int record = 0;
+
+	if (event->hw.state & PERF_HES_STOPPED) {
+		write_pmc(event->hw.idx, 0);
+		return;
+	}
+
+	/* we don't have to worry about interrupts here */
+	prev = local64_read(&event->hw.prev_count);
+	delta = (val - prev) & 0xfffffffful;
+	local64_add(delta, &event->count);
+
+	/*
+	 * See if the total period for this event has expired,
+	 * and update for the next period.
+	 */
+	val = 0;
+	left = local64_read(&event->hw.period_left) - delta;
+	if (period) {
+		if (left <= 0) {
+			left += period;
+			if (left <= 0)
+				left = period;
+			record = 1;
+			event->hw.last_period = event->hw.sample_period;
+		}
+		if (left < 0x80000000LL)
+			val = 0x80000000LL - left;
+	}
+
+	write_pmc(event->hw.idx, val);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+
+	/*
+	 * Finally record data if requested.
+	 */
+	if (record) {
+		struct perf_sample_data data;
+
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+
+		if (perf_event_overflow(event, &data, regs))
+			fsl_emb_pmu_stop(event, 0);
+	}
+}
+
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+	int i;
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	struct perf_event *event;
+	unsigned long val;
+
+	for (i = 0; i < ppmu->n_counter; ++i) {
+		event = cpuhw->event[i];
+
+		val = read_pmc(i);
+		if ((int)val < 0) {
+			if (event) {
+				/* event has overflowed */
+				record_and_restart(event, val, regs);
+			} else {
+				/*
+				 * Disabled counter is negative,
+				 * reset it just in case.
+				 */
+				write_pmc(i, 0);
+			}
+		}
+	}
+
+	/* PMM will keep counters frozen until we return from the interrupt. */
+	mtmsr(mfmsr() | MSR_PMM);
+	mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
+	isync();
+}
+
+static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
+{
+	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	memset(cpuhw, 0, sizeof(*cpuhw));
+
+	return 0;
+}
+
+int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
+{
+	if (ppmu)
+		return -EBUSY;		/* something's already registered */
+
+	ppmu = pmu;
+	pr_info("%s performance monitor hardware support registered\n",
+		pmu->name);
+
+	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
+	cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+			  fsl_emb_pmu_prepare_cpu, NULL);
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c
new file mode 100644
index 0000000000..e3e1a68eb1
--- /dev/null
+++ b/arch/powerpc/perf/e500-pmu.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for e500 family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ */
+#include <linux/string.h>
+#include <linux/perf_event.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+/*
+ * Map of generic hardware event types to hardware events
+ * Zero if unsupported
+ */
+static int e500_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = 1,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
+	[PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
+	[PERF_COUNT_HW_BRANCH_MISSES] = 15,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 18,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 19,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	/*
+	 * D-cache misses are not split into read/write/prefetch;
+	 * use raw event 41.
+	 */
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	27,		0	},
+		[C(OP_WRITE)] = {	28,		0	},
+		[C(OP_PREFETCH)] = {	29,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	2,		60	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	/*
+	 * Assuming LL means L2, it's not a good match for this model.
+	 * It allocates only on L1 castout or explicit prefetch, and
+	 * does not have separate read/write events (but it does have
+	 * separate instruction/data events).
+	 */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	/*
+	 * There are data/instruction MMU misses, but that's a miss on
+	 * the chip's internal level-one TLB which is probably not
+	 * what the user wants.  Instead, unified level-two TLB misses
+	 * are reported here.
+	 */
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	26,		66	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	12,		15 	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1 	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+static int num_events = 128;
+
+/* Upper half of event id is PMLCb, for threshold events */
+static u64 e500_xlate_event(u64 event_id)
+{
+	const u64 thresh = event_id &
+			   (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH);
+	u32 event_low = (u32)event_id;
+
+	if (event_low >= num_events)
+		return 0;
+
+	/*
+	 * Events 76-81 are the threshold events: they are restricted and
+	 * carry the requested threshold bits through to the result.
+	 */
+	if (event_low >= 76 && event_low <= 81)
+		return FSL_EMB_EVENT_VALID | FSL_EMB_EVENT_RESTRICTED | thresh;
+
+	/* Threshold requested on non-threshold event */
+	if (thresh)
+		return 0;
+
+	return FSL_EMB_EVENT_VALID;
+}
+
+static struct fsl_emb_pmu e500_pmu = {
+	.name			= "e500 family",
+	.n_counter		= 4,
+	.n_restricted		= 2,
+	.xlate_event		= e500_xlate_event,
+	.n_generic		= ARRAY_SIZE(e500_generic_events),
+	.generic_events		= e500_generic_events,
+	.cache_events		= &e500_cache_events,
+};
+
+static int init_e500_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+
+	/* e500mc and e5500 */
+	if (PVR_VER(pvr) == PVR_VER_E500MC || PVR_VER(pvr) == PVR_VER_E5500)
+		num_events = 256;
+	/* anything else must be an e500v1 or e500v2 */
+	else if (PVR_VER(pvr) != PVR_VER_E500V1 && PVR_VER(pvr) != PVR_VER_E500V2)
+		return -ENODEV;
+
+	return register_fsl_emb_pmu(&e500_pmu);
+}
+
+early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c
new file mode 100644
index 0000000000..bd779a2338
--- /dev/null
+++ b/arch/powerpc/perf/e6500-pmu.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for e6500 family processors.
+ *
+ * Author: Priyanka Jain, Priyanka.Jain@freescale.com
+ * Based on e500-pmu.c
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ */
+
+#include <linux/string.h>
+#include <linux/perf_event.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+/*
+ * Map of generic hardware event types to hardware events
+ * Zero if unsupported
+ */
+static int e6500_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = 1,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
+	[PERF_COUNT_HW_CACHE_MISSES] = 221,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
+	[PERF_COUNT_HW_BRANCH_MISSES] = 15,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int e6500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+				/*RESULT_ACCESS		RESULT_MISS */
+		[C(OP_READ)] = {	27,		222	},
+		[C(OP_WRITE)] = {	28,		223	},
+		[C(OP_PREFETCH)] = {	29,		0	},
+	},
+	[C(L1I)] = {
+				/*RESULT_ACCESS		RESULT_MISS */
+		[C(OP_READ)] = {	2,		254	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	37,		0	},
+	},
+	/*
+	 * Assuming LL means L2, it's not a good match for this model.
+	 * It does not have separate read/write events (but it does have
+	 * separate instruction/data events).
+	 */
+	[C(LL)] = {
+				/*RESULT_ACCESS		RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	/*
+	 * There are data/instruction MMU misses, but that's a miss on
+	 * the chip's internal level-one TLB which is probably not
+	 * what the user wants.  Instead, unified level-two TLB misses
+	 * are reported here.
+	 */
+	[C(DTLB)] = {
+				/*RESULT_ACCESS		RESULT_MISS */
+		[C(OP_READ)] = {	26,		66	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {
+				/*RESULT_ACCESS		RESULT_MISS */
+		[C(OP_READ)] = {	12,		15	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(NODE)] = {
+				/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+static int num_events = 512;
+
+/* Upper half of event id is PMLCb; any threshold bits are rejected here */
+static u64 e6500_xlate_event(u64 event_id)
+{
+	const u64 thresh = FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH;
+	u32 event_low = (u32)event_id;
+
+	if (event_low < num_events && !(event_id & thresh))
+		return FSL_EMB_EVENT_VALID;
+	return 0;
+}
+
+static struct fsl_emb_pmu e6500_pmu = {
+	.name			= "e6500 family",
+	.n_counter		= 6,
+	.n_restricted		= 0,
+	.xlate_event		= e6500_xlate_event,
+	.n_generic		= ARRAY_SIZE(e6500_generic_events),
+	.generic_events		= e6500_generic_events,
+	.cache_events		= &e6500_cache_events,
+};
+
+static int init_e6500_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+
+	if (PVR_VER(pvr) != PVR_VER_E6500)
+		return -ENODEV;
+
+	return register_fsl_emb_pmu(&e6500_pmu);
+}
+
+early_initcall(init_e6500_pmu);
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
new file mode 100644
index 0000000000..b5c414876e
--- /dev/null
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+#define pr_fmt(fmt)	"generic-compat-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding:
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *                                 [ pmc ]                       [    pmcxsel    ]
+ */
+
+/*
+ * Event codes defined in ISA v3.0B
+ */
+#define EVENT(_name, _code)	_name = _code,
+
+enum {
+	/* Cycles, alternate code */
+	EVENT(PM_CYC_ALT,			0x100f0)
+	/* One or more instructions completed in a cycle */
+	EVENT(PM_CYC_INST_CMPL,			0x100f2)
+	/* Floating-point instruction completed */
+	EVENT(PM_FLOP_CMPL,			0x100f4)
+	/* Instruction ERAT/L1-TLB miss */
+	EVENT(PM_L1_ITLB_MISS,			0x100f6)
+	/* All instructions completed and none available */
+	EVENT(PM_NO_INST_AVAIL,			0x100f8)
+	/* A load-type instruction completed (ISA v3.0+) */
+	EVENT(PM_LD_CMPL,			0x100fc)
+	/* Instruction completed, alternate code (ISA v3.0+) */
+	EVENT(PM_INST_CMPL_ALT,			0x100fe)
+	/* A store-type instruction completed */
+	EVENT(PM_ST_CMPL,			0x200f0)
+	/* Instruction Dispatched */
+	EVENT(PM_INST_DISP,			0x200f2)
+	/* Run_cycles */
+	EVENT(PM_RUN_CYC,			0x200f4)
+	/* Data ERAT/L1-TLB miss/reload */
+	EVENT(PM_L1_DTLB_RELOAD,		0x200f6)
+	/* Taken branch completed */
+	EVENT(PM_BR_TAKEN_CMPL,			0x200fa)
+	/* Demand iCache Miss */
+	EVENT(PM_L1_ICACHE_MISS,		0x200fc)
+	/* L1 Dcache reload from memory */
+	EVENT(PM_L1_RELOAD_FROM_MEM,		0x200fe)
+	/* L1 Dcache store miss */
+	EVENT(PM_ST_MISS_L1,			0x300f0)
+	/* Alternate code for PM_INST_DISP */
+	EVENT(PM_INST_DISP_ALT,			0x300f2)
+	/* Branch direction or target mispredicted */
+	EVENT(PM_BR_MISPREDICT,			0x300f6)
+	/* Data TLB miss/reload */
+	EVENT(PM_DTLB_MISS,			0x300fc)
+	/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+	EVENT(PM_DATA_FROM_L3MISS,		0x300fe)
+	/* L1 Dcache load miss */
+	EVENT(PM_LD_MISS_L1,			0x400f0)
+	/* Cycle when instruction(s) dispatched */
+	EVENT(PM_CYC_INST_DISP,			0x400f2)
+	/* Branch or branch target mispredicted */
+	EVENT(PM_BR_MPRED_CMPL,			0x400f6)
+	/* Instructions completed with run latch set */
+	EVENT(PM_RUN_INST_CMPL,			0x400fa)
+	/* Instruction TLB miss/reload */
+	EVENT(PM_ITLB_MISS,			0x400fc)
+	/* Load data not cached */
+	EVENT(PM_LD_NOT_CACHED,			0x400fe)
+	/* Instructions */
+	EVENT(PM_INST_CMPL,			0x500fa)
+	/* Cycles */
+	EVENT(PM_CYC,				0x600f4)
+};
+
+#undef EVENT
+
+/* Table of alternatives, sorted in increasing order of column 0 */
+/* Note that in each row, column 0 must be the smallest */
+static const unsigned int generic_event_alternatives[][MAX_ALT] = {
+	{ PM_CYC_ALT,			PM_CYC },
+	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
+	{ PM_INST_DISP,			PM_INST_DISP_ALT },
+};
+
+/*
+ * Hand the lookup to isa207_get_alternatives() with this PMU's table.
+ */
+static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	const int n_rows = ARRAY_SIZE(generic_event_alternatives);
+
+	return isa207_get_alternatives(event, alt, n_rows, flags,
+				       generic_event_alternatives);
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_NO_INST_AVAIL);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
+
+static struct attribute *generic_compat_events_attr[] = {
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
+	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+	CACHE_EVENT_PTR(PM_LD_MISS_L1),
+	CACHE_EVENT_PTR(PM_ST_MISS_L1),
+	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+	CACHE_EVENT_PTR(PM_DTLB_MISS),
+	CACHE_EVENT_PTR(PM_ITLB_MISS),
+	NULL
+};
+
+static const struct attribute_group generic_compat_pmu_events_group = {
+	.name = "events",
+	.attrs = generic_compat_events_attr,
+};
+
+PMU_FORMAT_ATTR(event,		"config:0-19");
+PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
+PMU_FORMAT_ATTR(pmc,		"config:16-19");
+
+static struct attribute *generic_compat_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_pmcxsel.attr,
+	&format_attr_pmc.attr,
+	NULL,
+};
+
+static const struct attribute_group generic_compat_pmu_format_group = {
+	.name = "format",
+	.attrs = generic_compat_pmu_format_attr,
+};
+
+static struct attribute *generic_compat_pmu_caps_attrs[] = {
+	NULL
+};
+
+static struct attribute_group generic_compat_pmu_caps_group = {
+	.name  = "caps",
+	.attrs = generic_compat_pmu_caps_attrs,
+};
+
+static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
+	&generic_compat_pmu_format_group,
+	&generic_compat_pmu_events_group,
+	&generic_compat_pmu_caps_group,
+	NULL,
+};
+
+static int compat_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_NO_INST_AVAIL,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_DTLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ITLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(NODE) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+#undef C
+
+/*
+ * Run the common ISA 2.07 MMCR computation and, on success, set
+ * MMCR0[CC5-6RUN] so counters 5 and 6 can serve PM_INST_CMPL and PM_CYC.
+ */
+static int generic_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags)
+{
+	int err = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+
+	if (err)
+		return err;
+	mmcr->mmcr0 |= MMCR0_C56RUN;
+	return 0;
+}
+
+static struct power_pmu generic_compat_pmu = {
+	.name			= "ISAv3",
+	.n_counter		= MAX_PMU_COUNTERS,
+	.add_fields		= ISA207_ADD_FIELDS,
+	.test_adder		= ISA207_TEST_ADDER,
+	.compute_mmcr		= generic_compute_mmcr,
+	.get_constraint		= isa207_get_constraint,
+	.get_alternatives	= generic_get_alternatives,
+	.disable_pmc		= isa207_disable_pmc,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
+	.n_generic		= ARRAY_SIZE(compat_generic_events),
+	.generic_events		= compat_generic_events,
+	.cache_events		= &generic_compat_cache_events,
+	.attr_groups		= generic_compat_pmu_attr_groups,
+};
+
+/* Register generic_compat_pmu; -ENODEV when CPU_FTR_ARCH_300 is absent. */
+int __init init_generic_compat_pmu(void)
+{
+	int err;
+
+	/*
+	 * PMU features are architected from ISA v2.07 onwards, but v3.0
+	 * is the minimum accepted here because (a) PM_LD_CMPL and
+	 * PM_INST_CMPL_ALT exist in v3.0 and not in v2.07, and (b) no
+	 * non-IBM Power ISA implementation is expected to conform to
+	 * v2.07 without also conforming to v3.0.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	err = register_power_pmu(&generic_compat_pmu);
+	if (!err) {
+		/* Registration worked: tell userspace that EBB is supported. */
+		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+	}
+
+	return err;
+}
diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h
new file mode 100644
index 0000000000..5fab5a397d
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7-catalog.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_POWERPC_PERF_HV_24X7_CATALOG_H_
+#define LINUX_POWERPC_PERF_HV_24X7_CATALOG_H_
+
+#include <linux/types.h>
+
+/* From document "24x7 Event and Group Catalog Formats Proposal" v0.15 */
+
+struct hv_24x7_catalog_page_0 {
+#define HV_24X7_CATALOG_MAGIC 0x32347837 /* "24x7" in ASCII */
+	__be32 magic;
+	__be32 length; /* In 4096 byte pages */
+	__be64 version; /* XXX: arbitrary? what's the meaning/usage/purpose? */
+	__u8 build_time_stamp[16]; /* "YYYYMMDDHHMMSS\0\0" */
+	__u8 reserved2[32];
+	__be16 schema_data_offs; /* in 4096 byte pages */
+	__be16 schema_data_len;  /* in 4096 byte pages */
+	__be16 schema_entry_count;
+	__u8 reserved3[2];
+	__be16 event_data_offs;
+	__be16 event_data_len;
+	__be16 event_entry_count;
+	__u8 reserved4[2];
+	__be16 group_data_offs; /* in 4096 byte pages */
+	__be16 group_data_len;  /* in 4096 byte pages */
+	__be16 group_entry_count;
+	__u8 reserved5[2];
+	__be16 formula_data_offs; /* in 4096 byte pages */
+	__be16 formula_data_len;  /* in 4096 byte pages */
+	__be16 formula_entry_count;
+	__u8 reserved6[2];
+} __packed;
+
+struct hv_24x7_event_data {
+	__be16 length; /* in bytes, must be a multiple of 16 */
+	__u8 reserved1[2];
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+	__be16 event_group_record_len; /* in bytes */
+
+	/* in bytes, offset from event_group_record */
+	__be16 event_counter_offs;
+
+	/* verified_state, unverified_state, caveat_state, broken_state, ... */
+	__be32 flags;
+
+	__be16 primary_group_ix;
+	__be16 group_count;
+	__be16 event_name_len;
+	__u8 remainder[];
+	/* __u8 event_name[event_name_len - 2]; */
+	/* __be16 event_description_len; */
+	/* __u8 event_desc[event_description_len - 2]; */
+	/* __be16 detailed_desc_len; */
+	/* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
+#endif
diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h
new file mode 100644
index 0000000000..6f91f62e0a
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7-domains.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * DOMAIN(name, num, index_kind, is_physical)
+ *
+ * @name:	An all caps token, suitable for use in generating an enum
+ *		member and appending to an event name in sysfs.
+ *
+ * @num:	The number corresponding to the domain as given in
+ *		documentation. We assume the catalog domain and the hcall
+ *		domain have the same numbering (so far they do), but this
+ *		may need to be changed in the future.
+ *
+ * @index_kind: A stringifiable token describing the meaning of the index
+ *		within the given domain. Must fit the parsing rules of the
+ *		perf sysfs api.
+ *
+ * @is_physical: True if the domain is physical, false otherwise (if virtual).
+ *
+ * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip,
+ *	 physical core and virtual processor in 24x7 Counters specifications.
+ */
+
+DOMAIN(PHYS_CHIP, 0x01, chip, true)
+DOMAIN(PHYS_CORE, 0x02, core, true)
+DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false)
+DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false)
+DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false)
+DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false)
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
new file mode 100644
index 0000000000..3449be7c0d
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -0,0 +1,1763 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hypervisor supplied "24x7" performance counter support
+ *
+ * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
+ * Copyright 2014 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "hv-24x7: " fmt
+
+#include <linux/perf_event.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/cputhreads.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/io.h>
+#include <asm/papr-sysparm.h>
+#include <linux/byteorder/generic.h>
+
+#include <asm/rtas.h>
+#include "hv-24x7.h"
+#include "hv-24x7-catalog.h"
+#include "hv-common.h"
+
+/* Version of the 24x7 hypervisor API that we should use in this machine. */
+static int interface_version;
+
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
+static cpumask_t hv_24x7_cpumask;
+
+static bool domain_is_valid(unsigned int domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		/* fall through */
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_physical_domain(unsigned int domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		return c;
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+	default:
+		return false;
+	}
+}
+
+/*
+ * The Processor Module Information system parameter allows transferring
+ * of certain processor module information from the platform to the OS.
+ * Refer PAPR+ document to get parameter token value as '43'.
+ */
+
+static u32 phys_sockets;	/* Physical sockets */
+static u32 phys_chipspersocket;	/* Physical chips per socket*/
+static u32 phys_coresperchip; /* Physical cores per chip */
+
+/*
+ * read_24x7_sys_info()
+ * Retrieve the number of sockets and chips per socket and cores per
+ * chip details through the get-system-parameter rtas call.
+ */
+void read_24x7_sys_info(void)
+{
+	struct papr_sysparm_buf *buf;
+
+	/*
+	 * Making system parameter: chips and sockets and cores per chip
+	 * default to 1.
+	 */
+	phys_sockets = 1;
+	phys_chipspersocket = 1;
+	phys_coresperchip = 1;
+
+	buf = papr_sysparm_buf_alloc();
+	if (!buf)
+		return;
+
+	if (!papr_sysparm_get(PAPR_SYSPARM_PROC_MODULE_INFO, buf)) {
+		int ntypes = be16_to_cpup((__be16 *)&buf->val[0]);
+		int len = be16_to_cpu(buf->len);
+
+		if (len >= 8 && ntypes != 0) {
+			phys_sockets = be16_to_cpup((__be16 *)&buf->val[2]);
+			phys_chipspersocket = be16_to_cpup((__be16 *)&buf->val[4]);
+			phys_coresperchip = be16_to_cpup((__be16 *)&buf->val[6]);
+		}
+	}
+
+	papr_sysparm_buf_free(buf);
+}
+
+/* Domains for which more than one result element are returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+	return aggregate_result_elements &&
+			(domain == HV_PERF_DOMAIN_PHYS_CORE ||
+			 (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+			  domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
+static const char *domain_name(unsigned int domain)
+{
+	if (!domain_is_valid(domain))
+		return NULL;
+
+	switch (domain) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:		return "Physical Chip";
+	case HV_PERF_DOMAIN_PHYS_CORE:		return "Physical Core";
+	case HV_PERF_DOMAIN_VCPU_HOME_CORE:	return "VCPU Home Core";
+	case HV_PERF_DOMAIN_VCPU_HOME_CHIP:	return "VCPU Home Chip";
+	case HV_PERF_DOMAIN_VCPU_HOME_NODE:	return "VCPU Home Node";
+	case HV_PERF_DOMAIN_VCPU_REMOTE_NODE:	return "VCPU Remote Node";
+	}
+
+	WARN_ON_ONCE(domain);
+	return NULL;
+}
+
+/* Check a catalog entry's domain against what this interface supports. */
+static bool catalog_entry_domain_is_valid(unsigned int domain)
+{
+	/* Interface version 1 (POWER8) doesn't support virtual domains. */
+	const bool physical_only = (interface_version == 1);
+
+	return physical_only ? is_physical_domain(domain) : domain_is_valid(domain);
+}
+
+/*
+ * TODO: Merging events:
+ * - Think of the hcall as an interface to a 4d array of counters:
+ *   - x = domains
+ *   - y = indexes in the domain (core, chip, vcpu, node, etc)
+ *   - z = offset into the counter space
+ *   - w = lpars (guest vms, "logical partitions")
+ * - A single request is: x,y,y_last,z,z_last,w,w_last
+ *   - this means we can retrieve a rectangle of counters in y,z for a single x.
+ *
+ * - Things to consider (ignoring w):
+ *   - input  cost_per_request = 16
+ *   - output cost_per_result(ys,zs)  = 8 + 8 * ys + ys * zs
+ *   - limited number of requests per hcall (must fit into 4K bytes)
+ *     - 4k = 16 [buffer header] + 16 [request size] * request_count
+ *     - 255 requests per hcall
+ *   - sometimes it will be more efficient to read extra data and discard
+ */
+
+/*
+ * Example usage:
+ *  perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
+ */
+
+/* u4 (config bits 0-3), one of the HV_PERF_DOMAIN_* values */
+EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
+/* u16 */
+EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
+/* u32, see "data_offset" */
+EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
+/* u16 */
+EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);
+
+EVENT_DEFINE_RANGE(reserved1, config,   4, 15);
+EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
+EVENT_DEFINE_RANGE(reserved3, config2,  0, 63);
+
+static struct attribute *format_attrs[] = {
+	&format_attr_domain.attr,
+	&format_attr_offset.attr,
+	&format_attr_core.attr,
+	&format_attr_chip.attr,
+	&format_attr_vcpu.attr,
+	&format_attr_lpar.attr,
+	NULL,
+};
+
+static const struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+static struct attribute_group event_group = {
+	.name = "events",
+	/* .attrs is set in init */
+};
+
+static struct attribute_group event_desc_group = {
+	.name = "event_descs",
+	/* .attrs is set in init */
+};
+
+static struct attribute_group event_long_desc_group = {
+	.name = "event_long_descs",
+	/* .attrs is set in init */
+};
+
+static struct kmem_cache *hv_page_cache;
+
+static DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+static DEFINE_PER_CPU(int, hv_24x7_txn_err);
+
+struct hv_24x7_hw {
+	struct perf_event *events[255];
+};
+
+static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+
+/*
+ * request_buffer and result_buffer are not required to be 4k aligned,
+ * but are not allowed to cross any 4k boundary. Aligning them to 4k is
+ * the simplest way to ensure that.
+ */
+#define H24x7_DATA_BUFFER_SIZE	4096
+static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+
+static unsigned int max_num_requests(int interface_version)
+{
+	return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
+		/ H24x7_REQUEST_SIZE(interface_version);
+}
+
+static char *event_name(struct hv_24x7_event_data *ev, int *len)
+{
+	*len = be16_to_cpu(ev->event_name_len) - 2;
+	return (char *)ev->remainder;
+}
+
+static char *event_desc(struct hv_24x7_event_data *ev, int *len)
+{
+	unsigned int nl = be16_to_cpu(ev->event_name_len);
+	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
+
+	*len = be16_to_cpu(*desc_len) - 2;
+	return (char *)ev->remainder + nl;
+}
+
+static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
+{
+	unsigned int nl = be16_to_cpu(ev->event_name_len);
+	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
+	unsigned int desc_len = be16_to_cpu(*desc_len_);
+	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
+
+	*len = be16_to_cpu(*long_desc_len) - 2;
+	return (char *)ev->remainder + nl + desc_len;
+}
+
+static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
+					  void *end)
+{
+	void *start = ev;
+
+	return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
+}
+
+/*
+ * Walk the name, description and long description of @ev, each of which is
+ * sized by a __be16 length field, refusing to read a length field that
+ * lies beyond @end. The '\0' padding of the strings is not checked.
+ *
+ * Return NULL if any length is below 2 or the event passes @end,
+ * otherwise return the address start + nl + dl + ldl.
+ */
+static void *event_end(struct hv_24x7_event_data *ev, void *end)
+{
+	void *start = ev, *strs = ev->remainder;
+	__be16 *dl_, *ldl_;
+	unsigned int dl, ldl;
+	unsigned int nl = be16_to_cpu(ev->event_name_len);
+
+	if (nl < 2) {
+		pr_debug("%s: name length too short: %u", __func__, nl);
+		return NULL;
+	}
+
+	if (strs + nl > end) {	/* desc length field ends at strs + nl */
+		pr_debug("%s: strs=%p + nl=%u > end=%p",
+				__func__, strs, nl, end);
+		return NULL;
+	}
+
+	dl_ = (__be16 *)(ev->remainder + nl - 2);
+	if (!IS_ALIGNED((uintptr_t)dl_, 2))
+		pr_warn("desc len not aligned %p", dl_);
+	dl = be16_to_cpu(*dl_);
+	if (dl < 2) {
+		pr_debug("%s: desc len too short: %u", __func__, dl);
+		return NULL;
+	}
+
+	if (strs + nl + dl > end) {	/* long desc length ends at strs + nl + dl */
+		pr_debug("%s: (strs=%p + nl=%u + dl=%u)=%p > end=%p",
+				__func__, strs, nl, dl, strs + nl + dl, end);
+		return NULL;
+	}
+
+	ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
+	if (!IS_ALIGNED((uintptr_t)ldl_, 2))
+		pr_warn("long desc len not aligned %p", ldl_);
+	ldl = be16_to_cpu(*ldl_);
+	if (ldl < 2) {
+		pr_debug("%s: long desc len too short (ldl=%u)",
+				__func__, ldl);
+		return NULL;
+	}
+
+	if (start + nl + dl + ldl > end) {
+		pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
+				__func__, start, nl, dl, ldl, end);
+		return NULL;
+	}
+
+	return start + nl + dl + ldl;
+}
+
+static long h_get_24x7_catalog_page_(unsigned long phys_4096,
+				     unsigned long version, unsigned long index)
+{
+	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
+			phys_4096, version, index);
+
+	WARN_ON(!IS_ALIGNED(phys_4096, 4096));
+
+	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
+			phys_4096, version, index);
+}
+
+static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
+{
+	return h_get_24x7_catalog_page_(virt_to_phys(page),
+					version, index);
+}
+
+/*
+ * Each event we find in the catalog, will have a sysfs entry. Format the
+ * data for this sysfs entry based on the event's domain.
+ *
+ * Events belonging to the Chip domain can only be monitored in that domain.
+ * i.e. the domain for these events is a fixed/known value.
+ *
+ * Events belonging to the Core domain can be monitored either in the physical
+ * core or in one of the virtual CPU domains. So the domain value for these
+ * events must be specified by the user (i.e is a required parameter). Format
+ * the Core events with 'domain=?' so the perf-tool can error check required
+ * parameters.
+ *
+ * NOTE: For the Core domain events, rather than making domain a required
+ *	 parameter we could default it to PHYS_CORE and allow users to
+ *	 override the domain to one of the VCPU domains.
+ *
+ *	 However, this can make the interface a little inconsistent.
+ *
+ *	 If we set domain=2 (PHYS_CORE) and allow the user to override this
+ *	 field, the user may be tempted to also modify the "offset=x" field,
+ *	 which can lead to confusing usage. Consider the HPM_PCYC (offset=0x18)
+ *	 and HPM_INST (offset=0x20) events. With:
+ *
+ *		perf stat -e hv_24x7/HPM_PCYC,offset=0x20/
+ *
+ *	we end up monitoring HPM_INST, while the command line has HPM_PCYC.
+ *
+ *	By not assigning a default value to the domain for the Core events,
+ *	we can have simple guidelines:
+ *
+ *		- Specifying values for parameters with "=?" is required.
+ *
+ *		- Specifying (i.e overriding) values for other parameters
+ *		  is undefined.
+ */
+static char *event_fmt(struct hv_24x7_event_data *event, unsigned int domain)
+{
+	const char *sindex;
+	const char *lpar;
+	const char *domain_str;
+	char buf[8];
+
+	switch (domain) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:
+		snprintf(buf, sizeof(buf), "%d", domain);
+		domain_str = buf;
+		lpar = "0x0";
+		sindex = "chip";
+		break;
+	case HV_PERF_DOMAIN_PHYS_CORE:
+		domain_str = "?";
+		lpar = "0x0";
+		sindex = "core";
+		break;
+	default:
+		domain_str = "?";
+		lpar = "?";
+		sindex = "vcpu";
+	}
+
+	return kasprintf(GFP_KERNEL,
+			"domain=%s,offset=0x%x,%s=?,lpar=%s",
+			domain_str,
+			be16_to_cpu(event->event_counter_offs) +
+				be16_to_cpu(event->event_group_record_offs),
+			sindex,
+			lpar);
+}
+
+/* Avoid trusting fw to NUL terminate strings */
+static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
+{
+	return kasprintf(gfp, "%.*s", max_len, maybe_str);
+}
+
+/* sysfs show for string attrs; sysfs_emit() caps the output at PAGE_SIZE. */
+static ssize_t device_show_string(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *d =
+		container_of(attr, struct dev_ext_attribute, attr);
+
+	return sysfs_emit(buf, "%s\n", (char *)d->var);
+}
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask);
+}
+
+/* sysfs show: phys_sockets (a u32, hence %u), emitted via sysfs_emit(). */
+static ssize_t sockets_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", phys_sockets);
+}
+
+/* sysfs show: phys_chipspersocket (a u32, hence %u), via sysfs_emit(). */
+static ssize_t chipspersocket_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", phys_chipspersocket);
+}
+
+/* sysfs show: phys_coresperchip (a u32, hence %u), via sysfs_emit(). */
+static ssize_t coresperchip_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", phys_coresperchip);
+}
+
+static struct attribute *device_str_attr_create_(char *name, char *str)
+{
+	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+
+	if (!attr)
+		return NULL;
+
+	sysfs_attr_init(&attr->attr.attr);
+
+	attr->var = str;
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = device_show_string;
+
+	return &attr->attr.attr;
+}
+
+/*
+ * Allocate and initialize strings representing event attributes.
+ *
+ * NOTE: The strings allocated here are never destroyed and continue to
+ *	 exist till shutdown. This is to allow us to create as many events
+ *	 from the catalog as possible, even if we encounter errors with some.
+ *	 In case of changes to error paths in future, these may need to be
+ *	 freed by the caller.
+ */
+static struct attribute *device_str_attr_create(char *name, int name_max,
+						int name_nonce,
+						char *str, size_t str_max)
+{
+	char *n;
+	char *s = memdup_to_str(str, str_max, GFP_KERNEL);
+	struct attribute *a;
+
+	if (!s)
+		return NULL;
+
+	if (!name_nonce)
+		n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
+	else
+		n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
+					name_nonce);
+	if (!n)
+		goto out_s;
+
+	a = device_str_attr_create_(n, s);
+	if (!a)
+		goto out_n;
+
+	return a;
+out_n:
+	kfree(n);
+out_s:
+	kfree(s);
+	return NULL;
+}
+
+static struct attribute *event_to_attr(unsigned int ix,
+				       struct hv_24x7_event_data *event,
+				       unsigned int domain,
+				       int nonce)
+{
+	int event_name_len;
+	char *ev_name, *a_ev_name, *val;
+	struct attribute *attr;
+
+	if (!domain_is_valid(domain)) {
+		pr_warn("catalog event %u has invalid domain %u\n",
+				ix, domain);
+		return NULL;
+	}
+
+	val = event_fmt(event, domain);
+	if (!val)
+		return NULL;
+
+	ev_name = event_name(event, &event_name_len);
+	if (!nonce)
+		a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
+				(int)event_name_len, ev_name);
+	else
+		a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
+				(int)event_name_len, ev_name, nonce);
+
+	if (!a_ev_name)
+		goto out_val;
+
+	attr = device_str_attr_create_(a_ev_name, val);
+	if (!attr)
+		goto out_name;
+
+	return attr;
+out_name:
+	kfree(a_ev_name);
+out_val:
+	kfree(val);
+	return NULL;
+}
+
+static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
+					    int nonce)
+{
+	int nl, dl;
+	char *name = event_name(event, &nl);
+	char *desc = event_desc(event, &dl);
+
+	/* If there isn't a description, don't create the sysfs file */
+	if (!dl)
+		return NULL;
+
+	return device_str_attr_create(name, nl, nonce, desc, dl);
+}
+
+static struct attribute *
+event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
+{
+	int nl, dl;
+	char *name = event_name(event, &nl);
+	char *desc = event_long_desc(event, &dl);
+
+	/* If there isn't a description, don't create the sysfs file */
+	if (!dl)
+		return NULL;
+
+	return device_str_attr_create(name, nl, nonce, desc, dl);
+}
+
+/* Store event_to_attr()'s result in *attrs; 0 if non-NULL, else -1. */
+static int event_data_to_attrs(unsigned int ix, struct attribute **attrs,
+			       struct hv_24x7_event_data *event, int nonce)
+{
+	struct attribute *a = event_to_attr(ix, event, event->domain, nonce);
+
+	*attrs = a;
+	return a ? 0 : -1;
+}
+
+/* One rb-tree node per distinct (event name, domain) pair seen so far. */
+struct event_uniq {
+	struct rb_node node;
+	const char *name;	/* not owned; see event_uniq_destroy() */
+	int nl;			/* length of @name in bytes */
+	unsigned int ct;	/* bumped each time the pair is seen again */
+	unsigned int domain;
+};
+
+/* Order two buffers: a shorter one sorts after a longer one, ties by memcmp(). */
+static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
+{
+	if (s1 == s2)
+		return memcmp(d1, d2, s1);
+
+	/* Lengths differ: 1 when the first is shorter, -1 when it is longer. */
+	return s1 < s2 ? 1 : -1;
+}
+
+/* Order (name, domain) keys: by name via memord(), then by ascending domain. */
+static int ev_uniq_ord(const void *v1, size_t s1, unsigned int d1,
+		       const void *v2, size_t s2, unsigned int d2)
+{
+	int by_name = memord(v1, s1, v2, s2);
+
+	if (by_name)
+		return by_name;
+	if (d1 == d2)
+		return 0;
+	/* Names compare equal, so the domain decides. */
+	return d1 > d2 ? 1 : -1;
+}
+
+static int event_uniq_add(struct rb_root *root, const char *name, int nl,
+			  unsigned int domain)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct event_uniq *data;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct event_uniq *it;
+		int result;
+
+		it = rb_entry(*new, struct event_uniq, node);
+		result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
+					it->domain);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else {
+			it->ct++;
+			pr_info("found a duplicate event %.*s, ct=%u\n", nl,
+						name, it->ct);
+			return it->ct;
+		}
+	}
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	*data = (struct event_uniq) {
+		.name = name,
+		.nl = nl,
+		.ct = 0,
+		.domain = domain,
+	};
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+
+	/* data->ct */
+	return 0;
+}
+
+static void event_uniq_destroy(struct rb_root *root)
+{
+	/*
+	 * the strings we point to are in the giant block of memory filled by
+	 * the catalog, and are freed separately.
+	 */
+	struct event_uniq *pos, *n;
+
+	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
+		kfree(pos);
+}
+
+
+/*
+ * Ensure the sizes recorded in @event are self consistent and that parsing
+ * it cannot read past @end, the end of the event data buffer.
+ *
+ * On success, return the event length in bytes.
+ * Otherwise, return -1 (and print as appropriate).
+ */
+static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
+					  size_t event_idx,
+					  size_t event_data_bytes,
+					  size_t event_entry_count,
+					  size_t offset, void *end)
+{
+	ssize_t ev_len;
+	void *ev_end, *calc_ev_end;
+
+	if (offset >= event_data_bytes)
+		return -1;
+
+	if (event_idx >= event_entry_count) {
+		pr_devel("catalog event data has %zu bytes of padding after last event\n",
+				event_data_bytes - offset);
+		return -1;
+	}
+
+	if (!event_fixed_portion_is_within(event, end)) {
+		pr_warn("event %zu fixed portion is not within range\n",
+				event_idx);
+		return -1;
+	}
+	/* ev_len is an ssize_t, hence %zd (not %zu) in the messages below */
+	ev_len = be16_to_cpu(event->length);
+
+	if (ev_len % 16)
+		pr_info("event %zu has length %zd not divisible by 16: event=%pK\n",
+				event_idx, ev_len, event);
+
+	ev_end = (__u8 *)event + ev_len;
+	if (ev_end > end) {
+		pr_warn("event %zu has .length=%zd, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
+				event_idx, ev_len, ev_end, end,
+				offset);
+		return -1;
+	}
+	/* presumably NULL when the parts computed from @event run past @end */
+	calc_ev_end = event_end(event, end);
+	if (!calc_ev_end) {
+		pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
+			event_idx, event_data_bytes, event, end,
+			offset);
+		return -1;
+	}
+
+	if (calc_ev_end > ev_end) {
+		pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
+			event_idx, event, ev_end, offset, calc_ev_end);
+		return -1;
+	}
+
+	return ev_len;
+}
+
+/* Return true for catalog entries whose name starts with "RESERVED". */
+static bool ignore_event(const char *name)
+{
+	static const char junk_prefix[] = "RESERVED";
+
+	return !strncmp(name, junk_prefix, sizeof(junk_prefix) - 1);
+}
+
+#define MAX_4K (SIZE_MAX / 4096)
+
+static int create_events_from_catalog(struct attribute ***events_,
+				      struct attribute ***event_descs_,
+				      struct attribute ***event_long_descs_)
+{
+	long hret;
+	size_t catalog_len, catalog_page_len, event_entry_count,
+	       event_data_len, event_data_offs,
+	       event_data_bytes, junk_events, event_idx, event_attr_ct, i,
+	       attr_max, event_idx_last, desc_ct, long_desc_ct;
+	ssize_t ct, ev_len;
+	uint64_t catalog_version_num;
+	struct attribute **events, **event_descs, **event_long_descs;
+	struct hv_24x7_catalog_page_0 *page_0 =
+		kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
+	void *page = page_0;
+	void *event_data, *end;
+	struct hv_24x7_event_data *event;
+	struct rb_root ev_uniq = RB_ROOT;
+	int ret = 0;
+	/* Turn the 24x7 catalog into three NULL-terminated attribute arrays. */
+	if (!page) {
+		ret = -ENOMEM;
+		goto e_out;
+	}
+
+	hret = h_get_24x7_catalog_page(page, 0, 0);
+	if (hret) {
+		ret = -EIO;
+		goto e_free;
+	}
+	/* page 0 describes the catalog: version, size and event data layout */
+	catalog_version_num = be64_to_cpu(page_0->version);
+	catalog_page_len = be32_to_cpu(page_0->length);
+
+	if (MAX_4K < catalog_page_len) {
+		pr_err("invalid page count: %zu\n", catalog_page_len);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	catalog_len = catalog_page_len * 4096;
+
+	event_entry_count = be16_to_cpu(page_0->event_entry_count);
+	event_data_offs   = be16_to_cpu(page_0->event_data_offs);
+	event_data_len    = be16_to_cpu(page_0->event_data_len);
+
+	pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
+			catalog_version_num, catalog_len,
+			event_entry_count, event_data_offs, event_data_len);
+
+	if ((MAX_4K < event_data_len)
+			|| (MAX_4K < event_data_offs)
+			|| (MAX_4K - event_data_offs < event_data_len)) {
+		pr_err("invalid event data offs %zu and/or len %zu\n",
+				event_data_offs, event_data_len);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	if ((event_data_offs + event_data_len) > catalog_page_len) {
+		pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
+				event_data_offs,
+				event_data_offs + event_data_len,
+				catalog_page_len);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	if (SIZE_MAX - 1 < event_entry_count) {
+		pr_err("event_entry_count %zu is invalid\n", event_entry_count);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	event_data_bytes = event_data_len * 4096;
+
+	/*
+	 * event data can span several pages, events can cross between these
+	 * pages. Use vmalloc to make this easier.
+	 */
+	event_data = vmalloc(event_data_bytes);
+	if (!event_data) {
+		pr_err("could not allocate event data\n");
+		ret = -ENOMEM;
+		goto e_free;
+	}
+
+	end = event_data + event_data_bytes;
+
+	/*
+	 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
+	 * divisible by 4096
+	 */
+	BUILD_BUG_ON(PAGE_SIZE % 4096);
+
+	for (i = 0; i < event_data_len; i++) {
+		hret = h_get_24x7_catalog_page_(
+				vmalloc_to_phys(event_data + i * 4096),
+				catalog_version_num,
+				i + event_data_offs);
+		if (hret) {
+			pr_err("Failed to get event data in page %zu: rc=%ld\n",
+			       i + event_data_offs, hret);
+			ret = -EIO;
+			goto e_event_data;
+		}
+	}
+
+	/*
+	 * scan the catalog to determine the number of attributes we need, and
+	 * verify it at the same time.
+	 */
+	for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
+	     ;
+	     event_idx++, event = (void *)event + ev_len) {
+		size_t offset = (void *)event - (void *)event_data;
+		char *name;
+		int nl;
+
+		ev_len = catalog_event_len_validate(event, event_idx,
+						    event_data_bytes,
+						    event_entry_count,
+						    offset, end);
+		if (ev_len < 0)
+			break;
+
+		name = event_name(event, &nl);
+
+		if (ignore_event(name)) {
+			junk_events++;
+			continue;
+		}
+		if (event->event_group_record_len == 0) {
+			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
+					event_idx, nl, name);
+			junk_events++;
+			continue;
+		}
+
+		if (!catalog_entry_domain_is_valid(event->domain)) {
+			pr_info("event %zu (%.*s) has invalid domain %d\n",
+					event_idx, nl, name, event->domain);
+			junk_events++;
+			continue;
+		}
+
+		attr_max++;
+	}
+
+	event_idx_last = event_idx;
+	if (event_idx_last != event_entry_count)
+		pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
+				event_idx_last, event_entry_count, junk_events);
+	/* the "+ 1" in each allocation is for the terminating NULL entry */
+	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
+	if (!events) {
+		ret = -ENOMEM;
+		goto e_event_data;
+	}
+
+	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
+				GFP_KERNEL);
+	if (!event_descs) {
+		ret = -ENOMEM;
+		goto e_event_attrs;
+	}
+
+	event_long_descs = kmalloc_array(event_idx + 1,
+			sizeof(*event_long_descs), GFP_KERNEL);
+	if (!event_long_descs) {
+		ret = -ENOMEM;
+		goto e_event_descs;
+	}
+
+	/* Iterate over the catalog filling in the attribute vector */
+	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
+				event = event_data, event_idx = 0;
+			event_idx < event_idx_last;
+			event_idx++, ev_len = be16_to_cpu(event->length),
+				event = (void *)event + ev_len) {
+		char *name;
+		int nl;
+		int nonce;
+		/*
+		 * these are the only "bad" events that are intermixed and that
+		 * we can ignore without issue. make sure to skip them here
+		 */
+		if (event->event_group_record_len == 0)
+			continue;
+		if (!catalog_entry_domain_is_valid(event->domain))
+			continue;
+
+		name  = event_name(event, &nl);
+		if (ignore_event(name))
+			continue;
+		/* NOTE(review): a -ENOMEM nonce from event_uniq_add() is not checked */
+		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
+		ct    = event_data_to_attrs(event_idx, events + event_attr_ct,
+					    event, nonce);
+		if (ct < 0) {
+			pr_warn("event %zu (%.*s) creation failure, skipping\n",
+				event_idx, nl, name);
+			junk_events++;
+		} else {
+			event_attr_ct++;
+			event_descs[desc_ct] = event_to_desc_attr(event, nonce);
+			if (event_descs[desc_ct])
+				desc_ct++;
+			event_long_descs[long_desc_ct] =
+					event_to_long_desc_attr(event, nonce);
+			if (event_long_descs[long_desc_ct])
+				long_desc_ct++;
+		}
+	}
+
+	pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
+			event_idx, event_attr_ct, junk_events, desc_ct);
+	/* terminate each array with NULL */
+	events[event_attr_ct] = NULL;
+	event_descs[desc_ct] = NULL;
+	event_long_descs[long_desc_ct] = NULL;
+
+	event_uniq_destroy(&ev_uniq);
+	vfree(event_data);
+	kmem_cache_free(hv_page_cache, page);
+
+	*events_ = events;
+	*event_descs_ = event_descs;
+	*event_long_descs_ = event_long_descs;
+	return 0;
+
+e_event_descs:
+	kfree(event_descs);
+e_event_attrs:
+	kfree(events);
+e_event_data:
+	vfree(event_data);
+e_free:
+	kmem_cache_free(hv_page_cache, page);
+e_out:
+	*events_ = NULL;
+	*event_descs_ = NULL;
+	*event_long_descs_ = NULL;
+	return ret;
+}
+
+static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
+			    struct bin_attribute *bin_attr, char *buf,
+			    loff_t offset, size_t count)
+{
+	long hret;
+	ssize_t ret = 0;
+	size_t catalog_len = 0, catalog_page_len = 0;
+	loff_t page_offset = 0;
+	loff_t offset_in_page;
+	size_t copy_len;
+	uint64_t catalog_version_num = 0;
+	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
+	struct hv_24x7_catalog_page_0 *page_0 = page;
+	/* Copies at most the rest of one 4096-byte catalog page per call. */
+	if (!page)
+		return -ENOMEM;
+
+	hret = h_get_24x7_catalog_page(page, 0, 0);
+	if (hret) {
+		ret = -EIO;
+		goto e_free;
+	}
+	/* page 0 supplies the version and the catalog length, in pages */
+	catalog_version_num = be64_to_cpu(page_0->version);
+	catalog_page_len = be32_to_cpu(page_0->length);
+	catalog_len = catalog_page_len * 4096;
+
+	page_offset = offset / 4096;
+	offset_in_page = offset % 4096;
+	/* reading at or past the end of the catalog returns 0 bytes */
+	if (page_offset >= catalog_page_len)
+		goto e_free;
+	/* page 0 is already in @page; any other page has to be fetched */
+	if (page_offset != 0) {
+		hret = h_get_24x7_catalog_page(page, catalog_version_num,
+					       page_offset);
+		if (hret) {
+			ret = -EIO;
+			goto e_free;
+		}
+	}
+	/* clamp to what is left of this page and to what the caller asked for */
+	copy_len = 4096 - offset_in_page;
+	if (copy_len > count)
+		copy_len = count;
+
+	memcpy(buf, page+offset_in_page, copy_len);
+	ret = copy_len;
+
+e_free:
+	if (hret)
+		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
+		       " rc=%ld\n",
+		       catalog_version_num, page_offset, hret);
+	kmem_cache_free(hv_page_cache, page);
+
+	pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
+			"catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
+			count, catalog_len, catalog_page_len, ret);
+
+	return ret;
+}
+
+static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
+			    char *page)
+{
+	int d, count = 0;
+	const char *str;
+
+	/*
+	 * Emit one "<number>: <name>" line per domain that has a name.
+	 * sysfs_emit_at() bounds every write to the PAGE_SIZE sysfs buffer,
+	 * which an open-coded sprintf() into a moving pointer does not.
+	 */
+	for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
+		str = domain_name(d);
+		if (!str)
+			continue;
+
+		count += sysfs_emit_at(page, count, "%d: %s\n", d, str);
+	}
+	return count;
+}
+
+#define PAGE_0_ATTR(_name, _fmt, _expr)				\
+static ssize_t _name##_show(struct device *dev,			\
+			    struct device_attribute *dev_attr,	\
+			    char *buf)				\
+{	/* show(): print one value computed from catalog page 0 */ \
+	long hret;						\
+	ssize_t ret = 0;					\
+	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);	\
+	struct hv_24x7_catalog_page_0 *page_0 = page;		\
+	if (!page)						\
+		return -ENOMEM;					\
+	hret = h_get_24x7_catalog_page(page, 0, 0);		\
+	if (hret) {						\
+		ret = -EIO;					\
+		goto e_free;					\
+	}	/* _expr may use page_0; sysfs_emit() bounds the write */ \
+	ret = sysfs_emit(buf, _fmt, _expr);			\
+e_free:								\
+	kmem_cache_free(hv_page_cache, page);			\
+	return ret;						\
+}								\
+static DEVICE_ATTR_RO(_name)
+
+PAGE_0_ATTR(catalog_version, "%lld\n",
+		(unsigned long long)be64_to_cpu(page_0->version));
+PAGE_0_ATTR(catalog_len, "%lld\n",
+		(unsigned long long)be32_to_cpu(page_0->length) * 4096); /* pages to bytes */
+static BIN_ATTR_RO(catalog, 0/* real length varies */);
+static DEVICE_ATTR_RO(domains);
+static DEVICE_ATTR_RO(sockets);
+static DEVICE_ATTR_RO(chipspersocket);
+static DEVICE_ATTR_RO(coresperchip);
+static DEVICE_ATTR_RO(cpumask);
+
+static struct bin_attribute *if_bin_attrs[] = {
+	&bin_attr_catalog,
+	NULL,
+};
+
+static struct attribute *cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group cpumask_attr_group = {	/* no .name set */
+	.attrs = cpumask_attrs,
+};
+
+static struct attribute *if_attrs[] = {
+	&dev_attr_catalog_len.attr,
+	&dev_attr_catalog_version.attr,
+	&dev_attr_domains.attr,
+	&dev_attr_sockets.attr,
+	&dev_attr_chipspersocket.attr,
+	&dev_attr_coresperchip.attr,
+	NULL,
+};
+
+static const struct attribute_group if_group = {
+	.name = "interface",
+	.bin_attrs = if_bin_attrs,
+	.attrs = if_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {	/* h_24x7_pmu.attr_groups */
+	&format_group,
+	&event_group,
+	&event_desc_group,
+	&event_long_desc_group,
+	&if_group,
+	&cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * Reset both buffers so that a new H_GET_24x7_DATA hcall can be built.
+ */
+static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+			      struct hv_24x7_data_result_buffer *result_buffer)
+{
+	/* Zeroing the request buffer also resets ->num_requests to 0. */
+	memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+	memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+
+	/* The interface version is the only field set in an empty request. */
+	request_buffer->interface_version = interface_version;
+}
+
+/*
+ * Perform the H_GET_24x7_DATA hcall on the requests gathered since
+ * init_24x7_request(). Returns 0, or -EIO after logging the first request.
+ */
+static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+			     struct hv_24x7_data_result_buffer *result_buffer)
+{
+	long ret;
+
+	/*
+	 * NOTE: The request/result arrays are variable length, so sizeof() is
+	 *	 not reliable; pass the allocated size, H24x7_DATA_BUFFER_SIZE.
+	 */
+	ret = plpar_hcall_norets(H_GET_24X7_DATA,
+			virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
+			virt_to_phys(result_buffer),  H24x7_DATA_BUFFER_SIZE);
+
+	if (ret) {
+		struct hv_24x7_request *req = request_buffer->requests;
+		/* The buffers hold big-endian fields: convert them to print. */
+		pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+				      req->performance_domain,
+				      be32_to_cpu(req->data_offset),
+				      be16_to_cpu(req->starting_ix),
+				      be16_to_cpu(req->starting_lpar_ix), ret, ret,
+				      be32_to_cpu(result_buffer->detailed_rc),
+				      result_buffer->failing_request_ix);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Add the given @event to the next slot in the 24x7 request_buffer.
+ * Returns 0, or -EINVAL if the buffer already holds the maximum number.
+ * Note that H_GET_24X7_DATA hcall allows reading several counters'
+ * values in a single HCALL. We expect the caller to add events to the
+ * request buffer one by one, make the HCALL and process the results.
+ */
+static int add_event_to_24x7_request(struct perf_event *event,
+				struct hv_24x7_request_buffer *request_buffer)
+{
+	u16 idx;
+	int i;
+	size_t req_size;
+	struct hv_24x7_request *req;
+
+	if (request_buffer->num_requests >=
+	    max_num_requests(request_buffer->interface_version)) {
+		pr_devel("Too many requests for 24x7 HCALL %d\n",
+				request_buffer->num_requests);
+		return -EINVAL;
+	}
+	/* which index ends up in ->starting_ix depends on the event's domain */
+	switch (event_get_domain(event)) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:
+		idx = event_get_chip(event);
+		break;
+	case HV_PERF_DOMAIN_PHYS_CORE:
+		idx = event_get_core(event);
+		break;
+	default:
+		idx = event_get_vcpu(event);
+	}
+
+	req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
+	/* claim the next slot; slots are req_size bytes apart, not sizeof(*req) */
+	i = request_buffer->num_requests++;
+	req = (void *) request_buffer->requests + i * req_size;
+
+	req->performance_domain = event_get_domain(event);
+	req->data_size = cpu_to_be16(8);
+	req->data_offset = cpu_to_be32(event_get_offset(event));
+	req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
+	req->max_num_lpars = cpu_to_be16(1);
+	req->starting_ix = cpu_to_be16(idx);
+	req->max_ix = cpu_to_be16(1);
+	/* the thread-group fields are only written for interface versions > 1 */
+	if (request_buffer->interface_version > 1) {
+		if (domain_needs_aggregation(req->performance_domain))
+			req->max_num_thread_groups = -1;
+		else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+			req->starting_thread_group_ix = idx % 2;
+			req->max_num_thread_groups = 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * get_count_from_result - get event count from all result elements in result
+ * @event:	Event associated with @res.
+ * @resb:	Result buffer containing @res.
+ * @res:	Result to work on.
+ * @countp:	Output variable containing the event count.
+ * @next:	Optional output variable pointing to the next result in @resb.
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
+ * Return: 0, or -ENODATA (no elements), -EIO or -ENOTSUPP on a bad result.
+ */
+static int get_count_from_result(struct perf_event *event,
+				 struct hv_24x7_data_result_buffer *resb,
+				 struct hv_24x7_result *res, u64 *countp,
+				 struct hv_24x7_result **next)
+{
+	u16 num_elements = be16_to_cpu(res->num_elements_returned);
+	u16 data_size = be16_to_cpu(res->result_element_data_size);
+	unsigned int data_offset;
+	void *element_data;
+	int i;
+	u64 count;
+
+	/*
+	 * We can bail out early if the result is empty.
+	 */
+	if (!num_elements) {
+		pr_debug("Result of request %hhu is empty, nothing to do\n",
+			 res->result_ix);
+
+		if (next)
+			*next = (struct hv_24x7_result *) res->elements;
+
+		return -ENODATA;
+	}
+
+	/*
+	 * Since we always specify 1 as the maximum for the smallest resource
+	 * we're requesting, there should be only one element per result.
+	 * Except when an event needs aggregation, in which case there are more.
+	 */
+	if (num_elements != 1 &&
+	    !domain_needs_aggregation(event_get_domain(event))) {
+		pr_err("Error: result of request %hhu has %hu elements\n",
+		       res->result_ix, num_elements);
+
+		return -EIO;
+	}
+
+	if (data_size != sizeof(u64)) {
+		pr_debug("Error: result of request %hhu has data of %hu bytes\n",
+			 res->result_ix, data_size);
+
+		return -ENOTSUPP;
+	}
+	/* the payload sits at a different offset in a v1 and a v2 element */
+	if (resb->interface_version == 1)
+		data_offset = offsetof(struct hv_24x7_result_element_v1,
+				       element_data);
+	else
+		data_offset = offsetof(struct hv_24x7_result_element_v2,
+				       element_data);
+
+	/* Go through the result elements in the result. */
+	for (i = count = 0, element_data = res->elements + data_offset;
+	     i < num_elements;
+	     i++, element_data += data_size + data_offset)
+		count += be64_to_cpu(*((u64 *) element_data));
+
+	*countp = count;
+
+	/* The next result is after the last result element. */
+	if (next)
+		*next = element_data - data_offset;
+
+	return 0;
+}
+
+static int single_24x7_request(struct perf_event *event, u64 *count)
+{
+	int ret;
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+	/* Read one counter with an hcall that carries just this request. */
+	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
+	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	init_24x7_request(request_buffer, result_buffer);
+
+	ret = add_event_to_24x7_request(event, request_buffer);
+	if (ret)
+		goto out;
+
+	ret = make_24x7_request(request_buffer, result_buffer);
+	if (ret)
+		goto out;
+
+	/* process result from hcall; @count is only written when this succeeds */
+	ret = get_count_from_result(event, result_buffer,
+				    result_buffer->results, count, NULL);
+
+out:
+	put_cpu_var(hv_24x7_reqb);
+	put_cpu_var(hv_24x7_resb);
+	return ret;
+}
+
+
+static int h_24x7_event_init(struct perf_event *event)
+{
+	struct hv_perf_caps caps;
+	unsigned int domain;
+	unsigned long hret;
+	u64 ct;
+	/* Validate the event's config fields, then prime prev_count. */
+	/* Not our event */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Unused areas must be 0 */
+	if (event_get_reserved1(event) ||
+	    event_get_reserved2(event) ||
+	    event_get_reserved3(event)) {
+		pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
+				event->attr.config,
+				event_get_reserved1(event),
+				event->attr.config1,
+				event_get_reserved2(event),
+				event->attr.config2,
+				event_get_reserved3(event));
+		return -EINVAL;
+	}
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* offset must be 8 byte aligned */
+	if (event_get_offset(event) % 8) {
+		pr_devel("bad alignment\n");
+		return -EINVAL;
+	}
+
+	domain = event_get_domain(event);
+	if (domain  == 0 || domain >= HV_PERF_DOMAIN_MAX) {
+		pr_devel("invalid domain %d\n", domain);
+		return -EINVAL;
+	}
+
+	hret = hv_perf_caps_get(&caps);
+	if (hret) {
+		pr_devel("could not get capabilities: rc=%ld\n", hret);
+		return -EIO;
+	}
+
+	/* Physical domains & other lpars require extra capabilities */
+	if (!caps.collect_privileged && (is_physical_domain(domain) ||
+		(event_get_lpar(event) != event_get_lpar_max()))) {
+		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
+				is_physical_domain(domain),
+				event_get_lpar(event));
+		return -EACCES;
+	}
+
+	/* Get the initial value of the counter for this event */
+	if (single_24x7_request(event, &ct)) {
+		pr_devel("test hcall failed\n");
+		return -EIO;	/* the specific error code is not passed on */
+	}
+	(void)local64_xchg(&event->hw.prev_count, ct);
+
+	return 0;
+}
+
+static u64 h_24x7_get_value(struct perf_event *event)
+{
+	u64 value = 0;
+
+	/* We checked this in event init, so it shouldn't fail here... */
+	if (single_24x7_request(event, &value) != 0)
+		return 0;
+
+	return value;
+}
+
+static void update_event_count(struct perf_event *event, u64 now)
+{
+	/* Swap in @now as the new baseline, then credit the difference. */
+	s64 prev = local64_xchg(&event->hw.prev_count, now);
+
+	local64_add(now - prev, &event->count);
+}
+
+static void h_24x7_event_read(struct perf_event *event)
+{
+	u64 now;
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_hw *h24x7hw;
+	int txn_flags;
+
+	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+
+	/*
+	 * If in a READ transaction, add this counter to the list of
+	 * counters to read during the next HCALL (i.e commit_txn()).
+	 * If not in a READ transaction, go ahead and make the HCALL
+	 * to read this counter by itself.
+	 */
+
+	if (txn_flags & PERF_PMU_TXN_READ) {
+		int i;
+		int ret;
+		/* an earlier failure in this transaction: queue nothing more */
+		if (__this_cpu_read(hv_24x7_txn_err))
+			return;
+
+		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+
+		ret = add_event_to_24x7_request(event, request_buffer);
+		if (ret) {
+			__this_cpu_write(hv_24x7_txn_err, ret);
+		} else {
+			/*
+			 * Associate the event with the HCALL request index,
+			 * so ->commit_txn() can quickly find/update count.
+			 */
+			i = request_buffer->num_requests - 1;
+			/* put_cpu_var() names the per-CPU variable, like get_cpu_var() */
+			h24x7hw = &get_cpu_var(hv_24x7_hw);
+			h24x7hw->events[i] = event;
+			put_cpu_var(hv_24x7_hw);
+		}
+
+		put_cpu_var(hv_24x7_reqb);
+	} else {
+		now = h_24x7_get_value(event);
+		update_event_count(event, now);
+	}
+}
+
+static void h_24x7_event_start(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_RELOAD)	/* re-base prev_count on a fresh read */
+		local64_set(&event->hw.prev_count, h_24x7_get_value(event));
+}
+
+static void h_24x7_event_stop(struct perf_event *event, int flags)
+{
+	h_24x7_event_read(event);	/* @flags is not consulted */
+}
+
+static int h_24x7_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		h_24x7_event_start(event, flags);
+	/* nothing else is set up here, so this always reports success */
+	return 0;
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are
+ * always counting and don't need/support ADD transactions.
+ * Cache the flags, but otherwise ignore transactions that
+ * are not PERF_PMU_TXN_READ.
+ */
+static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
+{
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+
+	/* We should not be called if we are already in a txn */
+	WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
+
+	__this_cpu_write(hv_24x7_txn_flags, flags);
+	if (flags & ~PERF_PMU_TXN_READ)
+		return;
+	/* a READ transaction starts from an empty request/result buffer pair */
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	init_24x7_request(request_buffer, result_buffer);
+
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
+}
+
+/*
+ * Clean up transaction state: clear this CPU's cached flags and error.
+ *
+ * NOTE: Ignore state of request and result buffers for now.
+ *	 We will initialize them during the next read/txn.
+ */
+static void reset_txn(void)
+{
+	__this_cpu_write(hv_24x7_txn_flags, 0);
+	__this_cpu_write(hv_24x7_txn_err, 0);
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and don't need/support ADD transactions. Clear ->txn_flags but otherwise
+ * ignore transactions that are not of type PERF_PMU_TXN_READ.
+ *
+ * For READ transactions, submit all pending 24x7 requests (i.e requests
+ * that were queued by h_24x7_event_read()), to the hypervisor and update
+ * the event counts.
+ */
+static int h_24x7_event_commit_txn(struct pmu *pmu)
+{
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+	struct hv_24x7_result *res, *next_res;
+	u64 count;
+	int i, ret, txn_flags;
+	struct hv_24x7_hw *h24x7hw;
+
+	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+	WARN_ON_ONCE(!txn_flags);
+
+	ret = 0;
+	if (txn_flags & ~PERF_PMU_TXN_READ)
+		goto out;
+	/* an error recorded by h_24x7_event_read() fails the whole commit */
+	ret = __this_cpu_read(hv_24x7_txn_err);
+	if (ret)
+		goto out;
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	ret = make_24x7_request(request_buffer, result_buffer);
+	if (ret)
+		goto put_reqb;
+
+	h24x7hw = &get_cpu_var(hv_24x7_hw);
+
+	/* Go through results in the result buffer to update event counts. */
+	for (i = 0, res = result_buffer->results;
+	     i < result_buffer->num_results; i++, res = next_res) {
+		struct perf_event *event = h24x7hw->events[res->result_ix];
+		/* NOTE(review): result_ix is used as an index without a range check */
+		ret = get_count_from_result(event, result_buffer, res, &count,
+					    &next_res);
+		if (ret)
+			break;
+
+		update_event_count(event, count);
+	}
+
+	put_cpu_var(hv_24x7_hw);
+
+put_reqb:
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
+out:
+	reset_txn();
+	return ret;
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and don't need/support ADD transactions. However, regardless of type
+ * of transaction, all we need to do is cleanup, so we don't have to check
+ * the type of transaction.
+ */
+static void h_24x7_event_cancel_txn(struct pmu *pmu)
+{
+	WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
+	reset_txn();
+}
+
+static struct pmu h_24x7_pmu = {
+	.task_ctx_nr = perf_invalid_context,
+
+	.name = "hv_24x7",
+	.attr_groups = attr_groups,
+	.event_init  = h_24x7_event_init,
+	.add         = h_24x7_event_add,
+	.del         = h_24x7_event_stop,	/* same handler as .stop */
+	.start       = h_24x7_event_start,
+	.stop        = h_24x7_event_stop,
+	.read        = h_24x7_event_read,
+	.start_txn   = h_24x7_event_start_txn,
+	.commit_txn  = h_24x7_event_commit_txn,
+	.cancel_txn  = h_24x7_event_cancel_txn,
+	.capabilities = PERF_PMU_CAP_NO_EXCLUDE, /* hv_24x7_init() ORs in NO_INTERRUPT */
+};
+
+static int ppc_hv_24x7_cpu_online(unsigned int cpu)
+{
+	if (cpumask_empty(&hv_24x7_cpumask))	/* no CPU recorded in the mask yet */
+		cpumask_set_cpu(cpu, &hv_24x7_cpumask);
+
+	return 0;
+}
+
+static int ppc_hv_24x7_cpu_offline(unsigned int cpu)
+{
+	int target;
+
+	/* Check if exiting cpu is used for collecting 24x7 events */
+	if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask))
+		return 0;
+
+	/* Find a new cpu to collect 24x7 events */
+	target = cpumask_last(cpu_active_mask);
+	/* NOTE(review): out of range presumably means no active CPU is left */
+	if (target < 0 || target >= nr_cpu_ids) {
+		pr_err("hv_24x7: CPU hotplug init failed\n");
+		return -1;	/* NOTE(review): bare -1, not an errno constant */
+	}
+
+	/* Migrate 24x7 events to the new target */
+	cpumask_set_cpu(target, &hv_24x7_cpumask);
+	perf_pmu_migrate_context(&h_24x7_pmu, cpu, target);
+
+	return 0;
+}
+
+static int hv_24x7_cpu_hotplug_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
+			  "perf/powerpc/hv_24x7:online",
+			  ppc_hv_24x7_cpu_online,	/* run as a CPU comes up */
+			  ppc_hv_24x7_cpu_offline);	/* run as a CPU goes down */
+}
+
+static int hv_24x7_init(void)
+{
+	int r;
+	unsigned long hret;
+	unsigned int pvr = mfspr(SPRN_PVR);
+	struct hv_perf_caps caps;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		pr_debug("not a virtualized system, not enabling\n");
+		return -ENODEV;
+	}
+
+	/* POWER8 only supports v1, while POWER9 only supports v2. */
+	if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+	    PVR_VER(pvr) == PVR_POWER8NVL)
+		interface_version = 1;
+	else {
+		interface_version = 2;
+
+		/* SMT8 in POWER9 needs to aggregate result elements. */
+		if (threads_per_core == 8)
+			aggregate_result_elements = true;
+	}
+
+	hret = hv_perf_caps_get(&caps);
+	if (hret) {
+		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", hret);
+		return -ENODEV;
+	}
+
+	hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
+	if (!hv_page_cache)
+		return -ENOMEM;
+
+	/* sampling not supported */
+	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+	/* from here on, a failure must release the page cache: goto e_cache */
+	r = create_events_from_catalog(&event_group.attrs,
+				   &event_desc_group.attrs,
+				   &event_long_desc_group.attrs);
+	if (r)
+		goto e_cache;
+
+	/* init cpuhotplug */
+	r = hv_24x7_cpu_hotplug_init();
+	if (r)
+		goto e_cache;
+
+	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
+	if (r)
+		goto e_cache;
+
+	read_24x7_sys_info();
+	return 0;
+e_cache:	/* NOTE(review): event attrs and hotplug state are not undone */
+	kmem_cache_destroy(hv_page_cache);
+	return r;
+}
+
+device_initcall(hv_24x7_init);
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
new file mode 100644
index 0000000000..ae4ae4813e
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_POWERPC_PERF_HV_24X7_H_
+#define LINUX_POWERPC_PERF_HV_24X7_H_
+
+#include <linux/types.h>
+
+enum hv_perf_domains {
+#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, /* only n and v are used here */
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+	HV_PERF_DOMAIN_MAX,	/* one more than the last DOMAIN() value listed */
+};
+
+#define H24x7_REQUEST_SIZE(iface_version)	((iface_version) == 1 ? 16 : 32) /* bytes per request */
+
+struct hv_24x7_request {
+	/* PHYSICAL domains require enabling via phyp/hmc. */
+	__u8 performance_domain;
+	__u8 reserved[0x1];
+
+	/* bytes to read starting at @data_offset. must be a multiple of 8 */
+	__be16 data_size;
+
+	/*
+	 * byte offset within the perf domain to read from. must be 8 byte
+	 * aligned
+	 */
+	__be32 data_offset;
+
+	/*
+	 * only valid for VIRTUAL_PROCESSOR domains, ignored for others.
+	 * -1 means "current partition only"
+	 *  Enabling via phyp/hmc required for non-"-1" values. 0 forbidden
+	 *  unless requestor is 0.
+	 */
+	__be16 starting_lpar_ix;
+
+	/*
+	 * Ignored when @starting_lpar_ix == -1
+	 * Ignored when @performance_domain is not VIRTUAL_PROCESSOR_*
+	 * -1 means "infinite" or all
+	 */
+	__be16 max_num_lpars;
+
+	/* chip, core, or virtual processor based on @performance_domain */
+	__be16 starting_ix;
+	__be16 max_ix;
+
+	/* The following fields were added in v2 of the 24x7 interface. */
+
+	__u8 starting_thread_group_ix;
+
+	/* -1 means all thread groups starting at @starting_thread_group_ix */
+	__u8 max_num_thread_groups;
+
+	__u8 reserved2[0xE];	/* brings the packed struct to 32 bytes */
+} __packed;
+
+struct hv_24x7_request_buffer {
+	/* version of the 24x7 interface in use; hv_24x7_init() picks 1 or 2 */
+	/* it also selects the per-request size, see H24x7_REQUEST_SIZE() */
+	__u8 interface_version;
+	__u8 num_requests;
+	__u8 reserved[0xE];
+	struct hv_24x7_request requests[];
+} __packed;
+
+struct hv_24x7_result_element_v1 {
+	__be16 lpar_ix;
+
+	/*
+	 * represents the core, chip, or virtual processor based on the
+	 * request's @performance_domain
+	 */
+	__be16 domain_ix;
+
+	/* -1 if @performance_domain does not refer to a virtual processor */
+	__be32 lpar_cfg_instance_id;
+
+	/* size = @result_element_data_size of containing result. */
+	__u64 element_data[];	/* starts at byte offset 8 */
+} __packed;
+
+/*
+ * We need a separate struct for v2 because the offset of @element_data changed
+ * between versions.
+ */
+struct hv_24x7_result_element_v2 {
+	__be16 lpar_ix;
+
+	/*
+	 * represents the core, chip, or virtual processor based on the
+	 * request's @performance_domain
+	 */
+	__be16 domain_ix;
+
+	/* -1 if @performance_domain does not refer to a virtual processor */
+	__be32 lpar_cfg_instance_id;
+
+	__u8 thread_group_ix;
+
+	__u8 reserved[7];
+
+	/* size = @result_element_data_size of containing result. */
+	__u64 element_data[];	/* starts at byte offset 16 */
+} __packed;
+
+struct hv_24x7_result {
+	/*
+	 * The index of the 24x7 Request Structure in the 24x7 Request Buffer
+	 * used to request this result.
+	 */
+	__u8 result_ix;
+
+	/*
+	 * 0 = not all result elements fit into the buffer, additional requests
+	 *     required
+	 * 1 = all result elements were returned
+	 */
+	__u8 results_complete;
+	__be16 num_elements_returned;
+
+	/*
+	 * This is a copy of @data_size from the corresponding hv_24x7_request
+	 *
+	 * Warning: to obtain the size of each element in @elements you have
+	 * to add the size of the other members of the result_element struct.
+	 */
+	__be16 result_element_data_size;
+	__u8 reserved[0x2];	/* the fixed part ends here, 8 bytes in */
+
+	/*
+	 * Either
+	 *	struct hv_24x7_result_element_v1[@num_elements_returned]
+	 * or
+	 *	struct hv_24x7_result_element_v2[@num_elements_returned]
+	 *
+	 * depending on the interface_version field of the
+	 * struct hv_24x7_data_result_buffer containing this result.
+	 */
+	char elements[];
+} __packed;
+
+struct hv_24x7_data_result_buffer {
+	/* See versioning for request buffer */
+	__u8 interface_version;
+
+	__u8 num_results;
+	__u8 reserved[0x1];
+	__u8 failing_request_ix;
+	__be32 detailed_rc;
+	__be64 cec_cfg_instance_id;
+	__be64 catalog_version_num;
+	__u8 reserved2[0x8];	/* the fixed header ends here, 32 bytes in */
+	/* WARNING: only valid for the first result due to variable sizes of
+	 *	    results */
+	struct hv_24x7_result results[]; /* [@num_results] */
+} __packed;
+
+#endif
diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c
new file mode 100644
index 0000000000..0370518edd
--- /dev/null
+++ b/arch/powerpc/perf/hv-common.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+#include "hv-gpci.h"
+#include "hv-common.h"
+
+unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
+{
+	unsigned long r;
+	struct p {
+		struct hv_get_perf_counter_info_params params;
+		struct hv_gpci_system_performance_capabilities caps;
+	} __packed __aligned(sizeof(uint64_t));
+	/* Request parameters, directly followed by the capabilities record. */
+	struct p arg = {
+		.params = {
+			.counter_request = cpu_to_be32(
+				HV_GPCI_system_performance_capabilities),
+			.starting_index = cpu_to_be32(-1),
+			.counter_info_version_in = 0,
+		}
+	};
+	/* The hcall is given the physical address and size of the whole of arg. */
+	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			       virt_to_phys(&arg), sizeof(arg));
+	/* A non-zero status is handed back unchanged and @caps is not written. */
+	if (r)
+		return r;
+
+	pr_devel("capability_mask: 0x%x\n", arg.caps.capability_mask);
+	/* Reduce each capability to a 0/1 flag; version is copied through. */
+	caps->version = arg.params.counter_info_version_out;
+	caps->collect_privileged = !!arg.caps.perf_collect_privileged;
+	caps->ga = !!(arg.caps.capability_mask & HV_GPCI_CM_GA);
+	caps->expanded = !!(arg.caps.capability_mask & HV_GPCI_CM_EXPANDED);
+	caps->lab = !!(arg.caps.capability_mask & HV_GPCI_CM_LAB);
+
+	return r;
+}
diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h
new file mode 100644
index 0000000000..2cce17bc32
--- /dev/null
+++ b/arch/powerpc/perf/hv-common.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_POWERPC_PERF_HV_COMMON_H_
+#define LINUX_POWERPC_PERF_HV_COMMON_H_
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+struct hv_perf_caps {
+	u16 version;		/* counter_info_version_out of the hcall */
+	u16 collect_privileged:1,	/* perf_collect_privileged was non-zero */
+	    ga:1,		/* HV_GPCI_CM_GA set in capability_mask */
+	    expanded:1,		/* HV_GPCI_CM_EXPANDED set in capability_mask */
+	    lab:1,		/* HV_GPCI_CM_LAB set in capability_mask */
+	    unused:12;
+};
+
+unsigned long hv_perf_caps_get(struct hv_perf_caps *caps);
+
+
+#define EVENT_DEFINE_RANGE_FORMAT(name, attr_var, bit_start, bit_end)	\
+PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);		\
+EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end)
+
+/*
+ * The EVENT_DEFINE_RANGE_FORMAT() macro above includes helper functions
+ * for the fields (eg: event_get_starting_index()). For some fields we
+ * need the bit-range definition, but not the helper functions. Define a
+ * lite version of the above macro without the helpers and silence
+ * compiler warnings about unused static functions.
+ */
+#define EVENT_DEFINE_RANGE_FORMAT_LITE(name, attr_var, bit_start, bit_end) \
+PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);
+
+#define EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end)	\
+static u64 event_get_##name##_max(void)					\
+{									\
+	BUILD_BUG_ON((bit_start > bit_end)				\
+		    || (bit_end >= (sizeof(1ull) * 8)));		\
+	return (((1ull << (bit_end - bit_start)) - 1) << 1) + 1;	\
+}									\
+static u64 event_get_##name(struct perf_event *event)			\
+{									\
+	return (event->attr.attr_var >> (bit_start)) &			\
+		event_get_##name##_max();				\
+}
+
+#endif
diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
new file mode 100644
index 0000000000..5e86371a20
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "req-gen/_begin.h"
+
+/*
+ * Based on the document "getPerfCountInfo v1.07"
+ */
+
+/*
+ * #define REQUEST_NAME counter_request_name
+ * #define REQUEST_NUM r_num
+ * #define REQUEST_IDX_KIND starting_index_kind
+ * #include I(REQUEST_BEGIN)
+ * REQUEST(
+ *	__field(...)
+ *	__field(...)
+ *	__array(...)
+ *	__count(...)
+ * )
+ * #include I(REQUEST_END)
+ *
+ * - starting_index_kind is one of the following, depending on the event:
+ *
+ *   hw_chip_id: hardware chip id or -1 for current hw chip
+ *   partition_id
+ *   sibling_part_id,
+ *   phys_processor_idx:
+ *   0xffffffffffffffff: or -1, which means it is irrelevant for the event
+ *
+ * __count(offset, bytes, name):
+ *	a counter that should be exposed via perf
+ * __field(offset, bytes, name)
+ *	a normal field
+ * __array(offset, bytes, name)
+ *	an array of bytes
+ *
+ *
+ *	@bytes for __count, and __field _must_ be a numeral token
+ *	in decimal, not an expression and not in hex.
+ *
+ *
+ * TODO:
+ *	- expose secondary index (if any counter ever uses it, only 0xA0
+ *	  appears to use it right now, and it doesn't have any counters)
+ *	- embed versioning info
+ *	- include counter descriptions
+ */
+#define REQUEST_NAME dispatch_timebase_by_processor
+#define REQUEST_NUM 0x10
+#define REQUEST_IDX_KIND "phys_processor_idx=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0,	8,	processor_time_in_timebase_cycles)
+	__field(0x8,	4,	hw_processor_id)
+	__field(0xC,	2,	owning_part_id)
+	__field(0xE,	1,	processor_state)
+	__field(0xF,	1,	version)
+	__field(0x10,	4,	hw_chip_id)
+	__field(0x14,	4,	phys_module_id)
+	__field(0x18,	4,	primary_affinity_domain_idx)
+	__field(0x1C,	4,	secondary_affinity_domain_idx)
+	__field(0x20,	4,	processor_version)
+	__field(0x24,	2,	logical_processor_idx)
+	__field(0x26,	2,	reserved)
+	__field(0x28,	4,	processor_id_register)
+	__field(0x2C,	4,	phys_processor_idx)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME entitled_capped_uncapped_donated_idle_timebase_by_partition
+#define REQUEST_NUM 0x20
+#define REQUEST_IDX_KIND "sibling_part_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	8,	partition_id)
+	__count(0x8,	8,	entitled_cycles)
+	__count(0x10,	8,	consumed_capped_cycles)
+	__count(0x18,	8,	consumed_uncapped_cycles)
+	__count(0x20,	8,	cycles_donated)
+	__count(0x28,	8,	purr_idle_cycles)
+)
+#include I(REQUEST_END)
+
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
+/*
+ * Not available for counter_info_version >= 0x8, use
+ * run_instruction_cycles_by_partition(0x100) instead.
+ */
+#define REQUEST_NAME run_instructions_run_cycles_by_partition
+#define REQUEST_NUM 0x30
+#define REQUEST_IDX_KIND "sibling_part_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	8,	partition_id)
+	__count(0x8,	8,	instructions_completed)
+	__count(0x10,	8,	cycles)
+)
+#include I(REQUEST_END)
+#endif
+
+#define REQUEST_NAME system_performance_capabilities
+#define REQUEST_NUM 0x40
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	1,	perf_collect_privileged)
+	__field(0x1,	1,	capability_mask)
+	__array(0x2,	0xE,	reserved)
+)
+#include I(REQUEST_END)
+
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
+#define REQUEST_NAME processor_bus_utilization_abc_links
+#define REQUEST_NUM 0x50
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	total_link_cycles)
+	__count(0x18,	8,	idle_cycles_for_a_link)
+	__count(0x20,	8,	idle_cycles_for_b_link)
+	__count(0x28,	8,	idle_cycles_for_c_link)
+	__array(0x30,	0x20,	reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_wxyz_links
+#define REQUEST_NUM 0x60
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	total_link_cycles)
+	__count(0x18,	8,	idle_cycles_for_w_link)
+	__count(0x20,	8,	idle_cycles_for_x_link)
+	__count(0x28,	8,	idle_cycles_for_y_link)
+	__count(0x30,	8,	idle_cycles_for_z_link)
+	__array(0x38,	0x28,	reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_gx_links
+#define REQUEST_NUM 0x70
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	gx0_in_address_cycles)
+	__count(0x18,	8,	gx0_in_data_cycles)
+	__count(0x20,	8,	gx0_in_retries)
+	__count(0x28,	8,	gx0_in_bus_cycles)
+	__count(0x30,	8,	gx0_in_cycles_total)
+	__count(0x38,	8,	gx0_out_address_cycles)
+	__count(0x40,	8,	gx0_out_data_cycles)
+	__count(0x48,	8,	gx0_out_retries)
+	__count(0x50,	8,	gx0_out_bus_cycles)
+	__count(0x58,	8,	gx0_out_cycles_total)
+	__count(0x60,	8,	gx1_in_address_cycles)
+	__count(0x68,	8,	gx1_in_data_cycles)
+	__count(0x70,	8,	gx1_in_retries)
+	__count(0x78,	8,	gx1_in_bus_cycles)
+	__count(0x80,	8,	gx1_in_cycles_total)
+	__count(0x88,	8,	gx1_out_address_cycles)
+	__count(0x90,	8,	gx1_out_data_cycles)
+	__count(0x98,	8,	gx1_out_retries)
+	__count(0xA0,	8,	gx1_out_bus_cycles)
+	__count(0xA8,	8,	gx1_out_cycles_total)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_mc_links
+#define REQUEST_NUM 0x80
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	mc0_frames)
+	__count(0x18,	8,	mc0_reads)
+	__count(0x20,	8,	mc0_write)
+	__count(0x28,	8,	mc0_total_cycles)
+	__count(0x30,	8,	mc1_frames)
+	__count(0x38,	8,	mc1_reads)
+	__count(0x40,	8,	mc1_writes)
+	__count(0x48,	8,	mc1_total_cycles)
+)
+#include I(REQUEST_END)
+
+/* Processor_config (0x90) skipped, no counters */
+/* Current_processor_frequency (0x91) skipped, no counters */
+
+#define REQUEST_NAME processor_core_utilization
+#define REQUEST_NUM 0x94
+#define REQUEST_IDX_KIND "phys_processor_idx=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	phys_processor_idx)
+	__field(0x4,	4,	hw_processor_id)
+	__count(0x8,	8,	cycles_across_any_thread)
+	__count(0x10,	8,	timebase_at_collection)
+	__count(0x18,	8,	purr_cycles)
+	__count(0x20,	8,	sum_of_cycles_across_all_threads)
+	__count(0x28,	8,	instructions_completed)
+)
+#include I(REQUEST_END)
+#endif
+
+/* Processor_core_power_mode (0x95) skipped, no counters */
+/* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
+ *	no counters */
+/* Affinity_domain_information_by_domain (0xB0) skipped, no counters */
+/* Affinity_domain_information_by_partition (0xB1) skipped, no counters */
+/* Physical_memory_info (0xC0) skipped, no counters */
+/* Processor_bus_topology (0xD0) skipped, no counters */
+
+#define REQUEST_NAME partition_hypervisor_queuing_times
+#define REQUEST_NUM 0xE0
+#define REQUEST_IDX_KIND "partition_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	2, partition_id)
+	__array(0x2,	6, reserved1)
+	__count(0x8,	8, time_waiting_for_entitlement)
+	__count(0x10,	8, times_waited_for_entitlement)
+	__count(0x18,	8, time_waiting_for_phys_processor)
+	__count(0x20,	8, times_waited_for_phys_processor)
+	__count(0x28,	8, dispatches_on_home_core)
+	__count(0x30,	8, dispatches_on_home_primary_affinity_domain)
+	__count(0x38,	8, dispatches_on_home_secondary_affinity_domain)
+	__count(0x40,	8, dispatches_off_home_secondary_affinity_domain)
+	__count(0x48,	8, dispatches_on_dedicated_processor_donating_cycles)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_hypervisor_times
+#define REQUEST_NUM 0xF0
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0,	8,	time_spent_to_dispatch_virtual_processors)
+	__count(0x8,	8,	time_spent_processing_virtual_processor_timers)
+	__count(0x10,	8,	time_spent_managing_partitions_over_entitlement)
+	__count(0x18,	8,	time_spent_on_system_management)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_tlbie_count_and_time
+#define REQUEST_NUM 0xF4
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0,	8,	tlbie_instructions_issued)
+	/*
+	 * FIXME: The spec says the offset here is 0x10, which I suspect
+	 *	  is wrong.
+	 */
+	__count(0x8,	8,	time_spent_issuing_tlbies)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME partition_instruction_count_and_time
+#define REQUEST_NUM 0x100
+#define REQUEST_IDX_KIND "partition_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	2,	partition_id)
+	__array(0x2,	0x6,	reserved1)
+	__count(0x8,	8,	instructions_performed)
+	__count(0x10,	8,	time_collected)
+)
+#include I(REQUEST_END)
+
+/* set_mmcrh (0x80001000) skipped, no counters */
+/* retrieve_hpmcx (0x80002000) skipped, no counters */
+
+#include "req-gen/_end.h"
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
new file mode 100644
index 0000000000..27f18119fd
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -0,0 +1,1030 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hypervisor supplied "gpci" ("get performance counter info") performance
+ * counter support
+ *
+ * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
+ * Copyright 2014 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "hv-gpci: " fmt
+
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/io.h>
+
+#include "hv-gpci.h"
+#include "hv-common.h"
+
+/*
+ * Example usage:
+ *  perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,
+ *		  secondary_index=0,starting_index=0xffffffff,request=0x10/' ...
+ */
+
+/* u32 */
+EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
+/* u32 */
+/*
+ * Note that starting_index, phys_processor_idx, sibling_part_id,
+ * hw_chip_id, partition_id all refer to the same bit range. They
+ * are basically aliases for the starting_index. The specific alias
+ * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h
+ */
+EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);
+
+/* u16 */
+EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
+/* u8 */
+EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23);
+/* u8, bytes of data (1-8) */
+EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
+/* u32, byte offset */
+EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
+
+static cpumask_t hv_gpci_cpumask;
+
+static struct attribute *format_attrs[] = {
+	&format_attr_request.attr,
+	&format_attr_starting_index.attr,
+	&format_attr_phys_processor_idx.attr,
+	&format_attr_sibling_part_id.attr,
+	&format_attr_hw_chip_id.attr,
+	&format_attr_partition_id.attr,
+	&format_attr_secondary_index.attr,
+	&format_attr_counter_info_version.attr,
+
+	&format_attr_offset.attr,
+	&format_attr_length.attr,
+	NULL,
+};
+
+static const struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+static struct attribute_group event_group = {
+	.name  = "events",
+	/* .attrs is set in init */
+};
+
+/* Define <_name>_show(): print hv_perf_caps field @_name using @_format. */
+#define HV_CAPS_ATTR(_name, _format)				\
+static ssize_t _name##_show(struct device *dev,			\
+		struct device_attribute *attr, char *page)	\
+{								\
+	struct hv_perf_caps caps;				\
+	unsigned long hret = hv_perf_caps_get(&caps);		\
+								\
+	if (hret)						\
+		return -EIO;					\
+	return sysfs_emit(page, _format, caps._name);		\
+}								\
+static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
+
+/* sysfs show handler: emit COUNTER_INFO_VERSION_CURRENT in hex. */
+static ssize_t kernel_version_show(struct device *dev,
+				   struct device_attribute *attr, char *page)
+{
+	return sysfs_emit(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
+}
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
+}
+
+/* Interface attribute array index to store system information */
+#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
+#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
+#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
+#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
+#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
+#define INTERFACE_NULL_ATTR			11
+
+/* Counter request value to retrieve system information */
+enum {
+	PROCESSOR_BUS_TOPOLOGY,
+	PROCESSOR_CONFIG,
+	AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
+	AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
+	AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
+};
+
+static int sysinfo_counter_request[] = {
+	[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
+	[PROCESSOR_CONFIG] = 0x90,
+	[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
+	[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
+	[AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
+};
+
+static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+/*
+ * Issue one H_GET_PERF_COUNTER_INFO hcall for @req using the caller's
+ * request buffer @arg and append the returned counter_value elements to
+ * @buf as hex text, one element per line; *@n is the running length.
+ *
+ * Returns 0 (H_SUCCESS) or H_PARAMETER (partial data, more requests are
+ * needed), or -EPERM/-EIO/-EFBIG cast to unsigned long on failure.
+ */
+static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
+			u16 secondary_index, char *buf,
+			size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	unsigned long ret;
+	size_t i, j, elem_size;
+
+	arg->params.counter_request = cpu_to_be32(req);
+	arg->params.starting_index = cpu_to_be32(starting_index);
+	arg->params.secondary_index = cpu_to_be16(secondary_index);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * H_AUTHORITY: the partition is not permitted to retrieve performance
+	 * information; "Enable Performance Information Collection" must be set.
+	 */
+	if (ret == H_AUTHORITY)
+		return -EPERM;
+
+	/*
+	 * Anything other than H_SUCCESS or H_PARAMETER is a failure, e.g.
+	 * H_PRIVILEGE/H_HARDWARE because of invalid buffer-length/address or
+	 * some hardware error. H_PARAMETER corresponds to 'GEN_BUF_TOO_SMALL'.
+	 */
+	if (ret && (ret != H_PARAMETER))
+		return -EIO;
+
+	/*
+	 * 'returned_values' is the number of counter_value elements the hcall
+	 * returned and 'cv_element_size' the size of each one. Emit every
+	 * element as one line of hex. The space check is done before an
+	 * element is written (two characters per byte, the newline and the
+	 * terminating NUL) so that @buf is never written past PAGE_SIZE.
+	 */
+	elem_size = be16_to_cpu(arg->params.cv_element_size);
+
+	for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) {
+		if (*n + 2 * elem_size + 1 >= PAGE_SIZE) {
+			pr_info("System information exceeds PAGE_SIZE\n");
+			return -EFBIG;
+		}
+
+		for (j = i * elem_size; j < (i + 1) * elem_size; j++)
+			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[j]);
+		*n += sprintf(buf + *n,  "\n");
+	}
+
+	return ret;
+}
+
+/*
+ * sysfs show handler for the processor_bus_topology system information.
+ *
+ * Emits the counter_value elements of counter request 0xD0 as hex text,
+ * re-issuing the request for as long as the hypervisor reports partial
+ * data (H_PARAMETER).
+ *
+ * Returns the number of bytes placed in @buf or a negative errno.
+ */
+static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	/*
+	 * get_cpu_var() disables preemption, so every way out of this
+	 * function has to pass through the put_cpu_var() at the bottom.
+	 */
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0xD0 corresponds to request
+	 * type 'Processor_bus_topology', to retrieve
+	 * the system topology information.
+	 * starting_index value implies the starting hardware
+	 * chip id.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to make subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) +
+				(arg->bytes[last_element + 1] << 16) +
+				(arg->bytes[last_element] << 24) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+				starting_index, 0, buf, &n, arg);
+	}
+
+	put_cpu_var(hv_gpci_reqb);
+
+	/* ret is now either 0 (all data collected) or a negative errno. */
+	return ret ? ret : n;
+}
+
+/*
+ * sysfs show handler for the processor_config system information.
+ *
+ * Emits the counter_value elements of counter request 0x90 as hex text,
+ * re-issuing the request for as long as the hypervisor reports partial
+ * data (H_PARAMETER).
+ *
+ * Returns the number of bytes placed in @buf or a negative errno.
+ */
+static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
+					char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	/*
+	 * get_cpu_var() disables preemption, so every way out of this
+	 * function has to pass through the put_cpu_var() at the bottom.
+	 */
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0x90 corresponds to request
+	 * type 'Processor_config', to retrieve
+	 * the system processor information.
+	 * starting_index value implies the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) +
+				(arg->bytes[last_element + 1] << 16) +
+				(arg->bytes[last_element] << 24) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+				starting_index, 0, buf, &n, arg);
+	}
+
+	put_cpu_var(hv_gpci_reqb);
+
+	/* ret is now either 0 (all data collected) or a negative errno. */
+	return ret ? ret : n;
+}
+
+/*
+ * sysfs show handler for the affinity_domain_via_virtual_processor
+ * system information.
+ *
+ * Emits the counter_value elements of counter request 0xA0 as hex text,
+ * re-issuing the request while the hypervisor reports partial data.
+ *
+ * Returns the number of bytes placed in @buf or a negative errno.
+ */
+static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	/*
+	 * get_cpu_var() disables preemption, so every way out of this
+	 * function has to pass through the put_cpu_var() at the bottom.
+	 */
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xA0 corresponds to request
+	 * type 'Affinity_domain_information_by_virtual_processor',
+	 * to retrieve the system affinity domain information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next secondary index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next secondary index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index and secondary index type is part of the
+		 * counter_value buffer elements, use the starting index value in the
+		 * last array element as subsequent starting index, and use secondary index
+		 * value in the last array element plus 1 as subsequent secondary index.
+		 * For counter request '0xA0', starting index points to partition id
+		 * and secondary index points to corresponding virtual processor index.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
+		u16 secondary_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+				starting_index, secondary_index, buf, &n, arg);
+	}
+
+	put_cpu_var(hv_gpci_reqb);
+
+	/* ret is now either 0 (all data collected) or a negative errno. */
+	return ret ? ret : n;
+}
+
+/*
+ * sysfs show handler for the affinity_domain_via_domain system information.
+ *
+ * Emits the counter_value elements of counter request 0xB0 as hex text,
+ * re-issuing the request for as long as the hypervisor reports partial
+ * data (H_PARAMETER).
+ *
+ * Returns the number of bytes placed in @buf or a negative errno.
+ */
+static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
+						char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	/*
+	 * get_cpu_var() disables preemption, so every way out of this
+	 * function has to pass through the put_cpu_var() at the bottom.
+	 */
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xB0 corresponds to request
+	 * type 'Affinity_domain_information_by_domain',
+	 * to retrieve the system affinity domain information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] +
+			(arg->bytes[last_element] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+					starting_index, 0, buf, &n, arg);
+	}
+
+	put_cpu_var(hv_gpci_reqb);
+
+	/* ret is now either 0 (all data collected) or a negative errno. */
+	return ret ? ret : n;
+}
+
+static void affinity_domain_via_partition_result_parse(int returned_values,
+			int element_size, char *buf, size_t *last_element,
+			size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	/* i is signed and k starts at 0: a count <= 0 parses nothing, *last_element = 0 */
+	size_t j = 0, k = 0, l, m;
+	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
+	int i;
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+	 * to show the total number of counter_value array elements
+	 * returned via hcall.
+	 * Unlike other request types, the data structure returned by this
+	 * request is variable-size. For this counter request type,
+	 * hcall populates 'cv_element_size' corresponds to minimum size of
+	 * the structure returned i.e; the size of the structure with no domain
+	 * information. Below loop go through all counter_value array
+	 * to determine the number and size of each domain array element and
+	 * add it to the output buffer.
+	 */
+	for (i = 0; i < returned_values; i++) {
+		k = j;
+		for (; k < j + element_size; k++)
+			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
+		*n += sprintf(buf + *n,  "\n");
+
+		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
+		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
+
+		for (l = 0; l < total_affinity_domain_ele; l++) {
+			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
+				*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
+				k++;
+			}
+			*n += sprintf(buf + *n,  "\n");
+		}
+
+		*n += sprintf(buf + *n,  "\n");
+		j = k;
+	}
+
+	*last_element = k;
+}
+
+static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
+							char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+	size_t last_element = 0;
+	u32 starting_index;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0xB1 corresponds to counter request
+	 * type 'Affinity_domain_information_by_partition',
+	 * to retrieve the system affinity domain by partition information.
+	 * starting_index value refers to the starting hardware
+	 * processor index.
+	 */
+	arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+	arg->params.starting_index = cpu_to_be32(0);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	if (!ret)
+		goto parse_result;
+
+	if (ret && (ret != H_PARAMETER))
+		goto out;
+
+	/*
+	 * ret value as 'H_PARAMETER' implies that the current buffer size
+	 * can't accommodate all the information, and a partial buffer
+	 * returned. To handle that, we need to make subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		affinity_domain_via_partition_result_parse(
+			be16_to_cpu(arg->params.returned_values) - 1,
+			be16_to_cpu(arg->params.cv_element_size), buf,
+			&last_element, &n, arg);
+
+		if (n >= PAGE_SIZE) {
+			put_cpu_var(hv_gpci_reqb);
+			pr_debug("System information exceeds PAGE_SIZE\n");
+			return -EFBIG;
+		}
+
+		/*
+		 * Only returned_values - 1 elements were parsed above, so
+		 * last_element is the offset of the final, unparsed element;
+		 * its first two bytes are reused as-is as the next starting index.
+		 */
+		starting_index = (u8)arg->bytes[last_element] << 8 |
+				(u8)arg->bytes[last_element + 1];
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+		arg->params.counter_request = cpu_to_be32(
+				sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+		arg->params.starting_index = cpu_to_be32(starting_index);
+
+		ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+				virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+		if (ret && (ret != H_PARAMETER))
+			goto out;
+	}
+
+parse_result:
+	affinity_domain_via_partition_result_parse(
+		be16_to_cpu(arg->params.returned_values),
+		be16_to_cpu(arg->params.cv_element_size),
+		buf, &last_element, &n, arg);
+
+	put_cpu_var(hv_gpci_reqb);
+	return n;
+
+out:
+	put_cpu_var(hv_gpci_reqb);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
+	 * which means that the current buffer size cannot accommodate
+	 * all the information and a partial buffer returned.
+	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
+	 *
+	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+	 * performance information, and required to set
+	 * "Enable Performance Information Collection" option.
+	 */
+	if (ret == H_AUTHORITY)
+		return -EPERM;
+
+	/*
+	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+	 * because of invalid buffer-length/address or due to some hardware
+	 * error.
+	 */
+	return -EIO;
+}
+
+static DEVICE_ATTR_RO(kernel_version);
+static DEVICE_ATTR_RO(cpumask);
+
+HV_CAPS_ATTR(version, "0x%x\n");
+HV_CAPS_ATTR(ga, "%d\n");
+HV_CAPS_ATTR(expanded, "%d\n");
+HV_CAPS_ATTR(lab, "%d\n");
+HV_CAPS_ATTR(collect_privileged, "%d\n");
+
+/*
+ * Attributes of the "interface" group: six fixed entries, five NULL slots
+ * for the optional system-information attributes, and the terminating NULL.
+ * add_sysinfo_interface_files() fills the optional slots starting at index
+ * INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, so the slot order matters
+ * (presumably it mirrors the INTERFACE_*_ATTR enumeration -- verify before
+ * reordering).
+ */
+static struct attribute *interface_attrs[] = {
+	&dev_attr_kernel_version.attr,
+	&hv_caps_attr_version.attr,
+	&hv_caps_attr_ga.attr,
+	&hv_caps_attr_expanded.attr,
+	&hv_caps_attr_lab.attr,
+	&hv_caps_attr_collect_privileged.attr,
+	/*
+	 * This NULL is a placeholder for the processor_bus_topology
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the processor_config
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_domain
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_partition
+	 * attribute, set in init function if applicable.
+	 */
+	NULL,
+	/* End-of-array marker */
+	NULL,
+};
+
+/* NULL-terminated attribute list holding only the cpumask attribute */
+static struct attribute *cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+/* Unnamed group wrapping cpumask_attrs[] */
+static const struct attribute_group cpumask_attr_group = {
+	.attrs = cpumask_attrs,
+};
+
+/* Group named "interface", built from interface_attrs[] */
+static const struct attribute_group interface_group = {
+	.attrs = interface_attrs,
+	.name = "interface",
+};
+
+/* NULL-terminated list of attribute groups referenced by h_gpci_pmu */
+static const struct attribute_group *attr_groups[] = {
+	&format_group,
+	&event_group,
+	&interface_group,
+	&cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * single_gpci_request: issue one H_GET_PERF_COUNTER_INFO hcall using this
+ * cpu's request buffer and pull a counter value out of the result.
+ *
+ * @req, @starting_index, @secondary_index and @version_in are stored in the
+ * request parameters. @offset and @length select the bytes of the buffer that
+ * form the counter; the byte at @offset is the most significant one.
+ *
+ * Returns the hcall return code. *@value is written only when that code is 0.
+ */
+static unsigned long single_gpci_request(u32 req, u32 starting_index,
+		u16 secondary_index, u8 version_in, u32 offset, u8 length,
+		u64 *value)
+{
+	struct hv_gpci_request_buffer *reqb;
+	unsigned long hret;
+	u64 acc = 0;
+	size_t pos;
+
+	reqb = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	reqb->params.counter_info_version_in = version_in;
+	reqb->params.secondary_index = cpu_to_be16(secondary_index);
+	reqb->params.starting_index = cpu_to_be32(starting_index);
+	reqb->params.counter_request = cpu_to_be32(req);
+
+	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);
+	if (hret) {
+		pr_devel("hcall failed: 0x%lx\n", hret);
+	} else {
+		/*
+		 * offset and length were verified to lie within the zeroed
+		 * buffer at event init.
+		 */
+		for (pos = offset; pos < offset + length; pos++)
+			acc |= (u64)(reqb->bytes[pos]) <<
+				((length - 1 - (pos - offset)) * 8);
+
+		*value = acc;
+	}
+
+	put_cpu_var(hv_gpci_reqb);
+	return hret;
+}
+
+/*
+ * Fetch the current raw counter value described by @event's config fields.
+ * Returns 0 when the hcall fails.
+ */
+static u64 h_gpci_get_value(struct perf_event *event)
+{
+	u64 val;
+
+	if (single_gpci_request(event_get_request(event),
+				event_get_starting_index(event),
+				event_get_secondary_index(event),
+				event_get_counter_info_version(event),
+				event_get_offset(event),
+				event_get_length(event),
+				&val))
+		return 0;
+
+	return val;
+}
+
+/*
+ * Re-read the counter, remember the new reading as prev_count and add the
+ * difference from the previous reading to the event count.
+ */
+static void h_gpci_event_update(struct perf_event *event)
+{
+	u64 cur = h_gpci_get_value(event);
+	s64 old = local64_xchg(&event->hw.prev_count, cur);
+
+	local64_add(cur - old, &event->count);
+}
+
+/* Record the current counter reading as the baseline for later updates. */
+static void h_gpci_event_start(struct perf_event *event, int flags)
+{
+	u64 baseline = h_gpci_get_value(event);
+
+	local64_set(&event->hw.prev_count, baseline);
+}
+
+/*
+ * Stopping only folds the latest reading into the event count; @flags is
+ * not consulted.
+ */
+static void h_gpci_event_stop(struct perf_event *event, int flags)
+{
+	h_gpci_event_update(event);
+}
+
+/* Add @event; take the baseline reading right away if PERF_EF_START is set. */
+static int h_gpci_event_add(struct perf_event *event, int flags)
+{
+	bool start_now = flags & PERF_EF_START;
+
+	if (start_now)
+		h_gpci_event_start(event, flags);
+
+	return 0;
+}
+
+/*
+ * h_gpci_event_init: validate a new event for this PMU.
+ *
+ * Returns -ENOENT when the event belongs to another PMU, -EOPNOTSUPP for
+ * branch-stack requests, -EINVAL for a bad config (config2 set, length not in
+ * 1..8, counter bytes outside the data area, or a failing trial hcall) and 0
+ * on success.
+ */
+static int h_gpci_event_init(struct perf_event *event)
+{
+	unsigned long hret;
+	u64 scratch;
+	u8 nbytes;
+
+	/* Only take events addressed to this PMU */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* config2 is reserved and must be zero */
+	if (event->attr.config2) {
+		pr_devel("config2 set when reserved\n");
+		return -EINVAL;
+	}
+
+	/* branch sampling is not available */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	nbytes = event_get_length(event);
+	if (nbytes == 0 || nbytes > 8) {
+		pr_devel("length invalid\n");
+		return -EINVAL;
+	}
+
+	/* the counter must end inside the data area of the buffer */
+	if ((event_get_offset(event) + nbytes) > HGPCI_MAX_DATA_BYTES) {
+		pr_devel("request outside of buffer: %zu > %zu\n",
+				(size_t)event_get_offset(event) + nbytes,
+				HGPCI_MAX_DATA_BYTES);
+		return -EINVAL;
+	}
+
+	/* issue the request once to see whether it works; the value is unused */
+	hret = single_gpci_request(event_get_request(event),
+				   event_get_starting_index(event),
+				   event_get_secondary_index(event),
+				   event_get_counter_info_version(event),
+				   event_get_offset(event),
+				   nbytes,
+				   &scratch);
+	if (hret) {
+		pr_devel("gpci hcall failed\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * PMU description for hv_gpci. Deleting and stopping an event share one
+ * handler, and reading an event is the same operation as updating it.
+ */
+static struct pmu h_gpci_pmu = {
+	.name         = "hv_gpci",
+	.task_ctx_nr  = perf_invalid_context,
+	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+	.attr_groups  = attr_groups,
+
+	.event_init   = h_gpci_event_init,
+	.add          = h_gpci_event_add,
+	.del          = h_gpci_event_stop,
+	.start        = h_gpci_event_start,
+	.stop         = h_gpci_event_stop,
+	.read         = h_gpci_event_update,
+};
+
+/*
+ * Hotplug online callback: @cpu becomes the collecting cpu only when no cpu
+ * is designated yet.
+ */
+static int ppc_hv_gpci_cpu_online(unsigned int cpu)
+{
+	/* A cpu is already designated: keep it */
+	if (!cpumask_empty(&hv_gpci_cpumask))
+		return 0;
+
+	cpumask_set_cpu(cpu, &hv_gpci_cpumask);
+	return 0;
+}
+
+/*
+ * Hotplug offline callback: if @cpu was the collecting cpu, hand the job and
+ * the perf context over to the last cpu in cpu_active_mask. Returns -1 when
+ * no valid replacement exists, 0 otherwise.
+ */
+static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
+{
+	int new_cpu;
+
+	/* Only the cpu that collects gpci events needs a replacement */
+	if (cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) {
+		new_cpu = cpumask_last(cpu_active_mask);
+
+		if (new_cpu < 0 || new_cpu >= nr_cpu_ids) {
+			pr_err("hv_gpci: CPU hotplug init failed\n");
+			return -1;
+		}
+
+		/* Designate the replacement and move the events over */
+		cpumask_set_cpu(new_cpu, &hv_gpci_cpumask);
+		perf_pmu_migrate_context(&h_gpci_pmu, cpu, new_cpu);
+	}
+
+	return 0;
+}
+
+/* Install the online/offline callbacks; returns cpuhp_setup_state()'s result. */
+static int hv_gpci_cpu_hotplug_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+			       "perf/powerpc/hv_gcpi:online",
+			       ppc_hv_gpci_cpu_online,
+			       ppc_hv_gpci_cpu_offline);
+	return rc;
+}
+
+/*
+ * sysinfo_device_attr_create: build the device attribute for one
+ * system-information file.
+ *
+ * @sysinfo_interface_group_index selects the attribute (its slot in
+ * interface_attrs[]); @req is the counter request value used to probe the
+ * hypervisor. The attribute is created when the probe hcall returns 0,
+ * H_AUTHORITY or H_PARAMETER.
+ *
+ * Returns a kzalloc()ed attribute, or NULL for a bad index, an unsupported
+ * request or an allocation failure.
+ */
+static struct device_attribute *sysinfo_device_attr_create(int
+		sysinfo_interface_group_index, u32 req)
+{
+	struct hv_gpci_request_buffer *reqb;
+	struct device_attribute *dattr;
+	unsigned long hret;
+
+	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
+			sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
+		pr_info("Wrong interface group index for system information\n");
+		return NULL;
+	}
+
+	/* Probe whether the given counter request value is supported */
+	reqb = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);
+	reqb->params.counter_request = cpu_to_be32(req);
+	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);
+	put_cpu_var(hv_gpci_reqb);
+
+	/* Any other return code means the request is not usable */
+	if (hret && hret != H_AUTHORITY && hret != H_PARAMETER) {
+		pr_devel("hcall failed, with error: 0x%lx\n", hret);
+		return NULL;
+	}
+
+	dattr = kzalloc(sizeof(*dattr), GFP_KERNEL);
+	if (!dattr)
+		return NULL;
+
+	sysfs_attr_init(&dattr->attr);
+	dattr->attr.mode = 0444;
+
+	/* Name and show routine depend on the slot being filled */
+	switch (sysinfo_interface_group_index) {
+	case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
+		dattr->attr.name = "processor_bus_topology";
+		dattr->show = processor_bus_topology_show;
+		break;
+	case INTERFACE_PROCESSOR_CONFIG_ATTR:
+		dattr->attr.name = "processor_config";
+		dattr->show = processor_config_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
+		dattr->attr.name = "affinity_domain_via_virtual_processor";
+		dattr->show = affinity_domain_via_virtual_processor_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
+		dattr->attr.name = "affinity_domain_via_domain";
+		dattr->show = affinity_domain_via_domain_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
+		dattr->attr.name = "affinity_domain_via_partition";
+		dattr->show = affinity_domain_via_partition_show;
+		break;
+	}
+
+	return dattr;
+}
+
+/*
+ * add_sysinfo_interface_files: fill the optional slots of interface_attrs[].
+ *
+ * All-or-nothing: the slots are written only when an attribute could be
+ * created for every counter request value; otherwise the attributes created
+ * so far are freed and interface_attrs[] is left untouched.
+ */
+static void add_sysinfo_interface_files(void)
+{
+	struct device_attribute *created[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
+	const int nr = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
+	int idx;
+
+	/* Create one device attribute per counter request value */
+	for (idx = 0; idx < nr; idx++) {
+		created[idx] = sysinfo_device_attr_create(idx + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
+				sysinfo_counter_request[idx]);
+		if (created[idx])
+			continue;
+
+		/* One request failed: release what was created before it */
+		while (--idx >= 0)
+			kfree(created[idx]);
+		return;
+	}
+
+	/* Everything succeeded: publish the attributes in interface_attrs[] */
+	for (idx = 0; idx < nr; idx++)
+		interface_attrs[idx + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &created[idx]->attr;
+}
+
+/*
+ * hv_gpci_init: set up and register the hv_gpci PMU.
+ *
+ * Bails out with -ENODEV when not running as an LPAR or when the hypervisor
+ * capabilities cannot be read. Otherwise installs the cpu hotplug callbacks,
+ * picks the event list matching the hypervisor's counter_info_version,
+ * registers the PMU and, on power10 and above, adds the system-information
+ * interface files.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int hv_gpci_init(void)
+{
+	int r;
+	unsigned long hret;
+	struct hv_perf_caps caps;
+	struct hv_gpci_request_buffer *arg;
+
+	hv_gpci_assert_offsets_correct();
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		pr_debug("not a virtualized system, not enabling\n");
+		return -ENODEV;
+	}
+
+	hret = hv_perf_caps_get(&caps);
+	if (hret) {
+		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
+				hret);
+		return -ENODEV;
+	}
+
+	/* init cpuhotplug */
+	r = hv_gpci_cpu_hotplug_init();
+	if (r)
+		return r;
+
+	/* sampling not supported */
+	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the output
+	 * counter_info_version value based on the system hypervisor.
+	 * Pass the counter request 0x10 corresponds to request type
+	 * 'Dispatch_timebase_by_processor', to get the supported
+	 * counter_info_version.
+	 */
+	arg->params.counter_request = cpu_to_be32(0x10);
+
+	/*
+	 * The hcall status is an unsigned long: keep it in hret rather than
+	 * squeezing it into the int used for errno values.
+	 */
+	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+	if (hret) {
+		pr_devel("hcall failed, can't get supported counter_info_version: 0x%lx\n", hret);
+		/* Fall back to version 0x8, which selects the full event list */
+		arg->params.counter_info_version_out = 0x8;
+	}
+
+	/*
+	 * Use counter_info_version_out value to assign
+	 * required hv-gpci event list.
+	 */
+	if (arg->params.counter_info_version_out >= 0x8)
+		event_group.attrs = hv_gpci_event_attrs;
+	else
+		event_group.attrs = hv_gpci_event_attrs_v6;
+
+	put_cpu_var(hv_gpci_reqb);
+
+	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
+	if (r) {
+		/*
+		 * Do not leave hotplug callbacks behind that would try to
+		 * migrate the context of a PMU that was never registered.
+		 * The callbacks are not invoked on removal for the same
+		 * reason.
+		 */
+		cpuhp_remove_state_nocalls(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE);
+		return r;
+	}
+
+	/* sysinfo interface files are only available for power10 and above platforms */
+	if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
+		add_sysinfo_interface_files();
+
+	return 0;
+}
+
+device_initcall(hv_gpci_init);
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
new file mode 100644
index 0000000000..c72020912d
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_POWERPC_PERF_HV_GPCI_H_
+#define LINUX_POWERPC_PERF_HV_GPCI_H_
+
+/*
+ * counter info version => fw version/reference (spec version)
+ *
+ * 8 => power8 (1.07)
+ * [7 is skipped by spec 1.07]
+ * 6 => TLBIE (1.07)
+ * 5 => v7r7m0.phyp (1.05)
+ * [4 skipped]
+ * 3 => v7r6m0.phyp (?)
+ * [1,2 skipped]
+ * 0 => v7r{2,3,4}m0.phyp (?)
+ *
+ * COUNTER_INFO_VERSION_CURRENT is the newest version listed above (8).
+ */
+#define COUNTER_INFO_VERSION_CURRENT 0x8
+
+/* Bit masks for the individual flags of the capability mask. */
+enum {
+	HV_GPCI_CM_GA = (1 << 7),
+	HV_GPCI_CM_EXPANDED = (1 << 6),
+	HV_GPCI_CM_LAB = (1 << 5)
+};
+
+/*
+ * Parameters for req-gen/perf.h, which is included with REQUEST_FILE,
+ * NAME_LOWER and NAME_UPPER defined (presumably to generate the hv_gpci
+ * request definitions from hv-gpci-requests.h -- see req-gen/perf.h).
+ * The three parameters are undefined again afterwards;
+ * ENABLE_EVENTS_COUNTERINFO_V6 stays defined.
+ */
+#define REQUEST_FILE "../hv-gpci-requests.h"
+#define NAME_LOWER hv_gpci
+#define NAME_UPPER HV_GPCI
+#define ENABLE_EVENTS_COUNTERINFO_V6
+#include "req-gen/perf.h"
+#undef REQUEST_FILE
+#undef NAME_LOWER
+#undef NAME_UPPER
+
+#endif
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 0000000000..56d82f7f97
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,1877 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * In-Memory Collection (IMC) Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *           (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *           (C) 2017 Hemant K Shaw, IBM Corporation.
+ */
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+
+/* Nest IMC state */
+
+/*
+ * Serialises the counting of nest-pmu units against hotplug register and
+ * unregister.
+ */
+static DEFINE_MUTEX(nest_init_lock);
+/* Per-cpu pointer to the nest reference-count structure used for that cpu */
+static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
+/* NULL-terminated array of nest PMUs, walked when a context is migrated */
+static struct imc_pmu **per_nest_pmu_arr;
+/* CPUs currently designated for nest counter collection */
+static cpumask_t nest_imc_cpumask;
+static struct imc_pmu_ref *nest_imc_refc;
+/* Zero while no nest PMU is registered (checked in the offline callback) */
+static int nest_pmus;
+
+/* Core IMC state */
+
+/* CPUs currently designated for core counter collection */
+static cpumask_t core_imc_cpumask;
+/* Reference counts, indexed by core id */
+static struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu *core_imc_pmu;
+
+/* Thread IMC state */
+
+static DEFINE_PER_CPU(u64 *, thread_imc_mem);
+static struct imc_pmu *thread_imc_pmu;
+static int thread_imc_mem_size;
+
+/* Trace IMC state */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
+/*
+ * Global reference count that keeps thread, core and trace-imc from racing
+ * with each other; "id" records the domain currently holding it.
+ */
+static struct imc_pmu_ref imc_global_refc = {
+	.id = 0,
+	.refc = 0,
+	.lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock),
+};
+
+/* Map @event to the imc_pmu that embeds the event's struct pmu. */
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+	struct pmu *base = event->pmu;
+
+	return container_of(base, struct imc_pmu, pmu);
+}
+
+/*
+ * Format attributes: "event" spans config bits 0-61; "offset" (bits 0-31),
+ * "rvalue" (bit 32) and "mode" (bits 33-40) name sub-fields of that range.
+ */
+PMU_FORMAT_ATTR(event, "config:0-61");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+
+/* NULL-terminated list behind the "format" group */
+static struct attribute *imc_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_offset.attr,
+	&format_attr_rvalue.attr,
+	&format_attr_mode.attr,
+	NULL,
+};
+
+static const struct attribute_group imc_format_group = {
+	.attrs = imc_format_attrs,
+	.name = "format",
+};
+
+/*
+ * Format attributes for imc trace-mode: the shared "event" attribute plus
+ * the cpmc_* sub-fields of config (reserved 0-19, event 20-27,
+ * samplesel 28-29, load 30-61).
+ */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+
+/* NULL-terminated list behind the trace-mode "format" group */
+static struct attribute *trace_imc_format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_cpmc_reserved.attr,
+	&format_attr_cpmc_event.attr,
+	&format_attr_cpmc_samplesel.attr,
+	&format_attr_cpmc_load.attr,
+	NULL,
+};
+
+static const struct attribute_group trace_imc_format_group = {
+	.attrs = trace_imc_format_attrs,
+	.name = "format",
+};
+
+/*
+ * Show routine for the "cpumask" attribute: prints the designated-cpu mask
+ * of the PMU's domain into @buf. Only the nest and core domains have such a
+ * mask; for any other domain nothing is printed and 0 is returned.
+ */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
+	cpumask_t *mask;
+
+	if (imc_pmu->domain == IMC_DOMAIN_NEST)
+		mask = &nest_imc_cpumask;
+	else if (imc_pmu->domain == IMC_DOMAIN_CORE)
+		mask = &core_imc_cpumask;
+	else
+		return 0;
+
+	return cpumap_print_to_pagebuf(true, buf, mask);
+}
+
+/* World-readable "cpumask" attribute backed by imc_pmu_cpumask_get_attr() */
+static DEVICE_ATTR(cpumask, 0444, imc_pmu_cpumask_get_attr, NULL);
+
+/* NULL-terminated list holding only the cpumask attribute */
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+/* Unnamed group wrapping imc_pmu_cpumask_attrs[] */
+static const struct attribute_group imc_pmu_cpumask_attr_group = {
+	.attrs = imc_pmu_cpumask_attrs,
+};
+
+/*
+ * device_str_attr_create: allocate a read-only (0444) perf event attribute
+ * called @name whose event string is @str. Neither string is copied; the
+ * attribute keeps the pointers it was given.
+ *
+ * Returns the embedded struct attribute, or NULL when allocation fails.
+ */
+static struct attribute *device_str_attr_create(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *pattr = kzalloc(sizeof(*pattr), GFP_KERNEL);
+
+	if (pattr) {
+		sysfs_attr_init(&pattr->attr.attr);
+
+		pattr->attr.show = perf_event_sysfs_show;
+		pattr->attr.attr.mode = 0444;
+		pattr->attr.attr.name = name;
+		pattr->event_str = str;
+
+		return &pattr->attr.attr;
+	}
+
+	return NULL;
+}
+
+/*
+ * imc_parse_event: fill @event from the device-tree node @np.
+ *
+ * @scale and @unit are the PMU-wide defaults (may be NULL), used when @np has
+ * no "scale"/"unit" property of its own. @prefix is prepended to the node's
+ * "event-name" and @base is added to its "reg" value.
+ *
+ * On success @event owns freshly allocated name/scale/unit strings and 0 is
+ * returned. On failure -EINVAL is returned, everything allocated here is
+ * freed and the string pointers of @event are reset to NULL, so the caller
+ * can safely hand the same slot to the next node.
+ */
+static int imc_parse_event(struct device_node *np, const char *scale,
+				  const char *unit, const char *prefix,
+				  u32 base, struct imc_events *event)
+{
+	const char *s;
+	u32 reg;
+
+	if (of_property_read_u32(np, "reg", &reg))
+		goto error;
+	/* Add the base_reg value to the "reg" */
+	event->value = base + reg;
+
+	if (of_property_read_string(np, "event-name", &s))
+		goto error;
+
+	event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s);
+	if (!event->name)
+		goto error;
+
+	/* A per-event "scale" overrides the PMU-wide default */
+	if (of_property_read_string(np, "scale", &s))
+		s = scale;
+
+	if (s) {
+		event->scale = kstrdup(s, GFP_KERNEL);
+		if (!event->scale)
+			goto error;
+	}
+
+	/* A per-event "unit" overrides the PMU-wide default */
+	if (of_property_read_string(np, "unit", &s))
+		s = unit;
+
+	if (s) {
+		event->unit = kstrdup(s, GFP_KERNEL);
+		if (!event->unit)
+			goto error;
+	}
+
+	return 0;
+error:
+	kfree(event->unit);
+	kfree(event->scale);
+	kfree(event->name);
+	/*
+	 * The caller reuses this slot for the next node after a failure.
+	 * Clear the pointers so that a later failure cannot free them a
+	 * second time and a later success cannot inherit a stale scale/unit.
+	 */
+	event->unit = NULL;
+	event->scale = NULL;
+	event->name = NULL;
+	return -EINVAL;
+}
+
+/*
+ * imc_free_events: Function to cleanup the events list, having
+ * 		    "nr_entries".
+ */
+static void imc_free_events(struct imc_events *events, int nr_entries)
+{
+	int idx;
+
+	/* A NULL list has nothing to release */
+	if (events) {
+		/* Drop the strings owned by each entry, then the array itself */
+		for (idx = 0; idx < nr_entries; idx++) {
+			kfree(events[idx].name);
+			kfree(events[idx].scale);
+			kfree(events[idx].unit);
+		}
+
+		kfree(events);
+	}
+}
+
+/*
+ * update_events_in_group: build the "events" attribute group of @pmu from
+ * the device tree.
+ *
+ * @node is the PMU node; its "events" property is a phandle to the node
+ * whose children describe the individual events. Each child that parses
+ * successfully yields an event attribute plus optional "<name>.scale" and
+ * "<name>.unit" attributes. The resulting group is stored in
+ * pmu->attr_groups[IMC_EVENT_ATTR].
+ *
+ * Returns 0 on success, and also when @node has no usable "events" phandle
+ * or no "events-prefix" (nothing is set up in that case); -ENOMEM when an
+ * allocation of the event array or the group fails.
+ */
+static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs, *dev_str;
+	struct device_node *np, *pmu_events;
+	u32 handle, base_reg = 0;
+	int i, j = 0, ct, ret;
+	const char *prefix, *g_scale, *g_unit;
+	const char *ev_val_str, *ev_scale_str, *ev_unit_str;
+
+	if (of_property_read_u32(node, "events", &handle))
+		return 0;
+
+	/* Did not find any node with a given phandle */
+	pmu_events = of_find_node_by_phandle(handle);
+	if (!pmu_events)
+		return 0;
+
+	/* Get a count of number of child nodes */
+	ct = of_get_child_count(pmu_events);
+
+	/* Get the event prefix */
+	if (of_property_read_string(node, "events-prefix", &prefix)) {
+		of_node_put(pmu_events);
+		return 0;
+	}
+
+	/* Get a global unit and scale data if available */
+	if (of_property_read_string(node, "scale", &g_scale))
+		g_scale = NULL;
+
+	if (of_property_read_string(node, "unit", &g_unit))
+		g_unit = NULL;
+
+	/*
+	 * "reg" property gives out the base offset of the counters data.
+	 * base_reg keeps its initial value of 0 when the property is absent.
+	 */
+	of_property_read_u32(node, "reg", &base_reg);
+
+	/* Allocate memory for the events */
+	pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
+	if (!pmu->events) {
+		of_node_put(pmu_events);
+		return -ENOMEM;
+	}
+
+	ct = 0;
+	/*
+	 * Parse the events and update the struct. A child that fails to
+	 * parse is skipped and its slot is reused for the next child.
+	 */
+	for_each_child_of_node(pmu_events, np) {
+		ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
+		if (!ret)
+			ct++;
+	}
+
+	of_node_put(pmu_events);
+
+	/* Allocate memory for attribute group */
+	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group) {
+		imc_free_events(pmu->events, ct);
+		pmu->events = NULL;
+		return -ENOMEM;
+	}
+
+	/*
+	 * Allocate memory for attributes.
+	 * "ct" has the total event structs added from the events-parent node.
+	 * Each event may contribute up to three attributes (event, event_scale
+	 * and event_unit); the extra zeroed slot terminates the array.
+	 */
+	attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
+	if (!attrs) {
+		kfree(attr_group);
+		imc_free_events(pmu->events, ct);
+		pmu->events = NULL;
+		return -ENOMEM;
+	}
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+
+	/*
+	 * With no parsed events the loop body must not run at all: there is
+	 * no events[0] to read and the single attrs slot is the terminator.
+	 */
+	for (i = 0; i < ct; i++) {
+		ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+		if (!ev_val_str)
+			continue;
+		dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
+		if (!dev_str) {
+			/* The string is owned by the attribute only once it exists */
+			kfree(ev_val_str);
+			continue;
+		}
+
+		attrs[j++] = dev_str;
+		if (pmu->events[i].scale) {
+			ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+			if (!ev_scale_str)
+				continue;
+			dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
+			if (!dev_str) {
+				kfree(ev_scale_str);
+				continue;
+			}
+
+			attrs[j++] = dev_str;
+		}
+
+		if (pmu->events[i].unit) {
+			ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+			if (!ev_unit_str)
+				continue;
+			dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
+			if (!dev_str) {
+				kfree(ev_unit_str);
+				continue;
+			}
+
+			attrs[j++] = dev_str;
+		}
+	}
+
+	/* Save the event attribute */
+	pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+
+	return 0;
+}
+
+/* get_nest_pmu_ref: return the nest imc_pmu_ref pointer stored for @cpu */
+static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
+{
+	struct imc_pmu_ref *ref = per_cpu(local_nest_imc_refc, cpu);
+
+	return ref;
+}
+
+/*
+ * Move the perf context of every nest PMU in per_nest_pmu_arr from @old_cpu
+ * to @new_cpu. Does nothing when either cpu number is negative.
+ */
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	struct imc_pmu **slot;
+
+	if (old_cpu < 0 || new_cpu < 0)
+		return;
+
+	/* The array is NULL-terminated */
+	for (slot = per_nest_pmu_arr; *slot; slot++)
+		perf_pmu_migrate_context(&(*slot)->pmu, old_cpu, new_cpu);
+}
+
+/*
+ * Hotplug offline callback for nest IMC.
+ *
+ * If @cpu was the designated cpu of its node, pick another cpu of the same
+ * node and migrate the nest PMU contexts to it. If the node has no other
+ * cpu, stop the nest counters and zero the reference count.
+ *
+ * Returns 0, or -EINVAL when no reference-count structure exists for @cpu.
+ */
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+	const struct cpumask *node_mask;
+	struct imc_pmu_ref *ref;
+	int next_cpu;
+
+	/* Nothing to do for a cpu that is not a designated one */
+	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+		return 0;
+
+	/*
+	 * We can get here while no nest PMU is registered, because a failed
+	 * cpuhotplug callback registration runs the offline path for every
+	 * node that was set up. That is not an offline failure, so report
+	 * success; cpuhp_setup_state() hands the real error to its caller,
+	 * which then runs the cleanup.
+	 */
+	if (!nest_pmus)
+		return 0;
+
+	/* Look for a replacement in the cpumask of the same node */
+	node_mask = cpumask_of_node(cpu_to_node(cpu));
+	next_cpu = cpumask_last(node_mask);
+
+	/* The last cpu of the mask is the outgoing one: try any other */
+	if (unlikely(next_cpu == cpu))
+		next_cpu = cpumask_any_but(node_mask, cpu);
+
+	if (next_cpu >= 0 && next_cpu < nr_cpu_ids) {
+		/* Designate the replacement and migrate the contexts */
+		cpumask_set_cpu(next_cpu, &nest_imc_cpumask);
+		nest_change_cpu_context(cpu, next_cpu);
+		return 0;
+	}
+
+	/* No cpu left on this chip: stop the engine */
+	opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+			       get_hard_smp_processor_id(cpu));
+
+	/*
+	 * This is the last cpu of the chip, so the reference count is zeroed
+	 * directly without taking its lock.
+	 */
+	ref = get_nest_pmu_ref(cpu);
+	if (!ref)
+		return -EINVAL;
+
+	ref->refc = 0;
+	return 0;
+}
+
+/*
+ * Hotplug online callback for nest IMC: the first cpu to come online on a
+ * node stops the node's nest counters and becomes the designated cpu.
+ * Returns 0, or the OPAL error when stopping the counters fails.
+ */
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+	static struct cpumask tmp_mask;
+	const struct cpumask *node_mask = cpumask_of_node(cpu_to_node(cpu));
+	int rc;
+
+	/* The node already has a designated cpu: nothing to do */
+	if (cpumask_and(&tmp_mask, node_mask, &nest_imc_cpumask))
+		return 0;
+
+	/* First online cpu on this node: disable the nest counters via OPAL */
+	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+				    get_hard_smp_processor_id(cpu));
+	if (rc)
+		return rc;
+
+	/* This cpu now collects the counters for its node */
+	cpumask_set_cpu(cpu, &nest_imc_cpumask);
+	return 0;
+}
+
+/* Install the nest IMC hotplug callbacks; returns cpuhp_setup_state()'s result. */
+static int nest_pmu_cpumask_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+			       "perf/powerpc/imc:online",
+			       ppc_nest_imc_cpu_online,
+			       ppc_nest_imc_cpu_offline);
+	return rc;
+}
+
+/*
+ * nest_imc_counters_release: event destroy hook for nest IMC events.
+ *
+ * Drops one reference on the nest engine used by event->cpu and stops the
+ * counters through OPAL when the last reference goes away.
+ */
+static void nest_imc_counters_release(struct perf_event *event)
+{
+	int rc, node_id;
+	struct imc_pmu_ref *ref;
+
+	if (event->cpu < 0)
+		return;
+
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * See if we need to disable the nest PMU.
+	 * If no events are currently in use, then we have to take a
+	 * lock to ensure that we don't race with another task doing
+	 * enable or disable the nest counters.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return;
+
+	/* Take the lock for this node and then decrement the reference count */
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		/*
+		 * The scenario where this is true is, when perf session is
+		 * started, followed by offlining of all cpus in a given node.
+		 *
+		 * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
+		 * function set the ref->count to zero, if the cpu which is
+		 * about to offline is the last cpu in a given node and make
+		 * an OPAL call to disable the engine in that node.
+		 *
+		 */
+		spin_unlock(&ref->lock);
+		return;
+	}
+	ref->refc--;
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			/* node_id is a NUMA node, so label it as one */
+			pr_err("nest-imc: Unable to stop the counters for node %d\n", node_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "nest-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+}
+
+/*
+ * nest_imc_event_init: validate a nest IMC event and bind it to its counter.
+ *
+ * Locates the counter memory of the chip that event->cpu belongs to, stores
+ * base + event offset in event->hw.event_base and takes a reference on the
+ * nest engine, starting the counters through OPAL for the first user.
+ *
+ * Returns 0 on success; -ENOENT for an event of another PMU; -EINVAL for a
+ * sampling event, a negative cpu, an out-of-range offset or a missing
+ * reference structure; -ENODEV when no chip memory matches; or the OPAL
+ * error when the counters cannot be started.
+ */
+static int nest_imc_event_init(struct perf_event *event)
+{
+	u32 config = event->attr.config;
+	u32 l_config;
+	int chip_id, node_id, rc;
+	struct imc_mem_info *pcni;
+	struct imc_pmu_ref *ref;
+	struct imc_pmu *pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	pmu = imc_event_to_pmu(event);
+
+	/* The event offset has to lie inside the counter memory */
+	if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
+		return -EINVAL;
+
+	/*
+	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER),
+	 * so the chip of this cpu selects the base address.
+	 */
+	chip_id = cpu_to_chip_id(event->cpu);
+	if (chip_id < 0)
+		return -ENODEV;
+
+	/*
+	 * Walk the per-chip entries until the id matches. The first entry is
+	 * always examined; an entry with a zero vbase ends the list.
+	 */
+	pcni = pmu->mem_info;
+	while (pcni->id != chip_id) {
+		pcni++;
+		if (pcni->vbase == 0)
+			return -ENODEV;
+	}
+
+	/* Counter address = chip base address + event offset */
+	l_config = config & IMC_EVENT_OFFSET_MASK;
+	event->hw.event_base = (u64)pcni->vbase + l_config;
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * Take a reference under the lock of this cpu's imc_pmu_ref; the
+	 * first reference also starts the counters.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return -EINVAL;
+
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			pr_err("nest-imc: Unable to start the counters for node %d\n",
+									node_id);
+			return rc;
+		}
+	}
+	++ref->refc;
+	spin_unlock(&ref->lock);
+
+	event->destroy = nest_imc_counters_release;
+	return 0;
+}
+
+/*
+ * core_imc_mem_init : set up the counter memory for the core of @cpu.
+ *
+ * Allocates @size bytes of zeroed pages on the cpu's own node, records the
+ * address as the core's vbase and passes its physical address to OPAL, which
+ * configures it as the location where the core imc counters are populated.
+ *
+ * Returns 0 on success, -ENOMEM when the pages cannot be allocated, or the
+ * OPAL error (the pages are freed and vbase is cleared in that case).
+ */
+static int core_imc_mem_init(int cpu, int size)
+{
+	int core_id = cpu / threads_per_core;
+	int nid = cpu_to_node(cpu);
+	struct imc_mem_info *mem_info = &core_imc_pmu->mem_info[core_id];
+	struct page *page;
+	int rc;
+
+	mem_info->id = core_id;
+
+	/*
+	 * __GFP_THISNODE keeps the allocation on the local node. Only vbase
+	 * is needed for core counters.
+	 */
+	page = alloc_pages_node(nid,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+				__GFP_NOWARN, get_order(size));
+	if (!page)
+		return -ENOMEM;
+	mem_info->vbase = page_address(page);
+
+	spin_lock_init(&core_imc_refc[core_id].lock);
+	core_imc_refc[core_id].id = core_id;
+
+	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+				__pa((void *)mem_info->vbase),
+				get_hard_smp_processor_id(cpu));
+	if (!rc)
+		return 0;
+
+	/* OPAL rejected the buffer: give the pages back */
+	free_pages((u64)mem_info->vbase, get_order(size));
+	mem_info->vbase = NULL;
+	return rc;
+}
+
+/* True when the core of @cpu already has counter memory (vbase) assigned. */
+static bool is_core_imc_mem_inited(int cpu)
+{
+	int core_id = cpu / threads_per_core;
+
+	return core_imc_pmu->mem_info[core_id].vbase ? true : false;
+}
+
+/*
+ * Hotplug online callback for core IMC: the first thread of a core to come
+ * online sets up the core's counter memory if needed and becomes the
+ * designated cpu. Returns 0, or the error from core_imc_mem_init().
+ */
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+	static struct cpumask tmp_mask;
+	const struct cpumask *siblings = cpu_sibling_mask(cpu);
+	int rc;
+
+	/* A thread of this core is already designated: nothing to do */
+	if (cpumask_and(&tmp_mask, siblings, &core_imc_cpumask))
+		return 0;
+
+	if (!is_core_imc_mem_inited(cpu)) {
+		rc = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+		if (rc) {
+			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+			return rc;
+		}
+	}
+
+	/* Designate this cpu for its core */
+	cpumask_set_cpu(cpu, &core_imc_cpumask);
+	return 0;
+}
+
+/*
+ * Hotplug offline callback for core IMC.
+ *
+ * If @cpu was the designated cpu of its core, hand over to a sibling thread
+ * and migrate the perf context. If the core has no other thread, stop the
+ * core counters, zero the core's reference count and drop one global
+ * reference when the global count is held by the core domain.
+ */
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+	struct imc_pmu_ref *ref;
+	unsigned int core_id;
+	int next_cpu;
+
+	/* Nothing to do for a cpu that is not in the designated mask */
+	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+		return 0;
+
+	/*
+	 * We can get here while core_imc is not registered, because a failed
+	 * cpuhotplug callback registration runs the offline path for every
+	 * cpu that was set up. That is not an offline failure, so report
+	 * success; cpuhp_setup_state() hands the real error to its caller,
+	 * which then runs the cleanup.
+	 */
+	if (!core_imc_pmu->pmu.event_init)
+		return 0;
+
+	/* Look for another thread of the same core */
+	next_cpu = cpumask_last(cpu_sibling_mask(cpu));
+	if (unlikely(next_cpu == cpu))
+		next_cpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+
+	if (next_cpu >= 0 && next_cpu < nr_cpu_ids) {
+		cpumask_set_cpu(next_cpu, &core_imc_cpumask);
+		perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, next_cpu);
+		return 0;
+	}
+
+	/*
+	 * Last cpu of this core: stop the counters and zero "refc" directly,
+	 * without taking the reference count lock of the core.
+	 */
+	opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+			       get_hard_smp_processor_id(cpu));
+	core_id = cpu / threads_per_core;
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	ref->refc = 0;
+
+	/*
+	 * Drop the global reference as well when it is currently held by
+	 * the core domain.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_CORE)
+		imc_global_refc.refc--;
+	spin_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+/* Install the core IMC hotplug callbacks; returns cpuhp_setup_state()'s result. */
+static int core_imc_pmu_cpumask_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+			       "perf/powerpc/imc_core:online",
+			       ppc_core_imc_cpu_online,
+			       ppc_core_imc_cpu_offline);
+	return rc;
+}
+
+/*
+ * Drop one global reference. Once the count reaches zero (or below) the
+ * count and the owning domain id are both reset to 0, i.e. no
+ * thread/core/trace event is running any more. @event is not used.
+ */
+static void reset_global_refc(struct perf_event *event)
+{
+	spin_lock(&imc_global_refc.lock);
+
+	if (--imc_global_refc.refc <= 0) {
+		imc_global_refc.refc = 0;
+		imc_global_refc.id = 0;
+	}
+
+	spin_unlock(&imc_global_refc.lock);
+}
+
+/*
+ * core_imc_counters_release: event destroy hook for core IMC events.
+ *
+ * Drops one reference on the core used by event->cpu, stops the core
+ * counters through OPAL when the last reference goes away, and then drops
+ * the global reference. The global reference is left alone when the core
+ * count was already zero or when stopping the counters fails.
+ */
+static void core_imc_counters_release(struct perf_event *event)
+{
+	struct imc_pmu_ref *ref;
+	int core_id, rc;
+
+	if (event->cpu < 0)
+		return;
+
+	/*
+	 * The reference count of the core decides whether the IMC PMU has to
+	 * be disabled; the lock keeps us from racing with another task that
+	 * enables or disables the core counters.
+	 */
+	core_id = event->cpu / threads_per_core;
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return;
+
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		/*
+		 * This happens when a perf session was started and all cpus
+		 * of the core went offline afterwards:
+		 * ppc_core_imc_cpu_offline() zeroes the count and disables
+		 * the engine through OPAL when the last cpu of the core goes
+		 * away.
+		 */
+		spin_unlock(&ref->lock);
+		return;
+	}
+
+	if (--ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "core-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+
+	reset_global_refc(event);
+}
+
+/*
+ * event_init callback for core-imc events.
+ *
+ * Validates the event, claims the global IMC domain for core-imc, takes a
+ * reference on the event's core (starting the core engine through OPAL for
+ * the first user) and records the counter address in hw.event_base.
+ *
+ * Returns 0 on success, -ENOENT for a foreign PMU type, -EINVAL for a
+ * sampling event, a negative cpu, an out-of-range offset or a missing
+ * reference slot, -ENODEV when the core's counter memory is not set up,
+ * -EBUSY when thread/trace imc events own the domain, or the OPAL error when
+ * the engine cannot be started.
+ */
+static int core_imc_event_init(struct perf_event *event)
+{
+	int core_id, rc;
+	u64 config = event->attr.config;
+	struct imc_mem_info *pcmi;
+	struct imc_pmu *pmu;
+	struct imc_pmu_ref *ref;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/*
+	 * Sanity check for config (event offset).
+	 * NOTE(review): an offset equal to counter_mem_size passes this
+	 * check - confirm whether ">=" is what is intended.
+	 */
+	if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+		return -EINVAL;
+
+	if (!is_core_imc_mem_inited(event->cpu))
+		return -ENODEV;
+
+	core_id = event->cpu / threads_per_core;
+	pcmi = &core_imc_pmu->mem_info[core_id];
+	if ((!pcmi->vbase))
+		return -ENODEV;
+
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	/*
+	 * Since the system can run either in accumulation or trace-mode
+	 * of IMC at a time, core-imc events are allowed only if no other
+	 * trace/thread imc events are enabled/monitored.
+	 *
+	 * Claim the global domain *before* touching the per-core state, so
+	 * that the -EBUSY exit leaves neither a per-core reference nor a
+	 * started engine behind (no destroy callback is installed yet at
+	 * that point, so nothing would ever undo them).
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id != 0 && imc_global_refc.id != IMC_DOMAIN_CORE) {
+		spin_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	imc_global_refc.id = IMC_DOMAIN_CORE;
+	imc_global_refc.refc++;
+	spin_unlock(&imc_global_refc.lock);
+
+	/*
+	 * Core pmu units are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the lock and enable the core counters.
+	 * If not, just increment the count in core_imc_refc struct.
+	 */
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			pr_err("core-imc: Unable to start the counters for core %d\n",
+									core_id);
+			/* Give back the global reference claimed above. */
+			reset_global_refc(event);
+			return rc;
+		}
+	}
+	++ref->refc;
+	spin_unlock(&ref->lock);
+
+	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
+	event->destroy = core_imc_counters_release;
+	return 0;
+}
+
+/*
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
+ *
+ * LDBAR Register Layout:
+ *
+ *  0          4         8         12        16        20        24        28
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | |       [   ]    [                   Counter Address [8:50]
+ *   | * Mode    |
+ *   |           * PB Scope
+ *   * Enable/Disable
+ *
+ *  32        36        40        44        48        52        56        60
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *           Counter Address [8:50]              ]
+ *
+ */
+static int thread_imc_mem_alloc(int cpu_id, int size)
+{
+	int nid = cpu_to_node(cpu_id);
+	struct page *page;
+	u64 *buf;
+
+	/*
+	 * The buffer is allocated the first time this cpu is seen and is
+	 * then kept: the cpu offline path does not free it.
+	 */
+	if (per_cpu(thread_imc_mem, cpu_id))
+		goto clear_ldbar;
+
+	page = alloc_pages_node(nid,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+				__GFP_NOWARN, get_order(size));
+	if (!page)
+		return -ENOMEM;
+
+	buf = page_address(page);
+	per_cpu(thread_imc_mem, cpu_id) = buf;
+
+clear_ldbar:
+	/* LDBAR is only programmed with the buffer when an event is added. */
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+/* CPU hotplug "online" callback: make sure @cpu has its thread-imc buffer. */
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+	int rc = thread_imc_mem_alloc(cpu, thread_imc_mem_size);
+
+	return rc;
+}
+
+/*
+ * CPU hotplug "offline" callback for thread-imc: stop LDBAR posting and
+ * drop one global reference if thread-imc currently owns the IMC domain.
+ * Always returns 0.
+ */
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+	unsigned long ldbar;
+
+	/*
+	 * Bit 0 of LDBAR (mask 1UL << 63) is set by the thread-imc event_add
+	 * path; while it is set, counter data keeps being posted to memory.
+	 * Clear it so the updates stop in the cpu offline path.
+	 */
+	ldbar = mfspr(SPRN_LDBAR);
+	mtspr(SPRN_LDBAR, (ldbar & (~(1UL << 63))));
+
+	/* The global count is only touched while thread-imc owns the domain. */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_THREAD)
+		--imc_global_refc.refc;
+	spin_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+/*
+ * Hook the thread-imc online/offline handlers into the CPU hotplug state
+ * machine. The return value of cpuhp_setup_state() is passed through.
+ */
+static int thread_imc_cpu_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+			       "perf/powerpc/imc_thread:online",
+			       ppc_thread_imc_cpu_online,
+			       ppc_thread_imc_cpu_offline);
+	return rc;
+}
+
+/*
+ * event_init callback for thread-imc events.
+ *
+ * Returns 0 on success, -ENOENT for a foreign PMU type, -EACCES without
+ * perfmon capability, -EINVAL for a sampling event, an out-of-range offset
+ * or a missing target task, and -EBUSY when core/trace imc events own the
+ * global IMC domain.
+ */
+static int thread_imc_event_init(struct perf_event *event)
+{
+	u32 config = event->attr.config;
+	struct imc_pmu *pmu;
+	int rc = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Counting only: a sample period is rejected. */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/* The event offset must not exceed the counter memory size. */
+	if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
+		return -EINVAL;
+
+	/* A target task is mandatory. */
+	if (!event->hw.target)
+		return -EINVAL;
+
+	/*
+	 * The global domain can be taken when it is free or already owned
+	 * by thread-imc; any other owner means trace/core imc events are
+	 * running in the system.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id != 0 && imc_global_refc.id != IMC_DOMAIN_THREAD) {
+		rc = -EBUSY;
+	} else {
+		imc_global_refc.id = IMC_DOMAIN_THREAD;
+		imc_global_refc.refc++;
+	}
+	spin_unlock(&imc_global_refc.lock);
+	if (rc)
+		return rc;
+
+	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
+	return 0;
+}
+
+/* True when the name of the event's PMU starts with "thread_imc". */
+static bool is_thread_imc_pmu(struct perf_event *event)
+{
+	return strncmp(event->pmu->name, "thread_imc", strlen("thread_imc")) == 0;
+}
+
+/*
+ * Address of the counter backing @event. Thread-imc events are resolved
+ * against the current cpu's per-cpu buffer plus the event offset; all other
+ * events use the address stored in hw.event_base.
+ */
+static u64 * get_event_base_addr(struct perf_event *event)
+{
+	u64 base, offset;
+
+	if (!is_thread_imc_pmu(event))
+		return (u64 *)event->hw.event_base;
+
+	base = (u64)per_cpu(thread_imc_mem, smp_processor_id());
+	offset = event->attr.config & IMC_EVENT_OFFSET_MASK;
+	return (u64 *)(base + offset);
+}
+
+/*
+ * start_txn callback: the PMU is disabled only when @txn_flags carries no
+ * bit other than PERF_PMU_TXN_ADD; any other flag makes this a no-op.
+ */
+static void thread_imc_pmu_start_txn(struct pmu *pmu,
+				     unsigned int txn_flags)
+{
+	if (!(txn_flags & ~PERF_PMU_TXN_ADD))
+		perf_pmu_disable(pmu);
+}
+
+/* cancel_txn callback: re-enable the PMU. */
+static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
+{
+	perf_pmu_enable(pmu);
+}
+
+/* commit_txn callback: re-enable the PMU; always reports success. */
+static int thread_imc_pmu_commit_txn(struct pmu *pmu)
+{
+	perf_pmu_enable(pmu);
+
+	return 0;
+}
+
+/*
+ * Snapshot the event's counter. IMC counters are free flowing, so the value
+ * read here is stored in hw.prev_count as the baseline for the next delta,
+ * and also returned to the caller.
+ */
+static u64 imc_read_counter(struct perf_event *event)
+{
+	u64 *slot = get_event_base_addr(event);
+	u64 snapshot = be64_to_cpu(READ_ONCE(*slot));
+
+	local64_set(&event->hw.prev_count, snapshot);
+	return snapshot;
+}
+
+/*
+ * Fold the counter movement since the last snapshot into event->count.
+ * The old baseline has to be fetched first, because imc_read_counter()
+ * overwrites hw.prev_count with the fresh value.
+ */
+static void imc_event_update(struct perf_event *event)
+{
+	u64 before = local64_read(&event->hw.prev_count);
+	u64 now = imc_read_counter(event);
+
+	local64_add(now - before, &event->count);
+}
+
+/*
+ * start callback. The in-memory counters keep being incremented by HW or
+ * the microcode, so starting an event only means recording the current
+ * value as the baseline for the delta computed later. @flags is unused.
+ */
+static void imc_event_start(struct perf_event *event, int flags)
+{
+	(void)imc_read_counter(event);
+}
+
+/*
+ * stop callback: take a new snapshot and add the delta since the previous
+ * one to the event count. @flags is unused.
+ */
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+	imc_event_update(event);
+}
+
+/*
+ * add callback: take the baseline snapshot right away when the caller asks
+ * for the event to be started (PERF_EF_START). Always returns 0.
+ */
+static int imc_event_add(struct perf_event *event, int flags)
+{
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	imc_event_start(event, flags);
+	return 0;
+}
+
+/*
+ * add callback for thread-imc events.
+ *
+ * Takes the baseline snapshot when PERF_EF_START is set, points LDBAR of the
+ * current cpu at its per-cpu counter buffer with the enable bit set, and
+ * takes a reference on the current core, starting the core engine through
+ * OPAL for the first user.
+ *
+ * Returns 0 on success and -EINVAL when the core-imc memory is not
+ * initialised, the reference slot is missing, or OPAL fails to start the
+ * counters.
+ */
+static int thread_imc_event_add(struct perf_event *event, int flags)
+{
+	int core_id;
+	struct imc_pmu_ref *ref;
+	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
+
+	if (flags & PERF_EF_START)
+		imc_event_start(event, flags);
+
+	if (!is_core_imc_mem_inited(smp_processor_id()))
+		return -EINVAL;
+
+	core_id = smp_processor_id() / threads_per_core;
+	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+	mtspr(SPRN_LDBAR, ldbar_value);
+
+	/*
+	 * imc pmus are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the lock and enable the counters.
+	 * If not, just increment the count in ref count struct.
+	 */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+		    get_hard_smp_processor_id(smp_processor_id()))) {
+			spin_unlock(&ref->lock);
+			/*
+			 * Keep the message on one line: continuing a string
+			 * literal with a backslash pulls the next line's
+			 * indentation into the logged text.
+			 */
+			pr_err("thread-imc: Unable to start the counter for core %d\n", core_id);
+			return -EINVAL;
+		}
+	}
+	++ref->refc;
+	spin_unlock(&ref->lock);
+	return 0;
+}
+
+/*
+ * del callback for thread-imc events.
+ *
+ * Drops the reference on the current core (stopping the core engine through
+ * OPAL when it was the last one), clears the LDBAR enable bit and folds the
+ * final counter delta into the event count. When OPAL fails to stop the
+ * counters the function returns early, before LDBAR and the event count are
+ * touched.
+ */
+static void thread_imc_event_del(struct perf_event *event, int flags)
+{
+
+	int core_id;
+	struct imc_pmu_ref *ref;
+
+	core_id = smp_processor_id() / threads_per_core;
+	ref = &core_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
+
+	spin_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+		    get_hard_smp_processor_id(smp_processor_id()))) {
+			spin_unlock(&ref->lock);
+			/*
+			 * Keep the message on one line: continuing a string
+			 * literal with a backslash pulls the next line's
+			 * indentation into the logged text.
+			 */
+			pr_err("thread-imc: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		/* Never let the count stay negative. */
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+
+	/* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+	/*
+	 * Take a snapshot and calculate the delta and update
+	 * the event counter values.
+	 */
+	imc_event_update(event);
+}
+
+/*
+ * Allocate the trace-imc buffer for a cpu (first call only), register it
+ * with OPAL, initialise the core's reference slot and load LDBAR with 0.
+ *
+ * @cpu_id: logical cpu the buffer belongs to
+ * @size:   buffer size in bytes; rounded up to a page order
+ *
+ * Returns 0 on success, -ENOMEM when no page could be allocated on the cpu's
+ * node, or the OPAL error code when the trace counters cannot be
+ * initialised.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+	u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+	int phys_id = cpu_to_node(cpu_id), rc = 0;
+	int core_id = (cpu_id / threads_per_core);
+
+	if (!local_mem) {
+		struct page *page;
+
+		page = alloc_pages_node(phys_id,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+				__GFP_NOWARN, get_order(size));
+		if (!page)
+			return -ENOMEM;
+		local_mem = page_address(page);
+
+		/* Initialise the counters for trace mode */
+		rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+					    get_hard_smp_processor_id(cpu_id));
+		if (rc) {
+			pr_info("IMC:opal init failed for trace imc\n");
+			/*
+			 * Do not keep a buffer OPAL never accepted: a later
+			 * call for this cpu would find it, skip the OPAL
+			 * init and report success.
+			 */
+			free_pages((u64)local_mem, get_order(size));
+			return rc;
+		}
+
+		/* Publish the buffer only once OPAL has accepted it. */
+		per_cpu(trace_imc_mem, cpu_id) = local_mem;
+	}
+
+	/*
+	 * NOTE(review): this runs on every call, so the core's lock is
+	 * re-initialised each time one of its cpus comes online - confirm
+	 * that this cannot happen while the lock is held.
+	 */
+	trace_imc_refc[core_id].id = core_id;
+	spin_lock_init(&trace_imc_refc[core_id].lock);
+
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+/* CPU hotplug "online" callback: make sure @cpu has its trace-imc buffer. */
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+	int rc = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+
+	return rc;
+}
+
+/*
+ * CPU hotplug "offline" callback for trace-imc. LDBAR bit 0 is left alone
+ * here because it is already zero in imc trace-mode; only the global
+ * reference count is reduced, and only while trace-imc owns the IMC domain.
+ * Always returns 0.
+ */
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_TRACE)
+		--imc_global_refc.refc;
+	spin_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+/*
+ * Hook the trace-imc online/offline handlers into the CPU hotplug state
+ * machine. The return value of cpuhp_setup_state() is passed through.
+ */
+static int trace_imc_cpu_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+			       "perf/powerpc/imc_trace:online",
+			       ppc_trace_imc_cpu_online,
+			       ppc_trace_imc_cpu_offline);
+	return rc;
+}
+
+/* Address of the current cpu's trace-imc buffer, as an integer. */
+static u64 get_trace_imc_event_base_addr(void)
+{
+	u64 base = (u64)per_cpu(trace_imc_mem, smp_processor_id());
+
+	return base;
+}
+
+/*
+ * Validate one trace-imc record and fill in the perf sample for it.
+ *
+ * @mem:     trace record to parse
+ * @data:    sample data to fill (ip and period)
+ * @prev_tb: timebase of the previous valid record; updated on acceptance
+ * @header:  perf event header to fill
+ * @event:   event the sample belongs to
+ *
+ * Returns 0 for a valid record, -EINVAL when the record's timebase is not
+ * newer than *@prev_tb or when its tb1/tb2 fields are inconsistent.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+				    struct perf_sample_data *data,
+				    u64 *prev_tb,
+				    struct perf_event_header *header,
+				    struct perf_event *event)
+{
+	/* A valid record has a timebase strictly newer than the last one. */
+	if (be64_to_cpu(READ_ONCE(mem->tb1)) <= *prev_tb)
+		return -EINVAL;
+	*prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+
+	/* tb2 has to match the masked tb1. */
+	if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+			 be64_to_cpu(READ_ONCE(mem->tb2)))
+		return -EINVAL;
+
+	/* Prepare perf sample */
+	data->ip =  be64_to_cpu(READ_ONCE(mem->ip));
+	data->period = event->hw.last_period;
+
+	header->type = PERF_RECORD_SAMPLE;
+	header->size = sizeof(*header) + event->header_size;
+	header->misc = 0;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/* No HV/PR bits to go by: classify by the sampled address. */
+		header->misc |= is_kernel_addr(data->ip) ?
+				PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER;
+		goto finish;
+	}
+
+	/* Derive the privilege level from the MSR HV/PR bits of the record. */
+	switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
+	case 0:	/* neither MSR HV nor PR set */
+		header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+		break;
+	case 1:	/* HV = 0, PR = 1 */
+		header->misc |= PERF_RECORD_MISC_GUEST_USER;
+		break;
+	case 2:	/* HV = 1, PR = 0 */
+		header->misc |= PERF_RECORD_MISC_KERNEL;
+		break;
+	case 3:	/* HV = 1, PR = 1 */
+		header->misc |= PERF_RECORD_MISC_USER;
+		break;
+	default:
+		pr_info("IMC: Unable to set the flag based on MSR bits\n");
+		break;
+	}
+
+finish:
+	perf_event_header__init_id(header, data, event);
+
+	return 0;
+}
+
+/*
+ * Walk the current cpu's trace-imc buffer record by record and emit a perf
+ * sample for each valid one. The walk ends at the first invalid record, at
+ * the first failure to reserve output space, or at the end of the buffer.
+ */
+static void dump_trace_imc_data(struct perf_event *event)
+{
+	struct trace_imc_data *rec;
+	u64 last_tb = 0;
+	int idx;
+
+	rec = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+	for (idx = 0; idx < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+	     idx++, rec++) {
+		struct perf_output_handle handle;
+		struct perf_event_header header;
+		struct perf_sample_data data;
+
+		/* Not a valid record: nothing more to dump. */
+		if (trace_imc_prepare_sample(rec, &data, &last_tb, &header, event))
+			return;
+
+		/* Valid record: create the sample. */
+		if (perf_output_begin(&handle, &data, event, header.size))
+			return;
+
+		perf_output_sample(&handle, &header, &data, event);
+		perf_output_end(&handle);
+	}
+}
+
+/*
+ * add callback for trace-imc events: load LDBAR with the current cpu's
+ * trace buffer address plus the trace-imc enable bit and take a reference
+ * on the current core, starting trace mode through OPAL for the first user.
+ *
+ * Returns 0 on success, -EINVAL when the reference slot is unavailable or
+ * OPAL fails to start the counters.
+ */
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+	int cpu_core = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *core_ref;
+	u64 buf, ldbar;
+
+	buf = get_trace_imc_event_base_addr();
+	ldbar = ((u64)buf & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
+
+	/* trace-imc reference count */
+	core_ref = trace_imc_refc ? &trace_imc_refc[cpu_core] : NULL;
+	if (!core_ref) {
+		pr_debug("imc: Failed to get the event reference count\n");
+		return -EINVAL;
+	}
+
+	mtspr(SPRN_LDBAR, ldbar);
+
+	spin_lock(&core_ref->lock);
+	if (core_ref->refc == 0 &&
+	    opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+				    get_hard_smp_processor_id(smp_processor_id()))) {
+		spin_unlock(&core_ref->lock);
+		pr_err("trace-imc: Unable to start the counters for core %d\n", cpu_core);
+		return -EINVAL;
+	}
+	++core_ref->refc;
+	spin_unlock(&core_ref->lock);
+
+	return 0;
+}
+
+/* read callback for trace-imc: intentionally does nothing. */
+static void trace_imc_event_read(struct perf_event *event)
+{
+}
+
+/*
+ * stop callback for trace-imc: emit the collected records as perf samples,
+ * then zero the first u64 of the current cpu's trace buffer.
+ */
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+	void *buf = (void *)get_trace_imc_event_base_addr();
+
+	dump_trace_imc_data(event);
+	memset(buf, 0, sizeof(u64));
+}
+
+/* start callback for trace-imc: intentionally does nothing. */
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+}
+
+/*
+ * del callback for trace-imc events: drop the reference on the current core
+ * (stopping trace mode through OPAL when it was the last one) and then dump
+ * the collected records. When OPAL fails to stop the counters the function
+ * returns early, without dumping.
+ */
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+	int cpu_core = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *core_ref;
+
+	core_ref = trace_imc_refc ? &trace_imc_refc[cpu_core] : NULL;
+	if (!core_ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
+
+	spin_lock(&core_ref->lock);
+	core_ref->refc--;
+	if (core_ref->refc < 0) {
+		/* Never let the count stay negative. */
+		core_ref->refc = 0;
+	} else if (core_ref->refc == 0) {
+		if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+					   get_hard_smp_processor_id(smp_processor_id()))) {
+			spin_unlock(&core_ref->lock);
+			pr_err("trace-imc: Unable to stop the counters for core %d\n", cpu_core);
+			return;
+		}
+	}
+	spin_unlock(&core_ref->lock);
+
+	trace_imc_event_stop(event, flags);
+}
+
+/*
+ * event_init callback for trace-imc events.
+ *
+ * Returns 0 on success, -ENOENT for a foreign PMU type or a counting event
+ * (sample_period == 0), -EACCES without perfmon capability, and -EBUSY when
+ * core/thread imc events own the global IMC domain.
+ */
+static int trace_imc_event_init(struct perf_event *event)
+{
+	int rc = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Counting events are not handled by this PMU. */
+	if (event->attr.sample_period == 0)
+		return -ENOENT;
+
+	/*
+	 * The global domain can be taken when it is free or already owned
+	 * by trace-imc; any other owner means core/thread imc events are
+	 * running in the system.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id != 0 && imc_global_refc.id != IMC_DOMAIN_TRACE) {
+		rc = -EBUSY;
+	} else {
+		imc_global_refc.id = IMC_DOMAIN_TRACE;
+		imc_global_refc.refc++;
+	}
+	spin_unlock(&imc_global_refc.lock);
+	if (rc)
+		return rc;
+
+	event->hw.idx = -1;
+
+	/*
+	 * Only a single PMU can use perf_hw_context, and that one is the
+	 * core PMU, so trace_imc runs in "perf_sw_context".
+	 */
+	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
+	return 0;
+}
+
+/*
+ * update_pmu_ops : Fill in the perf callbacks and attribute groups of "pmu".
+ *
+ * A common set of defaults is installed first; the switch below then adds
+ * or overrides the entries that are specific to the pmu's domain. An unknown
+ * domain keeps the defaults. Always returns 0.
+ */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+	struct pmu *ops = &pmu->pmu;
+
+	/* Defaults shared by all domains. */
+	ops->task_ctx_nr = perf_invalid_context;
+	ops->capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+	ops->attr_groups = pmu->attr_groups;
+	ops->add = imc_event_add;
+	ops->del = imc_event_stop;
+	ops->start = imc_event_start;
+	ops->stop = imc_event_stop;
+	ops->read = imc_event_update;
+	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+
+	switch (pmu->domain) {
+	case IMC_DOMAIN_NEST:
+		ops->event_init = nest_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
+	case IMC_DOMAIN_CORE:
+		ops->event_init = core_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
+	case IMC_DOMAIN_THREAD:
+		ops->event_init = thread_imc_event_init;
+		ops->add = thread_imc_event_add;
+		ops->del = thread_imc_event_del;
+		ops->start_txn = thread_imc_pmu_start_txn;
+		ops->cancel_txn = thread_imc_pmu_cancel_txn;
+		ops->commit_txn = thread_imc_pmu_commit_txn;
+		break;
+	case IMC_DOMAIN_TRACE:
+		ops->event_init = trace_imc_event_init;
+		ops->add = trace_imc_event_add;
+		ops->del = trace_imc_event_del;
+		ops->start = trace_imc_event_start;
+		ops->stop = trace_imc_event_stop;
+		ops->read = trace_imc_event_read;
+		pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * init_nest_pmu_ref: Allocate and initialise one imc_pmu_ref per possible
+ * node and point every possible cpu's "local_nest_imc_refc" at the entry of
+ * its node. Returns 0 on success, -ENOMEM when the array cannot be allocated.
+ */
+static int init_nest_pmu_ref(void)
+{
+	unsigned int nr_nodes = num_possible_nodes();
+	int nid, idx, cpu;
+
+	nest_imc_refc = kcalloc(nr_nodes, sizeof(*nest_imc_refc), GFP_KERNEL);
+	if (!nest_imc_refc)
+		return -ENOMEM;
+
+	/*
+	 * Give each array slot its lock (used to avoid races while tracking
+	 * the sessions using the chip's nest pmu units) and the id of the
+	 * node it stands for. "idx" cannot run past the end of the array,
+	 * because for_each_node() visits the "N_POSSIBLE" nodes only.
+	 */
+	idx = 0;
+	for_each_node(nid) {
+		spin_lock_init(&nest_imc_refc[idx].lock);
+		nest_imc_refc[idx].id = nid;
+		idx++;
+	}
+
+	/*
+	 * Resolve the slot of every possible cpu's node once, here, so that
+	 * later lookups are a plain per-cpu read.
+	 */
+	for_each_possible_cpu(cpu) {
+		nid = cpu_to_node(cpu);
+		for (idx = 0; idx < nr_nodes; idx++) {
+			if (nest_imc_refc[idx].id != nid)
+				continue;
+			per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[idx];
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Free the counter pages of every core that has one, then the mem_info
+ * array and the per-core reference array. mem_info is expected to be
+ * non-NULL when this is called.
+ */
+static void cleanup_all_core_imc_memory(void)
+{
+	int nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+	struct imc_mem_info *info = core_imc_pmu->mem_info;
+	int size = core_imc_pmu->counter_mem_size;
+	int core;
+
+	for (core = 0; core < nr_cores; core++) {
+		if (!info[core].vbase)
+			continue;
+		free_pages((u64)info[core].vbase, get_order(size));
+	}
+
+	kfree(info);
+	kfree(core_imc_refc);
+}
+
+/*
+ * Per-cpu worker for thread_imc_disable(): clear bit 0 of LDBAR
+ * (mask 1UL << 63), which disables thread-imc updates to memory on the cpu
+ * running it. @dummy is unused.
+ */
+static void thread_imc_ldbar_disable(void *dummy)
+{
+	unsigned long ldbar = mfspr(SPRN_LDBAR);
+
+	mtspr(SPRN_LDBAR, (ldbar & (~(1UL << 63))));
+}
+
+/* Clear the LDBAR enable bit on every cpu, waiting for all of them to finish. */
+void thread_imc_disable(void)
+{
+	const int wait = 1;
+
+	on_each_cpu(thread_imc_ldbar_disable, NULL, wait);
+}
+
+/*
+ * Free every per-cpu thread-imc buffer.
+ *
+ * The buffers are kept when a cpu goes offline, so all possible cpus are
+ * walked: limiting the walk to online cpus would leak the buffers of cpus
+ * that are offline right now. Each slot is reset to NULL after the free so
+ * that no stale pointer to a freed page is left behind.
+ */
+static void cleanup_all_thread_imc_memory(void)
+{
+	int i, order = get_order(thread_imc_mem_size);
+
+	for_each_possible_cpu(i) {
+		if (!per_cpu(thread_imc_mem, i))
+			continue;
+
+		free_pages((u64)per_cpu(thread_imc_mem, i), order);
+		per_cpu(thread_imc_mem, i) = NULL;
+	}
+}
+
+/*
+ * Free every per-cpu trace-imc buffer and the trace reference array.
+ *
+ * All possible cpus are walked, because the buffer of a cpu that is offline
+ * right now was never freed and would otherwise leak. The per-cpu slots and
+ * trace_imc_refc are reset to NULL afterwards: the trace-imc add/del paths
+ * test trace_imc_refc for NULL and must not see a pointer to freed memory.
+ */
+static void cleanup_all_trace_imc_memory(void)
+{
+	int i, order = get_order(trace_imc_mem_size);
+
+	for_each_possible_cpu(i) {
+		if (!per_cpu(trace_imc_mem, i))
+			continue;
+
+		free_pages((u64)per_cpu(trace_imc_mem, i), order);
+		per_cpu(trace_imc_mem, i) = NULL;
+	}
+
+	kfree(trace_imc_refc);
+	trace_imc_refc = NULL;
+}
+
+/*
+ * Free the dynamically allocated event attribute group of a pmu: first its
+ * attribute array (when the group exists), then the group itself.
+ */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+	const struct attribute_group *grp = pmu_ptr->attr_groups[IMC_EVENT_ATTR];
+
+	if (grp)
+		kfree(grp->attrs);
+	kfree(grp);
+}
+
+/*
+ * Remove the cpu hotplug callback of the pmu's domain and free the memory
+ * that belongs to that domain. For nest pmus the shared state is only torn
+ * down when the last nest pmu goes away.
+ * TODO: Need to handle pmu unregistering, which will be
+ * done in followup series.
+ */
+static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
+{
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 1) {
+			/* Last nest pmu: drop the state shared by all of them. */
+			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
+			kfree(nest_imc_refc);
+			kfree(per_nest_pmu_arr);
+			per_nest_pmu_arr = NULL;
+		}
+
+		if (nest_pmus > 0)
+			nest_pmus--;
+		mutex_unlock(&nest_init_lock);
+		break;
+	case IMC_DOMAIN_CORE:
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
+		cleanup_all_core_imc_memory();
+		break;
+	case IMC_DOMAIN_THREAD:
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
+		cleanup_all_thread_imc_memory();
+		break;
+	case IMC_DOMAIN_TRACE:
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+		cleanup_all_trace_imc_memory();
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Function to unregister thread-imc if core-imc
+ * is not registered.
+ *
+ * The pmu is unregistered first and its resources are freed afterwards:
+ * pmu.attr_groups points at the attribute groups that
+ * imc_common_mem_free() releases, so they have to stay valid until the
+ * perf core has let go of the pmu.
+ */
+void unregister_thread_imc(void)
+{
+	perf_pmu_unregister(&thread_imc_pmu->pmu);
+	imc_common_cpuhp_mem_free(thread_imc_pmu);
+	imc_common_mem_free(thread_imc_pmu);
+}
+
+/*
+ * imc_mem_init : Name the pmu after its device tree node and allocate the
+ * memory its domain needs.
+ *
+ * @pmu_ptr:   pmu being set up
+ * @parent:    device tree node; its "name" property seeds the pmu name
+ * @pmu_index: slot of this pmu in per_nest_pmu_arr (nest domain only)
+ *
+ * Returns 0 on success, -ENODEV when the node has no "name", -ENOMEM on
+ * allocation failure, -EINVAL for an unknown domain, or the error reported
+ * by the per-cpu thread/trace buffer allocation.
+ */
+static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
+								int pmu_index)
+{
+	const char *node_name;
+	int nr_cores, cpu, rc;
+
+	if (of_property_read_string(parent, "name", &node_name))
+		return -ENODEV;
+
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", node_name);
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		/* Needed for hotplug/migration */
+		if (!per_nest_pmu_arr) {
+			per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
+						   sizeof(struct imc_pmu *),
+						   GFP_KERNEL);
+			if (!per_nest_pmu_arr)
+				return -ENOMEM;
+		}
+		per_nest_pmu_arr[pmu_index] = pmu_ptr;
+		return 0;
+
+	case IMC_DOMAIN_CORE:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", node_name, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
+					    GFP_KERNEL);
+		if (!pmu_ptr->mem_info)
+			return -ENOMEM;
+
+		core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+					GFP_KERNEL);
+		if (!core_imc_refc) {
+			kfree(pmu_ptr->mem_info);
+			return -ENOMEM;
+		}
+
+		core_imc_pmu = pmu_ptr;
+		return 0;
+
+	case IMC_DOMAIN_THREAD:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", node_name, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		thread_imc_mem_size = pmu_ptr->counter_mem_size;
+		for_each_online_cpu(cpu) {
+			rc = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
+			if (rc) {
+				/* Give back what was handed out so far. */
+				cleanup_all_thread_imc_memory();
+				return rc;
+			}
+		}
+
+		thread_imc_pmu = pmu_ptr;
+		return 0;
+
+	case IMC_DOMAIN_TRACE:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", node_name, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+		trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+					 GFP_KERNEL);
+		if (!trace_imc_refc)
+			return -ENOMEM;
+
+		trace_imc_mem_size = pmu_ptr->counter_mem_size;
+		for_each_online_cpu(cpu) {
+			rc = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+			if (rc) {
+				/* Give back what was handed out so far. */
+				cleanup_all_trace_imc_memory();
+				return rc;
+			}
+		}
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @parent:	Device tree unit node
+ * @pmu_ptr:	memory allocated for this pmu
+ * @pmu_idx:	Count of nest pmc registered
+ *
+ * Allocates the domain's memory, registers the domain's cpu hotplug
+ * callback, builds the event attributes, installs the perf callbacks and
+ * registers the pmu with perf. On failure everything set up so far is
+ * released and the error code is returned; 0 is returned on success.
+ */
+int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
+{
+	int rc = imc_mem_init(pmu_ptr, parent, pmu_idx);
+
+	if (rc)
+		goto free_attrs;
+
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		/*
+		 * A nest imc pmu needs only one cpu per chip. The cpumask is
+		 * initialised for the first nest imc pmu and shared by the
+		 * rest; "nest_pmus" counts them so that the hotplug callback
+		 * can be unregistered with the last one.
+		 */
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 0) {
+			rc = init_nest_pmu_ref();
+			if (rc) {
+				mutex_unlock(&nest_init_lock);
+				goto free_nest_arr;
+			}
+			/* Register for cpu hotplug notification. */
+			rc = nest_pmu_cpumask_init();
+			if (rc) {
+				mutex_unlock(&nest_init_lock);
+				kfree(nest_imc_refc);
+				goto free_nest_arr;
+			}
+		}
+		nest_pmus++;
+		mutex_unlock(&nest_init_lock);
+		break;
+	case IMC_DOMAIN_CORE:
+		rc = core_imc_pmu_cpumask_init();
+		if (rc) {
+			cleanup_all_core_imc_memory();
+			goto free_attrs;
+		}
+		break;
+	case IMC_DOMAIN_THREAD:
+		rc = thread_imc_cpu_init();
+		if (rc) {
+			cleanup_all_thread_imc_memory();
+			goto free_attrs;
+		}
+		break;
+	case IMC_DOMAIN_TRACE:
+		rc = trace_imc_cpu_init();
+		if (rc) {
+			cleanup_all_trace_imc_memory();
+			goto free_attrs;
+		}
+		break;
+	default:
+		return  -EINVAL;	/* Unknown domain */
+	}
+
+	/* Each step runs only when the previous one succeeded. */
+	rc = update_events_in_group(parent, pmu_ptr);
+	if (!rc)
+		rc = update_pmu_ops(pmu_ptr);
+	if (!rc)
+		rc = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+	if (rc)
+		goto undo_cpuhp;
+
+	pr_debug("%s performance monitor hardware support registered\n",
+							pmu_ptr->pmu.name);
+
+	return 0;
+
+free_nest_arr:
+	kfree(per_nest_pmu_arr);
+	per_nest_pmu_arr = NULL;
+	goto free_attrs;
+undo_cpuhp:
+	imc_common_cpuhp_mem_free(pmu_ptr);
+free_attrs:
+	imc_common_mem_free(pmu_ptr);
+	return rc;
+}
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
new file mode 100644
index 0000000000..4c18b55043
--- /dev/null
+++ b/arch/powerpc/perf/internal.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+int __init init_ppc970_pmu(void);
+int __init init_power5_pmu(void);
+int __init init_power5p_pmu(void);
+int __init init_power6_pmu(void);
+int __init init_power7_pmu(void);
+int __init init_power8_pmu(void);
+int __init init_power9_pmu(void);
+int __init init_power10_pmu(void);
+int __init init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
new file mode 100644
index 0000000000..56301b2bc8
--- /dev/null
+++ b/arch/powerpc/perf/isa207-common.c
@@ -0,0 +1,840 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common Performance counter support functions for PowerISA v2.07 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+#include "isa207-common.h"
+
+PMU_FORMAT_ATTR(event,		"config:0-49");
+PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
+PMU_FORMAT_ATTR(mark,		"config:8");
+PMU_FORMAT_ATTR(combine,	"config:11");
+PMU_FORMAT_ATTR(unit,		"config:12-15");
+PMU_FORMAT_ATTR(pmc,		"config:16-19");
+PMU_FORMAT_ATTR(cache_sel,	"config:20-23");
+PMU_FORMAT_ATTR(sample_mode,	"config:24-28");
+PMU_FORMAT_ATTR(thresh_sel,	"config:29-31");
+PMU_FORMAT_ATTR(thresh_stop,	"config:32-35");
+PMU_FORMAT_ATTR(thresh_start,	"config:36-39");
+PMU_FORMAT_ATTR(thresh_cmp,	"config:40-49");
+
+static struct attribute *isa207_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_pmcxsel.attr,
+	&format_attr_mark.attr,
+	&format_attr_combine.attr,
+	&format_attr_unit.attr,
+	&format_attr_pmc.attr,
+	&format_attr_cache_sel.attr,
+	&format_attr_sample_mode.attr,
+	&format_attr_thresh_sel.attr,
+	&format_attr_thresh_stop.attr,
+	&format_attr_thresh_start.attr,
+	&format_attr_thresh_cmp.attr,
+	NULL,	/* last entry: NULL sentinel */
+};
+
+const struct attribute_group isa207_pmu_format_group = {
+	.name = "format",
+	.attrs = isa207_pmu_format_attr,
+};
+
+/* True for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC event codes. */
+static inline bool event_is_fab_match(u64 event)
+{
+	/* Compare pmc, unit and pmcxsel only; the edge bit (0) is masked off */
+	u64 code = event & 0xff0fe;
+
+	return code == 0x30056 || code == 0x4f052;
+}
+
+/* An event code is valid when it sets no bit outside this CPU's valid mask. */
+static bool is_event_valid(u64 event)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return (event & ~p10_EVENT_VALID_MASK) == 0;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		return (event & ~p9_EVENT_VALID_MASK) == 0;
+
+	return (event & ~EVENT_VALID_MASK) == 0;
+}
+
+/* Report whether the raw event code carries the "marked" bit. */
+static inline bool is_event_marked(u64 event)
+{
+	u64 marked = event & EVENT_IS_MARKED;
+
+	return marked != 0;
+}
+
+/* Extract the SDAR mode field, whose position differs on ISA v3.1. */
+static unsigned long sdar_mod_val(u64 event)
+{
+	bool isa31 = cpu_has_feature(CPU_FTR_ARCH_31);
+
+	return isa31 ? p10_SDAR_MODE(event) : p9_SDAR_MODE(event);
+}
+
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+	/*
+	 * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+	 * continuous sampling mode.
+	 *
+	 * Power8: the field is always programmed with MMCRA_SDAR_MODE_TLB
+	 * (0b01) for continuous sampling and stays unchanged when MMCRA[63]
+	 * (marked events) is set.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		*mmcra |= MMCRA_SDAR_MODE_TLB;
+		return;
+	}
+
+	/*
+	 * Power9/Power10: a marked event, or a group that already holds one,
+	 * clears the field ('No Updates'). Every other event supplies the mode
+	 * in its event code; when that is zero, MMCRA_SDAR_MODE_DCACHE is used
+	 * because hardware requires a non-zero value.
+	 */
+	if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+		*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+	else if (sdar_mod_val(event))
+		*mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
+	else
+		*mmcra |= MMCRA_SDAR_MODE_DCACHE;
+}
+
+static int p10_thresh_cmp_val(u64 value)
+{
+	int exp = 0;
+	u64 result = value;
+
+	if (!value)
+		return value;
+
+	/*
+	 * In case of P10, the thresh_cmp value is not part of the raw event
+	 * code; it is provided via the attr.config1 parameter. To program the
+	 * threshold in MMCRA, take an 18-bit number N, then shift it right by
+	 * 2 places and increment the exponent E by 1 until the upper 10 bits
+	 * of N are zero. Write E to the threshold exponent and the lower 8
+	 * bits of N to the threshold mantissa.
+	 * The max threshold that can be written is 261120.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (value > 261120)
+			value = 261120;
+		while ((64 - __builtin_clzl(value)) > 8) {
+			exp++;
+			value >>= 2;
+		}
+
+		/*
+		 * It is invalid to write a mantissa whose upper 2 bits are
+		 * zero unless the exponent is also zero; such a value is
+		 * reported to the caller as -1.
+		 */
+		if (!(value & 0xC0) && exp)
+			result = -1;
+		else
+			result = (exp << 8) | value;
+	}
+	return result;
+}
+
+/* Place a threshold compare value at its CPU-specific position in MMCRA. */
+static u64 thresh_cmp_val(u64 value)
+{
+	unsigned int shift = MMCRA_THR_CMP_SHIFT;
+
+	/* On ISA v3.1 the value is first encoded by p10_thresh_cmp_val() */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		value = p10_thresh_cmp_val(value);
+
+	/* The threshold compare bits sit elsewhere in MMCRA than on p8 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		shift = p9_MMCRA_THR_CMP_SHIFT;
+	return value << shift;
+}
+
+/* Extract the combine field using the layout that matches this CPU. */
+static unsigned long combine_from_event(u64 event)
+{
+	bool isa300 = cpu_has_feature(CPU_FTR_ARCH_300);
+
+	return isa300 ? p9_EVENT_COMBINE(event) : EVENT_COMBINE(event);
+}
+
+/* Bit position of the combine field for @pmc in MMCR1 on this CPU. */
+static unsigned long combine_shift(unsigned long pmc)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return MMCR1_COMBINE_SHIFT(pmc);
+	return p9_MMCR1_COMBINE_SHIFT(pmc);
+}
+
+static inline bool event_is_threshold(u64 event)
+{
+	return ((event >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK) != 0; /* thresh_sel set */
+}
+
+/*
+ * Check that a threshold compare encoding is one the hardware accepts.
+ */
+static bool is_thresh_cmp_valid(u64 event)
+{
+	unsigned int thr_cmp, exponent, mantissa_top;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return p10_thresh_cmp_val(event) >= 0;
+
+	thr_cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
+	exponent = thr_cmp >> 7;
+	mantissa_top = thr_cmp & 0x60;
+
+	/*
+	 * The mantissa's upper two bits may only be zero when the exponent
+	 * is zero as well. See the THRESH_CMP_MANTISSA doc.
+	 */
+	return !exponent || mantissa_top;
+}
+
+/* Pull the two-bit qualifier out of the event's cache select field. */
+static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
+{
+	u64 cache_sel = event >> EVENT_CACHE_SEL_SHIFT;
+
+	return cache_sel & MMCR1_DC_IC_QUAL_MASK;
+}
+
+static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
+{
+	u64 ret = PERF_MEM_NA;	/* result when no case below assigns one */
+
+	switch(idx) {
+	case 0:
+		/* No source to decode: PERF_MEM_NA is returned */
+		break;
+	case 1:
+		ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT);
+		break;
+	case 2:
+		ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+		break;
+	case 3:
+		ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+		break;
+	case 4:
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			ret = P(SNOOP, HIT);
+
+			if (sub_idx == 1)
+				ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
+			else if (sub_idx == 2 || sub_idx == 3)
+				ret |= P(LVL, HIT) | LEVEL(PMEM);
+			else if (sub_idx == 4)
+				ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
+			else if (sub_idx == 5 || sub_idx == 7)
+				ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
+			else if (sub_idx == 6)
+				ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
+		} else {
+			if (sub_idx <= 1)
+				ret = PH(LVL, LOC_RAM);
+			else if (sub_idx > 1 && sub_idx <= 2)
+				ret = PH(LVL, REM_RAM1);
+			else
+				ret = PH(LVL, REM_RAM2);
+			ret |= P(SNOOP, HIT);
+		}
+		break;
+	case 5:
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			ret = REM | P(HOPS, 0);
+
+			if (sub_idx == 0 || sub_idx == 4)
+				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+			else if (sub_idx == 1 || sub_idx == 5)
+				ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
+			else if (sub_idx == 2 || sub_idx == 6)
+				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+			else if (sub_idx == 3 || sub_idx == 7)
+				ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+		} else {
+			if (sub_idx == 0)
+				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+			else if (sub_idx == 1)
+				ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+			else if (sub_idx == 2 || sub_idx == 4)
+				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+			else if (sub_idx == 3 || sub_idx == 5)
+				ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+		}
+		break;
+	case 6:
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (sub_idx == 0)
+				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HIT) | P(HOPS, 2);
+			else if (sub_idx == 1)
+				ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HITM) | P(HOPS, 2);
+			else if (sub_idx == 2)
+				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HIT) | P(HOPS, 3);
+			else if (sub_idx == 3)
+				ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+					P(SNOOP, HITM) | P(HOPS, 3);
+		} else {
+			ret = PH(LVL, REM_CCE2);
+			if (sub_idx == 0 || sub_idx == 2)
+				ret |= P(SNOOP, HIT);
+			else if (sub_idx == 1 || sub_idx == 3)
+				ret |= P(SNOOP, HITM);
+		}
+		break;
+	case 7:
+		ret = PM(LVL, L1);	/* L1 miss */
+		break;
+	}
+
+	return ret;
+}
+
+/* Fill @dsrc from SIER; it is set to 0 when there is nothing to decode. */
+void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
+							struct pt_regs *regs)
+{
+	u64 idx, sier, val;
+	u32 sub_idx;
+
+	/* Skip if no SIER support */
+	if (!(flags & PPMU_HAS_SIER)) {
+		dsrc->val = 0;
+		return;
+	}
+
+	sier = mfspr(SPRN_SIER);
+	val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
+	if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) {
+		/* Unhandled sample type: still give @dsrc a defined value */
+		dsrc->val = 0;
+		return;
+	}
+
+	idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
+	sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+
+	dsrc->val = isa207_find_source(idx, sub_idx);
+	if (val == 7) {
+		u64 mmcra;
+		u32 op_type;
+
+		/*
+		 * Type 0b111 denotes either larx or stcx instruction. Use the
+		 * MMCRA sampling bits [57:59] along with the type value to
+		 * determine the exact instruction type. If the sampling
+		 * criteria is neither load nor store, default the type to NA.
+		 */
+		mmcra = mfspr(SPRN_MMCRA);
+		op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
+		switch (op_type) {
+		case 5:
+			dsrc->val |= P(OP, LOAD);
+			break;
+		case 7:
+			dsrc->val |= P(OP, STORE);
+			break;
+		default:
+			dsrc->val |= P(OP, NA);
+			break;
+		}
+	} else {
+		dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
+	}
+}
+
+/* Store the sampled latency, decoded from MMCRA, into the weight at @weight. */
+void isa207_get_mem_weight(u64 *weight, u64 type)
+{
+	union perf_sample_weight *out = (union perf_sample_weight *)weight;
+	u64 mmcra = mfspr(SPRN_MMCRA);
+	u64 exp = MMCRA_THR_CTR_EXP(mmcra);
+	u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+	u64 sier = mfspr(SPRN_SIER);
+	u64 sample_type = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
+	u64 latency = 0;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
+
+	/*
+	 * SIER type 0, and type 7 before ISA v3.1, keep a latency of zero.
+	 */
+	if (sample_type != 0 && (sample_type != 7 || cpu_has_feature(CPU_FTR_ARCH_31)))
+		latency = mantissa << (2 * exp);
+
+	/*
+	 * PERF_SAMPLE_WEIGHT: the latency fills the full 64 bit weight field.
+	 *
+	 * Otherwise (sample type WEIGHT_STRUCT):
+	 * - the memory latency goes in the lower 32 bits;
+	 * - on ISA v3.1 the two remaining 16 bit fields of
+	 *   perf_sample_weight receive cycle counter values from sier2.
+	 */
+	if (type & PERF_SAMPLE_WEIGHT) {
+		out->full = latency;
+		return;
+	}
+
+	out->var1_dw = (u32)latency;
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		out->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+		out->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+	}
+}
+
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
+{
+	unsigned int unit, pmc, cache, ebb;
+	unsigned long mask, value;
+
+	mask = value = 0;
+
+	if (!is_event_valid(event))
+		return -1;
+
+	pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
+	unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+			p10_EVENT_CACHE_SEL_MASK;
+	else
+		cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+			EVENT_CACHE_SEL_MASK;
+	ebb   = (event >> EVENT_EBB_SHIFT)        & EVENT_EBB_MASK;
+
+	if (pmc) {
+		u64 base_event;
+
+		if (pmc > 6)
+			return -1;
+
+		/* Ignore Linux defined bits when checking event below */
+		base_event = event & ~EVENT_LINUX_MASK;
+
+		if (pmc >= 5 && base_event != 0x500fa &&	/* PM_RUN_INST_CMPL */
+				base_event != 0x600f4)		/* PM_RUN_CYC */
+			return -1;
+
+		mask  |= CNST_PMC_MASK(pmc);
+		value |= CNST_PMC_VAL(pmc);
+
+		/*
+		 * PMC5 and PMC6 are used to count cycles and instructions and
+		 * they do not support most of the constraint bits. Add a check
+		 * to exclude PMC5/6 from most of the constraints except for
+		 * EBB/BHRB.
+		 */
+		if (pmc >= 5)
+			goto ebb_bhrb;
+	}
+
+	if (pmc <= 4) {
+		/*
+		 * Add to number of counters in use. Note this includes events with
+		 * a PMC of 0 - they still need a PMC, it's just assigned later.
+		 * Don't count events on PMC 5 & 6, there is only one valid event
+		 * on each of those counters, and they are handled above.
+		 */
+		mask  |= CNST_NC_MASK;
+		value |= CNST_NC_VAL;
+	}
+
+	if (unit >= 6 && unit <= 9) {
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (unit == 6) {
+				mask |= CNST_L2L3_GROUP_MASK;
+				value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+			}
+		} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			mask  |= CNST_CACHE_GROUP_MASK;
+			value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+			mask |= CNST_CACHE_PMC4_MASK;
+			if (pmc == 4)
+				value |= CNST_CACHE_PMC4_VAL;
+		} else if (cache & 0x7) {
+			/*
+			 * L2/L3 events contain a cache selector field, which is
+			 * supposed to be programmed into MMCRC. However MMCRC is only
+			 * HV writable, and there is no API for guest kernels to modify
+			 * it. The solution is for the hypervisor to initialise the
+			 * field to zeroes, and for us to only ever allow events that
+			 * have a cache selector of zero. The bank selector (bit 3) is
+			 * irrelevant, as long as the rest of the value is 0.
+			 */
+			return -1;
+		}
+
+	} else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
+		mask  |= CNST_L1_QUAL_MASK;
+		value |= CNST_L1_QUAL_VAL(cache);
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		mask |= CNST_RADIX_SCOPE_GROUP_MASK;
+		value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
+	}
+
+	if (is_event_marked(event)) {
+		mask  |= CNST_SAMPLE_MASK;
+		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
+			mask  |= CNST_THRESH_CTL_SEL_MASK;
+			value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+			mask  |= p10_CNST_THRESH_CMP_MASK;
+			value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
+		} else if (event_is_threshold(event))
+			return -1;
+	} else if (cpu_has_feature(CPU_FTR_ARCH_300))  {
+		if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
+			mask  |= CNST_THRESH_MASK;
+			value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
+		} else if (event_is_threshold(event))
+			return -1;
+	} else {
+		/*
+		 * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
+		 * the threshold control bits are used for the match value.
+		 */
+		if (event_is_fab_match(event)) {
+			mask  |= CNST_FAB_MATCH_MASK;
+			value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
+		} else {
+			if (!is_thresh_cmp_valid(event))
+				return -1;
+
+			mask  |= CNST_THRESH_MASK;
+			value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
+		}
+	}
+
+ebb_bhrb:
+	if (!pmc && ebb)
+		/* EBB events must specify the PMC */
+		return -1;
+
+	if (event & EVENT_WANTS_BHRB) {
+		if (!ebb)
+			/* Only EBB events can request BHRB */
+			return -1;
+
+		mask  |= CNST_IFM_MASK;
+		value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
+	}
+
+	/*
+	 * All events must agree on EBB, either all request it or none.
+	 * EBB events are pinned & exclusive, so this should never actually
+	 * hit, but we leave it as a fallback in case.
+	 */
+	mask  |= CNST_EBB_MASK;
+	value |= CNST_EBB_VAL(ebb);
+
+	*maskp = mask;
+	*valp = value;
+
+	return 0;
+}
+
+int isa207_compute_mmcr(u64 event[], int n_ev,
+			       unsigned int hwc[], struct mmcr_regs *mmcr,
+			       struct perf_event *pevents[], u32 flags)
+{
+	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
+	unsigned long mmcr3;
+	unsigned int pmc, pmc_inuse;
+	int i;
+
+	pmc_inuse = 0;
+
+	/* First pass: note which PMCs are explicitly requested by the events */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
+		if (pmc)
+			pmc_inuse |= 1 << pmc;
+	}
+
+	mmcra = mmcr1 = mmcr2 = mmcr3 = 0;
+
+	/*
+	 * Disable bhrb unless explicitly requested
+	 * by setting MMCRA (BHRBRD) bit.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mmcra |= MMCRA_BHRB_DISABLE;
+
+	/* Second pass: assign PMCs, set all MMCR1 fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc     = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
+		unit    = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
+		combine = combine_from_event(event[i]);
+		psel    =  event[i] & EVENT_PSEL_MASK;
+
+		if (!pmc) {
+			for (pmc = 1; pmc <= 4; ++pmc) {
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			}
+
+			pmc_inuse |= 1 << pmc;
+		}
+
+		if (pmc <= 4) {
+			mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
+			mmcr1 |= combine << combine_shift(pmc);
+			mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
+		}
+
+		/* Program MMCRA[SDAR_MODE] as this CPU requires for the event */
+		mmcra_sdar_mode(event[i], &mmcra);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			cache = dc_ic_rld_quad_l1_sel(event[i]);
+			mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+		} else {
+			if (event[i] & EVENT_IS_L1) {
+				cache = dc_ic_rld_quad_l1_sel(event[i]);
+				mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+			}
+		}
+
+		/* Set RADIX_SCOPE_QUAL bit */
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
+				p10_EVENT_RADIX_SCOPE_QUAL_MASK;
+			mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
+		}
+
+		if (is_event_marked(event[i])) {
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+
+			val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+			if (val) {
+				mmcra |= (val &  3) << MMCRA_SAMP_MODE_SHIFT;
+				mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
+			}
+		}
+
+		/*
+		 * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
+		 * the threshold bits are used for the match value.
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_300) && event_is_fab_match(event[i])) {
+			mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
+				  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
+		} else {
+			val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+			mmcra |= val << MMCRA_THR_CTL_SHIFT;
+			val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
+			mmcra |= val << MMCRA_THR_SEL_SHIFT;
+			if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+				val = (event[i] >> EVENT_THR_CMP_SHIFT) &
+					EVENT_THR_CMP_MASK;
+				mmcra |= thresh_cmp_val(val);
+			} else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+				val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+					p10_EVENT_THR_CMP_MASK;
+				mmcra |= thresh_cmp_val(val);
+			}
+		}
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+			val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
+				p10_EVENT_L2L3_SEL_MASK;
+			mmcr2 |= val << p10_L2L3_SEL_SHIFT;
+		}
+
+		if (event[i] & EVENT_WANTS_BHRB) {
+			val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
+			mmcra |= val << MMCRA_IFM_SHIFT;
+		}
+
+		/* set MMCRA (BHRBRD) to 0 if there is user request for BHRB */
+		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+				(has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB)))
+			mmcra &= ~MMCRA_BHRB_DISABLE;
+
+		if (pevents[i]->attr.exclude_user)
+			mmcr2 |= MMCR2_FCP(pmc);
+
+		if (pevents[i]->attr.exclude_hv)
+			mmcr2 |= MMCR2_FCH(pmc);
+
+		if (pevents[i]->attr.exclude_kernel) {
+			if (cpu_has_feature(CPU_FTR_HVMODE))
+				mmcr2 |= MMCR2_FCH(pmc);
+			else
+				mmcr2 |= MMCR2_FCS(pmc);
+		}
+
+		if (pevents[i]->attr.exclude_idle)
+			mmcr2 |= MMCR2_FCWAIT(pmc);
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (pmc <= 4) {
+				val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
+					p10_EVENT_MMCR3_MASK;
+				mmcr3 |= val << MMCR3_SHIFT(pmc);
+			}
+		}
+
+		hwc[i] = pmc - 1;	/* hwc[] holds 0-based counter indices */
+	}
+
+	/* Return MMCRx values */
+	mmcr->mmcr0 = 0;
+
+	/* pmc_inuse is 1-based */
+	if (pmc_inuse & 2)
+		mmcr->mmcr0 = MMCR0_PMC1CE;
+
+	if (pmc_inuse & 0x7c)
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+
+	/* If we're not using PMC 5 or 6, freeze them */
+	if (!(pmc_inuse & 0x60))
+		mmcr->mmcr0 |= MMCR0_FC56;
+
+	/*
+	 * Set mmcr0 (PMCCEXT) for p10 which
+	 * will restrict access to group B registers
+	 * when MMCR0 PMCC=0b00.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mmcr->mmcr0 |= MMCR0_PMCCEXT;
+
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
+	mmcr->mmcr2 = mmcr2;
+	mmcr->mmcr3 = mmcr3;
+
+	return 0;
+}
+
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	if (pmc < 4)	/* 0-based index: only PMC1..PMC4 have a PMCxSEL byte in MMCR1 */
+		mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
+}
+
+/* Return the row of @ev_alt that lists @event, or -1 if there is none. */
+static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size)
+{
+	int row, col;
+
+	/* Stop at the first row whose leading entry is already above @event */
+	for (row = 0; row < size && event >= ev_alt[row][0]; row++) {
+		for (col = 0; col < MAX_ALT && ev_alt[row][col]; col++) {
+			if (event == ev_alt[row][col])
+				return row;
+		}
+	}
+
+	return -1;
+}
+
+/* Fill @alt with @event followed by its alternatives; returns how many. */
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+					const unsigned int ev_alt[][MAX_ALT])
+{
+	int row, k, seen, num_alt = 0;
+
+	alt[num_alt++] = event;
+	row = find_alternative(event, ev_alt, size);
+
+	/* Copy the rest of the row; the original event is already in alt[0] */
+	for (k = 0; row >= 0 && k < MAX_ALT; k++) {
+		if (ev_alt[row][k] && ev_alt[row][k] != event)
+			alt[num_alt++] = ev_alt[row][k];
+	}
+
+	if (!(flags & PPMU_ONLY_COUNT_RUN))
+		return num_alt;
+
+	/*
+	 * We're only counting in RUN state, so PM_CYC is equivalent to
+	 * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
+	 * Only the entries collected above are examined; matches are appended
+	 * after them.
+	 */
+	seen = num_alt;
+	for (k = 0; k < seen; k++) {
+		switch (alt[k]) {
+		case 0x1e:				/* PM_CYC */
+			alt[num_alt++] = 0x600f4;	/* PM_RUN_CYC */
+			break;
+		case 0x600f4:				/* PM_RUN_CYC */
+			alt[num_alt++] = 0x1e;		/* PM_CYC */
+			break;
+		case 0x2:				/* PM_INST_CMPL */
+			alt[num_alt++] = 0x500fa;	/* PM_RUN_INST_CMPL */
+			break;
+		case 0x500fa:				/* PM_RUN_INST_CMPL */
+			alt[num_alt++] = 0x2;		/* PM_INST_CMPL */
+			break;
+		}
+	}
+
+	return num_alt;
+}
+
+/* Return -EINVAL when the event config uses a reserved MMCRA encoding, else 0. */
+int isa3XX_check_attr_config(struct perf_event *ev)
+{
+	u64 event = ev->attr.config;
+	u64 sample = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+	u64 thr_ctl = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+
+	/*
+	 * MMCRA[61:62] is Random Sampling Mode (SM).
+	 * A value of 0b11 is reserved.
+	 */
+	if ((sample & 0x3) == 0x3)
+		return -EINVAL;
+
+	/*
+	 * Check the remaining reserved sampling values.
+	 * Source: Performance Monitoring Unit User Guide
+	 */
+	switch (sample) {
+	case 0x5:
+	case 0x9:
+	case 0xD:
+	case 0x19:
+	case 0x1A:
+	case 0x1D:
+	case 0x1E:
+		return -EINVAL;
+	default:
+		break;
+	}
+
+	/*
+	 * MMCRA[48:51]/[52:55] Threshold Start/Stop
+	 * Events Selection.
+	 * 0b1111 in either nibble is reserved.
+	 */
+	if ((thr_ctl >> 4) == 0xF || (thr_ctl & 0xF) == 0xF)
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
new file mode 100644
index 0000000000..f594fa6580
--- /dev/null
+++ b/arch/powerpc/perf/isa207-common.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+
+#ifndef _LINUX_POWERPC_PERF_ISA207_COMMON_H_
+#define _LINUX_POWERPC_PERF_ISA207_COMMON_H_
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+#define EVENT_EBB_MASK		1ull
+#define EVENT_EBB_SHIFT		PERF_EVENT_CONFIG_EBB_SHIFT
+#define EVENT_BHRB_MASK		1ull
+#define EVENT_BHRB_SHIFT	62
+#define EVENT_WANTS_BHRB	(EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
+#define EVENT_IFM_MASK		3ull
+#define EVENT_IFM_SHIFT		60
+#define EVENT_THR_CMP_SHIFT	40	/* Threshold CMP value */
+#define EVENT_THR_CMP_MASK	0x3ff
+#define EVENT_THR_CTL_SHIFT	32	/* Threshold control value (start/stop) */
+#define EVENT_THR_CTL_MASK	0xffull
+#define EVENT_THR_SEL_SHIFT	29	/* Threshold select value */
+#define EVENT_THR_SEL_MASK	0x7
+#define EVENT_THRESH_SHIFT	29	/* All threshold bits */
+#define EVENT_THRESH_MASK	0x1fffffull
+#define EVENT_SAMPLE_SHIFT	24	/* Sampling mode & eligibility */
+#define EVENT_SAMPLE_MASK	0x1f
+#define EVENT_CACHE_SEL_SHIFT	20	/* L2/L3 cache select */
+#define EVENT_CACHE_SEL_MASK	0xf
+#define EVENT_IS_L1		(4 << EVENT_CACHE_SEL_SHIFT)
+#define EVENT_PMC_SHIFT		16	/* PMC number (1-based) */
+#define EVENT_PMC_MASK		0xf
+#define EVENT_UNIT_SHIFT	12	/* Unit */
+#define EVENT_UNIT_MASK		0xf
+#define EVENT_COMBINE_SHIFT	11	/* Combine bit */
+#define EVENT_COMBINE_MASK	0x1
+#define EVENT_COMBINE(v)	(((v) >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK)
+#define EVENT_MARKED_SHIFT	8	/* Marked bit */
+#define EVENT_MARKED_MASK	0x1
+#define EVENT_IS_MARKED		(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
+#define EVENT_PSEL_MASK		0xff	/* PMCxSEL value */
+
+/* Bits defined by Linux */
+#define EVENT_LINUX_MASK	\
+	((EVENT_EBB_MASK  << EVENT_EBB_SHIFT)			|	\
+	 (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)			|	\
+	 (EVENT_IFM_MASK  << EVENT_IFM_SHIFT))
+
+#define EVENT_VALID_MASK	\
+	((EVENT_THRESH_MASK    << EVENT_THRESH_SHIFT)		|	\
+	 (EVENT_SAMPLE_MASK    << EVENT_SAMPLE_SHIFT)		|	\
+	 (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT)	|	\
+	 (EVENT_PMC_MASK       << EVENT_PMC_SHIFT)		|	\
+	 (EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\
+	 (EVENT_COMBINE_MASK   << EVENT_COMBINE_SHIFT)		|	\
+	 (EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\
+	  EVENT_LINUX_MASK					|	\
+	  EVENT_PSEL_MASK)
+
+#define ONLY_PLM \
+	(PERF_SAMPLE_BRANCH_USER        |\
+	 PERF_SAMPLE_BRANCH_KERNEL      |\
+	 PERF_SAMPLE_BRANCH_HV)
+
+/* Constants to support power9 raw encoding format */
+#define p9_EVENT_COMBINE_SHIFT	10	/* Combine bit */
+#define p9_EVENT_COMBINE_MASK	0x3ull
+#define p9_EVENT_COMBINE(v)	(((v) >> p9_EVENT_COMBINE_SHIFT) & p9_EVENT_COMBINE_MASK)
+#define p9_SDAR_MODE_SHIFT	50
+#define p9_SDAR_MODE_MASK	0x3ull
+#define p9_SDAR_MODE(v)		(((v) >> p9_SDAR_MODE_SHIFT) & p9_SDAR_MODE_MASK)
+
+#define p9_EVENT_VALID_MASK		\
+	((p9_SDAR_MODE_MASK   << p9_SDAR_MODE_SHIFT		|	\
+	(EVENT_THRESH_MASK    << EVENT_THRESH_SHIFT)		|	\
+	(EVENT_SAMPLE_MASK    << EVENT_SAMPLE_SHIFT)		|	\
+	(EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT)		|	\
+	(EVENT_PMC_MASK       << EVENT_PMC_SHIFT)		|	\
+	(EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\
+	(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT)	|	\
+	(EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\
+	 EVENT_LINUX_MASK					|	\
+	 EVENT_PSEL_MASK))
+
+/* Constants to support power10 raw encoding format */
+#define p10_SDAR_MODE_SHIFT		22
+#define p10_SDAR_MODE_MASK		0x3ull
+#define p10_SDAR_MODE(v)		(((v) >> p10_SDAR_MODE_SHIFT) & \
+					p10_SDAR_MODE_MASK)
+#define p10_EVENT_L2L3_SEL_MASK		0x1f
+#define p10_L2L3_SEL_SHIFT		3
+#define p10_L2L3_EVENT_SHIFT		40
+#define p10_EVENT_THRESH_MASK		0xffffull
+#define p10_EVENT_CACHE_SEL_MASK	0x3ull
+#define p10_EVENT_MMCR3_MASK		0x7fffull
+#define p10_EVENT_MMCR3_SHIFT		45
+#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT	9
+#define p10_EVENT_RADIX_SCOPE_QUAL_MASK	0x1
+#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT	45
+
+/* Event Threshold Compare bit constant for power10 in config1 attribute */
+#define p10_EVENT_THR_CMP_SHIFT        0
+#define p10_EVENT_THR_CMP_MASK 0x3FFFFull
+
+#define p10_EVENT_VALID_MASK		\
+	((p10_SDAR_MODE_MASK   << p10_SDAR_MODE_SHIFT		|	\
+	(p10_EVENT_THRESH_MASK  << EVENT_THRESH_SHIFT)		|	\
+	(EVENT_SAMPLE_MASK     << EVENT_SAMPLE_SHIFT)		|	\
+	(p10_EVENT_CACHE_SEL_MASK  << EVENT_CACHE_SEL_SHIFT)	|	\
+	(EVENT_PMC_MASK        << EVENT_PMC_SHIFT)		|	\
+	(EVENT_UNIT_MASK       << EVENT_UNIT_SHIFT)		|	\
+	(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT)	|	\
+	(p10_EVENT_MMCR3_MASK  << p10_EVENT_MMCR3_SHIFT)	|	\
+	(EVENT_MARKED_MASK     << EVENT_MARKED_SHIFT)		|	\
+	(p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT)	|	\
+	 EVENT_LINUX_MASK					|	\
+	EVENT_PSEL_MASK))
+/*
+ * Layout of constraint bits:
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
+ *                                          |                                  |
+ *                           [  thresh_cmp bits for p10]           thresh_sel -*
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *               [ ] |   [ ] |  [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
+ *                |  |    |  |                  |
+ *      BHRB IFM -*  |    |  |*radix_scope      |      Count of events for each PMC.
+ *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6.
+ *      L1 I/D qualifier -*                     |
+ *                     nc - number of counters -*
+ *
+ * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
+ * we want the low bit of each field to be added to any existing value.
+ *
+ * Everything else is a value field.
+ */
+
+#define CNST_FAB_MATCH_VAL(v)	(((v) & EVENT_THR_CTL_MASK) << 56)
+#define CNST_FAB_MATCH_MASK	CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK)
+
+/* We just throw all the threshold bits into the constraint */
+#define CNST_THRESH_VAL(v)	(((v) & EVENT_THRESH_MASK) << 32)
+#define CNST_THRESH_MASK	CNST_THRESH_VAL(EVENT_THRESH_MASK)
+
+#define CNST_THRESH_CTL_SEL_VAL(v)	(((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK	CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
+#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43)
+#define p10_CNST_THRESH_CMP_MASK   p10_CNST_THRESH_CMP_VAL(0x7ff)
+
+#define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)
+#define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK)
+
+#define CNST_IFM_VAL(v)		(((v) & EVENT_IFM_MASK) << 25)
+#define CNST_IFM_MASK		CNST_IFM_VAL(EVENT_IFM_MASK)
+
+#define CNST_L1_QUAL_VAL(v)	(((v) & 3) << 22)
+#define CNST_L1_QUAL_MASK	CNST_L1_QUAL_VAL(3)
+
+#define CNST_SAMPLE_VAL(v)	(((v) & EVENT_SAMPLE_MASK) << 16)
+#define CNST_SAMPLE_MASK	CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
+
+#define CNST_CACHE_GROUP_VAL(v)	(((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK	CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL	(1ull << 54)
+#define CNST_CACHE_PMC4_MASK	CNST_CACHE_PMC4_VAL
+
+#define CNST_L2L3_GROUP_VAL(v)	(((v) & 0x1full) << 55)
+#define CNST_L2L3_GROUP_MASK	CNST_L2L3_GROUP_VAL(0x1f)
+
+#define CNST_RADIX_SCOPE_GROUP_VAL(v)	(((v) & 0x1ull) << 21)
+#define CNST_RADIX_SCOPE_GROUP_MASK	CNST_RADIX_SCOPE_GROUP_VAL(1)
+
+/*
+ * For NC we are counting up to 4 events. This requires three bits, and we need
+ * the fifth event to overflow and set the 4th bit. To achieve that we bias the
+ * fields by 3 in test_adder.
+ */
+#define CNST_NC_SHIFT		12
+#define CNST_NC_VAL		(1 << CNST_NC_SHIFT)
+#define CNST_NC_MASK		(8 << CNST_NC_SHIFT)
+#define ISA207_TEST_ADDER	(3 << CNST_NC_SHIFT)
+
+/*
+ * For the per-PMC fields we have two bits. The low bit is added, so if two
+ * events ask for the same PMC the sum will overflow, setting the high bit,
+ * indicating an error. So our mask sets the high bit. @pmc is 1-based.
+ */
+#define CNST_PMC_SHIFT(pmc)	(((pmc) - 1) * 2)
+#define CNST_PMC_VAL(pmc)	(1 << CNST_PMC_SHIFT(pmc))
+#define CNST_PMC_MASK(pmc)	(2 << CNST_PMC_SHIFT(pmc))
+
+/* Our add_fields is defined as (parenthesized so it is safe in any expression): */
+#define ISA207_ADD_FIELDS	\
+	(CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
+	 CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL)
+
+/* Bits in MMCR1 for PowerISA v2.07 */
+#define MMCR1_UNIT_SHIFT(pmc)		(60 - (4 * ((pmc) - 1)))
+#define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))
+#define MMCR1_PMCSEL_SHIFT(pmc)		(24 - (((pmc) - 1)) * 8)
+#define MMCR1_FAB_SHIFT			36
+#define MMCR1_DC_IC_QUAL_MASK		0x3
+#define MMCR1_DC_IC_QUAL_SHIFT		46
+
+/* MMCR1 combine field shift for power9; @pmc is 1-based */
+#define p9_MMCR1_COMBINE_SHIFT(pmc)	(38 - (((pmc) - 1) * 2))
+
+/* Bits in MMCRA for PowerISA v2.07 */
+#define MMCRA_SAMP_MODE_SHIFT		1
+#define MMCRA_SAMP_ELIG_SHIFT		4
+#define MMCRA_SAMP_ELIG_MASK		7
+#define MMCRA_THR_CTL_SHIFT		8
+#define MMCRA_THR_SEL_SHIFT		16
+#define MMCRA_THR_CMP_SHIFT		32
+#define MMCRA_SDAR_MODE_SHIFT		42
+#define MMCRA_SDAR_MODE_TLB		(1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES	~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_DCACHE		(2ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_IFM_SHIFT			30
+#define MMCRA_THR_CTR_MANT_SHIFT	19
+#define MMCRA_THR_CTR_MANT_MASK		0x7Ful
+#define MMCRA_THR_CTR_MANT(v)		(((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+						MMCRA_THR_CTR_MANT_MASK)
+
+#define MMCRA_THR_CTR_EXP_SHIFT		27
+#define MMCRA_THR_CTR_EXP_MASK		0x7ul
+#define MMCRA_THR_CTR_EXP(v)		(((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
+						MMCRA_THR_CTR_EXP_MASK)
+
+#define P10_MMCRA_THR_CTR_MANT_MASK	0xFFul
+#define P10_MMCRA_THR_CTR_MANT(v)	(((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+						P10_MMCRA_THR_CTR_MANT_MASK)
+
+/* MMCRA Threshold Compare bit constant for power9 */
+#define p9_MMCRA_THR_CMP_SHIFT	45
+
+/* Bits in MMCR2 for PowerISA v2.07 */
+#define MMCR2_FCS(pmc)			(1ull << (63 - (((pmc) - 1) * 9)))
+#define MMCR2_FCP(pmc)			(1ull << (62 - (((pmc) - 1) * 9)))
+#define MMCR2_FCWAIT(pmc)		(1ull << (58 - (((pmc) - 1) * 9)))
+#define MMCR2_FCH(pmc)			(1ull << (57 - (((pmc) - 1) * 9)))
+
+#define MAX_ALT				2
+#define MAX_PMU_COUNTERS		6
+
+/* Bits in MMCR3 for PowerISA v3.10 */
+#define MMCR3_SHIFT(pmc)		(49 - (15 * ((pmc) - 1)))
+
+#define ISA207_SIER_TYPE_SHIFT		15
+#define ISA207_SIER_TYPE_MASK		(0x7ull << ISA207_SIER_TYPE_SHIFT)
+
+#define ISA207_SIER_LDST_SHIFT		1
+#define ISA207_SIER_LDST_MASK		(0x7ull << ISA207_SIER_LDST_SHIFT)
+
+#define ISA207_SIER_DATA_SRC_SHIFT	53
+#define ISA207_SIER_DATA_SRC_MASK	(0x7ull << ISA207_SIER_DATA_SRC_SHIFT)
+
+/* Bits in SIER2/SIER3 for Power10 */
+#define P10_SIER2_FINISH_CYC(sier2)	(((sier2) >> (63 - 37)) & 0x7fful)
+#define P10_SIER2_DISPATCH_CYC(sier2)	(((sier2) >> (63 - 13)) & 0x7fful)
+
+#define P(a, b)				PERF_MEM_S(a, b)
+#define PH(a, b)			(P(LVL, HIT) | P(a, b))
+#define PM(a, b)			(P(LVL, MISS) | P(a, b))
+#define LEVEL(x)			P(LVLNUM, x)
+#define REM				P(REMOTE, REMOTE)
+
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
+int isa207_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags);
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+					const unsigned int ev_alt[][MAX_ALT]);
+void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
+							struct pt_regs *regs);
+void isa207_get_mem_weight(u64 *weight, u64 type);
+
+int isa3XX_check_attr_config(struct perf_event *ev);
+
+#endif
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
new file mode 100644
index 0000000000..db451b9aac
--- /dev/null
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for MPC7450-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/string.h>
+#include <linux/perf_event.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#define N_COUNTER	6	/* Number of hardware counters */
+#define MAX_ALT		3	/* Maximum number of event alternative codes */
+
+/*
+ * Bits in event code for MPC7450 family
+ *
+ *	bits 0-6:	PMCSEL, the event selector (PM_PMCSEL_MSK)
+ *	bits 8-10:	PMC number, 0 if the event is not tied to one PMC
+ *			(PM_PMC_SH / PM_PMC_MSK)
+ *	bits 12-17:	threshold value (PM_THRESH_SH / PM_THRESH_MSK)
+ *	bit 18:		threshold scale request (PM_THRMULT_MSKS)
+ */
+#define PM_THRMULT_MSKS	0x40000
+#define PM_THRESH_SH	12
+#define PM_THRESH_MSK	0x3f
+#define PM_PMC_SH	8
+#define PM_PMC_MSK	7
+#define PM_PMCSEL_MSK	0x7f
+
+/*
+ * Classify events according to how specific their PMC requirements are.
+ * Result is:
+ *	0: can go on any PMC
+ *	1: can go on PMCs 1-4
+ *	2: can go on PMCs 1,2,4
+ *	3: can go on PMCs 1 or 2
+ *	4: can only go on one PMC
+ *	-1: event code is invalid
+ */
+#define N_CLASSES	5
+
+/* Map an event code to its PMC-requirement class (0..4), or -1 if invalid. */
+static int mpc7450_classify_event(u32 event)
+{
+	const int fixed_pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	u32 sel;
+
+	/* An explicit PMC number pins the event to that one counter. */
+	if (fixed_pmc)
+		return fixed_pmc > N_COUNTER ? -1 : 4;
+
+	/* Otherwise the selector value alone decides the class. */
+	sel = event & PM_PMCSEL_MSK;
+	if (sel > 22)
+		return -1;
+	if (sel > 13)
+		return 3;
+	if (sel > 7)
+		return 2;
+	return sel > 1 ? 1 : 0;
+}
+
+/*
+ * Events using threshold and possible threshold scale:
+ *	code	scale?	name
+ *	11e	N	PM_INSTQ_EXCEED_CYC
+ *	11f	N	PM_ALTV_IQ_EXCEED_CYC
+ *	128	Y	PM_DTLB_SEARCH_EXCEED_CYC
+ *	12b	Y	PM_LD_MISS_EXCEED_L1_CYC
+ *	220	N	PM_CQ_EXCEED_CYC
+ *	30c	N	PM_GPR_RB_EXCEED_CYC
+ *	30d	?	PM_FPR_IQ_EXCEED_CYC ?
+ *	311	Y	PM_ITLB_SEARCH_EXCEED
+ *	410	N	PM_GPR_IQ_EXCEED_CYC
+ */
+
+/*
+ * Report how an event uses the threshold facility:
+ * 0 = not at all, 1 = threshold value only, 2 = threshold value and scale.
+ * Only events tied to PMC1..PMC4 can use it.
+ */
+static int mpc7450_threshold_use(u32 event)
+{
+	const u32 sel = event & PM_PMCSEL_MSK;
+
+	switch ((event >> PM_PMC_SH) & PM_PMC_MSK) {
+	case 1:
+		switch (sel) {
+		case 0x1e:
+		case 0x1f:
+			return 1;
+		case 0x28:
+		case 0x2b:
+			return 2;
+		default:
+			return 0;
+		}
+	case 2:
+		return sel == 0x20 ? 1 : 0;
+	case 3:
+		if (sel == 0x11)
+			return 2;
+		return (sel == 0xc || sel == 0xd) ? 1 : 0;
+	case 4:
+		return sel == 0x10 ? 1 : 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Layout of constraint bits:
+ * 33222222222211111111110000000000
+ * 10987654321098765432109876543210
+ *  |<    ><  > < > < ><><><><><><>
+ *  TS TV   G4   G3  G2P6P5P4P3P2P1
+ *
+ * P1 - P6
+ *	0 - 11: Count of events needing PMC1 .. PMC6
+ *
+ * G2
+ *	12 - 14: Count of events needing PMC1 or PMC2
+ *
+ * G3
+ *	16 - 18: Count of events needing PMC1, PMC2 or PMC4
+ *
+ * G4
+ *	20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4
+ *
+ * TV
+ *	24 - 29: Threshold value requested
+ *
+ * TS
+ *	30: Threshold scale value requested
+ */
+
+/*
+ * Constraint { mask, value } pairs for events tied to one PMC (class 4).
+ * Indexed by PMC number - 1; see mpc7450_get_constraint().
+ */
+static u32 pmcbits[N_COUNTER][2] = {
+	{ 0x00844002, 0x00111001 },	/* PMC1 mask, value: P1,G2,G3,G4 */
+	{ 0x00844008, 0x00111004 },	/* PMC2: P2,G2,G3,G4 */
+	{ 0x00800020, 0x00100010 },	/* PMC3: P3,G4 */
+	{ 0x00840080, 0x00110040 },	/* PMC4: P4,G3,G4 */
+	{ 0x00000200, 0x00000100 },	/* PMC5: P5 */
+	{ 0x00000800, 0x00000400 }	/* PMC6: P6 */
+};
+
+/*
+ * Constraint { mask, value } pairs for classes 0..3, indexed by class.
+ * Class 4 has no entry here because it uses pmcbits[] instead.
+ */
+static u32 classbits[N_CLASSES - 1][2] = {
+	{ 0x00000000, 0x00000000 },	/* class 0: no constraint */
+	{ 0x00800000, 0x00100000 },	/* class 1: G4 */
+	{ 0x00040000, 0x00010000 },	/* class 2: G3 */
+	{ 0x00004000, 0x00001000 },	/* class 3: G2 */
+};
+
+/*
+ * Produce the constraint mask/value pair for one event.
+ * Returns 0 and fills *maskp / *valp, or -1 if the event code is invalid.
+ */
+static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
+				  unsigned long *valp, u64 event_config1 __maybe_unused)
+{
+	const u32 ev = (u32)event;
+	const int class = mpc7450_classify_event(ev);
+	const u32 *bits;
+	u32 mask, value;
+	int tuse;
+
+	if (class < 0)
+		return -1;
+
+	/* Class 4 events name their PMC; the others share per-class bits. */
+	if (class == 4)
+		bits = pmcbits[((ev >> PM_PMC_SH) & PM_PMC_MSK) - 1];
+	else
+		bits = classbits[class];
+	mask  = bits[0];
+	value = bits[1];
+
+	tuse = mpc7450_threshold_use(ev);
+	if (tuse) {
+		/* TV field: the requested threshold value */
+		mask  |= 0x3f << 24;
+		value |= ((ev >> PM_THRESH_SH) & PM_THRESH_MSK) << 24;
+		if (tuse == 2) {
+			/* TS field: the requested threshold scale */
+			mask |= 0x40000000;
+			if (ev & PM_THRMULT_MSKS)
+				value |= 0x40000000;
+		}
+	}
+
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+/*
+ * Alternative event codes, one row per named event.  Unused trailing
+ * slots are implicitly 0.  Rows must stay sorted by their first entry:
+ * find_alternative() stops scanning at the first row whose first code
+ * is greater than the event being looked up.
+ */
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x217, 0x317 },		/* PM_L1_DCACHE_MISS */
+	{ 0x418, 0x50f, 0x60f },	/* PM_SNOOP_RETRY */
+	{ 0x502, 0x602 },		/* PM_L2_HIT */
+	{ 0x503, 0x603 },		/* PM_L3_HIT */
+	{ 0x504, 0x604 },		/* PM_L2_ICACHE_MISS */
+	{ 0x505, 0x605 },		/* PM_L3_ICACHE_MISS */
+	{ 0x506, 0x606 },		/* PM_L2_DCACHE_MISS */
+	{ 0x507, 0x607 },		/* PM_L3_DCACHE_MISS */
+	{ 0x50a, 0x623 },		/* PM_LD_HIT_L3 */
+	{ 0x50b, 0x624 },		/* PM_ST_HIT_L3 */
+	{ 0x50d, 0x60d },		/* PM_L2_TOUCH_HIT */
+	{ 0x50e, 0x60e },		/* PM_L3_TOUCH_HIT */
+	{ 0x512, 0x612 },		/* PM_INT_LOCAL */
+	{ 0x513, 0x61d },		/* PM_L2_MISS */
+	{ 0x514, 0x61e },		/* PM_L3_MISS */
+};
+
+/*
+ * Look an event code up in event_alternatives[].
+ * Returns the index of the row containing it, or -1 if there is none.
+ */
+static int find_alternative(u32 event)
+{
+	int row, col;
+
+	for (row = 0; row < ARRAY_SIZE(event_alternatives); row++) {
+		const unsigned int *codes = event_alternatives[row];
+
+		/* Rows are ordered by first code, so later rows cannot match. */
+		if (codes[0] > event)
+			return -1;
+
+		for (col = 0; col < MAX_ALT; col++) {
+			if (!codes[col])
+				break;
+			if (codes[col] == event)
+				return row;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Fill alt[] with the event itself followed by any alternative codes
+ * for it, and return the number of entries written.  flags is unused.
+ */
+static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	const u32 code = (u32)event;
+	const int row = find_alternative(code);
+	int col, count = 0;
+
+	alt[count++] = event;
+	if (row < 0)
+		return count;
+
+	for (col = 0; col < MAX_ALT; col++) {
+		u32 other = event_alternatives[row][col];
+
+		/* Skip empty slots and the code we were given. */
+		if (!other || other == code)
+			continue;
+		alt[count++] = other;
+	}
+	return count;
+}
+
+/*
+ * Bitmaps of which PMCs each class can use for classes 0 - 3.
+ * Bit i is set if PMC i+1 is usable.
+ * The entry for class 4 is 0: mpc7450_compute_mmcr() takes the PMC for
+ * those events from the event code rather than from this map.
+ */
+static const u8 classmap[N_CLASSES] = {
+	0x3f, 0x0f, 0x0b, 0x03, 0
+};
+
+/*
+ * Bit position and width of each PMCSEL field.
+ * Both tables are indexed by PMC number - 1.  mpc7450_compute_mmcr()
+ * places the fields for PMC1 and PMC2 in MMCR0 and the rest in MMCR1.
+ */
+static const int pmcsel_shift[N_COUNTER] = {
+	6,	0,	27,	22,	17,	11
+};
+static const u32 pmcsel_mask[N_COUNTER] = {
+	0x7f,	0x3f,	0x1f,	0x1f,	0x1f,	0x3f
+};
+
+/*
+ * Compute MMCR0/1/2 values for a set of events.
+ *
+ * event[] holds n_ev event codes.  On success the 0-based PMC chosen for
+ * event[i] is stored in hwc[i], the register images are stored in *mmcr
+ * and 0 is returned.  Returns -1 if there are more than N_COUNTER events,
+ * if an event code is invalid, or if some event cannot be given a free
+ * PMC.  pevents and flags are not used.
+ */
+static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
+				struct mmcr_regs *mmcr,
+				struct perf_event *pevents[],
+				u32 flags __maybe_unused)
+{
+	/* event_index[c][k] is the index in event[] of the k'th class-c event */
+	u8 event_index[N_CLASSES][N_COUNTER];
+	int n_classevent[N_CLASSES];
+	int i, j, class, tuse;
+	/* pmc_inuse: bit (pmc - 1) is set once that PMC has been allocated */
+	u32 pmc_inuse = 0, pmc_avail;
+	u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0;
+	u32 ev, pmc, thresh;
+
+	if (n_ev > N_COUNTER)
+		return -1;
+
+	/* First pass: count usage in each class */
+	for (i = 0; i < N_CLASSES; ++i)
+		n_classevent[i] = 0;
+	for (i = 0; i < n_ev; ++i) {
+		class = mpc7450_classify_event(event[i]);
+		if (class < 0)
+			return -1;
+		j = n_classevent[class]++;
+		event_index[class][j] = i;
+	}
+
+	/* Second pass: allocate PMCs from most specific event to least */
+	for (class = N_CLASSES - 1; class >= 0; --class) {
+		for (i = 0; i < n_classevent[class]; ++i) {
+			ev = event[event_index[class][i]];
+			if (class == 4) {
+				/* The event code names its PMC; fail if taken */
+				pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+				if (pmc_inuse & (1 << (pmc - 1)))
+					return -1;
+			} else {
+				/* Find a suitable PMC */
+				pmc_avail = classmap[class] & ~pmc_inuse;
+				if (!pmc_avail)
+					return -1;
+				/* lowest-numbered free PMC; ffs() is 1-based */
+				pmc = ffs(pmc_avail);
+			}
+			pmc_inuse |= 1 << (pmc - 1);
+
+			tuse = mpc7450_threshold_use(ev);
+			if (tuse) {
+				/* threshold value goes into MMCR0 at bit 16 */
+				thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK;
+				mmcr0 |= thresh << 16;
+				if (tuse == 2 && (ev & PM_THRMULT_MSKS))
+					mmcr2 = 0x80000000;
+			}
+			/* reduce ev to its selector and move it to this PMC's field */
+			ev &= pmcsel_mask[pmc - 1];
+			ev <<= pmcsel_shift[pmc - 1];
+			if (pmc <= 2)
+				mmcr0 |= ev;
+			else
+				mmcr1 |= ev;
+			hwc[event_index[class][i]] = pmc - 1;
+		}
+	}
+
+	/* PMC1 has its own enable bit; PMC2..PMC6 share MMCR0_PMCnCE */
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr0 |= MMCR0_PMCnCE;
+
+	/* Return MMCRx values */
+	mmcr->mmcr0 = mmcr0;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcr2 = mmcr2;
+	/*
+	 * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define
+	 * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2`
+	 * value to ensure that any write to this SPRN_MMCRA will
+	 * use mmcr2 value.
+	 */
+	mmcr->mmcra = mmcr2;
+	return 0;
+}
+
+/*
+ * Stop a PMC from counting by clearing its PMCSEL field.
+ * pmc is 0-based here (0 = PMC1), unlike the 1-based event encoding.
+ */
+static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	const u32 field = pmcsel_mask[pmc] << pmcsel_shift[pmc];
+
+	/* PMC1 and PMC2 are selected in MMCR0, the others in MMCR1. */
+	if (pmc > 1)
+		mmcr->mmcr1 &= ~field;
+	else
+		mmcr->mmcr0 &= ~field;
+}
+
+/*
+ * Event codes for the generic PERF_COUNT_HW_* events, indexed by the
+ * generic event id.  Ids below the highest one listed that have no
+ * initializer here are implicitly 0.
+ */
+static int mpc7450_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 1,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x217, /* PM_L1_DCACHE_MISS */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x122, /* PM_BR_CMPL */
+	[PERF_COUNT_HW_BRANCH_MISSES] 		= 0x41c, /* PM_BR_MPRED */
+};
+
+/* C(x) abbreviates the PERF_COUNT_HW_CACHE_* enumerators used as indices */
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ * Indexed as [cache][operation][result]; within each row the first
+ * column is RESULT_ACCESS and the second is RESULT_MISS.
+ */
+static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x225	},
+		[C(OP_WRITE)] = {	0,		0x227	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x129,		0x115	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0x634,		0	},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x312	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x223	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x122,		0x41c	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+/*
+ * PMU description handed to register_power_pmu() by init_mpc7450_pmu().
+ * It ties together the callbacks and tables defined above.
+ * NOTE(review): add_fields and test_adder are interpreted by the code
+ * that consumes struct power_pmu; they presumably relate to the
+ * constraint-bit layout documented earlier in this file - confirm.
+ */
+struct power_pmu mpc7450_pmu = {
+	.name			= "MPC7450 family",
+	.n_counter		= N_COUNTER,
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x00111555ul,
+	.test_adder		= 0x00301000ul,
+	.compute_mmcr		= mpc7450_compute_mmcr,
+	.get_constraint		= mpc7450_get_constraint,
+	.get_alternatives	= mpc7450_get_alternatives,
+	.disable_pmc		= mpc7450_disable_pmc,
+	.n_generic		= ARRAY_SIZE(mpc7450_generic_events),
+	.generic_events		= mpc7450_generic_events,
+	.cache_events		= &mpc7450_cache_events,
+};
+
+/*
+ * Register the MPC7450 PMU if the processor version is one of the
+ * supported family members; otherwise return -ENODEV.
+ */
+static int __init init_mpc7450_pmu(void)
+{
+	if (pvr_version_is(PVR_VER_7450) || pvr_version_is(PVR_VER_7455) ||
+	    pvr_version_is(PVR_VER_7447) || pvr_version_is(PVR_VER_7447A) ||
+	    pvr_version_is(PVR_VER_7448))
+		return register_power_pmu(&mpc7450_pmu);
+
+	return -ENODEV;
+}
+
+early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 0000000000..350dccb014
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016 Anju T, IBM Corporation.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+#include <asm/perf_regs.h>
+
+/*
+ * Run-time mask of extended registers.  It starts out as 0 and is not
+ * assigned in this file (presumably set by the PMU drivers; confirm).
+ */
+u64 PERF_REG_EXTENDED_MASK;
+
+/* Designated initializer: slot `id` gets the offset of pt_regs member `r` */
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+/*
+ * Register bits in neither PERF_REG_EXTENDED_MASK nor PERF_REG_PMU_MASK.
+ * Not a compile-time constant, because PERF_REG_EXTENDED_MASK is a variable.
+ */
+#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
+
+/*
+ * Offset into struct pt_regs for each perf register index below
+ * PERF_REG_POWERPC_MAX; perf_reg_value() uses it to fetch the value.
+ *
+ * Notes:
+ *  - Without CONFIG_PPC64, PERF_REG_POWERPC_SOFTE reads the `mq` member.
+ *  - PERF_REG_POWERPC_SIER and PERF_REG_POWERPC_MMCRA use the offsets of
+ *    `dar` and `dsisr` respectively (presumably the PMU code stores
+ *    SIER/MMCRA in those slots of the sample's pt_regs - confirm).
+ */
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R0,  gpr[0]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R1,  gpr[1]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R2,  gpr[2]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R3,  gpr[3]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R4,  gpr[4]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R5,  gpr[5]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R6,  gpr[6]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R7,  gpr[7]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R8,  gpr[8]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R9,  gpr[9]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R10, gpr[10]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R11, gpr[11]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R12, gpr[12]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R13, gpr[13]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R14, gpr[14]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R15, gpr[15]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R16, gpr[16]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R17, gpr[17]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R18, gpr[18]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R19, gpr[19]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R20, gpr[20]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R21, gpr[21]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R22, gpr[22]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R23, gpr[23]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R24, gpr[24]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R25, gpr[25]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R26, gpr[26]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R27, gpr[27]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R28, gpr[28]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R29, gpr[29]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R30, gpr[30]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_R31, gpr[31]),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_LINK, link),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+#ifdef CONFIG_PPC64
+	PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe),
+#else
+	PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, mq),
+#endif
+	PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
+	PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
+};
+
+/*
+ * Fetch the current value of one extended register.
+ * Indices not handled here (including the 64-bit-only ones when
+ * CONFIG_PPC64 is not set) read as 0.
+ */
+static u64 get_ext_regs_value(int idx)
+{
+	u64 val = 0;
+
+	switch (idx) {
+	case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6:
+		val = get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1);
+		break;
+	case PERF_REG_POWERPC_MMCR0:
+		val = mfspr(SPRN_MMCR0);
+		break;
+	case PERF_REG_POWERPC_MMCR1:
+		val = mfspr(SPRN_MMCR1);
+		break;
+	case PERF_REG_POWERPC_MMCR2:
+		val = mfspr(SPRN_MMCR2);
+		break;
+#ifdef CONFIG_PPC64
+	case PERF_REG_POWERPC_MMCR3:
+		val = mfspr(SPRN_MMCR3);
+		break;
+	case PERF_REG_POWERPC_SIER2:
+		val = mfspr(SPRN_SIER2);
+		break;
+	case PERF_REG_POWERPC_SIER3:
+		val = mfspr(SPRN_SIER3);
+		break;
+	case PERF_REG_POWERPC_SDAR:
+		val = mfspr(SPRN_SDAR);
+		break;
+#endif
+	case PERF_REG_POWERPC_SIAR:
+		val = mfspr(SPRN_SIAR);
+		break;
+	default:
+		break;
+	}
+
+	return val;
+}
+
+/*
+ * Return the value of perf register idx for a sample.
+ * Registers below PERF_REG_POWERPC_MAX come from regs; extended ones are
+ * read by get_ext_regs_value(); anything further warns once and reads 0.
+ */
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	/* SIER and MMCRA read as 0 where this configuration lacks them. */
+	if (idx == PERF_REG_POWERPC_SIER) {
+		if (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+		    IS_ENABLED(CONFIG_PPC32) ||
+		    !is_sier_available())
+			return 0;
+	} else if (idx == PERF_REG_POWERPC_MMCRA) {
+		if (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+		    IS_ENABLED(CONFIG_PPC32))
+			return 0;
+	}
+
+	if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX)
+		return get_ext_regs_value(idx);
+
+	/* An index past every supported register is a caller bug. */
+	if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX))
+		return 0;
+
+	return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+/*
+ * Accept a register mask only if it is non-empty and has no bit set
+ * in REG_RESERVED; otherwise return -EINVAL.
+ */
+int perf_reg_validate(u64 mask)
+{
+	if (mask && !(mask & REG_RESERVED))
+		return 0;
+
+	return -EINVAL;
+}
+
+/* Report the register ABI of a task: 32-bit or 64-bit. */
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return is_tsk_32bit_task(task) ? PERF_SAMPLE_REGS_ABI_32 :
+					 PERF_SAMPLE_REGS_ABI_64;
+}
+
+/*
+ * Fill regs_user with the current task's pt_regs and matching ABI.
+ * If the task has no pt_regs the ABI is PERF_SAMPLE_REGS_ABI_NONE.
+ * The regs argument is not used.
+ */
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs)
+{
+	regs_user->regs = task_pt_regs(current);
+
+	if (regs_user->regs)
+		regs_user->abi = perf_reg_abi(current);
+	else
+		regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+}
diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
new file mode 100644
index 0000000000..564f14097f
--- /dev/null
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+/*
+ * Power10 event codes.
+ *
+ * This header does not define EVENT(); the including file must define
+ * EVENT(name, code) before including it.
+ */
+EVENT(PM_CYC,				0x600f4);
+EVENT(PM_DISP_STALL_CYC,			0x100f8);
+EVENT(PM_EXEC_STALL,				0x30008);
+EVENT(PM_INST_CMPL,				0x500fa);
+EVENT(PM_BR_CMPL,                               0x4d05e);
+EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
+EVENT(PM_BR_FIN,				0x2f04a);
+EVENT(PM_MPRED_BR_FIN,				0x3e098);
+EVENT(PM_LD_DEMAND_MISS_L1_FIN,			0x400f0);
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1,				0x100fc);
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1,				0x3e054);
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1,				0x300f0);
+/* L1 cache data prefetches */
+EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS,		0x1002c);
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS,			0x200fc);
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1,				0x04080);
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_INST_FROM_L1MISS,			0x03f00000001c040);
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_REQ,				0x040a0);
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3,				0x01340000001c040);
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS,			0x300fe);
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST,					0x010000046080);
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS,				0x26880);
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PF_MISS_L3,				0x100000016080);
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS,				0x300fc);
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS,				0x400fc);
+
+/* Alternative codes for PM_CYC and PM_INST_CMPL */
+EVENT(PM_CYC_ALT,				0x0001e);
+EVENT(PM_INST_CMPL_ALT,				0x00002);
+
+/*
+ * Memory Access Events
+ *
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0).
+ * To enable capturing of memory profiling, these MMCRA bits
+ * need to be programmed, along with the corresponding raw event
+ * format encoding.
+ *
+ * MMCRA bits encoding needed are
+ *     SM (Sampling Mode)
+ *     EM (Eligibility for Random Sampling)
+ *     TECE (Threshold Event Counter Event)
+ *     TS (Threshold Start Event)
+ *     TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ *     sample [EM,SM]
+ *     thresh_sel (TECE)
+ *     thresh start (TS)
+ *     thresh end (TE)
+ *
+ * Both codes below have 0x401e0 in their low 20 bits and differ from
+ * each other only in bits 24-27 (0x4 for loads, 0xc for stores).
+ */
+
+EVENT(MEM_LOADS,				0x35340401e0);
+EVENT(MEM_STORES,				0x353c0401e0);
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
new file mode 100644
index 0000000000..9b5133e361
--- /dev/null
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"power10-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding for Power10:
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | | [ ]   [ src_match ] [  src_mask ]   | [ ] [ l2l3_sel ]  [  thresh_ctl   ]
+ *   | |  |                                  |  |                         |
+ *   | |  *- IFM (Linux)                     |  |        thresh start/stop -*
+ *   | *- BHRB (Linux)                       |  src_sel
+ *   *- EBB (Linux)                          *invert_bit
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   [   ] [  sample ]   [ ] [ ]   [ pmc ]   [unit ]   [ ] |  m   [    pmcxsel    ]
+ *     |        |        |    |                        |   |  |
+ *     |        |        |    |                        |   |  *- mark
+ *     |        |        |    *- L1/L2/L3 cache_sel    |   |*-radix_scope_qual
+ *     |        |        sdar_mode                     |
+ *     |        *- sampling mode for marked events     *- combine
+ *     |
+ *     *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit    (PMCxUNIT)
+ * MMCR1[24]   = pmc1combine[0]
+ * MMCR1[25]   = pmc1combine[1]
+ * MMCR1[26]   = pmc2combine[0]
+ * MMCR1[27]   = pmc2combine[1]
+ * MMCR1[28]   = pmc3combine[0]
+ * MMCR1[29]   = pmc3combine[1]
+ * MMCR1[30]   = pmc4combine[0]
+ * MMCR1[31]   = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ *	MMCR1[20:27] = thresh_ctl
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ *	MMCR1[20:27] = thresh_ctl
+ * else
+ *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
+ *
+ * if thresh_sel:
+ *	MMCRA[45:47] = thresh_sel
+ *
+ * if l2l3_sel:
+ * MMCR2[56:60] = l2l3_sel[0:4]
+ *
+ * MMCR1[16] = cache_sel[0]
+ * MMCR1[17] = cache_sel[1]
+ * MMCR1[18] = radix_scope_qual
+ *
+ * if mark:
+ *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
+ *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
+ *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ *	MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE]  = sdar_mode[0:1]
+ */
+
+/*
+ * Some power10 event codes.
+ *
+ * With EVENT() defined as below, each EVENT(name, code) line of
+ * power10-events-list.h becomes an anonymous enum that defines `name`
+ * as the constant `code`.  The macro is undefined again afterwards.
+ */
+#define EVENT(_name, _code)     enum{_name = _code}
+
+#include "power10-events-list.h"
+
+#undef EVENT
+
+/*
+ * MMCRA IFM bits - POWER10
+ *
+ * IFM1 is bit 30, IFM2 is bit 31 and IFM3 is both (counting the least
+ * significant bit as bit 0).  POWER10_MMCRA_BHRB_MASK covers both bits.
+ */
+#define POWER10_MMCRA_IFM1		0x0000000040000000UL
+#define POWER10_MMCRA_IFM2		0x0000000080000000UL
+#define POWER10_MMCRA_IFM3		0x00000000C0000000UL
+#define POWER10_MMCRA_BHRB_MASK		0x00000000C0000000UL
+
+/* Defined outside this file */
+extern u64 PERF_REG_EXTENDED_MASK;
+
+/*
+ * Table of alternatives, sorted by column 0.
+ * Each row pairs an *_ALT event code with the primary code for the same
+ * event; the table is passed to isa207_get_alternatives().
+ */
+static const unsigned int power10_event_alternatives[][MAX_ALT] = {
+	{ PM_INST_CMPL_ALT,		PM_INST_CMPL },
+	{ PM_CYC_ALT,			PM_CYC },
+};
+
+/*
+ * Look up alternatives for an event in the Power10 table by delegating
+ * to the common isa207 helper; returns whatever that helper returns.
+ */
+static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	return isa207_get_alternatives(event, alt,
+				       ARRAY_SIZE(power10_event_alternatives),
+				       flags, power10_event_alternatives);
+}
+
+/*
+ * Validate an event's attr.config for Power10.
+ * Returns -EINVAL if the sample field equals 0x10 or if the common
+ * isa3XX check reports a problem; returns 0 otherwise.
+ */
+static int power10_check_attr_config(struct perf_event *ev)
+{
+	const u64 sample = (ev->attr.config >> EVENT_SAMPLE_SHIFT) &
+			   EVENT_SAMPLE_MASK;
+
+	if (sample == 0x10)
+		return -EINVAL;
+
+	if (isa3XX_check_attr_config(ev))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Attribute definitions for the generic events.  branch-instructions,
+ * branch-misses and cache-misses are each defined twice with different
+ * event codes: the PM_BR_CMPL / PM_BR_MPRED_CMPL / PM_LD_MISS_L1 variants
+ * are listed in power10_events_attr_dd1[], while the PM_BR_FIN /
+ * PM_MPRED_BR_FIN / PM_LD_DEMAND_MISS_L1_FIN variants are listed in
+ * power10_events_attr[].
+ */
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(mem-loads,			MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores,			MEM_STORES);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_FIN);
+GENERIC_EVENT_ATTR(branch-misses,		PM_MPRED_BR_FIN);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_DEMAND_MISS_L1_FIN);
+
+/* Attribute definitions for the cache events */
+CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
+CACHE_EVENT_ATTR(L1-dcache-prefetches,		PM_LD_PREFETCH_CACHE_LINE_MISS);
+CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads,		PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_REQ);
+CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PF_MISS_L3);
+CACHE_EVENT_ATTR(LLC-store-misses,		PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores,			PM_L2_ST);
+CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
+
+/*
+ * Event attributes for the DD1 variant of the "events" group.
+ * Compared with power10_events_attr[] it uses PM_BR_CMPL,
+ * PM_BR_MPRED_CMPL and PM_LD_MISS_L1 for the generic branch and
+ * cache-miss events, and it omits the PM_L3_PF_MISS_L3, PM_L2_ST_MISS
+ * and PM_L2_ST cache events.
+ */
+static struct attribute *power10_events_attr_dd1[] = {
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+	GENERIC_EVENT_PTR(MEM_LOADS),
+	GENERIC_EVENT_PTR(MEM_STORES),
+	CACHE_EVENT_PTR(PM_LD_MISS_L1),
+	CACHE_EVENT_PTR(PM_LD_REF_L1),
+	CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+	CACHE_EVENT_PTR(PM_ST_MISS_L1),
+	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+	CACHE_EVENT_PTR(PM_INST_FROM_L1),
+	CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+	CACHE_EVENT_PTR(PM_BR_CMPL),
+	CACHE_EVENT_PTR(PM_DTLB_MISS),
+	CACHE_EVENT_PTR(PM_ITLB_MISS),
+	NULL
+};
+
+/* Event attributes for the non-DD1 "events" group; NULL-terminated */
+static struct attribute *power10_events_attr[] = {
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_FIN),
+	GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
+	GENERIC_EVENT_PTR(MEM_LOADS),
+	GENERIC_EVENT_PTR(MEM_STORES),
+	CACHE_EVENT_PTR(PM_LD_MISS_L1),
+	CACHE_EVENT_PTR(PM_LD_REF_L1),
+	CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+	CACHE_EVENT_PTR(PM_ST_MISS_L1),
+	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+	CACHE_EVENT_PTR(PM_INST_FROM_L1),
+	CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+	CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
+	CACHE_EVENT_PTR(PM_L2_ST_MISS),
+	CACHE_EVENT_PTR(PM_L2_ST),
+	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+	CACHE_EVENT_PTR(PM_BR_CMPL),
+	CACHE_EVENT_PTR(PM_DTLB_MISS),
+	CACHE_EVENT_PTR(PM_ITLB_MISS),
+	NULL
+};
+
+/* "events" attribute group, DD1 variant */
+static const struct attribute_group power10_pmu_events_group_dd1 = {
+	.name = "events",
+	.attrs = power10_events_attr_dd1,
+};
+
+/* "events" attribute group, non-DD1 variant */
+static const struct attribute_group power10_pmu_events_group = {
+	.name = "events",
+	.attrs = power10_events_attr,
+};
+
+/*
+ * Named bit ranges of the raw event encoding.  All fields live in
+ * `config` except thresh_cmp, which is in `config1`.  `event` spans
+ * config bits 0-59 and so overlaps every other config field.
+ */
+PMU_FORMAT_ATTR(event,          "config:0-59");
+PMU_FORMAT_ATTR(pmcxsel,        "config:0-7");
+PMU_FORMAT_ATTR(mark,           "config:8");
+PMU_FORMAT_ATTR(combine,        "config:10-11");
+PMU_FORMAT_ATTR(unit,           "config:12-15");
+PMU_FORMAT_ATTR(pmc,            "config:16-19");
+PMU_FORMAT_ATTR(cache_sel,      "config:20-21");
+PMU_FORMAT_ATTR(sdar_mode,      "config:22-23");
+PMU_FORMAT_ATTR(sample_mode,    "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel,     "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop,    "config:32-35");
+PMU_FORMAT_ATTR(thresh_start,   "config:36-39");
+PMU_FORMAT_ATTR(l2l3_sel,       "config:40-44");
+PMU_FORMAT_ATTR(src_sel,        "config:45-46");
+PMU_FORMAT_ATTR(invert_bit,     "config:47");
+PMU_FORMAT_ATTR(src_mask,       "config:48-53");
+PMU_FORMAT_ATTR(src_match,      "config:54-59");
+PMU_FORMAT_ATTR(radix_scope,	"config:9");
+PMU_FORMAT_ATTR(thresh_cmp,     "config1:0-17");
+
+/* Every format attribute defined above; NULL-terminated */
+static struct attribute *power10_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_pmcxsel.attr,
+	&format_attr_mark.attr,
+	&format_attr_combine.attr,
+	&format_attr_unit.attr,
+	&format_attr_pmc.attr,
+	&format_attr_cache_sel.attr,
+	&format_attr_sdar_mode.attr,
+	&format_attr_sample_mode.attr,
+	&format_attr_thresh_sel.attr,
+	&format_attr_thresh_stop.attr,
+	&format_attr_thresh_start.attr,
+	&format_attr_l2l3_sel.attr,
+	&format_attr_src_sel.attr,
+	&format_attr_invert_bit.attr,
+	&format_attr_src_mask.attr,
+	&format_attr_src_match.attr,
+	&format_attr_radix_scope.attr,
+	&format_attr_thresh_cmp.attr,
+	NULL,
+};
+
+/* "format" attribute group */
+static const struct attribute_group power10_pmu_format_group = {
+	.name = "format",
+	.attrs = power10_pmu_format_attr,
+};
+
+/* The "caps" group is declared here with no attributes (only the terminator) */
+static struct attribute *power10_pmu_caps_attrs[] = {
+	NULL
+};
+
+/* "caps" attribute group; unlike the other groups it is not const */
+static struct attribute_group power10_pmu_caps_group = {
+	.name  = "caps",
+	.attrs = power10_pmu_caps_attrs,
+};
+
+/* Attribute groups for the DD1 variant: format, DD1 events, caps */
+static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
+	&power10_pmu_format_group,
+	&power10_pmu_events_group_dd1,
+	&power10_pmu_caps_group,
+	NULL,
+};
+
+/* Attribute groups for the non-DD1 variant: format, events, caps */
+static const struct attribute_group *power10_pmu_attr_groups[] = {
+	&power10_pmu_format_group,
+	&power10_pmu_events_group,
+	&power10_pmu_caps_group,
+	NULL,
+};
+
+/*
+ * Generic PERF_COUNT_HW_* event codes, DD1 variant: branch events use
+ * PM_BR_CMPL / PM_BR_MPRED_CMPL and cache misses use PM_LD_MISS_L1.
+ */
+static int power10_generic_events_dd1[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
+};
+
+/*
+ * Generic PERF_COUNT_HW_* event codes, non-DD1 variant: branch events use
+ * PM_BR_FIN / PM_MPRED_BR_FIN and cache misses use PM_LD_DEMAND_MISS_L1_FIN.
+ */
+static int power10_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_MPRED_BR_FIN,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_DEMAND_MISS_L1_FIN,
+};
+
+/*
+ * Translate a perf branch_sample_type into the MMCRA IFM bits to use.
+ * Returns 0 when no hardware filter is needed, one of the
+ * POWER10_MMCRA_IFM* values for a supported filter, or (u64)-1 when the
+ * request cannot be satisfied.  The checks are ordered: the first
+ * matching flag decides the result.
+ */
+static u64 power10_bhrb_filter_map(u64 branch_sample_type)
+{
+	/*
+	 * Privilege-level filtering is not handled here.  BHRB is always
+	 * recorded together with a regular PMU event and shares that
+	 * event's privilege state filter, which is set up as part of the
+	 * basic PMC configuration, so any separate BHRB-specific
+	 * privilege request is ignored.
+	 */
+
+	/* Sampling any branch means no filter is needed */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+		return 0;
+
+	/* Return-only filtering is not supported by the hardware */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		return -1;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		return POWER10_MMCRA_IFM2;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_COND)
+		return POWER10_MMCRA_IFM3;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+		return -1;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		return POWER10_MMCRA_IFM1;
+
+	/* Anything else is unsupported */
+	return -1;
+}
+
+/*
+ * OR the requested BHRB filter bits into MMCRA.  Only the bits covered
+ * by POWER10_MMCRA_BHRB_MASK are taken from pmu_bhrb_filter; bits that
+ * are already set in MMCRA are left as they are.
+ */
+static void power10_config_bhrb(u64 pmu_bhrb_filter)
+{
+	u64 mmcra = mfspr(SPRN_MMCRA);
+
+	mmcra |= pmu_bhrb_filter & POWER10_MMCRA_BHRB_MASK;
+	mtspr(SPRN_MMCRA, mmcra);
+}
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ *
+ * This is the DD1 variant, indexed as [cache][operation][result].
+ * Its LL write entries and its LL prefetch-access entry are -1.
+ */
+static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_LD_REF_L1,
+			[C(RESULT_MISS)] = PM_LD_MISS_L1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_ST_MISS_L1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+			[C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	 [C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_BR_CMPL,
+			[C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+};
+
+static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_LD_REF_L1,
+			[C(RESULT_MISS)] = PM_LD_MISS_L1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_ST_MISS_L1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+			[C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+		},
+		[C(OP_WRITE)] = {	/* both -1 in power10_cache_events_dd1 */
+			[C(RESULT_ACCESS)] = PM_L2_ST,
+			[C(RESULT_MISS)] = PM_L2_ST_MISS,
+		},
+		[C(OP_PREFETCH)] = {	/* ACCESS is -1 in power10_cache_events_dd1 */
+			[C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	 [C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_BR_CMPL,
+			[C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+};
+
+#undef C
+
+/*
+ * Build the MMCR values with isa207_compute_mmcr() and, on success, set
+ * MMCR0[CC56RUN] so that PMC5 and PMC6 count regardless of the state of
+ * CTRL[RUN]; that lets counters 5 and 6 be used as PM_INST_CMPL and
+ * PM_CYC.  A non-zero helper result is returned unchanged.
+ */
+static int power10_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags)
+{
+	int rc = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+
+	if (rc)
+		return rc;
+	mmcr->mmcr0 |= MMCR0_C56RUN;
+	return 0;
+}
+
+static struct power_pmu power10_pmu = {	/* adjusted by init_power10_pmu() on DD1 */
+	.name			= "POWER10",
+	.n_counter		= MAX_PMU_COUNTERS,
+	.add_fields		= ISA207_ADD_FIELDS,
+	.test_adder		= ISA207_TEST_ADDER,
+	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
+	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
+	.compute_mmcr		= power10_compute_mmcr,
+	.config_bhrb		= power10_config_bhrb,
+	.bhrb_filter_map	= power10_bhrb_filter_map,
+	.get_constraint		= isa207_get_constraint,
+	.get_alternatives	= power10_get_alternatives,
+	.get_mem_data_src	= isa207_get_mem_data_src,
+	.get_mem_weight		= isa207_get_mem_weight,
+	.disable_pmc		= isa207_disable_pmc,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S |
+				  PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1,
+	.n_generic		= ARRAY_SIZE(power10_generic_events),
+	.generic_events		= power10_generic_events,  /* _dd1 on DD1 */
+	.cache_events		= &power10_cache_events,   /* _dd1 on DD1 */
+	.attr_groups		= power10_pmu_attr_groups, /* _dd1 on DD1 */
+	.bhrb_nr		= 32,
+	.capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
+	.check_attr_config	= power10_check_attr_config,
+};
+
+int __init init_power10_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+	int rc;
+
+	if (PVR_VER(pvr) != PVR_POWER10)
+		return -ENODEV;
+
+	/* Set the PERF_REG_EXTENDED_MASK here */
+	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
+	/*
+	 * POWER10 DD1: add the ppmu flag and switch over to the DD1
+	 * generic events, attribute groups and cache events.
+	 */
+	if (PVR_CFG(pvr) == 1) {
+		power10_pmu.flags |= PPMU_P10_DD1;
+		power10_pmu.generic_events = power10_generic_events_dd1;
+		power10_pmu.attr_groups = power10_pmu_attr_groups_dd1;
+		power10_pmu.cache_events = &power10_cache_events_dd1;
+	}
+
+	rc = register_power_pmu(&power10_pmu);
+	if (rc)
+		return rc;
+
+	/* Tell userspace that EBB is supported */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
new file mode 100644
index 0000000000..b4708ab731
--- /dev/null
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER5+/++ (not POWER5) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/string.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+/*
+ * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)	/* PMC field, in place */
+#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	12	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	7
+#define PM_GRS_SH	8	/* Storage subsystem mux select */
+#define PM_GRS_MSK	7
+#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
+#define PM_PMCSEL_MSK	0x7f	/* PMCSEL value: low 7 bits of the code */
+
+/* Values in PM_UNIT field */
+#define PM_FPU		0
+#define PM_ISU0		1
+#define PM_IFU		2
+#define PM_ISU1		3
+#define PM_IDU		4
+#define PM_ISU0_ALT	6	/* ISU0 taken through TTM1 instead of TTM0 */
+#define PM_GRS		7
+#define PM_LSU0		8
+#define PM_LSU1		0xc	/* PM_LSU1 + 1 is used internally for bytes 4-7 */
+#define PM_LASTUNIT	0xc	/* larger unit values are rejected */
+
+/*
+ * Bits in MMCR1 for POWER5+
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	60
+#define MMCR1_TTM2SEL_SH	58
+#define MMCR1_TTM3SEL_SH	56
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	54
+#define MMCR1_TD_CP_DBG1SEL_SH	52
+#define MMCR1_TD_CP_DBG2SEL_SH	50
+#define MMCR1_TD_CP_DBG3SEL_SH	48
+#define MMCR1_GRS_L2SEL_SH	46
+#define MMCR1_GRS_L2SEL_MSK	3
+#define MMCR1_GRS_L3SEL_SH	44
+#define MMCR1_GRS_L3SEL_MSK	3
+#define MMCR1_GRS_MCSEL_SH	41
+#define MMCR1_GRS_MCSEL_MSK	7
+#define MMCR1_GRS_FABSEL_SH	39
+#define MMCR1_GRS_FABSEL_MSK	3
+#define MMCR1_PMC1_ADDER_SEL_SH	35
+#define MMCR1_PMC2_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC1SEL_SH	25
+#define MMCR1_PMC2SEL_SH	17
+#define MMCR1_PMC3SEL_SH	9
+#define MMCR1_PMC4SEL_SH	1
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)	/* n = 0..3: PMC1..PMC4 */
+#define MMCR1_PMCSEL_MSK	0x7f
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *             [  ><><>< ><> <><>[  >  <  ><  ><  ><  ><><><><><><>
+ *             NC  G0G1G2 G3 T0T1 UC    B0  B1  B2  B3 P6P5P4P3P2P1
+ *
+ * NC - number of counters
+ *     51: NC error 0x0008_0000_0000_0000
+ *     48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ *     46-47: GRS_L2SEL value
+ *     44-45: GRS_L3SEL value
+ *     41-43: GRS_MCSEL value
+ *     39-40: GRS_FABSEL value
+ *	Note that these match up with their bit positions in MMCR1
+ *
+ * T0 - TTM0 constraint
+ *     36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ *     33: UC3 error 0x02_0000_0000
+ *     32: FPU|IFU|ISU1 events needed 0x01_0000_0000
+ *     31: ISU0 events needed 0x00_8000_0000
+ *     30: IDU|GRS events needed 0x00_4000_0000
+ *
+ * B0
+ *     24-27: Byte 0 event source 0x0f00_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P6
+ *     11: P6 error 0x800
+ *     10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ *     0-9: Count of events needing PMC1..PMC5
+ */
+
+static const int grsel_shift[8] = {	/* MMCR1 shift, indexed by (event & 7) */
+	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
+	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
+	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
+};
+
+/* Constraint { mask, value } pairs for using events from the various units */
+static unsigned long unit_cons[PM_LASTUNIT+1][2] = {	/* unlisted units: 0, 0 */
+	[PM_FPU] =   { 0x3200000000ul, 0x0100000000ul },
+	[PM_ISU0] =  { 0x0200000000ul, 0x0080000000ul },
+	[PM_ISU1] =  { 0x3200000000ul, 0x3100000000ul },
+	[PM_IFU] =   { 0x3200000000ul, 0x2100000000ul },
+	[PM_IDU] =   { 0x0e00000000ul, 0x0040000000ul },
+	[PM_GRS] =   { 0x0e00000000ul, 0x0c40000000ul },
+};
+
+static int power5p_get_constraint(u64 event, unsigned long *maskp,
+				  unsigned long *valp, u64 event_config1 __maybe_unused)
+{
+	int pmc, byte, unit, sh;
+	int bit, fmask;
+	unsigned long mask = 0, value = 0;	/* returned via *maskp / *valp */
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;	/* each PMC has a 2-bit P1..P6 field */
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
+			return -1;	/* PMC5/6 accept only these two codes */
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+		if (unit > PM_LASTUNIT)
+			return -1;
+		if (unit == PM_ISU0_ALT)
+			unit = PM_ISU0;	/* the ALT code shares ISU0's constraints */
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (byte >= 4) {
+			if (unit != PM_LSU1)
+				return -1;	/* bytes 4-7 exist only for LSU1 */
+			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+			++unit;
+			byte &= 3;
+		}
+		if (unit == PM_GRS) {
+			bit = event & 7;
+			fmask = (bit == 6)? 7: 3;	/* index 6 is GRS_MCSEL (3 bits) */
+			sh = grsel_shift[bit];
+			mask |= (unsigned long)fmask << sh;
+			value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
+				<< sh;
+		}
+		/* Set byte lane select field */
+		mask  |= 0xfUL << (24 - 4 * byte);
+		value |= (unsigned long)unit << (24 - 4 * byte);
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000000000000ul;
+		value |= 0x1000000000000ul;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;	/* -1 above means the event code is not acceptable */
+}
+
+static int power5p_limited_pmc_event(u64 event)
+{
+	unsigned int sel = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+	return sel >= 5 && sel <= 6;	/* PMC5 and PMC6 are the limited PMCs */
+}
+
+#define MAX_ALT	3	/* at most 3 alternatives for any event */
+/* Keep rows sorted by first entry: find_alternative() stops at a larger one */
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x100c0,  0x40001f },			/* PM_GCT_FULL_CYC */
+	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
+	{ 0x230e2,  0x323087 },			/* PM_BR_PRED_CR */
+	{ 0x230e3,  0x223087, 0x3230a0 },	/* PM_BR_PRED_TA */
+	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
+	{ 0x800c4,  0xc20e0 },			/* PM_DTLB_MISS */
+	{ 0xc50c6,  0xc60e0 },			/* PM_MRK_DTLB_MISS */
+	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
+	{ 0x100009, 0x200009 },			/* PM_INST_CMPL */
+	{ 0x200015, 0x300015 },			/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
+	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
+};
+
+/*
+ * Look up @event in event_alternatives[] (rows sorted by first entry).
+ * Returns the index of the row listing @event, or -1 if there is none.
+ */
+static int find_alternative(unsigned int event)
+{
+	int idx, k;
+
+	for (idx = 0; idx < ARRAY_SIZE(event_alternatives); idx++) {
+		if (event_alternatives[idx][0] > event)
+			return -1;
+		for (k = 0; k < MAX_ALT && event_alternatives[idx][k]; k++)
+			if (event_alternatives[idx][k] == event)
+				return idx;
+	}
+	return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {
+	/* PMC 1, pairs with PMC 4 */	{ 0x21, 0x23, 0x25, 0x27 },
+	/* PMC 2, pairs with PMC 3 */	{ 0x07, 0x17, 0x0e, 0x1e },
+	/* PMC 3, pairs with PMC 2 */	{ 0x20, 0x22, 0x24, 0x26 },
+	/* PMC 4, pairs with PMC 1 */	{ 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * Some direct events for decodes of event bus byte 3 have alternative
+ * PMCSEL values on other counters.  This returns the alternative
+ * event code for those that do, or -1 otherwise.  This also handles
+ * alternative PMCSEL values for add events.
+ */
+static s64 find_alternative_bdecode(u64 event)
+{
+	int pmc, altpmc, pp, j;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc == 0 || pmc > 4)
+		return -1;	/* only codes tied to PMC1-4 are handled */
+	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
+	pp = event & PM_PMCSEL_MSK;
+	for (j = 0; j < 4; ++j) {
+		if (bytedecode_alternatives[pmc - 1][j] == pp) {
+			/* same column j, taken from the paired PMC's row */
+			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+				(altpmc << PM_PMC_SH) |
+				bytedecode_alternatives[altpmc - 1][j];
+		}
+	}
+
+	/* new decode alternatives for power5+ */
+	if (pmc == 1 && (pp == 0x0d || pp == 0x0e))	/* -> PMC3 0x2e/0x2f */
+		return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
+	if (pmc == 3 && (pp == 0x2e || pp == 0x2f))	/* -> PMC1 0x0d/0x0e */
+		return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
+
+	/* alternative add event encodings */
+	if (pp == 0x10 || pp == 0x28)	/* swap 0x10 <-> 0x28, move to altpmc */
+		return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
+			(altpmc << PM_PMC_SH);
+
+	return -1;
+}
+
+/*
+ * Fill alt[] with @event followed by its alternative encodings and return
+ * the number of entries.  @flags (PPMU_*) can add the RUN-state
+ * equivalents and filters entries by whether they use PMC5/6, the
+ * limited PMCs.
+ */
+static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nlim, nalt = 1;
+	s64 ae;
+
+	alt[0] = event;
+	nlim = power5p_limited_pmc_event(event);
+	i = find_alternative(event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (!ae || ae == event)
+				continue;
+			/*
+			 * @event is already in nlim; counting it again could
+			 * let the "nlim < nalt" test below skip the filtering.
+			 */
+			alt[nalt++] = ae;
+			nlim += power5p_limited_pmc_event(ae);
+		}
+	} else {
+		ae = find_alternative_bdecode(event);
+		if (ae > 0)
+			alt[nalt++] = ae;
+	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state, so PM_CYC is equivalent
+		 * to PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
+		 * Alternatives that give no extra flexibility in assigning
+		 * PMCs (e.g. 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC)
+		 * are left out; no event ends up with more than 3.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {
+			switch (alt[i]) {
+			case 0xf:	/* PM_CYC */
+				alt[j++] = 0x600005;	/* PM_RUN_CYC */
+				++nlim;
+				break;
+			case 0x600005:	/* PM_RUN_CYC */
+				alt[j++] = 0xf;
+				break;
+			case 0x100009:	/* PM_INST_CMPL */
+				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
+				++nlim;
+				break;
+			case 0x500009:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 0x100009;	/* PM_INST_CMPL */
+				alt[j++] = 0x200009;
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+		/* remove the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i)
+			if (!power5p_limited_pmc_event(alt[i]))
+				alt[j++] = alt[i];
+		nalt = j;
+	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+		/* remove all but the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i)
+			if (power5p_limited_pmc_event(alt[i]))
+				alt[j++] = alt[i];
+		nalt = j;
+	}
+
+	return nalt;
+}
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value (0x00-0x27), bit i (LE) set if PMC i is a marked
+ * event.  Bit 0 is set if it is marked for all PMCs.
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+	0,	/* 00 */
+	0x1f,	/* 01 PM_IOPS_CMPL */
+	0x2,	/* 02 PM_MRK_GRP_DISP */
+	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0,	/* 04 */
+	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+	0x80,	/* 06 */
+	0x80,	/* 07 */
+	0, 0, 0,/* 08 - 0a */
+	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+	0,	/* 0c */
+	0x80,	/* 0d */
+	0x80,	/* 0e */
+	0,	/* 0f */
+	0,	/* 10 */
+	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+	0,	/* 12 */
+	0x10,	/* 13 PM_MRK_GRP_CMPL */
+	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x2,	/* 15 PM_MRK_GRP_ISSUED */
+	0x80,	/* 16 */
+	0x80,	/* 17 */
+	0, 0, 0, 0, 0,	/* 18 - 1c */
+	0x80,	/* 1d */
+	0x80,	/* 1e */
+	0,	/* 1f */
+	0x80,	/* 20 */
+	0x80,	/* 21 */
+	0x80,	/* 22 */
+	0x80,	/* 23 */
+	0x80,	/* 24 */
+	0x80,	/* 25 */
+	0x80,	/* 26 */
+	0x80,	/* 27 */
+};
+
+/*
+ * Returns 1 if event counts things relating to marked instructions
+ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
+ */
+static int power5p_marked_instr_event(u64 event)
+{
+	int pmc, psel;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc >= 5)
+		return 0;	/* PMC5/6 events are never reported as marked */
+
+	bit = -1;	/* -1: no event-bus bit to look at */
+	if (psel < sizeof(direct_event_is_marked)) {
+		if (direct_event_is_marked[psel] & (1 << pmc))
+			return 1;	/* marked directly for this PMC */
+		if (direct_event_is_marked[psel] & 0x80)
+			bit = 4;
+		else if (psel == 0x08)
+			bit = pmc - 1;
+		else if (psel == 0x10)
+			bit = 4 - pmc;
+		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+			bit = 4;
+	} else if ((psel & 0x48) == 0x40) {
+		bit = psel & 7;
+	} else if (psel == 0x28) {
+		bit = pmc - 1;
+	} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
+		bit = 4;
+	}
+
+	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+		return 0;
+
+	/* Bus event: test the selected bit against the per-unit marked mask */
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == PM_LSU0) {
+		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+		mask = 0x5dff00;
+	} else if (unit == PM_LSU1 && byte >= 4) {
+		byte -= 4;
+		/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
+		mask = 0x5f11c000;
+	} else
+		return 0;	/* no marked bus events for any other unit */
+
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int power5p_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[],	/* not referenced here */
+				u32 flags __maybe_unused)
+{
+	unsigned long mmcr1 = 0;
+	unsigned long mmcra = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm;
+	int i, isbus, bit, grsel;
+	unsigned int pmc_inuse = 0;	/* bit n set: counter index n is taken */
+	unsigned char busbyte[4];	/* unit recorded for each event-bus byte */
+	unsigned char unituse[16];	/* nonzero if some event uses that unit */
+	int ttmuse;
+
+	if (n_ev > 6)
+		return -1;	/* more events than the 6 counters */
+
+	/* First pass to count resource use */
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;	/* two events name the same PMC */
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+		if (event[i] & PM_BUSEVENT_MSK) {
+			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (unit == PM_ISU0_ALT)
+				unit = PM_ISU0;
+			if (byte >= 4) {
+				if (unit != PM_LSU1)
+					return -1;
+				++unit;	/* LSU1 bytes 4-7 are tracked as PM_LSU1 + 1 */
+				byte &= 3;
+			}
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;	/* byte lane already fed by another unit */
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU0] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
+		unituse[PM_ISU0] = 0;
+	}
+	/* Set TTM[01]SEL fields. */
+	ttmuse = 0;
+	for (i = PM_FPU; i <= PM_ISU1; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;	/* a second unit would need TTM0 */
+		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
+	}
+	ttmuse = 0;
+	for (; i <= PM_GRS; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;	/* a second unit would need TTM1 */
+		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
+	}
+	if (ttmuse > 1)
+		return -1;	/* NOTE(review): looks unreachable, loop bails first */
+
+	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+			/* get ISU0 through TTM1 rather than TTM0 */
+			unit = PM_ISU0_ALT;
+		} else if (unit == PM_LSU1 + 1) {
+			/* select lower word of LSU1 for this byte */
+			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		ttm = unit >> 2;
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		isbus = event[i] & PM_BUSEVENT_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			}
+			if (pmc >= 4)
+				return -1;	/* none of PMC1-4 is free */
+			pmc_inuse |= 1 << pmc;
+		} else if (pmc <= 4) {
+			/* Direct event */
+			--pmc;
+			if (isbus && (byte & 2) &&
+			    (psel == 8 || psel == 0x10 || psel == 0x28))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		} else {
+			/* Instructions or run cycles on PMC5/6 */
+			--pmc;
+		}
+		if (isbus && unit == PM_GRS) {
+			bit = psel & 7;
+			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
+		}
+		if (power5p_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
+			/* select alternate byte lane */
+			psel |= 0x10;
+		if (pmc <= 3)
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+		hwc[i] = pmc;	/* 0-based counter index chosen for event i */
+	}
+
+	/* Return MMCRx values */
+	mmcr->mmcr0 = 0;
+	if (pmc_inuse & 1)
+		mmcr->mmcr0 = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)	/* any of counter indices 1-5 in use */
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
+	return 0;	/* *mmcr and hwc[] are filled in; -1 above means failure */
+}
+
+static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	if (pmc < 4)	/* MMCR1 has PMCSEL fields for indices 0-3 only */
+		mmcr->mmcr1 &= ~((unsigned long)MMCR1_PMCSEL_MSK << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5p_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,	    /* PM_CYC */
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009, /* PM_INST_CMPL */
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x1c10a8, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events, indexed as
+ * [cache][operation][result].  0 means not supported, -1 means
+ * nonsensical, other values are event codes.
+ */
+static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x1c10a8,	0x3c1088	}, /* LD_REF_L1, LD_MISS_L1 */
+		[C(OP_WRITE)] = {	0x2c10a8,	0xc10c3		},
+		[C(OP_PREFETCH)] = {	0xc70e7,	-1		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0,		0		},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	0,		0		},
+		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0xc20e4,	0x800c4		}, /* miss: PM_DTLB_MISS */
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x800c0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x230e4,	0x230e5		}, /* BR_ISSUED, BR_MPRED_CR */
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
+static struct power_pmu power5p_pmu = {	/* registered by init_power5p_pmu() */
+	.name			= "POWER5+/++",
+	.n_counter		= 6,	/* PMC1-PMC6; PMC numbers above 6 are rejected */
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x7000000000055ul,
+	.test_adder		= 0x3000040000000ul,
+	.compute_mmcr		= power5p_compute_mmcr,
+	.get_constraint		= power5p_get_constraint,
+	.get_alternatives	= power5p_get_alternatives,
+	.disable_pmc		= power5p_disable_pmc,
+	.limited_pmc_event	= power5p_limited_pmc_event,
+	.flags			= PPMU_LIMITED_PMC5_6 | PPMU_HAS_SSLOT,
+	.n_generic		= ARRAY_SIZE(power5p_generic_events),
+	.generic_events		= power5p_generic_events,
+	.cache_events		= &power5p_cache_events,
+};
+
+int __init init_power5p_pmu(void)
+{
+	/* Register only when the PVR version field reads PVR_POWER5p */
+	if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER5p)
+		return register_power_pmu(&power5p_pmu);
+
+	/* any other processor version: nothing is registered */
+	return -ENODEV;
+}
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
new file mode 100644
index 0000000000..c6aefd0a1c
--- /dev/null
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -0,0 +1,629 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER5 (not POWER5++) processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/string.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+/*
+ * Bits in event code for POWER5 (not POWER5++)
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_BYTE_SH	12	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	7
+#define PM_GRS_SH	8	/* Storage subsystem mux select */
+#define PM_GRS_MSK	7
+#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
+#define PM_PMCSEL_MSK	0x7f
+
+/* Values in PM_UNIT field */
+#define PM_FPU		0
+#define PM_ISU0		1
+#define PM_IFU		2
+#define PM_ISU1		3
+#define PM_IDU		4
+#define PM_ISU0_ALT	6
+#define PM_GRS		7
+#define PM_LSU0		8
+#define PM_LSU1		0xc
+#define PM_LASTUNIT	0xc
+
+/*
+ * Bits in MMCR1 for POWER5
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	60
+#define MMCR1_TTM2SEL_SH	58
+#define MMCR1_TTM3SEL_SH	56
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	54
+#define MMCR1_TD_CP_DBG1SEL_SH	52
+#define MMCR1_TD_CP_DBG2SEL_SH	50
+#define MMCR1_TD_CP_DBG3SEL_SH	48
+#define MMCR1_GRS_L2SEL_SH	46
+#define MMCR1_GRS_L2SEL_MSK	3
+#define MMCR1_GRS_L3SEL_SH	44
+#define MMCR1_GRS_L3SEL_MSK	3
+#define MMCR1_GRS_MCSEL_SH	41
+#define MMCR1_GRS_MCSEL_MSK	7
+#define MMCR1_GRS_FABSEL_SH	39
+#define MMCR1_GRS_FABSEL_MSK	3
+#define MMCR1_PMC1_ADDER_SEL_SH	35
+#define MMCR1_PMC2_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC1SEL_SH	25
+#define MMCR1_PMC2SEL_SH	17
+#define MMCR1_PMC3SEL_SH	9
+#define MMCR1_PMC4SEL_SH	1
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0x7f
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *         <><>[  ><><>< ><> [  >[ >[ ><  ><  ><  ><  ><><><><><><>
+ *         T0T1 NC G0G1G2 G3  UC PS1PS2 B0  B1  B2  B3 P6P5P4P3P2P1
+ *
+ * T0 - TTM0 constraint
+ *     54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
+ *
+ * NC - number of counters
+ *     51: NC error 0x0008_0000_0000_0000
+ *     48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
+ *
+ * G0..G3 - GRS mux constraints
+ *     46-47: GRS_L2SEL value
+ *     44-45: GRS_L3SEL value
+ *     41-43: GRS_MCSEL value
+ *     39-40: GRS_FABSEL value
+ *	Note that these match up with their bit positions in MMCR1
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
+ *     37: UC3 error 0x20_0000_0000
+ *     36: FPU|IFU|ISU1 events needed 0x10_0000_0000
+ *     35: ISU0 events needed 0x08_0000_0000
+ *     34: IDU|GRS events needed 0x04_0000_0000
+ *
+ * PS1
+ *     33: PS1 error 0x2_0000_0000
+ *     31-32: count of events needing PMC1/2 0x1_8000_0000
+ *
+ * PS2
+ *     30: PS2 error 0x4000_0000
+ *     28-29: count of events needing PMC3/4 0x3000_0000
+ *
+ * B0
+ *     24-27: Byte 0 event source 0x0f00_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
+ *
+ * P1..P6
+ *     0-11: Count of events needing PMC1..PMC6
+ */
+
+static const int grsel_shift[8] = {	/* MMCR1 shift of the GRS mux field, indexed by (event & 7) */
+	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,	/* 0-2 */
+	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,	/* 3-5 */
+	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH				/* 6, 7 */
+};
+
+/* Constraint { mask, value } pairs for using events from each unit (indexed by PM_UNIT) */
+static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc0002000000000ul, 0x00001000000000ul },
+	[PM_ISU0] =  { 0x00002000000000ul, 0x00000800000000ul },
+	[PM_ISU1] =  { 0xc0002000000000ul, 0xc0001000000000ul },
+	[PM_IFU] =   { 0xc0002000000000ul, 0x80001000000000ul },
+	[PM_IDU] =   { 0x30002000000000ul, 0x00000400000000ul },
+	[PM_GRS] =   { 0x30002000000000ul, 0x30000400000000ul },
+};
+
+static int power5_get_constraint(u64 event, unsigned long *maskp,
+				 unsigned long *valp, u64 event_config1 __maybe_unused)
+{	/* Builds the constraint mask/value for one event; returns 0, or -1 if the event code is rejected */
+	int pmc, byte, unit, sh;
+	int bit, fmask;
+	unsigned long mask = 0, value = 0;
+	int grp = -1;	/* 0 = counts against PMC1/2, 1 = against PMC3/4, -1 = neither */
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 6)
+			return -1;
+		sh = (pmc - 1) * 2;	/* two-bit P<pmc> field at bits 0-11 */
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		if (pmc <= 4)
+			grp = (pmc - 1) >> 1;
+		else if (event != 0x500009 && event != 0x600005)	/* PMC5/6 accept only these two codes */
+			return -1;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+		if (unit > PM_LASTUNIT)
+			return -1;
+		if (unit == PM_ISU0_ALT)
+			unit = PM_ISU0;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (byte >= 4) {
+			if (unit != PM_LSU1)
+				return -1;
+			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
+			++unit;
+			byte &= 3;
+		}
+		if (unit == PM_GRS) {
+			bit = event & 7;
+			fmask = (bit == 6)? 7: 3;	/* index 6 maps to GRS_MCSEL, the only 3-bit mux field */
+			sh = grsel_shift[bit];
+			mask |= (unsigned long)fmask << sh;
+			value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
+				<< sh;
+		}
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2; bytes 1 and 3 on PMC3/4.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+		/* Set byte lane select field */
+		mask  |= 0xfUL << (24 - 4 * byte);
+		value |= (unsigned long)unit << (24 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2 field */
+		mask  |= 0x200000000ul;
+		value |= 0x080000000ul;
+	} else if (grp == 1) {
+		/* increment PMC3/4 field */
+		mask  |= 0x40000000ul;
+		value |= 0x10000000ul;
+	}
+	if (pmc < 5) {
+		/* need a counter from PMC1-4 set */
+		mask  |= 0x8000000000000ul;
+		value |= 0x1000000000000ul;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+#define MAX_ALT	3	/* at most 3 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {	/* rows sorted by first entry; unused slots are 0 */
+	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
+	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
+	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
+	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
+	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
+};
+
+/*
+ * Return the row of event_alternatives[] that lists @event,
+ * or -1 when no row does.
+ */
+static int find_alternative(u64 event)
+{
+	int row, col;
+
+	for (row = 0; row < ARRAY_SIZE(event_alternatives); ++row) {
+		if (event_alternatives[row][0] > event)
+			return -1;	/* rows are sorted by first entry */
+		for (col = 0; col < MAX_ALT && event_alternatives[row][col]; ++col)
+			if (event_alternatives[row][col] == event)
+				return row;
+	}
+	return -1;
+}
+
+static const unsigned char bytedecode_alternatives[4][4] = {	/* PMCSEL values; same column = equivalent event */
+	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
+	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
+	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
+	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
+};
+
+/*
+ * A direct event decoding event bus byte 3 may have an equivalent PMCSEL
+ * value on the partner counter; return that event code, or -1 if none.
+ */
+static s64 find_alternative_bdecode(u64 event)
+{
+	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	int psel = event & PM_PMCSEL_MSK;
+	int partner = 5 - pmc;		/* 1 <-> 4, 2 <-> 3 */
+	int col;
+
+	if (pmc < 1 || pmc > 4)
+		return -1;
+	for (col = 0; col < 4; ++col) {
+		if (bytedecode_alternatives[pmc - 1][col] != psel)
+			continue;
+		/* swap in the partner's PMC number and PMCSEL value */
+		return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
+			(partner << PM_PMC_SH) |
+			bytedecode_alternatives[partner - 1][col];
+	}
+	return -1;
+}
+
+static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	/* alt[0] is always the event itself; returns the number of entries */
+	int row = find_alternative(event);
+	int col, count = 0;
+	s64 other;
+
+	alt[count++] = event;
+	if (row < 0) {	/* not in the table: try the byte-decode partner */
+		other = find_alternative_bdecode(event);
+		if (other > 0)
+			alt[count++] = other;
+		return count;
+	}
+	for (col = 0; col < MAX_ALT; ++col) {
+		other = event_alternatives[row][col];
+		if (other && other != event)
+			alt[count++] = other;
+	}
+	return count;
+}
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
+ * Bit 0 is set if it is marked for all PMCs (it is tested when PMC field is 0).
+ * The 0x80 bit indicates a byte decode PMCSEL value.
+ */
+static unsigned char direct_event_is_marked[0x28] = {
+	0,	/* 00 */
+	0x1f,	/* 01 PM_IOPS_CMPL */
+	0x2,	/* 02 PM_MRK_GRP_DISP */
+	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0,	/* 04 */
+	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
+	0x80,	/* 06 */
+	0x80,	/* 07 */
+	0, 0, 0,/* 08 - 0a */
+	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
+	0,	/* 0c */
+	0x80,	/* 0d */
+	0x80,	/* 0e */
+	0,	/* 0f */
+	0,	/* 10 */
+	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
+	0,	/* 12 */
+	0x10,	/* 13 PM_MRK_GRP_CMPL */
+	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x2,	/* 15 PM_MRK_GRP_ISSUED */
+	0x80,	/* 16 */
+	0x80,	/* 17 */
+	0, 0, 0, 0, 0,
+	0x80,	/* 1d */
+	0x80,	/* 1e */
+	0,	/* 1f */
+	0x80,	/* 20 */
+	0x80,	/* 21 */
+	0x80,	/* 22 */
+	0x80,	/* 23 */
+	0x80,	/* 24 */
+	0x80,	/* 25 */
+	0x80,	/* 26 */
+	0x80,	/* 27 */
+};
+
+/*
+ * Returns 1 if @event counts things relating to marked instructions and
+ * thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not (always 0 on PMC5/6).
+ */
+static int power5_marked_instr_event(u64 event)
+{
+	int pmc, psel;
+	int bit, byte, unit;
+	u32 mask;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc >= 5)
+		return 0;
+
+	bit = -1;	/* event bus bit this PMCSEL value looks at; -1 = none */
+	if (psel < sizeof(direct_event_is_marked)) {
+		if (direct_event_is_marked[psel] & (1 << pmc))
+			return 1;
+		if (direct_event_is_marked[psel] & 0x80)
+			bit = 4;
+		else if (psel == 0x08)
+			bit = pmc - 1;
+		else if (psel == 0x10)
+			bit = 4 - pmc;
+		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
+			bit = 4;
+	} else if ((psel & 0x58) == 0x40)
+		bit = psel & 7;
+	/* Not a bus event, or no bus bit selected (the shift below needs bit >= 0) */
+	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
+		return 0;
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit == PM_LSU0) {
+		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
+		mask = 0x5dff00;
+	} else if (unit == PM_LSU1 && byte >= 4) {
+		byte -= 4;
+		/* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
+		mask = 0x5f00c0aa;
+	} else
+		return 0;
+
+	return (mask >> (byte * 8 + bit)) & 1;
+}
+
+static int power5_compute_mmcr(u64 event[], int n_ev,
+			       unsigned int hwc[], struct mmcr_regs *mmcr,
+			       struct perf_event *pevents[],
+			       u32 flags __maybe_unused)
+{	/* Picks a counter for each event (hwc[]) and fills mmcr0/mmcr1/mmcra; returns 0, or -1 on conflict */
+	unsigned long mmcr1 = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm, grp;
+	int i, isbus, bit, grsel;
+	unsigned int pmc_inuse = 0;	/* bit n set = counter index n (PMCn+1) is taken */
+	unsigned int pmc_grp_use[2];	/* events per counter pair: [0] PMC1/2, [1] PMC3/4 */
+	unsigned char busbyte[4];	/* unit feeding each event bus byte, 0 = unused */
+	unsigned char unituse[16];	/* nonzero when some event needs that unit */
+	int ttmuse;
+
+	if (n_ev > 6)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc > 6)
+				return -1;
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2 vs 3/4 use */
+			if (pmc <= 4)
+				++pmc_grp_use[(pmc - 1) >> 1];
+		}
+		if (event[i] & PM_BUSEVENT_MSK) {
+			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (unit == PM_ISU0_ALT)
+				unit = PM_ISU0;
+			if (byte >= 4) {
+				if (unit != PM_LSU1)
+					return -1;
+				++unit;
+				byte &= 3;
+			}
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+	if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU0] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
+		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
+		unituse[PM_ISU0] = 0;
+	}
+	/* Set TTM[01]SEL fields. */
+	ttmuse = 0;
+	for (i = PM_FPU; i <= PM_ISU1; ++i) {
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
+	}
+	ttmuse = 0;
+	for (; i <= PM_GRS; ++i) {	/* continues from PM_ISU1 + 1: the units selected through TTM1 */
+		if (!unituse[i])
+			continue;
+		if (ttmuse++)
+			return -1;
+		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
+	}
+	if (ttmuse > 1)
+		return -1;
+
+	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
+			/* get ISU0 through TTM1 rather than TTM0 */
+			unit = PM_ISU0_ALT;
+		} else if (unit == PM_LSU1 + 1) {
+			/* select lower word of LSU1 for this byte */
+			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		ttm = unit >> 2;
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		isbus = event[i] & PM_BUSEVENT_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			for (pmc = 0; pmc < 4; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (isbus) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 2) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else if (pmc <= 4) {
+			/* Direct event */
+			--pmc;
+			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+		} else {
+			/* Instructions or run cycles on PMC5/6 */
+			--pmc;
+		}
+		if (isbus && unit == PM_GRS) {
+			bit = psel & 7;
+			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
+			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
+		}
+		if (power5_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		if (pmc <= 3)
+			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
+		hwc[i] = pmc;	/* pmc is a 0-based counter index at this point */
+	}
+
+	/* Return MMCRx values */
+	mmcr->mmcr0 = 0;
+	if (pmc_inuse & 1)
+		mmcr->mmcr0 = MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
+	return 0;
+}
+
+static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	if (pmc < 4)	/* only counter indices 0-3 have a PMCSEL field to clear */
+		mmcr->mmcr1 &= ~((unsigned long)MMCR1_PMCSEL_MSK << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power5_generic_events[] = {	/* event code for each generic perf hardware event */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009, /* PM_INST_CMPL */
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4c1090, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events, indexed as
+ * [cache][operation][result].  0 means not supported, -1 means
+ * nonsensical, other values are event codes.
+ */
+static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x4c1090,	0x3c1088	},
+		[C(OP_WRITE)] = {	0x3c1090,	0xc10c3		},
+		[C(OP_PREFETCH)] = {	0xc70e7,	0		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0,		0		},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x3c309b	},
+		[C(OP_WRITE)] = {	0,		0		},
+		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x2c4090,	0x800c4		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x800c0		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x230e4,	0x230e5		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
+static struct power_pmu power5_pmu = {	/* descriptor passed to register_power_pmu() by init_power5_pmu() */
+	.name			= "POWER5",
+	.n_counter		= 6,	/* PMC1..PMC6 */
+	.max_alternatives	= MAX_ALT,	/* row width of event_alternatives[] */
+	.add_fields		= 0x7000090000555ul,
+	.test_adder		= 0x3000490000000ul,
+	.compute_mmcr		= power5_compute_mmcr,
+	.get_constraint		= power5_get_constraint,
+	.get_alternatives	= power5_get_alternatives,
+	.disable_pmc		= power5_disable_pmc,
+	.n_generic		= ARRAY_SIZE(power5_generic_events),
+	.generic_events		= power5_generic_events,
+	.cache_events		= &power5_cache_events,
+	.flags			= PPMU_HAS_SSLOT,
+};
+
+int __init init_power5_pmu(void)
+{
+	/* Register this PMU only when the PVR version field reads PVR_POWER5. */
+	unsigned int pvr = mfspr(SPRN_PVR);
+
+	if (PVR_VER(pvr) == PVR_POWER5)
+		return register_power_pmu(&power5_pmu);
+	return -ENODEV;
+}
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
new file mode 100644
index 0000000000..5729b6e059
--- /dev/null
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER6 processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/string.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+/*
+ * Bits in event code for POWER6
+ */
+#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0x7
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	16	/* Unit event comes (TTMxSEL encoding) */
+#define PM_UNIT_MSK	0xf
+#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
+#define PM_LLAV		0x8000	/* Load lookahead match value */
+#define PM_LLA		0x4000	/* Load lookahead match enable */
+#define PM_BYTE_SH	12	/* Byte of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
+#define PM_SUBUNIT_MSK	7
+#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
+#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
+#define PM_BUSEVENT_MSK	0xf3700
+
+/*
+ * Bits in MMCR1 for POWER6
+ */
+#define MMCR1_TTM0SEL_SH	60
+#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
+#define MMCR1_TTMSEL_MSK	0xf
+#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
+#define MMCR1_NESTSEL_SH	45
+#define MMCR1_NESTSEL_MSK	0x7
+#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
+#define MMCR1_PMC1_LLA		(1ul << 44)
+#define MMCR1_PMC1_LLA_VALUE	(1ul << 39)
+#define MMCR1_PMC1_ADDR_SEL	(1ul << 35)
+#define MMCR1_PMC1SEL_SH	24
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0xff
+
+/*
+ * Map of which direct events on which PMCs are marked instruction events.
+ * Indexed by PMCSEL value >> 1.
+ * Bottom 4 bits are a map of which PMCs are interesting (bit n-1 = PMCn),
+ * top 4 bits say what sort of event:
+ *   0 = direct marked event,
+ *   1 = byte decode event,
+ *   4 = add/and event (PMC1 -> bits 0 & 4),
+ *   5 = add/and event (PMC1 -> bits 1 & 5),
+ *   6 = add/and event (PMC1 -> bits 2 & 6),
+ *   7 = add/and event (PMC1 -> bits 3 & 7).
+ */
+static unsigned char direct_event_is_marked[0x60 >> 1] = {
+	0,	/* 00 */
+	0,	/* 02 */
+	0,	/* 04 */
+	0x07,	/* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
+	0x04,	/* 08 PM_MRK_DFU_FIN */
+	0x06,	/* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
+	0,	/* 0c */
+	0,	/* 0e */
+	0x02,	/* 10 PM_MRK_INST_DISP */
+	0x08,	/* 12 PM_MRK_LSU_DERAT_MISS */
+	0,	/* 14 */
+	0,	/* 16 */
+	0x0c,	/* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
+	0x0f,	/* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
+	0x01,	/* 1c PM_MRK_INST_ISSUED */
+	0,	/* 1e */
+	0,	/* 20 */
+	0,	/* 22 */
+	0,	/* 24 */
+	0,	/* 26 */
+	0x15,	/* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
+	0,	/* 2a */
+	0,	/* 2c */
+	0,	/* 2e */
+	0x4f,	/* 30 */
+	0x7f,	/* 32 */
+	0x4f,	/* 34 */
+	0x5f,	/* 36 */
+	0x6f,	/* 38 */
+	0x4f,	/* 3a */
+	0,	/* 3c */
+	0x08,	/* 3e PM_MRK_INST_TIMEO */
+	0x1f,	/* 40 */
+	0x1f,	/* 42 */
+	0x1f,	/* 44 */
+	0x1f,	/* 46 */
+	0x1f,	/* 48 */
+	0x1f,	/* 4a */
+	0x1f,	/* 4c */
+	0x1f,	/* 4e */
+	0,	/* 50 */
+	0x05,	/* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
+	0x1c,	/* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
+	0x02,	/* 56 PM_MRK_LD_MISS_L1 */
+	0,	/* 58 */
+	0,	/* 5a */
+	0,	/* 5c */
+	0,	/* 5e */
+};
+
+/*
+ * Masks showing for each unit (indexed by the PM_UNIT field) which bits are
+ * marked events.  Masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
+ */
+static u32 marked_bus_events[16] = {
+	0x01000000,	/* direct events set 1: byte 3 bit 0 */
+	0x00010000,	/* direct events set 2: byte 2 bit 0 */
+	0, 0, 0, 0,	/* IDU, IFU, nest: nothing */
+	0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
+	0x000000c0,	/* VMX set 2: byte 0 bits 4-7 -- NOTE(review): 0xc0 is only bits 6-7; confirm which is right */
+	0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
+	0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
+	0,		/* LSU set 3 */
+	0x00000010,	/* VMX set 3: byte 0 bit 4 */
+	0,		/* BFP set 1 */
+	0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
+	0, 0
+};
+
+/*
+ * Decide whether @event counts marked-instruction activity, in which case
+ * the caller must set MMCRA_SAMPLE_ENABLE.  Returns 1 if so, otherwise 0.
+ */
+static int power6_marked_instr_event(u64 event)
+{
+	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	int psel = (event & PM_PMCSEL_MSK) >> 1;	/* drop edge/level bit */
+	int byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	int kind, bit = -1;
+
+	if (pmc >= 5)
+		return 0;
+	if (psel < sizeof(direct_event_is_marked)) {
+		kind = direct_event_is_marked[psel];
+		/* low nibble: is this PMC (1-4) listed for the PMCSEL? */
+		if (pmc == 0 || !(kind & (1 << (pmc - 1))))
+			return 0;
+		kind >>= 4;
+		switch (kind) {
+		case 0:		/* direct marked event */
+			return 1;
+		case 1:		/* byte decode event */
+			bit = 0;
+			break;
+		default:	/* add/and event */
+			bit = kind ^ (pmc - 1);
+			break;
+		}
+	} else if ((psel & 0x48) == 0x40) {
+		bit = psel & 7;
+	}
+
+	if (bit == -1 || !(event & PM_BUSEVENT_MSK))
+		return 0;
+	return (marked_bus_events[unit] >> (byte * 8 + bit)) & 1;
+}
+
+/*
+ * Assign PMC numbers (stored in hwc[]) and compute the MMCR0/MMCR1/MMCRA values for a set of events; -1 on conflict
+ */
+static int p6_compute_mmcr(u64 event[], int n_ev,
+			   unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
+			   u32 flags __maybe_unused)
+{
+	unsigned long mmcr1 = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
+	int i;
+	unsigned int pmc, ev, b, u, s, psel;
+	unsigned int ttmset = 0;	/* bits 0-3: TTMSEL chosen for that bus byte; 0x10: NESTSEL chosen */
+	unsigned int pmc_inuse = 0;	/* bit n set = counter index n is taken */
+
+	if (n_ev > 6)
+		return -1;
+	for (i = 0; i < n_ev; ++i) {	/* first pass: reserve the counters that events name explicitly */
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;	/* collision! */
+			pmc_inuse |= 1 << (pmc - 1);
+		}
+	}
+	for (i = 0; i < n_ev; ++i) {
+		ev = event[i];
+		pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			--pmc;
+		} else {
+			/* can go on any PMC; find a free one */
+			for (pmc = 0; pmc < 4; ++pmc)
+				if (!(pmc_inuse & (1 << pmc)))
+					break;
+			if (pmc >= 4)
+				return -1;
+			pmc_inuse |= 1 << pmc;
+		}
+		hwc[i] = pmc;	/* 0-based counter index */
+		psel = ev & PM_PMCSEL_MSK;
+		if (ev & PM_BUSEVENT_MSK) {
+			/* this event uses the event bus */
+			b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
+			u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
+			/* check for conflict on this byte of event bus */
+			if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
+				return -1;
+			mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
+			ttmset |= 1 << b;
+			if (u == 5) {
+				/* Nest events have a further mux */
+				s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+				if ((ttmset & 0x10) &&
+				    MMCR1_NESTSEL(mmcr1) != s)
+					return -1;
+				ttmset |= 0x10;
+				mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
+			}
+			if (0x30 <= psel && psel <= 0x3d) {
+				/* these need the PMCx_ADDR_SEL bits */
+				if (b >= 2)
+					mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
+			}
+			/* bus select values are different for PMC3/4 */
+			if (pmc >= 2 && (psel & 0x90) == 0x80)
+				psel ^= 0x20;
+		}
+		if (ev & PM_LLA) {
+			mmcr1 |= MMCR1_PMC1_LLA >> pmc;
+			if (ev & PM_LLAV)
+				mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
+		}
+		if (power6_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+		if (pmc < 4)
+			mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
+	}
+	mmcr->mmcr0 = 0;
+	if (pmc_inuse & 1)
+		mmcr->mmcr0 = MMCR0_PMC1CE;
+	if (pmc_inuse & 0xe)
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
+	return 0;
+}
+
+/*
+ * Compute the constraint mask/value for one event: returns 0, or -1 if the
+ * event names PMC5/6 with an unsupported code.  Layout of constraint bits:
+ *	0-1	add field: number of uses of PMC1 (max 1)
+ *	2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
+ *	12-15	add field: number of uses of PMC1-4 (max 4)
+ *	16-19	select field: unit on byte 0 of event bus
+ *	20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
+ *	32-34	select field: nest (subunit) event selector
+ */
+static int p6_get_constraint(u64 event, unsigned long *maskp,
+			     unsigned long *valp, u64 event_config1 __maybe_unused)
+{
+	int pmc, byte, sh, subunit;
+	unsigned long mask = 0, value = 0;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+	}
+	if (event & PM_BUSEVENT_MSK) {
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		sh = byte * 4 + (16 - PM_UNIT_SH);
+		mask |= (unsigned long)PM_UNIT_MSKS << sh;	/* widen first: as int, byte 3 sign-extends into bits 32-63 */
+		value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
+		if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
+			subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
+			mask  |= (unsigned long)PM_SUBUNIT_MSK << 32;
+			value |= (unsigned long)subunit << 32;
+		}
+	}
+	if (pmc <= 4) {
+		mask  |= 0x8000;	/* add field for count of PMC1-4 uses */
+		value |= 0x1000;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static int p6_limited_pmc_event(u64 event)
+{
+	unsigned int counter = (event >> PM_PMC_SH) & PM_PMC_MSK;
+
+	return counter >= 5 && counter <= 6;	/* true only for PMC5 and PMC6 */
+}
+
+#define MAX_ALT	4	/* at most 4 alternatives for any event */
+
+static const unsigned int event_alternatives[][MAX_ALT] = {	/* rows sorted by first entry, entries ascending, 0 = unused */
+	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
+	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
+	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
+	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
+	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
+	{ 0x10000e, 0x400010 },			/* PM_PURR */
+	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
+	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
+	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
+	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
+	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
+	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
+	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
+	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
+	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
+	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
+	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
+	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
+	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
+	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
+	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
+	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
+	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
+};
+
+/*
+ * Linear scan for the event_alternatives[] row containing @event; returns
+ * the row index or -1.  A binary search would do if this ever gets hot.
+ */
+static int find_alternatives_list(u64 event)
+{
+	const unsigned int *row;
+	int r, c;
+
+	for (r = 0; r < ARRAY_SIZE(event_alternatives); ++r) {
+		row = event_alternatives[r];
+		/* rows are sorted by first entry, so no later row can match */
+		if (row[0] > event)
+			break;
+		/* entries ascend within a row; 0 marks unused slots */
+		for (c = 0; c < MAX_ALT && row[c] && row[c] <= event; ++c) {
+			if (row[c] == event)
+				return r;
+		}
+	}
+	return -1;
+}
+
+static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{	/* Fills alt[] with the event and its equivalent codes, filtered by @flags; returns the entry count */
+	int i, j, nlim;		/* nlim counts the PMC5/6 ("limited") codes seen so far */
+	unsigned int psel, pmc;
+	unsigned int nalt = 1;	/* alt[0] is the event itself */
+	u64 aevent;
+
+	alt[0] = event;
+	nlim = p6_limited_pmc_event(event);
+
+	/* check the alternatives table */
+	i = find_alternatives_list(event);
+	if (i >= 0) {
+		/* copy out alternatives from list */
+		for (j = 0; j < MAX_ALT; ++j) {
+			aevent = event_alternatives[i][j];
+			if (!aevent)
+				break;
+			if (aevent != event)
+				alt[nalt++] = aevent;
+			nlim += p6_limited_pmc_event(aevent);
+		}
+
+	} else {
+		/* Check for alternative ways of computing sum events */
+		/* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
+		psel = event & (PM_PMCSEL_MSK & ~1);	/* ignore edge bit */
+		pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc && (psel == 0x32 || psel == 0x34))
+			alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
+				((5 - pmc) << PM_PMC_SH);
+
+		/* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
+		if (pmc && (psel == 0x38 || psel == 0x3a))
+			alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
+				((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
+	}
+
+	if (flags & PPMU_ONLY_COUNT_RUN) {
+		/*
+		 * We're only counting in RUN state,
+		 * so PM_CYC is equivalent to PM_RUN_CYC,
+		 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
+		 * This doesn't include alternatives that don't provide
+		 * any extra flexibility in assigning PMCs (e.g.
+		 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
+		 * Note that even with these additional alternatives
+		 * we never end up with more than 4 alternatives for any event.
+		 */
+		j = nalt;
+		for (i = 0; i < nalt; ++i) {	/* scans only the entries present before this pass */
+			switch (alt[i]) {
+			case 0x1e:	/* PM_CYC */
+				alt[j++] = 0x600005;	/* PM_RUN_CYC */
+				++nlim;
+				break;
+			case 0x10000a:	/* PM_RUN_CYC */
+				alt[j++] = 0x1e;	/* PM_CYC */
+				break;
+			case 2:		/* PM_INST_CMPL */
+				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
+				++nlim;
+				break;
+			case 0x500009:	/* PM_RUN_INST_CMPL */
+				alt[j++] = 2;		/* PM_INST_CMPL */
+				break;
+			case 0x10000e:	/* PM_PURR */
+				alt[j++] = 0x4000f4;	/* PM_RUN_PURR */
+				break;
+			case 0x4000f4:	/* PM_RUN_PURR */
+				alt[j++] = 0x10000e;	/* PM_PURR */
+				break;
+			}
+		}
+		nalt = j;
+	}
+
+	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
+		/* remove the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (!p6_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
+		/* remove all but the limited PMC events */
+		j = 0;
+		for (i = 0; i < nalt; ++i) {
+			if (p6_limited_pmc_event(alt[i])) {
+				alt[j] = alt[i];
+				++j;
+			}
+		}
+		nalt = j;
+	}
+
+	return nalt;
+}
+
+static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	/* Zeroing a counter's PMCxSEL field disables it; only indices 0-3 are handled */
+	if (pmc < 4)
+		mmcr->mmcr1 &= ~((unsigned long)MMCR1_PMCSEL_MSK << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power6_generic_events[] = {	/* event code for each generic perf hardware event */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1e,	    /* PM_CYC */
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,	    /* PM_INST_CMPL */
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x280030, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x30000c, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x410a0,  /* BR_PRED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x400052, /* BR_MPRED */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events, indexed as
+ * [cache][operation][result].  0 means not supported, -1 means
+ * nonsensical, other values are event codes.
+ * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
+ */
+static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x280030,	0x80080		},
+		[C(OP_WRITE)] = {	0x180032,	0x80088		},
+		[C(OP_PREFETCH)] = {	0x810a4,	0		},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x100056 	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	0x4008c,	0		},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x150730,	0x250532	},
+		[C(OP_WRITE)] = {	0x250432,	0x150432	},
+		[C(OP_PREFETCH)] = {	0x810a6,	0		},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x20000e	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x420ce		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x430e6,	0x400052	},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1		},
+		[C(OP_WRITE)] = {	-1,		-1		},
+		[C(OP_PREFETCH)] = {	-1,		-1		},
+	},
+};
+
+static struct power_pmu power6_pmu = {	/* descriptor passed to register_power_pmu() by init_power6_pmu() */
+	.name			= "POWER6",
+	.n_counter		= 6,
+	.max_alternatives	= MAX_ALT,	/* row width of event_alternatives[] */
+	.add_fields		= 0x1555,	/* low bit of each add field in the constraint layout (bits 0-15) */
+	.test_adder		= 0x3000,
+	.compute_mmcr		= p6_compute_mmcr,
+	.get_constraint		= p6_get_constraint,
+	.get_alternatives	= p6_get_alternatives,
+	.disable_pmc		= p6_disable_pmc,
+	.limited_pmc_event	= p6_limited_pmc_event,
+	.flags			= PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
+	.n_generic		= ARRAY_SIZE(power6_generic_events),
+	.generic_events		= power6_generic_events,
+	.cache_events		= &power6_cache_events,
+};
+
+int __init init_power6_pmu(void)
+{
+	/* Register this PMU only when the PVR version field reads PVR_POWER6. */
+	unsigned int pvr = mfspr(SPRN_PVR);
+
+	if (PVR_VER(pvr) == PVR_POWER6)
+		return register_power_pmu(&power6_pmu);
+	return -ENODEV;
+}
diff --git a/arch/powerpc/perf/power7-events-list.h b/arch/powerpc/perf/power7-events-list.h
new file mode 100644
index 0000000000..6c2b706649
--- /dev/null
+++ b/arch/powerpc/perf/power7-events-list.h
@@ -0,0 +1,554 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * POWER7 performance counter event list; the includer defines EVENT(name, code).
+ *
+ * Copyright 2013 Runzhen Wang, IBM Corporation.
+ */
+
+EVENT(PM_IC_DEMAND_L2_BR_ALL,                 0x04898)
+EVENT(PM_GCT_UTIL_7_TO_10_SLOTS,              0x020a0)
+EVENT(PM_PMC2_SAVED,                          0x10022)
+EVENT(PM_CMPLU_STALL_DFU,                     0x2003c)
+EVENT(PM_VSU0_16FLOP,                         0x0a0a4)
+EVENT(PM_MRK_LSU_DERAT_MISS,                  0x3d05a)
+EVENT(PM_MRK_ST_CMPL,                         0x10034)
+EVENT(PM_NEST_PAIR3_ADD,                      0x40881)
+EVENT(PM_L2_ST_DISP,                          0x46180)
+EVENT(PM_L2_CASTOUT_MOD,                      0x16180)
+EVENT(PM_ISEG,                                0x020a4)
+EVENT(PM_MRK_INST_TIMEO,                      0x40034)
+EVENT(PM_L2_RCST_DISP_FAIL_ADDR,              0x36282)
+EVENT(PM_LSU1_DC_PREF_STREAM_CONFIRM,         0x0d0b6)
+EVENT(PM_IERAT_WR_64K,                        0x040be)
+EVENT(PM_MRK_DTLB_MISS_16M,                   0x4d05e)
+EVENT(PM_IERAT_MISS,                          0x100f6)
+EVENT(PM_MRK_PTEG_FROM_LMEM,                  0x4d052)
+EVENT(PM_FLOP,                                0x100f4)
+EVENT(PM_THRD_PRIO_4_5_CYC,                   0x040b4)
+EVENT(PM_BR_PRED_TA,                          0x040aa)
+EVENT(PM_CMPLU_STALL_FXU,                     0x20014)
+EVENT(PM_EXT_INT,                             0x200f8)
+EVENT(PM_VSU_FSQRT_FDIV,                      0x0a888)
+EVENT(PM_MRK_LD_MISS_EXPOSED_CYC,             0x1003e)
+EVENT(PM_LSU1_LDF,                            0x0c086)
+EVENT(PM_IC_WRITE_ALL,                        0x0488c)
+EVENT(PM_LSU0_SRQ_STFWD,                      0x0c0a0)
+EVENT(PM_PTEG_FROM_RL2L3_MOD,                 0x1c052)
+EVENT(PM_MRK_DATA_FROM_L31_SHR,               0x1d04e)
+EVENT(PM_DATA_FROM_L21_MOD,                   0x3c046)
+EVENT(PM_VSU1_SCAL_DOUBLE_ISSUED,             0x0b08a)
+EVENT(PM_VSU0_8FLOP,                          0x0a0a0)
+EVENT(PM_POWER_EVENT1,                        0x1006e)
+EVENT(PM_DISP_CLB_HELD_BAL,                   0x02092)
+EVENT(PM_VSU1_2FLOP,                          0x0a09a)
+EVENT(PM_LWSYNC_HELD,                         0x0209a)
+EVENT(PM_PTEG_FROM_DL2L3_SHR,                 0x3c054)
+EVENT(PM_INST_FROM_L21_MOD,                   0x34046)
+EVENT(PM_IERAT_XLATE_WR_16MPLUS,              0x040bc)
+EVENT(PM_IC_REQ_ALL,                          0x04888)
+EVENT(PM_DSLB_MISS,                           0x0d090)
+EVENT(PM_L3_MISS,                             0x1f082)
+EVENT(PM_LSU0_L1_PREF,                        0x0d0b8)
+EVENT(PM_VSU_SCALAR_SINGLE_ISSUED,            0x0b884)
+EVENT(PM_LSU1_DC_PREF_STREAM_CONFIRM_STRIDE,  0x0d0be)
+EVENT(PM_L2_INST,                             0x36080)
+EVENT(PM_VSU0_FRSP,                           0x0a0b4)
+EVENT(PM_FLUSH_DISP,                          0x02082)
+EVENT(PM_PTEG_FROM_L2MISS,                    0x4c058)
+EVENT(PM_VSU1_DQ_ISSUED,                      0x0b09a)
+EVENT(PM_CMPLU_STALL_LSU,                     0x20012)
+EVENT(PM_MRK_DATA_FROM_DMEM,                  0x1d04a)
+EVENT(PM_LSU_FLUSH_ULD,                       0x0c8b0)
+EVENT(PM_PTEG_FROM_LMEM,                      0x4c052)
+EVENT(PM_MRK_DERAT_MISS_16M,                  0x3d05c)
+EVENT(PM_THRD_ALL_RUN_CYC,                    0x2000c)
+EVENT(PM_MEM0_PREFETCH_DISP,                  0x20083)
+EVENT(PM_MRK_STALL_CMPLU_CYC_COUNT,           0x3003f)
+EVENT(PM_DATA_FROM_DL2L3_MOD,                 0x3c04c)
+EVENT(PM_VSU_FRSP,                            0x0a8b4)
+EVENT(PM_MRK_DATA_FROM_L21_MOD,               0x3d046)
+EVENT(PM_PMC1_OVERFLOW,                       0x20010)
+EVENT(PM_VSU0_SINGLE,                         0x0a0a8)
+EVENT(PM_MRK_PTEG_FROM_L3MISS,                0x2d058)
+EVENT(PM_MRK_PTEG_FROM_L31_SHR,               0x2d056)
+EVENT(PM_VSU0_VECTOR_SP_ISSUED,               0x0b090)
+EVENT(PM_VSU1_FEST,                           0x0a0ba)
+EVENT(PM_MRK_INST_DISP,                       0x20030)
+EVENT(PM_VSU0_COMPLEX_ISSUED,                 0x0b096)
+EVENT(PM_LSU1_FLUSH_UST,                      0x0c0b6)
+EVENT(PM_INST_CMPL,                           0x00002)
+EVENT(PM_FXU_IDLE,                            0x1000e)
+EVENT(PM_LSU0_FLUSH_ULD,                      0x0c0b0)
+EVENT(PM_MRK_DATA_FROM_DL2L3_MOD,             0x3d04c)
+EVENT(PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC,           0x3001c)
+EVENT(PM_LSU1_REJECT_LMQ_FULL,                0x0c0a6)
+EVENT(PM_INST_PTEG_FROM_L21_MOD,              0x3e056)
+EVENT(PM_INST_FROM_RL2L3_MOD,                 0x14042)
+EVENT(PM_SHL_CREATED,                         0x05082)
+EVENT(PM_L2_ST_HIT,                           0x46182)
+EVENT(PM_DATA_FROM_DMEM,                      0x1c04a)
+EVENT(PM_L3_LD_MISS,                          0x2f082)
+EVENT(PM_FXU1_BUSY_FXU0_IDLE,                 0x4000e)
+EVENT(PM_DISP_CLB_HELD_RES,                   0x02094)
+EVENT(PM_L2_SN_SX_I_DONE,                     0x36382)
+EVENT(PM_GRP_CMPL,                            0x30004)
+EVENT(PM_STCX_CMPL,                           0x0c098)
+EVENT(PM_VSU0_2FLOP,                          0x0a098)
+EVENT(PM_L3_PREF_MISS,                        0x3f082)
+EVENT(PM_LSU_SRQ_SYNC_CYC,                    0x0d096)
+EVENT(PM_LSU_REJECT_ERAT_MISS,                0x20064)
+EVENT(PM_L1_ICACHE_MISS,                      0x200fc)
+EVENT(PM_LSU1_FLUSH_SRQ,                      0x0c0be)
+EVENT(PM_LD_REF_L1_LSU0,                      0x0c080)
+EVENT(PM_VSU0_FEST,                           0x0a0b8)
+EVENT(PM_VSU_VECTOR_SINGLE_ISSUED,            0x0b890)
+EVENT(PM_FREQ_UP,                             0x4000c)
+EVENT(PM_DATA_FROM_LMEM,                      0x3c04a)
+EVENT(PM_LSU1_LDX,                            0x0c08a)
+EVENT(PM_PMC3_OVERFLOW,                       0x40010)
+EVENT(PM_MRK_BR_MPRED,                        0x30036)
+EVENT(PM_SHL_MATCH,                           0x05086)
+EVENT(PM_MRK_BR_TAKEN,                        0x10036)
+EVENT(PM_CMPLU_STALL_BRU,                     0x4004e)
+EVENT(PM_ISLB_MISS,                           0x0d092)
+EVENT(PM_CYC,                                 0x0001e)
+EVENT(PM_DISP_HELD_THERMAL,                   0x30006)
+EVENT(PM_INST_PTEG_FROM_RL2L3_SHR,            0x2e054)
+EVENT(PM_LSU1_SRQ_STFWD,                      0x0c0a2)
+EVENT(PM_GCT_NOSLOT_BR_MPRED,                 0x4001a)
+EVENT(PM_1PLUS_PPC_CMPL,                      0x100f2)
+EVENT(PM_PTEG_FROM_DMEM,                      0x2c052)
+EVENT(PM_VSU_2FLOP,                           0x0a898)
+EVENT(PM_GCT_FULL_CYC,                        0x04086)
+EVENT(PM_MRK_DATA_FROM_L3_CYC,                0x40020)
+EVENT(PM_LSU_SRQ_S0_ALLOC,                    0x0d09d)
+EVENT(PM_MRK_DERAT_MISS_4K,                   0x1d05c)
+EVENT(PM_BR_MPRED_TA,                         0x040ae)
+EVENT(PM_INST_PTEG_FROM_L2MISS,               0x4e058)
+EVENT(PM_DPU_HELD_POWER,                      0x20006)
+EVENT(PM_RUN_INST_CMPL,                       0x400fa)
+EVENT(PM_MRK_VSU_FIN,                         0x30032)
+EVENT(PM_LSU_SRQ_S0_VALID,                    0x0d09c)
+EVENT(PM_GCT_EMPTY_CYC,                       0x20008)
+EVENT(PM_IOPS_DISP,                           0x30014)
+EVENT(PM_RUN_SPURR,                           0x10008)
+EVENT(PM_PTEG_FROM_L21_MOD,                   0x3c056)
+EVENT(PM_VSU0_1FLOP,                          0x0a080)
+EVENT(PM_SNOOP_TLBIE,                         0x0d0b2)
+EVENT(PM_DATA_FROM_L3MISS,                    0x2c048)
+EVENT(PM_VSU_SINGLE,                          0x0a8a8)
+EVENT(PM_DTLB_MISS_16G,                       0x1c05e)
+EVENT(PM_CMPLU_STALL_VECTOR,                  0x2001c)
+EVENT(PM_FLUSH,                               0x400f8)
+EVENT(PM_L2_LD_HIT,                           0x36182)
+EVENT(PM_NEST_PAIR2_AND,                      0x30883)
+EVENT(PM_VSU1_1FLOP,                          0x0a082)
+EVENT(PM_IC_PREF_REQ,                         0x0408a)
+EVENT(PM_L3_LD_HIT,                           0x2f080)
+EVENT(PM_GCT_NOSLOT_IC_MISS,                  0x2001a)
+EVENT(PM_DISP_HELD,                           0x10006)
+EVENT(PM_L2_LD,                               0x16080)
+EVENT(PM_LSU_FLUSH_SRQ,                       0x0c8bc)
+EVENT(PM_BC_PLUS_8_CONV,                      0x040b8)
+EVENT(PM_MRK_DATA_FROM_L31_MOD_CYC,           0x40026)
+EVENT(PM_CMPLU_STALL_VECTOR_LONG,             0x4004a)
+EVENT(PM_L2_RCST_BUSY_RC_FULL,                0x26282)
+EVENT(PM_TB_BIT_TRANS,                        0x300f8)
+EVENT(PM_THERMAL_MAX,                         0x40006)
+EVENT(PM_LSU1_FLUSH_ULD,                      0x0c0b2)
+EVENT(PM_LSU1_REJECT_LHS,                     0x0c0ae)
+EVENT(PM_LSU_LRQ_S0_ALLOC,                    0x0d09f)
+EVENT(PM_L3_CO_L31,                           0x4f080)
+EVENT(PM_POWER_EVENT4,                        0x4006e)
+EVENT(PM_DATA_FROM_L31_SHR,                   0x1c04e)
+EVENT(PM_BR_UNCOND,                           0x0409e)
+EVENT(PM_LSU1_DC_PREF_STREAM_ALLOC,           0x0d0aa)
+EVENT(PM_PMC4_REWIND,                         0x10020)
+EVENT(PM_L2_RCLD_DISP,                        0x16280)
+EVENT(PM_THRD_PRIO_2_3_CYC,                   0x040b2)
+EVENT(PM_MRK_PTEG_FROM_L2MISS,                0x4d058)
+EVENT(PM_IC_DEMAND_L2_BHT_REDIRECT,           0x04098)
+EVENT(PM_LSU_DERAT_MISS,                      0x200f6)
+EVENT(PM_IC_PREF_CANCEL_L2,                   0x04094)
+EVENT(PM_MRK_FIN_STALL_CYC_COUNT,             0x1003d)
+EVENT(PM_BR_PRED_CCACHE,                      0x040a0)
+EVENT(PM_GCT_UTIL_1_TO_2_SLOTS,               0x0209c)
+EVENT(PM_MRK_ST_CMPL_INT,                     0x30034)
+EVENT(PM_LSU_TWO_TABLEWALK_CYC,               0x0d0a6)
+EVENT(PM_MRK_DATA_FROM_L3MISS,                0x2d048)
+EVENT(PM_GCT_NOSLOT_CYC,                      0x100f8)
+EVENT(PM_LSU_SET_MPRED,                       0x0c0a8)
+EVENT(PM_FLUSH_DISP_TLBIE,                    0x0208a)
+EVENT(PM_VSU1_FCONV,                          0x0a0b2)
+EVENT(PM_DERAT_MISS_16G,                      0x4c05c)
+EVENT(PM_INST_FROM_LMEM,                      0x3404a)
+EVENT(PM_IC_DEMAND_L2_BR_REDIRECT,            0x0409a)
+EVENT(PM_CMPLU_STALL_SCALAR_LONG,             0x20018)
+EVENT(PM_INST_PTEG_FROM_L2,                   0x1e050)
+EVENT(PM_PTEG_FROM_L2,                        0x1c050)
+EVENT(PM_MRK_DATA_FROM_L21_SHR_CYC,           0x20024)
+EVENT(PM_MRK_DTLB_MISS_4K,                    0x2d05a)
+EVENT(PM_VSU0_FPSCR,                          0x0b09c)
+EVENT(PM_VSU1_VECT_DOUBLE_ISSUED,             0x0b082)
+EVENT(PM_MRK_PTEG_FROM_RL2L3_MOD,             0x1d052)
+EVENT(PM_MEM0_RQ_DISP,                        0x10083)
+EVENT(PM_L2_LD_MISS,                          0x26080)
+EVENT(PM_VMX_RESULT_SAT_1,                    0x0b0a0)
+EVENT(PM_L1_PREF,                             0x0d8b8)
+EVENT(PM_MRK_DATA_FROM_LMEM_CYC,              0x2002c)
+EVENT(PM_GRP_IC_MISS_NONSPEC,                 0x1000c)
+EVENT(PM_PB_NODE_PUMP,                        0x10081)
+EVENT(PM_SHL_MERGED,                          0x05084)
+EVENT(PM_NEST_PAIR1_ADD,                      0x20881)
+EVENT(PM_DATA_FROM_L3,                        0x1c048)
+EVENT(PM_LSU_FLUSH,                           0x0208e)
+EVENT(PM_LSU_SRQ_SYNC_COUNT,                  0x0d097)
+EVENT(PM_PMC2_OVERFLOW,                       0x30010)
+EVENT(PM_LSU_LDF,                             0x0c884)
+EVENT(PM_POWER_EVENT3,                        0x3006e)
+EVENT(PM_DISP_WT,                             0x30008)
+EVENT(PM_CMPLU_STALL_REJECT,                  0x40016)
+EVENT(PM_IC_BANK_CONFLICT,                    0x04082)
+EVENT(PM_BR_MPRED_CR_TA,                      0x048ae)
+EVENT(PM_L2_INST_MISS,                        0x36082)
+EVENT(PM_CMPLU_STALL_ERAT_MISS,               0x40018)
+EVENT(PM_NEST_PAIR2_ADD,                      0x30881)
+EVENT(PM_MRK_LSU_FLUSH,                       0x0d08c)
+EVENT(PM_L2_LDST,                             0x16880)
+EVENT(PM_INST_FROM_L31_SHR,                   0x1404e)
+EVENT(PM_VSU0_FIN,                            0x0a0bc)
+EVENT(PM_LARX_LSU,                            0x0c894)
+EVENT(PM_INST_FROM_RMEM,                      0x34042)
+EVENT(PM_DISP_CLB_HELD_TLBIE,                 0x02096)
+EVENT(PM_MRK_DATA_FROM_DMEM_CYC,              0x2002e)
+EVENT(PM_BR_PRED_CR,                          0x040a8)
+EVENT(PM_LSU_REJECT,                          0x10064)
+EVENT(PM_GCT_UTIL_3_TO_6_SLOTS,               0x0209e)
+EVENT(PM_CMPLU_STALL_END_GCT_NOSLOT,          0x10028)
+EVENT(PM_LSU0_REJECT_LMQ_FULL,                0x0c0a4)
+EVENT(PM_VSU_FEST,                            0x0a8b8)
+EVENT(PM_NEST_PAIR0_AND,                      0x10883)
+EVENT(PM_PTEG_FROM_L3,                        0x2c050)
+EVENT(PM_POWER_EVENT2,                        0x2006e)
+EVENT(PM_IC_PREF_CANCEL_PAGE,                 0x04090)
+EVENT(PM_VSU0_FSQRT_FDIV,                     0x0a088)
+EVENT(PM_MRK_GRP_CMPL,                        0x40030)
+EVENT(PM_VSU0_SCAL_DOUBLE_ISSUED,             0x0b088)
+EVENT(PM_GRP_DISP,                            0x3000a)
+EVENT(PM_LSU0_LDX,                            0x0c088)
+EVENT(PM_DATA_FROM_L2,                        0x1c040)
+EVENT(PM_MRK_DATA_FROM_RL2L3_MOD,             0x1d042)
+EVENT(PM_LD_REF_L1,                           0x0c880)
+EVENT(PM_VSU0_VECT_DOUBLE_ISSUED,             0x0b080)
+EVENT(PM_VSU1_2FLOP_DOUBLE,                   0x0a08e)
+EVENT(PM_THRD_PRIO_6_7_CYC,                   0x040b6)
+EVENT(PM_BC_PLUS_8_RSLV_TAKEN,                0x040ba)
+EVENT(PM_BR_MPRED_CR,                         0x040ac)
+EVENT(PM_L3_CO_MEM,                           0x4f082)
+EVENT(PM_LD_MISS_L1,                          0x400f0)
+EVENT(PM_DATA_FROM_RL2L3_MOD,                 0x1c042)
+EVENT(PM_LSU_SRQ_FULL_CYC,                    0x1001a)
+EVENT(PM_TABLEWALK_CYC,                       0x10026)
+EVENT(PM_MRK_PTEG_FROM_RMEM,                  0x3d052)
+EVENT(PM_LSU_SRQ_STFWD,                       0x0c8a0)
+EVENT(PM_INST_PTEG_FROM_RMEM,                 0x3e052)
+EVENT(PM_FXU0_FIN,                            0x10004)
+EVENT(PM_LSU1_L1_SW_PREF,                     0x0c09e)
+EVENT(PM_PTEG_FROM_L31_MOD,                   0x1c054)
+EVENT(PM_PMC5_OVERFLOW,                       0x10024)
+EVENT(PM_LD_REF_L1_LSU1,                      0x0c082)
+EVENT(PM_INST_PTEG_FROM_L21_SHR,              0x4e056)
+EVENT(PM_CMPLU_STALL_THRD,                    0x1001c)
+EVENT(PM_DATA_FROM_RMEM,                      0x3c042)
+EVENT(PM_VSU0_SCAL_SINGLE_ISSUED,             0x0b084)
+EVENT(PM_BR_MPRED_LSTACK,                     0x040a6)
+EVENT(PM_MRK_DATA_FROM_RL2L3_MOD_CYC,         0x40028)
+EVENT(PM_LSU0_FLUSH_UST,                      0x0c0b4)
+EVENT(PM_LSU_NCST,                            0x0c090)
+EVENT(PM_BR_TAKEN,                            0x20004)
+EVENT(PM_INST_PTEG_FROM_LMEM,                 0x4e052)
+EVENT(PM_GCT_NOSLOT_BR_MPRED_IC_MISS,         0x4001c)
+EVENT(PM_DTLB_MISS_4K,                        0x2c05a)
+EVENT(PM_PMC4_SAVED,                          0x30022)
+EVENT(PM_VSU1_PERMUTE_ISSUED,                 0x0b092)
+EVENT(PM_SLB_MISS,                            0x0d890)
+EVENT(PM_LSU1_FLUSH_LRQ,                      0x0c0ba)
+EVENT(PM_DTLB_MISS,                           0x300fc)
+EVENT(PM_VSU1_FRSP,                           0x0a0b6)
+EVENT(PM_VSU_VECTOR_DOUBLE_ISSUED,            0x0b880)
+EVENT(PM_L2_CASTOUT_SHR,                      0x16182)
+EVENT(PM_DATA_FROM_DL2L3_SHR,                 0x3c044)
+EVENT(PM_VSU1_STF,                            0x0b08e)
+EVENT(PM_ST_FIN,                              0x200f0)
+EVENT(PM_PTEG_FROM_L21_SHR,                   0x4c056)
+EVENT(PM_L2_LOC_GUESS_WRONG,                  0x26480)
+EVENT(PM_MRK_STCX_FAIL,                       0x0d08e)
+EVENT(PM_LSU0_REJECT_LHS,                     0x0c0ac)
+EVENT(PM_IC_PREF_CANCEL_HIT,                  0x04092)
+EVENT(PM_L3_PREF_BUSY,                        0x4f080)
+EVENT(PM_MRK_BRU_FIN,                         0x2003a)
+EVENT(PM_LSU1_NCLD,                           0x0c08e)
+EVENT(PM_INST_PTEG_FROM_L31_MOD,              0x1e054)
+EVENT(PM_LSU_NCLD,                            0x0c88c)
+EVENT(PM_LSU_LDX,                             0x0c888)
+EVENT(PM_L2_LOC_GUESS_CORRECT,                0x16480)
+EVENT(PM_THRESH_TIMEO,                        0x10038)
+EVENT(PM_L3_PREF_ST,                          0x0d0ae)
+EVENT(PM_DISP_CLB_HELD_SYNC,                  0x02098)
+EVENT(PM_VSU_SIMPLE_ISSUED,                   0x0b894)
+EVENT(PM_VSU1_SINGLE,                         0x0a0aa)
+EVENT(PM_DATA_TABLEWALK_CYC,                  0x3001a)
+EVENT(PM_L2_RC_ST_DONE,                       0x36380)
+EVENT(PM_MRK_PTEG_FROM_L21_MOD,               0x3d056)
+EVENT(PM_LARX_LSU1,                           0x0c096)
+EVENT(PM_MRK_DATA_FROM_RMEM,                  0x3d042)
+EVENT(PM_DISP_CLB_HELD,                       0x02090)
+EVENT(PM_DERAT_MISS_4K,                       0x1c05c)
+EVENT(PM_L2_RCLD_DISP_FAIL_ADDR,              0x16282)
+EVENT(PM_SEG_EXCEPTION,                       0x028a4)
+EVENT(PM_FLUSH_DISP_SB,                       0x0208c)
+EVENT(PM_L2_DC_INV,                           0x26182)
+EVENT(PM_PTEG_FROM_DL2L3_MOD,                 0x4c054)
+EVENT(PM_DSEG,                                0x020a6)
+EVENT(PM_BR_PRED_LSTACK,                      0x040a2)
+EVENT(PM_VSU0_STF,                            0x0b08c)
+EVENT(PM_LSU_FX_FIN,                          0x10066)
+EVENT(PM_DERAT_MISS_16M,                      0x3c05c)
+EVENT(PM_MRK_PTEG_FROM_DL2L3_MOD,             0x4d054)
+EVENT(PM_GCT_UTIL_11_PLUS_SLOTS,              0x020a2)
+EVENT(PM_INST_FROM_L3,                        0x14048)
+EVENT(PM_MRK_IFU_FIN,                         0x3003a)
+EVENT(PM_ITLB_MISS,                           0x400fc)
+EVENT(PM_VSU_STF,                             0x0b88c)
+EVENT(PM_LSU_FLUSH_UST,                       0x0c8b4)
+EVENT(PM_L2_LDST_MISS,                        0x26880)
+EVENT(PM_FXU1_FIN,                            0x40004)
+EVENT(PM_SHL_DEALLOCATED,                     0x05080)
+EVENT(PM_L2_SN_M_WR_DONE,                     0x46382)
+EVENT(PM_LSU_REJECT_SET_MPRED,                0x0c8a8)
+EVENT(PM_L3_PREF_LD,                          0x0d0ac)
+EVENT(PM_L2_SN_M_RD_DONE,                     0x46380)
+EVENT(PM_MRK_DERAT_MISS_16G,                  0x4d05c)
+EVENT(PM_VSU_FCONV,                           0x0a8b0)
+EVENT(PM_ANY_THRD_RUN_CYC,                    0x100fa)
+EVENT(PM_LSU_LMQ_FULL_CYC,                    0x0d0a4)
+EVENT(PM_MRK_LSU_REJECT_LHS,                  0x0d082)
+EVENT(PM_MRK_LD_MISS_L1_CYC,                  0x4003e)
+EVENT(PM_MRK_DATA_FROM_L2_CYC,                0x20020)
+EVENT(PM_INST_IMC_MATCH_DISP,                 0x30016)
+EVENT(PM_MRK_DATA_FROM_RMEM_CYC,              0x4002c)
+EVENT(PM_VSU0_SIMPLE_ISSUED,                  0x0b094)
+EVENT(PM_CMPLU_STALL_DIV,                     0x40014)
+EVENT(PM_MRK_PTEG_FROM_RL2L3_SHR,             0x2d054)
+EVENT(PM_VSU_FMA_DOUBLE,                      0x0a890)
+EVENT(PM_VSU_4FLOP,                           0x0a89c)
+EVENT(PM_VSU1_FIN,                            0x0a0be)
+EVENT(PM_NEST_PAIR1_AND,                      0x20883)
+EVENT(PM_INST_PTEG_FROM_RL2L3_MOD,            0x1e052)
+EVENT(PM_RUN_CYC,                             0x200f4)
+EVENT(PM_PTEG_FROM_RMEM,                      0x3c052)
+EVENT(PM_LSU_LRQ_S0_VALID,                    0x0d09e)
+EVENT(PM_LSU0_LDF,                            0x0c084)
+EVENT(PM_FLUSH_COMPLETION,                    0x30012)
+EVENT(PM_ST_MISS_L1,                          0x300f0)
+EVENT(PM_L2_NODE_PUMP,                        0x36480)
+EVENT(PM_INST_FROM_DL2L3_SHR,                 0x34044)
+EVENT(PM_MRK_STALL_CMPLU_CYC,                 0x3003e)
+EVENT(PM_VSU1_DENORM,                         0x0a0ae)
+EVENT(PM_MRK_DATA_FROM_L31_SHR_CYC,           0x20026)
+EVENT(PM_NEST_PAIR0_ADD,                      0x10881)
+EVENT(PM_INST_FROM_L3MISS,                    0x24048)
+EVENT(PM_EE_OFF_EXT_INT,                      0x02080)
+EVENT(PM_INST_PTEG_FROM_DMEM,                 0x2e052)
+EVENT(PM_INST_FROM_DL2L3_MOD,                 0x3404c)
+EVENT(PM_PMC6_OVERFLOW,                       0x30024)
+EVENT(PM_VSU_2FLOP_DOUBLE,                    0x0a88c)
+EVENT(PM_TLB_MISS,                            0x20066)
+EVENT(PM_FXU_BUSY,                            0x2000e)
+EVENT(PM_L2_RCLD_DISP_FAIL_OTHER,             0x26280)
+EVENT(PM_LSU_REJECT_LMQ_FULL,                 0x0c8a4)
+EVENT(PM_IC_RELOAD_SHR,                       0x04096)
+EVENT(PM_GRP_MRK,                             0x10031)
+EVENT(PM_MRK_ST_NEST,                         0x20034)
+EVENT(PM_VSU1_FSQRT_FDIV,                     0x0a08a)
+EVENT(PM_LSU0_FLUSH_LRQ,                      0x0c0b8)
+EVENT(PM_LARX_LSU0,                           0x0c094)
+EVENT(PM_IBUF_FULL_CYC,                       0x04084)
+EVENT(PM_MRK_DATA_FROM_DL2L3_SHR_CYC,         0x2002a)
+EVENT(PM_LSU_DC_PREF_STREAM_ALLOC,            0x0d8a8)
+EVENT(PM_GRP_MRK_CYC,                         0x10030)
+EVENT(PM_MRK_DATA_FROM_RL2L3_SHR_CYC,         0x20028)
+EVENT(PM_L2_GLOB_GUESS_CORRECT,               0x16482)
+EVENT(PM_LSU_REJECT_LHS,                      0x0c8ac)
+EVENT(PM_MRK_DATA_FROM_LMEM,                  0x3d04a)
+EVENT(PM_INST_PTEG_FROM_L3,                   0x2e050)
+EVENT(PM_FREQ_DOWN,                           0x3000c)
+EVENT(PM_PB_RETRY_NODE_PUMP,                  0x30081)
+EVENT(PM_INST_FROM_RL2L3_SHR,                 0x1404c)
+EVENT(PM_MRK_INST_ISSUED,                     0x10032)
+EVENT(PM_PTEG_FROM_L3MISS,                    0x2c058)
+EVENT(PM_RUN_PURR,                            0x400f4)
+EVENT(PM_MRK_GRP_IC_MISS,                     0x40038)
+EVENT(PM_MRK_DATA_FROM_L3,                    0x1d048)
+EVENT(PM_CMPLU_STALL_DCACHE_MISS,             0x20016)
+EVENT(PM_PTEG_FROM_RL2L3_SHR,                 0x2c054)
+EVENT(PM_LSU_FLUSH_LRQ,                       0x0c8b8)
+EVENT(PM_MRK_DERAT_MISS_64K,                  0x2d05c)
+EVENT(PM_INST_PTEG_FROM_DL2L3_MOD,            0x4e054)
+EVENT(PM_L2_ST_MISS,                          0x26082)
+EVENT(PM_MRK_PTEG_FROM_L21_SHR,               0x4d056)
+EVENT(PM_LWSYNC,                              0x0d094)
+EVENT(PM_LSU0_DC_PREF_STREAM_CONFIRM_STRIDE,  0x0d0bc)
+EVENT(PM_MRK_LSU_FLUSH_LRQ,                   0x0d088)
+EVENT(PM_INST_IMC_MATCH_CMPL,                 0x100f0)
+EVENT(PM_NEST_PAIR3_AND,                      0x40883)
+EVENT(PM_PB_RETRY_SYS_PUMP,                   0x40081)
+EVENT(PM_MRK_INST_FIN,                        0x30030)
+EVENT(PM_MRK_PTEG_FROM_DL2L3_SHR,             0x3d054)
+EVENT(PM_INST_FROM_L31_MOD,                   0x14044)
+EVENT(PM_MRK_DTLB_MISS_64K,                   0x3d05e)
+EVENT(PM_LSU_FIN,                             0x30066)
+EVENT(PM_MRK_LSU_REJECT,                      0x40064)
+EVENT(PM_L2_CO_FAIL_BUSY,                     0x16382)
+EVENT(PM_MEM0_WQ_DISP,                        0x40083)
+EVENT(PM_DATA_FROM_L31_MOD,                   0x1c044)
+EVENT(PM_THERMAL_WARN,                        0x10016)
+EVENT(PM_VSU0_4FLOP,                          0x0a09c)
+EVENT(PM_BR_MPRED_CCACHE,                     0x040a4)
+EVENT(PM_CMPLU_STALL_IFU,                     0x4004c)
+EVENT(PM_L1_DEMAND_WRITE,                     0x0408c)
+EVENT(PM_FLUSH_BR_MPRED,                      0x02084)
+EVENT(PM_MRK_DTLB_MISS_16G,                   0x1d05e)
+EVENT(PM_MRK_PTEG_FROM_DMEM,                  0x2d052)
+EVENT(PM_L2_RCST_DISP,                        0x36280)
+EVENT(PM_CMPLU_STALL,                         0x4000a)
+EVENT(PM_LSU_PARTIAL_CDF,                     0x0c0aa)
+EVENT(PM_DISP_CLB_HELD_SB,                    0x020a8)
+EVENT(PM_VSU0_FMA_DOUBLE,                     0x0a090)
+EVENT(PM_FXU0_BUSY_FXU1_IDLE,                 0x3000e)
+EVENT(PM_IC_DEMAND_CYC,                       0x10018)
+EVENT(PM_MRK_DATA_FROM_L21_SHR,               0x3d04e)
+EVENT(PM_MRK_LSU_FLUSH_UST,                   0x0d086)
+EVENT(PM_INST_PTEG_FROM_L3MISS,               0x2e058)
+EVENT(PM_VSU_DENORM,                          0x0a8ac)
+EVENT(PM_MRK_LSU_PARTIAL_CDF,                 0x0d080)
+EVENT(PM_INST_FROM_L21_SHR,                   0x3404e)
+EVENT(PM_IC_PREF_WRITE,                       0x0408e)
+EVENT(PM_BR_PRED,                             0x0409c)
+EVENT(PM_INST_FROM_DMEM,                      0x1404a)
+EVENT(PM_IC_PREF_CANCEL_ALL,                  0x04890)
+EVENT(PM_LSU_DC_PREF_STREAM_CONFIRM,          0x0d8b4)
+EVENT(PM_MRK_LSU_FLUSH_SRQ,                   0x0d08a)
+EVENT(PM_MRK_FIN_STALL_CYC,                   0x1003c)
+EVENT(PM_L2_RCST_DISP_FAIL_OTHER,             0x46280)
+EVENT(PM_VSU1_DD_ISSUED,                      0x0b098)
+EVENT(PM_PTEG_FROM_L31_SHR,                   0x2c056)
+EVENT(PM_DATA_FROM_L21_SHR,                   0x3c04e)
+EVENT(PM_LSU0_NCLD,                           0x0c08c)
+EVENT(PM_VSU1_4FLOP,                          0x0a09e)
+EVENT(PM_VSU1_8FLOP,                          0x0a0a2)
+EVENT(PM_VSU_8FLOP,                           0x0a8a0)
+EVENT(PM_LSU_LMQ_SRQ_EMPTY_CYC,               0x2003e)
+EVENT(PM_DTLB_MISS_64K,                       0x3c05e)
+EVENT(PM_THRD_CONC_RUN_INST,                  0x300f4)
+EVENT(PM_MRK_PTEG_FROM_L2,                    0x1d050)
+EVENT(PM_PB_SYS_PUMP,                         0x20081)
+EVENT(PM_VSU_FIN,                             0x0a8bc)
+EVENT(PM_MRK_DATA_FROM_L31_MOD,               0x1d044)
+EVENT(PM_THRD_PRIO_0_1_CYC,                   0x040b0)
+EVENT(PM_DERAT_MISS_64K,                      0x2c05c)
+EVENT(PM_PMC2_REWIND,                         0x30020)
+EVENT(PM_INST_FROM_L2,                        0x14040)
+EVENT(PM_GRP_BR_MPRED_NONSPEC,                0x1000a)
+EVENT(PM_INST_DISP,                           0x200f2)
+EVENT(PM_MEM0_RD_CANCEL_TOTAL,                0x30083)
+EVENT(PM_LSU0_DC_PREF_STREAM_CONFIRM,         0x0d0b4)
+EVENT(PM_L1_DCACHE_RELOAD_VALID,              0x300f6)
+EVENT(PM_VSU_SCALAR_DOUBLE_ISSUED,            0x0b888)
+EVENT(PM_L3_PREF_HIT,                         0x3f080)
+EVENT(PM_MRK_PTEG_FROM_L31_MOD,               0x1d054)
+EVENT(PM_CMPLU_STALL_STORE,                   0x2004a)
+EVENT(PM_MRK_FXU_FIN,                         0x20038)
+EVENT(PM_PMC4_OVERFLOW,                       0x10010)
+EVENT(PM_MRK_PTEG_FROM_L3,                    0x2d050)
+EVENT(PM_LSU0_LMQ_LHR_MERGE,                  0x0d098)
+EVENT(PM_BTAC_HIT,                            0x0508a)
+EVENT(PM_L3_RD_BUSY,                          0x4f082)
+EVENT(PM_LSU0_L1_SW_PREF,                     0x0c09c)
+EVENT(PM_INST_FROM_L2MISS,                    0x44048)
+EVENT(PM_LSU0_DC_PREF_STREAM_ALLOC,           0x0d0a8)
+EVENT(PM_L2_ST,                               0x16082)
+EVENT(PM_VSU0_DENORM,                         0x0a0ac)
+EVENT(PM_MRK_DATA_FROM_DL2L3_SHR,             0x3d044)
+EVENT(PM_BR_PRED_CR_TA,                       0x048aa)
+EVENT(PM_VSU0_FCONV,                          0x0a0b0)
+EVENT(PM_MRK_LSU_FLUSH_ULD,                   0x0d084)
+EVENT(PM_BTAC_MISS,                           0x05088)
+EVENT(PM_MRK_LD_MISS_EXPOSED_CYC_COUNT,       0x1003f)
+EVENT(PM_MRK_DATA_FROM_L2,                    0x1d040)
+EVENT(PM_LSU_DCACHE_RELOAD_VALID,             0x0d0a2)
+EVENT(PM_VSU_FMA,                             0x0a884)
+EVENT(PM_LSU0_FLUSH_SRQ,                      0x0c0bc)
+EVENT(PM_LSU1_L1_PREF,                        0x0d0ba)
+EVENT(PM_IOPS_CMPL,                           0x10014)
+EVENT(PM_L2_SYS_PUMP,                         0x36482)
+EVENT(PM_L2_RCLD_BUSY_RC_FULL,                0x46282)
+EVENT(PM_LSU_LMQ_S0_ALLOC,                    0x0d0a1)
+EVENT(PM_FLUSH_DISP_SYNC,                     0x02088)
+EVENT(PM_MRK_DATA_FROM_DL2L3_MOD_CYC,         0x4002a)
+EVENT(PM_L2_IC_INV,                           0x26180)
+EVENT(PM_MRK_DATA_FROM_L21_MOD_CYC,           0x40024)
+EVENT(PM_L3_PREF_LDST,                        0x0d8ac)
+EVENT(PM_LSU_SRQ_EMPTY_CYC,                   0x40008)
+EVENT(PM_LSU_LMQ_S0_VALID,                    0x0d0a0)
+EVENT(PM_FLUSH_PARTIAL,                       0x02086)
+EVENT(PM_VSU1_FMA_DOUBLE,                     0x0a092)
+EVENT(PM_1PLUS_PPC_DISP,                      0x400f2)
+EVENT(PM_DATA_FROM_L2MISS,                    0x200fe)
+EVENT(PM_SUSPENDED,                           0x00000)
+EVENT(PM_VSU0_FMA,                            0x0a084)
+EVENT(PM_CMPLU_STALL_SCALAR,                  0x40012)
+EVENT(PM_STCX_FAIL,                           0x0c09a)
+EVENT(PM_VSU0_FSQRT_FDIV_DOUBLE,              0x0a094)
+EVENT(PM_DC_PREF_DST,                         0x0d0b0)
+EVENT(PM_VSU1_SCAL_SINGLE_ISSUED,             0x0b086)
+EVENT(PM_L3_HIT,                              0x1f080)
+EVENT(PM_L2_GLOB_GUESS_WRONG,                 0x26482)
+EVENT(PM_MRK_DFU_FIN,                         0x20032)
+EVENT(PM_INST_FROM_L1,                        0x04080)
+EVENT(PM_BRU_FIN,                             0x10068)
+EVENT(PM_IC_DEMAND_REQ,                       0x04088)
+EVENT(PM_VSU1_FSQRT_FDIV_DOUBLE,              0x0a096)
+EVENT(PM_VSU1_FMA,                            0x0a086)
+EVENT(PM_MRK_LD_MISS_L1,                      0x20036)
+EVENT(PM_VSU0_2FLOP_DOUBLE,                   0x0a08c)
+EVENT(PM_LSU_DC_PREF_STRIDED_STREAM_CONFIRM,  0x0d8bc)
+EVENT(PM_INST_PTEG_FROM_L31_SHR,              0x2e056)
+EVENT(PM_MRK_LSU_REJECT_ERAT_MISS,            0x30064)
+EVENT(PM_MRK_DATA_FROM_L2MISS,                0x4d048)
+EVENT(PM_DATA_FROM_RL2L3_SHR,                 0x1c04c)
+EVENT(PM_INST_FROM_PREF,                      0x14046)
+EVENT(PM_VSU1_SQ,                             0x0b09e)
+EVENT(PM_L2_LD_DISP,                          0x36180)
+EVENT(PM_L2_DISP_ALL,                         0x46080)
+EVENT(PM_THRD_GRP_CMPL_BOTH_CYC,              0x10012)
+EVENT(PM_VSU_FSQRT_FDIV_DOUBLE,               0x0a894)
+EVENT(PM_BR_MPRED,                            0x400f6)
+EVENT(PM_INST_PTEG_FROM_DL2L3_SHR,            0x3e054)
+EVENT(PM_VSU_1FLOP,                           0x0a880)
+EVENT(PM_HV_CYC,                              0x2000a)
+EVENT(PM_MRK_LSU_FIN,                         0x40032)
+EVENT(PM_MRK_DATA_FROM_RL2L3_SHR,             0x1d04c)
+EVENT(PM_DTLB_MISS_16M,                       0x4c05e)
+EVENT(PM_LSU1_LMQ_LHR_MERGE,                  0x0d09a)
+EVENT(PM_IFU_FIN,                             0x40066)
+EVENT(PM_1THRD_CON_RUN_INSTR,                 0x30062)
+EVENT(PM_CMPLU_STALL_COUNT,                   0x4000b)
+EVENT(PM_MEM0_PB_RD_CL,                       0x30083)
+EVENT(PM_THRD_1_RUN_CYC,                      0x10060)
+EVENT(PM_THRD_2_CONC_RUN_INSTR,               0x40062)
+EVENT(PM_THRD_2_RUN_CYC,                      0x20060)
+EVENT(PM_THRD_3_CONC_RUN_INST,                0x10062)
+EVENT(PM_THRD_3_RUN_CYC,                      0x30060)
+EVENT(PM_THRD_4_CONC_RUN_INST,                0x20062)
+EVENT(PM_THRD_4_RUN_CYC,                      0x40060)
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
new file mode 100644
index 0000000000..c95ccf2e28
--- /dev/null
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER7 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/string.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+/*
+ * Bits in event code for POWER7
+ */
+#define PM_PMC_SH	16	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
+#define PM_UNIT_SH	12	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_COMBINE_SH	11	/* Combined event bit */
+#define PM_COMBINE_MSK	1
+#define PM_COMBINE_MSKS	0x800
+#define PM_L2SEL_SH	8	/* L2 event select */
+#define PM_L2SEL_MSK	7
+#define PM_PMCSEL_MSK	0xff
+
+/*
+ * Bits in MMCR1 for POWER7
+ */
+#define MMCR1_TTM0SEL_SH	60
+#define MMCR1_TTM1SEL_SH	56
+#define MMCR1_TTM2SEL_SH	52
+#define MMCR1_TTM3SEL_SH	48
+#define MMCR1_TTMSEL_MSK	0xf
+#define MMCR1_L2SEL_SH		45
+#define MMCR1_L2SEL_MSK		7
+#define MMCR1_PMC1_COMBINE_SH	35
+#define MMCR1_PMC2_COMBINE_SH	34
+#define MMCR1_PMC3_COMBINE_SH	33
+#define MMCR1_PMC4_COMBINE_SH	32
+#define MMCR1_PMC1SEL_SH	24
+#define MMCR1_PMC2SEL_SH	16
+#define MMCR1_PMC3SEL_SH	8
+#define MMCR1_PMC4SEL_SH	0
+#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
+#define MMCR1_PMCSEL_MSK	0xff
+
+/*
+ * Power7 event codes.
+ */
+#define EVENT(_name, _code) \
+	_name = _code,
+
+enum {
+#include "power7-events-list.h"
+};
+#undef EVENT
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *                                              < ><  ><><><><><><>
+ *                                              L2  NC P6P5P4P3P2P1
+ *
+ * L2 - 16-18 - Required L2SEL value (select field)
+ *
+ * NC - number of counters
+ *     15: NC error 0x8000
+ *     12-14: number of events needing PMC1-4 0x7000
+ *
+ * P6
+ *     11: P6 error 0x800
+ *     10-11: Count of events needing PMC6
+ *
+ * P1..P5
+ *     0-9: Count of events needing PMC1..PMC5
+ */
+
+/* Build the constraint mask/value pair for one event; returns -1 if it is invalid */
+static int power7_get_constraint(u64 event, unsigned long *maskp,
+				 unsigned long *valp, u64 event_config1 __maybe_unused)
+{
+	unsigned long need = 0, have = 0;
+	int counter = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	int ttm = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	int shift;
+
+	/* PMC numbers stop at 6; PMC5/6 accept only 0x500fa and 0x600f4 */
+	if (counter > 6)
+		return -1;
+	if (counter >= 5 && event != 0x500fa && event != 0x600f4)
+		return -1;
+	if (counter) {
+		/* 2-bit field per PMC: value sets the low bit, mask the high bit */
+		shift = 2 * (counter - 1);
+		need |= 2 << shift;
+		have |= 1 << shift;
+	}
+	if (counter < 5) {
+		/* need a counter from PMC1-4 set */
+		need |= 0x8000;
+		have |= 0x1000;
+	}
+	if (ttm == 6) {
+		/* L2SEL must be identical across events */
+		need |= 0x7 << 16;
+		have |= ((event >> PM_L2SEL_SH) & PM_L2SEL_MSK) << 16;
+	}
+	*maskp = need;
+	*valp = have;
+	return 0;
+}
+
+#define MAX_ALT	2	/* at most 2 alternatives for any event */
+/* Equivalent event codes; rows are ordered by column 0, which find_alternative() relies on */
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x200f2, 0x300f2 },		/* PM_INST_DISP */
+	{ 0x200f4, 0x600f4 },		/* PM_RUN_CYC */
+	{ 0x400fa, 0x500fa },		/* PM_RUN_INST_CMPL */
+};
+
+/*
+ * Look "event" up in event_alternatives[].  The scan gives up at the first
+ * row whose column 0 exceeds it.  Returns the matching row index, else -1.
+ */
+static int find_alternative(u64 event)
+{
+	int row, col;
+
+	for (row = 0; row < ARRAY_SIZE(event_alternatives); row++) {
+		if (event_alternatives[row][0] > event)
+			return -1;
+		for (col = 0; col < MAX_ALT && event_alternatives[row][col] != 0; col++)
+			if (event_alternatives[row][col] == event)
+				return row;
+	}
+	return -1;
+}
+
+static s64 find_alternative_decode(u64 event)
+{
+	const int counter = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	const int sel_base = event & PM_PMCSEL_MSK & ~7;	/* low 3 bits cleared */
+	const u64 one_pmc = 1 << PM_PMC_SH;		/* +1 in the PMC field */
+
+	/* this only handles the 4x decode events */
+	if (sel_base == 0x40 && (counter == 2 || counter == 4))
+		return event - one_pmc + 8;	/* PMC n, 0x40+k -> PMC n-1, 0x48+k */
+	if (sel_base == 0x48 && (counter == 1 || counter == 3))
+		return event + one_pmc - 8;	/* PMC n, 0x48+k -> PMC n+1, 0x40+k */
+	return -1;
+}
+
+/* Fill alt[] with event plus its equivalent codes; returns the number of entries */
+static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int row, k, count, base;
+	s64 twin;
+
+	alt[0] = event;		/* slot 0 is always the event as requested */
+	count = 1;
+	row = find_alternative(event);
+	if (row < 0) {
+		/* not in the table: fall back to the computed decode twin */
+		twin = find_alternative_decode(event);
+		if (twin > 0)
+			alt[count++] = twin;
+	} else {
+		for (k = 0; k < MAX_ALT; k++) {
+			twin = event_alternatives[row][k];
+			if (twin != 0 && twin != event)
+				alt[count++] = twin;
+		}
+	}
+
+	if (!(flags & PPMU_ONLY_COUNT_RUN))
+		return count;
+
+	/*
+	 * We're only counting in RUN state, so PM_CYC is equivalent to
+	 * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL; offer each
+	 * as a stand-in for the other.  Only the entries collected above
+	 * are examined.  This doesn't include alternatives that don't
+	 * provide any extra flexibility in assigning PMCs.
+	 */
+	base = count;
+	for (k = 0; k < base; k++) {
+		u64 swap;
+
+		if (alt[k] == 0x1e)		/* PM_CYC */
+			swap = 0x600f4;		/* PM_RUN_CYC */
+		else if (alt[k] == 0x600f4)	/* PM_RUN_CYC */
+			swap = 0x1e;		/* PM_CYC */
+		else if (alt[k] == 0x2)		/* PM_PPC_CMPL */
+			swap = 0x500fa;		/* PM_RUN_INST_CMPL */
+		else if (alt[k] == 0x500fa)	/* PM_RUN_INST_CMPL */
+			swap = 0x2;		/* PM_PPC_CMPL */
+		else
+			continue;
+		alt[count++] = swap;
+	}
+	return count;
+}
+
+/*
+ * Decide whether "event" is a marked-instruction event, i.e. one that
+ * needs MMCRA_SAMPLE_ENABLE set.  Returns 1 if it is, 0 otherwise.
+ */
+static int power7_marked_instr_event(u64 event)
+{
+	const int counter = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	const int ttm = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	const int sel = event & PM_PMCSEL_MSK & ~1;	/* trim off edge/level bit */
+
+	/* events bound to PMC5 or above are never reported as marked */
+	if (counter >= 5)
+		return 0;
+
+	/* classify by the high nibble of the selector */
+	switch (sel & 0xf0) {
+	case 0x20:
+		return counter == 2 || counter == 4;
+	case 0x30:
+		/* 0x3c only on PMC1, 0x3e everywhere but PMC2, the rest always */
+		if (sel == 0x3c)
+			return counter == 1;
+		return sel != 0x3e || counter != 2;
+	case 0x40:
+	case 0x50:
+	case 0x80:
+		/* marked only when the unit field is 0xd */
+		return ttm == 0xd;
+	case 0x60:
+		/* only selector 0x64 qualifies, and only on PMC3 or PMC4 */
+		return sel == 0x64 && counter >= 3;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Pick a PMC for each of the n_ev events: hwc[i] receives the 0-based PMC
+ * index of event[i] and *mmcr the resulting MMCR0/MMCR1/MMCRA values.
+ * Returns 0 on success, -1 if the events cannot all be placed.
+ */
+static int power7_compute_mmcr(u64 event[], int n_ev,
+			       unsigned int hwc[], struct mmcr_regs *mmcr,
+			       struct perf_event *pevents[],
+			       u32 flags __maybe_unused)
+{
+	unsigned long sel_reg = 0;
+	unsigned long samp_reg = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
+	unsigned int taken = 0;		/* bit n set => PMC(n+1) is claimed */
+	unsigned int idx, ttm, comb, l2, sel;
+	int n;
+
+	/* Pass 1: claim the PMCs that events name explicitly (1-based field) */
+	for (n = 0; n < n_ev; n++) {
+		idx = (event[n] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (idx == 0)
+			continue;
+		/* fail on a PMC number beyond 6 or one that is already claimed */
+		if (idx > 6 || (taken & (1 << (idx - 1))))
+			return -1;
+		taken |= 1 << (idx - 1);
+	}
+
+	/* Pass 2: place the remaining events and fill in the MMCR1 fields */
+	for (n = 0; n < n_ev; n++) {
+		idx = (event[n] >> PM_PMC_SH) & PM_PMC_MSK;
+		ttm = (event[n] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		comb = (event[n] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
+		l2 = (event[n] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
+		sel = event[n] & PM_PMCSEL_MSK;
+		if (idx) {
+			/* Direct or decoded event: switch to 0-based numbering */
+			idx--;
+		} else {
+			/* Bus event or any-PMC direct event: lowest free of PMC1-4 */
+			while (idx < 4 && (taken & (1 << idx)))
+				idx++;
+			if (idx >= 4)
+				return -1;
+			taken |= 1 << idx;
+		}
+		if (idx <= 3) {
+			/* MMCR1 fields are written for PMC1-4 only */
+			sel_reg |= (unsigned long)ttm << (MMCR1_TTM0SEL_SH - 4 * idx);
+			sel_reg |= (unsigned long)comb << (MMCR1_PMC1_COMBINE_SH - idx);
+			sel_reg |= sel << MMCR1_PMCSEL_SH(idx);
+			if (ttm == 6)	/* L2 events */
+				sel_reg |= (unsigned long)l2 << MMCR1_L2SEL_SH;
+		}
+		if (power7_marked_instr_event(event[n]))
+			samp_reg |= MMCRA_SAMPLE_ENABLE;
+		hwc[n] = idx;
+	}
+
+	/* Hand back the MMCRx values */
+	mmcr->mmcr0 = 0;
+	if (taken & 1)
+		mmcr->mmcr0 = MMCR0_PMC1CE;	/* PMC1 in use */
+	if (taken & 0x3e)
+		mmcr->mmcr0 |= MMCR0_PMCjCE;	/* any of PMC2-6 in use */
+	mmcr->mmcr1 = sel_reg;
+	mmcr->mmcra = samp_reg;
+	return 0;
+}
+
+static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	if (pmc < 4)	/* only indices 0-3 have a PMCnSEL byte in MMCR1 */
+		mmcr->mmcr1 &= ~((unsigned long)MMCR1_PMCSEL_MSK << MMCR1_PMCSEL_SH(pmc));
+}
+
+static int power7_generic_events[] = {	/* indexed by PERF_COUNT_HW_* id */
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_GCT_NOSLOT_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PM_CMPLU_STALL,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_FIN,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Generalized cache events, indexed [cache][operation][result].
+ * 0 means not supported, -1 means nonsensical, other values
+ * are raw POWER7 event codes.
+ */
+static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0xc880,		0x400f0	},
+		[C(OP_WRITE)] = {	0,		0x300f0	},
+		[C(OP_PREFETCH)] = {	0xd8b8,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x200fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0x408a,		0	},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x16080,	0x26080	},
+		[C(OP_WRITE)] = {	0x16082,	0x26082	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x300fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x400fc	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x10068,	0x400f6	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED);
+
+#define EVENT(_name, _code)     POWER_EVENT_ATTR(_name, _name);
+#include "power7-events-list.h"
+#undef EVENT
+
+#define EVENT(_name, _code)     POWER_EVENT_PTR(_name),
+
+static struct attribute *power7_events_attr[] = {
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
+	GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+	GENERIC_EVENT_PTR(PM_BRU_FIN),
+	GENERIC_EVENT_PTR(PM_BR_MPRED),
+
+	#include "power7-events-list.h"
+	#undef EVENT
+	NULL
+};
+
+static const struct attribute_group power7_pmu_events_group = {
+	.name = "events",
+	.attrs = power7_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-19");
+
+static struct attribute *power7_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group power7_pmu_format_group = {
+	.name = "format",
+	.attrs = power7_pmu_format_attr,
+};
+
+static const struct attribute_group *power7_pmu_attr_groups[] = {
+	&power7_pmu_format_group,
+	&power7_pmu_events_group,
+	NULL,
+};
+
+static struct power_pmu power7_pmu = {
+	.name			= "POWER7",
+	.n_counter		= 6,
+	.max_alternatives	= MAX_ALT + 1,
+	.add_fields		= 0x1555ul,
+	.test_adder		= 0x3000ul,
+	.compute_mmcr		= power7_compute_mmcr,
+	.get_constraint		= power7_get_constraint,
+	.get_alternatives	= power7_get_alternatives,
+	.disable_pmc		= power7_disable_pmc,
+	.flags			= PPMU_ALT_SIPR,
+	.attr_groups		= power7_pmu_attr_groups,
+	.n_generic		= ARRAY_SIZE(power7_generic_events),
+	.generic_events		= power7_generic_events,
+	.cache_events		= &power7_cache_events,
+};
+
+int __init init_power7_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+	unsigned int ver = PVR_VER(pvr);
+
+	/* register only on PVR_POWER7 / PVR_POWER7p; the latter also gets SIAR_VALID */
+	if (ver != PVR_POWER7p && ver != PVR_POWER7)
+		return -ENODEV;
+	if (ver == PVR_POWER7p)
+		power7_pmu.flags |= PPMU_SIAR_VALID;
+	return register_power_pmu(&power7_pmu);
+}
diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h
new file mode 100644
index 0000000000..2e9b75d995
--- /dev/null
+++ b/arch/powerpc/perf/power8-events-list.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER8 processors.
+ *
+ * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation.
+ */
+
+/*
+ * Power8 event codes.
+ */
+EVENT(PM_CYC,					0x0001e)
+EVENT(PM_GCT_NOSLOT_CYC,			0x100f8)
+EVENT(PM_CMPLU_STALL,				0x4000a)
+EVENT(PM_INST_CMPL,				0x00002)
+EVENT(PM_BRU_FIN,				0x10068)
+EVENT(PM_BR_MPRED_CMPL,				0x400f6)
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1,				0x100ee)
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1,				0x3e054)
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1,				0x300f0)
+/* L1 cache data prefetches */
+EVENT(PM_L1_PREF,				0x0d8b8)
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1,				0x04080)
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS,			0x200fd)
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_L1_DEMAND_WRITE,			0x0408c)
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_WRITE,				0x0408e)
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3,				0x4c042)
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS,			0x300fe)
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST,					0x17080)
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS,				0x17082)
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PREF_ALL,				0x4e052)
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS,				0x300fc)
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS,				0x400fc)
+/* Run_Instructions */
+EVENT(PM_RUN_INST_CMPL,				0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT,			0x400fa)
+/* Run_cycles */
+EVENT(PM_RUN_CYC,				0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT,				0x200f4)
+/* Marked store completed */
+EVENT(PM_MRK_ST_CMPL,				0x10134)
+/* Alternate event code for Marked store completed */
+EVENT(PM_MRK_ST_CMPL_ALT,			0x301e2)
+/* Marked two path branch */
+EVENT(PM_BR_MRK_2PATH,				0x10138)
+/* Alternate event code for PM_BR_MRK_2PATH */
+EVENT(PM_BR_MRK_2PATH_ALT,			0x40138)
+/* L3 castouts in Mepf state */
+EVENT(PM_L3_CO_MEPF,				0x18082)
+/* Alternate event code for PM_L3_CO_MEPF */
+EVENT(PM_L3_CO_MEPF_ALT,			0x3e05e)
+/* Data cache was reloaded from a location other than L2 due to a marked load */
+EVENT(PM_MRK_DATA_FROM_L2MISS,			0x1d14e)
+/* Alternate event code for PM_MRK_DATA_FROM_L2MISS */
+EVENT(PM_MRK_DATA_FROM_L2MISS_ALT,		0x401e8)
+/* Alternate event code for PM_CMPLU_STALL */
+EVENT(PM_CMPLU_STALL_ALT,			0x1e054)
+/* Two path branch */
+EVENT(PM_BR_2PATH,				0x20036)
+/* Alternate event code for PM_BR_2PATH */
+EVENT(PM_BR_2PATH_ALT,				0x40036)
+/* # PPC Dispatched */
+EVENT(PM_INST_DISP,				0x200f2)
+/* Alternate event code for PM_INST_DISP */
+EVENT(PM_INST_DISP_ALT,				0x300f2)
+/* Marked filter Match */
+EVENT(PM_MRK_FILT_MATCH,			0x2013c)
+/* Alternate event code for PM_MRK_FILT_MATCH */
+EVENT(PM_MRK_FILT_MATCH_ALT,			0x3012e)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT,			0x400f0)
+/*
+ * Memory Access Event -- mem_access
+ * Primary PMU event used here is PM_MRK_INST_CMPL, along with
+ * Random Load/Store Facility Sampling (RIS) in Random sampling mode (MMCRA[SM]).
+ */
+EVENT(MEM_ACCESS,				0x10401e0)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
new file mode 100644
index 0000000000..ef9685065a
--- /dev/null
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER8 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"power8-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Some power8 event codes.
+ */
+#define EVENT(_name, _code)	_name = _code,
+
+enum {
+#include "power8-events-list.h"
+};
+
+#undef EVENT
+
+/* MMCRA IFM bits - POWER8 */
+#define	POWER8_MMCRA_IFM1		0x0000000040000000UL
+#define	POWER8_MMCRA_IFM2		0x0000000080000000UL
+#define	POWER8_MMCRA_IFM3		0x00000000C0000000UL
+#define	POWER8_MMCRA_BHRB_MASK		0x00000000C0000000UL
+
+/*
+ * Raw event encoding for PowerISA v2.07 (Power8):
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
+ *   | |  |                                                              |
+ *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
+ *   | *- BHRB (Linux)
+ *   *- EBB (Linux)
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   c     m   [    pmcxsel    ]
+ *     |        |           |                          |     |
+ *     |        |           |                          |     *- mark
+ *     |        |           *- L1/L2/L3 cache_sel      |
+ *     |        |                                      |
+ *     |        *- sampling mode for marked events     *- combine
+ *     |
+ *     *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit    (PMCxUNIT)
+ * MMCR1[x]   = combine (PMCxCOMB)
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ *	# PM_MRK_FAB_RSP_MATCH
+ *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ *	# PM_MRK_FAB_RSP_MATCH_CYC
+ *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
+ * else
+ *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
+ *
+ * if thresh_sel:
+ *	MMCRA[45:47] = thresh_sel
+ *
+ * if thresh_cmp:
+ *	MMCRA[22:24] = thresh_cmp[0:2]
+ *	MMCRA[25:31] = thresh_cmp[3:9]
+ *
+ * if unit == 6 or unit == 7
+ *	MMCRC[53:55] = cache_sel[1:3]      (L2EVENT_SEL)
+ * else if unit == 8 or unit == 9:
+ *	if cache_sel[0] == 0: # L3 bank
+ *		MMCRC[47:49] = cache_sel[1:3]  (L3EVENT_SEL0)
+ *	else if cache_sel[0] == 1:
+ *		MMCRC[50:51] = cache_sel[2:3]  (L3EVENT_SEL1)
+ * else if cache_sel[1]: # L1 event
+ *	MMCR1[16] = cache_sel[2]
+ *	MMCR1[17] = cache_sel[3]
+ *
+ * if mark:
+ *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
+ *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
+ *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ *	MMCRA[32:33] = IFM
+ *
+ */
+
+/* PowerISA v2.07 format attribute structure */
+extern const struct attribute_group isa207_pmu_format_group;
+
+/* Table of alternatives, sorted by column 0 (raw codes shown at right) */
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ PM_MRK_ST_CMPL,		PM_MRK_ST_CMPL_ALT },	/* 0x10134, 0x301e2 */
+	{ PM_BR_MRK_2PATH,		PM_BR_MRK_2PATH_ALT },	/* 0x10138, 0x40138 */
+	{ PM_L3_CO_MEPF,		PM_L3_CO_MEPF_ALT },	/* 0x18082, 0x3e05e */
+	{ PM_MRK_DATA_FROM_L2MISS,	PM_MRK_DATA_FROM_L2MISS_ALT },	/* 0x1d14e, 0x401e8 */
+	{ PM_CMPLU_STALL_ALT,		PM_CMPLU_STALL },	/* 0x1e054, 0x4000a */
+	{ PM_BR_2PATH,			PM_BR_2PATH_ALT },	/* 0x20036, 0x40036 */
+	{ PM_INST_DISP,			PM_INST_DISP_ALT },	/* 0x200f2, 0x300f2 */
+	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },	/* 0x200f4, 0x600f4 */
+	{ PM_MRK_FILT_MATCH,		PM_MRK_FILT_MATCH_ALT },	/* 0x2013c, 0x3012e */
+	{ PM_LD_MISS_L1,		PM_LD_MISS_L1_ALT },	/* 0x3e054, 0x400f0 */
+	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },	/* 0x400fa, 0x500fa */
+};
+
+/*
+ * POWER8 alternatives: hand event_alternatives[] and its row count to
+ * isa207_get_alternatives() and return whatever count it reports.
+ */
+static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	return isa207_get_alternatives(event, alt,
+				       ARRAY_SIZE(event_alternatives), flags,
+				       event_alternatives);
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(mem_access,			MEM_ACCESS);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-prefetches,		PM_L1_PREF);
+CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads,		PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_WRITE);
+
+CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PREF_ALL);
+CACHE_EVENT_ATTR(LLC-store-misses,		PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores,			PM_L2_ST);
+
+CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads,			PM_BRU_FIN);
+CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
+
+static struct attribute *power8_events_attr[] = {
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
+	GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_BRU_FIN),
+	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+	GENERIC_EVENT_PTR(MEM_ACCESS),
+
+	CACHE_EVENT_PTR(PM_LD_MISS_L1),
+	CACHE_EVENT_PTR(PM_LD_REF_L1),
+	CACHE_EVENT_PTR(PM_L1_PREF),
+	CACHE_EVENT_PTR(PM_ST_MISS_L1),
+	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+	CACHE_EVENT_PTR(PM_INST_FROM_L1),
+	CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+	CACHE_EVENT_PTR(PM_L3_PREF_ALL),
+	CACHE_EVENT_PTR(PM_L2_ST_MISS),
+	CACHE_EVENT_PTR(PM_L2_ST),
+
+	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+	CACHE_EVENT_PTR(PM_BRU_FIN),
+
+	CACHE_EVENT_PTR(PM_DTLB_MISS),
+	CACHE_EVENT_PTR(PM_ITLB_MISS),
+	NULL
+};
+
+static const struct attribute_group power8_pmu_events_group = {
+	.name = "events",
+	.attrs = power8_events_attr,
+};
+
+static struct attribute *power8_pmu_caps_attrs[] = {
+	NULL
+};
+
+static struct attribute_group power8_pmu_caps_group = {
+	.name  = "caps",
+	.attrs = power8_pmu_caps_attrs,
+};
+
+static const struct attribute_group *power8_pmu_attr_groups[] = {
+	&isa207_pmu_format_group,
+	&power8_pmu_events_group,
+	&power8_pmu_caps_group,
+	NULL,
+};
+
+static int power8_generic_events[] = {	/* indexed by PERF_COUNT_HW_* id */
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_GCT_NOSLOT_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PM_CMPLU_STALL,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_FIN,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
+};
+
+/*
+ * Map a perf branch_sample_type onto the POWER8 MMCRA IFM filter bits.
+ * Returns 0 when no filter is needed, POWER8_MMCRA_IFM1 for ANY_CALL,
+ * and -1 (converted to u64) for every request that is not supported.
+ */
+static u64 power8_bhrb_filter_map(u64 branch_sample_type)
+{
+	/* Invalid branch filter options - HW does not support */
+	const u64 unsupported = PERF_SAMPLE_BRANCH_ANY_RETURN |
+				PERF_SAMPLE_BRANCH_IND_CALL |
+				PERF_SAMPLE_BRANCH_CALL;
+
+	/*
+	 * Privilege-state bits in the request are deliberately ignored:
+	 * BHRB is always recorded along with a regular PMU event, and the
+	 * two share one privilege filter, which the basic PMC configuration
+	 * of that accompanying event already takes care of.
+	 */
+
+	/* No branch filter requested */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+		return 0;
+
+	if (branch_sample_type & unsupported)
+		return -1;
+
+	/* ANY_CALL is the one filter handled here; it selects IFM1 */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		return POWER8_MMCRA_IFM1;
+
+	/* Everything else is unsupported */
+	return -1;
+}
+
+static void power8_config_bhrb(u64 pmu_bhrb_filter)
+{
+	const u64 ifm = pmu_bhrb_filter & POWER8_MMCRA_BHRB_MASK;	/* drop other bits */
+
+	/* Enable BHRB filter in PMU */
+	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | ifm));
+}
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Generalized cache events, indexed [cache][operation][result].
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes from power8-events-list.h.
+ */
+static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
+			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_PREF,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
+			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
+			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L2_ST,
+			[ C(RESULT_MISS)   ] = PM_L2_ST_MISS,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_DTLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ITLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_BRU_FIN,
+			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(NODE) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+#undef C
+
+static struct power_pmu power8_pmu = {
+	.name			= "POWER8",
+	.n_counter		= MAX_PMU_COUNTERS,
+	.max_alternatives	= MAX_ALT + 1,
+	.add_fields		= ISA207_ADD_FIELDS,
+	.test_adder		= ISA207_TEST_ADDER,
+	.compute_mmcr		= isa207_compute_mmcr,
+	.config_bhrb		= power8_config_bhrb,
+	.bhrb_filter_map	= power8_bhrb_filter_map,
+	.get_constraint		= isa207_get_constraint,
+	.get_alternatives	= power8_get_alternatives,
+	.get_mem_data_src	= isa207_get_mem_data_src,
+	.get_mem_weight		= isa207_get_mem_weight,
+	.disable_pmc		= isa207_disable_pmc,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
+	.n_generic		= ARRAY_SIZE(power8_generic_events),
+	.generic_events		= power8_generic_events,
+	.cache_events		= &power8_cache_events,
+	.attr_groups		= power8_pmu_attr_groups,
+	.bhrb_nr		= 32,
+};
+
+int __init init_power8_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+	unsigned int ver = PVR_VER(pvr);
+	int err;
+
+	/* only the three POWER8 PVR versions are handled by this driver */
+	if (ver != PVR_POWER8 && ver != PVR_POWER8E && ver != PVR_POWER8NVL)
+		return -ENODEV;
+
+	err = register_power_pmu(&power8_pmu);
+	if (err != 0)
+		return err;
+
+	/* Tell userspace that EBB is supported */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+	/* purely informational: log when the CPU has the PMAO_BUG feature */
+	if (cpu_has_feature(CPU_FTR_PMAO_BUG))
+		pr_info("PMAO restore workaround active.\n");
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
new file mode 100644
index 0000000000..7f4e6b5f22
--- /dev/null
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER9 processors.
+ *
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+
+/*
+ * Power9 event codes.
+ */
+EVENT(PM_CYC,					0x0001e)
+EVENT(PM_ICT_NOSLOT_CYC,			0x100f8)
+EVENT(PM_CMPLU_STALL,				0x1e054)
+EVENT(PM_INST_CMPL,				0x00002)
+EVENT(PM_BR_CMPL,				0x4d05e)
+EVENT(PM_BR_MPRED_CMPL,				0x400f6)
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1,				0x100fc)
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1_FIN,			0x2c04e)
+EVENT(PM_LD_MISS_L1,				0x3e054)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT,			0x400f0)
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1,				0x300f0)
+/* L1 cache data prefetches */
+EVENT(PM_L1_PREF,				0x20054)
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1,				0x04080)
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS,			0x200fd)
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_L1_DEMAND_WRITE,			0x0408c)
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_WRITE,				0x0488c)
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3,				0x4c042)
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS,			0x300fe)
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST,					0x16880)
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS,				0x26880)
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PREF_ALL,				0x4e052)
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS,				0x300fc)
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS,				0x400fc)
+/* Run_Instructions */
+EVENT(PM_RUN_INST_CMPL,				0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT,			0x400fa)
+/* Run_cycles */
+EVENT(PM_RUN_CYC,				0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT,				0x200f4)
+/* Instruction Dispatched */
+EVENT(PM_INST_DISP,				0x200f2)
+EVENT(PM_INST_DISP_ALT,				0x300f2)
+/* Branch events that are not strongly biased */
+EVENT(PM_BR_2PATH,				0x20036)
+/* Alternate branch events that are not strongly biased */
+EVENT(PM_BR_2PATH_ALT,				0x40036)
+
+/* Blacklisted events */
+EVENT(PM_MRK_ST_DONE_L2,			0x10134)
+EVENT(PM_RADIX_PWC_L1_HIT,			0x1f056)
+EVENT(PM_FLOP_CMPL,				0x100f4)
+EVENT(PM_MRK_NTF_FIN,				0x20112)
+EVENT(PM_RADIX_PWC_L2_HIT,			0x2d024)
+EVENT(PM_IFETCH_THROTTLE,			0x3405e)
+EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER,		0x3e15c)
+EVENT(PM_RADIX_PWC_L3_HIT,			0x3f056)
+EVENT(PM_RUN_CYC_SMT2_MODE,			0x3006c)
+EVENT(PM_TM_TX_PASS_RUN_INST,			0x4e014)
+EVENT(PM_DISP_HELD_SYNC_HOLD,			0x4003c)
+EVENT(PM_DTLB_MISS_16G,				0x1c058)
+EVENT(PM_DERAT_MISS_2M,				0x1c05a)
+EVENT(PM_DTLB_MISS_2M,				0x1c05c)
+EVENT(PM_MRK_DTLB_MISS_1G,			0x1d15c)
+EVENT(PM_DTLB_MISS_4K,				0x2c056)
+EVENT(PM_DERAT_MISS_1G,				0x2c05a)
+EVENT(PM_MRK_DERAT_MISS_2M,			0x2d152)
+EVENT(PM_MRK_DTLB_MISS_4K,			0x2d156)
+EVENT(PM_MRK_DTLB_MISS_16G,			0x2d15e)
+EVENT(PM_DTLB_MISS_64K,				0x3c056)
+EVENT(PM_MRK_DERAT_MISS_1G,			0x3d152)
+EVENT(PM_MRK_DTLB_MISS_64K,			0x3d156)
+EVENT(PM_DTLB_MISS_16M,				0x4c056)
+EVENT(PM_DTLB_MISS_1G,				0x4c05a)
+EVENT(PM_MRK_DTLB_MISS_16M,			0x4c15e)
+
+/*
+ * Memory Access Events
+ *
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0).
+ * To capture memory-profiling data, the MMCRA bits listed below
+ * need to be programmed through the corresponding fields of the
+ * raw event encoding.
+ *
+ * MMCRA bits encoding needed are
+ *     SM (Sampling Mode)
+ *     EM (Eligibility for Random Sampling)
+ *     TECE (Threshold Event Counter Event)
+ *     TS (Threshold Start Event)
+ *     TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ *     sample [EM,SM]
+ *     thresh_sel (TECE)
+ *     thresh start (TS)
+ *     thresh end (TE)
+ */
+EVENT(MEM_LOADS,				0x34340401e0)
+EVENT(MEM_STORES,				0x343c0401e0)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
new file mode 100644
index 0000000000..cb6a7dc02d
--- /dev/null
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER9 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"power9-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding for Power9:
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | | [ ]                       [ ] [      thresh_cmp     ]   [  thresh_ctl   ]
+ *   | |  |                         |                                     |
+ *   | |  *- IFM (Linux)            |	               thresh start/stop -*
+ *   | *- BHRB (Linux)              *sm
+ *   *- EBB (Linux)
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   []    m   [    pmcxsel    ]
+ *     |        |           |                          |     |
+ *     |        |           |                          |     *- mark
+ *     |        |           *- L1/L2/L3 cache_sel      |
+ *     |        |                                      |
+ *     |        *- sampling mode for marked events     *- combine
+ *     |
+ *     *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit    (PMCxUNIT)
+ * MMCR1[24]   = pmc1combine[0]
+ * MMCR1[25]   = pmc1combine[1]
+ * MMCR1[26]   = pmc2combine[0]
+ * MMCR1[27]   = pmc2combine[1]
+ * MMCR1[28]   = pmc3combine[0]
+ * MMCR1[29]   = pmc3combine[1]
+ * MMCR1[30]   = pmc4combine[0]
+ * MMCR1[31]   = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ *	MMCR1[20:27] = thresh_ctl
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ *	MMCR1[20:27] = thresh_ctl
+ * else
+ *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
+ *
+ * if thresh_sel:
+ *	MMCRA[45:47] = thresh_sel
+ *
+ * if thresh_cmp:
+ *	MMCRA[9:11] = thresh_cmp[0:2]
+ *	MMCRA[12:18] = thresh_cmp[3:9]
+ *
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
+ *
+ * if mark:
+ *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
+ *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
+ *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ *	MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE]  = sm
+ */
+
+/*
+ * Some power9 event codes.
+ */
+#define EVENT(_name, _code)	_name = _code,
+
+enum {
+#include "power9-events-list.h"
+};
+
+#undef EVENT
+
+/* MMCRA IFM bits - POWER9 */
+#define POWER9_MMCRA_IFM1		0x0000000040000000UL
+#define POWER9_MMCRA_IFM2		0x0000000080000000UL
+#define POWER9_MMCRA_IFM3		0x00000000C0000000UL
+#define POWER9_MMCRA_BHRB_MASK		0x00000000C0000000UL
+
+extern u64 PERF_REG_EXTENDED_MASK;
+
+/* Nasty Power9 specific hack */
+#define PVR_POWER9_CUMULUS		0x00002000
+
+/* PowerISA v2.07 format attribute structure*/
+extern const struct attribute_group isa207_pmu_format_group;
+
+static int p9_dd21_bl_ev[] = {
+	PM_MRK_ST_DONE_L2,
+	PM_RADIX_PWC_L1_HIT,
+	PM_FLOP_CMPL,
+	PM_MRK_NTF_FIN,
+	PM_RADIX_PWC_L2_HIT,
+	PM_IFETCH_THROTTLE,
+	PM_MRK_L2_TM_ST_ABORT_SISTER,
+	PM_RADIX_PWC_L3_HIT,
+	PM_RUN_CYC_SMT2_MODE,
+	PM_TM_TX_PASS_RUN_INST,
+	PM_DISP_HELD_SYNC_HOLD,
+};
+
+static int p9_dd22_bl_ev[] = {
+	PM_DTLB_MISS_16G,
+	PM_DERAT_MISS_2M,
+	PM_DTLB_MISS_2M,
+	PM_MRK_DTLB_MISS_1G,
+	PM_DTLB_MISS_4K,
+	PM_DERAT_MISS_1G,
+	PM_MRK_DERAT_MISS_2M,
+	PM_MRK_DTLB_MISS_4K,
+	PM_MRK_DTLB_MISS_16G,
+	PM_DTLB_MISS_64K,
+	PM_MRK_DERAT_MISS_1G,
+	PM_MRK_DTLB_MISS_64K,
+	PM_DISP_HELD_SYNC_HOLD,
+	PM_DTLB_MISS_16M,
+	PM_DTLB_MISS_1G,
+	PM_MRK_DTLB_MISS_16M,
+};
+
+/* Table of alternatives, sorted by column 0 */
+static const unsigned int power9_event_alternatives[][MAX_ALT] = {
+	{ PM_BR_2PATH,			PM_BR_2PATH_ALT },
+	{ PM_INST_DISP,			PM_INST_DISP_ALT },
+	{ PM_RUN_CYC_ALT,               PM_RUN_CYC },
+	{ PM_LD_MISS_L1,                PM_LD_MISS_L1_ALT },
+	{ PM_RUN_INST_CMPL_ALT,         PM_RUN_INST_CMPL },
+};
+
+static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	/*
+	 * Hand the POWER9 alternatives table and its row count to
+	 * isa207_get_alternatives() and pass its result straight back.
+	 */
+	return isa207_get_alternatives(event, alt,
+				       ARRAY_SIZE(power9_event_alternatives),
+				       flags, power9_event_alternatives);
+}
+
+static int power9_check_attr_config(struct perf_event *ev)
+{
+	u64 config = ev->attr.config;
+	u64 sample = (config >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+
+	/* A sample field of 0xC is refused before the isa3XX check runs */
+	if (sample == 0xC)
+		return -EINVAL;
+
+	return isa3XX_check_attr_config(ev) ? -EINVAL : 0;
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_ICT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1_FIN);
+GENERIC_EVENT_ATTR(mem-loads,			MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores,			MEM_STORES);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1_FIN);
+CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
+CACHE_EVENT_ATTR(L1-dcache-prefetches,		PM_L1_PREF);
+CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads,		PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_WRITE);
+CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PREF_ALL);
+CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
+
+static struct attribute *power9_events_attr[] = {
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
+	GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
+	GENERIC_EVENT_PTR(MEM_LOADS),
+	GENERIC_EVENT_PTR(MEM_STORES),
+	CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
+	CACHE_EVENT_PTR(PM_LD_REF_L1),
+	CACHE_EVENT_PTR(PM_L1_PREF),
+	CACHE_EVENT_PTR(PM_ST_MISS_L1),
+	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+	CACHE_EVENT_PTR(PM_INST_FROM_L1),
+	CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+	CACHE_EVENT_PTR(PM_L3_PREF_ALL),
+	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+	CACHE_EVENT_PTR(PM_BR_CMPL),
+	CACHE_EVENT_PTR(PM_DTLB_MISS),
+	CACHE_EVENT_PTR(PM_ITLB_MISS),
+	NULL
+};
+
+static const struct attribute_group power9_pmu_events_group = {
+	.name = "events",
+	.attrs = power9_events_attr,
+};
+
+PMU_FORMAT_ATTR(event,		"config:0-51");
+PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
+PMU_FORMAT_ATTR(mark,		"config:8");
+PMU_FORMAT_ATTR(combine,	"config:10-11");
+PMU_FORMAT_ATTR(unit,		"config:12-15");
+PMU_FORMAT_ATTR(pmc,		"config:16-19");
+PMU_FORMAT_ATTR(cache_sel,	"config:20-23");
+PMU_FORMAT_ATTR(sample_mode,	"config:24-28");
+PMU_FORMAT_ATTR(thresh_sel,	"config:29-31");
+PMU_FORMAT_ATTR(thresh_stop,	"config:32-35");
+PMU_FORMAT_ATTR(thresh_start,	"config:36-39");
+PMU_FORMAT_ATTR(thresh_cmp,	"config:40-49");
+PMU_FORMAT_ATTR(sdar_mode,	"config:50-51");
+
+static struct attribute *power9_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_pmcxsel.attr,
+	&format_attr_mark.attr,
+	&format_attr_combine.attr,
+	&format_attr_unit.attr,
+	&format_attr_pmc.attr,
+	&format_attr_cache_sel.attr,
+	&format_attr_sample_mode.attr,
+	&format_attr_thresh_sel.attr,
+	&format_attr_thresh_stop.attr,
+	&format_attr_thresh_start.attr,
+	&format_attr_thresh_cmp.attr,
+	&format_attr_sdar_mode.attr,
+	NULL,
+};
+
+static const struct attribute_group power9_pmu_format_group = {
+	.name = "format",
+	.attrs = power9_pmu_format_attr,
+};
+
+static struct attribute *power9_pmu_caps_attrs[] = {
+	NULL
+};
+
+static struct attribute_group power9_pmu_caps_group = {
+	.name  = "caps",
+	.attrs = power9_pmu_caps_attrs,
+};
+
+static const struct attribute_group *power9_pmu_attr_groups[] = {
+	&power9_pmu_format_group,
+	&power9_pmu_events_group,
+	&power9_pmu_caps_group,
+	NULL,
+};
+
+static int power9_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_ICT_NOSLOT_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PM_CMPLU_STALL,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1_FIN,
+};
+
+static u64 power9_bhrb_filter_map(u64 branch_sample_type)
+{
+	/* Branch types for which no filter is offered (HW does not support) */
+	const u64 rejected = PERF_SAMPLE_BRANCH_ANY_RETURN |
+			     PERF_SAMPLE_BRANCH_IND_CALL |
+			     PERF_SAMPLE_BRANCH_CALL;
+
+	/*
+	 * The privilege state filter is shared between BHRB and regular
+	 * PMU events, and BHRB is always recorded together with a regular
+	 * PMU event.  That filter is therefore taken care of by the basic
+	 * PMC configuration of the accompanying event, and any separate
+	 * BHRB specific privilege request is ignored here.
+	 */
+
+	/* PERF_SAMPLE_BRANCH_ANY: no branch filter requested, no IFM bits */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+		return 0;
+
+	/* Any of the rejected types, alone or combined, is an error */
+	if (branch_sample_type & rejected)
+		return -1;
+
+	/* PERF_SAMPLE_BRANCH_ANY_CALL maps onto the IFM1 filter setting */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		return POWER9_MMCRA_IFM1;
+
+	/*
+	 * Everything else is unsupported; the -1 is converted to the
+	 * all-ones u64 value on return.
+	 */
+	return -1;
+}
+
+static void power9_config_bhrb(u64 pmu_bhrb_filter)
+{
+	u64 ifm_bits = pmu_bhrb_filter & POWER9_MMCRA_BHRB_MASK;
+
+	/* OR the masked filter bits into the current MMCRA contents */
+	mtspr(SPRN_MMCRA, mfspr(SPRN_MMCRA) | ifm_bits);
+}
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
+			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1_FIN,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_PREF,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
+			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
+			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_DTLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ITLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_BR_CMPL,
+			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(NODE) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+#undef C
+
+static struct power_pmu power9_pmu = {
+	.name			= "POWER9",
+	.n_counter		= MAX_PMU_COUNTERS,
+	.add_fields		= ISA207_ADD_FIELDS,
+	.test_adder		= ISA207_TEST_ADDER,
+	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
+	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
+	.compute_mmcr		= isa207_compute_mmcr,
+	.config_bhrb		= power9_config_bhrb,
+	.bhrb_filter_map	= power9_bhrb_filter_map,
+	.get_constraint		= isa207_get_constraint,
+	.get_alternatives	= power9_get_alternatives,
+	.get_mem_data_src	= isa207_get_mem_data_src,
+	.get_mem_weight		= isa207_get_mem_weight,
+	.disable_pmc		= isa207_disable_pmc,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
+	.n_generic		= ARRAY_SIZE(power9_generic_events),
+	.generic_events		= power9_generic_events,
+	.cache_events		= &power9_cache_events,
+	.attr_groups		= power9_pmu_attr_groups,
+	.bhrb_nr		= 32,
+	.capabilities           = PERF_PMU_CAP_EXTENDED_REGS,
+	.check_attr_config	= power9_check_attr_config,
+};
+
+int __init init_power9_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+	int rc;
+
+	if (PVR_VER(pvr) != PVR_POWER9)
+		return -ENODEV;
+
+	/* Blacklist events: skipped when the PVR_POWER9_CUMULUS bit is set */
+	if (!(pvr & PVR_POWER9_CUMULUS) && PVR_CFG(pvr) == 2) {
+		if (PVR_MIN(pvr) == 1) {
+			power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
+			power9_pmu.blacklist_ev = p9_dd21_bl_ev;
+		} else if (PVR_MIN(pvr) == 2) {
+			power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
+			power9_pmu.blacklist_ev = p9_dd22_bl_ev;
+		}
+	}
+
+	/* The extended register mask is assigned before registration */
+	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
+
+	rc = register_power_pmu(&power9_pmu);
+	if (rc)
+		return rc;
+
+	/* Registration succeeded: advertise EBB support to userspace */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
new file mode 100644
index 0000000000..762676fb83
--- /dev/null
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for PPC970-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/string.h>
+#include <linux/perf_event.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+/*
+ * Bits in event code for PPC970
+ */
+#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
+#define PM_PMC_MSK	0xf
+#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
+#define PM_UNIT_MSK	0xf
+#define PM_SPCSEL_SH	6
+#define PM_SPCSEL_MSK	3
+#define PM_BYTE_SH	4	/* Byte number of event bus to use */
+#define PM_BYTE_MSK	3
+#define PM_PMCSEL_MSK	0xf
+
+/* Values in PM_UNIT field */
+#define PM_NONE		0
+#define PM_FPU		1
+#define PM_VPU		2
+#define PM_ISU		3
+#define PM_IFU		4
+#define PM_IDU		5
+#define PM_STS		6
+#define PM_LSU0		7
+#define PM_LSU1U	8
+#define PM_LSU1L	9
+#define PM_LASTUNIT	9
+
+/*
+ * Bits in MMCR0 for PPC970
+ */
+#define MMCR0_PMC1SEL_SH	8
+#define MMCR0_PMC2SEL_SH	1
+#define MMCR_PMCSEL_MSK		0x1f
+
+/*
+ * Bits in MMCR1 for PPC970
+ */
+#define MMCR1_TTM0SEL_SH	62
+#define MMCR1_TTM1SEL_SH	59
+#define MMCR1_TTM3SEL_SH	53
+#define MMCR1_TTMSEL_MSK	3
+#define MMCR1_TD_CP_DBG0SEL_SH	50
+#define MMCR1_TD_CP_DBG1SEL_SH	48
+#define MMCR1_TD_CP_DBG2SEL_SH	46
+#define MMCR1_TD_CP_DBG3SEL_SH	44
+#define MMCR1_PMC1_ADDER_SEL_SH	39
+#define MMCR1_PMC2_ADDER_SEL_SH	38
+#define MMCR1_PMC6_ADDER_SEL_SH	37
+#define MMCR1_PMC5_ADDER_SEL_SH	36
+#define MMCR1_PMC8_ADDER_SEL_SH	35
+#define MMCR1_PMC7_ADDER_SEL_SH	34
+#define MMCR1_PMC3_ADDER_SEL_SH	33
+#define MMCR1_PMC4_ADDER_SEL_SH	32
+#define MMCR1_PMC3SEL_SH	27
+#define MMCR1_PMC4SEL_SH	22
+#define MMCR1_PMC5SEL_SH	17
+#define MMCR1_PMC6SEL_SH	12
+#define MMCR1_PMC7SEL_SH	7
+#define MMCR1_PMC8SEL_SH	2
+
+static short mmcr1_adder_bits[8] = {
+	MMCR1_PMC1_ADDER_SEL_SH,
+	MMCR1_PMC2_ADDER_SEL_SH,
+	MMCR1_PMC3_ADDER_SEL_SH,
+	MMCR1_PMC4_ADDER_SEL_SH,
+	MMCR1_PMC5_ADDER_SEL_SH,
+	MMCR1_PMC6_ADDER_SEL_SH,
+	MMCR1_PMC7_ADDER_SEL_SH,
+	MMCR1_PMC8_ADDER_SEL_SH
+};
+
+/*
+ * Layout of constraint bits:
+ * 6666555555555544444444443333333333222222222211111111110000000000
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ *               <><><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><>
+ *               SPT0T1 UC  PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
+ *
+ * SP - SPCSEL constraint
+ *     48-49: SPCSEL value 0x3_0000_0000_0000
+ *
+ * T0 - TTM0 constraint
+ *     46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
+ *
+ * T1 - TTM1 constraint
+ *     44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
+ *
+ * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
+ *     43: UC3 error 0x0800_0000_0000
+ *     42: FPU|IFU|VPU events needed 0x0400_0000_0000
+ *     41: ISU events needed 0x0200_0000_0000
+ *     40: IDU|STS events needed 0x0100_0000_0000
+ *
+ * PS1
+ *     39: PS1 error 0x0080_0000_0000
+ *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
+ *
+ * PS2
+ *     35: PS2 error 0x0008_0000_0000
+ *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
+ *
+ * B0
+ *     28-31: Byte 0 event source 0xf000_0000
+ *	      Encoding as for the event code
+ *
+ * B1, B2, B3
+ *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
+ *
+ * P1
+ *     15: P1 error 0x8000
+ *     14-15: Count of events needing PMC1
+ *
+ * P2..P8
+ *     0-13: Count of events needing PMC2..PMC8
+ */
+
+static unsigned char direct_marked_event[8] = {
+	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
+	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
+	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
+	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
+	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
+	(1<<3) | (1<<4) | (1<<5),
+		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
+	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
+	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
+};
+
+/*
+ * Returns 1 if @event (a raw PPC970 event code) counts things relating to
+ * marked instructions, so MMCRA_SAMPLE_ENABLE must be set; otherwise 0.
+ */
+static int p970_marked_instr_event(u64 event)
+{
+	int pmc, psel, unit, byte, bit;
+	u64 mask;	/* 64 bits wide: the final shift count can reach 39 */
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	psel = event & PM_PMCSEL_MSK;
+	if (pmc) {	/* NOTE(review): pmc > 8 is not rejected here */
+		if (direct_marked_event[pmc - 1] & (1 << psel))
+			return 1;
+		if (psel == 0)		/* add events */
+			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
+		else if (psel == 7 || psel == 13)	/* decode events */
+			bit = 4;
+		else
+			return 0;
+	} else
+		bit = psel;	/* no PMC given: psel (0..15) is the bit index */
+
+	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	mask = 0;	/* units not listed below have no marked bus events */
+	switch (unit) {
+	case PM_VPU:
+		mask = 0x4c;		/* byte 0 bits 2,3,6 */
+		break;
+	case PM_LSU0:
+		/* byte 2 bits 0,2,3,4,6; all of byte 1 */
+		mask = 0x085dff00;
+		break;
+	case PM_LSU1L:
+		mask = 0x50 << 24;	/* byte 3 bits 4,6 */
+		break;
+	}
+	return (mask >> (byte * 8 + bit)) & 1;	/* counts >= 32 now yield 0 */
+}
+
+/* Masks and values for using events from the various units */
+static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc80000000000ull, 0x040000000000ull },
+	[PM_VPU] =   { 0xc80000000000ull, 0xc40000000000ull },
+	[PM_ISU] =   { 0x080000000000ull, 0x020000000000ull },
+	[PM_IFU] =   { 0xc80000000000ull, 0x840000000000ull },
+	[PM_IDU] =   { 0x380000000000ull, 0x010000000000ull },
+	[PM_STS] =   { 0x380000000000ull, 0x310000000000ull },
+};
+
+static int p970_get_constraint(u64 event, unsigned long *maskp,
+			       unsigned long *valp, u64 event_config1 __maybe_unused)
+{
+	int pmc, byte, unit, sh, spcsel;
+	unsigned long mask = 0, value = 0;
+	int grp = -1;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > 8)
+			return -1;
+		sh = (pmc - 1) * 2;
+		mask |= 2 << sh;
+		value |= 1 << sh;
+		grp = ((pmc - 1) >> 1) & 1;
+	}
+	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+	if (unit) {
+		if (unit > PM_LASTUNIT)
+			return -1;
+		mask |= unit_cons[unit][0];
+		value |= unit_cons[unit][1];
+		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
+		/*
+		 * Bus events on bytes 0 and 2 can be counted
+		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
+		 */
+		if (!pmc)
+			grp = byte & 1;
+		/* Set byte lane select field */
+		mask  |= 0xfULL << (28 - 4 * byte);
+		value |= (unsigned long)unit << (28 - 4 * byte);
+	}
+	if (grp == 0) {
+		/* increment PMC1/2/5/6 field */
+		mask  |= 0x8000000000ull;
+		value |= 0x1000000000ull;
+	} else if (grp == 1) {
+		/* increment PMC3/4/7/8 field */
+		mask  |= 0x800000000ull;
+		value |= 0x100000000ull;
+	}
+	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+	if (spcsel) {
+		mask  |= 3ull << 48;
+		value |= (unsigned long)spcsel << 48;
+	}
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int n_alt = 1;
+
+	/* The event itself always comes first */
+	alt[0] = event;
+
+	/* LSU empty: 0x2002 and 0x3002 are alternatives of each other */
+	if (event == 0x2002 || event == 0x3002)
+		alt[n_alt++] = event ^ 0x1000;
+	return n_alt;
+}
+
+static int p970_compute_mmcr(u64 event[], int n_ev,
+			     unsigned int hwc[], struct mmcr_regs *mmcr,
+			     struct perf_event *pevents[],
+			     u32 flags __maybe_unused)
+{
+	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned int pmc, unit, byte, psel;
+	unsigned int ttm, grp;
+	unsigned int pmc_inuse = 0;
+	unsigned int pmc_grp_use[2];
+	unsigned char busbyte[4];
+	unsigned char unituse[16];
+	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
+	unsigned char ttmuse[2];
+	unsigned char pmcsel[8];
+	int i;
+	int spcsel;
+
+	if (n_ev > 8)
+		return -1;
+
+	/* First pass to count resource use */
+	pmc_grp_use[0] = pmc_grp_use[1] = 0;
+	memset(busbyte, 0, sizeof(busbyte));
+	memset(unituse, 0, sizeof(unituse));
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		if (pmc) {
+			if (pmc_inuse & (1 << (pmc - 1)))
+				return -1;
+			pmc_inuse |= 1 << (pmc - 1);
+			/* count 1/2/5/6 vs 3/4/7/8 use */
+			++pmc_grp_use[((pmc - 1) >> 1) & 1];
+		}
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		if (unit) {
+			if (unit > PM_LASTUNIT)
+				return -1;
+			if (!pmc)
+				++pmc_grp_use[byte & 1];
+			if (busbyte[byte] && busbyte[byte] != unit)
+				return -1;
+			busbyte[byte] = unit;
+			unituse[unit] = 1;
+		}
+	}
+	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
+		return -1;
+
+	/*
+	 * Assign resources and set multiplexer selects.
+	 *
+	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
+	 * choice we have to deal with.
+	 */
+	if (unituse[PM_ISU] &
+	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
+		unitmap[PM_ISU] = 2 | 4;	/* move ISU to TTM1 */
+	/* Set TTM[01]SEL fields. */
+	ttmuse[0] = ttmuse[1] = 0;
+	for (i = PM_FPU; i <= PM_STS; ++i) {
+		if (!unituse[i])
+			continue;
+		ttm = unitmap[i];
+		++ttmuse[(ttm >> 2) & 1];
+		mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
+	}
+	/* Check only one unit per TTMx */
+	if (ttmuse[0] > 1 || ttmuse[1] > 1)
+		return -1;
+
+	/* Set byte lane select fields and TTM3SEL. */
+	for (byte = 0; byte < 4; ++byte) {
+		unit = busbyte[byte];
+		if (!unit)
+			continue;
+		if (unit <= PM_STS)
+			ttm = (unitmap[unit] >> 2) & 1;
+		else if (unit == PM_LSU0)
+			ttm = 2;
+		else {
+			ttm = 3;
+			if (unit == PM_LSU1L && byte >= 2)
+				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+		}
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+	}
+
+	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
+	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
+	for (i = 0; i < n_ev; ++i) {
+		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
+		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
+		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
+		psel = event[i] & PM_PMCSEL_MSK;
+		if (!pmc) {
+			/* Bus event or any-PMC direct event */
+			if (unit)
+				psel |= 0x10 | ((byte & 2) << 2);
+			else
+				psel |= 8;
+			for (pmc = 0; pmc < 8; ++pmc) {
+				if (pmc_inuse & (1 << pmc))
+					continue;
+				grp = (pmc >> 1) & 1;
+				if (unit) {
+					if (grp == (byte & 1))
+						break;
+				} else if (pmc_grp_use[grp] < 4) {
+					++pmc_grp_use[grp];
+					break;
+				}
+			}
+			pmc_inuse |= 1 << pmc;
+		} else {
+			/* Direct event */
+			--pmc;
+			if (psel == 0 && (byte & 2))
+				/* add events on higher-numbered bus */
+				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
+		}
+		pmcsel[pmc] = psel;
+		hwc[i] = pmc;
+		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
+		mmcr1 |= spcsel;
+		if (p970_marked_instr_event(event[i]))
+			mmcra |= MMCRA_SAMPLE_ENABLE;
+	}
+	for (pmc = 0; pmc < 2; ++pmc)
+		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
+	for (; pmc < 8; ++pmc)
+		mmcr1 |= (unsigned long)pmcsel[pmc]
+			<< (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0xfe)
+		mmcr0 |= MMCR0_PMCjCE;
+
+	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
+
+	/* Return MMCRx values */
+	mmcr->mmcr0 = mmcr0;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
+	return 0;
+}
+
+static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	int shift;
+
+	/* PMC x stops counting once its 5-bit PMCxSEL field holds 0x08. */
+	if (pmc > 1) {
+		shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
+		mmcr->mmcr1 &= ~(0x1fUL << shift);
+		mmcr->mmcr1 |= 0x08UL << shift;
+	} else {
+		shift = MMCR0_PMC1SEL_SH - 7 * pmc;
+		mmcr->mmcr0 &= ~(0x1fUL << shift);
+		mmcr->mmcr0 |= 0x08UL << shift;
+	}
+}
+
+static int ppc970_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 1,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8810, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3810, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x431,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES] 		= 0x327,  /* PM_GRP_BR_MPRED */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x8810,		0x3810	},
+		[C(OP_WRITE)] = {	0x7810,		0x813	},
+		[C(OP_PREFETCH)] = {	0x731,		0	},
+	},
+	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0x733,		0	},
+	},
+	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x704	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x700	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x431,		0x327	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,		-1	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+static struct power_pmu ppc970_pmu = {
+	.name			= "PPC970/FX/MP",
+	.n_counter		= 8,
+	.max_alternatives	= 2,
+	.add_fields		= 0x001100005555ull,
+	.test_adder		= 0x013300000000ull,
+	.compute_mmcr		= p970_compute_mmcr,
+	.get_constraint		= p970_get_constraint,
+	.get_alternatives	= p970_get_alternatives,
+	.disable_pmc		= p970_disable_pmc,
+	.n_generic		= ARRAY_SIZE(ppc970_generic_events),
+	.generic_events		= ppc970_generic_events,
+	.cache_events		= &ppc970_cache_events,
+	.flags			= PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
+};
+
+int __init init_ppc970_pmu(void)
+{
+	unsigned int pvr = mfspr(SPRN_PVR);
+	unsigned int ver = PVR_VER(pvr);	/* compared against PVR_970* */
+
+	if (ver == PVR_970 || ver == PVR_970MP ||
+	    ver == PVR_970FX || ver == PVR_970GX)
+		return register_power_pmu(&ppc970_pmu);
+	return -ENODEV;
+}
diff --git a/arch/powerpc/perf/req-gen/_begin.h b/arch/powerpc/perf/req-gen/_begin.h
new file mode 100644
index 0000000000..a200b86eba
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_begin.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Include paths to be used in interface defining headers */
+#ifndef POWERPC_PERF_REQ_GEN_H_
+#define POWERPC_PERF_REQ_GEN_H_
+
+#include <linux/stringify.h>
+
+#define CAT2_STR_(t, s) __stringify(t/s)
+#define CAT2_STR(t, s) CAT2_STR_(t, s)
+#define I(...) __VA_ARGS__
+
+#endif
+
+#define REQ_GEN_PREFIX req-gen
+#define REQUEST_BEGIN CAT2_STR(REQ_GEN_PREFIX, _request-begin.h)
+#define REQUEST_END   CAT2_STR(REQ_GEN_PREFIX, _request-end.h)
diff --git a/arch/powerpc/perf/req-gen/_clear.h b/arch/powerpc/perf/req-gen/_clear.h
new file mode 100644
index 0000000000..67c3859157
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_clear.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef __field_
+#undef __count_
+#undef __array_
+#undef REQUEST_
diff --git a/arch/powerpc/perf/req-gen/_end.h b/arch/powerpc/perf/req-gen/_end.h
new file mode 100644
index 0000000000..8a406980b6
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_end.h
@@ -0,0 +1,4 @@
+
+#undef REQ_GEN_PREFIX
+#undef REQUEST_BEGIN
+#undef REQUEST_END
diff --git a/arch/powerpc/perf/req-gen/_request-begin.h b/arch/powerpc/perf/req-gen/_request-begin.h
new file mode 100644
index 0000000000..7c74c2ab4c
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_request-begin.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define REQUEST(r_contents) \
+	REQUEST_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, I(r_contents))
+
+#define __field(f_offset, f_bytes, f_name) \
+	__field_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+		 f_offset, f_bytes, f_name)
+
+#define __array(f_offset, f_bytes, f_name) \
+	__array_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+		 f_offset, f_bytes, f_name)
+
+#define __count(f_offset, f_bytes, f_name) \
+	__count_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+		 f_offset, f_bytes, f_name)
diff --git a/arch/powerpc/perf/req-gen/_request-end.h b/arch/powerpc/perf/req-gen/_request-end.h
new file mode 100644
index 0000000000..7d9f4046c2
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_request-end.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef REQUEST
+#undef __field
+#undef __array
+#undef __count
+
+#undef REQUEST_NAME
+#undef REQUEST_NUM
+#undef REQUEST_IDX_KIND
diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
new file mode 100644
index 0000000000..6b2a59feff
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/perf.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_POWERPC_PERF_REQ_GEN_PERF_H_
+#define LINUX_POWERPC_PERF_REQ_GEN_PERF_H_
+
+#include <linux/perf_event.h>
+#include <linux/stringify.h>
+
+#ifndef REQUEST_FILE
+#error "REQUEST_FILE must be defined before including"
+#endif
+
+#ifndef NAME_LOWER
+#error "NAME_LOWER must be defined before including"
+#endif
+
+#ifndef NAME_UPPER
+#error "NAME_UPPER must be defined before including"
+#endif
+
+#define BE_TYPE_b1 __u8
+#define BE_TYPE_b2 __be16
+#define BE_TYPE_b4 __be32
+#define BE_TYPE_b8 __be64
+/* Select one of the BE_TYPE_b<N> names above from a byte count of 1, 2, 4 or 8. */
+#define BYTES_TO_BE_TYPE(bytes) \
+		BE_TYPE_b##bytes
+/* The extra macro level lets arguments such as NAME_LOWER expand before pasting. */
+#define CAT2_(a, b) a ## b
+#define CAT2(a, b) CAT2_(a, b)
+#define CAT3_(a, b, c) a ## b ## c
+#define CAT3(a, b, c) CAT3_(a, b, c)
+
+/*
+ * enumerate the request values as
+ * <NAME_UPPER>_<request name> = <request value>
+ */
+#define REQUEST_VALUE__(name_upper, r_name) name_upper ## _ ## r_name
+#define REQUEST_VALUE_(name_upper, r_name) REQUEST_VALUE__(name_upper, r_name)
+#define REQUEST_VALUE(r_name) REQUEST_VALUE_(NAME_UPPER, r_name)
+
+#include "_clear.h"
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+	REQUEST_VALUE(r_name) = r_value,
+enum CAT2(NAME_LOWER, _requests) {
+#include REQUEST_FILE
+};
+
+/*
+ * For each request:
+ * struct <NAME_LOWER>_<request name> {
+ *	r_fields
+ * };
+ */
+#include "_clear.h"
+#define STRUCT_NAME__(name_lower, r_name) name_lower ## _ ## r_name
+#define STRUCT_NAME_(name_lower, r_name) STRUCT_NAME__(name_lower, r_name)
+#define STRUCT_NAME(r_name) STRUCT_NAME_(NAME_LOWER, r_name)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields)	\
+struct STRUCT_NAME(r_name) {				\
+	r_fields					\
+};
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \
+	BYTES_TO_BE_TYPE(f_bytes) f_name;
+#define __count_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \
+	__field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_bytes, a_name) \
+	__u8 a_name[a_bytes];
+
+#include REQUEST_FILE
+
+/*
+ * Generate a check of the field offsets
+ * <NAME_LOWER>_assert_offsets_correct()
+ */
+#include "_clear.h"
+#define REQUEST_(r_name, r_value, index, r_fields)			\
+r_fields
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) \
+	BUILD_BUG_ON(offsetof(struct STRUCT_NAME(r_name), f_name) != f_offset);
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \
+	__field_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) \
+	__field_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+
+static inline void CAT2(NAME_LOWER, _assert_offsets_correct)(void)
+{
+#include REQUEST_FILE
+}
+
+/*
+ * Generate event attributes:
+ * PMU_EVENT_ATTR_STRING(<request name>_<field name>,
+ *	<NAME_LOWER>_event_attr_<request name>_<field name>,
+ *		"request=<request value>"
+ *		"starting_index=<starting index type>"
+ *		"counter_info_version=CURRENT_COUNTER_INFO_VERSION"
+ *		"length=<f_size>"
+ *		"offset=<f_offset>")
+ *
+ *	TODO: counter_info_version may need to vary; we should interpret the
+ *	value to some extent
+ */
+#define EVENT_ATTR_NAME__(name, r_name, c_name) \
+	name ## _event_attr_ ## r_name ## _ ## c_name
+#define EVENT_ATTR_NAME_(name, r_name, c_name) \
+	EVENT_ATTR_NAME__(name, r_name, c_name)
+#define EVENT_ATTR_NAME(r_name, c_name) \
+	EVENT_ATTR_NAME_(NAME_LOWER, r_name, c_name)
+
+#include "_clear.h"
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)	\
+PMU_EVENT_ATTR_STRING(							\
+		CAT3(r_name, _, c_name),				\
+		EVENT_ATTR_NAME(r_name, c_name),			\
+		"request=" __stringify(r_value) ","			\
+		r_idx_1 ","						\
+		"counter_info_version="					\
+			__stringify(COUNTER_INFO_VERSION_CURRENT) ","	\
+		"length=" #c_size ","					\
+		"offset=" #c_offset)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields)			\
+	r_fields
+
+#include REQUEST_FILE
+
+/*
+ * Define event attribute array
+ * static struct attribute *hv_gpci_event_attrs[] = {
+ *	&<NAME_LOWER>_event_attr_<request name>_<field name>.attr,
+ * };
+ */
+#include "_clear.h"
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name)
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)	\
+	&EVENT_ATTR_NAME(r_name, c_name).attr.attr,
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields)			\
+	r_fields
+
+/* Generate event list for platforms with counter_info_version 0x6 or below */
+static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = {
+#include REQUEST_FILE
+	NULL
+};
+
+/*
+ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci
+ * events were deprecated for platform firmware that supports
+ * counter_info_version 0x8 or above.
+ * Those deprecated events are still part of platform firmware that
+ * support counter_info_version 0x6 and below. As per the getPerfCountInfo
+ * v1.018 documentation there is no counter_info_version 0x7.
+ * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of
+ * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms
+ * that supports counter_info_version 0x8 or above.
+ */
+#undef ENABLE_EVENTS_COUNTERINFO_V6
+
+/* Generate event list for platforms with counter_info_version 0x8 or above*/
+static __maybe_unused struct attribute *hv_gpci_event_attrs[] = {
+#include REQUEST_FILE
+	NULL
+};
+
+/* cleanup: undefine the helper macros used by the generator passes above */
+#include "_clear.h"
+#undef EVENT_ATTR_NAME
+#undef EVENT_ATTR_NAME_
+#undef BIT_NAME	/* NOTE(review): no BIT_NAME definition is visible in this header */
+#undef BIT_NAME_
+#undef STRUCT_NAME
+#undef REQUEST_VALUE
+#undef REQUEST_VALUE_
+
+#endif
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
new file mode 100644
index 0000000000..b3c466c505
--- /dev/null
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0
+config ACADIA
+	bool "Acadia"
+	depends on 40x
+	select PPC40x_SIMPLE
+	select 405EZ
+	help
+	  This option enables support for the AMCC 405EZ Acadia evaluation board.
+
+config HOTFOOT
+	bool "Hotfoot"
+	depends on 40x
+	select PPC40x_SIMPLE
+	select FORCE_PCI
+	help
+	  This option enables support for the ESTEEM 195E Hotfoot board.
+
+config KILAUEA
+	bool "Kilauea"
+	depends on 40x
+	select 405EX
+	select PPC40x_SIMPLE
+	select PPC4xx_PCI_EXPRESS
+	select FORCE_PCI
+	select PCI_MSI
+	help
+	  This option enables support for the AMCC PPC405EX evaluation board.
+
+config MAKALU
+	bool "Makalu"
+	depends on 40x
+	select 405EX
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	select PPC40x_SIMPLE
+	help
+	  This option enables support for the AMCC PPC405EX board.
+
+config OBS600
+	bool "OpenBlockS 600"
+	depends on 40x
+	select 405EX
+	select PPC40x_SIMPLE
+	help
+	  This option enables support for PlatHome OpenBlockS 600 server
+
+config PPC40x_SIMPLE
+	bool "Simple PowerPC 40x board support"
+	depends on 40x
+	help
+	  This option enables the simple PowerPC 40x platform support.
+
+config 405EX
+	bool
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+
+config 405EZ
+	bool
+	select IBM_EMAC_NO_FLOW_CTRL if IBM_EMAC
+	select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC
+	select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC
+
+config PPC4xx_GPIO
+	bool "PPC4xx GPIO support"
+	depends on 40x
+	select GPIOLIB
+	select OF_GPIO_MM_GPIOCHIP
+	help
+	  Enable gpiolib support for ppc40x based boards
+
+config APM8018X
+	bool "APM8018X"
+	depends on 40x
+	select PPC40x_SIMPLE
+	help
+	  This option enables support for the AppliedMicro APM8018X evaluation
+	  board.
diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile
new file mode 100644
index 0000000000..122de98527
--- /dev/null
+++ b/arch/powerpc/platforms/40x/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC40x_SIMPLE)		+= ppc40x_simple.o
diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c
new file mode 100644
index 0000000000..e454e9d2ef
--- /dev/null
+++ b/arch/powerpc/platforms/40x/ppc40x_simple.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic PowerPC 40x platform support
+ *
+ * Copyright 2008 IBM Corporation
+ *
+ * This implements simple platform support for PowerPC 40x chips.  This is
+ * mostly used for eval boards or other simple and "generic" 40x boards.  If
+ * your board has custom functions or hardware, then you will likely want to
+ * implement your own board.c file to accommodate it.
+ */
+
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static const struct of_device_id ppc40x_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb3", },
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{},	/* empty entry ends the table passed to of_platform_bus_probe() */
+};
+
+static int __init ppc40x_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc40x_of_bus, NULL);
+	/* The probe result is not checked; the initcall always returns 0. */
+	return 0;
+}
+machine_device_initcall(ppc40x_simple, ppc40x_device_probe);
+
+/* This is the list of boards that can be supported by this simple
+ * platform code.  This does _not_ mean the boards are compatible,
+ * as they most certainly are not from a device tree perspective.
+ * However, their differences are handled by the device tree and the
+ * drivers and therefore they don't need custom board support files.
+ *
+ * Again, if your board needs to do things differently then create a
+ * board.c file for it rather than adding it to this list.
+ */
+static const char * const board[] __initconst = {
+	"amcc,acadia",
+	"amcc,haleakala",
+	"amcc,kilauea",
+	"amcc,makalu",
+	"apm,klondike",
+	"est,hotfoot",
+	"plathome,obs600",
+	NULL	/* end-of-list marker */
+};
+
+/* Accept the machine only if the root node is compatible with a board[] entry. */
+static int __init ppc40x_probe(void)
+{
+	if (!of_device_compatible_match(of_root, board))
+		return 0;
+
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+	return 1;
+}
+
+define_machine(ppc40x_simple) {
+	.name = "PowerPC 40x Platform",
+	.probe = ppc40x_probe,		/* matches the root node against board[] */
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/44x.h b/arch/powerpc/platforms/44x/44x.h
new file mode 100644
index 0000000000..0e912a6a0b
--- /dev/null
+++ b/arch/powerpc/platforms/44x/44x.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERPC_PLATFORMS_44X_44X_H
+#define __POWERPC_PLATFORMS_44X_44X_H
+
+extern u8 as1_readb(volatile u8 __iomem  *addr);
+extern void as1_writeb(u8 data, volatile u8 __iomem *addr);
+/* GPIO0 register offsets; canyonlands.c adds them to the mapped GPIO base. */
+#define GPIO0_OSRH	0xC
+#define GPIO0_TSRH	0x14
+#define GPIO0_ISR1H	0x34
+
+#endif /* __POWERPC_PLATFORMS_44X_44X_H */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
new file mode 100644
index 0000000000..35a1f4b9f8
--- /dev/null
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -0,0 +1,319 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_47x
+	bool "Support for 47x variant"
+	depends on 44x
+	select MPIC
+	help
+	  This option enables support for the 47x family of processors and is
+	  not currently compatible with other 44x or 46x variants
+
+config BAMBOO
+	bool "Bamboo"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440EP
+	select FORCE_PCI
+	help
+	  This option enables support for the IBM PPC440EP evaluation board.
+
+config BLUESTONE
+	bool "Bluestone"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select APM821xx
+	select FORCE_PCI
+	select PCI_MSI
+	select PPC4xx_PCI_EXPRESS
+	select IBM_EMAC_RGMII if IBM_EMAC
+	help
+	  This option enables support for the APM APM821xx Evaluation board.
+
+config EBONY
+	bool "Ebony"
+	depends on 44x
+	default y
+	select 440GP
+	select FORCE_PCI
+	select OF_RTC
+	help
+	  This option enables support for the IBM PPC440GP evaluation board.
+
+config SAM440EP
+	bool "Sam440ep"
+	depends on 44x
+	select 440EP
+	select FORCE_PCI
+	help
+	  This option enables support for the ACube Sam440ep board.
+
+config SEQUOIA
+	bool "Sequoia"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440EPX
+	help
+	  This option enables support for the AMCC PPC440EPX evaluation board.
+
+config TAISHAN
+	bool "Taishan"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440GX
+	select FORCE_PCI
+	help
+	  This option enables support for the AMCC PPC440GX "Taishan"
+	  evaluation board.
+
+config KATMAI
+	bool "Katmai"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440SPe
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	help
+	  This option enables support for the AMCC PPC440SPe evaluation board.
+
+config RAINIER
+	bool "Rainier"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440GRX
+	select FORCE_PCI
+	help
+	  This option enables support for the AMCC PPC440GRX evaluation board.
+
+config WARP
+	bool "PIKA Warp"
+	depends on 44x
+	select 440EP
+	help
+	  This option enables support for the PIKA Warp(tm) Appliance. The Warp
+	  is a small computer replacement with up to 9 ports of FXO/FXS plus VOIP
+	  stations and trunks.
+
+	  See http://www.pikatechnologies.com/ and follow the "PIKA for Computer
+	  Telephony Developers" link for more information.
+
+config ARCHES
+	bool "Arches"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 460EX # Odd since it uses 460GT but the effects are the same
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	help
+	  This option enables support for the AMCC Dual PPC460GT evaluation board.
+
+config CANYONLANDS
+	bool "Canyonlands"
+	depends on 44x
+	select 460EX
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC
+	help
+	  This option enables support for the AMCC PPC460EX evaluation board.
+
+config GLACIER
+	bool "Glacier"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 460EX # Odd since it uses 460GT but the effects are the same
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC
+	help
+	  This option enables support for the AMCC PPC460GT evaluation board.
+
+config REDWOOD
+	bool "Redwood"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 460SX
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	help
+	  This option enables support for the AMCC PPC460SX Redwood board.
+
+config EIGER
+	bool "Eiger"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 460SX
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	select IBM_EMAC_RGMII if IBM_EMAC
+	help
+	  This option enables support for the AMCC PPC460SX evaluation board.
+
+config YOSEMITE
+	bool "Yosemite"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440EP
+	select FORCE_PCI
+	help
+	  This option enables support for the AMCC PPC440EP evaluation board.
+
+config ISS4xx
+	bool "ISS 4xx Simulator"
+	depends on 44x
+	select 440GP if 44x && !PPC_47x
+	select PPC_FPU
+	select OF_RTC
+	help
+	  This option enables support for the IBM ISS simulation environment
+
+config CURRITUCK
+	bool "IBM Currituck (476fpe) Support"
+	depends on PPC_47x
+	select I2C
+	select SWIOTLB
+	select 476FPE
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	help
+	  This option enables support for the IBM Currituck (476fpe) evaluation board
+
+config FSP2
+	bool "IBM FSP2 (476fpe) Support"
+	depends on PPC_47x
+	select 476FPE
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select COMMON_CLK
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the IBM FSP2 (476fpe) board
+
+config AKEBONO
+	bool "IBM Akebono (476gtr) Support"
+	depends on PPC_47x
+	select SWIOTLB
+	select 476FPE
+	select PPC4xx_PCI_EXPRESS
+	select FORCE_PCI
+	select PCI_MSI
+	select PPC4xx_HSTA_MSI
+	select I2C
+	select I2C_IBM_IIC
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select USB if USB_SUPPORT
+	select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD
+	select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD
+	help
+	  This option enables support for the IBM Akebono (476gtr) evaluation board
+
+
+config ICON
+	bool "Icon"
+	depends on 44x
+	select PPC44x_SIMPLE
+	select 440SPe
+	select FORCE_PCI
+	select PPC4xx_PCI_EXPRESS
+	help
+	  This option enables support for the AMCC PPC440SPe evaluation board.
+
+config PPC44x_SIMPLE
+	bool "Simple PowerPC 44x board support"
+	depends on 44x
+	help
+	  This option enables the simple PowerPC 44x platform support.
+
+config PPC4xx_GPIO
+	bool "PPC4xx GPIO support"
+	depends on 44x
+	select GPIOLIB
+	select OF_GPIO_MM_GPIOCHIP
+	help
+	  Enable gpiolib support for ppc440 based boards
+
+# 44x specific CPU modules, selected based on the board above.
+config 440EP
+	bool
+	select PPC_FPU
+	select IBM440EP_ERR42
+	select IBM_EMAC_ZMII if IBM_EMAC
+
+config 440EPX
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC
+	select USB_EHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_DESC
+
+config 440GRX
+	bool
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC
+
+config 440GP
+	bool
+	select IBM_EMAC_ZMII if IBM_EMAC
+
+config 440GX
+	bool
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC #test only
+	select IBM_EMAC_TAH if IBM_EMAC  #test only
+
+config 440SP
+	bool
+
+config 440SPe
+	bool
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+
+config 460EX
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_TAH if IBM_EMAC
+
+config 460SX
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC
+	select IBM_EMAC_TAH if IBM_EMAC
+
+config 476FPE
+	bool
+	select PPC_FPU
+
+config APM821xx
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_TAH if IBM_EMAC
+
+config 476FPE_ERR46
+	depends on 476FPE
+	bool "Enable linker work around for PPC476FPE errata #46"
+	help
+	  This option enables a work around for an icache bug on 476
+	  that can cause execution of stale instructions when falling
+	  through pages (IBM errata #46). It requires a recent version
+	  of binutils which supports the --ppc476-workaround option.
+
+	  The work around enables the appropriate linker options and
+	  ensures that all module output sections are aligned to 4K
+	  page boundaries. The work around is only required when
+	  building modules.
+
+# 44x errata/workaround config symbols, selected by the CPU models above
+config IBM440EP_ERR42
+	bool
+
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
new file mode 100644
index 0000000000..5ba031f576
--- /dev/null
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y	+= misc_44x.o machine_check.o
+ifneq ($(CONFIG_PPC4xx_CPM),y)
+obj-y	+= idle.o
+endif
+obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
+obj-$(CONFIG_EBONY)	+= ebony.o
+obj-$(CONFIG_SAM440EP) 	+= sam440ep.o
+obj-$(CONFIG_WARP)	+= warp.o
+obj-$(CONFIG_ISS4xx)	+= iss4xx.o
+obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
+obj-$(CONFIG_CURRITUCK)	+= ppc476.o
+obj-$(CONFIG_AKEBONO)	+= ppc476.o
+obj-$(CONFIG_FSP2)	+= fsp2.o
diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c
new file mode 100644
index 0000000000..8742a10d9e
--- /dev/null
+++ b/arch/powerpc/platforms/44x/canyonlands.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This contain platform specific code for APM PPC460EX based Canyonlands
+ * board.
+ *
+ * Copyright (c) 2010, Applied Micro Circuits Corporation
+ * Author: Rupjyoti Sarmah <rsarmah@apm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/delay.h>
+#include "44x.h"
+
+#define BCSR_USB_EN	0x11
+
+static const struct of_device_id ppc460ex_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{},	/* empty entry ends the table passed to of_platform_bus_probe() */
+};
+
+static int __init ppc460ex_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc460ex_of_bus, NULL);
+	/* The probe result is not checked; the initcall always returns 0. */
+	return 0;
+}
+machine_device_initcall(canyonlands, ppc460ex_device_probe);
+
+/* Using this code only for the Canyonlands board.  */
+
+static int __init ppc460ex_probe(void)
+{
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+	/* No checks of its own: this probe always reports a match. */
+	return 1;
+}
+
+/*
+ * USB PHY fixup code on Canyonlands kit: pulse the USB enable bit in BCSR7
+ * and route gpio16/gpio19 to their USB functions.  Returns 0 or -ENODEV.
+ */
+static int __init ppc460ex_canyonlands_fixup(void)
+{
+	u8 __iomem *bcsr;
+	void __iomem *vaddr;
+	struct device_node *np;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-bcsr");
+	if (!np) {
+		printk(KERN_ERR "failed did not find amcc, ppc460ex bcsr node\n");
+		return -ENODEV;
+	}
+
+	bcsr = of_iomap(np, 0);
+	of_node_put(np);
+	if (!bcsr) {
+		printk(KERN_CRIT "Could not remap bcsr\n");
+		return -ENODEV;
+	}
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc4xx-gpio");
+	if (!np) {
+		printk(KERN_ERR "failed did not find ibm,ppc4xx-gpio node\n");
+		ret = -ENODEV;
+		goto err_bcsr;	/* bcsr is mapped by now and must be released */
+	}
+
+	vaddr = of_iomap(np, 0);
+	of_node_put(np);
+	if (!vaddr) {
+		printk(KERN_CRIT "Could not get gpio node address\n");
+		ret = -ENODEV;
+		goto err_bcsr;
+	}
+
+	/* Disable USB, through the BCSR7 bits */
+	setbits8(&bcsr[7], BCSR_USB_EN);
+
+	/* Wait for a while after reset */
+	msleep(100);
+
+	/* Enable USB here */
+	clrbits8(&bcsr[7], BCSR_USB_EN);
+
+	/*
+	 * Configure multiplexed gpio16 and gpio19 as alternate1 output
+	 * source after USB reset. In this configuration gpio16 will be
+	 * USB2HStop and gpio19 will be USB2DStop. For more details refer to
+	 * table 34-7 of PPC460EX user manual.
+	 */
+	setbits32((vaddr + GPIO0_OSRH), 0x42000000);
+	setbits32((vaddr + GPIO0_TSRH), 0x42000000);
+	iounmap(vaddr);
+err_bcsr:
+	iounmap(bcsr);
+	return ret;
+}
+machine_device_initcall(canyonlands, ppc460ex_canyonlands_fixup);
+define_machine(canyonlands) {
+	.name = "Canyonlands",
+	.compatible = "amcc,canyonlands",
+	.probe = ppc460ex_probe,	/* unconditional: always returns 1 */
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c
new file mode 100644
index 0000000000..4861310c8d
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ebony.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Ebony board specific routines
+ *
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+
+static const struct of_device_id ebony_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},	/* empty entry ends the table passed to of_platform_bus_probe() */
+};
+
+static int __init ebony_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ebony_of_bus, NULL);
+	of_instantiate_rtc();
+	/* No result from the calls above is checked; always returns 0. */
+	return 0;
+}
+machine_device_initcall(ebony, ebony_device_probe);
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init ebony_probe(void)
+{
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+	/* No checks of its own: this probe always reports a match. */
+	return 1;
+}
+
+define_machine(ebony) {
+	.name			= "Ebony",
+	.compatible		= "ibm,ebony",
+	.probe			= ebony_probe,	/* unconditional: always returns 1 */
+	.progress		= udbg_progress,
+	.init_IRQ		= uic_init_tree,
+	.get_irq		= uic_get_irq,
+	.restart		= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
new file mode 100644
index 0000000000..f6b8d02e08
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * FSP-2 board specific routines
+ *
+ * Based on earlier code:
+ *    Matt Porter <mporter@kernel.crashing.org>
+ *    Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *    Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *    Copyright (c) 2003-2005 Zultys Technologies
+ *
+ *    Rewritten and ported to the merged powerpc tree:
+ *    Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/dcr.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include "fsp2.h"
+
+#define FSP2_BUS_ERR	"ibm,bus-error-irq"
+#define FSP2_CMU_ERR	"ibm,cmu-error-irq"
+#define FSP2_CONF_ERR	"ibm,conf-error-irq"
+#define FSP2_OPBD_ERR	"ibm,opbd-error-irq"
+#define FSP2_MCUE	"ibm,mc-ue-irq"
+#define FSP2_RST_WRN	"ibm,reset-warning-irq"
+
+static const struct of_device_id fsp2_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{},	/* empty entry ends the table passed to of_platform_bus_probe() */
+};
+
+static void l2regs(void)
+{	/* Log the L2 controller registers, each read through mfl2(). */
+	pr_err("L2 Controller:\n");
+	pr_err("MCK:      0x%08x\n", mfl2(L2MCK));
+	pr_err("INT:      0x%08x\n", mfl2(L2INT));
+	pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0));
+	pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1));
+	pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0));
+	pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1));
+	pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2));
+	pr_err("CPUSTAT:  0x%08x\n", mfl2(L2CPUSTAT));
+	pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0));
+	pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0));
+	pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1));
+	pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2));
+	pr_err("WDFSTAT:  0x%08x\n", mfl2(L2WDFSTAT));
+	pr_err("LOG0:     0x%08x\n", mfl2(L2LOG0));
+	pr_err("LOG1:     0x%08x\n", mfl2(L2LOG1));
+	pr_err("LOG2:     0x%08x\n", mfl2(L2LOG2));
+	pr_err("LOG3:     0x%08x\n", mfl2(L2LOG3));
+	pr_err("LOG4:     0x%08x\n", mfl2(L2LOG4));
+	pr_err("LOG5:     0x%08x\n", mfl2(L2LOG5));
+}
+
+static void show_plbopb_regs(u32 base, int num)
+{	/* @base is added to every PLB4OPB_* offset; @num only labels the output. */
+	pr_err("\nPLBOPB Bridge %d:\n", num);
+	pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0));
+	pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1));
+	pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2));
+	pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU));
+	pr_err("GEAR:  0x%08x\n", mfdcr(base + PLB4OPB_GEAR));
+}
+
+static irqreturn_t bus_err_handler(int irq, void *data)
+{	/* Dump the L2, PLB and bridge error registers, then panic (no return). */
+	pr_err("Bus Error\n");
+
+	l2regs();
+
+	pr_err("\nPLB6 Controller:\n");
+	pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD));
+	pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR));
+
+	pr_err("\nPLB6-to-PLB4 Bridge:\n");
+	pr_err("ESR:  0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR));
+	pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH));
+	pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL));
+
+	pr_err("\nPLB4-to-PLB6 Bridge:\n");
+	pr_err("ESR:  0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR));
+	pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH));
+	pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL));
+
+	pr_err("\nPLB6-to-MCIF Bridge:\n");
+	pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0));
+	pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1));
+	pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH));
+	pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL));
+
+	pr_err("\nPLB4 Arbiter:\n");
+	pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH));
+	pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL));
+	pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+	pr_err("P0EARL 0x%08x\n", mfdcr(DCRN_PLB4_P0EARL));
+	pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH));
+	pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL));
+	pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+	pr_err("P1EARL 0x%08x\n", mfdcr(DCRN_PLB4_P1EARL));
+
+	show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0);
+	show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1);
+	show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2);
+	show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3);
+
+	pr_err("\nPLB4-to-AHB Bridge:\n");
+	pr_err("ESR:   0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR));
+	pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR));
+	pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR));
+
+	pr_err("\nAHB-to-PLB4 Bridge:\n");
+	pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR));
+	pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR));
+	panic("Bus Error\n");
+}
+
+static irqreturn_t cmu_err_handler(int irq, void *data) {	/* logs FIR0, then panics */
+	pr_err("CMU Error\n");
+	pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0));
+	panic("CMU Error\n");
+}
+
+static irqreturn_t conf_err_handler(int irq, void *data) {	/* logs CONF_FIR, RPERR0/1, then panics */
+	pr_err("Configuration Logic Error\n");
+	pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC));
+	pr_err("RPERR0:   0x%08x\n", mfdcr(DCRN_CONF_RPERR0));
+	pr_err("RPERR1:   0x%08x\n", mfdcr(DCRN_CONF_RPERR1));
+	panic("Configuration Logic Error\n");
+}
+
+static irqreturn_t opbd_err_handler(int irq, void *data) {	/* panics without logging any register */
+	panic("OPBD Error\n");
+}
+
+static irqreturn_t mcue_handler(int irq, void *data) {	/* logs DDR34, CW and PLB6MCIF registers, then panics */
+	pr_err("DDR: Uncorrectable Error\n");
+	pr_err("MCSTAT:            0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT));
+	pr_err("MCOPT1:            0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1));
+	pr_err("MCOPT2:            0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2));
+	pr_err("PHYSTAT:           0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT));
+	pr_err("CFGR0:             0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0));
+	pr_err("CFGR1:             0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1));
+	pr_err("CFGR2:             0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2));
+	pr_err("CFGR3:             0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3));
+	pr_err("SCRUB_CNTL:        0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL));
+	pr_err("ECCERR_PORT0:      0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0));
+	pr_err("ECCERR_ADDR_PORT0: 0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0));
+	pr_err("ECCERR_CNT_PORT0:  0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0));
+	pr_err("ECC_CHECK_PORT0:   0x%08x\n",
+		mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0));
+	pr_err("MCER0:            0x%08x\n",
+		mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0));
+	pr_err("MCER1:            0x%08x\n",
+		mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1));
+	pr_err("BESR:             0x%08x\n",
+		mfdcr(DCRN_PLB6MCIF_BESR0));
+	pr_err("BEARL:            0x%08x\n",
+		mfdcr(DCRN_PLB6MCIF_BEARL));
+	pr_err("BEARH:            0x%08x\n",
+		mfdcr(DCRN_PLB6MCIF_BEARH));
+	panic("DDR: Uncorrectable Error\n");
+}
+
+static irqreturn_t rst_wrn_handler(int irq, void *data) {
+	/* Both outcomes panic; only the message depends on the CRCS status bits. */
+	const u32 status = mfcmu(CMUN_CRCS);
+
+	if ((status & CRCS_STAT_MASK) == CRCS_STAT_CHIP_RST_B)
+		panic("Received chassis-initiated reset request");
+
+	panic("Unknown external reset: CRCS=0x%x", status);
+}
+
+/* Map interrupt 0 of each @compat node to @errirq_handler; stop at the first failure. */
+static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, compat) {
+		unsigned int irq = irq_of_parse_and_map(np, 0);
+		int32_t rc;
+
+		if (!irq) {
+			pr_err("device tree node %pOFn is missing a interrupt\n", np);
+			of_node_put(np);	/* drop the iterator's reference on early exit */
+			return;
+		}
+
+		rc = request_irq(irq, errirq_handler, 0, np->name, np);
+		if (rc) {
+			pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d\n",
+			      np, rc);
+			of_node_put(np);
+			return;
+		}
+	}
+}
+
+static void __init critical_irq_setup(void)
+{	/* Bind one handler to each FSP2_* error-interrupt compatible string. */
+	node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
+	node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+	node_irq_request(FSP2_CONF_ERR, conf_err_handler);
+	node_irq_request(FSP2_OPBD_ERR, opbd_err_handler);
+	node_irq_request(FSP2_MCUE, mcue_handler);
+	node_irq_request(FSP2_RST_WRN, rst_wrn_handler);
+}
+
+static int __init fsp2_device_probe(void)
+{
+	of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
+	return 0;	/* the probe result is not checked */
+}
+machine_device_initcall(fsp2, fsp2_device_probe);
+
+static int __init fsp2_probe(void)
+{	/* Returns 1, after the register setup below, only for an "ibm,fsp2" root. */
+	u32 val;
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
+		return 0;	/* not this board: leave the hardware untouched */
+
+	/* Clear BC_ERR and mask snoopable request plb errors. */
+	val = mfdcr(DCRN_PLB6_CR0);
+	val |= 0x20000000;
+	mtdcr(DCRN_PLB6_BASE, val);	/* NOTE(review): read used DCRN_PLB6_CR0 - confirm same DCR */
+	mtdcr(DCRN_PLB6_HD, 0xffff0000);
+	mtdcr(DCRN_PLB6_SHD, 0xffff0000);
+
+	/* TVSENSE reset is blocked (clock gated) by the POR default of the TVS
+	 * sleep config bit. As a consequence, TVSENSE will provide erratic
+	 * sensor values, which may result in spurious (parity) errors
+	 * recorded in the CMU FIR and leading to erroneous interrupt requests
+	 * once the CMU interrupt is unmasked.
+	 */
+
+	/* 1. set TVS1[UNDOZE] */
+	val = mfcmu(CMUN_TVS1);
+	val |= 0x4;
+	mtcmu(CMUN_TVS1, val);
+
+	/* 2. clear FIR[TVS] and FIR[TVSPAR] */
+	val = mfcmu(CMUN_FIR0);
+	val |= 0x30000000;
+	mtcmu(CMUN_FIR0, val);
+
+	/* L2 machine checks */
+	mtl2(L2PLBMCKEN0, 0xffffffff);
+	mtl2(L2PLBMCKEN1, 0x0000ffff);
+	mtl2(L2ARRMCKEN0, 0xffffffff);
+	mtl2(L2ARRMCKEN1, 0xffffffff);
+	mtl2(L2ARRMCKEN2, 0xfffff000);
+	mtl2(L2CPUMCKEN,  0xffffffff);
+	mtl2(L2RACMCKEN0, 0xffffffff);
+	mtl2(L2WACMCKEN0, 0xffffffff);
+	mtl2(L2WACMCKEN1, 0xffffffff);
+	mtl2(L2WACMCKEN2, 0xffffffff);
+	mtl2(L2WDFMCKEN,  0xffffffff);
+
+	/* L2 interrupts */
+	mtl2(L2PLBINTEN1, 0xffff0000);
+
+	/*
+	 * At a global level, enable all L2 machine checks and interrupts
+	 * reported by the L2 subsystems, except for the external machine check
+	 * input (UIC0.1).
+	 */
+	mtl2(L2MCKEN, 0x000007ff);
+	mtl2(L2INTEN, 0x000004ff);
+
+	/* Enable FSP-2 configuration logic parity errors */
+	mtdcr(DCRN_CONF_EIR_RS, 0x80000000);
+	return 1;
+}
+
+static void __init fsp2_irq_init(void)
+{	/* Initialise the UIC tree first, then request the error interrupts. */
+	uic_init_tree();
+	critical_irq_setup();
+}
+
+define_machine(fsp2) {
+	.name			= "FSP-2",
+	.probe			= fsp2_probe,	/* checks the flat DT root for "ibm,fsp2" */
+	.progress		= udbg_progress,
+	.init_IRQ		= fsp2_irq_init, /* UIC setup plus the error IRQ handlers */
+	.get_irq		= uic_get_irq,
+	.restart		= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h
new file mode 100644
index 0000000000..9e1d52754c
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.h
@@ -0,0 +1,272 @@
+#ifndef _ASM_POWERPC_FSP_DCR_H_
+#define _ASM_POWERPC_FSP_DCR_H_
+#ifdef __KERNEL__
+#include <asm/dcr.h>
+
+#define DCRN_CMU_ADDR		0x00C	/* Chip management unit addr */
+#define DCRN_CMU_DATA		0x00D	/* Chip management unit data */
+
+/* PLB4 Arbiter */
+#define DCRN_PLB4_PCBI		0x010	/* PLB Crossbar ID/Rev Register */
+#define DCRN_PLB4_P0ACR		0x011	/* PLB0 Arbiter Control Register */
+#define DCRN_PLB4_P0ESRL	0x012	/* PLB0 Error Status Register Low */
+#define DCRN_PLB4_P0ESRH	0x013	/* PLB0 Error Status Register High */
+#define DCRN_PLB4_P0EARL	0x014	/* PLB0 Error Address Register Low */
+#define DCRN_PLB4_P0EARH	0x015	/* PLB0 Error Address Register High */
+#define DCRN_PLB4_P0ESRLS	0x016	/* PLB0 Error Status Register Low Set */
+#define DCRN_PLB4_P0ESRHS	0x017	/* PLB0 Error Status Register High Set */
+#define DCRN_PLB4_PCBC		0x018	/* PLB Crossbar Control Register */
+#define DCRN_PLB4_P1ACR		0x019	/* PLB1 Arbiter Control Register */
+#define DCRN_PLB4_P1ESRL	0x01A	/* PLB1 Error Status Register Low */
+#define DCRN_PLB4_P1ESRH	0x01B	/* PLB1 Error Status Register High */
+#define DCRN_PLB4_P1EARL	0x01C	/* PLB1 Error Address Register Low */
+#define DCRN_PLB4_P1EARH	0x01D	/* PLB1 Error Address Register High */
+#define DCRN_PLB4_P1ESRLS	0x01E	/* PLB1 Error Status Register Low Set */
+#define DCRN_PLB4_P1ESRHS	0x01F	/* PLB1 Error Status Register High Set */
+
+/* PLB4/OPB bridge 0, 1, 2, 3 */
+#define DCRN_PLB4OPB0_BASE	0x020
+#define DCRN_PLB4OPB1_BASE	0x030
+#define DCRN_PLB4OPB2_BASE	0x040
+#define DCRN_PLB4OPB3_BASE	0x050
+
+#define PLB4OPB_GESR0		0x0	/* Error status 0: Master Dev 0-3 */
+#define PLB4OPB_GEAR		0x2	/* Error Address Register */
+#define PLB4OPB_GEARU		0x3	/* Error Upper Address Register */
+#define PLB4OPB_GESR1		0x4	/* Error Status 1: Master Dev 4-7 */
+#define PLB4OPB_GESR2		0xC	/* Error Status 2: Master Dev 8-11 */
+
+/* PLB4-to-AHB Bridge */
+#define DCRN_PLB4AHB_BASE	0x400
+#define DCRN_PLB4AHB_SEUAR	(DCRN_PLB4AHB_BASE + 1)
+#define DCRN_PLB4AHB_SELAR	(DCRN_PLB4AHB_BASE + 2)
+#define DCRN_PLB4AHB_ESR	(DCRN_PLB4AHB_BASE + 3)
+#define DCRN_AHBPLB4_ESR	(DCRN_PLB4AHB_BASE + 8)
+#define DCRN_AHBPLB4_EAR	(DCRN_PLB4AHB_BASE + 9)
+
+/* PLB6 Controller */
+#define DCRN_PLB6_BASE		0x11111300
+#define DCRN_PLB6_CR0		(DCRN_PLB6_BASE)
+#define DCRN_PLB6_ERR		(DCRN_PLB6_BASE + 0x0B)
+#define DCRN_PLB6_HD		(DCRN_PLB6_BASE + 0x0E)
+#define DCRN_PLB6_SHD		(DCRN_PLB6_BASE + 0x10)
+
+/* PLB4-to-PLB6 Bridge */
+#define DCRN_PLB4PLB6_BASE	0x11111320
+#define DCRN_PLB4PLB6_ESR	(DCRN_PLB4PLB6_BASE + 1)
+#define DCRN_PLB4PLB6_EARH	(DCRN_PLB4PLB6_BASE + 3)
+#define DCRN_PLB4PLB6_EARL	(DCRN_PLB4PLB6_BASE + 4)
+
+/* PLB6-to-PLB4 Bridge */
+#define DCRN_PLB6PLB4_BASE	0x11111350
+#define DCRN_PLB6PLB4_ESR	(DCRN_PLB6PLB4_BASE + 1)
+#define DCRN_PLB6PLB4_EARH	(DCRN_PLB6PLB4_BASE + 3)
+#define DCRN_PLB6PLB4_EARL	(DCRN_PLB6PLB4_BASE + 4)
+
+/* PLB6-to-MCIF Bridge */
+#define DCRN_PLB6MCIF_BASE	0x11111380
+#define DCRN_PLB6MCIF_BESR0	(DCRN_PLB6MCIF_BASE + 0)
+#define DCRN_PLB6MCIF_BESR1	(DCRN_PLB6MCIF_BASE + 1)
+#define DCRN_PLB6MCIF_BEARL	(DCRN_PLB6MCIF_BASE + 2)
+#define DCRN_PLB6MCIF_BEARH	(DCRN_PLB6MCIF_BASE + 3)
+
+/* Configuration Logic Registers */
+#define DCRN_CONF_BASE		0x11111400
+#define DCRN_CONF_FIR_RWC	(DCRN_CONF_BASE + 0x3A)
+#define DCRN_CONF_EIR_RS	(DCRN_CONF_BASE + 0x3E)
+#define DCRN_CONF_RPERR0	(DCRN_CONF_BASE + 0x4D)
+#define DCRN_CONF_RPERR1	(DCRN_CONF_BASE + 0x4E)
+
+#define DCRN_L2CDCRAI		0x11111100	/* written with the register index by mtl2()/mfl2() */
+#define DCRN_L2CDCRDI		0x11111104	/* data DCR: written by mtl2(), read by mfl2() */
+/* L2 indirect addresses (the 'reg' argument of mtl2()/mfl2()) */
+#define L2MCK		0x120
+#define L2MCKEN		0x130
+#define L2INT		0x150
+#define L2INTEN		0x160
+#define L2LOG0		0x180
+#define L2LOG1		0x184
+#define L2LOG2		0x188
+#define L2LOG3		0x18C
+#define L2LOG4		0x190
+#define L2LOG5		0x194
+#define L2PLBSTAT0	0x300
+#define L2PLBSTAT1	0x304
+#define L2PLBMCKEN0	0x330
+#define L2PLBMCKEN1	0x334
+#define L2PLBINTEN0	0x360
+#define L2PLBINTEN1	0x364
+#define L2ARRSTAT0	0x500
+#define L2ARRSTAT1	0x504
+#define L2ARRSTAT2	0x508
+#define L2ARRMCKEN0	0x530
+#define L2ARRMCKEN1	0x534
+#define L2ARRMCKEN2	0x538
+#define L2ARRINTEN0	0x560
+#define L2ARRINTEN1	0x564
+#define L2ARRINTEN2	0x568
+#define L2CPUSTAT	0x700
+#define L2CPUMCKEN	0x730
+#define L2CPUINTEN	0x760
+#define L2RACSTAT0	0x900
+#define L2RACMCKEN0	0x930
+#define L2RACINTEN0	0x960
+#define L2WACSTAT0	0xD00
+#define L2WACSTAT1	0xD04
+#define L2WACSTAT2	0xD08
+#define L2WACMCKEN0	0xD30
+#define L2WACMCKEN1	0xD34
+#define L2WACMCKEN2	0xD38
+#define L2WACINTEN0	0xD60
+#define L2WACINTEN1	0xD64
+#define L2WACINTEN2	0xD68
+#define L2WDFSTAT	0xF00
+#define L2WDFMCKEN	0xF30
+#define L2WDFINTEN	0xF60
+
+/* DDR3/4 Memory Controller */
+#define DCRN_DDR34_BASE			0x11120000
+#define DCRN_DDR34_MCSTAT		0x10
+#define DCRN_DDR34_MCOPT1		0x20
+#define DCRN_DDR34_MCOPT2		0x21
+#define DCRN_DDR34_PHYSTAT		0x32
+#define DCRN_DDR34_CFGR0		0x40
+#define DCRN_DDR34_CFGR1		0x41
+#define DCRN_DDR34_CFGR2		0x42
+#define DCRN_DDR34_CFGR3		0x43
+#define DCRN_DDR34_SCRUB_CNTL		0xAA
+#define DCRN_DDR34_SCRUB_INT		0xAB
+#define DCRN_DDR34_SCRUB_START_ADDR	0xB0
+#define DCRN_DDR34_SCRUB_END_ADDR	0xD0
+#define DCRN_DDR34_ECCERR_ADDR_PORT0	0xE0
+#define DCRN_DDR34_ECCERR_ADDR_PORT1	0xE1
+#define DCRN_DDR34_ECCERR_ADDR_PORT2	0xE2
+#define DCRN_DDR34_ECCERR_ADDR_PORT3	0xE3
+#define DCRN_DDR34_ECCERR_COUNT_PORT0	0xE4
+#define DCRN_DDR34_ECCERR_COUNT_PORT1	0xE5
+#define DCRN_DDR34_ECCERR_COUNT_PORT2	0xE6
+#define DCRN_DDR34_ECCERR_COUNT_PORT3	0xE7
+#define DCRN_DDR34_ECCERR_PORT0		0xF0
+#define DCRN_DDR34_ECCERR_PORT1		0xF2
+#define DCRN_DDR34_ECCERR_PORT2		0xF4
+#define DCRN_DDR34_ECCERR_PORT3		0xF6
+#define DCRN_DDR34_ECC_CHECK_PORT0	0xF8
+#define DCRN_DDR34_ECC_CHECK_PORT1	0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT2	0xFA	/* one offset per port: 0xF8..0xFB */
+#define DCRN_DDR34_ECC_CHECK_PORT3	0xFB
+
+#define DDR34_SCRUB_CNTL_STOP		0x00000000
+#define DDR34_SCRUB_CNTL_SCRUB		0x80000000
+#define DDR34_SCRUB_CNTL_UE_STOP	0x20000000
+#define DDR34_SCRUB_CNTL_CE_STOP	0x10000000
+#define DDR34_SCRUB_CNTL_RANK_EN	0x00008000
+
+/* PLB-Attached DDR3/4 Core Wrapper */
+#define DCRN_CW_BASE			0x11111800
+#define DCRN_CW_MCER0			0x00
+#define DCRN_CW_MCER1			0x01
+#define DCRN_CW_MCER_AND0		0x02
+#define DCRN_CW_MCER_AND1		0x03
+#define DCRN_CW_MCER_OR0		0x04
+#define DCRN_CW_MCER_OR1		0x05
+#define DCRN_CW_MCER_MASK0		0x06
+#define DCRN_CW_MCER_MASK1		0x07
+#define DCRN_CW_MCER_MASK_AND0		0x08
+#define DCRN_CW_MCER_MASK_AND1		0x09
+#define DCRN_CW_MCER_MASK_OR0		0x0A
+#define DCRN_CW_MCER_MASK_OR1		0x0B
+#define DCRN_CW_MCER_ACTION0		0x0C
+#define DCRN_CW_MCER_ACTION1		0x0D
+#define DCRN_CW_MCER_WOF0		0x0E
+#define DCRN_CW_MCER_WOF1		0x0F
+#define DCRN_CW_LFIR			0x10
+#define DCRN_CW_LFIR_AND		0x11
+#define DCRN_CW_LFIR_OR			0x12
+#define DCRN_CW_LFIR_MASK		0x13
+#define DCRN_CW_LFIR_MASK_AND		0x14
+#define DCRN_CW_LFIR_MASK_OR		0x15
+
+#define CW_MCER0_MEM_CE			0x00020000
+/* CMU addresses */
+#define CMUN_CRCS		0x00 /* Chip Reset Control/Status */
+#define CMUN_CONFFIR0		0x20 /* Config Reg Parity FIR 0 */
+#define CMUN_CONFFIR1		0x21 /* Config Reg Parity FIR 1 */
+#define CMUN_CONFFIR2		0x22 /* Config Reg Parity FIR 2 */
+#define CMUN_CONFFIR3		0x23 /* Config Reg Parity FIR 3 */
+#define CMUN_URCR3_RS		0x24 /* Unit Reset Control Reg 3 Set */
+#define CMUN_URCR3_C		0x25 /* Unit Reset Control Reg 3 Clear */
+#define CMUN_URCR3_P		0x26 /* Unit Reset Control Reg 3 Pulse */
+#define CMUN_PW0		0x2C /* Pulse Width Register */
+#define CMUN_URCR0_P		0x2D /* Unit Reset Control Reg 0 Pulse */
+#define CMUN_URCR1_P		0x2E /* Unit Reset Control Reg 1 Pulse */
+#define CMUN_URCR2_P		0x2F /* Unit Reset Control Reg 2 Pulse */
+#define CMUN_CLS_RW		0x30 /* Code Load Status (Read/Write) */
+#define CMUN_CLS_S		0x31 /* Code Load Status (Set) */
+#define CMUN_CLS_C		0x32 /* Code Load Status (Clear) */
+#define CMUN_URCR2_RS		0x33 /* Unit Reset Control Reg 2 Set */
+#define CMUN_URCR2_C		0x34 /* Unit Reset Control Reg 2 Clear */
+#define CMUN_CLKEN0		0x35 /* Clock Enable 0 */
+#define CMUN_CLKEN1		0x36 /* Clock Enable 1 */
+#define CMUN_PCD0		0x37 /* PSI clock divider 0 */
+#define CMUN_PCD1		0x38 /* PSI clock divider 1 */
+#define CMUN_TMR0		0x39 /* Reset Timer */
+#define CMUN_TVS0		0x3A /* TV Sense Reg 0 */
+#define CMUN_TVS1		0x3B /* TV Sense Reg 1 */
+#define CMUN_MCCR		0x3C /* DRAM Configuration Reg */
+#define CMUN_FIR0		0x3D /* Fault Isolation Reg 0 */
+#define CMUN_FMR0		0x3E /* FIR Mask Reg 0 */
+#define CMUN_ETDRB		0x3F /* ETDR Backdoor */
+
+/* CRCS bit fields */
+#define CRCS_STAT_MASK		0xF0000000
+#define CRCS_STAT_POR		0x10000000
+#define CRCS_STAT_PHR		0x20000000
+#define CRCS_STAT_PCIE		0x30000000
+#define CRCS_STAT_CRCS_SYS	0x40000000
+#define CRCS_STAT_DBCR_SYS	0x50000000
+#define CRCS_STAT_HOST_SYS	0x60000000
+#define CRCS_STAT_CHIP_RST_B	0x70000000
+#define CRCS_STAT_CRCS_CHIP	0x80000000
+#define CRCS_STAT_DBCR_CHIP	0x90000000
+#define CRCS_STAT_HOST_CHIP	0xA0000000
+#define CRCS_STAT_PSI_CHIP	0xB0000000
+#define CRCS_STAT_CRCS_CORE	0xC0000000
+#define CRCS_STAT_DBCR_CORE	0xD0000000
+#define CRCS_STAT_HOST_CORE	0xE0000000
+#define CRCS_STAT_PCIE_HOT	0xF0000000
+#define CRCS_STAT_SELF_CORE	0x40000000
+#define CRCS_STAT_SELF_CHIP	0x50000000
+#define CRCS_WATCHE		0x08000000
+#define CRCS_CORE		0x04000000 /* Reset PPC440 core */
+#define CRCS_CHIP		0x02000000 /* Chip Reset */
+#define CRCS_SYS		0x01000000 /* System Reset */
+#define CRCS_WRCR		0x00800000 /* Watchdog reset on core reset */
+#define CRCS_EXTCR		0x00080000 /* CHIP_RST_B triggers chip reset */
+#define CRCS_PLOCK		0x00000002 /* PLL Locked */
+
+#define mtcmu(reg, data)		\
+do {	/* select the CMU register, then write it */	\
+	mtdcr(DCRN_CMU_ADDR, (reg));	\
+	mtdcr(DCRN_CMU_DATA, (data));	\
+} while (0)
+
+#define mfcmu(reg)			\
+	({u32 __cmu_val; /* private name: 'reg' may itself mention 'data' */ \
+	mtdcr(DCRN_CMU_ADDR, (reg));	\
+	__cmu_val = mfdcr(DCRN_CMU_DATA);	\
+	__cmu_val; })
+
+#define mtl2(reg, data)			\
+do {	/* select the L2 register, then write it */	\
+	mtdcr(DCRN_L2CDCRAI, (reg));	\
+	mtdcr(DCRN_L2CDCRDI, (data));	\
+} while (0)
+
+#define mfl2(reg)			\
+	({u32 __l2_val; /* private name: 'reg' may itself mention 'data' */ \
+	mtdcr(DCRN_L2CDCRAI, (reg));	\
+	__l2_val = mfdcr(DCRN_L2CDCRDI);	\
+	__l2_val; })
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FSP_DCR_H_ */
diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c
new file mode 100644
index 0000000000..f533b495e7
--- /dev/null
+++ b/arch/powerpc/platforms/44x/idle.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2008 IBM Corp. 
+ *
+ * Based on arch/powerpc/platforms/pasemi/idle.c: 
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Added by: Jerone Young <jyoung5@us.ibm.com>
+ */
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <asm/machdep.h>
+
+static int mode_spin;	/* set to 1 by the "idle=spin" early parameter */
+
+static void ppc44x_idle(void)
+{
+	unsigned long saved_msr = mfmsr();
+	unsigned long wait_msr = saved_msr | MSR_WE | MSR_EE | MSR_CE | MSR_DE;
+
+	/* enter the wait state with EE, CE and DE set as well */
+	mtmsr(wait_msr);
+	isync();
+	/* put back the MSR value that was read on entry */
+	mtmsr(saved_msr);
+	isync();
+}
+
+int __init ppc44x_idle_init(void)
+{
+	/* spin mode requested: leave ppc_md.power_save untouched */
+	if (mode_spin)
+		return 0;
+
+	/* otherwise idle through ppc44x_idle() */
+	ppc_md.power_save = ppc44x_idle;
+	return 0;
+}
+
+arch_initcall(ppc44x_idle_init);
+
+static int __init idle_param(char *p)
+{
+	/* a bare "idle" (no "=value") arrives as NULL: nothing to compare */
+	if (p && !strcmp("spin", p)) {
+		mode_spin = 1;
+		ppc_md.power_save = NULL;
+	}
+
+	return 0;
+}
+
+early_param("idle", idle_param);
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
new file mode 100644
index 0000000000..ef883d97fe
--- /dev/null
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PPC476 board specific routines
+ *
+ * Copyright 2010 Torez Smith, IBM Corporation.
+ *
+ * Based on earlier code:
+ *    Matt Porter <mporter@kernel.crashing.org>
+ *    Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *    Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *    Copyright (c) 2003-2005 Zultys Technologies
+ *
+ *    Rewritten and ported to the merged powerpc tree:
+ *    Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/mpic.h>
+#include <asm/mmu.h>
+
+static const struct of_device_id iss4xx_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},	/* empty last entry; table is passed to of_platform_bus_probe() */
+};
+
+static int __init iss4xx_device_probe(void)
+{
+	of_platform_bus_probe(NULL, iss4xx_of_bus, NULL);	/* return value ignored */
+	of_instantiate_rtc();
+
+	return 0;	/* always reports success */
+}
+machine_device_initcall(iss4xx, iss4xx_device_probe);
+
+/* .init_IRQ hook: the top-level controller is either a UIC or an MPIC */
+static void __init iss4xx_init_irq(void)
+{
+	struct device_node *np;
+
+	/* Find top level interrupt controller (it has no "interrupts" property) */
+	for_each_node_with_property(np, "interrupt-controller") {
+		if (!of_property_present(np, "interrupts"))
+			break;
+	}
+	if (np == NULL)
+		panic("Can't find top level interrupt controller");
+
+	/* Check type and do appropriate initialization */
+	if (of_device_is_compatible(np, "ibm,uic")) {
+		uic_init_tree();
+		ppc_md.get_irq = uic_get_irq;
+#ifdef CONFIG_MPIC
+	} else if (of_device_is_compatible(np, "chrp,open-pic")) {
+		/* MPIC takes its setup from the device-tree: pass 0 everywhere */
+		struct mpic *mpic = mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC     ");
+		BUG_ON(mpic == NULL);
+		mpic_init(mpic);
+		ppc_md.get_irq = mpic_get_irq;
+#endif
+	} else
+		panic("Unrecognized top level interrupt controller");
+
+	of_node_put(np);	/* drop the reference held since the loop break */
+}
+
+#ifdef CONFIG_SMP
+static void smp_iss4xx_setup_cpu(int cpu)
+{
+	mpic_setup_this_cpu();	/* the 'cpu' argument is not used */
+}
+
+static int smp_iss4xx_kick_cpu(int cpu)
+{
+	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
+	const u64 *spin_table_addr_prop;
+	u32 *spin_table;
+	extern void start_secondary_47x(void);
+
+	BUG_ON(cpunode == NULL);
+
+	/* Assume spin table. We could test for the enable-method in the
+	 * device-tree but there's little point: it's our only supported method
+	 */
+	spin_table_addr_prop = of_get_property(cpunode, "cpu-release-addr",
+					       NULL);
+	if (spin_table_addr_prop == NULL) {
+		of_node_put(cpunode);
+		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
+		return -ENOENT;
+	}
+
+	/* Assume it's in the linear mapping; a bit fishy but fine for now */
+	spin_table = (u32 *)__va(*spin_table_addr_prop);
+	/* Property value consumed: drop the of_get_cpu_node() reference */
+	of_node_put(cpunode);
+	pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table);
+
+	spin_table[3] = cpu;
+	smp_wmb();
+	spin_table[1] = __pa(start_secondary_47x);
+	mb();
+
+	return 0;
+}
+
+static struct smp_ops_t iss_smp_ops = {
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.setup_cpu	= smp_iss4xx_setup_cpu,	/* calls mpic_setup_this_cpu() */
+	.kick_cpu	= smp_iss4xx_kick_cpu,	/* writes the CPU's spin table */
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+};
+
+static void __init iss4xx_smp_init(void)
+{
+	if (mmu_has_feature(MMU_FTR_TYPE_47x))
+		smp_ops = &iss_smp_ops;	/* installed only for 47x-type MMUs */
+}
+
+#else /* CONFIG_SMP */
+static void __init iss4xx_smp_init(void) { }	/* nothing to do without SMP */
+#endif /* CONFIG_SMP */
+
+static void __init iss4xx_setup_arch(void)
+{
+	iss4xx_smp_init();	/* empty stub when CONFIG_SMP is off */
+}
+
+define_machine(iss4xx) {
+	.name			= "ISS-4xx",
+	.compatible		= "ibm,iss-4xx",
+	.progress		= udbg_progress,
+	.init_IRQ		= iss4xx_init_irq,	/* also sets ppc_md.get_irq */
+	.setup_arch		= iss4xx_setup_arch,
+	.restart		= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
new file mode 100644
index 0000000000..5d19daacd7
--- /dev/null
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+#include <asm/cacheflush.h>
+
+/* Log at KERN_ERR and clear the cause of a 440A machine check; returns 0. */
+int machine_check_440A(struct pt_regs *regs)
+{
+	unsigned long reason = regs->esr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP) {
+		printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	} else {
+		u32 mcsr = mfspr(SPRN_MCSR);
+		if (mcsr & MCSR_IB)
+			printk(KERN_ERR "Instruction Read PLB Error\n");
+		if (mcsr & MCSR_DRB)
+			printk(KERN_ERR "Data Read PLB Error\n");
+		if (mcsr & MCSR_DWB)
+			printk(KERN_ERR "Data Write PLB Error\n");
+		if (mcsr & MCSR_TLBP)
+			printk(KERN_ERR "TLB Parity Error\n");
+		if (mcsr & MCSR_ICP) {
+			flush_instruction_cache();
+			printk(KERN_ERR "I-Cache Parity Error\n");
+		}
+		if (mcsr & MCSR_DCSP)
+			printk(KERN_ERR "D-Cache Search Parity Error\n");
+		if (mcsr & MCSR_DCFP)
+			printk(KERN_ERR "D-Cache Flush Parity Error\n");
+		if (mcsr & MCSR_IMPE)
+			printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+		/* Clear MCSR by writing back the bits that were just reported */
+		mtspr(SPRN_MCSR, mcsr);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_PPC_47x
+int machine_check_47x(struct pt_regs *regs)
+{
+	unsigned long reason = regs->esr;
+	u32 mcsr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP) {
+		printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+		return 0;	/* MCSR is neither read nor written on this path */
+	}
+	mcsr = mfspr(SPRN_MCSR);
+	if (mcsr & MCSR_IB)
+		printk(KERN_ERR "Instruction Read PLB Error\n");
+	if (mcsr & MCSR_DRB)
+		printk(KERN_ERR "Data Read PLB Error\n");
+	if (mcsr & MCSR_DWB)
+		printk(KERN_ERR "Data Write PLB Error\n");
+	if (mcsr & MCSR_TLBP)
+		printk(KERN_ERR "TLB Parity Error\n");
+	if (mcsr & MCSR_ICP) {
+		flush_instruction_cache();
+		printk(KERN_ERR "I-Cache Parity Error\n");
+	}
+	if (mcsr & MCSR_DCSP)
+		printk(KERN_ERR "D-Cache Search Parity Error\n");
+	if (mcsr & PPC47x_MCSR_GPR)
+		printk(KERN_ERR "GPR Parity Error\n");
+	if (mcsr & PPC47x_MCSR_FPR)
+		printk(KERN_ERR "FPR Parity Error\n");
+	if (mcsr & PPC47x_MCSR_IPR)
+		printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+	/* Clear MCSR by writing back the bits that were just reported */
+	mtspr(SPRN_MCSR, mcsr);
+
+	return 0;	/* every path returns 0 */
+}
+#endif /* CONFIG_PPC_47x */
diff --git a/arch/powerpc/platforms/44x/misc_44x.S b/arch/powerpc/platforms/44x/misc_44x.S
new file mode 100644
index 0000000000..3a0c4bd3d6
--- /dev/null
+++ b/arch/powerpc/platforms/44x/misc_44x.S
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains miscellaneous low-level functions for PPC 44x.
+ *    Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * Do an IO access in AS1 (MSR_DS is set around the access, then restored)
+ */
+_GLOBAL(as1_readb)
+	mfmsr	r7		/* r7 = MSR on entry */
+	ori	r0,r7,MSR_DS	/* r0 = MSR | MSR_DS */
+	sync
+	mtmsr	r0		/* run with MSR_DS set */
+	sync
+	isync
+	lbz	r3,0(r3)	/* r3 = byte loaded from the address in r3 */
+	sync
+	mtmsr	r7		/* put the entry MSR back */
+	sync
+	isync
+	blr
+
+_GLOBAL(as1_writeb)
+	mfmsr	r7		/* r7 = MSR on entry */
+	ori	r0,r7,MSR_DS	/* r0 = MSR | MSR_DS */
+	sync
+	mtmsr	r0		/* run with MSR_DS set */
+	sync
+	isync
+	stb	r3,0(r4)	/* store the low byte of r3 at the address in r4 */
+	sync
+	mtmsr	r7		/* put the entry MSR back */
+	sync
+	isync
+	blr
diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c
new file mode 100644
index 0000000000..971786ff1a
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc44x_simple.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic PowerPC 44x platform support
+ *
+ * Copyright 2008 IBM Corporation
+ *
+ * This implements simple platform support for PowerPC 44x chips.  This is
+ * mostly used for eval boards or other simple and "generic" 44x boards.  If
+ * your board has custom functions or hardware, then you will likely want to
+ * implement your own board.c file to accommodate it.
+ */
+
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static const struct of_device_id ppc44x_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{},	/* empty last entry; table is passed to of_platform_bus_probe() */
+};
+
+static int __init ppc44x_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc44x_of_bus, NULL);	/* return value ignored */
+
+	return 0;	/* always reports success */
+}
+machine_device_initcall(ppc44x_simple, ppc44x_device_probe);
+
+/* This is the list of boards that can be supported by this simple
+ * platform code.  This does _not_ mean the boards are compatible,
+ * as they most certainly are not from a device tree perspective.
+ * However, their differences are handled by the device tree and the
+ * drivers and therefore they don't need custom board support files.
+ *
+ * Again, if your board needs to do things differently then create a
+ * board.c file for it rather than adding it to this list.
+ */
+static const char * const board[] __initconst = {
+	"amcc,arches",
+	"amcc,bamboo",
+	"apm,bluestone",
+	"amcc,glacier",
+	"ibm,ebony",
+	"amcc,eiger",
+	"amcc,katmai",
+	"amcc,rainier",
+	"amcc,redwood",
+	"amcc,sequoia",
+	"amcc,taishan",
+	"amcc,yosemite",
+	"mosaixtech,icon"
+};
+
+static int __init ppc44x_probe(void)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(board); idx++) {
+		if (!of_machine_is_compatible(board[idx]))
+			continue;
+		pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+		return 1;	/* first listed board that matches claims the machine */
+	}
+
+	return 0;
+}
+
+define_machine(ppc44x_simple) {
+	.name = "PowerPC 44x Platform",
+	.probe = ppc44x_probe,	/* accepts any machine listed in board[] */
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
new file mode 100644
index 0000000000..164cbcd458
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC 476FPE board specific routines
+ *
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * Based on earlier code:
+ *    Matt Porter <mporter@kernel.crashing.org>
+ *    Copyright 2002-2005 MontaVista Software Inc.
+ *
+ *    Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *    Copyright (c) 2003-2005 Zultys Technologies
+ *
+ *    Rewritten and ported to the merged powerpc tree:
+ *    Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ *    Copyright © 2011 David Kliekamp IBM Corporation
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/mpic.h>
+#include <asm/mmu.h>
+#include <asm/swiotlb.h>
+
+#include <linux/pci.h>
+#include <linux/i2c.h>
+
+static const struct of_device_id ppc47x_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},	/* empty last entry; table is passed to of_platform_bus_probe() */
+};
+
+/* The EEPROM is missing and the default values are bogus, so on Currituck
+ * config dwords 0xe0/0xe4 are written by hand to force USB into EHCI mode */
+static void quirk_ppc_currituck_usb_fixup(struct pci_dev *dev)
+{
+	if (!of_machine_is_compatible("ibm,currituck"))
+		return;
+	pci_write_config_dword(dev, 0xe0, 0x0114231f);
+	pci_write_config_dword(dev, 0xe4, 0x00006c40);
+}
+DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup);
+
+/* Akebono has an AVR microcontroller attached to the I2C bus
+ * which is used to power off/reset the system. */
+
+/* AVR I2C Commands */
+#define AVR_PWRCTL_CMD (0x26)
+
+/* Flags for the power control I2C commands */
+#define AVR_PWRCTL_PWROFF (0x01)
+#define AVR_PWRCTL_RESET (0x02)
+
+static struct i2c_client *avr_i2c_client;
+static void __noreturn avr_halt_system(int pwrctl_flags)
+{
+	/* Send pwrctl_flags (power off or reset) to the AVR; result not checked */
+	i2c_smbus_write_byte_data(avr_i2c_client,
+				  AVR_PWRCTL_CMD, pwrctl_flags);
+
+	/* Spin forever waiting for the request to take effect */
+	while (1)
+		;
+}
+
+static void avr_power_off_system(void)
+{
+	avr_halt_system(AVR_PWRCTL_PWROFF);	/* does not return */
+}
+
+static void __noreturn avr_reset_system(char *cmd)
+{
+	avr_halt_system(AVR_PWRCTL_RESET);	/* 'cmd' is ignored */
+}
+
+static int avr_probe(struct i2c_client *client)
+{
+	avr_i2c_client = client;	/* used later by avr_halt_system() */
+	ppc_md.restart = avr_reset_system;	/* restart now goes through the AVR */
+	pm_power_off = avr_power_off_system;	/* so does power off */
+	return 0;
+}
+
+static const struct i2c_device_id avr_id[] = {
+	{ "akebono-avr", 0 },
+	{ }	/* empty last entry */
+};
+
+static struct i2c_driver avr_driver = {
+	.driver = {
+		.name = "akebono-avr",
+	},
+	.probe = avr_probe,	/* installs the restart / power-off handlers */
+	.id_table = avr_id,
+};
+
+static int __init ppc47x_device_probe(void)
+{
+	i2c_add_driver(&avr_driver);	/* NOTE(review): return value is not checked */
+	of_platform_bus_probe(NULL, ppc47x_of_bus, NULL);
+
+	return 0;	/* always reports success */
+}
+machine_device_initcall(ppc47x_akebono, ppc47x_device_probe);
+machine_device_initcall(ppc47x_currituck, ppc47x_device_probe);	/* same probe for both machines */
+
+static void __init ppc47x_init_irq(void)
+{
+	struct device_node *np;
+
+	/* Find top level interrupt controller (it has no "interrupts" property) */
+	for_each_node_with_property(np, "interrupt-controller") {
+		if (!of_property_present(np, "interrupts"))
+			break;
+	}
+	if (np == NULL)
+		panic("Can't find top level interrupt controller");
+
+	/* Check type and do appropriate initialization; only MPIC is accepted */
+	if (of_device_is_compatible(np, "chrp,open-pic")) {
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		struct mpic *mpic =
+			mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC     ");
+		BUG_ON(mpic == NULL);
+		mpic_init(mpic);
+		ppc_md.get_irq = mpic_get_irq;
+	} else
+		panic("Unrecognized top level interrupt controller");
+
+	of_node_put(np);	/* drop the reference held since the loop break */
+}
+
+#ifdef CONFIG_SMP
+static void smp_ppc47x_setup_cpu(int cpu)
+{
+	mpic_setup_this_cpu();	/* the 'cpu' argument is not used */
+}
+
+static int smp_ppc47x_kick_cpu(int cpu)
+{
+	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
+	const u64 *spin_table_addr_prop;
+	u32 *spin_table;
+	extern void start_secondary_47x(void);
+
+	BUG_ON(cpunode == NULL);
+
+	/* Assume spin table. We could test for the enable-method in
+	 * the device-tree but currently there's little point as it's
+	 * our only supported method
+	 */
+	spin_table_addr_prop =
+		of_get_property(cpunode, "cpu-release-addr", NULL);
+	if (spin_table_addr_prop == NULL) {
+		of_node_put(cpunode);
+		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n",
+		       cpu);
+		return -ENOENT;
+	}
+
+	/* Assume it's mapped as part of the linear mapping. This is a bit
+	 * fishy but will work fine for now
+	 * XXX: Is there any reason to assume differently?
+	 */
+	spin_table = (u32 *)__va(*spin_table_addr_prop);
+	of_node_put(cpunode);	/* property consumed: drop the node reference */
+	pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table);
+
+	spin_table[3] = cpu;
+	smp_wmb();
+	spin_table[1] = __pa(start_secondary_47x);
+	mb();
+
+	return 0;
+}
+
+static struct smp_ops_t ppc47x_smp_ops = {
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.setup_cpu	= smp_ppc47x_setup_cpu,	/* calls mpic_setup_this_cpu() */
+	.kick_cpu	= smp_ppc47x_kick_cpu,	/* writes the CPU's spin table */
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+};
+
+static void __init ppc47x_smp_init(void)
+{
+	if (mmu_has_feature(MMU_FTR_TYPE_47x))
+		smp_ops = &ppc47x_smp_ops;	/* installed only for 47x-type MMUs */
+}
+
+#else /* CONFIG_SMP */
+static void __init ppc47x_smp_init(void) { }	/* nothing to do without SMP */
+#endif /* CONFIG_SMP */
+
+static void __init ppc47x_setup_arch(void)
+{
+
+	/* No need to check the DMA config as we /know/ our windows are all of
+	 * RAM.  Let's hope that doesn't change */
+	swiotlb_detect_4g();
+
+	ppc47x_smp_init();	/* empty stub when CONFIG_SMP is off */
+}
+
+static int board_rev = -1;	/* stays -1 unless the FPGA read below succeeds */
+static int __init ppc47x_get_board_rev(void)
+{
+	int reg;	/* byte offset added to the FPGA mapping for the read */
+	u8 __iomem *fpga;
+	struct device_node *np = NULL;
+
+	if (of_machine_is_compatible("ibm,currituck")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+		reg = 0;
+	} else if (of_machine_is_compatible("ibm,akebono")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,akebono-fpga");
+		reg = 2;
+	}
+
+	if (!np)	/* also the "neither machine" case, where reg was never set */
+		goto fail;
+
+	fpga = of_iomap(np, 0);
+	of_node_put(np);
+	if (!fpga)
+		goto fail;
+
+	board_rev = ioread8(fpga + reg) & 0x03;	/* only the low two bits are kept */
+	pr_info("%s: Found board revision %d\n", __func__, board_rev);
+	iounmap(fpga);
+	return 0;
+
+fail:
+	pr_info("%s: Unable to find board revision\n", __func__);
+	return 0;	/* failure is only logged; the initcall still returns 0 */
+}
+machine_arch_initcall(ppc47x_akebono, ppc47x_get_board_rev);
+machine_arch_initcall(ppc47x_currituck, ppc47x_get_board_rev);
+
+/* The USB controller should have been hardware swizzled but it wasn't :( */
+static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
+{
+	if (dev->vendor != 0x1033)
+		return;
+	if (dev->device != 0x0035 && dev->device != 0x00e0)
+		return;
+
+	if (board_rev != 0 && board_rev != 2) {
+		pr_alert("%s: Unknown board revision\n", __func__);
+		return;
+	}
+	/* board revision 0 maps hwirq 47, board revision 2 maps hwirq 49 */
+	dev->irq = irq_create_mapping(NULL, board_rev == 0 ? 47 : 49);
+	pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+}
+
+define_machine(ppc47x_akebono) {
+	.name			= "PowerPC 47x (akebono)",
+	.compatible		= "ibm,akebono",
+	.progress		= udbg_progress,
+	.init_IRQ		= ppc47x_init_irq,	/* also sets ppc_md.get_irq */
+	.setup_arch		= ppc47x_setup_arch,
+	.restart		= ppc4xx_reset_system,	/* avr_probe() may replace this */
+};
+
+define_machine(ppc47x_currituck) {
+	.name			= "PowerPC 47x (currituck)",
+	.compatible		= "ibm,currituck",
+	.progress		= udbg_progress,
+	.init_IRQ		= ppc47x_init_irq,	/* also sets ppc_md.get_irq */
+	.pci_irq_fixup		= ppc47x_pci_irq_fixup,	/* depends on board_rev */
+	.setup_arch		= ppc47x_setup_arch,
+	.restart		= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds
new file mode 100644
index 0000000000..9fec5d34ba
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476_modules.lds
@@ -0,0 +1,15 @@
+SECTIONS
+{
+	.text : ALIGN(4096)	/* each output section here is 4096-byte aligned */
+	{
+		*(.text .text.* .fixup)
+	}
+	.init.text : ALIGN(4096)
+	{
+		*(.init.text .init.text.*)
+	}
+	.exit.text : ALIGN(4096)
+	{
+		*(.exit.text .exit.text.*)
+	}
+}
diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c
new file mode 100644
index 0000000000..5cdaa4068e
--- /dev/null
+++ b/arch/powerpc/platforms/44x/sam440ep.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Sam440ep board specific routines based off bamboo.c code
+ * original copyrights below
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright 2004 MontaVista Software Inc.
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ * Copyright 2007 IBM Corporation
+ *
+ * Modified from bamboo.c for sam440ep:
+ * Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
+ */
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <linux/i2c.h>
+
+static const struct of_device_id sam440ep_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},	/* empty last entry; table is passed to of_platform_bus_probe() */
+};
+
+static int __init sam440ep_device_probe(void)
+{
+	of_platform_bus_probe(NULL, sam440ep_of_bus, NULL);	/* return value ignored */
+
+	return 0;	/* always reports success */
+}
+machine_device_initcall(sam440ep, sam440ep_device_probe);
+
+static int __init sam440ep_probe(void)
+{
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+
+	return 1;	/* unconditional: no checks are made in this function */
+}
+
+define_machine(sam440ep) {
+	.name		= "Sam440ep",
+	.compatible	= "acube,sam440ep",
+	.probe		= sam440ep_probe,	/* sets PCI_REASSIGN_ALL_RSRC, returns 1 */
+	.progress	= udbg_progress,
+	.init_IRQ	= uic_init_tree,
+	.get_irq	= uic_get_irq,
+	.restart	= ppc4xx_reset_system,
+};
+
+static struct i2c_board_info sam440ep_rtc_info = {
+	.type = "m41st85",
+	.addr = 0x68,
+	.irq = -1,
+};
+
+static int __init sam440ep_setup_rtc(void)
+{
+	return i2c_register_board_info(0, &sam440ep_rtc_info, 1);	/* one entry, bus number 0 */
+}
+machine_device_initcall(sam440ep, sam440ep_setup_rtc);
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
new file mode 100644
index 0000000000..bf0188dcb9
--- /dev/null
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PIKA Warp(tm) board specific routines
+ *
+ * Copyright (c) 2008-2009 PIKA Technologies
+ *   Sean MacLennan <smaclennan@pikatech.com>
+ */
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/kthread.h>
+#include <linux/leds.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/gpio/consumer.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/dma.h>
+
+
+/*
+ * Bus node "compatible" strings handed to of_platform_bus_probe() by
+ * warp_device_probe(). The empty entry terminates the table.
+ */
+static const struct of_device_id warp_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+/*
+ * Device initcall for the warp machine: probes the buses listed in
+ * warp_of_bus. The result of of_platform_bus_probe() is not checked, so
+ * this always returns 0.
+ */
+static int __init warp_device_probe(void)
+{
+	of_platform_bus_probe(NULL, warp_of_bus, NULL);
+	return 0;
+}
+machine_device_initcall(warp, warp_device_probe);
+
+/*
+ * Machine description for the PIKA Warp: display name, device-tree
+ * "compatible" string and the board hooks (progress output, UIC interrupt
+ * setup/lookup and the 4xx system reset routine). No .probe hook is set.
+ */
+define_machine(warp) {
+	.name		= "Warp",
+	.compatible	= "pika,warp",
+	.progress 	= udbg_progress,
+	.init_IRQ 	= uic_init_tree,
+	.get_irq 	= uic_get_irq,
+	.restart	= ppc4xx_reset_system,
+};
+
+
+/*
+ * Read the two POST result words from the "pika,fpga-sd" register area and
+ * log them.
+ *
+ * Returns 0 once the words have been read, or -ENOENT when the node cannot
+ * be found or mapped.
+ */
+static int __init warp_post_info(void)
+{
+	struct device_node *sd_node;
+	void __iomem *sd_regs;
+	u32 post1, post2;
+
+	/* Sighhhh... POST information is in the sd area. */
+	sd_node = of_find_compatible_node(NULL, NULL, "pika,fpga-sd");
+	if (!sd_node)
+		return -ENOENT;
+
+	/* The node reference is only needed to set up the mapping. */
+	sd_regs = of_iomap(sd_node, 0);
+	of_node_put(sd_node);
+	if (!sd_regs)
+		return -ENOENT;
+
+	post1 = in_be32(sd_regs + 0x40);
+	post2 = in_be32(sd_regs + 0x44);
+	iounmap(sd_regs);
+
+	/* Both words zero is reported as a clean POST. */
+	if (!post1 && !post2)
+		printk(KERN_INFO "Warp POST OK\n");
+	else
+		printk(KERN_INFO "Warp POST %08x %08x\n", post1, post2);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_SENSORS_AD7414
+
+/*
+ * Mapping of the "pika,fpga" registers, set up by pika_dtm_start() and also
+ * used by temp_isr().
+ */
+static void __iomem *dtm_fpga;
+
+/* Indices into warp_gpio_led_pins[]. */
+#define WARP_GREEN_LED	0
+#define WARP_RED_LED	1
+
+/*
+ * The two power LEDs. The names are matched against device-tree child node
+ * names in pika_setup_leds(), which also fills in the gpiod pointers.
+ */
+static struct gpio_led warp_gpio_led_pins[] = {
+	[WARP_GREEN_LED] = {
+		.name		= "green",
+		.default_state	= LEDS_DEFSTATE_KEEP,
+		.gpiod		= NULL, /* to be filled by pika_setup_leds() */
+	},
+	[WARP_RED_LED] = {
+		.name		= "red",
+		.default_state	= LEDS_DEFSTATE_KEEP,
+		.gpiod		= NULL, /* to be filled by pika_setup_leds() */
+	},
+};
+
+/* Platform data wrapping the LED table for the "leds-gpio" device. */
+static struct gpio_led_platform_data warp_gpio_led_data = {
+	.leds		= warp_gpio_led_pins,
+	.num_leds	= ARRAY_SIZE(warp_gpio_led_pins),
+};
+
+/* "leds-gpio" platform device, registered by pika_setup_leds(). */
+static struct platform_device warp_gpio_leds = {
+	.name	= "leds-gpio",
+	.id	= -1,
+	.dev	= {
+		.platform_data = &warp_gpio_led_data,
+	},
+};
+
+/*
+ * Critical-temperature interrupt handler, requested for the ad7414 irq by
+ * pika_setup_critical_temp().
+ *
+ * This handler never returns: it disables local interrupts, switches the
+ * green LED off, logs an emergency message and then loops forever, toggling
+ * the red LED every 500 ms.
+ */
+static irqreturn_t temp_isr(int irq, void *context)
+{
+	int value = 1;
+
+	local_irq_disable();
+
+	gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
+
+	printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n");
+
+	while (1) {
+		if (dtm_fpga) {
+			/*
+			 * Read FPGA register 0x14 and write the same value
+			 * back. NOTE(review): presumably this acknowledges
+			 * or re-arms something in the FPGA -- confirm
+			 * against the hardware documentation.
+			 */
+			unsigned reset = in_be32(dtm_fpga + 0x14);
+			out_be32(dtm_fpga + 0x14, reset);
+		}
+
+		/* Blink the red LED: one state change per 500 ms. */
+		gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value);
+		value ^= 1;
+		mdelay(500);
+	}
+
+	/* Not reached */
+	return IRQ_HANDLED;
+}
+
+/*
+ * Because green and red power LEDs are normally driven by leds-gpio driver,
+ * but in case of critical temperature shutdown we want to drive them
+ * ourselves, we acquire both and then create leds-gpio platform device
+ * ourselves, instead of doing it through device tree. This way we can still
+ * keep access to the gpios and use them when needed.
+ *
+ * Returns 0 on success (including the case where no matching LED child node
+ * exists), -ENOENT when the "warp-power-leds" node is missing, or the error
+ * from acquiring a GPIO or registering the platform device. On failure every
+ * GPIO acquired so far is released again.
+ */
+static int pika_setup_leds(void)
+{
+	struct device_node *np, *child;
+	struct gpio_desc *gpio;
+	struct gpio_led *led;
+	int led_count = 0;
+	int error;
+	int i;
+
+	np = of_find_compatible_node(NULL, NULL, "warp-power-leds");
+	if (!np) {
+		printk(KERN_ERR __FILE__ ": Unable to find leds\n");
+		return -ENOENT;
+	}
+
+	for_each_child_of_node(np, child) {
+		for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+			led = &warp_gpio_led_pins[i];
+
+			if (!of_node_name_eq(child, led->name))
+				continue;
+
+			if (led->gpiod) {
+				printk(KERN_ERR __FILE__ ": %s led has already been defined\n",
+				       led->name);
+				continue;
+			}
+
+			gpio = fwnode_gpiod_get_index(of_fwnode_handle(child),
+						      NULL, 0, GPIOD_ASIS,
+						      led->name);
+			error = PTR_ERR_OR_ZERO(gpio);
+			if (error) {
+				printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n",
+				       led->name, error);
+				/*
+				 * Leaving the iterator early: drop both the
+				 * child reference it holds and the parent
+				 * reference from the lookup above.
+				 */
+				of_node_put(child);
+				of_node_put(np);
+				goto err_cleanup_pins;
+			}
+
+			led->gpiod = gpio;
+			led_count++;
+		}
+	}
+
+	of_node_put(np);
+
+	/* Skip device registration if no leds have been defined */
+	if (led_count) {
+		error = platform_device_register(&warp_gpio_leds);
+		if (error) {
+			printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n",
+			       error);
+			goto err_cleanup_pins;
+		}
+	}
+
+	return 0;
+
+err_cleanup_pins:
+	/* Entries that were never acquired still hold NULL here. */
+	for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+		led = &warp_gpio_led_pins[i];
+		gpiod_put(led->gpiod);
+		led->gpiod = NULL;
+	}
+	return error;
+}
+
+/*
+ * Prepare the critical-temperature shutdown path: set up the LEDs, program
+ * the sensor's two limit registers and hook temp_isr() to the sensor
+ * interrupt described by @np.
+ *
+ * Failures are logged only; the results of pika_setup_leds() and of the two
+ * SMBus writes are not checked.
+ */
+static void pika_setup_critical_temp(struct device_node *np,
+				     struct i2c_client *client)
+{
+	int irq, rc;
+
+	/* Do this before enabling critical temp interrupt since we
+	 * may immediately interrupt.
+	 */
+	pika_setup_leds();
+
+	/* These registers are in 1 degree increments. */
+	i2c_smbus_write_byte_data(client, 2, 65); /* Thigh */
+	i2c_smbus_write_byte_data(client, 3,  0); /* Tlow */
+
+	irq = irq_of_parse_and_map(np, 0);
+	if (irq == 0) {
+		printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n");
+		return;
+	}
+
+	rc = request_irq(irq, temp_isr, 0, "ad7414", NULL);
+	if (rc != 0)
+		printk(KERN_ERR __FILE__
+		       ": Unable to request ad7414 irq %d = %d\n", irq, rc);
+}
+
+/*
+ * Sample bit 14 of the FPGA register at offset 0x34 and warn when it
+ * changes to a non-zero value. The last sampled value is remembered across
+ * calls so the warning is printed on transitions only.
+ */
+static inline void pika_dtm_check_fan(void __iomem *fpga)
+{
+	static int fan_state;
+	u32 fan = in_be32(fpga + 0x34) & (1 << 14);
+
+	/* Nothing to do while the bit keeps its previous value. */
+	if (fan_state == fan)
+		return;
+
+	fan_state = fan;
+	if (!fan)
+		return;
+
+	printk(KERN_WARNING "Fan rotation error detected."
+		   " Please check hardware.\n");
+}
+
+/*
+ * Body of the "pika-dtm" kernel thread started by pika_dtm_start().
+ *
+ * Looks up the "adi,ad7414" I2C client, arms the critical-temperature
+ * handling and then, about once per second until asked to stop, reads word
+ * register 0 from the sensor, writes the byte-swapped value to FPGA offset
+ * 0x20 and checks the fan status.
+ *
+ * @fpga: mapping of the FPGA registers (the kthread data pointer).
+ *
+ * Returns -ENOENT when the sensor node or its I2C client cannot be found,
+ * otherwise 0 once the thread has been told to stop.
+ */
+static int pika_dtm_thread(void __iomem *fpga)
+{
+	struct device_node *np;
+	struct i2c_client *client;
+
+	np = of_find_compatible_node(NULL, NULL, "adi,ad7414");
+	if (np == NULL)
+		return -ENOENT;
+
+	client = of_find_i2c_device_by_node(np);
+	if (client == NULL) {
+		of_node_put(np);
+		return -ENOENT;
+	}
+
+	pika_setup_critical_temp(np, client);
+
+	of_node_put(np);
+
+	printk(KERN_INFO "Warp DTM thread running.\n");
+
+	while (!kthread_should_stop()) {
+		int val;
+
+		val = i2c_smbus_read_word_data(client, 0);
+		if (val < 0)
+			dev_dbg(&client->dev, "DTM read temp failed.\n");
+		else {
+			s16 temp = swab16(val);
+			out_be32(fpga + 0x20, temp);
+		}
+
+		pika_dtm_check_fan(fpga);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+
+	/* Drop the device reference taken by of_find_i2c_device_by_node(). */
+	put_device(&client->dev);
+
+	return 0;
+}
+
+/*
+ * Late initcall for the warp machine: map the "pika,fpga" registers into
+ * dtm_fpga, report the POST results and start the "pika-dtm" thread.
+ *
+ * Returns 0 on success, -ENOENT when the FPGA node cannot be found or
+ * mapped, or the kthread_run() error (after unmapping the registers).
+ */
+static int __init pika_dtm_start(void)
+{
+	struct device_node *fpga_node;
+	struct task_struct *task;
+
+	fpga_node = of_find_compatible_node(NULL, NULL, "pika,fpga");
+	if (!fpga_node)
+		return -ENOENT;
+
+	dtm_fpga = of_iomap(fpga_node, 0);
+	of_node_put(fpga_node);
+	if (!dtm_fpga)
+		return -ENOENT;
+
+	/* Must get post info before thread starts. */
+	warp_post_info();
+
+	task = kthread_run(pika_dtm_thread, dtm_fpga, "pika-dtm");
+	if (!IS_ERR(task))
+		return 0;
+
+	iounmap(dtm_fpga);
+	return PTR_ERR(task);
+}
+machine_late_initcall(warp, pika_dtm_start);
+
+#else /* !CONFIG_SENSORS_AD7414 */
+
+/* Without the AD7414 sensor support only the POST report is run. */
+machine_late_initcall(warp, warp_post_info);
+
+#endif
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
new file mode 100644
index 0000000000..2071a0abe0
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# uic.o and machine_check.o are built unconditionally; every other object
+# is built only when its Kconfig symbol is enabled.
+obj-y				+= uic.o machine_check.o
+obj-$(CONFIG_4xx_SOC)		+= soc.o
+obj-$(CONFIG_PCI)		+= pci.o
+obj-$(CONFIG_PPC4xx_HSTA_MSI)	+= hsta_msi.o
+obj-$(CONFIG_PPC4xx_CPM)	+= cpm.o
+obj-$(CONFIG_PPC4xx_GPIO)	+= gpio.o
diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c
new file mode 100644
index 0000000000..670f8ad446
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC 4xx Clock and Power Management
+ *
+ * Copyright (C) 2010, Applied Micro Circuits Corporation
+ * Victor Gallardo (vgallardo@apm.com)
+ *
+ * Based on arch/powerpc/platforms/44x/idle.c:
+ * Jerone Young <jyoung5@us.ibm.com>
+ * Copyright 2008 IBM Corp.
+ *
+ * Based on arch/powerpc/sysdev/fsl_pmc.c:
+ * Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Copyright 2009  MontaVista Software, Inc.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+#include <linux/suspend.h>
+#include <asm/dcr.h>
+#include <asm/dcr-native.h>
+#include <asm/machdep.h>
+
+/* Indices into cpm.dcr_offset[] for the three CPM registers. */
+#define CPM_ER	0
+#define CPM_FR	1
+#define CPM_SR	2
+
+/* Indices into idle_mode[]. */
+#define CPM_IDLE_WAIT	0
+#define CPM_IDLE_DOZE	1
+
+/* Driver state for the single CPM controller; filled in by cpm_init(). */
+struct cpm {
+	dcr_host_t	dcr_host;	/* DCR mapping from dcr_map() */
+	unsigned int	dcr_offset[3];	/* DCR offset per CPM_ER/FR/SR index */
+	unsigned int	powersave_off;	/* set by the "powersave=off" option */
+	unsigned int	unused;		/* "unused-units" mask, applied at init */
+	unsigned int	idle_doze;	/* "idle-doze" mask used for doze idle */
+	unsigned int	standby;	/* "standby" mask for PM_SUSPEND_STANDBY */
+	unsigned int	suspend;	/* "suspend" mask for PM_SUSPEND_MEM */
+};
+
+static struct cpm cpm;
+
+/* One selectable idle mode: its sysfs name and whether it is active. */
+struct cpm_idle_mode {
+	unsigned int enabled;
+	const char  *name;
+};
+
+/*
+ * Table of idle modes, indexed by CPM_IDLE_*. cpm_idle_config() keeps at
+ * most one entry enabled at a time.
+ */
+static struct cpm_idle_mode idle_mode[] = {
+	[CPM_IDLE_WAIT] = { 1, "wait" }, /* default */
+	[CPM_IDLE_DOZE] = { 0, "doze" },
+};
+
+/*
+ * OR @mask into the CPM register selected by @cpm_reg (CPM_ER, CPM_FR or
+ * CPM_SR) and return the value the register held beforehand, so the caller
+ * can restore it later.
+ */
+static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask)
+{
+	unsigned int old;
+
+	/* CPM controller supports 3 different types of sleep interface
+	 * known as class 1, 2 and 3. For class 1 units, they are
+	 * unconditionally put to sleep when the corresponding CPM bit is
+	 * set. For class 2 and 3 units this is not case; if they can be
+	 * put to sleep, they will. Here we do not verify, we just
+	 * set them and expect them to eventually go off when they can.
+	 */
+	old = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]);
+	dcr_write(cpm.dcr_host, cpm.dcr_offset[cpm_reg], mask | old);
+
+	/* previous contents, for a later restore */
+	return old;
+}
+
+/*
+ * Enter the processor wait state: save the MSR, write it back with
+ * MSR_WE, MSR_EE, MSR_CE and MSR_DE additionally set, then restore the
+ * saved MSR value. Each MSR write is followed by isync().
+ */
+static void cpm_idle_wait(void)
+{
+	unsigned long msr_save;
+
+	/* save off initial state */
+	msr_save = mfmsr();
+	/* sync required when CPM0_ER[CPU] is set */
+	mb();
+	/* set wait state MSR */
+	mtmsr(msr_save|MSR_WE|MSR_EE|MSR_CE|MSR_DE);
+	isync();
+	/* return to initial state */
+	mtmsr(msr_save);
+	isync();
+}
+
+/*
+ * Sleep with the units in @mask additionally enabled in CPM_ER: set the
+ * bits, enter the wait state, then write the previous CPM_ER value back.
+ */
+static void cpm_idle_sleep(unsigned int mask)
+{
+	/* apply the mask, remembering what CPM_ER held before */
+	unsigned int saved_er = cpm_set(CPM_ER, mask);
+
+	/* go to wait state so that CPM0_ER[CPU] can take effect */
+	cpm_idle_wait();
+
+	/* put CPM_ER back the way it was */
+	dcr_write(cpm.dcr_host, cpm.dcr_offset[CPM_ER], saved_er);
+}
+
+/* Doze idle: sleep with the "idle-doze" unit mask applied to CPM_ER. */
+static void cpm_idle_doze(void)
+{
+	cpm_idle_sleep(cpm.idle_doze);
+}
+
+/*
+ * Make @mode (an index into idle_mode[]) the only enabled idle mode.
+ * Does nothing when that mode is already enabled.
+ */
+static void cpm_idle_config(int mode)
+{
+	int idx;
+
+	if (!idle_mode[mode].enabled) {
+		/* clear every entry first, then switch the requested one on */
+		for (idx = 0; idx < ARRAY_SIZE(idle_mode); idx++)
+			idle_mode[idx].enabled = 0;
+
+		idle_mode[mode].enabled = 1;
+	}
+}
+
+/*
+ * sysfs "idle" show callback: prints every idle mode name separated by
+ * spaces, with the enabled one in square brackets, terminated by a newline
+ * (e.g. "[wait] doze").
+ *
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t cpm_idle_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	int len = 0;
+	int i;
+
+	/* sysfs_emit_at() bounds every write to the sysfs page. */
+	for (i = 0; i < ARRAY_SIZE(idle_mode); i++) {
+		if (idle_mode[i].enabled)
+			len += sysfs_emit_at(buf, len, "[%s] ",
+					     idle_mode[i].name);
+		else
+			len += sysfs_emit_at(buf, len, "%s ",
+					     idle_mode[i].name);
+	}
+
+	/* convert the last space to a newline */
+	if (len)
+		buf[len - 1] = '\n';
+
+	return len;
+}
+
+/*
+ * sysfs "idle" store callback: selects the idle mode whose name equals the
+ * written text, taken up to the first newline (or all @n bytes when there
+ * is none).
+ *
+ * The name must match in full; prefixes and empty input are rejected.
+ *
+ * Returns @n when a mode was selected, -EINVAL otherwise.
+ */
+static ssize_t cpm_idle_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t n)
+{
+	const char *p;
+	size_t len;
+	int i;
+
+	p = memchr(buf, '\n', n);
+	len = p ? (size_t)(p - buf) : n;
+
+	for (i = 0; i < ARRAY_SIZE(idle_mode); i++) {
+		const char *name = idle_mode[i].name;
+
+		/*
+		 * Compare lengths as well: strncmp() alone would accept any
+		 * prefix of a name, including the empty string.
+		 */
+		if (len == strlen(name) && strncmp(buf, name, len) == 0) {
+			cpm_idle_config(i);
+			return n;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/* The "idle" sysfs attribute (mode 0644) backed by the callbacks above. */
+static struct kobj_attribute cpm_idle_attr =
+	__ATTR(idle, 0644, cpm_idle_show, cpm_idle_store);
+
+/*
+ * Create the "idle" sysfs attribute on the device of CPU 0. A failure,
+ * including a missing CPU device, is only logged.
+ */
+static void __init cpm_idle_config_sysfs(void)
+{
+	struct device *dev;
+	int ret;
+
+	dev = get_cpu_device(0);
+
+	/* Do not dereference a missing CPU device; report it as a failure. */
+	ret = dev ? sysfs_create_file(&dev->kobj, &cpm_idle_attr.attr)
+		  : -ENODEV;
+	if (ret)
+		printk(KERN_WARNING
+		       "cpm: failed to create idle sysfs entry\n");
+}
+
+/*
+ * Idle routine installed as ppc_md.power_save by cpm_init(): doze when the
+ * doze mode is enabled, plain wait otherwise.
+ */
+static void cpm_idle(void)
+{
+	if (!idle_mode[CPM_IDLE_DOZE].enabled) {
+		cpm_idle_wait();
+		return;
+	}
+
+	cpm_idle_doze();
+}
+
+/*
+ * platform_suspend_ops .valid callback: a state is reported as supported
+ * (1) only when a non-zero unit mask was configured for it; every other
+ * state yields 0.
+ */
+static int cpm_suspend_valid(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY)
+		return cpm.standby != 0;
+
+	if (state == PM_SUSPEND_MEM)
+		return cpm.suspend != 0;
+
+	return 0;
+}
+
+/*
+ * Sleep with the units in @mask enabled in CPM_ER while TCR_DIE is cleared
+ * in the TCR register; the saved TCR value is written back afterwards.
+ */
+static void cpm_suspend_standby(unsigned int mask)
+{
+	unsigned long tcr_save;
+
+	/* disable decrement interrupt */
+	tcr_save = mfspr(SPRN_TCR);
+	mtspr(SPRN_TCR, tcr_save & ~TCR_DIE);
+
+	/* go to sleep state */
+	cpm_idle_sleep(mask);
+
+	/* restore decrement interrupt */
+	mtspr(SPRN_TCR, tcr_save);
+}
+
+/*
+ * platform_suspend_ops .enter callback: sleep using the unit mask that
+ * belongs to @state. States other than standby and mem do nothing.
+ * Always returns 0.
+ */
+static int cpm_suspend_enter(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY)
+		cpm_suspend_standby(cpm.standby);
+	else if (state == PM_SUSPEND_MEM)
+		cpm_suspend_standby(cpm.suspend);
+
+	return 0;
+}
+
+/*
+ * Suspend callbacks, registered by cpm_init() when a standby or suspend
+ * mask is configured.
+ */
+static const struct platform_suspend_ops cpm_suspend_ops = {
+	.valid		= cpm_suspend_valid,
+	.enter		= cpm_suspend_enter,
+};
+
+/*
+ * Read the first 32-bit cell of device-tree property @name on node @np.
+ *
+ * Returns the cell value, or 0 when the property is missing or shorter
+ * than one cell.
+ */
+static int __init cpm_get_uint_property(struct device_node *np,
+				 const char *name)
+{
+	u32 val = 0;
+
+	/*
+	 * of_property_read_u32() does the length check and the endian
+	 * conversion; on failure val keeps its initial 0.
+	 */
+	of_property_read_u32(np, name, &val);
+
+	return val;
+}
+
+/*
+ * Late initcall: set up the CPM controller described by the "ibm,cpm"
+ * device-tree node.
+ *
+ * Unless "powersave=off" was given, cpm_idle() is installed as the
+ * power_save hook first, before the node is even looked up. The DCR range
+ * is then mapped, the register order is chosen from "er-offset", the unit
+ * masks are read, unused units are switched off, and the sysfs and suspend
+ * interfaces are exported when the corresponding masks are non-zero.
+ *
+ * Returns 0 on success or -EINVAL when the node is missing or its DCR
+ * range cannot be parsed or mapped.
+ */
+static int __init cpm_init(void)
+{
+	struct device_node *np;
+	int dcr_base, dcr_len;
+	int ret = 0;
+
+	if (!cpm.powersave_off) {
+		cpm_idle_config(CPM_IDLE_WAIT);
+		ppc_md.power_save = &cpm_idle;
+	}
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,cpm");
+	if (!np) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+
+	if (dcr_base == 0 || dcr_len == 0) {
+		printk(KERN_ERR "cpm: could not parse dcr property for %pOF\n",
+		       np);
+		ret = -EINVAL;
+		goto node_put;
+	}
+
+	cpm.dcr_host = dcr_map(np, dcr_base, dcr_len);
+
+	if (!DCR_MAP_OK(cpm.dcr_host)) {
+		printk(KERN_ERR "cpm: failed to map dcr property for %pOF\n",
+		       np);
+		ret = -EINVAL;
+		goto node_put;
+	}
+
+	/* All 4xx SoCs with a CPM controller have one of two
+	 * different order for the CPM registers. Some have the
+	 * CPM registers in the following order (ER,FR,SR). The
+	 * others have them in the following order (SR,ER,FR).
+	 */
+
+	/* A missing "er-offset" property reads as 0, i.e. (ER,FR,SR). */
+	if (cpm_get_uint_property(np, "er-offset") == 0) {
+		cpm.dcr_offset[CPM_ER] = 0;
+		cpm.dcr_offset[CPM_FR] = 1;
+		cpm.dcr_offset[CPM_SR] = 2;
+	} else {
+		cpm.dcr_offset[CPM_ER] = 1;
+		cpm.dcr_offset[CPM_FR] = 2;
+		cpm.dcr_offset[CPM_SR] = 0;
+	}
+
+	/* Now let's see what IPs to turn off for the following modes */
+
+	cpm.unused = cpm_get_uint_property(np, "unused-units");
+	cpm.idle_doze = cpm_get_uint_property(np, "idle-doze");
+	cpm.standby = cpm_get_uint_property(np, "standby");
+	cpm.suspend = cpm_get_uint_property(np, "suspend");
+
+	/* If some IPs are unused let's turn them off now */
+
+	if (cpm.unused) {
+		cpm_set(CPM_ER, cpm.unused);
+		cpm_set(CPM_FR, cpm.unused);
+	}
+
+	/* Now let's export interfaces */
+
+	if (!cpm.powersave_off && cpm.idle_doze)
+		cpm_idle_config_sysfs();
+
+	if (cpm.standby || cpm.suspend)
+		suspend_set_ops(&cpm_suspend_ops);
+	/* Success falls through here too: the node reference is always dropped. */
+node_put:
+	of_node_put(np);
+out:
+	return ret;
+}
+
+late_initcall(cpm_init);
+
+/*
+ * Handler for the "powersave=off" boot option: records the request in
+ * cpm.powersave_off, which cpm_init() checks before installing the idle
+ * hook and the sysfs attribute. Always returns 1.
+ */
+static int __init cpm_powersave_off(char *arg)
+{
+	cpm.powersave_off = 1;
+	return 1;
+}
+__setup("powersave=off", cpm_powersave_off);
diff --git a/arch/powerpc/platforms/4xx/gpio.c b/arch/powerpc/platforms/4xx/gpio.c
new file mode 100644
index 0000000000..e5f2319e5c
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/gpio.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC4xx gpio driver
+ *
+ * Copyright (c) 2008 Harris Corporation
+ * Copyright (c) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *
+ * Author: Steve Falco <sfalco@harris.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/gpio/legacy-of-mm-gpiochip.h>
+#include <linux/gpio/driver.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+/* Single-bit mask for a pin; gpio 0 is the most significant bit. */
+#define GPIO_MASK(gpio)		(0x80000000 >> (gpio))
+/*
+ * Two-bit field mask, two bits per pin starting at the most significant
+ * bits. Only meaningful for an argument in 0..15: larger values shift the
+ * 32-bit constant by 32 bits or more.
+ */
+#define GPIO_MASK2(gpio)	(0xc0000000 >> ((gpio) * 2))
+
+/*
+ * Physical GPIO register layout.
+ *
+ * Consecutive big-endian 32-bit registers. This file itself accesses
+ * only or, tcr, osrl/osrh, tsrl/tsrh, odr and ir.
+ */
+struct ppc4xx_gpio {
+	__be32 or;	/* written by __ppc4xx_gpio_set() to set a pin level */
+	__be32 tcr;	/* bit set: pin driven; bit clear: pin floated */
+	__be32 osrl;	/* two bits per pin, pins 0-15 */
+	__be32 osrh;	/* two bits per pin, pins 16-31 */
+	__be32 tsrl;	/* two bits per pin, pins 0-15 */
+	__be32 tsrh;	/* two bits per pin, pins 16-31 */
+	__be32 odr;	/* open-drain function, cleared on direction change */
+	__be32 ir;	/* read by ppc4xx_gpio_get() for the pin level */
+	__be32 rr1;
+	__be32 rr2;
+	__be32 rr3;
+	__be32 reserved1;
+	__be32 isr1l;
+	__be32 isr1h;
+	__be32 isr2l;
+	__be32 isr2h;
+	__be32 isr3l;
+	__be32 isr3h;
+};
+
+/*
+ * Per-controller state: the memory-mapped gpio chip plus the spinlock that
+ * the set and direction callbacks take around their register updates.
+ */
+struct ppc4xx_gpio_chip {
+	struct of_mm_gpio_chip mm_gc;
+	spinlock_t lock;
+};
+
+/*
+ * GPIO LIB API implementation for GPIOs
+ *
+ * There are a maximum of 32 gpios in each gpio controller.
+ */
+
+/* Return 1 when the bit for @gpio is set in the IR register, else 0. */
+static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct ppc4xx_gpio __iomem *regs = to_of_mm_gpio_chip(gc)->regs;
+	u32 input = in_be32(&regs->ir);
+
+	return (input & GPIO_MASK(gpio)) != 0;
+}
+
+/*
+ * Set (@val non-zero) or clear (@val zero) the bit for @gpio in the OR
+ * register. Takes no lock itself; both callers in this file hold the chip
+ * lock around it.
+ */
+static inline void
+__ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct ppc4xx_gpio __iomem *regs = to_of_mm_gpio_chip(gc)->regs;
+
+	if (!val) {
+		clrbits32(&regs->or, GPIO_MASK(gpio));
+		return;
+	}
+
+	setbits32(&regs->or, GPIO_MASK(gpio));
+}
+
+/*
+ * gpio_chip .set callback: update the output bit for @gpio with the chip
+ * lock held and interrupts disabled, then emit a debug trace.
+ */
+static void
+ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct ppc4xx_gpio_chip *priv = gpiochip_get_data(gc);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&priv->lock, irqflags);
+	__ppc4xx_gpio_set(gc, gpio, val);
+	spin_unlock_irqrestore(&priv->lock, irqflags);
+
+	pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+}
+
+/*
+ * gpio_chip .direction_input callback: clear the pin's open-drain bit,
+ * float the pin by clearing its TCR bit, and clear its two-bit fields in
+ * the OSR/TSR select registers. Always returns 0.
+ */
+static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+	struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	/* Disable open-drain function */
+	clrbits32(&regs->odr, GPIO_MASK(gpio));
+
+	/* Float the pin */
+	clrbits32(&regs->tcr, GPIO_MASK(gpio));
+
+	/* Bits 0-15 use TSRL/OSRL, bits 16-31 use TSRH/OSRH */
+	if (gpio < 16) {
+		clrbits32(&regs->osrl, GPIO_MASK2(gpio));
+		clrbits32(&regs->tsrl, GPIO_MASK2(gpio));
+	} else {
+		/*
+		 * The high registers hold pins 16-31 in the same layout, so
+		 * the field index restarts at 0. Using the raw pin number
+		 * would shift the 32-bit mask by 32 bits or more.
+		 */
+		clrbits32(&regs->osrh, GPIO_MASK2(gpio - 16));
+		clrbits32(&regs->tsrh, GPIO_MASK2(gpio - 16));
+	}
+
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
+}
+
+/*
+ * gpio_chip .direction_output callback: latch the initial level @val,
+ * clear the pin's open-drain bit, drive the pin by setting its TCR bit,
+ * and clear its two-bit fields in the OSR/TSR select registers.
+ * Always returns 0.
+ */
+static int
+ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+	struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	/* First set initial value */
+	__ppc4xx_gpio_set(gc, gpio, val);
+
+	/* Disable open-drain function */
+	clrbits32(&regs->odr, GPIO_MASK(gpio));
+
+	/* Drive the pin */
+	setbits32(&regs->tcr, GPIO_MASK(gpio));
+
+	/* Bits 0-15 use TSRL/OSRL, bits 16-31 use TSRH/OSRH */
+	if (gpio < 16) {
+		clrbits32(&regs->osrl, GPIO_MASK2(gpio));
+		clrbits32(&regs->tsrl, GPIO_MASK2(gpio));
+	} else {
+		/*
+		 * The high registers hold pins 16-31 in the same layout, so
+		 * the field index restarts at 0. Using the raw pin number
+		 * would shift the 32-bit mask by 32 bits or more.
+		 */
+		clrbits32(&regs->osrh, GPIO_MASK2(gpio - 16));
+		clrbits32(&regs->tsrh, GPIO_MASK2(gpio - 16));
+	}
+
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+
+	return 0;
+}
+
+/*
+ * Arch initcall: register one 32-line gpio chip for every device-tree node
+ * compatible with "ibm,ppc4xx-gpio".
+ *
+ * A failure for one node is logged and its allocation freed; the remaining
+ * nodes are still processed. Always returns 0.
+ */
+static int __init ppc4xx_add_gpiochips(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc4xx-gpio") {
+		struct ppc4xx_gpio_chip *chip;
+		struct gpio_chip *gc;
+		int ret = -ENOMEM;
+
+		chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+		if (chip) {
+			spin_lock_init(&chip->lock);
+
+			gc = &chip->mm_gc.gc;
+			gc->ngpio = 32;
+			gc->direction_input = ppc4xx_gpio_dir_in;
+			gc->direction_output = ppc4xx_gpio_dir_out;
+			gc->get = ppc4xx_gpio_get;
+			gc->set = ppc4xx_gpio_set;
+
+			ret = of_mm_gpiochip_add_data(np, &chip->mm_gc, chip);
+		}
+
+		if (!ret)
+			continue;
+
+		/* chip is NULL when the allocation itself failed */
+		pr_err("%pOF: registration failed with status %d\n", np, ret);
+		kfree(chip);
+		/* try others anyway */
+	}
+	return 0;
+}
+arch_initcall(ppc4xx_add_gpiochips);
diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
new file mode 100644
index 0000000000..c6bd846b0d
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MSI support for PPC4xx SoCs using High Speed Transfer Assist (HSTA) for
+ * generation of the interrupt.
+ *
+ * Copyright © 2013 Alistair Popple <alistair@popple.id.au> IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * State of the HSTA MSI block. A single static instance exists; it is
+ * filled in by hsta_msi_probe().
+ */
+struct ppc4xx_hsta_msi {
+	struct device *dev;
+
+	/* The ioremapped HSTA MSI IO space */
+	u32 __iomem *data;
+
+	/* Physical address of HSTA MSI IO space */
+	u64 address;
+	/* Allocator for the irq_map offsets */
+	struct msi_bitmap bmp;
+
+	/* An array mapping offsets to hardware IRQs */
+	int *irq_map;
+
+	/* Number of hwirqs supported */
+	int irq_count;
+};
+static struct ppc4xx_hsta_msi ppc4xx_hsta_msi;
+
+/*
+ * controller_ops .setup_msi_irqs callback: for every not-yet-associated MSI
+ * descriptor of @dev, allocate one offset from the bitmap, look up the irq
+ * stored for it in irq_map, and program an MSI message whose address is the
+ * HSTA base plus offset * 0x10.
+ *
+ * Returns 0 on success, -EINVAL for MSI-X requests or unusable irq_map
+ * entries, or the allocator's negative result when no offset is free. On a
+ * failure for one descriptor the offset allocated for it is released again.
+ */
+static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_msg msg;
+	struct msi_desc *entry;
+	int irq, hwirq;
+	u64 addr;
+
+	/* We don't support MSI-X */
+	if (type == PCI_CAP_ID_MSIX) {
+		pr_debug("%s: MSI-X not supported.\n", __func__);
+		return -EINVAL;
+	}
+
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) {
+		irq = msi_bitmap_alloc_hwirqs(&ppc4xx_hsta_msi.bmp, 1);
+		if (irq < 0) {
+			pr_debug("%s: Failed to allocate msi interrupt\n",
+				 __func__);
+			return irq;
+		}
+
+		hwirq = ppc4xx_hsta_msi.irq_map[irq];
+		if (!hwirq) {
+			pr_err("%s: Failed mapping irq %d\n", __func__, irq);
+			/* Give the just-allocated offset back. */
+			msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+			return -EINVAL;
+		}
+
+		/*
+		 * HSTA generates interrupts on writes to 128-bit aligned
+		 * addresses.
+		 */
+		addr = ppc4xx_hsta_msi.address + irq*0x10;
+		msg.address_hi = upper_32_bits(addr);
+		msg.address_lo = lower_32_bits(addr);
+
+		/* Data is not used by the HSTA. */
+		msg.data = 0;
+
+		pr_debug("%s: Setup irq %d (0x%0llx)\n", __func__, hwirq,
+			 (((u64) msg.address_hi) << 32) | msg.address_lo);
+
+		if (irq_set_msi_desc(hwirq, entry)) {
+			pr_err(
+			"%s: Invalid hwirq %d specified in device tree\n",
+			__func__, hwirq);
+			msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+			return -EINVAL;
+		}
+		pci_write_msi_msg(hwirq, &msg);
+	}
+
+	return 0;
+}
+
+/*
+ * Reverse lookup in irq_map: return the offset whose entry equals @hwirq,
+ * or -EINVAL when no entry matches.
+ */
+static int hsta_find_hwirq_offset(int hwirq)
+{
+	int offset = 0;
+
+	/* Find the offset given the hwirq */
+	while (offset < ppc4xx_hsta_msi.irq_count) {
+		if (ppc4xx_hsta_msi.irq_map[offset] == hwirq)
+			return offset;
+		offset++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * controller_ops .teardown_msi_irqs callback: for every associated MSI
+ * descriptor of @dev, detach the descriptor from its irq, return the
+ * matching offset to the bitmap and reset entry->irq to 0.
+ *
+ * Triggers BUG() if a descriptor's irq is not present in irq_map.
+ */
+static void hsta_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+	int irq;
+
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) {
+		/* Translate the irq back to its irq_map offset. */
+		irq = hsta_find_hwirq_offset(entry->irq);
+
+		/* entry->irq should always be in irq_map */
+		BUG_ON(irq < 0);
+		irq_set_msi_desc(entry->irq, NULL);
+		msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+		pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__,
+			 entry->irq, irq);
+		entry->irq = 0;
+	}
+}
+
+/*
+ * Platform driver probe: map the HSTA MSI register space, allocate the
+ * offset bitmap and the offset-to-irq table, map every interrupt listed in
+ * the device-tree node, and install the MSI setup/teardown callbacks on
+ * every PCI host bridge currently in hose_list.
+ *
+ * Returns 0 on success, -EINVAL when the memory resource or interrupts are
+ * missing or cannot be mapped, -ENOMEM on mapping or allocation failure, or
+ * the error from msi_bitmap_alloc(). The error labels undo the earlier
+ * steps in reverse order.
+ */
+static int hsta_msi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+	int irq, ret, irq_count;
+	struct pci_controller *phb;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(dev, "Unable to get mmio space\n");
+		return -EINVAL;
+	}
+
+	irq_count = of_irq_count(dev->of_node);
+	if (!irq_count) {
+		dev_err(dev, "Unable to find IRQ range\n");
+		return -EINVAL;
+	}
+
+	ppc4xx_hsta_msi.dev = dev;
+	ppc4xx_hsta_msi.address = mem->start;
+	ppc4xx_hsta_msi.data = ioremap(mem->start, resource_size(mem));
+	ppc4xx_hsta_msi.irq_count = irq_count;
+	if (!ppc4xx_hsta_msi.data) {
+		dev_err(dev, "Unable to map memory\n");
+		return -ENOMEM;
+	}
+
+	ret = msi_bitmap_alloc(&ppc4xx_hsta_msi.bmp, irq_count, dev->of_node);
+	if (ret)
+		goto out;
+
+	ppc4xx_hsta_msi.irq_map = kmalloc_array(irq_count, sizeof(int),
+						GFP_KERNEL);
+	if (!ppc4xx_hsta_msi.irq_map) {
+		ret = -ENOMEM;
+		goto out1;
+	}
+
+	/* Setup a mapping from irq offsets to hardware irq numbers */
+	for (irq = 0; irq < irq_count; irq++) {
+		ppc4xx_hsta_msi.irq_map[irq] =
+			irq_of_parse_and_map(dev->of_node, irq);
+		if (!ppc4xx_hsta_msi.irq_map[irq]) {
+			dev_err(dev, "Unable to map IRQ\n");
+			ret = -EINVAL;
+			goto out2;
+		}
+	}
+
+	/* Route MSI setup/teardown of every known host bridge through HSTA. */
+	list_for_each_entry(phb, &hose_list, list_node) {
+		phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs;
+		phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs;
+	}
+	return 0;
+
+out2:
+	/* NOTE(review): irqs already mapped in the loop are not disposed. */
+	kfree(ppc4xx_hsta_msi.irq_map);
+
+out1:
+	msi_bitmap_free(&ppc4xx_hsta_msi.bmp);
+
+out:
+	iounmap(ppc4xx_hsta_msi.data);
+	return ret;
+}
+
+/* Device-tree match table: binds to "ibm,hsta-msi" nodes. */
+static const struct of_device_id hsta_msi_ids[] = {
+	{
+		.compatible = "ibm,hsta-msi",
+	},
+	{}
+};
+
+/* Platform driver definition; only a probe callback is provided. */
+static struct platform_driver hsta_msi_driver = {
+	.probe = hsta_msi_probe,
+	.driver = {
+		.name = "hsta-msi",
+		.of_match_table = hsta_msi_ids,
+	},
+};
+
+/*
+ * Subsys initcall: register the HSTA MSI platform driver and return the
+ * registration result. Only ever called at boot, hence __init.
+ */
+static int __init hsta_msi_init(void)
+{
+	return platform_driver_register(&hsta_msi_driver);
+}
+subsys_initcall(hsta_msi_init);
diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c
new file mode 100644
index 0000000000..a905da1d6f
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/machine_check.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+/*
+ * Report a 4xx machine check taken in kernel mode.
+ *
+ * The saved ESR value in @regs decides whether the event is logged as an
+ * "Instruction" or a "Data" machine check. For the instruction case the
+ * ESR_IMCP bit is cleared in the ESR register.
+ *
+ * Always returns 0.
+ */
+int machine_check_4xx(struct pt_regs *regs)
+{
+	unsigned long reason = regs->esr;
+	const char *kind = "Data";
+
+	if (reason & ESR_IMCP) {
+		kind = "Instruction";
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	}
+
+	/*
+	 * Emit the message with one printk so it cannot be split across
+	 * log records.
+	 */
+	printk("%s machine check in kernel mode.\n", kind);
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
new file mode 100644
index 0000000000..48626615b1
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -0,0 +1,2182 @@
+/*
+ * PCI / PCI-X / PCI-Express support for 4xx parts
+ *
+ * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Most PCI Express code is coming from Stefan Roese implementation for
+ * arch/ppc in the Denx tree, slightly reworked by me.
+ *
+ * Copyright 2007 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * Some of that comes itself from a previous implementation for 440SPE only
+ * by Roland Dreier:
+ *
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Roland Dreier <rolandd@cisco.com>
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <mm/mmu_decl.h>
+
+#include "pci.h"
+
+/* Set to 1 once a bridge has recorded the single global DMA offset */
+static int dma_offset_set;
+
+/* Low and high 32-bit halves of a 64-bit value */
+#define U64_TO_U32_LOW(val)	((u32)((val) & 0x00000000ffffffffULL))
+#define U64_TO_U32_HIGH(val)	((u32)((val) >> 32))
+
+/*
+ * Same split, selected on the width of resource_size_t: when it is no wider
+ * than u32, the low half is the value itself and the high half is 0.
+ */
+#define RES_TO_U32_LOW(val)	\
+	((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_LOW(val) : (val))
+#define RES_TO_U32_HIGH(val)	\
+	((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_HIGH(val) : (0))
+
+/*
+ * Tell from the PVR whether the CPU is a 440SPe rev A.
+ * Returns 1 on a match, 0 otherwise.
+ */
+static inline int ppc440spe_revA(void)
+{
+	/*
+	 * One PVR bit is masked out so that both 440SPe variants, with and
+	 * without RAID6 support, compare equal.
+	 */
+	return (mfspr(SPRN_PVR) & 0xffefffff) == 0x53421890;
+}
+
+/*
+ * PCI header fixup registered for every vendor/device ID. It only acts on
+ * devfn 0 of a root bus whose controller node is compatible with one of
+ * "ibm,plb-pci", "ibm,plb-pcix" or "ibm,plb-pciex": the 440EPx/440GRx
+ * bridges get PPC_INDIRECT_TYPE_BROKEN_MRM set, and all resources of the
+ * bridge device are blanked.
+ */
+static void fixup_ppc4xx_pci_bridge(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+	struct device_node *dn;
+	struct resource *bar;
+
+	/* Only devfn 0 on a bus that has no upstream bridge device */
+	if (dev->devfn != 0 || dev->bus->self != NULL)
+		return;
+
+	hose = pci_bus_to_host(dev->bus);
+	if (hose == NULL)
+		return;
+	dn = hose->dn;
+
+	/* Leave anything that is not a 4xx PLB bridge alone */
+	if (!(of_device_is_compatible(dn, "ibm,plb-pciex") ||
+	      of_device_is_compatible(dn, "ibm,plb-pcix") ||
+	      of_device_is_compatible(dn, "ibm,plb-pci")))
+		return;
+
+	if (of_device_is_compatible(dn, "ibm,plb440epx-pci") ||
+	    of_device_is_compatible(dn, "ibm,plb440grx-pci"))
+		hose->indirect_type |= PPC_INDIRECT_TYPE_BROKEN_MRM;
+
+	/*
+	 * The content of the host BARs doesn't fit well in the resource
+	 * management, so hide them from the kernel by zeroing them out.
+	 */
+	pci_dev_for_each_resource(dev, bar) {
+		bar->start = bar->end = 0;
+		bar->flags = 0;
+	}
+
+	printk(KERN_INFO "PCI: Hiding 4xx host bridge resources %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, fixup_ppc4xx_pci_bridge);
+
+/*
+ * Determine the inbound (PCI to memory) DMA window of a 4xx host bridge.
+ *
+ * @hose: controller whose device node may carry a "dma-ranges" property
+ * @reg:  mapped bridge registers (not used by this function)
+ * @res:  receives the window as seen from PCI (start, end, flags)
+ *
+ * Without a "dma-ranges" property the default window (2GB at PCI address 0,
+ * prefetchable) is used as is, without further checks. With one, the first
+ * usable memory entry replaces the default and the resulting window is
+ * validated against the global DMA offset, the amount of memory, alignment
+ * and the 32-bit limit.
+ *
+ * On success the global pci_dram_offset and the DMA window fields of @hose
+ * are updated and 0 is returned; -ENXIO is returned when validation fails.
+ */
+static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
+					  void __iomem *reg,
+					  struct resource *res)
+{
+	u64 size;
+	const u32 *ranges;
+	int rlen;
+	int pna = of_n_addr_cells(hose->dn);
+	/* Cells per entry: 3 for the PCI address, pna for the parent
+	 * address and 2 for the size
+	 */
+	int np = pna + 5;
+
+	/* Default */
+	res->start = 0;
+	size = 0x80000000;
+	res->end = size - 1;
+	res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH;
+
+	/* Get dma-ranges property */
+	ranges = of_get_property(hose->dn, "dma-ranges", &rlen);
+	if (ranges == NULL)
+		goto out;
+
+	/* Walk it */
+	while ((rlen -= np * 4) >= 0) {
+		u32 pci_space = ranges[0];
+		u64 pci_addr = of_read_number(ranges + 1, 2);
+		u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3);
+		/* Kept local so that "size" always describes the window
+		 * held in res, even when this entry ends up being skipped
+		 */
+		u64 range_size = of_read_number(ranges + pna + 3, 2);
+
+		ranges += np;
+		if (cpu_addr == OF_BAD_ADDR || range_size == 0)
+			continue;
+
+		/* We only care about memory */
+		if ((pci_space & 0x03000000) != 0x02000000)
+			continue;
+
+		/* We currently only support memory at 0, and pci_addr
+		 * within 32 bits space
+		 */
+		if (cpu_addr != 0 || pci_addr > 0xffffffff) {
+			printk(KERN_WARNING "%pOF: Ignored unsupported dma range"
+			       " 0x%016llx...0x%016llx -> 0x%016llx\n",
+			       hose->dn,
+			       pci_addr, pci_addr + range_size - 1, cpu_addr);
+			continue;
+		}
+
+		/* Check if not prefetchable */
+		if (!(pci_space & 0x40000000))
+			res->flags &= ~IORESOURCE_PREFETCH;
+
+
+		/* Use that */
+		size = range_size;
+		res->start = pci_addr;
+		/* Beware of 32 bits resources */
+		if (sizeof(resource_size_t) == sizeof(u32) &&
+		    (pci_addr + size) > 0x100000000ull)
+			res->end = 0xffffffff;
+		else
+			res->end = res->start + size - 1;
+		break;
+	}
+
+	/* We only support one global DMA offset */
+	if (dma_offset_set && pci_dram_offset != res->start) {
+		printk(KERN_ERR "%pOF: dma-ranges(s) mismatch\n", hose->dn);
+		return -ENXIO;
+	}
+
+	/* Check that we can fit all of memory as we don't support
+	 * DMA bounce buffers
+	 */
+	if (size < total_memory) {
+		printk(KERN_ERR "%pOF: dma-ranges too small "
+		       "(size=%llx total_memory=%llx)\n",
+		       hose->dn, size, (u64)total_memory);
+		return -ENXIO;
+	}
+
+	/* Check we are a power of 2 size and that base is a multiple of size*/
+	if ((size & (size - 1)) != 0  ||
+	    (res->start & (size - 1)) != 0) {
+		printk(KERN_ERR "%pOF: dma-ranges unaligned\n", hose->dn);
+		return -ENXIO;
+	}
+
+	/* Check that we are fully contained within 32 bits space if we are not
+	 * running on a 460sx or 476fpe which have 64 bit bus addresses.
+	 */
+	if (res->end > 0xffffffff &&
+	    !(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx")
+	      || of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) {
+		printk(KERN_ERR "%pOF: dma-ranges outside of 32 bits space\n",
+		       hose->dn);
+		return -ENXIO;
+	}
+ out:
+	/* Record the (single, global) offset and the per-hose window */
+	dma_offset_set = 1;
+	pci_dram_offset = res->start;
+	hose->dma_window_base_cur = res->start;
+	hose->dma_window_size = resource_size(res);
+
+	printk(KERN_INFO "4xx PCI DMA offset set to 0x%08lx\n",
+	       pci_dram_offset);
+	printk(KERN_INFO "4xx PCI DMA window base to 0x%016llx\n",
+	       (unsigned long long)hose->dma_window_base_cur);
+	printk(KERN_INFO "DMA window size 0x%016llx\n",
+	       (unsigned long long)hose->dma_window_size);
+	return 0;
+}
+
+/*
+ * 4xx PCI 2.x part
+ */
+
+/*
+ * Program one outbound window (PMM) of the PCI 2.x bridge.
+ *
+ * @hose:     controller, used for the warning message only
+ * @reg:      mapped bridge registers
+ * @plb_addr: window base on the PLB side
+ * @pci_addr: window base on the PCI side
+ * @size:     window size; must be a power of 2, at least 0x1000, with
+ *            @plb_addr aligned on it
+ * @flags:    resource flags; IORESOURCE_PREFETCH sets bit 1 of the mask
+ * @index:    PMM number, selects the register set at stride 0x10
+ *
+ * Returns 0 on success, -1 if the window cannot be programmed.
+ */
+static int __init ppc4xx_setup_one_pci_PMM(struct pci_controller	*hose,
+					   void __iomem			*reg,
+					   u64				plb_addr,
+					   u64				pci_addr,
+					   u64				size,
+					   unsigned int			flags,
+					   int				index)
+{
+	int stride = 0x10 * index;
+	u32 mask, pci_lo, pci_hi;
+
+	/* Hack warning ! The "old" PCI 2.x cell only let us configure the low
+	 * 32-bit of incoming PLB addresses. The top 4 bits of the 36-bit
+	 * address are actually hard wired to a value that appears to depend
+	 * on the specific SoC. For example, it's 0 on 440EP and 1 on 440EPx.
+	 *
+	 * So those top bits are simply cropped and ignored when programming
+	 * the chip. The device-tree therefore has to be right for the
+	 * specific part used (nothing here warns when it is not, though a
+	 * crash will follow quickly enough), but at least this code works
+	 * whatever the hard coded value is
+	 */
+	plb_addr &= 0xffffffffull;
+
+	/* Note: because of the cropping above, this does not check that the
+	 * address is below 4G, but that address and (address + size) both
+	 * sit in the same 4G
+	 */
+	if ((plb_addr + size) > 0xffffffffull)
+		goto bad_range;
+	if (!is_power_of_2(size) || size < 0x1000)
+		goto bad_range;
+	if ((plb_addr & (size - 1)) != 0)
+		goto bad_range;
+
+	/* Address mask for the window size, bit 0 set, bit 1 if prefetchable */
+	mask = (0xffffffffu << ilog2(size)) | 1;
+	if (flags & IORESOURCE_PREFETCH)
+		mask |= 2;
+
+	pci_hi = RES_TO_U32_HIGH(pci_addr);
+	pci_lo = RES_TO_U32_LOW(pci_addr);
+
+	writel(plb_addr, reg + PCIL0_PMM0LA + stride);
+	writel(pci_lo, reg + PCIL0_PMM0PCILA + stride);
+	writel(pci_hi, reg + PCIL0_PMM0PCIHA + stride);
+	writel(mask, reg + PCIL0_PMM0MA + stride);
+
+	return 0;
+
+ bad_range:
+	printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
+	return -1;
+}
+
+/*
+ * Program the outbound windows (PMMs) of the PCI 2.x bridge from the
+ * memory resources of @hose. At most three windows are used; a spare one
+ * is given to the legacy ISA memory hole if the hose has one and no
+ * resource already maps PCI address 0.
+ */
+static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
+					     void __iomem *reg)
+{
+	int i, used = 0, isa_hole_mapped = 0;
+
+	/* Setup outbound memory windows */
+	for (i = 0; i < 3; i++) {
+		struct resource *res = &hose->mem_resources[i];
+		resource_size_t offset = hose->mem_offset[i];
+
+		/* Anything that is not a memory window is skipped */
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+		if (used > 2) {
+			printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
+			break;
+		}
+
+		/* A window that can't be programmed doesn't use up a PMM */
+		if (ppc4xx_setup_one_pci_PMM(hose, reg,
+					     res->start,
+					     res->start - offset,
+					     resource_size(res),
+					     res->flags,
+					     used) != 0)
+			continue;
+		used++;
+
+		/* A resource whose PCI address is 0 is our ISA memory hole */
+		if (res->start == offset)
+			isa_hole_mapped = 1;
+	}
+
+	/* Handle ISA memory hole if not already covered */
+	if (used > 2 || isa_hole_mapped || !hose->isa_mem_size)
+		return;
+	if (ppc4xx_setup_one_pci_PMM(hose, reg, hose->isa_mem_phys, 0,
+				     hose->isa_mem_size, 0, used) == 0)
+		printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+		       hose->dn);
+}
+
+/*
+ * Program the inbound window (PTM1) of the PCI 2.x bridge so that the DMA
+ * window described by @res maps to local address 0, then set up the
+ * bridge's own BAR1/BAR2 and command register through config space.
+ */
+static void __init ppc4xx_configure_pci_PTMs(struct pci_controller *hose,
+					     void __iomem *reg,
+					     const struct resource *res)
+{
+	/* Size mask for the window, with bit 0 set */
+	u32 size_mask = (0xffffffffu << ilog2(resource_size(res))) | 1;
+
+	/* RAM is always at 0 local for now */
+	writel(0, reg + PCIL0_PTM1LA);
+	writel(size_mask, reg + PCIL0_PTM1MS);
+
+	/* Map on PCI side */
+	early_write_config_dword(hose, hose->first_busno, 0,
+				 PCI_BASE_ADDRESS_1, res->start);
+	early_write_config_dword(hose, hose->first_busno, 0,
+				 PCI_BASE_ADDRESS_2, 0x00000000);
+	early_write_config_word(hose, hose->first_busno, 0,
+				PCI_COMMAND, 0x0006);
+}
+
+/*
+ * Set up one 4xx PCI 2.x host bridge described by device node @np.
+ *
+ * Reads the config space and internal register locations from the node
+ * ("reg" entries 0 and 3), allocates a pci_controller, disables all
+ * windows, then programs the outbound windows from the node's ranges and
+ * the inbound window from its dma-ranges.
+ *
+ * Nothing is returned; failures are logged and the controller, if already
+ * allocated, is freed.
+ */
+static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
+{
+	struct resource rsrc_cfg;
+	struct resource rsrc_reg;
+	struct resource dma_window;
+	struct pci_controller *hose = NULL;
+	void __iomem *reg = NULL;
+	const int *bus_range;
+	int primary = 0;
+
+	/* Check if device is enabled */
+	if (!of_device_is_available(np)) {
+		printk(KERN_INFO "%pOF: Port disabled via device-tree\n", np);
+		return;
+	}
+
+	/* Fetch config space registers address */
+	if (of_address_to_resource(np, 0, &rsrc_cfg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI config register base !\n",
+		       np);
+		return;
+	}
+	/* Fetch host bridge internal registers address */
+	if (of_address_to_resource(np, 3, &rsrc_reg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI internal register base !\n",
+		       np);
+		return;
+	}
+
+	/* Check if primary bridge */
+	if (of_property_read_bool(np, "primary"))
+		primary = 1;
+
+	/* Get bus range if any */
+	bus_range = of_get_property(np, "bus-range", NULL);
+
+	/* Map registers */
+	reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
+	if (reg == NULL) {
+		printk(KERN_ERR "%pOF: Can't map registers !\n", np);
+		goto fail;
+	}
+
+	/* Allocate the host controller data structure */
+	hose = pcibios_alloc_controller(np);
+	if (!hose)
+		goto fail;
+
+	/* Without a "bus-range" property, cover buses 0x00-0xff */
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	/* Setup config space */
+	setup_indirect_pci(hose, rsrc_cfg.start, rsrc_cfg.start + 0x4, 0);
+
+	/* Disable all windows */
+	writel(0, reg + PCIL0_PMM0MA);
+	writel(0, reg + PCIL0_PMM1MA);
+	writel(0, reg + PCIL0_PMM2MA);
+	writel(0, reg + PCIL0_PTM1MS);
+	writel(0, reg + PCIL0_PTM2MS);
+
+	/* Parse outbound mapping resources */
+	pci_process_bridge_OF_ranges(hose, np, primary);
+
+	/* Parse inbound mapping resources */
+	if (ppc4xx_parse_dma_ranges(hose, reg, &dma_window) != 0)
+		goto fail;
+
+	/* Configure outbound ranges POMs */
+	ppc4xx_configure_pci_PMMs(hose, reg);
+
+	/* Configure inbound ranges PIMs */
+	ppc4xx_configure_pci_PTMs(hose, reg, &dma_window);
+
+	/* We don't need the registers anymore */
+	iounmap(reg);
+	return;
+
+ fail:
+	if (hose)
+		pcibios_free_controller(hose);
+	if (reg)
+		iounmap(reg);
+}
+
+/*
+ * 4xx PCI-X part
+ */
+
+/*
+ * Program one outbound window (POM) of the PCI-X bridge.
+ *
+ * @hose:     controller, used for the warning message only
+ * @reg:      mapped bridge registers
+ * @plb_addr: window base on the PLB side
+ * @pci_addr: window base on the PCI side
+ * @size:     window size; must be a power of 2, at least 0x1000, with
+ *            @plb_addr aligned on it
+ * @flags:    resource flags (not used here)
+ * @index:    0 selects POM0, any other value POM1
+ *
+ * Returns 0 on success, -1 if the window cannot be programmed.
+ */
+static int __init ppc4xx_setup_one_pcix_POM(struct pci_controller	*hose,
+					    void __iomem		*reg,
+					    u64				plb_addr,
+					    u64				pci_addr,
+					    u64				size,
+					    unsigned int		flags,
+					    int				index)
+{
+	u32 plb_hi, plb_lo, pci_hi, pci_lo, size_mask;
+	int usable;
+
+	usable = is_power_of_2(size) && size >= 0x1000 &&
+		 (plb_addr & (size - 1)) == 0;
+	if (!usable) {
+		printk(KERN_WARNING "%pOF: Resource out of range\n",
+		       hose->dn);
+		return -1;
+	}
+
+	/* Calculate register values */
+	plb_hi = RES_TO_U32_HIGH(plb_addr);
+	plb_lo = RES_TO_U32_LOW(plb_addr);
+	pci_hi = RES_TO_U32_HIGH(pci_addr);
+	pci_lo = RES_TO_U32_LOW(pci_addr);
+	size_mask = (0xffffffffu << ilog2(size)) | 0x1;
+
+	/* Program register values into the selected POM */
+	if (index != 0) {
+		writel(plb_hi, reg + PCIX0_POM1LAH);
+		writel(plb_lo, reg + PCIX0_POM1LAL);
+		writel(pci_hi, reg + PCIX0_POM1PCIAH);
+		writel(pci_lo, reg + PCIX0_POM1PCIAL);
+		writel(size_mask, reg + PCIX0_POM1SA);
+		return 0;
+	}
+
+	writel(plb_hi, reg + PCIX0_POM0LAH);
+	writel(plb_lo, reg + PCIX0_POM0LAL);
+	writel(pci_hi, reg + PCIX0_POM0PCIAH);
+	writel(pci_lo, reg + PCIX0_POM0PCIAL);
+	writel(size_mask, reg + PCIX0_POM0SA);
+
+	return 0;
+}
+
+/*
+ * Program the outbound windows (POMs) of the PCI-X bridge from the memory
+ * resources of @hose. At most two windows are used; a spare one is given
+ * to the legacy ISA memory hole if the hose has one and no resource
+ * already maps PCI address 0.
+ */
+static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
+					      void __iomem *reg)
+{
+	int i, used = 0, isa_hole_mapped = 0;
+
+	/* Setup outbound memory windows */
+	for (i = 0; i < 3; i++) {
+		struct resource *res = &hose->mem_resources[i];
+		resource_size_t offset = hose->mem_offset[i];
+
+		/* Anything that is not a memory window is skipped */
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+		if (used > 1) {
+			printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
+			break;
+		}
+
+		/* A window that can't be programmed doesn't use up a POM */
+		if (ppc4xx_setup_one_pcix_POM(hose, reg,
+					      res->start,
+					      res->start - offset,
+					      resource_size(res),
+					      res->flags,
+					      used) != 0)
+			continue;
+		used++;
+
+		/* A resource whose PCI address is 0 is our ISA memory hole */
+		if (res->start == offset)
+			isa_hole_mapped = 1;
+	}
+
+	/* Handle ISA memory hole if not already covered */
+	if (used > 1 || isa_hole_mapped || !hose->isa_mem_size)
+		return;
+	if (ppc4xx_setup_one_pcix_POM(hose, reg, hose->isa_mem_phys, 0,
+				      hose->isa_mem_size, 0, used) == 0)
+		printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+		       hose->dn);
+}
+
+/*
+ * Program the inbound window (PIM0) of the PCI-X bridge for the DMA window
+ * described by @res, then the bridge's BAR0 and command register.
+ *
+ * @big_pim:         also write the high size register (PIM0SAH)
+ * @enable_msi_hole: set bit 2 of the size/attribute register
+ */
+static void __init ppc4xx_configure_pcix_PIMs(struct pci_controller *hose,
+					      void __iomem *reg,
+					      const struct resource *res,
+					      int big_pim,
+					      int enable_msi_hole)
+{
+	/* Size mask for the window, with bit 0 set */
+	u32 attr = (0xffffffffu << ilog2(resource_size(res))) | 1;
+
+	if (res->flags & IORESOURCE_PREFETCH)
+		attr |= 0x2;
+	if (enable_msi_hole)
+		attr |= 0x4;
+
+	/* RAM is always at 0 */
+	writel(0x00000000, reg + PCIX0_PIM0LAH);
+	writel(0x00000000, reg + PCIX0_PIM0LAL);
+
+	/* Window size and attributes */
+	writel(attr, reg + PCIX0_PIM0SA);
+	if (big_pim)
+		writel(0xffffffff, reg + PCIX0_PIM0SAH);
+
+	/* Map on PCI side */
+	writel(0x00000000, reg + PCIX0_BAR0H);
+	writel(res->start, reg + PCIX0_BAR0L);
+	writew(0x0006, reg + PCIX0_COMMAND);
+}
+
+/*
+ * Set up one 4xx PCI-X host bridge described by device node @np.
+ *
+ * Reads the config space and internal register locations from the node
+ * ("reg" entries 0 and 3) and the "large-inbound-windows",
+ * "enable-msi-hole" and "primary" properties, allocates a pci_controller,
+ * disables all windows, then programs the outbound windows from the node's
+ * ranges and the inbound window from its dma-ranges.
+ *
+ * Nothing is returned; failures are logged and the controller, if already
+ * allocated, is freed.
+ */
+static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
+{
+	struct resource rsrc_cfg;
+	struct resource rsrc_reg;
+	struct resource dma_window;
+	struct pci_controller *hose = NULL;
+	void __iomem *reg = NULL;
+	const int *bus_range;
+	int big_pim, msi, primary;
+
+	/* Fetch config space registers address */
+	if (of_address_to_resource(np, 0, &rsrc_cfg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI-X config register base !\n",
+		       np);
+		return;
+	}
+	/* Fetch host bridge internal registers address */
+	if (of_address_to_resource(np, 3, &rsrc_reg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI-X internal register base !\n",
+		       np);
+		return;
+	}
+
+	/* Check if it supports large PIMs (440GX) */
+	big_pim = of_property_read_bool(np, "large-inbound-windows");
+
+	/* Check if we should enable MSIs inbound hole */
+	msi = of_property_read_bool(np, "enable-msi-hole");
+
+	/* Check if primary bridge */
+	primary = of_property_read_bool(np, "primary");
+
+	/* Get bus range if any */
+	bus_range = of_get_property(np, "bus-range", NULL);
+
+	/* Map registers */
+	reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
+	if (reg == NULL) {
+		printk(KERN_ERR "%pOF: Can't map registers !\n", np);
+		goto fail;
+	}
+
+	/* Allocate the host controller data structure */
+	hose = pcibios_alloc_controller(np);
+	if (!hose)
+		goto fail;
+
+	/* Without a "bus-range" property, cover buses 0x00-0xff */
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	/* Setup config space */
+	setup_indirect_pci(hose, rsrc_cfg.start, rsrc_cfg.start + 0x4,
+					PPC_INDIRECT_TYPE_SET_CFG_TYPE);
+
+	/* Disable all windows */
+	writel(0, reg + PCIX0_POM0SA);
+	writel(0, reg + PCIX0_POM1SA);
+	writel(0, reg + PCIX0_POM2SA);
+	writel(0, reg + PCIX0_PIM0SA);
+	writel(0, reg + PCIX0_PIM1SA);
+	writel(0, reg + PCIX0_PIM2SA);
+	if (big_pim) {
+		writel(0, reg + PCIX0_PIM0SAH);
+		writel(0, reg + PCIX0_PIM2SAH);
+	}
+
+	/* Parse outbound mapping resources */
+	pci_process_bridge_OF_ranges(hose, np, primary);
+
+	/* Parse inbound mapping resources */
+	if (ppc4xx_parse_dma_ranges(hose, reg, &dma_window) != 0)
+		goto fail;
+
+	/* Configure outbound ranges POMs */
+	ppc4xx_configure_pcix_POMs(hose, reg);
+
+	/* Configure inbound ranges PIMs */
+	ppc4xx_configure_pcix_PIMs(hose, reg, &dma_window, big_pim, msi);
+
+	/* We don't need the registers anymore */
+	iounmap(reg);
+	return;
+
+ fail:
+	if (hose)
+		pcibios_free_controller(hose);
+	if (reg)
+		iounmap(reg);
+}
+
+#ifdef CONFIG_PPC4xx_PCI_EXPRESS
+
+/*
+ * 4xx PCI-Express part
+ *
+ * We support 3 parts currently based on the compatible property:
+ *
+ * ibm,plb-pciex-440spe
+ * ibm,plb-pciex-405ex
+ * ibm,plb-pciex-460ex
+ *
+ * Anything else will be rejected for now as they are all subtly
+ * different unfortunately.
+ *
+ */
+
+#define MAX_PCIE_BUS_MAPPED	0x40
+
+/* State kept for one PCI-Express port */
+struct ppc4xx_pciex_port
+{
+	struct pci_controller	*hose;
+	struct device_node	*node;
+	unsigned int		index;		/* port number, as printed in "PCIE%d" messages */
+	int			endpoint;	/* non-zero: legacy endpoint, zero: root port */
+	int			link;		/* set to 1 once the link is seen up */
+	int			has_ibpre;	/* non-zero: check the presence bit before waiting for link */
+	unsigned int		sdr_base;	/* base added to the PESDRn_* offsets for SDR0 accesses */
+	dcr_host_t		dcrs;		/* handle used for dcr_write() to the port's DCRs */
+	struct resource		cfg_space;
+	struct resource		utl_regs;
+	void __iomem		*utl_base;	/* base of the PEUTL_* registers */
+};
+
+/* NOTE(review): presumably the port array and its element count - confirm
+ * against the allocation code, which is not shown here
+ */
+static struct ppc4xx_pciex_port *ppc4xx_pciex_ports;
+static unsigned int ppc4xx_pciex_port_count;
+
+/* Per-SoC hooks for bringing up the PCI-Express core and its ports */
+struct ppc4xx_pciex_hwops
+{
+	/* NOTE(review): presumably "ports are driven through SDR registers" - confirm */
+	bool want_sdr;
+	/* Global core setup; implementations return the port count or a negative value */
+	int (*core_init)(struct device_node *np);
+	/* Per-port hardware setup; 0 on success, negative on failure */
+	int (*port_init_hw)(struct ppc4xx_pciex_port *port);
+	/* Per-port UTL register setup */
+	int (*setup_utl)(struct ppc4xx_pciex_port *port);
+	/* Link detection; sets port->link when the link is up */
+	void (*check_link)(struct ppc4xx_pciex_port *port);
+};
+
+/* Hooks of the SoC in use */
+static struct ppc4xx_pciex_hwops *ppc4xx_pciex_hwops;
+
+/*
+ * Poll an SDR register of @port until (value & @mask) == @value.
+ *
+ * @sdr_offset: register offset, added to port->sdr_base
+ * @timeout_ms: number of polls, with a 1ms sleep after each failed one
+ *
+ * Returns 0 as soon as the register matches, -1 when the polls run out.
+ */
+static int __init ppc4xx_pciex_wait_on_sdr(struct ppc4xx_pciex_port *port,
+					   unsigned int sdr_offset,
+					   unsigned int mask,
+					   unsigned int value,
+					   int timeout_ms)
+{
+	for (; timeout_ms-- != 0; msleep(1)) {
+		u32 val = mfdcri(SDR0, port->sdr_base + sdr_offset);
+
+		if ((val & mask) != value)
+			continue;
+
+		pr_debug("PCIE%d: Wait on SDR %x success with tm %d (%08x)\n",
+			 port->index, sdr_offset, timeout_ms, val);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Wait (up to 10 polls) for bit 20 of the port's PESDRn_RCSSTS register to
+ * read 0, which marks the end of the reset.
+ * Returns 0 on success, -1 after logging a warning otherwise.
+ */
+static int __init ppc4xx_pciex_port_reset_sdr(struct ppc4xx_pciex_port *port)
+{
+	int rc;
+
+	/* Wait for reset to complete */
+	rc = ppc4xx_pciex_wait_on_sdr(port, PESDRn_RCSSTS, 1 << 20, 0, 10);
+	if (rc == 0)
+		return 0;
+
+	printk(KERN_WARNING "PCIE%d: PGRST failed\n",
+	       port->index);
+	return -1;
+}
+
+
+/*
+ * Look for a device on @port and wait for the link to train, using the
+ * port's PESDRn_LOOP register. Sets port->link to 1 when the link is up.
+ */
+static void __init ppc4xx_pciex_check_link_sdr(struct ppc4xx_pciex_port *port)
+{
+	printk(KERN_INFO "PCIE%d: Checking link...\n", port->index);
+
+	/* When card presence detect is supported, check it first; when it
+	 * is not, go straight to waiting for the link.
+	 *
+	 * A missing link is not treated as a failure: config space accesses
+	 * just get filtered out, which should make hotplug easier to
+	 * implement later on.
+	 */
+	if (port->has_ibpre &&
+	    ppc4xx_pciex_wait_on_sdr(port, PESDRn_LOOP,
+				     1 << 28, 1 << 28, 100)) {
+		printk(KERN_INFO "PCIE%d: No device detected.\n", port->index);
+		return;
+	}
+
+	printk(KERN_INFO
+	       "PCIE%d: Device detected, waiting for link...\n",
+	       port->index);
+	if (ppc4xx_pciex_wait_on_sdr(port, PESDRn_LOOP,
+				     0x1000, 0x1000, 2000)) {
+		printk(KERN_WARNING
+		       "PCIE%d: Link up failed\n", port->index);
+		return;
+	}
+
+	printk(KERN_INFO
+	       "PCIE%d: link is up !\n", port->index);
+	port->link = 1;
+}
+
+#ifdef CONFIG_44x
+
+/* Check various reset bits of the 440SPe PCIe core */
+/*
+ * Check various reset bits of the 440SPe PCIe core.
+ *
+ * The RCSSET register of each of the three ports is read once. Some bits
+ * must be set on every port, others must be clear on every port; each
+ * violated rule is logged.
+ *
+ * Returns 0 when all checks pass, -1 otherwise. @np is not used.
+ */
+static int __init ppc440spe_pciex_check_reset(struct device_node *np)
+{
+	u32 valPE0, valPE1, valPE2;
+	u32 set_on_all, set_on_any;
+	int err = 0;
+
+	/* SDR0_PEGPLLLCT1 reset */
+	if (!(mfdcri(SDR0, PESDR0_PLLLCT1) & 0x01000000)) {
+		/*
+		 * the PCIe core was probably already initialised
+		 * by firmware - let's re-reset RCSSET regs
+		 *
+		 * -- Shouldn't we also re-reset the whole thing ? -- BenH
+		 */
+		pr_debug("PCIE: SDR0_PLLLCT1 already reset.\n");
+		mtdcri(SDR0, PESDR0_440SPE_RCSSET, 0x01010000);
+		mtdcri(SDR0, PESDR1_440SPE_RCSSET, 0x01010000);
+		mtdcri(SDR0, PESDR2_440SPE_RCSSET, 0x01010000);
+	}
+
+	valPE0 = mfdcri(SDR0, PESDR0_440SPE_RCSSET);
+	valPE1 = mfdcri(SDR0, PESDR1_440SPE_RCSSET);
+	valPE2 = mfdcri(SDR0, PESDR2_440SPE_RCSSET);
+
+	/* Every mask below is a single bit, so "set on every port" is a
+	 * test on the AND of the three values and "set on some port" a
+	 * test on their OR
+	 */
+	set_on_all = valPE0 & valPE1 & valPE2;
+	set_on_any = valPE0 | valPE1 | valPE2;
+
+	/* SDR0_PExRCSSET rstgu: must be set everywhere */
+	if (!(set_on_all & 0x01000000)) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstgu error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET rstdl: must be set everywhere */
+	if (!(set_on_all & 0x00010000)) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstdl error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET rstpyn: must be clear everywhere */
+	if (set_on_any & 0x00001000) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstpyn error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET hldplb: must be clear everywhere */
+	if (set_on_any & 0x10000000) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET hldplb error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET rdy: must be clear everywhere */
+	if (set_on_any & 0x00100000) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rdy error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET shutdown: must be clear everywhere */
+	if (set_on_any & 0x00000100) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET shutdown error\n");
+		err = -1;
+	}
+
+	return err;
+}
+
+/* Global PCIe core initializations for 440SPe core */
+/*
+ * Global PCIe core initializations for 440SPe core.
+ *
+ * Returns 3 (the number of ports) on success, -ENXIO when the reset state
+ * check fails, and -1 when calibration or the PLL lock fails.
+ */
+static int __init ppc440spe_pciex_core_init(struct device_node *np)
+{
+	int retries;
+
+	/* Set PLL clock receiver to LVPECL */
+	dcri_clrset(SDR0, PESDR0_PLLLCT1, 0, 1 << 28);
+
+	/* Shouldn't we do all the calibration stuff etc... here ? */
+	if (ppc440spe_pciex_check_reset(np))
+		return -ENXIO;
+
+	if (!(mfdcri(SDR0, PESDR0_PLLLCT2) & 0x10000)) {
+		printk(KERN_INFO "PCIE: PESDR_PLLCT2 resistance calibration "
+		       "failed (0x%08x)\n",
+		       mfdcri(SDR0, PESDR0_PLLLCT2));
+		return -1;
+	}
+
+	/* De-assert reset of PCIe PLL, wait for lock */
+	dcri_clrset(SDR0, PESDR0_PLLLCT1, 1 << 24, 0);
+	udelay(3);
+
+	/* Up to 20 polls of the lock bit, 1us apart */
+	for (retries = 20; retries; retries--) {
+		if (mfdcri(SDR0, PESDR0_PLLLCT3) & 0x10000000)
+			break;
+		udelay(1);
+	}
+	if (!retries) {
+		printk(KERN_INFO "PCIE: VCO output not locked\n");
+		return -1;
+	}
+
+	pr_debug("PCIE initialization OK\n");
+
+	return 3;
+}
+
+/*
+ * Per-port hardware setup shared by both 440SPe revisions: programs the
+ * port type and link width (x8 for port 0, x4 for the others), the UTL and
+ * HSS lane settings, updates RCSSET and waits for the reset to complete.
+ *
+ * Returns the result of ppc4xx_pciex_port_reset_sdr().
+ */
+static int __init ppc440spe_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	unsigned int base = port->sdr_base;
+	u32 dlpset;
+
+	dlpset = (port->endpoint ? PTYPE_LEGACY_ENDPOINT : PTYPE_ROOT_PORT) << 20;
+	dlpset |= (port->index == 0 ? LNKW_X8 : LNKW_X4) << 12;
+
+	mtdcri(SDR0, base + PESDRn_DLPSET, dlpset);
+	mtdcri(SDR0, base + PESDRn_UTLSET1, 0x20222222);
+	if (ppc440spe_revA())
+		mtdcri(SDR0, base + PESDRn_UTLSET2, 0x11000000);
+
+	/* Lanes 0-3 exist on every port */
+	mtdcri(SDR0, base + PESDRn_440SPE_HSSL0SET1, 0x35000000);
+	mtdcri(SDR0, base + PESDRn_440SPE_HSSL1SET1, 0x35000000);
+	mtdcri(SDR0, base + PESDRn_440SPE_HSSL2SET1, 0x35000000);
+	mtdcri(SDR0, base + PESDRn_440SPE_HSSL3SET1, 0x35000000);
+
+	/* Lanes 4-7 are only programmed for port 0 */
+	if (port->index == 0) {
+		mtdcri(SDR0, base + PESDRn_440SPE_HSSL4SET1, 0x35000000);
+		mtdcri(SDR0, base + PESDRn_440SPE_HSSL5SET1, 0x35000000);
+		mtdcri(SDR0, base + PESDRn_440SPE_HSSL6SET1, 0x35000000);
+		mtdcri(SDR0, base + PESDRn_440SPE_HSSL7SET1, 0x35000000);
+	}
+
+	dcri_clrset(SDR0, base + PESDRn_RCSSET,
+		    (1 << 24) | (1 << 16), 1 << 12);
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+/* 440SPe rev A port setup: the common 440SPe setup, nothing more */
+static int __init ppc440speA_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	return ppc440spe_pciex_init_port_hw(port);
+}
+
+/*
+ * 440SPe rev B port setup: the common 440SPe setup, after which the port
+ * is flagged as having presence detect (has_ibpre), whatever the result.
+ * Returns the result of the common setup.
+ */
+static int __init ppc440speB_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	int rc = ppc440spe_pciex_init_port_hw(port);
+
+	port->has_ibpre = 1;
+
+	return rc;
+}
+
+/*
+ * UTL setup for 440SPe rev A: one DCR write followed by a fixed sequence
+ * of UTL register writes. Always returns 0.
+ */
+static int ppc440speA_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+	/* XXX Check what that value means... I hate magic */
+	dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x68782800);
+
+	/*
+	 * Set buffer allocations and then assert VRB and TXE.
+	 */
+	out_be32(port->utl_base + PEUTL_OUTTR,   0x08000000);
+	out_be32(port->utl_base + PEUTL_INTR,    0x02000000);
+	out_be32(port->utl_base + PEUTL_OPDBSZ,  0x10000000);
+	out_be32(port->utl_base + PEUTL_PBBSZ,   0x53000000);
+	out_be32(port->utl_base + PEUTL_IPHBSZ,  0x08000000);
+	out_be32(port->utl_base + PEUTL_IPDBSZ,  0x10000000);
+	out_be32(port->utl_base + PEUTL_RCIRQEN, 0x00f00000);
+	out_be32(port->utl_base + PEUTL_PCTL,    0x80800066);
+
+	return 0;
+}
+
+/*
+ * UTL setup for 440SPe rev B: a single write to PEUTL_PBCTL.
+ * Always returns 0.
+ */
+static int ppc440speB_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+	/* Report CRS to the operating system */
+	out_be32(port->utl_base + PEUTL_PBCTL,    0x08000000);
+
+	return 0;
+}
+
+/* Hooks for the 440SPe rev A; differs from rev B in port and UTL setup */
+static struct ppc4xx_pciex_hwops ppc440speA_pcie_hwops __initdata =
+{
+	.want_sdr	= true,
+	.core_init	= ppc440spe_pciex_core_init,
+	.port_init_hw	= ppc440speA_pciex_init_port_hw,
+	.setup_utl	= ppc440speA_pciex_init_utl,
+	.check_link	= ppc4xx_pciex_check_link_sdr,
+};
+
+/* Hooks for the 440SPe rev B; differs from rev A in port and UTL setup */
+static struct ppc4xx_pciex_hwops ppc440speB_pcie_hwops __initdata =
+{
+	.want_sdr	= true,
+	.core_init	= ppc440spe_pciex_core_init,
+	.port_init_hw	= ppc440speB_pciex_init_port_hw,
+	.setup_utl	= ppc440speB_pciex_init_utl,
+	.check_link	= ppc4xx_pciex_check_link_sdr,
+};
+
+/*
+ * Core init hook for the 460EX: there is no global setup to perform, so
+ * this only reports the number of ports. @np is not used.
+ */
+static int __init ppc460ex_pciex_core_init(struct device_node *np)
+{
+	/* Nothing to do, return 2 ports */
+	return 2;
+}
+
+/*
+ * Per-port hardware setup for the 460EX.
+ *
+ * Programs the port type and link width (x1 for port 0, x4 otherwise), the
+ * UTL settings and the per-lane PHY registers, then takes the PHY through
+ * reset and flags the port as having presence detect.
+ *
+ * The wait for the PHY to report reset is bounded: after 10000 polls spaced
+ * 10us apart (about 100ms) a warning is logged and -EBUSY is returned,
+ * rather than spinning forever on a PHY that never answers.
+ *
+ * Returns 0 on success, -EBUSY on PHY reset timeout, or the result of
+ * ppc4xx_pciex_port_reset_sdr().
+ */
+static int __init ppc460ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 val;
+	u32 utlset1;
+	unsigned int rststa = 0;
+	bool poll_phy = true;
+	int tries = 10000;
+
+	if (port->endpoint)
+		val = PTYPE_LEGACY_ENDPOINT << 20;
+	else
+		val = PTYPE_ROOT_PORT << 20;
+
+	if (port->index == 0) {
+		val |= LNKW_X1 << 12;
+		utlset1 = 0x20000000;
+	} else {
+		val |= LNKW_X4 << 12;
+		utlset1 = 0x20101101;
+	}
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, val);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, utlset1);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01210000);
+
+	switch (port->index) {
+	case 0:
+		mtdcri(SDR0, PESDR0_460EX_L0CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR0_460EX_L0DRV, 0x00000130);
+		mtdcri(SDR0, PESDR0_460EX_L0CLK, 0x00000006);
+
+		mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST,0x10000000);
+		break;
+
+	case 1:
+		mtdcri(SDR0, PESDR1_460EX_L0CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L1CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L2CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L3CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L0DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L1DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L2DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L3DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L0CLK, 0x00000006);
+		mtdcri(SDR0, PESDR1_460EX_L1CLK, 0x00000006);
+		mtdcri(SDR0, PESDR1_460EX_L2CLK, 0x00000006);
+		mtdcri(SDR0, PESDR1_460EX_L3CLK, 0x00000006);
+
+		mtdcri(SDR0, PESDR1_460EX_PHY_CTL_RST,0x10000000);
+		break;
+	}
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
+	       mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) |
+	       (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPYN));
+
+	/* Poll for PHY reset, on the status register of the port at hand */
+	switch (port->index) {
+	case 0:
+		rststa = PESDR0_460EX_RSTSTA;
+		break;
+	case 1:
+		rststa = PESDR1_460EX_RSTSTA;
+		break;
+	default:
+		/* No status register is polled for any other port */
+		poll_phy = false;
+		break;
+	}
+	while (poll_phy && !(mfdcri(SDR0, rststa) & 0x1)) {
+		if (--tries == 0) {
+			printk(KERN_WARNING "PCIE%d: PHY reset timed out\n",
+			       port->index);
+			return -EBUSY;
+		}
+		udelay(10);
+	}
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
+	       (mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) &
+		~(PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL)) |
+	       PESDRx_RCSSET_RSTPYN);
+
+	port->has_ibpre = 1;
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+/*
+ * UTL setup for the 460EX (also used by the APM821xx hooks): one DCR write
+ * followed by a fixed sequence of UTL register writes. Always returns 0.
+ */
+static int ppc460ex_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+	dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0);
+
+	/*
+	 * Set buffer allocations and then assert VRB and TXE.
+	 */
+	out_be32(port->utl_base + PEUTL_PBCTL,	0x0800000c);
+	out_be32(port->utl_base + PEUTL_OUTTR,	0x08000000);
+	out_be32(port->utl_base + PEUTL_INTR,	0x02000000);
+	out_be32(port->utl_base + PEUTL_OPDBSZ,	0x04000000);
+	out_be32(port->utl_base + PEUTL_PBBSZ,	0x00000000);
+	out_be32(port->utl_base + PEUTL_IPHBSZ,	0x02000000);
+	out_be32(port->utl_base + PEUTL_IPDBSZ,	0x04000000);
+	out_be32(port->utl_base + PEUTL_RCIRQEN,0x00f00000);
+	out_be32(port->utl_base + PEUTL_PCTL,	0x80800066);
+
+	return 0;
+}
+
+/* Hooks for the 460EX */
+static struct ppc4xx_pciex_hwops ppc460ex_pcie_hwops __initdata =
+{
+	.want_sdr	= true,
+	.core_init	= ppc460ex_pciex_core_init,
+	.port_init_hw	= ppc460ex_pciex_init_port_hw,
+	.setup_utl	= ppc460ex_pciex_init_utl,
+	.check_link	= ppc4xx_pciex_check_link_sdr,
+};
+
+/*
+ * Core init hook for the APM821xx: no global setup is performed, the
+ * function only reports a single port. @np is not used.
+ */
+static int __init apm821xx_pciex_core_init(struct device_node *np)
+{
+	/* Return the number of pcie port */
+	return 1;
+}
+
+/*
+ * Per-port hardware setup for the APM821xx.
+ *
+ * Resets the PHY in software, programs port type, link width (x1), UTL
+ * and lane settings, then takes the PHY through reset and flags the port
+ * as having presence detect.
+ *
+ * Returns 0 on success, -EBUSY if the PHY does not report reset in time.
+ */
+static int __init apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	unsigned int rcsset = port->sdr_base + PESDRn_RCSSET;
+	unsigned int rststa_off;
+	u32 dlpset;
+
+	/*
+	 * Do a software reset on PCIe ports.
+	 * This works around pci drivers not re-assigning bus numbers for
+	 * PCIE devices after Uboot has scanned and configured all the buses
+	 * (eg. PCIE NIC IntelPro/1000 PT quad port, SAS LSI 1064E)
+	 */
+
+	mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x0);
+	mdelay(10);
+
+	dlpset = (port->endpoint ? PTYPE_LEGACY_ENDPOINT : PTYPE_ROOT_PORT) << 20;
+	dlpset |= LNKW_X1 << 12;
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, dlpset);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000);
+
+	mtdcri(SDR0, PESDR0_460EX_L0CDRCTL, 0x00003230);
+	mtdcri(SDR0, PESDR0_460EX_L0DRV, 0x00000130);
+	mtdcri(SDR0, PESDR0_460EX_L0CLK, 0x00000006);
+
+	mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x10000000);
+	mdelay(50);
+	mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x30000000);
+
+	mtdcri(SDR0, rcsset,
+	       mfdcri(SDR0, rcsset) |
+	       (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPYN));
+
+	/*
+	 * Poll for PHY reset. The wait helper adds port->sdr_base to the
+	 * offset it is given, so the base is subtracted here to end up on
+	 * PESDR0_460EX_RSTSTA itself.
+	 */
+	rststa_off = PESDR0_460EX_RSTSTA - port->sdr_base;
+	if (ppc4xx_pciex_wait_on_sdr(port, rststa_off, 0x1, 1,	100)) {
+		printk(KERN_WARNING "%s: PCIE: Can't reset PHY\n", __func__);
+		return -EBUSY;
+	}
+
+	mtdcri(SDR0, rcsset,
+	       (mfdcri(SDR0, rcsset) &
+		~(PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL)) |
+	       PESDRx_RCSSET_RSTPYN);
+
+	port->has_ibpre = 1;
+	return 0;
+}
+
+/* Hooks for the APM821xx; UTL setup is shared with the 460EX */
+static struct ppc4xx_pciex_hwops apm821xx_pcie_hwops __initdata = {
+	.want_sdr	= true,
+	.core_init	= apm821xx_pciex_core_init,
+	.port_init_hw	= apm821xx_pciex_init_port_hw,
+	.setup_utl	= ppc460ex_pciex_init_utl,
+	.check_link	= ppc4xx_pciex_check_link_sdr,
+};
+
+/*
+ * One-time initialisation of the 460SX PCIe core.
+ *
+ * Programs the HSS (serdes) drive amplitude, TX pre-emphasis, calibration
+ * and slew registers of all three ports, releases the PLL reset and puts
+ * DL/UTL/GPL of every port into reset before they get configured.
+ *
+ * Returns the number of usable ports: 3 when bit 0 of
+ * PESDR1_460SX_HSSCTLSET is set (bifurcation), 2 otherwise.
+ */
+static int __init ppc460sx_pciex_core_init(struct device_node *np)
+{
+	/* Per-lane drive amplitude registers, in programming order */
+	const unsigned int damp_regs[] = {
+		PESDR0_460SX_HSSL0DAMP, PESDR0_460SX_HSSL1DAMP,
+		PESDR0_460SX_HSSL2DAMP, PESDR0_460SX_HSSL3DAMP,
+		PESDR0_460SX_HSSL4DAMP, PESDR0_460SX_HSSL5DAMP,
+		PESDR0_460SX_HSSL6DAMP, PESDR0_460SX_HSSL7DAMP,
+		PESDR1_460SX_HSSL0DAMP, PESDR1_460SX_HSSL1DAMP,
+		PESDR1_460SX_HSSL2DAMP, PESDR1_460SX_HSSL3DAMP,
+		PESDR2_460SX_HSSL0DAMP, PESDR2_460SX_HSSL1DAMP,
+		PESDR2_460SX_HSSL2DAMP, PESDR2_460SX_HSSL3DAMP,
+	};
+	/* Per-lane TX pre-emphasis registers, in programming order */
+	const unsigned int coefa_regs[] = {
+		PESDR0_460SX_HSSL0COEFA, PESDR0_460SX_HSSL1COEFA,
+		PESDR0_460SX_HSSL2COEFA, PESDR0_460SX_HSSL3COEFA,
+		PESDR0_460SX_HSSL4COEFA, PESDR0_460SX_HSSL5COEFA,
+		PESDR0_460SX_HSSL6COEFA, PESDR0_460SX_HSSL7COEFA,
+		PESDR1_460SX_HSSL0COEFA, PESDR1_460SX_HSSL1COEFA,
+		PESDR1_460SX_HSSL2COEFA, PESDR1_460SX_HSSL3COEFA,
+		PESDR2_460SX_HSSL0COEFA, PESDR2_460SX_HSSL1COEFA,
+		PESDR2_460SX_HSSL2COEFA, PESDR2_460SX_HSSL3COEFA,
+	};
+	/* The three per-port reset control registers */
+	const unsigned int rcsset_regs[] = {
+		PESDR0_460SX_RCSSET,
+		PESDR1_460SX_RCSSET,
+		PESDR2_460SX_RCSSET,
+	};
+	unsigned int i;
+	u32 hssctl;
+
+	/* HSS drive amplitude */
+	for (i = 0; i < sizeof(damp_regs) / sizeof(damp_regs[0]); i++) {
+		mtdcri(SDR0, damp_regs[i], 0xB9843211);
+	}
+
+	/* HSS TX pre-emphasis */
+	for (i = 0; i < sizeof(coefa_regs) / sizeof(coefa_regs[0]); i++) {
+		mtdcri(SDR0, coefa_regs[i], 0xDCB98987);
+	}
+
+	/* HSS TX calibration control */
+	mtdcri(SDR0, PESDR0_460SX_HSSL1CALDRV, 0x22222222);
+	mtdcri(SDR0, PESDR1_460SX_HSSL1CALDRV, 0x22220000);
+	mtdcri(SDR0, PESDR2_460SX_HSSL1CALDRV, 0x22220000);
+
+	/* HSS TX slew control */
+	mtdcri(SDR0, PESDR0_460SX_HSSSLEW, 0xFFFFFFFF);
+	mtdcri(SDR0, PESDR1_460SX_HSSSLEW, 0xFFFF0000);
+	mtdcri(SDR0, PESDR2_460SX_HSSSLEW, 0xFFFF0000);
+
+	/* Set HSS PRBS enabled (ports 0 and 2 only) */
+	mtdcri(SDR0, PESDR0_460SX_HSSCTLSET, 0x00001130);
+	mtdcri(SDR0, PESDR2_460SX_HSSCTLSET, 0x00001130);
+
+	udelay(100);
+
+	/* De-assert PLLRESET */
+	dcri_clrset(SDR0, PESDR0_PLLLCT2, 0x00000100, 0);
+
+	/* Reset DL, UTL, GPL before configuration */
+	for (i = 0; i < sizeof(rcsset_regs) / sizeof(rcsset_regs[0]); i++) {
+		mtdcri(SDR0, rcsset_regs[i],
+		       PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU);
+	}
+
+	udelay(100);
+
+	/*
+	 * The third PCIe port only exists with bifurcation enabled; when
+	 * it is not, u-boot would have disabled that port.
+	 */
+	hssctl = mfdcri(SDR0, PESDR1_460SX_HSSCTLSET);
+	if ((hssctl & 0x00000001) != 0x00000001) {
+		printk(KERN_INFO "PCI: Total 2 PCIE ports are present\n");
+		return 2;
+	}
+
+	printk(KERN_INFO "PCI: PCIE bifurcation setup successfully.\n");
+	printk(KERN_INFO "PCI: Total 3 PCIE ports are present\n");
+	return 3;
+}
+
+/*
+ * Per-port hardware init for the 460SX.
+ *
+ * Bit 0x01000000 of PESDRn_UTLSET2 is cleared for an endpoint and set for a
+ * root port.  The GPL/UTL and DL resets are then released while RSTPYN is
+ * set, and the port is handed to the generic SDR reset helper, whose result
+ * is returned.
+ */
+static int __init ppc460sx_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 utlset2_clr = 0;
+	u32 utlset2_set = 0;
+
+	if (port->endpoint)
+		utlset2_clr = 0x01000000;
+	else
+		utlset2_set = 0x01000000;
+
+	dcri_clrset(SDR0, port->sdr_base + PESDRn_UTLSET2,
+		    utlset2_clr, utlset2_set);
+
+	dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET,
+		    PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL,
+		    PESDRx_RCSSET_RSTPYN);
+
+	port->has_ibpre = 1;
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+/*
+ * UTL setup for the 460SX: program the PBBSZ and PCTL registers.
+ * Always returns 0.
+ */
+static int ppc460sx_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+	/* Max 128 Bytes */
+	const u32 pbbsz = 0x00000000;
+	/* Assert VRB and TXE - per datasheet turn off addr validation */
+	const u32 pctl = 0x80800000;
+
+	out_be32(port->utl_base + PEUTL_PBBSZ, pbbsz);
+	out_be32(port->utl_base + PEUTL_PCTL, pctl);
+
+	return 0;
+}
+
+/*
+ * Link check for the 460SX.
+ *
+ * Temporarily maps the port's internal config space and polls the
+ * PECFG_460SX_DLLSTA register for the LINKUP bit, up to 50 times with a
+ * 10ms delay after each unsuccessful read.  port->link ends up 1 if the bit
+ * was seen, 0 otherwise (including when the mapping fails).
+ */
+static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
+{
+	void __iomem *cfg;
+	int tries_left;
+
+	port->link = 0;
+
+	cfg = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
+	if (!cfg) {
+		printk(KERN_ERR "%pOF: Can't map internal config space !",
+			port->node);
+		return;
+	}
+
+	for (tries_left = 50; tries_left; tries_left--) {
+		u32 dllsta = in_le32(cfg + PECFG_460SX_DLLSTA);
+
+		if (dllsta & PECFG_460SX_DLLSTA_LINKUP)
+			break;
+		mdelay(10);
+	}
+
+	/* A non-zero remainder means we left the loop because of LINKUP */
+	if (tries_left)
+		port->link = 1;
+
+	iounmap(cfg);
+}
+
+/*
+ * Hardware operations for the 460SX PCIe core.  Unlike the other SDR based
+ * cores in this file it brings its own link check.
+ */
+static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = {
+	.core_init	= ppc460sx_pciex_core_init,
+	.port_init_hw	= ppc460sx_pciex_init_port_hw,
+	.setup_utl	= ppc460sx_pciex_init_utl,
+	.check_link	= ppc460sx_pciex_check_link,
+	.want_sdr	= true,
+};
+
+#endif /* CONFIG_44x */
+
+#ifdef CONFIG_40x
+
+/*
+ * The 405EX needs no core level setup; just report its two ports.
+ */
+static int __init ppc405ex_pciex_core_init(struct device_node *np)
+{
+	const int nr_ports = 2;
+
+	return nr_ports;
+}
+
+/*
+ * Reset the PHY of a 405EX PCIe port.
+ *
+ * Asserts the PHY reset through PESDRn_RCSSET, releases the hot reset
+ * (with a different value for endpoint and root port), waits for bit
+ * 0x00001000 of PESDRn_405EX_PHYSTA to be set and finally releases the
+ * GPL/UTL reset.
+ *
+ * The wait is bounded: it uses the same SDR polling helper as the rest of
+ * this file instead of spinning forever, so a PHY that never leaves reset
+ * produces a warning rather than hanging the boot.
+ */
+static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port)
+{
+	/* Assert the PE0_PHY reset */
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000);
+	msleep(1);
+
+	/* deassert the PE0_hotreset */
+	if (port->endpoint)
+		mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01111000);
+	else
+		mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01101000);
+
+	/*
+	 * Poll for phy !reset.  The helper takes an offset relative to
+	 * port->sdr_base and returns non-zero when the masked value never
+	 * matched within the given budget.
+	 * NOTE(review): 1000 is a generous guess, confirm against the
+	 * datasheet.
+	 */
+	if (ppc4xx_pciex_wait_on_sdr(port, PESDRn_405EX_PHYSTA,
+				     0x00001000, 0x00001000, 1000))
+		printk(KERN_WARNING "PCIE%d: PHY did not leave reset\n",
+		       port->index);
+
+	/* deassert the PE0_gpl_utl_reset */
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x00101000);
+}
+
+/*
+ * Per-port hardware init for the 405EX.
+ *
+ * Programs the port type (endpoint or root port) and x1 link width into
+ * PESDRn_DLPSET, sets up the UTL and PHY SDR registers, resets the PHY if
+ * needed, writes the GPL config DCR and finishes with the generic SDR port
+ * reset, whose result is returned.
+ */
+static int __init ppc405ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 ptype = port->endpoint ? PTYPE_LEGACY_ENDPOINT : PTYPE_ROOT_PORT;
+	u32 loop;
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET,
+	       (1 << 24) | (ptype << 20) | (LNKW_X1 << 12));
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET1, 0x720F0000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET2, 0x70600003);
+
+	/*
+	 * The PHY is only reset while no link is established.  An Atheros
+	 * PCIe board was found unable to bring the link up (again) after a
+	 * PHY reset; no other tested board showed this problem.
+	 * This has to be re-tested and fixed in a later release!
+	 */
+	loop = mfdcri(SDR0, port->sdr_base + PESDRn_LOOP);
+	if ((loop & 0x00001000) == 0)
+		ppc405ex_pcie_phy_reset(port);
+
+	/* guarded on */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, 0x10000000);
+
+	port->has_ibpre = 1;
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+/*
+ * UTL setup for the 405EX: clear the GPL "special" DCR, then program the
+ * UTL buffer allocation and control registers.  Always returns 0.
+ */
+static int ppc405ex_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+	/* UTL registers and their values, written strictly in this order */
+	static const struct {
+		unsigned int	reg;
+		u32		val;
+	} utl_setup[] = {
+		/* Buffer allocations first ... */
+		{ PEUTL_OUTTR,   0x02000000 },
+		{ PEUTL_INTR,    0x02000000 },
+		{ PEUTL_OPDBSZ,  0x04000000 },
+		{ PEUTL_PBBSZ,   0x21000000 },
+		{ PEUTL_IPHBSZ,  0x02000000 },
+		{ PEUTL_IPDBSZ,  0x04000000 },
+		{ PEUTL_RCIRQEN, 0x00f00000 },
+		/* ... then assert VRB and TXE */
+		{ PEUTL_PCTL,    0x80800066 },
+		{ PEUTL_PBCTL,   0x08000000 },
+	};
+	unsigned int i;
+
+	dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0);
+
+	for (i = 0; i < sizeof(utl_setup) / sizeof(utl_setup[0]); i++)
+		out_be32(port->utl_base + utl_setup[i].reg, utl_setup[i].val);
+
+	return 0;
+}
+
+/*
+ * Hardware operations for the 405EX PCIe core; link state is checked with
+ * the generic SDR based helper.
+ */
+static struct ppc4xx_pciex_hwops ppc405ex_pcie_hwops __initdata = {
+	.core_init	= ppc405ex_pciex_core_init,
+	.port_init_hw	= ppc405ex_pciex_init_port_hw,
+	.setup_utl	= ppc405ex_pciex_init_utl,
+	.check_link	= ppc4xx_pciex_check_link_sdr,
+	.want_sdr	= true,
+};
+
+#endif /* CONFIG_40x */
+
+#ifdef CONFIG_476FPE
+/*
+ * The 476FPE needs no core level setup; just report its four ports.
+ */
+static int __init ppc_476fpe_pciex_core_init(struct device_node *np)
+{
+	const int nr_ports = 4;
+
+	return nr_ports;
+}
+
+/*
+ * Link check for the 476FPE.
+ *
+ * Temporarily maps the port's internal config space and reads PECFG_TLDLP
+ * up to 20 times, sleeping 10ms after each read that does not show both
+ * LNKUP and PRESENT.  port->link is set to 1 when the last value read has
+ * PRESENT set.
+ *
+ * NOTE(review): the loop waits for LNKUP and PRESENT together, but the final
+ * decision looks at PRESENT only - confirm that this is intended.
+ */
+static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
+{
+	const u32 up_mask = PECFG_TLDLP_LNKUP | PECFG_TLDLP_PRESENT;
+	void __iomem *cfg;
+	u32 tldlp = 0;
+	int poll;
+
+	cfg = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
+
+	printk(KERN_INFO "PCIE%d: Checking link...\n", port->index);
+
+	if (!cfg) {
+		printk(KERN_WARNING "PCIE%d: failed to get cfg space\n",
+		                    port->index);
+		return;
+	}
+
+	for (poll = 0; poll < 20; poll++) {
+		tldlp = in_le32(cfg + PECFG_TLDLP);
+		if ((tldlp & up_mask) == up_mask)
+			break;
+		msleep(10);
+	}
+
+	if (!(tldlp & PECFG_TLDLP_PRESENT)) {
+		printk(KERN_WARNING "PCIE%d: Link up failed\n", port->index);
+	} else {
+		printk(KERN_INFO "PCIE%d: link is up !\n", port->index);
+		port->link = 1;
+	}
+
+	iounmap(cfg);
+}
+
+/*
+ * Hardware operations for the 476FPE/476GTR PCIe core.  Only core init and
+ * link check are provided; want_sdr is left unset, so no "sdr-base"
+ * property is required.
+ */
+static struct ppc4xx_pciex_hwops ppc_476fpe_pcie_hwops __initdata = {
+	.check_link	= ppc_476fpe_pciex_check_link,
+	.core_init	= ppc_476fpe_pciex_core_init,
+};
+#endif /* CONFIG_476FPE */
+
+/*
+ * Check that the core has been initialized and if not, do it.
+ *
+ * Only the first call does any work: it picks the hwops matching the node's
+ * compatible string, runs the core init hook and allocates the ports array
+ * sized by the hook's return value.  Every later call returns 0 right away,
+ * whatever the outcome of the first one was.
+ *
+ * Returns 0 on success (or on any call after the first), -ENODEV for an
+ * unknown host type or a core that reports no ports, and -ENOMEM when the
+ * ports array cannot be allocated.
+ */
+static int __init ppc4xx_pciex_check_core_init(struct device_node *np)
+{
+	static int already_called;
+	int nr_ports;
+
+	if (already_called++)
+		return 0;
+
+	/*
+	 * The checks below are deliberately independent of each other: a
+	 * later match overrides an earlier one.
+	 */
+#ifdef CONFIG_44x
+	if (of_device_is_compatible(np, "ibm,plb-pciex-440spe")) {
+		ppc4xx_pciex_hwops = ppc440spe_revA() ?
+			&ppc440speA_pcie_hwops : &ppc440speB_pcie_hwops;
+	}
+	if (of_device_is_compatible(np, "ibm,plb-pciex-460ex"))
+		ppc4xx_pciex_hwops = &ppc460ex_pcie_hwops;
+	if (of_device_is_compatible(np, "ibm,plb-pciex-460sx"))
+		ppc4xx_pciex_hwops = &ppc460sx_pcie_hwops;
+	if (of_device_is_compatible(np, "ibm,plb-pciex-apm821xx"))
+		ppc4xx_pciex_hwops = &apm821xx_pcie_hwops;
+#endif /* CONFIG_44x    */
+#ifdef CONFIG_40x
+	if (of_device_is_compatible(np, "ibm,plb-pciex-405ex"))
+		ppc4xx_pciex_hwops = &ppc405ex_pcie_hwops;
+#endif
+#ifdef CONFIG_476FPE
+	if (of_device_is_compatible(np, "ibm,plb-pciex-476fpe")
+		|| of_device_is_compatible(np, "ibm,plb-pciex-476gtr"))
+		ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops;
+#endif
+	if (!ppc4xx_pciex_hwops) {
+		printk(KERN_WARNING "PCIE: unknown host type %pOF\n", np);
+		return -ENODEV;
+	}
+
+	nr_ports = ppc4xx_pciex_hwops->core_init(np);
+	if (nr_ports <= 0)
+		return -ENODEV;
+
+	ppc4xx_pciex_ports = kcalloc(nr_ports, sizeof(*ppc4xx_pciex_ports),
+				     GFP_KERNEL);
+	if (!ppc4xx_pciex_ports) {
+		printk(KERN_WARNING "PCIE: failed to allocate ports array\n");
+		return -ENOMEM;
+	}
+
+	ppc4xx_pciex_port_count = nr_ports;
+	return 0;
+}
+
+/*
+ * Program the GPL address decoding of a port: point the CFG and REG windows
+ * at the addresses taken from the device tree "reg" property and switch off
+ * the three outbound memory windows and the message window.
+ */
+static void __init ppc4xx_pciex_port_init_mapping(struct ppc4xx_pciex_port *port)
+{
+	resource_size_t cfg_start = port->cfg_space.start;
+	resource_size_t utl_start = port->utl_regs.start;
+
+	/* PCI Express configuration space, based on the reg property */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFGBAH, RES_TO_U32_HIGH(cfg_start));
+	dcr_write(port->dcrs, DCRO_PEGPL_CFGBAL, RES_TO_U32_LOW(cfg_start));
+
+	/* XXX FIXME: Use size from reg property. For now, map 512M */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFGMSK, 0xe0000001);
+
+	/* UTL registers, based on the reg property */
+	dcr_write(port->dcrs, DCRO_PEGPL_REGBAH, RES_TO_U32_HIGH(utl_start));
+	dcr_write(port->dcrs, DCRO_PEGPL_REGBAL, RES_TO_U32_LOW(utl_start));
+
+	/* XXX FIXME: Use size from reg property */
+	dcr_write(port->dcrs, DCRO_PEGPL_REGMSK, 0x00007001);
+
+	/* Disable all other outbound windows */
+	dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL, 0);
+	dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKL, 0);
+	dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKL, 0);
+	dcr_write(port->dcrs, DCRO_PEGPL_MSGMSK, 0);
+}
+
+/*
+ * Bring up one PCIe port: run the SoC specific hardware init, program the
+ * CFG/REG address windows, check the link, map and set up the UTL registers
+ * and, for SDR based cores, wait for the port to become ready.
+ *
+ * Returns the error of the port_init_hw hook if that fails, 0 otherwise.
+ * A failure to map the UTL registers is fatal (BUG_ON).
+ */
+static int __init ppc4xx_pciex_port_init(struct ppc4xx_pciex_port *port)
+{
+	/* Init HW */
+	if (ppc4xx_pciex_hwops->port_init_hw) {
+		int rc = ppc4xx_pciex_hwops->port_init_hw(port);
+
+		if (rc)
+			return rc;
+	}
+
+	/*
+	 * Initialize mapping: disable all regions and configure
+	 * CFG and REG regions based on resources in the device tree
+	 */
+	ppc4xx_pciex_port_init_mapping(port);
+
+	if (ppc4xx_pciex_hwops->check_link)
+		ppc4xx_pciex_hwops->check_link(port);
+
+	/* Map UTL */
+	port->utl_base = ioremap(port->utl_regs.start, 0x100);
+	BUG_ON(!port->utl_base);
+
+	/* Setup UTL registers --BenH. */
+	if (ppc4xx_pciex_hwops->setup_utl)
+		ppc4xx_pciex_hwops->setup_utl(port);
+
+	/*
+	 * Check for VC0 active or PLL Locked and assert RDY.
+	 */
+	if (port->sdr_base) {
+		int is_460sx = of_device_is_compatible(port->node,
+						       "ibm,plb-pciex-460sx");
+		/* The 460SX is checked on bit 12 (PLL), all others on bit 16 (VC0) */
+		u32 ready = is_460sx ? 1 << 12 : 1 << 16;
+
+		/* Only wait when the link check found a link */
+		if (port->link &&
+		    ppc4xx_pciex_wait_on_sdr(port, PESDRn_RCSSTS,
+					     ready, ready, 5000)) {
+			if (is_460sx)
+				printk(KERN_INFO "PCIE%d: PLL not locked\n",
+						port->index);
+			else
+				printk(KERN_INFO "PCIE%d: VC0 not active\n",
+						port->index);
+			port->link = 0;
+		}
+
+		dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET, 0, 1 << 20);
+	}
+
+	msleep(100);
+
+	return 0;
+}
+
+/*
+ * Decide whether a config cycle to (bus, devfn) may be attempted on @port.
+ *
+ * Returns 0 when the access is allowed and PCIBIOS_DEVICE_NOT_FOUND when
+ * the target cannot exist or cannot be reached.  The out-of-range warning
+ * is printed once only.
+ */
+static int ppc4xx_pciex_validate_bdf(struct ppc4xx_pciex_port *port,
+				     struct pci_bus *bus,
+				     unsigned int devfn)
+{
+	static int warned;
+	struct pci_controller *hose = port->hose;
+	int on_root_bus = (bus->number == hose->first_busno);
+
+	/* Endpoint can not generate upstream(remote) config cycles */
+	if (port->endpoint && !on_root_bus)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Check we are within the mapped range */
+	if (bus->number > hose->last_busno) {
+		if (!warned) {
+			printk(KERN_WARNING "Warning! Probing bus %u"
+			       " out of range !\n", bus->number);
+			warned++;
+		}
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	/* The root complex has only one device / function */
+	if (on_root_bus && devfn != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* The other side of the RC has only one device as well */
+	if (bus->number == (hose->first_busno + 1) && PCI_SLOT(devfn) != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Anything behind the root bus needs a link */
+	if (!on_root_bus && !port->link)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return 0;
+}
+
+/*
+ * Return the mapped base address of the config space of (bus, devfn).
+ *
+ * The root bus is served from hose->cfg_addr.  Every other bus lives in the
+ * hose->cfg_data mapping, at 1M per bus (counted from the bus right after
+ * the root bus) and 4K per devfn.
+ */
+static void __iomem *ppc4xx_pciex_get_config_base(struct ppc4xx_pciex_port *port,
+						  struct pci_bus *bus,
+						  unsigned int devfn)
+{
+	/* Remove the casts when we finally remove the stupid volatile
+	 * in struct pci_controller
+	 */
+	if (bus->number != port->hose->first_busno) {
+		int relbus = bus->number - (port->hose->first_busno + 1);
+
+		return (void __iomem *)port->hose->cfg_data +
+			((relbus  << 20) | (devfn << 12));
+	}
+
+	return (void __iomem *)port->hose->cfg_addr;
+}
+
+/*
+ * pci_ops read accessor for the 4xx PCIe ports.
+ *
+ * @bus, @devfn: target of the config cycle
+ * @offset:      register offset inside the function's config space
+ * @len:         access width, 1 or 2 bytes; anything else is read as 4
+ * @val:         receives the value read
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND when the target
+ * is rejected by ppc4xx_pciex_validate_bdf() or when a CRS status was
+ * recorded for anything but a 4 byte read at offset 0.  For that one read
+ * *val is set to 0xffff0001 instead.
+ *
+ * The GPL configuration DCR is modified for the duration of the access and
+ * is restored on every path that modified it.
+ */
+static int ppc4xx_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
+				    int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct ppc4xx_pciex_port *port =
+		&ppc4xx_pciex_ports[hose->indirect_type];
+	void __iomem *addr;
+	u32 gpl_cfg;
+	int ret = PCIBIOS_SUCCESSFUL;
+
+	BUG_ON(hose != port->hose);
+
+	if (ppc4xx_pciex_validate_bdf(port, bus, devfn) != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	addr = ppc4xx_pciex_get_config_base(port, bus, devfn);
+
+	/*
+	 * Reading from configuration space of non-existing device can
+	 * generate transaction errors. For the read duration we suppress
+	 * assertion of machine check exceptions to avoid those.
+	 */
+	gpl_cfg = dcr_read(port->dcrs, DCRO_PEGPL_CFG);
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg | GPL_DMER_MASK_DISA);
+
+	/* Make sure no CRS is recorded */
+	out_be32(port->utl_base + PEUTL_RCSTA, 0x00040000);
+
+	switch (len) {
+	case 1:
+		*val = in_8((u8 *)(addr + offset));
+		break;
+	case 2:
+		*val = in_le16((u16 *)(addr + offset));
+		break;
+	default:
+		*val = in_le32((u32 *)(addr + offset));
+		break;
+	}
+
+	pr_debug("pcie-config-read: bus=%3d [%3d..%3d] devfn=0x%04x"
+		 " offset=0x%04x len=%d, addr=0x%p val=0x%08x\n",
+		 bus->number, hose->first_busno, hose->last_busno,
+		 devfn, offset, len, addr + offset, *val);
+
+	/* Check for CRS (440SPe rev B does that for us but heh ..) */
+	if (in_be32(port->utl_base + PEUTL_RCSTA) & 0x00040000) {
+		pr_debug("Got CRS !\n");
+		if (len != 4 || offset != 0)
+			ret = PCIBIOS_DEVICE_NOT_FOUND;
+		else
+			*val = 0xffff0001;
+	}
+
+	/*
+	 * Restore the GPL configuration on the CRS failure path as well,
+	 * otherwise GPL_DMER_MASK_DISA would stay set after this call.
+	 */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg);
+
+	return ret;
+}
+
+/*
+ * pci_ops write accessor for the 4xx PCIe ports.
+ *
+ * @bus, @devfn: target of the config cycle
+ * @offset:      register offset inside the function's config space
+ * @len:         access width, 1 or 2 bytes; anything else is written as 4
+ * @val:         value to write
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when ppc4xx_pciex_validate_bdf() rejects
+ * the target, PCIBIOS_SUCCESSFUL otherwise.
+ */
+static int ppc4xx_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct ppc4xx_pciex_port *port =
+		&ppc4xx_pciex_ports[hose->indirect_type];
+	void __iomem *reg;
+	u32 saved_cfg;
+
+	if (ppc4xx_pciex_validate_bdf(port, bus, devfn) != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	reg = ppc4xx_pciex_get_config_base(port, bus, devfn) + offset;
+
+	/*
+	 * Set GPL_DMER_MASK_DISA in the GPL configuration while the access
+	 * is in flight, as the read accessor does, and put the previous
+	 * value back afterwards.
+	 */
+	saved_cfg = dcr_read(port->dcrs, DCRO_PEGPL_CFG);
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, saved_cfg | GPL_DMER_MASK_DISA);
+
+	pr_debug("pcie-config-write: bus=%3d [%3d..%3d] devfn=0x%04x"
+		 " offset=0x%04x len=%d, addr=0x%p val=0x%08x\n",
+		 bus->number, hose->first_busno, hose->last_busno,
+		 devfn, offset, len, reg, val);
+
+	if (len == 1)
+		out_8((u8 *)reg, val);
+	else if (len == 2)
+		out_le16((u16 *)reg, val);
+	else
+		out_le32((u32 *)reg, val);
+
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, saved_cfg);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space accessors installed on every 4xx PCIe host controller */
+static struct pci_ops ppc4xx_pciex_pci_ops = {
+	.write	= ppc4xx_pciex_write_config,
+	.read	= ppc4xx_pciex_read_config,
+};
+
+/*
+ * Program one outbound window (POM) of a PCIe port.
+ *
+ * @mbase:    mapped internal config space of the port
+ * @plb_addr: PLB (CPU side) start of the window
+ * @pci_addr: PCI address the window translates to
+ * @size:     window size; must be a power of two, @plb_addr must be aligned
+ *            to it, and it must be at least 1M for index 0/1 and 0x100 for
+ *            index 2
+ * @flags:    resource flags (not used here)
+ * @index:    window number, 0..2; any other value programs nothing
+ *
+ * Returns 0 on success and -1 (after a warning) when the size/alignment
+ * checks fail.
+ */
+static int __init ppc4xx_setup_one_pciex_POM(struct ppc4xx_pciex_port	*port,
+					     struct pci_controller	*hose,
+					     void __iomem		*mbase,
+					     u64			plb_addr,
+					     u64			pci_addr,
+					     u64			size,
+					     unsigned int		flags,
+					     int			index)
+{
+	u32 plb_hi, plb_lo, pci_hi, pci_lo, size_mask, uot;
+
+	if (!is_power_of_2(size))
+		goto bad_range;
+	if (index < 2 && size < 0x100000)
+		goto bad_range;
+	if (index == 2 && size < 0x100)
+		goto bad_range;
+	if ((plb_addr & (size - 1)) != 0)
+		goto bad_range;
+
+	/* Calculate register values */
+	plb_hi = RES_TO_U32_HIGH(plb_addr);
+	plb_lo = RES_TO_U32_LOW(plb_addr);
+	pci_hi = RES_TO_U32_HIGH(pci_addr);
+	pci_lo = RES_TO_U32_LOW(pci_addr);
+	size_mask = (0xffffffffu << ilog2(size)) | 0x1;
+
+	/* Program register values */
+	switch (index) {
+	case 0:
+		out_le32(mbase + PECFG_POM0LAH, pci_hi);
+		out_le32(mbase + PECFG_POM0LAL, pci_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1BAH, plb_hi);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1BAL, plb_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKH, 0x7fffffff);
+
+		/* The UOT bit of window 1 depends on the core flavour */
+		if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx"))
+			uot = DCRO_PEGPL_460SX_OMR1MSKL_UOT;
+		else if (of_device_is_compatible(port->node,
+						 "ibm,plb-pciex-476fpe") ||
+			 of_device_is_compatible(port->node,
+						 "ibm,plb-pciex-476gtr"))
+			uot = DCRO_PEGPL_476FPE_OMR1MSKL_UOT;
+		else
+			uot = DCRO_PEGPL_OMR1MSKL_UOT;
+
+		/* Enabled and single region */
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL,
+			  size_mask | uot | DCRO_PEGPL_OMRxMSKL_VAL);
+		break;
+	case 1:
+		out_le32(mbase + PECFG_POM1LAH, pci_hi);
+		out_le32(mbase + PECFG_POM1LAL, pci_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2BAH, plb_hi);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2BAL, plb_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKH, 0x7fffffff);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKL,
+			  size_mask | DCRO_PEGPL_OMRxMSKL_VAL);
+		break;
+	case 2:
+		out_le32(mbase + PECFG_POM2LAH, pci_hi);
+		out_le32(mbase + PECFG_POM2LAL, pci_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3BAH, plb_hi);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3BAL, plb_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKH, 0x7fffffff);
+		/* Note that 3 here means enabled | IO space !!! */
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKL,
+			  size_mask | DCRO_PEGPL_OMR3MSKL_IO
+				    | DCRO_PEGPL_OMRxMSKL_VAL);
+		break;
+	}
+
+	return 0;
+
+bad_range:
+	printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
+	return -1;
+}
+
+/*
+ * Set up all outbound windows of a port from the resources of @hose:
+ * up to two memory windows (POM 0 and 1), optionally a legacy ISA memory
+ * hole in a still unused memory window, and the I/O window (POM 2).
+ */
+static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
+					       struct pci_controller *hose,
+					       void __iomem *mbase)
+{
+	int i;
+	int nr_mem_poms = 0;	/* memory windows programmed so far */
+	int have_isa_hole = 0;
+
+	/* Setup outbound memory windows */
+	for (i = 0; i < 3; i++) {
+		struct resource *res = &hose->mem_resources[i];
+		resource_size_t offset = hose->mem_offset[i];
+
+		/* we only care about memory windows */
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+
+		if (nr_mem_poms > 1) {
+			printk(KERN_WARNING "%pOF: Too many ranges\n",
+			       port->node);
+			break;
+		}
+
+		/* Configure the resource; a rejected one uses up no window */
+		if (ppc4xx_setup_one_pciex_POM(port, hose, mbase,
+					       res->start,
+					       res->start - offset,
+					       resource_size(res),
+					       res->flags,
+					       nr_mem_poms) != 0)
+			continue;
+
+		nr_mem_poms++;
+
+		/* If the resource PCI address is 0 then we have our
+		 * ISA memory hole
+		 */
+		if (res->start == offset)
+			have_isa_hole = 1;
+	}
+
+	/* Handle ISA memory hole if not already covered */
+	if (nr_mem_poms <= 1 && !have_isa_hole && hose->isa_mem_size &&
+	    ppc4xx_setup_one_pciex_POM(port, hose, mbase,
+				       hose->isa_mem_phys, 0,
+				       hose->isa_mem_size, 0,
+				       nr_mem_poms) == 0)
+		printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+		       hose->dn);
+
+	/* Configure IO, always 64K starting at 0. We hard wire it to 64K !
+	 * Note also that it -has- to be region index 2 on this HW
+	 */
+	if (hose->io_resource.flags & IORESOURCE_IO)
+		ppc4xx_setup_one_pciex_POM(port, hose, mbase,
+					   hose->io_base_phys, 0,
+					   0x10000, IORESOURCE_IO, 2);
+}
+
+/*
+ * Set up the inbound window (PIM/BAR0) of a port and enable I/O, memory and
+ * bus master cycles in its PCI command register.
+ *
+ * @mbase: mapped internal config space of the port
+ * @res:   inbound (DMA) window; only used when the port is a root complex
+ */
+static void __init ppc4xx_configure_pciex_PIMs(struct ppc4xx_pciex_port *port,
+					       struct pci_controller *hose,
+					       void __iomem *mbase,
+					       struct resource *res)
+{
+	u64 win_mask;
+
+	if (!port->endpoint) {
+		resource_size_t size = resource_size(res);
+
+		/* Calculate window size */
+		win_mask = (0xffffffffffffffffull << ilog2(size));
+		if (res->flags & IORESOURCE_PREFETCH)
+			win_mask |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+		/* These cores get BAR0 flagged as a 64-bit memory BAR */
+		if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx") ||
+		    of_device_is_compatible(port->node,
+					    "ibm,plb-pciex-476fpe") ||
+		    of_device_is_compatible(port->node,
+					    "ibm,plb-pciex-476gtr"))
+			win_mask |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+
+		out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(win_mask));
+		out_le32(mbase + PECFG_BAR0LMPA, RES_TO_U32_LOW(win_mask));
+
+		/* The setup of the split looks weird to me ... let's see
+		 * if it works
+		 */
+		out_le32(mbase + PECFG_PIM0LAL, 0x00000000);
+		out_le32(mbase + PECFG_PIM0LAH, 0x00000000);
+		out_le32(mbase + PECFG_PIM1LAL, 0x00000000);
+		out_le32(mbase + PECFG_PIM1LAH, 0x00000000);
+		out_le32(mbase + PECFG_PIM01SAH, 0xffff0000);
+		out_le32(mbase + PECFG_PIM01SAL, 0x00000000);
+
+		out_le32(mbase + PCI_BASE_ADDRESS_0, RES_TO_U32_LOW(res->start));
+		out_le32(mbase + PCI_BASE_ADDRESS_1, RES_TO_U32_HIGH(res->start));
+	} else {
+		/* For now a fixed window of 32 << 20 bytes at PLB address
+		 * 0 (SDRAM) is mapped. This should probably be configurable
+		 * via a dts property.
+		 */
+		resource_size_t ep_addr = 0;
+		resource_size_t ep_size = 32 << 20;
+
+		/* Calculate window size */
+		win_mask = (0xffffffffffffffffull << ilog2(ep_size));
+
+		/* Setup BAR0 */
+		out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(win_mask));
+		out_le32(mbase + PECFG_BAR0LMPA, RES_TO_U32_LOW(win_mask) |
+			 PCI_BASE_ADDRESS_MEM_TYPE_64);
+
+		/* Disable BAR1 & BAR2 */
+		out_le32(mbase + PECFG_BAR1MPA, 0);
+		out_le32(mbase + PECFG_BAR2HMPA, 0);
+		out_le32(mbase + PECFG_BAR2LMPA, 0);
+
+		out_le32(mbase + PECFG_PIM01SAH, RES_TO_U32_HIGH(win_mask));
+		out_le32(mbase + PECFG_PIM01SAL, RES_TO_U32_LOW(win_mask));
+
+		out_le32(mbase + PCI_BASE_ADDRESS_0, RES_TO_U32_LOW(ep_addr));
+		out_le32(mbase + PCI_BASE_ADDRESS_1, RES_TO_U32_HIGH(ep_addr));
+	}
+
+	/* Enable inbound mapping */
+	out_le32(mbase + PECFG_PIMEN, 0x1);
+
+	/* Enable I/O, Mem, and Busmaster cycles */
+	out_le16(mbase + PCI_COMMAND,
+		 in_le16(mbase + PCI_COMMAND) |
+		 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+}
+
+/*
+ * Create and configure the pci_controller ("hose") of one PCIe port.
+ *
+ * Allocates the controller, maps the external (root complex only) and the
+ * internal config space, programs the bus numbers, the outbound (POM) and
+ * inbound (PIM) windows, and finally the vendor/device id and class code of
+ * the port itself.
+ *
+ * On failure everything acquired here is released again and port->hose is
+ * left NULL, so the port never refers to a freed controller.
+ */
+static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
+{
+	struct resource dma_window;
+	struct pci_controller *hose = NULL;
+	const int *bus_range;
+	int primary, busses;
+	void __iomem *mbase = NULL, *cfg_data = NULL;
+	const u32 *pval;
+	u32 val;
+
+	/* Check if primary bridge */
+	primary = of_property_read_bool(port->node, "primary");
+
+	/* Get bus range if any */
+	bus_range = of_get_property(port->node, "bus-range", NULL);
+
+	/* Allocate the host controller data structure */
+	hose = pcibios_alloc_controller(port->node);
+	if (!hose)
+		goto fail;
+
+	/* We stick the port number in "indirect_type" so the config space
+	 * ops can retrieve the port data structure easily
+	 */
+	hose->indirect_type = port->index;
+
+	/* Get bus range */
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	/* Because of how big mapping the config space is (1M per bus), we
+	 * limit how many busses we support. In the long run, we could replace
+	 * that with something akin to kmap_atomic instead. We set aside 1 bus
+	 * for the host itself too.
+	 */
+	busses = hose->last_busno - hose->first_busno; /* This is off by 1 */
+	if (busses > MAX_PCIE_BUS_MAPPED) {
+		busses = MAX_PCIE_BUS_MAPPED;
+		hose->last_busno = hose->first_busno + busses;
+	}
+
+	if (!port->endpoint) {
+		/* Only map the external config space in cfg_data for
+		 * PCIe root-complexes. External space is 1M per bus
+		 */
+		cfg_data = ioremap(port->cfg_space.start +
+				   (hose->first_busno + 1) * 0x100000,
+				   busses * 0x100000);
+		if (cfg_data == NULL) {
+			printk(KERN_ERR "%pOF: Can't map external config space !\n",
+			       port->node);
+			goto fail;
+		}
+		hose->cfg_data = cfg_data;
+	}
+
+	/* Always map the host config space in cfg_addr.
+	 * Internal space is 4K
+	 */
+	mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
+	if (mbase == NULL) {
+		printk(KERN_ERR "%pOF: Can't map internal config space !\n",
+		       port->node);
+		goto fail;
+	}
+	hose->cfg_addr = mbase;
+
+	pr_debug("PCIE %pOF, bus %d..%d\n", port->node,
+		 hose->first_busno, hose->last_busno);
+	pr_debug("     config space mapped at: root @0x%p, other @0x%p\n",
+		 hose->cfg_addr, hose->cfg_data);
+
+	/* Setup config space */
+	hose->ops = &ppc4xx_pciex_pci_ops;
+	port->hose = hose;
+	mbase = (void __iomem *)hose->cfg_addr;
+
+	if (!port->endpoint) {
+		/*
+		 * Set bus numbers on our root port
+		 */
+		out_8(mbase + PCI_PRIMARY_BUS, hose->first_busno);
+		out_8(mbase + PCI_SECONDARY_BUS, hose->first_busno + 1);
+		out_8(mbase + PCI_SUBORDINATE_BUS, hose->last_busno);
+	}
+
+	/*
+	 * OMRs are already reset, also disable PIMs
+	 */
+	out_le32(mbase + PECFG_PIMEN, 0);
+
+	/* Parse outbound mapping resources */
+	pci_process_bridge_OF_ranges(hose, port->node, primary);
+
+	/* Parse inbound mapping resources */
+	if (ppc4xx_parse_dma_ranges(hose, mbase, &dma_window) != 0)
+		goto fail;
+
+	/* Configure outbound ranges POMs */
+	ppc4xx_configure_pciex_POMs(port, hose, mbase);
+
+	/* Configure inbound ranges PIMs */
+	ppc4xx_configure_pciex_PIMs(port, hose, mbase, &dma_window);
+
+	/* The root complex doesn't show up if we don't set some vendor
+	 * and device IDs into it. The defaults below are the same bogus
+	 * one that the initial code in arch/ppc had. This can be
+	 * overwritten by setting the "vendor-id/device-id" properties
+	 * in the pciex node.
+	 */
+
+	/* Get the (optional) vendor-/device-id from the device-tree */
+	pval = of_get_property(port->node, "vendor-id", NULL);
+	if (pval) {
+		val = *pval;
+	} else {
+		if (!port->endpoint)
+			val = 0xaaa0 + port->index;
+		else
+			val = 0xeee0 + port->index;
+	}
+	out_le16(mbase + 0x200, val);
+
+	pval = of_get_property(port->node, "device-id", NULL);
+	if (pval) {
+		val = *pval;
+	} else {
+		if (!port->endpoint)
+			val = 0xbed0 + port->index;
+		else
+			val = 0xfed0 + port->index;
+	}
+	out_le16(mbase + 0x202, val);
+
+	/* Enable Bus master, memory, and io space */
+	if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx"))
+		out_le16(mbase + 0x204, 0x7);
+
+	if (!port->endpoint) {
+		/* Set Class Code to PCI-PCI bridge and Revision Id to 1 */
+		out_le32(mbase + 0x208, 0x06040001);
+
+		printk(KERN_INFO "PCIE%d: successfully set as root-complex\n",
+		       port->index);
+	} else {
+		/* Set Class Code to Processor/PPC */
+		out_le32(mbase + 0x208, 0x0b200001);
+
+		printk(KERN_INFO "PCIE%d: successfully set as endpoint\n",
+		       port->index);
+	}
+
+	return;
+ fail:
+	/*
+	 * port->hose may already have been set when the DMA range parsing
+	 * fails; drop the reference before the controller goes away.
+	 */
+	port->hose = NULL;
+	if (hose)
+		pcibios_free_controller(hose);
+	if (cfg_data)
+		iounmap(cfg_data);
+	if (mbase)
+		iounmap(mbase);
+}
+
+/*
+ * Probe one PCIe bridge node of the device tree.
+ *
+ * Makes sure the PCIe core is initialized, looks up the port slot named by
+ * the node's "port" property, collects the resources of the port
+ * (sdr-base when the hwops want it, config space, UTL registers, DCRs),
+ * determines from device_type whether it acts as root complex or endpoint,
+ * and then initializes the port hardware and its host controller.
+ *
+ * Every failure is reported with a message and abandons the port.
+ */
+static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
+{
+	struct ppc4xx_pciex_port *port;
+	const u32 *pval;
+	int portno;
+	unsigned int dcrs;
+
+	/* First, proceed to core initialization as we assume there's
+	 * only one PCIe core in the system
+	 */
+	if (ppc4xx_pciex_check_core_init(np))
+		return;
+
+	/* Get the port number from the device-tree */
+	pval = of_get_property(np, "port", NULL);
+	if (pval == NULL) {
+		printk(KERN_ERR "PCIE: Can't find port number for %pOF\n", np);
+		return;
+	}
+	portno = *pval;
+	/*
+	 * The property is unsigned; a huge value turns negative in portno
+	 * and must not be used as an index into the ports array either.
+	 */
+	if (portno < 0 || portno >= ppc4xx_pciex_port_count) {
+		printk(KERN_ERR "PCIE: port number out of range for %pOF\n",
+		       np);
+		return;
+	}
+	port = &ppc4xx_pciex_ports[portno];
+	port->index = portno;
+
+	/*
+	 * Check if device is enabled
+	 */
+	if (!of_device_is_available(np)) {
+		printk(KERN_INFO "PCIE%d: Port disabled via device-tree\n", port->index);
+		return;
+	}
+
+	port->node = of_node_get(np);
+	if (ppc4xx_pciex_hwops->want_sdr) {
+		pval = of_get_property(np, "sdr-base", NULL);
+		if (pval == NULL) {
+			printk(KERN_ERR "PCIE: missing sdr-base for %pOF\n",
+			       np);
+			return;
+		}
+		port->sdr_base = *pval;
+	}
+
+	/* Check if device_type property is set to "pci" or "pci-endpoint".
+	 * Resulting from this setup this PCIe port will be configured
+	 * as root-complex or as endpoint.
+	 */
+	if (of_node_is_type(port->node, "pci-endpoint")) {
+		port->endpoint = 1;
+	} else if (of_node_is_type(port->node, "pci")) {
+		port->endpoint = 0;
+	} else {
+		printk(KERN_ERR "PCIE: missing or incorrect device_type for %pOF\n",
+		       np);
+		return;
+	}
+
+	/* Fetch config space registers address */
+	if (of_address_to_resource(np, 0, &port->cfg_space)) {
+		printk(KERN_ERR "%pOF: Can't get PCI-E config space !\n", np);
+		return;
+	}
+	/* Fetch host bridge internal registers address */
+	if (of_address_to_resource(np, 1, &port->utl_regs)) {
+		printk(KERN_ERR "%pOF: Can't get UTL register base !\n", np);
+		return;
+	}
+
+	/* Map DCRs */
+	dcrs = dcr_resource_start(np, 0);
+	if (dcrs == 0) {
+		printk(KERN_ERR "%pOF: Can't get DCR register base !\n", np);
+		return;
+	}
+	port->dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
+
+	/* Initialize the port specific registers */
+	if (ppc4xx_pciex_port_init(port)) {
+		printk(KERN_WARNING "PCIE%d: Port init failed\n", port->index);
+		return;
+	}
+
+	/* Setup the linux hose data structure */
+	ppc4xx_pciex_port_setup_hose(port);
+}
+
+#endif /* CONFIG_PPC4xx_PCI_EXPRESS */
+
+/*
+ * Initcall: probe every 4xx PCI host bridge found in the device tree.
+ * PCI Express bridges (when support is built in) are probed first, then
+ * PCI-X and finally plain PCI bridges.  Always returns 0.
+ */
+static int __init ppc4xx_pci_find_bridges(void)
+{
+	struct device_node *bridge;
+
+	pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+
+#ifdef CONFIG_PPC4xx_PCI_EXPRESS
+	for_each_compatible_node(bridge, NULL, "ibm,plb-pciex") {
+		ppc4xx_probe_pciex_bridge(bridge);
+	}
+#endif
+	for_each_compatible_node(bridge, NULL, "ibm,plb-pcix") {
+		ppc4xx_probe_pcix_bridge(bridge);
+	}
+	for_each_compatible_node(bridge, NULL, "ibm,plb-pci") {
+		ppc4xx_probe_pci_bridge(bridge);
+	}
+
+	return 0;
+}
+arch_initcall(ppc4xx_pci_find_bridges);
+
diff --git a/arch/powerpc/platforms/4xx/pci.h b/arch/powerpc/platforms/4xx/pci.h
new file mode 100644
index 0000000000..bb4821938a
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/pci.h
@@ -0,0 +1,505 @@
+/*
+ * PCI / PCI-X / PCI-Express support for 4xx parts
+ *
+ * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Bits and pieces extracted from arch/ppc support by
+ *
+ * Matt Porter <mporter@kernel.crashing.org>
+ *
+ * Copyright 2002-2005 MontaVista Software Inc.
+ */
+#ifndef __PPC4XX_PCI_H__
+#define __PPC4XX_PCI_H__
+
+/*
+ * 4xx PCI-X bridge register definitions
+ */
+#define PCIX0_VENDID		0x000
+#define PCIX0_DEVID		0x002
+#define PCIX0_COMMAND		0x004
+#define PCIX0_STATUS		0x006
+#define PCIX0_REVID		0x008
+#define PCIX0_CLS		0x009
+#define PCIX0_CACHELS		0x00c
+#define PCIX0_LATTIM		0x00d
+#define PCIX0_HDTYPE		0x00e
+#define PCIX0_BIST		0x00f
+#define PCIX0_BAR0L		0x010
+#define PCIX0_BAR0H		0x014
+#define PCIX0_BAR1		0x018
+#define PCIX0_BAR2L		0x01c
+#define PCIX0_BAR2H		0x020
+#define PCIX0_BAR3		0x024
+#define PCIX0_CISPTR		0x028
+#define PCIX0_SBSYSVID		0x02c
+#define PCIX0_SBSYSID		0x02e
+#define PCIX0_EROMBA		0x030
+#define PCIX0_CAP		0x034
+#define PCIX0_RES0		0x035
+#define PCIX0_RES1		0x036
+#define PCIX0_RES2		0x038
+#define PCIX0_INTLN		0x03c
+#define PCIX0_INTPN		0x03d
+#define PCIX0_MINGNT		0x03e
+#define PCIX0_MAXLTNCY		0x03f
+#define PCIX0_BRDGOPT1		0x040
+#define PCIX0_BRDGOPT2		0x044
+#define PCIX0_ERREN		0x050
+#define PCIX0_ERRSTS		0x054
+#define PCIX0_PLBBESR		0x058
+#define PCIX0_PLBBEARL		0x05c
+#define PCIX0_PLBBEARH		0x060
+#define PCIX0_POM0LAL		0x068
+#define PCIX0_POM0LAH		0x06c
+#define PCIX0_POM0SA		0x070
+#define PCIX0_POM0PCIAL		0x074
+#define PCIX0_POM0PCIAH		0x078
+#define PCIX0_POM1LAL		0x07c
+#define PCIX0_POM1LAH		0x080
+#define PCIX0_POM1SA		0x084
+#define PCIX0_POM1PCIAL		0x088
+#define PCIX0_POM1PCIAH		0x08c
+#define PCIX0_POM2SA		0x090
+#define PCIX0_PIM0SAL		0x098
+#define PCIX0_PIM0SA		PCIX0_PIM0SAL
+#define PCIX0_PIM0LAL		0x09c
+#define PCIX0_PIM0LAH		0x0a0
+#define PCIX0_PIM1SA		0x0a4
+#define PCIX0_PIM1LAL		0x0a8
+#define PCIX0_PIM1LAH		0x0ac
+#define PCIX0_PIM2SAL		0x0b0
+#define PCIX0_PIM2SA		PCIX0_PIM2SAL
+#define PCIX0_PIM2LAL		0x0b4
+#define PCIX0_PIM2LAH		0x0b8
+#define PCIX0_OMCAPID		0x0c0
+#define PCIX0_OMNIPTR		0x0c1
+#define PCIX0_OMMC		0x0c2
+#define PCIX0_OMMA		0x0c4
+#define PCIX0_OMMUA		0x0c8
+#define PCIX0_OMMDATA		0x0cc
+#define PCIX0_OMMEOI		0x0ce
+#define PCIX0_PMCAPID		0x0d0
+#define PCIX0_PMNIPTR		0x0d1
+#define PCIX0_PMC		0x0d2
+#define PCIX0_PMCSR		0x0d4
+#define PCIX0_PMCSRBSE		0x0d6
+#define PCIX0_PMDATA		0x0d7
+#define PCIX0_PMSCRR		0x0d8
+#define PCIX0_CAPID		0x0dc
+#define PCIX0_NIPTR		0x0dd
+#define PCIX0_CMD		0x0de
+#define PCIX0_STS		0x0e0
+#define PCIX0_IDR		0x0e4
+#define PCIX0_CID		0x0e8
+#define PCIX0_RID		0x0ec
+#define PCIX0_PIM0SAH		0x0f8
+#define PCIX0_PIM2SAH		0x0fc
+#define PCIX0_MSGIL		0x100
+#define PCIX0_MSGIH		0x104
+#define PCIX0_MSGOL		0x108
+#define PCIX0_MSGOH		0x10c
+#define PCIX0_IM		0x1f8
+
+/*
+ * 4xx PCI bridge register definitions
+ */
+#define PCIL0_PMM0LA		0x00
+#define PCIL0_PMM0MA		0x04
+#define PCIL0_PMM0PCILA		0x08
+#define PCIL0_PMM0PCIHA		0x0c
+#define PCIL0_PMM1LA		0x10
+#define PCIL0_PMM1MA		0x14
+#define PCIL0_PMM1PCILA		0x18
+#define PCIL0_PMM1PCIHA		0x1c
+#define PCIL0_PMM2LA		0x20
+#define PCIL0_PMM2MA		0x24
+#define PCIL0_PMM2PCILA		0x28
+#define PCIL0_PMM2PCIHA		0x2c
+#define PCIL0_PTM1MS		0x30
+#define PCIL0_PTM1LA		0x34
+#define PCIL0_PTM2MS		0x38
+#define PCIL0_PTM2LA		0x3c
+
+/*
+ * 4xx PCIe bridge register definitions
+ */
+
+/* DCR offsets */
+#define DCRO_PEGPL_CFGBAH		0x00
+#define DCRO_PEGPL_CFGBAL		0x01
+#define DCRO_PEGPL_CFGMSK		0x02
+#define DCRO_PEGPL_MSGBAH		0x03
+#define DCRO_PEGPL_MSGBAL		0x04
+#define DCRO_PEGPL_MSGMSK		0x05
+#define DCRO_PEGPL_OMR1BAH		0x06
+#define DCRO_PEGPL_OMR1BAL		0x07
+#define DCRO_PEGPL_OMR1MSKH		0x08
+#define DCRO_PEGPL_OMR1MSKL		0x09
+#define DCRO_PEGPL_OMR2BAH		0x0a
+#define DCRO_PEGPL_OMR2BAL		0x0b
+#define DCRO_PEGPL_OMR2MSKH		0x0c
+#define DCRO_PEGPL_OMR2MSKL		0x0d
+#define DCRO_PEGPL_OMR3BAH		0x0e
+#define DCRO_PEGPL_OMR3BAL		0x0f
+#define DCRO_PEGPL_OMR3MSKH		0x10
+#define DCRO_PEGPL_OMR3MSKL		0x11
+#define DCRO_PEGPL_REGBAH		0x12
+#define DCRO_PEGPL_REGBAL		0x13
+#define DCRO_PEGPL_REGMSK		0x14
+#define DCRO_PEGPL_SPECIAL		0x15
+#define DCRO_PEGPL_CFG			0x16
+#define DCRO_PEGPL_ESR			0x17
+#define DCRO_PEGPL_EARH			0x18
+#define DCRO_PEGPL_EARL			0x19
+#define DCRO_PEGPL_EATR			0x1a
+
+/* DMER mask */
+#define GPL_DMER_MASK_DISA	0x02000000
+
+/*
+ * System DCRs (SDRs)
+ */
+#define PESDR0_PLLLCT1			0x03a0
+#define PESDR0_PLLLCT2			0x03a1
+#define PESDR0_PLLLCT3			0x03a2
+
+/*
+ * 440SPe additional DCRs
+ */
+#define PESDR0_440SPE_UTLSET1		0x0300
+#define PESDR0_440SPE_UTLSET2		0x0301
+#define PESDR0_440SPE_DLPSET		0x0302
+#define PESDR0_440SPE_LOOP		0x0303
+#define PESDR0_440SPE_RCSSET		0x0304
+#define PESDR0_440SPE_RCSSTS		0x0305
+#define PESDR0_440SPE_HSSL0SET1		0x0306
+#define PESDR0_440SPE_HSSL0SET2		0x0307
+#define PESDR0_440SPE_HSSL0STS		0x0308
+#define PESDR0_440SPE_HSSL1SET1		0x0309
+#define PESDR0_440SPE_HSSL1SET2		0x030a
+#define PESDR0_440SPE_HSSL1STS		0x030b
+#define PESDR0_440SPE_HSSL2SET1		0x030c
+#define PESDR0_440SPE_HSSL2SET2		0x030d
+#define PESDR0_440SPE_HSSL2STS		0x030e
+#define PESDR0_440SPE_HSSL3SET1		0x030f
+#define PESDR0_440SPE_HSSL3SET2		0x0310
+#define PESDR0_440SPE_HSSL3STS		0x0311
+#define PESDR0_440SPE_HSSL4SET1		0x0312
+#define PESDR0_440SPE_HSSL4SET2		0x0313
+#define PESDR0_440SPE_HSSL4STS	       	0x0314
+#define PESDR0_440SPE_HSSL5SET1		0x0315
+#define PESDR0_440SPE_HSSL5SET2		0x0316
+#define PESDR0_440SPE_HSSL5STS		0x0317
+#define PESDR0_440SPE_HSSL6SET1		0x0318
+#define PESDR0_440SPE_HSSL6SET2		0x0319
+#define PESDR0_440SPE_HSSL6STS		0x031a
+#define PESDR0_440SPE_HSSL7SET1		0x031b
+#define PESDR0_440SPE_HSSL7SET2		0x031c
+#define PESDR0_440SPE_HSSL7STS		0x031d
+#define PESDR0_440SPE_HSSCTLSET		0x031e
+#define PESDR0_440SPE_LANE_ABCD		0x031f
+#define PESDR0_440SPE_LANE_EFGH		0x0320
+
+#define PESDR1_440SPE_UTLSET1		0x0340
+#define PESDR1_440SPE_UTLSET2		0x0341
+#define PESDR1_440SPE_DLPSET		0x0342
+#define PESDR1_440SPE_LOOP		0x0343
+#define PESDR1_440SPE_RCSSET		0x0344
+#define PESDR1_440SPE_RCSSTS		0x0345
+#define PESDR1_440SPE_HSSL0SET1		0x0346
+#define PESDR1_440SPE_HSSL0SET2		0x0347
+#define PESDR1_440SPE_HSSL0STS		0x0348
+#define PESDR1_440SPE_HSSL1SET1		0x0349
+#define PESDR1_440SPE_HSSL1SET2		0x034a
+#define PESDR1_440SPE_HSSL1STS		0x034b
+#define PESDR1_440SPE_HSSL2SET1		0x034c
+#define PESDR1_440SPE_HSSL2SET2		0x034d
+#define PESDR1_440SPE_HSSL2STS		0x034e
+#define PESDR1_440SPE_HSSL3SET1		0x034f
+#define PESDR1_440SPE_HSSL3SET2		0x0350
+#define PESDR1_440SPE_HSSL3STS		0x0351
+#define PESDR1_440SPE_HSSCTLSET		0x0352
+#define PESDR1_440SPE_LANE_ABCD		0x0353
+
+#define PESDR2_440SPE_UTLSET1		0x0370
+#define PESDR2_440SPE_UTLSET2		0x0371
+#define PESDR2_440SPE_DLPSET		0x0372
+#define PESDR2_440SPE_LOOP		0x0373
+#define PESDR2_440SPE_RCSSET		0x0374
+#define PESDR2_440SPE_RCSSTS		0x0375
+#define PESDR2_440SPE_HSSL0SET1		0x0376
+#define PESDR2_440SPE_HSSL0SET2		0x0377
+#define PESDR2_440SPE_HSSL0STS		0x0378
+#define PESDR2_440SPE_HSSL1SET1		0x0379
+#define PESDR2_440SPE_HSSL1SET2		0x037a
+#define PESDR2_440SPE_HSSL1STS		0x037b
+#define PESDR2_440SPE_HSSL2SET1		0x037c
+#define PESDR2_440SPE_HSSL2SET2		0x037d
+#define PESDR2_440SPE_HSSL2STS		0x037e
+#define PESDR2_440SPE_HSSL3SET1		0x037f
+#define PESDR2_440SPE_HSSL3SET2		0x0380
+#define PESDR2_440SPE_HSSL3STS		0x0381
+#define PESDR2_440SPE_HSSCTLSET		0x0382
+#define PESDR2_440SPE_LANE_ABCD		0x0383
+
+/*
+ * 405EX additional DCRs
+ */
+#define PESDR0_405EX_UTLSET1		0x0400
+#define PESDR0_405EX_UTLSET2		0x0401
+#define PESDR0_405EX_DLPSET		0x0402
+#define PESDR0_405EX_LOOP		0x0403
+#define PESDR0_405EX_RCSSET		0x0404
+#define PESDR0_405EX_RCSSTS		0x0405
+#define PESDR0_405EX_PHYSET1		0x0406
+#define PESDR0_405EX_PHYSET2		0x0407
+#define PESDR0_405EX_BIST		0x0408
+#define PESDR0_405EX_LPB		0x040B
+#define PESDR0_405EX_PHYSTA		0x040C
+
+#define PESDR1_405EX_UTLSET1		0x0440
+#define PESDR1_405EX_UTLSET2		0x0441
+#define PESDR1_405EX_DLPSET		0x0442
+#define PESDR1_405EX_LOOP		0x0443
+#define PESDR1_405EX_RCSSET		0x0444
+#define PESDR1_405EX_RCSSTS		0x0445
+#define PESDR1_405EX_PHYSET1		0x0446
+#define PESDR1_405EX_PHYSET2		0x0447
+#define PESDR1_405EX_BIST		0x0448
+#define PESDR1_405EX_LPB		0x044B
+#define PESDR1_405EX_PHYSTA		0x044C
+
+/*
+ * 460EX additional DCRs
+ */
+#define PESDR0_460EX_L0BIST		0x0308
+#define PESDR0_460EX_L0BISTSTS		0x0309
+#define PESDR0_460EX_L0CDRCTL		0x030A
+#define PESDR0_460EX_L0DRV		0x030B
+#define PESDR0_460EX_L0REC		0x030C
+#define PESDR0_460EX_L0LPB		0x030D
+#define PESDR0_460EX_L0CLK		0x030E
+#define PESDR0_460EX_PHY_CTL_RST	0x030F
+#define PESDR0_460EX_RSTSTA		0x0310
+#define PESDR0_460EX_OBS		0x0311
+#define PESDR0_460EX_L0ERRC		0x0320
+
+#define PESDR1_460EX_L0BIST		0x0348
+#define PESDR1_460EX_L1BIST		0x0349
+#define PESDR1_460EX_L2BIST		0x034A
+#define PESDR1_460EX_L3BIST		0x034B
+#define PESDR1_460EX_L0BISTSTS		0x034C
+#define PESDR1_460EX_L1BISTSTS		0x034D
+#define PESDR1_460EX_L2BISTSTS		0x034E
+#define PESDR1_460EX_L3BISTSTS		0x034F
+#define PESDR1_460EX_L0CDRCTL		0x0350
+#define PESDR1_460EX_L1CDRCTL		0x0351
+#define PESDR1_460EX_L2CDRCTL		0x0352
+#define PESDR1_460EX_L3CDRCTL		0x0353
+#define PESDR1_460EX_L0DRV		0x0354
+#define PESDR1_460EX_L1DRV		0x0355
+#define PESDR1_460EX_L2DRV		0x0356
+#define PESDR1_460EX_L3DRV		0x0357
+#define PESDR1_460EX_L0REC		0x0358
+#define PESDR1_460EX_L1REC		0x0359
+#define PESDR1_460EX_L2REC		0x035A
+#define PESDR1_460EX_L3REC		0x035B
+#define PESDR1_460EX_L0LPB		0x035C
+#define PESDR1_460EX_L1LPB		0x035D
+#define PESDR1_460EX_L2LPB		0x035E
+#define PESDR1_460EX_L3LPB		0x035F
+#define PESDR1_460EX_L0CLK		0x0360
+#define PESDR1_460EX_L1CLK		0x0361
+#define PESDR1_460EX_L2CLK		0x0362
+#define PESDR1_460EX_L3CLK		0x0363
+#define PESDR1_460EX_PHY_CTL_RST	0x0364
+#define PESDR1_460EX_RSTSTA		0x0365
+#define PESDR1_460EX_OBS		0x0366
+#define PESDR1_460EX_L0ERRC		0x0368
+#define PESDR1_460EX_L1ERRC		0x0369
+#define PESDR1_460EX_L2ERRC		0x036A
+#define PESDR1_460EX_L3ERRC		0x036B
+#define PESDR0_460EX_IHS1		0x036C
+#define PESDR0_460EX_IHS2		0x036D
+
+/*
+ * 460SX additional DCRs
+ */
+#define PESDRn_460SX_RCEI		0x02
+
+#define PESDR0_460SX_HSSL0DAMP		0x320
+#define PESDR0_460SX_HSSL1DAMP		0x321
+#define PESDR0_460SX_HSSL2DAMP		0x322
+#define PESDR0_460SX_HSSL3DAMP		0x323
+#define PESDR0_460SX_HSSL4DAMP		0x324
+#define PESDR0_460SX_HSSL5DAMP		0x325
+#define PESDR0_460SX_HSSL6DAMP		0x326
+#define PESDR0_460SX_HSSL7DAMP		0x327
+
+#define PESDR1_460SX_HSSL0DAMP		0x354
+#define PESDR1_460SX_HSSL1DAMP		0x355
+#define PESDR1_460SX_HSSL2DAMP		0x356
+#define PESDR1_460SX_HSSL3DAMP		0x357
+
+#define PESDR2_460SX_HSSL0DAMP		0x384
+#define PESDR2_460SX_HSSL1DAMP		0x385
+#define PESDR2_460SX_HSSL2DAMP		0x386
+#define PESDR2_460SX_HSSL3DAMP		0x387
+
+#define PESDR0_460SX_HSSL0COEFA		0x328
+#define PESDR0_460SX_HSSL1COEFA		0x329
+#define PESDR0_460SX_HSSL2COEFA		0x32A
+#define PESDR0_460SX_HSSL3COEFA		0x32B
+#define PESDR0_460SX_HSSL4COEFA		0x32C
+#define PESDR0_460SX_HSSL5COEFA		0x32D
+#define PESDR0_460SX_HSSL6COEFA		0x32E
+#define PESDR0_460SX_HSSL7COEFA		0x32F
+
+#define PESDR1_460SX_HSSL0COEFA		0x358
+#define PESDR1_460SX_HSSL1COEFA		0x359
+#define PESDR1_460SX_HSSL2COEFA		0x35A
+#define PESDR1_460SX_HSSL3COEFA		0x35B
+
+#define PESDR2_460SX_HSSL0COEFA		0x388
+#define PESDR2_460SX_HSSL1COEFA		0x389
+#define PESDR2_460SX_HSSL2COEFA		0x38A
+#define PESDR2_460SX_HSSL3COEFA		0x38B
+
+#define PESDR0_460SX_HSSL1CALDRV	0x339
+#define PESDR1_460SX_HSSL1CALDRV	0x361
+#define PESDR2_460SX_HSSL1CALDRV	0x391
+
+#define PESDR0_460SX_HSSSLEW		0x338
+#define PESDR1_460SX_HSSSLEW		0x360
+#define PESDR2_460SX_HSSSLEW		0x390
+
+#define PESDR0_460SX_HSSCTLSET		0x31E
+#define PESDR1_460SX_HSSCTLSET		0x352
+#define PESDR2_460SX_HSSCTLSET		0x382
+
+#define PESDR0_460SX_RCSSET		0x304
+#define PESDR1_460SX_RCSSET		0x344
+#define PESDR2_460SX_RCSSET		0x374
+/*
+ * Of the above, some are common offsets from the base
+ */
+#define PESDRn_UTLSET1			0x00
+#define PESDRn_UTLSET2			0x01
+#define PESDRn_DLPSET			0x02
+#define PESDRn_LOOP			0x03
+#define PESDRn_RCSSET			0x04
+#define PESDRn_RCSSTS			0x05
+
+/* 440spe only */
+#define PESDRn_440SPE_HSSL0SET1		0x06
+#define PESDRn_440SPE_HSSL0SET2		0x07
+#define PESDRn_440SPE_HSSL0STS		0x08
+#define PESDRn_440SPE_HSSL1SET1		0x09
+#define PESDRn_440SPE_HSSL1SET2		0x0a
+#define PESDRn_440SPE_HSSL1STS		0x0b
+#define PESDRn_440SPE_HSSL2SET1		0x0c
+#define PESDRn_440SPE_HSSL2SET2		0x0d
+#define PESDRn_440SPE_HSSL2STS		0x0e
+#define PESDRn_440SPE_HSSL3SET1		0x0f
+#define PESDRn_440SPE_HSSL3SET2		0x10
+#define PESDRn_440SPE_HSSL3STS		0x11
+
+/* 440spe port 0 only */
+#define PESDRn_440SPE_HSSL4SET1		0x12
+#define PESDRn_440SPE_HSSL4SET2		0x13
+#define PESDRn_440SPE_HSSL4STS	       	0x14
+#define PESDRn_440SPE_HSSL5SET1		0x15
+#define PESDRn_440SPE_HSSL5SET2		0x16
+#define PESDRn_440SPE_HSSL5STS		0x17
+#define PESDRn_440SPE_HSSL6SET1		0x18
+#define PESDRn_440SPE_HSSL6SET2		0x19
+#define PESDRn_440SPE_HSSL6STS		0x1a
+#define PESDRn_440SPE_HSSL7SET1		0x1b
+#define PESDRn_440SPE_HSSL7SET2		0x1c
+#define PESDRn_440SPE_HSSL7STS		0x1d
+
+/* 405ex only */
+#define PESDRn_405EX_PHYSET1		0x06
+#define PESDRn_405EX_PHYSET2		0x07
+#define PESDRn_405EX_PHYSTA		0x0c
+
+/*
+ * UTL register offsets
+ */
+#define PEUTL_PBCTL		0x00
+#define PEUTL_PBBSZ		0x20
+#define PEUTL_OPDBSZ		0x68
+#define PEUTL_IPHBSZ		0x70
+#define PEUTL_IPDBSZ		0x78
+#define PEUTL_OUTTR		0x90
+#define PEUTL_INTR		0x98
+#define PEUTL_PCTL		0xa0
+#define PEUTL_RCSTA		0xB0
+#define PEUTL_RCIRQEN		0xb8
+
+/*
+ * Config space register offsets
+ */
+#define PECFG_ECRTCTL		0x074
+
+#define PECFG_BAR0LMPA		0x210
+#define PECFG_BAR0HMPA		0x214
+#define PECFG_BAR1MPA		0x218
+#define PECFG_BAR2LMPA		0x220
+#define PECFG_BAR2HMPA		0x224
+
+#define PECFG_PIMEN		0x33c
+#define PECFG_PIM0LAL		0x340
+#define PECFG_PIM0LAH		0x344
+#define PECFG_PIM1LAL		0x348
+#define PECFG_PIM1LAH		0x34c
+#define PECFG_PIM01SAL		0x350
+#define PECFG_PIM01SAH		0x354
+
+#define PECFG_POM0LAL		0x380
+#define PECFG_POM0LAH		0x384
+#define PECFG_POM1LAL		0x388
+#define PECFG_POM1LAH		0x38c
+#define PECFG_POM2LAL		0x390
+#define PECFG_POM2LAH		0x394
+
+/* 460sx only */
+#define PECFG_460SX_DLLSTA     0x3f8
+
+/* 460sx Bit Mappings */
+#define PECFG_460SX_DLLSTA_LINKUP	 0x00000010
+#define DCRO_PEGPL_460SX_OMR1MSKL_UOT	 0x00000004
+
+/* PEGPL Bit Mappings */
+#define DCRO_PEGPL_OMRxMSKL_VAL	 0x00000001
+#define DCRO_PEGPL_OMR1MSKL_UOT	 0x00000002
+#define DCRO_PEGPL_OMR3MSKL_IO	 0x00000002
+
+/* 476FPE */
+#define PCCFG_LCPA			0x270
+#define PECFG_TLDLP			0x3F8
+#define PECFG_TLDLP_LNKUP		0x00000008
+#define PECFG_TLDLP_PRESENT		0x00000010
+#define DCRO_PEGPL_476FPE_OMR1MSKL_UOT	 0x00000004
+
+/* SDR Bit Mappings */
+#define PESDRx_RCSSET_HLDPLB	0x10000000
+#define PESDRx_RCSSET_RSTGU	0x01000000
+#define PESDRx_RCSSET_RDY       0x00100000
+#define PESDRx_RCSSET_RSTDL     0x00010000
+#define PESDRx_RCSSET_RSTPYN    0x00001000
+
+/*
+ * PTYPE_* constants.
+ * NOTE(review): presumably PCIe port type selectors, going by the names --
+ * confirm against the users in pci.c.
+ */
+enum
+{
+	PTYPE_ENDPOINT		= 0x0,
+	PTYPE_LEGACY_ENDPOINT	= 0x1,
+	PTYPE_ROOT_PORT		= 0x4
+};
+
+/*
+ * LNKW_* constants.
+ * NOTE(review): presumably link width selectors (x1/x4/x8), going by the
+ * names -- confirm against the users in pci.c.
+ */
+enum
+{
+	LNKW_X1			= 0x1,
+	LNKW_X4			= 0x4,
+	LNKW_X8			= 0x8
+};
+
+
+#endif /* __PPC4XX_PCI_H__ */
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
new file mode 100644
index 0000000000..b2d940437a
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IBM/AMCC PPC4xx SoC setup code
+ *
+ * Copyright 2008 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * L2 cache routines cloned from arch/ppc/syslib/ibm440gx_common.c which is:
+ *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ *   Copyright (c) 2003 - 2006 Zultys Technologies
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/reg.h>
+#include <asm/ppc4xx.h>
+
+/*
+ * DCR base number of the L2 cache controller.  ppc4xx_l2c_probe() loads it
+ * from the third cell of the "ibm,l2-cache" node's "dcr-reg" property; all
+ * DCRN_L2C0_* accesses in this file are offsets from it.
+ */
+static u32 dcrbase_l2c;
+
+/*
+ * L2-cache
+ */
+
+/*
+ * Run one L2C diagnostic command.
+ *
+ * Writes @addr to the L2C address register, issues L2C_CMD_DIAG, spins until
+ * the status register has L2C_SR_CC set, and returns the contents of the
+ * L2C data register.
+ */
+static inline u32 l2c_diag(u32 addr)
+{
+	u32 status;
+
+	mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, addr);
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_DIAG);
+
+	/* Busy-wait (there is no timeout) for L2C_SR_CC */
+	do {
+		status = mfdcr(dcrbase_l2c + DCRN_L2C0_SR);
+	} while (!(status & L2C_SR_CC));
+
+	return mfdcr(dcrbase_l2c + DCRN_L2C0_DATA);
+}
+
+/*
+ * Interrupt handler for L2 cache errors.
+ *
+ * Reads the L2C status register and logs a message for a cache parity error
+ * (L2C_SR_CPE) and/or a tag parity error (L2C_SR_TPE), fetching the trapped
+ * address with a diagnostic command.  If either parity bit was set the
+ * parity errors are cleared; otherwise the event is logged as an LRU error.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t l2c_error_handler(int irq, void *dev)
+{
+	const u32 status = mfdcr(dcrbase_l2c + DCRN_L2C0_SR);
+	const u32 parity = status & (L2C_SR_CPE | L2C_SR_TPE);
+
+	if (status & L2C_SR_CPE) {
+		/* Read cache trapped address */
+		u32 trapped = l2c_diag(0x42000000);
+
+		printk(KERN_EMERG "L2C: Cache Parity Error, addr[16:26] = 0x%08x\n",
+		       trapped);
+	}
+	if (status & L2C_SR_TPE) {
+		/* Read tag trapped address (reported from the upper halfword) */
+		u32 trapped = l2c_diag(0x82000000) >> 16;
+
+		printk(KERN_EMERG "L2C: Tag Parity Error, addr[16:26] = 0x%08x\n",
+		       trapped);
+	}
+
+	if (!parity) {
+		printk(KERN_EMERG "L2C: LRU error\n");
+		return IRQ_HANDLED;
+	}
+
+	/* Clear parity errors */
+	mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, 0);
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_CCP | L2C_CMD_CTE);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Locate, configure and enable the L2 cache.
+ *
+ * Looks up the "ibm,l2-cache" device tree node, reads its "cache-size" and
+ * "dcr-reg" properties, installs l2c_error_handler() on the node's first
+ * interrupt and then, with local interrupts disabled, disables the SRAM,
+ * puts the controller into L2 mode, clears the cache, sets up the snoop
+ * regions and finally enables the ICU/DCU ports.
+ *
+ * Returns 0 when there is no L2 cache node or when the cache was enabled,
+ * and -ENODEV when a required property or the interrupt is unavailable.
+ * The node reference is dropped on every path that obtained it.
+ */
+static int __init ppc4xx_l2c_probe(void)
+{
+	struct device_node *np;
+	u32 r;
+	unsigned long flags;
+	int irq;
+	const u32 *dcrreg;
+	u32 dcrbase_isram;
+	int len;
+	const u32 *prop;
+	u32 l2_size;
+	int ret = -ENODEV;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,l2-cache");
+	if (!np)
+		return 0;
+
+	/* Get l2 cache size */
+	prop = of_get_property(np, "cache-size", NULL);
+	if (prop == NULL) {
+		printk(KERN_ERR "%pOF: Can't get cache-size!\n", np);
+		goto out_put;
+	}
+	l2_size = prop[0];
+
+	/* Map DCRs: cell 0 is the SRAM DCR base, cell 2 the L2C DCR base */
+	dcrreg = of_get_property(np, "dcr-reg", &len);
+	if (!dcrreg || (len != 4 * sizeof(u32))) {
+		printk(KERN_ERR "%pOF: Can't get DCR register base !\n", np);
+		goto out_put;
+	}
+	dcrbase_isram = dcrreg[0];
+	dcrbase_l2c = dcrreg[2];
+
+	/* Get and map irq number from device tree */
+	irq = irq_of_parse_and_map(np, 0);
+	if (!irq) {
+		printk(KERN_ERR "irq_of_parse_and_map failed\n");
+		goto out_put;
+	}
+
+	/* Install error handler */
+	if (request_irq(irq, l2c_error_handler, 0, "L2C", NULL) < 0) {
+		printk(KERN_ERR "Cannot install L2C error handler"
+		       ", cache is not enabled\n");
+		goto out_put;
+	}
+
+	local_irq_save(flags);
+	asm volatile ("sync" ::: "memory");
+
+	/* Disable SRAM */
+	mtdcr(dcrbase_isram + DCRN_SRAM0_DPC,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_DPC) & ~SRAM_DPC_ENABLE);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB0CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB0CR) & ~SRAM_SBCR_BU_MASK);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB1CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB1CR) & ~SRAM_SBCR_BU_MASK);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB2CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB2CR) & ~SRAM_SBCR_BU_MASK);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB3CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB3CR) & ~SRAM_SBCR_BU_MASK);
+
+	/* Enable L2_MODE without ICU/DCU */
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_CFG) &
+		~(L2C_CFG_ICU | L2C_CFG_DCU | L2C_CFG_SS_MASK);
+	r |= L2C_CFG_L2M | L2C_CFG_SS_256;
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CFG, r);
+
+	mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, 0);
+
+	/* Hardware Clear Command */
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_HCC);
+	while (!(mfdcr(dcrbase_l2c + DCRN_L2C0_SR) & L2C_SR_CC))
+		;
+
+	/* Clear Cache Parity and Tag Errors */
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_CCP | L2C_CMD_CTE);
+
+	/* Enable 64G snoop region starting at 0 (SNP0 and SNP1, 32G each) */
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_SNP0) &
+		~(L2C_SNP_BA_MASK | L2C_SNP_SSR_MASK);
+	r |= L2C_SNP_SSR_32G | L2C_SNP_ESR;
+	mtdcr(dcrbase_l2c + DCRN_L2C0_SNP0, r);
+
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_SNP1) &
+		~(L2C_SNP_BA_MASK | L2C_SNP_SSR_MASK);
+	r |= 0x80000000 | L2C_SNP_SSR_32G | L2C_SNP_ESR;
+	mtdcr(dcrbase_l2c + DCRN_L2C0_SNP1, r);
+
+	asm volatile ("sync" ::: "memory");
+
+	/* Enable ICU/DCU ports */
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_CFG);
+	r &= ~(L2C_CFG_DCW_MASK | L2C_CFG_PMUX_MASK | L2C_CFG_PMIM
+	       | L2C_CFG_TPEI | L2C_CFG_CPEI | L2C_CFG_NAM | L2C_CFG_NBRM);
+	r |= L2C_CFG_ICU | L2C_CFG_DCU | L2C_CFG_TPC | L2C_CFG_CPC | L2C_CFG_FRAN
+		| L2C_CFG_CPIM | L2C_CFG_TPIM | L2C_CFG_LIM | L2C_CFG_SMCM;
+
+	/* Check for 460EX/GT special handling */
+	if (of_device_is_compatible(np, "ibm,l2-cache-460ex") ||
+	    of_device_is_compatible(np, "ibm,l2-cache-460gt"))
+		r |= L2C_CFG_RDBW;
+
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CFG, r);
+
+	asm volatile ("sync; isync" ::: "memory");
+	local_irq_restore(flags);
+
+	printk(KERN_INFO "%uk L2-cache enabled\n", l2_size >> 10);
+	ret = 0;
+
+out_put:
+	/* Single exit point: drop the reference taken by the node lookup */
+	of_node_put(np);
+	return ret;
+}
+arch_initcall(ppc4xx_l2c_probe);
+
+/*
+ * Reset the machine by setting a reset request in DBCR0.
+ *
+ * The default request is DBCR0_RST_SYSTEM.  A board may override it with a
+ * "reset-type" property in the cpu node; values 1..3 are accepted and are
+ * shifted into place (value << 28), anything else is ignored.  @cmd is not
+ * used.  Does not return: if the reset does not take effect the function
+ * spins forever.
+ */
+void ppc4xx_reset_system(char *cmd)
+{
+	struct device_node *cpu = of_get_cpu_node(0, NULL);
+	const u32 *override = NULL;
+	u32 reset_type = DBCR0_RST_SYSTEM;
+
+	if (cpu)
+		override = of_get_property(cpu, "reset-type", NULL);
+
+	/*
+	 * Check if property exists and if it is in range:
+	 * 1 - PPC4xx core reset
+	 * 2 - PPC4xx chip reset
+	 * 3 - PPC4xx system reset (default)
+	 */
+	if (override && override[0] >= 1 && override[0] <= 3)
+		reset_type = override[0] << 28;
+
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | reset_type);
+
+	for (;;)
+		;	/* Just in case the reset doesn't work */
+}
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c
new file mode 100644
index 0000000000..e3e148b9dd
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/4xx/uic.c
+ *
+ * IBM PowerPC 4xx Universal Interrupt Controller
+ *
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/dcr.h>
+#include <asm/uic.h>
+
+#define NR_UIC_INTS	32
+
+#define UIC_SR		0x0
+#define UIC_ER		0x2
+#define UIC_CR		0x3
+#define UIC_PR		0x4
+#define UIC_TR		0x5
+#define UIC_MSR		0x6
+#define UIC_VR		0x7
+#define UIC_VCR		0x8
+
+/* The top-level UIC; assigned by uic_init_tree() and read by uic_get_irq() */
+struct uic *primary_uic;
+
+/* State kept for one UIC instance; allocated and filled in uic_init_one() */
+struct uic {
+	/* Value of the device node's "cell-index" property */
+	int index;
+	/* First cell of the node's "dcr-reg" property; base for the UIC_* DCR offsets */
+	int dcrbase;
+
+	/* Taken (irqsave) around the DCR accesses in the irq_chip callbacks */
+	raw_spinlock_t lock;
+
+	/* The remapper for this UIC */
+	struct irq_domain	*irqhost;
+};
+
+/*
+ * irq_chip .irq_unmask callback: set the source's bit in the UIC enable
+ * register (UIC_ER).
+ *
+ * For level-triggered sources the status bit is written to UIC_SR first,
+ * which acks the interrupt after its handler has run.  Source n is
+ * represented by bit (31 - n) of the UIC registers.
+ */
+static void uic_unmask_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 er, sr;
+
+	/* Unsigned constant: source 0 needs bit 31, which a signed 1 cannot hold */
+	sr = 1U << (31 - src);
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	/* ack level-triggered interrupts here */
+	if (irqd_is_level_type(d))
+		mtdcr(uic->dcrbase + UIC_SR, sr);
+	er = mfdcr(uic->dcrbase + UIC_ER);
+	er |= sr;
+	mtdcr(uic->dcrbase + UIC_ER, er);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+/*
+ * irq_chip .irq_mask callback: clear the source's bit in the UIC enable
+ * register (UIC_ER) under the per-UIC lock.  Source n is represented by
+ * bit (31 - n).
+ */
+static void uic_mask_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 er;
+
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	er = mfdcr(uic->dcrbase + UIC_ER);
+	/* Unsigned constant: source 0 needs bit 31, which a signed 1 cannot hold */
+	er &= ~(1U << (31 - src));
+	mtdcr(uic->dcrbase + UIC_ER, er);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+/*
+ * irq_chip .irq_ack callback: write the source's bit to the UIC status
+ * register (UIC_SR) under the per-UIC lock.  Source n is represented by
+ * bit (31 - n).
+ */
+static void uic_ack_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	/* Unsigned constant: source 0 needs bit 31, which a signed 1 cannot hold */
+	mtdcr(uic->dcrbase + UIC_SR, 1U << (31 - src));
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+/*
+ * irq_chip .irq_mask_ack callback: clear the source's bit in the enable
+ * register (UIC_ER) and, for non-level sources only, ack it by writing the
+ * bit to the status register (UIC_SR).  Source n is represented by
+ * bit (31 - n).
+ */
+static void uic_mask_ack_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 er, sr;
+
+	/* Unsigned constant: source 0 needs bit 31, which a signed 1 cannot hold */
+	sr = 1U << (31 - src);
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	er = mfdcr(uic->dcrbase + UIC_ER);
+	er &= ~sr;
+	mtdcr(uic->dcrbase + UIC_ER, er);
+	/* On the UIC, acking (i.e. clearing the SR bit)
+	 * a level irq will have no effect if the interrupt
+	 * is still asserted by the device, even if
+	 * the interrupt is already masked. Therefore
+	 * we only ack the edge interrupts here, while
+	 * level interrupts are ack'ed after the actual
+	 * isr call in the uic_unmask_irq()
+	 */
+	if (!irqd_is_level_type(d))
+		mtdcr(uic->dcrbase + UIC_SR, sr);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+/*
+ * irq_chip .irq_set_type callback: program trigger mode and polarity for
+ * one source.
+ *
+ * @flow_type is reduced to its IRQ_TYPE_SENSE_MASK bits:
+ *   - IRQ_TYPE_NONE masks the source and returns 0 without touching the
+ *     trigger/polarity registers;
+ *   - edge rising/falling set the source's UIC_TR bit, level high/low clear
+ *     it; rising/high set the UIC_PR bit, falling/low clear it;
+ *   - anything else returns -EINVAL.
+ * After updating UIC_PR and UIC_TR the source's bit is written to UIC_SR.
+ * Source n is represented by bit (31 - n).  Returns 0 on success.
+ */
+static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	/* Unsigned so that shifting a 1 into bit 31 (source 0) is well defined */
+	u32 trigger, polarity;
+	u32 tr, pr, mask;
+
+	switch (flow_type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_NONE:
+		uic_mask_irq(d);
+		return 0;
+
+	case IRQ_TYPE_EDGE_RISING:
+		trigger = 1; polarity = 1;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		trigger = 1; polarity = 0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		trigger = 0; polarity = 1;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		trigger = 0; polarity = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* All bits set except the one belonging to this source */
+	mask = ~(1U << (31 - src));
+
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	tr = mfdcr(uic->dcrbase + UIC_TR);
+	pr = mfdcr(uic->dcrbase + UIC_PR);
+	tr = (tr & mask) | (trigger << (31 - src));
+	pr = (pr & mask) | (polarity << (31 - src));
+
+	mtdcr(uic->dcrbase + UIC_PR, pr);
+	mtdcr(uic->dcrbase + UIC_TR, tr);
+	/* Write only this source's bit to the status register */
+	mtdcr(uic->dcrbase + UIC_SR, ~mask);
+
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+
+	return 0;
+}
+
+/* irq_chip operations shared by every UIC; installed by uic_host_map() */
+static struct irq_chip uic_irq_chip = {
+	.name		= "UIC",
+	.irq_ack	= uic_ack_irq,
+	.irq_mask	= uic_mask_irq,
+	.irq_mask_ack	= uic_mask_ack_irq,
+	.irq_unmask	= uic_unmask_irq,
+	.irq_set_type	= uic_set_irq_type,
+};
+
+/*
+ * irq_domain .map callback: attach the owning UIC (the domain's host_data)
+ * as chip data for @virq, install uic_irq_chip with handle_level_irq as the
+ * flow handler, and set the initial type to IRQ_TYPE_NONE.  @hw is not
+ * used.  Always returns 0.
+ */
+static int uic_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);
+
+	/*
+	 * Despite the name, handle_level_irq() works for both level
+	 * and edge irqs on UIC.  FIXME: check this is correct
+	 */
+	irq_set_chip_and_handler(virq, &uic_irq_chip, handle_level_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+/* Domain operations for the linear irq domain created in uic_init_one() */
+static const struct irq_domain_ops uic_host_ops = {
+	.xlate	= irq_domain_xlate_twocell,
+	.map	= uic_host_map,
+};
+
+/*
+ * Chained handler installed on the cascade interrupt of a secondary UIC.
+ *
+ * Masks (level) or mask-acks (non-level) the cascade interrupt on its own
+ * chip, reads the secondary UIC's UIC_MSR, dispatches the source derived
+ * from it (32 - ffs(msr)) through the secondary's irq domain, then acks
+ * (level only) and, unless the interrupt is disabled, unmasks the cascade
+ * interrupt again.  A zero UIC_MSR is treated as spurious and nothing is
+ * dispatched.  The chip operations on the cascade interrupt run under
+ * desc->lock.
+ */
+static void uic_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	struct irq_data *parent_data = irq_desc_get_irq_data(desc);
+	struct uic *uic = irq_desc_get_handler_data(desc);
+	u32 pending;
+
+	raw_spin_lock(&desc->lock);
+	if (irqd_is_level_type(parent_data))
+		parent_chip->irq_mask(parent_data);
+	else
+		parent_chip->irq_mask_ack(parent_data);
+	raw_spin_unlock(&desc->lock);
+
+	pending = mfdcr(uic->dcrbase + UIC_MSR);
+	if (pending) {
+		int src = 32 - ffs(pending);
+
+		generic_handle_domain_irq(uic->irqhost, src);
+	}
+	/* else: spurious interrupt, nothing to dispatch */
+
+	raw_spin_lock(&desc->lock);
+	if (irqd_is_level_type(parent_data))
+		parent_chip->irq_ack(parent_data);
+	if (!irqd_irq_disabled(parent_data) && parent_chip->irq_unmask)
+		parent_chip->irq_unmask(parent_data);
+	raw_spin_unlock(&desc->lock);
+}
+
+/*
+ * Allocate and initialise the state for one UIC described by @node.
+ *
+ * @node must be compatible with "ibm,uic" (BUG otherwise).  Reads the
+ * "cell-index" (one cell) and "dcr-reg" (two cells) properties, creates a
+ * linear irq domain of NR_UIC_INTS entries, then programs the controller
+ * with all interrupts disabled, non-critical and level-triggered, and
+ * clears any pending status.
+ *
+ * Returns the new UIC, or NULL on allocation failure, a missing/invalid
+ * property, or failure to create the irq domain.  On failure the
+ * partially initialised structure is freed.
+ */
+static struct uic * __init uic_init_one(struct device_node *node)
+{
+	struct uic *uic;
+	const u32 *indexp, *dcrreg;
+	int len;
+
+	BUG_ON(! of_device_is_compatible(node, "ibm,uic"));
+
+	uic = kzalloc(sizeof(*uic), GFP_KERNEL);
+	if (! uic)
+		return NULL; /* FIXME: panic? */
+
+	raw_spin_lock_init(&uic->lock);
+	indexp = of_get_property(node, "cell-index", &len);
+	if (!indexp || (len != sizeof(u32))) {
+		printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+		       "cell-index property\n", node);
+		goto err_free;
+	}
+	uic->index = *indexp;
+
+	dcrreg = of_get_property(node, "dcr-reg", &len);
+	if (!dcrreg || (len != 2*sizeof(u32))) {
+		printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+		       "dcr-reg property\n", node);
+		goto err_free;
+	}
+	uic->dcrbase = *dcrreg;
+
+	uic->irqhost = irq_domain_add_linear(node, NR_UIC_INTS, &uic_host_ops,
+					     uic);
+	if (! uic->irqhost)
+		goto err_free; /* FIXME: panic? */
+
+	/* Start with all interrupts disabled, level and non-critical */
+	mtdcr(uic->dcrbase + UIC_ER, 0);
+	mtdcr(uic->dcrbase + UIC_CR, 0);
+	mtdcr(uic->dcrbase + UIC_TR, 0);
+	/* Clear any pending interrupts, in case the firmware left some */
+	mtdcr(uic->dcrbase + UIC_SR, 0xffffffff);
+
+	printk ("UIC%d (%d IRQ sources) at DCR 0x%x\n", uic->index,
+		NR_UIC_INTS, uic->dcrbase);
+
+	return uic;
+
+err_free:
+	/* Nothing else refers to the structure yet, so it can simply be freed */
+	kfree(uic);
+	return NULL;
+}
+
+/*
+ * Discover and initialise every "ibm,uic" controller in the device tree.
+ *
+ * The first compatible node without an "interrupts" property is taken as
+ * the top-level UIC; it becomes primary_uic and its irq domain is made the
+ * default host.  A second pass initialises each node that does have an
+ * "interrupts" property as a secondary UIC and chains uic_irq_cascade()
+ * onto the interrupt it maps to.
+ *
+ * BUGs when no top-level UIC exists and panics when any UIC fails to
+ * initialise.  A secondary UIC whose cascade interrupt cannot be mapped is
+ * reported and left without a cascade handler.
+ */
+void __init uic_init_tree(void)
+{
+	struct device_node *np;
+	struct uic *uic;
+	const u32 *interrupts;
+
+	/* First locate and initialize the top-level UIC */
+	for_each_compatible_node(np, NULL, "ibm,uic") {
+		interrupts = of_get_property(np, "interrupts", NULL);
+		if (!interrupts)
+			break;
+	}
+
+	BUG_ON(!np); /* uic_init_tree() assumes there's a UIC as the
+		      * top-level interrupt controller */
+	primary_uic = uic_init_one(np);
+	if (!primary_uic)
+		panic("Unable to initialize primary UIC %pOF\n", np);
+
+	irq_set_default_host(primary_uic->irqhost);
+	/* Leaving the loop via break kept a reference on np; drop it now */
+	of_node_put(np);
+
+	/* Then scan again for cascaded UICs */
+	for_each_compatible_node(np, NULL, "ibm,uic") {
+		interrupts = of_get_property(np, "interrupts", NULL);
+		if (interrupts) {
+			/* Secondary UIC */
+			int cascade_virq;
+
+			uic = uic_init_one(np);
+			if (! uic)
+				panic("Unable to initialize a secondary UIC %pOF\n",
+				      np);
+
+			cascade_virq = irq_of_parse_and_map(np, 0);
+			if (!cascade_virq) {
+				/* 0 means no mapping: there is no irq to chain onto */
+				printk(KERN_ERR "uic: Unable to map cascade interrupt for %pOF\n",
+				       np);
+				continue;
+			}
+
+			irq_set_handler_data(cascade_virq, uic);
+			irq_set_chained_handler(cascade_virq, uic_irq_cascade);
+
+			/* FIXME: setup critical cascade?? */
+		}
+	}
+}
+
+/*
+ * Return an interrupt vector or 0 if no interrupt is pending.
+ *
+ * Reads UIC_MSR of the primary UIC, derives the source number from it
+ * (32 - ffs(msr)) and translates that through the primary UIC's irq
+ * domain.  BUGs if the primary UIC has not been set up yet.
+ */
+unsigned int uic_get_irq(void)
+{
+	u32 msr;
+	int src;
+
+	BUG_ON(! primary_uic);
+
+	msr = mfdcr(primary_uic->dcrbase + UIC_MSR);
+	/*
+	 * Nothing pending: ffs(0) is 0, which would yield source 32, one past
+	 * the NR_UIC_INTS sources of the domain.  Report "none" explicitly
+	 * rather than looking up an out-of-range source.
+	 */
+	if (!msr)
+		return 0;
+
+	src = 32 - ffs(msr);
+
+	return irq_linear_revmap(primary_uic->irqhost, src);
+}
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
new file mode 100644
index 0000000000..deecede787
--- /dev/null
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MPC512x
+	bool "512x-based boards"
+	depends on PPC_BOOK3S_32
+	select COMMON_CLK
+	select FSL_SOC
+	select IPIC
+	select HAVE_PCI
+	select FSL_PCI if PCI
+	select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD
+	select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD
+
+config MPC512x_LPBFIFO
+	tristate "MPC512x LocalPlus Bus FIFO driver"
+	depends on PPC_MPC512x && MPC512X_DMA
+	help
+	  Enable support for Freescale MPC512x LocalPlus Bus FIFO (SCLPC).
+
+config MPC5121_ADS
+	bool "Freescale MPC5121E ADS"
+	depends on PPC_MPC512x
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the MPC5121E ADS board.
+
+config MPC512x_GENERIC
+	bool "Generic support for simple MPC512x based boards"
+	depends on PPC_MPC512x
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for simple MPC512x based boards
+	  which do not need custom platform specific setup.
+
+	  Compatible boards include:  Protonic LVT base boards (ZANMCU
+	  and VICVT2), Freescale MPC5125 Tower system.
+
+config PDM360NG
+	bool "ifm PDM360NG board"
+	depends on PPC_MPC512x
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the PDM360NG board.
diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile
new file mode 100644
index 0000000000..2daf22ee26
--- /dev/null
+++ b/arch/powerpc/platforms/512x/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Freescale PowerPC 512x linux kernel.
+#
+obj-$(CONFIG_COMMON_CLK)	+= clock-commonclk.o
+obj-y				+= mpc512x_shared.o
+obj-$(CONFIG_MPC5121_ADS)	+= mpc5121_ads.o mpc5121_ads_cpld.o
+obj-$(CONFIG_MPC512x_GENERIC)	+= mpc512x_generic.o
+obj-$(CONFIG_MPC512x_LPBFIFO)	+= mpc512x_lpbfifo.o
+obj-$(CONFIG_PDM360NG)		+= pdm360ng.o
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
new file mode 100644
index 0000000000..079cb3627e
--- /dev/null
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -0,0 +1,1224 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2013 DENX Software Engineering
+ *
+ * Gerhard Sittig, <gsi@denx.de>
+ *
+ * common clock driver support for the MPC512x platform
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/mpc5121.h>
+#include <dt-bindings/clock/mpc512x-clock.h>
+
+#include "mpc512x.h"		/* our public mpc5121_clk_init() API */
+
+/* helpers to keep the MCLK intermediates "somewhere" in our table */
+enum {
+	MCLK_IDX_MUX0,
+	MCLK_IDX_EN0,
+	MCLK_IDX_DIV0,
+	MCLK_MAX_IDX,
+};
+
+#define NR_PSCS			12
+#define NR_MSCANS		4
+#define NR_SPDIFS		1
+#define NR_OUTCLK		4
+#define NR_MCLKS		(NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK)
+
+/* extend the public set of clocks by adding internal slots for management */
+enum {
+	/* arrange for adjacent numbers after the public set */
+	MPC512x_CLK_START_PRIVATE = MPC512x_CLK_LAST_PUBLIC,
+	/* clocks which aren't announced to the public */
+	MPC512x_CLK_DDR,
+	MPC512x_CLK_MEM,
+	MPC512x_CLK_IIM,
+	/* intermediates in div+gate combos or fractional dividers */
+	MPC512x_CLK_DDR_UG,
+	MPC512x_CLK_SDHC_x4,
+	MPC512x_CLK_SDHC_UG,
+	MPC512x_CLK_SDHC2_UG,
+	MPC512x_CLK_DIU_x4,
+	MPC512x_CLK_DIU_UG,
+	MPC512x_CLK_MBX_BUS_UG,
+	MPC512x_CLK_MBX_UG,
+	MPC512x_CLK_MBX_3D_UG,
+	MPC512x_CLK_PCI_UG,
+	MPC512x_CLK_NFC_UG,
+	MPC512x_CLK_LPC_UG,
+	MPC512x_CLK_SPDIF_TX_IN,
+	/* intermediates for the mux+gate+div+mux MCLK generation */
+	MPC512x_CLK_MCLKS_FIRST,
+	MPC512x_CLK_MCLKS_LAST = MPC512x_CLK_MCLKS_FIRST
+				+ NR_MCLKS * MCLK_MAX_IDX,
+	/* internal, symbolic spec for the number of slots */
+	MPC512x_CLK_LAST_PRIVATE,
+};
+
+/* data required for the OF clock provider registration */
+static struct clk *clks[MPC512x_CLK_LAST_PRIVATE];
+static struct clk_onecell_data clk_data;
+
+/* CCM register access */
+static struct mpc512x_ccm __iomem *clkregs;
+static DEFINE_SPINLOCK(clklock);
+
+/* SoC variants {{{ */
+
+/*
+ * tell SoC variants apart as they are rather similar yet not identical,
+ * cache the result in an enum to not repeatedly run the expensive OF test
+ *
+ * MPC5123 is an MPC5121 without the MBX graphics accelerator
+ *
+ * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF,
+ * no PATA, no SATA, no PCI, two FECs (of different compatibility name),
+ * only 10 PSCs (of different compatibility name), two SDHCs, different
+ * NFC IP block, output clocks, system PLL status query, different CPMF
+ * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet
+ * those differences can get folded into this clock provider support
+ * code and don't warrant a separate highly redundant implementation
+ */
+
+static enum soc_type {
+	MPC512x_SOC_MPC5121,
+	MPC512x_SOC_MPC5123,
+	MPC512x_SOC_MPC5125,
+} soc;
+
+static void __init mpc512x_clk_determine_soc(void)
+{
+	/*
+	 * the first compatible string which matches decides
+	 *
+	 * when none of them matches then 'soc' is not written to and
+	 * keeps its static initial value (the first enum item, MPC5121)
+	 */
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		soc = MPC512x_SOC_MPC5121;
+	else if (of_machine_is_compatible("fsl,mpc5123"))
+		soc = MPC512x_SOC_MPC5123;
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		soc = MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_mbx(void)
+{
+	const bool is_5121 = (soc == MPC512x_SOC_MPC5121);
+
+	return is_5121;	/* MPC5123 and MPC5125 come without MBX */
+}
+
+static bool __init soc_has_axe(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return !is_5125;	/* the MPC5125 comes without AXE */
+}
+
+static bool __init soc_has_viu(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return !is_5125;	/* the MPC5125 comes without VIU */
+}
+
+static bool __init soc_has_spdif(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return !is_5125;	/* the MPC5125 comes without SPDIF */
+}
+
+static bool __init soc_has_pata(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return !is_5125;	/* the MPC5125 comes without PATA */
+}
+
+static bool __init soc_has_sata(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return !is_5125;	/* the MPC5125 comes without SATA */
+}
+
+static bool __init soc_has_pci(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return !is_5125;	/* the MPC5125 comes without PCI */
+}
+
+static bool __init soc_has_fec2(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125;	/* only the MPC5125 has two FECs */
+}
+
+static int __init soc_max_pscnum(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125 ? 10 : 12;	/* the MPC5125 has fewer PSCs */
+}
+
+static bool __init soc_has_sdhc2(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125;	/* only the MPC5125 has two SDHCs */
+}
+
+static bool __init soc_has_nfc_5125(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125;	/* the MPC5125 has a different NFC IP block */
+}
+
+static bool __init soc_has_outclk(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125;	/* only the MPC5125 has output clocks */
+}
+
+static bool __init soc_has_cpmf_0_bypass(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125;	/* the MPC5125 interprets CPMF differently */
+}
+
+static bool __init soc_has_mclk_mux0_canin(void)
+{
+	const bool is_5125 = (soc == MPC512x_SOC_MPC5125);
+
+	return is_5125;	/* the MPC5125 has another fourth mux0 input */
+}
+
+/* }}} SoC variants */
+/* common clk API wrappers {{{ */
+
+/* convenience wrappers around the common clk API */
+static inline struct clk *mpc512x_clk_fixed(const char *name, int rate)
+{
+	return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
+}
+
+static inline struct clk *mpc512x_clk_factor(
+	const char *name, const char *parent_name,
+	int mul, int div)
+{
+	/* fixed mul/div ratio, registered with CLK_SET_RATE_PARENT */
+	const int flags = CLK_SET_RATE_PARENT;
+
+	return clk_register_fixed_factor(NULL, name, parent_name, flags,
+					 mul, div);
+}
+
+static inline struct clk *mpc512x_clk_divider(
+	const char *name, const char *parent_name, u8 clkflags,
+	u32 __iomem *reg, u8 pos, u8 len, int divflags)
+{
+	/* the caller's divider flags get extended by the big endian spec */
+	return clk_register_divider(NULL, name, parent_name, clkflags, reg,
+			pos, len, divflags | CLK_DIVIDER_BIG_ENDIAN, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_divtable(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos, u8 len,
+	const struct clk_div_table *divtab)
+{
+	/*
+	 * table driven divider in a big endian register, registered
+	 * without any common clock framework flags
+	 */
+	return clk_register_divider_table(NULL, name, parent_name, 0, reg,
+			pos, len, CLK_DIVIDER_BIG_ENDIAN, divtab, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_gated(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos)
+{
+	/*
+	 * single bit gate in a big endian register, registered with
+	 * the CLK_SET_RATE_PARENT common clock framework flag
+	 */
+	return clk_register_gate(NULL, name, parent_name,
+				 CLK_SET_RATE_PARENT, reg, pos,
+				 CLK_GATE_BIG_ENDIAN, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_muxed(const char *name,
+	const char **parent_names, int parent_count,
+	u32 __iomem *reg, u8 pos, u8 len)
+{
+	/*
+	 * mux with its selector field in a big endian register
+	 */
+	const int clkflags = CLK_SET_RATE_PARENT;
+
+	return clk_register_mux(NULL, name, parent_names, parent_count,
+				clkflags, reg, pos, len,
+				CLK_MUX_BIG_ENDIAN, &clklock);
+}
+
+/* }}} common clk API wrappers */
+
+/* helper to extract 'len' bits at offset 'pos' from a big endian register */
+static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len)
+{
+	const uint32_t mask = (1 << len) - 1;
+	uint32_t regval;
+
+	regval = in_be32(reg);
+
+	return (regval >> pos) & mask;
+}
+
+/* read the SPMF bit field and map it to the "sys pll" multiplier */
+static int __init get_spmf_mult(void)
+{
+	/* one entry for each value of the 4 bit wide SPMF code */
+	static int spmf_to_mult[] = {
+		68, 1, 12, 16, 20, 24, 28, 32,
+		36, 40, 44, 48, 52, 56, 60, 64,
+	};
+	const int code = get_bit_field(&clkregs->spmr, 24, 4);
+
+	return spmf_to_mult[code];
+}
+
+/*
+ * get the SYS_DIV value and translate it into a divide factor, the
+ * result is twice the real factor since the divide ratio is fractional
+ *
+ * NOTE(review): the 6 bit code can exceed the 35 table entries, and
+ * the lookup is not range checked -- confirm those codes never occur
+ */
+static int __init get_sys_div_x2(void)
+{
+	static int sysdiv_code_to_x2[] = {
+		4, 5, 6, 7, 8, 9, 10, 14,
+		12, 16, 18, 22, 20, 24, 26, 30,
+		28, 32, 34, 38, 36, 40, 42, 46,
+		44, 48, 50, 54, 52, 56, 58, 62,
+		60, 64, 66,
+	};
+	const int code = get_bit_field(&clkregs->scfr2, 26, 6);
+
+	return sysdiv_code_to_x2[code];
+}
+
+/*
+ * get the CPMF value and translate it into a multiplier factor, the
+ * result is twice the real factor since the multiplier is fractional
+ *
+ * NOTE(review): a 4 bit field is read yet both tables hold 8 entries,
+ * the lookup is not range checked -- confirm codes above 7 never occur
+ */
+static int __init get_cpmf_mult_x2(void)
+{
+	static int cpmf_to_mult_x36[] = {
+		/* 0b000 is "times 36" */
+		72, 2, 2, 3, 4, 5, 6, 7,
+	};
+	static int cpmf_to_mult_0by[] = {
+		/* 0b000 is "bypass" */
+		2, 2, 2, 3, 4, 5, 6, 7,
+	};
+	const int code = get_bit_field(&clkregs->spmr, 16, 4);
+	int *table;
+
+	/* the meaning of code zero depends on the SoC variant */
+	table = soc_has_cpmf_0_bypass()
+		? cpmf_to_mult_0by
+		: cpmf_to_mult_x36;
+
+	return table[code];
+}
+
+/*
+ * some of the clock dividers do scale in a linear way, yet not all of
+ * their bit combinations are legal; use a divider table to get a
+ * resulting set of applicable divider values
+ */
+
+/* applies to the IPS_DIV, and PCI_DIV values */
+static const struct clk_div_table divtab_2346[] = {
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .val = 6, .div = 6, },
+	{ .div = 0, },
+};
+
+/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
+static const struct clk_div_table divtab_1234[] = {
+	{ .val = 1, .div = 1, },
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .div = 0, },
+};
+
+/* get a frequency spec from the "fsl,mpc5121-immr" node, 0 if not found */
+static int __init get_freq_from_dt(char *propname)
+{
+	struct device_node *np;
+	u32 val = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr");
+	if (!np)
+		return 0;
+
+	/* checks the size, converts endianness, keeps 'val' upon failure */
+	of_property_read_u32(np, propname, &val);
+	of_node_put(np);
+
+	return val;
+}
+
+static void __init mpc512x_clk_preset_data(void)
+{
+	size_t idx = ARRAY_SIZE(clks);
+
+	while (idx--)	/* flag all slots as not (yet) available */
+		clks[idx] = ERR_PTR(-ENODEV);
+}
+
+/*
+ * - receives the "bus frequency" from the caller (that's the IPS clock
+ *   rate, the historical source of clock information)
+ * - fetches the system PLL multiplier and divider values as well as the
+ *   IPS divider value from hardware
+ * - determines the REF clock rate either from the XTAL/OSC spec (if
+ *   there is a device tree node describing the oscillator) or from the
+ *   IPS bus clock (supported for backwards compatibility, such that
+ *   setups without XTAL/OSC specs keep working)
+ * - creates the "ref" clock item in the clock tree, such that
+ *   subsequent code can create the remainder of the hierarchy (REF ->
+ *   SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div
+ *   values
+ */
+static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
+					int *sys_mul, int *sys_div, int *ips_div)
+{
+	struct clk *osc_clk;
+	int calc_freq;
+
+	/* fetch mul/div factors from the hardware */
+	*sys_mul = get_spmf_mult();
+	*sys_mul *= 2;		/* compensate for the fractional divider */
+	*sys_div = get_sys_div_x2();
+	*ips_div = get_bit_field(&clkregs->scfr1, 23, 3);
+
+	/* lookup the oscillator clock for its rate */
+	osc_clk = of_clk_get_by_name(np, "osc");
+
+	/*
+	 * either descend from OSC to REF (and in bypassing verify the
+	 * IPS rate), or backtrack from IPS and multiplier values that
+	 * were fetched from hardware to REF and thus to the OSC value
+	 *
+	 * in either case the REF clock gets created here and the
+	 * remainder of the clock tree can get spanned from there
+	 */
+	if (!IS_ERR(osc_clk)) {
+		clk_put(osc_clk);	/* "ref" refers to "osc" by name */
+		clks[MPC512x_CLK_REF] = mpc512x_clk_factor("ref", "osc", 1, 1);
+		calc_freq = clk_get_rate(clks[MPC512x_CLK_REF]);
+		calc_freq *= *sys_mul;
+		calc_freq /= *sys_div;
+		calc_freq /= 2;
+		calc_freq /= *ips_div;
+		if (bus_freq && calc_freq != bus_freq)
+			pr_warn("calc rate %d != OF spec %d\n",
+				calc_freq, bus_freq);
+	} else {
+		calc_freq = bus_freq;	/* start with IPS */
+		calc_freq *= *ips_div;	/* IPS -> CSB */
+		calc_freq *= 2;		/* CSB -> SYS */
+		calc_freq *= *sys_div;	/* SYS -> PLL out */
+		calc_freq /= *sys_mul;	/* PLL out -> REF == OSC */
+		clks[MPC512x_CLK_REF] = mpc512x_clk_fixed("ref", calc_freq);
+	}
+}
+
+/* MCLK helpers {{{ */
+
+/*
+ * helper code for the MCLK subtree setup
+ *
+ * the overview in section 5.2.4 of the MPC5121e Reference Manual rev4
+ * suggests that all instances of the "PSC clock generation" are equal,
+ * and that one might re-use the PSC setup for MSCAN clock generation
+ * (section 5.2.5) as well, at least the logic if not the data for
+ * description
+ *
+ * the details (starting at page 5-20) show differences in the specific
+ * inputs of the first mux stage ("can clk in", "spdif tx"), and the
+ * factual non-availability of the second mux stage (it's present yet
+ * only one input is valid)
+ *
+ * the MSCAN clock related registers (starting at page 5-35) all
+ * reference "spdif clk" at the first mux stage and don't mention any
+ * "can clk" at all, which somehow is unexpected
+ *
+ * TODO re-check the document, and clarify whether the RM is correct in
+ * the overview or in the details, and whether the difference is a
+ * clipboard induced error or results from chip revisions
+ *
+ * it turns out that the RM rev4 as of 2012-06 talks about "can" for the
+ * PSCs while RM rev3 as of 2008-10 talks about "spdif", so I guess that
+ * first a doc update is required which better reflects reality in the
+ * SoC before the implementation should follow while no questions remain
+ */
+
+/*
+ * checkpatch warns about this type, but <linux/clk-provider.h> dictates
+ * it and "fixing" the warning breaks compilation; NOTE(review): fixed
+ * clocks get registered as "psc_mclk_in", "spdif_tx_in" -- confirm names
+ */
+static const char *parent_names_mux0_spdif[] = {
+	"sys", "ref", "psc-mclk-in", "spdif-tx",
+};
+
+static const char *parent_names_mux0_canin[] = {
+	"sys", "ref", "psc-mclk-in", "can-clk-in",
+};
+
+enum mclk_type {
+	MCLK_TYPE_PSC,
+	MCLK_TYPE_MSCAN,
+	MCLK_TYPE_SPDIF,
+	MCLK_TYPE_OUTCLK,
+};
+
+struct mclk_setup_data {
+	enum mclk_type type;
+	bool has_mclk1;
+	const char *name_mux0;
+	const char *name_en0;
+	const char *name_div0;
+	const char *parent_names_mux1[2];
+	const char *name_mclk;
+};
+
+#define MCLK_SETUP_DATA_PSC(id) { \
+	.type = MCLK_TYPE_PSC, .has_mclk1 = 0, \
+	.name_mux0 = "psc" #id "-mux0", \
+	.name_en0 = "psc" #id "-en0", \
+	.name_div0 = "psc" #id "_mclk_div", \
+	.parent_names_mux1 = { "psc" #id "_mclk_div", "dummy", }, \
+	.name_mclk = "psc" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_MSCAN(id) { \
+	.type = MCLK_TYPE_MSCAN, .has_mclk1 = 0, \
+	.name_mux0 = "mscan" #id "-mux0", \
+	.name_en0 = "mscan" #id "-en0", \
+	.name_div0 = "mscan" #id "_mclk_div", \
+	.parent_names_mux1 = { "mscan" #id "_mclk_div", "dummy", }, \
+	.name_mclk = "mscan" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_SPDIF { \
+	.type = MCLK_TYPE_SPDIF, .has_mclk1 = 1, \
+	.name_mux0 = "spdif-mux0", \
+	.name_en0 = "spdif-en0", \
+	.name_div0 = "spdif_mclk_div", \
+	.parent_names_mux1 = { "spdif_mclk_div", "spdif-rx", }, \
+	.name_mclk = "spdif_mclk", \
+}
+
+#define MCLK_SETUP_DATA_OUTCLK(id) { \
+	.type = MCLK_TYPE_OUTCLK, .has_mclk1 = 0, \
+	.name_mux0 = "out" #id "-mux0", \
+	.name_en0 = "out" #id "-en0", \
+	.name_div0 = "out" #id "_mclk_div", \
+	.parent_names_mux1 = { "out" #id "_mclk_div", "dummy", }, \
+	.name_mclk = "out" #id "_clk", \
+}
+
+static struct mclk_setup_data mclk_psc_data[] = {
+	MCLK_SETUP_DATA_PSC(0),
+	MCLK_SETUP_DATA_PSC(1),
+	MCLK_SETUP_DATA_PSC(2),
+	MCLK_SETUP_DATA_PSC(3),
+	MCLK_SETUP_DATA_PSC(4),
+	MCLK_SETUP_DATA_PSC(5),
+	MCLK_SETUP_DATA_PSC(6),
+	MCLK_SETUP_DATA_PSC(7),
+	MCLK_SETUP_DATA_PSC(8),
+	MCLK_SETUP_DATA_PSC(9),
+	MCLK_SETUP_DATA_PSC(10),
+	MCLK_SETUP_DATA_PSC(11),
+};
+
+static struct mclk_setup_data mclk_mscan_data[] = {
+	MCLK_SETUP_DATA_MSCAN(0),
+	MCLK_SETUP_DATA_MSCAN(1),
+	MCLK_SETUP_DATA_MSCAN(2),
+	MCLK_SETUP_DATA_MSCAN(3),
+};
+
+static struct mclk_setup_data mclk_spdif_data[] = {
+	MCLK_SETUP_DATA_SPDIF,
+};
+
+static struct mclk_setup_data mclk_outclk_data[] = {
+	MCLK_SETUP_DATA_OUTCLK(0),
+	MCLK_SETUP_DATA_OUTCLK(1),
+	MCLK_SETUP_DATA_OUTCLK(2),
+	MCLK_SETUP_DATA_OUTCLK(3),
+};
+
+/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF/OUTCLK */
+static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
+{
+	size_t clks_idx_pub, clks_idx_int;
+	u32 __iomem *mccr_reg;	/* MCLK control register (mux, en, div) */
+	int div;
+
+	/* derive table slots and control register from type and index */
+	switch (entry->type) {
+	case MCLK_TYPE_PSC:
+		clks_idx_pub = MPC512x_CLK_PSC0_MCLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (idx) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->psc_ccr[idx];
+		break;
+	case MCLK_TYPE_MSCAN:
+		clks_idx_pub = MPC512x_CLK_MSCAN0_MCLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + idx) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->mscan_ccr[idx];
+		break;
+	case MCLK_TYPE_SPDIF:
+		clks_idx_pub = MPC512x_CLK_SPDIF_MCLK;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->spccr;
+		break;
+	case MCLK_TYPE_OUTCLK:
+		clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx)
+			     * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->out_ccr[idx];
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * this was grabbed from the PPC_CLOCK implementation, which
+	 * enforced a specific MCLK divider while the clock was gated
+	 * during setup (that's a documented hardware requirement)
+	 *
+	 * the PPC_CLOCK implementation might even have violated the
+	 * "MCLK <= IPS" constraint, the fixed divider value of 1
+	 * results in a divider of 2 and thus MCLK = SYS/2 which equals
+	 * CSB which is greater than IPS; the serial port setup may have
+	 * adjusted the divider which the clock setup might have left in
+	 * an undesirable state
+	 *
+	 * initial setup is:
+	 * - MCLK 0 from SYS
+	 * - MCLK DIV such to not exceed the IPS clock
+	 * - MCLK 0 enabled
+	 * - MCLK 1 from MCLK DIV
+	 */
+	div = clk_get_rate(clks[MPC512x_CLK_SYS]);
+	div /= clk_get_rate(clks[MPC512x_CLK_IPS]);
+	out_be32(mccr_reg, (0 << 16));	/* gate off, bit 16 is the enable */
+	out_be32(mccr_reg, (0 << 16) | ((div - 1) << 17));	/* set div */
+	out_be32(mccr_reg, (1 << 16) | ((div - 1) << 17));	/* gate on */
+
+	/*
+	 * create the 'struct clk' items of the MCLK's clock subtree
+	 *
+	 * note that by design we always create all nodes and won't take
+	 * shortcuts here, because
+	 * - the "internal" MCLK_DIV and MCLK_OUT signal in turn are
+	 *   selectable inputs to the CFM while those who "actually use"
+	 *   the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK
+	 *   for their bitrate
+	 * - in the absence of "aliases" for clocks we need to create
+	 *   individual 'struct clk' items for whatever might get
+	 *   referenced or looked up, even if several of those items are
+	 *   identical from the logical POV (their rate value)
+	 * - for easier future maintenance and for better reflection of
+	 *   the SoC's documentation, it appears appropriate to generate
+	 *   clock items even for those muxers which actually are NOPs
+	 *   (those with two inputs of which one is reserved)
+	 */
+	clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed(
+			entry->name_mux0,
+			soc_has_mclk_mux0_canin()
+				? &parent_names_mux0_canin[0]
+				: &parent_names_mux0_spdif[0],
+			ARRAY_SIZE(parent_names_mux0_spdif),
+			mccr_reg, 14, 2);
+	clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated(
+			entry->name_en0, entry->name_mux0,
+			mccr_reg, 16);
+	clks[clks_idx_int + MCLK_IDX_DIV0] = mpc512x_clk_divider(
+			entry->name_div0,
+			entry->name_en0, CLK_SET_RATE_GATE,
+			mccr_reg, 17, 15, 0);
+	if (entry->has_mclk1) {
+		clks[clks_idx_pub] = mpc512x_clk_muxed(
+				entry->name_mclk,
+				&entry->parent_names_mux1[0],
+				ARRAY_SIZE(entry->parent_names_mux1),
+				mccr_reg, 7, 1);
+	} else {
+		clks[clks_idx_pub] = mpc512x_clk_factor(
+				entry->name_mclk,
+				entry->parent_names_mux1[0],
+				1, 1);
+	}
+}
+
+/* }}} MCLK helpers */
+
+static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+{
+	int sys_mul, sys_div, ips_div;
+	int mul, div;
+	size_t mclk_idx;
+	int freq;
+
+	/*
+	 * developer's notes:
+	 * - consider whether to handle clocks which have both gates and
+	 *   dividers via intermediates or by means of composites
+	 * - fractional dividers appear to not map well to composites
+	 *   since they can be seen as a fixed multiplier and an
+	 *   adjustable divider, while composites can only combine at
+	 *   most one of a mux, div, and gate each into one 'struct clk'
+	 *   item
+	 * - PSC/MSCAN/SPDIF clock generation OTOH already is very
+	 *   specific and cannot get mapped to composites (at least not
+	 *   a single one, maybe two of them, but then some of these
+	 *   intermediate clock signals get referenced elsewhere (e.g.
+	 *   in the clock frequency measurement, CFM) and thus need
+	 *   publicly available names
+	 * - the current source layout appropriately reflects the
+	 *   hardware setup, and it works, so it's questionable whether
+	 *   further changes will result in big enough a benefit
+	 */
+
+	/* regardless of whether XTAL/OSC exists, have REF created */
+	mpc512x_clk_setup_ref_clock(np, busfreq, &sys_mul, &sys_div, &ips_div);
+
+	/* now setup the REF -> SYS -> CSB -> IPS hierarchy */
+	clks[MPC512x_CLK_SYS] = mpc512x_clk_factor("sys", "ref",
+						   sys_mul, sys_div);
+	clks[MPC512x_CLK_CSB] = mpc512x_clk_factor("csb", "sys", 1, 2);
+	clks[MPC512x_CLK_IPS] = mpc512x_clk_divtable("ips", "csb",
+						     &clkregs->scfr1, 23, 3,
+						     divtab_2346);
+	/* now setup anything below SYS and CSB and IPS */
+
+	clks[MPC512x_CLK_DDR_UG] = mpc512x_clk_factor("ddr-ug", "sys", 1, 2);
+
+	/*
+	 * the Reference Manual discusses that for SDHC only even divide
+	 * ratios are supported because clock domain synchronization
+	 * between 'per' and 'ipg' is broken;
+	 * keep the divider's bit 0 cleared (per reset value), and only
+	 * allow to setup the divider's bits 7:1, which results in that
+	 * only even divide ratios can get configured upon rate changes;
+	 * keep the "x4" name because this bit shift hack is an internal
+	 * implementation detail, the "fractional divider with quarters"
+	 * semantics remains
+	 */
+	clks[MPC512x_CLK_SDHC_x4] = mpc512x_clk_factor("sdhc-x4", "csb", 2, 1);
+	clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0,
+							&clkregs->scfr2, 1, 7,
+							CLK_DIVIDER_ONE_BASED);
+	if (soc_has_sdhc2()) {
+		clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider(
+				"sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2,
+				9, 7, CLK_DIVIDER_ONE_BASED);
+	}
+
+	clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1);
+	clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0,
+						       &clkregs->scfr1, 0, 8,
+						       CLK_DIVIDER_ONE_BASED);
+
+	/*
+	 * the "power architecture PLL" was setup from data which was
+	 * sampled from the reset config word, at this point in time the
+	 * configuration can be considered fixed and read only (i.e. no
+	 * longer adjustable, or no longer in need of adjustment), which
+	 * is why we don't register a PLL here but assume fixed factors
+	 */
+	mul = get_cpmf_mult_x2();
+	div = 2;	/* compensate for the fractional factor */
+	clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div);
+
+	if (soc_has_mbx()) {
+		clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor(
+				"mbx-bus-ug", "csb", 1, 2);
+		clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable(
+				"mbx-ug", "mbx-bus-ug", &clkregs->scfr1,
+				14, 3, divtab_1234);
+		clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor(
+				"mbx-3d-ug", "mbx-ug", 1, 1);
+	}
+	if (soc_has_pci()) {
+		clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable(
+				"pci-ug", "csb", &clkregs->scfr1,
+				20, 3, divtab_2346);
+	}
+	if (soc_has_nfc_5125()) {
+		/*
+		 * XXX TODO implement 5125 NFC clock setup logic,
+		 * with high/low period counters in clkregs->scfr3,
+		 * currently there are no users so it's ENOIMPL
+		 */
+		clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP);
+	} else {
+		clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable(
+				"nfc-ug", "ips", &clkregs->scfr1,
+				8, 3, divtab_1234);
+	}
+	clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips",
+							&clkregs->scfr1, 11, 3,
+							divtab_1234);
+
+	clks[MPC512x_CLK_LPC] = mpc512x_clk_gated("lpc", "lpc-ug",
+						  &clkregs->sccr1, 30);
+	clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug",
+						  &clkregs->sccr1, 29);
+	if (soc_has_pata()) {
+		clks[MPC512x_CLK_PATA] = mpc512x_clk_gated(
+				"pata", "ips", &clkregs->sccr1, 28);
+	}
+	/* for PSCs there is a "registers" gate and a bitrate MCLK subtree */
+	for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) {
+		char name[12];
+		snprintf(name, sizeof(name), "psc%zu", mclk_idx);
+		clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated(
+				name, "ips", &clkregs->sccr1, 27 - mclk_idx);
+		mpc512x_clk_setup_mclk(&mclk_psc_data[mclk_idx], mclk_idx);
+	}
+	clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips",
+						       &clkregs->sccr1, 15);
+	if (soc_has_sata()) {
+		clks[MPC512x_CLK_SATA] = mpc512x_clk_gated(
+				"sata", "ips", &clkregs->sccr1, 14);
+	}
+	clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips",
+						  &clkregs->sccr1, 13);
+	if (soc_has_pci()) {
+		clks[MPC512x_CLK_PCI] = mpc512x_clk_gated(
+				"pci", "pci-ug", &clkregs->sccr1, 11);
+	}
+	clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug",
+						  &clkregs->sccr1, 10);
+	if (soc_has_fec2()) {
+		clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated(
+				"fec2", "ips", &clkregs->sccr1, 9);
+	}
+
+	clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug",
+						  &clkregs->sccr2, 31);
+	if (soc_has_axe()) {
+		clks[MPC512x_CLK_AXE] = mpc512x_clk_gated(
+				"axe", "csb", &clkregs->sccr2, 30);
+	}
+	clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips",
+						  &clkregs->sccr2, 29);
+	clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb",
+						   &clkregs->sccr2, 28);
+	clks[MPC512x_CLK_USB2] = mpc512x_clk_gated("usb2", "csb",
+						   &clkregs->sccr2, 27);
+	clks[MPC512x_CLK_I2C] = mpc512x_clk_gated("i2c", "ips",
+						  &clkregs->sccr2, 26);
+	/* MSCAN differs from PSC with just one gate for multiple components */
+	clks[MPC512x_CLK_BDLC] = mpc512x_clk_gated("bdlc", "ips",
+						   &clkregs->sccr2, 25);
+	for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_mscan_data); mclk_idx++)
+		mpc512x_clk_setup_mclk(&mclk_mscan_data[mclk_idx], mclk_idx);
+	clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug",
+						   &clkregs->sccr2, 24);
+	/* there is only one SPDIF component, which shares MCLK support code */
+	if (soc_has_spdif()) {
+		clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated(
+				"spdif", "ips", &clkregs->sccr2, 23);
+		mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0);
+	}
+	if (soc_has_mbx()) {
+		clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated(
+				"mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22);
+		clks[MPC512x_CLK_MBX] = mpc512x_clk_gated(
+				"mbx", "mbx-ug", &clkregs->sccr2, 21);
+		clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated(
+				"mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20);
+	}
+	clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb",
+						  &clkregs->sccr2, 19);
+	if (soc_has_viu()) {
+		clks[MPC512x_CLK_VIU] = mpc512x_clk_gated(
+				"viu", "csb", &clkregs->sccr2, 18);
+	}
+	if (soc_has_sdhc2()) {
+		clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated(
+				"sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17);
+	}
+
+	if (soc_has_outclk()) {
+		size_t idx;	/* used as mclk_idx, just to trim line length */
+		for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++)
+			mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx);
+	}
+
+	/*
+	 * externally provided clocks, device tree may specify their rates
+	 * NOTE(review): both SPDIF inputs get stored in the TX_IN slot
+	 */
+	freq = get_freq_from_dt("psc_mclk_in");
+	if (!freq)
+		freq = 25000000;
+	clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq);
+	if (soc_has_mclk_mux0_canin()) {
+		freq = get_freq_from_dt("can_clk_in");
+		clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed(
+				"can_clk_in", freq);
+	} else {
+		freq = get_freq_from_dt("spdif_tx_in");
+		clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+				"spdif_tx_in", freq);
+		freq = get_freq_from_dt("spdif_rx_in");
+		clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+				"spdif_rx_in", freq);
+	}
+
+	/* fixed frequency for AC97, always 24.567MHz */
+	clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000);
+
+	/*
+	 * pre-enable those "internal" clock items which never get
+	 * claimed by any peripheral driver, to not have the clock
+	 * subsystem disable them late at startup
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_DUMMY]);
+	clk_prepare_enable(clks[MPC512x_CLK_E300]);	/* PowerPC CPU */
+	clk_prepare_enable(clks[MPC512x_CLK_DDR]);	/* DRAM */
+	clk_prepare_enable(clks[MPC512x_CLK_MEM]);	/* SRAM */
+	clk_prepare_enable(clks[MPC512x_CLK_IPS]);	/* SoC periph */
+	clk_prepare_enable(clks[MPC512x_CLK_LPC]);	/* boot media */
+}
+
+/*
+ * announce the public clocks (those which the dt-bindings/ header file
+ * lists) to OF based lookups, the intermediates remain private to us
+ */
+static void __init mpc5121_clk_register_of_provider(struct device_node *np)
+{
+	clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1;	/* _not_ ARRAY_SIZE() */
+	clk_data.clks = clks;
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+}
+
+/*
+ * interim help for the time span between the introduction of CCF
+ * support and the conversion of peripheral drivers to OF based lookups
+ */
+static void __init mpc5121_clk_provide_migration_support(void)
+{
+	struct device_node *pci_node;
+
+	/*
+	 * pre-enable those clock items which are not yet appropriately
+	 * acquired by their peripheral driver
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */
+
+	/*
+	 * the PCI clock cannot get acquired by its peripheral driver,
+	 * because for this platform the driver won't probe(), instead
+	 * initialization is done from within the .setup_arch() routine
+	 * at a point in time where the clock provider isn't available
+	 * yet, so "pre-enable" the clock here to not have the clock
+	 * subsystem automatically disable it in a late init call
+	 *
+	 * this only applies when a device tree node for PCI exists,
+	 * otherwise the PCI clock remains unused and gets disabled
+	 */
+	pci_node = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci");
+	if (pci_node) {
+		of_node_put(pci_node);
+		clk_prepare_enable(clks[MPC512x_CLK_PCI]);
+	}
+}
+
+/*
+ * not exactly pretty macros, but they encapsulate copy'n'paste heavy
+ * code and reduce the potential for inconsistencies in the code copies
+ * NOTE(review): NODE_PREP uses 'res' even if the resource lookup failed
+ */
+#define FOR_NODES(compatname) \
+	for_each_compatible_node(np, NULL, compatname)
+
+#define NODE_PREP do { \
+	of_address_to_resource(np, 0, &res); \
+	snprintf(devname, sizeof(devname), "%pa.%s", &res.start, np->name); \
+} while (0)
+
+#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \
+	struct clk *clk; \
+	clk = of_clk_get_by_name(np, clkname); \
+	if (!IS_ERR(clk)) { \
+		clk_put(clk); \
+	} else { \
+		clk = clkitem; \
+		clk_register_clkdev(clk, clkname, devname); \
+		if (regnode) \
+			clk_register_clkdev(clk, clkname, np->name); \
+		did_register |= DID_REG_ ## regflag; \
+		pr_debug("clock alias name '%s' for dev '%s' pointer %p\n", \
+			 clkname, devname, clk); \
+	} \
+} while (0)
+
+/*
+ * register source code provided fallback results for clock lookups,
+ * these get consulted when OF based clock lookup fails (that is in the
+ * case of not yet adjusted device tree data, where clock related specs
+ * are missing)
+ *
+ * walks the device tree nodes of each peripheral type, registers a
+ * clkdev alias for every clock which the node itself does not specify
+ * (see NODE_CHK()), collects the affected peripheral types in the
+ * 'did_register' bit mask, and reports them in a single log line
+ */
+static void __init mpc5121_clk_provide_backwards_compat(void)
+{
+	enum did_reg_flags {
+		DID_REG_PSC	= BIT(0),
+		DID_REG_PSCFIFO	= BIT(1),
+		DID_REG_NFC	= BIT(2),
+		DID_REG_CAN	= BIT(3),
+		DID_REG_I2C	= BIT(4),
+		DID_REG_DIU	= BIT(5),
+		DID_REG_VIU	= BIT(6),
+		DID_REG_FEC	= BIT(7),
+		DID_REG_USB	= BIT(8),
+		DID_REG_PATA	= BIT(9),
+	};
+
+	int did_register;	/* DID_REG_* bits of the types that got fallbacks */
+	struct device_node *np;
+	struct resource res;
+	int idx;
+	char devname[32];	/* "<start address>.<node name>", see NODE_PREP */
+
+	did_register = 0;
+
+	FOR_NODES(mpc512x_select_psc_compat()) {
+		NODE_PREP;
+		idx = (res.start >> 8) & 0xf;	/* address bits 11..8 select the PSC */
+		NODE_CHK("ipg", clks[MPC512x_CLK_PSC0 + idx], 0, PSC);
+		NODE_CHK("mclk", clks[MPC512x_CLK_PSC0_MCLK + idx], 0, PSC);
+	}
+
+	FOR_NODES("fsl,mpc5121-psc-fifo") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PSC_FIFO], 1, PSCFIFO);
+	}
+
+	FOR_NODES("fsl,mpc5121-nfc") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_NFC], 0, NFC);
+	}
+
+	FOR_NODES("fsl,mpc5121-mscan") {
+		NODE_PREP;
+		idx = 0;	/* address bits 0x2000 and 0x0080 pick one of four MCLK items */
+		idx += (res.start & 0x2000) ? 2 : 0;
+		idx += (res.start & 0x0080) ? 1 : 0;
+		NODE_CHK("ipg", clks[MPC512x_CLK_BDLC], 0, CAN);
+		NODE_CHK("mclk", clks[MPC512x_CLK_MSCAN0_MCLK + idx], 0, CAN);
+	}
+
+	/*
+	 * do register the 'ips', 'sys', and 'ref' names globally
+	 * instead of inside each individual CAN node, as there is no
+	 * potential for a name conflict (in contrast to 'ipg' and 'mclk')
+	 */
+	if (did_register & DID_REG_CAN) {
+		clk_register_clkdev(clks[MPC512x_CLK_IPS], "ips", NULL);
+		clk_register_clkdev(clks[MPC512x_CLK_SYS], "sys", NULL);
+		clk_register_clkdev(clks[MPC512x_CLK_REF], "ref", NULL);
+	}
+
+	FOR_NODES("fsl,mpc5121-i2c") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_I2C], 0, I2C);
+	}
+
+	/*
+	 * workaround for the fact that the I2C driver does an "anonymous"
+	 * lookup (NULL name spec, which yields the first clock spec) for
+	 * which we cannot register an alias -- a _global_ 'ipg' alias that
+	 * is not bound to any device name and returns the I2C clock item
+	 * is not a good idea
+	 *
+	 * so we have the lookup in the peripheral driver fail, which is
+	 * silent and non-fatal, and pre-enable the clock item here such
+	 * that register access is possible
+	 *
+	 * see commit b3bfce2b "i2c: mpc: cleanup clock API use" for
+	 * details, adjusting s/NULL/"ipg"/ in i2c-mpc.c would make this
+	 * workaround obsolete
+	 */
+	if (did_register & DID_REG_I2C)
+		clk_prepare_enable(clks[MPC512x_CLK_I2C]);
+
+	FOR_NODES("fsl,mpc5121-diu") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_DIU], 1, DIU);
+	}
+
+	FOR_NODES("fsl,mpc5121-viu") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_VIU], 0, VIU);
+	}
+
+	/*
+	 * note that 2771399a "fs_enet: cleanup clock API use" did use the
+	 * "per" string for the clock lookup in contrast to the "ipg" name
+	 * which most other nodes are using -- this is not a fatal thing
+	 * but just something to keep in mind when doing compatibility
+	 * registration, it's a non-issue with up-to-date device tree data
+	 */
+	FOR_NODES("fsl,mpc5121-fec") {
+		NODE_PREP;
+		NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+	}
+	FOR_NODES("fsl,mpc5121-fec-mdio") {
+		NODE_PREP;
+		NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+	}
+	/*
+	 * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800;
+	 * the clock items don't "form an array" since FEC2 was
+	 * added only later and was not allowed to shift all other
+	 * clock item indices, so the numbers aren't adjacent
+	 */
+	FOR_NODES("fsl,mpc5125-fec") {
+		NODE_PREP;
+		if (res.start & 0x4000)
+			idx = MPC512x_CLK_FEC2;
+		else
+			idx = MPC512x_CLK_FEC;
+		NODE_CHK("per", clks[idx], 0, FEC);
+	}
+
+	FOR_NODES("fsl,mpc5121-usb2-dr") {
+		NODE_PREP;
+		idx = (res.start & 0x4000) ? 1 : 0;	/* address bit 0x4000: item after USB1 */
+		NODE_CHK("ipg", clks[MPC512x_CLK_USB1 + idx], 0, USB);
+	}
+
+	FOR_NODES("fsl,mpc5121-pata") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PATA], 0, PATA);
+	}
+
+	/*
+	 * try to collapse diagnostics into a single line of output yet
+	 * provide a full list of what is missing, to avoid noise in the
+	 * absence of up-to-date device tree data -- backwards
+	 * compatibility to old DTBs is a requirement, updates may be
+	 * desirable or preferable but are not at all mandatory
+	 */
+	if (did_register) {
+		pr_notice("device tree lacks clock specs, adding fallbacks (0x%x,%s%s%s%s%s%s%s%s%s%s)\n",
+			  did_register,
+			  (did_register & DID_REG_PSC) ? " PSC" : "",
+			  (did_register & DID_REG_PSCFIFO) ? " PSCFIFO" : "",
+			  (did_register & DID_REG_NFC) ? " NFC" : "",
+			  (did_register & DID_REG_CAN) ? " CAN" : "",
+			  (did_register & DID_REG_I2C) ? " I2C" : "",
+			  (did_register & DID_REG_DIU) ? " DIU" : "",
+			  (did_register & DID_REG_VIU) ? " VIU" : "",
+			  (did_register & DID_REG_FEC) ? " FEC" : "",
+			  (did_register & DID_REG_USB) ? " USB" : "",
+			  (did_register & DID_REG_PATA) ? " PATA" : "");
+	} else {
+		pr_debug("device tree has clock specs, no fallbacks added\n");
+	}
+}
+
+/*
+ * Set up the MPC512x clock tree and register it as an OF clock provider.
+ *
+ * The "fixed-clock" nodes (which include the oscillator node if the board's
+ * DT provides one) have already been scanned by the of_clk_init() in
+ * time_init().
+ *
+ * Returns 0 on success, or -ENODEV when the device tree has no
+ * "fsl,mpc5121-clock" node.
+ */
+int __init mpc5121_clk_init(void)
+{
+	struct device_node *clock_node;
+	int bus_hz;
+
+	clock_node = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
+	if (clock_node == NULL)
+		return -ENODEV;
+
+	/* map the clock control registers, a failed mapping only warns */
+	clkregs = of_iomap(clock_node, 0);
+	WARN_ON(!clkregs);
+
+	/*
+	 * determine the SoC variant we run on, then invalidate all
+	 * not yet registered clock slots
+	 */
+	mpc512x_clk_determine_soc();
+	mpc512x_clk_preset_data();
+
+	/*
+	 * the dummy clock serves those situations where a clock spec
+	 * is required yet no real clock is involved
+	 */
+	clks[MPC512x_CLK_DUMMY] = mpc512x_clk_fixed("dummy", 0);
+
+	/*
+	 * populate all the real nodes of the clock tree from REF down
+	 * to the leaves, either starting from the OSC node or from a
+	 * REF root that was created from the IPS bus clock input
+	 */
+	bus_hz = get_freq_from_dt("bus-frequency");
+	mpc512x_clk_setup_clock_tree(clock_node, bus_hz);
+
+	/* become an OF clock provider, the node is not needed afterwards */
+	mpc5121_clk_register_of_provider(clock_node);
+	of_node_put(clock_node);
+
+	/*
+	 * keep not yet adjusted peripheral drivers working during the
+	 * migration towards fully operational common clock support,
+	 * and allow operation in the absence of clock related device
+	 * tree specs
+	 */
+	mpc5121_clk_provide_migration_support();
+	mpc5121_clk_provide_backwards_compat();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
new file mode 100644
index 0000000000..a18f85b3ef
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>, Thur Mar 29 2007
+ *
+ * Description:
+ * MPC5121 ADS board setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/time.h>
+
+#include <sysdev/fsl_pci.h>
+
+#include "mpc512x.h"
+#include "mpc5121_ads.h"
+
+/*
+ * setup_arch hook of the MPC5121 ADS board: announces the board, maps
+ * the CPLD registers, and hands over to the shared MPC512x setup code.
+ */
+static void __init mpc5121_ads_setup_arch(void)
+{
+	pr_info("MPC5121 ADS board from Freescale Semiconductor\n");
+
+	/* cpld regs are needed early, so map them before the shared setup */
+	mpc5121_ads_cpld_map();
+	mpc512x_setup_arch();
+}
+
+/*
+ * discover_phbs hook: adds a bridge for every "fsl,mpc5121-pci" node
+ * of type "pci". Compiles to an empty function without CONFIG_PCI.
+ */
+static void __init mpc5121_ads_setup_pci(void)
+{
+#ifdef CONFIG_PCI
+	struct device_node *pci_node;
+
+	for_each_compatible_node(pci_node, "pci", "fsl,mpc5121-pci") {
+		mpc83xx_add_bridge(pci_node);
+	}
+#endif
+}
+
+static void __init mpc5121_ads_init_IRQ(void)
+{
+	mpc512x_init_IRQ();		/* shared MPC512x interrupt init (defined elsewhere) */
+	mpc5121_ads_cpld_pic_init();	/* then the board's CPLD PIC, which cascades */
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * Runs the shared early init and unconditionally returns 1; no board
+ * matching is done in here (the machine description of this file
+ * carries a .compatible string instead).
+ */
+static int __init mpc5121_ads_probe(void)
+{
+	mpc512x_init_early();
+
+	return 1;
+}
+
+define_machine(mpc5121_ads) {
+	.name			= "MPC5121 ADS",
+	.compatible		= "fsl,mpc5121ads",
+	.probe			= mpc5121_ads_probe,	/* always returns 1 */
+	.setup_arch		= mpc5121_ads_setup_arch,	/* maps the CPLD first */
+	.discover_phbs		= mpc5121_ads_setup_pci,	/* empty without CONFIG_PCI */
+	.init			= mpc512x_init,
+	.init_IRQ		= mpc5121_ads_init_IRQ,	/* shared init plus CPLD PIC */
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc512x_restart,
+};
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.h b/arch/powerpc/platforms/512x/mpc5121_ads.h
new file mode 100644
index 0000000000..c88dea828c
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Prototypes for ADS5121 specific code
+ */
+
+#ifndef __MPC512ADS_H__
+#define __MPC512ADS_H__
+extern void __init mpc5121_ads_cpld_map(void);	/* maps the CPLD registers */
+extern void __init mpc5121_ads_cpld_pic_init(void);	/* sets up the CPLD PIC cascade */
+#endif				/* __MPC512ADS_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
new file mode 100644
index 0000000000..6f08d07aee
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>
+ *
+ * Description:
+ * MPC5121ADS CPLD irq handling
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+static struct device_node *cpld_pic_node;	/* node that cpld_pic_host_match() accepts */
+static struct irq_domain *cpld_pic_host;	/* linear domain of 16 CPLD interrupts */
+
+/*
+ * Bits to ignore in the misc_status register
+ * 0x10 touch screen pendown is hard routed to irq1
+ * 0x02 pci status is read from pci status register
+ */
+#define MISC_IGNORE 0x12
+
+/*
+ * Nothing to ignore in pci status register
+ */
+#define PCI_IGNORE 0x00
+
+struct cpld_pic {
+	u8 pci_mask;	/* mask bits of hwirqs 0..7, a set bit masks the source */
+	u8 pci_status;	/* status of hwirqs 0..7, a zero bit is treated as pending */
+	u8 route;
+	u8 misc_mask;	/* mask bits of hwirqs 8..15, a set bit masks the source */
+	u8 misc_status;	/* status of hwirqs 8..15, a zero bit is treated as pending */
+	u8 misc_control;	/* not accessed in this file */
+};
+
+static struct cpld_pic __iomem *cpld_regs;	/* set by mpc5121_ads_cpld_map() */
+
+/*
+ * Select the mask register of a CPLD interrupt: numbers 0..7 are
+ * controlled by pci_mask, all higher numbers by misc_mask.
+ */
+static void __iomem *
+irq_to_pic_mask(unsigned int irq)
+{
+	if (irq > 7)
+		return &cpld_regs->misc_mask;
+
+	return &cpld_regs->pci_mask;
+}
+
+/* Bit mask of a CPLD interrupt within its eight bit wide register. */
+static unsigned int
+irq_to_pic_bit(unsigned int irq)
+{
+	unsigned int bitnr = irq % 8;
+
+	return 1 << bitnr;
+}
+
+/*
+ * irq_chip callback: masks an interrupt by setting its bit in the
+ * mask register which is in charge of it (read-modify-write).
+ */
+static void
+cpld_mask_irq(struct irq_data *d)
+{
+	unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);
+	void __iomem *reg = irq_to_pic_mask(hwirq);
+	u8 val;
+
+	val = in_8(reg);
+	val |= irq_to_pic_bit(hwirq);
+	out_8(reg, val);
+}
+
+/*
+ * irq_chip callback: unmasks an interrupt by clearing its bit in the
+ * mask register which is in charge of it (read-modify-write).
+ */
+static void
+cpld_unmask_irq(struct irq_data *d)
+{
+	unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);
+	void __iomem *reg = irq_to_pic_mask(hwirq);
+	u8 val;
+
+	val = in_8(reg);
+	val &= ~irq_to_pic_bit(hwirq);
+	out_8(reg, val);
+}
+
+static struct irq_chip cpld_pic = {
+	.name = "CPLD PIC",
+	.irq_mask = cpld_mask_irq,
+	.irq_ack = cpld_mask_irq,	/* ack is implemented as masking the source */
+	.irq_unmask = cpld_unmask_irq,
+};
+
+/*
+ * Find the lowest numbered pending interrupt of one status/mask
+ * register pair.
+ *
+ * A source counts as pending when its status bit reads zero and it is
+ * neither masked nor listed in 'ignore'. Returns the bit number plus
+ * 'offset', or ~0 when nothing is pending.
+ */
+static unsigned int
+cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
+			    u8 __iomem *maskp)
+{
+	u8 idle;
+
+	/* status is read before mask, set bits mean "nothing to do" */
+	idle = in_8(statusp);
+	idle |= in_8(maskp);
+	idle |= ignore;
+
+	if (idle != 0xff)
+		return ffz(idle) + offset;
+
+	return ~0;
+}
+
+/*
+ * Chained handler of the cascade interrupt: dispatches at most one
+ * CPLD interrupt per invocation, the PCI group (0..7) takes precedence
+ * over the misc group (8..15).
+ */
+static void cpld_pic_cascade(struct irq_desc *desc)
+{
+	unsigned int hwirq;
+
+	hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
+				 &cpld_regs->pci_mask);
+	if (hwirq == ~0)
+		hwirq = cpld_pic_get_irq(8, MISC_IGNORE,
+					 &cpld_regs->misc_status,
+					 &cpld_regs->misc_mask);
+
+	if (hwirq != ~0)
+		generic_handle_domain_irq(cpld_pic_host, hwirq);
+}
+
+static int
+cpld_pic_host_match(struct irq_domain *h, struct device_node *node,
+		    enum irq_domain_bus_token bus_token)
+{
+	return cpld_pic_node == node;	/* 'h' and 'bus_token' are not looked at */
+}
+
+static int
+cpld_pic_host_map(struct irq_domain *h, unsigned int virq,
+			     irq_hw_number_t hw)
+{
+	irq_set_status_flags(virq, IRQ_LEVEL);	/* all CPLD interrupts are set up as level */
+	irq_set_chip_and_handler(virq, &cpld_pic, handle_level_irq);
+	return 0;	/* 'h' and 'hw' are not used, the mapping never fails */
+}
+
+static const struct irq_domain_ops cpld_pic_host_ops = {
+	.match = cpld_pic_host_match,	/* compares against cpld_pic_node */
+	.map = cpld_pic_host_map,	/* level flow handler with the cpld_pic chip */
+};
+
+/*
+ * Map the CPLD registers from the "fsl,mpc5121ads-cpld-pic" node and
+ * store the mapping in cpld_regs. Logs an error and leaves cpld_regs
+ * alone when the device tree has no such node.
+ */
+void __init
+mpc5121_ads_cpld_map(void)
+{
+	struct device_node *cpld;
+
+	cpld = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic");
+	if (cpld) {
+		cpld_regs = of_iomap(cpld, 0);
+		of_node_put(cpld);
+		return;
+	}
+
+	pr_err("CPLD PIC init: can not find cpld-pic node\n");
+}
+
+/*
+ * Set up the CPLD interrupt controller and chain it to its parent.
+ *
+ * Requires the registers to be mapped already (see
+ * mpc5121_ads_cpld_map()). Silently gives up when the registers are
+ * not mapped or the cascade interrupt cannot get mapped, logs an error
+ * when the device tree node or the irq domain is not available.
+ */
+void __init
+mpc5121_ads_cpld_pic_init(void)
+{
+	unsigned int cascade_irq;
+	struct device_node *np = NULL;
+
+	pr_debug("cpld_ic_init\n");
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic");
+	if (!np) {
+		printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n");
+		return;
+	}
+
+	if (!cpld_regs)
+		goto end;
+
+	cascade_irq = irq_of_parse_and_map(np, 0);
+	if (!cascade_irq)
+		goto end;
+
+	/*
+	 * statically route touch screen pendown through 1
+	 * and ignore it here
+	 * route all others through our cascade irq
+	 */
+	out_8(&cpld_regs->route, 0xfd);
+	/* start with all interrupts of the PCI group masked */
+	out_8(&cpld_regs->pci_mask, 0xff);
+	/* unmask pci ints in misc mask */
+	out_8(&cpld_regs->misc_mask, ~(MISC_IGNORE));
+
+	/* the match callback compares against this node, keep a reference */
+	cpld_pic_node = of_node_get(np);
+
+	cpld_pic_host = irq_domain_add_linear(np, 16, &cpld_pic_host_ops, NULL);
+	if (!cpld_pic_host) {
+		printk(KERN_ERR "CPLD PIC: failed to allocate irq host!\n");
+		/* no domain to match for, drop the reference taken above */
+		of_node_put(cpld_pic_node);
+		cpld_pic_node = NULL;
+		goto end;
+	}
+
+	irq_set_chained_handler(cascade_irq, cpld_pic_cascade);
+end:
+	/* release the reference of the node lookup */
+	of_node_put(np);
+}
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
new file mode 100644
index 0000000000..d2cb06e3a4
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Prototypes for MPC512x shared code
+ */
+
+#ifndef __MPC512X_H__
+#define __MPC512X_H__
+extern void __init mpc512x_init_IRQ(void);
+extern void __init mpc512x_init_early(void);
+extern void __init mpc512x_init(void);
+extern void __init mpc512x_setup_arch(void);
+extern int __init mpc5121_clk_init(void);	/* 0, or -ENODEV without a clock node */
+const char *__init mpc512x_select_psc_compat(void);	/* compatible string of the PSC nodes */
+extern void __noreturn mpc512x_restart(char *cmd);	/* triggers a reset, never returns */
+
+#endif				/* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
new file mode 100644
index 0000000000..0d58ab257c
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>
+ *
+ * Description:
+ * MPC512x SoC setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/time.h>
+
+#include "mpc512x.h"
+
+/*
+ * list of supported boards
+ *
+ * NULL terminated list of compatible strings, the root node of the
+ * device tree is matched against it in mpc512x_generic_probe()
+ */
+static const char * const board[] __initconst = {
+	"prt,prtlvt",
+	"fsl,mpc5125ads",
+	"ifm,ac14xx",
+	NULL
+};
+
+/*
+ * Machine probe hook, called very early with the MMU still off.
+ *
+ * Accepts the machine when the root node is compatible with one of the
+ * entries in board[]; in that case the shared early init is run and 1
+ * is returned, otherwise 0.
+ *
+ * NOTE(review): this comment used to claim that the device tree isn't
+ * unflattened at this point, while the code looks at of_root -- confirm
+ * which of the two is up to date.
+ */
+static int __init mpc512x_generic_probe(void)
+{
+	if (of_device_compatible_match(of_root, board)) {
+		mpc512x_init_early();
+		return 1;
+	}
+
+	return 0;
+}
+
+define_machine(mpc512x_generic) {
+	.name			= "MPC512x generic",
+	.probe			= mpc512x_generic_probe,	/* matches board[] itself */
+	.init			= mpc512x_init,
+	.setup_arch		= mpc512x_setup_arch,
+	.init_IRQ		= mpc512x_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc512x_restart,
+};
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
new file mode 100644
index 0000000000..4a25b6b486
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The driver for Freescale MPC512x LocalPlus Bus FIFO
+ * (called SCLPC in the Reference Manual).
+ *
+ * Copyright (C) 2013-2015 Alexander Popov <alex.popov@linux.com>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <asm/mpc5121.h>
+#include <asm/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+
+#define DRV_NAME "mpc512x_lpbfifo"
+
+struct cs_range {
+	u32 csnum;	/* chip select: upper 32 bits of the range's bus address */
+	u32 base; /* must be zero */
+	u32 addr;	/* CPU address where the range starts */
+	u32 size;	/* length of the range */
+};
+
+static struct lpbfifo_data {
+	spinlock_t lock; /* for protecting lpbfifo_data */
+	phys_addr_t regs_phys;	/* physical start of the register block */
+	resource_size_t regs_size;
+	struct mpc512x_lpbfifo __iomem *regs;	/* NULL: no usable device */
+	int irq;
+	struct cs_range *cs_ranges;	/* chip select ranges of the localbus node */
+	size_t cs_n;	/* number of entries in cs_ranges */
+	struct dma_chan *chan;	/* the "rx-tx" DMA channel */
+	struct mpc512x_lpbfifo_request *req;	/* request in flight, NULL when idle */
+	dma_addr_t ram_bus_addr;	/* DMA mapping of the RAM buffer, for unmapping */
+	bool wait_lpbfifo_irq;	/* FIFO interrupt still outstanding */
+	bool wait_lpbfifo_callback;	/* DMA completion still outstanding */
+} lpbfifo;
+
+/*
+ * A data transfer from RAM to some device on LPB is finished
+ * when both mpc512x_lpbfifo_irq() and mpc512x_lpbfifo_callback()
+ * have been called. We execute the callback registered in
+ * mpc512x_lpbfifo_request just after that.
+ * But for a data transfer from some device on LPB to RAM we don't enable
+ * LPBFIFO interrupt because clearing MPC512X_SCLPC_SUCCESS interrupt flag
+ * automatically disables LPBFIFO reading request to the DMA controller
+ * and the data transfer hangs. So the callback registered in
+ * mpc512x_lpbfifo_request is executed at the end of mpc512x_lpbfifo_callback().
+ */
+
+/*
+ * mpc512x_lpbfifo_irq - IRQ handler for LPB FIFO
+ *
+ * @irq: interrupt number (not used)
+ * @param: the struct device which was passed to request_irq()
+ *
+ * Only expected for write requests (RAM to peripheral). Acknowledges
+ * the success status and marks the FIFO part of the transfer as done;
+ * when the DMA callback has been seen already the request is complete,
+ * the FIFO is set idle, and the request's callback is run after the
+ * lock got dropped. Any status other than MPC512X_SCLPC_SUCCESS resets
+ * the FIFO and leaves the request pending.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mpc512x_lpbfifo_irq(int irq, void *param)
+{
+	struct device *dev = (struct device *)param;
+	struct mpc512x_lpbfifo_request *req = NULL;
+	unsigned long flags;
+	u32 status;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+
+	if (!lpbfifo.regs)	/* cleared by the remove routine */
+		goto end;
+
+	req = lpbfifo.req;
+	if (!req || req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) {
+		dev_err(dev, "bogus LPBFIFO IRQ\n");
+		goto end;
+	}
+
+	status = in_be32(&lpbfifo.regs->status);
+	if (status != MPC512X_SCLPC_SUCCESS) {
+		dev_err(dev, "DMA transfer from RAM to peripheral failed\n");
+		out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+		goto end;
+	}
+	/* Clear the interrupt flag */
+	out_be32(&lpbfifo.regs->status, MPC512X_SCLPC_SUCCESS);
+
+	lpbfifo.wait_lpbfifo_irq = false;
+
+	if (lpbfifo.wait_lpbfifo_callback)	/* the DMA callback will finish up */
+		goto end;
+
+	/* Transfer is finished, set the FIFO as idle */
+	lpbfifo.req = NULL;
+
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	if (req->callback)	/* invoked without the lock held */
+		req->callback(req);
+
+	return IRQ_HANDLED;
+
+ end:
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * mpc512x_lpbfifo_callback is called by DMA driver when
+ * DMA transaction is finished.
+ *
+ * @param: not used (the DMA descriptor is set up with a NULL parameter)
+ *
+ * Releases the DMA mapping of the RAM buffer and marks the DMA part of
+ * the transfer as done. When no FIFO interrupt is outstanding any more
+ * the FIFO is set idle and the request's callback is run after the
+ * lock got dropped.
+ */
+static void mpc512x_lpbfifo_callback(void *param)
+{
+	unsigned long flags;
+	struct mpc512x_lpbfifo_request *req = NULL;
+	enum dma_data_direction dir;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+
+	if (!lpbfifo.regs) {	/* cleared by the remove routine */
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+		return;
+	}
+
+	req = lpbfifo.req;
+	if (!req) {
+		pr_err("bogus LPBFIFO callback\n");
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+		return;
+	}
+
+	/* Release the mapping */
+	if (req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE)
+		dir = DMA_TO_DEVICE;
+	else
+		dir = DMA_FROM_DEVICE;
+	dma_unmap_single(lpbfifo.chan->device->dev,
+			lpbfifo.ram_bus_addr, req->size, dir);
+
+	lpbfifo.wait_lpbfifo_callback = false;
+
+	if (!lpbfifo.wait_lpbfifo_irq) {
+		/* Transfer is finished, set the FIFO as idle */
+		lpbfifo.req = NULL;
+
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+		if (req->callback)	/* invoked without the lock held */
+			req->callback(req);
+	} else {
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+	}
+}
+
+/*
+ * mpc512x_lpbfifo_kick - validate the pending request, then program
+ * and start both the DMA channel and the FIFO for it.
+ *
+ * Operates on lpbfifo.req; the caller in this file,
+ * mpc512x_lpbfifo_submit_locked(), runs with lpbfifo.lock held.
+ *
+ * Returns 0 when the transfer got started, -EINVAL for requests which
+ * violate the size/alignment rules or when the DMA channel rejects the
+ * configuration, -EFAULT when the device address range is not covered
+ * by any chip select or the RAM buffer cannot get mapped, and -ENOSPC
+ * when the DMA descriptor cannot get prepared or submitted.
+ */
+static int mpc512x_lpbfifo_kick(void)
+{
+	u32 bits;
+	bool no_incr = false;
+	u32 bpt = 32; /* max bytes per LPBFIFO transaction involving DMA */
+	u32 cs = 0;
+	size_t i;
+	struct dma_device *dma_dev = NULL;
+	struct scatterlist sg;
+	enum dma_data_direction dir;
+	struct dma_slave_config dma_conf = {};
+	struct dma_async_tx_descriptor *dma_tx = NULL;
+	dma_cookie_t cookie;
+	int ret;
+
+	/*
+	 * 1. Fit the requirements:
+	 * - the packet size must be a multiple of 4 since FIFO Data Word
+	 *    Register allows only full-word access according the Reference
+	 *    Manual;
+	 * - the physical address of the device on LPB and the packet size
+	 *    must be aligned on BPT (bytes per transaction) or 8-bytes
+	 *    boundary according the Reference Manual;
+	 * - but we choose DMA maxburst equal (or very close to) BPT to prevent
+	 *    DMA controller from overtaking FIFO and causing FIFO underflow
+	 *    error. So we force the packet size to be aligned on BPT boundary
+	 *    not to confuse DMA driver which requires the packet size to be
+	 *    aligned on maxburst boundary;
+	 * - BPT should be set to the LPB device port size for operation with
+	 *    disabled auto-incrementing according Reference Manual.
+	 */
+	if (lpbfifo.req->size == 0 || !IS_ALIGNED(lpbfifo.req->size, 4))
+		return -EINVAL;
+
+	if (lpbfifo.req->portsize != LPB_DEV_PORTSIZE_UNDEFINED) {
+		bpt = lpbfifo.req->portsize;
+		no_incr = true;
+	}
+
+	/*
+	 * halve BPT until address and size are suitably aligned; a fixed
+	 * port size cannot get adjusted, so misalignment is an error then
+	 */
+	while (bpt > 1) {
+		if (IS_ALIGNED(lpbfifo.req->dev_phys_addr, min(bpt, 0x8u)) &&
+					IS_ALIGNED(lpbfifo.req->size, bpt)) {
+			break;
+		}
+
+		if (no_incr)
+			return -EINVAL;
+
+		bpt >>= 1;
+	}
+	/* maxburst is given in units of the four byte wide FIFO data word */
+	dma_conf.dst_maxburst = max(bpt, 0x4u) / 4;
+	dma_conf.src_maxburst = max(bpt, 0x4u) / 4;
+
+	/* the whole access must fit into a single chip select range */
+	for (i = 0; i < lpbfifo.cs_n; i++) {
+		phys_addr_t cs_start = lpbfifo.cs_ranges[i].addr;
+		phys_addr_t cs_end = cs_start + lpbfifo.cs_ranges[i].size;
+		phys_addr_t access_start = lpbfifo.req->dev_phys_addr;
+		phys_addr_t access_end = access_start + lpbfifo.req->size;
+
+		if (access_start >= cs_start && access_end <= cs_end) {
+			cs = lpbfifo.cs_ranges[i].csnum;
+			break;
+		}
+	}
+	if (i == lpbfifo.cs_n)
+		return -EFAULT;
+
+	/* 2. Prepare DMA */
+	dma_dev = lpbfifo.chan->device;
+
+	if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) {
+		dir = DMA_TO_DEVICE;
+		dma_conf.direction = DMA_MEM_TO_DEV;
+		dma_conf.dst_addr = lpbfifo.regs_phys +
+				offsetof(struct mpc512x_lpbfifo, data_word);
+	} else {
+		dir = DMA_FROM_DEVICE;
+		dma_conf.direction = DMA_DEV_TO_MEM;
+		dma_conf.src_addr = lpbfifo.regs_phys +
+				offsetof(struct mpc512x_lpbfifo, data_word);
+	}
+	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	/*
+	 * Make DMA channel work with LPB FIFO data register
+	 *
+	 * the RAM buffer is not mapped yet at this point and 'sg' is not
+	 * even initialized, so there is nothing to unmap on failure
+	 */
+	if (dma_dev->device_config(lpbfifo.chan, &dma_conf))
+		return -EINVAL;
+
+	sg_init_table(&sg, 1);
+
+	sg_dma_address(&sg) = dma_map_single(dma_dev->dev,
+			lpbfifo.req->ram_virt_addr, lpbfifo.req->size, dir);
+	if (dma_mapping_error(dma_dev->dev, sg_dma_address(&sg)))
+		return -EFAULT;
+
+	lpbfifo.ram_bus_addr = sg_dma_address(&sg); /* For freeing later */
+
+	sg_dma_len(&sg) = lpbfifo.req->size;
+
+	dma_tx = dmaengine_prep_slave_sg(lpbfifo.chan, &sg,
+						1, dma_conf.direction, 0);
+	if (!dma_tx) {
+		ret = -ENOSPC;
+		goto err_dma_prep;
+	}
+	dma_tx->callback = mpc512x_lpbfifo_callback;
+	dma_tx->callback_param = NULL;
+
+	/* 3. Prepare FIFO */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+	out_be32(&lpbfifo.regs->enable, 0x0);
+
+	/*
+	 * Configure the watermarks for write operation (RAM->DMA->FIFO->dev):
+	 * - high watermark 7 words according the Reference Manual,
+	 * - low watermark 512 bytes (half of the FIFO).
+	 * These watermarks don't work for read operation since the
+	 * MPC512X_SCLPC_FLUSH bit is set (according the Reference Manual).
+	 */
+	out_be32(&lpbfifo.regs->fifo_ctrl, MPC512X_SCLPC_FIFO_CTRL(0x7));
+	out_be32(&lpbfifo.regs->fifo_alarm, MPC512X_SCLPC_FIFO_ALARM(0x200));
+
+	/*
+	 * Start address is a physical address of the region which belongs
+	 * to the device on the LocalPlus Bus
+	 */
+	out_be32(&lpbfifo.regs->start_addr, lpbfifo.req->dev_phys_addr);
+
+	/*
+	 * Configure chip select, transfer direction, address increment option
+	 * and bytes per transaction option
+	 */
+	bits = MPC512X_SCLPC_CS(cs);
+	if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_READ)
+		bits |= MPC512X_SCLPC_READ | MPC512X_SCLPC_FLUSH;
+	if (no_incr)
+		bits |= MPC512X_SCLPC_DAI;
+	bits |= MPC512X_SCLPC_BPT(bpt);
+	out_be32(&lpbfifo.regs->ctrl, bits);
+
+	/*
+	 * Unmask irqs; read transfers do not enable the "normal" interrupt
+	 * and thus must not wait for it either
+	 */
+	bits = MPC512X_SCLPC_ENABLE | MPC512X_SCLPC_ABORT_INT_ENABLE;
+	if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE)
+		bits |= MPC512X_SCLPC_NORM_INT_ENABLE;
+	else
+		lpbfifo.wait_lpbfifo_irq = false;
+
+	out_be32(&lpbfifo.regs->enable, bits);
+
+	/* 4. Set packet size and kick FIFO off */
+	bits = lpbfifo.req->size | MPC512X_SCLPC_START;
+	out_be32(&lpbfifo.regs->pkt_size, bits);
+
+	/* 5. Finally kick DMA off */
+	cookie = dma_tx->tx_submit(dma_tx);
+	if (dma_submit_error(cookie)) {
+		ret = -ENOSPC;
+		goto err_dma_submit;
+	}
+
+	return 0;
+
+ err_dma_submit:
+	/* stop the FIFO which has been started already */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ err_dma_prep:
+	/* only reached with the RAM buffer mapped */
+	dma_unmap_single(dma_dev->dev, sg_dma_address(&sg),
+						lpbfifo.req->size, dir);
+	return ret;
+}
+
+/*
+ * Start the transfer that 'req' describes; mpc512x_lpbfifo_submit()
+ * calls this with lpbfifo.lock held.
+ *
+ * Returns -ENODEV without mapped registers, -EBUSY while another
+ * request is in flight, and the result of mpc512x_lpbfifo_kick()
+ * otherwise. The FIFO is idle again when kicking off failed.
+ */
+static int mpc512x_lpbfifo_submit_locked(struct mpc512x_lpbfifo_request *req)
+{
+	int err;
+
+	if (lpbfifo.regs == NULL)
+		return -ENODEV;
+
+	/* only one transfer at a time */
+	if (lpbfifo.req != NULL)
+		return -EBUSY;
+
+	/* a transfer has to see both the DMA callback and the FIFO irq */
+	lpbfifo.req = req;
+	lpbfifo.wait_lpbfifo_callback = true;
+	lpbfifo.wait_lpbfifo_irq = true;
+
+	err = mpc512x_lpbfifo_kick();
+	if (err)
+		lpbfifo.req = NULL; /* back to idle */
+
+	return err;
+}
+
+/*
+ * mpc512x_lpbfifo_submit - queue a transfer request for the LPB FIFO
+ *
+ * Takes lpbfifo.lock with interrupts disabled around the actual
+ * submission. Returns 0 when the transfer got started, a negative
+ * error code otherwise (see mpc512x_lpbfifo_submit_locked()).
+ */
+int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req)
+{
+	unsigned long irq_state;
+	int status;
+
+	spin_lock_irqsave(&lpbfifo.lock, irq_state);
+	status = mpc512x_lpbfifo_submit_locked(req);
+	spin_unlock_irqrestore(&lpbfifo.lock, irq_state);
+
+	return status;
+}
+EXPORT_SYMBOL(mpc512x_lpbfifo_submit);
+
+/*
+ * LPBFIFO driver uses "ranges" property of "localbus" device tree node
+ * for being able to determine the chip select number of a client device
+ * ordering a DMA transfer.
+ *
+ * Fills lpbfifo.cs_n and lpbfifo.cs_ranges (device managed memory).
+ * Returns 0 on success, -ENODEV when the localbus node is missing, the
+ * table cannot get allocated, or a range has a non-zero low word in
+ * its bus address.
+ */
+static int get_cs_ranges(struct device *dev)
+{
+	struct device_node *localbus;
+	struct of_range_parser parser;
+	struct of_range range;
+	struct cs_range *slot;
+	int ret = -ENODEV;
+
+	localbus = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-localbus");
+	if (!localbus)
+		return -ENODEV;
+
+	of_range_parser_init(&parser, localbus);
+	lpbfifo.cs_n = of_range_count(&parser);
+
+	lpbfifo.cs_ranges = devm_kcalloc(dev, lpbfifo.cs_n,
+					sizeof(struct cs_range), GFP_KERNEL);
+	if (!lpbfifo.cs_ranges)
+		goto out_put;
+
+	slot = lpbfifo.cs_ranges;
+	for_each_of_range(&parser, &range) {
+		/* the low word of the bus address is the base, must be zero */
+		if (lower_32_bits(range.bus_addr) != 0)
+			goto out_put;
+
+		slot->csnum = upper_32_bits(range.bus_addr);
+		slot->base = 0;
+		slot->addr = range.cpu_addr;
+		slot->size = range.size;
+		slot++;
+	}
+
+	ret = 0;
+
+ out_put:
+	of_node_put(localbus);
+	return ret;
+}
+
+/*
+ * mpc512x_lpbfifo_probe - bind to the LPB FIFO device
+ *
+ * Requests the "rx-tx" DMA channel, maps and resets the FIFO registers,
+ * collects the chip select ranges of the localbus node, and installs
+ * the interrupt handler.
+ *
+ * Returns 0 on success, the error of dma_request_chan(), or -ENODEV,
+ * -EBUSY, -ENOMEM for the individual setup steps. After a failure the
+ * driver state says "no device" again, such that
+ * mpc512x_lpbfifo_submit() rejects requests instead of touching
+ * released resources.
+ */
+static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
+{
+	struct resource r;
+	unsigned long flags;
+	int ret = 0;
+
+	memset(&lpbfifo, 0, sizeof(struct lpbfifo_data));
+	spin_lock_init(&lpbfifo.lock);
+
+	lpbfifo.chan = dma_request_chan(&pdev->dev, "rx-tx");
+	if (IS_ERR(lpbfifo.chan))
+		return PTR_ERR(lpbfifo.chan);
+
+	if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) {
+		dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	lpbfifo.regs_phys = r.start;
+	lpbfifo.regs_size = resource_size(&r);
+
+	if (!devm_request_mem_region(&pdev->dev, lpbfifo.regs_phys,
+					lpbfifo.regs_size, DRV_NAME)) {
+		dev_err(&pdev->dev, "unable to request region\n");
+		ret = -EBUSY;
+		goto err0;
+	}
+
+	lpbfifo.regs = devm_ioremap(&pdev->dev,
+					lpbfifo.regs_phys, lpbfifo.regs_size);
+	if (!lpbfifo.regs) {
+		dev_err(&pdev->dev, "mapping registers failed\n");
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	/* start from a known state */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+	if (get_cs_ranges(&pdev->dev) != 0) {
+		dev_err(&pdev->dev, "bad '/localbus' device tree node\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (!lpbfifo.irq) {
+		dev_err(&pdev->dev, "mapping irq failed\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	if (request_irq(lpbfifo.irq, mpc512x_lpbfifo_irq, 0,
+						DRV_NAME, &pdev->dev) != 0) {
+		dev_err(&pdev->dev, "requesting irq failed\n");
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	dev_info(&pdev->dev, "probe succeeded\n");
+	return 0;
+
+ err1:
+	irq_dispose_mapping(lpbfifo.irq);
+ err0:
+	/*
+	 * the device managed register mapping goes away after a failed
+	 * probe and the DMA channel gets released right below, so do not
+	 * leave a stale 'regs' pointer behind which the submit routine
+	 * would take for a usable device
+	 */
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	lpbfifo.regs = NULL;
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	dma_release_channel(lpbfifo.chan);
+	return ret;
+}
+
+static void mpc512x_lpbfifo_remove(struct platform_device *pdev)
+{
+	unsigned long flags;
+	struct dma_device *dma_dev = lpbfifo.chan->device;
+	struct mpc512x_lpbfifo __iomem *regs = NULL;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	regs = lpbfifo.regs;	/* keep a private copy for the final reset */
+	lpbfifo.regs = NULL;	/* makes submit, irq handler, and DMA callback bail out */
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	dma_dev->device_terminate_all(lpbfifo.chan);
+	out_be32(&regs->enable, MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+	free_irq(lpbfifo.irq, &pdev->dev);	/* same cookie as in request_irq() */
+	irq_dispose_mapping(lpbfifo.irq);
+	dma_release_channel(lpbfifo.chan);
+}
+
+static const struct of_device_id mpc512x_lpbfifo_match[] = {
+	{ .compatible = "fsl,mpc512x-lpbfifo", },
+	{},	/* terminating empty entry */
+};
+MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
+
+static struct platform_driver mpc512x_lpbfifo_driver = {
+	.probe = mpc512x_lpbfifo_probe,
+	.remove_new = mpc512x_lpbfifo_remove,	/* remove callback without a return value */
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = mpc512x_lpbfifo_match,	/* binds via the device tree */
+	},
+};
+
+module_platform_driver(mpc512x_lpbfifo_driver);
+
+MODULE_AUTHOR("Alexander Popov <alex.popov@linux.com>");
+MODULE_DESCRIPTION("MPC512x LocalPlus Bus FIFO device driver");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
new file mode 100644
index 0000000000..8f75e9574c
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby <jrigby@freescale.com>
+ *
+ * Description:
+ * MPC512x Shared code
+ */
+
+#include <linux/clk.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/fsl-diu-fb.h>
+#include <linux/memblock.h>
+#include <sysdev/fsl_soc.h>
+
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/time.h>
+#include <asm/mpc5121.h>
+#include <asm/mpc52xx_psc.h>
+
+#include "mpc512x.h"
+
+static struct mpc512x_reset_module __iomem *reset_module_base;
+
+/*
+ * Machine restart hook: request a software hard reset through the reset
+ * module when its registers have been mapped, then spin forever.  @cmd is
+ * not used.
+ */
+void __noreturn mpc512x_restart(char *cmd)
+{
+	if (!reset_module_base) {
+		pr_err("Restart module not mapped.\n");
+	} else {
+		/* Enable software reset "RSTE" */
+		out_be32(&reset_module_base->rpr, 0x52535445);
+		/* Set software hard reset */
+		out_be32(&reset_module_base->rcr, 0x2);
+	}
+
+	/* never hand control back, whether or not the reset was requested */
+	while (1)
+		;
+}
+
+/*
+ * State taken over from a pre-initialized DIU: private copies of the gamma
+ * table and of area descriptor 0, plus the location and length of the
+ * frame buffer that was found in that descriptor (filled in by
+ * mpc512x_init_diu()).
+ */
+struct fsl_diu_shared_fb {
+	u8		gamma[0x300];	/* 32-bit aligned! */
+	struct diu_ad	ad0;		/* 32-bit aligned! */
+	phys_addr_t	fb_phys;	/* frame buffer address from the descriptor */
+	size_t		fb_len;		/* frame buffer length in bytes */
+	bool		in_use;		/* set while the frame buffer is tracked */
+};
+
+/*
+ * receives a pixel clock spec in pico seconds, adjusts the DIU clock rate
+ *
+ * The period is converted to a frequency, then clock rates within +/- 5%
+ * of that frequency are tried until the clock driver delivers one inside
+ * the window.  If none is found, the clock driver's best approximation of
+ * the requested frequency is used.  Nothing is returned; problems are
+ * only logged.
+ */
+static void mpc512x_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *np;
+	struct clk *clk_diu;
+	unsigned long epsilon, minpixclock, maxpixclock;
+	unsigned long offset, want, got, delta;
+
+	/*
+	 * reject periods that cannot be converted: zero would divide by
+	 * zero below, and anything above 10^9 ps converts to a frequency
+	 * of zero, which makes the search step zero and the search loop
+	 * endless
+	 */
+	if (!pixclock || pixclock > 1000000000) {
+		pr_err("Invalid DIU pixclock period - %u ps\n", pixclock);
+		return;
+	}
+
+	/* lookup and enable the DIU clock */
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
+	if (!np) {
+		pr_err("Could not find DIU device tree node.\n");
+		return;
+	}
+	clk_diu = of_clk_get(np, 0);
+	if (IS_ERR(clk_diu)) {
+		/* backwards compat with device trees that lack clock specs */
+		clk_diu = clk_get_sys(np->name, "ipg");
+	}
+	of_node_put(np);
+	if (IS_ERR(clk_diu)) {
+		pr_err("Could not lookup DIU clock.\n");
+		return;
+	}
+	if (clk_prepare_enable(clk_diu)) {
+		pr_err("Could not enable DIU clock.\n");
+		/* the clock is not going to be used, drop the reference */
+		clk_put(clk_diu);
+		return;
+	}
+
+	/*
+	 * convert the picoseconds spec into the desired clock rate,
+	 * determine the acceptable clock range for the monitor (+/- 5%),
+	 * do the calculation in steps to avoid integer overflow
+	 */
+	pr_debug("DIU pixclock in ps - %u\n", pixclock);
+	pixclock = (1000000000 / pixclock) * 1000;
+	pr_debug("DIU pixclock freq  - %u\n", pixclock);
+	epsilon = pixclock / 20; /* pixclock * 0.05 */
+	pr_debug("DIU deviation      - %lu\n", epsilon);
+	minpixclock = pixclock - epsilon;
+	maxpixclock = pixclock + epsilon;
+	pr_debug("DIU minpixclock    - %lu\n", minpixclock);
+	pr_debug("DIU maxpixclock    - %lu\n", maxpixclock);
+
+	/*
+	 * check whether the DIU supports the desired pixel clock
+	 *
+	 * - simply request the desired clock and see what the
+	 *   platform's clock driver will make of it, assuming that it
+	 *   will setup the best approximation of the requested value
+	 * - try other candidate frequencies in the order of decreasing
+	 *   preference (i.e. with increasing distance from the desired
+	 *   pixel clock, and checking the lower frequency before the
+	 *   higher frequency to not overload the hardware) until the
+	 *   first match is found -- any potential subsequent match
+	 *   would only be as good as the former match or typically
+	 *   would be less preferable
+	 *
+	 * the offset increment of pixelclock divided by 64 is an
+	 * arbitrary choice -- it's simple to calculate, in the typical
+	 * case we expect the first check to succeed already, in the
+	 * worst case seven frequencies get tested (the exact center and
+	 * three more values each to the left and to the right) before
+	 * the 5% tolerance window is exceeded, resulting in fast enough
+	 * execution yet high enough probability of finding a suitable
+	 * value, while the error rate will be in the order of single
+	 * percents
+	 */
+	for (offset = 0; offset <= epsilon; offset += pixclock / 64) {
+		want = pixclock - offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+		/* at offset zero the upper candidate equals the lower one */
+		if (!offset)
+			continue;
+		want = pixclock + offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+	}
+	/* the loop was left early via break, i.e. a candidate was accepted */
+	if (offset <= epsilon) {
+		pr_debug("DIU clock accepted - %lu\n", want);
+		pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+			 pixclock, got, delta, epsilon);
+		return;
+	}
+	pr_warn("DIU pixclock auto search unsuccessful\n");
+
+	/*
+	 * what is the most appropriate action to take when the search
+	 * for an available pixel clock which is acceptable to the
+	 * monitor has failed?  disable the DIU (clock) or just provide
+	 * a "best effort"?  we go with the latter
+	 */
+	pr_warn("DIU pixclock best effort fallback (backend's choice)\n");
+	clk_set_rate(clk_diu, pixclock);
+	got = clk_get_rate(clk_diu);
+	delta = abs(pixclock - got);
+	pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+		 pixclock, got, delta, epsilon);
+}
+
+/* The requested @port is ignored: FSL_DIU_PORT_DVI is always returned. */
+static enum fsl_diu_monitor_port
+mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	return FSL_DIU_PORT_DVI;
+}
+
+/* the single instance; the DIU registers are pointed at its gamma/ad0 members */
+static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
+
+/*
+ * Hand one reserved page back through free_reserved_page().  The BUG_ON()s
+ * insist that it is not a tail page and has at most one reference.
+ */
+static inline void mpc512x_free_bootmem(struct page *page)
+{
+	BUG_ON(PageTail(page));
+	BUG_ON(page_ref_count(page) > 1);
+	free_reserved_page(page);
+}
+
+/*
+ * diu_ops.release_bootmem hook: free the pages of the frame buffer taken
+ * over from the pre-initialized DIU (if one is still tracked) and unhook
+ * itself from diu_ops so that it is not invoked again.
+ */
+static void mpc512x_release_bootmem(void)
+{
+	unsigned long base, len, pfn, last;
+
+	if (diu_shared_fb.in_use) {
+		base = diu_shared_fb.fb_phys & PAGE_MASK;
+		len = diu_shared_fb.fb_len;
+
+		/* walk the page frames between the rounded boundaries */
+		pfn = PFN_UP(base);
+		last = PFN_DOWN(base + len);
+		while (pfn < last) {
+			mpc512x_free_bootmem(pfn_to_page(pfn));
+			pfn++;
+		}
+
+		diu_shared_fb.in_use = false;
+	}
+	diu_ops.release_bootmem	= NULL;
+}
+
+/*
+ * Check if DIU was pre-initialized. If so, perform steps
+ * needed to continue displaying through the whole boot process.
+ * Move area descriptor and gamma table elsewhere, they are
+ * destroyed by bootmem allocator otherwise. The frame buffer
+ * address range will be reserved in setup_arch() after bootmem
+ * allocator is up.
+ *
+ * In detail: area descriptor 0 and the gamma table are copied into
+ * diu_shared_fb, frame buffer address and length are recorded there,
+ * and the DIU registers are re-pointed at the copies.  Nothing is
+ * changed when the DIU mode register reads MFB_MODE0 or when a mapping
+ * fails.
+ */
+static void __init mpc512x_init_diu(void)
+{
+	struct device_node *np;
+	struct diu __iomem *diu_reg;
+	phys_addr_t desc;
+	void __iomem *vaddr;
+	unsigned long mode, pix_fmt, res, bpp;
+	unsigned long dst;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
+	if (!np) {
+		pr_err("No DIU node\n");
+		return;
+	}
+
+	diu_reg = of_iomap(np, 0);
+	of_node_put(np);
+	if (!diu_reg) {
+		pr_err("Can't map DIU\n");
+		return;
+	}
+
+	mode = in_be32(&diu_reg->diu_mode);
+	if (mode == MFB_MODE0) {
+		pr_info("%s: DIU OFF\n", __func__);
+		goto out;
+	}
+
+	/* desc[0] holds the physical address of the active area descriptor */
+	desc = in_be32(&diu_reg->desc[0]);
+	vaddr = ioremap(desc, sizeof(struct diu_ad));
+	if (!vaddr) {
+		pr_err("Can't map DIU area desc.\n");
+		goto out;
+	}
+	memcpy(&diu_shared_fb.ad0, vaddr, sizeof(struct diu_ad));
+	/* flush fb area descriptor */
+	dst = (unsigned long)&diu_shared_fb.ad0;
+	flush_dcache_range(dst, dst + sizeof(struct diu_ad) - 1);
+
+	/*
+	 * the first two little-endian words of the descriptor are read as
+	 * pixel format and frame buffer address; bits 17:16 of the format
+	 * plus one are used as the per-pixel size (presumably bytes per
+	 * pixel -- TODO confirm against the DIU documentation)
+	 */
+	res = in_be32(&diu_reg->disp_size);
+	pix_fmt = in_le32(vaddr);
+	bpp = ((pix_fmt >> 16) & 0x3) + 1;
+	diu_shared_fb.fb_phys = in_le32(vaddr + 4);
+	/* product of the two 12-bit fields of disp_size and the pixel size */
+	diu_shared_fb.fb_len = ((res & 0xfff0000) >> 16) * (res & 0xfff) * bpp;
+	diu_shared_fb.in_use = true;
+	iounmap(vaddr);
+
+	desc = in_be32(&diu_reg->gamma);
+	vaddr = ioremap(desc, sizeof(diu_shared_fb.gamma));
+	if (!vaddr) {
+		/*
+		 * NOTE(review): this is the gamma table mapping, the message
+		 * text looks copied from the area descriptor case above
+		 */
+		pr_err("Can't map DIU area desc.\n");
+		diu_shared_fb.in_use = false;
+		goto out;
+	}
+	memcpy(&diu_shared_fb.gamma, vaddr, sizeof(diu_shared_fb.gamma));
+	/* flush gamma table */
+	dst = (unsigned long)&diu_shared_fb.gamma;
+	flush_dcache_range(dst, dst + sizeof(diu_shared_fb.gamma) - 1);
+
+	iounmap(vaddr);
+	/* re-point the DIU at the copies; desc[0] is written last */
+	out_be32(&diu_reg->gamma, virt_to_phys(&diu_shared_fb.gamma));
+	out_be32(&diu_reg->desc[1], 0);
+	out_be32(&diu_reg->desc[2], 0);
+	out_be32(&diu_reg->desc[0], virt_to_phys(&diu_shared_fb.ad0));
+
+out:
+	iounmap(diu_reg);
+}
+
+/*
+ * Reserve the frame buffer of a pre-initialized DIU (if any) and install
+ * the platform's callbacks in diu_ops.
+ */
+static void __init mpc512x_setup_diu(void)
+{
+	/*
+	 * No new bitmap buffer is allocated and configured here: that
+	 * would mean copying the bitmap data (splash image) and thus
+	 * slow down booting. The frame buffer area that is already
+	 * configured is reserved instead so that it is not destroyed;
+	 * its start address and its length are handed to
+	 * memblock_reserve(). The area is freed later, on the first open
+	 * of fbdev, when the splash image is no longer needed.
+	 */
+	if (diu_shared_fb.in_use &&
+	    memblock_reserve(diu_shared_fb.fb_phys, diu_shared_fb.fb_len)) {
+		pr_err("%s: reserve bootmem failed\n", __func__);
+		diu_shared_fb.in_use = false;
+	}
+
+	diu_ops.set_pixel_clock		= mpc512x_set_pixel_clock;
+	diu_ops.valid_monitor_port	= mpc512x_valid_monitor_port;
+	diu_ops.release_bootmem		= mpc512x_release_bootmem;
+}
+
+/*
+ * init_IRQ hook: initialize the IPIC found under the "fsl,mpc5121-ipic"
+ * compatible; does nothing when no such node exists.
+ */
+void __init mpc512x_init_IRQ(void)
+{
+	struct device_node *ipic_node;
+
+	ipic_node = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-ipic");
+	if (ipic_node == NULL)
+		return;
+
+	ipic_init(ipic_node, 0);
+	of_node_put(ipic_node);
+
+	/*
+	 * The boot rom may have changed the interrupt mapping priorities
+	 * on us, so put the defaults back in place.
+	 */
+	ipic_set_default_priority();
+}
+
+/*
+ * Nodes to do bus probe on: soc (immr) and localbus, plus the further
+ * compatibles listed below.  This is the match table handed to
+ * of_platform_bus_probe() by mpc512x_declare_of_platform_devices().
+ */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5121-immr", },
+	{ .compatible = "fsl,mpc5121-localbus", },
+	{ .compatible = "fsl,mpc5121-mbx", },
+	{ .compatible = "fsl,mpc5121-nfc", },
+	{ .compatible = "fsl,mpc5121-sram", },
+	{ .compatible = "fsl,mpc5121-pci", },
+	{ .compatible = "gpio-leds", },
+	{},
+};
+
+/* Probe the buses listed in of_bus_ids; a failure is only logged. */
+static void __init mpc512x_declare_of_platform_devices(void)
+{
+	int rc = of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	if (!rc)
+		return;
+
+	printk(KERN_ERR __FILE__ ": Error while probing of_platform bus\n");
+}
+
+#define DEFAULT_FIFO_SIZE 16
+
+/*
+ * Pick the PSC compatible string that goes with the machine: MPC5121 is
+ * checked before MPC5125.  Returns NULL for any other machine.
+ */
+const char *__init mpc512x_select_psc_compat(void)
+{
+	const char *compat = NULL;
+
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		compat = "fsl,mpc5121-psc";
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		compat = "fsl,mpc5125-psc";
+
+	return compat;
+}
+
+/*
+ * Pick the reset module compatible string that goes with the machine:
+ * MPC5121 is checked before MPC5125.  Returns NULL for any other machine.
+ */
+static const char *__init mpc512x_select_reset_compat(void)
+{
+	const char *compat = NULL;
+
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		compat = "fsl,mpc5121-reset";
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		compat = "fsl,mpc5125-reset";
+
+	return compat;
+}
+
+/*
+ * Read the FIFO size property @prop_name of node @np.
+ *
+ * Returns the value of the property, or DEFAULT_FIFO_SIZE (after logging
+ * a warning) when the property is missing or too short to hold a 32-bit
+ * cell.
+ */
+static unsigned int __init get_fifo_size(struct device_node *np,
+					 char *prop_name)
+{
+	u32 size;
+
+	/*
+	 * of_property_read_u32() checks the property length before reading
+	 * the cell, unlike dereferencing the raw of_get_property() result
+	 */
+	if (!of_property_read_u32(np, prop_name, &size))
+		return size;
+
+	pr_warn("no %s property in %pOF node, defaulting to %d\n",
+		prop_name, np, DEFAULT_FIFO_SIZE);
+
+	return DEFAULT_FIFO_SIZE;
+}
+
+/*
+ * FIFO controller registers of a PSC: addressed at the PSC mapping
+ * @_base plus sizeof(struct mpc52xx_psc).  The pointer is squeezed
+ * through a u32 cast for the arithmetic.
+ */
+#define FIFOC(_base) ((struct mpc512x_psc_fifo __iomem *) \
+		    ((u32)(_base) + sizeof(struct mpc52xx_psc)))
+
+/*
+ * Init PSC FIFO space for TX and RX slices
+ *
+ * For every PSC node the TX and RX sizes come from the device tree (or
+ * the default), the slices are placed back to back starting at offset
+ * zero, and each slice is reset and enabled.  PSCs that cannot be mapped
+ * or no longer fit are skipped.
+ */
+static void __init mpc512x_psc_fifo_init(void)
+{
+	struct device_node *np;
+	void __iomem *psc;
+	unsigned int tx_fifo_size;
+	unsigned int rx_fifo_size;
+	const char *psc_compat;
+	int fifobase = 0; /* current fifo address in 32 bit words */
+
+	psc_compat = mpc512x_select_psc_compat();
+	if (!psc_compat) {
+		pr_err("%s: no compatible devices found\n", __func__);
+		return;
+	}
+
+	for_each_compatible_node(np, NULL, psc_compat) {
+		tx_fifo_size = get_fifo_size(np, "fsl,tx-fifo-size");
+		rx_fifo_size = get_fifo_size(np, "fsl,rx-fifo-size");
+
+		/* size in register is in 4 byte units */
+		tx_fifo_size /= 4;
+		rx_fifo_size /= 4;
+		/* never program a slice size of zero */
+		if (!tx_fifo_size)
+			tx_fifo_size = 1;
+		if (!rx_fifo_size)
+			rx_fifo_size = 1;
+
+		psc = of_iomap(np, 0);
+		if (!psc) {
+			pr_err("%s: Can't map %pOF device\n",
+				__func__, np);
+			continue;
+		}
+
+		/*
+		 * FIFO space is 4KiB, check if requested size is available
+		 *
+		 * NOTE(review): fifobase and the sizes count 4 byte units
+		 * here, so the 0x1000 limit corresponds to 16KiB rather
+		 * than the 4KiB stated -- confirm against the reference
+		 * manual
+		 */
+		if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
+			pr_err("%s: no fifo space available for %pOF\n",
+				__func__, np);
+			iounmap(psc);
+			/*
+			 * chances are that another device requests less
+			 * fifo space, so we continue.
+			 */
+			continue;
+		}
+
+		/* set tx and rx fifo size registers */
+		/* start address goes into the upper half, size into the lower */
+		out_be32(&FIFOC(psc)->txsz, (fifobase << 16) | tx_fifo_size);
+		fifobase += tx_fifo_size;
+		out_be32(&FIFOC(psc)->rxsz, (fifobase << 16) | rx_fifo_size);
+		fifobase += rx_fifo_size;
+
+		/* reset and enable the slices */
+		out_be32(&FIFOC(psc)->txcmd, 0x80);
+		out_be32(&FIFOC(psc)->txcmd, 0x01);
+		out_be32(&FIFOC(psc)->rxcmd, 0x80);
+		out_be32(&FIFOC(psc)->rxcmd, 0x01);
+
+		iounmap(psc);
+	}
+}
+
+/*
+ * Map the reset module registers used by mpc512x_restart().
+ *
+ * reset_module_base stays NULL when the machine is neither MPC5121 nor
+ * MPC5125 compatible, when the reset node is missing or when mapping it
+ * fails; mpc512x_restart() copes with that.
+ */
+static void __init mpc512x_restart_init(void)
+{
+	struct device_node *np;
+	const char *reset_compat;
+
+	reset_compat = mpc512x_select_reset_compat();
+	if (!reset_compat) {
+		/* do not hand a NULL compatible string to the node lookup */
+		pr_err("%s: no compatible devices found\n", __func__);
+		return;
+	}
+
+	np = of_find_compatible_node(NULL, NULL, reset_compat);
+	if (!np)
+		return;
+
+	reset_module_base = of_iomap(np, 0);
+	of_node_put(np);
+}
+
+/*
+ * Early platform init: map the reset module and, when the DIU frame
+ * buffer driver is configured, take over a pre-initialized DIU.
+ */
+void __init mpc512x_init_early(void)
+{
+	mpc512x_restart_init();
+
+	if (!IS_ENABLED(CONFIG_FB_FSL_DIU))
+		return;
+
+	mpc512x_init_diu();
+}
+
+/*
+ * Common platform init, in this order: clock setup, creation of the
+ * platform devices from the device tree, PSC FIFO slice setup.
+ */
+void __init mpc512x_init(void)
+{
+	mpc5121_clk_init();
+	mpc512x_declare_of_platform_devices();
+	mpc512x_psc_fifo_init();
+}
+
+/* setup_arch hook: only the DIU needs work here, and only if configured. */
+void __init mpc512x_setup_arch(void)
+{
+	if (!IS_ENABLED(CONFIG_FB_FSL_DIU))
+		return;
+
+	mpc512x_setup_diu();
+}
+
+/**
+ * mpc512x_cs_config - Setup chip select configuration
+ * @cs: chip select number
+ * @val: chip select configuration value
+ *
+ * Write @val to the configuration register of chip select @cs on the
+ * LocalPlus Bus.  Meant for reconfiguring the chip select parameters of
+ * configurable devices on the bus at run time.  The LPC registers are
+ * mapped on first use and the mapping is kept for later calls.
+ *
+ * Return: 0 on success, -EINVAL if @cs is greater than 7, -ENOMEM if the
+ * LPC registers could not be mapped.
+ */
+int mpc512x_cs_config(unsigned int cs, u32 val)
+{
+	static struct mpc512x_lpc __iomem *lpc;
+
+	if (cs > 7)
+		return -EINVAL;
+
+	if (!lpc) {
+		struct device_node *lpc_node;
+
+		lpc_node = of_find_compatible_node(NULL, NULL,
+						   "fsl,mpc5121-lpc");
+		lpc = of_iomap(lpc_node, 0);
+		of_node_put(lpc_node);
+	}
+	if (!lpc)
+		return -ENOMEM;
+
+	out_be32(&lpc->cs_cfg[cs], val);
+	return 0;
+}
+EXPORT_SYMBOL(mpc512x_cs_config);
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
new file mode 100644
index 0000000000..ce51cfeeb0
--- /dev/null
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2010 DENX Software Engineering
+ *
+ * Anatolij Gustschin, <agust@denx.de>
+ *
+ * PDM360NG board setup
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+
+#include "mpc512x.h"
+
+#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \
+    defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE)
+#include <linux/interrupt.h>
+#include <linux/spi/ads7846.h>
+#include <linux/spi/spi.h>
+#include <linux/notifier.h>
+
+/* GPIO register block, mapped with of_iomap() in pdm360ng_penirq_init() */
+static void __iomem *pdm360ng_gpio_base;
+
+/*
+ * ads7846 get_pendown_state callback.
+ *
+ * If bit 0x40 is set in the GPIO register at offset 0xc it is written
+ * back (presumably to acknowledge a latched event -- TODO confirm); the
+ * pen state is then taken from bit 0x40 of the register at offset 0x8.
+ *
+ * Returns 1 if the pen is down, 0 otherwise.
+ */
+static int pdm360ng_get_pendown_state(void)
+{
+	u32 val;
+
+	val = in_be32(pdm360ng_gpio_base + 0xc);
+	if (val & 0x40)
+		setbits32(pdm360ng_gpio_base + 0xc, 0x40);
+
+	val = in_be32(pdm360ng_gpio_base + 0x8);
+
+	/* a cleared bit means the pen is down */
+	return !(val & 0x40);
+}
+
+/*
+ * Platform data attached to the touch controller's SPI device by
+ * pdm360ng_touchscreen_notifier_call().
+ */
+static struct ads7846_platform_data pdm360ng_ads7846_pdata = {
+	.model			= 7845,
+	.get_pendown_state	= pdm360ng_get_pendown_state,
+	.irq_flags		= IRQF_TRIGGER_LOW,
+};
+
+/*
+ * Map the GPIO registers and program them for the pen interrupt.
+ *
+ * Returns 0 on success, -ENODEV if the GPIO node is missing or cannot be
+ * mapped.
+ */
+static int __init pdm360ng_penirq_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-gpio");
+	if (!np) {
+		pr_err("%s: Can't find 'mpc5121-gpio' node\n", __func__);
+		return -ENODEV;
+	}
+
+	pdm360ng_gpio_base = of_iomap(np, 0);
+	of_node_put(np);
+	if (!pdm360ng_gpio_base) {
+		pr_err("%s: Can't map gpio regs.\n", __func__);
+		return -ENODEV;
+	}
+	/*
+	 * NOTE(review): the register names are not spelled out here; going
+	 * by pdm360ng_get_pendown_state() offset 0xc holds event bits that
+	 * are cleared by writing ones, 0x18 and 0x10 presumably select the
+	 * trigger type and unmask the pen pin (0x40) -- confirm against
+	 * the MPC5121 GPIO register map
+	 */
+	out_be32(pdm360ng_gpio_base + 0xc, 0xffffffff);
+	setbits32(pdm360ng_gpio_base + 0x18, 0x2000);
+	setbits32(pdm360ng_gpio_base + 0x10, 0x40);
+
+	return 0;
+}
+
+/*
+ * Bus notifier: when a device whose node is compatible with "ti,ads7846"
+ * is added, attach the board's platform data to it.
+ *
+ * Returns NOTIFY_OK if the platform data was attached, NOTIFY_DONE for
+ * every other event or device.
+ */
+static int pdm360ng_touchscreen_notifier_call(struct notifier_block *nb,
+					unsigned long event, void *__dev)
+{
+	struct device *dev = __dev;
+
+	if (event != BUS_NOTIFY_ADD_DEVICE)
+		return NOTIFY_DONE;
+
+	if (!of_device_is_compatible(dev->of_node, "ti,ads7846"))
+		return NOTIFY_DONE;
+
+	dev->platform_data = &pdm360ng_ads7846_pdata;
+	return NOTIFY_OK;
+}
+
+/* registered on the SPI bus by pdm360ng_touchscreen_init() */
+static struct notifier_block pdm360ng_touchscreen_nb = {
+	.notifier_call = pdm360ng_touchscreen_notifier_call,
+};
+
+/*
+ * Prepare the pen interrupt GPIO and, only if that worked, start watching
+ * the SPI bus for the touch controller device.
+ */
+static void __init pdm360ng_touchscreen_init(void)
+{
+	if (!pdm360ng_penirq_init())
+		bus_register_notifier(&spi_bus_type, &pdm360ng_touchscreen_nb);
+}
+#else
+/* no-op stand-in used when the ADS7846 touchscreen driver is not configured */
+static inline void __init pdm360ng_touchscreen_init(void)
+{
+}
+#endif /* CONFIG_TOUCHSCREEN_ADS7846 */
+
+/* Board init hook: common MPC512x init first, then the touchscreen setup. */
+void __init pdm360ng_init(void)
+{
+	mpc512x_init();
+	pdm360ng_touchscreen_init();
+}
+
+/*
+ * Machine probe hook: runs the early MPC512x setup and always reports a
+ * match by returning 1.
+ */
+static int __init pdm360ng_probe(void)
+{
+	mpc512x_init_early();
+
+	return 1;
+}
+
+/*
+ * Machine description for the PDM360NG board ("ifm,pdm360ng"): board
+ * specific probe and init, everything else is shared MPC512x/IPIC code.
+ */
+define_machine(pdm360ng) {
+	.name			= "PDM360NG",
+	.compatible		= "ifm,pdm360ng",
+	.probe			= pdm360ng_probe,
+	.setup_arch		= mpc512x_setup_arch,
+	.init			= pdm360ng_init,
+	.init_IRQ		= mpc512x_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc512x_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
new file mode 100644
index 0000000000..384e4bef2c
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MPC52xx
+	bool "52xx-based boards"
+	depends on PPC_BOOK3S_32
+	select COMMON_CLK
+	select HAVE_PCI
+
+config PPC_MPC5200_SIMPLE
+	bool "Generic support for simple MPC5200 based boards"
+	depends on PPC_MPC52xx
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for simple MPC52xx based boards which
+	  do not need a custom platform specific setup. Such boards are
+	  supported assuming the following:
+
+	  - GPIO pins are configured by the firmware,
+	  - CDM configuration (clocking) is setup correctly by firmware,
+	  - if the 'fsl,has-wdt' property is present in one of the
+	    gpt nodes, then it is safe to use such gpt to reset the board,
+	  - PCI is supported if enabled in the kernel configuration
+	    and if there is a PCI bus node defined in the device tree.
+
+	  Boards that are compatible with this generic platform support
+	  are:
+	     intercontrol,digsy-mtc
+	     phytec,pcm030
+	     phytec,pcm032
+	     promess,motionpro
+	     schindler,cm5200
+	     tqc,tqm5200
+
+config PPC_EFIKA
+	bool "bPlan Efika 5k2. MPC5200B based computer"
+	depends on PPC_MPC52xx
+	select PPC_RTAS
+	select PPC_HASH_MMU_NATIVE
+
+config PPC_LITE5200
+	bool "Freescale Lite5200 Eval Board"
+	depends on PPC_MPC52xx
+	select DEFAULT_UIMAGE
+
+config PPC_MEDIA5200
+	bool "Freescale Media5200 Eval Board"
+	depends on PPC_MPC52xx
+	select DEFAULT_UIMAGE
+
+config PPC_MPC5200_BUGFIX
+	bool "MPC5200 (L25R) bugfix support"
+	depends on PPC_MPC52xx
+	help
+	  Enable workarounds for original MPC5200 errata.  This is not required
+	  for MPC5200B based boards.
+
+	  It is safe to say 'Y' here.
diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile
new file mode 100644
index 0000000000..1b1f72d833
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for 52xx based boards
+#
+obj-y				+= mpc52xx_pic.o mpc52xx_common.o mpc52xx_gpt.o
+obj-$(CONFIG_PCI)		+= mpc52xx_pci.o
+
+obj-$(CONFIG_PPC_MPC5200_SIMPLE) += mpc5200_simple.o
+obj-$(CONFIG_PPC_EFIKA)		+= efika.o
+obj-$(CONFIG_PPC_LITE5200)	+= lite5200.o
+obj-$(CONFIG_PPC_MEDIA5200)	+= media5200.o
+
+obj-$(CONFIG_PM)		+= mpc52xx_sleep.o mpc52xx_pm.o
+ifdef CONFIG_PPC_LITE5200
+	obj-$(CONFIG_PM)	+= lite5200_sleep.o lite5200_pm.o
+endif
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
new file mode 100644
index 0000000000..aa82e6b437
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -0,0 +1,233 @@
+/*
+ * Efika 5K2 platform code
+ * Some code really inspired from the lite5200b platform.
+ *
+ * Copyright (C) 2006 bplan GmbH
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <generated/utsrelease.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <asm/dma.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/mpc52xx.h>
+
+#define EFIKA_PLATFORM_NAME "Efika"
+
+
+/* ------------------------------------------------------------------------ */
+/* PCI accesses thru RTAS                                                   */
+/* ------------------------------------------------------------------------ */
+
+#ifdef CONFIG_PCI
+
+/*
+ * PCI config space accessors implemented on top of RTAS calls.
+ *
+ * Read @len bytes at @offset of device @devfn on @bus.  Whatever RTAS
+ * handed back (or -1 if it stored nothing) ends up in @val.
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND if the RTAS
+ * call reported a failure.
+ */
+static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			    int len, u32 * val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	/* offset, devfn, hose-relative bus and hose number at bits 0/8/16/24 */
+	unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+	    | (((bus->number - hose->first_busno) & 0xff) << 16)
+	    | (hose->global_number << 24);
+	int data = -1;
+	int rc;
+
+	rc = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &data, addr, len);
+	*val = data;
+	if (rc)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write the @len byte value @val at @offset of device @devfn on @bus
+ * through RTAS.
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND if the RTAS
+ * call reported a failure.
+ */
+static int rtas_write_config(struct pci_bus *bus, unsigned int devfn,
+			     int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	/* offset, devfn, hose-relative bus and hose number at bits 0/8/16/24 */
+	unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+	    | (((bus->number - hose->first_busno) & 0xff) << 16)
+	    | (hose->global_number << 24);
+	int rc;
+
+	rc = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL,
+		       addr, len, val);
+	if (rc)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* config space accessors installed on the Efika host bridge */
+static struct pci_ops rtas_pci_ops = {
+	.read = rtas_read_config,
+	.write = rtas_write_config,
+};
+
+
+/*
+ * discover_phbs hook: find the "pci" child of the root node and set it up
+ * as a PCI host bridge whose config space is accessed through RTAS.
+ * Failures are logged and leave the system without that bridge.
+ */
+static void __init efika_pcisetup(void)
+{
+	const int *bus_range;
+	int len;
+	struct pci_controller *hose;
+	struct device_node *root;
+	struct device_node *pcictrl;
+
+	root = of_find_node_by_path("/");
+	if (root == NULL) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Unable to find the root node\n");
+		return;
+	}
+
+	/* pcictrl is NULL after the loop unless it was left via break */
+	for_each_child_of_node(root, pcictrl)
+		if (of_node_name_eq(pcictrl, "pci"))
+			break;
+
+	of_node_put(root);
+
+	if (pcictrl == NULL) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Unable to find the PCI bridge node\n");
+		return;
+	}
+
+	bus_range = of_get_property(pcictrl, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Can't get bus-range for %pOF\n", pcictrl);
+		goto out_put;
+	}
+
+	/*
+	 * emit each message with a single printk(): a follow-up printk()
+	 * without KERN_CONT starts a new log record at the default level
+	 * instead of continuing the line
+	 */
+	if (bus_range[1] == bus_range[0])
+		printk(KERN_INFO EFIKA_PLATFORM_NAME
+		       ": PCI bus %d controlled by %pOF\n",
+		       bus_range[0], pcictrl);
+	else
+		printk(KERN_INFO EFIKA_PLATFORM_NAME
+		       ": PCI buses %d..%d controlled by %pOF\n",
+		       bus_range[0], bus_range[1], pcictrl);
+
+	hose = pcibios_alloc_controller(pcictrl);
+	if (!hose) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Can't allocate PCI controller structure for %pOF\n",
+		       pcictrl);
+		goto out_put;
+	}
+
+	hose->first_busno = bus_range[0];
+	hose->last_busno = bus_range[1];
+	hose->ops = &rtas_pci_ops;
+
+	pci_process_bridge_OF_ranges(hose, pcictrl, 0);
+	/*
+	 * the node reference is not dropped on success, presumably because
+	 * the controller keeps using the node (TODO confirm)
+	 */
+	return;
+out_put:
+	of_node_put(pcictrl);
+}
+
+#else
+/* nothing to discover when PCI support is not configured */
+static void __init efika_pcisetup(void)
+{}
+#endif
+
+
+
+/* ------------------------------------------------------------------------ */
+/* Platform setup                                                           */
+/* ------------------------------------------------------------------------ */
+
+/*
+ * show_cpuinfo hook: print the machine description (or "Efika" when the
+ * firmware provides none) followed by the revision and vendor properties
+ * of the root node, as far as they exist.
+ */
+static void efika_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	const char *machine;
+	const char *rev;
+	const char *vendor;
+
+	if (!root)
+		return;
+
+	machine = of_get_property(root, "CODEGEN,description", NULL);
+	rev = of_get_property(root, "revision", NULL);
+	vendor = of_get_property(root, "CODEGEN,vendor", NULL);
+
+	seq_printf(m, "machine\t\t: %s\n", machine ? machine : "Efika");
+
+	if (rev)
+		seq_printf(m, "revision\t: %s\n", rev);
+
+	if (vendor)
+		seq_printf(m, "vendor\t\t: %s\n", vendor);
+
+	of_node_put(root);
+}
+
+#ifdef CONFIG_PM
+/*
+ * Board hook run before suspend: arm GPIO_WKUP_4 (GPIO_PSC6_0 - IRDA_RX)
+ * as wakeup source, active on high level.  In other words, shorting pins
+ * 1 and 3 of the IRDA connector wakes the board up.  @mbar is not used.
+ */
+static void efika_suspend_prepare(void __iomem *mbar)
+{
+	const u8 wakeup_pin = 4;
+	const u8 wakeup_level = 1;
+
+	mpc52xx_set_wakeup_gpio(wakeup_pin, wakeup_level);
+}
+#endif
+
+/*
+ * setup_arch hook: bring up RTAS, map the common MPC52xx registers, hook
+ * up suspend support when CONFIG_PM is set and announce the platform
+ * through the progress callback if there is one.
+ */
+static void __init efika_setup_arch(void)
+{
+	rtas_initialize();
+
+	/* Map important registers from the internal memory map */
+	mpc52xx_map_common_devices();
+
+#ifdef CONFIG_PM
+	mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare;
+	mpc52xx_pm_init();
+#endif
+
+	if (ppc_md.progress)
+		ppc_md.progress("Linux/PPC " UTS_RELEASE " running on Efika ;-)\n", 0x0);
+}
+
+/*
+ * Machine probe hook: the board is recognized by a root "model" property
+ * of exactly "EFIKA5K2".  On a match the DMA mode values and the power
+ * off handler are set up as well.
+ *
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int __init efika_probe(void)
+{
+	const char *model = of_get_property(of_root, "model", NULL);
+
+	if (!model || strcmp(model, "EFIKA5K2") != 0)
+		return 0;
+
+	DMA_MODE_READ = 0x44;
+	DMA_MODE_WRITE = 0x48;
+
+	pm_power_off = rtas_power_off;
+
+	return 1;
+}
+
+/*
+ * Machine description for the Efika: restart, halt, RTC, progress and
+ * boot time are routed to RTAS, interrupts to the shared MPC52xx code.
+ */
+define_machine(efika)
+{
+	.name			= EFIKA_PLATFORM_NAME,
+	.probe			= efika_probe,
+	.setup_arch		= efika_setup_arch,
+	.discover_phbs		= efika_pcisetup,
+	.init			= mpc52xx_declare_of_platform_devices,
+	.show_cpuinfo		= efika_show_cpuinfo,
+	.init_IRQ		= mpc52xx_init_irq,
+	.get_irq		= mpc52xx_get_irq,
+	.restart		= rtas_restart,
+	.halt			= rtas_halt,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.progress		= rtas_progress,
+	.get_boot_time		= rtas_get_boot_time,
+#ifdef CONFIG_PCI
+	.phys_mem_access_prot	= pci_phys_mem_access_prot,
+#endif
+};
+
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
new file mode 100644
index 0000000000..0fd67b3ffc
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale Lite5200 board support
+ *
+ * Written by: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (C) Secret Lab Technologies Ltd. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Description:
+ */
+
+#undef DEBUG
+
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+
+/*
+ * mpc5200 device tree match tables
+ *
+ * CDM node, matched by lite5200_fix_clock_config(); both the "fsl,"
+ * prefixed and the unprefixed compatible are accepted.
+ */
+static const struct of_device_id mpc5200_cdm_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-cdm", },
+	{ .compatible = "mpc5200-cdm", },
+	{}
+};
+
+/* GPIO node, matched by lite5200_fix_port_config() */
+static const struct of_device_id mpc5200_gpio_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpio", },
+	{ .compatible = "mpc5200-gpio", },
+	{}
+};
+
+/*
+ * Fix clock configuration.
+ *
+ * Firmware is supposed to be responsible for this.  If you are creating a
+ * new board port, do *NOT* duplicate this code.  Fix your boot firmware
+ * to set it correctly in the first place
+ *
+ * The CDM registers are mapped only for the duration of this function;
+ * if they cannot be mapped an error is logged and nothing is changed.
+ */
+static void __init
+lite5200_fix_clock_config(void)
+{
+	struct device_node *np;
+	struct mpc52xx_cdm  __iomem *cdm;
+	/* Map zones */
+	np = of_find_matching_node(NULL, mpc5200_cdm_ids);
+	cdm = of_iomap(np, 0);
+	of_node_put(np);
+	if (!cdm) {
+		printk(KERN_ERR "%s() failed; expect abnormal behaviour\n",
+		       __func__);
+		return;
+	}
+
+	/* Use internal 48 Mhz */
+	out_8(&cdm->ext_48mhz_en, 0x00);
+	out_8(&cdm->fd_enable, 0x01);
+	/* the fd_counters value depends on bit 0x40 of the reset config word */
+	if (in_be32(&cdm->rstcfg) & 0x40)	/* Assumes 33Mhz clock */
+		out_be16(&cdm->fd_counters, 0x0001);
+	else
+		out_be16(&cdm->fd_counters, 0x5555);
+
+	/* Unmap the regs */
+	iounmap(cdm);
+}
+
+/*
+ * Fix setting of port_config register.
+ *
+ * Firmware is supposed to be responsible for this.  If you are creating a
+ * new board port, do *NOT* duplicate this code.  Fix your boot firmware
+ * to set it correctly in the first place
+ *
+ * The register is read, three fields are rewritten (48MHz pin, USB port
+ * mode, ATA chip selects) and the result is written back; all other bits
+ * keep their value.  If the GPIO registers cannot be mapped an error is
+ * logged and nothing is changed.
+ */
+static void __init
+lite5200_fix_port_config(void)
+{
+	struct device_node *np;
+	struct mpc52xx_gpio __iomem *gpio;
+	u32 port_config;
+
+	np = of_find_matching_node(NULL, mpc5200_gpio_ids);
+	gpio = of_iomap(np, 0);
+	of_node_put(np);
+	if (!gpio) {
+		printk(KERN_ERR "%s() failed. expect abnormal behavior\n",
+		       __func__);
+		return;
+	}
+
+	/* Set port config */
+	port_config = in_be32(&gpio->port_config);
+
+	port_config &= ~0x00800000;	/* 48Mhz internal, pin is GPIO	*/
+
+	port_config &= ~0x00007000;	/* USB port : Differential mode	*/
+	port_config |=  0x00001000;	/*            USB 1 only	*/
+
+	port_config &= ~0x03000000;	/* ATA CS is on csb_4/5		*/
+	port_config |=  0x01000000;
+
+	/* the register still holds the old value at this point */
+	pr_debug("port_config: old:%x new:%x\n",
+	         in_be32(&gpio->port_config), port_config);
+	out_be32(&gpio->port_config, port_config);
+
+	/* Unmap zone */
+	iounmap(gpio);
+}
+
+#ifdef CONFIG_PM
+/*
+ * Board hook run before suspend: arm the wakeup GPIO and power down the
+ * USB port through the registers at @mbar + 0x1048 / 0x1050.
+ */
+static void lite5200_suspend_prepare(void __iomem *mbar)
+{
+	u8 pin = 1;	/* GPIO_WKUP_1 (GPIO_PSC2_4) */
+	u8 level = 0;	/* wakeup on low level */
+	mpc52xx_set_wakeup_gpio(pin, level);
+
+	/*
+	 * power down usb port
+	 * this needs to be called before of-ohci suspend code
+	 */
+
+	/* set ports to "power switched" and "powered at the same time"
+	 * USB Rh descriptor A: NPS = 0, PSM = 0 */
+	out_be32(mbar + 0x1048, in_be32(mbar + 0x1048) & ~0x300);
+	/* USB Rh status: LPS = 1 - turn off power */
+	out_be32(mbar + 0x1050, 0x00000001);
+}
+
+/* Board hook run at the end of resume: switch the USB port power back on. */
+static void lite5200_resume_finish(void __iomem *mbar)
+{
+	/* USB Rh status: LPSC = 1 - turn on power */
+	out_be32(mbar + 0x1050, 0x00010000);
+}
+#endif
+
+/*
+ * setup_arch hook: map the common MPC52xx registers, set up the XLB
+ * arbiter, apply the clock and port fixes the firmware should have done
+ * and register the board's suspend hooks when CONFIG_PM is set.
+ */
+static void __init lite5200_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("lite5200_setup_arch()", 0);
+
+	/* Map important registers from the internal memory map */
+	mpc52xx_map_common_devices();
+
+	/* Some mpc5200 & mpc5200b related configuration */
+	mpc5200_setup_xlb_arbiter();
+
+	/* Fix things that firmware should have done. */
+	lite5200_fix_clock_config();
+	lite5200_fix_port_config();
+
+#ifdef CONFIG_PM
+	mpc52xx_suspend.board_suspend_prepare = lite5200_suspend_prepare;
+	mpc52xx_suspend.board_resume_finish = lite5200_resume_finish;
+	lite5200_pm_init();
+#endif
+}
+
+/* NULL terminated list of root compatibles checked by lite5200_probe() */
+static const char * const board[] __initconst = {
+	"fsl,lite5200",
+	"fsl,lite5200b",
+	NULL,
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * NOTE(review): the statement above looks stale -- the body matches the
+ * board list against of_root, which is a node of the unflattened tree.
+ * Confirm and update.
+ */
+static int __init lite5200_probe(void)
+{
+	return of_device_compatible_match(of_root, board);
+}
+
+/*
+ * Machine description for the Lite5200 boards: board specific probe and
+ * setup_arch, everything else is shared MPC52xx code.
+ */
+define_machine(lite5200) {
+	.name 		= "lite5200",
+	.probe 		= lite5200_probe,
+	.setup_arch 	= lite5200_setup_arch,
+	.discover_phbs	= mpc52xx_setup_pci,
+	.init		= mpc52xx_declare_of_platform_devices,
+	.init_IRQ 	= mpc52xx_init_irq,
+	.get_irq 	= mpc52xx_get_irq,
+	.restart	= mpc52xx_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c
new file mode 100644
index 0000000000..4900f5f48c
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/lite5200_pm.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/mpc52xx.h>
+#include <asm/switch_to.h>
+
+/* defined in lite5200_sleep.S and only used here */
+extern void lite5200_low_power(void __iomem *sram, void __iomem *mbar);
+
+static struct mpc52xx_cdm __iomem *cdm;		/* mbar + 0x200 */
+static struct mpc52xx_intr __iomem *pic;	/* mbar + 0x500 */
+static struct mpc52xx_sdma __iomem *bes;	/* mbar + 0x1200 */
+static struct mpc52xx_xlb __iomem *xlb;		/* mbar + 0x1f00 */
+static struct mpc52xx_gpio __iomem *gps;	/* mbar + 0xb00 */
+static struct mpc52xx_gpio_wkup __iomem *gpw;	/* mbar + 0xc00 */
+static void __iomem *pci;			/* mbar + 0xd00 */
+static void __iomem *sram;			/* mbar + 0x8000 */
+static const int sram_size = 0x4000;	/* 16 kBytes */
+static void __iomem *mbar;	/* mapped in .prepare, unmapped in .enter */
+
+static suspend_state_t lite5200_pm_target_state; /* set by .begin, reset by .end */
+
+/*
+ * .valid callback of lite5200_pm_ops.
+ *
+ * Returns 1 for the two states this board handles (standby and
+ * suspend-to-RAM) and 0 for everything else.
+ */
+static int lite5200_pm_valid(suspend_state_t state)
+{
+	return state == PM_SUSPEND_STANDBY || state == PM_SUSPEND_MEM;
+}
+
+/* .begin callback: record the target state, or -EINVAL if unsupported. */
+static int lite5200_pm_begin(suspend_state_t state)
+{
+	if (!lite5200_pm_valid(state))
+		return -EINVAL;
+	lite5200_pm_target_state = state;
+	return 0;
+}
+
+/* .prepare: map the MBAR window for suspend-to-RAM (standby is delegated) */
+static int lite5200_pm_prepare(void)
+{
+	struct device_node *np;
+	static const struct of_device_id immr_ids[] = {
+		{ .compatible = "fsl,mpc5200-immr", },
+		{ .compatible = "fsl,mpc5200b-immr", },
+		{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+		{ .type = "builtin", .compatible = "mpc5200", }, /* efika */
+		{}
+	};
+	struct resource res;
+	int ret;
+
+	if (lite5200_pm_target_state == PM_SUSPEND_STANDBY)
+		return mpc52xx_pm_prepare();	/* deep sleep: mpc52xx code */
+	if (lite5200_pm_target_state != PM_SUSPEND_MEM)
+		return -EINVAL;
+
+	/* map registers; res is only filled in when the lookup succeeds */
+	np = of_find_matching_node(NULL, immr_ids);
+	ret = np ? of_address_to_resource(np, 0, &res) : -ENODEV;
+	of_node_put(np);
+	if (ret)
+		return ret;
+
+	mbar = ioremap(res.start, 0xC000);
+	if (!mbar) {
+		printk(KERN_ERR "%s:%i Error mapping registers\n", __func__, __LINE__);
+		return -ENOSYS;
+	}
+	cdm = mbar + 0x200;
+	pic = mbar + 0x500;
+	gps = mbar + 0xb00;
+	gpw = mbar + 0xc00;
+	pci = mbar + 0xd00;
+	bes = mbar + 0x1200;
+	xlb = mbar + 0x1f00;
+	sram = mbar + 0x8000;
+	return 0;
+}
+
+/* RAM copies of registers not bound to any real device (saved/restored below) */
+static struct mpc52xx_cdm scdm;
+static struct mpc52xx_intr spic;
+static struct mpc52xx_sdma sbes;
+static struct mpc52xx_xlb sxlb;
+static struct mpc52xx_gpio sgps;
+static struct mpc52xx_gpio_wkup sgpw;
+static char spci[0x200];	/* raw copy of the 0x200 bytes at mbar + 0xd00 */
+
+static void lite5200_save_regs(void)
+{
+	_memcpy_fromio(&spic, pic, sizeof(*pic));
+	_memcpy_fromio(&sbes, bes, sizeof(*bes));
+	_memcpy_fromio(&scdm, cdm, sizeof(*cdm));
+	_memcpy_fromio(&sxlb, xlb, sizeof(*xlb));
+	_memcpy_fromio(&sgps, gps, sizeof(*gps));
+	_memcpy_fromio(&sgpw, gpw, sizeof(*gpw));
+	_memcpy_fromio(spci, pci, 0x200);
+	/* the sleep code is copied into SRAM, so keep its old contents too */
+	_memcpy_fromio(saved_sram, sram, sram_size);
+}
+
+static void lite5200_restore_regs(void)
+{
+	int i;
+	_memcpy_toio(sram, saved_sram, sram_size);	/* undo the sleep-code copy */
+
+	/* PCI Configuration */
+	_memcpy_toio(pci, spci, 0x200);
+
+	/*
+	 * GPIOs. Interrupt Master Enable has a higher address than the other
+	 * registers, so just memcpy is ok.
+	 */
+	_memcpy_toio(gpw, &sgpw, sizeof(*gpw));
+	_memcpy_toio(gps, &sgps, sizeof(*gps));
+
+
+	/* XLB Arbiter */
+	out_be32(&xlb->snoop_window, sxlb.snoop_window);
+	out_be32(&xlb->master_priority, sxlb.master_priority);
+	out_be32(&xlb->master_pri_enable, sxlb.master_pri_enable);
+
+	/* enable */
+	out_be32(&xlb->int_enable, sxlb.int_enable);
+	out_be32(&xlb->config, sxlb.config);
+
+
+	/* CDM - Clock Distribution Module */
+	out_8(&cdm->ipb_clk_sel, scdm.ipb_clk_sel);
+	out_8(&cdm->pci_clk_sel, scdm.pci_clk_sel);
+
+	out_8(&cdm->ext_48mhz_en, scdm.ext_48mhz_en);
+	out_8(&cdm->fd_enable, scdm.fd_enable);
+	out_be16(&cdm->fd_counters, scdm.fd_counters);
+
+	out_be32(&cdm->clk_enables, scdm.clk_enables);
+
+	out_8(&cdm->osc_disable, scdm.osc_disable);
+
+	out_be16(&cdm->mclken_div_psc1, scdm.mclken_div_psc1);
+	out_be16(&cdm->mclken_div_psc2, scdm.mclken_div_psc2);
+	out_be16(&cdm->mclken_div_psc3, scdm.mclken_div_psc3);
+	out_be16(&cdm->mclken_div_psc6, scdm.mclken_div_psc6);
+
+
+	/* BESTCOMM */
+	out_be32(&bes->taskBar, sbes.taskBar);
+	out_be32(&bes->currentPointer, sbes.currentPointer);
+	out_be32(&bes->endPointer, sbes.endPointer);
+	out_be32(&bes->variablePointer, sbes.variablePointer);
+
+	out_8(&bes->IntVect1, sbes.IntVect1);
+	out_8(&bes->IntVect2, sbes.IntVect2);
+	out_be16(&bes->PtdCntrl, sbes.PtdCntrl);
+
+	for (i=0; i<32; i++)
+		out_8(&bes->ipr[i], sbes.ipr[i]);
+
+	out_be32(&bes->cReqSelect, sbes.cReqSelect);
+	out_be32(&bes->task_size0, sbes.task_size0);
+	out_be32(&bes->task_size1, sbes.task_size1);
+	out_be32(&bes->MDEDebug, sbes.MDEDebug);
+	out_be32(&bes->ADSDebug, sbes.ADSDebug);
+	out_be32(&bes->Value1, sbes.Value1);
+	out_be32(&bes->Value2, sbes.Value2);
+	out_be32(&bes->Control, sbes.Control);
+	out_be32(&bes->Status, sbes.Status);
+	out_be32(&bes->PTDDebug, sbes.PTDDebug);
+
+	/* restore tasks (written only after the BestComm state above) */
+	for (i=0; i<16; i++)
+		out_be16(&bes->tcr[i], sbes.tcr[i]);
+
+	/* enable interrupts */
+	out_be32(&bes->IntPend, sbes.IntPend);
+	out_be32(&bes->IntMask, sbes.IntMask);
+
+
+	/* PIC */
+	out_be32(&pic->per_pri1, spic.per_pri1);
+	out_be32(&pic->per_pri2, spic.per_pri2);
+	out_be32(&pic->per_pri3, spic.per_pri3);
+
+	out_be32(&pic->main_pri1, spic.main_pri1);
+	out_be32(&pic->main_pri2, spic.main_pri2);
+
+	out_be32(&pic->enc_status, spic.enc_status);
+
+	/* unmask and enable interrupts: done last, after priorities are back */
+	out_be32(&pic->per_mask, spic.per_mask);
+	out_be32(&pic->main_mask, spic.main_mask);
+	out_be32(&pic->ctrl, spic.ctrl);
+}
+
+/* .enter callback: standby is delegated, suspend-to-RAM is done here. */
+static int lite5200_pm_enter(suspend_state_t state)
+{
+	/* deep sleep? let mpc52xx code handle that */
+	if (state == PM_SUSPEND_STANDBY)
+		return mpc52xx_pm_enter(state);
+
+	lite5200_save_regs();
+
+	/* effectively save FP regs */
+	enable_kernel_fp();
+
+	lite5200_low_power(sram, mbar);
+	/* back from sleep: lite5200_wakeup returns here via the saved LR */
+	lite5200_restore_regs();
+
+	iounmap(mbar);
+	return 0;
+}
+
+static void lite5200_pm_finish(void)
+{
+	/* standby is finished by the mpc52xx code; nothing to do otherwise */
+	if (lite5200_pm_target_state == PM_SUSPEND_STANDBY)
+		mpc52xx_pm_finish();
+}
+
+static void lite5200_pm_end(void)
+{
+	lite5200_pm_target_state = PM_SUSPEND_ON;	/* forget the .begin target */
+}
+
+static const struct platform_suspend_ops lite5200_pm_ops = {
+	.valid		= lite5200_pm_valid,	/* standby and mem only */
+	.begin		= lite5200_pm_begin,	/* records the target state */
+	.prepare	= lite5200_pm_prepare,	/* maps registers for mem */
+	.enter		= lite5200_pm_enter,
+	.finish		= lite5200_pm_finish,
+	.end		= lite5200_pm_end,	/* resets the target state */
+};
+
+int __init lite5200_pm_init(void)
+{
+	suspend_set_ops(&lite5200_pm_ops);	/* install the board callbacks */
+	return 0;	/* always succeeds */
+}
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
new file mode 100644
index 0000000000..0b12647e7b
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+
+
+#define SDRAM_CTRL	0x104
+#define SC_MODE_EN	(1<<31)
+#define SC_CKE		(1<<30)
+#define SC_REF_EN	(1<<28)
+#define SC_SOFT_PRE	(1<<1)
+
+#define GPIOW_GPIOE	0xc00
+#define GPIOW_DDR	0xc08
+#define GPIOW_DVO	0xc0c
+
+#define CDM_CE		0x214
+#define CDM_SDRAM	(1<<3)
+
+
+/* helpers... beware: r10 is overwritten; r4 must hold the save-area base */
+#define SAVE_SPRN(reg, addr)		\
+	mfspr	r10, SPRN_##reg;	\
+	stw	r10, ((addr)*4)(r4);
+
+#define LOAD_SPRN(reg, addr)		\
+	lwz	r10, ((addr)*4)(r4);	\
+	mtspr	SPRN_##reg, r10;	\
+	sync;				\
+	isync;
+
+
+	.data
+registers:
+	.space 0x5c*4
+	.text
+
+/* ---------------------------------------------------------------------- */
+/* low-power mode with help of M68HLC908QT1 */
+
+	.globl lite5200_low_power
+lite5200_low_power:
+	/* in: r3 = SRAM va, r4 = MBAR va */
+	mr	r7, r3	/* save SRAM va */
+	mr	r8, r4	/* save MBAR va */
+
+	/* setup wakeup address for u-boot at physical location 0x0 */
+	lis	r3, CONFIG_KERNEL_START@h
+	lis	r4, lite5200_wakeup@h
+	ori	r4, r4, lite5200_wakeup@l
+	sub	r4, r4, r3
+	stw	r4, 0(r3)
+
+
+	/*
+	 * save stuff BDI overwrites
+	 * 0xf0 (0xe0->0x100 gets overwritten when BDI connected;
+	 *   even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?))
+	 * WARNING: self-refresh doesn't seem to work when BDI2000 is connected,
+	 *   possibly because BDI sets SDRAM registers before wakeup code does
+	 */
+	lis	r4, registers@h
+	ori	r4, r4, registers@l
+	lwz	r10, 0xf0(r3)
+	stw	r10, (0x1d*4)(r4)
+
+	/* save registers to r4 [destroys r10] */
+	SAVE_SPRN(LR, 0x1c)
+	bl	save_regs
+
+	/* flush caches [destroys r3, r4] */
+	bl	flush_data_cache
+
+
+	/* copy sram_code..sram_code_end to sram, one word at a time */
+	mr	r4, r7
+	li	r3, (sram_code_end - sram_code)/4
+	mtctr	r3
+	lis	r3, sram_code@h
+	ori	r3, r3, sram_code@l
+1:
+	lwz	r5, 0(r3)
+	stw	r5, 0(r4)
+	addi	r3, r3, 4
+	addi	r4, r4, 4
+	bdnz	1b
+
+	/* get tb_ticks_per_usec; @ha because the @l displacement is signed */
+	lis	r3, tb_ticks_per_usec@ha
+	lwz	r11, tb_ticks_per_usec@l(r3)
+
+	/* disable I and D caches (ori then xori clears both bits) */
+	mfspr	r3, SPRN_HID0
+	ori	r3, r3, HID0_ICE | HID0_DCE
+	xori	r3, r3, HID0_ICE | HID0_DCE
+	sync; isync;
+	mtspr	SPRN_HID0, r3
+	sync; isync;
+
+	/* jump to the copy in sram (r7 = SRAM va) */
+	mtlr	r7
+	blrl
+	/* doesn't return */
+
+
+sram_code:
+	/* self refresh; runs from sram with r8 = MBAR va, r11 = tb_ticks_per_usec */
+	lwz	r4, SDRAM_CTRL(r8)
+
+	/* send NOP (precharge) */
+	oris	r4, r4, SC_MODE_EN@h	/* mode_en */
+	stw	r4, SDRAM_CTRL(r8)
+	sync
+
+	ori	r4, r4, SC_SOFT_PRE	/* soft_pre */
+	stw	r4, SDRAM_CTRL(r8)
+	sync
+	xori	r4, r4, SC_SOFT_PRE
+
+	xoris	r4, r4, SC_MODE_EN@h	/* !mode_en */
+	stw	r4, SDRAM_CTRL(r8)
+	sync
+
+	/* delay (for NOP to finish) */
+	li	r12, 1
+	bl	udelay
+
+	/*
+	 * mode_en must not be set when enabling self-refresh
+	 * send AR with CKE low (self-refresh)
+	 */
+	oris	r4, r4, (SC_REF_EN | SC_CKE)@h
+	xoris	r4, r4, (SC_CKE)@h	/* ref_en !cke */
+	stw	r4, SDRAM_CTRL(r8)
+	sync
+
+	/* delay (after !CKE there should be two cycles) */
+	li	r12, 1
+	bl	udelay
+
+	/* disable the SDRAM clock bit in the CDM clock-enable register */
+	lwz	r4, CDM_CE(r8)
+	ori	r4, r4, CDM_SDRAM
+	xori	r4, r4, CDM_SDRAM
+	stw	r4, CDM_CE(r8)
+	sync
+
+	/* delay a bit */
+	li	r12, 1
+	bl	udelay
+
+
+	/* turn off with QT chip */
+	li	r4, 0x02
+	stb	r4, GPIOW_GPIOE(r8)	/* enable gpio_wkup1 */
+	sync
+
+	stb	r4, GPIOW_DVO(r8)	/* "output" high */
+	sync
+	stb	r4, GPIOW_DDR(r8)	/* output */
+	sync
+	stb	r4, GPIOW_DVO(r8)	/* output high */
+	sync
+
+	/* 10uS delay */
+	li	r12, 10
+	bl	udelay
+
+	/* turn off */
+	li	r4, 0
+	stb	r4, GPIOW_DVO(r8)	/* output low */
+	sync
+
+	/* wait until we're offline */
+  1:
+	b	1b
+
+
+	/* local udelay: placed before sram_code_end so it is copied to sram too */
+SYM_FUNC_START_LOCAL(udelay)
+	/* r11 - tb_ticks_per_usec, r12 - usecs; overwrites r12 and r13 */
+	mullw	r12, r12, r11
+	mftb	r13	/* start */
+	add	r12, r13, r12 /* end */
+    1:
+	mftb	r13	/* current */
+	cmp	cr0, r13, r12
+	blt	1b
+	blr
+SYM_FUNC_END(udelay)
+
+sram_code_end:
+
+
+
+/* uboot jumps here on resume */
+lite5200_wakeup:
+	bl	restore_regs
+
+	/* from here on r4 = physical address of the save area */
+	/* HID1, HID2 (HID0 and MSR are restored once the MMU is on) */
+	LOAD_SPRN(HID1, 0x19)
+	LOAD_SPRN(HID2, 0x1a)
+
+
+	/* address translation is tricky (see turn_on_mmu) */
+	mfmsr	r10
+	ori	r10, r10, MSR_DR | MSR_IR
+
+	/* rfi below jumps to mmu_on (SRR0) with MSR loaded from SRR1 */
+	mtspr	SPRN_SRR1, r10
+	lis	r10, mmu_on@h
+	ori	r10, r10, mmu_on@l
+	mtspr	SPRN_SRR0, r10
+	sync
+	rfi
+mmu_on:
+	/* kernel offset (r4 is still set from restore_regs) */
+	addis	r4, r4, CONFIG_KERNEL_START@h
+
+
+	/* restore MSR */
+	lwz	r10, (4*0x1b)(r4)
+	mtmsr	r10
+	sync; isync;
+
+	/* invalidate caches */
+	mfspr	r10, SPRN_HID0
+	ori	r5, r10, HID0_ICFI | HID0_DCI
+	mtspr	SPRN_HID0, r5	/* invalidate caches */
+	sync; isync;
+	mtspr	SPRN_HID0, r10
+	sync; isync;
+
+	/* enable caches */
+	lwz	r10, (4*0x18)(r4)
+	mtspr	SPRN_HID0, r10	/* restore (enable caches, DPM) */
+	/* ^ this has to be after address translation set in MSR */
+	sync
+	isync
+
+
+	/* restore 0xf0 (BDI2000) */
+	lis	r3, CONFIG_KERNEL_START@h
+	lwz	r10, (0x1d*4)(r4)
+	stw	r10, 0xf0(r3)
+
+	LOAD_SPRN(LR, 0x1c)
+
+	/* return to the caller of lite5200_low_power via the restored LR */
+	blr
+_ASM_NOKPROBE_SYMBOL(lite5200_wakeup)
+
+
+/* ---------------------------------------------------------------------- */
+/* boring code: helpers */
+
+/* save registers */
+#define SAVE_BAT(n, addr)		\
+	SAVE_SPRN(DBAT##n##L, addr);	\
+	SAVE_SPRN(DBAT##n##U, addr+1);	\
+	SAVE_SPRN(IBAT##n##L, addr+2);	\
+	SAVE_SPRN(IBAT##n##U, addr+3);
+
+#define SAVE_SR(n, addr)		\
+	mfsr	r10, n;			\
+	stw	r10, ((addr)*4)(r4);
+
+#define SAVE_4SR(n, addr)	\
+	SAVE_SR(n, addr);	\
+	SAVE_SR(n+1, addr+1);	\
+	SAVE_SR(n+2, addr+2);	\
+	SAVE_SR(n+3, addr+3);
+
+SYM_FUNC_START_LOCAL(save_regs)
+	stw	r0, 0(r4)
+	stw	r1, 0x4(r4)
+	stw	r2, 0x8(r4)
+	stmw	r11, 0xc(r4) /* r11-r31: 0xc -> 0x5f, (0x18*4-1) */
+
+	SAVE_SPRN(HID0, 0x18)
+	SAVE_SPRN(HID1, 0x19)
+	SAVE_SPRN(HID2, 0x1a)
+	mfmsr	r10
+	stw	r10, (4*0x1b)(r4)
+	/* LR (0x1c) is saved by the caller, before the bl that overwrites it */
+	/* 0x1d reserved by 0xf0 */
+	SAVE_SPRN(RPA,   0x1e)
+	SAVE_SPRN(SDR1,  0x1f)
+
+	/* save MMU regs: BATs 0-7, then the 16 segment registers */
+	SAVE_BAT(0, 0x20)
+	SAVE_BAT(1, 0x24)
+	SAVE_BAT(2, 0x28)
+	SAVE_BAT(3, 0x2c)
+	SAVE_BAT(4, 0x30)
+	SAVE_BAT(5, 0x34)
+	SAVE_BAT(6, 0x38)
+	SAVE_BAT(7, 0x3c)
+
+	SAVE_4SR(0, 0x40)
+	SAVE_4SR(4, 0x44)
+	SAVE_4SR(8, 0x48)
+	SAVE_4SR(12, 0x4c)
+
+	SAVE_SPRN(SPRG0, 0x50)
+	SAVE_SPRN(SPRG1, 0x51)
+	SAVE_SPRN(SPRG2, 0x52)
+	SAVE_SPRN(SPRG3, 0x53)
+	SAVE_SPRN(SPRG4, 0x54)
+	SAVE_SPRN(SPRG5, 0x55)
+	SAVE_SPRN(SPRG6, 0x56)
+	SAVE_SPRN(SPRG7, 0x57)
+
+	SAVE_SPRN(IABR,  0x58)
+	SAVE_SPRN(DABR,  0x59)
+	SAVE_SPRN(TBRL,  0x5a)
+	SAVE_SPRN(TBRU,  0x5b)
+
+	blr
+SYM_FUNC_END(save_regs)
+
+
+/* restore registers */
+#define LOAD_BAT(n, addr)		\
+	LOAD_SPRN(DBAT##n##L, addr);	\
+	LOAD_SPRN(DBAT##n##U, addr+1);	\
+	LOAD_SPRN(IBAT##n##L, addr+2);	\
+	LOAD_SPRN(IBAT##n##U, addr+3);
+
+#define LOAD_SR(n, addr)		\
+	lwz	r10, ((addr)*4)(r4);	\
+	mtsr	n, r10;
+
+#define LOAD_4SR(n, addr)	\
+	LOAD_SR(n, addr);	\
+	LOAD_SR(n+1, addr+1);	\
+	LOAD_SR(n+2, addr+2);	\
+	LOAD_SR(n+3, addr+3);
+
+SYM_FUNC_START_LOCAL(restore_regs)
+	lis	r4, registers@h
+	ori	r4, r4, registers@l
+
+	/* MMU is not up yet: use the physical address of the save area */
+	subis	r4, r4, CONFIG_KERNEL_START@h
+
+	lwz	r0, 0(r4)
+	lwz	r1, 0x4(r4)
+	lwz	r2, 0x8(r4)
+	lmw	r11, 0xc(r4)
+
+	/*
+	 * these are a bit tricky
+	 * (not restored here; lite5200_wakeup handles them)
+	 * 0x18 - HID0
+	 * 0x19 - HID1
+	 * 0x1a - HID2
+	 * 0x1b - MSR
+	 * 0x1c - LR
+	 * 0x1d - reserved by 0xf0 (BDI2000)
+	 */
+	LOAD_SPRN(RPA,   0x1e);
+	LOAD_SPRN(SDR1,  0x1f);
+
+	/* restore MMU regs */
+	LOAD_BAT(0, 0x20)
+	LOAD_BAT(1, 0x24)
+	LOAD_BAT(2, 0x28)
+	LOAD_BAT(3, 0x2c)
+	LOAD_BAT(4, 0x30)
+	LOAD_BAT(5, 0x34)
+	LOAD_BAT(6, 0x38)
+	LOAD_BAT(7, 0x3c)
+
+	LOAD_4SR(0, 0x40)
+	LOAD_4SR(4, 0x44)
+	LOAD_4SR(8, 0x48)
+	LOAD_4SR(12, 0x4c)
+
+	/* rest of regs */
+	LOAD_SPRN(SPRG0, 0x50);
+	LOAD_SPRN(SPRG1, 0x51);
+	LOAD_SPRN(SPRG2, 0x52);
+	LOAD_SPRN(SPRG3, 0x53);
+	LOAD_SPRN(SPRG4, 0x54);
+	LOAD_SPRN(SPRG5, 0x55);
+	LOAD_SPRN(SPRG6, 0x56);
+	LOAD_SPRN(SPRG7, 0x57);
+
+	LOAD_SPRN(IABR,  0x58);
+	LOAD_SPRN(DABR,  0x59);
+	LOAD_SPRN(TBWL,  0x5a);	/* these two have separate R/W regs */
+	LOAD_SPRN(TBWU,  0x5b);
+
+	blr
+_ASM_NOKPROBE_SYMBOL(restore_regs)
+SYM_FUNC_END(restore_regs)
+
+
+
+/* cache flushing code. copied from arch/ppc/boot/util.S */
+#define NUM_CACHE_LINES (128*8)
+
+/*
+ * Flush data cache (clobbers r3, r4 and ctr)
+ * Do this by reading NUM_CACHE_LINES lines starting at CONFIG_KERNEL_START.
+ */
+SYM_FUNC_START_LOCAL(flush_data_cache)
+	lis	r3,CONFIG_KERNEL_START@h
+	ori	r3,r3,CONFIG_KERNEL_START@l
+	li	r4,NUM_CACHE_LINES
+	mtctr	r4
+1:
+	lwz	r4,0(r3)
+	addi	r3,r3,L1_CACHE_BYTES	/* Next line, please */
+	bdnz	1b
+	blr
+SYM_FUNC_END(flush_data_cache)
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
new file mode 100644
index 0000000000..19626cd424
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for 'media5200-platform' compatible boards.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ *
+ * Description:
+ * This code implements support for the Freescale Media5200 platform
+ * (built around the MPC5200 SoC).
+ *
+ * Notable characteristic of the Media5200 is the presence of an FPGA
+ * that has all external IRQ lines routed through it.  This file implements
+ * a cascaded interrupt controller driver which attaches itself to the
+ * Virtual IRQ subsystem after the primary mpc5200 interrupt controller
+ * is initialized.
+ */
+
+#undef DEBUG
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+static const struct of_device_id mpc5200_gpio_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpio", },
+	{ .compatible = "mpc5200-gpio", },
+	{}
+};
+
+/* FPGA register set */
+#define MEDIA5200_IRQ_ENABLE (0x40c)
+#define MEDIA5200_IRQ_STATUS (0x410)
+#define MEDIA5200_NUM_IRQS   (6)
+#define MEDIA5200_IRQ_SHIFT  (32 - MEDIA5200_NUM_IRQS)
+
+struct media5200_irq {
+	void __iomem *regs;	/* FPGA registers, mapped in media5200_init_irq() */
+	spinlock_t lock;	/* guards read-modify-write of the enable register */
+	struct irq_domain *irqhost;	/* linear domain of MEDIA5200_NUM_IRQS */
+};
+struct media5200_irq media5200_irq;
+
+/* Unmask one FPGA interrupt; 1U because hwirq 5 maps to bit 31. */
+static void media5200_irq_unmask(struct irq_data *d)
+{
+	u32 bit = 1U << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
+	unsigned long flags;
+
+	spin_lock_irqsave(&media5200_irq.lock, flags);
+	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE,
+		 in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE) | bit);
+	spin_unlock_irqrestore(&media5200_irq.lock, flags);
+}
+
+/* Mask one FPGA interrupt; 1U because hwirq 5 maps to bit 31. */
+static void media5200_irq_mask(struct irq_data *d)
+{
+	u32 bit = 1U << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
+	unsigned long flags;
+
+	spin_lock_irqsave(&media5200_irq.lock, flags);
+	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE,
+		 in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE) & ~bit);
+	spin_unlock_irqrestore(&media5200_irq.lock, flags);
+}
+
+static struct irq_chip media5200_irq_chip = {
+	.name = "Media5200 FPGA",
+	.irq_unmask = media5200_irq_unmask,
+	.irq_mask = media5200_irq_mask,
+	.irq_mask_ack = media5200_irq_mask,	/* same routine: mask only */
+};
+
+/*
+ * Chained handler for the FPGA cascade: mask the parent, dispatch the lowest
+ * pending-and-enabled FPGA interrupt, then ack/unmask the parent again.
+ */
+static void media5200_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int val;
+	u32 status, enable;
+
+	/* Mask off the cascaded IRQ */
+	raw_spin_lock(&desc->lock);
+	chip->irq_mask(&desc->irq_data);
+	raw_spin_unlock(&desc->lock);
+
+	/* Ask the FPGA for IRQ status.  If 'val' is 0, then no irqs
+	 * are pending.  'ffs()' is 1 based */
+	enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
+	status = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS);
+	val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT);
+	if (val)
+		generic_handle_domain_irq(media5200_irq.irqhost, val - 1);
+
+	/* Processing done; can reenable the cascade now */
+	raw_spin_lock(&desc->lock);
+	chip->irq_ack(&desc->irq_data);
+	if (!irqd_irq_disabled(&desc->irq_data))
+		chip->irq_unmask(&desc->irq_data);
+	raw_spin_unlock(&desc->lock);
+}
+
+static int media5200_irq_map(struct irq_domain *h, unsigned int virq,
+			     irq_hw_number_t hw)
+{
+	pr_debug("%s: h=%p, virq=%i, hwirq=%i\n", __func__, h, virq, (int)hw);
+	irq_set_chip_data(virq, &media5200_irq);
+	irq_set_chip_and_handler(virq, &media5200_irq_chip, handle_level_irq);
+	irq_set_status_flags(virq, IRQ_LEVEL);	/* every FPGA irq is level type */
+	return 0;
+}
+
+/* Translate a two-cell specifier <bank number>; only the number is used. */
+static int media5200_irq_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	if (intsize != 2)
+		return -EINVAL;
+
+	pr_debug("%s: bank=%i, number=%i\n", __func__, intspec[0], intspec[1]);
+	*out_hwirq = intspec[1];
+	*out_flags = IRQ_TYPE_NONE;
+	return 0;
+}
+
+static const struct irq_domain_ops media5200_irq_ops = {
+	.map = media5200_irq_map,
+	.xlate = media5200_irq_xlate,	/* two cells; the second is the hwirq */
+};
+
+/*
+ * Setup Media5200 IRQ mapping; the FPGA mapping is dropped if setup fails
+ */
+static void __init media5200_init_irq(void)
+{
+	struct device_node *fpga_np;
+	int cascade_virq;
+
+	/* First setup the regular MPC5200 interrupt controller */
+	mpc52xx_init_irq();
+
+	/* Now find the FPGA IRQ */
+	fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga");
+	if (!fpga_np)
+		goto out;
+	pr_debug("%s: found fpga node: %pOF\n", __func__, fpga_np);
+
+	media5200_irq.regs = of_iomap(fpga_np, 0);
+	if (!media5200_irq.regs)
+		goto out;
+	pr_debug("%s: mapped to %p\n", __func__, media5200_irq.regs);
+
+	cascade_virq = irq_of_parse_and_map(fpga_np, 0);
+	if (!cascade_virq)
+		goto out_unmap;
+	pr_debug("%s: cascaded on virq=%i\n", __func__, cascade_virq);
+
+	/* Disable all FPGA IRQs */
+	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, 0);
+	spin_lock_init(&media5200_irq.lock);
+
+	media5200_irq.irqhost = irq_domain_add_linear(fpga_np,
+			MEDIA5200_NUM_IRQS, &media5200_irq_ops, &media5200_irq);
+	if (!media5200_irq.irqhost)
+		goto out_unmap;
+	pr_debug("%s: allocated irqhost\n", __func__);
+
+	of_node_put(fpga_np);
+	irq_set_handler_data(cascade_virq, &media5200_irq);
+	irq_set_chained_handler(cascade_virq, media5200_irq_cascade);
+	return;
+
+ out_unmap:
+	iounmap(media5200_irq.regs);
+	media5200_irq.regs = NULL;
+ out:
+	pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n");
+	of_node_put(fpga_np);
+}
+
+/*
+ * Board setup for the Media5200: map the common mpc52xx devices, set up
+ * the XLB arbiter, then update the GPIO port configuration so that the
+ * ATA chip selects are on csb_4/5.
+ *
+ * If the GPIO block cannot be mapped the port update is skipped and an
+ * error is logged.
+ */
+static void __init media5200_setup_arch(void)
+{
+	struct mpc52xx_gpio __iomem *gpio_regs;
+	struct device_node *gpio_np;
+	u32 cfg;
+
+	if (ppc_md.progress)
+		ppc_md.progress("media5200_setup_arch()", 0);
+
+	/* Registers used by the shared mpc52xx code */
+	mpc52xx_map_common_devices();
+
+	/* XLB arbiter settings common to mpc5200 and mpc5200b */
+	mpc5200_setup_xlb_arbiter();
+
+	/* Temporarily map the GPIO block; the node reference is not kept */
+	gpio_np = of_find_matching_node(NULL, mpc5200_gpio_ids);
+	gpio_regs = of_iomap(gpio_np, 0);
+	of_node_put(gpio_np);
+	if (!gpio_regs) {
+		printk(KERN_ERR "%s() failed. expect abnormal behavior\n",
+		       __func__);
+		return;
+	}
+
+	/* Port config: clear the two ATA CS bits, then select csb_4/5 */
+	cfg = in_be32(&gpio_regs->port_config);
+	cfg = (cfg & ~0x03000000) | 0x01000000;
+	out_be32(&gpio_regs->port_config, cfg);
+
+	iounmap(gpio_regs);
+}
+
+define_machine(media5200_platform) {
+	.name		= "media5200-platform",
+	.compatible	= "fsl,media5200",
+	.setup_arch	= media5200_setup_arch,
+	.discover_phbs	= mpc52xx_setup_pci,
+	.init		= mpc52xx_declare_of_platform_devices,
+	.init_IRQ	= media5200_init_irq,
+	.get_irq	= mpc52xx_get_irq,
+	.restart	= mpc52xx_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
new file mode 100644
index 0000000000..f1e85e86f5
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for 'mpc5200-simple-platform' compatible boards.
+ *
+ * Written by Marian Balakowicz <m8@semihalf.com>
+ * Copyright (C) 2007 Semihalf
+ *
+ * Description:
+ * This code implements support for simple MPC52xx based boards which
+ * do not need a custom platform specific setup. Such boards are
+ * supported assuming the following:
+ *
+ * - GPIO pins are configured by the firmware,
+ * - CDM configuration (clocking) is setup correctly by firmware,
+ * - if the 'fsl,has-wdt' property is present in one of the
+ *   gpt nodes, then it is safe to use such gpt to reset the board,
+ * - PCI is supported if enabled in the kernel configuration
+ *   and if there is a PCI bus node defined in the device tree.
+ *
+ * Boards that are compatible with this generic platform support
+ * are listed in a 'board' table.
+ */
+
+#undef DEBUG
+#include <linux/of.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+/*
+ * Setup the architecture: only the common mpc52xx steps, no board fixups
+ */
+static void __init mpc5200_simple_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc5200_simple_setup_arch()", 0);
+
+	/* Map the register blocks that the shared mpc52xx code relies on */
+	mpc52xx_map_common_devices();
+
+	/* XLB arbiter configuration shared by mpc5200 and mpc5200b */
+	mpc5200_setup_xlb_arbiter();
+}
+
+/* list of the supported boards; never written, so const and __initconst */
+static const char * const board[] __initconst = {
+	"anonymous,a3m071",
+	"anonymous,a4m072",
+	"anon,charon",
+	"ifm,o2d",
+	"intercontrol,digsy-mtc",
+	"manroland,mucmc52",
+	"manroland,uc101",
+	"phytec,pcm030",
+	"phytec,pcm032",
+	"promess,motionpro",
+	"schindler,cm5200",
+	"tqc,tqm5200",
+	NULL
+};
+
+/*
+ * Probe hook: checks the root node (of_root) against the board[] list
+ */
+static int __init mpc5200_simple_probe(void)
+{
+	return of_device_compatible_match(of_root, board);
+}
+
+define_machine(mpc5200_simple_platform) {
+	.name		= "mpc5200-simple-platform",
+	.probe		= mpc5200_simple_probe,
+	.setup_arch	= mpc5200_simple_setup_arch,
+	.discover_phbs	= mpc52xx_setup_pci,
+	.init		= mpc52xx_declare_of_platform_devices,
+	.init_IRQ	= mpc52xx_init_irq,
+	.get_irq	= mpc52xx_get_irq,
+	.restart	= mpc52xx_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
new file mode 100644
index 0000000000..b4938e344f
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * Utility functions for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2006 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/mpc52xx.h>
+
+/* MPC5200 device tree match tables: XLB arbiter, then buses to populate */
+static const struct of_device_id mpc52xx_xlb_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-xlb", },
+	{ .compatible = "mpc5200-xlb", },
+	{}
+};
+static const struct of_device_id mpc52xx_bus_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-immr", },
+	{ .compatible = "fsl,mpc5200b-immr", },
+	{ .compatible = "simple-bus", },
+
+	/* deprecated matches; shouldn't be used in new device trees */
+	{ .compatible = "fsl,lpb", },
+	{ .type = "builtin", .compatible = "mpc5200", }, /* efika */
+	{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+	{}
+};
+
+/*
+ * mpc52xx_wdt is mapped in mpc52xx_map_common_devices() and used in
+ * mpc52xx_restart().  A permanent mapping is required because
+ * mpc52xx_restart() can be called from interrupt context, where node
+ * mapping (which calls ioremap()) cannot be used.
+ */
+static DEFINE_SPINLOCK(mpc52xx_lock);
+static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
+static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
+
+/*
+ * Configure the XLB arbiter settings to match what Linux expects.
+ */
+void __init
+mpc5200_setup_xlb_arbiter(void)
+{
+	struct device_node *np;
+	struct mpc52xx_xlb  __iomem *xlb;
+
+	np = of_find_matching_node(NULL, mpc52xx_xlb_ids);
+	xlb = of_iomap(np, 0);
+	of_node_put(np);
+	if (!xlb) {
+		printk(KERN_ERR __FILE__ ": "
+			"Error mapping XLB in mpc5200_setup_xlb_arbiter(). "
+			"Expect some abnormal behavior\n");
+		return;
+	}
+
+	/* Configure the XLB Arbiter priorities */
+	out_be32(&xlb->master_pri_enable, 0xff);
+	out_be32(&xlb->master_priority, 0x11111111);
+
+	/*
+	 * Disable XLB pipelining
+	 * (cf. erratum 292. We could do this only just before an ATA PIO
+	 *  transaction and re-enable it afterwards ...)
+	 * Not needed on MPC5200B, hence the SVR check.
+	 */
+	if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
+		out_be32(&xlb->config, in_be32(&xlb->config) | MPC52xx_XLB_CFG_PLDIS);
+
+	iounmap(xlb);
+}
+
+/*
+ * simple_gpio and wkup_gpio are mapped in mpc52xx_map_common_devices() and
+ * used in mpc5200_psc_ac97_gpio_reset(), which takes gpio_lock.
+ */
+static DEFINE_SPINLOCK(gpio_lock);
+struct mpc52xx_gpio __iomem *simple_gpio;
+struct mpc52xx_gpio_wkup __iomem *wkup_gpio;
+
+/**
+ * mpc52xx_declare_of_platform_devices: register internal devices and children
+ *					of the localplus bus to the of_platform
+ *					bus.
+ */
+void __init mpc52xx_declare_of_platform_devices(void)
+{
+	/* Populate from the root, using mpc52xx_bus_ids; failure is only logged */
+	if (of_platform_populate(NULL, mpc52xx_bus_ids, NULL, NULL))
+		pr_err(__FILE__ ": Error while populating devices from DT\n");
+}
+
+/*
+ * match tables used by mpc52xx_map_common_devices()
+ */
+static const struct of_device_id mpc52xx_gpt_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpt", },
+	{ .compatible = "mpc5200-gpt", }, /* old */
+	{}
+};
+static const struct of_device_id mpc52xx_cdm_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-cdm", },
+	{ .compatible = "mpc5200-cdm", }, /* old */
+	{}
+};
+static const struct of_device_id mpc52xx_gpio_simple[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpio", },
+	{}
+};
+static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpio-wkup", },
+	{}
+};
+
+
+/**
+ * mpc52xx_map_common_devices: iomap devices required by common code
+ */
+void __init
+mpc52xx_map_common_devices(void)
+{
+	struct device_node *np;
+
+	/* mpc52xx_wdt is mapped here and used in mpc52xx_restart,
+	 * possibly from an interrupt context. wdt is only implemented
+	 * on gpt0, so check the has-wdt property before mapping.
+	 */
+	for_each_matching_node(np, mpc52xx_gpt_ids) {
+		if (of_property_read_bool(np, "fsl,has-wdt") ||
+		    of_property_read_bool(np, "has-wdt")) {
+			mpc52xx_wdt = of_iomap(np, 0);
+			of_node_put(np);	/* leaving the loop early */
+			break;
+		}
+	}
+
+	/* Clock Distribution Module, used by PSC clock setting function */
+	np = of_find_matching_node(NULL, mpc52xx_cdm_ids);
+	mpc52xx_cdm = of_iomap(np, 0);
+	of_node_put(np);
+
+	/* simple_gpio registers */
+	np = of_find_matching_node(NULL, mpc52xx_gpio_simple);
+	simple_gpio = of_iomap(np, 0);
+	of_node_put(np);
+
+	/* wkup_gpio registers */
+	np = of_find_matching_node(NULL, mpc52xx_gpio_wkup);
+	wkup_gpio = of_iomap(np, 0);
+	of_node_put(np);
+}
+
+/**
+ * mpc52xx_set_psc_clkdiv: Set clock divider in the CDM for PSC ports
+ * @psc_id: id of psc port; must be 1,2,3 or 6
+ * @clkdiv: clock divider value to put into CDM PSC register.
+ * Return: 0 on success, -ENODEV if the CDM is unmapped or @psc_id is invalid.
+ */
+int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv)
+{
+	unsigned long flags;
+	u16 __iomem *reg;
+	u32 val;
+	u32 mask;
+	u32 mclken_div;
+
+	if (!mpc52xx_cdm)
+		return -ENODEV;
+	/* only the low 9 bits of clkdiv are used; 0x8000 is always set */
+	mclken_div = 0x8000 | (clkdiv & 0x1FF);
+	switch (psc_id) {
+	case 1: reg = &mpc52xx_cdm->mclken_div_psc1; mask = 0x20; break;
+	case 2: reg = &mpc52xx_cdm->mclken_div_psc2; mask = 0x40; break;
+	case 3: reg = &mpc52xx_cdm->mclken_div_psc3; mask = 0x80; break;
+	case 6: reg = &mpc52xx_cdm->mclken_div_psc6; mask = 0x10; break;
+	default:
+		return -ENODEV;
+	}
+
+	/* Set the rate and enable the clock (clk_enables is read-modify-write) */
+	spin_lock_irqsave(&mpc52xx_lock, flags);
+	out_be16(reg, mclken_div);
+	val = in_be32(&mpc52xx_cdm->clk_enables);
+	out_be32(&mpc52xx_cdm->clk_enables, val | mask);
+	spin_unlock_irqrestore(&mpc52xx_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv);
+
+/**
+ * mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer
+ */
+void __noreturn mpc52xx_restart(char *cmd)
+{
+	local_irq_disable();
+
+	/* Turn on the watchdog and wait for it to expire.
+	 * It effectively does a reset. */
+	if (mpc52xx_wdt) {
+		out_be32(&mpc52xx_wdt->mode, 0x00000000);
+		out_be32(&mpc52xx_wdt->count, 0x000000ff);
+		out_be32(&mpc52xx_wdt->mode, 0x00009004);
+	} else
+		printk(KERN_ERR __FILE__ ": "
+			"mpc52xx_restart: Can't access wdt. "
+			"Restart impossible, system halted.\n");
+	/* never returns: spin until the watchdog fires, or forever without one */
+	while (1);
+}
+
+#define PSC1_RESET     0x1
+#define PSC1_SYNC      0x4
+#define PSC1_SDATA_OUT 0x1
+#define PSC2_RESET     0x2
+#define PSC2_SYNC      (0x4<<4)
+#define PSC2_SDATA_OUT (0x1<<4)
+#define MPC52xx_GPIO_PSC1_MASK 0x7
+#define MPC52xx_GPIO_PSC2_MASK (0x7<<4)
+
+/**
+ * mpc5200_psc_ac97_gpio_reset: Use gpio pins to reset the ac97 bus
+ * @psc_number: zero-based psc index: 0 uses the PSC1 pins, 1 the PSC2 pins
+ * Return: 0 on success, -ENODEV if the gpios are unmapped or the index is bad
+ */
+int mpc5200_psc_ac97_gpio_reset(int psc_number)
+{
+	unsigned long flags;
+	u32 gpio;
+	u32 mux;
+	int out;
+	int reset;
+	int sync;
+
+	if ((!simple_gpio) || (!wkup_gpio))
+		return -ENODEV;
+
+	switch (psc_number) {
+	case 0:
+		reset   = PSC1_RESET;           /* AC97_1_RES */
+		sync    = PSC1_SYNC;            /* AC97_1_SYNC */
+		out     = PSC1_SDATA_OUT;       /* AC97_1_SDATA_OUT */
+		gpio    = MPC52xx_GPIO_PSC1_MASK;
+		break;
+	case 1:
+		reset   = PSC2_RESET;           /* AC97_2_RES */
+		sync    = PSC2_SYNC;            /* AC97_2_SYNC */
+		out     = PSC2_SDATA_OUT;       /* AC97_2_SDATA_OUT */
+		gpio    = MPC52xx_GPIO_PSC2_MASK;
+		break;
+	default:
+		pr_err(__FILE__ ": Unable to determine PSC, no ac97 "
+		       "cold-reset will be performed\n");
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&gpio_lock, flags);
+
+	/* Reconfigure pin-muxing to gpio; the old value is restored below */
+	mux = in_be32(&simple_gpio->port_config);
+	out_be32(&simple_gpio->port_config, mux & (~gpio));
+
+	/* enable gpio pins for output */
+	setbits8(&wkup_gpio->wkup_gpioe, reset);
+	setbits32(&simple_gpio->simple_gpioe, sync | out);
+
+	setbits8(&wkup_gpio->wkup_ddr, reset);
+	setbits32(&simple_gpio->simple_ddr, sync | out);
+
+	/* Assert cold reset: sync and sdata_out low, then reset low */
+	clrbits32(&simple_gpio->simple_dvo, sync | out);
+	clrbits8(&wkup_gpio->wkup_dvo, reset);
+
+	/* wait for 1 us */
+	udelay(1);
+
+	/* Deassert reset */
+	setbits8(&wkup_gpio->wkup_dvo, reset);
+
+	/* wait at least 200ns */
+	/* 7 ~= (200ns * timebase) / ns2sec */
+	__delay(7);
+
+	/* Restore pin-muxing */
+	out_be32(&simple_gpio->port_config, mux);
+
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(mpc5200_psc_ac97_gpio_reset);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
new file mode 100644
index 0000000000..581059527c
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC5200 General Purpose Timer device driver
+ *
+ * Copyright (c) 2009 Secret Lab Technologies Ltd.
+ * Copyright (c) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
+ *
+ * This file is a driver for the General Purpose Timer (gpt) devices
+ * found on the MPC5200 SoC.  Each timer has an IO pin which can be used
+ * for GPIO or can be used to raise interrupts.  The timer function can
+ * be used independently from the IO pin, or it can be used to control
+ * output signals or measure input signals.
+ *
+ * This driver supports the GPIO and IRQ controller functions of the GPT
+ * device.  Timer functions are not yet supported.
+ *
+ * The timer gpt0 can be used as watchdog (wdt).  If the wdt mode is used,
+ * this prevents the use of any gpt0 gpt function (i.e. they will fail with
+ * -EBUSY).  Thus, the safety wdt function always has precedence over the gpt
+ * function.  If the kernel has been compiled with CONFIG_WATCHDOG_NOWAYOUT,
+ * this means that gpt0 is locked in wdt mode until the next reboot - this
+ * may be a requirement in safety applications.
+ *
+ * To use the GPIO function, the following two properties must be added
+ * to the device tree node for the gpt device (typically in the .dts file
+ * for the board):
+ * 	gpio-controller;
+ * 	#gpio-cells = < 2 >;
+ * This driver will register the GPIO pin if it finds the gpio-controller
+ * property in the device tree.
+ *
+ * To use the IRQ controller function, the following two properties must
+ * be added to the device tree node for the gpt device:
+ * 	interrupt-controller;
+ * 	#interrupt-cells = < 1 >;
+ * The IRQ controller binding only uses one cell to specify the interrupt,
+ * and the IRQ flags are encoded in the cell.  A cell is not used to encode
+ * the IRQ number because the GPT only has a single IRQ source.  For flags,
+ * a value of '1' means rising edge sensitive and '2' means falling edge.
+ *
+ * The GPIO and the IRQ controller functions can be used at the same time,
+ * but in this use case the IO line will only work as an input.  Trying to
+ * use it as a GPIO output will not work.
+ *
+ * When using the GPIO line as an output, it can either be driven as normal
+ * IO, or it can be an Open Collector (OC) output.  At the moment it is the
+ * responsibility of either the bootloader or the platform setup code to set
+ * the output mode.  This driver does not change the output mode setting.
+ */
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/watchdog.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <asm/div64.h>
+#include <asm/mpc52xx.h>
+
+MODULE_DESCRIPTION("Freescale MPC52xx gpt driver");
+MODULE_AUTHOR("Sascha Hauer, Grant Likely, Albrecht Dreß");
+MODULE_LICENSE("GPL");
+
+/**
+ * struct mpc52xx_gpt_priv - Private data structure for MPC52xx GPT driver
+ * @dev: pointer to device structure
+ * @regs: virtual address of GPT registers
+ * @lock: raw spinlock guarding wdt_mode and read-modify-write of the registers
+ * @gc: gpio_chip instance structure; used when GPIO is enabled
+ * @irqhost: Pointer to irq_domain instance; used when IRQ mode is supported
+ * @wdt_mode: only relevant for gpt0: bit 0 (MPC52xx_GPT_CAN_WDT) indicates
+ *   if the gpt may be used as wdt, bit 1 (MPC52xx_GPT_IS_WDT) indicates
+ *   if the timer is actively used as wdt which blocks gpt functions
+ */
+struct mpc52xx_gpt_priv {
+	struct list_head list;		/* Entry in mpc52xx_gpt_list (all GPT devices) */
+	struct device *dev;
+	struct mpc52xx_gpt __iomem *regs;
+	raw_spinlock_t lock;
+	struct irq_domain *irqhost;
+	u32 ipb_freq;			/* bus frequency in Hz; converts ns <-> clocks */
+	u8 wdt_mode;
+
+#if defined(CONFIG_GPIOLIB)
+	struct gpio_chip gc;
+#endif
+};
+
+LIST_HEAD(mpc52xx_gpt_list);		/* every probed GPT, linked via priv->list */
+DEFINE_MUTEX(mpc52xx_gpt_list_mutex);	/* guards mpc52xx_gpt_list */
+
+#define MPC52xx_GPT_MODE_MS_MASK	(0x07)
+#define MPC52xx_GPT_MODE_MS_IC		(0x01)
+#define MPC52xx_GPT_MODE_MS_OC		(0x02)
+#define MPC52xx_GPT_MODE_MS_PWM		(0x03)
+#define MPC52xx_GPT_MODE_MS_GPIO	(0x04)
+
+#define MPC52xx_GPT_MODE_GPIO_MASK	(0x30)
+#define MPC52xx_GPT_MODE_GPIO_OUT_LOW	(0x20)
+#define MPC52xx_GPT_MODE_GPIO_OUT_HIGH	(0x30)
+
+#define MPC52xx_GPT_MODE_COUNTER_ENABLE	(0x1000)
+#define MPC52xx_GPT_MODE_CONTINUOUS	(0x0400)
+#define MPC52xx_GPT_MODE_OPEN_DRAIN	(0x0200)
+#define MPC52xx_GPT_MODE_IRQ_EN		(0x0100)
+#define MPC52xx_GPT_MODE_WDT_EN		(0x8000)
+
+#define MPC52xx_GPT_MODE_ICT_MASK	(0x030000)
+#define MPC52xx_GPT_MODE_ICT_RISING	(0x010000)
+#define MPC52xx_GPT_MODE_ICT_FALLING	(0x020000)
+#define MPC52xx_GPT_MODE_ICT_TOGGLE	(0x030000)
+
+#define MPC52xx_GPT_MODE_WDT_PING	(0xa5)
+
+#define MPC52xx_GPT_STATUS_IRQMASK	(0x000f)
+
+#define MPC52xx_GPT_CAN_WDT		(1 << 0)
+#define MPC52xx_GPT_IS_WDT		(1 << 1)
+
+
+/* ---------------------------------------------------------------------
+ * Cascaded interrupt controller hooks
+ */
+
+static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
+{
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);	/* mode reg RMW */
+	clrsetbits_be32(&priv->regs->mode, 0, MPC52xx_GPT_MODE_IRQ_EN);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+static void mpc52xx_gpt_irq_mask(struct irq_data *d)
+{
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);	/* mode reg RMW */
+	clrsetbits_be32(&priv->regs->mode, MPC52xx_GPT_MODE_IRQ_EN, 0);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+static void mpc52xx_gpt_irq_ack(struct irq_data *d)
+{
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+
+	out_be32(&priv->regs->status, MPC52xx_GPT_STATUS_IRQMASK); /* ack */
+}
+
+static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+	unsigned long irqflags;
+	u32 ict = 0;
+
+	dev_dbg(priv->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type);
+
+	/* translate the requested edges into the ICT field of the mode reg */
+	if (flow_type & IRQF_TRIGGER_RISING)
+		ict |= MPC52xx_GPT_MODE_ICT_RISING;
+	if (flow_type & IRQF_TRIGGER_FALLING)
+		ict |= MPC52xx_GPT_MODE_ICT_FALLING;
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	clrsetbits_be32(&priv->regs->mode, MPC52xx_GPT_MODE_ICT_MASK, ict);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+
+	return 0;
+}
+
+static struct irq_chip mpc52xx_gpt_irq_chip = {
+	.name = "MPC52xx GPT",
+	.irq_unmask = mpc52xx_gpt_irq_unmask,
+	.irq_mask = mpc52xx_gpt_irq_mask,
+	.irq_ack = mpc52xx_gpt_irq_ack,
+	.irq_set_type = mpc52xx_gpt_irq_set_type,
+};
+
+static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc)
+{
+	struct mpc52xx_gpt_priv *priv = irq_desc_get_handler_data(desc);
+
+	/* no IRQMASK bit set in the status register: nothing to forward */
+	if (!(in_be32(&priv->regs->status) & MPC52xx_GPT_STATUS_IRQMASK))
+		return;
+	generic_handle_domain_irq(priv->irqhost, 0);
+}
+
+static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq,
+			       irq_hw_number_t hw)
+{
+	struct mpc52xx_gpt_priv *priv = h->host_data;
+
+	dev_dbg(priv->dev, "%s: h=%p, virq=%i\n", __func__, h, virq);
+	/* bind our private data and irq_chip; the irq is handled as edge */
+	irq_set_chip_data(virq, priv);
+	irq_set_chip_and_handler(virq, &mpc52xx_gpt_irq_chip, handle_edge_irq);
+	return 0;
+}
+
+static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq,
+				 unsigned int *out_flags)
+{
+	struct mpc52xx_gpt_priv *gpt = h->host_data;
+
+	/* validate first: intspec[0] must not be read when intsize is 0 */
+	if ((intsize < 1) || (intspec[0] > 3)) {
+		dev_err(gpt->dev, "bad irq specifier in %pOF\n", ct);
+		return -EINVAL;
+	}
+	dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]);
+
+	*out_hwirq = 0; /* The GPT only has 1 IRQ line */
+	*out_flags = intspec[0]; /* the single cell carries the IRQ flags */
+
+	return 0;
+}
+
+static const struct irq_domain_ops mpc52xx_gpt_irq_ops = {
+	.map = mpc52xx_gpt_irq_map,
+	.xlate = mpc52xx_gpt_irq_xlate,
+};
+
+/* Register the GPT as a one-irq cascaded interrupt controller, if it has an irq */
+static void
+mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
+{
+	int cascade_virq;
+	unsigned long flags;
+	u32 mode;
+
+	cascade_virq = irq_of_parse_and_map(node, 0);
+	if (!cascade_virq)
+		return;
+
+	gpt->irqhost = irq_domain_add_linear(node, 1, &mpc52xx_gpt_irq_ops, gpt);
+	if (!gpt->irqhost) {
+		dev_err(gpt->dev, "irq_domain_add_linear() failed\n");
+		irq_dispose_mapping(cascade_virq);	/* don't leak the mapping */
+		return;
+	}
+
+	irq_set_chained_handler_and_data(cascade_virq, mpc52xx_gpt_irq_cascade, gpt);
+
+	/* Only switch the pin to Input Capture mode when the GPT is currently
+	 * disabled; a non-zero mode means the pin could already be in use. */
+	raw_spin_lock_irqsave(&gpt->lock, flags);
+	mode = in_be32(&gpt->regs->mode);
+	if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0)
+		out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC);
+	raw_spin_unlock_irqrestore(&gpt->lock, flags);
+
+	dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq);
+}
+
+
+/* ---------------------------------------------------------------------
+ * GPIOLIB hooks
+ */
+#if defined(CONFIG_GPIOLIB)
+static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct mpc52xx_gpt_priv *priv = gpiochip_get_data(gc);
+
+	return !!(in_be32(&priv->regs->status) & (1 << 8)); /* status bit 8 */
+}
+
+static void
+mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
+{
+	struct mpc52xx_gpt_priv *priv = gpiochip_get_data(gc);
+	unsigned long irqflags;
+	u32 level = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH :
+			MPC52xx_GPT_MODE_GPIO_OUT_LOW;
+
+	dev_dbg(priv->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v);
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	clrsetbits_be32(&priv->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, level);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct mpc52xx_gpt_priv *priv = gpiochip_get_data(gc);
+	unsigned long irqflags;
+
+	dev_dbg(priv->dev, "%s: gpio:%d\n", __func__, gpio);
+
+	/* input: clear both GPIO output bits in the mode register */
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	clrsetbits_be32(&priv->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, 0);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+	return 0;
+}
+
+static int
+mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	mpc52xx_gpt_gpio_set(gc, gpio, val);	/* writes OUT_LOW or OUT_HIGH */
+	return 0;
+}
+
+static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt)
+{
+	int rc;
+
+	/* Only setup GPIO if the device claims the GPT is a GPIO controller */
+	if (!device_property_present(gpt->dev, "gpio-controller"))
+		return;
+
+	/* device-managed, so the label is released together with 'gpt' */
+	gpt->gc.label = devm_kasprintf(gpt->dev, GFP_KERNEL, "%pfw", dev_fwnode(gpt->dev));
+	if (!gpt->gc.label) {
+		dev_err(gpt->dev, "out of memory\n");
+		return;
+	}
+
+	gpt->gc.ngpio = 1;
+	gpt->gc.direction_input  = mpc52xx_gpt_gpio_dir_in;
+	gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out;
+	gpt->gc.get = mpc52xx_gpt_gpio_get;
+	gpt->gc.set = mpc52xx_gpt_gpio_set;
+	gpt->gc.base = -1;
+	gpt->gc.parent = gpt->dev;
+
+	/* Setup external pin in GPIO mode */
+	clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
+			MPC52xx_GPT_MODE_MS_GPIO);
+
+	rc = gpiochip_add_data(&gpt->gc, gpt);
+	if (rc)
+		dev_err(gpt->dev, "gpiochip_add_data() failed; rc=%i\n", rc);
+	dev_dbg(gpt->dev, "%s() complete.\n", __func__);
+}
+#else /* defined(CONFIG_GPIOLIB) */
+static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { }
+#endif /* defined(CONFIG_GPIOLIB) */
+
+/***********************************************************************
+ * Timer API
+ */
+
+/**
+ * mpc52xx_gpt_from_irq - Return the GPT device associated with an IRQ number
+ * @irq: irq of timer.
+ */
+struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq)
+{
+	struct mpc52xx_gpt_priv *entry, *match = NULL;
+
+	/* Walk the registered timers under the list mutex and stop at the
+	 * first one whose irq domain maps hwirq 0 to the requested irq */
+	mutex_lock(&mpc52xx_gpt_list_mutex);
+	list_for_each_entry(entry, &mpc52xx_gpt_list, list) {
+		if (!entry->irqhost)
+			continue;
+		if (irq == irq_linear_revmap(entry->irqhost, 0)) {
+			match = entry;
+			break;
+		}
+	}
+	mutex_unlock(&mpc52xx_gpt_list_mutex);
+	return match;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_from_irq);
+
+static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
+				int continuous, int as_wdt)
+{
+	u32 clear, set;
+	u64 clocks;
+	u32 prescale, rem_ns;
+	unsigned long flags;
+
+	clear = MPC52xx_GPT_MODE_MS_MASK | MPC52xx_GPT_MODE_CONTINUOUS;
+	set = MPC52xx_GPT_MODE_MS_GPIO | MPC52xx_GPT_MODE_COUNTER_ENABLE;
+	if (as_wdt) {
+		clear |= MPC52xx_GPT_MODE_IRQ_EN;
+		set |= MPC52xx_GPT_MODE_WDT_EN;
+	} else if (continuous)
+		set |= MPC52xx_GPT_MODE_CONTINUOUS;
+
+	/* Determine the number of clocks in the requested period.  Period is
+	 * in 'ns', bus frequency is in Hz.  The period is split into whole
+	 * seconds and a sub-second rest so that neither 64 bit product can
+	 * overflow; the sum still equals period * ipb_freq / 10^9 exactly. */
+	rem_ns = do_div(period, 1000000000); /* period is in seconds now */
+	clocks = (u64)rem_ns * gpt->ipb_freq;
+	do_div(clocks, 1000000000); /* clocks in the sub-second rest */
+
+	/* This device cannot handle a clock count greater than 32 bits */
+	if (period > 0xffffffff) /* too long for any non-zero bus frequency */
+		return -EINVAL;
+	clocks += period * gpt->ipb_freq;
+	if (clocks > 0xffffffff)
+		return -EINVAL;
+
+	/* Split 'clocks' (ticks in the period) into a 16 bit prescaler and a
+	 * 16 bit count.  Integer dividing clocks by 0x10000 (shifting down
+	 * 16 bits) and adding one gives the smallest divisor that leaves a
+	 * 16 bit count value.
+	 *
+	 * Note: the prescale register is '1' based: '1' divides the clock by
+	 * one, 0xffff by 0xffff, and '0x0000' wraps around to divide by
+	 * 0x10000.  That is why prescale must be a u32 here, not a u16. */
+	prescale = (clocks >> 16) + 1;
+	do_div(clocks, prescale);
+	if (clocks > 0xffff) {
+		pr_err("calculation error; prescale:%x clocks:%llx\n",
+		       prescale, clocks);
+		return -EINVAL;
+	}
+
+	/* Set and enable the timer, reject an attempt to use a wdt as gpt */
+	raw_spin_lock_irqsave(&gpt->lock, flags);
+	if (as_wdt)
+		gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+	else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
+		raw_spin_unlock_irqrestore(&gpt->lock, flags);
+		return -EBUSY;
+	}
+	out_be32(&gpt->regs->count, prescale << 16 | clocks);
+	clrsetbits_be32(&gpt->regs->mode, clear, set);
+	raw_spin_unlock_irqrestore(&gpt->lock, flags);
+
+	return 0;
+}
+
+/**
+ * mpc52xx_gpt_start_timer - Set and enable the GPT timer
+ * @gpt: Pointer to gpt private data structure
+ * @period: period of timer in ns; max. ~130s @ 33MHz IPB clock
+ * @continuous: set to 1 to make timer continuous free running
+ *
+ * An interrupt will be generated every time the timer fires
+ */
+int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period,
+			    int continuous)
+{
+	return mpc52xx_gpt_do_start(gpt, period, continuous, /* as_wdt */ 0);
+}
+EXPORT_SYMBOL(mpc52xx_gpt_start_timer);
+
+/**
+ * mpc52xx_gpt_stop_timer - Stop a gpt
+ * @gpt: Pointer to gpt private data structure
+ *
+ * Returns an error if attempting to stop a wdt
+ */
+int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
+{
+	unsigned long irqflags;
+	int rc = -EBUSY;
+
+	/* a timer serving as watchdog (gpt 0 only) must not be stopped */
+	raw_spin_lock_irqsave(&gpt->lock, irqflags);
+	if (!(gpt->wdt_mode & MPC52xx_GPT_IS_WDT)) {
+		clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
+		rc = 0;
+	}
+	raw_spin_unlock_irqrestore(&gpt->lock, irqflags);
+
+	return rc;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
+
+/**
+ * mpc52xx_gpt_timer_period - Read the timer period
+ * @gpt: Pointer to gpt private data structure
+ *
+ * Returns the timer period in ns
+ */
+u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
+{
+	unsigned long irqflags;
+	u64 count_reg, divider, ns;
+
+	raw_spin_lock_irqsave(&gpt->lock, irqflags);
+	count_reg = in_be32(&gpt->regs->count);
+	raw_spin_unlock_irqrestore(&gpt->lock, irqflags);
+
+	/* upper half: prescaler (0 stands for 0x10000); lower half: count */
+	divider = count_reg >> 16;
+	if (!divider)
+		divider = 0x10000;
+
+	ns = (count_reg & 0xffff) * divider * 1000000000ULL;
+	do_div(ns, gpt->ipb_freq);
+	return ns;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_timer_period);
+
+#if defined(CONFIG_MPC5200_WDT)
+/***********************************************************************
+ * Watchdog API for gpt0
+ */
+
+#define WDT_IDENTITY	    "mpc52xx watchdog on GPT0"
+
+/* wdt_is_active stores whether or not the /dev/watchdog device is opened */
+static unsigned long wdt_is_active;
+
+/* wdt-capable gpt */
+static struct mpc52xx_gpt_priv *mpc52xx_gpt_wdt;
+
+/* low-level wdt ping: byte-write MPC52xx_GPT_MODE_WDT_PING at the mode register address */
+static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
+	out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
+	raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+}
+
+/* wdt misc device api */
+static ssize_t mpc52xx_wdt_write(struct file *file, const char __user *data,
+				 size_t len, loff_t *ppos)
+{
+	/* any write is a ping; report the ignored data as fully consumed */
+	mpc52xx_gpt_wdt_ping(file->private_data);
+	return len;
+}
+
+static const struct watchdog_info mpc5200_wdt_info = {
+	.options	= WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+	.identity	= WDT_IDENTITY,
+};
+
+static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+	int __user *data = (int __user *)arg;
+	int timeout;
+	u64 real_timeout;
+	int ret = 0;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		ret = copy_to_user(data, &mpc5200_wdt_info,
+				   sizeof(mpc5200_wdt_info));
+		if (ret)
+			ret = -EFAULT;
+		break;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		ret = put_user(0, data);
+		break;
+
+	case WDIOC_KEEPALIVE:
+		mpc52xx_gpt_wdt_ping(gpt_wdt);
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		ret = get_user(timeout, data);
+		if (!ret && timeout <= 0)
+			ret = -EINVAL;	/* negative would wrap as u64 below */
+		if (ret)
+			break;
+		real_timeout = (u64) timeout * 1000000000ULL;
+		ret = mpc52xx_gpt_do_start(gpt_wdt, real_timeout, 0, 1);
+		if (ret)
+			break;
+		fallthrough;	/* and return the timeout actually set */
+
+	case WDIOC_GETTIMEOUT:
+		/* we need to round here as to avoid e.g. the following
+		 * situation: timeout requested is 1 second, the real timeout
+		 * @33MHz is 999997090ns, and the int divide by 10^9 would
+		 * return 0.
+		 */
+		real_timeout =
+			mpc52xx_gpt_timer_period(gpt_wdt) + 500000000ULL;
+		do_div(real_timeout, 1000000000ULL);
+		timeout = (int) real_timeout;
+		ret = put_user(timeout, data);
+		break;
+
+	default:
+		ret = -ENOTTY;
+	}
+	return ret;
+}
+
+static int mpc52xx_wdt_open(struct inode *inode, struct file *file)
+{
+	int err;
+
+	/* no wdt-capable gpt has been recorded */
+	if (mpc52xx_gpt_wdt == NULL)
+		return -ENODEV;
+
+	/* bit 0 of wdt_is_active marks the single permitted opener */
+	if (test_and_set_bit(0, &wdt_is_active) != 0)
+		return -EBUSY;
+
+	/* arm the watchdog with a timeout of 30 seconds */
+	err = mpc52xx_gpt_do_start(mpc52xx_gpt_wdt, 30ULL * 1000000000ULL,
+				   0, 1);
+	if (err == 0) {
+		file->private_data = mpc52xx_gpt_wdt;
+		return stream_open(inode, file);
+	}
+
+	clear_bit(0, &wdt_is_active);
+	return err;
+}
+
+static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
+{
+	/* with NOWAYOUT the wdt keeps running after the device is closed */
+#ifndef CONFIG_WATCHDOG_NOWAYOUT
+	struct mpc52xx_gpt_priv *wdt = file->private_data;
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&wdt->lock, irqflags);
+	clrsetbits_be32(&wdt->regs->mode,
+		MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN, 0);
+	wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
+	raw_spin_unlock_irqrestore(&wdt->lock, irqflags);
+#endif
+	clear_bit(0, &wdt_is_active);
+	return 0;
+}
+
+
+static const struct file_operations mpc52xx_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.write		= mpc52xx_wdt_write,
+	.unlocked_ioctl = mpc52xx_wdt_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+	.open		= mpc52xx_wdt_open,
+	.release	= mpc52xx_wdt_release,
+};
+
+static struct miscdevice mpc52xx_wdt_miscdev = {
+	.minor		= WATCHDOG_MINOR,
+	.name		= "watchdog",
+	.fops		= &mpc52xx_wdt_fops,
+};
+
+static int mpc52xx_gpt_wdt_init(void)
+{
+	int rc = misc_register(&mpc52xx_wdt_miscdev);
+
+	/* report the outcome of registering the watchdog misc device */
+	if (rc) {
+		pr_err("%s: cannot register watchdog device\n", WDT_IDENTITY);
+		return rc;
+	}
+	pr_info("%s: watchdog device registered\n", WDT_IDENTITY);
+	return 0;
+}
+
+static int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt,
+				 const u32 *period)
+{
+	int rc;
+
+	/* remember the gpt for the wdt operation */
+	mpc52xx_gpt_wdt = gpt;
+
+	/* without a non-zero timeout from the device tree, do not start it */
+	if (period == NULL || *period == 0)
+		return 0;
+
+	rc = mpc52xx_gpt_do_start(gpt, (u64) *period * 1000000000ULL, 0, 1);
+	if (rc == 0)
+		dev_info(gpt->dev, "watchdog set to %us timeout\n", *period);
+	else
+		dev_warn(gpt->dev, "starting as wdt failed\n");
+	return 0;
+}
+
+#else
+
+static int mpc52xx_gpt_wdt_init(void)
+{
+	return 0;
+}
+
+static inline int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt,
+					const u32 *period)
+{
+	return 0;
+}
+
+#endif	/*  CONFIG_MPC5200_WDT	*/
+
+/* ---------------------------------------------------------------------
+ * of_platform bus binding code
+ */
+static int mpc52xx_gpt_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	struct device_node *np = dev->of_node;
+	struct mpc52xx_gpt_priv *gpt;
+	const u32 *on_boot_wdt;
+
+	gpt = devm_kzalloc(dev, sizeof(*gpt), GFP_KERNEL);
+	if (!gpt)
+		return -ENOMEM;
+
+	raw_spin_lock_init(&gpt->lock);
+	gpt->dev = dev;
+	gpt->ipb_freq = mpc5xxx_get_bus_frequency(dev);
+	gpt->regs = of_iomap(np, 0);
+	if (!gpt->regs)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, gpt);
+
+	mpc52xx_gpt_gpio_setup(gpt);
+	mpc52xx_gpt_irq_setup(gpt, np);
+
+	mutex_lock(&mpc52xx_gpt_list_mutex);
+	list_add(&gpt->list, &mpc52xx_gpt_list);
+	mutex_unlock(&mpc52xx_gpt_list_mutex);
+
+	/* nothing more to do unless the node marks this gpt as wdt capable */
+	if (!of_property_read_bool(np, "fsl,has-wdt") &&
+	    !of_property_read_bool(np, "has-wdt"))
+		return 0;
+
+	gpt->wdt_mode = MPC52xx_GPT_CAN_WDT;
+	on_boot_wdt = of_get_property(np, "fsl,wdt-on-boot", NULL);
+	if (on_boot_wdt) {
+		dev_info(dev, "used as watchdog\n");
+		gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+	} else
+		dev_info(dev, "can function as watchdog\n");
+	mpc52xx_gpt_wdt_setup(gpt, on_boot_wdt);
+	return 0;
+}
+
+static const struct of_device_id mpc52xx_gpt_match[] = {
+	{ .compatible = "fsl,mpc5200-gpt", },
+
+	/* Deprecated compatible values; don't use for new dts files */
+	{ .compatible = "fsl,mpc5200-gpt-gpio", },
+	{ .compatible = "mpc5200-gpt", },
+	{}
+};
+
+static struct platform_driver mpc52xx_gpt_driver = {
+	.driver = {
+		.name = "mpc52xx-gpt",
+		.suppress_bind_attrs = true,
+		.of_match_table = mpc52xx_gpt_match,
+	},
+	.probe = mpc52xx_gpt_probe,
+};
+
+static int __init mpc52xx_gpt_init(void)
+{
+	return platform_driver_register(&mpc52xx_gpt_driver);
+}
+
+/* Make sure GPIOs and IRQs get set up before anyone tries to use them */
+subsys_initcall(mpc52xx_gpt_init);
+device_initcall(mpc52xx_gpt_wdt_init);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
new file mode 100644
index 0000000000..0ca4401ba7
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -0,0 +1,419 @@
+/*
+ * PCI code for the Freescale MPC52xx embedded CPU.
+ *
+ * Copyright (C) 2006 Secret Lab Technologies Ltd.
+ *                        Grant Likely <grant.likely@secretlab.ca>
+ * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#undef DEBUG
+
+#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <asm/mpc52xx.h>
+#include <asm/delay.h>
+#include <asm/machdep.h>
+#include <linux/kernel.h>
+
+
+/* ======================================================================== */
+/* Structures mapping & Defines for PCI Unit                                */
+/* ======================================================================== */
+
+#define MPC52xx_PCI_GSCR_BM		0x40000000
+#define MPC52xx_PCI_GSCR_PE		0x20000000
+#define MPC52xx_PCI_GSCR_SE		0x10000000
+#define MPC52xx_PCI_GSCR_XLB2PCI_MASK	0x07000000
+#define MPC52xx_PCI_GSCR_XLB2PCI_SHIFT	24
+#define MPC52xx_PCI_GSCR_IPG2PCI_MASK	0x00070000
+#define MPC52xx_PCI_GSCR_IPG2PCI_SHIFT	16
+#define MPC52xx_PCI_GSCR_BME		0x00004000
+#define MPC52xx_PCI_GSCR_PEE		0x00002000
+#define MPC52xx_PCI_GSCR_SEE		0x00001000
+#define MPC52xx_PCI_GSCR_PR		0x00000001
+
+
+#define MPC52xx_PCI_IWBTAR_TRANSLATION(proc_ad,pci_ad,size)	  \
+		( ( (proc_ad) & 0xff000000 )			| \
+		  ( (((size) - 1) >> 8) & 0x00ff0000 )		| \
+		  ( ((pci_ad) >> 16) & 0x0000ff00 ) )
+
+#define MPC52xx_PCI_IWCR_PACK(win0,win1,win2)	(((win0) << 24) | \
+						 ((win1) << 16) | \
+						 ((win2) <<  8))
+
+#define MPC52xx_PCI_IWCR_DISABLE	0x0
+#define MPC52xx_PCI_IWCR_ENABLE		0x1
+#define MPC52xx_PCI_IWCR_READ		0x0
+#define MPC52xx_PCI_IWCR_READ_LINE	0x2
+#define MPC52xx_PCI_IWCR_READ_MULTI	0x4
+#define MPC52xx_PCI_IWCR_MEM		0x0
+#define MPC52xx_PCI_IWCR_IO		0x8
+
+#define MPC52xx_PCI_TCR_P		0x01000000
+#define MPC52xx_PCI_TCR_LD		0x00010000
+#define MPC52xx_PCI_TCR_WCT8		0x00000008
+
+#define MPC52xx_PCI_TBATR_DISABLE	0x0
+#define MPC52xx_PCI_TBATR_ENABLE	0x1
+
+struct mpc52xx_pci {
+	u32	idr;		/* PCI + 0x00 */
+	u32	scr;		/* PCI + 0x04 */
+	u32	ccrir;		/* PCI + 0x08 */
+	u32	cr1;		/* PCI + 0x0C */
+	u32	bar0;		/* PCI + 0x10 */
+	u32	bar1;		/* PCI + 0x14 */
+	u8	reserved1[16];	/* PCI + 0x18 */
+	u32	ccpr;		/* PCI + 0x28 */
+	u32	sid;		/* PCI + 0x2C */
+	u32	erbar;		/* PCI + 0x30 */
+	u32	cpr;		/* PCI + 0x34 */
+	u8	reserved2[4];	/* PCI + 0x38 */
+	u32	cr2;		/* PCI + 0x3C */
+	u8	reserved3[32];	/* PCI + 0x40 */
+	u32	gscr;		/* PCI + 0x60 */
+	u32	tbatr0;		/* PCI + 0x64 */
+	u32	tbatr1;		/* PCI + 0x68 */
+	u32	tcr;		/* PCI + 0x6C */
+	u32	iw0btar;	/* PCI + 0x70 */
+	u32	iw1btar;	/* PCI + 0x74 */
+	u32	iw2btar;	/* PCI + 0x78 */
+	u8	reserved4[4];	/* PCI + 0x7C */
+	u32	iwcr;		/* PCI + 0x80 */
+	u32	icr;		/* PCI + 0x84 */
+	u32	isr;		/* PCI + 0x88 */
+	u32	arb;		/* PCI + 0x8C */
+	u8	reserved5[104];	/* PCI + 0x90 */
+	u32	car;		/* PCI + 0xF8 */
+	u8	reserved6[4];	/* PCI + 0xFC */
+};
+
+/* MPC5200 device tree match tables */
+const struct of_device_id mpc52xx_pci_ids[] __initconst = {
+	{ .type = "pci", .compatible = "fsl,mpc5200-pci", },
+	{ .type = "pci", .compatible = "mpc5200-pci", },
+	{}
+};
+
+/* ======================================================================== */
+/* PCI configuration access                                                 */
+/* ======================================================================== */
+
+/* Read 'len' bytes at config space 'offset' of bus/devfn: select the target
+ * through hose->cfg_addr, read hose->cfg_data, then clear cfg_addr again. */
+static int
+mpc52xx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	u32 value;
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	out_be32(hose->cfg_addr,
+		(1U << 31) | (bus->number << 16) | /* 1U: bit 31 is int's sign bit */
+		(devfn << 8) | (offset & 0xfc));
+	mb();
+
+#if defined(CONFIG_PPC_MPC5200_BUGFIX)
+	if (bus->number) {
+		/* workaround for the bug 435 of the MPC5200 (L25R);
+		 * Don't do 32 bits config access during type-1 cycles */
+		switch (len) {
+		      case 1:
+			value = in_8(((u8 __iomem *)hose->cfg_data) +
+			             (offset & 3));
+			break;
+		      case 2:
+			value = in_le16(((u16 __iomem *)hose->cfg_data) +
+			                ((offset>>1) & 1));
+			break;
+
+		      default:
+			value = in_le16((u16 __iomem *)hose->cfg_data) |
+				((u32)in_le16(((u16 __iomem *)hose->cfg_data) + 1) << 16);
+			break;
+		}
+	}
+	else
+#endif
+	{
+		value = in_le32(hose->cfg_data);
+
+		if (len != 4) {
+			value >>= ((offset & 0x3) << 3);
+			value &= 0xffffffff >> (32 - (len << 3));
+		}
+	}
+
+	*val = value;
+
+	out_be32(hose->cfg_addr, 0);
+	mb();
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Write 'len' bytes of 'val' at config space 'offset' of bus/devfn: select the
+ * target through hose->cfg_addr, write hose->cfg_data, then clear cfg_addr. */
+static int
+mpc52xx_pci_write_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	u32 value, mask;
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	out_be32(hose->cfg_addr,
+		(1U << 31) | (bus->number << 16) | /* 1U: bit 31 is int's sign bit */
+		(devfn << 8) | (offset & 0xfc));
+	mb();
+
+#if defined(CONFIG_PPC_MPC5200_BUGFIX)
+	if (bus->number) {
+		/* workaround for the bug 435 of the MPC5200 (L25R);
+		 * Don't do 32 bits config access during type-1 cycles */
+		switch (len) {
+		      case 1:
+			out_8(((u8 __iomem *)hose->cfg_data) +
+				(offset & 3), val);
+			break;
+		      case 2:
+			out_le16(((u16 __iomem *)hose->cfg_data) +
+				((offset>>1) & 1), val);
+			break;
+
+		      default:
+			out_le16((u16 __iomem *)hose->cfg_data,
+				(u16)val);
+			out_le16(((u16 __iomem *)hose->cfg_data) + 1,
+				(u16)(val>>16));
+			break;
+		}
+	}
+	else
+#endif
+	{
+		if (len != 4) {
+			value = in_le32(hose->cfg_data);
+
+			offset = (offset & 0x3) << 3;
+			mask = (0xffffffff >> (32 - (len << 3)));
+			mask <<= offset;
+
+			value &= ~mask;
+			val = value | ((val << offset) & mask);
+		}
+
+		out_le32(hose->cfg_data, val);
+	}
+	mb();
+
+	out_be32(hose->cfg_addr, 0);
+	mb();
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops mpc52xx_pci_ops = {
+	.read  = mpc52xx_pci_read_config,
+	.write = mpc52xx_pci_write_config
+};
+
+
+/* ======================================================================== */
+/* PCI setup                                                                */
+/* ======================================================================== */
+
+static void __init
+mpc52xx_pci_setup(struct pci_controller *hose,
+                  struct mpc52xx_pci __iomem *pci_regs, phys_addr_t pci_phys)
+{
+	struct resource *res;
+	u32 tmp;
+	int iwcr0 = 0, iwcr1 = 0, iwcr2 = 0;
+
+	pr_debug("%s(hose=%p, pci_regs=%p)\n", __func__, hose, pci_regs);
+
+	/* pci_process_bridge_OF_ranges() found all our addresses for us;
+	 * now store them in the right places */
+	hose->cfg_addr = &pci_regs->car;
+	hose->cfg_data = hose->io_base_virt;
+
+	/* Control regs: set PCI_COMMAND_MASTER and PCI_COMMAND_MEMORY in scr */
+	tmp = in_be32(&pci_regs->scr);
+	tmp |= PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY;
+	out_be32(&pci_regs->scr, tmp);
+
+	/* Memory windows 0 and 1: same start address on both sides */
+	res = &hose->mem_resources[0];
+	if (res->flags) {
+		pr_debug("mem_resource[0] = %pr\n", res);
+		out_be32(&pci_regs->iw0btar,
+		         MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start,
+							resource_size(res)));
+		iwcr0 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM;
+		if (res->flags & IORESOURCE_PREFETCH)
+			iwcr0 |= MPC52xx_PCI_IWCR_READ_MULTI;
+		else
+			iwcr0 |= MPC52xx_PCI_IWCR_READ;
+	}
+
+	res = &hose->mem_resources[1];
+	if (res->flags) {
+		pr_debug("mem_resource[1] = %pr\n", res);
+		out_be32(&pci_regs->iw1btar,
+		         MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start,
+							resource_size(res)));
+		iwcr1 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM;
+		if (res->flags & IORESOURCE_PREFETCH)
+			iwcr1 |= MPC52xx_PCI_IWCR_READ_MULTI;
+		else
+			iwcr1 |= MPC52xx_PCI_IWCR_READ;
+	}
+
+	/* IO resources; NOTE(review): res is the address of a member, so !res is never true */
+	res = &hose->io_resource;
+	if (!res) {
+		printk(KERN_ERR "%s: Didn't find IO resources\n", __FILE__);
+		return;
+	}
+	pr_debug(".io_resource = %pr .io_base_phys=0x%pa\n",
+		 res, &hose->io_base_phys);
+	out_be32(&pci_regs->iw2btar,
+	         MPC52xx_PCI_IWBTAR_TRANSLATION(hose->io_base_phys,
+	                                        res->start,
+						resource_size(res)));
+	iwcr2 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_IO;
+
+	/* Set all the IWCR fields at once; they're in the same reg */
+	out_be32(&pci_regs->iwcr, MPC52xx_PCI_IWCR_PACK(iwcr0, iwcr1, iwcr2));
+
+	/* Map IMMR onto PCI bus */
+	pci_phys &= 0xfffc0000; /* bar0 has only 14 significant bits */
+	out_be32(&pci_regs->tbatr0, MPC52xx_PCI_TBATR_ENABLE | pci_phys);
+	out_be32(&pci_regs->bar0, PCI_BASE_ADDRESS_MEM_PREFETCH | pci_phys);
+
+	/* Map memory onto PCI bus */
+	out_be32(&pci_regs->tbatr1, MPC52xx_PCI_TBATR_ENABLE);
+	out_be32(&pci_regs->bar1, PCI_BASE_ADDRESS_MEM_PREFETCH);
+
+	out_be32(&pci_regs->tcr, MPC52xx_PCI_TCR_LD | MPC52xx_PCI_TCR_WCT8);
+
+	tmp = in_be32(&pci_regs->gscr);
+#if 0
+	/* Reset the external bus ( internal PCI controller is NOT reset ) */
+	/* Not necessary and can be a bad thing if for example the bootloader
+	   is displaying a splash screen or ... Just left here for
+	   documentation purpose if anyone need it */
+	out_be32(&pci_regs->gscr, tmp | MPC52xx_PCI_GSCR_PR);
+	udelay(50);
+#endif
+
+	/* Make sure the PCI bridge is out of reset */
+	out_be32(&pci_regs->gscr, tmp & ~MPC52xx_PCI_GSCR_PR);
+}
+
+/* pcibios_fixup_resources hook: apply the MPC52xx resource quirks to @dev */
+static void
+mpc52xx_pci_fixup_resources(struct pci_dev *dev)
+{
+	struct resource *res;
+
+	pr_debug("%s() %.4x:%.4x\n", __func__, dev->vendor, dev->device);
+	/* We don't rely on the boot loader for PCI setup: rebase every valid
+	   resource to start at 0 and flag it IORESOURCE_UNSET */
+	pci_dev_for_each_resource(dev, res) {
+		if (res->end > res->start) {	/* Only valid resources */
+			res->end -= res->start;
+			res->start = 0;
+			res->flags |= IORESOURCE_UNSET;
+		}
+	}
+
+	/* The PCI Host bridge of MPC52xx has a prefetch memory resource
+	   fixed to 1Gb. Doesn't fit in the resource system so we remove it */
+	if ( (dev->vendor == PCI_VENDOR_ID_MOTOROLA) &&
+	     (   dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200
+	      || dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200B) ) {
+		res = &dev->resource[1];	/* reuse the local, no shadowing */
+		res->start = res->end = res->flags = 0;
+	}
+}
+
+/* Register the MPC52xx PCI host bridge at @node; returns 0 or -errno */
+int __init
+mpc52xx_add_bridge(struct device_node *node)
+{
+	int len;
+	struct mpc52xx_pci __iomem *pci_regs;
+	struct pci_controller *hose;
+	const int *bus_range;
+	struct resource rsrc;
+
+	pr_debug("Adding MPC52xx PCI host bridge %pOF\n", node);
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	if (of_address_to_resource(node, 0, &rsrc) != 0) {
+		printk(KERN_ERR "Can't get %pOF resources\n", node);
+		return -EINVAL;
+	}
+
+	bus_range = of_get_property(node, "bus-range", &len);
+	if (bus_range == NULL || len < (int)(2 * sizeof(int))) {
+		printk(KERN_WARNING "Can't get %pOF bus-range, assume bus 0\n",
+		       node);
+		bus_range = NULL;
+	}
+
+	/* There are some PCI quirks on the 52xx; register the hook to fix them */
+	ppc_md.pcibios_fixup_resources = mpc52xx_pci_fixup_resources;
+
+	/* Alloc and initialize the pci controller.  Values in the device
+	 * tree are needed to configure the 52xx PCI controller.  Rather
+	 * than parse the tree here, let pci_process_bridge_OF_ranges()
+	 * do it for us and extract the values after the fact */
+	hose = pcibios_alloc_controller(node);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->ops = &mpc52xx_pci_ops;
+
+	pci_regs = ioremap(rsrc.start, resource_size(&rsrc));
+	if (!pci_regs) {
+		pcibios_free_controller(hose);	/* don't leak the controller */
+		return -ENOMEM;
+	}
+
+	pci_process_bridge_OF_ranges(hose, node, 1);
+
+	/* Finish setting up PCI using values from pci_process_bridge_OF_ranges() */
+	mpc52xx_pci_setup(hose, pci_regs, rsrc.start);
+
+	return 0;
+}
+
+/* Look up a node matching mpc52xx_pci_ids and, if present, add its bridge */
+void __init mpc52xx_setup_pci(void)
+{
+	struct device_node *np = of_find_matching_node(NULL, mpc52xx_pci_ids);
+
+	if (np) {
+		/* the node obtained from the lookup is released afterwards */
+		mpc52xx_add_bridge(np);
+		of_node_put(np);
+	}
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
new file mode 100644
index 0000000000..1e0a5e9644
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -0,0 +1,519 @@
+/*
+ *
+ * Programmable Interrupt Controller functions for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2006 bplan GmbH
+ * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003 Montavista Software, Inc
+ *
+ * Based on the code from the 2.4 kernel by
+ * Dale Farnsworth <dfarnsworth@mvista.com> and Kent Borg.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+/*
+ * This is the device driver for the MPC5200 interrupt controller.
+ *
+ * hardware overview
+ * -----------------
+ * The MPC5200 interrupt controller groups all the interrupt sources into
+ * three groups called 'critical', 'main', and 'peripheral'.  The critical
+ * group has 3 irqs, External IRQ0, slice timer 0 irq, and wake from deep
+ * sleep.  Main group includes the other 3 external IRQs, slice timer 1, RTC,
+ * gpios, and the general purpose timers.  Peripheral group contains the
+ * remaining irq sources from all of the on-chip peripherals (PSCs, Ethernet,
+ * USB, DMA, etc).
+ *
+ * virqs
+ * -----
+ * The Linux IRQ subsystem requires that each irq source be assigned a
+ * system wide unique IRQ number starting at 1 (0 means no irq).  Since
+ * systems can have multiple interrupt controllers, the virtual IRQ (virq)
+ * infrastructure lets each interrupt controller define a local set
+ * of IRQ numbers and the virq infrastructure maps those numbers into
+ * a unique range of the global IRQ# space.
+ *
+ * To define a range of virq numbers for this controller, this driver first
+ * assigns a number to each of the irq groups (called the level 1 or L1
+ * value).  Within each group individual irq sources are also assigned a
+ * number, as defined by the MPC5200 user guide, and refers to it as the
+ * level 2 or L2 value.  The virq number is determined by shifting up the
+ * L1 value by MPC52xx_IRQ_L1_OFFSET and ORing it with the L2 value.
+ *
+ * For example, the TMR0 interrupt is irq 9 in the main group.  The
+ * virq for TMR0 is calculated by ((1 << MPC52xx_IRQ_L1_OFFSET) | 9).
+ *
+ * The observant reader will also notice that this driver defines a 4th
+ * interrupt group called 'bestcomm'.  The bestcomm group isn't physically
+ * part of the MPC5200 interrupt controller, but it is used here to assign
+ * a separate virq number for each bestcomm task (since any of the 16
+ * bestcomm tasks can cause the bestcomm interrupt to be raised).  When a
+ * bestcomm interrupt occurs (peripheral group, irq 0) this driver determines
+ * which task needs servicing and returns the irq number for that task.  This
+ * allows drivers which use bestcomm to define their own interrupt handlers.
+ *
+ * irq_chip structures
+ * -------------------
+ * For actually manipulating IRQs (masking, enabling, clearing, etc) this
+ * driver defines four separate 'irq_chip' structures, one for the main
+ * group, one for the peripherals group, one for the bestcomm group and one
+ * for external interrupts.  The irq_chip structures provide the hooks needed
+ * to manipulate each IRQ source, and since each group has a separate set
+ * of registers for controlling the irq, it makes sense to divide up the
+ * hooks along those lines.
+ *
+ * You'll notice that there is not an irq_chip for the critical group and
+ * you'll also notice that there is an irq_chip defined for external
+ * interrupts even though there is no external interrupt group.  The reason
+ * for this is that the four external interrupts are all managed with the same
+ * register even though one of the external IRQs is in the critical group and
+ * the other three are in the main group.  For this reason it makes sense for
+ * the 4 external irqs to be managed using a separate set of hooks.  The
+ * reason there is no crit irq_chip is that of the 3 irqs in the critical
+ * group, only external interrupt is actually supported at this time by this
+ * driver and since external interrupt is the only one used, it can just
+ * be directed to make use of the external irq irq_chip.
+ *
+ * device tree bindings
+ * --------------------
+ * The device tree bindings for this controller reflect the two level
+ * organization of irqs in the device.  #interrupt-cells = <3> where the
+ * first cell is the group number [0..3], the second cell is the irq
+ * number in the group, and the third cell is the sense type (level/edge).
+ * For reference, the following is a list of the interrupt property values
+ * associated with external interrupt sources on the MPC5200 (just because
+ * it is non-obvious to determine what the interrupts property should be
+ * when reading the mpc5200 manual and it is a frequently asked question).
+ *
+ * External interrupts:
+ * <0 0 n>	external irq0, n is sense	(n=0: level high,
+ * <1 1 n>	external irq1, n is sense	 n=1: edge rising,
+ * <1 2 n>	external irq2, n is sense	 n=2: edge falling,
+ * <1 3 n>	external irq3, n is sense	 n=3: level low)
+ */
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/io.h>
+#include <asm/mpc52xx.h>
+
+/* HW IRQ mapping */
+#define MPC52xx_IRQ_L1_CRIT	(0)
+#define MPC52xx_IRQ_L1_MAIN	(1)
+#define MPC52xx_IRQ_L1_PERP	(2)
+#define MPC52xx_IRQ_L1_SDMA	(3)
+
+#define MPC52xx_IRQ_L1_OFFSET	(6)
+#define MPC52xx_IRQ_L1_MASK	(0x00c0)
+#define MPC52xx_IRQ_L2_MASK	(0x003f)
+
+#define MPC52xx_IRQ_HIGHTESTHWIRQ (0xd0)
+
+
+/* MPC5200 device tree match tables */
+static const struct of_device_id mpc52xx_pic_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-pic", },
+	{ .compatible = "mpc5200-pic", },
+	{}
+};
+static const struct of_device_id mpc52xx_sdma_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-bestcomm", },
+	{ .compatible = "mpc5200-bestcomm", },
+	{}
+};
+
+static struct mpc52xx_intr __iomem *intr;
+static struct mpc52xx_sdma __iomem *sdma;
+static struct irq_domain *mpc52xx_irqhost = NULL;
+
+static unsigned char mpc52xx_map_senses[4] = {
+	IRQ_TYPE_LEVEL_HIGH,
+	IRQ_TYPE_EDGE_RISING,
+	IRQ_TYPE_EDGE_FALLING,
+	IRQ_TYPE_LEVEL_LOW,
+};
+
+/* Utility functions: set/clear one register bit via in_be32()/out_be32() */
+static inline void io_be_setbit(u32 __iomem *addr, int bitno)
+{
+	out_be32(addr, in_be32(addr) | (1U << bitno));	/* 1U: bitno may be 31 */
+}
+
+static inline void io_be_clrbit(u32 __iomem *addr, int bitno)
+{
+	out_be32(addr, in_be32(addr) & ~(1U << bitno));	/* 1U: bitno may be 31 */
+}
+
+/*
+ * IRQ[0-3] interrupt irq_chip
+ */
+static void mpc52xx_extirq_mask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_clrbit(&intr->ctrl, 11 - l2irq);
+}
+
+static void mpc52xx_extirq_unmask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_setbit(&intr->ctrl, 11 - l2irq);
+}
+
+static void mpc52xx_extirq_ack(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_setbit(&intr->ctrl, 27-l2irq);
+}
+
+/* irq_set_type hook: program the sense of external IRQ @d in intr->ctrl */
+static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	u32 ctrl_reg, type;
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	irq_flow_handler_t handler = handle_level_irq;
+
+	pr_debug("%s: irq=%x. l2=%d flow_type=%u\n", __func__,
+		(int) irqd_to_hwirq(d), l2irq, flow_type);
+
+	switch (flow_type) {
+	case IRQF_TRIGGER_HIGH: type = 0; break;
+	case IRQF_TRIGGER_RISING: type = 1; handler = handle_edge_irq; break;
+	case IRQF_TRIGGER_FALLING: type = 2; handler = handle_edge_irq; break;
+	case IRQF_TRIGGER_LOW: type = 3; break;
+	default: type = 0; break;	/* anything else is treated as level high */
+	}
+	/* each IRQ owns a 2-bit sense field at bit (22 - 2 * l2irq) of ctrl */
+	ctrl_reg = in_be32(&intr->ctrl);
+	ctrl_reg &= ~(0x3 << (22 - (l2irq * 2)));
+	ctrl_reg |= (type << (22 - (l2irq * 2)));
+	out_be32(&intr->ctrl, ctrl_reg);
+
+	irq_set_handler_locked(d, handler);
+
+	return 0;
+}
+
+static struct irq_chip mpc52xx_extirq_irqchip = {
+	.name = "MPC52xx External",
+	.irq_mask = mpc52xx_extirq_mask,
+	.irq_unmask = mpc52xx_extirq_unmask,
+	.irq_ack = mpc52xx_extirq_ack,
+	.irq_set_type = mpc52xx_extirq_set_type,
+};
+
+/*
+ * Main interrupt irq_chip
+ */
+static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	return 0; /* Do nothing so that the sense mask will get updated */
+}
+
+static void mpc52xx_main_mask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_setbit(&intr->main_mask, 16 - l2irq);
+}
+
+static void mpc52xx_main_unmask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_clrbit(&intr->main_mask, 16 - l2irq);
+}
+
+static struct irq_chip mpc52xx_main_irqchip = {
+	.name = "MPC52xx Main",
+	.irq_mask = mpc52xx_main_mask,
+	.irq_mask_ack = mpc52xx_main_mask,
+	.irq_unmask = mpc52xx_main_unmask,
+	.irq_set_type = mpc52xx_null_set_type,
+};
+
+/*
+ * Peripherals interrupt irq_chip
+ */
+static void mpc52xx_periph_mask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_setbit(&intr->per_mask, 31 - l2irq);
+}
+
+static void mpc52xx_periph_unmask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_clrbit(&intr->per_mask, 31 - l2irq);
+}
+
+static struct irq_chip mpc52xx_periph_irqchip = {
+	.name = "MPC52xx Peripherals",
+	.irq_mask = mpc52xx_periph_mask,
+	.irq_mask_ack = mpc52xx_periph_mask,
+	.irq_unmask = mpc52xx_periph_unmask,
+	.irq_set_type = mpc52xx_null_set_type,
+};
+
+/*
+ * SDMA interrupt irq_chip
+ */
+static void mpc52xx_sdma_mask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_setbit(&sdma->IntMask, l2irq);
+}
+
+static void mpc52xx_sdma_unmask(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	io_be_clrbit(&sdma->IntMask, l2irq);
+}
+
+static void mpc52xx_sdma_ack(struct irq_data *d)
+{
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+	out_be32(&sdma->IntPend, 1 << l2irq);
+}
+
+static struct irq_chip mpc52xx_sdma_irqchip = {
+	.name = "MPC52xx SDMA",
+	.irq_mask = mpc52xx_sdma_mask,
+	.irq_unmask = mpc52xx_sdma_unmask,
+	.irq_ack = mpc52xx_sdma_ack,
+	.irq_set_type = mpc52xx_null_set_type,
+};
+
+/**
+ * mpc52xx_is_extirq - Returns true if hwirq number is for an external IRQ
+ */
+static int mpc52xx_is_extirq(int l1, int l2)
+{
+	/* IRQ0 is <group 0, irq 0>; IRQ1..IRQ3 are <group 1, irq 1..3> */
+	return l1 == 0 ? l2 == 0 : (l1 == 1 && l2 >= 1 && l2 <= 3);
+}
+
+/**
+ * mpc52xx_irqhost_xlate - translate virq# from device tree interrupts property
+ * @intspec: three cells: <L1 group, L2 irq within the group, sense>
+ * @out_hwirq: set to the L1 value shifted by MPC52xx_IRQ_L1_OFFSET, ORed with L2
+ * @out_flags: set to the mapped sense for external IRQs, else IRQ_TYPE_LEVEL_LOW
+ * Returns 0 on success, -EINVAL if @intsize is not 3.
+ */
+static int mpc52xx_irqhost_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq,
+				 unsigned int *out_flags)
+{
+	int intrvect_l1, intrvect_l2, intrvect_type, intrvect_linux;
+
+	if (intsize != 3)
+		return -EINVAL;
+
+	intrvect_l1 = (int)intspec[0];
+	intrvect_l2 = (int)intspec[1];
+	intrvect_type = (int)intspec[2] & 0x3;
+
+	intrvect_linux = (intrvect_l1 << MPC52xx_IRQ_L1_OFFSET) &
+			 MPC52xx_IRQ_L1_MASK;
+	intrvect_linux |= intrvect_l2 & MPC52xx_IRQ_L2_MASK;
+
+	*out_hwirq = intrvect_linux;
+	*out_flags = IRQ_TYPE_LEVEL_LOW;
+	if (mpc52xx_is_extirq(intrvect_l1, intrvect_l2))
+		*out_flags = mpc52xx_map_senses[intrvect_type];
+	pr_debug("return %x, l1=%d, l2=%d\n", intrvect_linux, intrvect_l1,
+		 intrvect_l2);
+	return 0;
+}
+
+/**
+ * mpc52xx_irqhost_map - Hook to map from virq to an irq_chip structure
+ */
+static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq,
+			       irq_hw_number_t irq)
+{
+	int l1irq;
+	int l2irq;
+	struct irq_chip *irqchip;
+	void *hndlr;
+	int type;
+	u32 reg;
+
+	l1irq = (irq & MPC52xx_IRQ_L1_MASK) >> MPC52xx_IRQ_L1_OFFSET;
+	l2irq = irq & MPC52xx_IRQ_L2_MASK;
+
+	/*
+	 * External IRQs are handled differently by the hardware so they are
+	 * handled by a dedicated irq_chip structure.
+	 */
+	if (mpc52xx_is_extirq(l1irq, l2irq)) {
+		reg = in_be32(&intr->ctrl);
+		type = mpc52xx_map_senses[(reg >> (22 - l2irq * 2)) & 0x3];
+		if ((type == IRQ_TYPE_EDGE_FALLING) ||
+		    (type == IRQ_TYPE_EDGE_RISING))
+			hndlr = handle_edge_irq;
+		else
+			hndlr = handle_level_irq;
+
+		irq_set_chip_and_handler(virq, &mpc52xx_extirq_irqchip, hndlr);
+		pr_debug("%s: External IRQ%i virq=%x, hw=%x. type=%x\n",
+			 __func__, l2irq, virq, (int)irq, type);
+		return 0;
+	}
+
+	/* It is an internal SOC irq.  Choose the correct irq_chip */
+	switch (l1irq) {
+	case MPC52xx_IRQ_L1_MAIN: irqchip = &mpc52xx_main_irqchip; break;
+	case MPC52xx_IRQ_L1_PERP: irqchip = &mpc52xx_periph_irqchip; break;
+	case MPC52xx_IRQ_L1_SDMA: irqchip = &mpc52xx_sdma_irqchip; break;
+	case MPC52xx_IRQ_L1_CRIT:
+		pr_warn("%s: Critical IRQ #%d is unsupported! Nopping it.\n",
+			__func__, l2irq);
+		irq_set_chip(virq, &no_irq_chip);
+		return 0;
+	}
+
+	irq_set_chip_and_handler(virq, irqchip, handle_level_irq);
+	pr_debug("%s: virq=%x, l1=%i, l2=%i\n", __func__, virq, l1irq, l2irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mpc52xx_irqhost_ops = {
+	.xlate = mpc52xx_irqhost_xlate,
+	.map = mpc52xx_irqhost_map,
+};
+
+/**
+ * mpc52xx_init_irq - Initialize and register with the virq subsystem
+ *
+ * Hook for setting up IRQs on an mpc5200 system.  A pointer to this function
+ * is to be put into the machine definition structure.
+ *
+ * This function searches the device tree for an MPC5200 interrupt controller,
+ * initializes it, and registers it with the virq subsystem.
+ */
+void __init mpc52xx_init_irq(void)
+{
+	u32 intr_ctrl;
+	struct device_node *picnode;
+	struct device_node *np;
+
+	/* Remap the necessary zones */
+	picnode = of_find_matching_node(NULL, mpc52xx_pic_ids);
+	intr = of_iomap(picnode, 0);
+	if (!intr)
+		panic(__FILE__	": find_and_map failed on 'mpc5200-pic'. "
+				"Check node !");
+
+	np = of_find_matching_node(NULL, mpc52xx_sdma_ids);
+	sdma = of_iomap(np, 0);
+	of_node_put(np);
+	if (!sdma)
+		panic(__FILE__	": find_and_map failed on 'mpc5200-bestcomm'. "
+				"Check node !");
+
+	pr_debug("MPC5200 IRQ controller mapped to 0x%p\n", intr);
+
+	/* Disable all interrupt sources. */
+	out_be32(&sdma->IntPend, 0xffffffff);	/* 1 means clear pending */
+	out_be32(&sdma->IntMask, 0xffffffff);	/* 1 means disabled */
+	out_be32(&intr->per_mask, 0x7ffffc00);	/* 1 means disabled */
+	out_be32(&intr->main_mask, 0x00010fff);	/* 1 means disabled */
+	intr_ctrl = in_be32(&intr->ctrl);
+	intr_ctrl &= 0x00ff0000;	/* Keeps IRQ[0-3] config */
+	intr_ctrl |=	0x0f000000 |	/* clear IRQ 0-3 */
+			0x00001000 |	/* MEE master external enable */
+			0x00000000 |	/* 0 means disable IRQ 0-3 */
+			0x00000001;	/* CEb route critical normally */
+	out_be32(&intr->ctrl, intr_ctrl);
+
+	/* Zero a bunch of the priority settings. */
+	out_be32(&intr->per_pri1, 0);
+	out_be32(&intr->per_pri2, 0);
+	out_be32(&intr->per_pri3, 0);
+	out_be32(&intr->main_pri1, 0);
+	out_be32(&intr->main_pri2, 0);
+
+	/*
+	 * As last step, add an irq host to translate the real
+	 * hw irq information provided by the ofw to linux virq
+	 */
+	mpc52xx_irqhost = irq_domain_add_linear(picnode,
+	                                 MPC52xx_IRQ_HIGHTESTHWIRQ,
+	                                 &mpc52xx_irqhost_ops, NULL);
+
+	if (!mpc52xx_irqhost)
+		panic(__FILE__ ": Cannot allocate the IRQ host\n");
+
+	irq_set_default_host(mpc52xx_irqhost);
+
+	pr_info("MPC52xx PIC is up and running!\n");
+}
+
+/**
+ * mpc52xx_get_irq - Get pending interrupt number hook function
+ *
+ * Called by the interrupt handler to determine what IRQ handler needs to be
+ * executed.
+ *
+ * Status of pending interrupts is determined by reading the encoded status
+ * register.  The encoded status register has three fields; one for each of the
+ * types of interrupts defined by the controller - 'critical', 'main' and
+ * 'peripheral'.  This function reads the status register and returns the IRQ
+ * number associated with the highest priority pending interrupt.  'Critical'
+ * interrupts have the highest priority, followed by 'main' interrupts, and
+ * then 'peripheral'.
+ *
+ * The mpc5200 interrupt controller can be configured to boost the priority
+ * of individual 'peripheral' interrupts.  If this is the case then a special
+ * value will appear in either the crit or main fields indicating a high
+ * or medium priority peripheral irq has occurred.
+ *
+ * This function checks each of the 3 irq request fields and returns the
+ * first pending interrupt that it finds.
+ *
+ * This function also identifies a 4th type of interrupt; 'bestcomm'.  Each
+ * bestcomm DMA task can raise the bestcomm peripheral interrupt.  When this
+ * occurs a task-specific IRQ# is decoded so that each task can have its
+ * own IRQ handler.
+ */
+unsigned int mpc52xx_get_irq(void)
+{
+	u32 status;
+	int irq;
+
+	status = in_be32(&intr->enc_status);
+	if (status & 0x00000400) {	/* critical */
+		irq = (status >> 8) & 0x3;
+		if (irq == 2)	/* high priority peripheral */
+			goto peripheral;
+		irq |= (MPC52xx_IRQ_L1_CRIT << MPC52xx_IRQ_L1_OFFSET);
+	} else if (status & 0x00200000) {	/* main */
+		irq = (status >> 16) & 0x1f;
+		if (irq == 4)	/* low priority peripheral */
+			goto peripheral;
+		irq |= (MPC52xx_IRQ_L1_MAIN << MPC52xx_IRQ_L1_OFFSET);
+	} else if (status & 0x20000000) {	/* peripheral */
+	      peripheral:
+		irq = (status >> 24) & 0x1f;
+		if (irq == 0) {	/* bestcomm */
+			status = in_be32(&sdma->IntPend);
+			irq = ffs(status) - 1;
+			irq |= (MPC52xx_IRQ_L1_SDMA << MPC52xx_IRQ_L1_OFFSET);
+		} else {
+			irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
+		}
+	} else {
+		return 0;
+	}
+
+	return irq_linear_revmap(mpc52xx_irqhost, irq);
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
new file mode 100644
index 0000000000..f0c31ae15d
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/cacheflush.h>
+#include <asm/mpc52xx.h>
+
+/* these are defined in mpc52xx_sleep.S, and only used here */
+extern void mpc52xx_deep_sleep(void __iomem *sram, void __iomem *sdram_regs,
+		struct mpc52xx_cdm __iomem *, struct mpc52xx_intr __iomem*);
+extern void mpc52xx_ds_sram(void);
+extern const long mpc52xx_ds_sram_size;
+extern void mpc52xx_ds_cached(void);
+extern const long mpc52xx_ds_cached_size;
+
+static void __iomem *mbar;
+static void __iomem *sdram;
+static struct mpc52xx_cdm __iomem *cdm;
+static struct mpc52xx_intr __iomem *intr;
+static struct mpc52xx_gpio_wkup __iomem *gpiow;
+static void __iomem *sram;
+static int sram_size;
+
+struct mpc52xx_suspend mpc52xx_suspend;
+
+/* .valid hook: PM_SUSPEND_STANDBY is the only suspend state accepted here */
+static int mpc52xx_pm_valid(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY)
+		return 1;
+
+	/* every other state is rejected */
+	return 0;
+}
+
+int mpc52xx_set_wakeup_gpio(u8 pin, u8 level)
+{
+	u16 tmp;
+
+	/* enable gpio */
+	out_8(&gpiow->wkup_gpioe, in_8(&gpiow->wkup_gpioe) | (1 << pin));
+	/* set as input */
+	out_8(&gpiow->wkup_ddr, in_8(&gpiow->wkup_ddr) & ~(1 << pin));
+	/* enable deep sleep interrupt */
+	out_8(&gpiow->wkup_inten, in_8(&gpiow->wkup_inten) | (1 << pin));
+	/* low/high level creates wakeup interrupt */
+	tmp = in_be16(&gpiow->wkup_itype);
+	tmp &= ~(0x3 << (pin * 2));
+	tmp |= (!level + 1) << (pin * 2);
+	out_be16(&gpiow->wkup_itype, tmp);
+	/* master enable */
+	out_8(&gpiow->wkup_maste, 1);
+
+	return 0;
+}
+
+/* .prepare hook: map the IMMR register block and run the board suspend hook.
+ * On failure mbar is left NULL so that nothing stale is unmapped later. */
+int mpc52xx_pm_prepare(void)
+{
+	struct device_node *np;
+	static const struct of_device_id immr_ids[] = {
+		{ .compatible = "fsl,mpc5200-immr", },
+		{ .compatible = "fsl,mpc5200b-immr", },
+		{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+		{ .type = "builtin", .compatible = "mpc5200", }, /* efika */
+		{}
+	};
+	struct resource res;
+
+	/* map the whole register space */
+	np = of_find_matching_node(NULL, immr_ids);
+	if (of_address_to_resource(np, 0, &res)) {
+		pr_err("mpc52xx_pm_prepare(): could not get IMMR address\n");
+		of_node_put(np);
+		mbar = NULL;	/* drop whatever an earlier cycle left behind */
+		return -ENOSYS;
+	}
+
+	mbar = ioremap(res.start, 0xc000); /* we should map whole region including SRAM */
+	of_node_put(np);
+	if (!mbar) {
+		pr_err("mpc52xx_pm_prepare(): could not map registers\n");
+		return -ENOSYS;
+	}
+	/* these offsets are from mpc5200 users manual */
+	sdram	= mbar + 0x100;
+	cdm	= mbar + 0x200;
+	intr	= mbar + 0x500;
+	gpiow	= mbar + 0xc00;
+	sram	= mbar + 0x8000;	/* Those will be handled by the */
+	sram_size = 0x4000;		/* bestcomm driver soon */
+
+	/* without a board hook we cannot arrange a wakeup: undo and bail out */
+	if (!mpc52xx_suspend.board_suspend_prepare) {
+		printk(KERN_ALERT "%s: %i don't know how to wake up the board\n",
+				__func__, __LINE__);
+		iounmap(mbar);
+		mbar = NULL;	/* mpc52xx_pm_finish() must not unmap it again */
+		return -ENOSYS;
+	}
+
+	/* call board suspend code */
+	mpc52xx_suspend.board_suspend_prepare(mbar);
+
+	return 0;
+}
+
+
+char saved_sram[0x4000];
+
+int mpc52xx_pm_enter(suspend_state_t state)
+{
+	u32 clk_enables;
+	u32 msr, hid0;
+	u32 intr_main_mask;
+	void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
+	unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
+	char saved_0x500[0x600-0x500];
+
+	if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
+		return -ENOMEM;
+
+	/* disable all interrupts in PIC */
+	intr_main_mask = in_be32(&intr->main_mask);
+	out_be32(&intr->main_mask, intr_main_mask | 0x1ffff);
+
+	/* don't let DEC expire any time soon */
+	mtspr(SPRN_DEC, 0x7fffffff);
+
+	/* save SRAM */
+	memcpy(saved_sram, sram, sram_size);
+
+	/* copy low level suspend code to sram */
+	memcpy(sram, mpc52xx_ds_sram, mpc52xx_ds_sram_size);
+
+	out_8(&cdm->ccs_sleep_enable, 1);
+	out_8(&cdm->osc_sleep_enable, 1);
+	out_8(&cdm->ccs_qreq_test, 1);
+
+	/* disable all but SDRAM and bestcomm (SRAM) clocks */
+	clk_enables = in_be32(&cdm->clk_enables);
+	out_be32(&cdm->clk_enables, clk_enables & 0x00088000);
+
+	/* disable power management */
+	msr = mfmsr();
+	mtmsr(msr & ~MSR_POW);
+
+	/* enable sleep mode, disable others */
+	hid0 = mfspr(SPRN_HID0);
+	mtspr(SPRN_HID0, (hid0 & ~(HID0_DOZE | HID0_NAP | HID0_DPM)) | HID0_SLEEP);
+
+	/* save original, copy our irq handler, flush from dcache and invalidate icache */
+	memcpy(saved_0x500, irq_0x500, mpc52xx_ds_cached_size);
+	memcpy(irq_0x500, mpc52xx_ds_cached, mpc52xx_ds_cached_size);
+	flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop);
+
+	/* call low-level sleep code */
+	mpc52xx_deep_sleep(sram, sdram, cdm, intr);
+
+	/* restore original irq handler */
+	memcpy(irq_0x500, saved_0x500, mpc52xx_ds_cached_size);
+	flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop);
+
+	/* restore old power mode */
+	mtmsr(msr & ~MSR_POW);
+	mtspr(SPRN_HID0, hid0);
+	mtmsr(msr);
+
+	out_be32(&cdm->clk_enables, clk_enables);
+	out_8(&cdm->ccs_sleep_enable, 0);
+	out_8(&cdm->osc_sleep_enable, 0);
+
+	/* restore SRAM */
+	memcpy(sram, saved_sram, sram_size);
+
+	/* reenable interrupts in PIC */
+	out_be32(&intr->main_mask, intr_main_mask);
+
+	return 0;
+}
+
+void mpc52xx_pm_finish(void)
+{
+	if (mbar && mpc52xx_suspend.board_resume_finish)
+		mpc52xx_suspend.board_resume_finish(mbar);
+	if (mbar)	/* NULL when prepare mapped nothing: skip hook and unmap */
+		iounmap(mbar);
+	mbar = NULL;	/* a later call must not unmap the same mapping again */
+}
+
+static const struct platform_suspend_ops mpc52xx_pm_ops = {
+	.valid		= mpc52xx_pm_valid,
+	.prepare	= mpc52xx_pm_prepare,
+	.enter		= mpc52xx_pm_enter,
+	.finish		= mpc52xx_pm_finish,
+};
+
+int __init mpc52xx_pm_init(void)
+{
+	suspend_set_ops(&mpc52xx_pm_ops);
+	return 0;
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_sleep.S b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
new file mode 100644
index 0000000000..a66eb311b6
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+
+.text
+
+_GLOBAL(mpc52xx_deep_sleep)
+mpc52xx_deep_sleep: /* args r3-r6: SRAM, SDRAM regs, CDM regs, INTR regs */
+
+	/* enable interrupts */
+	mfmsr	r7
+	ori	r7, r7, 0x8000 /* EE */
+	mtmsr	r7
+	sync; isync;
+
+	li	r10, 0 /* flag that irq handler sets */
+
+	/* enable tmr7 (or any other) interrupt */
+	lwz	r8, 0x14(r6) /* intr->main_mask */
+	ori	r8, r8, 0x1
+	xori	r8, r8, 0x1
+	stw	r8, 0x14(r6)
+	sync
+
+	/* emulate tmr7 interrupt */
+	li	r8, 0x1
+	stw	r8, 0x40(r6) /* intr->main_emulate */
+	sync
+
+	/* wait for it to happen */
+1:
+	cmpi	cr0, r10, 1
+	bne	cr0, 1b
+
+	/* lock icache */
+	mfspr	r10, SPRN_HID0
+	ori	r10, r10, 0x2000
+	sync; isync;
+	mtspr	SPRN_HID0, r10
+	sync; isync;
+
+
+	mflr	r9 /* save LR */
+
+	/* jump to sram */
+	mtlr	r3
+	blrl
+
+	mtlr	r9 /* restore LR */
+
+	/* unlock icache */
+	mfspr	r10, SPRN_HID0
+	ori	r10, r10, 0x2000
+	xori	r10, r10, 0x2000
+	sync; isync;
+	mtspr	SPRN_HID0, r10
+	sync; isync;
+
+
+	/* return to C code */
+	blr
+
+
+_GLOBAL(mpc52xx_ds_sram)
+mpc52xx_ds_sram:
+	/* put SDRAM into self-refresh */
+	lwz	r8, 0x4(r4)	/* sdram->ctrl */
+
+	oris	r8, r8, 0x8000 /* mode_en */
+	stw	r8, 0x4(r4)
+	sync
+
+	ori	r8, r8, 0x0002 /* soft_pre */
+	stw	r8, 0x4(r4)
+	sync
+	xori	r8, r8, 0x0002
+
+	xoris	r8, r8, 0x8000 /* !mode_en */
+	stw	r8, 0x4(r4)
+	sync
+
+	oris	r8, r8, 0x5000
+	xoris	r8, r8, 0x4000 /* ref_en !cke */
+	stw	r8, 0x4(r4)
+	sync
+
+	/* disable SDRAM clock */
+	lwz	r8, 0x14(r5) /* cdm->clkenable */
+	ori	r8, r8, 0x0008
+	xori	r8, r8, 0x0008
+	stw	r8, 0x14(r5)
+	sync
+
+
+	/* put mpc5200 to sleep */
+	mfmsr	r10
+	oris	r10, r10, 0x0004	/* POW = 1 */
+	sync; isync;
+	mtmsr	r10
+	sync; isync;
+
+
+	/* enable clock */
+	lwz	r8, 0x14(r5)
+	ori	r8, r8, 0x0008
+	stw	r8, 0x14(r5)
+	sync
+
+	/* get ram out of self-refresh */
+	lwz	r8, 0x4(r4)
+	oris	r8, r8, 0x5000 /* cke ref_en */
+	stw	r8, 0x4(r4)
+	sync
+
+	blr
+_GLOBAL(mpc52xx_ds_sram_size)
+mpc52xx_ds_sram_size:
+	.long $-mpc52xx_ds_sram
+
+
+/* ### interrupt handler for wakeup from deep-sleep ### */
+_GLOBAL(mpc52xx_ds_cached)
+mpc52xx_ds_cached:
+	mtspr	SPRN_SPRG0, r7
+	mtspr	SPRN_SPRG1, r8
+
+	/* disable emulated interrupt */
+	mfspr	r7, 311 /* MBAR */
+	addi	r7, r7, 0x540	/* intr->main_emul */
+	li	r8, 0
+	stw	r8, 0(r7)
+	sync
+	dcbf	0, r7
+
+	/* acknowledge wakeup, so CCS releases power down */
+	mfspr	r7, 311	/* MBAR */
+	addi	r7, r7, 0x524	/* intr->enc_status */
+	lwz	r8, 0(r7)
+	ori	r8, r8, 0x0400
+	stw	r8, 0(r7)
+	sync
+	dcbf	0, r7
+
+	/* flag - we handled the interrupt */
+	li	r10, 1
+
+	mfspr	r8, SPRN_SPRG1
+	mfspr	r7, SPRN_SPRG0
+
+	rfi
+_GLOBAL(mpc52xx_ds_cached_size)
+mpc52xx_ds_cached_size:
+	.long $-mpc52xx_ds_cached
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig
new file mode 100644
index 0000000000..1824536cf6
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig PPC_82xx
+	bool "82xx-based boards (PQ II)"
+	depends on PPC_BOOK3S_32
+	select FSL_SOC
+
+if PPC_82xx
+
+config EP8248E
+	bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)"
+	select CPM2
+	select PPC_INDIRECT_PCI if PCI
+	select PHYLIB if NETDEVICES
+	select MDIO_BITBANG if PHYLIB
+	help
+	  This enables support for the Embedded Planet EP8248E board.
+
+	  This board is also resold by Freescale as the QUICCStart
+	  MPC8248 Evaluation System and/or the CWH-PPC-8248N-VE.
+
+config MGCOGE
+	bool "Keymile MGCOGE"
+	select CPM2
+	select PPC_INDIRECT_PCI if PCI
+	help
+	  This enables support for the Keymile MGCOGE board.
+
+endif
diff --git a/arch/powerpc/platforms/82xx/Makefile b/arch/powerpc/platforms/82xx/Makefile
new file mode 100644
index 0000000000..4fa43a5cd5
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 82xx linux kernel.
+#
+obj-$(CONFIG_CPM2) += pq2.o
+obj-$(CONFIG_EP8248E) += ep8248e.o
+obj-$(CONFIG_MGCOGE) += km82xx.o
diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c
new file mode 100644
index 0000000000..3dc65ce1f1
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/ep8248e.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Embedded Planet EP8248E support
+ *
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/fsl_devices.h>
+#include <linux/mdio-bitbang.h>
+#include <linux/of_mdio.h>
+#include <linux/slab.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/cpm2.h>
+#include <asm/udbg.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "pq2.h"
+
+static u8 __iomem *ep8248e_bcsr;
+static struct device_node *ep8248e_bcsr_node;
+
+#define BCSR7_SCC2_ENABLE 0x10
+
+#define BCSR8_PHY1_ENABLE 0x80
+#define BCSR8_PHY1_POWER  0x40
+#define BCSR8_PHY2_ENABLE 0x20
+#define BCSR8_PHY2_POWER  0x10
+#define BCSR8_MDIO_READ   0x04
+#define BCSR8_MDIO_CLOCK  0x02
+#define BCSR8_MDIO_DATA   0x01
+
+#define BCSR9_USB_ENABLE  0x80
+#define BCSR9_USB_POWER   0x40
+#define BCSR9_USB_HOST    0x20
+#define BCSR9_USB_FULL_SPEED_TARGET 0x10
+
+static void __init ep8248e_pic_init(void)
+{
+	/* The CPM2 PIC is located through its "fsl,pq2-pic" compatible string. */
+	struct device_node *pic = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic");
+	if (pic) {
+		cpm2_pic_init(pic);
+		of_node_put(pic);
+	} else {
+		printk(KERN_ERR "PIC init: can not find cpm-pic node\n");
+	}
+}
+
+static void ep8248e_set_mdc(struct mdiobb_ctrl *ctrl, int level)
+{
+	u8 __iomem *bcsr8 = &ep8248e_bcsr[8];
+	/* Drive the MDIO clock bit, then read back to flush the write. */
+	if (!level)
+		clrbits8(bcsr8, BCSR8_MDIO_CLOCK);
+	else
+		setbits8(bcsr8, BCSR8_MDIO_CLOCK);
+	in_8(bcsr8);
+}
+
+static void ep8248e_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output)
+{
+	u8 __iomem *bcsr8 = &ep8248e_bcsr[8];
+	/* BCSR8_MDIO_READ is set for input and cleared to drive the data line. */
+	if (!output)
+		setbits8(bcsr8, BCSR8_MDIO_READ);
+	else
+		clrbits8(bcsr8, BCSR8_MDIO_READ);
+	in_8(bcsr8);	/* read back to flush the write */
+}
+
+static void ep8248e_set_mdio_data(struct mdiobb_ctrl *ctrl, int data)
+{
+	u8 __iomem *bcsr8 = &ep8248e_bcsr[8];
+	/* Mirror the requested level onto the MDIO data bit of BCSR 8. */
+	if (!data)
+		clrbits8(bcsr8, BCSR8_MDIO_DATA);
+	else
+		setbits8(bcsr8, BCSR8_MDIO_DATA);
+	in_8(bcsr8);	/* read back to flush the write */
+}
+/* Sample the MDIO data bit; BCSR8_MDIO_DATA is 0x01, so this yields 0 or 1. */
+static int ep8248e_get_mdio_data(struct mdiobb_ctrl *ctrl)
+{
+	return in_8(&ep8248e_bcsr[8]) & BCSR8_MDIO_DATA;
+}
+/* Bit-bang callbacks; all of them operate on BCSR register 8. */
+static const struct mdiobb_ops ep8248e_mdio_ops = {
+	.set_mdc = ep8248e_set_mdc,
+	.set_mdio_dir = ep8248e_set_mdio_dir,
+	.set_mdio_data = ep8248e_set_mdio_data,
+	.get_mdio_data = ep8248e_get_mdio_data,
+	.owner = THIS_MODULE,
+};
+/* Controller handed to alloc_mdio_bitbang() in ep8248e_mdio_probe(). */
+static struct mdiobb_ctrl ep8248e_mdio_ctrl = {
+	.ops = &ep8248e_mdio_ops,
+};
+
+static int ep8248e_mdio_probe(struct platform_device *ofdev)
+{
+	struct mii_bus *bus;
+	struct resource res;
+	struct device_node *node;
+	int ret;
+
+	/* Bind only to an MDIO node whose parent is the BCSR node. */
+	node = of_get_parent(ofdev->dev.of_node);
+	of_node_put(node);	/* only the pointer value is compared below */
+	if (node != ep8248e_bcsr_node)
+		return -ENODEV;
+
+	/* The start of the node's first address range names the bus. */
+	ret = of_address_to_resource(ofdev->dev.of_node, 0, &res);
+	if (ret)
+		return ret;
+
+	bus = alloc_mdio_bitbang(&ep8248e_mdio_ctrl);
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = "ep8248e-mdio-bitbang";
+	bus->parent = &ofdev->dev;
+	/* resource_size_t may be 64 bits wide, so widen it explicitly. */
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start);
+
+	/* On failure the bus allocated above is released again. */
+	ret = of_mdiobus_register(bus, ofdev->dev.of_node);
+	if (ret)
+		free_mdio_bitbang(bus);
+	return ret;
+}
+/* Device tree match: only "fsl,ep8248e-mdio-bitbang" nodes are handled. */
+static const struct of_device_id ep8248e_mdio_match[] = {
+	{
+		.compatible = "fsl,ep8248e-mdio-bitbang",
+	},
+	{},
+};
+/* Platform driver registered from declare_of_platform_devices(). */
+static struct platform_driver ep8248e_mdio_driver = {
+	.driver = {
+		.name = "ep8248e-mdio-bitbang",
+		.of_match_table = ep8248e_mdio_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = ep8248e_mdio_probe,
+};
+/* One pin-mux entry; init_ioports() passes the fields to cpm2_set_pin(). */
+struct cpm_pin {
+	int port, pin, flags;
+};
+/* Pin setup applied by init_ioports(); each entry is {port, pin, flags}. */
+static __initdata struct cpm_pin ep8248e_pins[] = {
+	/* SMC1 */
+	{2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+	/* SCC1 */
+	{2, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* FCC1 */
+	{0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* FCC2 */
+	{1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* I2C -- NOTE(review): port 4 here, all other entries use 0-3; confirm */
+	{4, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{4, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+
+	/* USB */
+	{2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+};
+
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *pin = ep8248e_pins;
+	const struct cpm_pin *end = ep8248e_pins + ARRAY_SIZE(ep8248e_pins);
+
+	/* Program every pin from the board table, then route the clocks. */
+	for (; pin < end; pin++)
+		cpm2_set_pin(pin->port, pin->pin, pin->flags);
+
+	cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_TX); /* USB */
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK11, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
+}
+
+static void __init ep8248e_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("ep8248e_setup_arch()", 0);
+
+	cpm2_reset();
+
+	/* When this is set, snooping CPM DMA from RAM causes
+	 * machine checks.  See erratum SIU18.
+	 */
+	clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
+	/* Keep the BCSR node; ep8248e_mdio_probe() compares parents against it. */
+	ep8248e_bcsr_node =
+		of_find_compatible_node(NULL, NULL, "fsl,ep8248e-bcsr");
+	if (!ep8248e_bcsr_node) {
+		printk(KERN_ERR "No bcsr in device tree\n");
+		return;
+	}
+
+	ep8248e_bcsr = of_iomap(ep8248e_bcsr_node, 0);
+	if (!ep8248e_bcsr) {
+		printk(KERN_ERR "Cannot map BCSR registers\n");
+		of_node_put(ep8248e_bcsr_node);
+		ep8248e_bcsr_node = NULL;
+		return;
+	}
+	/* Enable SCC2 and enable/power both PHYs via BCSR registers 7 and 8. */
+	setbits8(&ep8248e_bcsr[7], BCSR7_SCC2_ENABLE);
+	setbits8(&ep8248e_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER |
+	                           BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER);
+
+	init_ioports();
+
+	if (ppc_md.progress)
+		ppc_md.progress("ep8248e_setup_arch(), finish", 0);
+}
+/* Match table passed to of_platform_bus_probe() below. */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .compatible = "simple-bus", },
+	{ .compatible = "fsl,ep8248e-bcsr", },
+	{},
+};
+
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	if (!IS_ENABLED(CONFIG_MDIO_BITBANG))
+		return 0;
+	/* Hand a registration failure back instead of silently dropping it. */
+	return platform_driver_register(&ep8248e_mdio_driver);
+}
+machine_device_initcall(ep8248e, declare_of_platform_devices);
+/* Machine description for boards compatible with "fsl,ep8248e". */
+define_machine(ep8248e)
+{
+	.name = "Embedded Planet EP8248E",
+	.compatible = "fsl,ep8248e",
+	.setup_arch = ep8248e_setup_arch,
+	.init_IRQ = ep8248e_pic_init,
+	.get_irq = cpm2_get_irq,
+	.restart = pq2_restart,
+	.progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
new file mode 100644
index 0000000000..c86da3f2b7
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/km82xx.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Keymile km82xx support
+ * Copyright 2008-2011 DENX Software Engineering GmbH
+ * Author: Heiko Schocher <hs@denx.de>
+ *
+ * based on code from:
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/fsl_devices.h>
+#include <linux/of_platform.h>
+
+#include <linux/io.h>
+#include <asm/cpm2.h>
+#include <asm/udbg.h>
+#include <asm/machdep.h>
+#include <linux/time.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "pq2.h"
+
+static void __init km82xx_pic_init(void)
+{
+	/* The CPM2 PIC is located through its "fsl,pq2-pic" compatible string. */
+	struct device_node *pic = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic");
+
+	if (pic) {
+		cpm2_pic_init(pic);
+		of_node_put(pic);
+	} else {
+		pr_err("PIC init: can not find cpm-pic node\n");
+	}
+}
+/* One pin-mux entry; init_ioports() passes the fields to cpm2_set_pin(). */
+struct cpm_pin {
+	int port, pin, flags;
+};
+/* Pin setup applied by init_ioports(); each entry is {port, pin, flags}. */
+static __initdata struct cpm_pin km82xx_pins[] = {
+	/* SMC1 */
+	{2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+	/* SMC2 */
+	{0, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 9, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+	/* SCC1 */
+	{2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+
+	/* SCC4 */
+	{2, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2,  9, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2,  8, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{3, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{3, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+	/* FCC1 */
+	{0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+
+	{2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* FCC2 */
+	{1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+	{2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* MDC */
+	{0, 13, CPM_PIN_OUTPUT | CPM_PIN_GPIO},
+
+#if defined(CONFIG_I2C_CPM)
+	/* I2C */
+	{3, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN},
+	{3, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN},
+#endif
+
+	/* USB */
+	{0, 10, CPM_PIN_OUTPUT | CPM_PIN_GPIO},    /* FULL_SPEED */
+	{0, 11, CPM_PIN_OUTPUT | CPM_PIN_GPIO},    /* /SLAVE */
+	{2, 10, CPM_PIN_INPUT  | CPM_PIN_PRIMARY}, /* RXN */
+	{2, 11, CPM_PIN_INPUT  | CPM_PIN_PRIMARY}, /* RXP */
+	{2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* /OE */
+	{2, 27, CPM_PIN_INPUT  | CPM_PIN_PRIMARY}, /* RXCLK */
+	{3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXP */
+	{3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXN */
+	{3, 25, CPM_PIN_INPUT  | CPM_PIN_PRIMARY}, /* RXD */
+
+	/* SPI */
+	{3, 16, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MISO PD16 */
+	{3, 17, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MOSI PD17 */
+	{3, 18, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_CLK PD18 */
+};
+
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *pin = km82xx_pins;
+	const struct cpm_pin *end = km82xx_pins + ARRAY_SIZE(km82xx_pins);
+
+	/* Program every pin from the board table, then route the clocks. */
+	for (; pin < end; pin++)
+		cpm2_set_pin(pin->port, pin->pin, pin->flags);
+
+	cpm2_smc_clk_setup(CPM_CLK_SMC2, CPM_BRG8);
+	cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_RTX);
+	cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK7, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK8, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9,  CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
+
+	/* Port D data bit for pin 10 high: USB FULL SPEED forced to '1'. */
+	setbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 10));
+	/* Port D data bit for pin 11 low: USB_SLAVE cleared. */
+	clrbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 11));
+}
+
+static void __init km82xx_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("km82xx_setup_arch()", 0);
+
+	cpm2_reset();
+
+	/* When this is set, snooping CPM DMA from RAM causes
+	 * machine checks.  See erratum SIU18.
+	 */
+	clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
+	/* Apply the board pin multiplexing and clock routing. */
+	init_ioports();
+
+	if (ppc_md.progress)
+		ppc_md.progress("km82xx_setup_arch(), finish", 0);
+}
+/* Match table passed to of_platform_bus_probe() below. */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .compatible = "simple-bus", },
+	{},
+};
+
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+	/* The probe result is not checked; this initcall always returns 0. */
+	return 0;
+}
+machine_device_initcall(km82xx, declare_of_platform_devices);
+/* Machine description for boards compatible with "keymile,km82xx". */
+define_machine(km82xx)
+{
+	.name = "Keymile km82xx",
+	.compatible = "keymile,km82xx",
+	.setup_arch = km82xx_setup_arch,
+	.init_IRQ = km82xx_pic_init,
+	.get_irq = cpm2_get_irq,
+	.restart = pq2_restart,
+	.progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
new file mode 100644
index 0000000000..391d72a2e0
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common PowerQUICC II code.
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor
+ *
+ * Based on code by Vitaly Bordug <vbordug@ru.mvista.com>
+ * pq2_restart fix by Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright (c) 2006 MontaVista Software, Inc.
+ */
+
+#include <linux/kprobes.h>
+
+#include <asm/cpm2.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+
+#include <platforms/82xx/pq2.h>
+
+#define RMR_CSRE 0x00000001
+
+void __noreturn pq2_restart(char *cmd)
+{
+	local_irq_disable();
+	setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE);
+	/* NOTE(review): RMR_CSRE presumably arms reset-on-checkstop -- confirm */
+	/* Clear the ME,EE,IR & DR bits in MSR to cause checkstop */
+	mtmsr(mfmsr() & ~(MSR_ME | MSR_EE | MSR_IR | MSR_DR));
+	in_8(&cpm2_immr->im_clkrst.res[0]);
+	/* Only reached if the sequence above did not restart the board. */
+	panic("Restart failed\n");
+}
+NOKPROBE_SYMBOL(pq2_restart)
diff --git a/arch/powerpc/platforms/82xx/pq2.h b/arch/powerpc/platforms/82xx/pq2.h
new file mode 100644
index 0000000000..902ef0bd49
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/pq2.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PQ2_H
+#define _PQ2_H
+/* Board restart hook defined in pq2.c; declared as never returning. */
+void __noreturn pq2_restart(char *cmd);
+
+#ifdef CONFIG_PCI
+int pq2ads_pci_init_irq(void);
+void pq2_init_pci(void);
+#else	/* !CONFIG_PCI: no-op stubs so callers need no #ifdef */
+static inline int pq2ads_pci_init_irq(void)
+{
+	return 0;
+}
+
+static inline void pq2_init_pci(void)
+{
+}
+#endif	/* CONFIG_PCI */
+
+#endif	/* _PQ2_H */
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
new file mode 100644
index 0000000000..d355ad4099
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig PPC_83xx
+	bool "83xx-based boards"
+	depends on PPC_BOOK3S_32
+	select PPC_UDBG_16550
+	select HAVE_PCI
+	select FSL_PCI if PCI
+	select FSL_SOC
+	select IPIC
+
+if PPC_83xx
+
+config MPC830x_RDB
+	bool "Freescale MPC830x RDB and derivatives"
+	select DEFAULT_UIMAGE
+	select PPC_MPC831x
+	select FSL_GTM
+	help
+	  This option enables support for the MPC8308 RDB and MPC8308 P1M boards.
+
+config MPC831x_RDB
+	bool "Freescale MPC831x RDB"
+	select DEFAULT_UIMAGE
+	select PPC_MPC831x
+	help
+	  This option enables support for the MPC8313 RDB and MPC8315 RDB boards.
+
+config MPC832x_RDB
+	bool "Freescale MPC832x RDB"
+	select DEFAULT_UIMAGE
+	select PPC_MPC832x
+	help
+	  This option enables support for the MPC8323 RDB board.
+
+config MPC834x_ITX
+	bool "Freescale MPC834x ITX"
+	select DEFAULT_UIMAGE
+	select PPC_MPC834x
+	help
+	  This option enables support for the MPC 834x ITX evaluation board.
+
+	  Be aware that PCI initialization is the bootloader's
+	  responsibility.
+
+config MPC836x_RDK
+	bool "Freescale/Logic MPC836x RDK"
+	select DEFAULT_UIMAGE
+	select FSL_GTM
+	select FSL_LBC
+	help
+	  This option enables support for the MPC836x RDK Processor Board,
+	  also known as ZOOM PowerQUICC Kit.
+
+config MPC837x_RDB
+	bool "Freescale MPC837x RDB/WLAN"
+	select DEFAULT_UIMAGE
+	select PPC_MPC837x
+	help
+	  This option enables support for the MPC837x RDB and WLAN Boards.
+
+config ASP834x
+	bool "Analogue & Micro ASP 834x"
+	select PPC_MPC834x
+	help
+	  This enables support for the Analogue & Micro ASP 83xx
+	  board.
+
+config KMETER1
+	bool "Keymile KMETER1"
+	select DEFAULT_UIMAGE
+	select QUICC_ENGINE
+	help
+	  This enables support for the Keymile KMETER1 board.
+
+
+endif
+
+# used for usb & gpio
+config PPC_MPC831x
+	bool
+
+# used for math-emu
+config PPC_MPC832x
+	bool
+
+# used for usb & gpio
+config PPC_MPC834x
+	bool
+
+# used for usb & gpio
+config PPC_MPC837x
+	bool
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
new file mode 100644
index 0000000000..6fc3dba943
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 83xx linux kernel.
+#
+obj-y				:= misc.o
+obj-$(CONFIG_SUSPEND)		+= suspend.o suspend-asm.o
+obj-$(CONFIG_MCU_MPC8349EMITX)	+= mcu_mpc8349emitx.o
+obj-$(CONFIG_MPC830x_RDB)	+= mpc830x_rdb.o
+obj-$(CONFIG_MPC831x_RDB)	+= mpc831x_rdb.o
+obj-$(CONFIG_MPC832x_RDB)	+= mpc832x_rdb.o
+obj-$(CONFIG_MPC834x_ITX)	+= mpc834x_itx.o
+obj-$(CONFIG_MPC836x_RDK)	+= mpc836x_rdk.o
+obj-$(CONFIG_MPC837x_RDB)	+= mpc837x_rdb.o
+obj-$(CONFIG_ASP834x)		+= asp834x.o
+obj-$(CONFIG_KMETER1)		+= km83xx.o
+obj-$(CONFIG_PPC_MPC831x)	+= usb_831x.o
+obj-$(CONFIG_PPC_MPC834x)	+= usb_834x.o
+obj-$(CONFIG_PPC_MPC837x)	+= usb_837x.o
diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c
new file mode 100644
index 0000000000..6870d0c34f
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/asp834x.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/asp834x.c
+ *
+ * Analogue & Micro ASP8347 board specific routines
+ * clone of mpc834x_itx
+ *
+ * Copyright 2008 Codehermit
+ *
+ * Maintainer: Bryan O'Donoghue <bodonoghue@codhermit.ie>
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+
+#include "mpc83xx.h"
+
+/* ************************************************************************
+ *
+ * Setup the architecture: run the common MPC83xx setup, then the
+ * MPC834x USB configuration.
+ */
+static void __init asp834x_setup_arch(void)
+{
+	mpc83xx_setup_arch();
+	mpc834x_usb_cfg();
+}
+
+machine_device_initcall(asp834x, mpc83xx_declare_of_platform_devices);
+/* Machine description for "analogue-and-micro,asp8347e" compatible boards. */
+define_machine(asp834x) {
+	.name			= "ASP8347E",
+	.compatible		= "analogue-and-micro,asp8347e",
+	.setup_arch		= asp834x_setup_arch,
+	.discover_phbs		= mpc83xx_setup_pci,
+	.init_IRQ		= mpc83xx_ipic_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc83xx_restart,
+	.time_init		= mpc83xx_time_init,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
new file mode 100644
index 0000000000..2b5d187d9b
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008-2011 DENX Software Engineering GmbH
+ * Author: Heiko Schocher <hs@denx.de>
+ *
+ * Description:
+ * Keymile 83xx platform specific routines.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <linux/atomic.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/irq.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <soc/fsl/qe/qe.h>
+
+#include "mpc83xx.h"
+
+#define SVR_REV(svr)    (((svr) >>  0) & 0xFFFF) /* Revision field */
+/* Rewrite the par_io delay registers (+0xa8/+0xac) per SVR revision. */
+static void __init quirk_mpc8360e_qe_enet10(void)
+{
+	/*
+	 * handle mpc8360E Erratum QE_ENET10:
+	 * RGMII AC values do not meet the specification
+	 */
+	uint svid = mfspr(SPRN_SVR);
+	struct	device_node *np_par;
+	struct	resource res;
+	void	__iomem *base;
+	int	ret;
+
+	np_par = of_find_node_by_name(NULL, "par_io");
+	if (np_par == NULL) {
+		pr_warn("%s couldn't find par_io node\n", __func__);
+		return;
+	}
+	/* Map Parallel I/O ports registers */
+	ret = of_address_to_resource(np_par, 0, &res);
+	if (ret) {
+		pr_warn("%s couldn't map par_io registers\n", __func__);
+		goto out;
+	}
+	/* NOTE(review): an ioremap() failure below bails out without a warning. */
+	base = ioremap(res.start, resource_size(&res));
+	if (!base)
+		goto out;
+
+	/*
+	 * set output delay adjustments to default values according
+	 * table 5 in Errata Rev. 5, 9/2011:
+	 *
+	 * write 0b01 to UCC1 bits 18:19
+	 * write 0b01 to UCC2 option 1 bits 4:5
+	 * write 0b01 to UCC2 option 2 bits 16:17
+	 */
+	clrsetbits_be32((base + 0xa8), 0x0c00f000, 0x04005000);
+
+	/*
+	 * set output delay adjustments to default values according
+	 * table 3-13 in Reference Manual Rev.3 05/2010:
+	 *
+	 * write 0b01 to UCC2 option 2 bits 16:17
+	 * write 0b0101 to UCC1 bits 20:23
+	 * write 0b0101 to UCC2 option 1 bits 24:27
+	 */
+	clrsetbits_be32((base + 0xac), 0x0000cff0, 0x00004550);
+
+	if (SVR_REV(svid) == 0x0021) {
+		/*
+		 * UCC2 option 1: write 0b1010 to bits 24:27
+		 * at address IMMRBAR+0x14AC
+		 */
+		clrsetbits_be32((base + 0xac), 0x000000f0, 0x000000a0);
+	} else if (SVR_REV(svid) == 0x0020) {
+		/*
+		 * UCC1: write 0b11 to bits 18:19
+		 * at address IMMRBAR+0x14A8
+		 */
+		setbits32((base + 0xa8), 0x00003000);
+
+		/*
+		 * UCC2 option 1: write 0b11 to bits 4:5
+		 * at address IMMRBAR+0x14A8
+		 */
+		setbits32((base + 0xa8), 0x0c000000);
+
+		/*
+		 * UCC2 option 2: write 0b11 to bits 16:17
+		 * at address IMMRBAR+0x14AC
+		 */
+		setbits32((base + 0xac), 0x0000c000);
+	}
+	iounmap(base);
+out:
+	of_node_put(np_par);
+}
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init mpc83xx_km_setup_arch(void)
+{
+#ifdef CONFIG_QUICC_ENGINE
+	struct device_node *np;
+#endif
+
+	mpc83xx_setup_arch();
+
+#ifdef CONFIG_QUICC_ENGINE
+	/* Everything below depends on a "par_io" node being present. */
+	np = of_find_node_by_name(NULL, "par_io");
+	if (!np)
+		return;
+	par_io_init(np);
+	of_node_put(np);
+
+	for_each_node_by_name(np, "spi")
+		par_io_of_config(np);
+	for_each_node_by_name(np, "ucc")
+		par_io_of_config(np);
+
+	/* Only apply this quirk when par_io is available */
+	np = of_find_compatible_node(NULL, "network", "ucc_geth");
+	if (!np)
+		return;
+	quirk_mpc8360e_qe_enet10();
+	of_node_put(np);
+#endif	/* CONFIG_QUICC_ENGINE */
+}
+
+machine_device_initcall(mpc83xx_km, mpc83xx_declare_of_platform_devices);
+
+/* NULL-terminated list of board compatibles checked by mpc83xx_km_probe() */
+static char *board[] __initdata = {
+	"Keymile,KMETER1",
+	"Keymile,kmpbec8321",
+	NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init mpc83xx_km_probe(void)
+{
+	char **name;
+
+	/* Report a match as soon as one supported board name is compatible. */
+	for (name = board; *name; name++) {
+		if (of_machine_is_compatible(*name))
+			return 1;
+	}
+	return 0;
+}
+/* Machine description; .probe checks the board[] list for a match. */
+define_machine(mpc83xx_km) {
+	.name		= "mpc83xx-km-platform",
+	.probe		= mpc83xx_km_probe,
+	.setup_arch	= mpc83xx_km_setup_arch,
+	.discover_phbs	= mpc83xx_setup_pci,
+	.init_IRQ	= mpc83xx_ipic_init_IRQ,
+	.get_irq	= ipic_get_irq,
+	.restart	= mpc83xx_restart,
+	.time_init	= mpc83xx_time_init,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
new file mode 100644
index 0000000000..4d8fa9ed1a
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU
+ *
+ * Copyright (c) 2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/i2c.h>
+#include <linux/gpio/driver.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/property.h>
+#include <linux/reboot.h>
+#include <asm/machdep.h>
+
+/*
+ * I don't have specifications for the MCU firmware, I found this register
+ * and bits positions by the trial&error method.
+ */
+#define MCU_REG_CTRL	0x20
+#define MCU_CTRL_POFF	0x40
+#define MCU_CTRL_BTN	0x80
+
+#define MCU_NUM_GPIO	2
+
+struct mcu {
+	struct mutex lock;		/* held around GPIO and power-off writes */
+	struct i2c_client *client;	/* I2C client used for all MCU accesses */
+	struct gpio_chip gc;		/* GPIO chip with MCU_NUM_GPIO lines */
+	u8 reg_ctrl;			/* last known value of MCU_REG_CTRL */
+};
+/* Set by mcu_probe() only when it installs mcu_power_off(); else NULL. */
+static struct mcu *glob_mcu;
+/* Polling thread started by mcu_probe() and stopped by mcu_remove(). */
+struct task_struct *shutdown_thread;
+static int shutdown_thread_fn(void *data)
+{
+	struct mcu *mcu = data ? data : glob_mcu;	/* NULL data: old behaviour */
+	int ret;
+
+	/* Poll the control register once a second until asked to stop. */
+	while (!kthread_should_stop()) {
+		ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL);
+		if (ret < 0) {
+			/* A negative errno must never be cached or acted upon. */
+			pr_err("MCU status reg read failed.\n");
+		} else if (ret & MCU_CTRL_BTN) {
+			/* Button latched: clear it, then request a reboot. */
+			mcu->reg_ctrl = ret;
+			i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL,
+						  mcu->reg_ctrl & ~MCU_CTRL_BTN);
+			ctrl_alt_del();
+		} else {
+			mcu->reg_ctrl = ret;
+		}
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+	return 0;
+}
+
+static ssize_t show_status(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	/* glob_mcu is NULL unless this driver owns pm_power_off; use drvdata. */
+	struct mcu *mcu = dev_get_drvdata(d);
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL);
+	if (ret < 0)
+		return -ENODEV;
+	mcu->reg_ctrl = ret;	/* refresh the cached control value */
+	return sysfs_emit(buf, "%02x\n", ret);
+}
+static DEVICE_ATTR(status, 0444, show_status, NULL);
+
+static void mcu_power_off(void)
+{
+	/* Write the cached control value with MCU_CTRL_POFF set, under lock. */
+	struct mcu *pm_mcu = glob_mcu;
+	struct i2c_client *i2c = pm_mcu->client;
+	pr_info("Sending power-off request to the MCU...\n");
+	mutex_lock(&pm_mcu->lock);
+	i2c_smbus_write_byte_data(i2c, MCU_REG_CTRL, pm_mcu->reg_ctrl | MCU_CTRL_POFF);
+	mutex_unlock(&pm_mcu->lock);
+}
+
+static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct mcu *mcu = gpiochip_get_data(gc);
+	u8 mask = 1 << (4 + gpio);
+	u8 ctrl;
+
+	/* Inverted mapping: a non-zero value clears the control-register bit. */
+	mutex_lock(&mcu->lock);
+	ctrl = mcu->reg_ctrl;
+	ctrl = val ? (ctrl & ~mask) : (ctrl | mask);
+	mcu->reg_ctrl = ctrl;
+	i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl);
+	mutex_unlock(&mcu->lock);
+}
+/* Direction change just writes the requested value; always returns 0. */
+static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	mcu_gpio_set(gc, gpio, val);
+	return 0;
+}
+
+static int mcu_gpiochip_add(struct mcu *mcu)
+{
+	struct device *dev = &mcu->client->dev;
+	struct gpio_chip *gc = &mcu->gc;
+	/* label is allocated by kasprintf(); mcu_gpiochip_remove() frees it. */
+	gc->owner = THIS_MODULE;
+	gc->label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(dev));
+	gc->can_sleep = 1;
+	gc->ngpio = MCU_NUM_GPIO;
+	gc->base = -1;
+	gc->set = mcu_gpio_set;
+	gc->direction_output = mcu_gpio_dir_out;
+	gc->parent = dev;
+	/* mcu is the chip data that mcu_gpio_set() reads back. */
+	return gpiochip_add_data(gc, mcu);
+}
+
+static void mcu_gpiochip_remove(struct mcu *mcu)
+{
+	gpiochip_remove(&mcu->gc);	/* unregister before freeing its label */
+	kfree(mcu->gc.label);
+}
+
+static int mcu_probe(struct i2c_client *client)
+{
+	struct mcu *mcu = kzalloc(sizeof(*mcu), GFP_KERNEL);
+	int ret;
+
+	if (!mcu)
+		return -ENOMEM;
+	mutex_init(&mcu->lock);
+	mcu->client = client;
+	i2c_set_clientdata(client, mcu);
+	ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL);
+	if (ret < 0)
+		goto err;
+	mcu->reg_ctrl = ret;
+	ret = mcu_gpiochip_add(mcu);
+	if (ret)
+		goto err;
+	/* XXX: this is potentially racy, but there is no lock for pm_power_off */
+	if (!pm_power_off) {
+		glob_mcu = mcu;
+		pm_power_off = mcu_power_off;
+		dev_info(&client->dev, "will provide power-off service\n");
+	}
+	if (device_create_file(&client->dev, &dev_attr_status))
+		dev_err(&client->dev, "couldn't create device file for status\n");
+	/* Pass this instance to the thread; glob_mcu may not refer to it. */
+	shutdown_thread = kthread_run(shutdown_thread_fn, mcu, "mcu-i2c-shdn");
+	if (!IS_ERR(shutdown_thread))
+		return 0;
+	ret = PTR_ERR(shutdown_thread);	/* unwind: remove must not see ERR_PTR */
+	device_remove_file(&client->dev, &dev_attr_status);
+	if (glob_mcu == mcu) {
+		pm_power_off = NULL;
+		glob_mcu = NULL;
+	}
+	gpiochip_remove(&mcu->gc);	/* its label is freed just below */
+err:
+	kfree(mcu->gc.label);	/* NULL (no-op) if the chip was never set up */
+	kfree(mcu);
+	return ret;
+}
+
+static void mcu_remove(struct i2c_client *client)
+{
+	struct mcu *mcu = i2c_get_clientdata(client);
+	/* Stop the polling thread before tearing anything else down. */
+	kthread_stop(shutdown_thread);
+
+	device_remove_file(&client->dev, &dev_attr_status);
+	/* Give pm_power_off back only if this instance installed it. */
+	if (glob_mcu == mcu) {
+		pm_power_off = NULL;
+		glob_mcu = NULL;
+	}
+
+	mcu_gpiochip_remove(mcu);
+	kfree(mcu);
+}
+/* I2C device ID table, exported through MODULE_DEVICE_TABLE() below. */
+static const struct i2c_device_id mcu_ids[] = {
+	{ "mcu-mpc8349emitx", },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, mcu_ids);
+/* Device tree match table for "fsl,mcu-mpc8349emitx". */
+static const struct of_device_id mcu_of_match_table[] = {
+	{ .compatible = "fsl,mcu-mpc8349emitx", },
+	{ },
+};
+/* NOTE(review): no MODULE_DEVICE_TABLE(of, ...) for the table above -- confirm */
+static struct i2c_driver mcu_driver = {
+	.driver = {
+		.name = "mcu-mpc8349emitx",
+		.of_match_table = mcu_of_match_table,
+	},
+	.probe = mcu_probe,
+	.remove	= mcu_remove,
+	.id_table = mcu_ids,
+};
+
+module_i2c_driver(mcu_driver);
+
+MODULE_DESCRIPTION("Power Management and GPIO expander driver for "
+		   "MPC8349E-mITX-compatible MCU");
+MODULE_AUTHOR("Anton Vorontsov <avorontsov@ru.mvista.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
new file mode 100644
index 0000000000..2fb2a85d13
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * misc setup functions for MPC83xx
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+
+#include <asm/debug.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/ipic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include <mm/mmu_decl.h>
+
+#include "mpc83xx.h"
+
+static __be32 __iomem *restart_reg_base;
+
+static int __init mpc83xx_restart_init(void)
+{
+	phys_addr_t rst_phys = get_immrbase() + 0x900;
+
+	restart_reg_base = ioremap(rst_phys, 0xff);	/* reset register space */
+	return 0;
+}
+
+arch_initcall(mpc83xx_restart_init);
+
+void __noreturn mpc83xx_restart(char *cmd)
+{
+#define RST_OFFSET	0x00000900
+#define RST_PROT_REG	0x00000018
+#define RST_CTRL_REG	0x0000001c
+
+	local_irq_disable();
+
+	if (!restart_reg_base) {
+		printk(KERN_EMERG "Error: Restart registers not mapped, spinning!\n");
+	} else {
+		/* Enable software reset by writing the ASCII key "RSTE". */
+		out_be32(&restart_reg_base[RST_PROT_REG / 4], 0x52535445);
+		/* Request a software hard reset. */
+		out_be32(&restart_reg_base[RST_CTRL_REG / 4], 0x2);
+	}
+	/* Spin: either the reset takes effect or there is nothing left to try. */
+	for (;;)
+		;
+}
+
+/* Set the SPCR_TBEN bit in the SPCR register; always returns 0. */
+long __init mpc83xx_time_init(void)
+{
+#define SPCR_OFFSET	0x00000110
+#define SPCR_TBEN	0x00400000
+	__be32 __iomem *spcr = ioremap(get_immrbase() + SPCR_OFFSET, 4);
+
+	if (WARN_ON(!spcr))	/* no mapping: warn instead of dereferencing NULL */
+		return 0;
+	/* in_be32() already returns a CPU-order u32, so no __be32 temporary */
+	out_be32(spcr, in_be32(spcr) | SPCR_TBEN);
+	iounmap(spcr);
+	return 0;
+}
+
+void __init mpc83xx_ipic_init_IRQ(void)
+{
+	struct device_node *ipic_np;
+
+	/* fsl,pq2pro-pic nodes are also compatible with fsl,ipic */
+	ipic_np = of_find_compatible_node(NULL, NULL, "fsl,ipic");
+	if (!ipic_np) {
+		/* no compatible match: look the node up by type instead */
+		ipic_np = of_find_node_by_type(NULL, "ipic");
+		if (!ipic_np)
+			return;
+	}
+
+	ipic_init(ipic_np, 0);
+	of_node_put(ipic_np);
+
+	/* Restore the default interrupt mapping priorities, in case the
+	 * boot ROM changed them. */
+	ipic_set_default_priority();
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = {	/* passed to of_platform_bus_probe() */
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .compatible = "simple-bus" },
+	{ .compatible = "gianfar" },
+	{ .compatible = "gpio-leds", },
+	{ .type = "qe", },
+	{ .compatible = "fsl,qe", },
+	{},	/* terminating empty entry */
+};
+
+int __init mpc83xx_declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);	/* return value is not checked */
+	return 0;
+}
+
+#ifdef CONFIG_PCI
+void __init mpc83xx_setup_pci(void)
+{
+	struct device_node *bridge;
+
+	for_each_compatible_node(bridge, "pci", "fsl,mpc8349-pci")	/* PCI nodes */
+		mpc83xx_add_bridge(bridge);
+	for_each_compatible_node(bridge, "pci", "fsl,mpc8314-pcie")	/* PCIe nodes */
+		mpc83xx_add_bridge(bridge);
+}
+#endif
+
+void __init mpc83xx_setup_arch(void)
+{
+	phys_addr_t immr_pa = get_immrbase();
+	unsigned long immr_va = fix_to_virt(FIX_IMMR_BASE);
+	int bat_size = IS_ALIGNED(immr_pa, SZ_2M) ? SZ_2M : SZ_1M;
+
+	if (ppc_md.progress)
+		ppc_md.progress("mpc83xx_setup_arch()", 0);
+	/* Map the IMMR block at its fixmap address with a BAT. */
+	setbat(-1, immr_va, immr_pa, bat_size, PAGE_KERNEL_NCG);
+	update_bats();
+}
+
+int machine_check_83xx(struct pt_regs *regs)
+{
+	const u32 wdt_bit = 1 << (31 - IPIC_MCP_WDT);
+	bool wdt_mcp;
+
+	/* short-circuit keeps the IPIC status read off the non-MCP path */
+	wdt_mcp = (regs->msr & SRR1_MCE_MCP) && (ipic_get_mcp_status() & wdt_bit);
+	if (!wdt_mcp)
+		return machine_check_generic(regs);
+
+	ipic_clear_mcp_status(wdt_bit);
+	if (!debugger_fault_handler(regs))
+		die("Watchdog NMI Reset", regs, 0);
+	return 1;
+}
diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
new file mode 100644
index 0000000000..534bb22748
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc830x_rdb.c
+ *
+ * Description: MPC830x RDB board specific routines.
+ * This file is based on mpc831x_rdb.c
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2009. All rights reserved.
+ * Copyright (C) 2010. Ilya Yanok, Emcraft Systems, yanok@emcraft.com
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include "mpc83xx.h"
+
+/*
+ * Setup the architecture
+ */
+static void __init mpc830x_rdb_setup_arch(void)
+{
+	mpc83xx_setup_arch();	/* common 83xx setup */
+	mpc831x_usb_cfg();	/* USB setup, same helper the MPC831x RDB uses */
+}
+
+static const char * const board[] __initconst = {	/* compatibles matched by the probe hook */
+	"MPC8308RDB",
+	"fsl,mpc8308rdb",
+	"denx,mpc8308_p1m",
+	NULL
+};
+
+/*
+ * Match the root node of the unflattened tree (of_root) against board[].
+ */
+static int __init mpc830x_rdb_probe(void)
+{
+	return of_device_compatible_match(of_root, board);
+}
+
+machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices);
+
+define_machine(mpc830x_rdb) {
+	.name			= "MPC830x RDB",
+	.probe			= mpc830x_rdb_probe,	/* board-specific match */
+	.setup_arch		= mpc830x_rdb_setup_arch,	/* board-specific setup */
+	.discover_phbs		= mpc83xx_setup_pci,	/* remaining hooks are shared helpers */
+	.init_IRQ		= mpc83xx_ipic_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc83xx_restart,
+	.time_init		= mpc83xx_time_init,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
new file mode 100644
index 0000000000..7b901ab3b8
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc831x_rdb.c
+ *
+ * Description: MPC831x RDB board specific routines.
+ * This file is based on mpc834x_sys.c
+ * Author: Lo Wlison <r43300@freescale.com>
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2006. All rights reserved.
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Setup the architecture
+ */
+static void __init mpc831x_rdb_setup_arch(void)
+{
+	mpc83xx_setup_arch();	/* common 83xx setup */
+	mpc831x_usb_cfg();	/* board family USB setup */
+}
+
+static const char * const board[] __initconst = {	/* compatibles matched by the probe hook */
+	"MPC8313ERDB",
+	"fsl,mpc8315erdb",
+	NULL
+};
+
+/*
+ * Match the root node of the unflattened tree (of_root) against board[].
+ */
+static int __init mpc831x_rdb_probe(void)
+{
+	return of_device_compatible_match(of_root, board);
+}
+
+machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices);
+
+define_machine(mpc831x_rdb) {
+	.name			= "MPC831x RDB",
+	.probe			= mpc831x_rdb_probe,	/* board-specific match */
+	.setup_arch		= mpc831x_rdb_setup_arch,	/* board-specific setup */
+	.discover_phbs		= mpc83xx_setup_pci,	/* remaining hooks are shared helpers */
+	.init_IRQ		= mpc83xx_ipic_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc83xx_restart,
+	.time_init		= mpc83xx_time_init,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
new file mode 100644
index 0000000000..d523ce0f48
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc832x_rdb.c
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2007. All rights reserved.
+ *
+ * Description:
+ * MPC832x RDB board specific routines.
+ * This file is based on mpc832x_mds.c and mpc8313_rdb.c
+ * Author: Michael Barkowski <michael.barkowski@freescale.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mmc_spi.h>
+#include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/fsl_devices.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <soc/fsl/qe/qe.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+#undef DEBUG	/* forces the empty DBG() below; drop this line to enable output */
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)	/* expands to nothing */
+#endif
+
+#ifdef CONFIG_QUICC_ENGINE
+static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
+				   struct spi_board_info *board_infos,
+				   unsigned int num_board_infos,
+				   void (*cs_control)(struct spi_device *dev,
+						      bool on))
+{
+	struct device_node *np;
+	unsigned int i = 0;	/* platform device id; also the return value */
+	/* Try to register one "mpc83xx_spi" platform device per matching node. */
+	for_each_compatible_node(np, type, compatible) {
+		int ret;
+		unsigned int j;
+		const void *prop;
+		struct resource res[2];	/* [0] = address range, [1] = interrupt */
+		struct platform_device *pdev;
+		struct fsl_spi_platform_data pdata = {
+			.cs_control = cs_control,
+		};
+
+		memset(res, 0, sizeof(res));
+
+		pdata.sysclk = sysclk;
+
+		prop = of_get_property(np, "reg", NULL);
+		if (!prop)
+			goto err;
+		pdata.bus_num = *(u32 *)prop;	/* first "reg" cell doubles as the bus number */
+
+		prop = of_get_property(np, "cell-index", NULL);
+		if (prop)
+			i = *(u32 *)prop;	/* explicit id overrides the running counter */
+
+		prop = of_get_property(np, "mode", NULL);
+		if (prop && !strcmp(prop, "cpu-qe"))
+			pdata.flags = SPI_QE_CPU_MODE;
+		/* One chip select per board_infos entry that names this bus. */
+		for (j = 0; j < num_board_infos; j++) {
+			if (board_infos[j].bus_num == pdata.bus_num)
+				pdata.max_chipselect++;
+		}
+
+		if (!pdata.max_chipselect)
+			continue;	/* no board info for this bus; i is not advanced */
+
+		ret = of_address_to_resource(np, 0, &res[0]);
+		if (ret)
+			goto err;
+
+		ret = of_irq_to_resource(np, 0, &res[1]);
+		if (ret <= 0)
+			goto err;
+
+		pdev = platform_device_alloc("mpc83xx_spi", i);
+		if (!pdev)
+			goto err;
+
+		ret = platform_device_add_data(pdev, &pdata, sizeof(pdata));
+		if (ret)
+			goto unreg;
+
+		ret = platform_device_add_resources(pdev, res,
+						    ARRAY_SIZE(res));
+		if (ret)
+			goto unreg;
+
+		ret = platform_device_add(pdev);
+		if (ret)
+			goto unreg;
+
+		goto next;
+unreg:
+		platform_device_put(pdev);	/* drop the device that was not added */
+err:
+		pr_err("%pOF: registration failed\n", np);
+next:
+		i++;	/* reached on both the success and the error path */
+	}
+
+	return i;	/* fsl_spi_init() retries with other match strings when this is 0 */
+}
+
+static int __init fsl_spi_init(struct spi_board_info *board_infos,
+			       unsigned int num_board_infos,
+			       void (*cs_control)(struct spi_device *spi,
+						  bool on))
+{
+	u32 clk = get_brgfreq();
+
+	/*
+	 * The SPI controller is clocked either from the QE or from the SoC
+	 * clock; a value of -1 from a helper means it has no frequency.
+	 */
+	if (clk == -1)
+		clk = fsl_get_sys_freq();
+	if (clk == -1)
+		return -ENODEV;
+
+	/* Try the "fsl,spi" compatible first, then type "spi" / "fsl_spi". */
+	if (!of_fsl_spi_probe(NULL, "fsl,spi", clk, board_infos,
+			      num_board_infos, cs_control))
+		of_fsl_spi_probe("spi", "fsl_spi", clk, board_infos,
+				 num_board_infos, cs_control);
+
+	return spi_register_board_info(board_infos, num_board_infos);
+}
+
+static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on)
+{
+	pr_debug("%s %d %d\n", __func__, spi_get_chipselect(spi, 0), on);
+	par_io_data_set(3, 13, on);	/* port 3 pin 13: labelled !SD_CS in mpc832x_spi_init() */
+}
+
+static struct mmc_spi_platform_data mpc832x_mmc_pdata = {	/* platform_data of the board info below */
+	.ocr_mask = MMC_VDD_33_34,
+};
+
+static struct spi_board_info mpc832x_spi_boardinfo = {
+	.bus_num = 0x4c0,	/* compared with the node's first "reg" cell in of_fsl_spi_probe() */
+	.chip_select = 0,
+	.max_speed_hz = 50000000,
+	.modalias = "mmc_spi",
+	.platform_data = &mpc832x_mmc_pdata,
+};
+
+static int __init mpc832x_spi_init(void)
+{
+	struct device_node *slot;
+
+	par_io_config_pin(3,  0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */
+	par_io_config_pin(3,  1, 3, 0, 1, 0); /* SPI1 MISO, I/O */
+	par_io_config_pin(3,  2, 3, 0, 1, 0); /* SPI1 CLK,  I/O */
+	par_io_config_pin(3,  3, 2, 0, 1, 0); /* SPI1 SEL,  I   */
+
+	par_io_config_pin(3, 13, 1, 0, 0, 0); /* !SD_CS,    O */
+	par_io_config_pin(3, 14, 2, 0, 0, 0); /* SD_INSERT, I */
+	par_io_config_pin(3, 15, 2, 0, 0, 0); /* SD_PROTECT,I */
+
+	/*
+	 * With an mmc-spi-slot node present, skip the legacy registration.
+	 */
+	slot = of_find_compatible_node(NULL, NULL, "mmc-spi-slot");
+	if (slot) {
+		of_node_put(slot);
+		return 0;
+	}
+	return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control);
+}
+machine_device_initcall(mpc832x_rdb, mpc832x_spi_init);
+#endif /* CONFIG_QUICC_ENGINE */
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init mpc832x_rdb_setup_arch(void)
+{
+#ifdef CONFIG_QUICC_ENGINE
+	struct device_node *np;
+#endif
+
+	mpc83xx_setup_arch();
+
+#ifdef CONFIG_QUICC_ENGINE
+	np = of_find_node_by_name(NULL, "par_io");
+	if (!np)	/* no par_io node: nothing further to configure */
+		return;
+	par_io_init(np);
+	of_node_put(np);
+	for_each_node_by_name(np, "ucc")	/* then every node named "ucc" */
+		par_io_of_config(np);
+#endif				/* CONFIG_QUICC_ENGINE */
+}
+
+machine_device_initcall(mpc832x_rdb, mpc83xx_declare_of_platform_devices);
+
+define_machine(mpc832x_rdb) {
+	.name		= "MPC832x RDB",
+	.compatible	= "MPC832xRDB",	/* matched by compatible string; no .probe hook */
+	.setup_arch	= mpc832x_rdb_setup_arch,	/* board-specific setup */
+	.discover_phbs  = mpc83xx_setup_pci,	/* remaining hooks are shared helpers */
+	.init_IRQ	= mpc83xx_ipic_init_IRQ,
+	.get_irq	= ipic_get_irq,
+	.restart	= mpc83xx_restart,
+	.time_init	= mpc83xx_time_init,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
new file mode 100644
index 0000000000..e45b98ff02
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc834x_itx.c
+ *
+ * MPC834x ITX board specific routines
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/of_platform.h>
+
+#include <linux/atomic.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/irq.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+static const struct of_device_id mpc834x_itx_ids[] __initconst = {	/* extra bus probed for this board */
+	{ .compatible = "fsl,pq2pro-localbus", },
+	{},	/* terminating empty entry */
+};
+
+static int __init mpc834x_itx_declare_of_platform_devices(void)
+{
+	mpc83xx_declare_of_platform_devices();	/* common 83xx buses first */
+	return of_platform_bus_probe(NULL, mpc834x_itx_ids, NULL);	/* then the board's own list */
+}
+machine_device_initcall(mpc834x_itx, mpc834x_itx_declare_of_platform_devices);
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init mpc834x_itx_setup_arch(void)
+{
+	mpc83xx_setup_arch();	/* common 83xx setup */
+
+	mpc834x_usb_cfg();	/* MPC834x USB setup */
+}
+
+define_machine(mpc834x_itx) {
+	.name			= "MPC834x ITX",
+	.compatible		= "MPC834xMITX",	/* matched by compatible string; no .probe hook */
+	.setup_arch		= mpc834x_itx_setup_arch,	/* board-specific setup */
+	.discover_phbs  	= mpc83xx_setup_pci,	/* remaining hooks are shared helpers */
+	.init_IRQ		= mpc83xx_ipic_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc83xx_restart,
+	.time_init		= mpc83xx_time_init,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
new file mode 100644
index 0000000000..1fc9d1235a
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8360E-RDK board file.
+ *
+ * Copyright (c) 2006  Freescale Semiconductor, Inc.
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <soc/fsl/qe/qe.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+machine_device_initcall(mpc836x_rdk, mpc83xx_declare_of_platform_devices);
+
+static void __init mpc836x_rdk_setup_arch(void)
+{
+	mpc83xx_setup_arch();	/* only the common 83xx setup is done for this board */
+}
+
+define_machine(mpc836x_rdk) {
+	.name		= "MPC836x RDK",
+	.compatible	= "fsl,mpc8360rdk",	/* matched by compatible string; no .probe hook */
+	.setup_arch	= mpc836x_rdk_setup_arch,	/* thin wrapper around mpc83xx_setup_arch() */
+	.discover_phbs  = mpc83xx_setup_pci,	/* remaining hooks are shared helpers */
+	.init_IRQ	= mpc83xx_ipic_init_IRQ,
+	.get_irq	= ipic_get_irq,
+	.restart	= mpc83xx_restart,
+	.time_init	= mpc83xx_time_init,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
new file mode 100644
index 0000000000..39e78018dd
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc837x_rdb.c
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * MPC837x RDB board specific routines
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+static void __init mpc837x_rdb_sd_cfg(void)
+{
+	void __iomem *immr = ioremap(get_immrbase(), 0x1000);
+
+	if (WARN_ON(!immr))
+		return;
+
+	/*
+	 * Unlike the MDS boards, the RDB boards use the USBB pins for SD
+	 * only, so it is safe to mux them away from the USB block.
+	 */
+	clrsetbits_be32(immr + MPC83XX_SICRL_OFFS,
+			MPC837X_SICRL_USBB_MASK, MPC837X_SICRL_SD);
+
+	/* SICRH: replace the bits under the SPI mask with the SD value. */
+	clrsetbits_be32(immr + MPC83XX_SICRH_OFFS,
+			MPC837X_SICRH_SPI_MASK, MPC837X_SICRH_SD);
+
+	iounmap(immr);
+}
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init mpc837x_rdb_setup_arch(void)
+{
+	mpc83xx_setup_arch();	/* common 83xx setup */
+	mpc837x_usb_cfg();	/* MPC837x USB setup */
+	mpc837x_rdb_sd_cfg();	/* then route the shared pins to SD */
+}
+
+machine_device_initcall(mpc837x_rdb, mpc83xx_declare_of_platform_devices);
+
+static const char * const board[] __initconst = {	/* compatibles matched by the probe hook */
+	"fsl,mpc8377rdb",
+	"fsl,mpc8378rdb",
+	"fsl,mpc8379rdb",
+	"fsl,mpc8377wlan",
+	NULL	/* list terminator */
+};
+
+/*
+ * Match the root node of the unflattened tree (of_root) against board[].
+ */
+static int __init mpc837x_rdb_probe(void)
+{
+	return of_device_compatible_match(of_root, board);
+}
+
+define_machine(mpc837x_rdb) {
+	.name			= "MPC837x RDB/WLAN",
+	.probe			= mpc837x_rdb_probe,	/* board-specific match */
+	.setup_arch		= mpc837x_rdb_setup_arch,	/* board-specific setup */
+	.discover_phbs  	= mpc83xx_setup_pci,	/* remaining hooks are shared helpers */
+	.init_IRQ		= mpc83xx_ipic_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc83xx_restart,
+	.time_init		= mpc83xx_time_init,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
new file mode 100644
index 0000000000..0b8738a2b9
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MPC83XX_H__
+#define __MPC83XX_H__
+
+#include <linux/init.h>
+
+/* System Clock Control Register */
+#define MPC83XX_SCCR_OFFS          0xA08
+#define MPC83XX_SCCR_USB_MASK      0x00f00000
+#define MPC83XX_SCCR_USB_MPHCM_11  0x00c00000
+#define MPC83XX_SCCR_USB_MPHCM_01  0x00400000
+#define MPC83XX_SCCR_USB_MPHCM_10  0x00800000
+#define MPC83XX_SCCR_USB_DRCM_11   0x00300000
+#define MPC83XX_SCCR_USB_DRCM_01   0x00100000
+#define MPC83XX_SCCR_USB_DRCM_10   0x00200000
+#define MPC8315_SCCR_USB_MASK      0x00c00000
+#define MPC8315_SCCR_USB_DRCM_11   0x00c00000
+#define MPC8315_SCCR_USB_DRCM_01   0x00400000
+#define MPC837X_SCCR_USB_DRCM_11   0x00c00000
+
+/* System I/O Configuration Register Low (SICRL) */
+#define MPC83XX_SICRL_OFFS         0x114
+#define MPC834X_SICRL_USB_MASK     0x60000000
+#define MPC834X_SICRL_USB0         0x20000000
+#define MPC834X_SICRL_USB1         0x40000000
+#define MPC831X_SICRL_USB_MASK     0x00000c00
+#define MPC831X_SICRL_USB_ULPI     0x00000800
+#define MPC8315_SICRL_USB_MASK     0x000000fc
+#define MPC8315_SICRL_USB_ULPI     0x00000054
+#define MPC837X_SICRL_USB_MASK     0xf0000000
+#define MPC837X_SICRL_USB_ULPI     0x50000000
+#define MPC837X_SICRL_USBB_MASK    0x30000000
+#define MPC837X_SICRL_SD           0x20000000
+
+/* System I/O Configuration Register High (SICRH) */
+#define MPC83XX_SICRH_OFFS         0x118
+#define MPC8308_SICRH_USB_MASK     0x000c0000
+#define MPC8308_SICRH_USB_ULPI     0x00040000
+#define MPC834X_SICRH_USB_UTMI     0x00020000
+#define MPC831X_SICRH_USB_MASK     0x000000e0
+#define MPC831X_SICRH_USB_ULPI     0x000000a0
+#define MPC8315_SICRH_USB_MASK     0x0000ff00
+#define MPC8315_SICRH_USB_ULPI     0x00000000
+#define MPC837X_SICRH_SPI_MASK     0x00000003
+#define MPC837X_SICRH_SD           0x00000001
+
+/* USB Control Register */
+#define FSL_USB2_CONTROL_OFFS      0x500
+#define CONTROL_UTMI_PHY_EN        0x00000200
+#define CONTROL_REFSEL_24MHZ       0x00000040
+#define CONTROL_REFSEL_48MHZ       0x00000080
+#define CONTROL_PHY_CLK_SEL_ULPI   0x00000400
+#define CONTROL_OTG_PORT           0x00000020
+
+/* USB PORTSC Registers */
+#define FSL_USB2_PORTSC1_OFFS      0x184
+#define FSL_USB2_PORTSC2_OFFS      0x188
+#define PORTSCX_PTW_16BIT          0x10000000
+#define PORTSCX_PTS_UTMI           0x00000000
+#define PORTSCX_PTS_ULPI           0x80000000
+
+/*
+ * Declaration for the various functions exported by the
+ * mpc83xx_* files. Mostly for use by mpc83xx_setup
+ */
+
+extern void __noreturn mpc83xx_restart(char *cmd);
+extern long mpc83xx_time_init(void);
+int __init mpc837x_usb_cfg(void);
+int __init mpc834x_usb_cfg(void);
+int __init mpc831x_usb_cfg(void);
+extern void mpc83xx_ipic_init_IRQ(void);
+
+#ifdef CONFIG_PCI
+extern void mpc83xx_setup_pci(void);
+#else
+#define mpc83xx_setup_pci	NULL	/* keeps ".discover_phbs = mpc83xx_setup_pci" valid without PCI */
+#endif
+
+extern int mpc83xx_declare_of_platform_devices(void);
+extern void mpc83xx_setup_arch(void);
+
+#endif				/* __MPC83XX_H__ */
diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
new file mode 100644
index 0000000000..bc6bd4d0ae
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -0,0 +1,551 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Enter and leave deep sleep state on MPC83xx
+ *
+ * Copyright (c) 2006-2008 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ */
+
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+
+#define SS_MEMSAVE	0x00 /* First 8 bytes of RAM */
+#define SS_HID		0x08 /* 3 HIDs */
+#define SS_IABR		0x14 /* 2 IABRs */
+#define SS_IBCR		0x1c /* IBCR */
+#define SS_DABR		0x20 /* 2 DABRs */
+#define SS_DBCR		0x28 /* DBCR */
+#define SS_SP		0x2c /* r1 */
+#define SS_SR		0x30 /* 16 segment registers */
+#define SS_R2		0x70 /* r2 */
+#define SS_MSR		0x74 /* MSR */
+#define SS_SDR1		0x78 /* SDR1 */
+#define SS_LR		0x7c /* LR */
+#define SS_SPRG		0x80 /* 8 SPRGs */
+#define SS_DBAT		0xa0 /* 8 DBATs */
+#define SS_IBAT		0xe0 /* 8 IBATs */
+#define SS_TB		0x120 /* time base: upper word, then lower word */
+#define SS_CR		0x128 /* CR */
+#define SS_GPREG	0x12c /* r12-r31 */
+#define STATE_SAVE_SIZE 0x17c /* end of the r12-r31 area */
+
+	.section .data
+	.align	5
+
+mpc83xx_sleep_save_area:
+	.space	STATE_SAVE_SIZE	/* laid out by the SS_* offsets */
+immrbase:
+	.long	0	/* IMMR address, stored by mpc83xx_enter_deep_sleep */
+
+	.section .text
+	.align	5
+
+	/* r3 = physical address of IMMR */
+_GLOBAL(mpc83xx_enter_deep_sleep)
+	lis	r4, immrbase@ha
+	stw	r3, immrbase@l(r4)	/* keep the IMMR address for later */
+
+	/* The first 2 words of memory are used to communicate with the
+	 * bootloader, to tell it how to resume.
+	 *
+	 * The first word is the magic number 0xf5153ae5, and the second
+	 * is the pointer to mpc83xx_deep_resume.
+	 *
+	 * The original content of these two words is saved in SS_MEMSAVE.
+	 */
+	/* r3 = save area base for the whole save sequence */
+	lis	r3, mpc83xx_sleep_save_area@h
+	ori	r3, r3, mpc83xx_sleep_save_area@l
+	/* Stash the two words found at KERNELBASE */
+	lis	r4, KERNELBASE@h
+	lwz	r5, 0(r4)
+	lwz	r6, 4(r4)
+
+	stw	r5, SS_MEMSAVE+0(r3)
+	stw	r6, SS_MEMSAVE+4(r3)
+	/* Save HID0-HID2 */
+	mfspr	r5, SPRN_HID0
+	mfspr	r6, SPRN_HID1
+	mfspr	r7, SPRN_HID2
+
+	stw	r5, SS_HID+0(r3)
+	stw	r6, SS_HID+4(r3)
+	stw	r7, SS_HID+8(r3)
+	/* Save IABR/IABR2/IBCR and DABR/DABR2/DBCR */
+	mfspr	r4, SPRN_IABR
+	mfspr	r5, SPRN_IABR2
+	mfspr	r6, SPRN_IBCR
+	mfspr	r7, SPRN_DABR
+	mfspr	r8, SPRN_DABR2
+	mfspr	r9, SPRN_DBCR
+
+	stw	r4, SS_IABR+0(r3)
+	stw	r5, SS_IABR+4(r3)
+	stw	r6, SS_IBCR(r3)
+	stw	r7, SS_DABR+0(r3)
+	stw	r8, SS_DABR+4(r3)
+	stw	r9, SS_DBCR(r3)
+	/* Save SPRG0-3 and SDR1 */
+	mfspr	r4, SPRN_SPRG0
+	mfspr	r5, SPRN_SPRG1
+	mfspr	r6, SPRN_SPRG2
+	mfspr	r7, SPRN_SPRG3
+	mfsdr1	r8
+
+	stw	r4, SS_SPRG+0(r3)
+	stw	r5, SS_SPRG+4(r3)
+	stw	r6, SS_SPRG+8(r3)
+	stw	r7, SS_SPRG+12(r3)
+	stw	r8, SS_SDR1(r3)
+	/* Save SPRG4-7 */
+	mfspr	r4, SPRN_SPRG4
+	mfspr	r5, SPRN_SPRG5
+	mfspr	r6, SPRN_SPRG6
+	mfspr	r7, SPRN_SPRG7
+
+	stw	r4, SS_SPRG+16(r3)
+	stw	r5, SS_SPRG+20(r3)
+	stw	r6, SS_SPRG+24(r3)
+	stw	r7, SS_SPRG+28(r3)
+	/* Save DBAT0-7, each as an upper/lower pair */
+	mfspr	r4, SPRN_DBAT0U
+	mfspr	r5, SPRN_DBAT0L
+	mfspr	r6, SPRN_DBAT1U
+	mfspr	r7, SPRN_DBAT1L
+
+	stw	r4, SS_DBAT+0x00(r3)
+	stw	r5, SS_DBAT+0x04(r3)
+	stw	r6, SS_DBAT+0x08(r3)
+	stw	r7, SS_DBAT+0x0c(r3)
+
+	mfspr	r4, SPRN_DBAT2U
+	mfspr	r5, SPRN_DBAT2L
+	mfspr	r6, SPRN_DBAT3U
+	mfspr	r7, SPRN_DBAT3L
+
+	stw	r4, SS_DBAT+0x10(r3)
+	stw	r5, SS_DBAT+0x14(r3)
+	stw	r6, SS_DBAT+0x18(r3)
+	stw	r7, SS_DBAT+0x1c(r3)
+
+	mfspr	r4, SPRN_DBAT4U
+	mfspr	r5, SPRN_DBAT4L
+	mfspr	r6, SPRN_DBAT5U
+	mfspr	r7, SPRN_DBAT5L
+
+	stw	r4, SS_DBAT+0x20(r3)
+	stw	r5, SS_DBAT+0x24(r3)
+	stw	r6, SS_DBAT+0x28(r3)
+	stw	r7, SS_DBAT+0x2c(r3)
+
+	mfspr	r4, SPRN_DBAT6U
+	mfspr	r5, SPRN_DBAT6L
+	mfspr	r6, SPRN_DBAT7U
+	mfspr	r7, SPRN_DBAT7L
+
+	stw	r4, SS_DBAT+0x30(r3)
+	stw	r5, SS_DBAT+0x34(r3)
+	stw	r6, SS_DBAT+0x38(r3)
+	stw	r7, SS_DBAT+0x3c(r3)
+	/* Save IBAT0-7, each as an upper/lower pair */
+	mfspr	r4, SPRN_IBAT0U
+	mfspr	r5, SPRN_IBAT0L
+	mfspr	r6, SPRN_IBAT1U
+	mfspr	r7, SPRN_IBAT1L
+
+	stw	r4, SS_IBAT+0x00(r3)
+	stw	r5, SS_IBAT+0x04(r3)
+	stw	r6, SS_IBAT+0x08(r3)
+	stw	r7, SS_IBAT+0x0c(r3)
+
+	mfspr	r4, SPRN_IBAT2U
+	mfspr	r5, SPRN_IBAT2L
+	mfspr	r6, SPRN_IBAT3U
+	mfspr	r7, SPRN_IBAT3L
+
+	stw	r4, SS_IBAT+0x10(r3)
+	stw	r5, SS_IBAT+0x14(r3)
+	stw	r6, SS_IBAT+0x18(r3)
+	stw	r7, SS_IBAT+0x1c(r3)
+
+	mfspr	r4, SPRN_IBAT4U
+	mfspr	r5, SPRN_IBAT4L
+	mfspr	r6, SPRN_IBAT5U
+	mfspr	r7, SPRN_IBAT5L
+
+	stw	r4, SS_IBAT+0x20(r3)
+	stw	r5, SS_IBAT+0x24(r3)
+	stw	r6, SS_IBAT+0x28(r3)
+	stw	r7, SS_IBAT+0x2c(r3)
+
+	mfspr	r4, SPRN_IBAT6U
+	mfspr	r5, SPRN_IBAT6L
+	mfspr	r6, SPRN_IBAT7U
+	mfspr	r7, SPRN_IBAT7L
+
+	stw	r4, SS_IBAT+0x30(r3)
+	stw	r5, SS_IBAT+0x34(r3)
+	stw	r6, SS_IBAT+0x38(r3)
+	stw	r7, SS_IBAT+0x3c(r3)
+	/* Save MSR, LR, CR, r1 and r2 */
+	mfmsr	r4
+	mflr	r5
+	mfcr	r6
+
+	stw	r4, SS_MSR(r3)
+	stw	r5, SS_LR(r3)
+	stw	r6, SS_CR(r3)
+	stw	r1, SS_SP(r3)
+	stw	r2, SS_R2(r3)
+	/* Read the time base; retry if the upper half changed meanwhile */
+1:	mftbu	r4
+	mftb	r5
+	mftbu	r6
+	cmpw	r4, r6
+	bne	1b
+
+	stw	r4, SS_TB+0(r3)
+	stw	r5, SS_TB+4(r3)
+
+	stmw	r12, SS_GPREG(r3)	/* r12-r31 */
+	/* Save the segment registers; r4 steps by 0x10000000 until it wraps to 0 */
+	li	r4, 0
+	addi	r6, r3, SS_SR-4
+1:	mfsrin	r5, r4
+	stwu	r5, 4(r6)
+	addis	r4, r4, 0x1000
+	cmpwi	r4, 0
+	bne	1b
+
+	/* Disable machine checks and critical exceptions */
+	mfmsr	r4
+	rlwinm	r4, r4, 0, ~MSR_CE
+	rlwinm	r4, r4, 0, ~MSR_ME
+	mtmsr	r4
+	isync
+
+#define TMP_VIRT_IMMR		0xf0000000
+#define DEFAULT_IMMR_VALUE	0xff400000
+#define IMMRBAR_BASE		0x0000
+
+	lis	r4, immrbase@ha
+	lwz	r4, immrbase@l(r4)
+
+	/* Use DBAT0 to address the current IMMR space */
+
+	ori	r4, r4, 0x002a
+	mtspr	SPRN_DBAT0L, r4
+	lis	r8, TMP_VIRT_IMMR@h
+	ori	r4, r8, 0x001e	/* 1 MByte accessible from Kernel Space only */
+	mtspr	SPRN_DBAT0U, r4
+	isync
+
+	/* Use DBAT1 to address the original IMMR space */
+
+	lis	r4, DEFAULT_IMMR_VALUE@h
+	ori	r4, r4, 0x002a
+	mtspr	SPRN_DBAT1L, r4
+	lis	r9, (TMP_VIRT_IMMR + 0x01000000)@h
+	ori	r4, r9, 0x001e	/* 1 MByte accessible from Kernel Space only */
+	mtspr	SPRN_DBAT1U, r4
+	isync
+
+	/* Use DBAT2 to address the beginning of RAM.  This isn't done
+	 * using the normal virtual mapping, because with page debugging
+	 * enabled it will be read-only.
+	 */
+
+	li	r4, 0x0002
+	mtspr	SPRN_DBAT2L, r4
+	lis	r4, KERNELBASE@h
+	ori	r4, r4, 0x001e	/* 1 MByte accessible from Kernel Space only */
+	mtspr	SPRN_DBAT2U, r4
+	isync
+
+	/* Flush the cache with our BAT, as there will be TLB misses
+	 * otherwise if page debugging is enabled, and these misses
+	 * will disturb the PLRU algorithm.
+	 */
+
+	bl	__flush_disable_L1
+
+	/* Keep the i-cache enabled, so the hack below for low-boot
+	 * flash will work.
+	 */
+	mfspr	r3, SPRN_HID0
+	ori	r3, r3, HID0_ICE
+	mtspr	SPRN_HID0, r3
+	isync
+	/* r6 = magic number, r7 = physical address of mpc83xx_deep_resume */
+	lis	r6, 0xf515
+	ori	r6, r6, 0x3ae5
+
+	lis	r7, mpc83xx_deep_resume@h
+	ori	r7, r7, mpc83xx_deep_resume@l
+	tophys(r7, r7)
+	/* Publish both words at the start of RAM for the bootloader */
+	lis	r5, KERNELBASE@h
+	stw	r6, 0(r5)
+	stw	r7, 4(r5)
+
+	/* Reset BARs */
+
+	li	r4, 0
+	stw	r4, 0x0024(r8)
+	stw	r4, 0x002c(r8)
+	stw	r4, 0x0034(r8)
+	stw	r4, 0x003c(r8)
+	stw	r4, 0x0064(r8)
+	stw	r4, 0x006c(r8)
+
+	/* Rev 1 of the 8313 has problems with wakeup events that are
+	 * pending during the transition to deep sleep state (such as if
+	 * the PCI host sets the state to D3 and then D0 in rapid
+	 * succession).  This check shrinks the race window somewhat.
+	 *
+	 * See erratum PCI23, though the problem is not limited
+	 * to PCI.
+	 */
+
+	lwz	r3, 0x0b04(r8)
+	andi.	r3, r3, 1
+	bne-	mpc83xx_deep_resume
+
+	/* Move IMMR back to the default location, following the
+	 * procedure specified in the MPC8313 manual.
+	 */
+	lwz	r4, IMMRBAR_BASE(r8)
+	isync
+	lis	r4, DEFAULT_IMMR_VALUE@h
+	stw	r4, IMMRBAR_BASE(r8)
+	lis	r4, KERNELBASE@h
+	lwz	r4, 0(r4)
+	isync
+	lwz	r4, IMMRBAR_BASE(r9)
+	mr	r8, r9	/* from here on r8 addresses IMMR through the DBAT1 window */
+	isync
+
+	/* Check the Reset Configuration Word to see whether flash needs
+	 * to be mapped at a low address or a high address.
+	 */
+
+	lwz	r4, 0x0904(r8)
+	andis.	r4, r4, 0x0400
+	li	r4, 0
+	beq	boot_low
+	lis	r4, 0xff80
+boot_low:
+	stw	r4, 0x0020(r8)
+	lis	r7, 0x8000
+	ori	r7, r7, 0x0016
+	/* Select sleep mode in HID0, with doze and nap cleared */
+	mfspr	r5, SPRN_HID0
+	rlwinm	r5, r5, 0, ~(HID0_DOZE | HID0_NAP)
+	oris	r5, r5, HID0_SLEEP@h
+	mtspr	SPRN_HID0, r5
+	isync
+	/* r5 = MSR with POW set; written by the mtmsr below */
+	mfmsr	r5
+	oris	r5, r5, MSR_POW@h
+
+	/* Enable the flash mapping at the appropriate address.  This
+	 * mapping will override the RAM mapping if booting low, so there's
+	 * no need to disable the latter.  This must be done inside the same
+	 * cache line as setting MSR_POW, so that no instruction fetches
+	 * from RAM happen after the flash mapping is turned on.
+	 */
+
+	.align	5
+	stw	r7, 0x0024(r8)
+	sync
+	isync
+	mtmsr	r5
+	isync
+1:	b	1b
+
+mpc83xx_deep_resume:
+	lis	r4, 1f@h
+	ori	r4, r4, 1f@l
+	tophys(r4, r4)
+	mtsrr0	r4
+	/* Continue at 1f with instruction and data translation switched off */
+	mfmsr	r4
+	rlwinm	r4, r4, 0, ~(MSR_IR | MSR_DR)
+	mtsrr1	r4
+
+	rfi
+
+1:	tlbia
+	bl	__inval_enable_L1
+	/* r3 = physical address of the save area */
+	lis	r3, mpc83xx_sleep_save_area@h
+	ori	r3, r3, mpc83xx_sleep_save_area@l
+	tophys(r3, r3)
+	/* Put back the two words that were overwritten at address 0 */
+	lwz	r5, SS_MEMSAVE+0(r3)
+	lwz	r6, SS_MEMSAVE+4(r3)
+
+	stw	r5, 0(0)
+	stw	r6, 4(0)
+	/* Restore HID0-HID2 */
+	lwz	r5, SS_HID+0(r3)
+	lwz	r6, SS_HID+4(r3)
+	lwz	r7, SS_HID+8(r3)
+
+	mtspr	SPRN_HID0, r5
+	mtspr	SPRN_HID1, r6
+	mtspr	SPRN_HID2, r7
+	/* Restore IABR/IABR2/IBCR and DABR/DABR2/DBCR */
+	lwz	r4, SS_IABR+0(r3)
+	lwz	r5, SS_IABR+4(r3)
+	lwz	r6, SS_IBCR(r3)
+	lwz	r7, SS_DABR+0(r3)
+	lwz	r8, SS_DABR+4(r3)
+	lwz	r9, SS_DBCR(r3)
+
+	mtspr	SPRN_IABR, r4
+	mtspr	SPRN_IABR2, r5
+	mtspr	SPRN_IBCR, r6
+	mtspr	SPRN_DABR, r7
+	mtspr	SPRN_DABR2, r8
+	mtspr	SPRN_DBCR, r9
+	/* Restore the segment registers, mirroring the save loop */
+	li	r4, 0
+	addi	r6, r3, SS_SR-4
+1:	lwzu	r5, 4(r6)
+	mtsrin	r5, r4
+	addis	r4, r4, 0x1000
+	cmpwi	r4, 0
+	bne	1b
+	/* Restore DBAT0-7 */
+	lwz	r4, SS_DBAT+0x00(r3)
+	lwz	r5, SS_DBAT+0x04(r3)
+	lwz	r6, SS_DBAT+0x08(r3)
+	lwz	r7, SS_DBAT+0x0c(r3)
+
+	mtspr	SPRN_DBAT0U, r4
+	mtspr	SPRN_DBAT0L, r5
+	mtspr	SPRN_DBAT1U, r6
+	mtspr	SPRN_DBAT1L, r7
+
+	lwz	r4, SS_DBAT+0x10(r3)
+	lwz	r5, SS_DBAT+0x14(r3)
+	lwz	r6, SS_DBAT+0x18(r3)
+	lwz	r7, SS_DBAT+0x1c(r3)
+
+	mtspr	SPRN_DBAT2U, r4
+	mtspr	SPRN_DBAT2L, r5
+	mtspr	SPRN_DBAT3U, r6
+	mtspr	SPRN_DBAT3L, r7
+
+	lwz	r4, SS_DBAT+0x20(r3)
+	lwz	r5, SS_DBAT+0x24(r3)
+	lwz	r6, SS_DBAT+0x28(r3)
+	lwz	r7, SS_DBAT+0x2c(r3)
+
+	mtspr	SPRN_DBAT4U, r4
+	mtspr	SPRN_DBAT4L, r5
+	mtspr	SPRN_DBAT5U, r6
+	mtspr	SPRN_DBAT5L, r7
+
+	lwz	r4, SS_DBAT+0x30(r3)
+	lwz	r5, SS_DBAT+0x34(r3)
+	lwz	r6, SS_DBAT+0x38(r3)
+	lwz	r7, SS_DBAT+0x3c(r3)
+
+	mtspr	SPRN_DBAT6U, r4
+	mtspr	SPRN_DBAT6L, r5
+	mtspr	SPRN_DBAT7U, r6
+	mtspr	SPRN_DBAT7L, r7
+	/* Restore IBAT0-7 */
+	lwz	r4, SS_IBAT+0x00(r3)
+	lwz	r5, SS_IBAT+0x04(r3)
+	lwz	r6, SS_IBAT+0x08(r3)
+	lwz	r7, SS_IBAT+0x0c(r3)
+
+	mtspr	SPRN_IBAT0U, r4
+	mtspr	SPRN_IBAT0L, r5
+	mtspr	SPRN_IBAT1U, r6
+	mtspr	SPRN_IBAT1L, r7
+
+	lwz	r4, SS_IBAT+0x10(r3)
+	lwz	r5, SS_IBAT+0x14(r3)
+	lwz	r6, SS_IBAT+0x18(r3)
+	lwz	r7, SS_IBAT+0x1c(r3)
+
+	mtspr	SPRN_IBAT2U, r4
+	mtspr	SPRN_IBAT2L, r5
+	mtspr	SPRN_IBAT3U, r6
+	mtspr	SPRN_IBAT3L, r7
+
+	lwz	r4, SS_IBAT+0x20(r3)
+	lwz	r5, SS_IBAT+0x24(r3)
+	lwz	r6, SS_IBAT+0x28(r3)
+	lwz	r7, SS_IBAT+0x2c(r3)
+
+	mtspr	SPRN_IBAT4U, r4
+	mtspr	SPRN_IBAT4L, r5
+	mtspr	SPRN_IBAT5U, r6
+	mtspr	SPRN_IBAT5L, r7
+
+	lwz	r4, SS_IBAT+0x30(r3)
+	lwz	r5, SS_IBAT+0x34(r3)
+	lwz	r6, SS_IBAT+0x38(r3)
+	lwz	r7, SS_IBAT+0x3c(r3)
+
+	mtspr	SPRN_IBAT6U, r4
+	mtspr	SPRN_IBAT6L, r5
+	mtspr	SPRN_IBAT7U, r6
+	mtspr	SPRN_IBAT7L, r7
+	/* Restore SPRG4-7, then SPRG0-3 and SDR1 */
+	lwz	r4, SS_SPRG+16(r3)
+	lwz	r5, SS_SPRG+20(r3)
+	lwz	r6, SS_SPRG+24(r3)
+	lwz	r7, SS_SPRG+28(r3)
+
+	mtspr	SPRN_SPRG4, r4
+	mtspr	SPRN_SPRG5, r5
+	mtspr	SPRN_SPRG6, r6
+	mtspr	SPRN_SPRG7, r7
+
+	lwz	r4, SS_SPRG+0(r3)
+	lwz	r5, SS_SPRG+4(r3)
+	lwz	r6, SS_SPRG+8(r3)
+	lwz	r7, SS_SPRG+12(r3)
+	lwz	r8, SS_SDR1(r3)
+
+	mtspr	SPRN_SPRG0, r4
+	mtspr	SPRN_SPRG1, r5
+	mtspr	SPRN_SPRG2, r6
+	mtspr	SPRN_SPRG3, r7
+	mtsdr1	r8
+	/* Saved MSR and LR become SRR1/SRR0 for the final rfi */
+	lwz	r4, SS_MSR(r3)
+	lwz	r5, SS_LR(r3)
+	lwz	r6, SS_CR(r3)
+	lwz	r1, SS_SP(r3)
+	lwz	r2, SS_R2(r3)
+
+	mtsrr1	r4
+	mtsrr0	r5
+	mtcr	r6
+	/* Zero TBL first; presumably so no carry reaches TBU between the writes */
+	li	r4, 0
+	mtspr	SPRN_TBWL, r4
+
+	lwz	r4, SS_TB+0(r3)
+	lwz	r5, SS_TB+4(r3)
+
+	mtspr	SPRN_TBWU, r4
+	mtspr	SPRN_TBWL, r5
+
+	lmw	r12, SS_GPREG(r3)	/* r12-r31 */
+
+	/* Kick decrementer */
+	li	r0, 1
+	mtdec	r0
+
+	rfi
+_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume)
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
new file mode 100644
index 0000000000..9833c36bda
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MPC83xx suspend support
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2006-2007 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/pm.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched/signal.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/suspend.h>
+#include <linux/fsl_devices.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/export.h>
+
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/mpc6xx.h>
+#include <asm/switch_to.h>
+
+#include <sysdev/fsl_soc.h>
+
+#define PMCCR1_NEXT_STATE       0x0C /* Next state for power management */
+#define PMCCR1_NEXT_STATE_SHIFT 2 /* Shift that moves the NEXT_STATE field down to bit 0 */
+#define PMCCR1_CURR_STATE       0x03 /* Current state for power management */
+#define IMMR_SYSCR_OFFSET       0x100 /* Added to the IMMR base to map struct mpc83xx_syscr */
+#define IMMR_RCW_OFFSET         0x900 /* Added to the IMMR base to map struct mpc83xx_rcw */
+#define RCW_PCI_HOST            0x80000000 /* rcwhr bit; clear means PCI agent (see mpc83xx_is_pci_agent()) */
+
+void mpc83xx_enter_deep_sleep(phys_addr_t immrbase); /* not defined in this file */
+
+struct mpc83xx_pmc { /* PMC register layout, accessed through pmc_regs */
+	u32 config; /* written with PMCCR_* bits on suspend entry */
+#define PMCCR_DLPEN 2 /* DDR SDRAM low power enable */
+#define PMCCR_SLPEN 1 /* System low power enable */
+
+	u32 event; /* read by the IRQ handler, which writes the same value back */
+	u32 mask; /* written with PMCER_* bits */
+/* All but PMCI are deep-sleep only */
+#define PMCER_GPIO   0x100
+#define PMCER_PCI    0x080
+#define PMCER_USB    0x040
+#define PMCER_ETSEC1 0x020
+#define PMCER_ETSEC2 0x010
+#define PMCER_TIMER  0x008
+#define PMCER_INT1   0x004
+#define PMCER_INT2   0x002
+#define PMCER_PMCI   0x001
+#define PMCER_ALL    0x1FF /* OR of every PMCER_* bit above */
+
+	/* deep-sleep only */
+	u32 config1; /* PMCCR1_* bits plus the NEXT_STATE/CURR_STATE fields */
+#define PMCCR1_USE_STATE  0x80000000
+#define PMCCR1_PME_EN     0x00000080
+#define PMCCR1_ASSERT_PME 0x00000040
+#define PMCCR1_POWER_OFF  0x00000020
+
+	/* deep-sleep only */
+	u32 config2;
+};
+
+struct mpc83xx_rcw { /* mapped at IMMR base + IMMR_RCW_OFFSET */
+	u32 rcwlr;
+	u32 rcwhr; /* tested against RCW_PCI_HOST */
+};
+
+struct mpc83xx_clock { /* mapped from the PMC node's second address range */
+	u32 spmr;
+	u32 occr;
+	u32 sccr; /* saved before and restored after deep sleep */
+};
+
+struct mpc83xx_syscr { /* mapped at IMMR base + IMMR_SYSCR_OFFSET */
+	__be32 sgprl;
+	__be32 sgprh;
+	__be32 spridr;
+	__be32 :32; /* unnamed 32-bit gap between spridr and spcr */
+	__be32 spcr;
+	__be32 sicrl; /* saved before and restored after deep sleep */
+	__be32 sicrh; /* saved before and restored after deep sleep */
+};
+
+struct mpc83xx_saved { /* register values kept across deep sleep */
+	u32 sicrl; /* copy of syscr_regs->sicrl */
+	u32 sicrh; /* copy of syscr_regs->sicrh */
+	u32 sccr; /* copy of clock_regs->sccr */
+};
+
+struct pmc_type { /* per-compatible data referenced from pmc_match[] */
+	int has_deep_sleep; /* nonzero: PM_SUSPEND_MEM enters deep sleep */
+};
+
+static int has_deep_sleep, deep_sleeping; /* capability from pmc_type; mode chosen in .begin */
+static int pmc_irq; /* result of irq_of_parse_and_map(); 0 means no interrupt */
+static struct mpc83xx_pmc __iomem *pmc_regs;
+static struct mpc83xx_clock __iomem *clock_regs;
+static struct mpc83xx_syscr __iomem *syscr_regs; /* only mapped when has_deep_sleep */
+static struct mpc83xx_saved saved_regs;
+static int is_pci_agent, wake_from_pci; /* wake_from_pci is set while the agent thread calls pm_suspend() */
+static phys_addr_t immrbase; /* cached get_immrbase() */
+static int pci_pm_state; /* NEXT_STATE field last read from PMCCR1 */
+static DECLARE_WAIT_QUEUE_HEAD(agent_wq); /* woken when pci_pm_state changes */
+
+int fsl_deep_sleep(void)
+{
+	return deep_sleeping; /* nonzero while a deep-sleep suspend is in progress (set in .begin, cleared in .end) */
+}
+EXPORT_SYMBOL(fsl_deep_sleep);
+
+/* PCI-agent only: make the PMCCR1 current-state field match its next-state
+ * field.  Returns 1 when the state changed (agent thread woken), else 0. */
+static int mpc83xx_change_state(void)
+{
+	u32 cfg1 = in_be32(&pmc_regs->config1);
+	u32 current_state;
+
+	if (!is_pci_agent)
+		return 0;
+
+	pci_pm_state = (cfg1 & PMCCR1_NEXT_STATE) >> PMCCR1_NEXT_STATE_SHIFT;
+	current_state = cfg1 & PMCCR1_CURR_STATE;
+	if (current_state == pci_pm_state)
+		return 0;
+
+	/* Record the new state as current, then wake the agent thread. */
+	cfg1 = (cfg1 & ~PMCCR1_CURR_STATE) | pci_pm_state;
+	out_be32(&pmc_regs->config1, cfg1);
+	wake_up(&agent_wq);
+
+	return 1;
+}
+
+/* PMC interrupt handler: follow PCI PM state changes and write any pending
+ * event bits back.  Returns IRQ_HANDLED if either happened, else IRQ_NONE. */
+static irqreturn_t pmc_irq_handler(int irq, void *dev_id)
+{
+	u32 pending = in_be32(&pmc_regs->event);
+	bool handled = mpc83xx_change_state() != 0;
+
+	if (pending) {
+		/* presumably write-one-to-clear -- confirm against the manual */
+		out_be32(&pmc_regs->event, pending);
+		handled = true;
+	}
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static void mpc83xx_suspend_restore_regs(void)
+{ /* Write back the SICRL, SICRH and SCCR values held in saved_regs. */
+	out_be32(&syscr_regs->sicrl, saved_regs.sicrl);
+	out_be32(&syscr_regs->sicrh, saved_regs.sicrh);
+	out_be32(&clock_regs->sccr, saved_regs.sccr);
+}
+
+static void mpc83xx_suspend_save_regs(void)
+{ /* Capture SICRL, SICRH and SCCR into saved_regs before deep sleep. */
+	saved_regs.sicrl = in_be32(&syscr_regs->sicrl);
+	saved_regs.sicrh = in_be32(&syscr_regs->sicrh);
+	saved_regs.sccr = in_be32(&clock_regs->sccr);
+}
+
+/* platform_suspend_ops.enter hook: program the PMC and put the core to sleep.
+ * Returns 0 after waking up, or -EAGAIN if the PCI PM state check fails. */
+static int mpc83xx_suspend_enter(suspend_state_t state)
+{
+	int ret = -EAGAIN;
+
+	/* Don't go to sleep if there's a race where pci_pm_state changes
+	 * between the agent thread checking it and the PM code disabling
+	 * interrupts.
+	 */
+	if (wake_from_pci) {
+		if (pci_pm_state != (deep_sleeping ? 3 : 2)) /* 3 pairs with mem, 2 with standby (see agent_thread_fn()) */
+			goto out;
+
+		out_be32(&pmc_regs->config1,
+		         in_be32(&pmc_regs->config1) | PMCCR1_PME_EN);
+	}
+
+	/* Put the system into low-power mode and the RAM
+	 * into self-refresh mode once the core goes to
+	 * sleep.
+	 */
+	out_be32(&pmc_regs->config, PMCCR_SLPEN | PMCCR_DLPEN);
+
+	/* If it has deep sleep (i.e. it's an 831x or compatible),
+	 * disable power to the core upon entering sleep mode.  This will
+	 * require going through the boot firmware upon a wakeup event.
+	 */
+	if (deep_sleeping) {
+		mpc83xx_suspend_save_regs();
+
+		out_be32(&pmc_regs->mask, PMCER_ALL);
+
+		out_be32(&pmc_regs->config1,
+		         in_be32(&pmc_regs->config1) | PMCCR1_POWER_OFF);
+
+		enable_kernel_fp(); /* NOTE(review): presumably required by the deep-sleep assembly -- confirm */
+
+		mpc83xx_enter_deep_sleep(immrbase);
+
+		out_be32(&pmc_regs->config1,
+		         in_be32(&pmc_regs->config1) & ~PMCCR1_POWER_OFF);
+
+		out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+		mpc83xx_suspend_restore_regs();
+	} else {
+		out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+		mpc6xx_enter_standby();
+	}
+
+	ret = 0;
+
+out: /* reached on success and on the early bail-out: PME_EN is always cleared */
+	out_be32(&pmc_regs->config1,
+	         in_be32(&pmc_regs->config1) & ~PMCCR1_PME_EN);
+
+	return ret;
+}
+
+static void mpc83xx_suspend_end(void)
+{
+	deep_sleeping = 0; /* .end hook: drop the mode chosen in mpc83xx_suspend_begin() */
+}
+
+static int mpc83xx_suspend_valid(suspend_state_t state)
+{ /* .valid hook: only standby and mem are accepted */
+	return state == PM_SUSPEND_STANDBY || state == PM_SUSPEND_MEM;
+}
+
+/* .begin hook: choose deep sleep for PM_SUSPEND_MEM when the PMC supports it.
+ * Returns 0 for standby/mem and -EINVAL for any other state. */
+static int mpc83xx_suspend_begin(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY) {
+		deep_sleeping = 0;
+		return 0;
+	}
+
+	if (state != PM_SUSPEND_MEM)
+		return -EINVAL;
+
+	/* Without deep-sleep support, deep_sleeping is left as it was. */
+	if (has_deep_sleep)
+		deep_sleeping = 1;
+	return 0;
+}
+
+/* Thread body for PCI agents (started by mpc83xx_set_agent()): each time
+ * pci_pm_state reaches 2 or more, suspend to standby, or to mem for state 3. */
+static int agent_thread_fn(void *data)
+{
+	suspend_state_t target;
+
+	for (;;) {
+		wait_event_interruptible(agent_wq, pci_pm_state >= 2);
+		try_to_freeze();
+		if (signal_pending(current) || pci_pm_state < 2)
+			continue;
+
+		/* With a preemptible kernel (or SMP), this could race with
+		 * a userspace-driven suspend request.  It's probably best
+		 * to avoid mixing the two with such a configuration (or
+		 * else fix it by adding a mutex to state_store that we can
+		 * synchronize with).
+		 */
+		wake_from_pci = 1;
+		target = pci_pm_state == 3 ? PM_SUSPEND_MEM : PM_SUSPEND_STANDBY;
+		pm_suspend(target);
+		wake_from_pci = 0;
+	}
+
+	return 0;
+}
+
+static void mpc83xx_set_agent(void)
+{ /* PCI-agent setup: program PMCCR1 and the event mask, then start the agent thread */
+	out_be32(&pmc_regs->config1, PMCCR1_USE_STATE);
+	out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+	kthread_run(agent_thread_fn, NULL, "PCI power mgt"); /* NOTE(review): the result is not checked */
+}
+
+/* Returns 1 if the reset configuration word lacks RCW_PCI_HOST (PCI agent),
+ * 0 if the bit is set, or -ENOMEM if the RCW registers cannot be mapped. */
+static int mpc83xx_is_pci_agent(void)
+{
+	struct mpc83xx_rcw __iomem *rcw;
+	u32 rcwhr;
+
+	rcw = ioremap(get_immrbase() + IMMR_RCW_OFFSET, sizeof(*rcw));
+	if (!rcw)
+		return -ENOMEM;
+
+	rcwhr = in_be32(&rcw->rcwhr);
+	iounmap(rcw);
+
+	return (rcwhr & RCW_PCI_HOST) ? 0 : 1;
+}
+
+static const struct platform_suspend_ops mpc83xx_suspend_ops = { /* installed by pmc_probe() via suspend_set_ops() */
+	.valid = mpc83xx_suspend_valid,
+	.begin = mpc83xx_suspend_begin,
+	.enter = mpc83xx_suspend_enter,
+	.end = mpc83xx_suspend_end,
+};
+
+static struct pmc_type pmc_types[] = { /* entries are referenced by index from pmc_match[] */
+	{
+		.has_deep_sleep = 1, /* [0]: used by "fsl,mpc8313-pmc" */
+	},
+	{
+		.has_deep_sleep = 0, /* [1]: used by "fsl,mpc8349-pmc" */
+	}
+};
+
+static const struct of_device_id pmc_match[] = { /* .data is the pmc_type that pmc_probe() reads */
+	{
+		.compatible = "fsl,mpc8313-pmc",
+		.data = &pmc_types[0],
+	},
+	{
+		.compatible = "fsl,mpc8349-pmc",
+		.data = &pmc_types[1],
+	},
+	{} /* terminating empty entry */
+};
+
+/* Probe the PMC node: map its register windows, request the optional PMC
+ * interrupt, start the PCI agent thread when the SoC is a PCI agent and
+ * install the suspend ops.  Returns 0 or a negative errno. */
+static int pmc_probe(struct platform_device *ofdev)
+{
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	const struct pmc_type *type;
+	int ret;
+
+	type = of_device_get_match_data(&ofdev->dev);
+	if (!type)
+		return -EINVAL;
+
+	if (!of_device_is_available(np))
+		return -ENODEV;
+
+	has_deep_sleep = type->has_deep_sleep;
+	immrbase = get_immrbase();
+
+	is_pci_agent = mpc83xx_is_pci_agent();
+	if (is_pci_agent < 0)
+		return is_pci_agent;
+
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		return -ENODEV;
+
+	pmc_regs = ioremap(res.start, sizeof(*pmc_regs));
+	if (!pmc_regs)
+		return -ENOMEM;
+
+	ret = of_address_to_resource(np, 1, &res);
+	if (ret) {
+		ret = -ENODEV;
+		goto out_pmc;
+	}
+
+	clock_regs = ioremap(res.start, sizeof(*clock_regs));
+	if (!clock_regs) {
+		ret = -ENOMEM;
+		goto out_pmc;
+	}
+
+	if (has_deep_sleep) {
+		syscr_regs = ioremap(immrbase + IMMR_SYSCR_OFFSET,
+				     sizeof(*syscr_regs));
+		if (!syscr_regs) {
+			ret = -ENOMEM;
+			goto out_clock;
+		}
+	}
+
+	/* pmc_irq_handler() reads pmc_regs and the line is shared, so request
+	 * the interrupt only after every register window has been mapped. */
+	pmc_irq = irq_of_parse_and_map(np, 0);
+	if (pmc_irq) {
+		ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED, "pmc", ofdev);
+		if (ret) {
+			ret = -EBUSY;
+			goto out_syscr;
+		}
+	}
+
+	if (is_pci_agent)
+		mpc83xx_set_agent();
+
+	suspend_set_ops(&mpc83xx_suspend_ops);
+	return 0;
+
+out_syscr:
+	if (has_deep_sleep)
+		iounmap(syscr_regs);
+out_clock:
+	iounmap(clock_regs);
+out_pmc:
+	iounmap(pmc_regs);
+	return ret;
+}
+
+static struct platform_driver pmc_driver = { /* no .remove callback is provided */
+	.driver = {
+		.name = "mpc83xx-pmc",
+		.of_match_table = pmc_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = pmc_probe,
+};
+
+builtin_platform_driver(pmc_driver); /* built-in only registration */
diff --git a/arch/powerpc/platforms/83xx/usb_831x.c b/arch/powerpc/platforms/83xx/usb_831x.c
new file mode 100644
index 0000000000..28c24e90f0
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_831x.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/* Configure the SoC side of the 831x/8308 dual-role USB controller from the
+ * "phy_type" property: USB clock in SCCR, ULPI pin mux in SICRL/SICRH and
+ * PHY selection in the controller's CONTROL register.  Returns 0 or a
+ * negative errno (-ENODEV, -ENOMEM, -EINVAL or the address lookup error). */
+int __init mpc831x_usb_cfg(void)
+{
+	u32 temp;
+	void __iomem *immap, *usb_regs;
+	struct device_node *np = NULL;
+	struct device_node *immr_node = NULL;
+	const void *prop;
+	struct resource res;
+	int ret = 0;
+#ifdef CONFIG_USB_OTG
+	const void *dr_mode;
+#endif
+
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!np)
+		return -ENODEV;
+	prop = of_get_property(np, "phy_type", NULL);
+
+	/* Map IMMR space for pin and clock settings */
+	immap = ioremap(get_immrbase(), 0x1000);
+	if (!immap) {
+		of_node_put(np);
+		return -ENOMEM;
+	}
+
+	/* Configure clock */
+	immr_node = of_get_parent(np);
+	if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
+			  of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC8315_SCCR_USB_MASK, MPC8315_SCCR_USB_DRCM_01);
+	else
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC83XX_SCCR_USB_MASK, MPC83XX_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI.  There is no pin mux for UTMI */
+	if (prop && !strcmp(prop, "ulpi")) {
+		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8308_SICRH_USB_MASK, MPC8308_SICRH_USB_ULPI);
+		} else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC8315_SICRL_USB_MASK, MPC8315_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8315_SICRH_USB_MASK, MPC8315_SICRH_USB_ULPI);
+		} else {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC831X_SICRL_USB_MASK, MPC831X_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC831X_SICRH_USB_MASK, MPC831X_SICRH_USB_ULPI);
+		}
+	}
+
+	iounmap(immap);
+
+	/* Map USB SOC space */
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		goto out_put;
+	usb_regs = ioremap(res.start, resource_size(&res));
+	if (!usb_regs) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	/* Using on-chip PHY */
+	if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) {
+		u32 refsel;
+
+		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
+			goto out;
+
+		if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
+			refsel = CONTROL_REFSEL_24MHZ;
+		else
+			refsel = CONTROL_REFSEL_48MHZ;
+		/* Set UTMI_PHY_EN and REFSEL */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
+			 CONTROL_UTMI_PHY_EN | refsel);
+	/* Using external ULPI PHY */
+	} else if (prop && !strcmp(prop, "ulpi")) {
+		/* Set PHY_CLK_SEL to ULPI */
+		temp = CONTROL_PHY_CLK_SEL_ULPI;
+#ifdef CONFIG_USB_OTG
+		/* Set OTG_PORT */
+		if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+			dr_mode = of_get_property(np, "dr_mode", NULL);
+			if (dr_mode && !strcmp(dr_mode, "otg"))
+				temp |= CONTROL_OTG_PORT;
+		}
+#endif /* CONFIG_USB_OTG */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
+	} else {
+		pr_warn("831x USB PHY type not supported\n");
+		ret = -EINVAL;
+	}
+
+out:
+	iounmap(usb_regs);
+out_put:
+	/* immr_node is still consulted above, so drop it only here. */
+	of_node_put(immr_node);
+	of_node_put(np);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c
new file mode 100644
index 0000000000..3a8d6c662d
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_834x.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/* Program the MPC834x USB clock (SCCR) and pin mux (SICRL/SICRH) from the
+ * "fsl-usb2-dr" and "fsl-usb2-mph" nodes.  Returns 0, or -ENOMEM when the
+ * IMMR window cannot be mapped. */
+int __init mpc834x_usb_cfg(void)
+{
+	unsigned long clk, mux_lo, mux_hi;
+	bool dr_on_port0 = false, dr_on_port1 = false;
+	struct device_node *node;
+	const char *phy, *mode;
+	void __iomem *immr;
+
+	immr = ioremap(get_immrbase(), 0x1000);
+	if (!immr)
+		return -ENOMEM;
+
+	/* Read the registers with their USB fields cleared */
+	/* Note: DR and MPH must use the same clock setting in SCCR */
+	clk = in_be32(immr + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
+	mux_lo = in_be32(immr + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
+	mux_hi = in_be32(immr + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
+
+	node = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (node) {
+		clk |= MPC83XX_SCCR_USB_DRCM_11;  /* 1:3 */
+		dr_on_port1 = true;
+
+		phy = of_get_property(node, "phy_type", NULL);
+		if (phy && (!strcmp(phy, "utmi") || !strcmp(phy, "utmi_wide"))) {
+			mux_lo |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+			mux_hi |= MPC834X_SICRH_USB_UTMI;
+			dr_on_port0 = true;
+		} else if (phy && !strcmp(phy, "serial")) {
+			/* Serial PHY: DR also takes port 0 only when dr_mode is "otg" */
+			mode = of_get_property(node, "dr_mode", NULL);
+			dr_on_port0 = mode && !strcmp(mode, "otg");
+			mux_lo |= MPC834X_SICRL_USB1;
+			if (dr_on_port0)
+				mux_lo |= MPC834X_SICRL_USB0;
+		} else if (phy && !strcmp(phy, "ulpi")) {
+			mux_lo |= MPC834X_SICRL_USB1;
+		} else {
+			pr_warn("834x USB PHY type not supported\n");
+		}
+		of_node_put(node);
+	}
+
+	node = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
+	if (node) {
+		clk |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */
+
+		if (of_get_property(node, "port0", NULL)) {
+			if (dr_on_port0)
+				pr_warn("834x USB port0 can't be used by both DR and MPH!\n");
+			mux_lo &= ~MPC834X_SICRL_USB0;
+		}
+		if (of_get_property(node, "port1", NULL)) {
+			if (dr_on_port1)
+				pr_warn("834x USB port1 can't be used by both DR and MPH!\n");
+			mux_lo &= ~MPC834X_SICRL_USB1;
+		}
+		of_node_put(node);
+	}
+
+	/* Write back */
+	out_be32(immr + MPC83XX_SCCR_OFFS, clk);
+	out_be32(immr + MPC83XX_SICRL_OFFS, mux_lo);
+	out_be32(immr + MPC83XX_SICRH_OFFS, mux_hi);
+
+	iounmap(immr);
+	return 0;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c
new file mode 100644
index 0000000000..726935bb6e
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_837x.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/* Configure the MPC837x USB DR clock and ULPI/serial pin mux.  Returns 0,
+ * -ENODEV (node missing or disabled), -EINVAL (PHY type) or -ENOMEM. */
+int __init mpc837x_usb_cfg(void)
+{
+	struct device_node *dr_node;
+	void __iomem *immr;
+	const char *phy;
+	int err = 0;
+
+	/* of_node_put() at the exit label is also reached with a NULL node. */
+	dr_node = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!dr_node || !of_device_is_available(dr_node)) {
+		err = -ENODEV;
+		goto put_node;
+	}
+
+	phy = of_get_property(dr_node, "phy_type", NULL);
+	if (!phy || (strcmp(phy, "ulpi") != 0 && strcmp(phy, "serial") != 0)) {
+		pr_warn("837x USB PHY type not supported\n");
+		err = -EINVAL;
+		goto put_node;
+	}
+
+	immr = ioremap(get_immrbase(), 0x1000);
+	if (!immr) {
+		err = -ENOMEM;
+		goto put_node;
+	}
+
+	clrsetbits_be32(immr + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
+			MPC837X_SCCR_USB_DRCM_11);
+	clrsetbits_be32(immr + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
+			MPC837X_SICRL_USB_ULPI);
+	iounmap(immr);
+
+put_node:
+	of_node_put(dr_node);
+	return err;
+}
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
new file mode 100644
index 0000000000..9315a3b69d
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -0,0 +1,291 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig FSL_SOC_BOOKE
+	bool "Freescale Book-E Machine Type"
+	depends on PPC_E500
+	select FSL_SOC
+	select PPC_UDBG_16550
+	select MPIC
+	select HAVE_PCI
+	select FSL_PCI if PCI
+	select SERIAL_8250_EXTENDED if SERIAL_8250
+	select SERIAL_8250_SHARE_IRQ if SERIAL_8250
+	select FSL_CORENET_RCPM if PPC_E500MC
+	default y
+
+if FSL_SOC_BOOKE
+
+if PPC32
+
+config BSC9131_RDB
+	bool "Freescale BSC9131RDB"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Freescale BSC9131RDB board.
+	  The BSC9131 is a heterogeneous SoC containing an e500v2 powerpc and a
+	  StarCore SC3850 DSP
+	  Manufacturer : Freescale Semiconductor, Inc
+
+config C293_PCIE
+	bool "Freescale C293PCIE"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the C293PCIE board
+
+config BSC9132_QDS
+	bool "Freescale BSC9132QDS"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Freescale BSC9132 QDS board.
+	  BSC9132 is a heterogeneous SoC containing dual e500v2 powerpc cores
+	  and dual StarCore SC3850 DSP cores.
+	  Manufacturer : Freescale Semiconductor, Inc
+
+config MPC8540_ADS
+	bool "Freescale MPC8540 ADS"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the MPC 8540 ADS board
+
+config MPC8560_ADS
+	bool "Freescale MPC8560 ADS"
+	select DEFAULT_UIMAGE
+	select CPM2
+	help
+	  This option enables support for the MPC 8560 ADS board
+
+config MPC85xx_CDS
+	bool "Freescale MPC85xx CDS"
+	select DEFAULT_UIMAGE
+	select PPC_I8259
+	select HAVE_RAPIDIO
+	help
+	  This option enables support for the MPC85xx CDS board
+
+config MPC85xx_MDS
+	bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS"
+	select DEFAULT_UIMAGE
+	select PHYLIB if NETDEVICES
+	select HAVE_RAPIDIO
+	select SWIOTLB
+	help
+	  This option enables support for the MPC8568 MDS, MPC8569 MDS and P1021 MDS boards
+
+config MPC8536_DS
+	bool "Freescale MPC8536 DS"
+	select DEFAULT_UIMAGE
+	select SWIOTLB
+	help
+	  This option enables support for the MPC8536 DS board
+
+config MPC85xx_DS
+	bool "Freescale MPC8544 DS / MPC8572 DS"
+	select PPC_I8259
+	select DEFAULT_UIMAGE
+	select FSL_ULI1575 if PCI
+	select SWIOTLB
+	help
+	  This option enables support for the MPC8544 DS and MPC8572 DS boards
+
+config MPC85xx_RDB
+	bool "Freescale P102x MBG/UTM/RDB"
+	select PPC_I8259
+	select DEFAULT_UIMAGE
+	select SWIOTLB
+	help
+	  This option enables support for the P1020 MBG PC, P1020 UTM PC,
+	  P1020 RDB PC, P1020 RDB PD, P1020 RDB, P1021 RDB PC, P1024 RDB,
+	  and P1025 RDB boards
+
+config PPC_P2020
+	bool "Freescale P2020"
+	default y if MPC85xx_DS || MPC85xx_RDB
+	select DEFAULT_UIMAGE
+	select SWIOTLB
+	imply PPC_I8259
+	imply FSL_ULI1575 if PCI
+	help
+	  This option enables generic unified support for any board with the
+	  Freescale P2020 processor.
+
+	  For example: P2020 DS board, P2020 RDB board, P2020 RDB PC board or
+	  CZ.NIC Turris 1.x boards.
+
+config P1010_RDB
+	bool "Freescale P1010 RDB"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the P1010 RDB board
+
+	  P1010RDB contains P1010Si, which provides CPU performance up to 800
+	  MHz and 1600 DMIPS, additional functionality and faster interfaces
+	  (DDR3/3L, SATA II, and PCI Express).
+
+config P1022_DS
+	bool "Freescale P1022 DS"
+	select DEFAULT_UIMAGE
+	select SWIOTLB
+	help
+	  This option enables support for the Freescale P1022DS reference board.
+
+config P1022_RDK
+	bool "Freescale / iVeia P1022 RDK"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Freescale / iVeia P1022RDK
+	  reference board.
+
+config P1023_RDB
+	bool "Freescale P1023 RDB"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the P1023 RDB board.
+
+config TWR_P102x
+	bool "Freescale TWR-P102x"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the TWR-P1025 board.
+
+config SOCRATES
+	bool "Socrates"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Socrates board.
+
+config KSI8560
+	bool "Emerson KSI8560"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Emerson KSI8560 board
+
+config XES_MPC85xx
+	bool "X-ES single-board computer"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the various single-board
+	  computers from Extreme Engineering Solutions (X-ES) based on
+	  Freescale MPC85xx processors.
+	  Manufacturer: Extreme Engineering Solutions, Inc.
+	  URL: <https://www.xes-inc.com/>
+
+config STX_GP3
+	bool "Silicon Turnkey Express GP3"
+	help
+	  This option enables support for the Silicon Turnkey Express GP3
+	  board.
+	select CPM2
+	select DEFAULT_UIMAGE
+
+config TQM8540
+	bool "TQ Components TQM8540"
+	help
+	  This option enables support for the TQ Components TQM8540 board.
+	select DEFAULT_UIMAGE
+	select TQM85xx
+
+config TQM8541
+	bool "TQ Components TQM8541"
+	help
+	  This option enables support for the TQ Components TQM8541 board.
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	select CPM2
+
+config TQM8548
+	bool "TQ Components TQM8548"
+	help
+	  This option enables support for the TQ Components TQM8548 board.
+	select DEFAULT_UIMAGE
+	select TQM85xx
+
+config TQM8555
+	bool "TQ Components TQM8555"
+	help
+	  This option enables support for the TQ Components TQM8555 board.
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	select CPM2
+
+config TQM8560
+	bool "TQ Components TQM8560"
+	help
+	  This option enables support for the TQ Components TQM8560 board.
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	select CPM2
+
+config PPA8548
+	bool "Prodrive PPA8548"
+	help
+	  This option enables support for the Prodrive PPA8548 board.
+	select DEFAULT_UIMAGE
+	select HAVE_RAPIDIO
+
+config GE_IMP3A
+	bool "GE Intelligent Platforms IMP3A"
+	select DEFAULT_UIMAGE
+	select SWIOTLB
+	select MMIO_NVRAM
+	select GPIOLIB
+	select GE_FPGA
+	help
+	  This option enables support for the GE Intelligent Platforms IMP3A
+	  board.
+
+	  This board is a 3U CompactPCI Single Board Computer with a Freescale
+	  P2020 processor.
+
+config SGY_CTS1000
+	tristate "Servergy CTS-1000 support"
+	select GPIOLIB
+	select OF_GPIO
+	depends on CORENET_GENERIC
+	help
+	  Enable this to support functionality in Servergy's CTS-1000 systems.
+
+config MVME2500
+	bool "Artesyn MVME2500"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Emerson/Artesyn MVME2500 board.
+
+endif # PPC32
+
+config PPC_QEMU_E500
+	bool "QEMU generic e500 platform"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for running as a QEMU guest using
+	  QEMU's generic e500 machine.  This is not required if you're
+	  using a QEMU machine that targets a specific board, such as
+	  mpc8544ds.
+
+	  Unlike most e500 boards that target a specific CPU, this
+	  platform works with any e500-family CPU that QEMU supports.
+	  Thus, you'll need to make sure CONFIG_PPC_E500MC is set or
+	  unset based on the emulated CPU (or actual host CPU in the case
+	  of KVM).
+
+config CORENET_GENERIC
+	bool "Freescale CoreNet Generic"
+	select DEFAULT_UIMAGE
+	select PPC_E500MC
+	select PHYS_64BIT
+	select SWIOTLB
+	select GPIOLIB
+	select GPIO_MPC8XXX
+	select HAVE_RAPIDIO
+	select PPC_EPAPR_HV_PIC
+	help
+	  This option enables support for the FSL CoreNet based boards.
+	  For 32bit kernel, the following boards are supported:
+	    P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080
+	  For 64bit kernel, the following boards are supported:
+	    T208x QDS/RDB, T4240 QDS/RDB and B4 QDS
+	  The following boards are supported for both 32bit and 64bit kernel:
+	    P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB
+
+endif # FSL_SOC_BOOKE
+
+config TQM85xx
+	bool
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
new file mode 100644
index 0000000000..43c34f26f1
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 85xx linux kernel.
+#
+obj-$(CONFIG_SMP) += smp.o
+ifneq ($(CONFIG_FSL_CORENET_RCPM),y)
+obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
+endif
+
+obj-y += common.o
+
+obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o
+obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o
+obj-$(CONFIG_C293_PCIE)   += c293pcie.o
+obj-$(CONFIG_MPC8536_DS)  += mpc8536_ds.o
+obj8259-$(CONFIG_PPC_I8259)   += mpc85xx_8259.o
+obj-$(CONFIG_MPC85xx_DS)  += mpc85xx_ds.o $(obj8259-y)
+obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o
+obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o
+obj-$(CONFIG_P1010_RDB)   += p1010rdb.o
+obj-$(CONFIG_P1022_DS)    += p1022_ds.o
+obj-$(CONFIG_P1022_RDK)   += p1022_rdk.o
+obj-$(CONFIG_P1023_RDB)   += p1023_rdb.o
+obj-$(CONFIG_PPC_P2020)   += p2020.o $(obj8259-y)
+obj-$(CONFIG_TWR_P102x)   += twr_p102x.o
+obj-$(CONFIG_CORENET_GENERIC)   += corenet_generic.o
+obj-$(CONFIG_FB_FSL_DIU)	+= t1042rdb_diu.o
+obj-$(CONFIG_STX_GP3)	  += stx_gp3.o
+obj-$(CONFIG_TQM85xx)	  += tqm85xx.o
+obj-$(CONFIG_PPA8548)     += ppa8548.o
+obj-$(CONFIG_SOCRATES)    += socrates.o socrates_fpga_pic.o
+obj-$(CONFIG_KSI8560)	  += ksi8560.o
+obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o
+obj-$(CONFIG_GE_IMP3A)	  += ge_imp3a.o
+obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o
+obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o
+obj-$(CONFIG_MVME2500)	  += mvme2500.o
diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
new file mode 100644
index 0000000000..2eb62bff86
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC913xQDS Board Setup
+ *
+ * Author:
+ *   Harninder Rai <harninder.rai@freescale.com>
+ *   Priyanka Jain <Priyanka.Jain@freescale.com>
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+
+#include "mpc85xx.h"
+#include "smp.h"
+
+/* init_IRQ hook: allocate the MPIC and initialise it, or log the failure. */
+void __init bsc913x_qds_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+				      0, 256, " OpenPIC  ");
+
+	if (pic)
+		mpic_init(pic);
+	else
+		pr_err("bsc913x: Failed to allocate MPIC structure\n");
+}
+
+/* setup_arch hook: report progress if a progress callback is set, bring up
+ * SMP support when CONFIG_SMP is set, assign the primary PCI bus and log the
+ * board name. */
+static void __init bsc913x_qds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("bsc913x_qds_setup_arch()", 0);
+
+#if defined(CONFIG_SMP)
+	mpc85xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	pr_info("bsc913x board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices); /* publish OF devices for this machine at arch_initcall time */
+
+define_machine(bsc9132_qds) { /* machine description for the BSC9132 QDS board */
+	.name			= "BSC9132 QDS",
+	.compatible		= "fsl,bsc9132qds",
+	.setup_arch		= bsc913x_qds_setup_arch,
+	.init_IRQ		= bsc913x_qds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus, /* only present when PCI support is built */
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
new file mode 100644
index 0000000000..161f006cb3
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC913xRDB Board Setup
+ *
+ * Author: Priyanka Jain <Priyanka.Jain@freescale.com>
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ */
+
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/udbg.h>
+
+#include "mpc85xx.h"
+
+/* init_IRQ hook: allocate the MPIC and initialise it, or log the failure. */
+void __init bsc913x_rdb_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+				      0, 256, " OpenPIC  ");
+
+	if (pic)
+		mpic_init(pic);
+	else
+		pr_err("bsc913x: Failed to allocate MPIC structure\n");
+}
+
+/* setup_arch hook: report progress if a progress callback is set, then log
+ * the board name.  No SMP or PCI setup is done for this board here.
+ */
+static void __init bsc913x_rdb_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("bsc913x_rdb_setup_arch()", 0);
+
+	pr_info("bsc913x board from Freescale Semiconductor\n");
+}
+
+machine_device_initcall(bsc9131_rdb, mpc85xx_common_publish_devices); /* publish OF devices for this machine at device_initcall time */
+
+define_machine(bsc9131_rdb) { /* machine description for the BSC9131 RDB board */
+	.name			= "BSC9131 RDB",
+	.compatible		= "fsl,bsc9131rdb",
+	.setup_arch		= bsc913x_rdb_setup_arch,
+	.init_IRQ		= bsc913x_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
new file mode 100644
index 0000000000..7a63a3ad5e
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * C293PCIE Board Setup
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* init_IRQ hook: allocate the MPIC (BUG if that fails) and initialise it. */
+static void __init c293_pcie_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+				      0, 256, " OpenPIC  ");
+
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+
+/* setup_arch hook: report progress if a progress callback is set, assign
+ * the primary PCI bus and log the board name.
+ */
+static void __init c293_pcie_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("c293_pcie_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+	pr_info("C293 PCIE board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices); /* publish OF devices for this machine at arch_initcall time */
+
+define_machine(c293_pcie) { /* machine description for the C293 PCIE board */
+	.name			= "C293 PCIE",
+	.compatible		= "fsl,C293PCIE",
+	.setup_arch		= c293_pcie_setup_arch,
+	.init_IRQ		= c293_pcie_pic_init,
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
new file mode 100644
index 0000000000..7578111555
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines common to most mpc85xx-based boards.
+ */
+
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/fsl_pm.h>
+#include <soc/fsl/qe/qe.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "mpc85xx.h"
+
+/*
+ * Global pointer to the power-management operations table.  It is only
+ * defined here (so it starts out NULL); nothing in this file assigns it.
+ */
+const struct fsl_pm_ops *qoriq_pm_ops;
+
+/*
+ * Device-tree match table passed to of_platform_bus_probe() by
+ * mpc85xx_common_publish_devices().  Entries match by node type, node
+ * name or compatible string; the empty entry terminates the table.
+ */
+static const struct of_device_id mpc85xx_common_ids[] __initconst = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .compatible = "simple-bus", },
+	{ .name = "cpm", },
+	{ .name = "localbus", },
+	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,qe", },
+	{ .compatible = "fsl,cpm2", },
+	{ .compatible = "fsl,srio", },
+	/* So that the DMA channel nodes can be probed individually: */
+	{ .compatible = "fsl,eloplus-dma", },
+	/* For the PMC driver */
+	{ .compatible = "fsl,mpc8548-guts", },
+	/* Probably unnecessary? */
+	{ .compatible = "gpio-leds", },
+	/* For all PCI controllers */
+	{ .compatible = "fsl,mpc8540-pci", },
+	{ .compatible = "fsl,mpc8548-pcie", },
+	{ .compatible = "fsl,p1022-pcie", },
+	{ .compatible = "fsl,p1010-pcie", },
+	{ .compatible = "fsl,p1023-pcie", },
+	{ .compatible = "fsl,p4080-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{ .compatible = "fsl,fman", },
+	{},
+};
+
+/*
+ * Probe the buses listed in mpc85xx_common_ids, starting from the default
+ * root (NULL), and return of_platform_bus_probe()'s result unchanged.
+ */
+int __init mpc85xx_common_publish_devices(void)
+{
+	return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
+}
+#ifdef CONFIG_CPM2
+/*
+ * Chained handler for the CPM2 cascade: dispatch every interrupt that
+ * cpm2_get_irq() reports, then signal end-of-interrupt on the parent chip.
+ */
+static void cpm2_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	int virq;
+
+	/* Keep dispatching until cpm2_get_irq() returns a negative value. */
+	for (;;) {
+		virq = cpm2_get_irq();
+		if (virq < 0)
+			break;
+		generic_handle_irq(virq);
+	}
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+
+/*
+ * Set up the CPM2 PIC: find the "fsl,cpm2-pic" node, map its first
+ * interrupt, initialise the PIC and install cpm2_cascade() as the chained
+ * handler.  Logs an error and returns if the node or the IRQ is missing.
+ */
+void __init mpc85xx_cpm2_pic_init(void)
+{
+	struct device_node *pic_node;
+	int cascade_virq;
+
+	pic_node = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
+	if (!pic_node) {
+		pr_err("PIC init: can not find fsl,cpm2-pic node\n");
+		return;
+	}
+
+	cascade_virq = irq_of_parse_and_map(pic_node, 0);
+	if (cascade_virq) {
+		cpm2_pic_init(pic_node);
+		of_node_put(pic_node);
+		irq_set_chained_handler(cascade_virq, cpm2_cascade);
+		return;
+	}
+
+	/* No cascade interrupt: drop the node reference and report it. */
+	of_node_put(pic_node);
+	pr_err("PIC init: got no IRQ for cpm cascade\n");
+}
+#endif
+
+#ifdef CONFIG_QUICC_ENGINE
+/*
+ * Initialise QE parallel I/O from the "par_io" node, then apply the
+ * par_io configuration of every node named "ucc".  Does nothing when no
+ * "par_io" node exists.
+ */
+void __init mpc85xx_qe_par_io_init(void)
+{
+	struct device_node *par_io_node;
+	struct device_node *ucc_node;
+
+	par_io_node = of_find_node_by_name(NULL, "par_io");
+	if (!par_io_node)
+		return;
+
+	par_io_init(par_io_node);
+	of_node_put(par_io_node);
+
+	for_each_node_by_name(ucc_node, "ucc")
+		par_io_of_config(ucc_node);
+}
+#endif
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
new file mode 100644
index 0000000000..645fcca77c
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Corenet based SoC DS Setup
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pgtable.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/ehv_pic.h>
+#include <asm/swiotlb.h>
+
+#include <linux/of_platform.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+#include "mpc85xx.h"
+
+/*
+ * Allocate and initialise the MPIC.  MPIC_ENABLE_COREINT is requested only
+ * when neither CONFIG_HOTPLUG_CPU nor CONFIG_KEXEC_CORE is enabled.
+ * Allocation failure is fatal (BUG_ON).
+ */
+static void __init corenet_gen_pic_init(void)
+{
+	unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
+				  MPIC_NO_RESET;
+	struct mpic *pic;
+
+	if (!(IS_ENABLED(CONFIG_HOTPLUG_CPU) || IS_ENABLED(CONFIG_KEXEC_CORE)))
+		mpic_flags |= MPIC_ENABLE_COREINT;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 512, " OpenPIC  ");
+	BUG_ON(!pic);
+
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture: initialise SMP support, run the swiotlb 4G
+ * detection and log the board name taken from ppc_md.name.
+ */
+static void __init corenet_gen_setup_arch(void)
+{
+	mpc85xx_smp_init();
+
+	swiotlb_detect_4g();
+
+	pr_info("%s board\n", ppc_md.name);
+}
+
+/*
+ * Match table passed to of_platform_bus_probe() by
+ * corenet_gen_publish_devices(); the empty entry terminates it.
+ */
+static const struct of_device_id of_device_ids[] = {
+	{ .compatible = "simple-bus", },
+	{ .compatible = "mdio-mux-gpio", },
+	{ .compatible = "fsl,fpga-ngpixis", },
+	{ .compatible = "fsl,fpga-qixis", },
+	{ .compatible = "fsl,srio", },
+	{ .compatible = "fsl,p4080-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v3.0", },
+	{ .compatible = "fsl,qe", },
+	/* The following two are for the Freescale hypervisor */
+	{ .name = "hypervisor", },
+	{ .name = "handles", },
+	{ /* terminator */ },
+};
+
+/*
+ * Probe the buses listed in of_device_ids and return
+ * of_platform_bus_probe()'s result.  Registered below as an arch initcall
+ * for the corenet_generic machine.
+ */
+static int __init corenet_gen_publish_devices(void)
+{
+	return of_platform_bus_probe(NULL, of_device_ids, NULL);
+}
+machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
+
+/*
+ * NULL-terminated list of board compatible strings handled by this
+ * machine.  corenet_generic_probe() matches them as-is and also with a
+ * "-hv" suffix appended.
+ */
+static const char * const boards[] __initconst = {
+	"fsl,P2041RDB",
+	"fsl,P3041DS",
+	"fsl,OCA4080",
+	"fsl,P4080DS",
+	"fsl,P5020DS",
+	"fsl,P5040DS",
+	"fsl,T2080QDS",
+	"fsl,T2080RDB",
+	"fsl,T2081QDS",
+	"fsl,T4240QDS",
+	"fsl,T4240RDB",
+	"fsl,B4860QDS",
+	"fsl,B4420QDS",
+	"fsl,B4220QDS",
+	"fsl,T1023RDB",
+	"fsl,T1024QDS",
+	"fsl,T1024RDB",
+	"fsl,T1040D4RDB",
+	"fsl,T1042D4RDB",
+	"fsl,T1040QDS",
+	"fsl,T1042QDS",
+	"fsl,T1040RDB",
+	"fsl,T1042RDB",
+	"fsl,T1042RDB_PI",
+	"keymile,kmcent2",
+	"keymile,kmcoge4",
+	"varisys,CYRUS",
+	NULL
+};
+
+/*
+ * Machine probe.  Returns 1 when the machine is compatible with one of
+ * boards[], either directly or with a "-hv" suffix, and 0 otherwise.
+ *
+ * A "-hv" match also switches the interrupt, restart, halt and power-off
+ * hooks to their ehv_pic/fsl_hv variants and, with CONFIG_SMP, clears the
+ * timebase give/take operations.
+ *
+ * NOTE(review): this comment used to read "Called very early, device-tree
+ * isn't unflattened", yet the code hands of_root to
+ * of_device_compatible_match() -- confirm which statement still holds.
+ */
+static int __init corenet_generic_probe(void)
+{
+	/* Holds "<board>-hv"; snprintf() bounds the write to this buffer */
+	char hv_compat[24];
+	int i;
+#ifdef CONFIG_SMP
+	extern struct smp_ops_t smp_85xx_ops;
+#endif
+
+	if (of_device_compatible_match(of_root, boards))
+		return 1;
+
+	/* Check if we're running under the Freescale hypervisor */
+	for (i = 0; boards[i]; i++) {
+		snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]);
+		if (of_machine_is_compatible(hv_compat)) {
+			ppc_md.init_IRQ = ehv_pic_init;
+
+			ppc_md.get_irq = ehv_pic_get_irq;
+			ppc_md.restart = fsl_hv_restart;
+			pm_power_off = fsl_hv_halt;
+			ppc_md.halt = fsl_hv_halt;
+#ifdef CONFIG_SMP
+			/*
+			 * Disable the timebase sync operations because we
+			 * can't write to the timebase registers under the
+			 * hypervisor.
+			 */
+			smp_85xx_ops.give_timebase = NULL;
+			smp_85xx_ops.take_timebase = NULL;
+#endif
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Machine description for the generic CoreNet boards.  It has no
+ * .compatible string; matching is done by corenet_generic_probe().  The
+ * get_irq hook is chosen at build time to agree with the coreint decision
+ * made in corenet_gen_pic_init().
+ */
+define_machine(corenet_generic) {
+	.name			= "CoreNet Generic",
+	.probe			= corenet_generic_probe,
+	.setup_arch		= corenet_gen_setup_arch,
+	.init_IRQ		= corenet_gen_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+/*
+ * Core reset may cause issues if using the proxy mode of MPIC.
+ * So, use the mixed mode of MPIC if enabling CPU hotplug.
+ *
+ * Likewise, problems have been seen with kexec when coreint is enabled.
+ */
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
+	.get_irq		= mpic_get_irq,
+#else
+	.get_irq		= mpic_get_coreint_irq,
+#endif
+	.progress		= udbg_progress,
+	.power_save		= e500_idle,
+};
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
new file mode 100644
index 0000000000..9c3b44a195
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE IMP3A Board Setup
+ *
+ * Author Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2010 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc85xx_ds.c (MPC85xx DS Board Setup)
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+#include <sysdev/ge/ge_pic.h>
+
+/*
+ * Mapping of the board's FPGA registers, set up by ge_imp3a_setup_arch().
+ * Stays NULL when the "ge,imp3a-fpga-regs" node is absent or the mapping
+ * fails.
+ */
+void __iomem *imp3a_regs;
+
+/*
+ * Allocate and initialise the MPIC (MPIC_NO_RESET is added when the
+ * machine is compatible with "fsl,MPC8572DS-CAMP"), then hand the first
+ * "gef,fpga-pic-1.00" interrupt controller node to gef_pic_init().
+ */
+void __init ge_imp3a_pic_init(void)
+{
+	unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct device_node *fpga_pic = NULL;
+	struct device_node *node;
+	struct mpic *pic;
+
+	if (of_machine_is_compatible("fsl,MPC8572DS-CAMP"))
+		mpic_flags |= MPIC_NO_RESET;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+
+	/*
+	 * There is a simple interrupt handler in the main FPGA, this needs
+	 * to be cascaded into the MPIC
+	 */
+	for_each_node_by_type(node, "interrupt-controller") {
+		if (!of_device_is_compatible(node, "gef,fpga-pic-1.00"))
+			continue;
+		fpga_pic = node;
+		break;
+	}
+
+	if (!fpga_pic) {
+		pr_warn("IMP3A: No FPGA PIC\n");
+		return;
+	}
+
+	gef_pic_init(fpga_pic);
+	of_node_put(fpga_pic);
+}
+
+/*
+ * Pick the primary PCI controller: among the "pci" nodes compatible with
+ * one of the supported Freescale controllers, the one whose first address
+ * range has 0x9000 in its low 20 bits becomes fsl_pci_primary.
+ *
+ * Nodes whose address cannot be translated are skipped, so the comparison
+ * never reads an uninitialised struct resource.
+ */
+static void __init ge_imp3a_pci_assign_primary(void)
+{
+#ifdef CONFIG_PCI
+	struct device_node *np;
+	struct resource rsrc;
+
+	for_each_node_by_type(np, "pci") {
+		if (!of_device_is_compatible(np, "fsl,mpc8540-pci") &&
+		    !of_device_is_compatible(np, "fsl,mpc8548-pcie") &&
+		    !of_device_is_compatible(np, "fsl,p2020-pcie"))
+			continue;
+
+		/* rsrc is only filled in when the translation succeeds */
+		if (of_address_to_resource(np, 0, &rsrc))
+			continue;
+
+		if ((rsrc.start & 0xfffff) == 0x9000) {
+			/* Drop the previous primary before taking the new one */
+			of_node_put(fsl_pci_primary);
+			fsl_pci_primary = of_node_get(np);
+		}
+	}
+#endif
+}
+
+/*
+ * Setup the architecture: SMP init, primary PCI selection, swiotlb
+ * detection, mapping of the FPGA board registers into imp3a_regs and
+ * (with CONFIG_MMIO_NVRAM) NVRAM initialisation.
+ */
+static void __init ge_imp3a_setup_arch(void)
+{
+	struct device_node *fpga_node;
+
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("ge_imp3a_setup_arch()", 0);
+
+	mpc85xx_smp_init();
+	ge_imp3a_pci_assign_primary();
+	swiotlb_detect_4g();
+
+	/* Remap basic board registers */
+	fpga_node = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs");
+	if (fpga_node != NULL) {
+		imp3a_regs = of_iomap(fpga_node, 0);
+		if (!imp3a_regs)
+			pr_warn("Unable to map board registers\n");
+		of_node_put(fpga_node);
+	}
+
+#ifdef CONFIG_MMIO_NVRAM
+	mmio_nvram_init();
+#endif
+
+	pr_info("GE Intelligent Platforms IMP3A 3U cPCI SBC\n");
+}
+
+/* PCB revision: bits 15..8 of the 16-bit register at offset 0 */
+static unsigned int ge_imp3a_get_pcb_rev(void)
+{
+	return (ioread16(imp3a_regs) >> 8) & 0xff;
+}
+
+/* Board (software) revision: bits 7..0 of the 16-bit register at offset 0x2 */
+static unsigned int ge_imp3a_get_board_rev(void)
+{
+	return ioread16(imp3a_regs + 0x2) & 0xff;
+}
+
+/* FPGA revision: bits 15..8 of the 16-bit register at offset 0x2 */
+static unsigned int ge_imp3a_get_fpga_rev(void)
+{
+	return (ioread16(imp3a_regs + 0x2) >> 8) & 0xff;
+}
+
+/* compactPCI geographical address: bits 11..8 of the register at offset 0x6 */
+static unsigned int ge_imp3a_get_cpci_geo_addr(void)
+{
+	return (ioread16(imp3a_regs + 0x6) & 0x0f00) >> 8;
+}
+
+/*
+ * compactPCI system-controller status: non-zero when bit 12 of the
+ * register at offset 0x6 is set (the raw masked bit is returned).
+ */
+static unsigned int ge_imp3a_get_cpci_is_syscon(void)
+{
+	return ioread16(imp3a_regs + 0x6) & (1 << 12);
+}
+
+/*
+ * Emit the board-specific cpuinfo lines into @m: vendor, PCB/board
+ * revision, FPGA revision and the compactPCI geographical address and
+ * system-controller status.
+ *
+ * Everything except the vendor line is read from the FPGA registers, so
+ * those lines are omitted when the registers were never mapped.
+ */
+static void ge_imp3a_show_cpuinfo(struct seq_file *m)
+{
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+	/*
+	 * ge_imp3a_setup_arch() only warns when the board registers cannot
+	 * be mapped, so imp3a_regs may still be NULL here; the getters
+	 * below would then read through a NULL pointer.
+	 */
+	if (!imp3a_regs)
+		return;
+
+	/* Board revision 1 is printed as 'A', 2 as 'B', ... */
+	seq_printf(m, "Revision\t: %u%c\n", ge_imp3a_get_pcb_rev(),
+		('A' + ge_imp3a_get_board_rev() - 1));
+
+	seq_printf(m, "FPGA Revision\t: %u\n", ge_imp3a_get_fpga_rev());
+
+	seq_printf(m, "cPCI geo. addr\t: %u\n", ge_imp3a_get_cpci_geo_addr());
+
+	seq_printf(m, "cPCI syscon\t: %s\n",
+		ge_imp3a_get_cpci_is_syscon() ? "yes" : "no");
+}
+
+/* Run mpc85xx_common_publish_devices() as an arch initcall for ge_imp3a. */
+machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
+
+/*
+ * Machine description for the GE IMP3A board: board name, compatible
+ * string and the callbacks defined above (PCI fixups only with CONFIG_PCI).
+ */
+define_machine(ge_imp3a) {
+	.name			= "GE_IMP3A",
+	.compatible		= "ge,IMP3A",
+	.setup_arch		= ge_imp3a_setup_arch,
+	.init_IRQ		= ge_imp3a_pic_init,
+	.show_cpuinfo		= ge_imp3a_show_cpuinfo,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
new file mode 100644
index 0000000000..1b6326a4b0
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -0,0 +1,184 @@
+/*
+ * Board setup routines for the Emerson KSI8560
+ *
+ * Author: Alexandr Smirnov <asmirnov@ru.mvista.com>
+ *
+ * Based on mpc85xx_ads.c maintained by Kumar Gala
+ *
+ * 2008 (c) MontaVista, Software, Inc.  This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include <asm/cpm2.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "mpc85xx.h"
+
+#define KSI8560_CPLD_HVR		0x04 /* Hardware Version Register */
+#define KSI8560_CPLD_PVR		0x08 /* PLD Version Register */
+#define KSI8560_CPLD_RCR1		0x30 /* Reset Command Register 1 */
+
+#define KSI8560_CPLD_RCR1_CPUHR		0x80 /* CPU Hard Reset */
+
+/*
+ * Mapping of the board CPLD, set by ksi8560_setup_arch(); remains NULL when
+ * the CPLD node is missing.  Readers check it before touching the CPLD.
+ */
+static void __iomem *cpld_base = NULL;
+
+/*
+ * Restart hook: write the CPU hard-reset bit to the CPLD reset command
+ * register when the CPLD is mapped, otherwise log an error.  Never
+ * returns; @cmd is unused.
+ */
+static void __noreturn machine_restart(char *cmd)
+{
+	if (!cpld_base)
+		pr_err("Can't find CPLD base, hang forever\n");
+	else
+		out_8(cpld_base + KSI8560_CPLD_RCR1, KSI8560_CPLD_RCR1_CPUHR);
+
+	/* Spin here whether or not the reset was requested. */
+	while (1)
+		;
+}
+
+/*
+ * Allocate and initialise the MPIC (fatal on allocation failure), then
+ * set up the CPM2 PIC cascade.
+ */
+static void __init ksi8560_pic_init(void)
+{
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+
+	mpc85xx_cpm2_pic_init();
+}
+
+#ifdef CONFIG_CPM2
+/*
+ * Setup I/O ports
+ *
+ * One CPM pin assignment: the three fields are passed, in this order, to
+ * cpm2_set_pin() by init_ioports().
+ */
+struct cpm_pin {
+	int port, pin, flags;
+};
+
+/*
+ * Pin assignments applied by init_ioports(), as {port, pin, flags}
+ * entries grouped by the peripheral they serve.
+ */
+static struct cpm_pin __initdata ksi8560_pins[] = {
+	/* SCC1 */
+	{3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* SCC2 */
+	{3, 26, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+	/* FCC1 */
+	{0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+	{0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+	{0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+	{2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK9 */
+	{2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK10 */
+
+};
+
+/*
+ * Apply every entry of ksi8560_pins, then route the clocks: BRG1 for SCC1
+ * and BRG2 for SCC2 (both directions), CLK9/CLK10 for FCC1 RX/TX.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *cur = ksi8560_pins;
+	const struct cpm_pin *const end = ksi8560_pins + ARRAY_SIZE(ksi8560_pins);
+
+	for (; cur < end; cur++)
+		cpm2_set_pin(cur->port, cur->pin, cur->flags);
+
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX);
+}
+#endif
+
+/*
+ * Setup the architecture: map the CPLD into cpld_base (or log an error if
+ * its node is missing), report progress and, with CONFIG_CPM2, reset the
+ * CPM2 and configure its I/O ports.
+ */
+static void __init ksi8560_setup_arch(void)
+{
+	struct device_node *cpld_node;
+
+	cpld_node = of_find_compatible_node(NULL, NULL, "emerson,KSI8560-cpld");
+	if (!cpld_node)
+		pr_err("Can't find CPLD in device tree\n");
+	else
+		cpld_base = of_iomap(cpld_node, 0);
+
+	/* Called for the missing-node case too, as before. */
+	of_node_put(cpld_node);
+
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("ksi8560_setup_arch()", 0);
+
+#ifdef CONFIG_CPM2
+	cpm2_reset();
+	init_ioports();
+#endif
+}
+
+/*
+ * Emit the board-specific cpuinfo lines into @m: vendor, board name, the
+ * hardware and CPLD revisions (when the CPLD is mapped), PVR, SVR and the
+ * PLL field taken from HID1 bits 29..24.
+ */
+static void ksi8560_show_cpuinfo(struct seq_file *m)
+{
+	const uint pvr = mfspr(SPRN_PVR);
+	const uint svr = mfspr(SPRN_SVR);
+	uint hid1;
+
+	seq_puts(m, "Vendor\t\t: Emerson Network Power\n");
+	seq_puts(m, "Board\t\t: KSI8560\n");
+
+	if (!cpld_base) {
+		seq_puts(m, "Unknown Hardware and CPLD revs\n");
+	} else {
+		seq_printf(m, "Hardware rev\t: %d\n",
+			   in_8(cpld_base + KSI8560_CPLD_HVR));
+		seq_printf(m, "CPLD rev\t: %d\n",
+			   in_8(cpld_base + KSI8560_CPLD_PVR));
+	}
+
+	seq_printf(m, "PVR\t\t: 0x%x\n", pvr);
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	/* Display cpu Pll setting */
+	hid1 = mfspr(SPRN_HID1);
+	seq_printf(m, "PLL setting\t: 0x%x\n", (hid1 >> 24) & 0x3f);
+}
+
+/* Run mpc85xx_common_publish_devices() as a device initcall for ksi8560. */
+machine_device_initcall(ksi8560, mpc85xx_common_publish_devices);
+
+/*
+ * Machine description for the Emerson KSI8560: board name, compatible
+ * string and the callbacks defined above, including the CPLD-based
+ * restart hook.
+ */
+define_machine(ksi8560) {
+	.name			= "KSI8560",
+	.compatible		= "emerson,KSI8560",
+	.setup_arch		= ksi8560_setup_arch,
+	.init_IRQ		= ksi8560_pic_init,
+	.show_cpuinfo		= ksi8560_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.restart		= machine_restart,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
new file mode 100644
index 0000000000..e966b2ad8e
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8536 DS Board Setup
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* Allocate and initialise the MPIC; allocation failure is fatal (BUG_ON). */
+void __init mpc8536_ds_pic_init(void)
+{
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture: report progress (when a progress hook is
+ * installed), call fsl_pci_assign_primary(), run the swiotlb 4G detection
+ * and announce the board.
+ */
+static void __init mpc8536_ds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc8536_ds_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
+	/*
+	 * Log the banner at an explicit info level, like the other 85xx
+	 * boards, rather than at the kernel's default message level.
+	 */
+	pr_info("MPC8536 DS board from Freescale Semiconductor\n");
+}
+
+/* Run mpc85xx_common_publish_devices() as an arch initcall for mpc8536_ds. */
+machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
+
+/*
+ * Machine description for the MPC8536 DS board: board name, compatible
+ * string and the callbacks defined above (PCI fixups only with CONFIG_PCI).
+ */
+define_machine(mpc8536_ds) {
+	.name			= "MPC8536 DS",
+	.compatible		= "fsl,mpc8536ds",
+	.setup_arch		= mpc8536_ds_setup_arch,
+	.init_IRQ		= mpc8536_ds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
new file mode 100644
index 0000000000..c764d7551e
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helpers shared by the 85xx board files.  Each optional helper becomes
+ * an empty inline stub when its config option is disabled, so callers
+ * need no #ifdef of their own.
+ */
+#ifndef MPC85xx_H
+#define MPC85xx_H
+/* Probe the common 85xx buses; returns of_platform_bus_probe()'s result. */
+extern int mpc85xx_common_publish_devices(void);
+
+/* CPM2 PIC cascade setup (CONFIG_CPM2). */
+#ifdef CONFIG_CPM2
+extern void mpc85xx_cpm2_pic_init(void);
+#else
+static inline void __init mpc85xx_cpm2_pic_init(void) {}
+#endif /* CONFIG_CPM2 */
+
+/* QUICC Engine parallel I/O setup (CONFIG_QUICC_ENGINE). */
+#ifdef CONFIG_QUICC_ENGINE
+extern void mpc85xx_qe_par_io_init(void);
+#else
+static inline void __init mpc85xx_qe_par_io_init(void) {}
+#endif
+
+/* i8259 cascade setup (CONFIG_PPC_I8259). */
+#ifdef CONFIG_PPC_I8259
+void __init mpc85xx_8259_init(void);
+#else
+static inline void __init mpc85xx_8259_init(void) {}
+#endif
+
+#endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_8259.c b/arch/powerpc/platforms/85xx/mpc85xx_8259.c
new file mode 100644
index 0000000000..cb00d596ad
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_8259.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx 8259 functions for DS Board Setup
+ *
+ * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang <tie-fei.zang@freescale.com>
+ *      - Add PCI/PCI Express support
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+
+#include "mpc85xx.h"
+
+/*
+ * Chained handler for the i8259 cascade: dispatch the interrupt reported
+ * by i8259_irq() when it is non-zero, then signal end-of-interrupt on the
+ * parent chip.
+ */
+static void mpc85xx_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int virq = i8259_irq();
+
+	if (virq != 0)
+		generic_handle_irq(virq);
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Locate the "chrp,iic" interrupt controller, map its cascade interrupt,
+ * initialise the i8259 and install mpc85xx_8259_cascade() as the chained
+ * handler.
+ *
+ * Returns silently (debug message only) when no i8259 node exists, and
+ * with an error message when the cascade interrupt cannot be mapped.  The
+ * node reference is released on every path that obtained it.
+ */
+void __init mpc85xx_8259_init(void)
+{
+	struct device_node *np;
+	struct device_node *cascade_node = NULL;
+	int cascade_irq;
+
+	/* Initialize the i8259 controller */
+	for_each_node_by_type(np, "interrupt-controller") {
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			cascade_node = np;
+			break;
+		}
+	}
+
+	if (cascade_node == NULL) {
+		pr_debug("i8259: Could not find i8259 PIC\n");
+		return;
+	}
+
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (!cascade_irq) {
+		pr_err("i8259: Failed to map cascade interrupt\n");
+		/* Release the reference kept by breaking out of the loop */
+		of_node_put(cascade_node);
+		return;
+	}
+
+	pr_debug("i8259: cascade mapped to irq %d\n", cascade_irq);
+
+	i8259_init(cascade_node, 0);
+	of_node_put(cascade_node);
+
+	irq_set_chained_handler(cascade_irq, mpc85xx_8259_cascade);
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
new file mode 100644
index 0000000000..2856148321
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx DS Board Setup
+ *
+ * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang <tie-fei.zang@freescale.com>
+ * 	- Add PCI/PCI Express support
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+#include <asm/swiotlb.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/*
+ * Allocate and initialise the MPIC (MPIC_NO_RESET is added when the
+ * machine is compatible with "fsl,MPC8572DS-CAMP"), then set up the i8259
+ * cascade.  A failed allocation only warns and skips the rest.
+ */
+static void __init mpc85xx_ds_pic_init(void)
+{
+	int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	if (of_machine_is_compatible("fsl,MPC8572DS-CAMP"))
+		mpic_flags |= MPIC_NO_RESET;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC  ");
+	if (!WARN_ON(!pic)) {
+		mpic_init(pic);
+
+		mpc85xx_8259_init();
+	}
+}
+
+/*
+ * Setup the architecture: report progress (when a progress hook is
+ * installed), then run swiotlb detection, primary PCI assignment,
+ * uli_init() and SMP initialisation in that order, and announce the board.
+ */
+static void __init mpc85xx_ds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
+
+	swiotlb_detect_4g();
+	fsl_pci_assign_primary();
+	uli_init();
+	mpc85xx_smp_init();
+
+	pr_info("MPC85xx DS board from Freescale Semiconductor\n");
+}
+
+/* Both DS machines publish the common 85xx devices at arch-initcall time. */
+machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
+
+/*
+ * Machine description for the MPC8544 DS board; it uses the same
+ * callbacks as mpc8572_ds and differs only in name and compatible string.
+ */
+define_machine(mpc8544_ds) {
+	.name			= "MPC8544 DS",
+	.compatible		= "MPC8544DS",
+	.setup_arch		= mpc85xx_ds_setup_arch,
+	.init_IRQ		= mpc85xx_ds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
+
+/*
+ * Machine description for the MPC8572 DS board; it uses the same
+ * callbacks as mpc8544_ds and differs only in name and compatible string.
+ */
+define_machine(mpc8572_ds) {
+	.name			= "MPC8572 DS",
+	.compatible		= "fsl,MPC8572DS",
+	.setup_arch		= mpc85xx_ds_setup_arch,
+	.init_IRQ		= mpc85xx_ds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
new file mode 100644
index 0000000000..c19490cf63
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2006-2010, 2012-2013 Freescale Semiconductor, Inc.
+ * All rights reserved.
+ *
+ * Author: Andy Fleming <afleming@freescale.com>
+ *
+ * Based on 83xx/mpc8360e_pb.c by:
+ *	   Li Yang <LeoLi@freescale.com>
+ *	   Yin Olivia <Hong-hua.Yin@freescale.com>
+ *
+ * Description:
+ * MPC85xx MDS board specific routines.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/initrd.h>
+#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/memblock.h>
+#include <linux/fsl/guts.h>
+
+#include <linux/atomic.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/irq.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <soc/fsl/qe/qe.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if IS_BUILTIN(CONFIG_PHYLIB)
+
+#define MV88E1111_SCR	0x10
+#define MV88E1111_SCR_125CLK	0x0010
+/*
+ * PHY fixup for the 125 CLK toggle: clear MV88E1111_SCR_125CLK in the SCR
+ * register, reset the PHY through BMCR, then set bit 0x0008 in SCR.
+ *
+ * Returns 0 on success or the first negative/non-zero value reported by
+ * phy_read()/phy_write().
+ */
+static int mpc8568_fixup_125_clock(struct phy_device *phydev)
+{
+	int val;
+	int ret;
+
+	/* Workaround for the 125 CLK Toggle */
+	val = phy_read(phydev, MV88E1111_SCR);
+	if (val < 0)
+		return val;
+
+	ret = phy_write(phydev, MV88E1111_SCR, val & ~MV88E1111_SCR_125CLK);
+	if (ret)
+		return ret;
+
+	ret = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	if (ret)
+		return ret;
+
+	/* Re-read SCR after the reset before setting the final bit. */
+	val = phy_read(phydev, MV88E1111_SCR);
+	if (val < 0)
+		return val;
+
+	return phy_write(phydev, MV88E1111_SCR, val | 0x0008);
+}
+
+/*
+ * PHY errata fixup.  Register 29 is written with a value (0x0006, then
+ * 0x000a) before register 30 is read and modified, and finally bits
+ * 0x0060 of register 16 are cleared.
+ *
+ * Returns 0 on success or the first error reported by
+ * phy_read()/phy_write().
+ *
+ * NOTE(review): register 30 is read twice in a row after the 0x000a
+ * write; presumably deliberate for the errata sequence -- confirm before
+ * removing either read.
+ */
+static int mpc8568_mds_phy_fixups(struct phy_device *phydev)
+{
+	int temp;
+	int err;
+
+	/* Errata */
+	err = phy_write(phydev,29, 0x0006);
+
+	if (err)
+		return err;
+
+	temp = phy_read(phydev, 30);
+
+	if (temp < 0)
+		return temp;
+
+	/* Clear bit 15 and set bit 14 */
+	temp = (temp & (~0x8000)) | 0x4000;
+	err = phy_write(phydev,30, temp);
+
+	if (err)
+		return err;
+
+	err = phy_write(phydev,29, 0x000a);
+
+	if (err)
+		return err;
+
+	temp = phy_read(phydev, 30);
+
+	if (temp < 0)
+		return temp;
+
+	temp = phy_read(phydev, 30);
+
+	if (temp < 0)
+		return temp;
+
+	/* Clear bit 5 */
+	temp &= ~0x0020;
+
+	err = phy_write(phydev,30,temp);
+
+	if (err)
+		return err;
+
+	/* Disable automatic MDI/MDIX selection */
+	temp = phy_read(phydev, 16);
+
+	if (temp < 0)
+		return temp;
+
+	temp &= ~0x0060;
+	err = phy_write(phydev,16,temp);
+
+	return err;
+}
+
+#endif
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+#ifdef CONFIG_QUICC_ENGINE
+/*
+ * Reset/reconfigure the UCC Ethernet PHYs through the board's BCSR
+ * registers.  The "bcsr" node is mapped, a machine-specific bit sequence
+ * is applied, and the mapping is released again.  Returns silently when
+ * the node is missing or cannot be mapped.
+ */
+static void __init mpc85xx_mds_reset_ucc_phys(void)
+{
+	struct device_node *np;
+	static u8 __iomem *bcsr_regs;
+
+	/* Map BCSR area */
+	np = of_find_node_by_name(NULL, "bcsr");
+	if (!np)
+		return;
+
+	bcsr_regs = of_iomap(np, 0);
+	of_node_put(np);
+	if (!bcsr_regs)
+		return;
+
+	if (machine_is(mpc8568_mds)) {
+#define BCSR_UCC1_GETH_EN	(0x1 << 7)
+#define BCSR_UCC2_GETH_EN	(0x1 << 7)
+#define BCSR_UCC1_MODE_MSK	(0x3 << 4)
+#define BCSR_UCC2_MODE_MSK	(0x3 << 0)
+
+		/* Turn off UCC1 & UCC2 */
+		clrbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN);
+		clrbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN);
+
+		/* Mode is RGMII, all bits clear */
+		clrbits8(&bcsr_regs[11], BCSR_UCC1_MODE_MSK |
+					 BCSR_UCC2_MODE_MSK);
+
+		/* Turn UCC1 & UCC2 on */
+		setbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN);
+		setbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN);
+	} else if (machine_is(mpc8569_mds)) {
+#define BCSR7_UCC12_GETHnRST	(0x1 << 2)
+#define BCSR8_UEM_MARVELL_RST	(0x1 << 1)
+#define BCSR_UCC_RGMII		(0x1 << 6)
+#define BCSR_UCC_RTBI		(0x1 << 5)
+		/*
+		 * U-Boot mangles interrupt polarity for Marvell PHYs,
+		 * so reset built-in and UEM Marvell PHYs, this puts
+		 * the PHYs into their normal state.
+		 */
+		clrbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST);
+		setbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST);
+
+		setbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST);
+		clrbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST);
+
+		/* Switch every "rtbi"-connected ucc_geth from RGMII to RTBI */
+		for_each_compatible_node(np, "network", "ucc_geth") {
+			const unsigned int *prop;
+			int ucc_num;
+
+			prop = of_get_property(np, "cell-index", NULL);
+			if (prop == NULL)
+				continue;
+
+			/*
+			 * NOTE(review): cell-index is used unchecked as a
+			 * BCSR offset below -- confirm the range the device
+			 * trees provide.
+			 */
+			ucc_num = *prop - 1;
+
+			prop = of_get_property(np, "phy-connection-type", NULL);
+			if (prop == NULL)
+				continue;
+
+			if (strcmp("rtbi", (const char *)prop) == 0)
+				clrsetbits_8(&bcsr_regs[7 + ucc_num],
+					BCSR_UCC_RGMII, BCSR_UCC_RTBI);
+		}
+	} else if (machine_is(p1021_mds)) {
+#define BCSR11_ENET_MICRST     (0x1 << 5)
+		/* Reset Micrel PHY */
+		clrbits8(&bcsr_regs[11], BCSR11_ENET_MICRST);
+		setbits8(&bcsr_regs[11], BCSR11_ENET_MICRST);
+	}
+
+	iounmap(bcsr_regs);
+}
+
+/*
+ * QUICC Engine setup: initialise parallel I/O, reset the UCC PHYs and, on
+ * the P1021 MDS only, set the QE pin-mux bits in the global utilities
+ * PMUXCR register.
+ */
+static void __init mpc85xx_mds_qe_init(void)
+{
+	struct ccsr_guts __iomem *guts_regs;
+	struct device_node *guts_node;
+
+	mpc85xx_qe_par_io_init();
+	mpc85xx_mds_reset_ucc_phys();
+
+	if (!machine_is(p1021_mds))
+		return;
+
+	guts_node = of_find_node_by_name(NULL, "global-utilities");
+	if (!guts_node)
+		return;
+
+	guts_regs = of_iomap(guts_node, 0);
+	if (guts_regs) {
+		/*
+		 * P1021 has pins muxed for QE and other functions. To
+		 * enable QE UEC mode, we need to set bit QE0 for UCC1
+		 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+		 * and QE12 for QE MII management signals in PMUXCR
+		 * register.
+		 */
+		setbits32(&guts_regs->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+					      MPC85xx_PMUXCR_QE(3) |
+					      MPC85xx_PMUXCR_QE(9) |
+					      MPC85xx_PMUXCR_QE(12));
+		iounmap(guts_regs);
+	} else {
+		pr_err("mpc85xx-rdb: could not map global utilities register\n");
+	}
+
+	of_node_put(guts_node);
+}
+
+#else
+/* Without CONFIG_QUICC_ENGINE there is nothing to set up. */
+static void __init mpc85xx_mds_qe_init(void) { }
+#endif	/* CONFIG_QUICC_ENGINE */
+
+/*
+ * Architecture setup shared by the MDS machines: report progress (when a
+ * progress hook is installed), then run SMP init, QE init, primary PCI
+ * assignment and swiotlb detection in that order.
+ */
+static void __init mpc85xx_mds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc85xx_mds_setup_arch()", 0);
+
+	mpc85xx_smp_init();
+
+	mpc85xx_mds_qe_init();
+
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+}
+
+#if IS_BUILTIN(CONFIG_PHYLIB)
+
+/*
+ * Register the PHY fixups for the MDIO buses of this board.
+ *
+ * For each supported MDIO controller node, the PHY ids are built as
+ * "<bus start address in hex>:<two-digit PHY address>".  PHY 1 gets both
+ * the 125 clock fixup and the errata fixup, PHY 7 only the errata fixup.
+ *
+ * A controller that is absent from the device tree, or whose address
+ * cannot be translated, is skipped so that no fixup is registered under
+ * an id built from an uninitialised resource.  Always returns 0.
+ */
+static int __init board_fixups(void)
+{
+	char phy_id[20];
+	const char *compstrs[2] = {"fsl,gianfar-mdio", "fsl,ucc-mdio"};
+	struct device_node *mdio;
+	struct resource res;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(compstrs); i++) {
+		mdio = of_find_compatible_node(NULL, NULL, compstrs[i]);
+		if (!mdio)
+			continue;
+
+		/* res is only valid when the translation succeeds */
+		if (of_address_to_resource(mdio, 0, &res)) {
+			of_node_put(mdio);
+			continue;
+		}
+
+		snprintf(phy_id, sizeof(phy_id), "%llx:%02x",
+			(unsigned long long)res.start, 1);
+
+		phy_register_fixup_for_id(phy_id, mpc8568_fixup_125_clock);
+		phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups);
+
+		/* Register a workaround for errata */
+		snprintf(phy_id, sizeof(phy_id), "%llx:%02x",
+			(unsigned long long)res.start, 7);
+		phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups);
+
+		of_node_put(mdio);
+	}
+
+	return 0;
+}
+
+/* The PHY fixups are registered for the MPC8568 and MPC8569 MDS machines. */
+machine_arch_initcall(mpc8568_mds, board_fixups);
+machine_arch_initcall(mpc8569_mds, board_fixups);
+
+#endif
+
+/*
+ * Thin wrapper that forwards to mpc85xx_common_publish_devices() and
+ * returns its result.
+ */
+static int __init mpc85xx_publish_devices(void)
+{
+	return mpc85xx_common_publish_devices();
+}
+
+/*
+ * All three MDS machines publish the common devices at arch-initcall
+ * time; p1021_mds registers the common helper directly.
+ */
+machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
+machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
+machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
+
+/* Allocate and initialise the MPIC; allocation failure is fatal (BUG_ON). */
+static void __init mpc85xx_mds_pic_init(void)
+{
+	const unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+
+	mpic_init(pic);
+}
+
+/*
+ * Machine description for the MPC8568 MDS board; it shares every callback
+ * with the other MDS machines in this file.
+ */
+define_machine(mpc8568_mds) {
+	.name		= "MPC8568 MDS",
+	.compatible	= "MPC85xxMDS",
+	.setup_arch	= mpc85xx_mds_setup_arch,
+	.init_IRQ	= mpc85xx_mds_pic_init,
+	.get_irq	= mpic_get_irq,
+	.progress	= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+};
+
+/*
+ * Machine description for the MPC8569 MDS board; it shares every callback
+ * with the other MDS machines in this file.
+ */
+define_machine(mpc8569_mds) {
+	.name		= "MPC8569 MDS",
+	.compatible	= "fsl,MPC8569EMDS",
+	.setup_arch	= mpc85xx_mds_setup_arch,
+	.init_IRQ	= mpc85xx_mds_pic_init,
+	.get_irq	= mpic_get_irq,
+	.progress	= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+};
+
+/*
+ * Machine description for the P1021 MDS board; it shares every callback
+ * with the other MDS machines in this file.
+ */
+define_machine(p1021_mds) {
+	.name		= "P1021 MDS",
+	.compatible	= "fsl,P1021MDS",
+	.setup_arch	= mpc85xx_mds_setup_arch,
+	.init_IRQ	= mpc85xx_mds_pic_init,
+	.get_irq	= mpic_get_irq,
+	.progress	= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
new file mode 100644
index 0000000000..f7ac92a8ae
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx PM operators
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/fsl/guts.h>
+
+#include <asm/io.h>
+#include <asm/fsl_pm.h>
+
+#include "smp.h"
+
+static struct ccsr_guts __iomem *guts;
+
+#ifdef CONFIG_FSL_PMC
+static void mpc85xx_irq_mask(int cpu)
+{
+	/* Empty hook: nothing is masked for @cpu here. */
+}
+
+static void mpc85xx_irq_unmask(int cpu)
+{
+	/* Empty hook: nothing is unmasked for @cpu here. */
+}
+
+static void mpc85xx_cpu_die(int cpu)
+{
+	u32 tmp;
+
+	tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP;
+	mtspr(SPRN_HID0, tmp);
+
+	/* Enter NAP mode: HID0 selects NAP above, MSR[WE] is set below. */
+	tmp = mfmsr();
+	tmp |= MSR_WE;
+	asm volatile(
+		"msync\n"
+		"mtmsr %0\n"
+		"isync\n"
+		:
+		: "r" (tmp));
+}
+
+static void mpc85xx_cpu_up_prepare(int cpu)
+{
+	/* Empty hook: no preparation is done for @cpu here. */
+}
+#endif
+
+/* Set (freeze) or clear (unfreeze) the TB0/TB1 bits in the GUTS DEVDISR. */
+static void mpc85xx_freeze_time_base(bool freeze)
+{
+	const u32 tb_bits = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
+
+	if (!freeze)
+		clrbits32(&guts->devdisr, tb_bits);
+	else
+		setbits32(&guts->devdisr, tb_bits);
+	/* Read the register back after the update. */
+	in_be32(&guts->devdisr);
+}
+
+static const struct of_device_id mpc85xx_smp_guts_ids[] = {
+	{ .compatible = "fsl,bsc9132-guts" },
+	{ .compatible = "fsl,mpc8572-guts" },
+	{ .compatible = "fsl,p1020-guts" },
+	{ .compatible = "fsl,p1021-guts" },
+	{ .compatible = "fsl,p1022-guts" },
+	{ .compatible = "fsl,p1023-guts" },
+	{ .compatible = "fsl,p2020-guts" },
+	{ /* sentinel */ }
+};
+
+static const struct fsl_pm_ops mpc85xx_pm_ops = {
+#ifdef CONFIG_FSL_PMC
+	.cpu_up_prepare		= mpc85xx_cpu_up_prepare,
+	.cpu_die		= mpc85xx_cpu_die,
+	.irq_mask		= mpc85xx_irq_mask,
+	.irq_unmask		= mpc85xx_irq_unmask,
+#endif /* CONFIG_FSL_PMC */
+	.freeze_time_base	= mpc85xx_freeze_time_base,
+};
+
+/* Map the GUTS block named by the device tree and install mpc85xx_pm_ops. */
+int __init mpc85xx_setup_pmc(void)
+{
+	struct device_node *guts_np;
+
+	guts_np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
+	if (!guts_np)
+		return 0;	/* no GUTS node: qoriq_pm_ops is left untouched */
+	guts = of_iomap(guts_np, 0);
+	of_node_put(guts_np);
+	if (!guts) {
+		pr_err("Could not map guts node address\n");
+		return -ENOMEM;
+	}
+	qoriq_pm_ops = &mpc85xx_pm_ops;
+	return 0;
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
new file mode 100644
index 0000000000..ec9f60fbeb
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx RDB Board Setup
+ *
+ * Copyright 2009,2012-2013 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/fsl/guts.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <soc/fsl/qe/qe.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* Allocate and init the MPIC; "fsl,MPC85XXRDB-CAMP" machines add NO_RESET. */
+static void __init mpc85xx_rdb_pic_init(void)
+{
+	int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic;
+
+	if (of_machine_is_compatible("fsl,MPC85XXRDB-CAMP"))
+		flags |= MPIC_NO_RESET;
+
+	mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC  ");
+
+	/* A failed allocation only warns and skips the initialisation. */
+	if (!WARN_ON(!mpic))
+		mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture: SMP, primary PCI bus and QE parallel I/O.  When a
+ * QE Ethernet or serial driver is configured, the P1025 RDB additionally
+ * gets its QE pins muxed through the global utilities PMUXCR register.
+ */
+static void __init mpc85xx_rdb_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc85xx_rdb_setup_arch()", 0);
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+
+	mpc85xx_qe_par_io_init();
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+	if (machine_is(p1025_rdb)) {
+		struct ccsr_guts __iomem *guts = NULL;
+		struct device_node *np;
+
+		np = of_find_node_by_name(NULL, "global-utilities");
+		if (np)
+			guts = of_iomap(np, 0);
+
+		if (guts) {
+			/*
+			 * P1025 has pins muxed for QE and other functions. To
+			 * enable QE UEC mode, we need to set bit QE0 for UCC1
+			 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+			 * and QE12 for QE MII management signals in PMUXCR
+			 * register.
+			 */
+			setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+						 MPC85xx_PMUXCR_QE(3) |
+						 MPC85xx_PMUXCR_QE(9) |
+						 MPC85xx_PMUXCR_QE(12));
+			iounmap(guts);
+		} else if (np) {
+			pr_err("mpc85xx-rdb: could not map global utilities register\n");
+		}
+
+		of_node_put(np);
+	}
+#endif
+
+	pr_info("MPC85xx RDB board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pd, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices);
+
+define_machine(p1020_rdb) {
+	.name			= "P1020 RDB",
+	.compatible		= "fsl,P1020RDB",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1021_rdb_pc) {
+	.name			= "P1021 RDB-PC",
+	.compatible		= "fsl,P1021RDB-PC",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1025_rdb) {
+	.name			= "P1025 RDB",
+	.compatible		= "fsl,P1025RDB",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1020_mbg_pc) {
+	.name			= "P1020 MBG-PC",
+	.compatible		= "fsl,P1020MBG-PC",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1020_utm_pc) {
+	.name			= "P1020 UTM-PC",
+	.compatible		= "fsl,P1020UTM-PC",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1020_rdb_pc) {
+	.name			= "P1020RDB-PC",
+	.compatible		= "fsl,P1020RDB-PC",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1020_rdb_pd) {
+	.name			= "P1020RDB-PD",
+	.compatible		= "fsl,P1020RDB-PD",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1024_rdb) {
+	.name			= "P1024 RDB",
+	.compatible		= "fsl,P1024RDB",
+	.progress		= udbg_progress,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c
new file mode 100644
index 0000000000..1b59e45a0c
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mvme2500.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Emerson/Artesyn MVME2500
+ *
+ * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Based on earlier code by:
+ *
+ *	Xianghua Xiao (x.xiao@freescale.com)
+ *	Tom Armistead (tom.armistead@emerson.com)
+ *	Copyright 2012 Emerson
+ *
+ * Author Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+#include <linux/pci.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+void __init mvme2500_pic_init(void)
+{
+	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC  ");
+
+	BUG_ON(!mpic);		/* allocation failure is fatal */
+	mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture: progress report, primary PCI assignment, banner.
+ */
+static void __init mvme2500_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mvme2500_setup_arch()", 0);
+	fsl_pci_assign_primary();
+	pr_info("MVME2500 board from Artesyn\n");
+}
+
+machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices);
+
+define_machine(mvme2500) {
+	.name			= "MVME2500",
+	.compatible		= "artesyn,MVME2500",
+	.progress		= udbg_progress,
+	.setup_arch		= mvme2500_setup_arch,
+	.init_IRQ		= mvme2500_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
new file mode 100644
index 0000000000..10d6f1fa33
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1010RDB Board Setup
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* Allocate the MPIC (big-endian, single destination CPU) and initialise it. */
+void __init p1010_rdb_pic_init(void)
+{
+	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC  ");
+
+	BUG_ON(!mpic);
+
+	mpic_init(mpic);
+}
+
+
+/*
+ * Setup the architecture: progress report, primary PCI assignment, banner.
+ */
+static void __init p1010_rdb_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("p1010_rdb_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+	pr_info("P1010 RDB board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
+
+/*
+ * Machine probe: returns 1 for the "fsl,P1010RDB" and "fsl,P1010RDB-PB"
+ * boards, 0 for anything else.
+ *
+ * Historically documented as running before the device tree is unflattened.
+ * NOTE(review): confirm, since of_machine_is_compatible() is used here.
+ */
+static int __init p1010_rdb_probe(void)
+{
+	return of_machine_is_compatible("fsl,P1010RDB") ||
+	       of_machine_is_compatible("fsl,P1010RDB-PB");
+}
+
+define_machine(p1010_rdb) {
+	.name			= "P1010 RDB",
+	.probe			= p1010_rdb_probe,
+	.progress		= udbg_progress,
+	.setup_arch		= p1010_rdb_setup_arch,
+	.init_IRQ		= p1010_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
new file mode 100644
index 0000000000..0dd786a061
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -0,0 +1,563 @@
+/*
+ * P1022DS board specific routines
+ *
+ * Authors: Travis Wheatley <travis.wheatley@freescale.com>
+ *          Dave Liu <daveliu@freescale.com>
+ *          Timur Tabi <timur@freescale.com>
+ *
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ *
+ * This file is taken from the Freescale P1022DS BSP, with modifications:
+ * 1) No AMP support
+ * 2) No PCI endpoint support
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/fsl/guts.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/div64.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+#include <asm/fsl_lbc.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+#define PMUXCR_ELBCDIU_MASK	0xc0000000
+#define PMUXCR_ELBCDIU_NOR16	0x80000000
+#define PMUXCR_ELBCDIU_DIU	0x40000000
+
+/*
+ * Board-specific initialization of the DIU.  This code should probably be
+ * executed when the DIU is opened, rather than in arch code, but the DIU
+ * driver does not have a mechanism for this (yet).
+ *
+ * This is especially problematic on the P1022DS because the local bus (eLBC)
+ * and the DIU video signals share the same pins, which means that enabling the
+ * DIU will disable access to NOR flash.
+ */
+
+/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */
+#define CLKDVDR_PXCKEN		0x80000000
+#define CLKDVDR_PXCKINV		0x10000000
+#define CLKDVDR_PXCKDLY		0x06000000
+#define CLKDVDR_PXCLK_MASK	0x00FF0000
+
+/* Some ngPIXIS register definitions */
+#define PX_CTL		3
+#define PX_BRDCFG0	8
+#define PX_BRDCFG1	9
+
+#define PX_BRDCFG0_ELBC_SPI_MASK	0xc0
+#define PX_BRDCFG0_ELBC_SPI_ELBC	0x00
+#define PX_BRDCFG0_ELBC_SPI_NULL	0xc0
+#define PX_BRDCFG0_ELBC_DIU		0x02
+
+#define PX_BRDCFG1_DVIEN	0x80
+#define PX_BRDCFG1_DFPEN	0x40
+#define PX_BRDCFG1_BACKLIGHT	0x20
+#define PX_BRDCFG1_DDCEN	0x10
+
+#define PX_CTL_ALTACC		0x80
+
+/*
+ * DIU Area Descriptor
+ *
+ * Note that we need to byte-swap the value before it's written to the AD
+ * register.  So even though the registers don't look like they're in the same
+ * bit positions as they are on the MPC8610, the same value is written to the
+ * AD register on the MPC8610 and on the P1022.
+ */
+#define AD_BYTE_F		0x10000000
+#define AD_ALPHA_C_MASK		0x0E000000
+#define AD_ALPHA_C_SHIFT	25
+#define AD_BLUE_C_MASK		0x01800000
+#define AD_BLUE_C_SHIFT		23
+#define AD_GREEN_C_MASK		0x00600000
+#define AD_GREEN_C_SHIFT	21
+#define AD_RED_C_MASK		0x00180000
+#define AD_RED_C_SHIFT		19
+#define AD_PALETTE		0x00040000
+#define AD_PIXEL_S_MASK		0x00030000
+#define AD_PIXEL_S_SHIFT	16
+#define AD_COMP_3_MASK		0x0000F000
+#define AD_COMP_3_SHIFT		12
+#define AD_COMP_2_MASK		0x00000F00
+#define AD_COMP_2_SHIFT		8
+#define AD_COMP_1_MASK		0x000000F0
+#define AD_COMP_1_SHIFT		4
+#define AD_COMP_0_MASK		0x0000000F
+#define AD_COMP_0_SHIFT		0
+/* Build a little-endian AD value; every argument is parenthesised. */
+#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
+	cpu_to_le32(AD_BYTE_F | ((alpha) << AD_ALPHA_C_SHIFT) | \
+	((blue) << AD_BLUE_C_SHIFT) | ((green) << AD_GREEN_C_SHIFT) | \
+	((red) << AD_RED_C_SHIFT) | ((c3) << AD_COMP_3_SHIFT) | \
+	((c2) << AD_COMP_2_SHIFT) | ((c1) << AD_COMP_1_SHIFT) | \
+	((c0) << AD_COMP_0_SHIFT) | ((size) << AD_PIXEL_S_SHIFT))
+
+struct fsl_law {
+	u32	lawbar;		/* masked with LAWBAR_MASK in lbc_br_to_phys() */
+	u32	reserved1;
+	u32	lawar;		/* compared against LAWAR_MATCH under LAWAR_MASK */
+	u32	reserved[5];
+};
+
+#define LAWBAR_MASK	0x00F00000
+#define LAWBAR_SHIFT	12
+
+#define LAWAR_EN	0x80000000
+#define LAWAR_TGT_MASK	0x01F00000
+#define LAW_TRGT_IF_LBC	(0x04 << 20)
+
+#define LAWAR_MASK	(LAWAR_EN | LAWAR_TGT_MASK)
+#define LAWAR_MATCH	(LAWAR_EN | LAW_TRGT_IF_LBC)
+
+#define BR_BA		0xFFFF8000
+
+/*
+ * Translate a localbus BRx value into a physical address.
+ *
+ * BRx only holds the low 32 address bits.  The upper four bits are taken
+ * from the first enabled LAW entry that targets the localbus; 0 is returned
+ * when none of the first 'count' entries does.
+ */
+static phys_addr_t lbc_br_to_phys(const void *ecm, unsigned int count, u32 br)
+{
+#ifdef CONFIG_PHYS_64BIT
+	const struct fsl_law *entry = ecm + 0xc08;
+	const struct fsl_law *end = entry + count;
+
+	for (; entry < end; entry++) {
+		u64 bar = in_be32(&entry->lawbar);
+		u32 attr = in_be32(&entry->lawar);
+
+		if ((attr & LAWAR_MASK) == LAWAR_MATCH)
+			return (br & BR_BA) |
+			       ((bar & LAWBAR_MASK) << LAWBAR_SHIFT);
+	}
+
+	return 0;
+#else
+	/*
+	 * Without 64-bit physical addresses the BRx base address already is
+	 * the complete physical address.
+	 */
+	return br & BR_BA;
+#endif
+}
+
+/**
+ * p1022ds_set_monitor_port - switch the output to a different monitor port
+ * @port: the port to enable; only DVI and LVDS are handled here
+ */
+static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port)
+{
+	struct device_node *guts_node;
+	struct device_node *lbc_node = NULL;
+	struct device_node *law_node = NULL;
+	struct ccsr_guts __iomem *guts;
+	struct fsl_lbc_regs *lbc = NULL;
+	void *ecm = NULL;
+	u8 __iomem *lbc_lcs0_ba = NULL;
+	u8 __iomem *lbc_lcs1_ba = NULL;
+	phys_addr_t cs0_addr, cs1_addr;
+	u32 br0, or0, br1, or1;
+	const __be32 *iprop;
+	unsigned int num_laws;
+	u8 b;
+
+	/* Map the global utilities registers. */
+	guts_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+	if (!guts_node) {
+		pr_err("p1022ds: missing global utilities device node\n");
+		return;
+	}
+
+	guts = of_iomap(guts_node, 0);
+	if (!guts) {
+		pr_err("p1022ds: could not map global utilities device\n");
+		goto exit;
+	}
+
+	lbc_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc");
+	if (!lbc_node) {
+		pr_err("p1022ds: missing localbus node\n");
+		goto exit;
+	}
+
+	lbc = of_iomap(lbc_node, 0);
+	if (!lbc) {
+		pr_err("p1022ds: could not map localbus node\n");
+		goto exit;
+	}
+
+	law_node = of_find_compatible_node(NULL, NULL, "fsl,ecm-law");
+	if (!law_node) {
+		pr_err("p1022ds: missing local access window node\n");
+		goto exit;
+	}
+
+	ecm = of_iomap(law_node, 0);
+	if (!ecm) {
+		pr_err("p1022ds: could not map local access window node\n");
+		goto exit;
+	}
+
+	iprop = of_get_property(law_node, "fsl,num-laws", NULL);
+	if (!iprop) {
+		pr_err("p1022ds: LAW node is missing fsl,num-laws property\n");
+		goto exit;
+	}
+	num_laws = be32_to_cpup(iprop);
+
+	/*
+	 * Indirect mode requires both BR0 and BR1 to be set to "GPCM",
+	 * otherwise writes to these addresses won't actually appear on the
+	 * local bus, and so the PIXIS won't see them.
+	 *
+	 * In FCM mode, writes go to the NAND controller, which does not pass
+	 * them to the localbus directly.  So we force BR0 and BR1 into GPCM
+	 * mode, since we don't care about what's behind the localbus any
+	 * more.
+	 */
+	br0 = in_be32(&lbc->bank[0].br);
+	br1 = in_be32(&lbc->bank[1].br);
+	or0 = in_be32(&lbc->bank[0].or);
+	or1 = in_be32(&lbc->bank[1].or);
+
+	/* Make sure CS0 and CS1 are programmed */
+	if (!(br0 & BR_V) || !(br1 & BR_V)) {
+		pr_err("p1022ds: CS0 and/or CS1 is not programmed\n");
+		goto exit;
+	}
+
+	/*
+	 * Use the existing BRx/ORx values if it's already GPCM. Otherwise,
+	 * force the values to simple 32KB GPCM windows with the most
+	 * conservative timing.
+	 */
+	if ((br0 & BR_MSEL) != BR_MS_GPCM) {
+		br0 = (br0 & BR_BA) | BR_V;
+		or0 = 0xFFFF8000 | 0xFF7;
+		out_be32(&lbc->bank[0].br, br0);
+		out_be32(&lbc->bank[0].or, or0);
+	}
+	if ((br1 & BR_MSEL) != BR_MS_GPCM) {
+		br1 = (br1 & BR_BA) | BR_V;
+		or1 = 0xFFFF8000 | 0xFF7;
+		out_be32(&lbc->bank[1].br, br1);
+		out_be32(&lbc->bank[1].or, or1);
+	}
+
+	cs0_addr = lbc_br_to_phys(ecm, num_laws, br0);
+	if (!cs0_addr) {
+		pr_err("p1022ds: could not determine physical address for CS0"
+		       " (BR0=%08x)\n", br0);
+		goto exit;
+	}
+	cs1_addr = lbc_br_to_phys(ecm, num_laws, br1);
+	if (!cs1_addr) {
+		pr_err("p1022ds: could not determine physical address for CS1"
+		       " (BR1=%08x)\n", br1);
+		goto exit;
+	}
+
+	lbc_lcs0_ba = ioremap(cs0_addr, 1);
+	if (!lbc_lcs0_ba) {
+		pr_err("p1022ds: could not ioremap CS0 address %llx\n",
+		       (unsigned long long)cs0_addr);
+		goto exit;
+	}
+	lbc_lcs1_ba = ioremap(cs1_addr, 1);
+	if (!lbc_lcs1_ba) {
+		pr_err("p1022ds: could not ioremap CS1 address %llx\n",
+		       (unsigned long long)cs1_addr);
+		goto exit;
+	}
+
+	/* Make sure we're in indirect mode first. */
+	if ((in_be32(&guts->pmuxcr) & PMUXCR_ELBCDIU_MASK) !=
+	    PMUXCR_ELBCDIU_DIU) {
+		struct device_node *pixis_node;
+		void __iomem *pixis;
+
+		pixis_node =
+			of_find_compatible_node(NULL, NULL, "fsl,p1022ds-fpga");
+		if (!pixis_node) {
+			pr_err("p1022ds: missing pixis node\n");
+			goto exit;
+		}
+
+		pixis = of_iomap(pixis_node, 0);
+		of_node_put(pixis_node);
+		if (!pixis) {
+			pr_err("p1022ds: could not map pixis registers\n");
+			goto exit;
+		}
+
+		/* Enable indirect PIXIS mode.  */
+		setbits8(pixis + PX_CTL, PX_CTL_ALTACC);
+		iounmap(pixis);
+
+		/* Switch the board mux to the DIU */
+		out_8(lbc_lcs0_ba, PX_BRDCFG0);	/* BRDCFG0 */
+		b = in_8(lbc_lcs1_ba);
+		b |= PX_BRDCFG0_ELBC_DIU;
+		out_8(lbc_lcs1_ba, b);
+
+		/* Set the chip mux to DIU mode. */
+		clrsetbits_be32(&guts->pmuxcr, PMUXCR_ELBCDIU_MASK,
+				PMUXCR_ELBCDIU_DIU);
+		in_be32(&guts->pmuxcr);
+	}
+
+	switch (port) {
+	case FSL_DIU_PORT_DVI:
+		/* Enable the DVI port, disable the DFP and the backlight */
+		out_8(lbc_lcs0_ba, PX_BRDCFG1);
+		b = in_8(lbc_lcs1_ba);
+		b &= ~(PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT);
+		b |= PX_BRDCFG1_DVIEN;
+		out_8(lbc_lcs1_ba, b);
+		break;
+	case FSL_DIU_PORT_LVDS:
+		/*
+		 * Enable the DFP port and the backlight, disable the DVI.
+		 * LVDS also needs backlight enabled, otherwise the display
+		 * will be blank.
+		 */
+		out_8(lbc_lcs0_ba, PX_BRDCFG1);
+		b = in_8(lbc_lcs1_ba);
+		b &= ~PX_BRDCFG1_DVIEN;
+		b |= PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT;
+		out_8(lbc_lcs1_ba, b);
+		break;
+	default:
+		pr_err("p1022ds: unsupported monitor port %i\n", port);
+	}
+
+exit:
+	if (lbc_lcs1_ba)
+		iounmap(lbc_lcs1_ba);
+	if (lbc_lcs0_ba)
+		iounmap(lbc_lcs0_ba);
+	if (lbc)
+		iounmap(lbc);
+	if (ecm)
+		iounmap(ecm);
+	if (guts)
+		iounmap(guts);
+
+	of_node_put(law_node);
+	of_node_put(lbc_node);
+	of_node_put(guts_node);
+}
+
+/**
+ * p1022ds_set_pixel_clock - program the DIU's clock
+ * @pixclock: the period, in picoseconds, of the pixel clock (0 is rejected)
+ */
+void p1022ds_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *guts_np = NULL;
+	struct ccsr_guts __iomem *guts;
+	unsigned long freq;
+	u64 temp;
+	u32 pxclk;
+
+	if (!pixclock) {	/* do_div() below would divide by zero */
+		pr_err("p1022ds: invalid pixel clock period 0\n");
+		return;
+	}
+
+	/* Map the global utilities registers. */
+	guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+	if (!guts_np) {
+		pr_err("p1022ds: missing global utilities device node\n");
+		return;
+	}
+
+	guts = of_iomap(guts_np, 0);
+	of_node_put(guts_np);
+	if (!guts) {
+		pr_err("p1022ds: could not map global utilities device\n");
+		return;
+	}
+
+	/* Convert pixclock from a period to a frequency */
+	temp = 1000000000000ULL;
+	do_div(temp, pixclock);
+	freq = temp;
+
+	/* 'pxclk' = platform clock / pixel clock; CLKDVDR accepts 2-255 */
+	pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+	pxclk = clamp_t(u32, pxclk, 2, 255);
+
+	/* Disable the clock; clear delay and divider (PXCKINV is not touched) */
+	clrbits32(&guts->clkdvdr,
+		  CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
+
+	/* Enable the clock and set the pxclk */
+	setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
+
+	iounmap(guts);
+}
+
+/**
+ * p1022ds_valid_monitor_port - map a requested port to a supported one
+ * @port: the monitor port being requested
+ *
+ * DVI and LVDS are returned unchanged; every other value falls back to
+ * DVI, because dual-link LVDS is not supported.
+ */
+enum fsl_diu_monitor_port
+p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	if (port == FSL_DIU_PORT_DVI || port == FSL_DIU_PORT_LVDS)
+		return port;
+	return FSL_DIU_PORT_DVI;
+}
+
+#endif
+
+void __init p1022_ds_pic_init(void)
+{
+	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC  ");
+
+	BUG_ON(!mpic);		/* allocation failure is fatal */
+	mpic_init(mpic);
+}
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* TRUE if there is a "video=fslfb" command-line parameter. */
+static bool fslfb;
+
+/*
+ * Search for a "video=fslfb" command-line parameter, and set 'fslfb' to
+ * true if we find it.
+ *
+ * We need to use early_param() instead of __setup() because the normal
+ * __setup() gets called too late.  However, early_param() gets called very
+ * early, before the device tree is unflattened, so all we can do now is set a
+ * global variable.  Later on, p1022_ds_setup_arch() will use that variable
+ * to determine if we need to update the device tree.
+ */
+static int __init early_video_setup(char *options)
+{
+	/* A bare "video" (no '=') is passed to us as a NULL string. */
+	fslfb = options && strncmp(options, "fslfb:", 6) == 0;
+	return 0;
+}
+early_param("video", early_video_setup);
+
+#endif
+
+/*
+ * Setup the architecture
+ */
+static void __init p1022_ds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("p1022_ds_setup_arch()", 0);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+	diu_ops.set_monitor_port	= p1022ds_set_monitor_port;
+	diu_ops.set_pixel_clock		= p1022ds_set_pixel_clock;
+	diu_ops.valid_monitor_port	= p1022ds_valid_monitor_port;
+
+	/*
+	 * Disable the NOR and NAND flash nodes if there is video=fslfb...
+	 * command-line parameter.  When the DIU is active, the localbus is
+	 * unavailable, so we have to disable these nodes before the MTD
+	 * driver loads.
+	 */
+	if (fslfb) {
+		struct device_node *np =
+			of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc");
+
+		if (np) {
+			struct device_node *np2;
+
+			/* of_find_compatible_node() puts 'from'; keep np */
+			of_node_get(np);
+			np2 = of_find_compatible_node(np, NULL, "cfi-flash");
+			if (np2) {
+				static struct property nor_status = {
+					.name = "status",
+					.value = "disabled",
+					.length = sizeof("disabled"),
+				};
+
+				/*
+				 * of_update_property() is called before
+				 * kmalloc() is available, so the 'new' object
+				 * should be allocated in the global area.
+				 * The easiest way is to do that is to
+				 * allocate one static local variable for each
+				 * call to this function.
+				 */
+				pr_info("p1022ds: disabling %pOF node\n", np2);
+				of_update_property(np2, &nor_status);
+				of_node_put(np2);
+			}
+
+			/* Same again: the search below puts np once more */
+			of_node_get(np);
+			np2 = of_find_compatible_node(np, NULL,
+						      "fsl,elbc-fcm-nand");
+			if (np2) {
+				static struct property nand_status = {
+					.name = "status",
+					.value = "disabled",
+					.length = sizeof("disabled"),
+				};
+
+				pr_info("p1022ds: disabling %pOF node\n", np2);
+				of_update_property(np2, &nand_status);
+				of_node_put(np2);
+			}
+
+			of_node_put(np);
+		}
+
+	}
+
+#endif
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
+	pr_info("Freescale P1022 DS reference board\n");
+}
+
+machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
+
+define_machine(p1022_ds) {
+	.name			= "P1022 DS",
+	.compatible		= "fsl,p1022ds",
+	.progress		= udbg_progress,
+	.setup_arch		= p1022_ds_setup_arch,
+	.init_IRQ		= p1022_ds_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
new file mode 100644
index 0000000000..25ab6e9c14
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -0,0 +1,143 @@
+/*
+ * P1022 RDK board specific routines
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * Based on p1022_ds.c
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/fsl/guts.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/div64.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */
+#define CLKDVDR_PXCKEN		0x80000000
+#define CLKDVDR_PXCKINV		0x10000000
+#define CLKDVDR_PXCKDLY		0x06000000
+#define CLKDVDR_PXCLK_MASK	0x00FF0000
+
+/**
+ * p1022rdk_set_pixel_clock - program the DIU's clock
+ * @pixclock: the period, in picoseconds, of the pixel clock (0 is rejected)
+ */
+void p1022rdk_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *guts_np = NULL;
+	struct ccsr_guts __iomem *guts;
+	unsigned long freq;
+	u64 temp;
+	u32 pxclk;
+
+	if (!pixclock) {	/* do_div() below would divide by zero */
+		pr_err("p1022rdk: invalid pixel clock period 0\n");
+		return;
+	}
+
+	/* Map the global utilities registers. */
+	guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+	if (!guts_np) {
+		pr_err("p1022rdk: missing global utilities device node\n");
+		return;
+	}
+
+	guts = of_iomap(guts_np, 0);
+	of_node_put(guts_np);
+	if (!guts) {
+		pr_err("p1022rdk: could not map global utilities device\n");
+		return;
+	}
+
+	/* Convert pixclock from a period to a frequency */
+	temp = 1000000000000ULL;
+	do_div(temp, pixclock);
+	freq = temp;
+
+	/* 'pxclk' = platform clock / pixel clock; CLKDVDR accepts 2-255 */
+	pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+	pxclk = clamp_t(u32, pxclk, 2, 255);
+
+	/* Disable the clock; clear delay and divider (PXCKINV is not touched) */
+	clrbits32(&guts->clkdvdr,
+		  CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
+
+	/* Enable the clock and set the pxclk */
+	setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
+
+	iounmap(guts);
+}
+
+/**
+ * p1022rdk_valid_monitor_port - always DVI, whatever @port is requested
+ */
+enum fsl_diu_monitor_port
+p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	return FSL_DIU_PORT_DVI;
+}
+
+#endif
+
+void __init p1022_rdk_pic_init(void)
+{
+	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC  ");
+
+	BUG_ON(!mpic);		/* allocation failure is fatal */
+	mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture: DIU hooks (if configured), SMP, PCI, swiotlb.
+ */
+static void __init p1022_rdk_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("p1022_rdk_setup_arch()", 0);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+	diu_ops.set_pixel_clock		= p1022rdk_set_pixel_clock;
+	diu_ops.valid_monitor_port	= p1022rdk_valid_monitor_port;
+#endif
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
+	pr_info("Freescale / iVeia P1022 RDK reference board\n");
+}
+
+machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices);
+
+define_machine(p1022_rdk) {
+	.name			= "P1022 RDK",
+	.compatible		= "fsl,p1022rdk",
+	.progress		= udbg_progress,
+	.setup_arch		= p1022_rdk_setup_arch,
+	.init_IRQ		= p1022_rdk_pic_init,
+	.get_irq		= mpic_get_irq,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
new file mode 100644
index 0000000000..e4fa8731fd
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Roy Zang <tie-fei.zang@freescale.com>
+ *
+ * Description:
+ * P1023 RDB Board Setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include "smp.h"
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ * The BCSR is only needed to route i2c bus 0 to the RTC, so a failure to map
+ * it is logged and the SMP and PCI setup below is still carried out.
+ */
+static void __init p1023_rdb_setup_arch(void)
+{
+	struct device_node *np;
+	u8 __iomem *bcsr_regs;
+
+	if (ppc_md.progress)
+		ppc_md.progress("p1023_rdb_setup_arch()", 0);
+
+	/* Map BCSR area */
+	np = of_find_node_by_name(NULL, "bcsr");
+	if (np != NULL) {
+		bcsr_regs = of_iomap(np, 0);
+		of_node_put(np);
+
+		if (!bcsr_regs) {
+			printk(KERN_ERR
+			       "BCSR: Failed to map bcsr register space\n");
+		} else {
+#define BCSR15_I2C_BUS0_SEG_CLR		0x07
+#define BCSR15_I2C_BUS0_SEG2		0x02
+/*
+ * Note: Accessing exclusively i2c devices.
+ *
+ * The i2c controller selects initially ID EEPROM in the u-boot;
+ * but if menu configuration selects RTC support in the kernel,
+ * the i2c controller switches to select RTC chip in the kernel.
+ */
+#ifdef CONFIG_RTC_CLASS
+			/* Enable RTC chip on the segment #2 of i2c */
+			clrbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG_CLR);
+			setbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG2);
+#endif
+
+			iounmap(bcsr_regs);
+		}
+	}
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+}
+
+machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices);
+
+/* Allocate and initialise the board's OpenPIC; a failed allocation is fatal */
+static void __init p1023_rdb_pic_init(void)
+{
+	const unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+define_machine(p1023_rdb) {	/* machine description for the P1023 RDB */
+	.name			= "P1023 RDB",
+	.compatible		= "fsl,P1023RDB",
+	.setup_arch		= p1023_rdb_setup_arch,
+	.init_IRQ		= p1023_rdb_pic_init,
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/p2020.c b/arch/powerpc/platforms/85xx/p2020.c
new file mode 100644
index 0000000000..0e4d715145
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p2020.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale P2020 board Setup
+ *
+ * Copyright 2007,2009,2012-2013 Freescale Semiconductor Inc.
+ * Copyright 2022-2023 Pali Rohár <pali@kernel.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_pci.h>
+
+#include "smp.h"
+#include "mpc85xx.h"
+
+/* Bring up the OpenPIC, then the 8259; warn and bail out if there is no MPIC */
+static void __init p2020_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0,
+				      MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+				      0, 256, " OpenPIC  ");
+
+	if (!WARN_ON(pic == NULL)) {
+		/* only reached with a valid MPIC */
+		mpic_init(pic);
+		mpc85xx_8259_init();
+	}
+}
+
+/*
+ * Setup the architecture: swiotlb, primary PCI, ULI, SMP and QE par_io init
+ */
+static void __init p2020_setup_arch(void)
+{
+	swiotlb_detect_4g();
+	fsl_pci_assign_primary();
+	uli_init();
+	mpc85xx_smp_init();
+	mpc85xx_qe_par_io_init();
+}
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init p2020_probe(void)
+{
+	struct device_node *p2020_cpu;
+
+	/*
+	 * There is no common compatible string for all P2020 boards.
+	 * The only common thing is "PowerPC,P2020@0" cpu node.
+	 * So check for P2020 board via this cpu node.
+	 */
+	p2020_cpu = of_find_node_by_path("/cpus/PowerPC,P2020@0");
+	of_node_put(p2020_cpu);	/* only the pointer value is tested below */
+
+	return !!p2020_cpu;	/* 1 if the cpu node exists, 0 otherwise */
+}
+
+machine_arch_initcall(p2020, mpc85xx_common_publish_devices);
+/* Machine description: selected through p2020_probe(), no .compatible string */
+define_machine(p2020) {
+	.name			= "Freescale P2020",
+	.probe			= p2020_probe,
+	.setup_arch		= p2020_setup_arch,
+	.init_IRQ		= p2020_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c
new file mode 100644
index 0000000000..acd19c52ad
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/ppa8548.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ppa8548 setup and early boot code.
+ *
+ * Copyright 2009 Prodrive B.V..
+ *
+ * By Stef van Os (see MAINTAINERS for contact information)
+ *
+ * Based on the SBC8548 support - Copyright 2007 Wind River Systems Inc.
+ * Based on the MPC8548CDS support - Copyright 2005 Freescale Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/reboot.h>
+#include <linux/seq_file.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+
+/* Allocate and start the OpenPIC; BUG if the allocation fails */
+static void __init ppa8548_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture: this board only reports progress here
+ */
+static void __init ppa8548_setup_arch(void)
+{
+	if (ppc_md.progress)	/* the progress hook is optional */
+		ppc_md.progress("ppa8548_setup_arch()", 0);
+}
+
+/* Print the vendor name, the SVR and the PLL bits of HID1 to the seq_file */
+static void ppa8548_show_cpuinfo(struct seq_file *m)
+{
+	const uint32_t svr = mfspr(SPRN_SVR);
+	uint32_t pll;
+
+	seq_printf(m, "Vendor\t\t: Prodrive B.V.\n");
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	/* The PLL setting is the six-bit field (HID1 >> 24) & 0x3f */
+	pll = (mfspr(SPRN_HID1) >> 24) & 0x3f;
+	seq_printf(m, "PLL setting\t: 0x%x\n", pll);
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = {	/* match table for of_platform_bus_probe() */
+	{ .name = "soc", },
+	{ .type = "soc", },
+	{ .compatible = "simple-bus", },
+	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,srio", },
+	{},	/* sentinel */
+};
+
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+	/* The probe result is not checked: this initcall always reports success */
+	return 0;
+}
+machine_device_initcall(ppa8548, declare_of_platform_devices);
+
+define_machine(ppa8548) {	/* machine description for the ppa8548 board */
+	.name		= "ppa8548",
+	.compatible	= "ppa8548",
+	.setup_arch	= ppa8548_setup_arch,
+	.init_IRQ	= ppa8548_pic_init,
+	.show_cpuinfo	= ppa8548_show_cpuinfo,
+	.get_irq	= mpic_get_irq,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
new file mode 100644
index 0000000000..3cd2f3bd42
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Paravirt target for a generic QEMU e500 machine
+ *
+ * This is intended to be a flexible device-tree-driven platform, not fixed
+ * to a particular piece of hardware or a particular spec of virtual hardware,
+ * beyond the assumption of an e500-family CPU.  Some things are still hardcoded
+ * here, such as MPIC, but this is a limitation of the current code rather than
+ * an interface contract with QEMU.
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/pgtable.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+#include "mpc85xx.h"
+
+/* Allocate the MPIC with the MPIC_ENABLE_COREINT flag, then initialise it */
+static void __init qemu_e500_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0,
+				      MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
+				      MPIC_ENABLE_COREINT,
+				      0, 256, " OpenPIC  ");
+
+	BUG_ON(!pic);	/* a failed allocation is fatal */
+	mpic_init(pic);
+}
+
+static void __init qemu_e500_setup_arch(void)
+{
+	if (ppc_md.progress)	/* optional hook: guard it like the other 85xx boards */
+		ppc_md.progress("qemu_e500_setup_arch()", 0);
+	fsl_pci_assign_primary();
+	swiotlb_detect_4g();
+	mpc85xx_smp_init();
+}
+
+machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices);
+/* Machine description; .get_irq is the coreint variant, as set up in pic_init */
+define_machine(qemu_e500) {
+	.name			= "QEMU e500",
+	.compatible		= "fsl,qemu-e500",
+	.setup_arch		= qemu_e500_setup_arch,
+	.init_IRQ		= qemu_e500_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+	.get_irq		= mpic_get_coreint_irq,
+	.progress		= udbg_progress,
+	.power_save		= e500_idle,
+};
diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
new file mode 100644
index 0000000000..751395cbf0
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Servergy CTS-1000 Setup
+ *
+ * Maintained by Ben Collins <ben.c@servergy.com>
+ *
+ * Copyright 2012 by Servergy, Inc.
+ */
+
+#define pr_fmt(fmt) "gpio-halt: " fmt
+
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/workqueue.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+
+#include <asm/machdep.h>
+
+static struct gpio_desc *halt_gpio;	/* set to 1 by gpio_halt_cb() to halt */
+static int halt_irq;			/* power-button interrupt */
+/* Node that describes the halt GPIO and the power-button interrupt */
+static const struct of_device_id child_match[] = {
+	{
+		.compatible = "sgy,gpio-halt",
+	},
+	{},
+};
+
+static void gpio_halt_wfn(struct work_struct *work)
+{
+	/* Likely won't return */
+	orderly_poweroff(true);
+}
+static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);	/* queued by gpio_halt_irq() */
+
+static void __noreturn gpio_halt_cb(void)
+{
+	pr_info("triggering GPIO.\n");
+
+	/* Probably won't return */
+	gpiod_set_value(halt_gpio, 1);
+	/* Still running, so raising the GPIO did not halt the board */
+	panic("Halt failed\n");
+}
+
+/* This IRQ means someone pressed the power button and it is waiting for us to
+ * handle the shutdown/poweroff. @__data is the platform device of the probe. */
+static irqreturn_t gpio_halt_irq(int irq, void *__data)
+{
+	struct platform_device *pdev = __data;
+
+	dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n");
+	schedule_work(&gpio_halt_wq);	/* the poweroff itself runs from the work item */
+
+	return IRQ_HANDLED;
+}
+
+/* Claim the halt GPIO and power-button IRQ of @halt_node, then hook halt/poweroff */
+static int __gpio_halt_probe(struct platform_device *pdev,
+			     struct device_node *halt_node)
+{
+	int err;
+
+	halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node),
+					   NULL, 0, GPIOD_OUT_LOW, "gpio-halt");
+	err = PTR_ERR_OR_ZERO(halt_gpio);
+	if (err) {
+		dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err);
+		return err;
+	}
+
+	/* Now get the IRQ which tells us when the power button is hit */
+	halt_irq = irq_of_parse_and_map(halt_node, 0);
+	if (halt_irq)
+		err = request_irq(halt_irq, gpio_halt_irq,
+				  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+				  "gpio-halt", pdev);
+	else
+		err = -EINVAL;	/* 0 means the interrupt could not be mapped */
+	if (err) {
+		dev_err(&pdev->dev, "failed to request IRQ %d: %d\n", halt_irq, err);
+		gpiod_put(halt_gpio);
+		halt_gpio = NULL;
+		return err;
+	}
+
+	/* Register our halt function */
+	ppc_md.halt = gpio_halt_cb;
+	pm_power_off = gpio_halt_cb;
+	dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq);
+	return 0;
+}
+
+/* Platform probe: locate the "sgy,gpio-halt" node and set up halt support */
+static int gpio_halt_probe(struct platform_device *pdev)
+{
+	struct device_node *halt_node;
+	int ret;
+
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+	/* No matching node isn't really an error.  The lookup does a put on its
+	 * start node, so pass it a reference of its own. */
+	halt_node = of_find_matching_node(of_node_get(pdev->dev.of_node),
+					  child_match);
+	if (!halt_node)
+		return -ENODEV;
+	ret = __gpio_halt_probe(pdev, halt_node);
+	of_node_put(halt_node);
+	return ret;
+}
+
+static int gpio_halt_remove(struct platform_device *pdev)
+{
+	free_irq(halt_irq, pdev);	/* same dev_id as passed to request_irq() */
+	cancel_work_sync(&gpio_halt_wq);
+	/* Unhook the callbacks before releasing the GPIO they drive */
+	ppc_md.halt = NULL;
+	pm_power_off = NULL;
+
+	gpiod_put(halt_gpio);
+	halt_gpio = NULL;
+
+	return 0;
+}
+
+static const struct of_device_id gpio_halt_match[] = {
+	/* We match on the gpio bus itself and scan the children since they
+	 * won't be matched against us. We know the bus won't match until it
+	 * has been registered too. */
+	{
+		.compatible = "fsl,qoriq-gpio",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, gpio_halt_match);
+
+static struct platform_driver gpio_halt_driver = {
+	.driver = {
+		.name		= "gpio-halt",
+		.of_match_table = gpio_halt_match,
+	},
+	.probe		= gpio_halt_probe,
+	.remove		= gpio_halt_remove,
+};
+/* Registers the driver at module init and unregisters it at module exit */
+module_platform_driver(gpio_halt_driver);
+
+MODULE_DESCRIPTION("Driver to support GPIO triggered system halt for Servergy CTS-1000 Systems.");
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("Ben Collins <ben.c@servergy.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
new file mode 100644
index 0000000000..40aa582068
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Andy Fleming <afleming@freescale.com>
+ * 	   Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2006-2008, 2011-2012, 2015 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/sched/hotplug.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/kexec.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+#include <linux/fsl/guts.h>
+#include <linux/pgtable.h>
+
+#include <asm/machdep.h>
+#include <asm/page.h>
+#include <asm/mpic.h>
+#include <asm/cacheflush.h>
+#include <asm/dbell.h>
+#include <asm/code-patching.h>
+#include <asm/cputhreads.h>
+#include <asm/fsl_pm.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/mpic.h>
+#include "smp.h"
+
+struct epapr_spin_table {
+	u32	addr_h;		/* entry address, upper 32 bits */
+	u32	addr_l;		/* entry address, lower 32 bits; reads 1 while spinning */
+	u32	r3_h;
+	u32	r3_l;
+	u32	reserved;
+	u32	pir;		/* written with the hardware cpu id */
+};
+
+static u64 timebase;	/* value handed from the giver to the taker */
+static int tb_req;	/* set by the taker to ask for it, cleared by the giver */
+static int tb_valid;	/* set by the giver once 'timebase' is filled, cleared by the taker */
+
+static void mpc85xx_give_timebase(void)
+{
+	unsigned long flags;
+	/* Giver side of the timebase handshake with mpc85xx_take_timebase() */
+	local_irq_save(flags);
+	hard_irq_disable();
+	/* Wait for the other CPU to ask for the timebase */
+	while (!tb_req)
+		barrier();
+	tb_req = 0;
+
+	qoriq_pm_ops->freeze_time_base(true);
+#ifdef CONFIG_PPC64
+	/*
+	 * e5500/e6500 have a workaround for erratum A-006958 in place
+	 * that will reread the timebase until TBL is non-zero.
+	 * That would be a bad thing when the timebase is frozen.
+	 *
+	 * Thus, we read it manually, and instead of checking that
+	 * TBL is non-zero, we ensure that TB does not change.  We don't
+	 * do that for the main mftb implementation, because it requires
+	 * a scratch register
+	 */
+	{
+		u64 prev;
+
+		asm volatile("mfspr %0, %1" : "=r" (timebase) :
+			     "i" (SPRN_TBRL));
+
+		do {
+			prev = timebase;
+			asm volatile("mfspr %0, %1" : "=r" (timebase) :
+				     "i" (SPRN_TBRL));
+		} while (prev != timebase);
+	}
+#else
+	timebase = get_tb();
+#endif
+	mb();
+	tb_valid = 1;	/* publish 'timebase' to the taker */
+	/* Keep the timebase frozen until the taker has cleared tb_valid */
+	while (tb_valid)
+		barrier();
+
+	qoriq_pm_ops->freeze_time_base(false);
+
+	local_irq_restore(flags);
+}
+
+static void mpc85xx_take_timebase(void)
+{
+	unsigned long flags;
+	/* Taker side of the timebase handshake with mpc85xx_give_timebase() */
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	tb_req = 1;	/* ask the giver for its timebase */
+	while (!tb_valid)
+		barrier();
+	/* Load the published value as upper and lower 32-bit halves */
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	isync();
+	tb_valid = 0;	/* lets the giver unfreeze the timebase */
+
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void smp_85xx_cpu_offline_self(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	local_irq_disable();
+	hard_irq_disable();
+	/* mask all irqs to prevent cpu wakeup */
+	qoriq_pm_ops->irq_mask(cpu);
+
+	idle_task_exit();
+	/* Zero TCR and write TSR back to itself (presumably stops and acks timers) */
+	mtspr(SPRN_TCR, 0);
+	mtspr(SPRN_TSR, mfspr(SPRN_TSR));
+
+	generic_set_cpu_dead(cpu);	/* polled via is_cpu_dead() in qoriq_cpu_kill() */
+
+	cur_cpu_spec->cpu_down_flush();
+
+	qoriq_pm_ops->cpu_die(cpu);
+	/* Spin forever should cpu_die() return */
+	while (1)
+		;
+}
+
+static void qoriq_cpu_kill(unsigned int cpu)
+{
+	int polls;
+	for (polls = 500; polls > 0; polls--) {	/* up to 500 checks, 20 ms apart */
+		if (!is_cpu_dead(cpu)) {
+			msleep(20);
+			continue;
+		}
+#ifdef CONFIG_PPC64
+		paca_ptrs[cpu]->cpu_start = 0;
+#endif
+		return;
+	}
+	pr_err("CPU%d didn't die...\n", cpu);
+}
+#endif
+
+/*
+ * To keep it compatible with old boot programs which use a
+ * cache-inhibited spin table, we need to flush the cache
+ * before accessing the spin table to invalidate any stale data.
+ * We also need to flush the cache after writing to the spin
+ * table to push data out.
+ */
+static inline void flush_spin_table(void *spin_table)
+{
+	flush_dcache_range((ulong)spin_table,
+		(ulong)spin_table + sizeof(struct epapr_spin_table));
+}
+
+/* Flush the spin table from the data cache, then read its addr_l word */
+static inline u32 read_spin_table_addr_l(void *spin_table)
+{
+	flush_spin_table(spin_table);
+	return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
+}
+
+#ifdef CONFIG_PPC64
+static void wake_hw_thread(void *info)
+{
+	void fsl_secondary_thread_init(void);
+	unsigned long inia;
+	int cpu = *(const int *)info;	/* @info points at the logical cpu number */
+	/* Start the target thread at fsl_secondary_thread_init */
+	inia = ppc_function_entry(fsl_secondary_thread_init);
+	book3e_start_thread(cpu_thread_in_core(cpu), inia);
+}
+#endif
+
+/* Release @cpu from its ePAPR spin table, resetting the core first if needed */
+static int smp_85xx_start_cpu(int cpu)
+{
+	int ret = 0, ioremappable;
+	struct device_node *np;
+	const u64 *cpu_rel_addr;
+	u64 rel_addr = 0;
+	unsigned long flags;
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	struct epapr_spin_table __iomem *spin_table;
+
+	np = of_get_cpu_node(cpu, NULL);
+	cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL);
+	if (cpu_rel_addr)
+		rel_addr = *cpu_rel_addr;
+	of_node_put(np);	/* value copied: drop of_get_cpu_node()'s reference */
+	if (!cpu_rel_addr) {
+		pr_err("No cpu-release-addr for cpu %d\n", cpu);
+		return -ENOENT;
+	}
+
+	/*
+	 * A secondary core could be in a spinloop in the bootpage
+	 * (0xfffff000), somewhere in highmem, or somewhere in lowmem.
+	 * The bootpage and highmem can be accessed via ioremap(), but
+	 * we need to directly access the spinloop if its in lowmem.
+	 */
+	ioremappable = rel_addr > virt_to_phys(high_memory - 1);
+
+	/* Map the spin table */
+	if (ioremappable)
+		spin_table = ioremap_coherent(rel_addr,
+					      sizeof(struct epapr_spin_table));
+	else
+		spin_table = phys_to_virt(rel_addr);
+	if (!spin_table) {
+		pr_err("failed to map spin table for cpu %d\n", cpu);
+		return -ENOMEM;
+	}
+
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+		qoriq_pm_ops->cpu_up_prepare(cpu);
+
+	/* if cpu is not spinning, reset it */
+	if (read_spin_table_addr_l(spin_table) != 1) {
+		/* BPTR is not set here: it already points to the boot page. */
+		mpic_reset_core(cpu);
+		/*
+		 * Wait until the core is ready.  Stale data must be invalidated
+		 * in case the boot loader uses a cache-inhibited spin table.
+		 */
+		if (!spin_event_timeout(
+				read_spin_table_addr_l(spin_table) == 1,
+				10000, 100)) {
+			pr_err("timeout waiting for cpu %d to reset\n", hw_cpu);
+			ret = -EAGAIN;
+			goto err;
+		}
+	}
+
+	flush_spin_table(spin_table);
+	out_be32(&spin_table->pir, hw_cpu);
+#ifdef CONFIG_PPC64
+	out_be64((u64 *)(&spin_table->addr_h),
+		__pa(ppc_function_entry(generic_secondary_smp_init)));
+#else
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	/*
+	 * addr_h must be written too for systems whose physical memory
+	 * start address was configured above 4G, otherwise the secondary
+	 * core cannot get the correct entry to start from.
+	 */
+	out_be32(&spin_table->addr_h, __pa(__early_start) >> 32);
+#endif
+	out_be32(&spin_table->addr_l, __pa(__early_start));
+#endif
+	flush_spin_table(spin_table);
+err:
+	local_irq_restore(flags);
+
+	if (ioremappable)
+		iounmap(spin_table);
+	return ret;
+}
+
+/* Start logical CPU @nr, via an online sibling thread or via the spin table */
+static int smp_85xx_kick_cpu(int nr)
+{
+	int ret = 0;
+#ifdef CONFIG_PPC64
+	int primary = nr;
+#endif
+
+	WARN_ON(nr < 0 || nr >= num_possible_cpus());
+
+	pr_debug("kick CPU #%d\n", nr);
+
+#ifdef CONFIG_PPC64
+	if (threads_per_core == 2) {
+		if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT)))
+			return -ENOENT;
+
+		booting_thread_hwid = cpu_thread_in_core(nr);
+		primary = cpu_first_thread_sibling(nr);
+
+		if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+			qoriq_pm_ops->cpu_up_prepare(nr);
+
+		/*
+		 * If either thread in the core is online, use it to start
+		 * the other.
+		 */
+		if (cpu_online(primary)) {
+			smp_call_function_single(primary,
+					wake_hw_thread, &nr, 1);
+			goto done;
+		} else if (cpu_online(primary + 1)) {
+			smp_call_function_single(primary + 1,
+					wake_hw_thread, &nr, 1);
+			goto done;
+		}
+
+		/*
+		 * If getting here, it means both threads in the core are
+		 * offline. So start the primary thread, then it will start
+		 * the thread specified in booting_thread_hwid, the one
+		 * corresponding to nr.
+		 */
+	} else if (threads_per_core == 1) {
+		/*
+		 * If one core has only one thread, set booting_thread_hwid to
+		 * an invalid value.
+		 */
+		booting_thread_hwid = INVALID_THREAD_HWID;
+
+	} else if (threads_per_core > 2) {
+		pr_err("Do not support more than 2 threads per CPU.\n");
+		return -EINVAL;
+	}
+
+	ret = smp_85xx_start_cpu(primary);
+	if (ret)
+		return ret;
+
+done:
+	paca_ptrs[nr]->cpu_start = 1;
+	generic_set_cpu_up(nr);
+
+	return ret;
+#else
+	ret = smp_85xx_start_cpu(nr);
+	if (ret)
+		return ret;
+
+	generic_set_cpu_up(nr);
+
+	return ret;
+#endif
+}
+
+struct smp_ops_t smp_85xx_ops = {	/* further fields are filled in by mpc85xx_smp_init() */
+	.cause_nmi_ipi = NULL,
+	.kick_cpu = smp_85xx_kick_cpu,
+	.cpu_bootable = smp_generic_cpu_bootable,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable	= generic_cpu_disable,
+	.cpu_die	= generic_cpu_die,
+#endif /* CONFIG_HOTPLUG_CPU */
+#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64)
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+#endif /* CONFIG_KEXEC_CORE && !CONFIG_PPC64 */
+};
+
+#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_PPC32
+atomic_t kexec_down_cpus = ATOMIC_INIT(0);	/* secondaries parked so far */
+/* PPC32: a secondary flushes its caches, bumps the counter and spins forever */
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	local_irq_disable();
+
+	if (secondary) {
+		cur_cpu_spec->cpu_down_flush();
+		atomic_inc(&kexec_down_cpus);
+		/* loop forever */
+		while (1);
+	}
+}
+
+static void mpc85xx_smp_kexec_down(void *arg)
+{
+	if (ppc_md.kexec_cpu_down)	/* @arg is unused */
+		ppc_md.kexec_cpu_down(0,1);	/* crash_shutdown = 0, secondary = 1 */
+}
+#else
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	int cpu = smp_processor_id();
+	int sibling = cpu_last_thread_sibling(cpu);
+	bool notified = false;
+	int disable_cpu;	/* only set, and only read, when disable_threadbit != 0 */
+	int disable_threadbit = 0;
+	long start = mftb();
+	long now;
+
+	local_irq_disable();
+	hard_irq_disable();
+	mpic_teardown_this_cpu(secondary);
+
+	if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) {
+		/*
+		 * We enter the crash kernel on whatever cpu crashed,
+		 * even if it's a secondary thread.  If that's the case,
+		 * disable the corresponding primary thread.
+		 */
+		disable_threadbit = 1;
+		disable_cpu = cpu_first_thread_sibling(cpu);
+	} else if (sibling != crashing_cpu &&
+		   cpu_thread_in_core(cpu) == 0 &&
+		   cpu_thread_in_core(sibling) != 0) {
+		disable_threadbit = 2;
+		disable_cpu = sibling;
+	}
+	/* Wait for the chosen thread to reach real mode, then disable it */
+	if (disable_threadbit) {
+		while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			now = mftb();
+			if (!notified && now - start > 1000000) {
+				pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
+					__func__, smp_processor_id(),
+					disable_cpu,
+					paca_ptrs[disable_cpu]->kexec_state);
+				notified = true;	/* report the wait only once */
+			}
+		}
+
+		if (notified) {
+			pr_info("%s: cpu %d done waiting\n",
+				__func__, disable_cpu);
+		}
+		/* Write the thread bit to TENC and poll TENSR until it clears */
+		mtspr(SPRN_TENC, disable_threadbit);
+		while (mfspr(SPRN_TENSR) & disable_threadbit)
+			cpu_relax();
+	}
+}
+#endif
+
+/* kexec entry: on PPC32 park and reset the other cores, then do the generic kexec */
+static void mpc85xx_smp_machine_kexec(struct kimage *image)
+{
+#ifdef CONFIG_PPC32
+	int timeout = INT_MAX;
+	int i, num_cpus = num_present_cpus();
+
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
+
+	/* Secondaries bump kexec_down_cpus once they are parked */
+	while (atomic_read(&kexec_down_cpus) != num_cpus - 1 && timeout > 0)
+		timeout--;
+
+	if (!timeout)
+		pr_err("Unable to bring down secondary cpu(s)\n");
+
+	/* Reset every other online core before leaving this kernel */
+	for_each_online_cpu(i) {
+		if (i == smp_processor_id())
+			continue;
+		mpic_reset_core(i);
+	}
+#endif
+
+	default_machine_kexec(image);
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+static void smp_85xx_setup_cpu(int cpu_nr)
+{
+	mpic_setup_this_cpu();	/* @cpu_nr is not used */
+}
+
+/* Fill in smp_85xx_ops for this platform and install it, plus the kexec hooks */
+void __init mpc85xx_smp_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_node_by_type(NULL, "open-pic");
+	of_node_put(np);	/* only the node's presence is tested below */
+	if (np) {
+		smp_85xx_ops.probe = smp_mpic_probe;
+		smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
+		smp_85xx_ops.message_pass = smp_mpic_message_pass;
+	} else
+		smp_85xx_ops.setup_cpu = NULL;
+
+	if (cpu_has_feature(CPU_FTR_DBELL)) {
+		/*
+		 * If left NULL, .message_pass defaults to
+		 * smp_muxed_ipi_message_pass
+		 */
+		smp_85xx_ops.message_pass = NULL;
+		smp_85xx_ops.cause_ipi = doorbell_global_ipi;
+		smp_85xx_ops.probe = NULL;
+	}
+
+#ifdef CONFIG_FSL_CORENET_RCPM
+	/* Assign a value to qoriq_pm_ops on PPC_E500MC */
+	fsl_rcpm_init();
+#else
+	/* Assign a value to qoriq_pm_ops on !PPC_E500MC */
+	mpc85xx_setup_pmc();
+#endif
+	if (qoriq_pm_ops) {
+		smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
+		smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
+		smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
+		smp_85xx_ops.cpu_die = qoriq_cpu_kill;
+#endif
+	}
+	smp_ops = &smp_85xx_ops;
+#ifdef CONFIG_KEXEC_CORE
+	ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down;
+	ppc_md.machine_kexec = mpc85xx_smp_machine_kexec;
+#endif
+}
diff --git a/arch/powerpc/platforms/85xx/smp.h b/arch/powerpc/platforms/85xx/smp.h
new file mode 100644
index 0000000000..3936ff6dfb
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/smp.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef POWERPC_85XX_SMP_H_
+#define POWERPC_85XX_SMP_H_ 1
+
+#include <linux/init.h>
+
+#ifdef CONFIG_SMP
+void __init mpc85xx_smp_init(void);
+int __init mpc85xx_setup_pmc(void);
+#else /* !CONFIG_SMP */
+static inline void mpc85xx_smp_init(void)
+{
+	/* Nothing to do without CONFIG_SMP */
+}
+#endif /* CONFIG_SMP */
+
+#endif /* not POWERPC_85XX_SMP_H_ */
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
new file mode 100644
index 0000000000..403367b318
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2008 Emcraft Systems
+ * Sergei Poselenov <sposelenov@emcraft.com>
+ *
+ * Based on MPC8560 ADS and arch/ppc tqm85xx ports
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * Copyright (c) 2005-2006 DENX Software Engineering
+ * Stefan Roese <sr@denx.de>
+ *
+ * Based on original work by
+ * 	Kumar Gala <kumar.gala@freescale.com>
+ *      Copyright 2004 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+#include "socrates_fpga_pic.h"
+
+/* Bring up the MPIC, then the FPGA interrupt controller found in the DT */
+static void __init socrates_pic_init(void)
+{
+	struct device_node *fpga_pic;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+	fpga_pic = of_find_compatible_node(NULL, NULL, "abb,socrates-fpga-pic");
+	if (fpga_pic) {
+		socrates_fpga_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+	} else {
+		printk(KERN_ERR "Could not find socrates-fpga-pic node\n");
+	}
+}
+
+/*
+ * Setup the architecture: report progress and assign the primary PCI bus
+ */
+static void __init socrates_setup_arch(void)
+{
+	if (ppc_md.progress)	/* the progress hook is optional */
+		ppc_md.progress("socrates_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+}
+
+machine_arch_initcall(socrates, mpc85xx_common_publish_devices);
+/* Machine description: hooks in the setup and IRQ-init routines defined above */
+define_machine(socrates) {
+	.name			= "Socrates",
+	.compatible		= "abb,socrates",
+	.setup_arch		= socrates_setup_arch,
+	.init_IRQ		= socrates_pic_init,
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
new file mode 100644
index 0000000000..baa12eff6d
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Copyright (C) 2008 Ilya Yanok, Emcraft Systems
+ */
+
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/io.h>
+
+/*
+ * The FPGA supports 9 interrupt sources, which can be routed to 3
+ * interrupt request lines of the MPIC. The line to be used can be
+ * specified through the third cell of FDT property "interrupts".
+ */
+
+/* Number of interrupt sources (hardware irq numbers 0..8) */
+#define SOCRATES_FPGA_NUM_IRQS	9
+
+/* Register byte offsets: IRQCFG at 0, then one IRQMASK register per line n */
+#define FPGA_PIC_IRQCFG		(0x0)
+#define FPGA_PIC_IRQMASK(n)	(0x4 + 0x4 * (n))
+
+/* Mask covering the low SOCRATES_FPGA_NUM_IRQS bits, one bit per source */
+#define SOCRATES_FPGA_IRQ_MASK	((1 << SOCRATES_FPGA_NUM_IRQS) - 1)
+
+/*
+ * Per-source routing and trigger information.
+ *
+ * irq_line: index of the MPIC request line the source is routed to; the
+ *           xlate callback only accepts values 0..2.
+ * type:     IRQ_TYPE_* value for the source; IRQ_TYPE_NONE marks a source
+ *           whose type may be configured.
+ */
+struct socrates_fpga_irq_info {
+	unsigned int irq_line;
+	int type;
+};
+
+/*
+ * Interrupt routing and type table, indexed by hardware irq number.
+ *
+ * IRQ_TYPE_NONE means the interrupt type is configurable,
+ * otherwise it's fixed to the specified value.
+ *
+ * Every source starts routed to line 0; the xlate callback updates
+ * irq_line from the device tree.
+ */
+static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = {
+	[0] = {0, IRQ_TYPE_NONE},
+	[1] = {0, IRQ_TYPE_LEVEL_HIGH},
+	[2] = {0, IRQ_TYPE_LEVEL_LOW},
+	[3] = {0, IRQ_TYPE_NONE},
+	[4] = {0, IRQ_TYPE_NONE},
+	[5] = {0, IRQ_TYPE_NONE},
+	[6] = {0, IRQ_TYPE_NONE},
+	[7] = {0, IRQ_TYPE_NONE},
+	[8] = {0, IRQ_TYPE_LEVEL_HIGH},
+};
+
+/* Taken around every FPGA PIC register access in this file */
+static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock);
+
+/* Mapped base of the FPGA PIC registers, set by socrates_fpga_pic_init() */
+static void __iomem *socrates_fpga_pic_iobase;
+/* Linear irq domain covering the SOCRATES_FPGA_NUM_IRQS sources */
+static struct irq_domain *socrates_fpga_pic_irq_host;
+/* Linux irq numbers of the three parent lines, as mapped at init */
+static unsigned int socrates_fpga_irqs[3];
+
+/* Read the 32-bit big-endian FPGA PIC register at byte offset @reg */
+static inline uint32_t socrates_fpga_pic_read(int reg)
+{
+	void __iomem *addr = socrates_fpga_pic_iobase + reg;
+
+	return in_be32(addr);
+}
+
+/* Write @val to the 32-bit big-endian FPGA PIC register at byte offset @reg */
+static inline void socrates_fpga_pic_write(int reg, uint32_t val)
+{
+	void __iomem *addr = socrates_fpga_pic_iobase + reg;
+
+	out_be32(addr, val);
+}
+
+/*
+ * Map a parent interrupt to the Linux irq of an FPGA source.
+ *
+ * @irq is the Linux irq number of one of the three parent lines recorded in
+ * socrates_fpga_irqs[]. Returns 0 when @irq is not one of them; otherwise
+ * returns whatever irq_linear_revmap() yields for the source selected below.
+ */
+static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
+{
+	uint32_t cause;
+	unsigned long flags;
+	int i;
+
+	/* Check irq line routed to the MPIC */
+	for (i = 0; i < 3; i++) {
+		if (irq == socrates_fpga_irqs[i])
+			break;
+	}
+	if (i == 3)
+		return 0;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
+	cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(i));
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags);
+	/*
+	 * Scan from the highest source downwards; the test is true as soon as
+	 * any bit at or above position (i + 16) is set in the value read.
+	 * NOTE(review): if no such bit is set the loop ends with i == -1, which
+	 * is then cast and passed to irq_linear_revmap() - confirm that this
+	 * yields 0 for an out-of-range hwirq.
+	 */
+	for (i = SOCRATES_FPGA_NUM_IRQS - 1; i >= 0; i--) {
+		if (cause >> (i + 16))
+			break;
+	}
+	return irq_linear_revmap(socrates_fpga_pic_irq_host,
+			(irq_hw_number_t)i);
+}
+
+/*
+ * Chained handler for the parent lines: look up the FPGA source behind
+ * the parent interrupt, dispatch it if there is one, then signal
+ * end-of-interrupt on the parent chip.
+ */
+static void socrates_fpga_pic_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int virq;
+
+	/* A result of 0 means there is nothing to dispatch */
+	virq = socrates_fpga_pic_get_irq(irq_desc_get_irq(desc));
+	if (virq != 0)
+		generic_handle_irq(virq);
+
+	/* The parent line is always EOI'd, whether or not a source was found */
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Acknowledge source @d: rewrite the IRQMASK register of the line the
+ * source is routed to, with the low per-source bits preserved and bit
+ * (hwirq + 16) set.
+ */
+static void socrates_fpga_pic_ack(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	unsigned int line = fpga_irqs[hwirq].irq_line;
+	unsigned long irqflags;
+	uint32_t val;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, irqflags);
+	/* Only the low per-source bits of the current value are carried over */
+	val = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(line));
+	val &= SOCRATES_FPGA_IRQ_MASK;
+	val |= 1 << (hwirq + 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(line), val);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, irqflags);
+}
+
+/*
+ * Mask source @d: clear bit hwirq in the IRQMASK register of the line
+ * the source is routed to.
+ */
+static void socrates_fpga_pic_mask(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	int line = fpga_irqs[hwirq].irq_line;
+	unsigned long irqflags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, irqflags);
+	/* Only the low per-source bits of the current value are carried over */
+	val = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(line));
+	val &= SOCRATES_FPGA_IRQ_MASK;
+	val &= ~(1 << hwirq);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(line), val);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, irqflags);
+}
+
+/*
+ * Mask and acknowledge source @d in a single register update: clear bit
+ * hwirq and set bit (hwirq + 16) in the IRQMASK register of its line.
+ */
+static void socrates_fpga_pic_mask_ack(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	int line = fpga_irqs[hwirq].irq_line;
+	unsigned long irqflags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, irqflags);
+	/* Only the low per-source bits of the current value are carried over */
+	val = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(line));
+	val &= SOCRATES_FPGA_IRQ_MASK;
+	val &= ~(1 << hwirq);
+	val |= 1 << (hwirq + 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(line), val);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, irqflags);
+}
+
+/*
+ * Unmask source @d: set bit hwirq in the IRQMASK register of the line
+ * the source is routed to.
+ */
+static void socrates_fpga_pic_unmask(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	int line = fpga_irqs[hwirq].irq_line;
+	unsigned long irqflags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, irqflags);
+	/* Only the low per-source bits of the current value are carried over */
+	val = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(line));
+	val &= SOCRATES_FPGA_IRQ_MASK;
+	val |= 1 << hwirq;
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(line), val);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, irqflags);
+}
+
+/*
+ * End-of-interrupt for source @d. Performs the same register update as
+ * the ack callback: low per-source bits preserved, bit (hwirq + 16) set.
+ */
+static void socrates_fpga_pic_eoi(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	int line = fpga_irqs[hwirq].irq_line;
+	unsigned long irqflags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, irqflags);
+	val = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(line));
+	val &= SOCRATES_FPGA_IRQ_MASK;
+	val |= 1 << (hwirq + 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(line), val);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, irqflags);
+}
+
+/*
+ * Set the trigger polarity of source @d.
+ *
+ * Only sources marked IRQ_TYPE_NONE in fpga_irqs[] may be changed, and only
+ * to IRQ_TYPE_LEVEL_HIGH (bit hwirq of IRQCFG set) or IRQ_TYPE_LEVEL_LOW
+ * (bit cleared). Returns 0 on success, -EINVAL otherwise.
+ */
+static int socrates_fpga_pic_set_type(struct irq_data *d,
+		unsigned int flow_type)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	unsigned int sense = flow_type & IRQ_TYPE_SENSE_MASK;
+	unsigned long irqflags;
+	bool active_high;
+	u32 cfg;
+
+	/* Sources with a fixed type cannot be reconfigured */
+	if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE)
+		return -EINVAL;
+
+	if (sense == IRQ_TYPE_LEVEL_HIGH)
+		active_high = true;
+	else if (sense == IRQ_TYPE_LEVEL_LOW)
+		active_high = false;
+	else
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, irqflags);
+	cfg = socrates_fpga_pic_read(FPGA_PIC_IRQCFG);
+	if (active_high)
+		cfg |= 1 << hwirq;
+	else
+		cfg &= ~(1 << hwirq);
+	socrates_fpga_pic_write(FPGA_PIC_IRQCFG, cfg);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, irqflags);
+
+	return 0;
+}
+
+/* irq_chip callbacks for the FPGA sources; installed by the domain map callback */
+static struct irq_chip socrates_fpga_pic_chip = {
+	.name		= "FPGA-PIC",
+	.irq_ack	= socrates_fpga_pic_ack,
+	.irq_mask	= socrates_fpga_pic_mask,
+	.irq_mask_ack	= socrates_fpga_pic_mask_ack,
+	.irq_unmask	= socrates_fpga_pic_unmask,
+	.irq_eoi	= socrates_fpga_pic_eoi,
+	.irq_set_type	= socrates_fpga_pic_set_type,
+};
+
+/*
+ * Domain map callback: flag @virq as level triggered and attach the
+ * FPGA PIC chip with the fasteoi flow handler. Always returns 0.
+ */
+static int socrates_fpga_pic_host_map(struct irq_domain *h, unsigned int virq,
+		irq_hw_number_t hwirq)
+{
+	/* All interrupts are LEVEL sensitive */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+
+	irq_set_chip_and_handler(virq, &socrates_fpga_pic_chip,
+				 handle_fasteoi_irq);
+	return 0;
+}
+
+/*
+ * Translate a device-tree interrupt specifier into a hardware irq number
+ * and trigger type.
+ *
+ * The specifier has three cells: intspec[0] is the FPGA interrupt source,
+ * intspec[1] the requested IRQ_TYPE_* value (used only for sources whose
+ * type is configurable) and intspec[2] the MPIC request line (0..2) the
+ * source is routed to. An out-of-range routing cell is warned about and
+ * leaves the current routing untouched.
+ *
+ * Returns 0 on success, or -EINVAL when the specifier has fewer than three
+ * cells or names a source outside fpga_irqs[].
+ */
+static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
+		struct device_node *ct,	const u32 *intspec, unsigned int intsize,
+		irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	struct socrates_fpga_irq_info *fpga_irq;
+
+	/* All three cells are read below, so reject short specifiers */
+	if (intsize < 3) {
+		pr_warn("FPGA PIC: invalid interrupt specifier size\n");
+		return -EINVAL;
+	}
+
+	/* intspec[0] comes from the device tree and indexes fpga_irqs[] */
+	if (intspec[0] >= SOCRATES_FPGA_NUM_IRQS) {
+		pr_warn("FPGA PIC: invalid irq number\n");
+		return -EINVAL;
+	}
+
+	fpga_irq = &fpga_irqs[intspec[0]];
+
+	*out_hwirq = intspec[0];
+	if  (fpga_irq->type == IRQ_TYPE_NONE) {
+		/* type is configurable */
+		if (intspec[1] != IRQ_TYPE_LEVEL_LOW &&
+		    intspec[1] != IRQ_TYPE_LEVEL_HIGH) {
+			pr_warn("FPGA PIC: invalid irq type, setting default active low\n");
+			*out_flags = IRQ_TYPE_LEVEL_LOW;
+		} else {
+			*out_flags = intspec[1];
+		}
+	} else {
+		/* type is fixed */
+		*out_flags = fpga_irq->type;
+	}
+
+	/* Use specified interrupt routing */
+	if (intspec[2] <= 2)
+		fpga_irq->irq_line = intspec[2];
+	else
+		pr_warn("FPGA PIC: invalid irq routing\n");
+
+	return 0;
+}
+
+/* irq_domain callbacks passed to irq_domain_add_linear() at init */
+static const struct irq_domain_ops socrates_fpga_pic_host_ops = {
+	.map    = socrates_fpga_pic_host_map,
+	.xlate  = socrates_fpga_pic_host_xlate,
+};
+
+/*
+ * Initialise the Socrates FPGA interrupt controller described by @pic.
+ *
+ * Maps the controller registers, creates a linear irq domain for the
+ * SOCRATES_FPGA_NUM_IRQS sources, installs the cascade handler on each of
+ * the three parent interrupts that can be mapped, and finally writes
+ * SOCRATES_FPGA_IRQ_MASK << 16 to all three IRQMASK registers.
+ *
+ * On failure to map the registers or to allocate the domain an error is
+ * logged and the controller is left uninitialised.
+ */
+void __init socrates_fpga_pic_init(struct device_node *pic)
+{
+	unsigned long flags;
+	int i;
+
+	/*
+	 * Map the registers before anything can use them: both the cascade
+	 * handler and the register writes below go through this mapping.
+	 */
+	socrates_fpga_pic_iobase = of_iomap(pic, 0);
+	if (socrates_fpga_pic_iobase == NULL) {
+		pr_err("FPGA PIC: Unable to map registers\n");
+		return;
+	}
+
+	/* Setup an irq_domain structure */
+	socrates_fpga_pic_irq_host = irq_domain_add_linear(pic,
+		    SOCRATES_FPGA_NUM_IRQS, &socrates_fpga_pic_host_ops, NULL);
+	if (socrates_fpga_pic_irq_host == NULL) {
+		pr_err("FPGA PIC: Unable to allocate host\n");
+		iounmap(socrates_fpga_pic_iobase);
+		socrates_fpga_pic_iobase = NULL;
+		return;
+	}
+
+	for (i = 0; i < 3; i++) {
+		socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
+		if (!socrates_fpga_irqs[i]) {
+			/* A missing parent line is not fatal; skip it */
+			pr_warn("FPGA PIC: can't get irq%d\n", i);
+			continue;
+		}
+		irq_set_chained_handler(socrates_fpga_irqs[i],
+					socrates_fpga_pic_cascade);
+	}
+
+	/* Low (per-source) bits cleared, upper per-source bits set, on every line */
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(0),
+			SOCRATES_FPGA_IRQ_MASK << 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(1),
+			SOCRATES_FPGA_IRQ_MASK << 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(2),
+			SOCRATES_FPGA_IRQ_MASK << 16);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags);
+
+	pr_info("FPGA PIC: Setting up Socrates FPGA PIC\n");
+}
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
new file mode 100644
index 0000000000..c50b23794a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  Copyright (C) 2008 Ilya Yanok, Emcraft Systems
+ */
+
+#ifndef SOCRATES_FPGA_PIC_H
+#define SOCRATES_FPGA_PIC_H
+
+/* Initialise the Socrates FPGA interrupt controller described by node @pic */
+void __init socrates_fpga_pic_init(struct device_node *pic);
+
+#endif
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
new file mode 100644
index 0000000000..c10efc4589
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on MPC8560 ADS and arch/ppc stx_gp3 ports
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * Dan Malek <dan@embeddededge.com>
+ * Copyright 2004 Embedded Edge, LLC
+ *
+ * Copied from mpc8560_ads.c
+ * Copyright 2002, 2003 Motorola Inc.
+ *
+ * Ported to 2.6, Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2004-2005 MontaVista Software, Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+#ifdef CONFIG_CPM2
+#include <asm/cpm2.h>
+#endif /* CONFIG_CPM2 */
+
+/*
+ * Allocate and initialise the MPIC, then run the common mpc85xx CPM2 PIC
+ * initialisation.
+ */
+static void __init stx_gp3_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!mpic);
+	mpic_init(mpic);
+
+	mpc85xx_cpm2_pic_init();
+}
+
+/*
+ * Setup the architecture: report progress through the optional platform
+ * hook, call fsl_pci_assign_primary() and, when CONFIG_CPM2 is enabled,
+ * reset the CPM2.
+ */
+static void __init stx_gp3_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("stx_gp3_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+#ifdef CONFIG_CPM2
+	cpm2_reset();
+#endif
+}
+
+/*
+ * Emit the board-specific cpuinfo lines to @m: vendor string, PVR, SVR
+ * and the six-bit field at bits 24..29 of HID1, labelled "PLL setting".
+ */
+static void stx_gp3_show_cpuinfo(struct seq_file *m)
+{
+	uint pvr = mfspr(SPRN_PVR);
+	uint svr = mfspr(SPRN_SVR);
+	uint hid1;
+
+	seq_printf(m, "Vendor\t\t: RPC Electronics STx\n");
+	seq_printf(m, "PVR\t\t: 0x%x\n", pvr);
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	/* Display cpu Pll setting */
+	hid1 = mfspr(SPRN_HID1);
+	seq_printf(m, "PLL setting\t: 0x%x\n", (hid1 >> 24) & 0x3f);
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for this machine */
+machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices);
+
+/* Machine description, identified by the "stx,gp3-8560" compatible string */
+define_machine(stx_gp3) {
+	.name			= "STX GP3",
+	.compatible		= "stx,gp3-8560",
+	.setup_arch		= stx_gp3_setup_arch,
+	.init_IRQ		= stx_gp3_pic_init,
+	.show_cpuinfo		= stx_gp3_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
new file mode 100644
index 0000000000..767eed98a0
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * T1042 platform DIU operation
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <sysdev/fsl_soc.h>
+
+/* Offset of the DIU pixel clock register (PIXCLKCR) in scfg */
+#define CCSR_SCFG_PIXCLKCR      0x28
+
+/* DIU Pixel Clock bits of the PIXCLKCR */
+#define PIXCLKCR_PXCKEN		0x80000000
+#define PIXCLKCR_PXCKINV	0x40000000
+#define PIXCLKCR_PXCKDLY	0x0000FF00
+#define PIXCLKCR_PXCLK_MASK	0x00FF0000
+
+/* Some CPLD register definitions */
+#define CPLD_DIUCSR		0x16
+#define CPLD_DIUCSR_DVIEN	0x80
+#define CPLD_DIUCSR_BACKLIGHT	0x0f
+
+/* "fsl,t1042rdb-cpld" node, looked up once in t1042rdb_diu_init() */
+struct device_node *cpld_node;
+
+/**
+ * t1042rdb_set_monitor_port - switch the output to a different monitor port
+ * @port: monitor port to select
+ *
+ * Maps the CPLD registers of the cached cpld_node, updates the DIUCSR
+ * register for @port and unmaps the registers again. An unsupported port
+ * is reported and leaves the register untouched.
+ */
+static void t1042rdb_set_monitor_port(enum fsl_diu_monitor_port port)
+{
+	void __iomem *cpld_base;
+
+	cpld_base = of_iomap(cpld_node, 0);
+	if (!cpld_base) {
+		pr_err("%s: Could not map cpld registers\n", __func__);
+		return;
+	}
+
+	switch (port) {
+	case FSL_DIU_PORT_DVI:
+		/* Enable the DVI(HDMI) port, disable the DFP and
+		 * the backlight
+		 */
+		clrbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_DVIEN);
+		break;
+	case FSL_DIU_PORT_LVDS:
+		/*
+		 * LVDS also needs backlight enabled, otherwise the display
+		 * will be blank.
+		 */
+		/* Enable the DFP port, disable the DVI*/
+		/*
+		 * NOTE(review): 0x01 << 8 is 0x100, which does not fit in an
+		 * 8-bit register value, so this write presumably sets no bit.
+		 * Confirm the intended bit against the CPLD documentation.
+		 */
+		setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 8);
+		setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 4);
+		setbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_BACKLIGHT);
+		break;
+	default:
+		pr_err("%s: Unsupported monitor port %i\n", __func__, port);
+		break;
+	}
+
+	iounmap(cpld_base);
+
+	/*
+	 * cpld_node is looked up once in t1042rdb_diu_init() and reused on
+	 * every call, so its reference must not be dropped here.
+	 */
+}
+
+/**
+ * t1042rdb_set_pixel_clock - program the DIU's clock
+ * @pixclock: pixel clock in ps (pico seconds)
+ *
+ * Converts @pixclock to a frequency, derives the platform-clock to
+ * pixel-clock ratio (clamped to 2..255) and programs it into PIXCLKCR of
+ * the "fsl,t1040-scfg" block. A @pixclock of 0 is rejected because the
+ * period is used as a divisor.
+ */
+static void t1042rdb_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *scfg_np;
+	void __iomem *scfg;
+	unsigned long freq;
+	u64 temp;
+	u32 pxclk;
+
+	/* The period is the divisor below; zero would divide by zero */
+	if (!pixclock) {
+		pr_err("%s: Invalid pixel clock. Can not display video.\n",
+		       __func__);
+		return;
+	}
+
+	scfg_np = of_find_compatible_node(NULL, NULL, "fsl,t1040-scfg");
+	if (!scfg_np) {
+		pr_err("%s: Missing scfg node. Can not display video.\n",
+		       __func__);
+		return;
+	}
+
+	scfg = of_iomap(scfg_np, 0);
+	of_node_put(scfg_np);
+	if (!scfg) {
+		pr_err("%s: Could not map device. Can not display video.\n",
+		       __func__);
+		return;
+	}
+
+	/* Convert pixclock into frequency */
+	temp = 1000000000000ULL;
+	do_div(temp, pixclock);
+	freq = temp;
+
+	/*
+	 * 'pxclk' is the ratio of the platform clock to the pixel clock.
+	 * This number is programmed into the PIXCLKCR register, and the valid
+	 * range of values is 2-255.
+	 */
+	pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+	pxclk = clamp_t(u32, pxclk, 2, 255);
+
+	/* Disable the pixel clock, and set it to non-inverted and no delay */
+	clrbits32(scfg + CCSR_SCFG_PIXCLKCR,
+		  PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK);
+
+	/* Enable the clock and set the pxclk */
+	setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16));
+
+	iounmap(scfg);
+}
+
+/**
+ * t1042rdb_valid_monitor_port - map a requested port to a supported one
+ * @port: requested monitor port
+ *
+ * Returns @port for DVI and LVDS, and FSL_DIU_PORT_DVI for anything else.
+ */
+static enum fsl_diu_monitor_port
+t1042rdb_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	if (port == FSL_DIU_PORT_DVI || port == FSL_DIU_PORT_LVDS)
+		return port;
+
+	/* Dual-link LVDS is not supported */
+	return FSL_DIU_PORT_DVI;
+}
+
+/*
+ * Early initcall: look up the "fsl,t1042rdb-cpld" node and, when it exists,
+ * install the T1042RDB callbacks into diu_ops. Always returns 0; without
+ * the node diu_ops is left untouched.
+ */
+static int __init t1042rdb_diu_init(void)
+{
+	/* The node is kept in cpld_node for later use by set_monitor_port */
+	cpld_node = of_find_compatible_node(NULL, NULL, "fsl,t1042rdb-cpld");
+	if (!cpld_node)
+		return 0;
+
+	diu_ops.set_monitor_port	= t1042rdb_set_monitor_port;
+	diu_ops.set_pixel_clock		= t1042rdb_set_pixel_clock;
+	diu_ops.valid_monitor_port	= t1042rdb_valid_monitor_port;
+
+	return 0;
+}
+
+early_initcall(t1042rdb_diu_init);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
new file mode 100644
index 0000000000..6be1b9809d
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on MPC8560 ADS and arch/ppc tqm85xx ports
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * Copyright (c) 2005-2006 DENX Software Engineering
+ * Stefan Roese <sr@denx.de>
+ *
+ * Based on original work by
+ * 	Kumar Gala <kumar.gala@freescale.com>
+ *      Copyright 2004 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+#ifdef CONFIG_CPM2
+#include <asm/cpm2.h>
+#endif /* CONFIG_CPM2 */
+
+/*
+ * Allocate and initialise the MPIC, then run the common mpc85xx CPM2 PIC
+ * initialisation.
+ */
+static void __init tqm85xx_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!mpic);
+	mpic_init(mpic);
+
+	mpc85xx_cpm2_pic_init();
+}
+
+/*
+ * Setup the architecture: report progress through the optional platform
+ * hook, reset the CPM2 when CONFIG_CPM2 is enabled, then call
+ * fsl_pci_assign_primary().
+ */
+static void __init tqm85xx_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("tqm85xx_setup_arch()", 0);
+
+#ifdef CONFIG_CPM2
+	cpm2_reset();
+#endif
+
+	fsl_pci_assign_primary();
+}
+
+/*
+ * Emit the board-specific cpuinfo lines to @m: vendor string, PVR, SVR
+ * and the six-bit field at bits 24..29 of HID1, labelled "PLL setting".
+ */
+static void tqm85xx_show_cpuinfo(struct seq_file *m)
+{
+	uint pvr = mfspr(SPRN_PVR);
+	uint svr = mfspr(SPRN_SVR);
+	uint hid1;
+
+	seq_printf(m, "Vendor\t\t: TQ Components\n");
+	seq_printf(m, "PVR\t\t: 0x%x\n", pvr);
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	/* Display cpu Pll setting */
+	hid1 = mfspr(SPRN_HID1);
+	seq_printf(m, "PLL setting\t: 0x%x\n", (hid1 >> 24) & 0x3f);
+}
+
+/*
+ * PCI header fixup for the TI 1520: on TQM85xx machines only, set bit 27
+ * of the config-space dword at offset 0x80.
+ */
+static void tqm85xx_ti1520_fixup(struct pci_dev *pdev)
+{
+	unsigned int sysctl;
+
+	/* Do not do the fixup on other platforms! */
+	if (machine_is(tqm85xx)) {
+		dev_info(&pdev->dev, "Using TI 1520 fixup on TQM85xx\n");
+
+		/*
+		 * Enable P2CCLK bit in system control register
+		 * to enable CLOCK output to power chip
+		 */
+		pci_read_config_dword(pdev, 0x80, &sysctl);
+		sysctl |= 1 << 27;
+		pci_write_config_dword(pdev, 0x80, sysctl);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520,
+		tqm85xx_ti1520_fixup);
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for this machine */
+machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices);
+
+/* NULL-terminated list of root compatible strings handled by this machine */
+static const char * const board[] __initconst = {
+	"tqc,tqm8540",
+	"tqc,tqm8541",
+	"tqc,tqm8548",
+	"tqc,tqm8555",
+	"tqc,tqm8560",
+	NULL
+};
+
+/*
+ * Called very early, device-tree isn't unflattened
+ *
+ * Returns the result of matching the root node against board[].
+ * NOTE(review): the match is done on of_root, so the "isn't unflattened"
+ * remark above may be stale - confirm.
+ */
+static int __init tqm85xx_probe(void)
+{
+	return of_device_compatible_match(of_root, board);
+}
+
+/* Machine description; selected through tqm85xx_probe() rather than .compatible */
+define_machine(tqm85xx) {
+	.name			= "TQM85xx",
+	.probe			= tqm85xx_probe,
+	.setup_arch		= tqm85xx_setup_arch,
+	.init_IRQ		= tqm85xx_pic_init,
+	.show_cpuinfo		= tqm85xx_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
new file mode 100644
index 0000000000..c0a0456f16
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Michael Johnston <michael.johnston@freescale.com>
+ *
+ * Description:
+ * TWR-P102x Board Setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/fsl/guts.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <soc/fsl/qe/qe.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/*
+ * Allocate the MPIC with the big-endian and single-destination-CPU flags
+ * and initialise it.
+ */
+static void __init twr_p1025_pic_init(void)
+{
+	const unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic = mpic_alloc(NULL, 0, mpic_flags,
+			0, 256, " OpenPIC  ");
+
+	BUG_ON(!mpic);
+	mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture
+ *
+ * Reports progress, initialises SMP support and calls
+ * fsl_pci_assign_primary(). With CONFIG_QUICC_ENGINE the QE parallel I/O is
+ * initialised; when UCC_GETH or SERIAL_QE is also enabled, the QE pin-mux
+ * bits in the "fsl,p1021-guts" PMUXCR register are set and the board
+ * pins are configured.
+ */
+static void __init twr_p1025_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("twr_p1025_setup_arch()", 0);
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+
+#ifdef CONFIG_QUICC_ENGINE
+	mpc85xx_qe_par_io_init();
+
+#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
+	if (machine_is(twr_p1025)) {
+		struct ccsr_guts __iomem *guts;
+		struct device_node *np;
+
+		np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts");
+		if (np) {
+			guts = of_iomap(np, 0);
+			if (!guts)
+				pr_err("twr_p1025: could not map global utilities register\n");
+			else {
+			/* P1025 has pins muxed for QE and other functions. To
+			 * enable QE UEC mode, we need to set bit QE0 for UCC1
+			 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+			 * and QE12 for QE MII management signals in PMUXCR
+			 * register.
+			 * Set QE mux bits in PMUXCR */
+			setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+					MPC85xx_PMUXCR_QE(3) |
+					MPC85xx_PMUXCR_QE(9) |
+					MPC85xx_PMUXCR_QE(12));
+			/* The mapping is only needed for the PMUXCR update */
+			iounmap(guts);
+
+#if IS_ENABLED(CONFIG_SERIAL_QE)
+			/* On P1025TWR board, the UCC7 acted as UART port.
+			 * However, The UCC7's CTS pin is low level in default,
+			 * it will impact the transmission in full duplex
+			 * communication. So disable the Flow control pin PA18.
+			 * The UCC7 UART just can use RXD and TXD pins.
+			 */
+			par_io_config_pin(0, 18, 0, 0, 0, 0);
+#endif
+			/* Drive PB29 to CPLD low - CPLD will then change
+			 * muxing from LBC to QE */
+			par_io_config_pin(1, 29, 1, 0, 0, 0);
+			par_io_data_set(1, 29, 0);
+			}
+			of_node_put(np);
+		}
+	}
+#endif
+#endif	/* CONFIG_QUICC_ENGINE */
+
+	pr_info("TWR-P1025 board from Freescale Semiconductor\n");
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for this machine */
+machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices);
+
+/* Machine description, identified by the "fsl,TWR-P1025" compatible string */
+define_machine(twr_p1025) {
+	.name			= "TWR-P1025",
+	.compatible		= "fsl,TWR-P1025",
+	.setup_arch		= twr_p1025_setup_arch,
+	.init_IRQ		= twr_p1025_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
new file mode 100644
index 0000000000..45f257fc1a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009 Extreme Engineering Solutions, Inc.
+ *
+ * X-ES board-specific functionality
+ *
+ * Based on mpc85xx_ds code from Freescale Semiconductor, Inc.
+ *
+ * Author: Nate Case <ncase@xes-inc.com>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* A few bit definitions needed for fixups on some boards */
+#define MPC85xx_L2CTL_L2E		0x80000000 /* L2 enable */
+#define MPC85xx_L2CTL_L2I		0x40000000 /* L2 flash invalidate */
+#define MPC85xx_L2CTL_L2SIZ_MASK	0x30000000 /* L2 SRAM size (R/O) */
+
+/* Allocate the big-endian MPIC and initialise it */
+void __init xes_mpc85xx_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC  ");
+	BUG_ON(!mpic);
+	mpic_init(mpic);
+}
+
+/*
+ * Enable the L2 as cache through the control register mapped at @l2_base.
+ *
+ * Writes L2E | L2I to the register; on MPC8540/MPC8560 machines the value
+ * additionally carries the current L2SIZ field shifted right by two bits.
+ * Each register access is bracketed by "msync; isync".
+ */
+static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base)
+{
+	volatile uint32_t ctl, tmp;
+
+	asm volatile("msync; isync");
+	/* Current control register value; only its L2SIZ field is used below */
+	tmp = in_be32(l2_base);
+
+	/*
+	 * xMon may have enabled part of L2 as SRAM, so we need to set it
+	 * up for all cache mode just to be safe.
+	 */
+	printk(KERN_INFO "xes_mpc85xx: Enabling L2 as cache\n");
+
+	ctl = MPC85xx_L2CTL_L2E | MPC85xx_L2CTL_L2I;
+	if (of_machine_is_compatible("MPC8540") ||
+	    of_machine_is_compatible("MPC8560"))
+		/*
+		 * Assume L2 SRAM is used fully for cache, so set
+		 * L2BLKSZ (bits 4:5) to match L2SIZ (bits 2:3).
+		 */
+		ctl |= (tmp & MPC85xx_L2CTL_L2SIZ_MASK) >> 2;
+
+	asm volatile("msync; isync");
+	out_be32(l2_base, ctl);
+	asm volatile("msync; isync");
+}
+
+/*
+ * Apply board fixups: for every "l2-cache-controller" node of an affected
+ * SoC, map its registers, enable the L2 as cache and unmap them again.
+ * Nodes whose resource cannot be obtained or mapped are skipped with a
+ * warning.
+ */
+static void __init xes_mpc85xx_fixups(void)
+{
+	struct device_node *np;
+	int err;
+
+	/*
+	 * Legacy xMon firmware on some X-ES boards does not enable L2
+	 * as cache.  We must ensure that they get enabled here.
+	 */
+	for_each_node_by_name(np, "l2-cache-controller") {
+		struct resource r;
+		void __iomem *l2_base;
+
+		/* Only MPC8548, MPC8540, and MPC8560 boards are affected */
+		if (!of_device_is_compatible(np,
+				    "fsl,mpc8548-l2-cache-controller") &&
+		    !of_device_is_compatible(np,
+				    "fsl,mpc8540-l2-cache-controller") &&
+		    !of_device_is_compatible(np,
+				    "fsl,mpc8560-l2-cache-controller"))
+			continue;
+
+		err = of_address_to_resource(np, 0, &r);
+		if (err) {
+			printk(KERN_WARNING "xes_mpc85xx: Could not get "
+			       "resource for device tree node '%pOF'\n",
+			       np);
+			continue;
+		}
+
+		l2_base = ioremap(r.start, resource_size(&r));
+		if (!l2_base) {
+			printk(KERN_WARNING "xes_mpc85xx: Could not map "
+			       "registers for device tree node '%pOF'\n",
+			       np);
+			continue;
+		}
+
+		xes_mpc85xx_configure_l2(l2_base);
+
+		/* The mapping is only needed for the one-off configuration */
+		iounmap(l2_base);
+	}
+}
+
+/*
+ * Setup the architecture
+ *
+ * Prints the board model taken from the root node's "model" property
+ * (without its "xes," vendor prefix, or "Unknown" when the property is
+ * missing), applies the L2 fixups, initialises SMP support and calls
+ * fsl_pci_assign_primary().
+ */
+static void __init xes_mpc85xx_setup_arch(void)
+{
+	struct device_node *root;
+	const char *model = "Unknown";
+	const char *prop;
+
+	root = of_find_node_by_path("/");
+	if (root == NULL)
+		return;
+
+	prop = of_get_property(root, "model", NULL);
+	if (prop) {
+		model = prop;
+		/* Strip the vendor prefix only when it is actually present */
+		if (!strncmp(model, "xes,", strlen("xes,")))
+			model += strlen("xes,");
+	}
+
+	printk(KERN_INFO "X-ES MPC85xx-based single-board computer: %s\n",
+	       model);
+
+	/* model may point into the node's property data; release root after use */
+	of_node_put(root);
+
+	xes_mpc85xx_fixups();
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for each machine */
+machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
+
+/*
+ * The three machine descriptions below share every callback and differ
+ * only in their name and compatible string.
+ */
+define_machine(xes_mpc8572) {
+	.name			= "X-ES MPC8572",
+	.compatible		= "xes,MPC8572",
+	.setup_arch		= xes_mpc85xx_setup_arch,
+	.init_IRQ		= xes_mpc85xx_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
+
+define_machine(xes_mpc8548) {
+	.name			= "X-ES MPC8548",
+	.compatible		= "xes,MPC8548",
+	.setup_arch		= xes_mpc85xx_setup_arch,
+	.init_IRQ		= xes_mpc85xx_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
+
+define_machine(xes_mpc8540) {
+	.name			= "X-ES MPC8540",
+	.compatible		= "xes,MPC8540",
+	.setup_arch		= xes_mpc85xx_setup_arch,
+	.init_IRQ		= xes_mpc85xx_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
new file mode 100644
index 0000000000..67467cd6f3
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig PPC_86xx
+	bool "86xx-based boards"
+	depends on PPC_BOOK3S_32
+	select FSL_SOC
+	select ALTIVEC
+	help
+	  The Freescale E600 SoCs have 74xx cores.
+
+if PPC_86xx
+
+config GEF_PPC9A
+	bool "GE PPC9A"
+	select DEFAULT_UIMAGE
+	select MMIO_NVRAM
+	select GPIOLIB
+	select GE_FPGA
+	help
+	  This option enables support for the GE PPC9A.
+
+config GEF_SBC310
+	bool "GE SBC310"
+	select DEFAULT_UIMAGE
+	select MMIO_NVRAM
+	select GPIOLIB
+	select GE_FPGA
+	help
+	  This option enables support for the GE SBC310.
+
+config GEF_SBC610
+	bool "GE SBC610"
+	select DEFAULT_UIMAGE
+	select MMIO_NVRAM
+	select GPIOLIB
+	select GE_FPGA
+	select HAVE_RAPIDIO
+	help
+	  This option enables support for the GE SBC610.
+
+config MVME7100
+	bool "Artesyn MVME7100"
+	help
+	  This option enables support for the Emerson/Artesyn MVME7100 board.
+
+endif
+
+# No prompt: not user-selectable. Defaults to y when any of the boards
+# listed below is enabled.
+config MPC8641
+	bool
+	select HAVE_PCI
+	select FSL_PCI if PCI
+	select PPC_UDBG_16550
+	select MPIC
+	default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
+			|| MVME7100
+
+# No prompt and no default here: only takes a value if something else
+# selects it.
+config MPC8610
+	bool
+	select HAVE_PCI
+	select FSL_PCI if PCI
+	select PPC_UDBG_16550
+	select MPIC
diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile
new file mode 100644
index 0000000000..dafbc037ff
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 86xx linux kernel.
+#
+
+obj-y				:= pic.o common.o
+obj-$(CONFIG_SMP)		+= mpc86xx_smp.o
+obj-$(CONFIG_GEF_SBC610)	+= gef_sbc610.o
+obj-$(CONFIG_GEF_SBC310)	+= gef_sbc310.o
+obj-$(CONFIG_GEF_PPC9A)		+= gef_ppc9a.o
+obj-$(CONFIG_MVME7100)          += mvme7100.o
diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c
new file mode 100644
index 0000000000..a4a5505276
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/common.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines common to most mpc86xx-based boards.
+ */
+
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_platform.h>
+#include <asm/reg.h>
+#include <asm/synch.h>
+
+#include "mpc86xx.h"
+
+/* Match table passed to of_platform_bus_probe() by mpc86xx_common_publish_devices() */
+static const struct of_device_id mpc86xx_common_ids[] __initconst = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .compatible = "simple-bus", },
+	{ .name = "localbus", },
+	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,mpc8641-pcie", },
+	{},
+};
+
+/*
+ * Probe the platform buses matching mpc86xx_common_ids.
+ * Returns the result of of_platform_bus_probe().
+ */
+int __init mpc86xx_common_publish_devices(void)
+{
+	int ret = of_platform_bus_probe(NULL, mpc86xx_common_ids, NULL);
+
+	return ret;
+}
+
+/*
+ * Zero both halves of the time base and set HID0[TBEN].
+ * Always returns 0.
+ */
+long __init mpc86xx_time_init(void)
+{
+	unsigned int temp;
+
+	/* Set the time base to zero */
+	mtspr(SPRN_TBWL, 0);
+	mtspr(SPRN_TBWU, 0);
+
+	/* Read-modify-write HID0 to set the TBEN bit, followed by isync() */
+	temp = mfspr(SPRN_HID0);
+	temp |= HID0_TBEN;
+	mtspr(SPRN_HID0, temp);
+	isync();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
new file mode 100644
index 0000000000..f7f98cca7b
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE PPC9A board support
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <asm/mpic.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/ge/ge_pic.h>
+
+#include "mpc86xx.h"
+
+#undef DEBUG
+/* DBG(): printf-style debug output; expands to an empty statement unless DEBUG is defined. */
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR "PPC9A: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+void __iomem *ppc9a_regs;	/* set from of_iomap() in gef_ppc9a_setup_arch(); stays NULL if that mapping is not made */
+
+/*
+ * Initialise the MPIC via mpc86xx_init_irq(), then pass the FPGA's simple
+ * interrupt controller node to gef_pic_init() so it is cascaded into the MPIC.
+ */
+static void __init gef_ppc9a_init_irq(void)
+{
+	struct device_node *fpga_pic;
+
+	mpc86xx_init_irq();
+
+	fpga_pic = of_find_compatible_node(NULL, NULL, "gef,fpga-pic-1.00");
+	if (fpga_pic) {
+		gef_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+		return;
+	}
+
+	printk(KERN_WARNING "PPC9A: No FPGA PIC\n");
+}
+
+/*
+ * Board setup: print the banner, call mpc86xx_smp_init() (CONFIG_SMP) and
+ * fsl_pci_assign_primary(), map the FPGA registers, set up MMIO NVRAM.
+ */
+static void __init gef_ppc9a_setup_arch(void)
+{
+	struct device_node *np;
+
+	printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n");
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+	fsl_pci_assign_primary();
+
+	np = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs");
+	if (np) {
+		ppc9a_regs = of_iomap(np, 0);
+		if (!ppc9a_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+		of_node_put(np);
+	}
+#ifdef CONFIG_MMIO_NVRAM
+	mmio_nvram_init();
+#endif
+}
+
+/*
+ * Return the PCB revision: bits 23:16 of the 32-bit register read
+ * big-endian from the start of the ppc9a_regs mapping.
+ */
+static unsigned int gef_ppc9a_get_pcb_rev(void)
+{
+	return (ioread32be(ppc9a_regs) >> 16) & 0xff;
+}
+
+/*
+ * Return the board (software) revision: bits 15:8 of the 32-bit register
+ * read big-endian from the start of the ppc9a_regs mapping.
+ */
+static unsigned int gef_ppc9a_get_board_rev(void)
+{
+	return (ioread32be(ppc9a_regs) >> 8) & 0xff;
+}
+
+/*
+ * Return the FPGA revision: the low four bits of the 32-bit register
+ * read big-endian from the start of the ppc9a_regs mapping.
+ */
+static unsigned int gef_ppc9a_get_fpga_rev(void)
+{
+	return ioread32be(ppc9a_regs) & 0xf;
+}
+
+/*
+ * Return the VME geographical address: the low five bits of the 32-bit
+ * register read big-endian at offset 0x4 of the ppc9a_regs mapping.
+ */
+static unsigned int gef_ppc9a_get_vme_geo_addr(void)
+{
+	return ioread32be(ppc9a_regs + 0x4) & 0x1f;
+}
+
+/*
+ * Return the VME system controller status: bit 9 of the 32-bit register
+ * read big-endian at offset 0x4 of the ppc9a_regs mapping (0 or 1).
+ */
+static unsigned int gef_ppc9a_get_vme_is_syscon(void)
+{
+	return (ioread32be(ppc9a_regs + 0x4) >> 9) & 0x1;
+}
+
+/*
+ * show_cpuinfo hook: print vendor, PCB/board revision, FPGA revision, SVR,
+ * VME geographical address and VME system-controller status to @m.
+ */
+static void gef_ppc9a_show_cpuinfo(struct seq_file *m)
+{
+	unsigned int svr = mfspr(SPRN_SVR);
+
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+	seq_printf(m, "Revision\t: %u%c\n", gef_ppc9a_get_pcb_rev(),
+		   'A' + gef_ppc9a_get_board_rev());
+	seq_printf(m, "FPGA Revision\t: %u\n", gef_ppc9a_get_fpga_rev());
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+	seq_printf(m, "VME geo. addr\t: %u\n", gef_ppc9a_get_vme_geo_addr());
+	seq_printf(m, "VME syscon\t: %s\n",
+		   gef_ppc9a_get_vme_is_syscon() ? "yes" : "no");
+}
+
+/*
+ * PCI header fixup for the NEC uPD720101 USB controller; does nothing
+ * unless the running machine is gef_ppc9a.
+ */
+static void gef_ppc9a_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	if (!machine_is(gef_ppc9a))
+		return;
+
+	printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+	/* Ensure ports 1, 2, 3, 4 & 5 are enabled */
+	pci_read_config_dword(pdev, 0xe0, &ports);
+	pci_write_config_dword(pdev, 0xe0, (ports & ~7) | 0x5);
+	/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+	pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, gef_ppc9a_nec_fixup);
+
+machine_arch_initcall(gef_ppc9a, mpc86xx_common_publish_devices);
+/* Machine description tying the PPC9A hooks above to the "gef,ppc9a" compatible string. */
+define_machine(gef_ppc9a) {
+	.name			= "GE PPC9A",
+	.compatible		= "gef,ppc9a",
+	.setup_arch		= gef_ppc9a_setup_arch,
+	.init_IRQ		= gef_ppc9a_init_irq,
+	.show_cpuinfo		= gef_ppc9a_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.time_init		= mpc86xx_time_init,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
new file mode 100644
index 0000000000..689835f7f0
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC310 board support
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <asm/mpic.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/ge/ge_pic.h>
+
+#include "mpc86xx.h"
+
+#undef DEBUG
+/* DBG(): printf-style debug output; expands to an empty statement unless DEBUG is defined. */
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR "SBC310: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+void __iomem *sbc310_regs;	/* set from of_iomap() in gef_sbc310_setup_arch(); stays NULL if that mapping is not made */
+
+/*
+ * Initialise the MPIC via mpc86xx_init_irq(), then pass the FPGA's simple
+ * interrupt controller node to gef_pic_init() so it is cascaded into the MPIC.
+ */
+static void __init gef_sbc310_init_irq(void)
+{
+	struct device_node *fpga_pic;
+
+	mpc86xx_init_irq();
+
+	fpga_pic = of_find_compatible_node(NULL, NULL, "gef,fpga-pic");
+	if (fpga_pic) {
+		gef_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+		return;
+	}
+
+	printk(KERN_WARNING "SBC310: No FPGA PIC\n");
+}
+
+/*
+ * Board setup: print the banner, call mpc86xx_smp_init() (CONFIG_SMP) and
+ * fsl_pci_assign_primary(), map the FPGA registers, set up MMIO NVRAM.
+ */
+static void __init gef_sbc310_setup_arch(void)
+{
+	struct device_node *np;
+
+	printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n");
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+	fsl_pci_assign_primary();
+	np = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
+	if (np) {
+		sbc310_regs = of_iomap(np, 0);
+		if (!sbc310_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+		of_node_put(np);
+	}
+#ifdef CONFIG_MMIO_NVRAM
+	mmio_nvram_init();
+#endif
+}
+
+/*
+ * Return the board ID: the low eight bits of the 32-bit register read
+ * with ioread32() from the start of the sbc310_regs mapping.
+ */
+static unsigned int gef_sbc310_get_board_id(void)
+{
+	return ioread32(sbc310_regs) & 0xff;
+}
+
+/*
+ * Return the PCB revision: bits 15:8 of the 32-bit register read with
+ * ioread32() from the start of the sbc310_regs mapping.
+ */
+static unsigned int gef_sbc310_get_pcb_rev(void)
+{
+	return (ioread32(sbc310_regs) >> 8) & 0xff;
+}
+
+/*
+ * Return the board (software) revision: bits 23:16 of the 32-bit register
+ * read with ioread32() from the start of the sbc310_regs mapping.
+ */
+static unsigned int gef_sbc310_get_board_rev(void)
+{
+	return (ioread32(sbc310_regs) >> 16) & 0xff;
+}
+
+/*
+ * Return the FPGA revision: bits 27:24 of the 32-bit register read with
+ * ioread32() from the start of the sbc310_regs mapping.
+ */
+static unsigned int gef_sbc310_get_fpga_rev(void)
+{
+	return (ioread32(sbc310_regs) >> 24) & 0xf;
+}
+
+/* show_cpuinfo hook: vendor, board ID, PCB/board revision, FPGA revision and SVR. */
+static void gef_sbc310_show_cpuinfo(struct seq_file *m)
+{
+	unsigned int svr = mfspr(SPRN_SVR);
+
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+	seq_printf(m, "Board ID\t: 0x%2.2x\n", gef_sbc310_get_board_id());
+	seq_printf(m, "Revision\t: %u%c\n", gef_sbc310_get_pcb_rev(),
+		   'A' + gef_sbc310_get_board_rev() - 1);
+	seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc310_get_fpga_rev());
+
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+}
+
+/*
+ * PCI header fixup for the NEC uPD720101 USB controller; does nothing
+ * unless the running machine is gef_sbc310.
+ */
+static void gef_sbc310_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	if (!machine_is(gef_sbc310))
+		return;
+
+	printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+	/* Ensure only ports 1 & 2 are enabled */
+	pci_read_config_dword(pdev, 0xe0, &ports);
+	pci_write_config_dword(pdev, 0xe0, (ports & ~7) | 0x2);
+	/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+	pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, gef_sbc310_nec_fixup);
+
+machine_arch_initcall(gef_sbc310, mpc86xx_common_publish_devices);
+/* Machine description tying the SBC310 hooks above to the "gef,sbc310" compatible string. */
+define_machine(gef_sbc310) {
+	.name			= "GE SBC310",
+	.compatible		= "gef,sbc310",
+	.setup_arch		= gef_sbc310_setup_arch,
+	.init_IRQ		= gef_sbc310_init_irq,
+	.show_cpuinfo		= gef_sbc310_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.time_init		= mpc86xx_time_init,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
new file mode 100644
index 0000000000..365f511186
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC610 board support
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <asm/mpic.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/ge/ge_pic.h>
+
+#include "mpc86xx.h"
+
+#undef DEBUG
+/* DBG(): printf-style debug output; expands to an empty statement unless DEBUG is defined. */
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR "SBC610: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+void __iomem *sbc610_regs;	/* set from of_iomap() in gef_sbc610_setup_arch(); stays NULL if that mapping is not made */
+
+/*
+ * Initialise the MPIC via mpc86xx_init_irq(), then pass the FPGA's simple
+ * interrupt controller node to gef_pic_init() so it is cascaded into the MPIC.
+ */
+static void __init gef_sbc610_init_irq(void)
+{
+	struct device_node *fpga_pic;
+
+	mpc86xx_init_irq();
+
+	fpga_pic = of_find_compatible_node(NULL, NULL, "gef,fpga-pic");
+	if (fpga_pic) {
+		gef_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+		return;
+	}
+
+	printk(KERN_WARNING "SBC610: No FPGA PIC\n");
+}
+
+/*
+ * Board setup: print the banner, call mpc86xx_smp_init() (CONFIG_SMP) and
+ * fsl_pci_assign_primary(), map the FPGA registers, set up MMIO NVRAM.
+ */
+static void __init gef_sbc610_setup_arch(void)
+{
+	struct device_node *np;
+
+	printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n");
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+	fsl_pci_assign_primary();
+
+	np = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
+	if (np) {
+		sbc610_regs = of_iomap(np, 0);
+		if (!sbc610_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+		of_node_put(np);
+	}
+#ifdef CONFIG_MMIO_NVRAM
+	mmio_nvram_init();
+#endif
+}
+
+/*
+ * Return the PCB revision: bits 15:8 of the 32-bit register read with
+ * ioread32() from the start of the sbc610_regs mapping.
+ */
+static unsigned int gef_sbc610_get_pcb_rev(void)
+{
+	return (ioread32(sbc610_regs) >> 8) & 0xff;
+}
+
+/*
+ * Return the board (software) revision: bits 23:16 of the 32-bit register
+ * read with ioread32() from the start of the sbc610_regs mapping.
+ */
+static unsigned int gef_sbc610_get_board_rev(void)
+{
+	return (ioread32(sbc610_regs) >> 16) & 0xff;
+}
+
+/*
+ * Return the FPGA revision: bits 27:24 of the 32-bit register read with
+ * ioread32() from the start of the sbc610_regs mapping.
+ */
+static unsigned int gef_sbc610_get_fpga_rev(void)
+{
+	return (ioread32(sbc610_regs) >> 24) & 0xf;
+}
+
+/* show_cpuinfo hook: vendor, PCB/board revision, FPGA revision and SVR. */
+static void gef_sbc610_show_cpuinfo(struct seq_file *m)
+{
+	unsigned int svr = mfspr(SPRN_SVR);
+
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+	seq_printf(m, "Revision\t: %u%c\n", gef_sbc610_get_pcb_rev(),
+		   'A' + gef_sbc610_get_board_rev() - 1);
+	seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc610_get_fpga_rev());
+
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+}
+
+/*
+ * PCI header fixup for the NEC uPD720101 USB controller; does nothing
+ * unless the running machine is gef_sbc610.
+ */
+static void gef_sbc610_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	if (!machine_is(gef_sbc610))
+		return;
+
+	printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+	/* Ensure ports 1, 2, 3, 4 & 5 are enabled */
+	pci_read_config_dword(pdev, 0xe0, &ports);
+	pci_write_config_dword(pdev, 0xe0, (ports & ~7) | 0x5);
+	/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+	pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, gef_sbc610_nec_fixup);
+
+machine_arch_initcall(gef_sbc610, mpc86xx_common_publish_devices);
+/* Machine description tying the SBC610 hooks above to the "gef,sbc610" compatible string. */
+define_machine(gef_sbc610) {
+	.name			= "GE SBC610",
+	.compatible		= "gef,sbc610",
+	.setup_arch		= gef_sbc610_setup_arch,
+	.init_IRQ		= gef_sbc610_init_irq,
+	.show_cpuinfo		= gef_sbc610_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.time_init		= mpc86xx_time_init,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/mpc86xx.h b/arch/powerpc/platforms/86xx/mpc86xx.h
new file mode 100644
index 0000000000..61e52c757e
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mpc86xx.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+#ifndef __MPC86XX_H__
+#define __MPC86XX_H__
+
+/*
+ * Declaration for the various functions exported by the
+ * mpc86xx_* files. Mostly for use by mpc86xx_setup().
+ */
+
+extern void mpc86xx_smp_init(void);	/* defined in mpc86xx_smp.c */
+extern void mpc86xx_init_irq(void);	/* defined in pic.c */
+extern long mpc86xx_time_init(void);	/* defined in common.c; always returns 0 */
+extern int mpc86xx_common_publish_devices(void);	/* defined in common.c */
+
+#endif	/* __MPC86XX_H__ */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
new file mode 100644
index 0000000000..8a7e55acf0
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Xianghua Xiao <x.xiao@freescale.com>
+ *         Zhang Wei <wei.zhang@freescale.com>
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pgtable.h>
+
+#include <asm/code-patching.h>
+#include <asm/page.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc86xx.h"
+
+extern void __secondary_start_mpc86xx(void);
+
+#define MCM_PORT_CONFIG_OFFSET	0x10
+
+/* Offset from CCSRBAR */
+#define MPC86xx_MCM_OFFSET      (0x1000)
+#define MPC86xx_MCM_SIZE        (0x1000)
+
+/*
+ * Start up core @nr by setting bit (24 + nr) of the MCM port configuration
+ * register; an out-of-range @nr or a failed ioremap() leaves it untouched.
+ */
+static void __init smp_86xx_release_core(int nr)
+{
+	__be32 __iomem *mcm_vaddr;
+	unsigned long pcr;
+
+	if (nr < 0 || nr >= NR_CPUS)
+		return;
+
+	mcm_vaddr = ioremap(get_immrbase() + MPC86xx_MCM_OFFSET, MPC86xx_MCM_SIZE);
+	if (!mcm_vaddr)	/* never dereference a failed mapping */
+		return;
+	pcr = in_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2));
+	pcr |= 1UL << (nr + 24);	/* 1UL: nr == 7 must not shift into int's sign bit */
+	out_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2), pcr);
+	iounmap(mcm_vaddr);
+}
+
+/* Boot CPU @nr through a temporarily patched reset vector; -ENOENT for a bad @nr, else 0. */
+static int __init
+smp_86xx_kick_cpu(int nr)
+{
+	unsigned int save_vector;
+	unsigned long target, flags;
+	int n = 0;
+	unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100);
+
+	if (nr < 0 || nr >= NR_CPUS)
+		return -ENOENT;
+
+	pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr);
+
+	local_irq_save(flags);
+
+	/* Save the instruction word currently at the reset vector (KERNELBASE + 0x100) */
+	save_vector = *vector;
+
+	/* Setup fake reset vector to call __secondary_start_mpc86xx. */
+	target = (unsigned long) __secondary_start_mpc86xx;
+	patch_branch(vector, target, BRANCH_SET_LINK);
+
+	/* Kick that CPU */
+	smp_86xx_release_core(nr);
+
+	/* Wait for the CPU to acknowledge: at most 1000 polls, mdelay(1) apart. */
+	while ((__secondary_hold_acknowledge != nr) && (n++, n < 1000))
+		mdelay(1);
+
+	/* Restore the exception vector */
+	patch_instruction(vector, ppc_inst(save_vector));
+
+	local_irq_restore(flags);
+
+	pr_debug("wait CPU #%d for %d msecs.\n", nr, n);
+
+	return 0;	/* a timeout in the wait loop above is not reported */
+}
+
+/* setup_cpu hook: @cpu_nr is unused, the function only calls mpic_setup_this_cpu(). */
+static void __init
+smp_86xx_setup_cpu(int cpu_nr)
+{
+	mpic_setup_this_cpu();
+}
+
+/* SMP operations table that mpc86xx_smp_init() installs as smp_ops. */
+struct smp_ops_t smp_86xx_ops = {
+	.cause_nmi_ipi = NULL,
+	.message_pass = smp_mpic_message_pass,
+	.probe = smp_mpic_probe,
+	.kick_cpu = smp_86xx_kick_cpu,
+	.setup_cpu = smp_86xx_setup_cpu,
+	.take_timebase = smp_generic_take_timebase,
+	.give_timebase = smp_generic_give_timebase,
+};
+
+/* Point the global smp_ops at smp_86xx_ops. */
+void __init
+mpc86xx_smp_init(void)
+{
+	smp_ops = &smp_86xx_ops;
+}
diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c
new file mode 100644
index 0000000000..cee49ecd32
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mvme7100.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ *
+ * Based on earlier code by:
+ *
+ *	Ajit Prem <ajit.prem@emerson.com>
+ *	Copyright 2008 Emerson
+ *
+ * USB host fixup is borrowed from:
+ *
+ *	Martyn Welch <martyn.welch@ge.com>
+ *	Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc86xx.h"
+
+#define MVME7100_INTERRUPT_REG_2_OFFSET	0x05
+#define MVME7100_DS1375_MASK		0x40
+#define MVME7100_MAX6649_MASK		0x20
+#define MVME7100_ABORT_MASK		0x10
+
+/*
+ * Setup the architecture: report progress, call mpc86xx_smp_init() (CONFIG_SMP)
+ * and fsl_pci_assign_primary(), then map the BCSR block and update register 2.
+ */
+static void __init mvme7100_setup_arch(void)
+{
+	void __iomem *bcsr = NULL;
+	struct device_node *np;
+	u8 irq_reg2;
+
+	if (ppc_md.progress)
+		ppc_md.progress("mvme7100_setup_arch()", 0);
+
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	/* Remap BCSR registers */
+	np = of_find_compatible_node(NULL, NULL, "artesyn,mvme7100-bcsr");
+	if (np) {
+		bcsr = of_iomap(np, 0);
+		of_node_put(np);
+	}
+
+	if (!bcsr) {
+		pr_warn("Unable to map board registers\n");
+	} else {
+		/* Disable ds1375, max6649, and abort interrupts */
+		irq_reg2 = readb(bcsr + MVME7100_INTERRUPT_REG_2_OFFSET);
+		irq_reg2 |= MVME7100_DS1375_MASK | MVME7100_MAX6649_MASK | MVME7100_ABORT_MASK;
+		writeb(irq_reg2, bcsr + MVME7100_INTERRUPT_REG_2_OFFSET);
+	}
+
+	pr_info("MVME7100 board from Artesyn\n");
+}
+
+/*
+ * Called very early, while the device tree is still flat: returns
+ * of_flat_dt_is_compatible()'s verdict on whether the flat tree's root
+ * node is compatible with "artesyn,MVME7100".
+ */
+static int __init mvme7100_probe(void)
+{
+	return of_flat_dt_is_compatible(of_get_flat_dt_root(), "artesyn,MVME7100");
+}
+
+/* PCI header fixup for the NEC USB host controller; a no-op unless the machine is mvme7100. */
+static void mvme7100_usb_host_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	if (!machine_is(mvme7100))
+		return;
+
+	/* Ensure only ports 1 & 2 are enabled */
+	pci_read_config_dword(pdev, 0xe0, &ports);
+	pci_write_config_dword(pdev, 0xe0, (ports & ~7) | 0x2);
+	/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+	pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+	mvme7100_usb_host_fixup);
+
+machine_arch_initcall(mvme7100, mpc86xx_common_publish_devices);
+/* Machine description; this board supplies a .probe hook (mvme7100_probe) rather than .compatible. */
+define_machine(mvme7100) {
+	.name			= "MVME7100",
+	.probe			= mvme7100_probe,
+	.setup_arch		= mvme7100_setup_arch,
+	.init_IRQ		= mpc86xx_init_irq,
+	.get_irq		= mpic_get_irq,
+	.time_init		= mpc86xx_time_init,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
new file mode 100644
index 0000000000..9ca36de235
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+
+#include "mpc86xx.h"
+
+#ifdef CONFIG_PPC_I8259
+/* Chained handler: run the interrupt reported by i8259_irq(), if any, then EOI @desc. */
+static void mpc86xx_8259_cascade(struct irq_desc *desc)
+{
+	unsigned int pending = i8259_irq();
+
+	if (pending != 0)
+		generic_handle_irq(pending);
+
+	irq_desc_get_chip(desc)->irq_eoi(&desc->irq_data);
+}
+#endif	/* CONFIG_PPC_I8259 */
+
+/*
+ * Allocate and initialise the MPIC; with CONFIG_PPC_I8259 also cascade a
+ * "chrp,iic" i8259 into it, dropping the node reference on every exit path.
+ */
+void __init mpc86xx_init_irq(void)
+{
+#ifdef CONFIG_PPC_I8259
+	struct device_node *np;
+	struct device_node *cascade_node = NULL;
+	int cascade_irq;
+#endif
+	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+				       0, 256, " MPIC     ");
+	BUG_ON(mpic == NULL);
+	mpic_init(mpic);
+
+#ifdef CONFIG_PPC_I8259
+	/* Initialize i8259 controller */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			cascade_node = np;	/* break keeps the iterator's reference */
+			break;
+		}
+
+	if (cascade_node == NULL) {
+		printk(KERN_DEBUG "Could not find i8259 PIC\n");
+		return;
+	}
+
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (!cascade_irq) {
+		printk(KERN_ERR "Failed to map cascade interrupt\n");
+		of_node_put(cascade_node);	/* do not leak the node on failure */
+		return;
+	}
+	i8259_init(cascade_node, 0);
+	of_node_put(cascade_node);
+	irq_set_chained_handler(cascade_irq, mpc86xx_8259_cascade);
+#endif
+}
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
new file mode 100644
index 0000000000..a14d9d8997
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0
+config CPM1
+	bool
+	select CPM
+
+choice
+	prompt "8xx Machine Type"
+	depends on PPC_8xx
+	default MPC885ADS
+
+config MPC8XXFADS
+	bool "FADS"
+
+config MPC86XADS
+	bool "MPC86XADS"
+	select CPM1
+	help
+	  MPC86x Application Development System by Freescale Semiconductor.
+	  The MPC86xADS is meant to serve as a platform for s/w and h/w
+	  development around the MPC86X processor families.
+
+config MPC885ADS
+	bool "MPC885ADS"
+	select CPM1
+	select OF_DYNAMIC
+	help
+	  Freescale Semiconductor MPC885 Application Development System (ADS).
+	  Also known as DUET.
+	  The MPC885ADS is meant to serve as a platform for s/w and h/w
+	  development around the MPC885 processor family.
+
+config PPC_EP88XC
+	bool "Embedded Planet EP88xC (a.k.a. CWH-PPC-885XN-VE)"
+	select CPM1
+	help
+	  This enables support for the Embedded Planet EP88xC board.
+
+	  This board is also resold by Freescale as the QUICCStart
+	  MPC885 Evaluation System and/or the CWH-PPC-885XN-VE.
+
+config PPC_ADDER875
+	bool "Analogue & Micro Adder 875"
+	select CPM1
+	help
+	  This enables support for the Analogue & Micro Adder 875
+	  board.
+
+config TQM8XX
+	bool "TQM8XX"
+	select CPM1
+	help
+	  Support for the MPC8xx-based boards from TQM.
+
+endchoice
+
+menu "Freescale Ethernet driver platform-specific options"
+	depends on (FS_ENET && MPC885ADS)
+
+	config MPC8xx_SECOND_ETH
+	bool "Second Ethernet channel"
+	depends on MPC885ADS
+	default y
+	help
+	  This enables support for a second Ethernet channel on MPC885ADS and MPC86xADS
+	  boards. The latter will use SCC1; for 885ADS you can select it below.
+
+	choice
+		prompt "Second Ethernet channel"
+		depends on MPC8xx_SECOND_ETH
+		default MPC8xx_SECOND_ETH_FEC2
+
+		config MPC8xx_SECOND_ETH_FEC2
+		bool "FEC2"
+		depends on MPC885ADS
+		help
+		  Enable FEC2 to serve as the second Ethernet channel. Note that
+		  SMC2 (often the second UART) will not work if this is enabled.
+
+		config MPC8xx_SECOND_ETH_SCC3
+		bool "SCC3"
+		depends on MPC885ADS
+		help
+		  Enable SCC3 to serve as the second Ethernet channel. Note that
+		  SMC1 (often the first UART) will not work if this is enabled.
+
+	endchoice
+
+endmenu
+
+#
+# MPC8xx Communication options
+#
+
+menu "MPC8xx CPM Options"
+	depends on PPC_8xx
+
+# This doesn't really belong here, but it is convenient to ask
+# 8xx specific questions.
+comment "Generic MPC8xx Options"
+
+config 8xx_GPIO
+	bool "GPIO API Support"
+	select GPIOLIB
+	select OF_GPIO_MM_GPIOCHIP
+	help
+	  Saying Y here will cause the ports on an MPC8xx processor to be used
+	  with the GPIO API.  If you say N here, the kernel needs less memory.
+
+	  If in doubt, say Y here.
+
+config 8xx_CPU15
+	bool "CPU15 Silicon Errata"
+	depends on !HUGETLB_PAGE
+	default y
+	help
+	  This enables a workaround for erratum CPU15 on MPC8xx chips.
+	  This bug can cause incorrect code execution under certain
+	  circumstances.  This workaround adds some overhead (a TLB miss
+	  every time execution crosses a page boundary), and you may wish
+	  to disable it if you have worked around the bug in the compiler
+	  (by not placing conditional branches or branches to LR or CTR
+	  in the last word of a page, with a target of the last cache
+	  line in the next page), or if you have used some other
+	  workaround.
+
+	  If in doubt, say Y here.
+
+choice
+	prompt "Microcode patch selection"
+	default NO_UCODE_PATCH
+	help
+	  Help not implemented yet, coming soon.
+
+config NO_UCODE_PATCH
+	bool "None"
+
+config USB_SOF_UCODE_PATCH
+	bool "USB SOF patch"
+	help
+	  Help not implemented yet, coming soon.
+
+config I2C_SPI_UCODE_PATCH
+	bool "I2C/SPI relocation patch"
+	help
+	  Help not implemented yet, coming soon.
+
+config I2C_SPI_SMC1_UCODE_PATCH
+	bool "I2C/SPI/SMC1 relocation patch"
+	help
+	  Help not implemented yet, coming soon.
+
+config SMC_UCODE_PATCH
+	bool "SMC relocation patch"
+	help
+	  This microcode relocates SMC1 and SMC2 parameter RAMs at
+	  offset 0x1ec0 and 0x1fc0 to allow extended parameter RAM
+	  for SCC3 and SCC4.
+
+endchoice
+
+config UCODE_PATCH
+	bool
+	default y
+	depends on !NO_UCODE_PATCH
+
+menu "8xx advanced setup"
+	depends on PPC_8xx
+
+config PIN_TLB
+	bool "Pinned Kernel TLBs"
+	depends on ADVANCED_OPTIONS
+	help
+	  On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
+	  table 4 TLBs can be pinned.
+
+	  Pinning reduces the number of usable TLBs to 28 (i.e. by 12%), which
+	  is why it is selectable.
+
+	  This option does nothing by itself; it only enables the selection of
+	  what to pin.
+
+config PIN_TLB_DATA
+	bool "Pinned TLB for DATA"
+	depends on PIN_TLB
+	default y
+	help
+	  This pins the first 32 Mbytes of memory with 8M pages.
+
+config PIN_TLB_IMMR
+	bool "Pinned TLB for IMMR"
+	depends on PIN_TLB
+	default y
+	help
+	  This pins the IMMR area with a 512kbytes page. In case
+	  CONFIG_PIN_TLB_DATA is also selected, it will reduce
+	  CONFIG_PIN_TLB_DATA to 24 Mbytes.
+
+endmenu
+
+endmenu
diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile
new file mode 100644
index 0000000000..5a098f7d5d
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 8xx linux kernel.
+#
+obj-y			+= m8xx_setup.o machine_check.o pic.o
+obj-$(CONFIG_CPM1)		+= cpm1.o cpm1-ic.o
+obj-$(CONFIG_UCODE_PATCH)	+= micropatch.o
+obj-$(CONFIG_MPC885ADS)   += mpc885ads_setup.o
+obj-$(CONFIG_MPC86XADS)   += mpc86xads_setup.o
+obj-$(CONFIG_PPC_EP88XC)  += ep88xc.o
+obj-$(CONFIG_PPC_ADDER875) += adder875.o
+obj-$(CONFIG_TQM8XX)      += tqm8xx_setup.o
diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
new file mode 100644
index 0000000000..d02f8dd664
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Analogue & Micro Adder MPC875 board support
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/cpm1.h>
+#include <asm/8xx_immap.h>
+#include <asm/udbg.h>
+
+#include "mpc8xx.h"
+#include "pic.h"
+
+struct cpm_pin {	/* one argument triple for cpm1_set_pin(), see init_ioports() */
+	int port, pin, flags;
+};
+
+static __initdata struct cpm_pin adder875_pins[] = {	/* { port, pin, flags } */
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* MII1 */
+	{CPM_PORTA, 0, CPM_PIN_INPUT},
+	{CPM_PORTA, 1, CPM_PIN_INPUT},
+	{CPM_PORTA, 2, CPM_PIN_INPUT},
+	{CPM_PORTA, 3, CPM_PIN_INPUT},
+	{CPM_PORTA, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTB, 19, CPM_PIN_INPUT},
+	{CPM_PORTB, 31, CPM_PIN_INPUT},
+	{CPM_PORTC, 12, CPM_PIN_INPUT},
+	{CPM_PORTC, 13, CPM_PIN_INPUT},
+	{CPM_PORTE, 30, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 31, CPM_PIN_OUTPUT},
+
+	/* MII2 */
+	{CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 16, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 21, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 22, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 23, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 24, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 25, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 26, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 27, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 28, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 29, CPM_PIN_OUTPUT},
+};	/* every entry is applied with cpm1_set_pin() in init_ioports() */
+
+/*
+ * Apply every adder875_pins[] entry with cpm1_set_pin(), run the SMC1/BRG1
+ * clock setup, then clear CPTR bits 0x00000180 (FEC1 and FEC2 to MII mode).
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *pin = adder875_pins;
+	const struct cpm_pin *end = pin + ARRAY_SIZE(adder875_pins);
+
+	for (; pin < end; pin++)
+		cpm1_set_pin(pin->port, pin->pin, pin->flags);
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+	clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
+}
+
+static void __init adder875_setup(void)
+{	/* setup_arch hook: cpm_reset() first, then the pin/clock setup in init_ioports() */
+	cpm_reset();
+	init_ioports();
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .compatible = "simple-bus", },
+	{},	/* empty terminating entry */
+};	/* match list passed to of_platform_bus_probe() in declare_of_platform_devices() */
+
+static int __init declare_of_platform_devices(void)
+{	/* the probe's return value is discarded; this initcall always returns 0 */
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+	return 0;
+}
+machine_device_initcall(adder875, declare_of_platform_devices);
+
+define_machine(adder875) {	/* machine description for the Adder MPC875 board */
+	.name = "Adder MPC875",
+	.compatible = "analogue-and-micro,adder875",
+	.setup_arch = adder875_setup,
+	.init_IRQ = mpc8xx_pic_init,
+	.get_irq = mpc8xx_get_irq,
+	.restart = mpc8xx_restart,
+	.progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c
new file mode 100644
index 0000000000..a18fc7c99f
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/cpm1-ic.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Interrupt controller for the
+ * Communication Processor Module.
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/platform_device.h>
+#include <asm/cpm1.h>
+
+/* Per-controller state, allocated in cpm_pic_probe(). */
+struct cpm_pic_data {
+	cpic8xx_t __iomem *reg;		/* mapped CPM interrupt controller registers */
+	struct irq_domain *host;	/* linear domain covering the CPM vectors */
+};
+
+/* irq_chip .irq_mask: clear the source's bit in the CPM mask register. */
+static void cpm_mask_irq(struct irq_data *d)
+{
+	struct cpm_pic_data *pic = irq_data_get_irq_chip_data(d);
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	u32 bit = 1 << vec;
+
+	clrbits32(&pic->reg->cpic_cimr, bit);
+}
+
+/* irq_chip .irq_unmask: set the source's bit in the CPM mask register. */
+static void cpm_unmask_irq(struct irq_data *d)
+{
+	struct cpm_pic_data *pic = irq_data_get_irq_chip_data(d);
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	u32 bit = 1 << vec;
+
+	setbits32(&pic->reg->cpic_cimr, bit);
+}
+
+/* irq_chip .irq_eoi: write the source's bit to the in-service register. */
+static void cpm_end_irq(struct irq_data *d)
+{
+	struct cpm_pic_data *pic = irq_data_get_irq_chip_data(d);
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	u32 bit = 1 << vec;
+
+	out_be32(&pic->reg->cpic_cisr, bit);
+}
+
+/* irq_chip for CPM sources; installed per virq by cpm_pic_host_map(). */
+static struct irq_chip cpm_pic = {
+	.name = "CPM PIC",
+	.irq_mask = cpm_mask_irq,
+	.irq_unmask = cpm_unmask_irq,
+	.irq_eoi = cpm_end_irq,
+};
+
+/*
+ * Acknowledge the CPM interrupt and translate the reported vector into a
+ * Linux interrupt number through the controller's linear domain.
+ */
+static int cpm_get_irq(struct irq_desc *desc)
+{
+	struct cpm_pic_data *pic = irq_desc_get_handler_data(desc);
+	int vec;
+
+	/*
+	 * Set the ACK bit, then read the register back; the vector is in
+	 * the top five bits of the 16-bit value.
+	 */
+	out_be16(&pic->reg->cpic_civr, 1);
+	vec = in_be16(&pic->reg->cpic_civr) >> 11;
+
+	return irq_linear_revmap(pic->host, vec);
+}
+
+/* Chained handler on the parent interrupt: dispatch the pending CPM source. */
+static void cpm_cascade(struct irq_desc *desc)
+{
+	int virq = cpm_get_irq(desc);
+
+	generic_handle_irq(virq);
+}
+
+/*
+ * irq_domain .map callback: attach the controller state, mark the
+ * interrupt as level type and install cpm_pic with the fasteoi flow
+ * handler.  The hardware number @hw is not used.  Always returns 0.
+ */
+static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq,
+			    irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &cpm_pic, handle_fasteoi_irq);
+	return 0;
+}
+
+/* Domain operations for the CPM PIC; only .map is provided. */
+static const struct irq_domain_ops cpm_pic_host_ops = {
+	.map = cpm_pic_host_map,
+};
+
+/*
+ * Probe the CPM interrupt controller: map its registers, program CICR,
+ * mask every source, create a 64-entry linear irq domain and chain the
+ * controller onto its parent interrupt.
+ *
+ * Returns 0 on success, -ENODEV when the memory resource, the mapping or
+ * the irq domain is unavailable, -ENOMEM on allocation failure, or the
+ * error returned by platform_get_irq().
+ */
+static int cpm_pic_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int irq;
+	struct cpm_pic_data *data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	/* devm allocations: released automatically on the error returns below. */
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->reg = devm_ioremap(dev, res->start, resource_size(res));
+	if (!data->reg)
+		return -ENODEV;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	/*
+	 * Initialize the CPM interrupt controller.
+	 * NOTE(review): the field at bit 13 is derived from the parent's
+	 * hardware irq number divided by two -- presumably the interrupt
+	 * request level; confirm against the CICR register description.
+	 */
+	out_be32(&data->reg->cpic_cicr,
+		 (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
+		 ((virq_to_hw(irq) / 2) << 13) | CICR_HP_MASK);
+
+	/* Start with every CPM source masked. */
+	out_be32(&data->reg->cpic_cimr, 0);
+
+	data->host = irq_domain_add_linear(dev->of_node, 64, &cpm_pic_host_ops, data);
+	if (!data->host)
+		return -ENODEV;
+
+	irq_set_handler_data(irq, data);
+	irq_set_chained_handler(irq, cpm_cascade);
+
+	/* Set CICR_IEN only once the cascade handler is in place. */
+	setbits32(&data->reg->cpic_cicr, CICR_IEN);
+
+	return 0;
+}
+
+/*
+ * Device tree matches for the PIC driver: the "fsl,cpm1-pic" compatible,
+ * or a node of type "cpm-pic" with compatible "CPM".
+ */
+static const struct of_device_id cpm_pic_match[] = {
+	{
+		.compatible = "fsl,cpm1-pic",
+	}, {
+		.type = "cpm-pic",
+		.compatible = "CPM",
+	}, {},
+};
+
+/* Platform driver for the CPM PIC; probe only, no remove callback. */
+static struct platform_driver cpm_pic_driver = {
+	.driver	= {
+		.name		= "cpm-pic",
+		.of_match_table	= cpm_pic_match,
+	},
+	.probe	= cpm_pic_probe,
+};
+
+/* Register the PIC driver at arch_initcall time. */
+static int __init cpm_pic_init(void)
+{
+	return platform_driver_register(&cpm_pic_driver);
+}
+arch_initcall(cpm_pic_init);
+
+/*
+ * The CPM can generate the error interrupt when there is a race condition
+ * between generating and masking interrupts.  All we have to do is ACK it
+ * and return.  This is a no-op function so we don't need any special
+ * tests in the interrupt handler.
+ *
+ * Both arguments are ignored; the handler always reports IRQ_HANDLED.
+ */
+static irqreturn_t cpm_error_interrupt(int irq, void *dev)
+{
+	return IRQ_HANDLED;
+}
+
+/*
+ * Probe for the CPM error interrupt: look up the device's first interrupt
+ * and attach cpm_error_interrupt() to it.  Returns the lookup error or the
+ * result of request_irq().
+ */
+static int cpm_error_probe(struct platform_device *pdev)
+{
+	int virq = platform_get_irq(pdev, 0);
+
+	if (virq >= 0)
+		return request_irq(virq, cpm_error_interrupt, IRQF_NO_THREAD,
+				   "error", NULL);
+
+	return virq;
+}
+
+/* Matches the CPM node itself: compatible "fsl,cpm1" or device type "cpm". */
+static const struct of_device_id cpm_error_ids[] = {
+	{ .compatible = "fsl,cpm1" },
+	{ .type = "cpm" },
+	{},
+};
+
+/* Platform driver whose only job is to claim the CPM error interrupt. */
+static struct platform_driver cpm_error_driver = {
+	.driver	= {
+		.name		= "cpm-error",
+		.of_match_table	= cpm_error_ids,
+	},
+	.probe	= cpm_error_probe,
+};
+
+/* Register the error-interrupt driver at subsys_initcall time. */
+static int __init cpm_error_init(void)
+{
+	return platform_driver_register(&cpm_error_driver);
+}
+subsys_initcall(cpm_error_init);
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
new file mode 100644
index 0000000000..ebb5f6a27d
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * General Purpose functions for the global management of the
+ * Communication Processor Module.
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ *
+ * In addition to the individual control of the communication
+ * channels, there are a few functions that globally affect the
+ * communication processor.
+ *
+ * Buffer descriptors must be allocated from the dual ported memory
+ * space.  The allocator for that is here.  When the communication
+ * process is reset, we reclaim the memory available.  There is
+ * currently no deallocator for this memory.
+ * The amount of space available is platform dependent.  On the
+ * MBX, the EPPC software loads additional microcode into the
+ * communication processor, and uses some of the DP ram for this
+ * purpose.  Currently, the first 512 bytes and the last 256 bytes of
+ * memory are used.  Right now I am conservative and only use the
+ * memory that can never be used for microcode.  If there are
+ * applications that require more DP ram, we can expand the boundaries
+ * but then we have to be careful of any downloaded microcode.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <asm/page.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/io.h>
+#include <asm/rheap.h>
+#include <asm/cpm.h>
+
+#include <sysdev/fsl_soc.h>
+
+#ifdef CONFIG_8xx_GPIO
+#include <linux/gpio/legacy-of-mm-gpiochip.h>
+#endif
+
+#define CPM_MAP_SIZE    (0x4000)
+
+/* cpmp is NULL until cpm_reset() points it at mpc8xx_immr->im_cpm. */
+cpm8xx_t __iomem *cpmp;  /* Pointer to comm processor space */
+/* The internal register map is reached through the fixed VIRT_IMMR_BASE address. */
+immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
+
+/*
+ * Set up cpmp, reset the communication processor (skipped when
+ * CONFIG_PPC_EARLY_DEBUG_CPM is set), load the microcode patch when
+ * CONFIG_UCODE_PATCH is set, and program the SDMA configuration register.
+ */
+void __init cpm_reset(void)
+{
+	cpmp = &mpc8xx_immr->im_cpm;
+
+#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
+	/* Perform a reset. */
+	out_be16(&cpmp->cp_cpcr, CPM_CR_RST | CPM_CR_FLG);
+
+	/* Wait for it: busy-wait, with no timeout, until the flag clears. */
+	while (in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG);
+#endif
+
+#ifdef CONFIG_UCODE_PATCH
+	cpm_load_patch(cpmp);
+#endif
+
+	/*
+	 * Set SDMA Bus Request priority 5.
+	 * On 860T, this also enables FEC priority 6.  I am not sure
+	 * this is what we really want for some applications, but the
+	 * manual recommends it.
+	 * Bit 25, FAM can also be set to use FEC aggressive mode (860T).
+	 */
+	/* The low 16 bits of the IMMR special register select the SDCR value. */
+	if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */
+		out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40);
+	else
+		out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1);
+}
+
+/* Serializes access to the CPM command register. */
+static DEFINE_SPINLOCK(cmd_lock);
+
+/* Upper bound on polls of the command register before giving up. */
+#define MAX_CR_CMD_LOOPS        10000
+
+/*
+ * Issue a command to the communication processor and wait for completion.
+ *
+ * @command: command bits; anything outside mask 0x000000fc is rejected
+ * @opcode:  opcode, placed at bits 8 and up of the command register
+ *
+ * Returns 0 on success, -EINVAL for an invalid @command, or -EIO when the
+ * flag bit is still set after MAX_CR_CMD_LOOPS polls.
+ */
+int cpm_command(u32 command, u8 opcode)
+{
+	unsigned long irqflags;
+	int tries;
+	int ret = -EIO;
+
+	if (command & 0xffffff03)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cmd_lock, irqflags);
+
+	out_be16(&cpmp->cp_cpcr, command | CPM_CR_FLG | (opcode << 8));
+
+	/* The CPM clears the flag bit once it has taken the command. */
+	for (tries = 0; tries < MAX_CR_CMD_LOOPS; tries++) {
+		if (!(in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG)) {
+			ret = 0;
+			break;
+		}
+	}
+
+	if (ret)
+		printk(KERN_ERR "%s(): Not able to issue CPM command\n", __func__);
+
+	spin_unlock_irqrestore(&cmd_lock, irqflags);
+	return ret;
+}
+EXPORT_SYMBOL(cpm_command);
+
+/*
+ * Set a baud rate generator.  This needs lots of work.  There are
+ * four BRGs, any of which can be wired to any channel.
+ * The internal baud rate clock is the system clock divided by 16.
+ * This assumes the baudrate is 16x oversampled by the uart.
+ */
+#define BRG_INT_CLK		(get_brgfreq())
+#define BRG_UART_CLK		(BRG_INT_CLK/16)
+
+/*
+ * Program baud rate generator @brg (an index added to the address of
+ * cp_brgc1) for @rate.  A @rate of zero is ignored and leaves the BRG
+ * untouched, since it cannot be converted into a divider.
+ */
+void
+cpm_setbrg(uint brg, uint rate)
+{
+	u32 __iomem *bp;
+	uint uart_clk;
+	uint div;
+
+	/* A zero rate would divide by zero below. */
+	if (!rate)
+		return;
+
+	/* This is good enough to get SMCs running..... */
+	bp = &cpmp->cp_brgc1;
+	bp += brg;
+
+	/* Query the BRG input clock once instead of once per expression. */
+	uart_clk = BRG_UART_CLK;
+	div = (uart_clk / rate) - 1;
+
+	/*
+	 * The BRG has a 12-bit counter.  For really slow baud rates (or
+	 * really fast processors), we may have to further divide by 16.
+	 */
+	if (div < 4096)
+		out_be32(bp, (div << 1) | CPM_BRG_EN);
+	else
+		out_be32(bp, ((((uart_clk / 16) / rate) - 1) << 1) |
+			      CPM_BRG_EN | CPM_BRG_DIV16);
+}
+EXPORT_SYMBOL(cpm_setbrg);
+
+/*
+ * Register layout of one 16-bit port.  odr_sor is used as the open-drain
+ * register for port A and as the special-option register for port C
+ * (see cpm1_set_pin16()).
+ */
+struct cpm_ioport16 {
+	__be16 dir, par, odr_sor, dat, intr;
+	__be16 res[3];
+};
+
+/* Register layout used by the 32-bit GPIO chip code: dir, par, odr, dat. */
+struct cpm_ioport32b {
+	__be32 dir, par, odr, dat;
+};
+
+/* Register layout with an extra sor register, used by cpm1_set_pin32(). */
+struct cpm_ioport32e {
+	__be32 dir, par, sor, odr, dat;
+};
+
+/*
+ * Configure one pin of a 32-bit port (port B or port E; any port other
+ * than CPM_PORTB is treated as port E when selecting the register block).
+ *
+ * @port:  CPM_PORTB or CPM_PORTE
+ * @pin:   pin number; pin 0 is the most significant bit (bit 31)
+ * @flags: CPM_PIN_* flags selecting direction, GPIO vs dedicated function,
+ *         open drain and (port E only) the secondary option
+ */
+static void __init cpm1_set_pin32(int port, int pin, int flags)
+{
+	struct cpm_ioport32e __iomem *iop;
+	/* From here on, pin is the bit mask rather than the pin number. */
+	pin = 1 << (31 - pin);
+
+	/*
+	 * Both ports are accessed through struct cpm_ioport32e, but for
+	 * port B only the dir and par members are used through iop.
+	 */
+	if (port == CPM_PORTB)
+		iop = (struct cpm_ioport32e __iomem *)
+		      &mpc8xx_immr->im_cpm.cp_pbdir;
+	else
+		iop = (struct cpm_ioport32e __iomem *)
+		      &mpc8xx_immr->im_cpm.cp_pedir;
+
+	if (flags & CPM_PIN_OUTPUT)
+		setbits32(&iop->dir, pin);
+	else
+		clrbits32(&iop->dir, pin);
+
+	/* The par bit is set for anything that is not plain GPIO. */
+	if (!(flags & CPM_PIN_GPIO))
+		setbits32(&iop->par, pin);
+	else
+		clrbits32(&iop->par, pin);
+
+	/*
+	 * Port B's open-drain register is accessed as 16 bits.
+	 * NOTE(review): the 32-bit mask is passed to a 16-bit accessor, so
+	 * presumably only the low 16 mask bits (pins 16-31) can take
+	 * effect -- confirm against the register layout.
+	 */
+	if (port == CPM_PORTB) {
+		if (flags & CPM_PIN_OPENDRAIN)
+			setbits16(&mpc8xx_immr->im_cpm.cp_pbodr, pin);
+		else
+			clrbits16(&mpc8xx_immr->im_cpm.cp_pbodr, pin);
+	}
+
+	if (port == CPM_PORTE) {
+		if (flags & CPM_PIN_SECONDARY)
+			setbits32(&iop->sor, pin);
+		else
+			clrbits32(&iop->sor, pin);
+
+		if (flags & CPM_PIN_OPENDRAIN)
+			setbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin);
+		else
+			clrbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin);
+	}
+}
+
+/*
+ * Configure one pin of a 16-bit port.  cpm1_set_pin() routes every port
+ * other than B and E here.
+ *
+ * @port:  port identifier; selects the register block within im_ioport
+ * @pin:   pin number; pin 0 is the most significant bit (bit 15)
+ * @flags: CPM_PIN_* flags
+ */
+static void __init cpm1_set_pin16(int port, int pin, int flags)
+{
+	struct cpm_ioport16 __iomem *iop =
+		(struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
+
+	/* From here on, pin is the bit mask rather than the pin number. */
+	pin = 1 << (15 - pin);
+
+	/*
+	 * Port 0 uses the first register block; any other port uses block
+	 * (port - 1), which skips the slot a 32-bit port occupies in the
+	 * port numbering.
+	 * NOTE(review): presumably A=0, C=2, D=3 -- confirm against enum
+	 * cpm_port.
+	 */
+	if (port != 0)
+		iop += port - 1;
+
+	if (flags & CPM_PIN_OUTPUT)
+		setbits16(&iop->dir, pin);
+	else
+		clrbits16(&iop->dir, pin);
+
+	/* The par bit is set for anything that is not plain GPIO. */
+	if (!(flags & CPM_PIN_GPIO))
+		setbits16(&iop->par, pin);
+	else
+		clrbits16(&iop->par, pin);
+
+	/* On port A the odr_sor register holds the open-drain setting. */
+	if (port == CPM_PORTA) {
+		if (flags & CPM_PIN_OPENDRAIN)
+			setbits16(&iop->odr_sor, pin);
+		else
+			clrbits16(&iop->odr_sor, pin);
+	}
+	/* On port C it holds the secondary option; intr gets the edge select. */
+	if (port == CPM_PORTC) {
+		if (flags & CPM_PIN_SECONDARY)
+			setbits16(&iop->odr_sor, pin);
+		else
+			clrbits16(&iop->odr_sor, pin);
+		if (flags & CPM_PIN_FALLEDGE)
+			setbits16(&iop->intr, pin);
+		else
+			clrbits16(&iop->intr, pin);
+	}
+}
+
+/*
+ * Configure a single CPM pin.  Ports B and E have 32-bit registers; every
+ * other port is handled by the 16-bit variant.
+ */
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
+{
+	switch (port) {
+	case CPM_PORTB:
+	case CPM_PORTE:
+		cpm1_set_pin32(port, pin, flags);
+		break;
+	default:
+		cpm1_set_pin16(port, pin, flags);
+		break;
+	}
+}
+
+/*
+ * Route a clock source (BRG or CLK pin) to an SCC or SMC.
+ *
+ * @target: CPM_CLK_SCCn or CPM_CLK_SMCn
+ * @clock:  CPM_BRGn or CPM_CLKn; must be a pairing listed in clk_map
+ * @mode:   for SCC targets, CPM_CLK_RTX programs both 3-bit selector
+ *          fields, CPM_CLK_RX only the field three bits higher, and any
+ *          other value only the lower field.  Ignored for SMC targets.
+ *
+ * Returns 0 on success or -EINVAL for an unknown target or an
+ * unsupported target/clock combination.
+ */
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+{
+	/*
+	 * {target, clock source, selector value}.  Kept in init-only
+	 * read-only data so the table is not rebuilt on the stack on
+	 * every call.
+	 */
+	static const u8 clk_map[][3] __initconst = {
+		{CPM_CLK_SCC1, CPM_BRG1, 0},
+		{CPM_CLK_SCC1, CPM_BRG2, 1},
+		{CPM_CLK_SCC1, CPM_BRG3, 2},
+		{CPM_CLK_SCC1, CPM_BRG4, 3},
+		{CPM_CLK_SCC1, CPM_CLK1, 4},
+		{CPM_CLK_SCC1, CPM_CLK2, 5},
+		{CPM_CLK_SCC1, CPM_CLK3, 6},
+		{CPM_CLK_SCC1, CPM_CLK4, 7},
+
+		{CPM_CLK_SCC2, CPM_BRG1, 0},
+		{CPM_CLK_SCC2, CPM_BRG2, 1},
+		{CPM_CLK_SCC2, CPM_BRG3, 2},
+		{CPM_CLK_SCC2, CPM_BRG4, 3},
+		{CPM_CLK_SCC2, CPM_CLK1, 4},
+		{CPM_CLK_SCC2, CPM_CLK2, 5},
+		{CPM_CLK_SCC2, CPM_CLK3, 6},
+		{CPM_CLK_SCC2, CPM_CLK4, 7},
+
+		{CPM_CLK_SCC3, CPM_BRG1, 0},
+		{CPM_CLK_SCC3, CPM_BRG2, 1},
+		{CPM_CLK_SCC3, CPM_BRG3, 2},
+		{CPM_CLK_SCC3, CPM_BRG4, 3},
+		{CPM_CLK_SCC3, CPM_CLK5, 4},
+		{CPM_CLK_SCC3, CPM_CLK6, 5},
+		{CPM_CLK_SCC3, CPM_CLK7, 6},
+		{CPM_CLK_SCC3, CPM_CLK8, 7},
+
+		{CPM_CLK_SCC4, CPM_BRG1, 0},
+		{CPM_CLK_SCC4, CPM_BRG2, 1},
+		{CPM_CLK_SCC4, CPM_BRG3, 2},
+		{CPM_CLK_SCC4, CPM_BRG4, 3},
+		{CPM_CLK_SCC4, CPM_CLK5, 4},
+		{CPM_CLK_SCC4, CPM_CLK6, 5},
+		{CPM_CLK_SCC4, CPM_CLK7, 6},
+		{CPM_CLK_SCC4, CPM_CLK8, 7},
+
+		{CPM_CLK_SMC1, CPM_BRG1, 0},
+		{CPM_CLK_SMC1, CPM_BRG2, 1},
+		{CPM_CLK_SMC1, CPM_BRG3, 2},
+		{CPM_CLK_SMC1, CPM_BRG4, 3},
+		{CPM_CLK_SMC1, CPM_CLK1, 4},
+		{CPM_CLK_SMC1, CPM_CLK2, 5},
+		{CPM_CLK_SMC1, CPM_CLK3, 6},
+		{CPM_CLK_SMC1, CPM_CLK4, 7},
+
+		{CPM_CLK_SMC2, CPM_BRG1, 0},
+		{CPM_CLK_SMC2, CPM_BRG2, 1},
+		{CPM_CLK_SMC2, CPM_BRG3, 2},
+		{CPM_CLK_SMC2, CPM_BRG4, 3},
+		{CPM_CLK_SMC2, CPM_CLK5, 4},
+		{CPM_CLK_SMC2, CPM_CLK6, 5},
+		{CPM_CLK_SMC2, CPM_CLK7, 6},
+		{CPM_CLK_SMC2, CPM_CLK8, 7},
+	};
+	u32 __iomem *reg;
+	u32 mask = 7;
+	int shift;
+	int i, bits = 0;
+
+	/* Pick the routing register and the field position for the target. */
+	switch (target) {
+	case CPM_CLK_SCC1:
+		reg = &mpc8xx_immr->im_cpm.cp_sicr;
+		shift = 0;
+		break;
+
+	case CPM_CLK_SCC2:
+		reg = &mpc8xx_immr->im_cpm.cp_sicr;
+		shift = 8;
+		break;
+
+	case CPM_CLK_SCC3:
+		reg = &mpc8xx_immr->im_cpm.cp_sicr;
+		shift = 16;
+		break;
+
+	case CPM_CLK_SCC4:
+		reg = &mpc8xx_immr->im_cpm.cp_sicr;
+		shift = 24;
+		break;
+
+	case CPM_CLK_SMC1:
+		reg = &mpc8xx_immr->im_cpm.cp_simode;
+		shift = 12;
+		break;
+
+	case CPM_CLK_SMC2:
+		reg = &mpc8xx_immr->im_cpm.cp_simode;
+		shift = 28;
+		break;
+
+	default:
+		printk(KERN_ERR "%s: invalid clock target\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Look up the selector value for this target/clock pairing. */
+	for (i = 0; i < ARRAY_SIZE(clk_map); i++) {
+		if (clk_map[i][0] == target && clk_map[i][1] == clock) {
+			bits = clk_map[i][2];
+			break;
+		}
+	}
+
+	if (i == ARRAY_SIZE(clk_map)) {
+		printk(KERN_ERR "%s: invalid clock combination\n", __func__);
+		return -EINVAL;
+	}
+
+	bits <<= shift;
+	mask <<= shift;
+
+	/* Only the SICR (SCC) fields distinguish receive from transmit. */
+	if (reg == &mpc8xx_immr->im_cpm.cp_sicr) {
+		if (mode == CPM_CLK_RTX) {
+			bits |= bits << 3;
+			mask |= mask << 3;
+		} else if (mode == CPM_CLK_RX) {
+			bits <<= 3;
+			mask <<= 3;
+		}
+	}
+
+	/* Read-modify-write so the other fields are preserved. */
+	out_be32(reg, (in_be32(reg) & ~mask) | bits);
+
+	return 0;
+}
+
+/*
+ * GPIO LIB API implementation
+ */
+#ifdef CONFIG_8xx_GPIO
+
+/* State of one 16-bit GPIO bank registered by cpm1_gpiochip_add16(). */
+struct cpm1_gpio16_chip {
+	struct of_mm_gpio_chip mm_gc;
+	/* protects cpdata and the register updates made under it */
+	spinlock_t lock;
+
+	/* shadowed data register to clear/set bits safely */
+	u16 cpdata;
+
+	/* IRQ associated with Pins when relevant; 0 means none */
+	int irq[16];
+};
+
+/* save_regs hook: load the shadow copy from the port's data register. */
+static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+	struct cpm_ioport16 __iomem *port = mm_gc->regs;
+	struct cpm1_gpio16_chip *chip;
+
+	chip = container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
+	chip->cpdata = in_be16(&port->dat);
+}
+
+/* Read one line: returns 1 when its bit in the data register is set, else 0. */
+static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm_ioport16 __iomem *port = mm->regs;
+	/* line 0 is the most significant bit */
+	u16 bit = 1 << (15 - gpio);
+
+	return (in_be16(&port->dat) & bit) ? 1 : 0;
+}
+
+/*
+ * Update the shadow value for @pin_mask and write the whole shadow to the
+ * data register.  Takes no lock itself; the callers in this file hold
+ * cpm1_gc->lock around it.
+ */
+static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask,
+	int value)
+{
+	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+
+	if (value)
+		cpm1_gc->cpdata |= pin_mask;
+	else
+		cpm1_gc->cpdata &= ~pin_mask;
+
+	/* Write the shadow rather than read-modify-writing the hardware. */
+	out_be16(&iop->dat, cpm1_gc->cpdata);
+}
+
+/* gpio_chip .set: drive one line to @value under the chip lock. */
+static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *chip = gpiochip_get_data(&mm->gc);
+	u16 bit = 1 << (15 - gpio);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	__cpm1_gpio16_set(mm, bit, value);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+}
+
+/* gpio_chip .to_irq: the interrupt recorded for the line, or -ENXIO if none. */
+static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *chip = gpiochip_get_data(&mm->gc);
+	int virq = chip->irq[gpio];
+
+	if (!virq)
+		return -ENXIO;
+
+	return virq;
+}
+
+/*
+ * gpio_chip .direction_output: switch the line to output and then drive
+ * @val, both under the chip lock.  Always returns 0.
+ */
+static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+	unsigned long flags;
+	u16 pin_mask = 1 << (15 - gpio);
+
+	spin_lock_irqsave(&cpm1_gc->lock, flags);
+
+	/* The direction bit is set before the data register is written. */
+	setbits16(&iop->dir, pin_mask);
+	__cpm1_gpio16_set(mm_gc, pin_mask, val);
+
+	spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+	return 0;
+}
+
+/* gpio_chip .direction_input: clear the line's direction bit.  Returns 0. */
+static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *chip = gpiochip_get_data(&mm->gc);
+	struct cpm_ioport16 __iomem *port = mm->regs;
+	u16 bit = 1 << (15 - gpio);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	clrbits16(&port->dir, bit);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Register a 16-line CPM1 GPIO bank for @dev.
+ *
+ * When the node has an "fsl,cpm1-gpio-irq-mask" property, each set mask
+ * bit (most significant bit = line 0) takes the next interrupt specifier
+ * of the node as that line's interrupt.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * of_mm_gpiochip_add_data().  The chip state is freed on failure.
+ */
+int cpm1_gpiochip_add16(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct cpm1_gpio16_chip *cpm1_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct gpio_chip *gc;
+	u16 mask;
+	int ret;
+
+	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
+	if (!cpm1_gc)
+		return -ENOMEM;
+
+	spin_lock_init(&cpm1_gc->lock);
+
+	if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) {
+		int i, j;
+
+		/* j counts interrupt specifiers, i counts GPIO lines. */
+		for (i = 0, j = 0; i < 16; i++)
+			if (mask & (1 << (15 - i)))
+				cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++);
+	}
+
+	mm_gc = &cpm1_gc->mm_gc;
+	gc = &mm_gc->gc;
+
+	mm_gc->save_regs = cpm1_gpio16_save_regs;
+	gc->ngpio = 16;
+	gc->direction_input = cpm1_gpio16_dir_in;
+	gc->direction_output = cpm1_gpio16_dir_out;
+	gc->get = cpm1_gpio16_get;
+	gc->set = cpm1_gpio16_set;
+	gc->to_irq = cpm1_gpio16_to_irq;
+	gc->parent = dev;
+	gc->owner = THIS_MODULE;
+
+	ret = of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
+	if (ret)
+		/* Registration failed: do not leak the chip state. */
+		kfree(cpm1_gc);
+
+	return ret;
+}
+
+/* State of one 32-bit GPIO bank registered by cpm1_gpiochip_add32(). */
+struct cpm1_gpio32_chip {
+	struct of_mm_gpio_chip mm_gc;
+	/* protects cpdata and the register updates made under it */
+	spinlock_t lock;
+
+	/* shadowed data register to clear/set bits safely */
+	u32 cpdata;
+};
+
+/* save_regs hook: load the shadow copy from the port's data register. */
+static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+	struct cpm_ioport32b __iomem *port = mm_gc->regs;
+	struct cpm1_gpio32_chip *chip;
+
+	chip = container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
+	chip->cpdata = in_be32(&port->dat);
+}
+
+/* Read one line: returns 1 when its bit in the data register is set, else 0. */
+static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm_ioport32b __iomem *port = mm->regs;
+	/* line 0 is the most significant bit */
+	u32 bit = 1 << (31 - gpio);
+
+	return (in_be32(&port->dat) & bit) ? 1 : 0;
+}
+
+/*
+ * Update the shadow value for @pin_mask and write the whole shadow to the
+ * data register.  Takes no lock itself; the callers in this file hold
+ * cpm1_gc->lock around it.
+ */
+static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
+	int value)
+{
+	struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+
+	if (value)
+		cpm1_gc->cpdata |= pin_mask;
+	else
+		cpm1_gc->cpdata &= ~pin_mask;
+
+	/* Write the shadow rather than read-modify-writing the hardware. */
+	out_be32(&iop->dat, cpm1_gc->cpdata);
+}
+
+/* gpio_chip .set: drive one line to @value under the chip lock. */
+static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio32_chip *chip = gpiochip_get_data(&mm->gc);
+	u32 bit = 1 << (31 - gpio);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	__cpm1_gpio32_set(mm, bit, value);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+}
+
+/*
+ * gpio_chip .direction_output: switch the line to output and then drive
+ * @val, both under the chip lock.  Always returns 0.
+ */
+static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+	unsigned long flags;
+	u32 pin_mask = 1 << (31 - gpio);
+
+	spin_lock_irqsave(&cpm1_gc->lock, flags);
+
+	/* The direction bit is set before the data register is written. */
+	setbits32(&iop->dir, pin_mask);
+	__cpm1_gpio32_set(mm_gc, pin_mask, val);
+
+	spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+	return 0;
+}
+
+/* gpio_chip .direction_input: clear the line's direction bit.  Returns 0. */
+static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio32_chip *chip = gpiochip_get_data(&mm->gc);
+	struct cpm_ioport32b __iomem *port = mm->regs;
+	u32 bit = 1 << (31 - gpio);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	clrbits32(&port->dir, bit);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Register a 32-line CPM1 GPIO bank for @dev.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * of_mm_gpiochip_add_data().  The chip state is freed on failure.
+ */
+int cpm1_gpiochip_add32(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct cpm1_gpio32_chip *cpm1_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct gpio_chip *gc;
+	int ret;
+
+	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
+	if (!cpm1_gc)
+		return -ENOMEM;
+
+	spin_lock_init(&cpm1_gc->lock);
+
+	mm_gc = &cpm1_gc->mm_gc;
+	gc = &mm_gc->gc;
+
+	mm_gc->save_regs = cpm1_gpio32_save_regs;
+	gc->ngpio = 32;
+	gc->direction_input = cpm1_gpio32_dir_in;
+	gc->direction_output = cpm1_gpio32_dir_out;
+	gc->get = cpm1_gpio32_get;
+	gc->set = cpm1_gpio32_set;
+	gc->parent = dev;
+	gc->owner = THIS_MODULE;
+
+	ret = of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
+	if (ret)
+		/* Registration failed: do not leak the chip state. */
+		kfree(cpm1_gc);
+
+	return ret;
+}
+
+#endif /* CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c
new file mode 100644
index 0000000000..fc276a29d6
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/ep88xc.c
@@ -0,0 +1,170 @@
+/*
+ * Platform setup for the Embedded Planet EP88xC board
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/udbg.h>
+#include <asm/cpm1.h>
+
+#include "mpc8xx.h"
+#include "pic.h"
+
+/* One pin-configuration entry: the three arguments passed to cpm1_set_pin(). */
+struct cpm_pin {
+	int port, pin, flags;
+};
+
+/*
+ * Pin setup for the EP88xC, applied in order by init_ioports().  Each
+ * entry is {port, pin, flags}.  Ports are written as raw numbers here.
+ * NOTE(review): presumably 0..4 correspond to CPM_PORTA..CPM_PORTE --
+ * confirm against enum cpm_port.
+ */
+static struct cpm_pin ep88xc_pins[] = {
+	/* SMC1 */
+	{1, 24, CPM_PIN_INPUT}, /* RX */
+	{1, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SCC2 */
+	{0, 12, CPM_PIN_INPUT}, /* TX */
+	{0, 13, CPM_PIN_INPUT}, /* RX */
+	{2, 8, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CD */
+	{2, 9, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CTS */
+	{2, 14, CPM_PIN_INPUT}, /* RTS */
+
+	/* MII1 */
+	{0, 0, CPM_PIN_INPUT},
+	{0, 1, CPM_PIN_INPUT},
+	{0, 2, CPM_PIN_INPUT},
+	{0, 3, CPM_PIN_INPUT},
+	{0, 4, CPM_PIN_OUTPUT},
+	{0, 10, CPM_PIN_OUTPUT},
+	{0, 11, CPM_PIN_OUTPUT},
+	{1, 19, CPM_PIN_INPUT},
+	{1, 31, CPM_PIN_INPUT},
+	{2, 12, CPM_PIN_INPUT},
+	{2, 13, CPM_PIN_INPUT},
+	{3, 8, CPM_PIN_INPUT},
+	{4, 30, CPM_PIN_OUTPUT},
+	{4, 31, CPM_PIN_OUTPUT},
+
+	/* MII2 */
+	{4, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{4, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{4, 16, CPM_PIN_OUTPUT},
+	{4, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{4, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{4, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{4, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{4, 21, CPM_PIN_OUTPUT},
+	{4, 22, CPM_PIN_OUTPUT},
+	{4, 23, CPM_PIN_OUTPUT},
+	{4, 24, CPM_PIN_OUTPUT},
+	{4, 25, CPM_PIN_OUTPUT},
+	{4, 26, CPM_PIN_OUTPUT},
+	{4, 27, CPM_PIN_OUTPUT},
+	{4, 28, CPM_PIN_OUTPUT},
+	{4, 29, CPM_PIN_OUTPUT},
+
+	/* USB */
+	{0, 6, CPM_PIN_INPUT},  /* CLK2 */
+	{0, 14, CPM_PIN_INPUT}, /* USBOE */
+	{0, 15, CPM_PIN_INPUT}, /* USBRXD */
+	{2, 6, CPM_PIN_OUTPUT}, /* USBTXN */
+	{2, 7, CPM_PIN_OUTPUT}, /* USBTXP */
+	{2, 10, CPM_PIN_INPUT}, /* USBRXN */
+	{2, 11, CPM_PIN_INPUT}, /* USBRXP */
+
+	/* Misc */
+	{1, 26, CPM_PIN_INPUT}, /* BRGO2 */
+	{1, 27, CPM_PIN_INPUT}, /* BRGO1 */
+};
+
+/*
+ * Apply every entry of ep88xc_pins, then route clocks: BRG1 to SMC1,
+ * CLK2 to SCC1 (USB) and BRG2 to SCC2, each for both directions.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *cur = ep88xc_pins;
+	const struct cpm_pin *end = ep88xc_pins + ARRAY_SIZE(ep88xc_pins);
+
+	/* Program the pins in table order. */
+	for (; cur != end; cur++)
+		cpm1_set_pin(cur->port, cur->pin, cur->flags);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_TX); /* USB */
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX);
+	cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX);
+}
+
+/* Byte-wide board control/status registers, mapped in ep88xc_setup_arch(). */
+static u8 __iomem *ep88xc_bcsr;
+
+/* Bit masks; the digit after "BCSR" is the byte index into ep88xc_bcsr. */
+#define BCSR7_SCC2_ENABLE 0x10
+
+#define BCSR8_PHY1_ENABLE 0x80
+#define BCSR8_PHY1_POWER  0x40
+#define BCSR8_PHY2_ENABLE 0x20
+#define BCSR8_PHY2_POWER  0x10
+
+#define BCSR9_USB_ENABLE  0x80
+#define BCSR9_USB_POWER   0x40
+#define BCSR9_USB_HOST    0x20
+#define BCSR9_USB_FULL_SPEED_TARGET 0x10
+
+/*
+ * setup_arch hook: reset the CPM, configure pins and clocks, then map the
+ * BCSR block and enable SCC2 and both PHYs.  A missing node or a failed
+ * mapping is logged and the BCSR writes are skipped.
+ */
+static void __init ep88xc_setup_arch(void)
+{
+	struct device_node *np;
+
+	cpm_reset();
+	init_ioports();
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,ep88xc-bcsr");
+	if (!np) {
+		printk(KERN_CRIT "Could not find fsl,ep88xc-bcsr node\n");
+		return;
+	}
+
+	ep88xc_bcsr = of_iomap(np, 0);
+	/* The node reference is no longer needed once the mapping exists. */
+	of_node_put(np);
+
+	if (!ep88xc_bcsr) {
+		printk(KERN_CRIT "Could not remap BCSR\n");
+		return;
+	}
+
+	setbits8(&ep88xc_bcsr[7], BCSR7_SCC2_ENABLE);
+	setbits8(&ep88xc_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER |
+	                          BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER);
+}
+
+/* Bus nodes, matched by node name, handed to of_platform_bus_probe(). */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc", },
+	{ .name = "cpm", },
+	{ .name = "localbus", },
+	{},
+};
+
+/*
+ * Device initcall for the ep88xc machine.  The probe result is ignored;
+ * always returns 0.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	/* Publish the devices under the soc, cpm and localbus nodes */
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(ep88xc, declare_of_platform_devices);
+
+/*
+ * Machine description for the Embedded Planet EP88xC: the board-specific
+ * setup hook plus the shared MPC8xx PIC, IRQ lookup, restart and
+ * decrementer calibration helpers.
+ */
+define_machine(ep88xc) {
+	.name = "Embedded Planet EP88xC",
+	.compatible = "fsl,ep88xc",
+	.setup_arch = ep88xc_setup_arch,
+	.init_IRQ = mpc8xx_pic_init,
+	.get_irq	= mpc8xx_get_irq,
+	.restart = mpc8xx_restart,
+	.calibrate_decr = mpc8xx_calibrate_decr,
+	.progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
new file mode 100644
index 0000000000..2336b687bc
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified for MBX using prep/chrp/pmac functions by Dan (dmalek@jlc.net)
+ *  Further modified for generic 8xx by Dan.
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/rtc.h>
+#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/io.h>
+#include <asm/8xx_immap.h>
+#include <mm/mmu_decl.h>
+
+#include "pic.h"
+
+#include "mpc8xx.h"
+
+/*
+ * Placeholder handler for time base interrupts, should they ever be
+ * enabled: logs its own name and reports the interrupt as handled.
+ */
+static irqreturn_t timebase_interrupt(int irq, void *dev)
+{
+	printk("timebase_interrupt()\n");
+	return IRQ_HANDLED;
+}
+
+/*
+ * Read a 32-bit frequency property from the node of CPU 0.
+ *
+ * @name: property name, e.g. "clock-frequency"
+ * @val:  receives the value; left untouched when the property is missing
+ *        or too short to hold a 32-bit cell
+ *
+ * Returns 1 when the value was found, 0 otherwise.
+ */
+static int __init get_freq(char *name, unsigned long *val)
+{
+	struct device_node *cpu;
+	u32 freq;
+	int found = 0;
+
+	/* The cpu node should have timebase and clock frequency properties */
+	cpu = of_get_cpu_node(0, NULL);
+	if (!cpu)
+		return 0;
+
+	/*
+	 * of_property_read_u32() validates the property length, unlike a
+	 * raw dereference of the of_get_property() result.
+	 */
+	if (!of_property_read_u32(cpu, name, &freq)) {
+		*val = freq;
+		found = 1;
+	}
+
+	of_node_put(cpu);
+
+	return found;
+}
+
+/* The decrementer counts at the system (internal) clock frequency divided by
+ * sixteen, or external oscillator divided by four.  We force the processor
+ * to use system clock divided by sixteen.
+ *
+ * Besides setting ppc_proc_freq and ppc_tb_freq, this unlocks the
+ * key-protected timer registers, enables the RTC and the timebase, and
+ * installs timebase_interrupt() on the cpu node's first interrupt.
+ * Panics if that interrupt cannot be requested.
+ */
+void __init mpc8xx_calibrate_decr(void)
+{
+	struct device_node *cpu;
+	int irq, virq;
+
+	/* Unlock the SCCR. */
+	out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY);
+
+	/* Force all 8xx processors to use divide by 16 processor clock. */
+	setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000);
+
+	/* Default the processor frequency to 50 MHz; the cpu node's
+	 * "clock-frequency" property overrides it when present.
+	 */
+	ppc_proc_freq = 50000000;
+	if (!get_freq("clock-frequency", &ppc_proc_freq))
+		printk(KERN_ERR "WARNING: Estimating processor frequency "
+		                "(not found)\n");
+
+	ppc_tb_freq = ppc_proc_freq / 16;
+	printk("Decrementer Frequency = 0x%lx\n", ppc_tb_freq);
+
+	/* Perform some more timer/timebase initialization.  This used
+	 * to be done elsewhere, but other changes caused it to get
+	 * called more than once....that is a bad thing.
+	 *
+	 * First, unlock all of the registers we are going to modify.
+	 * To protect them from corruption during power down, registers
+	 * that are maintained by keep alive power are "locked".  To
+	 * modify these registers we have to write the key value to
+	 * the key location associated with the register.
+	 * Some boards power up with these unlocked, while others
+	 * are locked.  Writing anything (including the unlock code?)
+	 * to the unlocked registers will lock them again.  So, here
+	 * we guarantee the registers are locked, then we unlock them
+	 * for our use.
+	 */
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY);
+
+	/* Disable the RTC one second and alarm interrupts. */
+	clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
+
+	/* Enable the RTC */
+	setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
+
+	/* Enabling the decrementer also enables the timebase interrupts
+	 * (or from the other point of view, to get decrementer interrupts
+	 * we have to enable the timebase).  The decrementer interrupt
+	 * is wired into the vector table, nothing to do here for that.
+	 */
+	cpu = of_get_cpu_node(0, NULL);
+	virq= irq_of_parse_and_map(cpu, 0);
+	of_node_put(cpu);
+	/* NOTE(review): virq is not checked for a failed mapping before it is
+	 * used here; request_irq() below is the only failure check.
+	 */
+	irq = virq_to_hw(virq);
+
+	/* The upper byte gets a single bit selected by irq / 2. */
+	out_be16(&mpc8xx_immr->im_sit.sit_tbscr,
+		 ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE));
+
+	if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint",
+			NULL))
+		panic("Could not allocate timer IRQ!");
+}
+
+/* The RTC on the MPC8xx is an internal register.
+ * We want to protect this during power down, so we need to unlock,
+ * modify, and re-lock.
+ *
+ * Converts @tm to seconds and stores the low 32 bits in the RTC register.
+ * Always returns 0.
+ */
+
+int mpc8xx_set_rtc_time(struct rtc_time *tm)
+{
+	time64_t time;
+
+	time = rtc_tm_to_time64(tm);
+
+	/* Unlock with the key, write, then re-lock with the inverted key. */
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY);
+
+	return 0;
+}
+
+/* Read the 32-bit seconds counter from the RTC and convert it into @tm. */
+void mpc8xx_get_rtc_time(struct rtc_time *tm)
+{
+	unsigned long secs = in_be32(&mpc8xx_immr->im_sit.sit_rtc);
+
+	rtc_time64_to_tm(secs, tm);
+}
+
+/*
+ * Restart the board.  @cmd is ignored.  Interrupts are disabled, bit
+ * 0x80 of PLPRCR is set, MSR[ME] is cleared and a reserved register is
+ * read; if execution continues past that read the function panics.
+ * NOTE(review): presumably the read raises a machine check which, with
+ * ME clear, checkstops the core and resets it -- confirm against the
+ * PLPRCR description.
+ */
+void __noreturn mpc8xx_restart(char *cmd)
+{
+	local_irq_disable();
+
+	setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080);
+	/* Clear the ME bit in MSR to cause checkstop on machine check
+	*/
+	mtmsr(mfmsr() & ~0x1000);
+
+	in_8(&mpc8xx_immr->im_clkrst.res[0]);
+	panic("Restart failed\n");
+}
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c
new file mode 100644
index 0000000000..6563659758
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+/*
+ * Log a machine check.  The cause is decoded from regs->msr (reported as
+ * SRR1): bit 0x40000000 selects "fetch error" at regs->nip, otherwise a
+ * data access error at regs->dar is reported.
+ *
+ * Returns 1 after calling bad_page_fault() when CONFIG_PCI is enabled,
+ * 0 otherwise.
+ */
+int machine_check_8xx(struct pt_regs *regs)
+{
+	unsigned long reason = regs->msr;
+
+	pr_err("Machine check in kernel mode.\n");
+	pr_err("Caused by (from SRR1=%lx): ", reason);
+	if (reason & 0x40000000)
+		pr_cont("Fetch error at address %lx\n", regs->nip);
+	else
+		pr_cont("Data access error at address %lx\n", regs->dar);
+
+#ifdef CONFIG_PCI
+	/* the qspan pci read routines can cause machine checks -- Cort
+	 *
+	 * yuck !!! that totally needs to go away ! There are better ways
+	 * to deal with that than having a wart in the mcheck handler.
+	 * -- BenH
+	 */
+	bad_page_fault(regs, SIGBUS);
+	return 1;
+#else
+	return 0;
+#endif
+}
diff --git a/arch/powerpc/platforms/8xx/mpc86xads.h b/arch/powerpc/platforms/8xx/mpc86xads.h
new file mode 100644
index 0000000000..17b1fe75e0
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc86xads.h
@@ -0,0 +1,47 @@
+/*
+ * A collection of structures, addresses, and values associated with
+ * the Freescale MPC86xADS board.
+ * Copied from the FADS stuff.
+ *
+ * Author: MontaVista Software, Inc.
+ *         source@mvista.com
+ *
+ * 2005 (c) MontaVista Software, Inc.  This file is licensed under the
+ * terms of the GNU General Public License version 2.  This program is licensed
+ * "as is" without any warranty of any kind, whether express or implied.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_MPC86XADS_H__
+#define __ASM_MPC86XADS_H__
+
+/*
+ * Bits of interest in the BCSRs (board control/status registers).
+ *
+ * The BCSR1_* and BCSR4_* masks are 32-bit values cast to uint; the BCSR5_*
+ * masks are plain integer constants.
+ */
+
+/* BCSR1 masks */
+#define BCSR1_ETHEN		((uint)0x20000000)
+#define BCSR1_IRDAEN		((uint)0x10000000)
+#define BCSR1_RS232EN_1		((uint)0x01000000)
+#define BCSR1_PCCEN		((uint)0x00800000)
+#define BCSR1_PCCVCC0		((uint)0x00400000)
+#define BCSR1_PCCVPP0		((uint)0x00200000)
+#define BCSR1_PCCVPP1		((uint)0x00100000)
+/* Both PCCVPP bits combined */
+#define BCSR1_PCCVPP_MASK	(BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
+#define BCSR1_RS232EN_2		((uint)0x00040000)
+#define BCSR1_PCCVCC1		((uint)0x00010000)
+/* Both PCCVCC bits combined */
+#define BCSR1_PCCVCC_MASK	(BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
+
+/* BCSR4 masks */
+#define BCSR4_ETH10_RST		((uint)0x80000000)	/* 10Base-T PHY reset*/
+#define BCSR4_USB_LO_SPD	((uint)0x04000000)
+#define BCSR4_USB_VCC		((uint)0x02000000)
+#define BCSR4_USB_FULL_SPD	((uint)0x00040000)
+#define BCSR4_USB_EN		((uint)0x00020000)
+
+/* BCSR5 masks */
+#define BCSR5_MII2_EN		0x40
+#define BCSR5_MII2_RST		0x20
+#define BCSR5_T1_RST		0x10
+#define BCSR5_ATM155_RST	0x08
+#define BCSR5_ATM25_RST		0x04
+#define BCSR5_MII1_EN		0x02
+#define BCSR5_MII1_RST		0x01
+
+#endif /* __ASM_MPC86XADS_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
new file mode 100644
index 0000000000..e4192c0a3c
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -0,0 +1,145 @@
+/*arch/powerpc/platforms/8xx/mpc86xads_setup.c
+ *
+ * Platform setup for the Freescale mpc86xads board
+ *
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Copyright 2005 MontaVista Software Inc.
+ *
+ * Heavily modified by Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/udbg.h>
+
+#include "mpc86xads.h"
+#include "mpc8xx.h"
+#include "pic.h"
+
+/*
+ * One CPM pin configuration entry.  The three fields are handed, in this
+ * order, to cpm1_set_pin(port, pin, flags).
+ */
+struct cpm_pin {
+	int port, pin, flags;
+};
+
+/*
+ * Pin setup for the MPC86x ADS board, applied entry by entry through
+ * cpm1_set_pin() in init_ioports().
+ *
+ * The table is only consumed during boot by an __init function, so it is
+ * placed in init data and discarded afterwards.
+ */
+static struct cpm_pin mpc866ads_pins[] __initdata = {
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SMC2 */
+	{CPM_PORTB, 21, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SCC1 */
+	{CPM_PORTA, 6, CPM_PIN_INPUT}, /* CLK1 */
+	{CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */
+	{CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */
+	{CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 19, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */
+	{CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */
+	{CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */
+
+	/* MII */
+	{CPM_PORTD, 3, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 5, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 6, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 7, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 8, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 9, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 12, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 13, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 14, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 15, CPM_PIN_OUTPUT},
+
+	/* I2C */
+	{CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+	{CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+};
+
+/*
+ * Configure every pin listed in mpc866ads_pins[], route the SMC1/SMC2/SCC1
+ * clocks, and clear bits 0x00000180 of the CPM cp_cptr register.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *p = mpc866ads_pins;
+	const struct cpm_pin *end = p + ARRAY_SIZE(mpc866ads_pins);
+
+	for (; p < end; p++)
+		cpm1_set_pin(p->port, p->pin, p->flags);
+
+	/* SMC1/SMC2 take a baud-rate generator for both directions */
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX);
+	/* SCC1 takes CLK1 for transmit and CLK2 for receive */
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK1, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX);
+
+	/* Set FEC1 and FEC2 to MII mode */
+	clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
+}
+
+/*
+ * Board setup hook: reset the CPM, set up the I/O ports, then clear the
+ * RS232EN_1, RS232EN_2 and ETHEN bits in the board control register found
+ * through the "fsl,mpc866ads-bcsr" device-tree node.
+ *
+ * The register mapping is temporary and is released before returning.
+ * Failures to find or map the node are logged and otherwise ignored.
+ */
+static void __init mpc86xads_setup_arch(void)
+{
+	struct device_node *bcsr_np;
+	u32 __iomem *bcsr_regs;
+
+	cpm_reset();
+	init_ioports();
+
+	bcsr_np = of_find_compatible_node(NULL, NULL, "fsl,mpc866ads-bcsr");
+	if (bcsr_np == NULL) {
+		printk(KERN_CRIT "Could not find fsl,mpc866ads-bcsr node\n");
+		return;
+	}
+
+	/* The node reference is only needed for the mapping call. */
+	bcsr_regs = of_iomap(bcsr_np, 0);
+	of_node_put(bcsr_np);
+
+	if (!bcsr_regs) {
+		printk(KERN_CRIT "Could not remap BCSR\n");
+		return;
+	}
+
+	/*
+	 * NOTE(review): the masks are named BCSR1_* but are applied to the
+	 * first word of the mapping -- confirm the mapping starts at BCSR1.
+	 */
+	clrbits32(bcsr_regs, BCSR1_RS232EN_1 | BCSR1_RS232EN_2 | BCSR1_ETHEN);
+	iounmap(bcsr_regs);
+}
+
+/*
+ * Device-tree nodes, matched by name, that are passed to
+ * of_platform_bus_probe() in declare_of_platform_devices().
+ */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc", },
+	{ .name = "cpm", },
+	{ .name = "localbus", },
+	{},	/* sentinel */
+};
+
+/*
+ * Device initcall for the mpc86x_ads machine: probe the buses listed in
+ * of_bus_ids.  The probe result is ignored and 0 is always returned.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(mpc86x_ads, declare_of_platform_devices);
+
+/*
+ * Machine description for the MPC86x ADS board, selected by the
+ * "fsl,mpc866ads" compatible string.  Apart from setup_arch, all hooks are
+ * the shared mpc8xx_* helpers and udbg_progress.
+ */
+define_machine(mpc86x_ads) {
+	.name			= "MPC86x ADS",
+	.compatible		= "fsl,mpc866ads",
+	.setup_arch		= mpc86xads_setup_arch,
+	.init_IRQ		= mpc8xx_pic_init,
+	.get_irq		= mpc8xx_get_irq,
+	.restart		= mpc8xx_restart,
+	.calibrate_decr		= mpc8xx_calibrate_decr,
+	.set_rtc_time		= mpc8xx_set_rtc_time,
+	.get_rtc_time		= mpc8xx_get_rtc_time,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/mpc885ads.h b/arch/powerpc/platforms/8xx/mpc885ads.h
new file mode 100644
index 0000000000..19412f76fa
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc885ads.h
@@ -0,0 +1,49 @@
+/*
+ * A collection of structures, addresses, and values associated with
+ * the Freescale MPC885ADS board.
+ * Copied from the FADS stuff.
+ *
+ * Author: MontaVista Software, Inc.
+ *         source@mvista.com
+ *
+ * 2005 (c) MontaVista Software, Inc.  This file is licensed under the
+ * terms of the GNU General Public License version 2.  This program is licensed
+ * "as is" without any warranty of any kind, whether express or implied.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_MPC885ADS_H__
+#define __ASM_MPC885ADS_H__
+
+#include <sysdev/fsl_soc.h>
+
+/*
+ * Bits of interest in the BCSRs (board control/status registers).
+ *
+ * The BCSR1_* and BCSR4_* masks are 32-bit values cast to uint; the BCSR5_*
+ * masks are plain integer constants.
+ */
+
+/* BCSR1 masks */
+#define BCSR1_ETHEN		((uint)0x20000000)
+#define BCSR1_IRDAEN		((uint)0x10000000)
+#define BCSR1_RS232EN_1		((uint)0x01000000)
+#define BCSR1_PCCEN		((uint)0x00800000)
+#define BCSR1_PCCVCC0		((uint)0x00400000)
+#define BCSR1_PCCVPP0		((uint)0x00200000)
+#define BCSR1_PCCVPP1		((uint)0x00100000)
+/* Both PCCVPP bits combined */
+#define BCSR1_PCCVPP_MASK	(BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
+#define BCSR1_RS232EN_2		((uint)0x00040000)
+#define BCSR1_PCCVCC1		((uint)0x00010000)
+/* Both PCCVCC bits combined */
+#define BCSR1_PCCVCC_MASK	(BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
+
+/* BCSR4 masks */
+#define BCSR4_ETH10_RST		((uint)0x80000000)	/* 10Base-T PHY reset*/
+#define BCSR4_USB_LO_SPD	((uint)0x04000000)
+#define BCSR4_USB_VCC		((uint)0x02000000)
+#define BCSR4_USB_FULL_SPD	((uint)0x00040000)
+#define BCSR4_USB_EN		((uint)0x00020000)
+
+/* BCSR5 masks */
+#define BCSR5_MII2_EN		0x40
+#define BCSR5_MII2_RST		0x20
+#define BCSR5_T1_RST		0x10
+#define BCSR5_ATM155_RST	0x08
+#define BCSR5_ATM25_RST		0x04
+#define BCSR5_MII1_EN		0x02
+#define BCSR5_MII1_RST		0x01
+
+#endif /* __ASM_MPC885ADS_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
new file mode 100644
index 0000000000..2d899be746
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -0,0 +1,217 @@
+/*
+ * Platform setup for the Freescale mpc885ads board
+ *
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Copyright 2005 MontaVista Software Inc.
+ *
+ * Heavily modified by Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include <linux/fsl_devices.h>
+#include <linux/mii.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/udbg.h>
+
+#include "mpc885ads.h"
+#include "mpc8xx.h"
+#include "pic.h"
+
+/*
+ * Mappings of the two register ranges (index 0 and 1) of the
+ * "fsl,mpc885ads-bcsr" node; filled in by mpc885ads_setup_arch().
+ */
+static u32 __iomem *bcsr, *bcsr5;
+
+/*
+ * One CPM pin configuration entry.  The three fields are handed, in this
+ * order, to cpm1_set_pin(port, pin, flags).
+ */
+struct cpm_pin {
+	int port, pin, flags;
+};
+
+/*
+ * Pin setup for the MPC885 ADS board, applied entry by entry through
+ * cpm1_set_pin() in init_ioports().
+ *
+ * The SMC2 and MII2 groups are mutually exclusive and are selected by
+ * CONFIG_MPC8xx_SECOND_ETH_FEC2.  The table is only consumed during boot by
+ * an __init function, so it is placed in init data and discarded afterwards.
+ */
+static struct cpm_pin mpc885ads_pins[] __initdata = {
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SMC2 */
+#ifndef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	{CPM_PORTE, 21, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTE, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+#endif
+
+	/* SCC3 */
+	{CPM_PORTA, 9, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTA, 8, CPM_PIN_INPUT}, /* TX */
+	{CPM_PORTC, 4, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */
+	{CPM_PORTC, 5, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */
+	{CPM_PORTE, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */
+	{CPM_PORTE, 17, CPM_PIN_INPUT}, /* CLK5 */
+	{CPM_PORTE, 16, CPM_PIN_INPUT}, /* CLK6 */
+
+	/* MII1 */
+	{CPM_PORTA, 0, CPM_PIN_INPUT},
+	{CPM_PORTA, 1, CPM_PIN_INPUT},
+	{CPM_PORTA, 2, CPM_PIN_INPUT},
+	{CPM_PORTA, 3, CPM_PIN_INPUT},
+	{CPM_PORTA, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTB, 19, CPM_PIN_INPUT},
+	{CPM_PORTB, 31, CPM_PIN_INPUT},
+	{CPM_PORTC, 12, CPM_PIN_INPUT},
+	{CPM_PORTC, 13, CPM_PIN_INPUT},
+	{CPM_PORTE, 30, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 31, CPM_PIN_OUTPUT},
+
+	/* MII2 */
+#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	{CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 16, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 21, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 22, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 23, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 24, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 25, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 26, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 27, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 28, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 29, CPM_PIN_OUTPUT},
+#endif
+	/* I2C */
+	{CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+	{CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+};
+
+/*
+ * Configure every pin listed in mpc885ads_pins[], route the SMC1/SMC2/SCC3
+ * clocks, and clear bits 0x00000180 of the CPM cp_cptr register.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *p = mpc885ads_pins;
+	const struct cpm_pin *end = p + ARRAY_SIZE(mpc885ads_pins);
+
+	for (; p < end; p++)
+		cpm1_set_pin(p->port, p->pin, p->flags);
+
+	/* SMC1/SMC2 take a baud-rate generator for both directions */
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX);
+	/* SCC3 takes CLK5 for transmit and CLK6 for receive */
+	cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK6, CPM_CLK_RX);
+
+	/* Set FEC1 and FEC2 to MII mode */
+	clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
+}
+
+/*
+ * Board setup hook for the MPC885 ADS.
+ *
+ * Resets the CPM, sets up the I/O ports, maps the two register ranges of
+ * the "fsl,mpc885ads-bcsr" node and programs the board control registers
+ * according to the CONFIG_MPC8xx_SECOND_ETH_* options.  Finally one of the
+ * two overlapping CPM device nodes is detached from the device tree.
+ *
+ * Failures to find or map the BCSR node are logged and the rest of the
+ * setup is skipped.  If only one of the two ranges could be mapped, that
+ * mapping is released again and both pointers are reset so that no
+ * half-initialised state is left behind.
+ */
+static void __init mpc885ads_setup_arch(void)
+{
+	struct device_node *np;
+
+	cpm_reset();
+	init_ioports();
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc885ads-bcsr");
+	if (!np) {
+		printk(KERN_CRIT "Could not find fsl,mpc885ads-bcsr node\n");
+		return;
+	}
+
+	bcsr = of_iomap(np, 0);
+	bcsr5 = of_iomap(np, 1);
+	of_node_put(np);
+
+	if (!bcsr || !bcsr5) {
+		printk(KERN_CRIT "Could not remap BCSR\n");
+		/* Do not leak whichever of the two mappings succeeded. */
+		if (bcsr)
+			iounmap(bcsr);
+		if (bcsr5)
+			iounmap(bcsr5);
+		bcsr = NULL;
+		bcsr5 = NULL;
+		return;
+	}
+
+	clrbits32(&bcsr[1], BCSR1_RS232EN_1);
+#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	setbits32(&bcsr[1], BCSR1_RS232EN_2);
+#else
+	clrbits32(&bcsr[1], BCSR1_RS232EN_2);
+#endif
+
+	/* Pulse the MII1 reset bit for 1 ms */
+	clrbits32(bcsr5, BCSR5_MII1_EN);
+	setbits32(bcsr5, BCSR5_MII1_RST);
+	udelay(1000);
+	clrbits32(bcsr5, BCSR5_MII1_RST);
+
+#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	/* Same reset pulse for MII2 */
+	clrbits32(bcsr5, BCSR5_MII2_EN);
+	setbits32(bcsr5, BCSR5_MII2_RST);
+	udelay(1000);
+	clrbits32(bcsr5, BCSR5_MII2_RST);
+#else
+	setbits32(bcsr5, BCSR5_MII2_EN);
+#endif
+
+#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3
+	/* Pulse the 10Base-T PHY reset bit, then set ETHEN */
+	clrbits32(&bcsr[4], BCSR4_ETH10_RST);
+	udelay(1000);
+	setbits32(&bcsr[4], BCSR4_ETH10_RST);
+
+	setbits32(&bcsr[1], BCSR1_ETHEN);
+
+	np = of_find_node_by_path("/soc@ff000000/cpm@9c0/serial@a80");
+#else
+	np = of_find_node_by_path("/soc@ff000000/cpm@9c0/ethernet@a40");
+#endif
+
+	/* The SCC3 enet registers overlap the SMC1 registers, so
+	 * one of the two must be removed from the device tree.
+	 */
+
+	if (np) {
+		of_detach_node(np);
+		of_node_put(np);
+	}
+}
+
+/*
+ * Device-tree nodes, matched by name, that are passed to
+ * of_platform_bus_probe() in declare_of_platform_devices().
+ */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc", },
+	{ .name = "cpm", },
+	{ .name = "localbus", },
+	{},	/* sentinel */
+};
+
+/*
+ * Device initcall for the mpc885_ads machine.  The probe result is ignored
+ * and 0 is always returned.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	/* Probe the buses listed in of_bus_ids (soc, cpm, localbus) */
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(mpc885_ads, declare_of_platform_devices);
+
+/*
+ * Machine description for the Freescale MPC885 ADS board, selected by the
+ * "fsl,mpc885ads" compatible string.  No set_rtc_time/get_rtc_time hooks
+ * are installed for this machine.
+ */
+define_machine(mpc885_ads) {
+	.name			= "Freescale MPC885 ADS",
+	.compatible		= "fsl,mpc885ads",
+	.setup_arch		= mpc885ads_setup_arch,
+	.init_IRQ		= mpc8xx_pic_init,
+	.get_irq		= mpc8xx_get_irq,
+	.restart		= mpc8xx_restart,
+	.calibrate_decr		= mpc8xx_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/mpc8xx.h b/arch/powerpc/platforms/8xx/mpc8xx.h
new file mode 100644
index 0000000000..79fae33248
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc8xx.h
@@ -0,0 +1,20 @@
+/*
+ * Prototypes, etc. for the Freescale MPC8xx embedded cpu chips
+ * May need to be cleaned as the port goes on ...
+ *
+ * Copyright (C) 2008 Jochen Friedrich <jochen@scram.de>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#ifndef __MPC8xx_H
+#define __MPC8xx_H
+
+/* Machine hooks shared by the 8xx board files (used in define_machine()). */
+
+/* Restart the board; does not return. */
+extern void __noreturn mpc8xx_restart(char *cmd);
+/* Decrementer calibration hook. */
+extern void mpc8xx_calibrate_decr(void);
+/* Write @tm to the on-chip RTC counter; returns 0. */
+extern int mpc8xx_set_rtc_time(struct rtc_time *tm);
+/* Fill @tm from the on-chip RTC counter. */
+extern void mpc8xx_get_rtc_time(struct rtc_time *tm);
+/* Return the Linux IRQ number of the pending interrupt, 0 if spurious. */
+extern unsigned int mpc8xx_get_irq(void);
+
+#endif /* __MPC8xx_H */
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
new file mode 100644
index 0000000000..ea6b0e523c
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -0,0 +1,155 @@
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/irq.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/8xx_immap.h>
+
+#include "pic.h"
+
+
+/* Vector number that mpc8xx_get_irq() treats as "no interrupt" */
+#define PIC_VEC_SPURRIOUS      15
+
+/* IRQ domain created in mpc8xx_pic_init() */
+static struct irq_domain *mpc8xx_pic_host;
+/* Software copy of the value last written to the SIMASK register */
+static unsigned long mpc8xx_cached_irq_mask;
+/* Mapped SIU register block; set in mpc8xx_pic_init() */
+static sysconf8xx_t __iomem *siu_reg;
+
+/*
+ * Return the register bit for the interrupt described by @d: hardware
+ * interrupt 0 maps to 0x80000000, and each higher number moves one bit
+ * to the right.
+ */
+static inline unsigned long mpc8xx_irqd_to_bit(struct irq_data *d)
+{
+	irq_hw_number_t hw = irqd_to_hwirq(d);
+
+	return 0x80000000 >> hw;
+}
+
+/* Set the interrupt's bit in the cached mask and write it to SIMASK. */
+static void mpc8xx_unmask_irq(struct irq_data *d)
+{
+	unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	mpc8xx_cached_irq_mask = mpc8xx_cached_irq_mask | bit;
+	out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
+}
+
+/* Clear the interrupt's bit in the cached mask and write it to SIMASK. */
+static void mpc8xx_mask_irq(struct irq_data *d)
+{
+	unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	mpc8xx_cached_irq_mask = mpc8xx_cached_irq_mask & ~bit;
+	out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
+}
+
+/* Acknowledge the interrupt by writing its bit to the SIPEND register. */
+static void mpc8xx_ack(struct irq_data *d)
+{
+	unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	out_be32(&siu_reg->sc_sipend, bit);
+}
+
+/*
+ * End-of-interrupt handler: set the interrupt's bit in the cached mask
+ * and write the mask to SIMASK (the same operation as unmasking).
+ */
+static void mpc8xx_end_irq(struct irq_data *d)
+{
+	unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	mpc8xx_cached_irq_mask = mpc8xx_cached_irq_mask | bit;
+	out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
+}
+
+/*
+ * Set the trigger type of an interrupt.
+ *
+ * Only a request that includes IRQ_TYPE_EDGE_FALLING on an even-numbered
+ * hardware interrupt has any effect: the interrupt's bit is set in the
+ * SIEL register and the flow handler is switched to handle_edge_irq.
+ * Every other request is accepted without touching the hardware.
+ *
+ * Always returns 0.
+ */
+static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	unsigned int siel;
+
+	/* only external IRQ senses are programmable */
+	if (!(flow_type & IRQ_TYPE_EDGE_FALLING))
+		return 0;
+	if (irqd_to_hwirq(d) & 1)
+		return 0;
+
+	siel = in_be32(&siu_reg->sc_siel);
+	siel |= mpc8xx_irqd_to_bit(d);
+	out_be32(&siu_reg->sc_siel, siel);
+	irq_set_handler_locked(d, handle_edge_irq);
+
+	return 0;
+}
+
+/* irq_chip callbacks for the 8xx SIU interrupt controller */
+static struct irq_chip mpc8xx_pic = {
+	.name = "8XX SIU",
+	.irq_unmask = mpc8xx_unmask_irq,
+	.irq_mask = mpc8xx_mask_irq,
+	.irq_ack = mpc8xx_ack,
+	.irq_eoi = mpc8xx_end_irq,
+	.irq_set_type = mpc8xx_set_irq_type,
+};
+
+/*
+ * Return the Linux IRQ number of the pending interrupt.
+ *
+ * The hardware number is taken from the top six bits of the SIVEC
+ * register.  PIC_VEC_SPURRIOUS yields 0; any other value is translated
+ * through the mpc8xx_pic_host domain.
+ */
+unsigned int mpc8xx_get_irq(void)
+{
+	/* For MPC8xx, read the SIVEC register and shift the bits down
+	 * to get the irq number.
+	 */
+	int vec = in_be32(&siu_reg->sc_sivec) >> 26;
+
+	if (vec != PIC_VEC_SPURRIOUS)
+		return irq_linear_revmap(mpc8xx_pic_host, vec);
+
+	return 0;
+}
+
+/*
+ * irq_domain .map callback: attach the mpc8xx_pic chip to @virq with
+ * handle_level_irq as the default flow handler.  Always returns 0.
+ */
+static int mpc8xx_pic_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	pr_debug("mpc8xx_pic_host_map(%d, 0x%lx)\n", virq, hw);
+
+	/* Set default irq handle */
+	irq_set_chip_and_handler(virq, &mpc8xx_pic, handle_level_irq);
+	return 0;
+}
+
+
+/*
+ * irq_domain .xlate callback: translate a device-tree interrupt specifier
+ * into a hardware interrupt number and a trigger type.
+ *
+ * @intspec[0] is the hardware interrupt number and must be in 0..0x1f.
+ * When a second cell is present and is less than 4 it indexes
+ * map_pic_senses[] to select the trigger type; otherwise IRQ_TYPE_NONE is
+ * reported.
+ *
+ * Returns 0 on success.  Returns -EINVAL, leaving *out_hwirq and
+ * *out_flags untouched, when the specifier is empty or the interrupt
+ * number is out of range, so that callers never consume unset outputs.
+ */
+static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	/* Trigger type for each value of the second specifier cell */
+	static const unsigned char map_pic_senses[4] = {
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+	};
+
+	if (intsize < 1 || intspec[0] > 0x1f)
+		return -EINVAL;
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1 && intspec[1] < 4)
+		*out_flags = map_pic_senses[intspec[1]];
+	else
+		*out_flags = IRQ_TYPE_NONE;
+
+	return 0;
+}
+
+
+/* Domain callbacks passed to irq_domain_add_linear() in mpc8xx_pic_init() */
+static const struct irq_domain_ops mpc8xx_pic_host_ops = {
+	.map = mpc8xx_pic_host_map,
+	.xlate = mpc8xx_pic_host_xlate,
+};
+
+/*
+ * Locate the 8xx interrupt controller in the device tree, map its
+ * registers into siu_reg and create the 64-entry linear IRQ domain.
+ *
+ * The node is looked up by the "fsl,pq1-pic" compatible string first and
+ * by the "mpc8xx-pic" device type second.  A missing node and a failed
+ * domain allocation are logged; a failure to obtain or map the register
+ * resource returns silently.
+ */
+void __init mpc8xx_pic_init(void)
+{
+	struct device_node *pic_np;
+	struct resource regs;
+
+	pic_np = of_find_compatible_node(NULL, NULL, "fsl,pq1-pic");
+	if (!pic_np) {
+		pic_np = of_find_node_by_type(NULL, "mpc8xx-pic");
+		if (!pic_np) {
+			printk(KERN_ERR "Could not find fsl,pq1-pic node\n");
+			return;
+		}
+	}
+
+	if (of_address_to_resource(pic_np, 0, &regs) == 0) {
+		siu_reg = ioremap(regs.start, resource_size(&regs));
+		if (siu_reg) {
+			mpc8xx_pic_host = irq_domain_add_linear(pic_np, 64,
+								&mpc8xx_pic_host_ops,
+								NULL);
+			if (!mpc8xx_pic_host)
+				printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
+		}
+	}
+
+	/* Drop the lookup reference on every path that found a node. */
+	of_node_put(pic_np);
+}
diff --git a/arch/powerpc/platforms/8xx/pic.h b/arch/powerpc/platforms/8xx/pic.h
new file mode 100644
index 0000000000..c70f1b446f
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/pic.h
@@ -0,0 +1,19 @@
+#ifndef _PPC_KERNEL_MPC8xx_H
+#define _PPC_KERNEL_MPC8xx_H
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+/* Find and map the 8xx interrupt controller and create its IRQ domain. */
+void mpc8xx_pic_init(void);
+/* Return the Linux IRQ number of the pending interrupt, 0 if spurious. */
+unsigned int mpc8xx_get_irq(void);
+
+/*
+ * Some internal interrupt registers use an 8-bit mask for the interrupt
+ * level instead of a number.
+ *
+ * Returns the bit 1 << (7 - mask / 2): arguments 0 and 1 give 0x80,
+ * 2 and 3 give 0x40, and so on down to 0x01 for 14 and 15.
+ */
+static inline uint mk_int_int_mask(uint mask)
+{
+	uint shift = 7 - (mask / 2);
+
+	return 1 << shift;
+}
+
+#endif /* _PPC_KERNEL_MPC8xx_H */
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
new file mode 100644
index 0000000000..d97a7910c5
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -0,0 +1,148 @@
+/*
+ * Platform setup for the MPC8xx based boards from TQM.
+ *
+ * Heiko Schocher <hs@denx.de>
+ * Copyright 2010 DENX Software Engineering GmbH
+ *
+ * based on:
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Copyright 2005 MontaVista Software Inc.
+ *
+ * Heavily modified by Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include <linux/fsl_devices.h>
+#include <linux/mii.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/udbg.h>
+
+#include "mpc8xx.h"
+#include "pic.h"
+
+/*
+ * One CPM pin configuration entry.  The three fields are handed, in this
+ * order, to cpm1_set_pin(port, pin, flags).
+ */
+struct cpm_pin {
+	int port, pin, flags;
+};
+
+/*
+ * Pins that init_ioports() always configures (SMC1 and SCC1).  Init data:
+ * only referenced from __init code.
+ */
+static struct cpm_pin tqm8xx_pins[] __initdata = {
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SCC1 */
+	{CPM_PORTA, 5, CPM_PIN_INPUT}, /* CLK1 */
+	{CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */
+	{CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */
+	{CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTC, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */
+	{CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO},
+	{CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO},
+};
+
+/*
+ * FEC (MII) pins; init_ioports() configures them only when the /aliases
+ * node has an "ethernet1" property.
+ */
+static struct cpm_pin tqm8xx_fec_pins[] __initdata = {
+	/* MII */
+	{CPM_PORTD, 3, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 5, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 6, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 7, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 8, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 9, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 12, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 13, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 14, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 15, CPM_PIN_OUTPUT},
+};
+
+/*
+ * Apply the first @n entries of the table starting at @pin, in order,
+ * through cpm1_set_pin().
+ */
+static void __init init_pins(int n, struct cpm_pin *pin)
+{
+	int idx;
+
+	for (idx = 0; idx < n; idx++)
+		cpm1_set_pin(pin[idx].port, pin[idx].pin, pin[idx].flags);
+}
+
+/*
+ * Configure the board's CPM pins.
+ *
+ * The SMC1/SCC1 pins and the SMC1 clock are always set up.  The FEC pins
+ * are set up only when the "aliases" node exists and has an "ethernet1"
+ * property; the property's value and length are not examined.
+ */
+static void __init init_ioports(void)
+{
+	struct device_node *dnode;
+	struct property *prop;
+
+	init_pins(ARRAY_SIZE(tqm8xx_pins), &tqm8xx_pins[0]);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+
+	dnode = of_find_node_by_name(NULL, "aliases");
+	if (dnode == NULL)
+		return;
+	/* Only the presence of the alias matters, so no length is requested. */
+	prop = of_find_property(dnode, "ethernet1", NULL);
+
+	of_node_put(dnode);
+
+	if (prop == NULL)
+		return;
+
+	/* init FEC pins */
+	init_pins(ARRAY_SIZE(tqm8xx_fec_pins), &tqm8xx_fec_pins[0]);
+}
+
+/* Board setup hook: reset the CPM, then configure the I/O ports. */
+static void __init tqm8xx_setup_arch(void)
+{
+	cpm_reset();
+	init_ioports();
+}
+
+/*
+ * Device-tree nodes, matched by name or by the "simple-bus" compatible
+ * string, that are passed to of_platform_bus_probe() in
+ * declare_of_platform_devices().
+ */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc", },
+	{ .name = "cpm", },
+	{ .name = "localbus", },
+	{ .compatible = "simple-bus" },
+	{},	/* sentinel */
+};
+
+/*
+ * Device initcall for the tqm8xx machine: probe the buses listed in
+ * of_bus_ids.  The probe result is ignored and 0 is always returned.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(tqm8xx, declare_of_platform_devices);
+
+/*
+ * Machine description for TQM8xx boards, selected by the "tqc,tqm8xx"
+ * compatible string.  Apart from setup_arch, all hooks are the shared
+ * mpc8xx_* helpers and udbg_progress.
+ */
+define_machine(tqm8xx) {
+	.name			= "TQM8xx",
+	.compatible		= "tqc,tqm8xx",
+	.setup_arch		= tqm8xx_setup_arch,
+	.init_IRQ		= mpc8xx_pic_init,
+	.get_irq		= mpc8xx_get_irq,
+	.restart		= mpc8xx_restart,
+	.calibrate_decr		= mpc8xx_calibrate_decr,
+	.set_rtc_time		= mpc8xx_set_rtc_time,
+	.get_rtc_time		= mpc8xx_get_rtc_time,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
new file mode 100644
index 0000000000..1fd253f92a
--- /dev/null
+++ b/arch/powerpc/platforms/Kconfig
@@ -0,0 +1,307 @@
+# SPDX-License-Identifier: GPL-2.0
+menu "Platform support"
+
+source "arch/powerpc/platforms/powernv/Kconfig"
+source "arch/powerpc/platforms/pseries/Kconfig"
+source "arch/powerpc/platforms/chrp/Kconfig"
+source "arch/powerpc/platforms/512x/Kconfig"
+source "arch/powerpc/platforms/52xx/Kconfig"
+source "arch/powerpc/platforms/powermac/Kconfig"
+source "arch/powerpc/platforms/maple/Kconfig"
+source "arch/powerpc/platforms/pasemi/Kconfig"
+source "arch/powerpc/platforms/ps3/Kconfig"
+source "arch/powerpc/platforms/cell/Kconfig"
+source "arch/powerpc/platforms/8xx/Kconfig"
+source "arch/powerpc/platforms/82xx/Kconfig"
+source "arch/powerpc/platforms/83xx/Kconfig"
+source "arch/powerpc/platforms/85xx/Kconfig"
+source "arch/powerpc/platforms/86xx/Kconfig"
+source "arch/powerpc/platforms/embedded6xx/Kconfig"
+source "arch/powerpc/platforms/44x/Kconfig"
+source "arch/powerpc/platforms/40x/Kconfig"
+source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/book3s/Kconfig"
+source "arch/powerpc/platforms/microwatt/Kconfig"
+
+config KVM_GUEST
+	bool "KVM Guest support"
+	select EPAPR_PARAVIRT
+	help
+	  This option enables various optimizations for running under the KVM
+	  hypervisor. Overhead for the kernel when not running inside KVM should
+	  be minimal.
+
+	  In case of doubt, say Y
+
+config EPAPR_PARAVIRT
+	bool "ePAPR para-virtualization support"
+	help
+	  Enables ePAPR para-virtualization support for guests.
+
+	  In case of doubt, say Y
+
+config PPC_HASH_MMU_NATIVE
+	bool
+	depends on PPC_BOOK3S
+	help
+	  Support for running natively on the hardware, i.e. without
+	  a hypervisor. This option is not user-selectable but should
+	  be selected by all platforms that need it.
+
+config PPC_OF_BOOT_TRAMPOLINE
+	bool "Support booting from Open Firmware or yaboot"
+	depends on PPC_BOOK3S_32 || PPC64
+	select RELOCATABLE if PPC64
+	default y
+	help
+	  Support for booting from Open Firmware or yaboot using an
+	  Open Firmware client interface. This enables the kernel to
+	  communicate with open firmware to retrieve system information
+	  such as the device tree.
+
+	  In case of doubt, say Y
+
+config PPC_DT_CPU_FTRS
+	bool "Device-tree based CPU feature discovery & setup"
+	depends on PPC_BOOK3S_64
+	default y
+	help
+	  This enables code to use a new device tree binding for describing CPU
+	  compatibility and features. Saying Y here will attempt to use the new
+	  binding if the firmware provides it. Currently only the skiboot
+	  firmware provides this binding.
+	  If you're not sure say Y.
+
+config UDBG_RTAS_CONSOLE
+	bool "RTAS based debug console"
+	depends on PPC_RTAS
+
+config PPC_SMP_MUXED_IPI
+	bool
+	help
+	  Select this option if your platform supports SMP and your
+	  interrupt controller provides fewer than 4 interrupts to each
+	  cpu.  This will enable the generic code to multiplex the 4
+	  messages on to one ipi.
+
+config IPIC
+	bool
+
+config MPIC
+	bool
+
+config MPIC_TIMER
+	bool "MPIC Global Timer"
+	depends on MPIC && FSL_SOC
+	help
+	  The MPIC global timer is a hardware timer inside the
+	  Freescale PIC complying with OpenPIC standard. When the
+	  specified interval times out, the hardware timer generates
+	  an interrupt. The driver currently is only tested on fsl
+	  chip, but it can potentially support other global timers
+	  complying with the OpenPIC standard.
+
+config FSL_MPIC_TIMER_WAKEUP
+	tristate "Freescale MPIC global timer wakeup driver"
+	depends on FSL_SOC &&  MPIC_TIMER && PM
+	help
+	  The driver provides a way to wake up the system by MPIC
+	  timer.
+	  e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup"
+
+config PPC_EPAPR_HV_PIC
+	bool
+	select EPAPR_PARAVIRT
+
+config MPIC_WEIRD
+	bool
+
+config MPIC_MSGR
+	bool "MPIC message register support"
+	depends on MPIC
+	help
+	  Enables support for the MPIC message registers.  These
+	  registers are used for inter-processor communication.
+
+config PPC_I8259
+	bool
+
+config U3_DART
+	bool
+	depends on PPC64
+
+config PPC_RTAS
+	bool
+
+config RTAS_ERROR_LOGGING
+	bool
+	depends on PPC_RTAS
+
+config PPC_RTAS_DAEMON
+	bool
+	depends on PPC_RTAS
+
+config RTAS_PROC
+	bool "Proc interface to RTAS"
+	depends on PPC_RTAS && PROC_FS
+	default y
+
+config RTAS_FLASH
+	tristate "Firmware flash interface"
+	depends on PPC64 && RTAS_PROC
+
+config MMIO_NVRAM
+	bool
+
+config MPIC_U3_HT_IRQS
+	bool
+
+config MPIC_BROKEN_REGREAD
+	bool
+	depends on MPIC
+	help
+	  This option enables a MPIC driver workaround for some chips
+	  that have a bug that causes some interrupt source information
+	  to not read back properly. It is safe to use on other chips as
+	  well, but enabling it uses about 8KB of memory to keep copies
+	  of the register contents in software.
+
+config EEH
+	bool
+	depends on (PPC_POWERNV || PPC_PSERIES) && PCI
+	default y
+
+config PPC_MPC106
+	bool
+
+config PPC_970_NAP
+	bool
+
+config PPC_P7_NAP
+	bool
+
+config PPC_BOOK3S_IDLE
+	def_bool y
+	depends on (PPC_970_NAP || PPC_P7_NAP)
+
+config PPC_INDIRECT_PIO
+	bool
+	select GENERIC_IOMAP
+
+config PPC_INDIRECT_MMIO
+	bool
+
+config PPC_IO_WORKAROUNDS
+	bool
+
+source "drivers/cpufreq/Kconfig"
+
+menu "CPUIdle driver"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
+config TAU
+	bool "On-chip CPU temperature sensor support"
+	depends on PPC_BOOK3S_32
+	help
+	  G3 and G4 processors have an on-chip temperature sensor called the
+	  'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die
+	  temperature within 2-4 degrees Celsius. This option shows the current
+	  on-die temperature in /proc/cpuinfo if the cpu supports it.
+
+	  Unfortunately, this sensor is very inaccurate when uncalibrated, so
+	  don't assume the cpu temp is actually what /proc/cpuinfo says it is.
+
+config TAU_INT
+	bool "Interrupt driven TAU driver (EXPERIMENTAL)"
+	depends on TAU
+	help
+	  The TAU supports an interrupt driven mode which causes an interrupt
+	  whenever the temperature goes out of range. This is the fastest way
+	  to get notified the temp has exceeded a range. With this option off,
+	  a timer is used to re-check the temperature periodically.
+
+	  If in doubt, say N here.
+
+config TAU_AVERAGE
+	bool "Average high and low temp"
+	depends on TAU
+	help
+	  The TAU hardware can compare the temperature to an upper and lower
+	  bound.  The default behavior is to show both the upper and lower
+	  bound in /proc/cpuinfo. If the range is large, the temperature is
+	  either changing a lot, or the TAU hardware is broken (likely on some
+	  G4's). If the range is small (around 4 degrees), the temperature is
+	  relatively stable.  If you say Y here, a single temperature value,
+	  halfway between the upper and lower bounds, will be reported in
+	  /proc/cpuinfo.
+
+	  If in doubt, say N here.
+
+config QE_GPIO
+	bool "QE GPIO support"
+	depends on QUICC_ENGINE
+	select GPIOLIB
+	select OF_GPIO_MM_GPIOCHIP
+	help
+	  Say Y here if you're going to use hardware that connects to the
+	  QE GPIOs.
+
+config CPM2
+	bool "Enable support for the CPM2 (Communications Processor Module)"
+	depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx
+	select CPM
+	select HAVE_PCI
+	select GPIOLIB
+	select OF_GPIO_MM_GPIOCHIP
+	help
+	  The CPM2 (Communications Processor Module) is a coprocessor on
+	  embedded CPUs made by Freescale.  Selecting this option means that
+	  you wish to build a kernel for a machine with a CPM2 coprocessor
+	  on it (826x, 827x, 8560).
+
+config FSL_ULI1575
+	bool "ULI1575 PCIe south bridge support"
+	depends on FSL_SOC_BOOKE || PPC_86xx
+	depends on PCI
+	select FSL_PCI
+	select GENERIC_ISA_DMA
+	help
+	  Support for the ULI1575 PCIe south bridge that exists on some
+	  Freescale reference boards. The boards all use the ULI in pretty
+	  much the same way.
+
+config CPM
+	bool
+	select GENERIC_ALLOCATOR
+
+config OF_RTC
+	bool
+	help
+	  Uses information from the OF or flattened device tree to instantiate
+	  platform devices for direct mapped RTC chips like the DS1742 or DS1743.
+
+config GEN_RTC
+	bool "Use the platform RTC operations from user space"
+	select RTC_CLASS
+	select RTC_DRV_GENERIC
+	help
+	  This option provides backwards compatibility with the old gen_rtc.ko
+	  module that was traditionally used for old PowerPC machines.
+	  Platforms should migrate to enabling the RTC_DRV_GENERIC by hand
+	  replacing their get_rtc_time/set_rtc_time callbacks with
+	  a proper RTC device driver.
+
+config MCU_MPC8349EMITX
+	bool "MPC8349E-mITX MCU driver"
+	depends on I2C=y && PPC_83xx
+	select GPIOLIB
+	help
+	  Say Y here to enable soft power-off functionality on the Freescale
+	  boards with the MPC8349E-mITX-compatible MCU chips. This driver will
+	  also register MCU GPIOs with the generic GPIO API, so you'll be able
+	  to use MCU pins as GPIOs.
+
+endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
new file mode 100644
index 0000000000..b2d8c0da2a
--- /dev/null
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -0,0 +1,646 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC32
+	bool
+	default y if !PPC64
+
+config PPC64
+	bool "64-bit kernel"
+	select ZLIB_DEFLATE
+	help
+	  This option selects whether a 32-bit or a 64-bit kernel
+	  will be built.
+
+menu "Processor support"
+choice
+	prompt "Processor Type"
+	depends on PPC32
+	help
+	  There are five families of 32 bit PowerPC chips supported.
+	  The most common ones are the desktop and server CPUs (603,
+	  604, 740, 750, 74xx) from Freescale and IBM, with their
+	  embedded 512x/52xx/82xx/83xx/86xx counterparts.
+	  The other embedded parts, namely 4xx, 8xx and e500
+	  (85xx) each form a family of their own that is not compatible
+	  with the others.
+
+	  If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
+
+config PPC_BOOK3S_32
+	bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
+	imply PPC_FPU
+	select PPC_HAVE_PMU_SUPPORT
+	select HAVE_ARCH_VMAP_STACK
+
+config PPC_85xx
+	bool "Freescale 85xx"
+	select PPC_E500
+
+config PPC_8xx
+	bool "Freescale 8xx"
+	select ARCH_SUPPORTS_HUGETLBFS
+	select FSL_SOC
+	select PPC_KUEP
+	select HAVE_ARCH_VMAP_STACK
+	select HUGETLBFS
+
+config 40x
+	bool "AMCC 40x"
+	select PPC_DCR_NATIVE
+	select PPC_UDBG_16550
+	select 4xx_SOC
+	select HAVE_PCI
+	select PPC_KUEP if PPC_KUAP
+
+config 44x
+	bool "AMCC 44x, 46x or 47x"
+	select PPC_DCR_NATIVE
+	select PPC_UDBG_16550
+	select 4xx_SOC
+	select HAVE_PCI
+	select PHYS_64BIT
+	select PPC_KUEP
+
+endchoice
+
+config PPC_BOOK3S_603
+	bool "Support for 603 SW loaded TLB"
+	depends on PPC_BOOK3S_32
+	default y
+	help
+	  Provide support for processors based on the 603 cores. Those
+	  processors don't have a HASH MMU and provide SW TLB loading.
+
+config PPC_BOOK3S_604
+	bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603
+	depends on PPC_BOOK3S_32
+	default y
+	help
+	  Provide support for processors not based on the 603 cores.
+	  Those processors have a HASH MMU.
+
+choice
+	prompt "Processor Type"
+	depends on PPC64
+	help
+	  There are two families of 64 bit PowerPC chips supported.
+	  The most common ones are the desktop and server CPUs
+	  (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
+
+	  The other are the "embedded" processors compliant with the
+	  "Book 3E" variant of the architecture
+
+config PPC_BOOK3S_64
+	bool "Server processors"
+	select PPC_FPU
+	select PPC_HAVE_PMU_SUPPORT
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+	select ARCH_ENABLE_SPLIT_PMD_PTLOCK
+	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+	select ARCH_SUPPORTS_HUGETLBFS
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select HAVE_MOVE_PMD
+	select HAVE_MOVE_PUD
+	select IRQ_WORK
+	select PPC_64S_HASH_MMU if !PPC_RADIX_MMU
+	select KASAN_VMALLOC if KASAN
+
+config PPC_BOOK3E_64
+	bool "Embedded processors"
+	select PPC_E500
+	select PPC_E500MC
+	select PPC_FPU # Make it a choice ?
+	select PPC_SMP_MUXED_IPI
+	select PPC_DOORBELL
+	select ZONE_DMA
+
+endchoice
+
+choice
+	prompt "CPU selection"
+	help
+	  This will create a kernel which is optimised for a particular CPU.
+	  The resulting kernel may not run on other CPUs, so use this with care.
+
+	  If unsure, select Generic.
+
+config POWERPC64_CPU
+	bool "Generic (POWER5 and PowerPC 970 and above)"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+	select PPC_64S_HASH_MMU
+
+config POWERPC64_CPU
+	bool "Generic (POWER8 and above)"
+	depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
+	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_64S_HASH_MMU
+	select PPC_HAS_LBARX_LHARX
+
+config POWERPC_CPU
+	bool "Generic 32 bits powerpc"
+	depends on PPC_BOOK3S_32
+
+config CELL_CPU
+	bool "Cell Broadband Engine"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+	depends on !CC_IS_CLANG
+	select PPC_64S_HASH_MMU
+
+config PPC_970_CPU
+	bool "PowerPC 970 (including PowerPC G5)"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+	select PPC_64S_HASH_MMU
+
+config POWER6_CPU
+	bool "POWER6"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+	select PPC_64S_HASH_MMU
+
+config POWER7_CPU
+	bool "POWER7"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_64S_HASH_MMU
+	select PPC_HAS_LBARX_LHARX
+
+config POWER8_CPU
+	bool "POWER8"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_64S_HASH_MMU
+	select PPC_HAS_LBARX_LHARX
+
+config POWER9_CPU
+	bool "POWER9"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_HAS_LBARX_LHARX
+
+config POWER10_CPU
+	bool "POWER10"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_HAVE_PREFIXED_SUPPORT
+	select PPC_HAVE_PCREL_SUPPORT
+
+config E5500_CPU
+	bool "Freescale e5500"
+	depends on PPC64 && PPC_E500
+
+config E6500_CPU
+	bool "Freescale e6500"
+	depends on PPC64 && PPC_E500
+	depends on !CC_IS_CLANG
+	select PPC_HAS_LBARX_LHARX
+
+config 405_CPU
+	bool "40x family"
+	depends on 40x
+	depends on !CC_IS_CLANG
+
+config 440_CPU
+	bool "440 (44x family)"
+	depends on 44x
+
+config 464_CPU
+	bool "464 (44x family)"
+	depends on 44x
+	depends on !CC_IS_CLANG
+
+config 476_CPU
+	bool "476 (47x family)"
+	depends on PPC_47x
+	depends on !CC_IS_CLANG
+
+config 860_CPU
+	bool "8xx family"
+	depends on PPC_8xx
+	depends on !CC_IS_CLANG
+
+config E300C2_CPU
+	bool "e300c2 (832x)"
+	depends on PPC_BOOK3S_32
+	depends on !CC_IS_CLANG
+
+config E300C3_CPU
+	bool "e300c3 (831x)"
+	depends on PPC_BOOK3S_32
+	depends on !CC_IS_CLANG
+
+config G4_CPU
+	bool "G4 (74xx)"
+	depends on PPC_BOOK3S_32
+	select ALTIVEC
+
+config E500_CPU
+	bool "e500 (8540)"
+	depends on PPC_85xx && !PPC_E500MC
+
+config E500MC_CPU
+	bool "e500mc"
+	depends on PPC_85xx && PPC_E500MC
+
+config TOOLCHAIN_DEFAULT_CPU
+	bool "Rely on the toolchain's implicit default CPU"
+
+endchoice
+
+config TARGET_CPU_BOOL
+	bool
+	default !TOOLCHAIN_DEFAULT_CPU
+
+config TARGET_CPU
+	string
+	depends on TARGET_CPU_BOOL
+	default "cell" if CELL_CPU
+	default "970" if PPC_970_CPU
+	default "power6" if POWER6_CPU
+	default "power7" if POWER7_CPU
+	default "power8" if POWER8_CPU
+	default "power9" if POWER9_CPU
+	default "power10" if POWER10_CPU
+	default "e5500" if E5500_CPU
+	default "e6500" if E6500_CPU
+	default "power4" if POWERPC64_CPU && !CPU_LITTLE_ENDIAN
+	default "power8" if POWERPC64_CPU && CPU_LITTLE_ENDIAN
+	default "405" if 405_CPU
+	default "440" if 440_CPU
+	default "464" if 464_CPU
+	default "476" if 476_CPU
+	default "860" if 860_CPU
+	default "e300c2" if E300C2_CPU
+	default "e300c3" if E300C3_CPU
+	default "G4" if G4_CPU
+	default "8540" if E500_CPU
+	default "e500mc" if E500MC_CPU
+	default "powerpc" if POWERPC_CPU
+
+config TUNE_CPU
+	string
+	depends on POWERPC64_CPU
+	default "-mtune=power10" if $(cc-option,-mtune=power10)
+	default "-mtune=power9"  if $(cc-option,-mtune=power9)
+	default "-mtune=power8"  if $(cc-option,-mtune=power8)
+
+config PPC_BOOK3S
+	def_bool y
+	depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
+
+config PPC_E500
+	select FSL_EMB_PERFMON
+	bool
+	select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
+	select PPC_SMP_MUXED_IPI
+	select PPC_DOORBELL
+	select PPC_KUEP
+
+config PPC_E500MC
+	bool "e500mc Support"
+	select PPC_FPU
+	select COMMON_CLK
+	depends on PPC_E500
+	help
+	  This must be enabled for running on e500mc (and derivatives
+	  such as e5500/e6500), and must be disabled for running on
+	  e500v1 or e500v2.
+
+config PPC_FPU_REGS
+	bool
+
+config PPC_FPU
+	bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x
+	default y if PPC64
+	select PPC_FPU_REGS
+	help
+	  This must be enabled to support the Floating Point Unit
+	  Most 6xx have an FPU but e300c2 core (mpc832x) don't have
+	  an FPU, so when building an embedded kernel for that target
+	  you can disable FPU support.
+
+	  If unsure say Y.
+
+config FSL_EMB_PERFMON
+	bool "Freescale Embedded Perfmon"
+	depends on PPC_E500 || PPC_83xx
+	help
+	  This is the Performance Monitor support found on the e500 core
+	  and some e300 cores (c3 and c4).  Select this only if your
+	  core supports the Embedded Performance Monitor APU
+
+config FSL_EMB_PERF_EVENT
+	bool
+	depends on FSL_EMB_PERFMON && PERF_EVENTS && !PPC_PERF_CTRS
+	default y
+
+config FSL_EMB_PERF_EVENT_E500
+	bool
+	depends on FSL_EMB_PERF_EVENT && PPC_E500
+	default y
+
+config 4xx
+	bool
+	depends on 40x || 44x
+	default y
+
+config BOOKE
+	bool
+	depends on PPC_E500 || 44x
+	default y
+
+config BOOKE_OR_40x
+	bool
+	depends on BOOKE || 40x
+	default y
+
+config PTE_64BIT
+	bool
+	depends on 44x || PPC_E500 || PPC_86xx
+	default y if PHYS_64BIT
+
+config PHYS_64BIT
+	bool 'Large physical address support' if PPC_E500 || PPC_86xx
+	depends on (44x || PPC_E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+	select PHYS_ADDR_T_64BIT
+	help
+	  This option enables kernel support for larger than 32-bit physical
+	  addresses.  This feature may not be available on all cores.
+
+	  If you have more than 3.5GB of RAM or so, you also need to enable
+	  SWIOTLB under Kernel Options for this to work.  The actual number
+	  is platform-dependent.
+
+	  If in doubt, say N here.
+
+config ALTIVEC
+	bool "AltiVec Support"
+	depends on PPC_BOOK3S || (PPC_E500MC && PPC64 && !E5500_CPU)
+	select PPC_FPU
+	help
+	  This option enables kernel support for the Altivec extensions to the
+	  PowerPC processor. The kernel currently supports saving and restoring
+	  altivec registers, and turning on the 'altivec enable' bit so user
+	  processes can execute altivec instructions.
+
+	  This option is only useful if you have a processor that supports
+	  altivec (G4, otherwise known as 74xx series), but does not have
+	  any effect on a non-altivec cpu (it does, however add code to the
+	  kernel).
+
+	  If in doubt, say Y here.
+
+config VSX
+	bool "VSX Support"
+	depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU
+	help
+
+	  This option enables kernel support for the Vector Scalar extensions
+	  to the PowerPC processor. The kernel currently supports saving and
+	  restoring VSX registers, and turning on the 'VSX enable' bit so user
+	  processes can execute VSX instructions.
+
+	  This option is only useful if you have a processor that supports
+	  VSX (P7 and above), but does not have any effect on non-VSX
+	  CPUs (it does, however add code to the kernel).
+
+	  If in doubt, say Y here.
+
+config SPE_POSSIBLE
+	def_bool y
+	depends on PPC_E500 && !PPC_E500MC
+
+config SPE
+	bool "SPE Support"
+	depends on SPE_POSSIBLE
+	default y
+	help
+	  This option enables kernel support for the Signal Processing
+	  Extensions (SPE) to the PowerPC processor. The kernel currently
+	  supports saving and restoring SPE registers, and turning on the
+	  'spe enable' bit so user processes can execute SPE instructions.
+
+	  This option is only useful if you have a processor that supports
+	  SPE (e500, otherwise known as 85xx series), but does not have any
+	  effect on a non-spe cpu (it does, however add code to the kernel).
+
+	  If in doubt, say Y here.
+
+config PPC_64S_HASH_MMU
+	bool "Hash MMU Support"
+	depends on PPC_BOOK3S_64
+	default y
+	help
+	  Enable support for the Power ISA Hash style MMU. This is implemented
+	  by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The
+	  OpenPOWER ISA does not mandate the hash MMU and some CPUs do not
+	  implement it (e.g., Microwatt).
+
+	  Note that POWER9 PowerVM platforms only support the hash
+	  MMU. From POWER10 radix is also supported by PowerVM.
+
+	  If you're unsure, say Y.
+
+config PPC_RADIX_MMU
+	bool "Radix MMU Support"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_GIGANTIC_PAGE
+	default y
+	help
+	  Enable support for the Power ISA 3.0 Radix style MMU. Currently this
+	  is only implemented by IBM Power9 CPUs; if you don't have one of them
+	  you can probably disable this.
+
+config PPC_RADIX_MMU_DEFAULT
+	bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU
+	depends on PPC_BOOK3S_64
+	depends on PPC_RADIX_MMU
+	default y
+	help
+	  When the hardware supports the Radix MMU, default to using it unless
+	  "disable_radix[=yes]" is specified on the kernel command line.
+
+	  If this option is disabled, the Hash MMU will be used by default,
+	  unless "disable_radix=no" is specified on the kernel command line.
+
+	  If you're unsure, say Y.
+
+config PPC_KERNEL_PREFIXED
+	depends on PPC_HAVE_PREFIXED_SUPPORT
+	depends on CC_HAS_PREFIXED
+	default n
+	bool "Build Kernel with Prefixed Instructions"
+	help
+	  POWER10 and later CPUs support prefixed instructions, 8 byte
+	  instructions that include large immediate, pc relative addressing,
+	  and various floating point, vector, MMA.
+
+	  This option builds the kernel with prefixed instructions, and
+	  allows a pc relative addressing option to be selected.
+
+	  Kernel support for prefixed instructions in applications and guests
+	  is not affected by this option.
+
+config PPC_KERNEL_PCREL
+	depends on PPC_HAVE_PCREL_SUPPORT
+	depends on PPC_HAVE_PREFIXED_SUPPORT
+	depends on CC_HAS_PCREL
+	default n
+	select PPC_KERNEL_PREFIXED
+	bool "Build Kernel with PC-Relative addressing model"
+	help
+	  POWER10 and later CPUs support pc relative addressing. Recent
+	  compilers have support for an ELF ABI extension for a pc relative
+	  ABI.
+
+	  This option builds the kernel with the pc relative ABI model.
+
+config PPC_KUEP
+	bool "Kernel Userspace Execution Prevention" if !40x
+	default y if !40x
+	help
+	  Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+	  If you're unsure, say Y.
+
+config PPC_KUAP
+	bool "Kernel Userspace Access Protection"
+	default y
+	help
+	  Enable support for Kernel Userspace Access Protection (KUAP)
+
+	  If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+	bool "Extra debugging for Kernel Userspace Access Protection"
+	depends on PPC_KUAP
+	help
+	  Add extra debugging for Kernel Userspace Access Protection (KUAP)
+	  If you're unsure, say N.
+
+config PPC_PKEY
+	def_bool y
+	depends on PPC_BOOK3S_64
+	depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP
+
+
+config PPC_MMU_NOHASH
+	def_bool y
+	depends on !PPC_BOOK3S
+
+config PPC_HAVE_PMU_SUPPORT
+	bool
+
+config PPC_HAVE_PREFIXED_SUPPORT
+	bool
+
+config PPC_HAVE_PCREL_SUPPORT
+	bool
+
+config PMU_SYSFS
+	bool "Create PMU SPRs sysfs file"
+	default n
+	help
+	  This option enables sysfs file creation for PMU SPRs like MMCR* and PMC*.
+
+config PPC_PERF_CTRS
+	def_bool y
+	depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+	help
+	 This enables the powerpc-specific perf_event back-end.
+
+config FORCE_SMP
+	# Allow platforms to force SMP=y by selecting this
+	bool
+	select SMP
+
+config SMP
+	depends on PPC_BOOK3S || PPC_E500 || PPC_47x
+	select GENERIC_IRQ_MIGRATION
+	bool "Symmetric multi-processing support" if !FORCE_SMP
+	help
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N. If you have a system with more
+	  than one CPU, say Y.  Note that the kernel does not currently
+	  support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
+	  since they have inadequate hardware support for multiprocessor
+	  operation.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on single-processor machines.
+	  On a single-processor machine, the kernel will run faster if you say
+	  N here.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-8192)" if SMP
+	range 2 8192 if SMP
+	default "1" if !SMP
+	default "32" if PPC64
+	default "4"
+
+config NOT_COHERENT_CACHE
+	bool
+	depends on 4xx || PPC_8xx || PPC_MPC512x || \
+		GAMECUBE_COMMON || AMIGAONE
+	select ARCH_HAS_DMA_PREP_COHERENT
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select DMA_DIRECT_REMAP
+	default n if PPC_47x
+	default y
+
+config CHECK_CACHE_COHERENCY
+	bool
+
+config PPC_DOORBELL
+	bool
+
+endmenu
+
+config VDSO32
+	def_bool y
+	depends on PPC32 || COMPAT
+	help
+	  This symbol controls whether we build the 32-bit VDSO. We obviously
+	  want to do that if we're building a 32-bit kernel. If we're building
+	  a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling
+	  COMPAT.
+
+choice
+	prompt "Endianness selection"
+	default CPU_BIG_ENDIAN
+	help
+	  This option selects whether a big endian or little endian kernel will
+	  be built.
+
+config CPU_BIG_ENDIAN
+	bool "Build big endian kernel"
+	help
+	  Build a big endian kernel.
+
+	  If unsure, select this option.
+
+config CPU_LITTLE_ENDIAN
+	bool "Build little endian kernel"
+	depends on PPC_BOOK3S_64
+	select PPC64_BOOT_WRAPPER
+	help
+	  Build a little endian kernel.
+
+	  Note that if cross compiling a little endian kernel,
+	  CROSS_COMPILE must point to a toolchain capable of targeting
+	  little endian powerpc.
+
+endchoice
+
+config PPC64_ELF_ABI_V1
+	def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2)
+
+config PPC64_ELF_ABI_V2
+	def_bool PPC64 && !PPC64_ELF_ABI_V1
+
+config PPC64_BOOT_WRAPPER
+	def_bool n
+	depends on CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
new file mode 100644
index 0000000000..94470fb27c
--- /dev/null
+++ b/arch/powerpc/platforms/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_FSL_ULI1575)	+= fsl_uli1575.o
+
+obj-$(CONFIG_PPC_PMAC)		+= powermac/
+obj-$(CONFIG_PPC_CHRP)		+= chrp/
+obj-$(CONFIG_4xx)		+= 4xx/
+obj-$(CONFIG_40x)		+= 40x/
+obj-$(CONFIG_44x)		+= 44x/
+obj-$(CONFIG_PPC_MPC512x)	+= 512x/
+obj-$(CONFIG_PPC_MPC52xx)	+= 52xx/
+obj-$(CONFIG_PPC_8xx)		+= 8xx/
+obj-$(CONFIG_PPC_82xx)		+= 82xx/
+obj-$(CONFIG_PPC_83xx)		+= 83xx/
+obj-$(CONFIG_FSL_SOC_BOOKE)	+= 85xx/
+obj-$(CONFIG_PPC_86xx)		+= 86xx/
+obj-$(CONFIG_PPC_POWERNV)	+= powernv/
+obj-$(CONFIG_PPC_PSERIES)	+= pseries/
+obj-$(CONFIG_PPC_MAPLE)		+= maple/
+obj-$(CONFIG_PPC_PASEMI)	+= pasemi/
+obj-$(CONFIG_PPC_CELL)		+= cell/
+obj-$(CONFIG_PPC_PS3)		+= ps3/
+obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
+obj-$(CONFIG_AMIGAONE)		+= amigaone/
+obj-$(CONFIG_PPC_BOOK3S)	+= book3s/
+obj-$(CONFIG_PPC_MICROWATT)	+= microwatt/
diff --git a/arch/powerpc/platforms/amigaone/Kconfig b/arch/powerpc/platforms/amigaone/Kconfig
new file mode 100644
index 0000000000..0741edb10b
--- /dev/null
+++ b/arch/powerpc/platforms/amigaone/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+config AMIGAONE
+	bool "Eyetech AmigaOne/MAI Teron"
+	depends on PPC_BOOK3S_32 && BROKEN_ON_SMP
+	select PPC_I8259
+	select PPC_INDIRECT_PCI
+	select PPC_UDBG_16550
+	select FORCE_PCI
+	select NOT_COHERENT_CACHE
+	select CHECK_CACHE_COHERENCY
+	select DEFAULT_UIMAGE
+	select HAVE_PCSPKR_PLATFORM
+	help
+	  Select AmigaOne for the following machines:
+	  - AmigaOne SE/Teron CX (G3 only)
+	  - AmigaOne XE/Teron PX
+	  - uA1/Teron mini
+	  More information is available at:
+	  <http://amigaone-linux.sourceforge.net/>.
diff --git a/arch/powerpc/platforms/amigaone/Makefile b/arch/powerpc/platforms/amigaone/Makefile
new file mode 100644
index 0000000000..e95e4e3e2d
--- /dev/null
+++ b/arch/powerpc/platforms/amigaone/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y	+= setup.o
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
new file mode 100644
index 0000000000..6c6e714a75
--- /dev/null
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AmigaOne platform setup
+ *
+ * Copyright 2008 Gerhard Pircher (gerhard_pircher@gmx.net)
+ *
+ *   Based on original amigaone_setup.c source code
+ * Copyright 2003 by Hans-Joerg Frieden and Thomas Frieden
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/seq_file.h>
+#include <generated/utsrelease.h>
+
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/pci-bridge.h>
+#include <asm/i8259.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/dma.h>
+
+extern void __flush_disable_L1(void);
+
+void amigaone_show_cpuinfo(struct seq_file *m)
+{
+	seq_puts(m, "vendor\t\t: Eyetech Ltd.\n"); /* fixed text, no format parsing */
+}
+
+/* Register the PCI host bridge at @dev; returns 0, -ENODEV or -ENOMEM. */
+static int __init amigaone_add_bridge(struct device_node *dev)
+{
+	const u32 *cfg_addr, *cfg_data;
+	const int *bus_range;
+	struct pci_controller *hose;
+	int len;
+
+	printk(KERN_INFO "Adding PCI host bridge %pOF\n", dev);
+
+	cfg_addr = of_get_address(dev, 0, NULL, NULL);
+	cfg_data = of_get_address(dev, 1, NULL, NULL);
+	if ((cfg_addr == NULL) || (cfg_data == NULL))
+		return -ENODEV;
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if ((bus_range == NULL) || (len < 2 * sizeof(int))) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+		bus_range = NULL;	/* too short to index: use the defaults */
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	setup_indirect_pci(hose, cfg_addr[0], cfg_data[0], 0);
+
+	/* Parse "ranges"; this also maps the I/O region and sets isa_io/mem_base. */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+	return 0;
+}
+
+void __init amigaone_setup_arch(void)
+{
+	if (ppc_md.progress != NULL)	/* the progress hook is optional */
+		ppc_md.progress("Linux/PPC " UTS_RELEASE "\n", 0);
+}
+
+static void __init amigaone_discover_phbs(void)
+{
+	struct device_node *bridge;
+	int rc = -ENODEV;	/* stays set when no matching node exists */
+
+	/* Add every "mai-logic,articia-s" bridge; rc keeps the last result. */
+	for_each_compatible_node(bridge, "pci", "mai-logic,articia-s")
+		rc = amigaone_add_bridge(bridge);
+
+	BUG_ON(rc);
+}
+
+void __init amigaone_init_IRQ(void)
+{
+	struct device_node *i8259_node, *pci_root;
+	const unsigned long *ack_prop;
+	unsigned long int_ack = 0;
+
+	/* The ISA interrupt controller node is mandatory. */
+	i8259_node = of_find_compatible_node(NULL, "interrupt-controller",
+					     "pnpPNP,000");
+	BUG_ON(!i8259_node);
+
+	/* The PCI root node may carry the interrupt acknowledge address. */
+	pci_root = of_find_compatible_node(NULL, "pci", "mai-logic,articia-s");
+	if (pci_root != NULL) {
+		ack_prop = of_get_property(pci_root,
+					   "8259-interrupt-acknowledge", NULL);
+		int_ack = ack_prop ? ack_prop[0] : 0;
+		of_node_put(pci_root);
+	}
+
+	if (!int_ack)
+		printk(KERN_WARNING "Cannot find PCI interrupt acknowledge"
+		       " address, polling\n");
+
+	i8259_init(i8259_node, int_ack);
+	ppc_md.get_irq = i8259_irq;
+	irq_set_default_host(i8259_get_host());
+}
+
+static int __init request_isa_regions(void)
+{
+	request_region(0x00, 0x20, "dma1");	/* ports 0x00-0x1f */
+	request_region(0x40, 0x20, "timer");	/* ports 0x40-0x5f */
+	request_region(0x80, 0x10, "dma page reg");	/* ports 0x80-0x8f */
+	request_region(0xc0, 0x20, "dma2");	/* ports 0xc0-0xdf */
+
+	return 0;	/* the request_region() results are not checked */
+}
+machine_device_initcall(amigaone, request_isa_regions);
+
+void __noreturn amigaone_restart(char *cmd)	/* @cmd is not used */
+{
+	local_irq_disable();
+
+	/* Flush and disable caches. */
+	__flush_disable_L1();
+
+	/* Set SRR0 to the reset vector and SRR1 to MSR_IP for the rfi below. */
+	mtspr(SPRN_SRR0, 0xfff00100);
+	mtspr(SPRN_SRR1, MSR_IP);
+
+	/* Do an rfi to jump back to firmware. */
+	__asm__ __volatile__("rfi" : : : "memory");
+
+	/* Not reached; the endless loop matches the __noreturn annotation. */
+	while (1);
+}
+
+static int __init amigaone_probe(void)
+{
+	/* Set the DMA mode values this platform uses. */
+	DMA_MODE_WRITE = 0x48;
+	DMA_MODE_READ = 0x44;
+
+	/*
+	 * Coherent memory accesses lock up the whole system, so clear the
+	 * CPU feature bit even on CPUs that ask for coherency.
+	 */
+	cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT;
+	return 1;
+}
+
+define_machine(amigaone) {	/* machine description: name, match string, hooks */
+	.name			= "AmigaOne",
+	.compatible		= "eyetech,amigaone",
+	.probe			= amigaone_probe,	/* CPU feature and DMA mode fixups */
+	.setup_arch		= amigaone_setup_arch,	/* prints the kernel release */
+	.discover_phbs		= amigaone_discover_phbs, /* adds the PCI host bridges */
+	.show_cpuinfo		= amigaone_show_cpuinfo, /* emits the "vendor" line */
+	.init_IRQ		= amigaone_init_IRQ,	/* i8259 setup */
+	.restart		= amigaone_restart,	/* rfi to the reset vector */
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig
new file mode 100644
index 0000000000..34c931592e
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_VAS
+	bool "IBM Virtual Accelerator Switchboard (VAS)"
+	depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES
+	default y
+	help
+	  This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+	  VAS devices are found in POWER9-based and later systems, they
+	  provide access to accelerator coprocessors such as NX-GZIP and
+	  NX-842. This config allows the kernel to use NX-842 accelerators,
+	  and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV
+	  and POWER10 PowerVM platforms.
+
+	  If unsure, say "N".
diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile
new file mode 100644
index 0000000000..e790f1910f
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC_VAS)	+= vas-api.o
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
new file mode 100644
index 0000000000..f381b177ea
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#define pr_fmt(fmt)	"vas-api: " fmt
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ *	fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ *	rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ *	paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ *	vas_copy(&crb, 0, 1);
+ *	vas_paste(paste_addr, 0, 1);
+ *	close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste return to user space directly, so refer to the NX hardware
+ * documentation for exact copy/paste usage and completion / error
+ * conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+	struct cdev cdev;	/* embedded: coproc_open() uses container_of() on it */
+	struct device *device;
+	char *name;
+	dev_t devt;
+	struct class *class;
+	enum vas_cop_type cop_type;	/* handed to vops->open_win() */
+	const struct vas_user_win_ops *vops; /* NULL => window open gets -EACCES */
+} coproc_device;
+
+struct coproc_instance {	/* per-open state kept in file->private_data */
+	struct coproc_dev *coproc;	/* device the file was opened on */
+	struct vas_window *txwin;	/* non-NULL makes a second open fail -EEXIST */
+};
+
+static char *coproc_devnode(const struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); /* @mode untouched */
+}
+
+/*
+ * Take reference to pid and mm
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+	struct mm_struct *mm;
+
+	/*
+	 * A window opened by a child thread may outlive that thread and
+	 * only be freed later by the parent. Hold a reference on the
+	 * opener's pid until then so the pid cannot be re-used; this only
+	 * matters for multithreaded applications.
+	 */
+	task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+
+	/* Look up the opener's mm; without one the call fails with -EPERM. */
+	mm = get_task_mm(current);
+	task_ref->mm = mm;
+	if (mm == NULL) {
+		put_pid(task_ref->pid);
+		pr_err("pid(%d): mm_struct is not found\n",
+				current->pid);
+		return -EPERM;
+	}
+
+	/* Hold the mm with mmgrab() and release what get_task_mm() took. */
+	mmgrab(mm);
+	mmput(mm);
+
+	/*
+	 * The process closes its windows on exit, but a child thread may
+	 * exit with its window still open. Pin the tgid as well, until
+	 * the window is closed, so that it is not re-used either.
+	 */
+	task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+	return 0;
+}
+
+/*
+ * Successful return must release the task reference with
+ * put_task_struct
+ */
+static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
+			  struct task_struct **tskp, struct pid **pidp)
+{
+	struct pid *owner = task_ref->pid;
+	struct task_struct *task = get_pid_task(owner, PIDTYPE_PID);
+
+	if (task == NULL) {
+		/* The opening thread is gone: fall back to the tgid. */
+		owner = task_ref->tgid;
+		task = get_pid_task(owner, PIDTYPE_PID);
+	}
+
+	/*
+	 * The parent thread (tgid) closes the window when it exits, so
+	 * finding neither task alive is unexpected.
+	 */
+	if (WARN_ON_ONCE(task == NULL))
+		return false;
+
+	if (task->flags & PF_EXITING) {
+		/* An exiting task gets no result; drop the reference taken. */
+		put_task_struct(task);
+		return false;
+	}
+
+	*pidp = owner;
+	*tskp = task;
+
+	return true;
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If the CSB cannot be updated, i.e. copy_to_user() failed because of an
+ * invalid csb_addr, send a signal to the process.
+ */
+void vas_update_csb(struct coprocessor_request_block *crb,
+		    struct vas_user_win_ref *task_ref)
+{
+	struct coprocessor_status_block csb;
+	struct kernel_siginfo info;
+	struct task_struct *tsk;
+	void __user *csb_addr;
+	struct pid *pid;
+	int rc;
+
+	/*
+	 * NX user space windows cannot be opened for task->mm == NULL
+	 * and faults will not be generated for kernel requests.
+	 */
+	if (WARN_ON_ONCE(!task_ref->mm))
+		return;
+
+	csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+	memset(&csb, 0, sizeof(csb));
+	csb.cc = CSB_CC_FAULT_ADDRESS;
+	csb.ce = CSB_CE_TERMINATION;
+	csb.cs = 0;
+	csb.count = 0;
+
+	/*
+	 * NX operates on and returns data in BE format, as defined by the
+	 * CRB struct. fault_storage_addr is therefore copied unconverted;
+	 * user space is expected to convert it to CPU format.
+	 */
+	csb.address = crb->stamp.nx.fault_storage_addr;
+	csb.flags = 0;
+
+	/*
+	 * A process closes its send window only after all pending NX
+	 * requests have completed. In multi-threaded applications a
+	 * child thread can open a window and exit without closing it,
+	 * while requests are still pending or other threads keep using
+	 * the window. Page faults NX hits on such requests must still
+	 * be handled: update the CSB with a translation error and the
+	 * fault address. If the csb_addr passed by user space is
+	 * invalid, send SIGSEGV to the pid saved in the window, or to
+	 * the tgid if that thread is no longer running. The parent
+	 * thread (tgid) will close this window upon its exit.
+	 *
+	 * pid and mm references are taken when the window is opened by
+	 * the process (pid), so the tgid is used only when a child
+	 * thread opened the window and exited without closing it.
+	 */
+
+	if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
+		return;
+
+	kthread_use_mm(task_ref->mm);
+	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+	/*
+	 * User space polls on csb.flags (the first byte), so issue a
+	 * barrier and then copy that byte again with the flag set.
+	 */
+	if (!rc) {
+		csb.flags = CSB_V;
+		/* Make sure update to csb.flags is visible now */
+		smp_mb();
+		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+	}
+	kthread_unuse_mm(task_ref->mm);
+	put_task_struct(tsk);
+
+	/* Success */
+	if (!rc)
+		return;
+
+
+	pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+			csb_addr, pid_vnr(pid));
+
+	clear_siginfo(&info);
+	info.si_signo = SIGSEGV;
+	info.si_errno = EFAULT;
+	info.si_code = SEGV_MAPERR;
+	info.si_addr = csb_addr;
+	/*
+	 * The process polls on csb.flags after sending a request to
+	 * NX, so a CSB update should generally not fail unless the
+	 * application passed an invalid csb_addr. In that case log a
+	 * message and raise the signal; it is up to user space whether
+	 * to ignore or handle it.
+	 */
+	rcu_read_lock();
+	rc = kill_pid_info(SIGSEGV, &info, pid);
+	rcu_read_unlock();
+
+	pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc);
+}
+
+void vas_dump_crb(struct coprocessor_request_block *crb)
+{
+	struct data_descriptor_entry *dde;
+	struct nx_fault_stamp *nx;
+
+	dde = &crb->source;	/* source DDE; address and length are kept big-endian */
+	pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+		be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+		dde->count, dde->index, dde->flags);
+
+	dde = &crb->target;	/* target DDE, same structure as the source DDE */
+	pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+		be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+		dde->count, dde->index, dde->flags);
+
+	nx = &crb->stamp.nx;	/* NX fault stamp: pswid, fault address, flags, status */
+	pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+		be32_to_cpu(nx->pswid),
+		be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+		nx->flags, nx->fault_status);
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+	struct coproc_instance *cp_inst;
+
+	cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);	/* zeroed: txwin starts NULL */
+	if (!cp_inst)
+		return -ENOMEM;
+
+	cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
+					cdev);
+	fp->private_data = cp_inst;	/* per-open state, freed in coproc_release() */
+
+	return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+	void __user *uptr = (void __user *)arg;
+	struct vas_tx_win_open_attr uattr;
+	struct coproc_instance *cp_inst;
+	struct vas_window *txwin;
+	int rc;
+
+	cp_inst = fp->private_data;
+
+	/*
+	 * Only one send window is allowed per open file descriptor.
+	 */
+	if (cp_inst->txwin)
+		return -EEXIST;
+
+	rc = copy_from_user(&uattr, uptr, sizeof(uattr));
+	if (rc) {
+		pr_err("copy_from_user() returns %d\n", rc);
+		return -EFAULT;
+	}
+
+	if (uattr.version != 1) {	/* 1 is the only attribute version accepted */
+		pr_err("Invalid window open API version\n");
+		return -EINVAL;
+	}
+
+	if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {
+		pr_err("VAS API is not registered\n");
+		return -EACCES;
+	}
+
+	txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
+						cp_inst->coproc->cop_type);
+	if (IS_ERR(txwin)) {
+		pr_err_ratelimited("VAS window open failed rc=%ld\n",
+				PTR_ERR(txwin));
+		return PTR_ERR(txwin);
+	}
+
+	mutex_init(&txwin->task_ref.mmap_mutex);	/* taken in coproc_mmap() and vas_mmap_fault() */
+	cp_inst->txwin = txwin;
+
+	return 0;
+}
+
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+	struct coproc_instance *cp_inst = fp->private_data;
+	int rc;
+
+	if (cp_inst->txwin) {
+		if (cp_inst->coproc->vops &&
+			cp_inst->coproc->vops->close_win) {
+			rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+			if (rc)
+				return rc;	/* NOTE(review): cp_inst is not freed on this path - confirm intended */
+		}
+		cp_inst->txwin = NULL;
+	}
+
+	kfree(cp_inst);
+	fp->private_data = NULL;
+
+	/*
+	 * We don't know here whether the user has other receive
+	 * windows open, so we can't really call clear_thread_tidr().
+	 * Hence, once the process calls set_thread_tidr(), the
+	 * TIDR value sticks around until the process exits, which
+	 * results in an extra copy in restore_sprs().
+	 */
+
+	return 0;
+}
+
+/*
+ * If the instruction at the faulting NIP is a paste, clear CR0[0-2] (EQ
+ * included) so it reads as failed, step over it and return 0. Else < 0.
+ */
+static int do_fail_paste(void)
+{
+	struct pt_regs *regs = current->thread.regs;
+	u32 instword;
+
+	if (WARN_ON_ONCE(!regs))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!user_mode(regs)))
+		return -EINVAL;
+
+	/*
+	 * If the instruction word cannot be read, return -EAGAIN: the
+	 * caller then reports the fault as handled without fixing it, so
+	 * the access is retried or the instruction fetch itself faults.
+	 */
+	if (get_user(instword, (u32 __user *)(regs->nip)))
+		return -EAGAIN;
+
+	/*
+	 * Not a paste instruction, so the caller may fail the fault.
+	 */
+	if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
+		return -ENOENT;
+
+	regs->ccr &= ~0xe0000000;	/* Clear CR0[0-2] to fail paste */
+	regs_add_return_ip(regs, 4);	/* Emulate the paste */
+
+	return 0;
+}
+
+/*
+ * Fault handler for the paste address mapping. A fault is raised when the
+ * kernel has closed the window in the hypervisor (on pseries) because of
+ * lost credits, or when the paste address is otherwise not mapped.
+ */
+static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *fp = vma->vm_file;
+	struct coproc_instance *cp_inst = fp->private_data;
+	struct vas_window *txwin;
+	vm_fault_t fault;
+	u64 paste_addr;
+	int ret;
+
+	/*
+	 * The window is not open. This error is not expected.
+	 */
+	if (!cp_inst || !cp_inst->txwin) {
+		pr_err("Unexpected fault on paste address with TX window closed\n");
+		return VM_FAULT_SIGBUS;
+	}
+
+	txwin = cp_inst->txwin;
+	/*
+	 * When the LPAR loses credits (core removal) or is being
+	 * migrated, the existing mapping of the paste address is
+	 * invalidated and the windows are marked inactive (zap_vma_pages
+	 * in reconfig_close_windows()).
+	 * A new mapping is set up later, after migration or once credits
+	 * are available again, so faults keep arriving here while user
+	 * space issues NX requests in the meantime.
+	 */
+	if (txwin->task_ref.vma != vmf->vma) {
+		pr_err("No previous mapping with paste address\n");
+		return VM_FAULT_SIGBUS;
+	}
+
+	mutex_lock(&txwin->task_ref.mmap_mutex);
+	/*
+	 * The window may be inactive due to lost credits (e.g. core
+	 * removal with DLPAR). If it has become active again because
+	 * credits are available, map the new paste address at the
+	 * window's virtual address.
+	 */
+	if (txwin->status == VAS_WIN_ACTIVE) {
+		paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+		if (paste_addr) {
+			fault = vmf_insert_pfn(vma, vma->vm_start,
+					(paste_addr >> PAGE_SHIFT));
+			mutex_unlock(&txwin->task_ref.mmap_mutex);
+			return fault;
+		}
+	}
+	mutex_unlock(&txwin->task_ref.mmap_mutex);
+
+	/*
+	 * The fault was taken because the actual window is closed,
+	 * which happens during migration or after losing credits.
+	 * With no mapping available, report a paste instruction
+	 * failure to user space instead.
+	 */
+	ret = do_fail_paste();
+	/*
+	 * User space can retry several times until success (needed
+	 * for migration), or should fall back to SW compression or
+	 * manage with the other open windows if available.
+	 * From the sysfs interface it can tell whether these failures
+	 * come from migration or from core removal:
+	 * nr_used_credits > nr_total_credits when credits were lost.
+	 */
+	if (!ret || (ret == -EAGAIN))
+		return VM_FAULT_NOPAGE;
+
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct vas_vm_ops = {
+	.fault = vas_mmap_fault,
+};
+
+/* mmap() handler: map the send window's paste address page into user space. */
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct coproc_instance *cp_inst = fp->private_data;
+	struct vas_window *txwin;
+	unsigned long pfn;
+	u64 paste_addr;
+	pgprot_t prot;
+	int rc;
+
+	txwin = cp_inst->txwin;
+
+	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+		pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n",
+				(vma->vm_end - vma->vm_start), PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	/* Ensure instance has an open send window */
+	if (!txwin) {
+		pr_err("No send window open?\n");
+		return -EINVAL;
+	}
+
+	if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
+		pr_err("VAS API is not registered\n");
+		return -EACCES;
+	}
+
+	/*
+	 * The window is opened with the ioctl before mmap(), but the
+	 * hypervisor may close it in between when credits are lost (core
+	 * removal on pseries). Fail with -EACCES while it is inactive; user
+	 * space can reissue mmap() once it is active again or open a new
+	 * window when credits are available. mmap_mutex serializes this
+	 * mapping against DLPAR close/open events.
+	 */
+	mutex_lock(&txwin->task_ref.mmap_mutex);
+	if (txwin->status != VAS_WIN_ACTIVE) {
+		pr_err("Window is not active\n");
+		rc = -EACCES;
+		goto out;
+	}
+
+	paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+	if (!paste_addr) {
+		pr_err("Window paste address failed\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	pfn = paste_addr >> PAGE_SHIFT;
+
+	/* flags, page_prot from cxl_mmap(), except we want cachable */
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+	prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+	rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+			vma->vm_end - vma->vm_start, prot);
+
+	pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr,
+			vma->vm_start, rc);
+
+	if (!rc) {
+		txwin->task_ref.vma = vma;	/* only remember a mapping that exists */
+		vma->vm_ops = &vas_vm_ops;
+	}
+
+out:
+	mutex_unlock(&txwin->task_ref.mmap_mutex);
+	return rc;
+}
+
+/* ioctl entry point of the coprocessor character device. */
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+	/* VAS_TX_WIN_OPEN is the only command handled; reject the rest */
+	if (cmd != VAS_TX_WIN_OPEN)
+		return -EINVAL;
+
+	return coproc_ioc_tx_win_open(fp, arg);
+}
+
+static struct file_operations coproc_fops = {	/* not const: .owner is set in vas_register_coproc_api() */
+	.open = coproc_open,
+	.release = coproc_release,
+	.mmap = coproc_mmap,
+	.unlocked_ioctl = coproc_ioctl,
+};
+
+/*
+ * Create the char device @name through which user space opens VAS windows.
+ * Only nx-gzip uses it for now; other coprocessor types can be added later.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+			    const char *name,
+			    const struct vas_user_win_ops *vops)
+{
+	int rc = -EINVAL;
+	dev_t devno;
+
+	rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
+	if (rc) {
+		pr_err("Unable to allocate coproc major number: %i\n", rc);
+		return rc;
+	}
+
+	pr_devel("%s device allocated, dev [%i,%i]\n", name,
+			MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+	coproc_device.class = class_create(name);
+	if (IS_ERR(coproc_device.class)) {
+		rc = PTR_ERR(coproc_device.class);
+		pr_err("Unable to create %s class %d\n", name, rc);
+		goto err_class;
+	}
+	coproc_device.class->devnode = coproc_devnode;
+	coproc_device.cop_type = cop_type;
+	coproc_device.vops = vops;
+
+	coproc_fops.owner = mod;	/* file operations are owned by the registering module */
+	cdev_init(&coproc_device.cdev, &coproc_fops);
+
+	devno = MKDEV(MAJOR(coproc_device.devt), 0);	/* NOTE(review): region above starts at minor 1 - confirm */
+	rc = cdev_add(&coproc_device.cdev, devno, 1);
+	if (rc) {
+		pr_err("cdev_add() failed %d\n", rc);
+		goto err_cdev;
+	}
+
+	coproc_device.device = device_create(coproc_device.class, NULL,
+			devno, NULL, name, MINOR(devno));
+	if (IS_ERR(coproc_device.device)) {
+		rc = PTR_ERR(coproc_device.device);
+		pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+		goto err;
+	}
+
+	pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno));
+
+	return 0;
+
+err:	/* unwind in reverse order of setup */
+	cdev_del(&coproc_device.cdev);
+err_cdev:
+	class_destroy(coproc_device.class);
+err_class:
+	unregister_chrdev_region(coproc_device.devt, 1);
+	return rc;
+}
+
+void vas_unregister_coproc_api(void)
+{
+	dev_t devno;
+
+	cdev_del(&coproc_device.cdev);
+	devno = MKDEV(MAJOR(coproc_device.devt), 0);	/* same devno as built at registration */
+	device_destroy(coproc_device.class, devno);
+
+	class_destroy(coproc_device.class);
+	unregister_chrdev_region(coproc_device.devt, 1);
+}
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 0000000000..34669b060f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_CELL
+	select PPC_64S_HASH_MMU if PPC64
+	bool
+
+config PPC_CELL_COMMON
+	bool
+	select PPC_CELL
+	select PPC_DCR_MMIO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
+	select PPC_HASH_MMU_NATIVE
+	select PPC_RTAS
+	select IRQ_EDGE_EOI_HANDLER
+
+config PPC_CELL_NATIVE
+	bool
+	select PPC_CELL_COMMON
+	select MPIC
+	select PPC_IO_WORKAROUNDS
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC #test only
+	select IBM_EMAC_TAH if IBM_EMAC  #test only
+
+config PPC_IBM_CELL_BLADE
+	bool "IBM Cell Blade"
+	depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+	select PPC_CELL_NATIVE
+	select PPC_OF_PLATFORM_PCI
+	select FORCE_PCI
+	select MMIO_NVRAM
+	select PPC_UDBG_16550
+	select UDBG_RTAS_CONSOLE
+
+config AXON_MSI
+	bool
+	depends on PPC_IBM_CELL_BLADE && PCI_MSI
+	select IRQ_DOMAIN_NOMAP
+	default y
+
+menu "Cell Broadband Engine options"
+	depends on PPC_CELL
+
+config SPU_FS
+	tristate "SPU file system"
+	default m
+	depends on PPC_CELL
+	depends on COREDUMP
+	select SPU_BASE
+	help
+	  The SPU file system is used to access Synergistic Processing
+	  Units on machines implementing the Broadband Processor
+	  Architecture.
+
+config SPU_BASE
+	bool
+	select PPC_COPRO_BASE
+
+config CBE_RAS
+	bool "RAS features for bare metal Cell BE"
+	depends on PPC_CELL_NATIVE
+	default y
+
+config PPC_IBM_CELL_RESETBUTTON
+	bool "IBM Cell Blade Pinhole reset button"
+	depends on CBE_RAS && PPC_IBM_CELL_BLADE
+	default y
+	help
+	  Support Pinhole Resetbutton on IBM Cell blades.
+	  This adds a method to trigger system reset via front panel pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+	tristate "IBM Cell Blade power button"
+	depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
+	default y
+	help
+	  Support Powerbutton on IBM Cell blades.
+	  This will enable the powerbutton as an input device.
+
+config CBE_THERM
+	tristate "CBE thermal support"
+	default m
+	depends on CBE_RAS && SPU_BASE
+
+config PPC_PMI
+	tristate
+	default y
+	depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
+	help
+	  PMI (Platform Management Interrupt) is a way to
+	  communicate with the BMC (Baseboard Management Controller).
+	  It is used in some IBM Cell blades.
+
+config CBE_CPUFREQ_SPU_GOVERNOR
+	tristate "CBE frequency scaling based on SPU usage"
+	depends on SPU_FS && CPU_FREQ
+	default m
+	help
+	  This governor checks for spu usage to adjust the cpu frequency.
+	  If no spu is running on a given cpu, that cpu will be throttled to
+	  the minimal possible frequency.
+
+endmenu
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
new file mode 100644
index 0000000000..7ea6692f67
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PPC_CELL_COMMON)		+= cbe_regs.o interrupt.o pervasive.o
+
+obj-$(CONFIG_PPC_CELL_NATIVE)		+= iommu.o setup.o spider-pic.o \
+					   pmu.o spider-pci.o
+obj-$(CONFIG_CBE_RAS)			+= ras.o
+
+obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
+obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR)	+= cpufreq_spudemand.o
+
+obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON)	+= cbe_powerbutton.o
+
+ifdef CONFIG_SMP
+obj-$(CONFIG_PPC_CELL_NATIVE)		+= smp.o
+endif
+
+# needed only when building loadable spufs.ko
+spu-priv1-$(CONFIG_PPC_CELL_COMMON)	+= spu_priv1_mmio.o
+spu-manage-$(CONFIG_PPC_CELL_COMMON)	+= spu_manage.o
+
+obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
+					   spu_syscalls.o \
+					   $(spu-priv1-y) \
+					   $(spu-manage-y) \
+					   spufs/
+
+obj-$(CONFIG_AXON_MSI)			+= axon_msi.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 0000000000..28dc86744c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+
+#include "cell.h"
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG	0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG	0x3
+#define MSIC_BASE_ADDR_LO_REG	0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG	0x5
+#define MSIC_WRITE_OFFSET_REG	0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE		0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE	0x0002
+#define MSIC_CTRL_IRQ_ENABLE		0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE	0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT	16
+#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE	0x10
+
+
+struct axon_msic {
+	struct irq_domain *irq_domain;	/* nomap domain created in axon_msi_probe() */
+	__le32 *fifo_virt;		/* CPU view of the FIFO; words are little-endian */
+	dma_addr_t fifo_phys;		/* DMA address written to the BASE_ADDR registers */
+	dcr_host_t dcr_host;		/* mapped DCR range of the MSIC registers */
+	u32 read_offset;		/* byte offset of the next FIFO entry to consume */
+#ifdef DEBUG
+	u32 __iomem *trigger;		/* ioremap()ed in axon_msi_debug_setup() */
+#endif
+};
+
+#ifdef DEBUG
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
+#else
+static inline void axon_msi_debug_setup(struct device_node *dn,
+					struct axon_msic *msic) { }
+#endif
+
+
+/* Write @val to MSIC register @dcr_n, tracing the access in call order. */
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+	pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", dcr_n, val);
+	dcr_write(msic->dcr_host, dcr_n, val);
+}
+
+static void axon_msi_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct axon_msic *msic = irq_desc_get_handler_data(desc);
+	u32 write_offset, msi;
+	int idx;
+	int retry = 0;
+
+	write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
+	pr_devel("axon_msi: original write_offset 0x%x\n", write_offset);
+
+	/* write_offset doesn't wrap properly, so we have to mask it */
+	write_offset &= MSIC_FIFO_SIZE_MASK;
+
+	while (msic->read_offset != write_offset && retry < 100) {
+		idx  = msic->read_offset / sizeof(__le32);	/* byte offset -> word index */
+		msi  = le32_to_cpu(msic->fifo_virt[idx]);
+		msi &= 0xFFFF;	/* the irq number is taken from the low 16 bits */
+
+		pr_devel("axon_msi: woff %x roff %x msi %x\n",
+			  write_offset, msic->read_offset, msi);
+
+		if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
+			generic_handle_irq(msi);
+			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);	/* back to the fill value used at probe */
+		} else {
+			/*
+			 * Reading the MSIC_WRITE_OFFSET_REG does not
+			 * reliably flush the outstanding DMA to the
+			 * FIFO buffer. Here we were reading stale
+			 * data, so we need to retry.
+			 */
+			udelay(1);
+			retry++;
+			pr_devel("axon_msi: invalid irq 0x%x!\n", msi);
+			continue;	/* same entry again: read_offset is not advanced */
+		}
+
+		if (retry) {
+			pr_devel("axon_msi: late irq 0x%x, retry %d\n",
+				 msi, retry);
+			retry = 0;
+		}
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
+
+	if (retry) {	/* gave up on an entry after 100 retries: skip over it */
+		printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+	struct irq_domain *irq_domain;
+	struct device_node *dn, *tmp;
+	const phandle *ph;
+	struct axon_msic *msic = NULL;	/* returned as NULL on every failure path */
+
+	dn = of_node_get(pci_device_to_OF_node(dev));
+	if (!dn) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return NULL;
+	}
+
+	for (; dn; dn = of_get_next_parent(dn)) {	/* search this node, then its ancestors */
+		ph = of_get_property(dn, "msi-translator", NULL);
+		if (ph)
+			break;
+	}
+
+	if (!ph) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-translator property found\n");
+		goto out_error;
+	}
+
+	tmp = dn;	/* node carrying the property; its reference is dropped below */
+	dn = of_find_node_by_phandle(*ph);
+	of_node_put(tmp);
+	if (!dn) {
+		dev_dbg(&dev->dev,
+			"axon_msi: msi-translator doesn't point to a node\n");
+		goto out_error;
+	}
+
+	irq_domain = irq_find_host(dn);
+	if (!irq_domain) {
+		dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %pOF\n",
+			dn);
+		goto out_error;
+	}
+
+	msic = irq_domain->host_data;	/* set to the msic when the domain is created in probe */
+
+out_error:
+	of_node_put(dn);	/* dn may be NULL here */
+
+	return msic;
+}
+
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+	struct device_node *dn;
+	int len;
+	const u32 *prop;
+
+	dn = of_node_get(pci_device_to_OF_node(dev));
+	if (!dn) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return -ENODEV;
+	}
+
+	for (; dn; dn = of_get_next_parent(dn)) {	/* search this node, then its ancestors */
+		if (!dev->no_64bit_msi) {	/* prefer the 64-bit address when the device allows it */
+			prop = of_get_property(dn, "msi-address-64", &len);
+			if (prop)
+				break;
+		}
+
+		prop = of_get_property(dn, "msi-address-32", &len);
+		if (prop)
+			break;
+	}
+
+	if (!prop) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-address-(32|64) properties found\n");
+		of_node_put(dn);
+		return -ENOENT;
+	}
+
+	switch (len) {	/* property length in bytes selects the address width */
+	case 8:
+		msg->address_hi = prop[0];
+		msg->address_lo = prop[1];
+		break;
+	case 4:
+		msg->address_hi = 0;
+		msg->address_lo = prop[0];
+		break;
+	default:
+		dev_dbg(&dev->dev,
+			"axon_msi: malformed msi-address-(32|64) property\n");
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	of_node_put(dn);
+
+	return 0;
+}
+
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	unsigned int virq, rc;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	struct axon_msic *msic;
+
+	msic = find_msi_translator(dev);
+	if (!msic)
+		return -ENODEV;
+
+	rc = setup_msi_msg_address(dev, &msg);	/* fills address_hi/lo; data is set per vector below */
+	if (rc)
+		return rc;
+
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) {
+		virq = irq_create_direct_mapping(msic->irq_domain);
+		if (!virq) {
+			dev_warn(&dev->dev,
+				 "axon_msi: virq allocation failed!\n");
+			return -1;	/* NOTE(review): bare -1, not an errno constant */
+		}
+		dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
+
+		irq_set_msi_desc(virq, entry);
+		msg.data = virq;	/* presumably what the MSIC stores in its FIFO; see axon_msi_cascade() */
+		pci_write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+
+	dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) {
+		irq_set_msi_desc(entry->irq, NULL);	/* detach the descriptor from the virq */
+		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;	/* descriptor no longer refers to an irq */
+	}
+}
+
+static struct irq_chip msic_irq_chip = {
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+	.irq_shutdown	= pci_msi_mask_irq,
+	.name		= "AXON-MSI",
+};
+
+static int msic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);	/* axon_msi_cascade() compares this with its msic */
+	irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops msic_host_ops = {
+	.map	= msic_host_map,
+};
+
+static void axon_msi_shutdown(struct platform_device *device)
+{
+	struct axon_msic *msic = dev_get_drvdata(&device->dev);
+	u32 tmp;
+
+	pr_devel("axon_msi: disabling %pOF\n",
+		 irq_domain_get_of_node(msic->irq_domain));
+	tmp  = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
+	tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;	/* clear both enable bits, keep the others */
+	msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
+}
+
+static int axon_msi_probe(struct platform_device *device)
+{
+	struct device_node *dn = device->dev.of_node;
+	struct axon_msic *msic;
+	unsigned int virq;
+	int dcr_base, dcr_len;
+
+	pr_devel("axon_msi: setting up dn %pOF\n", dn);
+
+	msic = kzalloc(sizeof(*msic), GFP_KERNEL);
+	if (!msic) {
+		printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
+		       dn);
+		goto out;
+	}
+
+	dcr_base = dcr_resource_start(dn, 0);
+	dcr_len = dcr_resource_len(dn, 0);
+
+	if (dcr_base == 0 || dcr_len == 0) {
+		printk(KERN_ERR
+		       "axon_msi: couldn't parse dcr properties on %pOF\n",
+			dn);
+		goto out_free_msic;
+	}
+
+	msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(msic->dcr_host)) {
+		printk(KERN_ERR "axon_msi: dcr_map failed for %pOF\n",
+		       dn);
+		goto out_free_msic;
+	}
+
+	msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
+					     &msic->fifo_phys, GFP_KERNEL);
+	if (!msic->fifo_virt) {
+		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %pOF\n",
+		       dn);
+		goto out_free_msic;	/* NOTE(review): dcr_host stays mapped from here on error - confirm */
+	}
+
+	virq = irq_of_parse_and_map(dn, 0);
+	if (!virq) {
+		printk(KERN_ERR "axon_msi: irq parse and map failed for %pOF\n",
+		       dn);
+		goto out_free_fifo;
+	}
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);	/* presumably so unwritten slots fail the check in axon_msi_cascade() */
+
+	/* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
+	msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
+	if (!msic->irq_domain) {
+		printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %pOF\n",
+		       dn);
+		goto out_free_fifo;
+	}
+
+	irq_set_handler_data(virq, msic);
+	irq_set_chained_handler(virq, axon_msi_cascade);
+	pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+	/* Enable the MSIC hardware */
+	msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
+	msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+				  msic->fifo_phys & 0xFFFFFFFF);
+	msic_dcr_write(msic, MSIC_CTRL_REG,
+			MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+			MSIC_CTRL_FIFO_SIZE);
+
+	msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+				& MSIC_FIFO_SIZE_MASK;	/* start reading at the current write position */
+
+	dev_set_drvdata(&device->dev, msic);	/* read back in axon_msi_shutdown() */
+
+	cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs;
+	cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+
+	axon_msi_debug_setup(dn, msic);
+
+	printk(KERN_DEBUG "axon_msi: setup MSIC on %pOF\n", dn);
+
+	return 0;
+
+out_free_fifo:
+	dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
+			  msic->fifo_phys);
+out_free_msic:
+	kfree(msic);
+out:
+
+	return -1;	/* NOTE(review): every failure returns -1 rather than a specific errno */
+}
+
+static const struct of_device_id axon_msi_device_id[] = {
+	{
+		.compatible	= "ibm,axon-msic"
+	},
+	{}
+};
+
+static struct platform_driver axon_msi_driver = {
+	.probe		= axon_msi_probe,
+	.shutdown	= axon_msi_shutdown,
+	.driver = {
+		.name = "axon-msi",
+		.of_match_table = axon_msi_device_id,
+	},
+};
+
+static int __init axon_msi_init(void)
+{
+	return platform_driver_register(&axon_msi_driver);
+}
+subsys_initcall(axon_msi_init);
+
+
+#ifdef DEBUG
+static int msic_set(void *data, u64 val)
+{
+	struct axon_msic *msic = data;
+	out_le32(msic->trigger, val);
+	return 0;
+}
+
+static int msic_get(void *data, u64 *val)
+{
+	*val = 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
+
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
+{
+	char name[8];	/* room for "msic_" plus two characters and the NUL */
+	struct resource res;
+
+	if (of_address_to_resource(dn, 0, &res)) {
+		pr_devel("axon_msi: couldn't get reg property\n");
+		return;
+	}
+
+	msic->trigger = ioremap(res.start, 0x4);	/* 4 bytes, written with out_le32() in msic_set() */
+	if (!msic->trigger) {
+		pr_devel("axon_msi: ioremap failed\n");
+		return;
+	}
+
+	snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
+
+	debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic);
+}
+#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
new file mode 100644
index 0000000000..a3ee397486
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * driver for powerbutton on IBM cell blades
+ *
+ * (C) Copyright IBM Corp. 2005-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <asm/pmi.h>
+
+static struct input_dev *button_dev;
+static struct platform_device *button_pdev;
+
+static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
+{
+	BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);	/* handler is registered for this type only */
+
+	input_report_key(button_dev, KEY_POWER, 1);	/* one message -> a key press ... */
+	input_sync(button_dev);
+	input_report_key(button_dev, KEY_POWER, 0);	/* ... immediately followed by the release */
+	input_sync(button_dev);
+}
+
+static struct pmi_handler cbe_pmi_handler = {
+	.type			= PMI_TYPE_POWER_BUTTON,
+	.handle_pmi_message	= cbe_powerbutton_handle_pmi,
+};
+
+/* Register the power button input device and hook it up to PMI messages. */
+static int __init cbe_powerbutton_init(void)
+{
+	int ret;
+	struct input_dev *dev;
+
+	if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
+		printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
+		return -ENODEV;
+	}
+
+	dev = input_allocate_device();
+	if (!dev) {
+		printk(KERN_ERR "%s: Not enough memory.\n", __func__);
+		return -ENOMEM;
+	}
+
+	set_bit(EV_KEY, dev->evbit);
+	set_bit(KEY_POWER, dev->keybit);
+
+	dev->name = "Power Button";
+	dev->id.bustype = BUS_HOST;
+
+	/* this makes the button look like an acpi power button
+	 * no clue whether anyone relies on that though */
+	dev->id.product = 0x02;
+	dev->phys = "LNXPWRBN/button/input0";
+
+	button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
+	if (IS_ERR(button_pdev)) {
+		ret = PTR_ERR(button_pdev);
+		goto out_free_input;
+	}
+
+	dev->dev.parent = &button_pdev->dev;
+	ret = input_register_device(dev);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register device\n", __func__);
+		goto out_free_pdev;
+	}
+
+	button_dev = dev;	/* must be set before the PMI handler can run */
+
+	ret = pmi_register_handler(&cbe_pmi_handler);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
+		input_unregister_device(dev);	/* registered: must not be freed */
+		dev = NULL;	/* turns input_free_device() below into a no-op */
+		goto out_free_pdev;
+	}
+
+	return 0;
+
+out_free_pdev:
+	platform_device_unregister(button_pdev);
+out_free_input:
+	input_free_device(dev);	/* only for a device that was never registered */
+	return ret;
+}
+
+static void __exit cbe_powerbutton_exit(void)
+{
+	pmi_unregister_handler(&cbe_pmi_handler);	/* no more button events after this */
+	input_unregister_device(button_dev);	/* registered, so not input_free_device() */
+	platform_device_unregister(button_pdev);	/* parent goes after its child */
+}
+
+module_init(cbe_powerbutton_init);
+module_exit(cbe_powerbutton_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
new file mode 100644
index 0000000000..99b3558753
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cbe_regs.c
+ *
+ * Accessor routines for the various MMIO register blocks of the CBE
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/cell-regs.h>
+
+/*
+ * Current implementation uses "cpu" nodes. We build our own mapping
+ * array of cpu numbers to cpu nodes locally for now to allow interrupt
+ * time code to have a fast path rather than call of_get_cpu_node(). If
+ * we implement cpu hotplug, we'll have to install an appropriate notifier
+ * in order to release references to the cpu going away
+ */
+/* One entry per "cpu" device tree node, filled in by cbe_regs_init(). */
+static struct cbe_regs_map
+{
+	struct device_node *cpu_node;	/* the "cpu" node of this entry */
+	struct device_node *be_node;	/* "be" node taken from a matching thread, may be NULL */
+	struct cbe_pmd_regs __iomem *pmd_regs;		/* mapped "pervasive" registers */
+	struct cbe_iic_regs __iomem *iic_regs;		/* mapped interrupt controller registers */
+	struct cbe_mic_tm_regs __iomem *mic_tm_regs;	/* mapped "mic-tm" registers */
+	struct cbe_pmd_shadow_regs pmd_shadow_regs;
+} cbe_regs_maps[MAX_CBE];
+/* number of cbe_regs_maps[] entries in use */
+static int cbe_regs_map_count;
+
+/* Per logical CPU lookup table, indexed by CPU number; see cbe_regs_init(). */
+static struct cbe_thread_map
+{
+	struct device_node *cpu_node;	/* result of of_get_cpu_node() for this CPU */
+	struct device_node *be_node;	/* result of cbe_get_be_node() for this CPU */
+	struct cbe_regs_map *regs;	/* entry in cbe_regs_maps[], NULL if none was assigned */
+	unsigned int thread_id;		/* thread number reported by of_get_cpu_node() */
+	unsigned int cbe_id;		/* index of 'regs' within cbe_regs_maps[] */
+} cbe_thread_map[NR_CPUS];
+
+/* cbe_local_mask[id]: CPUs whose cpu node belongs to cbe_regs_maps[id] */
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+/* CPUs with thread_id 0 (populated from the possible CPUs in cbe_regs_init()) */
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
+
+/*
+ * Look up the register map that belongs to a device tree node.
+ *
+ * A node that is not of type "spe" has to be the cpu or be node recorded in
+ * one of the cbe_regs_maps[] entries. For an "spe" node the result is cached
+ * in np->data; on the first lookup the tree is walked upwards until an
+ * enclosing "cpu" or "be" node is found.
+ *
+ * Returns the matching map, or NULL if there is none.
+ */
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+	int i;
+	struct device_node *tmp_np;
+
+	if (!of_node_is_type(np, "spe")) {
+		for (i = 0; i < cbe_regs_map_count; i++)
+			if (cbe_regs_maps[i].cpu_node == np ||
+			    cbe_regs_maps[i].be_node == np)
+				return &cbe_regs_maps[i];
+		return NULL;
+	}
+
+	if (np->data)
+		return np->data;
+
+	/* walk up path until cpu or be node was found */
+	tmp_np = np;
+	do {
+		tmp_np = tmp_np->parent;
+		/* on a correct devicetree we won't get up to root */
+		BUG_ON(!tmp_np);
+		/*
+		 * Keep climbing only while the node is neither "cpu" nor "be".
+		 * Joining the two tests with || can never become false (a node
+		 * has a single type), which would always end in the BUG_ON.
+		 */
+	} while (!of_node_is_type(tmp_np, "cpu") &&
+		 !of_node_is_type(tmp_np, "be"));
+
+	np->data = cbe_find_map(tmp_np);
+
+	return np->data;
+}
+
+/* Return the PMD register mapping of the map found for @np, or NULL. */
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs_map = cbe_find_map(np);
+
+	return regs_map ? regs_map->pmd_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+/* Return the PMD register mapping assigned to logical CPU @cpu, or NULL. */
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+	struct cbe_regs_map *regs_map = cbe_thread_map[cpu].regs;
+
+	return regs_map ? regs_map->pmd_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+/* Return the PMD shadow registers of the map found for @np, or NULL. */
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs_map = cbe_find_map(np);
+
+	return regs_map ? &regs_map->pmd_shadow_regs : NULL;
+}
+
+/* Return the PMD shadow registers assigned to logical CPU @cpu, or NULL. */
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+	struct cbe_regs_map *regs_map = cbe_thread_map[cpu].regs;
+
+	return regs_map ? &regs_map->pmd_shadow_regs : NULL;
+}
+
+/* Return the IIC register mapping of the map found for @np, or NULL. */
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs_map = cbe_find_map(np);
+
+	return regs_map ? regs_map->iic_regs : NULL;
+}
+
+/* Return the IIC register mapping assigned to logical CPU @cpu, or NULL. */
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+	struct cbe_regs_map *regs_map = cbe_thread_map[cpu].regs;
+
+	return regs_map ? regs_map->iic_regs : NULL;
+}
+
+/* Return the MIC-TM register mapping of the map found for @np, or NULL. */
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs_map = cbe_find_map(np);
+
+	return regs_map ? regs_map->mic_tm_regs : NULL;
+}
+
+/* Return the MIC-TM register mapping assigned to logical CPU @cpu, or NULL. */
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+	struct cbe_regs_map *regs_map = cbe_thread_map[cpu].regs;
+
+	return regs_map ? regs_map->mic_tm_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+/* Return the thread id recorded for logical CPU @cpu. */
+u32 cbe_get_hw_thread_id(int cpu)
+{
+	const struct cbe_thread_map *thread = &cbe_thread_map[cpu];
+
+	return thread->thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+/* Return the cbe_regs_maps[] index recorded for logical CPU @cpu. */
+u32 cbe_cpu_to_node(int cpu)
+{
+	const struct cbe_thread_map *thread = &cbe_thread_map[cpu];
+
+	return thread->cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+/* Return the first CPU set in the local mask of map index @node. */
+u32 cbe_node_to_cpu(int node)
+{
+	const cpumask_t *local = &cbe_local_mask[node];
+
+	return cpumask_first(local);
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+/*
+ * Find the "be" node whose "cpus" property contains a phandle to the device
+ * tree node of logical CPU @cpu_id.
+ *
+ * Returns the matching node (with the reference taken by the iterator still
+ * held), or NULL if no "be" node refers to that CPU. A "be" node without a
+ * "cpus" property is returned as is, after a one-time warning.
+ */
+static struct device_node *__init cbe_get_be_node(int cpu_id)
+{
+	struct device_node *np;
+	/* the cpu node does not depend on the "be" node: look it up once */
+	struct device_node *ci_np = of_get_cpu_node(cpu_id, NULL);
+
+	for_each_node_by_type(np, "be") {
+		int len, i, count;
+		const phandle *cpu_handle;
+
+		cpu_handle = of_get_property(np, "cpus", &len);
+
+		/*
+		 * the CAB SLOF tree is non compliant, so we just assume
+		 * there is only one node
+		 */
+		if (WARN_ON_ONCE(!cpu_handle))
+			break;
+
+		/* len is in bytes, the property is an array of phandles */
+		count = len / sizeof(*cpu_handle);
+
+		for (i = 0; i < count; i++) {
+			struct device_node *ch_np = of_find_node_by_phandle(cpu_handle[i]);
+
+			/* only the pointer value is compared below */
+			of_node_put(ch_np);
+
+			if (ch_np == ci_np)
+				goto out;
+		}
+	}
+
+out:
+	of_node_put(ci_np);
+	/* np is NULL here when the iterator ran out of "be" nodes */
+	return np;
+}
+
+/*
+ * Map the pervasive, interrupt controller and mic-tm register blocks for
+ * @map. With a "be" node the blocks come from child nodes of that node;
+ * without one they are described by properties of the cpu node.
+ */
+static void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+	struct device_node *be = map->be_node;
+	struct device_node *cpu = map->cpu_node;
+	struct device_node *np, *parent;
+	/* That hack must die die die ! */
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *prop;
+
+	if (!be) {
+		/* no "be" node: address/length pairs sit on the cpu node */
+		prop = of_get_property(cpu, "pervasive", NULL);
+		if (prop)
+			map->pmd_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "iic", NULL);
+		if (prop)
+			map->iic_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "mic-tm", NULL);
+		if (prop)
+			map->mic_tm_regs = ioremap(prop->address, prop->len);
+
+		return;
+	}
+
+	/* pick the nodes of each type whose parent is our "be" node */
+	for_each_node_by_type(np, "pervasive") {
+		parent = of_get_parent(np);
+		if (parent == be)
+			map->pmd_regs = of_iomap(np, 0);
+		of_node_put(parent);
+	}
+
+	for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") {
+		parent = of_get_parent(np);
+		if (parent == be)
+			map->iic_regs = of_iomap(np, 2);
+		of_node_put(parent);
+	}
+
+	for_each_node_by_type(np, "mic-tm") {
+		parent = of_get_parent(np);
+		if (parent == be)
+			map->mic_tm_regs = of_iomap(np, 0);
+		of_node_put(parent);
+	}
+}
+
+
+/*
+ * Build the per-CPU thread map and one register map per "cpu" device tree
+ * node, then map the register blocks of each of them.
+ *
+ * Stops with an error message once more than MAX_CBE cpu nodes are seen;
+ * the maps built up to that point stay in place.
+ */
+void __init cbe_regs_init(void)
+{
+	int i;
+	unsigned int thread_id;
+	struct device_node *cpu;
+
+	/* Build local fast map of CPUs */
+	for_each_possible_cpu(i) {
+		/*
+		 * Start from a defined value: thread_id is stored below even
+		 * when no node was found for this CPU, in which case it may
+		 * not have been written by of_get_cpu_node().
+		 */
+		thread_id = 0;
+		cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+		cbe_thread_map[i].be_node = cbe_get_be_node(i);
+		cbe_thread_map[i].thread_id = thread_id;
+	}
+
+	/* Find maps for each device tree CPU */
+	for_each_node_by_type(cpu, "cpu") {
+		struct cbe_regs_map *map;
+		unsigned int cbe_id;
+
+		/* check for room before a slot of cbe_regs_maps[] is taken */
+		if (cbe_regs_map_count >= MAX_CBE) {
+			printk(KERN_ERR "cbe_regs: More BE chips than supported"
+			       "!\n");
+			of_node_put(cpu);
+			return;
+		}
+
+		cbe_id = cbe_regs_map_count++;
+		map = &cbe_regs_maps[cbe_id];
+
+		of_node_put(map->cpu_node);
+		map->cpu_node = of_node_get(cpu);
+
+		/* attach every logical CPU that lives on this cpu node */
+		for_each_possible_cpu(i) {
+			struct cbe_thread_map *thread = &cbe_thread_map[i];
+
+			if (thread->cpu_node == cpu) {
+				thread->regs = map;
+				thread->cbe_id = cbe_id;
+				map->be_node = thread->be_node;
+				cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
+				if(thread->thread_id == 0)
+					cpumask_set_cpu(i, &cbe_first_online_cpu);
+			}
+		}
+
+		cbe_fill_regs_map(map);
+	}
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
new file mode 100644
index 0000000000..2f45428e32
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * thermal support for the cell processor
+ *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ *	contains the current temperature measured by the DTS
+ * throttle_begin:
+ *	throttling begins when temperature is greater or equal to
+ *	throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ *	throttling is being ceased, if the temperature is lower than
+ *	throttle_end. Due to a delay between applying throttling and
+ *	a reduced temperature this value should be less than throttle_begin.
+ *	A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ *	If the temperature is greater or equal to throttle_full_stop,
+ *	full throttling is applied to the cpu or spu. This value should be
+ *	greater than throttle_begin and throttle_end. Setting this value to
+ *	65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/stringify.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+/*
+ * Define a struct device_attribute called attr_<prefix>_<name>. Its sysfs
+ * file is named <name> and has mode <mode>; the callbacks are
+ * <prefix>_show_<name>() and <prefix>_store_<name>().
+ * Note that the expansion already ends in a semicolon.
+ */
+#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode)			\
+struct device_attribute attr_ ## _prefix ## _ ## _name = {	\
+	.attr = { .name = __stringify(_name), .mode = _mode },	\
+	.show	= _prefix ## _show_ ## _name,			\
+	.store	= _prefix ## _store_ ## _name,			\
+};
+
+/* Convert a 6-bit register value to a temperature: TEMP_MIN plus 2 per step. */
+static inline u8 reg_to_temp(u8 reg_value)
+{
+	u8 steps = reg_value & 0x3f;
+
+	return (steps << 1) + TEMP_MIN;
+}
+
+/* Convert a temperature to its 6-bit register value (inverse of reg_to_temp). */
+static inline u8 temp_to_reg(u8 temp)
+{
+	int above_min = temp - TEMP_MIN;
+
+	return (above_min >> 1) & 0x3f;
+}
+
+/* Return the PMD registers for the spu that embeds @dev. */
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
+{
+	struct spu *owner = container_of(dev, struct spu, dev);
+
+	return cbe_get_pmd_regs(spu_devnode(owner));
+}
+
+/*
+ * Read the 64-bit register @reg and return the byte that belongs to the
+ * spu embedding @dev (selected by its spe_id).
+ */
+static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
+{
+	struct spu *owner = container_of(dev, struct spu, dev);
+	union spe_reg snapshot;
+
+	snapshot.val = in_be64(&reg->val);
+
+	return snapshot.spe[owner->spe_id];
+}
+
+/* sysfs show: current temperature of the spu, read from TS_CTSR1. */
+static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+	u8 raw = spu_read_register_value(dev, &regs->ts_ctsr1);
+
+	return sprintf(buf, "%d\n", reg_to_temp(raw));
+}
+
+/*
+ * Print the temperature stored in the 6-bit field that starts at bit @pos
+ * of the TM_TPR register.
+ */
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+	u64 tpr = in_be64(&pmd_regs->tm_tpr.val);
+	u64 field = (tpr >> pos) & 0x3F;
+
+	return sprintf(buf, "%d\n", reg_to_temp(field));
+}
+
+/*
+ * Parse a temperature from @buf and store it in the byte of TM_TPR that
+ * starts at bit @pos. Returns @size, or -EINVAL if the input is not a
+ * number within TEMP_MIN..TEMP_MAX.
+ */
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+	unsigned int temp;
+	u64 encoded;
+	u64 tpr;
+
+	if (sscanf(buf, "%u", &temp) != 1)
+		return -EINVAL;
+	if (temp < TEMP_MIN || temp > TEMP_MAX)
+		return -EINVAL;
+
+	encoded = temp_to_reg(temp);
+
+	/* read-modify-write: clear the whole byte, then insert the new value */
+	tpr = in_be64(&pmd_regs->tm_tpr.val);
+	tpr &= ~(0xffull << pos);
+	tpr |= encoded << pos;
+	out_be64(&pmd_regs->tm_tpr.val, tpr);
+
+	return size;
+}
+
+/* sysfs show: spu throttle_end, TM_TPR field at bit 0. */
+static ssize_t spu_show_throttle_end(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+	return show_throttle(regs, buf, 0);
+}
+
+/* sysfs show: spu throttle_begin, TM_TPR field at bit 8. */
+static ssize_t spu_show_throttle_begin(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+	return show_throttle(regs, buf, 8);
+}
+
+/* sysfs show: spu throttle_full_stop, TM_TPR field at bit 16. */
+static ssize_t spu_show_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+	return show_throttle(regs, buf, 16);
+}
+
+/* sysfs store: spu throttle_end, TM_TPR field at bit 0. */
+static ssize_t spu_store_throttle_end(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+	return store_throttle(regs, buf, size, 0);
+}
+
+/* sysfs store: spu throttle_begin, TM_TPR field at bit 8. */
+static ssize_t spu_store_throttle_begin(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+	return store_throttle(regs, buf, size, 8);
+}
+
+/* sysfs store: spu throttle_full_stop, TM_TPR field at bit 16. */
+static ssize_t spu_store_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+	return store_throttle(regs, buf, size, 16);
+}
+
+/*
+ * Print the temperature held in the 6-bit field at bit @pos of TS_CTSR2
+ * for the CPU identified by dev->id.
+ */
+static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+	u64 ctsr2 = in_be64(&regs->ts_ctsr2);
+	u64 field = (ctsr2 >> pos) & 0x3f;
+
+	return sprintf(buf, "%d\n", reg_to_temp(field));
+}
+
+
+/*
+ * sysfs show: temperature of the DTS on the PPE that is located near the
+ * linear thermal sensor (TS_CTSR2 field at bit 32).
+ */
+static ssize_t ppe_show_temp0(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	const int dts_pos = 32;
+
+	return ppe_show_temp(dev, buf, dts_pos);
+}
+
+/* sysfs show: temperature of the second DTS on the PPE (field at bit 0). */
+static ssize_t ppe_show_temp1(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	const int dts_pos = 0;
+
+	return ppe_show_temp(dev, buf, dts_pos);
+}
+
+/* sysfs show: ppe throttle_end, TM_TPR field at bit 32. */
+static ssize_t ppe_show_throttle_end(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+	return show_throttle(regs, buf, 32);
+}
+
+/* sysfs show: ppe throttle_begin, TM_TPR field at bit 40. */
+static ssize_t ppe_show_throttle_begin(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+	return show_throttle(regs, buf, 40);
+}
+
+/* sysfs show: ppe throttle_full_stop, TM_TPR field at bit 48. */
+static ssize_t ppe_show_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+	return show_throttle(regs, buf, 48);
+}
+
+/* sysfs store: ppe throttle_end, TM_TPR field at bit 32. */
+static ssize_t ppe_store_throttle_end(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+	return store_throttle(regs, buf, size, 32);
+}
+
+/* sysfs store: ppe throttle_begin, TM_TPR field at bit 40. */
+static ssize_t ppe_store_throttle_begin(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+	return store_throttle(regs, buf, size, 40);
+}
+
+/* sysfs store: ppe throttle_full_stop, TM_TPR field at bit 48. */
+static ssize_t ppe_store_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+	return store_throttle(regs, buf, size, 48);
+}
+
+
+/* read-only "temperature" attribute of an spu */
+static struct device_attribute attr_spu_temperature = {
+	.attr = {.name = "temperature", .mode = 0400 },
+	.show = spu_show_temp,
+};
+
+/* owner read/write throttle thresholds of an spu */
+static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
+/* NULL-terminated list of the spu attributes defined above */
+static struct attribute *spu_attributes[] = {
+	&attr_spu_temperature.attr,
+	&attr_spu_throttle_end.attr,
+	&attr_spu_throttle_begin.attr,
+	&attr_spu_throttle_full_stop.attr,
+	NULL,
+};
+
+/* the attributes are grouped under the name "thermal" */
+static const struct attribute_group spu_attribute_group = {
+	.name	= "thermal",
+	.attrs	= spu_attributes,
+};
+
+/* read-only "temperature0" attribute, backed by ppe_show_temp0() */
+static struct device_attribute attr_ppe_temperature0 = {
+	.attr = {.name = "temperature0", .mode = 0400 },
+	.show = ppe_show_temp0,
+};
+
+/* read-only "temperature1" attribute, backed by ppe_show_temp1() */
+static struct device_attribute attr_ppe_temperature1 = {
+	.attr = {.name = "temperature1", .mode = 0400 },
+	.show = ppe_show_temp1,
+};
+
+/* owner read/write throttle thresholds of the ppe */
+static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
+/* NULL-terminated list of the ppe attributes defined above */
+static struct attribute *ppe_attributes[] = {
+	&attr_ppe_temperature0.attr,
+	&attr_ppe_temperature1.attr,
+	&attr_ppe_throttle_end.attr,
+	&attr_ppe_throttle_begin.attr,
+	&attr_ppe_throttle_full_stop.attr,
+	NULL,
+};
+
+/* the attributes are grouped under the name "thermal" */
+static struct attribute_group ppe_attribute_group = {
+	.name	= "thermal",
+	.attrs	= ppe_attributes,
+};
+
+/*
+ * initialize throttling with default values
+ *
+ * Writes the same TPR, STR and CR defaults to the PMD registers of every
+ * possible cpu. Returns 0 on success, or -EINVAL as soon as a cpu has no
+ * device or no PMD register mapping (cpus handled before that keep the
+ * values already written).
+ */
+static int __init init_default_values(void)
+{
+	int cpu;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct device *dev;
+	union ppe_spe_reg tpr;
+	union spe_reg str1;
+	u64 str2;
+	union spe_reg cr1;
+	u64 cr2;
+
+	/* TPR defaults */
+	/* ppe
+	 *	1F - no full stop
+	 *	08 - dynamic throttling starts if over 80 degrees
+	 *	03 - dynamic throttling ceases if below 70 degrees */
+	tpr.ppe = 0x1F0803;
+	/* spe
+	 *	10 - full stopped when over 96 degrees
+	 *	08 - dynamic throttling starts if over 80 degrees
+	 *	03 - dynamic throttling ceases if below 70 degrees
+	 */
+	tpr.spe = 0x100803;
+
+	/* STR defaults */
+	/* str1
+	 *	10 - stop 16 of 32 cycles
+	 */
+	str1.val = 0x1010101010101010ull;
+	/* str2
+	 *	10 - stop 16 of 32 cycles
+	 */
+	str2 = 0x10;
+
+	/* CR defaults */
+	/* cr1
+	 *	4 - normal operation
+	 */
+	cr1.val = 0x0404040404040404ull;
+	/* cr2
+	 *	4 - normal operation
+	 */
+	cr2 = 0x04;
+
+	for_each_possible_cpu (cpu) {
+		pr_debug("processing cpu %d\n", cpu);
+		dev = get_cpu_device(cpu);
+
+		if (!dev) {
+			pr_info("invalid dev pointer for cbe_thermal\n");
+			return -EINVAL;
+		}
+
+		/* the register map is looked up through the device id */
+		pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+
+		if (!pmd_regs) {
+			pr_info("invalid CBE regs pointer for cbe_thermal\n");
+			return -EINVAL;
+		}
+
+		/* the order of these register writes is kept as is */
+		out_be64(&pmd_regs->tm_str2, str2);
+		out_be64(&pmd_regs->tm_str1.val, str1.val);
+		out_be64(&pmd_regs->tm_tpr.val, tpr.val);
+		out_be64(&pmd_regs->tm_cr1.val, cr1.val);
+		out_be64(&pmd_regs->tm_cr2, cr2);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Module init: program the throttling defaults and, if that worked, add the
+ * "thermal" attribute groups to the spu and cpu devices.
+ */
+static int __init thermal_init(void)
+{
+	int rc;
+
+	rc = init_default_values();
+	if (rc != 0)
+		return rc;
+
+	spu_add_dev_attr_group(&spu_attribute_group);
+	cpu_add_dev_attr_group(&ppe_attribute_group);
+
+	return rc;
+}
+module_init(thermal_init);
+
+/* Module exit: remove the attribute groups added by thermal_init(). */
+static void __exit thermal_exit(void)
+{
+	spu_remove_dev_attr_group(&spu_attribute_group);
+	cpu_remove_dev_attr_group(&ppe_attribute_group);
+}
+module_exit(thermal_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h
new file mode 100644
index 0000000000..d5142e905a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cell.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Cell Platform common data structures
+ *
+ * Copyright 2015, Daniel Axtens, IBM Corporation
+ */
+
+#ifndef CELL_H
+#define CELL_H
+
+#include <asm/pci-bridge.h>
+
+extern struct pci_controller_ops cell_pci_controller_ops;
+
+#endif
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
new file mode 100644
index 0000000000..ca7849e113
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu aware cpufreq governor for the cell processor
+ *
+ * © Copyright IBM Corporation 2006-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+#include <asm/spu.h>
+
+#define POLL_TIME	100000		/* in µs */
+#define EXP		753		/* exp(-1) in fixed-point */
+
+/* Per-CPU governor state; 'policy' is non-NULL only between start and stop. */
+struct spu_gov_info_struct {
+	unsigned long busy_spus;	/* fixed-point */
+	struct cpufreq_policy *policy;
+	struct delayed_work work;	/* periodic work running spu_gov_work() */
+	unsigned int poll_int;		/* µs */
+};
+static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info);
+
+/*
+ * Fold the current number of busy spus on the policy cpu's node into the
+ * running average kept in info->busy_spus and scale policy->max by it.
+ */
+static int calc_freq(struct spu_gov_info_struct *info)
+{
+	int cpu = info->policy->cpu;
+	int busy_now = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
+
+	info->busy_spus = calc_load(info->busy_spus, EXP, busy_now * FIXED_1);
+	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
+			cpu, busy_now, info->busy_spus);
+
+	return info->policy->max * info->busy_spus / FIXED_1;
+}
+
+/*
+ * Delayed work handler: request the frequency computed by calc_freq() and
+ * queue itself again on the policy cpu after poll_int microseconds.
+ */
+static void spu_gov_work(struct work_struct *work)
+{
+	struct spu_gov_info_struct *info =
+		container_of(work, struct spu_gov_info_struct, work.work);
+	unsigned long target_freq;
+	int delay;
+
+	/* after cancel_delayed_work_sync we unset info->policy */
+	BUG_ON(info->policy == NULL);
+
+	target_freq = calc_freq(info);
+	__cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H);
+
+	delay = usecs_to_jiffies(info->poll_int);
+	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+}
+
+/* Set up the deferrable work item and queue its first run on the policy cpu. */
+static void spu_gov_init_work(struct spu_gov_info_struct *info)
+{
+	int delay;
+
+	delay = usecs_to_jiffies(info->poll_int);
+	INIT_DEFERRABLE_WORK(&info->work, spu_gov_work);
+	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+}
+
+/* Cancel the periodic work and wait for a running instance to finish. */
+static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
+{
+	cancel_delayed_work_sync(&info->work);
+}
+
+/*
+ * Governor start callback: record @policy for every cpu it covers and start
+ * polling on policy->cpu. Returns 0, or -EINVAL if that cpu is offline or
+ * policy->cur is zero.
+ */
+static int spu_gov_start(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct spu_gov_info_struct *info;
+	int i;
+
+	if (!cpu_online(cpu)) {
+		printk(KERN_ERR "cpu %d is not online\n", cpu);
+		return -EINVAL;
+	}
+
+	if (!policy->cur) {
+		printk(KERN_ERR "no cpu specified in policy\n");
+		return -EINVAL;
+	}
+
+	/* initialize spu_gov_info for all affected cpus */
+	for_each_cpu(i, policy->cpus)
+		per_cpu(spu_gov_info, i).policy = policy;
+
+	info = &per_cpu(spu_gov_info, cpu);
+	info->poll_int = POLL_TIME;
+
+	/* setup timer */
+	spu_gov_init_work(info);
+
+	return 0;
+}
+
+/*
+ * Governor stop callback: stop polling on policy->cpu, then forget the
+ * policy on every cpu it covers.
+ */
+static void spu_gov_stop(struct cpufreq_policy *policy)
+{
+	int i;
+
+	/* cancel timer */
+	spu_gov_cancel_work(&per_cpu(spu_gov_info, policy->cpu));
+
+	/* clean spu_gov_info for all affected cpus */
+	for_each_cpu(i, policy->cpus)
+		per_cpu(spu_gov_info, i).policy = NULL;
+}
+
+/* The "spudemand" cpufreq governor: only start and stop callbacks are set. */
+static struct cpufreq_governor spu_governor = {
+	.name = "spudemand",
+	.start = spu_gov_start,
+	.stop = spu_gov_stop,
+	.owner = THIS_MODULE,
+};
+cpufreq_governor_init(spu_governor);
+cpufreq_governor_exit(spu_governor);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 0000000000..03ee8152ee
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *                    IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers
+ *   vs node numbers in the setup code
+ * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from
+ *   a non-active node to the active node)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/* Per-CPU interrupt controller state, set up by init_one_iic(). */
+struct iic {
+	struct cbe_iic_thread_regs __iomem *regs;	/* ioremap()ed thread registers */
+	u8 target_id;
+	u8 eoi_stack[16];	/* priorities written back on EOI; slot 0 holds 0xff */
+	int eoi_ptr;		/* index of the current top of eoi_stack[] */
+	struct device_node *node;	/* interrupt controller node, reference held */
+};
+
+static DEFINE_PER_CPU(struct iic, cpu_iic);
+#define IIC_NODE_COUNT	2
+/* the single irq domain, created in iic_init_IRQ() */
+static struct irq_domain *iic_host;
+
+/*
+ * Convert "pending" bits to a hw irq number: IPIs are numbered from the top
+ * nibble of the priority, everything else from node, class and source unit.
+ */
+static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+{
+	unsigned char unit, node, class;
+
+	/* Decode IPIs */
+	if (bits.flags & CBE_IIC_IRQ_IPI)
+		return IIC_IRQ_TYPE_IPI | (bits.prio >> 4);
+
+	unit = bits.source & 0xf;
+	node = bits.source >> 4;
+	class = bits.class & 3;
+
+	return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit;
+}
+
+/* irq_mask callback: does nothing. */
+static void iic_mask(struct irq_data *d)
+{
+}
+
+/* irq_unmask callback: does nothing. */
+static void iic_unmask(struct irq_data *d)
+{
+}
+
+/*
+ * irq_eoi callback: pop the top of this CPU's eoi stack and write the
+ * priority found below it back to the prio register.
+ */
+static void iic_eoi(struct irq_data *d)
+{
+	struct iic *iic = this_cpu_ptr(&cpu_iic);
+
+	/* catch an underflow before eoi_stack[] is indexed, not after */
+	BUG_ON(iic->eoi_ptr < 1);
+	out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
+}
+
+/* irq chip for IPIs and normal interrupts (see iic_host_map()) */
+static struct irq_chip iic_chip = {
+	.name = "CELL-IIC",
+	.irq_mask = iic_mask,
+	.irq_unmask = iic_unmask,
+	.irq_eoi = iic_eoi,
+};
+
+
+/* irq_eoi callback of the IO exception chip: does nothing. */
+static void iic_ioexc_eoi(struct irq_data *d)
+{
+}
+
+/*
+ * Chained handler for the IO exception cascade interrupt. The handler data
+ * is the node's cbe_iic_regs pointer (installed by setup_iic()). Reads the
+ * iic_is status register until it is zero and dispatches every set bit as
+ * an IIC_IRQ_TYPE_IOEXC interrupt on iic_host.
+ */
+static void iic_ioexc_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct cbe_iic_regs __iomem *node_iic =
+		(void __iomem *)irq_desc_get_handler_data(desc);
+	unsigned int irq = irq_desc_get_irq(desc);
+	/* keep the upper bits of the cascade irq number, switch type to IOEXC */
+	unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
+	unsigned long bits, ack;
+	int cascade;
+
+	for (;;) {
+		bits = in_be64(&node_iic->iic_is);
+		if (bits == 0)
+			break;
+		/* pre-ack edge interrupts */
+		ack = bits & IIC_ISR_EDGE_MASK;
+		if (ack)
+			out_be64(&node_iic->iic_is, ack);
+		/* handle them */
+		/* exception number 'cascade' is bit 'cascade' counted from the MSB */
+		for (cascade = 63; cascade >= 0; cascade--)
+			if (bits & (0x8000000000000000UL >> cascade))
+				generic_handle_domain_irq(iic_host,
+							  base | cascade);
+		/* post-ack level interrupts */
+		ack = bits & ~IIC_ISR_EDGE_MASK;
+		if (ack)
+			out_be64(&node_iic->iic_is, ack);
+	}
+	/* finally signal end of interrupt for the cascade itself */
+	chip->irq_eoi(&desc->irq_data);
+}
+
+
+/* irq chip for IO exception interrupts (see iic_host_map()) */
+static struct irq_chip iic_ioexc_chip = {
+	.name = "CELL-IOEX",
+	.irq_mask = iic_mask,
+	.irq_unmask = iic_unmask,
+	.irq_eoi = iic_ioexc_eoi,
+};
+
+/*
+ * Get an IRQ number from the pending state register of the IIC.
+ *
+ * Returns the Linux irq number, or 0 if nothing valid is pending or the hw
+ * number has no mapping. For a returned interrupt its priority is pushed on
+ * this CPU's eoi stack, to be popped again by iic_eoi().
+ */
+static unsigned int iic_get_irq(void)
+{
+	struct cbe_iic_pending_bits pending;
+	struct iic *iic;
+	unsigned int virq;
+
+	iic = this_cpu_ptr(&cpu_iic);
+	*(unsigned long *) &pending =
+		in_be64((u64 __iomem *) &iic->regs->pending_destr);
+	if (!(pending.flags & CBE_IIC_IRQ_VALID))
+		return 0;
+	virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
+	if (!virq)
+		return 0;
+	/* catch an overflow before eoi_stack[] (16 slots) is written */
+	BUG_ON(iic->eoi_ptr >= 15);
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	return virq;
+}
+
+/* Write 0xff to the prio register of the calling CPU's IIC. */
+void iic_setup_cpu(void)
+{
+	struct iic *iic = this_cpu_ptr(&cpu_iic);
+
+	out_be64(&iic->regs->prio, 0xff);
+}
+
+/* Return the target id computed for @cpu by init_one_iic(). */
+u8 iic_get_target_id(int cpu)
+{
+	const struct iic *iic = &per_cpu(cpu_iic, cpu);
+
+	return iic->target_id;
+}
+
+EXPORT_SYMBOL_GPL(iic_get_target_id);
+
+#ifdef CONFIG_SMP
+
+/* Use the highest interrupt priorities for IPI: message 0 maps to 0xf. */
+static inline int iic_msg_to_irq(int msg)
+{
+	int ipi_nr = 0xf - msg;
+
+	return IIC_IRQ_TYPE_IPI + ipi_nr;
+}
+
+/* Raise IPI @msg on @cpu through the generate register of its IIC. */
+void iic_message_pass(int cpu, int msg)
+{
+	struct iic *target = &per_cpu(cpu_iic, cpu);
+
+	out_be64(&target->regs->generate, (0xf - msg) << 4);
+}
+
+/*
+ * Map the hw irq of IPI @msg and hand it to smp_request_message_ipi(); the
+ * mapping is disposed of again when that returns non-zero.
+ */
+static void iic_request_ipi(int msg)
+{
+	int virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg));
+
+	if (virq) {
+		/*
+		 * If smp_request_message_ipi encounters an error it will
+		 * notify the error.  If a message is not needed it will
+		 * return non-zero.
+		 */
+		if (smp_request_message_ipi(virq, msg))
+			irq_dispose_mapping(virq);
+		return;
+	}
+
+	printk(KERN_ERR
+	       "iic: failed to map IPI %s\n", smp_ipi_name[msg]);
+}
+
+/* Request the four IPI messages, in a fixed order. */
+void iic_request_IPIs(void)
+{
+	static const int ipi_msgs[] = {
+		PPC_MSG_CALL_FUNCTION,
+		PPC_MSG_RESCHEDULE,
+		PPC_MSG_TICK_BROADCAST,
+		PPC_MSG_NMI_IPI,
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ipi_msgs); idx++)
+		iic_request_ipi(ipi_msgs[idx]);
+}
+
+#endif /* CONFIG_SMP */
+
+
+/* irq domain match: accept nodes compatible with the CBEA internal IIC. */
+static int iic_host_match(struct irq_domain *h, struct device_node *node,
+			  enum irq_domain_bus_token bus_token)
+{
+	int is_iic;
+
+	is_iic = of_device_is_compatible(node,
+				    "IBM,CBEA-Internal-Interrupt-Controller");
+	return is_iic;
+}
+
+/*
+ * irq domain map: pick chip and flow handler from the type bits of @hw.
+ * IPIs are per-cpu interrupts on iic_chip, IO exceptions use
+ * iic_ioexc_chip, everything else is an edge-eoi interrupt on iic_chip.
+ */
+static int iic_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	irq_hw_number_t type = hw & IIC_IRQ_TYPE_MASK;
+
+	if (type == IIC_IRQ_TYPE_IPI)
+		irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+	else if (type == IIC_IRQ_TYPE_IOEXC)
+		irq_set_chip_and_handler(virq, &iic_ioexc_chip,
+					 handle_edge_eoi_irq);
+	else
+		irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq);
+
+	return 0;
+}
+
+/*
+ * irq domain xlate: decode a one-cell interrupt specifier, laid out as
+ * node:8 ext:8 class:8 unit:8, into a hw irq number.
+ *
+ * Returns 0 on success, -ENODEV if @ct is not a one-cell CBEA interrupt
+ * controller, or -EINVAL if the node number is larger than 1.
+ */
+static int iic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	unsigned int node, ext, unit, class;
+	irq_hw_number_t hwirq;
+	const u32 *cells;
+	u32 spec;
+
+	if (!of_device_is_compatible(ct,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+		return -ENODEV;
+	if (intsize != 1)
+		return -ENODEV;
+	cells = of_get_property(ct, "#interrupt-cells", NULL);
+	if (!cells || *cells != 1)
+		return -ENODEV;
+
+	spec = intspec[0];
+	node = spec >> 24;
+	ext = (spec >> 16) & 0xff;
+	class = (spec >> 8) & 0xff;
+	unit = spec & 0xff;
+
+	/* Check if node is in supported range */
+	if (node > 1)
+		return -EINVAL;
+
+	/* Build up interrupt number, special case for IO exceptions */
+	hwirq = node << IIC_IRQ_NODE_SHIFT;
+	if (unit == IIC_UNIT_IIC && class == 1)
+		hwirq |= IIC_IRQ_TYPE_IOEXC | ext;
+	else
+		hwirq |= IIC_IRQ_TYPE_NORMAL |
+			(class << IIC_IRQ_CLASS_SHIFT) | unit;
+	*out_hwirq = hwirq;
+
+	/* Dummy flags, ignored by iic code */
+	*out_flags = IRQ_TYPE_EDGE_RISING;
+
+	return 0;
+}
+
+/* irq domain callbacks used for iic_host */
+static const struct irq_domain_ops iic_host_ops = {
+	.match = iic_host_match,
+	.map = iic_host_map,
+	.xlate = iic_host_xlate,
+};
+
+/*
+ * Set up the per-CPU iic state for hardware cpu @hw_cpu: map the thread
+ * registers at @addr, compute the target id, seed the eoi stack and write
+ * priority 0 to the prio register.
+ */
+static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+				struct device_node *node)
+{
+	/* XXX FIXME: should locate the linux CPU number from the HW cpu
+	 * number properly. We are lucky for now
+	 */
+	struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
+	unsigned int node_bits = (hw_cpu & 2) << 3;
+	unsigned int unit_bits = (hw_cpu & 1) ? 0xf : 0xe;
+
+	iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+	BUG_ON(iic->regs == NULL);
+
+	iic->target_id = node_bits | unit_bits;
+	iic->eoi_stack[0] = 0xff;
+	iic->node = of_node_get(node);
+	out_be64(&iic->regs->prio, 0);
+
+	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %pOF\n",
+	       hw_cpu, iic->target_id, node);
+}
+
+/*
+ * Discover all "interrupt-controller" nodes that are compatible with the
+ * CBEA internal interrupt controller, initialize the per-thread state of
+ * the two threads each one serves and install the IO exception cascade.
+ *
+ * Returns 0 if at least one controller was set up, -ENODEV if none was
+ * found or a controller lacks its CPU association or register addresses.
+ */
+static int __init setup_iic(void)
+{
+	struct device_node *dn;
+	struct resource r0, r1;
+	unsigned int node, cascade, found = 0;
+	struct cbe_iic_regs __iomem *node_iic;
+	const u32 *np;
+
+	for_each_node_by_name(dn, "interrupt-controller") {
+		if (!of_device_is_compatible(dn,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+			continue;
+		/* np[0] and np[1] are used as the hw cpu numbers of the two threads */
+		np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
+		if (np == NULL) {
+			printk(KERN_WARNING "IIC: CPU association not found\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		if (of_address_to_resource(dn, 0, &r0) ||
+		    of_address_to_resource(dn, 1, &r1)) {
+			printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		found++;
+		init_one_iic(np[0], r0.start, dn);
+		init_one_iic(np[1], r1.start, dn);
+
+		/* Setup cascade for IO exceptions. XXX cleanup tricks to get
+		 * node vs CPU etc...
+		 * Note that we configure the IIC_IRR here with a hard coded
+		 * priority of 1. We might want to improve that later.
+		 */
+		node = np[0] >> 1;
+		node_iic = cbe_get_cpu_iic_regs(np[0]);
+		/* hw irq of the cascade: class 1, source unit IIC on this node */
+		cascade = node << IIC_IRQ_NODE_SHIFT;
+		cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
+		cascade |= IIC_UNIT_IIC;
+		cascade = irq_create_mapping(iic_host, cascade);
+		if (!cascade)
+			continue;
+		/*
+		 * irq_data is a generic pointer that gets passed back
+		 * to us later, so the forced cast is fine.
+		 */
+		irq_set_handler_data(cascade, (void __force *)node_iic);
+		irq_set_chained_handler(cascade, iic_ioexc_cascade);
+		out_be64(&node_iic->iic_ir,
+			 (1 << 12)		/* priority */ |
+			 (node << 4)		/* dest node */ |
+			 IIC_UNIT_THREAD_0	/* route them to thread 0 */);
+		/* Flush pending (make sure it triggers if there is
+		 * anything pending)
+		 */
+		out_be64(&node_iic->iic_is, 0xfffffffffffffffful);
+	}
+
+	if (found)
+		return 0;
+	else
+		return -ENODEV;
+}
+
+/*
+ * Create the IIC irq domain, make it the default host, set up all
+ * controllers and install iic_get_irq() as the platform's get_irq hook.
+ * Panics if no controller could be initialized.
+ */
+void __init iic_init_IRQ(void)
+{
+	/* Setup an irq host data structure */
+	iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops,
+					 NULL);
+	BUG_ON(iic_host == NULL);
+	irq_set_default_host(iic_host);
+
+	/* Discover and initialize iics */
+	if (setup_iic() < 0)
+		panic("IIC: Failed to initialize !\n");
+
+	/* Set master interrupt handling function */
+	ppc_md.get_irq = iic_get_irq;
+
+	/* Enable on current CPU */
+	iic_setup_cpu();
+}
+
+/*
+ * Program the iic_ir routing register of the IIC that belongs to @cpu:
+ * priority @priority, destination node cpu >> 1, destination unit PT_0
+ * when @thread is 0 and PT_1 otherwise.
+ */
+void iic_set_interrupt_routing(int cpu, int thread, int priority)
+{
+	struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu);
+	int node = cpu >> 1;
+	u64 iic_ir = 0;
+
+	/* Set which node and thread will handle the next interrupt */
+	iic_ir |= CBE_IIC_IR_PRIO(priority) |
+		  CBE_IIC_IR_DEST_NODE(node);
+
+	switch (thread) {
+	case 0:
+		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0);
+		break;
+	default:
+		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1);
+		break;
+	}
+
+	out_be64(&iic_regs->iic_ir, iic_ir);
+}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
new file mode 100644
index 0000000000..a479022485
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_CELL_PIC_H
+#define ASM_CELL_PIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node interrupt numbers.
+ *
+ * Interrupt numbers are in the range 0...0x1ff where the top bit
+ * (0x100) represents the source node. Only 2 nodes are supported with
+ * the current code though it's trivial to extend that if necessary using
+ * higher level bits.
+ *
+ * The bottom 8 bits are split into 2 type bits and 6 data bits that
+ * depend on the type:
+ *
+ * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source
+ * 01 (0x40 | data) : IO exception. data is the exception number as
+ *                    defined by bit numbers in IIC_SR
+ * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority)
+ *                    and node is always 0 (IPIs are per-cpu, their source is
+ *                    not relevant)
+ * 11 (0xc0 | data) : reserved
+ *
+ * In addition, interrupt number 0x80000000 is defined as always invalid
+ * (that is the node field is expected to never extend to more than 23 bits)
+ *
+ */
+
+enum {
+	IIC_IRQ_INVALID		= 0x80000000u,
+	IIC_IRQ_NODE_MASK	= 0x100,
+	IIC_IRQ_NODE_SHIFT	= 8,
+	IIC_IRQ_MAX		= 0x1ff,
+	IIC_IRQ_TYPE_MASK	= 0xc0,
+	IIC_IRQ_TYPE_NORMAL	= 0x00,
+	IIC_IRQ_TYPE_IOEXC	= 0x40,
+	IIC_IRQ_TYPE_IPI	= 0x80,
+	IIC_IRQ_CLASS_SHIFT	= 4,
+	IIC_IRQ_CLASS_0		= 0x00,
+	IIC_IRQ_CLASS_1		= 0x10,
+	IIC_IRQ_CLASS_2		= 0x20,
+	IIC_SOURCE_COUNT	= 0x200,
+
+	/* Here are defined the various source/dest units. Avoid using those
+	 * definitions if you can, they are mostly here for reference
+	 */
+	IIC_UNIT_SPU_0		= 0x4,
+	IIC_UNIT_SPU_1		= 0x7,
+	IIC_UNIT_SPU_2		= 0x3,
+	IIC_UNIT_SPU_3		= 0x8,
+	IIC_UNIT_SPU_4		= 0x2,
+	IIC_UNIT_SPU_5		= 0x9,
+	IIC_UNIT_SPU_6		= 0x1,
+	IIC_UNIT_SPU_7		= 0xa,
+	IIC_UNIT_IOC_0		= 0x0,
+	IIC_UNIT_IOC_1		= 0xb,
+	IIC_UNIT_THREAD_0	= 0xe, /* target only */
+	IIC_UNIT_THREAD_1	= 0xf, /* target only */
+	IIC_UNIT_IIC		= 0xe, /* source only (IO exceptions) */
+
+	/* Base numbers for the external interrupts */
+	IIC_IRQ_EXT_IOIF0	=
+		IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0,
+	IIC_IRQ_EXT_IOIF1	=
+		IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1,
+
+	/* Base numbers for the IIC_ISR interrupts */
+	IIC_IRQ_IOEX_TMI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63,
+	IIC_IRQ_IOEX_PMI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62,
+	IIC_IRQ_IOEX_ATI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61,
+	IIC_IRQ_IOEX_MATBFI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60,
+	IIC_IRQ_IOEX_ELDI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59,
+
+	/* Which bits in IIC_ISR are edge sensitive */
+	IIC_ISR_EDGE_MASK	= 0x4ul,
+};
+
+extern void iic_init_IRQ(void);
+extern void iic_message_pass(int cpu, int msg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+
+extern u8 iic_get_target_id(int cpu);
+
+extern void spider_init_IRQ(void);
+
+extern void iic_set_interrupt_routing(int cpu, int thread, int priority);
+
+#endif
+#endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
new file mode 100644
index 0000000000..1202a69b0a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IOMMU implementation for Cell Broadband Processor Architecture
+ *
+ * (C) Copyright IBM Corporation 2006-2008
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "cell.h"
+#include "interrupt.h"
+
+/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
+ * instead of leaving them mapped to some dummy page. This can be
+ * enabled once the appropriate workarounds for spider bugs have
+ * been enabled
+ */
+#define CELL_IOMMU_REAL_UNMAP
+
+/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
+ * IO PTEs based on the transfer direction. That can be enabled
+ * once spider-net has been fixed to pass the correct direction
+ * to the DMA mapping functions
+ */
+#define CELL_IOMMU_STRICT_PROTECTION
+
+
+#define NR_IOMMUS			2
+
+/* IOC mmap registers */
+#define IOC_Reg_Size			0x2000
+
+#define IOC_IOPT_CacheInvd		0x908
+#define IOC_IOPT_CacheInvd_NE_Mask	0xffe0000000000000ul
+#define IOC_IOPT_CacheInvd_IOPTE_Mask	0x000003fffffffff8ul
+#define IOC_IOPT_CacheInvd_Busy		0x0000000000000001ul
+
+#define IOC_IOST_Origin			0x918
+#define IOC_IOST_Origin_E		0x8000000000000000ul
+#define IOC_IOST_Origin_HW		0x0000000000000800ul
+#define IOC_IOST_Origin_HL		0x0000000000000400ul
+
+#define IOC_IO_ExcpStat			0x920
+#define IOC_IO_ExcpStat_V		0x8000000000000000ul
+#define IOC_IO_ExcpStat_SPF_Mask	0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_S		0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_P		0x2000000000000000ul
+#define IOC_IO_ExcpStat_ADDR_Mask	0x00000007fffff000ul
+#define IOC_IO_ExcpStat_RW_Mask		0x0000000000000800ul
+#define IOC_IO_ExcpStat_IOID_Mask	0x00000000000007fful
+
+#define IOC_IO_ExcpMask			0x928
+#define IOC_IO_ExcpMask_SFE		0x4000000000000000ul
+#define IOC_IO_ExcpMask_PFE		0x2000000000000000ul
+
+#define IOC_IOCmd_Offset		0x1000
+
+#define IOC_IOCmd_Cfg			0xc00
+#define IOC_IOCmd_Cfg_TE		0x0000800000000000ul
+
+
+/* Segment table entries */
+#define IOSTE_V			0x8000000000000000ul /* valid */
+#define IOSTE_H			0x4000000000000000ul /* cache hint */
+#define IOSTE_PT_Base_RPN_Mask  0x3ffffffffffff000ul /* base RPN of IOPT */
+#define IOSTE_NPPT_Mask		0x0000000000000fe0ul /* no. pages in IOPT */
+#define IOSTE_PS_Mask		0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K		0x0000000000000001ul /*   - 4kB  */
+#define IOSTE_PS_64K		0x0000000000000003ul /*   - 64kB */
+#define IOSTE_PS_1M		0x0000000000000005ul /*   - 1MB  */
+#define IOSTE_PS_16M		0x0000000000000007ul /*   - 16MB */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT	28
+#define IO_PAGENO_BITS(shift)	(IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET	0x80000000ul
+
+struct iommu_window {
+	struct list_head list;
+	struct cbe_iommu *iommu;
+	unsigned long offset;
+	unsigned long size;
+	unsigned int ioid;
+	struct iommu_table table;
+};
+
+#define NAMESIZE 8
+struct cbe_iommu {
+	int nid;
+	char name[NAMESIZE];
+	void __iomem *xlate_regs;
+	void __iomem *cmd_regs;
+	unsigned long *stab;
+	unsigned long *ptab;
+	void *pad_page;
+	struct list_head windows;
+};
+
+/* Static array of iommus, one per node
+ *   each contains a list of windows, keyed from dma_window property
+ *   - on bus setup, look for a matching window, or create one
+ *   - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus;
+
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+		long n_ptes)
+{
+	u64 __iomem *reg;
+	u64 val;
+	long n;
+
+	reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+
+	while (n_ptes > 0) {
+		/* we can invalidate up to 1 << 11 PTEs at once */
+		n = min(n_ptes, 1l << 11);
+		val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask)
+			| (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
+		        | IOC_IOPT_CacheInvd_Busy;
+
+		out_be64(reg, val);
+		while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
+			;
+
+		n_ptes -= n;
+		pte += n;
+	}
+}
+
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	int i;
+	unsigned long *io_pte, base_pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	/* implementing proper protection causes problems with the spidernet
+	 * driver - check mapping directions later, but allow read & write by
+	 * default for now.*/
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+	/* to avoid referencing a global, we use a trick here to setup the
+	 * protection bit. "prot" is setup to be 3 fields of 4 bits appended
+	 * together for each of the 3 supported direction values. It is then
+	 * shifted left so that the fields matching the desired direction
+	 * lands on the appropriate bits, and other bits are masked out.
+	 */
+	const unsigned long prot = 0xc48;
+	base_pte =
+		((prot << (52 + 4 * direction)) &
+		 (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) |
+		CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+		(window->ioid & CBE_IOPTE_IOID_Mask);
+#else
+	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+		CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+	if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING))
+		base_pte &= ~CBE_IOPTE_SO_RW;
+
+	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift))
+		io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
+
+	mb();
+
+	invalidate_tce_cache(window->iommu, io_pte, npages);
+
+	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
+		 index, npages, direction, base_pte);
+	return 0;
+}
+
+static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
+{
+
+	int i;
+	unsigned long *io_pte, pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
+
+#ifdef CELL_IOMMU_REAL_UNMAP
+	pte = 0;
+#else
+	/* spider bridge does PCI reads after freeing - insert a mapping
+	 * to a scratch page instead of an invalid entry */
+	pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+		__pa(window->iommu->pad_page) |
+		(window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+
+	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	for (i = 0; i < npages; i++)
+		io_pte[i] = pte;
+
+	mb();
+
+	invalidate_tce_cache(window->iommu, io_pte, npages);
+}
+
+static irqreturn_t ioc_interrupt(int irq, void *data)
+{
+	unsigned long stat, spf;
+	struct cbe_iommu *iommu = data;
+
+	stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	spf = stat & IOC_IO_ExcpStat_SPF_Mask;
+
+	/* Might want to rate limit it */
+	printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
+	printk(KERN_ERR "  V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
+	       !!(stat & IOC_IO_ExcpStat_V),
+	       (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ',
+	       (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ',
+	       (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
+	       (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
+	printk(KERN_ERR "  page=0x%016lx\n",
+	       stat & IOC_IO_ExcpStat_ADDR_Mask);
+
+	/* clear interrupt */
+	stat &= ~IOC_IO_ExcpStat_V;
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat);
+
+	return IRQ_HANDLED;
+}
+
+static int __init cell_iommu_find_ioc(int nid, unsigned long *base)
+{
+	struct device_node *np;
+	struct resource r;
+
+	*base = 0;
+
+	/* First look for new style /be nodes */
+	for_each_node_by_name(np, "ioc") {
+		if (of_node_to_nid(np) != nid)
+			continue;
+		if (of_address_to_resource(np, 0, &r)) {
+			printk(KERN_ERR "iommu: can't get address for %pOF\n",
+			       np);
+			continue;
+		}
+		*base = r.start;
+		of_node_put(np);
+		return 0;
+	}
+
+	/* Ok, let's try the old way */
+	for_each_node_by_type(np, "cpu") {
+		const unsigned int *nidp;
+		const unsigned long *tmp;
+
+		nidp = of_get_property(np, "node-id", NULL);
+		if (nidp && *nidp == nid) {
+			tmp = of_get_property(np, "ioc-translation", NULL);
+			if (tmp) {
+				*base = *tmp;
+				of_node_put(np);
+				return 0;
+			}
+		}
+	}
+
+	return -ENODEV;
+}
+
+static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu,
+				unsigned long dbase, unsigned long dsize,
+				unsigned long fbase, unsigned long fsize)
+{
+	struct page *page;
+	unsigned long segments, stab_size;
+
+	segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT;
+
+	pr_debug("%s: iommu[%d]: segments: %lu\n",
+			__func__, iommu->nid, segments);
+
+	/* set up the segment table */
+	stab_size = segments * sizeof(unsigned long);
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size));
+	BUG_ON(!page);
+	iommu->stab = page_address(page);
+	memset(iommu->stab, 0, stab_size);
+}
+
+static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+		unsigned long base, unsigned long size, unsigned long gap_base,
+		unsigned long gap_size, unsigned long page_shift)
+{
+	struct page *page;
+	int i;
+	unsigned long reg, segments, pages_per_segment, ptab_size,
+		      n_pte_pages, start_seg, *ptab;
+
+	start_seg = base >> IO_SEGMENT_SHIFT;
+	segments  = size >> IO_SEGMENT_SHIFT;
+	pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift);
+	/* PTEs for each segment must start on a 4K boundary */
+	pages_per_segment = max(pages_per_segment,
+				(1 << 12) / sizeof(unsigned long));
+
+	ptab_size = segments * pages_per_segment * sizeof(unsigned long);
+	pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
+			iommu->nid, ptab_size, get_order(ptab_size));
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
+	BUG_ON(!page);
+
+	ptab = page_address(page);
+	memset(ptab, 0, ptab_size);
+
+	/* number of 4K pages needed for a page table */
+	n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12;
+
+	pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
+			__func__, iommu->nid, iommu->stab, ptab,
+			n_pte_pages);
+
+	/* initialise the STEs */
+	reg = IOSTE_V | ((n_pte_pages - 1) << 5);
+
+	switch (page_shift) {
+	case 12: reg |= IOSTE_PS_4K;  break;
+	case 16: reg |= IOSTE_PS_64K; break;
+	case 20: reg |= IOSTE_PS_1M;  break;
+	case 24: reg |= IOSTE_PS_16M; break;
+	default: BUG();
+	}
+
+	gap_base = gap_base >> IO_SEGMENT_SHIFT;
+	gap_size = gap_size >> IO_SEGMENT_SHIFT;
+
+	pr_debug("Setting up IOMMU stab:\n");
+	for (i = start_seg; i < (start_seg + segments); i++) {
+		if (i >= gap_base && i < (gap_base + gap_size)) {
+			pr_debug("\toverlap at %d, skipping\n", i);
+			continue;
+		}
+		iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) *
+					(i - start_seg));
+		pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
+	}
+
+	return ptab;
+}
+
+static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+{
+	int ret;
+	unsigned long reg, xlate_base;
+	unsigned int virq;
+
+	if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
+		panic("%s: missing IOC register mappings for node %d\n",
+		      __func__, iommu->nid);
+
+	iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
+	BUG_ON(!iommu->xlate_regs);	/* regs are accessed below */
+	iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
+
+	mb();	/* ensure that the STEs have updated */
+
+	/* setup interrupts for the iommu. */
+	reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
+			reg & ~IOC_IO_ExcpStat_V);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
+			IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
+
+	virq = irq_create_mapping(NULL,
+			IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
+	BUG_ON(!virq);
+
+	ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
+	BUG_ON(ret);
+
+	/* set the IOC segment table origin register (and turn on the iommu) */
+	reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
+	out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
+	in_be64(iommu->xlate_regs + IOC_IOST_Origin);
+
+	/* turn on IO translation */
+	reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
+	out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
+}
+
+static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+	unsigned long base, unsigned long size)
+{
+	cell_iommu_setup_stab(iommu, base, size, 0, 0);
+	iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+					    IOMMU_PAGE_SHIFT_4K);
+	cell_iommu_enable_hardware(iommu);
+}
+
+#if 0/* Unused for now */
+static struct iommu_window *find_window(struct cbe_iommu *iommu,
+		unsigned long offset, unsigned long size)
+{
+	struct iommu_window *window;
+
+	/* todo: check for overlapping (but not equal) windows) */
+
+	list_for_each_entry(window, &(iommu->windows), list) {
+		if (window->offset == offset && window->size == size)
+			return window;
+	}
+
+	return NULL;
+}
+#endif
+
+static inline u32 cell_iommu_get_ioid(struct device_node *np)
+{
+	const u32 *ioid;
+
+	ioid = of_get_property(np, "ioid", NULL);
+	if (ioid == NULL) {
+		printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n",
+		       np);
+		return 0;
+	}
+
+	return *ioid;
+}
+
+static struct iommu_table_ops cell_iommu_ops = {
+	.set = tce_build_cell,
+	.clear = tce_free_cell
+};
+
+static struct iommu_window * __init
+cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
+			unsigned long offset, unsigned long size,
+			unsigned long pte_offset)
+{
+	struct iommu_window *window;
+	struct page *page;
+	u32 ioid;
+
+	ioid = cell_iommu_get_ioid(np);
+
+	window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
+	BUG_ON(window == NULL);
+
+	window->offset = offset;
+	window->size = size;
+	window->ioid = ioid;
+	window->iommu = iommu;
+
+	window->table.it_blocksize = 16;
+	window->table.it_base = (unsigned long)iommu->ptab;
+	window->table.it_index = iommu->nid;
+	window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	window->table.it_offset =
+		(offset >> window->table.it_page_shift) + pte_offset;
+	window->table.it_size = size >> window->table.it_page_shift;
+	window->table.it_ops = &cell_iommu_ops;
+
+	if (!iommu_init_table(&window->table, iommu->nid, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	pr_debug("\tioid      %d\n", window->ioid);
+	pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
+	pr_debug("\tbase      0x%016lx\n", window->table.it_base);
+	pr_debug("\toffset    0x%lx\n", window->table.it_offset);
+	pr_debug("\tsize      %ld\n", window->table.it_size);
+
+	list_add(&window->list, &iommu->windows);
+
+	if (offset != 0)
+		return window;
+
+	/* We need to map and reserve the first IOMMU page since it's used
+	 * by the spider workaround. In theory, we only need to do that when
+	 * running on spider but it doesn't really matter.
+	 *
+	 * This code also assumes that we have a window that starts at 0,
+	 * which is the case on all spider based blades.
+	 */
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
+	BUG_ON(!page);
+	iommu->pad_page = page_address(page);
+	clear_page(iommu->pad_page);
+
+	__set_bit(0, window->table.it_map);
+	tce_build_cell(&window->table, window->table.it_offset, 1,
+		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0);
+
+	return window;
+}
+
+static struct cbe_iommu *cell_iommu_for_node(int nid)
+{
+	int i;
+
+	for (i = 0; i < cbe_nr_iommus; i++)
+		if (iommus[i].nid == nid)
+			return &iommus[i];
+	return NULL;
+}
+
+static unsigned long cell_dma_nommu_offset;
+
+static unsigned long dma_iommu_fixed_base;
+static bool cell_iommu_enabled;
+
+/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
+bool iommu_fixed_is_weak;
+
+static struct iommu_table *cell_get_iommu_table(struct device *dev)
+{
+	struct iommu_window *window;
+	struct cbe_iommu *iommu;
+
+	/* Current implementation uses the first window available in that
+	 * node's iommu. We -might- do something smarter later though it may
+	 * never be necessary
+	 */
+	iommu = cell_iommu_for_node(dev_to_node(dev));
+	if (iommu == NULL || list_empty(&iommu->windows)) {
+		dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n",
+		       dev->of_node, dev_to_node(dev));
+		return NULL;
+	}
+	window = list_entry(iommu->windows.next, struct iommu_window, list);
+
+	return &window->table;
+}
+
+static u64 cell_iommu_get_fixed_address(struct device *dev);
+
+static void cell_dma_dev_setup(struct device *dev)
+{
+	if (cell_iommu_enabled) {
+		u64 addr = cell_iommu_get_fixed_address(dev);
+
+		if (addr != OF_BAD_ADDR)
+			dev->archdata.dma_offset = addr + dma_iommu_fixed_base;
+		set_iommu_table_base(dev, cell_get_iommu_table(dev));
+	} else {
+		dev->archdata.dma_offset = cell_dma_nommu_offset;
+	}
+}
+
+static void cell_pci_dma_dev_setup(struct pci_dev *dev)
+{
+	cell_dma_dev_setup(&dev->dev);
+}
+
+static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
+			      void *data)
+{
+	struct device *dev = data;
+
+	/* We are only interested in device addition */
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	if (cell_iommu_enabled)
+		dev->dma_ops = &dma_iommu_ops;
+	cell_dma_dev_setup(dev);
+	return 0;
+}
+
+static struct notifier_block cell_of_bus_notifier = {
+	.notifier_call = cell_of_bus_notify
+};
+
+static int __init cell_iommu_get_window(struct device_node *np,
+					 unsigned long *base,
+					 unsigned long *size)
+{
+	const __be32 *dma_window;
+	unsigned long index;
+
+	/* Use ibm,dma-window if available, else, hard code ! */
+	dma_window = of_get_property(np, "ibm,dma-window", NULL);
+	if (dma_window == NULL) {
+		*base = 0;
+		*size = 0x80000000u;
+		return -ENODEV;
+	}
+
+	of_parse_dma_window(np, dma_window, &index, base, size);
+	return 0;
+}
+
+static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
+{
+	struct cbe_iommu *iommu;
+	int nid, i;
+
+	/* Get node ID */
+	nid = of_node_to_nid(np);
+	if (nid < 0) {
+		printk(KERN_ERR "iommu: failed to get node for %pOF\n",
+		       np);
+		return NULL;
+	}
+	pr_debug("iommu: setting up iommu for node %d (%pOF)\n",
+		 nid, np);
+
+	/* XXX todo: If we can have multiple windows on the same IOMMU, which
+	 * isn't the case today, we probably want here to check whether the
+	 * iommu for that node is already setup.
+	 * However, there might be issue with getting the size right so let's
+	 * ignore that for now. We might want to completely get rid of the
+	 * multiple window support since the cell iommu supports per-page ioids
+	 */
+
+	if (cbe_nr_iommus >= NR_IOMMUS) {
+		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n",
+		       np);
+		return NULL;
+	}
+
+	/* Init base fields */
+	i = cbe_nr_iommus++;
+	iommu = &iommus[i];
+	iommu->stab = NULL;
+	iommu->nid = nid;
+	snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i);
+	INIT_LIST_HEAD(&iommu->windows);
+
+	return iommu;
+}
+
+static void __init cell_iommu_init_one(struct device_node *np,
+				       unsigned long offset)
+{
+	struct cbe_iommu *iommu;
+	unsigned long base, size;
+
+	iommu = cell_iommu_alloc(np);
+	if (!iommu)
+		return;
+
+	/* Obtain a window for it */
+	cell_iommu_get_window(np, &base, &size);
+
+	pr_debug("\ttranslating window 0x%lx...0x%lx\n",
+		 base, base + size - 1);
+
+	/* Initialize the hardware */
+	cell_iommu_setup_hardware(iommu, base, size);
+
+	/* Setup the iommu_table */
+	cell_iommu_setup_window(iommu, np, base, size,
+				offset >> IOMMU_PAGE_SHIFT_4K);
+}
+
+static void __init cell_disable_iommus(void)
+{
+	int node;
+	unsigned long base, val;
+	void __iomem *xregs, *cregs;
+
+	/* Make sure IOC translation is disabled on all nodes */
+	for_each_online_node(node) {
+		if (cell_iommu_find_ioc(node, &base))
+			continue;
+		xregs = ioremap(base, IOC_Reg_Size);
+		if (xregs == NULL)
+			continue;
+		cregs = xregs + IOC_IOCmd_Offset;
+
+		pr_debug("iommu: cleaning up iommu on node %d\n", node);
+
+		out_be64(xregs + IOC_IOST_Origin, 0);
+		(void)in_be64(xregs + IOC_IOST_Origin);
+		val = in_be64(cregs + IOC_IOCmd_Cfg);
+		val &= ~IOC_IOCmd_Cfg_TE;
+		out_be64(cregs + IOC_IOCmd_Cfg, val);
+		(void)in_be64(cregs + IOC_IOCmd_Cfg);
+
+		iounmap(xregs);
+	}
+}
+
+static int __init cell_iommu_init_disabled(void)
+{
+	struct device_node *np = NULL;
+	unsigned long base = 0, size;
+
+	/* When no iommu is present, we use direct DMA ops */
+
+	/* First make sure all IOC translation is turned off */
+	cell_disable_iommus();
+
+	/* If we have no Axon, we set up the spider DMA magic offset */
+	np = of_find_node_by_name(NULL, "axon");
+	if (!np)
+		cell_dma_nommu_offset = SPIDER_DMA_OFFSET;
+	of_node_put(np);
+
+	/* Now we need to check to see where the memory is mapped
+	 * in PCI space. We assume that all busses use the same dma
+	 * window which is always the case so far on Cell, thus we
+	 * pick up the first pci-internal node we can find and check
+	 * the DMA window from there.
+	 */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		if (cell_iommu_get_window(np, &base, &size) == 0)
+			break;
+	}
+	if (np == NULL) {
+		for_each_node_by_name(np, "pci-internal") {
+			if (np->parent == NULL || np->parent->parent != NULL)
+				continue;
+			if (cell_iommu_get_window(np, &base, &size) == 0)
+				break;
+		}
+	}
+	of_node_put(np);
+
+	/* If we found a DMA window, we check if it's big enough to enclose
+	 * all of physical memory. If not, we force enable IOMMU
+	 */
+	if (np && size < memblock_end_of_DRAM()) {
+		printk(KERN_WARNING "iommu: force-enabled, dma window"
+		       " (%ldMB) smaller than total memory (%lldMB)\n",
+		       size >> 20, memblock_end_of_DRAM() >> 20);
+		return -ENODEV;
+	}
+
+	cell_dma_nommu_offset += base;
+
+	if (cell_dma_nommu_offset != 0)
+		cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+
+	printk("iommu: disabled, direct DMA offset is 0x%lx\n",
+	       cell_dma_nommu_offset);
+
+	return 0;
+}
+
+/*
+ *  Fixed IOMMU mapping support
+ *
+ *  This code adds support for setting up a fixed IOMMU mapping on certain
+ *  cell machines. For 64-bit devices this avoids the performance overhead of
+ *  mapping and unmapping pages at runtime. 32-bit devices are unable to use
+ *  the fixed mapping.
+ *
+ *  The fixed mapping is established at boot, and maps all of physical memory
+ *  1:1 into device space at some offset. On machines with < 30 GB of memory
+ *  we setup the fixed mapping immediately above the normal IOMMU window.
+ *
+ *  For example a machine with 4GB of memory would end up with the normal
+ *  IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In
+ *  this case a 64-bit device wishing to DMA to 1GB would be told to DMA to
+ *  3GB, plus any offset required by firmware. The firmware offset is encoded
+ *  in the "dma-ranges" property.
+ *
+ *  On machines with 30GB or more of memory, we are unable to place the fixed
+ *  mapping above the normal IOMMU window as we would run out of address space.
+ *  Instead we move the normal IOMMU window to coincide with the hash page
+ *  table, this region does not need to be part of the fixed mapping as no
+ *  device should ever be DMA'ing to it. We then setup the fixed mapping
+ *  from 0 to 32GB.
+ */
+
+static u64 cell_iommu_get_fixed_address(struct device *dev)
+{
+	u64 cpu_addr, size, best_size = 0, dev_addr = OF_BAD_ADDR;
+	struct device_node *np;
+	const u32 *ranges = NULL;
+	int i, len, best = -1, naddr, nsize, pna, range_size;
+
+	/* We can be called for platform devices that have no of_node */
+	np = of_node_get(dev->of_node);
+	if (!np)
+		goto out;
+
+	while (1) {
+		naddr = of_n_addr_cells(np);
+		nsize = of_n_size_cells(np);
+		np = of_get_next_parent(np);
+		if (!np)
+			break;
+
+		ranges = of_get_property(np, "dma-ranges", &len);
+
+		/* Ignore empty ranges, they imply no translation required */
+		if (ranges && len > 0)
+			break;
+	}
+	/* np is NULL if we walked past the root without a usable property */
+	if (!np || !ranges) {
+		dev_dbg(dev, "iommu: no dma-ranges found\n");
+		goto out;
+	}
+
+	len /= sizeof(u32);
+
+	pna = of_n_addr_cells(np);
+	range_size = naddr + nsize + pna;
+
+	/* dma-ranges format (a trailing partial entry is ignored):
+	 * child addr	: naddr cells
+	 * parent addr	: pna cells
+	 * size		: nsize cells
+	 */
+	for (i = 0; i + range_size <= len; i += range_size) {
+		cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
+		size = of_read_number(ranges + i + naddr + pna, nsize);
+
+		if (cpu_addr == 0 && size > best_size) {
+			best = i;
+			best_size = size;
+		}
+	}
+
+	if (best >= 0) {
+		dev_addr = of_read_number(ranges + best, naddr);
+	} else
+		dev_dbg(dev, "iommu: no suitable range found!\n");
+
+out:
+	of_node_put(np);
+
+	return dev_addr;
+}
+
+static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask)
+{
+	return mask == DMA_BIT_MASK(64) &&
+		cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR;
+}
+
+static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab,
+			   unsigned long base_pte)
+{
+	unsigned long segment, offset;
+
+	segment = addr >> IO_SEGMENT_SHIFT;
+	offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24));
+	ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long));
+
+	pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
+		  addr, ptab, segment, offset);
+
+	ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask);
+}
+
+static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+	struct device_node *np, unsigned long dbase, unsigned long dsize,
+	unsigned long fbase, unsigned long fsize)
+{
+	unsigned long base_pte, uaddr, ioaddr, *ptab;
+
+	ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);
+
+	dma_iommu_fixed_base = fbase;
+
+	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+		(cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask);
+
+	if (iommu_fixed_is_weak)
+		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+	else {
+		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+		base_pte |= CBE_IOPTE_SO_RW;
+	}
+
+	for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) {
+		/* Don't touch the dynamic region */
+		ioaddr = uaddr + fbase;
+		if (ioaddr >= dbase && ioaddr < (dbase + dsize)) {
+			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+			continue;
+		}
+
+		insert_16M_pte(uaddr, ptab, base_pte);
+	}
+
+	mb();
+}
+
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+	unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+	struct cbe_iommu *iommu;
+	struct device_node *np;
+
+	/* The fixed mapping is only supported on axon machines */
+	np = of_find_node_by_name(NULL, "axon");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	np = of_find_node_with_property(NULL, "dma-ranges");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
+	/* The default setup is to have the fixed mapping sit after the
+	 * dynamic region, so find the top of the largest IOMMU window
+	 * on any axon, then add the size of RAM and that's our max value.
+	 * If that is > 32GB we have to do other shenanigans.
+	 */
+	fbase = 0;
+	for_each_node_by_name(np, "axon") {
+		cell_iommu_get_window(np, &dbase, &dsize);
+		fbase = max(fbase, dbase + dsize);
+	}
+
+	fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT);
+	fsize = memblock_phys_mem_size();
+
+	if ((fbase + fsize) <= 0x800000000ul)
+		hbase = 0; /* use the device tree window */
+	else {
+		/* If we're over 32 GB we need to cheat. We can't map all of
+		 * RAM with the fixed mapping, and also fit the dynamic
+		 * region. So try to place the dynamic region where the hash
+		 * table sits, drivers never need to DMA to it, we don't
+		 * need a fixed mapping for that area.
+		 */
+		if (!htab_address) {
+			pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
+			return -1;
+		}
+		hbase = __pa(htab_address);
+		hend  = hbase + htab_size_bytes;
+
+		/* The window must start and end on a segment boundary */
+		if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+		    (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) {
+			pr_debug("iommu: hash window not segment aligned\n");
+			return -1;
+		}
+
+		/* Check the hash window fits inside the real DMA window */
+		for_each_node_by_name(np, "axon") {
+			cell_iommu_get_window(np, &dbase, &dsize);
+
+			if (hbase < dbase || (hend > (dbase + dsize))) {
+				pr_debug("iommu: hash window doesn't fit in "
+					 "real DMA window\n");
+				of_node_put(np);
+				return -1;
+			}
+		}
+
+		fbase = 0;
+	}
+
+	/* Setup the dynamic regions */
+	for_each_node_by_name(np, "axon") {
+		iommu = cell_iommu_alloc(np);
+		BUG_ON(!iommu);
+
+		if (hbase == 0)
+			cell_iommu_get_window(np, &dbase, &dsize);
+		else {
+			dbase = hbase;
+			dsize = htab_size_bytes;
+		}
+
+		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+			"fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
+			 dbase + dsize, fbase, fbase + fsize);
+
+		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
+		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+						    IOMMU_PAGE_SHIFT_4K);
+		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
+					     fbase, fsize);
+		cell_iommu_enable_hardware(iommu);
+		cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
+	}
+
+	cell_pci_controller_ops.iommu_bypass_supported =
+		cell_pci_iommu_bypass_supported;
+	return 0;
+}
+
+static int iommu_fixed_disabled;
+
+static int __init setup_iommu_fixed(char *str)
+{
+	struct device_node *pciep;
+
+	if (strcmp(str, "off") == 0)
+		iommu_fixed_disabled = 1;
+
+	/* If we can find a pcie-endpoint in the device tree assume that
+	 * we're on a triblade or a CAB so by default the fixed mapping
+	 * should be set to be weakly ordered; but only if the boot
+	 * option WASN'T set for strong ordering
+	 */
+	pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+	if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
+		iommu_fixed_is_weak = true;
+
+	of_node_put(pciep);
+
+	return 1;
+}
+__setup("iommu_fixed=", setup_iommu_fixed);
+
+static int __init cell_iommu_init(void)
+{
+	struct device_node *np;
+
+	/* If IOMMU is disabled or we have little enough RAM to not need
+	 * to enable it, we setup a direct mapping.
+	 *
+	 * Note: should we make sure we have the IOMMU actually disabled ?
+	 */
+	if (iommu_is_off ||
+	    (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull))
+		if (cell_iommu_init_disabled() == 0)
+			goto bail;
+
+	/* Setup various callbacks */
+	cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+
+	if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
+		goto done;
+
+	/* Create an iommu for each /axon node.  */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		cell_iommu_init_one(np, 0);
+	}
+
+	/* Create an iommu for each toplevel /pci-internal node for
+	 * old hardware/firmware
+	 */
+	for_each_node_by_name(np, "pci-internal") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
+	}
+ done:
+	/* Setup default PCI iommu ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+	cell_iommu_enabled = true;
+ bail:
+	/* Register callbacks on OF platform device addition/removal
+	 * to handle linking them to the right DMA operations
+	 */
+	bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
+
+	return 0;
+}
+machine_arch_initcall(cell, cell_iommu_init);
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
new file mode 100644
index 0000000000..58d967ee38
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CBE Pervasive Monitor and Debug
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          Michael N. Day (mnday@us.ibm.com)
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/reg.h>
+#include <asm/cell-regs.h>
+#include <asm/cpu_has_feature.h>
+
+#include "pervasive.h"
+#include "ras.h"
+
+/*
+ * Idle handler installed as ppc_md.power_save by cbe_pervasive_init().
+ * Arms the external and decrementer wake-up sources in TSC_CELL, drops
+ * the thread priority, then clears the runlatch and thread-enable bits in
+ * CTRL before hard-enabling interrupts again.
+ */
+static void cbe_power_save(void)
+{
+	unsigned long ctrl, thread_switch_control;
+
+	/* Ensure our interrupt state is properly tracked */
+	if (!prep_irq_for_idle())
+		return;
+
+	ctrl = mfspr(SPRN_CTRLF);
+
+	/* Enable DEC and EE interrupt request */
+	thread_switch_control  = mfspr(SPRN_TSC_CELL);
+	thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST;
+
+	/* The CT field of CTRL selects which decrementer enable bit is set. */
+	switch (ctrl & CTRL_CT) {
+	case CTRL_CT0:
+		thread_switch_control |= TSC_CELL_DEC_ENABLE_0;
+		break;
+	case CTRL_CT1:
+		thread_switch_control |= TSC_CELL_DEC_ENABLE_1;
+		break;
+	default:
+		/* Neither value matched: warn and leave DEC wake-up unset. */
+		printk(KERN_WARNING "%s: unknown configuration\n",
+			__func__);
+		break;
+	}
+	mtspr(SPRN_TSC_CELL, thread_switch_control);
+
+	/*
+	 * go into low thread priority, medium priority will be
+	 * restored for us after wake-up.
+	 */
+	HMT_low();
+
+	/*
+	 * atomically disable thread execution and runlatch.
+	 * External and Decrementer exceptions are still handled when the
+	 * thread is disabled but now enter in cbe_system_reset_exception()
+	 */
+	ctrl &= ~(CTRL_RUNLATCH | CTRL_TE);
+	mtspr(SPRN_CTRLT, ctrl);
+
+	/* Re-enable interrupts in MSR */
+	__hard_irq_enable();
+}
+
+/*
+ * System reset hook (ppc_md.system_reset_exception). Decodes the wake-up
+ * reason held in regs->msr and returns 1 when the event was dealt with,
+ * or 0 to request a real system reset.
+ */
+static int cbe_system_reset_exception(struct pt_regs *regs)
+{
+	int handled = 1;
+
+	switch (regs->msr & SRR1_WAKEMASK) {
+	case SRR1_WAKEDEC:
+		set_dec(1);
+		break;
+	case SRR1_WAKEEE:
+		/*
+		 * Nothing to do in this NMI context: the external interrupt
+		 * is taken as a regular exception once interrupts are
+		 * re-enabled.
+		 */
+		break;
+	case SRR1_WAKEMT:
+		handled = cbe_sysreset_hack();
+		break;
+#ifdef CONFIG_CBE_RAS
+	case SRR1_WAKESYSERR:
+		cbe_system_error_exception(regs);
+		break;
+	case SRR1_WAKETHERM:
+		cbe_thermal_exception(regs);
+		break;
+#endif /* CONFIG_CBE_RAS */
+	default:
+		/* unknown reason: do system reset */
+		handled = 0;
+		break;
+	}
+
+	return handled;
+}
+
+/*
+ * On CPUs with CPU_FTR_PAUSE_ZERO, set the Pause(0) control bit in the
+ * PMCR of every possible cpu that has pervasive registers, then install
+ * the Cell power_save and system_reset_exception hooks in ppc_md.
+ */
+void __init cbe_pervasive_init(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct cbe_pmd_regs __iomem *pmd = cbe_get_cpu_pmd_regs(cpu);
+		u64 pmcr;
+
+		if (pmd == NULL)
+			continue;
+
+		/* Read-modify-write: enable Pause(0) control bit */
+		pmcr = in_be64(&pmd->pmcr);
+		out_be64(&pmd->pmcr, pmcr | CBE_PMD_PAUSE_ZERO_CONTROL);
+	}
+
+	ppc_md.power_save = cbe_power_save;
+	ppc_md.system_reset_exception = cbe_system_reset_exception;
+}
diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h
new file mode 100644
index 0000000000..0da74ab107
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Cell Pervasive Monitor and Debug interface and HW structures
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          David J. Erb (djerb@us.ibm.com)
+ */
+
+
+#ifndef PERVASIVE_H
+#define PERVASIVE_H
+
+/* Pervasive unit setup; defined in pervasive.c. */
+extern void cbe_pervasive_init(void);
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+/* Reset-button check; returns 0 or 1 (defined in ras.c). */
+extern int cbe_sysreset_hack(void);
+#else
+/*
+ * Reset-button support not configured: always return 1, which
+ * cbe_system_reset_exception() hands back unchanged.
+ */
+static inline int cbe_sysreset_hack(void)
+{
+	return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+#endif
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
new file mode 100644
index 0000000000..b207a7f99b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Author:
+ *    David Erb (djerb@us.ibm.com)
+ *    Kevin Corry (kevcorry@us.ibm.com)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+#include <asm/machdep.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#include <asm/spu.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/*
+ * When writing to write-only mmio addresses, save a shadow copy. All of the
+ * registers are 32-bit, but stored in the upper-half of a 64-bit field in
+ * pmd_regs.
+ */
+
+/*
+ * WRITE_WO_MMIO(reg, x): store the 32-bit value x in the upper half of the
+ * 64-bit MMIO field pmd_regs->reg and mirror it in shadow_regs->reg.
+ * Note: expands to code that uses a variable named `cpu` taken from the
+ * caller's scope; x is evaluated exactly once.
+ */
+#define WRITE_WO_MMIO(reg, x)					\
+	do {							\
+		u32 _x = (x);					\
+		struct cbe_pmd_regs __iomem *pmd_regs;		\
+		struct cbe_pmd_shadow_regs *shadow_regs;	\
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);		\
+		shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);	\
+		out_be64(&(pmd_regs->reg), (((u64)_x) << 32));	\
+		shadow_regs->reg = _x;				\
+	} while (0)
+
+/*
+ * READ_SHADOW_REG(val, reg): load val from the shadow copy of reg without
+ * touching the hardware. Uses `cpu` from the caller's scope.
+ */
+#define READ_SHADOW_REG(val, reg)				\
+	do {							\
+		struct cbe_pmd_shadow_regs *shadow_regs;	\
+		shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);	\
+		(val) = shadow_regs->reg;			\
+	} while (0)
+
+/*
+ * READ_MMIO_UPPER32(val, reg): read the 64-bit MMIO field pmd_regs->reg
+ * and keep its upper 32 bits in val. Uses `cpu` from the caller's scope.
+ */
+#define READ_MMIO_UPPER32(val, reg)				\
+	do {							\
+		struct cbe_pmd_regs __iomem *pmd_regs;		\
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);		\
+		(val) = (u32)(in_be64(&pmd_regs->reg) >> 32);	\
+	} while (0)
+
+/*
+ * Physical counter registers.
+ * Each physical counter can act as one 32-bit counter or two 16-bit counters.
+ */
+
+/*
+ * Read physical counter @phys_ctr of @cpu. Returns 0 when @phys_ctr is
+ * out of range.
+ */
+u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
+{
+	u32 latched, count = 0;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return 0;
+
+	READ_SHADOW_REG(latched, counter_value_in_latch);
+
+	/*
+	 * A set bit means the newest value is the one recorded in the
+	 * shadow copy; otherwise ask the hardware counter.
+	 */
+	if (latched & (1 << phys_ctr))
+		READ_SHADOW_REG(count, pm_ctr[phys_ctr]);
+	else
+		READ_MMIO_UPPER32(count, pm_ctr[phys_ctr]);
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(cbe_read_phys_ctr);
+
+/*
+ * Write @val to physical counter @phys_ctr of @cpu. Out-of-range counter
+ * numbers are ignored.
+ */
+void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val)
+{
+	u32 ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return;
+
+	/*
+	 * The write only reaches a hardware latch; the counter itself picks
+	 * the value up when the performance monitor is enabled.
+	 */
+	WRITE_WO_MMIO(pm_ctr[phys_ctr], val);
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	if (!(ctrl & CBE_PM_ENABLE_PERF_MON)) {
+		/* PMU not running: remember that the value sits in the latch. */
+		cbe_get_cpu_pmd_shadow_regs(cpu)->counter_value_in_latch |=
+			(1 << phys_ctr);
+		return;
+	}
+
+	/*
+	 * Counters are already active: rewrite pm_control to "re-enable"
+	 * the PMU.
+	 */
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_write_phys_ctr);
+
+/*
+ * "Logical" counter registers.
+ * These will read/write 16-bits or 32-bits depending on the
+ * current size of the counter. Counters 4 - 7 are always 16-bit.
+ */
+
+/*
+ * Read logical counter @ctr of @cpu. In 16-bit mode logical counters below
+ * NR_PHYS_CTRS use the upper half of the physical counter, the others the
+ * lower half.
+ */
+u32 cbe_read_ctr(u32 cpu, u32 ctr)
+{
+	u32 phys = ctr & (NR_PHYS_CTRS - 1);
+	u32 raw = cbe_read_phys_ctr(cpu, phys);
+
+	if (cbe_get_ctr_size(cpu, phys) != 16)
+		return raw;
+
+	if (ctr < NR_PHYS_CTRS)
+		return raw >> 16;
+
+	return raw & 0xffff;
+}
+EXPORT_SYMBOL_GPL(cbe_read_ctr);
+
+/*
+ * Write @val to logical counter @ctr of @cpu. In 16-bit mode only the
+ * matching half of the physical counter is replaced; the other half keeps
+ * the value currently read back.
+ */
+void cbe_write_ctr(u32 cpu, u32 ctr, u32 val)
+{
+	u32 phys = ctr & (NR_PHYS_CTRS - 1);
+
+	if (cbe_get_ctr_size(cpu, phys) == 16) {
+		u32 cur = cbe_read_phys_ctr(cpu, phys);
+
+		val = (ctr < NR_PHYS_CTRS)
+			? ((val << 16) | (cur & 0xffff))
+			: ((val & 0xffff) | (cur & 0xffff0000));
+	}
+
+	cbe_write_phys_ctr(cpu, phys, val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_ctr);
+
+/*
+ * Counter-control registers.
+ * Each "logical" counter has a corresponding control register.
+ */
+
+/*
+ * Return the shadow copy of the control register of logical counter @ctr,
+ * or 0 when @ctr is out of range.
+ */
+u32 cbe_read_pm07_control(u32 cpu, u32 ctr)
+{
+	u32 value;
+
+	if (ctr >= NR_CTRS)
+		return 0;
+
+	READ_SHADOW_REG(value, pm07_control[ctr]);
+	return value;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm07_control);
+
+/* Set the control register of logical counter @ctr; ignores bad @ctr. */
+void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val)
+{
+	if (ctr >= NR_CTRS)
+		return;
+
+	WRITE_WO_MMIO(pm07_control[ctr], val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm07_control);
+
+/*
+ * Other PMU control registers. Most of these are write-only.
+ */
+
+/*
+ * Read PMU control register @reg of @cpu. Some registers are served from
+ * the shadow copy, the others are read from the hardware. Returns 0 for
+ * a @reg value that matches no case.
+ */
+u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg)
+{
+	u32 value = 0;
+
+	switch (reg) {
+	/* Registers answered from the shadow copy */
+	case group_control:
+		READ_SHADOW_REG(value, group_control);
+		break;
+	case debug_bus_control:
+		READ_SHADOW_REG(value, debug_bus_control);
+		break;
+	case ext_tr_timer:
+		READ_SHADOW_REG(value, ext_tr_timer);
+		break;
+	case pm_control:
+		READ_SHADOW_REG(value, pm_control);
+		break;
+	case pm_start_stop:
+		READ_SHADOW_REG(value, pm_start_stop);
+		break;
+
+	/* Registers read back from the hardware */
+	case trace_address:
+		READ_MMIO_UPPER32(value, trace_address);
+		break;
+	case pm_status:
+		READ_MMIO_UPPER32(value, pm_status);
+		break;
+	case pm_interval:
+		READ_MMIO_UPPER32(value, pm_interval);
+		break;
+	}
+
+	return value;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm);
+
+/*
+ * Write @val to PMU control register @reg of @cpu, updating both the
+ * hardware register and its shadow copy. Unknown @reg values are ignored.
+ */
+void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val)
+{
+	switch (reg) {
+	case group_control:
+		WRITE_WO_MMIO(group_control, val);
+		return;
+	case debug_bus_control:
+		WRITE_WO_MMIO(debug_bus_control, val);
+		return;
+	case trace_address:
+		WRITE_WO_MMIO(trace_address, val);
+		return;
+	case ext_tr_timer:
+		WRITE_WO_MMIO(ext_tr_timer, val);
+		return;
+	case pm_status:
+		WRITE_WO_MMIO(pm_status, val);
+		return;
+	case pm_control:
+		WRITE_WO_MMIO(pm_control, val);
+		return;
+	case pm_interval:
+		WRITE_WO_MMIO(pm_interval, val);
+		return;
+	case pm_start_stop:
+		WRITE_WO_MMIO(pm_start_stop, val);
+		return;
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm);
+
+/*
+ * Get/set the size of a physical counter to either 16 or 32 bits.
+ */
+
+/*
+ * Return the width (16 or 32) of physical counter @phys_ctr according to
+ * pm_control, or 0 when @phys_ctr is out of range.
+ */
+u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr)
+{
+	u32 ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return 0;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	if (ctrl & CBE_PM_16BIT_CTR(phys_ctr))
+		return 16;
+
+	return 32;
+}
+EXPORT_SYMBOL_GPL(cbe_get_ctr_size);
+
+/*
+ * Switch physical counter @phys_ctr between 16-bit and 32-bit mode.
+ * Out-of-range counters are ignored.
+ */
+void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size)
+{
+	u32 ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+
+	if (ctr_size == 16)
+		ctrl |= CBE_PM_16BIT_CTR(phys_ctr);
+	else if (ctr_size == 32)
+		ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr);
+
+	/* Any other size leaves the mode bit alone; pm_control is still rewritten. */
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_set_ctr_size);
+
+/*
+ * Enable/disable the entire performance monitoring unit.
+ * When we enable the PMU, all pending writes to counters get committed.
+ */
+
+/*
+ * Turn the performance monitor of @cpu on. The "value in latch" tracking
+ * is reset first, since enabling commits pending counter writes.
+ */
+void cbe_enable_pm(u32 cpu)
+{
+	u32 ctrl;
+
+	cbe_get_cpu_pmd_shadow_regs(cpu)->counter_value_in_latch = 0;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	ctrl |= CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm);
+
+/* Turn the performance monitor of @cpu off by clearing its enable bit. */
+void cbe_disable_pm(u32 cpu)
+{
+	u32 ctrl = cbe_read_pm(cpu, pm_control);
+
+	ctrl &= ~CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm);
+
+/*
+ * Reading from the trace_buffer.
+ * The trace buffer is two 64-bit registers. Reading from
+ * the second half automatically increments the trace_address.
+ */
+
+/*
+ * Copy one 128-bit trace buffer entry into buf[0] and buf[1]. The low
+ * half is read first; the read of the second half is the one that
+ * advances trace_address.
+ */
+void cbe_read_trace_buffer(u32 cpu, u64 *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu);
+
+	buf[0] = in_be64(&regs->trace_buffer_0_63);
+	buf[1] = in_be64(&regs->trace_buffer_64_127);
+}
+EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
+
+/*
+ * Enabling/disabling interrupts for the entire performance monitoring unit.
+ */
+
+/* Return the pending PMU interrupt bits of @cpu and clear them. */
+u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
+{
+	u32 pending;
+
+	/* The read of pm_status itself clears the interrupt bits. */
+	pending = cbe_read_pm(cpu, pm_status);
+
+	return pending;
+}
+EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
+
+/*
+ * Route the next PMU interrupt of @cpu to @thread and, when @mask is
+ * non-zero, enable the interrupt bits in @mask.
+ */
+void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
+{
+	/* Choose the node and thread that will take the next interrupt. */
+	iic_set_interrupt_routing(cpu, thread, 0);
+
+	if (!mask)
+		return;
+
+	/* Interrupt enables live in the pm_status register. */
+	cbe_write_pm(cpu, pm_status, mask);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
+
+/* Discard pending PMU interrupts of @cpu, then write 0 to pm_status. */
+void cbe_disable_pm_interrupts(u32 cpu)
+{
+	(void)cbe_get_and_clear_pm_interrupts(cpu);
+
+	cbe_write_pm(cpu, pm_status, 0);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+
+/* PMU interrupt handler: forward the interrupted register state to perf_irq. */
+static irqreturn_t cbe_pm_irq(int irq, void *dev_id)
+{
+	struct pt_regs *regs = get_irq_regs();
+
+	perf_irq(regs);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Map and request the performance-monitor interrupt (IIC_IRQ_IOEX_PMI)
+ * of every online node, directing them all to cbe_pm_irq().
+ *
+ * Returns 0 on success, -EINVAL when a mapping cannot be created, or the
+ * error reported by request_irq(). Failures are logged at KERN_ERR.
+ */
+static int __init cbe_init_pm_irq(void)
+{
+	unsigned int irq;
+	int rc, node;
+
+	for_each_online_node(node) {
+		/* The node number is encoded in the hardware irq number. */
+		irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
+					       (node << IIC_IRQ_NODE_SHIFT));
+		if (!irq) {
+			printk(KERN_ERR
+			       "ERROR: Unable to allocate irq for node %d\n",
+			       node);
+			return -EINVAL;
+		}
+
+		rc = request_irq(irq, cbe_pm_irq,
+				 0, "cbe-pmu-0", NULL);
+		if (rc) {
+			printk(KERN_ERR
+			       "ERROR: Request for irq on node %d failed\n",
+			       node);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+machine_arch_initcall(cell, cbe_init_pm_irq);
+
+/*
+ * Wait until any running handler of the PMU interrupt of @node has
+ * finished. Logs a warning and returns when no mapping exists for that
+ * interrupt.
+ */
+void cbe_sync_irq(int node)
+{
+	unsigned int hwirq = IIC_IRQ_IOEX_PMI | (node << IIC_IRQ_NODE_SHIFT);
+	unsigned int irq;
+
+	irq = irq_find_mapping(NULL, hwirq);
+	if (!irq) {
+		/*
+		 * irq is always 0 on this path, so report the hardware irq
+		 * number that could not be found instead.
+		 */
+		printk(KERN_WARNING "ERROR, unable to get existing irq %u "
+		       "for node %d\n", hwirq, node);
+		return;
+	}
+
+	synchronize_irq(irq);
+}
+EXPORT_SYMBOL_GPL(cbe_sync_irq);
+
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
new file mode 100644
index 0000000000..f6b8792653
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2006-2008, IBM Corporation.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/of.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+#include "pervasive.h"
+
+/*
+ * Log the global fault isolation registers of @cpu (checkstop,
+ * recoverable and machine check), followed by the IOC FIR when the IIC
+ * registers are available. Does nothing when @cpu has no pervasive
+ * register block.
+ */
+static void dump_fir(int cpu)
+{
+	struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+	struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+	if (pregs == NULL)
+		return;
+
+	/* Todo: do some nicer parsing of bits and based on them go down
+	 * to other sub-units FIRs and not only IIC
+	 */
+	printk(KERN_ERR "Global Checkstop FIR    : 0x%016llx\n",
+	       in_be64(&pregs->checkstop_fir));
+	/* Each line must read its own register, not checkstop_fir again. */
+	printk(KERN_ERR "Global Recoverable FIR  : 0x%016llx\n",
+	       in_be64(&pregs->recoverable_fir));
+	printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
+	       in_be64(&pregs->spec_att_mchk_fir));
+
+	if (iregs == NULL)
+		return;
+	printk(KERN_ERR "IOC FIR                 : 0x%016llx\n",
+	       in_be64(&iregs->ioc_fir));
+
+}
+
+DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception)
+{
+	int cpu = smp_processor_id();
+
+	printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+	dump_fir(cpu);
+	dump_stack();
+}
+
+/*
+ * Maintenance interrupt: no handling is implemented, so only report it
+ * together with a stack trace.
+ */
+DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception)
+{
+	int this_cpu;
+
+	this_cpu = smp_processor_id();
+
+	printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", this_cpu);
+	dump_stack();
+}
+
+/*
+ * Thermal interrupt: no handling is implemented, so only report it
+ * together with a stack trace.
+ */
+DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception)
+{
+	int this_cpu;
+
+	this_cpu = smp_processor_id();
+
+	printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", this_cpu);
+	dump_stack();
+}
+
+/*
+ * Machine check hook (ppc_md.machine_check_exception): log the event and
+ * the FIRs. Always returns 0 because no recovery is attempted here.
+ */
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+	int this_cpu;
+
+	this_cpu = smp_processor_id();
+
+	printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", this_cpu);
+	dump_fir(this_cpu);
+
+	return 0;
+}
+
+/* One PTCAL memory area; created by cbe_ptcal_enable_on_node(). */
+struct ptcal_area {
+	struct list_head list;	/* link in ptcal_list */
+	int nid;		/* node id passed to the RTAS start/stop calls */
+	int order;		/* page allocation order of @pages */
+	struct page *pages;	/* pages allocated on node @nid */
+};
+
+/* Areas on which PTCAL was started; emptied by cbe_ptcal_disable(). */
+static LIST_HEAD(ptcal_list);
+
+/* RTAS tokens for starting/stopping PTCAL, looked up in cbe_ptcal_init(). */
+static int ptcal_start_tok, ptcal_stop_tok;
+
+/*
+ * Allocate 2^@order pages on node @nid and ask RTAS to start PTCAL there.
+ * On success the area is added to ptcal_list.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, or -EIO when
+ * the RTAS start call reports an error.
+ */
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+	struct ptcal_area *area;
+	int ret = -ENOMEM;
+	unsigned long addr;
+
+	/* In a kdump kernel, issue a stop call for this node first. */
+	if (is_kdump_kernel())
+		rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		goto out_err;
+
+	area->nid = nid;
+	area->order = order;
+	area->pages = __alloc_pages_node(area->nid,
+						GFP_KERNEL|__GFP_THISNODE,
+						area->order);
+
+	if (!area->pages) {
+		printk(KERN_WARNING "%s: no page on node %d\n",
+			__func__, area->nid);
+		goto out_free_area;
+	}
+
+	/*
+	 * We move the ptcal area to the middle of the allocated
+	 * page, in order to avoid prefetches in memcpy and similar
+	 * functions stepping on it.
+	 */
+	addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1);
+	printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n",
+			__func__, area->nid, addr);
+
+	ret = -EIO;
+	/* The physical address is passed as two 32-bit halves, high first. */
+	if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+				(unsigned int)(addr >> 32),
+				(unsigned int)(addr & 0xffffffff))) {
+		printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+				__func__, nid);
+		goto out_free_pages;
+	}
+
+	list_add(&area->list, &ptcal_list);
+
+	return 0;
+
+	/* Error unwind: release in reverse order of acquisition. */
+out_free_pages:
+	__free_pages(area->pages, area->order);
+out_free_area:
+	kfree(area);
+out_err:
+	return ret;
+}
+
+/*
+ * Start PTCAL on every node, sized by the "ibm,cbe-ptcal-size" property
+ * of /rtas. Nodes come from "mic-tm" device nodes when any exist,
+ * otherwise from the "node-id" property of the cpu nodes.
+ *
+ * Returns 0 when at least one node was found, -ENODEV otherwise. Errors
+ * from individual nodes are not propagated.
+ */
+static int __init cbe_ptcal_enable(void)
+{
+	struct device_node *rtas, *np;
+	const u32 *size;
+	int order, found = 0;
+
+	rtas = of_find_node_by_path("/rtas");
+	if (rtas == NULL)
+		return -ENODEV;
+
+	size = of_get_property(rtas, "ibm,cbe-ptcal-size", NULL);
+	if (size == NULL) {
+		of_node_put(rtas);
+		return -ENODEV;
+	}
+
+	pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
+	order = get_order(*size);
+	of_node_put(rtas);
+
+	/* malta-style device trees carry be@/mic@ nodes */
+	for_each_node_by_type(np, "mic-tm") {
+		cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+		found = 1;
+	}
+	if (found)
+		return 0;
+
+	/* older device trees: fall back to the cpu nodes */
+	for_each_node_by_type(np, "cpu") {
+		const u32 *node_id = of_get_property(np, "node-id", NULL);
+
+		if (node_id == NULL) {
+			printk(KERN_ERR "%s: node %pOF is missing node-id?\n",
+					__func__, np);
+			continue;
+		}
+		cbe_ptcal_enable_on_node(*node_id, order);
+		found = 1;
+	}
+
+	if (!found)
+		return -ENODEV;
+
+	return 0;
+}
+
+/*
+ * Stop PTCAL on every area in ptcal_list and release the areas that were
+ * stopped successfully. Areas whose stop call fails stay on the list.
+ *
+ * Returns 0 when all stop calls succeeded, -EIO otherwise.
+ */
+static int cbe_ptcal_disable(void)
+{
+	struct ptcal_area *cur, *next;
+	int rc = 0;
+
+	pr_debug("%s: disabling PTCAL\n", __func__);
+
+	list_for_each_entry_safe(cur, next, &ptcal_list, list) {
+		/* ask firmware to stop ptcal on this node */
+		if (rtas_call(ptcal_stop_tok, 1, 1, NULL, cur->nid) != 0) {
+			printk(KERN_ERR "%s: error disabling PTCAL "
+					"on node %d!\n", __func__,
+					cur->nid);
+			rc = -EIO;
+			continue;
+		}
+
+		/* make sure the PTCAL area is accessible again */
+		memset(page_address(cur->pages), 0,
+				1 << (cur->order + PAGE_SHIFT));
+
+		/* unlink and free */
+		list_del(&cur->list);
+		__free_pages(cur->pages, cur->order);
+		kfree(cur);
+	}
+
+	return rc;
+}
+
+/*
+ * Reboot notifier: stop PTCAL on every registered area before restart.
+ *
+ * cbe_ptcal_disable() reports failure as a negative errno, which must not
+ * be handed back from a notifier callback: negative values have the
+ * NOTIFY_STOP_MASK bit set and would cut the reboot notifier chain short.
+ * The failure is already logged by cbe_ptcal_disable(), so always let the
+ * remaining notifiers run.
+ */
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+		unsigned long code, void *data)
+{
+	cbe_ptcal_disable();
+
+	return NOTIFY_DONE;
+}
+
+/* Crash shutdown hook: stop PTCAL; the result cannot be acted upon here. */
+static void cbe_ptcal_crash_shutdown(void)
+{
+	(void)cbe_ptcal_disable();
+}
+
+/* Registered with register_reboot_notifier() in cbe_ptcal_init(). */
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+	.notifier_call = cbe_ptcal_notify_reboot
+};
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+/* Non-zero when the machine is compatible with "IBM,CBPLUS-1.0". */
+static int sysreset_hack;
+
+/*
+ * Device initcall: on "IBM,CBPLUS-1.0" machines set the M8 bit in the FIR
+ * mode register of cpu 0 to enable the JTAG system-reset hack. Always
+ * returns 0.
+ */
+static int __init cbe_sysreset_init(void)
+{
+	struct cbe_pmd_regs __iomem *pmd;
+	u32 mode;
+
+	sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
+	if (sysreset_hack == 0)
+		return 0;
+
+	pmd = cbe_get_cpu_pmd_regs(0);
+	if (pmd == NULL)
+		return 0;
+
+	/* Read-modify-write: enable JTAG system-reset hack */
+	mode = in_be32(&pmd->fir_mode_reg);
+	out_be32(&pmd->fir_mode_reg, mode | CBE_PMD_FIR_MODE_M8);
+
+	return 0;
+}
+device_initcall(cbe_sysreset_init);
+
+/*
+ * Tell whether a system reset exception was injected by the BMC. The BMC
+ * cannot set the reset reason in srr1, so an extra register is consulted
+ * on cpu 0 when the hack is active.
+ *
+ * Returns 0 when the low 16 bits of ras_esc_0 were set (the register is
+ * then cleared) or when the registers are unavailable, 1 otherwise.
+ */
+int cbe_sysreset_hack(void)
+{
+	struct cbe_pmd_regs __iomem *pmd;
+
+	if (!sysreset_hack || smp_processor_id() != 0)
+		return 1;
+
+	pmd = cbe_get_cpu_pmd_regs(0);
+	if (pmd == NULL)
+		return 0;
+
+	if (!(in_be64(&pmd->ras_esc_0) & 0x0000ffff))
+		return 1;
+
+	out_be64(&pmd->ras_esc_0, 0);
+	return 0;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+/*
+ * Arch initcall: look up the PTCAL RTAS tokens, register the reboot and
+ * crash-shutdown hooks that stop PTCAL again, and only then enable it.
+ *
+ * Returns -ENODEV when either token is missing, the registration error
+ * when a hook cannot be installed, or the result of cbe_ptcal_enable().
+ */
+static int __init cbe_ptcal_init(void)
+{
+	int rc;
+
+	ptcal_start_tok = rtas_function_token(RTAS_FN_IBM_CBE_START_PTCAL);
+	ptcal_stop_tok = rtas_function_token(RTAS_FN_IBM_CBE_STOP_PTCAL);
+
+	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE ||
+	    ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	rc = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	if (rc == 0) {
+		rc = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
+		if (rc == 0)
+			return cbe_ptcal_enable();
+
+		/* second hook failed: undo the first one */
+		unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	}
+
+	printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+	return rc;
+}
+
+arch_initcall(cbe_ptcal_init);
+
+/*
+ * RAS setup, called from cell_setup_arch() when CONFIG_CBE_RAS is set:
+ * enables the thermal and system-error interrupts and wake-ups in HID0
+ * and installs the Cell machine check handler in ppc_md.
+ */
+void __init cbe_ras_init(void)
+{
+	unsigned long hid0;
+
+	/*
+	 * Enable System Error & thermal interrupts and wakeup conditions
+	 */
+
+	hid0 = mfspr(SPRN_HID0);
+	hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
+		HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
+	mtspr(SPRN_HID0, hid0);
+	mb();
+
+	/*
+	 * Install machine check handler. Leave setting of precise mode to
+	 * what the firmware did for now
+	 */
+	ppc_md.machine_check_exception = cbe_machine_check_handler;
+	mb();
+
+	/*
+	 * For now, we assume that IOC_FIR is already set to forward some
+	 * error conditions to the System Error handler. If that is not true
+	 * then it will have to be fixed up here.
+	 */
+}
diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h
new file mode 100644
index 0000000000..226dbd48ef
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef RAS_H
+#define RAS_H
+
+#include <asm/interrupt.h>
+
+/* Interrupt handlers defined in ras.c with DEFINE_INTERRUPT_HANDLER(). */
+DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception);
+DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception);
+DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception);
+
+/* Enables RAS interrupts in HID0 and installs the machine check handler. */
+extern void cbe_ras_init(void);
+
+#endif /* RAS_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
new file mode 100644
index 0000000000..f64a1ef98a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  linux/arch/powerpc/platforms/cell/cell_setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified by PPC64 Team, IBM Corp
+ *  Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
+
+#include "cell.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+/*
+ * Debug trace helper: udbg_printf() when DEBUG is defined, otherwise it
+ * expands to nothing. DEBUG is #undef'd at the top of this file.
+ */
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * show_cpuinfo hook: emit the "machine" line using the "model" property
+ * of the device tree root. An empty model string is printed when the
+ * root node or the property is missing.
+ */
+static void cell_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = NULL;
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+
+	/* of_get_property() may return NULL; keep the empty-string fallback. */
+	seq_printf(m, "machine\t\t: CHRP %s\n", model ? model : "");
+	of_node_put(root);
+}
+
+/* progress hook: print the code @hex and the message @s (may be NULL). */
+static void cell_progress(char *s, unsigned short hex)
+{
+	const char *text = s;
+
+	if (text == NULL)
+		text = "";
+
+	printk("*** %04x : %s\n", hex, text);
+}
+
+/*
+ * PCI header fixup, declared for any vendor/device: on Cell machines,
+ * clear the first PCI_BRIDGE_RESOURCES resources of the devfn 0 device
+ * that sits directly on an Axon PCIe host bridge.
+ */
+static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+	const char *model;
+	int idx;
+
+	if (!machine_is(cell))
+		return;
+
+	/* Must be devfn 0 on a bus that has no upstream bridge device */
+	if (dev->bus->self != NULL || dev->devfn != 0)
+		return;
+
+	phb = pci_bus_to_host(dev->bus);
+	if (!phb)
+		return;
+
+	/* PCIe host bridges only ... */
+	if (!of_device_is_compatible(phb->dn, "pciex"))
+		return;
+
+	/* ... and only those whose model is Axon */
+	model = of_get_property(phb->dn, "model", NULL);
+	if (model == NULL || strcmp(model, "Axon"))
+		return;
+
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *res = &dev->resource[idx];
+
+		res->start = 0;
+		res->end = 0;
+		res->flags = 0;
+	}
+
+	printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
+
+/*
+ * pci_setup_phb hook: run the generic RTAS PHB setup, attach the Cell
+ * controller ops and, for "pci" nodes whose model is "Spider", register
+ * the spider I/O workarounds.
+ *
+ * Returns the error from rtas_setup_phb(), or 0.
+ */
+static int cell_setup_phb(struct pci_controller *phb)
+{
+	struct device_node *node;
+	const char *model;
+	int err;
+
+	err = rtas_setup_phb(phb);
+	if (err)
+		return err;
+
+	phb->controller_ops = cell_pci_controller_ops;
+
+	node = phb->dn;
+	model = of_get_property(node, "model", NULL);
+
+	/* Setup workarounds for spider */
+	if (model != NULL && of_node_name_eq(node, "pci") &&
+	    strcmp(model, "Spider") == 0)
+		iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+				  (void *)SPIDER_PCI_REG_BASE);
+
+	return 0;
+}
+
+/* Bus matches handed to of_platform_bus_probe() by cell_publish_devices(). */
+static const struct of_device_id cell_bus_ids[] __initconst = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .type = "spider", },
+	{ .type = "axon", },
+	{ .type = "plb5", },
+	{ .type = "plb4", },
+	{ .type = "opb", },
+	{ .type = "ebc", },
+	{},	/* terminating empty entry */
+};
+
+/*
+ * Subsys initcall for Cell: publish the OF platform devices for the buses
+ * in cell_bus_ids, create platform devices for the PCI host bridges below
+ * the root, and register a "cbe-mic" device for each online node that has
+ * MIC registers. Always returns 0.
+ */
+static int __init cell_publish_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *child;
+	int nid;
+
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, cell_bus_ids, NULL);
+
+	/*
+	 * Spider based blades need the OF platform devices of the PCI host
+	 * bridges to be created by hand.
+	 */
+	for_each_child_of_node(root, child) {
+		if (of_node_is_type(child, "pci") ||
+		    of_node_is_type(child, "pciex"))
+			of_platform_device_create(child, NULL, NULL);
+	}
+
+	of_node_put(root);
+
+	/*
+	 * The MIC memory controller has no device of its own, so create a
+	 * platform device for the EDAC driver to attach to.
+	 */
+	for_each_online_node(nid) {
+		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(nid)) != NULL)
+			platform_device_register_simple("cbe-mic", nid,
+							NULL, 0);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(cell, cell_publish_devices);
+
+/*
+ * Allocate and initialise an MPIC for every "interrupt-controller" node
+ * that is compatible with "CBEA,platform-open-pic".
+ */
+static void __init mpic_init_IRQ(void)
+{
+	struct device_node *node;
+	struct mpic *pic;
+
+	for_each_node_by_name(node, "interrupt-controller") {
+		if (of_device_is_compatible(node, "CBEA,platform-open-pic")) {
+			/*
+			 * The MPIC driver takes what it needs from the
+			 * device-tree, hence the 0 arguments.
+			 */
+			pic = mpic_alloc(node, 0, MPIC_SECONDARY | MPIC_NO_RESET,
+					0, 0, " MPIC     ");
+			if (pic != NULL)
+				mpic_init(pic);
+		}
+	}
+}
+
+
+/*
+ * init_IRQ hook for the Cell machine: runs iic_init_IRQ(),
+ * spider_init_IRQ() and mpic_init_IRQ(), in that order.
+ */
+static void __init cell_init_irq(void)
+{
+	iic_init_IRQ();
+	spider_init_IRQ();
+	mpic_init_IRQ();
+}
+
+/* Program DABRX with both the kernel and user bits (boot cpu, early setup). */
+static void __init cell_set_dabrx(void)
+{
+	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+/*
+ * setup_arch hook for the Cell machine (see define_machine(cell)). Runs
+ * the platform's early initialisation steps in a fixed order.
+ */
+static void __init cell_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+	/* Select the MMIO priv1 and OF-based management SPU backends. */
+	spu_priv1_ops = &spu_priv1_mmio_ops;
+	spu_management_ops = &spu_management_of_ops;
+#endif
+
+	cbe_regs_init();
+
+	cell_set_dabrx();
+
+#ifdef CONFIG_CBE_RAS
+	cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+	smp_init_cell();
+#endif
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	/* Find and initialize PCI host bridges */
+	init_pci_config_tokens();
+
+	/* Installs the power_save and system reset hooks when supported. */
+	cbe_pervasive_init();
+
+	mmio_nvram_init();
+}
+
+/*
+ * Machine probe: returns 1 (and installs the RTAS power-off handler) when
+ * the device tree is compatible with "IBM,CBEA" or "IBM,CPBW-1.0",
+ * otherwise 0.
+ */
+static int __init cell_probe(void)
+{
+	int is_cell = of_machine_is_compatible("IBM,CBEA") ||
+		      of_machine_is_compatible("IBM,CPBW-1.0");
+
+	if (!is_cell)
+		return 0;
+
+	pm_power_off = rtas_power_off;
+
+	return 1;
+}
+
+/*
+ * Machine description for Cell: platform hooks from this file combined
+ * with the generic RTAS restart/halt/time helpers.
+ */
+define_machine(cell) {
+	.name			= "Cell",
+	.probe			= cell_probe,
+	.setup_arch		= cell_setup_arch,
+	.show_cpuinfo		= cell_show_cpuinfo,
+	.restart		= rtas_restart,
+	.halt			= rtas_halt,
+	.get_boot_time		= rtas_get_boot_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.progress		= cell_progress,
+	.init_IRQ       	= cell_init_irq,
+	.pci_setup_phb		= cell_setup_phb,
+};
+
+/*
+ * Controller ops copied into each PHB by cell_setup_phb(). No initialiser
+ * here; cell_iommu_init() fills in dma_dev_setup when it sets up the IOMMU.
+ */
+struct pci_controller_ops cell_pci_controller_ops;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
new file mode 100644
index 0000000000..30394c6f88
--- /dev/null
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for BPA machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/cputhreads.h>
+#include <asm/code-patching.h>
+
+#include "interrupt.h"
+#include <asm/udbg.h>
+
+/*
+ * Debug trace helper: udbg_printf() when DEBUG is defined, otherwise it
+ * expands to nothing. DEBUG is #undef'd at the top of this file.
+ */
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ */
+static cpumask_t of_spin_map;
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ * @lcpu: logical number of the cpu to start
+ *
+ * Cpus recorded in of_spin_map were already started from Open Firmware
+ * and need nothing more at boot time. Any other cpu is started through
+ * the RTAS start-cpu call, entering at generic_secondary_smp_init.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+	unsigned long entry =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
+	unsigned int hw_id;
+	int token;
+	int rc;
+
+	/* Started by OF already and sitting in the spin loop */
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
+		return 1;
+
+	hw_id = get_hard_smp_processor_id(lcpu);
+
+	/*
+	 * Without an RTAS start-cpu token, presume that the cpu is
+	 * spinning already.
+	 */
+	token = rtas_function_token(RTAS_FN_START_CPU);
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	rc = rtas_call(token, 3, 1, NULL, hw_id, entry, lcpu);
+	if (rc == 0)
+		return 1;
+
+	printk(KERN_ERR "start-cpu failed: %i\n", rc);
+	return 0;
+}
+
+/*
+ * setup_cpu hook: run iic_setup_cpu() on every cpu except the boot cpu,
+ * then program DABRX.
+ */
+static void smp_cell_setup_cpu(int cpu)
+{
+	const int is_boot_cpu = (cpu == boot_cpuid);
+
+	if (!is_boot_cpu)
+		iic_setup_cpu();
+
+	/* Replace the default DABRX so that user watchpoints are allowed. */
+	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+/*
+ * kick_cpu hook: start cpu @nr and release it from its hold loop.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range @nr, or -ENOENT when
+ * the cpu could not be started.
+ */
+static int smp_cell_kick_cpu(int nr)
+{
+	int started;
+
+	if (nr < 0 || nr >= nr_cpu_ids)
+		return -EINVAL;
+
+	started = smp_startup_cpu(nr);
+	if (started == 0)
+		return -ENOENT;
+
+	/*
+	 * The processor is spinning until its cpu_start field becomes
+	 * non-zero. Once it is set, the processor continues on to
+	 * secondary_start.
+	 */
+	paca_ptrs[nr]->cpu_start = 1;
+
+	return 0;
+}
+
+/*
+ * SMP operations installed by smp_init_cell(), which may also fill in the
+ * give/take timebase hooks at run time.
+ */
+static struct smp_ops_t bpa_iic_smp_ops = {
+	.message_pass	= iic_message_pass,
+	.probe		= iic_request_IPIs,
+	.kick_cpu	= smp_cell_kick_cpu,
+	.setup_cpu	= smp_cell_setup_cpu,
+	.cpu_bootable	= smp_generic_cpu_bootable,
+};
+
+/*
+ * Called very early: install the Cell SMP operations and record in
+ * of_spin_map which cpus are still spinning in their hold loops.
+ */
+void __init smp_init_cell(void)
+{
+	int cpu;
+
+	DBG(" -> smp_init_cell()\n");
+
+	smp_ops = &bpa_iic_smp_ops;
+
+	if (!cpu_has_feature(CPU_FTR_SMT)) {
+		/* No SMT: every present cpu counts as spinning. */
+		cpumask_copy(&of_spin_map, cpu_present_mask);
+	} else {
+		/* SMT: only thread 0 of each core is marked as spinning. */
+		for_each_present_cpu(cpu) {
+			if (cpu_thread_in_core(cpu) != 0)
+				continue;
+			cpumask_set_cpu(cpu, &of_spin_map);
+		}
+	}
+
+	/* The boot cpu is running this code, so it is not spinning. */
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
+
+	/* Non-lpar has additional take/give timebase */
+	if (rtas_function_token(RTAS_FN_FREEZE_TIME_BASE) != RTAS_UNKNOWN_SERVICE) {
+		smp_ops->give_timebase = rtas_give_timebase;
+		smp_ops->take_timebase = rtas_take_timebase;
+	}
+
+	DBG(" <- smp_init_cell()\n");
+}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 0000000000..68439445b1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/io-workarounds.h>
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+struct spiderpci_iowa_private {
+	void __iomem *regs;	/* ioremap()ed in spiderpci_iowa_init(), read by spiderpci_io_flush() */
+};
+
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+	struct spiderpci_iowa_private *iowa = bus->private;
+
+	/* Read the bridge's dummy-read register, then iosync(). */
+	in_be32(iowa->regs + SPIDER_PCI_DUMMY_READ);
+	iosync();
+}
+
+#define SPIDER_PCI_MMIO_READ(name, ret)	/* __do_##name() then bus flush */	\
+static ret spiderpci_##name(const PCI_IO_ADDR addr)			\
+{									\
+	ret val = __do_##name(addr);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+	return val;							\
+}
+
+#define SPIDER_PCI_MMIO_READ_STR(name)	/* buffer variant, returns nothing */	\
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, 	\
+			     unsigned long count)			\
+{									\
+	__do_##name(addr, buf, count);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+}
+/* Generate the spiderpci_read*() accessors that spiderpci_ops points at. */
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	__do_memcpy_fromio(dest, src, n);	/* the copy itself */
+	spiderpci_io_flush(iowa_mem_find_bus(src));	/* then flush the bus owning @src */
+}
+
+/* Optional VCI_CNTL_STAT tweak, then point the dummy read at a DMA-mapped page; 0 or -1. */
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+					   void __iomem *regs)
+{
+	void *dummy_page_va;
+	dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+	u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+	pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+	out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+	/*
+	 * Set up the dummy read.
+	 * On CellBlade, we cannot tell which XDR memory kmalloc() uses to
+	 * allocate dummy_page_va.
+	 * For best performance the page should come from the XDR nearest
+	 * to the spider-pci, i.e. from the CBE nearest to the spider-pci,
+	 * but it is not known how to select that here.
+	 *
+	 * Celleb does not have this problem, because it has only one XDR.
+	 */
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(phb->parent, dummy_page_da)) {
+		pr_err("SPIDER-IOWA:Map dummy page failed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+	return 0;
+}
+
+/* Map the Spider regs (PHB resource 0 + offset in @data) for @bus and set up the chip; 0 or -1. */
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+	void __iomem *regs = NULL;
+	struct spiderpci_iowa_private *priv;
+	struct device_node *np = bus->phb->dn;
+	struct resource r;
+	unsigned long offset = (unsigned long)data;
+
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n", np);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		pr_err("SPIDERPCI-IOWA:"
+		       "Can't allocate struct spiderpci_iowa_private\n");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+		goto error;
+	}
+
+	regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+	if (!regs) {
+		pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+		goto error;
+	}
+	priv->regs = regs;
+	bus->private = priv;
+
+	if (spiderpci_pci_setup_chip(bus->phb, regs))
+		goto error;
+
+	return 0;
+
+error:
+	kfree(priv);		/* also undoes the bus->private assignment below */
+	bus->private = NULL;
+
+	if (regs)		/* still NULL if we failed before ioremap() */
+		iounmap(regs);
+
+	return -1;
+}
+
+struct ppc_pci_io spiderpci_ops = {	/* only read-side accessors are overridden here */
+	.readb = spiderpci_readb,
+	.readw = spiderpci_readw,
+	.readl = spiderpci_readl,
+	.readq = spiderpci_readq,
+	.readw_be = spiderpci_readw_be,
+	.readl_be = spiderpci_readl_be,
+	.readq_be = spiderpci_readq_be,
+	.readsb = spiderpci_readsb,
+	.readsw = spiderpci_readsw,
+	.readsl = spiderpci_readsl,
+	.memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 0000000000..11df737c8c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+
+#include "interrupt.h"
+
+/* register layout taken from Spider spec, table 7.4-4 (byte offsets from pic->regs) */
+enum {
+	TIR_DEN		= 0x004, /* Detection Enable Register */
+	TIR_MSK		= 0x084, /* Mask Level Register */
+	TIR_EDC		= 0x0c0, /* Edge Detection Clear Register */
+	TIR_PNDA	= 0x100, /* Pending Register A */
+	TIR_PNDB	= 0x104, /* Pending Register B */
+	TIR_CS		= 0x144, /* Current Status Register */
+	TIR_LCSA	= 0x150, /* Level Current Status Register A */
+	TIR_LCSB	= 0x154, /* Level Current Status Register B */
+	TIR_LCSC	= 0x158, /* Level Current Status Register C */
+	TIR_LCSD	= 0x15c, /* Level Current Status Register D */
+	TIR_CFGA	= 0x200, /* Setting Register A0 */
+	TIR_CFGB	= 0x204, /* Setting Register B0 */
+			/* 0x208 ... 0x3ff Setting Register An/Bn */
+	TIR_PPNDA	= 0x400, /* Packet Pending Register A */
+	TIR_PPNDB	= 0x404, /* Packet Pending Register B */
+	TIR_PIERA	= 0x408, /* Packet Output Error Register A */
+	TIR_PIERB	= 0x40c, /* Packet Output Error Register B */
+	TIR_PIEN	= 0x444, /* Packet Output Enable Register */
+	TIR_PIPND	= 0x454, /* Packet Output Pending Register */
+	TIRDID		= 0x484, /* Spider Device ID Register */
+	REISTIM		= 0x500, /* Reissue Command Timeout Time Setting */
+	REISTIMEN	= 0x504, /* Reissue Command Timeout Setting */
+	REISWAITEN	= 0x508, /* Reissue Wait Control */
+};
+
+#define SPIDER_CHIP_COUNT	4	/* size of spider_pics[] */
+#define SPIDER_SRC_COUNT	64	/* sources per chip, size of the linear irq domain */
+#define SPIDER_IRQ_INVALID	63	/* TIR_CS value the cascade handler ignores */
+
+struct spider_pic {
+	struct irq_domain		*host;	/* domain created in spider_init_one() */
+	void __iomem		*regs;	/* ioremap()ed register block */
+	unsigned int		node_id;	/* set by spider_find_cascade_and_node() */
+};
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
+
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
+{
+	return irq_data_get_irq_chip_data(d);	/* chip data is set in spider_host_map() */
+}
+
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+					   unsigned int src)
+{
+	return pic->regs + TIR_CFGA + 8 * src;	/* 8 bytes of setting registers per source */
+}
+
+static void spider_unmask_irq(struct irq_data *d)
+{
+	void __iomem *reg;
+
+	/* Read-modify-write: set the 0x30000000 bits of the source's config word. */
+	reg = spider_get_irq_config(spider_irq_data_to_pic(d), irqd_to_hwirq(d));
+	out_be32(reg, in_be32(reg) | 0x30000000u);
+}
+
+static void spider_mask_irq(struct irq_data *d)
+{
+	void __iomem *reg;
+
+	/* Read-modify-write: clear the 0x30000000 bits of the source's config word. */
+	reg = spider_get_irq_config(spider_irq_data_to_pic(d), irqd_to_hwirq(d));
+	out_be32(reg, in_be32(reg) & ~0x30000000u);
+}
+
+static void spider_ack_irq(struct irq_data *d)
+{
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	/* Reset edge detection logic if necessary; level irqs need nothing.
+	 */
+	if (irqd_is_level_type(d))
+		return;
+
+	/* Only interrupts 47 to 50 can be set to edge */
+	if (src < 47 || src > 50)
+		return;
+
+	/* Perform the clear of the edge logic: 0x100 plus the low 4 bits of src */
+	out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf));
+}
+
+static int spider_set_irq_type(struct irq_data *d, unsigned int type)
+{
+	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int hw = irqd_to_hwirq(d);
+	void __iomem *cfg = spider_get_irq_config(pic, hw);
+	u32 old_mask;
+	u32 ic;
+
+	/* Note that only level high is supported for most interrupts */
+	if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+	    (hw < 47 || hw > 50))
+		return -EINVAL;
+
+	/* Decode sense type into the value placed at bit 24 of the config word */
+	switch(sense) {
+	case IRQ_TYPE_EDGE_RISING:
+		ic = 0x3;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		ic = 0x2;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		ic = 0x0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+	case IRQ_TYPE_NONE:	/* no explicit type: same setting as level high */
+		ic = 0x1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Configure the source. One gross hack that was there before and
+	 * that I've kept around is the priority to the BE which I set to
+	 * be the same as the interrupt source number. I don't know whether
+	 * that's supposed to make any kind of sense however, we'll have to
+	 * decide that, but for now, I'm not changing the behaviour.
+	 */
+	old_mask = in_be32(cfg) & 0x30000000u;	/* keep the bits mask/unmask toggle */
+	out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+		 (pic->node_id << 4) | 0xe);
+	out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+	return 0;
+}
+
+static struct irq_chip spider_pic = {	/* attached to each virq in spider_host_map() */
+	.name = "SPIDER",
+	.irq_unmask = spider_unmask_irq,
+	.irq_mask = spider_mask_irq,
+	.irq_ack = spider_ack_irq,
+	.irq_set_type = spider_set_irq_type,
+};
+
+static int spider_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);	/* the struct spider_pic given at domain creation */
+	irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+	/* Set default irq type (IRQ_TYPE_NONE is handled like level high) */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int spider_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Spider interrupts have 2 cells, first is the interrupt source,
+	 * second, well, I don't know for sure yet ... We mask the top bits
+	 * because old device-trees encode a node number in there.
+	 */
+	*out_hwirq = intspec[0] & 0x3f;
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;	/* always reported as level high */
+	return 0;
+}
+
+static const struct irq_domain_ops spider_host_ops = {	/* passed to irq_domain_add_linear() */
+	.map = spider_host_map,
+	.xlate = spider_host_xlate,
+};
+
+static void spider_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct spider_pic *pic = irq_desc_get_handler_data(desc);
+	unsigned int cs;
+
+	cs = in_be32(pic->regs + TIR_CS) >> 24;	/* top byte is used as the hwirq number */
+	if (cs != SPIDER_IRQ_INVALID)
+		generic_handle_domain_irq(pic->host, cs);
+
+	chip->irq_eoi(&desc->irq_data);	/* EOI on the parent chip, done in either case */
+}
+
+/* For hooking up the cascade we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on at
+ * least not the "standard" way. We can reconstitute it based on two
+ * pieces of information though: which BE node we are connected to and
+ * whether we are connected to IOIF0 or IOIF1. Right now, we really only
+ * care about the IBM cell blade and we know that its firmware gives us an
+ * interrupt-map property which is pretty strange. Returns the virq or 0.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+	unsigned int virq;
+	const u32 *imap, *tmp;
+	int imaplen, intsize, unit;
+	struct device_node *iic;
+	struct device_node *of_node;
+
+	of_node = irq_domain_get_of_node(pic->host);
+
+	/* First, we check whether we have a real "interrupts" in the device
+	 * tree in case the device-tree is ever fixed
+	 */
+	virq = irq_of_parse_and_map(of_node, 0);
+	if (virq)
+		return virq;
+
+	/* Now do the horrible hacks */
+	tmp = of_get_property(of_node, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return 0;
+	intsize = *tmp;
+	imap = of_get_property(of_node, "interrupt-map", &imaplen);
+	/* imaplen is in bytes: need intsize cells plus the parent phandle cell */
+	if (imap == NULL || imaplen < (intsize + 1) * (int)sizeof(*imap))
+		return 0;
+	iic = of_find_node_by_phandle(imap[intsize]);
+	if (iic == NULL)
+		return 0;
+	imap += intsize + 1;
+	tmp = of_get_property(iic, "#interrupt-cells", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return 0;
+	}
+	intsize = *tmp;
+	/* Assume unit is last entry of interrupt specifier */
+	unit = imap[intsize - 1];	/* NOTE(review): not checked against imaplen */
+	/* Ok, we have a unit, now let's try to get the node */
+	tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return 0;
+	}
+	/* ugly as hell but works for now */
+	pic->node_id = (*tmp) >> 1;
+	of_node_put(iic);
+
+	/* Ok, now let's get cracking. You may ask me why I just didn't match
+	 * the iic host from the iic OF node, but that way I'm still compatible
+	 * with really really old old firmwares for which we don't have a node.
+	 * Manufacture an IIC interrupt number of class 2.
+	 */
+	virq = irq_create_mapping(NULL,
+				  (pic->node_id << IIC_IRQ_NODE_SHIFT) |
+				  (2 << IIC_IRQ_CLASS_SHIFT) |
+				  unit);
+	if (!virq)
+		printk(KERN_ERR "spider_pic: failed to map cascade !\n");
+	return virq;
+}
+
+
+static void __init spider_init_one(struct device_node *of_node, int chip,
+				   unsigned long addr)
+{
+	struct spider_pic *pic = &spider_pics[chip];
+	int i, virq;
+
+	/* Map registers (0x1000 bytes at addr); failure is fatal */
+	pic->regs = ioremap(addr, 0x1000);
+	if (pic->regs == NULL)
+		panic("spider_pic: can't map registers !");
+
+	/* Allocate a host (linear irq domain with SPIDER_SRC_COUNT entries) */
+	pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT,
+					  &spider_host_ops, pic);
+	if (pic->host == NULL)
+		panic("spider_pic: can't allocate irq host !");
+
+	/* Go through all sources and disable them (same bits as spider_mask_irq) */
+	for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+		void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+		out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+	}
+
+	/* do not mask any interrupts because of level */
+	out_be32(pic->regs + TIR_MSK, 0x0);
+
+	/* enable interrupt packets to be output */
+	out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
+
+	/* Hook up the cascade interrupt to the iic and nodeid; give up without one */
+	virq = spider_find_cascade_and_node(pic);
+	if (!virq)
+		return;
+	irq_set_handler_data(virq, pic);
+	irq_set_chained_handler(virq, spider_irq_cascade);
+
+	printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %pOF\n",
+	       pic->node_id, addr, of_node);
+
+	/* Enable the interrupt detection enable bit. Do this last! */
+	out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
+}
+
+void __init spider_init_IRQ(void)
+{
+	struct resource r;
+	struct device_node *dn;
+	int chip = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device nodes
+	 * in the right order here but that's definitely not guaranteed, we need
+	 * to get the node from the device tree instead. There is currently no
+	 * proper property for it (but our whole device-tree is bogus anyway) so
+	 * all we can do is pray or maybe test the address and deduce the node-id.
+	 * Nodes beyond SPIDER_CHIP_COUNT are skipped: spider_pics[] has no room.
+	 */
+	for_each_node_by_name(dn, "interrupt-controller") {
+		if (chip < SPIDER_CHIP_COUNT &&
+		    of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+			if (of_address_to_resource(dn, 0, &r)) {
+				printk(KERN_WARNING "spider-pic: Failed\n");
+				continue;
+			}
+		} else if (of_device_is_compatible(dn, "sti,platform-spider-pic")
+			   && (chip < 2)) {
+			static long hard_coded_pics[] = { 0x24000008000ul, 0x34000008000ul };
+			r.start = hard_coded_pics[chip];
+		} else
+			continue;
+		spider_init_one(dn, chip++, r.start);
+	}
+}
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
new file mode 100644
index 0000000000..dea6f0f258
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -0,0 +1,790 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/xmon.h>
+#include <asm/kexec.h>
+
+const struct spu_management_ops *spu_management_ops;	/* NULL: init_spu_base() does nothing */
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
+const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];	/* indexed by spu->node */
+EXPORT_SYMBOL_GPL(cbe_spu_info);
+
+/*
+ * The spufs fault-handling code needs to call force_sig_fault to raise signals
+ * on DMA errors. Export it here to avoid general kernel-wide access to this
+ * function.
+ */
+EXPORT_SYMBOL_GPL(force_sig_fault);
+
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep.  Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition spu_full_list_lock protects all assignments to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
+
+void spu_invalidate_slbs(struct spu *spu)	/* takes spu->register_lock itself */
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu->register_lock, flags);
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)	/* skipped if bit clear */
+		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+	spin_unlock_irqrestore(&spu->register_lock, flags);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* Called by the MM core when a segment size is changed: invalidates the
+ * SLBs of every SPU on spu_full_list whose ->mm is @mm.
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	struct spu *spu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_full_list_lock, flags);	/* guards the list and spu->mm */
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		if (spu->mm == mm)
+			spu_invalidate_slbs(spu);
+	}
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;	/* never fewer than 2 bits */
+
+	/* Global TLBIE broadcast required with SPEs: fill the mm's cpumask. */
+	bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_full_list_lock, flags);	/* this lock guards spu->mm */
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+	if (mm)		/* @mm may be NULL; then only the pointer is cleared */
+		mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
+int spu_64k_pages_available(void)
+{
+	return mmu_psize_defs[MMU_PAGE_64K].shift != 0;	/* 1 if the 64K entry has a shift set */
+}
+EXPORT_SYMBOL_GPL(spu_64k_pages_available);
+
+static void spu_restart_dma(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))	/* restart now */
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+	else {
+		set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);	/* only flag it */
+		mb();
+	}
+}
+
+static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n",
+			__func__, slbe, slb->vsid, slb->esid);
+
+	out_be64(&priv2->slb_index_W, slbe);	/* select entry @slbe first */
+	/* set invalid before writing vsid */
+	out_be64(&priv2->slb_esid_RW, 0);
+	/* now it's safe to write the vsid */
+	out_be64(&priv2->slb_vsid_RW, slb->vsid);
+	/* setting the new esid makes the entry valid again */
+	out_be64(&priv2->slb_esid_RW, slb->esid);
+}
+
+static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
+{
+	struct copro_slb slb;
+	int ret;
+
+	ret = copro_calculate_slb(spu->mm, ea, &slb);
+	if (ret)		/* no SLB entry could be computed for @ea */
+		return ret;
+
+	spu_load_slb(spu, spu->slb_replace, &slb);
+
+	spu->slb_replace++;	/* replacement index cycles through 0..7 */
+	if (spu->slb_replace >= 8)
+		spu->slb_replace = 0;
+
+	spu_restart_dma(spu);
+	spu->stats.slb_flt++;
+	return 0;
+}
+
+extern int hash_page(unsigned long ea, unsigned long access,
+		     unsigned long trap, unsigned long dsisr); //XXX
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
+{
+	int ret;
+
+	pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea);
+
+	/*
+	 * Handle kernel space hash faults immediately. User hash
+	 * faults need to be deferred to process context.
+	 */
+	if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+	    (get_region_id(ea) != USER_REGION_ID)) {
+
+		spin_unlock(&spu->register_lock);	/* dropped around hash_page() */
+		ret = hash_page(ea,
+				_PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,
+				0x300, dsisr);
+		spin_lock(&spu->register_lock);
+
+		if (!ret) {
+			spu_restart_dma(spu);
+			return 0;
+		}
+	}
+
+	spu->class_1_dar = ea;		/* visible to stop_callback() only */
+	spu->class_1_dsisr = dsisr;
+
+	spu->stop_callback(spu, 1);
+
+	spu->class_1_dar = 0;
+	spu->class_1_dsisr = 0;
+
+	return 0;
+}
+
+static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
+{
+	unsigned long ea = (unsigned long)addr;
+	u64 llp;
+
+	if (get_region_id(ea) == LINEAR_MAP_REGION_ID)	/* page-size bits by region */
+		llp = mmu_psize_defs[mmu_linear_psize].sllp;
+	else
+		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+
+	slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+		SLB_VSID_KERNEL | llp;
+	slb->esid = (ea & ESID_MASK) | SLB_ESID_V;	/* entry is marked valid */
+}
+
+/**
+ * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the
+ * segment (ESID_MASK bits) of address @new_addr is already present.
+ */
+static inline int __slb_present(struct copro_slb *slbs, int nr_slbs,
+		void *new_addr)
+{
+	unsigned long ea = (unsigned long)new_addr;
+	int i;
+
+	for (i = 0; i < nr_slbs; i++)
+		if (!((slbs[i].esid ^ ea) & ESID_MASK))	/* same ESID bits */
+			return 1;
+
+	return 0;
+}
+
+/**
+ * Setup the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaries, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller than one segment - if not, something
+ * is seriously wrong.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+		void *code, int code_size)
+{
+	struct copro_slb slbs[4];	/* one slot per entry of addrs[] below */
+	int i, nr_slbs = 0;
+	/* start and end addresses of both mappings */
+	void *addrs[] = {
+		lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+		code, code + code_size - 1
+	};
+
+	/* check the set of addresses, and create a new entry in the slbs array
+	 * if there isn't already a SLB for that address */
+	for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+		if (__slb_present(slbs, nr_slbs, addrs[i]))
+			continue;
+
+		__spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+		nr_slbs++;
+	}
+
+	spin_lock_irq(&spu->register_lock);
+	/* Add the set of SLBs, starting at SLB index 0 */
+	for (i = 0; i < nr_slbs; i++)
+		spu_load_slb(spu, i, &slbs[i]);
+	spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat, mask;
+
+	spu = data;	/* dev_id passed to request_irq() in spu_request_irqs() */
+
+	spin_lock(&spu->register_lock);
+	mask = spu_int_mask_get(spu, 0);
+	stat = spu_int_stat_get(spu, 0) & mask;
+
+	spu->class_0_pending |= stat;	/* published only for the callback */
+	spu->class_0_dar = spu_mfc_dar_get(spu);
+	spu->stop_callback(spu, 0);
+	spu->class_0_pending = 0;
+	spu->class_0_dar = 0;
+
+	spu_int_stat_clear(spu, 0, stat);
+	spin_unlock(&spu->register_lock);
+
+	return IRQ_HANDLED;	/* unconditional, unlike classes 1 and 2 */
+}
+
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat, mask, dar, dsisr;
+
+	spu = data;
+
+	/* atomically read & clear class1 status. */
+	spin_lock(&spu->register_lock);
+	mask  = spu_int_mask_get(spu, 1);
+	stat  = spu_int_stat_get(spu, 1) & mask;
+	dar   = spu_mfc_dar_get(spu);
+	dsisr = spu_mfc_dsisr_get(spu);
+	if (stat & CLASS1_STORAGE_FAULT_INTR)	/* DSISR is zeroed once sampled */
+		spu_mfc_dsisr_set(spu, 0ul);
+	spu_int_stat_clear(spu, 1, stat);
+
+	pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
+			dar, dsisr);
+
+	if (stat & CLASS1_SEGMENT_FAULT_INTR)	/* return value is ignored */
+		__spu_trap_data_seg(spu, dar);
+
+	if (stat & CLASS1_STORAGE_FAULT_INTR)
+		__spu_trap_data_map(spu, dar, dsisr);
+
+	spu->class_1_dsisr = 0;
+	spu->class_1_dar = 0;
+
+	spin_unlock(&spu->register_lock);
+
+	return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static irqreturn_t
+spu_irq_class_2(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat;
+	unsigned long mask;
+	const int mailbox_intrs =
+		CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;
+
+	spu = data;
+	spin_lock(&spu->register_lock);
+	stat = spu_int_stat_get(spu, 2);
+	mask = spu_int_mask_get(spu, 2);
+	/* ignore interrupts we're not waiting for */
+	stat &= mask;
+	/* mailbox interrupts are level triggered. mask them now before
+	 * acknowledging */
+	if (stat & mailbox_intrs)
+		spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));
+	/* acknowledge all interrupts before the callbacks */
+	spu_int_stat_clear(spu, 2, stat);
+
+	pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);
+
+	if (stat & CLASS2_MAILBOX_INTR)
+		spu->ibox_callback(spu);
+
+	if (stat & CLASS2_SPU_STOP_INTR)	/* stop and halt share one callback */
+		spu->stop_callback(spu, 2);
+
+	if (stat & CLASS2_SPU_HALT_INTR)
+		spu->stop_callback(spu, 2);
+
+	if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)
+		spu->mfc_callback(spu);
+
+	if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)
+		spu->wbox_callback(spu);
+
+	spu->stats.class2_intr++;	/* counted even when stat is 0 */
+
+	spin_unlock(&spu->register_lock);
+
+	return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int __init spu_request_irqs(struct spu *spu)
+{
+	int ret = 0;
+
+	if (spu->irqs[0]) {	/* a zero irq number means "class not wired up" */
+		snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+			 spu->number);
+		ret = request_irq(spu->irqs[0], spu_irq_class_0,
+				  0, spu->irq_c0, spu);
+		if (ret)
+			goto bail0;
+	}
+	if (spu->irqs[1]) {
+		snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+			 spu->number);
+		ret = request_irq(spu->irqs[1], spu_irq_class_1,
+				  0, spu->irq_c1, spu);
+		if (ret)
+			goto bail1;
+	}
+	if (spu->irqs[2]) {
+		snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+			 spu->number);
+		ret = request_irq(spu->irqs[2], spu_irq_class_2,
+				  0, spu->irq_c2, spu);
+		if (ret)
+			goto bail2;
+	}
+	return 0;
+
+bail2:	/* unwind in reverse order, releasing only what was requested */
+	if (spu->irqs[1])
+		free_irq(spu->irqs[1], spu);
+bail1:
+	if (spu->irqs[0])
+		free_irq(spu->irqs[0], spu);
+bail0:
+	return ret;
+}
+
+static void spu_free_irqs(struct spu *spu)
+{
+	int class;
+
+	/* Release each of the three class interrupts that has an irq number. */
+	for (class = 0; class < 3; class++)
+		if (spu->irqs[class])
+			free_irq(spu->irqs[class], spu);
+}
+
+void spu_init_channels(struct spu *spu)
+{
+	static const struct {
+		 unsigned channel;
+		 unsigned count;
+	} zero_list[] = {	/* channel, number of zero data writes */
+		{ 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
+		{ 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
+	}, count_list[] = {	/* channel, value written to its count */
+		{ 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
+		{ 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
+		{ 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
+	};
+	struct spu_priv2 __iomem *priv2;
+	int i;
+
+	priv2 = spu->priv2;
+
+	/* initialize all channel data to zero */
+	for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
+		int count;
+
+		out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
+		for (count = 0; count < zero_list[i].count; count++)
+			out_be64(&priv2->spu_chnldata_RW, 0);
+	}
+
+	/* initialize channel counts to meaningful values */
+	for (i = 0; i < ARRAY_SIZE(count_list); i++) {
+		out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
+		out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
+	}
+}
+EXPORT_SYMBOL_GPL(spu_init_channels);
+
+static struct bus_type spu_subsys = {	/* registered in init_spu_base() */
+	.name = "spu",
+	.dev_name = "spu",
+};
+
+int spu_add_dev_attr(struct device_attribute *attr)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		device_create_file(&spu->dev, attr);	/* result is not checked */
+	mutex_unlock(&spu_full_list_mutex);
+
+	return 0;	/* always 0, whatever device_create_file() returned */
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr);
+
+int spu_add_dev_attr_group(const struct attribute_group *attrs)
+{
+	struct spu *spu;
+	int rc = 0;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		rc = sysfs_create_group(&spu->dev.kobj, attrs);
+
+		/* we're in trouble here, but try unwinding anyway */
+		if (rc) {
+			printk(KERN_ERR "%s: can't create sysfs group '%s'\n",
+					__func__, attrs->name);
+
+			list_for_each_entry_continue_reverse(spu,
+					&spu_full_list, full_list)
+				sysfs_remove_group(&spu->dev.kobj, attrs);
+			break;	/* rc keeps the sysfs_create_group() error */
+		}
+	}
+
+	mutex_unlock(&spu_full_list_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);
+
+
+void spu_remove_dev_attr(struct device_attribute *attr)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)	/* every SPU on the list */
+		device_remove_file(&spu->dev, attr);
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
+
+void spu_remove_dev_attr_group(const struct attribute_group *attrs)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)	/* every SPU on the list */
+		sysfs_remove_group(&spu->dev.kobj, attrs);
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
+
+static int __init spu_create_dev(struct spu *spu)
+{
+	int ret;
+
+	spu->dev.id = spu->number;
+	spu->dev.bus = &spu_subsys;
+	ret = device_register(&spu->dev);
+	if (ret) {	/* NOTE(review): no put_device() here; confirm against driver-core rules */
+		printk(KERN_ERR "Can't register SPU %d with sysfs\n",
+				spu->number);
+		return ret;
+	}
+
+	sysfs_add_device_to_node(&spu->dev, spu->node);	/* result is not checked */
+
+	return 0;
+}
+
+static int __init create_spu(void *data)	/* callback given to spu_enumerate_spus() */
+{
+	struct spu *spu;
+	int ret;
+	static int number;	/* next spu->number, handed out under spu_lock */
+	unsigned long flags;
+
+	ret = -ENOMEM;
+	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
+	if (!spu)
+		goto out;
+
+	spu->alloc_state = SPU_FREE;
+
+	spin_lock_init(&spu->register_lock);
+	spin_lock(&spu_lock);
+	spu->number = number++;
+	spin_unlock(&spu_lock);
+
+	ret = spu_create_spu(spu, data);
+
+	if (ret)
+		goto out_free;
+
+	spu_mfc_sdr_setup(spu);
+	spu_mfc_sr1_set(spu, 0x33);
+	ret = spu_request_irqs(spu);
+	if (ret)
+		goto out_destroy;
+
+	ret = spu_create_dev(spu);
+	if (ret)
+		goto out_free_irqs;
+
+	mutex_lock(&cbe_spu_info[spu->node].list_mutex);	/* per-node list first */
+	list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+	cbe_spu_info[spu->node].n_spus++;
+	mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+	mutex_lock(&spu_full_list_mutex);	/* list changes need mutex and spinlock */
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	list_add(&spu->full_list, &spu_full_list);
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+	mutex_unlock(&spu_full_list_mutex);
+
+	spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	spu->stats.tstamp = ktime_get_ns();
+
+	INIT_LIST_HEAD(&spu->aff_list);
+
+	goto out;	/* success: ret is 0 from spu_create_dev() */
+
+out_free_irqs:
+	spu_free_irqs(spu);
+out_destroy:
+	spu_destroy_spu(spu);
+out_free:
+	kfree(spu);
+out:
+	return ret;
+}
+
+static const char *spu_state_names[] = {	/* indexed by spu->stats.util_state */
+	"user", "system", "iowait", "idle"
+};
+
+static unsigned long long spu_acct_time(struct spu *spu,
+		enum spu_utilization_state state)
+{
+	unsigned long long time = spu->stats.times[state];
+
+	/*
+	 * If the spu is idle or the context is stopped, utilization
+	 * statistics are not updated.  Apply the time delta from the
+	 * last recorded state of the spu.
+	 */
+	if (spu->stats.util_state == state)
+		time += ktime_get_ns() - spu->stats.tstamp;
+
+	return time / NSEC_PER_MSEC;	/* nanoseconds in, milliseconds out */
+}
+
+
+static ssize_t spu_stat_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct spu *spu = container_of(dev, struct spu, dev);
+	/* One line: state name, four times from spu_acct_time(), then eight counters. */
+	return sprintf(buf, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		spu_state_names[spu->stats.util_state],
+		spu_acct_time(spu, SPU_UTIL_USER),
+		spu_acct_time(spu, SPU_UTIL_SYSTEM),
+		spu_acct_time(spu, SPU_UTIL_IOWAIT),
+		spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+		spu->stats.vol_ctx_switch,
+		spu->stats.invol_ctx_switch,
+		spu->stats.slb_flt,
+		spu->stats.hash_flt,
+		spu->stats.min_flt,
+		spu->stats.maj_flt,
+		spu->stats.class2_intr,
+		spu->stats.libassist);
+}
+
+static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);
+
+#ifdef CONFIG_KEXEC_CORE
+
+struct crash_spu_info {	/* filled in by crash_kexec_stop_spus() */
+	struct spu *spu;	/* NULL for unused slots */
+	u32 saved_spu_runcntl_RW;
+	u32 saved_spu_status_R;
+	u32 saved_spu_npc_RW;
+	u64 saved_mfc_sr1_RW;
+	u64 saved_mfc_dar;
+	u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS	16	/* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+static void crash_kexec_stop_spus(void)	/* registered via crash_shutdown_register() */
+{
+	struct spu *spu;
+	int i;
+	u64 tmp;
+
+	for (i = 0; i < CRASH_NUM_SPUS; i++) {
+		if (!crash_spu_info[i].spu)	/* slot never registered */
+			continue;
+
+		spu = crash_spu_info[i].spu;
+
+		crash_spu_info[i].saved_spu_runcntl_RW =
+			in_be32(&spu->problem->spu_runcntl_RW);
+		crash_spu_info[i].saved_spu_status_R =
+			in_be32(&spu->problem->spu_status_R);
+		crash_spu_info[i].saved_spu_npc_RW =
+			in_be32(&spu->problem->spu_npc_RW);
+
+		crash_spu_info[i].saved_mfc_dar    = spu_mfc_dar_get(spu);
+		crash_spu_info[i].saved_mfc_dsisr  = spu_mfc_dsisr_get(spu);
+		tmp = spu_mfc_sr1_get(spu);
+		crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+		tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;	/* SR1 written back with this bit clear */
+		spu_mfc_sr1_set(spu, tmp);
+
+		__delay(200);
+	}
+}
+
+/* Record each SPU on @list in crash_spu_info[] (indexed by spu->number) and
+ * register crash_kexec_stop_spus() as a crash shutdown handler. */
+static void __init crash_register_spus(struct list_head *list)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, list, full_list) {
+		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+			continue;
+
+		crash_spu_info[spu->number].spu = spu;
+	}
+
+	if (crash_shutdown_register(&crash_kexec_stop_spus))
+		printk(KERN_ERR "Could not register SPU crash handler\n");
+}
+
+#else
+static inline void crash_register_spus(struct list_head *list)
+{	/* no-op when CONFIG_KEXEC_CORE is not set */
+}
+#endif
+
+static void spu_shutdown(void)	/* syscore shutdown hook, see spu_syscore_ops */
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		spu_free_irqs(spu);	/* irqs go first, then the SPU itself */
+		spu_destroy_spu(spu);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = {	/* registered in init_spu_base() */
+	.shutdown = spu_shutdown,
+};
+
+static int __init init_spu_base(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {	/* done even without management ops */
+		mutex_init(&cbe_spu_info[i].list_mutex);
+		INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+	}
+
+	if (!spu_management_ops)	/* nothing registered: return 0 */
+		goto out;
+
+	/* create system subsystem for spus */
+	ret = subsys_system_register(&spu_subsys, NULL);
+	if (ret)
+		goto out;
+
+	ret = spu_enumerate_spus(create_spu);
+
+	if (ret < 0) {
+		printk(KERN_WARNING "%s: Error initializing spus\n",
+			__func__);
+		goto out_unregister_subsys;
+	}
+
+	if (ret > 0)	/* positive return value is passed on as the logo count */
+		fb_append_extra_logo(&logo_spe_clut224, ret);
+
+	mutex_lock(&spu_full_list_mutex);
+	xmon_register_spus(&spu_full_list);
+	crash_register_spus(&spu_full_list);
+	mutex_unlock(&spu_full_list_mutex);
+	spu_add_dev_attr(&dev_attr_stat);
+	register_syscore_ops(&spu_syscore_ops);
+
+	spu_init_affinity();
+
+	return 0;
+
+ out_unregister_subsys:
+	bus_unregister(&spu_subsys);
+ out:
+	return ret;
+}
+device_initcall(init_spu_base);
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 0000000000..e780c14c57
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ *    and we can't let them execute ever:
+ *	restart_syscall, exit, fork, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ *	uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ *    we don't want an SPU to:
+ *	reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ *    linked into the kernel. Unfortunately, the cond_syscall
+ *    helper does not work here as it does not add the necessary
+ *    opd symbols:
+ *	mbind, mq_open, ipc, ...
+ */
+/* Indexed by syscall number; entries are generated from <asm/syscall_table_spu.h>. */
+static const syscall_fn spu_syscall_table[] = {
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
+#include <asm/syscall_table_spu.h>
+};
+/* Run syscall s->nr_ret with s->parm[0..5]; -ENOSYS when the number is past the table end. */
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	syscall_fn syscall;
+
+	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+		pr_debug("%s: invalid syscall #%lld\n", __func__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+	pr_debug("SPU-syscall "
+		 "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n",
+		 syscall,
+		 s->nr_ret,
+		 s->parm[0], s->parm[1], s->parm[2],
+		 s->parm[3], s->parm[4], s->parm[5]);
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 0000000000..f464a1f2e5
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/export.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+
+#include "spufs/spufs.h"
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+/* Accessor for the device-tree node stored in spu->devnode. */
+struct device_node *spu_devnode(struct spu *spu)
+{
+	return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+/* Look up the SPE's id in "physical-id", "unit-id", then "reg"; 0 if none is a 4-byte property. */
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+	const unsigned int *prop;
+	int proplen;
+
+	/* new device trees should provide the physical-id attribute */
+	prop = of_get_property(spe, "physical-id", &proplen);
+	if (prop && proplen == 4)	/* proplen is only written when the property exists */
+		return (u64)*prop;
+
+	/* celleb device tree provides the unit-id */
+	prop = of_get_property(spe, "unit-id", &proplen);
+	if (prop && proplen == 4)
+		return (u64)*prop;
+
+	/* legacy device trees provide the id in the reg attribute */
+	prop = of_get_property(spe, "reg", &proplen);
+	if (prop && proplen == 4)
+		return (u64)*prop;
+
+	return 0;
+}
+/* iounmap() the SPU's register areas and local store; priv1 only without FW_FEATURE_LPAR. */
+static void spu_unmap(struct spu *spu)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		iounmap(spu->priv1);
+	iounmap(spu->priv2);
+	iounmap(spu->problem);
+	iounmap((__force u8 __iomem *)spu->local_store);
+}
+/* Legacy IRQ setup: build the source from "isrc" plus a node id, then map the three classes. */
+static int __init spu_map_interrupts_old(struct spu *spu,
+	struct device_node *np)
+{
+	unsigned int isrc;
+	const u32 *tmp;
+	int nid;
+
+	/* Get the interrupt source unit from the device-tree */
+	tmp = of_get_property(np, "isrc", NULL);
+	if (!tmp)
+		return -ENODEV;
+	isrc = tmp[0];
+
+	tmp = of_get_property(np->parent->parent, "node-id", NULL);
+	if (!tmp) {
+		printk(KERN_WARNING "%s: can't find node-id\n", __func__);
+		nid = spu->node;	/* fall back to the node already stored in the spu */
+	} else
+		nid = tmp[0];
+
+	/* Add the node number */
+	isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+	/* Now map interrupts of all 3 classes */
+	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+	/* Right now, we only fail if class 2 failed */
+	if (!spu->irqs[2])
+		return -EINVAL;
+
+	return 0;
+}
+/* Legacy helper: ioremap() the packed address/len pair in property @name; NULL if absent or missized. */
+static void __iomem * __init spu_map_prop_old(struct spu *spu,
+					      struct device_node *n,
+					      const char *name)
+{
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *prop;
+	int proplen;
+
+	prop = of_get_property(n, name, &proplen);
+	if (prop == NULL || proplen != sizeof (struct address_prop))
+		return NULL;
+
+	return ioremap(prop->address, prop->len);
+}
+/* Legacy mapping from named properties: local-store, problem, priv2 and (without LPAR) priv1. */
+static int __init spu_map_device_old(struct spu *spu)
+{
+	struct device_node *node = spu->devnode;
+	const char *prop;
+	int ret;
+
+	ret = -ENODEV;	/* returned by every failure path below */
+	spu->name = of_get_property(node, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	prop = of_get_property(node, "local-store", NULL);
+	if (!prop)
+		goto out;
+	spu->local_store_phys = *(unsigned long *)prop;
+
+	/* we use local store as ram, not io memory */
+	spu->local_store = (void __force *)
+		spu_map_prop_old(spu, node, "local-store");
+	if (!spu->local_store)
+		goto out;
+
+	prop = of_get_property(node, "problem", NULL);
+	if (!prop)
+		goto out_unmap;
+	spu->problem_phys = *(unsigned long *)prop;
+
+	spu->problem = spu_map_prop_old(spu, node, "problem");
+	if (!spu->problem)
+		goto out_unmap;
+
+	spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+	if (!spu->priv2)
+		goto out_unmap;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+		if (!spu->priv1)
+			goto out_unmap;
+	}
+
+	ret = 0;
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+/* Map interrupts 0..2 of @np into spu->irqs[]; on failure dispose the mappings made so far. */
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	int i;
+
+	for (i=0; i < 3; i++) {
+		spu->irqs[i] = irq_of_parse_and_map(np, i);
+		if (!spu->irqs[i])
+			goto err;
+	}
+	return 0;
+
+err:
+	pr_debug("failed to map irq %x for spu %s\n", i, spu->name);
+	for (; i >= 0; i--) {	/* starts at the failed index, whose entry is 0 and is skipped */
+		if (spu->irqs[i])
+			irq_dispose_mapping(spu->irqs[i]);
+	}
+	return -EINVAL;
+}
+/* ioremap() resource @nr of the SPU's node into *@virt; store its start in *@phys if non-NULL. */
+static int __init spu_map_resource(struct spu *spu, int nr,
+			    void __iomem** virt, unsigned long *phys)
+{
+	struct device_node *np = spu->devnode;
+	struct resource resource = { };
+	unsigned long len;
+	int ret;
+
+	ret = of_address_to_resource(np, nr, &resource);
+	if (ret)
+		return ret;
+	if (phys)
+		*phys = resource.start;
+	len = resource_size(&resource);
+	*virt = ioremap(resource.start, len);
+	if (!*virt)
+		return -EINVAL;
+	return 0;
+}
+/* Map resources 0-2 (local store, problem, priv2) and, without FW_FEATURE_LPAR, 3 (priv1). */
+static int __init spu_map_device(struct spu *spu)
+{
+	struct device_node *np = spu->devnode;
+	int ret = -ENODEV;
+
+	spu->name = of_get_property(np, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+			       &spu->local_store_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 0\n",
+			 np);
+		goto out;
+	}
+	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+			       &spu->problem_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 1\n",
+			 np);
+		goto out_unmap;
+	}
+	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 2\n",
+			 np);
+		goto out_unmap;
+	}
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		ret = spu_map_resource(spu, 3,
+			       (void __iomem**)&spu->priv1, NULL);
+	if (ret) {	/* ret is still 0 here when the priv1 mapping was skipped */
+		pr_debug("spu_new: failed to map %pOF resource 3\n",
+			 np);
+		goto out_unmap;
+	}
+	pr_debug("spu_new: %pOF maps:\n", np);
+	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
+		 spu->local_store_phys, spu->local_store);
+	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
+		 spu->problem_phys, spu->problem);
+	pr_debug("  priv2         :                       0x%p\n", spu->priv2);
+	pr_debug("  priv1         :                       0x%p\n", spu->priv1);
+
+	return 0;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+	return ret;
+}
+/* Call @fn on every "spe"-type node; returns the node count, or an error (-ENODEV if none). */
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	int ret;
+	struct device_node *node;
+	unsigned int n = 0;
+
+	ret = -ENODEV;
+	for_each_node_by_type(node, "spe") {
+		ret = fn(node);
+		if (ret) {
+			printk(KERN_WARNING "%s: Error initializing %pOFn\n",
+				__func__, node);
+			of_node_put(node);	/* put the current node before leaving the iterator early */
+			break;
+		}
+		n++;
+	}
+	return ret ? ret : n;
+}
+/* create_spu op: bind @spu to device node @data, then map its register areas and interrupts. */
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+	static int legacy_map = 0, legacy_irq = 0;
+
+	spu->devnode = of_node_get(spe);
+	spu->spe_id = find_spu_unit_number(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %pOF on node %d ignored,"
+		       " node number too big\n", spe, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = spu_map_device(spu);
+	if (ret) {
+		if (!legacy_map) {
+			legacy_map = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying to map old style\n", __func__);
+		}
+		ret = spu_map_device_old(spu);
+		if (ret) {
+			printk(KERN_ERR "Unable to map %s\n",
+				spu->name);
+			goto out;
+		}
+	}
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret) {
+		if (!legacy_irq) {
+			legacy_irq = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying old style irq\n", __func__);
+		}
+		ret = spu_map_interrupts_old(spu, spe);
+		if (ret) {
+			printk(KERN_ERR "%s: could not map interrupts\n",
+				spu->name);
+			goto out_unmap;
+		}
+	}
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu->priv1,
+		spu->priv2, spu->number);
+	return 0;
+out_unmap:
+	spu_unmap(spu);
+out:
+	of_node_put(spu->devnode);	/* failure: drop the reference taken by of_node_get() */
+	return ret;
+}
+/* destroy_spu op: unmap the SPU and drop its device-node reference; always returns 0. */
+static int of_destroy_spu(struct spu *spu)
+{
+	spu_unmap(spu);
+	of_node_put(spu->devnode);
+	return 0;
+}
+/* enable_spu op: delegate to the context's master_start() operation. */
+static void enable_spu_by_master_run(struct spu_context *ctx)
+{
+	ctx->ops->master_start(ctx);
+}
+/* disable_spu op: delegate to the context's master_stop() operation. */
+static void disable_spu_by_master_run(struct spu_context *ctx)
+{
+	ctx->ops->master_stop(ctx);
+}
+
+/* Hardcoded affinity idxs for qs20: "reg" values in chaining order, and mem affinity per "reg" */
+#define QS20_SPES_PER_BE 8
+static int qs20_reg_idxs[QS20_SPES_PER_BE] =   { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+/* Find the SPU on @node whose "reg" property equals @reg; SPUs without "reg" never match. */
+static struct spu *__init spu_lookup_reg(int node, u32 reg)
+{
+	struct spu *spu;
+	const u32 *spu_reg;
+
+	list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+		spu_reg = of_get_property(spu_devnode(spu), "reg", NULL);
+		if (spu_reg && *spu_reg == reg)
+			return spu;
+	}
+	return NULL;
+}
+/* Per node, chain the SPUs found via qs20_reg_idxs[] on aff_list and set has_mem_affinity. */
+static void __init init_affinity_qs20_harcoded(void)
+{
+	int node, i;
+	struct spu *last_spu, *spu;
+	u32 reg;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		last_spu = NULL;
+		for (i = 0; i < QS20_SPES_PER_BE; i++) {
+			reg = qs20_reg_idxs[i];
+			spu = spu_lookup_reg(node, reg);
+			if (!spu)
+				continue;
+			spu->has_mem_affinity = qs20_reg_memory[reg];
+			if (last_spu)
+				list_add_tail(&spu->aff_list,
+						&last_spu->aff_list);
+			last_spu = spu;
+		}
+	}
+}
+/* Return 1 if any "spe"-type node carries a "vicinity" property, else 0. */
+static int __init of_has_vicinity(void)
+{
+	struct device_node *dn;
+
+	for_each_node_by_type(dn, "spe") {
+		if (of_property_present(dn, "vicinity"))  {
+			of_node_put(dn);	/* put the current node before returning from the iterator */
+			return 1;
+		}
+	}
+	return 0;
+}
+/* Return the SPU on cbe_spu_info[@cbe].spus whose device node is @dn, or NULL. */
+static struct spu *__init devnode_spu(int cbe, struct device_node *dn)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
+		if (spu_devnode(spu) == dn)
+			return spu;
+	return NULL;
+}
+/* Return an SPU on @cbe, other than @avoid's, whose "vicinity" lists @target's phandle. */
+static struct spu * __init
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	struct device_node *spu_dn;
+	const phandle *vic_handles;
+	int lenp, i;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+		spu_dn = spu_devnode(spu);
+		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+		if (spu_dn == avoid || !vic_handles)
+			continue;	/* without the property lenp is never written */
+		for (i=0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == target->phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+/* Chain the SPUs of node @cbe on aff_list by following the "vicinity" phandles between them. */
+static void __init init_affinity_node(int cbe)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph, next_avoid;
+	const phandle *vic_handles;
+	int lenp, i, added;
+
+	last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu, cbe_list);
+	avoid_ph = 0;
+	for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+		vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+		if (!vic_handles)	/* lenp is not written when the property is missing */
+			lenp = 0;
+		/*
+		 * Walk through each phandle in vicinity property of the spu
+		 * (typically two vicinity phandles per spe node)
+		 */
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			if (of_node_name_eq(vic_dn, "spe") ) {
+				spu = devnode_spu(cbe, vic_dn);
+				next_avoid = last_spu_dn->phandle;
+			} else {
+				/*
+				 * "mic-tm" and "bif0" nodes do not have
+				 * vicinity property. So we need to find the
+				 * spe which has vic_dn as neighbour, but
+				 * skipping the one we came from (last_spu_dn)
+				 */
+				spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+				if (spu && of_node_name_eq(vic_dn, "mic-tm")) {
+					last_spu->has_mem_affinity = 1;
+					spu->has_mem_affinity = 1;
+				}
+				next_avoid = vic_dn->phandle;
+			}
+			of_node_put(vic_dn);	/* drop the lookup reference on every path */
+			if (!spu)
+				continue;	/* nothing to chain behind this phandle */
+
+			avoid_ph = next_avoid;
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+/* Run init_affinity_node() for every node index below MAX_NUMNODES. */
+static void __init init_affinity_fw(void)
+{
+	int cbe;
+
+	for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
+		init_affinity_node(cbe);
+}
+/* init_affinity op: use "vicinity" data if present, else the QS20 table on "IBM,CPBW-1.0". */
+static int __init init_affinity(void)
+{
+	if (of_has_vicinity()) {
+		init_affinity_fw();
+	} else {
+		if (of_machine_is_compatible("IBM,CPBW-1.0"))
+			init_affinity_qs20_harcoded();
+		else
+			printk("No affinity configuration found\n");
+	}
+
+	return 0;	/* the absence of affinity data is reported but is not an error */
+}
+/* SPU management operations backed by the device-tree helpers defined in this file. */
+const struct spu_management_ops spu_management_of_ops = {
+	.enumerate_spus = of_enumerate_spus,
+	.create_spu = of_create_spu,
+	.destroy_spu = of_destroy_spu,
+	.enable_spu = enable_spu_by_master_run,
+	.disable_spu = disable_spu_by_master_run,
+	.init_affinity = init_affinity,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
new file mode 100644
index 0000000000..d150e39873
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+/* Read-modify-write: AND @mask into the priv1 interrupt mask register of @class. */
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+	out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask);
+}
+/* Read-modify-write: OR @mask into the priv1 interrupt mask register of @class. */
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+	out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask);
+}
+/* Overwrite the priv1 interrupt mask register of @class with @mask. */
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	out_be64(&spu->priv1->int_mask_RW[class], mask);
+}
+/* Read the priv1 interrupt mask register of @class. */
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	return in_be64(&spu->priv1->int_mask_RW[class]);
+}
+/* Write @stat to the priv1 interrupt status register of @class. */
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	out_be64(&spu->priv1->int_stat_RW[class], stat);
+}
+/* Read the priv1 interrupt status register of @class. */
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	return in_be64(&spu->priv1->int_stat_RW[class]);
+}
+/* Program int_route_RW with @cpu's IIC target id, unless the node check below rejects @cpu. */
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	u64 target;
+	u64 route;
+
+	if (nr_cpus_node(spu->node)) {
+		const struct cpumask *spumask = cpumask_of_node(spu->node),
+			*cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+		if (!cpumask_intersects(spumask, cpumask))
+			return;	/* SPU's node has CPUs but shares none with @cpu's node */
+	}
+
+	target = iic_get_target_id(cpu);
+	route = target << 48 | target << 32 | target << 16;	/* same id in three 16-bit fields */
+	out_be64(&spu->priv1->int_route_RW, route);
+}
+/* Read the priv1 mfc_dar_RW register. */
+static u64 mfc_dar_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_dar_RW);
+}
+/* Read the priv1 mfc_dsisr_RW register. */
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_dsisr_RW);
+}
+/* Write @dsisr to the priv1 mfc_dsisr_RW register. */
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
+}
+/* Copy the CPU's SDR1 special-purpose register into the priv1 mfc_sdr_RW register. */
+static void mfc_sdr_setup(struct spu *spu)
+{
+	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
+}
+/* Write @sr1 to the priv1 mfc_sr1_RW register. */
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	out_be64(&spu->priv1->mfc_sr1_RW, sr1);
+}
+/* Read the priv1 mfc_sr1_RW register. */
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_sr1_RW);
+}
+/* Write @tclass_id to the priv1 mfc_tclass_id_RW register. */
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id);
+}
+/* Read the priv1 mfc_tclass_id_RW register. */
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_tclass_id_RW);
+}
+/* Write 0 to the priv1 tlb_invalidate_entry_W register. */
+static void tlb_invalidate(struct spu *spu)
+{
+	out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul);
+}
+/* Write @id to the priv1 resource_allocation_groupID_RW register. */
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	out_be64(&spu->priv1->resource_allocation_groupID_RW, id);
+}
+/* Read the priv1 resource_allocation_groupID_RW register. */
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->resource_allocation_groupID_RW);
+}
+/* Write @enable to the priv1 resource_allocation_enable_RW register. */
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	out_be64(&spu->priv1->resource_allocation_enable_RW, enable);
+}
+/* Read the priv1 resource_allocation_enable_RW register. */
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->resource_allocation_enable_RW);
+}
+/* priv1 operations implemented by direct big-endian MMIO on spu->priv1 (functions above). */
+const struct spu_priv1_ops spu_priv1_mmio_ops =
+{
+	.int_mask_and = int_mask_and,
+	.int_mask_or = int_mask_or,
+	.int_mask_set = int_mask_set,
+	.int_mask_get = int_mask_get,
+	.int_stat_clear = int_stat_clear,
+	.int_stat_get = int_stat_get,
+	.cpu_affinity_set = cpu_affinity_set,
+	.mfc_dar_get = mfc_dar_get,
+	.mfc_dsisr_get = mfc_dsisr_get,
+	.mfc_dsisr_set = mfc_dsisr_set,
+	.mfc_sdr_setup = mfc_sdr_setup,
+	.mfc_sr1_set = mfc_sr1_set,
+	.mfc_sr1_get = mfc_sr1_get,
+	.mfc_tclass_id_set = mfc_tclass_id_set,
+	.mfc_tclass_id_get = mfc_tclass_id_get,
+	.tlb_invalidate = tlb_invalidate,
+	.resource_allocation_groupID_set = resource_allocation_groupID_set,
+	.resource_allocation_groupID_get = resource_allocation_groupID_get,
+	.resource_allocation_enable_set = resource_allocation_enable_set,
+	.resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
new file mode 100644
index 0000000000..04f0db339d
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+/* Returns spu->devnode; defined in spu_manage.c. */
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
new file mode 100644
index 0000000000..87ad7d563c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system -- system call stubs
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright 2006-2007, IBM Corporation
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
+#include <linux/binfmts.h>
+
+#include <asm/spu.h>
+
+/* protected by rcu; set by register_spu_syscalls(), cleared by unregister_spu_syscalls() */
+static struct spufs_calls *spufs_calls;
+
+#ifdef CONFIG_SPU_FS_MODULE
+/* Module build: return spufs_calls with a reference on its owner module, or NULL. */
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *calls = NULL;
+
+	rcu_read_lock();
+	calls = rcu_dereference(spufs_calls);
+	if (calls && !try_module_get(calls->owner))
+		calls = NULL;
+	rcu_read_unlock();
+
+	return calls;
+}
+/* Module build: release the module reference taken by spufs_calls_get(). */
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+	BUG_ON(calls != spufs_calls);
+
+	/* we don't need to rcu this, as we hold a reference to the module */
+	module_put(spufs_calls->owner);
+}
+
+#else /* !defined CONFIG_SPU_FS_MODULE */
+/* Non-module build: return the registered spufs_calls pointer directly (may be NULL). */
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	return spufs_calls;
+}
+/* Non-module build: nothing to release. */
+static inline void spufs_calls_put(struct spufs_calls *calls) { }
+
+#endif /* CONFIG_SPU_FS_MODULE */
+/* spu_create: forward to create_thread(); -ENOSYS without spufs, -EBADF on a bad neighbor_fd. */
+SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
+	umode_t, mode, int, neighbor_fd)
+{
+	long ret;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {	/* neighbor_fd is only looked up with this flag */
+		struct fd neighbor = fdget(neighbor_fd);
+		ret = -EBADF;
+		if (neighbor.file) {
+			ret = calls->create_thread(name, flags, mode, neighbor.file);
+			fdput(neighbor);
+		}
+	} else
+		ret = calls->create_thread(name, flags, mode, NULL);
+
+	spufs_calls_put(calls);
+	return ret;
+}
+/* spu_run: forward to spufs' spu_run() on @fd's file; -ENOSYS without spufs, -EBADF on bad fd. */
+SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus)
+{
+	long ret;
+	struct fd arg;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	ret = -EBADF;
+	arg = fdget(fd);
+	if (arg.file) {
+		ret = calls->spu_run(arg.file, unpc, ustatus);
+		fdput(arg);
+	}
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+#ifdef CONFIG_COREDUMP
+int elf_coredump_extra_notes_size(void)
+{
+	struct spufs_calls *calls;
+	int ret;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return 0;	/* no spufs callbacks available: report size 0 */
+
+	ret = calls->coredump_extra_notes_size();	/* otherwise spufs supplies the size */
+
+	spufs_calls_put(calls);
+
+	return ret;
+}
+/* Forward to spufs' coredump_extra_notes_write(); returns 0 when no callbacks are available. */
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spufs_calls *calls;
+	int ret;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return 0;
+
+	ret = calls->coredump_extra_notes_write(cprm);
+
+	spufs_calls_put(calls);
+
+	return ret;
+}
+#endif
+/* Forward to spufs' notify_spus_active() callback; does nothing when none is available. */
+void notify_spus_active(void)
+{
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return;
+
+	calls->notify_spus_active();
+	spufs_calls_put(calls);
+
+	return;
+}
+/* Publish @calls as the spufs callback table; -EBUSY if one is already registered. */
+int register_spu_syscalls(struct spufs_calls *calls)
+{
+	if (spufs_calls)
+		return -EBUSY;
+
+	rcu_assign_pointer(spufs_calls, calls);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_spu_syscalls);
+/* Clear the callback table and wait for an RCU grace period; @calls must have the same owner. */
+void unregister_spu_syscalls(struct spufs_calls *calls)
+{
+	BUG_ON(spufs_calls->owner != calls->owner);
+	RCU_INIT_POINTER(spufs_calls, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore
new file mode 100644
index 0000000000..5f3eb224f6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+spu_save_dump.h
+spu_restore_dump.h
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
new file mode 100644
index 0000000000..52e4c80ec8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SPU_FS) += spufs.o
+spufs-y += inode.o file.o context.o syscalls.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += switch.o fault.o lscsa_alloc.o
+spufs-$(CONFIG_COREDUMP) += coredump.o
+
+# magic for the trace events
+CFLAGS_sched.o := -I$(src)
+
+# Rules to build switch.o with the help of SPU tool chain
+SPU_CROSS	:= spu-
+SPU_CC		:= $(SPU_CROSS)gcc
+SPU_AS		:= $(SPU_CROSS)gcc
+SPU_LD		:= $(SPU_CROSS)ld
+SPU_OBJCOPY	:= $(SPU_CROSS)objcopy
+SPU_CFLAGS	:= -O2 -Wall -I$(srctree)/include -D__KERNEL__
+SPU_AFLAGS	:= -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__
+SPU_LDFLAGS	:= -N -Ttext=0x0
+
+$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h
+clean-files := spu_save_dump.h spu_restore_dump.h
+
+# Compile SPU files
+      cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $<
+quiet_cmd_spu_cc = SPU_CC  $@
+$(obj)/spu_%.o: $(src)/spu_%.c
+	$(call if_changed,spu_cc)
+
+# Assemble SPU files
+      cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $<
+quiet_cmd_spu_as = SPU_AS  $@
+$(obj)/spu_%.o: $(src)/spu_%.S
+	$(call if_changed,spu_as)
+
+# Link SPU Executables
+      cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^
+quiet_cmd_spu_ld = SPU_LD  $@
+$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o
+	$(call if_changed,spu_ld)
+
+# Copy into binary format
+      cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@
+quiet_cmd_spu_objcopy = OBJCOPY $@
+$(obj)/spu_%.bin: $(src)/spu_%
+	$(call if_changed,spu_objcopy)
+
+# create a C header (hex-dump array) from the raw binary image
+cmd_hexdump   = ( \
+		echo "/*" ; \
+		echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \
+		echo " * Hex-dump auto generated from $*.c." ; \
+		echo " * Do not edit!" ; \
+		echo " */" ; \
+		echo "static unsigned int $*_code[] " \
+			"__attribute__((__aligned__(128))) = {" ; \
+		hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \
+		echo "};" ; \
+		) > $@
+quiet_cmd_hexdump = HEXDUMP $@
+$(obj)/%_dump.h: $(obj)/%.bin
+	$(call if_changed,hexdump)
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
new file mode 100644
index 0000000000..28a34a2caa
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* backing_ops.c - query/set operations on saved SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * These register operations allow SPUFS to operate on saved
+ * SPU contexts rather than hardware.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/poll.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Reads/writes to various problem and priv2 registers require
+ * state changes, i.e.  generate SPU events, modify channel
+ * counts, etc.
+ */
+
+static void gen_spu_event(struct spu_context *ctx, u32 event)
+{
+	u64 ch0_cnt;
+	u64 ch0_data;
+	u64 ch1_data;
+
+	ch0_cnt = ctx->csa.spu_chnlcnt_RW[0];
+	ch0_data = ctx->csa.spu_chnldata_RW[0];
+	ch1_data = ctx->csa.spu_chnldata_RW[1];
+	ctx->csa.spu_chnldata_RW[0] |= event;	/* record the event in the saved channel 0 data */
+	if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) {
+		ctx->csa.spu_chnlcnt_RW[0] = 1;	/* count was 0, bit newly set, and set in channel 1 */
+	}
+}
+/* Read pu_mb_R from the saved context if mb_stat_R reports data; returns 4 on success, else 0. */
+static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	u32 mbox_stat;
+	int ret = 0;
+
+	spin_lock(&ctx->csa.register_lock);
+	mbox_stat = ctx->csa.prob.mb_stat_R;
+	if (mbox_stat & 0x0000ff) {
+		/* Read the first available word.
+		 * Implementation note: the depth
+		 * of pu_mb_R is currently 1.
+		 */
+		*data = ctx->csa.prob.pu_mb_R;
+		ctx->csa.prob.mb_stat_R &= ~(0x0000ff);
+		ctx->csa.spu_chnlcnt_RW[28] = 1;
+		gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
+		ret = 4;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+/* Return the saved mb_stat_R value (read without taking register_lock). */
+static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.mb_stat_R;
+}
+/* Return the subset of @events ready per saved mb_stat_R; unmask interrupts for the rest. */
+static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
+					  __poll_t events)
+{
+	__poll_t ret;
+	u32 stat;
+
+	ret = 0;
+	spin_lock_irq(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (EPOLLIN | EPOLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_INTR;
+		}
+	}
+	if (events & (EPOLLOUT | EPOLLWRNORM)) {
+		if (stat & 0x00ff00)
+			ret |= EPOLLOUT | EPOLLWRNORM;	/* accumulate: keep any EPOLLIN bits set above */
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_THRESHOLD_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		}
+	}
+	spin_unlock_irq(&ctx->csa.register_lock);
+	return ret;
+}
+/* Read puint_mb_R from the saved context if available (returns 4); else unmask its interrupt. */
+static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.prob.mb_stat_R & 0xff0000) {
+		/* Read the first available word.
+		 * Implementation note: the depth
+		 * of puint_mb_R is currently 1.
+		 */
+		*data = ctx->csa.priv2.puint_mb_R;
+		ctx->csa.prob.mb_stat_R &= ~(0xff0000);
+		ctx->csa.spu_chnlcnt_RW[30] = 1;
+		gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt */
+		ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+/* Queue @data in the saved SPU mailbox if a slot is free (returns 4); else unmask the interrupt. */
+static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) {
+		int slot = ctx->csa.spu_chnlcnt_RW[29];
+		int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8;
+
+		/* We have space to write wbox_data.
+		 * Implementation note: the depth
+		 * of spu_mb_W is currently 4.
+		 */
+		BUG_ON(avail != (4 - slot));
+		ctx->csa.spu_mailbox_data[slot] = data;
+		ctx->csa.spu_chnlcnt_RW[29] = ++slot;
+		ctx->csa.prob.mb_stat_R &= ~(0x00ff00);
+		ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8);
+		gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		ctx->csa.priv1.int_mask_class2_RW |=
+			CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+/* Return the saved channel 3 data, the word spu_backing_signal1_write() stores to. */
+static u32 spu_backing_signal1_read(struct spu_context *ctx)
+{
+	return ctx->csa.spu_chnldata_RW[3];
+}
+/* Store @data in saved channel 3: OR-ed in if spu_cfg_RW bit 0 is set, else overwritten. */
+static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
+{
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.priv2.spu_cfg_RW & 0x1)
+		ctx->csa.spu_chnldata_RW[3] |= data;
+	else
+		ctx->csa.spu_chnldata_RW[3] = data;
+	ctx->csa.spu_chnlcnt_RW[3] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
+	spin_unlock(&ctx->csa.register_lock);
+}
+/* Return the saved channel 4 data, the word spu_backing_signal2_write() stores to. */
+static u32 spu_backing_signal2_read(struct spu_context *ctx)
+{
+	return ctx->csa.spu_chnldata_RW[4];
+}
+/* Store @data in saved channel 4: OR-ed in if spu_cfg_RW bit 1 is set, else overwritten. */
+static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
+{
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.priv2.spu_cfg_RW & 0x2)
+		ctx->csa.spu_chnldata_RW[4] |= data;
+	else
+		ctx->csa.spu_chnldata_RW[4] = data;
+	ctx->csa.spu_chnlcnt_RW[4] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
+	spin_unlock(&ctx->csa.register_lock);
+}
+/* Set (@val non-zero) or clear bit 0 of the saved spu_cfg_RW under register_lock. */
+static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	u64 tmp;
+
+	spin_lock(&ctx->csa.register_lock);
+	tmp = ctx->csa.priv2.spu_cfg_RW;
+	if (val)
+		tmp |= 1;
+	else
+		tmp &= ~1;
+	ctx->csa.priv2.spu_cfg_RW = tmp;
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+/* Return 1 if bit 0 of the saved spu_cfg_RW is set, 0 otherwise. */
+static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
+{
+	return !!(ctx->csa.priv2.spu_cfg_RW & 1);
+}
+
+/*
+ * Set (non-zero @val) or clear (zero @val) bit 1 of the saved spu_cfg_RW
+ * under the CSA register lock.  spu_backing_signal2_write() tests this
+ * bit to choose between OR-ing and overwriting.
+ */
+static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	if (val)
+		csa->priv2.spu_cfg_RW |= 2;
+	else
+		csa->priv2.spu_cfg_RW &= ~2;
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return 1 if bit 1 of the saved spu_cfg_RW is set, 0 otherwise. */
+static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
+{
+	return !!(ctx->csa.priv2.spu_cfg_RW & 2);
+}
+
+/* Return the saved spu_npc_RW value from the problem-state copy. */
+static u32 spu_backing_npc_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->spu_npc_RW;
+}
+
+/* Overwrite the saved spu_npc_RW value with @val.  Takes no lock. */
+static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	prob->spu_npc_RW = val;
+}
+
+/* Return the saved spu_status_R value from the problem-state copy. */
+static u32 spu_backing_status_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->spu_status_R;
+}
+
+/* Return the local-store image held in the context's lscsa. */
+static char *spu_backing_get_ls(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->lscsa->ls;
+}
+
+/* Overwrite the saved spu_privcntl_RW value with @val.  Takes no lock. */
+static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->priv2.spu_privcntl_RW = val;
+}
+
+/* Return the saved spu_runcntl_RW value from the problem-state copy. */
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->spu_runcntl_RW;
+}
+
+/*
+ * Record @val as the saved run-control value and keep the saved status
+ * word consistent with it, all under the CSA register lock:
+ *  - SPU_RUNCNTL_RUNNABLE set: clear the stopped-by-stop, stopped-by-halt,
+ *    single-step, invalid-instruction and invalid-channel status bits and
+ *    set SPU_STATUS_RUNNING;
+ *  - otherwise: clear SPU_STATUS_RUNNING.
+ */
+static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	spin_lock(&ctx->csa.register_lock);
+	prob->spu_runcntl_RW = val;
+	if (!(val & SPU_RUNCNTL_RUNNABLE)) {
+		prob->spu_status_R &= ~SPU_STATUS_RUNNING;
+	} else {
+		prob->spu_status_R &=
+			~SPU_STATUS_STOPPED_BY_STOP &
+			~SPU_STATUS_STOPPED_BY_HALT &
+			~SPU_STATUS_SINGLE_STEP &
+			~SPU_STATUS_INVALID_INSTR &
+			~SPU_STATUS_INVALID_CH;
+		prob->spu_status_R |= SPU_STATUS_RUNNING;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+/* Shorthand for writing SPU_RUNCNTL_STOP through spu_backing_runcntl_write(). */
+static void spu_backing_runcntl_stop(struct spu_context *ctx)
+{
+	const u32 stop = SPU_RUNCNTL_STOP;
+
+	spu_backing_runcntl_write(ctx, stop);
+}
+
+/*
+ * Set MFC_STATE1_MASTER_RUN_CONTROL_MASK in the saved mfc_sr1_RW,
+ * under the CSA register lock.
+ */
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->priv1.mfc_sr1_RW |= MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spin_unlock(&csa->register_lock);
+}
+
+/*
+ * Clear MFC_STATE1_MASTER_RUN_CONTROL_MASK in the saved mfc_sr1_RW,
+ * under the CSA register lock.
+ */
+static void spu_backing_master_stop(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->priv1.mfc_sr1_RW &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spin_unlock(&csa->register_lock);
+}
+
+/*
+ * Install a tag-status query (@mask, @mode) in the saved problem state.
+ *
+ * Returns -EAGAIN without changing anything if a query type is already
+ * recorded (dma_querytype_RW non-zero); otherwise stores mask and mode,
+ * restricts the saved dma_tagstatus_R to @mask, and returns 0.
+ * Runs under the CSA register lock.
+ */
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+					u32 mode)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if (prob->dma_querytype_RW) {
+		ret = -EAGAIN;
+	} else {
+		/* FIXME: what are the side-effects of this? */
+		prob->dma_querymask_RW = mask;
+		prob->dma_querytype_RW = mode;
+		/*
+		 * In the current implementation, the SPU context is always
+		 * acquired in runnable state when new bits are added to the
+		 * mask (tagwait), so it's sufficient just to mask
+		 * dma_tagstatus_R with the 'mask' parameter here.
+		 */
+		prob->dma_tagstatus_R &= mask;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+/* Return the saved dma_tagstatus_R value.  Takes no lock. */
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->dma_tagstatus_R;
+}
+
+/* Return the saved dma_qstatus_R value.  Takes no lock. */
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->dma_qstatus_R;
+}
+
+/*
+ * Queueing an MFC command against a saved context is not implemented:
+ * the register lock is taken and dropped, @cmd is not examined, and
+ * -EAGAIN is always returned.
+ */
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	/* FIXME: set up priv2->puq */
+	spin_unlock(&csa->register_lock);
+
+	return -EAGAIN;
+}
+
+/* Set MFC_CNTL_RESTART_DMA_COMMAND in the saved mfc_control_RW.  Takes no lock. */
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+}
+
+/*
+ * Operations table whose handlers act on the context save area (ctx->csa)
+ * instead of on SPU hardware registers.  alloc_spu_context() installs it
+ * as ctx->ops for every newly created (SPU_STATE_SAVED) context.
+ */
+struct spu_context_ops spu_backing_ops = {
+	.mbox_read = spu_backing_mbox_read,
+	.mbox_stat_read = spu_backing_mbox_stat_read,
+	.mbox_stat_poll = spu_backing_mbox_stat_poll,
+	.ibox_read = spu_backing_ibox_read,
+	.wbox_write = spu_backing_wbox_write,
+	.signal1_read = spu_backing_signal1_read,
+	.signal1_write = spu_backing_signal1_write,
+	.signal2_read = spu_backing_signal2_read,
+	.signal2_write = spu_backing_signal2_write,
+	.signal1_type_set = spu_backing_signal1_type_set,
+	.signal1_type_get = spu_backing_signal1_type_get,
+	.signal2_type_set = spu_backing_signal2_type_set,
+	.signal2_type_get = spu_backing_signal2_type_get,
+	.npc_read = spu_backing_npc_read,
+	.npc_write = spu_backing_npc_write,
+	.status_read = spu_backing_status_read,
+	.get_ls = spu_backing_get_ls,
+	.privcntl_write = spu_backing_privcntl_write,
+	.runcntl_read = spu_backing_runcntl_read,
+	.runcntl_write = spu_backing_runcntl_write,
+	.runcntl_stop = spu_backing_runcntl_stop,
+	.master_start = spu_backing_master_start,
+	.master_stop = spu_backing_master_stop,
+	.set_mfc_query = spu_backing_set_mfc_query,
+	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
new file mode 100644
index 0000000000..7a39cc414f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system -- SPU context management
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+#include "sputrace.h"
+
+
+/* Live context count: bumped in alloc_spu_context(), dropped in destroy_spu_context(). */
+atomic_t nr_spu_contexts = ATOMIC_INIT(0);
+
+/*
+ * Allocate and initialise a new SPU context.
+ *
+ * The context starts in SPU_STATE_SAVED with the backing-store ops;
+ * binding to a physical processor is deferred until spu_activate().
+ * It takes a reference on the caller's mm via get_task_mm() and, when
+ * @gang is non-NULL, is added to that gang.
+ *
+ * Returns the new context, or NULL if the allocation or spu_init_csa()
+ * failed.
+ */
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
+{
+	struct spu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (!ctx)
+		return NULL;
+
+	if (spu_init_csa(&ctx->csa)) {
+		kfree(ctx);
+		return NULL;
+	}
+
+	/* locks, refcount and wait queues */
+	spin_lock_init(&ctx->mmio_lock);
+	mutex_init(&ctx->mapping_lock);
+	kref_init(&ctx->kref);
+	mutex_init(&ctx->state_mutex);
+	mutex_init(&ctx->run_mutex);
+	init_waitqueue_head(&ctx->ibox_wq);
+	init_waitqueue_head(&ctx->wbox_wq);
+	init_waitqueue_head(&ctx->stop_wq);
+	init_waitqueue_head(&ctx->mfc_wq);
+	init_waitqueue_head(&ctx->run_wq);
+
+	/* initial state and ownership */
+	ctx->state = SPU_STATE_SAVED;
+	ctx->ops = &spu_backing_ops;
+	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
+	INIT_LIST_HEAD(&ctx->aff_list);
+	if (gang)
+		spu_gang_add_ctx(gang, ctx);
+
+	/* scheduler parameters and statistics */
+	__spu_update_sched_info(ctx);
+	spu_set_timeslice(ctx);
+	ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	ctx->stats.tstamp = ktime_get_ns();
+
+	atomic_inc(&nr_spu_contexts);
+	return ctx;
+}
+
+/*
+ * kref release callback for a context (see put_spu_context()).
+ *
+ * Deactivates the context under its state mutex, releases the CSA,
+ * removes it from its gang if it has one, drops the profiling kref if
+ * one is attached, and frees the switch log and the context itself.
+ * The context must no longer be on a run queue.
+ */
+void destroy_spu_context(struct kref *kref)
+{
+	struct spu_context *ctx = container_of(kref, struct spu_context, kref);
+
+	spu_context_nospu_trace(destroy_spu_context__enter, ctx);
+
+	mutex_lock(&ctx->state_mutex);
+	spu_deactivate(ctx);
+	mutex_unlock(&ctx->state_mutex);
+
+	spu_fini_csa(&ctx->csa);
+
+	if (ctx->gang)
+		spu_gang_remove_ctx(ctx->gang, ctx);
+
+	if (ctx->prof_priv_kref)
+		kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+
+	BUG_ON(!list_empty(&ctx->rq));
+	atomic_dec(&nr_spu_contexts);
+
+	kfree(ctx->switch_log);
+	kfree(ctx);
+}
+
+/* Take an additional reference on @ctx and hand the same pointer back. */
+struct spu_context * get_spu_context(struct spu_context *ctx)
+{
+	struct kref *ref = &ctx->kref;
+
+	kref_get(ref);
+	return ctx;
+}
+
+/*
+ * Drop one reference on @ctx, with destroy_spu_context() as the release
+ * function.  Returns whatever kref_put() returns.
+ */
+int put_spu_context(struct spu_context *ctx)
+{
+	struct kref *ref = &ctx->kref;
+
+	return kref_put(ref, &destroy_spu_context);
+}
+
+/*
+ * Give up the mm reference when the context is about to be destroyed.
+ *
+ * This is basically an open-coded spu_acquire_saved(), except that the
+ * state mutex is not taken interruptibly and the context must not be
+ * rescheduled on release.
+ */
+void spu_forget(struct spu_context *ctx)
+{
+	struct mm_struct *owner_mm;
+
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
+
+	/* detach the mm from the context before dropping the reference */
+	owner_mm = ctx->owner;
+	ctx->owner = NULL;
+	mmput(owner_mm);
+
+	spu_release(ctx);
+}
+
+/* Unmap the first @len bytes of @mapping, if the mapping is present. */
+static void spu_unmap_one(struct address_space *mapping, loff_t len)
+{
+	if (mapping)
+		unmap_mapping_range(mapping, 0, len, 1);
+}
+
+/*
+ * Tear down every user mapping registered on the context (local store,
+ * mfc, cntl, signal1, signal2, mss, psmap), each from offset 0 up to
+ * that area's map size, under the mapping lock.
+ */
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+	mutex_lock(&ctx->mapping_lock);
+	spu_unmap_one(ctx->local_store, LS_SIZE);
+	spu_unmap_one(ctx->mfc, SPUFS_MFC_MAP_SIZE);
+	spu_unmap_one(ctx->cntl, SPUFS_CNTL_MAP_SIZE);
+	spu_unmap_one(ctx->signal1, SPUFS_SIGNAL_MAP_SIZE);
+	spu_unmap_one(ctx->signal2, SPUFS_SIGNAL_MAP_SIZE);
+	spu_unmap_one(ctx->mss, SPUFS_MSS_MAP_SIZE);
+	spu_unmap_one(ctx->psmap, SPUFS_PS_MAP_SIZE);
+	mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu context and make sure it is in saved state
+ * @ctx:	spu context to lock
+ *
+ * If the context is not already saved, SPU_SCHED_WAS_ACTIVE is set in
+ * its sched_flags before it is deactivated, so that spu_release_saved()
+ * can tell that it was active.
+ *
+ * Returns 0 on success or the non-zero result of spu_acquire().
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+	int ret;
+
+	spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+	ret = spu_acquire(ctx);
+	if (!ret && ctx->state != SPU_STATE_SAVED) {
+		set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+		spu_deactivate(ctx);
+	}
+
+	return ret;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx:	context to unlock
+ *
+ * The context must be in SPU_STATE_SAVED.  It is re-activated only if
+ * SPU_SCHED_WAS_ACTIVE was set (the flag is cleared here) and
+ * SPU_SCHED_SPU_RUN is set.
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+	BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+	if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) {
+		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+			spu_activate(ctx, 0);
+	}
+
+	spu_release(ctx);
+}
+
+
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 0000000000..1a58761801
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
+
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/*
+ * Compute the number of bytes the core-dump notes for one context take:
+ * for every entry of spufs_coredump_read[], an ELF note header, the
+ * NUL-terminated name "SPU/<dfd>/<entry name>" rounded up to 4 bytes,
+ * and the entry's size rounded up to 4 bytes.  @ctx itself is not read.
+ */
+static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
+{
+	char fullname[80];
+	int total = 0;
+	int i = 0;
+
+	while (spufs_coredump_read[i].name != NULL) {
+		int sz = spufs_coredump_read[i].size;
+
+		sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name);
+
+		total += sizeof(struct elf_note);
+		total += roundup(strlen(fullname) + 1, 4);
+		total += roundup(sz, 4);
+		i++;
+	}
+
+	return total;
+}
+
+/*
+ * iterate_fd() callback: matches a file that is a spufs context
+ * directory whose context was not created with SPU_CREATE_NOSCHED.
+ * Returns fd + 1 on a match so that 0 can mean "no match".
+ */
+static int match_context(const void *v, struct file *file, unsigned fd)
+{
+	if (file->f_op == &spufs_context_fops) {
+		struct spu_context *ctx = SPUFS_I(file_inode(file))->i_ctx;
+
+		if (!(ctx->flags & SPU_CREATE_NOSCHED))
+			return fd + 1;
+	}
+	return 0;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over the open file descriptors, starting at
+ * *fd, and checks whether they are a directory in spufs.  In that case
+ * we use spufs internal functionality to dump them without needing to
+ * actually open the files.
+ *
+ * On a match *fd is updated to the matching descriptor and the context
+ * is returned with an extra reference that the caller must drop with
+ * put_spu_context().  Returns NULL when no further context is found.
+ *
+ * The descriptor table is not shared, so files can't change or go away.
+ */
+static struct spu_context *coredump_next_context(int *fd)
+{
+	struct spu_context *found;
+	int hit;
+
+	hit = iterate_fd(current->files, *fd, match_context, NULL);
+	if (hit == 0)
+		return NULL;
+	/* match_context() reports fd + 1 */
+	*fd = hit - 1;
+
+	rcu_read_lock();
+	found = SPUFS_I(file_inode(lookup_fd_rcu(*fd)))->i_ctx;
+	get_spu_context(found);
+	rcu_read_unlock();
+
+	return found;
+}
+
+/*
+ * Sum the note sizes of every context found by coredump_next_context().
+ *
+ * The walk stops early if a context cannot be acquired in saved state or
+ * if spufs_ctx_note_size() reports a negative value; the size gathered
+ * so far is returned in that case.  The reference handed out by
+ * coredump_next_context() is dropped for every context visited.
+ */
+int spufs_coredump_extra_notes_size(void)
+{
+	struct spu_context *ctx;
+	int size = 0, rc, fd;
+
+	/* fd++ : start searching at the next fd next time */
+	for (fd = 0; (ctx = coredump_next_context(&fd)) != NULL; fd++) {
+		bool stop = true;
+
+		if (!spu_acquire_saved(ctx)) {
+			rc = spufs_ctx_note_size(ctx, fd);
+			spu_release_saved(ctx);
+			if (rc >= 0) {
+				size += rc;
+				stop = false;
+			}
+		}
+
+		put_spu_context(ctx);
+		if (stop)
+			break;
+	}
+
+	return size;
+}
+
+/*
+ * Emit one ELF note for entry @i of spufs_coredump_read[] into the core
+ * dump described by @cprm.
+ *
+ * The note is named "SPU/<dfd>/<entry name>" and has type NT_SPU.  Its
+ * descriptor is produced either by the entry's ->dump() callback or, if
+ * there is none, by formatting the entry's ->get() value as a
+ * "0x%.16llx" string.  The dump position is then advanced to the end of
+ * the descriptor, rounded up to 4 bytes.
+ *
+ * Returns 0 on success, -EIO if an emit/align step fails, a negative
+ * value propagated from ->dump(), or sizeof(buf) (a positive value) if
+ * the formatted string would not have fit in the local buffer.
+ */
+static int spufs_arch_write_note(struct spu_context *ctx, int i,
+				  struct coredump_params *cprm, int dfd)
+{
+	size_t sz = spufs_coredump_read[i].size;
+	char fullname[80];
+	struct elf_note en;
+	int ret;
+
+	sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name);
+	en.n_namesz = strlen(fullname) + 1;
+	en.n_descsz = sz;
+	en.n_type = NT_SPU;
+
+	/* note header, then the name padded to a 4-byte boundary */
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		return -EIO;
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		return -EIO;
+	if (!dump_align(cprm, 4))
+		return -EIO;
+
+	if (spufs_coredump_read[i].dump) {
+		ret = spufs_coredump_read[i].dump(ctx, cprm);
+		if (ret < 0)
+			return ret;
+	} else {
+		char buf[32];
+
+		ret = snprintf(buf, sizeof(buf), "0x%.16llx",
+			       spufs_coredump_read[i].get(ctx));
+		if (ret >= sizeof(buf))
+			return sizeof(buf);
+
+		/* count the trailing NUL: */
+		if (!dump_emit(cprm, buf, ret + 1))
+			return -EIO;
+	}
+
+	/* ret is what the branch above reported; skip to the padded end */
+	dump_skip_to(cprm, roundup(cprm->pos - ret + sz, 4));
+	return 0;
+}
+
+/*
+ * Write the SPU notes for every context found by
+ * coredump_next_context(): one note per spufs_coredump_read[] entry.
+ *
+ * coredump_next_context() returns each context with an extra reference.
+ * That reference is dropped on every path out of the loop body, the
+ * same way spufs_coredump_extra_notes_size() does; without the put the
+ * context's refcount would never reach zero again.
+ *
+ * Returns 0 on success, otherwise the non-zero result of
+ * spu_acquire_saved() or spufs_arch_write_note().
+ */
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spu_context *ctx;
+	int fd, j, rc;
+
+	fd = 0;
+	while ((ctx = coredump_next_context(&fd)) != NULL) {
+		rc = spu_acquire_saved(ctx);
+		if (rc) {
+			put_spu_context(ctx);
+			return rc;
+		}
+
+		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
+			if (rc)
+				break;
+		}
+
+		spu_release_saved(ctx);
+		/* balance get_spu_context() in coredump_next_context() */
+		put_spu_context(ctx);
+		if (rc)
+			return rc;
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 0000000000..24adbe3c60
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+#include <linux/sched/signal.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/**
+ * spufs_handle_event - deliver an SPE event to the context or the process
+ * @ctx:	context the event belongs to
+ * @ea:	effective address, used only for SPE_EVENT_SPE_DATA_STORAGE
+ * @type:	one of the SPE_EVENT_* values
+ *
+ * If the context was created with SPU_CREATE_EVENTS_ENABLED, the event
+ * is OR-ed into ctx->event_return and waiters on stop_wq are woken.
+ * Otherwise an appropriate signal is forced on the current task.
+ * Unknown event types are ignored.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	void __user *fault_pc;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+		return;
+	}
+
+	if (type == SPE_EVENT_INVALID_DMA) {
+		force_sig_fault(SIGBUS, BUS_OBJERR, NULL);
+	} else if (type == SPE_EVENT_SPE_DATA_STORAGE) {
+		ctx->ops->restart_dma(ctx);
+		force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea);
+	} else if (type == SPE_EVENT_DMA_ALIGNMENT) {
+		/* DAR isn't set for an alignment fault :( */
+		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+	} else if (type == SPE_EVENT_SPE_ERROR) {
+		/* report the address 4 bytes before the current npc */
+		fault_pc = (void __user *)(unsigned long)
+				ctx->ops->npc_read(ctx) - 4;
+		force_sig_fault(SIGILL, ILL_ILLOPC, fault_pc);
+	}
+}
+
+/*
+ * Handle pending class 0 interrupts recorded in the context save area.
+ *
+ * Returns 0 if none of the CLASS0_INTR_MASK bits is pending.  Otherwise
+ * each pending condition (DMA alignment, invalid DMA command, SPU error,
+ * in that order) is passed to spufs_handle_event(), class_0_pending is
+ * cleared, and -EIO is returned.
+ */
+int spufs_handle_class0(struct spu_context *ctx)
+{
+	static const struct {
+		unsigned long intr;
+		int event;
+	} class0_events[] = {
+		{ CLASS0_DMA_ALIGNMENT_INTR,		SPE_EVENT_DMA_ALIGNMENT },
+		{ CLASS0_INVALID_DMA_COMMAND_INTR,	SPE_EVENT_INVALID_DMA },
+		{ CLASS0_SPU_ERROR_INTR,		SPE_EVENT_SPE_ERROR },
+	};
+	unsigned long pending = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+	int n;
+
+	if (likely(!pending))
+		return 0;
+
+	for (n = 0; n < ARRAY_SIZE(class0_events); n++) {
+		if (pending & class0_events[n].intr)
+			spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				class0_events[n].event);
+	}
+
+	ctx->csa.class_0_pending = 0;
+
+	return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * Returns 0 if the saved dsisr shows neither MFC_DSISR_PTE_NOT_FOUND nor
+ * MFC_DSISR_ACCESS_DENIED, or if the fault was resolved; otherwise the
+ * non-zero result of copro_handle_mm_fault(), after
+ * SPE_EVENT_SPE_DATA_STORAGE has been raised through
+ * spufs_handle_event().
+ *
+ * The context is released with spu_release() while the fault is handled
+ * and state_mutex is taken again (non-interruptibly) before returning.
+ * NOTE(review): this presumably requires the caller to hold the context
+ * lock on entry -- confirm against the callers.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	vm_fault_t flt = 0;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * if we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * immediately after the context restore.
+	 */
+	ea = ctx->csa.class_1_dar;
+	dsisr = ctx->csa.class_1_dsisr;
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+	pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	/* per-context counter always; per-SPU counter only while loaded */
+	ctx->stats.hash_flt++;
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		ctx->spu->stats.hash_flt++;
+
+	/* we must not hold the lock when entering copro_handle_mm_fault */
+	spu_release(ctx);
+
+	/* read access always; write access only for a PUT-type fault */
+	access = (_PAGE_PRESENT | _PAGE_READ);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300, dsisr);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+	/*
+	 * This is nasty: we need the state_mutex for all the bookkeeping even
+	 * if the syscall was interrupted by a signal. ewww.
+	 */
+	mutex_lock(&ctx->state_mutex);
+
+	/*
+	 * Clear dsisr under ctxt lock after handling the fault, so that
+	 * time slicing will not preempt the context while the page fault
+	 * handler is running. Context switch code removes mappings.
+	 */
+	ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0;
+
+	/*
+	 * If we handled the fault successfully and are in runnable
+	 * state, restart the DMA.
+	 * In case of unhandled error report the problem to user space.
+	 */
+	if (!ret) {
+		if (flt & VM_FAULT_MAJOR)
+			ctx->stats.maj_flt++;
+		else
+			ctx->stats.min_flt++;
+		if (ctx->state == SPU_STATE_RUNNABLE) {
+			if (flt & VM_FAULT_MAJOR)
+				ctx->spu->stats.maj_flt++;
+			else
+				ctx->spu->stats.min_flt++;
+		}
+
+		if (ctx->spu)
+			ctx->ops->restart_dma(ctx);
+	} else
+		spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
new file mode 100644
index 0000000000..02a8158c46
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -0,0 +1,2633 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system -- file contents
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/export.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/spu.h>
+#include <asm/spu_info.h>
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+#include "sputrace.h"
+
+/* True when PAGE_SIZE is 4K; gates the mmap support below that is built under #if SPUFS_MMAP_4K. */
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
+/*
+ * Simple attribute files
+ *
+ * Per-open state of a spufs attribute file: allocated by
+ * spufs_attr_open(), stored in file->private_data and freed by
+ * spufs_attr_release().
+ */
+struct spufs_attr {
+	int (*get)(void *, u64 *);	/* fetch the value; NULL: reads fail with -EACCES */
+	int (*set)(void *, u64);	/* store a value; NULL: writes fail with -EACCES */
+	char get_buf[24];       /* enough to store a u64 and "\n\0" */
+	char set_buf[24];
+	void *data;		/* inode->i_private at open time, passed to get/set */
+	const char *fmt;        /* format for read operation */
+	struct mutex mutex;     /* protects access to these buffers */
+};
+
+/*
+ * Common open helper for spufs attribute files: allocates the per-open
+ * struct spufs_attr, records the accessors, the format string and
+ * inode->i_private, and stores it in file->private_data.
+ *
+ * Returns -ENOMEM if the allocation fails, otherwise the result of
+ * nonseekable_open().
+ */
+static int spufs_attr_open(struct inode *inode, struct file *file,
+		int (*get)(void *, u64 *), int (*set)(void *, u64),
+		const char *fmt)
+{
+	struct spufs_attr *attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+
+	if (attr == NULL)
+		return -ENOMEM;
+
+	attr->get = get;
+	attr->set = set;
+	attr->data = inode->i_private;
+	attr->fmt = fmt;
+	mutex_init(&attr->mutex);
+
+	file->private_data = attr;
+	return nonseekable_open(inode, file);
+}
+
+/* Free the struct spufs_attr that spufs_attr_open() attached to @file. */
+static int spufs_attr_release(struct inode *inode, struct file *file)
+{
+	struct spufs_attr *attr = file->private_data;
+
+	kfree(attr);
+	return 0;
+}
+
+/*
+ * Read handler for spufs attribute files.
+ *
+ * On the first read (*ppos == 0) the value is fetched through ->get()
+ * and formatted into get_buf; later reads continue from the text that is
+ * already there.  Returns -EACCES when there is no getter, the error
+ * from an interrupted mutex wait or from ->get(), or otherwise the
+ * result of simple_read_from_buffer().
+ */
+static ssize_t spufs_attr_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr = file->private_data;
+	size_t size;
+	ssize_t ret;
+
+	if (!attr->get)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	if (*ppos == 0) {
+		/* first read: fetch and format a fresh value */
+		u64 val;
+
+		ret = attr->get(attr->data, &val);
+		if (ret)
+			goto unlock;
+
+		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				 attr->fmt, (unsigned long long)val);
+	} else {
+		/* continued read: reuse the text formatted earlier */
+		size = strlen(attr->get_buf);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+unlock:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+/*
+ * Write handler for spufs attribute files.
+ *
+ * Copies at most sizeof(set_buf) - 1 bytes from user space, parses them
+ * with simple_strtol() (base auto-detected) and passes the value to
+ * ->set().
+ *
+ * Returns -EACCES when there is no setter, the error from an interrupted
+ * mutex wait, -EFAULT if the user buffer cannot be read, or the error
+ * reported by ->set().  On success the full @len is returned, even if
+ * only part of the input was looked at.
+ */
+static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->set)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+
+	/* do not report success when the setter could not store the value */
+	ret = attr->set(attr->data, val);
+	if (!ret)
+		ret = len; /* claim we got the whole input */
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+/*
+ * Emit @size bytes from @buf into the core dump.  Returns @size on
+ * success and -EIO if dump_emit() reports failure.
+ */
+static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf,
+		size_t size)
+{
+	return dump_emit(cprm, buf, size) ? size : -EIO;
+}
+
+/*
+ * Define an attribute file: generates __fops##_open(), which passes
+ * __get, __set and __fmt to spufs_attr_open(), and a file_operations
+ * named __fops that uses the spufs_attr_* read/write/release handlers.
+ * __simple_attr_check_format() is invoked on __fmt with a 0ull argument
+ * (presumably so the compiler can check the format against a 64-bit
+ * value -- confirm against its definition).
+ */
+#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)	\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return spufs_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static const struct file_operations __fops = {				\
+	.open	 = __fops ## _open,					\
+	.release = spufs_attr_release,					\
+	.read	 = spufs_attr_read,					\
+	.write	 = spufs_attr_write,					\
+	.llseek  = generic_file_llseek,					\
+};
+
+
+/*
+ * Open the local-store file: point file->private_data at the context
+ * and, for the first opener of the inode, record the inode's mapping as
+ * ctx->local_store.  Done under the mapping lock.
+ */
+static int
+spufs_mem_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+	bool first_opener;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	first_opener = (info->i_openers++ == 0);
+	if (first_opener)
+		ctx->local_store = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Release the local-store file: when the last opener of the inode goes
+ * away, clear ctx->local_store.  Done under the mapping lock.
+ */
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+	bool last_opener;
+
+	mutex_lock(&ctx->mapping_lock);
+	last_opener = (--info->i_openers == 0);
+	if (last_opener)
+		ctx->local_store = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* Core-dump helper: emit LS_SIZE bytes of the local store returned by ->get_ls(). */
+static ssize_t
+spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm)
+{
+	char *ls = ctx->ops->get_ls(ctx);
+
+	return spufs_dump_emit(cprm, ls, LS_SIZE);
+}
+
+/*
+ * Read from the local store at *pos while holding the context.
+ * Returns the error from spu_acquire() or the result of
+ * simple_read_from_buffer() over the LS_SIZE-byte local store.
+ */
+static ssize_t
+spufs_mem_read(struct file *file, char __user *buffer,
+				size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret = spu_acquire(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = simple_read_from_buffer(buffer, size, pos,
+				      ctx->ops->get_ls(ctx), LS_SIZE);
+	spu_release(ctx);
+
+	return ret;
+}
+
+/*
+ * Write into the local store at *ppos while holding the context.
+ * Returns -EFBIG if *ppos lies beyond LS_SIZE, the error from
+ * spu_acquire(), or the result of simple_write_to_buffer().
+ */
+static ssize_t
+spufs_mem_write(struct file *file, const char __user *buffer,
+					size_t size, loff_t *ppos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t written;
+	int ret;
+
+	if (*ppos > LS_SIZE)
+		return -EFBIG;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	written = simple_write_to_buffer(ctx->ops->get_ls(ctx), LS_SIZE,
+					 ppos, buffer, size);
+	spu_release(ctx);
+
+	return written;
+}
+
+/*
+ * Fault handler for local-store mappings.
+ *
+ * Faults at or beyond LS_SIZE get VM_FAULT_SIGBUS; a failed
+ * spu_acquire() gets VM_FAULT_NOPAGE.  Otherwise the page is inserted
+ * from the saved local-store image (cached) when the context is in
+ * SPU_STATE_SAVED, or from the SPU's physical local store
+ * (non-cached, write-combining) when it is not.  The VMA's page
+ * protection is updated to match.
+ */
+static vm_fault_t
+spufs_mem_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = vmf->pgoff << PAGE_SHIFT;
+	unsigned long pfn;
+	vm_fault_t ret;
+
+	if (offset >= LS_SIZE)
+		return VM_FAULT_SIGBUS;
+
+	pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
+			vmf->address, offset);
+
+	if (spu_acquire(ctx))
+		return VM_FAULT_NOPAGE;
+
+	if (ctx->state != SPU_STATE_SAVED) {
+		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+	} else {
+		vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+		pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+	}
+	ret = vmf_insert_pfn(vma, vmf->address, pfn);
+
+	spu_release(ctx);
+
+	return ret;
+}
+
+/*
+ * ->access() handler for local-store mappings: copy @len bytes between
+ * @buf and the local store at the position corresponding to @address.
+ *
+ * The copy is clamped so that it stays inside both the VMA and the
+ * LS_SIZE-byte local store.  The previous clamp compared the
+ * VMA-relative offset against the absolute vma->vm_end, so it never
+ * triggered.
+ *
+ * Returns the number of bytes copied, -EACCES for a write to a
+ * non-writable mapping, -EFAULT if @address maps beyond the local
+ * store, or -EINTR if the context could not be acquired.
+ *
+ * NOTE(review): the offset ignores vma->vm_pgoff, unlike the fault
+ * handler which works from vmf->pgoff -- confirm whether mappings with a
+ * non-zero file offset need it added here.
+ */
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+				unsigned long address,
+				void *buf, int len, int write)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	char *local_store;
+
+	if (write && !(vma->vm_flags & VM_WRITE))
+		return -EACCES;
+	if (offset >= LS_SIZE)
+		return -EFAULT;
+	if (spu_acquire(ctx))
+		return -EINTR;
+	/* stay inside the mapping ... */
+	if ((address + len) > vma->vm_end)
+		len = vma->vm_end - address;
+	/* ... and inside the local store itself */
+	if ((offset + len) > LS_SIZE)
+		len = LS_SIZE - offset;
+	local_store = ctx->ops->get_ls(ctx);
+	if (write)
+		memcpy_toio(local_store + offset, buf, len);
+	else
+		memcpy_fromio(buf, local_store + offset, len);
+	spu_release(ctx);
+	return len;
+}
+
+/* VM operations installed on local-store mappings by spufs_mem_mmap(). */
+static const struct vm_operations_struct spufs_mem_mmap_vmops = {
+	.fault = spufs_mem_mmap_fault,
+	.access = spufs_mem_mmap_access,
+};
+
+/*
+ * mmap handler for the local-store file.  Only shared mappings are
+ * accepted (-EINVAL otherwise); the VMA is marked VM_IO | VM_PFNMAP,
+ * made non-cached write-combining, and given spufs_mem_mmap_vmops.
+ */
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	const bool shared = (vma->vm_flags & VM_SHARED) != 0;
+
+	if (!shared)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+	vma->vm_ops = &spufs_mem_mmap_vmops;
+
+	return 0;
+}
+
+/* File operations of the local-store file: seekable read/write plus mmap. */
+static const struct file_operations spufs_mem_fops = {
+	.open			= spufs_mem_open,
+	.release		= spufs_mem_release,
+	.read			= spufs_mem_read,
+	.write			= spufs_mem_write,
+	.llseek			= generic_file_llseek,
+	.mmap			= spufs_mem_mmap,
+};
+
+/*
+ * Common fault handler for problem-state mappings.
+ *
+ * @ps_offs is the offset of the mapped area within the SPU's problem
+ * state and @ps_size the size of that area.
+ *
+ * Returns VM_FAULT_SIGBUS for an offset outside the area or when a fatal
+ * signal is pending.  If the context is loaded, the page at
+ * problem_phys + @ps_offs + offset is inserted and the result of
+ * vmf_insert_pfn() is returned.  If the context is saved, the handler
+ * sleeps until it becomes runnable and returns VM_FAULT_NOPAGE so that
+ * the access is retried.
+ */
+static vm_fault_t spufs_ps_fault(struct vm_fault *vmf,
+				    unsigned long ps_offs,
+				    unsigned long ps_size)
+{
+	struct spu_context *ctx = vmf->vma->vm_file->private_data;
+	unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
+	int err = 0;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+
+	spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
+
+	if (offset >= ps_size)
+		return VM_FAULT_SIGBUS;
+
+	if (fatal_signal_pending(current))
+		return VM_FAULT_SIGBUS;
+
+	/*
+	 * Because we release the mmap_lock, the context may be destroyed while
+	 * we're in spu_wait. Grab an extra reference so it isn't destroyed
+	 * in the meantime.
+	 */
+	get_spu_context(ctx);
+
+	/*
+	 * We have to wait for context to be loaded before we have
+	 * pages to hand out to the user, but we don't want to wait
+	 * with the mmap_lock held.
+	 * It is possible to drop the mmap_lock here, but then we need
+	 * to return VM_FAULT_NOPAGE because the mappings may have
+	 * changed.
+	 */
+	if (spu_acquire(ctx))
+		goto refault;
+
+	if (ctx->state == SPU_STATE_SAVED) {
+		mmap_read_unlock(current->mm);
+		spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
+		err = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
+		mmap_read_lock(current->mm);
+	} else {
+		area = ctx->spu->problem_phys + ps_offs;
+		ret = vmf_insert_pfn(vmf->vma, vmf->address,
+				(area + offset) >> PAGE_SHIFT);
+		spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
+	}
+
+	/*
+	 * NOTE(review): the release is skipped when spufs_wait() returned an
+	 * error, presumably because spufs_wait() has already dropped the
+	 * context lock on that path -- confirm against its definition.
+	 */
+	if (!err)
+		spu_release(ctx);
+
+refault:
+	put_spu_context(ctx);
+	return ret;
+}
+
+#if SPUFS_MMAP_4K
+/* Fault handler for the control-area mapping: problem-state offset 0x4000. */
+static vm_fault_t spufs_cntl_mmap_fault(struct vm_fault *vmf)
+{
+	const unsigned long cntl_offs = 0x4000;
+
+	return spufs_ps_fault(vmf, cntl_offs, SPUFS_CNTL_MAP_SIZE);
+}
+
+/* VM operations installed on control-area mappings by spufs_cntl_mmap(). */
+static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
+	.fault = spufs_cntl_mmap_fault,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ *
+ * Only shared mappings are accepted (-EINVAL otherwise); the VMA is
+ * marked VM_IO | VM_PFNMAP, made non-cached, and given
+ * spufs_cntl_mmap_vmops.
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	const bool shared = (vma->vm_flags & VM_SHARED) != 0;
+
+	if (!shared)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &spufs_cntl_mmap_vmops;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * Attribute getter for the cntl file: store the context's status word
+ * (->status_read()) in *val.  Returns 0, or the error from
+ * spu_acquire() in which case *val is left untouched.
+ */
+static int spufs_cntl_get(void *data, u64 *val)
+{
+	struct spu_context *ctx = data;
+	int ret = spu_acquire(ctx);
+
+	if (!ret) {
+		*val = ctx->ops->status_read(ctx);
+		spu_release(ctx);
+	}
+
+	return ret;
+}
+
+/*
+ * Attribute setter for the cntl file: pass @val to ->runcntl_write().
+ * Returns 0, or the error from spu_acquire() in which case nothing is
+ * written.
+ */
+static int spufs_cntl_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret = spu_acquire(ctx);
+
+	if (!ret) {
+		ctx->ops->runcntl_write(ctx, val);
+		spu_release(ctx);
+	}
+
+	return ret;
+}
+
+/*
+ * Open the cntl file: account the opener, record the inode's mapping as
+ * ctx->cntl for the first opener, and set up the simple-attribute state
+ * used by the read/write handlers.
+ *
+ * If simple_attr_open() fails, the opener accounting is rolled back
+ * here, because ->release() is not invoked for a file whose ->open()
+ * failed; otherwise i_openers would stay elevated and ctx->cntl would
+ * never be cleared.
+ *
+ * Returns the result of simple_attr_open().
+ */
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	int ret;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	ret = simple_attr_open(inode, file, spufs_cntl_get,
+					spufs_cntl_set, "0x%08lx");
+	if (ret) {
+		/* undo the accounting above; mirrors spufs_cntl_release() */
+		mutex_lock(&ctx->mapping_lock);
+		if (!--i->i_openers)
+			ctx->cntl = NULL;
+		mutex_unlock(&ctx->mapping_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Release the cntl file: free the simple-attribute state, then, when
+ * the last opener of the inode goes away, clear ctx->cntl under the
+ * mapping lock.
+ */
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+	bool last_opener;
+
+	simple_attr_release(inode, file);
+
+	mutex_lock(&ctx->mapping_lock);
+	last_opener = (--info->i_openers == 0);
+	if (last_opener)
+		ctx->cntl = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * File operations of the cntl file: simple-attribute read/write, no
+ * seeking, and mmap when SPUFS_MMAP_4K is true (spufs_cntl_mmap is
+ * defined as NULL otherwise).
+ */
+static const struct file_operations spufs_cntl_fops = {
+	.open = spufs_cntl_open,
+	.release = spufs_cntl_release,
+	.read = simple_attr_read,
+	.write = simple_attr_write,
+	.llseek	= no_llseek,
+	.mmap = spufs_cntl_mmap,
+};
+
+/* Open helper shared by the regs and fpcr files: stash the context in private_data. */
+static int
+spufs_regs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+	return 0;
+}
+
+/* Core-dump helper: emit the saved general purpose registers (lscsa->gprs). */
+static ssize_t
+spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+
+	return spufs_dump_emit(cprm, lscsa->gprs, sizeof(lscsa->gprs));
+}
+
+/*
+ * Read the saved general purpose registers.  The context is forced into
+ * saved state for the duration of the copy.  Returns 0 (EOF) without
+ * touching the context when *pos is already at or past the end, the
+ * error from spu_acquire_saved(), or the result of
+ * simple_read_from_buffer().
+ */
+static ssize_t
+spufs_regs_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	/*
+	 * pre-check for file position: if we'd return EOF, there's no point
+	 * causing a deschedule
+	 */
+	if (*pos >= sizeof(ctx->csa.lscsa->gprs))
+		return 0;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	ret = simple_read_from_buffer(buffer, size, pos, ctx->csa.lscsa->gprs,
+				      sizeof(ctx->csa.lscsa->gprs));
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * Write the saved general purpose registers.  The context is forced
+ * into saved state for the duration of the copy.  Returns -EFBIG when
+ * *pos is at or past the end of the register area, the error from
+ * spu_acquire_saved(), or the result of simple_write_to_buffer().
+ */
+static ssize_t
+spufs_regs_write(struct file *file, const char __user *buffer,
+		 size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	ssize_t written;
+	int ret;
+
+	if (*pos >= sizeof(lscsa->gprs))
+		return -EFBIG;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	written = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs),
+					 pos, buffer, size);
+	spu_release_saved(ctx);
+
+	return written;
+}
+
+/* File operations of the regs file: seekable read/write of lscsa->gprs. */
+static const struct file_operations spufs_regs_fops = {
+	.open	 = spufs_regs_open,
+	.read    = spufs_regs_read,
+	.write   = spufs_regs_write,
+	.llseek  = generic_file_llseek,
+};
+
+/* Core-dump helper: emit the saved lscsa->fpcr value. */
+static ssize_t
+spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+
+	return spufs_dump_emit(cprm, &lscsa->fpcr, sizeof(lscsa->fpcr));
+}
+
+/*
+ * Read the saved lscsa->fpcr value.  The context is forced into saved
+ * state for the duration of the copy.  Returns the error from
+ * spu_acquire_saved() or the result of simple_read_from_buffer().
+ */
+static ssize_t
+spufs_fpcr_read(struct file *file, char __user * buffer,
+		size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = simple_read_from_buffer(buffer, size, pos, &ctx->csa.lscsa->fpcr,
+				      sizeof(ctx->csa.lscsa->fpcr));
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * Overwrite part of the saved fpcr value with user data.
+ * Returns the number of bytes written, -EFBIG when *pos lies beyond the
+ * register, or another negative error.
+ */
+static ssize_t
+spufs_fpcr_write(struct file *file, const char __user * buffer,
+		 size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	ssize_t written;
+	int err;
+
+	if (*pos >= sizeof(lscsa->fpcr))
+		return -EFBIG;
+
+	err = spu_acquire_saved(ctx);
+	if (err)
+		return err;
+
+	written = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr),
+					 pos, buffer, size);
+	spu_release_saved(ctx);
+
+	return written;
+}
+
+/* Seekable read/write access to the saved fpcr value. */
+static const struct file_operations spufs_fpcr_fops = {
+	.open	= spufs_regs_open,
+	.llseek	= generic_file_llseek,
+	.read	= spufs_fpcr_read,
+	.write	= spufs_fpcr_write,
+};
+
+/*
+ * Common open handler for the pipe-like files: attach the inode's SPU
+ * context to the file, then let stream_open() finish the open.
+ */
+static int spufs_pipe_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return stream_open(inode, file);
+}
+
+/*
+ * Drain the mailbox into the user buffer, one 32-bit word at a time.
+ * Reading stops as soon as one of these holds:
+ *
+ * - the mailbox has no more data
+ * - the user buffer cannot take another full word
+ * - storing to user space faults (end of the mapped area)
+ *
+ * Returns the number of bytes stored, -EAGAIN when nothing was available,
+ * -EFAULT when the very first store faulted, or -EINVAL for len < 4.
+ */
+static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 __user *dst = (void __user *)buf;
+	u32 word;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	count = spu_acquire(ctx);
+	if (count)
+		return count;
+
+	count = 0;
+	while ((count + 4) <= len) {
+		if (ctx->ops->mbox_read(ctx, &word) == 0)
+			break;
+
+		/*
+		 * A fault is only reported as an error when nothing has
+		 * been handed out yet; otherwise return what we already
+		 * stored successfully.
+		 */
+		if (put_user(word, dst)) {
+			if (!count)
+				count = -EFAULT;
+			break;
+		}
+
+		count += 4;
+		dst++;
+	}
+	spu_release(ctx);
+
+	return count ? count : -EAGAIN;
+}
+
+/* Read-only, non-seekable mailbox file. */
+static const struct file_operations spufs_mbox_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_mbox_read,
+};
+
+/*
+ * Report the low byte of the mailbox status word as a 32-bit value.
+ * Returns 4 on success, -EINVAL for len < 4, -EFAULT on a bad buffer.
+ */
+static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mbox_stat;
+	ssize_t err;
+
+	if (len < 4)
+		return -EINVAL;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+	mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &mbox_stat, sizeof(mbox_stat)) ? -EFAULT : 4;
+}
+
+/* Read-only, non-seekable mailbox status file. */
+static const struct file_operations spufs_mbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_mbox_stat_read,
+};
+
+/*
+ * Low-level ibox access: forward to the context's ibox_read operation
+ * and hand its result back to the caller.
+ */
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data)
+{
+	size_t got = ctx->ops->ibox_read(ctx, data);
+
+	return got;
+}
+
+/*
+ * Interrupt-level ibox callback: wake every waiter on the ibox wait
+ * queue of the context bound to @spu, if there is one.
+ */
+void spufs_ibox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->ibox_wq);
+}
+
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ *
+ * Returns the number of bytes stored in @buf (a multiple of 4),
+ * -EINVAL if @len is smaller than one word, -EAGAIN if O_NONBLOCK is
+ * set and the mailbox is empty, -EFAULT if the first word cannot be
+ * stored, or the error from spu_acquire()/spufs_wait().
+ */
+static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 ibox_data, __user *udata = (void __user *)buf;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/* wait only for the first element */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_ibox_read(ctx, &ibox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+		/*
+		 * NOTE(review): this error path skips spu_release(), so it
+		 * presumably relies on spufs_wait() having dropped the
+		 * context when it fails -- confirm against its definition.
+		 */
+		if (count)
+			goto out;
+	}
+
+	/* if we can't write at all, return -EFAULT */
+	count = put_user(ibox_data, udata);
+	if (count)
+		goto out_unlock;
+
+	/* the first word is stored; keep going while data and room remain */
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->ibox_read(ctx, &ibox_data);
+		if (ret == 0)
+			break;
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = put_user(ibox_data, udata);
+		if (ret)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+/*
+ * Poll handler for the ibox file: register on the ibox wait queue and
+ * report readability according to the context's mbox_stat_poll operation.
+ */
+static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t events;
+
+	poll_wait(file, &ctx->ibox_wq, wait);
+
+	/*
+	 * The state mutex is taken uninterruptibly for now, which also
+	 * breaks the rule that poll must not sleep.  To be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	events = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM);
+	spu_release(ctx);
+
+	return events;
+}
+
+/* Read-only, pollable, non-seekable interrupt mailbox file. */
+static const struct file_operations spufs_ibox_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_ibox_read,
+	.poll	= spufs_ibox_poll,
+};
+
+/*
+ * Report bits 16..23 of the mailbox status word as a 32-bit value.
+ * Returns 4 on success, -EINVAL for len < 4, -EFAULT on a bad buffer.
+ */
+static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 ibox_stat;
+	ssize_t err;
+
+	if (len < 4)
+		return -EINVAL;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+	ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &ibox_stat, sizeof(ibox_stat)) ? -EFAULT : 4;
+}
+
+/* Read-only, non-seekable interrupt mailbox status file. */
+static const struct file_operations spufs_ibox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_ibox_stat_read,
+};
+
+/*
+ * Low-level mailbox write: forward to the context's wbox_write operation
+ * and hand its result back to the caller.
+ */
+size_t spu_wbox_write(struct spu_context *ctx, u32 data)
+{
+	size_t put = ctx->ops->wbox_write(ctx, data);
+
+	return put;
+}
+
+/*
+ * Interrupt-level wbox callback: wake every waiter on the wbox wait
+ * queue of the context bound to @spu, if there is one.
+ */
+void spufs_wbox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->wbox_wq);
+}
+
+/*
+ * Write as many bytes to the write mailbox (wbox) as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is available, but return when we have been able to
+ * write something.
+ *
+ * Returns the number of bytes consumed from @buf (a multiple of 4),
+ * -EINVAL if @len is smaller than one word, -EFAULT if the first word
+ * cannot be fetched, -EAGAIN if O_NONBLOCK is set and the first word
+ * cannot be written, or the error from spu_acquire()/spufs_wait().
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_data, __user *udata = (void __user *)buf;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	/* fetch the first word before taking the context */
+	if (get_user(wbox_data, udata))
+		return -EFAULT;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/*
+	 * make sure we can at least write one element, by waiting
+	 * in case of !O_NONBLOCK
+	 */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_wbox_write(ctx, wbox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		/*
+		 * NOTE(review): this error path skips spu_release(), so it
+		 * presumably relies on spufs_wait() having dropped the
+		 * context when it fails -- confirm against its definition.
+		 */
+		if (count)
+			goto out;
+	}
+
+
+	/* write as much as possible */
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = get_user(wbox_data, udata);
+		if (ret)
+			break;
+
+		ret = spu_wbox_write(ctx, wbox_data);
+		if (ret == 0)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+/*
+ * Poll handler for the wbox file: register on the wbox wait queue and
+ * report writability according to the context's mbox_stat_poll operation.
+ */
+static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t events;
+
+	poll_wait(file, &ctx->wbox_wq, wait);
+
+	/*
+	 * The state mutex is taken uninterruptibly for now, which also
+	 * breaks the rule that poll must not sleep.  To be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	events = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM);
+	spu_release(ctx);
+
+	return events;
+}
+
+/* Write-only, pollable, non-seekable wbox file. */
+static const struct file_operations spufs_wbox_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.write	= spufs_wbox_write,
+	.poll	= spufs_wbox_poll,
+};
+
+/*
+ * Report bits 8..15 of the mailbox status word as a 32-bit value.
+ * Returns 4 on success, -EINVAL for len < 4, -EFAULT on a bad buffer.
+ */
+static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_stat;
+	ssize_t err;
+
+	if (len < 4)
+		return -EINVAL;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+	wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &wbox_stat, sizeof(wbox_stat)) ? -EFAULT : 4;
+}
+
+/* Read-only, non-seekable wbox status file. */
+static const struct file_operations spufs_wbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_wbox_stat_read,
+};
+
+/*
+ * Open the signal1 file.  The first opener publishes the inode's address
+ * space in ctx->signal1; the opener count is kept under mapping_lock.
+ */
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->signal1 = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release the signal1 file.  When the last opener goes away the mapping
+ * pointer in ctx->signal1 is cleared again.
+ */
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->signal1 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Emit the saved channel 3 data word into the core dump, or nothing
+ * when the saved channel 3 count is zero.
+ */
+static ssize_t spufs_signal1_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	if (ctx->csa.spu_chnlcnt_RW[3])
+		return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3],
+				       sizeof(ctx->csa.spu_chnldata_RW[3]));
+
+	return 0;
+}
+
+/*
+ * Copy the saved channel 3 data word to user space.
+ * Returns the word size on success, 0 when the saved channel 3 count is
+ * zero, -EINVAL when @len is too small, or -EFAULT on a bad buffer.
+ */
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+			size_t len)
+{
+	const size_t word_len = sizeof(ctx->csa.spu_chnldata_RW[3]);
+
+	if (len < word_len)
+		return -EINVAL;
+
+	if (!ctx->csa.spu_chnlcnt_RW[3])
+		return 0;
+
+	if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3], word_len))
+		return -EFAULT;
+
+	return word_len;
+}
+
+/*
+ * read() handler for signal1: run __spufs_signal1_read() with the
+ * context acquired in saved state.
+ */
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = __spufs_signal1_read(ctx, buf, len);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * write() handler for signal1: fetch one 32-bit word from user space and
+ * pass it to the context's signal1_write operation.
+ * Returns 4 on success, -EINVAL for len < 4, -EFAULT on a bad buffer, or
+ * the error from spu_acquire().
+ */
+static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t err;
+	u32 word;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&word, buf, 4))
+		return -EFAULT;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	ctx->ops->signal1_write(ctx, word);
+	spu_release(ctx);
+
+	return 4;
+}
+
+/*
+ * Fault handler for mappings of the signal1 file.  The offset handed to
+ * spufs_ps_fault() is chosen at build time from SPUFS_SIGNAL_MAP_SIZE;
+ * any size other than 0x1000 or 0x10000 fails the build.
+ */
+static vm_fault_t
+spufs_signal1_mmap_fault(struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	return spufs_ps_fault(vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+/* Faults on signal1 mappings are resolved by spufs_signal1_mmap_fault(). */
+static const struct vm_operations_struct spufs_signal1_mmap_vmops = {
+	.fault = spufs_signal1_mmap_fault,
+};
+
+/*
+ * mmap() handler for signal1.  Only shared mappings are accepted; they
+ * are marked VM_IO | VM_PFNMAP, made non-cached and populated on fault.
+ */
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED) {
+		vm_flags_set(vma, VM_IO | VM_PFNMAP);
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_ops = &spufs_signal1_mmap_vmops;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* signal1 file operations including read access. */
+static const struct file_operations spufs_signal1_fops = {
+	.open		= spufs_signal1_open,
+	.release	= spufs_signal1_release,
+	.llseek		= no_llseek,
+	.read		= spufs_signal1_read,
+	.write		= spufs_signal1_write,
+	.mmap		= spufs_signal1_mmap,
+};
+
+/* Variant of the signal1 file operations that offers no read handler. */
+static const struct file_operations spufs_signal1_nosched_fops = {
+	.open		= spufs_signal1_open,
+	.release	= spufs_signal1_release,
+	.llseek		= no_llseek,
+	.write		= spufs_signal1_write,
+	.mmap		= spufs_signal1_mmap,
+};
+
+/*
+ * Open the signal2 file.  The first opener publishes the inode's address
+ * space in ctx->signal2; the opener count is kept under mapping_lock.
+ */
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->signal2 = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release the signal2 file.  When the last opener goes away the mapping
+ * pointer in ctx->signal2 is cleared again.
+ */
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->signal2 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Emit the saved channel 4 data word into the core dump, or nothing
+ * when the saved channel 4 count is zero.
+ */
+static ssize_t spufs_signal2_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	if (ctx->csa.spu_chnlcnt_RW[4])
+		return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4],
+				       sizeof(ctx->csa.spu_chnldata_RW[4]));
+
+	return 0;
+}
+
+/*
+ * Copy the saved channel 4 data word to user space.
+ * Returns the word size on success, 0 when the saved channel 4 count is
+ * zero, -EINVAL when @len is too small, or -EFAULT on a bad buffer.
+ */
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+			size_t len)
+{
+	const size_t word_len = sizeof(ctx->csa.spu_chnldata_RW[4]);
+
+	if (len < word_len)
+		return -EINVAL;
+
+	if (!ctx->csa.spu_chnlcnt_RW[4])
+		return 0;
+
+	if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4], word_len))
+		return -EFAULT;
+
+	return word_len;
+}
+
+/*
+ * read() handler for signal2: run __spufs_signal2_read() with the
+ * context acquired in saved state.
+ */
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = __spufs_signal2_read(ctx, buf, len);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * write() handler for signal2: fetch one 32-bit word from user space and
+ * pass it to the context's signal2_write operation.
+ * Returns 4 on success, -EINVAL for len < 4, -EFAULT on a bad buffer, or
+ * the error from spu_acquire().
+ */
+static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t err;
+	u32 word;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&word, buf, 4))
+		return -EFAULT;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	ctx->ops->signal2_write(ctx, word);
+	spu_release(ctx);
+
+	return 4;
+}
+
+/*
+ * mmap support for the signal2 file.  It is only built when
+ * SPUFS_MMAP_4K is non-zero; otherwise spufs_signal2_mmap is defined as
+ * NULL so the file_operations tables carry no mmap handler.
+ */
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for signal2 mappings.  The offset handed to
+ * spufs_ps_fault() is chosen at build time from SPUFS_SIGNAL_MAP_SIZE.
+ */
+static vm_fault_t
+spufs_signal2_mmap_fault(struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	return spufs_ps_fault(vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static const struct vm_operations_struct spufs_signal2_mmap_vmops = {
+	.fault = spufs_signal2_mmap_fault,
+};
+
+/*
+ * Accept shared mappings only; mark them VM_IO | VM_PFNMAP, make them
+ * non-cached and leave population to the fault handler.
+ */
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_signal2_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/* signal2 file operations including read access. */
+static const struct file_operations spufs_signal2_fops = {
+	.open		= spufs_signal2_open,
+	.release	= spufs_signal2_release,
+	.llseek		= no_llseek,
+	.read		= spufs_signal2_read,
+	.write		= spufs_signal2_write,
+	.mmap		= spufs_signal2_mmap,
+};
+
+/* Variant of the signal2 file operations that offers no read handler. */
+static const struct file_operations spufs_signal2_nosched_fops = {
+	.open		= spufs_signal2_open,
+	.release	= spufs_signal2_release,
+	.llseek		= no_llseek,
+	.write		= spufs_signal2_write,
+	.mmap		= spufs_signal2_mmap,
+};
+
+/*
+ * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the
+ * work of acquiring (or not) the SPU context before calling through
+ * to the actual get routine. The set routine is called directly.
+ *
+ * The macro generates a getter named __<get> with the (void *data,
+ * u64 *val) signature and registers it, together with __set and __fmt,
+ * through DEFINE_SPUFS_SIMPLE_ATTRIBUTE.  __acquire selects how the
+ * generated getter brackets the call to __get:
+ *
+ *   SPU_ATTR_NOACQUIRE     - call __get without taking the context
+ *   SPU_ATTR_ACQUIRE       - spu_acquire() / spu_release()
+ *   SPU_ATTR_ACQUIRE_SAVED - spu_acquire_saved() / spu_release_saved()
+ *
+ * If acquiring fails the generated getter returns that error and leaves
+ * *val untouched; otherwise it returns 0.
+ *
+ * The expansion already ends in a semicolon, so invocations compile
+ * with or without a trailing one.
+ */
+#define SPU_ATTR_NOACQUIRE	0
+#define SPU_ATTR_ACQUIRE	1
+#define SPU_ATTR_ACQUIRE_SAVED	2
+
+#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire)	\
+static int __##__get(void *data, u64 *val)				\
+{									\
+	struct spu_context *ctx = data;					\
+	int ret = 0;							\
+									\
+	if (__acquire == SPU_ATTR_ACQUIRE) {				\
+		ret = spu_acquire(ctx);					\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release(ctx);					\
+	} else if (__acquire == SPU_ATTR_ACQUIRE_SAVED)	{		\
+		ret = spu_acquire_saved(ctx);				\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release_saved(ctx);					\
+	} else								\
+		*val = __get(ctx);					\
+									\
+	return 0;							\
+}									\
+DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt);
+
+/*
+ * Attribute setter: hand @val to the context's signal1_type_set
+ * operation while holding the context.
+ */
+static int spufs_signal1_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (!err) {
+		ctx->ops->signal1_type_set(ctx, val);
+		spu_release(ctx);
+	}
+
+	return err;
+}
+
+/* Attribute getter: result of the context's signal1_type_get operation. */
+static u64 spufs_signal1_type_get(struct spu_context *ctx)
+{
+	u64 type = ctx->ops->signal1_type_get(ctx);
+
+	return type;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get,
+		       spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+
+/*
+ * Attribute setter: hand @val to the context's signal2_type_set
+ * operation while holding the context.
+ */
+static int spufs_signal2_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (!err) {
+		ctx->ops->signal2_type_set(ctx, val);
+		spu_release(ctx);
+	}
+
+	return err;
+}
+
+/* Attribute getter: result of the context's signal2_type_get operation. */
+static u64 spufs_signal2_type_get(struct spu_context *ctx)
+{
+	u64 type = ctx->ops->signal2_type_get(ctx);
+
+	return type;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
+		       spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+/*
+ * mmap support for the mss file.  It is only built when SPUFS_MMAP_4K
+ * is non-zero; otherwise spufs_mss_mmap is defined as NULL.
+ */
+#if SPUFS_MMAP_4K
+/* Resolve faults from offset 0x0000 over SPUFS_MSS_MAP_SIZE bytes. */
+static vm_fault_t
+spufs_mss_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mss_mmap_vmops = {
+	.fault = spufs_mss_mmap_fault,
+};
+
+/*
+ * mmap support for the problem state MSS area, which starts at offset
+ * 0x0000 and spans SPUFS_MSS_MAP_SIZE bytes.  Only shared mappings are
+ * accepted.
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mss_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * Open the mss file.  The first opener publishes the inode's address
+ * space in ctx->mss; the opener count is kept under mapping_lock.
+ */
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	file->private_data = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (info->i_openers == 0)
+		ctx->mss = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release the mss file.  When the last opener goes away the mapping
+ * pointer in ctx->mss is cleared again.
+ */
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->mss = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* The mss file can only be opened, mapped and released. */
+static const struct file_operations spufs_mss_fops = {
+	.open		= spufs_mss_open,
+	.release	= spufs_mss_release,
+	.llseek		= no_llseek,
+	.mmap		= spufs_mss_mmap,
+};
+
+/* Resolve psmap faults from offset 0x0000 over SPUFS_PS_MAP_SIZE bytes. */
+static vm_fault_t
+spufs_psmap_mmap_fault(struct vm_fault *vmf)
+{
+	vm_fault_t result = spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+
+	return result;
+}
+
+static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.fault = spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap support for the full problem state area [0x00000 - 0x1ffff].
+ * Only shared mappings are accepted; they are marked VM_IO | VM_PFNMAP,
+ * made non-cached and populated on fault.
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED) {
+		vm_flags_set(vma, VM_IO | VM_PFNMAP);
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_ops = &spufs_psmap_mmap_vmops;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Open the psmap file.  The first opener publishes the inode's address
+ * space in ctx->psmap; the opener count is kept under mapping_lock.
+ */
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = info->i_ctx;
+	if (info->i_openers == 0)
+		ctx->psmap = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release the psmap file.  When the last opener goes away the mapping
+ * pointer in ctx->psmap is cleared again.
+ */
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->psmap = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* The psmap file can only be opened, mapped and released. */
+static const struct file_operations spufs_psmap_fops = {
+	.open		= spufs_psmap_open,
+	.release	= spufs_psmap_release,
+	.llseek		= no_llseek,
+	.mmap		= spufs_psmap_mmap,
+};
+
+
+/*
+ * mmap support for the mfc file.  It is only built when SPUFS_MMAP_4K
+ * is non-zero; otherwise spufs_mfc_mmap is defined as NULL.
+ */
+#if SPUFS_MMAP_4K
+/* Resolve faults from offset 0x3000 over SPUFS_MFC_MAP_SIZE bytes. */
+static vm_fault_t
+spufs_mfc_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.fault = spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap support for the problem state MFC DMA area, which starts at
+ * offset 0x3000 and spans SPUFS_MFC_MAP_SIZE bytes.  Only shared
+ * mappings are accepted.
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * Open the mfc file.  Only the mm that owns the context may open it
+ * (-EINVAL otherwise), and only while the inode reference count is
+ * exactly one (-EBUSY otherwise).  The first opener publishes the
+ * inode's address space in ctx->mfc.
+ */
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	/* DMA on behalf of a foreign process is not something we handle */
+	if (ctx->owner != current->mm)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->mfc = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release the mfc file.  When the last opener goes away the mapping
+ * pointer in ctx->mfc is cleared again.
+ */
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->mfc = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Interrupt-level mfc callback: wake every waiter on the mfc wait
+ * queue of the context bound to @spu, if there is one.
+ */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->mfc_wq);
+}
+
+/*
+ * Check whether any tag group we are waiting for has completed.
+ *
+ * Stores the completed-and-awaited tag bits in *status and removes them
+ * from ctx->tagwait.  Returns 1 if at least one such bit was set;
+ * otherwise issues a set_mfc_query for the remaining awaited tags and
+ * returns 0.
+ */
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* FIXME we need locking around tagwait */
+	u32 done = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+
+	*status = done;
+	ctx->tagwait &= ~done;
+	if (done)
+		return 1;
+
+	/*
+	 * Nothing finished yet: enable interrupt waiting for any tag
+	 * group.  This may silently fail if interrupts are already
+	 * enabled.
+	 */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+
+	return 0;
+}
+
+/*
+ * read() handler for the mfc file: report MFC tag status as one 32-bit
+ * word.
+ *
+ * @size must be exactly 4.  With O_NONBLOCK the current tag status is
+ * returned if it contains at least one awaited tag, and -EAGAIN is
+ * returned otherwise.  Without O_NONBLOCK the call waits on mfc_wq until
+ * spufs_read_mfc_tagstatus() reports a completed awaited tag group.
+ *
+ * Returns 4 on success, -EINVAL for a wrong @size, -EAGAIN as described
+ * above, -EFAULT if @buffer cannot be written, or the error from
+ * spu_acquire()/spufs_wait().
+ */
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+	u32 status;
+
+	if (size != 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait)) {
+			/*
+			 * Nothing we wait for has completed.  This used to
+			 * fall through to the success path, which replaced
+			 * -EAGAIN with 4 and copied the status out anyway.
+			 */
+			spu_release(ctx);
+			return -EAGAIN;
+		}
+		ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+			   spufs_read_mfc_tagstatus(ctx, &status));
+		/*
+		 * As before, no spu_release() on this path.
+		 * NOTE(review): presumably spufs_wait() has already dropped
+		 * the context when it fails -- confirm against its
+		 * definition.
+		 */
+		if (ret)
+			return ret;
+	}
+	spu_release(ctx);
+
+	if (copy_to_user(buffer, &status, 4))
+		return -EFAULT;
+
+	return 4;
+}
+
+/*
+ * Validate a user-supplied MFC DMA command before it is queued.
+ *
+ * Checks, in order: the opcode is one of the PUT/GET variants listed
+ * below, the low four bits of lsa and ea agree, lsa is aligned for the
+ * transfer size, the size does not exceed 16 KiB, the tag fits in the
+ * low four bits, and class is zero.
+ *
+ * Returns 0 if the command is acceptable, -EIO otherwise.
+ */
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa,
+		 cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	/* both addresses must share the same offset within 16 bytes */
+	if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) {
+		pr_debug("invalid DMA alignment, ea %llx lsa %x\n",
+				cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	/*
+	 * Only the low four bits of the size are examined here: 1, 2, 4
+	 * and 8 require matching natural alignment of lsa, 0 (a multiple
+	 * of 16) requires 16-byte alignment, anything else is rejected.
+	 */
+	switch (cmd->size & 0xf) {
+	case 1:
+		break;
+	case 2:
+		if (cmd->lsa & 1)
+			goto error;
+		break;
+	case 4:
+		if (cmd->lsa & 3)
+			goto error;
+		break;
+	case 8:
+		if (cmd->lsa & 7)
+			goto error;
+		break;
+	case 0:
+		if (cmd->lsa & 15)
+			goto error;
+		break;
+	/* misaligned cases above jump here and share the default handling */
+	error:
+	default:
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Try to queue @cmd, storing the result of the send_mfc_command
+ * operation in *error.
+ *
+ * Returns 1 when the attempt is finished (successfully or with an error
+ * other than -EAGAIN) and 0 when the queue was still full after one
+ * retry, i.e. the caller should try again later.
+ */
+static int spu_send_mfc_command(struct spu_context *ctx,
+				struct mfc_dma_command cmd,
+				int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error != -EAGAIN)
+		return 1;
+
+	/*
+	 * The queue is full: ask to be notified when any tag group
+	 * completes so there is room for the new command ...
+	 */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+
+	/* ... and retry once, since the queue may have drained meanwhile */
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+
+	return *error != -EAGAIN;
+}
+
+/*
+ * write() handler for the mfc file: queue one MFC DMA command.
+ *
+ * @size must equal sizeof(struct mfc_dma_command).  The command is
+ * copied from user space, validated by spufs_check_valid_dma(), and
+ * queued once the context is in SPU_STATE_RUNNABLE.  On success the
+ * command's tag bit is added to ctx->tagwait.
+ *
+ * Returns @size on success, -EINVAL for a wrong @size, -EFAULT if
+ * @buffer cannot be read, the validation error, or the error from
+ * spu_acquire()/spufs_wait()/send_mfc_command.
+ */
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int ret = -EINVAL;
+
+	if (size != sizeof cmd)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_from_user(&cmd, buffer, sizeof cmd))
+		goto out;
+
+	ret = spufs_check_valid_dma(&cmd);
+	if (ret)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+
+	/*
+	 * NOTE(review): the spufs_wait() error paths below skip
+	 * spu_release(), so they presumably rely on spufs_wait() having
+	 * dropped the context when it fails -- confirm against its
+	 * definition.
+	 */
+	ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+	if (ret)
+		goto out;
+
+	if (file->f_flags & O_NONBLOCK) {
+		/* single attempt; an error from the operation is returned */
+		ret = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+		/* wait until spu_send_mfc_command() reports it is done */
+		ret = spufs_wait(ctx->mfc_wq,
+				 spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			goto out;
+		if (status)
+			ret = status;
+	}
+
+	if (ret)
+		goto out_unlock;
+
+	/* remember the tag so read/poll know what to wait for */
+	ctx->tagwait |= 1 << cmd.tag;
+	ret = size;
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+/*
+ * Poll handler for the mfc file.  Writable when the free-element count
+ * has any of its low 16 bits set, readable when an awaited tag group
+ * shows up in the tag status.
+ */
+static __poll_t spufs_mfc_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t mask = 0;
+	u32 free_elements;
+	u32 tagstatus;
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	/*
+	 * The state mutex is taken uninterruptibly for now, which also
+	 * breaks the rule that poll must not sleep.  To be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+	spu_release(ctx);
+
+	if (free_elements & 0xffff)
+		mask |= EPOLLOUT | EPOLLWRNORM;
+	if (tagstatus & ctx->tagwait)
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
+		free_elements, tagstatus, ctx->tagwait);
+
+	return mask;
+}
+
+/*
+ * flush() handler for the mfc file.
+ *
+ * The wait for outstanding tag groups is compiled out (#if 0) because
+ * it is noted to hang, so the function currently only acquires and
+ * releases the context.  Returns 0, or the error from spu_acquire().
+ */
+static int spufs_mfc_flush(struct file *file, fl_owner_t id)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+#if 0
+/* this currently hangs */
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+	if (ret)
+		goto out;
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+	if (ret)
+		goto out;
+#else
+	ret = 0;
+#endif
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+/*
+ * fsync() handler for the mfc file: write back and wait on the given
+ * range, then run spufs_mfc_flush() with the inode locked.
+ */
+static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = file_inode(file);
+	int err;
+
+	err = file_write_and_wait_range(file, start, end);
+	if (err)
+		return err;
+
+	inode_lock(inode);
+	err = spufs_mfc_flush(file, NULL);
+	inode_unlock(inode);
+
+	return err;
+}
+
+/* File operations of the non-seekable mfc file. */
+static const struct file_operations spufs_mfc_fops = {
+	.open		= spufs_mfc_open,
+	.release	= spufs_mfc_release,
+	.llseek		= no_llseek,
+	.read		= spufs_mfc_read,
+	.write		= spufs_mfc_write,
+	.poll		= spufs_mfc_poll,
+	.mmap		= spufs_mfc_mmap,
+	.flush		= spufs_mfc_flush,
+	.fsync		= spufs_mfc_fsync,
+};
+
+/*
+ * Attribute setter: hand @val to the context's npc_write operation
+ * while holding the context.
+ */
+static int spufs_npc_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (!err) {
+		ctx->ops->npc_write(ctx, val);
+		spu_release(ctx);
+	}
+
+	return err;
+}
+
+/* Attribute getter: result of the context's npc_read operation. */
+static u64 spufs_npc_get(struct spu_context *ctx)
+{
+	u64 npc = ctx->ops->npc_read(ctx);
+
+	return npc;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE);
+
+/*
+ * Attribute setter: store the low 32 bits of @val in slot 0 of the
+ * saved decr value, with the context acquired in saved state.
+ */
+static int spufs_decr_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int err = spu_acquire_saved(ctx);
+
+	if (!err) {
+		lscsa->decr.slot[0] = (u32) val;
+		spu_release_saved(ctx);
+	}
+
+	return err;
+}
+
+/* Attribute getter: slot 0 of the saved decr value. */
+static u64 spufs_decr_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->decr.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Attribute setter: set (@val non-zero) or clear (@val zero) the
+ * MFC_CNTL_DECREMENTER_RUNNING bit in the saved mfc_control_RW value.
+ */
+static int spufs_decr_status_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire_saved(ctx);
+
+	if (err)
+		return err;
+
+	if (!val)
+		ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+	else
+		ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+/*
+ * Attribute getter: SPU_DECR_STATUS_RUNNING when the saved
+ * mfc_control_RW value has MFC_CNTL_DECREMENTER_RUNNING set, else 0.
+ */
+static u64 spufs_decr_status_get(struct spu_context *ctx)
+{
+	if (!(ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING))
+		return 0;
+
+	return SPU_DECR_STATUS_RUNNING;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
+		       spufs_decr_status_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Attribute setter: store the low 32 bits of @val in slot 0 of the
+ * saved event_mask value, with the context acquired in saved state.
+ */
+static int spufs_event_mask_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int err = spu_acquire_saved(ctx);
+
+	if (!err) {
+		lscsa->event_mask.slot[0] = (u32) val;
+		spu_release_saved(ctx);
+	}
+
+	return err;
+}
+
+/* Attribute getter: slot 0 of the saved event_mask value. */
+static u64 spufs_event_mask_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->event_mask.slot[0];
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+		       spufs_event_mask_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Attribute getter: the saved channel 0 data word when the saved
+ * channel 0 count is non-zero, otherwise 0.  Read-only attribute.
+ */
+static u64 spufs_event_status_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	if (!csa->spu_chnlcnt_RW[0])
+		return 0;
+
+	return csa->spu_chnldata_RW[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+		       NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+/*
+ * Attribute setter: store the low 32 bits of @val in slot 0 of the
+ * saved srr0 value, with the context acquired in saved state.
+ */
+static int spufs_srr0_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int err = spu_acquire_saved(ctx);
+
+	if (!err) {
+		lscsa->srr0.slot[0] = (u32) val;
+		spu_release_saved(ctx);
+	}
+
+	return err;
+}
+
+/* Attribute getter: slot 0 of the saved srr0 value. */
+static u64 spufs_srr0_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->srr0.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+/*
+ * Attribute getter: the number of the SPU the context is bound to while
+ * it is in SPU_STATE_RUNNABLE, otherwise (unsigned int)-1.
+ */
+static u64 spufs_id_get(struct spu_context *ctx)
+{
+	u64 num = (unsigned int)-1;
+
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		num = ctx->spu->number;
+
+	return num;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE)
+
+/* Attribute getter: the context's object_id, read without any locking. */
+static u64 spufs_object_id_get(struct spu_context *ctx)
+{
+	/* FIXME: Should there really be no locking here? */
+	u64 id = ctx->object_id;
+
+	return id;
+}
+
+/*
+ * Setter behind spufs_object_id_ops: record @id in the context.  No lock is
+ * taken here; always returns 0.
+ */
+static int spufs_object_id_set(void *data, u64 id)
+{
+	struct spu_context *target = data;
+
+	target->object_id = id;
+	return 0;
+}
+
+/*
+ * Read/write attribute built on spufs_object_id_get()/_set(), formatted as
+ * "0x%llx\n".
+ * NOTE(review): macro and SPU_ATTR_* semantics live outside this chunk.
+ */
+DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+		       spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE);
+
+/* Getter behind spufs_lslr_ops: the spu_lslr_RW value saved in the CSA. */
+static u64 spufs_lslr_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->priv2.spu_lslr_RW;
+}
+/*
+ * Read-only attribute (no setter is supplied) built on spufs_lslr_get(),
+ * formatted as "0x%llx\n".
+ * NOTE(review): macro and SPU_ATTR_* semantics live outside this chunk.
+ */
+DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Shared open() for the *_info files: stash the inode's SPU context in
+ * file->private_data for the read handlers.  Always returns 0.
+ */
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return 0;
+}
+
+/*
+ * seq_file show routine for the capabilities file: prints "sched" unless
+ * the context has SPU_CREATE_NOSCHED set, and "step" unless it has
+ * SPU_CREATE_ISOLATE set.  Always returns 0.
+ */
+static int spufs_caps_show(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	if ((ctx->flags & SPU_CREATE_NOSCHED) == 0)
+		seq_puts(s, "sched\n");
+
+	if ((ctx->flags & SPU_CREATE_ISOLATE) == 0)
+		seq_puts(s, "step\n");
+
+	return 0;
+}
+
+/* open(): set up a single-record seq_file showing the context's caps. */
+static int spufs_caps_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_caps_show, ctx);
+}
+
+/* File operations for "capabilities": a single_open() seq_file. */
+static const struct file_operations spufs_caps_fops = {
+	.open		= spufs_caps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Coredump emitter for "mbox_info": emits the saved pu_mb_R word when the
+ * low byte of the saved mb_stat_R is non-zero, otherwise emits nothing and
+ * returns 0.
+ */
+static ssize_t spufs_mbox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	if (ctx->csa.prob.mb_stat_R & 0x0000ff)
+		return spufs_dump_emit(cprm, &ctx->csa.prob.pu_mb_R,
+				       sizeof(ctx->csa.prob.pu_mb_R));
+
+	return 0;
+}
+
+/*
+ * read() for "mbox_info": snapshot the saved mailbox status and the saved
+ * pu_mb_R word under the CSA register lock, then copy the word to user
+ * space.  Returns 0 (EOF) when the low byte of the status is zero, or the
+ * error from spu_acquire_saved().
+ */
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mb_stat, mb_word;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	/* Take both values in one critical section, copy out afterwards. */
+	spin_lock(&ctx->csa.register_lock);
+	mb_stat = ctx->csa.prob.mb_stat_R;
+	mb_word = ctx->csa.prob.pu_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* Entry present: hand it out; otherwise EOF. */
+	if (mb_stat & 0x0000ff)
+		return simple_read_from_buffer(buf, len, pos, &mb_word,
+					       sizeof(mb_word));
+
+	return 0;
+}
+
+/* File operations for "mbox_info"; context is attached by spufs_info_open(). */
+static const struct file_operations spufs_mbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_mbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+/*
+ * Coredump emitter for "ibox_info": emits the saved puint_mb_R value when
+ * bits 16-23 of the saved mb_stat_R are non-zero, otherwise emits nothing
+ * and returns 0.
+ */
+static ssize_t spufs_ibox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	if (ctx->csa.prob.mb_stat_R & 0xff0000)
+		return spufs_dump_emit(cprm, &ctx->csa.priv2.puint_mb_R,
+				       sizeof(ctx->csa.priv2.puint_mb_R));
+
+	return 0;
+}
+
+/*
+ * read() for "ibox_info": snapshot the saved mailbox status and the saved
+ * puint_mb_R value under the CSA register lock, then copy the value to
+ * user space.  Returns 0 (EOF) when bits 16-23 of the status are zero, or
+ * the error from spu_acquire_saved().
+ */
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mb_stat, ib_word;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	/* Take both values in one critical section, copy out afterwards. */
+	spin_lock(&ctx->csa.register_lock);
+	mb_stat = ctx->csa.prob.mb_stat_R;
+	ib_word = ctx->csa.priv2.puint_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* Entry present: hand it out; otherwise EOF. */
+	if (mb_stat & 0xff0000)
+		return simple_read_from_buffer(buf, len, pos, &ib_word,
+					       sizeof(ib_word));
+
+	return 0;
+}
+
+/* File operations for "ibox_info"; context is attached by spufs_info_open(). */
+static const struct file_operations spufs_ibox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_ibox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+/*
+ * Size in BYTES of the wbox_info payload: sizeof(u32) times (4 minus the
+ * value held in bits 8-15 of the saved mb_stat_R).
+ */
+static size_t spufs_wbox_info_cnt(struct spu_context *ctx)
+{
+	return sizeof(u32) * (4 - ((ctx->csa.prob.mb_stat_R >> 8) & 0xff));
+}
+
+/*
+ * Coredump emitter for "wbox_info": emits spufs_wbox_info_cnt() bytes from
+ * the start of the saved spu_mailbox_data array.
+ */
+static ssize_t spufs_wbox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	size_t nbytes = spufs_wbox_info_cnt(ctx);
+
+	return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data, nbytes);
+}
+
+/*
+ * read() for "wbox_info": copy the saved spu_mailbox_data words to user
+ * space.
+ *
+ * The byte count and the mailbox data are snapshotted together under the
+ * CSA register lock; the copy to user space happens after the lock and the
+ * context have been released.
+ *
+ * Returns the number of bytes read, or the error from spu_acquire_saved()
+ * or simple_read_from_buffer().
+ */
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)];
+	size_t count;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	count = spufs_wbox_info_cnt(ctx);
+	memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data));
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/*
+	 * spufs_wbox_info_cnt() already returns a size in bytes, so it must
+	 * not be scaled by sizeof(u32) again: doing so let a reader pull up
+	 * to four times sizeof(data) out of this on-stack array.  Also clamp
+	 * to the array, in case the status field holds an unexpected value.
+	 */
+	if (count > sizeof(data))
+		count = sizeof(data);
+
+	return simple_read_from_buffer(buf, len, pos, &data, count);
+}
+
+/* File operations for "wbox_info"; context is attached by spufs_info_open(). */
+static const struct file_operations spufs_wbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_wbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+/*
+ * Fill @info from the context save area: the tag status query value, slot 0
+ * of the LSCSA tag mask, saved channel data 24, 25 and 27, and the four
+ * data words of each of the 16 saved spuq[] entries.
+ *
+ * Takes no locks itself.
+ */
+static void spufs_get_dma_info(struct spu_context *ctx,
+		struct spu_dma_info *info)
+{
+	struct spu_state *csa = &ctx->csa;
+	int idx;
+
+	info->dma_info_type = csa->priv2.spu_tag_status_query_RW;
+	info->dma_info_mask = csa->lscsa->tag_mask.slot[0];
+	info->dma_info_status = csa->spu_chnldata_RW[24];
+	info->dma_info_stall_and_notify = csa->spu_chnldata_RW[25];
+	info->dma_info_atomic_command_status = csa->spu_chnldata_RW[27];
+
+	for (idx = 0; idx < 16; idx++) {
+		struct mfc_cq_sr *dst = &info->dma_info_command_data[idx];
+		struct mfc_cq_sr *src = &csa->priv2.spuq[idx];
+
+		dst->mfc_cq_data0_RW = src->mfc_cq_data0_RW;
+		dst->mfc_cq_data1_RW = src->mfc_cq_data1_RW;
+		dst->mfc_cq_data2_RW = src->mfc_cq_data2_RW;
+		dst->mfc_cq_data3_RW = src->mfc_cq_data3_RW;
+	}
+}
+
+/*
+ * Coredump emitter for "dma_info": build a struct spu_dma_info on the stack
+ * with spufs_get_dma_info() and emit all of it.
+ */
+static ssize_t spufs_dma_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_dma_info snapshot;
+
+	spufs_get_dma_info(ctx, &snapshot);
+
+	return spufs_dump_emit(cprm, &snapshot, sizeof(snapshot));
+}
+
+/*
+ * read() for "dma_info": snapshot the DMA info under the CSA register lock
+ * and copy it to user space once the lock and the context are released.
+ * Returns the bytes read, or the error from spu_acquire_saved().
+ */
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_dma_info snapshot;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	spufs_get_dma_info(ctx, &snapshot);
+	spin_unlock(&ctx->csa.register_lock);
+
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &snapshot,
+				       sizeof(snapshot));
+}
+
+/* File operations for "dma_info"; seeking is refused via no_llseek. */
+static const struct file_operations spufs_dma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_dma_info_read,
+	.llseek = no_llseek,
+};
+
+/*
+ * Fill @info from the context save area: the saved DMA query type, query
+ * mask and tag status, plus the four data words of each of the 8 saved
+ * puq[] entries.
+ *
+ * Takes no locks itself.
+ */
+static void spufs_get_proxydma_info(struct spu_context *ctx,
+		struct spu_proxydma_info *info)
+{
+	struct spu_state *csa = &ctx->csa;
+	int idx;
+
+	info->proxydma_info_type = csa->prob.dma_querytype_RW;
+	info->proxydma_info_mask = csa->prob.dma_querymask_RW;
+	info->proxydma_info_status = csa->prob.dma_tagstatus_R;
+
+	for (idx = 0; idx < 8; idx++) {
+		struct mfc_cq_sr *dst = &info->proxydma_info_command_data[idx];
+		struct mfc_cq_sr *src = &csa->priv2.puq[idx];
+
+		dst->mfc_cq_data0_RW = src->mfc_cq_data0_RW;
+		dst->mfc_cq_data1_RW = src->mfc_cq_data1_RW;
+		dst->mfc_cq_data2_RW = src->mfc_cq_data2_RW;
+		dst->mfc_cq_data3_RW = src->mfc_cq_data3_RW;
+	}
+}
+
+/*
+ * Coredump emitter for "proxydma_info": build a struct spu_proxydma_info on
+ * the stack with spufs_get_proxydma_info() and emit all of it.
+ */
+static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_proxydma_info snapshot;
+
+	spufs_get_proxydma_info(ctx, &snapshot);
+
+	return spufs_dump_emit(cprm, &snapshot, sizeof(snapshot));
+}
+
+/*
+ * read() for "proxydma_info": snapshot the proxy DMA info under the CSA
+ * register lock and copy it to user space once the lock and the context
+ * are released.
+ *
+ * Returns -EINVAL when @len is smaller than the whole structure, the error
+ * from spu_acquire_saved(), or the bytes read.
+ */
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_proxydma_info snapshot;
+	int ret;
+
+	/* Partial-structure reads are refused up front. */
+	if (len < sizeof(snapshot))
+		return -EINVAL;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	spufs_get_proxydma_info(ctx, &snapshot);
+	spin_unlock(&ctx->csa.register_lock);
+
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &snapshot,
+				       sizeof(snapshot));
+}
+
+/* File operations for "proxydma_info"; seeking is refused via no_llseek. */
+static const struct file_operations spufs_proxydma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_proxydma_info_read,
+	.llseek = no_llseek,
+};
+
+/* seq_file show routine for "tid": print ctx->tid as a decimal line. */
+static int spufs_show_tid(struct seq_file *s, void *private)
+{
+	const struct spu_context *ctx = s->private;
+
+	seq_printf(s, "%d\n", ctx->tid);
+
+	return 0;
+}
+
+/* open(): set up a single-record seq_file backed by spufs_show_tid(). */
+static int spufs_tid_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_tid, ctx);
+}
+
+/* File operations for "tid": a single_open() seq_file. */
+static const struct file_operations spufs_tid_fops = {
+	.open		= spufs_tid_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Names printed as the first field of the "stat" file; spufs_show_stat()
+ * indexes this array with ctx->stats.util_state.
+ */
+static const char *ctx_state_names[] = {
+	"user", "system", "iowait", "loaded"
+};
+
+/*
+ * Time accounted to @state for this context, in milliseconds.
+ *
+ * Returns the accumulated ctx->stats.times[@state], plus - when the context
+ * has an SPU and is currently in @state - the time elapsed since
+ * ctx->stats.tstamp, converted from nanoseconds to milliseconds.
+ */
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+		enum spu_utilization_state state)
+{
+	unsigned long long ns = ctx->stats.times[state];
+
+	/*
+	 * The controlling thread normally updates the utilization statistics
+	 * at well defined state transitions.  A lazily loaded context is not
+	 * tightly coupled to that thread, so its statistics lag behind; make
+	 * up for it by adding the delta since the last recorded state change.
+	 */
+	if (ctx->spu && ctx->stats.util_state == state)
+		ns += ktime_get_ns() - ctx->stats.tstamp;
+
+	return ns / NSEC_PER_MSEC;
+}
+
+/*
+ * SLB fault count for the context: the accumulated ctx->stats.slb_flt, plus,
+ * while the context is SPU_STATE_RUNNABLE, the SPU's slb_flt counter minus
+ * the context's recorded slb_flt_base.
+ */
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+	unsigned long long total = ctx->stats.slb_flt;
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return total;
+
+	return total + (ctx->spu->stats.slb_flt - ctx->stats.slb_flt_base);
+}
+
+/*
+ * Class 2 interrupt count for the context: the accumulated
+ * ctx->stats.class2_intr, plus, while the context is SPU_STATE_RUNNABLE,
+ * the SPU's class2_intr counter minus the context's class2_intr_base.
+ */
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+	unsigned long long total = ctx->stats.class2_intr;
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return total;
+
+	return total + (ctx->spu->stats.class2_intr -
+			ctx->stats.class2_intr_base);
+}
+
+
+/*
+ * seq_file show routine for "stat": one line holding the current
+ * utilization state name followed by twelve counters (four per-state times
+ * in ms, voluntary/involuntary context switches, SLB/hash/minor/major
+ * faults, class 2 interrupts and libassist).
+ *
+ * Runs between spu_acquire()/spu_release(); returns the spu_acquire() error
+ * if that fails, otherwise 0.
+ */
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	unsigned long long t_user, t_system, t_iowait, t_loaded;
+	unsigned long long slb_flts, class2_intrs;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	t_user = spufs_acct_time(ctx, SPU_UTIL_USER);
+	t_system = spufs_acct_time(ctx, SPU_UTIL_SYSTEM);
+	t_iowait = spufs_acct_time(ctx, SPU_UTIL_IOWAIT);
+	t_loaded = spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED);
+	slb_flts = spufs_slb_flts(ctx);
+	class2_intrs = spufs_class2_intrs(ctx);
+
+	seq_printf(s, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		ctx_state_names[ctx->stats.util_state],
+		t_user,
+		t_system,
+		t_iowait,
+		t_loaded,
+		ctx->stats.vol_ctx_switch,
+		ctx->stats.invol_ctx_switch,
+		slb_flts,
+		ctx->stats.hash_flt,
+		ctx->stats.min_flt,
+		ctx->stats.maj_flt,
+		class2_intrs,
+		ctx->stats.libassist);
+
+	spu_release(ctx);
+
+	return 0;
+}
+
+/* open(): set up a single-record seq_file backed by spufs_show_stat(). */
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_stat, ctx);
+}
+
+/* File operations for "stat": a single_open() seq_file. */
+static const struct file_operations spufs_stat_fops = {
+	.open		= spufs_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Number of entries currently held in the switch log ring: the distance
+ * from tail to head, modulo SWITCH_LOG_BUFSIZE.  Dereferences
+ * ctx->switch_log, so the log must be allocated.
+ */
+static inline int spufs_switch_log_used(struct spu_context *ctx)
+{
+	return (ctx->switch_log->head - ctx->switch_log->tail) %
+		SWITCH_LOG_BUFSIZE;
+}
+
+/* Free entries in the switch log ring: SWITCH_LOG_BUFSIZE minus used. */
+static inline int spufs_switch_log_avail(struct spu_context *ctx)
+{
+	int used = spufs_switch_log_used(ctx);
+
+	return SWITCH_LOG_BUFSIZE - used;
+}
+
+/*
+ * open() for "switch_log": allocate the context's switch log ring.
+ *
+ * Only one log can exist per context at a time.  Returns 0 on success,
+ * -EBUSY if ctx->switch_log is already set, -ENOMEM if the allocation
+ * fails, or the error from spu_acquire().
+ */
+static int spufs_switch_log_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	if (ctx->switch_log) {
+		rc = -EBUSY;
+	} else {
+		ctx->switch_log = kmalloc(struct_size(ctx->switch_log, log,
+					  SWITCH_LOG_BUFSIZE), GFP_KERNEL);
+		if (ctx->switch_log) {
+			/* Start with an empty ring and no waiters. */
+			ctx->switch_log->head = ctx->switch_log->tail = 0;
+			init_waitqueue_head(&ctx->switch_log->wait);
+			rc = 0;
+		} else {
+			rc = -ENOMEM;
+		}
+	}
+
+	spu_release(ctx);
+	return rc;
+}
+
+/*
+ * release() for "switch_log": free the context's switch log ring and clear
+ * ctx->switch_log so the file can be opened again.
+ *
+ * The lock is taken uninterruptibly.  release() runs only once per open
+ * file, so bailing out on a failed interruptible acquire (as the previous
+ * spu_acquire() call could) would leak the buffer and leave
+ * ctx->switch_log set, making every later open fail with -EBUSY.
+ * NOTE(review): this assumes ctx->state_mutex is the lock spu_acquire()
+ * takes, as the comments on the switch log reader/notifier indicate -
+ * confirm against spufs.h.
+ *
+ * Always returns 0.
+ */
+static int spufs_switch_log_release(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	mutex_lock(&ctx->state_mutex);
+	kfree(ctx->switch_log);
+	ctx->switch_log = NULL;
+	mutex_unlock(&ctx->state_mutex);
+
+	return 0;
+}
+
+/*
+ * Format the entry at the tail of the switch log into @tbuf (at most @n
+ * bytes) as "<sec>.<nsec> <spu_id> <type> <val> <timebase>\n".
+ *
+ * Does not advance the tail.  Returns snprintf()'s result.
+ */
+static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
+{
+	struct switch_log_entry *entry =
+		&ctx->switch_log->log[ctx->switch_log->tail %
+				      SWITCH_LOG_BUFSIZE];
+
+	return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+			(unsigned long long) entry->tstamp.tv_sec,
+			(unsigned int) entry->tstamp.tv_nsec,
+			entry->spu_id,
+			(unsigned int) entry->type,
+			(unsigned int) entry->val,
+			(unsigned long long) entry->timebase);
+}
+
+/*
+ * read() for "switch_log": copy whole formatted log records to user space.
+ *
+ * Records are consumed from the tail of the ring, one text line each.  When
+ * the ring is empty the call returns what it has gathered so far, fails
+ * with -EAGAIN for O_NONBLOCK files, or sleeps until an entry is logged.
+ * A record is only handed out if it is strictly smaller than the space
+ * left in the user buffer; otherwise the data gathered so far is returned.
+ *
+ * Returns the number of bytes copied, or, if nothing was copied, 0 or a
+ * negative error (-EINVAL for a NULL buffer, -EAGAIN, -EFAULT, or the
+ * error from spu_acquire()/spufs_wait()).
+ */
+static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
+			     size_t len, loff_t *ppos)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int error = 0, cnt = 0;
+
+	if (!buf)
+		return -EINVAL;
+
+	error = spu_acquire(ctx);
+	if (error)
+		return error;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		if (spufs_switch_log_used(ctx) == 0) {
+			if (cnt > 0) {
+				/* If there's data ready to go, we can
+				 * just return straight away */
+				break;
+
+			} else if (file->f_flags & O_NONBLOCK) {
+				error = -EAGAIN;
+				break;
+
+			} else {
+				/* spufs_wait will drop the mutex and
+				 * re-acquire, but since we're in read(), the
+				 * file cannot be _released (and so
+				 * ctx->switch_log is stable).
+				 */
+				error = spufs_wait(ctx->switch_log->wait,
+						spufs_switch_log_used(ctx) > 0);
+
+				/* On error, spufs_wait returns without the
+				 * state mutex held */
+				if (error)
+					return error;
+
+				/* We may have had entries read from underneath
+				 * us while we dropped the mutex in spufs_wait,
+				 * so re-check */
+				if (spufs_switch_log_used(ctx) == 0)
+					continue;
+			}
+		}
+
+		width = switch_log_sprint(ctx, tbuf, sizeof(tbuf));
+
+		/*
+		 * The record must fit in what is LEFT of the user buffer,
+		 * not in the buffer as a whole: comparing against len alone
+		 * let later records be written past buf + len.  If it does
+		 * not fit, return the partial buffer gathered so far.
+		 */
+		if (width >= len - cnt)
+			break;
+
+		/*
+		 * copy_to_user() returns the number of bytes it could not
+		 * copy, not an errno, so translate a failure explicitly.
+		 */
+		if (copy_to_user(buf + cnt, tbuf, width)) {
+			error = -EFAULT;
+			break;
+		}
+
+		/* Consume the entry only once it has reached user space. */
+		ctx->switch_log->tail =
+			(ctx->switch_log->tail + 1) % SWITCH_LOG_BUFSIZE;
+		cnt += width;
+	}
+
+	spu_release(ctx);
+
+	return cnt == 0 ? error : cnt;
+}
+
+/*
+ * poll() for "switch_log": register on the log's wait queue and report
+ * EPOLLIN when the ring holds at least one entry.
+ *
+ * The lock is taken uninterruptibly because a poll handler can only return
+ * an event mask: the previous code returned the negative errno from a
+ * failed spu_acquire() through __poll_t, which callers would read as a
+ * mask with almost every event bit set.
+ * NOTE(review): this assumes ctx->state_mutex is the lock spu_acquire()
+ * takes, as the comments on the switch log reader/notifier indicate -
+ * confirm against spufs.h.
+ */
+static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	__poll_t mask = 0;
+
+	poll_wait(file, &ctx->switch_log->wait, wait);
+
+	mutex_lock(&ctx->state_mutex);
+
+	if (spufs_switch_log_used(ctx) > 0)
+		mask |= EPOLLIN;
+
+	mutex_unlock(&ctx->state_mutex);
+
+	return mask;
+}
+
+/*
+ * File operations for "switch_log": open allocates the ring, release frees
+ * it, read and poll consume it; seeking is refused via no_llseek.
+ */
+static const struct file_operations spufs_switch_log_fops = {
+	.open		= spufs_switch_log_open,
+	.read		= spufs_switch_log_read,
+	.poll		= spufs_switch_log_poll,
+	.release	= spufs_switch_log_release,
+	.llseek		= no_llseek,
+};
+
+/**
+ * Log a context switch event to a switch log reader.
+ *
+ * Does nothing when the context has no switch log.  An entry (timestamp,
+ * timebase, SPU number or -1 when @spu is NULL, @type and @val) is appended
+ * only while more than one slot is free; readers are woken either way.
+ *
+ * Must be called with ctx->state_mutex held.
+ */
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val)
+{
+	struct switch_log_entry *entry;
+
+	if (!ctx->switch_log)
+		return;
+
+	if (spufs_switch_log_avail(ctx) > 1) {
+		entry = &ctx->switch_log->log[ctx->switch_log->head];
+
+		ktime_get_ts64(&entry->tstamp);
+		entry->timebase = get_tb();
+		if (spu)
+			entry->spu_id = spu->number;
+		else
+			entry->spu_id = -1;
+		entry->type = type;
+		entry->val = val;
+
+		/* Publish the entry by advancing the ring's head. */
+		ctx->switch_log->head =
+			(ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE;
+	}
+
+	wake_up(&ctx->switch_log->wait);
+}
+
+/*
+ * seq_file show routine for the ".ctx" debug file: one line summarising
+ * the context (state, flags, scheduler flags, priority, time slice, SPU
+ * number, run-queue membership, pending class 0/1 data, MFC control and
+ * the run control / status values obtained through ctx->ops).
+ *
+ * Everything is gathered with ctx->state_mutex held.  Always returns 0.
+ */
+static int spufs_show_ctx(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	struct spu *spu;
+	u64 mfc_control_RW;
+
+	mutex_lock(&ctx->state_mutex);
+
+	spu = ctx->spu;
+	if (!spu) {
+		/* No SPU attached: use the copy in the context save area. */
+		mfc_control_RW = ctx->csa.priv2.mfc_control_RW;
+	} else {
+		struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+		/* SPU attached: read the register under its register lock. */
+		spin_lock_irq(&spu->register_lock);
+		mfc_control_RW = in_be64(&priv2->mfc_control_RW);
+		spin_unlock_irq(&spu->register_lock);
+	}
+
+	seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)"
+		" %c %llx %llx %llx %llx %x %x\n",
+		ctx->state == SPU_STATE_SAVED ? 'S' : 'R',
+		ctx->flags,
+		ctx->sched_flags,
+		ctx->prio,
+		ctx->time_slice,
+		spu ? spu->number : -1,
+		list_empty(&ctx->rq) ? ' ' : 'q',
+		ctx->csa.class_0_pending,
+		ctx->csa.class_0_dar,
+		ctx->csa.class_1_dsisr,
+		mfc_control_RW,
+		ctx->ops->runcntl_read(ctx),
+		ctx->ops->status_read(ctx));
+
+	mutex_unlock(&ctx->state_mutex);
+
+	return 0;
+}
+
+/* open(): set up a single-record seq_file backed by spufs_show_ctx(). */
+static int spufs_ctx_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_ctx, ctx);
+}
+
+/* File operations for ".ctx": a single_open() seq_file. */
+static const struct file_operations spufs_ctx_fops = {
+	.open           = spufs_ctx_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+/*
+ * File table for a context directory, terminated by an empty entry.
+ * Each entry gives a file name, its file_operations, the permission bits
+ * and, for some files, a fourth value that looks like the file size.
+ * NOTE(review): struct spufs_tree_descr is declared outside this chunk;
+ * presumably this is the layout used for ordinary (schedulable) contexts,
+ * as opposed to spufs_dir_nosched_contents - confirm against the caller.
+ */
+const struct spufs_tree_descr spufs_dir_contents[] = {
+	{ "capabilities", &spufs_caps_fops, 0444, },
+	{ "mem",  &spufs_mem_fops,  0666, LS_SIZE, },
+	{ "regs", &spufs_regs_fops,  0666, sizeof(struct spu_reg128[128]), },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+	{ "signal1", &spufs_signal1_fops, 0666, },
+	{ "signal2", &spufs_signal2_fops, 0666, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), },
+	{ "lslr", &spufs_lslr_ops, 0444, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "srr0", &spufs_srr0_ops, 0666, },
+	{ "decr", &spufs_decr_ops, 0666, },
+	{ "decr_status", &spufs_decr_status_ops, 0666, },
+	{ "event_mask", &spufs_event_mask_ops, 0666, },
+	{ "event_status", &spufs_event_status_ops, 0444, },
+	{ "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), },
+	{ "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), },
+	{ "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), },
+	{ "dma_info", &spufs_dma_info_fops, 0444,
+		sizeof(struct spu_dma_info), },
+	{ "proxydma_info", &spufs_proxydma_info_fops, 0444,
+		sizeof(struct spu_proxydma_info)},
+	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
+	{ "switch_log", &spufs_switch_log_fops, 0444 },
+	{},
+};
+
+/*
+ * Reduced file table, terminated by an empty entry.  Compared with
+ * spufs_dir_contents it omits the register/state files (regs, fpcr, lslr,
+ * srr0, decr, event_*, *_info, switch_log) and uses the write-only
+ * *_nosched_fops variants for signal1/signal2.
+ * NOTE(review): presumably used for contexts created with
+ * SPU_CREATE_NOSCHED - confirm against the caller.
+ */
+const struct spufs_tree_descr spufs_dir_nosched_contents[] = {
+	{ "capabilities", &spufs_caps_fops, 0444, },
+	{ "mem",  &spufs_mem_fops,  0666, LS_SIZE, },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+	{ "signal1", &spufs_signal1_nosched_fops, 0222, },
+	{ "signal2", &spufs_signal2_nosched_fops, 0222, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
+	{},
+};
+
+/*
+ * Debug-only file table: just the read-only ".ctx" summary file.
+ * NOTE(review): presumably added to a context directory only when the
+ * filesystem is mounted with debugging enabled - confirm against the caller.
+ */
+const struct spufs_tree_descr spufs_dir_debug_contents[] = {
+	{ ".ctx", &spufs_ctx_fops, 0444, },
+	{},
+};
+
+/*
+ * Coredump note table, terminated by a NULL-named entry.  Each entry names
+ * a note and supplies either a dump callback (second field) or a u64
+ * getter (third field), never both, followed by a size.
+ * NOTE(review): struct spufs_coredump_reader is declared outside this
+ * chunk.  The size 19 used with getters presumably is the longest output
+ * of the "0x%llx\n" format (2 + 16 + 1 characters) - confirm against the
+ * coredump code.
+ */
+const struct spufs_coredump_reader spufs_coredump_read[] = {
+	{ "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])},
+	{ "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) },
+	{ "lslr", NULL, spufs_lslr_get, 19 },
+	{ "decr", NULL, spufs_decr_get, 19 },
+	{ "decr_status", NULL, spufs_decr_status_get, 19 },
+	{ "mem", spufs_mem_dump, NULL, LS_SIZE, },
+	{ "signal1", spufs_signal1_dump, NULL, sizeof(u32) },
+	{ "signal1_type", NULL, spufs_signal1_type_get, 19 },
+	{ "signal2", spufs_signal2_dump, NULL, sizeof(u32) },
+	{ "signal2_type", NULL, spufs_signal2_type_get, 19 },
+	{ "event_mask", NULL, spufs_event_mask_get, 19 },
+	{ "event_status", NULL, spufs_event_status_get, 19 },
+	{ "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) },
+	{ "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) },
+	{ "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)},
+	{ "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)},
+	{ "proxydma_info", spufs_proxydma_info_dump,
+			   NULL, sizeof(struct spu_proxydma_info)},
+	{ "object-id", NULL, spufs_object_id_get, 19 },
+	{ "npc", NULL, spufs_npc_get, 19 },
+	{ NULL },
+};
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
new file mode 100644
index 0000000000..827d338dea
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+/*
+ * Allocate a zeroed SPU gang and initialise its refcount, its two mutexes
+ * and its two list heads.  Returns the new gang, or NULL if the allocation
+ * fails.
+ */
+struct spu_gang *alloc_spu_gang(void)
+{
+	struct spu_gang *gang = kzalloc(sizeof(*gang), GFP_KERNEL);
+
+	if (gang) {
+		kref_init(&gang->kref);
+		mutex_init(&gang->mutex);
+		mutex_init(&gang->aff_mutex);
+		INIT_LIST_HEAD(&gang->list);
+		INIT_LIST_HEAD(&gang->aff_list_head);
+	}
+
+	return gang;
+}
+
+/*
+ * kref release callback: free the gang.  Warns if it still counts contexts
+ * or its context list is not empty.
+ */
+static void destroy_spu_gang(struct kref *kref)
+{
+	struct spu_gang *gang = container_of(kref, struct spu_gang, kref);
+
+	WARN_ON(gang->contexts || !list_empty(&gang->list));
+	kfree(gang);
+}
+
+/* Take a reference on @gang and hand the same pointer back. */
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+	struct kref *ref = &gang->kref;
+
+	kref_get(ref);
+
+	return gang;
+}
+
+/*
+ * Drop a reference on @gang; destroy_spu_gang() runs when the last one
+ * goes.  Returns kref_put()'s result.
+ */
+int put_spu_gang(struct spu_gang *gang)
+{
+	struct kref *ref = &gang->kref;
+
+	return kref_put(ref, &destroy_spu_gang);
+}
+
+/*
+ * Attach @ctx to @gang under the gang mutex: the context takes a reference
+ * on the gang, is linked into the gang's list, and the gang's context count
+ * is bumped.
+ */
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+
+	/* Same effect as ctx->gang = get_spu_gang(gang). */
+	kref_get(&gang->kref);
+	ctx->gang = gang;
+
+	list_add(&ctx->gang_list, &gang->list);
+	gang->contexts++;
+
+	mutex_unlock(&gang->mutex);
+}
+
+/*
+ * Detach @ctx from @gang under the gang mutex, then drop the reference the
+ * context held on the gang.
+ *
+ * If the context was on an affinity list it is unlinked from it and the
+ * gang's AFF_OFFSETS_SET flag is cleared.  Warns if @ctx does not belong to
+ * @gang.
+ */
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	bool on_aff_list;
+
+	mutex_lock(&gang->mutex);
+	WARN_ON(ctx->gang != gang);
+
+	on_aff_list = !list_empty(&ctx->aff_list);
+	if (on_aff_list) {
+		list_del_init(&ctx->aff_list);
+		gang->aff_flags &= ~AFF_OFFSETS_SET;
+	}
+
+	list_del_init(&ctx->gang_list);
+	gang->contexts--;
+	mutex_unlock(&gang->mutex);
+
+	/* May free the gang, so it must come after the unlock. */
+	put_spu_gang(gang);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
new file mode 100644
index 0000000000..8deaf786ed
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* hw_ops.c - query/set operations on active SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Read one word from the SPU's outbound mailbox register.
+ *
+ * Under the SPU register lock: if the low byte of mb_stat_R is non-zero,
+ * pu_mb_R is read into *@data and 4 is returned; otherwise *@data is left
+ * untouched and 0 is returned.
+ */
+static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int nread = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	if (in_be32(&prob->mb_stat_R) & 0x0000ff) {
+		*data = in_be32(&prob->pu_mb_R);
+		nread = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return nread;
+}
+
+/* Return the current value of the SPU's mb_stat_R register (no locking). */
+static u32 spu_hw_mbox_stat_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->mb_stat_R);
+}
+
+/*
+ * Poll the SPU mailbox status for the requested @events.
+ *
+ * For each direction asked for (EPOLLIN/EPOLLRDNORM for reading,
+ * EPOLLOUT/EPOLLWRNORM for writing): if the matching field of mb_stat_R is
+ * non-zero, the corresponding bits are set in the returned mask; otherwise
+ * the matching class 2 interrupt is first acknowledged and then enabled so
+ * the caller gets notified later.  Runs under the SPU register lock.
+ *
+ * Returns the mask of requested events that are ready right now.
+ */
+static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events)
+{
+	struct spu *spu = ctx->spu;
+	__poll_t ret = 0;
+	u32 stat;
+
+	spin_lock_irq(&spu->register_lock);
+	stat = in_be32(&spu->problem->mb_stat_R);
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (EPOLLIN | EPOLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		else {
+			spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
+			spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		}
+	}
+	if (events & (EPOLLOUT | EPOLLWRNORM)) {
+		if (stat & 0x00ff00)
+			/*
+			 * Accumulate rather than assign: a caller asking for
+			 * both directions must not lose the read-ready bits
+			 * set above.
+			 */
+			ret |= EPOLLOUT | EPOLLWRNORM;
+		else {
+			spu_int_stat_clear(spu, 2,
+					CLASS2_MAILBOX_THRESHOLD_INTR);
+			spu_int_mask_or(spu, 2,
+					CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		}
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+/*
+ * Read one word from the SPU's interrupting mailbox.
+ *
+ * Under the SPU register lock: if bits 16-23 of mb_stat_R are non-zero,
+ * puint_mb_R is read into *@data and 4 is returned; otherwise the class 2
+ * mailbox interrupt is enabled so the caller can be woken later, and 0 is
+ * returned.
+ */
+static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int nread = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	if (!(in_be32(&prob->mb_stat_R) & 0xff0000)) {
+		/* make sure we get woken up by the interrupt */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+	} else {
+		/* read the first available word */
+		*data = in_be64(&priv2->puint_mb_R);
+		nread = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return nread;
+}
+
+/*
+ * Write one word to the SPU's inbound mailbox.
+ *
+ * Under the SPU register lock: if bits 8-15 of mb_stat_R are non-zero
+ * (space available), @data is written to spu_mb_W and 4 is returned;
+ * otherwise the class 2 mailbox-threshold interrupt is enabled so the
+ * caller can be woken when space appears, and 0 is returned.
+ */
+static int spu_hw_wbox_write(struct spu_context *ctx, u32 data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int nwritten = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	if (!(in_be32(&prob->mb_stat_R) & 0x00ff00)) {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+	} else {
+		/* we have space to write wbox_data to */
+		out_be32(&prob->spu_mb_W, data);
+		nwritten = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return nwritten;
+}
+
+/* Write @data to the SPU's signal_notify1 register. */
+static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	out_be32(&prob->signal_notify1, data);
+}
+
+/* Write @data to the SPU's signal_notify2 register. */
+static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	out_be32(&prob->signal_notify2, data);
+}
+
+/*
+ * Set (non-zero @val) or clear (@val == 0) bit 0 of spu_cfg_RW with a
+ * read-modify-write under the SPU register lock.
+ */
+static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 cfg;
+
+	spin_lock_irq(&spu->register_lock);
+	cfg = in_be64(&priv2->spu_cfg_RW);
+	cfg = val ? (cfg | 1) : (cfg & ~1);
+	out_be64(&priv2->spu_cfg_RW, cfg);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/* Return 1 if bit 0 of spu_cfg_RW is set, otherwise 0 (no locking). */
+static u64 spu_hw_signal1_type_get(struct spu_context *ctx)
+{
+	u64 cfg = in_be64(&ctx->spu->priv2->spu_cfg_RW);
+
+	return (cfg & 1) ? 1 : 0;
+}
+
+/*
+ * Set (non-zero @val) or clear (@val == 0) bit 1 of spu_cfg_RW with a
+ * read-modify-write under the SPU register lock.
+ */
+static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 cfg;
+
+	spin_lock_irq(&spu->register_lock);
+	cfg = in_be64(&priv2->spu_cfg_RW);
+	cfg = val ? (cfg | 2) : (cfg & ~2);
+	out_be64(&priv2->spu_cfg_RW, cfg);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/* Return 1 if bit 1 of spu_cfg_RW is set, otherwise 0 (no locking). */
+static u64 spu_hw_signal2_type_get(struct spu_context *ctx)
+{
+	u64 cfg = in_be64(&ctx->spu->priv2->spu_cfg_RW);
+
+	return (cfg & 2) ? 1 : 0;
+}
+
+/* Return the current value of the SPU's spu_npc_RW register. */
+static u32 spu_hw_npc_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->spu_npc_RW);
+}
+
+/* Write @val to the SPU's spu_npc_RW register. */
+static void spu_hw_npc_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	out_be32(&prob->spu_npc_RW, val);
+}
+
+/* Return the current value of the SPU's spu_status_R register. */
+static u32 spu_hw_status_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->spu_status_R);
+}
+
+/* Return the local store pointer of the SPU the context is running on. */
+static char *spu_hw_get_ls(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+
+	return spu->local_store;
+}
+
+/* Write @val to the SPU's spu_privcntl_RW register (no locking here). */
+static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	out_be64(&priv2->spu_privcntl_RW, val);
+}
+
+/* Return the current value of the SPU's spu_runcntl_RW register. */
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->spu_runcntl_RW);
+}
+
+/*
+ * Write @val to spu_runcntl_RW under the SPU register lock.  When @val has
+ * SPU_RUNCNTL_ISOLATE set, the privileged control register is first loaded
+ * with SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK, inside the same lock.
+ */
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	const bool isolate = (val & SPU_RUNCNTL_ISOLATE) != 0;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+
+	if (isolate)
+		spu_hw_privcntl_write(ctx,
+			SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK);
+
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
+
+	spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+/*
+ * Request an SPU stop by writing SPU_RUNCNTL_STOP to spu_runcntl_RW, then
+ * busy-wait until SPU_STATUS_RUNNING clears in spu_status_R.  The whole
+ * sequence runs with the SPU register lock held.
+ */
+static void spu_hw_runcntl_stop(struct spu_context *ctx)
+{
+	spin_lock_irq(&ctx->spu->register_lock);
+
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	for (;;) {
+		u32 status = in_be32(&ctx->spu->problem->spu_status_R);
+
+		if (!(status & SPU_STATUS_RUNNING))
+			break;
+		cpu_relax();
+	}
+
+	spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+/*
+ * Set MFC_STATE1_MASTER_RUN_CONTROL_MASK in the MFC SR1 value with a
+ * read-modify-write under the SPU register lock.
+ */
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu);
+	sr1 |= MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Clear MFC_STATE1_MASTER_RUN_CONTROL_MASK in the MFC SR1 value with a
+ * read-modify-write under the SPU register lock.
+ */
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu);
+	sr1 &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Program an MFC tag status query.
+ *
+ * Under the SPU register lock: if dma_querytype_RW is non-zero nothing is
+ * written and -EAGAIN is returned; otherwise @mask goes to dma_querymask_RW
+ * and @mode to dma_querytype_RW, and 0 is returned.
+ */
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+	int ret = -EAGAIN;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	if (!in_be32(&prob->dma_querytype_RW)) {
+		out_be32(&prob->dma_querymask_RW, mask);
+		out_be32(&prob->dma_querytype_RW, mode);
+		ret = 0;
+	}
+	spin_unlock_irq(&ctx->spu->register_lock);
+
+	return ret;
+}
+
+/* Return the current value of the SPU's dma_tagstatus_R register. */
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->dma_tagstatus_R);
+}
+
+/* Return the current value of the SPU's dma_qstatus_R register. */
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->dma_qstatus_R);
+}
+
+/*
+ * Queue one MFC DMA command.
+ *
+ * Under the SPU register lock the command's local store address, effective
+ * address, size/tag and class/cmd words are written, and the class/cmd
+ * register is read back as the enqueue status.
+ *
+ * Returns 0 when the low 16 status bits are 0, -EAGAIN when they are 2,
+ * and -EINVAL for any other value.
+ */
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+	u32 status;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	out_be32(&prob->mfc_lsa_W, cmd->lsa);
+	out_be64(&prob->mfc_ea_W, cmd->ea);
+	out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+				cmd->size << 16 | cmd->tag);
+	out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+				cmd->class << 16 | cmd->cmd);
+	status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+	spin_unlock_irq(&ctx->spu->register_lock);
+
+	status &= 0xffff;
+	if (status == 0)
+		return 0;
+	if (status == 2)
+		return -EAGAIN;
+
+	return -EINVAL;
+}
+
+/*
+ * Write MFC_CNTL_RESTART_DMA_COMMAND to mfc_control_RW, unless the SPU has
+ * SPU_CONTEXT_SWITCH_PENDING set in its flags.
+ */
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	if (test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+		return;
+
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+/*
+ * Context operations implemented by the spu_hw_* functions in this file,
+ * all of which go through ctx->spu to reach the SPU's registers.
+ */
+struct spu_context_ops spu_hw_ops = {
+	.mbox_read = spu_hw_mbox_read,
+	.mbox_stat_read = spu_hw_mbox_stat_read,
+	.mbox_stat_poll = spu_hw_mbox_stat_poll,
+	.ibox_read = spu_hw_ibox_read,
+	.wbox_write = spu_hw_wbox_write,
+	.signal1_write = spu_hw_signal1_write,
+	.signal2_write = spu_hw_signal2_write,
+	.signal1_type_set = spu_hw_signal1_type_set,
+	.signal1_type_get = spu_hw_signal1_type_get,
+	.signal2_type_set = spu_hw_signal2_type_set,
+	.signal2_type_get = spu_hw_signal2_type_get,
+	.npc_read = spu_hw_npc_read,
+	.npc_write = spu_hw_npc_write,
+	.status_read = spu_hw_status_read,
+	.get_ls = spu_hw_get_ls,
+	.privcntl_write = spu_hw_privcntl_write,
+	.runcntl_read = spu_hw_runcntl_read,
+	.runcntl_write = spu_hw_runcntl_write,
+	.runcntl_stop = spu_hw_runcntl_stop,
+	.master_start = spu_hw_master_start,
+	.master_stop = spu_hw_master_stop,
+	.set_mfc_query = spu_hw_set_mfc_query,
+	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
+	.send_mfc_command = spu_hw_send_mfc_command,
+	.restart_dma = spu_hw_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
new file mode 100644
index 0000000000..38c5be34c8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -0,0 +1,826 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/fsnotify.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/of.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/* Per-superblock spufs state, reached through sb->s_fs_info. */
+struct spufs_sb_info {
+	bool debug;	/* set by the "debug" mount option */
+};
+
+static struct kmem_cache *spufs_inode_cache;
+char *isolated_loader;
+static int isolated_loader_size;
+
+/* Return the spufs-private info attached to superblock @sb. */
+static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
+{
+	struct spufs_sb_info *sbi = sb->s_fs_info;
+
+	return sbi;
+}
+
+/*
+ * ->alloc_inode: take a spufs_inode_info from the inode cache.  The gang
+ * and context pointers start out NULL and the opener count at zero.
+ * Returns the embedded VFS inode, or NULL if the allocation fails.
+ */
+static struct inode *
+spufs_alloc_inode(struct super_block *sb)
+{
+	struct spufs_inode_info *info;
+
+	info = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	info->i_openers = 0;
+	info->i_ctx = NULL;
+	info->i_gang = NULL;
+
+	return &info->vfs_inode;
+}
+
+/* ->free_inode: hand the containing spufs_inode_info back to the cache. */
+static void spufs_free_inode(struct inode *inode)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+
+	kmem_cache_free(spufs_inode_cache, info);
+}
+
+/*
+ * Constructor passed to kmem_cache_create() for the inode cache: performs
+ * the one-time initialization of the embedded VFS inode.
+ */
+static void
+spufs_init_once(void *p)
+{
+	inode_init_once(&((struct spufs_inode_info *)p)->vfs_inode);
+}
+
+/*
+ * Allocate a new inode on @sb with the given @mode.  The inode gets a
+ * fresh inode number, is owned by the caller's fsuid/fsgid, and has its
+ * atime and mtime set to the value returned by inode_set_ctime_current().
+ * Returns NULL if new_inode() fails.
+ */
+static struct inode *
+spufs_new_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_ino = get_next_ino();
+		inode->i_mode = mode;
+		inode->i_uid = current_fsuid();
+		inode->i_gid = current_fsgid();
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+	}
+	return inode;
+}
+
+/*
+ * ->setattr for spufs files.  A size change is refused with -EINVAL
+ * unless the requested size equals the current one; all other attributes
+ * are copied into the inode, which is then marked dirty.
+ */
+static int
+spufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+	      struct iattr *attr)
+{
+	struct inode *inode = d_inode(dentry);
+	bool resize = attr->ia_valid & ATTR_SIZE;
+
+	if (resize && attr->ia_size != inode->i_size)
+		return -EINVAL;
+
+	setattr_copy(&nop_mnt_idmap, inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
+
+/*
+ * Create a regular file for @dentry on @sb, using @fops, permission bits
+ * @mode and the reported size @size, and bind it to @ctx.
+ *
+ * Returns 0 on success or -ENOSPC if no inode could be allocated; in the
+ * failure case @dentry is left untouched.
+ */
+static int
+spufs_new_file(struct super_block *sb, struct dentry *dentry,
+		const struct file_operations *fops, umode_t mode,
+		size_t size, struct spu_context *ctx)
+{
+	static const struct inode_operations spufs_file_iops = {
+		.setattr = spufs_setattr,
+	};
+	struct inode *inode = spufs_new_inode(sb, S_IFREG | mode);
+
+	if (!inode)
+		return -ENOSPC;
+
+	inode->i_op = &spufs_file_iops;
+	inode->i_fop = fops;
+	inode->i_size = size;
+	/* this context reference is dropped again in spufs_evict_inode() */
+	SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+	inode->i_private = SPUFS_I(inode)->i_ctx;
+	d_add(dentry, inode);
+	return 0;
+}
+
+/*
+ * ->evict_inode: clear the VFS inode, then drop the context and gang
+ * references recorded in the spufs inode info, if any.
+ */
+static void
+spufs_evict_inode(struct inode *inode)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx;
+	struct spu_gang *gang;
+
+	clear_inode(inode);
+
+	ctx = info->i_ctx;
+	if (ctx != NULL)
+		put_spu_context(ctx);
+
+	gang = info->i_gang;
+	if (gang != NULL)
+		put_spu_gang(gang);
+}
+
+/*
+ * Unlink every positive child of @dir and then shrink the dcache below
+ * it.  The whole operation runs under the inode lock of @dir, which this
+ * function takes and releases itself.
+ */
+static void spufs_prune_dir(struct dentry *dir)
+{
+	struct dentry *dentry, *tmp;
+
+	inode_lock(d_inode(dir));
+	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
+		spin_lock(&dentry->d_lock);
+		if (simple_positive(dentry)) {
+			/* pin and unhash the child while d_lock is held */
+			dget_dlock(dentry);
+			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
+			simple_unlink(d_inode(dir), dentry);
+			/* XXX: what was dcache_lock protecting here? Other
+			 * filesystems (IB, configfs) release dcache_lock
+			 * before unlink */
+			dput(dentry);
+		} else {
+			spin_unlock(&dentry->d_lock);
+		}
+	}
+	shrink_dcache_parent(dir);
+	inode_unlock(d_inode(dir));
+}
+
+/*
+ * Remove the context directory @dir from @parent: prune its entries,
+ * unhash it, remove it with simple_rmdir() and finally call spu_forget()
+ * on the context recorded in its inode.  Returns the result of
+ * simple_rmdir().
+ *
+ * Caller must hold parent->i_mutex
+ */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+{
+	/* remove all entries */
+	int res;
+	spufs_prune_dir(dir);
+	d_drop(dir);
+	res = simple_rmdir(parent, dir);
+	/* We have to give up the mm_struct */
+	spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
+	return res;
+}
+
+/*
+ * Populate directory @dir with one regular file per entry of @files.
+ *
+ * @files is terminated by an entry whose name is NULL or empty.  Each
+ * file is created with the descriptor's mode masked by @mode and is bound
+ * to @ctx.
+ *
+ * Returns 0 on success, -ENOMEM if a dentry could not be allocated, or
+ * the error from spufs_new_file().  Files created before a failure are
+ * left in place; spufs_mkdir() cleans up by removing the directory.
+ */
+static int spufs_fill_dir(struct dentry *dir,
+		const struct spufs_tree_descr *files, umode_t mode,
+		struct spu_context *ctx)
+{
+	while (files->name && files->name[0]) {
+		int ret;
+		struct dentry *dentry = d_alloc_name(dir, files->name);
+		if (!dentry)
+			return -ENOMEM;
+		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
+					files->mode & mode, files->size, ctx);
+		if (ret) {
+			/*
+			 * The dentry was never instantiated, so nothing else
+			 * will drop the reference taken by d_alloc_name().
+			 */
+			dput(dentry);
+			return ret;
+		}
+		files++;
+	}
+	return 0;
+}
+
+/*
+ * ->release for an open context directory: closing the file removes the
+ * context directory from its parent (warning if that fails) and then
+ * finishes with the generic dcache directory close.
+ */
+static int spufs_dir_close(struct inode *inode, struct file *file)
+{
+	struct dentry *dir = file->f_path.dentry;
+	struct inode *parent = d_inode(dir->d_parent);
+	int err;
+
+	inode_lock_nested(parent, I_MUTEX_PARENT);
+	err = spufs_rmdir(parent, dir);
+	inode_unlock(parent);
+	WARN_ON(err);
+
+	return dcache_dir_close(inode, file);
+}
+
+/*
+ * File operations installed on the directory file returned when a context
+ * is created (see spufs_context_open()).  They are the generic dcache
+ * directory operations, except that ->release removes the context
+ * directory.  spufs_assert_affinity() also compares f_op against this
+ * table to recognize context directory files.
+ */
+const struct file_operations spufs_context_fops = {
+	.open		= dcache_dir_open,
+	.release	= spufs_dir_close,
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= dcache_readdir,
+	.fsync		= noop_fsync,
+};
+EXPORT_SYMBOL_GPL(spufs_context_fops);
+
+/*
+ * Create a context directory at @dentry inside @dir.
+ *
+ * Allocates the directory inode and a new SPU context (passing the gang
+ * of @dir, which may be NULL), instantiates @dentry and fills it with the
+ * file set selected by @flags, plus the debug files when the superblock
+ * has its debug flag set.  If filling fails, the directory is removed
+ * again with spufs_rmdir().
+ *
+ * Returns 0 on success, -ENOSPC if the inode or the context cannot be
+ * allocated, or the error from spufs_fill_dir().
+ */
+static int
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+		umode_t mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_context *ctx;
+
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		return -ENOSPC;
+
+	inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR);
+	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
+	SPUFS_I(inode)->i_ctx = ctx;
+	if (!ctx) {
+		iput(inode);
+		return -ENOSPC;
+	}
+
+	ctx->flags = flags;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	/* the new directory's inode stays locked while it is populated */
+	inode_lock(inode);
+
+	dget(dentry);
+	inc_nlink(dir);
+	inc_nlink(inode);
+
+	d_instantiate(dentry, inode);
+
+	if (flags & SPU_CREATE_NOSCHED)
+		ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
+					 mode, ctx);
+	else
+		ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+
+	if (!ret && spufs_get_sb_info(dir->i_sb)->debug)
+		ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
+				mode, ctx);
+
+	/* a partially filled directory is torn down again */
+	if (ret)
+		spufs_rmdir(dir, dentry);
+
+	inode_unlock(inode);
+
+	return ret;
+}
+
+/*
+ * Open the context directory at @path read-only and install it in a new
+ * file descriptor whose file operations are spufs_context_fops.
+ * Returns the descriptor, or a negative errno if no descriptor is
+ * available or the open fails.
+ */
+static int spufs_context_open(const struct path *path)
+{
+	struct file *filp;
+	int fd = get_unused_fd_flags(0);
+
+	if (fd < 0)
+		return fd;
+
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &spufs_context_fops;
+	fd_install(fd, filp);
+	return fd;
+}
+
+/*
+ * Validate an affinity request for a context about to be created in @gang.
+ *
+ * Returns NULL when the request is valid and SPU_CREATE_AFFINITY_SPU is
+ * not set, or the neighbor context taken from @filp with a reference
+ * held (the caller must drop it with put_spu_context()).  On failure an
+ * ERR_PTR is returned:
+ *   -EINVAL  affinity is not supported, SPU_CREATE_GANG is set, @filp is
+ *            not a context directory file, or the neighbor belongs to a
+ *            different gang;
+ *   -EEXIST  the gang already has a memory-affinity reference context,
+ *            the neighbor fails the list-position check below, or no
+ *            node has enough unreserved SPUs;
+ *   -EBUSY   the gang has AFF_MERGED set.
+ */
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+						struct file *filp)
+{
+	struct spu_context *tmp, *neighbor, *err;
+	int count, node;
+	int aff_supp;
+
+	/* supported iff the first SPU of node 0 has a non-empty aff_list */
+	aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
+					struct spu, cbe_list))->aff_list);
+
+	if (!aff_supp)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_GANG)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_AFFINITY_MEM &&
+	    gang->aff_ref_ctx &&
+	    gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+		return ERR_PTR(-EEXIST);
+
+	if (gang->aff_flags & AFF_MERGED)
+		return ERR_PTR(-EBUSY);
+
+	neighbor = NULL;
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (!filp || filp->f_op != &spufs_context_fops)
+			return ERR_PTR(-EINVAL);
+
+		neighbor = get_spu_context(
+				SPUFS_I(file_inode(filp))->i_ctx);
+
+		/*
+		 * Reject a neighbor that is already on an affinity list, is
+		 * not a list head, is not the last entry, and whose
+		 * successor is not a head either.
+		 */
+		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+		    !list_entry(neighbor->aff_list.next, struct spu_context,
+		    aff_list)->aff_head) {
+			err = ERR_PTR(-EEXIST);
+			goto out_put_neighbor;
+		}
+
+		if (gang != neighbor->gang) {
+			err = ERR_PTR(-EINVAL);
+			goto out_put_neighbor;
+		}
+
+		/*
+		 * count = the new context + every context already on the
+		 * gang's affinity list (+ the neighbor if it is not yet on
+		 * a list).
+		 */
+		count = 1;
+		list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+			count++;
+		if (list_empty(&neighbor->aff_list))
+			count++;
+
+		/* look for a node with at least @count unreserved SPUs */
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			if ((cbe_spu_info[node].n_spus - atomic_read(
+				&cbe_spu_info[node].reserved_spus)) >= count)
+				break;
+		}
+
+		if (node == MAX_NUMNODES) {
+			err = ERR_PTR(-EEXIST);
+			goto out_put_neighbor;
+		}
+	}
+
+	return neighbor;
+
+out_put_neighbor:
+	put_spu_context(neighbor);
+	return err;
+}
+
+/*
+ * Record the affinity of the newly created @ctx within its gang.
+ *
+ * With SPU_CREATE_AFFINITY_MEM, @ctx becomes the gang's reference context.
+ * With SPU_CREATE_AFFINITY_SPU, @ctx is linked next to @neighbor on the
+ * gang's affinity list (adding @neighbor to the list first if needed),
+ * and becomes the reference context if the gang has none yet.
+ * @neighbor is only dereferenced when SPU_CREATE_AFFINITY_SPU is set.
+ */
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+					struct spu_context *neighbor)
+{
+	if (flags & SPU_CREATE_AFFINITY_MEM)
+		ctx->gang->aff_ref_ctx = ctx;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		/* a neighbor not yet on any list is appended as a head */
+		if (list_empty(&neighbor->aff_list)) {
+			list_add_tail(&neighbor->aff_list,
+				&ctx->gang->aff_list_head);
+			neighbor->aff_head = 1;
+		}
+
+		/*
+		 * Insert after the neighbor when it is last or followed by
+		 * a head; otherwise insert before it, taking over the head
+		 * marker if the neighbor carried it.
+		 */
+		if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+		    || list_entry(neighbor->aff_list.next, struct spu_context,
+							aff_list)->aff_head) {
+			list_add(&ctx->aff_list, &neighbor->aff_list);
+		} else  {
+			list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+			if (neighbor->aff_head) {
+				neighbor->aff_head = 0;
+				ctx->aff_head = 1;
+			}
+		}
+
+		if (!ctx->gang->aff_ref_ctx)
+			ctx->gang->aff_ref_ctx = ctx;
+	}
+}
+
+/*
+ * Create a context directory at @dentry below @inode and return an open
+ * file descriptor for it.
+ *
+ * @flags are the SPU_CREATE_* flags, @mode the requested permission bits
+ * and @aff_filp the optional neighbor context file used with
+ * SPU_CREATE_AFFINITY_SPU.
+ *
+ * Returns the new descriptor (>= 0) or a negative errno:
+ *   -EPERM   SPU_CREATE_NOSCHED without CAP_SYS_NICE;
+ *   -EINVAL  SPU_CREATE_ISOLATE without SPU_CREATE_NOSCHED, or an
+ *            affinity flag on a directory that is not a gang;
+ *   -ENODEV  SPU_CREATE_ISOLATE requested but no isolated loader;
+ * or whatever spufs_assert_affinity(), spufs_mkdir() or
+ * spufs_context_open() report.
+ */
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+			struct vfsmount *mnt, int flags, umode_t mode,
+			struct file *aff_filp)
+{
+	int ret;
+	int affinity;
+	struct spu_gang *gang;
+	struct spu_context *neighbor;
+	struct path path = {.mnt = mnt, .dentry = dentry};
+
+	if ((flags & SPU_CREATE_NOSCHED) &&
+	    !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+	    == SPU_CREATE_ISOLATE)
+		return -EINVAL;
+
+	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+		return -ENODEV;
+
+	gang = NULL;
+	neighbor = NULL;
+	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+	if (affinity) {
+		gang = SPUFS_I(inode)->i_gang;
+		if (!gang)
+			return -EINVAL;
+		/* aff_mutex is held until the affinity has been recorded */
+		mutex_lock(&gang->aff_mutex);
+		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+		if (IS_ERR(neighbor)) {
+			ret = PTR_ERR(neighbor);
+			goto out_aff_unlock;
+		}
+	}
+
+	ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
+	if (ret) {
+		/*
+		 * spufs_assert_affinity() returned the neighbor with a
+		 * reference held; drop it, as the success path does.
+		 */
+		if (neighbor)
+			put_spu_context(neighbor);
+		goto out_aff_unlock;
+	}
+
+	if (affinity) {
+		spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
+								neighbor);
+		if (neighbor)
+			put_spu_context(neighbor);
+	}
+
+	ret = spufs_context_open(&path);
+	if (ret < 0)
+		WARN_ON(spufs_rmdir(inode, dentry));
+
+out_aff_unlock:
+	if (affinity)
+		mutex_unlock(&gang->aff_mutex);
+	return ret;
+}
+
+/*
+ * Create a gang directory at @dentry inside @dir: allocate the directory
+ * inode and a new gang, attach the gang to the inode and instantiate the
+ * dentry.
+ *
+ * Returns 0 on success, -ENOSPC if no inode is available, or -ENOMEM if
+ * the gang cannot be allocated.
+ */
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct spu_gang *gang;
+	struct inode *inode;
+
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		return -ENOSPC;
+
+	inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR);
+
+	gang = alloc_spu_gang();
+	SPUFS_I(inode)->i_ctx = NULL;
+	SPUFS_I(inode)->i_gang = gang;
+	if (!gang) {
+		iput(inode);
+		return -ENOMEM;
+	}
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	d_instantiate(dentry, inode);
+	inc_nlink(dir);
+	inc_nlink(d_inode(dentry));
+	return 0;
+}
+
+/*
+ * Open the gang directory at @path read-only and install it in a new
+ * file descriptor that uses simple_dir_operations.
+ * Returns the descriptor, or a negative errno if no descriptor is
+ * available or the open fails.
+ */
+static int spufs_gang_open(const struct path *path)
+{
+	struct file *filp;
+	int fd = get_unused_fd_flags(0);
+
+	if (fd < 0)
+		return fd;
+
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &simple_dir_operations;
+	fd_install(fd, filp);
+	return fd;
+}
+
+/*
+ * Create a gang directory at @dentry below @inode and return an open
+ * file descriptor for it.  If opening fails, the directory is removed
+ * again (warning if that removal fails) and the open error is returned.
+ */
+static int spufs_create_gang(struct inode *inode,
+			struct dentry *dentry,
+			struct vfsmount *mnt, umode_t mode)
+{
+	struct path path = {.mnt = mnt, .dentry = dentry};
+	int ret;
+
+	ret = spufs_mkgang(inode, dentry, mode & 0777);
+	if (ret)
+		return ret;
+
+	ret = spufs_gang_open(&path);
+	if (ret < 0)
+		WARN_ON(simple_rmdir(inode, dentry));
+
+	return ret;
+}
+
+
+static struct file_system_type spufs_type;
+
+/*
+ * Create a gang or a context named by @dentry in the directory @path.
+ *
+ * Returns a file descriptor for the new directory, or -EINVAL when @path
+ * is not on spufs, @flags contains unknown bits, or the request is made
+ * below the root for anything but a context inside a gang; otherwise the
+ * error from spufs_create_gang() / spufs_create_context().
+ */
+long spufs_create(const struct path *path, struct dentry *dentry,
+		unsigned int flags, umode_t mode, struct file *filp)
+{
+	struct dentry *parent = path->dentry;
+	struct inode *dir = d_inode(parent);
+	int ret;
+
+	/* the parent directory has to live on spufs */
+	if (parent->d_sb->s_type != &spufs_type)
+		return -EINVAL;
+
+	/* reject flags we do not know about */
+	if (flags & ~SPU_CREATE_FLAG_ALL)
+		return -EINVAL;
+
+	/* below the root only contexts may be created, and only in a gang */
+	if (parent != parent->d_sb->s_root &&
+	    ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang))
+		return -EINVAL;
+
+	mode &= ~current_umask();
+
+	if (flags & SPU_CREATE_GANG)
+		ret = spufs_create_gang(dir, dentry, path->mnt, mode);
+	else
+		ret = spufs_create_context(dir, dentry, path->mnt, flags, mode,
+					    filp);
+	if (ret >= 0)
+		fsnotify_mkdir(dir, dentry);
+
+	return ret;
+}
+
+/* File system initialization */
+
+/*
+ * Mount-time settings collected in fc->fs_private while options are
+ * parsed; spufs_create_root() applies them to the root inode.
+ */
+struct spufs_fs_context {
+	kuid_t	uid;	/* owner of the root directory */
+	kgid_t	gid;	/* group of the root directory */
+	umode_t	mode;	/* permission bits of the root directory */
+};
+
+/* Option tokens returned by fs_parse() for spufs_fs_parameters. */
+enum {
+	Opt_uid, Opt_gid, Opt_mode, Opt_debug,
+};
+
+/* Mount options understood by spufs; "mode" is parsed as octal. */
+static const struct fs_parameter_spec spufs_fs_parameters[] = {
+	fsparam_u32	("gid",				Opt_gid),
+	fsparam_u32oct	("mode",			Opt_mode),
+	fsparam_u32	("uid",				Opt_uid),
+	fsparam_flag	("debug",			Opt_debug),
+	{}
+};
+
+/*
+ * ->show_options: emit the mount options that differ from the values
+ * tested below, based on the root inode and the superblock debug flag.
+ *
+ * NOTE(review): the mode is compared against 0775 while
+ * spufs_init_fs_context() defaults to 0755, so a default mount still
+ * prints "mode=".  The value printed is the whole i_mode, file-type bits
+ * included, not just the permission bits.  Confirm whether either is
+ * intended before changing the output format.
+ */
+static int spufs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct spufs_sb_info *sbi = spufs_get_sb_info(root->d_sb);
+	struct inode *inode = root->d_inode;
+
+	if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, inode->i_uid));
+	if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, inode->i_gid));
+	if ((inode->i_mode & S_IALLUGO) != 0775)
+		seq_printf(m, ",mode=%o", inode->i_mode);
+	if (sbi->debug)
+		seq_puts(m, ",debug");
+	return 0;
+}
+
+/*
+ * ->parse_param: parse one mount option into the pending fs context.
+ *
+ * uid/gid are mapped through the caller's user namespace and rejected
+ * via invalf() when they do not map; mode is masked to the permission
+ * bits; debug sets the flag in the future superblock info.
+ * Returns 0 on success or the negative value from fs_parse()/invalf().
+ */
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct spufs_fs_context *ctx = fc->fs_private;
+	struct spufs_sb_info *sbi = fc->s_fs_info;
+	struct fs_parse_result result;
+	int opt = fs_parse(fc, spufs_fs_parameters, param, &result);
+
+	if (opt < 0)
+		return opt;
+
+	if (opt == Opt_uid) {
+		kuid_t uid = make_kuid(current_user_ns(), result.uint_32);
+
+		if (!uid_valid(uid))
+			return invalf(fc, "Unknown uid");
+		ctx->uid = uid;
+	} else if (opt == Opt_gid) {
+		kgid_t gid = make_kgid(current_user_ns(), result.uint_32);
+
+		if (!gid_valid(gid))
+			return invalf(fc, "Unknown gid");
+		ctx->gid = gid;
+	} else if (opt == Opt_mode) {
+		ctx->mode = result.uint_32 & S_IALLUGO;
+	} else if (opt == Opt_debug) {
+		sbi->debug = true;
+	}
+
+	return 0;
+}
+
+/* Release the pages holding the copy of the isolated-mode loader. */
+static void spufs_exit_isolated_loader(void)
+{
+	unsigned long addr = (unsigned long) isolated_loader;
+	int order = get_order(isolated_loader_size);
+
+	free_pages(addr, order);
+}
+
+/*
+ * Copy the "loader" property of the /spu-isolation device tree node into
+ * freshly allocated pages and publish it through isolated_loader.
+ * Silently does nothing if the node or property is missing or the pages
+ * cannot be allocated.
+ */
+static void __init
+spufs_init_isolated_loader(void)
+{
+	struct device_node *dn = of_find_node_by_path("/spu-isolation");
+	const char *loader;
+	int size;
+
+	if (!dn)
+		return;
+
+	loader = of_get_property(dn, "loader", &size);
+	of_node_put(dn);
+	if (!loader)
+		return;
+
+	/* the loader must be aligned on a 16 byte boundary */
+	isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
+	if (!isolated_loader)
+		return;
+
+	isolated_loader_size = size;
+	memcpy(isolated_loader, loader, size);
+	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+}
+
+/*
+ * Create the root directory of @sb using the uid, gid and mode collected
+ * in the fs context.
+ * Returns 0 on success, -ENODEV if no SPU management ops are registered,
+ * or -ENOMEM if the inode or the root dentry cannot be allocated.
+ */
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
+{
+	struct spufs_fs_context *ctx = fc->fs_private;
+	struct inode *inode;
+	struct dentry *root;
+
+	if (!spu_management_ops)
+		return -ENODEV;
+
+	inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
+	if (!inode)
+		return -ENOMEM;
+
+	inode->i_uid = ctx->uid;
+	inode->i_gid = ctx->gid;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	SPUFS_I(inode)->i_ctx = NULL;
+	inc_nlink(inode);
+
+	root = d_make_root(inode);
+	sb->s_root = root;
+	return root ? 0 : -ENOMEM;
+}
+
+/* Superblock operations, installed on the superblock by spufs_fill_super(). */
+static const struct super_operations spufs_ops = {
+	.alloc_inode	= spufs_alloc_inode,
+	.free_inode	= spufs_free_inode,
+	.statfs		= simple_statfs,
+	.evict_inode	= spufs_evict_inode,
+	.show_options	= spufs_show_options,
+};
+
+/*
+ * Set up the superblock basics (operations, magic, block size and maximum
+ * file size) and create the root directory.
+ * Returns the result of spufs_create_root().
+ */
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	sb->s_op = &spufs_ops;
+	sb->s_magic = SPUFS_MAGIC;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	return spufs_create_root(sb, fc);
+}
+
+/* ->get_tree: obtain the superblock via get_tree_single() and spufs_fill_super(). */
+static int spufs_get_tree(struct fs_context *fc)
+{
+	return get_tree_single(fc, spufs_fill_super);
+}
+
+/*
+ * ->free: release what spufs_init_fs_context() allocated for @fc.
+ *
+ * Both the parsed mount settings (fc->fs_private) and the superblock info
+ * (fc->s_fs_info) are kzalloc()ed there and nothing else in this file
+ * frees them, so both are freed here.  kfree(NULL) is a no-op, which
+ * covers an s_fs_info pointer that has been cleared in the meantime.
+ */
+static void spufs_free_fc(struct fs_context *fc)
+{
+	kfree(fc->fs_private);
+	kfree(fc->s_fs_info);
+}
+
+/* fs_context operations, installed on fc->ops by spufs_init_fs_context(). */
+static const struct fs_context_operations spufs_context_ops = {
+	.free		= spufs_free_fc,
+	.parse_param	= spufs_parse_param,
+	.get_tree	= spufs_get_tree,
+};
+
+/*
+ * ->init_fs_context: allocate the mount settings and the superblock info,
+ * seed the settings with the caller's uid/gid and mode 0755, and attach
+ * everything to @fc.
+ * Returns 0 on success or -ENOMEM if either allocation fails.
+ */
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+	struct spufs_fs_context *ctx;
+	struct spufs_sb_info *sbi;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (!sbi) {
+		kfree(ctx);
+		return -ENOMEM;
+	}
+
+	ctx->uid = current_uid();
+	ctx->gid = current_gid();
+	ctx->mode = 0755;
+
+	fc->fs_private = ctx;
+	fc->s_fs_info = sbi;
+	fc->ops = &spufs_context_ops;
+	return 0;
+}
+
+/*
+ * The "spufs" filesystem type.  spufs_create() compares a superblock's
+ * s_type against this object to check that a path is on spufs.
+ */
+static struct file_system_type spufs_type = {
+	.owner = THIS_MODULE,
+	.name = "spufs",
+	.init_fs_context = spufs_init_fs_context,
+	.parameters	= spufs_fs_parameters,
+	.kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("spufs");
+
+/*
+ * Module init: create the inode cache, start the SPU scheduler, register
+ * the SPU syscalls and the filesystem, then try to set up the isolated
+ * loader (which has no failure return).
+ *
+ * Returns 0 on success, -ENODEV if no SPU management ops are registered,
+ * -ENOMEM if the inode cache cannot be created, or the error of the
+ * failing step.  Steps already completed are undone in reverse order.
+ */
+static int __init spufs_init(void)
+{
+	int ret;
+
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
+	ret = -ENOMEM;
+	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
+			sizeof(struct spufs_inode_info), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, spufs_init_once);
+
+	if (!spufs_inode_cache)
+		goto out;
+	ret = spu_sched_init();
+	if (ret)
+		goto out_cache;
+	ret = register_spu_syscalls(&spufs_calls);
+	if (ret)
+		goto out_sched;
+	ret = register_filesystem(&spufs_type);
+	if (ret)
+		goto out_syscalls;
+
+	spufs_init_isolated_loader();
+
+	return 0;
+
+	/* unwind in reverse order of setup */
+out_syscalls:
+	unregister_spu_syscalls(&spufs_calls);
+out_sched:
+	spu_sched_exit();
+out_cache:
+	kmem_cache_destroy(spufs_inode_cache);
+out:
+	return ret;
+}
+module_init(spufs_init);
+
+/*
+ * Module exit: stop the SPU scheduler, free the isolated loader pages,
+ * unregister the SPU syscalls and the filesystem, and destroy the inode
+ * cache.
+ */
+static void __exit spufs_exit(void)
+{
+	spu_sched_exit();
+	spufs_exit_isolated_loader();
+	unregister_spu_syscalls(&spufs_calls);
+	unregister_filesystem(&spufs_type);
+	kmem_cache_destroy(spufs_inode_cache);
+}
+module_exit(spufs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 0000000000..43b9dde7fd
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+#include "spufs.h"
+
+/*
+ * Allocate a zeroed local-store save area for @csa and mark every page of
+ * its ls[] array reserved.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa = vzalloc(sizeof(*lscsa));
+	unsigned long off;
+
+	if (!lscsa)
+		return -ENOMEM;
+	csa->lscsa = lscsa;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (off = 0; off < LS_SIZE; off += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(lscsa->ls + off));
+
+	return 0;
+}
+
+/*
+ * Free the local-store save area of @csa, if one was allocated, after
+ * clearing the reserved bit on each page of its ls[] array.
+ */
+void spu_free_lscsa(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa = csa->lscsa;
+	unsigned long off;
+
+	if (!lscsa)
+		return;
+
+	/* Clear reserved bit before vfree. */
+	for (off = 0; off < LS_SIZE; off += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(lscsa->ls + off));
+
+	vfree(lscsa);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
new file mode 100644
index 0000000000..ce52b87496
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: GPL-2.0
+#define DEBUG
+
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include "spufs.h"
+
+/*
+ * Interrupt-level stop callback: copy the exception state of @spu for
+ * interrupt class @irq into the context's save area and wake everyone
+ * waiting on the context's stop queue.
+ */
+void spufs_stop_callback(struct spu *spu, int irq)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	/*
+	 * Preempting a context while one of its exceptions is being handled
+	 * should not be possible, because the context switch code is written
+	 * to cope with interrupts.  Check the pointer anyway: doing nothing
+	 * is fine, since the exception is raised again once the context is
+	 * resumed.
+	 */
+	if (!ctx)
+		return;
+
+	/* Copy exception arguments into module specific structure */
+	if (irq == 0) {
+		ctx->csa.class_0_pending = spu->class_0_pending;
+		ctx->csa.class_0_dar = spu->class_0_dar;
+	} else if (irq == 1) {
+		ctx->csa.class_1_dsisr = spu->class_1_dsisr;
+		ctx->csa.class_1_dar = spu->class_1_dar;
+	}
+	/* class 2 (and anything else) carries no arguments to copy */
+
+	/* make sure the exception status has reached memory before any
+	 * thread sleeping on the context's stop queue is woken */
+	smp_wmb();
+
+	wake_up_all(&ctx->stop_wq);
+}
+
+/*
+ * Wait condition for spufs_run_spu(): store the current SPU status in
+ * *@stat and return 1 if the run loop has something to handle (the SPU
+ * stopped, a scheduler notification is active, or a class 0/1 exception
+ * is pending), 0 otherwise.
+ */
+int spu_stopped(struct spu_context *ctx, u32 *stat)
+{
+	const u32 stop_mask = SPU_STATUS_INVALID_INSTR |
+			      SPU_STATUS_SINGLE_STEP |
+			      SPU_STATUS_STOPPED_BY_HALT |
+			      SPU_STATUS_STOPPED_BY_STOP;
+	u64 dsisr;
+
+	for (;;) {
+		*stat = ctx->ops->status_read(ctx);
+		if (!(*stat & stop_mask))
+			break;
+		/*
+		 * A stop bit together with RUNNING means the SPU has not
+		 * finished stopping yet: read the register again to get
+		 * the final stopped value.
+		 */
+		if (!(*stat & SPU_STATUS_RUNNING))
+			return 1;
+	}
+
+	if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+		return 1;
+
+	dsisr = ctx->csa.class_1_dsisr;
+	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+		return 1;
+
+	return ctx->csa.class_0_pending ? 1 : 0;
+}
+
+/*
+ * Start the isolated-mode loader on the SPU backing @ctx.
+ *
+ * Returns 0 on success, or:
+ *   -ENODEV  no isolated loader is available;
+ *   -EIO     timed out purging the MFC DMA queue or waiting for the
+ *            loader (each wait is bounded by HZ jiffies);
+ *   -EACCES  the isolated load failed (the SPU is no longer running);
+ *   -EINVAL  the SPU is running but not in isolated state.
+ */
+static int spu_setup_isolated(struct spu_context *ctx)
+{
+	int ret;
+	u64 __iomem *mfc_cntl;
+	u64 sr1;
+	u32 status;
+	unsigned long timeout;
+	const u32 status_loading = SPU_STATUS_RUNNING
+		| SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+	ret = -ENODEV;
+	if (!isolated_loader)
+		goto out;
+
+	/*
+	 * We need to exclude userspace access to the context.
+	 *
+	 * To protect against memory access we invalidate all ptes
+	 * and make sure the pagefault handlers block on the mutex.
+	 */
+	spu_unmap_mappings(ctx);
+
+	mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+	/* purge the MFC DMA queue to ensure no spurious accesses before we
+	 * enter kernel mode */
+	timeout = jiffies + HZ;
+	out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+	while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+			!= MFC_CNTL_PURGE_DMA_COMPLETE) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+					__func__);
+			ret = -EIO;
+			/* sr1 has not been touched yet, so skip out_drop_priv */
+			goto out;
+		}
+		cond_resched();
+	}
+
+	/* clear purge status */
+	out_be64(mfc_cntl, 0);
+
+	/* put the SPE in kernel mode to allow access to the loader */
+	sr1 = spu_mfc_sr1_get(ctx->spu);
+	sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+	/* start the loader: its address is passed as two 32-bit halves,
+	 * high half via signal1 and low half via signal2 */
+	ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+	ctx->ops->signal2_write(ctx,
+			(unsigned long)isolated_loader & 0xffffffff);
+
+	ctx->ops->runcntl_write(ctx,
+			SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+	ret = 0;
+	timeout = jiffies + HZ;
+	/* poll until at least one of the three "loading" bits drops */
+	while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+				status_loading) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout waiting for loader\n",
+					__func__);
+			ret = -EIO;
+			goto out_drop_priv;
+		}
+		cond_resched();
+	}
+
+	if (!(status & SPU_STATUS_RUNNING)) {
+		/* If isolated LOAD has failed: run SPU, we will get a stop-and
+		 * signal later. */
+		pr_debug("%s: isolated LOAD failed\n", __func__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+		ret = -EACCES;
+		goto out_drop_priv;
+	}
+
+	if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+		/* This isn't allowed by the CBEA, but check anyway */
+		pr_debug("%s: SPU fell out of isolated mode?\n", __func__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+		ret = -EINVAL;
+		goto out_drop_priv;
+	}
+
+out_drop_priv:
+	/* Finished accessing the loader. Drop kernel mode */
+	sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+out:
+	return ret;
+}
+
+/*
+ * Prepare @ctx for running and start it.
+ *
+ * For isolated contexts the loader is set up if the SPU is not already in
+ * isolated state; for all others the privileged control mode and the
+ * program counter *@npc are written.  The run control register is then
+ * written and, for contexts without SPU_CREATE_NOSCHED, the context is
+ * activated if it is still in saved state.
+ *
+ * Returns 0 on success (with SPU_SCHED_SPU_RUN set in sched_flags) or the
+ * error from spu_activate() / spu_setup_isolated().
+ */
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
+{
+	unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+	int ret;
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	/*
+	 * NOSCHED is synchronous scheduling with respect to the caller.
+	 * The caller waits for the context to be loaded.
+	 */
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		if (ctx->state == SPU_STATE_SAVED) {
+			ret = spu_activate(ctx, 0);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/*
+	 * Apply special setup as required.
+	 */
+	if (ctx->flags & SPU_CREATE_ISOLATE) {
+		if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+			ret = spu_setup_isolated(ctx);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * If userspace has set the runcntrl register (eg, to
+		 * issue an isolated exit), we need to re-set it here
+		 */
+		runcntl = ctx->ops->runcntl_read(ctx) &
+			(SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+		if (runcntl == 0)
+			runcntl = SPU_RUNCNTL_RUNNABLE;
+	} else {
+		unsigned long privcntl;
+
+		/* single-step the SPU when the calling thread is single-stepped */
+		if (test_thread_flag(TIF_SINGLESTEP))
+			privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP;
+		else
+			privcntl = SPU_PRIVCNTL_MODE_NORMAL;
+
+		ctx->ops->privcntl_write(ctx, privcntl);
+		ctx->ops->npc_write(ctx, *npc);
+	}
+
+	ctx->ops->runcntl_write(ctx, runcntl);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		spuctx_switch_state(ctx, SPU_UTIL_USER);
+	} else {
+
+		if (ctx->state == SPU_STATE_SAVED) {
+			ret = spu_activate(ctx, 0);
+			if (ret)
+				return ret;
+		} else {
+			spuctx_switch_state(ctx, SPU_UTIL_USER);
+		}
+	}
+
+	set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	return 0;
+}
+
+/*
+ * Finish a run of @ctx: take it off the run queue, report the final
+ * status and program counter through @status and @npc, clear
+ * SPU_SCHED_SPU_RUN, log the exit and release the context.
+ * Returns -ERESTARTSYS if the caller has a signal pending, 0 otherwise.
+ */
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+			       u32 *status)
+{
+	spu_del_from_rq(ctx);
+
+	*status = ctx->ops->status_read(ctx);
+	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status);
+	spu_release(ctx);
+
+	return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+/*
+ * Restarting SPU syscalls is awkward: the usual assumption that the
+ * signal handler runs on the interrupted thread does not hold.  The
+ * handler runs in PowerPC user space code, whereas the syscall was
+ * issued from the SPU, so POSIX signal semantics can only be
+ * approximated very roughly.
+ *
+ * Translates the restart code in *@spu_ret.  Returns -ERESTARTSYS when
+ * sys_spu_run itself has to be restarted, 0 for an unexpected code
+ * (which is logged and otherwise left alone).
+ */
+static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+			  unsigned int *npc)
+{
+	switch (*spu_ret) {
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		/*
+		 * Go through the regular restart of sys_spu_run and wind
+		 * the program counter back so that the SPU syscall
+		 * callback is issued again.
+		 */
+		*npc -= 8;
+		return -ERESTARTSYS;
+	case -ERESTARTNOHAND:
+	case -ERESTART_RESTARTBLOCK:
+		/*
+		 * Restart blocks are too hard for now: hand -EINTR to the
+		 * SPU.  ERESTARTNOHAND comes from sys_pause, which gets
+		 * -EINTR as well.  We still assume that we have to be
+		 * restarted ourselves.
+		 */
+		*spu_ret = -EINTR;
+		return -ERESTARTSYS;
+	default:
+		printk(KERN_WARNING "%s: unexpected return code %ld\n",
+			__func__, *spu_ret);
+		return 0;
+	}
+}
+
+/*
+ * Service a syscall callback requested by the SPU program.
+ *
+ * The word at the current program counter in local store holds the
+ * local-store offset of a struct spu_syscall_block.  The block is copied
+ * out, the syscall is executed with the context released, and the result
+ * is written back over the start of the block before the SPU is resumed
+ * at the instruction after the pointer word.
+ *
+ * Returns 0 on success, -EFAULT if the block would not fit inside local
+ * store, or -ERESTARTSYS if the syscall has to be restarted.  In the
+ * -ERESTARTSYS case the function returns right after re-taking
+ * ctx->state_mutex, without writing a result or resuming the SPU.
+ */
+static int spu_process_callback(struct spu_context *ctx)
+{
+	struct spu_syscall_block s;
+	u32 ls_pointer, npc;
+	void __iomem *ls;
+	long spu_ret;
+	int ret;
+
+	/* get syscall block from local store */
+	npc = ctx->ops->npc_read(ctx) & ~3;
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+	ls_pointer = in_be32(ls + npc);
+	if (ls_pointer > (LS_SIZE - sizeof(s)))
+		return -EFAULT;
+	memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
+
+	/* do actual syscall without pinning the spu */
+	ret = 0;
+	spu_ret = -ENOSYS;
+	npc += 4;
+
+	/* out-of-range syscall numbers fall through with -ENOSYS */
+	if (s.nr_ret < NR_syscalls) {
+		spu_release(ctx);
+		/* do actual system call from here */
+		spu_ret = spu_sys_callback(&s);
+		if (spu_ret <= -ERESTARTSYS) {
+			ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+		}
+		mutex_lock(&ctx->state_mutex);
+		if (ret == -ERESTARTSYS)
+			return ret;
+	}
+
+	/* need to re-get the ls, as it may have changed when we released the
+	 * spu */
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+
+	/* write result, jump over indirect pointer */
+	memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
+	ctx->ops->npc_write(ctx, npc);
+	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+	return ret;
+}
+
+/*
+ * Run the SPU context @ctx until it stops, halts, single-steps, hits an
+ * error or the caller receives a signal.
+ *
+ * @npc:   in: program counter to start from; out: program counter at exit
+ *         (written by spu_run_fini()).
+ * @event: out: ctx->event_return.  It is not written when taking
+ *         run_mutex or acquiring the context fails.
+ *
+ * Returns the final SPU status word when the run ended normally, or a
+ * negative errno; see the status checks after the loop for the cases in
+ * which -ERESTARTSYS is replaced by the status or forced.  Runs are
+ * serialized per context by ctx->run_mutex.
+ */
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
+{
+	int ret;
+	u32 status;
+
+	if (mutex_lock_interruptible(&ctx->run_mutex))
+		return -ERESTARTSYS;
+
+	ctx->event_return = 0;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out_unlock;
+
+	spu_enable_spu(ctx);
+
+	spu_update_sched_info(ctx);
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
+		goto out;
+	}
+
+	do {
+		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+		if (unlikely(ret)) {
+			/*
+			 * This is nasty: we need the state_mutex for all the
+			 * bookkeeping even if the syscall was interrupted by
+			 * a signal. ewww.
+			 */
+			mutex_lock(&ctx->state_mutex);
+			break;
+		}
+		/* woken only for a scheduler notification: keep waiting
+		 * unless the SPU also stopped on a stop instruction */
+		if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+						&ctx->sched_flags))) {
+			if (!(status & SPU_STATUS_STOPPED_BY_STOP))
+				continue;
+		}
+
+		spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+		/* stop code 0x2104 requests a syscall callback; once served,
+		 * clear the stop bit so that the loop keeps running */
+		if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+		    (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+			ret = spu_process_callback(ctx);
+			if (ret)
+				break;
+			status &= ~SPU_STATUS_STOPPED_BY_STOP;
+		}
+		ret = spufs_handle_class1(ctx);
+		if (ret)
+			break;
+
+		ret = spufs_handle_class0(ctx);
+		if (ret)
+			break;
+
+		if (signal_pending(current))
+			ret = -ERESTARTSYS;
+	} while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+				      SPU_STATUS_STOPPED_BY_HALT |
+				       SPU_STATUS_SINGLE_STEP)));
+
+	spu_disable_spu(ctx);
+	/* note: this overwrites ret and re-reads status and *npc */
+	ret = spu_run_fini(ctx, npc, &status);
+	spu_yield(ctx);
+
+	/* count stops whose code matches 0x21xx under mask 0x3f00 */
+	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+		ctx->stats.libassist++;
+
+	/*
+	 * Report the status word instead of -ERESTARTSYS when the SPU
+	 * halted, single-stepped, or stopped with anything other than the
+	 * syscall-callback stop code.
+	 */
+	if ((ret == 0) ||
+	    ((ret == -ERESTARTSYS) &&
+	     ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+	      (status & SPU_STATUS_SINGLE_STEP) ||
+	      ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	       (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+		ret = status;
+
+	/* Note: we don't need to force_sig SIGTRAP on single-step
+	 * since we have TIF_SINGLESTEP set, thus the kernel will do
+	 * it upon return from the syscall anyway.
+	 */
+	if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+		ret = -ERESTARTSYS;
+
+	/* stop code 0x3fff: deliver SIGTRAP to the caller */
+	else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+	    && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+		force_sig(SIGTRAP);
+		ret = -ERESTARTSYS;
+	}
+
+out:
+	*event = ctx->event_return;
+out_unlock:
+	mutex_unlock(&ctx->run_mutex);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 0000000000..99bd027a7f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1141 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31	NUMA domains added.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/rt.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
+
+/*
+ * Run queue for SPU contexts that are waiting for a physical spu.
+ * There is one list per priority level; the bit with the same index in
+ * @bitmap is set while that list holds at least one context.
+ */
+struct spu_prio_array {
+	DECLARE_BITMAP(bitmap, MAX_PRIO);	/* populated runq[] levels */
+	struct list_head runq[MAX_PRIO];	/* waiters, indexed by ctx->prio */
+	spinlock_t runq_lock;			/* taken around run queue updates */
+	int nr_waiting;				/* contexts currently queued */
+};
+
+/* SPU load averages, updated with EXP_1/EXP_5/EXP_15 by spu_calc_load(). */
+static unsigned long spu_avenrun[3];
+/* The run queue, allocated in spu_sched_init(). */
+static struct spu_prio_array *spu_prio;
+/* The "spusched" kernel thread, woken by spusched_timer. */
+static struct task_struct *spusched_task;
+/* Scheduler tick; armed while contexts wait on the run queue. */
+static struct timer_list spusched_timer;
+/* Periodic timer that recomputes spu_avenrun[]. */
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Priority of a normal, non-rt, non-niced process (aka nice level 0).
+ */
+#define NORMAL_PRIO		120
+
+/*
+ * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK		(10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ * Both macros are expressed in SPU scheduler ticks, i.e. in units of
+ * SPUSCHED_TICK jiffies.
+ */
+#define MIN_SPU_TIMESLICE	max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))
+
+/*
+ * Scale the base timeslice @x by the distance of @prio from MAX_PRIO,
+ * never going below MIN_SPU_TIMESLICE.  The arguments are expanded without
+ * parentheses; the callers in this file pass a plain product and a plain
+ * member access, which expand as intended.
+ */
+#define SCALE_PRIO(x, prio) \
+	max(x * (MAX_PRIO - prio) / (NICE_WIDTH / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * Derive the timeslice of a context, in SPU scheduler ticks, from its
+ * priority.
+ *
+ * User-nice values [ -20 ... 0 ... 19 ] scale to timeslices of
+ * [800ms ... 100ms ... 5ms]: the higher the priority of a thread, the
+ * longer it may run in one round, and even the lowest priority thread
+ * still gets MIN_SPU_TIMESLICE.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+	/* Priorities better than nice 0 start from four times the default. */
+	ctx->time_slice = (ctx->prio < NORMAL_PRIO) ?
+		SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio) :
+		SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
+}
+
+/*
+ * Update scheduling information from the owning thread.
+ *
+ * Copies the pid, priority, scheduling policy and allowed CPU mask of the
+ * calling thread (current) into @ctx and records the CPU it is running on.
+ * The context must not be on the run queue when this is called.
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+	/*
+	 * assert that the context is not on the runqueue, so it is safe
+	 * to change its scheduling parameters.
+	 */
+	BUG_ON(!list_empty(&ctx->rq));
+
+	/*
+	 * 32-Bit assignments are atomic on powerpc, and we don't care about
+	 * memory ordering here because retrieving the controlling thread is
+	 * per definition racy.
+	 */
+	ctx->tid = current->pid;
+
+	/*
+	 * We do our own priority calculations, so we normally want
+	 * ->static_prio to start with. Unfortunately this field
+	 * contains junk for threads with a realtime scheduling
+	 * policy so we have to look at ->prio in this case.
+	 */
+	if (rt_prio(current->prio))
+		ctx->prio = current->prio;
+	else
+		ctx->prio = current->static_prio;
+	ctx->policy = current->policy;
+
+	/*
+	 * TODO: the context may be loaded, so we may need to activate
+	 * it again on a different node. But it shouldn't hurt anything
+	 * to update its parameters, because we know that the scheduler
+	 * is not actively looking at this field, since it is not on the
+	 * runqueue. The context will be rescheduled on the proper node
+	 * if it is timesliced or preempted.
+	 */
+	cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
+
+	/* Save the current cpu id for spu interrupt routing. */
+	ctx->last_ran = raw_smp_processor_id();
+}
+
+/*
+ * Refresh the scheduling parameters of @ctx from the calling thread.
+ *
+ * A runnable context is bound to an spu, so in that case the update runs
+ * under the list_mutex of the spu's node to sync with find_victim().
+ */
+void spu_update_sched_info(struct spu_context *ctx)
+{
+	struct mutex *list_mutex;
+
+	if (ctx->state != SPU_STATE_RUNNABLE) {
+		__spu_update_sched_info(ctx);
+		return;
+	}
+
+	list_mutex = &cbe_spu_info[ctx->spu->node].list_mutex;
+
+	mutex_lock(list_mutex);
+	__spu_update_sched_info(ctx);
+	mutex_unlock(list_mutex);
+}
+
+/*
+ * Return 1 if @node has CPUs and at least one of them is part of the
+ * allowed CPU mask of @ctx, 0 otherwise.
+ */
+static int __node_allowed(struct spu_context *ctx, int node)
+{
+	if (!nr_cpus_node(node))
+		return 0;
+
+	if (!cpumask_intersects(cpumask_of_node(node), &ctx->cpus_allowed))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Locked variant of __node_allowed(): evaluates the check while holding
+ * spu_prio->runq_lock and returns its result.
+ */
+static int node_allowed(struct spu_context *ctx, int node)
+{
+	int rval;
+
+	spin_lock(&spu_prio->runq_lock);
+	rval = __node_allowed(ctx, node);
+	spin_unlock(&spu_prio->runq_lock);
+
+	return rval;
+}
+
+/*
+ * Wake up the active spu_contexts.
+ *
+ * For every spu on every online node that has a context bound to it, set
+ * SPU_SCHED_NOTIFY_ACTIVE in the context's sched_flags and wake all
+ * sleepers on its stop_wq.
+ */
+void do_notify_spus_active(void)
+{
+	int node;
+
+	for_each_online_node(node) {
+		struct spu *spu;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			struct spu_context *ctx = spu->ctx;
+
+			/*
+			 * An spu that is not SPU_FREE does not necessarily
+			 * have a context: spu_get_idle() marks it SPU_USED
+			 * before __spu_schedule() binds one, and an unbind
+			 * that keeps the spu reserved leaves spu->ctx NULL.
+			 * There is nobody to notify in that window.
+			 */
+			if (spu->alloc_state == SPU_FREE || !ctx)
+				continue;
+
+			set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
+			/* make the flag visible before waking the sleepers */
+			mb();
+			wake_up_all(&ctx->stop_wq);
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+}
+
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu:	physical spu to bind to
+ * @ctx:	context to bind
+ *
+ * Links @spu and @ctx to each other, installs the spufs callbacks, restores
+ * the saved context state onto the spu and marks the context
+ * SPU_STATE_RUNNABLE.  Called from __spu_schedule() with the list_mutex of
+ * the spu's node held.
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
+{
+	spu_context_trace(spu_bind_context__enter, ctx, spu);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	/* NOSCHED contexts are accounted as reserved spus of the node. */
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+
+	/*
+	 * Remember the spu's counters at bind time; spu_unbind_context()
+	 * credits the difference to the context.
+	 */
+	ctx->stats.slb_flt_base = spu->stats.slb_flt;
+	ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
+	spu_associate_mm(spu, ctx->owner);
+
+	/* Cross-link spu and context and switch to the hardware ops. */
+	spin_lock_irq(&spu->register_lock);
+	spu->ctx = ctx;
+	spu->flags = 0;
+	ctx->spu = spu;
+	ctx->ops = &spu_hw_ops;
+	spu->pid = current->pid;
+	spu->tgid = current->tgid;
+	spu->ibox_callback = spufs_ibox_callback;
+	spu->wbox_callback = spufs_wbox_callback;
+	spu->stop_callback = spufs_stop_callback;
+	spu->mfc_callback = spufs_mfc_callback;
+	spin_unlock_irq(&spu->register_lock);
+
+	spu_unmap_mappings(ctx);
+
+	spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0);
+	spu_restore(&ctx->csa, spu);
+	spu->timestamp = jiffies;
+	ctx->state = SPU_STATE_RUNNABLE;
+
+	spuctx_switch_state(ctx, SPU_UTIL_USER);
+}
+
+/*
+ * Tell whether @spu takes part in normal scheduling: it is either unbound
+ * or bound to a context that was not created with SPU_CREATE_NOSCHED.
+ *
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+	if (!spu->ctx)
+		return 1;
+
+	return !(spu->ctx->flags & SPU_CREATE_NOSCHED);
+}
+
+/*
+ * Flag the affinity list of @gang as merged (AFF_MERGED).
+ *
+ * NOTE(review): the loop walks gang->aff_list_head and re-adds entries whose
+ * aff_list is empty, but an entry reached through that list is linked and
+ * therefore cannot have an empty aff_list, so the list_add() looks
+ * unreachable as written.  Presumably the intent was to walk every context
+ * of the gang - confirm against the gang's context list.
+ */
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+
+	list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+		if (list_empty(&ctx->aff_list))
+			list_add(&ctx->aff_list, &gang->aff_list_head);
+	}
+	gang->aff_flags |= AFF_MERGED;
+}
+
+/*
+ * Assign every context on the gang's affinity list its offset relative to
+ * the reference context: the reference context gets 0, the contexts after
+ * it 1, 2, ... and the contexts before it -1, -2, ...  Sets
+ * AFF_OFFSETS_SET when done.
+ */
+static void aff_set_offsets(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+	int offset;
+
+	/* Walk backwards from the reference context up to the list head. */
+	offset = -1;
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset--;
+	}
+
+	/*
+	 * Walk forwards.  Using the reference context's predecessor as the
+	 * iteration head makes the reference context itself the first entry
+	 * visited, so it receives offset 0.
+	 */
+	offset = 0;
+	list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset++;
+	}
+
+	gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+/*
+ * Pick the spu that serves as reference location for an affinity gang.
+ *
+ * Starting with the node of the current CPU, look for a node that @ctx is
+ * allowed on and that still has enough spus not claimed by other affinity
+ * gangs, and return the first spu on it for which sched_spu() is true
+ * (restricted to spus with memory affinity when @mem_aff is set).
+ * Returns NULL if no spu qualifies.
+ *
+ * @group_size and @lowest_offset are not used by the current
+ * implementation.
+ */
+static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+		 int group_size, int lowest_offset)
+{
+	struct spu *spu;
+	int node, n;
+
+	/*
+	 * TODO: A better algorithm could be used to find a good spu to be
+	 *       used as reference location for the ctxs chain.
+	 */
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		/*
+		 * "available_spus" counts how many spus are not potentially
+		 * going to be used by other affinity gangs whose reference
+		 * context is already in place. Although this code seeks to
+		 * avoid having affinity gangs with a summed amount of
+		 * contexts bigger than the amount of spus in the node,
+		 * this may happen sporadically. In this case, available_spus
+		 * becomes negative, which is harmless.
+		 */
+		int available_spus;
+
+		/* wrap around to node 0 after the last node */
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		available_spus = 0;
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			/*
+			 * An spu running the offset-0 context of a gang with
+			 * a reference spu stands for that whole gang.
+			 */
+			if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+					&& spu->ctx->gang->aff_ref_spu)
+				available_spus -= spu->ctx->gang->contexts;
+			available_spus++;
+		}
+		if (available_spus < ctx->gang->contexts) {
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+			continue;
+		}
+
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if ((!mem_aff || spu->has_mem_affinity) &&
+							sched_spu(spu)) {
+				mutex_unlock(&cbe_spu_info[node].list_mutex);
+				return spu;
+			}
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+	return NULL;
+}
+
+/*
+ * Choose the reference spu of @gang and store it in gang->aff_ref_spu
+ * (NULL if none could be found).
+ *
+ * The number of contexts on the affinity list and the offset of the first
+ * context of the chain are computed and handed to aff_ref_location().
+ */
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+	int mem_aff, gs, lowest_offset;
+	struct spu_context *tmp, *ctx;
+
+	mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+	lowest_offset = 0;
+	gs = 0;
+
+	/* gs: number of contexts on the affinity list */
+	list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+		gs++;
+
+	/*
+	 * Walk backwards from the reference context; the last context seen
+	 * before the list head provides lowest_offset.
+	 */
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		lowest_offset = ctx->aff_offset;
+	}
+
+	gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+							lowest_offset);
+}
+
+/*
+ * Walk the spu affinity list starting at @ref and return the spu that lies
+ * @offset schedulable spus away from it: forwards for a non-negative
+ * offset, backwards for a negative one.  Spus for which sched_spu() is
+ * false are stepped over without being counted.  Every spu visited must
+ * belong to @node.
+ *
+ * NOTE(review): if the walk completes before the offset is used up, the
+ * returned pointer is whatever the list iterator ends on - confirm that
+ * callers never ask for an offset beyond the chain.
+ */
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+	struct spu *spu;
+
+	spu = NULL;
+	if (offset >= 0) {
+		/* iteration head is ref's predecessor, so ref comes first */
+		list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+			BUG_ON(spu->node != node);
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset--;
+		}
+	} else {
+		/* iteration head is ref's successor, so ref comes first */
+		list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+			BUG_ON(spu->node != node);
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset++;
+		}
+	}
+
+	return spu;
+}
+
+/*
+ * has_affinity is called each time a context is going to be scheduled.
+ * It returns non-zero if @ctx is part of an affinity chain and a reference
+ * spu is available for its gang (gang->aff_ref_spu), and zero otherwise.
+ *
+ * When the gang has no reference spu, or none of its affinity contexts is
+ * scheduled any more, the affinity list is prepared if necessary and a new
+ * reference spu is looked up.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+	struct spu_gang *gang = ctx->gang;
+
+	/* not on an affinity list: nothing to honour */
+	if (list_empty(&ctx->aff_list))
+		return 0;
+
+	/* no affinity context of the gang is placed: forget the old spot */
+	if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+		ctx->gang->aff_ref_spu = NULL;
+
+	if (!gang->aff_ref_spu) {
+		if (!(gang->aff_flags & AFF_MERGED))
+			aff_merge_remaining_ctxs(gang);
+		if (!(gang->aff_flags & AFF_OFFSETS_SET))
+			aff_set_offsets(gang);
+		aff_set_ref_point_location(gang);
+	}
+
+	return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu:	physical spu to unbind from
+ * @ctx:	context to unbind
+ *
+ * Saves the spu state into the context save area, clears the spufs
+ * callbacks, unlinks @spu and @ctx from each other and marks the context
+ * SPU_STATE_SAVED.  The callers in this file hold the list_mutex of the
+ * spu's node.
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+	u32 status;
+
+	spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	/* undo the reserved_spus accounting done in spu_bind_context() */
+ 	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+	if (ctx->gang)
+		/*
+		 * If ctx->gang->aff_sched_count is positive, SPU affinity is
+		 * being considered in this gang. Using atomic_dec_if_positive
+		 * allows us to skip an explicit check for affinity in this
+		 * gang.
+		 */
+		atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+	spu_unmap_mappings(ctx);
+	spu_save(&ctx->csa, spu);
+	spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+	/* Detach spu and context and switch to the backing ops. */
+	spin_lock_irq(&spu->register_lock);
+	spu->timestamp = jiffies;
+	ctx->state = SPU_STATE_SAVED;
+	spu->ibox_callback = NULL;
+	spu->wbox_callback = NULL;
+	spu->stop_callback = NULL;
+	spu->mfc_callback = NULL;
+	spu->pid = 0;
+	spu->tgid = 0;
+	ctx->ops = &spu_backing_ops;
+	spu->flags = 0;
+	spu->ctx = NULL;
+	spin_unlock_irq(&spu->register_lock);
+
+	spu_associate_mm(spu, NULL);
+
+	/* credit the events counted on the spu since bind time to the ctx */
+	ctx->stats.slb_flt +=
+		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
+	ctx->stats.class2_intr +=
+		(spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+	/* This maps the underlying spu state to idle */
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	ctx->spu = NULL;
+
+	/* let waiters on stop_wq notice a context that is already stopped */
+	if (spu_stopped(ctx, &status))
+		wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * __spu_add_to_rq - add a context to the runqueue
+ * @ctx:       context to add
+ *
+ * Queues @ctx at the tail of the list for its priority unless it is
+ * already queued.  The callers in this file hold spu_prio->runq_lock.
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+	/*
+	 * Because of the way the problem state mmap support works, several
+	 * threads may get here on behalf of the same context.
+	 *
+	 * All of them need to be woken at the same time anyway, so only the
+	 * first one actually queues the context and the others skip the
+	 * addition.
+	 *
+	 * This remains a hack; in the long run every other thread should be
+	 * proxied through the owner thread, leaving spu_run in control of
+	 * all scheduling activity for a given context.
+	 */
+	if (!list_empty(&ctx->rq))
+		return;
+
+	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+	set_bit(ctx->prio, spu_prio->bitmap);
+
+	/* the first waiter arms the scheduler tick */
+	if (spu_prio->nr_waiting++ == 0)
+		mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+}
+
+/* Add @ctx to the run queue while holding spu_prio->runq_lock. */
+static void spu_add_to_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	__spu_add_to_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+/*
+ * Take @ctx off the run queue if it is queued.  The scheduler tick is
+ * stopped when the last waiter leaves, and the priority bit is cleared
+ * once the list for that priority is empty.  The callers in this file
+ * hold spu_prio->runq_lock.
+ */
+static void __spu_del_from_rq(struct spu_context *ctx)
+{
+	int level = ctx->prio;
+
+	if (list_empty(&ctx->rq))
+		return;
+
+	if (--spu_prio->nr_waiting == 0)
+		del_timer(&spusched_timer);
+	list_del_init(&ctx->rq);
+
+	if (list_empty(&spu_prio->runq[level]))
+		clear_bit(level, spu_prio->bitmap);
+}
+
+/* Remove @ctx from the run queue while holding spu_prio->runq_lock. */
+void spu_del_from_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	__spu_del_from_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+/*
+ * Queue a NOSCHED context on the run queue and sleep on its stop_wq until
+ * woken or interrupted by a signal.
+ *
+ * Entered with ctx->state_mutex held; the mutex is dropped while sleeping
+ * and held again on return.  The context is off the run queue on return.
+ */
+static void spu_prio_wait(struct spu_context *ctx)
+{
+	DEFINE_WAIT(wait);
+
+	/*
+	 * The caller must explicitly wait for a context to be loaded
+	 * if the nosched flag is set.  If NOSCHED is not set, the caller
+	 * queues the context and waits for an spu event or error.
+	 */
+	BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+	spin_lock(&spu_prio->runq_lock);
+	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+	/* with a signal already pending, skip queueing and sleeping */
+	if (!signal_pending(current)) {
+		__spu_add_to_rq(ctx);
+		spin_unlock(&spu_prio->runq_lock);
+		mutex_unlock(&ctx->state_mutex);
+		schedule();
+		mutex_lock(&ctx->state_mutex);
+		spin_lock(&spu_prio->runq_lock);
+		__spu_del_from_rq(ctx);
+	}
+	spin_unlock(&spu_prio->runq_lock);
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&ctx->stop_wq, &wait);
+}
+
+/*
+ * Find a free spu for @ctx and claim it.
+ *
+ * If the context has gang affinity, only the spu dictated by its affinity
+ * offset is considered.  Otherwise the nodes are searched for an SPU_FREE
+ * spu, starting with the node of the current CPU and skipping nodes the
+ * context is not allowed on.
+ *
+ * Returns the spu, marked SPU_USED and with its channels initialised, or
+ * NULL if none is available.
+ */
+static struct spu *spu_get_idle(struct spu_context *ctx)
+{
+	struct spu *spu, *aff_ref_spu;
+	int node, n;
+
+	spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+	if (ctx->gang) {
+		mutex_lock(&ctx->gang->aff_mutex);
+		if (has_affinity(ctx)) {
+			aff_ref_spu = ctx->gang->aff_ref_spu;
+			atomic_inc(&ctx->gang->aff_sched_count);
+			mutex_unlock(&ctx->gang->aff_mutex);
+			node = aff_ref_spu->node;
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+			if (spu && spu->alloc_state == SPU_FREE)
+				goto found;
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			/* the affinity slot is taken: undo the count, give up */
+			atomic_dec(&ctx->gang->aff_sched_count);
+			goto not_found;
+		}
+		mutex_unlock(&ctx->gang->aff_mutex);
+	}
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		/* wrap around to node 0 after the last node */
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state == SPU_FREE)
+				goto found;
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+
+ not_found:
+	spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+	return NULL;
+
+ found:
+	/* reached with the list_mutex of @node still held */
+	spu->alloc_state = SPU_USED;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spu_context_trace(spu_get_idle__found, ctx, spu);
+	spu_init_channels(spu);
+	return spu;
+}
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx:	candidate context for running
+ *
+ * Searches the nodes @ctx is allowed on for the bound, non-NOSCHED context
+ * with the numerically highest priority value above that of @ctx, unbinds
+ * it and puts it back on the run queue if it is still inside spu_run.
+ *
+ * Returns the freed physical spu to run the new context on, or NULL if no
+ * victim was found.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+	struct spu_context *victim = NULL;
+	struct spu *spu;
+	int node, n;
+
+	spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+	/*
+	 * Look for a possible preemption candidate on the local node first.
+	 * If there is no candidate look at the other nodes.  This isn't
+	 * exactly fair, but so far the whole spu scheduler tries to keep
+	 * a strong node affinity.  We might want to fine-tune this in
+	 * the future.
+	 */
+ restart:
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			struct spu_context *tmp = spu->ctx;
+
+			if (tmp && tmp->prio > ctx->prio &&
+			    !(tmp->flags & SPU_CREATE_NOSCHED) &&
+			    (!victim || tmp->prio > victim->prio)) {
+				victim = spu->ctx;
+			}
+		}
+		/* pin the victim before the list_mutex is dropped */
+		if (victim)
+			get_spu_context(victim);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+		if (victim) {
+			/*
+			 * This nests ctx->state_mutex, but we always lock
+			 * higher priority contexts before lower priority
+			 * ones, so this is safe until we introduce
+			 * priority inheritance schemes.
+			 *
+			 * XXX if the highest priority context is locked,
+			 * this can loop a long time.  Might be better to
+			 * look at another context or give up after X retries.
+			 */
+			if (!mutex_trylock(&victim->state_mutex)) {
+				put_spu_context(victim);
+				victim = NULL;
+				goto restart;
+			}
+
+			spu = victim->spu;
+			if (!spu || victim->prio <= ctx->prio) {
+				/*
+				 * This race can happen because we've dropped
+				 * the active list mutex.  Not a problem, just
+				 * restart the search.
+				 */
+				mutex_unlock(&victim->state_mutex);
+				put_spu_context(victim);
+				victim = NULL;
+				goto restart;
+			}
+
+			spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+			/* unbind; the spu stays allocated for the caller */
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			cbe_spu_info[node].nr_active--;
+			spu_unbind_context(spu, victim);
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			victim->stats.invol_ctx_switch++;
+			spu->stats.invol_ctx_switch++;
+			if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+				spu_add_to_rq(victim);
+
+			mutex_unlock(&victim->state_mutex);
+			put_spu_context(victim);
+
+			return spu;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Bind @ctx to @spu with a fresh timeslice.
+ *
+ * If the spu has no context bound, @ctx is bound to it under the node's
+ * list_mutex and everybody waiting on ctx->run_wq is woken.  If the spu
+ * is already taken, @ctx is put on the run queue instead.
+ */
+static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+	int node = spu->node;
+	int success = 0;
+
+	spu_set_timeslice(ctx);
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	if (spu->ctx == NULL) {
+		spu_bind_context(spu, ctx);
+		cbe_spu_info[node].nr_active++;
+		spu->alloc_state = SPU_USED;
+		success = 1;
+	}
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+	if (success)
+		wake_up_all(&ctx->run_wq);
+	else
+		spu_add_to_rq(ctx);
+}
+
+/*
+ * Take ctx->state_mutex, schedule @ctx onto @spu if the context is still
+ * in SPU_STATE_SAVED, and release the context again.
+ */
+static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+	/* not a candidate for interruptible because it's called either
+	   from the scheduler thread or from spu_deactivate */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state == SPU_STATE_SAVED)
+		__spu_schedule(spu, ctx);
+	spu_release(ctx);
+}
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu:	The SPU to unschedule from
+ * @ctx:	The context currently scheduled on the SPU
+ * @free_spu:	Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (ie, may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * The invol_ctx_switch counters of both @ctx and @spu are incremented.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+		int free_spu)
+{
+	int node = spu->node;
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	cbe_spu_info[node].nr_active--;
+	if (free_spu)
+		spu->alloc_state = SPU_FREE;
+	spu_unbind_context(spu, ctx);
+	ctx->stats.invol_ctx_switch++;
+	spu->stats.invol_ctx_switch++;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx:	spu context to schedule
+ * @flags:	flags (currently ignored)
+ *
+ * Tries to find a free spu to run @ctx.  If no free spu is available
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.  NOSCHED contexts instead wait here and retry until an
+ * spu could be obtained.
+ *
+ * Returns 0 if the context was bound or queued, or -ERESTARTSYS if a
+ * signal is pending.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+	struct spu *spu;
+
+	/*
+	 * If there are multiple threads waiting for a single context
+	 * only one actually binds the context while the others will
+	 * only be able to acquire the state_mutex once the context
+	 * already is in runnable state.
+	 */
+	if (ctx->spu)
+		return 0;
+
+spu_activate_top:
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	spu = spu_get_idle(ctx);
+	/*
+	 * If this is a realtime thread we try to get it running by
+	 * preempting a lower priority thread.
+	 */
+	if (!spu && rt_prio(ctx->prio))
+		spu = find_victim(ctx);
+	if (spu) {
+		unsigned long runcntl;
+
+		/* sample the run control before the ops are switched */
+		runcntl = ctx->ops->runcntl_read(ctx);
+		__spu_schedule(spu, ctx);
+		if (runcntl & SPU_RUNCNTL_RUNNABLE)
+			spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+		return 0;
+	}
+
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		spu_prio_wait(ctx);
+		goto spu_activate_top;
+	}
+
+	spu_add_to_rq(ctx);
+
+	return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ * @prio:	only contexts with a priority value below this are considered
+ * @node:	node the context must be allowed to run on
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller.  Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+	struct spu_context *ctx;
+	int best;
+
+	spin_lock(&spu_prio->runq_lock);
+	best = find_first_bit(spu_prio->bitmap, prio);
+	/* scan the priority levels from the first populated one upwards */
+	while (best < prio) {
+		struct list_head *rq = &spu_prio->runq[best];
+
+		list_for_each_entry(ctx, rq, rq) {
+			/* XXX(hch): check for affinity here as well */
+			if (__node_allowed(ctx, node)) {
+				__spu_del_from_rq(ctx);
+				goto found;
+			}
+		}
+		best++;
+	}
+	ctx = NULL;
+ found:
+	spin_unlock(&spu_prio->runq_lock);
+	return ctx;
+}
+
+/*
+ * Take @ctx off its spu if a waiting context can use the spu or if @force
+ * is set.
+ *
+ * @max_prio bounds the run queue search: only queued contexts with a
+ * priority value below it are considered.  A replacement that is NOSCHED
+ * is woken through its stop_wq; any other replacement is scheduled onto
+ * the spu, with ctx->state_mutex dropped and re-taken around that step.
+ * Without a replacement the spu is freed.
+ *
+ * Returns non-zero if another context was picked, zero otherwise.
+ */
+static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_context *new = NULL;
+
+	if (spu) {
+		new = grab_runnable_context(max_prio, spu->node);
+		if (new || force) {
+			/* keep the spu reserved when a successor exists */
+			spu_unschedule(spu, ctx, new == NULL);
+			if (new) {
+				if (new->flags & SPU_CREATE_NOSCHED)
+					wake_up(&new->stop_wq);
+				else {
+					spu_release(ctx);
+					spu_schedule(spu, new);
+					/* this one can't easily be made
+					   interruptible */
+					mutex_lock(&ctx->state_mutex);
+				}
+			}
+		}
+	}
+
+	return new != NULL;
+}
+
+/**
+ * spu_deactivate - unbind a context from its physical spu
+ * @ctx:	spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ * The unbind is forced, i.e. it happens even if nobody is waiting.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_deactivate__enter, ctx);
+	__spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield -	yield a physical spu if others are waiting
+ * @ctx:	spu context to yield
+ *
+ * If a context is waiting on the run queue (the search is bounded by
+ * MAX_PRIO), unbind @ctx from its physical spu and schedule the highest
+ * priority waiter on the freed spu instead.  Contexts created with
+ * SPU_CREATE_NOSCHED never yield.
+ */
+void spu_yield(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_yield__enter, ctx);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		return;
+
+	mutex_lock(&ctx->state_mutex);
+	__spu_deactivate(ctx, 0, MAX_PRIO);
+	mutex_unlock(&ctx->state_mutex);
+}
+
+/*
+ * Per-tick timeslice accounting for one bound context.
+ *
+ * Contexts that are not runnable, are NOSCHED or use SCHED_FIFO are left
+ * alone.  Otherwise the timeslice is decremented; once it is used up, or
+ * the context is no longer inside spu_run, a waiting context of equal or
+ * better priority (if any) takes over the spu.  Without such a waiter the
+ * context keeps running.
+ */
+static noinline void spusched_tick(struct spu_context *ctx)
+{
+	struct spu_context *new = NULL;
+	struct spu *spu = NULL;
+
+	if (spu_acquire(ctx))
+		BUG();	/* a kernel thread never has signals pending */
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		goto out;
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		goto out;
+	if (ctx->policy == SCHED_FIFO)
+		goto out;
+
+	/* slice not used up and owner still in spu_run: keep running */
+	if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+		goto out;
+
+	spu = ctx->spu;
+
+	spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+	new = grab_runnable_context(ctx->prio + 1, spu->node);
+	if (new) {
+		/* keep the spu reserved, it is handed to @new below */
+		spu_unschedule(spu, ctx, 0);
+		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+			spu_add_to_rq(ctx);
+	} else {
+		spu_context_nospu_trace(spusched_tick__newslice, ctx);
+		/* nobody is waiting: grant one more tick */
+		if (!ctx->time_slice)
+			ctx->time_slice++;
+	}
+out:
+	spu_release(ctx);
+
+	/* done after releasing @ctx since spu_schedule() locks @new */
+	if (new)
+		spu_schedule(spu, new);
+}
+
+/**
+ * count_active_contexts - count nr of active tasks
+ *
+ * Return the number of contexts currently bound to an spu (summed over all
+ * nodes) plus the number of contexts waiting on the run queue.
+ *
+ * Neither runq_lock nor list_mutex is taken here.  Reading a single 32bit
+ * value is atomic on powerpc, and memory ordering does not matter for
+ * this statistic.
+ */
+static unsigned long count_active_contexts(void)
+{
+	int total = 0;
+	int nid;
+
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		total += cbe_spu_info[nid].nr_active;
+
+	total += spu_prio->nr_waiting;
+
+	return total;
+}
+
+/**
+ * spu_calc_load - update the avenrun load estimates.
+ *
+ * Feeds the current number of active contexts into the three load
+ * averages.  As for the CPU loadavg code, there is no locking against
+ * userspace reading these values.
+ */
+static void spu_calc_load(void)
+{
+	static const unsigned long decay[] = { EXP_1, EXP_5, EXP_15 };
+	unsigned long active_tasks; /* fixed-point */
+	unsigned int i;
+
+	active_tasks = count_active_contexts() * FIXED_1;
+	for (i = 0; i < ARRAY_SIZE(decay); i++)
+		spu_avenrun[i] = calc_load(spu_avenrun[i], decay[i],
+					   active_tasks);
+}
+
+/*
+ * Handler of spusched_timer: re-arm the timer for the next scheduler tick
+ * and wake the scheduler thread.
+ */
+static void spusched_wake(struct timer_list *unused)
+{
+	mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+	wake_up_process(spusched_task);
+}
+
+/*
+ * Handler of spuloadavg_timer: re-arm the timer LOAD_FREQ jiffies ahead
+ * and update the SPU load averages.
+ */
+static void spuloadavg_wake(struct timer_list *unused)
+{
+	mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
+	spu_calc_load();
+}
+
+/*
+ * Main loop of the "spusched" kernel thread.
+ *
+ * The thread sleeps until it is woken, either by spusched_wake() on a
+ * scheduler tick or by kthread_stop().  It then walks all spus of all
+ * nodes and runs spusched_tick() on every context that is bound to one.
+ *
+ * Always returns 0.
+ */
+static int spusched_thread(void *unused)
+{
+	struct spu *spu;
+	int node;
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		/*
+		 * Check for a stop request again now that the task state is
+		 * set.  A kthread_stop() that ran between the loop condition
+		 * and set_current_state() has already issued its wakeup, and
+		 * spu_sched_exit() deletes the tick timer before stopping us,
+		 * so sleeping here could otherwise last forever.
+		 */
+		if (kthread_should_stop()) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+		schedule();
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+			mutex_lock(mtx);
+			list_for_each_entry(spu, &cbe_spu_info[node].spus,
+					cbe_list) {
+				struct spu_context *ctx = spu->ctx;
+
+				if (ctx) {
+					/*
+					 * Hold a reference so the context
+					 * stays around while the list mutex
+					 * is dropped for the tick.
+					 */
+					get_spu_context(ctx);
+					mutex_unlock(mtx);
+					spusched_tick(ctx);
+					mutex_lock(mtx);
+					put_spu_context(ctx);
+				}
+			}
+			mutex_unlock(mtx);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Move @ctx to utilization state @new_state and account the time spent in
+ * the previous state.
+ *
+ * The context's state and timestamp are always updated.  The elapsed time
+ * is only added to the per-state counters (of both the context and the
+ * spu) while the context is bound to an spu; in that case the node's
+ * busy_spus counter also follows transitions out of and into
+ * SPU_UTIL_USER.  Warns if ctx->state_mutex is not locked.
+ */
+void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state)
+{
+	unsigned long long now = ktime_get_ns();
+	signed long long elapsed = now - ctx->stats.tstamp;
+	enum spu_utilization_state prev_state = ctx->stats.util_state;
+	struct spu *spu = ctx->spu;
+
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+	WARN_ON(elapsed < 0);
+
+	ctx->stats.util_state = new_state;
+	ctx->stats.tstamp = now;
+
+	/* nothing more to account for an unbound context */
+	if (!spu)
+		return;
+
+	/*
+	 * Update the physical SPU utilization statistics.
+	 */
+	ctx->stats.times[prev_state] += elapsed;
+	spu->stats.times[prev_state] += elapsed;
+	spu->stats.util_state = new_state;
+	spu->stats.tstamp = now;
+
+	if (prev_state == SPU_UTIL_USER)
+		atomic_dec(&cbe_spu_info[spu->node].busy_spus);
+	if (new_state == SPU_UTIL_USER)
+		atomic_inc(&cbe_spu_info[spu->node].busy_spus);
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show handler behind the "spu_loadavg" proc entry.  Prints the
+ * three SPU load averages, the number of active contexts, the total number
+ * of spu contexts and a last-pid field.
+ */
+static int show_spu_loadavg(struct seq_file *s, void *private)
+{
+	/* 0.005 in fixed-point, rounds the two printed decimals */
+	const int rounding = FIXED_1/200;
+	int load1 = spu_avenrun[0] + rounding;
+	int load5 = spu_avenrun[1] + rounding;
+	int load15 = spu_avenrun[2] + rounding;
+
+	/*
+	 * last_pid has little meaning for the SPU loadavg (and looks odd
+	 * even on the CPU side), but it is printed anyway so that the
+	 * output stays 100% compatible with the CPU interface.
+	 */
+	seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+		LOAD_INT(load1), LOAD_FRAC(load1),
+		LOAD_INT(load5), LOAD_FRAC(load5),
+		LOAD_INT(load15), LOAD_FRAC(load15),
+		count_active_contexts(),
+		atomic_read(&nr_spu_contexts),
+		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
+	return 0;
+}
+#endif
+
+/*
+ * Set up the SPU scheduler: allocate and initialise the run queue, prepare
+ * the tick and load average timers, start the "spusched" thread, arm the
+ * load average timer and create the "spu_loadavg" proc entry.
+ *
+ * Returns 0 on success.  On failure everything set up so far is undone
+ * and a negative errno is returned: the error from kthread_run(), or
+ * -ENOMEM if the allocation or the proc entry creation failed.
+ */
+int __init spu_sched_init(void)
+{
+	struct proc_dir_entry *entry;
+	int err = -ENOMEM, i;
+
+	spu_prio = kzalloc(sizeof(*spu_prio), GFP_KERNEL);
+	if (!spu_prio)
+		goto out;
+
+	for (i = 0; i < MAX_PRIO; i++) {
+		INIT_LIST_HEAD(&spu_prio->runq[i]);
+		__clear_bit(i, spu_prio->bitmap);
+	}
+	spin_lock_init(&spu_prio->runq_lock);
+
+	timer_setup(&spusched_timer, spusched_wake, 0);
+	timer_setup(&spuloadavg_timer, spuloadavg_wake, 0);
+
+	spusched_task = kthread_run(spusched_thread, NULL, "spusched");
+	if (IS_ERR(spusched_task)) {
+		err = PTR_ERR(spusched_task);
+		goto out_free_spu_prio;
+	}
+
+	/* fire as soon as possible; the handler re-arms itself */
+	mod_timer(&spuloadavg_timer, 0);
+
+	entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg);
+	if (!entry)
+		goto out_stop_kthread;
+
+	pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
+			SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
+	return 0;
+
+ out_stop_kthread:
+	/*
+	 * The load average timer is armed by now and its handler reads
+	 * spu_prio, so it must be stopped before spu_prio is freed.
+	 */
+	del_timer_sync(&spuloadavg_timer);
+	kthread_stop(spusched_task);
+ out_free_spu_prio:
+	kfree(spu_prio);
+ out:
+	return err;
+}
+
+/*
+ * Tear down the SPU scheduler: remove the proc entry, stop both timers and
+ * the scheduler thread, mark every spu SPU_FREE and free the run queue.
+ */
+void spu_sched_exit(void)
+{
+	struct spu *spu;
+	int node;
+
+	remove_proc_entry("spu_loadavg", NULL);
+
+	del_timer_sync(&spusched_timer);
+	del_timer_sync(&spuloadavg_timer);
+	kthread_stop(spusched_task);
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+			if (spu->alloc_state != SPU_FREE)
+				spu->alloc_state = SPU_FREE;
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+	kfree(spu_prio);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c
new file mode 100644
index 0000000000..2cbb6efb2d
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu_restore.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context restore sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE                 0x40000	/* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+#define BR_INSTR		0x327fff80	/* br -4         */
+#define NOP_INSTR		0x40200000	/* nop           */
+#define HEQ_INSTR		0x7b000000	/* heq $0, $0    */
+#define STOP_INSTR		0x00000000	/* stop 0x0      */
+#define ILLEGAL_INSTR		0x00800000	/* illegal instr */
+#define RESTORE_COMPLETE	0x00003ffc	/* stop 0x3ffc   */
+
+/* Queue a GET (tag 0) that fills regs_spill from the LSCSA at lscsa_ea. */
+static inline void fetch_regs_from_mem(addr64 lscsa_ea)
+{
+	unsigned int get_cmd = 0x40;	/* GET */
+	unsigned int tag = 0;
+
+	/* MFC_Cmd goes last, once every parameter is in place. */
+	spu_writech(MFC_LSA, (unsigned int)&regs_spill[0]);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, (unsigned int)sizeof(regs_spill));
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, get_cmd);
+}
+
+/*
+ * Restore, Step 4:
+ *    Enqueue the GETL command (tag 0) to the MFC SPU command queue to
+ *    transfer the upper 240 kb of LS (from LS offset 16384) from CSA.
+ */
+static inline void restore_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int getl_cmd = 0x44;	/* GETL */
+	unsigned int ls_start = 16384;
+	unsigned int tag = 0;
+
+	spu_writech(MFC_LSA, ls_start);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	/* For this list command, EAL is given the LS address of dma_list. */
+	spu_writech(MFC_EAL, (unsigned int)&dma_list[0]);
+	spu_writech(MFC_Size, (unsigned int)sizeof(dma_list));
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, getl_cmd);
+}
+
+/*
+ * Restore, Step 6 (moved):
+ *    Write the SPU_WrDec channel with the decrementer value
+ *    from the LSCSA, but only when the LSCSA "decrementer
+ *    running" flag is set.
+ */
+static inline void restore_decr(void)
+{
+	unsigned int status_qw = LSCSA_QW_OFFSET(decr_status);
+	unsigned int value_qw = LSCSA_QW_OFFSET(decr);
+	unsigned int saved_decr;
+
+	/* Flag clear: leave the decrementer untouched. */
+	if (!(regs_spill[status_qw].slot[0] & SPU_DECR_STATUS_RUNNING))
+		return;
+
+	saved_decr = regs_spill[value_qw].slot[0];
+	spu_writech(SPU_WrDec, saved_decr);
+}
+
+/*
+ * Restore, Step 11:
+ *    Replay the PPU_MB word saved in the LSCSA by writing it to
+ *    the outbound mailbox channel (SPU_WrOutMbox).
+ */
+static inline void write_ppu_mb(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(ppu_mb);
+	unsigned int mb_word;
+
+	mb_word = regs_spill[qw].slot[0];
+	spu_writech(SPU_WrOutMbox, mb_word);
+}
+
+/*
+ * Restore, Step 12:
+ *    Replay the PPUINT_MB word saved in the LSCSA by writing it to
+ *    the interrupting outbound mailbox channel (SPU_WrOutIntrMbox).
+ */
+static inline void write_ppuint_mb(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(ppuint_mb);
+	unsigned int mb_word;
+
+	mb_word = regs_spill[qw].slot[0];
+	spu_writech(SPU_WrOutIntrMbox, mb_word);
+}
+
+/*
+ * Restore, Step 13:
+ *    Load the floating-point status and control register with
+ *    the vector value kept in the LSCSA fpcr slot.
+ */
+static inline void restore_fpcr(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(fpcr);
+	vector unsigned int saved;
+
+	saved = regs_spill[qw].v;
+	spu_mtfpscr(saved);
+}
+
+/*
+ * Restore, Step 14:
+ *    Write the SRR0 value kept in the LSCSA to SPU_WrSRR0.
+ */
+static inline void restore_srr0(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(srr0);
+	unsigned int saved;
+
+	saved = regs_spill[qw].slot[0];
+	spu_writech(SPU_WrSRR0, saved);
+}
+
+/*
+ * Restore, Step 15:
+ *    Write the event mask kept in the LSCSA to SPU_WrEventMask.
+ */
+static inline void restore_event_mask(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(event_mask);
+	unsigned int saved;
+
+	saved = regs_spill[qw].slot[0];
+	spu_writech(SPU_WrEventMask, saved);
+}
+
+/*
+ * Restore, Step 16:
+ *    Write the tag mask kept in the LSCSA to MFC_WrTagMask.
+ */
+static inline void restore_tag_mask(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(tag_mask);
+	unsigned int saved;
+
+	saved = regs_spill[qw].slot[0];
+	spu_writech(MFC_WrTagMask, saved);
+}
+
+/*
+ * Restore, Step 18:
+ *    Issue a stop-and-signal instruction with
+ *    "good context restore" signal value.
+ *
+ * Restore, Step 19:
+ *    There may be additional instructions placed
+ *    here by the PPE Sequence for SPU Context
+ *    Restore in order to restore the correct
+ *    "stopped state".
+ *
+ *    This step is handled here by reading
+ *    LSCSA.stopped_status and overwriting the
+ *    instruction words at exit_fini to match.
+ *
+ * slot[0] of stopped_status selects the sequence below;
+ * slot[1] is OR-ed into the stop instruction for the
+ * cases that re-issue a stop-and-signal.
+ */
+static inline void restore_complete(void)
+{
+	extern void exit_fini(void);
+	unsigned int *patch = (unsigned int *)exit_fini;
+	unsigned int qw = LSCSA_QW_OFFSET(stopped_status);
+	unsigned int status = regs_spill[qw].slot[0];
+	unsigned int signal = regs_spill[qw].slot[1];
+
+	switch (status) {
+	case SPU_STOPPED_STATUS_P_I:
+		/* SPU_Status[P,I]=1.  Illegal instruction
+		 * followed by stop-and-signal after the
+		 * end of the restore code.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = ILLEGAL_INSTR;
+		patch[2] = STOP_INSTR | signal;
+		break;
+	case SPU_STOPPED_STATUS_P_H:
+		/* SPU_Status[P,H]=1.  'heq $0, $0' followed
+		 * by stop-and-signal after the end of the
+		 * restore code.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = HEQ_INSTR;
+		patch[2] = STOP_INSTR | signal;
+		break;
+	case SPU_STOPPED_STATUS_P:
+		/* SPU_Status[P]=1.  Stop-and-signal only,
+		 * after the end of the restore code.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = STOP_INSTR | signal;
+		break;
+	case SPU_STOPPED_STATUS_S_P:
+		/* SPU_Status[S,P]=1.  Stop-and-signal, then
+		 * nop and 'br -4' after the end of the
+		 * restore code.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = STOP_INSTR | signal;
+		patch[2] = NOP_INSTR;
+		patch[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_S_I:
+		/* SPU_Status[S,I]=1.  Same sequence as
+		 * SPU_Status[I]=1 below.
+		 */
+	case SPU_STOPPED_STATUS_I:
+		/* SPU_Status[I]=1.  Illegal instruction, then
+		 * nop and 'br -4' after the end of the restore
+		 * sequence.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = ILLEGAL_INSTR;
+		patch[2] = NOP_INSTR;
+		patch[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_H:
+		/* SPU_Status[H]=1.  'heq $0, $0', then nop
+		 * and 'br -4' after the end of the restore
+		 * code.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = HEQ_INSTR;
+		patch[2] = NOP_INSTR;
+		patch[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_S:
+		/* SPU_Status[S]=1.  Same sequence as the
+		 * SPU_STOPPED_STATUS_R case below.
+		 */
+	case SPU_STOPPED_STATUS_R:
+		/* SPU_Status[I,S,H,P,R]=0.  Two nops, then
+		 * 'br -4'.
+		 */
+		patch[0] = RESTORE_COMPLETE;
+		patch[1] = NOP_INSTR;
+		patch[2] = NOP_INSTR;
+		patch[3] = BR_INSTR;
+		break;
+	default:
+		/* SPU_Status[R]=1.  exit_fini is left as
+		 * assembled; no additional instructions.
+		 */
+		break;
+	}
+
+	/* Issued last, after any patching above. */
+	spu_sync();
+}
+
+/**
+ * main - entry point for SPU-side context restore.
+ *
+ * Runs the restore steps in order and returns 0.  This code
+ * deviates from the documented sequence as follows:
+ *
+ *	1. The EA for LSCSA is passed from PPE in the
+ *	   signal notification channels.
+ *	2. The register spill area is pulled by SPU
+ *	   into LS, rather than pushed by PPE.
+ *	3. All 128 registers are restored by exit().
+ *	4. The exit() function is modified at run
+ *	   time in order to properly restore the
+ *	   SPU_Status register.
+ */
+int main()
+{
+	addr64 lscsa_ea;
+
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);	/* later used as MFC_EAH */
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);	/* later used as MFC_EAL */
+	fetch_regs_from_mem(lscsa_ea);	/* deviation 2: pull the spill area */
+
+	set_event_mask();		/* Step 1.  */
+	set_tag_mask();			/* Step 2.  */
+	build_dma_list(lscsa_ea);	/* Step 3.  */
+	restore_upper_240kb(lscsa_ea);	/* Step 4.  */
+					/* Step 5 (remaining 112 GPRs): done by 'exit'. */
+	enqueue_putllc(lscsa_ea);	/* Step 7. */
+	set_tag_update();		/* Step 8. */
+	read_tag_status();		/* Step 9. */
+	restore_decr();			/* moved Step 6. */
+	read_llar_status();		/* Step 10. */
+	write_ppu_mb();			/* Step 11. */
+	write_ppuint_mb();		/* Step 12. */
+	restore_fpcr();			/* Step 13. */
+	restore_srr0();			/* Step 14. */
+	restore_event_mask();		/* Step 15. */
+	restore_tag_mask();		/* Step 16. */
+					/* Step 17 (first 16 GPRs): done by 'exit'. */
+	restore_complete();		/* Steps 18 and 19: patch exit_fini. */
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
new file mode 100644
index 0000000000..6d799f8476
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * crt0_r.S: Entry function for SPU-side context restore.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry and exit function for SPU-side of the context restore
+ * sequence.  Sets up an initial stack frame, then branches to
+ * 'main'.  On return, restores all 128 registers from the LSCSA
+ * and exits.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill	/* exported: the restore code indexes regs_spill[] */
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0	/* zero-filled spill area */
+
+.text
+.global _start
+_start:
+	/* Initialize the stack pointer to point to 16368
+	 * (16kb-16). The back chain pointer is initialized
+	 * to NULL.
+	 */
+	il      $0, 0			/* $0 = 0, the NULL back chain */
+	il      $SP, 16368
+	stqd    $0, 0($SP)		/* store it at the top of the stack */
+
+	/* Allocate a minimum stack frame for the called main.
+	 * This is needed so that main has a place to save the
+	 * link register when it calls another function.
+	 */
+	stqd    $SP, -160($SP)		/* new frame's back chain = old $SP */
+	ai      $SP, $SP, -160		/* move $SP down by the 160-byte frame */
+
+	/* Call main; when it returns, execution continues at 'exit' below. */
+	brsl    $0, main
+
+.global exit
+.global	_exit
+exit:
+_exit:
+	/* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */
+	ila     $3, regs_spill + 256	/* $3 -> saved $16 (16 regs * 16 bytes) */
+restore_regs:
+	lqr     $4, restore_reg_insts	/* $4 = the four lqd words below */
+restore_reg_loop:
+	ai      $4, $4, 4		/* +4 per word: presumably the target reg field */
+	.balignl 16, 0x40200000		/* pad with nops to a 16-byte boundary */
+restore_reg_insts:       /* must be quad-word aligned. */
+	lqd     $16, 0($3)
+	lqd     $17, 16($3)
+	lqd     $18, 32($3)
+	lqd     $19, 48($3)
+	andi    $5, $4, 0x7F		/* low 7 bits of the bumped words */
+	stqr    $4, restore_reg_insts	/* self-modify: rewrite the lqd group */
+	ai      $3, $3, 64		/* next four saved quadwords */
+	brnz    $5, restore_reg_loop	/* loop until those bits wrap to 0 */
+
+	/* SPU Context Restore Step 17: Restore the first 16 GPRs. */
+	lqa $0, regs_spill + 0
+	lqa $1, regs_spill + 16
+	lqa $2, regs_spill + 32
+	lqa $3, regs_spill + 48
+	lqa $4, regs_spill + 64
+	lqa $5, regs_spill + 80
+	lqa $6, regs_spill + 96
+	lqa $7, regs_spill + 112
+	lqa $8, regs_spill + 128
+	lqa $9, regs_spill + 144
+	lqa $10, regs_spill + 160
+	lqa $11, regs_spill + 176
+	lqa $12, regs_spill + 192
+	lqa $13, regs_spill + 208
+	lqa $14, regs_spill + 224
+	lqa $15, regs_spill + 240
+
+	/* Under normal circumstances, the 'exit' function
+	 * terminates with 'stop SPU_RESTORE_COMPLETE',
+	 * indicating that the SPU-side restore code has
+	 * completed.
+	 *
+	 * However it is possible that instructions immediately
+	 * following the 'stop 0x3ffc' have been modified at run
+	 * time so as to recreate the exact SPU_Status settings
+	 * from the application, e.g. illegal instruction, halt,
+	 * etc.
+	 */
+.global exit_fini
+.global	_exit_fini
+exit_fini:
+_exit_fini:
+	stop	SPU_RESTORE_COMPLETE
+	stop	0			/* placeholder words; restore_complete() */
+	stop	0			/* in spu_restore.c may overwrite the */
+	stop	0			/* four words starting at exit_fini */
+
+	/* Pad the size of this crt0.o to be a multiple of 16 bytes. */
+.balignl 16, 0x0
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
new file mode 100644
index 0000000000..f383b027e8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -0,0 +1,935 @@
+/*
+ * spu_restore_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_restore.c.
+ * Do not edit!
+ */
+static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33001180,
+0x42034003,
+0x33800284,
+0x1c010204,
+0x40200000,
+0x40200000,
+0x40200000,
+0x34000190,
+0x34004191,
+0x34008192,
+0x3400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffa85,
+0x3080b000,
+0x3080b201,
+0x3080b402,
+0x3080b603,
+0x3080b804,
+0x3080ba05,
+0x3080bc06,
+0x3080be07,
+0x3080c008,
+0x3080c209,
+0x3080c40a,
+0x3080c60b,
+0x3080c80c,
+0x3080ca0d,
+0x3080cc0e,
+0x3080ce0f,
+0x00003ffc,
+0x00000000,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb0a14103,
+0x01a00204,
+0x3ec10083,
+0x4202c002,
+0xb0a14203,
+0x21a00802,
+0x3fbf028a,
+0x3f20050a,
+0x3fbe0502,
+0x3fe30102,
+0x21a00882,
+0x3f82028b,
+0x3fe3058b,
+0x3fbf0584,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x04000203,
+0x21a00903,
+0x40848002,
+0x21a00982,
+0x40800003,
+0x21a00a03,
+0x40802002,
+0x21a00a82,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x10002612,
+0x42a00003,
+0x42074006,
+0x1800c204,
+0x40a00008,
+0x40800789,
+0x1c010305,
+0x34000302,
+0x1cffc489,
+0x3ec00303,
+0x3ec00287,
+0xb0408403,
+0x24000302,
+0x34000282,
+0x1c020306,
+0xb0408207,
+0x18020204,
+0x24000282,
+0x217ffa09,
+0x04000402,
+0x21a00802,
+0x3fbe0504,
+0x3fe30204,
+0x21a00884,
+0x42074002,
+0x21a00902,
+0x40803c03,
+0x21a00983,
+0x04000485,
+0x21a00a05,
+0x40802202,
+0x21a00a82,
+0x21a00805,
+0x21a00884,
+0x3fbf0582,
+0x3f200102,
+0x3fbe0102,
+0x3fe30102,
+0x21a00902,
+0x40804003,
+0x21a00983,
+0x21a00a05,
+0x40805a02,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x30809c03,
+0x34000182,
+0x14004102,
+0x21002082,
+0x01a00d82,
+0x3080a003,
+0x34000182,
+0x21a00e02,
+0x3080a203,
+0x34000182,
+0x21a00f02,
+0x3080a403,
+0x34000182,
+0x77400100,
+0x3080a603,
+0x34000182,
+0x21a00702,
+0x3080a803,
+0x34000182,
+0x21a00082,
+0x3080aa03,
+0x34000182,
+0x21a00b02,
+0x4020007f,
+0x3080ae02,
+0x42004805,
+0x3080ac04,
+0x34000103,
+0x34000202,
+0x1cffc183,
+0x3b810106,
+0x0f608184,
+0x42013802,
+0x5c020183,
+0x38810102,
+0x3b810102,
+0x21000e83,
+0x4020007f,
+0x35000100,
+0x00000470,
+0x000002f8,
+0x00000430,
+0x00000360,
+0x000002f8,
+0x000003c8,
+0x000004a8,
+0x00000298,
+0x00000360,
+0x00200000,
+0x409ffe02,
+0x30801203,
+0x40800208,
+0x3ec40084,
+0x40800407,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38820282,
+0x41004003,
+0xb0408189,
+0x28820282,
+0x3881c282,
+0xb0408304,
+0x2881c282,
+0x00400000,
+0x40800003,
+0x35000000,
+0x30809e03,
+0x34000182,
+0x21a00382,
+0x4020007f,
+0x327fde00,
+0x409ffe02,
+0x30801203,
+0x40800206,
+0x3ec40084,
+0x40800407,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38818282,
+0x41004003,
+0xb040818a,
+0x10005b0b,
+0x41201003,
+0x28818282,
+0x3881c282,
+0xb0408184,
+0x41193f83,
+0x60ffc003,
+0x2881c282,
+0x38820282,
+0xb0408189,
+0x28820282,
+0x327fef80,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40086,
+0x4120100b,
+0x10005b14,
+0x40800404,
+0x3ac1c289,
+0x40800608,
+0xb060c106,
+0x3ac10286,
+0x3ac2028a,
+0x20801203,
+0x3881c282,
+0x41193f83,
+0x60ffc003,
+0xb0408589,
+0x2881c282,
+0x38810282,
+0xb0408586,
+0x28810282,
+0x38820282,
+0xb040818a,
+0x28820282,
+0x4020007f,
+0x327fe280,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40084,
+0x40800408,
+0x10005b14,
+0x40800609,
+0x3ac1c28a,
+0x3ac2028b,
+0xb060c104,
+0x3ac24284,
+0x20801203,
+0x41201003,
+0x3881c282,
+0xb040830a,
+0x2881c282,
+0x38820282,
+0xb040818b,
+0x41193f83,
+0x60ffc003,
+0x28820282,
+0x38824282,
+0xb0408184,
+0x28824282,
+0x4020007f,
+0x327fd580,
+0x409ffe02,
+0x1000658e,
+0x40800206,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38818282,
+0x4020007f,
+0x327fd800,
+0x409ffe03,
+0x30801202,
+0x40800207,
+0x3ec40084,
+0x10005b09,
+0x3ac1c288,
+0xb0408184,
+0x4020007f,
+0x4020007f,
+0x20801202,
+0x3881c282,
+0xb0408308,
+0x2881c282,
+0x327fc680,
+0x409ffe02,
+0x1000588b,
+0x40800208,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38820282,
+0x327fbd80,
+0x00200000,
+0x00000da0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d90,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000db0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dc0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d80,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000df0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000de0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dd0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e04,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e00,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c
new file mode 100644
index 0000000000..28c88e3243
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu_save.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context save sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE                 0x40000	/* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+/*
+ * Save, Step 2:
+ *    Read the SPU_RdEventMask channel and keep it in the LSCSA.
+ */
+static inline void save_event_mask(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(event_mask);
+
+	regs_spill[qw].slot[0] = spu_readch(SPU_RdEventMask);
+}
+
+/*
+ * Save, Step 3:
+ *    Read the MFC_RdTagMask channel and keep it in the LSCSA.
+ */
+static inline void save_tag_mask(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(tag_mask);
+
+	regs_spill[qw].slot[0] = spu_readch(MFC_RdTagMask);
+}
+
+/*
+ * Save, Step 7:
+ *    Enqueue the PUTL command (tag 0) to the MFC SPU command queue to
+ *    transfer the remaining 240 kb of LS (from LS offset 16384) to CSA.
+ */
+static inline void save_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int putl_cmd = 0x24;	/* PUTL */
+	unsigned int ls_start = 16384;
+	unsigned int tag = 0;
+
+	spu_writech(MFC_LSA, ls_start);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	/* For this list command, EAL is given the LS address of dma_list. */
+	spu_writech(MFC_EAL, (unsigned int)&dma_list[0]);
+	spu_writech(MFC_Size, (unsigned int)sizeof(dma_list));
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, putl_cmd);
+}
+
+/*
+ * Save, Step 9:
+ *    Issue the floating-point status and control register
+ *    read instruction and store the result in the LSCSA
+ *    fpcr slot.
+ */
+static inline void save_fpcr(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(fpcr);
+
+	regs_spill[qw].v = spu_mffpscr();
+}
+
+/*
+ * Save, Step 10:
+ *    Read the SPU_RdDec channel and keep the value in
+ *    the LSCSA.
+ */
+static inline void save_decr(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(decr);
+
+	regs_spill[qw].slot[0] = spu_readch(SPU_RdDec);
+}
+
+/*
+ * Save, Step 11:
+ *    Read the SPU_RdSRR0 channel and keep the value in
+ *    the LSCSA.
+ */
+static inline void save_srr0(void)
+{
+	unsigned int qw = LSCSA_QW_OFFSET(srr0);
+
+	regs_spill[qw].slot[0] = spu_readch(SPU_RdSRR0);
+}
+
+/*
+ * Save, Step 13:
+ *    Enqueue a PUT command (tag 0) that sends the LSCSA
+ *    image held in regs_spill to the CSA at lscsa_ea.
+ */
+static inline void spill_regs_to_mem(addr64 lscsa_ea)
+{
+	unsigned int put_cmd = 0x20;	/* PUT */
+	unsigned int tag = 0;
+
+	/* MFC_Cmd goes last, once every parameter is in place. */
+	spu_writech(MFC_LSA, (unsigned int)&regs_spill[0]);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, (unsigned int)sizeof(regs_spill));
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, put_cmd);
+}
+
+/* Save, Step 14:
+ *    Enqueue an MFC_SYNC command (tag 0); lscsa_ea is not used.
+ */
+static inline void enqueue_sync(addr64 lscsa_ea)
+{
+	unsigned int sync_cmd = 0xCC;	/* MFC_SYNC */
+	unsigned int tag = 0;
+
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, sync_cmd);
+}
+
+/*
+ * Save, Step 18:
+ *    Signal "save complete" with a stop-and-signal
+ *    instruction.  Note: this function will not return!
+ */
+static inline void save_complete(void)
+{
+	spu_stop(SPU_SAVE_COMPLETE);
+}
+
+/**
+ * main - entry point for SPU-side context save.
+ *
+ * This code deviates from the documented sequence as follows:
+ *
+ *      1. The EA for LSCSA is passed from PPE in the
+ *         signal notification channels.
+ *      2. All 128 registers are saved by crt0.o.
+ */
+int main()
+{
+	addr64 lscsa_ea;
+
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);	/* later used as MFC_EAH */
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);	/* later used as MFC_EAL */
+
+	/* Step 1 (first 16 GPRs): done by _start in crt0, before main. */
+	save_event_mask();	/* Step 2.  */
+	save_tag_mask();	/* Step 3.  */
+	set_event_mask();	/* Step 4.  */
+	set_tag_mask();		/* Step 5.  */
+	build_dma_list(lscsa_ea);	/* Step 6.  */
+	save_upper_240kb(lscsa_ea);	/* Step 7.  */
+	/* Step 8 (remaining 112 GPRs): also done by _start in crt0. */
+	save_fpcr();		/* Step 9.  */
+	save_decr();		/* Step 10. */
+	save_srr0();		/* Step 11. */
+	enqueue_putllc(lscsa_ea);	/* Step 12. */
+	spill_regs_to_mem(lscsa_ea);	/* Step 13. */
+	enqueue_sync(lscsa_ea);	/* Step 14. */
+	set_tag_update();	/* Step 15. */
+	read_tag_status();	/* Step 16. */
+	read_llar_status();	/* Step 17. */
+	save_complete();	/* Step 18. */
+
+	return 0;	/* not expected to be reached; see save_complete() */
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
new file mode 100644
index 0000000000..5ce32efdca
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * crt0_s.S: Entry function for SPU-side context save.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry function for SPU-side of the context save sequence.
+ * Saves all 128 GPRs, sets up an initial stack frame, then
+ * branches to 'main'.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill	/* exported: the save code indexes regs_spill[] */
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0	/* zero-filled spill area */
+
+.text
+.global _start
+_start:
+	/* SPU Context Save Step 1: Save the first 16 GPRs. */
+	stqa $0, regs_spill + 0
+	stqa $1, regs_spill + 16
+	stqa $2, regs_spill + 32
+	stqa $3, regs_spill + 48
+	stqa $4, regs_spill + 64
+	stqa $5, regs_spill + 80
+	stqa $6, regs_spill + 96
+	stqa $7, regs_spill + 112
+	stqa $8, regs_spill + 128
+	stqa $9, regs_spill + 144
+	stqa $10, regs_spill + 160
+	stqa $11, regs_spill + 176
+	stqa $12, regs_spill + 192
+	stqa $13, regs_spill + 208
+	stqa $14, regs_spill + 224
+	stqa $15, regs_spill + 240
+
+	/* SPU Context Save, Step 8: Save the remaining 112 GPRs. */
+	ila     $3, regs_spill + 256	/* $3 -> slot for $16; $3-$5 already saved */
+save_regs:
+	lqr     $4, save_reg_insts	/* $4 = the four stqd words below */
+save_reg_loop:
+	ai      $4, $4, 4		/* +4 per word: presumably the source reg field */
+	.balignl 16, 0x40200000		/* pad with nops to a 16-byte boundary */
+save_reg_insts:       /* must be quad-word aligned. */
+	stqd    $16, 0($3)
+	stqd    $17, 16($3)
+	stqd    $18, 32($3)
+	stqd    $19, 48($3)
+	andi    $5, $4, 0x7F		/* low 7 bits of the bumped words */
+	stqr    $4, save_reg_insts	/* self-modify: rewrite the stqd group */
+	ai      $3, $3, 64		/* next four spill slots */
+	brnz    $5, save_reg_loop	/* loop until those bits wrap to 0 */
+
+	/* Initialize the stack pointer to point to 16368
+	 * (16kb-16). The back chain pointer is initialized
+	 * to NULL.
+	 */
+	il	$0, 0			/* $0 = 0, the NULL back chain */
+	il	$SP, 16368
+	stqd	$0, 0($SP)		/* store it at the top of the stack */
+
+	/* Allocate a minimum stack frame for the called main.
+	 * This is needed so that main has a place to save the
+	 * link register when it calls another function.
+	 */
+	stqd	$SP, -160($SP)		/* new frame's back chain = old $SP */
+	ai	$SP, $SP, -160		/* move $SP down by the 160-byte frame */
+
+	/* Call the program's main function. */
+	brsl	$0, main
+
+	/* In this case main should not return; if it does
+	 * there has been an error in the sequence.  Execute
+	 * stop-and-signal with code=0.
+	 */
+.global exit
+.global	_exit
+exit:
+_exit:
+	stop	0x0			/* reached only if main returns */
+
+	/* Pad the size of this crt0.o to be a multiple of 16 bytes. */
+.balignl 16, 0x0
+
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
new file mode 100644
index 0000000000..b9f81ac8a6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
@@ -0,0 +1,743 @@
+/*
+ * spu_save_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_save.c.
+ * Do not edit!
+ */
+static unsigned int spu_save_code[]  __attribute__((__aligned__(128))) = {
+0x20805000,
+0x20805201,
+0x20805402,
+0x20805603,
+0x20805804,
+0x20805a05,
+0x20805c06,
+0x20805e07,
+0x20806008,
+0x20806209,
+0x2080640a,
+0x2080660b,
+0x2080680c,
+0x20806a0d,
+0x20806c0e,
+0x20806e0f,
+0x4201c003,
+0x33800184,
+0x1c010204,
+0x40200000,
+0x24000190,
+0x24004191,
+0x24008192,
+0x2400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffb85,
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33000180,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb1c38103,
+0x01a00204,
+0x3ec10082,
+0x4201400d,
+0xb1c38202,
+0x01a00583,
+0x34218682,
+0x3ed80684,
+0xb0408184,
+0x24218682,
+0x01a00603,
+0x00200000,
+0x34214682,
+0x3ed40684,
+0xb0408184,
+0x40800003,
+0x24214682,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x4020007f,
+0x1000251e,
+0x42a00002,
+0x32800008,
+0x4205c00c,
+0x00200000,
+0x40a0000b,
+0x3f82070f,
+0x4080020a,
+0x40800709,
+0x3fe3078f,
+0x3fbf0783,
+0x3f200183,
+0x3fbe0183,
+0x3fe30187,
+0x18008387,
+0x4205c002,
+0x3ac30404,
+0x1cffc489,
+0x00200000,
+0x18008403,
+0x38830402,
+0x4cffc486,
+0x3ac28185,
+0xb0408584,
+0x28830402,
+0x1c020408,
+0x38828182,
+0xb0408385,
+0x1802c387,
+0x28828182,
+0x217ff886,
+0x04000582,
+0x32800007,
+0x21a00802,
+0x3fbf0705,
+0x3f200285,
+0x3fbe0285,
+0x3fe30285,
+0x21a00885,
+0x04000603,
+0x21a00903,
+0x40803c02,
+0x21a00982,
+0x04000386,
+0x21a00a06,
+0x40801202,
+0x21a00a82,
+0x73000003,
+0x24200683,
+0x01a00404,
+0x00200000,
+0x34204682,
+0x3ec40683,
+0xb0408203,
+0x24204682,
+0x01a00783,
+0x00200000,
+0x3421c682,
+0x3edc0684,
+0xb0408184,
+0x2421c682,
+0x21a00806,
+0x21a00885,
+0x3fbf0784,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x21a00904,
+0x40804002,
+0x21a00982,
+0x21a00a06,
+0x40805a02,
+0x21a00a82,
+0x04000683,
+0x21a00803,
+0x21a00885,
+0x21a00904,
+0x40848002,
+0x21a00982,
+0x21a00a06,
+0x40801002,
+0x21a00a82,
+0x21a00a06,
+0x40806602,
+0x00200000,
+0x35800009,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x01a00d83,
+0x00003ffb,
+0x40800003,
+0x4020007f,
+0x35000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h
new file mode 100644
index 0000000000..4fc1ebb45e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * utils.h: Utilities for SPU-side of the context switch operation.
+ *
+ * (C) Copyright IBM 2005
+ */
+
+#ifndef _SPU_CONTEXT_UTILS_H_
+#define _SPU_CONTEXT_UTILS_H_
+
+/*
+ * 64-bit safe effective address (EA).
+ *
+ * The same storage can be accessed as one 64-bit value ('ull') or as two
+ * unsigned int words ('ui').  enqueue_putllc() in this header writes ui[0]
+ * to MFC_EAH and ui[1] to MFC_EAL, i.e. ui[0] is treated as the high word
+ * and ui[1] as the low word.
+ */
+typedef union {
+	unsigned long long ull;
+	unsigned int ui[2];
+} addr64;
+
+/*
+ * 128-bit register template.
+ *
+ * Overlays four unsigned int slots with a 'vector unsigned int' so one
+ * register image can be accessed either slot by slot or as a whole vector.
+ * regs_spill[] (declared below) is an array of these.
+ */
+typedef union {
+	unsigned int slot[4];
+	vector unsigned int v;
+} spu_reg128v;
+
+/*
+ * DMA list structure.
+ *
+ * One element of the DMA list built by build_dma_list():
+ *   size   - transfer size for this element (build_dma_list() uses 16384).
+ *   ea_low - low word of the effective address for this element.
+ */
+struct dma_list_elem {
+	unsigned int size;
+	unsigned int ea_low;
+};
+
+/*
+ * Declare storage for 8-byte aligned DMA list.
+ *
+ * All 15 elements are filled in by build_dma_list().
+ *
+ * NOTE(review): this is a definition (not an extern declaration) placed in
+ * a header, so every translation unit that includes it gets its own copy;
+ * presumably each SPU program includes this header exactly once -- confirm.
+ */
+struct dma_list_elem dma_list[15] __attribute__ ((aligned(8)));
+
+/*
+ * External definition for storage
+ * declared in crt0.
+ */
+extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS];
+
+/*
+ * Compute LSCSA byte offset for a given field.
+ *
+ * 'dummy' is a null struct spu_lscsa pointer that is only used to form
+ * member addresses; it is never meant to be dereferenced for data.
+ *
+ * LSCSA_BYTE_OFFSET(_field) - distance in bytes from gprs[0].slot[0] to
+ *                             _field inside struct spu_lscsa.
+ * LSCSA_QW_OFFSET(_field)   - the same distance divided by 16 (>> 4),
+ *                             i.e. expressed in 16-byte quadwords.
+ *
+ * _field may include an array subscript, e.g. ls[16384] as used by
+ * build_dma_list().
+ */
+static struct spu_lscsa *dummy = (struct spu_lscsa *)0;
+#define LSCSA_BYTE_OFFSET(_field)  \
+	((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0])))
+#define LSCSA_QW_OFFSET(_field)  (LSCSA_BYTE_OFFSET(_field) >> 4)
+
+static inline void set_event_mask(void)
+{
+	/* Save, Step 4:
+	 * Restore, Step 1:
+	 *    Mask all events by writing zero to the
+	 *    SPU_WrEventMask channel.
+	 */
+	unsigned int no_events = 0;
+
+	spu_writech(SPU_WrEventMask, no_events);
+}
+
+static inline void set_tag_mask(void)
+{
+	/* Save, Step 5:
+	 * Restore, Step 2:
+	 *    Write '01' to the MFC_WrTagMask channel so that
+	 *    only tag group 0 is unmasked.
+	 */
+	unsigned int only_group_0 = 1;
+
+	spu_writech(MFC_WrTagMask, only_group_0);
+}
+
+static inline void build_dma_list(addr64 lscsa_ea)
+{
+	/* Save, Step 6:
+	 * Restore, Step 3:
+	 *    Point every element of the pre-canned DMA list in
+	 *    local storage at the CSA: element 0 starts at the
+	 *    low EA word plus the offset of ls[16384], and each
+	 *    following element is 16384 further on.
+	 */
+	unsigned int ea_low = lscsa_ea.ui[1] + LSCSA_BYTE_OFFSET(ls[16384]);
+	struct dma_list_elem *elem;
+
+	for (elem = dma_list; elem < dma_list + 15; elem++) {
+		elem->size = 16384;
+		elem->ea_low = ea_low;
+		ea_low += 16384;
+	}
+}
+
+static inline void enqueue_putllc(addr64 lscsa_ea)
+{
+	/* Save, Step 12:
+	 * Restore, Step 7:
+	 *    Queue a PUTLLC (tag 0) to the MFC, targeting an
+	 *    effective address inside the CSA, so that any
+	 *    lock-line reservation that may exist is removed.
+	 *    The command channel is written last, after all
+	 *    parameter channels.
+	 */
+	unsigned int ls_addr = 0;
+	unsigned int xfer_size = 128;
+	unsigned int tag = 0;
+	unsigned int opcode = 0xB4;	/* PUTLLC */
+
+	spu_writech(MFC_LSA, ls_addr);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, xfer_size);
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, opcode);
+}
+
+static inline void set_tag_update(void)
+{
+	/* Save, Step 15:
+	 * Restore, Step 8:
+	 *    Request a tag status update by writing '01'
+	 *    to the MFC_WrTagUpdate channel.
+	 */
+	unsigned int request = 1;
+
+	spu_writech(MFC_WrTagUpdate, request);
+}
+
+static inline void read_tag_status(void)
+{
+	/* Save, Step 16:
+	 * Restore, Step 9:
+	 *    Read the MFC_RdTagStat channel; the value read
+	 *    is intentionally discarded.
+	 */
+	(void)spu_readch(MFC_RdTagStat);
+}
+
+static inline void read_llar_status(void)
+{
+	/* Save, Step 17:
+	 * Restore, Step 10:
+	 *    Read the MFC_RdAtomicStat channel; the value
+	 *    read is intentionally discarded.
+	 */
+	(void)spu_readch(MFC_RdAtomicStat);
+}
+
+#endif				/* _SPU_CONTEXT_UTILS_H_ */
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
new file mode 100644
index 0000000000..84958487f6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+#ifndef SPUFS_H
+#define SPUFS_H
+
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cpumask.h>
+#include <linux/sched/signal.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+
+#define SPUFS_PS_MAP_SIZE	0x20000
+#define SPUFS_MFC_MAP_SIZE	0x1000
+#define SPUFS_CNTL_MAP_SIZE	0x1000
+#define SPUFS_SIGNAL_MAP_SIZE	PAGE_SIZE
+#define SPUFS_MSS_MAP_SIZE	0x1000
+
+/* The magic number for our file system */
+enum {
+	SPUFS_MAGIC = 0x23c9b64e,
+};
+
+struct spu_context_ops;
+struct spu_gang;
+
+/*
+ * ctx->sched_flags
+ *
+ * Values used with spu_context.sched_flags (an unsigned long).
+ * NOTE(review): presumably bit numbers for the kernel bitops rather than
+ * masks -- confirm against the users of sched_flags.
+ */
+enum {
+	SPU_SCHED_NOTIFY_ACTIVE,
+	SPU_SCHED_WAS_ACTIVE,	/* was active upon spu_acquire_saved()  */
+	SPU_SCHED_SPU_RUN,	/* context is within spu_run */
+};
+
+enum {
+	SWITCH_LOG_BUFSIZE = 4096,
+};
+
+enum {
+	SWITCH_LOG_START,
+	SWITCH_LOG_STOP,
+	SWITCH_LOG_EXIT,
+};
+
+/*
+ * Per-context switch log (see spu_context.switch_log).
+ *
+ * 'log' is a flexible array member, so the number of entries is fixed by
+ * whoever allocates the structure.
+ * NOTE(review): head/tail look like producer/consumer indices into log[]
+ * and 'wait' like the queue readers sleep on -- confirm against the code
+ * that fills and drains the log.
+ */
+struct switch_log {
+	wait_queue_head_t	wait;
+	unsigned long		head;
+	unsigned long		tail;
+	struct switch_log_entry {
+		struct timespec64 tstamp;
+		s32		spu_id;
+		u32		type;	/* presumably a SWITCH_LOG_* value -- confirm */
+		u32		val;
+		u64		timebase;
+	} log[];
+};
+
+/*
+ * Software state of one SPU context.
+ *
+ * Groups the saved hardware state (csa), the address_space pointers for the
+ * per-context mappings, locking, wait queues, scheduler bookkeeping,
+ * statistics and gang/affinity linkage.
+ */
+struct spu_context {
+	struct spu *spu;		  /* pointer to a physical SPU */
+	struct spu_state csa;		  /* SPU context save area. */
+	spinlock_t mmio_lock;		  /* protects mmio access */
+	struct address_space *local_store; /* local store mapping.  */
+	struct address_space *mfc;	   /* 'mfc' area mappings. */
+	struct address_space *cntl;	   /* 'control' area mappings. */
+	struct address_space *signal1;	   /* 'signal1' area mappings. */
+	struct address_space *signal2;	   /* 'signal2' area mappings. */
+	struct address_space *mss;	   /* 'mss' area mappings. */
+	struct address_space *psmap;	   /* 'psmap' area mappings. */
+	struct mutex mapping_lock;
+	u64 object_id;		   /* user space pointer for GNU Debugger */
+
+	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
+	/* taken by spu_acquire() and dropped by spu_release() */
+	struct mutex state_mutex;
+	struct mutex run_mutex;
+
+	struct mm_struct *owner;
+
+	/* NOTE(review): presumably released via destroy_spu_context() -- confirm */
+	struct kref kref;
+	wait_queue_head_t ibox_wq;
+	wait_queue_head_t wbox_wq;
+	wait_queue_head_t stop_wq;
+	wait_queue_head_t mfc_wq;
+	wait_queue_head_t run_wq;
+	u32 tagwait;
+	struct spu_context_ops *ops;
+	struct work_struct reap_work;
+	unsigned long flags;
+	unsigned long event_return;
+
+	struct list_head gang_list;
+	struct spu_gang *gang;
+	struct kref *prof_priv_kref;
+	void ( * prof_priv_release) (struct kref *kref);
+
+	/* owner thread */
+	pid_t tid;
+
+	/* scheduler fields */
+	struct list_head rq;
+	unsigned int time_slice;
+	unsigned long sched_flags;	/* SPU_SCHED_* values, see enum above */
+	cpumask_t cpus_allowed;
+	int policy;
+	int prio;
+	int last_ran;
+
+	/* statistics */
+	struct {
+		/* updates protected by ctx->state_mutex */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;	/* time of last state switch */
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long slb_flt_base; /* # at last ctx switch */
+		unsigned long long class2_intr;
+		unsigned long long class2_intr_base; /* # at last ctx switch */
+		unsigned long long libassist;
+	} stats;
+
+	/* context switch log */
+	struct switch_log *switch_log;
+
+	struct list_head aff_list;
+	int aff_head;
+	int aff_offset;
+};
+
+/*
+ * A gang: a group of SPU contexts (contexts point back here through
+ * spu_context.gang).  Reference-counted through 'kref'; see
+ * get_spu_gang()/put_spu_gang() declared below.
+ *
+ * NOTE(review): 'list' is presumably the head that each member's
+ * spu_context.gang_list is linked on, and 'contexts' the member count --
+ * confirm against spu_gang_add_ctx()/spu_gang_remove_ctx().
+ */
+struct spu_gang {
+	struct list_head list;
+	struct mutex mutex;
+	struct kref kref;
+	int contexts;
+
+	/* affinity state; aff_flags holds AFF_* bits defined below */
+	struct spu_context *aff_ref_ctx;
+	struct list_head aff_list_head;
+	struct mutex aff_mutex;
+	int aff_flags;
+	struct spu *aff_ref_spu;
+	atomic_t aff_sched_count;
+};
+
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET		1
+#define AFF_MERGED		2
+
+/*
+ * Description of a single MFC DMA command, as passed to the
+ * send_mfc_command() operation in struct spu_context_ops.
+ */
+struct mfc_dma_command {
+	int32_t pad;	/* reserved */
+	uint32_t lsa;	/* local storage address */
+	uint64_t ea;	/* effective address */
+	uint16_t size;	/* transfer size */
+	uint16_t tag;	/* command tag */
+	uint16_t class;	/* class ID */
+	uint16_t cmd;	/* command opcode */
+};
+
+
+/*
+ * SPU context query/set operations.
+ *
+ * Table of function pointers through which a context's state is read or
+ * modified; every operation takes the context as its first argument.
+ * Two instances, spu_hw_ops and spu_backing_ops, are declared after this
+ * structure.
+ */
+struct spu_context_ops {
+	int (*mbox_read)(struct spu_context *ctx, u32 *data);
+	u32 (*mbox_stat_read)(struct spu_context *ctx);
+	__poll_t (*mbox_stat_poll)(struct spu_context *ctx, __poll_t events);
+	int (*ibox_read)(struct spu_context *ctx, u32 *data);
+	int (*wbox_write)(struct spu_context *ctx, u32 data);
+	u32 (*signal1_read)(struct spu_context *ctx);
+	void (*signal1_write)(struct spu_context *ctx, u32 data);
+	u32 (*signal2_read)(struct spu_context *ctx);
+	void (*signal2_write)(struct spu_context *ctx, u32 data);
+	void (*signal1_type_set)(struct spu_context *ctx, u64 val);
+	u64 (*signal1_type_get)(struct spu_context *ctx);
+	void (*signal2_type_set)(struct spu_context *ctx, u64 val);
+	u64 (*signal2_type_get)(struct spu_context *ctx);
+	u32 (*npc_read)(struct spu_context *ctx);
+	void (*npc_write)(struct spu_context *ctx, u32 data);
+	u32 (*status_read)(struct spu_context *ctx);
+	char *(*get_ls)(struct spu_context *ctx);
+	void (*privcntl_write)(struct spu_context *ctx, u64 data);
+	u32 (*runcntl_read)(struct spu_context *ctx);
+	void (*runcntl_write)(struct spu_context *ctx, u32 data);
+	void (*runcntl_stop)(struct spu_context *ctx);
+	void (*master_start)(struct spu_context *ctx);
+	void (*master_stop)(struct spu_context *ctx);
+	int (*set_mfc_query)(struct spu_context *ctx, u32 mask, u32 mode);
+	u32 (*read_mfc_tagstatus)(struct spu_context *ctx);
+	u32 (*get_mfc_free_elements)(struct spu_context *ctx);
+	int (*send_mfc_command)(struct spu_context *ctx,
+				struct mfc_dma_command *cmd);
+	void (*dma_info_read)(struct spu_context *ctx,
+			      struct spu_dma_info *info);
+	void (*proxydma_info_read)(struct spu_context *ctx,
+				   struct spu_proxydma_info *info);
+	void (*restart_dma)(struct spu_context *ctx);
+};
+
+extern struct spu_context_ops spu_hw_ops;
+extern struct spu_context_ops spu_backing_ops;
+
+/*
+ * spufs-private inode data.  The generic VFS inode is embedded as
+ * 'vfs_inode', which lets SPUFS_I() recover the enclosing structure from a
+ * struct inode pointer via container_of().
+ */
+struct spufs_inode_info {
+	struct spu_context *i_ctx;
+	struct spu_gang *i_gang;
+	struct inode vfs_inode;
+	int i_openers;
+};
+/* Map a struct inode pointer to its enclosing struct spufs_inode_info. */
+#define SPUFS_I(inode) \
+	container_of(inode, struct spufs_inode_info, vfs_inode)
+
+/*
+ * One entry of a directory-contents table (spufs_dir_contents[] and its
+ * siblings declared below): a name together with the file operations,
+ * mode and size to use for it.
+ */
+struct spufs_tree_descr {
+	const char *name;
+	const struct file_operations *ops;
+	umode_t mode;
+	size_t size;
+};
+
+extern const struct spufs_tree_descr spufs_dir_contents[];
+extern const struct spufs_tree_descr spufs_dir_nosched_contents[];
+extern const struct spufs_tree_descr spufs_dir_debug_contents[];
+
+/* system call implementation */
+extern struct spufs_calls spufs_calls;
+struct coredump_params;
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(const struct path *nd, struct dentry *dentry, unsigned int flags,
+			umode_t mode, struct file *filp);
+/* ELF coredump callbacks for writing SPU ELF notes */
+extern int spufs_coredump_extra_notes_size(void);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
+
+extern const struct file_operations spufs_context_fops;
+
+/* gang management */
+struct spu_gang *alloc_spu_gang(void);
+struct spu_gang *get_spu_gang(struct spu_gang *gang);
+int put_spu_gang(struct spu_gang *gang);
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
+
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+int spufs_handle_class0(struct spu_context *ctx);
+
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
+/* context management */
+extern atomic_t nr_spu_contexts;
+/*
+ * Take ctx->state_mutex with an interruptible sleep.  The result of
+ * mutex_lock_interruptible() is handed back unchanged and must be checked
+ * by the caller.
+ */
+static inline int __must_check spu_acquire(struct spu_context *ctx)
+{
+	struct mutex *state_lock = &ctx->state_mutex;
+
+	return mutex_lock_interruptible(state_lock);
+}
+
+/* Drop ctx->state_mutex; counterpart of spu_acquire(). */
+static inline void spu_release(struct spu_context *ctx)
+{
+	struct mutex *state_lock = &ctx->state_mutex;
+
+	mutex_unlock(state_lock);
+}
+
+struct spu_context * alloc_spu_context(struct spu_gang *gang);
+void destroy_spu_context(struct kref *kref);
+struct spu_context * get_spu_context(struct spu_context *ctx);
+int put_spu_context(struct spu_context *ctx);
+void spu_unmap_mappings(struct spu_context *ctx);
+
+void spu_forget(struct spu_context *ctx);
+int __must_check spu_acquire_saved(struct spu_context *ctx);
+void spu_release_saved(struct spu_context *ctx);
+
+int spu_stopped(struct spu_context *ctx, u32 * stat);
+void spu_del_from_rq(struct spu_context *ctx);
+int spu_activate(struct spu_context *ctx, unsigned long flags);
+void spu_deactivate(struct spu_context *ctx);
+void spu_yield(struct spu_context *ctx);
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val);
+void spu_set_timeslice(struct spu_context *ctx);
+void spu_update_sched_info(struct spu_context *ctx);
+void __spu_update_sched_info(struct spu_context *ctx);
+int __init spu_sched_init(void);
+void spu_sched_exit(void);
+
+extern char *isolated_loader;
+
+/*
+ * spufs_wait
+ *	Same as wait_event_interruptible(), except that here
+ *	we need to call spu_release(ctx) before sleeping, and
+ *	then spu_acquire(ctx) when awoken.
+ *
+ * 	Returns with state_mutex re-acquired when successful or
+ * 	with -ERESTARTSYS and the state_mutex dropped when interrupted.
+ *
+ *	The macro takes only (wq, condition); it uses a variable named
+ *	'ctx' that must be in scope at the expansion site, and it expects
+ *	ctx->state_mutex to be held on entry (the first thing done after a
+ *	false condition is spu_release(ctx)).
+ *
+ *	'condition' is re-evaluated on every loop iteration, each time with
+ *	state_mutex held.  If spu_acquire() fails after waking up, its
+ *	return value becomes the result and the mutex is not held.
+ */
+
+#define spufs_wait(wq, condition)					\
+({									\
+	int __ret = 0;							\
+	DEFINE_WAIT(__wait);						\
+	for (;;) {							\
+		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spu_release(ctx);					\
+		if (signal_pending(current)) {				\
+			__ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		schedule();						\
+		__ret = spu_acquire(ctx);				\
+		if (__ret)						\
+			break;						\
+	}								\
+	finish_wait(&(wq), &__wait);					\
+	__ret;								\
+})
+
+size_t spu_wbox_write(struct spu_context *ctx, u32 data);
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data);
+
+/* irq callback funcs. */
+void spufs_ibox_callback(struct spu *spu);
+void spufs_wbox_callback(struct spu *spu);
+void spufs_stop_callback(struct spu *spu, int irq);
+void spufs_mfc_callback(struct spu *spu);
+void spufs_dma_callback(struct spu *spu, int type);
+
+/*
+ * One entry of spufs_coredump_read[] (declared below): a named piece of
+ * context state together with callbacks to obtain it.
+ *
+ * NOTE(review): presumably an entry provides either 'dump' (writes the
+ * data through the coredump_params) or 'get' (returns a single u64), and
+ * 'size' is the size of the data -- confirm against the coredump code.
+ */
+struct spufs_coredump_reader {
+	char *name;
+	ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm);
+	u64 (*get)(struct spu_context *ctx);
+	size_t size;
+};
+extern const struct spufs_coredump_reader spufs_coredump_read[];
+
+extern int spu_init_csa(struct spu_state *csa);
+extern void spu_fini_csa(struct spu_state *csa);
+extern int spu_save(struct spu_state *prev, struct spu *spu);
+extern int spu_restore(struct spu_state *new, struct spu *spu);
+extern int spu_switch(struct spu_state *prev, struct spu_state *new,
+		      struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
+
+extern void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state);
+
+#endif
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h
new file mode 100644
index 0000000000..1def11e911
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SPUFS_H
+
+#include <linux/tracepoint.h>
+#include <linux/stringify.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM spufs
+
+/*
+ * spufs_context tracepoint.
+ *
+ * Records the event name, the owning thread id of the context (ctx->tid)
+ * and the number of the SPU involved, or -1 when 'spu' is NULL.
+ * Only the 'name' pointer is stored in the trace entry, not a copy of the
+ * string, so the string must remain valid until the entry is printed.
+ */
+TRACE_EVENT(spufs_context,
+	TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
+	TP_ARGS(ctx, spu, name),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(int, owner_tid)
+		__field(int, number)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->owner_tid = ctx->tid;
+		__entry->number = spu ? spu->number : -1;
+	),
+
+	TP_printk("%s (ctxthread = %d, spu = %d)",
+		__entry->name, __entry->owner_tid, __entry->number)
+);
+
+/*
+ * Convenience wrappers around trace_spufs_context().  'name' is given as a
+ * bare token and turned into a string literal with __stringify().
+ * spu_context_nospu_trace() is the variant for events without an SPU: it
+ * passes NULL, which the tracepoint records as SPU number -1.
+ */
+#define spu_context_trace(name, ctx, spu) \
+	trace_spufs_context(ctx, spu, __stringify(name))
+#define spu_context_nospu_trace(name, ctx) \
+	trace_spufs_context(ctx, NULL, __stringify(name))
+
+#endif /* _TRACE_SPUFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE sputrace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
new file mode 100644
index 0000000000..b41e81b22f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -0,0 +1,2206 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu_switch.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * Host-side part of SPU context switch sequence outlined in
+ * Synergistic Processor Element, Book IV.
+ *
+ * A fully preemptive switch of an SPE is very expensive in terms
+ * of time and system resources.  SPE Book IV indicates that SPE
+ * allocation should follow a "serially reusable device" model,
+ * in which the SPE is assigned a task until it completes.  When
+ * this is not possible, this sequence may be used to preemptively
+ * save, and then later (optionally) restore the context of a
+ * program executing on an SPE.
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+
+#include "spufs.h"
+
+#include "spu_save_dump.h"
+#include "spu_restore_dump.h"
+
+#if 0	/* disabled variant: plain busy-wait for as long as _c is true */
+#define POLL_WHILE_TRUE(_c) {				\
+    do {						\
+    } while (_c);					\
+  }
+#else	/* active variant: spin a bounded number of times, then yield() */
+#define RELAX_SPIN_COUNT				1000	/* spins per round */
+#define POLL_WHILE_TRUE(_c) {				\
+    do {						\
+	int _i;						\
+	for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \
+	    cpu_relax();				\
+	}						\
+	if (unlikely(_c)) yield();			\
+	else break;					\
+    } while (_c);					\
+  }
+#endif				/* debug */
+/* Poll until _c becomes true.  Note: both macros evaluate _c repeatedly. */
+#define POLL_WHILE_FALSE(_c)	POLL_WHILE_TRUE(!(_c))
+
+static inline void acquire_spu_lock(struct spu *spu)
+{
+	/* Save, Step 1:
+	 * Restore, Step 1:
+	 *    Acquire SPU-specific mutual exclusion lock.
+	 *    TBD: placeholder only, this function is empty.
+	 */
+}
+
+static inline void release_spu_lock(struct spu *spu)
+{
+	/* Restore, Step 76:
+	 *    Release SPU-specific mutual exclusion lock.
+	 *    TBD: placeholder only, this function is empty.
+	 */
+}
+
+static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 2:
+	 * Save, Step 6:
+	 *     Report whether any of SPU_Status[E,L,IS] is set.  When
+	 *     one is, the SPU is in isolate state and cannot be
+	 *     context saved right now.  Returns 1 if so, else 0.
+	 */
+	const u32 isolate_bits = SPU_STATUS_ISOLATED_EXIT_STATUS |
+				 SPU_STATUS_ISOLATED_LOAD_STATUS |
+				 SPU_STATUS_ISOLATED_STATE;
+	u32 status = in_be32(&spu->problem->spu_status_R);
+
+	return (status & isolate_bits) != 0;
+}
+
+static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 3:
+	 * Restore, Step 2:
+	 *     Save INT_Mask_class0 in CSA.
+	 *     Write INT_MASK_class0 with value of 0.
+	 *     Save INT_Mask_class1 in CSA.
+	 *     Write INT_MASK_class1 with value of 0.
+	 *     Save INT_Mask_class2 in CSA.
+	 *     Write INT_MASK_class2 with value of 0.
+	 *     Synchronize all three interrupts to be sure
+	 *     we no longer execute a handler on another CPU.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	if (csa) {	/* csa may be NULL: masks are then cleared but not saved */
+		csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
+		csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
+		csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
+	}
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, 0ul);
+	spu_int_mask_set(spu, 2, 0ul);
+	eieio();
+	spin_unlock_irq(&spu->register_lock);
+
+	/*
+	 * The switch-pending flag needs to be set before calling
+	 * synchronize_irq so that the update will be visible to the
+	 * relevant handlers via a simple load.
+	 */
+	set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+	clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+	synchronize_irq(spu->irqs[0]);
+	synchronize_irq(spu->irqs[1]);
+	synchronize_irq(spu->irqs[2]);
+}
+
+static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 4:
+	 * Restore, Step 25.
+	 *    Set a software watchdog timer, which specifies the
+	 *    maximum allowable time for a context save sequence.
+	 *
+	 *    For the present, this implementation sets no global
+	 *    watchdog timer, as virtualization & variable system
+	 *    load may cause unpredictable execution times.
+	 */
+}
+
+static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 5:
+	 * Restore, Step 3:
+	 *     Inhibit user-space access (if provided) to this
+	 *     SPU by unmapping the virtual pages assigned to
+	 *     the SPU memory-mapped I/O (MMIO) for problem
+	 *     state.  TBD: not done here, this function is empty.
+	 */
+}
+
+static inline void set_switch_pending(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 7:
+	 * Restore, Step 5:
+	 *     Set a software context switch pending flag.
+	 *     No-op here: done in Step 3 - disable_interrupts().
+	 */
+}
+
+static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 8:
+	 *     Suspend DMA and save MFC_CNTL (saving is skipped if !csa).
+	 */
+	switch (in_be64(&priv2->mfc_control_RW) &
+	       MFC_CNTL_SUSPEND_DMA_STATUS_MASK) {
+	case MFC_CNTL_SUSPEND_IN_PROGRESS:	/* wait for it to complete */
+		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+				 MFC_CNTL_SUSPEND_COMPLETE);
+		fallthrough;
+	case MFC_CNTL_SUSPEND_COMPLETE:	/* saved copy gets the suspend bit set */
+		if (csa)
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) |
+				MFC_CNTL_SUSPEND_DMA_QUEUE;
+		break;
+	case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION:	/* request suspend, wait */
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+				 MFC_CNTL_SUSPEND_COMPLETE);
+		if (csa)	/* saved copy gets both suspend bits cleared */
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) &
+				~MFC_CNTL_SUSPEND_DMA_QUEUE &
+				~MFC_CNTL_SUSPEND_MASK;
+		break;
+	}
+}
+
+static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 9:
+	 *     Copy SPU_Runcntl from the problem-state area into
+	 *     the CSA.  The register holds the "Application
+	 *     Desired State".
+	 */
+	csa->prob.spu_runcntl_RW =
+		in_be32(&spu->problem->spu_runcntl_RW);
+}
+
+static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 10:
+	 *     Save MFC_SR1 (read via spu_mfc_sr1_get()) in the CSA.
+	 */
+	csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
+}
+
+static inline void save_spu_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 11:
+	 *     Read SPU_Status[R], and save to CSA.
+	 */
+	if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) {
+		csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+	} else {
+		u32 stopped;
+
+		/* Still running: request a stop and wait for it. */
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+		stopped =
+		    SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+		if ((in_be32(&prob->spu_status_R) & stopped) == 0)
+			csa->prob.spu_status_R = SPU_STATUS_RUNNING;
+		else	/* one of the stop-reason bits is set: save as read */
+			csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+	}
+}
+
+static inline void save_mfc_stopped_status(struct spu_state *csa,
+		struct spu *spu)
+{
+	const u64 tracked_bits = MFC_CNTL_DMA_QUEUES_EMPTY |
+				 MFC_CNTL_DECREMENTER_RUNNING;
+	u64 saved_cntl;
+
+	/* Save, Step 12:
+	 *     Read MFC_CNTL[Ds] and refresh the copy held in
+	 *     CSA.MFC_CNTL[Ds]; MFC_CNTL[Q] gets the same
+	 *     treatment.  Other saved bits are preserved.
+	 */
+	saved_cntl = csa->priv2.mfc_control_RW & ~tracked_bits;
+	saved_cntl |= in_be64(&spu->priv2->mfc_control_RW) & tracked_bits;
+	csa->priv2.mfc_control_RW = saved_cntl;
+}
+
+static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
+{
+	const u64 halt_cmd = MFC_CNTL_DECREMENTER_HALTED |
+			     MFC_CNTL_SUSPEND_MASK;
+
+	/* Save, Step 13:
+	 *     Halt the decrementer by writing MFC_CNTL with
+	 *     [Dh] set to '1'.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, halt_cmd);
+	eieio();
+}
+
+static inline void save_timebase(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 14:
+	 *    Read PPE Timebase High and Timebase low registers
+	 *    and save in CSA.  TBD: get_cycles() is stored for now.
+	 */
+	csa->suspend_time = get_cycles();
+}
+
+static inline void remove_other_spu_access(struct spu_state *csa,
+					   struct spu *spu)
+{
+	/* Save, Step 15:
+	 *     Remove other SPU access to this SPU by unmapping this
+	 *     SPU's pages from their address space.  TBD (empty stub).
+	 */
+}
+
+static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *regs = spu->problem;
+
+	/* Save, Step 16:
+	 * Restore, Step 11.
+	 *     Kick SPU_MSSync by writing 1 to it, then poll
+	 *     until SPU_MSSync[P] reads back as 0.
+	 */
+	out_be64(&regs->spc_mssync_RW, 1UL);
+	POLL_WHILE_TRUE(in_be64(&regs->spc_mssync_RW) & MS_SYNC_PENDING);
+}
+
+static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 17:
+	 * Restore, Step 12.
+	 * Restore, Step 48.
+	 *     Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register.
+	 *     Then issue a PPE sync instruction (mb() below).
+	 */
+	spu_tlb_invalidate(spu);
+	mb();
+}
+
+static inline void handle_pending_interrupts(struct spu_state *csa,
+					     struct spu *spu)
+{
+	/* Save, Step 18:
+	 *     Handle any pending interrupts from this SPU
+	 *     here.  This is OS or hypervisor specific.  One
+	 *     option is to re-enable interrupts to handle any
+	 *     pending interrupts, with the interrupt handlers
+	 *     recognizing the software Context Switch Pending
+	 *     flag, to ensure the SPU execution or MFC command
+	 *     queue is not restarted.  TBD (empty stub for now).
+	 */
+}
+
+static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	int slot;
+
+	/* Save, Step 19:
+	 *     Copy puq[0..7] and spuq[0..15] unless MFC_Cntl[Se]=1.
+	 */
+	if (in_be64(&regs->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY)
+		return;
+
+	for (slot = 0; slot < 8; slot++) {
+		csa->priv2.puq[slot].mfc_cq_data0_RW =
+			in_be64(&regs->puq[slot].mfc_cq_data0_RW);
+		csa->priv2.puq[slot].mfc_cq_data1_RW =
+			in_be64(&regs->puq[slot].mfc_cq_data1_RW);
+		csa->priv2.puq[slot].mfc_cq_data2_RW =
+			in_be64(&regs->puq[slot].mfc_cq_data2_RW);
+		csa->priv2.puq[slot].mfc_cq_data3_RW =
+			in_be64(&regs->puq[slot].mfc_cq_data3_RW);
+	}
+	for (slot = 0; slot < 16; slot++) {
+		csa->priv2.spuq[slot].mfc_cq_data0_RW =
+			in_be64(&regs->spuq[slot].mfc_cq_data0_RW);
+		csa->priv2.spuq[slot].mfc_cq_data1_RW =
+			in_be64(&regs->spuq[slot].mfc_cq_data1_RW);
+		csa->priv2.spuq[slot].mfc_cq_data2_RW =
+			in_be64(&regs->spuq[slot].mfc_cq_data2_RW);
+		csa->priv2.spuq[slot].mfc_cq_data3_RW =
+			in_be64(&regs->spuq[slot].mfc_cq_data3_RW);
+	}
+}
+
+static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 20:
+	 *     Read the PPU_QueryMask register (dma_querymask_RW
+	 *     in the problem-state area) and keep its value in
+	 *     the CSA.
+	 */
+	csa->prob.dma_querymask_RW =
+		in_be32(&spu->problem->dma_querymask_RW);
+}
+
+static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 21:
+	 *     Read the PPU_QueryType register (dma_querytype_RW
+	 *     in the problem-state area) and keep its value in
+	 *     the CSA.
+	 */
+	csa->prob.dma_querytype_RW =
+		in_be32(&spu->problem->dma_querytype_RW);
+}
+
+static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu)
+{
+	/* Keep a copy of the Prxy_TagStatus register in the CSA.
+	 *
+	 * Restoring dma_tagstatus_R is unnecessary, but the CSA
+	 * copy is accessed via backing_ops, so it has to be
+	 * captured here all the same.
+	 */
+	u32 tagstatus = in_be32(&spu->problem->dma_tagstatus_R);
+
+	csa->prob.dma_tagstatus_R = tagstatus;
+}
+
+static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 22:
+	 *     Save the MFC_CSR_TSQ register.  The value is
+	 *     stored in csa->priv2, i.e. in the CSA itself
+	 *     rather than in the LSCSA.
+	 */
+	u64 tsq = in_be64(&spu->priv2->spu_tag_status_query_RW);
+
+	csa->priv2.spu_tag_status_query_RW = tsq;
+}
+
+static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	u64 cmd1, cmd2;
+
+	/* Save, Step 23: copy MFC_CSR_CMD1 and MFC_CSR_CMD2 to the CSA. */
+	cmd1 = in_be64(&regs->spu_cmd_buf1_RW);
+	cmd2 = in_be64(&regs->spu_cmd_buf2_RW);
+	csa->priv2.spu_cmd_buf1_RW = cmd1;
+	csa->priv2.spu_cmd_buf2_RW = cmd2;
+}
+
+static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 24:
+	 *     Read the MFC_CSR_ATO register and store the
+	 *     value in the CSA.
+	 */
+	u64 ato = in_be64(&spu->priv2->spu_atomic_status_RW);
+
+	csa->priv2.spu_atomic_status_RW = ato;
+}
+
+static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 25:
+	 *     Save the MFC_TCLASS_ID register (as returned by
+	 *     spu_mfc_tclass_id_get()) in the CSA.
+	 */
+	csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu);
+}
+
+static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 26:
+	 * Restore, Step 23.
+	 *     Write the MFC_TCLASS_ID register with the fixed
+	 *     value 0x10000000; csa is not used here.
+	 */
+	spu_mfc_tclass_id_set(spu, 0x10000000);
+	eieio();
+}
+
+static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	const u64 purge_cmd = MFC_CNTL_PURGE_DMA_REQUEST |
+			      MFC_CNTL_SUSPEND_MASK;
+
+	/* Save, Step 27:
+	 * Restore, Step 14.
+	 *     Request a purge of the MFC DMA queue by writing
+	 *     MFC_CNTL[Pc]=1.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, purge_cmd);
+	eieio();
+}
+
+static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	/* Save, Step 28:
+	 *     Spin on MFC_CNTL[Ps] until it reads '11',
+	 *     i.e. the purge has completed.
+	 */
+	POLL_WHILE_FALSE(MFC_CNTL_PURGE_DMA_COMPLETE ==
+			 (in_be64(&regs->mfc_control_RW) &
+			  MFC_CNTL_PURGE_DMA_STATUS_MASK));
+}
+
+static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	const u64 switch_sr1 = MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+			       MFC_STATE1_RELOCATE_MASK |
+			       MFC_STATE1_BUS_TLBIE_MASK;
+
+	/* Save, Step 30:
+	 * Restore, Step 18:
+	 *     Program MFC_SR1 with MFC_SR1[D=0,S=1] and with
+	 *     MFC_SR1[TL,R,Pr,T] chosen for the OS specific
+	 *     environment.
+	 *
+	 *     Implementation note: MFC_SR1[Pr] stays clear because
+	 *     the SPU-side save/restore code is privileged.
+	 */
+	spu_mfc_sr1_set(spu, switch_sr1);
+}
+
+static inline void save_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 31:
+	 *     Record the current SPU_NPC value in the CSA.
+	 */
+	u32 npc = in_be32(&spu->problem->spu_npc_RW);
+
+	csa->prob.spu_npc_RW = npc;
+}
+
+static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 32:
+	 *     Record the current SPU_PrivCntl value in the CSA.
+	 */
+	u64 privcntl = in_be64(&spu->priv2->spu_privcntl_RW);
+
+	csa->priv2.spu_privcntl_RW = privcntl;
+}
+
+static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 33:
+	 * Restore, Step 16:
+	 *     Clear the SPU_PrivCntl[S,Le,A] fields by writing
+	 *     the whole register as 0, followed by an eieio
+	 *     instruction.
+	 */
+	out_be64(&spu->priv2->spu_privcntl_RW, 0UL);
+	eieio();
+}
+
+static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 34:
+	 *     Record the current SPU_LSLR value in the CSA.
+	 */
+	u64 lslr = in_be64(&spu->priv2->spu_lslr_RW);
+
+	csa->priv2.spu_lslr_RW = lslr;
+}
+
+static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 35:
+	 * Restore, Step 17.
+	 *     Reset SPU_LSLR: the register is written with
+	 *     LS_ADDR_MASK, followed by an eieio
+	 *     instruction.
+	 */
+	out_be64(&spu->priv2->spu_lslr_RW, LS_ADDR_MASK);
+	eieio();
+}
+
+static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 36:
+	 *     Record the current SPU_Cfg value in the CSA.
+	 */
+	u64 cfg = in_be64(&spu->priv2->spu_cfg_RW);
+
+	csa->priv2.spu_cfg_RW = cfg;
+}
+
+static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 37:
+	 *     Save PM_Trace_Tag_Wait_Mask in the CSA.
+	 *     Not performed by this implementation (empty stub).
+	 */
+}
+
+static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 38:
+	 *     Save the RA_GROUP_ID register and the
+	 *     RA_ENABLE register in the CSA.
+	 */
+	csa->priv1.resource_allocation_groupID_RW =
+		spu_resource_allocation_groupID_get(spu);
+	csa->priv1.resource_allocation_enable_RW =
+		spu_resource_allocation_enable_get(spu);
+}
+
+static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 39:
+	 *     Record the current MB_Stat value in the CSA.
+	 */
+	u32 mb_stat = in_be32(&spu->problem->mb_stat_R);
+
+	csa->prob.mb_stat_R = mb_stat;
+}
+
+static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 40:
+	 *     Read the PPU_MB register and keep it in the CSA.
+	 */
+	u32 mbox = in_be32(&spu->problem->pu_mb_R);
+
+	csa->prob.pu_mb_R = mbox;
+}
+
+static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 41:
+	 *     Read the PPUINT_MB register and keep it in the CSA.
+	 */
+	u64 int_mbox = in_be64(&spu->priv2->puint_mb_R);
+
+	csa->priv2.puint_mb_R = int_mbox;
+}
+
+static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	int i;
+
+	/* Save, Step 42:
+	 *     Save CH 1 data; save and then zero CH 0,3,4,24,25,27.
+	 */
+	/* Save CH 1, without channel count */
+	out_be64(&priv2->spu_chnlcntptr_RW, 1);
+	csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
+
+	/* Save the following CH: [0,3,4,24,25,27] */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);	/* select channel */
+		eieio();
+		csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW);
+		csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW);
+		out_be64(&priv2->spu_chnldata_RW, 0UL);	/* then zero both */
+		out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void save_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Save, Step 43:
+	 *     Save SPU Read Mailbox Channel (channel 29).
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW);
+	for (i = 0; i < 4; i++) {	/* four reads of the same data register */
+		csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW);
+	}
+	out_be64(&priv2->spu_chnlcnt_RW, 0UL);	/* count is zeroed afterwards */
+	eieio();
+}
+
+static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 44:
+	 *     Save MFC_CMD Channel (channel 21): count only.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 21UL);
+	eieio();
+	csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW);
+	eieio();
+}
+
+static inline void reset_ch(struct spu_state *csa, struct spu *spu)
+{
+	const u64 channel[] = { 21UL, 23UL, 28UL, 30UL };
+	const u64 reset_count[] = { 16UL, 1UL, 1UL, 1UL };
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Save, Step 45:
+	 *     Reset the following CH: [21, 23, 28, 30].  Each one is
+	 *     selected via the channel pointer register and then
+	 *     given its reset count (16 for CH 21, 1 for the rest).
+	 */
+	for (n = 0; n < ARRAY_SIZE(channel); n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, channel[n]);
+		eieio();
+		out_be64(&regs->spu_chnlcnt_RW, reset_count[n]);
+		eieio();
+	}
+}
+
+static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 46:
+	 * Restore, Step 25.
+	 *     Resume queue processing (MFC_CNTL[Sc]=0) by
+	 *     writing MFC_CNTL_RESUME_DMA_QUEUE to the MFC
+	 *     control register.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
+}
+
+static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu,
+		unsigned int *code, int code_size)
+{
+	/* Save, Step 47:
+	 * Restore, Step 30.
+	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
+	 *     register, then initialize SLB_VSID and SLB_ESID
+	 *     to provide access to SPU context save code and
+	 *     LSCSA.
+	 *
+	 *     This implementation places both the context
+	 *     switch code and LSCSA in kernel address space.
+	 *
+	 *     Further, this implementation assumes that
+	 *     MFC_SR1[R]=1 (in other words, it assumes that
+	 *     translation is desired by the OS environment).
+	 */
+	spu_invalidate_slbs(spu);
+	spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
+}
+
+static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 48:
+	 * Restore, Step 23.
+	 *     Change the software context switch pending flag
+	 *     to context switch active.  This implementation does
+	 *     not use a switch active flag.
+	 *
+	 * Now that we have saved the mfc in the csa, we can add in the
+	 * restart command if an exception occurred.
+	 */
+	if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
+		csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+	clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+	mb();
+}
+
+static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
+
+	/* Save, Step 49:
+	 * Restore, Step 22:
+	 *     Reset and then enable interrupts, as
+	 *     needed by OS.
+	 *
+	 *     This implementation enables only class1 (translation)
+	 *     interrupts; the class 0 and 2 masks are written as 0.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, class1_mask);
+	spu_int_mask_set(spu, 2, 0ul);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static inline int send_mfc_dma(struct spu *spu, unsigned long ea,
+			       unsigned int ls_offset, unsigned int size,
+			       unsigned int tag, unsigned int rclass,
+			       unsigned int cmd)
+{
+	/*
+	 * Issue the transfer in chunks of at most MFC_MAX_DMA_SIZE bytes,
+	 * re-issuing a chunk while status bits 0x3 are set.  Returns 0.
+	 */
+	struct spu_problem __iomem *regs = spu->problem;
+	volatile unsigned int status = 0x0;
+	union mfc_tag_size_class_cmd req;
+	unsigned int chunk;
+
+	for (; size > 0; size -= chunk, ea += chunk, ls_offset += chunk) {
+		chunk = size;
+		if (chunk > MFC_MAX_DMA_SIZE)
+			chunk = MFC_MAX_DMA_SIZE;
+		req.u.mfc_size = chunk;
+		req.u.mfc_tag = tag;
+		req.u.mfc_rclassid = rclass;
+		req.u.mfc_cmd = cmd;
+		do {
+			out_be32(&regs->mfc_lsa_W, ls_offset);
+			out_be64(&regs->mfc_ea_W, ea);
+			out_be64(&regs->mfc_union_W.all64, req.all64);
+			status = in_be32(&regs->mfc_union_W.by32.mfc_class_cmd32);
+			if (unlikely(status & 0x2))
+				cpu_relax();
+		} while (status & 0x3);
+	}
+	return 0;
+}
+
+static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 50:
+	 *     Issue a DMA command to copy the first 16K bytes
+	 *     of local storage to the CSA.
+	 *
+	 *     Concretely: an MFC_PUT_CMD of 16384 bytes, from
+	 *     local storage offset 0 into csa->lscsa->ls, with
+	 *     tag 0 and rclass 0.  The result of send_mfc_dma()
+	 *     is not checked.
+	 */
+	unsigned long dest_ea = (unsigned long)&csa->lscsa->ls[0];
+
+	send_mfc_dma(spu, dest_ea, 0x0, 16384, 0, 0, MFC_PUT_CMD);
+}
+
+static inline void set_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 51:
+	 * Restore, Step 31.
+	 *     Point the SPU at the context switch code: write
+	 *     SPU_NPC[IE]=0 and SPU_NPC[LSA] = entry point
+	 *     address of the code in local storage.
+	 *
+	 *     The SPU-side save/restore programs used by this
+	 *     implementation have their entry points at LSA 0,
+	 *     so the register is simply written as 0.
+	 */
+	out_be32(&spu->problem->spu_npc_RW, 0);
+	eieio();
+}
+
+static inline void set_signot1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u64 lscsa_ea = (u64) csa->lscsa;
+
+	/* Save, Step 52:
+	 * Restore, Step 32:
+	 *    Write SPU_Sig_Notify_1 register with upper 32-bits
+	 *    of the CSA.LSCSA effective address.
+	 *
+	 *    The half is extracted with a shift rather than by
+	 *    overlaying a u32 pair on the u64, so the result does
+	 *    not depend on the byte order of the host.
+	 */
+	out_be32(&prob->signal_notify1, (u32)(lscsa_ea >> 32));
+	eieio();
+}
+
+static inline void set_signot2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u64 lscsa_ea = (u64) csa->lscsa;
+
+	/* Save, Step 53:
+	 * Restore, Step 33:
+	 *    Write SPU_Sig_Notify_2 register with lower 32-bits
+	 *    of the CSA.LSCSA effective address.
+	 *
+	 *    The half is extracted by masking rather than by
+	 *    overlaying a u32 pair on the u64, so the result does
+	 *    not depend on the byte order of the host.
+	 */
+	out_be32(&prob->signal_notify2, (u32)(lscsa_ea & 0xffffffffUL));
+	eieio();
+}
+
+static inline void send_save_code(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 54:
+	 *     Issue a DMA command to copy context save code
+	 *     to local storage and start SPU.
+	 *
+	 *     Concretely: an MFC_GETFS_CMD of the whole
+	 *     spu_save_code[] array to local storage offset 0,
+	 *     with tag 0 and rclass 0.  The result of
+	 *     send_mfc_dma() is not checked.
+	 */
+	unsigned long code_ea = (unsigned long)&spu_save_code[0];
+
+	send_mfc_dma(spu, code_ea, 0x0, sizeof(spu_save_code), 0, 0, MFC_GETFS_CMD);
+}
+
+static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	const u32 tag0_mask = MFC_TAGID_TO_TAGMASK(0);
+
+	/* Save, Step 55:
+	 * Restore, Step 38.
+	 *     Enable Tag Group 0 by writing PPU_QueryMask=1,
+	 *     followed by an eieio instruction.
+	 */
+	out_be32(&spu->problem->dma_querymask_RW, tag0_mask);
+	eieio();
+}
+
+static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask = MFC_TAGID_TO_TAGMASK(0);
+	unsigned long flags;
+
+	/* Save, Step 56:
+	 * Restore, Step 39.
+	 * Restore, Step 46.
+	 *     Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
+	 *     or write PPU_QueryType[TS]=01 and wait for Tag Group
+	 *     Complete Interrupt.  Write INT_Stat_Class0 or
+	 *     INT_Stat_Class2 with value of 'handled'.  This code
+	 *     polls, then clears both with local IRQs disabled.
+	 */
+	POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
+
+	local_irq_save(flags);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(flags);
+}
+
+static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	unsigned long flags;
+
+	/* Save, Step 57:
+	 * Restore, Step 40.
+	 *     Poll until SPU_Status[R]=0 or wait for SPU Class 0
+	 *     or SPU Class 2 interrupt.  Write INT_Stat_class0
+	 *     or INT_Stat_class2 with value of handled.  This code
+	 *     polls, then clears both with local IRQs disabled.
+	 */
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	local_irq_save(flags);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(flags);
+}
+
+static inline int check_save_status(struct spu_state *csa, struct spu *spu)
+{
+	const u32 expected = SPU_STATUS_STOPPED_BY_STOP |
+			     (SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT);
+
+	/* Save, Step 54:
+	 *     NOTE(review): send_save_code() carries the same
+	 *     step number; confirm which one is intended.
+	 *     The save succeeded only if SPU_Status[P]=1 and
+	 *     SPU_Status[SC] = "success".  Returns 0 in that
+	 *     case and 1 for any other status value.
+	 */
+	return in_be32(&spu->problem->spu_status_R) != expected;
+}
+
+static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 4:
+	 *    If required, notify the "using application" that the
+	 *    SPU task has been terminated.  TBD (empty stub).
+	 */
+}
+
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+		struct spu *spu)
+{
+	const u64 suspend_and_halt = MFC_CNTL_DECREMENTER_HALTED |
+				     MFC_CNTL_SUSPEND_DMA_QUEUE;
+
+	/* Restore, Step 7:
+	 *     Suspend the queue and halt the decrementer by
+	 *     writing MFC_Cntl[Dh,Sc,Sm]='1','1','0'.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, suspend_and_halt);
+	eieio();
+}
+
+static inline void wait_suspend_mfc_complete(struct spu_state *csa,
+					     struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	/* Restore, Step 8:
+	 * Restore, Step 47.
+	 *     Spin on MFC_CNTL[Ss] until it reads back as 11.
+	 */
+	POLL_WHILE_FALSE(MFC_CNTL_SUSPEND_COMPLETE ==
+			 (in_be64(&regs->mfc_control_RW) &
+			  MFC_CNTL_SUSPEND_DMA_STATUS_MASK));
+}
+
+static inline int suspend_spe(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 9:
+	 *    If SPU_Status[R]=1, stop SPU execution
+	 *    and wait for stop to complete.
+	 *
+	 *    Returns       1 if SPU_Status[R]=1 on entry.
+	 *                  0 otherwise
+	 */
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) {
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {	/* just wait for it to stop */
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if ((in_be32(&prob->spu_status_R) &
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
+		    || (in_be32(&prob->spu_status_R) &
+			SPU_STATUS_ISOLATED_STATE)) {
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+			/* NOTE(review): raw run-control value 0x2, no named constant */
+			out_be32(&prob->spu_runcntl_RW, 0x2);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_WAITING_FOR_CHANNEL) {
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 10:
+	 *    If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1,
+	 *    release SPU from isolate state.
+	 */
+	if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) {
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {	/* E set: run until stopped */
+			spu_mfc_sr1_set(spu,
+					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+			eieio();
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if ((in_be32(&prob->spu_status_R) &
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
+		    || (in_be32(&prob->spu_status_R) &
+			SPU_STATUS_ISOLATED_STATE)) {	/* L or IS set */
+			spu_mfc_sr1_set(spu,
+					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+			eieio();
+			out_be32(&prob->spu_runcntl_RW, 0x2);	/* NOTE(review): raw value */
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+	}
+}
+
+static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	const u64 channel[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Restore, Step 20:
+	 *     Zero the data of channel 1, then zero both the data
+	 *     and the count of each channel in the list above.
+	 */
+
+	/* CH 1: data only, the count is left alone */
+	out_be64(&regs->spu_chnlcntptr_RW, 1);
+	out_be64(&regs->spu_chnldata_RW, 0UL);
+
+	/* CH 0, 3, 4, 24, 25 and 27: data and count */
+	for (n = 0; n < ARRAY_SIZE(channel); n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, channel[n]);
+		eieio();
+		out_be64(&regs->spu_chnldata_RW, 0UL);
+		out_be64(&regs->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	const u64 channel[] = { 21UL, 23UL, 28UL, 29UL, 30UL };
+	const u64 reset_count[] = { 16UL, 1UL, 1UL, 0UL, 1UL };
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Restore, Step 21:
+	 *     Reset the following CH: [21, 23, 28, 29, 30].  Each
+	 *     is selected via the channel pointer register, then
+	 *     given its count (16 for CH 21, 0 for CH 29, else 1).
+	 */
+	for (n = 0; n < ARRAY_SIZE(channel); n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, channel[n]);
+		eieio();
+		out_be64(&regs->spu_chnlcnt_RW, reset_count[n]);
+		eieio();
+	}
+}
+
+static inline void setup_spu_status_part1(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 status_P = SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_I = SPU_STATUS_INVALID_INSTR;
+	u32 status_H = SPU_STATUS_STOPPED_BY_HALT;
+	u32 status_S = SPU_STATUS_SINGLE_STEP;
+	u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR;
+	u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR;
+	u32 status_code;
+
+	/* Restore, Step 27:
+	 *     If the CSA.SPU_Status[I,S,H,P]=1 then add the correct
+	 *     instruction sequence to the end of the SPU based restore
+	 *     code (after the "context restored" stop and signal) to
+	 *     restore the correct SPU status.
+	 *
+	 *     NOTE: Rather than modifying the SPU executable, we
+	 *     instead add a new 'stopped_status' field to the
+	 *     LSCSA.  The SPU-side restore reads this field and
+	 *     takes the appropriate action when exiting.
+	 */
+	/* Two-bit combinations are matched before the single bits. */
+	status_code =
+	    (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF;
+	if ((csa->prob.spu_status_R & status_P_I) == status_P_I) {
+
+		/* SPU_Status[P,I]=1 - Illegal Instruction followed
+		 * by Stop and Signal instruction, followed by 'br -4'.
+		 * slot[1] carries the 16-bit code taken from the status.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) {
+
+		/* SPU_Status[P,H]=1 - Halt Conditional, followed
+		 * by Stop and Signal instruction, followed by
+		 * 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) {
+
+		/* SPU_Status[S,P]=1 - Stop and Signal instruction
+		 * followed by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) {
+
+		/* SPU_Status[S,I]=1 - Illegal instruction followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_P) == status_P) {
+
+		/* SPU_Status[P]=1 - Stop and Signal instruction
+		 * followed by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_H) == status_H) {
+
+		/* SPU_Status[H]=1 - Halt Conditional, followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H;
+
+	} else if ((csa->prob.spu_status_R & status_S) == status_S) {
+
+		/* SPU_Status[S]=1 - Two nop instructions.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S;
+
+	} else if ((csa->prob.spu_status_R & status_I) == status_I) {
+
+		/* SPU_Status[I]=1 - Illegal instruction followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I;
+
+	}
+}
+
+static inline void setup_spu_status_part2(struct spu_state *csa,
+					  struct spu *spu)
+{
+	const u32 stop_or_run = SPU_STATUS_RUNNING |
+				SPU_STATUS_STOPPED_BY_STOP |
+				SPU_STATUS_STOPPED_BY_HALT |
+				SPU_STATUS_SINGLE_STEP |
+				SPU_STATUS_INVALID_INSTR;
+
+	/* Restore, Step 28:
+	 *     When none of CSA.SPU_Status[I,S,H,P,R] is set, the
+	 *     restore code must finish with a 'br *' instruction.
+	 *
+	 *     NOTE: The SPU executable is left untouched; the
+	 *     request is recorded in the 'stopped_status' field
+	 *     of the LSCSA instead, which the SPU-side restore
+	 *     reads to pick the action to take when exiting.
+	 */
+	if (csa->prob.spu_status_R & stop_or_run)
+		return;
+
+	csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
+}
+
+static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 29:
+	 *     Restore RA_GROUP_ID register and the
+	 *     RA_ENABLE register from the CSA.
+	 */
+	spu_resource_allocation_groupID_set(spu,
+			csa->priv1.resource_allocation_groupID_RW);
+	spu_resource_allocation_enable_set(spu,
+			csa->priv1.resource_allocation_enable_RW);
+}
+
+static inline void send_restore_code(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 37:
+	 *     Queue an MFC DMA (tag 0, class 0) that copies the
+	 *     whole context restore program into local storage,
+	 *     starting at local store offset 0.
+	 */
+	const unsigned long ea = (unsigned long)&spu_restore_code[0];
+	const unsigned int len = sizeof(spu_restore_code);
+	const unsigned int ls_start = 0x0;
+	const unsigned int tag_id = 0;
+	const unsigned int rclass_id = 0;
+
+	send_mfc_dma(spu, ea, ls_start, len, tag_id, rclass_id, MFC_GETFS_CMD);
+}
+
+static inline void setup_decr(struct spu_state *csa, struct spu *spu)
+{
+	cycles_t elapsed;
+
+	/* Restore, Step 34:
+	 *     If CSA.MFC_CNTL[Ds]=0 the decrementer was not
+	 *     running: clear the decrementer status in LSCSA.
+	 *     Otherwise flag it as running, subtract the time
+	 *     elapsed since csa->suspend_time from the saved
+	 *     decrementer value, and additionally set the
+	 *     "wrapped" status when that elapsed time exceeds
+	 *     the saved value.
+	 */
+	if (!(csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)) {
+		csa->lscsa->decr_status.slot[0] = 0;
+		return;
+	}
+
+	elapsed = get_cycles() - csa->suspend_time;
+	csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
+	if (csa->lscsa->decr.slot[0] < elapsed)
+		csa->lscsa->decr_status.slot[0] |= SPU_DECR_STATUS_WRAPPED;
+	csa->lscsa->decr.slot[0] -= elapsed;
+}
+
+static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 35:
+	 *     Copy the saved CSA.PU_MB data into slot 0 of LSCSA.ppu_mb.
+	 */
+	csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R;
+}
+
+static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 36:
+	 *     Copy the saved CSA.PUINT_MB data into slot 0 of LSCSA.ppuint_mb.
+	 */
+	csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R;
+}
+
+static inline int check_restore_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	const u32 expected = SPU_STATUS_STOPPED_BY_STOP |
+			     (SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT);
+
+	/* Restore, Step 40:
+	 *     Succeeds (returns 0) only when SPU_Status is exactly
+	 *     [P]=1 with [SC] = "success"; returns 1 otherwise.
+	 */
+	if (in_be32(&prob->spu_status_R) == expected)
+		return 0;
+	return 1;
+}
+
+static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 41:
+	 *     Restore SPU_PrivCntl from the value saved in the CSA.
+	 */
+	out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW);
+	eieio();
+}
+
+static inline void restore_status_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask;
+
+	/* Restore, Step 42:
+	 *     If any CSA.SPU_Status[I,S,H,P]=1, set SPU_RunCntl runnable
+	 *     and wait for SPU_Status[R]=0 to restore that stopped state.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+	if (csa->prob.spu_status_R & mask) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+	}
+}
+
+static inline void restore_status_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask;
+
+	/* Restore, Step 43:
+	 *     If all CSA.SPU_Status[I,S,H,P,R]=0 then write
+	 *     SPU_RunCntl[R0R1]='01' (SPU_RUNCNTL_RUNNABLE), wait
+	 *     for SPU_Status[R]=1, then write '00' (SPU_RUNCNTL_STOP)
+	 *     to SPU_RunCntl[R0R1] and wait for SPU_Status[R]=0.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT |
+	    SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+	if (!(csa->prob.spu_status_R & mask)) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+		POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) &
+				 SPU_STATUS_RUNNING);
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+	}
+}
+
+static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 44:
+	 *     Queue an MFC DMA (tag 0, class 0) that reloads the
+	 *     first 16kb of local storage, from local store
+	 *     offset 0, out of the copy kept in the CSA.
+	 */
+	const unsigned long ea = (unsigned long)&csa->lscsa->ls[0];
+	const unsigned int ls_start = 0x0;
+	const unsigned int len = 16384;
+	const unsigned int tag_id = 0;
+	const unsigned int rclass_id = 0;
+
+	send_mfc_dma(spu, ea, ls_start, len, tag_id, rclass_id, MFC_GET_CMD);
+}
+
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 47 (restore_csa() labels this call Step 46):
+	 *     Write MFC_Cntl[Sc,Sm]='1','0' to suspend
+	 *     the queue.
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+	eieio();
+}
+
+static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 49:
+	 *     With spu->register_lock held (taken with spin_lock_irq):
+	 *     Write INT_MASK_class0..2 with value of 0.
+	 *     Clear INT_STAT_class0 bits in CLASS0_INTR_MASK.
+	 *     Clear INT_STAT_class1 bits in CLASS1_INTR_MASK.
+	 *     Clear INT_STAT_class2 bits in CLASS2_INTR_MASK.
+	 *     (The step as specified writes -1 to each INT_STAT.)
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, 0ul);
+	spu_int_mask_set(spu, 2, 0ul);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Restore, Step 50:
+	 *     If the saved MFC_Cntl has no "queues empty" bit set,
+	 *     reload the 8 puq and 16 spuq command queue entries.
+	 */
+	if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) {
+		for (i = 0; i < 8; i++) {
+			out_be64(&priv2->puq[i].mfc_cq_data0_RW,
+				 csa->priv2.puq[i].mfc_cq_data0_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data1_RW,
+				 csa->priv2.puq[i].mfc_cq_data1_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data2_RW,
+				 csa->priv2.puq[i].mfc_cq_data2_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data3_RW,
+				 csa->priv2.puq[i].mfc_cq_data3_RW);
+		}
+		for (i = 0; i < 16; i++) {
+			out_be64(&priv2->spuq[i].mfc_cq_data0_RW,
+				 csa->priv2.spuq[i].mfc_cq_data0_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data1_RW,
+				 csa->priv2.spuq[i].mfc_cq_data1_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data2_RW,
+				 csa->priv2.spuq[i].mfc_cq_data2_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data3_RW,
+				 csa->priv2.spuq[i].mfc_cq_data3_RW);
+		}
+	}
+	eieio();
+}
+
+static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 51:
+	 *     Restore PPU_QueryMask (dma_querymask_RW) from CSA.
+	 */
+	out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW);
+	eieio();
+}
+
+static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 52:
+	 *     Restore PPU_QueryType (dma_querytype_RW) from CSA.
+	 */
+	out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 53:
+	 *     Restore MFC_CSR_TSQ (spu_tag_status_query_RW) from CSA.
+	 */
+	out_be64(&priv2->spu_tag_status_query_RW,
+		 csa->priv2.spu_tag_status_query_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 54:
+	 *     Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2 registers
+	 *     (spu_cmd_buf1_RW, spu_cmd_buf2_RW) from CSA.
+	 */
+	out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW);
+	out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 55:
+	 *     Restore MFC_CSR_ATO (spu_atomic_status_RW) from CSA.
+	 */
+	out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW);
+}
+
+static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 56:
+	 *     Restore MFC_TCLASS_ID from the priv1 value saved in CSA.
+	 */
+	spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW);
+	eieio();
+}
+
+static inline void set_llr_event(struct spu_state *csa, struct spu *spu)
+{
+	const int was_set = !!(csa->spu_chnldata_RW[0] & MFC_LLR_LOST_EVENT);
+	const int enabled = !!(csa->spu_chnldata_RW[1] & MFC_LLR_LOST_EVENT);
+
+	/* Restore, Step 57:
+	 *    Set the Lock Line Reservation Lost Event in the
+	 *    saved channel state:
+	 *      1. If CSA.SPU_Channel_0_Count=0 and
+	 *         CSA.SPU_Wr_Event_Mask[Lr]=1 and
+	 *         CSA.SPU_Event_Status[Lr]=0 then set
+	 *         CSA.SPU_Event_Status_Count=1.
+	 *      2. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1.
+	 *
+	 *    The [Lr] tests above use the values from before the
+	 *    OR in item 2 is applied.
+	 */
+	if (csa->spu_chnlcnt_RW[0] == 0 && enabled && !was_set)
+		csa->spu_chnlcnt_RW[0] = 1;
+
+	csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT;
+}
+
+static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
+{
+	u64 ev_status, ev_mask;
+
+	/* Restore, Step 58:
+	 *     If the software decrementer "wrapped" flag is set in the
+	 *     LSCSA, OR 0x20 ([Tm]) into CSA.SPU_Event_Status; the count
+	 *     becomes 1 if it was 0, the mask has 0x20 and status did not.
+	 */
+	if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
+		ev_status = csa->spu_chnldata_RW[0];
+		ev_mask = csa->spu_chnldata_RW[1];
+		if (csa->spu_chnlcnt_RW[0] == 0 &&
+		    (ev_mask & 0x20) && !(ev_status & 0x20))
+			csa->spu_chnlcnt_RW[0] = 1;
+		csa->spu_chnldata_RW[0] = ev_status | 0x20;
+	}
+}
+
+static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	int i;
+
+	/* Restore, Step 59:
+	 *	Restore data and count of CH [0,3,4,24,25,27] from the CSA.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]);
+		out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]);
+		eieio();
+	}
+}
+
+static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	const u64 chan[] = { 9UL, 21UL, 23UL };
+	u64 count[3];
+	unsigned int n;
+
+	/* Restore, Step 60:
+	 *     Restore the following CH: [9,21,23].
+	 *     Channels 9 and 23 always get a count of 1; only
+	 *     channel 21 takes its count from the CSA.
+	 */
+	count[0] = 1UL;
+	count[1] = csa->spu_chnlcnt_RW[21];
+	count[2] = 1UL;
+	for (n = 0; n < ARRAY_SIZE(chan); n++) {
+		out_be64(&priv2->spu_chnlcntptr_RW, chan[n]);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, count[n]);
+		eieio();
+	}
+}
+
+static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 61:
+	 *     Restore the SPU_LSLR register from the value saved in CSA.
+	 */
+	out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW);
+	eieio();
+}
+
+static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 62:
+	 *     Restore the SPU_Cfg register from the value saved in CSA.
+	 */
+	out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW);
+	eieio();
+}
+
+static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 63:
+	 *     Restore PM_Trace_Tag_Wait_Mask from CSA.
+	 *     Not performed by this implementation (empty placeholder).
+	 */
+}
+
+static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 64:
+	 *     Restore SPU_NPC (spu_npc_RW) from the value saved in CSA.
+	 */
+	out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW);
+	eieio();
+}
+
+static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Restore, Step 65:
+	 *     Restore MFC_RdSPU_MB (CH 29): count, then 4 data words.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]);
+	for (i = 0; i < 4; i++) {
+		out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]);
+	}
+	eieio();
+}
+
+static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 66:
+	 *     If CSA.MB_Stat[P]=0 (mailbox empty) then do a dummy
+	 *     read of the PPU_MB register (the value is discarded).
+	 */
+	if ((csa->prob.mb_stat_R & 0xFF) == 0) {
+		in_be32(&prob->pu_mb_R);
+		eieio();
+	}
+}
+
+static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 67:
+	 *     If CSA.MB_Stat[I]=0 (mailbox empty) then read from the
+	 *     PPUINT_MB register and clear the class 2 mailbox status.
+	 */
+	if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
+		in_be64(&priv2->puint_mb_R);
+		eieio();
+		spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		eieio();
+	}
+}
+
+static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 69:
+	 *     Restore the MFC_SR1 register from the priv1 value in CSA.
+	 */
+	spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW);
+	eieio();
+}
+
+static inline void set_int_route(struct spu_state *csa, struct spu *spu)
+{
+	/* Not a numbered step: set the SPU's cpu affinity to the
+	 * cpu recorded in the last_ran field of its context. */
+	spu_cpu_affinity_set(spu, spu->ctx->last_ran);
+}
+
+static inline void restore_other_spu_access(struct spu_state *csa,
+					    struct spu *spu)
+{
+	/* Restore, Step 70:
+	 *     Restore other SPU mappings to this SPU. TBD (no-op for now).
+	 */
+}
+
+static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 71:
+	 *     If CSA.SPU_Status[R]=1 (running when saved) then
+	 *     write SPU_RunCntl[R0R1]='01' (SPU_RUNCNTL_RUNNABLE).
+	 */
+	if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+	}
+}
+
+static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 72:
+	 *    Restore the MFC_CNTL register from the CSA.
+	 */
+	out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
+	eieio();
+
+	/*
+	 * The queue is put back into the same state that was evident prior to
+	 * the context switch. The suspend flag is added to the saved state in
+	 * the csa, if the operational state was suspending or suspended. In
+	 * this case, the code that suspended the mfc is responsible for
+	 * continuing it. Note that SPE faults do not change the operational
+	 * state of the spu.
+	 */
+}
+
+static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 73:
+	 *     Enable user-space access (if provided) to this
+	 *     SPU by mapping the virtual pages assigned to
+	 *     the SPU memory-mapped I/O (MMIO) for problem
+	 *     state. TBD: this function is currently a no-op.
+	 */
+}
+
+static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 74:
+	 *     Reset the "context switch active" flag.
+	 *     Not performed by this implementation (empty placeholder).
+	 */
+}
+
+static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 75:
+	 *     Re-enable SPU interrupts: reload INT_MASK_class0-2 from CSA.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW);
+	spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW);
+	spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static int quiece_spu(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Combined steps 2-18 of SPU context save sequence, which
+	 * quiesce the SPU state (disable SPU execution, MFC command
+	 * queues, decrementer, SPU interrupts, etc.).
+	 *
+	 * Returns      0 on success.
+	 *              2 if check_spu_isolate() failed at step 2.
+	 *              6 if check_spu_isolate() failed at step 6.
+	 */
+
+	if (check_spu_isolate(prev, spu)) {	/* Step 2. */
+		return 2;
+	}
+	disable_interrupts(prev, spu);	        /* Step 3. */
+	set_watchdog_timer(prev, spu);	        /* Step 4. */
+	inhibit_user_access(prev, spu);	        /* Step 5. */
+	if (check_spu_isolate(prev, spu)) {	/* Step 6. */
+		return 6;
+	}
+	set_switch_pending(prev, spu);	        /* Step 7. */
+	save_mfc_cntl(prev, spu);		/* Step 8. */
+	save_spu_runcntl(prev, spu);	        /* Step 9. */
+	save_mfc_sr1(prev, spu);	        /* Step 10. */
+	save_spu_status(prev, spu);	        /* Step 11. */
+	save_mfc_stopped_status(prev, spu);     /* Step 12. */
+	halt_mfc_decr(prev, spu);	        /* Step 13. */
+	save_timebase(prev, spu);		/* Step 14. */
+	remove_other_spu_access(prev, spu);	/* Step 15. */
+	do_mfc_mssync(prev, spu);	        /* Step 16. */
+	issue_mfc_tlbie(prev, spu);	        /* Step 17. */
+	handle_pending_interrupts(prev, spu);	/* Step 18. */
+
+	return 0;
+}
+
+static void save_csa(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Combine steps 19-45 of SPU context save sequence, which
+	 * save regions of the privileged & problem state areas.
+	 */
+
+	save_mfc_queues(prev, spu);	/* Step 19. */
+	save_ppu_querymask(prev, spu);	/* Step 20. */
+	save_ppu_querytype(prev, spu);	/* Step 21. */
+	save_ppu_tagstatus(prev, spu);  /* NEW.     */
+	save_mfc_csr_tsq(prev, spu);	/* Step 22. */
+	save_mfc_csr_cmd(prev, spu);	/* Step 23. */
+	save_mfc_csr_ato(prev, spu);	/* Step 24. */
+	save_mfc_tclass_id(prev, spu);	/* Step 25. */
+	set_mfc_tclass_id(prev, spu);	/* Step 26. */
+	save_mfc_cmd(prev, spu);	/* Step 26a - moved from 44. */
+	purge_mfc_queue(prev, spu);	/* Step 27. */
+	wait_purge_complete(prev, spu);	/* Step 28. */
+	setup_mfc_sr1(prev, spu);	/* Step 30. */
+	save_spu_npc(prev, spu);	/* Step 31. */
+	save_spu_privcntl(prev, spu);	/* Step 32. */
+	reset_spu_privcntl(prev, spu);	/* Step 33. */
+	save_spu_lslr(prev, spu);	/* Step 34. */
+	reset_spu_lslr(prev, spu);	/* Step 35. */
+	save_spu_cfg(prev, spu);	/* Step 36. */
+	save_pm_trace(prev, spu);	/* Step 37. */
+	save_mfc_rag(prev, spu);	/* Step 38. */
+	save_ppu_mb_stat(prev, spu);	/* Step 39. */
+	save_ppu_mb(prev, spu);	        /* Step 40. */
+	save_ppuint_mb(prev, spu);	/* Step 41. */
+	save_ch_part1(prev, spu);	/* Step 42. */
+	save_spu_mb(prev, spu);	        /* Step 43. */
+	reset_ch(prev, spu);	        /* Step 45. */
+}
+
+static void save_lscsa(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Perform steps 46-57 of SPU context save sequence, which
+	 * save regions of the local store and register file; the
+	 * save code is sent to the SPU (step 54) and waited for.
+	 */
+
+	resume_mfc_queue(prev, spu);	/* Step 46. */
+	/* Step 47. */
+	setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code));
+	set_switch_active(prev, spu);	/* Step 48. */
+	enable_interrupts(prev, spu);	/* Step 49. */
+	save_ls_16kb(prev, spu);	/* Step 50. */
+	set_spu_npc(prev, spu);	        /* Step 51. */
+	set_signot1(prev, spu);		/* Step 52. */
+	set_signot2(prev, spu);		/* Step 53. */
+	send_save_code(prev, spu);	/* Step 54. */
+	set_ppu_querymask(prev, spu);	/* Step 55. */
+	wait_tag_complete(prev, spu);	/* Step 56. */
+	wait_spu_stopped(prev, spu);	/* Step 57. */
+}
+
+static void force_spu_isolate_exit(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Stop SPE execution and wait until SPU_Status[R] reads 0. */
+	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	iobarrier_rw();
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	/* Restart SPE master runcntl. */
+	spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+	iobarrier_w();
+
+	/* Initiate isolate exit request (privcntl 4, runcntl 2), await stop. */
+	out_be64(&priv2->spu_privcntl_RW, 4LL);
+	iobarrier_w();
+	out_be32(&prob->spu_runcntl_RW, 2);
+	iobarrier_rw();
+	POLL_WHILE_FALSE((in_be32(&prob->spu_status_R)
+				& SPU_STATUS_STOPPED_BY_STOP));
+
+	/* Reset load request to normal. */
+	out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL);
+	iobarrier_w();
+}
+
+/**
+ * stop_spu_isolate - force isolated exit if the SPU is isolated.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Checks SPU_Status and calls force_spu_isolate_exit() as necessary.
+ */
+static void stop_spu_isolate(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
+		/* The SPU is in isolated state; the only way
+		 * to get it out is to perform an isolated
+		 * exit (clean) operation.
+		 */
+		force_spu_isolate_exit(spu);
+	}
+}
+
+static void harvest(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Perform steps 2-25 of SPU context restore sequence, which
+	 * reset an SPU either after a failed save, or when using it
+	 * for the first time.  spu_restore() passes prev == NULL.
+	 */
+
+	disable_interrupts(prev, spu);	        /* Step 2.  */
+	inhibit_user_access(prev, spu);	        /* Step 3.  */
+	terminate_spu_app(prev, spu);	        /* Step 4.  */
+	set_switch_pending(prev, spu);	        /* Step 5.  */
+	stop_spu_isolate(spu);			/* NEW.     */
+	remove_other_spu_access(prev, spu);	/* Step 6.  */
+	suspend_mfc_and_halt_decr(prev, spu);	/* Step 7.  */
+	wait_suspend_mfc_complete(prev, spu);	/* Step 8.  */
+	if (!suspend_spe(prev, spu))	        /* Step 9.  */
+		clear_spu_status(prev, spu);	/* Step 10. */
+	do_mfc_mssync(prev, spu);	        /* Step 11. */
+	issue_mfc_tlbie(prev, spu);	        /* Step 12. */
+	handle_pending_interrupts(prev, spu);	/* Step 13. */
+	purge_mfc_queue(prev, spu);	        /* Step 14. */
+	wait_purge_complete(prev, spu);	        /* Step 15. */
+	reset_spu_privcntl(prev, spu);	        /* Step 16. */
+	reset_spu_lslr(prev, spu);              /* Step 17. */
+	setup_mfc_sr1(prev, spu);	        /* Step 18. */
+	spu_invalidate_slbs(spu);		/* Step 19. */
+	reset_ch_part1(prev, spu);	        /* Step 20. */
+	reset_ch_part2(prev, spu);	        /* Step 21. */
+	enable_interrupts(prev, spu);	        /* Step 22. */
+	set_switch_active(prev, spu);	        /* Step 23. */
+	set_mfc_tclass_id(prev, spu);	        /* Step 24. */
+	resume_mfc_queue(prev, spu);	        /* Step 25. */
+}
+
+static void restore_lscsa(struct spu_state *next, struct spu *spu)
+{
+	/*
+	 * Perform steps 26-40 of SPU context restore sequence, which
+	 * restore regions of the local store and register file by
+	 * sending spu_restore_code to the SPU and waiting for it.
+	 */
+
+	set_watchdog_timer(next, spu);	        /* Step 26. */
+	setup_spu_status_part1(next, spu);	/* Step 27. */
+	setup_spu_status_part2(next, spu);	/* Step 28. */
+	restore_mfc_rag(next, spu);	        /* Step 29. */
+	/* Step 30. */
+	setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code));
+	set_spu_npc(next, spu);	                /* Step 31. */
+	set_signot1(next, spu);	                /* Step 32. */
+	set_signot2(next, spu);	                /* Step 33. */
+	setup_decr(next, spu);	                /* Step 34. */
+	setup_ppu_mb(next, spu);	        /* Step 35. */
+	setup_ppuint_mb(next, spu);	        /* Step 36. */
+	send_restore_code(next, spu);	        /* Step 37. */
+	set_ppu_querymask(next, spu);	        /* Step 38. */
+	wait_tag_complete(next, spu);	        /* Step 39. */
+	wait_spu_stopped(next, spu);	        /* Step 40. */
+}
+
+static void restore_csa(struct spu_state *next, struct spu *spu)
+{
+	/*
+	 * Combine steps 41-75 of SPU context restore sequence, which
+	 * restore regions of the privileged & problem state areas.
+	 */
+
+	restore_spu_privcntl(next, spu);	/* Step 41. */
+	restore_status_part1(next, spu);	/* Step 42. */
+	restore_status_part2(next, spu);	/* Step 43. */
+	restore_ls_16kb(next, spu);	        /* Step 44. */
+	wait_tag_complete(next, spu);	        /* Step 45. */
+	suspend_mfc(next, spu);	                /* Step 46. */
+	wait_suspend_mfc_complete(next, spu);	/* Step 47. */
+	issue_mfc_tlbie(next, spu);	        /* Step 48. */
+	clear_interrupts(next, spu);	        /* Step 49. */
+	restore_mfc_queues(next, spu);	        /* Step 50. */
+	restore_ppu_querymask(next, spu);	/* Step 51. */
+	restore_ppu_querytype(next, spu);	/* Step 52. */
+	restore_mfc_csr_tsq(next, spu);	        /* Step 53. */
+	restore_mfc_csr_cmd(next, spu);	        /* Step 54. */
+	restore_mfc_csr_ato(next, spu);	        /* Step 55. */
+	restore_mfc_tclass_id(next, spu);	/* Step 56. */
+	set_llr_event(next, spu);	        /* Step 57. */
+	restore_decr_wrapped(next, spu);	/* Step 58. */
+	restore_ch_part1(next, spu);	        /* Step 59. */
+	restore_ch_part2(next, spu);	        /* Step 60. */
+	restore_spu_lslr(next, spu);	        /* Step 61. */
+	restore_spu_cfg(next, spu);	        /* Step 62. */
+	restore_pm_trace(next, spu);	        /* Step 63. */
+	restore_spu_npc(next, spu);	        /* Step 64. */
+	restore_spu_mb(next, spu);	        /* Step 65. */
+	check_ppu_mb_stat(next, spu);	        /* Step 66. */
+	check_ppuint_mb_stat(next, spu);	/* Step 67. */
+	spu_invalidate_slbs(spu);		/* Modified Step 68. */
+	restore_mfc_sr1(next, spu);	        /* Step 69. */
+	set_int_route(next, spu);		/* NEW      */
+	restore_other_spu_access(next, spu);	/* Step 70. */
+	restore_spu_runcntl(next, spu);	        /* Step 71. */
+	restore_mfc_cntl(next, spu);	        /* Step 72. */
+	enable_user_access(next, spu);	        /* Step 73. */
+	reset_switch_active(next, spu);	        /* Step 74. */
+	reenable_interrupts(next, spu);	        /* Step 75. */
+}
+
+static int __do_spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context save can be broken into three phases:
+	 *
+	 *     (a) quiesce, see quiece_spu().
+	 *     (b) save of CSA, performed by PPE, see save_csa().
+	 *     (c) save of LSCSA, mostly by SPU, see save_lscsa().
+	 *
+	 * If the quiesce phase fails, the SPU is harvested and
+	 * the save is abandoned.
+	 *
+	 * Returns      0 on success.
+	 *              2,6 if failed to quiece SPU
+	 *              53 if SPU-side of save failed.
+	 */
+
+	rc = quiece_spu(prev, spu);
+	if (rc != 0) {
+		/* Any quiesce failure: reset the SPU, report the code. */
+		harvest(prev, spu);
+		return rc;
+	}
+
+	save_csa(prev, spu);
+	save_lscsa(prev, spu);
+
+	return check_save_status(prev, spu);
+}
+
+static int __do_spu_restore(struct spu_state *next, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context restore can be broken into three phases:
+	 *
+	 *    (a) harvest (or reset) SPU, see harvest().
+	 *    (b) restore LSCSA, mostly performed by SPU, see
+	 *        restore_lscsa().
+	 *    (c) restore CSA, performed by PPE, see restore_csa().
+	 *
+	 * The 'harvest' step is not performed here; spu_restore()
+	 * runs it before calling this function.
+	 *
+	 * Returns 0 on success, or the non-zero result of
+	 * check_restore_status() when phase (b) failed, in which
+	 * case phase (c) is skipped.
+	 */
+
+	restore_lscsa(next, spu);
+	/* Step 40: did the SPU-side restore report success? */
+	rc = check_restore_status(next, spu);
+	if (rc != 0)
+		return rc;
+
+	restore_csa(next, spu);
+
+	return 0;
+}
+
+/**
+ * spu_save - SPU context save, with locking.
+ * @prev: pointer to SPU context save area, to be saved.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Saves under the SPU lock; returns 0, panics on rc other than 0, 2, 6.
+ */
+int spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	acquire_spu_lock(spu);	        /* Step 1.     */
+	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
+	release_spu_lock(spu);
+	if (rc != 0 && rc != 2 && rc != 6) {
+		panic("%s failed on SPU[%d], rc=%d.\n",
+		      __func__, spu->number, rc);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spu_save);
+
+/**
+ * spu_restore - SPU context restore, with harvest and locking.
+ * @new: pointer to SPU context save area, to be restored.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Harvest first, as the hardware state is unknown (we may not come
+ * from a successful save), then restore, all under the SPU lock.
+ * Returns 0; a failed restore does not return but panics.
+ */
+int spu_restore(struct spu_state *new, struct spu *spu)
+{
+	int rc;
+
+	acquire_spu_lock(spu);
+	harvest(NULL, spu);
+	spu->slb_replace = 0;
+	rc = __do_spu_restore(new, spu);
+	release_spu_lock(spu);
+	if (rc) {
+		panic("%s failed on SPU[%d] rc=%d.\n",
+		       __func__, spu->number, rc);
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_restore);
+
+static void init_prob(struct spu_state *csa)
+{
+	csa->spu_chnlcnt_RW[9] = 1;	/* initial channel counts: */
+	csa->spu_chnlcnt_RW[21] = 16;	/* 21/23/28/30 as in reset_ch_part2() */
+	csa->spu_chnlcnt_RW[23] = 1;
+	csa->spu_chnlcnt_RW[28] = 1;
+	csa->spu_chnlcnt_RW[30] = 1;
+	csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP;	/* start stopped */
+	csa->prob.mb_stat_R = 0x000400;	/* NOTE(review): meaning not shown here */
+}
+
+static void init_priv1(struct spu_state *csa)
+{
+	/* Enable LS decode, master runcntl, problem state, relocate, tlbie. */
+	csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK |
+	    MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+	    MFC_STATE1_PROBLEM_STATE_MASK |
+	    MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK;
+
+	/* Enable OS-specific set of class 0, 1 and 2 interrupts. */
+	csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR |
+	    CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
+	    CLASS0_ENABLE_SPU_ERROR_INTR;
+	csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
+	csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR |
+	    CLASS2_ENABLE_SPU_HALT_INTR |
+	    CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR;
+}
+
+static void init_priv2(struct spu_state *csa)
+{
+	csa->priv2.spu_lslr_RW = LS_ADDR_MASK;
+	csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE |
+	    MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION |
+	    MFC_CNTL_DMA_QUEUES_EMPTY_MASK; /* restore_mfc_queues() skips */
+}
+
+/**
+ * spu_init_csa - initialize an SPU context save area.
+ * @csa: pointer to the SPU context save area to initialize.
+ *
+ * Zero the save area, allocate its 'lscsa' and set the initial prob,
+ * priv1 and priv2 register values (translation, interrupt masks, etc.).
+ *
+ * The 'lscsa' storage is allocated separately: it is by far the largest
+ * save region and may need to be pinned or otherwise specially aligned.
+ * Return: 0, -EINVAL if @csa is NULL, or the spu_alloc_lscsa() error.
+ */
+int spu_init_csa(struct spu_state *csa)
+{
+	int rc;
+
+	if (!csa)
+		return -EINVAL;
+	memset(csa, 0, sizeof(struct spu_state));
+
+	rc = spu_alloc_lscsa(csa);
+	if (rc)
+		return rc;
+
+	spin_lock_init(&csa->register_lock);
+
+	init_prob(csa);
+	init_priv1(csa);
+	init_priv2(csa);
+
+	return 0;
+}
+
+void spu_fini_csa(struct spu_state *csa)
+{
+	spu_free_lscsa(csa);	/* counterpart of spu_alloc_lscsa() in spu_init_csa() */
+}
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
new file mode 100644
index 0000000000..157e046e6e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/export.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/**
+ * do_spu_run - run code loaded into an SPU
+ * @filp:    file of the SPU context; must have been created by spu_create
+ * @unpc:    next program counter for the SPU
+ * @ustatus: status of the SPU
+ *
+ * This system call transfers the control of execution of a
+ * user space thread to an SPU. It will return when the
+ * SPU has finished executing or when it hits an error
+ * condition and it will be interrupted if a signal needs
+ * to be delivered to a handler in user space.
+ *
+ * The next program counter is set to the passed value
+ * before the SPU starts fetching code and the user space
+ * pointer gets updated with the new value when returning
+ * from kernel space.
+ *
+ * The status value returned from spu_run reflects the
+ * value of the spu_status register after the SPU has stopped.
+ * It is written back only when @ustatus is non-NULL.
+ */
+static long do_spu_run(struct file *filp,
+			__u32 __user *unpc,
+			__u32 __user *ustatus)
+{
+	long ret;
+	struct spufs_inode_info *i;
+	u32 npc, status = 0;	/* never copy uninitialized stack to userspace */
+
+	ret = -EFAULT;
+	if (get_user(npc, unpc))
+		goto out;
+
+	/* check if this file was created by spu_create */
+	ret = -EINVAL;
+	if (filp->f_op != &spufs_context_fops)
+		goto out;
+
+	i = SPUFS_I(file_inode(filp));
+	ret = spufs_run_spu(i->i_ctx, &npc, &status);
+
+	if (put_user(npc, unpc))
+		ret = -EFAULT;
+
+	if (ustatus && put_user(status, ustatus))
+		ret = -EFAULT;
+out:
+	return ret;
+}
+
+static long do_spu_create(const char __user *pathname, unsigned int flags,
+		umode_t mode, struct file *neighbor)
+{
+	struct path parent;
+	struct dentry *child;
+	int err;
+
+	/* Look up the creation target; a failure comes back as an ERR_PTR. */
+	child = user_path_create(AT_FDCWD, pathname, &parent, LOOKUP_DIRECTORY);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+
+	err = spufs_create(&parent, child, flags, mode, neighbor);
+	done_path_create(&parent, child);
+	return err;
+}
+
+struct spufs_calls spufs_calls = {
+	.owner = THIS_MODULE,
+	.create_thread = do_spu_create,
+	.spu_run = do_spu_run,
+	.notify_spus_active = do_notify_spus_active,
+#ifdef CONFIG_COREDUMP
+	.coredump_extra_notes_size = spufs_coredump_extra_notes_size,
+	.coredump_extra_notes_write = spufs_coredump_extra_notes_write,
+#endif
+};
diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig
new file mode 100644
index 0000000000..ff30ed579a
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_CHRP
+	bool "Common Hardware Reference Platform (CHRP) based machines"
+	depends on PPC_BOOK3S_32
+	select HAVE_PCSPKR_PLATFORM
+	select MPIC
+	select PPC_I8259
+	select PPC_INDIRECT_PCI
+	select PPC_RTAS
+	select PPC_RTAS_DAEMON
+	select RTAS_ERROR_LOGGING
+	select PPC_MPC106
+	select PPC_UDBG_16550
+	select PPC_HASH_MMU_NATIVE
+	select FORCE_PCI
+	default y
diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile
new file mode 100644
index 0000000000..05639db9a3
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y				+= setup.o time.o pegasos_eth.o pci.o
+obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_NVRAM:m=y)		+= nvram.o
diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h
new file mode 100644
index 0000000000..6ff4631d9d
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/chrp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Declarations of CHRP platform-specific things.
+ */
+
+extern void chrp_nvram_init(void);
+extern void chrp_get_rtc_time(struct rtc_time *);
+extern int chrp_set_rtc_time(struct rtc_time *);
+extern long chrp_time_init(void);
+
+extern void chrp_find_bridges(void);
diff --git a/arch/powerpc/platforms/chrp/gg2.h b/arch/powerpc/platforms/chrp/gg2.h
new file mode 100644
index 0000000000..341ae55b99
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/gg2.h
@@ -0,0 +1,61 @@
+/*
+ *  gg2.h -- VLSI VAS96011/12 `Golden Gate 2' register definitions
+ *
+ *  Copyright (C) 1997 Geert Uytterhoeven
+ *
+ *  This file is based on the following documentation:
+ *
+ *	The VAS96011/12 Chipset, Data Book, Edition 1.0
+ *	VLSI Technology, Inc.
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive
+ *  for more details.
+ */
+
+#ifndef _ASMPPC_GG2_H
+#define _ASMPPC_GG2_H
+
+    /*
+     *  Memory Map (CHRP mode)
+     */
+
+#define GG2_PCI_MEM_BASE	0xc0000000	/* Peripheral memory space */
+#define GG2_ISA_MEM_BASE	0xf7000000	/* Peripheral memory alias */
+#define GG2_ISA_IO_BASE		0xf8000000	/* Peripheral I/O space */
+#define GG2_PCI_CONFIG_BASE	0xfec00000	/* PCI configuration space */
+#define GG2_INT_ACK_SPECIAL	0xfec80000	/* Interrupt acknowledge and */
+						/* special PCI cycles */
+#define GG2_ROM_BASE0		0xff000000	/* ROM bank 0 */
+#define GG2_ROM_BASE1		0xff800000	/* ROM bank 1 */
+
+
+    /*
+     *  GG2 specific PCI Registers
+     */
+
+extern void __iomem *gg2_pci_config_base;	/* kernel virtual address */
+
+#define GG2_PCI_BUSNO		0x40	/* Bus number */
+#define GG2_PCI_SUBBUSNO	0x41	/* Subordinate bus number */
+#define GG2_PCI_DISCCTR		0x42	/* Disconnect counter */
+#define GG2_PCI_PPC_CTRL	0x50	/* PowerPC interface control register */
+#define GG2_PCI_ADDR_MAP	0x5c	/* Address map */
+#define GG2_PCI_PCI_CTRL	0x60	/* PCI interface control register */
+#define GG2_PCI_ROM_CTRL	0x70	/* ROM interface control register */
+#define GG2_PCI_ROM_TIME	0x74	/* ROM timing */
+#define GG2_PCI_CC_CTRL		0x80	/* Cache controller control register */
+#define GG2_PCI_DRAM_BANK0	0x90	/* Control register for DRAM bank #0 */
+#define GG2_PCI_DRAM_BANK1	0x94	/* Control register for DRAM bank #1 */
+#define GG2_PCI_DRAM_BANK2	0x98	/* Control register for DRAM bank #2 */
+#define GG2_PCI_DRAM_BANK3	0x9c	/* Control register for DRAM bank #3 */
+#define GG2_PCI_DRAM_BANK4	0xa0	/* Control register for DRAM bank #4 */
+#define GG2_PCI_DRAM_BANK5	0xa4	/* Control register for DRAM bank #5 */
+#define GG2_PCI_DRAM_TIME0	0xb0	/* Timing parameters set #0 */
+#define GG2_PCI_DRAM_TIME1	0xb4	/* Timing parameters set #1 */
+#define GG2_PCI_DRAM_CTRL	0xc0	/* DRAM control */
+#define GG2_PCI_ERR_CTRL	0xd0	/* Error control register */
+#define GG2_PCI_ERR_STATUS	0xd4	/* Error status register */
+					/* Cleared when read */
+
+#endif /* _ASMPPC_GG2_H */
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
new file mode 100644
index 0000000000..0eedae9649
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  c 2001 PPC 64 Team, IBM Corp
+ *
+ * /dev/nvram driver for PPC
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include "chrp.h"
+
+static unsigned int nvram_size;
+static unsigned char nvram_buf[4];
+static DEFINE_SPINLOCK(nvram_lock);
+
+static unsigned char chrp_nvram_read_val(int addr)
+{
+	unsigned char byte = 0xff;	/* value reported on any failure */
+	unsigned long irqflags;
+	unsigned int nread;
+	int rc;
+
+	if (addr >= nvram_size) {
+		printk(KERN_DEBUG "%s: read addr %d > nvram_size %u\n",
+		       current->comm, addr, nvram_size);
+		return byte;
+	}
+	/* nvram_buf is shared with the write path, hence the lock. */
+	spin_lock_irqsave(&nvram_lock, irqflags);
+	rc = rtas_call(rtas_function_token(RTAS_FN_NVRAM_FETCH), 3, 2, &nread,
+		       addr, __pa(nvram_buf), 1);
+	if (rc == 0 && nread == 1)
+		byte = nvram_buf[0];
+	spin_unlock_irqrestore(&nvram_lock, irqflags);
+	return byte;
+}
+
+static void chrp_nvram_write_val(int addr, unsigned char val)
+{
+	unsigned int done;	/* byte count reported back by RTAS */
+	unsigned long flags;
+
+	if (addr >= nvram_size) {
+		printk(KERN_DEBUG "%s: write addr %d > nvram_size %u\n",
+		       current->comm, addr, nvram_size);
+		return;
+	}
+	spin_lock_irqsave(&nvram_lock, flags);
+	nvram_buf[0] = val;	/* staged here; its physical address goes to RTAS */
+	if ((rtas_call(rtas_function_token(RTAS_FN_NVRAM_STORE), 3, 2, &done, addr,
+		       __pa(nvram_buf), 1) != 0) || 1 != done)
+		printk(KERN_DEBUG "rtas IO error storing 0x%02x at %d\n", val, addr);
+	spin_unlock_irqrestore(&nvram_lock, flags);
+}
+
+static ssize_t chrp_nvram_size(void)
+{
+	return nvram_size;	/* set by chrp_nvram_init() from the "#bytes" property */
+}
+
+/* Read the size of the OF "nvram" node and install the RTAS-backed accessors. */
+void __init chrp_nvram_init(void)
+{
+	struct device_node *nvram;
+	const __be32 *nbytes_p;
+	int proplen;	/* of_get_property() reports the length through an int * */
+
+	nvram = of_find_node_by_type(NULL, "nvram");
+	if (nvram == NULL)
+		return;
+
+	nbytes_p = of_get_property(nvram, "#bytes", &proplen);
+	if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
+		of_node_put(nvram);
+		return;
+	}
+
+	nvram_size = be32_to_cpup(nbytes_p);
+
+	printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size);
+	of_node_put(nvram);
+
+	/* The size is known at this point, so the accessors can be exposed. */
+	ppc_md.nvram_read_val  = chrp_nvram_read_val;
+	ppc_md.nvram_write_val = chrp_nvram_write_val;
+	ppc_md.nvram_size      = chrp_nvram_size;
+}
+
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
new file mode 100644
index 0000000000..428fd2a7b3
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CHRP pci routines.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/hydra.h>
+#include <asm/machdep.h>
+#include <asm/sections.h>
+#include <asm/pci-bridge.h>
+#include <asm/grackle.h>
+#include <asm/rtas.h>
+
+#include "chrp.h"
+#include "gg2.h"
+
+/* LongTrail */
+void __iomem *gg2_pci_config_base;
+
+/*
+ * The VLSI Golden Gate II has only 512K of PCI configuration space, so we
+ * limit the bus number to 3 bits
+ */
+
+static int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off,
+			   int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *reg;
+
+	/* Buses above 7 are outside the GG2 configuration window. */
+	if (bus->number > 7)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already verified that off is suitably
+	 * aligned and that len is one of 1, 2 or 4.
+	 */
+	reg = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off);
+
+	/* Select the load accessor by width; wider ones are little-endian. */
+	if (len == 1)
+		*val =  in_8(reg);
+	else if (len == 2)
+		*val = in_le16(reg);
+	else
+		*val = in_le32(reg);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int gg2_write_config(struct pci_bus *bus, unsigned int devfn, int off,
+			    int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *reg;
+
+	/* Buses above 7 are outside the GG2 configuration window. */
+	if (bus->number > 7)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already verified that off is suitably
+	 * aligned and that len is one of 1, 2 or 4.
+	 */
+	reg = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off);
+
+	/* Select the store accessor by width; wider ones are little-endian. */
+	if (len == 1)
+		out_8(reg, val);
+	else if (len == 2)
+		out_le16(reg, val);
+	else
+		out_le32(reg, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Memory-mapped config accessors for the Golden Gate II bridge. */
+static struct pci_ops gg2_pci_ops = {
+	.read = gg2_read_config,
+	.write = gg2_write_config,
+};
+
+/*
+ * Access functions for PCI config space using RTAS calls.
+ */
+static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			    int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+		| (((bus->number - hose->first_busno) & 0xff) << 16)
+		| (hose->global_number << 24);
+	int data = -1;	/* handed back unchanged if RTAS stores no value */
+	int rc;
+
+	rc = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &data, addr, len);
+	*val = data;
+	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			     int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+		| (((bus->number - hose->first_busno) & 0xff) << 16)
+		| (hose->global_number << 24);
+	int rc;	/* RTAS status; any non-zero value means failure */
+
+	rc = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL,
+		       addr, len, val);
+	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+/* Config accessors that go through the RTAS read/write-pci-config calls. */
+static struct pci_ops rtas_pci_ops = {
+	.read = rtas_read_config,
+	.write = rtas_write_config,
+};
+
+volatile struct Hydra __iomem *Hydra = NULL;	/* NULL until hydra_init() maps the "mac-io" registers */
+
+static int __init hydra_init(void)	/* returns 1 if a Hydra was set up, else 0 */
+{
+	struct device_node *np;
+	struct resource r;
+
+	np = of_find_node_by_name(NULL, "mac-io");
+	if (np == NULL || of_address_to_resource(np, 0, &r)) {
+		of_node_put(np);
+		return 0;
+	}
+	of_node_put(np);
+	Hydra = ioremap(r.start, resource_size(&r));
+	if (Hydra == NULL)	/* mapping failed: behave as if no Hydra exists */
+		return 0;
+	printk("Hydra Mac I/O at %llx\n", (unsigned long long)r.start);
+	printk("Hydra Feature_Control was %x",
+	       in_le32(&Hydra->Feature_Control));
+	out_le32(&Hydra->Feature_Control, (HYDRA_FC_SCC_CELL_EN |
+					   HYDRA_FC_SCSI_CELL_EN |
+					   HYDRA_FC_SCCA_ENABLE | HYDRA_FC_SCCB_ENABLE |
+					   HYDRA_FC_ARB_BYPASS | HYDRA_FC_MPIC_ENABLE |
+					   HYDRA_FC_SLOW_SCC_PCLK |
+					   HYDRA_FC_MPIC_IS_MASTER));
+	printk(KERN_CONT ", now %x\n", in_le32(&Hydra->Feature_Control));
+	return 1;
+}
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+static void __init
+setup_python(struct pci_controller *hose, struct device_node *dev)
+{
+	u32 __iomem *regs;
+	u32 ctl;
+	struct resource res;
+
+	if (of_address_to_resource(dev, 0, &res)) {
+		printk(KERN_ERR "No address for Python PCI controller\n");
+		return;
+	}
+
+	/* Clear the magic go-slow bit */
+	regs = ioremap(res.start + 0xf6000, 0x40);
+	BUG_ON(!regs);
+	ctl = in_be32(&regs[12]);
+	if (ctl & PRG_CL_RESET_VALID) {
+		out_be32(&regs[12], ctl & ~PRG_CL_RESET_VALID);
+		in_be32(&regs[12]);	/* read the register back after the write */
+	}
+	iounmap(regs);
+
+	setup_indirect_pci(hose, res.start + 0xf8000, res.start + 0xf8010, 0);
+}
+
+/* Marvell Discovery II based Pegasos 2 */
+static void __init setup_peg2(struct pci_controller *hose, struct device_node *dev)
+{
+	struct device_node *root_np = of_find_node_by_path("/");
+	struct device_node *rtas_np = of_find_node_by_name (root_np, "rtas");
+
+	/* Config space is reached through RTAS when the firmware has it. */
+	if (rtas_np == NULL) {
+		printk ("RTAS supporting Pegasos OF not found, please upgrade"
+			" your firmware\n");
+	} else {
+		hose->ops = &rtas_pci_ops;
+		of_node_put(rtas_np);
+	}
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	/* keep the reference to the root node: no of_node_put(root_np) here */
+}
+
+/* Walk the root's "pci" children and set up a pci_controller for each one. */
+void __init chrp_find_bridges(void)
+{
+	struct device_node *dev;
+	const int *bus_range;
+	int len, index = -1;
+	struct pci_controller *hose;
+	const unsigned int *dma;
+	const char *model, *machine;
+	int is_longtrail = 0, is_mot = 0, is_pegasos = 0;
+	struct device_node *root = of_find_node_by_path("/");
+	struct resource r;
+	/*
+	 * The PCI host bridge nodes on some machines don't have
+	 * properties to adequately identify them, so we have to
+	 * look at what sort of machine this is as well.
+	 */
+	machine = of_get_property(root, "model", NULL);
+	if (machine != NULL) {
+		is_longtrail = strncmp(machine, "IBM,LongTrail", 13) == 0;
+		is_mot = strncmp(machine, "MOT", 3) == 0;
+		if (strncmp(machine, "Pegasos2", 8) == 0)
+			is_pegasos = 2;
+		else if (strncmp(machine, "Pegasos", 7) == 0)
+			is_pegasos = 1;
+	}
+	for_each_child_of_node(root, dev) {
+		if (!of_node_is_type(dev, "pci"))
+			continue;
+		++index;
+		/* The GG2 bridge on the LongTrail doesn't have an address */
+		if (of_address_to_resource(dev, 0, &r) && !is_longtrail) {
+			printk(KERN_WARNING "Can't use %pOF: no address\n",
+			       dev);
+			continue;
+		}
+		bus_range = of_get_property(dev, "bus-range", &len);
+		if (bus_range == NULL || len < 2 * sizeof(int)) {
+			printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+				dev);
+			continue;
+		}
+		if (bus_range[1] == bus_range[0])
+			printk(KERN_INFO "PCI bus %d", bus_range[0]);
+		else
+			printk(KERN_INFO "PCI buses %d..%d",
+			       bus_range[0], bus_range[1]);
+		printk(KERN_CONT " controlled by %pOF", dev);	/* same log line */
+		if (!is_longtrail)
+			printk(KERN_CONT " at %llx", (unsigned long long)r.start);
+		printk(KERN_CONT "\n");
+
+		hose = pcibios_alloc_controller(dev);
+		if (!hose) {
+			printk("Can't allocate PCI controller structure for %pOF\n",
+				dev);
+			continue;
+		}
+		hose->first_busno = hose->self_busno = bus_range[0];
+		hose->last_busno = bus_range[1];
+
+		model = of_get_property(dev, "model", NULL);
+		if (model == NULL)
+			model = "<none>";
+		if (strncmp(model, "IBM, Python", 11) == 0) {
+			setup_python(hose, dev);
+		} else if (is_mot
+			   || strncmp(model, "Motorola, Grackle", 17) == 0) {
+			setup_grackle(hose);
+		} else if (is_longtrail) {
+			void __iomem *p = ioremap(GG2_PCI_CONFIG_BASE, 0x80000);
+			hose->ops = &gg2_pci_ops;
+			hose->cfg_data = p;
+			gg2_pci_config_base = p;
+		} else if (is_pegasos == 1) {
+			setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc, 0);
+		} else if (is_pegasos == 2) {
+			setup_peg2(hose, dev);
+		} else if (!strncmp(model, "IBM,CPC710", 10)) {
+			setup_indirect_pci(hose,
+					   r.start + 0x000f8000,
+					   r.start + 0x000f8010,
+					   0);
+			if (index == 0) {
+				dma = of_get_property(dev, "system-dma-base",
+							&len);
+				if (dma && len >= sizeof(*dma)) {
+					dma = (unsigned int *)
+						(((unsigned long)dma) +
+						len - sizeof(*dma));
+					pci_dram_offset = *dma;	/* last cell of the property */
+				}
+			}
+		} else {
+			printk("No methods for %pOF (model %s), using RTAS\n",
+			       dev, model);
+			hose->ops = &rtas_pci_ops;
+		}
+
+		pci_process_bridge_OF_ranges(hose, dev, index == 0);
+
+		/* check the first bridge for a property that we can
+		   use to set pci_dram_offset */
+		dma = of_get_property(dev, "ibm,dma-ranges", &len);
+		if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) {
+			pci_dram_offset = dma[2] - dma[3];
+			printk("pci_dram_offset = %lx\n", pci_dram_offset);
+		}
+	}
+	of_node_put(root);
+
+	/*
+	 *  "Temporary" fixes for PCI devices.
+	 *  -- Geert
+	 */
+	hydra_init();		/* Mac I/O */
+
+	pci_create_OF_bus_map();
+}
+
+/* SL82C105 IDE Control/Status Register */
+#define SL82C105_IDECSR                0x40
+
+/* Fixup for Winbond ATA quirk, required for briq mostly because the
+ * 8259 is configured for level sensitive IRQ 14 and so wants the
+ * ATA controller to be set to fully native mode or bad things
+ * will happen.
+ */
+static void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105)
+{
+	u8 prog;
+
+	/* If non-briq machines need that fixup too, please speak up */
+	if (!machine_is(chrp) || _chrp_type != _CHRP_briq)
+		return;
+
+	if ((sl82c105->class & 5) == 5)	/* both mode bits already set */
+		return;
+	printk("W83C553: Switching SL82C105 IDE to PCI native mode\n");
+	/* Enable SL82C105 PCI native IDE mode */
+	pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &prog);
+	pci_write_config_byte(sl82c105, PCI_CLASS_PROG, prog | 0x05);
+	sl82c105->class |= 0x05;
+	/* Disable SL82C105 second port */
+	pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003);
+	/* Clear IO BARs, they will be reassigned */
+	pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_0, 0);
+	pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_1, 0);
+	pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_2, 0);
+	pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_3, 0);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+			chrp_pci_fixup_winbond_ata);
+
+/* Pegasos2 firmware version 20040810 configures the built-in IDE controller
+ * in legacy mode, but sets the PCI registers to PCI native mode.
+ * The chip can only operate in legacy mode, so force the PCI class into legacy
+ * mode as well. The same fixup must be done to the class-code property in
+ * the IDE node /pci@80000000/ide@C,1
+ */
+static void chrp_pci_fixup_vt8231_ata(struct pci_dev *viaide)
+{
+	struct pci_dev *bridge;
+	u8 prog;
+
+	if (!machine_is(chrp) || _chrp_type != _CHRP_Pegasos)
+		return;
+	if (viaide->irq != 14)
+		return;
+
+	/* Only act when a VIA 8231 device is present in the system. */
+	bridge = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
+	if (bridge == NULL)
+		return;
+	dev_info(&viaide->dev, "Fixing VIA IDE, force legacy mode on\n");
+
+	pci_read_config_byte(viaide, PCI_CLASS_PROG, &prog);
+	pci_write_config_byte(viaide, PCI_CLASS_PROG, prog & ~0x5);
+	viaide->class &= ~0x5;
+	pci_dev_put(bridge);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, chrp_pci_fixup_vt8231_ata);
diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
new file mode 100644
index 0000000000..5c4f1a9ca1
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2005 Sven Luther <sl@bplan-gmbh.de>
+ *  Thanks to :
+ *	Dale Farnsworth <dale@farnsworth.org>
+ *	Mark A. Greer <mgreer@mvista.com>
+ *	Nicolas DET <nd@bplan-gmbh.de>
+ *	Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *  And anyone else who helped me on this.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/mv643xx.h>
+#include <linux/pci.h>
+
+#define PEGASOS2_MARVELL_REGBASE 		(0xf1000000)
+#define PEGASOS2_MARVELL_REGSIZE 		(0x00004000)
+#define PEGASOS2_SRAM_BASE 			(0xf2000000)
+#define PEGASOS2_SRAM_SIZE			(256*1024)
+
+#define PEGASOS2_SRAM_BASE_ETH_PORT0			(PEGASOS2_SRAM_BASE)
+#define PEGASOS2_SRAM_BASE_ETH_PORT1			(PEGASOS2_SRAM_BASE_ETH_PORT0 + (PEGASOS2_SRAM_SIZE / 2) )
+
+
+#define PEGASOS2_SRAM_RXRING_SIZE		(PEGASOS2_SRAM_SIZE/4)
+#define PEGASOS2_SRAM_TXRING_SIZE		(PEGASOS2_SRAM_SIZE/4)
+
+#undef BE_VERBOSE
+
+static struct resource mv643xx_eth_shared_resources[] = {
+	[0] = {
+		.name	= "ethernet shared base",
+		.start	= PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS,
+		.end	= PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS +
+					MV643XX_ETH_SHARED_REGS_SIZE - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device mv643xx_eth_shared_device = {
+	.name		= MV643XX_ETH_SHARED_NAME,
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(mv643xx_eth_shared_resources),
+	.resource	= mv643xx_eth_shared_resources,	/* the shared register window */
+};
+
+/*
+ * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1
+ */
+static struct resource mv643xx_eth_mvmdio_resources[] = {
+	[0] = {
+		.name	= "ethernet mdio base",
+		.start	= PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS + 0x4,
+		.end	= PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS + 0x83,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device mv643xx_eth_mvmdio_device = {
+	.name		= "orion-mdio",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(mv643xx_eth_mvmdio_resources),
+	.resource	= mv643xx_eth_mvmdio_resources,	/* MDIO sub-range of the shared window */
+};
+
+static struct resource mv643xx_eth_port1_resources[] = {
+	[0] = {
+		.name	= "eth port1 irq",
+		.start	= 9,	/* single interrupt: start == end */
+		.end	= 9,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct mv643xx_eth_platform_data eth_port1_pd = {
+	.shared		= &mv643xx_eth_shared_device,
+	.port_number	= 1,
+	.phy_addr	= MV643XX_ETH_PHY_ADDR(7),
+	/* TX ring: SRAM_SIZE/4 bytes at the port 1 base (SRAM base + SRAM_SIZE/2) */
+	.tx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1,
+	.tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
+	.tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
+	/* RX ring: SRAM_SIZE/4 bytes placed directly after the TX ring */
+	.rx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1 + PEGASOS2_SRAM_TXRING_SIZE,
+	.rx_sram_size = PEGASOS2_SRAM_RXRING_SIZE,
+	.rx_queue_size = PEGASOS2_SRAM_RXRING_SIZE/16,
+};
+
+static struct platform_device eth_port1_device = {
+	.name		= MV643XX_ETH_NAME,
+	.id		= 1,
+	.num_resources	= ARRAY_SIZE(mv643xx_eth_port1_resources),
+	.resource	= mv643xx_eth_port1_resources,
+	.dev = {
+		.platform_data = &eth_port1_pd,	/* SRAM fields may be zeroed by mv643xx_eth_add_pds() */
+	},
+};
+
+static struct platform_device *mv643xx_eth_pd_devs[] __initdata = {	/* passed to platform_add_devices() */
+	&mv643xx_eth_shared_device,
+	&mv643xx_eth_mvmdio_device,
+	&eth_port1_device,
+};
+
+/***********/
+/***********/
+#define MV_READ(offset,val) 	{ val = readl(mv643xx_reg_base + offset); }
+#define MV_WRITE(offset,data) writel(data, mv643xx_reg_base + offset)
+
+static void __iomem *mv643xx_reg_base;
+
+static int __init Enable_SRAM(void)	/* returns 1 on success, -ENOMEM if the registers cannot be mapped */
+{
+	u32 ALong;
+
+	if (mv643xx_reg_base == NULL)
+		mv643xx_reg_base = ioremap(PEGASOS2_MARVELL_REGBASE,
+					PEGASOS2_MARVELL_REGSIZE);
+
+	if (mv643xx_reg_base == NULL)
+		return -ENOMEM;
+
+#ifdef BE_VERBOSE
+	printk("Pegasos II/Marvell MV64361: register remapped from %p to %p\n",
+		(void *)PEGASOS2_MARVELL_REGBASE, (void *)mv643xx_reg_base);
+#endif
+	/* Zero the SRAM configuration register. */
+	MV_WRITE(MV64340_SRAM_CONFIG, 0);
+	/* Program the integrated SRAM base with the upper 16 address bits. */
+	MV_WRITE(MV64340_INTEGRATED_SRAM_BASE_ADDR, PEGASOS2_SRAM_BASE >> 16);
+	/* Clear bit 19 of the base-address-enable register (read-modify-write). */
+	MV_READ(MV64340_BASE_ADDR_ENABLE, ALong);
+	ALong &= ~(1 << 19);
+	MV_WRITE(MV64340_BASE_ADDR_ENABLE, ALong);
+	/* Ethernet BAR 4: SRAM base in the upper half, 0x02 in the low bits. */
+	ALong = 0x02;
+	ALong |= PEGASOS2_SRAM_BASE & 0xffff0000;
+	MV_WRITE(MV643XX_ETH_BAR_4, ALong);
+
+	MV_WRITE(MV643XX_ETH_SIZE_REG_4, (PEGASOS2_SRAM_SIZE-1) & 0xffff0000);
+	/* Clear bit 4 of the ethernet base-address-enable register. */
+	MV_READ(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong);
+	ALong &= ~(1 << 4);
+	MV_WRITE(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong);
+
+#ifdef BE_VERBOSE
+	printk("Pegasos II/Marvell MV64361: register unmapped\n");
+	printk("Pegasos II/Marvell MV64361: SRAM at %p, size=%x\n", (void*) PEGASOS2_SRAM_BASE, PEGASOS2_SRAM_SIZE);
+#endif
+	/* The mapping is only needed for this setup; drop it again. */
+	iounmap(mv643xx_reg_base);
+	mv643xx_reg_base = NULL;
+
+	return 1;
+}
+
+
+/***********/
+/***********/
+static int __init mv643xx_eth_add_pds(void)
+{
+	int ret = 0;
+	static struct pci_device_id pci_marvell_mv64360[] = {
+		{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64360) },
+		{ }
+	};
+
+#ifdef BE_VERBOSE
+	printk("Pegasos II/Marvell MV64361: init\n");
+#endif
+	/* Register the devices only when the MV64360 is present on PCI. */
+	if (pci_dev_present(pci_marvell_mv64360)) {
+		ret = platform_add_devices(mv643xx_eth_pd_devs,
+				ARRAY_SIZE(mv643xx_eth_pd_devs));
+		/* NOTE(review): SRAM is enabled after the devices are added - confirm ordering */
+		if ( Enable_SRAM() < 0)
+		{
+			eth_port1_pd.tx_sram_addr = 0;
+			eth_port1_pd.tx_sram_size = 0;
+			eth_port1_pd.rx_sram_addr = 0;
+			eth_port1_pd.rx_sram_size = 0;
+
+#ifdef BE_VERBOSE
+			printk("Pegasos II/Marvell MV64361: Can't enable the "
+				"SRAM\n");
+#endif
+		}
+	}
+
+#ifdef BE_VERBOSE
+	printk("Pegasos II/Marvell MV64361: init is over\n");
+#endif
+	/* Result of platform_add_devices(), or 0 when nothing was registered. */
+	return ret;
+}
+
+device_initcall(mv643xx_eth_add_pds);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
new file mode 100644
index 0000000000..36ee3a5056
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/major.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <generated/utsrelease.h>
+#include <linux/adb.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <linux/timer.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/hydra.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+#include <asm/i8259.h>
+#include <asm/mpic.h>
+#include <asm/rtas.h>
+#include <asm/xmon.h>
+
+#include "chrp.h"
+#include "gg2.h"
+
+void rtas_indicator_progress(char *, unsigned short);
+
+int _chrp_type;
+EXPORT_SYMBOL(_chrp_type);
+
+static struct mpic *chrp_mpic;
+
+/* Used for doing CHRP event-scans */
+DEFINE_PER_CPU(struct timer_list, heartbeat_timer);
+unsigned long event_scan_interval;
+
+extern unsigned long loops_per_jiffy;
+
+/* To be replaced by RTAS when available */
+static unsigned int __iomem *briq_SPOR;
+
+#ifdef CONFIG_SMP
+extern struct smp_ops_t chrp_smp_ops;
+#endif
+
+static const char *gg2_memtypes[4] = {
+	"FPM", "SDRAM", "EDO", "BEDO"
+};
+static const char *gg2_cachesizes[4] = {
+	"256 KB", "512 KB", "1 MB", "Reserved"
+};
+static const char *gg2_cachetypes[4] = {
+	"Asynchronous", "Reserved", "Flow-Through Synchronous",
+	"Pipelined Synchronous"
+};
+static const char *gg2_cachemodes[4] = {
+	"Disabled", "Write-Through", "Copy-Back", "Transparent Mode"
+};
+
+static const char *chrp_names[] = {
+	"Unknown",
+	"","","",
+	"Motorola",
+	"IBM or Longtrail",
+	"Genesi Pegasos",
+	"Total Impact Briq"
+};
+
+/* Emit the CHRP machine line and, on LongTrail, GG2 memory and L2 details. */
+static void chrp_show_cpuinfo(struct seq_file *m)
+{
+	int i, sdramen;
+	unsigned int t;
+	struct device_node *root;
+	const char *prop, *model = "";
+
+	root = of_find_node_by_path("/");
+	prop = root ? of_get_property(root, "model", NULL) : NULL;
+	if (prop)	/* otherwise keep "" instead of handing NULL to %s */
+		model = prop;
+	seq_printf(m, "machine\t\t: CHRP %s\n", model);
+
+	/* longtrail (goldengate) stuff */
+	if (!strncmp(model, "IBM,LongTrail", 13)) {
+		/* VLSI VAS96011/12 `Golden Gate 2' */
+		/* Memory banks */
+		sdramen = (in_le32(gg2_pci_config_base + GG2_PCI_DRAM_CTRL) >> 31) & 1;
+		for (i = 0; i < (sdramen ? 4 : 6); i++) {
+			t = in_le32(gg2_pci_config_base + GG2_PCI_DRAM_BANK0 +
+				    i * 4);
+			if (!(t & 1))
+				continue;
+			switch ((t>>8) & 0x1f) {
+			case 0x1f:
+				model = "4 MB";
+				break;
+			case 0x1e:
+				model = "8 MB";
+				break;
+			case 0x1c:
+				model = "16 MB";
+				break;
+			case 0x18:
+				model = "32 MB";
+				break;
+			case 0x10:
+				model = "64 MB";
+				break;
+			case 0x00:
+				model = "128 MB";
+				break;
+			default:
+				model = "Reserved";
+				break;
+			}
+			seq_printf(m, "memory bank %d\t: %s %s\n", i, model,
+				   gg2_memtypes[sdramen ? 1 : ((t>>1) & 3)]);
+		}
+		/* L2 cache */
+		t = in_le32(gg2_pci_config_base+GG2_PCI_CC_CTRL);
+		seq_printf(m, "board l2\t: %s %s (%s)\n",
+			   gg2_cachesizes[(t>>7) & 3],
+			   gg2_cachetypes[(t>>2) & 3],
+			   gg2_cachemodes[t & 3]);
+	}
+	of_node_put(root);
+}
+
+/*
+ *  Fixes for the National Semiconductor PC87308VUL SuperI/O
+ *
+ *  Some versions of Open Firmware incorrectly initialize the IRQ settings
+ *  for keyboard and mouse
+ */
+static inline void __init sio_write(u8 val, u8 index)
+{
+	outb(index, 0x15c);
+	outb(val, 0x15d);
+}
+
+static inline u8 __init sio_read(u8 index)
+{
+	outb(index, 0x15c);
+	return inb(0x15d);
+}
+
+/* Force a SuperI/O logical device to the expected IRQ level/type, enabled. */
+static void __init sio_fixup_irq(const char *name, u8 device, u8 level,
+				     u8 type)
+{
+	u8 cur_level, cur_type, enabled;
+
+	sio_write(device, 0x07);	/* select logical device */
+	enabled = sio_read(0x30);
+	cur_level = sio_read(0x70);
+	cur_type = sio_read(0x71);
+	if (enabled && cur_level == level && cur_type == type)
+		return;		/* already configured as requested */
+	printk(KERN_WARNING "sio: %s irq level %d, type %d, %sactive: "
+	       "remapping to level %d, type %d, active\n",
+	       name, cur_level, cur_type, enabled ? "" : "in", level, type);
+	sio_write(0x01, 0x30);
+	sio_write(level, 0x70);
+	sio_write(type, 0x71);
+}
+
+/* Apply the keyboard/mouse IRQ fixups on IBM LongTrail boards only. */
+static void __init sio_init(void)
+{
+	struct device_node *dt_root = of_find_node_by_path("/");
+	const char *name;
+
+	if (!dt_root)
+		return;
+
+	name = of_get_property(dt_root, "model", NULL);
+	if (!name || strncmp(name, "IBM,LongTrail", 13) != 0)
+		goto put_root;
+
+	sio_fixup_irq("keyboard", 0, 1, 2);	/* logical device 0 (KBC/Keyboard) */
+	sio_fixup_irq("mouse", 1, 12, 2);	/* logical device 1 (KBC/Mouse) */
+
+put_root:
+	of_node_put(dt_root);
+}
+
+
+/* Enable the L2 cache via L2CR when the cpu node's "l2cr" value has it off. */
+static void __init pegasos_set_l2cr(void)
+{
+	struct device_node *cpu;
+	const unsigned int *l2cr;
+
+	/* On Pegasos, enable the l2 cache if needed, as the OF forgets it */
+	if (_chrp_type != _CHRP_Pegasos)
+		return;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+	if (cpu == NULL)
+		return;
+
+	l2cr = of_get_property(cpu, "l2cr", NULL);
+	if (l2cr == NULL) {
+		printk ("Pegasos l2cr : no cpu l2cr property found\n");
+	} else if (!(*l2cr & 0x80000000)) {
+		/* top bit clear: write 0 first, then the value with it set */
+		printk ("Pegasos l2cr : L2 cache was not active, "
+			"activating\n");
+		_set_L2CR(0);
+		_set_L2CR(*l2cr | 0x80000000);
+	}
+	of_node_put(cpu);
+}
+
+static void __noreturn briq_restart(char *cmd)
+{
+	local_irq_disable();
+	if (briq_SPOR)
+		out_be32(briq_SPOR, 0);
+	for(;;);
+}
+
+/*
+ * Per default, input/output-device points to the keyboard/screen
+ * If no card is installed, the built-in serial port is used as a fallback.
+ * But unfortunately, the firmware does not connect /chosen/{stdin,stdout}
+ * to the built-in serial node. Instead, a /failsafe node is created.
+ */
+static __init void chrp_init(void)
+{
+	struct device_node *node;
+	const char *property;
+
+	if (strstr(boot_command_line, "console="))
+		return;	/* a console= option is present: leave the choice alone */
+	/* find the boot console from /chosen/stdout */
+	if (!of_chosen)
+		return;
+	node = of_find_node_by_path("/");
+	if (!node)
+		return;
+	property = of_get_property(node, "model", NULL);
+	if (!property)
+		goto out_put;
+	if (strcmp(property, "Pegasos2"))
+		goto out_put;
+	/* this is a Pegasos2 */
+	property = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (!property)
+		goto out_put;
+	of_node_put(node);	/* done with the root node; node is reused below */
+	node = of_find_node_by_path(property);
+	if (!node)
+		return;	/* lookup failed, so there is no reference to drop */
+	if (!of_node_is_type(node, "serial"))
+		goto out_put;
+	/*
+	 * The 9pin connector is either /failsafe
+	 * or /pci@80000000/isa@C/serial@i2F8
+	 * The optional graphics card has also type 'serial' in VGA mode.
+	 */
+	if (of_node_name_eq(node, "failsafe") || of_node_name_eq(node, "serial"))
+		add_preferred_console("ttyS", 0, NULL);
+out_put:
+	of_node_put(node);
+}
+
+static void __init chrp_setup_arch(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	const char *machine = NULL;
+
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000/HZ;
+
+	if (root)
+		machine = of_get_property(root, "model", NULL);
+	if (machine && strncmp(machine, "Pegasos", 7) == 0) {
+		_chrp_type = _CHRP_Pegasos;
+	} else if (machine && strncmp(machine, "IBM", 3) == 0) {
+		_chrp_type = _CHRP_IBM;
+	} else if (machine && strncmp(machine, "MOT", 3) == 0) {
+		_chrp_type = _CHRP_Motorola;
+	} else if (machine && strncmp(machine, "TotalImpact,BRIQ-1", 18) == 0) {
+		_chrp_type = _CHRP_briq;
+		/* Map the SPOR register on briq and change the restart hook */
+		briq_SPOR = ioremap(0xff0000e8, 4);
+		ppc_md.restart = briq_restart;
+	} else {
+		/* Let's assume it is an IBM chrp if all else fails */
+		_chrp_type = _CHRP_IBM;
+	}
+	of_node_put(root);
+	printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]);
+
+	rtas_initialize();
+	if (rtas_function_token(RTAS_FN_DISPLAY_CHARACTER) >= 0)
+		ppc_md.progress = rtas_progress;
+
+	/* use RTAS time-of-day routines if available */
+	if (rtas_function_token(RTAS_FN_GET_TIME_OF_DAY) != RTAS_UNKNOWN_SERVICE) {
+		ppc_md.get_boot_time	= rtas_get_boot_time;
+		ppc_md.get_rtc_time	= rtas_get_rtc_time;
+		ppc_md.set_rtc_time	= rtas_set_rtc_time;
+	}
+
+	/* On pegasos, enable the L2 cache if not already done by OF */
+	pegasos_set_l2cr();
+
+	/*
+	 *  Fix the Super I/O configuration
+	 */
+	sio_init();
+
+	/*
+	 * Print the banner, then scroll down so boot progress
+	 * can be printed.  -- Cort
+	 */
+	if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0);
+}
+
+static void chrp_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq = i8259_irq();
+
+	if (cascade_irq)
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Finds the open-pic node and sets up the mpic driver.
+ */
+static void __init chrp_find_openpic(void)
+{
+	struct device_node *np, *root;
+	int len, i, j;
+	int isu_size;
+	const unsigned int *iranges, *opprop = NULL;
+	int oplen = 0;
+	unsigned long opaddr;
+	int na = 1;
+
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (np == NULL)
+		return;
+	root = of_find_node_by_path("/");
+	if (root) {
+		opprop = of_get_property(root, "platform-open-pic", &oplen);
+		na = of_n_addr_cells(root);
+	}
+	if (opprop && oplen >= na * sizeof(unsigned int)) {
+		opaddr = opprop[na-1];	/* assume 32-bit */
+		oplen /= na * sizeof(unsigned int);
+	} else {
+		struct resource r;
+		if (of_address_to_resource(np, 0, &r)) {
+			goto bail;
+		}
+		opaddr = r.start;
+		oplen = 0;
+	}
+
+	printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
+
+	iranges = of_get_property(np, "interrupt-ranges", &len);
+	if (iranges == NULL)
+		len = 0;	/* non-distributed mpic */
+	else
+		len /= 2 * sizeof(unsigned int);
+
+	/*
+	 * The first pair of cells in interrupt-ranges refers to the
+	 * IDU; subsequent pairs refer to the ISUs.
+	 */
+	if (oplen < len) {
+		printk(KERN_ERR "Insufficient addresses for distributed"
+		       " OpenPIC (%d < %d)\n", oplen, len);
+		len = oplen;
+	}
+
+	isu_size = 0;
+	if (len > 0 && iranges[1] != 0) {
+		printk(KERN_INFO "OpenPIC irqs %d..%d in IDU\n",
+		       iranges[0], iranges[0] + iranges[1] - 1);
+	}
+	if (len > 1)
+		isu_size = iranges[3];
+
+	chrp_mpic = mpic_alloc(np, opaddr, MPIC_NO_RESET,
+			isu_size, 0, " MPIC    ");
+	if (chrp_mpic == NULL) {
+		printk(KERN_ERR "Failed to allocate MPIC structure\n");
+		goto bail;
+	}
+	j = na - 1;
+	for (i = 1; i < len; ++i) {
+		iranges += 2;
+		j += na;
+		printk(KERN_INFO "OpenPIC irqs %d..%d in ISU at %x\n",
+		       iranges[0], iranges[0] + iranges[1] - 1,
+		       opprop[j]);
+		mpic_assign_isu(chrp_mpic, i - 1, opprop[j]);
+	}
+
+	mpic_init(chrp_mpic);
+	ppc_md.get_irq = mpic_get_irq;
+ bail:
+	of_node_put(root);
+	of_node_put(np);
+}
+
+static void __init chrp_find_8259(void)
+{
+	struct device_node *np, *pic = NULL;
+	unsigned long chrp_int_ack = 0;
+	unsigned int cascade_irq;
+
+	/* Look for cascade */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			pic = np;
+			break;
+		}
+	/* Ok, 8259 wasn't found. We need to handle the case where
+	 * we have a pegasos that claims to be chrp but doesn't have
+	 * a proper interrupt tree
+	 */
+	if (pic == NULL && chrp_mpic != NULL) {
+		printk(KERN_ERR "i8259: Not found in device-tree"
+		       " assuming no legacy interrupts\n");
+		return;
+	}
+
+	/* Look for intack. In a perfect world, we would look for it on
+	 * the ISA bus that holds the 8259 but heh... Works that way. If
+	 * we ever see a problem, we can try to re-use the pSeries code here.
+	 * Also, Pegasos-type platforms don't have a proper node to start
+	 * from anyway
+	 */
+	for_each_node_by_name(np, "pci") {
+		const unsigned int *addrp = of_get_property(np,
+				"8259-interrupt-acknowledge", NULL);
+
+		if (addrp == NULL)
+			continue;
+		chrp_int_ack = addrp[of_n_addr_cells(np)-1];
+		break;
+	}
+	of_node_put(np);
+	if (np == NULL)
+		printk(KERN_WARNING "Cannot find PCI interrupt acknowledge"
+		       " address, polling\n");
+
+	i8259_init(pic, chrp_int_ack);
+	if (ppc_md.get_irq == NULL) {
+		ppc_md.get_irq = i8259_irq;
+		irq_set_default_host(i8259_get_host());
+	}
+	if (chrp_mpic != NULL) {
+		cascade_irq = irq_of_parse_and_map(pic, 0);
+		if (!cascade_irq)
+			printk(KERN_ERR "i8259: failed to map cascade irq\n");
+		else
+			irq_set_chained_handler(cascade_irq,
+						chrp_8259_cascade);
+	}
+}
+
+static void __init chrp_init_IRQ(void)
+{
+#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON)
+	struct device_node *kbd;
+#endif
+	chrp_find_openpic();
+	chrp_find_8259();
+
+#ifdef CONFIG_SMP
+	/* Pegasos has no MPIC, those ops would make it crash. It might be an
+	 * option to move setting them to after we probe the PIC though
+	 */
+	if (chrp_mpic != NULL)
+		smp_ops = &chrp_smp_ops;
+#endif /* CONFIG_SMP */
+
+	if (_chrp_type == _CHRP_Pegasos)
+		ppc_md.get_irq        = i8259_irq;
+
+#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON)
+	/* see if there is a keyboard in the device tree
+	   with a parent of type "adb" */
+	for_each_node_by_name(kbd, "keyboard")
+		if (of_node_is_type(kbd->parent, "adb"))
+			break;
+	of_node_put(kbd);
+	if (kbd) {
+		if (request_irq(HYDRA_INT_ADB_NMI, xmon_irq, 0, "XMON break",
+				NULL))
+			pr_err("Failed to register XMON break interrupt\n");
+	}
+#endif
+}
+
+static void __init
+chrp_init2(void)
+{
+#if IS_ENABLED(CONFIG_NVRAM)
+	chrp_nvram_init();
+#endif
+
+	request_region(0x20,0x20,"pic1");
+	request_region(0xa0,0x20,"pic2");
+	request_region(0x00,0x20,"dma1");
+	request_region(0x40,0x20,"timer");
+	request_region(0x80,0x10,"dma page reg");
+	request_region(0xc0,0x20,"dma2");
+
+	if (ppc_md.progress)
+		ppc_md.progress("  Have fun!    ", 0x7777);
+}
+
+/* Claim the machine when the flat device tree root has device_type "chrp". */
+static int __init chrp_probe(void)
+{
+	const char *root_type;
+
+	root_type = of_get_flat_dt_prop(of_get_flat_dt_root(), "device_type",
+					NULL);
+	if (!root_type || strcmp(root_type, "chrp") != 0)
+		return 0;
+
+	DMA_MODE_READ = 0x44;
+	DMA_MODE_WRITE = 0x48;
+
+	pm_power_off = rtas_power_off;
+
+	chrp_init();
+	return 1;
+}
+
+define_machine(chrp) {
+	.name			= "CHRP",
+	.probe			= chrp_probe,
+	.setup_arch		= chrp_setup_arch,
+	.discover_phbs		= chrp_find_bridges,
+	.init			= chrp_init2,
+	.show_cpuinfo		= chrp_show_cpuinfo,
+	.init_IRQ		= chrp_init_IRQ,
+	.restart		= rtas_restart,
+	.halt			= rtas_halt,
+	.time_init		= chrp_time_init,
+	.set_rtc_time		= chrp_set_rtc_time,
+	.get_rtc_time		= chrp_get_rtc_time,
+	.phys_mem_access_prot	= pci_phys_mem_access_prot,
+};
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
new file mode 100644
index 0000000000..ab95155647
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Smp support for CHRP machines.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
+ * deal of code from the sparc and intel versions.
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/rtas.h>
+
+static int smp_chrp_kick_cpu(int nr)
+{
+	*(unsigned long *)KERNELBASE = nr;	/* store the CPU number at KERNELBASE */
+	asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory");	/* dcbf on that address; presumably so the waiting CPU sees it - confirm */
+
+	return 0;
+}
+
+static void smp_chrp_setup_cpu(int cpu_nr)
+{
+	mpic_setup_this_cpu();
+}
+
+/* CHRP with openpic */
+struct smp_ops_t chrp_smp_ops = {
+	.cause_nmi_ipi = NULL,
+	.message_pass = smp_mpic_message_pass,
+	.probe = smp_mpic_probe,
+	.kick_cpu = smp_chrp_kick_cpu,
+	.setup_cpu = smp_chrp_setup_cpu,
+	.give_timebase = rtas_give_timebase,
+	.take_timebase = rtas_take_timebase,
+};
diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
new file mode 100644
index 0000000000..d46417e3d8
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * Adapted for PowerPC (PReP) by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu).
+ * Copied and modified from arch/i386/kernel/time.c
+ *
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/init.h>
+#include <linux/bcd.h>
+#include <linux/ioport.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/nvram.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+
+#include <platforms/chrp/chrp.h>
+
+#define NVRAM_AS0  0x74
+#define NVRAM_AS1  0x75
+#define NVRAM_DATA 0x77
+
+static int nvram_as1 = NVRAM_AS1;
+static int nvram_as0 = NVRAM_AS0;
+static int nvram_data = NVRAM_DATA;
+
+/* Switch RTC port access to the device-tree "rtc" node address, if any. */
+long __init chrp_time_init(void)
+{
+	struct device_node *rtc_np;
+	struct resource res;
+	int port, err;
+
+	rtc_np = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00");
+	if (!rtc_np)
+		rtc_np = of_find_compatible_node(NULL, "rtc", "ds1385-rtc");
+	if (!rtc_np)
+		return 0;
+
+	err = of_address_to_resource(rtc_np, 0, &res);
+	of_node_put(rtc_np);
+	if (err)
+		return 0;
+
+	port = res.start;
+	nvram_as1 = 0;	/* 0 makes the accessors skip the high-byte write */
+	nvram_as0 = port;
+	nvram_data = port + 1;
+	return 0;
+}
+
+static int chrp_cmos_clock_read(int addr)
+{
+	if (nvram_as1)		/* high address byte, only if that port is set */
+		outb(addr >> 8, nvram_as1);
+	outb(addr, nvram_as0);
+	return inb(nvram_data);
+}
+
+/* Write @val to RTC/NVRAM location @addr through the address/data ports. */
+static void chrp_cmos_clock_write(unsigned long val, int addr)
+{
+	if (nvram_as1)
+		outb(addr >> 8, nvram_as1);
+	outb(addr, nvram_as0);
+	outb(val, nvram_data);
+}
+
+/*
+ * Set the hardware clock. -- Cort
+ */
+int chrp_set_rtc_time(struct rtc_time *tmarg)
+{
+	unsigned char save_control, save_freq_select;
+	struct rtc_time tm = *tmarg;	/* work on a copy; the caller's value is not converted */
+
+	spin_lock(&rtc_lock);
+
+	save_control = chrp_cmos_clock_read(RTC_CONTROL); /* tell the clock it's being set */
+
+	chrp_cmos_clock_write((save_control|RTC_SET), RTC_CONTROL);
+
+	save_freq_select = chrp_cmos_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */
+
+	chrp_cmos_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+		tm.tm_sec = bin2bcd(tm.tm_sec);
+		tm.tm_min = bin2bcd(tm.tm_min);
+		tm.tm_hour = bin2bcd(tm.tm_hour);
+		tm.tm_mon = bin2bcd(tm.tm_mon);
+		tm.tm_mday = bin2bcd(tm.tm_mday);
+		tm.tm_year = bin2bcd(tm.tm_year);
+	}
+	chrp_cmos_clock_write(tm.tm_sec,RTC_SECONDS);
+	chrp_cmos_clock_write(tm.tm_min,RTC_MINUTES);
+	chrp_cmos_clock_write(tm.tm_hour,RTC_HOURS);
+	chrp_cmos_clock_write(tm.tm_mon,RTC_MONTH);	/* NOTE(review): tm_mon written as-is, no +1 - confirm month base */
+	chrp_cmos_clock_write(tm.tm_mday,RTC_DAY_OF_MONTH);
+	chrp_cmos_clock_write(tm.tm_year,RTC_YEAR);	/* NOTE(review): tm_year written as-is, not mod 100 - confirm */
+
+	/* The following flags have to be released exactly in this order,
+	 * otherwise the DS12887 (popular MC146818A clone with integrated
+	 * battery and quartz) will not reset the oscillator and will not
+	 * update precisely 500 ms later. You won't find this mentioned in
+	 * the Dallas Semiconductor data sheets, but who believes data
+	 * sheets anyway ...                           -- Markus Kuhn
+	 */
+	chrp_cmos_clock_write(save_control, RTC_CONTROL);
+	chrp_cmos_clock_write(save_freq_select, RTC_FREQ_SELECT);
+
+	spin_unlock(&rtc_lock);
+	return 0;
+}
+
+void chrp_get_rtc_time(struct rtc_time *tm)
+{
+	unsigned int year, mon, day, hour, min, sec;
+
+	do {
+		sec = chrp_cmos_clock_read(RTC_SECONDS);
+		min = chrp_cmos_clock_read(RTC_MINUTES);
+		hour = chrp_cmos_clock_read(RTC_HOURS);
+		day = chrp_cmos_clock_read(RTC_DAY_OF_MONTH);
+		mon = chrp_cmos_clock_read(RTC_MONTH);
+		year = chrp_cmos_clock_read(RTC_YEAR);
+	} while (sec != chrp_cmos_clock_read(RTC_SECONDS));	/* re-read if the seconds changed meanwhile */
+
+	if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+		sec = bcd2bin(sec);
+		min = bcd2bin(min);
+		hour = bcd2bin(hour);
+		day = bcd2bin(day);
+		mon = bcd2bin(mon);
+		year = bcd2bin(year);
+	}
+	if (year < 70)	/* presumably maps 00-69 into the 2000s (tm_year from 1900) - confirm */
+		year += 100;
+	tm->tm_sec = sec;
+	tm->tm_min = min;
+	tm->tm_hour = hour;
+	tm->tm_mday = day;
+	tm->tm_mon = mon;	/* NOTE(review): stored without -1 - confirm month base */
+	tm->tm_year = year;
+}
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
new file mode 100644
index 0000000000..c6adff216f
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+config EMBEDDED6xx
+	bool "Embedded 6xx/7xx/7xxx-based boards"
+	depends on PPC_BOOK3S_32 && BROKEN_ON_SMP
+
+config LINKSTATION
+	bool "Linkstation / Kurobox(HG) from Buffalo"
+	depends on EMBEDDED6xx
+	select MPIC
+	select FSL_SOC
+	select PPC_UDBG_16550 if SERIAL_8250
+	select DEFAULT_UIMAGE
+	imply MPC10X_BRIDGE if PCI
+	help
+	  Select LINKSTATION if configuring for one of PPC- (MPC8241)
+	  based NAS systems from Buffalo Technology. So far only
+	  KuroboxHG has been tested. In the future classical Kurobox,
+	  Linkstation-I HD-HLAN and HD-HGLAN versions, and PPC-based
+	  Terastation systems should be supported too.
+
+config STORCENTER
+	bool "IOMEGA StorCenter"
+	depends on EMBEDDED6xx
+	select MPIC
+	select FSL_SOC
+	select PPC_UDBG_16550 if SERIAL_8250
+	imply MPC10X_BRIDGE if PCI
+	help
+	  Select STORCENTER if configuring for the iomega StorCenter
+	  with an 8241 CPU in it.
+
+config PPC_HOLLY
+	bool "PPC750GX/CL with TSI10x bridge (Hickory/Holly)"
+	depends on EMBEDDED6xx
+	select TSI108_BRIDGE
+	select PPC_UDBG_16550
+	help
+	  Select PPC_HOLLY if configuring for an IBM 750GX/CL Eval
+	  Board with TSI108/9 bridge (Hickory/Holly)
+
+config MVME5100
+	bool "Motorola/Emerson MVME5100"
+	depends on EMBEDDED6xx
+	select MPIC
+	select FORCE_PCI
+	select PPC_INDIRECT_PCI
+	select PPC_I8259
+	select PPC_HASH_MMU_NATIVE
+	select PPC_UDBG_16550
+	help
+	  This option enables support for the Motorola (now Emerson) MVME5100
+	  board.
+
+config TSI108_BRIDGE
+	bool
+	select FORCE_PCI
+	select MPIC
+	select MPIC_WEIRD
+
+config MPC10X_BRIDGE
+	bool
+	select PPC_INDIRECT_PCI
+
+config GAMECUBE_COMMON
+	bool
+
+config USBGECKO_UDBG
+	bool "USB Gecko udbg console for the Nintendo GameCube/Wii"
+	depends on GAMECUBE_COMMON
+	help
+	  If you say yes to this option, support will be included for the
+	  USB Gecko adapter as an udbg console.
+	  The USB Gecko is an EXI to USB Serial converter that can be plugged
+	  into a memcard slot in the Nintendo GameCube/Wii.
+
+	  This driver bypasses the EXI layer completely.
+
+	  If in doubt, say N here.
+
+config GAMECUBE
+	bool "Nintendo-GameCube"
+	depends on EMBEDDED6xx
+	select GAMECUBE_COMMON
+	help
+	  Select GAMECUBE if configuring for the Nintendo GameCube.
+	  More information at: <http://gc-linux.sourceforge.net/>
+
+config WII
+	bool "Nintendo-Wii"
+	depends on EMBEDDED6xx
+	select GAMECUBE_COMMON
+	help
+	  Select WII if configuring for the Nintendo Wii.
+	  More information at: <http://gc-linux.sourceforge.net/>
diff --git a/arch/powerpc/platforms/embedded6xx/Makefile b/arch/powerpc/platforms/embedded6xx/Makefile
new file mode 100644
index 0000000000..7f2a8154e5
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the 6xx/7xx/7xxx linux kernel.
+#
+obj-$(CONFIG_LINKSTATION)	+= linkstation.o ls_uart.o
+obj-$(CONFIG_STORCENTER)	+= storcenter.o
+obj-$(CONFIG_PPC_HOLLY)		+= holly.o
+obj-$(CONFIG_USBGECKO_UDBG)	+= usbgecko_udbg.o
+obj-$(CONFIG_GAMECUBE_COMMON)	+= flipper-pic.o
+obj-$(CONFIG_GAMECUBE)		+= gamecube.o
+obj-$(CONFIG_WII)		+= wii.o hlwd-pic.o
+obj-$(CONFIG_MVME5100)		+= mvme5100.o
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
new file mode 100644
index 0000000000..4d9200bdba
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/flipper-pic.c
+ *
+ * Nintendo GameCube/Wii "Flipper" interrupt controller support.
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+#define DRV_MODULE_NAME "flipper-pic"
+#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/io.h>
+
+#include "flipper-pic.h"
+
+#define FLIPPER_NR_IRQS		32
+
+/*
+ * Each interrupt has a corresponding bit in both
+ * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers.
+ *
+ * Enabling/disabling an interrupt line involves setting/clearing
+ * the corresponding bit in IMR.
+ * Except for the RSW interrupt, all interrupts get deasserted automatically
+ * when the source deasserts the interrupt.
+ */
+#define FLIPPER_ICR		0x00
+#define FLIPPER_ICR_RSS		(1<<16) /* reset switch state */
+
+#define FLIPPER_IMR		0x04
+
+#define FLIPPER_RESET		0x24
+
+
+/*
+ * IRQ chip hooks.
+ *
+ */
+
+static void flipper_pic_mask_and_ack(struct irq_data *d)
+{
+	void __iomem *regs = irq_data_get_irq_chip_data(d);
+	int hw = irqd_to_hwirq(d);
+	u32 bit = 1 << hw;
+
+	clrbits32(regs + FLIPPER_IMR, bit);	/* mask: clear the IMR bit */
+	/* this is at least needed for RSW */
+	out_be32(regs + FLIPPER_ICR, bit);	/* ack: write the bit to ICR */
+}
+
+static void flipper_pic_ack(struct irq_data *d)
+{
+	void __iomem *regs = irq_data_get_irq_chip_data(d);
+	int hw = irqd_to_hwirq(d);
+
+	/* ack by writing the bit to ICR; this is at least needed for RSW */
+	out_be32(regs + FLIPPER_ICR, 1 << hw);
+}
+
+static void flipper_pic_mask(struct irq_data *d)
+{
+	void __iomem *regs = irq_data_get_irq_chip_data(d);
+	int hw = irqd_to_hwirq(d);
+
+	clrbits32(regs + FLIPPER_IMR, 1 << hw);	/* IMR bit off = disabled */
+}
+
+static void flipper_pic_unmask(struct irq_data *d)
+{
+	void __iomem *regs = irq_data_get_irq_chip_data(d);
+	int hw = irqd_to_hwirq(d);
+
+	setbits32(regs + FLIPPER_IMR, 1 << hw);	/* IMR bit on = enabled */
+}
+
+
+static struct irq_chip flipper_pic = {
+	.name		= "flipper-pic",
+	.irq_ack	= flipper_pic_ack,
+	.irq_mask_ack	= flipper_pic_mask_and_ack,
+	.irq_mask	= flipper_pic_mask,
+	.irq_unmask	= flipper_pic_unmask,
+};
+
+/*
+ * IRQ host hooks.
+ *
+ */
+
+static struct irq_domain *flipper_irq_host;
+
+static int flipper_pic_map(struct irq_domain *h, unsigned int virq,
+			   irq_hw_number_t hwirq)
+{
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &flipper_pic, handle_level_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops flipper_irq_domain_ops = {
+	.map = flipper_pic_map,
+};
+
+/*
+ * Platform hooks.
+ *
+ */
+
+static void __flipper_quiesce(void __iomem *io_base)
+{
+	/* mask and ack all IRQs */
+	out_be32(io_base + FLIPPER_IMR, 0x00000000);
+	out_be32(io_base + FLIPPER_ICR, 0xffffffff);
+}
+
+/* Map the parent "flipper-pi" registers, quiesce the PIC, add its domain. */
+static struct irq_domain * __init flipper_pic_init(struct device_node *np)
+{
+	struct device_node *pi;
+	struct irq_domain *irq_domain = NULL;
+	struct resource res;
+	void __iomem *io_base;
+
+	pi = of_get_parent(np);
+	if (!pi) {
+		pr_err("no parent found\n");
+		goto out;
+	}
+	if (!of_device_is_compatible(pi, "nintendo,flipper-pi")) {
+		pr_err("unexpected parent compatible\n");
+		goto out;
+	}
+	if (of_address_to_resource(pi, 0, &res)) {
+		pr_err("no io memory range found\n");
+		goto out;
+	}
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		pr_err("ioremap failed\n");
+		goto out;
+	}
+	pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, io_base);
+	__flipper_quiesce(io_base);
+
+	irq_domain = irq_domain_add_linear(np, FLIPPER_NR_IRQS,
+				  &flipper_irq_domain_ops, io_base);
+	if (!irq_domain) {
+		pr_err("failed to allocate irq_domain\n");
+		iounmap(io_base);	/* nothing else references the mapping */
+	}
+out:
+	of_node_put(pi);	/* drop the of_get_parent() reference; NULL is fine */
+	return irq_domain;
+}
+
+/* Return the Linux irq of the lowest pending, enabled source; 0 if none. */
+unsigned int flipper_pic_get_irq(void)
+{
+	void __iomem *regs = flipper_irq_host->host_data;
+	u32 cause, enabled, pending;
+
+	cause = in_be32(regs + FLIPPER_ICR);
+	enabled = in_be32(regs + FLIPPER_IMR);
+	pending = cause & enabled;
+	if (!pending)
+		return 0;	/* no more IRQs pending */
+
+	return irq_linear_revmap(flipper_irq_host, __ffs(pending));
+}
+
+/*
+ * Probe function.
+ *
+ */
+
+void __init flipper_pic_probe(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-pic");
+	BUG_ON(!np);
+
+	flipper_irq_host = flipper_pic_init(np);
+	BUG_ON(!flipper_irq_host);
+
+	irq_set_default_host(flipper_irq_host);
+
+	of_node_put(np);
+}
+
+/*
+ * Misc functions related to the flipper chipset.
+ *
+ */
+
+/**
+ * flipper_quiesce() - quiesce flipper irq controller
+ *
+ * Mask and ack all interrupt sources.
+ *
+ */
+void flipper_quiesce(void)
+{
+	void __iomem *io_base = flipper_irq_host->host_data;
+
+	__flipper_quiesce(io_base);
+}
+
+/*
+ * Resets the platform.
+ */
+void flipper_platform_reset(void)
+{
+	void __iomem *regs;
+
+	if (!flipper_irq_host || !flipper_irq_host->host_data)
+		return;		/* no mapped registers to write to */
+	regs = flipper_irq_host->host_data;
+	out_8(regs + FLIPPER_RESET, 0x00);
+}
+
+/*
+ * Returns non-zero if the reset button is pressed.
+ */
+int flipper_is_reset_button_pressed(void)
+{
+	void __iomem *regs;
+	u32 icr;
+
+	if (!flipper_irq_host || !flipper_irq_host->host_data)
+		return 0;
+
+	regs = flipper_irq_host->host_data;
+	icr = in_be32(regs + FLIPPER_ICR);
+	return !(icr & FLIPPER_ICR_RSS);	/* RSS clear => reported as pressed */
+}
+
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.h b/arch/powerpc/platforms/embedded6xx/flipper-pic.h
new file mode 100644
index 0000000000..024ae70baa
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/platforms/embedded6xx/flipper-pic.h
+ *
+ * Nintendo GameCube/Wii "Flipper" interrupt controller support.
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+
+#ifndef __FLIPPER_PIC_H
+#define __FLIPPER_PIC_H
+
+unsigned int flipper_pic_get_irq(void);
+void __init flipper_pic_probe(void);
+
+void flipper_quiesce(void);
+void flipper_platform_reset(void);
+int flipper_is_reset_button_pressed(void);
+
+#endif
diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c
new file mode 100644
index 0000000000..e3b2c74647
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/gamecube.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/gamecube.c
+ *
+ * Nintendo GameCube board-specific support
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kexec.h>
+#include <linux/seq_file.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "flipper-pic.h"
+#include "usbgecko_udbg.h"
+
+
+static void __noreturn gamecube_spin(void)
+{
+	/* Never returns: keep the CPU relaxing in an endless loop. */
+	while (1)
+		cpu_relax();
+}
+
+static void __noreturn gamecube_restart(char *cmd)
+{
+	local_irq_disable();
+	flipper_platform_reset();	/* ask the Flipper helper to reset the board */
+	gamecube_spin();		/* never returns; @cmd is not used */
+}
+
+static void gamecube_power_off(void)
+{
+	local_irq_disable();
+	gamecube_spin();	/* no power-off request is issued here; just spin forever */
+}
+
+static void __noreturn gamecube_halt(void)
+{
+	gamecube_restart(NULL);	/* halt is implemented as a restart */
+}
+
+static int __init gamecube_probe(void)
+{
+	pm_power_off = gamecube_power_off;	/* install the power-off hook */
+
+	ug_udbg_init();
+
+	return 1;	/* unconditionally reports 1 to the caller */
+}
+
+static void gamecube_shutdown(void)
+{
+	flipper_quiesce();	/* mask and ack all Flipper interrupt sources */
+}
+
+define_machine(gamecube) {
+	.name			= "gamecube",
+	.compatible		= "nintendo,gamecube",
+	.probe			= gamecube_probe,
+	.restart		= gamecube_restart,
+	.halt			= gamecube_halt,	/* ends up in gamecube_restart() */
+	.init_IRQ		= flipper_pic_probe,
+	.get_irq		= flipper_pic_get_irq,
+	.progress		= udbg_progress,
+	.machine_shutdown	= gamecube_shutdown,	/* quiesces the Flipper PIC */
+};
+
+
+static const struct of_device_id gamecube_of_bus[] = {
+	{ .compatible = "nintendo,flipper", },	/* bus node handed to of_platform_bus_probe() */
+	{ },	/* sentinel */
+};
+
+static int __init gamecube_device_probe(void)
+{
+	of_platform_bus_probe(NULL, gamecube_of_bus, NULL);	/* result is ignored */
+	return 0;
+}
+machine_device_initcall(gamecube, gamecube_device_probe);
+
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
new file mode 100644
index 0000000000..4d2d92de30
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+ *
+ * Nintendo Wii "Hollywood" interrupt controller support.
+ * Copyright (C) 2009 The GameCube Linux Team
+ * Copyright (C) 2009 Albert Herranz
+ */
+#define DRV_MODULE_NAME "hlwd-pic"
+#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/io.h>
+
+#include "hlwd-pic.h"
+
+#define HLWD_NR_IRQS	32
+
+/*
+ * Each interrupt has a corresponding bit in both
+ * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers.
+ *
+ * Enabling/disabling an interrupt line involves asserting/clearing
+ * the corresponding bit in IMR. ACK'ing a request simply involves
+ * asserting the corresponding bit in ICR.
+ */
+#define HW_BROADWAY_ICR		0x00
+#define HW_BROADWAY_IMR		0x04
+#define HW_STARLET_ICR		0x08
+#define HW_STARLET_IMR		0x0c
+
+
+/*
+ * IRQ chip hooks.
+ *
+ */
+
+static void hlwd_pic_mask_and_ack(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* Unsigned shift: hwirq 31 must not overflow a signed int. */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	clrbits32(io_base + HW_BROADWAY_IMR, mask);	/* mask the line... */
+	out_be32(io_base + HW_BROADWAY_ICR, mask);	/* ...then ack it */
+}
+
+static void hlwd_pic_ack(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	u32 mask = 1U << irqd_to_hwirq(d);	/* 1U: bit 31 is a valid line */
+
+	out_be32(io_base + HW_BROADWAY_ICR, mask);	/* writing the bit acks the request */
+}
+
+static void hlwd_pic_mask(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	u32 mask = 1U << irqd_to_hwirq(d);	/* 1U: bit 31 is a valid line */
+
+	clrbits32(io_base + HW_BROADWAY_IMR, mask);	/* clearing the IMR bit disables the line */
+}
+
+static void hlwd_pic_unmask(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	u32 mask = 1U << irqd_to_hwirq(d);	/* 1U: bit 31 is a valid line */
+
+	setbits32(io_base + HW_BROADWAY_IMR, mask);
+
+	/* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */
+	clrbits32(io_base + HW_STARLET_IMR, mask);
+}
+
+
+static struct irq_chip hlwd_pic = {
+	.name		= "hlwd-pic",
+	.irq_ack	= hlwd_pic_ack,			/* writes the bit to ICR */
+	.irq_mask_ack	= hlwd_pic_mask_and_ack,	/* clears it in IMR, then writes it to ICR */
+	.irq_mask	= hlwd_pic_mask,		/* clears the bit in IMR */
+	.irq_unmask	= hlwd_pic_unmask,		/* sets it in IMR, clears it in the Starlet IMR */
+};
+
+/*
+ * IRQ host hooks.
+ *
+ */
+
+static struct irq_domain *hlwd_irq_host;
+
+static int hlwd_pic_map(struct irq_domain *h, unsigned int virq,
+			   irq_hw_number_t hwirq)
+{
+	irq_set_chip_data(virq, h->host_data);	/* register base, read back by the chip hooks */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &hlwd_pic, handle_level_irq);
+	return 0;	/* @hwirq is unused: every line is configured identically */
+}
+
+static const struct irq_domain_ops hlwd_irq_domain_ops = {
+	.map = hlwd_pic_map,	/* the only callback provided */
+};
+
+static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
+{
+	void __iomem *regs = h->host_data;
+	u32 pending;
+
+	/* Only sources that are both raised (ICR) and enabled (IMR) count. */
+	pending = in_be32(regs + HW_BROADWAY_ICR);
+	pending &= in_be32(regs + HW_BROADWAY_IMR);
+
+	/* 0 doubles as "nothing pending"; otherwise the lowest set bit wins. */
+	return pending ? __ffs(pending) : 0;
+}
+
+static void hlwd_pic_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
+	unsigned int hwirq;
+	/* Mask the cascade interrupt itself while the child source is serviced. */
+	raw_spin_lock(&desc->lock);
+	chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */
+	raw_spin_unlock(&desc->lock);
+	/* NOTE: 0 means "nothing pending", so hwirq 0 is never dispatched below. */
+	hwirq = __hlwd_pic_get_irq(irq_domain);
+	if (hwirq)
+		generic_handle_domain_irq(irq_domain, hwirq);
+	else
+		pr_err("spurious interrupt!\n");
+	/* Ack the cascade interrupt and unmask it again unless it was disabled. */
+	raw_spin_lock(&desc->lock);
+	chip->irq_ack(&desc->irq_data); /* IRQ_LEVEL */
+	if (!irqd_irq_disabled(&desc->irq_data) && chip->irq_unmask)
+		chip->irq_unmask(&desc->irq_data);
+	raw_spin_unlock(&desc->lock);
+}
+
+/*
+ * Platform hooks.
+ *
+ */
+
+static void __hlwd_quiesce(void __iomem *io_base)
+{
+	/* mask and ack all IRQs */
+	out_be32(io_base + HW_BROADWAY_IMR, 0);			/* disable every line */
+	out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff);	/* ack every line */
+}
+
+static struct irq_domain *__init hlwd_pic_init(struct device_node *np)
+{
+	struct irq_domain *irq_domain;
+	struct resource res;
+	void __iomem *io_base;
+	int retval;
+	/* Map the controller registers described by the node's first address. */
+	retval = of_address_to_resource(np, 0, &res);
+	if (retval) {
+		pr_err("no io memory range found\n");
+		return NULL;
+	}
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		pr_err("ioremap failed\n");
+		return NULL;
+	}
+
+	/* %pa already prints its own "0x" prefix, so none is added for it. */
+	pr_info("controller at %pa mapped to 0x%p\n", &res.start, io_base);
+
+	__hlwd_quiesce(io_base);	/* start with every source masked and acked */
+	/* io_base becomes the domain's host_data, used by the chip hooks. */
+	irq_domain = irq_domain_add_linear(np, HLWD_NR_IRQS,
+					   &hlwd_irq_domain_ops, io_base);
+	if (!irq_domain) {
+		pr_err("failed to allocate irq_domain\n");
+		iounmap(io_base);
+		return NULL;
+	}
+	return irq_domain;	/* NULL was returned above on any failure */
+}
+
+unsigned int hlwd_pic_get_irq(void)
+{
+	unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host);	/* 0: nothing pending */
+	return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0;	/* hwirq -> Linux irq */
+}
+
+/*
+ * Probe function.
+ *
+ */
+
+void __init hlwd_pic_probe(void)
+{
+	struct irq_domain *host;
+	struct device_node *np;
+	const u32 *interrupts;
+	int cascade_virq;
+	/* Use the first compatible node that carries an "interrupts" property. */
+	for_each_compatible_node(np, NULL, "nintendo,hollywood-pic") {
+		interrupts = of_get_property(np, "interrupts", NULL);
+		if (interrupts) {
+			host = hlwd_pic_init(np);
+			BUG_ON(!host);
+			cascade_virq = irq_of_parse_and_map(np, 0);	/* NOTE(review): result is not checked */
+			irq_set_handler_data(cascade_virq, host);
+			irq_set_chained_handler(cascade_virq,
+						hlwd_pic_irq_cascade);
+			hlwd_irq_host = host;
+			of_node_put(np);	/* the loop is left early, so drop np here */
+			break;
+		}
+	}
+}
+
+/**
+ * hlwd_quiesce() - quiesce hollywood irq controller
+ *
+ * Mask and ack all interrupt sources.
+ *
+ */
+void hlwd_quiesce(void)
+{
+	/* hlwd_irq_host stays NULL when the probe found no usable controller. */
+	if (hlwd_irq_host && hlwd_irq_host->host_data)
+		__hlwd_quiesce(hlwd_irq_host->host_data);
+}
+
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
new file mode 100644
index 0000000000..c2fa42e191
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/platforms/embedded6xx/hlwd-pic.h
+ *
+ * Nintendo Wii "Hollywood" interrupt controller support.
+ * Copyright (C) 2009 The GameCube Linux Team
+ * Copyright (C) 2009 Albert Herranz
+ */
+
+#ifndef __HLWD_PIC_H
+#define __HLWD_PIC_H
+
+extern unsigned int hlwd_pic_get_irq(void);
+void __init hlwd_pic_probe(void);
+extern void hlwd_quiesce(void);
+
+#endif
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
new file mode 100644
index 0000000000..ce9e58ee97
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Board setup routines for the IBM 750GX/CL platform w/ TSI10x bridge
+ *
+ * Copyright 2007 IBM Corporation
+ *
+ * Stephen Winiecki <stevewin@us.ibm.com>
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Based on code from mpc7448_hpc2.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/serial_core.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/extable.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/tsi108.h>
+#include <asm/pci-bridge.h>
+#include <asm/reg.h>
+#include <mm/mmu_decl.h>
+#include <asm/tsi108_irq.h>
+#include <asm/tsi108_pci.h>
+#include <asm/mpic.h>
+
+#undef DEBUG
+
+#define HOLLY_PCI_CFG_PHYS 0x7c000000
+
+static int holly_exclude_device(struct pci_controller *hose, u_char bus,
+				u_char devfn)
+{
+	/* Hide slot 0 on bus 0; every other device is reported as present. */
+	int hidden = (bus == 0) && (PCI_SLOT(devfn) == 0);
+
+	return hidden ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+static void __init holly_remap_bridge(void)
+{
+	u32 lut_val, lut_addr;
+	int i;
+
+	printk(KERN_INFO "Remapping PCI bridge\n");
+
+	/* Re-init the PCI bridge and LUT registers to have mappings that don't
+	 * rely on PIBS
+	 */
+	lut_addr = 0x900;
+	for (i = 0; i < 31; i++) {	/* first 31 entries, two registers each */
+		tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000201);
+		lut_addr += 4;
+		tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0);
+		lut_addr += 4;
+	}
+
+	/* Reserve the last LUT entry for PCI I/O space */
+	tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000241);
+	lut_addr += 4;
+	tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0);
+
+	/* Map PCI I/O space */
+	tsi108_write_reg(TSI108_PCI_PFAB_IO_UPPER, 0x0);
+	tsi108_write_reg(TSI108_PCI_PFAB_IO, 0x1);
+
+	/* Map PCI CFG space */
+	tsi108_write_reg(TSI108_PCI_PFAB_BAR0_UPPER, 0x0);
+	tsi108_write_reg(TSI108_PCI_PFAB_BAR0, 0x7c000000 | 0x01);	/* same base as HOLLY_PCI_CFG_PHYS */
+
+	/* We don't need MEM32 and PRM remapping so disable them */
+	tsi108_write_reg(TSI108_PCI_PFAB_MEM32, 0x0);
+	tsi108_write_reg(TSI108_PCI_PFAB_PFM3, 0x0);
+	tsi108_write_reg(TSI108_PCI_PFAB_PFM4, 0x0);
+
+	/* Set P2O_BAR0 */
+	tsi108_write_reg(TSI108_PCI_P2O_BAR0_UPPER, 0x0);
+	tsi108_write_reg(TSI108_PCI_P2O_BAR0, 0xc0000000);
+
+	/* Init the PCI LUTs to do no remapping */
+	lut_addr = 0x500;
+	lut_val = 0x00000002;
+
+	for (i = 0; i < 32; i++) {	/* lut_val advances by 0x02000000 per entry */
+		tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, lut_val);
+		lut_addr += 4;
+		tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, 0x40000000);
+		lut_addr += 4;
+		lut_val += 0x02000000;
+	}
+	tsi108_write_reg(TSI108_PCI_P2O_PAGE_SIZES, 0x00007900);
+
+	/* Set 64-bit PCI bus address for system memory */
+	tsi108_write_reg(TSI108_PCI_P2O_BAR2_UPPER, 0x0);
+	tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0);
+}
+
+static void __init holly_init_pci(void)
+{
+	struct device_node *pci_node;
+
+	if (ppc_md.progress)
+		ppc_md.progress("holly_setup_arch():set_bridge", 0);
+
+	/* Reprogram the bridge, then hand the "pci" node to the tsi108 code. */
+	holly_remap_bridge();
+
+	pci_node = of_find_node_by_type(NULL, "pci");
+	if (pci_node) {
+		tsi108_setup_pci(pci_node, HOLLY_PCI_CFG_PHYS, 1);
+		of_node_put(pci_node);
+	}
+
+	ppc_md.pci_exclude_device = holly_exclude_device;
+	if (ppc_md.progress)
+		ppc_md.progress("tsi108: resources set", 0x100);
+}
+
+static void __init holly_setup_arch(void)
+{
+	tsi108_csr_vir_base = get_vir_csrbase();	/* cache the CSR base in the tsi108 global */
+
+	printk(KERN_INFO "PPC750GX/CL Platform\n");
+}
+
+/*
+ * Interrupt setup and service.  Interrupts on the holly come
+ * from the four external INT pins.  PCI interrupts are routed via
+ * the PCI interrupt control registers and raise internal IRQ23.
+ *
+ * Interrupt routing on the Holly Board:
+ * TSI108:PB_INT[0] -> CPU0:INT#
+ * TSI108:PB_INT[1] -> CPU0:MCP#
+ * TSI108:PB_INT[2] -> N/C
+ * TSI108:PB_INT[3] -> N/C
+ */
+static void __init holly_init_IRQ(void)
+{
+	struct mpic *mpic;
+#ifdef CONFIG_PCI
+	unsigned int cascade_pci_irq;
+	struct device_node *tsi_pci;
+	struct device_node *cascade_node = NULL;
+#endif
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+			MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108,
+			24, 0, "Tsi108_PIC");
+
+	BUG_ON(mpic == NULL);
+
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x100);
+
+	mpic_init(mpic);
+
+#ifdef CONFIG_PCI
+	tsi_pci = of_find_node_by_type(NULL, "pci");
+	if (tsi_pci == NULL) {
+		printk(KERN_ERR "%s: No tsi108 pci node found !\n", __func__);
+		return;
+	}
+
+	cascade_node = of_find_node_by_type(NULL, "pic-router");
+	if (cascade_node == NULL) {
+		printk(KERN_ERR "%s: No tsi108 pci cascade node found !\n", __func__);
+		of_node_put(tsi_pci);	/* don't leak the reference taken above */
+		return;
+	}
+
+	cascade_pci_irq = irq_of_parse_and_map(tsi_pci, 0);
+	pr_debug("%s: tsi108 cascade_pci_irq = 0x%x\n", __func__, (u32) cascade_pci_irq);
+	tsi108_pci_int_init(cascade_node);
+	irq_set_handler_data(cascade_pci_irq, mpic);
+	irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade);
+
+	of_node_put(tsi_pci);
+	of_node_put(cascade_node);
+#endif
+	/* Configure MPIC outputs to CPU0 */
+	tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
+}
+
+static void holly_show_cpuinfo(struct seq_file *m)
+{
+	seq_printf(m, "vendor\t\t: IBM\n");	/* fixed text appended to @m */
+	seq_printf(m, "machine\t\t: PPC750 GX/CL\n");
+}
+
+static void __noreturn holly_restart(char *cmd)
+{
+	__be32 __iomem *ocn_bar1 = NULL;
+	unsigned long bar;
+	struct device_node *bridge = NULL;
+	struct resource res;
+	phys_addr_t addr = 0xc0000000;	/* default base when the tree gives none */
+
+	local_irq_disable();
+
+	bridge = of_find_node_by_type(NULL, "tsi-bridge");
+	if (bridge) {
+		if (!of_address_to_resource(bridge, 0, &res))
+			addr = res.start;	/* on failure res is unset: keep the default */
+		of_node_put(bridge);
+	}
+	addr += (TSI108_PB_OFFSET + 0x414);
+
+	ocn_bar1 = ioremap(addr, 0x4);
+
+	/* Turn on the BOOT bit so the addresses are correctly
+	 * routed to the HLP interface */
+	bar = ioread32be(ocn_bar1);
+	bar |= 2;
+	iowrite32be(bar, ocn_bar1);
+	iosync();
+
+	/* Set SRR0 to the reset vector and turn on MSR_IP */
+	mtspr(SPRN_SRR0, 0xfff00100);
+	mtspr(SPRN_SRR1, MSR_IP);
+
+	/* Do an rfi to jump back to firmware.  Somewhat evil,
+	 * but it works
+	 */
+	__asm__ __volatile__("rfi" : : : "memory");
+
+	/* Spin until reset happens.  Shouldn't really get here */
+	for (;;) ;
+}
+
+static int ppc750_machine_check_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	/* Only a fault at an address with an exception-table entry is handled. */
+	fixup = search_exception_tables(regs->nip);
+	if (!fixup)
+		return 0;
+	tsi108_clear_pci_cfg_error();
+	regs_set_recoverable(regs);
+	regs_set_return_ip(regs, extable_fixup(fixup));
+	return 1;
+}
+
+define_machine(holly){
+	.name                   	= "PPC750 GX/CL TSI",
+	.compatible			= "ibm,holly",
+	.setup_arch             	= holly_setup_arch,
+	.discover_phbs			= holly_init_pci,	/* remaps the bridge, then sets up PCI */
+	.init_IRQ               	= holly_init_IRQ,
+	.show_cpuinfo           	= holly_show_cpuinfo,
+	.get_irq                	= mpic_get_irq,
+	.restart                	= holly_restart,
+	.machine_check_exception	= ppc750_machine_check_exception,
+	.progress               	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c
new file mode 100644
index 0000000000..9c10aac40c
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -0,0 +1,162 @@
+/*
+ * Board setup routines for the Buffalo Linkstation / Kurobox Platform.
+ *
+ * Copyright (C) 2006 G. Liakhovetski (g.liakhovetski@gmx.de)
+ *
+ * Based on sandpoint.c by Mark A. Greer
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/initrd.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/pci-bridge.h>
+
+#include "mpc10x.h"
+
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .type = "soc", },
+	{ .compatible = "simple-bus", },
+	{},	/* sentinel */
+};
+
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);	/* result is ignored */
+	return 0;
+}
+machine_device_initcall(linkstation, declare_of_platform_devices);
+
+static int __init linkstation_add_bridge(struct device_node *dev)
+{
+#ifdef CONFIG_PCI
+	int len;
+	struct pci_controller *hose;
+	const int *bus_range;
+
+	printk("Adding PCI host bridge %pOF\n", dev);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n", dev);
+		bus_range = NULL;	/* never index a too-short property below */
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0);
+
+	/* Interpret "ranges"; this also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+#endif
+	return 0;
+}
+
+static void __init linkstation_setup_arch(void)
+{
+	printk(KERN_INFO "BUFFALO Network Attached Storage Series\n");	/* banner only */
+	printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n");
+}
+
+static void __init linkstation_setup_pci(void)
+{
+	struct device_node *np;
+
+	/* Lookup PCI host bridges */
+	for_each_compatible_node(np, "pci", "mpc10x-pci")
+		linkstation_add_bridge(np);	/* return value is ignored */
+}
+
+/*
+ * Interrupt setup and service.  Interrupts on the linkstation come
+ * from the four PCI slots plus onboard 8241 devices: I2C, DUART.
+ */
+static void __init linkstation_init_IRQ(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, 0, 4, 0, " EPIC     ");
+	BUG_ON(mpic == NULL);
+
+	/* PCI IRQs */
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200);
+
+	/* I2C */
+	mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000);
+
+	/* ttyS0, ttyS1 */
+	mpic_assign_isu(mpic, 2, mpic->paddr + 0x11100);
+
+	mpic_init(mpic);	/* called once all three ISUs are assigned */
+}
+
+extern void avr_uart_configure(void);
+extern void avr_uart_send(const char);
+
+static void __noreturn linkstation_restart(char *cmd)
+{
+	local_irq_disable();	/* @cmd is not used */
+
+	/* Reset system via AVR */
+	avr_uart_configure();
+	/* Send reboot command */
+	avr_uart_send('C');
+
+	for(;;)  /* Spin until reset happens */
+		avr_uart_send('G');	/* "kick" */
+}
+
+static void __noreturn linkstation_power_off(void)
+{
+	local_irq_disable();
+
+	/* Power down system via AVR */
+	avr_uart_configure();
+	/* send shutdown command */
+	avr_uart_send('E');
+
+	for(;;)  /* Spin until power-off happens */
+		avr_uart_send('G');	/* "kick" */
+	/* NOTREACHED */
+}
+
+static void __noreturn linkstation_halt(void)
+{
+	linkstation_power_off();	/* halt is implemented as a power-off */
+	/* NOTREACHED */
+}
+
+static void linkstation_show_cpuinfo(struct seq_file *m)
+{
+	seq_printf(m, "vendor\t\t: Buffalo Technology\n");	/* fixed text appended to @m */
+	seq_printf(m, "machine\t\t: Linkstation I/Kurobox(HG)\n");
+}
+
+static int __init linkstation_probe(void)
+{
+	pm_power_off = linkstation_power_off;	/* install the power-off hook */
+
+	return 1;	/* unconditionally reports 1 to the caller */
+}
+
+define_machine(linkstation){
+	.name 			= "Buffalo Linkstation",
+	.compatible		= "linkstation",
+	.probe 			= linkstation_probe,
+	.setup_arch 		= linkstation_setup_arch,
+	.discover_phbs		= linkstation_setup_pci,
+	.init_IRQ 		= linkstation_init_IRQ,
+	.show_cpuinfo 		= linkstation_show_cpuinfo,
+	.get_irq 		= mpic_get_irq,
+	.restart 		= linkstation_restart,	/* sends 'C' to the AVR */
+	.halt	 		= linkstation_halt,	/* same path as power-off ('E') */
+};
diff --git a/arch/powerpc/platforms/embedded6xx/ls_uart.c b/arch/powerpc/platforms/embedded6xx/ls_uart.c
new file mode 100644
index 0000000000..6c1dbf8ae7
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/ls_uart.c
@@ -0,0 +1,147 @@
+/*
+ * AVR power-management chip interface for the Buffalo Linkstation /
+ * Kurobox Platform.
+ *
+ * Author: 2006 (c) G. Liakhovetski
+ *	 g.liakhovetski@gmx.de
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+#include <linux/workqueue.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/io.h>
+#include <asm/termbits.h>
+
+#include "mpc10x.h"
+
+static void __iomem *avr_addr;
+static unsigned long avr_clock;
+
+static struct work_struct wd_work;
+
+static void wd_stop(struct work_struct *unused)
+{
+	const char string[] = "AAAAFFFFJJJJ>>>>VVVV>>>>ZZZZVVVVKKKK";
+	int i = 0, rescue = 8;	/* i: next byte to send; rescue: attempts left */
+	int len = strlen(string);
+
+	while (rescue--) {
+		int j;
+		char lsr = in_8(avr_addr + UART_LSR);
+
+		if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) {
+			for (j = 0; j < 16 && i < len; j++, i++)	/* at most 16 bytes per pass */
+				out_8(avr_addr + UART_TX, string[i]);
+			if (i == len) {
+				/* Read "OK" back: 4ms for the last "KKKK"
+				   plus a couple bytes back */
+				msleep(7);
+				printk("linkstation: disarming the AVR watchdog: ");
+				while (in_8(avr_addr + UART_LSR) & UART_LSR_DR)
+					printk("%c", in_8(avr_addr + UART_RX));	/* echo each reply byte */
+				break;
+			}
+		}
+		msleep(17);	/* wait before polling the line status again */
+	}
+	printk("\n");
+}
+
+#define AVR_QUOT(clock) (((clock) + 8 * 9600) / (16 * 9600))	/* rounded clock / (16 * 9600) */
+
+void avr_uart_configure(void)
+{
+	unsigned char cval = UART_LCR_WLEN8;
+	unsigned int quot = AVR_QUOT(avr_clock);	/* computed before the validity check below */
+
+	if (!avr_addr || !avr_clock)
+		return;	/* UART not mapped or clock unknown: do nothing */
+
+	out_8(avr_addr + UART_LCR, cval);			/* initialise UART */
+	out_8(avr_addr + UART_MCR, 0);
+	out_8(avr_addr + UART_IER, 0);
+
+	cval |= UART_LCR_STOP | UART_LCR_PARITY | UART_LCR_EPAR;
+
+	out_8(avr_addr + UART_LCR, cval);			/* Set character format */
+
+	out_8(avr_addr + UART_LCR, cval | UART_LCR_DLAB);	/* set DLAB */
+	out_8(avr_addr + UART_DLL, quot & 0xff);		/* LS of divisor */
+	out_8(avr_addr + UART_DLM, quot >> 8);			/* MS of divisor */
+	out_8(avr_addr + UART_LCR, cval);			/* reset DLAB */
+	out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO);	/* enable FIFO */
+}
+
+void avr_uart_send(const char c)
+{
+	int repeat;
+
+	if (!avr_addr || !avr_clock)
+		return;
+	/* Each command byte is written to the transmit register four times. */
+	for (repeat = 0; repeat < 4; repeat++)
+		out_8(avr_addr + UART_TX, c);
+}
+
+static void __init ls_uart_init(void)
+{
+	local_irq_disable();
+
+#ifndef CONFIG_SERIAL_8250
+	out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO);	/* enable FIFO */
+	out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO |
+	      UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);	/* clear FIFOs */
+	out_8(avr_addr + UART_FCR, 0);
+	out_8(avr_addr + UART_IER, 0);
+
+	/* Clear up interrupts */
+	(void) in_8(avr_addr + UART_LSR);
+	(void) in_8(avr_addr + UART_RX);
+	(void) in_8(avr_addr + UART_IIR);
+	(void) in_8(avr_addr + UART_MSR);
+#endif
+	avr_uart_configure();	/* runs whether or not the block above was compiled in */
+
+	local_irq_enable();
+}
+
+static int __init ls_uarts_init(void)
+{
+	struct device_node *avr;
+	struct resource res;
+	u32 clock = 0;
+	int ret;
+
+	avr = of_find_node_by_path("/soc10x/serial@80004500");
+	if (!avr)
+		return -EINVAL;
+
+	/* A missing or zero "clock-frequency" makes the AVR UART unusable. */
+	ret = of_property_read_u32(avr, "clock-frequency", &clock);
+	if (!ret && !clock)
+		ret = -EINVAL;
+	if (!ret)
+		ret = of_address_to_resource(avr, 0, &res);
+	of_node_put(avr);	/* drop the node reference on every path */
+	if (ret)
+		return ret;
+
+	avr_clock = clock;
+	avr_addr = ioremap(res.start, 32);
+	if (!avr_addr)
+		return -EFAULT;
+
+	ls_uart_init();
+	INIT_WORK(&wd_work, wd_stop);	/* wd_stop() disarms the AVR watchdog */
+	schedule_work(&wd_work);
+	return 0;
+}
+
+machine_late_initcall(linkstation, ls_uarts_init);
diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h
new file mode 100644
index 0000000000..5ad12023e5
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h
@@ -0,0 +1,159 @@
+/*
+ * Common routines for the Motorola SPS MPC106/8240/107 Host bridge/Mem
+ * ctlr/EPIC/etc.
+ *
+ * Author: Mark A. Greer
+ *         mgreer@mvista.com
+ *
+ * 2001 (c) MontaVista, Software, Inc.  This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef __PPC_KERNEL_MPC10X_H
+#define __PPC_KERNEL_MPC10X_H
+
+#include <linux/pci_ids.h>
+#include <asm/pci-bridge.h>
+
+/*
+ * The values here don't completely map everything but should work in most
+ * cases.
+ *
+ * MAP A (PReP Map)
+ *   Processor: 0x80000000 - 0x807fffff -> PCI I/O: 0x00000000 - 0x007fffff
+ *   Processor: 0xc0000000 - 0xdfffffff -> PCI MEM: 0x00000000 - 0x1fffffff
+ *   PCI MEM:   0x80000000 -> Processor System Memory: 0x00000000
+ *
+ * MAP B (CHRP Map)
+ *   Processor: 0xfe000000 - 0xfebfffff -> PCI I/O: 0x00000000 - 0x00bfffff
+ *   Processor: 0x80000000 - 0xbfffffff -> PCI MEM: 0x80000000 - 0xbfffffff
+ *   PCI MEM:   0x00000000 -> Processor System Memory: 0x00000000
+ */
+
+/*
+ * Define the vendor/device IDs for the various bridges--should be added to
+ * <linux/pci_ids.h>
+ */
+#define	MPC10X_BRIDGE_106	((PCI_DEVICE_ID_MOTOROLA_MPC106 << 16) |  \
+				  PCI_VENDOR_ID_MOTOROLA)
+#define	MPC10X_BRIDGE_8240	((0x0003 << 16) | PCI_VENDOR_ID_MOTOROLA)
+#define	MPC10X_BRIDGE_107	((0x0004 << 16) | PCI_VENDOR_ID_MOTOROLA)
+#define	MPC10X_BRIDGE_8245	((0x0006 << 16) | PCI_VENDOR_ID_MOTOROLA)
+
+/* Define the type of map to use */
+#define	MPC10X_MEM_MAP_A		1
+#define	MPC10X_MEM_MAP_B		2
+
+/* Map A (PReP Map) Defines */
+#define	MPC10X_MAPA_CNFG_ADDR		0x80000cf8
+#define	MPC10X_MAPA_CNFG_DATA		0x80000cfc
+
+#define MPC10X_MAPA_ISA_IO_BASE		0x80000000
+#define MPC10X_MAPA_ISA_MEM_BASE	0xc0000000
+#define	MPC10X_MAPA_DRAM_OFFSET		0x80000000
+
+#define	MPC10X_MAPA_PCI_INTACK_ADDR	0xbffffff0
+#define	MPC10X_MAPA_PCI_IO_START	0x00000000
+#define	MPC10X_MAPA_PCI_IO_END	       (0x00800000 - 1)
+#define	MPC10X_MAPA_PCI_MEM_START	0x00000000
+#define	MPC10X_MAPA_PCI_MEM_END	       (0x20000000 - 1)
+
+#define	MPC10X_MAPA_PCI_MEM_OFFSET	(MPC10X_MAPA_ISA_MEM_BASE -	\
+					 MPC10X_MAPA_PCI_MEM_START)
+
+/* Map B (CHRP Map) Defines */
+#define	MPC10X_MAPB_CNFG_ADDR		0xfec00000
+#define	MPC10X_MAPB_CNFG_DATA		0xfee00000
+
+#define MPC10X_MAPB_ISA_IO_BASE		0xfe000000
+#define MPC10X_MAPB_ISA_MEM_BASE	0x80000000
+#define	MPC10X_MAPB_DRAM_OFFSET		0x00000000
+
+#define	MPC10X_MAPB_PCI_INTACK_ADDR	0xfef00000
+#define	MPC10X_MAPB_PCI_IO_START	0x00000000
+#define	MPC10X_MAPB_PCI_IO_END	       (0x00c00000 - 1)
+#define	MPC10X_MAPB_PCI_MEM_START	0x80000000
+#define	MPC10X_MAPB_PCI_MEM_END	       (0xc0000000 - 1)
+
+#define	MPC10X_MAPB_PCI_MEM_OFFSET	(MPC10X_MAPB_ISA_MEM_BASE -	\
+					 MPC10X_MAPB_PCI_MEM_START)
+
+/* Miscellaneous Configuration register offsets */
+#define	MPC10X_CFG_PIR_REG		0x09
+#define	MPC10X_CFG_PIR_HOST_BRIDGE	0x00
+#define	MPC10X_CFG_PIR_AGENT		0x01
+
+#define	MPC10X_CFG_EUMBBAR		0x78
+
+#define	MPC10X_CFG_PICR1_REG		0xa8
+#define	MPC10X_CFG_PICR1_ADDR_MAP_MASK	0x00010000
+#define	MPC10X_CFG_PICR1_ADDR_MAP_A	0x00010000
+#define	MPC10X_CFG_PICR1_ADDR_MAP_B	0x00000000
+#define	MPC10X_CFG_PICR1_SPEC_PCI_RD	0x00000004
+#define	MPC10X_CFG_PICR1_ST_GATH_EN	0x00000040
+
+#define	MPC10X_CFG_PICR2_REG		0xac
+#define	MPC10X_CFG_PICR2_COPYBACK_OPT	0x00000001
+
+#define	MPC10X_CFG_MAPB_OPTIONS_REG	0xe0
+#define	MPC10X_CFG_MAPB_OPTIONS_CFAE	0x80	/* CPU_FD_ALIAS_EN */
+#define	MPC10X_CFG_MAPB_OPTIONS_PFAE	0x40	/* PCI_FD_ALIAS_EN */
+#define	MPC10X_CFG_MAPB_OPTIONS_DR	0x20	/* DLL_RESET */
+#define	MPC10X_CFG_MAPB_OPTIONS_PCICH	0x08	/* PCI_COMPATIBILITY_HOLE */
+#define	MPC10X_CFG_MAPB_OPTIONS_PROCCH	0x04	/* PROC_COMPATIBILITY_HOLE */
+
+/* Define offsets for the memory controller registers in the config space */
+#define MPC10X_MCTLR_MEM_START_1	0x80	/* Banks 0-3 */
+#define MPC10X_MCTLR_MEM_START_2	0x84	/* Banks 4-7 */
+#define MPC10X_MCTLR_EXT_MEM_START_1	0x88	/* Banks 0-3 */
+#define MPC10X_MCTLR_EXT_MEM_START_2	0x8c	/* Banks 4-7 */
+
+#define MPC10X_MCTLR_MEM_END_1		0x90	/* Banks 0-3 */
+#define MPC10X_MCTLR_MEM_END_2		0x94	/* Banks 4-7 */
+#define MPC10X_MCTLR_EXT_MEM_END_1	0x98	/* Banks 0-3 */
+#define MPC10X_MCTLR_EXT_MEM_END_2	0x9c	/* Banks 4-7 */
+
+#define MPC10X_MCTLR_MEM_BANK_ENABLES	0xa0
+
+/* Define some offset in the EUMB */
+#define	MPC10X_EUMB_SIZE		0x00100000 /* Total EUMB size (1MB) */
+
+#define MPC10X_EUMB_MU_OFFSET		0x00000000 /* Msg Unit reg offset */
+#define MPC10X_EUMB_MU_SIZE		0x00001000 /* Msg Unit reg size */
+#define MPC10X_EUMB_DMA_OFFSET		0x00001000 /* DMA Unit reg offset */
+#define MPC10X_EUMB_DMA_SIZE		0x00001000 /* DMA Unit reg size  */
+#define MPC10X_EUMB_ATU_OFFSET		0x00002000 /* Addr xlate reg offset */
+#define MPC10X_EUMB_ATU_SIZE		0x00001000 /* Addr xlate reg size  */
+#define MPC10X_EUMB_I2C_OFFSET		0x00003000 /* I2C Unit reg offset */
+#define MPC10X_EUMB_I2C_SIZE		0x00001000 /* I2C Unit reg size  */
+#define MPC10X_EUMB_DUART_OFFSET	0x00004000 /* DUART Unit reg offset (8245) */
+#define MPC10X_EUMB_DUART_SIZE		0x00001000 /* DUART Unit reg size (8245) */
+#define	MPC10X_EUMB_EPIC_OFFSET		0x00040000 /* EPIC offset in EUMB */
+#define	MPC10X_EUMB_EPIC_SIZE		0x00030000 /* EPIC size */
+#define MPC10X_EUMB_PM_OFFSET		0x000fe000 /* Performance Monitor reg offset (8245) */
+#define MPC10X_EUMB_PM_SIZE		0x00001000 /* Performance Monitor reg size (8245) */
+#define MPC10X_EUMB_WP_OFFSET		0x000ff000 /* Data path diagnostic, watchpoint reg offset */
+#define MPC10X_EUMB_WP_SIZE		0x00001000 /* Data path diagnostic, watchpoint reg size */
+
+enum ppc_sys_devices {
+	MPC10X_IIC1,
+	MPC10X_DMA0,
+	MPC10X_DMA1,
+	MPC10X_UART0,
+	MPC10X_UART1,
+	NUM_PPC_SYS_DEVS,	/* number of entries above; must stay last */
+};
+
+int mpc10x_bridge_init(struct pci_controller *hose,
+		       uint current_map,
+		       uint new_map,
+		       uint phys_eumb_base);
+unsigned long mpc10x_get_mem_size(uint mem_map);
+int mpc10x_enable_store_gathering(struct pci_controller *hose);
+int mpc10x_disable_store_gathering(struct pci_controller *hose);
+
+/* For MPC107 boards that use the built-in openpic */
+void mpc10x_set_openpic(void);
+
+#endif	/* __PPC_KERNEL_MPC10X_H */
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
new file mode 100644
index 0000000000..00bec0f051
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Motorola/Emerson MVME5100.
+ *
+ * Copyright 2013 CSC Australia Pty. Ltd.
+ *
+ * Based on earlier code by:
+ *
+ *    Matt Porter, MontaVista Software Inc.
+ *    Copyright 2001 MontaVista Software Inc.
+ *
+ * Author: Stephen Chivers <schivers@csc.com>
+ */
+
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/i8259.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#define HAWK_MPIC_SIZE		0x00040000U
+#define MVME5100_PCI_MEM_OFFSET 0x00000000
+
+/* Board register addresses. */
+#define BOARD_STATUS_REG	0xfef88080
+#define BOARD_MODFAIL_REG	0xfef88090
+#define BOARD_MODRST_REG	0xfef880a0
+#define BOARD_TBEN_REG		0xfef880c0
+#define BOARD_SW_READ_REG	0xfef880e0
+#define BOARD_GEO_ADDR_REG	0xfef880e8
+#define BOARD_EXT_FEATURE1_REG	0xfef880f0
+#define BOARD_EXT_FEATURE2_REG	0xfef88100
+
+static phys_addr_t pci_membase;
+static u_char *restart;
+
+/* Chained handler: dispatch what i8259_irq() reports, then EOI on our chip. */
+static void mvme5100_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int virq = i8259_irq();
+
+	if (virq != 0)
+		generic_handle_irq(virq);
+	chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Bring up the OpenPIC at pci_membase and cascade the i8259 behind it.
+ */
+static void __init mvme5100_pic_init(void)
+{
+	struct mpic *mpic;
+	struct device_node *np;
+	struct device_node *cp;
+	unsigned int cirq;
+	unsigned long intack = 0;
+	const u32 *prop;
+
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (!np) {
+		pr_err("Could not find open-pic node\n");
+		return;
+	}
+
+	mpic = mpic_alloc(np, pci_membase, 0, 16, 256, " OpenPIC  ");
+	BUG_ON(mpic == NULL);
+	of_node_put(np);
+
+	mpic_assign_isu(mpic, 0, pci_membase + 0x10000);
+	mpic_init(mpic);
+
+	cp = of_find_compatible_node(NULL, NULL, "chrp,iic");
+	if (cp == NULL) {
+		pr_warn("mvme5100_pic_init: couldn't find i8259\n");
+		return;
+	}
+
+	cirq = irq_of_parse_and_map(cp, 0);
+	if (!cirq) {
+		pr_warn("mvme5100_pic_init: no cascade interrupt?\n");
+		of_node_put(cp);	/* don't leak the i8259 node reference */
+		return;
+	}
+
+	np = of_find_compatible_node(NULL, "pci", "mpc10x-pci");
+	if (np) {
+		prop = of_get_property(np, "8259-interrupt-acknowledge", NULL);
+		if (prop)
+			intack = prop[0];
+		of_node_put(np);
+	}
+
+	if (intack)
+		pr_debug("mvme5100_pic_init: PCI 8259 intack at 0x%016lx\n",
+		   intack);
+
+	i8259_init(cp, intack);
+	of_node_put(cp);
+	irq_set_chained_handler(cirq, mvme5100_8259_cascade);
+}
+
+/* Register the HAWK host bridge @dev and record its BAR1 in pci_membase. */
+static int __init mvme5100_add_bridge(struct device_node *dev)
+{
+	const int		*bus_range;
+	int			len;
+	struct pci_controller	*hose;
+	unsigned short		devid;
+	u32			membase = 0;
+
+	pr_info("Adding PCI host bridge %pOF\n", dev);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	setup_indirect_pci(hose, 0xfe000cf8, 0xfe000cfc, 0);
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+
+	early_read_config_word(hose, 0, 0, PCI_DEVICE_ID, &devid);
+	if (devid != PCI_DEVICE_ID_MOTOROLA_HAWK) {
+		pr_err("HAWK PHB not present?\n");
+		return 0;
+	}
+
+	/* Config dwords are 32 bits; phys_addr_t may be wider, so bounce. */
+	early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_1, &membase);
+	pci_membase = membase;
+	if (pci_membase == 0) {
+		pr_err("HAWK PHB mibar not correctly set?\n");
+		return 0;
+	}
+
+	/* %pa takes the address of a phys_addr_t and prints it with 0x. */
+	pr_info("mvme5100_pic_init: pci_membase: %pa\n", &pci_membase);
+	return 0;
+}
+
+static const struct of_device_id mvme5100_of_bus_ids[] __initconst = {
+	{ .compatible = "hawk-bridge", },
+	{},
+};
+
+/*
+ * Setup the architecture: report progress, then ioremap BOARD_MODRST_REG.
+ */
+static void __init mvme5100_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mvme5100_setup_arch()", 0);
+
+	restart = ioremap(BOARD_MODRST_REG, 4);
+}
+
+static void __init mvme5100_setup_pci(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, "pci", "hawk-pci")
+		mvme5100_add_bridge(np);
+}
+
+static void mvme5100_show_cpuinfo(struct seq_file *m)
+{
+	seq_puts(m, "Vendor\t\t: Motorola/Emerson\n");
+	seq_puts(m, "Machine\t\t: MVME5100\n");
+}
+
+/* Reset the board by writing 0x01 through the mapping in 'restart'. */
+static void __noreturn mvme5100_restart(char *cmd)
+{
+	local_irq_disable();
+	mtmsr(mfmsr() | MSR_IP);
+	/* ioremap() in mvme5100_setup_arch() is unchecked; don't poke NULL. */
+	if (restart)
+		out_8((u_char *) restart, 0x01);
+	while (1)
+		;
+}
+
+static int __init probe_of_platform_devices(void)
+{
+	/* The bus probe result is not checked; this initcall returns 0. */
+	of_platform_bus_probe(NULL, mvme5100_of_bus_ids, NULL);
+	return 0;
+}
+
+machine_device_initcall(mvme5100, probe_of_platform_devices);
+
+define_machine(mvme5100) {
+	.name			= "MVME5100",
+	.compatible		= "MVME5100",
+	.setup_arch		= mvme5100_setup_arch,
+	.discover_phbs		= mvme5100_setup_pci,
+	.init_IRQ		= mvme5100_pic_init,
+	.show_cpuinfo		= mvme5100_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.restart		= mvme5100_restart,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
new file mode 100644
index 0000000000..e49880e8da
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -0,0 +1,121 @@
+/*
+ * Board setup routines for the storcenter
+ *
+ * Copyright 2007 (C) Oyvind Repvik (nail@nslu2-linux.org)
+ * Copyright 2007 Andy Wilcox, Jon Loeliger
+ *
+ * Based on linkstation.c by G. Liakhovetski
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/initrd.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/pci-bridge.h>
+
+#include "mpc10x.h"
+
+
+static const struct of_device_id storcenter_of_bus[] __initconst = {
+	{ .name = "soc", },
+	{},
+};
+
+static int __init storcenter_device_probe(void)
+{
+	of_platform_bus_probe(NULL, storcenter_of_bus, NULL);
+	return 0;
+}
+machine_device_initcall(storcenter, storcenter_device_probe);
+
+
+/* Set up the MPC10x host bridge for node @dev; a no-op without CONFIG_PCI. */
+static int __init storcenter_add_bridge(struct device_node *dev)
+{
+#ifdef CONFIG_PCI
+	int len;
+	struct pci_controller *hose;
+	const int *bus_range;
+
+	pr_info("Adding PCI host bridge %pOF\n", dev);
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	setup_indirect_pci(hose, MPC10X_MAPB_CNFG_ADDR, MPC10X_MAPB_CNFG_DATA, 0);
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+#endif
+
+	return 0;
+}
+
+static void __init storcenter_setup_arch(void)
+{
+	pr_info("IOMEGA StorCenter\n");
+}
+
+static void __init storcenter_setup_pci(void)
+{
+	struct device_node *bridge;
+
+	/* Hand every "mpc10x-pci" node of type "pci" to the bridge setup. */
+	for_each_compatible_node(bridge, "pci", "mpc10x-pci")
+		storcenter_add_bridge(bridge);
+}
+
+/*
+ * Interrupt setup and service.  Interrupts on the turbostation come
+ * from the four PCI slots plus onboard 8241 devices: I2C, DUART.
+ */
+static void __init storcenter_init_IRQ(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, 0, 16, 0, " OpenPIC  ");
+	BUG_ON(mpic == NULL);
+
+	/*
+	 * 16 Serial Interrupts followed by 16 Internal Interrupts.
+	 * I2C is the second internal, so it is at 17, 0x11020.
+	 */
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200);
+	mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000);
+
+	mpic_init(mpic);
+}
+
+static void __noreturn storcenter_restart(char *cmd)
+{
+	local_irq_disable();
+
+	/* Set MSR_IP: exception prefix high - to the firmware */
+	mtmsr(mfmsr() | MSR_IP);
+	isync();
+	/* Nothing else is done here: spin until the reset happens. */
+	while (1)
+		;
+}
+
+define_machine(storcenter){
+	.name 			= "IOMEGA StorCenter",
+	.compatible		= "iomega,storcenter",
+	.setup_arch 		= storcenter_setup_arch,
+	.discover_phbs 		= storcenter_setup_pci,
+	.init_IRQ 		= storcenter_init_IRQ,
+	.get_irq 		= mpic_get_irq,
+	.restart 		= storcenter_restart,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
new file mode 100644
index 0000000000..221577f32b
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
+ *
+ * udbg serial input/output routines for the USB Gecko adapter.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include <linux/of_address.h>
+
+#include <mm/mmu_decl.h>
+
+#include <asm/io.h>
+#include <asm/udbg.h>
+#include <asm/fixmap.h>
+
+#include "usbgecko_udbg.h"
+
+
+#define EXI_CLK_32MHZ           5
+
+#define EXI_CSR                 0x00
+#define   EXI_CSR_CLKMASK       (0x7<<4)
+#define     EXI_CSR_CLK_32MHZ   (EXI_CLK_32MHZ<<4)
+#define   EXI_CSR_CSMASK        (0x7<<7)
+#define     EXI_CSR_CS_0        (0x1<<7)  /* Chip Select 001 */
+
+#define EXI_CR                  0x0c
+#define   EXI_CR_TSTART         (1<<0)
+#define   EXI_CR_WRITE		(1<<2)
+#define   EXI_CR_READ_WRITE     (2<<2)
+#define   EXI_CR_TLEN(len)      (((len)-1)<<4)
+
+#define EXI_DATA                0x10
+
+#define UG_READ_ATTEMPTS	100
+#define UG_WRITE_ATTEMPTS	100
+
+
+static void __iomem *ug_io_base;
+
+/*
+ * Performs one input/output transaction between the exi host and the usbgecko.
+ */
+static u32 ug_io_transaction(u32 in)
+{
+	void __iomem *chan = ug_io_base;
+	u32 __iomem *csr = chan + EXI_CSR;
+	u32 __iomem *cr = chan + EXI_CR;
+	u32 __iomem *dat = chan + EXI_DATA;
+
+	/* Select: chip select 0 with the 32MHz clock setting. */
+	out_be32(csr, EXI_CSR_CLK_32MHZ | EXI_CSR_CS_0);
+
+	/* Load the outgoing word ... */
+	out_be32(dat, in);
+	/* ... and start a length-2 read/write transfer. */
+	out_be32(cr, EXI_CR_TLEN(2) | EXI_CR_READ_WRITE | EXI_CR_TSTART);
+
+	/* Poll until TSTART reads back clear. */
+	for (;;) {
+		if (!(in_be32(cr) & EXI_CR_TSTART))
+			break;
+		barrier();
+	}
+
+	/* Deselect. */
+	out_be32(csr, 0);
+
+	/* Whatever now sits in the data register is the result. */
+	return in_be32(dat);
+}
+
+/*
+ * Returns true if a usbgecko adapter is found.
+ */
+static int ug_is_adapter_present(void)
+{
+	/* Without a mapped EXI channel there is nothing to query. */
+	if (ug_io_base == NULL)
+		return 0;
+	return ug_io_transaction(0x90000000) == 0x04700000;
+}
+
+/*
+ * Returns true if the TX fifo is ready for transmission.
+ */
+static int ug_is_txfifo_ready(void)
+{
+	return ug_io_transaction(0xc0000000) & 0x04000000;
+}
+
+/*
+ * Tries to transmit a character; undefined if the TX fifo is not ready.
+ * The casts keep a negative (signed) char from smearing into command bits.
+ */
+static void ug_raw_putc(char ch)
+{
+	ug_io_transaction(0xb0000000 | ((u32)(u8)ch << 20));
+}
+
+/*
+ * Transmits a character.
+ * It silently fails if the TX fifo is not ready after a number of retries.
+ */
+static void ug_putc(char ch)
+{
+	int budget = UG_WRITE_ATTEMPTS;
+
+	if (ug_io_base == NULL)
+		return;
+	if (ch == '\n')
+		ug_putc('\r');	/* LF goes out as CR LF */
+
+	/* budget drops to -1 only if every poll found the fifo busy */
+	while (!ug_is_txfifo_ready() && budget--)
+		barrier();
+	if (budget >= 0)
+		ug_raw_putc(ch);
+}
+
+/*
+ * Returns true if the RX fifo has a character ready to be read.
+ */
+static int ug_is_rxfifo_ready(void)
+{
+	return ug_io_transaction(0xd0000000) & 0x04000000;
+}
+
+/*
+ * Tries to receive a character.
+ * If a character is unavailable the function returns -1.
+ */
+static int ug_raw_getc(void)
+{
+	const u32 reply = ug_io_transaction(0xa0000000);
+
+	if (!(reply & 0x08000000))
+		return -1;	/* no character available */
+	return (reply >> 16) & 0xff;
+}
+
+/*
+ * Receives a character.
+ * It fails if the RX fifo is not ready after a number of retries.
+ */
+static int ug_getc(void)
+{
+	int budget = UG_READ_ATTEMPTS;
+
+	if (ug_io_base == NULL)
+		return -1;
+	/* Poll a bounded number of times, then read whatever is there. */
+	while (!ug_is_rxfifo_ready() && budget--)
+		barrier();
+	return ug_raw_getc();
+}
+
+/*
+ * udbg functions.
+ *
+ */
+
+/*
+ * Transmits a character.
+ */
+static void ug_udbg_putc(char ch)
+{
+	ug_putc(ch);
+}
+
+/*
+ * Receives a character. Waits until a character is available.
+ */
+static int ug_udbg_getc(void)
+{
+	int ch = ug_getc();
+
+	for (; ch == -1; ch = ug_getc())
+		barrier();
+	return ch;
+}
+
+/*
+ * Receives a character. If a character is not available, returns -1.
+ */
+static int ug_udbg_getc_poll(void)
+{
+	if (ug_is_rxfifo_ready())
+		return ug_getc();
+	/* fifo empty: report it without entering ug_getc()'s retry loop */
+	return -1;
+}
+
+/*
+ * Checks if a USB Gecko adapter is inserted in any memory card slot.
+ */
+static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base)
+{
+	int slot;
+
+	/* memcard slots A and B: register windows 0x14 bytes apart */
+	for (slot = 0; slot < 2; slot++) {
+		/* ug_is_adapter_present() probes through ug_io_base */
+		ug_io_base = exi_io_base + 0x14 * slot;
+		if (ug_is_adapter_present())
+			return ug_io_base;
+	}
+	/* nothing answered on either slot */
+	ug_io_base = NULL;
+	return NULL;
+}
+
+/*
+ * USB Gecko udbg support initialization.
+ */
+void __init ug_udbg_init(void)
+{
+	struct device_node *np;
+	void __iomem *exi_io_base;
+
+	/* A base that is already set was left by an earlier probe. */
+	if (ug_io_base)
+		udbg_printf("%s: early -> final\n", __func__);
+
+	/* Locate the EXI node in the device tree. */
+	np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi");
+	if (!np) {
+		udbg_printf("%s: EXI node not found\n", __func__);
+		return;
+	}
+
+	/* Map its first register range and look for an adapter there. */
+	exi_io_base = of_iomap(np, 0);
+	if (!exi_io_base) {
+		udbg_printf("%s: failed to setup EXI io base\n", __func__);
+	} else if (ug_udbg_probe(exi_io_base)) {
+		/* Adapter found: route the udbg hooks through it. */
+		udbg_putc = ug_udbg_putc;
+		udbg_getc = ug_udbg_getc;
+		udbg_getc_poll = ug_udbg_getc_poll;
+		udbg_printf("usbgecko_udbg: ready\n");
+	} else {
+		/* No adapter: give the mapping back. */
+		udbg_printf("usbgecko_udbg: not found\n");
+		iounmap(exi_io_base);
+	}
+
+	/* Drop the reference taken by of_find_compatible_node(). */
+	of_node_put(np);
+}
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+
+static phys_addr_t __init ug_early_grab_io_addr(void)
+{
+#if defined(CONFIG_GAMECUBE)
+	return 0x0c000000;
+#elif defined(CONFIG_WII)
+	return 0x0d000000;
+#else
+#error Invalid platform for USB Gecko based early debugging.
+#endif
+}
+
+/*
+ * USB Gecko early debug support initialization for udbg.
+ */
+void __init udbg_init_usbgecko(void)
+{
+	void __iomem *early_debug_area;
+	void __iomem *exi_io_base;
+
+	/*
+	 * At this point we have a BAT already setup that enables I/O
+	 * to the EXI hardware.
+	 *
+	 * The BAT uses a virtual address range reserved at the fixmap.
+	 * This must match the virtual address configured in
+	 * head_32.S:setup_usbgecko_bat().
+	 */
+	early_debug_area = (void __iomem *)__fix_to_virt(FIX_EARLY_DEBUG_BASE);
+	exi_io_base = early_debug_area + 0x00006800;
+
+	/* try to detect a USB Gecko */
+	if (!ug_udbg_probe(exi_io_base))
+		return;
+
+	/* we found a USB Gecko, load udbg hooks */
+	udbg_putc = ug_udbg_putc;
+	udbg_getc = ug_udbg_getc;
+	udbg_getc_poll = ug_udbg_getc_poll;
+
+	/*
+	 * Prepare again the same BAT for MMU_init.
+	 * This allows udbg I/O to continue working after the MMU is
+	 * turned on for real.
+	 * It is safe to continue using the same virtual address as it is
+	 * a reserved fixmap area.
+	 */
+	setbat(1, (unsigned long)early_debug_area,
+	       ug_early_grab_io_addr(), 128*1024, PAGE_KERNEL_NCG);
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_USBGECKO */
+
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h
new file mode 100644
index 0000000000..bceb11911e
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h
+ *
+ * udbg serial input/output routines for the USB Gecko adapter.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#ifndef __USBGECKO_UDBG_H
+#define __USBGECKO_UDBG_H
+
+#ifdef CONFIG_USBGECKO_UDBG
+
+extern void __init ug_udbg_init(void);
+
+#else
+
+static inline void __init ug_udbg_init(void)
+{
+}
+
+#endif /* CONFIG_USBGECKO_UDBG */
+
+void __init udbg_init_usbgecko(void);
+
+#endif /* __USBGECKO_UDBG_H */
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
new file mode 100644
index 0000000000..cb3be6d6e3
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/wii.c
+ *
+ * Nintendo Wii board-specific support
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+#define DRV_MODULE_NAME "wii"
+#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "flipper-pic.h"
+#include "hlwd-pic.h"
+#include "usbgecko_udbg.h"
+
+/* control block */
+#define HW_CTRL_COMPATIBLE	"nintendo,hollywood-control"
+
+#define HW_CTRL_RESETS		0x94
+#define HW_CTRL_RESETS_SYS	(1<<0)
+
+/* gpio */
+#define HW_GPIO_COMPATIBLE	"nintendo,hollywood-gpio"
+
+#define HW_GPIO_BASE(idx)	((idx) * 0x20)
+#define HW_GPIO_OUT(idx)	(HW_GPIO_BASE(idx) + 0)
+#define HW_GPIO_DIR(idx)	(HW_GPIO_BASE(idx) + 4)
+#define HW_GPIO_OWNER		(HW_GPIO_BASE(1) + 0x1c)
+
+#define HW_GPIO_SHUTDOWN	(1<<1)
+#define HW_GPIO_SLOT_LED	(1<<5)
+#define HW_GPIO_SENSOR_BAR	(1<<8)
+
+
+static void __iomem *hw_ctrl;
+static void __iomem *hw_gpio;
+
+static void __noreturn wii_spin(void)
+{
+	local_irq_disable();
+	for (;;)
+		cpu_relax();
+}
+
+/*
+ * Map reg entry 0 of the first node matching @compatible.  @name is only
+ * used in the log message.  Returns the mapping, or NULL on any failure.
+ */
+static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible)
+{
+	void __iomem *hw_regs = NULL;
+	struct device_node *np;
+	struct resource res;
+
+	np = of_find_compatible_node(NULL, NULL, compatible);
+	if (!np) {
+		pr_err("no compatible node found for %s\n", compatible);
+		return NULL;
+	}
+	if (of_address_to_resource(np, 0, &res)) {
+		pr_err("no valid reg found for %pOFn\n", np);
+		goto out_put;
+	}
+
+	hw_regs = ioremap(res.start, resource_size(&res));
+	/* %pa already emits a leading "0x"; do not add a second one. */
+	if (hw_regs)
+		pr_info("%s at %pa mapped to 0x%p\n", name, &res.start, hw_regs);
+
+out_put:
+	of_node_put(np);
+	return hw_regs;
+}
+
+static void __init wii_setup_arch(void)
+{
+	hw_ctrl = wii_ioremap_hw_regs("hw_ctrl", HW_CTRL_COMPATIBLE);
+	hw_gpio = wii_ioremap_hw_regs("hw_gpio", HW_GPIO_COMPATIBLE);
+	if (!hw_gpio)
+		return;
+	/* turn off the front blue led and IR light */
+	clrbits32(hw_gpio + HW_GPIO_OUT(0),
+		  HW_GPIO_SLOT_LED | HW_GPIO_SENSOR_BAR);
+}
+
+static void __noreturn wii_restart(char *cmd)
+{
+	local_irq_disable();
+
+	/* clear the system reset pin to cause a reset */
+	if (hw_ctrl)
+		clrbits32(hw_ctrl + HW_CTRL_RESETS, HW_CTRL_RESETS_SYS);
+	/* control block not mapped, or the reset has not hit yet */
+	wii_spin();
+}
+
+static void wii_power_off(void)
+{
+	local_irq_disable();
+
+	if (hw_gpio) {
+		/*
+		 * set the owner of the shutdown pin to ARM, because it is
+		 * accessed through the registers for the ARM, below
+		 */
+		clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
+
+		/* make sure that the poweroff GPIO is configured as output */
+		setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
+
+		/* drive the poweroff GPIO high */
+		setbits32(hw_gpio + HW_GPIO_OUT(1), HW_GPIO_SHUTDOWN);
+	}
+	wii_spin();
+}
+
+static void __noreturn wii_halt(void)
+{
+	if (ppc_md.restart)
+		ppc_md.restart(NULL);
+	wii_spin();
+}
+
+static void __init wii_pic_probe(void)
+{
+	flipper_pic_probe();
+	hlwd_pic_probe();
+}
+
+static int __init wii_probe(void)
+{
+	pm_power_off = wii_power_off;
+
+	ug_udbg_init();
+
+	return 1;
+}
+
+static void wii_shutdown(void)
+{
+	hlwd_quiesce();
+	flipper_quiesce();
+}
+
+static const struct of_device_id wii_of_bus[] = {
+	{ .compatible = "nintendo,hollywood", },
+	{ },
+};
+
+static int __init wii_device_probe(void)
+{
+	of_platform_populate(NULL, wii_of_bus, NULL, NULL);
+	return 0;
+}
+machine_device_initcall(wii, wii_device_probe);
+
+define_machine(wii) {
+	.name			= "wii",
+	.compatible		= "nintendo,wii",
+	.probe			= wii_probe,
+	.setup_arch		= wii_setup_arch,
+	.restart		= wii_restart,
+	.halt			= wii_halt,
+	.init_IRQ		= wii_pic_probe,
+	.get_irq		= flipper_pic_get_irq,
+	.progress		= udbg_progress,
+	.machine_shutdown	= wii_shutdown,
+};
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
new file mode 100644
index 0000000000..b8d37a9932
--- /dev/null
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ULI M1575 setup code - specific to Freescale boards
+ *
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/of_irq.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_pci.h>
+
+#define ULI_PIRQA	0x08
+#define ULI_PIRQB	0x09
+#define ULI_PIRQC	0x0a
+#define ULI_PIRQD	0x0b
+#define ULI_PIRQE	0x0c
+#define ULI_PIRQF	0x0d
+#define ULI_PIRQG	0x0e
+
+#define ULI_8259_NONE	0x00
+#define ULI_8259_IRQ1	0x08
+#define ULI_8259_IRQ3	0x02
+#define ULI_8259_IRQ4	0x04
+#define ULI_8259_IRQ5	0x05
+#define ULI_8259_IRQ6	0x07
+#define ULI_8259_IRQ7	0x06
+#define ULI_8259_IRQ9	0x01
+#define ULI_8259_IRQ10	0x03
+#define ULI_8259_IRQ11	0x09
+#define ULI_8259_IRQ12	0x0b
+#define ULI_8259_IRQ14	0x0d
+#define ULI_8259_IRQ15	0x0f
+
+static u8 uli_pirq_to_irq[8] = {
+	ULI_8259_IRQ9,		/* PIRQA */
+	ULI_8259_IRQ10,		/* PIRQB */
+	ULI_8259_IRQ11,		/* PIRQC */
+	ULI_8259_IRQ12,		/* PIRQD */
+	ULI_8259_IRQ5,		/* PIRQE */
+	ULI_8259_IRQ6,		/* PIRQF */
+	ULI_8259_IRQ7,		/* PIRQG */
+	ULI_8259_NONE,		/* PIRQH */
+};
+
+static inline bool is_quirk_valid(void)
+{
+	return (machine_is(mpc86xx_hpcn) ||
+		machine_is(mpc8544_ds) ||
+		machine_is(p2020_ds) ||
+		machine_is(mpc8572_ds));
+}
+
+/* Bridge */
+static void early_uli5249(struct pci_dev *dev)
+{
+	unsigned char temp;
+
+	if (!is_quirk_valid())
+		return;
+
+	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_IO |
+		 PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+	/* read/write lock */
+	pci_read_config_byte(dev, 0x7c, &temp);
+	pci_write_config_byte(dev, 0x7c, 0x80);
+
+	/* set as P2P bridge */
+	pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01);
+	dev->class |= 0x1;
+
+	/* restore lock */
+	pci_write_config_byte(dev, 0x7c, temp);
+}
+
+
+static void quirk_uli1575(struct pci_dev *dev)
+{
+	int i;
+
+	if (!is_quirk_valid())
+		return;
+
+	/*
+	 * ULI1575 interrupts route setup
+	 */
+
+	/* ULI1575 IRQ mapping conf register maps PIRQx to IRQn */
+	for (i = 0; i < 4; i++) {
+		u8 val = uli_pirq_to_irq[i*2] | (uli_pirq_to_irq[i*2+1] << 4);
+		pci_write_config_byte(dev, 0x48 + i, val);
+	}
+
+	/* USB 1.1 OHCI controller 1: dev 28, func 0 - IRQ12 */
+	pci_write_config_byte(dev, 0x86, ULI_PIRQD);
+
+	/* USB 1.1 OHCI controller 2: dev 28, func 1 - IRQ9 */
+	pci_write_config_byte(dev, 0x87, ULI_PIRQA);
+
+	/* USB 1.1 OHCI controller 3: dev 28, func 2 - IRQ10 */
+	pci_write_config_byte(dev, 0x88, ULI_PIRQB);
+
+	/* Lan controller: dev 27, func 0 - IRQ6 */
+	pci_write_config_byte(dev, 0x89, ULI_PIRQF);
+
+	/* AC97 Audio controller: dev 29, func 0 - IRQ6 */
+	pci_write_config_byte(dev, 0x8a, ULI_PIRQF);
+
+	/* Modem controller: dev 29, func 1 - IRQ6 */
+	pci_write_config_byte(dev, 0x8b, ULI_PIRQF);
+
+	/* HD Audio controller: dev 29, func 2 - IRQ6 */
+	pci_write_config_byte(dev, 0x8c, ULI_PIRQF);
+
+	/* SATA controller: dev 31, func 1 - IRQ5 */
+	pci_write_config_byte(dev, 0x8d, ULI_PIRQE);
+
+	/* SMB interrupt: dev 30, func 1 - IRQ7 */
+	pci_write_config_byte(dev, 0x8e, ULI_PIRQG);
+
+	/* PMU ACPI SCI interrupt: dev 30, func 2 - IRQ7 */
+	pci_write_config_byte(dev, 0x8f, ULI_PIRQG);
+
+	/* USB 2.0 controller: dev 28, func 3 */
+	pci_write_config_byte(dev, 0x74, ULI_8259_IRQ11);
+
+	/* Primary PATA IDE IRQ: 14
+	 * Secondary PATA IDE IRQ: 15
+	 */
+	pci_write_config_byte(dev, 0x44, 0x30 | ULI_8259_IRQ14);
+	pci_write_config_byte(dev, 0x75, ULI_8259_IRQ15);
+}
+
+static void quirk_final_uli1575(struct pci_dev *dev)
+{
+	/* Set i8259 interrupt trigger
+	 * IRQ 3:  Level
+	 * IRQ 4:  Level
+	 * IRQ 5:  Level
+	 * IRQ 6:  Level
+	 * IRQ 7:  Level
+	 * IRQ 9:  Level
+	 * IRQ 10: Level
+	 * IRQ 11: Level
+	 * IRQ 12: Level
+	 * IRQ 14: Edge
+	 * IRQ 15: Edge
+	 */
+	if (!is_quirk_valid())
+		return;
+
+	outb(0xfa, 0x4d0);
+	outb(0x1e, 0x4d1);
+
+	/* setup RTC */
+	CMOS_WRITE(RTC_SET, RTC_CONTROL);
+	CMOS_WRITE(RTC_24H, RTC_CONTROL);
+
+	/* ensure month, date, and week alarm fields are ignored */
+	CMOS_WRITE(0, RTC_VALID);
+
+	outb_p(0x7c, 0x72);
+	outb_p(RTC_ALARM_DONT_CARE, 0x73);
+
+	outb_p(0x7d, 0x72);
+	outb_p(RTC_ALARM_DONT_CARE, 0x73);
+}
+
+/* SATA */
+static void quirk_uli5288(struct pci_dev *dev)
+{
+	unsigned char c;
+	unsigned int d;
+
+	if (!is_quirk_valid())
+		return;
+
+	/* read/write lock */
+	pci_read_config_byte(dev, 0x83, &c);
+	pci_write_config_byte(dev, 0x83, c|0x80);
+
+	pci_read_config_dword(dev, PCI_CLASS_REVISION, &d);
+	d = (d & 0xff) | (PCI_CLASS_STORAGE_SATA_AHCI << 8);
+	pci_write_config_dword(dev, PCI_CLASS_REVISION, d);
+
+	/* restore lock */
+	pci_write_config_byte(dev, 0x83, c);
+
+	/* disable emulated PATA mode enabled */
+	pci_read_config_byte(dev, 0x84, &c);
+	pci_write_config_byte(dev, 0x84, c & ~0x01);
+}
+
+/* PATA */
+static void quirk_uli5229(struct pci_dev *dev)
+{
+	unsigned short temp;
+
+	if (!is_quirk_valid())
+		return;
+
+	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE |
+		PCI_COMMAND_MASTER | PCI_COMMAND_IO);
+
+	/* Enable Native IRQ 14/15 */
+	pci_read_config_word(dev, 0x4a, &temp);
+	pci_write_config_word(dev, 0x4a, temp | 0x1000);
+}
+
+/* We have to do a dummy read on the P2P for the RTC to work, WTF */
+static void quirk_final_uli5249(struct pci_dev *dev)
+{
+	int i;
+	u8 __iomem *dummy;	/* ioremap() cookie: must carry __iomem */
+	struct pci_bus *bus = dev->bus;
+	struct resource *res;
+	resource_size_t end = 0;
+
+	/* Remember where the bridge's non-prefetchable memory window ends. */
+	for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) {
+		unsigned long flags = pci_resource_flags(dev, i);
+		if ((flags & (IORESOURCE_MEM|IORESOURCE_PREFETCH)) == IORESOURCE_MEM)
+			end = pci_resource_end(dev, i);
+	}
+
+	pci_bus_for_each_resource(bus, res, i) {
+		if (res && res->flags & IORESOURCE_MEM) {
+			/* Start of the matching window, else its last 4 bytes. */
+			dummy = ioremap(res->end == end ? res->start :
+					res->end - 3, 0x4);
+			if (dummy) {
+				in_8(dummy);
+				iounmap(dummy);
+			}
+			break;
+		}
+	}
+}
+
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5249, quirk_final_uli5249);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x1575, quirk_final_uli1575);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
+
+static void hpcd_quirk_uli1575(struct pci_dev *dev)
+{
+	u32 temp32;
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	/* Disable INTx */
+	pci_read_config_dword(dev, 0x48, &temp32);
+	pci_write_config_dword(dev, 0x48, (temp32 | 1<<26));
+
+	/* Enable sideband interrupt */
+	pci_read_config_dword(dev, 0x90, &temp32);
+	pci_write_config_dword(dev, 0x90, (temp32 | 1<<22));
+}
+
+static void hpcd_quirk_uli5288(struct pci_dev *dev)
+{
+	unsigned char c;
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	pci_read_config_byte(dev, 0x83, &c);
+	c |= 0x80;
+	pci_write_config_byte(dev, 0x83, c);
+
+	pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01);
+	pci_write_config_byte(dev, PCI_CLASS_DEVICE, 0x06);
+
+	pci_read_config_byte(dev, 0x83, &c);
+	c &= 0x7f;
+	pci_write_config_byte(dev, 0x83, c);
+}
+
+/*
+ * Since 8259PIC was disabled on the board, the IDE device can not
+ * use the legacy IRQ, we need to let the IDE device work under
+ * native mode and use the interrupt line like other PCI devices.
+ * IRQ14 is a sideband interrupt from IDE device to CPU and we use this
+ * as the interrupt for IDE device.
+ */
+static void hpcd_quirk_uli5229(struct pci_dev *dev)
+{
+	unsigned char c;
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	pci_read_config_byte(dev, 0x4b, &c);
+	c |= 0x10;
+	pci_write_config_byte(dev, 0x4b, c);
+}
+
+/*
+ * SATA interrupt pin bug fix
+ * There's a chip bug for 5288, The interrupt pin should be 2,
+ * not the read only value 1, So it use INTB#, not INTA# which
+ * actually used by the IDE device 5229.
+ * As of this bug, during the PCI initialization, 5288 read the
+ * irq of IDE device from the device tree, this function fix this
+ * bug by re-assigning a correct irq to 5288.
+ *
+ */
+static void hpcd_final_uli5288(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct device_node *hosenode = hose ? hose->dn : NULL;
+	struct of_phandle_args oirq;
+	u32 laddr[3];
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	if (!hosenode)
+		return;
+
+	oirq.np = hosenode;
+	oirq.args[0] = 2;
+	oirq.args_count = 1;
+	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
+	laddr[1] = laddr[2] = 0;
+	of_irq_parse_raw(laddr, &oirq);
+	dev->irq = irq_create_of_mapping(&oirq);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, hpcd_quirk_uli5288);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, hpcd_quirk_uli5229);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, hpcd_final_uli5288);
+
+static int uli_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn)
+{
+	/* Only slot 29 on bus (first_busno + 2) of the primary is filtered. */
+	if (hose->dn != fsl_pci_primary || bus != (hose->first_busno + 2) ||
+	    PCI_SLOT(devfn) != 29)
+		return PCIBIOS_SUCCESSFUL;
+	switch (PCI_FUNC(devfn)) {
+	case 1:		/* exclude Modem controller */
+	case 2:		/* exclude HD Audio controller */
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	default:
+		return PCIBIOS_SUCCESSFUL;
+	}
+}
+
+void __init uli_init(void)
+{
+	struct device_node *node;
+	struct device_node *pci_with_uli;
+
+	/* See if we have a ULI under the primary: walk up from "uli1575". */
+	node = of_find_node_by_name(NULL, "uli1575");
+	while ((pci_with_uli = of_get_parent(node))) {
+		of_node_put(node);
+		node = pci_with_uli;
+
+		if (pci_with_uli == fsl_pci_primary) {
+			ppc_md.pci_exclude_device = uli_exclude_device;
+			break;
+		}
+	}
+	of_node_put(node);	/* drop the last reference still held */
+}
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
new file mode 100644
index 0000000000..4c058cc57c
--- /dev/null
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MAPLE
+	depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+	bool "Maple 970FX Evaluation Board"
+	select FORCE_PCI
+	select MPIC
+	select U3_DART
+	select MPIC_U3_HT_IRQS
+	select GENERIC_TBSYNC
+	select PPC_UDBG_16550
+	select PPC_970_NAP
+	select PPC_64S_HASH_MMU
+	select PPC_HASH_MMU_NATIVE
+	select PPC_RTAS
+	select MMIO_NVRAM
+	select ATA_NONSTANDARD if ATA
+	help
+	  This option enables support for the Maple 970FX Evaluation Board.
+	  For more information, refer to <http://www.970eval.com>
diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/platforms/maple/Makefile
new file mode 100644
index 0000000000..19f35ab828
--- /dev/null
+++ b/arch/powerpc/platforms/maple/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y	+= setup.o pci.o time.o
diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h
new file mode 100644
index 0000000000..4f358b55c3
--- /dev/null
+++ b/arch/powerpc/platforms/maple/maple.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Declarations for maple-specific code.
+ *
+ * Maple is the name of a PPC970 evaluation board.
+ */
+extern int maple_set_rtc_time(struct rtc_time *tm);
+extern void maple_get_rtc_time(struct rtc_time *tm);
+extern time64_t maple_get_boot_time(void);
+extern void maple_calibrate_decr(void);
+extern void maple_pci_init(void);
+extern void maple_pci_irq_fixup(struct pci_dev *dev);
+extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel);
+
+extern struct pci_controller_ops maple_pci_controller_ops;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
new file mode 100644
index 0000000000..b911b31717
--- /dev/null
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *		      IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/of_irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/iommu.h>
+#include <asm/ppc-pci.h>
+#include <asm/isa-bridge.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; /* set by setup_u3_agp(), setup_u3_ht() and setup_u4_pcie() */
+
+/* Return the highest "bus-range" end found on @node, its siblings and any
+ * bridges below them, or @higher when nothing larger is found. */
+static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
+{
+	for (; node; node = node->sibling) {
+		const int *bus_range;
+		const unsigned int *class_code;
+		int len;
+		/* Only PCI<->PCI and CardBus bridges are descended into */
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		/* A range is two cells, so it must be accepted with ">=" */
+		bus_range = of_get_property(node, "bus-range", &len);
+		if (bus_range && len >= 2 * sizeof(int) && bus_range[1] > higher)
+			higher = bus_range[1];
+		higher = fixup_one_level_bus_range(node->child, higher);
+	}
+	return higher;
+}
+
+/* Correct the "bus-range" property of a host bridge node, whose "last"
+ * member tends to be wrong on macs: it is replaced by the value that
+ * fixup_one_level_bus_range() computes from the child nodes.
+ *
+ * The bus numbers handled here are OF bus numbers, not Linux bus numbers.
+ */
+static void __init fixup_bus_range(struct device_node *bridge)
+{
+	struct property *range_prop;
+	int *range;
+	int size;
+
+	/* Fetch the hose's "bus-range" property; it must hold two cells */
+	range_prop = of_find_property(bridge, "bus-range", &size);
+	if (!range_prop || !range_prop->value || size < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+			       bridge);
+		return;
+	}
+	range = range_prop->value;
+	range[1] = fixup_one_level_bus_range(bridge->child, range[1]);
+}
+
+
+/* Config address on the hose's first bus; 1UL as slot 31 would overflow an int */
+static unsigned long u3_agp_cfa0(u8 devfn, u8 off)
+{
+	return (1UL << PCI_SLOT(devfn)) | ((unsigned long)PCI_FUNC(devfn) << 8) |
+		((unsigned long)off & 0xFCUL);
+}
+
+/* Config address for buses other than the first: bus, devfn, offset, bit 0 set */
+static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off)
+{
+	unsigned long where = ((unsigned long)off & 0xFCUL) | 1UL;
+
+	return where | ((unsigned long)devfn << 8) | ((unsigned long)bus << 16);
+}
+
+/* Latch the config address for (bus, dev_fn, offset) into the bridge and
+ * return the matching data port, or NULL for slots 0-10 of the first bus. */
+static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose,
+				       u8 bus, u8 dev_fn, u8 offset)
+{
+	const bool on_first_bus = (bus == hose->first_busno);
+	unsigned int cfg_word;
+
+	if (on_first_bus && dev_fn < (11 << 3))
+		return NULL;
+	cfg_word = on_first_bus ? u3_agp_cfa0(dev_fn, offset)
+				: u3_agp_cfa1(bus, dev_fn, offset);
+
+	/* Uninorth returns garbage unless the address is read back first */
+	do {
+		out_le32(hose->cfg_addr, cfg_word);
+	} while (in_le32(hose->cfg_addr) != cfg_word);
+
+	return hose->cfg_data + (offset & 0x07);
+}
+
+/*
+ * pci_ops.read hook of the U3 AGP hose: read @len bytes of config space
+ * at @offset into *@val.  A missing hose or a slot refused by
+ * u3_agp_cfg_access() is reported as PCIBIOS_DEVICE_NOT_FOUND.
+ */
+static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *port;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	port = u3_agp_cfg_access(hose, bus->number, devfn, offset);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already checked the alignment of offset and that
+	 * len is 1, 2 or 4, so anything that is not 1 or 2 is a dword.
+	 */
+	if (len == 1)
+		*val = in_8(port);
+	else if (len == 2)
+		*val = in_le16(port);
+	else
+		*val = in_le32(port);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops.write hook of the U3 AGP hose: write @len bytes of @val to
+ * config space at @offset.  A missing hose or a slot refused by
+ * u3_agp_cfg_access() is reported as PCIBIOS_DEVICE_NOT_FOUND.
+ */
+static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *port;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	port = u3_agp_cfg_access(hose, bus->number, devfn, offset);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already checked the alignment of offset and that
+	 * len is 1, 2 or 4, so anything that is not 1 or 2 is a dword.
+	 */
+	if (len == 1)
+		out_8(port, val);
+	else if (len == 2)
+		out_le16(port, val);
+	else
+		out_le32(port, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space accessors installed on the U3 AGP hose by setup_u3_agp() */
+static struct pci_ops u3_agp_pci_ops = {
+	.read = u3_agp_read_config,
+	.write = u3_agp_write_config,
+};
+
+static unsigned long u3_ht_cfa0(u8 devfn, u8 off)
+{
+	return off | (devfn << 8);	/* offset of devfn/off for the first bus */
+}
+
+static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off)
+{
+	return 0x01000000UL + (bus << 16) + u3_ht_cfa0(devfn, off);	/* other buses */
+}
+
+/* Address of bus/devfn/offset inside cfg_data; NULL for slot 0 of the first bus */
+static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose,
+				      u8 bus, u8 devfn, u8 offset)
+{
+	if (bus != hose->first_busno)
+		return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset);
+	if (PCI_SLOT(devfn) == 0)
+		return NULL;
+	return hose->cfg_data + u3_ht_cfa0(devfn, offset);
+}
+
+/*
+ * Read @len bytes at config @offset through the register block mapped at
+ * hose->cfg_addr.  Config dword N lives at byte N * 16 there and is read
+ * big-endian, hence the (4 - len - byte lane) adjustment.
+ */
+static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset,
+				  int len, u32 *val)
+{
+	volatile void __iomem *reg = hose->cfg_addr;
+
+	/* reg is a void pointer, so the offset below is counted in bytes */
+	reg += ((offset & ~3) << 2) + (4 - len - (offset & 3));
+
+	if (len == 1)
+		*val = in_8(reg);
+	else if (len == 2)
+		*val = in_be16(reg);
+	else
+		*val = in_be32(reg);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write @len bytes of @val at config @offset through hose->cfg_addr.
+ * Writes from PCI_BASE_ADDRESS_0 up to PCI_CAPABILITY_LIST are dropped.
+ */
+static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset,
+				  int len, u32 val)
+{
+	volatile void __iomem *addr;
+
+	if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST)
+		return PCIBIOS_SUCCESSFUL;
+
+	/* Add to the void pointer, as the read side does, so the offset is in
+	 * bytes whatever the element type of hose->cfg_addr is. */
+	addr = hose->cfg_addr;
+	addr += ((offset & ~3) << 2) + (4 - len - (offset & 3));
+	if (len == 1)
+		out_8(addr, val);
+	else if (len == 2)
+		out_be16(addr, val);
+	else
+		out_be32(addr, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops.read hook of the U3 HT hose.  Device 0 function 0 of the first
+ * bus is handled by u3_ht_root_read_config(); every other device is read
+ * little-endian at the address given by u3_ht_cfg_access().
+ */
+static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
+			     int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *port;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (devfn == PCI_DEVFN(0, 0) && bus->number == hose->first_busno)
+		return u3_ht_root_read_config(hose, offset, len, val);
+
+	/* Offsets above 0xff are refused for everything but the root */
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	port = u3_ht_cfg_access(hose, bus->number, devfn, offset);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already checked the alignment of offset and that
+	 * len is 1, 2 or 4, so anything that is not 1 or 2 is a dword.
+	 */
+	if (len == 1)
+		*val = in_8(port);
+	else if (len == 2)
+		*val = in_le16(port);
+	else
+		*val = in_le32(port);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops.write hook of the U3 HT hose.  Device 0 function 0 of the first
+ * bus is handled by u3_ht_root_write_config(); every other device is
+ * written little-endian at the address given by u3_ht_cfg_access().
+ */
+static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *port;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (devfn == PCI_DEVFN(0, 0) && bus->number == hose->first_busno)
+		return u3_ht_root_write_config(hose, offset, len, val);
+
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	port = u3_ht_cfg_access(hose, bus->number, devfn, offset);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already checked the alignment of offset and that
+	 * len is 1, 2 or 4, so anything that is not 1 or 2 is a dword.
+	 */
+	if (len == 1)
+		out_8(port, val);
+	else if (len == 2)
+		out_le16(port, val);
+	else
+		out_le32(port, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space accessors installed on the U3 HT hose by setup_u3_ht() */
+static struct pci_ops u3_ht_pci_ops = {
+	.read = u3_ht_read_config,
+	.write = u3_ht_write_config,
+};
+
+/* Config address on the hose's first bus: one-hot slot, function, offset */
+static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off)
+{
+	/* 1u: shifting a signed 1 by slot 31 would overflow int */
+	return (1u << PCI_SLOT(devfn)) | (PCI_FUNC(devfn) << 8) |
+	       ((off >> 8) << 28) | (off & 0xfcu);
+}
+
+/* Config address for buses other than the first: bus, devfn, offset, bit 0 set */
+static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn,
+				 unsigned int off)
+{
+	unsigned int reg = ((off >> 8) << 28) | (off & 0xfcu);
+
+	return reg | (bus << 16) | (devfn << 8) | 1u;
+}
+
+/* Latch the config address, then return the byte to access inside cfg_data */
+static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose,
+					u8 bus, u8 dev_fn, int offset)
+{
+	unsigned int cfg_word;
+
+	if (bus != hose->first_busno)
+		cfg_word = u4_pcie_cfa1(bus, dev_fn, offset);
+	else
+		cfg_word = u4_pcie_cfa0(dev_fn, offset);
+
+	/* Uninorth returns garbage unless the address is read back first */
+	do {
+		out_le32(hose->cfg_addr, cfg_word);
+	} while (in_le32(hose->cfg_addr) != cfg_word);
+
+	return hose->cfg_data + (offset & 0x03);
+}
+
+/*
+ * pci_ops.read hook of the U4 PCIe hose: read @len bytes of config space
+ * at @offset (below 0x1000) into *@val through the cfg_addr/cfg_data pair.
+ */
+static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *port;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= 0x1000)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	port = u4_pcie_cfg_access(hose, bus->number, devfn, offset);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already checked the alignment of offset and that
+	 * len is 1, 2 or 4, so anything that is not 1 or 2 is a dword.
+	 */
+	if (len == 1)
+		*val = in_8(port);
+	else if (len == 2)
+		*val = in_le16(port);
+	else
+		*val = in_le32(port);
+	return PCIBIOS_SUCCESSFUL;
+}
+/*
+ * pci_ops.write hook of the U4 PCIe hose: write @len bytes of @val to config
+ * space at @offset (below 0x1000) through the cfg_addr/cfg_data pair.
+ */
+static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *port;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= 0x1000)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	port = u4_pcie_cfg_access(hose, bus->number, devfn, offset);
+	if (!port)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/*
+	 * The caller has already checked the alignment of offset and that
+	 * len is 1, 2 or 4, so anything that is not 1 or 2 is a dword.
+	 */
+	if (len == 1)
+		out_8(port, val);
+	else if (len == 2)
+		out_le16(port, val);
+	else
+		out_le32(port, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space accessors installed on the U4 PCIe hose by setup_u4_pcie() */
+static struct pci_ops u4_pcie_pci_ops = {
+	.read = u4_pcie_read_config,
+	.write = u4_pcie_write_config,
+};
+
+/* Configure @hose as the U3 AGP bridge and remember it in u3_agp. */
+static void __init setup_u3_agp(struct pci_controller* hose)
+{
+	/* On G5, AGP is moved up to high bus numbers so that the bus numbers
+	 * for HT need not be reassigned. If we ever have P2P bridges on AGP,
+	 * pci_assign_all_buses will have to move to the pci_controller
+	 * structure so it can be enabled for AGP and not for HT children.
+	 */
+	hose->ops = &u3_agp_pci_ops;
+	hose->first_busno = 0xf0;
+	hose->last_busno = 0xff;
+	/* The addresses are hard coded because of the different size of the
+	 * reg address cell; to be fixed by killing struct reg_property and
+	 * using some accessor functions instead.
+	 */
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+	u3_agp = hose;
+}
+
+/* Configure @hose as the U4 PCIe bridge and remember it in u4_pcie. */
+static void __init setup_u4_pcie(struct pci_controller* hose)
+{
+	/* Only the "non-atomic" config space is implemented for now, to be
+	 * optimised later.
+	 */
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+	hose->ops = &u4_pcie_pci_ops;
+	u4_pcie = hose;
+}
+
+/* Configure @hose as the U3 HyperTransport bridge and remember it in u3_ht. */
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+	/* HT gets bus numbers 0 to 0xef */
+	hose->first_busno = 0;
+	hose->last_busno = 0xef;
+	hose->ops = &u3_ht_pci_ops;
+
+	/* The addresses are hard coded because of the different size of the
+	 * reg address cell; to be fixed by killing struct reg_property and
+	 * using some accessor functions instead.
+	 */
+	hose->cfg_data = ioremap(0xf2000000, 0x02000000);
+	hose->cfg_addr = ioremap(0xf8070000, 0x1000);
+	u3_ht = hose;
+}
+
+/* Allocate and set up the pci_controller of bridge node @dev; 0 or -ENOMEM */
+static int __init maple_add_bridge(struct device_node *dev)
+{
+	int len;
+	struct pci_controller *hose;
+	char *disp_name = NULL;
+	const int *bus_range;
+	int primary = 1;
+
+	DBG("Adding PCI host bridge %pOF\n", dev);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n",
+		       dev);
+		bus_range = NULL;	/* never index a truncated property */
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = maple_pci_controller_ops;
+
+	/* The AGP and HT setup helpers override the bus numbers set above */
+	if (of_device_is_compatible(dev, "u3-agp")) {
+		setup_u3_agp(hose);
+		disp_name = "U3-AGP";
+		primary = 0;
+	} else if (of_device_is_compatible(dev, "u3-ht")) {
+		setup_u3_ht(hose);
+		disp_name = "U3-HT";
+		primary = 1;
+	} else if (of_device_is_compatible(dev, "u4-pcie")) {
+		setup_u4_pcie(hose);
+		disp_name = "U4-PCIE";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n",
+		disp_name, hose->first_busno, hose->last_busno);
+
+	/* Interpret "ranges"; this also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	/* Fixup "bus-range" OF property */
+	fixup_bus_range(dev);
+
+	/* Check for legacy IOs */
+	isa_bridge_find_early(hose);
+
+	/* create pci_dn's for DT nodes under this PHB */
+	pci_devs_phb_init_dynamic(hose);
+	return 0;
+}
+
+
+/* ppc_md.pci_irq_fixup hook: adjust dev->irq for the two Maple special cases */
+void maple_pci_irq_fixup(struct pci_dev *dev)
+{
+	bool amd_ide;
+
+	DBG(" -> maple_pci_irq_fixup\n");
+	/* Devices on bus 0 of the U4 PCIe hose use interrupt 1, level low */
+	if (u4_pcie != NULL && dev->bus->number == 0 &&
+	    pci_bus_to_host(dev->bus) == u4_pcie) {
+		printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n");
+		dev->irq = irq_create_mapping(NULL, 1);
+		if (dev->irq != 0)
+			irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW);
+	}
+
+	/* Hide the AMD8111 IDE interrupt when in legacy mode so that the
+	 * driver calls pci_get_legacy_ide_irq() */
+	amd_ide = dev->vendor == PCI_VENDOR_ID_AMD &&
+		  dev->device == PCI_DEVICE_ID_AMD_8111_IDE;
+	if (amd_ide && (dev->class & 5) != 5)
+		dev->irq = 0;
+
+	DBG(" <- maple_pci_irq_fixup\n");
+}
+
+/* ppc_md.pcibios_root_bridge_prepare hook: only the U3 AGP hose needs any
+ * work here; always returns 0. */
+static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct pci_controller *hose = pci_bus_to_host(bridge->bus);
+	struct device_node *kid;
+
+	if (hose == u3_agp) {
+		/* Fixup the PCI<->OF mapping for U3 AGP due to bus
+		 * renumbering. We assume there is no P2P bridge on the AGP
+		 * bus, which should hopefully be a safe assumption.
+		 */
+		PCI_DN(hose->dn)->busno = 0xf0;
+		for_each_child_of_node(hose->dn, kid)
+			PCI_DN(kid)->busno = 0xf0;
+	}
+
+	return 0;
+}
+
+/* ppc_md.discover_phbs hook: add the host bridges listed under the root node */
+void __init maple_pci_init(void)
+{
+	struct device_node *root, *np, *ht = NULL;
+
+	/* Probe root PCI hosts, that is on U3 the AGP host and the
+	 * HyperTransport host. That one is actually "kept" around
+	 * and actually added last as its resource management relies
+	 * on the AGP resources to have been setup first
+	 */
+	root = of_find_node_by_path("/");
+	if (!root) {
+		printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n");
+		return;
+	}
+	for_each_child_of_node(root, np) {
+		bool agp_or_pcie;
+
+		if (!(of_node_is_type(np, "pci") || of_node_is_type(np, "ht")))
+			continue;
+		agp_or_pcie = of_device_is_compatible(np, "u4-pcie") ||
+			      of_device_is_compatible(np, "u3-agp");
+		if (agp_or_pcie && maple_add_bridge(np) == 0)
+			of_node_get(np);
+
+		if (of_device_is_compatible(np, "u3-ht")) {
+			of_node_get(np);
+			ht = np;
+		}
+	}
+	of_node_put(root);
+
+	/* Now setup the HyperTransport host if any; drop its reference on failure */
+	if (ht && maple_add_bridge(ht) != 0)
+		of_node_put(ht);
+
+	ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare;
+	/* Tell pci.c to not change any resource allocations.  */
+	pci_add_flags(PCI_PROBE_ONLY);
+}
+
+/* ppc_md.pci_get_legacy_ide_irq hook: mapped IRQ of @channel, else legacy 14/15 */
+int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
+{
+	const unsigned int legacy_irq = channel ? 15 : 14;
+	struct device_node *ide_node;
+	unsigned int mapped;
+
+	if (pdev->vendor != PCI_VENDOR_ID_AMD ||
+	    pdev->device != PCI_DEVICE_ID_AMD_8111_IDE)
+		return legacy_irq;
+
+	ide_node = pci_device_to_OF_node(pdev);
+	if (!ide_node) {
+		printk("Failed to locate OF node for IDE %s\n",
+		       pci_name(pdev));
+		return legacy_irq;
+	}
+	mapped = irq_of_parse_and_map(ide_node, channel & 0x1);
+	if (mapped)
+		return mapped;
+	printk("Failed to map onboard IDE interrupt for channel %d\n",
+	       channel);
+	return legacy_irq;
+}
+
+/* Final fixup for the IBM Obsidian (IPR): something prevents its MSIs from
+ * working on Bimini and the driver has no smarts to recover, so MSI is
+ * disabled on it for now. */
+static void quirk_ipr_msi(struct pci_dev *dev)
+{
+	if (!machine_is(maple))
+		return;
+
+	dev->no_msi = 1;
+	dev_info(&dev->dev, "Quirk disabled MSI\n");
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN,
+			quirk_ipr_msi);
+
+struct pci_controller_ops maple_pci_controller_ops = { /* starts empty; passed to iommu_init_early_dart() by maple_probe() */
+};
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
new file mode 100644
index 0000000000..f329a03edf
--- /dev/null
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Maple (970 eval board) setup code
+ *
+ *  (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *                     IBM Corp. 
+ */
+
+#undef DEBUG
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/major.h>
+#include <linux/initrd.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/serial.h>
+#include <linux/smp.h>
+#include <linux/bitops.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/memblock.h>
+
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/dma.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/rtas.h>
+#include <asm/udbg.h>
+#include <asm/nvram.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Return the PIO base address of the AMD8111 NVRAM described in the device
+ * tree, or 0 when it is missing or its address cannot be used.
+ */
+static unsigned long maple_find_nvram_base(void)
+{
+	struct device_node *nvram;
+	struct resource res;
+	unsigned long base = 0;
+
+	/* find NVRAM device */
+	nvram = of_find_compatible_node(NULL, "nvram", "AMD8111");
+	if (!nvram) {
+		printk(KERN_EMERG "Maple: Unable to find NVRAM\n");
+	} else if (of_address_to_resource(nvram, 0, &res)) {
+		printk(KERN_EMERG "Maple: Unable to translate NVRAM"
+		       " address\n");
+	} else if (!(res.flags & IORESOURCE_IO)) {
+		printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n");
+	} else {
+		base = res.start;
+	}
+
+	of_node_put(nvram);
+	return base;
+}
+
+/* ppc_md.restart hook: write the service processor's "restart-value" to
+ * NVRAM base + "restart-addr"; spins forever whether or not that worked. */
+static void __noreturn maple_restart(char *cmd)
+{
+	unsigned int maple_nvram_base = maple_find_nvram_base();
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
+	struct device_node *sp;
+
+	if (maple_nvram_base == 0)
+		goto fail;
+	sp = of_find_node_by_name(NULL, "service-processor");
+	if (!sp) {
+		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
+		goto fail;
+	}
+	maple_nvram_offset = of_get_property(sp, "restart-addr", NULL);
+	maple_nvram_command = of_get_property(sp, "restart-value", NULL);
+	of_node_put(sp);
+	if (!maple_nvram_offset || !maple_nvram_command)	/* property missing */
+		goto fail;
+	/* send command */
+	outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset);
+	for (;;) ;
+ fail:
+	printk(KERN_EMERG "Maple: Manual Restart Required\n");
+	for (;;) ;
+}
+
+/* pm_power_off handler: write the service processor's "power-off-value" to
+ * NVRAM base + "power-off-addr"; spins forever whether or not that worked. */
+static void __noreturn maple_power_off(void)
+{
+	unsigned int maple_nvram_base = maple_find_nvram_base();
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
+	struct device_node *sp;
+
+	if (maple_nvram_base == 0)
+		goto fail;
+	sp = of_find_node_by_name(NULL, "service-processor");
+	if (!sp) {
+		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
+		goto fail;
+	}
+	maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL);
+	maple_nvram_command = of_get_property(sp, "power-off-value", NULL);
+	of_node_put(sp);
+	if (!maple_nvram_offset || !maple_nvram_command)	/* property missing */
+		goto fail;
+	/* send command */
+	outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset);
+	for (;;) ;
+ fail:
+	printk(KERN_EMERG "Maple: Manual Power-Down Required\n");
+	for (;;) ;
+}
+
+static void __noreturn maple_halt(void)
+{
+	maple_power_off();	/* ppc_md.halt hook: halting is handled as a power-off */
+}
+
+#ifdef CONFIG_SMP
+static struct smp_ops_t maple_smp_ops = { /* installed in smp_ops by maple_setup_arch() */
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_mpic_setup_cpu,
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+static void __init maple_use_rtas_reboot_and_halt_if_present(void)
+{
+	if (!rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) ||
+	    !rtas_function_implemented(RTAS_FN_POWER_OFF))
+		return;	/* RTAS lacks one of the calls: leave the handlers alone */
+	ppc_md.restart = rtas_restart;
+	pm_power_off = rtas_power_off;
+	ppc_md.halt = rtas_halt;
+}
+
+/* ppc_md.setup_arch hook: SMP ops, RTAS reboot/halt override and MMIO NVRAM */
+static void __init maple_setup_arch(void)
+{
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+#ifdef CONFIG_SMP
+	/* Setup SMP callback */
+	smp_ops = &maple_smp_ops;
+#endif
+	maple_use_rtas_reboot_and_halt_if_present();
+
+	printk(KERN_DEBUG "Using native/NAP idle loop\n");
+	mmio_nvram_init();
+}
+
+/*
+ * This is almost identical to pSeries and CHRP. We need to make that
+ * code generic at one point, with appropriate bits in the device-tree to
+ * identify the presence of an HT APIC
+ */
+static void __init maple_init_IRQ(void)
+{
+	struct device_node *root, *np, *mpic_node = NULL;
+	const unsigned int *opprop;
+	unsigned long openpic_addr = 0;
+	int naddr, n, i, opplen, has_isus = 0;
+	struct mpic *mpic;
+	unsigned int flags = 0;
+
+	/* Locate MPIC in the device-tree. Note that there is a bug
+	 * in Maple device-tree where the type of the controller is
+	 * open-pic and not interrupt-controller
+	 */
+
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "open-pic")) {
+			mpic_node = np;
+			break;
+		}
+	if (mpic_node == NULL)
+		for_each_node_by_type(np, "open-pic") {
+			mpic_node = np;	/* the first node of that type is taken */
+			break;
+		}
+	if (mpic_node == NULL) {
+		printk(KERN_ERR
+		       "Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
+
+	/* Find address list in /platform-open-pic */
+	root = of_find_node_by_path("/");
+	naddr = of_n_addr_cells(root);
+	opprop = of_get_property(root, "platform-open-pic", &opplen);
+	if (opprop) {
+		openpic_addr = of_read_number(opprop, naddr);
+		has_isus = (opplen > naddr);	/* only reported in the message below */
+		printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n",
+		       openpic_addr, has_isus);
+	}
+
+	BUG_ON(openpic_addr == 0);	/* no usable "platform-open-pic" address */
+
+	/* Check for a big endian MPIC */
+	if (of_property_read_bool(np, "big-endian"))	/* np == mpic_node here */
+		flags |= MPIC_BIG_ENDIAN;
+
+	/* XXX Maple specific bits */
+	flags |= MPIC_U3_HT_IRQS;
+	/* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */
+	flags |= MPIC_BIG_ENDIAN;
+
+	/* Setup the openpic driver. More device-tree junks, we hard code no
+	 * ISUs for now. I'll have to revisit some stuffs with the folks doing
+	 * the firmware for those
+	 */
+	mpic = mpic_alloc(mpic_node, openpic_addr, flags,
+			  /*has_isus ? 16 :*/ 0, 0, " MPIC     ");
+	BUG_ON(mpic == NULL);
+
+	/* Add ISUs */
+	opplen /= sizeof(u32);	/* property length: bytes -> cells */
+	for (n = 0, i = naddr; i < opplen; i += naddr, n++) {	/* addresses after the first one */
+		unsigned long isuaddr = of_read_number(opprop + i, naddr);
+		mpic_assign_isu(mpic, n, isuaddr);
+	}
+
+	/* All ISUs are setup, complete initialization */
+	mpic_init(mpic);
+	ppc_md.get_irq = mpic_get_irq;
+	of_node_put(mpic_node);
+	of_node_put(root);
+}
+
+static void __init maple_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");	/* ppc_md.progress hook; a NULL @s prints as "" */
+}
+
+
+/*
+ * Machine probe (noted as called very early, MMU off): on Momentum Maple or
+ * Apache boards hook power-off and the early DART IOMMU init and return 1.
+ */
+static int __init maple_probe(void)
+{
+	bool is_maple = of_machine_is_compatible("Momentum,Maple") ||
+			of_machine_is_compatible("Momentum,Apache");
+
+	if (!is_maple)
+		return 0;
+	pm_power_off = maple_power_off;
+	iommu_init_early_dart(&maple_pci_controller_ops);
+	return 1;
+}
+
+#ifdef CONFIG_EDAC
+/*
+ * Register a platform device for the CPC925 memory controller on
+ * all boards with a U3H (CPC925) bridge.
+ * Returns 0 when the device was created or the bridge is not a U3H,
+ * a negative errno otherwise.
+ */
+static int __init maple_cpc925_edac_setup(void)
+{
+	struct platform_device *edac_dev;
+	struct device_node *mc_node;
+	volatile void __iomem *regs;
+	struct resource res;
+	u32 revision;
+	int err;
+
+	mc_node = of_find_node_by_type(NULL, "memory-controller");
+	if (mc_node == NULL) {
+		printk(KERN_ERR "%s: Unable to find memory-controller node\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	err = of_address_to_resource(mc_node, 0, &res);
+	of_node_put(mc_node);
+	if (err < 0) {
+		printk(KERN_ERR "%s: Unable to get memory-controller reg\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	regs = ioremap(res.start, resource_size(&res));
+	if (regs == NULL) {
+		printk(KERN_ERR "%s: Unable to map memory-controller memory\n",
+				__func__);
+		return -ENOMEM;
+	}
+
+	/* The first word of the mapping is checked as the bridge revision */
+	revision = __raw_readl(regs);
+	iounmap(regs);
+	if (revision < 0x34 || revision > 0x3f) { /* U3H */
+		printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n",
+			__func__, revision);
+		return 0;
+	}
+
+	edac_dev = platform_device_register_simple("cpc925_edac", 0, &res, 1);
+	if (IS_ERR(edac_dev))
+		return PTR_ERR(edac_dev);
+
+	printk(KERN_INFO "%s: CPC925 platform device created\n", __func__);
+	return 0;
+}
+machine_device_initcall(maple, maple_cpc925_edac_setup);
+#endif
+
+define_machine(maple) {	/* hooks of the Maple platform */
+	.name			= "Maple",
+	.probe			= maple_probe,	/* matches Momentum Maple/Apache */
+	.setup_arch		= maple_setup_arch,
+	.discover_phbs		= maple_pci_init,	/* pci.c */
+	.init_IRQ		= maple_init_IRQ,
+	.pci_irq_fixup		= maple_pci_irq_fixup,	/* pci.c */
+	.pci_get_legacy_ide_irq	= maple_pci_get_legacy_ide_irq,	/* pci.c */
+	.restart		= maple_restart,	/* replaced by maple_setup_arch() when RTAS can do it */
+	.halt			= maple_halt,	/* likewise */
+	.get_boot_time		= maple_get_boot_time,	/* time.c */
+	.set_rtc_time		= maple_set_rtc_time,	/* time.c */
+	.get_rtc_time		= maple_get_rtc_time,	/* time.c */
+	.progress		= maple_progress,
+	.power_save		= power4_idle,
+};
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
new file mode 100644
index 0000000000..91606411d2
--- /dev/null
+++ b/arch/powerpc/platforms/maple/time.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *                     IBM Corp. 
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bcd.h>
+#include <linux/of_address.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+static int maple_rtc_addr;	/* RTC register-select port (data at +1); set by maple_get_boot_time() */
+
+static int maple_clock_read(int addr)
+{
+	outb_p(addr, maple_rtc_addr);	/* select RTC register @addr */
+	return inb_p(maple_rtc_addr+1);	/* then read its value from the next port */
+}
+
+static void maple_clock_write(unsigned long val, int addr)
+{
+	outb_p(addr, maple_rtc_addr);	/* select RTC register @addr */
+	outb_p(val, maple_rtc_addr+1);	/* then write @val through the next port */
+}
+
+/* ppc_md.get_rtc_time hook: fill @tm from the RTC registers */
+void maple_get_rtc_time(struct rtc_time *tm)
+{
+	do {	/* repeat until the seconds did not change during the pass */
+		tm->tm_sec = maple_clock_read(RTC_SECONDS);
+		tm->tm_min = maple_clock_read(RTC_MINUTES);
+		tm->tm_hour = maple_clock_read(RTC_HOURS);
+		tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH);
+		tm->tm_mon = maple_clock_read(RTC_MONTH);
+		tm->tm_year = maple_clock_read(RTC_YEAR);
+	} while (tm->tm_sec != maple_clock_read(RTC_SECONDS));
+
+	if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY)
+	    || RTC_ALWAYS_BCD) {	/* the registers hold BCD values */
+		tm->tm_sec = bcd2bin(tm->tm_sec);
+		tm->tm_min = bcd2bin(tm->tm_min);
+		tm->tm_hour = bcd2bin(tm->tm_hour);
+		tm->tm_mday = bcd2bin(tm->tm_mday);
+		tm->tm_mon = bcd2bin(tm->tm_mon);
+		tm->tm_year = bcd2bin(tm->tm_year);
+	  }
+	if ((tm->tm_year + 1900) < 1970)
+		tm->tm_year += 100;	/* years before 1970 are taken as 20xx */
+	tm->tm_wday = -1;	/* the day of the week is not read back */
+}
+
+/*
+ * ppc_md.set_rtc_time hook: program the RTC registers from @tm while
+ * holding rtc_lock; always returns 0.
+ */
+int maple_set_rtc_time(struct rtc_time *tm)
+{
+	unsigned char save_control, save_freq_select;
+	int sec, min, hour, mon, mday, year;
+
+	spin_lock(&rtc_lock);
+	save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */
+	maple_clock_write((save_control|RTC_SET), RTC_CONTROL);
+
+	save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */
+	maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+	sec = tm->tm_sec;
+	min = tm->tm_min;
+	hour = tm->tm_hour;
+	mon = tm->tm_mon;
+	mday = tm->tm_mday;
+	year = tm->tm_year;
+
+	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+		sec = bin2bcd(sec);
+		min = bin2bcd(min);
+		hour = bin2bcd(hour);
+		mon = bin2bcd(mon);
+		mday = bin2bcd(mday);
+		year = bin2bcd(year);
+	}
+	maple_clock_write(sec, RTC_SECONDS);
+	maple_clock_write(min, RTC_MINUTES);
+	maple_clock_write(hour, RTC_HOURS);
+	maple_clock_write(mon, RTC_MONTH);
+	maple_clock_write(mday, RTC_DAY_OF_MONTH);
+	maple_clock_write(year, RTC_YEAR);
+
+	/* The following flags have to be released exactly in this order,
+	 * otherwise the DS12887 (popular MC146818A clone with integrated
+	 * battery and quartz) will not reset the oscillator and will not
+	 * update precisely 500 ms later. You won't find this mentioned in
+	 * the Dallas Semiconductor data sheets, but who believes data
+	 * sheets anyway ...                           -- Markus Kuhn
+	 */
+	maple_clock_write(save_control, RTC_CONTROL);
+	maple_clock_write(save_freq_select, RTC_FREQ_SELECT);
+
+	spin_unlock(&rtc_lock);
+	return 0;
+}
+
+static struct resource rtc_iores = {	/* RTC port range; start/end filled in by maple_get_boot_time() */
+	.name = "rtc",
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+/*
+ * ppc_md.get_boot_time hook: locate the RTC (device tree first, legacy
+ * port otherwise), request its I/O ports and return the current RTC time.
+ */
+time64_t __init maple_get_boot_time(void)
+{
+	struct device_node *rtc_node;
+	struct resource res;
+	struct rtc_time now;
+
+	rtc_node = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00");
+	if (rtc_node && of_address_to_resource(rtc_node, 0, &res)) {
+		printk(KERN_EMERG "Maple: Unable to translate RTC"
+		       " address\n");
+	} else if (rtc_node && !(res.flags & IORESOURCE_IO)) {
+		printk(KERN_EMERG "Maple: RTC address isn't PIO!\n");
+	} else if (rtc_node) {
+		maple_rtc_addr = res.start;
+		printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n",
+		       maple_rtc_addr);
+	}
+	of_node_put(rtc_node);
+
+	if (maple_rtc_addr == 0) {
+		maple_rtc_addr = RTC_PORT(0); /* legacy address */
+		printk(KERN_INFO "Maple: No device node for RTC, assuming "
+		       "legacy address (0x%x)\n", maple_rtc_addr);
+	}
+
+	rtc_iores.start = maple_rtc_addr;
+	rtc_iores.end = maple_rtc_addr + 7;
+	request_resource(&ioport_resource, &rtc_iores);
+
+	maple_get_rtc_time(&now);
+	return rtc_tm_to_time64(&now);
+}
+
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
new file mode 100644
index 0000000000..6af443a1db
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MICROWATT
+	depends on PPC_BOOK3S_64 && !SMP
+	bool "Microwatt SoC platform"
+	select PPC_XICS
+	select PPC_ICS_NATIVE
+	select PPC_ICP_NATIVE
+	select PPC_UDBG_16550
+	help
+	  This option enables support for FPGA-based Microwatt implementations.
+
diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile
new file mode 100644
index 0000000000..116d6d3ad3
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Makefile
@@ -0,0 +1 @@
+obj-y	+= setup.o rng.o
diff --git a/arch/powerpc/platforms/microwatt/microwatt.h b/arch/powerpc/platforms/microwatt/microwatt.h
new file mode 100644
index 0000000000..335417e95e
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/microwatt.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MICROWATT_H
+#define _MICROWATT_H
+
+void microwatt_rng_init(void);
+
+#endif /* _MICROWATT_H */
diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
new file mode 100644
index 0000000000..8ece87d005
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/rng.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/platforms/powernv/rng.c, which is:
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"microwatt-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+#include "microwatt.h"
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+/*
+ * Fetch one 64-bit conditioned random number with DARN (L=1).
+ *
+ * Stores the value in *v and returns 1, or returns 0 without touching *v
+ * when the instruction hands back the DARN_ERR pattern.
+ */
+static int microwatt_get_random_darn(unsigned long *v)
+{
+	unsigned long rnd;
+
+	asm volatile(PPC_DARN(%0, 1) : "=r"(rnd));
+
+	if (rnd != DARN_ERR) {
+		*v = rnd;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Probe DARN up to ten times; on the first success install
+ * microwatt_get_random_darn() as ppc_md.get_random_seed. If every attempt
+ * fails, the hook is left as it was.
+ */
+void __init microwatt_rng_init(void)
+{
+	unsigned long scratch;
+	int attempts = 10;
+
+	while (attempts-- > 0) {
+		if (!microwatt_get_random_darn(&scratch))
+			continue;
+
+		ppc_md.get_random_seed = microwatt_get_random_darn;
+		break;
+	}
+}
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
new file mode 100644
index 0000000000..5e1c099717
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/setup.c
@@ -0,0 +1,43 @@
+/*
+ * Microwatt FPGA-based SoC platform setup code.
+ *
+ * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/xics.h>
+#include <asm/udbg.h>
+
+#include "microwatt.h"
+
+static void __init microwatt_init_IRQ(void)
+{
+	xics_init();
+}
+
+static int __init microwatt_populate(void)
+{
+	return of_platform_default_populate(NULL, NULL, NULL);
+}
+machine_arch_initcall(microwatt, microwatt_populate);
+
+static void __init microwatt_setup_arch(void)
+{
+	microwatt_rng_init();
+}
+
+define_machine(microwatt) {
+	.name			= "microwatt",
+	.compatible		= "microwatt-soc",
+	.init_IRQ		= microwatt_init_IRQ,
+	.setup_arch		= microwatt_setup_arch,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
new file mode 100644
index 0000000000..85ae18ddd9
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PASEMI
+	depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+	bool "PA Semi SoC-based platforms"
+	select MPIC
+	select FORCE_PCI
+	select PPC_UDBG_16550
+	select PPC_64S_HASH_MMU
+	select PPC_HASH_MMU_NATIVE
+	select MPIC_BROKEN_REGREAD
+	help
+	  This option enables support for PA Semi's PWRficient line
+	  of SoC processors, including PA6T-1682M
+
+menu "PA Semi PWRficient options"
+	depends on PPC_PASEMI
+
+config PPC_PASEMI_NEMO
+	bool "Nemo motherboard Support"
+	depends on PPC_PASEMI
+	select PPC_I8259
+	help
+	  This option enables support for the 'Nemo' motherboard
+	  used in A-EON's AmigaOne X1000. This consists of some
+	  device tree patches and workarounds for the SB600 South
+	  Bridge that provides SATA/USB/Audio.
+
+config PPC_PASEMI_IOMMU
+	bool "PA Semi IOMMU support"
+	depends on PPC_PASEMI
+	help
+	  IOMMU support for PA Semi PWRficient
+
+config PPC_PASEMI_IOMMU_DMA_FORCE
+	bool "Force DMA engine to use IOMMU"
+	depends on PPC_PASEMI_IOMMU
+	help
+	  This option forces the use of the IOMMU also for the
+	  DMA engine. Otherwise the kernel will use it only when
+	  running under a hypervisor.
+
+	  If in doubt, say "N".
+
+config PPC_PASEMI_MDIO
+	depends on PHYLIB
+	tristate "MDIO support via GPIO"
+	default y
+	help
+	  Driver for MDIO via GPIO on PWRficient platforms
+
+endmenu
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
new file mode 100644
index 0000000000..d2ce954a50
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y	+= setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
+obj-$(CONFIG_PPC_PASEMI_MDIO)	+= gpio_mdio.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
new file mode 100644
index 0000000000..1be1f18f6f
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Common functions for DMA access on PA Semi PWRficient
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched.h>
+
+#include <asm/pasemi_dma.h>
+
+#define MAX_TXCH 64
+#define MAX_RXCH 64
+#define MAX_FLAGS 64
+#define MAX_FUN 8
+
+static struct pasdma_status *dma_status;
+
+static void __iomem *iob_regs;
+static void __iomem *mac_regs[6];
+static void __iomem *dma_regs;
+
+static int base_hw_irq;
+
+static int num_txch, num_rxch;
+
+static struct pci_dev *dma_pdev;
+
+/* Bitmaps to handle allocation of channels */
+
+static DECLARE_BITMAP(txch_free, MAX_TXCH);
+static DECLARE_BITMAP(rxch_free, MAX_RXCH);
+static DECLARE_BITMAP(flags_free, MAX_FLAGS);
+static DECLARE_BITMAP(fun_free, MAX_FUN);
+
+/* pasemi_read_iob_reg - read IOB register
+ * @reg: Register to read (offset into PCI CFG space)
+ */
+unsigned int pasemi_read_iob_reg(unsigned int reg)
+{
+	return in_le32(iob_regs+reg);
+}
+EXPORT_SYMBOL(pasemi_read_iob_reg);
+
+/* pasemi_write_iob_reg - write IOB register
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ */
+void pasemi_write_iob_reg(unsigned int reg, unsigned int val)
+{
+	out_le32(iob_regs+reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_iob_reg);
+
+/* pasemi_read_mac_reg - read MAC register
+ * @intf: MAC interface
+ * @reg: Register to read (offset into PCI CFG space)
+ */
+unsigned int pasemi_read_mac_reg(int intf, unsigned int reg)
+{
+	return in_le32(mac_regs[intf]+reg);
+}
+EXPORT_SYMBOL(pasemi_read_mac_reg);
+
+/* pasemi_write_mac_reg - write MAC register
+ * @intf: MAC interface
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ */
+void pasemi_write_mac_reg(int intf, unsigned int reg, unsigned int val)
+{
+	out_le32(mac_regs[intf]+reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_mac_reg);
+
+/* pasemi_read_dma_reg - read DMA register
+ * @reg: Register to read (offset into PCI CFG space)
+ */
+unsigned int pasemi_read_dma_reg(unsigned int reg)
+{
+	return in_le32(dma_regs+reg);
+}
+EXPORT_SYMBOL(pasemi_read_dma_reg);
+
+/* pasemi_write_dma_reg - write DMA register
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ */
+void pasemi_write_dma_reg(unsigned int reg, unsigned int val)
+{
+	out_le32(dma_regs+reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_dma_reg);
+
+/*
+ * Claim a free TX channel number from txch_free.
+ *
+ * TXCHAN_EVT0 channels come from [0, 10), TXCHAN_EVT1 channels from
+ * [10, MAX_TXCH); any other type may use the whole range. Returns the
+ * channel number, or -ENOSPC when no free bit is found below the limit.
+ */
+static int pasemi_alloc_tx_chan(enum pasemi_dmachan_type type)
+{
+	const int evt = type & (TXCHAN_EVT0|TXCHAN_EVT1);
+	int first = 0, end = MAX_TXCH;
+	int chno;
+
+	if (evt == TXCHAN_EVT0)
+		end = 10;
+	else if (evt == TXCHAN_EVT1)
+		first = 10;
+
+	/* Search again if the bit was no longer set when we tried to clear it */
+	do {
+		chno = find_next_bit(txch_free, MAX_TXCH, first);
+		if (chno >= end)
+			return -ENOSPC;
+	} while (!test_and_clear_bit(chno, txch_free));
+
+	return chno;
+}
+
+static void pasemi_free_tx_chan(int chan)
+{
+	BUG_ON(test_bit(chan, txch_free));
+	set_bit(chan, txch_free);
+}
+
+/*
+ * Claim the lowest free RX channel number from rxch_free.
+ * Returns the channel number, or -ENOSPC when no RX channel is free.
+ */
+static int pasemi_alloc_rx_chan(void)
+{
+	int bit;
+retry:
+	bit = find_first_bit(rxch_free, MAX_RXCH);
+	/* This is the RX bitmap, so compare against the RX limit */
+	if (bit >= MAX_RXCH)
+		return -ENOSPC;
+	/* The bit was no longer set when we tried to take it; look again */
+	if (!test_and_clear_bit(bit, rxch_free))
+		goto retry;
+
+	return bit;
+}
+
+static void pasemi_free_rx_chan(int chan)
+{
+	BUG_ON(test_bit(chan, rxch_free));
+	set_bit(chan, rxch_free);
+}
+
+/* pasemi_dma_alloc_chan - Allocate a DMA channel
+ * @type: Type of channel to allocate
+ * @total_size: Total size of structure to allocate (to allow for more
+ *		room behind the structure to be used by the client)
+ * @offset: Offset in bytes from start of the total structure to the beginning
+ *	    of struct pasemi_dmachan. Needed when struct pasemi_dmachan is
+ *	    not the first member of the client structure.
+ *
+ * pasemi_dma_alloc_chan allocates a DMA channel for use by a client. The
+ * type argument specifies whether it's a RX or TX channel, and in the case
+ * of TX channels which group it needs to belong to (if any).
+ *
+ * Returns a pointer to the struct pasemi_dmachan located @offset bytes into
+ * the newly allocated structure (i.e. the structure itself when @offset is
+ * 0) on success. Returns NULL on failure: either the allocation failed or
+ * no channel of the requested type is free.
+ */
+void *pasemi_dma_alloc_chan(enum pasemi_dmachan_type type,
+			    int total_size, int offset)
+{
+	void *buf;
+	struct pasemi_dmachan *chan;
+	int chno;
+
+	BUG_ON(total_size < sizeof(struct pasemi_dmachan));
+
+	buf = kzalloc(total_size, GFP_KERNEL);
+
+	if (!buf)
+		return NULL;
+	chan = buf + offset;
+
+	/* Remember the start of the allocation for pasemi_dma_free_chan() */
+	chan->priv = buf;
+
+	switch (type & (TXCHAN|RXCHAN)) {
+	case RXCHAN:
+		chno = pasemi_alloc_rx_chan();
+		if (chno < 0)
+			goto out_free;
+		chan->chno = chno;
+		/* RX interrupt numbers follow the num_txch TX ones */
+		chan->irq = irq_create_mapping(NULL,
+					       base_hw_irq + num_txch + chno);
+		chan->status = &dma_status->rx_sta[chno];
+		break;
+	case TXCHAN:
+		chno = pasemi_alloc_tx_chan(type);
+		if (chno < 0)
+			goto out_free;
+		chan->chno = chno;
+		chan->irq = irq_create_mapping(NULL, base_hw_irq + chno);
+		chan->status = &dma_status->tx_sta[chno];
+		break;
+	}
+
+	chan->chan_type = type;
+
+	return chan;
+
+out_free:
+	/* No channel left: never use the errno as an array/irq index */
+	kfree(buf);
+	return NULL;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_chan);
+
+/* pasemi_dma_free_chan - Free a previously allocated channel
+ * @chan: Channel to free
+ *
+ * Frees a previously allocated channel. It will also deallocate any
+ * descriptor ring associated with the channel, if allocated.
+ */
+void pasemi_dma_free_chan(struct pasemi_dmachan *chan)
+{
+	if (chan->ring_virt)
+		pasemi_dma_free_ring(chan);
+
+	switch (chan->chan_type & (RXCHAN|TXCHAN)) {
+	case RXCHAN:
+		pasemi_free_rx_chan(chan->chno);
+		break;
+	case TXCHAN:
+		pasemi_free_tx_chan(chan->chno);
+		break;
+	}
+
+	kfree(chan->priv);
+}
+EXPORT_SYMBOL(pasemi_dma_free_chan);
+
+/* pasemi_dma_alloc_ring - Allocate descriptor ring for a channel
+ * @chan: Channel for which to allocate
+ * @ring_size: Ring size in 64-bit (8-byte) words
+ *
+ * Allocate a descriptor ring for a channel. Returns 0 on success, errno
+ * on failure. The passed in struct pasemi_dmachan is updated with the
+ * virtual and DMA addresses of the ring.
+ */
+int pasemi_dma_alloc_ring(struct pasemi_dmachan *chan, int ring_size)
+{
+	BUG_ON(chan->ring_virt);
+
+	chan->ring_size = ring_size;
+
+	chan->ring_virt = dma_alloc_coherent(&dma_pdev->dev,
+					     ring_size * sizeof(u64),
+					     &chan->ring_dma, GFP_KERNEL);
+
+	if (!chan->ring_virt)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_ring);
+
+/* pasemi_dma_free_ring - Free an allocated descriptor ring for a channel
+ * @chan: Channel for which to free the descriptor ring
+ *
+ * Frees a previously allocated descriptor ring for a channel.
+ */
+void pasemi_dma_free_ring(struct pasemi_dmachan *chan)
+{
+	BUG_ON(!chan->ring_virt);
+
+	dma_free_coherent(&dma_pdev->dev, chan->ring_size * sizeof(u64),
+			  chan->ring_virt, chan->ring_dma);
+	chan->ring_virt = NULL;
+	chan->ring_size = 0;
+	chan->ring_dma = 0;
+}
+EXPORT_SYMBOL(pasemi_dma_free_ring);
+
+/* pasemi_dma_start_chan - Start a DMA channel
+ * @chan: Channel to start
+ * @cmdsta: Additional CCMDSTA/TCMDSTA bits to write
+ *
+ * Enables (starts) a DMA channel with optional additional arguments.
+ */
+void pasemi_dma_start_chan(const struct pasemi_dmachan *chan, const u32 cmdsta)
+{
+	if (chan->chan_type == RXCHAN)
+		pasemi_write_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(chan->chno),
+				     cmdsta | PAS_DMA_RXCHAN_CCMDSTA_EN);
+	else
+		pasemi_write_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(chan->chno),
+				     cmdsta | PAS_DMA_TXCHAN_TCMDSTA_EN);
+}
+EXPORT_SYMBOL(pasemi_dma_start_chan);
+
+/* pasemi_dma_stop_chan - Stop a DMA channel
+ * @chan: Channel to stop
+ *
+ * Stops (disables) a DMA channel. This is done by setting the ST bit in the
+ * CMDSTA register and waiting on the ACT (active) bit to clear, then
+ * finally disabling the whole channel.
+ *
+ * This function will only try for a short while for the channel to stop, if
+ * it doesn't it will return failure.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+#define MAX_RETRIES 5000
+int pasemi_dma_stop_chan(const struct pasemi_dmachan *chan)
+{
+	int reg, retries;
+	u32 sta;
+
+	if (chan->chan_type == RXCHAN) {
+		reg = PAS_DMA_RXCHAN_CCMDSTA(chan->chno);
+		pasemi_write_dma_reg(reg, PAS_DMA_RXCHAN_CCMDSTA_ST);
+		for (retries = 0; retries < MAX_RETRIES; retries++) {
+			sta = pasemi_read_dma_reg(reg);
+			if (!(sta & PAS_DMA_RXCHAN_CCMDSTA_ACT)) {
+				pasemi_write_dma_reg(reg, 0);
+				return 1;
+			}
+			cond_resched();
+		}
+	} else {
+		reg = PAS_DMA_TXCHAN_TCMDSTA(chan->chno);
+		pasemi_write_dma_reg(reg, PAS_DMA_TXCHAN_TCMDSTA_ST);
+		for (retries = 0; retries < MAX_RETRIES; retries++) {
+			sta = pasemi_read_dma_reg(reg);
+			if (!(sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)) {
+				pasemi_write_dma_reg(reg, 0);
+				return 1;
+			}
+			cond_resched();
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pasemi_dma_stop_chan);
+
+/* pasemi_dma_alloc_buf - Allocate a buffer to use for DMA
+ * @chan: Channel to allocate for
+ * @size: Size of buffer in bytes
+ * @handle: DMA handle
+ *
+ * Allocate a buffer to be used by the DMA engine for read/write,
+ * similar to dma_alloc_coherent().
+ *
+ * Returns the virtual address of the buffer, or NULL in case of failure.
+ */
+void *pasemi_dma_alloc_buf(struct pasemi_dmachan *chan, int size,
+			   dma_addr_t *handle)
+{
+	return dma_alloc_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL);
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_buf);
+
+/* pasemi_dma_free_buf - Free a buffer used for DMA
+ * @chan: Channel the buffer was allocated for
+ * @size: Size of buffer in bytes
+ * @handle: DMA handle
+ *
+ * Frees a previously allocated buffer.
+ *
+ * NOTE(review): the arguments handed to dma_free_coherent() below do not
+ * match the way pasemi_dma_free_ring() calls it (virtual address, then DMA
+ * address). Here @handle, a pointer to the DMA address, goes in the
+ * virtual-address position and GFP_KERNEL in the DMA-address position.
+ * This function never receives the buffer's virtual address, so a correct
+ * free does not look possible without changing the prototype -- confirm and
+ * fix together with any callers.
+ */
+void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size,
+			 dma_addr_t *handle)
+{
+	dma_free_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL);
+}
+EXPORT_SYMBOL(pasemi_dma_free_buf);
+
+/* pasemi_dma_alloc_flag - Allocate a flag (event) for channel synchronization
+ *
+ * Allocates a flag for use with channel synchronization (event descriptors).
+ * Returns allocated flag (0-63), < 0 on error.
+ */
+int pasemi_dma_alloc_flag(void)
+{
+	int bit;
+
+retry:
+	bit = find_first_bit(flags_free, MAX_FLAGS);
+	if (bit >= MAX_FLAGS)
+		return -ENOSPC;
+	if (!test_and_clear_bit(bit, flags_free))
+		goto retry;
+
+	return bit;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_flag);
+
+
+/* pasemi_dma_free_flag - Deallocates a flag (event)
+ * @flag: Flag number to deallocate
+ *
+ * Frees up a flag so it can be reused for other purposes. Passing a number
+ * outside 0..MAX_FLAGS-1, or a flag that is already free, triggers BUG().
+ */
+void pasemi_dma_free_flag(int flag)
+{
+	/* Validate the index before it is used to look into the bitmap */
+	BUG_ON(flag < 0 || flag >= MAX_FLAGS);
+	BUG_ON(test_bit(flag, flags_free));
+	set_bit(flag, flags_free);
+}
+EXPORT_SYMBOL(pasemi_dma_free_flag);
+
+
+/* pasemi_dma_set_flag - Sets a flag (event) to 1
+ * @flag: Flag number to set active (0..MAX_FLAGS-1)
+ *
+ * Sets the flag provided to 1. Flags 0-31 are written to PAS_DMA_TXF_SFLG0
+ * and flags 32-63 to PAS_DMA_TXF_SFLG1.
+ */
+void pasemi_dma_set_flag(int flag)
+{
+	BUG_ON(flag < 0 || flag >= MAX_FLAGS);
+	if (flag < 32)
+		pasemi_write_dma_reg(PAS_DMA_TXF_SFLG0, 1U << flag);
+	else
+		/* Bit position within the second 32-bit register */
+		pasemi_write_dma_reg(PAS_DMA_TXF_SFLG1, 1U << (flag - 32));
+}
+EXPORT_SYMBOL(pasemi_dma_set_flag);
+
+/* pasemi_dma_clear_flag - Sets a flag (event) to 0
+ * @flag: Flag number to set inactive (0..MAX_FLAGS-1)
+ *
+ * Sets the flag provided to 0. Flags 0-31 are written to PAS_DMA_TXF_CFLG0
+ * and flags 32-63 to PAS_DMA_TXF_CFLG1.
+ */
+void pasemi_dma_clear_flag(int flag)
+{
+	BUG_ON(flag < 0 || flag >= MAX_FLAGS);
+	if (flag < 32)
+		pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 1U << flag);
+	else
+		/* Bit position within the second 32-bit register */
+		pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 1U << (flag - 32));
+}
+EXPORT_SYMBOL(pasemi_dma_clear_flag);
+
+/* pasemi_dma_alloc_fun - Allocate a function engine
+ *
+ * Allocates a function engine to use for crypto/checksum offload.
+ * Returns allocated engine (0-7), < 0 on error.
+ */
+int pasemi_dma_alloc_fun(void)
+{
+	int bit;
+
+retry:
+	/* fun_free is declared with MAX_FUN bits; search exactly that many */
+	bit = find_first_bit(fun_free, MAX_FUN);
+	if (bit >= MAX_FUN)
+		return -ENOSPC;
+	/* The bit was no longer set when we tried to take it; look again */
+	if (!test_and_clear_bit(bit, fun_free))
+		goto retry;
+
+	return bit;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_fun);
+
+
+/* pasemi_dma_free_fun - Deallocates a function engine
+ * @fun: Engine number to deallocate
+ *
+ * Frees up a function engine so it can be used for other purposes. Passing
+ * a number outside 0..MAX_FUN-1, or an engine that is already free,
+ * triggers BUG().
+ */
+void pasemi_dma_free_fun(int fun)
+{
+	/* Validate against the size of fun_free before touching the bitmap */
+	BUG_ON(fun < 0 || fun >= MAX_FUN);
+	BUG_ON(test_bit(fun, fun_free));
+	set_bit(fun, fun_free);
+}
+EXPORT_SYMBOL(pasemi_dma_free_fun);
+
+
+static void *map_onedev(struct pci_dev *p, int index)
+{
+	struct device_node *dn;
+	void __iomem *ret;
+
+	dn = pci_device_to_OF_node(p);
+	if (!dn)
+		goto fallback;
+
+	ret = of_iomap(dn, index);
+	if (!ret)
+		goto fallback;
+
+	return ret;
+fallback:
+	/* This is hardcoded and ugly, but we have some firmware versions
+	 * that don't provide the register space in the device tree. Luckily
+	 * they are at well-known locations so we can just do the math here.
+	 */
+	return ioremap(0xe0000000 + (p->devfn << 12), 0x2000);
+}
+
+/* pasemi_dma_init - Initialize the PA Semi DMA library
+ *
+ * This function initializes the DMA library. It must be called before
+ * any other function in the library. Only the first call does any work;
+ * once dma_pdev has been set, later calls return 0 immediately.
+ *
+ * Returns 0 on success, errno on failure (-ENODEV when not running on a
+ * pasemi machine).
+ */
+int pasemi_dma_init(void)
+{
+	static DEFINE_SPINLOCK(init_lock);
+	struct pci_dev *iob_pdev;
+	struct pci_dev *pdev;
+	struct resource res;
+	struct device_node *dn;
+	int i, intf, err = 0;
+	unsigned long timeout;
+	u32 tmp;
+
+	if (!machine_is(pasemi))
+		return -ENODEV;
+
+	spin_lock(&init_lock);
+
+	/* Make sure we haven't already initialized */
+	if (dma_pdev)
+		goto out;
+
+	iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
+	if (!iob_pdev) {
+		/* Print the reason first so it is visible in the BUG report */
+		pr_warn("Can't find I/O Bridge\n");
+		BUG();
+		err = -ENODEV;
+		goto out;
+	}
+	iob_regs = map_onedev(iob_pdev, 0);
+
+	dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
+	if (!dma_pdev) {
+		/* Print the reason first so it is visible in the BUG report */
+		pr_warn("Can't find DMA controller\n");
+		BUG();
+		err = -ENODEV;
+		goto out;
+	}
+	dma_regs = map_onedev(dma_pdev, 0);
+	base_hw_irq = virq_to_hw(dma_pdev->irq);
+
+	pci_read_config_dword(dma_pdev, PAS_DMA_CAP_TXCH, &tmp);
+	num_txch = (tmp & PAS_DMA_CAP_TXCH_TCHN_M) >> PAS_DMA_CAP_TXCH_TCHN_S;
+
+	pci_read_config_dword(dma_pdev, PAS_DMA_CAP_RXCH, &tmp);
+	num_rxch = (tmp & PAS_DMA_CAP_RXCH_RCHN_M) >> PAS_DMA_CAP_RXCH_RCHN_S;
+
+	/*
+	 * Map every MAC function (device IDs 0xa006, then 0xa005) in discovery
+	 * order. Devices beyond the size of mac_regs[] are skipped rather than
+	 * written past the end of the array; the iteration still runs to the
+	 * end so that pci_get_device() drops each reference it took.
+	 */
+	intf = 0;
+	for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, NULL);
+	     pdev;
+	     pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, pdev)) {
+		if (intf < ARRAY_SIZE(mac_regs))
+			mac_regs[intf++] = map_onedev(pdev, 0);
+	}
+
+	pci_dev_put(pdev);
+
+	for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, NULL);
+	     pdev;
+	     pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, pdev)) {
+		if (intf < ARRAY_SIZE(mac_regs))
+			mac_regs[intf++] = map_onedev(pdev, 0);
+	}
+
+	pci_dev_put(pdev);
+
+	dn = pci_device_to_OF_node(iob_pdev);
+	if (dn)
+		err = of_address_to_resource(dn, 1, &res);
+	if (!dn || err) {
+		/* Fallback for old firmware */
+		res.start = 0xfd800000;
+		/* struct resource ranges are inclusive: 0x1000 bytes */
+		res.end = res.start + 0x1000 - 1;
+		/* The fallback is usable, so don't return the lookup error */
+		err = 0;
+	}
+	dma_status = ioremap_cache(res.start, resource_size(&res));
+	pci_dev_put(iob_pdev);
+
+	for (i = 0; i < MAX_TXCH; i++)
+		__set_bit(i, txch_free);
+
+	for (i = 0; i < MAX_RXCH; i++)
+		__set_bit(i, rxch_free);
+
+	/* Disable each section and give it up to HZ jiffies to report idle */
+	timeout = jiffies + HZ;
+	pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0);
+	while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) {
+		if (time_after(jiffies, timeout)) {
+			pr_warn("Warning: Could not disable RX section\n");
+			break;
+		}
+	}
+
+	timeout = jiffies + HZ;
+	pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0);
+	while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) {
+		if (time_after(jiffies, timeout)) {
+			pr_warn("Warning: Could not disable TX section\n");
+			break;
+		}
+	}
+
+	/* setup resource allocations for the different DMA sections */
+	tmp = pasemi_read_dma_reg(PAS_DMA_COM_CFG);
+	pasemi_write_dma_reg(PAS_DMA_COM_CFG, tmp | 0x18000000);
+
+	/* enable tx section */
+	pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN);
+
+	/* enable rx section */
+	pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN);
+
+	for (i = 0; i < MAX_FLAGS; i++)
+		__set_bit(i, flags_free);
+
+	for (i = 0; i < MAX_FUN; i++)
+		__set_bit(i, fun_free);
+
+	/* clear all status flags */
+	pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff);
+	pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff);
+
+	pr_info("PA Semi PWRficient DMA library initialized "
+		"(%d tx, %d rx channels)\n", num_txch, num_rxch);
+
+out:
+	spin_unlock(&init_lock);
+	return err;
+}
+EXPORT_SYMBOL(pasemi_dma_init);
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
new file mode 100644
index 0000000000..fd130fe7a6
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Author: Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on drivers/net/fs_enet/mii-bitbang.c.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/phy.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
+#include <linux/platform_device.h>
+
+#define DELAY 1
+
+static void __iomem *gpio_regs;
+
+struct gpio_priv {
+	int mdc_pin;
+	int mdio_pin;
+};
+
+#define MDC_PIN(bus)	(((struct gpio_priv *)bus->priv)->mdc_pin)
+#define MDIO_PIN(bus)	(((struct gpio_priv *)bus->priv)->mdio_pin)
+
+static inline void mdio_lo(struct mii_bus *bus)
+{
+	out_le32(gpio_regs+0x10, 1 << MDIO_PIN(bus));
+}
+
+static inline void mdio_hi(struct mii_bus *bus)
+{
+	out_le32(gpio_regs, 1 << MDIO_PIN(bus));
+}
+
+static inline void mdc_lo(struct mii_bus *bus)
+{
+	out_le32(gpio_regs+0x10, 1 << MDC_PIN(bus));
+}
+
+static inline void mdc_hi(struct mii_bus *bus)
+{
+	out_le32(gpio_regs, 1 << MDC_PIN(bus));
+}
+
+static inline void mdio_active(struct mii_bus *bus)
+{
+	out_le32(gpio_regs+0x20, (1 << MDC_PIN(bus)) | (1 << MDIO_PIN(bus)));
+}
+
+static inline void mdio_tristate(struct mii_bus *bus)
+{
+	out_le32(gpio_regs+0x30, (1 << MDIO_PIN(bus)));
+}
+
+static inline int mdio_read(struct mii_bus *bus)
+{
+	return !!(in_le32(gpio_regs+0x40) & (1 << MDIO_PIN(bus)));
+}
+
+static void clock_out(struct mii_bus *bus, int bit)
+{
+	if (bit)
+		mdio_hi(bus);
+	else
+		mdio_lo(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+}
+
+/*
+ * Utility to send the preamble, address, and register (common to read and
+ * write).
+ *
+ * @bus:  bus whose MDC/MDIO pins are driven
+ * @read: non-zero for a read frame, zero for a write frame
+ * @addr: PHY address; the low five bits are sent, bit 4 first
+ * @reg:  register address; the low five bits are sent, bit 4 first
+ */
+static void bitbang_pre(struct mii_bus *bus, int read, u8 addr, u8 reg)
+{
+	int i;
+
+	/* CFE uses a really long preamble (40 bits). We'll do the same. */
+	mdio_active(bus);
+	for (i = 0; i < 40; i++) {
+		clock_out(bus, 1);
+	}
+
+	/* send the start bit (01) and the read opcode (10) or write (01) */
+	clock_out(bus, 0);
+	clock_out(bus, 1);
+
+	/* opcode: "read, !read" gives 10 for a read and 01 for a write */
+	clock_out(bus, read);
+	clock_out(bus, !read);
+
+	/* send the PHY address */
+	for (i = 0; i < 5; i++) {
+		clock_out(bus, (addr & 0x10) != 0);
+		addr <<= 1;
+	}
+
+	/* send the register address */
+	for (i = 0; i < 5; i++) {
+		clock_out(bus, (reg & 0x10) != 0);
+		reg <<= 1;
+	}
+}
+
+static int gpio_mdio_read(struct mii_bus *bus, int phy_id, int location)
+{
+	u16 rdreg;
+	int ret, i;
+	u8 addr = phy_id & 0xff;
+	u8 reg = location & 0xff;
+
+	bitbang_pre(bus, 1, addr, reg);
+
+	/* tri-state our MDIO I/O pin so we can read */
+	mdio_tristate(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+
+	/* read 16 bits of register data, MSB first */
+	rdreg = 0;
+	for (i = 0; i < 16; i++) {
+		mdc_lo(bus);
+		udelay(DELAY);
+		mdc_hi(bus);
+		udelay(DELAY);
+		mdc_lo(bus);
+		udelay(DELAY);
+		rdreg <<= 1;
+		rdreg |= mdio_read(bus);
+	}
+
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+	udelay(DELAY);
+
+	ret = rdreg;
+
+	return ret;
+}
+
+static int gpio_mdio_write(struct mii_bus *bus, int phy_id, int location, u16 val)
+{
+	int i;
+
+	u8 addr = phy_id & 0xff;
+	u8 reg = location & 0xff;
+	u16 value = val & 0xffff;
+
+	bitbang_pre(bus, 0, addr, reg);
+
+	/* send the turnaround (10) */
+	mdc_lo(bus);
+	mdio_hi(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+	mdio_lo(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+
+	/* write 16 bits of register data, MSB first */
+	for (i = 0; i < 16; i++) {
+		mdc_lo(bus);
+		if (value & 0x8000)
+			mdio_hi(bus);
+		else
+			mdio_lo(bus);
+		udelay(DELAY);
+		mdc_hi(bus);
+		udelay(DELAY);
+		value <<= 1;
+	}
+
+	/*
+	 * Tri-state the MDIO line.
+	 */
+	mdio_tristate(bus);
+	mdc_lo(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+	return 0;
+}
+
+static int gpio_mdio_reset(struct mii_bus *bus)
+{
+	/*nothing here - dunno how to reset it*/
+	return 0;
+}
+
+
+/*
+ * gpio_mdio_probe - set up the bit-banged MDIO bus described by @ofdev's node
+ *
+ * The bus id is taken from the node's "reg" property and the GPIO pin
+ * numbers from "mdc-pin" and "mdio-pin"; all three must be present.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, -EINVAL if one of
+ * the properties is missing, or the error from of_mdiobus_register().
+ */
+static int gpio_mdio_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	struct device_node *np = ofdev->dev.of_node;
+	struct mii_bus *new_bus;
+	struct gpio_priv *priv;
+	const unsigned int *prop;
+	int err;
+
+	err = -ENOMEM;
+	priv = kzalloc(sizeof(struct gpio_priv), GFP_KERNEL);
+	if (!priv)
+		goto out;
+
+	new_bus = mdiobus_alloc();
+
+	if (!new_bus)
+		goto out_free_priv;
+
+	new_bus->name = "pasemi gpio mdio bus";
+	new_bus->read = &gpio_mdio_read;
+	new_bus->write = &gpio_mdio_write;
+	new_bus->reset = &gpio_mdio_reset;
+	new_bus->priv = priv;
+
+	/* A node lacking any of these cannot be driven; fail, don't oops */
+	err = -EINVAL;
+
+	prop = of_get_property(np, "reg", NULL);
+	if (!prop)
+		goto out_free_bus;
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *prop);
+
+	prop = of_get_property(np, "mdc-pin", NULL);
+	if (!prop)
+		goto out_free_bus;
+	priv->mdc_pin = *prop;
+
+	prop = of_get_property(np, "mdio-pin", NULL);
+	if (!prop)
+		goto out_free_bus;
+	priv->mdio_pin = *prop;
+
+	new_bus->parent = dev;
+	dev_set_drvdata(dev, new_bus);
+
+	err = of_mdiobus_register(new_bus, np);
+
+	if (err != 0) {
+		pr_err("%s: Cannot register as MDIO bus, err %d\n",
+				new_bus->name, err);
+		goto out_free_bus;
+	}
+
+	return 0;
+
+out_free_bus:
+	/* Release the bus the same way gpio_mdio_remove() does */
+	mdiobus_free(new_bus);
+out_free_priv:
+	kfree(priv);
+out:
+	return err;
+}
+
+
+static int gpio_mdio_remove(struct platform_device *dev)
+{
+	struct mii_bus *bus = dev_get_drvdata(&dev->dev);
+
+	mdiobus_unregister(bus);
+
+	dev_set_drvdata(&dev->dev, NULL);
+
+	kfree(bus->priv);
+	bus->priv = NULL;
+	mdiobus_free(bus);
+
+	return 0;
+}
+
+static const struct of_device_id gpio_mdio_match[] =
+{
+	{
+		.compatible      = "gpio-mdio",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, gpio_mdio_match);
+
+static struct platform_driver gpio_mdio_driver =
+{
+	.probe		= gpio_mdio_probe,
+	.remove		= gpio_mdio_remove,
+	.driver = {
+		.name = "gpio-mdio-bitbang",
+		.of_match_table = gpio_mdio_match,
+	},
+};
+
+/*
+ * Map the GPIO register block (node compatible with "1682m-gpio" or
+ * "pasemi,pwrficient-gpio") into gpio_regs and register the platform driver.
+ *
+ * Returns -ENODEV if no such node exists or it cannot be mapped, otherwise
+ * the result of platform_driver_register().
+ */
+static int __init gpio_mdio_init(void)
+{
+	struct device_node *np;
+	int err;
+
+	np = of_find_compatible_node(NULL, NULL, "1682m-gpio");
+	if (!np)
+		np = of_find_compatible_node(NULL, NULL,
+					     "pasemi,pwrficient-gpio");
+	if (!np)
+		return -ENODEV;
+	gpio_regs = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!gpio_regs)
+		return -ENODEV;
+
+	err = platform_driver_register(&gpio_mdio_driver);
+	if (err) {
+		/* Init failed: undo the mapping made above */
+		iounmap(gpio_regs);
+		gpio_regs = NULL;
+	}
+
+	return err;
+}
+module_init(gpio_mdio_init);
+
+static void __exit gpio_mdio_exit(void)
+{
+	platform_driver_unregister(&gpio_mdio_driver);
+	if (gpio_regs)
+		iounmap(gpio_regs);
+}
+module_exit(gpio_mdio_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Olof Johansson <olof@lixom.net>");
+MODULE_DESCRIPTION("Driver for MDIO over GPIO on PA Semi PWRficient-based boards");
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
new file mode 100644
index 0000000000..6087c70ed2
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/irq.h>
+
+#include <asm/machdep.h>
+#include <asm/reg.h>
+#include <asm/smp.h>
+
+#include "pasemi.h"
+
+struct sleep_mode {
+	char *name;
+	void (*entry)(void);
+};
+
+static struct sleep_mode modes[] = {
+	{ .name = "spin", .entry = &idle_spin },
+	{ .name = "doze", .entry = &idle_doze },
+};
+
+static int current_mode = 0;
+
+static int pasemi_system_reset_exception(struct pt_regs *regs)
+{
+	/* If we were woken up from power savings, we need to return
+	 * to the calling function, since nip is not saved across
+	 * all modes.
+	 */
+
+	if (regs->msr & SRR1_WAKEMASK)
+		regs_set_return_ip(regs, regs->link);
+
+	switch (regs->msr & SRR1_WAKEMASK) {
+	case SRR1_WAKEDEC:
+		set_dec(1);
+		break;
+	case SRR1_WAKEEE:
+		/*
+		 * Handle these when interrupts get re-enabled and we take
+		 * them as regular exceptions. We are in an NMI context
+		 * and can't handle these here.
+		 */
+		break;
+	default:
+		/* do system reset */
+		return 0;
+	}
+
+	/* Set higher astate since we come out of power savings at 0 */
+	restore_astate(hard_smp_processor_id());
+
+	/* everything handled */
+	regs_set_recoverable(regs);
+	return 1;
+}
+
+static int __init pasemi_idle_init(void)
+{
+#ifndef CONFIG_PPC_PASEMI_CPUFREQ
+	pr_warn("No cpufreq driver, powersavings modes disabled\n");
+	current_mode = 0;
+#endif
+
+	ppc_md.system_reset_exception = pasemi_system_reset_exception;
+	ppc_md.power_save = modes[current_mode].entry;
+	pr_info("Using PA6T idle loop (%s)\n", modes[current_mode].name);
+
+	return 0;
+}
+machine_late_initcall(pasemi, pasemi_idle_init);
+
+/*
+ * Handle the "idle" early parameter: select the entry of modes[] whose name
+ * matches @p exactly. An unknown name leaves current_mode unchanged.
+ *
+ * Returns 0, or -EINVAL when the parameter was given without a value.
+ */
+static int __init idle_param(char *p)
+{
+	int i;
+
+	/* No value supplied: there is nothing to compare against */
+	if (!p)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(modes); i++) {
+		if (!strcmp(modes[i].name, p)) {
+			current_mode = i;
+			break;
+		}
+	}
+	return 0;
+}
+
+early_param("idle", idle_param);
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
new file mode 100644
index 0000000000..375487cba8
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2005-2008, PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#undef DEBUG
+
+#include <linux/memblock.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "pasemi.h"
+
+#define IOBMAP_PAGE_SHIFT	12
+#define IOBMAP_PAGE_SIZE	(1 << IOBMAP_PAGE_SHIFT)
+#define IOBMAP_PAGE_MASK	(IOBMAP_PAGE_SIZE - 1)
+
+#define IOB_BASE		0xe0000000
+#define IOB_SIZE		0x3000
+/* Configuration registers */
+#define IOBCAP_REG		0x40
+#define IOBCOM_REG		0x100
+/* Enable IOB address translation */
+#define IOBCOM_ATEN		0x00000100
+
+/* Address decode configuration register */
+#define IOB_AD_REG		0x14c
+/* IOB_AD_REG fields */
+#define IOB_AD_VGPRT		0x00000e00
+#define IOB_AD_VGAEN		0x00000100
+/* Direct mapping settings */
+#define IOB_AD_MPSEL_MASK	0x00000030
+#define IOB_AD_MPSEL_B38	0x00000000
+#define IOB_AD_MPSEL_B40	0x00000010
+#define IOB_AD_MPSEL_B42	0x00000020
+/* Translation window size / enable */
+#define IOB_AD_TRNG_MASK	0x00000003
+#define IOB_AD_TRNG_256M	0x00000000
+#define IOB_AD_TRNG_2G		0x00000001
+#define IOB_AD_TRNG_128G	0x00000003
+
+#define IOB_TABLEBASE_REG	0x154
+
+/* Base of the 64 4-byte L1 registers */
+#define IOB_XLT_L1_REGBASE	0x2b00
+
+/* Register to invalidate TLB entries */
+#define IOB_AT_INVAL_TLB_REG	0x2d00
+
+/* The top two bits of the level 1 entry contain valid and type flags */
+#define IOBMAP_L1E_V		0x40000000
+#define IOBMAP_L1E_V_B		0x80000000
+
+/* For big page entries, the bottom two bits contain flags */
+#define IOBMAP_L1E_BIG_CACHED	0x00000002
+#define IOBMAP_L1E_BIG_PRIORITY	0x00000001
+
+/* For regular level 2 entries, top 2 bits contain valid and cache flags */
+#define IOBMAP_L2E_V		0x80000000
+#define IOBMAP_L2E_V_CACHED	0xc0000000
+
+static void __iomem *iob;	/* ioremap() of the IOB registers (IOB_BASE, IOB_SIZE) */
+static u32 iob_l1_emptyval;	/* set to 0 in iob_init(): empty L1 is marked invalid */
+static u32 iob_l2_emptyval;	/* valid L2 entry pointing at a spare page; written by iobmap_free() */
+static u32 *iob_l2_base;	/* L2 entry table (2^21 bytes) allocated in iob_init() */
+
+static struct iommu_table iommu_table_iobmap;
+static int iommu_table_iobmap_inited;	/* non-zero once iommu_table_iobmap_setup() has been called */
+
+/*
+ * iommu_table_ops.set callback: fill npages consecutive L2 entries, starting
+ * at table index 'index', with the physical page numbers backing uaddr and
+ * invalidate the IOB TLB for each page.  direction and attrs are not used.
+ * Always returns 0.
+ */
+static int iobmap_build(struct iommu_table *tbl, long index,
+			 long npages, unsigned long uaddr,
+			 enum dma_data_direction direction,
+			 unsigned long attrs)
+{
+	u32 *entry = (u32 *)tbl->it_base + index;
+	unsigned long bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT;
+
+	pr_debug("iobmap: build at: %lx, %lx, addr: %lx\n", index, npages, uaddr);
+
+	for (; npages--; entry++) {
+		*entry = IOBMAP_L2E_V | (u32)(__pa(uaddr) >> IOBMAP_PAGE_SHIFT);
+		/* invalidate tlb, can be optimized more */
+		out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14);
+
+		uaddr += IOBMAP_PAGE_SIZE;
+		bus_addr += IOBMAP_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+
+/*
+ * iommu_table_ops.clear callback: reset npages consecutive L2 entries,
+ * starting at table index 'index', to iob_l2_emptyval and invalidate the
+ * IOB TLB for each page.
+ */
+static void iobmap_free(struct iommu_table *tbl, long index,
+			long npages)
+{
+	u32 *entry = (u32 *)tbl->it_base + index;
+	unsigned long bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT;
+
+	pr_debug("iobmap: free at: %lx, %lx\n", index, npages);
+
+	for (; npages--; entry++, bus_addr += IOBMAP_PAGE_SIZE) {
+		*entry = iob_l2_emptyval;
+		/* invalidate tlb, can be optimized more */
+		out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14);
+	}
+}
+
+static struct iommu_table_ops iommu_table_iobmap_ops = {
+	.set = iobmap_build,
+	.clear  = iobmap_free
+};
+
+static void iommu_table_iobmap_setup(void)
+{
+	pr_debug(" -> %s\n", __func__);
+	iommu_table_iobmap.it_busno = 0;
+	iommu_table_iobmap.it_offset = 0;
+	iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT;
+
+	/* it_size is in number of entries: 0x80000000 (2GB) worth of IOBMAP pages */
+	iommu_table_iobmap.it_size =
+		0x80000000 >> iommu_table_iobmap.it_page_shift;
+
+	/* Initialize the common IOMMU code; the table memory is the L2 block from iob_init() */
+	iommu_table_iobmap.it_base = (unsigned long)iob_l2_base;
+	iommu_table_iobmap.it_index = 0;
+	/* XXXOJN tune this to avoid IOB cache invals.
+	 * Should probably be 8 (64 bytes)
+	 */
+	iommu_table_iobmap.it_blocksize = 4;
+	iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
+	if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	pr_debug(" <- %s\n", __func__);
+}
+
+
+
+static void pci_dma_bus_setup_pasemi(struct pci_bus *bus)
+{
+	pr_debug("pci_dma_bus_setup, bus %p, bus->self %p\n", bus, bus->self);
+
+	if (!iommu_table_iobmap_inited) {	/* set up the single shared table on the first call only */
+		iommu_table_iobmap_inited = 1;
+		iommu_table_iobmap_setup();
+	}
+}
+
+
+static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
+{
+	pr_debug("pci_dma_dev_setup, dev %p (%s)\n", dev, pci_name(dev));
+
+#if !defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE)
+	/* For non-LPAR environment, don't translate anything for the DMA
+	 * engine. The exception to this is if the user has enabled
+	 * CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE at build time.
+	 */
+	if (dev->vendor == 0x1959 && dev->device == 0xa007 &&	/* presumably the DMA engine's PCI ID */
+	    !firmware_has_feature(FW_FEATURE_LPAR)) {
+		dev->dev.dma_ops = NULL;
+		/*
+		 * Set the coherent DMA mask to prevent the iommu
+		 * being used unnecessarily
+		 */
+		dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
+		return;	/* this device does not get the iobmap table */
+	}
+#endif
+
+	set_iommu_table_base(&dev->dev, &iommu_table_iobmap);	/* every other device shares the one table */
+}
+
+static int __init iob_init(struct device_node *dn)
+{	/* dn is not referenced anywhere in this function */
+	unsigned long tmp;
+	u32 regword;
+	int i;
+
+	pr_debug(" -> %s\n", __func__);
+
+	/* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
+	iob_l2_base = memblock_alloc_try_nid_raw(1UL << 21, 1UL << 21,
+					MEMBLOCK_LOW_LIMIT, 0x80000000,
+					NUMA_NO_NODE);
+	if (!iob_l2_base)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%x\n",
+		      __func__, 1UL << 21, 1UL << 21, 0x80000000);
+
+	pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base);
+
+	/* Allocate a spare page to map all invalid IOTLB pages. */
+	tmp = memblock_phys_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
+	if (!tmp)
+		panic("IOBMAP: Cannot allocate spare page!");
+	/* Empty l1 is marked invalid */
+	iob_l1_emptyval = 0;
+	/* Empty l2 is mapped to dummy page */
+	iob_l2_emptyval = IOBMAP_L2E_V | (tmp >> IOBMAP_PAGE_SHIFT);
+
+	iob = ioremap(IOB_BASE, IOB_SIZE);
+	if (!iob)
+		panic("IOBMAP: Cannot map registers!");
+
+	/* setup direct mapping of the L1 entries */
+	for (i = 0; i < 64; i++) {	/* 64 L1 registers, 4 bytes apart */
+		/* Each L1 covers 32MB, i.e. 8K entries = 32K of ram */
+		regword = IOBMAP_L1E_V | (__pa(iob_l2_base + i*0x2000) >> 12);	/* u32 pointer arithmetic: 0x2000 entries per step */
+		out_le32(iob+IOB_XLT_L1_REGBASE+i*4, regword);
+	}
+
+	/* set 2GB translation window, based at 0 */
+	regword = in_le32(iob+IOB_AD_REG);
+	regword &= ~IOB_AD_TRNG_MASK;
+	regword |= IOB_AD_TRNG_2G;
+	out_le32(iob+IOB_AD_REG, regword);
+
+	/* Enable translation */
+	regword = in_le32(iob+IOBCOM_REG);
+	regword |= IOBCOM_ATEN;
+	out_le32(iob+IOBCOM_REG, regword);
+
+	pr_debug(" <- %s\n", __func__);
+
+	return 0;	/* every failure above panics instead of returning */
+}
+
+
+/* These are called very early. */
+void __init iommu_init_early_pasemi(void)
+{
+	int iommu_off;
+
+#ifndef CONFIG_PPC_PASEMI_IOMMU
+	iommu_off = 1;	/* IOMMU support not built in */
+#else
+	iommu_off = of_chosen &&
+			of_property_read_bool(of_chosen, "linux,iommu-off");
+#endif
+	if (iommu_off)
+		return;	/* leave the controller ops and PCI DMA ops untouched */
+
+	iob_init(NULL);	/* the argument is unused by iob_init() */
+
+	pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
+	pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
+	set_pci_dma_ops(&dma_iommu_ops);
+}
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
new file mode 100644
index 0000000000..9e9a7e4628
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 PA Semi, Inc
+ *
+ * Parts based on arch/powerpc/sysdev/fsl_soc.c:
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/i2c.h>
+
+#ifdef CONFIG_I2C_BOARDINFO
+/* The below is from fsl_soc.c.  It's copied because since there are no
+ * official bus bindings at this time it doesn't make sense to share across
+ * the platforms, even though they happen to be common.
+ */
+struct i2c_driver_device {
+	char    *of_device;
+	char    *i2c_type;
+};
+
+static struct i2c_driver_device i2c_devices[] __initdata = {
+	{"dallas,ds1338",  "ds1338"},
+};
+
+static int __init find_i2c_driver(struct device_node *node,
+				     struct i2c_board_info *info)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) {
+		if (!of_device_is_compatible(node, i2c_devices[i].of_device))
+			continue;
+		if (strscpy(info->type, i2c_devices[i].i2c_type, I2C_NAME_SIZE) < 0)
+			return -ENOMEM;	/* strscpy() reported an error copying into info->type */
+		return 0;	/* info->type now holds the matching i2c_type */
+	}
+	return -ENODEV;	/* node is not compatible with any i2c_devices[] entry */
+}
+
+static int __init pasemi_register_i2c_devices(void)
+{
+	struct pci_dev *pdev;
+	struct device_node *adap_node;
+	struct device_node *node;
+
+	pdev = NULL;
+	while ((pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa003, pdev))) {	/* every PA Semi device with ID 0xa003 */
+		adap_node = pci_device_to_OF_node(pdev);
+
+		if (!adap_node)
+			continue;	/* no device-tree node, so no children to register */
+
+		for_each_child_of_node(adap_node, node) {
+			struct i2c_board_info info = {};
+			const u32 *addr;
+			int len;
+
+			addr = of_get_property(node, "reg", &len);
+			if (!addr || len < sizeof(int) ||
+			    *addr > (1 << 10) - 1) {	/* "reg" must exist and fit in 10 bits */
+				pr_warn("pasemi_register_i2c_devices: invalid i2c device entry\n");
+				continue;
+			}
+
+			info.irq = irq_of_parse_and_map(node, 0);
+			if (!info.irq)
+				info.irq = -1;	/* no interrupt was mapped for this node */
+
+			if (find_i2c_driver(node, &info) < 0)
+				continue;	/* not a device listed in i2c_devices[] */
+
+			info.addr = *addr;
+
+			i2c_register_board_info(PCI_FUNC(pdev->devfn), &info,	/* bus number = adapter's PCI function */
+						1);
+		}
+	}
+	return 0;
+}
+device_initcall(pasemi_register_i2c_devices);
+#endif
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
new file mode 100644
index 0000000000..166c97fff1
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/msi.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2007, Olof Johansson, PA Semi
+ *
+ * Based on arch/powerpc/sysdev/mpic_u3msi.c:
+ *
+ * Copyright 2006, Segher Boessenkool, IBM Corporation.
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include <sysdev/mpic.h>
+
+/* Allocate 16 interrupts per device, to give an alignment of 16,
+ * since that's the size of the grouping w.r.t. affinity. If someone
+ * needs more than 32 MSI's down the road we'll have to rethink this,
+ * but it should be OK for now.
+ */
+#define ALLOC_CHUNK 16
+
+#define PASEMI_MSI_ADDR 0xfc080000
+
+/* A bit ugly, can we get this from the pci_dev somehow? */
+static struct mpic *msi_mpic;
+
+
+static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
+{
+	pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
+	pci_msi_mask_irq(data);	/* mask at the PCI MSI level first... */
+	mpic_mask_irq(data);	/* ...then at the MPIC */
+}
+
+static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
+{
+	pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
+	mpic_unmask_irq(data);	/* reverse order of the mask path: MPIC first... */
+	pci_msi_unmask_irq(data);	/* ...then the PCI MSI level */
+}
+
+static struct irq_chip mpic_pasemi_msi_chip = {
+	.irq_shutdown		= mpic_pasemi_msi_mask_irq,
+	.irq_mask		= mpic_pasemi_msi_mask_irq,
+	.irq_unmask		= mpic_pasemi_msi_unmask_irq,
+	.irq_eoi		= mpic_end_irq,
+	.irq_set_type		= mpic_set_irq_type,
+	.irq_set_affinity	= mpic_set_affinity,
+	.name			= "PASEMI-MSI",
+};
+
+static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	irq_hw_number_t hwirq;
+
+	pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev);
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		hwirq = virq_to_hw(entry->irq);	/* looked up before the mapping is disposed */
+		irq_set_msi_desc(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
+		msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK);	/* same chunk size as allocated in setup */
+	}
+}
+
+static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	unsigned int virq;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	int hwirq;
+
+	if (type == PCI_CAP_ID_MSIX)
+		pr_debug("pasemi_msi: MSI-X untested, trying anyway\n");
+	pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n",
+		 pdev, nvec, type);
+
+	msg.address_hi = 0;
+	msg.address_lo = PASEMI_MSI_ADDR;	/* same target address for every vector; only msg.data differs */
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+		/* Allocate 16 interrupts for now, since that's the grouping for
+		 * affinity. This can be changed later if it turns out 32 is too
+		 * few MSIs for someone, but restrictions will apply to how the
+		 * sources can be changed independently.
+		 */
+		hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap,
+						ALLOC_CHUNK);
+		if (hwirq < 0) {
+			pr_debug("pasemi_msi: failed allocating hwirq\n");
+			return hwirq;	/* descriptors set up in earlier iterations are not undone here */
+		}
+
+		virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
+		if (!virq) {
+			pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n",
+				  hwirq);
+			msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq,
+					       ALLOC_CHUNK);
+			return -ENOSPC;
+		}
+
+		/* Vector on MSI is really an offset, the hardware adds
+		 * it to the value written at the magic address. So set
+		 * it to 0 to remain sane.
+		 */
+		mpic_set_vector(virq, 0);
+
+		irq_set_msi_desc(virq, entry);
+		irq_set_chip(virq, &mpic_pasemi_msi_chip);
+		irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
+
+		pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \
+			 "addr 0x%x\n", virq, hwirq, msg.address_lo);
+
+		/* Likewise, the device writes [0...511] into the target
+		 * register to generate MSI [512...1023]
+		 */
+		msg.data = hwirq-0x200;	/* 0x200 == 512, the offset described above */
+		pci_write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+int __init mpic_pasemi_msi_init(struct mpic *mpic)
+{
+	int rc;
+	struct pci_controller *phb;
+	struct device_node *of_node;
+
+	of_node = irq_domain_get_of_node(mpic->irqhost);
+	if (!of_node ||
+	    !of_device_is_compatible(of_node,
+				     "pasemi,pwrficient-openpic"))
+		return -ENODEV;	/* not a PA Semi MPIC */
+
+	rc = mpic_msi_init_allocator(mpic);
+	if (rc) {
+		pr_debug("pasemi_msi: Error allocating bitmap!\n");
+		return rc;
+	}
+
+	pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n");
+
+	msi_mpic = mpic;	/* remembered for the setup/teardown callbacks */
+	list_for_each_entry(phb, &hose_list, list_node) {
+		WARN_ON(phb->controller_ops.setup_msi_irqs);	/* warn if another handler is already installed */
+		phb->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs;
+		phb->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
new file mode 100644
index 0000000000..018c30665e
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PASEMI_PASEMI_H
+#define _PASEMI_PASEMI_H
+
+extern time64_t pas_get_boot_time(void);
+extern void pas_pci_init(void);
+struct pci_dev;
+extern void pas_pci_irq_fixup(struct pci_dev *dev);
+extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
+
+void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset);
+
+extern void __init pasemi_map_registers(void);
+
+/* Power savings modes, implemented in asm */
+extern void idle_spin(void);
+extern void idle_doze(void);
+
+/* Restore astate to last set */
+#ifdef CONFIG_PPC_PASEMI_CPUFREQ
+extern int check_astate(void);
+extern void restore_astate(int cpu);
+#else	/* !CONFIG_PPC_PASEMI_CPUFREQ: inline stubs */
+static inline int check_astate(void)
+{
+	/* Always return >0 so we never power save */
+	return 1;
+}
+static inline void restore_astate(int cpu)
+{	/* intentionally empty in this configuration */
+}
+#endif
+
+extern struct pci_controller_ops pasemi_pci_controller_ops;
+
+#endif /* _PASEMI_PASEMI_H */
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
new file mode 100644
index 0000000000..f27d314147
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ *	    Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/pci.c
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/isa-bridge.h>
+#include <asm/machdep.h>
+
+#include <asm/ppc-pci.h>
+
+#include "pasemi.h"
+
+#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off))
+
+static inline int pa_pxp_offset_valid(u8 bus, u8 devfn, int offset)
+{
+	/* Device 0 Function 0 is special: Its config space spans function 1 as
+	 * well, so allow larger offset. It's really a two-function device but the
+	 * second function does not probe.
+	 */
+	if (bus == 0 && devfn == 0)
+		return offset < 8192;	/* two 4096-byte config spaces */
+	else
+		return offset < 4096;	/* one 4096-byte config space */
+}
+
+static void volatile __iomem *pa_pxp_cfg_addr(struct pci_controller *hose,
+				       u8 bus, u8 devfn, int offset)
+{
+	return hose->cfg_data + PA_PXP_CFA(bus, devfn, offset);
+}
+
+static inline int is_root_port(int busno, int devfn)
+{
+	return ((busno == 0) && (PCI_FUNC(devfn) < 4) &&	/* bus 0, functions 0-3 ... */
+		 ((PCI_SLOT(devfn) == 16) || (PCI_SLOT(devfn) == 17)));	/* ... of devices 16 and 17 */
+}
+
+static inline int is_5945_reg(int reg)
+{
+	return (((reg >= 0x18) && (reg < 0x34)) ||	/* config offsets 0x18-0x33 */
+		((reg >= 0x158) && (reg < 0x178)));	/* config offsets 0x158-0x177 */
+}
+
+static int workaround_5945(struct pci_bus *bus, unsigned int devfn,
+			   int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	void volatile __iomem *addr, *dummy;
+	int byte;
+	u32 tmp;
+
+	if (!is_root_port(bus->number, devfn) || !is_5945_reg(offset))
+		return 0;	/* not affected: caller does a normal read, *val untouched */
+
+	hose = pci_bus_to_host(bus);
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset & ~0x3);	/* containing 32-bit word */
+	byte = offset & 0x3;	/* byte position of the request inside that word */
+
+	/* Workaround bug 5945: write 0 to a dummy register before reading,
+	 * and write back what we read. We must read/write the full 32-bit
+	 * contents so we need to shift and mask by hand.
+	 */
+	dummy = pa_pxp_cfg_addr(hose, bus->number, devfn, 0x10);
+	out_le32(dummy, 0);
+	tmp = in_le32(addr);
+	out_le32(addr, tmp);
+
+	switch (len) {
+	case 1:
+		*val = (tmp >> (8*byte)) & 0xff;
+		break;
+	case 2:
+		if (byte == 0)
+			*val = tmp & 0xffff;
+		else
+			*val = (tmp >> 16) & 0xffff;	/* any non-zero byte offset selects the upper half */
+		break;
+	default:
+		*val = tmp;
+		break;
+	}
+
+	return 1;	/* *val holds the result; caller must not read again */
+}
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+#define PXP_ERR_CFG_REG	0x4
+#define PXP_IGNORE_PCIE_ERRORS	0x800
+#define SB600_BUS 5
+
+static void sb600_set_flag(int bus)
+{
+	static void __iomem *iob_mapbase = NULL;	/* mapped on first use, then kept across calls */
+	struct resource res;
+	struct device_node *dn;
+	int err;
+
+	if (iob_mapbase == NULL) {
+		dn = of_find_compatible_node(NULL, "isa", "pasemi,1682m-iob");
+		if (!dn) {
+			pr_crit("NEMO SB600 missing iob node\n");
+			return;
+		}
+
+		err = of_address_to_resource(dn, 0, &res);
+		of_node_put(dn);	/* node reference is dropped whether or not the lookup worked */
+
+		if (err) {
+			pr_crit("NEMO SB600 missing resource\n");
+			return;
+		}
+
+		pr_info("NEMO SB600 IOB base %08llx\n",res.start);
+
+		iob_mapbase = ioremap(res.start + 0x100, 0x94);	/* on failure this stays NULL and is retried next call */
+	}
+
+	if (iob_mapbase != NULL) {
+		if (bus == SB600_BUS) {
+			/*
+			 * This is the SB600's bus, tell the PCI-e root port
+			 * to allow non-zero devices to enumerate.
+			 */
+			out_le32(iob_mapbase + PXP_ERR_CFG_REG, in_le32(iob_mapbase + PXP_ERR_CFG_REG) | PXP_IGNORE_PCIE_ERRORS);
+		} else {
+			/*
+			 * Only scan device 0 on other busses
+			 */
+			out_le32(iob_mapbase + PXP_ERR_CFG_REG, in_le32(iob_mapbase + PXP_ERR_CFG_REG) & ~PXP_IGNORE_PCIE_ERRORS);
+		}
+	}
+}
+
+#else
+
+static void sb600_set_flag(int bus)
+{
+}
+#endif
+
+static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	void volatile __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pa_pxp_offset_valid(bus->number, devfn, offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (workaround_5945(bus, devfn, offset, len, val))	/* non-zero: *val was already filled in there */
+		return PCIBIOS_SUCCESSFUL;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	sb600_set_flag(bus->number);	/* no-op unless CONFIG_PPC_PASEMI_NEMO is set */
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8(addr);
+		break;
+	case 2:
+		*val = in_le16(addr);
+		break;
+	default:
+		*val = in_le32(addr);
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 val)
+{
+	struct pci_controller *hose;
+	void volatile __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pa_pxp_offset_valid(bus->number, devfn, offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);	/* unlike reads, writes do not go through workaround_5945() */
+
+	sb600_set_flag(bus->number);	/* no-op unless CONFIG_PPC_PASEMI_NEMO is set */
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8(addr, val);
+		break;
+	case 2:
+		out_le16(addr, val);
+		break;
+	default:
+		out_le32(addr, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops pa_pxp_ops = {
+	.read = pa_pxp_read_config,
+	.write = pa_pxp_write_config,
+};
+
+static void __init setup_pa_pxp(struct pci_controller *hose)
+{
+	hose->ops = &pa_pxp_ops;
+	hose->cfg_data = ioremap(0xe0000000, 0x10000000);
+}
+
+static int __init pas_add_bridge(struct device_node *dev)
+{
+	struct pci_controller *hose;
+
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
+
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;	/* the only failure this function reports */
+
+	hose->first_busno = 0;
+	hose->last_busno = 0xff;
+	hose->controller_ops = pasemi_pci_controller_ops;	/* struct copy: snapshot of the ops at this point */
+
+	setup_pa_pxp(hose);
+
+	pr_info("Found PA-PXP PCI host bridge.\n");
+
+	/* Interpret the "ranges" property */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+
+	/*
+	 * Scan for an isa bridge. This is needed to find the SB600 on the nemo
+	 * and does nothing on machines without one.
+	 */
+	isa_bridge_find_early(hose);
+
+	return 0;
+}
+
+/* discover_phbs hook: add the host bridge described by "pasemi,rootbus". */
+void __init pas_pci_init(void)
+{
+	struct device_node *np;
+
+	pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS);
+	np = of_find_compatible_node(of_root, NULL, "pasemi,rootbus");
+	if (np) {
+		if (pas_add_bridge(np))	/* report the failure instead of dropping it */
+			pr_err("Failed to add PCI host bridge %pOF\n", np);
+		of_node_put(np);
+	}
+}
+
+void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
+{
+	struct pci_controller *hose;
+
+	hose = pci_bus_to_host(dev->bus);	/* NOTE(review): not checked for NULL, unlike the config accessors */
+
+	return (void __iomem *)pa_pxp_cfg_addr(hose, dev->bus->number, dev->devfn, offset);	/* cast drops the volatile qualifier */
+}
+
+struct pci_controller_ops pasemi_pci_controller_ops;
diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S
new file mode 100644
index 0000000000..d0215d5329
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/powersave.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+/* Power savings opcodes since not all binutils have them at this time */
+#define DOZE	.long	0x4c000324
+#define NAP	.long	0x4c000364
+#define SLEEP	.long	0x4c0003a4
+#define RVW	.long	0x4c0003e4
+
+/* Common sequence to do before going to any of the
+ * powersavings modes.
+ */
+
+#define PRE_SLEEP_SEQUENCE	\
+	std	r3,8(r1);	\
+	ptesync	;		\
+	ld	r3,8(r1);	\
+1:	cmpd 	r3,r3;		\
+	bne	1b
+
+_doze:
+	PRE_SLEEP_SEQUENCE
+	DOZE
+	b	.
+
+
+_GLOBAL(idle_spin)
+	blr
+
+_GLOBAL(idle_doze)
+	LOAD_REG_ADDR(r3, _doze)
+	b	sleep_common
+
+/* Add more modes here later */
+
+sleep_common:	/* r3 = address of the mode's sleep sequence (set up by idle_doze) */
+	mflr	r0
+	std	r0, 16(r1)	/* save LR at 16(r1) before the new frame is pushed */
+	stdu	r1,-64(r1)	/* push a 64-byte stack frame */
+#ifdef CONFIG_PPC_PASEMI_CPUFREQ
+	std	r3, 48(r1)	/* keep the target address across the call below */
+
+	/* Only do power savings when in astate 0 */
+	bl	check_astate
+	cmpwi	r3,0
+	bne	1f	/* non-zero result: skip straight to the epilogue */
+
+	ld	r3, 48(r1)	/* reload the target address */
+#endif
+	LOAD_REG_IMMEDIATE(r6,MSR_DR|MSR_IR|MSR_ME|MSR_EE)
+	mfmsr	r4	/* r4 = original MSR, restored after the call */
+	andc	r5,r4,r6	/* r5 = MSR with DR, IR, ME and EE cleared */
+	mtmsrd	r5,0
+
+	mtctr	r3
+	bctrl	/* call the sleep sequence; LR = address of the next instruction */
+
+	mtmsrd	r4,0	/* restore the original MSR */
+
+1:	addi	r1,r1,64	/* pop the frame */
+	ld	r0,16(r1)
+	mtlr	r0
+	blr
+
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
new file mode 100644
index 0000000000..ef985ba2bf
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ *	    Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/setup.c
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/irqdomain.h>
+
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/i8259.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/mmu.h>
+#include <asm/debug.h>
+
+#include <pcmcia/ss.h>
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+
+#include "pasemi.h"
+
+/* SDC reset register, must be pre-mapped at reset time */
+static void __iomem *reset_reg;
+
+/* Various error status registers, must be pre-mapped at MCE time */
+
+#define MAX_MCE_REGS	32
+struct mce_regs {
+	char *name;
+	void __iomem *addr;
+};
+
+static struct mce_regs mce_regs[MAX_MCE_REGS];
+static int num_mce_regs;
+static int nmi_virq = 0;
+
+
+static void __noreturn pas_restart(char *cmd)
+{
+	/* Need to put the other CPUs in a hold loop so they're not sleeping */
+	smp_send_stop();
+	udelay(10000);
+	printk("Restarting...\n");
+	while (1)	/* keep writing the reset value; this function never returns */
+		out_le32(reset_reg, 0x6000000);
+}
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+void pas_shutdown(void)
+{
+	/* Set the PLD bit that makes the SB600 think the power button is being pressed */
+	void __iomem *pld_map = ioremap(0xf5000000,4096);	/* NOTE(review): result is not checked for NULL */
+	while (1)	/* repeat the write forever; this function never returns */
+		out_8(pld_map+7,0x01);
+}
+
+/* RTC platform device resources, defined here because the RTC is not in the device tree */
+static struct resource rtc_resource[] = {{
+	.name = "rtc",
+	.start = 0x70,
+	.end = 0x71,
+	.flags = IORESOURCE_IO,
+}, {
+	.name = "rtc",
+	.start = 8,
+	.end = 8,
+	.flags = IORESOURCE_IRQ,
+}};
+
+static inline void nemo_init_rtc(void)
+{
+	platform_device_register_simple("rtc_cmos", -1, rtc_resource, 2);
+}
+
+#else
+
+static inline void nemo_init_rtc(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SMP
+static arch_spinlock_t timebase_lock;
+static unsigned long timebase;	/* value handed from giver to taker; 0 means nothing is pending */
+
+static void pas_give_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	hard_irq_disable();
+	arch_spin_lock(&timebase_lock);
+	mtspr(SPRN_TBCTL, TBCTL_FREEZE);	/* freeze before sampling so the published value stays current */
+	isync();
+	timebase = get_tb();	/* publish the frozen value for pas_take_timebase() */
+	arch_spin_unlock(&timebase_lock);
+
+	while (timebase)	/* wait until pas_take_timebase() clears it */
+		barrier();
+	mtspr(SPRN_TBCTL, TBCTL_RESTART);
+	local_irq_restore(flags);
+}
+
+static void pas_take_timebase(void)
+{
+	while (!timebase)	/* wait for pas_give_timebase() to publish a value */
+		smp_rmb();
+
+	arch_spin_lock(&timebase_lock);
+	set_tb(timebase >> 32, timebase & 0xffffffff);	/* passed as upper and lower 32-bit halves */
+	timebase = 0;	/* signals the giver that the value has been consumed */
+	arch_spin_unlock(&timebase_lock);
+}
+
+static struct smp_ops_t pas_smp_ops = {
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_mpic_setup_cpu,
+	.give_timebase	= pas_give_timebase,
+	.take_timebase	= pas_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+static void __init pas_setup_arch(void)
+{
+#ifdef CONFIG_SMP
+	/* Setup SMP callback */
+	smp_ops = &pas_smp_ops;
+#endif
+
+	/* Remap SDC register for doing reset */
+	/* XXXOJN This should maybe come out of the device tree */
+	reset_reg = ioremap(0xfc101100, 4);
+}
+
+/*
+ * Record names and config-space addresses of various SoC status registers
+ * in mce_regs[] so pas_machine_check_handler() can dump them.
+ */
+static int __init pas_setup_mce_regs(void)
+{
+	struct pci_dev *dev;
+	int reg = 0;
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, NULL);
+	while (dev && reg < MAX_MCE_REGS) {	/* one entry per 0xa00a device found */
+		mce_regs[reg].name = kasprintf(GFP_KERNEL,
+						"mc%d_mcdebug_errsta", reg);
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x730);
+		dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, dev);
+		reg++;
+	}
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
+	if (dev && reg+4 <= MAX_MCE_REGS) {	/* all four entries must fit */
+		mce_regs[reg].name = "iobdbg_IntStatus1";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x438);
+		reg++;
+		mce_regs[reg].name = "iobdbg_IOCTbusIntDbgReg";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x454);
+		reg++;
+		mce_regs[reg].name = "iobiom_IntStatus";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc10);
+		reg++;
+		mce_regs[reg].name = "iobiom_IntDbgReg";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc1c);
+		reg++;
+	}
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa009, NULL);
+	if (dev && reg+2 <= MAX_MCE_REGS) {	/* both entries must fit */
+		mce_regs[reg].name = "l2csts_IntStatus";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x200);
+		reg++;
+		mce_regs[reg].name = "l2csts_Cnt";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x214);
+		reg++;
+	}
+
+	num_mce_regs = reg;	/* number of valid mce_regs[] entries */
+
+	return 0;
+}
+machine_device_initcall(pasemi, pas_setup_mce_regs);
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+static void sb600_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq = i8259_irq();
+
+	if (cascade_irq)	/* 0 means there is nothing to hand on */
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);	/* the cascade interrupt is always EOI'd on its own chip */
+}
+
+static void __init nemo_init_IRQ(struct mpic *mpic)
+{
+	struct device_node *np;
+	int gpio_virq;
+	/* Connect the SB600's legacy i8259 controller */
+	np = of_find_node_by_path("/pxp@0,e0000000");
+	i8259_init(np, 0);
+	of_node_put(np);
+
+	gpio_virq = irq_create_mapping(NULL, 3);	/* hardware interrupt 3 in the default domain */
+	irq_set_irq_type(gpio_virq, IRQ_TYPE_LEVEL_HIGH);
+	irq_set_chained_handler(gpio_virq, sb600_8259_cascade);
+	mpic_unmask_irq(irq_get_irq_data(gpio_virq));
+
+	irq_set_default_host(mpic->irqhost);	/* make the MPIC domain the default again */
+}
+
+#else
+
+static inline void nemo_init_IRQ(struct mpic *mpic)
+{
+}
+#endif
+
+/*
+ * Machine init_IRQ hook: locate the MPIC node, read its address from the
+ * root "platform-open-pic" property, then allocate and initialise the MPIC.
+ */
+static __init void pas_init_IRQ(void)
+{
+	struct device_node *np, *root, *mpic_node = NULL;
+	unsigned long openpic_addr;
+	const unsigned int *opprop, *nmiprop;
+	int naddr, opplen;
+	int mpic_flags;
+	struct mpic *mpic;
+
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "open-pic")) {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node)
+		for_each_node_by_type(np, "open-pic") {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node) {
+		pr_err("Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
+
+	/* Find address list in /platform-open-pic */
+	root = of_find_node_by_path("/");
+	naddr = of_n_addr_cells(root);
+	opprop = of_get_property(root, "platform-open-pic", &opplen);
+	if (!opprop) {
+		pr_err("No platform-open-pic property.\n");
+		goto out;
+	}
+	openpic_addr = of_read_number(opprop, naddr);
+	pr_debug("OpenPIC addr: %lx\n", openpic_addr);
+
+	mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET;
+
+	nmiprop = of_get_property(mpic_node, "nmi-source", NULL);
+	if (nmiprop)
+		mpic_flags |= MPIC_ENABLE_MCK;
+
+	mpic = mpic_alloc(mpic_node, openpic_addr,
+			  mpic_flags, 0, 0, "PASEMI-OPIC");
+	BUG_ON(!mpic);
+
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10000);
+	mpic_init(mpic);
+	/* The NMI/MCK source needs to be prio 15 */
+	if (nmiprop) {
+		nmi_virq = irq_create_mapping(NULL, *nmiprop);
+		mpic_irq_set_priority(nmi_virq, 15);
+		irq_set_irq_type(nmi_virq, IRQ_TYPE_EDGE_RISING);
+		mpic_unmask_irq(irq_get_irq_data(nmi_virq));
+	}
+
+	nemo_init_IRQ(mpic);
+
+out:	/* both node references are held on every path that reaches here */
+	of_node_put(mpic_node);
+	of_node_put(root);
+}
+
+static void __init pas_progress(char *s, unsigned short hex)
+{
+	printk("[%04x] : %s\n", hex, s ? s : "");
+}
+
+
+static int pas_machine_check_handler(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	unsigned long srr0, srr1, dsisr;
+	int dump_slb = 0;	/* set when an SLB-related cause is reported below */
+	int i;
+
+	srr0 = regs->nip;
+	srr1 = regs->msr;
+
+	if (nmi_virq && mpic_get_mcirq() == nmi_virq) {	/* machine check raised by the NMI source set up in pas_init_IRQ() */
+		pr_err("NMI delivered\n");
+		debugger(regs);
+		mpic_end_irq(irq_get_irq_data(nmi_virq));
+		goto out;	/* skip the register dump for NMIs */
+	}
+
+	dsisr = mfspr(SPRN_DSISR);
+	pr_err("Machine Check on CPU %d\n", cpu);
+	pr_err("SRR0  0x%016lx SRR1 0x%016lx\n", srr0, srr1);
+	pr_err("DSISR 0x%016lx DAR  0x%016lx\n", dsisr, regs->dar);
+	pr_err("BER   0x%016lx MER  0x%016lx\n", mfspr(SPRN_PA6T_BER),
+		mfspr(SPRN_PA6T_MER));
+	pr_err("IER   0x%016lx DER  0x%016lx\n", mfspr(SPRN_PA6T_IER),
+		mfspr(SPRN_PA6T_DER));
+	pr_err("Cause:\n");
+
+	if (srr1 & 0x200000)
+		pr_err("Signalled by SDC\n");
+
+	if (srr1 & 0x100000) {	/* DSISR bits are only decoded for load/store errors */
+		pr_err("Load/Store detected error:\n");
+		if (dsisr & 0x8000)
+			pr_err("D-cache ECC double-bit error or bus error\n");
+		if (dsisr & 0x4000)
+			pr_err("LSU snoop response error\n");
+		if (dsisr & 0x2000) {
+			pr_err("MMU SLB multi-hit or invalid B field\n");
+			dump_slb = 1;
+		}
+		if (dsisr & 0x1000)
+			pr_err("Recoverable Duptags\n");
+		if (dsisr & 0x800)
+			pr_err("Recoverable D-cache parity error count overflow\n");
+		if (dsisr & 0x400)
+			pr_err("TLB parity error count overflow\n");
+	}
+
+	if (srr1 & 0x80000)
+		pr_err("Bus Error\n");
+
+	if (srr1 & 0x40000) {
+		pr_err("I-side SLB multiple hit\n");
+		dump_slb = 1;
+	}
+
+	if (srr1 & 0x20000)
+		pr_err("I-cache parity error hit\n");
+
+	if (num_mce_regs == 0)
+		pr_err("No MCE registers mapped yet, can't dump\n");
+	else
+		pr_err("SoC debug registers:\n");
+
+	for (i = 0; i < num_mce_regs; i++)	/* entries recorded by pas_setup_mce_regs() */
+		pr_err("%s: 0x%08x\n", mce_regs[i].name,
+			in_le32(mce_regs[i].addr));
+
+	if (dump_slb) {
+		unsigned long e, v;
+		int i;	/* shadows the function-scope i */
+
+		pr_err("slb contents:\n");
+		for (i = 0; i < mmu_slb_size; i++) {
+			asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
+			asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
+			pr_err("%02d %016lx %016lx\n", i, e, v);
+		}
+	}
+
+out:
+	/* SRR1[62] is from MSR[62] if recoverable, so pass that back */
+	return !!(srr1 & 0x2);	/* 1 if that bit is set, 0 otherwise */
+}
+
+static const struct of_device_id pasemi_bus_ids[] = {
+	/* Unfortunately needed for legacy firmwares */
+	{ .type = "localbus", },
+	{ .type = "sdc", },
+	/* These are the proper entries, which newer firmware uses */
+	{ .compatible = "pasemi,localbus", },
+	{ .compatible = "pasemi,sdc", },
+	{},
+};
+
+static int __init pasemi_publish_devices(void)
+{
+	/* Publish OF platform devices for SDC and other non-PCI devices */
+	of_platform_bus_probe(NULL, pasemi_bus_ids, NULL);
+
+	nemo_init_rtc();
+
+	return 0;
+}
+machine_device_initcall(pasemi, pasemi_publish_devices);
+
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init pas_probe(void)
+{
+	if (!of_machine_is_compatible("PA6T-1682M") &&
+	    !of_machine_is_compatible("pasemi,pwrficient"))
+		return 0;	/* neither compatible string matches: not this machine */
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+	/*
+	 * Check for the Nemo motherboard here, if we are running on one
+	 * change the machine definition to fit
+	 */
+	if (of_machine_is_compatible("pasemi,nemo")) {
+		pm_power_off		= pas_shutdown;
+		ppc_md.name		= "A-EON Amigaone X1000";
+	}
+#endif
+
+	iommu_init_early_pasemi();	/* returns without doing anything when the IOMMU is off */
+
+	return 1;	/* machine matched */
+}
+
+define_machine(pasemi) {
+	.name			= "PA Semi PWRficient",
+	.probe			= pas_probe,
+	.setup_arch		= pas_setup_arch,
+	.discover_phbs		= pas_pci_init,
+	.init_IRQ		= pas_init_IRQ,
+	.get_irq		= mpic_get_irq,
+	.restart		= pas_restart,
+	.get_boot_time		= pas_get_boot_time,
+	.progress		= pas_progress,
+	.machine_check_exception = pas_machine_check_handler,
+};
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
new file mode 100644
index 0000000000..70ac6db027
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#include <linux/time.h>
+
+#include <asm/time.h>
+
+#include "pasemi.h"
+
+time64_t __init pas_get_boot_time(void)
+{
+	/* Placeholder boot time: always reports 2006-01-01 12:00:00 */
+	return mktime64(2006, 1, 1, 12, 0, 0);
+}
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
new file mode 100644
index 0000000000..130707ec9f
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PMAC
+	bool "Apple PowerMac based machines"
+	depends on PPC_BOOK3S && CPU_BIG_ENDIAN
+	select MPIC
+	select FORCE_PCI
+	select PPC_INDIRECT_PCI if PPC32
+	select PPC_MPC106 if PPC32
+	select PPC_64S_HASH_MMU if PPC64
+	select PPC_HASH_MMU_NATIVE
+	select ZONE_DMA if PPC32
+	default y
+
+config PPC_PMAC64
+	bool
+	depends on PPC_PMAC && PPC64
+	select MPIC
+	select U3_DART
+	select MPIC_U3_HT_IRQS
+	select GENERIC_TBSYNC
+	select PPC_970_NAP
+	default y
+
+config PPC_PMAC32_PSURGE
+	bool "Support for powersurge upgrade cards" if EXPERT
+	depends on SMP && PPC32 && PPC_PMAC
+	select PPC_SMP_MUXED_IPI
+	select IRQ_DOMAIN_NOMAP
+	default y
+	help
+	  The powersurge cpu boards can be used in the generation
+	  of powermacs that have a socket for an upgradeable cpu card,
+	  including the 7500, 8500, 9500, 9600.  Support exists for
+	  both dual and quad socket upgrade cards.
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
new file mode 100644
index 0000000000..cf85f0662d
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS_bootx_init.o  		+= -fPIC
+CFLAGS_bootx_init.o		+= -fno-stack-protector
+
+KASAN_SANITIZE_bootx_init.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_bootx_init.o  		+= -DDISABLE_BRANCH_PROFILING
+endif
+
+ifdef CONFIG_FUNCTION_TRACER
+# Do not trace early boot code
+CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
+endif
+
+obj-y				+= pic.o setup.o time.o feature.o pci.o \
+				   sleep.o low_i2c.o cache.o pfunc_core.o \
+				   pfunc_base.o udbg_scc.o udbg_adb.o
+obj-$(CONFIG_PMAC_BACKLIGHT)	+= backlight.o
+# CONFIG_NVRAM is an arch-independent tristate symbol, but for pmac32 we really
+# need it to be a bool.  Cheat here and treat CONFIG_NVRAM=m as if it were
+# CONFIG_NVRAM=y.
+obj-$(CONFIG_NVRAM:m=y)		+= nvram.o
+obj-$(CONFIG_PPC32)		+= bootx_init.o
+obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c
new file mode 100644
index 0000000000..aeb79a8b3e
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/backlight.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Miscellaneous procedures for dealing with the PowerMac hardware.
+ * Contains support for the backlight.
+ *
+ *   Copyright (C) 2000 Benjamin Herrenschmidt
+ *   Copyright (C) 2006 Michael Hanselmann <linux-kernel@hansmi.ch>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/fb.h>
+#include <linux/backlight.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <asm/backlight.h>
+
+#define OLD_BACKLIGHT_MAX 15
+
+static void pmac_backlight_key_worker(struct work_struct *work);
+static void pmac_backlight_set_legacy_worker(struct work_struct *work);
+
+static DECLARE_WORK(pmac_backlight_key_work, pmac_backlight_key_worker);
+static DECLARE_WORK(pmac_backlight_set_legacy_work, pmac_backlight_set_legacy_worker);
+
+/* Although these variables are used in interrupt context, it makes no sense to
+ * protect them. No user is able to produce enough key events per second and
+ * notice the errors that might happen.
+ */
+static int pmac_backlight_key_queued;
+static int pmac_backlight_set_legacy_queued;
+
+/* The via-pmu code allows the backlight to be grabbed, in which case the
+ * in-kernel control of the brightness needs to be disabled. This should
+ * only be used by really old PowerBooks.
+ */
+static atomic_t kernel_backlight_disabled = ATOMIC_INIT(0);
+
+/* Protect the pmac_backlight variable below.
+   You should hold this lock when using the pmac_backlight pointer to
+   prevent its potential removal. */
+DEFINE_MUTEX(pmac_backlight_mutex);
+
+/* Main backlight storage
+ *
+ * Backlight drivers in this variable are required to have the "ops"
+ * attribute set and to have an update_status function.
+ *
+ * We can only store one backlight here, but since Apple laptops have only one
+ * internal display, it doesn't matter. Other backlight drivers can be used
+ * independently.
+ *
+ */
+struct backlight_device *pmac_backlight;
+
+int pmac_has_backlight_type(const char *type)
+{
+	struct device_node *np = of_find_node_by_name(NULL, "backlight");
+	const char *ctrl;
+	int matched;
+
+	if (!np)
+		return 0;
+
+	/* Prefix match: only the first strlen(type) bytes are compared */
+	ctrl = of_get_property(np, "backlight-control", NULL);
+	matched = ctrl && strncmp(ctrl, type, strlen(type)) == 0;
+	of_node_put(np);
+
+	return matched;
+}
+
+int pmac_backlight_curve_lookup(struct fb_info *info, int value)
+{
+	int level = FB_BACKLIGHT_LEVELS - 1;
+	int bound = 0;
+	int idx;
+
+	/* Without a backlight-capable framebuffer, report the top level */
+	if (!info || !info->bl_dev)
+		return level;
+
+	/* Seed the search bound with the largest curve entry */
+	for (idx = 0; idx < FB_BACKLIGHT_LEVELS; idx++)
+		bound = max((int)info->bl_curve[idx], bound);
+
+	/* Keep the index whose entry lies strictly closer than the bound */
+	for (idx = 0; idx < FB_BACKLIGHT_LEVELS; idx++) {
+		int dist = abs(info->bl_curve[idx] - value);
+		if (dist >= bound)
+			continue;
+		bound = dist;
+		level = idx;
+	}
+	return level;
+}
+
+static void pmac_backlight_key_worker(struct work_struct *work)
+{
+	struct backlight_properties *props;
+	int step, target;
+
+	if (atomic_read(&kernel_backlight_disabled))
+		return;
+
+	mutex_lock(&pmac_backlight_mutex);
+	if (!pmac_backlight)
+		goto out_unlock;
+
+	/* One key press moves by 1/15th of the range; non-zero means down */
+	props = &pmac_backlight->props;
+	step = props->max_brightness / 15;
+	target = props->brightness + (pmac_backlight_key_queued ? -step : step);
+	if (target < 0)
+		target = 0;
+	else if (target > props->max_brightness)
+		target = props->max_brightness;
+
+	props->brightness = target;
+	backlight_update_status(pmac_backlight);
+out_unlock:
+	mutex_unlock(&pmac_backlight_mutex);
+}
+
+/* Called in interrupt context: only records the request and defers it */
+void pmac_backlight_key(int direction)
+{
+	if (atomic_read(&kernel_backlight_disabled))
+		return;
+	/*
+	 * Several interrupts may arrive before the work item runs; it then
+	 * runs once and sees only the most recently stored direction.
+	 */
+	pmac_backlight_key_queued = direction;
+	schedule_work(&pmac_backlight_key_work);
+}
+
+static int __pmac_backlight_set_legacy_brightness(int brightness)
+{
+	struct backlight_properties *props;
+	int error = -ENXIO;
+
+	mutex_lock(&pmac_backlight_mutex);
+	if (!pmac_backlight)
+		goto out_unlock;
+
+	/* Rescale from the legacy 0..OLD_BACKLIGHT_MAX range */
+	props = &pmac_backlight->props;
+	props->brightness = brightness * (props->max_brightness + 1) /
+			    (OLD_BACKLIGHT_MAX + 1);
+
+	/* Clamp into [0, max_brightness] before notifying the driver */
+	if (props->brightness > props->max_brightness)
+		props->brightness = props->max_brightness;
+	else if (props->brightness < 0)
+		props->brightness = 0;
+
+	backlight_update_status(pmac_backlight);
+	error = 0;
+out_unlock:
+	mutex_unlock(&pmac_backlight_mutex);
+	return error;
+}
+
+static void pmac_backlight_set_legacy_worker(struct work_struct *work)
+{
+	/* Apply the queued legacy value unless kernel control is disabled */
+	if (!atomic_read(&kernel_backlight_disabled))
+		__pmac_backlight_set_legacy_brightness(
+			pmac_backlight_set_legacy_queued);
+}
+
+/* Interrupt-context entry point: stash the value, apply it from a work item */
+void pmac_backlight_set_legacy_brightness_pmu(int brightness)
+{
+	if (atomic_read(&kernel_backlight_disabled))
+		return;
+	pmac_backlight_set_legacy_queued = brightness;
+	schedule_work(&pmac_backlight_set_legacy_work);
+}
+
+int pmac_backlight_set_legacy_brightness(int brightness)
+{
+	return __pmac_backlight_set_legacy_brightness(brightness);	/* synchronous; helper takes the mutex */
+}
+
+int pmac_backlight_get_legacy_brightness(void)
+{
+	const struct backlight_properties *props;
+	int result = -ENXIO;
+
+	mutex_lock(&pmac_backlight_mutex);
+	if (pmac_backlight) {
+		/* Map the current brightness back onto the legacy
+		 * 0..OLD_BACKLIGHT_MAX scale */
+		props = &pmac_backlight->props;
+
+		result = props->brightness * (OLD_BACKLIGHT_MAX + 1) /
+			 (props->max_brightness + 1);
+	}
+	mutex_unlock(&pmac_backlight_mutex);
+
+	return result;
+}
+
+void pmac_backlight_disable(void)
+{
+	atomic_inc(&kernel_backlight_disabled);	/* counter, not a flag: undone by pmac_backlight_enable() */
+}
+
+void pmac_backlight_enable(void)
+{
+	atomic_dec(&kernel_backlight_disabled);	/* in-kernel control resumes once the count is back to 0 */
+}
+
+EXPORT_SYMBOL_GPL(pmac_backlight);
+EXPORT_SYMBOL_GPL(pmac_backlight_mutex);
+EXPORT_SYMBOL_GPL(pmac_has_backlight_type);
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
new file mode 100644
index 0000000000..72eb99aba4
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Early boot support code for BootX bootloader
+ *
+ *  Copyright (C) 2005 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <generated/utsrelease.h>
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/page.h>
+#include <asm/bootx.h>
+#include <asm/btext.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+
+#undef DEBUG
+#define SET_BOOT_BAT
+
+#ifdef DEBUG
+#define DBG(fmt...) do { bootx_printf(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+extern void __start(unsigned long r3, unsigned long r4, unsigned long r5);
+
+static unsigned long __initdata bootx_dt_strbase;
+static unsigned long __initdata bootx_dt_strend;
+static unsigned long __initdata bootx_node_chosen;
+static boot_infos_t * __initdata bootx_info;
+static char __initdata bootx_disp_path[256];
+
+/* Is boot-info compatible ? */
+#define BOOT_INFO_IS_COMPATIBLE(bi) \
+	((bi)->compatible_version <= BOOT_INFO_VERSION)
+#define BOOT_INFO_IS_V2_COMPATIBLE(bi)	((bi)->version >= 2)
+#define BOOT_INFO_IS_V4_COMPATIBLE(bi)	((bi)->version >= 4)
+
+#ifdef CONFIG_BOOTX_TEXT
+static void __init bootx_printf(const char *format, ...)
+{
+	const char *p, *q, *s;
+	va_list args;
+	unsigned long v;
+	/* Minimal formatter for the boot text console: only %s and %x */
+	va_start(args, format);
+	for (p = format; *p != 0; p = q) {
+		for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q)
+			;	/* find the end of the literal run */
+		if (q > p)
+			btext_drawtext(p, q - p);
+		if (*q == 0)
+			break;
+		if (*q == '\n') {	/* newline is drawn as "\r\n" with flushes around it */
+			++q;
+			btext_flushline();
+			btext_drawstring("\r\n");
+			btext_flushline();
+			continue;
+		}
+		++q;	/* step over the '%' */
+		if (*q == 0)
+			break;
+		switch (*q) {	/* any other conversion consumes no argument */
+		case 's':
+			++q;
+			s = va_arg(args, const char *);
+			if (s == NULL)
+				s = "<NULL>";
+			btext_drawstring(s);
+			break;
+		case 'x':
+			++q;
+			v = va_arg(args, unsigned long);	/* every %x argument is fetched as unsigned long */
+			btext_drawhex(v);
+			break;
+		}
+	}
+	va_end(args);
+}
+#else /* CONFIG_BOOTX_TEXT */
+static void __init bootx_printf(const char *format, ...) {}
+#endif /* CONFIG_BOOTX_TEXT */
+
+static void * __init bootx_early_getprop(unsigned long base,
+					 unsigned long node,
+					 char *prop)
+{
+	struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node);
+	struct bootx_dt_prop *pp;
+	u32 off;
+
+	/* Property links are offsets from base; a zero offset ends the list */
+	for (off = np->properties; off != 0; off = pp->next) {
+		const char *name;
+
+		pp = (struct bootx_dt_prop *)(base + off);
+		name = (char *)((unsigned long)pp->name + base);
+		if (strcmp(name, prop) == 0)
+			return (void *)((unsigned long)pp->value + base);
+	}
+	return NULL;
+}
+
+#define dt_push_token(token, mem) \
+	do { \
+		*(mem) = ALIGN(*(mem),4); /* round the cursor up to 4 bytes */ \
+		*((u32 *)*(mem)) = token; /* store one 32-bit cell there */ \
+		*(mem) += 4; /* advance the cursor past it */ \
+	} while(0)
+
+static unsigned long __init bootx_dt_find_string(char *str)
+{
+	char *base = (char *)bootx_dt_strbase;
+	char *end = (char *)bootx_dt_strend;
+	char *cur;
+
+	/* The first 4 bytes are skipped, so offset 0 can mean "not found" */
+	for (cur = base + 4; cur < end; cur += strlen(cur) + 1) {
+		if (strcmp(cur, str) == 0)
+			return cur - base;
+	}
+	return 0;
+}
+
+static void __init bootx_dt_add_prop(char *name, void *data, int size,
+				  unsigned long *mem_end)
+{
+	unsigned long str_off = bootx_dt_find_string(name);
+
+	if (!data)
+		size = 0;
+	if (!str_off) {
+		bootx_printf("WARNING: Can't find string index for <%s>\n",
+			     name);
+		return;
+	}
+	if (size > 0x20000) {
+		bootx_printf("WARNING: ignoring large property ");
+		bootx_printf("%s length 0x%x\n", name, size);
+		return;
+	}
+	/* Record layout: OF_DT_PROP, length, name offset, then the payload */
+	dt_push_token(OF_DT_PROP, mem_end);
+	dt_push_token(size, mem_end);
+	dt_push_token(str_off, mem_end);
+	if (size == 0 || data == NULL)
+		return;
+	memcpy((void *)*mem_end, data, size);
+	*mem_end = ALIGN(*mem_end + size, 4);
+}
+
+static void __init bootx_add_chosen_props(unsigned long base,
+					  unsigned long *mem_end)
+{
+	unsigned long bi_addr = (unsigned long)bootx_info;
+	u32 cell;
+
+	bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end);
+	if (bootx_info->kernelParamsOffset) {
+		char *cmd = (char *)bi_addr + bootx_info->kernelParamsOffset;
+
+		bootx_dt_add_prop("bootargs", cmd, strlen(cmd) + 1, mem_end);
+	}
+	if (bootx_info->ramDisk) {
+		cell = bi_addr + bootx_info->ramDisk;	/* boot_info address + offset */
+		bootx_dt_add_prop("linux,initrd-start", &cell, 4, mem_end);
+		cell += bootx_info->ramDiskSize;
+		bootx_dt_add_prop("linux,initrd-end", &cell, 4, mem_end);
+	}
+	if (bootx_disp_path[0] != '\0')
+		bootx_dt_add_prop("linux,stdout-path", bootx_disp_path,
+				  strlen(bootx_disp_path) + 1, mem_end);
+}
+
+static void __init bootx_add_display_props(unsigned long base,
+					   unsigned long *mem_end,
+					   int has_real_node)
+{
+	boot_infos_t *bi = bootx_info;
+	u32 cell;
+
+	if (!has_real_node) {
+		bootx_dt_add_prop("linux,bootx-noscreen", NULL, 0, mem_end);
+	} else {
+		bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end);
+		bootx_dt_add_prop("linux,opened", NULL, 0, mem_end);
+	}
+	cell = bi->dispDeviceDepth;
+	bootx_dt_add_prop("linux,bootx-depth", &cell, 4, mem_end);
+	cell = bi->dispDeviceRect[2] - bi->dispDeviceRect[0];
+	bootx_dt_add_prop("linux,bootx-width", &cell, 4, mem_end);
+	cell = bi->dispDeviceRect[3] - bi->dispDeviceRect[1];
+	bootx_dt_add_prop("linux,bootx-height", &cell, 4, mem_end);
+	cell = bi->dispDeviceRowBytes;
+	bootx_dt_add_prop("linux,bootx-linebytes", &cell, 4, mem_end);
+	cell = (u32)bi->dispDeviceBase;
+	if (!cell)	/* no device base: fall back to the logical one */
+		cell = (u32)bi->logicalDisplayBase;
+	cell += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
+	cell += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
+	bootx_dt_add_prop("linux,bootx-addr", &cell, 4, mem_end);
+}
+
+static void __init bootx_dt_add_string(char *s, unsigned long *mem_end)
+{
+	unsigned int nbytes = strlen(s) + 1;	/* copy the NUL too */
+	memcpy((void *)*mem_end, s, nbytes);
+	bootx_dt_strend = *mem_end += nbytes;
+}
+
+static void __init bootx_scan_dt_build_strings(unsigned long base,
+					       unsigned long node,
+					       unsigned long *mem_end)
+{
+	struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node);
+	struct bootx_dt_node *child;
+	struct bootx_dt_prop *pp;
+	char *namep;
+	u32 off;
+
+	/*
+	 * Resolve the node path; a missing one is reported and then
+	 * treated as an empty string.
+	 */
+	namep = np->full_name ? (char *)(base + np->full_name) : NULL;
+	if (!namep) {
+		bootx_printf("Node without a full name !\n");
+		namep = "";
+	}
+	DBG("* strings: %s\n", namep);
+
+	/* /chosen: pre-register the names of the properties added later */
+	if (strcmp(namep, "/chosen") == 0) {
+		DBG(" detected /chosen ! adding properties names !\n");
+		bootx_dt_add_string("linux,bootx", mem_end);
+		bootx_dt_add_string("linux,stdout-path", mem_end);
+		bootx_dt_add_string("linux,initrd-start", mem_end);
+		bootx_dt_add_string("linux,initrd-end", mem_end);
+		bootx_dt_add_string("bootargs", mem_end);
+		bootx_node_chosen = node;
+	}
+	/* Display node: same for its extra names, and remember its path */
+	if (node == bootx_info->dispDeviceRegEntryOffset) {
+		DBG(" detected display ! adding properties names !\n");
+		bootx_dt_add_string("linux,boot-display", mem_end);
+		bootx_dt_add_string("linux,opened", mem_end);
+		strscpy(bootx_disp_path, namep, sizeof(bootx_disp_path));
+	}
+
+	/* Intern every property name except "name" and unnamed entries */
+	for (off = np->properties; off != 0; off = pp->next) {
+		pp = (struct bootx_dt_prop *)(base + off);
+		namep = pp->name ? (char *)(base + pp->name) : NULL;
+		if (!namep || !strcmp(namep, "name"))
+			continue;
+		if (bootx_dt_find_string(namep) == 0)
+			bootx_dt_add_string(namep, mem_end);
+	}
+
+	/*
+	 * Recurse into each child, following the child/sibling offsets.
+	 */
+	for (off = np->child; off != 0; off = child->sibling) {
+		child = (struct bootx_dt_node *)(base + off);
+		bootx_scan_dt_build_strings(base, off, mem_end);
+	}
+}
+
+static void __init bootx_scan_dt_build_struct(unsigned long base,
+					      unsigned long node,
+					      unsigned long *mem_end)
+{
+	struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node);
+	u32 *cpp, *ppp = &np->properties;
+	char *namep, *p, *ep, *lp;
+	int l;
+
+	dt_push_token(OF_DT_BEGIN_NODE, mem_end);
+
+	/* get the node's full name; a node without one is emitted as "" */
+	namep = np->full_name ? (char *)(base + np->full_name) : NULL;
+	if (namep == NULL)
+		namep = "";
+	l = strlen(namep);
+
+	DBG("* struct: %s\n", namep);
+
+	/* Fixup an Apple bug where they have bogus \0 chars in the
+	 * middle of the path in some properties, and extract
+	 * the unit name (everything after the last '/').
+	 */
+	memcpy((void *)*mem_end, namep, l + 1);
+	namep = (char *)*mem_end;	/* from here on, edit the copy in the output area */
+	for (lp = p = namep, ep = namep + l; p < ep; p++) {
+		if (*p == '/')
+			lp = namep;	/* restart the output at each '/' */
+		else if (*p != 0)
+			*lp++ = *p;
+	}
+	*lp = 0;
+	*mem_end = ALIGN((unsigned long)lp + 1, 4);
+
+	/* get and store all properties */
+	while (*ppp) {
+		struct bootx_dt_prop *pp =
+			(struct bootx_dt_prop *)(base + *ppp);
+
+		namep = pp->name ? (char *)(base + pp->name) : NULL;
+		/* Skip "name" (and properties with no name at all) */
+ 		if (namep == NULL || !strcmp(namep, "name"))
+ 			goto next;
+		/* Skip "bootargs" in /chosen too as we replace it */
+		if (node == bootx_node_chosen && !strcmp(namep, "bootargs"))
+			goto next;
+
+		/* emit the property record; a missing value is passed as NULL */
+		bootx_dt_add_prop(namep,
+				  pp->value ? (void *)(base + pp->value): NULL,
+				  pp->length, mem_end);
+	next:
+		ppp = &pp->next;
+	}
+	/* Append the synthesized properties for /chosen or the display node */
+	if (node == bootx_node_chosen) {
+		bootx_add_chosen_props(base, mem_end);
+		if (bootx_info->dispDeviceRegEntryOffset == 0)
+			bootx_add_display_props(base, mem_end, 0);
+	}
+	else if (node == bootx_info->dispDeviceRegEntryOffset)
+		bootx_add_display_props(base, mem_end, 1);
+
+	/* do all our children */
+	cpp = &np->child;
+	while(*cpp) {
+		np = (struct bootx_dt_node *)(base + *cpp);
+		bootx_scan_dt_build_struct(base, *cpp, mem_end);
+		cpp = &np->sibling;
+	}
+
+	dt_push_token(OF_DT_END_NODE, mem_end);
+}
+
+static unsigned long __init bootx_flatten_dt(unsigned long start)
+{
+	boot_infos_t *bi = bootx_info;
+	unsigned long mem_start, mem_end;
+	struct boot_param_header *hdr;
+	unsigned long base;
+	u64 *rsvmap;
+
+	/* Start using memory after the big blob passed by BootX, get
+	 * some space for the header
+	 */
+	mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4);
+	DBG("Boot params header at: %x\n", mem_start);
+	hdr = (struct boot_param_header *)mem_start;
+	mem_end += sizeof(struct boot_param_header);
+	rsvmap = (u64 *)(ALIGN(mem_end, 8));
+	hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start;
+	mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64);	/* room for 8 u64 cells */
+
+	/* Get base of tree */
+	base = ((unsigned long)bi) + bi->deviceTreeOffset;
+
+	/* Build string array */
+	DBG("Building string array at: %x\n", mem_end);
+	DBG("Device Tree Base=%x\n", base);
+	bootx_dt_strbase = mem_end;
+	mem_end += 4;	/* leading 4 bytes that bootx_dt_find_string() skips */
+	bootx_dt_strend = mem_end;
+	bootx_scan_dt_build_strings(base, 4, &mem_end);	/* walk starts at offset 4 */
+	/* Add some strings */
+	bootx_dt_add_string("linux,bootx-noscreen", &mem_end);
+	bootx_dt_add_string("linux,bootx-depth", &mem_end);
+	bootx_dt_add_string("linux,bootx-width", &mem_end);
+	bootx_dt_add_string("linux,bootx-height", &mem_end);
+	bootx_dt_add_string("linux,bootx-linebytes", &mem_end);
+	bootx_dt_add_string("linux,bootx-addr", &mem_end);
+	/* Wrap up strings */
+	hdr->off_dt_strings = bootx_dt_strbase - mem_start;
+	hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase;
+
+	/* Build structure */
+	mem_end = ALIGN(mem_end, 16);
+	DBG("Building device tree structure at: %x\n", mem_end);
+	hdr->off_dt_struct = mem_end - mem_start;
+	bootx_scan_dt_build_struct(base, 4, &mem_end);
+	dt_push_token(OF_DT_END, &mem_end);
+
+	/* Finish header */
+	hdr->boot_cpuid_phys = 0;
+	hdr->magic = OF_DT_HEADER;
+	hdr->totalsize = mem_end - mem_start;
+	hdr->version = OF_DT_VERSION;
+	/* Version 16 is not backward compatible */
+	hdr->last_comp_version = 0x10;
+
+	/* Reserve the whole thing and copy the reserve map in.
+	 * NOTE(review): this comment used to mention bumping
+	 * mem_reserve_cnt, but nothing in this function does so.
+	 */
+	mem_end = ALIGN(mem_end, PAGE_SIZE);
+	DBG("End of boot params: %x\n", mem_end);
+	rsvmap[0] = mem_start;
+	rsvmap[1] = mem_end;	/* NOTE(review): an end address is stored here -- confirm the consumer expects that */
+	if (bootx_info->ramDisk) {
+		rsvmap[2] = ((unsigned long)bootx_info) + bootx_info->ramDisk;
+		rsvmap[3] = rsvmap[2] + bootx_info->ramDiskSize;
+		rsvmap[4] = 0;	/* presumably the terminating zero pair */
+		rsvmap[5] = 0;
+	} else {
+		rsvmap[2] = 0;
+		rsvmap[3] = 0;
+	}
+
+	return (unsigned long)hdr;
+}
+
+
+#ifdef CONFIG_BOOTX_TEXT
+static void __init btext_welcome(boot_infos_t *bi)
+{
+	unsigned long flags;
+	unsigned long pvr;
+	/* Banner on the boot text console: kernel version, addresses, CPU registers */
+	bootx_printf("Welcome to Linux, kernel " UTS_RELEASE "\n");
+	bootx_printf("\nlinked at        : 0x%x", KERNELBASE);
+	bootx_printf("\nframe buffer at  : 0x%x", bi->dispDeviceBase);
+	bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase);
+	bootx_printf(" (log)");
+	bootx_printf("\nklimit           : 0x%x",(unsigned long)_end);
+	bootx_printf("\nboot_info at     : 0x%x", bi);
+	__asm__ __volatile__ ("mfmsr %0" : "=r" (flags));
+	bootx_printf("\nMSR              : 0x%x", flags);
+	__asm__ __volatile__ ("mfspr %0, 287" : "=r" (pvr));	/* SPR 287, printed as PVR */
+	bootx_printf("\nPVR              : 0x%x", pvr);
+	pvr >>= 16;	/* keep only the upper half for the model tests below */
+	if (pvr > 1) {
+	    __asm__ __volatile__ ("mfspr %0, 1008" : "=r" (flags));	/* SPR 1008, printed as HID0 */
+	    bootx_printf("\nHID0             : 0x%x", flags);
+	}
+	if (pvr == 8 || pvr == 12 || pvr == 0x800c) {
+	    __asm__ __volatile__ ("mfspr %0, 1019" : "=r" (flags));	/* SPR 1019, printed as ICTC */
+	    bootx_printf("\nICTC             : 0x%x", flags);
+	}
+#ifdef DEBUG
+	bootx_printf("\n\n");
+	bootx_printf("bi->deviceTreeOffset   : 0x%x\n",
+		     bi->deviceTreeOffset);
+	bootx_printf("bi->deviceTreeSize     : 0x%x\n",
+		     bi->deviceTreeSize);
+#endif
+	bootx_printf("\n\n");
+}
+#endif /* CONFIG_BOOTX_TEXT */
+
+void __init bootx_init(unsigned long r3, unsigned long r4)
+{
+	boot_infos_t *bi = (boot_infos_t *) r4;
+	unsigned long hdr;
+	unsigned long space;
+	unsigned long ptr;
+	char *model;
+	unsigned long offset = reloc_offset();
+
+	reloc_got2(offset);
+	/* r4 is the address of the BootX boot_infos block; r3 is not used here */
+	bootx_info = bi;
+
+	/* We haven't cleared any bss at this point, make sure
+	 * what we need is initialized
+	 */
+	bootx_dt_strbase = bootx_dt_strend = 0;
+	bootx_node_chosen = 0;
+	bootx_disp_path[0] = 0;
+	/* Boot info older than v2: use the device base as the logical base */
+	if (!BOOT_INFO_IS_V2_COMPATIBLE(bi))
+		bi->logicalDisplayBase = bi->dispDeviceBase;
+
+	/* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */
+	if (bi->dispDeviceDepth == 16)
+		bi->dispDeviceDepth = 15;
+
+	/* NOTE(review): ptr computed below is not what btext_setup_display() receives */
+#ifdef CONFIG_BOOTX_TEXT
+	ptr = (unsigned long)bi->logicalDisplayBase;
+	ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
+	ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
+	btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0],
+			    bi->dispDeviceRect[3] - bi->dispDeviceRect[1],
+			    bi->dispDeviceDepth, bi->dispDeviceRowBytes,
+			    (unsigned long)bi->logicalDisplayBase);
+	btext_clearscreen();
+	btext_flushscreen();
+#endif /* CONFIG_BOOTX_TEXT */
+
+	/*
+	 * Test if boot-info is compatible.  The message is only visible
+	 * with CONFIG_BOOTX_TEXT; either way there is nothing much we can
+	 * do with an incompatible version, except hang the processor...
+	 *
+	 * I'll try to keep enough of boot-info compatible in the
+	 * future to always allow display of this message;
+	 */
+	if (!BOOT_INFO_IS_COMPATIBLE(bi)) {
+		bootx_printf(" !!! WARNING - Incompatible version"
+			     " of BootX !!!\n\n\n");
+		for (;;)
+			;
+	}
+	if (bi->architecture != BOOT_ARCH_PCI) {
+		bootx_printf(" !!! WARNING - Unsupported machine"
+			     " architecture !\n");
+		for (;;)
+			;
+	}
+
+#ifdef CONFIG_BOOTX_TEXT
+	btext_welcome(bi);
+#endif
+
+	/* New BootX enters kernel with MMU off, i/os are not allowed
+	 * here. This hack will have been done by the bootstrap anyway.
+	 */
+	if (bi->version < 4) {
+		/*
+		 * XXX If this is an iMac, turn off the USB controller.
+		 */
+		model = (char *) bootx_early_getprop(r4 + bi->deviceTreeOffset,
+						     4, "model");
+		if (model
+		    && (strcmp(model, "iMac,1") == 0
+			|| strcmp(model, "PowerMac1,1") == 0)) {
+			bootx_printf("iMac,1 detected, shutting down USB\n");
+			out_le32((unsigned __iomem *)0x80880008, 1);	/* XXX */
+		}
+	}
+
+	/* Get a pointer that points above the device tree, args, ramdisk,
+	 * etc... to use for generating the flattened tree
+	 */
+	if (bi->version < 5) {
+		space = bi->deviceTreeOffset + bi->deviceTreeSize;
+		if (bi->ramDisk >= space)
+			space = bi->ramDisk + bi->ramDiskSize;
+	} else
+		space = bi->totalParamsSize;
+
+	bootx_printf("Total space used by parameters & ramdisk: 0x%x\n", space);
+
+	/* New BootX will have flushed all TLBs and enters kernel with
+	 * MMU switched OFF, so this should not be useful anymore.
+	 */
+	if (bi->version < 4) {
+		unsigned long x __maybe_unused;
+
+		bootx_printf("Touching pages...\n");
+
+		/*
+		 * Touch each page to make sure the PTEs for them
+		 * are in the hash table - the aim is to try to avoid
+		 * getting DSI exceptions while copying the kernel image.
+		 */
+		for (ptr = ((unsigned long) &_stext) & PAGE_MASK;
+		     ptr < (unsigned long)bi + space; ptr += PAGE_SIZE)
+			x = *(volatile unsigned long *)ptr;
+	}
+
+	/* Ok, now we need to generate a flattened device-tree to pass
+	 * to the kernel
+	 */
+	bootx_printf("Preparing boot params...\n");
+
+	hdr = bootx_flatten_dt(space);
+
+#ifdef CONFIG_BOOTX_TEXT
+#ifdef SET_BOOT_BAT
+	bootx_printf("Preparing BAT...\n");
+	btext_prepare_BAT();
+#else
+	btext_unmap();
+#endif
+#endif
+	/* Undo the GOT relocation done on entry before jumping to __start */
+	reloc_got2(-offset);
+
+	__start(hdr, KERNELBASE + offset, 0);
+}
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
new file mode 100644
index 0000000000..b8ae56e9f4
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low-level cache management functions
+ * used for sleep and CPU speed changes on Apple machines.
+ * (In fact the only thing that is Apple-specific is that we assume
+ * that we can read from ROM at physical address 0xfff00000.)
+ *
+ *    Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and
+ *                       Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/feature-fixups.h>
+
+/*
+ * Flush and disable all data caches (dL1, L2, L3). This is used
+ * when going to sleep, when doing a PMU based cpufreq transition,
+ * or when "offlining" a CPU on SMP machines. This code is over
+ * paranoid, but I've had enough issues with various CPU revs and
+ * bugs that I decided it was worth being over cautious
+ */
+
+_GLOBAL(flush_disable_caches)
+#ifndef CONFIG_PPC_BOOK3S_32
+	blr				/* returns immediately when not built for Book3S-32 */
+#else
+BEGIN_FTR_SECTION
+	b	flush_disable_745x	/* section active when CPU_FTR_SPEC7450 is set */
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+BEGIN_FTR_SECTION
+	b	flush_disable_75x	/* section active when CPU_FTR_L2CR is set */
+END_FTR_SECTION_IFSET(CPU_FTR_L2CR)
+	b	__flush_disable_L1	/* otherwise: L1-only routine, defined outside this file */
+
+/* This is the code for G3 and 74[01]0 */
+flush_disable_75x:
+	mflr	r10
+
+	/* Turn off EE and DR in MSR */
+	mfmsr	r11
+	rlwinm	r0,r11,0,~MSR_EE
+	rlwinm	r0,r0,0,~MSR_DR
+	sync
+	mtmsr	r0
+	isync
+
+	/* Stop DST streams */
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+	/* Stop DPM */
+	mfspr	r8,SPRN_HID0		/* Save SPRN_HID0 in r8 */
+	rlwinm	r4,r8,0,12,10		/* Turn off HID0[DPM] */
+	sync
+	mtspr	SPRN_HID0,r4		/* Disable DPM */
+	sync
+
+	/* Disp-flush L1. We have a weird problem here that I never
+	 * totally figured out. On 750FX, using the ROM for the flush
+	 * results in a non-working flush. We use that workaround for
+	 * now until I finally understand what's going on. --BenH
+	 */
+	/* r4 = base address to read from, r5 = number of 32-byte lines */
+	/* ROM base by default */
+	lis	r4,0xfff0
+	mfpvr	r3
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,0x7000
+	bne+	1f
+	/* RAM base on 750FX */
+	li	r4,0
+1:	li	r5,0x4000		/* count goes in r5: r4 must keep the base */
+	mtctr	r5
+1:	lwz	r0,0(r4)
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+
+	/* Disable / invalidate / enable L1 data */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,~(HID0_DCE | HID0_ICE)
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+	ori	r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	xori	r3,r3,(HID0_DCI|HID0_ICFI)
+	mtspr	SPRN_HID0,r3
+	sync
+
+	/* Read the current L2CR value into r5 (reused when disabling L2) */
+	mfspr	r5,SPRN_L2CR
+	/* Set to data-only (pre-745x bit) */
+	oris	r3,r5,L2CR_L2DO@h
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r3
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	/* disp-flush L2. The interesting thing here is that the L2 can be
+	 * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory
+	 * but that is probably fine. We disp-flush over 4Mb to be safe
+	 */
+	lis	r4,2
+	mtctr	r4
+	lis	r4,0xfff0
+1:	lwz	r0,0(r4)
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+	lis	r4,2
+	mtctr	r4
+	lis	r4,0xfff0
+1:	dcbf	0,r4
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+
+	/* now disable L2 */
+	rlwinm	r5,r5,0,~L2CR_L2E
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r5
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	/* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */
+	oris	r4,r5,L2CR_L2I@h
+	mtspr	SPRN_L2CR,r4
+	sync
+	isync
+
+	/* Wait for the invalidation to complete */
+1:	mfspr	r3,SPRN_L2CR
+	rlwinm.	r0,r3,0,31,31
+	bne	1b
+
+	/* Clear L2I */
+	xoris	r4,r4,L2CR_L2I@h
+	sync
+	mtspr	SPRN_L2CR,r4
+	sync
+
+	/* now disable the L1 data cache */
+	mfspr	r0,SPRN_HID0
+	rlwinm	r0,r0,0,~(HID0_DCE|HID0_ICE)
+	mtspr	SPRN_HID0,r0
+	sync
+	isync
+
+	/* Restore HID0[DPM] to whatever it was before */
+	sync
+	mfspr	r0,SPRN_HID0
+	rlwimi	r0,r8,0,11,11		/* Turn back HID0[DPM] */
+	mtspr	SPRN_HID0,r0
+	sync
+
+	/* restore DR and EE */
+	sync
+	mtmsr	r11
+	isync
+
+	mtlr	r10
+	blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_75x)
+
+/* This code is for 745x processors */
+flush_disable_745x:
+	/* Turn off EE and DR in MSR */
+	mfmsr	r11
+	rlwinm	r0,r11,0,~MSR_EE
+	rlwinm	r0,r0,0,~MSR_DR
+	sync
+	mtmsr	r0
+	isync
+
+	/* Stop prefetch streams */
+	PPC_DSSALL
+	sync
+
+	/* Disable L2 prefetching */
+	mfspr	r0,SPRN_MSSCR0
+	rlwinm	r0,r0,0,0,29		/* clear the two low-order bits */
+	mtspr	SPRN_MSSCR0,r0
+	sync
+	isync
+	lis	r4,0
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+
+	/* Due to a bug with the HW flush on some CPU revs, we occasionally
+	 * experience data corruption. I'm adding a displacement flush along
+	 * with a dcbf loop over a few Mb to "help". The problem isn't totally
+	 * fixed by this in theory, but at least, in practice, I couldn't reproduce
+	 * it even with a big hammer...
+	 */
+
+        lis     r4,0x0002		/* 0x20000 lines of 32 bytes = 4MB */
+        mtctr   r4
+ 	li      r4,0
+1:
+        lwz     r0,0(r4)
+        addi    r4,r4,32                /* Go to start of next cache line */
+        bdnz    1b
+        isync
+
+        /* Now, flush the first 4MB of memory */
+        lis     r4,0x0002
+        mtctr   r4
+	li      r4,0
+        sync
+1:
+        dcbf    0,r4
+        addi    r4,r4,32                /* Go to start of next cache line */
+        bdnz    1b
+
+	/* Flush and disable the L1 data cache */
+	mfspr	r6,SPRN_LDSTCR
+	lis	r3,0xfff0	/* read from ROM for displacement flush */
+	li	r4,0xfe		/* start with only way 0 unlocked */
+	li	r5,128		/* 128 lines in each way */
+1:	mtctr	r5
+	rlwimi	r6,r4,0,24,31
+	mtspr	SPRN_LDSTCR,r6
+	sync
+	isync
+2:	lwz	r0,0(r3)	/* touch each cache line */
+	addi	r3,r3,32
+	bdnz	2b
+	rlwinm	r4,r4,1,24,30	/* move on to the next way */
+	ori	r4,r4,1
+	cmpwi	r4,0xff		/* all done? */
+	bne	1b
+	/* now unlock the L1 data cache */
+	li	r4,0
+	rlwimi	r6,r4,0,24,31
+	sync
+	mtspr	SPRN_LDSTCR,r6
+	sync
+	isync
+
+	/* Flush the L2 cache using the hardware assist */
+	mfspr	r3,SPRN_L2CR
+	cmpwi	r3,0		/* check if it is enabled first */
+	bge	4f
+	oris	r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h
+	b	2f
+	/* When disabling/locking L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r0	/* lock the L2 cache */
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	ori	r0,r3,L2CR_L2HWF_745x
+	sync
+	mtspr	SPRN_L2CR,r0	/* set the hardware flush bit */
+3:	mfspr	r0,SPRN_L2CR	/* wait for it to go to 0 */
+	andi.	r0,r0,L2CR_L2HWF_745x
+	bne	3b
+	sync
+	rlwinm	r3,r3,0,~L2CR_L2E
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r3	/* disable the L2 cache */
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	oris	r4,r3,L2CR_L2I@h	/* request an L2 invalidate */
+	mtspr	SPRN_L2CR,r4
+	sync
+	isync
+1:	mfspr	r4,SPRN_L2CR	/* poll until the L2I bit reads back as 0 */
+	andis.	r0,r4,L2CR_L2I@h
+	bne	1b
+	sync
+
+BEGIN_FTR_SECTION
+	/* Flush the L3 cache using the hardware assist */
+4:	mfspr	r3,SPRN_L3CR
+	cmpwi	r3,0		/* check if it is enabled */
+	bge	6f
+	oris	r0,r3,L3CR_L3IO@h
+	ori	r0,r0,L3CR_L3DO
+	sync
+	mtspr	SPRN_L3CR,r0	/* lock the L3 cache */
+	sync
+	isync
+	ori	r0,r0,L3CR_L3HWF
+	sync
+	mtspr	SPRN_L3CR,r0	/* set the hardware flush bit */
+5:	mfspr	r0,SPRN_L3CR	/* wait for it to go to zero */
+	andi.	r0,r0,L3CR_L3HWF
+	bne	5b
+	rlwinm	r3,r3,0,~L3CR_L3E
+	sync
+	mtspr	SPRN_L3CR,r3	/* disable the L3 cache */
+	sync
+	ori	r4,r3,L3CR_L3I	/* request an L3 invalidate */
+	mtspr	SPRN_L3CR,r4
+1:	mfspr	r4,SPRN_L3CR	/* poll until the L3I bit reads back as 0 */
+	andi.	r0,r4,L3CR_L3I
+	bne	1b
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+
+6:	mfspr	r0,SPRN_HID0	/* now disable the L1 data cache */
+	rlwinm	r0,r0,0,~HID0_DCE
+	mtspr	SPRN_HID0,r0
+	sync
+	isync
+	mtmsr	r11		/* restore DR and EE */
+	isync
+	blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_745x)
+#endif	/* CONFIG_PPC_BOOK3S_32 */
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
new file mode 100644
index 0000000000..ae62d432db
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -0,0 +1,3022 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au)
+ *                          Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  TODO:
+ *
+ *   - Replace mdelay with some schedule loop if possible
+ *   - Shorten some obfuscated delays on some routines (like modem
+ *     power)
+ *   - Refcount some clocks (see darwin)
+ *   - Split split split...
+ */
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/ioport.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/sections.h>
+#include <asm/errno.h>
+#include <asm/ohare.h>
+#include <asm/heathrow.h>
+#include <asm/keylargo.h>
+#include <asm/uninorth.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/dbdma.h>
+#include <asm/pci-bridge.h>
+#include <asm/pmac_low_i2c.h>
+
+#include "pmac.h"
+
+#undef DEBUG_FEATURE
+
+#ifdef DEBUG_FEATURE
+#define DBG(fmt...) printk(KERN_DEBUG fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+extern int powersave_lowspeed;
+#endif
+
+extern int powersave_nap;
+extern struct device_node *k2_skiplist[2];
+
+/*
+ * We use a single global lock to protect accesses. Each driver has
+ * to take care of its own locking
+ */
+DEFINE_RAW_SPINLOCK(feature_lock);
+
+#define LOCK(flags)	raw_spin_lock_irqsave(&feature_lock, flags);
+#define UNLOCK(flags)	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+
+/*
+ * Instance of some macio stuffs
+ */
+struct macio_chip macio_chips[MAX_MACIO_CHIPS];
+/* Look up the macio chip that is @child or one of its ancestors; type 0 matches any. */
+struct macio_chip *macio_find(struct device_node *child, int type)
+{
+	struct device_node *np;	/* walks from child up through its parents */
+	int idx;
+
+	for (np = child; np; np = np->parent)
+		for (idx = 0; idx < MAX_MACIO_CHIPS && macio_chips[idx].of_node; idx++)
+			if (np == macio_chips[idx].of_node &&
+			    (!type || macio_chips[idx].type == type))
+				return &macio_chips[idx];
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(macio_find);
+
+static const char *macio_names[] =
+{
+	"Unknown",
+	"Grand Central",
+	"OHare",
+	"OHareII",
+	"Heathrow",
+	"Gatwick",
+	"Paddington",
+	"Keylargo",
+	"Pangea",
+	"Intrepid",
+	"K2",
+	"Shasta",
+};
+
+
+struct device_node *uninorth_node;
+u32 __iomem *uninorth_base;
+
+static u32 uninorth_rev;
+static int uninorth_maj;
+static void __iomem *u3_ht_base;
+
+/*
+ * For each motherboard family, we have a table of functions pointers
+ * that handle the various features.
+ */
+
+typedef long (*feature_call)(struct device_node *node, long param, long value);
+
+struct feature_table_entry {
+	unsigned int	selector;
+	feature_call	function;
+};
+
+struct pmac_mb_def
+{
+	const char*			model_string;	/* presumably the firmware model string - confirm */
+	const char*			model_name;	/* presumably a human readable name - confirm */
+	int				model_id;	/* compared against PMAC_TYPE_* values */
+	struct feature_table_entry*	features;	/* selector/function pairs for this board */
+	unsigned long			board_flags;	/* PMAC_MB_* bits, e.g. PMAC_MB_CAN_SLEEP */
+};
+static struct pmac_mb_def pmac_mb;	/* board description consulted by the handlers below */
+
+/*
+ * Here are the chip specific feature functions
+ */
+
+#ifndef CONFIG_PPC64
+/* Set (value != 0) or clear @mask in register @reg of the macio chip found for @node. */
+static int simple_feature_tweak(struct device_node *node, int type, int reg,
+				u32 mask, int value)
+{
+	struct macio_chip*	macio;	/* presumably used implicitly by the MACIO_* macros */
+	unsigned long		flags;
+
+	macio = macio_find(node, type);
+	if (!macio)
+		return -ENODEV;	/* no chip of that type at or above node */
+	LOCK(flags);
+	if (value)
+		MACIO_BIS(reg, mask);
+	else
+		MACIO_BIC(reg, mask);
+	(void)MACIO_IN32(reg);	/* read back, value discarded */
+	UNLOCK(flags);
+
+	return 0;
+}
+/* Enable (value != 0) or disable the SCC channel "ch-a"/"ch-b" named by @node. */
+static long ohare_htw_scc_enable(struct device_node *node, long param,
+				 long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		chan_mask;
+	unsigned long		fcr;
+	unsigned long		flags;
+	int			htw, trans;
+	unsigned long		rmask;	/* reset bit raised while the cell is brought up */
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (of_node_name_eq(node, "ch-a"))
+		chan_mask = MACIO_FLAG_SCCA_ON;
+	else if (of_node_name_eq(node, "ch-b"))
+		chan_mask = MACIO_FLAG_SCCB_ON;
+	else
+		return -ENODEV;
+
+	htw = (macio->type == macio_heathrow || macio->type == macio_paddington
+		|| macio->type == macio_gatwick);
+	/* trans is 0 on Yosemite and Yikes: there HRW_SCC_TRANS_EN_N mustn't be touched */
+	trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+		 pmac_mb.model_id != PMAC_TYPE_YIKES);
+	if (value) {
+#ifdef CONFIG_ADB_PMU
+		if ((param & 0xfff) == PMAC_SCC_IRDA)
+			pmu_enable_irled(1);
+#endif /* CONFIG_ADB_PMU */
+		LOCK(flags);
+		fcr = MACIO_IN32(OHARE_FCR);
+		/* Check whether the SCC cell needs enabling */
+		if (!(fcr & OH_SCC_ENABLE)) {
+			fcr |= OH_SCC_ENABLE;
+			if (htw) {
+				/* Side effect: this will also power up the
+				 * modem, but it's too messy to figure out on which
+				 * ports this controls the transceiver and on which
+				 * it controls the modem
+				 */
+				if (trans)
+					fcr &= ~HRW_SCC_TRANS_EN_N;
+				MACIO_OUT32(OHARE_FCR, fcr);
+				fcr |= (rmask = HRW_RESET_SCC);
+				MACIO_OUT32(OHARE_FCR, fcr);
+			} else {
+				fcr |= (rmask = OH_SCC_RESET);
+				MACIO_OUT32(OHARE_FCR, fcr);
+			}
+			UNLOCK(flags);	/* lock is dropped for the 15ms delay */
+			(void)MACIO_IN32(OHARE_FCR);
+			mdelay(15);
+			LOCK(flags);
+			fcr &= ~rmask;	/* take the reset bit back out */
+			MACIO_OUT32(OHARE_FCR, fcr);
+		}
+		if (chan_mask & MACIO_FLAG_SCCA_ON)
+			fcr |= OH_SCCA_IO;
+		if (chan_mask & MACIO_FLAG_SCCB_ON)
+			fcr |= OH_SCCB_IO;
+		MACIO_OUT32(OHARE_FCR, fcr);
+		macio->flags |= chan_mask;
+		UNLOCK(flags);
+		if (param & PMAC_SCC_FLAG_XMON)
+			macio->flags |= MACIO_FLAG_SCC_LOCKED;	/* later disables return -EPERM */
+	} else {
+		if (macio->flags & MACIO_FLAG_SCC_LOCKED)
+			return -EPERM;
+		LOCK(flags);
+		fcr = MACIO_IN32(OHARE_FCR);
+		if (chan_mask & MACIO_FLAG_SCCA_ON)
+			fcr &= ~OH_SCCA_IO;
+		if (chan_mask & MACIO_FLAG_SCCB_ON)
+			fcr &= ~OH_SCCB_IO;
+		MACIO_OUT32(OHARE_FCR, fcr);
+		if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) {	/* both channels off */
+			fcr &= ~OH_SCC_ENABLE;
+			if (htw && trans)
+				fcr |= HRW_SCC_TRANS_EN_N;
+			MACIO_OUT32(OHARE_FCR, fcr);
+		}
+		macio->flags &= ~(chan_mask);
+		UNLOCK(flags);
+		mdelay(10);
+#ifdef CONFIG_ADB_PMU
+		if ((param & 0xfff) == PMAC_SCC_IRDA)
+			pmu_enable_irled(0);
+#endif /* CONFIG_ADB_PMU */
+	}
+	return 0;
+}
+/* Set or clear OH_FLOPPY_ENABLE in OHARE_FCR according to @value; @param is unused. */
+static long ohare_floppy_enable(struct device_node *node, long param,
+				long value)
+{
+	return simple_feature_tweak(node, macio_ohare,
+		OHARE_FCR, OH_FLOPPY_ENABLE, value);
+}
+/* Set or clear OH_MESH_ENABLE in OHARE_FCR according to @value; @param is unused. */
+static long ohare_mesh_enable(struct device_node *node, long param, long value)
+{
+	return simple_feature_tweak(node, macio_ohare,
+		OHARE_FCR, OH_MESH_ENABLE, value);
+}
+/* Enable/disable IDE bus @param (0: OH_IDE0_ENABLE, 1: OH_BAY_IDE_ENABLE); else -ENODEV. */
+static long ohare_ide_enable(struct device_node *node, long param, long value)
+{
+	switch(param) {
+	case 0:
+		/* For some reason, setting the bit in set_initial_features()
+		 * doesn't stick. I'm still investigating... --BenH.
+		 */
+		if (value)
+			simple_feature_tweak(node, macio_ohare,
+				OHARE_FCR, OH_IOBUS_ENABLE, 1);	/* result ignored */
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_IDE0_ENABLE, value);
+	case 1:
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_BAY_IDE_ENABLE, value);
+	default:
+		return -ENODEV;
+	}
+}
+/* IDE reset for bus @param (0 or 1): a non-zero @value clears the bus' *_RESET_N bit. */
+static long ohare_ide_reset(struct device_node *node, long param, long value)
+{
+	switch(param) {
+	case 0:
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_IDE0_RESET_N, !value);	/* note the inversion */
+	case 1:
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_IDE1_RESET_N, !value);
+	default:
+		return -ENODEV;
+	}
+}
+/* value 1 clears OH_IOBUS_ENABLE, value 0 sets it; -EPERM without PMAC_MB_CAN_SLEEP. */
+static long ohare_sleep_state(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio = &macio_chips[0];	/* always the first chip; @node unused */
+
+	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
+		return -EPERM;
+	if (value == 1) {
+		MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE);	/* done without taking feature_lock */
+	} else if (value == 0) {
+		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+	}
+
+	return 0;	/* any other value is accepted and ignored */
+}
+/* Enable (value != 0) or disable the modem via its reset GPIO and HRW_SCC_TRANS_EN_N. */
+static long heathrow_modem_enable(struct device_node *node, long param,
+				  long value)
+{
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+
+	macio = macio_find(node, macio_unknown);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1;	/* current value with bit 0 cleared */
+	if (!value) {
+		LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+	if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+	    pmac_mb.model_id != PMAC_TYPE_YIKES) {	/* these two models are left alone */
+		LOCK(flags);
+		if (value)
+			MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+		else
+			MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+		mdelay(250);
+	}
+	if (value) {
+		LOCK(flags);	/* bit 0 goes 1 -> 0 -> 1, 250ms apart, unlocked while waiting */
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+/* Set or clear both HRW_SWIM_ENABLE and HRW_BAY_FLOPPY_ENABLE in HEATHROW_FCR. */
+static long heathrow_floppy_enable(struct device_node *node, long param,
+				   long value)
+{
+	return simple_feature_tweak(node, macio_unknown,
+		HEATHROW_FCR,
+		HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE,
+		value);
+}
+/* Enable (value != 0) or disable the MESH cell together with bit 0x04000000 of MBCR. */
+static long heathrow_mesh_enable(struct device_node *node, long param,
+				 long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, macio_unknown);
+	if (!macio)
+		return -ENODEV;
+	LOCK(flags);
+	/* Set/Clear mesh cell enable */
+	if (value)
+		MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE);
+	else
+		MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE);
+	(void)MACIO_IN32(HEATHROW_FCR);
+	udelay(10);
+	/* Set/Clear termination power (the bit is cleared when enabling) */
+	if (value)
+		MACIO_BIC(HEATHROW_MBCR, 0x04000000);
+	else
+		MACIO_BIS(HEATHROW_MBCR, 0x04000000);
+	(void)MACIO_IN32(HEATHROW_MBCR);
+	udelay(10);
+	UNLOCK(flags);
+
+	return 0;
+}
+/* Enable/disable IDE bus @param (0: HRW_IDE0_ENABLE, 1: HRW_BAY_IDE_ENABLE); else -ENODEV. */
+static long heathrow_ide_enable(struct device_node *node, long param,
+				long value)
+{
+	switch(param) {
+	case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_IDE0_ENABLE, value);
+	case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_BAY_IDE_ENABLE, value);
+	default:
+		return -ENODEV;
+	}
+}
+/* IDE reset for bus @param (0 or 1): a non-zero @value clears the bus' *_RESET_N bit. */
+static long heathrow_ide_reset(struct device_node *node, long param,
+			       long value)
+{
+	switch(param) {
+	case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_IDE0_RESET_N, !value);	/* note the inversion */
+	case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_IDE1_RESET_N, !value);
+	default:
+		return -ENODEV;
+	}
+}
+/* Enable the BMAC cell with a 10ms reset pulse (value != 0), or disable its I/O. */
+static long heathrow_bmac_enable(struct device_node *node, long param,
+				 long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (value) {
+		LOCK(flags);
+		MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE);
+		MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET);	/* reset stays set for the delay below */
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+		mdelay(10);
+		LOCK(flags);
+		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+		mdelay(10);
+	} else {
+		LOCK(flags);
+		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE);	/* no read-back or delay on this path */
+		UNLOCK(flags);
+	}
+	return 0;
+}
+/* Switch the sound clock and power bits in HEATHROW_FCR on (value != 0) or off. */
+static long heathrow_sound_enable(struct device_node *node, long param,
+				  long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	/* B&W G3 and Yikes don't support this properly (the sound
+	 * appears to never come back after being shut down), so do nothing.
+	 */
+	if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE ||
+	    pmac_mb.model_id == PMAC_TYPE_YIKES)
+		return 0;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (value) {
+		LOCK(flags);
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);	/* clock first, then power */
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+	} else {
+		LOCK(flags);
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N);	/* reverse order when turning off */
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		UNLOCK(flags);
+	}
+	return 0;
+}
+
+static u32 save_fcr[6];
+static u32 save_mbcr;
+static struct dbdma_regs save_dbdma[13];
+static struct dbdma_regs save_alt_dbdma[13];
+/* Copy five registers of each of the 13 DBDMA channels of @macio into @save[0..12]. */
+static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save)
+{
+	int i;
+
+	/* Save state & config of DBDMA channels */
+	for (i = 0; i < 13; i++) {
+		volatile struct dbdma_regs __iomem * chan = (void __iomem *)
+			(macio->base + ((0x8000+i*0x100)>>2));	/* offset 0x8000 + i*0x100, scaled by 4 */
+		save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi);
+		save[i].cmdptr = in_le32(&chan->cmdptr);
+		save[i].intr_sel = in_le32(&chan->intr_sel);
+		save[i].br_sel = in_le32(&chan->br_sel);
+		save[i].wait_sel = in_le32(&chan->wait_sel);
+	}
+}
+/* Write the values captured by dbdma_save() back to the 13 DBDMA channels of @macio. */
+static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save)
+{
+	int i;
+
+	/* Restore state & config of DBDMA channels */
+	for (i = 0; i < 13; i++) {
+		volatile struct dbdma_regs __iomem * chan = (void __iomem *)
+			(macio->base + ((0x8000+i*0x100)>>2));
+		out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16);	/* presumably stops the channel - confirm */
+		while (in_le32(&chan->status) & ACTIVE)	/* spin until ACTIVE drops; no timeout */
+			mb();
+		out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi);
+		out_le32(&chan->cmdptr, save[i].cmdptr);
+		out_le32(&chan->intr_sel, save[i].intr_sel);
+		out_le32(&chan->br_sel, save[i].br_sel);
+		out_le32(&chan->wait_sel, save[i].wait_sel);
+	}
+}
+/* Save DBDMA and feature registers of @macio, then shut cells down ahead of sleep. */
+static void heathrow_sleep(struct macio_chip *macio, int secondary)
+{
+	if (secondary) {
+		dbdma_save(macio, save_alt_dbdma);
+		save_fcr[2] = MACIO_IN32(0x38);	/* secondary chip uses slots 2 and 3 */
+		save_fcr[3] = MACIO_IN32(0x3c);
+	} else {
+		dbdma_save(macio, save_dbdma);
+		save_fcr[0] = MACIO_IN32(0x38);	/* primary chip uses slots 0 and 1 */
+		save_fcr[1] = MACIO_IN32(0x3c);
+		save_mbcr = MACIO_IN32(0x34);
+		/* Make sure sound is shut down */
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N);
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		/* This seems to be necessary as well or the fan
+		 * keeps coming up and battery drains fast */
+		MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE);
+		MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N);
+		/* Make sure eth is down even if module or sleep
+		 * won't work properly */
+		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET);
+	}
+	/* Make sure modem is shut down (done for primary and secondary alike) */
+	MACIO_OUT8(HRW_GPIO_MODEM_RESET,
+		MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1);
+	MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+	MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE);
+
+	/* Let things settle */
+	(void)MACIO_IN32(HEATHROW_FCR);
+}
+/* Write back the registers saved by heathrow_sleep() and restore the DBDMA channels. */
+static void heathrow_wakeup(struct macio_chip *macio, int secondary)
+{
+	if (secondary) {
+		MACIO_OUT32(0x38, save_fcr[2]);
+		(void)MACIO_IN32(0x38);
+		mdelay(1);
+		MACIO_OUT32(0x3c, save_fcr[3]);
+		(void)MACIO_IN32(0x38);	/* NOTE(review): reads 0x38, not the 0x3c just written */
+		mdelay(10);
+		dbdma_restore(macio, save_alt_dbdma);
+	} else {
+		MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE);	/* IOBUS is forced back on */
+		(void)MACIO_IN32(0x38);
+		mdelay(1);
+		MACIO_OUT32(0x3c, save_fcr[1]);
+		(void)MACIO_IN32(0x38);
+		mdelay(1);
+		MACIO_OUT32(0x34, save_mbcr);
+		(void)MACIO_IN32(0x38);
+		mdelay(10);
+		dbdma_restore(macio, save_dbdma);
+	}
+}
+/* Put the Heathrow chips to sleep (value 1) or wake them (value 0); -EPERM if unsupported. */
+static long heathrow_sleep_state(struct device_node *node, long param,
+				 long value)
+{
+	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
+		return -EPERM;
+	if (value == 1) {
+		if (macio_chips[1].type == macio_gatwick)
+			heathrow_sleep(&macio_chips[0], 1);	/* NOTE(review): chip 0 passed, slot 1 tested - confirm */
+		heathrow_sleep(&macio_chips[0], 0);
+	} else if (value == 0) {
+		heathrow_wakeup(&macio_chips[0], 0);
+		if (macio_chips[1].type == macio_gatwick)
+			heathrow_wakeup(&macio_chips[0], 1);	/* NOTE(review): same question as above */
+	}
+	return 0;
+}
+/* Enable (value != 0) or disable SCC channel "ch-a"/"ch-b" of @node in KEYLARGO_FCR0. */
+static long core99_scc_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+	unsigned long		chan_mask;
+	u32			fcr;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (of_node_name_eq(node, "ch-a"))
+		chan_mask = MACIO_FLAG_SCCA_ON;
+	else if (of_node_name_eq(node, "ch-b"))
+		chan_mask = MACIO_FLAG_SCCB_ON;
+	else
+		return -ENODEV;
+
+	if (value) {
+		int need_reset_scc = 0;
+		int need_reset_irda = 0;
+
+		LOCK(flags);
+		fcr = MACIO_IN32(KEYLARGO_FCR0);
+		/* Check whether the SCC cell needs enabling */
+		if (!(fcr & KL0_SCC_CELL_ENABLE)) {
+			fcr |= KL0_SCC_CELL_ENABLE;
+			need_reset_scc = 1;
+		}
+		if (chan_mask & MACIO_FLAG_SCCA_ON) {
+			fcr |= KL0_SCCA_ENABLE;
+			/* Don't enable line drivers for I2S modem */
+			if ((param & 0xfff) == PMAC_SCC_I2S1)
+				fcr &= ~KL0_SCC_A_INTF_ENABLE;
+			else
+				fcr |= KL0_SCC_A_INTF_ENABLE;
+		}
+		if (chan_mask & MACIO_FLAG_SCCB_ON) {
+			fcr |= KL0_SCCB_ENABLE;
+			/* Perform irda specific inits */
+			if ((param & 0xfff) == PMAC_SCC_IRDA) {
+				fcr &= ~KL0_SCC_B_INTF_ENABLE;
+				fcr |= KL0_IRDA_ENABLE;
+				fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE;
+				fcr |= KL0_IRDA_SOURCE1_SEL;
+				fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0);
+				fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND);
+				need_reset_irda = 1;
+			} else
+				fcr |= KL0_SCC_B_INTF_ENABLE;
+		}
+		MACIO_OUT32(KEYLARGO_FCR0, fcr);
+		macio->flags |= chan_mask;
+		if (need_reset_scc)  {
+			MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			UNLOCK(flags);	/* lock is dropped for the 15ms reset pulse */
+			mdelay(15);
+			LOCK(flags);
+			MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET);
+		}
+		if (need_reset_irda)  {
+			MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			UNLOCK(flags);
+			mdelay(15);
+			LOCK(flags);
+			MACIO_BIC(KEYLARGO_FCR0, KL0_IRDA_RESET);
+		}
+		UNLOCK(flags);
+		if (param & PMAC_SCC_FLAG_XMON)
+			macio->flags |= MACIO_FLAG_SCC_LOCKED;	/* later disables return -EPERM */
+	} else {
+		if (macio->flags & MACIO_FLAG_SCC_LOCKED)
+			return -EPERM;
+		LOCK(flags);
+		fcr = MACIO_IN32(KEYLARGO_FCR0);
+		if (chan_mask & MACIO_FLAG_SCCA_ON)
+			fcr &= ~KL0_SCCA_ENABLE;
+		if (chan_mask & MACIO_FLAG_SCCB_ON) {
+			fcr &= ~KL0_SCCB_ENABLE;
+			/* Perform irda specific clears */
+			if ((param & 0xfff) == PMAC_SCC_IRDA) {
+				fcr &= ~KL0_IRDA_ENABLE;
+				fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE);
+				fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0);
+				fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND);
+			}
+		}
+		MACIO_OUT32(KEYLARGO_FCR0, fcr);
+		if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) {	/* both channels off */
+			fcr &= ~KL0_SCC_CELL_ENABLE;
+			MACIO_OUT32(KEYLARGO_FCR0, fcr);
+		}
+		macio->flags &= ~(chan_mask);
+		UNLOCK(flags);
+		mdelay(10);
+	}
+	return 0;
+}
+/* Enable (value != 0) or disable the modem via KL_GPIO_MODEM_RESET and KL2_ALT_DATA_OUT. */
+static long
+core99_modem_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+
+	/* Hack for internal USB modem: a NULL node means the first (keylargo) chip */
+	if (node == NULL) {
+		if (macio_chips[0].type != macio_keylargo)
+			return -ENODEV;
+		node = macio_chips[0].of_node;
+	}
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+	gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+	gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;	/* base value: output enabled, data bit clear */
+
+	if (!value) {
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+	LOCK(flags);
+	if (value) {
+		MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+		UNLOCK(flags);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		mdelay(250);
+	} else {
+		MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+		UNLOCK(flags);
+	}
+	if (value) {
+		LOCK(flags);	/* data bit goes 1 -> 0 -> 1, 250ms apart, unlocked while waiting */
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+/* Enable (value != 0) or disable the modem via KL_GPIO_MODEM_RESET and KL_GPIO_MODEM_POWER. */
+static long
+pangea_modem_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+
+	/* Hack for internal USB modem: a NULL node means the first (pangea/intrepid) chip */
+	if (node == NULL) {
+		if (macio_chips[0].type != macio_pangea &&
+		    macio_chips[0].type != macio_intrepid)
+			return -ENODEV;
+		node = macio_chips[0].of_node;
+	}
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+	gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+	gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;	/* base value: output enabled, data bit clear */
+
+	if (!value) {
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+	LOCK(flags);
+	if (value) {
+		MACIO_OUT8(KL_GPIO_MODEM_POWER,
+			KEYLARGO_GPIO_OUTPUT_ENABLE);
+		UNLOCK(flags);
+		(void)MACIO_IN32(KEYLARGO_FCR2);	/* NOTE(review): reads FCR2 after a GPIO write - confirm */
+		mdelay(250);
+	} else {
+		MACIO_OUT8(KL_GPIO_MODEM_POWER,
+			KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+		UNLOCK(flags);
+	}
+	if (value) {
+		LOCK(flags);	/* data bit goes 1 -> 0 -> 1, 250ms apart, unlocked while waiting */
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+/* Switch the ATA-100 clock bit in UniNorth; when enabling, also enable the PCI device. */
+static long
+core99_ata100_enable(struct device_node *node, long value)
+{
+	struct pci_dev *pdev = NULL;
+	unsigned long flags;
+	u8 pbus, pid;
+	int rc;
+
+	if (uninorth_rev < 0x24)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value)
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+	else
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(flags);
+	udelay(20);
+
+	/* Turning the clock off needs no PCI work */
+	if (!value)
+		return 0;
+
+	/* A node without a PCI device behind it is not treated as an error */
+	if (pci_device_from_OF_node(node, &pbus, &pid) == 0)
+		pdev = pci_get_domain_bus_and_slot(0, pbus, pid);
+	if (!pdev)
+		return 0;
+	rc = pci_enable_device(pdev);
+	if (!rc)
+		pci_set_master(pdev);
+	pci_dev_put(pdev);
+	return rc;
+}
+/* Enable/disable IDE bus @param (0..3, see below); any other bus id gives -ENODEV. */
+static long
+core99_ide_enable(struct device_node *node, long param, long value)
+{
+	/* Bus IDs 0 to 2 are KeyLargo based IDE, bus ID 3 is the U2
+	 * based ata-100
+	 */
+	switch(param) {
+	    case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE0_ENABLE, value);
+	    case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE1_ENABLE, value);
+	    case 2:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_UIDE_ENABLE, value);
+	    case 3:
+		return core99_ata100_enable(node, value);
+	    default:
+		return -ENODEV;
+	}
+}
+/* IDE reset for bus @param (0..2): a non-zero @value clears the bus' *_RESET_N bit. */
+static long
+core99_ide_reset(struct device_node *node, long param, long value)
+{
+	switch(param) {
+	    case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE0_RESET_N, !value);	/* note the inversion */
+	    case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE1_RESET_N, !value);
+	    case 2:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_UIDE_RESET_N, !value);
+	    default:
+		return -ENODEV;	/* bus 3 (ata-100) has no reset case here */
+	}
+}
+/* Set (value != 0) or clear UNI_N_CLOCK_CNTL_GMAC; @node and @param are unused. */
+static long
+core99_gmac_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+
+	LOCK(flags);
+	if (value)
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC);
+	else
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC);
+	(void)UN_IN(UNI_N_CLOCK_CNTL);	/* read back, value discarded */
+	UNLOCK(flags);
+	udelay(20);
+
+	return 0;	/* always succeeds */
+}
+/* Pulse KL_GPIO_ETH_PHY_RESET on the first macio chip; all three arguments are unused. */
+static long
+core99_gmac_phy_reset(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+	struct macio_chip *macio;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	LOCK(flags);
+	MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET);
+	UNLOCK(flags);
+	mdelay(10);	/* first value is held for 10ms with the lock released */
+	LOCK(flags);
+	MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */
+		KEYLARGO_GPIO_OUTOUT_DATA);	/* output-enable deliberately left out here */
+	UNLOCK(flags);
+	mdelay(10);
+
+	return 0;
+}
+/* Drive KL_GPIO_SOUND_POWER from @value, but only on the Pismo and Titanium models. */
+static long
+core99_sound_chip_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+
+	/* TODO: write better probe code; screamer G4 desktops &
+	 * iMacs can do that too. Add a recalibrate in
+	 * the driver as well
+	 */
+	if (pmac_mb.model_id == PMAC_TYPE_PISMO ||
+	    pmac_mb.model_id == PMAC_TYPE_TITANIUM) {
+		LOCK(flags);
+		if (value)
+			MACIO_OUT8(KL_GPIO_SOUND_POWER,
+				KEYLARGO_GPIO_OUTPUT_ENABLE |
+				KEYLARGO_GPIO_OUTOUT_DATA);
+		else
+			MACIO_OUT8(KL_GPIO_SOUND_POWER,
+				KEYLARGO_GPIO_OUTPUT_ENABLE);
+		(void)MACIO_IN8(KL_GPIO_SOUND_POWER);
+		UNLOCK(flags);
+	}
+	return 0;	/* other models: nothing is written, still success */
+}
+/* Turn the AirPort slot on or off; a no-op when MACIO_FLAG_AIRPORT_ON already matches. */
+static long
+core99_airport_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+	int			state;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+
+	/* Hint: we allow passing of macio itself for the sake of the
+	 * sleep code. Otherwise @node must be a direct child of the chip.
+	 */
+	if (node != macio->of_node &&
+	    (!node->parent || node->parent != macio->of_node))
+		return -ENODEV;
+	state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0;
+	if (value == state)	/* only values 0 and 1 can match the 0/1 state */
+		return 0;
+	if (value) {
+		/* This code is a reproduction of OF enable-cardslot
+		 * and init-wireless methods, slightly hacked until
+		 * I got it working.
+		 */
+		LOCK(flags);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xf);
+		UNLOCK(flags);
+		mdelay(10);
+		LOCK(flags);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xf);
+		UNLOCK(flags);
+
+		mdelay(10);
+
+		LOCK(flags);
+		MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0);
+		(void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xd);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xe);
+		UNLOCK(flags);
+		udelay(10);
+		MACIO_OUT32(0x1c000, 0);	/* these two raw-offset writes run without the lock */
+		mdelay(1);
+		MACIO_OUT8(0x1a3e0, 0x41);
+		(void)MACIO_IN8(0x1a3e0);
+		udelay(10);
+		LOCK(flags);
+		MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		UNLOCK(flags);
+		mdelay(100);
+
+		macio->flags |= MACIO_FLAG_AIRPORT_ON;	/* updated outside the lock */
+	} else {
+		LOCK(flags);
+		MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		MACIO_OUT8(KL_GPIO_AIRPORT_0, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_1, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_2, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_3, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_4, 0);
+		(void)MACIO_IN8(KL_GPIO_AIRPORT_4);
+		UNLOCK(flags);
+
+		macio->flags &= ~MACIO_FLAG_AIRPORT_ON;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+/* Pulse the soft-reset GPIO of the CPU whose hardware id is @param (KeyLargo only). */
+static long
+core99_reset_cpu(struct device_node *node, long param, long value)
+{
+	unsigned int reset_io = 0;
+	unsigned long flags;
+	struct macio_chip *macio = &macio_chips[0];
+	struct device_node *np;
+	const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, KL_GPIO_RESET_CPU1,
+					 KL_GPIO_RESET_CPU2, KL_GPIO_RESET_CPU3 };
+
+	if (macio->type != macio_keylargo)
+		return -ENODEV;
+
+	/* Prefer the line given by the matching CPU node's "soft-reset" property */
+	for_each_of_cpu_node(np) {
+		const u32 *rst = of_get_property(np, "soft-reset", NULL);
+		if (!rst)
+			continue;
+		if (param == of_get_cpu_hwid(np, 0)) {
+			reset_io = *rst;	/* read before the node reference is dropped */
+			of_node_put(np);
+			break;
+		}
+	}
+	if (np == NULL || reset_io == 0) {
+		/* Fall back to the built-in table, which only covers ids 0..3 */
+		if ((unsigned long)param >= ARRAY_SIZE(dflt_reset_lines))
+			return -ENODEV;
+		reset_io = dflt_reset_lines[param];
+	}
+	LOCK(flags);
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(reset_io);
+	udelay(1);
+	MACIO_OUT8(reset_io, 0);
+	(void)MACIO_IN8(reset_io);
+	UNLOCK(flags);
+	return 0;
+}
+#endif /* CONFIG_SMP */
+/* Turn the USB cell selected by @node's "AAPL,clock-id" property on (value != 0) or off. */
+static long
+core99_usb_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio;
+	unsigned long flags;
+	const char *prop;
+	int number;	/* 0, 2 or 4: first port number of the selected cell */
+	u32 reg;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	prop = of_get_property(node, "AAPL,clock-id", NULL);
+	if (!prop)
+		return -ENODEV;
+	if (strncmp(prop, "usb0u048", 8) == 0)
+		number = 0;
+	else if (strncmp(prop, "usb1u148", 8) == 0)
+		number = 2;
+	else if (strncmp(prop, "usb2u248", 8) == 0)
+		number = 4;
+	else
+		return -ENODEV;
+
+	/* Sorry for the brute-force locking, but this is only used during
+	 * sleep and the timing seems to be critical
+	 */
+	LOCK(flags);
+	if (value) {
+		/* Turn ON */
+		if (number == 0) {
+			MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			UNLOCK(flags);
+			mdelay(1);
+			LOCK(flags);
+			MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE);
+		} else if (number == 2) {
+			MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1));
+			UNLOCK(flags);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			mdelay(1);
+			LOCK(flags);
+			MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE);
+		} else if (number == 4) {
+			MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1));
+			UNLOCK(flags);
+			(void)MACIO_IN32(KEYLARGO_FCR1);
+			mdelay(1);
+			LOCK(flags);
+			MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE);
+		}
+		if (number < 4) {	/* ports of cells 0 and 1 are configured in FCR4 */
+			reg = MACIO_IN32(KEYLARGO_FCR4);
+			reg &=	~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) |
+				KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number));
+			reg &=	~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) |
+				KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1));
+			MACIO_OUT32(KEYLARGO_FCR4, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR4);
+			udelay(10);
+		} else {	/* cell 2 uses the KL3_IT_* bits in FCR3 */
+			reg = MACIO_IN32(KEYLARGO_FCR3);
+			reg &=	~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0));
+			reg &=	~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1));
+			MACIO_OUT32(KEYLARGO_FCR3, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR3);
+			udelay(10);
+		}
+		if (macio->type == macio_intrepid) {
+			/* wait for clock stopped bits to clear */
+			u32 test0 = 0, test1 = 0;
+			u32 status0, status1;
+			int timeout = 1000;	/* polled once per millisecond below */
+
+			UNLOCK(flags);
+			switch (number) {
+			case 0:
+				test0 = UNI_N_CLOCK_STOPPED_USB0;
+				test1 = UNI_N_CLOCK_STOPPED_USB0PCI;
+				break;
+			case 2:
+				test0 = UNI_N_CLOCK_STOPPED_USB1;
+				test1 = UNI_N_CLOCK_STOPPED_USB1PCI;
+				break;
+			case 4:
+				test0 = UNI_N_CLOCK_STOPPED_USB2;
+				test1 = UNI_N_CLOCK_STOPPED_USB2PCI;
+				break;
+			}
+			do {
+				if (--timeout <= 0) {
+					printk(KERN_ERR "core99_usb_enable: "
+					       "Timeout waiting for clocks\n");
+					break;	/* the timeout is only logged; 0 is still returned */
+				}
+				mdelay(1);
+				status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0);
+				status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1);
+			} while ((status0 & test0) | (status1 & test1));
+			LOCK(flags);	/* re-taken so the common UNLOCK at the end balances */
+		}
+	} else {
+		/* Turn OFF */
+		if (number < 4) {
+			reg = MACIO_IN32(KEYLARGO_FCR4);
+			reg |=	KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) |
+				KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number);
+			reg |=	KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) |
+				KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1);
+			MACIO_OUT32(KEYLARGO_FCR4, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR4);
+			udelay(1);
+		} else {
+			reg = MACIO_IN32(KEYLARGO_FCR3);
+			reg |=	KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0);
+			reg |=	KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1);
+			MACIO_OUT32(KEYLARGO_FCR3, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR3);
+			udelay(1);
+		}
+		if (number == 0) {
+			if (macio->type != macio_intrepid)	/* cell enable is left set on intrepid */
+				MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			udelay(1);
+			MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+		} else if (number == 2) {
+			if (macio->type != macio_intrepid)
+				MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			udelay(1);
+			MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+		} else if (number == 4) {
+			udelay(1);	/* KL1_USB2_CELL_ENABLE is not cleared on this path */
+			MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR1);
+		}
+		udelay(1);
+	}
+	UNLOCK(flags);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_1394_ENABLE handler for KeyLargo, Pangea and Intrepid.
+ *
+ * Sets (value != 0) or clears (value == 0) UNI_N_CLOCK_CNTL_FW in the
+ * UniNorth clock control register. @node and @param are not used.
+ *
+ * Returns 0 on success, or -ENODEV when the first mac-io chip is of
+ * another type or is not flagged MACIO_FLAG_FW_SUPPORTED.
+ */
+static long
+core99_firewire_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	switch (macio->type) {
+	case macio_keylargo:
+	case macio_pangea:
+	case macio_intrepid:
+		break;
+	default:
+		return -ENODEV;
+	}
+	if ((macio->flags & MACIO_FLAG_FW_SUPPORTED) == 0)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value)
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW);
+	else
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW);
+	/* Read the register back, whichever way the bit was changed */
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_1394_CABLE_POWER handler.
+ *
+ * Writes 0 (value != 0) or 4 (value == 0) to the KL_GPIO_FW_CABLE_POWER
+ * GPIO, reads it back and waits. @node may be NULL; @param is not used.
+ *
+ * Returns 0 on success, or -ENODEV when the board lacks
+ * PMAC_MB_HAS_FW_POWER, the mac-io chip is not KeyLargo/Pangea/Intrepid,
+ * or the chip is not flagged MACIO_FLAG_FW_SUPPORTED.
+ */
+static long
+core99_firewire_cable_power(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio;
+	unsigned long flags;
+	u8 gpio_val;
+
+	/* Trick: we allow NULL node */
+	if (!(pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER))
+		return -ENODEV;
+
+	macio = &macio_chips[0];
+	switch (macio->type) {
+	case macio_keylargo:
+	case macio_pangea:
+	case macio_intrepid:
+		break;
+	default:
+		return -ENODEV;
+	}
+	if ((macio->flags & MACIO_FLAG_FW_SUPPORTED) == 0)
+		return -ENODEV;
+
+	/* A non-zero request writes 0 to the GPIO, a zero request writes 4 */
+	gpio_val = value ? 0 : 4;
+
+	LOCK(flags);
+	MACIO_OUT8(KL_GPIO_FW_CABLE_POWER, gpio_val);
+	MACIO_IN8(KL_GPIO_FW_CABLE_POWER);
+	udelay(10);
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_AACK_DELAY_ENABLE handler.
+ *
+ * Sets or clears UNI_N_AACK_DELAY_ENABLE in UNI_N_AACK_DELAY. Note that
+ * the on/off selection is taken from @param (non-zero sets the bit);
+ * @node and @value are not used.
+ *
+ * Returns 0 on success, or -ENODEV when uninorth_rev is below 0xd2.
+ */
+static long
+intrepid_aack_delay_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+
+	if (uninorth_rev < 0xd2)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (!param) {
+		UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE);
+	} else {
+		UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE);
+	}
+	UNLOCK(flags);
+
+	return 0;
+}
+
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * PMAC_FTR_READ_GPIO handler: return the byte read with MACIO_IN8() at
+ * offset @param of the first mac-io chip. @node and @value are not used.
+ */
+static long
+core99_read_gpio(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	long level;
+
+	level = MACIO_IN8(param);
+
+	return level;
+}
+
+
+/*
+ * PMAC_FTR_WRITE_GPIO handler: write the low 8 bits of @value with
+ * MACIO_OUT8() at offset @param of the first mac-io chip. @node is not
+ * used. Always returns 0.
+ */
+static long
+core99_write_gpio(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	u8 byte = (u8)(value & 0xff);
+
+	MACIO_OUT8(param, byte);
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * PMAC_FTR_GMAC_ENABLE handler for G5 machines: switch
+ * K2_FCR1_GMAC_CLK_ENABLE in KEYLARGO_FCR1 on or off.
+ *
+ * @node:  device node of the caller; NULL is rejected with -ENODEV
+ * @param: not used
+ * @value: non-zero enables the clock, zero disables it
+ *
+ * Returns 0 on success.
+ */
+static long g5_gmac_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (node == NULL)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value) {
+		MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
+		mb();
+		/* Clock is on again: empty slot 0 of k2_skiplist, after the barrier */
+		k2_skiplist[0] = NULL;
+	} else {
+		/*
+		 * Record the node in slot 0 of k2_skiplist before the clock
+		 * is stopped (presumably so that other code avoids touching
+		 * the device while it is unclocked -- TODO confirm).
+		 */
+		k2_skiplist[0] = node;
+		mb();
+		MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
+	}
+	
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_1394_ENABLE handler for G5 machines: switch
+ * K2_FCR1_FW_CLK_ENABLE in KEYLARGO_FCR1 on or off.
+ *
+ * @node:  device node of the caller; NULL is rejected with -ENODEV
+ * @param: not used
+ * @value: non-zero enables the clock, zero disables it
+ *
+ * Returns 0 on success.
+ */
+static long g5_fw_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (node == NULL)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value) {
+		MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
+		mb();
+		/* Clock is on again: empty slot 1 of k2_skiplist, after the barrier */
+		k2_skiplist[1] = NULL;
+	} else {
+		/*
+		 * Record the node in slot 1 of k2_skiplist before the clock
+		 * is stopped (presumably so that other code avoids touching
+		 * the device while it is unclocked -- TODO confirm).
+		 */
+		k2_skiplist[1] = node;
+		mb();
+		MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
+	}
+	
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_ENABLE_MPIC handler for G5 machines.
+ *
+ * When the parent of @node is named "u3" or "u4", sets U3_MPIC_RESET and
+ * U3_MPIC_OUTPUT_ENABLE in U3_TOGGLE_REG. For any other parent (or no
+ * parent at all) nothing is done. @param and @value are not used.
+ *
+ * Always returns 0.
+ */
+static long g5_mpic_enable(struct device_node *node, long param, long value)
+{
+	struct device_node *bridge;
+	unsigned long flags;
+	int on_u3_or_u4;
+
+	bridge = of_get_parent(node);
+	if (!bridge)
+		return 0;
+
+	on_u3_or_u4 = of_node_name_eq(bridge, "u3") ||
+		      of_node_name_eq(bridge, "u4");
+	/* Only the name was needed, drop the reference taken above */
+	of_node_put(bridge);
+	if (on_u3_or_u4 == 0)
+		return 0;
+
+	LOCK(flags);
+	UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_GMAC_PHY_RESET handler for G5 machines.
+ *
+ * Pulses the PHY reset GPIO, but only when the first child of @node is
+ * compatible with "B5221". May sleep (msleep). @param and @value are not
+ * used.
+ *
+ * Returns 0 on success or when no reset is needed, -ENODEV when @node
+ * has no child.
+ */
+static long g5_eth_phy_reset(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	struct device_node *child;
+	int is_b5221;
+
+	/*
+	 * The combo PHYs must be left alone; only the BCM5221 found in
+	 * the iMac G5 gets reset.
+	 */
+	child = of_get_next_child(node, NULL);
+	if (child == NULL)
+		return -ENODEV;
+	is_b5221 = of_device_is_compatible(child, "B5221");
+	of_node_put(child);
+	if (is_b5221 == 0)
+		return 0;
+
+	/* The PHY reset line is GPIO 29; the device-tree does not say so */
+	MACIO_OUT8(K2_GPIO_EXTINT_0 + 29,
+		   KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+	/*
+	 * sungem now always calls this from a context where scheduling
+	 * is allowed, so sleeping here is fine.
+	 */
+	msleep(10);
+	MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0);
+
+	return 0;
+}
+
+/*
+ * PMAC_FTR_SOUND_CHIP_ENABLE handler for G5 machines: enable or disable
+ * one I2S cell.
+ *
+ * @node:  device node named "i2s-a", "i2s-b" or (Shasta only) "i2s-c";
+ *         the letter selects cell 0, 1 or 2
+ * @param: not used
+ * @value: non-zero enables the cell, zero disables it
+ *
+ * Returns 0 on success, or -ENODEV when @node is NULL, the mac-io chip is
+ * neither KeyLargo2 nor Shasta, or the node name does not designate a
+ * cell available on this chip.
+ */
+static long g5_i2s_enable(struct device_node *node, long param, long value)
+{
+	/* Very crude implementation for now */
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+	int cell;
+	/* Per-cell bit masks; columns apply to FCR0, FCR1 and FCR3 */
+	static const u32 fcrs[3][3] = {
+		{ 0,
+		  K2_FCR1_I2S0_CELL_ENABLE |
+		  K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE,
+		  KL3_I2S0_CLK18_ENABLE
+		},
+		{ KL0_SCC_A_INTF_ENABLE,
+		  K2_FCR1_I2S1_CELL_ENABLE |
+		  K2_FCR1_I2S1_CLK_ENABLE_BIT | K2_FCR1_I2S1_ENABLE,
+		  KL3_I2S1_CLK18_ENABLE
+		},
+		{ KL0_SCC_B_INTF_ENABLE,
+		  SH_FCR1_I2S2_CELL_ENABLE |
+		  SH_FCR1_I2S2_CLK_ENABLE_BIT | SH_FCR1_I2S2_ENABLE,
+		  SH_FCR3_I2S2_CLK18_ENABLE
+		},
+	};
+
+	/* node->name is dereferenced below; reject NULL like the other G5 handlers */
+	if (node == NULL)
+		return -ENODEV;
+	if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
+		return -ENODEV;
+	if (strncmp(node->name, "i2s-", 4))
+		return -ENODEV;
+	cell = node->name[4] - 'a';
+	switch(cell) {
+	case 0:
+	case 1:
+		break;
+	case 2:
+		/* The third cell is only accepted on Shasta */
+		if (macio->type == macio_shasta)
+			break;
+		fallthrough;
+	default:
+		return -ENODEV;
+	}
+
+	LOCK(flags);
+	if (value) {
+		/* Enable: clear the FCR0 bits, then set the FCR1 and FCR3 bits */
+		MACIO_BIC(KEYLARGO_FCR0, fcrs[cell][0]);
+		MACIO_BIS(KEYLARGO_FCR1, fcrs[cell][1]);
+		MACIO_BIS(KEYLARGO_FCR3, fcrs[cell][2]);
+	} else {
+		/* Disable: the same registers in reverse order */
+		MACIO_BIC(KEYLARGO_FCR3, fcrs[cell][2]);
+		MACIO_BIC(KEYLARGO_FCR1, fcrs[cell][1]);
+		MACIO_BIS(KEYLARGO_FCR0, fcrs[cell][0]);
+	}
+	udelay(10);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_SMP
+/*
+ * PMAC_FTR_RESET_CPU handler for G5 machines.
+ *
+ * Looks for the CPU node whose hardware id equals @param, takes the GPIO
+ * offset from its "soft-reset" property and pulses that GPIO
+ * (KEYLARGO_GPIO_OUTPUT_ENABLE, then 0). @node and @value are not used.
+ *
+ * Returns 0 on success, or -ENODEV when the mac-io chip is neither
+ * KeyLargo2 nor Shasta, no matching CPU node with a "soft-reset" property
+ * exists, or the property value is 0.
+ */
+static long g5_reset_cpu(struct device_node *node, long param, long value)
+{
+	unsigned int reset_io = 0;
+	unsigned long flags;
+	struct macio_chip *macio;
+	struct device_node *np;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
+		return -ENODEV;
+
+	for_each_of_cpu_node(np) {
+		const u32 *rst = of_get_property(np, "soft-reset", NULL);
+		if (!rst)
+			continue;
+		if (param == of_get_cpu_hwid(np, 0)) {
+			/*
+			 * The property data belongs to the node, so copy it
+			 * out before the node reference is dropped.
+			 */
+			reset_io = *rst;
+			of_node_put(np);
+			break;
+		}
+	}
+	/* np is NULL when the loop ran to completion without a match */
+	if (np == NULL || reset_io == 0)
+		return -ENODEV;
+
+	LOCK(flags);
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(reset_io);
+	udelay(1);
+	MACIO_OUT8(reset_io, 0);
+	(void)MACIO_IN8(reset_io);
+	UNLOCK(flags);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Not static, since pmac_smp can call it.
+ *
+ * On dual CPU machines running UP this takes the second CPU off the
+ * bus. Nothing is done unless uninorth_maj is 3.
+ */
+void __init g5_phy_disable_cpu1(void)
+{
+	if (uninorth_maj != 3)
+		return;
+
+	UN_OUT(U3_API_PHY_CONFIG_1, 0);
+}
+#endif /* CONFIG_PPC64 */
+
+#ifndef CONFIG_PPC64
+
+
+#ifdef CONFIG_PM
+/* State captured by core99_sleep() and written back by core99_wake_up() */
+static u32 save_gpio_levels[2];	/* KEYLARGO_GPIO_LEVELS0 and LEVELS1 */
+static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT];	/* GPIOs from KEYLARGO_GPIO_EXTINT_0 */
+static u8 save_gpio_normal[KEYLARGO_GPIO_CNT];	/* GPIOs from KEYLARGO_GPIO_0 */
+static u32 save_unin_clock_ctl;	/* UNI_N_CLOCK_CNTL */
+
+/*
+ * Turn off as many KeyLargo cells, clocks and PLLs as possible.
+ *
+ * @macio:      the mac-io chip to operate on (used implicitly by the
+ *              MACIO_* accessors)
+ * @sleep_mode: when non-zero, additionally suspend the USB reference,
+ *              shut down the KW12 PLL (and the total PLL on rev >= 2)
+ *              and stop the timer and VIA clocks
+ */
+static void keylargo_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 temp;
+
+	if (sleep_mode) {
+		mdelay(1);
+		MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND);
+		(void)MACIO_IN32(KEYLARGO_FCR0);
+		mdelay(1);
+	}
+
+	/* FCR0: SCC and IrDA */
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+				KL0_SCC_CELL_ENABLE |
+				KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE |
+				KL0_IRDA_CLK19_ENABLE);
+
+	/* Media bay 0: clear the device field, then set the IDE enable bit */
+	MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK);
+	MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE);
+
+	/* FCR1: audio, I2S and IDE */
+	MACIO_BIC(KEYLARGO_FCR1,
+		KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
+		KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N |
+		KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N |
+		KL1_UIDE_ENABLE);
+
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+	MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE);
+
+	/* FCR3 is built up in temp and written back once */
+	temp = MACIO_IN32(KEYLARGO_FCR3);
+	if (macio->rev >= 2) {
+		temp |= KL3_SHUTDOWN_PLL2X;
+		if (sleep_mode)
+			temp |= KL3_SHUTDOWN_PLL_TOTAL;
+	}
+
+	temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
+		KL3_SHUTDOWN_PLLKW35;
+	if (sleep_mode)
+		temp |= KL3_SHUTDOWN_PLLKW12;
+	temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE
+		| KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
+}
+
+/*
+ * Turn off as many Pangea cells, clocks and PLLs as possible.
+ *
+ * @macio:      the mac-io chip to operate on (used implicitly by the
+ *              MACIO_* accessors)
+ * @sleep_mode: when non-zero, additionally stop the VIA and timer clocks
+ */
+static void pangea_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 temp;
+
+	/* FCR0: SCC and both USB cells */
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+				KL0_SCC_CELL_ENABLE |
+				KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE);
+
+	/* FCR1: audio, I2S and UIDE */
+	MACIO_BIC(KEYLARGO_FCR1,
+		KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
+		KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		KL1_UIDE_ENABLE);
+	/* Boards flagged PMAC_MB_MOBILE also get KL1_UIDE_RESET_N cleared */
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+
+	/* FCR3 is built up in temp and written back once */
+	temp = MACIO_IN32(KEYLARGO_FCR3);
+	temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
+		KL3_SHUTDOWN_PLLKW35;
+	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE
+		| KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
+}
+
+/*
+ * Turn off as many Intrepid cells and clocks as possible.
+ *
+ * @macio:      the mac-io chip to operate on (used implicitly by the
+ *              MACIO_* accessors)
+ * @sleep_mode: when non-zero, additionally stop the timer clock and
+ *              KL3_IT_VIA_CLK32_ENABLE
+ */
+static void intrepid_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 temp;
+
+	/* FCR0: SCC */
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+		  KL0_SCC_CELL_ENABLE);
+
+	/* FCR1: I2S and EIDE0 */
+	MACIO_BIC(KEYLARGO_FCR1,
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		KL1_EIDE0_ENABLE);
+	/* Boards flagged PMAC_MB_MOBILE also get KL1_UIDE_RESET_N cleared */
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	/* FCR3 is built up in temp and written back once */
+	temp = MACIO_IN32(KEYLARGO_FCR3);
+	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE |
+		  KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0);
+	mdelay(10);
+}
+
+
+/*
+ * Put a machine with a KeyLargo, Pangea or Intrepid mac-io chip to sleep.
+ *
+ * Turns off airport, FireWire, the internal modem and sound, saves the
+ * GPIO, FCR (and MBCR on KeyLargo) and DBDMA state, shuts down the mac-io
+ * cells and finally puts the UniNorth host bridge to sleep. The saved
+ * state is written back by core99_wake_up().
+ *
+ * Returns 0, or -ENODEV for any other mac-io type.
+ */
+static int
+core99_sleep(void)
+{
+	struct macio_chip *macio;
+	int i;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	/* We power off the wireless slot in case it was not done
+	 * by the driver. We don't power it on automatically however
+	 */
+	if (macio->flags & MACIO_FLAG_AIRPORT_ON)
+		core99_airport_enable(macio->of_node, 0, 0);
+
+	/* We power off the FW cable. Should be done by the driver... */
+	if (macio->flags & MACIO_FLAG_FW_SUPPORTED) {
+		core99_firewire_enable(NULL, 0, 0);
+		core99_firewire_cable_power(NULL, 0, 0);
+	}
+
+	/* We make sure int. modem is off (in case driver lost it) */
+	if (macio->type == macio_keylargo)
+		core99_modem_enable(macio->of_node, 0, 0);
+	else
+		pangea_modem_enable(macio->of_node, 0, 0);
+
+	/* We make sure the sound is off as well */
+	core99_sound_chip_enable(macio->of_node, 0, 0);
+
+	/*
+	 * Save various bits of KeyLargo
+	 */
+
+	/* Save the state of the various GPIOs */
+	save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0);
+	save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1);
+	for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++)
+		save_gpio_extint[i] = MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+i);
+	for (i=0; i<KEYLARGO_GPIO_CNT; i++)
+		save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0+i);
+
+	/* Save the FCRs */
+	/* MBCR is only saved on KeyLargo, FCR5 only on Pangea and Intrepid */
+	if (macio->type == macio_keylargo)
+		save_mbcr = MACIO_IN32(KEYLARGO_MBCR);
+	save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0);
+	save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1);
+	save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2);
+	save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3);
+	save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid)
+		save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5);
+
+	/* Save state & config of DBDMA channels */
+	dbdma_save(macio, save_dbdma);
+
+	/*
+	 * Turn off as much as we can
+	 */
+	if (macio->type == macio_pangea)
+		pangea_shutdown(macio, 1);
+	else if (macio->type == macio_intrepid)
+		intrepid_shutdown(macio, 1);
+	else if (macio->type == macio_keylargo)
+		keylargo_shutdown(macio, 1);
+
+	/*
+	 * Put the host bridge to sleep
+	 */
+
+	save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL);
+	/* Note: do not switch GMAC off, driver does it when necessary, WOL must keep it
+	 * enabled !
+	 */
+	UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl &
+	       ~(/*UNI_N_CLOCK_CNTL_GMAC|*/UNI_N_CLOCK_CNTL_FW/*|UNI_N_CLOCK_CNTL_PCI*/));
+	udelay(100);
+	UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+	UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP);
+	mdelay(10);
+
+	/*
+	 * FIXME: A bit of black magic with OpenPIC (don't ask me why)
+	 */
+	/* The same two bits are cleared again in core99_wake_up() */
+	if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) {
+		MACIO_BIS(0x506e0, 0x00400000);
+		MACIO_BIS(0x506e0, 0x80000000);
+	}
+	return 0;
+}
+
+/*
+ * Undo core99_sleep(): wake the UniNorth host bridge, then write back the
+ * saved MBCR (KeyLargo only), FCRs, DBDMA state, GPIOs and finally the
+ * UniNorth clock control register.
+ *
+ * Each FCR write is followed by a read-back of the same register and a
+ * 10us delay before the next register is touched.
+ *
+ * Returns 0, or -ENODEV when the mac-io chip is not KeyLargo, Pangea or
+ * Intrepid.
+ */
+static int
+core99_wake_up(void)
+{
+	struct macio_chip *macio;
+	int i;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	/*
+	 * Wakeup the host bridge
+	 */
+	UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+	udelay(10);
+	UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+	udelay(10);
+
+	/*
+	 * Restore KeyLargo
+	 */
+
+	if (macio->type == macio_keylargo) {
+		MACIO_OUT32(KEYLARGO_MBCR, save_mbcr);
+		(void)MACIO_IN32(KEYLARGO_MBCR); udelay(10);
+	}
+	MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]);
+	(void)MACIO_IN32(KEYLARGO_FCR0); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]);
+	(void)MACIO_IN32(KEYLARGO_FCR1); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]);
+	(void)MACIO_IN32(KEYLARGO_FCR2); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]);
+	(void)MACIO_IN32(KEYLARGO_FCR3); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]);
+	(void)MACIO_IN32(KEYLARGO_FCR4); udelay(10);
+	/* FCR5 was only saved on Pangea and Intrepid */
+	if (macio->type == macio_pangea || macio->type == macio_intrepid) {
+		MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]);
+		(void)MACIO_IN32(KEYLARGO_FCR5); udelay(10);
+	}
+
+	dbdma_restore(macio, save_dbdma);
+
+	/* Restore the GPIO state saved by core99_sleep() */
+	MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]);
+	MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]);
+	for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++)
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+i, save_gpio_extint[i]);
+	for (i=0; i<KEYLARGO_GPIO_CNT; i++)
+		MACIO_OUT8(KEYLARGO_GPIO_0+i, save_gpio_normal[i]);
+
+	/* FIXME more black magic with OpenPIC ... */
+	/* Clears the two bits that core99_sleep() set on the same model */
+	if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) {
+		MACIO_BIC(0x506e0, 0x00400000);
+		MACIO_BIC(0x506e0, 0x80000000);
+	}
+
+	UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl);
+	udelay(100);
+
+	return 0;
+}
+
+#endif /* CONFIG_PM */
+
+/*
+ * PMAC_FTR_SLEEP_STATE handler for Core99-class machines.
+ *
+ * @node:  not used
+ * @param: 1 selects the "fake sleep" mode used for CPU speed switching;
+ *         any other value selects real sleep
+ * @value: 1 to enter the state, 0 to leave it (for fake sleep, anything
+ *         other than 1 leaves it)
+ *
+ * Returns 0 on success, -EPERM when real sleep is requested on a board
+ * without PMAC_MB_CAN_SLEEP, or the result of core99_sleep() /
+ * core99_wake_up(). Without CONFIG_PM a real sleep request is a no-op
+ * returning 0.
+ */
+static long
+core99_sleep_state(struct device_node *node, long param, long value)
+{
+	if (param == 1) {
+		/* "Fake sleep": only the host bridge state is changed */
+		if (value != 1) {
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+			udelay(10);
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+			udelay(10);
+		} else {
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2);
+		}
+		return 0;
+	}
+
+	if (!(pmac_mb.board_flags & PMAC_MB_CAN_SLEEP))
+		return -EPERM;
+
+#ifdef CONFIG_PM
+	if (value == 0)
+		return core99_wake_up();
+	if (value == 1)
+		return core99_sleep();
+#endif /* CONFIG_PM */
+
+	return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * PMAC_FTR_DEVICE_CAN_WAKE handler: promote a board flagged
+ * PMAC_MB_MAY_SLEEP to PMAC_MB_CAN_SLEEP. All arguments are ignored and
+ * the return value is always 0.
+ */
+static long
+generic_dev_can_wake(struct device_node *node, long param, long value)
+{
+	/* Todo: eventually check that this really is the on-board
+	 * video device ...
+	 */
+
+	if ((pmac_mb.board_flags & PMAC_MB_MAY_SLEEP) == 0)
+		return 0;
+
+	pmac_mb.board_flags |= PMAC_MB_CAN_SLEEP;
+	return 0;
+}
+
+/*
+ * PMAC_FTR_GET_MB_INFO handler: report information about the detected
+ * motherboard.
+ *
+ * @node:  not used
+ * @param: PMAC_MB_INFO_MODEL, PMAC_MB_INFO_FLAGS or PMAC_MB_INFO_NAME
+ * @value: for PMAC_MB_INFO_NAME only, a (const char **) cast to long
+ *         that receives the model name
+ *
+ * Returns the model id or the board flags, 0 for PMAC_MB_INFO_NAME, or
+ * -EINVAL for any other @param.
+ */
+static long generic_get_mb_info(struct device_node *node, long param, long value)
+{
+	const char **name_out;
+
+	if (param == PMAC_MB_INFO_MODEL)
+		return pmac_mb.model_id;
+
+	if (param == PMAC_MB_INFO_FLAGS)
+		return pmac_mb.board_flags;
+
+	if (param == PMAC_MB_INFO_NAME) {
+		/* hack: the output pointer travels in the long argument */
+		name_out = (const char **)value;
+		*name_out = pmac_mb.model_name;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Table definitions
+ */
+
+/* Handlers usable on any machine; pmac_do_feature_call() falls back to
+ * this table when the board-specific one has no match.
+ */
+static struct feature_table_entry any_features[] = {
+	{ .selector = PMAC_FTR_GET_MB_INFO, .function = generic_get_mb_info },
+	{ .selector = PMAC_FTR_DEVICE_CAN_WAKE, .function = generic_dev_can_wake },
+	{ .selector = 0, .function = NULL }
+};
+
+#ifndef CONFIG_PPC64
+
+/* OHare based motherboards. For now these are only used on the
+ * 2400, 3400 and 3500 series powerbooks. Some older desktops seem
+ * to have issues with turning those asic cells on and off.
+ */
+static struct feature_table_entry ohare_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = ohare_htw_scc_enable },
+	{ .selector = PMAC_FTR_SWIM3_ENABLE, .function = ohare_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE, .function = ohare_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = ohare_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = ohare_ide_reset },
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = ohare_sleep_state },
+	{ .selector = 0, .function = NULL }
+};
+
+/* Heathrow desktop machines (Beige G3).
+ * Kept separate because some features couldn't be properly tested
+ * and the serial port control bits appear to confuse it.
+ */
+static struct feature_table_entry heathrow_desktop_features[] = {
+	{ .selector = PMAC_FTR_SWIM3_ENABLE, .function = heathrow_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE, .function = heathrow_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = heathrow_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = heathrow_ide_reset },
+	{ .selector = PMAC_FTR_BMAC_ENABLE, .function = heathrow_bmac_enable },
+	{ .selector = 0, .function = NULL }
+};
+
+/* Heathrow based laptops, that is the Wallstreet and Mainstreet
+ * powerbooks.
+ */
+static struct feature_table_entry heathrow_laptop_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = ohare_htw_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE, .function = heathrow_modem_enable },
+	{ .selector = PMAC_FTR_SWIM3_ENABLE, .function = heathrow_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE, .function = heathrow_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = heathrow_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = heathrow_ide_reset },
+	{ .selector = PMAC_FTR_BMAC_ENABLE, .function = heathrow_bmac_enable },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = heathrow_sound_enable },
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = heathrow_sleep_state },
+	{ .selector = 0, .function = NULL }
+};
+
+/* Paddington based machines:
+ * the Lombard (101) powerbook, first iMac models, B&W G3 and Yikes G4.
+ */
+static struct feature_table_entry paddington_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = ohare_htw_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE, .function = heathrow_modem_enable },
+	{ .selector = PMAC_FTR_SWIM3_ENABLE, .function = heathrow_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE, .function = heathrow_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = heathrow_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = heathrow_ide_reset },
+	{ .selector = PMAC_FTR_BMAC_ENABLE, .function = heathrow_bmac_enable },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = heathrow_sound_enable },
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = heathrow_sleep_state },
+	{ .selector = 0, .function = NULL }
+};
+
+/* Core99 & MacRISC 2 machines: every machine released since the
+ * iBook (included), i.e. all AGP machines, except those with the
+ * pangea chipset. Pangea is the "combo" UniNorth/KeyLargo used on
+ * the iBook2 & iMac "Flower Power".
+ *
+ * NOTE(review): this is the only table that puts core99_sleep_state
+ * under CONFIG_PM; rackmac, pangea and intrepid list it
+ * unconditionally. Confirm whether the difference is intended.
+ */
+static struct feature_table_entry core99_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = core99_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE, .function = core99_modem_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE, .function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET, .function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = core99_sound_chip_enable },
+	{ .selector = PMAC_FTR_AIRPORT_ENABLE, .function = core99_airport_enable },
+	{ .selector = PMAC_FTR_USB_ENABLE, .function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE, .function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+#ifdef CONFIG_PM
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = core99_sleep_state },
+#endif
+#ifdef CONFIG_SMP
+	{ .selector = PMAC_FTR_RESET_CPU, .function = core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ .selector = PMAC_FTR_READ_GPIO, .function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO, .function = core99_write_gpio },
+	{ .selector = 0, .function = NULL }
+};
+
+/* RackMac (Xserve) features
+ */
+static struct feature_table_entry rackmac_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = core99_scc_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE, .function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET, .function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_USB_ENABLE, .function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE, .function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = core99_sleep_state },
+#ifdef CONFIG_SMP
+	{ .selector = PMAC_FTR_RESET_CPU, .function = core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ .selector = PMAC_FTR_READ_GPIO, .function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO, .function = core99_write_gpio },
+	{ .selector = 0, .function = NULL }
+};
+
+/* Pangea features: the core99 handlers, with pangea_modem_enable
+ * for the modem.
+ */
+static struct feature_table_entry pangea_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = core99_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE, .function = pangea_modem_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE, .function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET, .function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = core99_sound_chip_enable },
+	{ .selector = PMAC_FTR_AIRPORT_ENABLE, .function = core99_airport_enable },
+	{ .selector = PMAC_FTR_USB_ENABLE, .function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE, .function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = core99_sleep_state },
+	{ .selector = PMAC_FTR_READ_GPIO, .function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO, .function = core99_write_gpio },
+	{ .selector = 0, .function = NULL }
+};
+
+/* Intrepid features: same handlers as pangea, plus the AACK delay
+ * control.
+ */
+static struct feature_table_entry intrepid_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE, .function = core99_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE, .function = pangea_modem_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE, .function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET, .function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE, .function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET, .function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = core99_sound_chip_enable },
+	{ .selector = PMAC_FTR_AIRPORT_ENABLE, .function = core99_airport_enable },
+	{ .selector = PMAC_FTR_USB_ENABLE, .function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE, .function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+	{ .selector = PMAC_FTR_SLEEP_STATE, .function = core99_sleep_state },
+	{ .selector = PMAC_FTR_READ_GPIO, .function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO, .function = core99_write_gpio },
+	{ .selector = PMAC_FTR_AACK_DELAY_ENABLE, .function = intrepid_aack_delay_enable },
+	{ .selector = 0, .function = NULL }
+};
+
+#else /* CONFIG_PPC64 */
+
+/* G5 features (64-bit builds only)
+ */
+static struct feature_table_entry g5_features[] = {
+	{ .selector = PMAC_FTR_GMAC_ENABLE, .function = g5_gmac_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE, .function = g5_fw_enable },
+	{ .selector = PMAC_FTR_ENABLE_MPIC, .function = g5_mpic_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET, .function = g5_eth_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = g5_i2s_enable },
+#ifdef CONFIG_SMP
+	{ .selector = PMAC_FTR_RESET_CPU, .function = g5_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ .selector = PMAC_FTR_READ_GPIO, .function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO, .function = core99_write_gpio },
+	{ .selector = 0, .function = NULL }
+};
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Table of known motherboards.
+ *
+ * Each entry lists, in order: an identification string (presumably what
+ * probe_motherboard() matches against the device-tree "model" /
+ * "compatible" properties -- confirm there), a human readable model
+ * name, the PMAC_TYPE_* model id, the board-specific feature table
+ * (NULL when there is none) and the initial PMAC_MB_* board flags.
+ *
+ * 32-bit and 64-bit builds get disjoint sets of entries.
+ */
+static struct pmac_mb_def pmac_mb_defs[] = {
+#ifndef CONFIG_PPC64
+	/*
+	 * Desktops
+	 */
+
+	{	"AAPL,8500",			"PowerMac 8500/8600",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,9500",			"PowerMac 9500/9600",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,7200",			"PowerMac 7200",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,7300",			"PowerMac 7200/7300",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,7500",			"PowerMac 7500",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,ShinerESB",		"Apple Network Server",
+		PMAC_TYPE_ANS,			NULL,
+		0
+	},
+	{	"AAPL,e407",			"Alchemy",
+		PMAC_TYPE_ALCHEMY,		NULL,
+		0
+	},
+	{	"AAPL,e411",			"Gazelle",
+		PMAC_TYPE_GAZELLE,		NULL,
+		0
+	},
+	{	"AAPL,Gossamer",		"PowerMac G3 (Gossamer)",
+		PMAC_TYPE_GOSSAMER,		heathrow_desktop_features,
+		0
+	},
+	{	"AAPL,PowerMac G3",		"PowerMac G3 (Silk)",
+		PMAC_TYPE_SILK,			heathrow_desktop_features,
+		0
+	},
+	{	"PowerMac1,1",			"Blue&White G3",
+		PMAC_TYPE_YOSEMITE,		paddington_features,
+		0
+	},
+	{	"PowerMac1,2",			"PowerMac G4 PCI Graphics",
+		PMAC_TYPE_YIKES,		paddington_features,
+		0
+	},
+	{	"PowerMac2,1",			"iMac FireWire",
+		PMAC_TYPE_FW_IMAC,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac2,2",			"iMac FireWire",
+		PMAC_TYPE_FW_IMAC,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,1",			"PowerMac G4 AGP Graphics",
+		PMAC_TYPE_SAWTOOTH,		core99_features,
+		PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,2",			"PowerMac G4 AGP Graphics",
+		PMAC_TYPE_SAWTOOTH,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,3",			"PowerMac G4 AGP Graphics",
+		PMAC_TYPE_SAWTOOTH,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,4",			"PowerMac G4 Silver",
+		PMAC_TYPE_QUICKSILVER,		core99_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac3,5",			"PowerMac G4 Silver",
+		PMAC_TYPE_QUICKSILVER,		core99_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac3,6",			"PowerMac G4 Windtunnel",
+		PMAC_TYPE_WINDTUNNEL,		core99_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac4,1",			"iMac \"Flower Power\"",
+		PMAC_TYPE_PANGEA_IMAC,		pangea_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac4,2",			"Flat panel iMac",
+		PMAC_TYPE_FLAT_PANEL_IMAC,	pangea_features,
+		PMAC_MB_CAN_SLEEP
+	},
+	{	"PowerMac4,4",			"eMac",
+		PMAC_TYPE_EMAC,			core99_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac5,1",			"PowerMac G4 Cube",
+		PMAC_TYPE_CUBE,			core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac6,1",			"Flat panel iMac",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac6,3",			"Flat panel iMac",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac6,4",			"eMac",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac10,1",			"Mac mini",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac10,2",			"Mac mini (Late 2005)",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"iMac,1",			"iMac (first generation)",
+		PMAC_TYPE_ORIG_IMAC,		paddington_features,
+		0
+	},
+
+	/*
+	 * Xserve's
+	 */
+
+	{	"RackMac1,1",			"XServe",
+		PMAC_TYPE_RACKMAC,		rackmac_features,
+		0,
+	},
+	{	"RackMac1,2",			"XServe rev. 2",
+		PMAC_TYPE_RACKMAC,		rackmac_features,
+		0,
+	},
+
+	/*
+	 * Laptops
+	 */
+
+	{	"AAPL,3400/2400",		"PowerBook 3400",
+		PMAC_TYPE_HOOPER,		ohare_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"AAPL,3500",			"PowerBook 3500",
+		PMAC_TYPE_KANGA,		ohare_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"AAPL,PowerBook1998",		"PowerBook Wallstreet",
+		PMAC_TYPE_WALLSTREET,		heathrow_laptop_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"PowerBook1,1",			"PowerBook 101 (Lombard)",
+		PMAC_TYPE_101_PBOOK,		paddington_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"PowerBook2,1",			"iBook (first generation)",
+		PMAC_TYPE_ORIG_IBOOK,		core99_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
+	},
+	{	"PowerBook2,2",			"iBook FireWire",
+		PMAC_TYPE_FW_IBOOK,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER |
+		PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,1",			"PowerBook Pismo",
+		PMAC_TYPE_PISMO,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER |
+		PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,2",			"PowerBook Titanium",
+		PMAC_TYPE_TITANIUM,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,3",			"PowerBook Titanium II",
+		PMAC_TYPE_TITANIUM2,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,4",			"PowerBook Titanium III",
+		PMAC_TYPE_TITANIUM3,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,5",			"PowerBook Titanium IV",
+		PMAC_TYPE_TITANIUM4,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,1",			"iBook 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,2",			"iBook 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,3",			"iBook 2 rev. 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook5,1",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,2",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,3",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,4",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,5",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,6",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,7",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,8",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,9",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,1",			"PowerBook G4 12\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,2",			"PowerBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,3",			"iBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,4",			"PowerBook G4 12\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,5",			"iBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,7",			"iBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,8",			"PowerBook G4 12\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+#else /* CONFIG_PPC64 */
+	/*
+	 * G5 machines. CONFIG_PPC64 is necessarily defined in this branch,
+	 * so no further conditional is needed around these entries.
+	 */
+	{	"PowerMac7,2",			"PowerMac G5",
+		PMAC_TYPE_POWERMAC_G5,		g5_features,
+		0,
+	},
+	{	"PowerMac7,3",			"PowerMac G5",
+		PMAC_TYPE_POWERMAC_G5,		g5_features,
+		0,
+	},
+	{	"PowerMac8,1",			"iMac G5",
+		PMAC_TYPE_IMAC_G5,		g5_features,
+		0,
+	},
+	{	"PowerMac9,1",			"PowerMac G5",
+		PMAC_TYPE_POWERMAC_G5_U3L,	g5_features,
+		0,
+	},
+	{	"PowerMac11,2",			"PowerMac G5 Dual Core",
+		PMAC_TYPE_POWERMAC_G5_U3L,	g5_features,
+		0,
+	},
+	{	"PowerMac12,1",			"iMac G5 (iSight)",
+		PMAC_TYPE_POWERMAC_G5_U3L,	g5_features,
+		0,
+	},
+	{	"RackMac3,1",			"XServe G5",
+		PMAC_TYPE_XSERVE_G5,		g5_features,
+		0,
+	},
+#endif /* CONFIG_PPC64 */
+};
+
+/*
+ * The toplevel feature_call entry point: route @selector to its handler
+ */
+long pmac_do_feature_call(unsigned int selector, ...)
+{
+	feature_call handler = NULL;
+	struct device_node *np;
+	long arg, val;
+	va_list ap;
+	int n;
+
+	/* The table of the detected motherboard, if any, takes precedence */
+	for (n = 0; pmac_mb.features && pmac_mb.features[n].function; n++) {
+		if (pmac_mb.features[n].selector != selector)
+			continue;
+		handler = pmac_mb.features[n].function;
+		break;
+	}
+	/* ... then fall back on the any_features table */
+	for (n = 0; !handler && any_features[n].function; n++) {
+		if (any_features[n].selector == selector)
+			handler = any_features[n].function;
+	}
+	if (!handler)
+		return -ENODEV;
+
+	va_start(ap, selector);
+	np = (struct device_node *)va_arg(ap, void *);
+	arg = va_arg(ap, long);
+	val = va_arg(ap, long);
+	va_end(ap);
+
+	return handler(np, arg, val);
+}
+
+/* Identify the motherboard: fill pmac_mb from pmac_mb_defs or, failing that,
+ * from the mac-io chip type. Returns 0 on success, -ENODEV on failure.
+ */
+static int __init probe_motherboard(void)
+{
+	int i, ret = 0;
+	struct macio_chip *macio = &macio_chips[0];
+	const char *model = NULL;
+	struct device_node *dt;
+
+	/* Look up a known motherboard type in the device-tree: first an exact
+	 * match on the "model" property, then a "compatible" match if none.
+	 */
+	dt = of_find_node_by_name(NULL, "device-tree");
+	if (dt != NULL)
+		model = of_get_property(dt, "model", NULL);
+	for(i=0; model && i<ARRAY_SIZE(pmac_mb_defs); i++) {
+	    if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
+		pmac_mb = pmac_mb_defs[i];
+		goto found;
+	    }
+	}
+	for(i=0; i<ARRAY_SIZE(pmac_mb_defs); i++) {
+	    if (of_machine_is_compatible(pmac_mb_defs[i].model_string)) {
+		pmac_mb = pmac_mb_defs[i];
+		goto found;
+	    }
+	}
+
+	/* Fallback to selection depending on mac-io chip type */
+	switch(macio->type) {
+#ifndef CONFIG_PPC64
+	    case macio_grand_central:
+		pmac_mb.model_id = PMAC_TYPE_PSURGE;
+		pmac_mb.model_name = "Unknown PowerSurge";
+		break;
+	    case macio_ohare:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE;
+		pmac_mb.model_name = "Unknown OHare-based";
+		break;
+	    case macio_heathrow:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW;
+		pmac_mb.model_name = "Unknown Heathrow-based";
+		pmac_mb.features = heathrow_desktop_features;
+		break;
+	    case macio_paddington:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON;
+		pmac_mb.model_name = "Unknown Paddington-based";
+		pmac_mb.features = paddington_features;
+		break;
+	    case macio_keylargo:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99;
+		pmac_mb.model_name = "Unknown Keylargo-based";
+		pmac_mb.features = core99_features;
+		break;
+	    case macio_pangea:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA;
+		pmac_mb.model_name = "Unknown Pangea-based";
+		pmac_mb.features = pangea_features;
+		break;
+	    case macio_intrepid:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID;
+		pmac_mb.model_name = "Unknown Intrepid-based";
+		pmac_mb.features = intrepid_features;
+		break;
+#else /* CONFIG_PPC64 */
+	case macio_keylargo2:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2;
+		pmac_mb.model_name = "Unknown K2-based";
+		pmac_mb.features = g5_features;
+		break;
+	case macio_shasta:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_SHASTA;
+		pmac_mb.model_name = "Unknown Shasta-based";
+		pmac_mb.features = g5_features;
+		break;
+#endif /* CONFIG_PPC64 */
+	default:
+		ret = -ENODEV;
+		goto done;
+	}
+found:
+#ifndef CONFIG_PPC64
+	/* Fixup Hooper vs. Comet */
+	if (pmac_mb.model_id == PMAC_TYPE_HOOPER) {
+		u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4);
+		if (!mach_id_ptr) {
+			ret = -ENODEV;
+			goto done;
+		}
+		/* Here, I used to disable the media-bay on comet. It
+		 * appears this is wrong, the floppy connector is actually
+		 * a kind of media-bay and works with the current driver.
+		 */
+		if (__raw_readl(mach_id_ptr) & 0x20000000UL)
+			pmac_mb.model_id = PMAC_TYPE_COMET;
+		iounmap(mach_id_ptr);
+	}
+
+	/* Set default value of powersave_nap on machines that support it.
+	 * It appears that uninorth rev 3 has a problem with it, we don't
+	 * enable it on those. In theory, the flush-on-lock property is
+	 * supposed to be set when not supported, but I'm not very confident
+	 * that all Apple OF revs did it properly, I do it the paranoid way.
+	 */
+	if (uninorth_base && uninorth_rev > 3) {
+		struct device_node *np;
+		/* Must live outside the loop so it counts CPUs across iterations */
+		int cpu_count = 1;
+
+		for_each_of_cpu_node(np) {
+			/* Nap mode not supported on SMP */
+			if (of_property_read_bool(np, "flush-on-lock") ||
+			    (cpu_count > 1)) {
+				powersave_nap = 0;
+				of_node_put(np);
+				break;
+			}
+
+			cpu_count++;
+			powersave_nap = 1;
+		}
+	}
+	if (powersave_nap)
+		printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
+
+	/* On CPUs that support it (750FX), lowspeed by default during
+	 * NAP mode
+	 */
+	powersave_lowspeed = 1;
+
+#else /* CONFIG_PPC64 */
+	powersave_nap = 1;
+#endif  /* CONFIG_PPC64 */
+
+	/* Check for "mobile" machine */
+	if (model && (strncmp(model, "PowerBook", 9) == 0
+		   || strncmp(model, "iBook", 5) == 0))
+		pmac_mb.board_flags |= PMAC_MB_MOBILE;
+
+	printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name);
+done:
+	of_node_put(dt);
+	return ret;
+}
+
+/* Locate and map the Core99 UniNorth (or G5 U3/U4) host bridge and memory
+ * controller, record its revision and apply Apple's arbiter/timeout setup. */
+static void __init probe_uninorth(void)
+{
+	struct resource res;
+	unsigned long actrl;
+
+	/* Locate core99 Uni-N */
+	uninorth_node = of_find_node_by_name(NULL, "uni-n");
+	uninorth_maj = 1;
+
+	/* Locate G5 u3 */
+	if (uninorth_node == NULL) {
+		uninorth_node = of_find_node_by_name(NULL, "u3");
+		uninorth_maj = 3;
+	}
+	/* Locate G5 u4 */
+	if (uninorth_node == NULL) {
+		uninorth_node = of_find_node_by_name(NULL, "u4");
+		uninorth_maj = 4;
+	}
+	if (uninorth_node == NULL) {
+		uninorth_maj = 0;
+		return;
+	}
+
+	if (of_address_to_resource(uninorth_node, 0, &res))
+		return;
+
+	uninorth_base = ioremap(res.start, 0x40000);
+	if (uninorth_base == NULL)
+		return;
+	uninorth_rev = in_be32(UN_REG(UNI_N_VERSION));
+	if (uninorth_maj == 3 || uninorth_maj == 4) {
+		u3_ht_base = ioremap(res.start + U3_HT_CONFIG_BASE, 0x1000);
+		if (u3_ht_base == NULL) {
+			iounmap(uninorth_base);
+			/* Don't leave a dangling mapping for later users */
+			uninorth_base = NULL;
+			return;
+		}
+	}
+
+	printk(KERN_INFO "Found %s memory controller & host bridge"
+	       " @ 0x%08x revision: 0x%02x\n", uninorth_maj == 3 ? "U3" :
+	       uninorth_maj == 4 ? "U4" : "UniNorth",
+	       (unsigned int)res.start, uninorth_rev);
+	printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base);
+
+	/* Set the arbiter QAck delay according to what Apple does */
+	if (uninorth_rev < 0x11) {
+		actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK;
+		actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 :
+			UNI_N_ARB_CTRL_QACK_DELAY) <<
+			UNI_N_ARB_CTRL_QACK_DELAY_SHIFT;
+		UN_OUT(UNI_N_ARB_CTRL, actrl);
+	}
+
+	/* Some more magic as done by them in recent MacOS X on UniNorth
+	 * revs 1.5 to 2.0 and Pangea. Seem to toggle the UniN Maxbus/PCI
+	 * memory timeout
+	 */
+	if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) ||
+	    uninorth_rev == 0xc0)
+		UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff);
+}
+
+/* Find the mac-io node called @name (and matching @compat, when given), map it
+ * and record it in the first free macio_chips[] slot as a chip of type @type.
+ */
+static void __init probe_one_macio(const char *name, const char *compat, int type)
+{
+	struct device_node*	node;
+	int			i;
+	volatile u32 __iomem	*base;
+	const u32		*addrp, *revp;
+	phys_addr_t		addr;
+	u64			size;
+
+	for_each_node_by_name(node, name) {
+		if (!compat)
+			break;
+		if (of_device_is_compatible(node, compat))
+			break;
+	}
+	if (!node)
+		return;
+	for(i=0; i<MAX_MACIO_CHIPS; i++) {
+		if (!macio_chips[i].of_node)
+			break;
+		if (macio_chips[i].of_node == node)
+			goto out_put;
+	}
+
+	if (i >= MAX_MACIO_CHIPS) {
+		printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
+		printk(KERN_ERR "pmac_feature: %pOF skipped\n", node);
+		goto out_put;
+	}
+	addrp = of_get_pci_address(node, 0, &size, NULL);
+	if (addrp == NULL) {
+		printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n", node);
+		goto out_put;
+	}
+	addr = of_translate_address(node, addrp);
+	if (addr == 0) {
+		printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n", node);
+		goto out_put;
+	}
+	base = ioremap(addr, (unsigned long)size);
+	if (!base) {
+		printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n", node);
+		goto out_put;
+	}
+	if (type == macio_keylargo || type == macio_keylargo2) {
+		const u32 *did = of_get_property(node, "device-id", NULL);
+		u32 id = did ? *did : 0;	/* property may be missing */
+		if (id == 0x00000025)
+			type = macio_pangea;
+		if (id == 0x0000003e)
+			type = macio_intrepid;
+		if (id == 0x0000004f)
+			type = macio_shasta;
+	}
+	macio_chips[i].of_node	= node;
+	macio_chips[i].type	= type;
+	macio_chips[i].base	= base;
+	macio_chips[i].flags	= MACIO_FLAG_SCCA_ON | MACIO_FLAG_SCCB_ON;
+	macio_chips[i].name	= macio_names[type];
+	revp = of_get_property(node, "revision-id", NULL);
+	if (revp)
+		macio_chips[i].rev = *revp;
+	printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
+		macio_names[type], macio_chips[i].rev, macio_chips[i].base);
+
+	return;
+out_put:
+	of_node_put(node);
+}
+
+static int __init
+probe_macios(void)
+{
+	/* Warning, ordering is important */
+	probe_one_macio("gc", NULL, macio_grand_central);
+	probe_one_macio("ohare", NULL, macio_ohare);
+	probe_one_macio("pci106b,7", NULL, macio_ohareII);
+	probe_one_macio("mac-io", "keylargo", macio_keylargo);
+	probe_one_macio("mac-io", "paddington", macio_paddington);
+	probe_one_macio("mac-io", "gatwick", macio_gatwick);
+	probe_one_macio("mac-io", "heathrow", macio_heathrow);
+	probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2);
+
+	/* The "main" chip must come first: swap the two slots when a
+	 * gatwick or ohareII chip was registered ahead of it
+	 */
+	if ((macio_chips[0].type == macio_gatwick &&
+	     macio_chips[1].type == macio_heathrow) ||
+	    (macio_chips[0].type == macio_ohareII &&
+	     macio_chips[1].type == macio_ohare)) {
+		struct macio_chip main_chip = macio_chips[1];
+		macio_chips[1] = macio_chips[0];
+		macio_chips[0] = main_chip;
+	}
+	macio_chips[0].lbus.index = 0;
+	macio_chips[1].lbus.index = 1;
+
+	if (macio_chips[0].of_node == NULL)
+		return -ENODEV;
+	return 0;
+}
+
+static void __init
+initial_serial_shutdown(struct device_node *np)
+{
+	/* Overlay for the "slot-names" property: a count, then name data */
+	const struct slot_names_prop {
+		int	count;
+		char	name[1];
+	} *slot_prop;
+	const char *connector;
+	int is_modem = 0, scc_mode = PMAC_SCC_ASYNC;
+	int plen;
+
+	slot_prop = of_get_property(np, "slot-names", &plen);
+	connector = of_get_property(np, "AAPL,connector", &plen);
+	if (connector && !strcmp(connector, "infrared")) {
+		scc_mode = PMAC_SCC_IRDA;
+	} else if (of_device_is_compatible(np, "cobalt")) {
+		is_modem = 1;
+	} else if (slot_prop && slot_prop->count > 0) {
+		if (!strcmp(slot_prop->name, "IrDA"))
+			scc_mode = PMAC_SCC_IRDA;
+		else if (!strcmp(slot_prop->name, "Modem"))
+			is_modem = 1;
+	}
+	if (is_modem)
+		pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0);
+	pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, scc_mode, 0);
+}
+
+static void __init
+set_initial_features(void)
+{
+	struct device_node *np;
+
+	/* Set the initial feature state of the mac-io/UniNorth cells.
+	 *
+	 * The OHare write below is a hack that appears to be necessary for
+	 * some StarMax motherboards; it was never audited for side-effects
+	 * on other ohare based machines, but since it has been there for a
+	 * long time it is kept around.
+	 */
+	if (macio_chips[0].type == macio_ohare) {
+		struct macio_chip *macio = &macio_chips[0];
+		np = of_find_node_by_name(NULL, "via-pmu");
+		if (np)
+			MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+		else
+			MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES);
+		of_node_put(np);
+	} else if (macio_chips[1].type == macio_ohare) {
+		struct macio_chip *macio = &macio_chips[1];
+		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+	}
+
+#ifdef CONFIG_PPC64
+	if (macio_chips[0].type == macio_keylargo2 ||
+	    macio_chips[0].type == macio_shasta) {
+#ifndef CONFIG_SMP
+		/* On SMP machines running UP, we have the second CPU eating
+		 * bus cycles. We need to take it off the bus. This is done
+		 * from pmac_smp for SMP kernels running on one CPU
+		 */
+		np = of_find_node_by_type(NULL, "cpu");
+		if (np != NULL)
+			np = of_find_node_by_type(np, "cpu");	/* is there a second cpu node? */
+		if (np != NULL) {
+			g5_phy_disable_cpu1();
+			of_node_put(np);
+		}
+#endif /* CONFIG_SMP */
+		/* Enable GMAC for now for PCI probing. It will be disabled
+		 * later on after PCI probe
+		 */
+		for_each_node_by_name(np, "ethernet")
+			if (of_device_is_compatible(np, "K2-GMAC"))
+				g5_gmac_enable(np, 0, 1);
+
+		/* Enable FW before PCI probe. Will be disabled later on
+		 * Note: We should have a better way to check that we are
+		 * dealing with uninorth internal cell and not a PCI cell
+		 * on the external PCI. The code below works though.
+		 */
+		for_each_node_by_name(np, "firewire") {
+			if (of_device_is_compatible(np, "pci106b,5811")) {
+				macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
+				g5_fw_enable(np, 0, 1);
+			}
+		}
+	}
+#else /* CONFIG_PPC64 */
+
+	if (macio_chips[0].type == macio_keylargo ||
+	    macio_chips[0].type == macio_pangea ||
+	    macio_chips[0].type == macio_intrepid) {
+		/* Enable GMAC for now for PCI probing. It will be disabled
+		 * later on after PCI probe
+		 */
+		for_each_node_by_name(np, "ethernet") {
+			if (np->parent
+			    && of_device_is_compatible(np->parent, "uni-north")
+			    && of_device_is_compatible(np, "gmac"))
+				core99_gmac_enable(np, 0, 1);
+		}
+
+		/* Enable FW before PCI probe. Will be disabled later on
+		 * Note: We should have a better way to check that we are
+		 * dealing with uninorth internal cell and not a PCI cell
+		 * on the external PCI. The code below works though.
+		 */
+		for_each_node_by_name(np, "firewire") {
+			if (np->parent
+			    && of_device_is_compatible(np->parent, "uni-north")
+			    && (of_device_is_compatible(np, "pci106b,18") ||
+			        of_device_is_compatible(np, "pci106b,30") ||
+			        of_device_is_compatible(np, "pci11c1,5811"))) {
+				macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
+				core99_firewire_enable(np, 0, 1);
+			}
+		}
+
+		/* Enable ATA-100 before PCI probe. */
+		for_each_node_by_name(np, "ata-6") {
+			if (np->parent
+			    && of_device_is_compatible(np->parent, "uni-north")
+			    && of_device_is_compatible(np, "kauai-ata")) {
+				core99_ata100_enable(np, 1);
+			}
+		}
+
+		/* Switch airport off */
+		for_each_node_by_name(np, "radio") {
+			if (np->parent == macio_chips[0].of_node) {
+				macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON;
+				core99_airport_enable(np, 0, 0);
+			}
+		}
+	}
+
+	/* On all machines that support sound PM, switch sound off */
+	if (macio_chips[0].of_node)
+		pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE,
+			macio_chips[0].of_node, 0, 0);
+
+	/* While on some desktop G3s, we turn it back on */
+	if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow
+		&& (pmac_mb.model_id == PMAC_TYPE_GOSSAMER ||
+		    pmac_mb.model_id == PMAC_TYPE_SILK)) {
+		struct macio_chip *macio = &macio_chips[0];
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
+	}
+
+#endif /* CONFIG_PPC64 */
+
+	/* On all machines, switch modem & serial ports off */
+	for_each_node_by_name(np, "ch-a")
+		initial_serial_shutdown(np);
+	for_each_node_by_name(np, "ch-b")
+		initial_serial_shutdown(np);
+}
+
+void __init
+pmac_feature_init(void)
+{
+	/* UniNorth first: probe_motherboard() looks at its revision */
+	probe_uninorth();
+
+	/* Without any mac-io chip there is nothing more to set up */
+	if (probe_macios() != 0) {
+		printk(KERN_WARNING "No mac-io chip found\n");
+		return;
+	}
+
+	/* An unknown machine is only reported, setup carries on */
+	if (probe_motherboard() != 0)
+		printk(KERN_WARNING "Unknown PowerMac !\n");
+
+	/* Apply the initial feature state (this turns off some chips
+	 * that will be turned on again later)
+	 */
+	set_initial_features();
+}
+
+#if 0
+static void dump_HT_speeds(char *name, u32 cfg, u32 frq)	/* compiled out (#if 0) debug helper */
+{
+	int	freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 };	/* MHz, 0 = unknown */
+	int	bits[8] = { 8,16,0,32,2,4,0,0 };	/* link width per 3-bit code */
+	int	freq = (frq >> 8) & 0xf;	/* 4-bit index into freqs[] */
+
+	if (freqs[freq] == 0)
+		printk("%s: Unknown HT link frequency %x\n", name, freq);
+	else
+		printk("%s: %d MHz on main link, (%d in / %d out) bits width\n",
+		       name, freqs[freq],
+		       bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]);
+}
+
+void __init pmac_check_ht_link(void)	/* compiled out (#if 0): prints HT link speeds */
+{
+	u32	ufreq, freq, ucfg, cfg;
+	struct device_node *pcix_node;
+	u8	px_bus, px_devfn;
+	struct pci_controller *px_hose;
+
+	(void)in_be32(u3_ht_base + U3_HT_LINK_COMMAND);
+	ucfg = cfg = in_be32(u3_ht_base + U3_HT_LINK_CONFIG);
+	ufreq = freq = in_be32(u3_ht_base + U3_HT_LINK_FREQ);
+	dump_HT_speeds("U3 HyperTransport", cfg, freq);
+
+	pcix_node = of_find_compatible_node(NULL, "pci", "pci-x");
+	if (pcix_node == NULL) {
+		printk("No PCI-X bridge found\n");
+		return;
+	}
+	/* NOTE(review): pcix_node is never of_node_put() below - fix if this is re-enabled */
+	if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) {
+		printk("PCI-X bridge found but not matched to pci\n");
+		return;
+	}
+	px_hose = pci_find_hose_for_OF_device(pcix_node);
+	if (px_hose == NULL) {
+		printk("PCI-X bridge found but not matched to host\n");
+		return;
+	}
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg);
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq);
+	dump_HT_speeds("PCI-X HT Uplink", cfg, freq);
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg);
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq);
+	dump_HT_speeds("PCI-X HT Downlink", cfg, freq);
+}
+#endif /* 0 */
+
+/*
+ * Early video resume hook
+ */
+
+static void (*pmac_early_vresume_proc)(void *data);
+static void *pmac_early_vresume_data;
+
+void pmac_set_early_video_resume(void (*proc)(void *data), void *data)
+{
+	if (machine_is(powermac)) {	/* ignored on any other machine */
+		preempt_disable();
+		pmac_early_vresume_proc = proc;
+		pmac_early_vresume_data = data;
+		preempt_enable();
+	}
+}
+EXPORT_SYMBOL(pmac_set_early_video_resume);
+
+void pmac_call_early_video_resume(void)
+{
+	if (pmac_early_vresume_proc != NULL)	/* nothing registered: no-op */
+		(*pmac_early_vresume_proc)(pmac_early_vresume_data);
+}
+
+/*
+ * AGP related suspend/resume code
+ */
+
+static struct pci_dev *pmac_agp_bridge;
+static int (*pmac_agp_suspend)(struct pci_dev *bridge);
+static int (*pmac_agp_resume)(struct pci_dev *bridge);
+
+void pmac_register_agp_pm(struct pci_dev *bridge,
+				 int (*suspend)(struct pci_dev *bridge),
+				 int (*resume)(struct pci_dev *bridge))
+{
+	/* Both hooks NULL means "unregister", honoured only for our bridge */
+	if (!suspend && !resume) {
+		if (bridge == pmac_agp_bridge)
+			pmac_agp_suspend = pmac_agp_resume = NULL;
+		return;
+	}
+
+	pmac_agp_bridge = bridge;
+	pmac_agp_suspend = suspend;
+	pmac_agp_resume = resume;
+}
+EXPORT_SYMBOL(pmac_register_agp_pm);
+
+void pmac_suspend_agp_for_card(struct pci_dev *dev)
+{
+	/* Needs a registered suspend hook and @dev on the bridge's bus */
+	if (!pmac_agp_bridge || !pmac_agp_suspend)
+		return;
+	if (dev->bus == pmac_agp_bridge->bus)
+		pmac_agp_suspend(pmac_agp_bridge);
+}
+EXPORT_SYMBOL(pmac_suspend_agp_for_card);
+
+void pmac_resume_agp_for_card(struct pci_dev *dev)
+{
+	/* Needs a registered resume hook and @dev on the bridge's bus */
+	if (!pmac_agp_bridge || !pmac_agp_resume)
+		return;
+	if (dev->bus == pmac_agp_bridge->bus)
+		pmac_agp_resume(pmac_agp_bridge);
+}
+EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+	return uninorth_maj;	/* set by probe_uninorth(): 0 none, 1 UniNorth, 3 U3, 4 U4 */
+}
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
new file mode 100644
index 0000000000..40f3aa432f
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -0,0 +1,1514 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/powermac/low_i2c.c
+ *
+ *  Copyright (C) 2003-2005 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * The linux i2c layer isn't completely suitable for our needs for various
+ * reasons ranging from too late initialisation to semantics not perfectly
+ * matching some requirements of the apple platform functions etc...
+ *
+ * This file thus provides a simple low level unified i2c interface for
+ * powermac that covers the various types of i2c busses used in Apple machines.
+ * For now, keywest, PMU and SMU, though we could add Cuda, or other bit
+ * banging busses found on older chipsets in earlier machines if we ever need
+ * one of them.
+ *
+ * The drivers in this file are synchronous/blocking. In addition, the
+ * keywest one is fairly slow due to the use of msleep instead of interrupts
+ * as the interrupt is currently used by i2c-keywest. In the long run, we
+ * might want to get rid of those high-level interfaces to linux i2c layer
+ * either completely (converting all drivers) or replacing them all with a
+ * single stub driver on top of this one. Once done, the interrupt will be
+ * available for our use.
+ */
+
+#undef DEBUG
+#undef DEBUG_LOW
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/mutex.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <asm/keylargo.h>
+#include <asm/uninorth.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/smu.h>
+#include <asm/pmac_pfunc.h>
+#include <asm/pmac_low_i2c.h>
+
+#ifdef DEBUG
+#define DBG(x...) do {\
+		printk(KERN_DEBUG "low_i2c:" x);	\
+	} while(0)
+#else
+#define DBG(x...)
+#endif
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(x...) do {\
+		printk(KERN_DEBUG "low_i2c:" x);	\
+	} while(0)
+#else
+#define DBG_LOW(x...)
+#endif
+
+
+static int pmac_i2c_force_poll = 1;
+
+/*
+ * A bus structure. Each bus in the system has such a structure associated.
+ */
+struct pmac_i2c_bus
+{
+	struct list_head	link;		/* presumably on pmac_i2c_busses - TODO confirm */
+	struct device_node	*controller;
+	struct device_node	*busnode;
+	int			type;
+	int			flags;
+	struct i2c_adapter	adapter;
+	void			*hostdata;	/* host private data (struct pmac_i2c_host_kw for keywest) */
+	int			channel;	/* some hosts have multiple */
+	int			mode;		/* current mode (pmac_i2c_mode_*) */
+	struct mutex		mutex;
+	int			opened;
+	int			polled;		/* open mode; non-zero keeps keywest off the irq path */
+	struct platform_device	*platform_dev;
+	struct lock_class_key   lock_key;
+
+	/* ops, supplied by the host implementation (e.g. kw_i2c_open/close/xfer) */
+	int (*open)(struct pmac_i2c_bus *bus);
+	void (*close)(struct pmac_i2c_bus *bus);
+	int (*xfer)(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+		    u32 subaddr, u8 *data, int len);
+};
+
+static LIST_HEAD(pmac_i2c_busses);
+
+/*
+ * Keywest implementation
+ */
+
+struct pmac_i2c_host_kw
+{
+	struct mutex		mutex;		/* Access mutex for use by
+						 * i2c-keywest */
+	void __iomem		*base;		/* register base address */
+	int			bsteps;		/* register stepping */
+	int			speed;		/* speed */
+	int			irq;		/* 0 means no irq: transfers are polled */
+	u8			*data;		/* cursor into the caller's buffer */
+	unsigned		len;		/* bytes left to transfer */
+	int			state;		/* state machine state (state_*) */
+	int			rw;		/* bit 0 of addrdir; non-zero for a read */
+	int			polled;		/* copy of bus->polled for the current transfer */
+	int			result;		/* transfer status returned by kw_i2c_xfer() */
+	struct completion	complete;	/* completed at end of a non-polled transfer */
+	spinlock_t		lock;		/* held around kw_i2c_handle_interrupt() */
+	struct timer_list	timeout_timer;	/* armed for KW_POLL_TIMEOUT in irq mode */
+};
+
+/* Register indices */
+typedef enum {
+	reg_mode = 0,
+	reg_control,
+	reg_status,
+	reg_isr,
+	reg_ier,
+	reg_addr,
+	reg_subaddr,
+	reg_data
+} reg_t;
+
+/* The Tumbler audio equalizer can be really slow sometimes */
+#define KW_POLL_TIMEOUT		(2*HZ)
+
+/* Mode register */
+#define KW_I2C_MODE_100KHZ	0x00
+#define KW_I2C_MODE_50KHZ	0x01
+#define KW_I2C_MODE_25KHZ	0x02
+#define KW_I2C_MODE_DUMB	0x00
+#define KW_I2C_MODE_STANDARD	0x04
+#define KW_I2C_MODE_STANDARDSUB	0x08
+#define KW_I2C_MODE_COMBINED	0x0C
+#define KW_I2C_MODE_MODE_MASK	0x0C
+#define KW_I2C_MODE_CHAN_MASK	0xF0
+
+/* Control register */
+#define KW_I2C_CTL_AAK		0x01
+#define KW_I2C_CTL_XADDR	0x02
+#define KW_I2C_CTL_STOP		0x04
+#define KW_I2C_CTL_START	0x08
+
+/* Status register */
+#define KW_I2C_STAT_BUSY	0x01
+#define KW_I2C_STAT_LAST_AAK	0x02
+#define KW_I2C_STAT_LAST_RW	0x04
+#define KW_I2C_STAT_SDA		0x08
+#define KW_I2C_STAT_SCL		0x10
+
+/* IER & ISR registers */
+#define KW_I2C_IRQ_DATA		0x01
+#define KW_I2C_IRQ_ADDR		0x02
+#define KW_I2C_IRQ_STOP		0x04
+#define KW_I2C_IRQ_START	0x08
+#define KW_I2C_IRQ_MASK		0x0F
+
+/* State machine states */
+enum {
+	state_idle,
+	state_addr,
+	state_read,
+	state_write,
+	state_stop,
+	state_dead
+};
+
+#define WRONG_STATE(name) do {\
+		printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s " \
+		       "(isr: %02x)\n",	\
+		       name, __kw_state_names[host->state], isr); \
+	} while(0)
+
+static const char *__kw_state_names[] = {
+	"state_idle",
+	"state_addr",
+	"state_read",
+	"state_write",
+	"state_stop",
+	"state_dead"
+};
+
+static inline u8 __kw_read_reg(struct pmac_i2c_host_kw *host, reg_t reg)
+{
+	return readb(host->base + ((unsigned int)reg << host->bsteps));	/* regs sit 1 << bsteps apart */
+}
+
+static inline void __kw_write_reg(struct pmac_i2c_host_kw *host,
+				  reg_t reg, u8 val)
+{
+	writeb(val, host->base + ((unsigned int)reg << host->bsteps));
+	(void)__kw_read_reg(host, reg_subaddr);	/* read back; presumably flushes the write - TODO confirm */
+}
+
+#define kw_write_reg(reg, val)	__kw_write_reg(host, reg, val)
+#define kw_read_reg(reg)	__kw_read_reg(host, reg)
+
+/* Poll the ISR up to 1000 times; returns the pending bits, 0 on timeout */
+static u8 kw_i2c_wait_interrupt(struct pmac_i2c_host_kw *host)
+{
+	int tries, spin;
+	u8 pending;
+
+	for (tries = 1000; tries > 0; tries--) {
+		pending = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK;
+		if (pending)
+			break;
+		if (!host->polled) {
+			msleep(1);
+			continue;
+		}
+		/* Polled mode runs with the timebase frozen: neither udelay
+		 * nor schedule can be relied on, so spin in a bogus loop
+		 */
+		for (spin = 1; spin < 100000; spin++)
+			mb();
+	}
+	return pending;
+}
+
+static void kw_i2c_do_stop(struct pmac_i2c_host_kw *host, int result)
+{
+	kw_write_reg(reg_control, KW_I2C_CTL_STOP);	/* set the STOP control bit */
+	host->result = result;
+	host->state = state_stop;	/* the STOP interrupt then moves us to idle */
+}
+
+/* Advance the transfer state machine for the ISR bits in @isr (0 means timeout) */
+static void kw_i2c_handle_interrupt(struct pmac_i2c_host_kw *host, u8 isr)
+{
+	u8 ack;
+
+	DBG_LOW("kw_handle_interrupt(%s, isr: %x)\n",
+		__kw_state_names[host->state], isr);
+
+	if (host->state == state_idle) {
+		printk(KERN_WARNING "low_i2c: Keywest got an out of state"
+		       " interrupt, ignoring\n");
+		kw_write_reg(reg_isr, isr);
+		return;
+	}
+
+	if (isr == 0) {
+		printk(KERN_WARNING "low_i2c: Timeout in i2c transfer"
+		       " on keywest !\n");
+		if (host->state != state_stop) {
+			kw_i2c_do_stop(host, -EIO);	/* first timeout: try a STOP */
+			return;
+		}
+		/* Timed out while already stopping: force the host back to idle */
+		ack = kw_read_reg(reg_status);
+		if (ack & KW_I2C_STAT_BUSY)
+			kw_write_reg(reg_status, 0);
+		host->state = state_idle;
+		kw_write_reg(reg_ier, 0x00);
+		if (!host->polled)
+			complete(&host->complete);
+		return;
+	}
+
+	if (isr & KW_I2C_IRQ_ADDR) {
+		ack = kw_read_reg(reg_status);
+		if (host->state != state_addr) {
+			WRONG_STATE("KW_I2C_IRQ_ADDR"); 
+			kw_i2c_do_stop(host, -EIO);
+		}
+		if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {
+			host->result = -ENXIO;
+			host->state = state_stop;
+			DBG_LOW("KW: NAK on address\n");
+		} else {
+			if (host->len == 0)
+				kw_i2c_do_stop(host, 0);
+			else if (host->rw) {
+				host->state = state_read;
+				if (host->len > 1)	/* AAK only when more than one byte is expected */
+					kw_write_reg(reg_control,
+						     KW_I2C_CTL_AAK);
+			} else {
+				host->state = state_write;
+				kw_write_reg(reg_data, *(host->data++));
+				host->len--;
+			}
+		}
+		kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR);
+	}
+
+	if (isr & KW_I2C_IRQ_DATA) {
+		if (host->state == state_read) {
+			*(host->data++) = kw_read_reg(reg_data);
+			host->len--;
+			kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+			if (host->len == 0)
+				host->state = state_stop;
+			else if (host->len == 1)	/* one byte left: clear AAK */
+				kw_write_reg(reg_control, 0);
+		} else if (host->state == state_write) {
+			ack = kw_read_reg(reg_status);
+			if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {
+				DBG_LOW("KW: nack on data write\n");
+				host->result = -EFBIG;
+				host->state = state_stop;
+			} else if (host->len) {
+				kw_write_reg(reg_data, *(host->data++));
+				host->len--;
+			} else
+				kw_i2c_do_stop(host, 0);
+		} else {
+			WRONG_STATE("KW_I2C_IRQ_DATA"); 
+			if (host->state != state_stop)
+				kw_i2c_do_stop(host, -EIO);
+		}
+		kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+	}
+
+	if (isr & KW_I2C_IRQ_STOP) {
+		kw_write_reg(reg_isr, KW_I2C_IRQ_STOP);
+		if (host->state != state_stop) {
+			WRONG_STATE("KW_I2C_IRQ_STOP");
+			host->result = -EIO;
+		}
+		host->state = state_idle;	/* transfer over */
+		if (!host->polled)
+			complete(&host->complete);	/* wakes the waiter in kw_i2c_xfer() */
+	}
+
+	/* Below should only happen in manual mode which we don't use ... */
+	if (isr & KW_I2C_IRQ_START)
+		kw_write_reg(reg_isr, KW_I2C_IRQ_START);
+
+}
+
+/* Interrupt handler: run the state machine under host->lock, re-arm the timeout */
+static irqreturn_t kw_i2c_irq(int irq, void *dev_id)
+{
+	struct pmac_i2c_host_kw *host = dev_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+	del_timer(&host->timeout_timer);	/* progress was made: drop the pending timeout */
+	kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr));
+	if (host->state != state_idle) {	/* transfer still running: arm a fresh timeout */
+		host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
+		add_timer(&host->timeout_timer);
+	}
+	spin_unlock_irqrestore(&host->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static void kw_i2c_timeout(struct timer_list *t)	/* timeout_timer callback */
+{
+	struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer);
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	/*
+	 * If the timer is pending, that means we raced with the
+	 * irq, in which case we just return
+	 */
+	if (timer_pending(&host->timeout_timer))
+		goto skip;
+
+	kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr));	/* an ISR of 0 is handled as a timeout */
+	if (host->state != state_idle) {	/* still not idle: keep the watchdog running */
+		host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
+		add_timer(&host->timeout_timer);
+	}
+ skip:
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+
+static int kw_i2c_open(struct pmac_i2c_bus *bus)
+{
+	struct pmac_i2c_host_kw *kw = bus->hostdata;
+	mutex_lock(&kw->mutex);		/* held until kw_i2c_close() */
+	return 0;
+}
+
+static void kw_i2c_close(struct pmac_i2c_bus *bus)
+{
+	struct pmac_i2c_host_kw *kw = bus->hostdata;
+	mutex_unlock(&kw->mutex);	/* pairs with kw_i2c_open() */
+}
+
+static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+		       u32 subaddr, u8 *data, int len)
+{
+	struct pmac_i2c_host_kw *host = bus->hostdata;
+	u8 mode_reg = host->speed;	/* speed bits; the mode bits are OR-ed in below */
+	int use_irq = host->irq && !bus->polled;
+
+	/* Setup mode & subaddress if any */
+	switch(bus->mode) {
+	case pmac_i2c_mode_dumb:
+		return -EINVAL;		/* dumb mode is rejected here */
+	case pmac_i2c_mode_std:
+		mode_reg |= KW_I2C_MODE_STANDARD;
+		if (subsize != 0)
+			return -EINVAL;
+		break;
+	case pmac_i2c_mode_stdsub:
+		mode_reg |= KW_I2C_MODE_STANDARDSUB;
+		if (subsize != 1)
+			return -EINVAL;
+		break;
+	case pmac_i2c_mode_combined:
+		mode_reg |= KW_I2C_MODE_COMBINED;
+		if (subsize != 1)
+			return -EINVAL;
+		break;
+	}
+
+	/* Setup channel & clear pending irqs */
+	kw_write_reg(reg_isr, kw_read_reg(reg_isr));
+	kw_write_reg(reg_mode, mode_reg | (bus->channel << 4));	/* channel in bits 7:4 (KW_I2C_MODE_CHAN_MASK) */
+	kw_write_reg(reg_status, 0);
+
+	/* Set up address and r/w bit, strip possible stale bus number from
+	 * address top bits
+	 */
+	kw_write_reg(reg_addr, addrdir & 0xff);
+
+	/* Set up the sub address */
+	if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB
+	    || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED)
+		kw_write_reg(reg_subaddr, subaddr);
+
+	/* Prepare for async operations */
+	host->data = data;
+	host->len = len;
+	host->state = state_addr;
+	host->result = 0;
+	host->rw = (addrdir & 1);	/* bit 0 set: read (state_read in the handler) */
+	host->polled = bus->polled;
+
+	/* Enable interrupt if not using polled mode and interrupt is
+	 * available
+	 */
+	if (use_irq) {
+		/* Clear completion */
+		reinit_completion(&host->complete);
+		/* Ack stale interrupts */
+		kw_write_reg(reg_isr, kw_read_reg(reg_isr));
+		/* Arm timeout */
+		host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
+		add_timer(&host->timeout_timer);
+		/* Enable emission */
+		kw_write_reg(reg_ier, KW_I2C_IRQ_MASK);
+	}
+
+	/* Start sending address */
+	kw_write_reg(reg_control, KW_I2C_CTL_XADDR);
+
+	/* Wait for completion */
+	if (use_irq)
+		wait_for_completion(&host->complete);
+	else {
+		while(host->state != state_idle) {	/* drive the state machine by polling */
+			unsigned long flags;
+
+			u8 isr = kw_i2c_wait_interrupt(host);	/* 0 if nothing showed up in time */
+			spin_lock_irqsave(&host->lock, flags);
+			kw_i2c_handle_interrupt(host, isr);
+			spin_unlock_irqrestore(&host->lock, flags);
+		}
+	}
+
+	/* Disable emission */
+	kw_write_reg(reg_ier, 0);
+
+	return host->result;	/* 0, or the negative errno set by the state machine */
+}
+
+/*
+ * Allocate and set up the host structure for one KeyWest i2c cell.
+ *
+ * @np: device node of the cell; "AAPL,address" is mandatory, while
+ *      "AAPL,address-step" and "AAPL,i2c-rate" are optional
+ *
+ * Returns the new host, or NULL when the allocation fails, the address
+ * property is missing or the registers cannot be mapped. A missing or
+ * unusable interrupt is not fatal: host->irq is left at 0, which makes
+ * kw_i2c_xfer() poll instead.
+ */
+static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
+{
+	struct pmac_i2c_host_kw *host;
+	const u32		*psteps, *prate, *addrp;
+	u32			steps;
+
+	host = kzalloc(sizeof(*host), GFP_KERNEL);
+	if (host == NULL) {
+		printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
+		       np);
+		return NULL;
+	}
+
+	/* Apple is kind enough to provide a valid AAPL,address property
+	 * on all i2c keywest nodes so far ... we would have to fallback
+	 * to macio parsing if that wasn't the case
+	 */
+	addrp = of_get_property(np, "AAPL,address", NULL);
+	if (addrp == NULL) {
+		printk(KERN_ERR "low_i2c: Can't find address for %pOF\n",
+		       np);
+		kfree(host);
+		return NULL;
+	}
+	mutex_init(&host->mutex);
+	init_completion(&host->complete);
+	spin_lock_init(&host->lock);
+	timer_setup(&host->timeout_timer, kw_i2c_timeout, 0);
+
+	/* Turn the register stride into a shift count (default 0x10) */
+	psteps = of_get_property(np, "AAPL,address-step", NULL);
+	steps = psteps ? (*psteps) : 0x10;
+	for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++)
+		steps >>= 1;
+
+	/* Select interface rate, 25KHz unless the property says otherwise */
+	host->speed = KW_I2C_MODE_25KHZ;
+	prate = of_get_property(np, "AAPL,i2c-rate", NULL);
+	if (prate) {
+		switch (*prate) {
+		case 100:
+			host->speed = KW_I2C_MODE_100KHZ;
+			break;
+		case 50:
+			host->speed = KW_I2C_MODE_50KHZ;
+			break;
+		case 25:
+			host->speed = KW_I2C_MODE_25KHZ;
+			break;
+		}
+	}
+	host->irq = irq_of_parse_and_map(np, 0);
+	if (!host->irq)
+		printk(KERN_WARNING
+		       "low_i2c: Failed to map interrupt for %pOF\n",
+		       np);
+
+	host->base = ioremap((*addrp), 0x1000);
+	if (host->base == NULL) {
+		printk(KERN_ERR "low_i2c: Can't map registers for %pOF\n",
+		       np);
+		kfree(host);
+		return NULL;
+	}
+
+	/* Make sure IRQ is disabled */
+	kw_write_reg(reg_ier, 0);
+
+	/* Request chip interrupt. We set IRQF_NO_SUSPEND because we don't
+	 * want that interrupt disabled between the 2 passes of driver
+	 * suspend or we'll have issues running the pfuncs.
+	 *
+	 * Nothing is requested when the mapping failed: irq 0 is not ours
+	 * to claim, and the host then simply stays in polled mode.
+	 */
+	if (host->irq && request_irq(host->irq, kw_i2c_irq, IRQF_NO_SUSPEND,
+				     "keywest i2c", host))
+		host->irq = 0;
+
+	printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %pOF\n",
+	       *addrp, host->irq, np);
+
+	return host;
+}
+
+
+/*
+ * Create one bus on a KeyWest host and put it on the global bus list.
+ *
+ * @host:       host the bus belongs to
+ * @controller: node of the i2c cell
+ * @busnode:    node of the bus; equal to @controller on multibus setups
+ * @channel:    channel number of the bus on the cell
+ *
+ * The bus holds a reference on both nodes. Nothing is created when the
+ * allocation fails.
+ */
+static void __init kw_i2c_add(struct pmac_i2c_host_kw *host,
+			      struct device_node *controller,
+			      struct device_node *busnode,
+			      int channel)
+{
+	const int is_multibus = (controller == busnode);
+	struct pmac_i2c_bus *bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+
+	if (!bus)
+		return;
+
+	/* Identity of the bus */
+	bus->type = pmac_i2c_bus_keywest;
+	bus->controller = of_node_get(controller);
+	bus->busnode = of_node_get(busnode);
+	bus->hostdata = host;
+	bus->channel = channel;
+	bus->mode = pmac_i2c_mode_std;
+
+	/* KeyWest specific operations */
+	bus->open = kw_i2c_open;
+	bus->close = kw_i2c_close;
+	bus->xfer = kw_i2c_xfer;
+
+	/* Each bus mutex gets a lockdep class of its own */
+	mutex_init(&bus->mutex);
+	lockdep_register_key(&bus->lock_key);
+	lockdep_set_class(&bus->mutex, &bus->lock_key);
+
+	if (is_multibus)
+		bus->flags = pmac_i2c_multibus;
+	list_add(&bus->link, &pmac_i2c_busses);
+
+	printk(KERN_INFO " channel %d bus %s\n", channel,
+	       is_multibus ? "<multibus>" : busnode->full_name);
+}
+
+/*
+ * Find every "keywest-i2c" compatible cell, create a host for it and
+ * register its busses.
+ */
+static void __init kw_i2c_probe(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, "i2c", "keywest-i2c") {
+		struct pmac_i2c_host_kw *host = kw_i2c_host_init(np);
+		struct device_node *first, *child, *parent;
+		int is_multibus, nchans, ch;
+
+		if (!host)
+			continue;
+
+		/* Old style (multibus) setups have no "i2c-bus" child
+		 * node. Note that the "new" way (proper bus nodes) might
+		 * cause us to not create some busses that are kept hidden
+		 * in the device-tree. In the future, we might want to work
+		 * around that by creating busses without a node, but not
+		 * for now.
+		 */
+		first = of_get_next_child(np, NULL);
+		is_multibus = !of_node_name_eq(first, "i2c-bus");
+		of_node_put(first);
+
+		if (!is_multibus) {
+			/* One bus per child that carries a "reg" property */
+			for_each_child_of_node(np, child) {
+				const u32 *reg;
+
+				reg = of_get_property(child, "reg", NULL);
+				if (reg)
+					kw_i2c_add(host, np, child, *reg);
+			}
+			continue;
+		}
+
+		/* Multibus: the channel count depends on the parent, two
+		 * when its name starts with 'u', one otherwise
+		 */
+		parent = of_get_parent(np);
+		if (!parent)
+			continue;
+		nchans = (parent->name[0] == 'u') ? 2 : 1;
+		of_node_put(parent);
+
+		for (ch = 0; ch < nchans; ch++)
+			kw_i2c_add(host, np, np, ch);
+	}
+}
+
+
+/*
+ *
+ * PMU implementation
+ *
+ */
+
+#ifdef CONFIG_ADB_PMU
+
+/*
+ * i2c command block to the PMU
+ */
+struct pmu_i2c_hdr {
+	u8	bus;		/* bus channel, or PMU_I2C_BUS_STATUS to poll */
+	u8	mode;		/* one of the PMU_I2C_MODE_* values */
+	u8	bus2;		/* not set by the code in this file */
+	u8	address;	/* device address */
+	u8	sub_addr;	/* sub-address (stdsub and combined modes) */
+	u8	comb_addr;	/* address + direction bit (stdsub/combined) */
+	u8	count;		/* number of data bytes */
+	u8	data[];		/* payload of a write */
+};
+
+/* Request "done" callback: signal the completion stored in req->arg */
+static void pmu_i2c_complete(struct adb_request *req)
+{
+	struct completion *done = req->arg;
+
+	complete(done);
+}
+
+/*
+ * Run one i2c transfer through the PMU.
+ *
+ * The transfer is done in two phases, each retried up to 16 times: the
+ * command is queued until the PMU accepts it, then the bus status is
+ * polled until the PMU reports the outcome (and the data, for a read).
+ *
+ * @bus:     bus to use; hostdata is the adb_request used for all requests
+ * @addrdir: device address with the transfer direction in bit 0
+ * @subsize: sub-address size; 0 in std mode, 1 in stdsub/combined modes
+ * @subaddr: sub-address for stdsub/combined modes
+ * @data:    bytes to write, or buffer receiving the bytes read
+ * @len:     number of bytes, from 0 to 16
+ *
+ * Returns 0 on success, -EINVAL for a bad length, mode or sub-address
+ * size, the pmu_queue_request() error if queueing fails, and -EIO when
+ * the PMU never reports success or returns an unexpected byte count.
+ */
+static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+			u32 subaddr, u8 *data, int len)
+{
+	struct adb_request *req = bus->hostdata;
+	struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req->data[1];
+	struct completion comp;
+	int read = addrdir & 1;
+	int retry;
+	int rc = 0;
+
+	/* For now, limit ourselves to 16 bytes transfers. A negative
+	 * length is rejected as well, it would otherwise be turned into
+	 * a huge size by the memcpy() calls below.
+	 */
+	if (len < 0 || len > 16)
+		return -EINVAL;
+
+	init_completion(&comp);
+
+	/* Phase 1: submit the command until the PMU accepts it */
+	for (retry = 0; retry < 16; retry++) {
+		memset(req, 0, sizeof(struct adb_request));
+		hdr->bus = bus->channel;
+		hdr->count = len;
+
+		switch(bus->mode) {
+		case pmac_i2c_mode_std:
+			if (subsize != 0)
+				return -EINVAL;
+			hdr->address = addrdir;
+			hdr->mode = PMU_I2C_MODE_SIMPLE;
+			break;
+		case pmac_i2c_mode_stdsub:
+		case pmac_i2c_mode_combined:
+			if (subsize != 1)
+				return -EINVAL;
+			hdr->address = addrdir & 0xfe;
+			hdr->comb_addr = addrdir;
+			hdr->sub_addr = subaddr;
+			if (bus->mode == pmac_i2c_mode_stdsub)
+				hdr->mode = PMU_I2C_MODE_STDSUB;
+			else
+				hdr->mode = PMU_I2C_MODE_COMBINED;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		reinit_completion(&comp);
+		req->data[0] = PMU_I2C_CMD;
+		req->reply[0] = 0xff;
+		req->nbytes = sizeof(struct pmu_i2c_hdr) + 1;
+		req->done = pmu_i2c_complete;
+		req->arg = &comp;
+		if (!read && len) {
+			memcpy(hdr->data, data, len);
+			req->nbytes += len;
+		}
+		rc = pmu_queue_request(req);
+		if (rc)
+			return rc;
+		wait_for_completion(&comp);
+		if (req->reply[0] == PMU_I2C_STATUS_OK)
+			break;
+		msleep(15);
+	}
+	if (req->reply[0] != PMU_I2C_STATUS_OK)
+		return -EIO;
+
+	/* Phase 2: poll the bus status until the transfer is over */
+	for (retry = 0; retry < 16; retry++) {
+		memset(req, 0, sizeof(struct adb_request));
+
+		/* I know that looks like a lot, slow as hell, but darwin
+		 * does it so let's be on the safe side for now
+		 */
+		msleep(15);
+
+		hdr->bus = PMU_I2C_BUS_STATUS;
+
+		reinit_completion(&comp);
+		req->data[0] = PMU_I2C_CMD;
+		req->reply[0] = 0xff;
+		req->nbytes = 2;
+		req->done = pmu_i2c_complete;
+		req->arg = &comp;
+		rc = pmu_queue_request(req);
+		if (rc)
+			return rc;
+		wait_for_completion(&comp);
+
+		if (req->reply[0] == PMU_I2C_STATUS_OK && !read)
+			return 0;
+		if (req->reply[0] == PMU_I2C_STATUS_DATAREAD && read) {
+			/* First reply byte is the status, data follows */
+			int rlen = req->reply_len - 1;
+
+			if (rlen != len) {
+				printk(KERN_WARNING "low_i2c: PMU returned %d"
+				       " bytes, expected %d !\n", rlen, len);
+				return -EIO;
+			}
+			if (len)
+				memcpy(data, &req->reply[1], len);
+			return 0;
+		}
+	}
+	return -EIO;
+}
+
+/*
+ * Register the PMU i2c busses (channels 1 and 2) when a PMU is present
+ * and a "pmu-i2c" or "via-pmu" node exists.
+ *
+ * Each bus is allocated together with the adb_request it uses for its
+ * transfers, and holds its own references on the node. The reference
+ * returned by the lookup is dropped before returning, on every path.
+ */
+static void __init pmu_i2c_probe(void)
+{
+	struct pmac_i2c_bus *bus;
+	struct device_node *busnode;
+	int channel, sz;
+
+	if (!pmu_present())
+		return;
+
+	/* There might or might not be a "pmu-i2c" node, we use that
+	 * or via-pmu itself, whatever we find. I haven't seen a machine
+	 * with separate bus nodes, so we assume a multibus setup
+	 */
+	busnode = of_find_node_by_name(NULL, "pmu-i2c");
+	if (busnode == NULL)
+		busnode = of_find_node_by_name(NULL, "via-pmu");
+	if (busnode == NULL)
+		return;
+
+	printk(KERN_INFO "PMU i2c %pOF\n", busnode);
+
+	/*
+	 * We add bus 1 and 2 only for now, bus 0 is "special"
+	 */
+	for (channel = 1; channel <= 2; channel++) {
+		sz = sizeof(struct pmac_i2c_bus) + sizeof(struct adb_request);
+		bus = kzalloc(sz, GFP_KERNEL);
+		if (bus == NULL)
+			break;
+
+		bus->controller = of_node_get(busnode);
+		bus->busnode = of_node_get(busnode);
+		bus->type = pmac_i2c_bus_pmu;
+		bus->channel = channel;
+		bus->mode = pmac_i2c_mode_std;
+		/* The adb_request lives right behind the bus structure */
+		bus->hostdata = bus + 1;
+		bus->xfer = pmu_i2c_xfer;
+		mutex_init(&bus->mutex);
+		lockdep_register_key(&bus->lock_key);
+		lockdep_set_class(&bus->mutex, &bus->lock_key);
+		bus->flags = pmac_i2c_multibus;
+		list_add(&bus->link, &pmac_i2c_busses);
+
+		printk(KERN_INFO " channel %d bus <multibus>\n", channel);
+	}
+
+	/* The busses hold their own references, drop the lookup one */
+	of_node_put(busnode);
+}
+
+#endif /* CONFIG_ADB_PMU */
+
+
+/*
+ *
+ * SMU implementation
+ *
+ */
+
+#ifdef CONFIG_PMAC_SMU
+
+/* Command "done" callback: @misc is the completion to signal */
+static void smu_i2c_complete(struct smu_i2c_cmd *cmd, void *misc)
+{
+	struct completion *done = misc;
+
+	complete(done);
+}
+
+/*
+ * Run one i2c transfer through the SMU and wait for its completion.
+ *
+ * @bus:     bus to use; hostdata is the smu_i2c_cmd used for the command
+ * @addrdir: device address with the transfer direction in bit 0
+ * @subsize: sub-address size; 0 in std mode, 1 to 3 in stdsub/combined
+ * @subaddr: sub-address, its @subsize low order bytes are used
+ * @data:    bytes to write, or buffer receiving the bytes read
+ * @len:     number of bytes, at most SMU_I2C_READ_MAX for a read and
+ *           SMU_I2C_WRITE_MAX for a write
+ *
+ * Returns 0 on success, -EINVAL for a bad length, mode or sub-address
+ * size, or the negative value coming from smu_queue_i2c() or from the
+ * status of the command.
+ */
+static int smu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+			u32 subaddr, u8 *data, int len)
+{
+	struct smu_i2c_cmd *cmd = bus->hostdata;
+	const int is_read = addrdir & 1;
+	struct completion done;
+	const char *sub;
+	int status = 0;
+
+	if (is_read) {
+		if (len > SMU_I2C_READ_MAX)
+			return -EINVAL;
+	} else if (len > SMU_I2C_WRITE_MAX) {
+		return -EINVAL;
+	}
+
+	memset(cmd, 0, sizeof(struct smu_i2c_cmd));
+	cmd->info.bus = bus->channel;
+	cmd->info.devaddr = addrdir;
+	cmd->info.datalen = len;
+
+	switch (bus->mode) {
+	case pmac_i2c_mode_std:
+		if (subsize != 0)
+			return -EINVAL;
+		cmd->info.type = SMU_I2C_TRANSFER_SIMPLE;
+		break;
+	case pmac_i2c_mode_stdsub:
+	case pmac_i2c_mode_combined:
+		if (subsize < 1 || subsize > 3)
+			return -EINVAL;
+		cmd->info.sublen = subsize;
+		/* that's big-endian only but heh ! */
+		sub = ((char *)&subaddr) + (4 - subsize);
+		memcpy(&cmd->info.subaddr, sub, subsize);
+		cmd->info.type = (bus->mode == pmac_i2c_mode_stdsub) ?
+			SMU_I2C_TRANSFER_STDSUB : SMU_I2C_TRANSFER_COMBINED;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Outgoing payload of a write */
+	if (!is_read && len)
+		memcpy(cmd->info.data, data, len);
+
+	init_completion(&done);
+	cmd->done = smu_i2c_complete;
+	cmd->misc = &done;
+	status = smu_queue_i2c(cmd);
+	if (status < 0)
+		return status;
+	wait_for_completion(&done);
+	status = cmd->status;
+
+	/* Incoming payload of a read, copied whatever the status is */
+	if (is_read && len)
+		memcpy(data, cmd->info.data, len);
+
+	if (status < 0)
+		return status;
+	return 0;
+}
+
+/*
+ * Register one bus per i2c child of the SMU i2c controller node, when an
+ * SMU is present and an "smu-i2c-control" or "smu" node exists.
+ *
+ * Each bus is allocated together with the smu_i2c_cmd it uses for its
+ * transfers, and holds its own references on the controller and on the
+ * bus node. The references taken by the lookup and by the child
+ * iterator are dropped on every path, including allocation failure.
+ */
+static void __init smu_i2c_probe(void)
+{
+	struct device_node *controller, *busnode;
+	struct pmac_i2c_bus *bus;
+	const u32 *reg;
+	int sz;
+
+	if (!smu_present())
+		return;
+
+	controller = of_find_node_by_name(NULL, "smu-i2c-control");
+	if (controller == NULL)
+		controller = of_find_node_by_name(NULL, "smu");
+	if (controller == NULL)
+		return;
+
+	printk(KERN_INFO "SMU i2c %pOF\n", controller);
+
+	/* Look for childs, note that they might not be of the right
+	 * type as older device trees mix i2c busses and other things
+	 * at the same level
+	 */
+	for_each_child_of_node(controller, busnode) {
+		if (!of_node_is_type(busnode, "i2c") &&
+		    !of_node_is_type(busnode, "i2c-bus"))
+			continue;
+		reg = of_get_property(busnode, "reg", NULL);
+		if (reg == NULL)
+			continue;
+
+		sz = sizeof(struct pmac_i2c_bus) + sizeof(struct smu_i2c_cmd);
+		bus = kzalloc(sz, GFP_KERNEL);
+		if (bus == NULL) {
+			/* Leaving the iterator early: it still holds a
+			 * reference on the current child
+			 */
+			of_node_put(busnode);
+			break;
+		}
+
+		bus->controller = of_node_get(controller);
+		bus->busnode = of_node_get(busnode);
+		bus->type = pmac_i2c_bus_smu;
+		bus->channel = *reg;
+		bus->mode = pmac_i2c_mode_std;
+		/* The smu_i2c_cmd lives right behind the bus structure */
+		bus->hostdata = bus + 1;
+		bus->xfer = smu_i2c_xfer;
+		mutex_init(&bus->mutex);
+		lockdep_register_key(&bus->lock_key);
+		lockdep_set_class(&bus->mutex, &bus->lock_key);
+		bus->flags = 0;
+		list_add(&bus->link, &pmac_i2c_busses);
+
+		printk(KERN_INFO " channel %x bus %pOF\n",
+		       bus->channel, busnode);
+	}
+
+	/* The busses hold their own references, drop the lookup one */
+	of_node_put(controller);
+}
+
+#endif /* CONFIG_PMAC_SMU */
+
+/*
+ *
+ * Core code
+ *
+ */
+
+
+/*
+ * Find the bus a device node sits on.
+ *
+ * Walks from @node up through its ancestors until one of them is the
+ * bus node of a registered bus. On a multibus controller several busses
+ * share the same node, so the channel is taken from bits 8 and up of
+ * the "reg" property of the node visited just before the bus node.
+ *
+ * Returns the bus, or NULL if no ancestor is a known bus node. No node
+ * reference is kept by this function on either path.
+ */
+struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node)
+{
+	struct device_node *p = of_node_get(node);
+	struct device_node *prev = NULL;
+	struct pmac_i2c_bus *bus;
+
+	while(p) {
+		list_for_each_entry(bus, &pmac_i2c_busses, link) {
+			if (p == bus->busnode) {
+				if (prev && bus->flags & pmac_i2c_multibus) {
+					const u32 *reg;
+					reg = of_get_property(prev, "reg",
+								NULL);
+					if (!reg)
+						continue;
+					if (((*reg) >> 8) != bus->channel)
+						continue;
+				}
+				of_node_put(p);
+				of_node_put(prev);
+				return bus;
+			}
+		}
+		of_node_put(prev);
+		prev = p;
+		p = of_get_parent(p);
+	}
+
+	/* The walk ran past the top of the tree: the last node visited
+	 * is still referenced through prev
+	 */
+	of_node_put(prev);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_find_bus);
+
+/*
+ * Return the low 8 bits of the first cell of the "reg" property of
+ * @device, or 0 when the node has no such property.
+ */
+u8 pmac_i2c_get_dev_addr(struct device_node *device)
+{
+	const u32 *reg = of_get_property(device, "reg", NULL);
+
+	return reg ? (*reg & 0xff) : 0;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_dev_addr);
+
+/* Return the controller node of @bus; no extra reference is taken */
+struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus)
+{
+	return bus->controller;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_controller);
+
+/* Return the bus node of @bus; no extra reference is taken */
+struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus)
+{
+	return bus->busnode;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_bus_node);
+
+/* Return the type of @bus (one of the pmac_i2c_bus_* values) */
+int pmac_i2c_get_type(struct pmac_i2c_bus *bus)
+{
+	return bus->type;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_type);
+
+/* Return the flags of @bus, e.g. pmac_i2c_multibus */
+int pmac_i2c_get_flags(struct pmac_i2c_bus *bus)
+{
+	return bus->flags;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_flags);
+
+/* Return the channel number of @bus on its controller */
+int pmac_i2c_get_channel(struct pmac_i2c_bus *bus)
+{
+	return bus->channel;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_channel);
+
+
+/* Return the i2c adapter embedded in @bus */
+struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus)
+{
+	return &bus->adapter;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_adapter);
+
+/*
+ * Return the registered bus whose embedded adapter is @adapter, or NULL
+ * if the adapter does not belong to any bus of the list.
+ */
+struct pmac_i2c_bus *pmac_i2c_adapter_to_bus(struct i2c_adapter *adapter)
+{
+	struct pmac_i2c_bus *cur;
+
+	list_for_each_entry(cur, &pmac_i2c_busses, link) {
+		if (adapter == &cur->adapter)
+			return cur;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_adapter_to_bus);
+
+/*
+ * Return 1 if @adapter is the adapter of the bus @dev sits on, and 0 if
+ * it is not or if no bus is found for @dev.
+ */
+int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter)
+{
+	struct pmac_i2c_bus *owner = pmac_i2c_find_bus(dev);
+
+	return owner != NULL && &owner->adapter == adapter;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter);
+
+/*
+ * Open, in non-polled mode, the first registered bus whose controller
+ * node is @np.
+ *
+ * Returns the result of pmac_i2c_open(), or -ENODEV if no bus uses @np
+ * as its controller.
+ */
+int pmac_low_i2c_lock(struct device_node *np)
+{
+	struct pmac_i2c_bus *bus;
+
+	list_for_each_entry(bus, &pmac_i2c_busses, link) {
+		if (bus->controller == np)
+			return pmac_i2c_open(bus, 0);
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pmac_low_i2c_lock);
+
+/*
+ * Close the first registered bus whose controller node is @np.
+ *
+ * Returns 0, or -ENODEV if no bus uses @np as its controller.
+ */
+int pmac_low_i2c_unlock(struct device_node *np)
+{
+	struct pmac_i2c_bus *bus;
+
+	list_for_each_entry(bus, &pmac_i2c_busses, link) {
+		if (bus->controller != np)
+			continue;
+		pmac_i2c_close(bus);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock);
+
+
+/*
+ * Take exclusive ownership of @bus and reset it to standard mode.
+ *
+ * @bus:    bus to open
+ * @polled: non-zero to request polled operation; polling is also used
+ *          whenever pmac_i2c_force_poll is set
+ *
+ * The bus mutex is taken here and stays held on success; it is released
+ * by pmac_i2c_close(). Returns 0, or the error of the bus specific open
+ * hook, in which case the mutex has been released again.
+ */
+int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled)
+{
+	int err;
+
+	mutex_lock(&bus->mutex);
+	bus->polled = polled || pmac_i2c_force_poll;
+	bus->opened = 1;
+	bus->mode = pmac_i2c_mode_std;
+
+	/* The open hook is optional */
+	if (!bus->open)
+		return 0;
+
+	err = bus->open(bus);
+	if (err == 0)
+		return 0;
+
+	bus->opened = 0;
+	mutex_unlock(&bus->mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_open);
+
+/*
+ * Give up ownership of @bus: run the optional bus specific close hook,
+ * then release the bus mutex taken by pmac_i2c_open(). Warns if the bus
+ * is not marked as opened.
+ */
+void pmac_i2c_close(struct pmac_i2c_bus *bus)
+{
+	WARN_ON(!bus->opened);
+	if (bus->close)
+		bus->close(bus);
+	bus->opened = 0;
+	mutex_unlock(&bus->mutex);
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_close);
+
+/*
+ * Select the transfer mode used by the next transfers on @bus.
+ *
+ * @mode must lie between pmac_i2c_mode_dumb and pmac_i2c_mode_combined
+ * inclusive. Returns 0, or -EINVAL (with a message) for any other value.
+ * Warns if the bus is not marked as opened.
+ */
+int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode)
+{
+	WARN_ON(!bus->opened);
+
+	if (mode >= pmac_i2c_mode_dumb && mode <= pmac_i2c_mode_combined) {
+		bus->mode = mode;
+		return 0;
+	}
+
+	/* Report me if you see the error below as there might be a new
+	 * "combined4" mode that I need to implement for the SMU bus
+	 */
+	printk(KERN_ERR "low_i2c: Invalid mode %d requested on"
+	       " bus %pOF !\n", mode, bus->busnode);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_setmode);
+
+/*
+ * Run one transfer on @bus through its bus specific xfer hook.
+ *
+ * @addrdir: device address with the transfer direction in bit 0
+ * @subsize: size of the sub-address, checked by the hook against the
+ *           current bus mode
+ * @subaddr: sub-address
+ * @data:    bytes to write, or buffer receiving the bytes read
+ * @len:     number of bytes
+ *
+ * Returns whatever the hook returns. Warns if the bus is not marked as
+ * opened.
+ */
+int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+		  u32 subaddr, u8 *data, int len)
+{
+	int rc;
+
+	WARN_ON(!bus->opened);
+
+	DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x,"
+	    " %d bytes, bus %pOF\n", bus->channel, addrdir, bus->mode, subsize,
+	    subaddr, len, bus->busnode);
+
+	rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len);
+
+#ifdef DEBUG
+	if (rc)
+		DBG("xfer error %d\n", rc);
+#endif
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_xfer);
+
+/* some quirks for platform function decoding */
+enum {
+	/* rmw handlers: AND with the mask itself, then OR in the value */
+	pmac_i2c_quirk_invmask = 0x00000001u,
+	/* device scan: matching devices get no callback at all */
+	pmac_i2c_quirk_skip = 0x00000002u,
+};
+
+/*
+ * Call @callback for every child of every registered bus node that
+ * matches an entry of the table below, passing the quirks of the first
+ * matching entry. Entries flagged pmac_i2c_quirk_skip stop the lookup
+ * for that device without calling @callback.
+ */
+static void pmac_i2c_devscan(void (*callback)(struct device_node *dev,
+					      int quirks))
+{
+	struct pmac_i2c_bus *bus;
+	struct device_node *np;
+	static struct whitelist_ent {
+		char *name;
+		char *compatible;
+		int quirks;
+	} whitelist[] = {
+		/* XXX Study device-trees & apple drivers to get the quirks
+		 * right !
+		 */
+		/* Workaround: It seems that running the clockspreading
+		 * properties on the eMac will cause lockups during boot.
+		 * The machine seems to work fine without that. So for now,
+		 * let's make sure i2c-hwclock doesn't match about "imic"
+		 * clocks and we'll figure out if we really need to do
+		 * something special about those later.
+		 */
+		{ "i2c-hwclock", "imic5002", pmac_i2c_quirk_skip },
+		{ "i2c-hwclock", "imic5003", pmac_i2c_quirk_skip },
+		{ "i2c-hwclock", NULL, pmac_i2c_quirk_invmask },
+		{ "i2c-cpu-voltage", NULL, 0},
+		{  "temp-monitor", NULL, 0 },
+		{  "supply-monitor", NULL, 0 },
+		{ NULL, NULL, 0 },
+	};
+
+	/* Only some devices need to have platform functions instantiated
+	 * here. For now, we have a table. Others, like 9554 i2c GPIOs used
+	 * on Xserve, if we ever do a driver for them, will use their own
+	 * platform function instance
+	 */
+	list_for_each_entry(bus, &pmac_i2c_busses, link) {
+		for_each_child_of_node(bus->busnode, np) {
+			struct whitelist_ent *p;
+			/* If multibus, check if device is on that bus */
+			if (bus->flags & pmac_i2c_multibus)
+				if (bus != pmac_i2c_find_bus(np))
+					continue;
+			/* First matching entry wins, in table order */
+			for (p = whitelist; p->name != NULL; p++) {
+				if (!of_node_name_eq(np, p->name))
+					continue;
+				if (p->compatible &&
+				    !of_device_is_compatible(np, p->compatible))
+					continue;
+				if (p->quirks & pmac_i2c_quirk_skip)
+					break;
+				callback(np, p->quirks);
+				break;
+			}
+		}
+	}
+}
+
+#define MAX_I2C_DATA	64
+
+/* State of one platform function run, created by the "begin" handler */
+struct pmac_i2c_pf_inst
+{
+	struct pmac_i2c_bus	*bus;	/* bus opened for the run */
+	u8			addr;	/* device address on that bus */
+	u8			buffer[MAX_I2C_DATA];	/* data of the last read */
+	u8			scratch[MAX_I2C_DATA];	/* rmw result to write */
+	int			bytes;	/* length asked by the last read */
+	int			quirks;	/* pmac_i2c_quirk_* of the device */
+};
+
+/*
+ * Platform function "begin" handler: locate and open the bus of the
+ * device owning @func and allocate the instance used by the other
+ * handlers.
+ *
+ * Returns the instance, or NULL if the bus cannot be found or opened or
+ * if the allocation fails. The bus is left closed on failure.
+ */
+static void* pmac_i2c_do_begin(struct pmf_function *func, struct pmf_args *args)
+{
+	struct pmac_i2c_bus *bus = pmac_i2c_find_bus(func->node);
+	struct pmac_i2c_pf_inst *inst;
+
+	if (!bus) {
+		printk(KERN_ERR "low_i2c: Can't find bus for %pOF (pfunc)\n",
+		       func->node);
+		return NULL;
+	}
+
+	if (pmac_i2c_open(bus, 0) != 0) {
+		printk(KERN_ERR "low_i2c: Can't open i2c bus for %pOF (pfunc)\n",
+		       func->node);
+		return NULL;
+	}
+
+	/* XXX might need GFP_ATOMIC when called during the suspend process,
+	 * but then, there are already lots of issues with suspending when
+	 * near OOM that need to be resolved, the allocator itself should
+	 * probably make GFP_NOIO implicit during suspend
+	 */
+	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+	if (!inst) {
+		pmac_i2c_close(bus);
+		return NULL;
+	}
+
+	inst->bus = bus;
+	inst->addr = pmac_i2c_get_dev_addr(func->node);
+	/* The quirks were stored as the driver data at registration */
+	inst->quirks = (int)(long)func->driver_data;
+
+	return inst;
+}
+
+/*
+ * Platform function "end" handler: close the bus of the instance and
+ * free the instance. Does nothing when @instdata is NULL.
+ */
+static void pmac_i2c_do_end(struct pmf_function *func, void *instdata)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	if (inst != NULL) {
+		pmac_i2c_close(inst->bus);
+		kfree(inst);
+	}
+}
+
+/*
+ * Platform function handler: read @len bytes from the device into the
+ * instance buffer and remember the length for the handlers that work on
+ * that buffer afterwards.
+ *
+ * Returns the result of pmac_i2c_xfer(), or -EINVAL when @len does not
+ * fit in the instance buffer.
+ */
+static int pmac_i2c_do_read(PMF_STD_ARGS, u32 len)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	/* The length comes from the platform function data: never let
+	 * the transfer run past the end of the buffer
+	 */
+	if (len > sizeof(inst->buffer))
+		return -EINVAL;
+
+	inst->bytes = len;
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 0, 0,
+			     inst->buffer, len);
+}
+
+/*
+ * Platform function handler: write the @len bytes at @data to the
+ * device, without sub-address. Returns the result of pmac_i2c_xfer().
+ */
+static int pmac_i2c_do_write(PMF_STD_ARGS, u32 len, const u8 *data)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	/* The cast only drops const to fit the xfer prototype */
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0,
+			     (u8 *)data, len);
+}
+
+/* This function is used to do the masking & OR'ing for the "rmw" type
+ * callbacks. We should apply the mask and OR in the values in the
+ * buffer before writing back. The problem is that it seems that
+ * various darwin drivers implement the mask/or differently, thus
+ * we need to check the quirks first.
+ *
+ * The result for the first @len bytes is stored in inst->scratch:
+ *  - with pmac_i2c_quirk_invmask: (buffer & mask) | val
+ *  - otherwise:                   (buffer & ~mask) | (val & mask)
+ * Both @mask and @val are read over @len bytes.
+ */
+static void pmac_i2c_do_apply_rmw(struct pmac_i2c_pf_inst *inst,
+				  u32 len, const u8 *mask, const u8 *val)
+{
+	int i;
+
+	if (inst->quirks & pmac_i2c_quirk_invmask) {
+		for (i = 0; i < len; i ++)
+			inst->scratch[i] = (inst->buffer[i] & mask[i]) | val[i];
+	} else {
+		for (i = 0; i < len; i ++)
+			inst->scratch[i] = (inst->buffer[i] & ~mask[i])
+				| (val[i] & mask[i]);
+	}
+}
+
+/*
+ * Platform function handler: read-modify-write without sub-address.
+ *
+ * Combines the bytes of the last read with @maskdata and @valuedata
+ * over @masklen bytes, then writes @totallen bytes of the result back.
+ *
+ * Returns the result of pmac_i2c_xfer(), or -EINVAL when any of the
+ * lengths exceeds the length of the last read or when @valuelen is
+ * larger than @masklen.
+ *
+ * NOTE(review): @valuedata is read over @masklen bytes by the helper
+ * even when @valuelen is smaller - confirm callers always pass equal
+ * lengths.
+ */
+static int pmac_i2c_do_rmw(PMF_STD_ARGS, u32 masklen, u32 valuelen,
+			   u32 totallen, const u8 *maskdata,
+			   const u8 *valuedata)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	if (masklen > inst->bytes || valuelen > inst->bytes ||
+	    totallen > inst->bytes || valuelen > masklen)
+		return -EINVAL;
+
+	pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata);
+
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0,
+			     inst->scratch, totallen);
+}
+
+/*
+ * Platform function handler: read @len bytes from sub-address @subaddr
+ * of the device into the instance buffer and remember the length for
+ * the handlers that work on that buffer afterwards.
+ *
+ * Returns the result of pmac_i2c_xfer(), or -EINVAL when @len does not
+ * fit in the instance buffer.
+ */
+static int pmac_i2c_do_read_sub(PMF_STD_ARGS, u8 subaddr, u32 len)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	/* The length comes from the platform function data: never let
+	 * the transfer run past the end of the buffer
+	 */
+	if (len > sizeof(inst->buffer))
+		return -EINVAL;
+
+	inst->bytes = len;
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 1, subaddr,
+			     inst->buffer, len);
+}
+
+/*
+ * Platform function handler: write the @len bytes at @data to the
+ * one-byte sub-address @subaddr of the device. Returns the result of
+ * pmac_i2c_xfer().
+ */
+static int pmac_i2c_do_write_sub(PMF_STD_ARGS, u8 subaddr, u32 len,
+				     const u8 *data)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	/* The cast only drops const to fit the xfer prototype */
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1,
+			     subaddr, (u8 *)data, len);
+}
+
+/*
+ * Platform function handler: select the transfer mode of the bus of the
+ * instance. Returns the result of pmac_i2c_setmode().
+ */
+static int pmac_i2c_do_set_mode(PMF_STD_ARGS, int mode)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	return pmac_i2c_setmode(inst->bus, mode);
+}
+
+/*
+ * Platform function handler: read-modify-write at sub-address @subaddr.
+ *
+ * Combines the bytes of the last read with @maskdata and @valuedata
+ * over @masklen bytes, then writes @totallen bytes of the result to the
+ * one-byte sub-address.
+ *
+ * Returns the result of pmac_i2c_xfer(), or -EINVAL when any of the
+ * lengths exceeds the length of the last read or when @valuelen is
+ * larger than @masklen.
+ */
+static int pmac_i2c_do_rmw_sub(PMF_STD_ARGS, u8 subaddr, u32 masklen,
+			       u32 valuelen, u32 totallen, const u8 *maskdata,
+			       const u8 *valuedata)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	if (masklen > inst->bytes || valuelen > inst->bytes ||
+	    totallen > inst->bytes || valuelen > masklen)
+		return -EINVAL;
+
+	pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata);
+
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1,
+			     subaddr, inst->scratch, totallen);
+}
+
+/*
+ * Platform function handler: compare the masked bytes of the last read
+ * with @valuedata over @len bytes.
+ *
+ * The outcome (1 if every byte matches, 0 otherwise) is stored through
+ * the pointer found in the first caller argument. Returns 0, or -EINVAL
+ * when that argument is missing or @len exceeds the length of the last
+ * read.
+ */
+static int pmac_i2c_do_mask_and_comp(PMF_STD_ARGS, u32 len,
+				     const u8 *maskdata,
+				     const u8 *valuedata)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+	int match = 1;
+	int i;
+
+	/* Get return value pointer, it's assumed to be a u32 */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	/* Check buffer */
+	if (len > inst->bytes)
+		return -EINVAL;
+
+	/* Stop at the first byte that differs */
+	for (i = 0; i < len; i++) {
+		if ((inst->buffer[i] & maskdata[i]) != valuedata[i]) {
+			match = 0;
+			break;
+		}
+	}
+
+	*args->u[0].p = match;
+	return 0;
+}
+
+/*
+ * Platform function handler: sleep for @duration divided by 1000,
+ * rounded up, in milliseconds. Always returns 0.
+ * NOTE(review): this presumably means @duration is in microseconds -
+ * confirm against the platform function definition.
+ */
+static int pmac_i2c_do_delay(PMF_STD_ARGS, u32 duration)
+{
+	msleep((duration + 999) / 1000);
+	return 0;
+}
+
+
+/* Platform function handlers registered for the i2c devices found by
+ * the device scan
+ */
+static struct pmf_handlers pmac_i2c_pfunc_handlers = {
+	.begin			= pmac_i2c_do_begin,
+	.end			= pmac_i2c_do_end,
+	.read_i2c		= pmac_i2c_do_read,
+	.write_i2c		= pmac_i2c_do_write,
+	.rmw_i2c		= pmac_i2c_do_rmw,
+	.read_i2c_sub		= pmac_i2c_do_read_sub,
+	.write_i2c_sub		= pmac_i2c_do_write_sub,
+	.rmw_i2c_sub		= pmac_i2c_do_rmw_sub,
+	.set_i2c_mode		= pmac_i2c_do_set_mode,
+	.mask_and_compare	= pmac_i2c_do_mask_and_comp,
+	.delay			= pmac_i2c_do_delay,
+};
+
+/*
+ * Device scan callback: register the i2c platform function handlers for
+ * @np. The quirks travel as the driver data and are read back by the
+ * "begin" handler.
+ */
+static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
+{
+	DBG("dev_create(%pOF)\n", np);
+
+	pmf_register_driver(np, &pmac_i2c_pfunc_handlers,
+			    (void *)(long)quirks);
+}
+
+/*
+ * Device scan callback: run the platform functions of @np flagged
+ * PMF_FLAGS_ON_INIT. @quirks is not used.
+ */
+static void __init pmac_i2c_dev_init(struct device_node *np, int quirks)
+{
+	/* Used to print "dev_create", a copy-paste slip that made the
+	 * trace indistinguishable from pmac_i2c_dev_create()
+	 */
+	DBG("dev_init(%pOF)\n", np);
+
+	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+}
+
+/*
+ * Device scan callback: run the platform functions of @np flagged
+ * PMF_FLAGS_ON_SLEEP. @quirks is not used.
+ */
+static void pmac_i2c_dev_suspend(struct device_node *np, int quirks)
+{
+	DBG("dev_suspend(%pOF)\n", np);
+	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
+}
+
+/*
+ * Device scan callback: run the platform functions of @np flagged
+ * PMF_FLAGS_ON_WAKE. @quirks is not used.
+ */
+static void pmac_i2c_dev_resume(struct device_node *np, int quirks)
+{
+	DBG("dev_resume(%pOF)\n", np);
+	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
+}
+
+/* Run the "on sleep" platform functions of every scanned i2c device */
+void pmac_pfunc_i2c_suspend(void)
+{
+	pmac_i2c_devscan(pmac_i2c_dev_suspend);
+}
+
+/* Run the "on wake" platform functions of every scanned i2c device */
+void pmac_pfunc_i2c_resume(void)
+{
+	pmac_i2c_devscan(pmac_i2c_dev_resume);
+}
+
+/*
+ * Initialize us: probe all i2c busses on the machine, instantiate
+ * busses and platform functions as needed.
+ */
+/* This is non-static as it might be called early by smp code */
+int __init pmac_i2c_init(void)
+{
+	/* Guards against a second run: this function is both an initcall
+	 * and callable directly
+	 */
+	static int i2c_inited;
+
+	if (i2c_inited)
+		return 0;
+	i2c_inited = 1;
+
+	/* Probe keywest-i2c busses */
+	kw_i2c_probe();
+
+#ifdef CONFIG_ADB_PMU
+	/* Probe PMU i2c busses */
+	pmu_i2c_probe();
+#endif
+
+#ifdef CONFIG_PMAC_SMU
+	/* Probe SMU i2c busses */
+	smu_i2c_probe();
+#endif
+
+	/* Now add platform functions for some known devices */
+	pmac_i2c_devscan(pmac_i2c_dev_create);
+
+	return 0;
+}
+machine_arch_initcall(powermac, pmac_i2c_init);
+
+/* Since pmac_i2c_init can be called too early for the platform device
+ * registration, we need to do it at a later time. In our case, subsys
+ * happens to fit well, though I agree it's a bit of a hack...
+ */
+/*
+ * Create one "i2c-powermac" platform device per registered bus, then
+ * run the "on init" platform functions of the scanned i2c devices.
+ *
+ * Returns 0, or -ENOMEM if a platform device cannot be allocated. A
+ * device that cannot be added is released and its bus is left without
+ * platform device; the other busses are still handled.
+ */
+static int __init pmac_i2c_create_platform_devices(void)
+{
+	struct pmac_i2c_bus *bus;
+	int i = 0;
+	int rc;
+
+	/* In the case where we are initialized from smp_init(), we must
+	 * not use the timer (and thus the irq). It's safe from now on
+	 * though
+	 */
+	pmac_i2c_force_poll = 0;
+
+	/* Create platform devices */
+	list_for_each_entry(bus, &pmac_i2c_busses, link) {
+		bus->platform_dev =
+			platform_device_alloc("i2c-powermac", i++);
+		if (bus->platform_dev == NULL)
+			return -ENOMEM;
+		bus->platform_dev->dev.platform_data = bus;
+		bus->platform_dev->dev.of_node = bus->busnode;
+		rc = platform_device_add(bus->platform_dev);
+		if (rc) {
+			printk(KERN_ERR "low_i2c: Can't add platform device"
+			       " for bus %pOF (%d)\n", bus->busnode, rc);
+			/* Releasing a platform device frees its
+			 * platform_data and puts its of_node. Both still
+			 * belong to the bus, so detach them first.
+			 */
+			bus->platform_dev->dev.platform_data = NULL;
+			bus->platform_dev->dev.of_node = NULL;
+			platform_device_put(bus->platform_dev);
+			bus->platform_dev = NULL;
+		}
+	}
+
+	/* Now call platform "init" functions */
+	pmac_i2c_devscan(pmac_i2c_dev_init);
+
+	return 0;
+}
+machine_subsys_initcall(powermac, pmac_i2c_create_platform_devices);
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
new file mode 100644
index 0000000000..fe2e0249cb
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Todo: - add support for the OF persistent properties
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/memblock.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/of_address.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+
+#include "pmac.h"
+
+#define DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define NVRAM_SIZE		0x2000	/* 8kB of non-volatile RAM */
+
+#define CORE99_SIGNATURE	0x5a
+#define CORE99_ADLER_START	0x14
+
+/* On Core99, nvram is either a sharp, a micron or an AMD flash */
+#define SM_FLASH_STATUS_DONE	0x80
+#define SM_FLASH_STATUS_ERR	0x38
+
+#define SM_FLASH_CMD_ERASE_CONFIRM	0xd0
+#define SM_FLASH_CMD_ERASE_SETUP	0x20
+#define SM_FLASH_CMD_RESET		0xff
+#define SM_FLASH_CMD_WRITE_SETUP	0x40
+#define SM_FLASH_CMD_CLEAR_STATUS	0x50
+#define SM_FLASH_CMD_READ_STATUS	0x70
+
+/* CHRP NVRAM header */
+struct chrp_header {
+  u8		signature;	/* CORE99_SIGNATURE on the Core99 image header */
+  u8		cksum;		/* chrp_checksum() of this header */
+  u16		len;		/* partition length in 16-byte units, header included */
+  char          name[12];	/* partition name, e.g. "common" */
+  u8		data[];		/* payload, right after the 16 header bytes */
+};
+
+struct core99_header {
+  struct chrp_header	hdr;
+  u32			adler;		/* core99_calc_adler() of the bank */
+  u32			generation;	/* bumped on every sync; larger valid value = active bank */
+  u32			reserved[2];
+};
+
+/*
+ * Read and write the non-volatile RAM on PowerMacs and CHRP machines.
+ */
+static int nvram_naddrs;
+static volatile unsigned char __iomem *nvram_data;
+static int is_core_99;
+static int core99_bank;
+static int nvram_partitions[3];
+// XXX Turn that into a sem
+static DEFINE_RAW_SPINLOCK(nv_lock);
+
+static int (*core99_write_bank)(int bank, u8* datas);
+static int (*core99_erase_bank)(int bank);
+
+static char *nvram_image;
+
+
+static unsigned char core99_nvram_read_byte(int addr)
+{
+	if (nvram_image == NULL || addr < 0 || addr >= NVRAM_SIZE)
+		return 0xff;	/* no RAM image, or addr outside it: read as erased flash */
+	return nvram_image[addr];
+}
+
+static void core99_nvram_write_byte(int addr, unsigned char val)
+{
+	if (nvram_image == NULL || addr < 0 || addr >= NVRAM_SIZE)
+		return;		/* no RAM image, or addr outside it: drop the write */
+	nvram_image[addr] = val;
+}
+
+/* Copy up to count bytes of the RAM image at *index into buf; returns bytes copied, 0 outside the image, -ENODEV without one */
+static ssize_t core99_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	size_t pos;
+
+	if (nvram_image == NULL)
+		return -ENODEV;
+	if (*index < 0 || *index > NVRAM_SIZE)
+		return 0;
+	/* Clamp without computing pos + count, which could wrap around */
+	pos = *index;
+	if (count > NVRAM_SIZE - pos)
+		count = NVRAM_SIZE - pos;
+	memcpy(buf, &nvram_image[pos], count);
+	*index = pos + count;
+	return count;
+}
+
+/* Copy up to count bytes from buf into the RAM image at *index; returns bytes copied, 0 outside the image, -ENODEV without one */
+static ssize_t core99_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	size_t pos;
+
+	if (nvram_image == NULL)
+		return -ENODEV;
+	if (*index < 0 || *index > NVRAM_SIZE)
+		return 0;
+	/* Clamp without computing pos + count, which could wrap around */
+	pos = *index;
+	if (count > NVRAM_SIZE - pos)
+		count = NVRAM_SIZE - pos;
+	memcpy(&nvram_image[pos], buf, count);
+	*index = pos + count;
+	return count;
+}
+
+static ssize_t core99_nvram_size(void)
+{
+	/* The size itself is fixed; it is only reported once the RAM
+	 * copy of the flash exists, -ENODEV is returned otherwise. */
+	return nvram_image ? NVRAM_SIZE : -ENODEV;
+}
+
+#ifdef CONFIG_PPC32
+static volatile unsigned char __iomem *nvram_addr;
+static int nvram_mult;
+
+static ssize_t ppc32_nvram_size(void)
+{
+	return NVRAM_SIZE;	/* same fixed 8kB for every non-Core99 access method */
+}
+
+static unsigned char direct_nvram_read_byte(int addr)
+{
+	return in_8(nvram_data + nvram_mult * (addr & (NVRAM_SIZE - 1)));
+}
+
+static void direct_nvram_write_byte(int addr, unsigned char val)
+{
+	out_8(nvram_data + nvram_mult * (addr & (NVRAM_SIZE - 1)), val);
+}
+
+
+static unsigned char indirect_nvram_read_byte(int addr)
+{
+	unsigned char val;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&nv_lock, flags);	/* keep select + access together */
+	out_8(nvram_addr, addr >> 5);		/* upper address bits go to the address port */
+	val = in_8(&nvram_data[(addr & 0x1f) << 4]);	/* low 5 bits pick a byte, 16 apart */
+	raw_spin_unlock_irqrestore(&nv_lock, flags);
+
+	return val;
+}
+
+static void indirect_nvram_write_byte(int addr, unsigned char val)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&nv_lock, flags);	/* keep select + access together */
+	out_8(nvram_addr, addr >> 5);		/* upper address bits go to the address port */
+	out_8(&nvram_data[(addr & 0x1f) << 4], val);	/* low 5 bits pick a byte, 16 apart */
+	raw_spin_unlock_irqrestore(&nv_lock, flags);
+}
+
+
+#ifdef CONFIG_ADB_PMU
+
+static void pmu_nvram_complete(struct adb_request *req)
+{
+	if (req->arg)	/* the requesters in this file leave arg NULL when they poll instead of sleeping */
+		complete((struct completion *)req->arg);
+}
+
+static unsigned char pmu_nvram_read_byte(int addr)
+{
+	struct adb_request req;
+	DECLARE_COMPLETION_ONSTACK(req_complete);
+	
+	req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+	if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM,
+			(addr >> 8) & 0xff, addr & 0xff))
+		return 0xff;	/* pmu_request() reported failure */
+	if (system_state == SYSTEM_RUNNING)	/* NOTE(review): system_state is sampled a second time here */
+		wait_for_completion(&req_complete);
+	while (!req.complete)	/* not sleeping (or still pending): drive the PMU by polling */
+		pmu_poll();
+	return req.reply[0];
+}
+
+static void pmu_nvram_write_byte(int addr, unsigned char val)
+{
+	struct adb_request req;
+	DECLARE_COMPLETION_ONSTACK(req_complete);
+	
+	req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+	if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM,
+			(addr >> 8) & 0xff, addr & 0xff, val))
+		return;	/* pmu_request() reported failure; the write is silently dropped */
+	if (system_state == SYSTEM_RUNNING)	/* NOTE(review): system_state is sampled a second time here */
+		wait_for_completion(&req_complete);
+	while (!req.complete)	/* not sleeping (or still pending): drive the PMU by polling */
+		pmu_poll();
+}
+
+#endif /* CONFIG_ADB_PMU */
+#endif /* CONFIG_PPC32 */
+
+static u8 chrp_checksum(struct chrp_header* hdr)
+{
+	const u8 *p = (const u8 *)&hdr->len;	/* the cksum byte itself is skipped */
+	u16 acc = hdr->signature;
+	while (p != hdr->data)
+		acc += *p++;
+	while (acc >> 8)	/* fold the high byte back into the low one */
+		acc = (acc >> 8) + (acc & 0xFF);
+	return acc;
+}
+
+/* Adler-32 over the bank contents that follow the first CORE99_ADLER_START bytes */
+static u32 core99_calc_adler(u8 *buffer)
+{
+	int cnt;
+	u32 low = 1, high = 0;
+
+	buffer += CORE99_ADLER_START;
+	for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) {
+		/* Reduce BOTH sums now and then so "high" cannot wrap 32 bits */
+		if ((cnt % 5000) == 0) {
+			low  %= 65521UL;
+			high %= 65521UL;
+		}
+		low += buffer[cnt];
+		high += low;
+	}
+	low  %= 65521UL;
+	high %= 65521UL;
+
+	return (high << 16) | low;
+}
+
+static u32 __init core99_check(u8 *datas)
+{
+	struct core99_header* hdr99 = (struct core99_header*)datas;
+
+	if (hdr99->hdr.signature != CORE99_SIGNATURE) {
+		DBG("Invalid signature\n");
+		return 0;	/* 0 doubles as "bank not valid" */
+	}
+	if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) {
+		DBG("Invalid checksum\n");
+		return 0;
+	}
+	if (hdr99->adler != core99_calc_adler(datas)) {
+		DBG("Invalid adler\n");
+		return 0;
+	}
+	return hdr99->generation;	/* generation counter of a bank that passed all checks */
+}
+
+/* Erase flash bank @bank (Sharp/Micron commands); 0 if it then reads back as all 0xff, else -ENXIO */
+static int sm_erase_bank(int bank)
+{
+	int stat;
+	unsigned long timeout;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + bank*NVRAM_SIZE;
+
+	DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank);
+
+	out_8(base, SM_FLASH_CMD_ERASE_SETUP);
+	out_8(base, SM_FLASH_CMD_ERASE_CONFIRM);
+	timeout = 0;
+	do {
+		if (++timeout > 1000000) {
+			printk(KERN_ERR "nvram: Sharp/Micron flash erase timeout !\n");
+			break;
+		}
+		out_8(base, SM_FLASH_CMD_READ_STATUS);
+		stat = in_8(base);
+	} while (!(stat & SM_FLASH_STATUS_DONE));
+
+	out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+	out_8(base, SM_FLASH_CMD_RESET);
+
+	if (memchr_inv(base, 0xff, NVRAM_SIZE)) {
+		printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+/* Program NVRAM_SIZE bytes from @datas into flash bank @bank (Sharp/Micron); 0 if the bank reads back equal, else -ENXIO */
+static int sm_write_bank(int bank, u8* datas)
+{
+	int i, stat = 0;
+	unsigned long timeout;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + bank*NVRAM_SIZE;
+
+	DBG("nvram: Sharp/Micron Writing bank %d...\n", bank);
+
+	for (i=0; i<NVRAM_SIZE; i++) {
+		out_8(base+i, SM_FLASH_CMD_WRITE_SETUP);
+		udelay(1);
+		out_8(base+i, datas[i]);
+		timeout = 0;
+		do {
+			if (++timeout > 1000000) {
+				printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n");
+				break;
+			}
+			out_8(base, SM_FLASH_CMD_READ_STATUS);
+			stat = in_8(base);
+		} while (!(stat & SM_FLASH_STATUS_DONE));
+		if (!(stat & SM_FLASH_STATUS_DONE))	/* timed out: stop programming */
+			break;
+	}
+	out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+	out_8(base, SM_FLASH_CMD_RESET);
+	if (memcmp(base, datas, NVRAM_SIZE)) {
+		printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+/* Erase flash bank @bank (AMD command sequence); 0 if it then reads back as all 0xff, else -ENXIO */
+static int amd_erase_bank(int bank)
+{
+	int stat = 0;
+	unsigned long timeout;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + bank*NVRAM_SIZE;
+
+	DBG("nvram: AMD Erasing bank %d...\n", bank);
+
+	/* Unlock 1 */
+	out_8(base+0x555, 0xaa);
+	udelay(1);
+	/* Unlock 2 */
+	out_8(base+0x2aa, 0x55);
+	udelay(1);
+
+	/* Sector-Erase */
+	out_8(base+0x555, 0x80);
+	udelay(1);
+	out_8(base+0x555, 0xaa);
+	udelay(1);
+	out_8(base+0x2aa, 0x55);
+	udelay(1);
+	out_8(base, 0x30);
+	udelay(1);
+
+	timeout = 0;
+	do {
+		if (++timeout > 1000000) {
+			printk(KERN_ERR "nvram: AMD flash erase timeout !\n");
+			break;
+		}
+		stat = in_8(base) ^ in_8(base);	/* loop until two back-to-back reads agree */
+	} while (stat != 0);
+
+	/* Reset */
+	out_8(base, 0xf0);
+	udelay(1);
+
+	if (memchr_inv(base, 0xff, NVRAM_SIZE)) {
+		printk(KERN_ERR "nvram: AMD flash erase failed !\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+/* Program NVRAM_SIZE bytes from @datas into flash bank @bank (AMD); 0 if the bank reads back equal, else -ENXIO */
+static int amd_write_bank(int bank, u8* datas)
+{
+	int i, stat = 0;
+	unsigned long timeout;
+	u8 __iomem *base = (u8 __iomem *)nvram_data + bank*NVRAM_SIZE;
+
+	DBG("nvram: AMD Writing bank %d...\n", bank);
+
+	for (i=0; i<NVRAM_SIZE; i++) {
+		/* Unlock 1 */
+		out_8(base+0x555, 0xaa);
+		udelay(1);
+		/* Unlock 2 */
+		out_8(base+0x2aa, 0x55);
+		udelay(1);
+
+		/* Write single word */
+		out_8(base+0x555, 0xa0);
+		udelay(1);
+		out_8(base+i, datas[i]);
+
+		timeout = 0;
+		do {
+			if (++timeout > 1000000) {
+				printk(KERN_ERR "nvram: AMD flash write timeout !\n");
+				break;
+			}
+			stat = in_8(base) ^ in_8(base);	/* loop until two reads agree */
+		} while (stat != 0);
+		if (stat != 0)	/* timed out: stop programming */
+			break;
+	}
+
+	/* Reset */
+	out_8(base, 0xf0);
+	udelay(1);
+
+	if (memcmp(base, datas, NVRAM_SIZE)) {
+		printk(KERN_ERR "nvram: AMD flash write failed !\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+/* Fill nvram_partitions[]: walk the partition headers on New World machines (-1 = absent), fixed offsets otherwise */
+static void __init lookup_partitions(void)
+{
+	u8 buffer[17];
+	int i, offset;
+	struct chrp_header* hdr = (struct chrp_header *)buffer;
+
+	if (pmac_newworld) {
+		nvram_partitions[pmac_nvram_OF] = -1;
+		nvram_partitions[pmac_nvram_XPRAM] = -1;
+		nvram_partitions[pmac_nvram_NR] = -1;
+
+		offset = 0;
+		buffer[16] = 0;
+		do {
+			for (i=0;i<16;i++)
+				buffer[i] = ppc_md.nvram_read_val(offset+i);
+			if (!strcmp(hdr->name, "common"))
+				nvram_partitions[pmac_nvram_OF] = offset + 0x10;
+			if (!strcmp(hdr->name, "APL,MacOS75")) {
+				nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10;
+				nvram_partitions[pmac_nvram_NR] = offset + 0x110;
+			}
+			offset += (hdr->len * 0x10);
+		} while (hdr->len != 0 && offset < NVRAM_SIZE); /* len 0 would never advance */
+	} else {
+		nvram_partitions[pmac_nvram_OF] = 0x1800;
+		nvram_partitions[pmac_nvram_XPRAM] = 0x1300;
+		nvram_partitions[pmac_nvram_NR] = 0x1400;
+	}
+	DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]);
+	DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]);
+	DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]);
+}
+
+static void core99_nvram_sync(void)
+{
+	struct core99_header* hdr99;
+	unsigned long flags;
+
+	if (!is_core_99 || !nvram_data || !nvram_image)
+		return;
+
+	raw_spin_lock_irqsave(&nv_lock, flags);
+	if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE,
+		NVRAM_SIZE))
+		goto bail;	/* RAM image matches the active bank: nothing to write */
+
+	DBG("Updating nvram...\n");
+
+	hdr99 = (struct core99_header*)nvram_image;
+	hdr99->generation++;
+	hdr99->hdr.signature = CORE99_SIGNATURE;
+	hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr);
+	hdr99->adler = core99_calc_adler(nvram_image);
+	core99_bank = core99_bank ? 0 : 1;	/* the new image goes to the other bank */
+	if (core99_erase_bank)
+		if (core99_erase_bank(core99_bank)) {
+			printk("nvram: Error erasing bank %d\n", core99_bank);
+			goto bail;	/* NOTE(review): core99_bank stays switched to the failed bank */
+		}
+	if (core99_write_bank)
+		if (core99_write_bank(core99_bank, nvram_image))
+			printk("nvram: Error writing bank %d\n", core99_bank);
+ bail:
+	raw_spin_unlock_irqrestore(&nv_lock, flags);
+
+#ifdef DEBUG	/* NOTE(review): DEBUG is #defined at the top of this file, so this delay is always built in */
+       	mdelay(2000);
+#endif
+}
+
+/* Map both flash banks, copy the one with the larger valid generation to RAM and install the Core99 hooks */
+static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr)
+{
+	int i;
+	u32 gen_bank0, gen_bank1;
+
+	if (nvram_naddrs < 1) {
+		printk(KERN_ERR "nvram: no address\n");
+		return -EINVAL;
+	}
+	nvram_data = ioremap(addr, NVRAM_SIZE*2);
+	if (!nvram_data)	/* the bank checks below would dereference a NULL mapping */
+		return -ENOMEM;
+	nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES);
+	if (!nvram_image)
+		panic("%s: Failed to allocate %u bytes\n", __func__, NVRAM_SIZE);
+	nvram_naddrs = 1; /* Make sure we get the correct case */
+
+	DBG("nvram: Checking bank 0...\n");
+
+	gen_bank0 = core99_check((u8 *)nvram_data);
+	gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE);
+	core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0;
+
+	DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1);
+	DBG("nvram: Active bank is: %d\n", core99_bank);
+
+	for (i=0; i<NVRAM_SIZE; i++)
+		nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE];
+
+	ppc_md.nvram_read_val	= core99_nvram_read_byte;
+	ppc_md.nvram_write_val	= core99_nvram_write_byte;
+	ppc_md.nvram_read	= core99_nvram_read;
+	ppc_md.nvram_write	= core99_nvram_write;
+	ppc_md.nvram_size	= core99_nvram_size;
+	ppc_md.nvram_sync	= core99_nvram_sync;
+	ppc_md.machine_shutdown	= core99_nvram_sync;
+	/*
+	 * Older OF gives no useful "compatible" entry, so anything that is not
+	 * "amd-0137" is driven as Sharp/Micron. Identifying the chip with flash
+	 * id commands against a list of known chip IDs would be a better solution.
+	 */
+	if (of_device_is_compatible(dp, "amd-0137")) {
+		core99_erase_bank = amd_erase_bank;
+		core99_write_bank = amd_write_bank;
+	} else {
+		core99_erase_bank = sm_erase_bank;
+		core99_write_bank = sm_write_bank;
+	}
+	return 0;
+}
+
+int __init pmac_nvram_init(void)
+{
+	struct device_node *dp;
+	struct resource r1, r2;
+	unsigned int s1 = 0, s2 = 0;
+	int err = 0;
+
+	nvram_naddrs = 0;
+
+	dp = of_find_node_by_name(NULL, "nvram");
+	if (dp == NULL) {
+		printk(KERN_ERR "Can't find NVRAM device\n");
+		return -ENODEV;
+	}
+
+	/* Try to obtain up to two addresses; nvram_naddrs counts how many resolved */
+	if (of_address_to_resource(dp, 0, &r1) == 0) {
+		nvram_naddrs = 1;
+		s1 = resource_size(&r1);
+		if (of_address_to_resource(dp, 1, &r2) == 0) {
+			nvram_naddrs = 2;
+			s2 = resource_size(&r2);
+		}
+	}
+
+	is_core_99 = of_device_is_compatible(dp, "nvram,flash");
+	if (is_core_99) {
+		err = core99_nvram_setup(dp, r1.start);	/* NOTE(review): r1 is unset if no address resolved; setup rejects that case first */
+		goto bail;
+	}
+
+#ifdef CONFIG_PPC32
+	if (machine_is(chrp) && nvram_naddrs == 1) {
+		nvram_data = ioremap(r1.start, s1);	/* NOTE(review): ioremap results in this chain are not checked */
+		nvram_mult = 1;
+		ppc_md.nvram_read_val	= direct_nvram_read_byte;
+		ppc_md.nvram_write_val	= direct_nvram_write_byte;
+		ppc_md.nvram_size	= ppc32_nvram_size;
+	} else if (nvram_naddrs == 1) {
+		nvram_data = ioremap(r1.start, s1);
+		nvram_mult = (s1 + NVRAM_SIZE - 1) / NVRAM_SIZE;	/* byte spacing: region size / 8kB, rounded up */
+		ppc_md.nvram_read_val	= direct_nvram_read_byte;
+		ppc_md.nvram_write_val	= direct_nvram_write_byte;
+		ppc_md.nvram_size	= ppc32_nvram_size;
+	} else if (nvram_naddrs == 2) {
+		nvram_addr = ioremap(r1.start, s1);	/* first region: address port */
+		nvram_data = ioremap(r2.start, s2);	/* second region: data window */
+		ppc_md.nvram_read_val	= indirect_nvram_read_byte;
+		ppc_md.nvram_write_val	= indirect_nvram_write_byte;
+		ppc_md.nvram_size	= ppc32_nvram_size;
+	} else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) {
+#ifdef CONFIG_ADB_PMU
+		nvram_naddrs = -1;
+		ppc_md.nvram_read_val	= pmu_nvram_read_byte;
+		ppc_md.nvram_write_val	= pmu_nvram_write_byte;
+		ppc_md.nvram_size	= ppc32_nvram_size;
+#endif /* CONFIG_ADB_PMU */
+	} else {
+		printk(KERN_ERR "Incompatible type of NVRAM\n");
+		err = -ENXIO;
+	}
+#endif /* CONFIG_PPC32 */
+bail:
+	of_node_put(dp);
+	if (err == 0)
+		lookup_partitions();
+	return err;
+}
+
+int pmac_get_partition(int partition)
+{
+	return nvram_partitions[partition];	/* no range check: the array has 3 entries */
+}
+
+u8 pmac_xpram_read(int xpaddr)
+{
+	int offset = pmac_get_partition(pmac_nvram_XPRAM);
+
+	/* XPRAM is 0x100 bytes (NR starts right after it): valid addresses are 0..0xff */
+	if (offset < 0 || xpaddr < 0 || xpaddr >= 0x100)
+		return 0xff;
+	return ppc_md.nvram_read_val(xpaddr + offset);
+}
+
+void pmac_xpram_write(int xpaddr, u8 data)
+{
+	int offset = pmac_get_partition(pmac_nvram_XPRAM);
+
+	/* XPRAM is 0x100 bytes (NR starts right after it): valid addresses are 0..0xff */
+	if (offset < 0 || xpaddr < 0 || xpaddr >= 0x100)
+		return;
+	ppc_md.nvram_write_val(xpaddr + offset, data);
+}
+
+EXPORT_SYMBOL(pmac_get_partition);
+EXPORT_SYMBOL(pmac_xpram_read);
+EXPORT_SYMBOL(pmac_xpram_write);
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
new file mode 100644
index 0000000000..d71359b533
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -0,0 +1,1261 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for PCI bridges found on Power Macintoshes.
+ *
+ * Copyright (C) 2003-2005 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/grackle.h>
+#include <asm/ppc-pci.h>
+
+#include "pmac.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/* XXX Could be per-controller, but I don't think we risk anything by
+ * assuming we won't have both UniNorth and Bandit */
+static int has_uninorth;
+#ifdef CONFIG_PPC64
+static struct pci_controller *u3_agp;
+#else
+static int has_second_ohare;
+#endif /* CONFIG_PPC64 */
+
+extern int pcibios_assign_bus_offset;
+
+struct device_node *k2_skiplist[2];
+
+/*
+ * Magic constants for enabling cache coherency in the bandit/PSX bridge.
+ */
+#define BANDIT_DEVID_2	8
+#define BANDIT_REVID	3
+
+#define BANDIT_DEVNUM	11
+#define BANDIT_MAGIC	0x50
+#define BANDIT_COHERENT	0x40
+
+/* Return the largest "last bus" found in @higher and the bus-range of every bridge at or below @node's level */
+static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
+{
+	for (; node; node = node->sibling) {
+		const int * bus_range;
+		const unsigned int *class_code;
+		int len;
+
+		/* For PCI<->PCI bridges or CardBus bridges, we go down */
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		bus_range = of_get_property(node, "bus-range", &len);
+		if (bus_range != NULL && len >= 2 * sizeof(int) &&
+		    bus_range[1] > higher)	/* a <first last> pair is exactly two cells */
+			higher = bus_range[1];
+		higher = fixup_one_level_bus_range(node->child, higher);
+	}
+	return higher;
+}
+
+/* This routine fixes the "bus-range" property of all bridges in the
+ * system since they tend to have their "last" member wrong on macs
+ *
+ * Note that the bus numbers manipulated here are OF bus numbers, they
+ * are not Linux bus numbers.
+ */
+static void __init fixup_bus_range(struct device_node *bridge)
+{
+	struct property *range = of_find_property(bridge, "bus-range", NULL);
+	int *bounds;
+
+	/* Nothing to fix up unless the hose carries a <first last> pair */
+	if (!range || range->length < 2 * sizeof(int))
+		return;
+
+	/* Raise "last" to the highest bus number found below the bridge */
+	bounds = range->value;
+	bounds[1] = fixup_one_level_bus_range(bridge->child, bounds[1]);
+}
+
+/*
+ * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers.
+ *
+ * The "Bandit" version is present in all early PCI PowerMacs,
+ * and up to the first ones using Grackle. Some machines may
+ * have 2 bandit controllers (2 PCI busses).
+ *
+ * "Chaos" is used in some "Bandit"-type machines as a bridge
+ * for the separate display bus. It is accessed the same
+ * way as bandit, but cannot be probed for devices. It therefore
+ * has its own config access functions.
+ *
+ * The "UniNorth" version is present in all Core99 machines
+ * (iBook, G4, new IMacs, and all the recent Apple machines).
+ * It contains 3 controllers in one ASIC.
+ *
+ * The U3 is the bridge used on G5 machines. It contains an
+ * AGP bus which is dealt with the old UniNorth access routines
+ * and a HyperTransport bus which uses its own set of access
+ * functions.
+ */
+
+#define MACRISC_CFA0(devfn, off)	/* one-hot slot | func << 8 | dword-aligned reg */ \
+	((1 << (unsigned int)PCI_SLOT(devfn)) \
+	| (((unsigned int)PCI_FUNC(devfn)) << 8) \
+	| (((unsigned int)(off)) & 0xFCUL))
+
+#define MACRISC_CFA1(bus, devfn, off)	\
+	((((unsigned int)(bus)) << 16) \
+	|(((unsigned int)(devfn)) << 8) \
+	|(((unsigned int)(off)) & 0xFCUL) \
+	|1UL)
+
+static void __iomem *macrisc_cfg_map_bus(struct pci_bus *bus,
+					 unsigned int dev_fn,
+					 int offset)
+{
+	unsigned int caddr;
+	struct pci_controller *hose;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return NULL;
+
+	if (bus->number == hose->first_busno) {
+		if (dev_fn < (11 << 3))	/* slots below 11 are refused on the root bus */
+			return NULL;
+		caddr = MACRISC_CFA0(dev_fn, offset);
+	} else
+		caddr = MACRISC_CFA1(bus->number, dev_fn, offset);
+
+	/* Uninorth will return garbage if we don't read back the value ! */
+	do {
+		out_le32(hose->cfg_addr, caddr);
+	} while (in_le32(hose->cfg_addr) != caddr);
+
+	offset &= has_uninorth ? 0x07 : 0x03;	/* caddr carries the aligned part; keep the low bits for the data window */
+	return hose->cfg_data + offset;
+}
+
+static struct pci_ops macrisc_pci_ops =
+{
+	.map_bus = macrisc_cfg_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+#ifdef CONFIG_PPC32
+/*
+ * Verify that a specific (bus, dev_fn) exists on chaos
+ */
+static void __iomem *chaos_map_bus(struct pci_bus *bus, unsigned int devfn,
+				   int offset)
+{
+	struct device_node *np;
+	const u32 *vendor, *device;
+	bool hidden;
+
+	if (offset >= 0x100)
+		return NULL;
+	np = of_pci_find_child_device(bus->dev.of_node, devfn);
+	if (np == NULL)
+		return NULL;
+
+	/* No ids, or regs 0x10-0x24 (except 0x14/0x18) of vendor 0x106b device 3: refuse access */
+	vendor = of_get_property(np, "vendor-id", NULL);
+	device = of_get_property(np, "device-id", NULL);
+	hidden = vendor == NULL || device == NULL ||
+		 (*vendor == 0x106b && *device == 3 && offset >= 0x10 &&
+		  offset != 0x14 && offset != 0x18 && offset <= 0x24);
+	of_node_put(np);	/* drop the reference taken by the lookup */
+
+	return hidden ? NULL : macrisc_cfg_map_bus(bus, devfn, offset);
+}
+
+static struct pci_ops chaos_pci_ops =
+{
+	.map_bus = chaos_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static void __init setup_chaos(struct pci_controller *hose,
+			       struct resource *addr)
+{
+	/* assume a `chaos' bridge */
+	hose->ops = &chaos_pci_ops;
+	hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000);	/* NOTE(review): ioremap results unchecked */
+	hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000);
+}
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+/*
+ * These versions of U3 HyperTransport config space access ops do not
+ * implement self-view of the HT host yet
+ */
+
+/*
+ * This function deals with some "special cases" devices.
+ *
+ *  0 -> No special case
+ *  1 -> Skip the device but act as if the access was successful
+ *       (return 0xff's on reads, eventually, cache config space
+ *       accesses in a later version)
+ * -1 -> Hide the device (unsuccessful access)
+ */
+static int u3_ht_skip_device(struct pci_controller *hose,
+			     struct pci_bus *bus, unsigned int devfn)
+{
+	struct device_node *busdn, *dn;
+	int i;
+
+	/* We only allow config cycles to devices that are in OF device-tree
+	 * as we are apparently having some weird things going on with some
+	 * revs of K2 on recent G5s, except for the host bridge itself, which
+	 * is missing from the tree but we know we can probe.
+	 */
+	if (bus->self)
+		busdn = pci_device_to_OF_node(bus->self);
+	else if (devfn == 0)
+		return 0;	/* devfn 0 on the root bus: the host bridge, always allowed */
+	else
+		busdn = hose->dn;
+	for (dn = busdn->child; dn; dn = dn->sibling)
+		if (PCI_DN(dn) && PCI_DN(dn)->devfn == devfn)
+			break;
+	if (dn == NULL)
+		return -1;	/* no matching child node: hide the device */
+
+	/*
+	 * When a device in K2 is powered down, we die on config
+	 * cycle accesses. Fix that here.
+	 */
+	for (i=0; i<2; i++)
+		if (k2_skiplist[i] == dn)
+			return 1;	/* listed: caller fakes a successful access */
+
+	return 0;
+}
+
+#define U3_HT_CFA0(devfn, off)		\
+		((((unsigned int)(devfn)) << 8) | (off))
+#define U3_HT_CFA1(bus, devfn, off)	\
+		(U3_HT_CFA0(devfn, off) \
+		+ (((unsigned int)(bus)) << 16) \
+		+ 0x01000000UL)
+
+static void __iomem *u3_ht_cfg_access(struct pci_controller *hose, u8 bus,
+				      u8 devfn, u8 offset, int *swap)
+{
+	*swap = 1;	/* callers use little-endian accessors when set, big-endian when clear */
+	if (bus == hose->first_busno) {
+		if (devfn != 0)
+			return hose->cfg_data + U3_HT_CFA0(devfn, offset);
+		*swap = 0;	/* devfn 0 on the first bus goes through cfg_addr instead */
+		return ((void __iomem *)hose->cfg_addr) + (offset << 2);
+	} else
+		return hose->cfg_data + U3_HT_CFA1(bus, devfn, offset);
+}
+
+static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
+				    int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	void __iomem *addr;
+	int swap;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= 0x100)
+		return  PCIBIOS_BAD_REGISTER_NUMBER;
+	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	switch (u3_ht_skip_device(hose, bus, devfn)) {
+	case 0:
+		break;
+	case 1:	/* skipped device: report all-ones without touching the hardware */
+		switch (len) {
+		case 1:
+			*val = 0xff; break;
+		case 2:
+			*val = 0xffff; break;
+		default:
+			*val = 0xfffffffful; break;
+		}
+		return PCIBIOS_SUCCESSFUL;
+	default:	/* hidden device */
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8(addr);
+		break;
+	case 2:
+		*val = swap ? in_le16(addr) : in_be16(addr);
+		break;
+	default:
+		*val = swap ? in_le32(addr) : in_be32(addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 val)
+{
+	struct pci_controller *hose;
+	void __iomem *addr;
+	int swap;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= 0x100)
+		return  PCIBIOS_BAD_REGISTER_NUMBER;
+	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	switch (u3_ht_skip_device(hose, bus, devfn)) {
+	case 0:
+		break;
+	case 1:	/* skipped device: drop the write but report success */
+		return PCIBIOS_SUCCESSFUL;
+	default:	/* hidden device */
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8(addr, val);
+		break;
+	case 2:
+		swap ? out_le16(addr, val) : out_be16(addr, val);
+		break;
+	default:
+		swap ? out_le32(addr, val) : out_be32(addr, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops u3_ht_pci_ops =
+{
+	.read = u3_ht_read_config,
+	.write = u3_ht_write_config,
+};
+
+#define U4_PCIE_CFA0(devfn, off)	\
+	((1 << ((unsigned int)PCI_SLOT(devfn)))	\
+	 | (((unsigned int)PCI_FUNC(devfn)) << 8)	\
+	 | ((((unsigned int)(off)) >> 8) << 28) \
+	 | (((unsigned int)(off)) & 0xfcU))
+
+#define U4_PCIE_CFA1(bus, devfn, off)	\
+	((((unsigned int)(bus)) << 16) \
+	 |(((unsigned int)(devfn)) << 8)	\
+	 | ((((unsigned int)(off)) >> 8) << 28) \
+	 |(((unsigned int)(off)) & 0xfcU)	\
+	 |1UL)
+
+static void __iomem *u4_pcie_cfg_map_bus(struct pci_bus *bus,
+					 unsigned int dev_fn,
+					 int offset)
+{
+	struct pci_controller *hose;
+	unsigned int caddr;
+
+	if (offset >= 0x1000)	/* register offsets up to 0xfff are accepted here */
+		return NULL;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return NULL;
+
+	if (bus->number == hose->first_busno) {
+		caddr = U4_PCIE_CFA0(dev_fn, offset);
+	} else
+		caddr = U4_PCIE_CFA1(bus->number, dev_fn, offset);
+
+	/* Uninorth will return garbage if we don't read back the value ! */
+	do {
+		out_le32(hose->cfg_addr, caddr);
+	} while (in_le32(hose->cfg_addr) != caddr);
+
+	offset &= 0x03;	/* caddr carries the dword-aligned part; keep the byte lane */
+	return hose->cfg_data + offset;
+}
+
+static struct pci_ops u4_pcie_pci_ops =
+{
+	.map_bus = u4_pcie_cfg_map_bus,
+	.read = pci_generic_config_read,
+	.write = pci_generic_config_write,
+};
+
+static void pmac_pci_fixup_u4_of_node(struct pci_dev *dev)
+{
+	/* Apple's device-tree "hides" the root complex virtual P2P bridge
+	 * on U4. However, Linux sees it, causing the PCI <-> OF matching
+	 * code to fail to properly match devices below it. This works around
+	 * it by setting the node of the bridge to point to the PHB node,
+	 * which is not entirely correct but fixes the matching code and
+	 * doesn't break anything else. It's also the simplest possible fix.
+	 */
+	if (dev->dev.of_node == NULL)	/* only when no node has been attached to the device */
+		dev->dev.of_node = pcibios_get_phb_of_node(dev->bus);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, 0x5b, pmac_pci_fixup_u4_of_node);
+
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC32
+/*
+ * For a bandit bridge, turn on cache coherency if necessary.
+ * N.B. we could clean this up using the hose ops directly.
+ */
+static void __init init_bandit(struct pci_controller *bp)
+{
+	unsigned int vendev, magic;
+	int rev;
+
+	/* read the word at offset 0 in config space for device 11 */
+	out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID);
+	udelay(2);
+	vendev = in_le32(bp->cfg_data);
+	if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) +
+			PCI_VENDOR_ID_APPLE) {
+		/* read the revision id */
+		out_le32(bp->cfg_addr,
+			 (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID);
+		udelay(2);
+		rev = in_8(bp->cfg_data);
+		if (rev != BANDIT_REVID)
+			printk(KERN_WARNING
+			       "Unknown revision %d for bandit\n", rev);
+	} else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) {
+		printk(KERN_WARNING "bandit isn't? (%x)\n", vendev);
+		return;	/* neither known device id: leave the bridge untouched */
+	}
+
+	/* read the word at offset 0x50 */
+	out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC);
+	udelay(2);
+	magic = in_le32(bp->cfg_data);
+	if ((magic & BANDIT_COHERENT) != 0)
+		return;	/* coherency bit already set */
+	magic |= BANDIT_COHERENT;
+	udelay(2);
+	out_le32(bp->cfg_data, magic);	/* cfg_addr still selects offset 0x50 */
+	printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n");
+}
+
+/*
+ * Tweak the PCI-PCI bridge chip on the blue & white G3s.
+ */
+static void __init init_p2pbridge(void)
+{
+	struct device_node *p2pbridge;
+	struct pci_controller* hose;
+	u8 bus, devfn;
+	u16 val;
+
+	/* XXX it would be better here to identify the specific
+	   PCI-PCI bridge chip we have. */
+	p2pbridge = of_find_node_by_name(NULL, "pci-bridge");
+	if (p2pbridge == NULL || !of_node_name_eq(p2pbridge->parent, "pci"))
+		goto done;
+	if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) {
+		DBG("Can't find PCI infos for PCI<->PCI bridge\n");
+		goto done;
+	}
+	/* Warning: At this point, we have not yet renumbered all busses.
+	 * So we must use OF walking to find out hose
+	 */
+	hose = pci_find_hose_for_OF_device(p2pbridge);
+	if (!hose) {
+		DBG("Can't find hose for PCI<->PCI bridge\n");
+		goto done;
+	}
+	/* Config accessors return positive PCIBIOS_* codes, so a "< 0" test never fires */
+	if (early_read_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL,
+				   &val) != PCIBIOS_SUCCESSFUL) {
+		printk(KERN_ERR "init_p2pbridge: couldn't read bridge control\n");
+		goto done;
+	}
+	val &= ~PCI_BRIDGE_CTL_MASTER_ABORT;
+	early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val);
+done:
+	of_node_put(p2pbridge);
+}
+
+static void __init init_second_ohare(void)
+{
+	struct device_node *np = of_find_node_by_name(NULL, "pci106b,7");
+	unsigned char bus, devfn;
+	unsigned short cmd;
+
+	if (np == NULL)
+		return;
+
+	/* This must run before we initialize the PICs since the second
+	 * ohare hosts a PIC that will be accessed there.
+	 */
+	if (pci_device_from_OF_node(np, &bus, &devfn) == 0) {
+		struct pci_controller* hose =
+			pci_find_hose_for_OF_device(np);
+		if (!hose) {
+			printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+			of_node_put(np);
+			return;
+		}
+		early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);	/* NOTE(review): result unchecked */
+		cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+		cmd &= ~PCI_COMMAND_IO;
+		early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
+	}
+	has_second_ohare = 1;	/* set whenever the node exists, even if the PCI lookup above failed */
+	of_node_put(np);
+}
+
+/*
+ * Some Apple desktop machines have a NEC PD720100A USB2 controller
+ * on the motherboard. Open Firmware, on these, will disable the
+ * EHCI part of it so it behaves like a pair of OHCI's. This fixup
+ * code re-enables it ;)
+ */
+static void __init fixup_nec_usb2(void)
+{
+	struct device_node *nec;
+
+	for_each_node_by_name(nec, "usb") {
+		struct pci_controller *hose;
+		u32 data;
+		const u32 *prop;
+		u8 bus, devfn;
+
+		prop = of_get_property(nec, "vendor-id", NULL);
+		if (prop == NULL)
+			continue;
+		if (0x1033 != *prop)
+			continue;
+		prop = of_get_property(nec, "device-id", NULL);
+		if (prop == NULL)
+			continue;
+		if (0x0035 != *prop)
+			continue;
+		prop = of_get_property(nec, "reg", NULL);
+		if (prop == NULL)
+			continue;
+		devfn = (prop[0] >> 8) & 0xff;	/* bus/devfn are decoded from the first "reg" cell */
+		bus = (prop[0] >> 16) & 0xff;
+		if (PCI_FUNC(devfn) != 0)	/* only function 0 is touched */
+			continue;
+		hose = pci_find_hose_for_OF_device(nec);
+		if (!hose)
+			continue;
+		early_read_config_dword(hose, bus, devfn, 0xe4, &data);	/* NOTE(review): result unchecked */
+		if (data & 1UL) {	/* bit 0 of config register 0xe4 set: treated as EHCI disabled */
+			printk("Found NEC PD720100A USB2 chip with disabled"
+			       " EHCI, fixing up...\n");
+			data &= ~1UL;
+			early_write_config_dword(hose, bus, devfn, 0xe4, data);
+		}
+	}
+}
+
+/*
+ * Set up a Bandit host bridge: install the macrisc config ops, map the
+ * config address and data windows found at fixed offsets from the bridge
+ * register base, then run the Bandit specific init.
+ */
+static void __init setup_bandit(struct pci_controller *hose,
+				struct resource *addr)
+{
+	resource_size_t regs = addr->start;
+
+	hose->ops = &macrisc_pci_ops;
+	hose->cfg_addr = ioremap(regs + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(regs + 0xc00000, 0x1000);
+
+	init_bandit(hose);
+}
+
+/*
+ * Set up a UniNorth host bridge. Requests bus renumbering, records that
+ * a UniNorth is present and maps the config address/data windows.
+ *
+ * Returns 1 when this is the bridge at 0xf2000000 (the one we "know"
+ * carries the PCI slots), 0 otherwise.
+ */
+static int __init setup_uninorth(struct pci_controller *hose,
+				 struct resource *addr)
+{
+	resource_size_t regs = addr->start;
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	has_uninorth = 1;
+
+	hose->ops = &macrisc_pci_ops;
+	hose->cfg_addr = ioremap(regs + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(regs + 0xc00000, 0x1000);
+
+	return regs == 0xf2000000;
+}
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+/*
+ * Set up the U3 AGP host bridge and remember it in u3_agp.
+ *
+ * On G5 the AGP bus is moved up to bus numbers 0xf0..0xff so that HT
+ * does not need its busses reassigned. Should P2P bridges ever show up
+ * on AGP, pci_assign_all_busses would have to become a per
+ * pci_controller setting, enabled for AGP but not for the HT children.
+ *
+ * The register base is hard coded because the reg address cell has a
+ * different size here; that should go away once struct reg_property is
+ * replaced by accessor functions.
+ */
+static void __init setup_u3_agp(struct pci_controller* hose)
+{
+	hose->ops = &macrisc_pci_ops;
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+	hose->first_busno = 0xf0;
+	hose->last_busno = 0xff;
+
+	has_uninorth = 1;
+	u3_agp = hose;
+}
+
+/*
+ * Set up the U4 PCIe host bridge. Only the "non-atomic" config space
+ * access method is implemented for now.
+ */
+static void __init setup_u4_pcie(struct pci_controller* hose)
+{
+	/* There is a root -> device bridge on this bus and it has to be
+	 * visible on bus 0, otherwise the wrong type of config cycle is
+	 * picked (we would have to force type 1 everywhere). Hence the
+	 * "bus-range" property is overridden with 0x00..0xff.
+	 */
+	hose->first_busno = 0x00;
+	hose->last_busno = 0xff;
+
+	hose->ops = &u4_pcie_pci_ops;
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+}
+
+/*
+ * Turn the bits of an HT decode register value into memory resources of
+ * @hose. Bits are scanned from the most significant one down: the first
+ * 16 bits each stand for a 16MB region starting at 0xf0000000, the next
+ * ones for a 256MB region each starting at 0. Adjacent enabled regions
+ * are merged into one resource; at most 3 resources are filled in.
+ */
+static void __init parse_region_decode(struct pci_controller *hose,
+				       u32 decode)
+{
+	unsigned long start, last, expected = -1;
+	int bit, idx = -1;
+
+	/* The very last bit is left out on purpose: that region is
+	 * reserved for the ROM among other niceties
+	 */
+	for (bit = 0; bit < 31; bit++) {
+		struct resource *res;
+
+		if (!(decode & (0x80000000 >> bit)))
+			continue;
+
+		if (bit >= 16) {
+			start = ((u32)bit-16) << 28;
+			last = start + 0x0fffffff;
+		} else {
+			start = 0xf0000000 | (((u32)bit) << 24);
+			last = start + 0x00ffffff;
+		}
+
+		if (start == expected) {
+			/* Contiguous with the previous region: extend it */
+			DBG("   :           -0x%08lx\n", last);
+			hose->mem_resources[idx].end = last;
+		} else {
+			if (++idx >= 3) {
+				printk(KERN_WARNING "PCI: Too many ranges !\n");
+				break;
+			}
+			res = &hose->mem_resources[idx];
+			res->flags = IORESOURCE_MEM;
+			res->name = hose->dn->full_name;
+			res->start = start;
+			res->end = last;
+			hose->mem_offset[idx] = 0;
+			DBG("  %d: 0x%08lx-0x%08lx\n", idx, start, last);
+		}
+		expected = last + 1;
+	}
+}
+
+/*
+ * Set up the U3/U4 HyperTransport host bridge.
+ *
+ * Maps the external config space and the bridge's own registers from
+ * the two "reg" entries of the device node, fills in the fixed I/O
+ * window and bus range, then derives the memory resources from the
+ * bridge decode register. On any failure an error is logged and the
+ * hose is left without mappings or resources.
+ */
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+	struct device_node *np = hose->dn;
+	struct resource cfg_res, self_res;
+	u32 decode;
+
+	hose->ops = &u3_ht_pci_ops;
+
+	/* Get base addresses from OF tree
+	 */
+	if (of_address_to_resource(np, 0, &cfg_res) ||
+	    of_address_to_resource(np, 1, &self_res)) {
+		printk(KERN_ERR "PCI: Failed to get U3/U4 HT resources !\n");
+		return;
+	}
+
+	/* Map external cfg space access into cfg_data and self registers
+	 * into cfg_addr
+	 */
+	hose->cfg_data = ioremap(cfg_res.start, 0x02000000);
+	hose->cfg_addr = ioremap(self_res.start, resource_size(&self_res));
+
+	/* The decode register is read through cfg_addr below, so a failed
+	 * mapping must be caught here. Undo whichever half did succeed.
+	 */
+	if (!hose->cfg_data || !hose->cfg_addr) {
+		printk(KERN_ERR "PCI: Failed to map U3/U4 HT registers !\n");
+		if (hose->cfg_data)
+			iounmap(hose->cfg_data);
+		if (hose->cfg_addr)
+			iounmap(hose->cfg_addr);
+		hose->cfg_data = NULL;
+		hose->cfg_addr = NULL;
+		return;
+	}
+
+	/*
+	 * /ht node doesn't expose a "ranges" property, we read the register
+	 * that controls the decoding logic and use that for memory regions.
+	 * The IO region is hard coded since it is fixed in HW as well.
+	 */
+	hose->io_base_phys = 0xf4000000;
+	hose->pci_io_size = 0x00400000;
+	hose->io_resource.name = np->full_name;
+	hose->io_resource.start = 0;
+	hose->io_resource.end = 0x003fffff;
+	hose->io_resource.flags = IORESOURCE_IO;
+	hose->first_busno = 0;
+	hose->last_busno = 0xef;
+
+	/* Note: fix offset when cfg_addr becomes a void * */
+	decode = in_be32(hose->cfg_addr + 0x80);
+
+	DBG("PCI: Apple HT bridge decode register: 0x%08x\n", decode);
+
+	/* NOTE: The decode register setup is a bit weird... region
+	 * 0xf8000000 for example is marked as enabled in there while it's
+	 * actually the memory controller registers.
+	 * That means that we are incorrectly attributing it to HT.
+	 *
+	 * In a similar vein, region 0xf4000000 is actually the HT IO space but
+	 * also marked as enabled in here and 0xf9000000 is used by some other
+	 * internal bits of the northbridge.
+	 *
+	 * Unfortunately, we can't just mask out those bits as we would end
+	 * up with more regions than we can cope (linux can only cope with
+	 * 3 memory regions for a PHB at this stage).
+	 *
+	 * So for now, we just do a little hack. We happen to -know- that
+	 * Apple firmware doesn't assign things below 0xfa000000 for that
+	 * bridge anyway so we mask out all bits we don't want.
+	 */
+	decode &= 0x003fffff;
+
+	/* Now parse the resulting bits and build resources */
+	parse_region_decode(hose, decode);
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * We assume that if we have a G3 powermac, we have one bridge called
+ * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise,
+ * if we have one or more bandit or chaos bridges, we don't have a MPC106.
+ *
+ * pmac_add_bridge() allocates a pci_controller for the host bridge node
+ * @dev, runs the bridge specific setup, processes the "ranges" property
+ * and fixes up "bus-range".
+ *
+ * Returns 0 on success or -ENOMEM when no controller can be allocated.
+ */
+static int __init pmac_add_bridge(struct device_node *dev)
+{
+	int len;
+	struct pci_controller *hose;
+	/* Zeroed so that a failed lookup below yields a defined address */
+	struct resource rsrc = { 0 };
+	char *disp_name;
+	const int *bus_range;
+	int primary = 1;
+
+	DBG("Adding PCI host bridge %pOF\n", dev);
+
+	/* Fetch host bridge registers address */
+	of_address_to_resource(dev, 0, &rsrc);
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < (int)(2 * sizeof(int))) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+		/* A property shorter than two cells must not be indexed:
+		 * treat it exactly like a missing one.
+		 */
+		bus_range = NULL;
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = pmac_pci_controller_ops;
+
+	disp_name = NULL;
+
+	/* 64 bits only bridges */
+#ifdef CONFIG_PPC64
+	if (of_device_is_compatible(dev, "u3-agp")) {
+		setup_u3_agp(hose);
+		disp_name = "U3-AGP";
+		primary = 0;
+	} else if (of_device_is_compatible(dev, "u3-ht")) {
+		setup_u3_ht(hose);
+		disp_name = "U3-HT";
+		primary = 1;
+	} else if (of_device_is_compatible(dev, "u4-pcie")) {
+		setup_u4_pcie(hose);
+		disp_name = "U4-PCIE";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge.  Firmware bus number:"
+	       " %d->%d\n", disp_name, hose->first_busno, hose->last_busno);
+#endif /* CONFIG_PPC64 */
+
+	/* 32 bits only bridges */
+#ifdef CONFIG_PPC32
+	if (of_device_is_compatible(dev, "uni-north")) {
+		primary = setup_uninorth(hose, &rsrc);
+		disp_name = "UniNorth";
+	} else if (of_node_name_eq(dev, "pci")) {
+		/* XXX assume this is a mpc106 (grackle) */
+		setup_grackle(hose);
+		disp_name = "Grackle (MPC106)";
+	} else if (of_node_name_eq(dev, "bandit")) {
+		setup_bandit(hose, &rsrc);
+		disp_name = "Bandit";
+	} else if (of_node_name_eq(dev, "chaos")) {
+		setup_chaos(hose, &rsrc);
+		disp_name = "Chaos";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge at 0x%016llx. "
+	       "Firmware bus number: %d->%d\n",
+		disp_name, (unsigned long long)rsrc.start, hose->first_busno,
+		hose->last_busno);
+#endif /* CONFIG_PPC32 */
+
+	DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+		hose, hose->cfg_addr, hose->cfg_data);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	/* Fixup "bus-range" OF property */
+	fixup_bus_range(dev);
+
+	/* create pci_dn's for DT nodes under this PHB */
+	if (IS_ENABLED(CONFIG_PPC64))
+		pci_devs_phb_init_dynamic(hose);
+
+	return 0;
+}
+
+/*
+ * Per-device interrupt fixup. Does nothing unless built for 32-bit.
+ */
+void pmac_pci_irq_fixup(struct pci_dev *dev)
+{
+#ifdef CONFIG_PPC32
+	/* Only machines with a second ohare chip need the fixup ... */
+	if (!has_second_ohare)
+		return;
+
+	/* ... and only for the modem/ethernet combo controller */
+	if (dev->vendor != PCI_VENDOR_ID_DEC ||
+	    dev->device != PCI_DEVICE_ID_DEC_TULIP_PLUS)
+		return;
+
+	/* The device tree says 27, which is right for the ethernet-only
+	 * board but bogus for the combo ethernet/modem board. The real
+	 * interrupt is 28 on the second controller -> 28+32 = 60.
+	 */
+	dev->irq = irq_create_mapping(NULL, 60);
+	irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW);
+#endif /* CONFIG_PPC32 */
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Root bridge preparation hook. Only the U3 AGP bridge needs work: its
+ * bus got renumbered to 0xf0, so the PCI<->OF mapping of the bridge node
+ * and of its direct children is updated to match. Always returns 0.
+ */
+static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct pci_controller *hose = pci_bus_to_host(bridge->bus);
+	struct device_node *child;
+
+	if (hose == u3_agp) {
+		/* No P2P bridge is assumed to sit on the AGP bus, which
+		 * should be safe for now; something better is needed in
+		 * the future though.
+		 */
+		PCI_DN(hose->dn)->busno = 0xf0;
+		for_each_child_of_node(hose->dn, child)
+			PCI_DN(child)->busno = 0xf0;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Discover and register the PCI host bridges found directly below the
+ * device tree root, then apply the 32-bit or 64-bit specific follow-ups.
+ */
+void __init pmac_pci_init(void)
+{
+	struct device_node *root, *child;
+	struct device_node *ht __maybe_unused = NULL;
+
+	pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN);
+
+	root = of_find_node_by_path("/");
+	if (!root) {
+		printk(KERN_CRIT "pmac_pci_init: can't find root "
+		       "of device tree\n");
+		return;
+	}
+
+	for_each_child_of_node(root, child) {
+		bool is_bridge = of_node_name_eq(child, "bandit") ||
+				 of_node_name_eq(child, "chaos") ||
+				 of_node_name_eq(child, "pci");
+
+		/* A successfully added bridge keeps a node reference */
+		if (is_bridge && pmac_add_bridge(child) == 0)
+			of_node_get(child);
+
+		/* The HT bridge is only remembered here, see below */
+		if (of_node_name_eq(child, "ht")) {
+			of_node_get(child);
+			ht = child;
+		}
+	}
+	of_node_put(root);
+
+#ifdef CONFIG_PPC64
+	/* HT goes last since it relies on the agp resources being set up
+	 * already. Drop our reference if the bridge could not be added.
+	 */
+	if (ht && pmac_add_bridge(ht) != 0)
+		of_node_put(ht);
+
+	ppc_md.pcibios_root_bridge_prepare = pmac_pci_root_bridge_prepare;
+	/* pmac_check_ht_link(); */
+
+#else /* CONFIG_PPC64 */
+	init_p2pbridge();
+	init_second_ohare();
+	fixup_nec_usb2();
+
+	/* The Xserve G4 still gives us some trouble; using an offset
+	 * between bus numbers and domains whenever all busses get
+	 * reassigned should help for now
+	 */
+	if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
+		pcibios_assign_bus_offset = 0x10;
+#endif
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * enable_device hook for 32-bit PowerMacs.
+ *
+ * Returns false to refuse an Apple USB OHCI controller that has no
+ * device tree node, true in every other case. For firewire and gmac
+ * cells below a "uni-north" node the cell is powered back up and the
+ * PCI command, latency timer and cache line size are reprogrammed.
+ */
+static bool pmac_pci_enable_device_hook(struct pci_dev *dev)
+{
+	struct device_node *node = pci_device_to_OF_node(dev);
+	struct device_node *parent;
+	bool need_cfg = false;
+	u16 cmd;
+
+	if (!node) {
+		/* USB controllers absent from the OF tree must stay off
+		 * (iBook second controller)
+		 */
+		if (dev->vendor == PCI_VENDOR_ID_APPLE &&
+		    dev->class == PCI_CLASS_SERIAL_USB_OHCI) {
+			printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n",
+			       pci_name(dev));
+			return false;
+		}
+		return true;
+	}
+
+	/* Everything below only concerns children of UniNorth */
+	parent = node->parent;
+	if (!parent || !of_device_is_compatible(parent, "uni-north"))
+		return true;
+
+	/* Firewire & GMAC got switched off after PCI probe; now that a
+	 * driver claims them they have to be turned back on.
+	 */
+	if (of_node_name_eq(node, "firewire") &&
+	    (of_device_is_compatible(node, "pci106b,18") ||
+	     of_device_is_compatible(node, "pci106b,30") ||
+	     of_device_is_compatible(node, "pci11c1,5811"))) {
+		pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, node, 0, 1);
+		pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1);
+		need_cfg = true;
+	}
+	if (of_node_name_eq(node, "ethernet") &&
+	    of_device_is_compatible(node, "gmac")) {
+		pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1);
+		need_cfg = true;
+	}
+	if (!need_cfg)
+		return true;
+
+	/*
+	 * Reprogram a few header fields. This is a 32 bits only thing:
+	 * on 64 bits some of them hold strange values behind the HT
+	 * bridge, and MWI or the cache line size must not be set there.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER
+		| PCI_COMMAND_INVALIDATE;
+	pci_write_config_word(dev, PCI_COMMAND, cmd);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16);
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
+			      L1_CACHE_BYTES >> 2);
+
+	return true;
+}
+
+/*
+ * Header fixup for Apple devices: a USB OHCI controller that is absent
+ * from the OF tree (iBook second controller) must not get resources
+ * assigned, so the flags of its first resource are cleared.
+ */
+static void pmac_pci_fixup_ohci(struct pci_dev *dev)
+{
+	if (dev->class != PCI_CLASS_SERIAL_USB_OHCI)
+		return;
+	if (pci_device_to_OF_node(dev))
+		return;
+
+	dev->resource[0].flags = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_ANY_ID, pmac_pci_fixup_ohci);
+
+/* Once probing is done, the firewire and gmac cells hanging off
+ * UniNorth are powered down. They get powered back on later on.
+ */
+void __init pmac_pcibios_after_init(void)
+{
+	struct device_node *nd;
+
+	for_each_node_by_name(nd, "firewire") {
+		if (!nd->parent)
+			continue;
+		if (!of_device_is_compatible(nd, "pci106b,18") &&
+		    !of_device_is_compatible(nd, "pci106b,30") &&
+		    !of_device_is_compatible(nd, "pci11c1,5811"))
+			continue;
+		if (!of_device_is_compatible(nd->parent, "uni-north"))
+			continue;
+
+		pmac_call_feature(PMAC_FTR_1394_ENABLE, nd, 0, 0);
+		pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, nd, 0, 0);
+	}
+
+	for_each_node_by_name(nd, "ethernet") {
+		if (!nd->parent)
+			continue;
+		if (!of_device_is_compatible(nd, "gmac"))
+			continue;
+		if (!of_device_is_compatible(nd->parent, "uni-north"))
+			continue;
+
+		pmac_call_feature(PMAC_FTR_GMAC_ENABLE, nd, 0, 0);
+	}
+}
+
+/*
+ * Final fixup for TI cardbus bridges as used on powerbooks: corrects
+ * the interrupt routing. A config register is only written back when
+ * reading it succeeded.
+ */
+static void pmac_pci_fixup_cardbus(struct pci_dev *dev)
+{
+	u8 val;
+
+	if (!machine_is(powermac))
+		return;
+	if (dev->vendor != PCI_VENDOR_ID_TI)
+		return;
+
+	switch (dev->device) {
+	case PCI_DEVICE_ID_TI_1130:
+	case PCI_DEVICE_ID_TI_1131:
+		/* Enable PCI interrupt */
+		if (pci_read_config_byte(dev, 0x91, &val) == 0)
+			pci_write_config_byte(dev, 0x91, val | 0x30);
+		/* Disable ISA interrupt mode */
+		if (pci_read_config_byte(dev, 0x92, &val) == 0)
+			pci_write_config_byte(dev, 0x92, val & ~0x06);
+		break;
+	case PCI_DEVICE_ID_TI_1210:
+	case PCI_DEVICE_ID_TI_1211:
+	case PCI_DEVICE_ID_TI_1410:
+	case PCI_DEVICE_ID_TI_1510:
+		/* 0x8c == TI122X_IRQMUX, 2 says to route the INTA
+		   signal out the MFUNC0 pin */
+		if (pci_read_config_byte(dev, 0x8c, &val) == 0)
+			pci_write_config_byte(dev, 0x8c, (val & ~0x0f) | 2);
+		/* Disable ISA interrupt mode */
+		if (pci_read_config_byte(dev, 0x92, &val) == 0)
+			pci_write_config_byte(dev, 0x92, val & ~0x06);
+		break;
+	default:
+		break;
+	}
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus);
+
+/*
+ * Early fixup run for every PCI device: on PowerMacs any PCI ATA
+ * controller is switched to fully native mode. A controller qualifies
+ * either through its IDE storage class or by being one of the listed
+ * Promise chips, some of which do not carry the IDE class.
+ */
+static void pmac_pci_fixup_pciata(struct pci_dev *dev)
+{
+	bool promise_ata = false;
+	u8 progif = 0;
+	int bar;
+
+	if (!machine_is(powermac))
+		return;
+
+	if (dev->vendor == PCI_VENDOR_ID_PROMISE) {
+		switch (dev->device) {
+		case PCI_DEVICE_ID_PROMISE_20246:
+		case PCI_DEVICE_ID_PROMISE_20262:
+		case PCI_DEVICE_ID_PROMISE_20263:
+		case PCI_DEVICE_ID_PROMISE_20265:
+		case PCI_DEVICE_ID_PROMISE_20267:
+		case PCI_DEVICE_ID_PROMISE_20268:
+		case PCI_DEVICE_ID_PROMISE_20269:
+		case PCI_DEVICE_ID_PROMISE_20270:
+		case PCI_DEVICE_ID_PROMISE_20271:
+		case PCI_DEVICE_ID_PROMISE_20275:
+		case PCI_DEVICE_ID_PROMISE_20276:
+		case PCI_DEVICE_ID_PROMISE_20277:
+			promise_ata = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Everything else has to show the IDE class */
+	if (!promise_ata && (dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return;
+
+	/* Nothing to do when both channels are in native mode already */
+	pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
+	if ((progif & 5) == 5)
+		return;
+
+	printk(KERN_INFO "PCI: %s Forcing PCI IDE into native mode\n",
+	       pci_name(dev));
+	(void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
+
+	/* Read it back to see whether the switch actually took */
+	if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
+	    (progif & 5) != 5) {
+		printk(KERN_ERR "Rewrite of PROGIF failed !\n");
+		return;
+	}
+
+	/* Clear IO BARs, they will be reassigned */
+	for (bar = PCI_BASE_ADDRESS_0; bar <= PCI_BASE_ADDRESS_3; bar += 4)
+		pci_write_config_dword(dev, bar, 0);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata);
+#endif /* CONFIG_PPC32 */
+
+/*
+ * K2-SATA header fixup. The second function is broken and gets fully
+ * disabled (I/O and memory decoding off, all six BARs cleared). On the
+ * first function only I/O decoding is turned off and the first five
+ * BARs are cleared.
+ */
+static void fixup_k2_sata(struct pci_dev* dev)
+{
+	const bool second_fn = PCI_FUNC(dev->devfn) > 0;
+	const u16 off_bits = second_fn ?
+		(PCI_COMMAND_IO | PCI_COMMAND_MEMORY) : PCI_COMMAND_IO;
+	const int nr_bars = second_fn ? 6 : 5;
+	u16 cmd;
+	int bar;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	cmd &= ~off_bits;
+	pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+	for (bar = 0; bar < nr_bars; bar++) {
+		struct resource *res = &dev->resource[bar];
+
+		res->end = 0;
+		res->start = 0;
+		res->flags = 0;
+		pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * bar, 0);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata);
+
+/*
+ * On U4 (aka CPC945) the PCIe root complex "P2P" bridge resource ranges aren't
+ * configured by the firmware. The bridge itself seems to ignore them but it
+ * causes problems with Linux which then re-assigns devices below the bridge,
+ * thus changing addresses of those devices from what was in the device-tree,
+ * which sucks when those are video cards using offb
+ *
+ * We could just mark it transparent but I prefer fixing up the resources to
+ * properly show what's going on here, as I have some doubts about having them
+ * badly configured potentially being an issue for DMA.
+ *
+ * We leave PIO alone, it seems to be fine
+ *
+ * Oh and there's another funny bug. The OF properties advertize the region
+ * 0xf1000000..0xf1ffffff as being forwarded as memory space. But that's
+ * actually not true, this region is the memory mapped config space. So we
+ * also need to filter it out or we'll map things in the wrong place.
+ */
+static void fixup_u4_pcie(struct pci_dev* dev)
+{
+	struct pci_controller *host = pci_bus_to_host(dev->bus);
+	struct resource *best = NULL;
+	u32 window;
+	int idx;
+
+	/* This quirk is for PowerMac only */
+	if (!machine_is(powermac))
+		return;
+
+	/* Pick the biggest usable MMIO region of the host bridge */
+	for (idx = 0; idx < 3; idx++) {
+		struct resource *cand = &host->mem_resources[idx];
+
+		if ((cand->flags & IORESOURCE_MEM) == 0)
+			continue;
+		/* 0xf0xxxxxx..f2xxxxxx is known to be reserved by HW
+		 * for other things, never use those
+		 */
+		if (cand->start >= 0xf0000000 && cand->start < 0xf3000000)
+			continue;
+		if (best == NULL || resource_size(cand) > resource_size(best))
+			best = cand;
+	}
+	/* No candidate at all: leave the bridge alone */
+	if (best == NULL)
+		return;
+
+	printk(KERN_INFO "PCI: Fixup U4 PCIe bridge range: %pR\n", best);
+
+	/* Program the bridge memory window. This is a Mac, so resources
+	 * carry no offset and can be written as they are, and they are
+	 * known to fit in 32 bits. The prefetchable window is zeroed.
+	 */
+	window = (best->end & 0xfff00000) | ((best->start >> 16) & 0xfff0);
+	pci_write_config_dword(dev, PCI_MEMORY_BASE, window);
+	pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_U4_PCIE, fixup_u4_pcie);
+
+#ifdef CONFIG_PPC64
+/*
+ * Choose how @bus gets probed. Root busses (no bridge device) of the
+ * AGP, U4 PCIe and HT hosts use normal PCI probing, because the bridge
+ * device itself is not in the device tree. Everything else is probed
+ * from the device tree.
+ */
+static int pmac_pci_probe_mode(struct pci_bus *bus)
+{
+	struct device_node *node;
+
+	if (bus->self != NULL)
+		return PCI_PROBE_DEVTREE;
+
+	node = pci_bus_to_OF_node(bus);
+	if (of_device_is_compatible(node, "u3-agp"))
+		return PCI_PROBE_NORMAL;
+	if (of_device_is_compatible(node, "u4-pcie"))
+		return PCI_PROBE_NORMAL;
+	if (of_device_is_compatible(node, "u3-ht"))
+		return PCI_PROBE_NORMAL;
+
+	return PCI_PROBE_DEVTREE;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Controller callbacks copied into every PowerMac host bridge by
+ * pmac_add_bridge(). The probe mode hook only exists on 64-bit builds,
+ * the enable_device hook only on 32-bit builds.
+ */
+struct pci_controller_ops pmac_pci_controller_ops = {
+#ifdef CONFIG_PPC64
+	.probe_mode		= pmac_pci_probe_mode,
+#endif
+#ifdef CONFIG_PPC32
+	.enable_device_hook	= pmac_pci_enable_device_hook,
+#endif
+};
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
new file mode 100644
index 0000000000..085e0ad20e
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/of_irq.h>
+
+#include <asm/pmac_feature.h>
+#include <asm/pmac_pfunc.h>
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...)	printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Interrupt handler for GPIO platform functions. @data is the cookie
+ * given to request_irq(), i.e. the pmf_function; it is handed to
+ * pmf_do_irq() and the interrupt is always reported as handled.
+ */
+static irqreturn_t macio_gpio_irq(int irq, void *data)
+{
+	struct pmf_function *func = data;
+
+	pmf_do_irq(func);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Map the first interrupt of the function's node and install
+ * macio_gpio_irq() on it. Returns -EINVAL when no interrupt can be
+ * mapped, otherwise whatever request_irq() returns.
+ */
+static int macio_do_gpio_irq_enable(struct pmf_function *func)
+{
+	unsigned int virq = irq_of_parse_and_map(func->node, 0);
+
+	if (virq == 0)
+		return -EINVAL;
+
+	return request_irq(virq, macio_gpio_irq, 0, func->node->name, func);
+}
+
+/*
+ * Release the interrupt handler installed for @func. Returns -EINVAL
+ * when the node's first interrupt cannot be mapped, 0 otherwise.
+ */
+static int macio_do_gpio_irq_disable(struct pmf_function *func)
+{
+	unsigned int virq = irq_of_parse_and_map(func->node, 0);
+
+	if (virq) {
+		free_irq(virq, func);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Read-modify-write of the GPIO register stored in func->driver_data:
+ * the bits selected by @mask are replaced by those of @value. When an
+ * argument is supplied and its value is zero, @value is inverted first.
+ * The update runs under feature_lock. Always returns 0.
+ */
+static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask)
+{
+	u8 __iomem *reg = (u8 __iomem *)func->driver_data;
+	unsigned long irqflags;
+	u8 bits, cur;
+
+	/* Polarity: a zero argument flips the value to write */
+	if (args && args->count && !args->u[0].v)
+		bits = ~value;
+	else
+		bits = value;
+
+	raw_spin_lock_irqsave(&feature_lock, irqflags);
+	cur = readb(reg);
+	cur = (cur & ~mask) | (bits & mask);
+	DBG("Do write 0x%02x to GPIO %pOF (%p)\n",
+	    cur, func->node, reg);
+	writeb(cur, reg);
+	raw_spin_unlock_irqrestore(&feature_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Read the GPIO register stored in func->driver_data and store
+ * ((reg & mask) >> rshift) ^ xor through the first argument pointer.
+ * Returns -EINVAL when there is nowhere to put the result, else 0.
+ */
+static int macio_do_gpio_read(PMF_STD_ARGS, u8 mask, int rshift, u8 xor)
+{
+	u8 __iomem *reg = (u8 __iomem *)func->driver_data;
+	u32 raw;
+
+	/* The caller must hand us a place for the reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	raw = readb(reg);
+	raw &= mask;
+	*args->u[0].p = (raw >> rshift) ^ xor;
+
+	return 0;
+}
+
+/*
+ * Sleep for @duration, rounded up to a whole number of milliseconds.
+ * @duration is divided by 1000 before being given to msleep(), so it is
+ * presumably expressed in microseconds -- TODO confirm against the
+ * platform function definition. Always returns 0.
+ */
+static int macio_do_delay(PMF_STD_ARGS, u32 duration)
+{
+	/* Round up without adding to @duration first: "duration + 999"
+	 * wraps around for values close to the u32 maximum and would
+	 * turn a very long delay into almost none.
+	 */
+	u32 msecs = duration / 1000 + (duration % 1000 ? 1 : 0);
+
+	/* assume we can sleep ! */
+	msleep(msecs);
+	return 0;
+}
+
+/*
+ * Platform function handlers registered for every GPIO node by
+ * macio_gpio_init_one(); driver_data is the address of the GPIO register.
+ */
+static struct pmf_handlers macio_gpio_handlers = {
+	.irq_enable	= macio_do_gpio_irq_enable,
+	.irq_disable	= macio_do_gpio_irq_disable,
+	.write_gpio	= macio_do_gpio_write,
+	.read_gpio	= macio_do_gpio_read,
+	.delay		= macio_do_delay,
+};
+
+/*
+ * Register the GPIO handlers for every child of the "gpio" node of
+ * @macio that has a "reg" property, then run the on-init platform
+ * functions of all those children. Does nothing when the chip has no
+ * "gpio" child node.
+ */
+static void __init macio_gpio_init_one(struct macio_chip *macio)
+{
+	struct device_node *gparent, *gp;
+
+	/* Look for the "gpio" parent node; breaking out of the loop keeps
+	 * the reference on it, dropped at the end of this function.
+	 */
+	for_each_child_of_node(macio->of_node, gparent) {
+		if (of_node_name_eq(gparent, "gpio"))
+			break;
+	}
+	if (!gparent)
+		return;
+
+	DBG("Installing GPIO functions for macio %pOF\n",
+	    macio->of_node);
+
+	/* Nothing special is needed to track the GPIOs down, so a driver
+	 * is simply created for each of them
+	 */
+	for_each_child_of_node(gparent, gp) {
+		const u32 *reg = of_get_property(gp, "reg", NULL);
+		unsigned long offset;
+
+		if (!reg)
+			continue;
+
+		/* Old style device-trees give offsets below 0x50 which
+		 * need rebasing. Hard coding this is a bit gross but
+		 * safe for now.
+		 */
+		offset = (*reg < 0x50) ? *reg + 0x50 : *reg;
+		offset += (unsigned long)macio->base;
+
+		pmf_register_driver(gp, &macio_gpio_handlers, (void *)offset);
+	}
+
+	DBG("Calling initial GPIO functions for macio %pOF\n",
+	    macio->of_node);
+
+	/* Second pass: run the init functions */
+	for_each_child_of_node(gparent, gp)
+		pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+
+	of_node_put(gparent);
+
+	/* Note: the "at sleep" and "at wake" functions are not implemented
+	 * at this point; none have been seen for GPIOs so far anyway
+	 */
+}
+
+/*
+ * Replace the bits selected by @mask in the 32-bit mac-io register at
+ * @offset with those of @value, under feature_lock. Always returns 0.
+ */
+static int macio_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long irqflags;
+	u32 regval;
+
+	raw_spin_lock_irqsave(&feature_lock, irqflags);
+	regval = MACIO_IN32(offset);
+	regval = (regval & ~mask) | (value & mask);
+	MACIO_OUT32(offset, regval);
+	raw_spin_unlock_irqrestore(&feature_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Read the 32-bit mac-io register at @offset into the location given by
+ * the first argument. Returns -EINVAL without a reply location, else 0.
+ */
+static int macio_do_read_reg32(PMF_STD_ARGS, u32 offset)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+
+	/* The caller must hand us a place for the reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	*args->u[0].p = MACIO_IN32(offset);
+
+	return 0;
+}
+
+/*
+ * Replace the bits selected by @mask in the 8-bit mac-io register at
+ * @offset with those of @value, under feature_lock. Always returns 0.
+ */
+static int macio_do_write_reg8(PMF_STD_ARGS, u32 offset, u8 value, u8 mask)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long irqflags;
+	u8 regval;
+
+	raw_spin_lock_irqsave(&feature_lock, irqflags);
+	regval = MACIO_IN8(offset);
+	regval = (regval & ~mask) | (value & mask);
+	MACIO_OUT8(offset, regval);
+	raw_spin_unlock_irqrestore(&feature_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Read the 8-bit mac-io register at @offset and store it as a single
+ * byte at the location given by the first argument. Returns -EINVAL
+ * without a reply location, else 0.
+ */
+static int macio_do_read_reg8(PMF_STD_ARGS, u32 offset)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	u8 *reply;
+
+	/* The caller must hand us a place for the reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	/* Only one byte of the reply location is written */
+	reply = (u8 *)(args->u[0].p);
+	*reply = MACIO_IN8(offset);
+
+	return 0;
+}
+
+/*
+ * Masked read of a 32-bit mac-io register: stores
+ * ((reg & mask) >> shift) ^ xor at the location given by the first
+ * argument. Returns -EINVAL without a reply location, else 0.
+ */
+static int macio_do_read_reg32_msrx(PMF_STD_ARGS, u32 offset, u32 mask,
+				    u32 shift, u32 xor)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	u32 raw;
+
+	/* The caller must hand us a place for the reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	raw = MACIO_IN32(offset) & mask;
+	*args->u[0].p = (raw >> shift) ^ xor;
+
+	return 0;
+}
+
+/*
+ * Masked read of an 8-bit mac-io register: stores the low byte of
+ * ((reg & mask) >> shift) ^ xor at the location given by the first
+ * argument. Returns -EINVAL without a reply location, else 0.
+ */
+static int macio_do_read_reg8_msrx(PMF_STD_ARGS, u32 offset, u32 mask,
+				   u32 shift, u32 xor)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	u8 *reply;
+	u32 raw;
+
+	/* The caller must hand us a place for the reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	/* Only one byte of the reply location is written */
+	reply = (u8 *)(args->u[0].p);
+	raw = MACIO_IN8(offset) & mask;
+	*reply = (raw >> shift) ^ xor;
+
+	return 0;
+}
+
+/*
+ * Shift-left-and-mask write to a 32-bit mac-io register: the value of
+ * the first argument is shifted left by @shift and replaces the bits
+ * selected by @mask, under feature_lock. Returns -EINVAL when no
+ * argument is supplied, else 0.
+ */
+static int macio_do_write_reg32_slm(PMF_STD_ARGS, u32 offset, u32 shift,
+				    u32 mask)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long irqflags;
+	u32 regval, newbits;
+
+	if (!args || !args->count)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&feature_lock, irqflags);
+	newbits = (args->u[0].v << shift) & mask;
+	regval = MACIO_IN32(offset) & ~mask;
+	MACIO_OUT32(offset, regval | newbits);
+	raw_spin_unlock_irqrestore(&feature_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Shift-left-and-mask write to an 8-bit mac-io register: the value of
+ * the first argument is shifted left by @shift and replaces the bits
+ * selected by @mask, under feature_lock. Returns -EINVAL when no
+ * argument is supplied, else 0.
+ */
+static int macio_do_write_reg8_slm(PMF_STD_ARGS, u32 offset, u32 shift,
+				   u32 mask)
+{
+	/* The MACIO_* accessors expect a local named "macio" */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long irqflags;
+	u32 regval, newbits;
+
+	if (!args || !args->count)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&feature_lock, irqflags);
+	newbits = (args->u[0].v << shift) & mask;
+	regval = MACIO_IN8(offset);
+	regval &= ~mask;
+	regval |= newbits;
+	MACIO_OUT8(offset, regval);
+	raw_spin_unlock_irqrestore(&feature_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * Platform function handlers registered on the mac-io node itself by
+ * macio_mmio_init_one(); driver_data is the struct macio_chip.
+ * No 16-bit register handlers are provided.
+ */
+static struct pmf_handlers macio_mmio_handlers = {
+	.write_reg32		= macio_do_write_reg32,
+	.read_reg32		= macio_do_read_reg32,
+	.write_reg8		= macio_do_write_reg8,
+	.read_reg8		= macio_do_read_reg8,
+	.read_reg32_msrx	= macio_do_read_reg32_msrx,
+	.read_reg8_msrx		= macio_do_read_reg8_msrx,
+	.write_reg32_slm	= macio_do_write_reg32_slm,
+	.write_reg8_slm		= macio_do_write_reg8_slm,
+	.delay			= macio_do_delay,
+};
+
+/*
+ * Register the MMIO handlers on the device node of @macio, with the
+ * chip itself as driver data.
+ */
+static void __init macio_mmio_init_one(struct macio_chip *macio)
+{
+	struct device_node *np = macio->of_node;
+
+	DBG("Installing MMIO functions for macio %pOF\n",
+	    macio->of_node);
+
+	pmf_register_driver(np, &macio_mmio_handlers, macio);
+}
+
+static struct device_node *unin_hwclock;
+
+/*
+ * Replace the bits selected by @mask in the UniNorth register at
+ * @offset with those of @value, under feature_lock. Always returns 0.
+ */
+static int unin_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask)
+{
+	unsigned long irqflags;
+	u32 regval;
+
+	raw_spin_lock_irqsave(&feature_lock, irqflags);
+	/* This is fairly bogus in darwin, but implemented this way it
+	 * should work for our needs:
+	 */
+	regval = UN_IN(offset);
+	regval = (regval & ~mask) | (value & mask);
+	UN_OUT(offset, regval);
+	raw_spin_unlock_irqrestore(&feature_lock, irqflags);
+
+	return 0;
+}
+
+
+/*
+ * Platform function handlers for the UniNorth bridge and its hw-clock
+ * child: only 32-bit register writes and delays are supported.
+ */
+static struct pmf_handlers unin_mmio_handlers = {
+	.write_reg32		= unin_do_write_reg32,
+	.delay			= macio_do_delay,
+};
+
+/*
+ * Register the UniNorth handlers on the bridge node and, when there is
+ * one, on its "hw-clock" child, running the on-init platform functions
+ * of each right after registration. The hw-clock node is remembered in
+ * unin_hwclock, which keeps the reference obtained while iterating.
+ */
+static void __init uninorth_install_pfunc(void)
+{
+	struct device_node *child;
+
+	DBG("Installing functions for UniN %pOF\n",
+	    uninorth_node);
+
+	/* The bridge itself comes first */
+	pmf_register_driver(uninorth_node, &unin_mmio_handlers, NULL);
+	pmf_do_functions(uninorth_node, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+
+	/* Then look for a hwclock child */
+	for_each_child_of_node(uninorth_node, child) {
+		if (of_node_name_eq(child, "hw-clock")) {
+			unin_hwclock = child;
+			break;
+		}
+	}
+	if (!unin_hwclock)
+		return;
+
+	DBG("Installing functions for UniN clock %pOF\n",
+	    unin_hwclock);
+	pmf_register_driver(unin_hwclock, &unin_mmio_handlers, NULL);
+	pmf_do_functions(unin_hwclock, NULL, 0, PMF_FLAGS_ON_INIT,
+			 NULL);
+}
+
+/*
+ * Install the base platform function handlers (mac-io MMIO and GPIO,
+ * UniNorth). Only the first call does any work, and only on PowerMac.
+ * Not static because the SMP code might need to run it early.
+ * Always returns 0.
+ */
+int __init pmac_pfunc_base_install(void)
+{
+	static int pfbase_inited;
+	int i;
+
+	if (pfbase_inited)
+		return 0;
+	pfbase_inited = 1;
+
+	if (!machine_is(powermac))
+		return 0;
+
+	DBG("Installing base platform functions...\n");
+
+	/* Every mac-io chip that was found gets MMIO and GPIO handlers */
+	for (i = 0 ; i < MAX_MACIO_CHIPS; i++) {
+		struct macio_chip *chip = &macio_chips[i];
+
+		if (!chip->of_node)
+			continue;
+		macio_mmio_init_one(chip);
+		macio_gpio_init_one(chip);
+	}
+
+	/*
+	 * Northbridge and direct mapped hwclock, if any. The config space
+	 * access callback is left unimplemented: it is only ever used by
+	 * functions the current driver never calls (enabling/disabling
+	 * cells in U2, mostly to restore the PCI settings, which we do
+	 * differently)
+	 */
+	if (uninorth_node && uninorth_base)
+		uninorth_install_pfunc();
+
+	DBG("All base functions installed\n");
+
+	return 0;
+}
+machine_arch_initcall(powermac, pmac_pfunc_base_install);
+
+#ifdef CONFIG_PM
+
+/* Those can be called by pmac_feature. Ultimately, I should use a sysdev
+ * or a device, but for now, that's good enough until I sort out some
+ * ordering issues. Also, we do not bother with GPIOs, as so far I yet have
+ * to see a case where a GPIO function has the on-suspend or on-resume bit
+ */
+void pmac_pfunc_base_suspend(void)
+{
+	int i;
+
+	/* On-sleep functions: mac-io chips first, then the northbridge,
+	 * then its hwclock
+	 */
+	for (i = 0 ; i < MAX_MACIO_CHIPS; i++) {
+		struct device_node *np = macio_chips[i].of_node;
+
+		if (!np)
+			continue;
+		pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
+	}
+	if (uninorth_node)
+		pmf_do_functions(uninorth_node, NULL, 0,
+				 PMF_FLAGS_ON_SLEEP, NULL);
+	if (unin_hwclock)
+		pmf_do_functions(unin_hwclock, NULL, 0,
+				 PMF_FLAGS_ON_SLEEP, NULL);
+}
+
+void pmac_pfunc_base_resume(void)
+{
+	int i;
+
+	/* On-wake functions: hwclock first, then the northbridge, then
+	 * the mac-io chips
+	 */
+	if (unin_hwclock)
+		pmf_do_functions(unin_hwclock, NULL, 0,
+				 PMF_FLAGS_ON_WAKE, NULL);
+	if (uninorth_node)
+		pmf_do_functions(uninorth_node, NULL, 0,
+				 PMF_FLAGS_ON_WAKE, NULL);
+	for (i = 0 ; i < MAX_MACIO_CHIPS; i++) {
+		struct device_node *np = macio_chips[i].of_node;
+
+		if (!np)
+			continue;
+		pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
+	}
+}
+
+#endif /* CONFIG_PM */
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
new file mode 100644
index 0000000000..22741ddfd5
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -0,0 +1,1022 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * FIXME: Properly make this race free with refcounting etc...
+ *
+ * FIXME: LOCKING !!!
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include <asm/pmac_pfunc.h>
+
+/* Debug */
+#define LOG_PARSE(fmt...)
+#define LOG_ERROR(fmt...)	printk(fmt)
+#define LOG_BLOB(t,b,c)
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...)		printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Command numbers: the opcode word of each command, used to index pmf_parsers[] */
+#define PMF_CMD_LIST			0
+#define PMF_CMD_WRITE_GPIO		1
+#define PMF_CMD_READ_GPIO		2
+#define PMF_CMD_WRITE_REG32		3
+#define PMF_CMD_READ_REG32		4
+#define PMF_CMD_WRITE_REG16		5
+#define PMF_CMD_READ_REG16		6
+#define PMF_CMD_WRITE_REG8		7
+#define PMF_CMD_READ_REG8		8
+#define PMF_CMD_DELAY			9
+#define PMF_CMD_WAIT_REG32		10
+#define PMF_CMD_WAIT_REG16		11
+#define PMF_CMD_WAIT_REG8		12
+#define PMF_CMD_READ_I2C		13
+#define PMF_CMD_WRITE_I2C		14
+#define PMF_CMD_RMW_I2C			15
+#define PMF_CMD_GEN_I2C			16
+#define PMF_CMD_SHIFT_BYTES_RIGHT	17
+#define PMF_CMD_SHIFT_BYTES_LEFT	18
+#define PMF_CMD_READ_CFG		19
+#define PMF_CMD_WRITE_CFG		20
+#define PMF_CMD_RMW_CFG			21
+#define PMF_CMD_READ_I2C_SUBADDR	22
+#define PMF_CMD_WRITE_I2C_SUBADDR	23
+#define PMF_CMD_SET_I2C_MODE		24
+#define PMF_CMD_RMW_I2C_SUBADDR		25
+#define PMF_CMD_READ_REG32_MASK_SHR_XOR	26
+#define PMF_CMD_READ_REG16_MASK_SHR_XOR	27
+#define PMF_CMD_READ_REG8_MASK_SHR_XOR	28
+#define PMF_CMD_WRITE_REG32_SHL_MASK	29
+#define PMF_CMD_WRITE_REG16_SHL_MASK	30
+#define PMF_CMD_WRITE_REG8_SHL_MASK	31
+#define PMF_CMD_MASK_AND_COMPARE	32
+#define PMF_CMD_COUNT			33
+
+/* This structure holds the state of the parser while walking through
+ * a function definition
+ */
+struct pmf_cmd {
+	const void		*cmdptr;	/* next unread byte of the function data */
+	const void		*cmdend;	/* end of the function data */
+	struct pmf_function	*func;		/* function being walked */
+	void			*instdata;	/* handed to the handlers */
+	struct pmf_args		*args;		/* handed to the handlers */
+	int			error;		/* set when the data runs out */
+};
+
+#if 0
+/* Debug output: print @title, then @bytes bytes of @blob in hex and a newline */
+static void print_blob(const char *title, const void *blob, int bytes)
+{
+	const u8 *p = blob;	/* keeps const, avoids arithmetic on void * */
+
+	printk("%s", title);
+	while (bytes-- > 0)	/* a negative count prints nothing */
+		printk("%02x ", *p++);
+	printk("\n");
+}
+#endif
+
+/*
+ * Parser helpers
+ */
+
+static u32 pmf_next32(struct pmf_cmd *cmd)
+{
+	u32 value;
+	if ((cmd->cmdend - cmd->cmdptr) < 4) {
+		cmd->error = 1;
+		return 0;
+	}
+	value = *((u32 *)cmd->cmdptr);
+	cmd->cmdptr += 4;
+	return value;
+}
+
+static const void* pmf_next_blob(struct pmf_cmd *cmd, int count)
+{
+	const void *value;
+	if ((cmd->cmdend - cmd->cmdptr) < count) {
+		cmd->error = 1;
+		return NULL;
+	}
+	value = cmd->cmdptr;
+	cmd->cmdptr += count;
+	return value;
+}
+
+/*
+ * Individual command parsers
+ */
+
+#define PMF_PARSE_CALL(name, cmd, handlers, p...) \
+	do { \
+		if (cmd->error) \
+			return -ENXIO; \
+		if (handlers == NULL) \
+			return 0; \
+		if (handlers->name)				      \
+			return handlers->name(cmd->func, cmd->instdata, \
+					      cmd->args, p);	      \
+		return -1; \
+	} while(0) \
+
+
+/* PMF_CMD_WRITE_GPIO: two words, truncated to a u8 value and a u8 mask */
+static int pmf_parser_write_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 val = (u8)pmf_next32(cmd);
+	u8 msk = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_gpio(value: %02x, mask: %02x)\n", val, msk);
+	PMF_PARSE_CALL(write_gpio, cmd, h, val, msk);
+}
+
+/* PMF_CMD_READ_GPIO: three words, a u8 mask, a right shift and a u8 xor */
+static int pmf_parser_read_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 msk = (u8)pmf_next32(cmd);
+	int shr = (int)pmf_next32(cmd);
+	u8 flip = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_gpio(mask: %02x, rshift: %d, xor: %02x)\n",
+		  msk, shr, flip);
+	PMF_PARSE_CALL(read_gpio, cmd, h, msk, shr, flip);
+}
+
+/* PMF_CMD_WRITE_REG32: three words, a register offset, a value and a mask */
+static int pmf_parser_write_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 val = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg32(offset: %08x, value: %08x, mask: %08x)\n",
+		  reg, val, msk);
+	PMF_PARSE_CALL(write_reg32, cmd, h, reg, val, msk);
+}
+
+/* PMF_CMD_READ_REG32: a single word holding the register offset */
+static int pmf_parser_read_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg32(offset: %08x)\n", reg);
+	PMF_PARSE_CALL(read_reg32, cmd, h, reg);
+}
+
+
+/* PMF_CMD_WRITE_REG16: three words, a register offset, a u16 value and a u16 mask */
+static int pmf_parser_write_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u16 val = (u16)pmf_next32(cmd);
+	u16 msk = (u16)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg16(offset: %08x, value: %04x, mask: %04x)\n",
+		  reg, val, msk);
+	PMF_PARSE_CALL(write_reg16, cmd, h, reg, val, msk);
+}
+
+/* PMF_CMD_READ_REG16: a single word holding the register offset */
+static int pmf_parser_read_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg16(offset: %08x)\n", reg);
+	PMF_PARSE_CALL(read_reg16, cmd, h, reg);
+}
+
+
+/* PMF_CMD_WRITE_REG8: three words, a register offset, a u8 value and a u8 mask */
+static int pmf_parser_write_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u8 value = (u8)pmf_next32(cmd);	/* cast now matches the u8 target */
+	u8 mask = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg8(offset: %08x, value: %02x, mask: %02x)\n",
+		  offset, value, mask);
+	PMF_PARSE_CALL(write_reg8, cmd, h, offset, value, mask);
+}
+
+/* PMF_CMD_READ_REG8: a single word holding the register offset */
+static int pmf_parser_read_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg8(offset: %08x)\n", reg);
+	PMF_PARSE_CALL(read_reg8, cmd, h, reg);
+}
+
+/* PMF_CMD_DELAY: a single word, the duration handed to the delay handler */
+static int pmf_parser_delay(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 duration = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: delay(duration: %u us)\n", duration);
+	PMF_PARSE_CALL(delay, cmd, h, duration);
+}
+
+/* PMF_CMD_WAIT_REG32: three words, a register offset, a compare value and a mask */
+static int pmf_parser_wait_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 want = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: wait_reg32(offset: %08x, comp_value: %08x,mask: %08x)\n",
+		  reg, want, msk);
+	PMF_PARSE_CALL(wait_reg32, cmd, h, reg, want, msk);
+}
+
+/* PMF_CMD_WAIT_REG16: three words, a register offset, a u16 compare value and mask */
+static int pmf_parser_wait_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u16 want = (u16)pmf_next32(cmd);
+	u16 msk = (u16)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: wait_reg16(offset: %08x, comp_value: %04x,mask: %04x)\n",
+		  reg, want, msk);
+	PMF_PARSE_CALL(wait_reg16, cmd, h, reg, want, msk);
+}
+
+/* PMF_CMD_WAIT_REG8: three words, a register offset, a u8 compare value and mask */
+static int pmf_parser_wait_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u8 want = (u8)pmf_next32(cmd);
+	u8 msk = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: wait_reg8(offset: %08x, comp_value: %02x,mask: %02x)\n",
+		  reg, want, msk);
+	PMF_PARSE_CALL(wait_reg8, cmd, h, reg, want, msk);
+}
+
+/* PMF_CMD_READ_I2C: a single word, the number of bytes to read */
+static int pmf_parser_read_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 bytes = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_i2c(bytes: %u)\n", bytes);
+	PMF_PARSE_CALL(read_i2c, cmd, h, bytes);
+}
+
+/* PMF_CMD_WRITE_I2C: a byte count word followed by that many data bytes */
+static int pmf_parser_write_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 bytes = pmf_next32(cmd);
+	const void *blob = pmf_next_blob(cmd, bytes);
+
+	LOG_PARSE("pmf: write_i2c(bytes: %u) ...\n", bytes);
+	LOG_BLOB("pmf:   data: \n", blob, bytes);
+	PMF_PARSE_CALL(write_i2c, cmd, h, bytes, blob);
+}
+
+
+/* PMF_CMD_RMW_I2C: three byte counts, then the mask blob and the values blob */
+static int pmf_parser_rmw_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 maskbytes = pmf_next32(cmd);
+	u32 valuesbytes = pmf_next32(cmd);
+	u32 totalbytes = pmf_next32(cmd);
+	const void *maskblob = pmf_next_blob(cmd, maskbytes);
+	const void *valuesblob = pmf_next_blob(cmd, valuesbytes);
+
+	LOG_PARSE("pmf: rmw_i2c(maskbytes: %u, valuebytes: %u, "
+		  "totalbytes: %u) ...\n",
+		  maskbytes, valuesbytes, totalbytes);
+	LOG_BLOB("pmf:   mask data: \n", maskblob, maskbytes);
+	LOG_BLOB("pmf:   values data: \n", valuesblob, valuesbytes);
+	PMF_PARSE_CALL(rmw_i2c, cmd, h, maskbytes, valuesbytes, totalbytes,
+		       maskblob, valuesblob);
+}
+
+/* PMF_CMD_READ_CFG: two words, an offset and a byte count */
+static int pmf_parser_read_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u32 bytes = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_cfg(offset: %x, bytes: %u)\n", offset, bytes);
+	PMF_PARSE_CALL(read_cfg, cmd, h, offset, bytes);
+}
+
+
+/* PMF_CMD_WRITE_CFG: an offset, a byte count, then that many data bytes */
+static int pmf_parser_write_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u32 bytes = pmf_next32(cmd);
+	const void *blob = pmf_next_blob(cmd, bytes);
+
+	LOG_PARSE("pmf: write_cfg(offset: %x, bytes: %u)\n", offset, bytes);
+	LOG_BLOB("pmf:   data: \n", blob, bytes);
+	PMF_PARSE_CALL(write_cfg, cmd, h, offset, bytes, blob);
+}
+
+/* PMF_CMD_RMW_CFG: an offset, three byte counts, then the mask and values blobs */
+static int pmf_parser_rmw_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u32 maskbytes = pmf_next32(cmd);
+	u32 valuesbytes = pmf_next32(cmd);
+	u32 totalbytes = pmf_next32(cmd);
+	const void *maskblob = pmf_next_blob(cmd, maskbytes);
+	const void *valuesblob = pmf_next_blob(cmd, valuesbytes);
+
+	LOG_PARSE("pmf: rmw_cfg(maskbytes: %u, valuebytes: %u,"
+		  " totalbytes: %u) ...\n",
+		  maskbytes, valuesbytes, totalbytes);
+	LOG_BLOB("pmf:   mask data: \n", maskblob, maskbytes);
+	LOG_BLOB("pmf:   values data: \n", valuesblob, valuesbytes);
+	PMF_PARSE_CALL(rmw_cfg, cmd, h, offset, maskbytes, valuesbytes,
+		       totalbytes, maskblob, valuesblob);
+}
+
+
+/* PMF_CMD_READ_I2C_SUBADDR: two words, a u8 sub-address and a byte count */
+static int pmf_parser_read_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 subaddr = (u8)pmf_next32(cmd);
+	u32 bytes = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_i2c_sub(subaddr: %x, bytes: %u)\n",
+		  subaddr, bytes);
+	PMF_PARSE_CALL(read_i2c_sub, cmd, h, subaddr, bytes);
+}
+
+/* PMF_CMD_WRITE_I2C_SUBADDR: a u8 sub-address, a byte count, then the data */
+static int pmf_parser_write_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 subaddr = (u8)pmf_next32(cmd);
+	u32 bytes = pmf_next32(cmd);
+	const void *blob = pmf_next_blob(cmd, bytes);
+
+	LOG_PARSE("pmf: write_i2c_sub(subaddr: %x, bytes: %u) ...\n",
+		  subaddr, bytes);
+	LOG_BLOB("pmf:   data: \n", blob, bytes);
+	PMF_PARSE_CALL(write_i2c_sub, cmd, h, subaddr, bytes, blob);
+}
+
+/* PMF_CMD_SET_I2C_MODE: a single word, the mode handed to the handler */
+static int pmf_parser_set_i2c_mode(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 mode = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: set_i2c_mode(mode: %u)\n", mode);
+	PMF_PARSE_CALL(set_i2c_mode, cmd, h, mode);
+}
+
+
+/* PMF_CMD_RMW_I2C_SUBADDR: u8 sub-address, three byte counts, mask and values blobs */
+static int pmf_parser_rmw_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 subaddr = (u8)pmf_next32(cmd);
+	u32 maskbytes = pmf_next32(cmd);
+	u32 valuesbytes = pmf_next32(cmd);
+	u32 totalbytes = pmf_next32(cmd);
+	const void *maskblob = pmf_next_blob(cmd, maskbytes);
+	const void *valuesblob = pmf_next_blob(cmd, valuesbytes);
+
+	LOG_PARSE("pmf: rmw_i2c_sub(subaddr: %x, maskbytes: %u, valuebytes: %u"
+		  ", totalbytes: %u) ...\n",
+		  subaddr, maskbytes, valuesbytes, totalbytes);
+	LOG_BLOB("pmf:   mask data: \n", maskblob, maskbytes);
+	LOG_BLOB("pmf:   values data: \n", valuesblob, valuesbytes);
+	PMF_PARSE_CALL(rmw_i2c_sub, cmd, h, subaddr, maskbytes, valuesbytes,
+		       totalbytes, maskblob, valuesblob);
+}
+
+/* PMF_CMD_READ_REG32_MASK_SHR_XOR: four words, offset, mask, shift and xor */
+static int pmf_parser_read_reg32_msrx(struct pmf_cmd *cmd,
+				      struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+	u32 shr = pmf_next32(cmd);
+	u32 flip = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg32_msrx(offset: %x, mask: %x, shift: %x,"
+		  " xor: %x\n", reg, msk, shr, flip);
+	PMF_PARSE_CALL(read_reg32_msrx, cmd, h, reg, msk, shr, flip);
+}
+
+/* PMF_CMD_READ_REG16_MASK_SHR_XOR: four words, offset, mask, shift and xor */
+static int pmf_parser_read_reg16_msrx(struct pmf_cmd *cmd,
+				      struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+	u32 shr = pmf_next32(cmd);
+	u32 flip = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg16_msrx(offset: %x, mask: %x, shift: %x,"
+		  " xor: %x\n", reg, msk, shr, flip);
+	PMF_PARSE_CALL(read_reg16_msrx, cmd, h, reg, msk, shr, flip);
+}
+/* PMF_CMD_READ_REG8_MASK_SHR_XOR: four words, offset, mask, shift and xor */
+static int pmf_parser_read_reg8_msrx(struct pmf_cmd *cmd,
+				     struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+	u32 shr = pmf_next32(cmd);
+	u32 flip = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg8_msrx(offset: %x, mask: %x, shift: %x,"
+		  " xor: %x\n", reg, msk, shr, flip);
+	PMF_PARSE_CALL(read_reg8_msrx, cmd, h, reg, msk, shr, flip);
+}
+
+/* PMF_CMD_WRITE_REG32_SHL_MASK: three words, offset, shift and mask */
+static int pmf_parser_write_reg32_slm(struct pmf_cmd *cmd,
+				      struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 shl = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg32_slm(offset: %x, shift: %x, mask: %x\n",
+		  reg, shl, msk);
+	PMF_PARSE_CALL(write_reg32_slm, cmd, h, reg, shl, msk);
+}
+
+/* PMF_CMD_WRITE_REG16_SHL_MASK: three words, offset, shift and mask */
+static int pmf_parser_write_reg16_slm(struct pmf_cmd *cmd,
+				      struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 shl = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg16_slm(offset: %x, shift: %x, mask: %x\n",
+		  reg, shl, msk);
+	PMF_PARSE_CALL(write_reg16_slm, cmd, h, reg, shl, msk);
+}
+
+/* PMF_CMD_WRITE_REG8_SHL_MASK: three words, offset, shift and mask */
+static int pmf_parser_write_reg8_slm(struct pmf_cmd *cmd,
+				     struct pmf_handlers *h)
+{
+	u32 reg = pmf_next32(cmd);
+	u32 shl = pmf_next32(cmd);
+	u32 msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg8_slm(offset: %x, shift: %x, mask: %x\n",
+		  reg, shl, msk);
+	PMF_PARSE_CALL(write_reg8_slm, cmd, h, reg, shl, msk);
+}
+
+/* PMF_CMD_MASK_AND_COMPARE: a byte count, then a mask and a values blob of that size */
+static int pmf_parser_mask_and_compare(struct pmf_cmd *cmd,
+				       struct pmf_handlers *h)
+{
+	u32 bytes = pmf_next32(cmd);
+	const void *maskblob = pmf_next_blob(cmd, bytes);
+	const void *valuesblob = pmf_next_blob(cmd, bytes);
+
+	LOG_PARSE("pmf: mask_and_compare(length: %u ...\n", bytes);
+	LOG_BLOB("pmf:   mask data: \n", maskblob, bytes);
+	LOG_BLOB("pmf:   values data: \n", valuesblob, bytes);
+	PMF_PARSE_CALL(mask_and_compare, cmd, h,
+		       bytes, maskblob, valuesblob);
+}
+
+
+typedef int (*pmf_cmd_parser_t)(struct pmf_cmd *cmd, struct pmf_handlers *h);
+
+/* Parser for each command code; pmf_parse_one() rejects codes whose slot is NULL */
+static pmf_cmd_parser_t pmf_parsers[PMF_CMD_COUNT] = {
+	[PMF_CMD_LIST]			= NULL,
+	[PMF_CMD_WRITE_GPIO]		= pmf_parser_write_gpio,
+	[PMF_CMD_READ_GPIO]		= pmf_parser_read_gpio,
+	[PMF_CMD_WRITE_REG32]		= pmf_parser_write_reg32,
+	[PMF_CMD_READ_REG32]		= pmf_parser_read_reg32,
+	[PMF_CMD_WRITE_REG16]		= pmf_parser_write_reg16,
+	[PMF_CMD_READ_REG16]		= pmf_parser_read_reg16,
+	[PMF_CMD_WRITE_REG8]		= pmf_parser_write_reg8,
+	[PMF_CMD_READ_REG8]		= pmf_parser_read_reg8,
+	[PMF_CMD_DELAY]			= pmf_parser_delay,
+	[PMF_CMD_WAIT_REG32]		= pmf_parser_wait_reg32,
+	[PMF_CMD_WAIT_REG16]		= pmf_parser_wait_reg16,
+	[PMF_CMD_WAIT_REG8]		= pmf_parser_wait_reg8,
+	[PMF_CMD_READ_I2C]		= pmf_parser_read_i2c,
+	[PMF_CMD_WRITE_I2C]		= pmf_parser_write_i2c,
+	[PMF_CMD_RMW_I2C]		= pmf_parser_rmw_i2c,
+	[PMF_CMD_GEN_I2C]		= NULL, /* Bogus command */
+	[PMF_CMD_SHIFT_BYTES_RIGHT]	= NULL, /* Shift bytes right: NYI */
+	[PMF_CMD_SHIFT_BYTES_LEFT]	= NULL, /* Shift bytes left: NYI */
+	[PMF_CMD_READ_CFG]		= pmf_parser_read_cfg,
+	[PMF_CMD_WRITE_CFG]		= pmf_parser_write_cfg,
+	[PMF_CMD_RMW_CFG]		= pmf_parser_rmw_cfg,
+	[PMF_CMD_READ_I2C_SUBADDR]	= pmf_parser_read_i2c_sub,
+	[PMF_CMD_WRITE_I2C_SUBADDR]	= pmf_parser_write_i2c_sub,
+	[PMF_CMD_SET_I2C_MODE]		= pmf_parser_set_i2c_mode,
+	[PMF_CMD_RMW_I2C_SUBADDR]	= pmf_parser_rmw_i2c_sub,
+	[PMF_CMD_READ_REG32_MASK_SHR_XOR] = pmf_parser_read_reg32_msrx,
+	[PMF_CMD_READ_REG16_MASK_SHR_XOR] = pmf_parser_read_reg16_msrx,
+	[PMF_CMD_READ_REG8_MASK_SHR_XOR] = pmf_parser_read_reg8_msrx,
+	[PMF_CMD_WRITE_REG32_SHL_MASK]	= pmf_parser_write_reg32_slm,
+	[PMF_CMD_WRITE_REG16_SHL_MASK]	= pmf_parser_write_reg16_slm,
+	[PMF_CMD_WRITE_REG8_SHL_MASK]	= pmf_parser_write_reg8_slm,
+	[PMF_CMD_MASK_AND_COMPARE]	= pmf_parser_mask_and_compare,
+};
+
+struct pmf_device {
+	struct list_head	link;		/* entry in pmf_devices */
+	struct device_node	*node;		/* node the driver registered for */
+	struct pmf_handlers	*handlers;	/* callbacks given at registration */
+	struct list_head	functions;	/* pmf_function entries of this node */
+	struct kref		ref;		/* released by pmf_release_device() */
+};
+
+static LIST_HEAD(pmf_devices);
+static DEFINE_SPINLOCK(pmf_lock);
+static DEFINE_MUTEX(pmf_irq_mutex);
+
+/* kref release callback: free the pmf_device that embeds @kref */
+static void pmf_release_device(struct kref *kref)
+{
+	kfree(container_of(kref, struct pmf_device, ref));
+}
+
+static inline void pmf_put_device(struct pmf_device *dev)
+{
+	kref_put(&dev->ref, pmf_release_device);	/* last put frees dev */
+}
+
+static inline struct pmf_device *pmf_get_device(struct pmf_device *dev)
+{
+	kref_get(&dev->ref);	/* paired with pmf_put_device() */
+	return dev;		/* returned so the call can be chained */
+}
+
+/* Return the registered device for @np with a reference taken, or NULL */
+static inline struct pmf_device *pmf_find_device(struct device_node *np)
+{
+	struct pmf_device *pos;
+
+	list_for_each_entry(pos, &pmf_devices, link)
+		if (pos->node == np)
+			return pmf_get_device(pos);
+	return NULL;
+}
+
+/*
+ * Walk @func's command stream. With @handlers NULL, only parse it and trim
+ * func->length to the bytes consumed; otherwise dispatch each command.
+ */
+static int pmf_parse_one(struct pmf_function *func,
+			 struct pmf_handlers *handlers,
+			 void *instdata, struct pmf_args *args)
+{
+	struct pmf_cmd cmd = {
+		.cmdptr		= func->data,
+		.cmdend		= func->data + func->length,
+		.func		= func,
+		.instdata	= instdata,
+		.args		= args,
+	};
+	int rc, count = 1;	/* one subcommand unless a list gives a count */
+	u32 ccode;
+
+	LOG_PARSE("pmf: func %s, %d bytes, %s...\n",
+		  func->name, func->length,
+		  handlers ? "executing" : "parsing");
+
+	while (count-- && cmd.cmdptr < cmd.cmdend) {
+		/* Get opcode */
+		ccode = pmf_next32(&cmd);
+		/* Check if we are hitting a command list, fetch new count */
+		if (ccode == 0) {
+			count = pmf_next32(&cmd) - 1;
+			ccode = pmf_next32(&cmd);
+		}
+		if (cmd.error) {
+			LOG_ERROR("pmf: parse error, not enough data\n");
+			return -ENXIO;
+		}
+		if (ccode >= PMF_CMD_COUNT) {
+			LOG_ERROR("pmf: command code %u unknown !\n", ccode);
+			return -ENXIO;
+		}
+		if (pmf_parsers[ccode] == NULL) {
+			LOG_ERROR("pmf: no parser for command %u !\n", ccode);
+			return -ENXIO;
+		}
+		rc = pmf_parsers[ccode](&cmd, handlers);
+		if (rc != 0) {
+			LOG_ERROR("pmf: parser for command %u returned"
+				  " error %d\n", ccode, rc);
+			return rc;
+		}
+	}
+
+	/* We are doing an initial parse pass, we need to adjust the size */
+	if (handlers == NULL)
+		func->length = cmd.cmdptr - func->data;
+
+	return 0;
+}
+
+/* Split one "platform-do-<name>" property into functions; returns how many */
+static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata,
+				 const char *name, u32 *data,
+				 unsigned int length)
+{
+	struct pmf_function *fn;
+	int added = 0;
+
+	DBG("pmf: Adding functions for platform-do-%s\n", name);
+	while (length >= 12) {
+		fn = kzalloc(sizeof(*fn), GFP_KERNEL);
+		if (!fn)
+			break;
+		kref_init(&fn->ref);
+		INIT_LIST_HEAD(&fn->irq_clients);
+		fn->node = dev->node;
+		fn->dev = dev;
+		fn->driver_data = driverdata;
+		fn->name = name;
+		/* two header words (phandle, flags) precede the commands */
+		fn->phandle = data[0];
+		fn->flags = data[1];
+		data += 2;
+		length -= 8;
+		fn->data = data;
+		fn->length = length;
+		DBG("pmf: idx %d: flags=%08x, phandle=%08x "
+		    " %d bytes remaining, parsing...\n",
+		    added+1, fn->flags, fn->phandle, length);
+		if (pmf_parse_one(fn, NULL, NULL, NULL)) {
+			kfree(fn);
+			break;
+		}
+		/* the parse pass trimmed fn->length to this function's size */
+		length -= fn->length;
+		data = (u32 *)(((u8 *)data) + fn->length);
+		list_add(&fn->link, &dev->functions);
+		pmf_get_device(dev);
+		added++;
+	}
+	DBG("pmf: Added %d functions\n", added);
+
+	return added;
+}
+
+/* Register every "platform-do-*" property of dev->node; returns the total added */
+static int pmf_add_functions(struct pmf_device *dev, void *driverdata)
+{
+#define PP_PREFIX "platform-do-"
+	const int plen = strlen(PP_PREFIX);
+	struct property *pp;
+	int total = 0;
+
+	for_each_property_of_node(dev->node, pp) {
+		/* need the prefix, a non-empty suffix and at least 12 bytes */
+		if (strncmp(pp->name, PP_PREFIX, plen) || !pp->name[plen] ||
+		    pp->length < 12)
+			continue;
+		total += pmf_add_function_prop(dev, driverdata, pp->name + plen,
+					       pp->value, pp->length);
+	}
+	return total;
+}
+
+
+/* Register @handlers as the driver of @np; 0 on success, else a negative errno */
+int pmf_register_driver(struct device_node *np,
+			struct pmf_handlers *handlers,
+			void *driverdata)
+{
+	struct pmf_device *dev;
+	unsigned long flags;
+
+	if (!handlers)
+		return -EINVAL;
+
+	DBG("pmf: registering driver for node %pOF\n", np);
+
+	/* a node may only have one driver */
+	spin_lock_irqsave(&pmf_lock, flags);
+	dev = pmf_find_device(np);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+	if (dev) {
+		DBG("pmf: already there !\n");
+		pmf_put_device(dev);
+		return -EBUSY;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		DBG("pmf: no memory !\n");
+		return -ENOMEM;
+	}
+	kref_init(&dev->ref);
+	dev->node = of_node_get(np);
+	dev->handlers = handlers;
+	INIT_LIST_HEAD(&dev->functions);
+
+	if (pmf_add_functions(dev, driverdata) == 0) {
+		DBG("pmf: no functions, disposing.. \n");
+		of_node_put(np);
+		kfree(dev);
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&pmf_lock, flags);
+	list_add(&dev->link, &pmf_devices);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmf_register_driver);
+
+struct pmf_function *pmf_get_function(struct pmf_function *func)
+{
+	if (!try_module_get(func->dev->handlers->owner))
+		return NULL;	/* could not pin the handlers' module */
+	kref_get(&func->ref);	/* dropped again by pmf_put_function() */
+	return func;
+}
+EXPORT_SYMBOL_GPL(pmf_get_function);
+
+/* kref release callback: drop the function's device reference, then free it */
+static void pmf_release_function(struct kref *kref)
+{
+	struct pmf_function *fn = container_of(kref, struct pmf_function, ref);
+	pmf_put_device(fn->dev);
+	kfree(fn);
+}
+
+static inline void __pmf_put_function(struct pmf_function *func)
+{
+	kref_put(&func->ref, pmf_release_function);	/* no module_put() here */
+}
+
+void pmf_put_function(struct pmf_function *func)
+{
+	if (func == NULL)
+		return;		/* NULL is accepted and ignored */
+	module_put(func->dev->handlers->owner);	/* pairs with try_module_get() */
+	__pmf_put_function(func);
+}
+EXPORT_SYMBOL_GPL(pmf_put_function);
+
+/* Detach the driver of @np, drop its functions and the registration reference */
+void pmf_unregister_driver(struct device_node *np)
+{
+	struct pmf_device *dev;
+	unsigned long flags;
+
+	DBG("pmf: unregistering driver for node %pOF\n", np);
+	spin_lock_irqsave(&pmf_lock, flags);
+	dev = pmf_find_device(np);
+	if (dev == NULL) {
+		DBG("pmf: not such driver !\n");
+		spin_unlock_irqrestore(&pmf_lock, flags);
+		return;
+	}
+	list_del(&dev->link);
+	while (!list_empty(&dev->functions)) {
+		struct pmf_function *func =
+			list_entry(dev->functions.next, typeof(*func), link);
+		list_del(&func->link);
+		__pmf_put_function(func);
+	}
+	/* one put for the lookup above, one for the kref_init() at register */
+	pmf_put_device(dev);
+	pmf_put_device(dev);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+}
+EXPORT_SYMBOL_GPL(pmf_unregister_driver);
+
+/*
+ * Look up the function @name with one of @flags set that acts on @target.
+ * Callers hold pmf_lock; the result carries no reference of its own.
+ */
+static struct pmf_function *__pmf_find_function(struct device_node *target,
+					 const char *name, u32 flags)
+{
+	struct device_node *actor = of_node_get(target);
+	struct pmf_device *dev;
+	struct pmf_function *func, *result = NULL;
+	char fname[64];
+	const u32 *prop;
+	u32 ph;
+
+	/*
+	 * Look for a "platform-*" function reference. If we can't find
+	 * one, then we fallback to a direct call attempt
+	 */
+	snprintf(fname, sizeof(fname), "platform-%s", name);
+	prop = of_get_property(target, fname, NULL);
+	if (prop == NULL || *prop == 0)
+		goto find_it;
+	ph = *prop;
+
+	/*
+	 * Ok, now try to find the actor. If we can't find it, we fail,
+	 * there is no point in falling back there
+	 */
+	of_node_put(actor);
+	actor = of_find_node_by_phandle(ph);
+	if (actor == NULL)
+		return NULL;
+ find_it:
+	dev = pmf_find_device(actor);
+	if (dev == NULL)
+		goto out;
+
+	list_for_each_entry(func, &dev->functions, link) {
+		if (name && strcmp(name, func->name))
+			continue;
+		if (func->phandle && target->phandle != func->phandle)
+			continue;
+		if ((func->flags & flags) == 0)
+			continue;
+		result = func;
+		break;
+	}
+	pmf_put_device(dev);
+out:
+	of_node_put(actor);
+	return result;
+}
+
+
+/* Attach @client to the interrupt function @name of @target; 0 or -ENODEV */
+int pmf_register_irq_client(struct device_node *target,
+			    const char *name,
+			    struct pmf_irq_client *client)
+{
+	struct pmf_function *fn;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmf_lock, flags);
+	fn = __pmf_find_function(target, name, PMF_FLAGS_INT_GEN);
+	if (fn)
+		fn = pmf_get_function(fn);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+	if (!fn)
+		return -ENODEV;
+
+	/* pmf_irq_mutex serialises list updates; first client enables the irq */
+	mutex_lock(&pmf_irq_mutex);
+	if (list_empty(&fn->irq_clients))
+		fn->dev->handlers->irq_enable(fn);
+
+	/* pmf_lock keeps pmf_do_irq() off the list while it changes */
+	spin_lock_irqsave(&pmf_lock, flags);
+	list_add(&client->link, &fn->irq_clients);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+
+	client->func = fn;
+	mutex_unlock(&pmf_irq_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmf_register_irq_client);
+
+/* Detach @client; the last client to leave disables the interrupt */
+void pmf_unregister_irq_client(struct pmf_irq_client *client)
+{
+	struct pmf_function *fn = client->func;
+	unsigned long flags;
+
+	BUG_ON(fn == NULL);
+
+	/* pmf_irq_mutex serialises client list updates */
+	mutex_lock(&pmf_irq_mutex);
+	client->func = NULL;
+	/* pmf_lock keeps pmf_do_irq() off the list while it changes */
+	spin_lock_irqsave(&pmf_lock, flags);
+	list_del(&client->link);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+
+	if (list_empty(&fn->irq_clients))
+		fn->dev->handlers->irq_disable(fn);
+	mutex_unlock(&pmf_irq_mutex);
+	pmf_put_function(fn);
+}
+EXPORT_SYMBOL_GPL(pmf_unregister_irq_client);
+
+
+/* Run the handler of every client registered on @func, under pmf_lock */
+void pmf_do_irq(struct pmf_function *func)
+{
+	struct pmf_irq_client *cl;
+	unsigned long flags;
+
+	/* pmf_lock stays held across all handler calls for now; using two
+	 * lists would allow dropping it if that ever becomes necessary */
+	spin_lock_irqsave(&pmf_lock, flags);
+	list_for_each_entry(cl, &func->irq_clients, link) {
+		if (try_module_get(cl->owner)) {
+			cl->handler(cl->data);
+			module_put(cl->owner);
+		}
+	}
+	spin_unlock_irqrestore(&pmf_lock, flags);
+}
+EXPORT_SYMBOL_GPL(pmf_do_irq);
+
+
+/* Execute @func once: optional begin() hook, the commands, optional end() hook */
+int pmf_call_one(struct pmf_function *func, struct pmf_args *args)
+{
+	struct pmf_handlers *ops = func->dev->handlers;
+	void *instdata = NULL;
+	int rc;
+
+	DBG(" ** pmf_call_one(%pOF/%s) **\n", func->dev->node, func->name);
+
+	if (ops->begin)
+		instdata = ops->begin(func, args);
+	rc = pmf_parse_one(func, ops, instdata, args);
+	if (ops->end)
+		ops->end(func, instdata);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pmf_call_one);
+
+int pmf_do_functions(struct device_node *np, const char *name,
+		     u32 phandle, u32 fflags, struct pmf_args *args)
+{
+	struct pmf_device *dev;
+	struct pmf_function *func, *tmp;
+	unsigned long flags;
+	int rc = -ENODEV;	/* kept if nothing is called, else last call's result */
+
+	spin_lock_irqsave(&pmf_lock, flags);
+	/* walk @np's functions; the lookup holds a device reference until the end */
+	dev = pmf_find_device(np);
+	if (dev == NULL) {
+		spin_unlock_irqrestore(&pmf_lock, flags);
+		return -ENODEV;
+	}
+	list_for_each_entry_safe(func, tmp, &dev->functions, link) {
+		if (name && strcmp(name, func->name))
+			continue;
+		if (phandle && func->phandle && phandle != func->phandle)
+			continue;
+		if ((func->flags & fflags) == 0)
+			continue;
+		if (pmf_get_function(func) == NULL)
+			continue;
+		spin_unlock_irqrestore(&pmf_lock, flags);
+		rc = pmf_call_one(func, args);	/* called with pmf_lock dropped */
+		pmf_put_function(func);
+		spin_lock_irqsave(&pmf_lock, flags);
+	}
+	pmf_put_device(dev);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pmf_do_functions);
+
+
+/* Find the on-demand function @name for @target; returned referenced, or NULL */
+struct pmf_function *pmf_find_function(struct device_node *target,
+				       const char *name)
+{
+	struct pmf_function *fn;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmf_lock, flags);
+	fn = __pmf_find_function(target, name, PMF_FLAGS_ON_DEMAND);
+	fn = fn ? pmf_get_function(fn) : NULL;
+	spin_unlock_irqrestore(&pmf_lock, flags);
+	return fn;
+}
+EXPORT_SYMBOL_GPL(pmf_find_function);
+
+/* Look up @name for @target and run it once; -ENODEV when there is none */
+int pmf_call_function(struct device_node *target, const char *name,
+		      struct pmf_args *args)
+{
+	struct pmf_function *fn = pmf_find_function(target, name);
+	int ret;
+
+	if (!fn)
+		return -ENODEV;
+	ret = pmf_call_one(fn, args);
+	pmf_put_function(fn);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pmf_call_function);
+
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
new file mode 100644
index 0000000000..7135ea1d7d
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Support for the interrupt controllers found on Power Macintosh,
+ *  currently Apple's "Grand Central" interrupt controller in all
+ *  its incarnations. OpenPIC support used on newer machines is
+ *  in a separate file.
+ *
+ *  Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ *  Copyright (C) 2005 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *                     IBM, Corp.
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/syscore_ops.h>
+#include <linux/adb.h>
+#include <linux/minmax.h>
+#include <linux/pmu.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/pci-bridge.h>
+#include <asm/time.h>
+#include <asm/pmac_feature.h>
+#include <asm/mpic.h>
+#include <asm/xmon.h>
+
+#include "pmac.h"
+
+#ifdef CONFIG_PPC32
+struct pmac_irq_hw {
+        unsigned int    event;	/* read, with level, to find pending sources */
+        unsigned int    enable;	/* written from ppc_cached_irq_mask */
+        unsigned int    ack;	/* a source's bit is written here to ack it */
+        unsigned int    level;	/* read to see which sources are asserted */
+};
+
+/* Workaround flags for 32bit powermac machines (set in pmac_pic_init()) */
+unsigned int of_irq_workarounds;
+struct device_node *of_irq_dflt_pic;	/* default PIC chosen in pmac_pic_init() */
+
+/* Register banks, 32 sources each; filled in by pmac_pic_probe_oldstyle() */
+static volatile struct pmac_irq_hw __iomem *pmac_irq_hw[4];
+
+static int max_irqs;		/* all sources, cascaded controller included */
+static int max_real_irqs;	/* sources on the primary controller only */
+
+static DEFINE_RAW_SPINLOCK(pmac_pic_lock);
+
+/* The max irq number this driver deals with is 128; see max_irqs */
+static DECLARE_BITMAP(ppc_lost_interrupts, 128);
+static DECLARE_BITMAP(ppc_cached_irq_mask, 128);
+static int pmac_irq_cascade = -1;	/* irq the slave PIC cascades through */
+static struct irq_domain *pmac_pic_host;
+
+static void __pmac_retrigger(unsigned int irq_nr)
+{
+	if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) {	/* slave source */
+		__set_bit(irq_nr, ppc_lost_interrupts);
+		irq_nr = pmac_irq_cascade;	/* replay through the cascade */
+		mb();
+	}
+	if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) {	/* newly lost */
+		atomic_inc(&ppc_n_lost_interrupts);
+		set_dec(1);	/* NOTE(review): presumably forces an early decrementer tick - confirm */
+	}
+}
+
+static void pmac_mask_and_ack_irq(struct irq_data *d)
+{
+	unsigned int src = irqd_to_hwirq(d);
+        unsigned long bit = 1UL << (src & 0x1f);	/* bit inside the bank */
+        int i = src >> 5;				/* 32 sources per bank */
+        unsigned long flags;
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+        __clear_bit(src, ppc_cached_irq_mask);
+        if (__test_and_clear_bit(src, ppc_lost_interrupts))
+                atomic_dec(&ppc_n_lost_interrupts);
+        out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
+        out_le32(&pmac_irq_hw[i]->ack, bit);
+        do {
+                /* make sure mask and ack get to the controller: spin
+                   until the enable register reads back the cached bit */
+                mb();
+        } while((in_le32(&pmac_irq_hw[i]->enable) & bit)
+                != (ppc_cached_irq_mask[i] & bit));
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+static void pmac_ack_irq(struct irq_data *d)
+{
+	unsigned int src = irqd_to_hwirq(d);
+        unsigned long bit = 1UL << (src & 0x1f);	/* bit inside the bank */
+        int i = src >> 5;				/* 32 sources per bank */
+        unsigned long flags;
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+	if (__test_and_clear_bit(src, ppc_lost_interrupts))	/* drop replay */
+                atomic_dec(&ppc_n_lost_interrupts);
+        out_le32(&pmac_irq_hw[i]->ack, bit);
+        (void)in_le32(&pmac_irq_hw[i]->ack);	/* read back, value unused */
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
+{
+        unsigned long bit = 1UL << (irq_nr & 0x1f);
+        int i = irq_nr >> 5;	/* bank index; nokicklost is not used here */
+
+        if ((unsigned)irq_nr >= max_irqs)
+                return;
+
+        /* push this bank's cached mask (callers updated it under the lock) */
+        out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
+
+        do {
+                /* make sure mask gets to controller before we
+                   return: spin until the register reads back our bit */
+                mb();
+        } while((in_le32(&pmac_irq_hw[i]->enable) & bit)
+                != (ppc_cached_irq_mask[i] & bit));
+
+        /*
+         * Setting the bit in the enable register when the device
+         * interrupt is already on *doesn't* set the bit in the flag
+         * register or request another interrupt, so replay it ourselves.
+         */
+        if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level))
+		__pmac_retrigger(irq_nr);
+}
+
+/* irq_chip .irq_startup: when an irq gets requested for the first client,
+ * ack a non-level (edge) source to clear any previous one, then unmask it.
+ */
+static unsigned int pmac_startup_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int src = irqd_to_hwirq(d);
+        unsigned long bit = 1UL << (src & 0x1f);	/* bit inside the bank */
+        int i = src >> 5;				/* 32 sources per bank */
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+	if (!irqd_is_level_type(d))
+		out_le32(&pmac_irq_hw[i]->ack, bit);
+        __set_bit(src, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(src, 0);
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+
+	return 0;	/* always reports 0 */
+}
+
+static void pmac_mask_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int src = irqd_to_hwirq(d);
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+        __clear_bit(src, ppc_cached_irq_mask);	/* update the cache first... */
+        __pmac_set_irq_mask(src, 1);	/* ...then write it to the controller */
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+static void pmac_unmask_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int src = irqd_to_hwirq(d);
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+	__set_bit(src, ppc_cached_irq_mask);	/* update the cache first... */
+        __pmac_set_irq_mask(src, 0);	/* ...then write it to the controller */
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+static int pmac_retrigger(struct irq_data *d)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+	__pmac_retrigger(irqd_to_hwirq(d));	/* locked wrapper for the helper */
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+	return 1;	/* always reports 1 */
+}
+
+static struct irq_chip pmac_pic = {	/* installed by pmac_pic_host_map() */
+	.name		= "PMAC-PIC",
+	.irq_startup	= pmac_startup_irq,
+	.irq_mask	= pmac_mask_irq,
+	.irq_ack	= pmac_ack_irq,
+	.irq_mask_ack	= pmac_mask_and_ack_irq,
+	.irq_unmask	= pmac_unmask_irq,
+	.irq_retrigger	= pmac_retrigger,
+};
+
+static irqreturn_t gatwick_action(int cpl, void *dev_id)
+{
+	unsigned long flags;
+	int irq, bits;
+	int rc = IRQ_NONE;	/* becomes IRQ_HANDLED once a source is dispatched */
+
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+	for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) {	/* slave banks */
+		int i = irq >> 5;
+		bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
+		bits |= in_le32(&pmac_irq_hw[i]->level);
+		bits &= ppc_cached_irq_mask[i];	/* keep enabled sources only */
+		if (bits == 0)
+			continue;
+		irq += __ilog2(bits);	/* highest set bit of this bank */
+		raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+		generic_handle_irq(irq);	/* handler runs with the lock dropped */
+		raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+		rc = IRQ_HANDLED;
+	}
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+	return rc;
+}
+
+static unsigned int pmac_pic_get_irq(void)
+{
+	int irq;
+	unsigned long bits = 0;
+	unsigned long flags;
+
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+	/* IPI's are a hack on the powersurge -- Cort */
+	if (smp_processor_id() != 0) {
+		return  psurge_secondary_virq;	/* CPUs other than 0 stop here */
+        }
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+	for (irq = max_real_irqs; (irq -= 32) >= 0; ) {	/* primary banks */
+		int i = irq >> 5;
+		bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
+		bits |= in_le32(&pmac_irq_hw[i]->level);
+		bits &= ppc_cached_irq_mask[i];	/* keep enabled sources only */
+		if (bits == 0)
+			continue;
+		irq += __ilog2(bits);	/* highest set bit of this bank */
+		break;
+	}
+	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+	if (unlikely(irq < 0))	/* loop ran out of banks: nothing pending */
+		return 0;
+	return irq_linear_revmap(pmac_pic_host, irq);
+}
+
+static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node,
+			       enum irq_domain_bus_token bus_token)
+{
+	/* We match all (arguments are ignored), we don't always have a node */
+	return 1;
+}
+
+static int pmac_pic_host_map(struct irq_domain *h, unsigned int virq,
+			     irq_hw_number_t hw)
+{
+	if (hw >= max_irqs)	/* beyond what the probed controllers provide */
+		return -EINVAL;
+
+	/* Every source is set up the same way here: flagged IRQ_LEVEL and
+	 * bound to the pmac_pic chip with the handle_level_irq flow handler
+	 */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &pmac_pic, handle_level_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops pmac_pic_host_ops = {	/* old-style PIC */
+	.match = pmac_pic_host_match,
+	.map = pmac_pic_host_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static void __init pmac_pic_probe_oldstyle(void)
+{
+        int i;
+        struct device_node *master = NULL;
+	struct device_node *slave = NULL;
+	u8 __iomem *addr;
+	struct resource r;
+
+	/* Probe the old-style Apple PIC(s); start by installing our get_irq */
+	ppc_md.get_irq = pmac_pic_get_irq;
+
+	/*
+	 * Find the interrupt controller type & node
+	 */
+
+	if ((master = of_find_node_by_name(NULL, "gc")) != NULL) {
+		max_irqs = max_real_irqs = 32;
+	} else if ((master = of_find_node_by_name(NULL, "ohare")) != NULL) {
+		max_irqs = max_real_irqs = 32;
+		/* We might have a second cascaded ohare */
+		slave = of_find_node_by_name(NULL, "pci106b,7");
+		if (slave)
+			max_irqs = 64;
+	} else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) {
+		max_irqs = max_real_irqs = 64;
+
+		/* We might have a second cascaded heathrow */
+
+		/* Compensate for of_node_put() in of_find_node_by_name() */
+		of_node_get(master);
+		slave = of_find_node_by_name(master, "mac-io");
+
+		/* Check ordering of master & slave */
+		if (of_device_is_compatible(master, "gatwick")) {
+			BUG_ON(slave == NULL);
+			swap(master, slave);
+		}
+
+		/* We found a slave */
+		if (slave)
+			max_irqs = 128;
+	}
+	BUG_ON(master == NULL);
+
+	/*
+	 * Allocate an irq host and make it the default one
+	 */
+	pmac_pic_host = irq_domain_add_linear(master, max_irqs,
+					      &pmac_pic_host_ops, NULL);
+	BUG_ON(pmac_pic_host == NULL);
+	irq_set_default_host(pmac_pic_host);
+
+	/* The primary controller must have a usable address (BUG otherwise) */
+	BUG_ON(of_address_to_resource(master, 0, &r));
+
+	/* Map interrupts of primary controller: bank 0 at +0x20, bank 1 at +0x10 */
+	addr = (u8 __iomem *) ioremap(r.start, 0x40);
+	i = 0;
+	pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *)
+		(addr + 0x20);
+	if (max_real_irqs > 32)
+		pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *)
+			(addr + 0x10);
+
+	printk(KERN_INFO "irq: Found primary Apple PIC %pOF for %d irqs\n",
+	       master, max_real_irqs);
+	of_node_put(master);	/* dropped only after the last use of master */
+
+	/* Map interrupts of cascaded controller */
+	if (slave && !of_address_to_resource(slave, 0, &r)) {
+		addr = (u8 __iomem *)ioremap(r.start, 0x40);
+		pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *)
+			(addr + 0x20);
+		if (max_irqs > 64)
+			pmac_irq_hw[i++] =
+				(volatile struct pmac_irq_hw __iomem *)
+				(addr + 0x10);
+		pmac_irq_cascade = irq_of_parse_and_map(slave, 0);
+
+		printk(KERN_INFO "irq: Found slave Apple PIC %pOF for %d irqs"
+		       " cascade: %d\n", slave,
+		       max_irqs - max_real_irqs, pmac_irq_cascade);
+	}
+	of_node_put(slave);	/* below, slave is only tested against NULL */
+
+	/* Disable all interrupts in all controllers */
+	for (i = 0; i * 32 < max_irqs; ++i)
+		out_le32(&pmac_irq_hw[i]->enable, 0);
+
+	/* Hookup cascade irq, same "> 0" test as __pmac_retrigger() uses */
+	if (slave && pmac_irq_cascade > 0) {
+		if (request_irq(pmac_irq_cascade, gatwick_action,
+				IRQF_NO_THREAD, "cascade", NULL))
+			pr_err("Failed to register cascade interrupt\n");
+	}
+
+	printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs);
+#ifdef CONFIG_XMON
+	i = irq_create_mapping(NULL, 20);	/* hw source 20, default domain */
+	if (request_irq(i, xmon_irq, IRQF_NO_THREAD, "NMI - XMON", NULL))
+		pr_err("Failed to register NMI-XMON interrupt\n");
+#endif
+}
+
+int of_irq_parse_oldworld(const struct device_node *device, int index,
+			struct of_phandle_args *out_irq)
+{
+	const u32 *ints = NULL;
+	int intlen;
+
+	/*
+	 * Old machines just have a list of interrupt numbers
+	 * and no interrupt-controller nodes. We also have dodgy
+	 * cases where the AAPL,interrupts property is completely
+	 * missing behind pci-pci bridges and we have to get it
+	 * from the parent (the bridge itself, as apple just wired
+	 * everything together on these)
+	 */
+	while (device) {
+		ints = of_get_property(device, "AAPL,interrupts", &intlen);
+		if (ints != NULL)
+			break;
+		device = device->parent;
+		if (!of_node_is_type(device, "pci"))	/* only climb pci parents */
+			break;
+	}
+	if (ints == NULL)
+		return -EINVAL;
+	intlen /= sizeof(u32);	/* property length in bytes -> number of cells */
+
+	if (index < 0 || index >= intlen)	/* never index outside the list */
+		return -EINVAL;
+
+	out_irq->np = NULL;	/* no controller node is reported */
+	out_irq->args[0] = ints[index];
+	out_irq->args_count = 1;
+
+	return 0;
+}
+#endif /* CONFIG_PPC32 */
+
+static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
+{
+#if defined(CONFIG_XMON) && defined(CONFIG_PPC32)
+	struct device_node* pswitch;
+	int nmi_irq;
+
+	pswitch = of_find_node_by_name(NULL, "programmer-switch");
+	if (pswitch) {
+		nmi_irq = irq_of_parse_and_map(pswitch, 0);
+		if (nmi_irq) {	/* route the switch's irq to xmon at priority 9 */
+			mpic_irq_set_priority(nmi_irq, 9);
+			if (request_irq(nmi_irq, xmon_irq, IRQF_NO_THREAD,
+					"NMI - XMON", NULL))
+				pr_err("Failed to register NMI-XMON interrupt\n");
+		}
+		of_node_put(pswitch);
+	}
+#endif	/* defined(CONFIG_XMON) && defined(CONFIG_PPC32); otherwise a no-op */
+}
+
+static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
+						int master)
+{
+	const char *name = master ? " MPIC 1   " : " MPIC 2   ";
+	struct mpic *mpic;
+	unsigned int flags = master ? 0 : MPIC_SECONDARY;
+
+	pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0);
+
+	if (of_property_read_bool(np, "big-endian"))
+		flags |= MPIC_BIG_ENDIAN;
+
+	/* Primary Big Endian means HT interrupts. This is quite dodgy
+	 * but works until I find a better way
+	 */
+	if (master && (flags & MPIC_BIG_ENDIAN))
+		flags |= MPIC_U3_HT_IRQS;
+
+	mpic = mpic_alloc(np, 0, flags, 0, 0, name);
+	if (mpic == NULL)
+		return NULL;	/* callers decide how to react to the failure */
+
+	mpic_init(mpic);
+
+	return mpic;
+ }
+
+static int __init pmac_pic_probe_mpic(void)
+{
+	struct mpic *mpic1, *mpic2;
+	struct device_node *np, *master = NULL, *slave = NULL;
+
+	/* We can have up to 2 MPICs cascaded; the master has no "interrupts" */
+	for_each_node_by_type(np, "open-pic") {
+		if (master == NULL && !of_property_present(np, "interrupts"))
+			master = of_node_get(np);
+		else if (slave == NULL)
+			slave = of_node_get(np);
+		if (master && slave) {
+			of_node_put(np);	/* leaving the iterator early */
+			break;
+		}
+	}
+
+	/* Bogus setup: only a node with "interrupts" was found, promote it */
+	if (master == NULL && slave != NULL) {
+		master = slave;
+		slave = NULL;
+	}
+
+	/* Not found, default to good old pmac pic */
+	if (master == NULL)
+		return -ENODEV;
+
+	/* Set master handler */
+	ppc_md.get_irq = mpic_get_irq;
+
+	/* Setup master; failing to do so is fatal */
+	mpic1 = pmac_setup_one_mpic(master, 1);
+	BUG_ON(mpic1 == NULL);
+
+	/* Install NMI if any */
+	pmac_pic_setup_mpic_nmi(mpic1);
+
+	of_node_put(master);
+
+	/* Set up a cascaded controller, if present; failure is only logged */
+	if (slave) {
+		mpic2 = pmac_setup_one_mpic(slave, 0);
+		if (mpic2 == NULL)
+			printk(KERN_ERR "Failed to setup slave MPIC\n");
+		of_node_put(slave);
+	}
+
+	return 0;
+}
+
+
+void __init pmac_pic_init(void)
+{
+	/* We configure the OF parsing based on our oldworld vs. newworld
+	 * platform type and whether we were booted by BootX.
+	 */
+#ifdef CONFIG_PPC32
+	if (!pmac_newworld)
+		of_irq_workarounds |= OF_IMAP_OLDWORLD_MAC;
+	if (of_property_read_bool(of_chosen, "linux,bootx"))
+		of_irq_workarounds |= OF_IMAP_NO_PHANDLE;
+
+	/* If we don't have phandles on a newworld, then try to locate a
+	 * default interrupt controller (happens when booting with BootX).
+	 * We do a first match here, hopefully, that only ever happens on
+	 * machines with one controller.
+	 */
+	if (pmac_newworld && (of_irq_workarounds & OF_IMAP_NO_PHANDLE)) {
+		struct device_node *np;
+
+		for_each_node_with_property(np, "interrupt-controller") {
+			/* Skip /chosen/interrupt-controller */
+			if (of_node_name_eq(np, "chosen"))
+				continue;
+			/* It seems like at least one person wants
+			 * to use BootX on a machine with an AppleKiwi
+			 * controller which happens to pretend to be an
+			 * interrupt controller too. */
+			if (of_node_name_eq(np, "AppleKiwi"))
+				continue;
+			/* I think we found one ! */
+			of_irq_dflt_pic = np;	/* np is kept, not put, on break */
+			break;
+		}
+	}
+#endif /* CONFIG_PPC32 */
+
+	/* We first try to detect Apple's new Core99 chipset, since mac-io
+	 * is quite different on those machines and contains an IBM MPIC2.
+	 */
+	if (pmac_pic_probe_mpic() == 0)
+		return;
+
+#ifdef CONFIG_PPC32
+	pmac_pic_probe_oldstyle();	/* no MPIC found: use the old Apple PIC */
+#endif
+}
+
+#if defined(CONFIG_PM) && defined(CONFIG_PPC32)
+/*
+ * These procedures are used in implementing sleep on the powerbooks.
+ * pmacpic_suspend() saves the states of all interrupt enables
+ * and disables all interrupts except for the via-pmu one, if found.
+ * pmacpic_resume() restores the states of all interrupt enables.
+ */
+unsigned long sleep_save_mask[2];	/* enable masks saved across sleep */
+
+/* This used to be passed by the PMU driver but that link got
+ * broken with the new driver model. We use this tweak for now...
+ * We really want to do things differently though...
+ */
+static int pmacpic_find_viaint(void)
+{
+	int viaint = -1;	/* stays -1 unless a via-pmu irq is looked up */
+
+#ifdef CONFIG_ADB_PMU
+	struct device_node *np;
+
+	if (pmu_get_model() != PMU_OHARE_BASED)	/* other PMU models: no irq */
+		goto not_found;
+	np = of_find_node_by_name(NULL, "via-pmu");
+	if (np == NULL)
+		goto not_found;
+	viaint = irq_of_parse_and_map(np, 0);
+	of_node_put(np);
+
+not_found:
+#endif /* CONFIG_ADB_PMU */
+	return viaint;
+}
+
+static int pmacpic_suspend(void)
+{
+	int viaint = pmacpic_find_viaint();
+
+	sleep_save_mask[0] = ppc_cached_irq_mask[0];	/* for pmacpic_resume() */
+	sleep_save_mask[1] = ppc_cached_irq_mask[1];
+	ppc_cached_irq_mask[0] = 0;
+	ppc_cached_irq_mask[1] = 0;
+	if (viaint > 0)		/* keep only the via-pmu interrupt enabled */
+		set_bit(viaint, ppc_cached_irq_mask);
+	out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]);
+	if (max_real_irqs > 32)
+		out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]);
+	(void)in_le32(&pmac_irq_hw[0]->event);
+	/* make sure mask gets to controller before we return to caller */
+	mb();
+        (void)in_le32(&pmac_irq_hw[0]->enable);
+
+        return 0;	/* always succeeds */
+}
+
+static void pmacpic_resume(void)
+{
+	int i;
+
+	out_le32(&pmac_irq_hw[0]->enable, 0);	/* start from everything masked */
+	if (max_real_irqs > 32)
+		out_le32(&pmac_irq_hw[1]->enable, 0);
+	mb();
+	for (i = 0; i < max_real_irqs; ++i)	/* re-enable what suspend saved */
+		if (test_bit(i, sleep_save_mask))
+			pmac_unmask_irq(irq_get_irq_data(i));
+}
+
+static struct syscore_ops pmacpic_syscore_ops = {	/* see init_pmacpic_syscore() */
+	.suspend	= pmacpic_suspend,
+	.resume		= pmacpic_resume,
+};
+
+static int __init init_pmacpic_syscore(void)
+{
+	if (pmac_irq_hw[0])	/* set only by pmac_pic_probe_oldstyle() */
+		register_syscore_ops(&pmacpic_syscore_ops);
+	return 0;
+}
+
+machine_subsys_initcall(powermac, init_pmacpic_syscore);
+
+#endif /* CONFIG_PM && CONFIG_PPC32 */
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
new file mode 100644
index 0000000000..1b696f3526
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMAC_H__
+#define __PMAC_H__
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+
+#include <asm/pmac_feature.h>
+
+/*
+ * Declarations for the various functions exported by the
+ * pmac_* files. Mostly for use by pmac_setup
+ */
+
+struct rtc_time;
+
+extern int pmac_newworld;	/* defined in setup.c, set by pmac_setup_arch() */
+
+void g5_phy_disable_cpu1(void);
+
+extern long pmac_time_init(void);
+extern time64_t pmac_get_boot_time(void);
+extern void pmac_get_rtc_time(struct rtc_time *);
+extern int pmac_set_rtc_time(struct rtc_time *);
+extern void pmac_read_rtc_time(void);
+extern void pmac_calibrate_decr(void);
+extern void pmac_pci_irq_fixup(struct pci_dev *);
+extern void pmac_pci_init(void);
+
+extern void pmac_nvram_update(void);
+extern unsigned char pmac_nvram_read_byte(int addr);
+extern void pmac_nvram_write_byte(int addr, unsigned char val);
+extern void pmac_pcibios_after_init(void);
+
+extern void pmac_setup_pci_dma(void);
+extern void pmac_check_ht_link(void);
+
+extern void pmac_setup_smp(void);
+extern int psurge_secondary_virq;	/* returned by pic.c's get_irq on CPUs != 0 */
+extern void low_cpu_offline_self(void) __attribute__((noreturn));
+
+extern int pmac_nvram_init(void);
+extern void pmac_pic_init(void);	/* the machine's .init_IRQ hook */
+
+extern struct pci_controller_ops pmac_pci_controller_ops;
+
+#endif /* __PMAC_H__ */
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
new file mode 100644
index 0000000000..6de1cd5d8a
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Powermac setup and early boot code plus other random bits.
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@samba.org)
+ *
+ *  Derived from "arch/alpha/kernel/setup.c"
+ *    Copyright (C) 1995 Linus Torvalds
+ *
+ *  Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/export.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/major.h>
+#include <linux/initrd.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/bitops.h>
+#include <linux/suspend.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/ohare.h>
+#include <asm/mediabay.h>
+#include <asm/machdep.h>
+#include <asm/dma.h>
+#include <asm/cputable.h>
+#include <asm/btext.h>
+#include <asm/pmac_feature.h>
+#include <asm/time.h>
+#include <asm/mmu_context.h>
+#include <asm/iommu.h>
+#include <asm/smu.h>
+#include <asm/pmc.h>
+#include <asm/udbg.h>
+
+#include "pmac.h"
+
+#undef SHOW_GATWICK_IRQS
+
+static int has_l2cache;	/* set by pmac_show_cpuinfo() when a cache node exists */
+
+int pmac_newworld;	/* set to 1 by pmac_setup_arch(), see there */
+
+static int current_root_goodness = -1;	/* best score seen by note_bootable_part() */
+
+/* sda1 - slightly silly choice */
+#define DEFAULT_ROOT_DEVICE	MKDEV(SCSI_DISK0_MAJOR, 1)
+
+sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;	/* picks the restart/power-off path */
+EXPORT_SYMBOL(sys_ctrler);
+
+static void pmac_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *np;
+	const char *pp;
+	int plen;
+	int mbmodel;
+	unsigned int mbflags;
+	char* mbname;
+
+	mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL,
+				    PMAC_MB_INFO_MODEL, 0);
+	mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL,
+				    PMAC_MB_INFO_FLAGS, 0);
+	if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME,
+			      (long) &mbname) != 0)
+		mbname = "Unknown";	/* the feature call did not give a name */
+
+	/* find motherboard type: root node "model", else a fixed string */
+	seq_printf(m, "machine\t\t: ");
+	np = of_find_node_by_path("/");
+	if (np != NULL) {
+		pp = of_get_property(np, "model", NULL);
+		if (pp != NULL)
+			seq_printf(m, "%s\n", pp);
+		else
+			seq_printf(m, "PowerMac\n");
+		pp = of_get_property(np, "compatible", &plen);
+		if (pp != NULL) {
+			seq_printf(m, "motherboard\t:");
+			while (plen > 0) {	/* walk the NUL-separated strings */
+				int l = strlen(pp) + 1;
+				seq_printf(m, " %s", pp);
+				plen -= l;
+				pp += l;
+			}
+			seq_printf(m, "\n");
+		}
+		of_node_put(np);
+	} else
+		seq_printf(m, "PowerMac\n");
+
+	/* print parsed model */
+	seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname);
+	seq_printf(m, "pmac flags\t: %08x\n", mbflags);
+
+	/* find l2 cache info: node named "l2-cache", else first "cache" type */
+	np = of_find_node_by_name(NULL, "l2-cache");
+	if (np == NULL)
+		np = of_find_node_by_type(NULL, "cache");
+	if (np != NULL) {
+		const unsigned int *ic =
+			of_get_property(np, "i-cache-size", NULL);
+		const unsigned int *dc =
+			of_get_property(np, "d-cache-size", NULL);
+		seq_printf(m, "L2 cache\t:");
+		has_l2cache = 1;	/* side effect: later read by ohare_init() */
+		if (of_property_read_bool(np, "cache-unified") && dc) {
+			seq_printf(m, " %dK unified", *dc / 1024);
+		} else {
+			if (ic)
+				seq_printf(m, " %dK instruction", *ic / 1024);
+			if (dc)
+				seq_printf(m, "%s %dK data",
+					   (ic? " +": ""), *dc / 1024);
+		}
+		pp = of_get_property(np, "ram-type", NULL);
+		if (pp)
+			seq_printf(m, " %s", pp);
+		seq_printf(m, "\n");
+		of_node_put(np);
+	}
+
+	/* Indicate newworld/oldworld */
+	seq_printf(m, "pmac-generation\t: %s\n",
+		   pmac_newworld ? "NewWorld" : "OldWorld");
+}
+
+#ifndef CONFIG_ADB_CUDA
+int __init find_via_cuda(void)
+{
+	struct device_node *cuda = of_find_node_by_name(NULL, "via-cuda");
+	/* Built only without CONFIG_ADB_CUDA: warn when the node exists. */
+	if (cuda != NULL) {
+		of_node_put(cuda);
+		printk("WARNING ! Your machine is CUDA-based but your kernel\n");
+		printk("          wasn't compiled with CONFIG_ADB_CUDA option !\n");
+	}
+	return 0;
+}
+#endif
+
+#ifndef CONFIG_ADB_PMU
+int __init find_via_pmu(void)
+{
+	struct device_node *pmu = of_find_node_by_name(NULL, "via-pmu");
+	/* Built only without CONFIG_ADB_PMU: warn when the node exists. */
+	if (pmu != NULL) {
+		of_node_put(pmu);
+		printk("WARNING ! Your machine is PMU-based but your kernel\n");
+		printk("          wasn't compiled with CONFIG_ADB_PMU option !\n");
+	}
+	return 0;
+}
+#endif
+
+#ifndef CONFIG_PMAC_SMU
+int __init smu_init(void)
+{
+	/* TODO: should check and warn if SMU is present; for now a no-op */
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC32
+static volatile u32 *sysctrl_regs;
+
+static void __init ohare_init(void)
+{
+	struct device_node *dn;
+
+	/* this area has the CPU identification register
+	   and some registers used by smp boards */
+	sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000);
+
+	/*
+	 * Turn on the L2 cache.
+	 * We assume that we have a PSX memory controller iff
+	 * we have an ohare I/O controller.
+	 */
+	dn = of_find_node_by_name(NULL, "ohare");
+	if (dn) {
+		of_node_put(dn);	/* only the node's presence matters */
+		if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) {
+			if (sysctrl_regs[4] & 0x10)
+				sysctrl_regs[4] |= 0x04000020;
+			else
+				sysctrl_regs[4] |= 0x04000000;
+			if(has_l2cache)	/* NOTE(review): only set by pmac_show_cpuinfo() in this file - confirm it can be set this early */
+				printk(KERN_INFO "Level 2 cache enabled\n");
+		}
+	}
+}
+
+static void __init l2cr_init(void)
+{
+	/* Applies the "l2cr-value" property of the first cpu node, if any */
+	if (cpu_has_feature(CPU_FTR_L2CR)) {
+		struct device_node *np;
+
+		for_each_of_cpu_node(np) {
+			const unsigned int *l2cr =
+				of_get_property(np, "l2cr-value", NULL);
+			if (l2cr) {
+				_set_L2CR(0);	/* written as 0 first, then the value */
+				_set_L2CR(*l2cr);
+				pr_info("L2CR overridden (0x%x), backside cache is %s\n",
+					*l2cr, ((*l2cr) & 0x80000000) ?
+					"enabled" : "disabled");
+			}
+			of_node_put(np);	/* leaving the iterator early */
+			break;	/* unconditional: only the first node is looked at */
+		}
+	}
+}
+#endif
+
+static void __init pmac_setup_arch(void)
+{
+	struct device_node *cpu, *ic;
+	const int *fp;
+	unsigned long pvr;
+
+	pvr = PVR_VER(mfspr(SPRN_PVR));
+
+	/* Set loops_per_jiffy to a half-way reasonable value,
+	   for use until calibrate_delay gets called. */
+	loops_per_jiffy = 50000000 / HZ;
+
+	for_each_of_cpu_node(cpu) {	/* first cpu with a clock-frequency wins */
+		fp = of_get_property(cpu, "clock-frequency", NULL);
+		if (fp != NULL) {
+			if (pvr >= 0x30 && pvr < 0x80)
+				/* PPC970 etc. */
+				loops_per_jiffy = *fp / (3 * HZ);
+			else if (pvr == 4 || pvr >= 8)
+				/* 604, G3, G4 etc. */
+				loops_per_jiffy = *fp / HZ;
+			else
+				/* 603, etc. */
+				loops_per_jiffy = *fp / (2 * HZ);
+			of_node_put(cpu);	/* leaving the iterator early */
+			break;
+		}
+	}
+
+	/* Newworld if any node carries an "interrupt-controller" property */
+	ic = of_find_node_with_property(NULL, "interrupt-controller");
+	if (ic) {
+		pmac_newworld = 1;
+		of_node_put(ic);
+	}
+
+#ifdef CONFIG_PPC32
+	ohare_init();
+	l2cr_init();
+#endif /* CONFIG_PPC32 */
+
+	find_via_cuda();
+	find_via_pmu();
+	smu_init();
+
+#if IS_ENABLED(CONFIG_NVRAM)
+	pmac_nvram_init();
+#endif
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (initrd_start)	/* an initrd, when present, becomes the root */
+		ROOT_DEV = Root_RAM0;
+	else
+#endif
+		ROOT_DEV = DEFAULT_ROOT_DEVICE;
+#endif
+
+#ifdef CONFIG_ADB
+	if (strstr(boot_command_line, "adb_sync")) {	/* command-line switch */
+		extern int __adb_probe_sync;
+		__adb_probe_sync = 1;
+	}
+#endif /* CONFIG_ADB */
+}
+
+static int initializing = 1;	/* 1 until pmac_late_init(); gates note_bootable_part() */
+
+static int pmac_late_init(void)
+{
+	initializing = 0;	/* from here on note_bootable_part() does nothing */
+	return 0;
+}
+machine_late_initcall(powermac, pmac_late_init);
+
+void note_bootable_part(dev_t dev, int part, int goodness);
+/*
+ * This is __ref because we check for "initializing" before
+ * touching any of the __init sensitive things and "initializing"
+ * will be false after __init time. This can't be __init because it
+ * can be called whenever a disk is first accessed.
+ */
+void __ref note_bootable_part(dev_t dev, int part, int goodness)
+{
+	char *p;
+
+	if (!initializing)
+		return;
+	if ((goodness <= current_root_goodness) &&
+	    ROOT_DEV != DEFAULT_ROOT_DEVICE)
+		return;	/* not better than the root already chosen */
+	p = strstr(boot_command_line, "root=");
+	if (p != NULL && (p == boot_command_line || p[-1] == ' '))
+		return;	/* "root=" was given as its own option: keep it */
+
+	ROOT_DEV = dev + part;
+	current_root_goodness = goodness;
+}
+
+#ifdef CONFIG_ADB_CUDA
+static void __noreturn cuda_restart(void)
+{
+	struct adb_request reset_req;
+	/* Queue the reset request, then keep polling the CUDA forever. */
+	cuda_request(&reset_req, NULL, 2, CUDA_PACKET, CUDA_RESET_SYSTEM);
+	while (1)
+		cuda_poll();
+}
+
+static void __noreturn cuda_shutdown(void)
+{
+	struct adb_request off_req;
+	/* Queue the power-down request, then keep polling the CUDA forever. */
+	cuda_request(&off_req, NULL, 2, CUDA_PACKET, CUDA_POWERDOWN);
+	while (1)
+		cuda_poll();
+}
+
+#else
+#define cuda_restart()
+#define cuda_shutdown()
+#endif
+
+#ifndef CONFIG_ADB_PMU
+#define pmu_restart()
+#define pmu_shutdown()
+#endif
+
+#ifndef CONFIG_PMAC_SMU
+#define smu_restart()
+#define smu_shutdown()
+#endif
+
+static void __noreturn pmac_restart(char *cmd)
+{
+	/*
+	 * Ask the detected system controller to restart the machine.
+	 * Each helper may expand to nothing when its driver is not
+	 * built; cmd is not used.
+	 */
+	if (sys_ctrler == SYS_CTRLER_CUDA) {
+		cuda_restart();
+	} else if (sys_ctrler == SYS_CTRLER_PMU) {
+		pmu_restart();
+	} else if (sys_ctrler == SYS_CTRLER_SMU) {
+		smu_restart();
+	}
+	for (;;) ;	/* never return to the caller */
+}
+
+static void __noreturn pmac_power_off(void)
+{
+	/*
+	 * Ask the detected system controller to power the machine
+	 * down.  Each helper may expand to nothing when its driver
+	 * is not built.
+	 */
+	if (sys_ctrler == SYS_CTRLER_CUDA) {
+		cuda_shutdown();
+	} else if (sys_ctrler == SYS_CTRLER_PMU) {
+		pmu_shutdown();
+	} else if (sys_ctrler == SYS_CTRLER_SMU) {
+		smu_shutdown();
+	}
+	for (;;) ;	/* never return to the caller */
+}
+
+/* Halting is implemented as a power-off. */
+static void __noreturn pmac_halt(void)
+{
+	pmac_power_off();
+}
+
+/*
+ * Early initialization, called from pmac_probe() once the machine matched.
+ */
+static void __init pmac_init(void)
+{
+	/* Enable early btext debug if requested */
+	if (strstr(boot_command_line, "btextdbg")) {
+		udbg_adb_init_early();
+		register_early_udbg_console();
+	}
+
+	/* Probe motherboard chipset */
+	pmac_feature_init();
+
+	/* Initialize debug stuff; each flag is 1 if its keyword is on the command line */
+	udbg_scc_init(!!strstr(boot_command_line, "sccdbg"));
+	udbg_adb_init(!!strstr(boot_command_line, "btextdbg"));
+
+#ifdef CONFIG_PPC64
+	iommu_init_early_dart(&pmac_pci_controller_ops);
+#endif
+
+	/* SMP Init has to be done early as we need to patch up
+	 * cpu_possible_mask before interrupt stacks are allocated
+	 * or kaboom...
+	 */
+#ifdef CONFIG_SMP
+	pmac_setup_smp();
+#endif
+}
+
+static int __init pmac_declare_of_platform_devices(void)
+{
+	struct device_node *np;
+
+	np = of_find_node_by_name(NULL, "valkyrie");
+	if (np) {
+		of_platform_device_create(np, "valkyrie", NULL);
+		of_node_put(np);
+	}
+	np = of_find_node_by_name(NULL, "platinum");
+	if (np) {
+		of_platform_device_create(np, "platinum", NULL);
+		of_node_put(np);
+	}
+        np = of_find_node_by_type(NULL, "smu");
+        if (np) {
+		of_platform_device_create(np, "smu", NULL);
+		of_node_put(np);
+	}
+	np = of_find_node_by_type(NULL, "fcu");
+	if (np == NULL) {
+		/* Some machines have strangely broken device-tree */
+		np = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/fan@15e");
+	}
+	if (np) {	/* the fcu node is registered under the name "temperature" */
+		of_platform_device_create(np, "temperature", NULL);
+		of_node_put(np);
+	}
+
+	return 0;	/* creation results are not checked */
+}
+machine_device_initcall(powermac, pmac_declare_of_platform_devices);
+
+#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE
+/*
+ * This is called very early, as part of console_init() (typically just after
+ * time_init()). This function is responsible for trying to find a good
+ * default console on serial ports. It tries to match the open firmware
+ * default output with one of the available serial console drivers.
+ */
+static int __init check_pmac_serial_console(void)
+{
+	struct device_node *prom_stdout = NULL;
+	int offset = 0;
+	const char *name;
+#ifdef CONFIG_SERIAL_PMACZILOG_TTYS
+	char *devname = "ttyS";
+#else
+	char *devname = "ttyPZ";
+#endif
+
+	pr_debug(" -> check_pmac_serial_console()\n");
+
+	/* The user has requested a console so this is already set up. */
+	if (strstr(boot_command_line, "console=")) {
+		pr_debug(" console was specified !\n");
+		return -EBUSY;
+	}
+
+	if (!of_chosen) {
+		pr_debug(" of_chosen is NULL !\n");
+		return -ENODEV;
+	}
+
+	/* We are getting a weird phandle from OF ... */
+	/* ... So use the full path instead */
+	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (name == NULL) {
+		pr_debug(" no linux,stdout-path !\n");
+		return -ENODEV;
+	}
+	prom_stdout = of_find_node_by_path(name);
+	if (!prom_stdout) {
+		pr_debug(" can't find stdout package %s !\n", name);
+		return -ENODEV;
+	}
+	pr_debug("stdout is %pOF\n", prom_stdout);
+
+	if (of_node_name_eq(prom_stdout, "ch-a"))	/* "ch-a" -> port 0 */
+		offset = 0;
+	else if (of_node_name_eq(prom_stdout, "ch-b"))	/* "ch-b" -> port 1 */
+		offset = 1;
+	else
+		goto not_found;
+	of_node_put(prom_stdout);
+
+	pr_debug("Found serial console at %s%d\n", devname, offset);
+
+	return add_preferred_console(devname, offset, NULL);
+
+ not_found:
+	pr_debug("No preferred console found !\n");
+	of_node_put(prom_stdout);
+	return -ENODEV;
+}
+console_initcall(check_pmac_serial_console);
+
+#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init pmac_probe(void)
+{
+	if (!of_machine_is_compatible("Power Macintosh") &&
+	    !of_machine_is_compatible("MacRISC"))
+		return 0;	/* neither compatible string matches */
+
+#ifdef CONFIG_PPC32
+	/* isa_io_base gets set in pmac_pci_init */
+	DMA_MODE_READ = 1;
+	DMA_MODE_WRITE = 2;
+#endif /* CONFIG_PPC32 */
+
+	pm_power_off = pmac_power_off;
+
+	pmac_init();
+
+	return 1;	/* this machine is a PowerMac */
+}
+
+define_machine(powermac) {
+	.name			= "PowerMac",
+	.probe			= pmac_probe,
+	.setup_arch		= pmac_setup_arch,
+	.discover_phbs		= pmac_pci_init,
+	.show_cpuinfo		= pmac_show_cpuinfo,
+	.init_IRQ		= pmac_pic_init,
+	.get_irq		= NULL,	/* set by the probe helpers pmac_pic_init() calls */
+	.pci_irq_fixup		= pmac_pci_irq_fixup,
+	.restart		= pmac_restart,
+	.halt			= pmac_halt,
+	.time_init		= pmac_time_init,
+	.get_boot_time		= pmac_get_boot_time,
+	.set_rtc_time		= pmac_set_rtc_time,
+	.get_rtc_time		= pmac_get_rtc_time,
+	.calibrate_decr		= pmac_calibrate_decr,
+	.feature_call		= pmac_do_feature_call,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PPC64
+	.power_save		= power4_idle,
+	.enable_pmcs		= power4_enable_pmcs,
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC32
+	.pcibios_after_init	= pmac_pcibios_after_init,
+	.phys_mem_access_prot	= pci_phys_mem_access_prot,
+#endif /* CONFIG_PPC32 */
+};
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
new file mode 100644
index 0000000000..d497a60003
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -0,0 +1,433 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains sleep low-level functions for PowerBook G3.
+ *    Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *    and Paul Mackerras (paulus@samba.org).
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
+
+#define MAGIC	0x4c617273	/* 'Lars' */
+
+/*
+ * Offsets into the register save area (sleep_storage, SL_SIZE bytes of .bss).
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_SDR1		0xc
+#define SL_SPRG0	0x10	/* 4 sprg's */
+#define SL_DBAT0	0x20
+#define SL_IBAT0	0x28
+#define SL_DBAT1	0x30
+#define SL_IBAT1	0x38
+#define SL_DBAT2	0x40
+#define SL_IBAT2	0x48
+#define SL_DBAT3	0x50
+#define SL_IBAT3	0x58
+#define SL_DBAT4	0x60
+#define SL_IBAT4	0x68
+#define SL_DBAT5	0x70
+#define SL_IBAT5	0x78
+#define SL_DBAT6	0x80
+#define SL_IBAT6	0x88
+#define SL_DBAT7	0x90
+#define SL_IBAT7	0x98
+#define SL_TB		0xa0
+#define SL_R2		0xa8
+#define SL_CR		0xac
+#define SL_LR		0xb0
+#define SL_R12		0xb4	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)
+
+	.section .text
+	.align	5
+
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) || \
+    (defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32))
+
+/* This gets called by via-pmu.c late during the sleep process.
+ * The PMU has already been sent the sleep command and will shut us down
+ * soon. We need to save all that is needed and set up the wakeup
+ * vector that will be called by the ROM on wakeup.
+ */
+_GLOBAL(low_sleep_handler)
+#ifndef CONFIG_PPC_BOOK3S_32
+	blr			/* nothing to do unless CONFIG_PPC_BOOK3S_32 */
+#else
+	mflr	r0
+	lis	r11,sleep_storage@ha
+	addi	r11,r11,sleep_storage@l	/* r11 = save area for everything below */
+	stw	r0,SL_LR(r11)
+	mfcr	r0
+	stw	r0,SL_CR(r11)
+	stw	r1,SL_SP(r11)
+	stw	r2,SL_R2(r11)
+	stmw	r12,SL_R12(r11)		/* r12..r31 in one go */
+
+	/* Save MSR & SDR1 */
+	mfmsr	r4
+	stw	r4,SL_MSR(r11)
+	mfsdr1	r4
+	stw	r4,SL_SDR1(r11)
+
+	/* Get a stable timebase and save it */
+1:	mftbu	r4
+	stw	r4,SL_TB(r11)
+	mftb	r5
+	stw	r5,SL_TB+4(r11)
+	mftbu	r3
+	cmpw	r3,r4
+	bne	1b			/* upper half changed under us: retry */
+
+	/* Save SPRGs */
+	mfsprg	r4,0
+	stw	r4,SL_SPRG0(r11)
+	mfsprg	r4,1
+	stw	r4,SL_SPRG0+4(r11)
+	mfsprg	r4,2
+	stw	r4,SL_SPRG0+8(r11)
+	mfsprg	r4,3
+	stw	r4,SL_SPRG0+12(r11)
+
+	/* Save BATs (upper word at offset 0, lower word at offset 4) */
+	mfdbatu	r4,0
+	stw	r4,SL_DBAT0(r11)
+	mfdbatl	r4,0
+	stw	r4,SL_DBAT0+4(r11)
+	mfdbatu	r4,1
+	stw	r4,SL_DBAT1(r11)
+	mfdbatl	r4,1
+	stw	r4,SL_DBAT1+4(r11)
+	mfdbatu	r4,2
+	stw	r4,SL_DBAT2(r11)
+	mfdbatl	r4,2
+	stw	r4,SL_DBAT2+4(r11)
+	mfdbatu	r4,3
+	stw	r4,SL_DBAT3(r11)
+	mfdbatl	r4,3
+	stw	r4,SL_DBAT3+4(r11)
+	mfibatu	r4,0
+	stw	r4,SL_IBAT0(r11)
+	mfibatl	r4,0
+	stw	r4,SL_IBAT0+4(r11)
+	mfibatu	r4,1
+	stw	r4,SL_IBAT1(r11)
+	mfibatl	r4,1
+	stw	r4,SL_IBAT1+4(r11)
+	mfibatu	r4,2
+	stw	r4,SL_IBAT2(r11)
+	mfibatl	r4,2
+	stw	r4,SL_IBAT2+4(r11)
+	mfibatu	r4,3
+	stw	r4,SL_IBAT3(r11)
+	mfibatl	r4,3
+	stw	r4,SL_IBAT3+4(r11)
+
+BEGIN_MMU_FTR_SECTION
+	mfspr	r4,SPRN_DBAT4U
+	stw	r4,SL_DBAT4(r11)
+	mfspr	r4,SPRN_DBAT4L
+	stw	r4,SL_DBAT4+4(r11)
+	mfspr	r4,SPRN_DBAT5U
+	stw	r4,SL_DBAT5(r11)
+	mfspr	r4,SPRN_DBAT5L
+	stw	r4,SL_DBAT5+4(r11)
+	mfspr	r4,SPRN_DBAT6U
+	stw	r4,SL_DBAT6(r11)
+	mfspr	r4,SPRN_DBAT6L
+	stw	r4,SL_DBAT6+4(r11)
+	mfspr	r4,SPRN_DBAT7U
+	stw	r4,SL_DBAT7(r11)
+	mfspr	r4,SPRN_DBAT7L
+	stw	r4,SL_DBAT7+4(r11)
+	mfspr	r4,SPRN_IBAT4U
+	stw	r4,SL_IBAT4(r11)
+	mfspr	r4,SPRN_IBAT4L
+	stw	r4,SL_IBAT4+4(r11)
+	mfspr	r4,SPRN_IBAT5U
+	stw	r4,SL_IBAT5(r11)
+	mfspr	r4,SPRN_IBAT5L
+	stw	r4,SL_IBAT5+4(r11)
+	mfspr	r4,SPRN_IBAT6U
+	stw	r4,SL_IBAT6(r11)
+	mfspr	r4,SPRN_IBAT6L
+	stw	r4,SL_IBAT6+4(r11)
+	mfspr	r4,SPRN_IBAT7U
+	stw	r4,SL_IBAT7(r11)
+	mfspr	r4,SPRN_IBAT7L
+	stw	r4,SL_IBAT7+4(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
+	/* Backup various CPU config stuffs */
+	bl	__save_cpu_setup
+
+	/* The ROM can wake us up via 2 different vectors:
+	 *  - On wallstreet & lombard, we must write a magic
+	 *    value 'Lars' at address 4 and a pointer to a
+	 *    memory location containing the PC to resume from
+	 *    at address 0.
+	 *  - On Core99, we must store the wakeup vector at
+	 *    address 0x80 and eventually its parameters
+	 *    at address 0x84. I've had some trouble with those
+	 *    parameters however and I no longer use them.
+	 */
+	lis	r5,grackle_wake_up@ha
+	addi	r5,r5,grackle_wake_up@l
+	tophys(r5,r5)
+	stw	r5,SL_PC(r11)		/* physical resume PC kept in the save area */
+	lis	r4,KERNELBASE@h		/* base for the "address 0" stores below */
+	tophys(r5,r11)
+	addi	r5,r5,SL_PC		/* r5 = physical address of the SL_PC slot */
+	lis	r6,MAGIC@ha
+	addi	r6,r6,MAGIC@l
+	stw	r5,0(r4)
+	stw	r6,4(r4)
+	/* Setup stuffs at 0x80-0x84 for Core99 */
+	lis	r3,core99_wake_up@ha
+	addi	r3,r3,core99_wake_up@l
+	tophys(r3,r3)
+	stw	r3,0x80(r4)
+	stw	r5,0x84(r4)
+
+	.globl	low_cpu_offline_self
+low_cpu_offline_self:		/* also reached by fall-through from low_sleep_handler */
+	/* Flush & disable all caches */
+	bl	flush_disable_caches
+
+	/* Turn off data relocation. */
+	mfmsr	r3		/* r3 = current MSR */
+	rlwinm	r3,r3,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r3
+	isync
+
+BEGIN_FTR_SECTION
+	/* Flush any pending L2 data prefetches to work around HW bug */
+	sync
+	lis	r3,0xfff0
+	lwz	r0,0(r3)	/* perform cache-inhibited load to ROM */
+	sync			/* (caches are disabled at this point) */
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+/*
+ * Set the HID0 and MSR for sleep.
+ */
+	mfspr	r2,SPRN_HID0
+	rlwinm	r2,r2,0,10,7	/* clear doze, nap */
+	oris	r2,r2,HID0_SLEEP@h	/* ... and request sleep instead */
+	sync
+	isync
+	mtspr	SPRN_HID0,r2
+	sync
+
+/* This loop puts us back to sleep in case we have a spurious
+ * wakeup so that the host bridge properly stays asleep. The
+ * CPU will be turned off, either after a known time (about 1
+ * second) on wallstreet & lombard, or as soon as the CPU enters
+ * SLEEP mode on core99
+ */
+	mfmsr	r2
+	oris	r2,r2,MSR_POW@h
+1:	sync
+	mtmsr	r2		/* MSR[POW] set: enter the power-saving state */
+	isync
+	b	1b		/* never returns; resume is via the wake-up vectors */
+_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self)
+/*
+ * Here is the resume code.
+ */
+
+
+/*
+ * Core99 machines resume here
+ * r4 has the physical address of SL_PC(sp) (unused)
+ */
+_GLOBAL(core99_wake_up)
+	/* Make sure HID0 no longer contains any sleep bit and that the
+	 * data and instruction caches are disabled
+	 */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,11,7		/* clear SLEEP, NAP, DOZE bits */
+	rlwinm	3,r3,0,18,15		/* clear DCE, ICE (bare "3" is r3) */
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+
+	/* sanitize MSR */
+	mfmsr	r3
+	ori	r3,r3,MSR_EE|MSR_IP	/* force both bits on ... */
+	xori	r3,r3,MSR_EE|MSR_IP	/* ... then flip them: EE and IP end up clear */
+	sync
+	isync
+	mtmsr	r3
+	sync
+	isync
+
+	/* Recover sleep storage */
+	lis	r3,sleep_storage@ha
+	addi	r3,r3,sleep_storage@l
+	tophys(r3,r3)
+	addi	r1,r3,SL_PC		/* r1 = physical &SL_PC, as grackle_wake_up expects */
+
+	/* Pass thru to older resume code ... */
+_ASM_NOKPROBE_SYMBOL(core99_wake_up)
+/*
+ * Here is the resume code for older machines.
+ * r1 has the physical address of SL_PC(sp).
+ */
+
+grackle_wake_up:
+
+	/* Restore the kernel's segment registers before
+	 * we do any r1 memory access as we are not sure they
+	 * are in a sane state above the first 256Mb region
+	 */
+	bl	load_segment_registers
+	sync
+	isync
+
+	subi	r1,r1,SL_PC		/* r1 = base of the save area from here on */
+
+	/* Restore various CPU config stuffs */
+	bl	__restore_cpu_setup
+
+	/* Make sure all FPRs have been initialized */
+	bl	reloc_offset
+	bl	__init_fpu_registers
+
+	/* Invalidate & enable L1 cache, we don't care about
+	 * whatever the ROM may have tried to write to memory
+	 */
+	bl	__inval_enable_L1
+
+	/* Restore the BATs, and SDR1.  Then we can turn on the MMU. */
+	lwz	r4,SL_SDR1(r1)
+	mtsdr1	r4
+	lwz	r4,SL_SPRG0(r1)
+	mtsprg	0,r4
+	lwz	r4,SL_SPRG0+4(r1)
+	mtsprg	1,r4
+	lwz	r4,SL_SPRG0+8(r1)
+	mtsprg	2,r4
+	lwz	r4,SL_SPRG0+12(r1)
+	mtsprg	3,r4
+
+	lwz	r4,SL_DBAT0(r1)
+	mtdbatu	0,r4
+	lwz	r4,SL_DBAT0+4(r1)
+	mtdbatl	0,r4
+	lwz	r4,SL_DBAT1(r1)
+	mtdbatu	1,r4
+	lwz	r4,SL_DBAT1+4(r1)
+	mtdbatl	1,r4
+	lwz	r4,SL_DBAT2(r1)
+	mtdbatu	2,r4
+	lwz	r4,SL_DBAT2+4(r1)
+	mtdbatl	2,r4
+	lwz	r4,SL_DBAT3(r1)
+	mtdbatu	3,r4
+	lwz	r4,SL_DBAT3+4(r1)
+	mtdbatl	3,r4
+	lwz	r4,SL_IBAT0(r1)
+	mtibatu	0,r4
+	lwz	r4,SL_IBAT0+4(r1)
+	mtibatl	0,r4
+	lwz	r4,SL_IBAT1(r1)
+	mtibatu	1,r4
+	lwz	r4,SL_IBAT1+4(r1)
+	mtibatl	1,r4
+	lwz	r4,SL_IBAT2(r1)
+	mtibatu	2,r4
+	lwz	r4,SL_IBAT2+4(r1)
+	mtibatl	2,r4
+	lwz	r4,SL_IBAT3(r1)
+	mtibatu	3,r4
+	lwz	r4,SL_IBAT3+4(r1)
+	mtibatl	3,r4
+
+BEGIN_MMU_FTR_SECTION
+	lwz	r4,SL_DBAT4(r1)
+	mtspr	SPRN_DBAT4U,r4
+	lwz	r4,SL_DBAT4+4(r1)
+	mtspr	SPRN_DBAT4L,r4
+	lwz	r4,SL_DBAT5(r1)
+	mtspr	SPRN_DBAT5U,r4
+	lwz	r4,SL_DBAT5+4(r1)
+	mtspr	SPRN_DBAT5L,r4
+	lwz	r4,SL_DBAT6(r1)
+	mtspr	SPRN_DBAT6U,r4
+	lwz	r4,SL_DBAT6+4(r1)
+	mtspr	SPRN_DBAT6L,r4
+	lwz	r4,SL_DBAT7(r1)
+	mtspr	SPRN_DBAT7U,r4
+	lwz	r4,SL_DBAT7+4(r1)
+	mtspr	SPRN_DBAT7L,r4
+	lwz	r4,SL_IBAT4(r1)
+	mtspr	SPRN_IBAT4U,r4
+	lwz	r4,SL_IBAT4+4(r1)
+	mtspr	SPRN_IBAT4L,r4
+	lwz	r4,SL_IBAT5(r1)
+	mtspr	SPRN_IBAT5U,r4
+	lwz	r4,SL_IBAT5+4(r1)
+	mtspr	SPRN_IBAT5L,r4
+	lwz	r4,SL_IBAT6(r1)
+	mtspr	SPRN_IBAT6U,r4
+	lwz	r4,SL_IBAT6+4(r1)
+	mtspr	SPRN_IBAT6L,r4
+	lwz	r4,SL_IBAT7(r1)
+	mtspr	SPRN_IBAT7U,r4
+	lwz	r4,SL_IBAT7+4(r1)
+	mtspr	SPRN_IBAT7L,r4
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
+	/* Flush all TLBs: tlbie every 4K step from 0x0ffff000 down to 0 */
+	lis	r4,0x1000
+1:	addic.	r4,r4,-0x1000
+	tlbie	r4
+	bgt	1b			/* keep going while the address is still > 0 */
+	sync
+
+	/* Restore TB (low word zeroed first so it cannot carry into the upper) */
+	li	r3,0
+	mttbl	r3
+	lwz	r3,SL_TB(r1)
+	lwz	r4,SL_TB+4(r1)
+	mttbu	r3
+	mttbl	r4
+
+	/* Restore the callee-saved registers and return */
+	lwz	r0,SL_CR(r1)
+	mtcr	r0
+	lwz	r2,SL_R2(r1)
+	lmw	r12,SL_R12(r1)
+
+	/* restore the MSR and SP and turn on the MMU and return */
+	lwz	r3,SL_MSR(r1)
+	lwz	r4,SL_LR(r1)
+	lwz	r1,SL_SP(r1)
+	mtsrr0	r4			/* rfi jumps to low_sleep_handler's saved LR ... */
+	mtsrr1	r3			/* ... with the MSR saved before sleeping */
+	sync
+	isync
+	rfi
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
+
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+	.section .bss
+	.balign	L1_CACHE_BYTES
+sleep_storage:
+	.space SL_SIZE
+	.balign	L1_CACHE_BYTES, 0
+
+#endif /* CONFIG_PM || CONFIG_CPU_FREQ_PMAC || (CONFIG_HOTPLUG_CPU && CONFIG_PPC32) */
+	.section .text
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
new file mode 100644
index 0000000000..8be71920e6
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -0,0 +1,1025 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for power macintosh.
+ *
+ * We support both the old "powersurge" SMP architecture
+ * and the current Core99 (G4 PowerMac) machines.
+ *
+ * Note that we don't support the very first rev. of
+ * Apple/DayStar 2 CPUs board, the one with the funky
+ * watchdog. Hopefully, none of these should be there except
+ * maybe internally to Apple. I should probably still add some
+ * code to detect this card though and disable SMP. --BenH.
+ *
+ * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net)
+ * and Ben Herrenschmidt <benh@kernel.crashing.org>.
+ *
+ * Support for DayStar quad CPU cards
+ * Copyright (C) XLR8, Inc. 1994-2000
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/hotplug.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/cpu.h>
+#include <linux/compiler.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/code-patching.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/cacheflush.h>
+#include <asm/keylargo.h>
+#include <asm/pmac_low_i2c.h>
+#include <asm/pmac_pfunc.h>
+#include <asm/inst.h>
+
+#include "pmac.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+extern void __secondary_start_pmac_0(void);
+
+static void (*pmac_tb_freeze)(int freeze);
+static u64 timebase;
+static int tb_req;
+
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+
+/*
+ * Powersurge (old powermac SMP) support.
+ */
+
+/* Addresses for powersurge registers */
+#define HAMMERHEAD_BASE		0xf8000000
+#define HHEAD_CONFIG		0x90
+#define HHEAD_SEC_INTR		0xc0
+
+/* register for interrupting the primary processor on the powersurge */
+/* N.B. this is actually the ethernet ROM! */
+#define PSURGE_PRI_INTR		0xf3019000
+
+/* register for storing the start address for the secondary processor */
+/* N.B. this is the PCI config space address register for the 1st bridge */
+#define PSURGE_START		0xf2800000
+
+/* Daystar/XLR8 4-CPU card */
+#define PSURGE_QUAD_REG_ADDR	0xf8800000
+
+#define PSURGE_QUAD_IRQ_SET	0
+#define PSURGE_QUAD_IRQ_CLR	1
+#define PSURGE_QUAD_IRQ_PRIMARY	2
+#define PSURGE_QUAD_CKSTOP_CTL	3
+#define PSURGE_QUAD_PRIMARY_ARB	4
+#define PSURGE_QUAD_BOARD_ID	6
+#define PSURGE_QUAD_WHICH_CPU	7
+#define PSURGE_QUAD_CKSTOP_RDBK	8
+#define PSURGE_QUAD_RESET_CTL	11
+
+#define PSURGE_QUAD_OUT(r, v)	(out_8(quad_base + ((r) << 4) + 4, (v)))
+#define PSURGE_QUAD_IN(r)	(in_8(quad_base + ((r) << 4) + 4) & 0x0f)
+#define PSURGE_QUAD_BIS(r, v)	(PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v)))
+#define PSURGE_QUAD_BIC(r, v)	(PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v)))
+
+/* virtual addresses for the above */
+static volatile u8 __iomem *hhead_base;
+static volatile u8 __iomem *quad_base;
+static volatile u32 __iomem *psurge_pri_intr;
+static volatile u8 __iomem *psurge_sec_intr;
+static volatile u32 __iomem *psurge_start;
+
+/* values for psurge_type */
+#define PSURGE_NONE		-1
+#define PSURGE_DUAL		0
+#define PSURGE_QUAD_OKEE	1
+#define PSURGE_QUAD_COTTON	2
+#define PSURGE_QUAD_ICEGRASS	3
+
+/* what sort of powersurge board we have */
+static int psurge_type = PSURGE_NONE;
+
+/* irq for secondary cpus to report */
+static struct irq_domain *psurge_host;
+int psurge_secondary_virq;
+
+/*
+ * Set and clear IPIs for powersurge.
+ */
+static inline void psurge_set_ipi(int cpu)
+{
+	if (psurge_type == PSURGE_NONE)
+		return;		/* no powersurge board: nothing to poke */
+	if (cpu != 0 && psurge_type != PSURGE_DUAL)
+		PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu);
+	else if (cpu != 0)
+		out_8(psurge_sec_intr, 0);	/* dual board secondary */
+	else
+		in_be32(psurge_pri_intr);	/* primary: read, result discarded */
+}
+
+static inline void psurge_clr_ipi(int cpu)
+{
+	/* Only secondaries (cpu > 0) have anything to clear here */
+	if (cpu <= 0)
+		return;
+
+	if (psurge_type == PSURGE_NONE)
+		return;
+
+	if (psurge_type == PSURGE_DUAL)
+		out_8(psurge_sec_intr, ~0);
+	else
+		PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu);
+}
+
+/*
+ * On powersurge (old SMP powermac architecture) we don't have
+ * separate IPIs for separate messages like openpic does.  Instead
+ * we clear the single IPI and hand over to the generic demux helper.
+ *  -- paulus.
+ */
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
+{
+	psurge_clr_ipi(smp_processor_id());	/* clear our own IPI first */
+	smp_ipi_demux();			/* then dispatch the pending messages */
+
+	return IRQ_HANDLED;
+}
+
+static void smp_psurge_cause_ipi(int cpu)	/* .cause_ipi hook of psurge_smp_ops */
+{
+	psurge_set_ipi(cpu);
+}
+
+static int psurge_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	/* h and hw are unused: every virq gets the dummy chip, per-CPU flow */
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops psurge_host_ops = {
+	.map	= psurge_host_map,
+};
+
+static int __init psurge_secondary_ipi_init(void)
+{
+	int err;
+
+	/* one direct-mapped virq in a no-map domain carries the secondary IPI */
+	psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
+	if (psurge_host)
+		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+	/* no virq means no handler can be installed: report -ENOMEM */
+	err = -ENOMEM;
+	if (psurge_secondary_virq)
+		err = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+				  IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL);
+	if (err)
+		pr_err("Failed to setup secondary cpu IPI\n");
+	return err;
+}
+
+/*
+ * Determine a quad card presence. We read the board ID register, we
+ * force the data bus to change to something else, and we read it again.
+ * If it's stable, then the register probably exists (ugh !)
+ */
+static int __init psurge_quad_probe(void)
+{
+	int type;
+	unsigned int i;
+
+	type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID);
+	if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS
+	    || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+		return PSURGE_DUAL;	/* not a known quad ID, or it did not read back */
+
+	/* looks OK, try a slightly more rigorous test */
+	/* bogus is not necessarily cacheline-aligned,
+	   though I don't suppose that really matters.  -- paulus */
+	for (i = 0; i < 100; i++) {
+		volatile u32 bogus[8];
+		bogus[(0+i)%8] = 0x00000000;
+		bogus[(1+i)%8] = 0x55555555;
+		bogus[(2+i)%8] = 0xFFFFFFFF;
+		bogus[(3+i)%8] = 0xAAAAAAAA;
+		bogus[(4+i)%8] = 0x33333333;
+		bogus[(5+i)%8] = 0xCCCCCCCC;
+		bogus[(6+i)%8] = 0xCCCCCCCC;
+		bogus[(7+i)%8] = 0x33333333;
+		wmb();
+		asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory");	/* flush the pattern out */
+		mb();
+		if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+			return PSURGE_DUAL;	/* ID changed with bus traffic: no register there */
+	}
+	return type;
+}
+
+static void __init psurge_quad_init(void)
+{
+	int procbits;	/* complement of the WHICH_CPU register; presumably the other CPUs - TODO confirm */
+
+	if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351);
+	procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU);
+	if (psurge_type == PSURGE_QUAD_ICEGRASS)
+		PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+	else
+		PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits);
+	mdelay(33);
+	out_8(psurge_sec_intr, ~0);
+	PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits);
+	PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+	if (psurge_type != PSURGE_QUAD_ICEGRASS)
+		PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits);
+	PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits);
+	mdelay(33);
+	PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits);
+	mdelay(33);
+	PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits);
+	mdelay(33);
+}
+
+static void __init smp_psurge_probe(void)
+{
+	int i, ncpus;
+	struct device_node *dn;
+
+	/*
+	 * The powersurge cpu board can be used in the generation
+	 * of powermacs that have a socket for an upgradeable cpu card,
+	 * including the 7500, 8500, 9500, 9600.
+	 * The device tree doesn't tell you if you have 2 cpus because
+	 * OF doesn't know anything about the 2nd processor.
+	 * Instead we look for magic bits in magic registers,
+	 * in the hammerhead memory controller in the case of the
+	 * dual-cpu powersurge board.  -- paulus.
+	 */
+	dn = of_find_node_by_name(NULL, "hammerhead");
+	if (dn == NULL)
+		return;
+	of_node_put(dn);	/* only its presence matters */
+
+	hhead_base = ioremap(HAMMERHEAD_BASE, 0x800);
+	quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024);
+	psurge_sec_intr = hhead_base + HHEAD_SEC_INTR;
+
+	psurge_type = psurge_quad_probe();
+	if (psurge_type != PSURGE_DUAL) {
+		psurge_quad_init();
+		/* All released cards using this HW design have 4 CPUs */
+		ncpus = 4;
+		/* Not sure how timebase sync works on those, let's use SW */
+		smp_ops->give_timebase = smp_generic_give_timebase;
+		smp_ops->take_timebase = smp_generic_take_timebase;
+	} else {
+		iounmap(quad_base);	/* no quad card: its registers are not needed */
+		if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) {
+			/* not a dual-cpu card */
+			iounmap(hhead_base);
+			psurge_type = PSURGE_NONE;
+			return;
+		}
+		ncpus = 2;
+	}
+
+	if (psurge_secondary_ipi_init())
+		return;		/* NOTE(review): mappings made above stay in place on this path */
+
+	psurge_start = ioremap(PSURGE_START, 4);
+	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
+
+	/* This is necessary because OF doesn't know about the
+	 * secondary cpu(s), and thus there aren't nodes in the
+	 * device tree for them, and smp_setup_cpu_maps hasn't
+	 * set their bits in cpu_present_mask.
+	 */
+	if (ncpus > NR_CPUS)
+		ncpus = NR_CPUS;
+	for (i = 1; i < ncpus ; ++i)
+		set_cpu_present(i, true);
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
+}
+
+static int __init smp_psurge_kick_cpu(int nr)
+{
+	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
+	unsigned long a, flags;
+	int i, j;
+
+	/* Defining this here is evil ... but I prefer hiding that
+	 * crap to avoid giving people ideas that they can do the
+	 * same.
+	 */
+	extern volatile unsigned int cpu_callin_map[NR_CPUS];
+
+	/* may need to flush here if secondary bats aren't setup */
+	for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32)
+		asm volatile("dcbf 0,%0" : : "r" (a) : "memory");
+	asm volatile("sync");
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353);
+
+	/* This is going to freeze the timebase, we disable interrupts */
+	local_irq_save(flags);	/* NOTE(review): no local_irq_restore() in this function */
+
+	out_be32(psurge_start, start);
+	mb();
+
+	psurge_set_ipi(nr);
+
+	/*
+	 * We can't use udelay here because the timebase is now frozen.
+	 */
+	for (i = 0; i < 2000; ++i)
+		asm volatile("nop" : : : "memory");
+	psurge_clr_ipi(nr);
+
+	/*
+	 * Also, because the timebase is frozen, we must not return to the
+	 * caller which will try to do udelay's etc... Instead, we wait -here-
+	 * for the CPU to callin.
+	 */
+	for (i = 0; i < 100000 && !cpu_callin_map[nr]; ++i) {
+		for (j = 1; j < 10000; j++)
+			asm volatile("nop" : : : "memory");
+		asm volatile("sync" : : : "memory");
+	}
+	if (!cpu_callin_map[nr])
+		goto stuck;	/* secondary never called in: skip the TB hand-off */
+
+	/* And we do the TB sync here too for standard dual CPU cards */
+	if (psurge_type == PSURGE_DUAL) {
+		while(!tb_req)		/* wait for smp_psurge_take_timebase() */
+			barrier();
+		tb_req = 0;
+		mb();
+		timebase = get_tb();	/* publish our timebase value */
+		mb();
+		while (timebase)	/* the taker zeroes it once consumed */
+			barrier();
+		mb();
+	}
+ stuck:
+	/* now interrupt the secondary, restarting both TBs */
+	if (psurge_type == PSURGE_DUAL)
+		psurge_set_ipi(1);
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+	return 0;
+}
+
+static void __init smp_psurge_setup_cpu(int cpu_nr)
+{
+	int virq;
+
+	/* only CPU 0 does this, and only once psurge_start has been mapped */
+	if (cpu_nr != 0 || !psurge_start)
+		return;
+
+	/* reset the entry point so another intr won't start us up again */
+	out_be32(psurge_start, 0x100);
+	virq = irq_create_mapping(NULL, 30);
+	if (request_irq(virq, psurge_ipi_intr, IRQF_PERCPU | IRQF_NO_THREAD,
+			"primary IPI", NULL))
+		printk(KERN_ERR "Couldn't get primary IPI interrupt");
+}
+
+void __init smp_psurge_take_timebase(void)
+{
+	if (psurge_type != PSURGE_DUAL)
+		return;
+
+	tb_req = 1;		/* request: smp_psurge_kick_cpu() spins on this */
+	mb();
+	while (!timebase)	/* wait for the value to be published */
+		barrier();
+	mb();
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;		/* acknowledge: the giver spins until this is zero */
+	mb();
+	set_dec(tb_ticks_per_jiffy/2);
+}
+
+void __init smp_psurge_give_timebase(void)
+{
+	/* Nothing to do here: smp_psurge_kick_cpu() does the dual-card TB sync */
+}
+
+/* PowerSurge-style Macs */
+struct smp_ops_t psurge_smp_ops = {
+	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
+	.cause_ipi	= smp_psurge_cause_ipi,
+	.cause_nmi_ipi	= NULL,
+	.probe		= smp_psurge_probe,
+	.kick_cpu	= smp_psurge_kick_cpu,
+	.setup_cpu	= smp_psurge_setup_cpu,
+	.give_timebase	= smp_psurge_give_timebase,	/* smp_psurge_probe() swaps in the generic pair for quad cards */
+	.take_timebase	= smp_psurge_take_timebase,
+};
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+
+/*
+ * Core 99 and later support
+ */
+
+
+static void smp_core99_give_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	while(!tb_req)		/* wait for smp_core99_take_timebase() to ask */
+		barrier();
+	tb_req = 0;
+	(*pmac_tb_freeze)(1);	/* stop the timebase while it is copied */
+	mb();
+	timebase = get_tb();	/* publish the frozen value */
+	mb();
+	while (timebase)	/* the taker zeroes it once consumed */
+		barrier();
+	mb();
+	(*pmac_tb_freeze)(0);	/* let the timebase run again */
+	mb();
+
+	local_irq_restore(flags);
+}
+
+
+static void smp_core99_take_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	tb_req = 1;		/* request: smp_core99_give_timebase() spins on this */
+	mb();
+	while (!timebase)	/* wait for the value to be published */
+		barrier();
+	mb();
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;		/* acknowledge so the giver can unfreeze */
+	mb();
+
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * G5s enable/disable the timebase via an i2c-connected clock chip.
+ */
+static struct pmac_i2c_bus *pmac_tb_clock_chip_host;
+static u8 pmac_tb_pulsar_addr;
+
+static void smp_core99_cypress_tb_freeze(int freeze)
+{
+	u8 data;
+	int rc;
+
+	/* Strangely, the device-tree says address is 0xd2, but darwin
+	 * accesses 0xd0 ...
+	 */
+	pmac_i2c_setmode(pmac_tb_clock_chip_host,
+			 pmac_i2c_mode_combined);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   0xd0 | pmac_i2c_read,
+			   1, 0x81, &data, 1);	/* read chip register 0x81 */
+	if (rc != 0)
+		goto bail;
+
+	data = (data & 0xf3) | (freeze ? 0x00 : 0x0c);	/* bits 0x0c: clear to freeze, set to run */
+
+	pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   0xd0 | pmac_i2c_write,
+			   1, 0x81, &data, 1);	/* write it back */
+
+ bail:
+	if (rc != 0) {
+		printk(KERN_ERR "Cypress Timebase %s rc: %d\n",
+		       freeze ? "freeze" : "unfreeze", rc);
+		panic("Timebase freeze failed !\n");
+	}
+}
+
+
+static void smp_core99_pulsar_tb_freeze(int freeze)
+{
+	struct pmac_i2c_bus *bus = pmac_tb_clock_chip_host;
+	u8 val;
+	int err;
+
+	/* read-modify-write of chip register 0x2e: fetch it first */
+	pmac_i2c_setmode(bus, pmac_i2c_mode_combined);
+	err = pmac_i2c_xfer(bus, pmac_tb_pulsar_addr | pmac_i2c_read,
+			    1, 0x2e, &val, 1);
+	if (err == 0) {
+		/* keep bits 0x88, then select the freeze/run pattern */
+		val &= 0x88;
+		val |= freeze ? 0x11 : 0x22;
+
+		pmac_i2c_setmode(bus, pmac_i2c_mode_stdsub);
+		err = pmac_i2c_xfer(bus, pmac_tb_pulsar_addr | pmac_i2c_write,
+				    1, 0x2e, &val, 1);
+	}
+	if (err == 0)
+		return;
+
+	printk(KERN_ERR "Pulsar Timebase %s rc: %d\n",
+	       freeze ? "freeze" : "unfreeze", err);
+	panic("Timebase freeze failed !\n");
+}
+
+static void __init smp_core99_setup_i2c_hwsync(int ncpus)
+{
+	struct device_node *p, *cc = NULL;
+	const char *name = NULL;
+	const u32 *reg;
+	int ok;
+
+	/* Look for the clock chip */
+	for_each_node_by_name(cc, "i2c-hwclock") {
+		p = of_get_parent(cc);
+		ok = p && of_device_is_compatible(p, "uni-n-i2c");
+		of_node_put(p);
+		if (!ok)
+			continue;
+
+		pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc);
+		if (pmac_tb_clock_chip_host == NULL)
+			continue;
+		reg = of_get_property(cc, "reg", NULL);
+		if (reg == NULL)
+			continue;
+		switch (*reg) {
+		case 0xd2:
+			if (of_device_is_compatible(cc,"pulsar-legacy-slewing")) {
+				pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+				pmac_tb_pulsar_addr = 0xd2;
+				name = "Pulsar";
+			} else if (of_device_is_compatible(cc, "cy28508")) {
+				pmac_tb_freeze = smp_core99_cypress_tb_freeze;
+				name = "Cypress";
+			}
+			break;
+		case 0xd4:
+			pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+			pmac_tb_pulsar_addr = 0xd4;
+			name = "Pulsar";
+			break;
+		}
+		if (pmac_tb_freeze != NULL)
+			break;
+	}
+	of_node_put(cc);	/* drop the ref kept by the break; cc is NULL if the loop ran out */
+	if (pmac_tb_freeze != NULL) {
+		/* Open i2c bus for synchronous access */
+		if (pmac_i2c_open(pmac_tb_clock_chip_host, 1)) {
+			printk(KERN_ERR "Failed top open i2c bus for clock"
+			       " sync, fallback to software sync !\n");
+			goto no_i2c_sync;
+		}
+		printk(KERN_INFO "Processor timebase sync using %s i2c clock\n",
+		       name);
+		return;
+	}
+ no_i2c_sync:
+	pmac_tb_freeze = NULL;		/* caller falls back to another sync method */
+	pmac_tb_clock_chip_host = NULL;
+}
+
+
+
+/*
+ * Newer G5s use a platform function ("cpu-timebase" on the /cpus node)
+ */
+
+static void smp_core99_pfunc_tb_freeze(int freeze)
+{
+	struct device_node *cpus;
+	struct pmf_args args;
+
+	cpus = of_find_node_by_path("/cpus");
+	BUG_ON(cpus == NULL);
+	args.count = 1;
+	args.u[0].v = !freeze;	/* the function takes "enable": 1 = run, 0 = frozen */
+	pmf_call_function(cpus, "cpu-timebase", &args);
+	of_node_put(cpus);
+}
+
+#else /* CONFIG_PPC64 */
+
+/*
+ * SMP G4 use a GPIO to enable/disable the timebase.
+ */
+
+static unsigned int core99_tb_gpio;	/* Timebase freeze GPIO */
+
+static void smp_core99_gpio_tb_freeze(int freeze)
+{
+	/* value written to the timebase GPIO: 4 to freeze, 0 otherwise */
+	int level = freeze ? 4 : 0;
+
+	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, level);
+	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0);
+}
+
+
+#endif /* !CONFIG_PPC64 */
+
+static void core99_init_caches(int cpu)
+{
+#ifndef CONFIG_PPC64
+	/* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */
+	static long int core99_l2_cache;
+	static long int core99_l3_cache;
+
+	if (!cpu_has_feature(CPU_FTR_L2CR))
+		return;
+
+	if (cpu == 0) {
+		core99_l2_cache = _get_L2CR();	/* remember CPU 0's setting */
+		printk("CPU0: L2CR is %lx\n", core99_l2_cache);
+	} else {
+		printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR());
+		_set_L2CR(0);			/* clear first, then copy CPU 0's value */
+		_set_L2CR(core99_l2_cache);
+		printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache);
+	}
+
+	if (!cpu_has_feature(CPU_FTR_L3CR))
+		return;
+
+	if (cpu == 0){
+		core99_l3_cache = _get_L3CR();	/* same scheme for L3CR */
+		printk("CPU0: L3CR is %lx\n", core99_l3_cache);
+	} else {
+		printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR());
+		_set_L3CR(0);
+		_set_L3CR(core99_l3_cache);
+		printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache);
+	}
+#endif /* !CONFIG_PPC64 */
+}
+
+static void __init smp_core99_setup(int ncpus)
+{
+#ifdef CONFIG_PPC64
+
+	/* i2c based HW sync on some G5s */
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
+		smp_core99_setup_i2c_hwsync(ncpus);
+
+	/* pfunc based HW sync on recent G5s */
+	if (pmac_tb_freeze == NULL) {
+		struct device_node *cpus =
+			of_find_node_by_path("/cpus");
+		if (cpus &&
+		    of_property_read_bool(cpus, "platform-cpu-timebase")) {
+			pmac_tb_freeze = smp_core99_pfunc_tb_freeze;
+			printk(KERN_INFO "Processor timebase sync using"
+			       " platform function\n");
+		}
+		of_node_put(cpus);
+	}
+
+#else /* CONFIG_PPC64 */
+
+	/* GPIO based HW sync on ppc32 Core99 */
+	if (pmac_tb_freeze == NULL && !of_machine_is_compatible("MacRISC4")) {
+		struct device_node *cpu;
+		const u32 *tbprop = NULL;
+
+		core99_tb_gpio = KL_GPIO_TB_ENABLE;	/* default value */
+		cpu = of_find_node_by_type(NULL, "cpu");
+		if (cpu != NULL) {
+			tbprop = of_get_property(cpu, "timebase-enable", NULL);
+			if (tbprop)
+				core99_tb_gpio = *tbprop;	/* device tree overrides the default */
+			of_node_put(cpu);
+		}
+		pmac_tb_freeze = smp_core99_gpio_tb_freeze;
+		printk(KERN_INFO "Processor timebase sync using"
+		       " GPIO 0x%02x\n", core99_tb_gpio);
+	}
+
+#endif /* CONFIG_PPC64 */
+
+	/* No timebase sync, fallback to software */
+	if (pmac_tb_freeze == NULL) {
+		smp_ops->give_timebase = smp_generic_give_timebase;
+		smp_ops->take_timebase = smp_generic_take_timebase;
+		printk(KERN_INFO "Processor timebase sync using software\n");
+	}
+
+#ifndef CONFIG_PPC64
+	{
+		int i;
+
+		/* XXX should get this from reg properties */
+		for (i = 1; i < ncpus; ++i)
+			set_hard_smp_processor_id(i, i);
+	}
+#endif /* !CONFIG_PPC64 */
+
+	/* 32 bits SMP can't NAP */
+	if (!of_machine_is_compatible("MacRISC4"))
+		powersave_nap = 0;
+}
+
+static void __init smp_core99_probe(void)
+{
+	struct device_node *np;
+	int count = 0;
+
+	if (ppc_md.progress)
+		ppc_md.progress("smp_core99_probe", 0x345);
+
+	/* One "cpu"-type node in the device tree per processor */
+	for_each_node_by_type(np, "cpu")
+		count++;
+
+	printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", count);
+
+	/* A single-CPU machine needs none of the SMP setup below */
+	if (count < 2)
+		return;
+
+	/*
+	 * We run before initcalls, so the platform-function and i2c
+	 * layers have to be initialised by hand before setting up SMP.
+	 */
+	pmac_pfunc_base_install();
+	pmac_i2c_init();
+
+	/* Timebase sync method, ability to nap, ... */
+	smp_core99_setup(count);
+
+	/* IPIs, then collect CPU 0's l2cr and l3cr values */
+	mpic_request_ipis();
+	core99_init_caches(0);
+}
+
+static int smp_core99_kick_cpu(int nr)
+{
+	unsigned int save_vector;
+	unsigned long target, flags;
+	unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
+
+	if (nr < 0 || nr > 3)
+		return -ENOENT;	/* only CPUs 0..3 are accepted */
+
+	if (ppc_md.progress)
+		ppc_md.progress("smp_core99_kick_cpu", 0x346);
+
+	local_irq_save(flags);
+
+	/* Save reset vector */
+	save_vector = *vector;
+
+	/* Setup fake reset vector that does
+	 *   b __secondary_start_pmac_0 + nr*8
+	 */
+	target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
+	patch_branch(vector, target, BRANCH_SET_LINK);	/* patched with the link flag set */
+
+	/* Put some life in our friend */
+	pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
+
+	/* FIXME: We wait a bit for the CPU to take the exception, I should
+	 * instead wait for the entry code to set something for me. Well,
+	 * ideally, all that crap will be done in prom.c and the CPU left
+	 * in a RAM-based wait loop like CHRP.
+	 */
+	mdelay(1);
+
+	/* Restore our exception vector */
+	patch_instruction(vector, ppc_inst(save_vector));
+
+	local_irq_restore(flags);
+	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+
+	return 0;
+}
+
+static void smp_core99_setup_cpu(int cpu_nr)
+{
+	/* Setup L2/L3 (CPU 0's values were collected in smp_core99_probe()) */
+	if (cpu_nr != 0)
+		core99_init_caches(cpu_nr);
+
+	/* Setup openpic */
+	mpic_setup_this_cpu();
+}
+
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned int smp_core99_host_open;
+
+static int smp_core99_cpu_prepare(unsigned int cpu)
+{
+	int rc;
+
+	/* Open i2c bus if it was used for tb sync */
+	if (pmac_tb_clock_chip_host && !smp_core99_host_open) {
+		rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
+		if (rc) {
+			pr_err("Failed to open i2c bus for time sync\n");
+			return rc;	/* hotplug callbacks return an errno, not a NOTIFY_* code */
+		}
+		smp_core99_host_open = 1;	/* closed again by smp_core99_cpu_online() */
+	}
+	return 0;
+}
+
+/*
+ * CPU hotplug "online" callback: close the tb sync i2c bus again if it is
+ * in use and currently marked open. Always returns 0.
+ */
+static int smp_core99_cpu_online(unsigned int cpu)
+{
+	if (!pmac_tb_clock_chip_host || !smp_core99_host_open)
+		return 0;
+
+	pmac_i2c_close(pmac_tb_clock_chip_host);
+	smp_core99_host_open = 0;
+	return 0;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Installed as .bringup_done in core99_smp_ops (64-bit only).
+ *
+ * Releases the i2c bus used for timebase sync, takes an unstarted second
+ * CPU off the bus on MacRISC4 machines and, with CONFIG_HOTPLUG_CPU,
+ * registers the hotplug prepare/online callbacks.
+ */
+static void __init smp_core99_bringup_done(void)
+{
+	/* Close i2c bus if it was used for tb sync */
+	if (pmac_tb_clock_chip_host)
+		pmac_i2c_close(pmac_tb_clock_chip_host);
+
+	/* If we didn't start the second CPU, we must take
+	 * it off the bus.
+	 */
+	if (of_machine_is_compatible("MacRISC4") &&
+	    num_online_cpus() < 2) {
+		set_cpu_present(1, false);
+		g5_phy_disable_cpu1();
+	}
+#ifdef CONFIG_HOTPLUG_CPU
+	/* _nocalls: the callbacks are only registered, not invoked now */
+	cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE,
+				  "powerpc/pmac:prepare", smp_core99_cpu_prepare,
+				  NULL);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online",
+				  smp_core99_cpu_online, NULL);
+#endif
+
+	if (ppc_md.progress)
+		ppc_md.progress("smp_core99_bringup_done", 0x349);
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * .cpu_disable hook: run the generic disable step, then raise the MPIC
+ * priority to 0xf and clean up this CPU's MMU context.
+ *
+ * Returns the error from generic_cpu_disable(), or 0.
+ */
+static int smp_core99_cpu_disable(void)
+{
+	int err;
+
+	err = generic_cpu_disable();
+	if (err != 0)
+		return err;
+
+	mpic_cpu_set_priority(0xf);
+	cleanup_cpu_mmu_context();
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC32
+
+/*
+ * 32-bit variant: with interrupts disabled, leave the idle task, mark this
+ * CPU dead and hand over to low_cpu_offline_self().
+ */
+static void pmac_cpu_offline_self(void)
+{
+	int cpu = smp_processor_id();
+
+	local_irq_disable();
+	idle_task_exit();
+	pr_debug("CPU%d offline\n", cpu);
+	generic_set_cpu_dead(cpu);
+	/* Barriers are issued after the dead state is written, before the low-level offline code runs */
+	smp_wmb();
+	mb();
+	low_cpu_offline_self();
+}
+
+#else /* CONFIG_PPC32 */
+
+/*
+ * 64-bit variant: mark this CPU dead, re-enable interrupts and then loop
+ * forever, re-arming the decrementer with 0x7fffffff and entering NAP via
+ * power4_idle() on every iteration. The loop has no exit.
+ */
+static void pmac_cpu_offline_self(void)
+{
+	int cpu = smp_processor_id();
+
+	local_irq_disable();
+	idle_task_exit();
+
+	/*
+	 * turn off as much as possible, we'll be
+	 * kicked out as this will only be invoked
+	 * on core99 platforms for now ...
+	 */
+
+	printk(KERN_INFO "CPU#%d offline\n", cpu);
+	generic_set_cpu_dead(cpu);
+	smp_wmb();
+
+	/*
+	 * Re-enable interrupts. The NAP code needs to enable them
+	 * anyways, do it now so we deal with the case where one already
+	 * happened while soft-disabled.
+	 * We shouldn't get any external interrupts, only decrementer, and the
+	 * decrementer handler is safe for use on offline CPUs
+	 */
+	local_irq_enable();
+
+	while (1) {
+		/* let's not take timer interrupts too often ... */
+		set_dec(0x7fffffff);
+
+		/* Enter NAP mode */
+		power4_idle();
+	}
+}
+
+#endif /* else CONFIG_PPC32 */
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Core99 Macs (dual G4s and G5s)
+ *
+ * .bringup_done is only provided on 64-bit, .cpu_disable/.cpu_die only with
+ * CONFIG_HOTPLUG_CPU. .cpu_offline_self is not set here; pmac_setup_smp()
+ * fills it in at runtime.
+ */
+static struct smp_ops_t core99_smp_ops = {
+	.message_pass	= smp_mpic_message_pass,
+	.probe		= smp_core99_probe,
+#ifdef CONFIG_PPC64
+	.bringup_done	= smp_core99_bringup_done,
+#endif
+	.kick_cpu	= smp_core99_kick_cpu,
+	.setup_cpu	= smp_core99_setup_cpu,
+	.give_timebase	= smp_core99_give_timebase,
+	.take_timebase	= smp_core99_take_timebase,
+#if defined(CONFIG_HOTPLUG_CPU)
+	.cpu_disable	= smp_core99_cpu_disable,
+	.cpu_die	= generic_cpu_die,
+#endif
+};
+
+/*
+ * Select the SMP operations for this PowerMac.
+ *
+ * A "uni-n", "u3" or "u4" node in the device tree selects core99_smp_ops.
+ * Otherwise, with CONFIG_PPC_PMAC32_PSURGE, CPUs 1..3 are marked possible
+ * and psurge_smp_ops is used. With CONFIG_HOTPLUG_CPU the offline hook is
+ * then installed on the selected ops.
+ */
+void __init pmac_setup_smp(void)
+{
+	struct device_node *np;
+
+	/* Check for Core99 */
+	np = of_find_node_by_name(NULL, "uni-n");
+	if (!np)
+		np = of_find_node_by_name(NULL, "u3");
+	if (!np)
+		np = of_find_node_by_name(NULL, "u4");
+	if (np) {
+		of_node_put(np);
+		smp_ops = &core99_smp_ops;
+	}
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+	else {
+		/* We have to set bits in cpu_possible_mask here since the
+		 * secondary CPU(s) aren't in the device tree. Various
+		 * things won't be initialized for CPUs not in the possible
+		 * map, so we really need to fix it up here.
+		 */
+		int cpu;
+
+		for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
+			set_cpu_possible(cpu, true);
+		smp_ops = &psurge_smp_ops;
+	}
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/*
+	 * When no Core99 node exists and PSURGE support is compiled out,
+	 * nothing above assigned smp_ops, so it may still be unset here
+	 * (NOTE(review): confirm its initial value); do not dereference it
+	 * in that case.
+	 */
+	if (smp_ops)
+		smp_ops->cpu_offline_self = pmac_cpu_offline_self;
+#endif
+}
+
+
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
new file mode 100644
index 0000000000..8633891b7a
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for periodic interrupts (100 per second) and for getting
+ * the current time from the RTC on Power Macintoshes.
+ *
+ * We use the decrementer register for our periodic interrupts.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996 Paul Mackerras.
+ * Copyright (C) 2003-2005 Benjamin Herrenschmidt.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/interrupt.h>
+#include <linux/hardirq.h>
+#include <linux/rtc.h>
+#include <linux/of_address.h>
+
+#include <asm/early_ioremap.h>
+#include <asm/sections.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/smu.h>
+
+#include "pmac.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/*
+ * Calibrate the decrementer frequency with the VIA timer 1.
+ */
+#define VIA_TIMER_FREQ_6	4700000	/* timer 1 frequency * 6 */
+
+/* VIA registers */
+#define RS		0x200		/* skip between registers */
+#define T1CL		(4*RS)		/* Timer 1 ctr/latch (low 8 bits) */
+#define T1CH		(5*RS)		/* Timer 1 counter (high 8 bits) */
+#define T1LL		(6*RS)		/* Timer 1 latch (low 8 bits) */
+#define T1LH		(7*RS)		/* Timer 1 latch (high 8 bits) */
+#define ACR		(11*RS)		/* Auxiliary control register */
+#define IFR		(13*RS)		/* Interrupt flag register */
+
+/* Bits in ACR */
+#define T1MODE		0xc0		/* Timer 1 mode */
+#define T1MODE_CONT	0x40		/*  continuous interrupts */
+
+/* Bits in IFR and IER */
+#define T1_INT		0x40		/* Timer 1 interrupt */
+
+/*
+ * Read the GMT delta stored in XPRAM.
+ *
+ * With CONFIG_NVRAM on 32-bit, a 24-bit value is assembled from the XPRAM
+ * bytes at PMAC_XPRAM_MACHINE_LOC + 0x9..0xb, sign-extended to 32 bits and
+ * returned; the DST flag (bit 7 of the byte at + 0x8) is only printed.
+ * In every other configuration the function returns 0.
+ */
+long __init pmac_time_init(void)
+{
+	s32 delta = 0;
+#if defined(CONFIG_NVRAM) && defined(CONFIG_PPC32)
+	int dst;
+	
+	delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16;
+	delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8;
+	delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb);
+	/* Bit 23 set: extend the sign into the top byte */
+	if (delta & 0x00800000UL)
+		delta |= 0xFF000000UL;
+	dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0);
+	/* Printed as delta/60 "minutes", so delta is presumably in seconds */
+	printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60,
+		dst ? "on" : "off");
+#endif
+	return delta;
+}
+
+#ifdef CONFIG_PMAC_SMU
+/* Read the RTC through the SMU; yields 0 when smu_get_rtc_time() fails. */
+static time64_t smu_get_time(void)
+{
+	struct rtc_time tm;
+
+	return smu_get_rtc_time(&tm, 1) ? 0 : rtc_tm_to_time64(&tm);
+}
+#endif
+
+/*
+ * Return the current RTC time from whichever system controller is present,
+ * or 0 when the controller is unknown or its driver is not built in.
+ *
+ * Can't be __init, it's called when suspending and resuming
+ */
+time64_t pmac_get_boot_time(void)
+{
+#ifdef CONFIG_ADB_CUDA
+	if (sys_ctrler == SYS_CTRLER_CUDA)
+		return cuda_get_time();
+#endif
+#ifdef CONFIG_ADB_PMU
+	if (sys_ctrler == SYS_CTRLER_PMU)
+		return pmu_get_time();
+#endif
+#ifdef CONFIG_PMAC_SMU
+	if (sys_ctrler == SYS_CTRLER_SMU)
+		return smu_get_time();
+#endif
+	return 0;
+}
+
+/*
+ * Fill *tm from the RTC of whichever system controller is present.
+ * *tm is left untouched when the controller is unknown or its driver is
+ * not built in.
+ */
+void pmac_get_rtc_time(struct rtc_time *tm)
+{
+#ifdef CONFIG_ADB_CUDA
+	if (sys_ctrler == SYS_CTRLER_CUDA) {
+		rtc_time64_to_tm(cuda_get_time(), tm);
+		return;
+	}
+#endif
+#ifdef CONFIG_ADB_PMU
+	if (sys_ctrler == SYS_CTRLER_PMU) {
+		rtc_time64_to_tm(pmu_get_time(), tm);
+		return;
+	}
+#endif
+#ifdef CONFIG_PMAC_SMU
+	if (sys_ctrler == SYS_CTRLER_SMU) {
+		smu_get_rtc_time(tm, 1);
+		return;
+	}
+#endif
+}
+
+/*
+ * Program the RTC of whichever system controller is present from *tm.
+ *
+ * Returns the controller driver's result, or -ENODEV when the controller
+ * is unknown or its driver is not built in.
+ */
+int pmac_set_rtc_time(struct rtc_time *tm)
+{
+#ifdef CONFIG_ADB_CUDA
+	if (sys_ctrler == SYS_CTRLER_CUDA)
+		return cuda_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_ADB_PMU
+	if (sys_ctrler == SYS_CTRLER_PMU)
+		return pmu_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_PMAC_SMU
+	if (sys_ctrler == SYS_CTRLER_SMU)
+		return smu_set_rtc_time(tm, 1);
+#endif
+	return -ENODEV;
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * Calibrate the decrementer register using VIA timer 1.
+ * This is used both on powermacs and CHRP machines.
+ *
+ * Looks up the VIA node ("via-cuda", "via-pmu" or "via"), maps its first
+ * address range, runs timer 1 in continuous mode with a latch of
+ * VIA_TIMER_FREQ_6 / 100 and samples the decrementer at two consecutive
+ * timer 1 interrupt flags. ppc_tb_freq is derived from the difference.
+ *
+ * Returns 1 when ppc_tb_freq was updated, 0 when the VIA could not be
+ * found or mapped.
+ */
+static int __init via_calibrate_decr(void)
+{
+	struct device_node *vias;
+	volatile unsigned char __iomem *via;
+	int count = VIA_TIMER_FREQ_6 / 100;
+	unsigned int dstart, dend;
+	struct resource rsrc;
+
+	vias = of_find_node_by_name(NULL, "via-cuda");
+	if (vias == NULL)
+		vias = of_find_node_by_name(NULL, "via-pmu");
+	if (vias == NULL)
+		vias = of_find_node_by_name(NULL, "via");
+	if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) {
+		of_node_put(vias);
+		return 0;
+	}
+	of_node_put(vias);
+	via = early_ioremap(rsrc.start, resource_size(&rsrc));
+	if (via == NULL) {
+		printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
+		return 0;
+	}
+
+	/*
+	 * set timer 1 for continuous interrupts; the current ACR value is
+	 * read through in_8() like every other register access below
+	 * rather than by dereferencing the __iomem pointer directly
+	 */
+	out_8(&via[ACR], (in_8(&via[ACR]) & ~T1MODE) | T1MODE_CONT);
+	/* set the counter to a small value */
+	out_8(&via[T1CH], 2);
+	/* set the latch to `count' */
+	out_8(&via[T1LL], count);
+	out_8(&via[T1LH], count >> 8);
+	/* wait until it hits 0 */
+	while ((in_8(&via[IFR]) & T1_INT) == 0)
+		;
+	dstart = get_dec();
+	/* clear the interrupt & wait until it hits 0 again */
+	in_8(&via[T1CL]);
+	while ((in_8(&via[IFR]) & T1_INT) == 0)
+		;
+	dend = get_dec();
+
+	ppc_tb_freq = (dstart - dend) * 100 / 6;
+
+	/* Only volatile is cast away; the pointer stays in the __iomem space */
+	early_iounmap((void __iomem *)via, resource_size(&rsrc));
+
+	return 1;
+}
+#endif
+
+/*
+ * Query the OF and get the decr frequency.
+ *
+ * On 32-bit the value from generic_calibrate_decr() may then be replaced by
+ * a VIA timer based calibration: for machines that are none of MacRISC2/3/4,
+ * and for "PowerMac3,5".
+ */
+void __init pmac_calibrate_decr(void)
+{
+	generic_calibrate_decr();
+
+#ifdef CONFIG_PPC32
+	/* We assume MacRISC2 machines have correct device-tree
+	 * calibration. That's better since the VIA itself seems
+	 * to be slightly off. --BenH
+	 */
+	if (!of_machine_is_compatible("MacRISC2") &&
+	    !of_machine_is_compatible("MacRISC3") &&
+	    !of_machine_is_compatible("MacRISC4") &&
+	    via_calibrate_decr())
+		return;
+
+	/* Special case: QuickSilver G4s seem to have a badly calibrated
+	 * timebase-frequency in OF, VIA is much better on these. We should
+	 * probably implement calibration based on the KL timer on these
+	 * machines anyway... -BenH
+	 */
+	if (of_machine_is_compatible("PowerMac3,5") && via_calibrate_decr())
+		return;
+#endif
+}
diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c
new file mode 100644
index 0000000000..b4756defd5
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/udbg_adb.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/ptrace.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/cuda.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/xmon.h>
+#include <asm/bootx.h>
+#include <asm/errno.h>
+#include <asm/pmac_feature.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/btext.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+/*
+ * This implementation is "special", it can "patch" the current
+ * udbg implementation and work on top of it. It must thus be
+ * initialized last
+ */
+
+static void (*udbg_adb_old_putc)(char c);
+static int (*udbg_adb_old_getc)(void);
+static int (*udbg_adb_old_getc_poll)(void);
+
+static enum {
+	input_adb_none,
+	input_adb_pmu,
+	input_adb_cuda,
+} input_type = input_adb_none;
+
+int xmon_wants_key, xmon_adb_keycode;
+
+/* Poll the low-level ADB driver that matches the detected input type. */
+static inline void udbg_adb_poll(void)
+{
+	switch (input_type) {
+#ifdef CONFIG_ADB_PMU
+	case input_adb_pmu:
+		pmu_poll_adb();
+		break;
+#endif /* CONFIG_ADB_PMU */
+#ifdef CONFIG_ADB_CUDA
+	case input_adb_cuda:
+		cuda_poll();
+		break;
+#endif /* CONFIG_ADB_CUDA */
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_BOOTX_TEXT
+
+static int udbg_adb_use_btext;
+static int xmon_adb_shiftstate;
+
+/*
+ * Keycode to character tables used by udbg_adb_local_getc(), indexed by a
+ * keycode below 0x80. A 0 entry means "no character"; the caller ignores
+ * such keys. Entries past the last initialised one (0x60 - 0x7f) are 0.
+ */
+
+/* Characters produced while the shift state is clear */
+static unsigned char xmon_keytab[128] =
+	"asdfhgzxcv\000bqwer"				/* 0x00 - 0x0f */
+	"yt123465=97-80]o"				/* 0x10 - 0x1f */
+	"u[ip\rlj'k;\\,/nm."				/* 0x20 - 0x2f */
+	"\t `\177\0\033\0\0\0\0\0\0\0\0\0\0"		/* 0x30 - 0x3f */
+	"\0.\0*\0+\0\0\0\0\0/\r\0-\0"			/* 0x40 - 0x4f */
+	"\0\0000123456789\0\0\0";			/* 0x50 - 0x5f */
+
+/* Characters produced while the shift state is set */
+static unsigned char xmon_shift_keytab[128] =
+	"ASDFHGZXCV\000BQWER"				/* 0x00 - 0x0f */
+	"YT!@#$^%+(&_*)}O"				/* 0x10 - 0x1f */
+	"U{IP\rLJ\"K:|<?NM>"				/* 0x20 - 0x2f */
+	"\t ~\177\0\033\0\0\0\0\0\0\0\0\0\0"		/* 0x30 - 0x3f */
+	"\0.\0*\0+\0\0\0\0\0/\r\0-\0"			/* 0x40 - 0x4f */
+	"\0\0000123456789\0\0\0";			/* 0x50 - 0x5f */
+
+/*
+ * Blocking read of one character while btext is the output device.
+ *
+ * Polls the ADB driver and the previous getc_poll hook until the old hook
+ * returns a character or xmon_adb_keycode changes from -1, toggling a
+ * cursor character on the btext display while waiting. A character from
+ * the old hook is returned as is (xmon_wants_key stays set on that path).
+ * ADB keycodes are translated through xmon_keytab / xmon_shift_keytab;
+ * shift transitions, key releases and keys mapping to 0 restart the wait.
+ */
+static int udbg_adb_local_getc(void)
+{
+	int k, t, on;
+
+	xmon_wants_key = 1;
+	for (;;) {
+		xmon_adb_keycode = -1;
+		t = 0;
+		on = 0;
+		k = -1;
+		do {
+			/* Flip the cursor on the first pass and then every 200000 polls */
+			if (--t < 0) {
+				on = 1 - on;
+				btext_drawchar(on? 0xdb: 0x20);
+				btext_drawchar('\b');
+				t = 200000;
+			}
+			udbg_adb_poll();
+			if (udbg_adb_old_getc_poll)
+				k = udbg_adb_old_getc_poll();
+		} while (k == -1 && xmon_adb_keycode == -1);
+		/* Erase the cursor if it was left drawn */
+		if (on)
+			btext_drawstring(" \b");
+		if (k != -1)
+			return k;
+		k = xmon_adb_keycode;
+
+		/* test for shift keys */
+		if ((k & 0x7f) == 0x38 || (k & 0x7f) == 0x7b) {
+			/* bit 7 clear means key down, i.e. shift is now active */
+			xmon_adb_shiftstate = (k & 0x80) == 0;
+			continue;
+		}
+		if (k >= 0x80)
+			continue;	/* ignore up transitions */
+		k = (xmon_adb_shiftstate? xmon_shift_keytab: xmon_keytab)[k];
+		if (k != 0)
+			break;
+	}
+	xmon_wants_key = 0;
+	return k;
+}
+#endif /* CONFIG_BOOTX_TEXT */
+
+/*
+ * udbg getc hook: read from the ADB keyboard when btext output and an ADB
+ * input type are both active, otherwise defer to the previous getc hook.
+ * Returns -1 when there is no previous hook.
+ */
+static int udbg_adb_getc(void)
+{
+#ifdef CONFIG_BOOTX_TEXT
+	if (input_type != input_adb_none && udbg_adb_use_btext)
+		return udbg_adb_local_getc();
+#endif
+	return udbg_adb_old_getc ? udbg_adb_old_getc() : -1;
+}
+
+/* getc_poll() is not really used, unless you have the xmon-over modem
+ * hack that doesn't quite concern us here. We therefore only poll the
+ * low level ADB driver, to prevent it from timing out, and then chain to
+ * the original poll routine (or report -1 when there is none).
+ */
+static int udbg_adb_getc_poll(void)
+{
+	udbg_adb_poll();
+
+	return udbg_adb_old_getc_poll ? udbg_adb_old_getc_poll() : -1;
+}
+
+/*
+ * udbg putc hook: draw the character on the btext display when that is
+ * enabled, then pass it on to the previously installed putc hook, if any.
+ */
+static void udbg_adb_putc(char c)
+{
+#ifdef CONFIG_BOOTX_TEXT
+	if (udbg_adb_use_btext)
+		btext_drawchar(c);
+#endif
+	/* Plain call: a void function must not "return" an expression */
+	if (udbg_adb_old_putc)
+		udbg_adb_old_putc(c);
+}
+
+/*
+ * Early setup: when btext_find_display(1) reports a display (returns 0),
+ * enable btext output and install our putc hook. Without CONFIG_BOOTX_TEXT
+ * this does nothing.
+ */
+void __init udbg_adb_init_early(void)
+{
+#ifdef CONFIG_BOOTX_TEXT
+	if (btext_find_display(1) != 0)
+		return;
+
+	udbg_adb_use_btext = 1;
+	udbg_putc = udbg_adb_putc;
+#endif
+}
+
+/*
+ * Install the ADB udbg callbacks on top of whatever udbg backend is active.
+ *
+ * @force_btext: passed through to btext_find_display()
+ *
+ * Returns 0 when an ADB keyboard and a PMU or CUDA were found, -ENODEV
+ * otherwise. The callbacks stay installed in both cases.
+ */
+int __init udbg_adb_init(int force_btext)
+{
+	struct device_node *np;
+
+	/* Capture existing callbacks */
+	udbg_adb_old_putc = udbg_putc;
+	udbg_adb_old_getc = udbg_getc;
+	udbg_adb_old_getc_poll = udbg_getc_poll;
+
+	/* Check if our early init was already called */
+	if (udbg_adb_old_putc == udbg_adb_putc)
+		udbg_adb_old_putc = NULL;
+#ifdef CONFIG_BOOTX_TEXT
+	/* udbg_adb_putc() draws through btext itself; do not chain to it too */
+	if (udbg_adb_old_putc == btext_drawchar)
+		udbg_adb_old_putc = NULL;
+#endif
+
+	/* Set ours as output */
+	udbg_putc = udbg_adb_putc;
+	udbg_getc = udbg_adb_getc;
+	udbg_getc_poll = udbg_adb_getc_poll;
+
+#ifdef CONFIG_BOOTX_TEXT
+	/* Check if we should use btext output */
+	if (btext_find_display(force_btext) == 0)
+		udbg_adb_use_btext = 1;
+#endif
+
+	/* See if there is a keyboard in the device tree with a parent
+	 * of type "adb". If not, we return a failure, but we keep the
+	 * btext output set for now
+	 */
+	for_each_node_by_name(np, "keyboard") {
+		struct device_node *parent = of_get_parent(np);
+		int found = of_node_is_type(parent, "adb");
+		of_node_put(parent);
+		if (found)
+			break;
+	}
+	if (np == NULL)
+		return -ENODEV;
+	/* Leaving the loop via break kept a reference on np; drop it */
+	of_node_put(np);
+
+#ifdef CONFIG_ADB_PMU
+	if (find_via_pmu())
+		input_type = input_adb_pmu;
+#endif
+#ifdef CONFIG_ADB_CUDA
+	if (find_via_cuda())
+		input_type = input_adb_cuda;
+#endif
+
+	/* Same as above: nothing found, keep btext set for output */
+	if (input_type == input_adb_none)
+		return -ENODEV;
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
new file mode 100644
index 0000000000..1b7c39e841
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * udbg for zilog scc ports as found on Apple PowerMacs
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/udbg.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pmac_feature.h>
+
+extern u8 real_readb(volatile u8 __iomem  *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define	SCC_TXRDY	4
+#define SCC_RXRDY	1
+
+static volatile u8 __iomem *sccc;
+static volatile u8 __iomem *sccd;
+
+/*
+ * Write one character to the SCC once SCC_TXRDY is set in the control
+ * register; a '\n' is followed by a '\r'. Does nothing while the port is
+ * not set up (sccc is NULL).
+ */
+static void udbg_scc_putc(char c)
+{
+	if (!sccc)
+		return;
+
+	while (!(in_8(sccc) & SCC_TXRDY))
+		;
+	out_8(sccd,  c);
+	if (c == '\n')
+		udbg_scc_putc('\r');
+}
+
+/*
+ * Non-blocking read: returns the received byte when SCC_RXRDY is set,
+ * -1 when nothing is pending or the port is not set up.
+ */
+static int udbg_scc_getc_poll(void)
+{
+	if (!sccc)
+		return -1;
+	if (in_8(sccc) & SCC_RXRDY)
+		return in_8(sccd);
+	return -1;
+}
+
+/*
+ * Blocking read: spins until SCC_RXRDY is set and returns the received
+ * byte, or returns -1 straight away when the port is not set up.
+ */
+static int udbg_scc_getc(void)
+{
+	if (!sccc)
+		return -1;
+
+	while (!(in_8(sccc) & SCC_RXRDY))
+		;
+	return in_8(sccd);
+}
+
+/*
+ * Initialisation sequence written byte by byte to the SCC control register
+ * by udbg_scc_init(). Entries [1] and [3] (the values following 13 and 12)
+ * are overwritten at runtime before the table is sent.
+ */
+static unsigned char scc_inittab[] = {
+    13, 0,		/* set baud rate divisor */
+    12, 0,
+    14, 1,		/* baud rate gen enable, src=rtxc */
+    11, 0x50,		/* clocks = br gen */
+    5,  0xea,		/* tx 8 bits, assert DTR & RTS */
+    4,  0x46,		/* x16 clock, 1 stop */
+    3,  0xc1,		/* rx enable, 8 bits */
+};
+
+/*
+ * Set up udbg on a Zilog SCC channel below the "escc" device-tree node.
+ *
+ * @force_scc: when non-zero, set the SCC up even if it is not the node
+ *             named by "linux,stdout-path"
+ *
+ * The channel matching the stdout path is preferred; otherwise "ch-a" is
+ * used. The register address is built from the escc "reg" property plus the
+ * parent's "assigned-addresses". On any lookup or mapping failure the
+ * function returns without touching the udbg callbacks.
+ */
+void __init udbg_scc_init(int force_scc)
+{
+	const u32 *reg;
+	unsigned long addr;
+	struct device_node *stdout = NULL, *escc = NULL, *macio = NULL;
+	struct device_node *ch, *ch_def = NULL, *ch_a = NULL;
+	volatile u8 __iomem *base;
+	const char *path;
+	int i;
+
+	escc = of_find_node_by_name(NULL, "escc");
+	if (escc == NULL)
+		goto bail;
+	macio = of_get_parent(escc);
+	if (macio == NULL)
+		goto bail;
+	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (path != NULL)
+		stdout = of_find_node_by_path(path);
+	for_each_child_of_node(escc, ch) {
+		if (ch == stdout) {
+			of_node_put(ch_def);
+			ch_def = of_node_get(ch);
+		}
+		if (of_node_name_eq(ch, "ch-a")) {
+			of_node_put(ch_a);
+			ch_a = of_node_get(ch);
+		}
+	}
+	if (ch_def == NULL && !force_scc)
+		goto bail;
+
+	ch = ch_def ? ch_def : ch_a;
+
+	/* Get address within mac-io ASIC */
+	reg = of_get_property(escc, "reg", NULL);
+	if (reg == NULL)
+		goto bail;
+	addr = reg[0];
+
+	/* Get address of mac-io PCI itself */
+	reg = of_get_property(macio, "assigned-addresses", NULL);
+	if (reg == NULL)
+		goto bail;
+	addr += reg[2];
+
+	if (ch == ch_a)
+		addr += 0x20;
+
+	/*
+	 * Map the registers before locking the port so that a failed mapping
+	 * leaves both the hardware and the sccc/sccd pointers untouched.
+	 */
+	base = ioremap(addr & PAGE_MASK, PAGE_SIZE);
+	if (base == NULL)
+		goto bail;
+
+	/* Lock the serial port */
+	pmac_call_feature(PMAC_FTR_SCC_ENABLE, ch,
+			  PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1);
+
+	/* Control register at the in-page offset, data register 0x10 above */
+	sccc = base + (addr & ~PAGE_MASK);
+	sccd = sccc + 0x10;
+
+	mb();
+
+	for (i = 20000; i != 0; --i)
+		in_8(sccc);
+	out_8(sccc, 0x09);		/* reset A or B side */
+	out_8(sccc, 0xc0);
+
+	/* If SCC was the OF output port, read the BRG value, else
+	 * Setup for 38400 or 57600 8N1 depending on the machine
+	 */
+	if (ch_def != NULL) {
+		out_8(sccc, 13);
+		scc_inittab[1] = in_8(sccc);
+		out_8(sccc, 12);
+		scc_inittab[3] = in_8(sccc);
+	} else if (of_machine_is_compatible("RackMac1,1")
+		   || of_machine_is_compatible("RackMac1,2")
+		   || of_machine_is_compatible("MacRISC4")) {
+		/* Xserves and G5s default to 57600 */
+		scc_inittab[1] = 0;
+		scc_inittab[3] = 0;
+	} else {
+		/* Others default to 38400 */
+		scc_inittab[1] = 0;
+		scc_inittab[3] = 1;
+	}
+
+	for (i = 0; i < sizeof(scc_inittab); ++i)
+		out_8(sccc, scc_inittab[i]);
+
+
+	udbg_putc = udbg_scc_putc;
+	udbg_getc = udbg_scc_getc;
+	udbg_getc_poll = udbg_scc_getc_poll;
+
+	udbg_puts("Hello World !\n");
+
+ bail:
+	/* Nodes never looked up are still NULL here */
+	of_node_put(macio);
+	of_node_put(escc);
+	of_node_put(stdout);
+	of_node_put(ch_def);
+	of_node_put(ch_a);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * putc variant that uses real_readb()/real_writeb() for the register
+ * accesses: waits for SCC_TXRDY, writes the character and follows a '\n'
+ * with a '\r'.
+ */
+static void udbg_real_scc_putc(char c)
+{
+	while (!(real_readb(sccc) & SCC_TXRDY))
+		;
+	real_writeb(c, sccd);
+
+	if (c != '\n')
+		return;
+	udbg_real_scc_putc('\r');
+}
+
+/*
+ * Point udbg output at the SCC using fixed register addresses; no input
+ * hooks are provided in this mode.
+ */
+void __init udbg_init_pmac_realmode(void)
+{
+	/* Output only: clear both input hooks */
+	udbg_getc = NULL;
+	udbg_getc_poll = NULL;
+	udbg_putc = udbg_real_scc_putc;
+
+	/* Fixed control and data register addresses */
+	sccc = (volatile u8 __iomem *)0x80013020ul;
+	sccd = (volatile u8 __iomem *)0x80013030ul;
+}
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
new file mode 100644
index 0000000000..70a46acc70
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_POWERNV
+	depends on PPC64 && PPC_BOOK3S
+	bool "IBM PowerNV (Non-Virtualized) platform support"
+	select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	select PPC_XIVE_NATIVE
+	select PPC_P7_NAP
+	select FORCE_PCI
+	select PCI_MSI
+	select EPAPR_BOOT
+	select PPC_INDIRECT_PIO
+	select PPC_UDBG_16550
+	select CPU_FREQ
+	select PPC_DOORBELL
+	select MMU_NOTIFIER
+	select FORCE_SMP
+	select ARCH_SUPPORTS_PER_VMA_LOCK
+	default y
+
+config OPAL_PRD
+	tristate "OPAL PRD driver"
+	depends on PPC_POWERNV
+	help
+	  This enables the opal-prd driver, a facility to run processor
+	  recovery diagnostics on OpenPower machines.
+
+config PPC_MEMTRACE
+	bool "Enable runtime allocation of RAM for tracing"
+	depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC
+	help
+	  Enabling this option allows for runtime allocation of memory (RAM)
+	  for hardware tracing.
+
+config SCOM_DEBUGFS
+	bool "Expose SCOM controllers via debugfs"
+	depends on DEBUG_FS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
new file mode 100644
index 0000000000..19f0fc5c6f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Nothing that deals with real mode is safe to instrument with KASAN;
+# in particular, the idle code runs a bunch of things in real mode.
+KASAN_SANITIZE_idle.o := n
+KASAN_SANITIZE_pci-ioda.o := n
+KASAN_SANITIZE_pci-ioda-tce.o := n
+# pnv_machine_check_early
+KASAN_SANITIZE_setup.o := n
+
+obj-y			+= setup.o opal-call.o opal-wrappers.o opal.o opal-async.o
+obj-y			+= idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
+obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
+obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y			+= opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
+obj-y			+= ultravisor.o
+
+obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP)	+= opal-fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP)	+= opal-fadump.o
+obj-$(CONFIG_OPAL_CORE)	+= opal-core.o
+obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o pci-ioda-tce.o
+obj-$(CONFIG_PCI_IOV)   += pci-sriov.o
+obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
+obj-$(CONFIG_EEH)	+= eeh-powernv.o
+obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
+obj-$(CONFIG_OPAL_PRD)	+= opal-prd.o
+obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
+obj-$(CONFIG_PPC_MEMTRACE)	+= memtrace.o
+obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o vas-debug.o vas-fault.o
+obj-$(CONFIG_OCXL_BASE)	+= ocxl.o
+obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
+obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 0000000000..f063807eda
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+
+/*
+ * Copy/paste instructions:
+ *
+ *	copy RA,RB
+ *		Copy contents of address (RA) + effective_address(RB)
+ *		to internal copy-buffer.
+ *
+ *	paste RA,RB
+ *		Paste contents of internal copy-buffer to the address
+ *		(RA) + effective_address(RB)
+ */
+
+/*
+ * Issue a "copy" with @offset as the first operand and @crb as the second.
+ * The asm carries a "memory" clobber. Always returns 0.
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+	asm volatile(PPC_COPY(%0, %1)";"
+		:
+		: "b" (offset), "b" (crb)
+		: "memory");
+
+	return 0;
+}
+
+/*
+ * Issue a "paste" with @offset as the first operand and @paste_address as
+ * the second, then read the condition register with mfocrf.
+ *
+ * Returns the CR0 field with the SO bit masked off.
+ */
+static inline int vas_paste(void *paste_address, int offset)
+{
+	u32 cr;
+
+	cr = 0;
+	/* %0 is the cr output; %1/%2 are the offset and address inputs */
+	asm volatile(PPC_PASTE(%1, %2)";"
+		"mfocrf %0, 0x80;"
+		: "=r" (cr)
+		: "b" (offset), "b" (paste_address)
+		: "memory", "cr0");
+
+	/* We mask with 0xE to ignore SO */
+	return (cr >> CR0_SHIFT) & 0xE;
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 0000000000..af3a5d37a1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,1696 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV Platform dependent EEH operations
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ */
+
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+#include <asm/pnv-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
+
+static int eeh_event_irq = -EINVAL;
+
+/* Log a debug message for @pdev and hand it to eeh_probe_device(). */
+static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	dev_dbg(dev, "EEH: Setting up device\n");
+	eeh_probe_device(pdev);
+}
+
+/*
+ * Interrupt handler for the OPAL PCI error event.
+ *
+ * The interrupt is masked first: we don't care about further EEH events
+ * until the outstanding ones have been processed, and event processing
+ * gets unmasked in next_error() if EEH is enabled. A special (NULL) failure
+ * event is then sent, but only when EEH is enabled.
+ */
+static irqreturn_t pnv_eeh_event(int irq, void *data)
+{
+	disable_irq_nosync(irq);
+
+	if (!eeh_enabled())
+		return IRQ_HANDLED;
+
+	eeh_send_failure_event(NULL);
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler for the "err_injct" file.
+ *
+ * Expects five colon-separated hexadecimal fields,
+ * "pe_no:type:func:addr:mask", looks up the PE on the PHB stored in
+ * filp->private_data and forwards the request to eeh_ops->err_inject().
+ *
+ * Returns @count on success, or a negative error code: -ENXIO without an
+ * err_inject hook, the error from simple_write_to_buffer(), -EFAULT when
+ * nothing was copied, -EINVAL for a malformed string, -ENODEV for an
+ * unknown PE, or the error returned by the hook.
+ */
+static ssize_t pnv_eeh_ei_write(struct file *filp,
+				const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct pci_controller *hose = filp->private_data;
+	struct eeh_pe *pe;
+	int pe_no, type, func;
+	unsigned long addr, mask;
+	/* Zero-filled so that whatever is copied in stays NUL-terminated */
+	char buf[50] = { 0 };
+	int ret;
+
+	if (!eeh_ops || !eeh_ops->err_inject)
+		return -ENXIO;
+
+	/*
+	 * Copy over argument buffer, keeping the last byte free for the
+	 * terminator that sscanf() relies on.
+	 */
+	ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos,
+				     user_buf, count);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -EFAULT;
+
+	/* Retrieve parameters */
+	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
+		     &pe_no, &type, &func, &addr, &mask);
+	if (ret != 5)
+		return -EINVAL;
+
+	/* Retrieve PE */
+	pe = eeh_pe_get(hose, pe_no);
+	if (!pe)
+		return -ENODEV;
+
+	/* Do error injection */
+	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
+	return ret < 0 ? ret : count;
+}
+
+/*
+ * File operations of the "err_injct" debugfs file created in
+ * pnv_eeh_post_init(): write-only (no .read), not seekable.
+ */
+static const struct file_operations pnv_eeh_ei_fops = {
+	.open	= simple_open,
+	.llseek	= no_llseek,
+	.write	= pnv_eeh_ei_write,
+};
+
+/*
+ * Write @val as a big-endian 64-bit value to the PHB register at @offset.
+ * @data is the pci_controller of the PHB. Always returns 0.
+ */
+static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
+{
+	struct pnv_phb *phb = ((struct pci_controller *)data)->private_data;
+
+	out_be64(phb->regs + offset, val);
+
+	return 0;
+}
+
+/*
+ * Read the big-endian 64-bit PHB register at @offset into *@val.
+ * @data is the pci_controller of the PHB. Always returns 0.
+ */
+static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
+{
+	struct pnv_phb *phb = ((struct pci_controller *)data)->private_data;
+
+	*val = in_be64(phb->regs + offset);
+
+	return 0;
+}
+
+/*
+ * Generate the debugfs accessors for one PHB register.
+ *
+ * For a given @name and register offset @reg this defines
+ * pnv_eeh_dbgfs_set_<name>() and pnv_eeh_dbgfs_get_<name>(), which forward
+ * to pnv_eeh_dbgfs_set()/pnv_eeh_dbgfs_get() with @reg, and the attribute
+ * pnv_eeh_dbgfs_ops_<name> that formats the value as "0x%llx\n".
+ */
+#define PNV_EEH_DBGFS_ENTRY(name, reg)				\
+static int pnv_eeh_dbgfs_set_##name(void *data, u64 val)	\
+{								\
+	return pnv_eeh_dbgfs_set(data, reg, val);		\
+}								\
+								\
+static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val)	\
+{								\
+	return pnv_eeh_dbgfs_get(data, reg, val);		\
+}								\
+								\
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name,		\
+			pnv_eeh_dbgfs_get_##name,		\
+                        pnv_eeh_dbgfs_set_##name,		\
+			"0x%llx\n")
+
+/* Backing attributes for the err_injct_outbound/inboundA/inboundB files */
+PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
+PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
+PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
+
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Bring PNV_PHB_FLAG_EEH on every PHB in hose_list in line with the
+ * current EEH state.
+ */
+static void pnv_eeh_enable_phbs(void)
+{
+	struct pci_controller *hose;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		struct pnv_phb *phb = hose->private_data;
+
+		/*
+		 * Without EEH we restore the conventional mechanism to
+		 * clear a frozen PE during PCI config access; with EEH
+		 * enabled we are going to rely on that instead.
+		 */
+		if (!eeh_enabled()) {
+			phb->flags &= ~PNV_PHB_FLAG_EEH;
+			continue;
+		}
+		phb->flags |= PNV_PHB_FLAG_EEH;
+	}
+}
+
+/**
+ * pnv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv. When
+ * the function is called, the EEH PEs and devices should have
+ * been built. If the I/O cache stuff has been built, EEH is
+ * ready to supply service.
+ *
+ * Requests the OPAL PCI error event interrupt, updates the EEH flag on
+ * every PHB and, with CONFIG_DEBUG_FS, creates the error injection files.
+ *
+ * Return: 0 on success, or the negative value from opal_event_request()
+ * or request_irq() on failure.
+ */
+int pnv_eeh_post_init(void)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	int ret = 0;
+
+	eeh_show_enabled();
+
+	/* Register OPAL event notifier */
+	eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+	if (eeh_event_irq < 0) {
+		pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+		       __func__, eeh_event_irq);
+		return eeh_event_irq;
+	}
+
+	ret = request_irq(eeh_event_irq, pnv_eeh_event,
+			  IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+	if (ret < 0) {
+		irq_dispose_mapping(eeh_event_irq);
+		pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+		       __func__, eeh_event_irq);
+		return ret;
+	}
+
+	/* Keep the interrupt masked for as long as EEH is not enabled */
+	if (!eeh_enabled())
+		disable_irq(eeh_event_irq);
+
+	pnv_eeh_enable_phbs();
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		/* Create debugfs entries */
+#ifdef CONFIG_DEBUG_FS
+		/* Skip PHBs already populated or without a debugfs directory */
+		if (phb->has_dbgfs || !phb->dbgfs)
+			continue;
+
+		phb->has_dbgfs = 1;
+		debugfs_create_file("err_injct", 0200,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_ei_fops);
+
+		debugfs_create_file("err_injct_outbound", 0600,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_dbgfs_ops_outb);
+		debugfs_create_file("err_injct_inboundA", 0600,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_dbgfs_ops_inbA);
+		debugfs_create_file("err_injct_inboundB", 0600,
+				    phb->dbgfs, hose,
+				    &pnv_eeh_dbgfs_ops_inbB);
+#endif /* CONFIG_DEBUG_FS */
+	}
+
+	return ret;
+}
+
+/*
+ * Walk the conventional PCI capability list of @pdn looking for @cap.
+ *
+ * Returns the config-space offset of the capability, or 0 when @pdn is
+ * NULL, the status register lacks PCI_STATUS_CAP_LIST, or the capability
+ * is not found within 48 entries.
+ */
+static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+	int pos = PCI_CAPABILITY_LIST;
+	int cnt = 48;   /* Maximal number of capabilities */
+	u32 status, id;
+
+	if (!pdn)
+		return 0;
+
+	/* Check if the device supports capabilities */
+	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	while (cnt--) {
+		/* Follow the pointer stored at pos; the result replaces pos */
+		pnv_pci_cfg_read(pdn, pos, 1, &pos);
+		/* A pointer below 0x40 ends the walk */
+		if (pos < 0x40)
+			break;
+
+		pos &= ~3;
+		pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (id == 0xff)
+			break;
+
+		/* Found */
+		if (id == cap)
+			return pos;
+
+		/* Next one */
+		pos += PCI_CAP_LIST_NEXT;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the PCIe extended capability list of @pdn, starting at offset 256,
+ * looking for @cap.
+ *
+ * Returns the config-space offset of the capability, or 0 when the device
+ * has no eeh_dev or no PCIe capability, a config read fails, the first
+ * header is empty, or the capability is not found within
+ * (4096 - 256) / 8 steps.
+ */
+static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int budget = (4096 - 256) / 8;
+	int pos = 256;
+	u32 header;
+
+	if (!edev || !edev->pcie_cap)
+		return 0;
+	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL ||
+	    !header)
+		return 0;
+
+	for (; budget > 0; budget--) {
+		if (pos && PCI_EXT_CAP_ID(header) == cap)
+			return pos;
+
+		/* Offsets below 256 end the walk */
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < 256)
+			break;
+
+		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Find the EEH PE that sits upstream of @pdev.
+ *
+ * For a VF (CONFIG_PCI_IOV) the PF's PE is used; otherwise the PE of the
+ * parent bridge. Returns NULL when there is no parent device or no IODA PE
+ * could be obtained for it.
+ */
+static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
+{
+	struct pci_controller *hose = pdev->bus->sysdata;
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *parent = pdev->bus->self;
+
+#ifdef CONFIG_PCI_IOV
+	/* for VFs we use the PF's PE as the upstream PE */
+	if (pdev->is_virtfn)
+		parent = pdev->physfn;
+#endif
+
+	/* otherwise use the PE of our parent bridge */
+	if (parent) {
+		struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
+
+		/*
+		 * NOTE(review): pnv_ioda_get_pe() is defined elsewhere and
+		 * presumably returns NULL for a device without a PE; treat
+		 * that as "no upstream PE" instead of dereferencing it.
+		 */
+		if (!ioda_pe)
+			return NULL;
+
+		return eeh_pe_get(phb->hose, ioda_pe->pe_number);
+	}
+
+	return NULL;
+}
+
+/**
+ * pnv_eeh_probe - Do probe on PCI device
+ * @pdev: pci_dev to probe
+ *
+ * Create, or find the existing, eeh_dev for this pci_dev.
+ *
+ * Return: the eeh_dev bound to @pdev, or NULL when the device has no
+ * pci_dn or eeh_dev, already belongs to a PE, is a PCI-ISA bridge, or
+ * could not be inserted into the PE tree.
+ */
+static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *upstream_pe;
+	uint32_t pcie_flags;
+	int ret;
+	int config_addr;
+
+	/*
+	 * Without a pci_dn there is nothing to probe. Check this before
+	 * the first dereference of @pdn below.
+	 */
+	if (!pdn)
+		return NULL;
+
+	hose = pdn->phb;
+	phb = hose->private_data;
+	edev = pdn_to_eeh_dev(pdn);
+	config_addr = (pdn->busno << 8) | (pdn->devfn);
+
+	/*
+	 * The root bridge has no subordinate PCI devices and no OF node,
+	 * so there is no eeh_dev for it and no point in continuing. A
+	 * device that already sits in a PE is not probed again either.
+	 */
+	if (!edev || edev->pe)
+		return NULL;
+
+	/* already configured? */
+	if (edev->pdev) {
+		pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n",
+			__func__, hose->global_number, config_addr >> 8,
+			PCI_SLOT(config_addr), PCI_FUNC(config_addr));
+		return edev;
+	}
+
+	/* Skip for PCI-ISA bridge */
+	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return NULL;
+
+	eeh_edev_dbg(edev, "Probing device\n");
+
+	/* Initialize eeh device: clear the low mode byte, cache capabilities */
+	edev->mode	&= 0xFFFFFF00;
+	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+	edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
+	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		edev->mode |= EEH_DEV_BRIDGE;
+		if (edev->pcie_cap) {
+			/* Classify PCIe bridges by their port type */
+			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
+	}
+
+	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
+
+	upstream_pe = pnv_eeh_get_upstream_pe(pdev);
+
+	/* Create PE */
+	ret = eeh_pe_tree_insert(edev, upstream_pe);
+	if (ret) {
+		eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
+		return NULL;
+	}
+
+	/*
+	 * If the PE contains any one of following adapters, the
+	 * PCI config space can't be accessed when dumping EEH log.
+	 * Otherwise, we will run into fenced PHB caused by shortage
+	 * of outbound credits in the adapter. The PCI config access
+	 * should be blocked until PE reset. MMIO access is dropped
+	 * by hardware certainly. In order to drop PCI config requests,
+	 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
+	 * will be checked in the backend for PE state retrieval. If
+	 * the PE becomes frozen for the first time and the flag has
+	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
+	 * that PE to block its config space.
+	 *
+	 * Broadcom BCM5718 2-ports NICs (14e4:1656)
+	 * Broadcom Austin 4-ports NICs (14e4:1657)
+	 * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
+	 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
+	 */
+	if (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+	    (pdn->device_id == 0x1656 ||
+	     pdn->device_id == 0x1657 ||
+	     pdn->device_id == 0x168a ||
+	     pdn->device_id == 0x168e))
+		edev->pe->state |= EEH_PE_CFG_RESTRICTED;
+
+	/*
+	 * Cache the PE primary bus, which can't be fetched when
+	 * full hotplug is in progress. In that case, all child
+	 * PCI devices of the PE are expected to be removed prior
+	 * to PE reset.
+	 */
+	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
+		edev->pe->bus = pci_find_bus(hose->global_number,
+					     pdn->busno);
+		if (edev->pe->bus)
+			edev->pe->state |= EEH_PE_PRI_BUS;
+	}
+
+	/*
+	 * Enable EEH explicitly so that we will do EEH check
+	 * while accessing I/O stuff
+	 */
+	if (!eeh_has_flag(EEH_ENABLED)) {
+		enable_irq(eeh_event_irq);
+		pnv_eeh_enable_phbs();
+		eeh_add_flag(EEH_ENABLED);
+	}
+
+	/* Save memory bars */
+	eeh_save_bars(edev);
+
+	eeh_edev_dbg(edev, "EEH enabled on device\n");
+
+	return edev;
+}
+
+/**
+ * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * Control the EEH state of @pe. Enabling EEH is accepted as a no-op and
+ * disabling it is refused; the thaw options clear the MMIO or DMA freeze
+ * and the freeze option freezes the whole PE.
+ *
+ * Return: 0 on success, -EPERM for EEH_OPT_DISABLE, -EINVAL for an
+ * unknown option, -EIO when the firmware call fails, or the result of
+ * the PHB's unfreeze_pe() hook when one is installed.
+ */
+static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int action;
+	s64 rc;
+
+	switch (option) {
+	case EEH_OPT_DISABLE:
+		return -EPERM;
+	case EEH_OPT_ENABLE:
+		return 0;
+	case EEH_OPT_FREEZE_PE:
+		/* Freeze master and slave PEs if PHB supports compound PEs */
+		if (phb->freeze_pe) {
+			phb->freeze_pe(phb, pe->addr);
+			return 0;
+		}
+
+		rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr,
+					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+		if (rc == OPAL_SUCCESS)
+			return 0;
+
+		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number,
+			pe->addr);
+		return -EIO;
+	case EEH_OPT_THAW_MMIO:
+		action = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
+		break;
+	case EEH_OPT_THAW_DMA:
+		action = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
+		break;
+	default:
+		pr_warn("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
+
+	/* Unfreeze master and slave PEs if PHB supports */
+	if (phb->unfreeze_pe)
+		return phb->unfreeze_pe(phb, pe->addr, action);
+
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, action);
+	if (rc == OPAL_SUCCESS)
+		return 0;
+
+	pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
+		__func__, rc, option, phb->hose->global_number,
+		pe->addr);
+	return -EIO;
+}
+
+/*
+ * Ask firmware to store the diag-data of the PHB owning @pe into
+ * @pe->data. A failure is only logged.
+ */
+static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	s64 rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
+					     phb->diag_data_size);
+
+	if (rc == OPAL_SUCCESS)
+		return;
+
+	pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
+		__func__, rc, pe->phb->global_number);
+}
+
+/*
+ * Retrieve the EEH state of a PHB PE.
+ *
+ * Returns EEH_STATE_NOT_SUPPORT when firmware cannot report the freeze
+ * status, the full set of MMIO/DMA active+enabled bits when no PHB error
+ * is flagged, and 0 when the PHB is in error. The first time a PHB error
+ * is seen the PE is marked isolated and its diag-data is fetched (and
+ * dumped right away if EEH_EARLY_DUMP_LOG is set).
+ */
+static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	__be16 pcierr = 0;
+	u8 fstate = 0;
+	s64 rc;
+
+	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe->addr,
+					&fstate, &pcierr, NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting PHB#%x state\n",
+			__func__, rc, phb->hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/* No PHB error flagged: everything is operational */
+	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR)
+		return EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE |
+		       EEH_STATE_MMIO_ENABLED | EEH_STATE_DMA_ENABLED;
+
+	/* PHB error seen for the first time: isolate and grab diag-data */
+	if (!(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_mark_isolated(pe);
+		pnv_eeh_get_phb_diag(pe);
+
+		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+	}
+
+	return 0;
+}
+
+/*
+ * Retrieve the EEH state of a non-PHB PE and translate the firmware
+ * freeze state into EEH_STATE_* bits.
+ *
+ * Returns EEH_STATE_NOT_SUPPORT when the state cannot be read or is not
+ * recognised. When the PE is found fully frozen (neither MMIO nor DMA
+ * active) for the first time, it is marked isolated and the PHB
+ * diag-data is captured.
+ */
+static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
+{
+	const int all_active = EEH_STATE_MMIO_ACTIVE  |
+			       EEH_STATE_DMA_ACTIVE   |
+			       EEH_STATE_MMIO_ENABLED |
+			       EEH_STATE_DMA_ENABLED;
+	struct pnv_phb *phb = pe->phb->private_data;
+	__be16 pcierr = 0;
+	u8 fstate = 0;
+	int result;
+	s64 rc;
+
+	/*
+	 * The hardware frozen state is left untouched until the PE reset
+	 * has completed. Report an operational state meanwhile so that
+	 * the EEH core keeps moving forward.
+	 */
+	if (pe->state & EEH_PE_RESET)
+		return all_active;
+
+	/*
+	 * Fetch the PE state from hardware, through the PHB hook when
+	 * the PHB supports compound PEs.
+	 */
+	if (!phb->get_pe_state) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id, pe->addr,
+						&fstate, &pcierr, NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
+				__func__, rc, phb->hose->global_number,
+				pe->addr);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+	} else {
+		fstate = phb->get_pe_state(phb, pe->addr);
+	}
+
+	/* Translate the firmware state */
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_NOT_FROZEN:
+		result = all_active;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+		result = EEH_STATE_DMA_ACTIVE | EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+		result = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		result = 0;
+		break;
+	case OPAL_EEH_STOPPED_RESET:
+		result = EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+		result = EEH_STATE_UNAVAILABLE;
+		break;
+	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+		result = EEH_STATE_NOT_SUPPORT;
+		break;
+	default:
+		result = EEH_STATE_NOT_SUPPORT;
+		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
+			__func__, phb->hose->global_number,
+			pe->addr, fstate);
+	}
+
+	/* Anything but a first-time full freeze needs no further work */
+	if (result & (EEH_STATE_NOT_SUPPORT | EEH_STATE_UNAVAILABLE |
+		      EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+		return result;
+	if (pe->state & EEH_PE_ISOLATED)
+		return result;
+
+	/*
+	 * With compound PE support, freeze all slave PEs too for
+	 * consistency, then isolate the PE and collect the diag-data.
+	 */
+	if (phb->freeze_pe)
+		phb->freeze_pe(phb, pe->addr);
+
+	eeh_pe_mark_isolated(pe);
+	pnv_eeh_get_phb_diag(pe);
+
+	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+
+	return result;
+}
+
+/**
+ * pnv_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @delay: delay while PE state is temporarily unavailable, may be NULL
+ *
+ * Retrieve the state of the specified PE. For IODA-compatible
+ * platforms the state comes from the IODA table, so the request is
+ * passed down to the PHB or PE specific helper.
+ *
+ * When @delay is not NULL it is set to 1000 (one second, the default
+ * period) if the state is temporarily unavailable and to 0 otherwise.
+ *
+ * Return: the EEH_STATE_* bits reported by the helper.
+ */
+static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	int state = (pe->type & EEH_PE_PHB) ? pnv_eeh_get_phb_state(pe)
+					    : pnv_eeh_get_pe_state(pe);
+
+	if (delay)
+		*delay = (state & EEH_STATE_UNAVAILABLE) ? 1000 : 0;
+
+	return state;
+}
+
+/*
+ * Poll firmware for @id until opal_pci_poll() returns a non-positive
+ * value, which is handed back to the caller. A positive return is
+ * treated as the number of milliseconds to wait before polling again;
+ * the wait is a busy delay while the system is not yet running and a
+ * sleep afterwards.
+ */
+static s64 pnv_eeh_poll(unsigned long id)
+{
+	s64 rc;
+
+	for (rc = opal_pci_poll(id); rc > 0; rc = opal_pci_poll(id)) {
+		if (system_state >= SYSTEM_RUNNING)
+			msleep(rc);
+		else
+			udelay(1000 * rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Issue a complete PHB reset for @hose.
+ *
+ * Hot and fundamental resets both assert the complete reset, and
+ * EEH_RESET_DEACTIVATE de-asserts it. Any other option leaves rc at its
+ * initial OPAL_HARDWARE value. Returns 0 when the final firmware status
+ * is OPAL_SUCCESS and -EIO otherwise.
+ */
+int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/* Issue PHB complete reset request */
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+	case EEH_RESET_HOT:
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_COMPLETE,
+				    OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_COMPLETE,
+				    OPAL_DEASSERT_RESET);
+		break;
+	default:
+		break;
+	}
+
+	if (rc < 0)
+		goto out;
+
+	/*
+	 * A positive status means the request is still in flight, so poll
+	 * the PHB until it is done. The PHB reset is usually a complete
+	 * reset followed by a hot reset on the root bus, hence the PCI
+	 * bus settlement delay after de-assert.
+	 */
+	if (rc > 0)
+		rc = pnv_eeh_poll(phb->opal_id);
+
+	if (option == EEH_RESET_DEACTIVATE) {
+		if (system_state >= SYSTEM_RUNNING)
+			msleep(EEH_PE_RST_SETTLE_TIME);
+		else
+			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
+	}
+out:
+	return (rc != OPAL_SUCCESS) ? -EIO : 0;
+}
+
+/*
+ * Reset the root port of @hose through firmware.
+ *
+ * Fundamental and hot resets assert the matching reset scope, and
+ * EEH_RESET_DEACTIVATE de-asserts using the hot-reset scope. Any other
+ * option leaves rc at its initial OPAL_HARDWARE value. Returns 0 when
+ * the final firmware status is OPAL_SUCCESS and -EIO otherwise.
+ */
+static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/*
+	 * The reset scope does not matter at de-assert time because the
+	 * firmware does nothing scope specific for fundamental or hot
+	 * reset during the de-assert phase.
+	 */
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PCI_FUNDAMENTAL,
+				    OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_HOT:
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PCI_HOT,
+				    OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PCI_HOT,
+				    OPAL_DEASSERT_RESET);
+		break;
+	default:
+		break;
+	}
+
+	if (rc < 0)
+		goto out;
+
+	/* Poll state of the PHB until the request is done */
+	if (rc > 0)
+		rc = pnv_eeh_poll(phb->opal_id);
+
+	/* Let the bus settle once the reset has been released */
+	if (option == EEH_RESET_DEACTIVATE)
+		msleep(EEH_PE_RST_SETTLE_TIME);
+out:
+	return (rc != OPAL_SUCCESS) ? -EIO : 0;
+}
+
+/*
+ * Assert or de-assert a secondary bus reset on bridge @dev by toggling
+ * PCI_BRIDGE_CTL_BUS_RESET through the EEH config accessors.
+ *
+ * While the reset is asserted, the surprise-down bit is set in the
+ * bridge's AER uncorrectable error mask (when the bridge has an AER
+ * capability); it is cleared again after de-assert. Options other than
+ * hot, fundamental and deactivate are ignored. Always returns 0.
+ */
+static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
+{
+	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int aer = edev ? edev->aer_cap : 0;
+	int uncor_mask = aer + PCI_ERR_UNCOR_MASK;
+	u32 ctrl;
+
+	pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(dev->bus),
+		 dev->bus->number, option);
+
+	if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) {
+		/* Don't report linkDown event */
+		if (aer) {
+			eeh_ops->read_config(edev, uncor_mask, 4, &ctrl);
+			ctrl |= PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(edev, uncor_mask, 4, ctrl);
+		}
+
+		/* Assert the secondary bus reset and hold it */
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);
+
+		msleep(EEH_PE_RST_HOLD_TIME);
+	} else if (option == EEH_RESET_DEACTIVATE) {
+		/* Release the secondary bus reset and let the bus settle */
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);
+
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		/* Continue reporting linkDown event */
+		if (aer) {
+			eeh_ops->read_config(edev, uncor_mask, 4, &ctrl);
+			ctrl &= ~PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(edev, uncor_mask, 4, ctrl);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Reset the secondary bus of bridge @pdev.
+ *
+ * When the bridge's device node carries "ibm,reset-by-firmware" the
+ * reset is issued through firmware, where de-assert is a no-op.
+ * Otherwise the bridge control register is driven directly by
+ * __pnv_eeh_bridge_reset().
+ *
+ * Returns 0 on success, -EINVAL for an unsupported option on the
+ * firmware path and -EIO when the firmware reset fails.
+ */
+static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	uint64_t id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev));
+	uint8_t scope;
+	int64_t rc;
+
+	/* Hot reset to the bus if firmware cannot handle */
+	if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
+		return __pnv_eeh_bridge_reset(pdev, option);
+
+	pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(pdev->bus),
+		 pdev->bus->number, option);
+
+	if (option == EEH_RESET_DEACTIVATE)
+		return 0;
+
+	if (option == EEH_RESET_FUNDAMENTAL) {
+		scope = OPAL_RESET_PCI_FUNDAMENTAL;
+	} else if (option == EEH_RESET_HOT) {
+		scope = OPAL_RESET_PCI_HOT;
+	} else {
+		dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n",
+			__func__, option);
+		return -EINVAL;
+	}
+
+	/* A positive status means the reset is still in progress: poll */
+	rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
+	if (rc > OPAL_SUCCESS)
+		rc = pnv_eeh_poll(id);
+
+	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
+}
+
+/*
+ * Perform a full hot reset cycle (assert, then de-assert) below @dev.
+ * A device on the root bus is reset through the root port of its PHB;
+ * any other device through the bridge reset path. The results of the
+ * individual reset steps are not checked.
+ */
+void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+
+	if (!pci_is_root_bus(dev->bus)) {
+		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
+		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
+		return;
+	}
+
+	hose = pci_bus_to_host(dev->bus);
+	pnv_eeh_root_reset(hose, EEH_RESET_HOT);
+	pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
+}
+
+/*
+ * Wait for the bits in @mask to clear in the 16-bit config register at
+ * @pos of the device behind @pdn.
+ *
+ * The register is sampled up to four times, sleeping 100, 200, 400 and
+ * 800 ms after each unsuccessful sample. If the bits never clear, a
+ * warning naming the @type of FLR is logged and the function returns.
+ */
+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
+				     int pos, u16 mask)
+{
+	struct eeh_dev *edev = pdn->edev;
+	unsigned int wait_ms = 100;
+	int status = 0;
+	int tries;
+
+	/* Wait for Transaction Pending bit to be cleared */
+	for (tries = 4; tries > 0; tries--) {
+		eeh_ops->read_config(edev, pos, 2, &status);
+		if ((status & mask) == 0)
+			return;
+
+		msleep(wait_ms);
+		wait_ms *= 2;
+	}
+
+	pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
+		__func__, type,
+		pdn->phb->global_number, pdn->busno,
+		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+}
+
+/*
+ * Drive a PCIe Function Level Reset on the device behind @pdn.
+ *
+ * Hot and fundamental reset options set the FLR bit in the device
+ * control register (after waiting for pending transactions) and hold
+ * for EEH_PE_RST_HOLD_TIME; EEH_RESET_DEACTIVATE clears the bit and
+ * waits for EEH_PE_RST_SETTLE_TIME. Other options do nothing.
+ *
+ * Returns 0 on success, or -ENOTTY when the device has no PCIe
+ * capability or does not advertise FLR support.
+ */
+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	u32 reg = 0;
+	int devctl;
+
+	if (WARN_ON(!edev->pcie_cap))
+		return -ENOTTY;
+
+	eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
+	if (!(reg & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	/*
+	 * NOTE(review): the device control register is accessed as 4
+	 * bytes below, while the device status register is sampled as 2
+	 * bytes by the wait helper - confirm the wider access is intended.
+	 */
+	devctl = edev->pcie_cap + PCI_EXP_DEVCTL;
+
+	if (option == EEH_RESET_HOT || option == EEH_RESET_FUNDAMENTAL) {
+		pnv_eeh_wait_for_pending(pdn, "",
+					 edev->pcie_cap + PCI_EXP_DEVSTA,
+					 PCI_EXP_DEVSTA_TRPND);
+		eeh_ops->read_config(edev, devctl, 4, &reg);
+		reg |= PCI_EXP_DEVCTL_BCR_FLR;
+		eeh_ops->write_config(edev, devctl, 4, reg);
+		msleep(EEH_PE_RST_HOLD_TIME);
+	} else if (option == EEH_RESET_DEACTIVATE) {
+		eeh_ops->read_config(edev, devctl, 4, &reg);
+		reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
+		eeh_ops->write_config(edev, devctl, 4, reg);
+		msleep(EEH_PE_RST_SETTLE_TIME);
+	}
+
+	return 0;
+}
+
+/*
+ * Drive a Function Level Reset through the PCI Advanced Features
+ * capability of the device behind @pdn.
+ *
+ * Hot and fundamental reset options write PCI_AF_CTRL_FLR to the AF
+ * control register (after waiting for pending transactions) and hold
+ * for EEH_PE_RST_HOLD_TIME; EEH_RESET_DEACTIVATE writes 0 and waits for
+ * EEH_PE_RST_SETTLE_TIME. Other options do nothing.
+ *
+ * Returns 0 on success, or -ENOTTY when the device has no AF capability
+ * or the capability lacks either the TP or the FLR bit.
+ */
+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	u32 cap = 0;
+	int af_ctrl;
+
+	if (WARN_ON(!edev->af_cap))
+		return -ENOTTY;
+
+	eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap);
+	if ((cap & PCI_AF_CAP_TP) == 0 || (cap & PCI_AF_CAP_FLR) == 0)
+		return -ENOTTY;
+
+	af_ctrl = edev->af_cap + PCI_AF_CTRL;
+
+	if (option == EEH_RESET_HOT || option == EEH_RESET_FUNDAMENTAL) {
+		/*
+		 * Wait for Transaction Pending bit to clear. A word-aligned
+		 * test is used, so we use the control offset rather than status
+		 * and shift the test bit to match.
+		 */
+		pnv_eeh_wait_for_pending(pdn, "AF", af_ctrl,
+					 PCI_AF_STATUS_TP << 8);
+		eeh_ops->write_config(edev, af_ctrl, 1, PCI_AF_CTRL_FLR);
+		msleep(EEH_PE_RST_HOLD_TIME);
+	} else if (option == EEH_RESET_DEACTIVATE) {
+		eeh_ops->write_config(edev, af_ctrl, 1, 0);
+		msleep(EEH_PE_RST_SETTLE_TIME);
+	}
+
+	return 0;
+}
+
+/*
+ * Reset a VF PE by issuing an FLR to its device: first a PCIe FLR and,
+ * if that is not possible, an Advanced Features FLR.
+ *
+ * Returns -ENXIO when the PE has no device with a pci_dn, 0 when the
+ * PCIe FLR succeeded, and otherwise the result of the AF FLR attempt.
+ */
+static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
+{
+	struct eeh_dev *edev;
+	struct pci_dn *pdn;
+
+	/* The VF PE should have only one child device */
+	edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+	pdn = eeh_dev_to_pdn(edev);
+	if (!pdn)
+		return -ENXIO;
+
+	if (pnv_eeh_do_flr(pdn, option) == 0)
+		return 0;
+
+	return pnv_eeh_do_af_flr(pdn, option);
+}
+
+/**
+ * pnv_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Do reset on the indicated PE. For a PCI bus sensitive PE,
+ * we need to reset the parent p2p bridge. The PHB has to
+ * be reinitialized if the p2p bridge is the root bridge. For
+ * a PCI device sensitive PE, we will try to reset the device
+ * through FLR. For now, we don't have OPAL APIs to do HARD
+ * reset yet, so all resets would be SOFT (HOT) resets.
+ *
+ * Return: 0 on success; -EIO when clearing the P7IOC error injection
+ * registers fails or the PE's PCI bus cannot be found; otherwise the
+ * result of the PHB, VF, root port or bridge reset helper that the
+ * request was handed to.
+ */
+static int pnv_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb;
+	struct pci_bus *bus;
+	int64_t rc;
+
+	/*
+	 * For PHB reset, we always have complete reset. For those PEs whose
+	 * primary bus derived from root complex (root bus) or root port
+	 * (usually bus#1), we apply hot or fundamental reset on the root port.
+	 * For other PEs, we always have hot reset on the PE primary bus.
+	 *
+	 * Here, we have a different design to pHyp, which always clears the
+	 * frozen state during PE reset. However, the good idea here from
+	 * benh is to keep the frozen state until the PE reset is completely
+	 * done (until BAR restore). With the frozen state, HW drops illegal
+	 * IO or MMIO access, which could otherwise incur a recursive frozen
+	 * PE during PE reset. The side effect is that EEH core has to clear
+	 * the frozen state explicitly after BAR restore.
+	 */
+	if (pe->type & EEH_PE_PHB)
+		return pnv_eeh_phb_reset(hose, option);
+
+	/*
+	 * The frozen PE might be caused by PAPR error injection
+	 * registers, which are expected to be cleared after hitting
+	 * frozen PE as stated in the hardware spec. Unfortunately,
+	 * that's not true on P7IOC. So we have to clear it manually
+	 * to avoid recursive EEH errors during recovery.
+	 */
+	phb = hose->private_data;
+	if (phb->model == PNV_PHB_MODEL_P7IOC &&
+	    (option == EEH_RESET_HOT ||
+	     option == EEH_RESET_FUNDAMENTAL)) {
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_ERROR,
+				    OPAL_ASSERT_RESET);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld clearing error injection registers\n",
+				__func__, rc);
+			return -EIO;
+		}
+	}
+
+	if (pe->type & EEH_PE_VF)
+		return pnv_eeh_reset_vf_pe(pe, option);
+
+	bus = eeh_pe_bus_get(pe);
+	if (!bus) {
+		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
+			__func__, pe->phb->global_number, pe->addr);
+		return -EIO;
+	}
+
+	if (pci_is_root_bus(bus))
+		return pnv_eeh_root_reset(hose, option);
+
+	/*
+	 * For hot resets try to use the generic PCI error recovery reset
+	 * functions. These correctly handle the case where the secondary
+	 * bus is behind a hotplug slot and will use the slot provided
+	 * reset methods to prevent spurious hotplug events during the reset.
+	 *
+	 * Fundamental resets need to be handled internally to EEH since the
+	 * PCI core doesn't really have a concept of a fundamental reset,
+	 * mainly because there's no standard way to generate one. Only a
+	 * few devices require an FRESET so it should be fine.
+	 */
+	if (option != EEH_RESET_FUNDAMENTAL) {
+		/*
+		 * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
+		 *     de-assert step. It's like the OPAL reset API was
+		 *     poorly designed or something...
+		 */
+		if (option == EEH_RESET_DEACTIVATE)
+			return 0;
+
+		rc = pci_bus_error_reset(bus->self);
+		if (!rc)
+			return 0;
+	}
+
+	/*
+	 * Otherwise (fundamental reset, or the generic reset above failed)
+	 * use the platform bridge reset. This might call into FW.
+	 */
+	if (pci_is_root_bus(bus->parent))
+		return pnv_eeh_root_reset(hose, option);
+	return pnv_eeh_bridge_reset(bus->self, option);
+}
+
+/**
+ * pnv_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log (not used here)
+ * @drv_log: driver log to be combined with retrieved error log (not used here)
+ * @len: length of driver log (not used here)
+ *
+ * Dump the PHB diag-data held in @pe->data, unless EEH_EARLY_DUMP_LOG
+ * is set, in which case nothing is dumped here.
+ *
+ * Return: always 0.
+ */
+static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
+			   char *drv_log, unsigned long len)
+{
+	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+		return 0;
+
+	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+	return 0;
+}
+
+/**
+ * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the malfunctioning PE can be recovered
+ * again. This implementation performs no work and reports success.
+ *
+ * Return: always 0.
+ */
+static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	return 0;
+}
+
+/**
+ * pnv_eeh_err_inject - Inject specified error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: specific error type
+ * @addr: address
+ * @mask: address mask
+ *
+ * Inject the error selected by @type and @func into the indicated PE
+ * for testing purposes.
+ *
+ * Return: 0 on success, -ERANGE when @type or @func is out of range,
+ * -ENXIO when firmware lacks error injection support, or -EIO when the
+ * firmware call fails.
+ */
+static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
+			      unsigned long addr, unsigned long mask)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc;
+
+	/* Only the two IOA bus error types are accepted */
+	if (!(type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR ||
+	      type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)) {
+		pr_warn("%s: Invalid error type %d\n",
+			__func__, type);
+		return -ERANGE;
+	}
+
+	/* The function must lie within the IOA function range */
+	if (!(func >= OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR &&
+	      func <= OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET)) {
+		pr_warn("%s: Invalid error function %d\n",
+			__func__, func);
+		return -ERANGE;
+	}
+
+	/* Firmware supports error injection ? */
+	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
+		pr_warn("%s: Firmware doesn't support error injection\n",
+			__func__);
+		return -ENXIO;
+	}
+
+	/* Do error injection */
+	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
+				 type, func, addr, mask);
+	if (rc == OPAL_SUCCESS)
+		return 0;
+
+	pr_warn("%s: Failure %lld injecting error %d-%d to PHB#%x-PE#%x\n",
+		__func__, rc, type, func,
+		hose->global_number, pe->addr);
+	return -EIO;
+}
+
+/*
+ * Tell whether PCI config access to the device behind @pdn is currently
+ * blocked by EEH.
+ *
+ * Returns true only when the device belongs to a PE that has
+ * EEH_PE_CFG_BLOCKED set, except that a device with a physfn is never
+ * blocked while its PE is under reset.
+ */
+static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+	if (!edev || !edev->pe)
+		return false;
+
+	/*
+	 * FLR or AF FLR is issued to all VFs contained in a VF PE through
+	 * the EEH PCI config accessors, so those must stay usable during
+	 * the reset window.
+	 */
+	if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
+		return false;
+
+	return (edev->pe->state & EEH_PE_CFG_BLOCKED) != 0;
+}
+
+/*
+ * EEH config-space read accessor.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when @edev has no pci_dn (leaving
+ * @val untouched), PCIBIOS_SET_FAILED with @val set to all ones when
+ * config access is blocked for the device, and otherwise the result of
+ * pnv_pci_cfg_read().
+ */
+static int pnv_eeh_read_config(struct eeh_dev *edev,
+			       int where, int size, u32 *val)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pnv_eeh_cfg_blocked(pdn))
+		return pnv_pci_cfg_read(pdn, where, size, val);
+
+	*val = 0xFFFFFFFF;
+	return PCIBIOS_SET_FAILED;
+}
+
+/*
+ * EEH config-space write accessor.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when @edev has no pci_dn,
+ * PCIBIOS_SET_FAILED when config access is blocked for the device, and
+ * otherwise the result of pnv_pci_cfg_write().
+ */
+static int pnv_eeh_write_config(struct eeh_dev *edev,
+				int where, int size, u32 val)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pnv_eeh_cfg_blocked(pdn))
+		return pnv_pci_cfg_write(pdn, where, size, val);
+
+	return PCIBIOS_SET_FAILED;
+}
+
+/*
+ * Print the GEM and LEM register groups of the hub diag-data in @data.
+ * A group is printed only when at least one of its registers is
+ * non-zero; values are converted from big-endian for printing.
+ */
+static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+	bool have_gem = data->gemXfir || data->gemRfir ||
+			data->gemRirqfir || data->gemMask || data->gemRwof;
+	bool have_lem = data->lemFir || data->lemErrMask ||
+			data->lemAction0 || data->lemAction1 || data->lemWof;
+
+	/* GEM */
+	if (have_gem)
+		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->gemXfir),
+			be64_to_cpu(data->gemRfir),
+			be64_to_cpu(data->gemRirqfir),
+			be64_to_cpu(data->gemMask),
+			be64_to_cpu(data->gemRwof));
+
+	/* LEM */
+	if (have_lem)
+		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrMask),
+			be64_to_cpu(data->lemAction0),
+			be64_to_cpu(data->lemAction1),
+			be64_to_cpu(data->lemWof));
+}
+
+/*
+ * Fetch the P7IOC hub diag-data for @hose from firmware into the PHB's
+ * diag_data buffer and print it.
+ *
+ * The common GEM/LEM registers are printed for every known diag-data
+ * type, followed by the type specific registers when any of them is
+ * non-zero. A fetch failure or an unknown type is only logged.
+ */
+static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCErrorData *data =
+		(struct OpalIoP7IOCErrorData*)phb->diag_data;
+	u16 type;
+	long rc;
+
+	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+			__func__, phb->hub_id, rc);
+		return;
+	}
+
+	/*
+	 * Convert the big-endian type once, so the value that is
+	 * dispatched on is also the value reported for an unknown type.
+	 */
+	type = be16_to_cpu(data->type);
+
+	switch (type) {
+	case OPAL_P7IOC_DIAG_TYPE_RGC:
+		pr_info("P7IOC diag-data for RGC\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
+			pr_info("  RGC: %016llx %016llx\n",
+				be64_to_cpu(data->rgc.rgcStatus),
+				be64_to_cpu(data->rgc.rgcLdcp));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_BI:
+		pr_info("P7IOC diag-data for BI %s\n\n",
+			data->bi.biDownbound ? "Downbound" : "Upbound");
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
+		    data->bi.biLdcp2 || data->bi.biFenceStatus)
+			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
+				be64_to_cpu(data->bi.biLdcp0),
+				be64_to_cpu(data->bi.biLdcp1),
+				be64_to_cpu(data->bi.biLdcp2),
+				be64_to_cpu(data->bi.biFenceStatus));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_CI:
+		pr_info("P7IOC diag-data for CI Port %d\n\n",
+			data->ci.ciPort);
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
+			pr_info("  CI:  %016llx %016llx\n",
+				be64_to_cpu(data->ci.ciPortStatus),
+				be64_to_cpu(data->ci.ciPortLdcp));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_MISC:
+		pr_info("P7IOC diag-data for MISC\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_I2C:
+		pr_info("P7IOC diag-data for I2C\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		break;
+	default:
+		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			__func__, phb->hub_id, type);
+	}
+}
+
+/*
+ * Freeze PE#@pe_no on @phb. The PHB's freeze_pe() hook is used when one
+ * is installed; otherwise firmware is asked directly, the same way
+ * pnv_eeh_set_option() handles EEH_OPT_FREEZE_PE. A firmware failure is
+ * only logged.
+ */
+static void pnv_eeh_freeze_pe_no(struct pnv_phb *phb, int pe_no)
+{
+	s64 rc;
+
+	if (phb->freeze_pe) {
+		phb->freeze_pe(phb, pe_no);
+		return;
+	}
+
+	rc = opal_pci_eeh_freeze_set(phb->opal_id, pe_no,
+				     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+	if (rc != OPAL_SUCCESS)
+		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number, pe_no);
+}
+
+/*
+ * Resolve the frozen PE number @pe_no reported for @hose to the EEH PE
+ * that should be recovered, and store it in @pe.
+ *
+ * A slave PE is first replaced by its master. The resolved PE is frozen
+ * if it is not yet isolated, then its ancestors below the PHB PE are
+ * checked: the topmost ancestor found in a non-active state replaces
+ * the result (and is frozen as well).
+ *
+ * Returns 0 on success, or -EEXIST when no EEH PE matches (including a
+ * slave PE without a master); @pe is left untouched in that case.
+ */
+static int pnv_eeh_get_pe(struct pci_controller *hose,
+			  u16 pe_no, struct eeh_pe **pe)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pnv_pe;
+	struct eeh_pe *dev_pe;
+
+	/*
+	 * If PHB supports compound PE, to fetch
+	 * the master PE because slave PE is invisible
+	 * to EEH core.
+	 */
+	pnv_pe = &phb->ioda.pe_array[pe_no];
+	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
+		pnv_pe = pnv_pe->master;
+		/* A slave without a master cannot be resolved: don't oops */
+		if (WARN_ON(!pnv_pe))
+			return -EEXIST;
+		WARN_ON(!(pnv_pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pnv_pe->pe_number;
+	}
+
+	/* Find the PE according to PE# */
+	dev_pe = eeh_pe_get(hose, pe_no);
+	if (!dev_pe)
+		return -EEXIST;
+
+	/* Freeze the (compound) PE */
+	*pe = dev_pe;
+	if (!(dev_pe->state & EEH_PE_ISOLATED))
+		pnv_eeh_freeze_pe_no(phb, pe_no);
+
+	/*
+	 * At this point, we're sure the (compound) PE should
+	 * have been frozen. However, we still need poke until
+	 * hitting the frozen PE on top level.
+	 */
+	dev_pe = dev_pe->parent;
+	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
+		int ret;
+		ret = eeh_ops->get_state(dev_pe, NULL);
+		if (ret <= 0 || eeh_state_active(ret)) {
+			dev_pe = dev_pe->parent;
+			continue;
+		}
+
+		/* Frozen parent PE */
+		*pe = dev_pe;
+		if (!(dev_pe->state & EEH_PE_ISOLATED))
+			pnv_eeh_freeze_pe_no(phb, dev_pe->addr);
+
+		/* Next one */
+		dev_pe = dev_pe->parent;
+	}
+
+	return 0;
+}
+
+/**
+ * pnv_eeh_next_error - Retrieve next EEH error to handle
+ * @pe: Output. Set to the affected PE when the return value is
+ *      EEH_NEXT_ERR_DEAD_PHB, EEH_NEXT_ERR_FENCED_PHB or
+ *      EEH_NEXT_ERR_FROZEN_PE; not meaningful otherwise.
+ *
+ * The function is expected to be called by EEH core while it gets
+ * special EEH event (without binding PE). The function calls to
+ * OPAL APIs for next error to handle. The informational error is
+ * handled internally by platform. However, the dead IOC, dead PHB,
+ * fenced PHB and frozen PE should be handled by EEH core eventually.
+ *
+ * Return: EEH_NEXT_ERR_NONE when no PHB reported an error that needs
+ * the EEH core (the EEH event interrupt is re-enabled in that case if
+ * EEH is enabled), otherwise the EEH_NEXT_ERR_* code of the first
+ * actionable error found while walking hose_list.
+ */
+static int pnv_eeh_next_error(struct eeh_pe **pe)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_pe *phb_pe, *parent_pe;
+	__be64 frozen_pe_no;
+	__be16 err_type, severity;
+	long rc;
+	int state, ret = EEH_NEXT_ERR_NONE;
+
+	/*
+	 * While running here, it's safe to purge the event queue. The
+	 * event should still be masked.
+	 */
+	eeh_remove_event(NULL, false);
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		/*
+		 * If the subordinate PCI buses of the PHB has been
+		 * removed or is exactly under error recovery, we
+		 * needn't take care of it any more.
+		 */
+		phb = hose->private_data;
+		phb_pe = eeh_phb_pe_get(hose);
+		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
+			continue;
+
+		rc = opal_pci_next_error(phb->opal_id,
+					 &frozen_pe_no, &err_type, &severity);
+		if (rc != OPAL_SUCCESS) {
+			/* Terminate the message so it is not merged with the next one */
+			pr_devel("%s: Invalid return value on "
+				 "PHB#%x (0x%lx) from opal_pci_next_error\n",
+				 __func__, hose->global_number, rc);
+			continue;
+		}
+
+		/* If the PHB doesn't have error, stop processing */
+		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
+		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
+			pr_devel("%s: No error found on PHB#%x\n",
+				 __func__, hose->global_number);
+			continue;
+		}
+
+		/*
+		 * Processing the error. We're expecting the error with
+		 * highest priority reported upon multiple errors on the
+		 * specific PHB.
+		 */
+		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
+			__func__, be16_to_cpu(err_type),
+			be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
+			hose->global_number);
+		switch (be16_to_cpu(err_type)) {
+		case OPAL_EEH_IOC_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
+				pr_err("EEH: dead IOC detected\n");
+				ret = EEH_NEXT_ERR_DEAD_IOC;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: IOC informative error "
+					"detected\n");
+				pnv_eeh_get_and_dump_hub_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PHB_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
+				*pe = phb_pe;
+				pr_err("EEH: dead PHB#%x detected, "
+				       "location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_DEAD_PHB;
+			} else if (be16_to_cpu(severity) ==
+				   OPAL_EEH_SEV_PHB_FENCED) {
+				*pe = phb_pe;
+				pr_err("EEH: Fenced PHB#%x detected, "
+				       "location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FENCED_PHB;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: PHB#%x informative error "
+					"detected, location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				pnv_eeh_get_phb_diag(phb_pe);
+				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PE_ERROR:
+			/*
+			 * If we can't find the corresponding PE, we
+			 * just try to unfreeze.
+			 */
+			if (pnv_eeh_get_pe(hose,
+				be64_to_cpu(frozen_pe_no), pe)) {
+				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+					hose->global_number, be64_to_cpu(frozen_pe_no));
+				pr_info("EEH: PHB location: %s\n",
+					eeh_pe_loc_get(phb_pe));
+
+				/* Dump PHB diag-data */
+				rc = opal_pci_get_phb_diag_data2(phb->opal_id,
+					phb->diag_data, phb->diag_data_size);
+				if (rc == OPAL_SUCCESS)
+					pnv_pci_dump_phb_diag_data(hose,
+							phb->diag_data);
+
+				/* Try best to clear it */
+				opal_pci_eeh_freeze_clear(phb->opal_id,
+					be64_to_cpu(frozen_pe_no),
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+				ret = EEH_NEXT_ERR_NONE;
+			} else if ((*pe)->state & EEH_PE_ISOLATED ||
+				   eeh_pe_passed(*pe)) {
+				/* Already isolated or passed through: nothing to report */
+				ret = EEH_NEXT_ERR_NONE;
+			} else {
+				pr_err("EEH: Frozen PE#%x "
+				       "on PHB#%x detected\n",
+				       (*pe)->addr,
+					(*pe)->phb->global_number);
+				pr_err("EEH: PE location: %s, "
+				       "PHB location: %s\n",
+				       eeh_pe_loc_get(*pe),
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FROZEN_PE;
+			}
+
+			break;
+		default:
+			pr_warn("%s: Unexpected error type %d\n",
+				__func__, be16_to_cpu(err_type));
+		}
+
+		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_mark_isolated(*pe);
+			pnv_eeh_get_phb_diag(*pe);
+
+			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+				pnv_pci_dump_phb_diag_data((*pe)->phb,
+							   (*pe)->data);
+		}
+
+		/*
+		 * We probably have the frozen parent PE out there and
+		 * we need have to handle frozen parent PE firstly.
+		 */
+		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+			parent_pe = (*pe)->parent;
+			while (parent_pe) {
+				/* Hit the ceiling ? */
+				if (parent_pe->type & EEH_PE_PHB)
+					break;
+
+				/* Frozen parent PE ? */
+				state = eeh_ops->get_state(parent_pe, NULL);
+				if (state > 0 && !eeh_state_active(state))
+					*pe = parent_pe;
+
+				/* Next parent level */
+				parent_pe = parent_pe->parent;
+			}
+
+			/* We possibly migrate to another PE */
+			eeh_pe_mark_isolated(*pe);
+		}
+
+		/*
+		 * If we have no errors on the specific PHB or only
+		 * informative error there, we continue poking it.
+		 * Otherwise, we need actions to be taken by upper
+		 * layer.
+		 */
+		if (ret > EEH_NEXT_ERR_INF)
+			break;
+	}
+
+	/* Unmask the event */
+	if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
+		enable_irq(eeh_event_irq);
+
+	return ret;
+}
+
+/*
+ * Ask OPAL to reinitialise the PCI device described by @edev.
+ *
+ * Returns -EEXIST when @edev is NULL, 0 when @edev has a physfn set
+ * (nothing is done for it) or when opal_pci_reinit() succeeds, and
+ * -EIO when opal_pci_reinit() returns a non-zero status.
+ */
+static int pnv_eeh_restore_config(struct eeh_dev *edev)
+{
+	struct pnv_phb *phb;
+	s64 rc;
+
+	if (!edev)
+		return -EEXIST;
+
+	/* Devices with a physfn are left alone */
+	if (edev->physfn)
+		return 0;
+
+	phb = edev->controller->private_data;
+	rc = opal_pci_reinit(phb->opal_id, OPAL_REINIT_PCI_DEV, edev->bdfn);
+	if (!rc)
+		return 0;
+
+	pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
+		__func__, edev->bdfn, rc);
+	return -EIO;
+}
+
+/*
+ * EEH operations for the powernv platform; handed to eeh_init() by
+ * eeh_powernv_init(). No notify_resume callback is provided.
+ */
+static struct eeh_ops pnv_eeh_ops = {
+	.name                   = "powernv",
+	.probe			= pnv_eeh_probe,
+	.set_option             = pnv_eeh_set_option,
+	.get_state              = pnv_eeh_get_state,
+	.reset                  = pnv_eeh_reset,
+	.get_log                = pnv_eeh_get_log,
+	.configure_bridge       = pnv_eeh_configure_bridge,
+	.err_inject		= pnv_eeh_err_inject,
+	.read_config            = pnv_eeh_read_config,
+	.write_config           = pnv_eeh_write_config,
+	.next_error		= pnv_eeh_next_error,
+	.restore_config		= pnv_eeh_restore_config,
+	.notify_resume		= NULL
+};
+
+/**
+ * eeh_powernv_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on powernv platform. This function should be
+ * called before any EEH related functions.
+ *
+ * Return: 0 on success, -EINVAL when the OPAL firmware feature is not
+ * present, otherwise the error returned by eeh_init().
+ */
+static int __init eeh_powernv_init(void)
+{
+	int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	int ret;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+		pr_warn("%s: OPAL is required !\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Set probe mode */
+	eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+	/*
+	 * P7IOC blocks PCI config access to frozen PE, but PHB3
+	 * doesn't do that. So we have to selectively enable I/O
+	 * prior to collecting error log.
+	 *
+	 * Every PHB is visited: the PE aux buffer must be at least as
+	 * large as the biggest diag_data_size of any PHB, so stopping
+	 * after the first entry could leave it undersized.
+	 */
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		if (phb->model == PNV_PHB_MODEL_P7IOC)
+			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+		if (phb->diag_data_size > max_diag_size)
+			max_diag_size = phb->diag_data_size;
+	}
+
+	/*
+	 * eeh_init() allocates the eeh_pe and its aux data buf so the
+	 * size needs to be set before calling eeh_init().
+	 */
+	eeh_set_pe_aux_size(max_diag_size);
+	ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
+
+	ret = eeh_init(&pnv_eeh_ops);
+	if (!ret)
+		pr_info("EEH: PowerNV platform initialized\n");
+	else
+		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
+
+	return ret;
+}
+machine_arch_initcall(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
new file mode 100644
index 0000000000..ad41dffe4d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -0,0 +1,1507 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV cpuidle code
+ *
+ * Copyright 2015 IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/firmware.h>
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
+#include <asm/smp.h>
+#include <asm/runlatch.h>
+#include <asm/dbell.h>
+
+#include "powernv.h"
+#include "subcore.h"
+
+/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
+#define MAX_STOP_STATE	0xF
+
+#define P9_STOP_SPR_MSR 2000
+#define P9_STOP_SPR_PSSCR      855
+
+static u32 supported_cpuidle_states;
+struct pnv_idle_states_t *pnv_idle_states;
+int nr_pnv_idle_states;
+
+/*
+ * The default stop state that will be used by ppc_md.power_save
+ * function on platforms that support stop instruction.
+ */
+static u64 pnv_default_stop_val;
+static u64 pnv_default_stop_mask;
+static bool default_stop_found;
+
+/*
+ * First stop state levels when SPR and TB loss can occur.
+ */
+static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
+
+/*
+ * psscr value and mask of the deepest stop idle state.
+ * Used when a cpu is offlined.
+ */
+static u64 pnv_deepest_stop_psscr_val;
+static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
+static bool deepest_stop_found;
+
+static unsigned long power7_offline_type;
+
+/*
+ * Register, through opal_slw_set_reg(), the SPR values to be used for
+ * every present CPU. Values are sampled from the CPU running this
+ * function and reused for all CPUs.
+ *
+ * Returns 0 on success or the first non-zero opal_slw_set_reg() result.
+ */
+static int __init pnv_save_sprs_for_deep_states(void)
+{
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+	 * all cpus at boot. Get these reg values of current cpu and use the
+	 * same across all cpus.
+	 */
+	uint64_t lpcr = mfspr(SPRN_LPCR);
+	uint64_t hid0 = mfspr(SPRN_HID0);
+	uint64_t hmeer = mfspr(SPRN_HMEER);
+	uint64_t msr = MSR_IDLE;
+	uint64_t psscr = pnv_deepest_stop_psscr_val;
+	int cpu;
+	int rc;
+
+	for_each_present_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		uint64_t hsprg0 = (uint64_t)paca_ptrs[cpu];
+		uint64_t hid1, hid4, hid5;
+
+		/* Per-thread registers */
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0);
+		if (rc)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr);
+		if (rc)
+			return rc;
+
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr);
+			if (rc)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, P9_STOP_SPR_PSSCR, psscr);
+			if (rc)
+				return rc;
+		}
+
+		/* HIDs are per core registers: only thread 0 programs them */
+		if (cpu_thread_in_core(cpu) != 0)
+			continue;
+
+		rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer);
+		if (rc)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_HID0, hid0);
+		if (rc)
+			return rc;
+
+		/* Only p8 needs to set extra HID registers */
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			continue;
+
+		hid1 = mfspr(SPRN_HID1);
+		hid4 = mfspr(SPRN_HID4);
+		hid5 = mfspr(SPRN_HID5);
+
+		rc = opal_slw_set_reg(pir, SPRN_HID1, hid1);
+		if (rc)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_HID4, hid4);
+		if (rc)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_HID5, hid5);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+/* Return the file-local supported_cpuidle_states flag word. */
+u32 pnv_get_supported_cpuidle_states(void)
+{
+	return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+/*
+ * Cross-CPU callback: apply the fastsleep workaround via OPAL.
+ *
+ * @info points to an int that is set to 1 when the OPAL call fails; it
+ * is never written on success. CPUs that are not the first thread
+ * sibling of their core return without doing anything.
+ */
+static void pnv_fastsleep_workaround_apply(void *info)
+{
+	int *failed = info;
+	int this_cpu = smp_processor_id();
+	int rc;
+
+	if (this_cpu != cpu_first_thread_sibling(this_cpu))
+		return;
+
+	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+					OPAL_CONFIG_IDLE_APPLY);
+	if (!rc)
+		return;
+
+	*failed = 1;
+}
+
+static bool power7_fastsleep_workaround_entry = true;
+static bool power7_fastsleep_workaround_exit = true;
+
+/*
+ * Used to store fastsleep workaround state
+ * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
+ * 1 - Workaround applied once, never undone.
+ */
+static u8 fastsleep_workaround_applyonce;
+
+/*
+ * sysfs show callback: print the current fastsleep_workaround_applyonce
+ * value followed by a newline. sysfs_emit() is used instead of
+ * sprintf() so the output is bounded to the sysfs buffer.
+ */
+static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", fastsleep_workaround_applyonce);
+}
+
+/*
+ * sysfs store callback: switch the fastsleep workaround to "apply once".
+ *
+ * Only the value 1 is accepted; anything else returns -EINVAL. Writing 1
+ * when the mode is already active is a no-op that returns @count.
+ * Returns @count on success and -EIO when applying the workaround
+ * failed on some core.
+ *
+ * NOTE(review): on the -EIO path power7_fastsleep_workaround_exit stays
+ * false; confirm that leaving the "undo" side disabled is intended.
+ */
+static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	/*
+	 * Must start at 0: pnv_fastsleep_workaround_apply() only ever
+	 * writes 1 on failure and never clears the flag.
+	 */
+	int err = 0;
+	u8 val;
+
+	if (kstrtou8(buf, 0, &val) || val != 1)
+		return -EINVAL;
+
+	if (fastsleep_workaround_applyonce == 1)
+		return count;
+
+	/*
+	 * fastsleep_workaround_applyonce = 1 implies
+	 * fastsleep workaround needs to be left in 'applied' state on all
+	 * the cores. Do this by-
+	 * 1. Disable the 'undo' workaround in fastsleep exit path
+	 * 2. Send IPIs to all the cores which have at least one online thread
+	 * 3. Disable the 'apply' workaround in fastsleep entry path
+	 *
+	 * There is no need to send ipi to cores which have all threads
+	 * offlined, as last thread of the core entering fastsleep or deeper
+	 * state would have applied workaround.
+	 */
+	power7_fastsleep_workaround_exit = false;
+
+	cpus_read_lock();
+	on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
+	cpus_read_unlock();
+	if (err) {
+		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
+		goto fail;
+	}
+
+	power7_fastsleep_workaround_entry = false;
+
+	fastsleep_workaround_applyonce = 1;
+
+	return count;
+fail:
+	return -EIO;
+}
+
+static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
+			show_fastsleep_workaround_applyonce,
+			store_fastsleep_workaround_applyonce);
+
+/*
+ * Atomically clear the calling thread's bit in the idle_state word kept
+ * in the paca of its core's first thread sibling.
+ */
+static inline void atomic_start_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+	int bit = cpu_thread_in_core(this_cpu);
+
+	clear_bit(bit, &paca_ptrs[core_first]->idle_state);
+}
+
+/*
+ * Atomically set the calling thread's bit in the idle_state word kept
+ * in the paca of its core's first thread sibling.
+ */
+static inline void atomic_stop_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+	int bit = cpu_thread_in_core(this_cpu);
+
+	set_bit(bit, &paca_ptrs[core_first]->idle_state);
+}
+
+/*
+ * Spin until the core idle lock bit in the first thread sibling's
+ * idle_lock word has been acquired by this thread.
+ */
+static inline void atomic_lock_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+	unsigned long *core_lock = &paca_ptrs[core_first]->idle_lock;
+
+	for (;;) {
+		if (likely(!test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT,
+						  core_lock)))
+			break;
+		barrier();
+	}
+}
+
+/*
+ * Set the calling thread's bit in the core idle_state word with a
+ * cmpxchg retry loop, then release the core idle lock bit.
+ *
+ * BUGs if the lock bit is not currently set or if the thread's state
+ * bit is already set on entry.
+ */
+static inline void atomic_unlock_and_stop_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+	unsigned long my_bit = 1UL << cpu_thread_in_core(this_cpu);
+	unsigned long *core_state = &paca_ptrs[core_first]->idle_state;
+	unsigned long *core_lock = &paca_ptrs[core_first]->idle_lock;
+	u64 expected = READ_ONCE(*core_state);
+	u64 wanted, seen;
+
+	BUG_ON(!(READ_ONCE(*core_lock) & PNV_CORE_IDLE_LOCK_BIT));
+	BUG_ON(expected & my_bit);
+
+	for (;;) {
+		wanted = expected | my_bit;
+		seen = cmpxchg(core_state, expected, wanted);
+		if (likely(seen == expected))
+			break;
+		/* The word changed under us: retry from the observed value */
+		expected = seen;
+	}
+
+	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, core_lock);
+}
+
+/*
+ * Release the core idle lock bit in the first thread sibling's
+ * idle_lock word. BUGs if the lock bit is not set.
+ */
+static inline void atomic_unlock_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	unsigned long *core_lock;
+
+	core_lock = &paca_ptrs[cpu_first_thread_sibling(this_cpu)]->idle_lock;
+
+	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, core_lock));
+	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, core_lock);
+}
+
+/*
+ * P7 and P8
+ *
+ * SPR values that power7_idle_insn() keeps on its stack across the idle
+ * instruction and writes back after wakeup, grouped by the scope at
+ * which they are restored.
+ */
+struct p7_sprs {
+	/* per core */
+	u64 tscr;
+	u64 worc;
+
+	/* per subcore */
+	u64 sdr1;
+	u64 rpr;
+
+	/* per thread */
+	u64 lpcr;
+	u64 hfscr;
+	u64 fscr;
+	u64 purr;
+	u64 spurr;
+	u64 dscr;
+	u64 wort;
+
+	/* per thread SPRs that get lost in shallow states */
+	u64 amr;
+	u64 iamr;
+	u64 uamor;
+	/* amor is restored to constant ~0 */
+};
+
+/*
+ * Enter the POWER7/POWER8 idle state selected by @type and, after wakeup,
+ * restore whatever state the wake reason in SRR1 says was lost.
+ *
+ * For any @type other than PNV_THREAD_NAP the per-core idle_state word
+ * (in the first thread sibling's paca) is updated under the core idle
+ * lock, both before idling and after waking.
+ *
+ * Returns the SRR1 value produced by isa206_idle_insn_mayloss().
+ */
+static unsigned long power7_idle_insn(unsigned long type)
+{
+	int cpu = raw_smp_processor_id();
+	int first = cpu_first_thread_sibling(cpu);
+	unsigned long *state = &paca_ptrs[first]->idle_state;
+	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+	unsigned long srr1;
+	bool full_winkle;
+	struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
+	bool sprs_saved = false;
+	int rc;
+
+	if (unlikely(type != PNV_THREAD_NAP)) {
+		atomic_lock_thread_idle();
+
+		/* This thread's bit must be set on entry; clear it now */
+		BUG_ON(!(*state & thread));
+		*state &= ~thread;
+
+		if (power7_fastsleep_workaround_entry) {
+			/* No thread bit left set in the core: apply workaround */
+			if ((*state & core_thread_mask) == 0) {
+				rc = opal_config_cpu_idle_state(
+						OPAL_CONFIG_IDLE_FASTSLEEP,
+						OPAL_CONFIG_IDLE_APPLY);
+				BUG_ON(rc);
+			}
+		}
+
+		if (type == PNV_THREAD_WINKLE) {
+			sprs.tscr	= mfspr(SPRN_TSCR);
+			sprs.worc	= mfspr(SPRN_WORC);
+
+			sprs.sdr1	= mfspr(SPRN_SDR1);
+			sprs.rpr	= mfspr(SPRN_RPR);
+
+			sprs.lpcr	= mfspr(SPRN_LPCR);
+			if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+				sprs.hfscr	= mfspr(SPRN_HFSCR);
+				sprs.fscr	= mfspr(SPRN_FSCR);
+			}
+			sprs.purr	= mfspr(SPRN_PURR);
+			sprs.spurr	= mfspr(SPRN_SPURR);
+			sprs.dscr	= mfspr(SPRN_DSCR);
+			sprs.wort	= mfspr(SPRN_WORT);
+
+			sprs_saved = true;
+
+			/*
+			 * Increment winkle counter and set all winkle bits if
+			 * all threads are winkling. This allows wakeup side to
+			 * distinguish between fast sleep and winkle state
+			 * loss. Fast sleep still has to resync the timebase so
+			 * this may not be a really big win.
+			 */
+			*state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+			if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
+					>> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
+					== threads_per_core)
+				*state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
+			WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+		}
+
+		atomic_unlock_thread_idle();
+	}
+
+	/* Saved for every idle type, NAP included (ISA 2.07S CPUs only) */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		sprs.amr	= mfspr(SPRN_AMR);
+		sprs.iamr	= mfspr(SPRN_IAMR);
+		sprs.uamor	= mfspr(SPRN_UAMOR);
+	}
+
+	local_paca->thread_idle_state = type;
+	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
+	local_paca->thread_idle_state = PNV_THREAD_RUNNING;
+
+	WARN_ON_ONCE(!srr1);
+	/* Still expected to be running with translation off here */
+	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+			/*
+			 * We don't need an isync after the mtsprs here because
+			 * the upcoming mtmsrd is execution synchronizing.
+			 */
+			mtspr(SPRN_AMR,		sprs.amr);
+			mtspr(SPRN_IAMR,	sprs.iamr);
+			mtspr(SPRN_AMOR,	~0);
+			mtspr(SPRN_UAMOR,	sprs.uamor);
+		}
+	}
+
+	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+		hmi_exception_realmode(NULL);
+
+	/* No hypervisor state loss: only undo the idle_state bookkeeping */
+	if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
+		if (unlikely(type != PNV_THREAD_NAP)) {
+			atomic_lock_thread_idle();
+			if (type == PNV_THREAD_WINKLE) {
+				WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+				*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+				*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+			}
+			atomic_unlock_and_stop_thread_idle();
+		}
+		return srr1;
+	}
+
+	/* HV state loss */
+	BUG_ON(type == PNV_THREAD_NAP);
+
+	atomic_lock_thread_idle();
+
+	/*
+	 * full_winkle is set only when this thread's winkle bit was still
+	 * set in the core state, i.e. the SPRs saved above must be restored.
+	 */
+	full_winkle = false;
+	if (type == PNV_THREAD_WINKLE) {
+		WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+		*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+		if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
+			*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+			full_winkle = true;
+			BUG_ON(!sprs_saved);
+		}
+	}
+
+	WARN_ON(*state & thread);
+
+	/* Another thread of the core has its bit set: skip per-core work */
+	if ((*state & core_thread_mask) != 0)
+		goto core_woken;
+
+	/* Per-core SPRs */
+	if (full_winkle) {
+		mtspr(SPRN_TSCR,	sprs.tscr);
+		mtspr(SPRN_WORC,	sprs.worc);
+	}
+
+	if (power7_fastsleep_workaround_exit) {
+		rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+						OPAL_CONFIG_IDLE_UNDO);
+		BUG_ON(rc);
+	}
+
+	/* TB */
+	if (opal_resync_timebase() != OPAL_SUCCESS)
+		BUG();
+
+core_woken:
+	if (!full_winkle)
+		goto subcore_woken;
+
+	/* A subcore sibling has its bit set: skip per-subcore work */
+	if ((*state & local_paca->subcore_sibling_mask) != 0)
+		goto subcore_woken;
+
+	/* Per-subcore SPRs */
+	mtspr(SPRN_SDR1,	sprs.sdr1);
+	mtspr(SPRN_RPR,		sprs.rpr);
+
+subcore_woken:
+	/*
+	 * isync after restoring shared SPRs and before unlocking. Unlock
+	 * only contains hwsync which does not necessarily do the right
+	 * thing for SPRs.
+	 */
+	isync();
+	atomic_unlock_and_stop_thread_idle();
+
+	/* Fast sleep does not lose SPRs */
+	if (!full_winkle)
+		return srr1;
+
+	/* Per-thread SPRs */
+	mtspr(SPRN_LPCR,	sprs.lpcr);
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_HFSCR,	sprs.hfscr);
+		mtspr(SPRN_FSCR,	sprs.fscr);
+	}
+	mtspr(SPRN_PURR,	sprs.purr);
+	mtspr(SPRN_SPURR,	sprs.spurr);
+	mtspr(SPRN_DSCR,	sprs.dscr);
+	mtspr(SPRN_WORT,	sprs.wort);
+
+	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/*
+	 * The SLB has to be restored here, but it sometimes still
+	 * contains entries, so the __ variant must be used to prevent
+	 * multi hits.
+	 */
+	__slb_restore_bolted_realmode();
+#endif
+
+	return srr1;
+}
+
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Idle an offlined CPU using the idle type in power7_offline_type.
+ *
+ * Switches the MSR to MSR_IDLE around the idle instruction and back to
+ * MSR_KERNEL before returning. With CONFIG_KVM_BOOK3S_HV_POSSIBLE the
+ * KVM hwthread_state is published before idling and, after wakeup, a
+ * pending hwthread_req is handed to idle_kvm_start_guest().
+ *
+ * Returns the SRR1 wake value (possibly replaced by the value returned
+ * from idle_kvm_start_guest()).
+ */
+static unsigned long power7_offline(void)
+{
+	unsigned long srr1;
+
+	mtmsr(MSR_IDLE);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* Tell KVM we're entering idle. */
+	/******************************************************/
+	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
+	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
+	/* MUST occur in real mode, i.e. with the MMU off,    */
+	/* and the MMU must stay off until we clear this flag */
+	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
+	/* pnv_powersave_wakeup in this file.                 */
+	/* The reason is that another thread can switch the   */
+	/* MMU to a guest context whenever this flag is set   */
+	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
+	/* that would potentially cause this thread to start  */
+	/* executing instructions from guest memory in        */
+	/* hypervisor mode, leading to a host crash or data   */
+	/* corruption, or worse.                              */
+	/******************************************************/
+	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+#endif
+
+	__ppc64_runlatch_off();
+	srr1 = power7_idle_insn(power7_offline_type);
+	__ppc64_runlatch_on();
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	smp_mb();
+	if (local_paca->kvm_hstate.hwthread_req)
+		srr1 = idle_kvm_start_guest(srr1);
+#endif
+
+	mtmsr(MSR_KERNEL);
+
+	return srr1;
+}
+#endif
+
+/*
+ * Idle the calling CPU in the POWER7/POWER8 idle state @type.
+ *
+ * Does nothing when prep_irq_for_idle_irqsoff() returns false. Otherwise
+ * runs power7_idle_insn() with the MSR set to MSR_IDLE and the runlatch
+ * off, restores MSR_KERNEL, and passes the wake SRR1 to
+ * irq_set_pending_from_srr1().
+ */
+void power7_idle_type(unsigned long type)
+{
+	unsigned long srr1;
+
+	if (!prep_irq_for_idle_irqsoff())
+		return;
+
+	mtmsr(MSR_IDLE);
+	__ppc64_runlatch_off();
+	srr1 = power7_idle_insn(type);
+	__ppc64_runlatch_on();
+	mtmsr(MSR_KERNEL);
+
+	fini_irq_for_idle_irqsoff();
+	irq_set_pending_from_srr1(srr1);
+}
+
+/* Parameterless idle entry: nap when powersave_nap is non-zero. */
+static void power7_idle(void)
+{
+	if (powersave_nap)
+		power7_idle_type(PNV_THREAD_NAP);
+}
+
+/*
+ * SPR values that power9_idle_stop() keeps on its stack across the stop
+ * instruction and writes back after waking from a state that lost them.
+ */
+struct p9_sprs {
+	/* per core */
+	u64 ptcr;
+	u64 rpr;
+	u64 tscr;
+	u64 ldbar;
+
+	/* per thread */
+	u64 lpcr;
+	u64 hfscr;
+	u64 fscr;
+	u64 pid;
+	u64 purr;
+	u64 spurr;
+	u64 dscr;
+	u64 ciabr;
+
+	/*
+	 * All MMCRs are kept at full register width: they are filled from
+	 * mfspr() and written back with mtspr(), so a narrower field would
+	 * silently drop the upper 32 bits across a deep stop.
+	 */
+	u64 mmcra;
+	u64 mmcr0;
+	u64 mmcr1;
+	u64 mmcr2;
+
+	/* per thread SPRs that get lost in shallow states */
+	u64 amr;
+	u64 iamr;
+	u64 amor;
+	u64 uamor;
+};
+
+/*
+ * Execute the POWER9 stop instruction with the given @psscr and restore
+ * lost state on wakeup.
+ *
+ * Returns 0 when the EC=ESL=0 stop woke without an SRR1 value, or when
+ * the stop was skipped because this thread's dont_stop count is non-zero
+ * (CPU_FTR_P9_TM_XER_SO_BUG with CONFIG_KVM_BOOK3S_HV_POSSIBLE).
+ * Otherwise returns the SRR1 wake value; on those paths the MSR is set
+ * to MSR_KERNEL before returning.
+ */
+static unsigned long power9_idle_stop(unsigned long psscr)
+{
+	int cpu = raw_smp_processor_id();
+	int first = cpu_first_thread_sibling(cpu);
+	unsigned long *state = &paca_ptrs[first]->idle_state;
+	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+	unsigned long srr1;
+	unsigned long pls;
+	unsigned long mmcr0 = 0;
+	unsigned long mmcra = 0;
+	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
+	bool sprs_saved = false;
+
+	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+		/* EC=ESL=0 case */
+
+		/*
+		 * Wake synchronously. SRESET via xscom may still cause
+		 * a 0x100 powersave wakeup with SRR1 reason!
+		 */
+		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
+		if (likely(!srr1))
+			return 0;
+
+		/*
+		 * Registers not saved, can't recover!
+		 * This would be a hardware bug
+		 */
+		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+		goto out;
+	}
+
+	/* EC=ESL=1 case */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
+		/* Publish the request, then back out if asked not to stop */
+		local_paca->requested_psscr = psscr;
+		/* order setting requested_psscr vs testing dont_stop */
+		smp_mb();
+		if (atomic_read(&local_paca->dont_stop)) {
+			local_paca->requested_psscr = 0;
+			return 0;
+		}
+	}
+#endif
+
+	if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+		 /*
+		  * POWER9 DD2 can incorrectly set PMAO when waking up
+		  * after a state-loss idle. Saving and restoring MMCR0
+		  * over idle is a workaround.
+		  */
+		mmcr0		= mfspr(SPRN_MMCR0);
+	}
+
+	/* Requested level can lose SPRs: save them and clear our state bit */
+	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+		sprs.lpcr	= mfspr(SPRN_LPCR);
+		sprs.hfscr	= mfspr(SPRN_HFSCR);
+		sprs.fscr	= mfspr(SPRN_FSCR);
+		sprs.pid	= mfspr(SPRN_PID);
+		sprs.purr	= mfspr(SPRN_PURR);
+		sprs.spurr	= mfspr(SPRN_SPURR);
+		sprs.dscr	= mfspr(SPRN_DSCR);
+		sprs.ciabr	= mfspr(SPRN_CIABR);
+
+		sprs.mmcra	= mfspr(SPRN_MMCRA);
+		sprs.mmcr0	= mfspr(SPRN_MMCR0);
+		sprs.mmcr1	= mfspr(SPRN_MMCR1);
+		sprs.mmcr2	= mfspr(SPRN_MMCR2);
+
+		sprs.ptcr	= mfspr(SPRN_PTCR);
+		sprs.rpr	= mfspr(SPRN_RPR);
+		sprs.tscr	= mfspr(SPRN_TSCR);
+		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+			sprs.ldbar = mfspr(SPRN_LDBAR);
+
+		sprs_saved = true;
+
+		atomic_start_thread_idle();
+	}
+
+	sprs.amr	= mfspr(SPRN_AMR);
+	sprs.iamr	= mfspr(SPRN_IAMR);
+	sprs.uamor	= mfspr(SPRN_UAMOR);
+
+	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	local_paca->requested_psscr = 0;
+#endif
+
+	/* Re-read PSSCR: its PLS field is used below to detect state loss */
+	psscr = mfspr(SPRN_PSSCR);
+
+	WARN_ON_ONCE(!srr1);
+	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+		/*
+		 * We don't need an isync after the mtsprs here because the
+		 * upcoming mtmsrd is execution synchronizing.
+		 */
+		mtspr(SPRN_AMR,		sprs.amr);
+		mtspr(SPRN_IAMR,	sprs.iamr);
+		mtspr(SPRN_AMOR,	~0);
+		mtspr(SPRN_UAMOR,	sprs.uamor);
+
+		/*
+		 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
+		 * might have been corrupted and needs flushing. We also need
+		 * to reload MMCR0 (see mmcr0 comment above).
+		 */
+		if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
+			mtspr(SPRN_MMCR0, mmcr0);
+		}
+
+		/*
+		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
+		 * to ensure the PMU starts running.
+		 */
+		mmcra = mfspr(SPRN_MMCRA);
+		mmcra |= PPC_BIT(60);
+		mtspr(SPRN_MMCRA, mmcra);
+		mmcra &= ~PPC_BIT(60);
+		mtspr(SPRN_MMCRA, mmcra);
+	}
+
+	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+		hmi_exception_realmode(NULL);
+
+	/*
+	 * On POWER9, SRR1 bits do not match exactly as expected.
+	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+	 * just always test PSSCR for SPR/TB state loss.
+	 */
+	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+	if (likely(pls < deep_spr_loss_state)) {
+		/* Nothing lost: just set our state bit again if we cleared it */
+		if (sprs_saved)
+			atomic_stop_thread_idle();
+		goto out;
+	}
+
+	/* HV state loss */
+	BUG_ON(!sprs_saved);
+
+	atomic_lock_thread_idle();
+
+	/* Another thread of the core has its bit set: skip per-core work */
+	if ((*state & core_thread_mask) != 0)
+		goto core_woken;
+
+	/* Per-core SPRs */
+	mtspr(SPRN_PTCR,	sprs.ptcr);
+	mtspr(SPRN_RPR,		sprs.rpr);
+	mtspr(SPRN_TSCR,	sprs.tscr);
+
+	if (pls >= pnv_first_tb_loss_level) {
+		/* TB loss */
+		if (opal_resync_timebase() != OPAL_SUCCESS)
+			BUG();
+	}
+
+	/*
+	 * isync after restoring shared SPRs and before unlocking. Unlock
+	 * only contains hwsync which does not necessarily do the right
+	 * thing for SPRs.
+	 */
+	isync();
+
+core_woken:
+	atomic_unlock_and_stop_thread_idle();
+
+	/* Per-thread SPRs */
+	mtspr(SPRN_LPCR,	sprs.lpcr);
+	mtspr(SPRN_HFSCR,	sprs.hfscr);
+	mtspr(SPRN_FSCR,	sprs.fscr);
+	mtspr(SPRN_PID,		sprs.pid);
+	mtspr(SPRN_PURR,	sprs.purr);
+	mtspr(SPRN_SPURR,	sprs.spurr);
+	mtspr(SPRN_DSCR,	sprs.dscr);
+	mtspr(SPRN_CIABR,	sprs.ciabr);
+
+	mtspr(SPRN_MMCRA,	sprs.mmcra);
+	mtspr(SPRN_MMCR0,	sprs.mmcr0);
+	mtspr(SPRN_MMCR1,	sprs.mmcr1);
+	mtspr(SPRN_MMCR2,	sprs.mmcr2);
+	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+		mtspr(SPRN_LDBAR, sprs.ldbar);
+
+	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);
+
+	if (!radix_enabled())
+		__slb_restore_bolted_realmode();
+
+out:
+	mtmsr(MSR_KERNEL);
+
+	return srr1;
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * This is used in working around bugs in thread reconfiguration
+ * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
+ * memory and the way that XER[SO] is checkpointed.
+ * This function forces the core into SMT4 in order by asking
+ * all other threads not to stop, and sending a message to any
+ * that are in a stop state.
+ * Must be called with preemption disabled.
+ *
+ * Each sibling's dont_stop count is incremented here and is expected to
+ * be dropped again by pnv_power9_force_smt4_release().
+ */
+void pnv_power9_force_smt4_catch(void)
+{
+	int cpu, cpu0, thr;
+	int awake_threads = 1;		/* this thread is awake */
+	int poke_threads = 0;
+	int need_awake = threads_per_core;
+
+	cpu = smp_processor_id();
+	/* First CPU number of this core (threads_per_core aligned) */
+	cpu0 = cpu & ~(threads_per_core - 1);
+	for (thr = 0; thr < threads_per_core; ++thr) {
+		if (cpu != cpu0 + thr)
+			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+	}
+	/* order setting dont_stop vs testing requested_psscr */
+	smp_mb();
+	/*
+	 * NOTE(review): this loop also visits the calling thread, so it is
+	 * presumably counted here in addition to the initial value of 1,
+	 * which is what makes "awake_threads < need_awake" correspond to
+	 * the "at least 3 threads" wording below - confirm.
+	 */
+	for (thr = 0; thr < threads_per_core; ++thr) {
+		if (!paca_ptrs[cpu0+thr]->requested_psscr)
+			++awake_threads;
+		else
+			poke_threads |= (1 << thr);
+	}
+
+	/* If at least 3 threads are awake, the core is in SMT4 already */
+	if (awake_threads < need_awake) {
+		/* We have to wake some threads; we'll use msgsnd */
+		for (thr = 0; thr < threads_per_core; ++thr) {
+			if (poke_threads & (1 << thr)) {
+				ppc_msgsnd_sync();
+				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
+					   paca_ptrs[cpu0+thr]->hw_cpu_id);
+			}
+		}
+		/* now spin until at least 3 threads are awake */
+		do {
+			for (thr = 0; thr < threads_per_core; ++thr) {
+				/* A poked thread cleared its request: count it once */
+				if ((poke_threads & (1 << thr)) &&
+				    !paca_ptrs[cpu0+thr]->requested_psscr) {
+					++awake_threads;
+					poke_threads &= ~(1 << thr);
+				}
+			}
+		} while (awake_threads < need_awake);
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
+
+/*
+ * Decrement the dont_stop count of every other thread in the calling
+ * CPU's core.
+ */
+void pnv_power9_force_smt4_release(void)
+{
+	int self = smp_processor_id();
+	int core_base = self & ~(threads_per_core - 1);
+	int idx;
+
+	/* clear all the dont_stop flags */
+	for (idx = 0; idx < threads_per_core; ++idx) {
+		int sibling = core_base + idx;
+
+		if (sibling == self)
+			continue;
+
+		atomic_dec(&paca_ptrs[sibling]->dont_stop);
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+/*
+ * Placeholder for POWER10 SPR save state. It currently has no members;
+ * the comment below records why nothing needs saving for shallow states.
+ */
+struct p10_sprs {
+	/*
+	 * SPRs that get lost in shallow states:
+	 *
+	 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
+	 * isa300 idle routines restore CR, LR.
+	 * CTR is volatile
+	 * idle thread doesn't use FP or VEC
+	 * kernel doesn't use TAR
+	 * HSPRG1 is only live in HV interrupt entry
+	 * SPRG2 is only live in KVM guests, KVM handles it.
+	 */
+};
+
+/*
+ * Execute the POWER10 stop instruction with the given @psscr and handle
+ * state loss on wakeup.
+ *
+ * No SPRs are saved or restored yet (see the XXX markers); only the core
+ * idle_state bookkeeping, timebase resync and SLB restore are done.
+ *
+ * Returns 0 when the EC=ESL=0 stop woke without an SRR1 value, otherwise
+ * the SRR1 wake value with the MSR set to MSR_KERNEL before returning.
+ */
+static unsigned long power10_idle_stop(unsigned long psscr)
+{
+	int cpu = raw_smp_processor_id();
+	int first = cpu_first_thread_sibling(cpu);
+	unsigned long *state = &paca_ptrs[first]->idle_state;
+	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+	unsigned long srr1;
+	unsigned long pls;
+//	struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
+	bool sprs_saved = false;
+
+	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+		/* EC=ESL=0 case */
+
+		/*
+		 * Wake synchronously. SRESET via xscom may still cause
+		 * a 0x100 powersave wakeup with SRR1 reason!
+		 */
+		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
+		if (likely(!srr1))
+			return 0;
+
+		/*
+		 * Registers not saved, can't recover!
+		 * This would be a hardware bug
+		 */
+		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+		goto out;
+	}
+
+	/* EC=ESL=1 case */
+	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+		/* XXX: save SPRs for deep state loss here. */
+
+		/* Set even though nothing is saved, so the checks below pass */
+		sprs_saved = true;
+
+		atomic_start_thread_idle();
+	}
+
+	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */
+
+	/* Re-read PSSCR: its PLS field is used below to detect state loss */
+	psscr = mfspr(SPRN_PSSCR);
+
+	WARN_ON_ONCE(!srr1);
+	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+		hmi_exception_realmode(NULL);
+
+	/*
+	 * On POWER10, SRR1 bits do not match exactly as expected.
+	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+	 * just always test PSSCR for SPR/TB state loss.
+	 */
+	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+	if (likely(pls < deep_spr_loss_state)) {
+		/* Nothing lost: just set our state bit again if we cleared it */
+		if (sprs_saved)
+			atomic_stop_thread_idle();
+		goto out;
+	}
+
+	/* HV state loss */
+	BUG_ON(!sprs_saved);
+
+	atomic_lock_thread_idle();
+
+	/* Another thread of the core has its bit set: skip per-core work */
+	if ((*state & core_thread_mask) != 0)
+		goto core_woken;
+
+	/* XXX: restore per-core SPRs here */
+
+	if (pls >= pnv_first_tb_loss_level) {
+		/* TB loss */
+		if (opal_resync_timebase() != OPAL_SUCCESS)
+			BUG();
+	}
+
+	/*
+	 * isync after restoring shared SPRs and before unlocking. Unlock
+	 * only contains hwsync which does not necessarily do the right
+	 * thing for SPRs.
+	 */
+	isync();
+
+core_woken:
+	atomic_unlock_and_stop_thread_idle();
+
+	/* XXX: restore per-thread SPRs here */
+
+	if (!radix_enabled())
+		__slb_restore_bolted_realmode();
+
+out:
+	mtmsr(MSR_KERNEL);
+
+	return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Run the stop sequence for a CPU going offline and pass back its result.
+ * CPUs with CPU_FTR_ARCH_31 use the POWER10 routine, others the POWER9 one.
+ */
+static unsigned long arch300_offline_stop(unsigned long psscr)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return power10_idle_stop(psscr);
+
+	return power9_idle_stop(psscr);
+}
+#endif
+
+void arch300_idle_type(unsigned long stop_psscr_val,
+				      unsigned long stop_psscr_mask)
+{
+	unsigned long psscr;
+	unsigned long srr1;
+
+	if (!prep_irq_for_idle_irqsoff())
+		return;	/* stop is not executed at all */
+
+	psscr = mfspr(SPRN_PSSCR);
+	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;	/* replace only the masked fields */
+
+	__ppc64_runlatch_off();
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		srr1 = power10_idle_stop(psscr);
+	else
+		srr1 = power9_idle_stop(psscr);
+	__ppc64_runlatch_on();
+
+	fini_irq_for_idle_irqsoff();
+
+	irq_set_pending_from_srr1(srr1);	/* hand the wakeup SRR1 to the irq code */
+}
+
+/*
+ * Parameterless wrapper for ppc_md.power_save: idle in the default stop state
+ */
+static void arch300_idle(void)
+{
+	arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
+{
+	u64 pir = get_hard_smp_processor_id(cpu);
+
+	mtspr(SPRN_LPCR, lpcr_val);
+
+	/*
+	 * Also program the LPCR via stop-api (for @cpu's hardware id), but
+	 * only if some supported idle state can lose full context.
+	 */
+	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
+		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+}
+
+/*
+ * pnv_cpu_offline: Put an offlined CPU into the deepest available idle
+ * state: ISA 3.0 stop, else power7_offline(), else a snooze-like busy loop.
+ * Presumably entered with interrupts hard disabled, no lazy irq pending.
+ */
+unsigned long pnv_cpu_offline(unsigned int cpu)
+{
+	unsigned long srr1;
+
+	__ppc64_runlatch_off();
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
+		unsigned long psscr;
+
+		psscr = mfspr(SPRN_PSSCR);
+		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
+						pnv_deepest_stop_psscr_val;
+		srr1 = arch300_offline_stop(psscr);
+	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
+		srr1 = power7_offline();
+	} else {
+		/* This is the fallback method. We emulate snooze */
+		while (!generic_check_cpu_restart(cpu)) {
+			HMT_low();
+			HMT_very_low();
+		}
+		srr1 = 0;	/* the busy loop has no wakeup SRR1 to report */
+		HMT_medium();
+	}
+
+	__ppc64_runlatch_on();
+
+	return srr1;
+}
+#endif
+
+/*
+ * Power ISA 3.0 idle initialization.
+ *
+ * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
+ * Register (PSSCR) to control idle behavior.
+ *
+ * PSSCR layout:
+ * ----------------------------------------------------------
+ * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
+ * ----------------------------------------------------------
+ * 0      4     41   42    43   44     48    54   56    60
+ *
+ * PSSCR key fields:
+ *	Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
+ *	lowest power-saving state the thread entered since stop instruction was
+ *	last executed.
+ *
+ *	Bit 41 - Status Disable(SD)
+ *	0 - Shows PLS entries
+ *	1 - PLS entries are all 0
+ *
+ *	Bit 42 - Enable State Loss
+ *	0 - No state is lost irrespective of other fields
+ *	1 - Allows state loss
+ *
+ *	Bit 43 - Exit Criterion
+ *	0 - Exit from power-save mode on any interrupt
+ *	1 - Exit from power-save mode controlled by LPCR's PECE bits
+ *
+ *	Bits 44:47 - Power-Saving Level Limit
+ *	This limits the power-saving level that can be entered into.
+ *
+ *	Bits 60:63 - Requested Level
+ *	Used to specify which power-saving level must be entered on executing
+ *	stop instruction
+ */
+
+/*
+ * Check (and, for old firmware, complete) one idle state's PSSCR value/mask.
+ * Returns 0 if usable, else ERR_EC_ESL_MISMATCH or
+ * ERR_DEEP_STATE_ESL_MISMATCH.
+ */
+int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
+{
+	/*
+	 * A mask of exactly 0xf is how older firmware identifies itself:
+	 * fill in the remaining PSSCR fields with the HV defaults and accept.
+	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
+	 */
+	if (*psscr_mask == 0xf) {
+		*psscr_val |= PSSCR_HV_DEFAULT_VAL;
+		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
+		return 0;
+	}
+
+	/*
+	 * Newer firmware supplies the complete value, so only check it:
+	 * - the ESL bit must equal the EC bit;
+	 * - states flagged OPAL_PM_LOSE_FULL_CONTEXT must have ESL set.
+	 */
+	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val))
+		return ERR_EC_ESL_MISMATCH;
+
+	if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && !GET_PSSCR_ESL(*psscr_val))
+		return ERR_DEEP_STATE_ESL_MISMATCH;
+
+	return 0;
+}
+
+/*
+ * pnv_arch300_idle_init: Initializes the default idle state, first
+ *                        deep idle state and deepest idle state on
+ *                        ISA 3.0 CPUs.
+ *
+ * Works on the already-parsed pnv_idle_states[] array (nr_pnv_idle_states
+ * entries); takes no arguments and returns nothing. States that fail
+ * PSSCR validation are reported and left invalid; a missing default stop
+ * state is only warned about.
+ */
+static void __init pnv_arch300_idle_init(void)
+{
+	u64 max_residency_ns = 0;
+	int i;
+
+	/* stop is not really architected, we only have p9,p10 drivers */
+	if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
+		return;
+
+	/*
+	 * pnv_deepest_stop_psscr_{val,mask} are taken from the valid state
+	 * with the largest residency_ns (treated as the deepest stop state).
+	 *
+	 * pnv_default_stop_{val,mask} are taken from the first valid state
+	 * flagged OPAL_PM_STOP_INST_FAST.
+	 */
+	pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+	deep_spr_loss_state = MAX_STOP_STATE + 1;
+	for (i = 0; i < nr_pnv_idle_states; i++) {
+		int err;
+		struct pnv_idle_states_t *state = &pnv_idle_states[i];
+		u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+
+		/* No deep loss driver implemented for POWER10 yet */
+		if (pvr_version_is(PVR_POWER10) &&
+		    (state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)))
+			continue;
+
+		/* Track the shallowest requested level that stops the timebase */
+		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+		     (pnv_first_tb_loss_level > psscr_rl))
+			pnv_first_tb_loss_level = psscr_rl;
+
+		if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+		     (deep_spr_loss_state > psscr_rl))
+			deep_spr_loss_state = psscr_rl;
+
+		/*
+		 * The idle code does not deal with TB loss occurring
+		 * in a shallower state than SPR loss, so force it to
+		 * behave like SPRs are lost if TB is lost. POWER9 would
+		 * never encounter this, but a POWER8 core would if it
+		 * implemented the stop instruction. So this is for forward
+		 * compatibility.
+		 */
+		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+		     (deep_spr_loss_state > psscr_rl))
+			deep_spr_loss_state = psscr_rl;
+
+		err = validate_psscr_val_mask(&state->psscr_val,
+					      &state->psscr_mask,
+					      state->flags);
+		if (err) {
+			report_invalid_psscr_val(state->psscr_val, err);
+			continue;
+		}
+
+		state->valid = true;
+
+		if (max_residency_ns < state->residency_ns) {
+			max_residency_ns = state->residency_ns;
+			pnv_deepest_stop_psscr_val = state->psscr_val;
+			pnv_deepest_stop_psscr_mask = state->psscr_mask;
+			pnv_deepest_stop_flag = state->flags;
+			deepest_stop_found = true;
+		}
+
+		if (!default_stop_found &&
+		    (state->flags & OPAL_PM_STOP_INST_FAST)) {
+			pnv_default_stop_val = state->psscr_val;
+			pnv_default_stop_mask = state->psscr_mask;
+			default_stop_found = true;
+			WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
+		}
+	}
+
+	if (unlikely(!default_stop_found)) {
+		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
+	} else {
+		ppc_md.power_save = arch300_idle;
+		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
+			pnv_default_stop_val, pnv_default_stop_mask);
+	}
+
+	if (unlikely(!deepest_stop_found)) {
+		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
+	} else {
+		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
+			pnv_deepest_stop_psscr_val,
+			pnv_deepest_stop_psscr_mask);
+	}
+
+	pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
+		deep_spr_loss_state);
+
+	pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
+		pnv_first_tb_loss_level);
+}
+
+static void __init pnv_disable_deep_states(void)
+{
+	/*
+	 * The stop-api cannot restore hypervisor resources after waking from
+	 * an idle state that loses full context, so disable such states.
+	 */
+	supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+	pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+	pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+	/* Nothing more to do unless the ISA 3.0 hotplug state loses context */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300) ||
+	    !(pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT))
+		return;
+
+	if (!default_stop_found) {
+		/* Fallback to snooze loop for CPU-Hotplug */
+		deepest_stop_found = false;
+		pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+		return;
+	}
+
+	/* Use the default stop state for CPU-Hotplug instead */
+	pnv_deepest_stop_psscr_val = pnv_default_stop_val;
+	pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
+	pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+		pnv_deepest_stop_psscr_val);
+}
+
+/*
+ * Run the ISA 3.0 idle setup and collect the flags of all parsed idle states
+ */
+static void __init pnv_probe_idle_states(void)
+{
+	int i;
+
+	if (nr_pnv_idle_states < 0) {
+		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+		return;
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pnv_arch300_idle_init();
+
+	/* supported_cpuidle_states becomes the OR of every state's flags */
+	for (i = 0; i < nr_pnv_idle_states; i++)
+		supported_cpuidle_states |= pnv_idle_states[i].flags;
+}
+
+/*
+ * This function parses device-tree and populates all the information
+ * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
+ * which is the number of cpuidle states discovered through device-tree.
+ */
+
+static int __init pnv_parse_cpuidle_dt(void)
+{
+	struct device_node *np;
+	int nr_idle_states, i;
+	int rc = 0;
+	u32 *temp_u32;
+	u64 *temp_u64;
+	const char **temp_string;
+
+	np = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!np) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		return -ENODEV;
+	}
+	nr_idle_states = of_property_count_u32_elems(np,
+						"ibm,cpu-idle-state-flags");	/* NOTE(review): result is not range-checked before kcalloc() */
+
+	pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
+				  GFP_KERNEL);
+	temp_u32 = kcalloc(nr_idle_states, sizeof(u32),  GFP_KERNEL);
+	temp_u64 = kcalloc(nr_idle_states, sizeof(u64),  GFP_KERNEL);
+	temp_string = kcalloc(nr_idle_states, sizeof(char *),  GFP_KERNEL);
+
+	if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
+		pr_err("Could not allocate memory for dt parsing\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Read flags */
+	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
+				       temp_u32, nr_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		pnv_idle_states[i].flags = temp_u32[i];
+
+	/* Read latencies */
+	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
+				       temp_u32, nr_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		pnv_idle_states[i].latency_ns = temp_u32[i];
+
+	/* Read residencies */
+	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
+				       temp_u32, nr_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		pnv_idle_states[i].residency_ns = temp_u32[i];
+
+	/* For power9 and later */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* Read the PSSCR value of each state */
+		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
+					       temp_u64, nr_idle_states)) {
+			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
+			rc = -EINVAL;
+			goto out;
+		}
+		for (i = 0; i < nr_idle_states; i++)
+			pnv_idle_states[i].psscr_val = temp_u64[i];
+
+		/* Read the PSSCR mask of each state */
+		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
+					       temp_u64, nr_idle_states)) {
+			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
+			rc = -EINVAL;
+			goto out;
+		}
+		for (i = 0; i < nr_idle_states; i++)
+			pnv_idle_states[i].psscr_mask = temp_u64[i];
+	}
+
+	/*
+	 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
+	 * ibm,cpu-idle-state-pmicr-val were never used and there is no
+	 * plan to use it in near future. Hence, not parsing these properties
+	 */
+
+	if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
+					  temp_string, nr_idle_states) < 0) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		strscpy(pnv_idle_states[i].name, temp_string[i],
+			PNV_IDLE_NAME_LEN);
+	nr_pnv_idle_states = nr_idle_states;	/* published only after every property parsed */
+	rc = 0;
+out:	/* only the temporaries are freed; pnv_idle_states is not freed here */
+	kfree(temp_u32);
+	kfree(temp_u64);
+	kfree(temp_string);
+	of_node_put(np);
+	return rc;
+}
+
+static int __init pnv_init_idle_states(void)
+{
+	int cpu;
+	int rc = 0;
+
+	/* Set up PACA fields */
+	for_each_present_cpu(cpu) {
+		struct paca_struct *p = paca_ptrs[cpu];
+
+		p->idle_state = 0;
+		if (cpu == cpu_first_thread_sibling(cpu))
+			p->idle_state = (1 << threads_per_core) - 1;	/* low threads_per_core bits set */
+
+		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/* P7/P8 nap */
+			p->thread_idle_state = PNV_THREAD_RUNNING;
+		} else if (pvr_version_is(PVR_POWER9)) {
+			/* P9 stop workarounds */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+			p->requested_psscr = 0;
+			atomic_set(&p->dont_stop, 0);
+#endif
+		}
+	}
+
+	/* Start from "no states"; these stay zero if parsing fails or is skipped */
+	nr_pnv_idle_states = 0;
+	supported_cpuidle_states = 0;
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		goto out;
+	rc = pnv_parse_cpuidle_dt();
+	if (rc)
+		return rc;
+	pnv_probe_idle_states();
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+			power7_fastsleep_workaround_entry = false;
+			power7_fastsleep_workaround_exit = false;
+		} else {
+			struct device *dev_root;
+			/*
+			 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+			 * workaround is needed to use fastsleep. Provide sysfs
+			 * control to choose how this workaround has to be
+			 * applied.
+			 */
+			dev_root = bus_get_dev_root(&cpu_subsys);
+			if (dev_root) {
+				device_create_file(dev_root,
+						   &dev_attr_fastsleep_workaround_applyonce);
+				put_device(dev_root);
+			}
+		}
+
+		update_subcore_sibling_mask();
+
+		if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
+			ppc_md.power_save = power7_idle;
+			power7_offline_type = PNV_THREAD_NAP;
+		}
+
+		/* Winkle or sleep, when available, replaces nap as the offline type */
+		if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
+			   (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
+			power7_offline_type = PNV_THREAD_WINKLE;
+		else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
+			   (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+			power7_offline_type = PNV_THREAD_SLEEP;
+	}
+
+	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+		if (pnv_save_sprs_for_deep_states())	/* non-zero: fall back to shallower states */
+			pnv_disable_deep_states();
+	}
+
+out:
+	return 0;
+}
+machine_subsys_initcall(powernv, pnv_init_idle_states);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
new file mode 100644
index 0000000000..877720c645
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) IBM Corporation, 2014, 2017
+ * Anton Blanchard, Rashmica Gupta.
+ */
+
+#define pr_fmt(fmt) "memtrace: " fmt
+
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <linux/numa.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+
+/* This enables us to keep track of the memory removed from each node. */
+struct memtrace_entry {
+	void *mem;		/* ioremap() mapping of the region, NULL when unmapped */
+	u64 start;		/* physical start address of the region */
+	u64 size;		/* region size in bytes */
+	u32 nid;		/* node the region was allocated on */
+	struct dentry *dir;	/* debugfs directory of this entry */
+	char name[16];		/* debugfs directory name, "%08x" of nid */
+};
+
+static DEFINE_MUTEX(memtrace_mutex);
+static u64 memtrace_size;
+
+static struct memtrace_entry *memtrace_array;
+static unsigned int memtrace_array_nr;
+
+
+static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
+			     size_t count, loff_t *ppos)
+{
+	struct memtrace_entry *ent = filp->private_data;	/* entry given to debugfs_create_file_unsafe() */
+
+	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
+}
+
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct memtrace_entry *ent = filp->private_data;
+	unsigned long ent_pages = ent->size >> PAGE_SHIFT;
+
+	/* Both the offset and offset + length must stay inside the region */
+	if (vma->vm_pgoff >= ent_pages ||
+	    ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) > ent_pages - vma->vm_pgoff)
+		return -EINVAL;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
+static const struct file_operations memtrace_fops = {	/* ops of each entry's "trace" file */
+	.llseek = default_llseek,
+	.read	= memtrace_read,
+	.open	= simple_open,
+	.mmap   = memtrace_mmap,
+};
+
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Call flush_dcache_range() over [@start, @stop)
+ * in pieces of at most @chunk bytes, with a cond_resched() after each piece.
+ * The original note here says instruction cache blocks are not invalidated.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+				       unsigned long chunk)
+{
+	unsigned long i;
+
+	for (i = start; i < stop; i += chunk) {
+		flush_dcache_range(i, min(stop, i + chunk));	/* last piece is clamped to stop */
+		cond_resched();
+	}
+}
+
+static void memtrace_clear_range(unsigned long start_pfn,
+				 unsigned long nr_pages)
+{
+	unsigned long pfn;
+
+	/* As HIGHMEM does not apply, use clear_page() directly. */
+	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+			cond_resched();	/* yield once per PAGES_PER_SECTION pages */
+		clear_page(__va(PFN_PHYS(pfn)));
+	}
+	/*
+	 * Before we go ahead and use this range as cache inhibited range
+	 * flush the cache.
+	 */
+	flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn),
+				   (unsigned long)pfn_to_kaddr(start_pfn + nr_pages),
+				   FLUSH_CHUNK_SIZE);
+}
+
+static u64 memtrace_alloc_node(u32 nid, u64 size)
+{
+	const unsigned long nr_pages = PHYS_PFN(size);
+	unsigned long pfn, start_pfn;
+	struct page *page;
+
+	/*
+	 * Trace memory needs to be aligned to the size, which is guaranteed
+	 * by alloc_contig_pages().
+	 */
+	page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
+				  __GFP_NOWARN, nid, NULL);
+	if (!page)
+		return 0;	/* 0 tells the caller that nothing was allocated */
+	start_pfn = page_to_pfn(page);
+
+	/*
+	 * Clear the range while we still have a linear mapping.
+	 *
+	 * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
+	 */
+	memtrace_clear_range(start_pfn, nr_pages);
+
+	/*
+	 * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
+	 * dumping, ...) should be touching these pages.
+	 */
+	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
+		__SetPageOffline(pfn_to_page(pfn));
+
+	arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);
+
+	return PFN_PHYS(start_pfn);	/* physical start address of the region */
+}
+
+static int memtrace_init_regions_runtime(u64 size)
+{
+	u32 nid;
+	u64 m;
+
+	/* One slot per online node; slots are filled in allocation order */
+	memtrace_array = kcalloc(num_online_nodes(),
+				sizeof(struct memtrace_entry), GFP_KERNEL);
+	if (!memtrace_array) {
+		pr_err("Failed to allocate memtrace_array\n");
+		return -ENOMEM;
+	}
+
+	for_each_online_node(nid) {
+		m = memtrace_alloc_node(nid, size);
+
+		/*
+		 * A node might not have any local memory, so warn but
+		 * continue on.
+		 */
+		if (!m) {
+			pr_err("Failed to allocate trace memory on node %d\n", nid);
+			continue;
+		}
+
+		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
+
+		memtrace_array[memtrace_array_nr].start = m;
+		memtrace_array[memtrace_array_nr].size = size;
+		memtrace_array[memtrace_array_nr].nid = nid;
+		memtrace_array_nr++;
+	}
+
+	return 0;	/* success even if some (or all) nodes got no region */
+}
+
+static struct dentry *memtrace_debugfs_dir;
+
+static int memtrace_init_debugfs(void)
+{
+	struct memtrace_entry *ent, *end = memtrace_array + memtrace_array_nr;
+	int status = 0;
+
+	/*
+	 * Map each region and expose it as <nid>/{trace,start,size}.
+	 */
+	for (ent = memtrace_array; ent < end; ent++) {
+		ent->mem = ioremap(ent->start, ent->size);
+		if (ent->mem) {
+			snprintf(ent->name, sizeof(ent->name), "%08x", ent->nid);
+			ent->dir = debugfs_create_dir(ent->name,
+						      memtrace_debugfs_dir);
+
+			debugfs_create_file_unsafe("trace", 0600, ent->dir, ent,
+						   &memtrace_fops);
+			debugfs_create_x64("start", 0400, ent->dir, &ent->start);
+			debugfs_create_x64("size", 0400, ent->dir, &ent->size);
+			continue;
+		}
+
+		/* Mapping failed: report it, remember the error, keep going */
+		pr_err("Failed to map trace memory at 0x%llx\n", ent->start);
+		status = -1;
+	}
+
+	return status;
+}
+
+static int memtrace_free(int nid, u64 start, u64 size)
+{
+	struct mhp_params params = { .pgprot = PAGE_KERNEL };
+	const unsigned long nr_pages = PHYS_PFN(size);
+	const unsigned long start_pfn = PHYS_PFN(start);
+	unsigned long pfn;
+	int ret;
+
+	ret = arch_create_linear_mapping(nid, start, size, &params);	/* undoes arch_remove_linear_mapping() */
+	if (ret)
+		return ret;	/* pages stay PageOffline and are not freed */
+
+	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
+		__ClearPageOffline(pfn_to_page(pfn));
+
+	free_contig_range(start_pfn, nr_pages);
+	return 0;
+}
+
+/*
+ * Iterate through the chunks of memory we allocated and attempt to expose
+ * them back to the kernel. Returns 0 if all were freed, else the failure count.
+ */
+static int memtrace_free_regions(void)
+{
+	int i, ret = 0;
+	struct memtrace_entry *ent;
+
+	for (i = memtrace_array_nr - 1; i >= 0; i--) {
+		ent = &memtrace_array[i];
+
+		/* We have freed this chunk previously */
+		if (ent->nid == NUMA_NO_NODE)
+			continue;
+
+		/* Remove from io mappings */
+		if (ent->mem) {
+			iounmap(ent->mem);
+			ent->mem = NULL;
+		}
+
+		if (memtrace_free(ent->nid, ent->start, ent->size)) {
+			pr_err("Failed to free trace memory on node %d\n",
+				ent->nid);
+			ret += 1;
+			continue;
+		}
+
+		/*
+		 * Memory was freed successfully so clean up references to it
+		 * so on reentry we can tell that this chunk was freed.
+		 */
+		debugfs_remove_recursive(ent->dir);
+		pr_info("Freed trace memory back on node %d\n", ent->nid);
+		ent->size = ent->start = ent->nid = NUMA_NO_NODE;
+	}
+	if (ret)
+		return ret;
+
+	/* If all chunks of memory were freed successfully, reset globals */
+	kfree(memtrace_array);
+	memtrace_array = NULL;
+	memtrace_size = 0;
+	memtrace_array_nr = 0;
+	return 0;
+}
+
+static int memtrace_enable_set(void *data, u64 val)
+{
+	int rc = -EAGAIN;
+	u64 bytes;
+
+	/*
+	 * Reject sizes that are not a multiple of the memory block size.
+	 * Zero passes this check and means "free only".
+	 */
+	bytes = memory_block_size_bytes();
+	if (val & (bytes - 1)) {
+		pr_err("Value must be aligned with 0x%llx\n", bytes);
+		return -EINVAL;
+	}
+
+	mutex_lock(&memtrace_mutex);
+
+	/* Free earlier regions, including ones left by a failed debugfs setup. */
+	if ((memtrace_size || memtrace_array) && memtrace_free_regions())
+		goto out_unlock;
+
+	if (!val) {
+		rc = 0;
+		goto out_unlock;
+	}
+
+	/* Allocate memory. */
+	if (memtrace_init_regions_runtime(val))
+		goto out_unlock;
+
+	if (memtrace_init_debugfs())
+		goto out_unlock;
+
+	memtrace_size = val;
+	rc = 0;
+out_unlock:
+	mutex_unlock(&memtrace_mutex);
+	return rc;
+}
+
+static int memtrace_enable_get(void *data, u64 *val)
+{
+	*val = memtrace_size;	/* size last enabled successfully, 0 after a full free */
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
+					memtrace_enable_set, "0x%016llx\n");
+
+static int memtrace_init(void)
+{
+	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
+						  arch_debugfs_dir);
+
+	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
+			    NULL, &memtrace_init_fops);	/* handled by memtrace_enable_get()/_set() */
+
+	return 0;
+}
+machine_device_initcall(powernv, memtrace_init);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
new file mode 100644
index 0000000000..64a9c7125c
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <asm/pnv-ocxl.h>
+#include <asm/opal.h>
+#include <misc/ocxl-config.h>
+#include "pci.h"
+
+#define PNV_OCXL_TL_P9_RECV_CAP		0x000000000000000Full
+#define PNV_OCXL_ACTAG_MAX		64
+/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
+#define PNV_OCXL_PASID_BITS		15
+#define PNV_OCXL_PASID_MAX		((1 << PNV_OCXL_PASID_BITS) - 1)
+
+#define AFU_PRESENT (1U << 31)	/* unsigned: 1 << 31 overflows a signed int */
+#define AFU_INDEX_MASK 0x3F000000
+#define AFU_INDEX_SHIFT 24
+#define ACTAG_MASK 0xFFF
+
+
+struct actag_range {
+	u16 start;	/* first actag of the range */
+	u16 count;	/* number of actags in the range */
+};
+
+struct npu_link {
+	struct list_head list;			/* entry in links_list */
+	int domain;				/* PCI domain of the device */
+	int bus;				/* PCI bus number of the device */
+	int dev;				/* PCI slot (device) number */
+	u16 fn_desired_actags[8];		/* per function: sum of actags its AFUs ask for */
+	struct actag_range fn_actags[8];	/* per function: range granted by assign_actags() */
+	bool assignment_done;			/* set once assign_actags() has run */
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+
+/*
+ * opencapi actags handling:
+ *
+ * When sending commands, the opencapi device references the memory
+ * context it's targeting with an 'actag', which is really an alias
+ * for a (BDF, pasid) combination. When it receives a command, the NPU
+ * must do a lookup of the actag to identify the memory context. The
+ * hardware supports a finite number of actags per link (64 for
+ * POWER9).
+ *
+ * The device can carry multiple functions, and each function can have
+ * multiple AFUs. Each AFU advertises in its config space the number
+ * of desired actags. The host must configure in the config space of
+ * the AFU how many actags the AFU is really allowed to use (which can
+ * be less than what the AFU desires).
+ *
+ * When a PCI function is probed by the driver, it has no visibility
+ * about the other PCI functions and how many actags they'd like,
+ * which makes it impossible to distribute actags fairly among AFUs.
+ *
+ * Unfortunately, the only way to know how many actags a function
+ * desires is by looking at the data for each AFU in the config space
+ * and add them up. Similarly, the only way to know how many actags
+ * all the functions of the physical device desire is by adding the
+ * previously computed function counts. Then we can match that against
+ * what the hardware supports.
+ *
+ * To get a comprehensive view, we use a 'pci fixup': at the end of
+ * PCI enumeration, each function counts how many actags its AFUs
+ * desire and we save it in a 'npu_link' structure, shared between all
+ * the PCI functions of a same device. Therefore, when the first
+ * function is probed by the driver, we can get an idea of the total
+ * count of desired actags for the device, and assign the actags to
+ * the AFUs, by pro-rating if needed.
+ */
+
+static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
+{
+	int vsec = pos;	/* offset passed to the first pci_find_next_ext_capability() call */
+	u16 vendor, id;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec,
+						    OCXL_EXT_CAP_ID_DVSEC))) {
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+				&vendor);
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+			return vsec;	/* offset of the matching IBM DVSEC */
+	}
+	return 0;	/* no matching DVSEC found */
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+	int vsec = 0;	/* 0: begin with the first AFU control DVSEC */
+	u8 idx;
+
+	while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
+					   vsec))) {
+		pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+				&idx);
+		if (idx == afu_idx)
+			return vsec;	/* control DVSEC belonging to afu_idx */
+	}
+	return 0;	/* no AFU control DVSEC carries afu_idx */
+}
+
+static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
+{
+	int pos;
+	u32 val;
+
+	pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
+					OCXL_DVSEC_FUNC_ID);
+	if (!pos)
+		return -ESRCH;	/* function DVSEC not found */
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+	if (val & AFU_PRESENT)
+		*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
+	else
+		*afu_idx = -1;	/* AFU_PRESENT bit clear */
+	return 0;
+}
+
+static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
+{
+	int pos;
+	u16 actag_sup;
+
+	pos = find_dvsec_afu_ctrl(dev, afu_idx);
+	if (!pos)
+		return -ESRCH;	/* no control DVSEC for this AFU index */
+
+	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
+			&actag_sup);
+	*actag = actag_sup & ACTAG_MASK;	/* only the low 12 bits are kept */
+	return 0;
+}
+
+static struct npu_link *find_link(struct pci_dev *dev)
+{
+	struct npu_link *link;
+
+	list_for_each_entry(link, &links_list, list) {
+		/* The functions of a device all share the same link */
+		if (link->domain == pci_domain_nr(dev->bus) &&
+			link->bus == dev->bus->number &&
+			link->dev == PCI_SLOT(dev->devfn)) {
+			return link;
+		}
+	}
+
+	/* link doesn't exist yet. Allocate one */
+	link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
+	if (!link)
+		return NULL;	/* the only failure case: allocation failed */
+	link->domain = pci_domain_nr(dev->bus);
+	link->bus = dev->bus->number;
+	link->dev = PCI_SLOT(dev->devfn);
+	list_add(&link->list, &links_list);	/* callers in this file hold links_list_lock */
+	return link;
+}
+
+static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct npu_link *link;
+	int rc, afu_idx = -1, i, actag;
+
+	if (!machine_is(powernv))
+		return;
+
+	if (phb->type != PNV_PHB_NPU_OCAPI)
+		return;	/* only PNV_PHB_NPU_OCAPI PHBs are handled */
+
+	mutex_lock(&links_list_lock);
+
+	link = find_link(dev);
+	if (!link) {
+		dev_warn(&dev->dev, "couldn't update actag information\n");
+		mutex_unlock(&links_list_lock);
+		return;
+	}
+
+	/*
+	 * Check how many actags are desired for the AFUs under that
+	 * function and add it to the count for the link
+	 */
+	rc = get_max_afu_index(dev, &afu_idx);
+	if (rc) {
+		/* Most likely an invalid config space */
+		dev_dbg(&dev->dev, "couldn't find AFU information\n");
+		afu_idx = -1;	/* the loop below then runs zero times */
+	}
+
+	link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
+	for (i = 0; i <= afu_idx; i++) {
+		/*
+		 * AFU index 'holes' are allowed. So don't fail if we
+		 * can't read the actag info for an index
+		 */
+		rc = get_actag_count(dev, i, &actag);
+		if (rc)
+			continue;
+		link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
+	}
+	dev_dbg(&dev->dev, "total actags for function: %d\n",
+		link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
+
+	mutex_unlock(&links_list_lock);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
+
+/*
+ * Share of the per-link actags for one function: everything it asked for
+ * when the link total fits, otherwise its pro-rated fraction (rounded down).
+ */
+static u16 assign_fn_actags(u16 desired, u16 total)
+{
+	if (total > PNV_OCXL_ACTAG_MAX)
+		return PNV_OCXL_ACTAG_MAX * desired / total;
+
+	return desired;
+}
+
+static void assign_actags(struct npu_link *link)
+{
+	u16 actag_count, range_start = 0, total_desired = 0;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		total_desired += link->fn_desired_actags[i];
+
+	for (i = 0; i < 8; i++) {
+		if (link->fn_desired_actags[i]) {
+			actag_count = assign_fn_actags(
+				link->fn_desired_actags[i],
+				total_desired);
+			link->fn_actags[i].start = range_start;
+			link->fn_actags[i].count = actag_count;
+			range_start += actag_count;	/* exclusive end of the range just assigned */
+			WARN_ON(range_start > PNV_OCXL_ACTAG_MAX);	/* using exactly MAX actags is valid */
+		}
+		pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
+			link->domain, link->bus, link->dev, i,
+			link->fn_actags[i].start, link->fn_actags[i].count,
+			link->fn_desired_actags[i]);
+	}
+	link->assignment_done = true;
+}
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+		u16 *supported)
+{
+	struct npu_link *link;
+
+	mutex_lock(&links_list_lock);
+
+	link = find_link(dev);
+	if (!link) {
+		dev_err(&dev->dev, "actag information not found\n");
+		mutex_unlock(&links_list_lock);
+		return -ENODEV;
+	}
+	/*
+	 * On p9, we only have 64 actags per link, so they must be
+	 * shared by all the functions of the same adapter. We counted
+	 * the desired actag counts during PCI enumeration, so that we
+	 * can allocate a pro-rated number of actags to each function.
+	 */
+	if (!link->assignment_done)
+		assign_actags(link);	/* done once per link, on the first query */
+
+	*base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;	/* first actag granted */
+	*enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;	/* number granted */
+	*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];	/* number desired */
+
+	mutex_unlock(&links_list_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
+
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
+{
+	struct npu_link *link;
+	int rc = -EINVAL;
+
+	/*
+	 * How many PASIDs (process address space IDs) a function may
+	 * use depends on the number of functions on the device: the
+	 * NPU must be told how many bits go to the PASID and how many
+	 * to the function BDF identifier.
+	 *
+	 * Only one AFU-carrying function is supported for now.
+	 */
+	mutex_lock(&links_list_lock);
+
+	link = find_link(dev);
+	if (!link) {
+		dev_err(&dev->dev, "actag information not found\n");
+		mutex_unlock(&links_list_lock);
+		return -ENODEV;
+	}
+
+	/*
+	 * Only a function that asked for actags gets PASIDs, else -EINVAL
+	 */
+	if (link->fn_desired_actags[PCI_FUNC(dev->devfn)]) {
+		*count = PNV_OCXL_PASID_MAX;
+		rc = 0;
+	}
+
+	mutex_unlock(&links_list_lock);
+	dev_dbg(&dev->dev, "%d PASIDs available for function\n",
+		rc ? 0 : *count);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
+
+/* OR the 4-bit 'rate' of template 'templ' into buf (two templates per byte). */
+static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
+{
+	unsigned int pos = PNV_OCXL_TL_MAX_TEMPLATE - templ;
+
+	if (WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE))
+		return;	/* the wrapped index would land far outside buf */
+	buf[pos / 2] |= rate << (4 * (1 - (pos % 2)));
+}
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+			char *rate_buf, int rate_buf_size)
+{
+	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+		return -EINVAL;
+	/*
+	 * The TL capabilities are a characteristic of the NPU, not of
+	 * the device, so hard-coded values are returned ('dev' is unused).
+	 *
+	 * The receiving rate of each template is encoded on 4 bits in
+	 * rate_buf, which is cleared before template 2 is filled in.
+	 * On P9:
+	 * - templates 0 -> 3 are supported
+	 * - templates 0, 1 and 3 have a 0 receiving rate
+	 * - template 2 has receiving rate of 1 (extra cycle)
+	 */
+	memset(rate_buf, 0, rate_buf_size);
+	set_templ_rate(2, 1, rate_buf);
+	*cap = PNV_OCXL_TL_P9_RECV_CAP;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
+
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+			uint64_t rate_buf_phys, int rate_buf_size)
+{
+	struct pnv_phb *phb;
+	int rc;
+
+	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+		return -EINVAL;
+
+	/* Hand the rate buffer to OPAL; any firmware error maps to -EINVAL */
+	phb = pci_bus_to_host(dev->bus)->private_data;
+	rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
+			rate_buf_phys, rate_buf_size);
+	if (!rc)
+		return 0;
+	dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
+{
+	struct device_node *np = dev->dev.of_node;
+	int rc;
+
+	/* The translation interrupt is read from the device's DT node */
+	rc = of_property_read_u32(np, "ibm,opal-xsl-irq", hwirq);
+	if (rc)
+		dev_err(&dev->dev,
+			"Can't get translation interrupt for device\n");
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
+
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+			void __iomem *tfc, void __iomem *pe_handle)
+{
+	iounmap(dsisr);		/* DSISR register mapping */
+	iounmap(dar);		/* DAR register mapping */
+	iounmap(tfc);		/* TFC register mapping */
+	iounmap(pe_handle);	/* PE_HANDLE register mapping */
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
+
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+			void __iomem **dar, void __iomem **tfc,
+			void __iomem **pe_handle)
+{
+	void __iomem *regs[4];
+	int n, rc = 0;
+	u64 addr;
+
+	/*
+	 * The "ibm,opal-xsl-mmio" property holds the mmio addresses of
+	 * the DSISR, DAR, TFC and PE_HANDLE registers, in that order.
+	 */
+	for (n = 0; n < 4; n++) {
+		rc = of_property_read_u64_index(dev->dev.of_node,
+						"ibm,opal-xsl-mmio", n, &addr);
+		if (rc)
+			goto err_unmap;
+		regs[n] = ioremap(addr, 8);
+		if (!regs[n]) {
+			rc = -EINVAL;
+			goto err_unmap;
+		}
+	}
+	*dsisr = regs[0];
+	*dar = regs[1];
+	*tfc = regs[2];
+	*pe_handle = regs[3];
+	return 0;
+
+err_unmap:
+	dev_err(&dev->dev, "Can't map translation mmio registers\n");
+	/* Undo, newest first, the mappings that did succeed */
+	while (n-- > 0)
+		iounmap(regs[n]);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
+
+struct spa_data {
+	u64 phb_opal_id;	/* opal_id of the PHB found through the device's bus */
+	u32 bdfn;		/* pci_dev_id() of the device */
+};
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+		void **platform_data)
+{
+	struct pnv_phb *phb = pci_bus_to_host(dev->bus)->private_data;
+	struct spa_data *data;
+	int rc;
+
+	/* Remembers which PHB/device the SPA belongs to, for later calls */
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->phb_opal_id = phb->opal_id;
+	data->bdfn = pci_dev_id(dev);
+
+	rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn,
+				virt_to_phys(spa_mem), PE_mask);
+	if (rc)
+		goto err_free;
+	*platform_data = data;
+	return 0;
+
+err_free:
+	dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
+	kfree(data);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
+
+void pnv_ocxl_spa_release(void *platform_data)
+{
+	struct spa_data *data = platform_data;
+	int rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
+
+	/* Nothing more can be done on failure than to report it */
+	WARN_ON(rc);
+	kfree(data);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
+
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
+{
+	struct spa_data *spa = platform_data;
+
+	return opal_npu_spa_clear_cache(spa->phb_opal_id, spa->bdfn, pe_handle);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
+
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+		      uint64_t lpcr, void __iomem **arva)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	u64 mmio_atsd;
+	int rc;
+
+	/*
+	 * Physical address of the ATSD registers. A write to the ATSD
+	 * LAUNCH register starts a TLB Invalidate shoot down.
+	 */
+	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+					0, &mmio_atsd);
+	if (rc) {
+		dev_info(&dev->dev, "No available ATSD found\n");
+		return rc;
+	}
+
+	/*
+	 * Assign a register set to the Logical Partition and set the
+	 * MMIO ATSD LPARID register to the required value.
+	 */
+	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
+			       lparid, lpcr);
+	if (rc) {
+		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
+		return rc;
+	}
+
+	*arva = ioremap(mmio_atsd, 24);
+	if (*arva)
+		return 0;
+
+	dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
+
+void pnv_ocxl_unmap_lpar(void __iomem *arva)
+{
+	iounmap(arva);	/* counterpart of the ioremap() in pnv_ocxl_map_lpar() */
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
+
+void pnv_ocxl_tlb_invalidate(void __iomem *arva,
+			     unsigned long pid,
+			     unsigned long addr,
+			     unsigned long page_size)
+{
+	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
+	u64 val = 0ull;
+	int pend;
+	u8 size;
+
+	if (!(arva))	/* no ATSD register mapping was given: nothing to do */
+		return;
+
+	if (addr) {
+		/* A non-zero addr is first loaded into the Abbreviated
+		 * Virtual Address register
+		 */
+		val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
+		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
+	}
+
+	/* Build the LAUNCH register value: the write at the end of this
+	 * sequence initiates the TLB Invalidate shoot down
+	 */
+	val = PNV_OCXL_ATSD_LNCH_R;
+	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
+	if (addr)
+		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
+	else {
+		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
+		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
+	}
+	val |= PNV_OCXL_ATSD_LNCH_PRS;
+	/* Actual Page Size to be invalidated
+	 * 000 4KB
+	 * 101 64KB (also used for any page_size not listed here)
+	 * 001 2MB
+	 * 010 1GB
+	 */
+	size = 0b101;
+	if (page_size == 0x1000)
+		size = 0b000;
+	if (page_size == 0x200000)
+		size = 0b001;
+	if (page_size == 0x40000000)
+		size = 0b010;
+	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
+	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
+	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
+
+	/* Poll the ATSD status register until its top bit clears, i.e. the
+	 * TLB Invalidate has been completed, or give up after the timeout.
+	 */
+	val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+	pend = val >> 63;
+
+	while (pend) {
+		if (time_after_eq(jiffies, timeout)) {
+			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
+			       __func__, val, pid);
+			return;
+		}
+		cpu_relax();
+		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+		pend = val >> 63;
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
new file mode 100644
index 0000000000..c094fdf582
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL asynchronous completion interfaces
+ *
+ * Copyright 2013-2017 IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/gfp.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+enum opal_async_token_state {
+	ASYNC_TOKEN_UNALLOCATED = 0,	/* free, can be handed out */
+	ASYNC_TOKEN_ALLOCATED,		/* handed out by __opal_async_get_token() */
+	ASYNC_TOKEN_DISPATCHED,		/* an interruptible wait was started */
+	ASYNC_TOKEN_ABANDONED,		/* released while DISPATCHED */
+	ASYNC_TOKEN_COMPLETED		/* completion message received */
+};
+
+struct opal_async_token {
+	enum opal_async_token_state state;	/* changed under opal_async_comp_lock */
+	struct opal_msg response;		/* copy of the completion message */
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
+static DEFINE_SPINLOCK(opal_async_comp_lock);
+static struct semaphore opal_async_sem;
+static unsigned int opal_max_async_tokens;
+static struct opal_async_token *opal_async_tokens;
+
+/* Claim the first free token; returns its index or -EBUSY if none is free. */
+static int __opal_async_get_token(void)
+{
+	unsigned long flags;
+	int idx;
+
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	for (idx = 0; idx < opal_max_async_tokens; idx++) {
+		if (opal_async_tokens[idx].state != ASYNC_TOKEN_UNALLOCATED)
+			continue;
+		opal_async_tokens[idx].state = ASYNC_TOKEN_ALLOCATED;
+		spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+		return idx;
+	}
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+
+	return -EBUSY;
+}
+
+/*
+ * Sleeps interruptibly until a token is free; returns the token index or a
+ * negative errno (-ERESTARTSYS if interrupted). If the token is used in an
+ * opal call and opal returns OPAL_ASYNC_COMPLETION you MUST call one of
+ * opal_async_wait_response() or opal_async_wait_response_interruptible()
+ * at least once before calling another opal_async_* function
+ */
+int opal_async_get_token_interruptible(void)
+{
+	int token;
+
+	/* The semaphore counts free tokens: wait until one is available */
+	if (down_interruptible(&opal_async_sem))
+		return -ERESTARTSYS;
+
+	token = __opal_async_get_token();
+	if (token < 0)
+		up(&opal_async_sem);	/* nothing was claimed: give the slot back */
+
+	return token;
+}
+EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
+
+/*
+ * Returns 0 if the token was freed, 1 if it could not be freed (yet) and
+ * -EINVAL if it is out of range.
+ */
+static int __opal_async_release_token(int token)
+{
+	struct opal_async_token *tok;
+	unsigned long flags;
+	int rc = 1;
+
+	if (token < 0 || token >= opal_max_async_tokens) {
+		pr_err("%s: Passed token is out of range, token %d\n",
+				__func__, token);
+		return -EINVAL;
+	}
+	tok = &opal_async_tokens[token];
+
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	if (tok->state == ASYNC_TOKEN_COMPLETED ||
+	    tok->state == ASYNC_TOKEN_ALLOCATED) {
+		tok->state = ASYNC_TOKEN_UNALLOCATED;
+		rc = 0;
+	} else if (tok->state == ASYNC_TOKEN_DISPATCHED) {
+		/*
+		 * Still waiting for OPAL: mark it ABANDONED so that the
+		 * response handling code knows no one cares and frees it.
+		 */
+		tok->state = ASYNC_TOKEN_ABANDONED;
+	}
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+
+	return rc;
+}
+
+int opal_async_release_token(int token)
+{
+	int ret = __opal_async_release_token(token);
+
+	/* Only a token that was really freed returns its semaphore slot */
+	if (ret == 0)
+		up(&opal_async_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(opal_async_release_token);
+
+int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
+{
+	struct opal_async_token *tok;
+
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Invalid token passed\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!msg) {
+		pr_err("%s: Invalid message pointer passed\n", __func__);
+		return -EINVAL;
+	}
+	tok = &opal_async_tokens[token];
+
+	/*
+	 * The wait is uninterruptible, so the token need not be marked
+	 * DISPATCHED: wait_event() only returns once it has completed.
+	 *
+	 * Kick the poller first, which speeds things up on platforms
+	 * or simulators where the interrupts aren't functional.
+	 */
+	opal_wake_poller();
+	wait_event(opal_async_wait, tok->state == ASYNC_TOKEN_COMPLETED);
+	memcpy(msg, &tok->response, sizeof(*msg));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response);
+
+int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
+{
+	unsigned long flags;
+	int ret;
+
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Invalid token passed\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!msg) {
+		pr_err("%s: Invalid message pointer passed\n", __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * The first time this gets called we mark the token as DISPATCHED
+	 * so that if wait_event_interruptible() returns not zero and the
+	 * caller frees the token, the release path only marks it ABANDONED
+	 * and the token is really freed once the response comes.
+	 *
+	 * Only change if the token is ALLOCATED - it may have been
+	 * completed even before the caller gets around to calling this
+	 * the first time. The state is peeked at without the lock and
+	 * checked again with the lock held before it is changed.
+	 *
+	 * The comment above opal_async_get_token_interruptible() states
+	 * that if the opal call returns OPAL_ASYNC_COMPLETION to the
+	 * caller then the caller *must* call this or the not
+	 * interruptible version before doing anything else with the token.
+	 */
+	if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) {
+		spin_lock_irqsave(&opal_async_comp_lock, flags);
+		if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED)
+			opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED;
+		spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+	}
+
+	/*
+	 * Wakeup the poller before we wait for events to speed things
+	 * up on platforms or simulators where the interrupts aren't
+	 * functional.
+	 */
+	opal_wake_poller();
+	ret = wait_event_interruptible(opal_async_wait,
+			opal_async_tokens[token].state ==
+			ASYNC_TOKEN_COMPLETED);
+	if (!ret)	/* msg is left untouched when the wait was interrupted */
+		memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
+
+/* Called from interrupt context */
+static int opal_async_comp_event(struct notifier_block *nb,
+		unsigned long msg_type, void *msg)
+{
+	struct opal_msg *comp_msg = msg;
+	enum opal_async_token_state state;
+	unsigned long flags;
+	uint64_t token;
+
+	if (msg_type != OPAL_MSG_ASYNC_COMP)
+		return 0;
+	/* The token comes from firmware: check it before indexing the table */
+	token = be64_to_cpu(comp_msg->params[0]);
+	if (WARN_ON_ONCE(token >= opal_max_async_tokens))
+		return 0;
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	state = opal_async_tokens[token].state;
+	opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+	if (state == ASYNC_TOKEN_ABANDONED) {
+		/* Free the token, no one else will */
+		opal_async_release_token(token);
+	} else {
+		memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg));
+		wake_up(&opal_async_wait);
+	}
+	return 0;
+}
+
+static struct notifier_block opal_async_comp_nb = {
+		.notifier_call	= opal_async_comp_event,	/* registered for OPAL_MSG_ASYNC_COMP */
+		.next		= NULL,
+		.priority	= 0,
+};
+
+int __init opal_async_comp_init(void)
+{
+	struct device_node *opal_node;
+	const __be32 *async;
+	int err;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_err("%s: Opal node not found\n", __func__);
+		return -ENOENT;
+	}
+
+	/* This property gives the number of entries of the token table */
+	async = of_get_property(opal_node, "opal-msg-async-num", NULL);
+	if (!async) {
+		pr_err("%s: %pOF has no opal-msg-async-num\n",
+				__func__, opal_node);
+		err = -ENOENT;
+		goto out_opal_node;
+	}
+
+	opal_max_async_tokens = be32_to_cpup(async);
+	opal_async_tokens = kcalloc(opal_max_async_tokens,
+			sizeof(*opal_async_tokens), GFP_KERNEL);
+	if (!opal_async_tokens) {
+		err = -ENOMEM;
+		goto out_opal_node;
+	}
+
+	err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
+			&opal_async_comp_nb);
+	if (err) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+				__func__, err);
+		kfree(opal_async_tokens);
+		goto out_opal_node;
+	}
+
+	/* One semaphore slot per token, all of them free to start with */
+	sema_init(&opal_async_sem, opal_max_async_tokens);
+
+out_opal_node:
+	of_node_put(opal_node);
+	return err;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
new file mode 100644
index 0000000000..021b0ec29e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/percpu.h>
+#include <linux/jump_label.h>
+#include <asm/interrupt.h>
+#include <asm/opal-api.h>
+#include <asm/trace.h>
+#include <asm/asm-prototypes.h>
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Since the tracing code might execute OPAL calls we need to guard against
+ * recursion.
+ */
+static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
+
+/*
+ * Fire the opal_entry tracepoint for an OPAL call.
+ *
+ * @a0..@a7: arguments of the OPAL call, passed to the tracepoint as an
+ *           array of eight unsigned longs
+ * @opcode:  OPAL call number
+ *
+ * Does nothing when the per-CPU depth counter is already non-zero, i.e.
+ * when the call was itself made while a tracepoint was running.
+ */
+static void __trace_opal_entry(s64 a0, s64 a1, s64 a2, s64 a3,
+			       s64 a4, s64 a5, s64 a6, s64 a7,
+			       unsigned long opcode)
+{
+	unsigned long args[8] = { a0, a1, a2, a3, a4, a5, a6, a7 };
+	unsigned int *depth = this_cpu_ptr(&opal_trace_depth);
+
+	/* Already inside a traced OPAL call on this CPU: do not recurse */
+	if (*depth)
+		return;
+
+	(*depth)++;
+	trace_opal_entry(opcode, &args[0]);
+	(*depth)--;
+}
+
+static void __trace_opal_exit(unsigned long opcode, unsigned long retval)
+{
+	unsigned int *depth = this_cpu_ptr(&opal_trace_depth);
+
+	/*
+	 * Skip the tracepoint when already inside one on this CPU
+	 */
+	if (!*depth) {
+		(*depth)++;
+		trace_opal_exit(opcode, retval);
+		(*depth)--;
+	}
+}
+
+static DEFINE_STATIC_KEY_FALSE(opal_tracepoint_key);	/* tested through DO_TRACE */
+
+int opal_tracepoint_regfunc(void)
+{
+	static_branch_inc(&opal_tracepoint_key);	/* one more user of the key */
+	return 0;
+}
+
+void opal_tracepoint_unregfunc(void)
+{
+	static_branch_dec(&opal_tracepoint_key);	/* undoes one regfunc call */
+}
+
+static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+			     s64 a4, s64 a5, s64 a6, s64 a7,
+			      unsigned long opcode, unsigned long msr)
+{
+	s64 ret;	/* OPAL return value, also given to the exit tracepoint */
+
+	__trace_opal_entry(a0, a1, a2, a3, a4, a5, a6, a7, opcode);
+	ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+	__trace_opal_exit(opcode, ret);
+
+	return ret;
+}
+
+#define DO_TRACE (static_branch_unlikely(&opal_tracepoint_key))
+
+#else /* CONFIG_TRACEPOINTS */
+
+static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+			     s64 a4, s64 a5, s64 a6, s64 a7,
+			      unsigned long opcode, unsigned long msr)
+{
+	return 0;	/* stub: DO_TRACE is false here, so opal_call() never gets here */
+}
+
+#define DO_TRACE false
+#endif /* CONFIG_TRACEPOINTS */
+
+static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
+	     int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode)
+{
+	unsigned long flags;
+	unsigned long msr = mfmsr();
+	bool mmu = (msr & (MSR_IR|MSR_DR));	/* MSR_IR or MSR_DR is set */
+	int64_t ret;
+
+	/* OPAL call / firmware may use SRR and/or HSRR */
+	srr_regs_clobbered();
+
+	msr &= ~MSR_EE;	/* the msr handed to __opal_call() has MSR_EE cleared */
+
+	if (unlikely(!mmu))	/* no tracing or irq save/restore on this path */
+		return __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+
+	local_save_flags(flags);
+	hard_irq_disable();
+
+	if (DO_TRACE) {	/* false unless opal_tracepoint_regfunc() enabled the key */
+		ret = __opal_call_trace(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+	} else {
+		ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+	}
+
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+#define OPAL_CALL(name, opcode)	/* prototype, then the wrapper itself */	\
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3,	\
+	     int64_t a4, int64_t a5, int64_t a6, int64_t a7);	\
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3,	\
+	     int64_t a4, int64_t a5, int64_t a6, int64_t a7)	\
+{								\
+	return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
+}
+
+OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
+OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
+OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
+OPAL_CALL(opal_console_write_buffer_space,	OPAL_CONSOLE_WRITE_BUFFER_SPACE);
+OPAL_CALL(opal_rtc_read,			OPAL_RTC_READ);
+OPAL_CALL(opal_rtc_write,			OPAL_RTC_WRITE);
+OPAL_CALL(opal_cec_power_down,			OPAL_CEC_POWER_DOWN);
+OPAL_CALL(opal_cec_reboot,			OPAL_CEC_REBOOT);
+OPAL_CALL(opal_cec_reboot2,			OPAL_CEC_REBOOT2);
+OPAL_CALL(opal_read_nvram,			OPAL_READ_NVRAM);
+OPAL_CALL(opal_write_nvram,			OPAL_WRITE_NVRAM);
+OPAL_CALL(opal_handle_interrupt,		OPAL_HANDLE_INTERRUPT);
+OPAL_CALL(opal_poll_events,			OPAL_POLL_EVENTS);
+OPAL_CALL(opal_pci_set_hub_tce_memory,		OPAL_PCI_SET_HUB_TCE_MEMORY);
+OPAL_CALL(opal_pci_set_phb_tce_memory,		OPAL_PCI_SET_PHB_TCE_MEMORY);
+OPAL_CALL(opal_pci_config_read_byte,		OPAL_PCI_CONFIG_READ_BYTE);
+OPAL_CALL(opal_pci_config_read_half_word,	OPAL_PCI_CONFIG_READ_HALF_WORD);
+OPAL_CALL(opal_pci_config_read_word,		OPAL_PCI_CONFIG_READ_WORD);
+OPAL_CALL(opal_pci_config_write_byte,		OPAL_PCI_CONFIG_WRITE_BYTE);
+OPAL_CALL(opal_pci_config_write_half_word,	OPAL_PCI_CONFIG_WRITE_HALF_WORD);
+OPAL_CALL(opal_pci_config_write_word,		OPAL_PCI_CONFIG_WRITE_WORD);
+OPAL_CALL(opal_set_xive,			OPAL_SET_XIVE);
+OPAL_CALL(opal_get_xive,			OPAL_GET_XIVE);
+OPAL_CALL(opal_register_exception_handler,	OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
+OPAL_CALL(opal_pci_eeh_freeze_status,		OPAL_PCI_EEH_FREEZE_STATUS);
+OPAL_CALL(opal_pci_eeh_freeze_clear,		OPAL_PCI_EEH_FREEZE_CLEAR);
+OPAL_CALL(opal_pci_eeh_freeze_set,		OPAL_PCI_EEH_FREEZE_SET);
+OPAL_CALL(opal_pci_err_inject,			OPAL_PCI_ERR_INJECT);
+OPAL_CALL(opal_pci_shpc,			OPAL_PCI_SHPC);
+OPAL_CALL(opal_pci_phb_mmio_enable,		OPAL_PCI_PHB_MMIO_ENABLE);
+OPAL_CALL(opal_pci_set_phb_mem_window,		OPAL_PCI_SET_PHB_MEM_WINDOW);
+OPAL_CALL(opal_pci_map_pe_mmio_window,		OPAL_PCI_MAP_PE_MMIO_WINDOW);
+OPAL_CALL(opal_pci_set_phb_table_memory,	OPAL_PCI_SET_PHB_TABLE_MEMORY);
+OPAL_CALL(opal_pci_set_pe,			OPAL_PCI_SET_PE);
+OPAL_CALL(opal_pci_set_peltv,			OPAL_PCI_SET_PELTV);
+OPAL_CALL(opal_pci_get_xive_reissue,		OPAL_PCI_GET_XIVE_REISSUE);
+OPAL_CALL(opal_pci_set_xive_reissue,		OPAL_PCI_SET_XIVE_REISSUE);
+OPAL_CALL(opal_pci_set_xive_pe,			OPAL_PCI_SET_XIVE_PE);
+OPAL_CALL(opal_get_xive_source,			OPAL_GET_XIVE_SOURCE);
+OPAL_CALL(opal_get_msi_32,			OPAL_GET_MSI_32);
+OPAL_CALL(opal_get_msi_64,			OPAL_GET_MSI_64);
+OPAL_CALL(opal_start_cpu,			OPAL_START_CPU);
+OPAL_CALL(opal_query_cpu_status,		OPAL_QUERY_CPU_STATUS);
+OPAL_CALL(opal_write_oppanel,			OPAL_WRITE_OPPANEL);
+OPAL_CALL(opal_pci_map_pe_dma_window,		OPAL_PCI_MAP_PE_DMA_WINDOW);
+OPAL_CALL(opal_pci_map_pe_dma_window_real,	OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
+OPAL_CALL(opal_pci_reset,			OPAL_PCI_RESET);
+OPAL_CALL(opal_pci_get_hub_diag_data,		OPAL_PCI_GET_HUB_DIAG_DATA);
+OPAL_CALL(opal_pci_get_phb_diag_data,		OPAL_PCI_GET_PHB_DIAG_DATA);
+OPAL_CALL(opal_pci_fence_phb,			OPAL_PCI_FENCE_PHB);
+OPAL_CALL(opal_pci_reinit,			OPAL_PCI_REINIT);
+OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
+OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
+OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_get_dpo_status,			OPAL_GET_DPO_STATUS);
+OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error,			OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll,			OPAL_PCI_POLL);
+OPAL_CALL(opal_pci_msi_eoi,			OPAL_PCI_MSI_EOI);
+OPAL_CALL(opal_pci_get_phb_diag_data2,		OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_xscom_read,			OPAL_XSCOM_READ);
+OPAL_CALL(opal_xscom_write,			OPAL_XSCOM_WRITE);
+OPAL_CALL(opal_lpc_read,			OPAL_LPC_READ);
+OPAL_CALL(opal_lpc_write,			OPAL_LPC_WRITE);
+OPAL_CALL(opal_return_cpu,			OPAL_RETURN_CPU);
+OPAL_CALL(opal_reinit_cpus,			OPAL_REINIT_CPUS);
+OPAL_CALL(opal_read_elog,			OPAL_ELOG_READ);
+OPAL_CALL(opal_send_ack_elog,			OPAL_ELOG_ACK);
+OPAL_CALL(opal_get_elog_size,			OPAL_ELOG_SIZE);
+OPAL_CALL(opal_resend_pending_logs,		OPAL_ELOG_RESEND);
+OPAL_CALL(opal_write_elog,			OPAL_ELOG_WRITE);
+OPAL_CALL(opal_validate_flash,			OPAL_FLASH_VALIDATE);
+OPAL_CALL(opal_manage_flash,			OPAL_FLASH_MANAGE);
+OPAL_CALL(opal_update_flash,			OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_resync_timebase,			OPAL_RESYNC_TIMEBASE);
+OPAL_CALL(opal_check_token,			OPAL_CHECK_TOKEN);
+OPAL_CALL(opal_dump_init,			OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info,			OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_info2,			OPAL_DUMP_INFO2);
+OPAL_CALL(opal_dump_read,			OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack,			OPAL_DUMP_ACK);
+OPAL_CALL(opal_get_msg,				OPAL_GET_MSG);
+OPAL_CALL(opal_write_oppanel_async,		OPAL_WRITE_OPPANEL_ASYNC);
+OPAL_CALL(opal_check_completion,		OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification,	OPAL_DUMP_RESEND);
+OPAL_CALL(opal_sync_host_reboot,		OPAL_SYNC_HOST_REBOOT);
+OPAL_CALL(opal_sensor_read,			OPAL_SENSOR_READ);
+OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
+OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
+OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
+OPAL_CALL(opal_handle_hmi2,			OPAL_HANDLE_HMI2);
+OPAL_CALL(opal_config_cpu_idle_state,		OPAL_CONFIG_CPU_IDLE_STATE);
+OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
+OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
+OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CAPI_MODE);
+OPAL_CALL(opal_tpo_write,			OPAL_WRITE_TPO);
+OPAL_CALL(opal_tpo_read,			OPAL_READ_TPO);
+OPAL_CALL(opal_ipmi_send,			OPAL_IPMI_SEND);
+OPAL_CALL(opal_ipmi_recv,			OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request,			OPAL_I2C_REQUEST);
+OPAL_CALL(opal_flash_read,			OPAL_FLASH_READ);
+OPAL_CALL(opal_flash_write,			OPAL_FLASH_WRITE);
+OPAL_CALL(opal_flash_erase,			OPAL_FLASH_ERASE);
+OPAL_CALL(opal_prd_msg,				OPAL_PRD_MSG);
+OPAL_CALL(opal_leds_get_ind,			OPAL_LEDS_GET_INDICATOR);
+OPAL_CALL(opal_leds_set_ind,			OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush,			OPAL_CONSOLE_FLUSH);
+OPAL_CALL(opal_get_device_tree,			OPAL_GET_DEVICE_TREE);
+OPAL_CALL(opal_pci_get_presence_state,		OPAL_PCI_GET_PRESENCE_STATE);
+OPAL_CALL(opal_pci_get_power_state,		OPAL_PCI_GET_POWER_STATE);
+OPAL_CALL(opal_pci_set_power_state,		OPAL_PCI_SET_POWER_STATE);
+OPAL_CALL(opal_int_get_xirr,			OPAL_INT_GET_XIRR);
+OPAL_CALL(opal_int_set_cppr,			OPAL_INT_SET_CPPR);
+OPAL_CALL(opal_int_eoi,				OPAL_INT_EOI);
+OPAL_CALL(opal_int_set_mfrr,			OPAL_INT_SET_MFRR);
+OPAL_CALL(opal_pci_tce_kill,			OPAL_PCI_TCE_KILL);
+OPAL_CALL(opal_nmmu_set_ptcr,			OPAL_NMMU_SET_PTCR);
+OPAL_CALL(opal_xive_reset,			OPAL_XIVE_RESET);
+OPAL_CALL(opal_xive_get_irq_info,		OPAL_XIVE_GET_IRQ_INFO);
+OPAL_CALL(opal_xive_get_irq_config,		OPAL_XIVE_GET_IRQ_CONFIG);
+OPAL_CALL(opal_xive_set_irq_config,		OPAL_XIVE_SET_IRQ_CONFIG);
+OPAL_CALL(opal_xive_get_queue_info,		OPAL_XIVE_GET_QUEUE_INFO);
+OPAL_CALL(opal_xive_set_queue_info,		OPAL_XIVE_SET_QUEUE_INFO);
+OPAL_CALL(opal_xive_donate_page,		OPAL_XIVE_DONATE_PAGE);
+OPAL_CALL(opal_xive_alloc_vp_block,		OPAL_XIVE_ALLOCATE_VP_BLOCK);
+OPAL_CALL(opal_xive_free_vp_block,		OPAL_XIVE_FREE_VP_BLOCK);
+OPAL_CALL(opal_xive_allocate_irq_raw,		OPAL_XIVE_ALLOCATE_IRQ);
+OPAL_CALL(opal_xive_free_irq,			OPAL_XIVE_FREE_IRQ);
+OPAL_CALL(opal_xive_get_vp_info,		OPAL_XIVE_GET_VP_INFO);
+OPAL_CALL(opal_xive_set_vp_info,		OPAL_XIVE_SET_VP_INFO);
+OPAL_CALL(opal_xive_sync,			OPAL_XIVE_SYNC);
+OPAL_CALL(opal_xive_dump,			OPAL_XIVE_DUMP);
+OPAL_CALL(opal_xive_get_queue_state,		OPAL_XIVE_GET_QUEUE_STATE);
+OPAL_CALL(opal_xive_set_queue_state,		OPAL_XIVE_SET_QUEUE_STATE);
+OPAL_CALL(opal_xive_get_vp_state,		OPAL_XIVE_GET_VP_STATE);
+OPAL_CALL(opal_signal_system_reset,		OPAL_SIGNAL_SYSTEM_RESET);
+OPAL_CALL(opal_npu_map_lpar,			OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init,		OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start,		OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop,		OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_get_powercap,			OPAL_GET_POWERCAP);
+OPAL_CALL(opal_set_powercap,			OPAL_SET_POWERCAP);
+OPAL_CALL(opal_get_power_shift_ratio,		OPAL_GET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_set_power_shift_ratio,		OPAL_SET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_sensor_group_clear,		OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_quiesce,				OPAL_QUIESCE);
+OPAL_CALL(opal_npu_spa_setup,			OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache,		OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set,			OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,		OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,		OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_sensor_read_u64,			OPAL_SENSOR_READ_U64);
+OPAL_CALL(opal_sensor_group_enable,		OPAL_SENSOR_GROUP_ENABLE);
+OPAL_CALL(opal_nx_coproc_init,			OPAL_NX_COPROC_INIT);
+OPAL_CALL(opal_mpipl_update,			OPAL_MPIPL_UPDATE);
+OPAL_CALL(opal_mpipl_register_tag,		OPAL_MPIPL_REGISTER_TAG);
+OPAL_CALL(opal_mpipl_query_tag,			OPAL_MPIPL_QUERY_TAG);
+OPAL_CALL(opal_secvar_get,			OPAL_SECVAR_GET);
+OPAL_CALL(opal_secvar_get_next,			OPAL_SECVAR_GET_NEXT);
+OPAL_CALL(opal_secvar_enqueue_update,		OPAL_SECVAR_ENQUEUE_UPDATE);
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
new file mode 100644
index 0000000000..bb7657115f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Interface for exporting the OPAL ELF core.
+ * Heavily inspired from fs/proc/vmcore.c
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal core: " fmt
+
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/crash_core.h>
+#include <linux/of.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+#define MAX_PT_LOAD_CNT		8
+
+/* NT_AUXV note related info */
+#define AUXV_CNT		1
+#define AUXV_DESC_SZ		(((2 * AUXV_CNT) + 1) * sizeof(Elf64_Off))
+
+/*
+ * State needed to build and serve the OPAL core file: where firmware left
+ * the CPU state data, which OPAL memory regions are exported as PT_LOAD
+ * segments, and the buffer holding the ELF headers and the PT_NOTE data.
+ */
+struct opalcore_config {
+	/* Number of CPU state entries (data size / entry size) */
+	u32			num_cpus;
+	/* PIR value of crashing CPU */
+	u32			crashing_cpu;
+
+	/* CPU state data info from F/W */
+	u64			cpu_state_destination_vaddr;
+	u64			cpu_state_data_size;
+	u64			cpu_state_entry_size;
+
+	/* OPAL memory to be exported as PT_LOAD segments */
+	u64			ptload_addr[MAX_PT_LOAD_CNT];
+	u64			ptload_size[MAX_PT_LOAD_CNT];
+	u64			ptload_cnt;
+
+	/* Pointer to the first PT_LOAD in the ELF core file */
+	Elf64_Phdr		*ptload_phdr;
+
+	/* Total size of opalcore file. */
+	size_t			opalcore_size;
+
+	/* Buffer for all the ELF core headers and the PT_NOTE */
+	size_t			opalcorebuf_sz;
+	char			*opalcorebuf;
+
+	/* NT_AUXV buffer */
+	char			auxv_buf[AUXV_DESC_SZ];
+};
+
+/*
+ * One entry per exported PT_LOAD segment: 'size' bytes of memory at physical
+ * address 'paddr' appear in the core file starting at file offset 'offset'.
+ */
+struct opalcore {
+	struct list_head	list;
+	u64			paddr;
+	size_t			size;
+	loff_t			offset;
+};
+
+/* PT_LOAD regions backing the core file, appended in file-offset order */
+static LIST_HEAD(opalcore_list);
+/* Core file configuration; NULL when no core is available for export */
+static struct opalcore_config *oc_conf;
+/* Firmware metadata located through the MPIPL OPAL and CPU tags */
+static const struct opal_mpipl_fadump *opalc_metadata;
+static const struct opal_mpipl_fadump *opalc_cpu_metadata;
+/* Parent kobject ("mpipl") of the exported sysfs files */
+static struct kobject *mpipl_kobj;
+
+/*
+ * Selects the signal reported for the crashing CPU: SIGUSR1 if the crash
+ * was initiated by the kernel, SIGTERM otherwise.
+ */
+bool kernel_initiated;
+
+/* Allocate a zero-filled list entry; returns NULL if allocation fails. */
+static struct opalcore * __init get_new_element(void)
+{
+	struct opalcore *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	return entry;
+}
+
+/* Returns 1 once the config exists and its header buffer is allocated. */
+static inline int is_opalcore_usable(void)
+{
+	if (!oc_conf)
+		return 0;
+
+	return oc_conf->opalcorebuf ? 1 : 0;
+}
+
+/*
+ * Write one ELF note (header, name, descriptor) at @buf in big-endian
+ * format. Name and descriptor are each padded up to a multiple of
+ * sizeof(Elf64_Word). Returns the position just past the note.
+ */
+static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name,
+				     u32 type, void *data,
+				     size_t data_len)
+{
+	const size_t word = sizeof(Elf64_Word);
+	Elf64_Word name_len = strlen(name) + 1;
+	Elf64_Nhdr *hdr = (Elf64_Nhdr *)buf;
+	Elf64_Word *cursor = buf;
+
+	hdr->n_type   = cpu_to_be32(type);
+	hdr->n_namesz = cpu_to_be32(name_len);
+	hdr->n_descsz = cpu_to_be32(data_len);
+
+	/* Name follows the note header */
+	cursor += DIV_ROUND_UP(sizeof(*hdr), word);
+	memcpy(cursor, name, name_len);
+
+	/* Descriptor follows the padded name */
+	cursor += DIV_ROUND_UP(name_len, word);
+	memcpy(cursor, data, data_len);
+
+	return cursor + DIV_ROUND_UP(data_len, word);
+}
+
+/*
+ * Build the big-endian NT_PRSTATUS payload for the thread identified by
+ * @pir from the register state in @regs.
+ */
+static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir,
+			  struct pt_regs *regs)
+{
+	memset(prstatus, 0, sizeof(*prstatus));
+	elf_core_copy_regs(&(prstatus->pr_reg), regs);
+
+	/*
+	 * The PID field carries the PIR. A PIR may legitimately be '0', so
+	 * every PIR is offset by '100' to avoid misinterpretation in GDB.
+	 */
+	prstatus->common.pr_ppid = cpu_to_be32(1);
+	prstatus->common.pr_pid  = cpu_to_be32(100 + pir);
+
+	/* Only the crashing CPU reports a signal */
+	if (pir != oc_conf->crashing_cpu)
+		return;
+
+	/* SIGUSR1 for a crash initiated from the kernel, SIGTERM otherwise */
+	if (kernel_initiated)
+		prstatus->common.pr_cursig = cpu_to_be16(SIGUSR1);
+	else
+		prstatus->common.pr_cursig = cpu_to_be16(SIGTERM);
+}
+
+/*
+ * Append an NT_AUXV note holding a single AT_ENTRY pair (the OPAL entry
+ * point) terminated by AT_NULL. Returns the position just past the note.
+ */
+static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf,
+				       u64 opal_boot_entry)
+{
+	Elf64_Off *auxv = (Elf64_Off *)oc_conf->auxv_buf;
+
+	memset(auxv, 0, AUXV_DESC_SZ);
+
+	auxv[0] = cpu_to_be64(AT_ENTRY);	/* Entry point of OPAL */
+	auxv[1] = cpu_to_be64(opal_boot_entry);
+	auxv[2] = cpu_to_be64(AT_NULL);		/* end of vector */
+
+	return append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_AUXV,
+				 oc_conf->auxv_buf, AUXV_DESC_SZ);
+}
+
+/*
+ * sysfs read handler for the OPAL core file.
+ *
+ * The file is laid out as the header/notes buffer followed by each region
+ * on opalcore_list. Copies up to 'count' bytes starting at file offset
+ * 'pos' into 'to' and returns the number of bytes copied (0 at or beyond
+ * the end of the file).
+ */
+static ssize_t read_opalcore(struct file *file, struct kobject *kobj,
+			     struct bin_attribute *bin_attr, char *to,
+			     loff_t pos, size_t count)
+{
+	struct opalcore *region;
+	ssize_t chunk, remaining;
+	loff_t cur = pos;
+
+	if (pos >= oc_conf->opalcore_size)
+		return 0;
+
+	/* Clamp the request to what is left in the file */
+	remaining = oc_conf->opalcore_size - pos;
+	if (count > remaining)
+		count = remaining;
+
+	if (!count)
+		return 0;
+
+	/* Part (or all) of the request lies in the ELF header/PT_NOTE buffer */
+	if (cur < oc_conf->opalcorebuf_sz) {
+		chunk = min_t(size_t, oc_conf->opalcorebuf_sz - cur, count);
+		memcpy(to, oc_conf->opalcorebuf + cur, chunk);
+		count -= chunk;
+		cur += chunk;
+		to += chunk;
+	}
+
+	/* Whatever remains is copied from the exported memory regions */
+	list_for_each_entry(region, &opalcore_list, list) {
+		void *addr;
+
+		/* nothing more to read here */
+		if (!count)
+			break;
+
+		/* Current offset is already past this region */
+		if (cur >= region->offset + region->size)
+			continue;
+
+		chunk = min_t(size_t, region->offset + region->size - cur, count);
+		addr = (void *)(region->paddr + cur - region->offset);
+		memcpy(to, __va(addr), chunk);
+		count -= chunk;
+		cur += chunk;
+		to += chunk;
+	}
+
+	return (cur - pos);
+}
+
+/*
+ * Read-only (0400) binary sysfs attribute "core", served by read_opalcore().
+ * Its .size is filled in by opalcore_init() once the core size is known.
+ */
+static struct bin_attribute opal_core_attr = {
+	.attr = {.name = "core", .mode = 0400},
+	.read = read_opalcore
+};
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ *
+ * Each register entry is of 16 bytes, A numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation.
+ *
+ * Appends one NT_PRSTATUS note per thread of an active core, starting at
+ * @buf, with the crashing CPU's note placed first. Returns the position
+ * just past the last note written.
+ */
+static Elf64_Word * __init opalcore_append_cpu_notes(Elf64_Word *buf)
+{
+	u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+	struct hdat_fadump_thread_hdr *thdr;
+	struct elf_prstatus prstatus;
+	Elf64_Word *first_cpu_note;
+	struct pt_regs regs;
+	char *bufp;
+	int i;
+
+	size_per_thread = oc_conf->cpu_state_entry_size;
+	bufp = __va(oc_conf->cpu_state_destination_vaddr);
+
+	/*
+	 * Offset for register entries, entry size and registers count is
+	 * duplicated in every thread header in keeping with HDAT format.
+	 * Use these values from the first thread header.
+	 */
+	thdr = (struct hdat_fadump_thread_hdr *)bufp;
+	regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+		       be32_to_cpu(thdr->offset));
+	reg_esize = be32_to_cpu(thdr->esize);
+	regs_cnt  = be32_to_cpu(thdr->ecnt);
+
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("NumCpus     : %u\n", oc_conf->num_cpus);
+	pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+		 regs_offset, reg_esize, regs_cnt);
+
+	/*
+	 * Skip past the first CPU note. Fill this note with the
+	 * crashing CPU's prstatus.
+	 *
+	 * The placeholder is written from a zeroed prstatus so that no
+	 * uninitialised stack contents end up in the exported core if the
+	 * crashing CPU is never found in the loop below.
+	 */
+	memset(&prstatus, 0, sizeof(prstatus));
+	first_cpu_note = buf;
+	buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+				&prstatus, sizeof(prstatus));
+
+	for (i = 0; i < oc_conf->num_cpus; i++, bufp += size_per_thread) {
+		thdr = (struct hdat_fadump_thread_hdr *)bufp;
+		thread_pir = be32_to_cpu(thdr->pir);
+
+		pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+			 i, thread_pir, thdr->core_state);
+
+		/*
+		 * Register state data of MAX cores is provided by firmware,
+		 * but some of this cores may not be active. So, while
+		 * processing register state data, check core state and
+		 * skip threads that belong to inactive cores.
+		 */
+		if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+			continue;
+
+		opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+				      reg_esize, false, &regs);
+
+		pr_debug("PIR 0x%x - R1 : 0x%llx, NIP : 0x%llx\n", thread_pir,
+			 be64_to_cpu(regs.gpr[1]), be64_to_cpu(regs.nip));
+		fill_prstatus(&prstatus, thread_pir, &regs);
+
+		if (thread_pir != oc_conf->crashing_cpu) {
+			buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME,
+						NT_PRSTATUS, &prstatus,
+						sizeof(prstatus));
+		} else {
+			/*
+			 * Add crashing CPU as the first NT_PRSTATUS note for
+			 * GDB to process the core file appropriately.
+			 */
+			append_elf64_note(first_cpu_note, CRASH_CORE_NOTE_NAME,
+					  NT_PRSTATUS, &prstatus,
+					  sizeof(prstatus));
+		}
+	}
+
+	return buf;
+}
+
+/*
+ * Allocate the header buffer and fill in the ELF header, the PT_NOTE and
+ * PT_LOAD program headers and the notes, and queue one opalcore_list entry
+ * per PT_LOAD region.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+static int __init create_opalcore(void)
+{
+	/*
+	 * Default to 0 so that a missing "ibm,opal" node or property does
+	 * not leave these uninitialised when they are printed and written
+	 * into the program headers and the auxv note below.
+	 */
+	u64 opal_boot_entry = 0, opal_base_addr = 0, paddr;
+	u32 hdr_size, cpu_notes_size, count;
+	struct device_node *dn;
+	struct opalcore *new;
+	loff_t opalcore_off;
+	struct page *page;
+	Elf64_Phdr *phdr;
+	Elf64_Ehdr *elf;
+	int i, ret = 0;
+	char *bufp;
+
+	/* Get size of header & CPU notes for OPAL core */
+	hdr_size = (sizeof(Elf64_Ehdr) +
+		    ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr)));
+	cpu_notes_size = ((oc_conf->num_cpus * (CRASH_CORE_NOTE_HEAD_BYTES +
+			  CRASH_CORE_NOTE_NAME_BYTES +
+			  CRASH_CORE_NOTE_DESC_BYTES)) +
+			  (CRASH_CORE_NOTE_HEAD_BYTES +
+			  CRASH_CORE_NOTE_NAME_BYTES + AUXV_DESC_SZ));
+
+	/* Allocate buffer to setup OPAL core */
+	oc_conf->opalcorebuf_sz = PAGE_ALIGN(hdr_size + cpu_notes_size);
+	oc_conf->opalcorebuf = alloc_pages_exact(oc_conf->opalcorebuf_sz,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!oc_conf->opalcorebuf) {
+		pr_err("Not enough memory to setup OPAL core (size: %zu)\n",
+		       oc_conf->opalcorebuf_sz);
+		oc_conf->opalcorebuf_sz = 0;
+		return -ENOMEM;
+	}
+	count = oc_conf->opalcorebuf_sz / PAGE_SIZE;
+	page = virt_to_page(oc_conf->opalcorebuf);
+	for (i = 0; i < count; i++)
+		mark_page_reserved(page + i);
+
+	pr_debug("opalcorebuf = 0x%llx\n", (u64)oc_conf->opalcorebuf);
+
+	/* Read OPAL related device-tree entries */
+	dn = of_find_node_by_name(NULL, "ibm,opal");
+	if (dn) {
+		ret = of_property_read_u64(dn, "opal-base-address",
+					   &opal_base_addr);
+		pr_debug("opal-base-address: %llx\n", opal_base_addr);
+		ret |= of_property_read_u64(dn, "opal-boot-address",
+					    &opal_boot_entry);
+		pr_debug("opal-boot-address: %llx\n", opal_boot_entry);
+	}
+	if (!dn || ret)
+		pr_warn("WARNING: Failed to read OPAL base & entry values\n");
+
+	of_node_put(dn);
+
+	/* Use count to keep track of the program headers */
+	count = 0;
+
+	bufp = oc_conf->opalcorebuf;
+	elf = (Elf64_Ehdr *)bufp;
+	bufp += sizeof(Elf64_Ehdr);
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELFDATA2MSB;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	elf->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+	elf->e_type = cpu_to_be16(ET_CORE);
+	elf->e_machine = cpu_to_be16(ELF_ARCH);
+	elf->e_version = cpu_to_be32(EV_CURRENT);
+	elf->e_entry = 0;
+	elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr));
+	elf->e_shoff = 0;
+	elf->e_flags = 0;
+
+	elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr));
+	elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr));
+	elf->e_phnum = 0;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+
+	/* The first program header describes the PT_NOTE segment */
+	phdr = (Elf64_Phdr *)bufp;
+	bufp += sizeof(Elf64_Phdr);
+	phdr->p_type	= cpu_to_be32(PT_NOTE);
+	phdr->p_flags	= 0;
+	phdr->p_align	= 0;
+	phdr->p_paddr	= phdr->p_vaddr = 0;
+	phdr->p_offset	= cpu_to_be64(hdr_size);
+	phdr->p_filesz	= phdr->p_memsz = cpu_to_be64(cpu_notes_size);
+	count++;
+
+	/* PT_LOAD data starts right after the page-aligned header buffer */
+	opalcore_off = oc_conf->opalcorebuf_sz;
+	oc_conf->ptload_phdr  = (Elf64_Phdr *)bufp;
+	paddr = 0;
+	for (i = 0; i < oc_conf->ptload_cnt; i++) {
+		phdr = (Elf64_Phdr *)bufp;
+		bufp += sizeof(Elf64_Phdr);
+		phdr->p_type	= cpu_to_be32(PT_LOAD);
+		phdr->p_flags	= cpu_to_be32(PF_R|PF_W|PF_X);
+		phdr->p_align	= 0;
+
+		new = get_new_element();
+		if (!new)
+			return -ENOMEM;
+		new->paddr  = oc_conf->ptload_addr[i];
+		new->size   = oc_conf->ptload_size[i];
+		new->offset = opalcore_off;
+		list_add_tail(&new->list, &opalcore_list);
+
+		phdr->p_paddr	= cpu_to_be64(paddr);
+		phdr->p_vaddr	= cpu_to_be64(opal_base_addr + paddr);
+		phdr->p_filesz	= phdr->p_memsz  =
+			cpu_to_be64(oc_conf->ptload_size[i]);
+		phdr->p_offset	= cpu_to_be64(opalcore_off);
+
+		count++;
+		opalcore_off += oc_conf->ptload_size[i];
+		paddr += oc_conf->ptload_size[i];
+	}
+
+	elf->e_phnum = cpu_to_be16(count);
+
+	/* Notes follow the program headers, i.e. start at offset hdr_size */
+	bufp = (char *)opalcore_append_cpu_notes((Elf64_Word *)bufp);
+	bufp = (char *)auxv_to_elf64_notes((Elf64_Word *)bufp, opal_boot_entry);
+
+	oc_conf->opalcore_size = opalcore_off;
+	return 0;
+}
+
+/*
+ * Remove the exported core file and release everything allocated for it:
+ * the PT_LOAD list entries, the header buffer and the config itself.
+ * Does nothing if oc_conf is NULL.
+ */
+static void opalcore_cleanup(void)
+{
+	struct opalcore *entry, *tmp;
+
+	if (oc_conf == NULL)
+		return;
+
+	/*
+	 * Remove OPAL core sysfs file. Early error paths get here before the
+	 * mpipl kobject exists, and sysfs_remove_bin_file() dereferences the
+	 * kobject it is given.
+	 */
+	if (mpipl_kobj)
+		sysfs_remove_bin_file(mpipl_kobj, &opal_core_attr);
+	oc_conf->ptload_phdr = NULL;
+	oc_conf->ptload_cnt = 0;
+
+	/* free the list entries describing the PT_LOAD regions */
+	list_for_each_entry_safe(entry, tmp, &opalcore_list, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	/* free the buffer used for setting up OPAL core */
+	if (oc_conf->opalcorebuf) {
+		void *end = (void *)((u64)oc_conf->opalcorebuf +
+				     oc_conf->opalcorebuf_sz);
+
+		free_reserved_area(oc_conf->opalcorebuf, end, -1, NULL);
+		oc_conf->opalcorebuf = NULL;
+		oc_conf->opalcorebuf_sz = 0;
+	}
+
+	kfree(oc_conf);
+	oc_conf = NULL;
+}
+__exitcall(opalcore_cleanup);
+
+/*
+ * Locate the MPIPL metadata left by firmware and populate oc_conf from it:
+ * the OPAL memory regions to export, the crashing CPU's PIR and the
+ * location and layout of the CPU state data.
+ *
+ * On return oc_conf is non-NULL only if all of this succeeded.
+ */
+static void __init opalcore_config_init(void)
+{
+	u32 idx, cpu_data_version;
+	struct device_node *np;
+	const __be32 *prop;
+	u64 addr = 0;
+	int i, ret;
+
+	np = of_find_node_by_path("/ibm,opal/dump");
+	if (np == NULL)
+		return;
+
+	if (!of_device_is_compatible(np, "ibm,opal-dump")) {
+		pr_warn("Support missing for this f/w version!\n");
+		/* Drop the reference taken by of_find_node_by_path() */
+		of_node_put(np);
+		return;
+	}
+
+	/* Check if dump has been initiated on last reboot */
+	prop = of_get_property(np, "mpipl-boot", NULL);
+	if (!prop) {
+		of_node_put(np);
+		return;
+	}
+
+	/* Get OPAL metadata */
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_OPAL, &addr);
+	if ((ret != OPAL_SUCCESS) || !addr) {
+		pr_err("Failed to get OPAL metadata (%d)\n", ret);
+		goto error_out;
+	}
+
+	addr = be64_to_cpu(addr);
+	pr_debug("OPAL metadata addr: %llx\n", addr);
+	opalc_metadata = __va(addr);
+
+	/* Get OPAL CPU metadata */
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+	if ((ret != OPAL_SUCCESS) || !addr) {
+		pr_err("Failed to get OPAL CPU metadata (%d)\n", ret);
+		goto error_out;
+	}
+
+	addr = be64_to_cpu(addr);
+	pr_debug("CPU metadata addr: %llx\n", addr);
+	opalc_cpu_metadata = __va(addr);
+
+	/* Allocate memory for config buffer */
+	oc_conf = kzalloc(sizeof(struct opalcore_config), GFP_KERNEL);
+	if (oc_conf == NULL)
+		goto error_out;
+
+	/* Parse OPAL metadata */
+	if (opalc_metadata->version != OPAL_MPIPL_VERSION) {
+		pr_warn("Supported OPAL metadata version: %u, found: %u!\n",
+			OPAL_MPIPL_VERSION, opalc_metadata->version);
+		pr_warn("WARNING: F/W using newer OPAL metadata format!!\n");
+	}
+
+	oc_conf->ptload_cnt = 0;
+	idx = be32_to_cpu(opalc_metadata->region_cnt);
+	if (idx > MAX_PT_LOAD_CNT) {
+		pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)\n",
+			idx, MAX_PT_LOAD_CNT);
+		idx = MAX_PT_LOAD_CNT;
+	}
+	for (i = 0; i < idx; i++) {
+		oc_conf->ptload_addr[oc_conf->ptload_cnt] =
+				be64_to_cpu(opalc_metadata->region[i].dest);
+		oc_conf->ptload_size[oc_conf->ptload_cnt++] =
+				be64_to_cpu(opalc_metadata->region[i].size);
+	}
+	oc_conf->ptload_cnt = i;
+	oc_conf->crashing_cpu = be32_to_cpu(opalc_metadata->crashing_pir);
+
+	if (!oc_conf->ptload_cnt) {
+		pr_err("OPAL memory regions not found\n");
+		goto error_out;
+	}
+
+	/* Parse OPAL CPU metadata */
+	cpu_data_version = be32_to_cpu(opalc_cpu_metadata->cpu_data_version);
+	if (cpu_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+		pr_warn("Supported CPU data version: %u, found: %u!\n",
+			HDAT_FADUMP_CPU_DATA_VER, cpu_data_version);
+		pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+	}
+
+	addr = be64_to_cpu(opalc_cpu_metadata->region[0].dest);
+	if (!addr) {
+		pr_err("CPU state data not found!\n");
+		goto error_out;
+	}
+	oc_conf->cpu_state_destination_vaddr = (u64)__va(addr);
+
+	oc_conf->cpu_state_data_size =
+			be64_to_cpu(opalc_cpu_metadata->region[0].size);
+	oc_conf->cpu_state_entry_size =
+			be32_to_cpu(opalc_cpu_metadata->cpu_data_size);
+
+	if ((oc_conf->cpu_state_entry_size == 0) ||
+	    (oc_conf->cpu_state_entry_size > oc_conf->cpu_state_data_size)) {
+		pr_err("CPU state data is invalid.\n");
+		goto error_out;
+	}
+	oc_conf->num_cpus = (oc_conf->cpu_state_data_size /
+			     oc_conf->cpu_state_entry_size);
+
+	of_node_put(np);
+	return;
+
+error_out:
+	pr_err("Could not export /sys/firmware/opal/core\n");
+	/*
+	 * Only the config buffer can have been allocated at this point, and
+	 * no sysfs file exists yet, so free it directly instead of going
+	 * through the sysfs removal in opalcore_cleanup(). kfree(NULL) is a
+	 * no-op.
+	 */
+	kfree(oc_conf);
+	oc_conf = NULL;
+	of_node_put(np);
+}
+
+/*
+ * sysfs store handler for "release_core". Writing the integer 1 removes
+ * the core file and frees its memory.
+ *
+ * Returns @count on success, -EINVAL for any other input, or -EPERM if
+ * there is no core to release.
+ */
+static ssize_t release_core_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t count)
+{
+	int input = -1;
+
+	if (kstrtoint(buf, 0, &input))
+		return -EINVAL;
+
+	/* '1' is the only accepted value */
+	if (input != 1)
+		return -EINVAL;
+
+	if (!oc_conf) {
+		pr_err("'/sys/firmware/opal/core' file not accessible!\n");
+		return -EPERM;
+	}
+
+	/*
+	 * Take away '/sys/firmware/opal/core' and release all memory
+	 * used for exporting this file.
+	 */
+	opalcore_cleanup();
+
+	return count;
+}
+
+/* Write-only "release_core" attribute, handled by release_core_store() */
+static struct kobj_attribute opalcore_rel_attr = __ATTR_WO(release_core);
+
+/* Regular attributes of the mpipl group */
+static struct attribute *mpipl_attr[] = {
+	&opalcore_rel_attr.attr,
+	NULL,
+};
+
+/* Binary attributes of the mpipl group: the core file itself */
+static struct bin_attribute *mpipl_bin_attr[] = {
+	&opal_core_attr,
+	NULL,
+
+};
+
+/* Attribute group registered on mpipl_kobj by opalcore_init() */
+static const struct attribute_group mpipl_group = {
+	.attrs = mpipl_attr,
+	.bin_attrs =  mpipl_bin_attr,
+};
+
+/*
+ * Initcall: parse the firmware metadata, build the OPAL core and export it
+ * through the "mpipl" sysfs directory, plus a "core" compatibility symlink
+ * at the old location.
+ *
+ * Returns 0 on success or a negative value if the core cannot be exported.
+ */
+static int __init opalcore_init(void)
+{
+	int rc;
+
+	opalcore_config_init();
+
+	if (oc_conf == NULL)
+		return -1;
+
+	/*
+	 * Create the parent kobject before anything else: opalcore_cleanup()
+	 * removes the core file from mpipl_kobj, so every later error path
+	 * needs the kobject to exist.
+	 */
+	mpipl_kobj = kobject_create_and_add("mpipl", opal_kobj);
+	if (!mpipl_kobj) {
+		pr_err("unable to create mpipl kobject\n");
+		/* Only the config buffer has been allocated so far */
+		kfree(oc_conf);
+		oc_conf = NULL;
+		return -ENOMEM;
+	}
+
+	/* Build the ELF headers and notes; export only if that succeeded */
+	rc = create_opalcore();
+	if (rc || !is_opalcore_usable()) {
+		pr_err("Failed to export /sys/firmware/opal/mpipl/core\n");
+		if (!rc)
+			rc = -1;
+		goto err_cleanup;
+	}
+
+	/* Set OPAL core file size */
+	opal_core_attr.size = oc_conf->opalcore_size;
+
+	/* Export OPAL core sysfs file */
+	rc = sysfs_create_group(mpipl_kobj, &mpipl_group);
+	if (rc) {
+		pr_err("mpipl sysfs group creation failed (%d)\n", rc);
+		goto err_cleanup;
+	}
+	/* The /sys/firmware/opal/core is moved to /sys/firmware/opal/mpipl/
+	 * directory, need to create symlink at old location to maintain
+	 * backward compatibility.
+	 */
+	rc = compat_only_sysfs_link_entry_to_kobj(opal_kobj, mpipl_kobj,
+						  "core", NULL);
+	if (rc) {
+		pr_err("unable to create core symlink (%d)\n", rc);
+		return rc;
+	}
+
+	return 0;
+
+err_cleanup:
+	opalcore_cleanup();
+	/* Drop the reference from kobject_create_and_add() */
+	kobject_put(mpipl_kobj);
+	mpipl_kobj = NULL;
+	return rc;
+}
+fs_initcall(opalcore_init);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
new file mode 100644
index 0000000000..16c5860f13
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Dump Interface
+ *
+ * Copyright 2013,2014 IBM Corp.
+ */
+
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/opal.h>
+
+#define DUMP_TYPE_FSP	0x01
+
+/*
+ * One platform dump exposed in sysfs: an embedded kobject, the binary
+ * attribute that serves the dump contents, and the lazily filled buffer.
+ */
+struct dump_obj {
+	struct kobject  kobj;
+	struct bin_attribute dump_attr;
+	uint32_t	id;  /* becomes object name */
+	uint32_t	type;
+	uint32_t	size;
+	char		*buffer;	/* NULL until the dump has been read */
+};
+/* Map an embedded kobject back to its containing dump_obj */
+#define to_dump_obj(x) container_of(x, struct dump_obj, kobj)
+
+/*
+ * Attribute wrapper whose show/store callbacks receive the dump_obj
+ * instead of the raw kobject; dispatched via dump_attr_show/store().
+ */
+struct dump_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr,
+			 const char *buf, size_t count);
+};
+/* Map an embedded attribute back to its containing dump_attribute */
+#define to_dump_attr(x) container_of(x, struct dump_attribute, attr)
+
+/* Show callback for the "id" attribute: prints the dump ID in hex. */
+static ssize_t dump_id_show(struct dump_obj *dump_obj,
+			    struct dump_attribute *attr,
+			    char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer */
+	return sysfs_emit(buf, "0x%x\n", dump_obj->id);
+}
+
+/* Translate a dump type code into a human-readable name. */
+static const char* dump_type_to_string(uint32_t type)
+{
+	static const char * const names[] = {
+		[0x01] = "SP Dump",
+		[0x02] = "System/Platform Dump",
+		[0x03] = "SMA Dump",
+	};
+
+	/* Index 0 has no entry, so it falls through to "unknown" as well */
+	if (type < ARRAY_SIZE(names) && names[type])
+		return names[type];
+
+	return "unknown";
+}
+
+/* Show callback for the "type" attribute: prints the type code and name. */
+static ssize_t dump_type_show(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer */
+	return sysfs_emit(buf, "0x%x %s\n", dump_obj->type,
+			  dump_type_to_string(dump_obj->type));
+}
+
+/* Show callback for the "acknowledge" attribute: prints a usage hint. */
+static ssize_t dump_ack_show(struct dump_obj *dump_obj,
+			     struct dump_attribute *attr,
+			     char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer */
+	return sysfs_emit(buf, "ack - acknowledge dump\n");
+}
+
+/*
+ * Acknowledge dump @dump_id to OPAL. Returns the opal_dump_ack() result
+ * and logs a warning when it is non-zero.
+ */
+static int64_t dump_send_ack(uint32_t dump_id)
+{
+	int rc = opal_dump_ack(dump_id);
+
+	if (!rc)
+		return rc;
+
+	pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n",
+		__func__, dump_id, rc);
+	return rc;
+}
+
+/*
+ * Store callback for the "acknowledge" attribute. Any write acknowledges
+ * the dump to OPAL and drops a reference on the dump object. Always
+ * returns @count.
+ */
+static ssize_t dump_ack_store(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      const char *buf,
+			      size_t count)
+{
+	/*
+	 * Try to self remove this attribute. Only if that succeeds do we
+	 * go on to ack the dump and delete the kobject itself.
+	 */
+	if (!sysfs_remove_file_self(&dump_obj->kobj, &attr->attr))
+		return count;
+
+	dump_send_ack(dump_obj->id);
+	kobject_put(&dump_obj->kobj);
+
+	return count;
+}
+
+/*
+ * Attributes of a dump.
+ * The binary attribute of the dump itself is dynamic due to the dynamic
+ * size of the dump, so it is set up per object in create_dump_obj().
+ *
+ * "id" and "type" are read-only (0444); "acknowledge" is read/write for
+ * owner and group (0660).
+ */
+static struct dump_attribute id_attribute =
+	__ATTR(id, 0444, dump_id_show, NULL);
+static struct dump_attribute type_attribute =
+	__ATTR(type, 0444, dump_type_show, NULL);
+static struct dump_attribute ack_attribute =
+	__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
+
+/* Show callback for the "initiate_dump" attribute: prints a usage hint. */
+static ssize_t init_dump_show(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      char *buf)
+{
+	/* sysfs_emit() bounds the output to the sysfs page buffer */
+	return sysfs_emit(buf, "1 - initiate Service Processor(FSP) dump\n");
+}
+
+/*
+ * Ask OPAL to initiate a dump of the given @type. Returns the
+ * opal_dump_init() result and logs a warning when it is non-zero.
+ */
+static int64_t dump_fips_init(uint8_t type)
+{
+	int rc = opal_dump_init(type);
+
+	if (!rc)
+		return rc;
+
+	pr_warn("%s: Failed to initiate FSP dump (%d)\n",
+		__func__, rc);
+	return rc;
+}
+
+/*
+ * Store callback for the "initiate_dump" attribute. Any write requests an
+ * FSP dump; the written value is not inspected. Always returns @count,
+ * whether or not the request succeeded.
+ */
+static ssize_t init_dump_store(struct dump_obj *dump_obj,
+			       struct dump_attribute *attr,
+			       const char *buf,
+			       size_t count)
+{
+	if (dump_fips_init(DUMP_TYPE_FSP) == OPAL_SUCCESS)
+		pr_info("%s: Initiated FSP dump\n", __func__);
+
+	return count;
+}
+
+/* Owner-only (0600) "initiate_dump" attribute */
+static struct dump_attribute initiate_attribute =
+	__ATTR(initiate_dump, 0600, init_dump_show, init_dump_store);
+
+static struct attribute *initiate_attrs[] = {
+	&initiate_attribute.attr,
+	NULL,
+};
+
+/* Group added to the dump kset's kobject in opal_platform_dump_init() */
+static const struct attribute_group initiate_attr_group = {
+	.attrs = initiate_attrs,
+};
+
+/* kset holding every dump object; created in opal_platform_dump_init() */
+static struct kset *dump_kset;
+
+/*
+ * sysfs_ops show entry point: recover the dump_attribute and dump_obj
+ * wrappers and forward to the attribute's own show(), or return -EIO if
+ * it has none.
+ */
+static ssize_t dump_attr_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buf)
+{
+	struct dump_attribute *dattr = to_dump_attr(attr);
+	struct dump_obj *dump = to_dump_obj(kobj);
+
+	if (dattr->show)
+		return dattr->show(dump, dattr, buf);
+
+	return -EIO;
+}
+
+/*
+ * sysfs_ops store entry point: recover the dump_attribute and dump_obj
+ * wrappers and forward to the attribute's own store(), or return -EIO if
+ * it has none.
+ */
+static ssize_t dump_attr_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct dump_attribute *dattr = to_dump_attr(attr);
+	struct dump_obj *dump = to_dump_obj(kobj);
+
+	if (dattr->store)
+		return dattr->store(dump, dattr, buf, len);
+
+	return -EIO;
+}
+
+/* Routes sysfs reads/writes to the per-attribute dump_attribute callbacks */
+static const struct sysfs_ops dump_sysfs_ops = {
+	.show = dump_attr_show,
+	.store = dump_attr_store,
+};
+
+/* kobject release callback: free the dump buffer and the object itself. */
+static void dump_release(struct kobject *kobj)
+{
+	struct dump_obj *dump = to_dump_obj(kobj);
+
+	vfree(dump->buffer);
+	kfree(dump);
+}
+
+/* Attributes every dump object gets: id, type and acknowledge */
+static struct attribute *dump_default_attrs[] = {
+	&id_attribute.attr,
+	&type_attribute.attr,
+	&ack_attribute.attr,
+	NULL,
+};
+/* Generates dump_default_groups, referenced by dump_ktype below */
+ATTRIBUTE_GROUPS(dump_default);
+
+/* kobject type of a dump object: sysfs dispatch, release, default attrs */
+static struct kobj_type dump_ktype = {
+	.sysfs_ops = &dump_sysfs_ops,
+	.release = &dump_release,
+	.default_groups = dump_default_groups,
+};
+
+/*
+ * Query OPAL for the ID, size and type of a dump.
+ *
+ * Tries opal_dump_info2() first and falls back to opal_dump_info() when
+ * that returns OPAL_PARAMETER; in the fallback case the type keeps its
+ * preset value of 0xffffffff. Returns 0 with the outputs converted to CPU
+ * endianness, or the non-zero OPAL return code with outputs untouched.
+ */
+static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
+{
+	__be32 be_id, be_size;
+	__be32 be_type = cpu_to_be32(0xffffffff);
+	int rc;
+
+	rc = opal_dump_info2(&be_id, &be_size, &be_type);
+	if (rc == OPAL_PARAMETER)
+		rc = opal_dump_info(&be_id, &be_size);
+
+	if (rc) {
+		pr_warn("%s: Failed to get dump info (%d)\n",
+			__func__, rc);
+		return rc;
+	}
+
+	*dump_type = be32_to_cpu(be_type);
+	*dump_size = be32_to_cpu(be_size);
+	*dump_id = be32_to_cpu(be_id);
+
+	return rc;
+}
+
+/*
+ * Allocate dump->buffer and have OPAL fill it with the dump contents.
+ *
+ * Returns the final opal_dump_read() status (anything other than
+ * OPAL_SUCCESS or OPAL_PARTIAL is logged as a failure), or -ENOMEM if the
+ * buffer or the scatter-gather list cannot be allocated. dump->buffer is
+ * not freed here on failure; dump_attr_read() does that.
+ */
+static int64_t dump_read_data(struct dump_obj *dump)
+{
+	struct opal_sg_list *list;
+	uint64_t addr;
+	int64_t rc;
+
+	/* Allocate memory */
+	dump->buffer = vzalloc(PAGE_ALIGN(dump->size));
+	if (!dump->buffer) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Generate SG list */
+	list = opal_vmalloc_to_sg_list(dump->buffer, dump->size);
+	if (!list) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* First entry address */
+	addr = __pa(list);
+
+	/*
+	 * Fetch data. rc is primed with OPAL_BUSY_EVENT so the loop body
+	 * runs at least once; the read is retried while OPAL reports busy.
+	 * Only OPAL_BUSY_EVENT polls for events and sleeps before retrying;
+	 * a plain OPAL_BUSY retries immediately.
+	 */
+	rc = OPAL_BUSY_EVENT;
+	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		rc = opal_dump_read(dump->id, addr);
+		if (rc == OPAL_BUSY_EVENT) {
+			opal_poll_events(NULL);
+			msleep(20);
+		}
+	}
+
+	if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL)
+		pr_warn("%s: Extract dump failed for ID 0x%x\n",
+			__func__, dump->id);
+
+	/* Free SG list */
+	opal_free_sg_list(list);
+
+out:
+	return rc;
+}
+
+/*
+ * Read handler for a dump's binary "dump" attribute.
+ *
+ * The dump is fetched from firmware on the first read and cached in
+ * dump->buffer. Copies @count bytes from offset @pos of that buffer and
+ * returns @count, or -EIO if the dump could not be fully fetched.
+ *
+ * No bounds check is done here; presumably the sysfs core clamps @pos and
+ * @count to the attribute size set in create_dump_obj() - TODO confirm.
+ */
+static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj,
+			      struct bin_attribute *bin_attr,
+			      char *buffer, loff_t pos, size_t count)
+{
+	ssize_t rc;
+
+	struct dump_obj *dump = to_dump_obj(kobj);
+
+	if (!dump->buffer) {
+		rc = dump_read_data(dump);
+
+		/* Hard failure: drop the buffer so a later read starts over */
+		if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) {
+			vfree(dump->buffer);
+			dump->buffer = NULL;
+
+			return -EIO;
+		}
+		if (rc == OPAL_PARTIAL) {
+			/* On a partial read, we just return EIO
+			 * and rely on userspace to ask us to try
+			 * again.
+			 *
+			 * NOTE(review): dump->buffer stays allocated here,
+			 * so the next read skips dump_read_data() and
+			 * copies from the buffer as it is - confirm that
+			 * this matches the intended retry behaviour.
+			 */
+			pr_info("%s: Platform dump partially read. ID = 0x%x\n",
+				__func__, dump->id);
+			return -EIO;
+		}
+	}
+
+	memcpy(buffer, dump->buffer + pos, count);
+
+	/* You may think we could free the dump buffer now and retrieve
+	 * it again later if needed, but due to current firmware limitation,
+	 * that's not the case. So, once read into userspace once,
+	 * we keep the dump around until it's acknowledged by userspace.
+	 */
+
+	return count;
+}
+
+/*
+ * Create and publish the sysfs object for a newly reported dump.
+ *
+ * The object is named "0x<type>-0x<id>", belongs to dump_kset and carries
+ * a read-only (0400) binary attribute "dump" of @size bytes. Failures are
+ * silent: the object is simply not created.
+ */
+static void create_dump_obj(uint32_t id, size_t size, uint32_t type)
+{
+	struct dump_obj *dump;
+	int rc;
+
+	dump = kzalloc(sizeof(*dump), GFP_KERNEL);
+	if (!dump)
+		return;
+
+	dump->kobj.kset = dump_kset;
+
+	kobject_init(&dump->kobj, &dump_ktype);
+
+	sysfs_bin_attr_init(&dump->dump_attr);
+
+	dump->dump_attr.attr.name = "dump";
+	dump->dump_attr.attr.mode = 0400;
+	dump->dump_attr.size = size;
+	dump->dump_attr.read = dump_attr_read;
+
+	dump->id = id;
+	dump->size = size;
+	dump->type = type;
+
+	rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id);
+	if (rc) {
+		/* Dropping the initial reference frees dump via dump_release() */
+		kobject_put(&dump->kobj);
+		return;
+	}
+
+	/*
+	 * As soon as the sysfs file for this dump is created/activated there is
+	 * a chance the opal_errd daemon (or any userspace) might read and
+	 * acknowledge the dump before kobject_uevent() is called. If that
+	 * happens then there is a potential race between
+	 * dump_ack_store->kobject_put() and kobject_uevent() which leads to a
+	 * use-after-free of a kernfs object resulting in a kernel crash.
+	 *
+	 * To avoid that, we need to take a reference on behalf of the bin file,
+	 * so that our reference remains valid while we call kobject_uevent().
+	 * We then drop our reference before exiting the function, leaving the
+	 * bin file to drop the last reference (if it hasn't already).
+	 */
+
+	/* Take a reference for the bin file */
+	kobject_get(&dump->kobj);
+	rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr);
+	if (rc == 0) {
+		kobject_uevent(&dump->kobj, KOBJ_ADD);
+
+		pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
+			__func__, dump->id, dump->size);
+	} else {
+		/* Drop reference count taken for bin file */
+		kobject_put(&dump->kobj);
+	}
+
+	/* Drop our reference */
+	kobject_put(&dump->kobj);
+	return;
+}
+
+/*
+ * Threaded IRQ handler for the OPAL "dump available" event: query the
+ * pending dump and create its sysfs object unless one with the same name
+ * already exists. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t process_dump(int irq, void *data)
+{
+	int rc;
+	uint32_t dump_id, dump_size, dump_type;
+	/* "0x" + 8 hex digits + "-0x" + 8 hex digits + NUL = 22 bytes */
+	char name[22];
+	struct kobject *kobj;
+
+	rc = dump_read_info(&dump_id, &dump_size, &dump_type);
+	if (rc != OPAL_SUCCESS)
+		return IRQ_HANDLED;
+
+	/* Bounded write: name[] is sized exactly to the longest output */
+	snprintf(name, sizeof(name), "0x%x-0x%x", dump_type, dump_id);
+
+	/* we may get notified twice, let's handle
+	 * that gracefully and not create two conflicting
+	 * entries.
+	 */
+	kobj = kset_find_obj(dump_kset, name);
+	if (kobj) {
+		/* Drop reference added by kset_find_obj() */
+		kobject_put(kobj);
+		return IRQ_HANDLED;
+	}
+
+	create_dump_obj(dump_id, dump_size, dump_type);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set up the platform dump interface: create the "dump" kset under
+ * opal_kobj, add the initiate-dump attribute group, hook process_dump() to
+ * the dump-available OPAL event and, if firmware supports it, ask for
+ * pending dump notifications to be resent.
+ */
+void __init opal_platform_dump_init(void)
+{
+	int dump_irq;
+	int rc;
+
+	/* Dump not supported by firmware */
+	if (!opal_check_token(OPAL_DUMP_READ))
+		return;
+
+	dump_kset = kset_create_and_add("dump", NULL, opal_kobj);
+	if (!dump_kset) {
+		pr_warn("%s: Failed to create dump kset\n", __func__);
+		return;
+	}
+
+	rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group);
+	if (rc) {
+		pr_warn("%s: Failed to create initiate dump attr group\n",
+			__func__);
+		kobject_put(&dump_kset->kobj);
+		return;
+	}
+
+	/* A zero irq number means the event could not be mapped */
+	dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL));
+	if (!dump_irq) {
+		pr_err("%s: Can't register OPAL event irq (%d)\n",
+		       __func__, dump_irq);
+		return;
+	}
+
+	/* No primary handler: all the work is done in thread context */
+	rc = request_threaded_irq(dump_irq, NULL, process_dump,
+				IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				"opal-dump", NULL);
+	if (rc)
+		pr_err("%s: Can't request OPAL event irq (%d)\n",
+		       __func__, rc);
+	else if (opal_check_token(OPAL_DUMP_RESEND))
+		opal_dump_resend_notification();
+}
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
new file mode 100644
index 0000000000..554fdd7f88
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Error log support on PowerNV.
+ *
+ * Copyright 2013,2014 IBM Corp.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/fcntl.h>
+#include <linux/kobject.h>
+#include <linux/uaccess.h>
+#include <asm/opal.h>
+
+struct elog_obj {
+	struct kobject kobj;		/* embedded; see to_elog_obj() */
+	struct bin_attribute raw_attr;	/* per-log "raw" binary sysfs file */
+	uint64_t id;			/* log ID passed in by elog_event() */
+	uint64_t type;			/* log type code; 0 is "PEL" */
+	size_t size;			/* log size in bytes */
+	char *buffer;			/* cached log data; NULL if the read failed */
+};
+#define to_elog_obj(x) container_of(x, struct elog_obj, kobj)
+
+struct elog_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr,
+			 const char *buf, size_t count);
+};
+#define to_elog_attr(x) container_of(x, struct elog_attribute, attr)
+
+/* sysfs "id" show: print the log ID in hex; sysfs_emit() bounds the write. */
+static ssize_t elog_id_show(struct elog_obj *elog_obj,
+			    struct elog_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "0x%llx\n", elog_obj->id);
+}
+
+/* Map an elog type code to its name: 0 is "PEL", anything else "unknown". */
+static const char *elog_type_to_string(uint64_t type)
+{
+	if (type == 0)
+		return "PEL";
+	return "unknown";
+}
+
+/* sysfs "type" show: print the raw type code followed by its name. */
+static ssize_t elog_type_show(struct elog_obj *elog_obj,
+			      struct elog_attribute *attr, char *buf)
+{
+	/* sysfs_emit() is the bounded formatter for sysfs show() callbacks */
+	return sysfs_emit(buf, "0x%llx %s\n", elog_obj->type,
+			  elog_type_to_string(elog_obj->type));
+}
+
+/* sysfs "acknowledge" show: emit a fixed usage hint via sysfs_emit(). */
+static ssize_t elog_ack_show(struct elog_obj *elog_obj,
+			     struct elog_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "ack - acknowledge log message\n");
+}
+
+static ssize_t elog_ack_store(struct elog_obj *elog_obj,
+			      struct elog_attribute *attr,
+			      const char *buf, size_t count)
+{
+	/*
+	 * buf is ignored: any write acks. Ack to OPAL and drop a reference
+	 * only if this attribute could remove itself.
+	 */
+	if (!sysfs_remove_file_self(&elog_obj->kobj, &attr->attr))
+		return count;
+
+	opal_send_ack_elog(elog_obj->id);
+	kobject_put(&elog_obj->kobj);
+	return count;
+}
+
+static struct elog_attribute id_attribute =
+	__ATTR(id, 0444, elog_id_show, NULL);
+static struct elog_attribute type_attribute =
+	__ATTR(type, 0444, elog_type_show, NULL);
+static struct elog_attribute ack_attribute =
+	__ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
+
+static struct kset *elog_kset;
+
+/*
+ * sysfs_ops .show hook: recover the elog wrappers from the generic kobject
+ * and attribute, then dispatch to the attribute's own show() callback.
+ */
+static ssize_t elog_attr_show(struct kobject *kobj,
+			      struct attribute *attr, char *buf)
+{
+	struct elog_attribute *elog_attr = to_elog_attr(attr);
+
+	/* An attribute without a show() callback cannot be read */
+	if (elog_attr->show == NULL)
+		return -EIO;
+
+	return elog_attr->show(to_elog_obj(kobj), elog_attr, buf);
+}
+
+/*
+ * sysfs_ops .store hook: recover the elog wrappers from the generic kobject
+ * and attribute, then dispatch to the attribute's own store() callback.
+ */
+static ssize_t elog_attr_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct elog_attribute *elog_attr = to_elog_attr(attr);
+
+	/* An attribute without a store() callback cannot be written */
+	if (elog_attr->store == NULL)
+		return -EIO;
+	return elog_attr->store(to_elog_obj(kobj), elog_attr, buf, len);
+}
+
+static const struct sysfs_ops elog_sysfs_ops = {
+	.show = elog_attr_show,
+	.store = elog_attr_store,
+};
+
+/* kobj_type .release: free the cached log data, then the object itself. */
+static void elog_release(struct kobject *kobj)
+{
+	struct elog_obj *obj = to_elog_obj(kobj);
+
+	kfree(obj->buffer);
+	kfree(obj);
+}
+
+static struct attribute *elog_default_attrs[] = {
+	&id_attribute.attr,
+	&type_attribute.attr,
+	&ack_attribute.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(elog_default);
+
+static struct kobj_type elog_ktype = {
+	.sysfs_ops = &elog_sysfs_ops,
+	.release = &elog_release,
+	.default_groups = elog_default_groups,
+};
+
+/* Maximum size of a single log on FSP is 16KB */
+#define OPAL_MAX_ERRLOG_SIZE	16384
+
+static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
+			     struct bin_attribute *bin_attr,
+			     char *buffer, loff_t pos, size_t count)
+{
+	int opal_rc;
+
+	struct elog_obj *elog = to_elog_obj(kobj);
+
+	/* buffer is NULL when an earlier allocation or OPAL read failed; retry */
+	if (!elog->buffer) {
+		elog->buffer = kzalloc(elog->size, GFP_KERNEL);
+		if (!elog->buffer)
+			return -EIO;
+
+		opal_rc = opal_read_elog(__pa(elog->buffer),
+					 elog->size, elog->id);
+		if (opal_rc != OPAL_SUCCESS) {
+			pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n",
+					   elog->id);
+			kfree(elog->buffer);
+			elog->buffer = NULL;	/* so that the next read retries */
+			return -EIO;
+		}
+	}
+	/* NOTE(review): pos/count are not validated here; confirm sysfs bounds them */
+	memcpy(buffer, elog->buffer + pos, count);
+
+	return count;
+}
+
+static void create_elog_obj(uint64_t id, size_t size, uint64_t type)
+{
+	struct elog_obj *elog;
+	int rc;
+
+	elog = kzalloc(sizeof(*elog), GFP_KERNEL);
+	if (!elog)
+		return;
+
+	elog->kobj.kset = elog_kset;
+
+	kobject_init(&elog->kobj, &elog_ktype);
+
+	sysfs_bin_attr_init(&elog->raw_attr);
+
+	elog->raw_attr.attr.name = "raw";
+	elog->raw_attr.attr.mode = 0400;
+	elog->raw_attr.size = size;
+	elog->raw_attr.read = raw_attr_read;
+
+	elog->id = id;
+	elog->size = size;
+	elog->type = type;
+
+	elog->buffer = kzalloc(elog->size, GFP_KERNEL);
+	/* A failed allocation or read is tolerated: buffer just stays NULL */
+	if (elog->buffer) {
+		rc = opal_read_elog(__pa(elog->buffer),
+					 elog->size, elog->id);
+		if (rc != OPAL_SUCCESS) {
+			pr_err("ELOG: log read failed for log-id=%llx\n",
+			       elog->id);
+			kfree(elog->buffer);
+			elog->buffer = NULL;	/* raw_attr_read() retries on NULL */
+		}
+	}
+
+	rc = kobject_add(&elog->kobj, NULL, "0x%llx", id);
+	if (rc) {
+		kobject_put(&elog->kobj);
+		return;
+	}
+
+	/*
+	 * As soon as the sysfs file for this elog is created/activated there is
+	 * a chance the opal_errd daemon (or any userspace) might read and
+	 * acknowledge the elog before kobject_uevent() is called. If that
+	 * happens then there is a potential race between
+	 * elog_ack_store->kobject_put() and kobject_uevent() which leads to a
+	 * use-after-free of a kernfs object resulting in a kernel crash.
+	 *
+	 * To avoid that, we need to take a reference on behalf of the bin file,
+	 * so that our reference remains valid while we call kobject_uevent().
+	 * We then drop our reference before exiting the function, leaving the
+	 * bin file to drop the last reference (if it hasn't already).
+	 */
+
+	/* Take a reference for the bin file */
+	kobject_get(&elog->kobj);
+	rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr);
+	if (rc == 0) {
+		kobject_uevent(&elog->kobj, KOBJ_ADD);
+	} else {
+		/* Drop the reference taken for the bin file */
+		kobject_put(&elog->kobj);
+	}
+
+	/* Drop our reference */
+	kobject_put(&elog->kobj);
+
+	return;
+}
+
+static irqreturn_t elog_event(int irq, void *data)
+{
+	__be64 size;
+	__be64 id;
+	__be64 type;
+	uint64_t elog_size;
+	uint64_t log_id;
+	uint64_t elog_type;
+	int rc;
+	char name[2+16+1];
+	struct kobject *kobj;
+
+	rc = opal_get_elog_size(&id, &size, &type);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("ELOG: OPAL log info read failed\n");
+		return IRQ_HANDLED;
+	}
+
+	elog_size = be64_to_cpu(size);
+	log_id = be64_to_cpu(id);
+	elog_type = be64_to_cpu(type);
+
+	WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE);
+
+	if (elog_size >= OPAL_MAX_ERRLOG_SIZE)
+		elog_size  =  OPAL_MAX_ERRLOG_SIZE;
+
+	sprintf(name, "0x%llx", log_id);
+
+	/* we may get notified twice, let's handle
+	 * that gracefully and not create two conflicting
+	 * entries.
+	 */
+	kobj = kset_find_obj(elog_kset, name);
+	if (kobj) {
+		/* Drop reference added by kset_find_obj() */
+		kobject_put(kobj);
+		return IRQ_HANDLED;
+	}
+
+	create_elog_obj(log_id, elog_size, elog_type);
+
+	return IRQ_HANDLED;
+}
+
+/* Set up the "elog" kset and IRQ handler; returns 0 or a negative value. */
+int __init opal_elog_init(void)
+{
+	int rc, irq;
+
+	/* ELOG not supported by firmware */
+	if (!opal_check_token(OPAL_ELOG_READ))
+		return -1;
+
+	elog_kset = kset_create_and_add("elog", NULL, opal_kobj);
+	if (!elog_kset) {
+		pr_warn("%s: failed to create elog kset\n", __func__);
+		return -1;
+	}
+
+	irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL));
+	if (!irq) {
+		pr_err("%s: Can't register OPAL event irq (%d)\n", __func__, irq);
+		return -ENODEV;	/* irq is 0 here: returning it would claim success */
+	}
+
+	rc = request_threaded_irq(irq, NULL, elog_event,
+			IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL);
+	if (rc) {
+		pr_err("%s: Can't request OPAL event irq (%d)\n",
+		       __func__, rc);
+		return rc;
+	}
+
+	/* We are now ready to pull error logs from opal. */
+	if (opal_check_token(OPAL_ELOG_RESEND))
+		opal_resend_pending_logs();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
new file mode 100644
index 0000000000..964f464b1b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -0,0 +1,726 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+
+#ifdef CONFIG_PRESERVE_FA_DUMP
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * ensure crash data is preserved in hope that the subsequent memory
+ * preserving kernel boot is going to process this crash data.
+ */
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+	const struct opal_fadump_mem_struct *opal_fdm_active;
+	const __be32 *prop;
+	unsigned long dn;
+	__be64 be_addr = 0;	/* big-endian tag value as filled in by OPAL */
+	u64 addr;
+	s64 ret;
+
+	/* Nothing to preserve unless the device tree has a "dump" node */
+	dn = of_get_flat_dt_subnode_by_name(node, "dump");
+	if (dn == -FDT_ERR_NOTFOUND)
+		return;
+
+	/* Check if dump has been initiated on last reboot. */
+	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+	if (!prop)
+		return;
+
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr);
+	if ((ret != OPAL_SUCCESS) || !be_addr) {
+		pr_debug("Could not get Kernel metadata (%lld)\n", ret);
+		return;
+	}
+
+	/*
+	 * Preserve memory only if kernel memory regions are registered
+	 * with f/w for MPIPL.
+	 */
+	addr = be64_to_cpu(be_addr);
+	pr_debug("Kernel metadata addr: %llx\n", addr);
+	opal_fdm_active = (void *)addr;	/* NOTE(review): no __va() — confirm */
+	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0)
+		return;
+
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &be_addr);
+	if ((ret != OPAL_SUCCESS) || !be_addr) {
+		pr_err("Failed to get boot memory tag (%lld)\n", ret);
+		return;
+	}
+
+	/*
+	 * Memory below this address can be used for booting a
+	 * capture kernel or petitboot kernel. Preserve everything
+	 * above this address for processing crashdump.
+	 */
+	fadump_conf->boot_mem_top = be64_to_cpu(be_addr);
+	pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
+
+	pr_info("Firmware-assisted dump is active.\n");
+	fadump_conf->dump_active = 1;
+}
+
+#else /* CONFIG_PRESERVE_FA_DUMP */
+static const struct opal_fadump_mem_struct *opal_fdm_active;
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
+static struct opal_fadump_mem_struct *opal_fdm;
+
+#ifdef CONFIG_OPAL_CORE
+extern bool kernel_initiated;
+#endif
+
+static int opal_fadump_unregister(struct fw_dump *fadump_conf);
+
+static void opal_fadump_update_config(struct fw_dump *fadump_conf,
+				      const struct opal_fadump_mem_struct *fdm)
+{
+	/* Boot memory as a whole starts where its first region is copied to */
+	const u64 first_dest = be64_to_cpu(fdm->rgn[0].dest);
+
+	pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt));
+
+	fadump_conf->boot_mem_dest_addr = first_dest;
+	pr_debug("Destination address of boot memory regions: %#016llx\n",
+		 fadump_conf->boot_mem_dest_addr);
+
+	/* Mirror the FADump header address recorded in the metadata */
+	fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr);
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * from metadata setup by the first kernel.
+ */
+static void __init opal_fadump_get_config(struct fw_dump *fadump_conf,
+				   const struct opal_fadump_mem_struct *fdm)
+{
+	unsigned long base, size, last_end, hole_size;
+	int i;
+
+	if (!fadump_conf->dump_active)
+		return;
+
+	last_end = 0;
+	hole_size = 0;
+	fadump_conf->boot_memory_size = 0;
+	/* NOTE(review): region_cnt is not checked against the array sizes below */
+	pr_debug("Boot memory regions:\n");
+	for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) {
+		base = be64_to_cpu(fdm->rgn[i].src);
+		size = be64_to_cpu(fdm->rgn[i].size);
+		pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
+
+		fadump_conf->boot_mem_addr[i] = base;
+		fadump_conf->boot_mem_sz[i] = size;
+		fadump_conf->boot_memory_size += size;
+		hole_size += (base - last_end);	/* gap before this region */
+
+		last_end = base + size;
+	}
+
+	/*
+	 * Start address of reserve dump area (permanent reservation) for
+	 * re-registering FADump after dump capture.
+	 */
+	fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest);
+
+	/*
+	 * Rarely, but it can so happen that system crashes before all
+	 * boot memory regions are registered for MPIPL. In such
+	 * cases, warn that the vmcore may not be accurate and proceed
+	 * anyway as that is the best bet considering free pages, cache
+	 * pages, user pages, etc are usually filtered out.
+	 *
+	 * Hope the memory that could not be preserved only has pages
+	 * that are usually filtered out while saving the vmcore.
+	 */
+	if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) {
+		pr_warn("Not all memory regions were saved!!!\n");
+		pr_warn("  Unsaved memory regions:\n");
+		i = be16_to_cpu(fdm->registered_regions);
+		while (i < be16_to_cpu(fdm->region_cnt)) {
+			pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
+				i, be64_to_cpu(fdm->rgn[i].src),
+				be64_to_cpu(fdm->rgn[i].size));
+			i++;
+		}
+
+		pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
+		pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
+	}
+
+	/* Sizes plus the gaps before each region add up to last_end */
+	fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size);
+	fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt);
+	opal_fadump_update_config(fadump_conf, fdm);
+}
+
+/* Initialize kernel metadata */
+static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
+{
+	fdm->version = OPAL_FADUMP_VERSION;
+	fdm->region_cnt = cpu_to_be16(0);
+	fdm->registered_regions = cpu_to_be16(0);
+	fdm->fadumphdr_addr = cpu_to_be64(0);
+}
+
+static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+	u64 addr = fadump_conf->reserve_dump_area_start;	/* next destination */
+	u16 reg_cnt;
+	int i;
+
+	opal_fdm = __va(fadump_conf->kernel_metadata);
+	opal_fadump_init_metadata(opal_fdm);
+
+	/* Boot memory regions */
+	reg_cnt = be16_to_cpu(opal_fdm->region_cnt);	/* 0: metadata was just reset */
+	for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
+		opal_fdm->rgn[i].src	= cpu_to_be64(fadump_conf->boot_mem_addr[i]);
+		opal_fdm->rgn[i].dest	= cpu_to_be64(addr);
+		opal_fdm->rgn[i].size	= cpu_to_be64(fadump_conf->boot_mem_sz[i]);
+
+		reg_cnt++;
+		addr += fadump_conf->boot_mem_sz[i];	/* regions are packed back to back */
+	}
+	opal_fdm->region_cnt = cpu_to_be16(reg_cnt);
+
+	/*
+	 * Kernel metadata is passed to f/w and retrieved in capture kernel.
+	 * So, use it to save fadump header address instead of calculating it.
+	 */
+	opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) +
+					       fadump_conf->boot_memory_size);
+
+	opal_fadump_update_config(fadump_conf, opal_fdm);
+
+	return addr;	/* first address past the last region's destination */
+}
+
+static u64 opal_fadump_get_metadata_size(void)
+{
+	return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct));
+}
+
+static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
+{
+	int err = 0;	/* becomes -EPERM if either tag registration fails */
+	s64 ret;
+
+	/*
+	 * Use the last page(s) in FADump memory reservation for
+	 * kernel metadata.
+	 */
+	fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
+					fadump_conf->reserve_dump_area_size -
+					opal_fadump_get_metadata_size());
+	pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
+
+	/* Initialize kernel metadata before registering the address with f/w */
+	opal_fdm = __va(fadump_conf->kernel_metadata);
+	opal_fadump_init_metadata(opal_fdm);
+
+	/*
+	 * Register metadata address with f/w. Can be retrieved in
+	 * the capture kernel.
+	 */
+	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
+				      fadump_conf->kernel_metadata);
+	if (ret != OPAL_SUCCESS) {
+		pr_err("Failed to set kernel metadata tag!\n");
+		err = -EPERM;	/* no early return: the second tag is still tried */
+	}
+
+	/*
+	 * Register boot memory top address with f/w. Should be retrieved
+	 * by a kernel that intends to preserve crash'ed kernel's memory.
+	 */
+	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
+				      fadump_conf->boot_mem_top);
+	if (ret != OPAL_SUCCESS) {
+		pr_err("Failed to set boot memory tag!\n");
+		err = -EPERM;
+	}
+
+	return err;
+}
+
+static u64 opal_fadump_get_bootmem_min(void)
+{
+	return OPAL_FADUMP_MIN_BOOT_MEM;
+}
+
+static int opal_fadump_register(struct fw_dump *fadump_conf)
+{
+	s64 rc = OPAL_PARAMETER;	/* reported if there are no regions to add */
+	u16 registered_regs;
+	int i, err = -EIO;
+
+	registered_regs = be16_to_cpu(opal_fdm->registered_regions);
+	for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) {
+		rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
+				       be64_to_cpu(opal_fdm->rgn[i].src),
+				       be64_to_cpu(opal_fdm->rgn[i].dest),
+				       be64_to_cpu(opal_fdm->rgn[i].size));
+		if (rc != OPAL_SUCCESS)
+			break;	/* rc keeps the first failure for the switch below */
+
+		registered_regs++;
+	}
+	opal_fdm->registered_regions = cpu_to_be16(registered_regs);
+
+	switch (rc) {
+	case OPAL_SUCCESS:
+		pr_info("Registration is successful!\n");
+		fadump_conf->dump_registered = 1;
+		err = 0;
+		break;
+	case OPAL_RESOURCE:
+		/* If MAX regions limit in f/w is hit, warn and proceed. */
+		pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
+			(be16_to_cpu(opal_fdm->region_cnt) -
+			 be16_to_cpu(opal_fdm->registered_regions)));
+		fadump_conf->dump_registered = 1;
+		err = 0;
+		break;
+	case OPAL_PARAMETER:
+		pr_err("Failed to register. Parameter Error(%lld).\n", rc);
+		break;
+	case OPAL_HARDWARE:
+		pr_err("Support not available.\n");
+		fadump_conf->fadump_supported = 0;
+		fadump_conf->fadump_enabled = 0;
+		break;
+	default:
+		pr_err("Failed to register. Unknown Error(%lld).\n", rc);
+		break;
+	}
+
+	/*
+	 * If some regions were registered before OPAL_MPIPL_ADD_RANGE
+	 * OPAL call failed, unregister all regions.
+	 */
+	if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0))
+		opal_fadump_unregister(fadump_conf);
+
+	return err;	/* 0 for OPAL_SUCCESS and OPAL_RESOURCE, else -EIO */
+}
+
+/* Issue OPAL_MPIPL_REMOVE_ALL; on success clear the registration state. */
+static int opal_fadump_unregister(struct fw_dump *fadump_conf)
+{
+	const s64 opal_rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0);
+
+	if (opal_rc == 0) {
+		/* Nothing is registered any more */
+		opal_fdm->registered_regions = cpu_to_be16(0);
+		fadump_conf->dump_registered = 0;
+		return 0;
+	}
+	pr_err("Failed to un-register - unexpected Error(%lld).\n", opal_rc);
+	return -EIO;
+}
+
+/* Call OPAL_MPIPL_FREE_PRESERVED_MEMORY; on success mark dump inactive. */
+static int opal_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+	const s64 opal_rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY,
+					      0, 0, 0);
+
+	if (opal_rc == 0) {
+		fadump_conf->dump_active = 0;
+		opal_fdm_active = NULL;
+		return 0;
+	}
+	pr_err("Failed to invalidate - unexpected Error(%lld).\n", opal_rc);
+	return -EIO;
+}
+
+/* Reset the kernel metadata tag in f/w; a failure is only warned about. */
+static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
+{
+	s64 ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0);
+
+	if (ret != OPAL_SUCCESS)	/* ret is signed, hence %lld */
+		pr_warn("Could not reset (%lld) kernel metadata tag!\n", ret);
+}
+
+/*
+ * Verify if CPU state data is available. If available, do a bit of sanity
+ * checking before processing this data.
+ */
+static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
+{
+	u64 dest;	/* physical; validated as such since __va(0) is not 0 */
+
+	if (!opal_cpu_metadata)
+		return false;
+
+	dest = be64_to_cpu(opal_cpu_metadata->region[0].dest);
+	fadump_conf->cpu_state_data_version =
+		be32_to_cpu(opal_cpu_metadata->cpu_data_version);
+	fadump_conf->cpu_state_entry_size =
+		be32_to_cpu(opal_cpu_metadata->cpu_data_size);
+	fadump_conf->cpu_state_dest_vaddr = (u64)__va(dest);
+	fadump_conf->cpu_state_data_size =
+		be64_to_cpu(opal_cpu_metadata->region[0].size);
+
+	if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+		pr_warn("Supported CPU state data version: %u, found: %d!\n",
+			HDAT_FADUMP_CPU_DATA_VER, fadump_conf->cpu_state_data_version);
+		pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+	}
+
+	if ((dest == 0) ||
+	    (fadump_conf->cpu_state_entry_size == 0) ||
+	    (fadump_conf->cpu_state_entry_size > fadump_conf->cpu_state_data_size)) {
+		pr_err("CPU state data is invalid. Ignoring!\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Convert CPU state data saved at the time of crash into ELF notes.
+ *
+ * While the crashing CPU's register data is saved by the kernel, CPU state
+ * data for all CPUs is saved by f/w. In CPU state data provided by f/w,
+ * each register entry is of 16 bytes, a numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation. If this data is
+ * missing or in unsupported format, append crashing CPU's register data
+ * saved by the kernel in the PT_NOTE, to have something to work with in
+ * the vmcore file.
+ */
+static int __init
+opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
+			    struct fadump_crash_info_header *fdh)
+{
+	u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+	struct hdat_fadump_thread_hdr *thdr;
+	bool is_cpu_data_valid = false;
+	u32 num_cpus = 1, *note_buf;
+	struct pt_regs regs;
+	char *bufp;
+	int rc, i;
+
+	if (is_opal_fadump_cpu_data_valid(fadump_conf)) {
+		size_per_thread = fadump_conf->cpu_state_entry_size;
+		num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread);
+		/* Already a kernel virtual address: no second __va() */
+		bufp = (char *)fadump_conf->cpu_state_dest_vaddr;
+		is_cpu_data_valid = true;
+	}
+
+	rc = fadump_setup_cpu_notes_buf(num_cpus);
+	if (rc != 0)
+		return rc;
+
+	note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+	if (!is_cpu_data_valid)
+		goto out;
+
+	/*
+	 * Offset for register entries, entry size and registers count is
+	 * duplicated in every thread header in keeping with HDAT format.
+	 * Use these values from the first thread header.
+	 */
+	thdr = (struct hdat_fadump_thread_hdr *)bufp;
+	regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+		       be32_to_cpu(thdr->offset));
+	reg_esize = be32_to_cpu(thdr->esize);
+	regs_cnt  = be32_to_cpu(thdr->ecnt);
+
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("NumCpus     : %u\n", num_cpus);
+	pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+		 regs_offset, reg_esize, regs_cnt);
+
+	for (i = 0; i < num_cpus; i++, bufp += size_per_thread) {
+		thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+		thread_pir = be32_to_cpu(thdr->pir);
+		pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+			 i, thread_pir, thdr->core_state);
+
+		/*
+		 * If this is kernel initiated crash, crashing_cpu would be set
+		 * appropriately and register data of the crashing CPU saved by
+		 * crashing kernel. Add this saved register data of crashing CPU
+		 * to elf notes and populate the pt_regs for the remaining CPUs
+		 * from register state data provided by firmware.
+		 */
+		if (fdh->crashing_cpu == thread_pir) {
+			note_buf = fadump_regs_to_elf_notes(note_buf,
+							    &fdh->regs);
+			pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+				 fdh->crashing_cpu, fdh->regs.gpr[1],
+				 fdh->regs.nip);
+			continue;
+		}
+
+		/*
+		 * Firmware provides register state data for MAX cores, but
+		 * some of these cores may not be active. Check core state
+		 * and skip threads that belong to inactive cores.
+		 */
+		if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+			continue;
+
+		opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+				      reg_esize, true, &regs);
+		note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+		pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+			 thread_pir, regs.gpr[1], regs.nip);
+	}
+
+out:
+	/*
+	 * CPU state data is invalid/unsupported. Try appending crashing CPU's
+	 * register data, if it is saved by the kernel.
+	 */
+	if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) {
+		if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
+			fadump_free_cpu_notes_buf();
+			return -ENODEV;
+		}
+
+		pr_warn("WARNING: appending only crashing CPU's register data\n");
+		note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+	}
+
+	final_note(note_buf);
+
+	pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+		 fdh->elfcorehdr_addr);
+	fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+	return 0;
+}
+
+static int __init opal_fadump_process(struct fw_dump *fadump_conf)
+{
+	struct fadump_crash_info_header *fdh;
+	int rc = -EINVAL;	/* returned by both validation failures below */
+
+	if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
+		return rc;
+
+	/* Validate the fadump crash info header */
+	fdh = __va(fadump_conf->fadumphdr_addr);
+	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return rc;
+	}
+
+#ifdef CONFIG_OPAL_CORE
+	/*
+	 * If this is a kernel initiated crash, crashing_cpu would be set
+	 * appropriately and register data of the crashing CPU saved by
+	 * crashing kernel. Add this saved register data of crashing CPU
+	 * to elf notes and populate the pt_regs for the remaining CPUs
+	 * from register state data provided by firmware.
+	 */
+	if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)
+		kernel_initiated = true;
+#endif
+
+	rc = opal_fadump_build_cpu_notes(fadump_conf, fdh);
+	if (rc)
+		return rc;
+
+	/*
+	 * We are done validating dump info and elfcore header is now ready
+	 * to be exported. set elfcorehdr_addr so that vmcore module will
+	 * export the elfcore header through '/proc/vmcore'.
+	 */
+	elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+	return rc;	/* 0 here: any non-zero rc has already been returned */
+}
+
+/* Print one line per boot memory region (src, dest, size, bytes dumped). */
+static void opal_fadump_region_show(struct fw_dump *fadump_conf,
+				    struct seq_file *m)
+{
+	const struct opal_fadump_mem_struct *fdm_ptr;
+	u64 dumped_bytes;
+	int i;
+
+	/* Show the active dump's metadata if there is one, else our own */
+	fdm_ptr = fadump_conf->dump_active ? opal_fdm_active : opal_fdm;
+
+	for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) {
+		/*
+		 * Only regions registered for MPIPL have dump data. Reset for
+		 * every region so unregistered ones do not show a stale size.
+		 */
+		dumped_bytes = 0;
+		if ((fadump_conf->dump_active) &&
+		    (i < be16_to_cpu(fdm_ptr->registered_regions)))
+			dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size);
+
+		seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+			   be64_to_cpu(fdm_ptr->rgn[i].src),
+			   be64_to_cpu(fdm_ptr->rgn[i].dest));
+		seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+			   be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes);
+	}
+
+	/* Dump is active. Show preserved area start address. */
+	if (fadump_conf->dump_active) {
+		seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
+			   fadump_conf->boot_mem_top);
+	}
+}
+
+static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
+				const char *msg)
+{
+	int rc;
+
+	/*
+	 * The register state data architected by f/w carries no logical
+	 * CPU number (unlike on pSeries). Record the crashing CPU's PIR
+	 * instead, so that this CPU's register data can be matched up
+	 * when the vmcore is put together.
+	 */
+	fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
+	/* Ask f/w for an MPIPL reboot; only two failure codes are reported */
+	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+	if (rc == OPAL_HARDWARE)
+		pr_emerg("No backend support for MPIPL!\n");
+	else if (rc == OPAL_UNSUPPORTED)
+		pr_emerg("Reboot type %d not supported.\n", OPAL_REBOOT_MPIPL);
+}
+
+static struct fadump_ops opal_fadump_ops = {
+	.fadump_init_mem_struct		= opal_fadump_init_mem_struct,
+	.fadump_get_metadata_size	= opal_fadump_get_metadata_size,
+	.fadump_setup_metadata		= opal_fadump_setup_metadata,
+	.fadump_get_bootmem_min		= opal_fadump_get_bootmem_min,
+	.fadump_register		= opal_fadump_register,
+	.fadump_unregister		= opal_fadump_unregister,
+	.fadump_invalidate		= opal_fadump_invalidate,
+	.fadump_cleanup			= opal_fadump_cleanup,
+	.fadump_process			= opal_fadump_process,
+	.fadump_region_show		= opal_fadump_region_show,
+	.fadump_trigger			= opal_fadump_trigger,
+};
+
+/* Scan the "dump" DT node: enable FADump and detect an active dump. */
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+	const __be32 *prop;
+	unsigned long dn;
+	__be64 be_addr;
+	u64 addr = 0;
+	int i, len;
+	s64 ret;
+
+	/*
+	 * Check if Firmware-Assisted Dump is supported. if yes, check
+	 * if dump has been initiated on last reboot.
+	 */
+	dn = of_get_flat_dt_subnode_by_name(node, "dump");
+	if (dn == -FDT_ERR_NOTFOUND) {
+		pr_debug("FADump support is missing!\n");
+		return;
+	}
+
+	if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
+		pr_err("Support missing for this f/w version!\n");
+		return;
+	}
+
+	prop = of_get_flat_dt_prop(dn, "fw-load-area", &len);
+	if (prop) {
+		/*
+		 * Each f/w load area is an (address,size) pair,
+		 * 2 cells each, totalling 4 cells per range.
+		 */
+		for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+			u64 base, end;
+
+			base = of_read_number(prop + (i * 4) + 0, 2);
+			end = base;
+			end += of_read_number(prop + (i * 4) + 2, 2);
+			if (end > OPAL_FADUMP_MIN_BOOT_MEM) {
+				pr_err("F/W load area: 0x%llx-0x%llx\n",
+				       base, end);
+				pr_err("F/W version not supported!\n");
+				return;
+			}
+		}
+	}
+
+	fadump_conf->ops		= &opal_fadump_ops;
+	fadump_conf->fadump_supported	= 1;
+
+	/*
+	 * Firmware supports 32-bit field for size. Align it to PAGE_SIZE
+	 * and request firmware to copy multiple kernel boot memory regions.
+	 */
+	fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE);
+
+	/* Check if dump has been initiated on last reboot. */
+	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+	if (!prop)
+		return;
+
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr);
+	if ((ret != OPAL_SUCCESS) || !be_addr) {
+		pr_err("Failed to get Kernel metadata (%lld)\n", ret);
+		return;
+	}
+
+	addr = be64_to_cpu(be_addr);
+	pr_debug("Kernel metadata addr: %llx\n", addr);
+
+	opal_fdm_active = __va(addr);
+	if (opal_fdm_active->version != OPAL_FADUMP_VERSION) {
+		pr_warn("Supported kernel metadata version: %u, found: %d!\n",
+			OPAL_FADUMP_VERSION, opal_fdm_active->version);
+		pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n");
+	}
+
+	/* Kernel regions not registered with f/w for MPIPL */
+	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) {
+		opal_fdm_active = NULL;
+		return;
+	}
+
+	/* be_addr still holds the kernel tag: trust it only if this succeeds */
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr);
+	if ((ret == OPAL_SUCCESS) && be_addr) {
+		addr = be64_to_cpu(be_addr);
+		pr_debug("CPU metadata addr: %llx\n", addr);
+		opal_cpu_metadata = __va(addr);
+	}
+
+	pr_info("Firmware-assisted dump is active.\n");
+	fadump_conf->dump_active = 1;
+	opal_fadump_get_config(fadump_conf, opal_fdm_active);
+}
+#endif /* !CONFIG_PRESERVE_FA_DUMP */
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
new file mode 100644
index 0000000000..3f715efb0a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _POWERNV_OPAL_FADUMP_H
+#define _POWERNV_OPAL_FADUMP_H
+
+#include <asm/reg.h>
+
+/*
+ * With kernel & initrd loaded at 512MB (with 256MB size), enforce a minimum
+ * boot memory size of 768MB to ensure f/w loading kernel and initrd doesn't
+ * mess with crash'ed kernel's memory during MPIPL.
+ */
+#define OPAL_FADUMP_MIN_BOOT_MEM		(0x30000000UL)
+
+/*
+ * OPAL FADump metadata structure format version
+ *
+ * OPAL FADump kernel metadata structure stores kernel metadata needed to
+ * register-for/process crash dump. Format version is used to keep a tab on
+ * the changes in the structure format. The changes, if any, to the format
+ * are expected to be minimal and backward compatible.
+ */
+#define OPAL_FADUMP_VERSION			0x1
+
+/*
+ * OPAL FADump kernel metadata
+ *
+ * The address of this structure will be registered with f/w for retrieving
+ * in the capture kernel to process the crash dump.
+ */
+struct opal_fadump_mem_struct {
+	u8	version;
+	u8	reserved[3];
+	__be16	region_cnt;		/* number of regions */
+	__be16	registered_regions;	/* Regions registered for MPIPL */
+	__be64	fadumphdr_addr;
+	struct opal_mpipl_region	rgn[FADUMP_MAX_MEM_REGS];
+} __packed;
+
+/*
+ * CPU state data
+ *
+ * CPU state data information is provided by f/w. The format for this data
+ * is defined in the HDAT spec. Version is used to keep a tab on the changes
+ * in this CPU state data format. Changes to this format are unlikely, but
+ * if there are any changes, please refer to latest HDAT specification.
+ */
+#define HDAT_FADUMP_CPU_DATA_VER		1
+
+#define HDAT_FADUMP_CORE_INACTIVE		(0x0F)
+
+/* HDAT thread header for register entries */
+struct hdat_fadump_thread_hdr {
+	__be32  pir;
+	/* 0x00 - 0x0F - The corresponding stop state of the core */
+	u8      core_state;
+	u8      reserved[3];
+
+	__be32	offset;	/* Offset to Register Entries array */
+	__be32	ecnt;	/* Number of entries */
+	__be32	esize;	/* Alloc size of each array entry in bytes */
+	__be32	eactsz;	/* Actual size of each array entry in bytes */
+} __packed;
+
+/* Register types populated by f/w */
+#define HDAT_FADUMP_REG_TYPE_GPR		0x01
+#define HDAT_FADUMP_REG_TYPE_SPR		0x02
+
+/* ID numbers used by f/w while populating certain registers */
+#define HDAT_FADUMP_REG_ID_NIP			0x7D0
+#define HDAT_FADUMP_REG_ID_MSR			0x7D1
+#define HDAT_FADUMP_REG_ID_CCR			0x7D2
+
+/* HDAT register entry. */
+struct hdat_fadump_reg_entry {
+	__be32		reg_type;
+	__be32		reg_num;
+	__be64		reg_val;
+} __packed;
+
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+						 u32 reg_type, u32 reg_num,
+						 u64 reg_val)
+{
+	if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) {
+		if (reg_num < 32)
+			regs->gpr[reg_num] = reg_val;
+		return;
+	}
+
+	switch (reg_num) {
+	case SPRN_CTR:
+		regs->ctr = reg_val;
+		break;
+	case SPRN_LR:
+		regs->link = reg_val;
+		break;
+	case SPRN_XER:
+		regs->xer = reg_val;
+		break;
+	case SPRN_DAR:
+		regs->dar = reg_val;
+		break;
+	case SPRN_DSISR:
+		regs->dsisr = reg_val;
+		break;
+	case HDAT_FADUMP_REG_ID_NIP:
+		regs->nip = reg_val;
+		break;
+	case HDAT_FADUMP_REG_ID_MSR:
+		regs->msr = reg_val;
+		break;
+	case HDAT_FADUMP_REG_ID_CCR:
+		regs->ccr = reg_val;
+		break;
+	}
+}
+
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+					 unsigned int reg_entry_size,
+					 bool cpu_endian,
+					 struct pt_regs *regs)
+{
+	struct hdat_fadump_reg_entry *reg_entry;
+	u64 val;
+	int i;
+
+	memset(regs, 0, sizeof(struct pt_regs));
+
+	for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+		reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+		val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) :
+		       (u64)(reg_entry->reg_val));
+		opal_fadump_set_regval_regnum(regs,
+					      be32_to_cpu(reg_entry->reg_type),
+					      be32_to_cpu(reg_entry->reg_num),
+					      val);
+	}
+}
+
+#endif /* _POWERNV_OPAL_FADUMP_H */
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
new file mode 100644
index 0000000000..d5ea04e8e4
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Firmware Update Interface
+ *
+ * Copyright 2013 IBM Corp.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+
+#include <asm/opal.h>
+
+/* FLASH status codes */
+#define FLASH_NO_OP		-1099	/* No operation initiated by user */
+#define FLASH_NO_AUTH		-9002	/* Not a service authority partition */
+
+/* Validate image status values */
+#define VALIDATE_IMG_READY	-1001	/* Image ready for validation */
+#define VALIDATE_IMG_INCOMPLETE	-1002	/* User copied < VALIDATE_BUF_SIZE */
+
+/* Manage image status values */
+#define MANAGE_ACTIVE_ERR	-9001	/* Cannot overwrite active img */
+
+/* Flash image status values */
+#define FLASH_IMG_READY		0	/* Img ready for flash on reboot */
+#define FLASH_INVALID_IMG	-1003	/* Flash image shorter than expected */
+#define FLASH_IMG_NULL_DATA	-1004	/* Bad data in sg list entry */
+#define FLASH_IMG_BAD_LEN	-1005	/* Bad length in sg list entry */
+
+/* Manage operation tokens */
+#define FLASH_REJECT_TMP_SIDE	0	/* Reject temporary fw image */
+#define FLASH_COMMIT_TMP_SIDE	1	/* Commit temporary fw image */
+
+/* Update tokens */
+#define FLASH_UPDATE_CANCEL	0	/* Cancel update request */
+#define FLASH_UPDATE_INIT	1	/* Initiate update */
+
+/* Validate image update result tokens */
+#define VALIDATE_TMP_UPDATE	0     /* T side will be updated */
+#define VALIDATE_FLASH_AUTH	1     /* Partition does not have authority */
+#define VALIDATE_INVALID_IMG	2     /* Candidate image is not valid */
+#define VALIDATE_CUR_UNKNOWN	3     /* Current fixpack level is unknown */
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image, and the new image is downlevel from current image
+ */
+#define VALIDATE_TMP_COMMIT_DL	4
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image
+ */
+#define VALIDATE_TMP_COMMIT	5
+/*
+ * T side will be updated with a downlevel image
+ */
+#define VALIDATE_TMP_UPDATE_DL	6
+/*
+ * The candidate image's release date is later than the system's firmware
+ * service entitlement date - service warranty period has expired
+ */
+#define VALIDATE_OUT_OF_WRNTY	7
+
+/* Validate buffer size */
+#define VALIDATE_BUF_SIZE	4096
+
+/* XXX: Assume candidate image size is <= 1GB */
+#define MAX_IMAGE_SIZE	0x40000000
+
+/* Image status */
+enum {
+	IMAGE_INVALID,
+	IMAGE_LOADING,
+	IMAGE_READY,
+};
+
+/* Candidate image data */
+struct image_data_t {
+	int		status;
+	void		*data;
+	uint32_t	size;
+};
+
+/* Candidate image header */
+struct image_header_t {
+	uint16_t	magic;
+	uint16_t	version;
+	uint32_t	size;
+};
+
+struct validate_flash_t {
+	int		status;		/* Return status */
+	void		*buf;		/* Candidate image buffer */
+	uint32_t	buf_size;	/* Image size */
+	uint32_t	result;		/* Update results token */
+};
+
+struct manage_flash_t {
+	int status;		/* Return status */
+};
+
+struct update_flash_t {
+	int status;		/* Return status */
+};
+
+static struct image_header_t	image_header;
+static struct image_data_t	image_data;
+static struct validate_flash_t	validate_flash_data;
+static struct manage_flash_t	manage_flash_data;
+
+/* Initialize update_flash_data status to No Operation */
+static struct update_flash_t	update_flash_data = {
+	.status = FLASH_NO_OP,
+};
+
+static DEFINE_MUTEX(image_data_mutex);
+
+/*
+ * Validate candidate image
+ */
+static inline void opal_flash_validate(void)
+{
+	long ret;
+	void *buf = validate_flash_data.buf;
+	__be32 size = cpu_to_be32(validate_flash_data.buf_size);
+	__be32 result;
+
+	ret = opal_validate_flash(__pa(buf), &size, &result);
+
+	validate_flash_data.status = ret;
+	validate_flash_data.buf_size = be32_to_cpu(size);
+	validate_flash_data.result = be32_to_cpu(result);
+}
+
+/*
+ * sysfs show handler for validate_flash. Output format:
+ *     validate result token (or the raw status if validation did not run)
+ *     current image version details
+ *     new image version details
+ */
+static ssize_t validate_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+	int len;
+
+	/* Candidate image is not validated: report the status code only */
+	if (args_buf->status < VALIDATE_TMP_UPDATE) {
+		len = sysfs_emit(buf, "%d\n", args_buf->status);
+		goto out;
+	}
+
+	/* Result token */
+	len = sysfs_emit(buf, "%d\n", args_buf->result);
+
+	/* Version details follow only for TMP_UPDATE and CUR_UNKNOWN upwards */
+	if ((args_buf->result != VALIDATE_TMP_UPDATE) &&
+	    (args_buf->result < VALIDATE_CUR_UNKNOWN))
+		goto out;
+
+	if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) {
+		memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len);
+		len = VALIDATE_BUF_SIZE;
+	} else {
+		memcpy(buf + len, args_buf->buf, args_buf->buf_size);
+		len += args_buf->buf_size;
+	}
+out:
+	/* Reading consumes the result: set status back to default */
+	args_buf->status = FLASH_NO_OP;
+	return len;
+}
+
+/*
+ * Validate candidate firmware image
+ *
+ * Note:
+ *   We are only interested in first 4K bytes of the
+ *   candidate image.
+ */
+static ssize_t validate_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+
+	if (buf[0] != '1')
+		return -EINVAL;
+
+	mutex_lock(&image_data_mutex);
+
+	if (image_data.status != IMAGE_READY ||
+	    image_data.size < VALIDATE_BUF_SIZE) {
+		args_buf->result = VALIDATE_INVALID_IMG;
+		args_buf->status = VALIDATE_IMG_INCOMPLETE;
+		goto out;
+	}
+
+	/* Copy first 4k bytes of candidate image */
+	memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE);
+
+	args_buf->status = VALIDATE_IMG_READY;
+	args_buf->buf_size = VALIDATE_BUF_SIZE;
+
+	/* Validate candidate image */
+	opal_flash_validate();
+
+out:
+	mutex_unlock(&image_data_mutex);
+	return count;
+}
+
+/*
+ * Manage flash routine
+ */
+static inline void opal_flash_manage(uint8_t op)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+
+	args_buf->status = opal_manage_flash(op);
+}
+
+/*
+ * Show manage flash status; reading it resets the status to FLASH_NO_OP
+ */
+static ssize_t manage_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+	int rc;
+
+	rc = sysfs_emit(buf, "%d\n", args_buf->status);
+	/* Set status to default */
+	args_buf->status = FLASH_NO_OP;
+	return rc;
+}
+
+/*
+ * Manage operations:
+ *   0 - Reject
+ *   1 - Commit
+ */
+static ssize_t manage_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	uint8_t op;
+	switch (buf[0]) {
+	case '0':
+		op = FLASH_REJECT_TMP_SIDE;
+		break;
+	case '1':
+		op = FLASH_COMMIT_TMP_SIDE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* commit/reject temporary image */
+	opal_flash_manage(op);
+	return count;
+}
+
+/*
+ * Pass the loaded image to OPAL as a scatter-gather list, or cancel.
+ * Returns OPAL's status, or OPAL_PARAMETER if no list could be built.
+ */
+static int opal_flash_update(int op)
+{
+	struct opal_sg_list *list;
+	unsigned long addr;
+	int64_t rc = OPAL_PARAMETER;
+
+	if (op == FLASH_UPDATE_CANCEL) {
+		pr_alert("FLASH: Image update cancelled\n");
+		addr = 0;	/* a zero list address is passed to cancel */
+		goto flash;
+	}
+
+	list = opal_vmalloc_to_sg_list(image_data.data, image_data.size);
+	if (!list)
+		goto invalid_img;
+
+	/* First entry address */
+	addr = __pa(list);
+
+flash:
+	rc = opal_update_flash(addr);
+invalid_img:
+	return rc;
+}
+
+/* This gets called just before system reboots */
+void opal_flash_update_print_message(void)
+{
+	if (update_flash_data.status != FLASH_IMG_READY)
+		return;
+
+	pr_alert("FLASH: Flashing new firmware\n");
+	pr_alert("FLASH: Image is %u bytes\n", image_data.size);
+	pr_alert("FLASH: Performing flash and reboot/shutdown\n");
+	pr_alert("FLASH: This will take several minutes. Do not power off!\n");
+
+	/* Small delay to help getting the above message out */
+	msleep(500);
+}
+
+/*
+ * Show the update_flash status; unlike manage/validate it is not reset here
+ */
+static ssize_t update_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+	return sysfs_emit(buf, "%d\n", args_buf->status);
+}
+
+/*
+ * Set update image flag
+ *  1 - Flash new image
+ *  0 - Cancel flash request
+ */
+static ssize_t update_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+	int rc = count;
+
+	mutex_lock(&image_data_mutex);
+
+	switch (buf[0]) {
+	case '0':
+		if (args_buf->status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+		args_buf->status = FLASH_NO_OP;
+		break;
+	case '1':
+		/* Image is loaded? */
+		if (image_data.status == IMAGE_READY)
+			args_buf->status =
+				opal_flash_update(FLASH_UPDATE_INIT);
+		else
+			args_buf->status = FLASH_INVALID_IMG;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * Unpin every page of the candidate image buffer and release it.
+ */
+static void free_image_buf(void)
+{
+	void *page_addr = image_data.data;
+	int remaining = PAGE_ALIGN(image_data.size);
+
+	/* Undo the SetPageReserved() done per page by alloc_image_buf() */
+	for (; remaining > 0; remaining -= PAGE_SIZE) {
+		ClearPageReserved(vmalloc_to_page(page_addr));
+		page_addr += PAGE_SIZE;
+	}
+
+	vfree(image_data.data);
+	/* Record that no image is held any more */
+	image_data.data = NULL;
+	image_data.status = IMAGE_INVALID;
+}
+
+/*
+ * Allocate image buffer.
+ */
+static int alloc_image_buf(char *buffer, size_t count)
+{
+	void *addr;
+	int size;
+
+	if (count < sizeof(image_header)) {
+		pr_warn("FLASH: Invalid candidate image\n");
+		return -EINVAL;
+	}
+
+	memcpy(&image_header, (void *)buffer, sizeof(image_header));
+	image_data.size = be32_to_cpu(image_header.size);
+	pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
+
+	if (image_data.size > MAX_IMAGE_SIZE) {
+		pr_warn("FLASH: Too large image\n");
+		return -EINVAL;
+	}
+	if (image_data.size < VALIDATE_BUF_SIZE) {
+		pr_warn("FLASH: Image is shorter than expected\n");
+		return -EINVAL;
+	}
+
+	image_data.data = vzalloc(PAGE_ALIGN(image_data.size));
+	if (!image_data.data) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Pin memory */
+	addr = image_data.data;
+	size = PAGE_ALIGN(image_data.size);
+	while (size > 0) {
+		SetPageReserved(vmalloc_to_page(addr));
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	image_data.status = IMAGE_LOADING;
+	return 0;
+}
+
+/*
+ * Copy candidate image; a write at offset 0 starts a new image, sized from
+ * the parsed image header. Returns bytes copied or a negative errno.
+ */
+static ssize_t image_data_write(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr,
+				char *buffer, loff_t pos, size_t count)
+{
+	int rc;
+
+	mutex_lock(&image_data_mutex);
+
+	/* New image ? */
+	if (pos == 0) {
+		/* Cancel pending update before freeing its image */
+		if (update_flash_data.status == FLASH_IMG_READY) {
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+			update_flash_data.status = FLASH_NO_OP;
+		}
+
+		/* Free memory, if already allocated */
+		if (image_data.data)
+			free_image_buf();
+
+		/* Allocate memory */
+		rc = alloc_image_buf(buffer, count);
+		if (rc)
+			goto out;
+	}
+
+	if (image_data.status != IMAGE_LOADING) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if ((pos + count) > image_data.size) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	memcpy(image_data.data + pos, (void *)buffer, count);
+	rc = count;
+
+	/* Image is complete once the last byte of the declared size arrives */
+	if ((pos + count) == image_data.size) {
+		pr_debug("FLASH: Candidate image loaded....\n");
+		image_data.status = IMAGE_READY;
+	}
+
+out:
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * sysfs interface :
+ *  OPAL uses below sysfs files for code update.
+ *  We create these files under /sys/firmware/opal.
+ *
+ *   image		: Interface to load candidate firmware image
+ *   validate_flash	: Validate firmware image
+ *   manage_flash	: Commit/Reject firmware image
+ *   update_flash	: Flash new firmware image
+ *
+ */
+static const struct bin_attribute image_data_attr = {
+	.attr = {.name = "image", .mode = 0200},
+	.size = MAX_IMAGE_SIZE,	/* Limit image size */
+	.write = image_data_write,
+};
+
+static struct kobj_attribute validate_attribute =
+	__ATTR(validate_flash, 0600, validate_show, validate_store);
+
+static struct kobj_attribute manage_attribute =
+	__ATTR(manage_flash, 0600, manage_show, manage_store);
+
+static struct kobj_attribute update_attribute =
+	__ATTR(update_flash, 0600, update_show, update_store);
+
+static struct attribute *image_op_attrs[] = {
+	&validate_attribute.attr,
+	&manage_attribute.attr,
+	&update_attribute.attr,
+	NULL	/* need to NULL terminate the list of attributes */
+};
+
+static const struct attribute_group image_op_attr_group = {
+	.attrs = image_op_attrs,
+};
+
+void __init opal_flash_update_init(void)
+{
+	int ret;
+
+	/* Firmware update is not supported by firmware */
+	if (!opal_check_token(OPAL_FLASH_VALIDATE))
+		return;
+
+	/* Allocate validate image buffer */
+	validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
+	if (!validate_flash_data.buf) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return;
+	}
+
+	/* Make sure /sys/firmware/opal directory is created */
+	if (!opal_kobj) {
+		pr_warn("FLASH: opal kobject is not available\n");
+		goto nokobj;
+	}
+
+	/* Create the sysfs files */
+	ret = sysfs_create_group(opal_kobj, &image_op_attr_group);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nokobj;
+	}
+
+	ret = sysfs_create_bin_file(opal_kobj, &image_data_attr);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nosysfs_file;
+	}
+
+	/* Set default status */
+	validate_flash_data.status = FLASH_NO_OP;
+	manage_flash_data.status = FLASH_NO_OP;
+	update_flash_data.status = FLASH_NO_OP;
+	image_data.status = IMAGE_INVALID;
+	return;
+
+nosysfs_file:
+	sysfs_remove_group(opal_kobj, &image_op_attr_group);
+
+nokobj:
+	kfree(validate_flash_data.buf);
+	return;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
new file mode 100644
index 0000000000..f0c1830deb
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
+ *
+ * Copyright 2014 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+
+#include "powernv.h"
+
+static int opal_hmi_handler_nb_init;
+struct OpalHmiEvtNode {
+	struct list_head list;
+	struct OpalHMIEvent hmi_evt;
+};
+
+struct xstop_reason {
+	uint32_t xstop_reason;
+	const char *unit_failed;
+	const char *description;
+};
+
+static LIST_HEAD(opal_hmi_evt_list);
+static DEFINE_SPINLOCK(opal_hmi_evt_lock);
+
+static void print_core_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	int i;
+	static const struct xstop_reason xstop_reason[] = {
+		{ CORE_CHECKSTOP_IFU_REGFILE, "IFU",
+				"RegFile core check stop" },
+		{ CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
+				"Core checkstop during recovery" },
+		{ CORE_CHECKSTOP_ISU_REGFILE, "ISU",
+				"RegFile core check stop (mapper error)" },
+		{ CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
+				"Recovery in maintenance mode" },
+		{ CORE_CHECKSTOP_LSU_REGFILE, "LSU",
+				"RegFile core check stop" },
+		{ CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
+				"Forward Progress Error" },
+		{ CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
+				"Hypervisor Resource error - core check stop" },
+		{ CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
+				"Hang Recovery Failed (core check stop)" },
+		{ CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
+				"Ambiguous Hang Detected (unknown source)" },
+		{ CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
+				"Debug Trigger Error inject" },
+		{ CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
+				"Hypervisor check stop via SPRC/SPRD" },
+	};
+
+	/* Validity check */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s	Unknown Core check stop.\n", level);
+		return;
+	}
+
+	printk("%s	CPU PIR: %08x\n", level,
+			be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
+	for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
+		if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
+					xstop_reason[i].xstop_reason)
+			printk("%s	[Unit: %-3s] %s\n", level,
+					xstop_reason[i].unit_failed,
+					xstop_reason[i].description);
+}
+
+static void print_nx_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	int i;
+	static const struct xstop_reason xstop_reason[] = {
+		{ NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
+					"SHM invalid state error" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
+					"DMA invalid state error bit 15" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
+					"DMA invalid state error bit 16" },
+		{ NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 0 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 1 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 2 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 3 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 4 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 5 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 6 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
+					"Channel 7 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
+					"UE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
+					"SUE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
+		"CRB Kill ISN received while holding ISN with UE error" },
+	};
+
+	/* Validity check */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s	Unknown NX check stop.\n", level);
+		return;
+	}
+
+	printk("%s	NX checkstop on CHIP ID: %x\n", level,
+			be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+	for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
+		if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
+					xstop_reason[i].xstop_reason)
+			printk("%s	[Unit: %-3s] %s\n", level,
+					xstop_reason[i].unit_failed,
+					xstop_reason[i].description);
+}
+
+static void print_npu_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	uint8_t reason, reason_count, i;
+
+	/*
+	 * We may not have a checkstop reason on some combination of
+	 * hardware and/or skiboot version
+	 */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s	NPU checkstop on chip %x\n", level,
+			be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+		return;
+	}
+
+	/*
+	 * NPU2 has 3 FIRs. Reason encoded on a byte as:
+	 *   2 bits for the FIR number
+	 *   6 bits for the bit number
+	 * It may be possible to find several reasons.
+	 *
+	 * We don't display a specific message per FIR bit as there
+	 * are too many and most are meaningless without the workbook
+	 * and/or hw team help anyway.
+	 */
+	reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) /
+		sizeof(reason);
+	for (i = 0; i < reason_count; i++) {
+		reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF;
+		if (reason)
+			printk("%s	NPU checkstop on chip %x: FIR%d bit %d is set\n",
+				level,
+				be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id),
+				reason >> 6, reason & 0x3F);
+	}
+}
+
+static void print_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	uint8_t type = hmi_evt->u.xstop_error.xstop_type;
+	switch (type) {
+	case CHECKSTOP_TYPE_CORE:
+		print_core_checkstop_reason(level, hmi_evt);
+		break;
+	case CHECKSTOP_TYPE_NX:
+		print_nx_checkstop_reason(level, hmi_evt);
+		break;
+	case CHECKSTOP_TYPE_NPU:
+		print_npu_checkstop_reason(level, hmi_evt);
+		break;
+	default:
+		printk("%s	Unknown Malfunction Alert of type %d\n",
+		       level, type);
+		break;
+	}
+}
+
+static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
+{
+	const char *level, *sevstr, *error_info;
+	static const char *hmi_error_types[] = {
+		"Malfunction Alert",
+		"Processor Recovery done",
+		"Processor recovery occurred again",
+		"Processor recovery occurred for masked error",
+		"Timer facility experienced an error",
+		"TFMR SPR is corrupted",
+		"UPS (Uninterrupted Power System) Overflow indication",
+		"An XSCOM operation failure",
+		"An XSCOM operation completed",
+		"SCOM has set a reserved FIR bit to cause recovery",
+		"Debug trigger has set a reserved FIR bit to cause recovery",
+		"A hypervisor resource error occurred",
+		"CAPP recovery process is in progress",
+	};
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+
+	/* Print things out */
+	if (hmi_evt->version < OpalHMIEvt_V1) {
+		pr_err("HMI Interrupt, Unknown event version %d !\n",
+			hmi_evt->version);
+		return;
+	}
+	switch (hmi_evt->severity) {
+	case OpalHMI_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case OpalHMI_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "";
+		break;
+	case OpalHMI_SEV_ERROR_SYNC:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case OpalHMI_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) {
+		printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
+			level, sevstr,
+			hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
+			"Recovered" : "Not recovered");
+		error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
+				hmi_error_types[hmi_evt->type]
+				: "Unknown";
+		printk("%s Error detail: %s\n", level, error_info);
+		printk("%s	HMER: %016llx\n", level,
+					be64_to_cpu(hmi_evt->hmer));
+		if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
+			(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
+			printk("%s	TFMR: %016llx\n", level,
+						be64_to_cpu(hmi_evt->tfmr));
+	}
+
+	if (hmi_evt->version < OpalHMIEvt_V2)
+		return;
+
+	/* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
+	if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
+		print_checkstop_reason(level, hmi_evt);
+}
+
+static void hmi_event_handler(struct work_struct *work)
+{
+	unsigned long flags;
+	struct OpalHMIEvent *hmi_evt;
+	struct OpalHmiEvtNode *msg_node;
+	uint8_t disposition;
+	struct opal_msg msg;
+	int unrecoverable = 0;
+
+	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	while (!list_empty(&opal_hmi_evt_list)) {
+		msg_node = list_entry(opal_hmi_evt_list.next,
+					   struct OpalHmiEvtNode, list);
+		list_del(&msg_node->list);
+		spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+		hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
+		print_hmi_event_info(hmi_evt);
+		disposition = hmi_evt->disposition;
+		kfree(msg_node);
+
+		/*
+		 * Check if HMI event has been recovered or not. If not
+		 * then kernel can't continue, we need to panic.
+		 * But before we do that, display all the HMI event
+		 * available on the list and set unrecoverable flag to 1.
+		 */
+		if (disposition != OpalHMI_DISPOSITION_RECOVERED)
+			unrecoverable = 1;
+
+		spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	}
+	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+	if (unrecoverable) {
+		/* Pull all HMI events from OPAL before we panic. */
+		while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
+			u32 type;
+
+			type = be32_to_cpu(msg.msg_type);
+
+			/* skip if not HMI event */
+			if (type != OPAL_MSG_HMI_EVT)
+				continue;
+
+			/* HMI event info starts from param[0] */
+			hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
+			print_hmi_event_info(hmi_evt);
+		}
+
+		pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
+	}
+}
+
+static DECLARE_WORK(hmi_event_work, hmi_event_handler);
+/*
+ * opal_handle_hmi_event - notifier handler that queues up HMI events,
+ * in arrival order, to be processed later by hmi_event_work.
+ */
+static int opal_handle_hmi_event(struct notifier_block *nb,
+			  unsigned long msg_type, void *msg)
+{
+	unsigned long flags;
+	struct OpalHMIEvent *hmi_evt;
+	struct opal_msg *hmi_msg = msg;
+	struct OpalHmiEvtNode *msg_node;
+
+	/* Sanity Checks */
+	if (msg_type != OPAL_MSG_HMI_EVT)
+		return 0;
+
+	/* HMI event info starts from param[0] */
+	hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
+
+	/* Delay the logging of HMI events to workqueue. */
+	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+	if (!msg_node) {
+		pr_err("HMI: out of memory, Opal message event not handled\n");
+		return -ENOMEM;
+	}
+	memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
+
+	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	list_add_tail(&msg_node->list, &opal_hmi_evt_list);
+	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+	schedule_work(&hmi_event_work);
+	return 0;
+}
+
+static struct notifier_block opal_hmi_handler_nb = {
+	.notifier_call	= opal_handle_hmi_event,
+	.next		= NULL,
+	.priority	= 0,
+};
+
+int __init opal_hmi_handler_init(void)
+{
+	int err;
+
+	/* Register the HMI notifier only once */
+	if (opal_hmi_handler_nb_init)
+		return 0;
+
+	err = opal_message_notifier_register(OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb);
+	if (!err)
+		opal_hmi_handler_nb_init = 1;
+	else
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+		       __func__, err);
+	return err;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 0000000000..828fc4d884
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright	(C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *		(C) 2017 Anju T Sudhakar, IBM Corporation.
+ *		(C) 2017 Hemant K Shaw, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/crash_dump.h>
+#include <linux/debugfs.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+
+static struct dentry *imc_debugfs_parent;
+
+/* Helpers to export imc command and mode via debugfs */
+static int imc_mem_get(void *data, u64 *val)
+{
+	*val = cpu_to_be64(*(u64 *)data);
+	return 0;
+}
+
+static int imc_mem_set(void *data, u64 val)
+{
+	*(u64 *)data = cpu_to_be64(val);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+static void imc_debugfs_create_x64(const char *name, umode_t mode,
+				   struct dentry *parent, u64  *value)
+{
+	debugfs_create_file_unsafe(name, mode, parent, value, &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create the "imc" debugfs directory and, for every
+ * mem_info entry of @pmu_ptr (the array ends at the first NULL vbase), one
+ * imc_mode_<id> and one imc_cmd_<id> file mapped onto the control block.
+ * The control block offset comes from the "cb_offset" property of @node,
+ * falling back to IMC_CNTL_BLK_OFFSET. Both files are created with 0600.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+				    struct imc_pmu *pmu_ptr)
+{
+	u64 loc, *imc_mode_addr, *imc_cmd_addr;
+	char mode[16], cmd[16];
+	u32 cb_offset;
+	struct imc_mem_info *ptr = pmu_ptr->mem_info;
+
+	imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir);
+
+	if (of_property_read_u32(node, "cb_offset", &cb_offset))
+		cb_offset = IMC_CNTL_BLK_OFFSET;
+
+	while (ptr->vbase != NULL) {
+		loc = (u64)(ptr->vbase) + cb_offset;
+		imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+		/* Bounded: a large id must not overrun the 16-byte name. */
+		snprintf(mode, sizeof(mode), "imc_mode_%d", (u32)(ptr->id));
+		imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+				       imc_mode_addr);
+		imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+		snprintf(cmd, sizeof(cmd), "imc_cmd_%d", (u32)(ptr->id));
+		imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+				       imc_cmd_addr);
+		ptr++;
+	}
+}
+
+/*
+ * imc_get_mem_addr_nest: Function to get nest counter memory region
+ * for each chip
+ */
+static int imc_get_mem_addr_nest(struct device_node *node,
+				 struct imc_pmu *pmu_ptr,
+				 u32 offset)
+{
+	int nr_chips = 0, i;
+	u64 *base_addr_arr, baddr;
+	u32 *chipid_arr;
+
+	nr_chips = of_property_count_u32_elems(node, "chip-id");
+	if (nr_chips <= 0)
+		return -ENODEV;
+
+	base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL);
+	if (!base_addr_arr)
+		return -ENOMEM;
+
+	chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
+	if (!chipid_arr) {
+		kfree(base_addr_arr);
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips))
+		goto error;
+
+	if (of_property_read_u64_array(node, "base-addr", base_addr_arr,
+								nr_chips))
+		goto error;
+
+	pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info),
+				    GFP_KERNEL);
+	if (!pmu_ptr->mem_info)
+		goto error;
+
+	for (i = 0; i < nr_chips; i++) {
+		pmu_ptr->mem_info[i].id = chipid_arr[i];
+		baddr = base_addr_arr[i] + offset;
+		pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr);
+	}
+
+	pmu_ptr->imc_counter_mmaped = true;
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return 0;
+
+error:
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return -1;
+}
+
+/*
+ * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
+ *		    and domain as the inputs.
+ * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
+ */
+static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+{
+	int ret = 0;
+	struct imc_pmu *pmu_ptr;
+	u32 offset;
+
+	/* Return for unknown domain */
+	if (domain < 0)
+		return NULL;
+
+	/* memory for pmu */
+	pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
+	if (!pmu_ptr)
+		return NULL;
+
+	/* Set the domain */
+	pmu_ptr->domain = domain;
+
+	ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
+	if (ret)
+		goto free_pmu;
+
+	if (!of_property_read_u32(parent, "offset", &offset)) {
+		if (imc_get_mem_addr_nest(parent, pmu_ptr, offset))
+			goto free_pmu;
+	}
+
+	/* Function to register IMC pmu */
+	ret = init_imc_pmu(parent, pmu_ptr, pmu_index);
+	if (ret) {
+		pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+		kfree(pmu_ptr->pmu.name);
+		if (pmu_ptr->domain == IMC_DOMAIN_NEST)
+			kfree(pmu_ptr->mem_info);
+		kfree(pmu_ptr);
+		return NULL;
+	}
+
+	return pmu_ptr;
+
+free_pmu:
+	kfree(pmu_ptr);
+	return NULL;
+}
+
+/* Stop the nest counters via one online CPU of every node that has CPUs. */
+static void disable_nest_pmu_counters(void)
+{
+	int node, first;
+
+	cpus_read_lock();
+	for_each_node_with_cpus(node) {
+		first = cpumask_first_and(cpumask_of_node(node),
+					  cpu_online_mask);
+		/* No online CPU on this node: nothing to issue the call from. */
+		if (first < nr_cpu_ids)
+			opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+					       get_hard_smp_processor_id(first));
+	}
+	cpus_read_unlock();
+}
+
+static void disable_core_pmu_counters(void)
+{
+	int cpu, rc;
+
+	cpus_read_lock();
+	/* Disable the IMC Core functions */
+	for_each_online_cpu(cpu) {
+		if (cpu_first_thread_sibling(cpu) != cpu)
+			continue;
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					    get_hard_smp_processor_id(cpu));
+		if (rc)
+			pr_err("%s: Failed to stop Core (cpu = %d)\n",
+				__func__, cpu);
+	}
+	cpus_read_unlock();
+}
+
+/* Count the IMC unit nodes whose "type" property is IMC_TYPE_CHIP. */
+int get_max_nest_dev(void)
+{
+	struct device_node *unit;
+	u32 nest_units = 0;
+	u32 unit_type;
+
+	for_each_compatible_node(unit, NULL, IMC_DTB_UNIT_COMPAT) {
+		/* Nodes without a readable "type" are not counted. */
+		if (!of_property_read_u32(unit, "type", &unit_type) &&
+		    unit_type == IMC_TYPE_CHIP)
+			nest_units++;
+	}
+	return nest_units;
+}
+
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+	struct device_node *imc_dev = pdev->dev.of_node;
+	struct imc_pmu *pmu;
+	int pmu_count = 0, domain;
+	bool core_imc_reg = false, thread_imc_reg = false;
+	u32 type;
+
+	/*
+	 * Check whether this is kdump kernel. If yes, force the engines to
+	 * stop and return.
+	 */
+	if (is_kdump_kernel()) {
+		disable_nest_pmu_counters();
+		disable_core_pmu_counters();
+		return -ENODEV;
+	}
+
+	for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+		pmu = NULL;
+		if (of_property_read_u32(imc_dev, "type", &type)) {
+			pr_warn("IMC Device without type property\n");
+			continue;
+		}
+
+		switch (type) {
+		case IMC_TYPE_CHIP:
+			domain = IMC_DOMAIN_NEST;
+			break;
+		case IMC_TYPE_CORE:
+			domain =IMC_DOMAIN_CORE;
+			break;
+		case IMC_TYPE_THREAD:
+			domain = IMC_DOMAIN_THREAD;
+			break;
+		case IMC_TYPE_TRACE:
+			domain = IMC_DOMAIN_TRACE;
+			break;
+		default:
+			pr_warn("IMC Unknown Device type \n");
+			domain = -1;
+			break;
+		}
+
+		pmu = imc_pmu_create(imc_dev, pmu_count, domain);
+		if (pmu != NULL) {
+			if (domain == IMC_DOMAIN_NEST) {
+				if (!imc_debugfs_parent)
+					export_imc_mode_and_cmd(imc_dev, pmu);
+				pmu_count++;
+			}
+			if (domain == IMC_DOMAIN_CORE)
+				core_imc_reg = true;
+			if (domain == IMC_DOMAIN_THREAD)
+				thread_imc_reg = true;
+		}
+	}
+
+	/* If core imc is not registered, unregister thread-imc */
+	if (!core_imc_reg && thread_imc_reg)
+		unregister_thread_imc();
+
+	return 0;
+}
+
+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+	/*
+	 * Function only stops the engines which is bare minimum.
+	 * TODO: Need to handle proper memory cleanup and pmu
+	 * unregister.
+	 */
+	disable_nest_pmu_counters();
+	disable_core_pmu_counters();
+}
+
+static const struct of_device_id opal_imc_match[] = {
+	{ .compatible = IMC_DTB_COMPAT },
+	{},
+};
+
+static struct platform_driver opal_imc_driver = {
+	.driver = {
+		.name = "opal-imc-counters",
+		.of_match_table = opal_imc_match,
+	},
+	.probe = opal_imc_counters_probe,
+	.shutdown = opal_imc_counters_shutdown,
+};
+
+builtin_platform_driver(opal_imc_driver);
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
new file mode 100644
index 0000000000..56a1f7ce78
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file implements an irqchip for OPAL events. Whenever there is
+ * an interrupt that is handled by OPAL we get passed a list of events
+ * that Linux needs to do something about. These basically look like
+ * interrupts to Linux so we implement an irqchip to handle them.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2014.
+ */
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+
+/* Maximum number of events supported by OPAL firmware */
+#define MAX_NUM_EVENTS 64
+
+struct opal_event_irqchip {
+	struct irq_chip irqchip;
+	struct irq_domain *domain;
+	unsigned long mask;
+};
+static struct opal_event_irqchip opal_event_irqchip;
+static u64 last_outstanding_events;
+static int opal_irq_count;
+static struct resource *opal_irqs;
+
+void opal_handle_events(void)
+{
+	__be64 events = 0;
+	u64 e;
+
+	e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask;
+again:
+	while (e) {
+		int hwirq;
+
+		hwirq = fls64(e) - 1;
+		e &= ~BIT_ULL(hwirq);
+
+		local_irq_disable();
+		irq_enter();
+		generic_handle_domain_irq(opal_event_irqchip.domain, hwirq);
+		irq_exit();
+		local_irq_enable();
+
+		cond_resched();
+	}
+	WRITE_ONCE(last_outstanding_events, 0);
+	if (opal_poll_events(&events) != OPAL_SUCCESS)
+		return;
+	e = be64_to_cpu(events) & opal_event_irqchip.mask;
+	if (e)
+		goto again;
+}
+
+/* Report whether any event latched in last_outstanding_events is pending. */
+bool opal_have_pending_events(void)
+{
+	/* Only events whose bit is set in the irqchip mask are considered. */
+	return !!(READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask);
+}
+
+static void opal_event_mask(struct irq_data *d)
+{
+	clear_bit(d->hwirq, &opal_event_irqchip.mask);
+}
+
+static void opal_event_unmask(struct irq_data *d)
+{
+	set_bit(d->hwirq, &opal_event_irqchip.mask);
+	if (opal_have_pending_events())
+		opal_wake_poller();
+}
+
+static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	/*
+	 * For now we only support level triggered events. The irq
+	 * handler will be called continuously until the event has
+	 * been cleared in OPAL.
+	 */
+	if (flow_type != IRQ_TYPE_LEVEL_HIGH)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct opal_event_irqchip opal_event_irqchip = {
+	.irqchip = {
+		.name = "OPAL EVT",
+		.irq_mask = opal_event_mask,
+		.irq_unmask = opal_event_unmask,
+		.irq_set_type = opal_event_set_type,
+	},
+	.mask = 0,
+};
+
+static int opal_event_map(struct irq_domain *d, unsigned int irq,
+			irq_hw_number_t hwirq)
+{
+	irq_set_chip_data(irq, &opal_event_irqchip);
+	irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip,
+				handle_level_irq);
+
+	return 0;
+}
+
+static irqreturn_t opal_interrupt(int irq, void *data)
+{
+	__be64 events;
+
+	opal_handle_interrupt(virq_to_hw(irq), &events);
+	WRITE_ONCE(last_outstanding_events, be64_to_cpu(events));
+	if (opal_have_pending_events())
+		opal_wake_poller();
+
+	return IRQ_HANDLED;
+}
+
+static int opal_event_match(struct irq_domain *h, struct device_node *node,
+			    enum irq_domain_bus_token bus_token)
+{
+	return irq_domain_get_of_node(h) == node;
+}
+
+static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+static const struct irq_domain_ops opal_event_domain_ops = {
+	.match	= opal_event_match,
+	.map	= opal_event_map,
+	.xlate	= opal_event_xlate,
+};
+
+void opal_event_shutdown(void)
+{
+	unsigned int idx;
+
+	/* Free each recorded irq, or only disable it if in irq/irqs-off context */
+	for (idx = 0; opal_irqs && idx < opal_irq_count; idx++) {
+		struct resource *res = &opal_irqs[idx];
+
+		if (!res->start)
+			continue;
+		if (in_interrupt() || irqs_disabled())
+			disable_irq_nosync(res->start);
+		else
+			free_irq(res->start, NULL);
+		res->start = 0;
+	}
+}
+
+/*
+ * opal_event_init - create the OPAL event irq domain and install handlers for
+ * the interrupts listed under /ibm,opal (standard "interrupts" property, or
+ * the legacy "opal-interrupts" one). Returns 0 or a negative error code.
+ */
+int __init opal_event_init(void)
+{
+	struct device_node *dn, *opal_node;
+	bool old_style = false;
+	int i, rc = 0;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_warn("opal: Node not found\n");
+		return -ENODEV;
+	}
+
+	/* If dn is NULL the domain won't be linked to a DT node, so
+	 * irq_of_parse_and_map(...) won't work. That shouldn't be a problem:
+	 * a version of skiboot that doesn't have the dn also gives devices
+	 * without the correct properties, and they have to fall back to the
+	 * legacy method (opal_event_request(...)) anyway. */
+	dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event");
+	opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS,
+				&opal_event_domain_ops, &opal_event_irqchip);
+	of_node_put(dn);
+	if (!opal_event_irqchip.domain) {
+		pr_warn("opal: Unable to create irq domain\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Look for new-style (standard) "interrupts" property */
+	opal_irq_count = of_irq_count(opal_node);
+
+	/* Absent ? Count the old-style "opal-interrupts" entries instead */
+	if (opal_irq_count < 1) {
+		rc = of_property_count_u32_elems(opal_node, "opal-interrupts");
+		if (rc > 0)
+			opal_irq_count = rc;
+		old_style = true;
+	}
+
+	/* No interrupts ? Bail out */
+	if (!opal_irq_count)
+		goto out;
+
+	pr_debug("OPAL: Found %d interrupts reserved for OPAL using %s scheme\n",
+		 opal_irq_count, old_style ? "old" : "new");
+
+	/* Allocate an IRQ resources array */
+	opal_irqs = kcalloc(opal_irq_count, sizeof(struct resource), GFP_KERNEL);
+	if (WARN_ON(!opal_irqs)) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Build the resources array from whichever property was found */
+	if (old_style) {
+		for (i = 0; i < opal_irq_count; i++) {
+			struct resource *r = &opal_irqs[i];
+			const char *name = NULL;
+			u32 hw_irq;
+			int virq;
+
+			rc = of_property_read_u32_index(opal_node, "opal-interrupts",
+							i, &hw_irq);
+			if (WARN_ON(rc < 0)) {
+				opal_irq_count = i;
+				break;
+			}
+			of_property_read_string_index(opal_node, "opal-interrupts-names",
+						      i, &name);
+			virq = irq_create_mapping(NULL, hw_irq);
+			if (!virq) {
+				pr_warn("Failed to map OPAL irq 0x%x\n", hw_irq);
+				continue;
+			}
+			r->start = r->end = virq;
+			r->flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW;
+			r->name = name;
+		}
+	} else {
+		rc = of_irq_to_resource_table(opal_node, opal_irqs, opal_irq_count);
+		if (WARN_ON(rc < 0)) {
+			opal_irq_count = 0;
+			kfree(opal_irqs);
+			opal_irqs = NULL;	/* leave no dangling pointer behind */
+			goto out;
+		}
+		if (WARN_ON(rc < opal_irq_count))
+			opal_irq_count = rc;
+	}
+
+	/* Install interrupt handlers */
+	for (i = 0; i < opal_irq_count; i++) {
+		struct resource *r = &opal_irqs[i];
+		const char *name;
+
+		/* Prefix name */
+		if (r->name && strlen(r->name))
+			name = kasprintf(GFP_KERNEL, "opal-%s", r->name);
+		else
+			name = kasprintf(GFP_KERNEL, "opal");
+		if (!name)
+			continue;
+		/* Install interrupt handler */
+		rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK,
+				 name, NULL);
+		if (rc) {
+			pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start);
+			kfree(name);	/* not retained on failure, so free it */
+		}
+	}
+	rc = 0;
+ out:
+	of_node_put(opal_node);
+	return rc;
+}
+machine_arch_initcall(powernv, opal_event_init);
+
+/**
+ * opal_event_request(unsigned int opal_event_nr) - Request an event
+ * @opal_event_nr: the opal event number to request
+ *
+ * This routine can be used to find the linux virq number which can
+ * then be passed to request_irq to assign a handler for a particular
+ * opal event. This should only be used by legacy devices which don't
+ * have proper device tree bindings. Most devices should use
+ * irq_of_parse_and_map() instead.
+ */
+int opal_event_request(unsigned int opal_event_nr)
+{
+	if (WARN_ON_ONCE(!opal_event_irqchip.domain))
+		return 0;
+
+	return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr);
+}
+EXPORT_SYMBOL(opal_event_request);
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 0000000000..6c3bc4b4da
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware.  The kernel regularly calls
+ * OPAL_POLL_EVENTS, which flushes some console output.  In a panic state,
+ * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message
+ * may not be completely printed.  This function does not actually dump the
+ * message, it just ensures that OPAL completely flushes the console buffer.
+ */
+static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper,
+				     enum kmsg_dump_reason reason)
+{
+	/*
+	 * Outside of a panic context the pollers will continue to run,
+	 * so we don't need to do any special flushing.
+	 */
+	if (reason != KMSG_DUMP_PANIC)
+		return;
+
+	opal_flush_console(0);
+}
+
+static struct kmsg_dumper opal_kmsg_dumper = {
+	.dump = kmsg_dump_opal_console_flush
+};
+
+/* Register the OPAL console-flush dumper; a failure is only logged. */
+void __init opal_kmsg_init(void)
+{
+	int err = kmsg_dump_register(&opal_kmsg_dumper);
+
+	if (!err)
+		return;
+	pr_err("opal: kmsg_dump_register failed; returned %d\n", err);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
new file mode 100644
index 0000000000..a16f07cdab
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV LPC bus handling.
+ *
+ * Copyright 2013 IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/prom.h>
+#include <linux/uaccess.h>
+#include <asm/isa-bridge.h>
+
+static int opal_lpc_chip_id = -1;
+
+static u8 opal_lpc_inb(unsigned long port)
+{
+	int64_t rc;
+	__be32 data;
+
+	if (opal_lpc_chip_id < 0 || port > 0xffff)
+		return 0xff;
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1);
+	return rc ? 0xff : be32_to_cpu(data);
+}
+
+static __le16 __opal_lpc_inw(unsigned long port)
+{
+	int64_t rc;
+	__be32 data;
+
+	if (opal_lpc_chip_id < 0 || port > 0xfffe)
+		return 0xffff;
+	if (port & 1)
+		return (__le16)opal_lpc_inb(port) << 8 | opal_lpc_inb(port + 1);
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2);
+	return rc ? 0xffff : be32_to_cpu(data);
+}
+static u16 opal_lpc_inw(unsigned long port)
+{
+	return le16_to_cpu(__opal_lpc_inw(port));
+}
+
+static __le32 __opal_lpc_inl(unsigned long port)
+{
+	int64_t rc;
+	__be32 data;
+
+	if (opal_lpc_chip_id < 0 || port > 0xfffc)
+		return 0xffffffff;
+	if (port & 3)
+		return (__le32)opal_lpc_inb(port    ) << 24 |
+		       (__le32)opal_lpc_inb(port + 1) << 16 |
+		       (__le32)opal_lpc_inb(port + 2) <<  8 |
+			       opal_lpc_inb(port + 3);
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4);
+	return rc ? 0xffffffff : be32_to_cpu(data);
+}
+
+static u32 opal_lpc_inl(unsigned long port)
+{
+	return le32_to_cpu(__opal_lpc_inl(port));
+}
+
+static void opal_lpc_outb(u8 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xffff)
+		return;
+	opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1);
+}
+
+static void __opal_lpc_outw(__le16 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xfffe)
+		return;
+	if (port & 1) {
+		opal_lpc_outb(val >> 8, port);
+		opal_lpc_outb(val     , port + 1);
+		return;
+	}
+	opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2);
+}
+
+static void opal_lpc_outw(u16 val, unsigned long port)
+{
+	__opal_lpc_outw(cpu_to_le16(val), port);
+}
+
+static void __opal_lpc_outl(__le32 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xfffc)
+		return;
+	if (port & 3) {
+		opal_lpc_outb(val >> 24, port);
+		opal_lpc_outb(val >> 16, port + 1);
+		opal_lpc_outb(val >>  8, port + 2);
+		opal_lpc_outb(val      , port + 3);
+		return;
+	}
+	opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4);
+}
+
+static void opal_lpc_outl(u32 val, unsigned long port)
+{
+	__opal_lpc_outl(cpu_to_le32(val), port);
+}
+
+static void opal_lpc_insb(unsigned long p, void *b, unsigned long c)
+{
+	u8 *ptr = b;
+
+	while(c--)
+		*(ptr++) = opal_lpc_inb(p);
+}
+
+static void opal_lpc_insw(unsigned long p, void *b, unsigned long c)
+{
+	__le16 *ptr = b;
+
+	while(c--)
+		*(ptr++) = __opal_lpc_inw(p);
+}
+
+static void opal_lpc_insl(unsigned long p, void *b, unsigned long c)
+{
+	__le32 *ptr = b;
+
+	while(c--)
+		*(ptr++) = __opal_lpc_inl(p);
+}
+
+static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c)
+{
+	const u8 *ptr = b;
+
+	while(c--)
+		opal_lpc_outb(*(ptr++), p);
+}
+
+static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c)
+{
+	const __le16 *ptr = b;
+
+	while(c--)
+		__opal_lpc_outw(*(ptr++), p);
+}
+
+static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c)
+{
+	const __le32 *ptr = b;
+
+	while(c--)
+		__opal_lpc_outl(*(ptr++), p);
+}
+
+static const struct ppc_pci_io opal_lpc_io = {
+	.inb	= opal_lpc_inb,
+	.inw	= opal_lpc_inw,
+	.inl	= opal_lpc_inl,
+	.outb	= opal_lpc_outb,
+	.outw	= opal_lpc_outw,
+	.outl	= opal_lpc_outl,
+	.insb	= opal_lpc_insb,
+	.insw	= opal_lpc_insw,
+	.insl	= opal_lpc_insl,
+	.outsb	= opal_lpc_outsb,
+	.outsw	= opal_lpc_outsw,
+	.outsl	= opal_lpc_outsl,
+};
+
+#ifdef CONFIG_DEBUG_FS
+struct lpc_debugfs_entry {
+	enum OpalLPCAddressType lpc_type;
+};
+
+static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf,
+			      size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(ubuf, count))
+		return -EFAULT;
+
+	todo = count;
+	while (todo) {
+		pos = *ppos;
+
+		/*
+		 * Select access size based on count and alignment and
+		 * access type. IO and MEM only support byte accesses,
+		 * FW supports all 3.
+		 */
+		len = 1;
+		if (lpc->lpc_type == OPAL_LPC_FW) {
+			if (todo > 3 && (pos & 3) == 0)
+				len = 4;
+			else if (todo > 1 && (pos & 1) == 0)
+				len = 2;
+		}
+		rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos,
+				   &data, len);
+		if (rc)
+			return -ENXIO;
+
+		/*
+		 * Now there is some trickery with the data returned by OPAL
+		 * as it's the desired data right justified in a 32-bit BE
+		 * word.
+		 *
+		 * This is a very bad interface and I'm to blame for it :-(
+		 *
+		 * So we can't just apply a 32-bit swap to what comes from OPAL,
+		 * because user space expects the *bytes* to be in their proper
+		 * respective positions (ie, LPC position).
+		 *
+		 * So what we really want to do here is to shift data right
+		 * appropriately on a LE kernel.
+		 *
+		 * IE. If the LPC transaction has bytes B0, B1, B2 and B3 in that
+		 * order, we have in memory written to by OPAL at the "data"
+		 * pointer:
+		 *
+		 *               Bytes:      OPAL "data"   LE "data"
+		 *   32-bit:   B0 B1 B2 B3   B0B1B2B3      B3B2B1B0
+		 *   16-bit:   B0 B1         0000B0B1      B1B00000
+		 *    8-bit:   B0            000000B0      B0000000
+		 *
+		 * So a BE kernel will have the leftmost of the above in the MSB
+		 * and rightmost in the LSB and can just then "cast" the u32 "data"
+		 * down to the appropriate quantity and write it.
+		 *
+		 * However, an LE kernel can't. It doesn't need to swap because a
+		 * load from data followed by a store to user are going to preserve
+		 * the byte ordering which is the wire byte order which is what the
+		 * user wants, but in order to "crop" to the right size, we need to
+		 * shift right first.
+		 */
+		switch(len) {
+		case 4:
+			rc = __put_user((u32)data, (u32 __user *)ubuf);
+			break;
+		case 2:
+#ifdef __LITTLE_ENDIAN__
+			data >>= 16;
+#endif
+			rc = __put_user((u16)data, (u16 __user *)ubuf);
+			break;
+		default:
+#ifdef __LITTLE_ENDIAN__
+			data >>= 24;
+#endif
+			rc = __put_user((u8)data, (u8 __user *)ubuf);
+			break;
+		}
+		if (rc)
+			return -EFAULT;
+		*ppos += len;
+		ubuf += len;
+		todo -= len;
+	}
+
+	return count;
+}
+
+static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(ubuf, count))
+		return -EFAULT;
+
+	todo = count;
+	while (todo) {
+		pos = *ppos;
+
+		/*
+		 * Select access size based on count and alignment and
+		 * access type. IO and MEM only support byte accesses,
+		 * FW supports all 3.
+		 */
+		len = 1;
+		if (lpc->lpc_type == OPAL_LPC_FW) {
+			if (todo > 3 && (pos & 3) == 0)
+				len = 4;
+			else if (todo > 1 && (pos & 1) == 0)
+				len = 2;
+		}
+
+		/*
+		 * Similarly to the read case, we have some trickery here but
+		 * it's different to handle. We need to pass the value to OPAL in
+		 * a register whose layout depends on the access size. We want
+		 * to reproduce the memory layout of the user, however we aren't
+		 * doing a load from user and a store to another memory location
+		 * which would achieve that. Here we pass the value to OPAL via
+		 * a register which is expected to contain the "BE" interpretation
+		 * of the byte sequence. IE: for a 32-bit access, byte 0 should be
+		 * in the MSB. So here we *do* need to byteswap on LE.
+		 *
+		 *           User bytes:    LE "data"  OPAL "data"
+		 *  32-bit:  B0 B1 B2 B3    B3B2B1B0   B0B1B2B3
+		 *  16-bit:  B0 B1          0000B1B0   0000B0B1
+		 *   8-bit:  B0             000000B0   000000B0
+		 */
+		switch(len) {
+		case 4:
+			rc = __get_user(data, (u32 __user *)ubuf);
+			data = cpu_to_be32(data);
+			break;
+		case 2:
+			rc = __get_user(data, (u16 __user *)ubuf);
+			data = cpu_to_be16(data);
+			break;
+		default:
+			rc = __get_user(data, (u8 __user *)ubuf);
+			break;
+		}
+		if (rc)
+			return -EFAULT;
+
+		rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos,
+				    data, len);
+		if (rc)
+			return -ENXIO;
+		*ppos += len;
+		ubuf += len;
+		todo -= len;
+	}
+
+	return count;
+}
+
+static const struct file_operations lpc_fops = {
+	.read =		lpc_debug_read,
+	.write =	lpc_debug_write,
+	.open =		simple_open,
+	.llseek =	default_llseek,
+};
+
+static int opal_lpc_debugfs_create_type(struct dentry *folder,
+					const char *fname,
+					enum OpalLPCAddressType type)
+{
+	struct lpc_debugfs_entry *entry;
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	entry->lpc_type = type;
+	debugfs_create_file(fname, 0600, folder, entry, &lpc_fops);
+	return 0;
+}
+
+static int opal_lpc_init_debugfs(void)
+{
+	struct dentry *root;
+	int rc = 0;
+
+	if (opal_lpc_chip_id < 0)
+		return -ENODEV;
+
+	root = debugfs_create_dir("lpc", arch_debugfs_dir);
+
+	rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
+	rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
+	rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW);
+	return rc;
+}
+machine_device_initcall(powernv, opal_lpc_init_debugfs);
+#endif  /* CONFIG_DEBUG_FS */
+
+/* Pick the "primary" Power8 LPC bus and set up I/O access to it. */
+void __init opal_lpc_init(void)
+{
+	struct device_node *np;
+
+	/*
+	 * Only the "primary" LPC bus is supported, though OPAL allows more.
+	 * On a match np keeps the iterator's reference until "out".
+	 */
+	for_each_compatible_node(np, NULL, "ibm,power8-lpc") {
+		if (!of_device_is_available(np))
+			continue;
+		if (of_get_property(np, "primary", NULL)) {
+			opal_lpc_chip_id = of_get_ibm_chip_id(np);
+			break;
+		}
+	}
+	if (opal_lpc_chip_id < 0)
+		goto out;
+
+	/* Does it support direct mapping ? */
+	if (of_property_present(np, "ranges")) {
+		pr_info("OPAL: Found memory mapped LPC bus on chip %d\n",
+			opal_lpc_chip_id);
+		isa_bridge_init_non_pci(np);
+	} else {
+		pr_info("OPAL: Found non-mapped LPC bus on chip %d\n",
+			opal_lpc_chip_id);
+		/* Setup special IO ops */
+		ppc_pci_io = opal_lpc_io;
+		isa_io_special = true;
+	}
+out:
+	of_node_put(np);	/* np is NULL if nothing matched; that is a no-op */
+}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
new file mode 100644
index 0000000000..a1754a2826
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OPAL asynchronous Memory error handling support in PowerNV.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputable.h>
+
+static int opal_mem_err_nb_init;
+static LIST_HEAD(opal_memory_err_list);
+static DEFINE_SPINLOCK(opal_mem_err_lock);
+
+struct OpalMsgNode {
+	struct list_head list;
+	struct opal_msg msg;
+};
+
+static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
+{
+	uint64_t paddr_start, paddr_end;
+
+	pr_debug("%s: Retrieved memory error event, type: 0x%x\n",
+		  __func__, merr_evt->type);
+	switch (merr_evt->type) {
+	case OPAL_MEM_ERR_TYPE_RESILIENCE:
+		paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
+		paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
+		break;
+	case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
+		paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
+		paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
+		break;
+	default:
+		return;
+	}
+
+	for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) {
+		memory_failure(paddr_start >> PAGE_SHIFT, 0);
+	}
+}
+
+/* Drain opal_memory_err_list, handling each event with the lock dropped. */
+static void handle_memory_error(void)
+{
+	struct OpalMsgNode *node;
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&opal_mem_err_lock, irq_state);
+	while (!list_empty(&opal_memory_err_list)) {
+		node = list_first_entry(&opal_memory_err_list,
+					struct OpalMsgNode, list);
+		list_del(&node->list);
+		spin_unlock_irqrestore(&opal_mem_err_lock, irq_state);
+
+		/* The error data starts at params[0] of the saved message. */
+		handle_memory_error_event((struct OpalMemoryErrorData *)
+					  &node->msg.params[0]);
+		kfree(node);
+		spin_lock_irqsave(&opal_mem_err_lock, irq_state);
+	}
+	spin_unlock_irqrestore(&opal_mem_err_lock, irq_state);
+}
+
+static void mem_error_handler(struct work_struct *work)
+{
+	handle_memory_error();
+}
+
+static DECLARE_WORK(mem_error_work, mem_error_handler);
+
+/*
+ * opal_memory_err_event - notifier handler that queues up the opal message
+ * to be processed later by mem_error_work. Returns 0 when the message is
+ * queued or is not OPAL_MSG_MEM_ERR, -ENOMEM if no node could be allocated.
+ */
+static int opal_memory_err_event(struct notifier_block *nb,
+			  unsigned long msg_type, void *msg)
+{
+	unsigned long flags;
+	struct OpalMsgNode *msg_node;
+
+	if (msg_type != OPAL_MSG_MEM_ERR)
+		return 0;
+
+	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+	if (!msg_node) {
+		pr_err("MEMORY_ERROR: out of memory, Opal message event not handled\n");
+		return -ENOMEM;
+	}
+	memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
+
+	spin_lock_irqsave(&opal_mem_err_lock, flags);
+	list_add(&msg_node->list, &opal_memory_err_list);
+	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+	schedule_work(&mem_error_work);
+	return 0;
+}
+
+static struct notifier_block opal_mem_err_nb = {
+	.notifier_call	= opal_memory_err_event,
+	.next		= NULL,
+	.priority	= 0,
+};
+
+static int __init opal_mem_err_init(void)
+{
+	int err;
+
+	/* Register the memory error notifier only on the first call. */
+	if (opal_mem_err_nb_init)
+		return 0;
+	err = opal_message_notifier_register(OPAL_MSG_MEM_ERR, &opal_mem_err_nb);
+	if (err) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+		       __func__, err);
+		return err;
+	}
+	opal_mem_err_nb_init = 1;
+	return 0;
+}
+machine_device_initcall(powernv, opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
new file mode 100644
index 0000000000..22d6efe17b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL in-memory console interface
+ *
+ * Copyright 2014 IBM Corp.
+ */
+
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+
+#include "powernv.h"
+
+/* OPAL in-memory console. Defined in OPAL source at core/console.c */
+struct memcons {
+	__be64 magic;		/* compared against MEMCONS_MAGIC in memcons_init() */
+#define MEMCONS_MAGIC	0x6630696567726173L
+	__be64 obuf_phys;	/* physical address of the output buffer */
+	__be64 ibuf_phys;
+	__be32 obuf_size;	/* output buffer size in bytes */
+	__be32 ibuf_size;
+	__be32 out_pos;		/* output position plus the wrap flag below */
+#define MEMCONS_OUT_POS_WRAP	0x80000000u	/* set once the output buffer has wrapped */
+#define MEMCONS_OUT_POS_MASK	0x00ffffffu	/* position bits of out_pos */
+	__be32 in_prod;
+	__be32 in_cons;
+};
+
+static struct memcons *opal_memcons = NULL;	/* set by opal_msglog_init(); NULL if unavailable */
+
+/* Copy up to count bytes of log at offset pos into to; returns bytes or -errno. */
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count)
+{
+	const char *conbuf;
+	ssize_t ret;
+	size_t first_read = 0;
+	uint32_t out_pos, avail, obuf_size;
+	bool wrapped;
+
+	if (!mc)
+		return -ENODEV;
+
+	out_pos = be32_to_cpu(READ_ONCE(mc->out_pos));
+
+	/* Now we've read out_pos, put a barrier in before reading the new
+	 * data it points to in conbuf. */
+	smp_rmb();
+
+	conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+	obuf_size = be32_to_cpu(mc->obuf_size);
+	wrapped = out_pos & MEMCONS_OUT_POS_WRAP;
+	if (wrapped)
+		out_pos &= MEMCONS_OUT_POS_MASK;
+
+	/* Sanity check before out_pos is used as an offset or a length, so a
+	 * corrupt value can't make us read outside the buffer. The firmware
+	 * should not do this to us. */
+	if (out_pos > obuf_size) {
+		pr_err("OPAL: memory console corruption. Aborting read.\n");
+		return -EINVAL;
+	}
+
+	/* When the buffer has wrapped, read from the out_pos marker to the end
+	 * of the buffer, and then read the remaining data as in the un-wrapped
+	 * case. */
+	if (wrapped) {
+		avail = obuf_size - out_pos;
+		ret = memory_read_from_buffer(to, count, &pos,
+				conbuf + out_pos, avail);
+		if (ret < 0)
+			return ret;
+
+		first_read = ret;
+		to += first_read;
+		count -= first_read;
+		pos -= avail;
+		if (!count)	/* caller's buffer is already full */
+			return first_read;
+	}
+
+	ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos);
+	if (ret < 0)
+		return ret;
+
+	return ret + first_read;
+}
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+{
+	return memcons_copy(opal_memcons, to, pos, count);	/* -ENODEV while opal_memcons is NULL */
+}
+
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *to,
+				loff_t pos, size_t count)
+{
+	return opal_msglog_copy(to, pos, count);	/* file, kobj and bin_attr are not used */
+}
+
+static struct bin_attribute opal_msglog_attr = {
+	.attr = {.name = "msglog", .mode = 0400},	/* .size is filled in by opal_msglog_init() */
+	.read = opal_msglog_read
+};
+
+/* Locate the memcons named by property mc_prop_name; NULL if missing or invalid. */
+struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name)
+{
+	u64 mcaddr;
+	struct memcons *mc;
+
+	if (of_property_read_u64(node, mc_prop_name, &mcaddr)) {
+		pr_warn("%s property not found, no message log\n",
+			mc_prop_name);
+		return NULL;
+	}
+
+	/* Validate the physical address itself: the translated pointer is an
+	 * offset mapping, so a NULL test on it would not catch address 0. */
+	if (!mcaddr) {
+		pr_warn("memory console address is invalid\n");
+		return NULL;
+	}
+
+	mc = phys_to_virt(mcaddr);
+	if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
+		pr_warn("memory console version is invalid\n");
+		return NULL;
+	}
+
+	return mc;
+}
+
+u32 __init memcons_get_size(struct memcons *mc)
+{
+	return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size);	/* input plus output buffer sizes */
+}
+
+void __init opal_msglog_init(void)
+{
+	opal_memcons = memcons_init(opal_node, "ibm,opal-memcons");
+	if (opal_memcons) {
+		/* Found it: record the combined buffer size as the file size. */
+		opal_msglog_attr.size = memcons_get_size(opal_memcons);
+		return;
+	}
+	pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n");
+}
+
+void __init opal_msglog_sysfs_init(void)
+{
+	if (opal_memcons) {
+		/* The memcons was found earlier, so expose it as a file. */
+		if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr))
+			pr_warn("OPAL: sysfs file creation failed\n");
+		return;
+	}
+	pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
new file mode 100644
index 0000000000..380bc2d7eb
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV nvram code.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#define DEBUG
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+
+#include <asm/opal.h>
+#include <asm/nvram.h>
+#include <asm/machdep.h>
+
+static unsigned int nvram_size;	/* set from the "#bytes" property in opal_nvram_init() */
+
+static ssize_t opal_nvram_size(void)
+{
+	return nvram_size;	/* 0 until opal_nvram_init() has found the property */
+}
+
+static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	s64 rc;
+	int off;
+
+	if (*index >= nvram_size)
+		return 0;	/* nothing to read at or past the end */
+	off = *index;
+	if ((off + count) > nvram_size)
+		count = nvram_size - off;	/* clamp to the end of nvram */
+	rc = opal_read_nvram(__pa(buf), count, off);	/* buf is handed over by physical address */
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+	*index += count;	/* advance the caller's offset */
+	return count;
+}
+
+/*
+ * Write up to count bytes from buf to nvram at offset *index, retrying while
+ * the firmware reports that it is busy.
+ *
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ */
+static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	s64 rc;
+	int off;
+
+	if (*index >= nvram_size)
+		return 0;
+	off = *index;
+	if ((off + count) > nvram_size)
+		count = nvram_size - off;
+
+	for (;;) {
+		rc = opal_write_nvram(__pa(buf), count, off);
+		if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+			break;
+		/* Busy: back off, sleeping only where that is allowed. */
+		if (in_interrupt() || irqs_disabled())
+			mdelay(OPAL_BUSY_DELAY_MS);
+		else
+			msleep(OPAL_BUSY_DELAY_MS);
+		if (rc == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+	}
+
+	if (rc)
+		return -EIO;
+
+	*index += count;
+	return count;
+}
+
+static int __init opal_nvram_init_log_partitions(void)
+{
+	/* Scan nvram for partitions, then initialise the oops partition */
+	nvram_scan_partitions();
+	nvram_init_oops_partition(0);
+	return 0;	/* always reports success */
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
+void __init opal_nvram_init(void)
+{
+	const __be32 *size_prop;
+	struct device_node *nvram_np;
+
+	nvram_np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram");
+	if (!nvram_np)
+		return;
+
+	/* Without a "#bytes" property the nvram hooks stay uninstalled. */
+	size_prop = of_get_property(nvram_np, "#bytes", NULL);
+	if (size_prop)
+		nvram_size = be32_to_cpup(size_prop);
+	of_node_put(nvram_np);
+	if (!size_prop)
+		return;
+
+	pr_info("OPAL nvram setup, %u bytes\n", nvram_size);
+
+	ppc_md.nvram_read = opal_nvram_read;
+	ppc_md.nvram_write = opal_nvram_write;
+	ppc_md.nvram_size = opal_nvram_size;
+}
+
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
new file mode 100644
index 0000000000..db99ffcb7b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL power control for graceful shutdown handling
+ *
+ * Copyright 2015 IBM Corp.
+ */
+
+#define pr_fmt(fmt)	"opal-power: "	fmt
+
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+
+#include <asm/opal.h>
+#include <asm/machdep.h>
+
+#define SOFT_OFF 0x00	/* OPAL_MSG_SHUTDOWN params[0] value: power off */
+#define SOFT_REBOOT 0x01	/* OPAL_MSG_SHUTDOWN params[0] value: reboot */
+
+/* Detect EPOW event; returns true when one that needs a shutdown is present */
+static bool detect_epow(void)
+{
+	u16 epow, nr_classes;
+	int i, rc;
+	__be16 epow_classes;
+	__be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0};
+
+	/*
+	 * Check for EPOW event. Kernel sends supported EPOW classes info
+	 * to OPAL. OPAL returns EPOW info along with classes present.
+	 */
+	epow_classes = cpu_to_be16(OPAL_SYSEPOW_MAX);
+	rc = opal_get_epow_status(opal_epow_status, &epow_classes);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("Failed to get EPOW event information\n");
+		return false;
+	}
+
+	/* Look for EPOW events present, never indexing past the local array */
+	nr_classes = min_t(u16, be16_to_cpu(epow_classes), OPAL_SYSEPOW_MAX);
+	for (i = 0; i < nr_classes; i++) {
+		epow = be16_to_cpu(opal_epow_status[i]);
+		/* Filter events which do not need shutdown. */
+		if (i == OPAL_SYSEPOW_POWER)
+			epow &= ~(OPAL_SYSPOWER_CHNG | OPAL_SYSPOWER_FAIL |
+					OPAL_SYSPOWER_INCL);
+		if (epow)
+			return true;
+	}
+
+	return false;
+}
+
+/* Check for existing EPOW, DPO events */
+static bool __init poweroff_pending(void)
+{
+	__be64 opal_dpo_timeout;
+	bool pending = true;
+	int rc;
+
+	/*
+	 * A successful DPO status query is itself treated as a pending DPO
+	 * event; EPOW is only looked at when there is none.
+	 */
+	rc = opal_get_dpo_status(&opal_dpo_timeout);
+	if (rc == OPAL_SUCCESS)
+		pr_info("Existing DPO event detected.\n");
+	else if (detect_epow())
+		pr_info("Existing EPOW event detected.\n");
+	else
+		pending = false;
+
+	return pending;
+}
+
+/* OPAL power-control events notifier */
+static int opal_power_control_event(struct notifier_block *nb,
+					unsigned long msg_type, void *msg)
+{
+	struct opal_msg *opal_msg = msg;
+	uint64_t type;
+
+	switch (msg_type) {
+	case OPAL_MSG_EPOW:
+		/* Only power off when an EPOW event is really present. */
+		if (!detect_epow())
+			break;
+		pr_info("EPOW msg received. Powering off system\n");
+		orderly_poweroff(true);
+		break;
+	case OPAL_MSG_DPO:
+		pr_info("DPO msg received. Powering off system\n");
+		orderly_poweroff(true);
+		break;
+	case OPAL_MSG_SHUTDOWN:
+		/* The first message parameter selects reboot or power off. */
+		type = be64_to_cpu(opal_msg->params[0]);
+		if (type == SOFT_REBOOT) {
+			pr_info("Reboot requested\n");
+			orderly_reboot();
+		} else if (type == SOFT_OFF) {
+			pr_info("Poweroff requested\n");
+			orderly_poweroff(true);
+		} else {
+			pr_err("Unknown power-control type %llu\n", type);
+		}
+		break;
+	default:
+		pr_err("Unknown OPAL message type %lu\n", msg_type);
+	}
+
+	return 0;
+}
+
+/* OPAL EPOW event notifier block */
+static struct notifier_block opal_epow_nb = {
+	/* Registered for OPAL_MSG_EPOW by opal_power_control_init(). */
+	/* .next and .priority are left at their zero defaults. */
+	.notifier_call	= opal_power_control_event,
+};
+
+/* OPAL DPO event notifier block */
+static struct notifier_block opal_dpo_nb = {
+	/* Registered for OPAL_MSG_DPO by opal_power_control_init(). */
+	/* .next and .priority are left at their zero defaults. */
+	.notifier_call	= opal_power_control_event,
+};
+
+/* OPAL power-control event notifier block */
+static struct notifier_block opal_power_control_nb = {
+	/* Registered for OPAL_MSG_SHUTDOWN by opal_power_control_init(). */
+	/* .next and .priority are left at their zero defaults. */
+	.notifier_call	= opal_power_control_event,
+};
+
+int __init opal_power_control_init(void)
+{
+	int ret, supported = 0;
+	struct device_node *np;
+
+	/* Register OPAL power-control events notifier; a failure is only logged */
+	ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
+						&opal_power_control_nb);
+	if (ret)
+		pr_err("Failed to register SHUTDOWN notifier, ret = %d\n", ret);
+
+	/* Determine OPAL EPOW, DPO support from the device tree */
+	np = of_find_node_by_path("/ibm,opal/epow");
+	if (np) {
+		supported = of_device_is_compatible(np, "ibm,opal-v3-epow");
+		of_node_put(np);
+	}
+
+	if (!supported)
+		return 0;	/* no EPOW/DPO support: only SHUTDOWN is handled */
+	pr_info("OPAL EPOW, DPO support detected.\n");
+
+	/* Register EPOW event notifier; a failure is only logged */
+	ret = opal_message_notifier_register(OPAL_MSG_EPOW, &opal_epow_nb);
+	if (ret)
+		pr_err("Failed to register EPOW notifier, ret = %d\n", ret);
+
+	/* Register DPO event notifier; a failure is only logged */
+	ret = opal_message_notifier_register(OPAL_MSG_DPO, &opal_dpo_nb);
+	if (ret)
+		pr_err("Failed to register DPO notifier, ret = %d\n", ret);
+
+	/* Check for any EPOW or DPO event that is already pending. */
+	if (poweroff_pending())
+		orderly_poweroff(true);
+
+	return 0;	/* always 0, even if a registration failed */
+}
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
new file mode 100644
index 0000000000..ea917266aa
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Powercap interface
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#define pr_fmt(fmt)     "opal-powercap: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+static DEFINE_MUTEX(powercap_mutex);	/* held around the OPAL calls in powercap_show/store */
+
+static struct kobject *powercap_kobj;	/* "powercap" kobject created under opal_kobj */
+
+struct powercap_attr {
+	u32 handle;	/* passed to opal_get_powercap()/opal_set_powercap() */
+	struct kobj_attribute attr;
+};
+
+static struct pcap {
+	struct attribute_group pg;	/* one group per child of the powercap node */
+	struct powercap_attr *pattrs;	/* attributes that pg.attrs points into */
+} *pcaps;
+
+/*
+ * sysfs show handler: fetch the powercap value for this attribute's handle
+ * from OPAL, waiting for an async completion if needed, and print it.
+ * Returns the number of bytes written to buf or a negative errno.
+ */
+static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+						struct powercap_attr, attr);
+	struct opal_msg msg;
+	__be32 pcap;	/* filled in by firmware, big-endian */
+	int ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap));
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = sysfs_emit(buf, "%u\n", be32_to_cpu(pcap));
+		break;
+	case OPAL_SUCCESS:
+		ret = sysfs_emit(buf, "%u\n", be32_to_cpu(pcap));
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/* sysfs store handler: parse an unsigned powercap value and pass it to OPAL. */
+static ssize_t powercap_store(struct kobject *kobj, struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+						struct powercap_attr, attr);
+	struct opal_msg msg;
+	u32 pcap;	/* unsigned, so it is parsed with kstrtouint() below */
+	int ret, token;
+
+	ret = kstrtouint(buf, 0, &pcap);
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_powercap(pcap_attr->handle, token, pcap);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static void __init powercap_add_attr(int handle, const char *name,
+			      struct powercap_attr *attr)
+{
+	attr->handle = handle;	/* value later handed to the OPAL powercap calls */
+	sysfs_attr_init(&attr->attr.attr);
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;	/* read-only here; no store handler is set */
+	attr->attr.show = powercap_show;
+}
+
+/*
+ * Create a "powercap" directory under opal_kobj holding one attribute group
+ * per child of the "ibm,opal-powercap" node; everything is undone on failure.
+ */
+void __init opal_powercap_init(void)
+{
+	struct device_node *powercap, *node;
+	int i = 0;
+
+	powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap");
+	if (!powercap) {
+		pr_devel("Powercap node not found\n");
+		return;
+	}
+
+	pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps),
+			GFP_KERNEL);
+	if (!pcaps)
+		goto out_put_powercap;
+
+	powercap_kobj = kobject_create_and_add("powercap", opal_kobj);
+	if (!powercap_kobj) {
+		pr_warn("Failed to create powercap kobject\n");
+		goto out_pcaps;
+	}
+
+	for_each_child_of_node(powercap, node) {
+		u32 cur, min, max;
+		int j = 0;
+		bool has_cur = false, has_min = false, has_max = false;
+
+		if (!of_property_read_u32(node, "powercap-min", &min)) {
+			j++;
+			has_min = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-max", &max)) {
+			j++;
+			has_max = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-current", &cur)) {
+			j++;
+			has_cur = true;
+		}
+
+		pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr),
+					  GFP_KERNEL);
+		if (!pcaps[i].pattrs)
+			goto out_pcaps_pattrs;
+
+		pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *),
+					    GFP_KERNEL);
+		if (!pcaps[i].pg.attrs)
+			goto out_pcaps_pattrs;
+
+		j = 0;
+		pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
+		if (!pcaps[i].pg.name)
+			goto out_pcaps_pattrs;
+
+		if (has_min) {
+			powercap_add_attr(min, "powercap-min",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_max) {
+			powercap_add_attr(max, "powercap-max",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_cur) {
+			powercap_add_attr(cur, "powercap-current",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pattrs[j].attr.attr.mode |= 0220;
+			pcaps[i].pattrs[j].attr.store = powercap_store;
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) {
+			pr_warn("Failed to create powercap attribute group %s\n",
+				pcaps[i].pg.name);
+			goto out_pcaps_pattrs;
+		}
+		i++;
+	}
+	of_node_put(powercap);
+
+	return;
+
+out_pcaps_pattrs:
+	/* Remove the sysfs directory first, then free every entry up to and
+	 * including the partly built entry i (unset members are still NULL). */
+	kobject_put(powercap_kobj);
+	for (; i >= 0; i--) {
+		kfree(pcaps[i].pattrs);
+		kfree(pcaps[i].pg.attrs);
+		kfree(pcaps[i].pg.name);
+	}
+	of_node_put(node);
+out_pcaps:
+	kfree(pcaps);
+out_put_powercap:
+	of_node_put(powercap);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
new file mode 100644
index 0000000000..327e2f7690
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * Copyright IBM Corporation 2015
+ */
+
+#define pr_fmt(fmt) "opal-prd: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/poll.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/opal-prd.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <linux/uaccess.h>
+
+
+struct opal_prd_msg {
+	union {
+		struct opal_prd_msg_header header;	/* carries the big-endian size and the type */
+		DECLARE_FLEX_ARRAY(u8, data);	/* the same message viewed as raw bytes */
+	};
+};
+
+/*
+ * The msg member must be at the end of the struct, as it's followed by the
+ * message data (the item is allocated with room for the whole message).
+ */
+struct opal_prd_msg_queue_item {
+	struct list_head	list;	/* link in opal_prd_msg_queue */
+	struct opal_prd_msg	msg;
+};
+
+static struct device_node *prd_node;	/* node of the probed device; non-NULL blocks further probes */
+static LIST_HEAD(opal_prd_msg_queue);	/* messages from firmware waiting to be read */
+static DEFINE_SPINLOCK(opal_prd_msg_queue_lock);	/* protects opal_prd_msg_queue */
+static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait);	/* woken when a message is queued */
+static atomic_t prd_usage;	/* 1 while the device is open, else 0 */
+
+/* True if [addr, addr + size) lies inside a labelled /reserved-memory range. */
+static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size)
+{
+	struct device_node *parent, *node;
+	bool found = false;
+
+	/* Reject ranges that wrap around the address space. */
+	if (addr + size < addr)
+		return false;
+
+	parent = of_find_node_by_path("/reserved-memory");
+	if (!parent)
+		return false;
+
+	for_each_child_of_node(parent, node) {
+		uint64_t range_addr, range_size, range_end;
+		const __be32 *addrp;
+
+		/* PRD ranges need a label */
+		if (!of_get_property(node, "ibm,prd-label", NULL))
+			continue;
+
+		/* A node without a usable address cannot describe a range. */
+		addrp = of_get_address(node, 0, &range_size, NULL);
+		if (!addrp)
+			continue;
+
+		range_addr = of_read_number(addrp, 2);
+		range_end = range_addr + range_size;
+
+		if (range_end <= range_addr)
+			continue;
+
+		if (addr >= range_addr && addr + size <= range_end) {
+			found = true;
+			of_node_put(node);
+			break;
+		}
+	}
+
+	of_node_put(parent);
+	return found;
+}
+
+static int opal_prd_open(struct inode *inode, struct file *file)
+{
+	int was_open;
+
+	/*
+	 * Only one opener at a time: separate processes must not interact
+	 * concurrently with the FW PRD channel.
+	 */
+	was_open = atomic_xchg(&prd_usage, 1);
+	return was_open == 1 ? -EBUSY : 0;
+}
+
+/*
+ * opal_prd_mmap - maps firmware-provided ranges into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map; vm_pgoff selects the physical range (-EINVAL if not allowed)
+ */
+
+static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t addr, size;
+	pgprot_t page_prot;
+
+	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
+			vma->vm_start, vma->vm_end, vma->vm_pgoff,
+			vma->vm_flags);
+
+	addr = vma->vm_pgoff << PAGE_SHIFT;	/* page offset converted to a byte address */
+	size = vma->vm_end - vma->vm_start;
+
+	/* ensure we're mapping within one of the allowable ranges */
+	if (!opal_prd_range_is_valid(addr, size))
+		return -EINVAL;
+
+	page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
+					 size, vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
+				page_prot);
+}
+
+static bool opal_msg_queue_empty(void)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+	ret = list_empty(&opal_prd_msg_queue);	/* sampled under the queue lock */
+	spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+	return ret;	/* may be stale as soon as the lock is dropped */
+}
+
+static __poll_t opal_prd_poll(struct file *file,
+		struct poll_table_struct *wait)
+{
+	__poll_t mask = 0;
+
+	poll_wait(file, &opal_prd_msg_wait, wait);
+	if (!opal_msg_queue_empty())
+		mask = EPOLLIN | EPOLLRDNORM;	/* a message is queued */
+	return mask;
+}
+
+static ssize_t opal_prd_read(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct opal_prd_msg_queue_item *item;
+	unsigned long flags;
+	ssize_t size, err;
+	int rc;
+
+	/* we need at least a header's worth of data */
+	if (count < sizeof(item->msg.header))
+		return -EINVAL;
+
+	if (*ppos)
+		return -ESPIPE;	/* only reads at offset 0 are accepted */
+
+	item = NULL;
+
+	for (;;) {
+		/* take the first queued message, if there is one */
+		spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+		if (!list_empty(&opal_prd_msg_queue)) {
+			item = list_first_entry(&opal_prd_msg_queue,
+					struct opal_prd_msg_queue_item, list);
+			list_del(&item->list);
+		}
+		spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+		if (item)
+			break;
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		rc = wait_event_interruptible(opal_prd_msg_wait,
+				!opal_msg_queue_empty());
+		if (rc)
+			return -EINTR;	/* interrupted while waiting for a message */
+	}
+
+	size = be16_to_cpu(item->msg.header.size);
+	if (size > count) {	/* the whole message must fit in the user buffer */
+		err = -EINVAL;
+		goto err_requeue;
+	}
+
+	rc = copy_to_user(buf, &item->msg, size);
+	if (rc) {
+		err = -EFAULT;
+		goto err_requeue;
+	}
+
+	kfree(item);
+
+	return size;
+
+err_requeue:
+	/* eep! re-queue at the head of the list so the message is not lost */
+	spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+	list_add(&item->list, &opal_prd_msg_queue);
+	spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+	return err;
+}
+
+/* Pass one complete PRD message from userspace to firmware; returns its size. */
+static ssize_t opal_prd_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct opal_prd_msg_header hdr;
+	struct opal_prd_msg *msg;
+	ssize_t size;
+	int rc;
+
+	if (count < sizeof(hdr))
+		return -EINVAL;
+
+	/* grab the header */
+	if (copy_from_user(&hdr, buf, sizeof(hdr)))
+		return -EFAULT;
+
+	/* The claimed size must cover the header and fit in what was written. */
+	size = be16_to_cpu(hdr.size);
+	if (size < sizeof(hdr) || size > count)
+		return -EINVAL;
+
+	msg = memdup_user(buf, size);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
+	/* Userspace may have changed the header since it was validated. */
+	msg->header = hdr;
+	rc = opal_prd_msg(msg);
+	if (rc) {
+		pr_warn("write: opal_prd_msg returned %d\n", rc);
+		size = -EIO;
+	}
+	kfree(msg);
+	return size;
+}
+
+static int opal_prd_release(struct inode *inode, struct file *file)
+{
+	struct opal_prd_msg msg;	/* NOTE(review): only size and type are set below - confirm the header has no other fields */
+
+	msg.header.size = cpu_to_be16(sizeof(msg));
+	msg.header.type = OPAL_PRD_MSG_TYPE_FINI;
+
+	opal_prd_msg(&msg);	/* send the FINI-type message to firmware; the result is ignored */
+
+	atomic_xchg(&prd_usage, 0);	/* allow the next open() to succeed */
+
+	return 0;
+}
+
+/*
+ * ioctl handler: OPAL_PRD_GET_INFO reports the kernel interface version,
+ * OPAL_PRD_SCOM_READ and OPAL_PRD_SCOM_WRITE perform one xscom access
+ * described by a struct opal_prd_scom and copy the result back to userspace.
+ * Any other command fails with -EINVAL.
+ */
+static long opal_prd_ioctl(struct file *file, unsigned int cmd,
+		unsigned long param)
+{
+	void __user *uptr = (void __user *)param;
+	struct opal_prd_info info;
+	struct opal_prd_scom scom;
+
+	switch (cmd) {
+	case OPAL_PRD_GET_INFO:
+		memset(&info, 0, sizeof(info));
+		info.version = OPAL_PRD_KERNEL_VERSION;
+		if (copy_to_user(uptr, &info, sizeof(info)))
+			return -EFAULT;
+		return 0;
+
+	case OPAL_PRD_SCOM_READ:
+		if (copy_from_user(&scom, uptr, sizeof(scom)))
+			return -EFAULT;
+
+		scom.rc = opal_xscom_read(scom.chip, scom.addr,
+				(__be64 *)&scom.data);
+		/* The value was stored big-endian; convert it in place. */
+		scom.data = be64_to_cpu(scom.data);
+		pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n",
+				scom.chip, scom.addr, scom.data, scom.rc);
+
+		if (copy_to_user(uptr, &scom, sizeof(scom)))
+			return -EFAULT;
+		return 0;
+
+	case OPAL_PRD_SCOM_WRITE:
+		if (copy_from_user(&scom, uptr, sizeof(scom)))
+			return -EFAULT;
+
+		scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data);
+		pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n",
+				scom.chip, scom.addr, scom.data, scom.rc);
+
+		if (copy_to_user(uptr, &scom, sizeof(scom)))
+			return -EFAULT;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations opal_prd_fops = {
+	.open		= opal_prd_open,	/* single opener at a time */
+	.mmap		= opal_prd_mmap,	/* map allowed reserved-memory ranges */
+	.poll		= opal_prd_poll,
+	.read		= opal_prd_read,	/* one queued firmware message per read */
+	.write		= opal_prd_write,	/* one message to firmware per write */
+	.unlocked_ioctl	= opal_prd_ioctl,
+	.release	= opal_prd_release,
+	.owner		= THIS_MODULE,
+};
+
+static struct miscdevice opal_prd_dev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "opal-prd",
+	.fops		= &opal_prd_fops,	/* registered from opal_prd_probe() */
+};
+
+/* opal interface: queue an incoming PRD message and wake any waiting reader */
+static int opal_prd_msg_notifier(struct notifier_block *nb,
+		unsigned long msg_type, void *_msg)
+{
+	struct opal_prd_msg_queue_item *item;
+	struct opal_prd_msg_header *hdr;
+	struct opal_msg *msg = _msg;
+	int msg_size, item_size;
+	unsigned long flags;
+
+	if (msg_type != OPAL_MSG_PRD && msg_type != OPAL_MSG_PRD2)
+		return 0;	/* not a PRD message: ignore it */
+
+	/* Calculate total size of the message and item we need to store. The
+	 * 'size' field in the header includes the header itself. */
+	hdr = (void *)msg->params;
+	msg_size = be16_to_cpu(hdr->size);
+	item_size = msg_size + sizeof(*item) - sizeof(item->msg);
+
+	item = kzalloc(item_size, GFP_ATOMIC);
+	if (!item)
+		return -ENOMEM;
+
+	memcpy(&item->msg.data, msg->params, msg_size);	/* msg_size comes from the firmware header */
+
+	spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+	list_add_tail(&item->list, &opal_prd_msg_queue);
+	spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+	wake_up_interruptible(&opal_prd_msg_wait);
+
+	return 0;
+}
+
+static struct notifier_block opal_prd_event_nb = {
+	/* Registered for OPAL_MSG_PRD in opal_prd_probe(). */
+	/* .next and .priority are left at their zero defaults. */
+	.notifier_call	= opal_prd_msg_notifier,
+};
+
+static struct notifier_block opal_prd_event_nb2 = {
+	/* Registered for OPAL_MSG_PRD2 in opal_prd_probe(). */
+	/* .next and .priority are left at their zero defaults. */
+	.notifier_call	= opal_prd_msg_notifier,
+};
+
+/* Bind the single PRD node: register both message notifiers and the miscdev. */
+static int opal_prd_probe(struct platform_device *pdev)
+{
+	int rc;
+
+	if (!pdev || !pdev->dev.of_node)
+		return -ENODEV;
+
+	/* Only one prd driver instance per machine: refuse any further node. */
+	if (prd_node)
+		return -EBUSY;
+
+	prd_node = pdev->dev.of_node;
+
+	rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb);
+	if (rc) {
+		pr_err("Couldn't register event notifier\n");
+		goto err_node;
+	}
+
+	rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+	if (rc) {
+		pr_err("Couldn't register PRD2 event notifier\n");
+		goto err_unreg_prd;
+	}
+
+	rc = misc_register(&opal_prd_dev);
+	if (!rc)
+		return 0;
+
+	pr_err("failed to register miscdev\n");
+	opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+err_unreg_prd:
+	opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+err_node:
+	/* Forget the node so that a later probe is not refused with -EBUSY. */
+	prd_node = NULL;
+	return rc;
+}
+
+static int opal_prd_remove(struct platform_device *pdev)
+{
+	misc_deregister(&opal_prd_dev);
+	opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+	opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+	prd_node = NULL;	/* allow the driver to be bound again */
+	return 0;
+}
+
+static const struct of_device_id opal_prd_match[] = {
+	{ .compatible = "ibm,opal-prd" },
+	{ },	/* empty terminating entry */
+};
+
+static struct platform_driver opal_prd_driver = {
+	.driver = {
+		.name		= "opal-prd",
+		.of_match_table	= opal_prd_match,	/* matches "ibm,opal-prd" nodes */
+	},
+	.probe	= opal_prd_probe,
+	.remove	= opal_prd_remove,
+};
+
+module_platform_driver(opal_prd_driver);
+
+MODULE_DEVICE_TABLE(of, opal_prd_match);
+MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
new file mode 100644
index 0000000000..6441e17b69
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Power-Shift-Ratio interface
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#define pr_fmt(fmt)     "opal-psr: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+static DEFINE_MUTEX(psr_mutex);	/* held around the OPAL calls in psr_show/psr_store */
+
+static struct kobject *psr_kobj;	/* "psr" kobject created under opal_kobj */
+
+static struct psr_attr {
+	u32 handle;	/* read from the child node's "handle" property */
+	struct kobj_attribute attr;
+} *psr_attrs;
+
+/*
+ * sysfs show handler: fetch the power-shift-ratio for this attribute's handle
+ * from OPAL, waiting for an async completion if needed, and print it.
+ */
+static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+	struct opal_msg msg;
+	__be32 psr;	/* filled in by firmware, big-endian */
+	int ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&psr_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_get_power_shift_ratio(psr_attr->handle, token,
+					    (u32 *)__pa(&psr));
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = sysfs_emit(buf, "%u\n", be32_to_cpu(psr));
+		break;
+	case OPAL_SUCCESS:
+		ret = sysfs_emit(buf, "%u\n", be32_to_cpu(psr));
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&psr_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+	struct opal_msg msg;
+	int psr, ret, token;
+
+	ret = kstrtoint(buf, 0, &psr);	/* base 0: the prefix selects the radix */
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&psr_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;	/* success: the whole input was consumed */
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&psr_mutex);
+out_token:
+	opal_async_release_token(token);	/* released on every path after it was taken */
+	return ret;
+}
+
+void __init opal_psr_init(void)
+{
+	struct device_node *psr, *node;
+	int i = 0;
+
+	psr = of_find_compatible_node(NULL, NULL,
+				      "ibm,opal-power-shift-ratio");
+	if (!psr) {
+		pr_devel("Power-shift-ratio node not found\n");
+		return;
+	}
+
+	psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
+			    GFP_KERNEL);	/* one attribute per child node */
+	if (!psr_attrs)
+		goto out_put_psr;
+
+	psr_kobj = kobject_create_and_add("psr", opal_kobj);
+	if (!psr_kobj) {
+		pr_warn("Failed to create psr kobject\n");
+		goto out;
+	}
+
+	for_each_child_of_node(psr, node) {
+		if (of_property_read_u32(node, "handle",
+					 &psr_attrs[i].handle))
+			goto out_kobj;
+
+		sysfs_attr_init(&psr_attrs[i].attr.attr);
+		if (of_property_read_string(node, "label",
+					    &psr_attrs[i].attr.attr.name))
+			goto out_kobj;	/* the file name comes from the "label" property */
+		psr_attrs[i].attr.attr.mode = 0664;
+		psr_attrs[i].attr.show = psr_show;
+		psr_attrs[i].attr.store = psr_store;
+		if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) {
+			pr_devel("Failed to create psr sysfs file %s\n",
+				 psr_attrs[i].attr.attr.name);
+			goto out_kobj;
+		}
+		i++;
+	}
+	of_node_put(psr);
+
+	return;
+out_kobj:
+	of_node_put(node);	/* drop the reference held on the current child */
+	kobject_put(psr_kobj);
+out:
+	kfree(psr_attrs);
+out_put_psr:
+	of_node_put(psr);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
new file mode 100644
index 0000000000..79011a263a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV Real Time Clock.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/opal.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+/*
+ * Decode the BCD-packed date/time words into @tm.
+ *
+ * @y_m_d carries, from most to least significant byte: century, year within
+ * the century, month (1-based) and day of month.  The top three bytes of
+ * @h_m_s_ms carry hour, minute and second; the remaining bytes are not used
+ * here.  tm_wday is set to -1.
+ */
+static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
+{
+	const u8 cc = y_m_d >> 24;
+	const u8 yy = (y_m_d >> 16) & 0xff;
+	const u8 mon = (y_m_d >> 8) & 0xff;
+	const u8 day = y_m_d & 0xff;
+	const u8 hh = (h_m_s_ms >> 56) & 0xff;
+	const u8 mm = (h_m_s_ms >> 48) & 0xff;
+	const u8 ss = (h_m_s_ms >> 40) & 0xff;
+
+	/* struct rtc_time counts years from 1900 and months from 0. */
+	tm->tm_year = ((bcd2bin(cc) * 100) + bcd2bin(yy)) - 1900;
+	tm->tm_mon = bcd2bin(mon) - 1;
+	tm->tm_mday = bcd2bin(day);
+	tm->tm_hour = bcd2bin(hh);
+	tm->tm_min = bcd2bin(mm);
+	tm->tm_sec = bcd2bin(ss);
+	tm->tm_wday = -1;
+}
+
+/*
+ * Read the RTC through OPAL and return it as a time64_t.
+ *
+ * Returns 0 when OPAL_RTC_READ is not available or when the read finishes
+ * with anything other than OPAL_SUCCESS.  While the call reports
+ * OPAL_BUSY or OPAL_BUSY_EVENT it is retried after OPAL_BUSY_DELAY_MS; for
+ * OPAL_BUSY_EVENT, OPAL events are polled as well before retrying.
+ */
+time64_t __init opal_get_boot_time(void)
+{
+	struct rtc_time tm;
+	__be32 be_ymd;
+	__be64 be_hmsm;
+	long rc;
+
+	if (!opal_check_token(OPAL_RTC_READ))
+		return 0;
+
+	for (;;) {
+		rc = opal_rtc_read(&be_ymd, &be_hmsm);
+		if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+			break;
+
+		mdelay(OPAL_BUSY_DELAY_MS);
+		if (rc == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+	}
+
+	if (rc != OPAL_SUCCESS)
+		return 0;
+
+	/* The values come back big-endian; convert before decoding. */
+	opal_to_tm(be32_to_cpu(be_ymd), be64_to_cpu(be_hmsm), &tm);
+
+	return rtc_tm_to_time64(&tm);
+}
+
+/*
+ * Register the "opal-rtc" platform device.
+ *
+ * When the device tree has a /ibm,opal/rtc node the device is created from
+ * it.  Otherwise a plain platform device is registered, provided OPAL
+ * offers OPAL_RTC_READ or OPAL_READ_TPO; with neither, -ENODEV is returned.
+ */
+static __init int opal_time_init(void)
+{
+	struct platform_device *pdev;
+	struct device_node *rtc_np;
+
+	rtc_np = of_find_node_by_path("/ibm,opal/rtc");
+	if (rtc_np) {
+		pdev = of_platform_device_create(rtc_np, "opal-rtc", NULL);
+		of_node_put(rtc_np);
+		return PTR_ERR_OR_ZERO(pdev);
+	}
+
+	if (!opal_check_token(OPAL_RTC_READ) &&
+	    !opal_check_token(OPAL_READ_TPO))
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("opal-rtc", -1, NULL, 0);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
+machine_subsys_initcall(powernv, opal_time_init);
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
new file mode 100644
index 0000000000..6ac410f4d3
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-secvar.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PowerNV code for secure variables
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Claudio Carvalho
+ *         Nayna Jain
+ *
+ * APIs to access secure variables managed by OPAL.
+ */
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/types.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <asm/opal.h>
+#include <asm/secvar.h>
+#include <asm/secure_boot.h>
+
+/*
+ * Translate an OPAL return code into 0 (OPAL_SUCCESS) or a negative errno.
+ * Codes without a dedicated mapping yield -EINVAL.
+ */
+static int opal_status_to_err(int rc)
+{
+	switch (rc) {
+	case OPAL_SUCCESS:
+		return 0;
+	case OPAL_UNSUPPORTED:
+		return -ENXIO;
+	case OPAL_RESOURCE:
+		return -ENOSPC;
+	case OPAL_HARDWARE:
+		return -EIO;
+	case OPAL_NO_MEM:
+		return -ENOMEM;
+	case OPAL_EMPTY:
+		return -ENOENT;
+	case OPAL_PARTIAL:
+		return -EFBIG;
+	case OPAL_PARAMETER:
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Fetch a secure variable via opal_secvar_get().
+ *
+ * @dsize is in/out: it is converted to big-endian for the call and back to
+ * CPU order afterwards, whatever the call returned.  Returns -EINVAL when
+ * @key or @dsize is NULL, otherwise the errno mapped from the OPAL status.
+ */
+static int opal_get_variable(const char *key, u64 ksize, u8 *data, u64 *dsize)
+{
+	int opal_rc;
+
+	if (!dsize || !key)
+		return -EINVAL;
+
+	*dsize = cpu_to_be64(*dsize);
+	opal_rc = opal_secvar_get(key, ksize, data, dsize);
+	*dsize = be64_to_cpu(*dsize);
+
+	return opal_status_to_err(opal_rc);
+}
+
+/*
+ * Step to the next secure variable name via opal_secvar_get_next().
+ *
+ * @keylen is in/out: it is converted to big-endian for the call and back to
+ * CPU order afterwards, whatever the call returned.  Returns -EINVAL when
+ * @key or @keylen is NULL, otherwise the errno mapped from the OPAL status.
+ */
+static int opal_get_next_variable(const char *key, u64 *keylen, u64 keybufsize)
+{
+	int opal_rc;
+
+	if (!keylen || !key)
+		return -EINVAL;
+
+	*keylen = cpu_to_be64(*keylen);
+	opal_rc = opal_secvar_get_next(key, keylen, keybufsize);
+	*keylen = be64_to_cpu(*keylen);
+
+	return opal_status_to_err(opal_rc);
+}
+
+/*
+ * Hand a variable update to opal_secvar_enqueue_update().
+ *
+ * Returns -EINVAL when @key or @data is NULL, otherwise the errno mapped
+ * from the OPAL status.
+ */
+static int opal_set_variable(const char *key, u64 ksize, u8 *data, u64 dsize)
+{
+	if (!data || !key)
+		return -EINVAL;
+
+	return opal_status_to_err(opal_secvar_enqueue_update(key, ksize,
+							     data, dsize));
+}
+
+/*
+ * Copy the "format" string of the "ibm,secvar-backend" device-tree node
+ * into @buf (at most @bufsize bytes, via snprintf()).
+ *
+ * Returns the snprintf() result on success, -ENODEV when the node is not
+ * available, or the error from reading the property.
+ */
+static ssize_t opal_secvar_format(char *buf, size_t bufsize)
+{
+	struct device_node *np;
+	const char *fmt;
+	ssize_t ret;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+	if (of_device_is_available(np)) {
+		ret = of_property_read_string(np, "format", &fmt);
+		if (!ret)
+			ret = snprintf(buf, bufsize, "%s", fmt);
+	} else {
+		ret = -ENODEV;
+	}
+
+	of_node_put(np);
+
+	return ret;
+}
+
+/*
+ * Read the "max-var-size" property of the "ibm,secvar-backend" device-tree
+ * node into @max_size.
+ *
+ * Returns -ENODEV when the node is missing or not available, otherwise the
+ * result of reading the property.
+ */
+static int opal_secvar_max_size(u64 *max_size)
+{
+	struct device_node *np;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+	if (!np)
+		return -ENODEV;
+
+	if (of_device_is_available(np))
+		ret = of_property_read_u64(np, "max-var-size", max_size);
+	else
+		ret = -ENODEV;
+
+	of_node_put(np);
+	return ret;
+}
+
+/* Secure-variable operations implemented by this file. */
+static const struct secvar_operations opal_secvar_ops = {
+	.format		= opal_secvar_format,
+	.max_size	= opal_secvar_max_size,
+	.get		= opal_get_variable,
+	.get_next	= opal_get_next_variable,
+	.set		= opal_set_variable,
+};
+
+/*
+ * Probe callback: install opal_secvar_ops when OPAL provides all three
+ * secure-variable calls (get, get-next, enqueue-update); otherwise log an
+ * error and return -ENODEV.
+ */
+static int opal_secvar_probe(struct platform_device *pdev)
+{
+	const bool supported = opal_check_token(OPAL_SECVAR_GET) &&
+			       opal_check_token(OPAL_SECVAR_GET_NEXT) &&
+			       opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE);
+
+	if (supported)
+		return set_secvar_ops(&opal_secvar_ops);
+
+	pr_err("OPAL doesn't support secure variables\n");
+	return -ENODEV;
+}
+
+/* Device-tree match table: the "ibm,secvar-backend" node. */
+static const struct of_device_id opal_secvar_match[] = {
+	{ .compatible = "ibm,secvar-backend" },
+	{ /* sentinel */ }
+};
+
+/* Platform driver named "secvar", matched through opal_secvar_match. */
+static struct platform_driver opal_secvar_driver = {
+	.driver = {
+		.of_match_table	= opal_secvar_match,
+		.name		= "secvar",
+	},
+};
+
+/* Register opal_secvar_driver with opal_secvar_probe() as its probe routine. */
+static int __init opal_secvar_init(void)
+{
+	int rc;
+
+	rc = platform_driver_probe(&opal_secvar_driver, opal_secvar_probe);
+
+	return rc;
+}
+device_initcall(opal_secvar_init);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
new file mode 100644
index 0000000000..9944376b11
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Sensor-groups interface
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#define pr_fmt(fmt)     "opal-sensor-groups: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+static DEFINE_MUTEX(sg_mutex);
+
+static struct kobject *sg_kobj;
+
+struct sg_attr {
+	u32 handle;	/* handle passed to opal_sensor_group_clear() by sg_store() */
+	struct kobj_attribute attr;	/* sysfs attribute; sg_store() gets back to sg_attr via container_of() */
+};
+
+static struct sensor_group {
+	char name[20];	/* group name: DT node name, plus the chip id when "ibm,chip-id" exists */
+	struct attribute_group sg;	/* sysfs group created under sg_kobj; sg.name points at name */
+	struct sg_attr *sgattrs;	/* array backing the entries of sg.attrs */
+} *sgs;	/* array with one slot per child of the "ibm,opal-sensor-group" node */
+
+/*
+ * Enable or disable the sensor group identified by @handle.
+ *
+ * Issues opal_sensor_group_enable() with an async token and, when OPAL
+ * answers OPAL_ASYNC_COMPLETION, waits for the response.  Returns the
+ * token error if no token could be obtained, -EIO if waiting fails, and
+ * otherwise the value of opal_error_code() for the final OPAL status.
+ */
+int sensor_group_enable(u32 handle, bool enable)
+{
+	struct opal_msg msg;
+	int token, rc;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0)
+		return token;
+
+	rc = opal_sensor_group_enable(handle, token, enable);
+	if (rc != OPAL_ASYNC_COMPLETION) {
+		rc = opal_error_code(rc);
+	} else if (opal_async_wait_response(token, &msg)) {
+		pr_devel("Failed to wait for the async response\n");
+		rc = -EIO;
+	} else {
+		rc = opal_error_code(opal_get_async_rc(msg));
+	}
+
+	opal_async_release_token(token);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sensor_group_enable);
+
+/*
+ * sysfs store handler: clear the sensor group behind @attr.
+ *
+ * The written text must parse (kstrtoint(), base auto-detected) to exactly
+ * 1; any other value is rejected with -EINVAL.  The clear request is sent
+ * with an async token while holding sg_mutex; when OPAL answers
+ * OPAL_ASYNC_COMPLETION the response is awaited.
+ *
+ * Returns @count on success, otherwise a negative errno: the parse error,
+ * the token error, the mutex_lock_interruptible() error, -EIO when waiting
+ * for the response fails, or the value of opal_error_code().
+ */
+static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct sg_attr *sattr = container_of(attr, struct sg_attr, attr);
+	struct opal_msg msg;
+	int data;	/* kstrtoint() stores through an int pointer */
+	int ret, token;
+
+	ret = kstrtoint(buf, 0, &data);
+	if (ret)
+		return ret;
+
+	if (data != 1)
+		return -EINVAL;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&sg_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_sensor_group_clear(sattr->handle, token);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&sg_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+static struct sg_ops_info {
+	int opal_no;	/* value compared against each cell of the DT "ops" property */
+	const char *attr_name;	/* name given to the sysfs attribute */
+	ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t count);	/* store handler installed on the attribute */
+} ops_info[] = {
+	{ OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store },
+};
+
+/*
+ * Fill in @attr as a write-only (mode 0220) sysfs attribute whose name and
+ * store handler come from ops_info[@index], and record @handle in it.
+ */
+static void add_attr(int handle, struct sg_attr *attr, int index)
+{
+	const struct sg_ops_info *op = &ops_info[index];
+	struct kobj_attribute *kattr = &attr->attr;
+
+	attr->handle = handle;
+
+	sysfs_attr_init(&kattr->attr);
+	kattr->attr.name = op->attr_name;
+	kattr->attr.mode = 0220;
+	kattr->store = op->store;
+}
+
+/*
+ * Populate @sg with one attribute for every (ops entry, ops_info entry)
+ * pair whose values match, then create the sysfs group under sg_kobj.
+ *
+ * @ops:    big-endian cells to examine
+ * @len:    number of entries of @ops that are examined
+ * @sg:     group whose sgattrs/sg.attrs arrays are filled in order
+ * @handle: handle stored in every attribute that is added
+ *
+ * Returns the result of sysfs_create_group().
+ */
+static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
+			   u32 handle)
+{
+	int op_idx, info_idx;
+	int nr_added = 0;
+
+	for (op_idx = 0; op_idx < len; op_idx++) {
+		const u32 op = be32_to_cpu(ops[op_idx]);
+
+		for (info_idx = 0; info_idx < ARRAY_SIZE(ops_info); info_idx++) {
+			struct sg_attr *slot;
+
+			if (op != ops_info[info_idx].opal_no)
+				continue;
+
+			slot = &sg->sgattrs[nr_added];
+			add_attr(handle, slot, info_idx);
+			sg->sg.attrs[nr_added] = &slot->attr.attr;
+			nr_added++;
+		}
+	}
+
+	return sysfs_create_group(sg_kobj, &sg->sg);
+}
+
+/*
+ * Count the (ops entry, ops_info entry) pairs whose values match, looking
+ * at the first @len entries of the big-endian array @ops.
+ */
+static int __init get_nr_attrs(const __be32 *ops, int len)
+{
+	int op_idx, info_idx;
+	int matches = 0;
+
+	for (op_idx = 0; op_idx < len; op_idx++) {
+		const u32 op = be32_to_cpu(ops[op_idx]);
+
+		for (info_idx = 0; info_idx < ARRAY_SIZE(ops_info); info_idx++) {
+			if (op == ops_info[info_idx].opal_no)
+				matches++;
+		}
+	}
+
+	return matches;
+}
+
+/*
+ * Create the "sensor_groups" kobject under opal_kobj and add one sysfs
+ * attribute group for every child of the "ibm,opal-sensor-group"
+ * device-tree node that lists at least one operation known to ops_info[].
+ *
+ * Children without an "ops" property, or whose "ops" match nothing in
+ * ops_info[], are skipped.  Any later failure (allocation, missing
+ * "sensor-group-id", group creation) tears everything down again: all
+ * per-group arrays, the kobject and sgs are released.
+ */
+void __init opal_sensor_groups_init(void)
+{
+	struct device_node *sg, *node;
+	int i = 0;
+
+	sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
+	if (!sg) {
+		pr_devel("Sensor groups node not found\n");
+		return;
+	}
+
+	/* Zero-initialised, one slot per child; slots are used densely. */
+	sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL);
+	if (!sgs)
+		goto out_sg_put;
+
+	sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj);
+	if (!sg_kobj) {
+		pr_warn("Failed to create sensor group kobject\n");
+		goto out_sgs;
+	}
+
+	for_each_child_of_node(sg, node) {
+		const __be32 *ops;
+		u32 sgid, nr_attrs, chipid;
+		int len, nr_ops;
+
+		ops = of_get_property(node, "ops", &len);
+		if (!ops)
+			continue;
+
+		/*
+		 * of_get_property() reports the property length in bytes,
+		 * while the helpers index ops[] cell by cell.
+		 */
+		nr_ops = len / sizeof(*ops);
+
+		nr_attrs = get_nr_attrs(ops, nr_ops);
+		if (!nr_attrs)
+			continue;
+
+		sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs),
+					 GFP_KERNEL);
+		if (!sgs[i].sgattrs)
+			goto out_sgs_sgattrs;
+
+		/* One extra zeroed slot terminates the attribute list. */
+		sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
+					  sizeof(*sgs[i].sg.attrs),
+					  GFP_KERNEL);
+		if (!sgs[i].sg.attrs)
+			goto out_sgs_sgattrs;
+
+		if (of_property_read_u32(node, "sensor-group-id", &sgid)) {
+			pr_warn("sensor-group-id property not found\n");
+			goto out_sgs_sgattrs;
+		}
+
+		/* Bounded formatting: name[] is a small fixed-size buffer. */
+		if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
+			snprintf(sgs[i].name, sizeof(sgs[i].name), "%pOFn%d",
+				 node, chipid);
+		else
+			snprintf(sgs[i].name, sizeof(sgs[i].name), "%pOFn",
+				 node);
+
+		sgs[i].sg.name = sgs[i].name;
+		if (add_attr_group(ops, nr_ops, &sgs[i], sgid)) {
+			pr_warn("Failed to create sensor attribute group %s\n",
+				sgs[i].sg.name);
+			goto out_sgs_sgattrs;
+		}
+		i++;
+	}
+	of_node_put(sg);
+
+	return;
+
+out_sgs_sgattrs:
+	/*
+	 * Start at the slot that was being set up when the failure hit, so
+	 * its arrays are released too.  Pointers not yet allocated are still
+	 * NULL from kcalloc(), which kfree() accepts.
+	 */
+	for (; i >= 0; i--) {
+		kfree(sgs[i].sgattrs);
+		kfree(sgs[i].sg.attrs);
+	}
+	kobject_put(sg_kobj);
+	/* Drop the reference on the child the walk stopped at. */
+	of_node_put(node);
+out_sgs:
+	kfree(sgs);
+out_sg_put:
+	of_node_put(sg);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
new file mode 100644
index 0000000000..8880a1c145
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV sensor code
+ *
+ * Copyright (C) 2013 IBM
+ */
+
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <asm/opal.h>
+#include <asm/machdep.h>
+
+/*
+ * Read the 32-bit value of the sensor identified by @sensor_hndl into
+ * @sensor_data.  The handle is an opaque id that the driver reads from the
+ * device tree.
+ *
+ * Returns 0 on success.  On failure it returns the token error, the error
+ * from waiting for the async response, -EIO for OPAL_WRONG_STATE, or the
+ * value of opal_error_code() for any other OPAL status.
+ */
+int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
+{
+	struct opal_msg msg;
+	__be32 raw;
+	int token, rc;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0)
+		return token;
+
+	rc = opal_sensor_read(sensor_hndl, token, &raw);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(token, &msg);
+		if (rc) {
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, rc);
+		} else {
+			rc = opal_error_code(opal_get_async_rc(msg));
+			*sensor_data = be32_to_cpu(raw);
+		}
+	} else if (rc == OPAL_SUCCESS) {
+		rc = 0;
+		*sensor_data = be32_to_cpu(raw);
+	} else if (rc == OPAL_WRONG_STATE) {
+		rc = -EIO;
+	} else {
+		rc = opal_error_code(rc);
+	}
+
+	opal_async_release_token(token);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(opal_get_sensor_data);
+
+/*
+ * Read the 64-bit value of the sensor identified by @sensor_hndl into
+ * @sensor_data.
+ *
+ * When OPAL lacks OPAL_SENSOR_READ_U64 this falls back to
+ * opal_get_sensor_data() and widens the 32-bit result.  Return values
+ * follow opal_get_sensor_data(): 0 on success, otherwise the token error,
+ * the wait error, -EIO for OPAL_WRONG_STATE, or opal_error_code().
+ */
+int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data)
+{
+	struct opal_msg msg;
+	__be64 raw;
+	int token, rc;
+
+	if (!opal_check_token(OPAL_SENSOR_READ_U64)) {
+		u32 narrow;
+
+		rc = opal_get_sensor_data(sensor_hndl, &narrow);
+		if (rc == 0)
+			*sensor_data = narrow;
+		return rc;
+	}
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0)
+		return token;
+
+	rc = opal_sensor_read_u64(sensor_hndl, token, &raw);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(token, &msg);
+		if (rc) {
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, rc);
+		} else {
+			rc = opal_error_code(opal_get_async_rc(msg));
+			*sensor_data = be64_to_cpu(raw);
+		}
+	} else if (rc == OPAL_SUCCESS) {
+		rc = 0;
+		*sensor_data = be64_to_cpu(raw);
+	} else if (rc == OPAL_WRONG_STATE) {
+		rc = -EIO;
+	} else {
+		rc = opal_error_code(rc);
+	}
+
+	opal_async_release_token(token);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(opal_get_sensor_data_u64);
+
+/*
+ * Create the "opal-sensor" platform device from the /ibm,opal/sensors
+ * device-tree node.  Returns -ENODEV when the node does not exist,
+ * otherwise PTR_ERR_OR_ZERO() of the created device.
+ */
+int __init opal_sensor_init(void)
+{
+	struct device_node *np = of_find_node_by_path("/ibm,opal/sensors");
+	struct platform_device *pdev;
+
+	if (!np) {
+		pr_err("Opal node 'sensors' not found\n");
+		return -ENODEV;
+	}
+
+	pdev = of_platform_device_create(np, "opal-sensor", NULL);
+	of_node_put(np);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
new file mode 100644
index 0000000000..a12312afe4
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV system parameter code
+ *
+ * Copyright (C) 2013 IBM
+ */
+
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/gfp.h>
+#include <linux/stat.h>
+#include <asm/opal.h>
+
+#define MAX_PARAM_DATA_LEN	64
+
+static DEFINE_MUTEX(opal_sysparam_mutex);
+static struct kobject *sysparam_kobj;
+static void *param_data_buf;
+
+struct param_attr {
+	struct list_head list;
+	u32 param_id;	/* id passed to opal_get_param()/opal_set_param() */
+	u32 param_size;	/* length passed to the OPAL call; also the byte count returned by show */
+	struct kobj_attribute kobj_attr;	/* sysfs attribute; handlers get back to param_attr via container_of() */
+};
+
+/*
+ * Ask OPAL for system parameter @param_id, to be placed in @buffer
+ * (@length bytes).
+ *
+ * Returns the token error if no async token could be obtained (logged
+ * unless it is -ERESTARTSYS), the error from waiting for the response, or
+ * the value of opal_error_code() for the final OPAL status.
+ */
+static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
+{
+	struct opal_msg msg;
+	ssize_t ret;
+	int token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		if (token != -ERESTARTSYS)
+			pr_err("%s: Couldn't get the token, returning\n",
+					__func__);
+		return token;
+	}
+
+	ret = opal_get_param(token, param_id, (u64)buffer, length);
+	if (ret == OPAL_ASYNC_COMPLETION) {
+		ret = opal_async_wait_response(token, &msg);
+		if (ret)
+			pr_err("%s: Failed to wait for the async response, %zd\n",
+					__func__, ret);
+		else
+			ret = opal_error_code(opal_get_async_rc(msg));
+	} else {
+		ret = opal_error_code(ret);
+	}
+
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * Ask OPAL to set system parameter @param_id from @buffer (@length bytes).
+ *
+ * Returns the token error if no async token could be obtained (logged
+ * unless it is -ERESTARTSYS), the error from waiting for the response, or
+ * the value of opal_error_code() for the final OPAL status.
+ */
+static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
+{
+	struct opal_msg msg;
+	int token, rc;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		if (token != -ERESTARTSYS)
+			pr_err("%s: Couldn't get the token, returning\n",
+					__func__);
+		return token;
+	}
+
+	rc = opal_set_param(token, param_id, (u64)buffer, length);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(token, &msg);
+		if (rc)
+			pr_err("%s: Failed to wait for the async response, %d\n",
+					__func__, rc);
+		else
+			rc = opal_error_code(opal_get_async_rc(msg));
+	} else {
+		rc = opal_error_code(rc);
+	}
+
+	opal_async_release_token(token);
+	return rc;
+}
+
+/*
+ * sysfs show handler: fetch the parameter into the shared param_data_buf
+ * under opal_sysparam_mutex and copy param_size bytes of it to @buf.
+ *
+ * Returns param_size on success or the error from opal_get_sys_param().
+ */
+static ssize_t sys_param_show(struct kobject *kobj,
+		struct kobj_attribute *kobj_attr, char *buf)
+{
+	struct param_attr *pattr;
+	ssize_t rc;
+
+	pattr = container_of(kobj_attr, struct param_attr, kobj_attr);
+
+	mutex_lock(&opal_sysparam_mutex);
+	rc = opal_get_sys_param(pattr->param_id, pattr->param_size,
+				param_data_buf);
+	if (rc == 0) {
+		memcpy(buf, param_data_buf, pattr->param_size);
+		rc = pattr->param_size;
+	}
+	mutex_unlock(&opal_sysparam_mutex);
+
+	return rc;
+}
+
+/*
+ * sysfs store handler: copy the written bytes into the shared
+ * param_data_buf under opal_sysparam_mutex and hand the buffer to OPAL with
+ * the parameter's param_size.
+ *
+ * Input longer than MAX_PARAM_DATA_LEN is truncated to that length.
+ * Returns the number of bytes consumed on success or the error from
+ * opal_set_sys_param().
+ */
+static ssize_t sys_param_store(struct kobject *kobj,
+		struct kobj_attribute *kobj_attr, const char *buf, size_t count)
+{
+	struct param_attr *pattr;
+	ssize_t rc;
+
+	pattr = container_of(kobj_attr, struct param_attr, kobj_attr);
+
+	/* param_data_buf holds MAX_PARAM_DATA_LEN bytes; never copy more. */
+	if (count > MAX_PARAM_DATA_LEN)
+		count = MAX_PARAM_DATA_LEN;
+
+	mutex_lock(&opal_sysparam_mutex);
+	memcpy(param_data_buf, buf, count);
+	rc = opal_set_sys_param(pattr->param_id, pattr->param_size,
+				param_data_buf);
+	mutex_unlock(&opal_sysparam_mutex);
+
+	if (rc == 0)
+		rc = count;
+	return rc;
+}
+
+/*
+ * Publish the OPAL system parameters described by the /ibm,opal/sysparams
+ * device-tree node as sysfs files in a "sysparams" kobject under opal_kobj.
+ *
+ * The node supplies four parallel properties with one entry per parameter:
+ * "param-name" (strings), "param-id" and "param-len" (u32 arrays) and
+ * "param-perm" (u8 array).  A parameter whose length exceeds
+ * MAX_PARAM_DATA_LEN, or whose name cannot be read, is skipped.  A missing
+ * node is not an error.  Any other failure undoes the setup done so far.
+ */
+void __init opal_sys_param_init(void)
+{
+	struct device_node *sysparam;
+	struct param_attr *attr;
+	u32 *id, *size;
+	int count, i;
+	u8 *perm;
+
+	if (!opal_kobj) {
+		pr_warn("SYSPARAM: opal kobject is not available\n");
+		goto out;
+	}
+
+	/* Some systems do not use sysparams; this is not an error */
+	sysparam = of_find_node_by_path("/ibm,opal/sysparams");
+	if (!sysparam)
+		goto out;
+
+	if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
+		pr_err("SYSPARAM: Opal sysparam node not compatible\n");
+		goto out_node_put;
+	}
+
+	sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj);
+	if (!sysparam_kobj) {
+		pr_err("SYSPARAM: Failed to create sysparam kobject\n");
+		goto out_node_put;
+	}
+
+	/* Allocate big enough buffer for any get/set transactions */
+	param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL);
+	if (!param_data_buf) {
+		pr_err("SYSPARAM: Failed to allocate memory for param data "
+				"buf\n");
+		goto out_kobj_put;
+	}
+
+	/* Number of parameters exposed through DT */
+	count = of_property_count_strings(sysparam, "param-name");
+	if (count < 0) {
+		pr_err("SYSPARAM: No string found of property param-name in "
+				"the node %pOFn\n", sysparam);
+		goto out_param_buf;
+	}
+
+	/* Scratch arrays for the per-parameter properties; freed on exit. */
+	id = kcalloc(count, sizeof(*id), GFP_KERNEL);
+	if (!id) {
+		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
+				"id\n");
+		goto out_param_buf;
+	}
+
+	size = kcalloc(count, sizeof(*size), GFP_KERNEL);
+	if (!size) {
+		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
+				"size\n");
+		goto out_free_id;
+	}
+
+	perm = kcalloc(count, sizeof(*perm), GFP_KERNEL);
+	if (!perm) {
+		pr_err("SYSPARAM: Failed to allocate memory to read supported "
+				"action on the parameter\n");
+		goto out_free_size;
+	}
+
+	if (of_property_read_u32_array(sysparam, "param-id", id, count)) {
+		pr_err("SYSPARAM: Missing property param-id in the DT\n");
+		goto out_free_perm;
+	}
+
+	if (of_property_read_u32_array(sysparam, "param-len", size, count)) {
+		pr_err("SYSPARAM: Missing property param-len in the DT\n");
+		goto out_free_perm;
+	}
+
+
+	if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) {
+		pr_err("SYSPARAM: Missing property param-perm in the DT\n");
+		goto out_free_perm;
+	}
+
+	/*
+	 * Unlike the scratch arrays, attr[] is not freed on success: the
+	 * sysfs files created below point into it.
+	 */
+	attr = kcalloc(count, sizeof(*attr), GFP_KERNEL);
+	if (!attr) {
+		pr_err("SYSPARAM: Failed to allocate memory for parameter "
+				"attributes\n");
+		goto out_free_perm;
+	}
+
+	/* For each of the parameters, populate the parameter attributes */
+	for (i = 0; i < count; i++) {
+		/* The shared transfer buffer cannot hold this parameter. */
+		if (size[i] > MAX_PARAM_DATA_LEN) {
+			pr_warn("SYSPARAM: Not creating parameter %d as size "
+				"exceeds buffer length\n", i);
+			continue;
+		}
+
+		sysfs_attr_init(&attr[i].kobj_attr.attr);
+		attr[i].param_id = id[i];
+		attr[i].param_size = size[i];
+		if (of_property_read_string_index(sysparam, "param-name", i,
+				&attr[i].kobj_attr.attr.name))
+			continue;
+
+		/* If the parameter is read-only or read-write */
+		switch (perm[i] & 3) {
+		case OPAL_SYSPARAM_READ:
+			attr[i].kobj_attr.attr.mode = 0444;
+			break;
+		case OPAL_SYSPARAM_WRITE:
+			attr[i].kobj_attr.attr.mode = 0200;
+			break;
+		case OPAL_SYSPARAM_RW:
+			attr[i].kobj_attr.attr.mode = 0644;
+			break;
+		default:
+			/* No known permission bits: mode stays zero. */
+			break;
+		}
+
+		attr[i].kobj_attr.show = sys_param_show;
+		attr[i].kobj_attr.store = sys_param_store;
+
+		if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) {
+			pr_err("SYSPARAM: Failed to create sysfs file %s\n",
+					attr[i].kobj_attr.attr.name);
+			goto out_free_attr;
+		}
+	}
+
+	kfree(perm);
+	kfree(size);
+	kfree(id);
+	of_node_put(sysparam);
+	return;
+
+out_free_attr:
+	kfree(attr);
+out_free_perm:
+	kfree(perm);
+out_free_size:
+	kfree(size);
+out_free_id:
+	kfree(id);
+out_param_buf:
+	kfree(param_data_buf);
+out_kobj_put:
+	kobject_put(sysparam_kobj);
+out_node_put:
+	of_node_put(sysparam);
+out:
+	return;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c
new file mode 100644
index 0000000000..91b36541b9
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/percpu.h>
+#include <linux/jump_label.h>
+#include <asm/trace.h>
+
+#ifdef CONFIG_JUMP_LABEL
+/* With jump labels, the tracepoint state is kept in a static key. */
+struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
+
+/* Registration hook: increment the static key. Always returns 0. */
+int opal_tracepoint_regfunc(void)
+{
+	static_key_slow_inc(&opal_tracepoint_key);
+
+	return 0;
+}
+
+/* Unregistration hook: decrement the static key. */
+void opal_tracepoint_unregfunc(void)
+{
+	static_key_slow_dec(&opal_tracepoint_key);
+}
+#else
+/*
+ * We optimise OPAL calls by placing opal_tracepoint_refcount
+ * directly in the TOC so we can check if the opal tracepoints are
+ * enabled via a single load.
+ *
+ * NB: reg/unreg are called while guarded with the tracepoints_mutex
+ */
+extern long opal_tracepoint_refcount;
+
+/* Registration hook: bump the plain reference count. Always returns 0. */
+int opal_tracepoint_regfunc(void)
+{
+	opal_tracepoint_refcount += 1;
+
+	return 0;
+}
+
+/* Unregistration hook: drop the plain reference count. */
+void opal_tracepoint_unregfunc(void)
+{
+	opal_tracepoint_refcount -= 1;
+}
+#endif
+
+/*
+ * Since the tracing code might execute OPAL calls we need to guard against
+ * recursion.
+ */
+static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
+
+/*
+ * Emit the opal_entry trace event for @opcode/@args unless this CPU is
+ * already inside one of the OPAL trace hooks (per-CPU opal_trace_depth is
+ * non-zero), which guards against recursion.
+ *
+ * Runs with interrupts disabled.  Note that preempt_disable() has no
+ * matching enable in this function; __trace_opal_exit() contains a
+ * preempt_enable().
+ */
+void __trace_opal_entry(unsigned long opcode, unsigned long *args)
+{
+	unsigned long irqflags;
+	unsigned int *nesting;
+
+	local_irq_save(irqflags);
+
+	nesting = this_cpu_ptr(&opal_trace_depth);
+	if (*nesting == 0) {
+		++*nesting;
+		preempt_disable();
+		trace_opal_entry(opcode, args);
+		--*nesting;
+	}
+
+	local_irq_restore(irqflags);
+}
+
+/*
+ * Emit the opal_exit trace event for @opcode/@retval unless this CPU is
+ * already inside one of the OPAL trace hooks (per-CPU opal_trace_depth is
+ * non-zero), which guards against recursion.
+ *
+ * Runs with interrupts disabled.  Note that preempt_enable() has no
+ * matching disable in this function; __trace_opal_entry() contains a
+ * preempt_disable().
+ */
+void __trace_opal_exit(long opcode, unsigned long retval)
+{
+	unsigned long irqflags;
+	unsigned int *nesting;
+
+	local_irq_save(irqflags);
+
+	nesting = this_cpu_ptr(&opal_trace_depth);
+	if (*nesting == 0) {
+		++*nesting;
+		trace_opal_exit(opcode, retval);
+		preempt_enable();
+		--*nesting;
+	}
+
+	local_irq_restore(irqflags);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
new file mode 100644
index 0000000000..0ed95f7534
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerNV OPAL API wrappers
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#include <linux/jump_label.h>
+#include <asm/ppc_asm.h>
+#include <asm/hvcall.h>
+#include <asm/asm-offsets.h>
+#include <asm/opal.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+	.section ".text"
+
+/*
+ * r3-r10		- OPAL call arguments
+ * STK_PARAM(R11)	- OPAL opcode
+ * STK_PARAM(R12)	- MSR to restore
+ *
+ * The caller's LR is saved on the stack and LR is pointed at opal_return
+ * before control is transferred with hrfid.  HSRR0 is loaded from the
+ * doubleword at offset 8 of the "opal" symbol, r2 from the doubleword at
+ * offset 0, and HSRR1 from the passed MSR with IR, DR and LE cleared.
+ * At opal_return the passed MSR is restored and the saved LR is used to
+ * return to the caller.
+ */
+_GLOBAL_TOC(__opal_call)
+	mflr	r0
+	std	r0,PPC_LR_STKOFF(r1)	/* save the caller's LR on the stack */
+	ld	r12,STK_PARAM(R12)(r1)	/* r12 = MSR to restore */
+	li	r0,MSR_IR|MSR_DR|MSR_LE
+	andc	r12,r12,r0	/* r12 = that MSR with IR, DR and LE cleared */
+	LOAD_REG_ADDR(r11, opal_return)
+	mtlr	r11	/* LR = address of opal_return */
+	LOAD_REG_ADDR(r11, opal)
+	ld	r2,0(r11)	/* r2 = doubleword at offset 0 of opal */
+	ld	r11,8(r11)	/* r11 = doubleword at offset 8 of opal */
+	mtspr	SPRN_HSRR0,r11
+	mtspr	SPRN_HSRR1,r12
+	/* set token to r0 */
+	ld	r0,STK_PARAM(R11)(r1)
+	hrfid
+opal_return:
+	/*
+	 * Restore MSR on OPAL return. The MSR is set to big-endian.
+	 */
+#ifdef __BIG_ENDIAN__
+	ld	r11,STK_PARAM(R12)(r1)
+	mtmsrd	r11
+#else
+	/* Endian can only be switched with rfi, must byte reverse MSR load */
+	.short 0x4039	 /* li r10,STK_PARAM(R12)		*/
+	.byte (STK_PARAM(R12) >> 8) & 0xff
+	.byte STK_PARAM(R12) & 0xff
+
+	.long 0x280c6a7d /* ldbrx r11,r10,r1			*/
+	.long 0x05009f42 /* bcl 20,31,$+4			*/
+	.long 0xa602487d /* mflr r10				*/
+	.long 0x14004a39 /* addi r10,r10,20			*/
+	.long 0xa64b5a7d /* mthsrr0 r10				*/
+	.long 0xa64b7b7d /* mthsrr1 r11				*/
+	.long 0x2402004c /* hrfid				*/
+#endif
+	LOAD_PACA_TOC()
+	ld	r0,PPC_LR_STKOFF(r1)	/* reload the LR saved on entry */
+	mtlr	r0
+	blr
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
new file mode 100644
index 0000000000..748c2b97fa
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV SCOM bus debugfs interface
+ *
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ * Copyright 2013 IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/prom.h>
+
+/*
+ * Convert a debugfs-style SCOM address into the form passed to OPAL.
+ *
+ * XSCOM addresses use the top nibble (bits 0-3 in IBM notation) to select
+ * indirect mode and its form.  The debugfs interface uses signed file
+ * offsets and shifts the address left by 3, so the top four bits cannot be
+ * expressed there.  This API therefore accepts the indirect bits one
+ * nibble lower (IBM bits 4-7) and this helper moves them up: the nibble
+ * under mask 0x0f00000000000000 is cleared and OR-ed into the top nibble.
+ */
+static u64 opal_scom_unmangle(u64 addr)
+{
+	const u64 indirect = addr & 0x0f00000000000000;
+
+	return (addr & 0xf0ffffffffffffff) | (indirect << 4);
+}
+
+/*
+ * Read SCOM register (@addr + @reg), after unmangling, on @chip.
+ *
+ * On success stores the CPU-endian value in @value and returns 0.  On any
+ * OPAL error stores all-ones in @value and returns -EIO.
+ */
+static int opal_scom_read(uint32_t chip, uint64_t addr, u64 reg, u64 *value)
+{
+	__be64 raw;
+	int64_t rc;
+
+	reg = opal_scom_unmangle(addr + reg);
+
+	/* OPAL is given the physical address of the result buffer. */
+	rc = opal_xscom_read(chip, reg, (__be64 *)__pa(&raw));
+	if (rc != 0) {
+		*value = 0xfffffffffffffffful;
+		return -EIO;
+	}
+
+	*value = be64_to_cpu(raw);
+	return 0;
+}
+
+/*
+ * Write @value to SCOM register (@addr + @reg), after unmangling, on @chip.
+ * Returns 0 on success or -EIO on any OPAL error.
+ */
+static int opal_scom_write(uint32_t chip, uint64_t addr, u64 reg, u64 value)
+{
+	const u64 scom = opal_scom_unmangle(addr + reg);
+	int64_t rc = opal_xscom_write(chip, scom, value);
+
+	return rc ? -EIO : 0;
+}
+
+struct scom_debug_entry {
+	u32 chip;	/* chip id passed to opal_scom_read()/opal_scom_write() */
+	struct debugfs_blob_wrapper path;	/* device-tree path string exposed as the "devspec" blob */
+	char name[16];	/* debugfs directory name: chip id formatted with "%08x" */
+};
+
+/*
+ * debugfs read handler for the "access" file.
+ *
+ * The file offset divided by 8 selects the first SCOM register; @count
+ * divided by 8 is the number of 64-bit registers copied to @ubuf.  Both
+ * the offset and @count must be multiples of 8, else -EINVAL.
+ *
+ * Returns the number of bytes transferred.  An error is returned only when
+ * the very first register fails; a later failure yields a short read.
+ * *ppos advances by 8 for every register transferred.
+ */
+static ssize_t scom_debug_read(struct file *filp, char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct scom_debug_entry *ent = filp->private_data;
+	u64 __user *dst = (u64 __user *)ubuf;
+	loff_t off = *ppos;
+	u64 idx, first_reg, nr_regs, val;
+	ssize_t copied = 0;
+	int err;
+
+	if (off < 0 || (off & 7) || (count & 7))
+		return -EINVAL;
+
+	first_reg = off >> 3;
+	nr_regs = count >> 3;
+
+	for (idx = 0; idx < nr_regs; idx++) {
+		err = opal_scom_read(ent->chip, first_reg, idx, &val);
+		if (!err)
+			err = put_user(val, dst);
+		if (err)
+			return copied ? copied : err;
+
+		dst++;
+		*ppos += 8;
+		copied += 8;
+	}
+
+	return copied;
+}
+
+/*
+ * debugfs write handler for the "access" file.
+ *
+ * The file offset divided by 8 selects the first SCOM register; @count
+ * divided by 8 is the number of 64-bit values taken from @ubuf.  Both the
+ * offset and @count must be multiples of 8, else -EINVAL.
+ *
+ * Returns the number of bytes written.  An error is returned only when the
+ * very first register fails; a later failure yields a short write.  This
+ * handler does not modify *ppos.
+ */
+static ssize_t scom_debug_write(struct file *filp, const char __user *ubuf,
+				size_t count, loff_t *ppos)
+{
+	struct scom_debug_entry *ent = filp->private_data;
+	u64 __user *src = (u64 __user *)ubuf;
+	loff_t off = *ppos;
+	u64 idx, first_reg, nr_regs, val;
+	ssize_t written = 0;
+	int err;
+
+	if (off < 0 || (off & 7) || (count & 7))
+		return -EINVAL;
+
+	first_reg = off >> 3;
+	nr_regs = count >> 3;
+
+	for (idx = 0; idx < nr_regs; idx++) {
+		err = get_user(val, src);
+		if (!err)
+			err = opal_scom_write(ent->chip, first_reg, idx,  val);
+		if (err)
+			return written ? written : err;
+
+		src++;
+		written += 8;
+	}
+
+	return written;
+}
+
+/* File operations of the per-chip "access" debugfs file. */
+static const struct file_operations scom_debug_fops = {
+	.open	= simple_open,
+	.llseek	= default_llseek,
+	.read	= scom_debug_read,
+	.write	= scom_debug_write,
+};
+
+/*
+ * Create the debugfs directory for one chip under @root, named after
+ * @chip in "%08x" form and holding two files: "devspec" (the full
+ * device-tree path of @dn) and "access" (SCOM register access).
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -1 when the
+ * directory cannot be created.
+ */
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+			       int chip)
+{
+	struct scom_debug_entry *ent;
+	struct dentry *dir;
+	char *devspec;
+	int err = -ENOMEM;
+
+	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+
+	devspec = kasprintf(GFP_KERNEL, "%pOF", dn);
+	if (!devspec)
+		goto free_ent;
+
+	ent->chip = chip;
+	snprintf(ent->name, sizeof(ent->name), "%08x", chip);
+	ent->path.data = (void *)devspec;
+	ent->path.size = strlen(devspec);
+
+	dir = debugfs_create_dir(ent->name, root);
+	if (IS_ERR(dir)) {
+		err = -1;
+		goto free_path;
+	}
+
+	debugfs_create_blob("devspec", 0400, dir, &ent->path);
+	debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops);
+
+	return 0;
+
+free_path:
+	kfree(devspec);
+free_ent:
+	kfree(ent);
+	return err;
+}
+
+/*
+ * Create the "scom" debugfs directory under arch_debugfs_dir and add an
+ * entry for every device-tree node that has a "scom-controller" property.
+ *
+ * Does nothing (returns 0) without the OPAL firmware feature.  Returns -1
+ * if the root directory cannot be created; otherwise the bitwise OR of the
+ * per-node results.
+ */
+static int scom_debug_init(void)
+{
+	struct device_node *np;
+	struct dentry *scom_root;
+	int chip_id;
+	int result = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return 0;
+
+	scom_root = debugfs_create_dir("scom", arch_debugfs_dir);
+	if (IS_ERR(scom_root))
+		return -1;
+
+	for_each_node_with_property(np, "scom-controller") {
+		chip_id = of_get_ibm_chip_id(np);
+		WARN_ON(chip_id == -1);
+		result = result | scom_debug_init_one(scom_root, np, chip_id);
+	}
+
+	return result;
+}
+device_initcall(scom_debug_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
new file mode 100644
index 0000000000..cdf3838f08
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -0,0 +1,1251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL high level interfaces
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#define pr_fmt(fmt)	"opal: " fmt
+
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/kobject.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/sched/debug.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+#include <asm/mce.h>
+#include <asm/imc-pmu.h>
+#include <asm/bug.h>
+
+#include "powernv.h"
+
+/* Upper bound on the number of messages kept on msg_list. */
+#define OPAL_MSG_QUEUE_MAX 16
+
+/* A queued copy of one OPAL message, linked on msg_list. */
+struct opal_msg_node {
+	struct list_head	list;
+	struct opal_msg		msg;
+};
+
+/* msg_list_lock is held while messages are queued on or replayed from msg_list. */
+static DEFINE_SPINLOCK(msg_list_lock);
+static LIST_HEAD(msg_list);
+
+/* /sys/firmware/opal */
+struct kobject *opal_kobj;
+
+/* OPAL base, entry point and runtime size, read by early_init_dt_scan_opal(). */
+struct opal {
+	u64 base;
+	u64 entry;
+	u64 size;
+} opal;
+
+/* One recoverable range: a hit in [start_addr, end_addr) maps to recover_addr. */
+struct mcheck_recoverable_range {
+	u64 start_addr;
+	u64 end_addr;
+	u64 recover_addr;
+};
+
+/* Number of entries currently on msg_list. */
+static int msg_list_size;
+
+/* Table filled by early_init_dt_scan_recoverable_ranges(), and its length. */
+static struct mcheck_recoverable_range *mc_recoverable_range;
+static int mc_recoverable_range_len;
+
+/* The /ibm,opal device-tree node, looked up in opal_init(). */
+struct device_node *opal_node;
+/* Taken by __opal_put_chars() for atomic console writes. */
+static DEFINE_SPINLOCK(opal_write_lock);
+/* One notifier chain per OPAL message type. */
+static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
+/* Value of "ibm,heartbeat-ms"; 0 when the property is absent. */
+static uint32_t opal_heartbeat;
+/* The kopald thread, when one has been started. */
+static struct task_struct *kopald_tsk;
+/* Buffer that opal_get_msg() fills, and its size in bytes. */
+static struct opal_msg *opal_msg;
+static u32 opal_msg_size __ro_after_init;
+
+/*
+ * Ask OPAL to re-initialise the CPUs for this kernel's endianness and, on
+ * CPU_FTR_ARCH_300 parts, for the MMU modes in use, then re-apply the CPU
+ * setup through cur_cpu_spec->cpu_restore() when one is provided.
+ */
+void __init opal_configure_cores(void)
+{
+	u64 flags;
+
+	/* Pick the HILE flag that matches the endianness we were built for. */
+#ifdef __BIG_ENDIAN__
+	flags = OPAL_REINIT_CPUS_HILE_BE;
+#else
+	flags = OPAL_REINIT_CPUS_HILE_LE;
+#endif
+
+	/*
+	 * POWER9 always supports running hash: a hash host supports hash
+	 * guests, a radix host supports both hash and radix guests.
+	 */
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+		flags |= OPAL_REINIT_CPUS_MMU_HASH;
+		if (early_radix_enabled())
+			flags |= OPAL_REINIT_CPUS_MMU_RADIX;
+	}
+
+	/*
+	 * The re-init clobbers all FPRs, VRs, etc. Non-volatile GPRs and
+	 * HSPRG0/1 are preserved, and HIDs and other SPRs are put back to
+	 * their original values, though a bunch may still be clobbered.
+	 */
+	opal_reinit_cpus(flags);
+
+	/* Restore some bits */
+	if (cur_cpu_spec->cpu_restore)
+		cur_cpu_spec->cpu_restore();
+}
+
+/*
+ * Flat device-tree scan callback that records where OPAL lives.
+ *
+ * Only the depth-1 node named "ibm,opal" is examined. Its base address,
+ * entry address and runtime size are stored in the global 'opal' structure,
+ * and FW_FEATURE_OPAL is set when the node is compatible with "ibm,opal-v3".
+ * Any other OPAL version panics.
+ *
+ * Returns 0 for nodes that are not the OPAL node, 1 once it has been seen
+ * (including when one of the three properties is missing).
+ */
+int __init early_init_dt_scan_opal(unsigned long node,
+				   const char *uname, int depth, void *data)
+{
+	const void *basep, *entryp, *sizep;
+	int basesz, entrysz, runtimesz;
+
+	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+		return 0;
+
+	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
+	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
+	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
+
+	if (!basep || !entryp || !sizep)
+		return 1;
+
+	/* Property lengths are in bytes; of_read_number() takes 32-bit cells. */
+	opal.base = of_read_number(basep, basesz/4);
+	opal.entry = of_read_number(entryp, entrysz/4);
+	opal.size = of_read_number(sizep, runtimesz/4);
+
+	/* Each line is labelled with the value it actually prints. */
+	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
+		 opal.base, basep, basesz);
+	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
+		 opal.entry, entryp, entrysz);
+	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
+		 opal.size, sizep, runtimesz);
+
+	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
+		powerpc_firmware_features |= FW_FEATURE_OPAL;
+		pr_debug("OPAL detected !\n");
+	} else {
+		panic("OPAL != V3 detected, no longer supported.\n");
+	}
+
+	return 1;
+}
+
+/*
+ * Flat device-tree scan callback that builds mc_recoverable_range[] from the
+ * "mcheck-recoverable-ranges" property of the depth-1 "ibm,opal" node.
+ *
+ * Returns 0 for other nodes and 1 once the OPAL node has been handled,
+ * whether or not the property was present. Panics if the table cannot be
+ * allocated.
+ */
+int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
+				   const char *uname, int depth, void *data)
+{
+	const __be32 *prop;
+	int idx, prop_len, bytes;
+
+	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges",
+				   &prop_len);
+	if (!prop)
+		return 1;
+
+	pr_debug("Found machine check recoverable ranges.\n");
+
+	/*
+	 * An entry is (start address, len, recovery address): two cells for
+	 * each address and one for the length, so five cells per entry.
+	 */
+	mc_recoverable_range_len = prop_len / (sizeof(*prop) * 5);
+
+	/* A property too short to hold one entry gives us nothing to do. */
+	if (!mc_recoverable_range_len)
+		return 1;
+
+	/* Room for every entry in its decoded form. */
+	bytes = mc_recoverable_range_len *
+			sizeof(struct mcheck_recoverable_range);
+
+	mc_recoverable_range = memblock_alloc(bytes, __alignof__(u64));
+	if (!mc_recoverable_range)
+		panic("%s: Failed to allocate %u bytes align=0x%lx\n",
+		      __func__, bytes, __alignof__(u64));
+
+	for (idx = 0; idx < mc_recoverable_range_len; idx++) {
+		struct mcheck_recoverable_range *range =
+					&mc_recoverable_range[idx];
+		const __be32 *cells = prop + (idx * 5);
+
+		range->start_addr = of_read_number(cells + 0, 2);
+		range->end_addr = range->start_addr +
+					of_read_number(cells + 2, 1);
+		range->recover_addr = of_read_number(cells + 3, 2);
+
+		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
+				range->start_addr,
+				range->end_addr,
+				range->recover_addr);
+	}
+	return 1;
+}
+
+/*
+ * Register exception handler glue with OPAL for old firmware levels.
+ *
+ * The whole body is compiled only for big-endian builds; otherwise the
+ * function just returns 0. Returns -ENODEV when FW_FEATURE_OPAL is not set.
+ */
+static int __init opal_register_exception_handlers(void)
+{
+#ifdef __BIG_ENDIAN__
+	u64 glue;
+
+	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
+		return -ENODEV;
+
+	/* Hookup some exception handlers except machine check. We use the
+	 * fwnmi area at 0x7000 to provide the glue space to OPAL
+	 */
+	glue = 0x7000;
+
+	/*
+	 * Only ancient OPAL firmware requires this.
+	 * Specifically, firmware from FW810.00 (released June 2014)
+	 * through FW810.20 (Released October 2014).
+	 *
+	 * Check if we are running on newer (post Oct 2014) firmware that
+	 * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to
+	 * patch the HMI interrupt and we catch it directly in Linux.
+	 *
+	 * For older firmware (i.e < FW810.20), we fallback to old behavior and
+	 * let OPAL patch the HMI vector and handle it inside OPAL firmware.
+	 *
+	 * For newer firmware we catch/handle the HMI directly in Linux.
+	 */
+	if (!opal_check_token(OPAL_HANDLE_HMI)) {
+		pr_info("Old firmware detected, OPAL handles HMIs.\n");
+		opal_register_exception_handler(
+				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
+				0, glue);
+		/* Advance past the glue slot just handed to OPAL. */
+		glue += 128;
+	}
+
+	/*
+	 * Only applicable to ancient firmware, all modern
+	 * (post March 2015/skiboot 5.0) firmware will just return
+	 * OPAL_UNSUPPORTED.
+	 */
+	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
+#endif
+
+	return 0;
+}
+machine_early_initcall(powernv, opal_register_exception_handlers);
+
+/*
+ * Keep a copy of @msg on msg_list so it can be replayed once a notifier
+ * registers. Silently does nothing once OPAL_MSG_QUEUE_MAX messages are
+ * queued; warns once on allocation failure and once when the queue fills.
+ */
+static void queue_replay_msg(void *msg)
+{
+	struct opal_msg_node *node;
+
+	if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+		return;
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node) {
+		pr_warn_once("message queue no memory\n");
+		return;
+	}
+
+	INIT_LIST_HEAD(&node->list);
+	memcpy(&node->msg, msg, sizeof(struct opal_msg));
+	list_add_tail(&node->list, &msg_list);
+	msg_list_size++;
+
+	if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+		pr_warn_once("message queue full\n");
+}
+
+/*
+ * Deliver every queued message of type @msg_type to that type's notifier
+ * chain, in queue order, removing and freeing each one as it is delivered.
+ * Messages of other types stay on the list.
+ */
+static void dequeue_replay_msg(enum opal_msg_type msg_type)
+{
+	struct opal_msg_node *node, *next;
+
+	list_for_each_entry_safe(node, next, &msg_list, list) {
+		if (be32_to_cpu(node->msg.msg_type) == msg_type) {
+			atomic_notifier_call_chain(
+					&opal_msg_notifier_head[msg_type],
+					msg_type, &node->msg);
+
+			list_del(&node->list);
+			kfree(node);
+			msg_list_size--;
+		}
+	}
+}
+
+/*
+ * OPAL message notifier, keyed by message type. Subscribers are notified
+ * only for the specific message type they register for.
+ *
+ * Returns -EINVAL for a NULL @nb or an out-of-range @msg_type, otherwise
+ * the result of atomic_notifier_chain_register().
+ */
+int opal_message_notifier_register(enum opal_msg_type msg_type,
+					struct notifier_block *nb)
+{
+	unsigned long irq_flags;
+	int err;
+
+	if (msg_type >= OPAL_MSG_TYPE_MAX || !nb) {
+		pr_warn("%s: Invalid arguments, msg_type:%d\n",
+			__func__, msg_type);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&msg_list_lock, irq_flags);
+
+	err = atomic_notifier_chain_register(
+			&opal_msg_notifier_head[msg_type], nb);
+
+	/*
+	 * On success, replay the messages that arrived before anybody was
+	 * listening. msg_list_lock is still held so that they are delivered
+	 * ahead of any subsequent message.
+	 */
+	if (!err)
+		dequeue_replay_msg(msg_type);
+
+	spin_unlock_irqrestore(&msg_list_lock, irq_flags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(opal_message_notifier_register);
+
+/*
+ * Remove @nb from the notifier chain for @msg_type.
+ *
+ * Returns -EINVAL when @msg_type is outside the notifier table, mirroring
+ * the check done on registration, otherwise the result of
+ * atomic_notifier_chain_unregister().
+ */
+int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+				     struct notifier_block *nb)
+{
+	/* msg_type indexes opal_msg_notifier_head[]; never index past it. */
+	if (msg_type >= OPAL_MSG_TYPE_MAX) {
+		pr_warn("%s: Invalid arguments, msg_type:%d\n",
+			__func__, msg_type);
+		return -EINVAL;
+	}
+
+	return atomic_notifier_chain_unregister(
+			&opal_msg_notifier_head[msg_type], nb);
+}
+EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
+
+/*
+ * Hand @msg to the notifier chain for @msg_type, or queue a copy for later
+ * replay when nothing has registered for that type yet. The caller must
+ * have checked that @msg_type is within the notifier table.
+ */
+static void opal_message_do_notify(uint32_t msg_type, void *msg)
+{
+	struct atomic_notifier_head *chain = &opal_msg_notifier_head[msg_type];
+	unsigned long irq_flags;
+	bool have_listener;
+
+	spin_lock_irqsave(&msg_list_lock, irq_flags);
+	have_listener = chain->head != NULL;
+	/* Nobody listening yet for this type: hold on to the message. */
+	if (!have_listener)
+		queue_replay_msg(msg);
+	spin_unlock_irqrestore(&msg_list_lock, irq_flags);
+
+	/* notify subscribers */
+	if (have_listener)
+		atomic_notifier_call_chain(chain, msg_type, msg);
+}
+
+/*
+ * Fetch one pending message from OPAL into opal_msg and dispatch it by
+ * type. Unknown types and retrieval errors are logged and dropped.
+ */
+static void opal_handle_message(void)
+{
+	u32 msg_type;
+	s64 rc;
+
+	rc = opal_get_msg(__pa(opal_msg), opal_msg_size);
+
+	/* OPAL_RESOURCE means there was no message waiting. */
+	if (rc == OPAL_RESOURCE)
+		return;
+
+	if (rc) {
+		pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
+			__func__, rc);
+		return;
+	}
+
+	msg_type = be32_to_cpu(opal_msg->msg_type);
+	if (msg_type < OPAL_MSG_TYPE_MAX) {
+		opal_message_do_notify(msg_type, (void *)opal_msg);
+		return;
+	}
+
+	/* The type would index past the notifier table. */
+	pr_warn_once("%s: Unknown message type: %u\n", __func__, msg_type);
+}
+
+/*
+ * Interrupt handler installed by opal_message_init() for the
+ * OPAL_EVENT_MSG_PENDING event: pulls and dispatches one OPAL message.
+ * @irq and @data are unused. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t opal_message_notify(int irq, void *data)
+{
+	opal_handle_message();
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set up OPAL message delivery: size and allocate the receive buffer,
+ * initialise one notifier chain per message type, and hook the
+ * OPAL_EVENT_MSG_PENDING event up to opal_message_notify().
+ *
+ * The buffer size comes from the "opal-msg-size" property of @opal_node and
+ * falls back to sizeof(struct opal_msg) if the property is missing or the
+ * larger allocation fails.
+ *
+ * Returns 0 on success, -ENODEV if no interrupt could be obtained for the
+ * event, or the error from request_irq().
+ */
+static int __init opal_message_init(struct device_node *opal_node)
+{
+	int ret, i, irq;
+
+	ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size);
+	if (ret) {
+		pr_notice("Failed to read opal-msg-size property\n");
+		opal_msg_size = sizeof(struct opal_msg);
+	}
+
+	opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+	if (!opal_msg) {
+		opal_msg_size = sizeof(struct opal_msg);
+		/* Try to allocate fixed message size */
+		opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+		BUG_ON(opal_msg == NULL);
+	}
+
+	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
+		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
+
+	irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
+	if (!irq) {
+		pr_err("%s: Can't register OPAL event irq (%d)\n",
+		       __func__, irq);
+		/* irq is 0 here; returning it would report success. */
+		return -ENODEV;
+	}
+
+	ret = request_irq(irq, opal_message_notify,
+			IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
+	if (ret) {
+		pr_err("%s: Can't request OPAL event irq (%d)\n",
+		       __func__, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Read up to @count bytes of console input for @vtermno into @buf.
+ *
+ * Returns the number of bytes read, 0 when no console input event is
+ * pending or the read fails, or -ENODEV when opal.entry is not set.
+ */
+int opal_get_chars(uint32_t vtermno, char *buf, int count)
+{
+	__be64 events, nbytes;
+
+	if (!opal.entry)
+		return -ENODEV;
+
+	opal_poll_events(&events);
+	if (!(be64_to_cpu(events) & OPAL_EVENT_CONSOLE_INPUT))
+		return 0;
+
+	/* In: size of buf. Out: bytes actually read. Big-endian both ways. */
+	nbytes = cpu_to_be64(count);
+	if (opal_console_read(vtermno, &nbytes, buf) != OPAL_SUCCESS)
+		return 0;
+
+	return be64_to_cpu(nbytes);
+}
+
+/*
+ * Write @total_len bytes from @data to OPAL console @vtermno.
+ *
+ * When @atomic is set the whole operation runs under opal_write_lock with
+ * interrupts disabled.
+ *
+ * Returns -ENODEV if opal.entry is not set; @total_len if the buffer-space
+ * query fails (the characters are dropped); -EAGAIN if there is not enough
+ * buffer space, OPAL reports busy, or nothing was written; an
+ * opal_error_code() value for other OPAL errors; otherwise the number of
+ * bytes OPAL reports as written.
+ */
+static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
+{
+	unsigned long flags = 0 /* shut up gcc */;
+	int written;
+	__be64 olen;
+	s64 rc;
+
+	if (!opal.entry)
+		return -ENODEV;
+
+	if (atomic)
+		spin_lock_irqsave(&opal_write_lock, flags);
+	rc = opal_console_write_buffer_space(vtermno, &olen);
+	if (rc || be64_to_cpu(olen) < total_len) {
+		/* Closed -> drop characters */
+		if (rc)
+			written = total_len;
+		else
+			written = -EAGAIN;
+		goto out;
+	}
+
+	/* Should not get a partial write here because space is available. */
+	olen = cpu_to_be64(total_len);
+	rc = opal_console_write(vtermno, &olen, data);
+	if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		if (rc == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+		written = -EAGAIN;
+		goto out;
+	}
+
+	/* Closed or other error drop */
+	if (rc != OPAL_SUCCESS) {
+		written = opal_error_code(rc);
+		goto out;
+	}
+
+	/* olen now holds the byte count OPAL reports back. */
+	written = be64_to_cpu(olen);
+	if (written < total_len) {
+		if (atomic) {
+			/* Should not happen */
+			pr_warn("atomic console write returned partial "
+				"len=%d written=%d\n", total_len, written);
+		}
+		if (!written)
+			written = -EAGAIN;
+	}
+
+out:
+	if (atomic)
+		spin_unlock_irqrestore(&opal_write_lock, flags);
+
+	return written;
+}
+
+/*
+ * Write @total_len bytes from @data to console @vtermno without taking
+ * opal_write_lock. Return values are those of __opal_put_chars().
+ */
+int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
+{
+	return __opal_put_chars(vtermno, data, total_len, false);
+}
+
+/*
+ * opal_put_chars_atomic will not perform partial-writes. Data will be
+ * atomically written to the terminal or not at all. This is not strictly
+ * true at the moment because console space can race with OPAL's console
+ * writes.
+ *
+ * The write is done under opal_write_lock; return values are those of
+ * __opal_put_chars().
+ */
+int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
+{
+	return __opal_put_chars(vtermno, data, total_len, true);
+}
+
+/*
+ * Make one attempt at flushing console @vtermno.
+ *
+ * Returns the OPAL status of the flush, with OPAL_BUSY_EVENT folded into
+ * OPAL_BUSY after polling events. On firmware without OPAL_CONSOLE_FLUSH
+ * the result is OPAL_BUSY while console output events are pending and
+ * OPAL_SUCCESS otherwise.
+ */
+static s64 __opal_flush_console(uint32_t vtermno)
+{
+	__be64 evt;
+	s64 rc;
+
+	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+		rc = opal_console_flush(vtermno);
+		if (rc != OPAL_BUSY_EVENT)
+			return rc;
+
+		opal_poll_events(NULL);
+		return OPAL_BUSY;
+	}
+
+	/*
+	 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+	 * the console can still be flushed by calling the polling
+	 * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
+	 */
+	WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");
+
+	opal_poll_events(&evt);
+	if (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)
+		return OPAL_BUSY;
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * opal_flush_console busy-waits, retrying every millisecond, until the
+ * console is flushed or OPAL reports something other than busy/partial.
+ * Returns the final status translated by opal_error_code().
+ */
+int opal_flush_console(uint32_t vtermno)
+{
+	s64 rc = __opal_flush_console(vtermno);
+
+	while (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
+		mdelay(1);
+		rc = __opal_flush_console(vtermno);
+	}
+
+	return opal_error_code(rc);
+}
+
+/*
+ * opal_flush_chars is the hvc flush interface. With @wait set it sleeps
+ * between attempts until the console is flushed. Without it, a single
+ * attempt is made: -EAGAIN means some data was flushed but more remains,
+ * and a busy console comes back as the opal_error_code() translation of
+ * OPAL_BUSY.
+ */
+int opal_flush_chars(uint32_t vtermno, bool wait)
+{
+	s64 rc;
+
+	for (;;) {
+		rc = __opal_flush_console(vtermno);
+
+		/* Done, or failed in a way that retrying will not fix. */
+		if (rc != OPAL_BUSY && rc != OPAL_PARTIAL)
+			break;
+
+		if (!wait) {
+			if (rc == OPAL_PARTIAL)
+				return -EAGAIN;
+			break;
+		}
+
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+
+	return opal_error_code(rc);
+}
+
+/*
+ * Decide whether the machine check described by @evt has been, or can be,
+ * recovered from.
+ *
+ * Returns 1 when the event counts as recovered (the platform corrected it,
+ * or the affected context was signalled/killed) and 0 when the caller has
+ * to treat it as unrecoverable.
+ */
+static int opal_recover_mce(struct pt_regs *regs,
+					struct machine_check_event *evt)
+{
+	int recovered = 0;
+
+	if (regs_is_unrecoverable(regs)) {
+		/* If MSR_RI isn't set, we cannot recover */
+		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
+		recovered = 0;
+	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+		/* Platform corrected itself */
+		recovered = 1;
+	} else if (evt->severity == MCE_SEV_FATAL) {
+		/* Fatal machine check */
+		pr_err("Machine check interrupt is fatal\n");
+		recovered = 0;
+	}
+
+	if (!recovered && evt->sync_error) {
+		/*
+		 * Try to kill processes if we get a synchronous machine check
+		 * (e.g., one caused by execution of this instruction). This
+		 * will devolve into a panic if we try to kill init or are in
+		 * an interrupt etc.
+		 *
+		 * TODO: Queue up this address for hwpoisoning later.
+		 * TODO: This is not quite right for d-side machine
+		 *       checks ->nip is not necessarily the important
+		 *       address.
+		 */
+		if ((user_mode(regs))) {
+			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+			recovered = 1;
+		} else if (die_will_crash()) {
+			/*
+			 * die() would kill the kernel, so better to go via
+			 * the platform reboot code that will log the
+			 * machine check.
+			 */
+			recovered = 0;
+		} else {
+			die_mce("Machine check", regs, SIGBUS);
+			recovered = 1;
+		}
+	}
+
+	return recovered;
+}
+
+/*
+ * Report a fatal hardware platform error and take the system down.
+ *
+ * Logs @msg (and @regs, when non-NULL), stops the other CPUs, asks OPAL for
+ * a platform-error reboot and, should that call return, panics with @msg.
+ * Never returns.
+ */
+void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+{
+	panic_flush_kmsg_start();
+
+	pr_emerg("Hardware platform error: %s\n", msg);
+	if (regs)
+		show_regs(regs);
+	smp_send_stop();
+
+	panic_flush_kmsg_end();
+
+	/*
+	 * Don't bother to shut things down because this will
+	 * xstop the system.
+	 */
+	if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
+						== OPAL_UNSUPPORTED) {
+		pr_emerg("Reboot type %d not supported for %s\n",
+				OPAL_REBOOT_PLATFORM_ERROR, msg);
+	}
+
+	/*
+	 * We reached here. There can be four possibilities:
+	 * 1. We are running on a firmware level that do not support
+	 *    opal_cec_reboot2()
+	 * 2. We are running on a firmware level that do not support
+	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
+	 * 3. We are running on FSP based system that does not need
+	 *    opal to trigger checkstop explicitly for error analysis.
+	 *    The FSP PRD component would have already got notified
+	 *    about this error through other channels.
+	 * 4. We are running on a newer skiboot that by default does
+	 *    not cause a checkstop, drops us back to the kernel to
+	 *    extract context and state at the time of the error.
+	 */
+
+	/* msg is caller-supplied text: never use it as the format string. */
+	panic("%s", msg);
+}
+
+/*
+ * Machine check handler: fetch the pending event, print it, and try to
+ * recover.
+ *
+ * Returns 0 when there is no event or its version is unknown, 1 when the
+ * event was recovered. An unrecoverable event ends in
+ * pnv_platform_error_reboot(), which does not return.
+ */
+int opal_machine_check(struct pt_regs *regs)
+{
+	struct machine_check_event evt;
+
+	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+		return 0;
+
+	if (evt.version != MCE_V1) {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       evt.version);
+		return 0;
+	}
+
+	/* Print things out */
+	machine_check_print_event_info(&evt, user_mode(regs), false);
+
+	if (!opal_recover_mce(regs, &evt))
+		pnv_platform_error_reboot(regs,
+				"Unrecoverable Machine Check exception");
+
+	return 1;
+}
+
+/*
+ * Early HMI handler, called in real mode.
+ *
+ * Runs the OPAL HMI handler. OPAL_SUCCESS indicates that an HMI event has
+ * been generated and is waiting to be pulled by Linux, so the per-CPU
+ * hmi_event_available flag is set and 1 is returned. Anything else
+ * returns 0. @regs is unused.
+ */
+int opal_hmi_exception_early(struct pt_regs *regs)
+{
+	if (opal_handle_hmi() != OPAL_SUCCESS)
+		return 0;
+
+	local_paca->hmi_event_available = 1;
+	return 1;
+}
+
+/*
+ * Early HMI handler using the opal_handle_hmi2() call.
+ *
+ * OPAL hands back a 64-bit flag mask saying whether a new event was
+ * generated and whether the timebase is still valid. Returns 0 if the OPAL
+ * call fails and 1 otherwise. @regs is unused.
+ */
+int opal_hmi_exception_early2(struct pt_regs *regs)
+{
+	__be64 out_flags;
+	u64 hmi_flags;
+
+	if (opal_handle_hmi2(&out_flags) != OPAL_SUCCESS)
+		return 0;
+
+	hmi_flags = be64_to_cpu(out_flags);
+
+	if (hmi_flags & OPAL_HMI_FLAGS_NEW_EVENT)
+		local_paca->hmi_event_available = 1;
+	if (hmi_flags & OPAL_HMI_FLAGS_TOD_TB_FAIL)
+		tb_invalid = true;
+
+	return 1;
+}
+
+/*
+ * HMI exception handler called in virtual mode when irqs are next enabled.
+ *
+ * If this CPU has an HMI event flagged, clear the flag and wake kopald to
+ * process it. Returns 1 when an event was pending, 0 otherwise. @regs is
+ * unused.
+ */
+int opal_handle_hmi_exception(struct pt_regs *regs)
+{
+	if (local_paca->hmi_event_available) {
+		local_paca->hmi_event_available = 0;
+		opal_wake_poller();
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look @nip up in the machine check recoverable range table.
+ *
+ * Returns the recovery address of the first range whose
+ * [start_addr, end_addr) interval contains @nip, or 0 if none does.
+ */
+static uint64_t find_recovery_address(uint64_t nip)
+{
+	int idx;
+
+	for (idx = 0; idx < mc_recoverable_range_len; idx++) {
+		const struct mcheck_recoverable_range *range =
+					&mc_recoverable_range[idx];
+
+		if (nip < range->start_addr || nip >= range->end_addr)
+			continue;
+
+		return range->recover_addr;
+	}
+
+	return 0;
+}
+
+/*
+ * If the machine check hit inside OPAL, within one of its recoverable
+ * ranges, redirect the return address in @regs to the matching fixup.
+ *
+ * Returns true when the return address was redirected, false otherwise.
+ */
+bool opal_mce_check_early_recovery(struct pt_regs *regs)
+{
+	uint64_t fixup;
+
+	if (!opal.base || !opal.size)
+		return false;
+
+	/* Only addresses inside the OPAL runtime image are candidates. */
+	if (regs->nip < opal.base || regs->nip >= (opal.base + opal.size))
+		return false;
+
+	fixup = find_recovery_address(regs->nip);
+	if (!fixup)
+		return false;
+
+	/* Setup regs->nip to rfi into fixup address. */
+	regs_set_return_ip(regs, fixup);
+	return true;
+}
+
+/*
+ * Create the "opal" kobject under firmware_kobj and store it in opal_kobj.
+ * Returns 0 on success or -ENOMEM if the kobject cannot be created.
+ */
+static int __init opal_sysfs_init(void)
+{
+	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
+	if (opal_kobj)
+		return 0;
+
+	pr_warn("kobject_create_and_add opal failed\n");
+	return -ENOMEM;
+}
+
+/*
+ * sysfs binary-attribute read handler for exported OPAL regions: copies up
+ * to @count bytes starting at @off from the memory at bin_attr->private
+ * (bin_attr->size bytes long) into @buf. @fp and @kobj are unused.
+ * Returns whatever memory_read_from_buffer() returns.
+ */
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *buf,
+				loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+				       bin_attr->size);
+}
+
+/*
+ * Export one memory region as a read-only sysfs binary file.
+ *
+ * Property @prop_name of @np must hold two u64 values, the address and the
+ * size of the region. A file called @export_name (mode 0400) is created
+ * under @parent that reads from that region.
+ *
+ * Returns 0 on success, the error from reading the property, -ENOMEM on
+ * allocation failure, or the error from sysfs_create_bin_file().
+ */
+static int opal_add_one_export(struct kobject *parent, const char *export_name,
+			       struct device_node *np, const char *prop_name)
+{
+	struct bin_attribute *attr;
+	const char *name;
+	u64 region[2];
+	int rc;
+
+	rc = of_property_read_u64_array(np, prop_name, region, 2);
+	if (rc)
+		return rc;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	name = kstrdup(export_name, GFP_KERNEL);
+	if (!name) {
+		kfree(attr);
+		return -ENOMEM;
+	}
+
+	sysfs_bin_attr_init(attr);
+	attr->attr.name = name;
+	attr->attr.mode = 0400;
+	attr->read = export_attr_read;
+	/* region[0] is the address, region[1] the length in bytes. */
+	attr->private = __va(region[0]);
+	attr->size = region[1];
+
+	rc = sysfs_create_bin_file(parent, attr);
+	if (rc) {
+		kfree(name);
+		kfree(attr);
+	}
+
+	return rc;
+}
+
+/*
+ * Recursively mirror device-tree node @np under @kobj: every property
+ * except "name" and "phandle" becomes an exported file, and every child
+ * node becomes a sub-directory populated the same way. Failures are
+ * logged and skipped.
+ */
+static void opal_add_exported_attrs(struct device_node *np,
+				    struct kobject *kobj)
+{
+	struct device_node *child;
+	struct property *prop;
+
+	for_each_property_of_node(np, prop) {
+		const char *pname = prop->name;
+		int rc;
+
+		if (strcmp(pname, "name") == 0 ||
+		    strcmp(pname, "phandle") == 0)
+			continue;
+
+		rc = opal_add_one_export(kobj, pname, np, pname);
+		if (rc)
+			pr_warn("Unable to add export %pOF/%s, rc = %d!\n",
+				np, pname, rc);
+	}
+
+	for_each_child_of_node(np, child) {
+		struct kobject *dir;
+
+		dir = kobject_create_and_add(child->name, kobj);
+		if (dir)
+			opal_add_exported_attrs(child, dir);
+		else
+			pr_err("Unable to create export dir for %pOF\n", child);
+	}
+}
+
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created in an "exports" directory under the opal
+ * kobject. This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+	struct device_node *exports;
+	struct kobject *dir;
+	int rc;
+
+	exports = of_find_node_by_path("/ibm,opal/firmware/exports");
+	if (!exports)
+		return;
+
+	/* Create new 'exports' directory - /sys/firmware/opal/exports */
+	dir = kobject_create_and_add("exports", opal_kobj);
+	if (!dir) {
+		pr_warn("kobject_create_and_add() of exports failed\n");
+		goto put_node;
+	}
+
+	opal_add_exported_attrs(exports, dir);
+
+	/*
+	 * NB: symbol_map existed before the generic export interface so it
+	 * lives under the top level opal_kobj.
+	 */
+	rc = opal_add_one_export(opal_kobj, "symbol_map",
+				 exports->parent, "symbol-map");
+	if (rc)
+		pr_warn("Error %d creating OPAL symbols file\n", rc);
+
+put_node:
+	of_node_put(exports);
+}
+
+/*
+ * Register the kernel log buffer with OPAL as a dump region, when the
+ * firmware offers OPAL_REGISTER_DUMP_REGION and the buffer exists.
+ */
+static void __init opal_dump_region_init(void)
+{
+	void *log_buf;
+	uint64_t log_len;
+	int rc;
+
+	if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
+		return;
+
+	/* Register kernel log buffer */
+	log_buf = log_buf_addr_get();
+	if (!log_buf)
+		return;
+
+	log_len = log_buf_len_get();
+	if (!log_len)
+		return;
+
+	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
+				       __pa(log_buf), log_len);
+
+	/*
+	 * Don't warn if this is just an older OPAL that doesn't
+	 * know about that call
+	 */
+	if (!rc || rc == OPAL_UNSUPPORTED)
+		return;
+
+	pr_warn("DUMP: Failed to register kernel log buffer. "
+		"rc = %d\n", rc);
+}
+
+/*
+ * Create a platform device for every device-tree node that matches
+ * @compatible. The result of each creation is not checked.
+ */
+static void __init opal_pdev_init(const char *compatible)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, compatible)
+		of_platform_device_create(np, NULL, NULL);
+}
+
+/*
+ * Create a platform device for the first node compatible with
+ * IMC_DTB_COMPAT, if there is one.
+ */
+static void __init opal_imc_init_dev(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
+	if (np)
+		of_platform_device_create(np, NULL, NULL);
+
+	/* Called even when np is NULL. */
+	of_node_put(np);
+}
+
+/*
+ * Body of the "kopald" kernel thread: repeatedly handles OPAL events,
+ * sleeping for up to the OPAL heartbeat interval (plus one jiffy) between
+ * passes unless more events are already pending. Runs until
+ * kthread_should_stop() and then returns 0.
+ */
+static int kopald(void *unused)
+{
+	unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
+
+	set_freezable();
+	do {
+		try_to_freeze();
+
+		opal_handle_events();
+
+		/*
+		 * The task state is set before the pending check - presumably
+		 * so that a wake-up arriving in between is not lost; confirm.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (opal_have_pending_events())
+			__set_current_state(TASK_RUNNING);
+		else
+			schedule_timeout(timeout);
+
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+/* Wake the kopald thread, if one has been started. */
+void opal_wake_poller(void)
+{
+	if (kopald_tsk)
+		wake_up_process(kopald_tsk);
+}
+
+/*
+ * Start the kopald polling thread when OPAL asks for a heartbeat.
+ *
+ * The interval is read from the "ibm,heartbeat-ms" property of opal_node.
+ * A missing property or a zero interval means no thread is started.
+ * kopald_tsk is only set once the thread is known to have been created, so
+ * opal_wake_poller() never sees an error pointer.
+ */
+static void __init opal_init_heartbeat(void)
+{
+	struct task_struct *tsk;
+
+	/* Old firmware, we assume the HVC heartbeat is sufficient */
+	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
+				 &opal_heartbeat) != 0)
+		opal_heartbeat = 0;
+
+	if (!opal_heartbeat)
+		return;
+
+	/* kthread_run() reports failure as an ERR_PTR(), not NULL. */
+	tsk = kthread_run(kopald, NULL, "kopald");
+	if (IS_ERR(tsk)) {
+		pr_warn("Failed to start kopald thread, rc = %ld\n",
+			PTR_ERR(tsk));
+		return;
+	}
+
+	kopald_tsk = tsk;
+}
+
+/*
+ * Main OPAL initialisation, run as a powernv subsys initcall.
+ *
+ * Looks up the /ibm,opal node, creates the OPAL platform devices and brings
+ * up each OPAL sub-interface in turn. The sysfs-dependent interfaces are
+ * only set up if the "opal" kobject could be created.
+ *
+ * Returns -ENODEV if the /ibm,opal node is missing and 0 otherwise; the
+ * results of the individual init calls are not propagated.
+ */
+static int __init opal_init(void)
+{
+	struct device_node *np, *consoles, *leds;
+	int rc;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_warn("Device node not found\n");
+		return -ENODEV;
+	}
+
+	/* Register OPAL consoles if any ports */
+	consoles = of_find_node_by_path("/ibm,opal/consoles");
+	if (consoles) {
+		for_each_child_of_node(consoles, np) {
+			if (!of_node_name_eq(np, "serial"))
+				continue;
+			of_platform_device_create(np, NULL, NULL);
+		}
+		of_node_put(consoles);
+	}
+
+	/* Initialise OPAL messaging system */
+	opal_message_init(opal_node);
+
+	/* Initialise OPAL asynchronous completion interface */
+	opal_async_comp_init();
+
+	/* Initialise OPAL sensor interface */
+	opal_sensor_init();
+
+	/* Initialise OPAL hypervisor maintenance interrupt handling */
+	opal_hmi_handler_init();
+
+	/* Create i2c platform devices */
+	opal_pdev_init("ibm,opal-i2c");
+
+	/* Handle non-volatile memory devices */
+	opal_pdev_init("pmem-region");
+
+	/* Setup a heartbeat thread if requested by OPAL */
+	opal_init_heartbeat();
+
+	/* Detect In-Memory Collection counters and create devices */
+	opal_imc_init_dev();
+
+	/* Create leds platform devices */
+	leds = of_find_node_by_path("/ibm,opal/leds");
+	if (leds) {
+		of_platform_device_create(leds, "opal_leds", NULL);
+		of_node_put(leds);
+	}
+
+	/* Initialise OPAL message log interface */
+	opal_msglog_init();
+
+	/* Create "opal" kobject under /sys/firmware */
+	rc = opal_sysfs_init();
+	if (rc == 0) {
+		/* Setup dump region interface */
+		opal_dump_region_init();
+		/* Setup error log interface */
+		/* NOTE(review): rc is assigned here but not read afterwards. */
+		rc = opal_elog_init();
+		/* Setup code update interface */
+		opal_flash_update_init();
+		/* Setup platform dump extract interface */
+		opal_platform_dump_init();
+		/* Setup system parameters interface */
+		opal_sys_param_init();
+		/* Setup message log sysfs interface. */
+		opal_msglog_sysfs_init();
+		/* Add all export properties */
+		opal_export_attrs();
+	}
+
+	/* Initialize platform devices: IPMI backend, PRD & flash interface */
+	opal_pdev_init("ibm,opal-ipmi");
+	opal_pdev_init("ibm,opal-flash");
+	opal_pdev_init("ibm,opal-prd");
+
+	/* Initialise platform device: oppanel interface */
+	opal_pdev_init("ibm,opal-oppanel");
+
+	/* Initialise OPAL kmsg dumper for flushing console on panic */
+	opal_kmsg_init();
+
+	/* Initialise OPAL powercap interface */
+	opal_powercap_init();
+
+	/* Initialise OPAL Power-Shifting-Ratio interface */
+	opal_psr_init();
+
+	/* Initialise OPAL sensor groups */
+	opal_sensor_groups_init();
+
+	/* Initialise OPAL Power control interface */
+	opal_power_control_init();
+
+	/* Initialize OPAL secure variables */
+	opal_pdev_init("ibm,secvar-backend");
+
+	return 0;
+}
+machine_subsys_initcall(powernv, opal_init);
+
+/*
+ * Quiesce OPAL before the host goes down: shut down OPAL event handling,
+ * wait for OPAL to report that it has synced for reboot, then unregister
+ * the kernel log buffer dump region when the firmware supports that call.
+ */
+void opal_shutdown(void)
+{
+	long rc = OPAL_BUSY;
+
+	opal_event_shutdown();
+
+	/*
+	 * Then sync with OPAL which ensure anything that can
+	 * potentially write to our memory has completed such
+	 * as an ongoing dump retrieval
+	 */
+	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		rc = opal_sync_host_reboot();
+		/*
+		 * NOTE(review): elsewhere in this file it is OPAL_BUSY_EVENT
+		 * that triggers opal_poll_events(); here it is OPAL_BUSY, and
+		 * every other result (including success) takes the 10ms
+		 * delay. Confirm this is intended.
+		 */
+		if (rc == OPAL_BUSY)
+			opal_poll_events(NULL);
+		else
+			mdelay(10);
+	}
+
+	/* Unregister memory dump region */
+	if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
+		opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
+}
+
+/* Export this so that test modules can use it */
+EXPORT_SYMBOL_GPL(opal_invalid_call);
+EXPORT_SYMBOL_GPL(opal_xscom_read);
+EXPORT_SYMBOL_GPL(opal_xscom_write);
+EXPORT_SYMBOL_GPL(opal_ipmi_send);
+EXPORT_SYMBOL_GPL(opal_ipmi_recv);
+EXPORT_SYMBOL_GPL(opal_flash_read);
+EXPORT_SYMBOL_GPL(opal_flash_write);
+EXPORT_SYMBOL_GPL(opal_flash_erase);
+EXPORT_SYMBOL_GPL(opal_prd_msg);
+EXPORT_SYMBOL_GPL(opal_check_token);
+
+/*
+ * Convert a region of vmalloc memory to an opal sg list.
+ *
+ * Walks @vmalloc_size bytes starting at @vmalloc_addr in steps of at most
+ * PAGE_SIZE, recording the physical address and length of each step as a
+ * big-endian entry. Entries are packed into page-sized nodes chained
+ * through their 'next' field, which holds a physical address.
+ *
+ * Returns the first node of the list, to be released with
+ * opal_free_sg_list(), or NULL if an allocation fails.
+ */
+struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
+					     unsigned long vmalloc_size)
+{
+	struct opal_sg_list *sg, *first = NULL;
+	unsigned long i = 0;
+
+	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!sg)
+		goto nomem;
+
+	first = sg;
+
+	while (vmalloc_size > 0) {
+		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
+		uint64_t length = min(vmalloc_size, PAGE_SIZE);
+
+		sg->entry[i].data = cpu_to_be64(data);
+		sg->entry[i].length = cpu_to_be64(length);
+		i++;
+
+		/* Current node is full: chain a fresh one and restart at 0. */
+		if (i >= SG_ENTRIES_PER_NODE) {
+			struct opal_sg_list *next;
+
+			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
+			if (!next)
+				goto nomem;
+
+			/*
+			 * NOTE(review): the + 16 presumably accounts for the
+			 * node's own length and next fields - confirm against
+			 * the struct opal_sg_list definition.
+			 */
+			sg->length = cpu_to_be64(
+					i * sizeof(struct opal_sg_entry) + 16);
+			i = 0;
+			sg->next = cpu_to_be64(__pa(next));
+			sg = next;
+		}
+
+		vmalloc_addr += length;
+		vmalloc_size -= length;
+	}
+
+	/* Length of the last, possibly partially filled, node. */
+	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
+
+	return first;
+
+nomem:
+	pr_err("%s : Failed to allocate memory\n", __func__);
+	/* Frees whatever part of the chain was built; first may be NULL. */
+	opal_free_sg_list(first);
+	return NULL;
+}
+
+/*
+ * Free every node of an opal sg list. Each node's 'next' field holds the
+ * big-endian physical address of the following node, or 0 at the end of
+ * the chain. A NULL @sg is a no-op.
+ */
+void opal_free_sg_list(struct opal_sg_list *sg)
+{
+	while (sg) {
+		/* Read the link before the node holding it is freed. */
+		uint64_t next_pa = be64_to_cpu(sg->next);
+
+		kfree(sg);
+		sg = next_pa ? __va(next_pa) : NULL;
+	}
+}
+
+/*
+ * Translate an OPAL return code into a Linux errno value.
+ *
+ * OPAL_SUCCESS maps to 0. Codes without a mapping of their own are logged
+ * and reported as -EIO.
+ */
+int opal_error_code(int rc)
+{
+	int err;
+
+	switch (rc) {
+	case OPAL_SUCCESS:
+		err = 0;
+		break;
+	case OPAL_PARAMETER:
+		err = -EINVAL;
+		break;
+	case OPAL_ASYNC_COMPLETION:
+		err = -EINPROGRESS;
+		break;
+	case OPAL_BUSY:
+	case OPAL_BUSY_EVENT:
+		err = -EBUSY;
+		break;
+	case OPAL_NO_MEM:
+		err = -ENOMEM;
+		break;
+	case OPAL_PERMISSION:
+		err = -EPERM;
+		break;
+	case OPAL_UNSUPPORTED:
+	case OPAL_HARDWARE:
+	case OPAL_INTERNAL_ERROR:
+		err = -EIO;
+		break;
+	case OPAL_TIMEOUT:
+		err = -ETIMEDOUT;
+		break;
+	default:
+		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
+		err = -EIO;
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * Pass @ptcr to OPAL's nest MMU PTCR call, using -1UL as the first
+ * argument. Does nothing without the OPAL firmware feature; warns unless
+ * OPAL reports success or that the call is unsupported.
+ */
+void powernv_set_nmmu_ptcr(unsigned long ptcr)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+
+	rc = opal_nmmu_set_ptcr(-1UL, ptcr);
+	if (rc == OPAL_SUCCESS || rc == OPAL_UNSUPPORTED)
+		return;
+
+	pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
+}
+
+EXPORT_SYMBOL_GPL(opal_poll_events);
+EXPORT_SYMBOL_GPL(opal_rtc_read);
+EXPORT_SYMBOL_GPL(opal_rtc_write);
+EXPORT_SYMBOL_GPL(opal_tpo_read);
+EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
+/* Export these symbols for PowerNV LED class driver */
+EXPORT_SYMBOL_GPL(opal_leds_get_ind);
+EXPORT_SYMBOL_GPL(opal_leds_set_ind);
+/* Export this symbol for PowerNV Operator Panel class driver */
+EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
+/* Export this for KVM */
+EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
+EXPORT_SYMBOL_GPL(opal_int_eoi);
+EXPORT_SYMBOL_GPL(opal_error_code);
+/* Export the below symbol for NX compression */
+EXPORT_SYMBOL(opal_nx_coproc_init);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
new file mode 100644
index 0000000000..7e419de71d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2014-2016 IBM Corp.
+ */
+
+#include <linux/module.h>
+#include <misc/cxl-base.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+
+#include "pci.h"
+
+/*
+ * Ask firmware to switch the PHB behind @dev into the CXL mode @mode.
+ *
+ * Returns -ENODEV when @dev has no PE, otherwise the unmodified return
+ * value of opal_pci_set_phb_cxl_mode().
+ */
+int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
+	int rc;
+
+	if (!pe)
+		return -ENODEV;
+
+	pe_info(pe, "Switching PHB to CXL\n");
+
+	rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number);
+	if (rc == OPAL_UNSUPPORTED) {
+		dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n");
+		return rc;
+	}
+
+	if (rc)
+		dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc);
+
+	return rc;
+}
+EXPORT_SYMBOL(pnv_phb_to_cxl_mode);
+
+/*
+ * Allocate @num MSI hardware IRQs from the bitmap of the PHB hosting @dev.
+ *
+ * Returns the absolute hardware IRQ number (the PHB's msi_base plus the
+ * offset handed out by the bitmap), or -ENOSPC when the bitmap allocator
+ * reports failure.
+ */
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	int offset;
+
+	offset = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num);
+	if (offset >= 0)
+		return phb->msi_base + offset;
+
+	dev_warn(&dev->dev, "Failed to find a free MSI\n");
+	return -ENOSPC;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs);
+
+/*
+ * Give @num MSI hardware IRQs starting at the absolute number @hwirq back
+ * to the bitmap of the PHB hosting @dev.
+ */
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	/* The bitmap is indexed relative to the PHB's MSI base */
+	int offset = hwirq - phb->msi_base;
+
+	msi_bitmap_free_hwirqs(&phb->msi_bmp, offset, num);
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirqs);
+
+/*
+ * Release every populated IRQ range recorded in @irqs back to the MSI
+ * bitmap of the PHB hosting @dev. Index 0 is never touched; ranges with
+ * a zero length are skipped.
+ */
+void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
+				  struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	int i, hwirq;
+
+	for (i = 1; i < CXL_IRQ_RANGES; i++) {
+		if (irqs->range[i]) {
+			pr_devel("cxl release irq range 0x%x: offset: 0x%lx  limit: %ld\n",
+				 i, irqs->offset[i],
+				 irqs->range[i]);
+			/* Convert the absolute number into a bitmap offset */
+			hwirq = irqs->offset[i] - phb->msi_base;
+			msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq,
+					       irqs->range[i]);
+		}
+	}
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges);
+
+/*
+ * Allocate @num MSI hardware IRQs for @dev, spread over the ranges of
+ * @irqs.
+ *
+ * @irqs is zeroed first. For each range (starting at index 1) the whole
+ * remaining count is requested and the request is halved until the
+ * bitmap allocator succeeds. Returns 0 once all @num IRQs are covered;
+ * otherwise everything allocated so far is released and -ENOSPC is
+ * returned.
+ */
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+			       struct pci_dev *dev, int num)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	int i, hwirq, try;
+
+	memset(irqs, 0, sizeof(*irqs));
+
+	/* 0 is reserved for the multiplexed PSL DSI interrupt */
+	for (i = 1; i < CXL_IRQ_RANGES && num; i++) {
+		/* Shrink the request by half until a free run is found */
+		for (try = num; try; try /= 2) {
+			hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try);
+			if (hwirq >= 0)
+				break;
+		}
+		if (!try)
+			goto fail;
+
+		irqs->offset[i] = phb->msi_base + hwirq;
+		irqs->range[i] = try;
+		pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx  limit: %li\n",
+			 i, irqs->offset[i], irqs->range[i]);
+		num -= try;
+	}
+
+	/* Every requested IRQ found a home */
+	if (!num)
+		return 0;
+
+fail:
+	pnv_cxl_release_hwirq_ranges(irqs, dev);
+	return -ENOSPC;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges);
+
+/* Return the irq_count field of the MSI bitmap of the PHB hosting @dev. */
+int pnv_cxl_get_irq_count(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	const struct pnv_phb *phb = hose->private_data;
+	int count = phb->msi_bmp.irq_count;
+
+	return count;
+}
+EXPORT_SYMBOL(pnv_cxl_get_irq_count);
+
+/*
+ * Bind the MSI @hwirq to the PE of @dev and install the MSI irq chip on
+ * @virq.
+ *
+ * Returns 0 on success, -ENODEV when @dev has no PE and -EIO when
+ * opal_pci_set_xive_pe() fails.
+ */
+int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
+			   unsigned int virq)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	/* The XIVE number is the hwirq relative to the PHB's MSI base */
+	unsigned int xive_num = hwirq - phb->msi_base;
+	struct pnv_ioda_pe *pe;
+	int rc;
+
+	pe = pnv_ioda_get_pe(dev);
+	if (!pe)
+		return -ENODEV;
+
+	/* Assign XIVE to PE */
+	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
+	if (!rc) {
+		pnv_set_msi_irq_chip(phb, virq);
+		return 0;
+	}
+
+	pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x "
+		"hwirq 0x%x XIVE 0x%x PE\n",
+		pci_name(dev), rc, phb->msi_base, hwirq, xive_num);
+	return -EIO;
+}
+EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
new file mode 100644
index 0000000000..e96324502d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * TCE helpers for IODA PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/iommu.h>
+
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include "pci.h"
+
+/*
+ * Build a bitmask of the TCE page sizes supported by @phb.
+ *
+ * Each u32 element of the "ibm,supported-tce-sizes" property of the PHB's
+ * device node is taken as a bit position and set in the mask. When the
+ * property is absent or empty the mask defaults to 4K | 64K, with
+ * 16M | 256M added when the CPU has CPU_FTR_ARCH_207S but not
+ * CPU_FTR_ARCH_300.
+ *
+ * Returns the mask.
+ */
+unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	struct device_node *dn = hose->dn;
+	unsigned long mask = 0;
+	int i, rc, count;
+	u32 val;
+
+	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
+	if (count <= 0) {
+		mask = SZ_4K | SZ_64K;
+		/* Add 16M and 256M for POWER8 by default */
+		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+				!cpu_has_feature(CPU_FTR_ARCH_300))
+			mask |= SZ_16M | SZ_256M;
+		return mask;
+	}
+
+	for (i = 0; i < count; i++) {
+		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
+						i, &val);
+		if (rc)
+			continue;
+
+		/*
+		 * The value comes straight from the device tree; shifting by
+		 * the width of the mask or more is undefined, so ignore
+		 * entries that cannot be represented.
+		 */
+		if (val >= BITS_PER_LONG)
+			continue;
+
+		mask |= 1UL << val;
+	}
+
+	return mask;
+}
+
+/*
+ * Fill in the basic fields of @tbl for a PCI TCE table.
+ *
+ * @tce_mem:    stored as the table base
+ * @tce_size:   table size; the entry count is derived from it as size >> 3
+ * @dma_offset: stored in it_offset after shifting right by the page shift
+ * @page_shift: stored as the table's page shift
+ */
+void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+		void *tce_mem, u64 tce_size,
+		u64 dma_offset, unsigned int page_shift)
+{
+	/* Fixed values */
+	tbl->it_type = TCE_PCI;
+	tbl->it_blocksize = 16;
+	tbl->it_index = 0;
+	tbl->it_busno = 0;
+
+	/* Location and size of the table */
+	tbl->it_base = (unsigned long)tce_mem;
+	tbl->it_size = tce_size >> 3;
+
+	/* it_offset is derived from the page shift just stored */
+	tbl->it_page_shift = page_shift;
+	tbl->it_offset = dma_offset >> tbl->it_page_shift;
+}
+
+/*
+ * Allocate and zero one TCE table level of (1 << @shift) bytes on node
+ * @nid.
+ *
+ * Returns the kernel address of the zeroed memory, or NULL (after
+ * logging an error) when the page allocation fails.
+ */
+static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
+{
+	struct page *pg;
+	__be64 *table;
+
+	pg = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
+			shift - PAGE_SHIFT);
+	if (pg) {
+		table = page_address(pg);
+		memset(table, 0, 1UL << shift);
+		return table;
+	}
+
+	pr_err("Failed to allocate a TCE memory, level shift=%d\n",
+			shift);
+	return NULL;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+		unsigned long size, unsigned int levels);
+
+/*
+ * Locate the TCE entry for index @idx in @tbl.
+ *
+ * @user selects the it_userspace table instead of it_base. The table is
+ * walked through it_indirect_levels intermediate levels; at each level
+ * the relevant bits of @idx pick the entry holding the address of the
+ * next level.
+ *
+ * When an intermediate entry is empty, NULL is returned unless @alloc is
+ * true, in which case a new level is allocated and installed.
+ *
+ * Returns a pointer to the final-level entry, or NULL when a level is
+ * missing and could not (or should not) be allocated.
+ */
+static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
+{
+	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+	int  level = tbl->it_indirect_levels;
+	const long shift = ilog2(tbl->it_level_size);
+	/* Mask selecting the index bits that belong to the current level */
+	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+	while (level) {
+		int n = (idx & mask) >> (level * shift);
+		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
+
+		if (!tce) {
+			__be64 *tmp2;
+
+			if (!alloc)
+				return NULL;
+
+			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
+					ilog2(tbl->it_level_size) + 3);
+			if (!tmp2)
+				return NULL;
+
+			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
+			/*
+			 * Install the new level only if the slot is still
+			 * empty. A non-zero old value means another entry was
+			 * installed in the meantime: free ours and use that one.
+			 */
+			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
+					cpu_to_be64(tce)));
+			if (oldtce) {
+				pnv_pci_ioda2_table_do_free_pages(tmp2,
+					ilog2(tbl->it_level_size) + 3, 1);
+				tce = oldtce;
+			}
+		}
+
+		/* Strip the permission bits to get the next level's address */
+		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+		idx &= ~mask;
+		mask >>= shift;
+		--level;
+	}
+
+	return tmp + idx;
+}
+
+/*
+ * Write @npages consecutive TCE entries into @tbl starting at @index,
+ * mapping the memory that starts at kernel address @uaddr.
+ *
+ * The permission bits come from @direction; write permission always
+ * implies read permission. Missing table levels are allocated on demand.
+ * @attrs is unused.
+ *
+ * Returns 0 on success or -ENOMEM when a table level could not be
+ * allocated. Entries written before the failure are left in place.
+ */
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	u64 proto_tce = iommu_direction_to_tce_perm(direction);
+	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+	long i;
+
+	if (proto_tce & TCE_PCI_WRITE)
+		proto_tce |= TCE_PCI_READ;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long newtce = proto_tce |
+			((rpn + i) << tbl->it_page_shift);
+		unsigned long idx = index - tbl->it_offset + i;
+		__be64 *ptce = pnv_tce(tbl, false, idx, true);
+
+		/* pnv_tce() returns NULL when a level allocation fails */
+		if (!ptce)
+			return -ENOMEM;
+
+		*ptce = cpu_to_be64(newtce);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Atomically replace the TCE entry at @index of @tbl.
+ *
+ * On entry *@hpa and *@direction describe the new mapping; on return
+ * they describe the entry that was replaced. For DMA_NONE the lookup
+ * never allocates table levels, and a missing level is reported as an
+ * empty old entry (*@hpa set to 0). For any other direction missing
+ * levels are allocated.
+ *
+ * Returns 0 on success or -ENOMEM when a level could not be allocated.
+ */
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction)
+{
+	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+	unsigned long newtce = *hpa | proto_tce, oldtce;
+	unsigned long idx = index - tbl->it_offset;
+	__be64 *ptce;
+
+	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+	if (*direction == DMA_NONE) {
+		/* Clearing: an absent level means there is nothing to clear */
+		ptce = pnv_tce(tbl, false, idx, false);
+		if (!ptce) {
+			*hpa = 0;
+			return 0;
+		}
+	} else {
+		ptce = pnv_tce(tbl, false, idx, true);
+		if (!ptce)
+			return -ENOMEM;
+	}
+
+	/* Write permission implies read permission */
+	if (newtce & TCE_PCI_WRITE)
+		newtce |= TCE_PCI_READ;
+
+	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
+	*direction = iommu_tce_direction(oldtce);
+	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+	return 0;
+}
+
+/*
+ * Return a pointer to the entry for @index in the userspace view of
+ * @tbl, allocating missing levels when @alloc is true.
+ *
+ * Warns once and returns NULL when the table has no userspace view.
+ */
+__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
+{
+	long idx;
+
+	if (WARN_ON_ONCE(!tbl->it_userspace))
+		return NULL;
+
+	idx = index - tbl->it_offset;
+
+	return pnv_tce(tbl, true, idx, alloc);
+}
+#endif
+
+/*
+ * Clear @npages TCE entries of @tbl starting at @index.
+ *
+ * Table levels are never allocated here. When the level holding an
+ * entry does not exist, the loop counter is advanced to the last index
+ * of that level so the rest of it is skipped.
+ */
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+{
+	long i;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long idx = index - tbl->it_offset + i;
+		__be64 *ptce = pnv_tce(tbl, false, idx, false);
+
+		if (!ptce) {
+			/* Skip the rest of the level */
+			i |= tbl->it_level_size - 1;
+			continue;
+		}
+
+		*ptce = cpu_to_be64(0);
+	}
+}
+
+/*
+ * Read the TCE entry at @index of @tbl in CPU byte order.
+ *
+ * Returns 0 when the table level holding the entry does not exist.
+ */
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+	long idx = index - tbl->it_offset;
+	__be64 *ptce = pnv_tce(tbl, false, idx, false);
+
+	return ptce ? be64_to_cpu(*ptce) : 0;
+}
+
+/*
+ * Recursively free a TCE table level and everything below it.
+ *
+ * @addr:   address of the level; TCE_PCI_READ/TCE_PCI_WRITE bits that
+ *          may be set in it are masked off before use
+ * @size:   number of entries in the level
+ * @levels: number of levels below this one; 0 means the level holds no
+ *          pointers to further levels
+ */
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+		unsigned long size, unsigned int levels)
+{
+	const unsigned long addr_ul = (unsigned long) addr &
+			~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+	if (levels) {
+		long i;
+		u64 *tmp = (u64 *) addr_ul;
+
+		for (i = 0; i < size; ++i) {
+			unsigned long hpa = be64_to_cpu(tmp[i]);
+
+			/* Entries without permission bits point at nothing */
+			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+				continue;
+
+			/*
+			 * hpa still carries the permission bits here; the
+			 * recursive call masks them off again.
+			 */
+			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+					levels - 1);
+		}
+	}
+
+	/* Each entry is 8 bytes, hence size << 3 bytes for the level */
+	free_pages(addr_ul, get_order(size << 3));
+}
+
+/*
+ * Free all memory backing @tbl: the hardware table and, when present,
+ * its userspace view. Does nothing for a table whose it_size is zero.
+ */
+void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+	unsigned long size;
+
+	if (!tbl->it_size)
+		return;
+
+	/* With indirect levels every level has it_level_size entries */
+	if (tbl->it_indirect_levels)
+		size = tbl->it_level_size;
+	else
+		size = tbl->it_size;
+
+	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
+			tbl->it_indirect_levels);
+
+	if (!tbl->it_userspace)
+		return;
+
+	pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
+			tbl->it_indirect_levels);
+}
+
+/*
+ * Allocate one TCE table level and, for @levels > 1, the levels below it.
+ *
+ * @nid:             node to allocate on
+ * @shift:           log2 of the size in bytes of each level
+ * @levels:          number of levels to allocate, including this one
+ * @limit:           stop populating once *@current_offset reaches this
+ * @current_offset:  advanced by the size of every last-level allocation
+ * @total_allocated: advanced by the size of every successful allocation
+ *
+ * Returns the address of the allocated level, or NULL when this level
+ * itself could not be allocated. A failure in a lower level stops the
+ * population early but still returns this level.
+ */
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+		unsigned int levels, unsigned long limit,
+		unsigned long *current_offset, unsigned long *total_allocated)
+{
+	__be64 *addr, *tmp;
+	unsigned long allocated = 1UL << shift;
+	unsigned int entries = 1UL << (shift - 3);
+	long i;
+
+	addr = pnv_alloc_tce_level(nid, shift);
+	/*
+	 * Bail out before touching the counters or the (missing) level:
+	 * the loop below writes through addr.
+	 */
+	if (!addr)
+		return NULL;
+
+	*total_allocated += allocated;
+
+	--levels;
+	if (!levels) {
+		*current_offset += allocated;
+		return addr;
+	}
+
+	for (i = 0; i < entries; ++i) {
+		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+				levels, limit, current_offset, total_allocated);
+		if (!tmp)
+			break;
+
+		addr[i] = cpu_to_be64(__pa(tmp) |
+				TCE_PCI_READ | TCE_PCI_WRITE);
+
+		if (*current_offset >= limit)
+			break;
+	}
+
+	return addr;
+}
+
+/*
+ * Allocate the memory for a TCE table and initialise @tbl for it.
+ *
+ * @nid:                  node to allocate on
+ * @bus_offset:           passed to pnv_pci_setup_iommu_table() as the DMA offset
+ * @page_shift:           log2 of the IOMMU page size
+ * @window_size:          size of the DMA window; must be a power of two
+ * @levels:               number of table levels, 1..POWERNV_IOMMU_MAX_LEVELS
+ * @alloc_userspace_copy: also allocate the it_userspace view
+ * @tbl:                  table descriptor to fill in
+ *
+ * Only the top level is allocated here (the helper is called with a
+ * level count of 1); pnv_tce() allocates lower levels when asked to.
+ *
+ * Returns 0 on success, -EINVAL for invalid parameters and -ENOMEM when
+ * an allocation fails.
+ */
+long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		bool alloc_userspace_copy, struct iommu_table *tbl)
+{
+	void *addr, *uas = NULL;
+	unsigned long offset = 0, level_shift, total_allocated = 0;
+	unsigned long total_allocated_uas = 0;
+	const unsigned int window_shift = ilog2(window_size);
+	unsigned int entries_shift = window_shift - page_shift;
+	/* 8 bytes per entry (+ 3), and never less than one page */
+	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
+			PAGE_SHIFT);
+	const unsigned long tce_table_size = 1UL << table_shift;
+
+	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+		return -EINVAL;
+
+	if (!is_power_of_2(window_size))
+		return -EINVAL;
+
+	/* Adjust direct table size from window_size and levels */
+	entries_shift = (entries_shift + levels - 1) / levels;
+	level_shift = entries_shift + 3;
+	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
+
+	if ((level_shift - 3) * levels + page_shift >= 55)
+		return -EINVAL;
+
+	/* Allocate TCE table */
+	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+			1, tce_table_size, &offset, &total_allocated);
+
+	/* addr==NULL means that the first level allocation failed */
+	if (!addr)
+		return -ENOMEM;
+
+	/*
+	 * First level was allocated but some lower level failed as
+	 * we did not allocate as much as we wanted,
+	 * release partially allocated table.
+	 */
+	if (levels == 1 && offset < tce_table_size)
+		goto free_tces_exit;
+
+	/* Allocate userspace view of the TCE table */
+	if (alloc_userspace_copy) {
+		offset = 0;
+		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+				1, tce_table_size, &offset,
+				&total_allocated_uas);
+		if (!uas)
+			goto free_tces_exit;
+		/* The userspace view must match the hardware table in size */
+		if (levels == 1 && (offset < tce_table_size ||
+				total_allocated_uas != total_allocated))
+			goto free_uas_exit;
+	}
+
+	/* Setup linux iommu table */
+	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+			page_shift);
+	tbl->it_level_size = 1ULL << (level_shift - 3);
+	tbl->it_indirect_levels = levels - 1;
+	tbl->it_userspace = uas;
+	tbl->it_nid = nid;
+
+	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
+			window_size, tce_table_size, bus_offset, tbl->it_base,
+			tbl->it_userspace, 1, levels);
+
+	return 0;
+
+	/* The free helper takes the number of levels below the top one */
+free_uas_exit:
+	pnv_pci_ioda2_table_do_free_pages(uas,
+			1ULL << (level_shift - 3), levels - 1);
+free_tces_exit:
+	pnv_pci_ioda2_table_do_free_pages(addr,
+			1ULL << (level_shift - 3), levels - 1);
+
+	return -ENOMEM;
+}
+
+/*
+ * Undo pnv_pci_link_table_and_group(): remove @table_group from the list
+ * of groups attached to @tbl and clear @tbl from the group's tables[]
+ * array, dropping the table reference held by that slot.
+ *
+ * Does nothing when either argument is NULL. Warns when the link or the
+ * tables[] slot cannot be found.
+ */
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
+{
+	long i;
+	bool found;
+	struct iommu_table_group_link *tgl;
+
+	if (!tbl || !table_group)
+		return;
+
+	/* Remove link to a group from table's list of attached groups */
+	found = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+		if (tgl->table_group == table_group) {
+			list_del_rcu(&tgl->next);
+			/* Freed via kfree_rcu() rather than immediately */
+			kfree_rcu(tgl, rcu);
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	if (WARN_ON(!found))
+		return;
+
+	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+	found = false;
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		if (table_group->tables[i] == tbl) {
+			iommu_tce_table_put(tbl);
+			table_group->tables[i] = NULL;
+			found = true;
+			break;
+		}
+	}
+	WARN_ON(!found);
+}
+
+/*
+ * Attach @tbl to @table_group.
+ *
+ * A link object allocated on @node is added to the table's list of
+ * groups, and a reference to @tbl is stored in slot @num of the group's
+ * tables[] array.
+ *
+ * Returns 0 on success, -EINVAL (with a warning) when @tbl or
+ * @table_group is NULL, and -ENOMEM when the link cannot be allocated.
+ */
+long pnv_pci_link_table_and_group(int node, int num,
+		struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
+{
+	struct iommu_table_group_link *link;
+
+	if (WARN_ON(!tbl || !table_group))
+		return -EINVAL;
+
+	link = kzalloc_node(sizeof(*link), GFP_KERNEL, node);
+	if (!link)
+		return -ENOMEM;
+
+	link->table_group = table_group;
+	list_add_rcu(&link->next, &tbl->it_group_list);
+
+	table_group->tables[num] = iommu_tce_table_get(tbl);
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
new file mode 100644
index 0000000000..28fac47700
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -0,0 +1,2827 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
+#include <linux/debugfs.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
+#include <asm/opal.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+#include <asm/pnv-pci.h>
+#include <asm/mmzone.h>
+#include <asm/xive.h>
+
+#include <misc/cxl-base.h>
+
+#include "powernv.h"
+#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
+
+/* This array is indexed with enum pnv_phb_type */
+static const char * const pnv_phb_names[] = { "IODA2", "NPU_OCAPI" };
+
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+static void pnv_pci_configure_bus(struct pci_bus *bus);
+
+/*
+ * printk() a message about @pe at log level @level.
+ *
+ * The message is prefixed with "pci <id>: [PE# nn]", where <id> depends
+ * on the PE flags: the device name for a device PE, domain:bus for a bus
+ * PE and domain:bus:slot.func for a VF PE. When none of these flags is
+ * set the identifier is left empty.
+ */
+void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+			    const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	/* Start empty so an unmatched PE type never prints stack garbage */
+	char pfix[32] = "";
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	if (pe->flags & PNV_IODA_PE_DEV)
+		strscpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
+	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+		snprintf(pfix, sizeof(pfix), "%04x:%02x     ",
+			 pci_domain_nr(pe->pbus), pe->pbus->number);
+#ifdef CONFIG_PCI_IOV
+	else if (pe->flags & PNV_IODA_PE_VF)
+		snprintf(pfix, sizeof(pfix), "%04x:%02x:%2x.%d",
+			 pci_domain_nr(pe->parent_dev->bus),
+			 (pe->rid & 0xff00) >> 8,
+			 PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
+#endif /* CONFIG_PCI_IOV*/
+
+	printk("%spci %s: [PE# %.2x] %pV",
+	       level, pfix, pe->pe_number, &vaf);
+
+	va_end(args);
+}
+
+static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pci_reset_phbs __read_mostly;
+
+/*
+ * Handler for the "iommu" early parameter.
+ *
+ * Walks the comma-separated option list in @str; when an option starts
+ * with "nobypass", the IOMMU bypass window is disabled and parsing
+ * stops. Returns -EINVAL when no value was supplied, 0 otherwise.
+ */
+static int __init iommu_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	while (*str != '\0') {
+		size_t tok_len;
+
+		if (strncmp(str, "nobypass", 8) == 0) {
+			pnv_iommu_bypass_disabled = true;
+			pr_info("PowerNV: IOMMU bypass window disabled.\n");
+			break;
+		}
+
+		/* Move to the next option, stepping over the separator */
+		tok_len = strcspn(str, ",");
+		str += tok_len;
+		if (*str == ',')
+			str++;
+	}
+
+	return 0;
+}
+early_param("iommu", iommu_setup);
+
+/*
+ * Handler for the "ppc_pci_reset_phbs" early parameter: the presence of
+ * the parameter sets pci_reset_phbs; its value (@str) is ignored.
+ * Always returns 0.
+ */
+static int __init pci_reset_phbs_setup(char *str)
+{
+	pci_reset_phbs = true;
+	return 0;
+}
+
+early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
+
+/*
+ * Initialise entry @pe_no of the PHB's PE array and ask firmware to
+ * clear any frozen state of that PE.
+ *
+ * A firmware failure other than OPAL_UNSUPPORTED is logged but is not
+ * treated as fatal. Returns the initialised PE.
+ */
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+	s64 rc;
+
+	pe->phb = phb;
+	pe->pe_number = pe_no;
+	pe->dma_setup_done = false;
+
+	/*
+	 * The PE may have been left frozen by the last PCI remove path.
+	 * Clearing the state of a PE that is not frozen is harmless.
+	 */
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
+		pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number, pe_no);
+
+	return pe;
+}
+
+/*
+ * Mark PE @pe_no as allocated on @phb and (re)initialise it.
+ *
+ * An out-of-range PE number is rejected with a warning. Reserving a PE
+ * that is already marked allocated only produces a debug message.
+ */
+static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
+{
+	bool was_set;
+
+	if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num) {
+		pr_warn("%s: Invalid PE %x on PHB#%x\n",
+			__func__, pe_no, phb->hose->global_number);
+		return;
+	}
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+	was_set = test_and_set_bit(pe_no, phb->ioda.pe_alloc);
+	if (was_set)
+		pr_debug("%s: PE %x was reserved on PHB#%x\n",
+			 __func__, pe_no, phb->hose->global_number);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+
+	pnv_ioda_init_pe(phb, pe_no);
+}
+
+/*
+ * Allocate @count consecutive PEs on @phb.
+ *
+ * The allocation bitmap is scanned from the highest PE number downwards
+ * for a run of @count free entries; every PE of the run is then marked
+ * allocated and initialised. Runs under pe_alloc_mutex.
+ *
+ * Returns the lowest-numbered PE of the run, or NULL when no such run
+ * exists.
+ */
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
+{
+	struct pnv_ioda_pe *ret = NULL;
+	int run = 0, pe, i;
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+
+	/* scan backwards for a run of @count cleared bits */
+	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
+		if (!test_bit(pe, phb->ioda.pe_alloc)) {
+			if (++run == count)
+				break;
+		} else {
+			/* An allocated PE interrupts the run */
+			run = 0;
+		}
+	}
+
+	if (run == count) {
+		/* 'pe' is the lowest PE number of the run that was found */
+		for (i = pe; i < pe + count; i++) {
+			set_bit(i, phb->ioda.pe_alloc);
+			pnv_ioda_init_pe(phb, i);
+		}
+		ret = &phb->ioda.pe_array[pe];
+	}
+
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+	return ret;
+}
+
+/*
+ * Return @pe to its PHB's pool: the PE structure is zeroed and its bit
+ * in the allocation bitmap is cleared. Warns when the PE still has a
+ * device attached.
+ */
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
+{
+	/* Save what is needed from the PE before it is wiped */
+	unsigned int pe_num = pe->pe_number;
+	struct pnv_phb *phb = pe->phb;
+
+	WARN_ON(pe->pdev);
+	memset(pe, 0, sizeof(*pe));
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+	clear_bit(pe_num, phb->ioda.pe_alloc);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+}
+
+/*
+ * The default M64 BAR is shared by all PEs.
+ *
+ * Configure and enable the default M64 BAR of @phb through OPAL, then
+ * trim two segments from the start or end of the M64 resource depending
+ * on where the reserved PE sits.
+ *
+ * Returns 0 on success. When either OPAL call fails, a warning is
+ * logged, the BAR is disabled and -EIO is returned.
+ */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+	/* Names the step that failed, for the warning in the fail path */
+	const char *desc;
+	struct resource *r;
+	s64 rc;
+
+	/* Configure the default M64 BAR */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 phb->ioda.m64_bar_idx,
+					 phb->ioda.m64_base,
+					 0, /* unused */
+					 phb->ioda.m64_size);
+	if (rc != OPAL_SUCCESS) {
+		desc = "configuring";
+		goto fail;
+	}
+
+	/* Enable the default M64 BAR */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      phb->ioda.m64_bar_idx,
+				      OPAL_ENABLE_M64_SPLIT);
+	if (rc != OPAL_SUCCESS) {
+		desc = "enabling";
+		goto fail;
+	}
+
+	/*
+	 * Exclude the segments for reserved and root bus PE, which
+	 * are first or last two PEs.
+	 */
+	r = &phb->hose->mem_resources[1];
+	if (phb->ioda.reserved_pe_idx == 0)
+		r->start += (2 * phb->ioda.m64_segsize);
+	else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+		r->end -= (2 * phb->ioda.m64_segsize);
+	else
+		pr_warn("  Cannot strip M64 segment for reserved PE#%x\n",
+			phb->ioda.reserved_pe_idx);
+
+	return 0;
+
+fail:
+	pr_warn("  Failure %lld %s M64 BAR#%d\n",
+		rc, desc, phb->ioda.m64_bar_idx);
+	/* Best effort: the result of the disable call is not checked */
+	opal_pci_phb_mmio_enable(phb->opal_id,
+				 OPAL_M64_WINDOW_TYPE,
+				 phb->ioda.m64_bar_idx,
+				 OPAL_DISABLE_M64);
+	return -EIO;
+}
+
+/*
+ * Account for the M64 segments covered by the resources of @pdev.
+ *
+ * Each resource up to and including the ROM resource that has a parent
+ * and lies in the PHB's M64 window is converted into a range of segment
+ * numbers. With @pe_bitmap set, those segment numbers are recorded in
+ * the bitmap; otherwise the PEs with the same numbers are reserved on
+ * the PHB.
+ */
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
+					 unsigned long *pe_bitmap)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	const resource_size_t base = phb->ioda.m64_base;
+	const resource_size_t sgsz = phb->ioda.m64_segsize;
+	resource_size_t start, end, seg_first, seg_limit;
+	struct resource *r;
+	int segno, i;
+
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		r = &pdev->resource[i];
+		if (r->parent && pnv_pci_is_m64(phb, r)) {
+			/* Widen the resource to whole segments */
+			start = ALIGN_DOWN(r->start - base, sgsz);
+			end = ALIGN(r->end - base, sgsz);
+			seg_first = start / sgsz;
+			seg_limit = end / sgsz;
+
+			for (segno = seg_first; segno < seg_limit; segno++) {
+				if (pe_bitmap)
+					set_bit(segno, pe_bitmap);
+				else
+					pnv_ioda_reserve_pe(phb, segno);
+			}
+		}
+	}
+}
+
+/*
+ * Apply pnv_ioda_reserve_dev_m64_pe() to every device on @bus. When
+ * @all is true, the buses below each bridge are visited recursively as
+ * well.
+ */
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+				    unsigned long *pe_bitmap,
+				    bool all)
+{
+	struct pci_dev *pdev;
+
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);
+
+		/* Descend only when asked to and there is a child bus */
+		if (!all || !pdev->subordinate)
+			continue;
+
+		pnv_ioda_reserve_m64_pe(pdev->subordinate, pe_bitmap, all);
+	}
+}
+
+/*
+ * Pick the PE(s) for @bus based on the M64 segments its devices occupy.
+ *
+ * The segments used by the devices on @bus (and, when @all is true, on
+ * the buses below it) are collected in a temporary bitmap. The PE of the
+ * lowest segment becomes the master PE and the PEs of the remaining
+ * segments are chained to it as slaves, forming a compound PE.
+ *
+ * Returns the master PE, or NULL for the root bus, on allocation failure
+ * or when the bus owns no M64 segment.
+ */
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pnv_ioda_pe *master_pe, *pe;
+	unsigned long *pe_alloc;
+	int i;
+
+	/* Root bus shouldn't use M64 */
+	if (pci_is_root_bus(bus))
+		return NULL;
+
+	/*
+	 * Allocate the bitmap. bitmap_zalloc() rounds the size up to whole
+	 * longs, so a PE count that is not a multiple of 8 still gets
+	 * enough room for every bit.
+	 */
+	pe_alloc = bitmap_zalloc(phb->ioda.total_pe_num, GFP_KERNEL);
+	if (!pe_alloc) {
+		pr_warn("%s: Out of memory !\n",
+			__func__);
+		return NULL;
+	}
+
+	/* Figure out reserved PE numbers by the PE */
+	pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
+
+	/*
+	 * the current bus might not own M64 window and that's all
+	 * contributed by its child buses. For the case, we needn't
+	 * pick M64 dependent PE#.
+	 */
+	if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
+		bitmap_free(pe_alloc);
+		return NULL;
+	}
+
+	/*
+	 * Figure out the master PE and put all slave PEs to master
+	 * PE's list to form compound PE.
+	 */
+	master_pe = NULL;
+	for_each_set_bit(i, pe_alloc, phb->ioda.total_pe_num) {
+		pe = &phb->ioda.pe_array[i];
+
+		phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
+		if (!master_pe) {
+			pe->flags |= PNV_IODA_PE_MASTER;
+			INIT_LIST_HEAD(&pe->slaves);
+			master_pe = pe;
+		} else {
+			pe->flags |= PNV_IODA_PE_SLAVE;
+			pe->master = master_pe;
+			list_add_tail(&pe->list, &master_pe->slaves);
+		}
+	}
+
+	bitmap_free(pe_alloc);
+	return master_pe;
+}
+
+/*
+ * Parse the device-tree description of the PHB's M64 window and record
+ * it in hose->mem_resources[1] and phb->ioda.
+ *
+ * Returns early, leaving M64 unconfigured, when the PHB is not IODA2,
+ * the OPAL firmware feature is missing, the "ibm,opal-m64-window"
+ * property is absent or the available BAR range is empty. Otherwise the
+ * last BAR of the range becomes the default window, the other BARs of
+ * the range are marked free in m64_bar_alloc and phb->init_m64 is set.
+ */
+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	struct device_node *dn = hose->dn;
+	struct resource *res;
+	u32 m64_range[2], i;
+	const __be32 *r;
+	u64 pci_addr;
+
+	if (phb->type != PNV_PHB_IODA2) {
+		pr_info("  Not support M64 window\n");
+		return;
+	}
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+		pr_info("  Firmware too old to support M64 window\n");
+		return;
+	}
+
+	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
+	if (!r) {
+		pr_info("  No <ibm,opal-m64-window> on %pOF\n",
+			dn);
+		return;
+	}
+
+	/*
+	 * Find the available M64 BAR range and pickup the last one for
+	 * covering the whole 64-bits space. We support only one range.
+	 */
+	if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges",
+				       m64_range, 2)) {
+		/* In absence of the property, assume 0..15 */
+		m64_range[0] = 0;
+		m64_range[1] = 16;
+	}
+	/* We only support 64 bits in our allocator */
+	if (m64_range[1] > 63) {
+		pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n",
+			__func__, m64_range[1], phb->hose->global_number);
+		m64_range[1] = 63;
+	}
+	/*
+	 * Empty range, no m64.
+	 * NOTE(review): m64_range[1] is used below as a count relative to
+	 * m64_range[0], yet it is compared with m64_range[0] directly here;
+	 * confirm this is the intended emptiness test.
+	 */
+	if (m64_range[1] <= m64_range[0]) {
+		pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n",
+			__func__, phb->hose->global_number);
+		return;
+	}
+
+	/* Configure M64 informations */
+	res = &hose->mem_resources[1];
+	res->name = dn->full_name;
+	res->start = of_translate_address(dn, r + 2);
+	res->end = res->start + of_read_number(r + 4, 2) - 1;
+	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+	pci_addr = of_read_number(r, 2);
+	hose->mem_offset[1] = res->start - pci_addr;
+
+	phb->ioda.m64_size = resource_size(res);
+	/* One segment per PE */
+	phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
+	phb->ioda.m64_base = pci_addr;
+
+	/* This lines up nicely with the display from processing OF ranges */
+	pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n",
+		res->start, res->end, pci_addr, m64_range[0],
+		m64_range[0] + m64_range[1] - 1);
+
+	/* Mark all M64 used up by default */
+	phb->ioda.m64_bar_alloc = (unsigned long)-1;
+
+	/* Use last M64 BAR to cover M64 window */
+	m64_range[1]--;
+	phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1];
+
+	pr_info(" Using M64 #%d as default window\n", phb->ioda.m64_bar_idx);
+
+	/* Mark remaining ones free */
+	for (i = m64_range[0]; i < m64_range[1]; i++)
+		clear_bit(i, &phb->ioda.m64_bar_alloc);
+
+	/*
+	 * Setup init functions for M64 based on IODA version, IODA3 uses
+	 * the IODA2 code.
+	 */
+	phb->init_m64 = pnv_ioda2_init_m64;
+}
+
+/*
+ * Freeze PE @pe_no on @phb through OPAL.
+ *
+ * For a slave PE the freeze is applied to its master instead. When the
+ * resulting PE is a master, each of its slaves is frozen as well.
+ * Failures are logged; a failure on the master stops the operation,
+ * a failure on a slave does not stop the remaining slaves.
+ */
+static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+	struct pnv_ioda_pe *slave;
+	s64 rc;
+
+	/* Fetch master PE */
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		/* A slave without a valid master cannot be handled */
+		if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
+			return;
+
+		pe_no = pe->pe_number;
+	}
+
+	/* Freeze master PE */
+	rc = opal_pci_eeh_freeze_set(phb->opal_id,
+				     pe_no,
+				     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number, pe_no);
+		return;
+	}
+
+	/* Freeze slave PEs */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_set(phb->opal_id,
+					     slave->pe_number,
+					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+		if (rc != OPAL_SUCCESS)
+			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				slave->pe_number);
+	}
+}
+
+/*
+ * Clear the frozen state of PE @pe_no on @phb through OPAL.
+ *
+ * @opt is passed to firmware unchanged as the clear action. For a slave
+ * PE the operation is applied to its master; when the resulting PE is a
+ * master, all of its slaves are unfrozen as well.
+ *
+ * Returns 0 on success or -EIO when the PE topology is inconsistent or
+ * any OPAL call fails (the remaining slaves are then left untouched).
+ */
+static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
+{
+	struct pnv_ioda_pe *pe, *slave;
+	s64 rc;
+
+	/* Find master PE */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		/*
+		 * Bail out instead of dereferencing a missing or bogus
+		 * master, as pnv_ioda_freeze_pe() does.
+		 */
+		if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
+			return -EIO;
+
+		pe_no = pe->pe_number;
+	}
+
+	/* Clear frozen state for master PE */
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+			__func__, rc, opt, phb->hose->global_number, pe_no);
+		return -EIO;
+	}
+
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return 0;
+
+	/* Clear frozen state for slave PEs */
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+					     slave->pe_number,
+					     opt);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+				__func__, rc, opt, phb->hose->global_number,
+				slave->pe_number);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Query the EEH freeze state of PE @pe_no on @phb.
+ *
+ * For a slave PE the master is queried instead. For a compound PE the
+ * state of every slave is read too and the numerically highest state is
+ * reported.
+ *
+ * Returns the freeze state, OPAL_EEH_STOPPED_PERM_UNAVAIL for an
+ * out-of-range PE number, or OPAL_EEH_STOPPED_TEMP_UNAVAIL when an OPAL
+ * query fails.
+ */
+static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *slave, *pe;
+	u8 fstate = 0, state;
+	__be16 pcierr = 0;
+	s64 rc;
+
+	/* Sanity check on PE number */
+	if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
+		return OPAL_EEH_STOPPED_PERM_UNAVAIL;
+
+	/*
+	 * Fetch the master PE and the PE instance might be
+	 * not initialized yet.
+	 */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		/*
+		 * NOTE(review): the WARN_ON() does not stop execution, so a
+		 * NULL master is still dereferenced on the next line, unlike
+		 * pnv_ioda_freeze_pe() which returns. Confirm which state
+		 * should be reported before adding an early return.
+		 */
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Check the master PE */
+	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+					&state, &pcierr, NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting "
+			"PHB#%x-PE#%x state\n",
+			__func__, rc,
+			phb->hose->global_number, pe_no);
+		return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+	}
+
+	/* Check the slave PE */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return state;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						slave->pe_number,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting "
+				"PHB#%x-PE#%x state\n",
+				__func__, rc,
+				phb->hose->global_number, slave->pe_number);
+			return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+		}
+
+		/*
+		 * Override the result based on the ascending
+		 * priority.
+		 */
+		if (fstate > state)
+			state = fstate;
+	}
+
+	return state;
+}
+
+/*
+ * Look up the PE for @bdfn through the PHB's reverse map.
+ *
+ * Returns the PE, or NULL when the map holds IODA_INVALID_PE for @bdfn.
+ */
+struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn)
+{
+	int pe_number = phb->ioda.pe_rmap[bdfn];
+
+	return pe_number == IODA_INVALID_PE ?
+		NULL : &phb->ioda.pe_array[pe_number];
+}
+
+/*
+ * Return the PE that @dev belongs to, or NULL when the device has no
+ * pci_dn or its pci_dn carries no valid PE number.
+ */
+struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+		return NULL;
+
+	return &phb->ioda.pe_array[pdn->pe_number];
+}
+
+/*
+ * Add @child to, or remove it from, the PELT-V of @parent.
+ *
+ * When @child is the master of a compound PE, the same operation is
+ * repeated for each of its slaves.
+ *
+ * Returns 0 on success or -ENXIO as soon as an OPAL call fails.
+ */
+static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
+				  struct pnv_ioda_pe *parent,
+				  struct pnv_ioda_pe *child,
+				  bool is_add)
+{
+	struct pnv_ioda_pe *slave;
+	const char *desc;
+	uint8_t op;
+	long rc;
+
+	if (is_add) {
+		desc = "adding";
+		op = OPAL_ADD_PE_TO_DOMAIN;
+	} else {
+		desc = "removing";
+		op = OPAL_REMOVE_PE_FROM_DOMAIN;
+	}
+
+	/* Parent PE affects child PE */
+	rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
+				child->pe_number, op);
+	if (rc != OPAL_SUCCESS) {
+		pe_warn(child, "OPAL error %ld %s to parent PELTV\n",
+			rc, desc);
+		return -ENXIO;
+	}
+
+	/* Compound case: parent PE affects slave PEs */
+	if (child->flags & PNV_IODA_PE_MASTER) {
+		list_for_each_entry(slave, &child->slaves, list) {
+			rc = opal_pci_set_peltv(phb->opal_id,
+						parent->pe_number,
+						slave->pe_number, op);
+			if (rc == OPAL_SUCCESS)
+				continue;
+
+			pe_warn(slave, "OPAL error %ld %s to parent PELTV\n",
+				rc, desc);
+			return -ENXIO;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Add @pe to (is_add == true) or remove it from (is_add == false) the
+ * PELT-V of: itself, each of its slaves when it is a master PE, and the
+ * PE of every upstream bridge reached by walking bus->self towards the
+ * root. When adding, the frozen state of @pe and of its slaves is cleared
+ * first.
+ *
+ * Returns 0 on success or the first non-zero value returned by
+ * pnv_ioda_set_one_peltv(); earlier successful updates are not undone.
+ */
+static int pnv_ioda_set_peltv(struct pnv_phb *phb,
+			      struct pnv_ioda_pe *pe,
+			      bool is_add)
+{
+	struct pnv_ioda_pe *slave;
+	struct pci_dev *pdev = NULL;
+	int ret;
+
+	/*
+	 * Clear PE frozen state. If it's master PE, we need
+	 * clear slave PE frozen state as well.
+	 */
+	if (is_add) {
+		opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+					  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+		if (pe->flags & PNV_IODA_PE_MASTER) {
+			list_for_each_entry(slave, &pe->slaves, list)
+				opal_pci_eeh_freeze_clear(phb->opal_id,
+							  slave->pe_number,
+							  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+		}
+	}
+
+	/*
+	 * Associate PE in PELT. We need add the PE into the
+	 * corresponding PELT-V as well. Otherwise, the error
+	 * originated from the PE might contribute to other
+	 * PEs.
+	 */
+	ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add);
+	if (ret)
+		return ret;
+
+	/* For compound PEs, any one affects all of them */
+	if (pe->flags & PNV_IODA_PE_MASTER) {
+		list_for_each_entry(slave, &pe->slaves, list) {
+			ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Pick the first upstream bridge according to the kind of PE */
+	if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
+		pdev = pe->pbus->self;
+	else if (pe->flags & PNV_IODA_PE_DEV)
+		pdev = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+	else if (pe->flags & PNV_IODA_PE_VF)
+		pdev = pe->parent_dev;
+#endif /* CONFIG_PCI_IOV */
+	/* Walk up the bridges; those without a valid PE are skipped */
+	while (pdev) {
+		struct pci_dn *pdn = pci_get_pdn(pdev);
+		struct pnv_ioda_pe *parent;
+
+		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+			parent = &phb->ioda.pe_array[pdn->pe_number];
+			ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
+			if (ret)
+				return ret;
+		}
+
+		pdev = pdev->bus->self;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove @pe from the PELT-V of every upstream PE, starting at the bridge
+ * @parent and following bus->self towards the root, then clear the PE's
+ * frozen state and remove it from its own PELT-V.
+ *
+ * Failures cannot be unwound here, so they are logged and the teardown
+ * carries on.
+ */
+static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
+				 struct pnv_ioda_pe *pe,
+				 struct pci_dev *parent)
+{
+	int64_t rc;
+
+	while (parent) {
+		struct pci_dn *pdn = pci_get_pdn(parent);
+
+		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+						pe->pe_number,
+						OPAL_REMOVE_PE_FROM_DOMAIN);
+			/* Nothing to undo; report it and keep walking up */
+			if (rc)
+				pe_warn(pe, "OPAL error %lld remove from PE#%x PELTV\n",
+					rc, pdn->pe_number);
+		}
+		parent = parent->bus->self;
+	}
+
+	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+	/* Disassociate PE in PELT */
+	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+				pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+	if (rc)
+		pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
+}
+
+/*
+ * Undo pnv_ioda_configure_pe() for @pe: invalidate its RIDs in the
+ * reverse map, remove it from the PELT-V of its parents and of itself
+ * (skipped when the PHB type is PNV_PHB_NPU_OCAPI), unmap it with
+ * OPAL_UNMAP_PE and clear the pbus/pdev/parent_dev back-pointers.
+ *
+ * Always returns 0; an OPAL failure while unmapping is only logged.
+ */
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *parent;
+	uint8_t bcomp, dcomp, fcomp;
+	int64_t rc;
+	long rid_end, rid;
+
+	/* Currently, we just deconfigure VF PE. Bus PE will always there.*/
+	if (pe->pbus) {
+		int count;
+
+		/* Bus PE: match on the bus number only */
+		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+		parent = pe->pbus->self;
+		if (pe->flags & PNV_IODA_PE_BUS_ALL)
+			count = resource_size(&pe->pbus->busn_res);
+		else
+			count = 1;
+
+		/* Translate the number of buses into a bus compare mode */
+		switch(count) {
+		case  1: bcomp = OpalPciBusAll;         break;
+		case  2: bcomp = OpalPciBus7Bits;       break;
+		case  4: bcomp = OpalPciBus6Bits;       break;
+		case  8: bcomp = OpalPciBus5Bits;       break;
+		case 16: bcomp = OpalPciBus4Bits;       break;
+		case 32: bcomp = OpalPciBus3Bits;       break;
+		default:
+			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+			        count);
+			/* Do an exact match only */
+			bcomp = OpalPciBusAll;
+		}
+		/*
+		 * NOTE(review): in the default case above bcomp is an exact
+		 * bus match, yet rid_end still spans 'count' buses - confirm
+		 * this is intended.
+		 */
+		rid_end = pe->rid + (count << 8);
+	} else {
+		/* Device or VF PE: match the full bus/device/function */
+#ifdef CONFIG_PCI_IOV
+		if (pe->flags & PNV_IODA_PE_VF)
+			parent = pe->parent_dev;
+		else
+#endif
+			parent = pe->pdev->bus->self;
+		bcomp = OpalPciBusAll;
+		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+		rid_end = pe->rid + 1;
+	}
+
+	/* Clear the reverse map */
+	for (rid = pe->rid; rid < rid_end; rid++)
+		phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;
+
+	/*
+	 * Release from all parents PELT-V. NPUs don't have a PELTV
+	 * table
+	 */
+	if (phb->type != PNV_PHB_NPU_OCAPI)
+		pnv_ioda_unset_peltv(phb, pe, parent);
+
+	/* Remove the RID -> PE# association; a failure is logged, not returned */
+	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+			     bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
+	if (rc)
+		pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc);
+
+	pe->pbus = NULL;
+	pe->pdev = NULL;
+#ifdef CONFIG_PCI_IOV
+	pe->parent_dev = NULL;
+#endif
+
+	return 0;
+}
+
+/*
+ * Program @pe into the hardware: map its RID (range) to its PE number
+ * with OPAL_MAP_PE, set up the PELT-V (skipped when the PHB type is
+ * PNV_PHB_NPU_OCAPI), fill in the reverse map and set mve_number to 0.
+ *
+ * Returns 0 on success, -ENXIO if the OPAL mapping call fails.
+ */
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+	uint8_t bcomp, dcomp, fcomp;
+	long rc, rid_end, rid;
+
+	/* Bus validation ? */
+	if (pe->pbus) {
+		int count;
+
+		/* Bus PE: match on the bus number only */
+		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+		if (pe->flags & PNV_IODA_PE_BUS_ALL)
+			count = resource_size(&pe->pbus->busn_res);
+		else
+			count = 1;
+
+		/* Translate the number of buses into a bus compare mode */
+		switch(count) {
+		case  1: bcomp = OpalPciBusAll;		break;
+		case  2: bcomp = OpalPciBus7Bits;	break;
+		case  4: bcomp = OpalPciBus6Bits;	break;
+		case  8: bcomp = OpalPciBus5Bits;	break;
+		case 16: bcomp = OpalPciBus4Bits;	break;
+		case 32: bcomp = OpalPciBus3Bits;	break;
+		default:
+			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+			        count);
+			/* Do an exact match only */
+			bcomp = OpalPciBusAll;
+		}
+		/*
+		 * NOTE(review): in the default case above bcomp is an exact
+		 * bus match, yet rid_end still spans 'count' buses - confirm
+		 * this is intended.
+		 */
+		rid_end = pe->rid + (count << 8);
+	} else {
+		/* Single device: match the full bus/device/function */
+		bcomp = OpalPciBusAll;
+		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+		rid_end = pe->rid + 1;
+	}
+
+	/*
+	 * Associate the PE's RID (range) with its PE number. This is the
+	 * only step whose failure is reported to the caller.
+	 */
+	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
+	if (rc) {
+		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+		return -ENXIO;
+	}
+
+	/*
+	 * Configure PELTV. NPUs don't have a PELTV table so skip
+	 * configuration on them. The return value of pnv_ioda_set_peltv()
+	 * is not checked here.
+	 */
+	if (phb->type != PNV_PHB_NPU_OCAPI)
+		pnv_ioda_set_peltv(phb, pe, true);
+
+	/* Setup reverse map */
+	for (rid = pe->rid; rid < rid_end; rid++)
+		phb->ioda.pe_rmap[rid] = pe->pe_number;
+
+	pe->mve_number = 0;
+
+	return 0;
+}
+
+/*
+ * Allocate and configure a PE dedicated to a single device.
+ *
+ * Returns the new PE, already linked on the PHB's pe_list, or NULL when
+ * the device has no pci_dn, already has a PE number assigned, no PE# is
+ * available, or configuring the PE fails.
+ */
+static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+	struct pci_dn *pdn = pci_get_pdn(dev);
+	struct pnv_ioda_pe *pe;
+
+	if (!pdn) {
+		pr_err("%s: Device tree node not associated properly\n",
+		       pci_name(dev));
+		return NULL;
+	}
+	if (pdn->pe_number != IODA_INVALID_PE)
+		return NULL;
+
+	pe = pnv_ioda_alloc_pe(phb, 1);
+	if (!pe) {
+		pr_warn("%s: Not enough PE# available, disabling device\n",
+			pci_name(dev));
+		return NULL;
+	}
+
+	/* NOTE: We don't get a reference for the pointer in the PE
+	 * data structure, both the device and PE structures should be
+	 * destroyed at the same time.
+	 *
+	 * At some point we want to remove the PDN completely anyways
+	 */
+	pdn->pe_number = pe->pe_number;
+	pe->flags = PNV_IODA_PE_DEV;
+	pe->pdev = dev;
+	pe->pbus = NULL;
+	pe->mve_number = -1;
+	pe->rid = dev->bus->number << 8 | pdn->devfn;
+	pe->device_count++;
+
+	pe_info(pe, "Associated device to PE\n");
+
+	if (pnv_ioda_configure_pe(phb, pe)) {
+		/*
+		 * Drop every reference between the device and the PE
+		 * before handing the PE back to the allocator: once it
+		 * has been released it must not be written to any more.
+		 */
+		pdn->pe_number = IODA_INVALID_PE;
+		pe->pdev = NULL;
+		pnv_ioda_free_pe(pe);
+		return NULL;
+	}
+
+	/* Put PE to the list */
+	mutex_lock(&phb->ioda.pe_list_mutex);
+	list_add_tail(&pe->list, &phb->ioda.pe_list);
+	mutex_unlock(&phb->ioda.pe_list_mutex);
+	return pe;
+}
+
+/*
+ * There are 2 types of PCI bus sensitive PEs: one that is comprised of a
+ * single PCI bus, and another that contains the primary PCI bus and its
+ * subordinate PCI devices and buses. The second type of PE is normally
+ * originated by PCIe-to-PCI bridge or PLX switch downstream ports.
+ *
+ * @all selects the second type (PNV_IODA_PE_BUS_ALL) over the first
+ * (PNV_IODA_PE_BUS).
+ *
+ * Returns the configured PE, already linked on the PHB's pe_list, or NULL
+ * when the bus already maps to a PE, no PE# is available, or configuring
+ * the PE fails.
+ */
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pnv_ioda_pe *pe = NULL;
+	unsigned int pe_num;
+
+	/*
+	 * The first RID of this bus must not be mapped to a PE yet. If it
+	 * is, the bus has already been set up: warn and bail out rather
+	 * than configure it a second time.
+	 */
+	pe_num = phb->ioda.pe_rmap[bus->number << 8];
+	if (WARN_ON(pe_num != IODA_INVALID_PE))
+		return NULL;
+
+	/* PE number for root bus should have been reserved */
+	if (pci_is_root_bus(bus))
+		pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
+
+	/* Check if PE is determined by M64 */
+	if (!pe)
+		pe = pnv_ioda_pick_m64_pe(bus, all);
+
+	/* The PE number isn't pinned by M64 */
+	if (!pe)
+		pe = pnv_ioda_alloc_pe(phb, 1);
+
+	if (!pe) {
+		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+			__func__, pci_domain_nr(bus), bus->number);
+		return NULL;
+	}
+
+	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+	pe->pbus = bus;
+	pe->pdev = NULL;
+	pe->mve_number = -1;
+	pe->rid = bus->busn_res.start << 8;
+
+	/* busn_res bounds are resource_size_t, hence %pa rather than %pad */
+	if (all)
+		pe_info(pe, "Secondary bus %pa..%pa associated with PE#%x\n",
+			&bus->busn_res.start, &bus->busn_res.end,
+			pe->pe_number);
+	else
+		pe_info(pe, "Secondary bus %pa associated with PE#%x\n",
+			&bus->busn_res.start, pe->pe_number);
+
+	if (pnv_ioda_configure_pe(phb, pe)) {
+		/*
+		 * Clear the back-pointer before handing the PE back to the
+		 * allocator: once it has been released it must not be
+		 * written to any more.
+		 */
+		pe->pbus = NULL;
+		pnv_ioda_free_pe(pe);
+		return NULL;
+	}
+
+	/*
+	 * Put PE to the list.
+	 * NOTE(review): pnv_ioda_setup_dev_PE() takes pe_list_mutex around
+	 * the same list_add_tail(); confirm callers serialise this path.
+	 */
+	list_add_tail(&pe->list, &phb->ioda.pe_list);
+
+	return pe;
+}
+
+/*
+ * Per-device DMA setup: make sure @pdev belongs to a PE (configuring its
+ * bus on demand), set up the PE's DMA on the first non-bridge device,
+ * then point the device at the PE's bypass offset and window #0 table
+ * and add it to the PE's IOMMU group when there is one.
+ */
+static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+
+	/* Check if the BDFN for this device is associated with a PE yet */
+	pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+	if (!pe) {
+		/* VF PEs should be pre-configured in pnv_pci_sriov_enable() */
+		if (WARN_ON(pdev->is_virtfn))
+			return;
+
+		/* Configure the bus, then look the device up again */
+		pnv_pci_configure_bus(pdev->bus);
+		pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+		pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
+
+
+		/*
+		 * If we can't setup the IODA PE something has gone horribly
+		 * wrong and we can't enable DMA for the device.
+		 */
+		if (WARN_ON(!pe))
+			return;
+	} else {
+		pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
+	}
+
+	/*
+	 * We assume that bridges *probably* don't need to do any DMA so we can
+	 * skip allocating a TCE table, etc unless we get a non-bridge device.
+	 */
+	if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
+		switch (phb->type) {
+		case PNV_PHB_IODA2:
+			pnv_pci_ioda2_setup_dma_pe(phb, pe);
+			break;
+		default:
+			pr_warn("%s: No DMA for PHB#%x (type %d)\n",
+				__func__, phb->hose->global_number, phb->type);
+		}
+	}
+
+	/* Record the PE in the device's pci_dn (when it has one) */
+	if (pdn)
+		pdn->pe_number = pe->pe_number;
+	pe->device_count++;
+
+	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
+	pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
+	set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+
+	/* PEs with a DMA weight of zero won't have a group */
+	if (pe->table_group.group)
+		iommu_add_device(&pe->table_group, &pdev->dev);
+}
+
+/*
+ * Reconfigure TVE#0 to be usable as 64-bit DMA space.
+ *
+ * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses.
+ * Devices can only access more than that if bit 59 of the PCI address is set
+ * by hardware, which indicates TVE#1 should be used instead of TVE#0.
+ * Many PCI devices are not capable of addressing that many bits, and as a
+ * result are limited to the 4GB of virtual memory made available to 32-bit
+ * devices in TVE#0.
+ *
+ * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit
+ * devices by configuring the virtual memory past the first 4GB inaccessible
+ * by 64-bit DMAs.  This should only be used by devices that want more than
+ * 4GB, and only on PEs that have no 32-bit devices.
+ *
+ * Currently this will only work on PHB3 (POWER8).
+ */
+/*
+ * Build a single-level TCE table of 256MB entries that maps all memory
+ * (up to memory_hotplug_max()) at a 4GB offset, and install it as
+ * window 0 of @pe.
+ *
+ * Returns 0 on success, -EIO on any failure. The table pages are released
+ * again when they could not be handed over to the hardware.
+ */
+static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
+{
+	u64 window_size, table_size, tce_count, addr;
+	struct page *table_pages;
+	u64 tce_order = 28; /* 256MB TCEs */
+	unsigned int table_order;
+	__be64 *tces;
+	s64 rc;
+
+	/*
+	 * Window size needs to be a power of two, but needs to account for
+	 * shifting memory by the 4GB offset required to skip 32bit space.
+	 */
+	window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
+	tce_count = window_size >> tce_order;
+	table_size = tce_count << 3;	/* 8 bytes per TCE */
+
+	if (table_size < PAGE_SIZE)
+		table_size = PAGE_SIZE;
+	table_order = get_order(table_size);
+
+	table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
+				       table_order);
+	if (!table_pages)
+		goto err;
+
+	tces = page_address(table_pages);
+	if (!tces)
+		goto err_free;
+
+	memset(tces, 0, table_size);
+
+	/* Entries below the 4GB offset stay zero, i.e. unmapped */
+	for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) {
+		tces[(addr + (1ULL << 32)) >> tce_order] =
+			cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
+	}
+
+	rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
+					pe->pe_number,
+					/* reconfigure window 0 */
+					(pe->pe_number << 1) + 0,
+					1,
+					__pa(tces),
+					table_size,
+					1 << tce_order);
+	if (rc == OPAL_SUCCESS) {
+		pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
+		return 0;
+	}
+
+err_free:
+	/* The window was not installed, so nothing references the table */
+	__free_pages(table_pages, table_order);
+err:
+	pe_err(pe, "Error configuring 64-bit DMA bypass\n");
+	return -EIO;
+}
+
+/*
+ * Decide whether @pdev can do DMA without going through the IOMMU table
+ * for the given @dma_mask. As a side effect this may reconfigure TVE#0 of
+ * the device's PE for 64-bit use and set the device's dma_offset to 4GB.
+ */
+static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
+		u64 dma_mask)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return false;
+
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+
+	/* Regular bypass window: the mask must reach the end of DRAM */
+	if (pe->tce_bypass_enabled) {
+		u64 limit = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+
+		if (dma_mask >= limit)
+			return true;
+	}
+
+	/*
+	 * The device cannot set the TCE bypass bit. On PHB3, TVE#0 can be
+	 * reconfigured to skip the 32-bit region and serve 64-bit DMAs,
+	 * provided the device can address all of that space.
+	 */
+	if (!(dma_mask >> 32))
+		return false;
+	if (dma_mask <= (memory_hotplug_max() + (1ULL << 32)))
+		return false;
+	/* pe->pdev should be set if it's a single device, pe->pbus if not */
+	if (pe->device_count != 1 && pe->pbus)
+		return false;
+	if (phb->model != PNV_PHB_MODEL_PHB3)
+		return false;
+
+	/* Configure the bypass mode */
+	if (pnv_pci_ioda_dma_64bit_bypass(pe))
+		return false;
+
+	/* 4GB offset bypasses 32-bit space */
+	pdev->dev.archdata.dma_offset = (1ULL << 32);
+	return true;
+}
+
+/*
+ * Return the address the PHB3 invalidation helpers write their TCE kill
+ * commands to: offset 0x210 from phb->regs.
+ */
+static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb)
+{
+	return phb->regs + 0x210;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Common for IODA1 and IODA2.
+ *
+ * Forwards to pnv_tce_xchg() and returns its result unchanged; as the
+ * name says, no TCE invalidation is issued from here.
+ */
+static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction)
+{
+	return pnv_tce_xchg(tbl, index, hpa, direction);
+}
+#endif
+
+/*
+ * Operation selectors OR-ed into the value written to the register
+ * returned by pnv_ioda_get_inval_reg(). The helpers below use INVAL_PE
+ * (all TCEs of one PE) and INVAL_ONE (individual DMA addresses of a PE).
+ */
+#define PHB3_TCE_KILL_INVAL_ALL		PPC_BIT(0)
+#define PHB3_TCE_KILL_INVAL_PE		PPC_BIT(1)
+#define PHB3_TCE_KILL_INVAL_ONE		PPC_BIT(2)
+
+/* Invalidate every TCE of @pe with a single write to the PHB3 register */
+static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
+{
+	__be64 __iomem *kill_reg = pnv_ioda_get_inval_reg(pe->phb);
+	unsigned long cmd;
+
+	/* 01xb - invalidate TCEs that match the specified PE# */
+	cmd = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
+
+	mb(); /* Ensure above stores are visible */
+	__raw_writeq_be(cmd, kill_reg);
+}
+
+/*
+ * Invalidate @npages TCEs of @pe starting at table index @index, one
+ * register write per IOMMU page of size 1 << @shift.
+ */
+static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe,
+					unsigned shift, unsigned long index,
+					unsigned long npages)
+{
+	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
+	unsigned long start, end, inc;
+
+	/* We'll invalidate DMA address in PE scope */
+	start = PHB3_TCE_KILL_INVAL_ONE;
+	start |= (pe->pe_number & 0xFF);
+	end = start;
+
+	/* Figure out the start, end and step */
+	start |= (index << shift);
+	end |= ((index + npages - 1) << shift);
+	inc = (0x1ull << shift);
+	/* Barrier before the register writes below */
+	mb();
+
+	/* 'end' is the last page to invalidate, hence the inclusive test */
+	while (start <= end) {
+		__raw_writeq_be(start, invalidate);
+		start += inc;
+	}
+}
+
+/*
+ * Invalidate all TCEs of @pe: directly through the PHB3 register when the
+ * PHB is a PHB3 with mapped registers, through OPAL otherwise.
+ */
+static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *owner = pe->phb;
+
+	if (owner->model != PNV_PHB_MODEL_PHB3 || !owner->regs) {
+		opal_pci_tce_kill(owner->opal_id, OPAL_PCI_TCE_KILL_PE,
+				  pe->pe_number, 0, 0, 0);
+		return;
+	}
+
+	pnv_pci_phb3_tce_invalidate_pe(pe);
+}
+
+/*
+ * Invalidate @npages TCEs starting at @index in every PE that @tbl is
+ * linked to, using the PHB3 register when available and OPAL otherwise.
+ */
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+		unsigned long index, unsigned long npages)
+{
+	struct iommu_table_group_link *link;
+
+	list_for_each_entry_lockless(link, &tbl->it_group_list, next) {
+		struct pnv_ioda_pe *pe = container_of(link->table_group,
+				struct pnv_ioda_pe, table_group);
+		struct pnv_phb *owner = pe->phb;
+		unsigned int pg_shift = tbl->it_page_shift;
+
+		if (owner->model != PNV_PHB_MODEL_PHB3 || !owner->regs) {
+			opal_pci_tce_kill(owner->opal_id,
+					  OPAL_PCI_TCE_KILL_PAGES,
+					  pe->pe_number, 1u << pg_shift,
+					  index << pg_shift, npages);
+			continue;
+		}
+
+		pnv_pci_phb3_tce_invalidate(pe, pg_shift, index, npages);
+	}
+}
+
+/*
+ * iommu_table_ops::set - write the TCEs with pnv_tce_build() and, when
+ * that succeeded, invalidate the affected range.
+ */
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+		long npages, unsigned long uaddr,
+		enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	int rc;
+
+	rc = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs);
+	if (rc)
+		return rc;
+
+	pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
+	return 0;
+}
+
+/*
+ * iommu_table_ops::clear - release @npages TCEs starting at @index with
+ * pnv_tce_free(), then invalidate the same range.
+ */
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+		long npages)
+{
+	pnv_tce_free(tbl, index, npages);
+
+	pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
+}
+
+/*
+ * TCE table operations installed on tables created by
+ * pnv_pci_ioda2_create_table(). The exchange/kill/useraddrptr hooks are
+ * only provided when CONFIG_IOMMU_API is enabled.
+ */
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+	.set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+	.xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
+	.tce_kill = pnv_pci_ioda2_tce_invalidate,
+	.useraddrptr = pnv_tce_useraddrptr,
+#endif
+	.clear = pnv_ioda2_tce_free,
+	.get = pnv_tce_get,
+	.free = pnv_pci_ioda2_table_free_pages,
+};
+
+/*
+ * Program @tbl as DMA window @num of the PE that owns @table_group, link
+ * the table to the group and invalidate the PE's TCEs.
+ *
+ * Returns 0 on success or the OPAL error code when mapping fails.
+ */
+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
+		int num, struct iommu_table *tbl)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+			table_group);
+	struct pnv_phb *phb = pe->phb;
+	const __u64 first = tbl->it_offset << tbl->it_page_shift;
+	const __u64 span = tbl->it_size << tbl->it_page_shift;
+	unsigned long entries;
+	int64_t rc;
+
+	/* For a multilevel table only the top level is handed to OPAL */
+	if (tbl->it_indirect_levels)
+		entries = tbl->it_level_size;
+	else
+		entries = tbl->it_size;
+
+	pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n",
+		num, first, first + span - 1,
+		IOMMU_PAGE_SIZE(tbl));
+
+	/*
+	 * Map TCE table through TVT. The TVE index is the PE number
+	 * shifted by 1 bit for 32-bits DMA space.
+	 */
+	rc = opal_pci_map_pe_dma_window(phb->opal_id,
+			pe->pe_number,
+			(pe->pe_number << 1) + num,
+			tbl->it_indirect_levels + 1,
+			__pa(tbl->it_base),
+			entries << 3,
+			IOMMU_PAGE_SIZE(tbl));
+	if (rc) {
+		pe_err(pe, "Failed to configure TCE table, err %lld\n", rc);
+		return rc;
+	}
+
+	pnv_pci_link_table_and_group(phb->hose->node, num,
+			tbl, &pe->table_group);
+	pnv_pci_ioda2_tce_invalidate_pe(pe);
+
+	return 0;
+}
+
+/*
+ * Enable or disable the 64-bit bypass window (window #1) of @pe. The
+ * window covers DRAM rounded up to a power of two when enabled and has
+ * size 0 when disabled. pe->tce_bypass_enabled is only updated when the
+ * OPAL call succeeds.
+ */
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+{
+	uint16_t window_id = (pe->pe_number << 1) + 1;
+	phys_addr_t size = 0;
+	int64_t rc;
+
+	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
+
+	if (enable)
+		size = roundup_pow_of_two(memblock_end_of_DRAM());
+
+	rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+					     pe->pe_number,
+					     window_id,
+					     pe->tce_bypass_base,
+					     size);
+	if (rc) {
+		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
+		return;
+	}
+
+	pe->tce_bypass_enabled = enable;
+}
+
+/*
+ * Allocate an iommu_table for window @num of @table_group and its TCE
+ * pages. Window 0 starts at the group's tce32_start, any other window at
+ * the PE's bypass base.
+ *
+ * On success stores the table in *@ptbl and returns 0; otherwise returns
+ * -ENOMEM or the error from pnv_pci_ioda2_table_alloc_pages().
+ */
+static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+		int num, __u32 page_shift, __u64 window_size, __u32 levels,
+		bool alloc_userspace_copy, struct iommu_table **ptbl)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+			table_group);
+	int nid = pe->phb->hose->node;
+	struct iommu_table *new_tbl;
+	__u64 bus_offset;
+	long rc;
+
+	if (num)
+		bus_offset = pe->tce_bypass_base;
+	else
+		bus_offset = table_group->tce32_start;
+
+	new_tbl = pnv_pci_table_alloc(nid);
+	if (!new_tbl)
+		return -ENOMEM;
+
+	new_tbl->it_ops = &pnv_ioda2_iommu_ops;
+
+	rc = pnv_pci_ioda2_table_alloc_pages(nid,
+			bus_offset, page_shift, window_size,
+			levels, alloc_userspace_copy, new_tbl);
+	if (rc) {
+		/* Drop the reference taken by the allocation above */
+		iommu_tce_table_put(new_tbl);
+		return rc;
+	}
+
+	*ptbl = new_tbl;
+
+	return 0;
+}
+
+/*
+ * Create and program the default DMA configuration of @pe: a TCE table
+ * for window #0 and, unless pnv_iommu_bypass_disabled is set, the 64-bit
+ * bypass window.
+ *
+ * Returns the error from pnv_pci_ioda2_create_table() when the table
+ * cannot be created. A later failure to initialise or map the table is
+ * logged and leaves window #0 without a table, but still returns 0.
+ */
+static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table *tbl = NULL;
+	long rc;
+	unsigned long res_start, res_end;
+
+	/*
+	 * crashkernel= specifies the kdump kernel's maximum memory at
+	 * some offset and there is no guarantee the result is a power
+	 * of 2, which will cause errors later.
+	 */
+	const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max());
+
+	/*
+	 * In memory constrained environments, e.g. kdump kernel, the
+	 * DMA window can be larger than available memory, which will
+	 * cause errors later.
+	 */
+	const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER);
+
+	/*
+	 * We create the default window as big as we can. The constraint is
+	 * the max order of allocation possible. The TCE table is likely to
+	 * end up being multilevel and with on-demand allocation in place,
+	 * the initial use is not going to be huge as the default window aims
+	 * to support crippled devices (i.e. not fully 64bit DMAble) only.
+	 */
+	/* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */
+	const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory);
+	/* Each TCE level cannot exceed maxblock so go multilevel if needed */
+	unsigned long tces_order = ilog2(window_size >> PAGE_SHIFT);
+	unsigned long tcelevel_order = ilog2(maxblock >> 3);
+	unsigned int levels = tces_order / tcelevel_order;
+
+	/* Round the number of levels up */
+	if (tces_order % tcelevel_order)
+		levels += 1;
+	/*
+	 * We try to stick to default levels (which is >1 at the moment) in
+	 * order to save memory by relying on on-demand TCE level allocation.
+	 */
+	levels = max_t(unsigned int, levels, POWERNV_IOMMU_DEFAULT_LEVELS);
+
+	rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, PAGE_SHIFT,
+			window_size, levels, false, &tbl);
+	if (rc) {
+		pe_err(pe, "Failed to create 32-bit TCE table, err %ld\n",
+				rc);
+		return rc;
+	}
+
+	/* We use top part of 32bit space for MMIO so exclude it from DMA */
+	res_start = 0;
+	res_end = 0;
+	if (window_size > pe->phb->ioda.m32_pci_base) {
+		res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
+		res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
+	}
+
+	tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number;
+	if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
+		rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+	else
+		rc = -ENOMEM;
+	if (rc) {
+		pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc);
+		iommu_tce_table_put(tbl);
+		tbl = NULL; /* This clears iommu_table_base below */
+	}
+	if (!pnv_iommu_bypass_disabled)
+		pnv_pci_ioda2_set_bypass(pe, true);
+
+	/*
+	 * Set table base for the case of IOMMU DMA use. Usually this is done
+	 * from dma_dev_setup() which is not called when a device is returned
+	 * from VFIO so do it here.
+	 */
+	if (pe->pdev)
+		set_iommu_table_base(&pe->pdev->dev, tbl);
+
+	return 0;
+}
+
+/*
+ * Remove DMA window @num from the PE owning @table_group and unlink the
+ * table from the group. The table is unlinked even when the OPAL call
+ * fails; the OPAL result is what gets returned.
+ */
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+		int num)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+			table_group);
+	struct pnv_phb *owner = pe->phb;
+	long rc;
+
+	pe_info(pe, "Removing DMA window #%d\n", num);
+
+	rc = opal_pci_map_pe_dma_window(owner->opal_id, pe->pe_number,
+			(pe->pe_number << 1) + num,
+			0/* levels */, 0/* table address */,
+			0/* table size */, 0/* page size */);
+	if (!rc)
+		pnv_pci_ioda2_tce_invalidate_pe(pe);
+	else
+		pe_warn(pe, "Unmapping failed, ret = %ld\n", rc);
+
+	pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
+
+	return rc;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Compute how many bytes of TCE tables a window needs.
+ *
+ * @page_shift:  log2 of the IOMMU page size
+ * @window_size: window size in bytes, must be a power of two
+ * @levels:      number of table levels, 1..POWERNV_IOMMU_MAX_LEVELS
+ *
+ * Returns the size in bytes, counted twice (hardware table plus userspace
+ * copy), or 0 when the arguments are invalid, including a window smaller
+ * than one IOMMU page.
+ */
+unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+		__u64 window_size, __u32 levels)
+{
+	unsigned long bytes = 0;
+	unsigned window_shift, entries_shift, table_shift;
+	unsigned long tce_table_size;
+	unsigned long direct_table_size;
+
+	/* Validate first: ilog2() and the shifts below rely on it */
+	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
+			!is_power_of_2(window_size))
+		return 0;
+
+	window_shift = ilog2(window_size);
+	/* A window smaller than a page would make the subtraction wrap */
+	if (window_shift < page_shift)
+		return 0;
+
+	entries_shift = window_shift - page_shift;
+	table_shift = entries_shift + 3;	/* 8 bytes per TCE */
+	tce_table_size = max(0x1000UL, 1UL << table_shift);
+
+	/* Calculate a direct table size from window_size and levels */
+	entries_shift = (entries_shift + levels - 1) / levels;
+	table_shift = entries_shift + 3;
+	table_shift = max_t(unsigned, table_shift, PAGE_SHIFT);
+	direct_table_size =  1UL << table_shift;
+
+	/* Add up one level at a time, from the last level upwards */
+	for ( ; levels; --levels) {
+		bytes += ALIGN(tce_table_size, direct_table_size);
+
+		tce_table_size /= direct_table_size;
+		tce_table_size <<= 3;
+		tce_table_size = max_t(unsigned long,
+				tce_table_size, direct_table_size);
+	}
+
+	return bytes + bytes; /* one for HW table, one for userspace copy */
+}
+
+/*
+ * iommu_table_group_ops::create_table - create a table together with its
+ * userspace copy and record its size in it_allocated_size.
+ */
+static long pnv_pci_ioda2_create_table_userspace(
+		struct iommu_table_group *table_group,
+		int num, __u32 page_shift, __u64 window_size, __u32 levels,
+		struct iommu_table **ptbl)
+{
+	long rc;
+
+	rc = pnv_pci_ioda2_create_table(table_group, num, page_shift,
+			window_size, levels, true, ptbl);
+	if (rc)
+		return rc;
+
+	(*ptbl)->it_allocated_size = pnv_pci_ioda2_get_table_size(
+			page_shift, window_size, levels);
+	return 0;
+}
+
+/*
+ * Point every device on @bus at @pe's window #0 table and bypass base.
+ * For a PNV_IODA_PE_BUS_ALL PE the subordinate buses are visited too.
+ */
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+{
+	struct pci_dev *child;
+
+	list_for_each_entry(child, &bus->devices, bus_list) {
+		set_iommu_table_base(&child->dev, pe->table_group.tables[0]);
+		child->dev.archdata.dma_offset = pe->tce_bypass_base;
+
+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL))
+			continue;
+		if (!child->subordinate)
+			continue;
+
+		pnv_ioda_setup_bus_dma(pe, child->subordinate);
+	}
+}
+
+/*
+ * iommu_table_group_ops::take_ownership - tear down the default DMA
+ * configuration of the PE: bypass window, window #0 and its table.
+ * Always returns 0.
+ */
+static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+						table_group);
+	/* Remember the table now, pnv_pci_ioda2_unset_window() resets it */
+	struct iommu_table *dflt = pe->table_group.tables[0];
+
+	/*
+	 * iommu_ops transfers the ownership per device and we model the
+	 * group ownership with the first device in the group: when the
+	 * table is already gone there is nothing left to do.
+	 */
+	if (dflt) {
+		pnv_pci_ioda2_set_bypass(pe, false);
+		pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+		if (pe->pbus)
+			pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		else if (pe->pdev)
+			set_iommu_table_base(&pe->pdev->dev, NULL);
+		iommu_tce_table_put(dflt);
+	}
+
+	return 0;
+}
+
+/*
+ * iommu_table_group_ops::release_ownership - restore the default DMA
+ * configuration of the PE unless window #0 already has a table.
+ */
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+						table_group);
+
+	/* As with take_ownership, only the first device does the work */
+	if (!pe->table_group.tables[0]) {
+		pnv_pci_ioda2_setup_default_config(pe);
+		if (pe->pbus)
+			pnv_ioda_setup_bus_dma(pe, pe->pbus);
+	}
+}
+
+/*
+ * Table group operations installed by pnv_pci_ioda2_setup_dma_pe() on the
+ * table_group of each PE (only built with CONFIG_IOMMU_API).
+ */
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+	.get_table_size = pnv_pci_ioda2_get_table_size,
+	.create_table = pnv_pci_ioda2_create_table_userspace,
+	.set_window = pnv_pci_ioda2_set_window,
+	.unset_window = pnv_pci_ioda2_unset_window,
+	.take_ownership = pnv_ioda2_take_ownership,
+	.release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+/*
+ * One-time DMA setup of @pe: describe its table group, create the default
+ * configuration and, with CONFIG_IOMMU_API, register the IOMMU group.
+ * pe->dma_setup_done is only set when the default configuration could be
+ * created.
+ */
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+				struct pnv_ioda_pe *pe)
+{
+	struct iommu_table_group *grp = &pe->table_group;
+
+	/* TVE #1 is selected by PCI address bit 59 */
+	pe->tce_bypass_base = 1ull << 59;
+
+	/* The PE will reserve all possible 32-bits space */
+	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+		phb->ioda.m32_pci_base);
+
+	/* Setup linux iommu table */
+	grp->tce32_start = 0;
+	grp->tce32_size = phb->ioda.m32_pci_base;
+	grp->max_dynamic_windows_supported = IOMMU_TABLE_GROUP_MAX_TABLES;
+	grp->max_levels = POWERNV_IOMMU_MAX_LEVELS;
+	grp->pgsizes = pnv_ioda_parse_tce_sizes(phb);
+
+	if (pnv_pci_ioda2_setup_default_config(pe))
+		return;
+
+#ifdef CONFIG_IOMMU_API
+	grp->ops = &pnv_pci_ioda2_ops;
+	iommu_register_group(grp, phb->hose->global_number,
+			     pe->pe_number);
+#endif
+	pe->dma_setup_done = true;
+}
+
+/*
+ * Called from KVM in real mode to EOI passthru interrupts. The ICP
+ * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru().
+ *
+ * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call
+ * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ
+ * numbers of the in-the-middle MSI domain are vector numbers and it's
+ * good enough for OPAL. Use that.
+ */
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d)
+{
+	/* Both the PHB and the HW IRQ number come from the parent domain */
+	struct irq_data *parent = d->parent_data;
+	struct pci_controller *hose = irq_data_get_irq_chip_data(parent);
+	struct pnv_phb *phb = hose->private_data;
+
+	return opal_pci_msi_eoi(phb->opal_id, parent->hwirq);
+}
+
+/*
+ * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers
+ */
+static void pnv_ioda2_msi_eoi(struct irq_data *d)
+{
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
+	unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);
+	int64_t err;
+
+	/* EOI the MSI in the PHB first, then in the interrupt controller */
+	err = opal_pci_msi_eoi(phb->opal_id, hwirq);
+	WARN_ON_ONCE(err);
+
+	icp_native_eoi(d);
+}
+
+/* P8/CXL only */
+void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
+{
+	struct irq_chip *tmpl;
+
+	/* The MSI EOI OPAL call is only needed on PHB3 */
+	if (phb->model != PNV_PHB_MODEL_PHB3)
+		return;
+
+	/*
+	 * On the first MSI set up for this PHB, clone the IRQ chip currently
+	 * attached to @virq and override its EOI handler so that EOIs are
+	 * routed through pnv_ioda2_msi_eoi().
+	 */
+	if (!phb->ioda.irq_chip_init) {
+		tmpl = irq_data_get_irq_chip(irq_get_irq_data(virq));
+		phb->ioda.irq_chip_init = 1;
+		phb->ioda.irq_chip = *tmpl;
+		phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
+	}
+
+	irq_set_chip(virq, &phb->ioda.irq_chip);
+	irq_set_chip_data(virq, phb->hose);
+}
+
+static struct irq_chip pnv_pci_msi_irq_chip;
+
+/*
+ * Returns true iff chip is something that we could call
+ * pnv_opal_pci_msi_eoi for, i.e. iff @chip is pnv_pci_msi_irq_chip
+ * (compared by address).
+ */
+bool is_pnv_opal_msi(struct irq_chip *chip)
+{
+	return chip == &pnv_pci_msi_irq_chip;
+}
+EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
+
+/*
+ * Bind MSI vector @xive_num to the PE of @dev and fill @msg with the
+ * address/data pair obtained from OPAL.
+ *
+ * @is_64 selects a 64-bit MSI address; it is forced to 0 for devices
+ * flagged no_64bit_msi.
+ *
+ * Returns 0 on success, -ENXIO when the device has no PE or the PE has a
+ * negative mve_number, -EIO when an OPAL call fails.
+ */
+static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
+				    unsigned int xive_num,
+				    unsigned int is_64, struct msi_msg *msg)
+{
+	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
+	__be32 data;
+	int rc;
+
+	dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__,
+		is_64 ? "64" : "32", xive_num);
+
+	/* No PE assigned ? bail out ... no MSI for you ! */
+	if (pe == NULL)
+		return -ENXIO;
+
+	/* Check if we have an MVE */
+	if (pe->mve_number < 0)
+		return -ENXIO;
+
+	/* Force 32-bit MSI on some broken devices */
+	if (dev->no_64bit_msi)
+		is_64 = 0;
+
+	/* Assign XIVE to PE */
+	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
+	if (rc) {
+		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
+			pci_name(dev), rc, xive_num);
+		return -EIO;
+	}
+
+	/* OPAL hands back big-endian values; convert them for the msi_msg */
+	if (is_64) {
+		__be64 addr64;
+
+		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
+				     &addr64, &data);
+		if (rc) {
+			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
+				pci_name(dev), rc);
+			return -EIO;
+		}
+		msg->address_hi = be64_to_cpu(addr64) >> 32;
+		msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
+	} else {
+		__be32 addr32;
+
+		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
+				     &addr32, &data);
+		if (rc) {
+			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
+				pci_name(dev), rc);
+			return -EIO;
+		}
+		msg->address_hi = 0;
+		msg->address_lo = be32_to_cpu(addr32);
+	}
+	msg->data = be32_to_cpu(data);
+
+	return 0;
+}
+
+/*
+ * ->msi_free() runs before irq_domain_free_irqs_top(), while the handler
+ * data is still reachable, so the XIVE controller state is cleared here.
+ */
+static void pnv_msi_ops_msi_free(struct irq_domain *domain,
+				 struct msi_domain_info *info,
+				 unsigned int irq)
+{
+	if (!xive_enabled())
+		return;
+	xive_irq_free_data(irq);
+}
+
+static struct msi_domain_ops pnv_pci_msi_domain_ops = {
+	.msi_free	= pnv_msi_ops_msi_free,	/* frees XIVE irq data when XIVE is enabled */
+};
+
+static void pnv_msi_shutdown(struct irq_data *d)
+{
+	d = d->parent_data;	/* operate on the parent domain's irq_data */
+	if (d->chip->irq_shutdown)	/* the parent callback may be absent */
+		d->chip->irq_shutdown(d);
+}
+
+static void pnv_msi_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);		/* mask through the PCI MSI helper ... */
+	irq_chip_mask_parent(d);	/* ... then in the parent irq chip */
+}
+
+static void pnv_msi_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);		/* unmask through the PCI MSI helper ... */
+	irq_chip_unmask_parent(d);	/* ... then in the parent irq chip */
+}
+
+static struct irq_chip pnv_pci_msi_irq_chip = {	/* chip handed to pnv_msi_domain_info */
+	.name		= "PNV-PCI-MSI",
+	.irq_shutdown	= pnv_msi_shutdown,	/* forwards to the parent chip */
+	.irq_mask	= pnv_msi_mask,		/* PCI MSI mask + parent mask */
+	.irq_unmask	= pnv_msi_unmask,	/* PCI MSI unmask + parent unmask */
+	.irq_eoi	= irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pnv_msi_domain_info = {	/* used by pci_msi_create_irq_domain() */
+	.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_MULTI_PCI_MSI  | MSI_FLAG_PCI_MSIX),
+	.ops   = &pnv_pci_msi_domain_ops,	/* only ->msi_free is provided */
+	.chip  = &pnv_pci_msi_irq_chip,
+};
+
+static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct msi_desc *desc = irq_data_get_msi_desc(d);
+	struct pnv_phb *phb = hose->private_data;
+	bool want_64 = desc->pci.msi_attrib.is_64;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+	int err = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq, want_64, msg);
+
+	/* This callback returns void, so a failure can only be logged */
+	if (err)
+		dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n",
+			want_64 ? "64" : "32", d->hwirq, err);
+}
+
+/*
+ * EOI for the PNV-MSI chip. The IRQ data is mapped in the MSI domain in
+ * which HW IRQ numbers correspond to vector numbers.
+ */
+static void pnv_msi_eoi(struct irq_data *d)
+{
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
+
+	if (phb->model == PNV_PHB_MODEL_PHB3) {
+		/*
+		 * The EOI OPAL call takes an OPAL HW IRQ number but
+		 * since it is translated into a vector number in
+		 * OPAL, use that directly.
+		 */
+		WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq));
+	}
+
+	irq_chip_eoi_parent(d);	/* the parent EOI is issued for every PHB model */
+}
+
+static struct irq_chip pnv_msi_irq_chip = {	/* installed by pnv_irq_domain_alloc() */
+	.name			= "PNV-MSI",
+	.irq_shutdown		= pnv_msi_shutdown,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= pnv_msi_eoi,	/* extra OPAL EOI on PHB3 */
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_compose_msi_msg	= pnv_msi_compose_msg,
+};
+
+static int pnv_irq_parent_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq, int hwirq)
+{
+	/*
+	 * Two-cell specifier for the parent domain: the HW interrupt
+	 * number followed by its trigger type (always rising edge).
+	 */
+	struct irq_fwspec spec;
+
+	spec.fwnode = domain->parent->fwnode;
+	spec.param_count = 2;
+	spec.param[0] = hwirq;
+	spec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+	/* Allocate a single IRQ in the parent and pass its result through */
+	return irq_domain_alloc_irqs_parent(domain, virq, 1, &spec);
+}
+
+/* Reserve @nr_irqs MSIs in the PHB bitmap and allocate each one in the parent domain */
+static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	struct pci_controller *hose = domain->host_data;
+	struct pnv_phb *phb = hose->private_data;
+	msi_alloc_info_t *info = arg;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc);
+	int hwirq, i, ret;
+
+	hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs);
+	if (hwirq < 0) {
+		dev_warn(&pdev->dev, "failed to find a free MSI\n");
+		return -ENOSPC;
+	}
+
+	dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+		hose->dn, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = pnv_irq_parent_domain_alloc(domain, virq + i,
+						  phb->msi_base + hwirq + i);
+		if (ret)
+			goto out;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &pnv_msi_irq_chip, hose);
+	}
+
+	return 0;
+
+out:	/* parent IRQs [0, i) were allocated before the failure: free exactly those */
+	irq_domain_free_irqs_parent(domain, virq, i);
+	msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs);
+	return ret;
+}
+
+static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);	/* first IRQ of the range */
+	struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+	struct pnv_phb *phb = hose->private_data;
+
+	pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn,
+		 virq, d->hwirq, nr_irqs);
+
+	/* Return the @nr_irqs bitmap slots starting at the first IRQ's hwirq */
+	msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs);
+	/* XIVE domain is cleared through ->msi_free() */
+}
+
+static const struct irq_domain_ops pnv_irq_domain_ops = {	/* ops of hose->dev_domain */
+	.alloc  = pnv_irq_domain_alloc,
+	.free   = pnv_irq_domain_free,
+};
+
+/* Create the per-PHB "PNV-MSI" IRQ domain and the PCI-MSI domain stacked on top of it */
+static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct irq_domain *parent = irq_get_default_host();
+
+	hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id);
+	if (!hose->fwnode)
+		return -ENOMEM;
+
+	hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+						       hose->fwnode, &pnv_irq_domain_ops, hose);
+	if (!hose->dev_domain) {
+		pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+		       hose->dn, hose->global_number);
+		irq_domain_free_fwnode(hose->fwnode);
+		return -ENOMEM;
+	}
+
+	hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn),
+						     &pnv_msi_domain_info, hose->dev_domain);
+	if (!hose->msi_domain) {
+		pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+		       hose->dn, hose->global_number);
+		/* Undo in reverse order: dev_domain was created on top of the fwnode */
+		irq_domain_remove(hose->dev_domain);
+		irq_domain_free_fwnode(hose->fwnode);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb)
+{
+	unsigned int count;
+	const __be32 *prop = of_get_property(phb->hose->dn,
+					     "ibm,opal-msi-ranges", NULL);
+	if (!prop) {
+		/* BML Fallback */
+		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
+	}
+	if (!prop)	/* neither property present: MSIs are not set up */
+		return;
+
+	phb->msi_base = be32_to_cpup(prop);	/* cell 0: base IRQ */
+	count = be32_to_cpup(prop + 1);		/* cell 1: number of MSIs */
+	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
+		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
+		       phb->hose->global_number);
+		return;
+	}
+
+	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
+		count, phb->msi_base);
+
+	/* NOTE(review): the return value is not checked here */
+	pnv_msi_allocate_domains(phb->hose, count);
+}
+
+/* Map every I/O or M32 segment covered by @res to @pe, in the segmap and via OPAL */
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+				  struct resource *res)
+{
+	struct pnv_phb *phb = pe->phb;
+	struct pci_bus_region region;
+	int index;
+	int64_t rc;
+
+	if (!res || !res->flags || res->start > res->end ||
+	    res->flags & IORESOURCE_UNSET)
+		return;
+
+	if (res->flags & IORESOURCE_IO) {
+		region.start = res->start - phb->ioda.io_pci_base;
+		region.end   = res->end - phb->ioda.io_pci_base;
+		index = region.start / phb->ioda.io_segsize;
+
+		while (index < phb->ioda.total_pe_num &&
+		       region.start <= region.end) {
+			phb->ioda.io_segmap[index] = pe->pe_number;
+			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+			if (rc != OPAL_SUCCESS) {
+				pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n",
+				       __func__, rc, index, pe->pe_number);
+				break;
+			}
+
+			region.start += phb->ioda.io_segsize;
+			index++;
+		}
+	} else if ((res->flags & IORESOURCE_MEM) &&
+		   !pnv_pci_is_m64(phb, res)) {
+		/* Memory resources that are not M64 are mapped as M32 segments */
+		region.start = res->start - phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		region.end   = res->end - phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		index = region.start / phb->ioda.m32_segsize;
+
+		while (index < phb->ioda.total_pe_num &&
+		       region.start <= region.end) {
+			phb->ioda.m32_segmap[index] = pe->pe_number;
+			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+			if (rc != OPAL_SUCCESS) {
+				pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x\n",
+				       __func__, rc, index, pe->pe_number);
+				break;
+			}
+
+			region.start += phb->ioda.m32_segsize;
+			index++;
+		}
+	}
+}
+
+/*
+ * This function is expected to be called per PE, walking from the top
+ * of the hierarchy to the bottom, so that the I/O or MMIO segment
+ * assigned to a parent PE can be overridden by its child PEs if needed.
+ */
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *pdev;
+	int i;
+
+	/*
+	 * NOTE: We only care about PCI bus based PEs for now. PCI
+	 * device based PEs, for example SRIOV sensitive VFs, should
+	 * be figured out later.
+	 */
+	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+
+	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+		for (i = 0; i <= PCI_ROM_RESOURCE; i++)	/* resources 0..ROM inclusive */
+			pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);
+
+		/*
+		 * If the PE contains all subordinate PCI buses, the
+		 * windows of the child bridges should be mapped to
+		 * the PE as well.
+		 */
+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
+			continue;
+		for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+			pnv_ioda_setup_pe_res(pe,
+				&pdev->resource[PCI_BRIDGE_RESOURCES + i]);
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int pnv_pci_diag_data_set(void *data, u64 val)
+{
+	struct pnv_phb *phb = data;	/* @val, the value written, is not used */
+	s64 ret;
+
+	/* Retrieve the diag data from firmware */
+	ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+					  phb->diag_data_size);
+	if (ret != OPAL_SUCCESS)
+		return -EIO;
+
+	/* Print the diag data to the kernel log */
+	pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
+			 "%llu\n");	/* setter only: the getter slot is NULL */
+
+static int pnv_pci_ioda_pe_dump(void *data, u64 val)
+{
+	struct pnv_phb *phb = data;	/* @val, the value written, is not used */
+	int pe_num;
+
+	for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
+		struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num];
+
+		if (!test_bit(pe_num, phb->ioda.pe_alloc))	/* skip PEs not marked allocated */
+			continue;
+
+		pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n",
+			pe->rid, pe->device_count,
+			(pe->flags & PNV_IODA_PE_DEV) ? "dev " : "",
+			(pe->flags & PNV_IODA_PE_BUS) ? "bus " : "",
+			(pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "",
+			(pe->flags & PNV_IODA_PE_MASTER) ? "master " : "",
+			(pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "",
+			(pe->flags & PNV_IODA_PE_VF) ? "vf " : "");
+	}
+
+	return 0;	/* the dump itself cannot fail */
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL,
+			 pnv_pci_ioda_pe_dump, "%llu\n");	/* setter only */
+
+#endif /* CONFIG_DEBUG_FS */
+
+static void pnv_pci_ioda_create_dbgfs(void)
+{
+#ifdef CONFIG_DEBUG_FS
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
+	char name[16];	/* holds "PCI" followed by the hex global number */
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		/* One directory per PHB under arch_debugfs_dir */
+		sprintf(name, "PCI%04x", hose->global_number);
+		phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir);
+
+		debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
+					   phb, &pnv_pci_diag_data_fops);
+		debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs,
+					   phb, &pnv_pci_ioda_pe_dump_fops);
+	}
+#endif /* CONFIG_DEBUG_FS */
+}
+
+static void pnv_pci_enable_bridge(struct pci_bus *bus)
+{
+	struct pci_dev *dev = bus->self;	/* bridge leading to @bus, may be NULL */
+	struct pci_bus *child;
+
+	/* Empty bus ? bail */
+	if (list_empty(&bus->devices))
+		return;
+
+	/*
+	 * If there's a bridge associated with that bus enable it. This works
+	 * around races in the generic code if the enabling is done during
+	 * parallel probing. This can be removed once those races have been
+	 * fixed.
+	 */
+	if (dev) {
+		int rc = pci_enable_device(dev);
+		if (rc)
+			pci_err(dev, "Error enabling bridge (%d)\n", rc);
+		pci_set_master(dev);	/* done even when the enable above failed */
+	}
+
+	/* Perform the same to child busses */
+	list_for_each_entry(child, &bus->children, node)
+		pnv_pci_enable_bridge(child);
+}
+
+static void pnv_pci_enable_bridges(void)
+{
+	struct pci_controller *hose;
+
+	list_for_each_entry(hose, &hose_list, list_node)	/* every host bridge */
+		pnv_pci_enable_bridge(hose->bus);	/* recurses from the root bus */
+}
+
+static void pnv_pci_ioda_fixup(void)	/* installed as ppc_md.pcibios_fixup */
+{
+	pnv_pci_ioda_create_dbgfs();
+
+	pnv_pci_enable_bridges();
+
+#ifdef CONFIG_EEH
+	pnv_eeh_post_init();	/* only built when EEH is configured */
+#endif
+}
+
+/*
+ * Returns the alignment for I/O or memory windows for P2P
+ * bridges. That actually depends on how PEs are segmented.
+ * For now, we return I/O or M32 segment size for PE sensitive
+ * P2P bridges. Otherwise, the default values (4KiB for I/O,
+ * 1MiB for memory) will be returned.
+ *
+ * The current PCI bus might be put into one PE, which was
+ * created against the parent PCI bridge. For that case, we
+ * needn't enlarge the alignment so that we can save some
+ * resources.
+ */
+static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
+						unsigned long type)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	int num_pci_bridges = 0;
+	struct pci_dev *bridge;
+
+	bridge = bus->self;
+	while (bridge) {	/* walk up through the bridges above @bus */
+		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
+			num_pci_bridges++;
+			if (num_pci_bridges >= 2)
+				return 1;	/* second such bridge found */
+		}
+
+		bridge = bridge->bus->self;
+	}
+
+	/*
+	 * We fall back to M32 if M64 isn't supported. We enforce the M64
+	 * alignment for any 64-bit resource, PCIe doesn't care and
+	 * bridges only do 64-bit prefetchable anyway.
+	 */
+	if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
+		return phb->ioda.m64_segsize;
+	if (type & IORESOURCE_MEM)
+		return phb->ioda.m32_segsize;
+
+	return phb->ioda.io_segsize;	/* anything else is treated as I/O */
+}
+
+/*
+ * We are updating root port or the upstream port of the bridge behind
+ * the root port with PHB's windows in order to accommodate the changes
+ * on required resources during PCI (slot) hotplug, which is connected to
+ * either root port or the downstream ports of PCIe switch behind the
+ * root port. Only the window kinds selected by @type are rewritten.
+ */
+static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
+					   unsigned long type)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *bridge = bus->self;
+	struct resource *r, *w;
+	bool msi_region;
+	int i;
+
+	/* Check if we need apply fixup to the bridge's windows */
+	if (!pci_is_root_bus(bridge->bus) &&
+	    !pci_is_root_bus(bridge->bus->self->bus))
+		return;
+
+	/* Fixup the resources */
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+		r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
+		if (!r->flags || !r->parent)
+			continue;
+
+		w = NULL;
+		msi_region = false;	/* per window, must not leak into the next one */
+		if (r->flags & type & IORESOURCE_IO)
+			w = &hose->io_resource;
+		else if (pnv_pci_is_m64(phb, r) &&
+			 (type & IORESOURCE_PREFETCH) &&
+			 phb->ioda.m64_segsize)
+			w = &hose->mem_resources[1];
+		else if (r->flags & type & IORESOURCE_MEM) {
+			w = &hose->mem_resources[0];
+			msi_region = true;
+		}
+		if (!w)		/* window kind not selected by @type: leave it as is */
+			continue;
+
+		r->start = w->start;
+		r->end = w->end;
+
+		/*
+		 * The 64KB 32-bit MSI region must not be part of the 32-bit
+		 * bridge window (EEH errors were seen on Garrison otherwise).
+		 * Exclude the top 1MB, the minimal 32-bit window alignment.
+		 */
+		if (msi_region) {
+			r->end += 0x10000;
+			r->end -= 0x100000;
+		}
+	}
+}
+
+static void pnv_pci_configure_bus(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+	struct pnv_ioda_pe *pe;
+	bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);	/* forwarded as-is below */
+
+	dev_info(&bus->dev, "Configuring PE for bus\n");
+
+	/* Don't assign PE to PCI bus, which doesn't have subordinate devices */
+	if (WARN_ON(list_empty(&bus->devices)))
+		return;
+
+	/* Reserve PEs according to used M64 resources */
+	pnv_ioda_reserve_m64_pe(bus, NULL, all);
+
+	/*
+	 * Assign PE. We might run here because of partial hotplug.
+	 * For the case, we just pick up the existing PE and should
+	 * not allocate resources again.
+	 */
+	pe = pnv_ioda_setup_bus_PE(bus, all);
+	if (!pe)	/* no PE returned: no segments to set up */
+		return;
+
+	pnv_ioda_setup_pe_seg(pe);
+}
+
+static resource_size_t pnv_pci_default_alignment(void)	/* ppc_md.pcibios_default_alignment hook */
+{
+	return PAGE_SIZE;	/* one page is the default alignment reported */
+}
+
+/*
+ * Refuse to enable a device that never got a PE assigned properly.
+ */
+static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
+{
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	/* A usable device has both a pci_dn and a valid PE number */
+	if (pdn && pdn->pe_number != IODA_INVALID_PE)
+		return true;
+
+	/* Otherwise tell the caller not to enable it */
+	pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n");
+	return false;
+}
+
+static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
+{
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	/* Without a pci_dn the device cannot be enabled */
+	if (!pdn)
+		return false;
+
+	if (pdn->pe_number != IODA_INVALID_PE)
+		return true;
+
+	/*
+	 * No PE yet: set one up on demand and report whether that worked.
+	 */
+	return pnv_ioda_setup_dev_PE(dev) != NULL;
+}
+
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table *tbl = pe->table_group.tables[0];	/* sampled before window 0 is unset */
+	int64_t rc;
+
+	if (!pe->dma_setup_done)	/* DMA was never set up for this PE */
+		return;
+
+	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+	if (rc)
+		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
+
+	pnv_pci_ioda2_set_bypass(pe, false);
+	if (pe->table_group.group) {
+		iommu_group_put(pe->table_group.group);
+		WARN_ON(pe->table_group.group);	/* warns if the pointer is still set after the put */
+	}
+
+	iommu_tce_table_put(tbl);	/* drop the table sampled at entry */
+}
+
+static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
+				 unsigned short win,
+				 unsigned int *map)
+{
+	struct pnv_phb *phb = pe->phb;
+	int idx;
+	int64_t rc;
+
+	for (idx = 0; idx < phb->ioda.total_pe_num; idx++) {
+		if (map[idx] != pe->pe_number)	/* segment belongs to another PE */
+			continue;
+
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				phb->ioda.reserved_pe_idx, win, 0, idx);	/* remap to the reserved PE */
+
+		if (rc != OPAL_SUCCESS)
+			pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
+				rc, win, idx);
+
+		map[idx] = IODA_INVALID_PE;	/* cleared even if the OPAL call failed */
+	}
+}
+
+static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb;
+
+	/* M32 segments are only released for IODA2 PHBs */
+	if (phb->type != PNV_PHB_IODA2)
+		return;
+	pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap);
+}
+
+static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb;
+	struct pnv_ioda_pe *slave, *tmp;
+
+	pe_info(pe, "Releasing PE\n");
+
+	mutex_lock(&phb->ioda.pe_list_mutex);	/* guards the PHB's PE list */
+	list_del(&pe->list);
+	mutex_unlock(&phb->ioda.pe_list_mutex);
+
+	switch (phb->type) {
+	case PNV_PHB_IODA2:
+		pnv_pci_ioda2_release_pe_dma(pe);
+		break;
+	case PNV_PHB_NPU_OCAPI:	/* no DMA teardown is done for OpenCAPI */
+		break;
+	default:
+		WARN_ON(1);	/* unexpected PHB type */
+	}
+
+	pnv_ioda_release_pe_seg(pe);
+	pnv_ioda_deconfigure_pe(pe->phb, pe);
+
+	/* Release slave PEs in the compound PE */
+	if (pe->flags & PNV_IODA_PE_MASTER) {
+		list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
+			list_del(&slave->list);
+			pnv_ioda_free_pe(slave);
+		}
+	}
+
+	/*
+	 * The PE for root bus can be removed because of hotplug in EEH
+	 * recovery for fenced PHB error. We need to mark the PE dead so
+	 * that it can be populated again in PCI hot add path. The PE
+	 * shouldn't be destroyed as it's the global reserved resource.
+	 */
+	if (phb->ioda.root_pe_idx == pe->pe_number)
+		return;
+
+	pnv_ioda_free_pe(pe);	/* every PE except the root bus PE is freed */
+}
+
+static void pnv_pci_release_device(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+
+	/* The VF PE state is torn down when sriov_disable() is called */
+	if (pdev->is_virtfn)
+		return;
+
+	if (!pdn || pdn->pe_number == IODA_INVALID_PE)	/* no PE was assigned */
+		return;
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * FIXME: Try move this to sriov_disable(). It's here since we allocate
+	 * the iov state at probe time since we need to fiddle with the IOV
+	 * resources.
+	 */
+	if (pdev->is_physfn)
+		kfree(pdev->dev.archdata.iov_data);
+#endif
+
+	/*
+	 * PCI hotplug can happen as part of EEH error recovery. The @pdn
+	 * isn't removed and added afterwards in this scenario. We should
+	 * set the PE number in @pdn to an invalid one. Otherwise, the PE's
+	 * device count is decreased on removing devices while failing to
+	 * be increased on adding devices. It leads to unbalanced PE's device
+	 * count and eventually make normal PCI hotplug path broken.
+	 */
+	pe = &phb->ioda.pe_array[pdn->pe_number];	/* look up before invalidating */
+	pdn->pe_number = IODA_INVALID_PE;
+
+	WARN_ON(--pe->device_count < 0);
+	if (pe->device_count == 0)	/* last device gone: release the PE */
+		pnv_ioda_release_pe(pe);
+}
+
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+
+	opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
+		       OPAL_ASSERT_RESET);	/* the OPAL return value is ignored */
+}
+
+static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pnv_ioda_pe *pe;
+
+	list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+		if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))	/* bus PEs only */
+			continue;
+
+		if (!pe->pbus)	/* PEs without a bus pointer are skipped */
+			continue;
+
+		if (bus->number == ((pe->rid >> 8) & 0xFF)) {	/* bits 15:8 of the RID */
+			pe->pbus = bus;
+			break;	/* only the first match is updated */
+		}
+	}
+}
+
+#ifdef CONFIG_IOMMU_API
+static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
+						struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct iommu_group *grp = NULL;
+	struct pnv_ioda_pe *pe;
+
+	if (WARN_ON(!phb))
+		return ERR_PTR(-ENODEV);
+
+	/* Look up the PE for this device id and pick up its group, if any */
+	pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+	if (pe)
+		grp = pe->table_group.group;
+
+	/* Return the group via iommu_group_ref_get(), or -ENODEV if none */
+	return grp ? iommu_group_ref_get(grp) : ERR_PTR(-ENODEV);
+}
+#endif
+
+static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {	/* every type but NPU_OCAPI */
+	.dma_dev_setup		= pnv_pci_ioda_dma_dev_setup,
+	.dma_bus_setup		= pnv_pci_ioda_dma_bus_setup,
+	.iommu_bypass_supported	= pnv_pci_ioda_iommu_bypass_supported,
+	.enable_device_hook	= pnv_pci_enable_device_hook,	/* blocks devices without a PE */
+	.release_device		= pnv_pci_release_device,
+	.window_alignment	= pnv_pci_window_alignment,
+	.setup_bridge		= pnv_pci_fixup_bridge_resources,
+	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
+	.shutdown		= pnv_pci_ioda_shutdown,
+#ifdef CONFIG_IOMMU_API
+	.device_group		= pnv_pci_device_group,
+#endif
+};
+
+static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {	/* PNV_PHB_NPU_OCAPI */
+	.enable_device_hook	= pnv_ocapi_enable_device_hook,	/* sets up a device PE on demand */
+	.release_device		= pnv_pci_release_device,
+	.window_alignment	= pnv_pci_window_alignment,
+	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
+	.shutdown		= pnv_pci_ioda_shutdown,
+};
+
+/* Probe one IODA PHB described by @np: allocate its state, parse the DT and reset the tables */
+static void __init pnv_pci_init_ioda_phb(struct device_node *np,
+					 u64 hub_id, int ioda_type)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	unsigned long size, m64map_off, m32map_off, pemap_off;
+	struct pnv_ioda_pe *root_pe;
+	struct resource r;
+	const __be64 *prop64;
+	const __be32 *prop32;
+	int len;
+	unsigned int segno;
+	u64 phb_id;
+	void *aux;
+	long rc;
+
+	if (!of_device_is_available(np))
+		return;
+
+	pr_info("Initializing %s PHB (%pOF)\n",	pnv_phb_names[ioda_type], np);
+
+	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
+	if (!prop64) {
+		pr_err("  Missing \"ibm,opal-phbid\" property !\n");
+		return;
+	}
+	phb_id = be64_to_cpup(prop64);
+	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
+
+	phb = kzalloc(sizeof(*phb), GFP_KERNEL);
+	if (!phb)
+		panic("%s: Failed to allocate %zu bytes\n", __func__,
+		      sizeof(*phb));
+
+	/* Allocate PCI controller */
+	phb->hose = hose = pcibios_alloc_controller(np);
+	if (!phb->hose) {
+		pr_err("  Can't allocate PCI controller for %pOF\n", np);
+		/* phb came from kzalloc() above, so it is released with kfree() */
+		kfree(phb);
+		return;
+	}
+
+	spin_lock_init(&phb->lock);
+	prop32 = of_get_property(np, "bus-range", &len);
+	if (prop32 && len == 8) {
+		hose->first_busno = be32_to_cpu(prop32[0]);
+		hose->last_busno = be32_to_cpu(prop32[1]);
+	} else {
+		pr_warn("  Broken <bus-range> on %pOF\n", np);
+		hose->first_busno = 0;
+		hose->last_busno = 0xff;
+	}
+	hose->private_data = phb;
+	phb->hub_id = hub_id;
+	phb->opal_id = phb_id;
+	phb->type = ioda_type;
+	mutex_init(&phb->ioda.pe_alloc_mutex);
+
+	/* Detect specific models for error handling */
+	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
+		phb->model = PNV_PHB_MODEL_P7IOC;
+	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
+		phb->model = PNV_PHB_MODEL_PHB3;
+	else
+		phb->model = PNV_PHB_MODEL_UNKNOWN;
+
+	/* Initialize diagnostic data buffer */
+	prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL);
+	if (prop32)
+		phb->diag_data_size = be32_to_cpup(prop32);
+	else
+		phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
+
+	phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL);
+	if (!phb->diag_data)
+		panic("%s: Failed to allocate %u bytes\n", __func__,
+		      phb->diag_data_size);
+
+	/* Parse 32-bit and IO ranges (if any) */
+	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
+
+	/* Get registers */
+	if (!of_address_to_resource(np, 0, &r)) {
+		phb->regs_phys = r.start;
+		phb->regs = ioremap(r.start, resource_size(&r));
+		if (phb->regs == NULL)
+			pr_err("  Failed to map registers !\n");
+	}
+
+	/* Initialize more IODA stuff */
+	phb->ioda.total_pe_num = 1;
+	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
+	if (prop32)
+		phb->ioda.total_pe_num = be32_to_cpup(prop32);
+	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
+	if (prop32)
+		phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
+
+	/* Invalidate RID to PE# mapping */
+	for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++)
+		phb->ioda.pe_rmap[segno] = IODA_INVALID_PE;
+
+	/* Parse 64-bit MMIO range */
+	pnv_ioda_parse_m64_window(phb);
+
+	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
+	/* FW has already cut off the top 64k of M32 space (MSI space) */
+	phb->ioda.m32_size += 0x10000;
+
+	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
+	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
+	phb->ioda.io_size = hose->pci_io_size;
+	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
+	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
+
+	/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
+	size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+			sizeof(unsigned long));	/* PE allocation bitmap */
+	m64map_off = size;
+	size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
+	m32map_off = size;
+	size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
+	pemap_off = size;
+	size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
+	aux = kzalloc(size, GFP_KERNEL);
+	if (!aux)
+		panic("%s: Failed to allocate %lu bytes\n", __func__, size);
+
+	/* Carve the single allocation up: bitmap, M64 map, M32 map, PE array */
+	phb->ioda.pe_alloc = aux;
+	phb->ioda.m64_segmap = aux + m64map_off;
+	phb->ioda.m32_segmap = aux + m32map_off;
+	for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
+		phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
+		phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+	}
+	phb->ioda.pe_array = aux + pemap_off;
+
+	/*
+	 * Choose PE number for root bus, which shouldn't have
+	 * M64 resources consumed by its child devices. To pick
+	 * the PE number adjacent to the reserved one if possible.
+	 */
+	pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx);
+	if (phb->ioda.reserved_pe_idx == 0) {
+		phb->ioda.root_pe_idx = 1;
+		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
+	} else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) {
+		phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1;
+		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
+	} else {
+		/* otherwise just allocate one */
+		root_pe = pnv_ioda_alloc_pe(phb, 1);	/* NOTE(review): result is used unchecked */
+		phb->ioda.root_pe_idx = root_pe->pe_number;
+	}
+
+	INIT_LIST_HEAD(&phb->ioda.pe_list);
+	mutex_init(&phb->ioda.pe_list_mutex);
+
+#if 0 /* We should really do that ... */
+	rc = opal_pci_set_phb_mem_window(opal->phb_id,
+					 window_type,
+					 window_num,
+					 starting_real_address,
+					 starting_pci_address,
+					 segment_size);
+#endif
+
+	pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+		phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
+		phb->ioda.m32_size, phb->ioda.m32_segsize);
+	if (phb->ioda.m64_size)
+		pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
+			phb->ioda.m64_size, phb->ioda.m64_segsize);
+	if (phb->ioda.io_size)
+		pr_info("                  IO: 0x%x [segment=0x%x]\n",
+			phb->ioda.io_size, phb->ioda.io_segsize);
+
+	phb->hose->ops = &pnv_pci_ops;
+	phb->get_pe_state = pnv_ioda_get_pe_state;
+	phb->freeze_pe = pnv_ioda_freeze_pe;
+	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
+
+	/* Setup MSI support */
+	pnv_pci_init_ioda_msis(phb);
+
+	/*
+	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
+	 * to let the PCI core do resource assignment. It's supposed
+	 * that the PCI core will do correct I/O and MMIO alignment
+	 * for the P2P bridge bars so that each PCI bus (excluding
+	 * the child P2P bridges) can form individual PE.
+	 */
+	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
+
+	switch (phb->type) {
+	case PNV_PHB_NPU_OCAPI:
+		hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
+		break;
+	default:
+		hose->controller_ops = pnv_pci_ioda_controller_ops;
+	}
+
+	ppc_md.pcibios_default_alignment = pnv_pci_default_alignment;
+
+#ifdef CONFIG_PCI_IOV
+	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov;
+	ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+	ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
+	ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
+#endif
+
+	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
+
+	/* Reset IODA tables to a clean state */
+	rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
+	if (rc)
+		pr_warn("  OPAL Error %ld performing IODA table reset !\n", rc);
+
+	/*
+	 * If we're running in kdump kernel, the previous kernel never
+	 * shutdown PCI devices correctly. We already got IODA table
+	 * cleaned out. So we have to issue PHB reset to stop all PCI
+	 * transactions from previous kernel. The ppc_pci_reset_phbs
+	 * kernel parameter will force this reset too. Additionally,
+	 * if the IODA reset above failed then use a bigger hammer.
+	 * This can happen if we get a PHB fatal error in very early
+	 * boot.
+	 */
+	if (is_kdump_kernel() || pci_reset_phbs || rc) {
+		pr_info("  Issue PHB reset ...\n");
+		pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+		pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
+	}
+
+	/* Remove M64 resource if we can't configure it successfully */
+	if (!phb->init_m64 || phb->init_m64(phb))
+		hose->mem_resources[1].flags = 0;
+
+	/* create pci_dn's for DT nodes under this PHB */
+	pci_devs_phb_init_dynamic(hose);
+}
+
+void __init pnv_pci_init_ioda2_phb(struct device_node *np)
+{
+	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);	/* hub_id is passed as 0 */
+}
+
+void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
+{
+	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);	/* hub_id is passed as 0 */
+}
+
+static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);	/* only dereferenced after the machine check */
+
+	if (!machine_is(powernv))	/* the fixup is registered for every vendor/device id */
+		return;
+
+	if (phb->type == PNV_PHB_NPU_OCAPI)
+		dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;	/* override cfg_size for OpenCAPI devices */
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
new file mode 100644
index 0000000000..59882da3e7
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/bitmap.h>
+#include <linux/pci.h>
+
+#include <asm/opal.h>
+
+#include "pci.h"
+
+/*
+ * The majority of the complexity in supporting SR-IOV on PowerNV comes from
+ * the need to put the MMIO space for each VF into a separate PE. Internally
+ * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table".
+ * The MBT historically only applied to the 64bit MMIO window of the PHB
+ * so it's common to see it referred to as the "M64BT".
+ *
+ * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces
+ * the address range that we want to map to be power-of-two sized and aligned.
+ * For conventional PCI devices this isn't really an issue since PCI device BARs
+ * have the same requirement.
+ *
+ * For a SR-IOV BAR things are a little more awkward since size and alignment
+ * are not coupled. The alignment is set based on the per-VF BAR size, but
+ * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs
+ * isn't necessarily a power of two, so neither is the total size. To fix that
+ * we need to finesse (read: hack) the Linux BAR allocator so that it will
+ * allocate the SR-IOV BARs in a way that lets us map them using the MBT.
+ *
+ * The changes to size and alignment that we need to do depend on the "mode"
+ * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above,
+ * so as a baseline we can assume that we have the following BAR modes
+ * available:
+ *
+ *   NB: $PE_COUNT is the number of PEs that the PHB supports.
+ *
+ * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized
+ *    segments. The n'th segment is mapped to the n'th PE.
+ * b) An un-segmented BAR that maps the whole address range to a specific PE.
+ *
+ *
+ * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR.
+ * For comparison b) requires one entry per-VF per-BAR, or:
+ * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment
+ * to equal the size of the per-VF BAR area. So:
+ *
+ *	new_size = per-vf-size * number-of-PEs
+ *
+ * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size
+ * to "new_size", calculated above. Implementing this is a convoluted process
+ * which requires several hooks in the PCI core:
+ *
+ * 1. In pcibios_device_add() we call pnv_pci_ioda_fixup_iov().
+ *
+ *    At this point the device has been probed and the device's BARs are sized,
+ *    but no resource allocations have been done. The SR-IOV BARs are sized
+ *    based on the maximum number of VFs supported by the device and we need
+ *    to increase that to new_size.
+ *
+ * 2. Later, when Linux actually assigns resources it tries to make the resource
+ *    allocations for each PCI bus as compact as possible. As a part of that it
+ *    sorts the BARs on a bus by their required alignment, which is calculated
+ *    using pci_resource_alignment().
+ *
+ *    For IOV resources this goes:
+ *    pci_resource_alignment()
+ *        pci_sriov_resource_alignment()
+ *            pcibios_sriov_resource_alignment()
+ *                pnv_pci_iov_resource_alignment()
+ *
+ *    Our hook overrides the default alignment, equal to the per-vf-size, with
+ *    new_size computed above.
+ *
+ * 3. When userspace enables VFs for a device:
+ *
+ *    sriov_enable()
+ *       pcibios_sriov_enable()
+ *           pnv_pcibios_sriov_enable()
+ *
+ *    This is where we actually allocate PE numbers for each VF and setup the
+ *    MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena"
+ *    where each MBT segment is equal in size to the VF BAR so we can shift
+ *    around the actual SR-IOV BAR location within this arena. We need this
+ *    ability because the PE space is shared by all devices on the same PHB.
+ *    When using mode a) described above, segment 0 maps to PE#0, which might
+ *    already be in use by another device on the PHB.
+ *
+ *    As a result we need to allocate a contiguous range of PE numbers, then
+ *    shift the address programmed into the SR-IOV BAR of the PF so that the
+ *    address of VF0 matches up with the segment corresponding to the first
+ *    allocated PE number. This is handled in pnv_pci_vf_resource_shift().
+ *
+ *    Once all that is done we return to the PCI core which then enables VFs,
+ *    scans them and creates pci_devs for each. The init process for a VF is
+ *    largely the same as a normal device, but the VF is inserted into the IODA
+ *    PE that we allocated for it rather than the PE associated with the bus.
+ *
+ * 4. When userspace disables VFs we unwind the above in
+ *    pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since
+ *    we don't need to validate anything, just tear down the mappings and
+ *    move SR-IOV resource back to its "proper" location.
+ *
+ * That's how mode a) works. In theory mode b) (single PE mapping) is less work
+ * since we can map each individual VF with a separate BAR. However, there's a
+ * few limitations:
+ *
+ * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes
+ *    it only usable for devices with very large per-VF BARs. Such devices are
+ *    similar to Big Foot. They definitely exist, but I've never seen one.
+ *
+ * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only
+ *    have 16 in total and some are needed for other purposes. Most SR-IOV
+ *    capable network cards can support more than 16 VFs on each port.
+ *
+ * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO
+ * window of the PHB.
+ *
+ *
+ *
+ * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It
+ * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows
+ * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since
+ * the Linux BAR allocation will place any BAR marked as non-prefetchable into
+ * the non-prefetchable bridge window, which is 32bit only. It also added two
+ * new modes:
+ *
+ * c) A segmented BAR similar to a), but each segment can be individually
+ *    mapped to any PE. This matches how the 32bit MMIO window worked on
+ *    IODA1&2.
+ *
+ * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a),
+ *    but with fewer segments and configurable base PE.
+ *
+ *    i.e. The n'th segment maps to the (n + base)'th PE.
+ *
+ *    The base PE is also required to be a multiple of the window size.
+ *
+ * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us
+ * to exploit any of the IODA3 features.
+ */
+
+static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct resource *res;
+	int i;
+	resource_size_t vf_bar_sz;
+	struct pnv_iov_data *iov;
+	int mul;
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov)
+		goto disable_iov;
+	pdev->dev.archdata.iov_data = iov;
+	mul = phb->ioda.total_pe_num;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_m64_flags(res->flags)) {
+			dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n",
+				 i, res);
+			goto disable_iov;
+		}
+
+		vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+
+		/*
+		 * Generally, one segmented M64 BAR maps one IOV BAR. However,
+		 * if a VF BAR is too large we end up wasting a lot of space.
+		 * If each VF needs more than 1/4 of the default m64 segment
+		 * then each VF BAR should be mapped in single-PE mode to reduce
+		 * the amount of space required. This does however limit the
+		 * number of VFs we can support.
+		 *
+		 * The 1/4 limit is arbitrary and can be tweaked.
+		 */
+		if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
+			/*
+			 * On PHB3, the minimum size alignment of M64 BAR in
+			 * single mode is 32MB. If this VF BAR is smaller than
+			 * 32MB, but still too large for a segmented window
+			 * then we can't map it and need to disable SR-IOV for
+			 * this device.
+			 */
+			if (vf_bar_sz < SZ_32M) {
+				pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n",
+					i, res);
+				goto disable_iov;
+			}
+
+			iov->m64_single_mode[i] = true;
+			continue;
+		}
+
+		/*
+		 * This BAR can be mapped with one segmented window, so adjust
+		 * the resource size to accommodate.
+		 */
+		pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res);
+		res->end = res->start + vf_bar_sz * mul - 1;
+		pci_dbg(pdev, "                       %pR\n", res);
+
+		pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
+			 i, res, mul);
+
+		iov->need_shift = true;
+	}
+
+	return;
+
+disable_iov:
+	/* Save ourselves some MMIO space by disabling the unusable BARs */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		res->flags = 0;
+		res->end = res->start - 1;
+	}
+
+	pdev->dev.archdata.iov_data = NULL;
+	kfree(iov);
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
+{
+	if (pdev->is_virtfn) {
+		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
+
+		/*
+		 * VF PEs are single-device PEs so their pdev pointer needs to
+		 * be set. The pdev doesn't exist when the PE is allocated (in
+		 * pcibios_sriov_enable()) so we fix it up here.
+		 */
+		pe->pdev = pdev;
+		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
+	} else if (pdev->is_physfn) {
+		/*
+		 * For PFs adjust their allocated IOV resources to match what
+		 * the PHB can support using its M64 BAR table.
+		 */
+		pnv_pci_ioda_fixup_iov_resources(pdev);
+	}
+}
+
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
+						      int resno)
+{
+	resource_size_t align = pci_iov_resource_size(pdev, resno);
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+
+	/*
+	 * iov can be null if we have an SR-IOV device with IOV BAR that can't
+	 * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
+	 * In that case we don't allow VFs to be enabled since one of their
+	 * BARs would not be placed in the correct PE.
+	 */
+	if (!iov)
+		return align;
+
+	/*
+	 * If we're using single mode then we can just use the native VF BAR
+	 * alignment. We validated that it's possible to use a single PE
+	 * window above when we did the fixup.
+	 */
+	if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES])
+		return align;
+
+	/*
+	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+	 * SR-IOV. From the hardware's perspective, the range mapped by an
+	 * M64 BAR must be size aligned.
+	 *
+	 * Single PE mode BARs were handled above and keep the plain VF BAR
+	 * alignment. Anything that reaches this point uses a segmented
+	 * (shared) M64 window, so the alignment is the expanded IOV BAR
+	 * size: the VF BAR size times the number of PEs on the PHB.
+	 */
+	return phb->ioda.total_pe_num * align;
+}
+
+/*
+ * Disable each M64 window this PF holds and return it to the PHB allocator,
+ * clearing it from used_m64_bar_mask so a later release can't free it again.
+ */
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+	int window_id;
+
+	for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) {
+		opal_pci_phb_mmio_enable(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id, 0);
+		clear_bit(window_id, iov->used_m64_bar_mask);
+		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
+	}
+
+	return 0;
+}
+
+
+/*
+ * PHB3 and beyond support segmented windows. The window's address range
+ * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1
+ * mapping between PEs and segments.
+ */
+static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb,
+					  int window_id,
+					  resource_size_t start,
+					  resource_size_t size)
+{
+	int64_t rc;
+
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* unused */
+					 size);
+	if (rc)
+		goto out;
+
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      window_id,
+				      OPAL_ENABLE_M64_SPLIT);
+out:
+	if (rc)
+		pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc);
+
+	return rc;
+}
+
+static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
+				       int pe_num,
+				       int window_id,
+				       resource_size_t start,
+				       resource_size_t size)
+{
+	int64_t rc;
+
+	/*
+	 * The API for setting up m64 mmio windows seems to have been designed
+	 * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed
+	 * split of 8 equally sized segments, each of which could individually
+	 * be assigned to a PE.
+	 *
+	 * The problem with this is that the API doesn't have any way to
+	 * communicate the number of segments we want on a BAR. This wasn't
+	 * a problem for p7-ioc since you didn't have a choice, but the
+	 * single PE windows added in PHB3 don't map cleanly to this API.
+	 *
+	 * As a result we've got this slightly awkward process where we
+	 * call opal_pci_map_pe_mmio_window() to put the window in single
+	 * PE mode, and set the PE for the window before setting the address
+	 * bounds. We need to do it this way because the single PE windows
+	 * have different alignment requirements on PHB3.
+	 */
+	rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+					 pe_num,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
+	if (rc)
+		goto out;
+
+	/*
+	 * NB: In single PE mode the window needs to be aligned to 32MB
+	 */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* ignored by FW, m64 is 1-1 */
+					 size);
+	if (rc)
+		goto out;
+
+	/*
+	 * Now actually enable it. We specified the BAR should be in "non-split"
+	 * mode so FW will validate that the BAR is in single PE mode.
+	 */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      window_id,
+				      OPAL_ENABLE_M64_NON_SPLIT);
+out:
+	if (rc)
+		pr_err("Error mapping single PE BAR\n");
+
+	return rc;
+}
+
+static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov)
+{
+	int win;
+
+	do {
+		win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+				phb->ioda.m64_bar_idx + 1, 0);
+
+		if (win >= phb->ioda.m64_bar_idx + 1)
+			return -1;
+	} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+	set_bit(win, iov->used_m64_bar_mask);
+
+	return win;
+}
+
+/*
+ * Map each IOV BAR with M64 windows: one segmented window per BAR, or one
+ * single-PE window per VF. Releases all windows and returns -EBUSY on error.
+ */
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb        *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data   *iov = pnv_iov_get(pdev);
+	resource_size_t        size, start;
+	struct resource       *res;
+	int                    win, base_pe_num;
+	int                    i, j;
+	int64_t                rc;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		/* don't need single mode? map everything in one go! */
+		if (!iov->m64_single_mode[i]) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			size = resource_size(res);
+			start = res->start;
+
+			rc = pnv_ioda_map_m64_segmented(phb, win, start, size);
+			if (rc)
+				goto m64_failed;
+
+			continue;
+		}
+
+		/* otherwise map each VF with single PE BARs */
+		size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);
+		base_pe_num = iov->vf_pe_arr[0].pe_number;
+
+		for (j = 0; j < num_vfs; j++) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			start = res->start + size * j;
+			rc = pnv_ioda_map_m64_single(phb, base_pe_num + j,
+						     win,
+						     start,
+						     size);
+			if (rc)
+				goto m64_failed;
+		}
+	}
+	return 0;
+
+m64_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+	return -EBUSY;
+}
+
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe, *pe_n;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+
+	if (!pdev->is_physfn)
+		return;
+
+	/* FIXME: Use pnv_ioda_release_pe()? */
+	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+		if (pe->parent_dev != pdev)
+			continue;
+
+		pnv_pci_ioda2_release_pe_dma(pe);
+
+		/* Remove from list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_del(&pe->list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		pnv_ioda_deconfigure_pe(phb, pe);
+
+		pnv_ioda_free_pe(pe);
+	}
+}
+
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+	struct resource *res, res2;
+	struct pnv_iov_data *iov;
+	resource_size_t size;
+	u16 num_vfs;
+	int i;
+
+	if (!dev->is_physfn)
+		return -EINVAL;
+	iov = pnv_iov_get(dev);
+
+	/*
+	 * "offset" is in VFs.  The M64 windows are sized so that when they
+	 * are segmented, each segment is the same size as the IOV BAR.
+	 * Each segment is in a separate PE, and the high order bits of the
+	 * address are the PE number.  Therefore, each VF's BAR is in a
+	 * separate PE, and changing the IOV BAR start address changes the
+	 * range of PEs the VFs are in.
+	 */
+	num_vfs = iov->num_vfs;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+		if (iov->m64_single_mode[i])
+			continue;
+
+		/*
+		 * The actual IOV BAR range is determined by the start address
+		 * and the actual size for num_vfs VFs BAR.  This check is to
+		 * make sure that after shifting, the range will not overlap
+		 * with another device.
+		 */
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2.flags = res->flags;
+		res2.start = res->start + (size * offset);
+		res2.end = res2.start + (size * num_vfs) - 1;
+
+		if (res2.end > res->end) {
+			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+				i, &res2, res, num_vfs, offset);
+			return -EBUSY;
+		}
+	}
+
+	/*
+	 * Since M64 BAR shares segments among all possible 256 PEs,
+	 * we have to shift the beginning of PF IOV BAR to make it start from
+	 * the segment which belongs to the PE number assigned to the first VF.
+	 * This creates a "hole" in the /proc/iomem which could be used for
+	 * allocating other resources so we reserve this area below and
+	 * release when IOV is released.
+	 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+		if (iov->m64_single_mode[i])
+			continue;
+
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2 = *res;
+		res->start += size * offset;
+
+		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
+			 i, &res2, res, (offset > 0) ? "En" : "Dis",
+			 num_vfs, offset);
+
+		if (offset < 0) {
+			devm_release_resource(&dev->dev, &iov->holes[i]);
+			memset(&iov->holes[i], 0, sizeof(iov->holes[i]));
+		}
+
+		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+		if (offset > 0) {
+			iov->holes[i].start = res2.start;
+			iov->holes[i].end = res2.start + size * offset - 1;
+			iov->holes[i].flags = IORESOURCE_BUS;
+			iov->holes[i].name = "pnv_iov_reserved";
+			devm_request_resource(&dev->dev, res->parent,
+					&iov->holes[i]);
+		}
+	}
+	return 0;
+}
+
+static void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+	u16                    num_vfs, base_pe;
+	struct pnv_iov_data   *iov;
+
+	iov = pnv_iov_get(pdev);
+	if (WARN_ON(!iov))
+		return;
+
+	num_vfs = iov->num_vfs;
+	base_pe = iov->vf_pe_arr[0].pe_number;
+
+	/* Release VF PEs */
+	pnv_ioda_release_vf_PE(pdev);
+
+	/* Un-shift the IOV BARs if we need to */
+	if (iov->need_shift)
+		pnv_pci_vf_resource_shift(pdev, -base_pe);
+
+	/* Release M64 windows */
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+}
+
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe;
+	int                    pe_num;
+	u16                    vf_index;
+	struct pnv_iov_data   *iov;
+	struct pci_dn         *pdn;
+
+	if (!pdev->is_physfn)
+		return;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	pdn = pci_get_pdn(pdev);
+	iov = pnv_iov_get(pdev);
+
+	/* Reserve PE for each VF */
+	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
+		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
+		struct pci_dn *vf_pdn;
+
+		pe = &iov->vf_pe_arr[vf_index];
+		pe->phb = phb;
+		pe->flags = PNV_IODA_PE_VF;
+		pe->pbus = NULL;
+		pe->parent_dev = pdev;
+		pe->mve_number = -1;
+		pe->rid = (vf_bus << 8) | vf_devfn;
+
+		pe_num = pe->pe_number;
+		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
+			pci_domain_nr(pdev->bus), pdev->bus->number,
+			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
+
+		if (pnv_ioda_configure_pe(phb, pe)) {
+			/* XXX What do we do here ? */
+			pnv_ioda_free_pe(pe);
+			pe->pdev = NULL;
+			continue;
+		}
+
+		/* Put PE to the list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_add_tail(&pe->list, &phb->ioda.pe_list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		/* associate this pe to its pdn */
+		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
+			if (vf_pdn->busno == vf_bus &&
+			    vf_pdn->devfn == vf_devfn) {
+				vf_pdn->pe_number = pe_num;
+				break;
+			}
+		}
+
+		pnv_pci_ioda2_setup_dma_pe(phb, pe);
+	}
+}
+
+static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_ioda_pe    *base_pe;
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	int                    ret;
+	u16                    i;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	/*
+	 * There are calls to IODA2 PE setup code littered throughout. We could
+	 * probably fix that, but we'd still have problems due to the
+	 * restriction inherent on IODA1 PHBs.
+	 *
+	 * NB: We class IODA3 as IODA2 since they're very similar.
+	 */
+	if (phb->type != PNV_PHB_IODA2) {
+		pci_err(pdev, "SR-IOV is not supported on this PHB\n");
+		return -ENXIO;
+	}
+
+	if (!iov) {
+		dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n");
+		return -ENOSPC;
+	}
+
+	/* allocate a contiguous block of PEs for our VFs */
+	base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
+	if (!base_pe) {
+		pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
+		return -EBUSY;
+	}
+
+	iov->vf_pe_arr = base_pe;
+	iov->num_vfs = num_vfs;
+
+	/* Assign M64 window accordingly */
+	ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
+	if (ret) {
+		dev_info(&pdev->dev, "Not enough M64 window resources\n");
+		goto m64_failed;
+	}
+
+	/*
+	 * When using one M64 BAR to map one IOV BAR, we need to shift
+	 * the IOV BAR according to the PE# allocated to the VFs.
+	 * Otherwise, the PE# for the VF will conflict with others.
+	 */
+	if (iov->need_shift) {
+		ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number);
+		if (ret)
+			goto shift_failed;
+	}
+
+	/* Setup VF PEs */
+	pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+	return 0;
+
+shift_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+
+m64_failed:
+	for (i = 0; i < num_vfs; i++)
+		pnv_ioda_free_pe(&iov->vf_pe_arr[i]);
+
+	return ret;
+}
+
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	pnv_pci_sriov_disable(pdev);
+
+	/* Release PCI data */
+	remove_sriov_vf_pdns(pdev);
+	return 0;
+}
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	/* Allocate PCI data */
+	add_sriov_vf_pdns(pdev);
+
+	return pnv_pci_sriov_enable(pdev, num_vfs);
+}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
new file mode 100644
index 0000000000..35f566aa04
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -0,0 +1,862 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/firmware.h>
+#include <asm/eeh_event.h>
+#include <asm/eeh.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+static DEFINE_MUTEX(tunnel_mutex);
+
+/*
+ * Find the OPAL slot id for @np by walking up to the first IODA2/IODA3 or
+ * NPU2 OpenCAPI PHB node. Every node reference taken by the walk is dropped
+ * again, so @np's refcount is unchanged. Returns 0, -ENXIO or -ENODEV.
+ */
+int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
+{
+	struct device_node *node;
+	u32 bdfn;
+	u64 phbid;
+
+	if (of_property_read_u32(np, "reg", &bdfn))
+		return -ENXIO;
+	bdfn = ((bdfn & 0x00ffff00) >> 8);
+	for (node = of_node_get(np); node; node = of_get_next_parent(node)) {
+		if (!PCI_DN(node)) {
+			of_node_put(node);
+			break;
+		}
+
+		if (!of_device_is_compatible(node, "ibm,ioda2-phb") &&
+		    !of_device_is_compatible(node, "ibm,ioda3-phb") &&
+		    !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb"))
+			continue;
+
+		if (of_property_read_u64(node, "ibm,opal-phbid", &phbid)) {
+			of_node_put(node);
+			return -ENXIO;
+		}
+
+		if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb"))
+			*id = PCI_PHB_SLOT_ID(phbid);
+		else
+			*id = PCI_SLOT_ID(phbid, bdfn);
+		of_node_put(node);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_slot_id);
+
+int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len)
+{
+	int64_t rc;
+
+	if (!opal_check_token(OPAL_GET_DEVICE_TREE))
+		return -ENXIO;
+
+	rc = opal_get_device_tree(phandle, (uint64_t)buf, len);
+	if (rc < OPAL_SUCCESS)
+		return -EIO;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_device_tree);
+
+int pnv_pci_get_presence_state(uint64_t id, uint8_t *state)
+{
+	int64_t rc;
+
+	if (!opal_check_token(OPAL_PCI_GET_PRESENCE_STATE))
+		return -ENXIO;
+
+	rc = opal_pci_get_presence_state(id, (uint64_t)state);
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_presence_state);
+
+int pnv_pci_get_power_state(uint64_t id, uint8_t *state)
+{
+	int64_t rc;
+
+	if (!opal_check_token(OPAL_PCI_GET_POWER_STATE))
+		return -ENXIO;
+
+	rc = opal_pci_get_power_state(id, (uint64_t)state);
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_power_state);
+
+int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg)
+{
+	struct opal_msg m;
+	int token, ret;
+	int64_t rc;
+
+	if (!opal_check_token(OPAL_PCI_SET_POWER_STATE))
+		return -ENXIO;
+
+	token = opal_async_get_token_interruptible();
+	if (unlikely(token < 0))
+		return token;
+
+	rc = opal_pci_set_power_state(token, id, (uint64_t)&state);
+	if (rc == OPAL_SUCCESS) {
+		ret = 0;
+		goto exit;
+	} else if (rc != OPAL_ASYNC_COMPLETION) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	ret = opal_async_wait_response(token, &m);
+	if (ret < 0)
+		goto exit;
+
+	if (msg) {
+		ret = 1;
+		memcpy(msg, &m, sizeof(m));
+	}
+
+exit:
+	opal_async_release_token(token);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
+
+/* Print the stopped entries of the (big-endian) PE State Tables (PEST). */
+static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
+{
+	u64 prevA = ULONG_MAX, prevB = ULONG_MAX;
+	bool dup = false;
+	int i;
+
+	for (i = 0; i < pest_size; i++) {
+		u64 peA = be64_to_cpu(pestA[i]);
+		u64 peB = be64_to_cpu(pestB[i]);
+
+		if (peA != prevA || peB != prevB) {
+			if (dup) {
+				pr_info("PE[..%03x] A/B: as above\n", i-1);
+				dup = false;
+			}
+			prevA = peA;
+			prevB = peB;
+			if (peA & PNV_IODA_STOPPED_STATE ||
+			    peB & PNV_IODA_STOPPED_STATE)
+				pr_info("PE[%03x] A/B: %016llx %016llx\n",
+					i, peA, peB);
+		} else if (!dup && (peA & PNV_IODA_STOPPED_STATE ||
+				    peB & PNV_IODA_STOPPED_STATE)) {
+			dup = true;
+		}
+	}
+}
+
+static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
+					 struct OpalIoPhbErrorCommon *common)
+{
+	struct OpalIoP7IOCPhbErrorData *data;
+
+	data = (struct OpalIoP7IOCPhbErrorData *)common;
+	pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n",
+		hose->global_number, be32_to_cpu(common->version));
+
+	if (data->brdgCtl)
+		pr_info("brdgCtl:     %08x\n",
+			be32_to_cpu(data->brdgCtl));
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("UtlSts:      %08x %08x %08x\n",
+			be32_to_cpu(data->portStatusReg),
+			be32_to_cpu(data->rootCmplxStatus),
+			be32_to_cpu(data->busAgentStatus));
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
+	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("RootErrSts:  %08x %08x %08x\n",
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("RootErrLog1: %08x %016llx %016llx\n",
+			be32_to_cpu(data->sourceId),
+			be64_to_cpu(data->errorClass),
+			be64_to_cpu(data->correlator));
+	if (data->p7iocPlssr || data->p7iocCsr)
+		pr_info("PhbSts:      %016llx %016llx\n",
+			be64_to_cpu(data->p7iocPlssr),
+			be64_to_cpu(data->p7iocCsr));
+	if (data->lemFir)
+		pr_info("Lem:         %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
+	if (data->mmioErrorStatus)
+		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->mmioErrorStatus),
+			be64_to_cpu(data->mmioFirstErrorStatus),
+			be64_to_cpu(data->mmioErrorLog0),
+			be64_to_cpu(data->mmioErrorLog1));
+	if (data->dma0ErrorStatus)
+		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma0ErrorStatus),
+			be64_to_cpu(data->dma0FirstErrorStatus),
+			be64_to_cpu(data->dma0ErrorLog0),
+			be64_to_cpu(data->dma0ErrorLog1));
+	if (data->dma1ErrorStatus)
+		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma1ErrorStatus),
+			be64_to_cpu(data->dma1FirstErrorStatus),
+			be64_to_cpu(data->dma1ErrorLog0),
+			be64_to_cpu(data->dma1ErrorLog1));
+
+	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS);
+}
+
+static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
+					struct OpalIoPhbErrorCommon *common)
+{
+	struct OpalIoPhb3ErrorData *data;	/* PHB3 view of the blob behind @common */
+
+	data = (struct OpalIoPhb3ErrorData*)common;	/* groups whose tested fields are zero are skipped */
+	pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n",
+		hose->global_number, be32_to_cpu(common->version));
+	if (data->brdgCtl)
+		pr_info("brdgCtl:     %08x\n",
+			be32_to_cpu(data->brdgCtl));
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("UtlSts:      %08x %08x %08x\n",
+			be32_to_cpu(data->portStatusReg),
+			be32_to_cpu(data->rootCmplxStatus),
+			be32_to_cpu(data->busAgentStatus));
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("RootErrSts:  %08x %08x %08x\n",
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("RootErrLog1: %08x %016llx %016llx\n",
+			be32_to_cpu(data->sourceId),
+			be64_to_cpu(data->errorClass),
+			be64_to_cpu(data->correlator));
+	if (data->nFir)
+		pr_info("nFir:        %016llx %016llx %016llx\n",
+			be64_to_cpu(data->nFir),
+			be64_to_cpu(data->nFirMask),
+			be64_to_cpu(data->nFirWOF));
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("PhbSts:      %016llx %016llx\n",
+			be64_to_cpu(data->phbPlssr),
+			be64_to_cpu(data->phbCsr));
+	if (data->lemFir)
+		pr_info("Lem:         %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
+	if (data->mmioErrorStatus)
+		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->mmioErrorStatus),
+			be64_to_cpu(data->mmioFirstErrorStatus),
+			be64_to_cpu(data->mmioErrorLog0),
+			be64_to_cpu(data->mmioErrorLog1));
+	if (data->dma0ErrorStatus)
+		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma0ErrorStatus),
+			be64_to_cpu(data->dma0FirstErrorStatus),
+			be64_to_cpu(data->dma0ErrorLog0),
+			be64_to_cpu(data->dma0ErrorLog1));
+	if (data->dma1ErrorStatus)
+		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma1ErrorStatus),
+			be64_to_cpu(data->dma1FirstErrorStatus),
+			be64_to_cpu(data->dma1ErrorLog0),
+			be64_to_cpu(data->dma1ErrorLog1));
+	/* Last, hand the PEST A/B arrays to the shared PEST dumper */
+	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS);
+}
+
+/* Print the register groups of a PHB4 diag-data blob whose tested fields are set. */
+static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose,
+					struct OpalIoPhbErrorCommon *common)
+{
+	struct OpalIoPhb4ErrorData *data = (struct OpalIoPhb4ErrorData *)common;
+
+	/* PHB# is printed in hex, like the other PHB# messages of this file */
+	pr_info("PHB4 PHB#%x Diag-data (Version: %d)\n",
+		hose->global_number, be32_to_cpu(common->version));
+	if (data->brdgCtl)
+		pr_info("brdgCtl:    %08x\n", be32_to_cpu(data->brdgCtl));
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("RootSts:    %08x %08x %08x %08x %08x\n",
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("RootErrSts: %08x %08x %08x\n",
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("RootErrLog: %08x %08x %08x %08x\n",
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
+	if (data->sourceId)
+		pr_info("sourceId:   %08x\n", be32_to_cpu(data->sourceId));
+	if (data->nFir)
+		pr_info("nFir:       %016llx %016llx %016llx\n",
+			be64_to_cpu(data->nFir),
+			be64_to_cpu(data->nFirMask),
+			be64_to_cpu(data->nFirWOF));
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("PhbSts:     %016llx %016llx\n",
+			be64_to_cpu(data->phbPlssr),
+			be64_to_cpu(data->phbCsr));
+	if (data->lemFir)
+		pr_info("Lem:        %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:     %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
+	if (data->phbTxeErrorStatus)
+		pr_info("PhbTxeErr:  %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbTxeErrorStatus),
+			be64_to_cpu(data->phbTxeFirstErrorStatus),
+			be64_to_cpu(data->phbTxeErrorLog0),
+			be64_to_cpu(data->phbTxeErrorLog1));
+	if (data->phbRxeArbErrorStatus)
+		pr_info("RxeArbErr:  %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRxeArbErrorStatus),
+			be64_to_cpu(data->phbRxeArbFirstErrorStatus),
+			be64_to_cpu(data->phbRxeArbErrorLog0),
+			be64_to_cpu(data->phbRxeArbErrorLog1));
+	if (data->phbRxeMrgErrorStatus)
+		pr_info("RxeMrgErr:  %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRxeMrgErrorStatus),
+			be64_to_cpu(data->phbRxeMrgFirstErrorStatus),
+			be64_to_cpu(data->phbRxeMrgErrorLog0),
+			be64_to_cpu(data->phbRxeMrgErrorLog1));
+	if (data->phbRxeTceErrorStatus)
+		pr_info("RxeTceErr:  %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRxeTceErrorStatus),
+			be64_to_cpu(data->phbRxeTceFirstErrorStatus),
+			be64_to_cpu(data->phbRxeTceErrorLog0),
+			be64_to_cpu(data->phbRxeTceErrorLog1));
+
+	if (data->phbPblErrorStatus)
+		pr_info("PblErr:     %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbPblErrorStatus),
+			be64_to_cpu(data->phbPblFirstErrorStatus),
+			be64_to_cpu(data->phbPblErrorLog0),
+			be64_to_cpu(data->phbPblErrorLog1));
+	if (data->phbPcieDlpErrorStatus)
+		pr_info("PcieDlp:    %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbPcieDlpErrorLog1),
+			be64_to_cpu(data->phbPcieDlpErrorLog2),
+			be64_to_cpu(data->phbPcieDlpErrorStatus));
+	if (data->phbRegbErrorStatus)
+		pr_info("RegbErr:    %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRegbErrorStatus),
+			be64_to_cpu(data->phbRegbFirstErrorStatus),
+			be64_to_cpu(data->phbRegbErrorLog0),
+			be64_to_cpu(data->phbRegbErrorLog1));
+
+	/* Last, hand the PEST A/B arrays to the shared PEST dumper */
+	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS);
+}
+
+/*
+ * Decode and print a PHB diag-data buffer, picking the decoder from ioType.
+ */
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+				unsigned char *log_buff)
+{
+	struct OpalIoPhbErrorCommon *hdr = (void *)log_buff;
+	u32 io_type;
+
+	/* Nothing to decode without both a controller and a buffer */
+	if (hose == NULL || log_buff == NULL)
+		return;
+
+	/* The buffer starts with the common header; ioType selects the layout */
+	io_type = be32_to_cpu(hdr->ioType);
+	if (io_type == OPAL_PHB_ERROR_DATA_TYPE_P7IOC)
+		pnv_pci_dump_p7ioc_diag_data(hose, hdr);
+	else if (io_type == OPAL_PHB_ERROR_DATA_TYPE_PHB3)
+		pnv_pci_dump_phb3_diag_data(hose, hdr);
+	else if (io_type == OPAL_PHB_ERROR_DATA_TYPE_PHB4)
+		pnv_pci_dump_phb4_diag_data(hose, hdr);
+	else
+		pr_warn("%s: Unrecognized ioType %d\n", __func__, io_type);
+}
+
+/* Clear the frozen state of @pe_no; dump the PHB diag-data if that fails. */
+static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
+{
+	unsigned long flags;
+	long rc;	/* signed, so it matches the %ld used to report it */
+	int has_diag, ret = 0;
+
+	spin_lock_irqsave(&phb->lock, flags);
+
+	/* Fetch PHB diag-data */
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+					 phb->diag_data_size);
+	has_diag = (rc == OPAL_SUCCESS);
+
+	/* Use the PHB's own unfreeze hook (compound PE support) if it has one */
+	if (phb->unfreeze_pe) {
+		ret = phb->unfreeze_pe(phb,
+				       pe_no,
+				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	} else {
+		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+					     pe_no,
+					     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+		if (rc) {
+			pr_warn("%s: Failure %ld clearing frozen PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number, pe_no);
+			ret = -EIO;
+		}
+	}
+
+	/*
+	 * For now, let's only display the diag buffer when we fail to clear
+	 * the EEH status. We'll do more sensible things later when we have
+	 * proper EEH support. We need to make sure we don't pollute ourselves
+	 * with the normal errors generated when probing empty slots
+	 */
+	if (has_diag && ret)
+		pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
+
+	spin_unlock_irqrestore(&phb->lock, flags);
+}
+
+/*
+ * Check whether the PE that @pdn belongs to is frozen and, if so, clear
+ * the freeze through pnv_pci_handle_eeh_config().
+ */
+static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	unsigned int pe_no = pdn->pe_number;
+	__be16	pcierr = 0;
+	u8	fstate = 0;
+	s64	rc;
+
+	/*
+	 * The PE# might not be set up yet during the PCI probe stage;
+	 * errors seen that early are mapped to the PHB's reserved PE
+	 * instead.
+	 */
+	if (pe_no == IODA_INVALID_PE)
+		pe_no = phb->ioda.reserved_pe_idx;
+
+	/*
+	 * Fetch the frozen state, through the PHB's own hook when it has
+	 * one (compound PE support), otherwise straight from OPAL.
+	 */
+	if (!phb->get_pe_state) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+						&fstate, &pcierr, NULL);
+		if (rc) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
+				__func__, rc, phb->hose->global_number, pe_no);
+			return;
+		}
+	} else {
+		fstate = phb->get_pe_state(phb, pe_no);
+	}
+
+	pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n",
+		 (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		/* Frozen: freeze a compound PE for consistency, then clear */
+		if (phb->freeze_pe)
+			phb->freeze_pe(phb, pe_no);
+
+		pnv_pci_handle_eeh_config(phb, pe_no);
+		break;
+	default:
+		/* Any other state needs no clearing */
+		break;
+	}
+}
+
+/*
+ * Read 1, 2 or 4 bytes of config space via OPAL; a failed read yields all-ones.
+ */
+int pnv_pci_cfg_read(struct pci_dn *pdn,
+		     int where, int size, u32 *val)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+	__be32 word;
+	__be16 half;
+	u8 byte;
+	s64 rc;
+
+	switch (size) {
+	case 1:
+		rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &byte);
+		*val = (rc != OPAL_SUCCESS) ? 0xff : byte;
+		break;
+	case 2:
+		rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
+						    &half);
+		*val = (rc != OPAL_SUCCESS) ? 0xffff : be16_to_cpu(half);
+		break;
+	case 4:
+		rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &word);
+		*val = (rc != OPAL_SUCCESS) ? 0xffffffff : be32_to_cpu(word);
+		break;
+	default:
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+	}
+
+	pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		 __func__, pdn->busno, pdn->devfn, where, size, *val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write @val to @pdn's config space via OPAL; the OPAL status is not checked.
+ */
+int pnv_pci_cfg_write(struct pci_dn *pdn,
+		      int where, int size, u32 val)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+	u64 id = phb->opal_id;
+
+	pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		 __func__, pdn->busno, pdn->devfn, where, size, val);
+
+	if (size == 1)
+		opal_pci_config_write_byte(id, bdfn, where, val);
+	else if (size == 2)
+		opal_pci_config_write_half_word(id, bdfn, where, val);
+	else if (size == 4)
+		opal_pci_config_write_word(id, bdfn, where, val);
+	else
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+#ifdef CONFIG_EEH
+/*
+ * Return false when config access to @pdn must be refused, true otherwise.
+ */
+static bool pnv_pci_cfg_check(struct pci_dn *pdn)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	struct eeh_dev *edev = pdn->edev;
+
+	/* EEH off for this PHB, or no eeh_dev attached: nothing to refuse */
+	if (!(phb->flags & PNV_PHB_FLAG_EEH) || !edev)
+		return true;
+
+	/* The PE has config accesses blocked */
+	if (edev->pe && (edev->pe->state & EEH_PE_CFG_BLOCKED))
+		return false;
+
+	/* The device is flagged as removed */
+	if (edev->mode & EEH_DEV_REMOVED)
+		return false;
+
+	return true;
+}
+#else
+static inline bool pnv_pci_cfg_check(struct pci_dn *pdn)
+{
+	return true;	/* without CONFIG_EEH no config access is ever refused */
+}
+#endif /* CONFIG_EEH */
+
+static int pnv_pci_read_config(struct pci_bus *bus,
+			       unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct pnv_phb *phb;
+	struct pci_dn *pdn;
+	int ret;
+
+	*val = 0xFFFFFFFF;
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
+	if (!pdn || !pnv_pci_cfg_check(pdn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_read(pdn, where, size, val);
+	phb = pdn->phb->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH) || !pdn->edev) {
+		/* EEH off for this PHB or no eeh_dev: check the PE state here */
+		pnv_pci_config_check_eeh(pdn);
+		return ret;
+	}
+
+	/* An error-pattern value is passed to EEH to decide whether it failed */
+	if (*val == EEH_IO_ERROR_VALUE(size) &&
+	    eeh_dev_check_failure(pdn->edev))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return ret;
+}
+
+/* pci_ops .write hook: look up the pci_dn and write through OPAL. */
+static int pnv_pci_write_config(struct pci_bus *bus,
+				unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct pci_dn *pdn = pci_get_pdn_by_devfn(bus, devfn);
+	struct pnv_phb *phb;
+	int ret;
+
+	/* Unknown device, or config access currently refused for it */
+	if (!pdn || !pnv_pci_cfg_check(pdn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_write(pdn, where, size, val);
+
+	/* Only PHBs without the EEH flag get the frozen-PE check here */
+	phb = pdn->phb->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
+		pnv_pci_config_check_eeh(pdn);
+
+	return ret;
+}
+
+struct pci_ops pnv_pci_ops = {	/* config-space accessors defined above */
+	.read  = pnv_pci_read_config,
+	.write = pnv_pci_write_config,
+};
+
+/* Allocate a zeroed iommu_table on node @nid; NULL if the allocation fails. */
+struct iommu_table *pnv_pci_table_alloc(int nid)
+{
+	struct iommu_table *tbl = kzalloc_node(sizeof(*tbl), GFP_KERNEL, nid);
+
+	if (tbl) {
+		/* Start with an empty group list and an initialised kref */
+		INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+		kref_init(&tbl->it_kref);
+	}
+
+	return tbl;
+}
+
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);	/* host bridge of @dev's bus */
+
+	return of_node_get(hose->dn);	/* the bridge's node, passed through of_node_get() */
+}
+EXPORT_SYMBOL(pnv_pci_get_phb_node);
+
+/*
+ * Claim (@enable != 0) or release (@enable == 0) the PBCQ tunnel BAR of the
+ * PHB above @dev, serialised by tunnel_mutex.
+ */
+int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+	u64 current_bar;
+	__be64 raw;
+	int rc;
+
+	/* Both OPAL calls are needed; give up if firmware lacks either one */
+	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR) ||
+	    !opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
+		return -ENXIO;
+
+	mutex_lock(&tunnel_mutex);
+
+	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &raw);
+	if (rc != OPAL_SUCCESS) {
+		rc = -EIO;
+		goto unlock;
+	}
+	current_bar = be64_to_cpu(raw);
+
+	if (!enable && current_bar != addr) {
+		/* Releasing requires passing the address currently set */
+		rc = -EPERM;
+		goto unlock;
+	}
+	if (enable && current_bar) {
+		/*
+		 * Only one device per PHB can use atomics, first come first
+		 * served; setting the same address twice is ok.
+		 */
+		rc = (current_bar == addr) ? 0 : -EBUSY;
+		goto unlock;
+	}
+
+	/* Program @addr when enabling, zero when releasing */
+	rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id,
+					  enable ? addr : 0x0ULL);
+	rc = opal_error_code(rc);
+
+unlock:
+	mutex_unlock(&tunnel_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
+
+void pnv_pci_shutdown(void)
+{
+	struct pci_controller *phb_hose;	/* cursor over hose_list */
+
+	list_for_each_entry(phb_hose, &hose_list, list_node)
+		if (phb_hose->controller_ops.shutdown != NULL)	/* hook is optional */
+			phb_hose->controller_ops.shutdown(phb_hose);
+}
+
+/* Fix up the wrong class code in p7ioc and p8 root complexes (IBM device 0x3b9) */
+static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;	/* replaces whatever class was read */
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
+
+/* Boot-time PCI setup: initialise every PHB node found and set the DMA ops. */
+void __init pnv_pci_init(void)
+{
+	struct device_node *np;
+
+	pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
+
+	/* If we don't have OPAL, eg. in sim, just skip PCI probe */
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+
+#ifdef CONFIG_PCIEPORTBUS
+	/*
+	 * On PowerNV PCIe devices are (currently) managed in cooperation
+	 * with firmware. This isn't *strictly* required, but there's enough
+	 * assumptions baked into both firmware and the platform code that
+	 * it's unwise to allow the portbus services to be used.
+	 *
+	 * We need to fix this eventually, but for now set this flag to disable
+	 * the portbus driver. The AER service isn't required since AER events
+	 * are handled via EEH. The pciehp hotplug driver can't work without
+	 * kernel changes (and portbus binding breaks pnv_php). The other
+	 * services also require some thinking about how to integrate them.
+	 */
+	pcie_ports_disabled = true;
+#endif
+
+	/* Look for ioda2 built-in PHB3's */
+	for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
+		pnv_pci_init_ioda2_phb(np);
+
+	/* Look for ioda3 built-in PHB4's, we treat them as IODA2 */
+	for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
+		pnv_pci_init_ioda2_phb(np);
+
+	/* Look for NPU2 OpenCAPI PHBs */
+	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
+		pnv_pci_init_npu2_opencapi_phb(np);
+
+	/* Configure IOMMU DMA hooks */
+	set_pci_dma_ops(&dma_iommu_ops);
+}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
new file mode 100644
index 0000000000..957f2b47a3
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERNV_PCI_H
+#define __POWERNV_PCI_H
+
+#include <linux/compiler.h>		/* for __printf */
+#include <linux/iommu.h>
+#include <asm/iommu.h>
+#include <asm/msi_bitmap.h>
+
+struct pci_dn;
+
+enum pnv_phb_type {
+	PNV_PHB_IODA2,
+	PNV_PHB_NPU_OCAPI,
+};
+
+/* Precise PHB model for error management */
+enum pnv_phb_model {
+	PNV_PHB_MODEL_UNKNOWN,
+	PNV_PHB_MODEL_P7IOC,
+	PNV_PHB_MODEL_PHB3,
+};
+
+#define PNV_PCI_DIAG_BUF_SIZE	8192
+#define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
+#define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
+#define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
+#define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
+#define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
+#define PNV_IODA_PE_VF		(1 << 5)	/* PE for one VF 		*/
+
+/*
+ * A brief note on PNV_IODA_PE_BUS_ALL
+ *
+ * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses
+ * the Requester ID field of the PCIe request header to determine the device
+ * (and PE) that initiated a DMA. In legacy PCI individual memory read/write
+ * requests aren't tagged with the RID. To work around this the PCIe-to-PCI
+ * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side.
+ *
+ * PCIe-to-X bridges have a similar issue even though PCI-X requests also have
+ * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take
+ * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe
+ * side of the bridge.
+ *
+ * To work around these problems we use the BUS_ALL flag since every subordinate
+ * bus of the bridge should go into the same PE.
+ */
+
+/* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */
+#define PNV_IODA_STOPPED_STATE	0x8000000000000000
+
+/* Data associated with a PE, including IOMMU tracking etc.. */
+struct pnv_phb;
+struct pnv_ioda_pe {
+	unsigned long		flags;		/* PNV_IODA_PE_* bits */
+	struct pnv_phb		*phb;
+	int			device_count;
+
+	/* A PE can be associated with a single device or an
+	 * entire bus (& children). In the former case, pdev
+	 * is populated, in the latter case, pbus is.
+	 */
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev          *parent_dev;
+#endif
+	struct pci_dev		*pdev;
+	struct pci_bus		*pbus;
+
+	/* Effective RID (device RID for a device PE and base bus
+	 * RID with devfn 0 for a bus PE)
+	 */
+	unsigned int		rid;
+
+	/* PE number */
+	unsigned int		pe_number;
+
+	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
+	struct iommu_table_group table_group;
+
+	/* 64-bit TCE bypass region */
+	bool			tce_bypass_enabled;
+	uint64_t		tce_bypass_base;
+
+	/*
+	 * Used to track whether we've done DMA setup for this PE or not. We
+	 * want to defer allocating TCE tables, etc until we've added a
+	 * non-bridge device to the PE.
+	 */
+	bool			dma_setup_done;
+
+	/* MSIs. MVE index is identical for 32 and 64 bit MSI
+	 * and -1 if not supported. (It's actually identical to the
+	 * PE number)
+	 */
+	int			mve_number;
+
+	/* PEs in compound case */
+	struct pnv_ioda_pe	*master;
+	struct list_head	slaves;
+
+	/* Link in list of PE#s */
+	struct list_head	list;
+};
+
+#define PNV_PHB_FLAG_EEH	(1 << 0)
+
+struct pnv_phb {
+	struct pci_controller	*hose;
+	enum pnv_phb_type	type;
+	enum pnv_phb_model	model;
+	u64			hub_id;
+	u64			opal_id;	/* PHB id passed to the opal_pci_*() calls */
+	int			flags;		/* PNV_PHB_FLAG_* bits */
+	void __iomem		*regs;
+	u64			regs_phys;
+	spinlock_t		lock;
+
+#ifdef CONFIG_DEBUG_FS
+	int			has_dbgfs;
+	struct dentry		*dbgfs;
+#endif
+
+	unsigned int		msi_base;
+	struct msi_bitmap	msi_bmp;
+	int (*init_m64)(struct pnv_phb *phb);
+	int (*get_pe_state)(struct pnv_phb *phb, int pe_no);	/* optional hook */
+	void (*freeze_pe)(struct pnv_phb *phb, int pe_no);	/* optional hook */
+	int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);	/* optional hook */
+
+	struct {
+		/* Global bridge info */
+		unsigned int		total_pe_num;
+		unsigned int		reserved_pe_idx;
+		unsigned int		root_pe_idx;
+
+		/* 32-bit MMIO window */
+		unsigned int		m32_size;
+		unsigned int		m32_segsize;
+		unsigned int		m32_pci_base;
+
+		/* 64-bit MMIO window */
+		unsigned int		m64_bar_idx;
+		unsigned long		m64_size;
+		unsigned long		m64_segsize;
+		unsigned long		m64_base;
+#define MAX_M64_BARS 64
+		unsigned long		m64_bar_alloc;
+
+		/* IO ports */
+		unsigned int		io_size;
+		unsigned int		io_segsize;
+		unsigned int		io_pci_base;
+
+		/* PE allocation */
+		struct mutex		pe_alloc_mutex;
+		unsigned long		*pe_alloc;
+		struct pnv_ioda_pe	*pe_array;
+
+		/* M64, M32 & IO segment maps */
+		unsigned int		*m64_segmap;
+		unsigned int		*m32_segmap;
+		unsigned int		*io_segmap;
+
+		/* IRQ chip */
+		int			irq_chip_init;
+		struct irq_chip		irq_chip;
+
+		/* Sorted list of used PE's based
+		 * on the sequence of creation
+		 */
+		struct list_head	pe_list;
+		struct mutex            pe_list_mutex;
+
+		/* Reverse map of PEs, indexed by {bus, devfn} */
+		unsigned int		pe_rmap[0x10000];
+	} ioda;
+
+	/* PHB and hub diagnostics */
+	unsigned int		diag_data_size;	/* size of the diag_data buffer */
+	u8			*diag_data;
+};
+
+
+/* IODA PE management */
+
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
+{
+	unsigned long m64_start = phb->ioda.m64_base;
+	unsigned long m64_end = m64_start + phb->ioda.m64_size;
+
+	/*
+	 * Decide by address, not by resource flags: the Linux PCI allocation
+	 * code sometimes puts a 64-bit prefetchable BAR in the 32-bit
+	 * window. For simplicity only the resource start is tested.
+	 */
+	return r->start >= m64_start && r->start < m64_end;
+}
+
+/* True only when both IORESOURCE_MEM_64 and IORESOURCE_PREFETCH are set. */
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+	/* No wanted bit may be missing from @resource_flags */
+	return !(~resource_flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+}
+
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
+
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * For SR-IOV we want to put each VF's MMIO resource in to a separate PE.
+ * This requires a bit of acrobatics with the MMIO -> PE configuration
+ * and this structure is used to keep track of it all.
+ */
+struct pnv_iov_data {
+	/* number of VFs enabled */
+	u16     num_vfs;
+
+	/* pointer to the array of VF PEs, num_vfs entries long */
+	struct pnv_ioda_pe *vf_pe_arr;
+
+	/* Did we map the VF BAR with single-PE IODA BARs? */
+	bool    m64_single_mode[PCI_SRIOV_NUM_BARS];
+
+	/*
+	 * True if we're using any segmented windows. In that case we need to
+	 * shift the start of the IOV resource to the segment corresponding
+	 * to the allocated PE.
+	 */
+	bool    need_shift;
+
+	/*
+	 * Bit mask used to track which m64 windows are used to map the
+	 * SR-IOV BARs for this device.
+	 */
+	DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS);
+
+	/*
+	 * If we map the SR-IOV BARs with a segmented window then
+	 * parts of that window will be "claimed" by other PEs.
+	 *
+	 * "holes" here is used to reserve the leading portion
+	 * of the window that is used by other (non VF) PEs.
+	 */
+	struct resource holes[PCI_SRIOV_NUM_BARS];
+};
+
+static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
+{
+	return pdev->dev.archdata.iov_data;	/* SR-IOV bookkeeping stored in the device's archdata */
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev);
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno);
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
+
+extern struct pci_ops pnv_pci_ops;
+
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+				unsigned char *log_buff);
+int pnv_pci_cfg_read(struct pci_dn *pdn,
+		     int where, int size, u32 *val);
+int pnv_pci_cfg_write(struct pci_dn *pdn,
+		      int where, int size, u32 val);
+extern struct iommu_table *pnv_pci_table_alloc(int nid);
+
+extern void pnv_pci_init_ioda_hub(struct device_node *np);
+extern void pnv_pci_init_ioda2_phb(struct device_node *np);
+extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
+extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
+extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
+
+extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn);
+extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
+extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
+extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+		__u64 window_size, __u32 levels);
+extern int pnv_eeh_post_init(void);
+
+__printf(3, 4)
+extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+			    const char *fmt, ...);
+#define pe_err(pe, fmt, ...)					\
+	pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
+#define pe_warn(pe, fmt, ...)					\
+	pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define pe_info(pe, fmt, ...)					\
+	pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
+
+/* pci-ioda-tce.c */
+#define POWERNV_IOMMU_DEFAULT_LEVELS	2
+#define POWERNV_IOMMU_MAX_LEVELS	5
+
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
+		bool alloc);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		bool alloc_userspace_copy, struct iommu_table *tbl);
+extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+		struct iommu_table *tbl,
+		struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+		struct iommu_table_group *table_group);
+extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+		void *tce_mem, u64 tce_size,
+		u64 dma_offset, unsigned int page_shift);
+
+extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
+
+/* Map a PCI bus to its pnv_phb; NULL when the bus has no sysdata. */
+static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
+{
+	struct pci_controller *ctrl = bus->sysdata;
+
+	if (ctrl == NULL)
+		return NULL;
+	return ctrl->private_data;
+}
+
+#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
new file mode 100644
index 0000000000..866efdc103
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _POWERNV_H
+#define _POWERNV_H
+
+/*
+ * There's various hacks scattered throughout the generic powerpc arch code
+ * that needs to call into powernv platform stuff. The prototypes for those
+ * functions are in asm/powernv.h
+ */
+#include <asm/powernv.h>
+
+#ifdef CONFIG_SMP
+extern void pnv_smp_init(void);
+#else
+static inline void pnv_smp_init(void) { }	/* empty stub for !CONFIG_SMP builds */
+#endif
+
+extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn;
+
+struct pci_dev;
+
+#ifdef CONFIG_PCI
+extern void pnv_pci_init(void);
+extern void pnv_pci_shutdown(void);
+#else
+static inline void pnv_pci_init(void) { }	/* empty stub for !CONFIG_PCI builds */
+static inline void pnv_pci_shutdown(void) { }	/* empty stub for !CONFIG_PCI builds */
+#endif
+
+extern u32 pnv_get_supported_cpuidle_states(void);
+
+extern void pnv_lpc_init(void);
+
+extern void opal_handle_events(void);
+extern bool opal_have_pending_events(void);
+extern void opal_event_shutdown(void);
+
+bool cpu_core_split_required(void);
+
+struct memcons;
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
+u32 __init memcons_get_size(struct memcons *mc);
+struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name);
+
+void pnv_rng_init(void);
+
+#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
new file mode 100644
index 0000000000..196aa70fe0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"powernv-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+#include "powernv.h"
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+struct pnv_rng {
+	void __iomem *regs;		/* of_iomap() mapping, read when MSR_DR is set */
+	void __iomem *regs_real;	/* resource start address, read when MSR_DR is clear */
+	unsigned long mask;		/* running mask xored into each value by rng_whiten() */
+};
+
+static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng);
+
+/* Whiten one raw value with @rng's running mask and advance that mask. */
+static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val)
+{
+	unsigned long prev_mask = rng->mask;
+	unsigned long ones;
+
+	/* Count the set bits of the raw value; bit 0 of the count is its parity */
+	asm (".machine push;   \
+	      .machine power7; \
+	      popcntd %0,%1;   \
+	      .machine pop;"
+	     : "=r" (ones) : "r" (val));
+
+	/* Shift that parity bit into the mask kept for the next value */
+	rng->mask = (prev_mask << 1) | (ones & 1);
+
+	/* The value handed out is the raw one xored with the previous mask */
+	return val ^ prev_mask;
+}
+
+/* Seed hook using DARN: returns 1 with *v set, or 0 with *v untouched. */
+static int pnv_get_random_darn(unsigned long *v)
+{
+	unsigned long darn_val;
+
+	/* Using DARN with L=1 - 64-bit conditioned random number */
+	asm volatile(PPC_DARN(%0, 1) : "=r"(darn_val));
+
+	if (darn_val != DARN_ERR) {
+		*v = darn_val;
+		return 1;
+	}
+	return 0;
+}
+
+static int __init initialise_darn(void)
+{
+	unsigned long scratch;	/* value is discarded, only success matters */
+	int tries;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	for (tries = 10; tries > 0; tries--) {	/* up to ten attempts */
+		if (!pnv_get_random_darn(&scratch))
+			continue;
+		ppc_md.get_random_seed = pnv_get_random_darn;
+		return 0;
+	}
+	return -EIO;
+}
+
+int pnv_get_random_long(unsigned long *v)
+{
+	struct pnv_rng *rng;
+
+	if (mfmsr() & MSR_DR) {	/* MSR_DR set: read via the of_iomap() mapping */
+		rng = get_cpu_var(pnv_rng);
+		*v = rng_whiten(rng, in_be64(rng->regs));
+		put_cpu_var(pnv_rng);	/* same variable as get_cpu_var() above */
+	} else {		/* MSR_DR clear: read via the regs_real address */
+		rng = raw_cpu_read(pnv_rng);
+		*v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+	}
+	return 1;	/* always reports success */
+}
+EXPORT_SYMBOL_GPL(pnv_get_random_long);
+
+static __init void rng_init_per_cpu(struct pnv_rng *rng,
+				    struct device_node *dn)
+{
+	int chip_id = of_get_ibm_chip_id(dn);
+	int cpu;
+
+	if (chip_id == -1)
+		pr_warn("No ibm,chip-id found for %pOF.\n", dn);
+
+	/* CPUs without an RNG yet take this one; same-chip CPUs always do */
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(pnv_rng, cpu) && cpu_to_chip_id(cpu) != chip_id)
+			continue;
+		per_cpu(pnv_rng, cpu) = rng;
+	}
+}
+
+/* Map the RNG described by @dn and install it as the seed source. */
+static __init int rng_create(struct device_node *dn)
+{
+	struct resource res;
+	struct pnv_rng *rng = kzalloc(sizeof(*rng), GFP_KERNEL);
+
+	if (!rng)
+		return -ENOMEM;
+
+	/* Resource start address, used for reads made while MSR_DR is clear */
+	if (of_address_to_resource(dn, 0, &res))
+		goto err_free;
+	rng->regs_real = (void __iomem *)res.start;
+
+	rng->regs = of_iomap(dn, 0);
+	if (!rng->regs)
+		goto err_free;
+
+	/* The initial whitening mask is a first value read from the device */
+	rng->mask = in_be64(rng->regs);
+
+	rng_init_per_cpu(rng, dn);
+
+	/* From here on seed requests go to pnv_get_random_long() */
+	ppc_md.get_random_seed = pnv_get_random_long;
+
+	return 0;
+
+err_free:
+	kfree(rng);
+	return -ENXIO;
+}
+
+static int __init pnv_get_random_long_early(unsigned long *v)
+{
+	struct device_node *dn;
+
+	/* rng_create() allocates, so nothing can be done before slab is up */
+	if (!slab_is_available())
+		return 0;
+
+	/* Swap this hook out with cmpxchg so only one caller runs the setup */
+	if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early,
+		    NULL) != pnv_get_random_long_early)
+		return 0;
+
+	for_each_compatible_node(dn, NULL, "ibm,power-rng")
+		rng_create(dn);
+
+	return ppc_md.get_random_seed ? ppc_md.get_random_seed(v) : 0;
+}
+
+void __init pnv_rng_init(void)
+{
+	struct device_node *rng_node;
+
+	/* Prefer darn over the rest: 0 means it is now the seed source. */
+	if (initialise_darn() == 0)
+		return;
+
+	/* Otherwise leave the register-RNG setup to the first seed request */
+	rng_node = of_find_compatible_node(NULL, NULL, "ibm,power-rng");
+	if (rng_node != NULL)
+		ppc_md.get_random_seed = pnv_get_random_long_early;
+	of_node_put(rng_node);
+}
+
+static int __init pnv_rng_late_init(void)
+{
+	struct device_node *dn;
+	unsigned long unused;
+
+	/* Run the deferred setup now if no seed request triggered it so far. */
+	if (ppc_md.get_random_seed == pnv_get_random_long_early)
+		pnv_get_random_long_early(&unused);
+
+	if (ppc_md.get_random_seed != pnv_get_random_long)
+		return 0;
+
+	for_each_compatible_node(dn, NULL, "ibm,power-rng")
+		of_platform_device_create(dn, NULL, NULL);
+	return 0;
+}
+machine_subsys_initcall(powernv, pnv_rng_late_init);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
new file mode 100644
index 0000000000..4dbb47ddbd
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -0,0 +1,587 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV setup code.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/interrupt.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+#include <linux/cpufreq.h>
+#include <linux/memblock.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/opal.h>
+#include <asm/kexec.h>
+#include <asm/smp.h>
+#include <asm/tm.h>
+#include <asm/setup.h>
+#include <asm/security_features.h>
+
+#include "powernv.h"
+
+
+/*
+ * Report whether the child node @name of @fw_features carries the boolean
+ * property @state (callers pass "enabled" or "disabled").
+ *
+ * Returns false when @fw_features has no child called @name.
+ */
+static bool __init fw_feature_is(const char *state, const char *name,
+			  struct device_node *fw_features)
+{
+	struct device_node *feature;
+	bool present;
+
+	feature = of_get_child_by_name(fw_features, name);
+	if (!feature)
+		return false;
+
+	present = of_property_read_bool(feature, state);
+	of_node_put(feature);
+
+	return present;
+}
+
+/*
+ * Translate the firmware feature nodes found under @np into security
+ * feature flags.
+ *
+ * The first group of flags is set when firmware marks the matching node
+ * "enabled". The second group is cleared either when firmware marks the
+ * node "disabled", or when a "no-need-*" node is marked "enabled".
+ */
+static void __init init_fw_feat_flags(struct device_node *np)
+{
+	if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
+		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+	if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
+		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+	if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
+		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+	if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
+		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
+		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
+		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+	if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
+		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
+	/*
+	 * The features below are enabled by default, so we instead look to see
+	 * if firmware has *disabled* them, and clear them if so.
+	 */
+	if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
+		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+	if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+	if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+	if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
+		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+
+	/*
+	 * These three are phrased the other way round: firmware advertises
+	 * that the mitigation is *not* needed by enabling a "no-need-*" node.
+	 */
+	if (fw_feature_is("enabled", "no-need-l1d-flush-msr-pr-1-to-0", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+	if (fw_feature_is("enabled", "no-need-l1d-flush-kernel-on-user-access", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+	if (fw_feature_is("enabled", "no-need-store-drain-on-priv-state-switch", np))
+		security_ftr_clear(SEC_FTR_STF_BARRIER);
+}
+
+/*
+ * Configure the speculation-related mitigations on bare metal.
+ *
+ * Reads the "fw-features" child of the "ibm,opal" node (when present) to
+ * update the security feature flags and to pick the L1D flush type, then
+ * enables or disables the RFI, entry and uaccess flushes accordingly and
+ * sets up the count cache flush and the STF barrier.
+ */
+static void __init pnv_setup_security_mitigations(void)
+{
+	struct device_node *opal, *fw_features;
+	enum l1d_flush_type type;
+	bool is_p7_or_p8;
+	bool enable;
+
+	/* Default to fallback in case fw-features are not available */
+	type = L1D_FLUSH_FALLBACK;
+
+	opal = of_find_node_by_name(NULL, "ibm,opal");
+	fw_features = of_get_child_by_name(opal, "fw-features");
+	of_node_put(opal);
+
+	if (fw_features) {
+		init_fw_feat_flags(fw_features);
+		of_node_put(fw_features);
+
+		/* ORI30 is tested last, so it wins when both are enabled. */
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+			type = L1D_FLUSH_MTTRIG;
+
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+			type = L1D_FLUSH_ORI;
+	}
+
+	/*
+	 * The issues addressed by the entry and uaccess flush don't affect P7
+	 * or P8, so on bare metal disable them explicitly in case firmware does
+	 * not include the features to disable them. POWER9 and newer processors
+	 * should have the appropriate firmware flags.
+	 */
+	is_p7_or_p8 = pvr_version_is(PVR_POWER7) ||
+		      pvr_version_is(PVR_POWER7p) ||
+		      pvr_version_is(PVR_POWER8E) ||
+		      pvr_version_is(PVR_POWER8NVL) ||
+		      pvr_version_is(PVR_POWER8);
+	if (is_p7_or_p8) {
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+	}
+
+	/* RFI flush: wanted if either privilege-transition flush is needed. */
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+	setup_rfi_flush(type, enable);
+
+	setup_count_cache_flush();
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+	setup_entry_flush(enable);
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+	setup_uaccess_flush(enable);
+
+	setup_stf_barrier();
+}
+
+/*
+ * Count the "cpu" device tree nodes whose "status" property contains the
+ * string "bad" and, if there are any, print a warning banner with the count.
+ */
+static void __init pnv_check_guarded_cores(void)
+{
+	struct device_node *cpu_node;
+	int guarded = 0;
+
+	for_each_node_by_type(cpu_node, "cpu") {
+		if (of_property_match_string(cpu_node, "status", "bad") < 0)
+			continue;
+		guarded++;
+	}
+
+	if (!guarded)
+		return;
+
+	printk("  _     _______________\n");
+	pr_cont(" | |   /               \\\n");
+	pr_cont(" | |   |    WARNING!   |\n");
+	pr_cont(" | |   |               |\n");
+	pr_cont(" | |   | It looks like |\n");
+	pr_cont(" |_|   |  you have %*d |\n", 3, guarded);
+	pr_cont("  _    | guarded cores |\n");
+	pr_cont(" (_)   \\_______________/\n");
+}
+
+/*
+ * The .setup_arch hook of the powernv machine description.
+ *
+ * Sets the panic timeout, applies the security mitigations, initialises
+ * SMP, NVRAM (only with OPAL firmware) and the RNG, enables nap mode and
+ * warns about guarded cores.
+ */
+static void __init pnv_setup_arch(void)
+{
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
+	pnv_setup_security_mitigations();
+
+	/* Initialize SMP */
+	pnv_smp_init();
+
+	/* Setup RTC and NVRAM callbacks */
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		opal_nvram_init();
+
+	/* Enable NAP mode */
+	powersave_nap = 1;
+
+	pnv_check_guarded_cores();
+
+	/* XXX PMCS */
+
+	pnv_rng_init();
+}
+
+/*
+ * Append firmware version information from the "/ibm,opal/firmware" node
+ * to ppc_hw_desc. Does nothing if that node does not exist.
+ */
+static void __init pnv_add_hw_description(void)
+{
+	struct device_node *fw_node;
+	const char *str;
+
+	fw_node = of_find_node_by_path("/ibm,opal/firmware");
+	if (!fw_node)
+		return;
+
+	/* "version" is preferred; "git-id" is only consulted without it. */
+	if (!of_property_read_string(fw_node, "version", &str) ||
+	    !of_property_read_string(fw_node, "git-id", &str))
+		seq_buf_printf(&ppc_hw_desc, "opal:%s ", str);
+
+	if (!of_property_read_string(fw_node, "mi-version", &str))
+		seq_buf_printf(&ppc_hw_desc, "mi:%s ", str);
+
+	of_node_put(fw_node);
+}
+
+/*
+ * Early platform initialisation, called from pnv_probe().
+ *
+ * Records the firmware description, brings up the LPC bus, selects the
+ * console and, for the hash MMU, allocates the per-CPU buffers used to
+ * save SLB contents during a machine check.
+ */
+static void __init pnv_init(void)
+{
+	pnv_add_hw_description();
+
+	/*
+	 * Initialize the LPC bus now so that legacy serial
+	 * ports can be found on it
+	 */
+	opal_lpc_init();
+
+	/*
+	 * With CONFIG_HVC_OPAL and OPAL firmware the OPAL console is set up
+	 * early; in every other case "hvc" 0 is registered as the preferred
+	 * console. Note the #endif sits between the else and its statement.
+	 */
+#ifdef CONFIG_HVC_OPAL
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		hvc_opal_init_early();
+	else
+#endif
+		add_preferred_console("hvc", 0, NULL);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (!radix_enabled()) {
+		size_t size = sizeof(struct slb_entry) * mmu_slb_size;
+		int i;
+
+		/*
+		 * Allocate per cpu area to save old slb contents during MCE.
+		 * NOTE(review): the allocation result is stored unchecked;
+		 * presumably users of mce_faulty_slbs tolerate NULL - confirm.
+		 */
+		for_each_possible_cpu(i) {
+			paca_ptrs[i]->mce_faulty_slbs =
+					memblock_alloc_node(size,
+						__alignof__(struct slb_entry),
+						cpu_to_node(i));
+		}
+	}
+#endif
+}
+
+/*
+ * The .init_IRQ hook of the powernv machine description: initialise the
+ * interrupt controller and warn if no ppc_md.get_irq handler was installed.
+ */
+static void __init pnv_init_IRQ(void)
+{
+	/* Try using a XIVE if available, otherwise use a XICS */
+	if (!xive_native_init())
+		xics_init();
+
+	WARN_ON(!ppc_md.get_irq);
+}
+
+/*
+ * The .show_cpuinfo hook of the powernv machine description.
+ *
+ * Emits the machine model (from the root node's "model" property, or an
+ * empty string when the root node or the property is missing), the firmware
+ * type and the MMU mode into @m.
+ */
+static void pnv_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = "";
+
+	root = of_find_node_by_path("/");
+	if (root) {
+		const char *prop = of_get_property(root, "model", NULL);
+
+		/*
+		 * of_get_property() returns NULL when the property does not
+		 * exist; keep the empty default rather than printing "(null)".
+		 */
+		if (prop)
+			model = prop;
+	}
+	seq_printf(m, "machine\t\t: PowerNV %s\n", model);
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		seq_puts(m, "firmware\t: OPAL\n");
+	else
+		seq_puts(m, "firmware\t: BML\n");
+	/* model points into the node's data, so drop the node only now. */
+	of_node_put(root);
+	if (radix_enabled())
+		seq_puts(m, "MMU\t\t: Radix\n");
+	else
+		seq_puts(m, "MMU\t\t: Hash\n");
+}
+
+/*
+ * Common preparation shared by pnv_restart() and pnv_power_off(): shut down
+ * OPAL event delivery, print any pending flash update message, stop the
+ * other CPUs and hard-disable interrupts on this one.
+ */
+static void pnv_prepare_going_down(void)
+{
+	/*
+	 * Disable all notifiers from OPAL, we can't
+	 * service interrupts anymore anyway
+	 */
+	opal_event_shutdown();
+
+	/* Print flash update message if one is scheduled. */
+	opal_flash_update_print_message();
+
+	smp_send_stop();
+
+	hard_irq_disable();
+}
+
+/*
+ * Issue the OPAL reboot call selected by @cmd.
+ *
+ * A NULL or empty @cmd requests a plain cec-reboot; "full", "mpipl", "error"
+ * and "fast" map to the matching opal_cec_reboot2() type. Any other string
+ * yields OPAL_UNSUPPORTED without calling firmware.
+ */
+static long pnv_issue_reboot(const char *cmd)
+{
+	if (!cmd || !strlen(cmd))
+		return opal_cec_reboot();
+	if (!strcmp(cmd, "full"))
+		return opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
+	if (!strcmp(cmd, "mpipl"))
+		return opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL);
+	if (!strcmp(cmd, "error"))
+		return opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL);
+	if (!strcmp(cmd, "fast"))
+		return opal_cec_reboot2(OPAL_REBOOT_FAST, NULL);
+
+	return OPAL_UNSUPPORTED;
+}
+
+/*
+ * The ppc_md.restart hook: reboot the machine through OPAL.
+ *
+ * Retries while firmware reports busy. If a named reboot type fails for any
+ * other reason, the failure is logged and a plain cec-reboot is attempted
+ * instead. Never returns; once the request has been issued (or has failed
+ * for good) it polls OPAL events forever.
+ */
+static void  __noreturn pnv_restart(char *cmd)
+{
+	long rc;
+
+	pnv_prepare_going_down();
+
+	for (;;) {
+		rc = pnv_issue_reboot(cmd);
+
+		if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+			/* Opal is busy wait for some time and retry */
+			opal_poll_events(NULL);
+			mdelay(10);
+			continue;
+		}
+
+		if (cmd && rc) {
+			/* Unknown error while issuing reboot */
+			if (rc == OPAL_UNSUPPORTED)
+				pr_err("Unsupported '%s' reboot.\n", cmd);
+			else
+				pr_err("Unable to issue '%s' reboot. Err=%ld\n",
+				       cmd, rc);
+			pr_info("Forcing a cec-reboot\n");
+
+			/* Go round again, this time as a plain cec-reboot. */
+			cmd = NULL;
+			continue;
+		}
+
+		/* Unknown error while issuing cec-reboot */
+		if (rc != OPAL_SUCCESS)
+			pr_err("Unable to reboot. Err=%ld\n", rc);
+		break;
+	}
+
+	for (;;)
+		opal_poll_events(NULL);
+}
+
+/*
+ * Power the machine down through OPAL.
+ *
+ * The power-down request is repeated for as long as firmware answers with a
+ * busy code: OPAL_BUSY_EVENT is followed by an event poll, anything else by
+ * a 10ms delay. Never returns; afterwards it polls OPAL events forever.
+ */
+static void __noreturn pnv_power_off(void)
+{
+	long rc;
+
+	pnv_prepare_going_down();
+
+	do {
+		rc = opal_cec_power_down(0);
+		if (rc == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+		else
+			mdelay(10);
+	} while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
+
+	for (;;)
+		opal_poll_events(NULL);
+}
+
+/* The ppc_md.halt hook: halting is implemented as a power off. */
+static void __noreturn pnv_halt(void)
+{
+	pnv_power_off();
+}
+
+/* The .progress hook of the powernv machine description; it does nothing. */
+static void pnv_progress(char *s, unsigned short hex)
+{
+}
+
+/*
+ * The .machine_shutdown hook of the powernv machine description: quiesce
+ * PCI first, then OPAL.
+ */
+static void pnv_shutdown(void)
+{
+	/* Let the PCI code clear up IODA tables */
+	pnv_pci_shutdown();
+
+	/*
+	 * Stop OPAL activity: Unregister all OPAL interrupts so they
+	 * don't fire up while we kexec and make sure all potentially
+	 * DMA'ing ops are complete (such as dump retrieval).
+	 */
+	opal_shutdown();
+}
+
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * Wait for every other online CPU to leave the kernel before kexec.
+ *
+ * For each CPU, OPAL is polled roughly once per millisecond until it either
+ * fails the query or stops reporting OPAL_THREAD_STARTED, giving up on that
+ * CPU after about 1000 polls. A message is printed the first time a given
+ * CPU has to be waited for.
+ */
+static void pnv_kexec_wait_secondaries_down(void)
+{
+	int my_cpu, i, notified = -1;
+
+	/*
+	 * NOTE(review): there is no matching put_cpu() in this function;
+	 * presumably intentional because this only runs on the kexec path -
+	 * confirm.
+	 */
+	my_cpu = get_cpu();
+
+	for_each_online_cpu(i) {
+		uint8_t status;
+		int64_t rc, timeout = 1000;
+
+		if (i == my_cpu)
+			continue;
+
+		for (;;) {
+			rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
+						   &status);
+			/* Done with this CPU once it is no longer "started". */
+			if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
+				break;
+			barrier();
+			if (i != notified) {
+				printk(KERN_INFO "kexec: waiting for cpu %d "
+				       "(physical %d) to enter OPAL\n",
+				       i, paca_ptrs[i]->hw_cpu_id);
+				notified = i;
+			}
+
+			/*
+			 * On crash secondaries might be unreachable or hung,
+			 * so timeout if we've waited too long
+			 */
+			mdelay(1);
+			if (timeout-- == 0) {
+				printk(KERN_ERR "kexec: timed out waiting for "
+				       "cpu %d (physical %d) to enter OPAL\n",
+				       i, paca_ptrs[i]->hw_cpu_id);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * The .kexec_cpu_down hook of the powernv machine description.
+ *
+ * @crash_shutdown: not used by this implementation.
+ * @secondary: non-zero when called on a secondary CPU.
+ *
+ * Tears down this CPU's interrupt controller state. With OPAL firmware a
+ * secondary CPU is then handed back to firmware, while the primary waits
+ * for the secondaries, shuts XIVE down and asks OPAL to reinitialise the
+ * CPUs for the next kernel.
+ */
+static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	u64 reinit_flags;
+
+	if (xive_enabled())
+		xive_teardown_cpu();
+	else
+		xics_kexec_teardown_cpu(secondary);
+
+	/* On OPAL, we return all CPUs to firmware */
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+
+	if (secondary) {
+		/* Return secondary CPUs to firmware on OPAL v3 */
+		mb();
+		get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
+		mb();
+
+		/* Return the CPU to OPAL */
+		opal_return_cpu();
+	} else {
+		/* Primary waits for the secondaries to have reached OPAL */
+		pnv_kexec_wait_secondaries_down();
+
+		/* Switch XIVE back to emulation mode */
+		if (xive_enabled())
+			xive_shutdown();
+
+		/*
+		 * We might be running as little-endian - now that interrupts
+		 * are disabled, reset the HILE bit to big-endian so we don't
+		 * take interrupts in the wrong endian later
+		 *
+		 * We reinit to enable both radix and hash on P9 to ensure
+		 * the mode used by the next kernel is always supported.
+		 */
+		reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
+				OPAL_REINIT_CPUS_MMU_HASH;
+		opal_reinit_cpus(reinit_flags);
+	}
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * The .memory_block_size hook of the powernv machine description: report
+ * the current value of memory_block_size.
+ */
+static unsigned long pnv_memory_block_size(void)
+{
+	return memory_block_size;
+}
+#endif
+
+/*
+ * Install the OPAL-backed machine hooks. Called from pnv_probe() only when
+ * OPAL firmware is present.
+ */
+static void __init pnv_setup_machdep_opal(void)
+{
+	/* Boot time, restart, halt and power off */
+	ppc_md.get_boot_time = opal_get_boot_time;
+	ppc_md.restart = pnv_restart;
+	ppc_md.halt = pnv_halt;
+	pm_power_off = pnv_power_off;
+
+	/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
+	ppc_md.machine_check_exception = opal_machine_check;
+	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
+	ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
+
+	/* Pick the early HMI handler that matches what firmware offers. */
+	if (!opal_check_token(OPAL_HANDLE_HMI2))
+		ppc_md.hmi_exception_early = opal_hmi_exception_early;
+	else
+		ppc_md.hmi_exception_early = opal_hmi_exception_early2;
+}
+
+/*
+ * The .probe hook of the powernv machine description.
+ *
+ * Installs the OPAL machine hooks when OPAL firmware is present, runs the
+ * early platform initialisation and always returns 1.
+ */
+static int __init pnv_probe(void)
+{
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		pnv_setup_machdep_opal();
+
+	pr_debug("PowerNV detected !\n");
+
+	pnv_init();
+
+	return 1;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Enable transactional memory in "suspend disabled" mode.
+ *
+ * Only attempted with OPAL firmware on POWER9 when CPU_FTR_TM is not
+ * already set, and only takes effect if OPAL accepts the CPU reinit
+ * request.
+ */
+void __init pnv_tm_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+
+	if (!pvr_version_is(PVR_POWER9))
+		return;
+
+	if (early_cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
+		return;
+
+	pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
+	cur_cpu_spec->cpu_features |= CPU_FTR_TM;
+	/* Make sure "normal" HTM is off (it should be) */
+	cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
+	/* Turn on no suspend mode, and HTM no SC */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND |
+					    PPC_FEATURE2_HTM_NOSC;
+	tm_suspend_disabled = true;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+/*
+ * Report the frequency of 'cpu' in Hz for /proc/cpuinfo.
+ *
+ * The cpufreq value is scaled by 1000; when that yields zero, the value of
+ * ppc_proc_freq is returned instead.
+ */
+static unsigned long pnv_get_proc_freq(unsigned int cpu)
+{
+	unsigned long freq_hz = cpufreq_get(cpu) * 1000ul;
+
+	if (freq_hz)
+		return freq_hz;
+
+	/*
+	 * If the backend cpufreq driver does not exist,
+	 * then fallback to old way of reporting the clockrate.
+	 */
+	return ppc_proc_freq;
+}
+
+/*
+ * The .machine_check_early hook of the powernv machine description.
+ *
+ * Delegates to the handler in cur_cpu_spec and returns its result, or 0
+ * when there is no CPU spec or no handler.
+ */
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+	if (!cur_cpu_spec || !cur_cpu_spec->machine_check_early)
+		return 0;
+
+	return cur_cpu_spec->machine_check_early(regs);
+}
+
+/*
+ * Machine description for PowerNV, tying the hooks defined in this file
+ * (plus pnv_pci_init and, optionally, the kexec and memory hotplug hooks)
+ * to the "ibm,powernv" compatible string.
+ */
+define_machine(powernv) {
+	.name			= "PowerNV",
+	.compatible		= "ibm,powernv",
+	.probe			= pnv_probe,
+	.setup_arch		= pnv_setup_arch,
+	.init_IRQ		= pnv_init_IRQ,
+	.show_cpuinfo		= pnv_show_cpuinfo,
+	.get_proc_freq          = pnv_get_proc_freq,
+	.discover_phbs		= pnv_pci_init,
+	.progress		= pnv_progress,
+	.machine_shutdown	= pnv_shutdown,
+	.power_save             = NULL,
+	.machine_check_early	= pnv_machine_check_early,
+#ifdef CONFIG_KEXEC_CORE
+	.kexec_cpu_down		= pnv_kexec_cpu_down,
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+	.memory_block_size	= pnv_memory_block_size,
+#endif
+};
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
new file mode 100644
index 0000000000..9e1a25398f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for PowerNV machines.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/hotplug.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/vdso_datapage.h>
+#include <asm/cputhreads.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/opal.h>
+#include <asm/runlatch.h>
+#include <asm/code-patching.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/powernv.h>
+
+#include "powernv.h"
+
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+/*
+ * The smp_ops .setup_cpu hook: per-CPU interrupt setup for @cpu, plus the
+ * POWER9 HMEER adjustment.
+ */
+static void pnv_smp_setup_cpu(int cpu)
+{
+	/*
+	 * P9 workaround for CI vector load (see traps.c),
+	 * enable the corresponding HMI interrupt
+	 */
+	if (pvr_version_is(PVR_POWER9)) {
+		unsigned long hmeer = mfspr(SPRN_HMEER);
+
+		mtspr(SPRN_HMEER, hmeer | PPC_BIT(17));
+	}
+
+	if (xive_enabled()) {
+		xive_smp_setup_cpu();
+		return;
+	}
+
+	/* XICS setup is skipped for the boot CPU. */
+	if (cpu != boot_cpuid)
+		xics_setup_cpu();
+}
+
+/*
+ * The smp_ops .kick_cpu hook: get logical CPU @nr running.
+ *
+ * If OPAL reports the thread inactive it is first started at
+ * generic_secondary_smp_init; in every successful case the CPU is then
+ * released through smp_generic_kick_cpu().
+ *
+ * Returns -EINVAL for an out-of-range @nr, -ENODEV when OPAL cannot be
+ * queried, cannot start the CPU or reports any other status, and otherwise
+ * the result of smp_generic_kick_cpu().
+ */
+static int pnv_smp_kick_cpu(int nr)
+{
+	unsigned int pcpu;
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
+	long rc;
+	uint8_t status;
+
+	if (nr < 0 || nr >= nr_cpu_ids)
+		return -EINVAL;
+
+	pcpu = get_hard_smp_processor_id(nr);
+	/*
+	 * If we already started or OPAL is not supported, we just
+	 * kick the CPU via the PACA
+	 */
+	if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
+		return smp_generic_kick_cpu(nr);
+
+	/*
+	 * At this point, the CPU can either be spinning on the way in
+	 * from kexec or be inside OPAL waiting to be started for the
+	 * first time. OPAL v3 allows us to query OPAL to know if it
+	 * has the CPUs, so we do that
+	 */
+	rc = opal_query_cpu_status(pcpu, &status);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
+		return -ENODEV;
+	}
+
+	switch (status) {
+	case OPAL_THREAD_STARTED:
+		/*
+		 * Already started, just kick it, probably coming from
+		 * kexec and spinning
+		 */
+		break;
+
+	case OPAL_THREAD_INACTIVE:
+		/*
+		 * Available/inactive, let's kick it
+		 */
+		pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
+		rc = opal_start_cpu(pcpu, start_here);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
+			return -ENODEV;
+		}
+		break;
+
+	default:
+		/*
+		 * An unavailable CPU (or any other unknown status)
+		 * shouldn't be started. It should also
+		 * not be in the possible map but currently it can
+		 * happen
+		 */
+		pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
+			 " (status %d)...\n", nr, pcpu, status);
+		return -ENODEV;
+	}
+
+	return smp_generic_kick_cpu(nr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * The smp_ops .cpu_disable hook, run on the CPU that is going offline.
+ *
+ * Marks the CPU offline, updates the vDSO processor count, picks a new
+ * boot_cpuid if this was the boot CPU, moves interrupts away and cleans up
+ * the CPU's MMU context. Always returns 0.
+ */
+static int pnv_smp_cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+
+	/* This is identical to pSeries... might consolidate by
+	 * moving migrate_irqs_away to a ppc_md with default to
+	 * the generic fixup_irqs. --BenH.
+	 */
+	set_cpu_online(cpu, false);
+	vdso_data->processorCount--;
+	/* The CPU is already out of the online mask, so it is not re-picked. */
+	if (cpu == boot_cpuid)
+		boot_cpuid = cpumask_any(cpu_online_mask);
+	if (xive_enabled())
+		xive_smp_disable_cpu();
+	else
+		xics_migrate_irqs_away();
+
+	cleanup_cpu_mmu_context();
+
+	return 0;
+}
+
+/*
+ * Flush pending interrupts using the routine that matches the interrupt
+ * controller in use: the native ICP before ISA v3.00, otherwise XIVE or the
+ * OPAL ICP.
+ */
+static void pnv_flush_interrupts(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		icp_native_flush_interrupt();
+		return;
+	}
+
+	if (xive_enabled())
+		xive_flush_interrupt();
+	else
+		icp_opal_flush_interrupt();
+}
+
+/*
+ * The smp_ops .cpu_offline_self hook, run on the CPU being unplugged.
+ *
+ * Marks the CPU dead, discards pending lazy interrupts, disables
+ * decrementer wakeups in LPCR and then loops in pnv_cpu_offline() until
+ * generic_check_cpu_restart() reports that the CPU should come back. Each
+ * wakeup is classified from SRR1 so that stray external interrupts and
+ * doorbells are cleared. On the way out decrementer wakeups are re-enabled.
+ */
+static void pnv_cpu_offline_self(void)
+{
+	unsigned long srr1, unexpected_mask, wmask;
+	unsigned int cpu;
+	u64 lpcr_val;
+
+	/* Standard hot unplug procedure */
+
+	idle_task_exit();
+	cpu = smp_processor_id();
+	DBG("CPU%d offline\n", cpu);
+	generic_set_cpu_dead(cpu);
+	smp_wmb();
+
+	/* The SRR1 wake-reason mask differs from ISA 2.07S onwards. */
+	wmask = SRR1_WAKEMASK;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		wmask = SRR1_WAKEMASK_P8;
+
+	/*
+	 * This turns the irq soft-disabled state we're called with, into a
+	 * hard-disabled state with pending irq_happened interrupts cleared.
+	 *
+	 * PACA_IRQ_DEC   - Decrementer should be ignored.
+	 * PACA_IRQ_HMI   - Can be ignored, processing is done in real mode.
+	 * PACA_IRQ_DBELL, EE, PMI - Unexpected.
+	 */
+	hard_irq_disable();
+	if (generic_check_cpu_restart(cpu))
+		goto out;
+
+	unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
+	if (local_paca->irq_happened & unexpected_mask) {
+		if (local_paca->irq_happened & PACA_IRQ_EE)
+			pnv_flush_interrupts();
+		DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
+				cpu, local_paca->irq_happened);
+	}
+	local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+
+	/*
+	 * We don't want to take decrementer interrupts while we are
+	 * offline, so clear LPCR:PECE1. We keep PECE2 (and
+	 * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
+	 *
+	 * If the CPU gets woken up by a special wakeup, ensure that
+	 * the SLW engine sets LPCR with decrementer bit cleared, else
+	 * the CPU will come back to the kernel due to a spurious
+	 * wakeup.
+	 */
+	lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
+	while (!generic_check_cpu_restart(cpu)) {
+		/*
+		 * Clear IPI flag, since we don't handle IPIs while
+		 * offline, except for those when changing micro-threading
+		 * mode, which are handled explicitly below, and those
+		 * for coming online, which are handled via
+		 * generic_check_cpu_restart() calls.
+		 */
+		kvmppc_clear_host_ipi(cpu);
+
+		srr1 = pnv_cpu_offline(cpu);
+
+		WARN_ON_ONCE(!irqs_disabled());
+		WARN_ON(lazy_irq_pending());
+
+		/*
+		 * If the SRR1 value indicates that we woke up due to
+		 * an external interrupt, then clear the interrupt.
+		 * We clear the interrupt before checking for the
+		 * reason, so as to avoid a race where we wake up for
+		 * some other reason, find nothing and clear the interrupt
+		 * just as some other cpu is sending us an interrupt.
+		 * If we returned from power7_nap as a result of
+		 * having finished executing in a KVM guest, then srr1
+		 * contains 0.
+		 */
+		if (((srr1 & wmask) == SRR1_WAKEEE) ||
+		    ((srr1 & wmask) == SRR1_WAKEHVI)) {
+			pnv_flush_interrupts();
+		} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
+			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+			asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+		} else if ((srr1 & wmask) == SRR1_WAKERESET) {
+			irq_set_pending_from_srr1(srr1);
+			/* Does not return */
+		}
+
+		smp_mb();
+
+		/*
+		 * For kdump kernels, we process the ipi and jump to
+		 * crash_ipi_callback
+		 */
+		if (kdump_in_progress()) {
+			/*
+			 * If we got to this point, we've not used
+			 * NMI's, otherwise we would have gone
+			 * via the SRR1_WAKERESET path. We are
+			 * using regular IPI's for waking up offline
+			 * threads.
+			 */
+			struct pt_regs regs;
+
+			ppc_save_regs(&regs);
+			crash_ipi_callback(&regs);
+			/* Does not return */
+		}
+
+		/* Go straight back to sleep when a core split is pending. */
+		if (cpu_core_split_required())
+			continue;
+
+		if (srr1 && !generic_check_cpu_restart(cpu))
+			DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
+					cpu, srr1);
+
+	}
+
+	/*
+	 * Re-enable decrementer interrupts in LPCR.
+	 *
+	 * Further, we want stop states to be woken up by decrementer
+	 * for non-hotplug cases. So program the LPCR via stop api as
+	 * well.
+	 */
+	lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
+	pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+out:
+	DBG("CPU%d coming online...\n", cpu);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * The smp_ops .cpu_bootable hook: decide whether CPU @nr may be booted.
+ *
+ * Returns 1 unconditionally from ISA 2.07S onwards, otherwise whatever
+ * smp_generic_cpu_bootable() decides.
+ */
+static int pnv_cpu_bootable(unsigned int nr)
+{
+	/*
+	 * Starting with POWER8, the subcore logic relies on all threads of a
+	 * core being booted so that they can participate in split mode
+	 * switches. So on those machines we ignore the smt_enabled_at_boot
+	 * setting (smt-enabled on the kernel command line).
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return smp_generic_cpu_bootable(nr);
+
+	return 1;
+}
+
+/*
+ * The smp_ops .prepare_cpu hook: only XIVE needs per-CPU preparation.
+ * Returns 0 when XIVE is not in use, otherwise the XIVE result.
+ */
+static int pnv_smp_prepare_cpu(int cpu)
+{
+	if (!xive_enabled())
+		return 0;
+
+	return xive_smp_prepare_cpu(cpu);
+}
+
+/* Cause IPI as setup by the interrupt controller (xics or xive) */
+static void (*ic_cause_ipi)(int cpu);
+
+/*
+ * Send an IPI to @cpu: try a core doorbell first and fall back to the
+ * interrupt controller's method saved in ic_cause_ipi.
+ */
+static void pnv_cause_ipi(int cpu)
+{
+	if (!doorbell_try_core_ipi(cpu))
+		ic_cause_ipi(cpu);
+}
+
+/*
+ * The smp_ops .probe hook: probe the interrupt controller and, when the CPU
+ * has doorbells, route IPIs through them.
+ */
+static void __init pnv_smp_probe(void)
+{
+	if (xive_enabled())
+		xive_smp_probe();
+	else
+		xics_smp_probe();
+
+	if (!cpu_has_feature(CPU_FTR_DBELL))
+		return;
+
+	/* Remember the controller's IPI method before overriding it. */
+	ic_cause_ipi = smp_ops->cause_ipi;
+	WARN_ON(!ic_cause_ipi);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		smp_ops->cause_ipi = doorbell_global_ipi;
+	else
+		smp_ops->cause_ipi = pnv_cause_ipi;
+}
+
+/*
+ * System reset handler installed by pnv_smp_init(): returns 1 when the
+ * reset was consumed as an NMI IPI, 0 otherwise.
+ */
+noinstr static int pnv_system_reset_exception(struct pt_regs *regs)
+{
+	return smp_handle_nmi_ipi(regs) ? 1 : 0;
+}
+
+/*
+ * Raise an NMI IPI through OPAL's system reset call.
+ *
+ * @cpu: a logical CPU number (>= 0), or NMI_IPI_ALL_OTHERS to signal every
+ *       online CPU except the caller. Any other negative value is rejected.
+ *
+ * When firmware provides OPAL_QUIESCE, the target(s) are held quiesced
+ * around the reset call(s).
+ *
+ * Returns 1 if every requested reset was accepted by OPAL, 0 otherwise.
+ */
+static int pnv_cause_nmi_ipi(int cpu)
+{
+	bool all_ok = true;
+	int64_t rc;
+	int other;
+
+	if (cpu >= 0) {
+		int hw_id = get_hard_smp_processor_id(cpu);
+
+		if (opal_check_token(OPAL_QUIESCE))
+			opal_quiesce(QUIESCE_HOLD, hw_id);
+
+		rc = opal_signal_system_reset(hw_id);
+
+		if (opal_check_token(OPAL_QUIESCE))
+			opal_quiesce(QUIESCE_RESUME, hw_id);
+
+		return rc == OPAL_SUCCESS ? 1 : 0;
+	}
+
+	if (cpu != NMI_IPI_ALL_OTHERS)
+		return 0;
+
+	if (opal_check_token(OPAL_QUIESCE))
+		opal_quiesce(QUIESCE_HOLD, -1);
+
+	/*
+	 * We do not use broadcasts (yet), because it's not clear
+	 * exactly what semantics Linux wants or the firmware should
+	 * provide.
+	 */
+	for_each_online_cpu(other) {
+		if (other == smp_processor_id())
+			continue;
+
+		rc = opal_signal_system_reset(
+					get_hard_smp_processor_id(other));
+		if (rc != OPAL_SUCCESS)
+			all_ok = false;
+	}
+
+	if (opal_check_token(OPAL_QUIESCE))
+		opal_quiesce(QUIESCE_RESUME, -1);
+
+	/*
+	 * On failure the caller will fall back to doorbells, which may
+	 * pick up the remainders.
+	 */
+	return all_ok ? 1 : 0;
+}
+
+/*
+ * SMP operations for PowerNV, installed by pnv_smp_init().
+ *
+ * .cause_ipi is filled in by the interrupt controller probe and may be
+ * overridden in pnv_smp_probe(); .cause_nmi_ipi is set by pnv_smp_init()
+ * when firmware supports OPAL_SIGNAL_SYSTEM_RESET.
+ */
+static struct smp_ops_t pnv_smp_ops = {
+	.message_pass	= NULL, /* Use smp_muxed_ipi_message_pass */
+	.cause_ipi	= NULL,	/* Filled at runtime by pnv_smp_probe() */
+	.cause_nmi_ipi	= NULL,
+	.probe		= pnv_smp_probe,
+	.prepare_cpu	= pnv_smp_prepare_cpu,
+	.kick_cpu	= pnv_smp_kick_cpu,
+	.setup_cpu	= pnv_smp_setup_cpu,
+	.cpu_bootable	= pnv_cpu_bootable,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable	= pnv_smp_cpu_disable,
+	.cpu_die	= generic_cpu_die,
+	.cpu_offline_self = pnv_cpu_offline_self,
+#endif /* CONFIG_HOTPLUG_CPU */
+};
+
+/*
+ * Install the PowerNV SMP operations. This is called very early during
+ * platform setup_arch.
+ *
+ * The NMI IPI and system reset hooks are only wired up when firmware
+ * provides OPAL_SIGNAL_SYSTEM_RESET.
+ */
+void __init pnv_smp_init(void)
+{
+	if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
+		pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
+		ppc_md.system_reset_exception = pnv_system_reset_exception;
+	}
+	smp_ops = &pnv_smp_ops;
+
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_KEXEC_CORE)
+	crash_wake_offline = 1;
+#endif
+}
diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S
new file mode 100644
index 0000000000..e038f67617
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore-asm.S
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+
+#include "subcore.h"
+
+
+/*
+ * split_core_secondary_loop(u8 *state)
+ *
+ * Drops to real mode with interrupts off, saves a set of SPRs, writes
+ * SYNC_STEP_REAL_MODE to *state, then spins until HID0 shows one of the
+ * POWER8 2-LPAR/4-LPAR mode bits. After that it zeroes LPID, PCR and HDEC,
+ * writes the saved SPR values back and returns to the caller with the
+ * original MSR restored.
+ */
+_GLOBAL(split_core_secondary_loop)
+	/*
+	 * r3 = u8 *state, used throughout the routine
+	 * r4 = temp
+	 * r5 = temp
+	 * ..
+	 * r12 = MSR
+	 */
+	mfmsr	r12
+
+	/* Disable interrupts so SRR0/1 don't get trashed */
+	li	r4,0
+	ori	r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI
+	andc	r4,r12,r4
+	sync
+	mtmsrd	r4
+
+	/* Switch to real mode and leave interrupts off */
+	li	r5, MSR_IR|MSR_DR
+	andc	r5, r4, r5
+
+	LOAD_REG_ADDR(r4, real_mode)
+
+	/* rfid to real_mode with the MSR computed in r5 */
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r5
+	rfid
+	b	.	/* prevent speculative execution */
+
+real_mode:
+	/* Grab values from unsplit SPRs */
+	mfspr	r6,  SPRN_LDBAR
+	mfspr	r7,  SPRN_PMMAR
+	mfspr	r8,  SPRN_PMCR
+	mfspr	r9,  SPRN_RPR
+	mfspr	r10, SPRN_SDR1
+
+	/* Order reading the SPRs vs telling the primary we are ready to split */
+	sync
+
+	/* Tell thread 0 we are in real mode */
+	li	r4, SYNC_STEP_REAL_MODE
+	stb	r4, 0(r3)
+
+	/* r5 = mask of the two LPAR-mode bits, built from its top 16 bits */
+	li	r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest
+	sldi	r5, r5, 48
+
+	/* Loop until we see the split happen in HID0 */
+1:	mfspr	r4, SPRN_HID0
+	and.	r4, r4, r5
+	beq	1b
+
+	/*
+	 * We only need to initialise the below regs once for each subcore,
+	 * but it's simpler and harmless to do it on each thread.
+	 */
+
+	/* Make sure various SPRS have sane values */
+	li	r4, 0
+	mtspr	SPRN_LPID, r4
+	mtspr	SPRN_PCR, r4
+	mtspr	SPRN_HDEC, r4
+
+	/* Restore SPR values now we are split */
+	mtspr	SPRN_LDBAR, r6
+	mtspr	SPRN_PMMAR, r7
+	mtspr	SPRN_PMCR, r8
+	mtspr	SPRN_RPR, r9
+	mtspr	SPRN_SDR1, r10
+
+	LOAD_REG_ADDR(r5, virtual_mode)
+
+	/* Get out of real mode, restoring the MSR saved in r12 on entry */
+	mtspr	SPRN_SRR0,r5
+	mtspr	SPRN_SRR1,r12
+	rfid
+	b	.	/* prevent speculative execution */
+
+virtual_mode:
+	blr
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
new file mode 100644
index 0000000000..191424468f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"powernv: " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/smp.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/kvm_ppc.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+
+#include <trace/events/ipi.h>
+
+#include "subcore.h"
+#include "powernv.h"
+
+
+/*
+ * Split/unsplit procedure:
+ *
+ * A core can be in one of three states, unsplit, 2-way split, and 4-way split.
+ *
+ * The mapping to subcores_per_core is simple:
+ *
+ *  State       | subcores_per_core
+ *  ------------|------------------
+ *  Unsplit     |        1
+ *  2-way split |        2
+ *  4-way split |        4
+ *
+ * The core is split along thread boundaries, the mapping between subcores and
+ * threads is as follows:
+ *
+ *  Unsplit:
+ *          ----------------------------
+ *  Subcore |            0             |
+ *          ----------------------------
+ *  Thread  |  0  1  2  3  4  5  6  7  |
+ *          ----------------------------
+ *
+ *  2-way split:
+ *          -------------------------------------
+ *  Subcore |        0        |        1        |
+ *          -------------------------------------
+ *  Thread  |  0   1   2   3  |  4   5   6   7  |
+ *          -------------------------------------
+ *
+ *  4-way split:
+ *          -----------------------------------------
+ *  Subcore |    0    |    1    |    2    |    3    |
+ *          -----------------------------------------
+ *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  |
+ *          -----------------------------------------
+ *
+ *
+ * Transitions
+ * -----------
+ *
+ * It is not possible to transition between either of the split states, the
+ * core must first be unsplit. The legal transitions are:
+ *
+ *  -----------          ---------------
+ *  |         |  <---->  | 2-way split |
+ *  |         |          ---------------
+ *  | Unsplit |
+ *  |         |          ---------------
+ *  |         |  <---->  | 4-way split |
+ *  -----------          ---------------
+ *
+ * Unsplitting
+ * -----------
+ *
+ * Unsplitting is the simpler procedure. It requires thread 0 to request the
+ * unsplit while all other threads NAP.
+ *
+ * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
+ * the hardware that if all threads except 0 are napping, the hardware should
+ * unsplit the core.
+ *
+ * Non-zero threads are sent to a NAP loop, they don't exit the loop until they
+ * see the core unsplit.
+ *
+ * Thread 0 spins waiting for the hardware to see all the other threads
+ * napping and perform the unsplit.
+ *
+ * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
+ * out of NAP. They will then see the core unsplit and exit the NAP loop.
+ *
+ * Splitting
+ * ---------
+ *
+ * The basic splitting procedure is fairly straightforward. However it is
+ * complicated by the fact that after the split occurs, the newly created
+ * subcores are not in a fully initialised state.
+ *
+ * Most notably the subcores do not have the correct value for SDR1, which
+ * means they must not be running in virtual mode when the split occurs. The
+ * subcores have separate timebase SPRs but these are pre-synchronised by
+ * opal.
+ *
+ * To begin with secondary threads are sent to an assembly routine. There they
+ * switch to real mode, so they are immune to the uninitialised SDR1 value.
+ * Once in real mode they indicate that they are in real mode, and spin waiting
+ * to see the core split.
+ *
+ * Thread 0 waits to see that all secondaries are in real mode, and then begins
+ * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which
+ * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
+ * to request the split, and spins waiting to see that the split has happened.
+ *
+ * Concurrently the secondaries will notice the split. When they do they set up
+ * their SPRs, notably SDR1, and then they can return to virtual mode and exit
+ * the procedure.
+ */
+
+/* Initialised at boot by subcore_init() */
+static int subcores_per_core;
+
+/*
+ * Used to communicate to offline cpus that we want them to pop out of the
+ * offline loop and do a split or unsplit.
+ *
+ * 0 - no split happening
+ * 1 - unsplit in progress
+ * 2 - split to 2 in progress
+ * 4 - split to 4 in progress
+ */
+static int new_split_mode;
+
+static cpumask_var_t cpu_offline_mask;
+
+/* Per-cpu handshake state used while a split/unsplit is in progress. */
+struct split_state {
+	u8 step;	/* last SYNC_STEP_* value this cpu has reached */
+	u8 master;	/* 1 on the cpu that called set_subcores_per_core() */
+};
+
+static DEFINE_PER_CPU(struct split_state, split_state);
+
+/*
+ * Busy-wait until each of the threads_per_core - 1 cpus numbered directly
+ * after the calling cpu has advanced its split_state.step to at least
+ * @step.
+ */
+static void wait_for_sync_step(int step)
+{
+	int self = smp_processor_id();
+	int sibling = self + 1;
+
+	while (sibling < self + threads_per_core) {
+		while (per_cpu(split_state, sibling).step < step)
+			barrier();
+		sibling++;
+	}
+
+	/* Keep later loads/stores from being ordered before the wait. */
+	mb();
+}
+
+/*
+ * If winkle is among the supported idle states, ask OPAL to patch the
+ * slw image for this cpu (identified by its PIR) with @hid0 as the HID0
+ * value. Otherwise do nothing.
+ */
+static void update_hid_in_slw(u64 hid0)
+{
+	u64 supported = pnv_get_supported_cpuidle_states();
+	u64 pir;
+
+	if (!(supported & OPAL_PM_WINKLE_ENABLED))
+		return;
+
+	pir = hard_smp_processor_id();
+	opal_slw_set_reg(pir, SPRN_HID0, hid0);
+}
+
+/*
+ * Write @hid0 to the HID0 SPR, bracketed by the synchronisation the
+ * register requires.
+ */
+static inline void update_power8_hid0(unsigned long hid0)
+{
+	/*
+	 *  The HID0 update on Power8 should at the very least be
+	 *  preceded by a SYNC instruction followed by an ISYNC
+	 *  instruction
+	 */
+	asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
+}
+
+/*
+ * Return the calling cpu's core to the unsplit state.
+ *
+ * Threads other than thread 0 nap until HID0 no longer shows a split mode
+ * and then record SYNC_STEP_UNSPLIT. Thread 0 clears DYNLPARDIS, waits for
+ * the split mode bits to clear, kicks the other threads of the core and
+ * waits for all of them to reach SYNC_STEP_UNSPLIT.
+ */
+static void unsplit_core(void)
+{
+	const u64 split_bits = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+	int self = smp_processor_id();
+	int sibling;
+	u64 hid0;
+
+	if (cpu_thread_in_core(self) != 0) {
+		/* Secondary: nap until the hardware reports the unsplit */
+		while (mfspr(SPRN_HID0) & split_bits)
+			power7_idle_type(PNV_THREAD_NAP);
+
+		per_cpu(split_state, self).step = SYNC_STEP_UNSPLIT;
+		return;
+	}
+
+	/* Thread 0: allow the hardware to unsplit */
+	hid0 = mfspr(SPRN_HID0) & ~HID0_POWER8_DYNLPARDIS;
+	update_power8_hid0(hid0);
+	update_hid_in_slw(hid0);
+
+	while (mfspr(SPRN_HID0) & split_bits)
+		cpu_relax();
+
+	/* Wake secondaries out of NAP */
+	for (sibling = self + 1; sibling < self + threads_per_core; sibling++)
+		smp_send_reschedule(sibling);
+
+	wait_for_sync_step(SYNC_STEP_UNSPLIT);
+}
+
+/*
+ * Split the calling cpu's core into @new_mode (2 or 4) subcores.
+ *
+ * Threads other than thread 0 run split_core_secondary_loop(). Thread 0
+ * waits for them to reach SYNC_STEP_REAL_MODE, requests the split through
+ * HID0 and spins until HID0 reports the requested mode.
+ */
+static void split_core(int new_mode)
+{
+	struct {  u64 value; u64 mask; } split_parms[2] = {
+		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
+		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
+	};
+	/* Map new_mode 2 -> entry 0 and 4 -> entry 1 */
+	int idx = (new_mode >> 1) - 1;
+	int self;
+	u64 hid0;
+
+	BUG_ON(idx < 0 || idx > 1);
+
+	self = smp_processor_id();
+	if (cpu_thread_in_core(self) != 0) {
+		split_core_secondary_loop(&per_cpu(split_state, self).step);
+		return;
+	}
+
+	wait_for_sync_step(SYNC_STEP_REAL_MODE);
+
+	/* Request the new mode and forbid dynamic unsplitting */
+	hid0 = mfspr(SPRN_HID0) | HID0_POWER8_DYNLPARDIS | split_parms[idx].value;
+	update_power8_hid0(hid0);
+	update_hid_in_slw(hid0);
+
+	/* Spin until the hardware reports the mode */
+	while (!(mfspr(SPRN_HID0) & split_parms[idx].mask))
+		cpu_relax();
+}
+
+/*
+ * Move the calling cpu's core to @new_mode subcores, going through the
+ * unsplit state first when needed, then mark this cpu SYNC_STEP_FINISHED.
+ */
+static void cpu_do_split(int new_mode)
+{
+	struct split_state *state;
+
+	/*
+	 * subcores_per_core is 0 at boot, so the boot-time call always
+	 * unsplits. If the core is already unsplit that is a nop; it just
+	 * makes the kernel's idea of the mode agree with the hardware.
+	 */
+	if (subcores_per_core != 1)
+		unsplit_core();
+
+	if (new_mode != 1)
+		split_core(new_mode);
+
+	mb();
+	state = &per_cpu(split_state, smp_processor_id());
+	state->step = SYNC_STEP_FINISHED;
+}
+
+/*
+ * If a split/unsplit is in progress (new_split_mode is non-zero), take
+ * part in it on the calling cpu and return true; otherwise return false.
+ */
+bool cpu_core_split_required(void)
+{
+	smp_rmb();
+
+	if (new_split_mode) {
+		cpu_do_split(new_split_mode);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Recompute paca->subcore_sibling_mask for every possible cpu from the
+ * current threads_per_subcore: a run of threads_per_subcore set bits,
+ * positioned at the first thread of the subcore the cpu belongs to.
+ */
+void update_subcore_sibling_mask(void)
+{
+	/* Mask for subcore 0; other subcores use it shifted left */
+	int subcore0_mask = (1 << threads_per_subcore) - 1;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int subcore = cpu_thread_in_core(cpu) / threads_per_subcore;
+		int first_thread = subcore * threads_per_subcore;
+
+		paca_ptrs[cpu]->subcore_sibling_mask =
+			subcore0_mask << first_thread;
+	}
+}
+
+/*
+ * stop_machine() callback, passed by set_subcores_per_core() to run on the
+ * online cpus.
+ *
+ * @data points to an int holding the requested subcores_per_core.
+ *
+ * Every cpu performs the split/unsplit for its core. The cpu flagged as
+ * master additionally publishes the request in new_split_mode, kicks the
+ * cpus that are present but not online, and once all cpus have finished
+ * updates the global mode variables. Always returns 0.
+ */
+static int cpu_update_split_mode(void *data)
+{
+	int cpu, new_mode = *(int *)data;
+
+	if (this_cpu_ptr(&split_state)->master) {
+		/* Publish the request before kicking anyone */
+		new_split_mode = new_mode;
+		smp_wmb();
+
+		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
+			       cpu_online_mask);
+
+		/* This should work even though the cpu is offline */
+		for_each_cpu(cpu, cpu_offline_mask)
+			smp_send_reschedule(cpu);
+	}
+
+	cpu_do_split(new_mode);
+
+	if (this_cpu_ptr(&split_state)->master) {
+		/* Wait for all cpus to finish before we touch subcores_per_core */
+		for_each_present_cpu(cpu) {
+			/* cpus at or beyond setup_max_cpus are not waited for */
+			if (cpu >= setup_max_cpus)
+				break;
+
+			while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
+				barrier();
+		}
+
+		/* Operation over: cpu_core_split_required() returns false again */
+		new_split_mode = 0;
+
+		/* Make the new mode public */
+		subcores_per_core = new_mode;
+		threads_per_subcore = threads_per_core / subcores_per_core;
+		update_subcore_sibling_mask();
+
+		/* Make sure the new mode is written before we exit */
+		mb();
+	}
+
+	return 0;
+}
+
+/*
+ * Switch every core to @new_mode subcores (1, 2 or 4; anything else hits
+ * the BUG_ON below).
+ *
+ * Resets the per-cpu handshake state, marks the calling cpu as master and
+ * runs cpu_update_split_mode() under stop_machine on the online cpus.
+ *
+ * Returns 0 on success or -EBUSY if KVM HV mode is active.
+ */
+static int set_subcores_per_core(int new_mode)
+{
+	struct split_state *state;
+	int cpu;
+
+	if (kvm_hv_mode_active()) {
+		pr_err("Unable to change split core mode while KVM active.\n");
+		return -EBUSY;
+	}
+
+	/*
+	 * We are only called at boot, or from the sysfs write. If that ever
+	 * changes we'll need a lock here.
+	 */
+	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);
+
+	/* Start every present cpu from a clean handshake state */
+	for_each_present_cpu(cpu) {
+		state = &per_cpu(split_state, cpu);
+		state->step = SYNC_STEP_INITIAL;
+		state->master = 0;
+	}
+
+	cpus_read_lock();
+
+	/* This cpu will update the globals before exiting stop machine */
+	this_cpu_ptr(&split_state)->master = 1;
+
+	/* Ensure state is consistent before we call the other cpus */
+	mb();
+
+	stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+				cpu_online_mask);
+
+	cpus_read_unlock();
+
+	return 0;
+}
+
+/*
+ * sysfs store handler for subcores_per_core.
+ *
+ * Parses @buf as a hexadecimal number. Only 1, 2 and 4 are accepted;
+ * writing the current value is a successful no-op. Returns @count on
+ * success, -EINVAL for unparsable or unsupported values, or the error from
+ * set_subcores_per_core().
+ */
+static ssize_t __used store_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long val;
+	int rc;
+
+	/* We are serialised by the attribute lock */
+
+	if (sscanf(buf, "%lx", &val) != 1)
+		return -EINVAL;
+
+	if (val != 1 && val != 2 && val != 4)
+		return -EINVAL;
+
+	/* Already in the requested mode: nothing to do */
+	if (subcores_per_core == val)
+		return count;
+
+	rc = set_subcores_per_core(val);
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+/*
+ * sysfs show handler for subcores_per_core: emits the current value in
+ * hexadecimal followed by a newline and returns the number of bytes
+ * written. sysfs_emit() is used instead of sprintf() so the write is
+ * bounded to the sysfs page buffer.
+ */
+static ssize_t show_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%x\n", subcores_per_core);
+}
+
+static DEVICE_ATTR(subcores_per_core, 0644,
+		show_subcores_per_core, store_subcores_per_core);
+
+/*
+ * Initcall: on POWER8 family processors, force all cores to the unsplit
+ * state and expose the subcores_per_core attribute on the cpu subsystem
+ * root device. Returns 0 when split core is not applicable, otherwise the
+ * result of creating the attribute file.
+ */
+static int subcore_init(void)
+{
+	struct device *dev_root;
+	unsigned pvr_ver;
+	int rc = 0;
+
+	pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+	switch (pvr_ver) {
+	case PVR_POWER8:
+	case PVR_POWER8E:
+	case PVR_POWER8NVL:
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * We need all threads in a core to be present to split/unsplit so
+	 * continue only if max_cpus are aligned to threads_per_core.
+	 */
+	if (setup_max_cpus % threads_per_core)
+		return 0;
+
+	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));
+
+	set_subcores_per_core(1);
+
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (!dev_root)
+		return rc;
+
+	rc = device_create_file(dev_root, &dev_attr_subcores_per_core);
+	put_device(dev_root);
+
+	return rc;
+}
+machine_device_initcall(powernv, subcore_init);
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
new file mode 100644
index 0000000000..77feee8436
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+/*
+ * Values for the per-cpu split_state.step in subcore.c. They are ordered:
+ * waiters there compare a cpu's step against a target with "<", so reaching
+ * a later step also satisfies a wait for an earlier one.
+ */
+#define SYNC_STEP_INITIAL	0	/* Written to every present cpu before an operation */
+#define SYNC_STEP_UNSPLIT	1	/* Set by secondary when it sees unsplit */
+#define SYNC_STEP_REAL_MODE	2	/* Set by secondary when in real mode  */
+#define SYNC_STEP_FINISHED	3	/* Set by each cpu when its split/unsplit is done */
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
+/* Assembly routine run by non-zero threads during a split; see subcore-asm.S */
+void split_core_secondary_loop(u8 *state);
+/* Recompute each cpu's paca subcore_sibling_mask from threads_per_subcore */
+extern void update_subcore_sibling_mask(void);
+#else
+/* Without SMP there is nothing to update */
+static inline void update_subcore_sibling_mask(void) { }
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c
new file mode 100644
index 0000000000..67c8c4b2d8
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ultravisor.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ultravisor high level interfaces
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+
+#include <asm/ultravisor.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+#include "powernv.h"
+
+static struct kobject *ultravisor_kobj;
+
+/*
+ * Flat device tree scan callback. When @node is compatible with
+ * "ibm,ultravisor", set FW_FEATURE_ULTRAVISOR and return 1; return 0 for
+ * any other node. @uname, @depth and @data are unused.
+ */
+int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+					 int depth, void *data)
+{
+	if (of_flat_dt_is_compatible(node, "ibm,ultravisor")) {
+		powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR;
+		pr_debug("Ultravisor detected!\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Ultravisor memory console handle; set up by uv_init() */
+static struct memcons *uv_memcons;
+
+/*
+ * sysfs binary read handler for "msglog": hands the request for @count
+ * bytes at offset @pos straight to memcons_copy() on uv_memcons and
+ * returns its result.
+ */
+static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj,
+			      struct bin_attribute *bin_attr, char *to,
+			      loff_t pos, size_t count)
+{
+	return memcons_copy(uv_memcons, to, pos, count);
+}
+
+/* Read-only (0400) "msglog" attribute; .size is filled in by uv_init() */
+static struct bin_attribute uv_msglog_attr = {
+	.attr = {.name = "msglog", .mode = 0400},
+	.read = uv_msglog_read
+};
+
+/*
+ * Initcall: when the ultravisor firmware feature is present, set up the
+ * ultravisor memory console and expose it as the "msglog" binary file
+ * under an "ultravisor" kobject created beneath firmware_kobj.
+ *
+ * Returns 0 if there is no ultravisor or on success, -ENODEV if the
+ * "ibm,uv-firmware" node is missing, -ENOENT if the memcons cannot be
+ * initialised, -ENOMEM if the kobject cannot be created, or the error
+ * from sysfs_create_bin_file().
+ */
+static int __init uv_init(void)
+{
+	struct device_node *node;
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+		return 0;
+
+	node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+	if (!node)
+		return -ENODEV;
+
+	uv_memcons = memcons_init(node, "memcons");
+	of_node_put(node);
+	if (!uv_memcons)
+		return -ENOENT;
+
+	uv_msglog_attr.size = memcons_get_size(uv_memcons);
+
+	ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj);
+	if (!ultravisor_kobj)
+		return -ENOMEM;
+
+	rc = sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr);
+	if (rc) {
+		/* Don't leave an empty, unreferenced kobject behind */
+		kobject_put(ultravisor_kobj);
+		ultravisor_kobj = NULL;
+	}
+
+	return rc;
+}
+machine_subsys_initcall(powernv, uv_init);
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
new file mode 100644
index 0000000000..3ce89a4b54
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+static struct dentry *vas_debugfs;
+
+/*
+ * Map a VAS_COP_TYPE_* value to a human readable name for debugfs output.
+ * Unrecognised values yield "Unknown". The returned strings are literals,
+ * hence the const return type.
+ */
+static const char *cop_to_str(int cop)
+{
+	switch (cop) {
+	case VAS_COP_TYPE_FAULT:	return "Fault";
+	case VAS_COP_TYPE_842:		return "NX-842 Normal Priority";
+	case VAS_COP_TYPE_842_HIPRI:	return "NX-842 High Priority";
+	case VAS_COP_TYPE_GZIP:		return "NX-GZIP Normal Priority";
+	case VAS_COP_TYPE_GZIP_HIPRI:	return "NX-GZIP High Priority";
+	case VAS_COP_TYPE_FTW:		return "Fast Thread-wakeup";
+	default:			return "Unknown";
+	}
+}
+
+/*
+ * seq_file show routine for a window's "info" file: prints the
+ * coprocessor type, the direction (Send/Receive) and the owning pid.
+ * Prints nothing if the window has no hvwc mapping. Always returns 0.
+ */
+static int info_show(struct seq_file *s, void *private)
+{
+	struct pnv_vas_window *win = s->private;
+
+	mutex_lock(&vas_mutex);
+
+	/* Only report on a window that is still mapped */
+	if (win->hvwc_map) {
+		seq_printf(s, "Type: %s, %s\n", cop_to_str(win->vas_win.cop),
+						win->tx_win ? "Send" : "Receive");
+		seq_printf(s, "Pid : %d\n", vas_window_pid(&win->vas_win));
+	}
+
+	mutex_unlock(&vas_mutex);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(info);
+
+/*
+ * Emit one line to @s: the value read_hvwc_reg() returns for register
+ * @reg of @win, as 16 hex digits, followed by @name. Callers in this file
+ * pass VREG(x), which supplies both @name and @reg.
+ */
+static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win,
+			char *name, u32 reg)
+{
+	seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
+}
+
+/*
+ * seq_file show routine for a window's "hvwc" file: dumps the window
+ * context registers listed below, one per line, under vas_mutex. Prints
+ * nothing if the window has no hvwc mapping. Always returns 0.
+ */
+static int hvwc_show(struct seq_file *s, void *private)
+{
+	struct pnv_vas_window *win = s->private;
+
+	mutex_lock(&vas_mutex);
+
+	/* Only touch the registers of a window that is still mapped */
+	if (win->hvwc_map) {
+		print_reg(s, win, VREG(LPID));
+		print_reg(s, win, VREG(PID));
+		print_reg(s, win, VREG(XLATE_MSR));
+		print_reg(s, win, VREG(XLATE_LPCR));
+		print_reg(s, win, VREG(XLATE_CTL));
+		print_reg(s, win, VREG(AMR));
+		print_reg(s, win, VREG(SEIDR));
+		print_reg(s, win, VREG(FAULT_TX_WIN));
+		print_reg(s, win, VREG(OSU_INTR_SRC_RA));
+		print_reg(s, win, VREG(HV_INTR_SRC_RA));
+		print_reg(s, win, VREG(PSWID));
+		print_reg(s, win, VREG(LFIFO_BAR));
+		print_reg(s, win, VREG(LDATA_STAMP_CTL));
+		print_reg(s, win, VREG(LDMA_CACHE_CTL));
+		print_reg(s, win, VREG(LRFIFO_PUSH));
+		print_reg(s, win, VREG(CURR_MSG_COUNT));
+		print_reg(s, win, VREG(LNOTIFY_AFTER_COUNT));
+		print_reg(s, win, VREG(LRX_WCRED));
+		print_reg(s, win, VREG(LRX_WCRED_ADDER));
+		print_reg(s, win, VREG(TX_WCRED));
+		print_reg(s, win, VREG(TX_WCRED_ADDER));
+		print_reg(s, win, VREG(LFIFO_SIZE));
+		print_reg(s, win, VREG(WINCTL));
+		print_reg(s, win, VREG(WIN_STATUS));
+		print_reg(s, win, VREG(WIN_CTX_CACHING_CTL));
+		print_reg(s, win, VREG(TX_RSVD_BUF_COUNT));
+		print_reg(s, win, VREG(LRFIFO_WIN_PTR));
+		print_reg(s, win, VREG(LNOTIFY_CTL));
+		print_reg(s, win, VREG(LNOTIFY_PID));
+		print_reg(s, win, VREG(LNOTIFY_LPID));
+		print_reg(s, win, VREG(LNOTIFY_TID));
+		print_reg(s, win, VREG(LNOTIFY_SCOPE));
+		print_reg(s, win, VREG(NX_UTIL_ADDER));
+	}
+
+	mutex_unlock(&vas_mutex);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hvwc);
+
+/*
+ * Remove the debugfs directory of @pnv_win (if it has one) together with
+ * its contents, free the directory name and clear both fields.
+ */
+void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win)
+{
+	struct vas_window *win = &pnv_win->vas_win;
+
+	if (!win->dbgdir)
+		return;
+
+	debugfs_remove_recursive(win->dbgdir);
+	kfree(win->dbgname);
+	win->dbgdir = NULL;
+	win->dbgname = NULL;
+}
+
+/*
+ * Create the debugfs directory "w<winid>" for @window beneath its VAS
+ * instance's directory and populate it with the "info" and "hvwc" files.
+ * Does nothing if the instance has no debugfs directory or the name
+ * buffer cannot be allocated.
+ */
+void vas_window_init_dbgdir(struct pnv_vas_window *window)
+{
+	struct vas_window *win = &window->vas_win;
+
+	if (!window->vinst->dbgdir)
+		return;
+
+	win->dbgname = kzalloc(16, GFP_KERNEL);
+	if (!win->dbgname)
+		return;
+
+	snprintf(win->dbgname, 16, "w%d", win->winid);
+
+	win->dbgdir = debugfs_create_dir(win->dbgname, window->vinst->dbgdir);
+
+	debugfs_create_file("info", 0444, win->dbgdir, window, &info_fops);
+	debugfs_create_file("hvwc", 0444, win->dbgdir, window, &hvwc_fops);
+}
+
+/*
+ * Create the debugfs directory "v<vas_id>" for @vinst beneath the VAS
+ * root directory, setting the root up first if needed. If the name buffer
+ * cannot be allocated, vinst->dbgdir is left untouched.
+ */
+void vas_instance_init_dbgdir(struct vas_instance *vinst)
+{
+	vas_init_dbgdir();
+
+	vinst->dbgname = kzalloc(16, GFP_KERNEL);
+	if (vinst->dbgname) {
+		snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id);
+		vinst->dbgdir = debugfs_create_dir(vinst->dbgname, vas_debugfs);
+	}
+}
+
+/*
+ * Create the top-level "vas" debugfs directory. Only the first call does
+ * any work; later calls return immediately, whether or not that first
+ * attempt succeeded.
+ */
+void vas_init_dbgdir(void)
+{
+	static bool first_time = true;
+
+	if (first_time) {
+		first_time = false;
+		vas_debugfs = debugfs_create_dir("vas", NULL);
+	}
+}
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
new file mode 100644
index 0000000000..2b47d5a863
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VAS Fault handling.
+ * Copyright 2019, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+
+#include "vas.h"
+
+/*
+ * The fault window FIFO can be at most 8MB (VAS_RX_FIFO_SIZE_MAX).
+ * A 4MB FIFO is used here because every VAS instance has its own
+ * fault window.
+ * An 8MB FIFO can be used if more faults are expected per VAS
+ * instance.
+ */
+#define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
+
+/*
+ * Log the fault FIFO size and capacity of @vinst, then hex-dump the FIFO
+ * starting at @entry, four 64-bit words per line, stopping after ten
+ * CRBs' worth of words or at the end of the FIFO.
+ */
+static void dump_fifo(struct vas_instance *vinst, void *entry)
+{
+	unsigned long *limit = vinst->fault_fifo + vinst->fault_fifo_size;
+	unsigned long *word = entry;
+	int idx = 0;
+
+	pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size,
+			vinst->fault_fifo_size / CRB_SIZE);
+
+	/* Dump 10 CRB entries or until end of FIFO */
+	pr_err("Fault FIFO Dump:\n");
+	while (idx < 10*(CRB_SIZE/8) && word < limit) {
+		pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
+			idx, word, word[0], word[1], word[2], word[3]);
+		idx += 4;
+		word += 4;
+	}
+}
+
+/*
+ * Process valid CRBs in fault FIFO.
+ * NX processes user space requests, returns credit and updates the status
+ * in CRB. If it encounters a translation error when accessing CRB or
+ * request buffers, it raises an interrupt on the CPU to handle the fault.
+ * It takes credit on fault window, updates nx_fault_stamp in CRB with
+ * the following information and pastes CRB in fault FIFO.
+ *
+ * pswid - window ID of the window on which the request is sent.
+ * fault_storage_addr - fault address
+ *
+ * It can raise a single interrupt for multiple faults. Expects OS to
+ * process all valid faults and return credit for each fault on user
+ * space and fault windows. This fault FIFO control will be done with
+ * credit mechanism. NX can continuously paste CRBs until credits are not
+ * available on fault window. Otherwise, returns with RMA_reject.
+ *
+ * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128)
+ *
+ * @irq is unused; @data is the struct vas_instance whose FIFO is drained.
+ * Returns IRQ_HANDLED once an invalid (already consumed) entry is reached.
+ */
+irqreturn_t vas_fault_thread_fn(int irq, void *data)
+{
+	struct vas_instance *vinst = data;
+	struct coprocessor_request_block *crb, *entry;
+	struct coprocessor_request_block buf;
+	struct pnv_vas_window *window;
+	unsigned long flags;
+	void *fifo;
+
+	/* Each FIFO entry is copied into this on-stack CRB before processing */
+	crb = &buf;
+
+	/*
+	 * VAS can interrupt with multiple page faults. So process all
+	 * valid CRBs within fault FIFO until reaches invalid CRB.
+	 * We use CCW[0] and pswid to validate CRBs:
+	 *
+	 * CCW[0]	Reserved bit. When NX pastes CRB, CCW[0]=0
+	 *		OS sets this bit to 1 after reading CRB.
+	 * pswid	NX assigns window ID. Set pswid to -1 after
+	 *		reading CRB from fault FIFO.
+	 *
+	 * We exit this function if no valid CRBs are available to process.
+	 * So acquire fault_lock and reset fifo_in_progress to 0 before
+	 * exit.
+	 * In case kernel receives another interrupt with different page
+	 * fault, interrupt handler returns with IRQ_HANDLED if
+	 * fifo_in_progress is set. Means these new faults will be
+	 * handled by the current thread. Otherwise set fifo_in_progress
+	 * and return IRQ_WAKE_THREAD to wake up thread.
+	 */
+	while (true) {
+		spin_lock_irqsave(&vinst->fault_lock, flags);
+		/*
+		 * Advance the fault fifo pointer to next CRB.
+		 * Use CRB_SIZE rather than sizeof(*crb) since the latter is
+		 * aligned to CRB_ALIGN (256) but the CRB written to by VAS is
+		 * only CRB_SIZE in len.
+		 */
+		fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE);
+		entry = fifo;
+
+		/* Invalid entry: FIFO drained, let the handler wake us next time */
+		if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY))
+			|| (entry->ccw & cpu_to_be32(CCW0_INVALID))) {
+			vinst->fifo_in_progress = 0;
+			spin_unlock_irqrestore(&vinst->fault_lock, flags);
+			return IRQ_HANDLED;
+		}
+
+		spin_unlock_irqrestore(&vinst->fault_lock, flags);
+		/* Move to the next slot, wrapping at the end of the FIFO */
+		vinst->fault_crbs++;
+		if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE))
+			vinst->fault_crbs = 0;
+
+		/* Take a private copy, then mark the FIFO slot as consumed */
+		memcpy(crb, fifo, CRB_SIZE);
+		entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY);
+		entry->ccw |= cpu_to_be32(CCW0_INVALID);
+		/*
+		 * Return credit for the fault window.
+		 */
+		vas_return_credit(vinst->fault_win, false);
+
+		pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n",
+				vinst->vas_id, vinst->fault_fifo, fifo,
+				vinst->fault_crbs);
+
+		vas_dump_crb(crb);
+		/* Look up the send window named by the copied CRB's pswid */
+		window = vas_pswid_to_window(vinst,
+				be32_to_cpu(crb->stamp.nx.pswid));
+
+		if (IS_ERR(window)) {
+			/*
+			 * We got an interrupt about a specific send
+			 * window but we can't find that window and we can't
+			 * even clean it up (return credit on user space
+			 * window).
+			 * But we should not get here.
+			 * TODO: Disable IRQ.
+			 */
+			dump_fifo(vinst, (void *)entry);
+			pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n",
+				vinst->vas_id, vinst->fault_fifo, fifo,
+				be32_to_cpu(crb->stamp.nx.pswid),
+				vinst->fault_crbs);
+
+			WARN_ON_ONCE(1);
+		} else {
+			/*
+			 * NX sees faults only with user space windows.
+			 */
+			if (window->user_win)
+				vas_update_csb(crb, &window->vas_win.task_ref);
+			else
+				WARN_ON_ONCE(!window->user_win);
+
+			/*
+			 * Return credit for send window after processing
+			 * fault CRB.
+			 */
+			vas_return_credit(window, true);
+		}
+	}
+}
+
+/*
+ * Hard IRQ handler for VAS faults on the instance passed as @dev_id.
+ *
+ * Returns IRQ_WAKE_THREAD (and marks the FIFO as in progress) when no
+ * thread is currently draining the fault FIFO, IRQ_HANDLED when one is.
+ */
+irqreturn_t vas_fault_handler(int irq, void *dev_id)
+{
+	struct vas_instance *vinst = dev_id;
+	unsigned long flags;
+	irqreturn_t ret;
+
+	/*
+	 * NX may raise one interrupt for several faults, and the fault
+	 * thread keeps processing CRBs until it finds an invalid entry. A
+	 * thread started by an earlier interrupt can therefore pick up the
+	 * CRBs behind this one, so only wake the thread if it is not
+	 * already running.
+	 */
+	spin_lock_irqsave(&vinst->fault_lock, flags);
+
+	if (!vinst->fifo_in_progress) {
+		vinst->fifo_in_progress = 1;
+		ret = IRQ_WAKE_THREAD;
+	} else {
+		ret = IRQ_HANDLED;
+	}
+
+	spin_unlock_irqrestore(&vinst->fault_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Fault window is opened per VAS instance. NX pastes fault CRB in fault
+ * FIFO upon page faults.
+ *
+ * Allocates the fault FIFO for @vinst, marks every entry invalid and opens
+ * a receive window of type VAS_COP_TYPE_FAULT backed by that FIFO.
+ *
+ * Returns 0 on success, -ENOMEM if the FIFO cannot be allocated, or the
+ * error from vas_rx_win_open(). On failure vinst->fault_fifo is NULL.
+ */
+int vas_setup_fault_window(struct vas_instance *vinst)
+{
+	struct vas_rx_win_attr attr;
+	struct vas_window *win;
+
+	vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
+	vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
+	if (!vinst->fault_fifo) {
+		pr_err("Unable to alloc %d bytes for fault_fifo\n",
+				vinst->fault_fifo_size);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Invalidate all CRB entries. NX pastes valid entry for each fault.
+	 * Note memset() fills with the low byte of its value argument.
+	 */
+	memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size);
+	vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT);
+
+	attr.rx_fifo_size = vinst->fault_fifo_size;
+	attr.rx_fifo = __pa(vinst->fault_fifo);
+
+	/*
+	 * Max creds is based on number of CRBs can fit in the FIFO.
+	 * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds
+	 * will be 0xffff since the receive creds field is 16bits wide.
+	 */
+	attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE;
+	attr.lnotify_lpid = 0;
+	attr.lnotify_pid = mfspr(SPRN_PID);
+	/* NOTE(review): tid is also taken from SPRN_PID - confirm intended */
+	attr.lnotify_tid = mfspr(SPRN_PID);
+
+	win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr);
+	if (IS_ERR(win)) {
+		pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win));
+		kfree(vinst->fault_fifo);
+		/* Don't leave a dangling pointer behind in the instance */
+		vinst->fault_fifo = NULL;
+		return PTR_ERR(win);
+	}
+
+	vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win);
+
+	pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
+			vinst->fault_win->vas_win.winid, attr.lnotify_lpid,
+			attr.lnotify_pid, attr.lnotify_tid);
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
new file mode 100644
index 0000000000..ca2e08f2dd
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM	vas
+
+#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _VAS_TRACE_H
+#include <linux/tracepoint.h>
+#include <linux/sched.h>
+#include <asm/vas.h>
+
+/*
+ * vas_rx_win_open: records the pid of @tsk, @vasid, @cop and the
+ * lnotify_lpid/lnotify_pid/lnotify_tid values of @rxattr.
+ *
+ * NOTE(review): the tsk and rxattr entry fields are declared but never
+ * assigned in TP_fast_assign - confirm whether they are still needed.
+ */
+TRACE_EVENT(	vas_rx_win_open,
+
+		TP_PROTO(struct task_struct *tsk,
+			 int vasid,
+			 int cop,
+			 struct vas_rx_win_attr *rxattr),
+
+		TP_ARGS(tsk, vasid, cop, rxattr),
+
+		TP_STRUCT__entry(
+			__field(struct task_struct *, tsk)
+			__field(int, pid)
+			__field(int, cop)
+			__field(int, vasid)
+			__field(struct vas_rx_win_attr *, rxattr)
+			__field(int, lnotify_lpid)
+			__field(int, lnotify_pid)
+			__field(int, lnotify_tid)
+		),
+
+		TP_fast_assign(
+			__entry->pid = tsk->pid;
+			__entry->vasid = vasid;
+			__entry->cop = cop;
+			__entry->lnotify_lpid = rxattr->lnotify_lpid;
+			__entry->lnotify_pid = rxattr->lnotify_pid;
+			__entry->lnotify_tid = rxattr->lnotify_tid;
+		),
+
+		TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d",
+			__entry->pid, __entry->vasid, __entry->cop,
+			__entry->lnotify_lpid, __entry->lnotify_pid,
+			__entry->lnotify_tid)
+);
+
+/*
+ * vas_tx_win_open: records the pid of @tsk, @vasid, @cop and the lpid and
+ * pidr values of @txattr.
+ *
+ * NOTE(review): the tsk and txattr entry fields are declared but never
+ * assigned in TP_fast_assign - confirm whether they are still needed.
+ */
+TRACE_EVENT(	vas_tx_win_open,
+
+		TP_PROTO(struct task_struct *tsk,
+			 int vasid,
+			 int cop,
+			 struct vas_tx_win_attr *txattr),
+
+		TP_ARGS(tsk, vasid, cop, txattr),
+
+		TP_STRUCT__entry(
+			__field(struct task_struct *, tsk)
+			__field(int, pid)
+			__field(int, cop)
+			__field(int, vasid)
+			__field(struct vas_tx_win_attr *, txattr)
+			__field(int, lpid)
+			__field(int, pidr)
+		),
+
+		TP_fast_assign(
+			__entry->pid = tsk->pid;
+			__entry->vasid = vasid;
+			__entry->cop = cop;
+			__entry->lpid = txattr->lpid;
+			__entry->pidr = txattr->pidr;
+		),
+
+		TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d",
+			__entry->pid, __entry->vasid, __entry->cop,
+			__entry->lpid, __entry->pidr)
+);
+
+/*
+ * vas_paste_crb: records the pid of @tsk plus the VAS instance id, window
+ * id and paste address of @win.
+ *
+ * The format string carries no trailing newline: the trace core appends
+ * one itself, so including it here produced a blank line per event.
+ *
+ * NOTE(review): the tsk and win entry fields are declared but never
+ * assigned in TP_fast_assign - confirm whether they are still needed.
+ */
+TRACE_EVENT(	vas_paste_crb,
+
+		TP_PROTO(struct task_struct *tsk,
+			struct pnv_vas_window *win),
+
+		TP_ARGS(tsk, win),
+
+		TP_STRUCT__entry(
+			__field(struct task_struct *, tsk)
+			__field(struct vas_window *, win)
+			__field(int, pid)
+			__field(int, vasid)
+			__field(int, winid)
+			__field(unsigned long, paste_kaddr)
+		),
+
+		TP_fast_assign(
+			__entry->pid = tsk->pid;
+			__entry->vasid = win->vinst->vas_id;
+			__entry->winid = win->vas_win.winid;
+			__entry->paste_kaddr = (unsigned long)win->paste_kaddr;
+		),
+
+		TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx",
+			__entry->pid, __entry->vasid, __entry->winid,
+			__entry->paste_kaddr)
+);
+
+#endif /* _VAS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv
+#define TRACE_INCLUDE_FILE vas-trace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 0000000000..b664838008
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,1471 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/rcupdate.h>
+#include <linux/cred.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/switch_to.h>
+#include <asm/ppc-opcode.h>
+#include <asm/vas.h>
+#include "vas.h"
+#include "copy-paste.h"
+
+#define CREATE_TRACE_POINTS
+#include "vas-trace.h"
+
+/*
+ * Compute the paste address region for the window @window using the
+ * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
+ *
+ * @addr receives the start of the window's paste region. @len, when it
+ * is not NULL, receives the length of that region (PAGE_SIZE).
+ */
+void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len)
+{
+	int winid;
+	u64 base, shift;
+
+	base = window->vinst->paste_base_addr;
+	shift = window->vinst->paste_win_id_shift;
+	winid = window->vas_win.winid;
+
+	/*
+	 * Widen the window id before shifting. A plain "winid << shift" is
+	 * evaluated as a 32-bit int and can overflow for large window ids
+	 * or shifts before it is ever added to the 64-bit base.
+	 */
+	*addr  = base + ((u64)winid << shift);
+	if (len)
+		*len = PAGE_SIZE;
+
+	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+/*
+ * Report the bus address (@start) and size (@len) of the Hypervisor
+ * Window Context slot belonging to @window: the instance's HVWC BAR
+ * start plus one VAS_HVWC_SIZE sized slot per window id.
+ */
+static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window,
+			u64 *start, int *len)
+{
+	u64 bar_base = window->vinst->hvwc_bar_start;
+
+	*len = VAS_HVWC_SIZE;
+	*start = bar_base + window->vas_win.winid * VAS_HVWC_SIZE;
+}
+
+/*
+ * Report the bus address (@start) and size (@len) of the OS/User Window
+ * Context slot belonging to @window: the instance's UWC BAR start plus
+ * one VAS_UWC_SIZE sized slot per window id.
+ */
+static inline void get_uwc_mmio_bar(struct pnv_vas_window *window,
+			u64 *start, int *len)
+{
+	u64 bar_base = window->vinst->uwc_bar_start;
+
+	*len = VAS_UWC_SIZE;
+	*start = bar_base + window->vas_win.winid * VAS_UWC_SIZE;
+}
+
+/*
+ * Map the paste bus address of the given send window into kernel address
+ * space. Unlike MMIO regions (map_mmio_region() below), paste region must
+ * be mapped cache-able and is only applicable to send windows.
+ *
+ * On success, returns the kernel mapping and records the allocated
+ * resource name in ->paste_addr_name (released by unmap_paste_region()).
+ * On any failure, returns ERR_PTR(-ENOMEM) with every intermediate
+ * resource released and ->paste_addr_name left untouched.
+ */
+static void *map_paste_region(struct pnv_vas_window *txwin)
+{
+	int len;
+	void *map;
+	char *name;
+	u64 start;
+
+	name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
+				txwin->vas_win.winid);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	vas_win_paste_addr(txwin, &start, &len);
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+				__func__, start, len);
+		goto free_name;
+	}
+
+	map = ioremap_cache(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
+				start, len);
+		goto release_region;
+	}
+
+	/*
+	 * Publish the name only once the mapping exists, so the window
+	 * never holds a pointer to a name that was freed on an error path.
+	 */
+	txwin->paste_addr_name = name;
+
+	pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
+	return map;
+
+release_region:
+	/* Undo request_mem_region() so the range can be claimed again. */
+	release_mem_region((phys_addr_t)start, len);
+free_name:
+	kfree(name);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Reserve the bus range [@start, @start + @len) under @name and map it
+ * into kernel address space with ioremap().
+ *
+ * Returns the kernel mapping, or NULL if either the reservation or the
+ * mapping failed. Nothing stays reserved when NULL is returned.
+ */
+static void *map_mmio_region(char *name, u64 start, int len)
+{
+	void *map;
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+				__func__, start, len);
+		return NULL;
+	}
+
+	map = ioremap(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+				len);
+		/*
+		 * Callers only unmap regions for which they hold a mapping,
+		 * so the reservation has to be dropped here or it leaks.
+		 */
+		release_mem_region((phys_addr_t)start, len);
+		return NULL;
+	}
+
+	return map;
+}
+
+/*
+ * Undo a mapping set up by one of the map_*_region() helpers: drop the
+ * kernel mapping @addr, then give back the @len bytes of bus address
+ * space starting at @start.
+ */
+static void unmap_region(void *addr, u64 start, int len)
+{
+	phys_addr_t bus_start = (phys_addr_t)start;
+
+	iounmap(addr);
+	release_mem_region(bus_start, len);
+}
+
+/*
+ * Release the paste address mapping of @window, if it has one, together
+ * with the resource name that was allocated for it.
+ */
+static void unmap_paste_region(struct pnv_vas_window *window)
+{
+	u64 bus_start;
+	int size;
+
+	/* Nothing was mapped for this window. */
+	if (!window->paste_kaddr)
+		return;
+
+	vas_win_paste_addr(window, &bus_start, &size);
+	unmap_region(window->paste_kaddr, bus_start, size);
+	window->paste_kaddr = NULL;
+
+	kfree(window->paste_addr_name);
+	window->paste_addr_name = NULL;
+}
+
+/*
+ * Tear down the HVWC and UWC MMIO mappings of @window.
+ *
+ * The mapping pointers are detached from the window under vas_mutex so
+ * that we don't unmap while the window's debugfs dir is in use. This
+ * serializes window close across all VAS instances, but it is not a
+ * critical path; the mutex is only held for the pointer swap and the
+ * actual unmapping is done after it is dropped. A per-instance mutex
+ * can be added later if necessary.
+ */
+static void unmap_winctx_mmio_bars(struct pnv_vas_window *window)
+{
+	void *hvwc, *uwc;
+	u64 bar;
+	int size;
+
+	mutex_lock(&vas_mutex);
+	hvwc = window->hvwc_map;
+	window->hvwc_map = NULL;
+	uwc = window->uwc_map;
+	window->uwc_map = NULL;
+	mutex_unlock(&vas_mutex);
+
+	if (hvwc) {
+		get_hvwc_mmio_bar(window, &bar, &size);
+		unmap_region(hvwc, bar, size);
+	}
+
+	if (uwc) {
+		get_uwc_mmio_bar(window, &bar, &size);
+		unmap_region(uwc, bar, size);
+	}
+}
+
+/*
+ * Locate the Hypervisor Window Context (HVWC) and OS/User Window Context
+ * (UWC) MMIO regions of @window, map both and store the resulting kernel
+ * addresses in @window.
+ *
+ * Returns 0 when both mappings exist. If either one is missing, whatever
+ * was mapped is unmapped again and -1 is returned.
+ */
+static int map_winctx_mmio_bars(struct pnv_vas_window *window)
+{
+	u64 bar;
+	int size;
+
+	get_hvwc_mmio_bar(window, &bar, &size);
+	window->hvwc_map = map_mmio_region("HVWCM_Window", bar, size);
+
+	get_uwc_mmio_bar(window, &bar, &size);
+	window->uwc_map = map_mmio_region("UWCM_Window", bar, size);
+
+	if (window->hvwc_map && window->uwc_map)
+		return 0;
+
+	unmap_winctx_mmio_bars(window);
+	return -1;
+}
+
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window.
+ *
+ * Every register listed below is written with 0 through the window's
+ * HVWC mapping; nothing is written through the UWC mapping (see the
+ * comment at the end of the function).
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ *	 offsets in a window context are valid registers and the valid
+ *	 registers are not sequential. And, we can only write to offsets
+ *	 with valid registers.
+ */
+static void reset_window_regs(struct pnv_vas_window *window)
+{
+	write_hvwc_reg(window, VREG(LPID), 0ULL);
+	write_hvwc_reg(window, VREG(PID), 0ULL);
+	write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+	write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+	write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(AMR), 0ULL);
+	write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+	write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+	write_hvwc_reg(window, VREG(PSWID), 0ULL);
+	write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+	write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+	/* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+	/*
+	 * The send and receive window credit adder registers are also
+	 * accessible from HVWC and have been initialized above. We don't
+	 * need to initialize from the OS/User Window Context, so skip
+	 * following calls:
+	 *
+	 *	write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+	 *	write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+	 */
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for user/kernel windows. As we resolve the TODOs we may
+ * want to add fields to vas_winctx and move the initialization to
+ * init_vas_winctx_regs().
+ *
+ * @window:	window whose HVWC registers are written
+ * @user_win:	true for a user space window; additionally sets the DR
+ *		and PR fields of XLATE_MSR
+ *
+ * Writes, in order: XLATE_MSR, XLATE_LPCR, XLATE_CTL, AMR and SEIDR.
+ */
+static void init_xlate_regs(struct pnv_vas_window *window, bool user_win)
+{
+	u64 lpcr, val;
+
+	/*
+	 * MSR_TA, MSR_US are false for both kernel and user.
+	 * MSR_DR and MSR_PR are false for kernel.
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
+	val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
+	if (user_win) {
+		val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
+		val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
+	}
+	write_hvwc_reg(window, VREG(XLATE_MSR), val);
+
+	/* ISL and TC below are taken from the current value of the LPCR. */
+	lpcr = mfspr(SPRN_LPCR);
+	val = 0ULL;
+	/*
+	 * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
+	 *	 Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
+	 *
+	 * NOTE: From Section 1.3.1, Address Translation Context of the
+	 *	 Nest MMU Workbook, LPCR_SC should be 0 for Power9.
+	 */
+	val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
+	/*
+	 * NOTE(review): "lpcr & LPCR_ISL" and "lpcr & LPCR_TC" pass the
+	 * masked LPCR bit at its original position rather than a 0/1
+	 * flag - confirm SET_FIELD() stores these as intended.
+	 */
+	val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
+	val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
+	val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
+	write_hvwc_reg(window, VREG(XLATE_LPCR), val);
+
+	/*
+	 * Section 1.3.1 (Address translation Context) of NMMU workbook.
+	 *	0b00	Hashed Page Table mode
+	 *	0b01	Reserved
+	 *	0b10	Radix on HPT
+	 *	0b11	Radix on Radix
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
+	write_hvwc_reg(window, VREG(XLATE_CTL), val);
+
+	/*
+	 * TODO: Can we mfspr(AMR) even for user windows?
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
+	write_hvwc_reg(window, VREG(AMR), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_SEIDR, val, 0);
+	write_hvwc_reg(window, VREG(SEIDR), val);
+}
+
+/*
+ * Program the Reserved Send Buffer Count of the send window @txwin.
+ *
+ * Per section 1.3.1.2.1 of the VAS workbook this involves writing the
+ * register and reading it back to confirm that the hardware has enough
+ * buffers to reserve. The request can only be fulfilled on a best-effort
+ * basis, so no error is reported if it cannot be.
+ *
+ * TODO: Reserved (aka dedicated) send buffers are not supported yet, so
+ *	 the count is simply written as zero and @winctx is not consulted.
+ */
+static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin,
+				struct vas_winctx *winctx)
+{
+	const u64 rsvd_count = 0ULL;
+
+	write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), rsvd_count);
+}
+
+/*
+ * init_winctx_regs()
+ *	Initialize window context registers for a window from the values
+ *	in @winctx. This is used for both receive windows
+ *	(vas_rx_win_open()) and send windows (vas_tx_win_open()).
+ *	Except for caching control and marking window open, the registers
+ *	are initialized in the order listed in Section 3.1.4 (Window Context
+ *	Cache Register Details) of the VAS workbook although they don't need
+ *	to be.
+ *
+ *	All registers are first cleared with reset_window_regs(). The
+ *	WINCTL register, which carries the "open" bit, is written last.
+ *
+ * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
+ *	(so that it can get a large contiguous area) and passes that buffer
+ *	to kernel via device tree. We now write that buffer address to the
+ *	FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
+ *	write the per-chip RX FIFO addresses to the windows during boot-up
+ *	as a one-time task? That could work for NX but what about other
+ *	receivers?  Let the receivers tell us the rx-fifo buffers for now.
+ */
+static void init_winctx_regs(struct pnv_vas_window *window,
+			     struct vas_winctx *winctx)
+{
+	u64 val;
+	int fifo_size;
+
+	reset_window_regs(window);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LPID, val, winctx->lpid);
+	write_hvwc_reg(window, VREG(LPID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
+	write_hvwc_reg(window, VREG(PID), val);
+
+	init_xlate_regs(window, winctx->user_win);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id);
+	write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
+
+	/* In PowerNV, interrupts go to HV. */
+	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
+	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
+	write_hvwc_reg(window, VREG(PSWID), val);
+
+	write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+	write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+	write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+
+	/*
+	 * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
+	 *	 register as is - do NOT shift the address into VAS_LFIFO_BAR
+	 *	 bit fields! Ok to set the page migration select fields -
+	 *	 VAS ignores the lower 10+ bits in the address anyway, because
+	 *	 the minimum FIFO size is 1K?
+	 *
+	 * See also: Design note in function header.
+	 */
+	val = winctx->rx_fifo;
+	val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
+	write_hvwc_reg(window, VREG(LFIFO_BAR), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
+	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
+	val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
+	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);
+
+	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+
+	/* The same credit limit is programmed for both directions. */
+	val = 0ULL;
+	val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
+	write_hvwc_reg(window, VREG(LRX_WCRED), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
+	write_hvwc_reg(window, VREG(TX_WCRED), val);
+
+	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+
+	/* The register takes log2 of the FIFO size expressed in KB. */
+	fifo_size = winctx->rx_fifo_size / 1024;
+
+	/*
+	 * NOTE(review): ->rx_fifo_size is left at 0 by
+	 * init_winctx_for_txwin() and is 0 for user (FTW) receive
+	 * windows, so ilog2() is handed 0 here - confirm the value
+	 * written is harmless for those windows.
+	 */
+	val = 0ULL;
+	val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
+	write_hvwc_reg(window, VREG(LFIFO_SIZE), val);
+
+	/* Update window control and caching control registers last so
+	 * we mark the window open only after fully initializing it and
+	 * pushing context to cache.
+	 */
+
+	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+
+	init_rsvd_tx_buf_count(window, winctx);
+
+	/* for a send window, point to the matching receive window */
+	val = 0ULL;
+	val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
+	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);
+
+	write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
+	val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
+	val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
+	val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
+	write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
+	write_hvwc_reg(window, VREG(LNOTIFY_PID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
+	write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
+	write_hvwc_reg(window, VREG(LNOTIFY_TID), val);
+
+	val = 0ULL;
+	val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
+	val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
+	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);
+
+	/* Skip read-only registers NX_UTIL and NX_UTIL_SE */
+
+	write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+	write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+
+	/* Finally, push window context to memory and... */
+	val = 0ULL;
+	val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
+	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+	/* ... mark the window open for business */
+	val = 0ULL;
+	val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
+	val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
+	val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
+	val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
+	val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
+	val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
+	val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
+	val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
+	val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
+	write_hvwc_reg(window, VREG(WINCTL), val);
+}
+
+/* Hand the window id @winid back to the allocator @ida. */
+static void vas_release_window_id(struct ida *ida, int winid)
+{
+	ida_free(ida, winid);
+}
+
+/*
+ * Allocate a window id in the range [0, VAS_WINDOWS_PER_CHIP - 1] from
+ * @ida.
+ *
+ * Returns the new id. If the range is exhausted, logs an error and
+ * returns -EAGAIN. Any other value from ida_alloc_max() is passed
+ * through unchanged.
+ */
+static int vas_assign_window_id(struct ida *ida)
+{
+	int id;
+
+	id = ida_alloc_max(ida, VAS_WINDOWS_PER_CHIP - 1, GFP_KERNEL);
+	if (id != -ENOSPC)
+		return id;
+
+	pr_err("Too many (%d) open windows\n", VAS_WINDOWS_PER_CHIP);
+	return -EAGAIN;
+}
+
+/*
+ * Release everything vas_window_alloc() set up for @window: the window
+ * context MMIO mappings, the debugfs directory, the window structure
+ * itself and finally its window id.
+ */
+static void vas_window_free(struct pnv_vas_window *window)
+{
+	/* Grab what we need from @window before it is freed. */
+	struct ida *ida = &window->vinst->ida;
+	int id = window->vas_win.winid;
+
+	unmap_winctx_mmio_bars(window);
+	vas_window_free_dbgdir(window);
+	kfree(window);
+
+	vas_release_window_id(ida, id);
+}
+
+/*
+ * Allocate a window on the VAS instance @vinst: reserve a window id,
+ * allocate the zeroed window structure, map its window context MMIO
+ * regions and create its debugfs directory.
+ *
+ * Returns the new window. If no window id could be assigned, the error
+ * from vas_assign_window_id() is returned as an ERR_PTR. Any later
+ * failure releases the id and returns ERR_PTR(-ENOMEM).
+ */
+static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst)
+{
+	struct pnv_vas_window *win;
+	int id;
+
+	id = vas_assign_window_id(&vinst->ida);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	win = kzalloc(sizeof(*win), GFP_KERNEL);
+	if (!win)
+		goto release_id;
+
+	win->vinst = vinst;
+	win->vas_win.winid = id;
+
+	if (map_winctx_mmio_bars(win) != 0)
+		goto free_win;
+
+	vas_window_init_dbgdir(win);
+
+	return win;
+
+free_win:
+	kfree(win);
+release_id:
+	vas_release_window_id(&vinst->ida, id);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Drop one send-window reference on the receive window @rxwin, undoing
+ * the increment of ->num_txwins done in get_vinst_rxwin().
+ */
+static void put_rx_win(struct pnv_vas_window *rxwin)
+{
+	/* Only receive windows are reference counted this way. */
+	WARN_ON_ONCE(rxwin->tx_win);
+
+	atomic_dec(&rxwin->num_txwins);
+}
+
+/*
+ * Look up the user space receive window named by @pswid.
+ *      - The vasid encoded in @pswid must be that of @vinst (so both
+ *        send and receive windows are on the same VAS instance).
+ *      - The window must exist, must not be a send window and must be
+ *        of coprocessor type FTW.
+ *
+ * Returns the window, or ERR_PTR(-EINVAL) if any of the above fails.
+ *
+ * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
+ */
+static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst,
+					     u32 pswid)
+{
+	struct pnv_vas_window *win;
+	int pswid_vasid, pswid_winid;
+
+	decode_pswid(pswid, &pswid_vasid, &pswid_winid);
+
+	if (vinst->vas_id != pswid_vasid)
+		return ERR_PTR(-EINVAL);
+
+	win = vinst->windows[pswid_winid];
+
+	if (win && !win->tx_win && win->vas_win.cop == VAS_COP_TYPE_FTW)
+		return win;
+
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Find the receive window on @vinst that a new send window should be
+ * paired with and take a reference on it (->num_txwins).
+ *
+ * For FTW the window is looked up through @pswid; for every other
+ * coprocessor type it comes from the ->rxwin[] table indexed by @cop.
+ * Returns the window or ERR_PTR(-EINVAL) if there is none.
+ *
+ * See also function header of set_vinst_win().
+ */
+static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+			enum vas_cop_type cop, u32 pswid)
+{
+	struct pnv_vas_window *win;
+
+	mutex_lock(&vinst->mutex);
+
+	if (cop == VAS_COP_TYPE_FTW) {
+		win = get_user_rxwin(vinst, pswid);
+	} else {
+		win = vinst->rxwin[cop];
+		if (!win)
+			win = ERR_PTR(-EINVAL);
+	}
+
+	if (!IS_ERR(win))
+		atomic_inc(&win->num_txwins);
+
+	mutex_unlock(&vinst->mutex);
+
+	return win;
+}
+
+/*
+ * A VAS instance keeps two tables of windows:
+ *
+ *   ->windows[]  holds every window of the instance, indexed by window
+ *                id. It is used to look up send windows during fault
+ *                handling and receive windows when pairing user space
+ *                send/receive windows.
+ *
+ *   ->rxwin[]    holds the receive windows associated with NX engines,
+ *                indexed by coprocessor type (VAS_COP_TYPE_MAX entries).
+ *
+ * Record @window in ->windows[] and, if it is a kernel receive window,
+ * in ->rxwin[] as well.
+ */
+static void set_vinst_win(struct vas_instance *vinst,
+			struct pnv_vas_window *window)
+{
+	int winid = window->vas_win.winid;
+
+	mutex_lock(&vinst->mutex);
+
+	/*
+	 * Only one receive window may exist per coprocessor type, user
+	 * (FTW) windows being the exception.
+	 */
+	if (!(window->user_win || window->tx_win)) {
+		WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]);
+		vinst->rxwin[window->vas_win.cop] = window;
+	}
+
+	WARN_ON_ONCE(vinst->windows[winid] != NULL);
+	vinst->windows[winid] = window;
+
+	mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Remove @window from the window table(s) of its VAS instance; the
+ * inverse of set_vinst_win().
+ */
+static void clear_vinst_win(struct pnv_vas_window *window)
+{
+	struct vas_instance *vinst = window->vinst;
+	int winid = window->vas_win.winid;
+
+	mutex_lock(&vinst->mutex);
+
+	/* Kernel receive windows are also tracked by coprocessor type. */
+	if (!(window->user_win || window->tx_win)) {
+		WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]);
+		vinst->rxwin[window->vas_win.cop] = NULL;
+	}
+
+	WARN_ON_ONCE(vinst->windows[winid] != window);
+	vinst->windows[winid] = NULL;
+
+	mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Build the window context @winctx for the receive window @rxwin from
+ * the caller supplied attributes @rxattr.
+ */
+static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
+			struct vas_rx_win_attr *rxattr,
+			struct vas_winctx *winctx)
+{
+	/*
+	 * Start from an all-zero context and fill in only the non-zero
+	 * fields. Fields that stay 0/false and why:
+	 *
+	 *	->notify_os_intr_reg	In powerNV, send intrs to HV
+	 *	->notify_disable	False for NX windows
+	 *	->intr_disable		False for Fault Windows
+	 *	->xtra_write		False for NX windows
+	 *	->notify_early		NA for NX windows
+	 *	->rsvd_txbuf_count	NA for Rx windows
+	 *	->lpid, ->pid, ->tid	NA for Rx windows
+	 */
+	memset(winctx, 0, sizeof(struct vas_winctx));
+
+	/* Values that do not depend on the kind of receive window. */
+	winctx->dma_type = VAS_DMA_TYPE_INJECT;
+	winctx->min_scope = VAS_SCOPE_LOCAL;
+	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+	winctx->wcreds_max = rxwin->vas_win.wcreds_max;
+	if (rxwin->vinst->virq)
+		winctx->irq_port = rxwin->vinst->irq_port;
+
+	/* Values copied straight from the caller's attributes. */
+	winctx->rx_fifo = rxattr->rx_fifo;
+	winctx->rx_fifo_size = rxattr->rx_fifo_size;
+	winctx->pin_win = rxattr->pin_win;
+	winctx->nx_win = rxattr->nx_win;
+	winctx->fault_win = rxattr->fault_win;
+	winctx->user_win = rxattr->user_win;
+	winctx->rej_no_credit = rxattr->rej_no_credit;
+	winctx->rx_word_mode = rxattr->rx_win_ord_mode;
+	winctx->tx_word_mode = rxattr->tx_win_ord_mode;
+	winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
+	winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+	winctx->notify_early = rxattr->notify_early;
+	winctx->lnotify_lpid = rxattr->lnotify_lpid;
+	winctx->lnotify_pid = rxattr->lnotify_pid;
+	winctx->lnotify_tid = rxattr->lnotify_tid;
+	winctx->pswid = rxattr->pswid;
+	winctx->tc_mode = rxattr->tc_mode;
+
+	/* Overrides that depend on the kind of receive window. */
+	if (winctx->nx_win) {
+		winctx->data_stamp = true;
+		winctx->intr_disable = true;
+		winctx->pin_win = true;
+
+		WARN_ON_ONCE(winctx->fault_win);
+		WARN_ON_ONCE(!winctx->rx_word_mode);
+		WARN_ON_ONCE(!winctx->tx_word_mode);
+		WARN_ON_ONCE(winctx->notify_after_count);
+	} else if (winctx->fault_win) {
+		winctx->notify_disable = true;
+	} else if (winctx->user_win) {
+		/*
+		 * Section 1.8.1 Low Latency Core-Core Wake up of
+		 * the VAS workbook:
+		 *
+		 *      - disable credit checks ([tr]x_wcred_mode = false)
+		 *      - disable FIFO writes
+		 *      - enable ASB_Notify, disable interrupt
+		 */
+		winctx->fifo_disable = true;
+		winctx->intr_disable = true;
+		winctx->rx_fifo = 0;
+	}
+}
+
+/*
+ * Validate the attributes @attr of a receive window that is to be opened
+ * for coprocessor type @cop. Returns true if the combination is
+ * acceptable, false otherwise.
+ */
+static bool rx_win_args_valid(enum vas_cop_type cop,
+			struct vas_rx_win_attr *attr)
+{
+	pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
+			attr->fault_win, attr->notify_disable,
+			attr->intr_disable, attr->notify_early,
+			attr->rx_fifo_size);
+
+	if (cop >= VAS_COP_TYPE_MAX)
+		return false;
+
+	/* Everything but FTW needs a FIFO of at least the minimum size. */
+	if (cop != VAS_COP_TYPE_FTW &&
+				attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN)
+		return false;
+
+	if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
+		return false;
+
+	if (!attr->wcreds_max)
+		return false;
+
+	if (attr->nx_win) {
+		/* An NX window cannot also be a fault or user window. */
+		if (attr->fault_win || attr->user_win)
+			return false;
+		/*
+		 * Section 3.1.4.32: NX Windows must not disable notification,
+		 *	and must not enable interrupts or early notification.
+		 */
+		return !attr->notify_disable && attr->intr_disable &&
+				!attr->notify_early;
+	}
+
+	if (attr->fault_win) {
+		/* A fault window cannot also be a user window. */
+		if (attr->user_win)
+			return false;
+		/*
+		 * Section 3.1.4.32: Fault windows must disable notification
+		 *	but not interrupts.
+		 */
+		return attr->notify_disable && !attr->intr_disable;
+	}
+
+	if (attr->user_win) {
+		/*
+		 * User receive windows are only for fast-thread-wakeup
+		 * (FTW). They don't need a FIFO and must disable interrupts
+		 */
+		return !attr->rx_fifo && !attr->rx_fifo_size &&
+				attr->intr_disable;
+	}
+
+	/* Rx window must be one of NX or Fault or User window. */
+	return false;
+}
+
+/*
+ * Fill @rxattr with the default receive window attributes for the
+ * coprocessor type @cop. The structure is zeroed first; coprocessor
+ * types not handled below are left all-zero.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
+{
+	memset(rxattr, 0, sizeof(*rxattr));
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+	case VAS_COP_TYPE_GZIP:
+	case VAS_COP_TYPE_GZIP_HIPRI:
+		/* NX engine receive windows */
+		rxattr->pin_win = true;
+		rxattr->nx_win = true;
+		rxattr->fault_win = false;
+		rxattr->intr_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->tx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FAULT:
+		rxattr->pin_win = true;
+		rxattr->fault_win = true;
+		rxattr->notify_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->rej_no_credit = true;
+		rxattr->tc_mode = VAS_THRESH_DISABLED;
+		break;
+
+	case VAS_COP_TYPE_FTW:
+		rxattr->user_win = true;
+		rxattr->intr_disable = true;
+
+		/*
+		 * As noted in the VAS Workbook we disable credit checks.
+		 * If we enable credit checks in the future, we must also
+		 * implement a mechanism to return the user credits or new
+		 * paste operations will fail.
+		 */
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
+
+/*
+ * Open a receive window for coprocessor type @cop on the VAS instance
+ * @vasid, configured according to @rxattr.
+ *
+ * Returns the new window, ERR_PTR(-EINVAL) if the attributes are not
+ * valid or the instance does not exist, or the error reported by
+ * vas_window_alloc().
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+			struct vas_rx_win_attr *rxattr)
+{
+	struct vas_instance *vinst;
+	struct pnv_vas_window *win;
+	struct vas_winctx ctx;
+
+	trace_vas_rx_win_open(current, vasid, cop, rxattr);
+
+	if (!rx_win_args_valid(cop, rxattr))
+		return ERR_PTR(-EINVAL);
+
+	vinst = find_vas_instance(vasid);
+	if (!vinst) {
+		pr_devel("vasid %d not found!\n", vasid);
+		return ERR_PTR(-EINVAL);
+	}
+	pr_devel("Found instance %d\n", vasid);
+
+	win = vas_window_alloc(vinst);
+	if (IS_ERR(win)) {
+		pr_devel("Unable to allocate memory for Rx window\n");
+		return (struct vas_window *)win;
+	}
+
+	win->vas_win.cop = cop;
+	win->vas_win.wcreds_max = rxattr->wcreds_max;
+	win->user_win = rxattr->user_win;
+	win->nx_win = rxattr->nx_win;
+	win->tx_win = false;
+
+	/* Program the hardware context, then make the window visible. */
+	init_winctx_for_rxwin(win, rxattr, &ctx);
+	init_winctx_regs(win, &ctx);
+
+	set_vinst_win(vinst, win);
+
+	return &win->vas_win;
+}
+EXPORT_SYMBOL_GPL(vas_rx_win_open);
+
+/*
+ * Fill @txattr with the default send window attributes for the
+ * coprocessor type @cop. The structure is zeroed first; coprocessor
+ * types not handled below are left all-zero.
+ */
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
+{
+	memset(txattr, 0, sizeof(*txattr));
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+	case VAS_COP_TYPE_GZIP:
+	case VAS_COP_TYPE_GZIP_HIPRI:
+		/* Send windows that target an NX engine */
+		txattr->rej_no_credit = false;
+		txattr->rx_wcred_mode = true;
+		txattr->tx_wcred_mode = true;
+		txattr->rx_win_ord_mode = true;
+		txattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FTW:
+		txattr->user_win = true;
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
+
+/*
+ * Build the window context @winctx for the send window @txwin from the
+ * caller supplied attributes @txattr and from the receive window it is
+ * paired with (txwin->rxwin).
+ */
+static void init_winctx_for_txwin(struct pnv_vas_window *txwin,
+			struct vas_tx_win_attr *txattr,
+			struct vas_winctx *winctx)
+{
+	/*
+	 * Start from an all-zero context and fill in only the non-zero
+	 * fields. Fields that stay 0/false and why:
+	 *
+	 *	->notify_os_intr_reg	In powernv, send intrs to HV
+	 *	->rsvd_txbuf_count	Not supported yet.
+	 *	->notify_disable	False for NX windows
+	 *	->xtra_write		False for NX windows
+	 *	->notify_early		NA for NX windows
+	 *	->lnotify_lpid		NA for Tx windows
+	 *	->lnotify_pid		NA for Tx windows
+	 *	->lnotify_tid		NA for Tx windows
+	 *	->tx_win_cred_mode	Ignore for now for NX windows
+	 *	->rx_win_cred_mode	Ignore for now for NX windows
+	 */
+	memset(winctx, 0, sizeof(struct vas_winctx));
+
+	/* Values that are the same for every send window. */
+	winctx->intr_disable = true;
+	winctx->dma_type = VAS_DMA_TYPE_INJECT;
+	winctx->min_scope = VAS_SCOPE_LOCAL;
+	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+
+	/* Values taken from the window and its paired receive window. */
+	winctx->wcreds_max = txwin->vas_win.wcreds_max;
+	winctx->rx_win_id = txwin->rxwin->vas_win.winid;
+	winctx->nx_win = txwin->rxwin->nx_win;
+	if (winctx->nx_win)
+		winctx->data_stamp = true;
+
+	/* Values copied straight from the caller's attributes. */
+	winctx->user_win = txattr->user_win;
+	winctx->pin_win = txattr->pin_win;
+	winctx->rej_no_credit = txattr->rej_no_credit;
+	winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
+	winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
+	winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+	winctx->tx_wcred_mode = txattr->tx_wcred_mode;
+	winctx->rx_word_mode = txattr->rx_win_ord_mode;
+	winctx->tx_word_mode = txattr->tx_win_ord_mode;
+	winctx->lpid = txattr->lpid;
+	winctx->pidr = txattr->pidr;
+	winctx->tc_mode = txattr->tc_mode;
+
+	/*
+	 * IRQ and fault window setup is successful. Set fault window
+	 * for the send window so that ready to handle faults.
+	 */
+	if (txwin->vinst->virq) {
+		winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid;
+		winctx->irq_port = txwin->vinst->irq_port;
+	}
+
+	/* Use the caller's PSWID if one was given, else derive our own. */
+	if (txattr->pswid)
+		winctx->pswid = txattr->pswid;
+	else
+		winctx->pswid = encode_pswid(txwin->vinst->vas_id,
+					     txwin->vas_win.winid);
+}
+
+/*
+ * Validate the attributes @attr of a send window that is to be opened
+ * for coprocessor type @cop. Returns true if the combination is
+ * acceptable, false otherwise.
+ */
+static bool tx_win_args_valid(enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr)
+{
+	if (attr->tc_mode != VAS_THRESH_DISABLED)
+		return false;
+
+	/*
+	 * VAS_COP_TYPE_MAX itself is not a valid type: @cop is later used
+	 * to index vinst->rxwin[], which has VAS_COP_TYPE_MAX entries.
+	 * This matches the check in rx_win_args_valid().
+	 */
+	if (cop >= VAS_COP_TYPE_MAX)
+		return false;
+
+	if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
+		return false;
+
+	if (attr->user_win) {
+		/* Reserved send buffers are not available to user windows. */
+		if (attr->rsvd_txbuf_count)
+			return false;
+
+		/* User windows are limited to FTW and the GZIP types. */
+		if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP &&
+			cop != VAS_COP_TYPE_GZIP_HIPRI)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Open a send window for coprocessor type @cop on the VAS instance
+ * @vasid, configured according to @attr, and pair it with the matching
+ * receive window of that instance.
+ *
+ * Returns the new window on success. Returns ERR_PTR(-EINVAL) if the
+ * attributes are not valid or the instance does not exist, the error
+ * from get_vinst_rxwin() if there is no receive window to pair with,
+ * and otherwise the error of whichever later setup step failed. On
+ * those later failures the reference taken on the receive window is
+ * dropped again.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr)
+{
+	int rc;
+	struct pnv_vas_window *txwin;
+	struct pnv_vas_window *rxwin;
+	struct vas_winctx winctx;
+	struct vas_instance *vinst;
+
+	trace_vas_tx_win_open(current, vasid, cop, attr);
+
+	if (!tx_win_args_valid(cop, attr))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * If caller did not specify a vasid but specified the PSWID of a
+	 * receive window (applicable only to FTW windows), use the vasid
+	 * from that receive window.
+	 */
+	if (vasid == -1 && attr->pswid)
+		decode_pswid(attr->pswid, &vasid, NULL);
+
+	vinst = find_vas_instance(vasid);
+	if (!vinst) {
+		pr_devel("vasid %d not found!\n", vasid);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* On success this holds a reference on rxwin (->num_txwins). */
+	rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
+	if (IS_ERR(rxwin)) {
+		pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
+		return (struct vas_window *)rxwin;
+	}
+
+	txwin = vas_window_alloc(vinst);
+	if (IS_ERR(txwin)) {
+		rc = PTR_ERR(txwin);
+		goto put_rxwin;
+	}
+
+	txwin->vas_win.cop = cop;
+	txwin->tx_win = 1;
+	txwin->rxwin = rxwin;
+	txwin->nx_win = txwin->rxwin->nx_win;
+	txwin->user_win = attr->user_win;
+	/* Fall back to the default credit count if the caller gave none. */
+	txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+
+	init_winctx_for_txwin(txwin, attr, &winctx);
+
+	init_winctx_regs(txwin, &winctx);
+
+	/*
+	 * If its a kernel send window, map the window address into the
+	 * kernel's address space. For user windows, user must issue an
+	 * mmap() to map the window into their address space.
+	 *
+	 * NOTE: If kernel ever resubmits a user CRB after handling a page
+	 *	 fault, we will need to map this into kernel as well.
+	 */
+	if (!txwin->user_win) {
+		txwin->paste_kaddr = map_paste_region(txwin);
+		if (IS_ERR(txwin->paste_kaddr)) {
+			rc = PTR_ERR(txwin->paste_kaddr);
+			goto free_window;
+		}
+	} else {
+		/*
+		 * Interrupt handler or fault window setup failed. Means
+		 * NX can not generate fault for page fault. So not
+		 * opening for user space tx window.
+		 */
+		if (!vinst->virq) {
+			rc = -ENODEV;
+			goto free_window;
+		}
+		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+		if (rc)
+			goto free_window;
+
+		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+	}
+
+	/* Fully set up: make the window visible in the instance tables. */
+	set_vinst_win(vinst, txwin);
+
+	return &txwin->vas_win;
+
+free_window:
+	vas_window_free(txwin);
+
+put_rxwin:
+	put_rx_win(rxwin);
+	return ERR_PTR(rc);
+
+}
+EXPORT_SYMBOL_GPL(vas_tx_win_open);
+
+/*
+ * Exported wrapper around vas_copy(): forwards @crb and @offset unchanged
+ * and hands back whatever vas_copy() returns.
+ */
+int vas_copy_crb(void *crb, int offset)
+{
+	int rc = vas_copy(crb, offset);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(vas_copy_crb);
+
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+/*
+ * Issue vas_paste() on the kernel paste address of a send window.
+ *
+ * @vwin:   window, embedded in a struct pnv_vas_window
+ * @offset: handed unchanged to vas_paste()
+ * @re:     when true, the REPORT_ENABLE bit is added to the paste address
+ *
+ * Returns 0 when vas_paste() yields 2 and -EINVAL for any other value.
+ */
+int vas_paste_crb(struct vas_window *vwin, int offset, bool re)
+{
+	struct pnv_vas_window *txwin =
+		container_of(vwin, struct pnv_vas_window, vas_win);
+	void *paste_addr;
+	int cr;
+
+	trace_vas_paste_crb(current, txwin);
+
+	/*
+	 * Only NX windows are supported for now and hardware assumes
+	 * report-enable flag is set for NX windows. Ensure software
+	 * complies too.
+	 */
+	WARN_ON_ONCE(txwin->nx_win && !re);
+
+	paste_addr = txwin->paste_kaddr;
+
+	/*
+	 * Setting the REPORT_ENABLE bit is equivalent to writing to the
+	 * 1K offset of the paste address.
+	 */
+	if (re)
+		paste_addr += SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
+
+	cr = vas_paste(paste_addr, offset);
+
+	pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid,
+			read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
+
+	/*
+	 * Map the raw CR value from vas_paste() to an error code (there
+	 * is just pass or fail for now though).
+	 */
+	return cr == 2 ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vas_paste_crb);
+
+/*
+ * If credit checking is enabled for this window, poll for the return
+ * of window credits (i.e for NX engines to process any outstanding CRBs).
+ * Since NX-842 waits for the CRBs to be processed before closing the
+ * window, we should not have to wait for too long.
+ *
+ * TODO: We retry in 10ms intervals now. We could/should probably peek at
+ *	the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
+ *	CRBs on the FIFO and compute the delay dynamically on each retry.
+ *	But that is not really needed until we support NX-GZIP access from
+ *	user space. (NX-842 driver waits for CSB and Fast thread-wakeup
+ *	doesn't use credit checking).
+ */
+static void poll_window_credits(struct pnv_vas_window *window)
+{
+	int credits, wcred_mode;
+	int retries = 0;
+	u64 reg;
+
+	/* Nothing to wait for unless credit checking is enabled. */
+	reg = read_hvwc_reg(window, VREG(WINCTL));
+	if (window->tx_win)
+		wcred_mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, reg);
+	else
+		wcred_mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, reg);
+
+	if (!wcred_mode)
+		return;
+
+	for (;;) {
+		if (window->tx_win) {
+			reg = read_hvwc_reg(window, VREG(TX_WCRED));
+			credits = GET_FIELD(VAS_TX_WCRED, reg);
+		} else {
+			reg = read_hvwc_reg(window, VREG(LRX_WCRED));
+			credits = GET_FIELD(VAS_LRX_WCRED, reg);
+		}
+
+		/* Done once every credit has come back. */
+		if (credits >= window->vas_win.wcreds_max)
+			break;
+
+		/*
+		 * Takes around few milliseconds to complete all pending
+		 * requests and return credits.
+		 * TODO: Scan fault FIFO and invalidate CRBs points to this
+		 *       window and issue CRB Kill to stop all pending
+		 *       requests. Need only if there is a bug in NX or fault
+		 *       handling in kernel.
+		 */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(msecs_to_jiffies(10));
+		retries++;
+
+		/*
+		 * Process can not close send window until all credits are
+		 * returned; complain on every 1000th retry.
+		 */
+		if (!(retries % 1000))
+			pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
+				vas_window_pid(&window->vas_win),
+				window->vas_win.winid,
+				credits, retries);
+	}
+}
+
+/*
+ * Wait for the window to go to "not-busy" state. It should only take a
+ * short time to queue a CRB, so window should not be busy for too long.
+ * The status register is re-read after each 10ms sleep.
+ */
+static void poll_window_busy_state(struct pnv_vas_window *window)
+{
+	int retries = 0;
+	int busy;
+	u64 status;
+
+	for (;;) {
+		status = read_hvwc_reg(window, VREG(WIN_STATUS));
+		busy = GET_FIELD(VAS_WIN_BUSY, status);
+		if (!busy)
+			return;
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(msecs_to_jiffies(10));
+		retries++;
+
+		/*
+		 * Takes around few milliseconds to process all pending
+		 * requests; complain on every 1000th retry.
+		 */
+		if (!(retries % 1000))
+			pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
+				vas_window_pid(&window->vas_win),
+				window->vas_win.winid, retries);
+	}
+}
+
+/*
+ * Have the hardware cast a window out of cache and wait for it to
+ * be completed.
+ *
+ * NOTE: It can take a relatively long time to cast the window context
+ *	out of the cache. It is not strictly necessary to cast out if:
+ *
+ *	- we clear the "Pin Window" bit (so hardware is free to evict)
+ *
+ *	- we re-initialize the window context when it is reassigned.
+ *
+ *	We do the former in vas_win_close() and latter in vas_win_open().
+ *	So, ignoring the cast-out for now. We can add it as needed. If
+ *	casting out becomes necessary we should consider offloading the
+ *	job to a worker thread, so the window close can proceed quickly.
+ */
+static void poll_window_castout(struct pnv_vas_window *window)
+{
+	/* stub for now: @window is intentionally left untouched */
+}
+
+/*
+ * Unpin and close a window so no new requests are accepted and the
+ * hardware can evict this window from cache if necessary.
+ */
+static void unpin_close_window(struct pnv_vas_window *window)
+{
+	u64 winctl = read_hvwc_reg(window, VREG(WINCTL));
+
+	/* Clear the Open and Pin bits, then write the register back once. */
+	winctl = SET_FIELD(VAS_WINCTL_OPEN, winctl, 0);
+	winctl = SET_FIELD(VAS_WINCTL_PIN, winctl, 0);
+
+	write_hvwc_reg(window, VREG(WINCTL), winctl);
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ *	- Disable new paste operations (unmap paste address)
+ *	- Poll for the "Window Busy" bit to be cleared
+ *	- Clear the Open/Enable bit for the Window.
+ *	- Poll for return of window Credits (implies FIFO empty for Rx win?)
+ *	- Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ *
+ * Returns 0 when @vwin is NULL or the window was closed, and -EBUSY when
+ * a receive window still has send windows counted in ->num_txwins.
+ */
+int vas_win_close(struct vas_window *vwin)
+{
+	struct pnv_vas_window *window;
+
+	if (!vwin)
+		return 0;
+
+	window = container_of(vwin, struct pnv_vas_window, vas_win);
+
+	if (!window->tx_win) {
+		if (atomic_read(&window->num_txwins) != 0) {
+			pr_devel("Attempting to close an active Rx window!\n");
+			WARN_ON_ONCE(1);
+			return -EBUSY;
+		}
+	}
+
+	/* Hardware teardown, in the order listed above. */
+	unmap_paste_region(window);
+	poll_window_busy_state(window);
+	unpin_close_window(window);
+	poll_window_credits(window);
+
+	clear_vinst_win(window);
+
+	poll_window_castout(window);
+
+	/* User send windows also drop their mm/task references. */
+	if (window->tx_win && window->user_win) {
+		mm_context_remove_vas_window(vwin->task_ref.mm);
+		put_vas_user_win_ref(&vwin->task_ref);
+	}
+
+	/* if send window, drop reference to matching receive window */
+	if (window->tx_win)
+		put_rx_win(window->rxwin);
+
+	vas_window_free(window);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return credit for the given window.
+ * Send windows and fault window uses credit mechanism as follows:
+ *
+ * Send windows:
+ * - The default number of credits available for each send window is
+ *   1024. It means 1024 requests can be issued asynchronously at the
+ *   same time. If the credit is not available, that request will be
+ *   returned with RMA_Busy.
+ * - One credit is taken when NX request is issued.
+ * - This credit is returned after NX processed that request.
+ * - If NX encounters translation error, kernel will return the
+ *   credit on the specific send window after processing the fault CRB.
+ *
+ * Fault window:
+ * - The total number credits available is FIFO_SIZE/CRB_SIZE.
+ *   Means 4MB/128 in the current implementation. If credit is not
+ *   available, RMA_Reject is returned.
+ * - A credit is taken when NX pastes CRB in fault FIFO.
+ * - The kernel will return credit on fault window after reading entry
+ *   from fault FIFO.
+ */
+void vas_return_credit(struct pnv_vas_window *window, bool tx)
+{
+	uint64_t credit;
+
+	if (!tx) {
+		/* not a send window: add one credit via LRX_WCRED_ADDER */
+		credit = SET_FIELD(VAS_LRX_WCRED, 0ULL, 1);
+		write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), credit);
+		return;
+	}
+
+	/* send window: add one credit via TX_WCRED_ADDER */
+	credit = SET_FIELD(VAS_TX_WCRED, 0ULL, 1);
+	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), credit);
+}
+
+/*
+ * Translate a PSWID into the window stored for it in vinst->windows[].
+ *
+ * Only the window-id part of @pswid is used for the lookup.
+ *
+ * Returns the window, or ERR_PTR(-ESRCH) when @pswid is 0, the decoded
+ * window id is not below VAS_WINDOWS_PER_CHIP, or the slot for that id is
+ * empty. Every failure is an ERR_PTR() so that one IS_ERR() test covers
+ * them all; NULL is never returned.
+ *
+ * A window that fails the sanity checks below is logged and triggers a
+ * WARN_ON(), but is still returned.
+ */
+struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+		uint32_t pswid)
+{
+	struct pnv_vas_window *window;
+	int winid;
+
+	if (!pswid) {
+		pr_devel("%s: called for pswid 0!\n", __func__);
+		return ERR_PTR(-ESRCH);
+	}
+
+	decode_pswid(pswid, NULL, &winid);
+
+	if (winid >= VAS_WINDOWS_PER_CHIP)
+		return ERR_PTR(-ESRCH);
+
+	/*
+	 * If application closes the window before the hardware
+	 * returns the fault CRB, we should wait in vas_win_close()
+	 * for the pending requests. so the window must be active
+	 * and the process alive.
+	 *
+	 * If its a kernel process, we should not get any faults and
+	 * should not get here.
+	 */
+	window = vinst->windows[winid];
+
+	if (!window) {
+		pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n",
+			winid, pswid, vinst);
+		/* Same error convention as the other lookup failures. */
+		return ERR_PTR(-ESRCH);
+	}
+
+	/*
+	 * Do some sanity checks on the decoded window.  Window should be
+	 * NX GZIP user send window. FTW windows should not incur faults
+	 * since their CRBs are ignored (not queued on FIFO or processed
+	 * by NX).
+	 */
+	if (!window->tx_win || !window->user_win || !window->nx_win ||
+			window->vas_win.cop == VAS_COP_TYPE_FAULT ||
+			window->vas_win.cop == VAS_COP_TYPE_FTW) {
+		pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
+			winid, window->tx_win, window->user_win,
+			window->nx_win, window->vas_win.cop);
+		WARN_ON(1);
+	}
+
+	return window;
+}
+
+/*
+ * open_win callback: open a user space send window for @cop_type on
+ * @vas_id. Starts from the vas_init_tx_win_attr() defaults, marks the
+ * window as a user window and fills in LPID/PID from the current SPRs.
+ * @flags is not used. Returns whatever vas_tx_win_open() returns.
+ */
+static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
+				enum vas_cop_type cop_type)
+{
+	struct vas_tx_win_attr attr = {};
+
+	vas_init_tx_win_attr(&attr, cop_type);
+
+	/* Overrides applied on top of the defaults. */
+	attr.user_win = true;
+	attr.rsvd_txbuf_count = 0;
+	attr.pswid = 0;
+	attr.lpid = mfspr(SPRN_LPID);
+	attr.pidr = mfspr(SPRN_PID);
+
+	pr_devel("Pid %d: Opening txwin, PIDR %ld\n", attr.pidr,
+				mfspr(SPRN_PID));
+
+	return vas_tx_win_open(vas_id, cop_type, &attr);
+}
+
+/*
+ * paste_addr callback: return the paste address that
+ * vas_win_paste_addr() reports for @txwin (the length is not requested).
+ */
+static u64 vas_user_win_paste_addr(struct vas_window *txwin)
+{
+	struct pnv_vas_window *pnv_win =
+		container_of(txwin, struct pnv_vas_window, vas_win);
+	u64 addr;
+
+	vas_win_paste_addr(pnv_win, &addr, NULL);
+
+	return addr;
+}
+
+/*
+ * close_win callback: close @txwin and report the result of
+ * vas_win_close() instead of discarding it (0 on success, -EBUSY when
+ * vas_win_close() refuses to close the window).
+ */
+static int vas_user_win_close(struct vas_window *txwin)
+{
+	return vas_win_close(txwin);
+}
+
+/*
+ * PowerNV window callbacks; vas_register_api_powernv() passes this table
+ * to vas_register_coproc_api().
+ */
+static const struct vas_user_win_ops vops =  {
+	.open_win	=	vas_user_win_open,
+	.paste_addr	=	vas_user_win_paste_addr,
+	.close_win	=	vas_user_win_close,
+};
+
+/*
+ * Register the PowerNV window callbacks (vops) for @cop_type under @name
+ * on behalf of @mod. Only the nx-gzip coprocessor type is supported now,
+ * but this API can be extended to other coprocessor types later.
+ *
+ * Returns the result of vas_register_coproc_api().
+ */
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+			     const char *name)
+{
+	int rc;
+
+	rc = vas_register_coproc_api(mod, cop_type, name, &vops);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_powernv);
+
+/*
+ * Counterpart of vas_register_api_powernv(): simply calls
+ * vas_unregister_coproc_api().
+ */
+void vas_unregister_api_powernv(void)
+{
+	vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_powernv);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 0000000000..b65256a63e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <asm/prom.h>
+#include <asm/xive.h>
+
+#include "vas.h"
+
+/* Held while adding to or searching the vas_instances list. */
+DEFINE_MUTEX(vas_mutex);
+/* Instances set up by init_vas_instance(), linked through ->node. */
+static LIST_HEAD(vas_instances);
+
+/*
+ * VAS id per CPU; init_vas_instance() fills it in for the CPUs whose
+ * chip id matches the instance's device node.
+ */
+static DEFINE_PER_CPU(int, cpu_vas_id);
+
+/*
+ * Request the threaded fault interrupt for @vinst and then set up its
+ * fault window. If the fault window setup fails the interrupt is freed
+ * again. Returns 0 on success or the error of the step that failed.
+ */
+static int vas_irq_fault_window_setup(struct vas_instance *vinst)
+{
+	int err;
+
+	err = request_threaded_irq(vinst->virq, vas_fault_handler,
+				vas_fault_thread_fn, 0, vinst->name, vinst);
+	if (err) {
+		pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n",
+				vinst->vas_id, vinst->virq, err);
+		return err;
+	}
+
+	err = vas_setup_fault_window(vinst);
+	if (err)
+		free_irq(vinst->virq, vinst);
+
+	return err;
+}
+
+/*
+ * Set up one VAS instance from its platform device.
+ *
+ * Reads "ibm,vas-id" and "ibm,chip-id" from the device node, records the
+ * start of resources 0-2 (HVWC BAR, UWC BAR, paste base), derives the
+ * paste window-id shift from resource 3, allocates and maps the fault
+ * interrupt, tags the CPUs of this chip with the VAS id, links the
+ * instance onto vas_instances and attempts the IRQ/fault window setup.
+ *
+ * Returns 0 on success. Failure codes: -ENODEV (missing property,
+ * unexpected resource count, bad shift), -ENOMEM, -ENOENT (no hwirq),
+ * -EINVAL (irq mapping or handler data lookup failed). Every failure
+ * after the allocation frees @vinst and its name.
+ *
+ * NOTE(review): the hwirq / virq mapping obtained here is not released
+ * on the later error paths - confirm whether that needs handling too.
+ */
+static int init_vas_instance(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct vas_instance *vinst;
+	struct xive_irq_data *xd;
+	uint32_t chipid, hwirq;
+	struct resource *res;
+	int rc, cpu, vasid;
+
+	rc = of_property_read_u32(dn, "ibm,vas-id", &vasid);
+	if (rc) {
+		pr_err("No ibm,vas-id property for %s?\n", pdev->name);
+		return -ENODEV;
+	}
+
+	rc = of_property_read_u32(dn, "ibm,chip-id", &chipid);
+	if (rc) {
+		pr_err("No ibm,chip-id property for %s?\n", pdev->name);
+		return -ENODEV;
+	}
+
+	if (pdev->num_resources != 4) {
+		pr_err("Unexpected DT configuration for [%s, %d]\n",
+				pdev->name, vasid);
+		return -ENODEV;
+	}
+
+	vinst = kzalloc(sizeof(*vinst), GFP_KERNEL);
+	if (!vinst)
+		return -ENOMEM;
+
+	vinst->name = kasprintf(GFP_KERNEL, "vas-%d", vasid);
+	if (!vinst->name) {
+		kfree(vinst);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&vinst->node);
+	ida_init(&vinst->ida);
+	mutex_init(&vinst->mutex);
+	vinst->vas_id = vasid;
+	vinst->pdev = pdev;
+
+	res = &pdev->resource[0];
+	vinst->hvwc_bar_start = res->start;
+
+	res = &pdev->resource[1];
+	vinst->uwc_bar_start = res->start;
+
+	res = &pdev->resource[2];
+	vinst->paste_base_addr = res->start;
+
+	res = &pdev->resource[3];
+	if (res->end > 62) {
+		pr_err("Bad 'paste_win_id_shift' in DT, %llx\n", res->end);
+		rc = -ENODEV;
+		goto free_vinst;
+	}
+
+	vinst->paste_win_id_shift = 63 - res->end;
+
+	/*
+	 * From here on every failure must go through free_vinst so that
+	 * the instance and its name are not leaked.
+	 */
+	hwirq = xive_native_alloc_irq_on_chip(chipid);
+	if (!hwirq) {
+		pr_err("Inst%d: Unable to allocate global irq for chip %d\n",
+				vinst->vas_id, chipid);
+		rc = -ENOENT;
+		goto free_vinst;
+	}
+
+	vinst->virq = irq_create_mapping(NULL, hwirq);
+	if (!vinst->virq) {
+		pr_err("Inst%d: Unable to map global irq %d\n",
+				vinst->vas_id, hwirq);
+		rc = -EINVAL;
+		goto free_vinst;
+	}
+
+	xd = irq_get_handler_data(vinst->virq);
+	if (!xd) {
+		pr_err("Inst%d: Invalid virq %d\n",
+				vinst->vas_id, vinst->virq);
+		rc = -EINVAL;
+		goto free_vinst;
+	}
+
+	vinst->irq_port = xd->trig_page;
+	pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n",
+			pdev->name, vasid, vinst->paste_base_addr,
+			vinst->paste_win_id_shift, vinst->virq,
+			vinst->irq_port);
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
+			per_cpu(cpu_vas_id, cpu) = vasid;
+	}
+
+	mutex_lock(&vas_mutex);
+	list_add(&vinst->node, &vas_instances);
+	mutex_unlock(&vas_mutex);
+
+	spin_lock_init(&vinst->fault_lock);
+	/*
+	 * IRQ and fault handling setup is needed only for user space
+	 * send windows.
+	 */
+	if (vinst->virq) {
+		rc = vas_irq_fault_window_setup(vinst);
+		/*
+		 * Fault window is used only for user space send windows.
+		 * A failure here is not fatal: virq is cleared to 0 so
+		 * that tx_win_open returns -ENODEV for user space.
+		 */
+		if (rc)
+			vinst->virq = 0;
+	}
+
+	vas_instance_init_dbgdir(vinst);
+
+	dev_set_drvdata(&pdev->dev, vinst);
+
+	return 0;
+
+free_vinst:
+	kfree(vinst->name);
+	kfree(vinst);
+	return rc;
+
+}
+
+/*
+ * Look up the instance whose ->vas_id equals @vasid; a @vasid of -1
+ * selects the id recorded in cpu_vas_id for the current CPU. Returns the
+ * instance or NULL when none matches.
+ *
+ * Although the list is read/used multiple times, it is written to only
+ * during initialization.
+ */
+struct vas_instance *find_vas_instance(int vasid)
+{
+	struct vas_instance *found = NULL;
+	struct list_head *pos;
+
+	mutex_lock(&vas_mutex);
+
+	if (vasid == -1)
+		vasid = per_cpu(cpu_vas_id, smp_processor_id());
+
+	list_for_each(pos, &vas_instances) {
+		struct vas_instance *cand;
+
+		cand = list_entry(pos, struct vas_instance, node);
+		if (cand->vas_id == vasid) {
+			found = cand;
+			break;
+		}
+	}
+
+	mutex_unlock(&vas_mutex);
+
+	if (!found)
+		pr_devel("Instance %d not found\n", vasid);
+
+	return found;
+}
+
+/*
+ * Return the cpu_vas_id value of the first possible CPU that belongs to
+ * chip @chipid, or -1 when no possible CPU is on that chip.
+ */
+int chip_to_vas_id(int chipid)
+{
+	int vasid = -1;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_to_chip_id(cpu) != chipid)
+			continue;
+
+		vasid = per_cpu(cpu_vas_id, cpu);
+		break;
+	}
+
+	return vasid;
+}
+EXPORT_SYMBOL(chip_to_vas_id);
+
+/* Platform driver probe: all the work is done by init_vas_instance(). */
+static int vas_probe(struct platform_device *pdev)
+{
+	return init_vas_instance(pdev);
+}
+
+/* Device tree match table: binds to nodes compatible with "ibm,vas". */
+static const struct of_device_id powernv_vas_match[] = {
+	{ .compatible = "ibm,vas",},
+	{},
+};
+
+/* Platform driver registered by vas_init(); only a probe hook is set. */
+static struct platform_driver vas_driver = {
+	.driver = {
+		.name = "vas",
+		.of_match_table = powernv_vas_match,
+	},
+	.probe = vas_probe,
+};
+
+/*
+ * Register the VAS platform driver and create a platform device for
+ * every "ibm,vas" compatible node in the device tree.
+ *
+ * Returns 0 when at least one node was found, the error from
+ * platform_driver_register() when registration fails, and -ENODEV (after
+ * unregistering the driver again) when there is no such node.
+ */
+static int __init vas_init(void)
+{
+	int found = 0;
+	int rc;
+	struct device_node *dn;
+
+	/* Without a registered driver there is nothing to probe or undo. */
+	rc = platform_driver_register(&vas_driver);
+	if (rc)
+		return rc;
+
+	for_each_compatible_node(dn, NULL, "ibm,vas") {
+		of_platform_device_create(dn, NULL, NULL);
+		found++;
+	}
+
+	if (!found) {
+		platform_driver_unregister(&vas_driver);
+		return -ENODEV;
+	}
+
+	pr_devel("Found %d instances\n", found);
+
+	return 0;
+}
+device_initcall(vas_init);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 0000000000..08d9d3d5a2
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,501 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <linux/atomic.h>
+#include <linux/idr.h>
+#include <asm/vas.h>
+#include <linux/io.h>
+#include <linux/dcache.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvement. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and
+ * "paste" instructions which were introduced in Power9.
+ *
+ * A Power node can have (up to?) 8 Power chips. There is one instance of
+ * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports,
+ * Senders and receivers must each connect to a separate window before they
+ * can exchange messages through the switchboard.
+ *
+ * Each window is described by two types of window contexts:
+ *
+ *	Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes
+ *
+ *	OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes.
+ *
+ * A window context can be viewed as a set of 64-bit registers. The settings
+ * in these registers configure/control/determine the behavior of the VAS
+ * hardware when messages are sent/received through the window. The registers
+ * in the HVWC are configured by the kernel while the registers in the UWC can
+ * be configured by the kernel or by the user space application that is using
+ * the window.
+ *
+ * The HVWCs for all windows on a specific instance of VAS are in a contiguous
+ * range of hardware addresses or Base address region (BAR) referred to as the
+ * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance
+ * are referred to as the UWC BAR for the instance.
+ *
+ * The two BARs for each instance are defined in the Power9 MMIO Ranges
+ * spreadsheet and are available to the kernel in the VAS node's "reg"
+ * property in the device tree:
+ *
+ *	/proc/device-tree/vasm@.../reg
+ *
+ * (see vas_probe() for details on the reg property).
+ *
+ * The kernel maps the HVWC and UWC BAR regions into the kernel address
+ * space (hvwc_map and uwc_map). The kernel can then access the window
+ * contexts of a specific window using:
+ *
+ *	 hvwc = hvwc_map + winid * VAS_HVWC_SIZE.
+ *	 uwc = uwc_map + winid * VAS_UWC_SIZE.
+ *
+ * where winid is the window index (0..64K).
+ *
+ * As mentioned, a window context is used to "configure" a window. Besides
+ * this configuration address, each _send_ window also has a unique hardware
+ * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()).
+ *
+ * The hardware paste address for a window is computed using the "paste
+ * base address" and "paste win id shift" reg properties in the VAS device
+ * tree node using:
+ *
+ *	paste_addr = paste_base + ((winid << paste_win_id_shift))
+ *
+ * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift).
+ *
+ * The kernel maps this hardware address into the sender's address space
+ * after which they can use the 'paste' instruction (new in Power9) to
+ * send a message (submit a request aka CRB) to the coprocessor.
+ *
+ * NOTE: In the initial version, senders can only be in-kernel drivers/threads.
+ *	 Support for user space threads will be added in follow-on patches.
+ *
+ * TODO: Do we need to map the UWC into user address space so they can return
+ *	 credits? Its NA for NX but may be needed for other receive windows.
+ *
+ */
+
+#define VAS_WINDOWS_PER_CHIP		(64 << 10)
+
+/*
+ * Hypervisor and OS/User Window Context sizes
+ */
+#define VAS_HVWC_SIZE			512
+#define VAS_UWC_SIZE			PAGE_SIZE
+
+/*
+ * Initial per-process credits.
+ * Max send window credits:    4K-1 (12-bits in VAS_TX_WCRED)
+ *
+ * TODO: Needs tuning for per-process credits
+ */
+#define VAS_TX_WCREDS_MAX		((4 << 10) - 1)
+#define VAS_WCREDS_DEFAULT		(1 << 10)
+
+/*
+ * VAS Window Context Register Offsets and bitmasks.
+ * See Section 3.1.4 of VAS Work book
+ */
+#define VAS_LPID_OFFSET			0x010
+#define VAS_LPID			PPC_BITMASK(0, 11)
+
+#define VAS_PID_OFFSET			0x018
+#define VAS_PID_ID			PPC_BITMASK(0, 19)
+
+#define VAS_XLATE_MSR_OFFSET		0x020
+#define VAS_XLATE_MSR_DR		PPC_BIT(0)
+#define VAS_XLATE_MSR_TA		PPC_BIT(1)
+#define VAS_XLATE_MSR_PR		PPC_BIT(2)
+#define VAS_XLATE_MSR_US		PPC_BIT(3)
+#define VAS_XLATE_MSR_HV		PPC_BIT(4)
+#define VAS_XLATE_MSR_SF		PPC_BIT(5)
+
+#define VAS_XLATE_LPCR_OFFSET		0x028
+#define VAS_XLATE_LPCR_PAGE_SIZE	PPC_BITMASK(0, 2)
+#define VAS_XLATE_LPCR_ISL		PPC_BIT(3)
+#define VAS_XLATE_LPCR_TC		PPC_BIT(4)
+#define VAS_XLATE_LPCR_SC		PPC_BIT(5)
+
+#define VAS_XLATE_CTL_OFFSET		0x030
+#define VAS_XLATE_MODE			PPC_BITMASK(0, 1)
+
+#define VAS_AMR_OFFSET			0x040
+#define VAS_AMR				PPC_BITMASK(0, 63)
+
+#define VAS_SEIDR_OFFSET		0x048
+#define VAS_SEIDR			PPC_BITMASK(0, 63)
+
+#define VAS_FAULT_TX_WIN_OFFSET		0x050
+#define VAS_FAULT_TX_WIN		PPC_BITMASK(48, 63)
+
+#define VAS_OSU_INTR_SRC_RA_OFFSET	0x060
+#define VAS_OSU_INTR_SRC_RA		PPC_BITMASK(8, 63)
+
+#define VAS_HV_INTR_SRC_RA_OFFSET	0x070
+#define VAS_HV_INTR_SRC_RA		PPC_BITMASK(8, 63)
+
+#define VAS_PSWID_OFFSET		0x078
+#define VAS_PSWID_EA_HANDLE		PPC_BITMASK(0, 31)
+
+#define VAS_SPARE1_OFFSET		0x080
+#define VAS_SPARE2_OFFSET		0x088
+#define VAS_SPARE3_OFFSET		0x090
+#define VAS_SPARE4_OFFSET		0x130
+#define VAS_SPARE5_OFFSET		0x160
+#define VAS_SPARE6_OFFSET		0x188
+
+#define VAS_LFIFO_BAR_OFFSET		0x0A0
+#define VAS_LFIFO_BAR			PPC_BITMASK(8, 53)
+#define VAS_PAGE_MIGRATION_SELECT	PPC_BITMASK(54, 56)
+
+#define VAS_LDATA_STAMP_CTL_OFFSET	0x0A8
+#define VAS_LDATA_STAMP			PPC_BITMASK(0, 1)
+#define VAS_XTRA_WRITE			PPC_BIT(2)
+
+#define VAS_LDMA_CACHE_CTL_OFFSET	0x0B0
+#define VAS_LDMA_TYPE			PPC_BITMASK(0, 1)
+#define VAS_LDMA_FIFO_DISABLE		PPC_BIT(2)
+
+#define VAS_LRFIFO_PUSH_OFFSET		0x0B8
+#define VAS_LRFIFO_PUSH			PPC_BITMASK(0, 15)
+
+#define VAS_CURR_MSG_COUNT_OFFSET	0x0C0
+#define VAS_CURR_MSG_COUNT		PPC_BITMASK(0, 7)
+
+#define VAS_LNOTIFY_AFTER_COUNT_OFFSET	0x0C8
+#define VAS_LNOTIFY_AFTER_COUNT		PPC_BITMASK(0, 7)
+
+#define VAS_LRX_WCRED_OFFSET		0x0E0
+#define VAS_LRX_WCRED			PPC_BITMASK(0, 15)
+
+#define VAS_LRX_WCRED_ADDER_OFFSET	0x190
+#define VAS_LRX_WCRED_ADDER		PPC_BITMASK(0, 15)
+
+#define VAS_TX_WCRED_OFFSET		0x0F0
+#define VAS_TX_WCRED			PPC_BITMASK(4, 15)
+
+#define VAS_TX_WCRED_ADDER_OFFSET	0x1A0
+#define VAS_TX_WCRED_ADDER		PPC_BITMASK(4, 15)
+
+#define VAS_LFIFO_SIZE_OFFSET		0x100
+#define VAS_LFIFO_SIZE			PPC_BITMASK(0, 3)
+
+#define VAS_WINCTL_OFFSET		0x108
+#define VAS_WINCTL_OPEN			PPC_BIT(0)
+#define VAS_WINCTL_REJ_NO_CREDIT	PPC_BIT(1)
+#define VAS_WINCTL_PIN			PPC_BIT(2)
+#define VAS_WINCTL_TX_WCRED_MODE	PPC_BIT(3)
+#define VAS_WINCTL_RX_WCRED_MODE	PPC_BIT(4)
+#define VAS_WINCTL_TX_WORD_MODE		PPC_BIT(5)
+#define VAS_WINCTL_RX_WORD_MODE		PPC_BIT(6)
+#define VAS_WINCTL_RSVD_TXBUF		PPC_BIT(7)
+#define VAS_WINCTL_THRESH_CTL		PPC_BITMASK(8, 9)
+#define VAS_WINCTL_FAULT_WIN		PPC_BIT(10)
+#define VAS_WINCTL_NX_WIN		PPC_BIT(11)
+
+#define VAS_WIN_STATUS_OFFSET		0x110
+#define VAS_WIN_BUSY			PPC_BIT(1)
+
+#define VAS_WIN_CTX_CACHING_CTL_OFFSET	0x118
+#define VAS_CASTOUT_REQ			PPC_BIT(0)
+#define VAS_PUSH_TO_MEM			PPC_BIT(1)
+#define VAS_WIN_CACHE_STATUS		PPC_BIT(4)
+
+#define VAS_TX_RSVD_BUF_COUNT_OFFSET	0x120
+#define VAS_RXVD_BUF_COUNT		PPC_BITMASK(58, 63)
+
+#define VAS_LRFIFO_WIN_PTR_OFFSET	0x128
+#define VAS_LRX_WIN_ID			PPC_BITMASK(0, 15)
+
+/*
+ * Local Notification Control Register controls what happens in _response_
+ * to a paste command and hence applies only to receive windows.
+ */
+#define VAS_LNOTIFY_CTL_OFFSET		0x138
+#define VAS_NOTIFY_DISABLE		PPC_BIT(0)
+#define VAS_INTR_DISABLE		PPC_BIT(1)
+#define VAS_NOTIFY_EARLY		PPC_BIT(2)
+#define VAS_NOTIFY_OSU_INTR		PPC_BIT(3)
+
+#define VAS_LNOTIFY_PID_OFFSET		0x140
+#define VAS_LNOTIFY_PID			PPC_BITMASK(0, 19)
+
+#define VAS_LNOTIFY_LPID_OFFSET		0x148
+#define VAS_LNOTIFY_LPID		PPC_BITMASK(0, 11)
+
+#define VAS_LNOTIFY_TID_OFFSET		0x150
+#define VAS_LNOTIFY_TID			PPC_BITMASK(0, 15)
+
+#define VAS_LNOTIFY_SCOPE_OFFSET	0x158
+#define VAS_LNOTIFY_MIN_SCOPE		PPC_BITMASK(0, 1)
+#define VAS_LNOTIFY_MAX_SCOPE		PPC_BITMASK(2, 3)
+
+#define VAS_NX_UTIL_OFFSET		0x1B0
+#define VAS_NX_UTIL			PPC_BITMASK(0, 63)
+
+/* SE: Side effects */
+#define VAS_NX_UTIL_SE_OFFSET		0x1B8
+#define VAS_NX_UTIL_SE			PPC_BITMASK(0, 63)
+
+#define VAS_NX_UTIL_ADDER_OFFSET	0x180
+#define VAS_NX_UTIL_ADDER		PPC_BITMASK(32, 63)
+
+/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ *	- the register's short name in string form ("LPID"), and
+ *	- the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ *	  register's offset in the window context
+ */
+#define VREG_SFX(n, s)	__stringify(n), VAS_##n##s
+#define VREG(r)		VREG_SFX(r, _OFFSET)
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ */
+enum vas_notify_scope {
+	VAS_SCOPE_LOCAL,
+	VAS_SCOPE_GROUP,
+	VAS_SCOPE_VECTORED_GROUP,
+	VAS_SCOPE_UNUSED,
+};
+
+/*
+ * Local DMA Cache Control Register (Receive windows only).
+ */
+enum vas_dma_type {
+	VAS_DMA_TYPE_INJECT,
+	VAS_DMA_TYPE_WRITE,
+};
+
+/*
+ * Local Notify After Count Register. (Receive windows only).
+ * Not applicable to NX receive windows.
+ */
+enum vas_notify_after_count {
+	VAS_NOTIFY_AFTER_256 = 0,
+	VAS_NOTIFY_NONE,
+	VAS_NOTIFY_AFTER_2
+};
+
+/*
+ * NX can generate an interrupt for multiple faults and expects kernel
+ * to process all of them. So read all valid CRB entries until an invalid
+ * one is found. Use pswid which is pasted by NX and ccw[0] (reserved
+ * bit in BE) to check for a valid CRB. CCW[0] will not be touched by user
+ * space. Application gets CRB format error if it updates this bit.
+ *
+ * Invalidate FIFO during allocation and process all entries from last
+ * successful read until finds invalid pswid and ccw[0] values.
+ * After reading each CRB entry from fault FIFO, the kernel invalidate
+ * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with
+ * CCW0_INVALID.
+ */
+#define FIFO_INVALID_ENTRY	0xffffffff
+#define CCW0_INVALID		1
+
+/*
+ * One per instance of VAS. Each instance will have a separate set of
+ * receive windows, one per coprocessor type.
+ *
+ * See also function header of set_vinst_win() for details on ->windows[]
+ * and ->rxwin[] tables.
+ */
+struct vas_instance {
+	int vas_id;		/* from the "ibm,vas-id" DT property */
+	struct ida ida;
+	struct list_head node;	/* entry in the vas_instances list */
+	struct platform_device *pdev;
+
+	u64 hvwc_bar_start;	/* start of platform resource 0 */
+	u64 uwc_bar_start;	/* start of platform resource 1 */
+	u64 paste_base_addr;	/* start of platform resource 2 */
+	u64 paste_win_id_shift;	/* 63 - end of platform resource 3 */
+
+	u64 irq_port;		/* trig_page of the irq's xive data */
+	int virq;		/* 0 when fault irq/window setup failed */
+	int fault_crbs;
+	int fault_fifo_size;
+	int fifo_in_progress;	/* To wake up thread or return IRQ_HANDLED */
+	spinlock_t fault_lock;	/* Protects fifo_in_progress update */
+	void *fault_fifo;
+	struct pnv_vas_window *fault_win; /* Fault window */
+
+	struct mutex mutex;
+	struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX];
+	/* indexed by window id, see vas_pswid_to_window() */
+	struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP];
+
+	char *name;		/* "vas-<vas_id>", also used as irq name */
+	char *dbgname;
+	struct dentry *dbgdir;
+};
+
+/*
+ * In-kernel state of a VAS window on PowerNV. One per window.
+ *
+ * The generic struct vas_window is embedded as ->vas_win; code that is
+ * handed a struct vas_window gets back to this structure with
+ * container_of().
+ */
+struct pnv_vas_window {
+	struct vas_window vas_win;
+	/* Fields common to send and receive windows */
+	struct vas_instance *vinst;
+	bool tx_win;		/* True if send window */
+	bool nx_win;		/* True if NX window */
+	bool user_win;		/* True if user space window */
+	void *hvwc_map;		/* HV window context */
+	void *uwc_map;		/* OS/User window context */
+
+	/* Fields applicable only to send windows */
+	void *paste_kaddr;	/* kernel paste address used by vas_paste_crb() */
+	char *paste_addr_name;
+	struct pnv_vas_window *rxwin;	/* receive window paired with this one */
+
+	/* Fields applicable only to receive windows */
+	atomic_t num_txwins;	/* non-zero makes vas_win_close() return -EBUSY */
+};
+
+/*
+ * Container for the hardware state of a window. One per-window.
+ *
+ * A VAS Window context is a 512-byte area in the hardware that contains
+ * a set of 64-bit registers. Individual bit-fields in these registers
+ * determine the configuration/operation of the hardware. struct vas_winctx
+ * is a container for the register fields in the window context.
+ *
+ * NOTE(review): the bool members presumably correspond to the like-named
+ * VAS_WINCTL_* / VAS_*_DISABLE bit definitions in this header - confirm
+ * against the code that programs the window context registers.
+ */
+struct vas_winctx {
+	u64 rx_fifo;
+	int rx_fifo_size;
+	int wcreds_max;
+	int rsvd_txbuf_count;
+
+	bool user_win;
+	bool nx_win;
+	bool fault_win;
+	bool rsvd_txbuf_enable;
+	bool pin_win;
+	bool rej_no_credit;
+	bool tx_wcred_mode;
+	bool rx_wcred_mode;
+	bool tx_word_mode;
+	bool rx_word_mode;
+	bool data_stamp;
+	bool xtra_write;
+	bool notify_disable;
+	bool intr_disable;
+	bool fifo_disable;
+	bool notify_early;
+	bool notify_os_intr_reg;
+
+	int lpid;
+	int pidr;		/* value from SPRN_PID, not linux pid */
+	int lnotify_lpid;
+	int lnotify_pid;
+	int lnotify_tid;
+	u32 pswid;
+	int rx_win_id;
+	int fault_win_id;
+	int tc_mode;
+
+	u64 irq_port;
+
+	enum vas_dma_type dma_type;
+	enum vas_notify_scope min_scope;
+	enum vas_notify_scope max_scope;
+	enum vas_notify_after_count notify_after_count;
+};
+
+extern struct mutex vas_mutex;
+
+extern struct vas_instance *find_vas_instance(int vasid);
+extern void vas_init_dbgdir(void);
+extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
+extern void vas_window_init_dbgdir(struct pnv_vas_window *win);
+extern void vas_window_free_dbgdir(struct pnv_vas_window *win);
+extern int vas_setup_fault_window(struct vas_instance *vinst);
+extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
+extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
+extern void vas_return_credit(struct pnv_vas_window *window, bool tx);
+extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+						uint32_t pswid);
+extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr,
+				int *len);
+
+/* Return pid_vnr() of the pid stored in the window's task reference. */
+static inline int vas_window_pid(struct vas_window *window)
+{
+	return pid_vnr(window->task_ref.pid);
+}
+
+/*
+ * Debug-log a register write: window direction and id, register name,
+ * register address and value. Writes of 0 are not logged.
+ */
+static inline void vas_log_write(struct pnv_vas_window *win, char *name,
+			void *regptr, u64 val)
+{
+	if (!val)
+		return;
+
+	pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
+			win->tx_win ? "Tx" : "Rx", win->vas_win.winid,
+			name, regptr, val);
+}
+
+/*
+ * Log and then write @val (big-endian, 64-bit) to the register at byte
+ * offset @reg of the window's OS/User window context mapping.
+ */
+static inline void write_uwc_reg(struct pnv_vas_window *win, char *name,
+			s32 reg, u64 val)
+{
+	void *regptr = win->uwc_map + reg;
+
+	vas_log_write(win, name, regptr, val);
+	out_be64(regptr, val);
+}
+
+/*
+ * Log and then write @val (big-endian, 64-bit) to the register at byte
+ * offset @reg of the window's HV window context mapping.
+ */
+static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name,
+			s32 reg, u64 val)
+{
+	void *regptr = win->hvwc_map + reg;
+
+	vas_log_write(win, name, regptr, val);
+	out_be64(regptr, val);
+}
+
+/*
+ * Read the big-endian 64-bit register at byte offset @reg of the
+ * window's HV window context mapping. @name is accepted so that VREG()
+ * can be used by callers, but is not used here.
+ */
+static inline u64 read_hvwc_reg(struct pnv_vas_window *win,
+			char *name __maybe_unused, s32 reg)
+{
+	void *regptr = win->hvwc_map + reg;
+
+	return in_be64(regptr);
+}
+
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ *	Bits	Usage
+ *	0:7	VAS id (8 bits)
+ *	8:15	Unused, 0 (8 bits)
+ *	16:31	Window id (16 bits)
+ */
+/*
+ * Build a PSWID: @vasid is placed in the top 8 bits (shifted left by 24)
+ * and OR-ed with @winid. The shift is done on an unsigned value so that
+ * a @vasid of 128 or more does not shift into the sign bit of an int.
+ */
+static inline u32 encode_pswid(int vasid, int winid)
+{
+	return (u32)winid | ((u32)vasid << (31 - 7));
+}
+
+/*
+ * Split a PSWID: the top 8 bits go to *@vasid and the low 16 bits to
+ * *@winid. Either output pointer may be NULL, in which case that part
+ * is not stored.
+ */
+static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
+{
+	u32 vas_bits = (pswid >> (31 - 7)) & 0xFF;
+	u32 win_bits = pswid & 0xFFFF;
+
+	if (vasid)
+		*vasid = vas_bits;
+
+	if (winid)
+		*winid = win_bits;
+}
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
new file mode 100644
index 0000000000..a44869e5ea
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PS3
+	bool "Sony PS3"
+	depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+	select PPC_CELL
+	select USB_OHCI_LITTLE_ENDIAN
+	select USB_OHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_MMIO
+	select HAVE_PCI
+	select IRQ_DOMAIN_NOMAP
+	help
+	  This option enables support for the Sony PS3 game console
+	  and other platforms using the PS3 hypervisor.  Enabling this
+	  option will allow building otheros.bld, a kernel image suitable
+	  for programming into flash memory, and vmlinux, a kernel image
+	  suitable for loading via kexec.
+
+menu "PS3 Platform Options"
+	depends on PPC_PS3
+
+config PS3_ADVANCED
+	depends on PPC_PS3
+	bool "PS3 Advanced configuration options"
+	help
+	  This gives you access to some advanced options for the PS3. The
+	  defaults should be fine for most users, but these options may make
+	  it possible to better control the kernel configuration if you know
+	  what you are doing.
+
+	  Note that the answer to this question won't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about these options.
+
+	  Most users should say N to this question.
+
+config PS3_HTAB_SIZE
+	depends on PPC_PS3
+	int "PS3 Platform pagetable size" if PS3_ADVANCED
+	range 18 20
+	default 20
+	help
+	  This option is only for experts who may have the desire to fine
+	  tune the pagetable size on their system.  The value here is
+	  expressed as the log2 of the page table size.  Valid values are
+	  18, 19, and 20, corresponding to 256KB, 512KB and 1MB respectively.
+
+	  If unsure, choose the default (20) with the confidence that your
+	  system will have optimal runtime performance.
+
+config PS3_DYNAMIC_DMA
+	depends on PPC_PS3
+	bool "PS3 Platform dynamic DMA page table management"
+	help
+	  This option will enable kernel support to take advantage of the
+	  per device dynamic DMA page table management provided by the Cell
+	  processor's IO Controller.  This support incurs some runtime
+	  overhead and also slightly increases kernel memory usage.  The
+	  current implementation should be considered experimental.
+
+	  This support is mainly for Linux kernel development.  If unsure,
+	  say N.
+
+config PS3_VUART
+	depends on PPC_PS3
+	tristate
+
+config PS3_PS3AV
+	depends on PPC_PS3
+	tristate "PS3 AV settings driver" if PS3_ADVANCED
+	select PS3_VUART
+	default y
+	help
+	  Include support for the PS3 AV Settings driver.
+
+	  This support is required for PS3 graphics and sound. In
+	  general, all users will say Y or M.
+
+config PS3_SYS_MANAGER
+	depends on PPC_PS3
+	tristate "PS3 System Manager driver" if PS3_ADVANCED
+	select PS3_VUART
+	default y
+	help
+	  Include support for the PS3 System Manager.
+
+	  This support is required for PS3 system control.  In
+	  general, all users will say Y or M.
+
+config PS3_VERBOSE_RESULT
+	bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED
+	depends on PPC_PS3
+	help
+	  Enables more verbose log messages for LV1 hypercall results.
+
+	  If in doubt, say N here and reduce the size of the kernel by a
+	  small amount.
+
+config PS3_REPOSITORY_WRITE
+	bool "PS3 Repository write support" if PS3_ADVANCED
+	depends on PPC_PS3
+	help
+	  Enables support for writing to the PS3 System Repository.
+
+	  This support is intended for bootloaders that need to store data
+	  in the repository for later boot stages.
+
+	  If in doubt, say N here and reduce the size of the kernel by a
+	  small amount.
+
+config PS3_STORAGE
+	depends on PPC_PS3
+	tristate
+
+config PS3_DISK
+	tristate "PS3 Disk Storage Driver"
+	depends on PPC_PS3 && BLOCK
+	select PS3_STORAGE
+	help
+	  Include support for the PS3 Disk Storage.
+
+	  This support is required to access the PS3 hard disk.
+	  In general, all users will say Y or M.
+
+config PS3_ROM
+	tristate "PS3 BD/DVD/CD-ROM Storage Driver"
+	depends on PPC_PS3 && SCSI
+	select PS3_STORAGE
+	help
+	  Include support for the PS3 ROM Storage.
+
+	  This support is required to access the PS3 BD/DVD/CD-ROM drive.
+	  In general, all users will say Y or M.
+	  Also make sure to say Y or M to "SCSI CDROM support" later.
+
+config PS3_FLASH
+	tristate "PS3 FLASH ROM Storage Driver"
+	depends on PPC_PS3
+	select PS3_STORAGE
+	help
+	  Include support for the PS3 FLASH ROM Storage.
+
+	  This support is required to access the PS3 FLASH ROM, which
+	  contains the boot loader and some boot options.
+	  In general, PS3 OtherOS users will say Y or M.
+
+	  As this driver needs a fixed buffer of 256 KiB of memory, it can
+	  be disabled on the kernel command line using "ps3flash=off", to
+	  not allocate this fixed buffer.
+
+config PS3_VRAM
+	tristate "PS3 Video RAM Storage Driver"
+	depends on FB_PS3=y && BLOCK && m
+	help
+	  This driver allows you to use excess PS3 video RAM as volatile
+	  storage or system swap.
+
+config PS3_LPM
+	tristate "PS3 Logical Performance Monitor support"
+	depends on PPC_PS3
+	help
+	  Include support for the PS3 Logical Performance Monitor.
+
+	  This support is required to use the logical performance monitor
+	  of the PS3's LV1 hypervisor.
+
+	  If you intend to use the advanced performance monitoring and
+	  profiling support of the Cell processor with programs like
+	  perfmon2, then say Y or M, otherwise say N.
+
+config PS3GELIC_UDBG
+	bool "PS3 udbg output via UDP broadcasts on Ethernet"
+	depends on PPC_PS3
+	help
+	  Enables udbg early debugging output by sending broadcast UDP
+	  via the Ethernet port (UDP port number 18194).
+
+	  This driver uses a trivial implementation and is independent
+	  from the main PS3 gelic network driver.
+
+	  If in doubt, say N here.
+
+endmenu
diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile
new file mode 100644
index 0000000000..86bf2967a8
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o
+obj-y += interrupt.o exports.o os-area.o
+obj-y += system-bus.o
+
+obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SPU_BASE) += spu.o
+obj-y += device-init.o
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
new file mode 100644
index 0000000000..e87360a0fb
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -0,0 +1,975 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 device registration routines.
+ *
+ *  Copyright (C) 2007 Sony Computer Entertainment Inc.
+ *  Copyright 2007 Sony Corp.
+ */
+
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/reboot.h>
+#include <linux/rcuwait.h>
+
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3stor.h>
+
+#include "platform.h"
+
+/*
+ * Register the PS3 logical performance monitor (lpm) device.
+ * Reads the BE node id, lpm privileges and pu id from the repository, checks
+ * that this lpar may use the lpm, then registers the device on the system bus.
+ * Returns 0 on success; on failure the allocation is released again.
+ */
+static int __init ps3_register_lpm_devices(void)
+{
+	int result;
+	u64 priv_lpar_id, own_lpar_id;
+	struct ps3_system_bus_device *dev;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->match_id = PS3_MATCH_ID_LPM;
+	dev->dev_type = PS3_DEVICE_TYPE_LPM;
+
+	/* The current lpm driver only supports a single BE processor. */
+	result = ps3_repository_read_be_node_id(0, &dev->lpm.node_id);
+	if (result) {
+		pr_debug("%s:%d: ps3_repository_read_be_node_id failed \n",
+			__func__, __LINE__);
+		goto fail_read_repo;
+	}
+
+	result = ps3_repository_read_lpm_privileges(dev->lpm.node_id,
+		&priv_lpar_id, &dev->lpm.rights);
+	if (result) {
+		pr_debug("%s:%d: ps3_repository_read_lpm_privileges failed\n",
+			__func__, __LINE__);
+		goto fail_read_repo;
+	}
+
+	lv1_get_logical_partition_id(&own_lpar_id);
+
+	if (priv_lpar_id != own_lpar_id) {
+		pr_debug("%s:%d: wrong lpar\n",
+			__func__, __LINE__);
+		result = -ENODEV;
+		goto fail_rights;
+	}
+
+	if (!(dev->lpm.rights & PS3_LPM_RIGHTS_USE_LPM)) {
+		pr_debug("%s:%d: don't have rights to use lpm\n",
+			__func__, __LINE__);
+		result = -EPERM;
+		goto fail_rights;
+	}
+
+	result = ps3_repository_read_pu_id(0, &dev->lpm.pu_id);
+	if (result) {
+		pr_debug("%s:%d: ps3_repository_read_pu_id failed \n",
+			__func__, __LINE__);
+		goto fail_read_repo;
+	}
+
+	/* Log only now that pu_id has actually been read from the repository. */
+	pr_debug("%s:%d: pu_id %llu, rights %llu(%llxh)\n",
+		__func__, __LINE__, dev->lpm.pu_id, dev->lpm.rights,
+		dev->lpm.rights);
+
+	result = ps3_system_bus_device_register(dev);
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail_register;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+
+fail_register:
+fail_rights:
+fail_read_repo:
+	kfree(dev);
+	pr_debug(" <- %s:%d: failed\n", __func__, __LINE__);
+	return result;
+}
+
+/**
+ * ps3_setup_gelic_device - Setup and register a gelic device instance.
+ *
+ * Allocates memory for a struct ps3_system_bus_device instance, initialises the
+ * structure members, and registers the device instance with the system bus.
+ */
+
+static int __init ps3_setup_gelic_device(
+	const struct ps3_repository_device *repo)
+{
+	int result;
+	struct layout {
+		struct ps3_system_bus_device dev;
+		struct ps3_dma_region d_region;
+	} *p;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB);
+	BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_GELIC);
+
+	p = kzalloc(sizeof(struct layout), GFP_KERNEL);
+
+	if (!p) {
+		result = -ENOMEM;
+		goto fail_malloc;
+	}
+
+	p->dev.match_id = PS3_MATCH_ID_GELIC;
+	p->dev.dev_type = PS3_DEVICE_TYPE_SB;
+	p->dev.bus_id = repo->bus_id;
+	p->dev.dev_id = repo->dev_id;
+	p->dev.d_region = &p->d_region;
+
+	result = ps3_repository_find_interrupt(repo,
+		PS3_INTERRUPT_TYPE_EVENT_PORT, &p->dev.interrupt_id);
+
+	if (result) {
+		pr_debug("%s:%d ps3_repository_find_interrupt failed\n",
+			__func__, __LINE__);
+		goto fail_find_interrupt;
+	}
+
+	BUG_ON(p->dev.interrupt_id != 0);
+
+	result = ps3_dma_region_init(&p->dev, p->dev.d_region, PS3_DMA_64K,
+		PS3_DMA_OTHER, NULL, 0);
+
+	if (result) {
+		pr_debug("%s:%d ps3_dma_region_init failed\n",
+			__func__, __LINE__);
+		goto fail_dma_init;
+	}
+
+	result = ps3_system_bus_device_register(&p->dev);
+
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail_device_register;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+
+fail_device_register:
+fail_dma_init:
+fail_find_interrupt:
+	kfree(p);
+fail_malloc:
+	pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__);
+	return result;
+}
+
+static int __ref ps3_setup_uhc_device(
+	const struct ps3_repository_device *repo, enum ps3_match_id match_id,
+	enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type)
+{
+	int result;
+	struct layout {
+		struct ps3_system_bus_device dev;
+		struct ps3_dma_region d_region;
+		struct ps3_mmio_region m_region;
+	} *p;
+	u64 bus_addr;
+	u64 len;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB);
+	BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_USB);
+
+	p = kzalloc(sizeof(struct layout), GFP_KERNEL);
+
+	if (!p) {
+		result = -ENOMEM;
+		goto fail_malloc;
+	}
+
+	p->dev.match_id = match_id;
+	p->dev.dev_type = PS3_DEVICE_TYPE_SB;
+	p->dev.bus_id = repo->bus_id;
+	p->dev.dev_id = repo->dev_id;
+	p->dev.d_region = &p->d_region;
+	p->dev.m_region = &p->m_region;
+
+	result = ps3_repository_find_interrupt(repo,
+		interrupt_type, &p->dev.interrupt_id);
+
+	if (result) {
+		pr_debug("%s:%d ps3_repository_find_interrupt failed\n",
+			__func__, __LINE__);
+		goto fail_find_interrupt;
+	}
+
+	result = ps3_repository_find_reg(repo, reg_type,
+		&bus_addr, &len);
+
+	if (result) {
+		pr_debug("%s:%d ps3_repository_find_reg failed\n",
+			__func__, __LINE__);
+		goto fail_find_reg;
+	}
+
+	result = ps3_dma_region_init(&p->dev, p->dev.d_region, PS3_DMA_64K,
+		PS3_DMA_INTERNAL, NULL, 0);
+
+	if (result) {
+		pr_debug("%s:%d ps3_dma_region_init failed\n",
+			__func__, __LINE__);
+		goto fail_dma_init;
+	}
+
+	result = ps3_mmio_region_init(&p->dev, p->dev.m_region, bus_addr, len,
+		PS3_MMIO_4K);
+
+	if (result) {
+		pr_debug("%s:%d ps3_mmio_region_init failed\n",
+			__func__, __LINE__);
+		goto fail_mmio_init;
+	}
+
+	result = ps3_system_bus_device_register(&p->dev);
+
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail_device_register;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+
+fail_device_register:
+fail_mmio_init:
+fail_dma_init:
+fail_find_reg:
+fail_find_interrupt:
+	kfree(p);
+fail_malloc:
+	pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__);
+	return result;
+}
+
+static int __init ps3_setup_ehci_device(
+	const struct ps3_repository_device *repo)
+{
+	return ps3_setup_uhc_device(repo, PS3_MATCH_ID_EHCI,
+		PS3_INTERRUPT_TYPE_SB_EHCI, PS3_REG_TYPE_SB_EHCI);
+}
+
+static int __init ps3_setup_ohci_device(
+	const struct ps3_repository_device *repo)
+{
+	return ps3_setup_uhc_device(repo, PS3_MATCH_ID_OHCI,
+		PS3_INTERRUPT_TYPE_SB_OHCI, PS3_REG_TYPE_SB_OHCI);
+}
+
+static int __init ps3_setup_vuart_device(enum ps3_match_id match_id,
+	unsigned int port_number)
+{
+	int result;
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *p;
+
+	pr_debug(" -> %s:%d: match_id %u, port %u\n", __func__, __LINE__,
+		match_id, port_number);
+
+	p = kzalloc(sizeof(struct layout), GFP_KERNEL);
+
+	if (!p)
+		return -ENOMEM;
+
+	p->dev.match_id = match_id;
+	p->dev.dev_type = PS3_DEVICE_TYPE_VUART;
+	p->dev.port_number = port_number;
+
+	result = ps3_system_bus_device_register(&p->dev);
+
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail_device_register;
+	}
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+
+fail_device_register:
+	kfree(p);
+	pr_debug(" <- %s:%d fail\n", __func__, __LINE__);
+	return result;
+}
+
+static int ps3_setup_storage_dev(const struct ps3_repository_device *repo,
+				 enum ps3_match_id match_id)
+{
+	int result;
+	struct ps3_storage_device *p;
+	u64 port, blk_size, num_blocks;
+	unsigned int num_regions, i;
+
+	pr_debug(" -> %s:%u: match_id %u\n", __func__, __LINE__, match_id);
+
+	result = ps3_repository_read_stor_dev_info(repo->bus_index,
+						   repo->dev_index, &port,
+						   &blk_size, &num_blocks,
+						   &num_regions);
+	if (result) {
+		printk(KERN_ERR "%s:%u: _read_stor_dev_info failed %d\n",
+		       __func__, __LINE__, result);
+		return -ENODEV;
+	}
+
+	pr_debug("%s:%u: (%u:%u:%u): port %llu blk_size %llu num_blocks %llu "
+		 "num_regions %u\n", __func__, __LINE__, repo->bus_index,
+		 repo->dev_index, repo->dev_type, port, blk_size, num_blocks,
+		 num_regions);
+
+	p = kzalloc(struct_size(p, regions, num_regions), GFP_KERNEL);
+	if (!p) {
+		result = -ENOMEM;
+		goto fail_malloc;
+	}
+
+	p->sbd.match_id = match_id;
+	p->sbd.dev_type = PS3_DEVICE_TYPE_SB;
+	p->sbd.bus_id = repo->bus_id;
+	p->sbd.dev_id = repo->dev_id;
+	p->sbd.d_region = &p->dma_region;
+	p->blk_size = blk_size;
+	p->num_regions = num_regions;
+
+	result = ps3_repository_find_interrupt(repo,
+					       PS3_INTERRUPT_TYPE_EVENT_PORT,
+					       &p->sbd.interrupt_id);
+	if (result) {
+		printk(KERN_ERR "%s:%u: find_interrupt failed %d\n", __func__,
+		       __LINE__, result);
+		result = -ENODEV;
+		goto fail_find_interrupt;
+	}
+
+	for (i = 0; i < num_regions; i++) {
+		unsigned int id;
+		u64 start, size;
+
+		result = ps3_repository_read_stor_dev_region(repo->bus_index,
+							     repo->dev_index,
+							     i, &id, &start,
+							     &size);
+		if (result) {
+			printk(KERN_ERR
+			       "%s:%u: read_stor_dev_region failed %d\n",
+			       __func__, __LINE__, result);
+			result = -ENODEV;
+			goto fail_read_region;
+		}
+		pr_debug("%s:%u: region %u: id %u start %llu size %llu\n",
+			 __func__, __LINE__, i, id, start, size);
+
+		p->regions[i].id = id;
+		p->regions[i].start = start;
+		p->regions[i].size = size;
+	}
+
+	result = ps3_system_bus_device_register(&p->sbd);
+	if (result) {
+		pr_debug("%s:%u ps3_system_bus_device_register failed\n",
+			 __func__, __LINE__);
+		goto fail_device_register;
+	}
+
+	pr_debug(" <- %s:%u\n", __func__, __LINE__);
+	return 0;
+
+fail_device_register:
+fail_read_region:
+fail_find_interrupt:
+	kfree(p);
+fail_malloc:
+	pr_debug(" <- %s:%u: fail.\n", __func__, __LINE__);
+	return result;
+}
+
+static int __init ps3_register_vuart_devices(void)
+{
+	int result;
+	unsigned int port_number;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	result = ps3_repository_read_vuart_av_port(&port_number);
+	if (result)
+		port_number = 0; /* av default */
+
+	result = ps3_setup_vuart_device(PS3_MATCH_ID_AV_SETTINGS, port_number);
+	WARN_ON(result);
+
+	result = ps3_repository_read_vuart_sysmgr_port(&port_number);
+	if (result)
+		port_number = 2; /* sysmgr default */
+
+	result = ps3_setup_vuart_device(PS3_MATCH_ID_SYSTEM_MANAGER,
+		port_number);
+	WARN_ON(result);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+static int __init ps3_register_sound_devices(void)
+{
+	int result;
+	struct layout {
+		struct ps3_system_bus_device dev;
+		struct ps3_dma_region d_region;
+		struct ps3_mmio_region m_region;
+	} *p;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->dev.match_id = PS3_MATCH_ID_SOUND;
+	p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+	p->dev.d_region = &p->d_region;
+	p->dev.m_region = &p->m_region;
+
+	result = ps3_system_bus_device_register(&p->dev);
+
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail_device_register;
+	}
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+
+fail_device_register:
+	kfree(p);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Register the system bus device matched by PS3_MATCH_ID_GPU with sub id
+ * PS3_MATCH_SUB_ID_GPU_FB.  Returns 0 on success, else an error code.
+ */
+static int __init ps3_register_graphics_devices(void)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *p;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->dev.match_id = PS3_MATCH_ID_GPU;
+	p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_FB;
+	p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+	result = ps3_system_bus_device_register(&p->dev);
+	if (!result) {
+		pr_debug(" <- %s:%d\n", __func__, __LINE__);
+		return 0;
+	}
+
+	/* Registration failed: report it and release the allocation. */
+	pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+		__func__, __LINE__);
+	kfree(p);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Register the system bus device matched by PS3_MATCH_ID_GPU with sub id
+ * PS3_MATCH_SUB_ID_GPU_RAMDISK.  Returns 0 on success, else an error code.
+ */
+static int __init ps3_register_ramdisk_device(void)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *p;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	p->dev.match_id = PS3_MATCH_ID_GPU;
+	p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK;
+	p->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+	result = ps3_system_bus_device_register(&p->dev);
+	if (!result) {
+		pr_debug(" <- %s:%d\n", __func__, __LINE__);
+		return 0;
+	}
+
+	/* Registration failed: report it and release the allocation. */
+	pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+		__func__, __LINE__);
+	kfree(p);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
+/**
+ * ps3_setup_dynamic_device - Setup a dynamic device from the repository
+ */
+
+static int ps3_setup_dynamic_device(const struct ps3_repository_device *repo)
+{
+	int result;
+
+	switch (repo->dev_type) {
+	case PS3_DEV_TYPE_STOR_DISK:
+		result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_DISK);
+
+		/* Some devices are not accessible from the Other OS lpar. */
+		if (result == -ENODEV) {
+			result = 0;
+			pr_debug("%s:%u: not accessible\n", __func__,
+				 __LINE__);
+		}
+
+		if (result)
+			pr_debug("%s:%u ps3_setup_storage_dev failed\n",
+				 __func__, __LINE__);
+		break;
+
+	case PS3_DEV_TYPE_STOR_ROM:
+		result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_ROM);
+		if (result)
+			pr_debug("%s:%u ps3_setup_storage_dev failed\n",
+				 __func__, __LINE__);
+		break;
+
+	case PS3_DEV_TYPE_STOR_FLASH:
+		result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_FLASH);
+		if (result)
+			pr_debug("%s:%u ps3_setup_storage_dev failed\n",
+				 __func__, __LINE__);
+		break;
+
+	default:
+		result = 0;
+		pr_debug("%s:%u: unsupported dev_type %u\n", __func__, __LINE__,
+			repo->dev_type);
+	}
+
+	return result;
+}
+
+/**
+ * ps3_setup_static_device - Setup a static device from the repository
+ */
+
+static int __init ps3_setup_static_device(const struct ps3_repository_device *repo)
+{
+	int result;
+
+	switch (repo->dev_type) {
+	case PS3_DEV_TYPE_SB_GELIC:
+		result = ps3_setup_gelic_device(repo);
+		if (result) {
+			pr_debug("%s:%d ps3_setup_gelic_device failed\n",
+				__func__, __LINE__);
+		}
+		break;
+	case PS3_DEV_TYPE_SB_USB:
+
+		/* Each USB device has both an EHCI and an OHCI HC */
+
+		result = ps3_setup_ehci_device(repo);
+
+		if (result) {
+			pr_debug("%s:%d ps3_setup_ehci_device failed\n",
+				__func__, __LINE__);
+		}
+
+		result = ps3_setup_ohci_device(repo);
+
+		if (result) {
+			pr_debug("%s:%d ps3_setup_ohci_device failed\n",
+				__func__, __LINE__);
+		}
+		break;
+
+	default:
+		return ps3_setup_dynamic_device(repo);
+	}
+
+	return result;
+}
+
+/* Look up device bus_id:dev_id in the repository and, if found, set it up. */
+static void ps3_find_and_add_device(u64 bus_id, u64 dev_id)
+{
+	struct ps3_repository_device repo;
+	unsigned int attempt;
+	int err;
+
+	/*
+	 * On some firmware versions (e.g. 1.90), the device may not show up
+	 * in the repository immediately.  Look it up at most 10 times,
+	 * sleeping 100 ms after every miss.
+	 */
+	for (attempt = 0; attempt < 10; attempt++) {
+		err = ps3_repository_find_device_by_id(&repo, bus_id, dev_id);
+		if (!err) {
+			if (attempt)
+				pr_debug("%s:%u: device %llu:%llu found after %u retries\n",
+					 __func__, __LINE__, bus_id, dev_id,
+					 attempt);
+
+			ps3_setup_dynamic_device(&repo);
+			return;
+		}
+
+		/* Give up early if the sleep did not run to completion. */
+		if (msleep_interruptible(100))
+			break;
+	}
+
+	pr_warn("%s:%u: device %llu:%llu not found\n",
+		__func__, __LINE__, bus_id, dev_id);
+}
+
+#define PS3_NOTIFICATION_DEV_ID		ULONG_MAX
+#define PS3_NOTIFICATION_INTERRUPT_ID	0
+
+struct ps3_notification_device {
+	struct ps3_system_bus_device sbd;
+	spinlock_t lock;	/* taken by the irq handler and the issuer */
+	u64 tag;		/* tag filled in when a read/write is issued */
+	u64 lv1_status;		/* status recorded by the irq handler */
+	struct rcuwait wait;	/* issuer waits here for completion */
+	bool done;		/* set by the irq handler, cleared on issue */
+};
+
+enum ps3_notify_type {
+	notify_device_ready = 0,
+	notify_region_probe = 1,
+	notify_region_update = 2,
+};
+
+struct ps3_notify_cmd {
+	u64 operation_code;		/* must be zero */
+	u64 event_mask;			/* OR of 1UL << enum ps3_notify_type */
+};
+
+struct ps3_notify_event {
+	u64 event_type;			/* enum ps3_notify_type */
+	u64 bus_id;
+	u64 dev_id;
+	u64 dev_type;
+	u64 dev_port;
+};
+
+static irqreturn_t ps3_notification_interrupt(int irq, void *data)
+{
+	struct ps3_notification_device *dev = data;
+	int res;
+	u64 tag, status;
+
+	spin_lock(&dev->lock);
+	res = lv1_storage_get_async_status(PS3_NOTIFICATION_DEV_ID, &tag,
+					   &status);
+	if (tag != dev->tag)	/* logged only; processing continues */
+		pr_err("%s:%u: tag mismatch, got %llx, expected %llx\n",
+		       __func__, __LINE__, tag, dev->tag);
+	/* Only a successful status query completes the request and wakes the waiter. */
+	if (res) {
+		pr_err("%s:%u: res %d status 0x%llx\n", __func__, __LINE__, res,
+		       status);
+	} else {
+		pr_debug("%s:%u: completed, status 0x%llx\n", __func__,
+			 __LINE__, status);
+		dev->lv1_status = status;
+		dev->done = true;
+		rcuwait_wake_up(&dev->wait);
+	}
+	spin_unlock(&dev->lock);
+	return IRQ_HANDLED;
+}
+
+/* Issue a notification read or write and wait until it has completed. */
+static int ps3_notification_read_write(struct ps3_notification_device *dev,
+				       u64 lpar, int write)
+{
+	const char *op = write ? "write" : "read";
+	unsigned long flags;
+	int res;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	res = write ? lv1_storage_write(dev->sbd.dev_id, 0, 0, 1, 0, lpar,
+					&dev->tag)
+		    : lv1_storage_read(dev->sbd.dev_id, 0, 0, 1, 0, lpar,
+				       &dev->tag);
+	/* Still under dev->lock, which the irq handler also takes. */
+	dev->done = false;
+	spin_unlock_irqrestore(&dev->lock, flags);
+	if (res) {
+		pr_err("%s:%u: %s failed %d\n", __func__, __LINE__, op, res);
+		return -EPERM;
+	}
+	pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
+
+	rcuwait_wait_event(&dev->wait, dev->done || kthread_should_stop(), TASK_IDLE);
+	if (kthread_should_stop())
+		return -EINTR;	/* stopping: request may be incomplete */
+
+	if (dev->lv1_status) {
+		pr_err("%s:%u: %s not completed, status 0x%llx\n", __func__,
+		       __LINE__, op, dev->lv1_status);
+		return -EIO;
+	}
+	pr_debug("%s:%u: notification %s completed\n", __func__, __LINE__, op);
+	return 0;
+}
+
+static struct task_struct *probe_task;
+
+/**
+ * ps3_probe_thread - Background repository probing at system startup.
+ *
+ * This implementation only supports background probing on a single bus.
+ * It uses the hypervisor's storage device notification mechanism to wait until
+ * a storage device is ready.  The device notification mechanism uses a
+ * pseudo device to asynchronously notify the guest when storage devices become
+ * ready.  The notification device has a block size of 512 bytes.
+ */
+
+static int ps3_probe_thread(void *data)
+{
+	struct ps3_notification_device dev;
+	int res;
+	unsigned int irq;
+	u64 lpar;
+	void *buf;
+	struct ps3_notify_cmd *notify_cmd;
+	struct ps3_notify_event *notify_event;
+
+	pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__);
+
+	buf = kzalloc(512, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	lpar = ps3_mm_phys_to_lpar(__pa(buf));
+	notify_cmd = buf;
+	notify_event = buf;
+
+	/* dummy system bus device */
+	dev.sbd.bus_id = (u64)data;
+	dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID;
+	dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID;
+
+	res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0);
+	if (res) {
+		pr_err("%s:%u: lv1_open_device failed %s\n", __func__,
+		       __LINE__, ps3_result(res));
+		goto fail_free;
+	}
+
+	res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY,
+					      &irq);
+	if (res) {
+		pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n",
+		       __func__, __LINE__, res);
+	       goto fail_close_device;
+	}
+
+	spin_lock_init(&dev.lock);
+	rcuwait_init(&dev.wait);
+
+	res = request_irq(irq, ps3_notification_interrupt, 0,
+			  "ps3_notification", &dev);
+	if (res) {
+		pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__,
+		       res);
+		goto fail_sb_event_receive_port_destroy;
+	}
+
+	/* Setup and write the request for device notification. */
+	notify_cmd->operation_code = 0; /* must be zero */
+	notify_cmd->event_mask = 1UL << notify_region_probe;
+
+	res = ps3_notification_read_write(&dev, lpar, 1);
+	if (res)
+		goto fail_free_irq;
+
+	/* Loop here processing the requested notification events. */
+	do {
+		try_to_freeze();
+
+		memset(notify_event, 0, sizeof(*notify_event));
+
+		res = ps3_notification_read_write(&dev, lpar, 0);
+		if (res)
+			break;
+
+		pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu"
+			 " type %llu port %llu\n", __func__, __LINE__,
+			 notify_event->event_type, notify_event->bus_id,
+			 notify_event->dev_id, notify_event->dev_type,
+			 notify_event->dev_port);
+
+		if (notify_event->event_type != notify_region_probe ||
+		    notify_event->bus_id != dev.sbd.bus_id) {
+			pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n",
+				__func__, __LINE__, notify_event->event_type,
+				notify_event->dev_id, notify_event->dev_type);
+			continue;
+		}
+
+		ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id);
+
+	} while (!kthread_should_stop());
+
+fail_free_irq:
+	free_irq(irq, &dev);
+fail_sb_event_receive_port_destroy:
+	ps3_sb_event_receive_port_destroy(&dev.sbd, irq);
+fail_close_device:
+	lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id);
+fail_free:
+	kfree(buf);
+
+	probe_task = NULL;
+
+	pr_debug(" <- %s:%u: kthread finished\n", __func__, __LINE__);
+
+	return 0;
+}
+
+/**
+ * ps3_stop_probe_thread - Stops the background probe thread.
+ *
+ */
+
+static int ps3_stop_probe_thread(struct notifier_block *nb, unsigned long code,
+				 void *data)
+{
+	if (probe_task)
+		kthread_stop(probe_task);
+	return 0;
+}
+
+static struct notifier_block nb = {
+	.notifier_call = ps3_stop_probe_thread
+};
+
+/**
+ * ps3_start_probe_thread - Starts the background probe thread.
+ *
+ */
+
+static int __init ps3_start_probe_thread(enum ps3_bus_type bus_type)
+{
+	int result;
+	struct task_struct *task;
+	struct ps3_repository_device repo;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	memset(&repo, 0, sizeof(repo));
+
+	repo.bus_type = bus_type;
+
+	result = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index);
+
+	if (result) {
+		printk(KERN_ERR "%s: Cannot find bus (%d)\n", __func__, result);
+		return -ENODEV;
+	}
+
+	result = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id);
+
+	if (result) {
+		printk(KERN_ERR "%s: read_bus_id failed %d\n", __func__,
+			result);
+		return -ENODEV;
+	}
+
+	task = kthread_run(ps3_probe_thread, (void *)repo.bus_id,
+			   "ps3-probe-%u", bus_type);
+
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		printk(KERN_ERR "%s: kthread_run failed %d\n", __func__,
+		       result);
+		return result;
+	}
+
+	probe_task = task;
+	register_reboot_notifier(&nb);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+}
+
+/**
+ * ps3_register_devices - Probe the system and register devices found.
+ *
+ * A device_initcall() routine.
+ */
+
+static int __init ps3_register_devices(void)
+{
+	int result;
+
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	/* ps3_repository_dump_bus_info(); */
+
+	result = ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE);
+
+	ps3_register_vuart_devices();
+
+	ps3_register_graphics_devices();
+
+	ps3_repository_find_devices(PS3_BUS_TYPE_SB, ps3_setup_static_device);
+
+	ps3_register_sound_devices();
+
+	ps3_register_lpm_devices();
+
+	ps3_register_ramdisk_device();
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+}
+
+device_initcall(ps3_register_devices);
diff --git a/arch/powerpc/platforms/ps3/exports.c b/arch/powerpc/platforms/ps3/exports.c
new file mode 100644
index 0000000000..1ac31abcf9
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/exports.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 hvcall exports for modules.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#define LV1_CALL(name, in, out, num)                          \
+  extern s64 _lv1_##name(LV1_##in##_IN_##out##_OUT_ARG_DECL); \
+  EXPORT_SYMBOL(_lv1_##name);
+
+#include <asm/lv1call.h>
diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
new file mode 100644
index 0000000000..6b298010fd
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * udbg debug output routine via GELIC UDP broadcasts
+ *
+ * Copyright (C) 2007 Sony Computer Entertainment Inc.
+ * Copyright 2006, 2007 Sony Corporation
+ * Copyright (C) 2010 Hector Martin <hector@marcansoft.com>
+ * Copyright (C) 2011 Andre Heider <a.heider@gmail.com>
+ */
+
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <asm/io.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+
+#define GELIC_BUS_ID 1
+#define GELIC_DEVICE_ID 0
+#define GELIC_DEBUG_PORT 18194		/* UDP source and destination port */
+#define GELIC_MAX_MESSAGE_SIZE 1000	/* text bytes buffered before a send */
+
+#define GELIC_LV1_GET_MAC_ADDRESS 1	/* lv1_net_control() request */
+#define GELIC_LV1_GET_VLAN_ID 4		/* lv1_net_control() request */
+#define GELIC_LV1_VLAN_TX_ETHERNET_0 2	/* argument of GELIC_LV1_GET_VLAN_ID */
+
+#define GELIC_DESCR_DMA_STAT_MASK 0xf0000000	/* status bits of dmac_cmd_status */
+#define GELIC_DESCR_DMA_CARDOWNED 0xa0000000	/* gelic_sendbuf() spins on this */
+
+#define GELIC_DESCR_TX_DMA_IKE 0x00080000
+#define GELIC_DESCR_TX_DMA_NO_CHKSUM 0x00000000
+#define GELIC_DESCR_TX_DMA_FRAME_TAIL 0x00040000
+
+#define GELIC_DESCR_DMA_CMD_NO_CHKSUM (GELIC_DESCR_DMA_CARDOWNED | \
+				       GELIC_DESCR_TX_DMA_IKE | \
+				       GELIC_DESCR_TX_DMA_NO_CHKSUM)
+
+static u64 bus_addr;
+
+struct gelic_descr {
+	/* as defined by the hardware */
+	__be32 buf_addr;	/* bus address of the data buffer */
+	__be32 buf_size;	/* bytes to send: headers plus message */
+	__be32 next_descr_addr;
+	__be32 dmac_cmd_status;	/* command bits and DMA status */
+	__be32 result_size;	/* zeroed before every send */
+	__be32 valid_size;	/* all zeroes for tx */
+	__be32 data_status;	/* zeroed before every send */
+	__be32 data_error;	/* all zeroes for tx */
+} __attribute__((aligned(32)));
+
+struct debug_block {
+	struct gelic_descr descr;	/* the single tx descriptor */
+	u8 pkt[1520];		/* frame: headers followed by message text */
+} __packed;
+
+static __iomem struct ethhdr *h_eth;	/* header pointers into dbg.pkt */
+static __iomem struct vlan_hdr *h_vlan;	/* set only if lv1 reports a VLAN id */
+static __iomem struct iphdr *h_ip;
+static __iomem struct udphdr *h_udp;
+
+static __iomem char *pmsg;		/* start of the message text */
+static __iomem char *pmsgc;		/* next free byte of message text */
+
+static __iomem struct debug_block dbg __attribute__((aligned(32)));
+
+static int header_size;			/* bytes of headers before the text */
+
+static void map_dma_mem(int bus_id, int dev_id, void *start, size_t len,
+			u64 *real_bus_addr)
+{
+	/* lv1 gets the pointer value with its top four bits cleared. */
+	const u64 real_addr = (u64)start & 0x0fffffffffffffffUL;
+	/* Round the covered range outwards to 4 KiB (0x1000) boundaries. */
+	const u64 map_start = real_addr & ~0xfff;
+	const u64 map_len = ((real_addr + len + 0xfff) & ~0xfff) - map_start;
+	const u64 flags = 0xf800000000000000UL;
+	u64 dma_addr = 0;
+	s64 result;
+
+	/* Any lv1 failure below is treated as fatal. */
+	result = lv1_allocate_device_dma_region(bus_id, dev_id, map_len, 12, 0,
+						&dma_addr);
+	if (result)
+		lv1_panic(0);
+
+	result = lv1_map_device_dma_region(bus_id, dev_id, map_start, dma_addr,
+					   map_len, flags);
+	if (result)
+		lv1_panic(0);
+
+	/* Hand back the bus address of @start itself, not of the page start. */
+	*real_bus_addr = dma_addr + real_addr - map_start;
+}
+
+/* Unmap, then free, a region set up by map_dma_mem(); returns lv1 status. */
+static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len)
+{
+	/* Widen the range to the 4 KiB-aligned span that was mapped. */
+	const u64 page_addr = bus_addr & ~0xfff;
+	const size_t span = (len + bus_addr - page_addr + 0xfff) & ~0xfff;
+	s64 result;
+
+	result = lv1_unmap_device_dma_region(bus_id, dev_id, page_addr,
+					     span);
+	if (result)
+		return result;
+
+	/* Only a successfully unmapped region is freed. */
+	return lv1_free_device_dma_region(bus_id, dev_id, page_addr);
+}
+
+/* Open the gelic device and pre-build the debug frame's headers. */
+static void __init gelic_debug_init(void)
+{
+	u64 mac, vlan_id, v2;
+	s64 result;
+
+	result = lv1_open_device(GELIC_BUS_ID, GELIC_DEVICE_ID, 0);
+	if (result)
+		lv1_panic(0);
+
+	/* Make the descriptor and packet buffer reachable by device DMA. */
+	map_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID, &dbg, sizeof(dbg),
+		    &bus_addr);
+	memset(&dbg, 0, sizeof(dbg));
+	/* The descriptor's buffer is the pkt[] array that follows it in dbg. */
+	dbg.descr.buf_addr = bus_addr + offsetof(struct debug_block, pkt);
+
+	wmb();
+
+	result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID,
+				 GELIC_LV1_GET_MAC_ADDRESS, 0, 0, 0,
+				 &mac, &v2);
+	if (result)
+		lv1_panic(0);
+
+	/* Move the 48-bit MAC into the first six bytes of the (BE) u64. */
+	mac <<= 16;
+	h_eth = (struct ethhdr *)dbg.pkt;
+	eth_broadcast_addr(h_eth->h_dest);
+	memcpy(&h_eth->h_source, &mac, ETH_ALEN);
+
+	header_size = sizeof(struct ethhdr);
+
+	result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID,
+				 GELIC_LV1_GET_VLAN_ID,
+				 GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0,
+				 &vlan_id, &v2);
+	if (!result) {
+		/* lv1 reported a VLAN id: put a VLAN header before the IP one. */
+		h_eth->h_proto = ETH_P_8021Q;
+		header_size += sizeof(struct vlan_hdr);
+		h_vlan = (struct vlan_hdr *)(h_eth + 1);
+		h_vlan->h_vlan_TCI = vlan_id;
+		h_vlan->h_vlan_encapsulated_proto = ETH_P_IP;
+		h_ip = (struct iphdr *)(h_vlan + 1);
+	} else {
+		h_eth->h_proto = ETH_P_IP;
+		h_ip = (struct iphdr *)(h_eth + 1);
+	}
+
+	/* Constant IPv4 fields; tot_len and check are filled in per packet. */
+	header_size += sizeof(struct iphdr);
+	h_ip->version = 4;
+	h_ip->ihl = 5;
+	h_ip->ttl = 10;
+	h_ip->protocol = 0x11;
+	h_ip->saddr = 0x00000000;
+	h_ip->daddr = 0xffffffff;
+
+	header_size += sizeof(struct udphdr);
+	h_udp = (struct udphdr *)(h_ip + 1);
+	h_udp->source = GELIC_DEBUG_PORT;
+	h_udp->dest = GELIC_DEBUG_PORT;
+
+	/* Message bytes are appended directly after the UDP header. */
+	pmsgc = pmsg = (char *)(h_udp + 1);
+}
+
+static void gelic_debug_shutdown(void)
+{
+	if (bus_addr)	/* skip the unmap if no bus address was recorded */
+		unmap_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID,
+			      bus_addr, sizeof(dbg));
+	lv1_close_device(GELIC_BUS_ID, GELIC_DEVICE_ID);
+}
+
+/* Finalise lengths and checksum for @msgsize bytes, transmit, and wait. */
+static void gelic_sendbuf(int msgsize)
+{
+	u16 *word = (u16 *)h_ip;
+	u32 sum = 0;
+	int i;
+
+	dbg.descr.buf_size = header_size + msgsize;
+	h_ip->tot_len = msgsize + sizeof(struct udphdr) +
+			     sizeof(struct iphdr);
+	h_udp->len = msgsize + sizeof(struct udphdr);
+
+	/*
+	 * Five 16-bit words suffice: the rest is check (0), saddr (0) and
+	 * daddr (all ones), which add nothing to a one's-complement sum.
+	 */
+	h_ip->check = 0;
+	for (i = 0; i < 5; i++)
+		sum += word[i];
+	h_ip->check = ~(sum + (sum >> 16));
+
+	dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM |
+				    GELIC_DESCR_TX_DMA_FRAME_TAIL;
+	dbg.descr.result_size = 0;
+	dbg.descr.data_status = 0;
+	wmb();	/* descriptor and packet stores first, then start the DMA */
+	lv1_net_start_tx_dma(GELIC_BUS_ID, GELIC_DEVICE_ID, bus_addr, 0);
+	while ((dbg.descr.dmac_cmd_status & GELIC_DESCR_DMA_STAT_MASK) ==
+	       GELIC_DESCR_DMA_CARDOWNED)
+		cpu_relax();
+}
+
+static void ps3gelic_udbg_putc(char ch)
+{
+	*pmsgc++ = ch;
+	if (ch != '\n' && pmsgc - pmsg < GELIC_MAX_MESSAGE_SIZE)
+		return;	/* no newline yet and room left: keep buffering */
+	gelic_sendbuf(pmsgc - pmsg);
+	pmsgc = pmsg;
+}
+
+void __init udbg_init_ps3gelic(void)
+{
+	gelic_debug_init();
+	udbg_putc = ps3gelic_udbg_putc;	/* hooked up only after setup */
+}
+
+void udbg_shutdown_ps3gelic(void)
+{
+	udbg_putc = NULL;	/* unhook output before the buffer is unmapped */
+	gelic_debug_shutdown();
+}
+EXPORT_SYMBOL(udbg_shutdown_ps3gelic);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
new file mode 100644
index 0000000000..9de62bd526
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 pagetable management routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006, 2007 Sony Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/ps3fb.h>
+
+#define PS3_VERBOSE_RESULT
+#include "platform.h"
+
+/**
+ * enum ps3_lpar_vas_id - id of LPAR virtual address space.
+ * @PS3_LPAR_VAS_ID_CURRENT: Current selected virtual address space
+ *
+ * Identify the target LPAR address space.
+ */
+
+enum ps3_lpar_vas_id {
+	PS3_LPAR_VAS_ID_CURRENT = 0,
+};
+
+
+static DEFINE_SPINLOCK(ps3_htab_lock);
+
+/*
+ * Insert a valid HPTE mapping @vpn to @pa through lv1; BUG()s on failure.
+ */
+static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
+	unsigned long pa, unsigned long rflags, unsigned long vflags,
+	int psize, int apsize, int ssize)
+{
+	u64 hpte_v, hpte_r, hpte_rs;
+	u64 hpte_v_array[4];
+	u64 inserted_index, evicted_v, evicted_r;
+	unsigned long flags;
+	long slot;
+	int result;
+
+	/*
+	 * lv1_insert_htab_entry() searches both the primary and the
+	 * secondary pte group for a victim, so the hint is dropped.
+	 */
+	vflags &= ~HPTE_V_SECONDARY;
+
+	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
+	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+
+	spin_lock_irqsave(&ps3_htab_lock, flags);
+
+	/* Have the hypervisor replace an entry whose BOLTED bit is 0. */
+	result = lv1_insert_htab_entry(PS3_LPAR_VAS_ID_CURRENT, hpte_group,
+				       hpte_v, hpte_r, HPTE_V_BOLTED, 0,
+				       &inserted_index, &evicted_v, &evicted_r);
+	if (result) {
+		/* all entries bolted! */
+		pr_info("%s:result=%s vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
+			__func__, ps3_result(result), vpn, pa, hpte_group,
+			hpte_v, hpte_r);
+		BUG();
+	}
+
+	/*
+	 * Read back the aligned group of four entries around the new one
+	 * to see whether it went into the secondary pteg.
+	 */
+	result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT,
+				       inserted_index & ~0x3UL,
+				       &hpte_v_array[0], &hpte_v_array[1],
+				       &hpte_v_array[2], &hpte_v_array[3],
+				       &hpte_rs);
+	BUG_ON(result);
+
+	/* Low three index bits, with bit 3 flagging the secondary pteg. */
+	slot = inserted_index & 7;
+	if (hpte_v_array[inserted_index % 4] & HPTE_V_SECONDARY)
+		slot |= 1 << 3;
+
+	spin_unlock_irqrestore(&ps3_htab_lock, flags);
+
+	return slot;
+}
+
+static long ps3_hpte_remove(unsigned long hpte_group)
+{
+	panic("ps3_hpte_remove() not implemented");
+	return 0;	/* follows a panic(); kept for the return type */
+}
+
+/*
+ * ps3_hpte_updatepp - "update" an HPTE by invalidating it if it matches.
+ *
+ * @newpp, @apsize and @inv_flags are not used.  Always returns -1 so that
+ * the caller re-inserts the entry.
+ */
+static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
+			      unsigned long vpn, int psize, int apsize,
+			      int ssize, unsigned long inv_flags)
+{
+	const u64 want_v = hpte_encode_avpn(vpn, psize, ssize);
+	u64 hpte_v_array[4], hpte_rs;
+	unsigned long flags;
+	u64 hpte_v;
+	int result;
+
+	spin_lock_irqsave(&ps3_htab_lock, flags);
+
+	/* Fetch the aligned group of four entries that contains @slot. */
+	result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT, slot & ~0x3UL,
+				       &hpte_v_array[0], &hpte_v_array[1],
+				       &hpte_v_array[2], &hpte_v_array[3],
+				       &hpte_rs);
+
+	if (result) {
+		pr_info("%s: result=%s read vpn=%lx slot=%lx psize=%d\n",
+			__func__, ps3_result(result), vpn, slot, psize);
+		BUG();
+	}
+
+	hpte_v = hpte_v_array[slot % 4];
+
+	/*
+	 * lv1_read_htab_entries() does not hand back the RPN, so a new
+	 * hpte_r value cannot be built here and the entry cannot be
+	 * rewritten through lv1_insert_htab_entry().  Instead a matching,
+	 * valid entry is invalidated, and the -1 return value asks the
+	 * caller to install the update via ps3_hpte_insert().
+	 */
+	if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
+		lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
+
+	spin_unlock_irqrestore(&ps3_htab_lock, flags);
+
+	/* Both the "not found" and the "invalidated" case report -1. */
+	return -1;
+}
+
+static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+	int psize, int ssize)	/* stub: every parameter is ignored */
+{
+	pr_info("ps3_hpte_updateboltedpp() not implemented\n");
+}
+
+static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
+				int psize, int apsize, int ssize, int local)
+{
+	unsigned long irq_flags;
+	int rc;
+
+	spin_lock_irqsave(&ps3_htab_lock, irq_flags);
+
+	/* Overwrite @slot with an all-zero entry; a failure is fatal. */
+	rc = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
+	if (rc) {
+		pr_info("%s: result=%s vpn=%lx slot=%lx psize=%d\n",
+			__func__, ps3_result(rc), vpn, slot, psize);
+		BUG();
+	}
+
+	spin_unlock_irqrestore(&ps3_htab_lock, irq_flags);
+}
+
+/* Called during kexec sequence with MMU off */
+static notrace void ps3_hpte_clear(void)
+{
+	/* (1 << pft_size) >> 4: presumably table bytes / 16-byte HPTE. */
+	const unsigned long nr_hptes = (1UL << ppc64_pft_size) >> 4;
+	u64 slot;
+
+	for (slot = 0; slot < nr_hptes; slot++)
+		lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
+	ps3_mm_shutdown();
+	ps3_mm_vas_destroy();
+}
+
+void __init ps3_hpte_init(unsigned long htab_size)
+{
+	mmu_hash_ops.hpte_invalidate = ps3_hpte_invalidate;
+	mmu_hash_ops.hpte_updatepp = ps3_hpte_updatepp;
+	mmu_hash_ops.hpte_updateboltedpp = ps3_hpte_updateboltedpp;
+	mmu_hash_ops.hpte_insert = ps3_hpte_insert;
+	mmu_hash_ops.hpte_remove = ps3_hpte_remove;
+	mmu_hash_ops.hpte_clear_all = ps3_hpte_clear;
+
+	ppc64_pft_size = __ilog2(htab_size);	/* read by ps3_hpte_clear() */
+}
+
diff --git a/arch/powerpc/platforms/ps3/hvcall.S b/arch/powerpc/platforms/ps3/hvcall.S
new file mode 100644
index 0000000000..509e30ad01
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/hvcall.S
@@ -0,0 +1,792 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 hvcall interface.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ *  Copyright 2003, 2004 (c) MontaVista Software, Inc.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+#define lv1call .long 0x44000022; extsw r3, r3 /* raw opcode, then sign-extend r3 */
+
+#define LV1_N_IN_0_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_0_IN_0_OUT LV1_N_IN_0_OUT	/* no outputs: one stub for all */
+#define LV1_1_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_2_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_3_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_4_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_5_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_6_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_7_IN_0_OUT LV1_N_IN_0_OUT
+
+#define LV1_0_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu    r3, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_0_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r3, -8(r1);			\
+	stdu	r4, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_0_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r3, -8(r1);			\
+	std	r4, -16(r1);			\
+	stdu	r5, -24(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 24;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_0_IN_7_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r3, -8(r1);			\
+	std	r4, -16(r1);			\
+	std	r5, -24(r1);			\
+	std	r6, -32(r1);			\
+	std	r7, -40(r1);			\
+	std	r8, -48(r1);			\
+	stdu	r9, -56(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 56;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	ld	r11, -40(r1);			\
+	std	r8, 0(r11);			\
+	ld	r11, -48(r1);			\
+	std	r9, 0(r11);			\
+	ld	r11, -56(r1);			\
+	std	r10, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu    r4, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r4, -8(r1);			\
+	stdu	r5, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r4, -8(r1);			\
+	std	r5, -16(r1);			\
+	stdu	r6, -24(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 24;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_4_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r4, -8(r1);			\
+	std	r5, -16(r1);			\
+	std	r6, -24(r1);			\
+	stdu	r7, -32(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 32;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_5_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r4, -8(r1);			\
+	std	r5, -16(r1);			\
+	std	r6, -24(r1);			\
+	std	r7, -32(r1);			\
+	stdu	r8, -40(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 40;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	ld	r11, -40(r1);			\
+	std	r8, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_6_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r4, -8(r1);			\
+	std	r5, -16(r1);			\
+	std	r6, -24(r1);			\
+	std	r7, -32(r1);			\
+	std	r8, -40(r1);			\
+	stdu	r9, -48(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 48;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	ld	r11, -40(r1);			\
+	std	r8, 0(r11);			\
+	ld	r11, -48(r1);			\
+	std	r9, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_1_IN_7_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r4, -8(r1);			\
+	std	r5, -16(r1);			\
+	std	r6, -24(r1);			\
+	std	r7, -32(r1);			\
+	std	r8, -40(r1);			\
+	std	r9, -48(r1);			\
+	stdu	r10, -56(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 56;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	ld	r11, -40(r1);			\
+	std	r8, 0(r11);			\
+	ld	r11, -48(r1);			\
+	std	r9, 0(r11);			\
+	ld	r11, -56(r1);			\
+	std	r10, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_2_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu	r5, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_2_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r5, -8(r1);			\
+	stdu	r6, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_2_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r5, -8(r1);			\
+	std	r6, -16(r1);			\
+	stdu	r7, -24(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 24;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_2_IN_4_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r5, -8(r1);			\
+	std	r6, -16(r1);			\
+	std	r7, -24(r1);			\
+	stdu	r8, -32(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 32;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_2_IN_5_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r5, -8(r1);			\
+	std	r6, -16(r1);			\
+	std	r7, -24(r1);			\
+	std	r8, -32(r1);			\
+	stdu	r9, -40(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 40;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	ld	r11, -32(r1);			\
+	std	r7, 0(r11);			\
+	ld	r11, -40(r1);			\
+	std	r8, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_3_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu	r6, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_3_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r6, -8(r1);			\
+	stdu	r7, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_3_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r6, -8(r1);			\
+	std	r7, -16(r1);			\
+	stdu	r8, -24(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 24;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_4_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu    r7, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_4_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r7, -8(r1);			\
+	stdu	r8, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_4_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r7, -8(r1);			\
+	std	r8, -16(r1);			\
+	stdu	r9, -24(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 24;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_5_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu    r8, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_5_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r8, -8(r1);			\
+	stdu	r9, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_5_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r8, -8(r1);			\
+	std	r9, -16(r1);			\
+	stdu	r10, -24(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 24;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, -24(r1);			\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_6_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu    r9, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_6_IN_2_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointers */		\
+	std     r9, -8(r1);			\
+	stdu    r10, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_6_IN_3_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash 2 ptrs; 3rd is at 48+8*8 */	\
+	std     r9, -8(r1);			\
+	stdu    r10, -16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 16;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	ld	r11, -16(r1);			\
+	std	r5, 0(r11);			\
+	ld	r11, 48+8*8(r1);		\
+	std	r6, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_7_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* stash the output pointer */		\
+	stdu    r10, -8(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* restore r1; write back outputs */	\
+	addi	r1, r1, 8;			\
+	ld	r11, -8(r1);			\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_7_IN_6_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* keep r10 (1st out ptr) in frame */	\
+	std	r10, 48+8*7(r1);		\
+	/* r11 = call number; do hvcall */	\
+	li	r11, API_NUMBER;		\
+	lv1call;				\
+	/* outputs go via frame slots 7..12 */	\
+	ld	r11, 48+8*7(r1);		\
+	std	r4, 0(r11);			\
+	ld	r11, 48+8*8(r1);		\
+	std	r5, 0(r11);			\
+	ld	r11, 48+8*9(r1);		\
+	std	r6, 0(r11);			\
+	ld	r11, 48+8*10(r1);		\
+	std	r7, 0(r11);			\
+	ld	r11, 48+8*11(r1);		\
+	std	r8, 0(r11);			\
+	ld	r11, 48+8*12(r1);		\
+	std	r9, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+#define LV1_8_IN_1_OUT(API_NAME, API_NUMBER)	\
+_GLOBAL(_##API_NAME)				\
+	/* save LR in caller's frame */		\
+	mflr	r0;				\
+	std	r0, 16(r1);			\
+	/* r11 = call number; do hvcall */	\
+	li      r11, API_NUMBER;		\
+	lv1call;				\
+	/* out ptr comes from 48+8*8(r1) */	\
+	ld	r11, 48+8*8(r1);		\
+	std	r4, 0(r11);			\
+	/* restore LR and return */		\
+	ld	r0, 16(r1);			\
+	mtlr	r0;				\
+	blr
+
+	.text
+
+/* every LV1_CALL() in the header below expands to one _lv1_<name> stub */
+
+#define LV1_CALL(name, in, out, num) LV1_##in##_IN_##out##_OUT(lv1_##name, num)
+#include <asm/lv1call.h>
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
new file mode 100644
index 0000000000..49871427f5
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -0,0 +1,783 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 interrupt routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/smp.h>
+
+#include "platform.h"
+
+#if defined(DEBUG)
+#define DBG udbg_printf
+#define FAIL udbg_printf
+#else
+#define DBG pr_devel
+#define FAIL pr_debug
+#endif
+
+/**
+ * struct ps3_bmp - a per cpu irq status and mask bitmap structure
+ * @status: 256 bit status bitmap indexed by plug
+ * @unused_1: Alignment
+ * @mask: 256 bit mask bitmap indexed by plug
+ * @unused_2: Alignment
+ *
+ * The HV maintains per SMT thread mappings of HV outlet to HV plug on
+ * behalf of the guest.  These mappings are implemented as 256 bit guest
+ * supplied bitmaps indexed by plug number.  The addresses of the bitmaps
+ * are registered with the HV through lv1_configure_irq_state_bitmap().
+ * The HV requires that the 512 bits of status + mask not cross a page
+ * boundary.  PS3_BMP_MINALIGN is used to define this minimal 64 byte
+ * alignment.
+ *
+ * The HV supports 256 plugs per thread, assigned as {0..255}, for a total
+ * of 512 plugs supported on a processor.  To simplify the logic this
+ * implementation equates HV plug value to Linux virq value, constrains each
+ * interrupt to have a system wide unique plug number, and limits the range
+ * of the plug values to map into the first dword of the bitmaps.  This
+ * gives a usable range of plug values of  {NR_IRQS_LEGACY..63}.  Note
+ * that there is no constraint on how many in this set an individual thread
+ * can acquire.
+ *
+ * The mask is declared as unsigned long so we can use set/clear_bit on it.
+ */
+
+#define PS3_BMP_MINALIGN 64
+
+struct ps3_bmp {
+	struct {
+		u64 status;
+		u64 unused_1[3];
+		unsigned long mask;
+		u64 unused_2[3];
+	};
+};
+
+/**
+ * struct ps3_private - a per cpu data structure
+ * @bmp: ps3_bmp structure
+ * @bmp_lock: Synchronize access to bmp.
+ * @ipi_debug_brk_mask: Mask for debug break IPIs
+ * @ppe_id: HV logical_ppe_id
+ * @thread_id: HV thread_id
+ * @ipi_mask: Mask of IPI virqs
+ */
+
+struct ps3_private {
+	struct ps3_bmp bmp __attribute__ ((aligned (PS3_BMP_MINALIGN)));
+	spinlock_t bmp_lock;
+	u64 ppe_id;
+	u64 thread_id;
+	unsigned long ipi_debug_brk_mask;
+	unsigned long ipi_mask;
+};
+
+static DEFINE_PER_CPU(struct ps3_private, ps3_private);
+
+/**
+ * ps3_chip_mask - Mask an interrupt by clearing its bit in ps3_bmp.mask.
+ * @d: irq_data of the interrupt; d->irq is the assigned Linux virq.
+ *
+ * Clears the bit in ps3_bmp.mask and calls lv1_did_update_interrupt_mask().
+ */
+
+static void ps3_chip_mask(struct irq_data *d)
+{
+	struct ps3_private *pd = irq_data_get_irq_chip_data(d);
+	unsigned long flags;
+
+	DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__,
+		pd->thread_id, d->irq);
+
+	local_irq_save(flags);
+	clear_bit(63 - d->irq, &pd->bmp.mask);	/* virq n lives at bit 63 - n */
+	lv1_did_update_interrupt_mask(pd->ppe_id, pd->thread_id);
+	local_irq_restore(flags);
+}
+
+/**
+ * ps3_chip_unmask - Unmask an interrupt by setting its bit in ps3_bmp.mask.
+ * @d: irq_data of the interrupt; d->irq is the assigned Linux virq.
+ *
+ * Sets the bit in ps3_bmp.mask and calls lv1_did_update_interrupt_mask().
+ */
+
+static void ps3_chip_unmask(struct irq_data *d)
+{
+	struct ps3_private *pd = irq_data_get_irq_chip_data(d);
+	unsigned long flags;
+
+	DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__,
+		pd->thread_id, d->irq);
+
+	local_irq_save(flags);
+	set_bit(63 - d->irq, &pd->bmp.mask);	/* virq n lives at bit 63 - n */
+	lv1_did_update_interrupt_mask(pd->ppe_id, pd->thread_id);
+	local_irq_restore(flags);
+}
+
+/**
+ * ps3_chip_eoi - HV end-of-interrupt.
+ * @d: irq_data of the interrupt; d->irq is the assigned Linux virq.
+ *
+ * Calls lv1_end_of_interrupt_ext() unless the virq is set in ipi_mask.
+ */
+
+static void ps3_chip_eoi(struct irq_data *d)
+{
+	const struct ps3_private *pd = irq_data_get_irq_chip_data(d);
+
+	/* non-IPIs are EOIed here. */
+
+	if (!test_bit(63 - d->irq, &pd->ipi_mask))
+		lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, d->irq);
+}
+
+/**
+ * ps3_irq_chip - Represents the ps3_bmp as a Linux struct irq_chip.
+ */
+
+static struct irq_chip ps3_irq_chip = {
+	.name = "ps3",
+	.irq_mask = ps3_chip_mask,
+	.irq_unmask = ps3_chip_unmask,
+	.irq_eoi = ps3_chip_eoi,
+};
+
+/**
+ * ps3_virq_setup - virq related setup.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @outlet: The HV outlet from the various create outlet routines.
+ * @virq: The assigned Linux virq.
+ *
+ * Calls irq_create_mapping() to get a virq and sets the chip data to
+ * ps3_private data.
+ */
+
+static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
+			  unsigned int *virq)
+{
+	int result;
+	struct ps3_private *pd;
+
+	/* This defines the default interrupt distribution policy. */
+
+	if (cpu == PS3_BINDING_CPU_ANY)
+		cpu = 0;
+
+	pd = &per_cpu(ps3_private, cpu);
+
+	*virq = irq_create_mapping(NULL, outlet);
+
+	if (!*virq) {
+		FAIL("%s:%d: irq_create_mapping failed: outlet %lu\n",
+			__func__, __LINE__, outlet);
+		result = -ENOMEM;
+		goto fail_create;
+	}
+
+	DBG("%s:%d: outlet %lu => cpu %u, virq %u\n", __func__, __LINE__,
+		outlet, cpu, *virq);
+
+	result = irq_set_chip_data(*virq, pd);
+
+	if (result) {
+		FAIL("%s:%d: irq_set_chip_data failed\n",
+			__func__, __LINE__);
+		goto fail_set;
+	}
+
+	ps3_chip_mask(irq_get_irq_data(*virq));
+
+	return result;
+
+fail_set:
+	irq_dispose_mapping(*virq);
+fail_create:
+	return result;
+}
+
+/**
+ * ps3_virq_destroy - virq related teardown.
+ * @virq: The assigned Linux virq.
+ *
+ * Clears chip data and calls irq_dispose_mapping() for the virq.
+ */
+
+static int ps3_virq_destroy(unsigned int virq)
+{
+	const struct ps3_private *pd = irq_get_chip_data(virq);
+
+	DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__,
+		__LINE__, pd->ppe_id, pd->thread_id, virq);
+
+	irq_set_chip_data(virq, NULL);
+	irq_dispose_mapping(virq);
+
+	DBG("%s:%d <-\n", __func__, __LINE__);
+	return 0;
+}
+
+/**
+ * ps3_irq_plug_setup - Generic outlet and virq related setup.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @outlet: The HV outlet from the various create outlet routines.
+ * @virq: The assigned Linux virq.
+ *
+ * Sets up virq and connects the irq plug.
+ */
+
+int ps3_irq_plug_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
+	unsigned int *virq)
+{
+	int result;
+	struct ps3_private *pd;
+
+	result = ps3_virq_setup(cpu, outlet, virq);
+
+	if (result) {
+		FAIL("%s:%d: ps3_virq_setup failed\n", __func__, __LINE__);
+		goto fail_setup;
+	}
+
+	pd = irq_get_chip_data(*virq);
+
+	/* Binds outlet to cpu + virq. */
+
+	result = lv1_connect_irq_plug_ext(pd->ppe_id, pd->thread_id, *virq,
+		outlet, 0);
+
+	if (result) {
+		FAIL("%s:%d: lv1_connect_irq_plug_ext failed: %s\n",
+		__func__, __LINE__, ps3_result(result));
+		result = -EPERM;
+		goto fail_connect;
+	}
+
+	return result;
+
+fail_connect:
+	ps3_virq_destroy(*virq);
+fail_setup:
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_irq_plug_setup);
+
+/**
+ * ps3_irq_plug_destroy - Generic outlet and virq related teardown.
+ * @virq: The assigned Linux virq.
+ *
+ * Disconnects the irq plug and tears down virq.
+ * Do not call for system bus event interrupts setup with
+ * ps3_sb_event_receive_port_setup().
+ */
+
+int ps3_irq_plug_destroy(unsigned int virq)
+{
+	int result;
+	const struct ps3_private *pd = irq_get_chip_data(virq);
+
+	DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__,
+		__LINE__, pd->ppe_id, pd->thread_id, virq);
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	result = lv1_disconnect_irq_plug_ext(pd->ppe_id, pd->thread_id, virq);
+
+	if (result)
+		FAIL("%s:%d: lv1_disconnect_irq_plug_ext failed: %s\n",
+		__func__, __LINE__, ps3_result(result));
+
+	ps3_virq_destroy(virq);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_irq_plug_destroy);
+
+/**
+ * ps3_event_receive_port_setup - Setup an event receive port.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @virq: The assigned Linux virq.
+ *
+ * The virq can be used with lv1_connect_interrupt_event_receive_port() to
+ * arrange to receive interrupts from system-bus devices, or with
+ * ps3_send_event_locally() to signal events.
+ */
+
+int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq)
+{
+	int result;
+	u64 outlet;
+
+	result = lv1_construct_event_receive_port(&outlet);
+
+	if (result) {
+		FAIL("%s:%d: lv1_construct_event_receive_port failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		*virq = 0;
+		return result;
+	}
+
+	result = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(result);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_event_receive_port_setup);
+
+/**
+ * ps3_event_receive_port_destroy - Destroy an event receive port.
+ * @virq: The assigned Linux virq.
+ *
+ * Since ps3_event_receive_port_destroy() destroys the receive port outlet,
+ * SB devices need to call lv1_disconnect_interrupt_event_receive_port()
+ * before this.
+ */
+
+int ps3_event_receive_port_destroy(unsigned int virq)
+{
+	int result;
+
+	DBG(" -> %s:%d virq %u\n", __func__, __LINE__, virq);
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	result = lv1_destruct_event_receive_port(virq_to_hw(virq));
+
+	if (result)
+		FAIL("%s:%d: lv1_destruct_event_receive_port failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	/*
+	 * Don't call ps3_virq_destroy() here since ps3_smp_cleanup_cpu()
+	 * calls from interrupt context (smp_call_function) when kexecing.
+	 */
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+int ps3_send_event_locally(unsigned int virq)
+{
+	return lv1_send_event_locally(virq_to_hw(virq));
+}
+
+/**
+ * ps3_sb_event_receive_port_setup - Setup a system bus event receive port.
+ * @dev: The system bus device instance.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @virq: The assigned Linux virq; set to 0 if the setup fails.
+ *
+ * An event irq represents a virtual device interrupt.  The interrupt_id
+ * corresponds to the software interrupt number.
+ */
+
+int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev,
+	enum ps3_cpu_binding cpu, unsigned int *virq)
+{
+	/* this should go in system-bus.c */
+
+	int result;
+
+	result = ps3_event_receive_port_setup(cpu, virq);
+
+	if (result)
+		return result;
+
+	result = lv1_connect_interrupt_event_receive_port(dev->bus_id,
+		dev->dev_id, virq_to_hw(*virq), dev->interrupt_id);
+
+	if (result) {
+		FAIL("%s:%d: lv1_connect_interrupt_event_receive_port"
+			" failed: %s\n", __func__, __LINE__, ps3_result(result));
+		ps3_event_receive_port_destroy(*virq);
+		ps3_virq_destroy(*virq);	/* drop the virq mapping */
+		*virq = 0;
+		return result;
+	}
+
+	DBG("%s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__,
+		dev->interrupt_id, *virq);
+
+	return 0;
+}
+EXPORT_SYMBOL(ps3_sb_event_receive_port_setup);
+
+int ps3_sb_event_receive_port_destroy(struct ps3_system_bus_device *dev,
+	unsigned int virq)
+{
+	/* this should go in system-bus.c */
+
+	int result;
+
+	DBG(" -> %s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__,
+		dev->interrupt_id, virq);
+
+	result = lv1_disconnect_interrupt_event_receive_port(dev->bus_id,
+		dev->dev_id, virq_to_hw(virq), dev->interrupt_id);
+
+	if (result)
+		FAIL("%s:%d: lv1_disconnect_interrupt_event_receive_port"
+			" failed: %s\n", __func__, __LINE__,
+			ps3_result(result));
+
+	result = ps3_event_receive_port_destroy(virq);
+	BUG_ON(result);
+
+	/*
+	 * ps3_event_receive_port_destroy() destroys the IRQ plug,
+	 * so don't call ps3_irq_plug_destroy() here.
+	 */
+
+	result = ps3_virq_destroy(virq);
+	BUG_ON(result);
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+EXPORT_SYMBOL(ps3_sb_event_receive_port_destroy);
+
+/**
+ * ps3_io_irq_setup - Setup a system bus io irq.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @interrupt_id: The device interrupt id read from the system repository.
+ * @virq: The assigned Linux virq.
+ *
+ * An io irq represents a non-virtualized device interrupt.  interrupt_id
+ * corresponds to the interrupt number of the interrupt controller.
+ */
+
+int ps3_io_irq_setup(enum ps3_cpu_binding cpu, unsigned int interrupt_id,
+	unsigned int *virq)
+{
+	int result;
+	u64 outlet;
+
+	result = lv1_construct_io_irq_outlet(interrupt_id, &outlet);
+
+	if (result) {
+		FAIL("%s:%d: lv1_construct_io_irq_outlet failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;
+	}
+
+	result = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(result);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_io_irq_setup);
+
+int ps3_io_irq_destroy(unsigned int virq)
+{
+	int result;
+	unsigned long outlet = virq_to_hw(virq);
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	/*
+	 * lv1_destruct_io_irq_outlet() will destroy the IRQ plug,
+	 * so call ps3_irq_plug_destroy() first.
+	 */
+
+	result = ps3_irq_plug_destroy(virq);
+	BUG_ON(result);
+
+	result = lv1_destruct_io_irq_outlet(outlet);
+
+	if (result)
+		FAIL("%s:%d: lv1_destruct_io_irq_outlet failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_io_irq_destroy);
+
+/**
+ * ps3_vuart_irq_setup - Setup the system virtual uart virq.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @virt_addr_bmp: The caller supplied virtual uart interrupt bitmap.
+ * @virq: The assigned Linux virq.
+ *
+ * The system supports only a single virtual uart, so multiple calls without
+ * freeing the interrupt will return a wrong state error.
+ */
+
+int ps3_vuart_irq_setup(enum ps3_cpu_binding cpu, void* virt_addr_bmp,
+	unsigned int *virq)
+{
+	int result;
+	u64 outlet;
+	u64 lpar_addr;
+
+	BUG_ON(!is_kernel_addr((u64)virt_addr_bmp));
+
+	lpar_addr = ps3_mm_phys_to_lpar(__pa(virt_addr_bmp));
+
+	result = lv1_configure_virtual_uart_irq(lpar_addr, &outlet);
+
+	if (result) {
+		FAIL("%s:%d: lv1_configure_virtual_uart_irq failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;
+	}
+
+	result = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(result);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_vuart_irq_setup);
+
+int ps3_vuart_irq_destroy(unsigned int virq)
+{
+	/* Mask the virq, deconfigure the HV vuart irq, then free the virq. */
+	int result;
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+	result = lv1_deconfigure_virtual_uart_irq();
+	if (result) {
+		FAIL("%s:%d: lv1_deconfigure_virtual_uart_irq failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;	/* virq stays allocated, but masked */
+	}
+
+	result = ps3_irq_plug_destroy(virq);
+	BUG_ON(result);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_vuart_irq_destroy);
+
+/**
+ * ps3_spe_irq_setup - Setup an spe virq.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @spe_id: The spe_id returned from lv1_construct_logical_spe().
+ * @class: The spe interrupt class {0,1,2}.
+ * @virq: The assigned Linux virq.
+ *
+ */
+
+int ps3_spe_irq_setup(enum ps3_cpu_binding cpu, unsigned long spe_id,
+	unsigned int class, unsigned int *virq)
+{
+	int result;
+	u64 outlet;
+
+	BUG_ON(class > 2);
+
+	result = lv1_get_spe_irq_outlet(spe_id, class, &outlet);
+
+	if (result) {
+		FAIL("%s:%d: lv1_get_spe_irq_outlet failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;
+	}
+
+	result = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(result);
+
+	return result;
+}
+
+int ps3_spe_irq_destroy(unsigned int virq)
+{
+	int result;
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	result = ps3_irq_plug_destroy(virq);
+	BUG_ON(result);
+
+	return result;
+}
+
+
+#define PS3_INVALID_OUTLET ((irq_hw_number_t)-1)
+#define PS3_PLUG_MAX 63
+
+#if defined(DEBUG)
+static void _dump_64_bmp(const char *header, const u64 *p, unsigned cpu,
+	const char* func, int line)
+{
+	pr_debug("%s:%d: %s %u {%04llx_%04llx_%04llx_%04llx}\n",
+		func, line, header, cpu,
+		*p >> 48, (*p >> 32) & 0xffff, (*p >> 16) & 0xffff,
+		*p & 0xffff);
+}
+
+static void __maybe_unused _dump_256_bmp(const char *header,
+	const u64 *p, unsigned cpu, const char* func, int line)
+{
+	pr_debug("%s:%d: %s %u {%016llx:%016llx:%016llx:%016llx}\n",
+		func, line, header, cpu, p[0], p[1], p[2], p[3]);
+}
+
+#define dump_bmp(_x) _dump_bmp(_x, __func__, __LINE__)
+static void _dump_bmp(struct ps3_private* pd, const char* func, int line)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pd->bmp_lock, flags);
+	_dump_64_bmp("stat", &pd->bmp.status, pd->thread_id, func, line);
+	_dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line);
+	spin_unlock_irqrestore(&pd->bmp_lock, flags);
+}
+
+#define dump_mask(_x) _dump_mask(_x, __func__, __LINE__)
+static void __maybe_unused _dump_mask(struct ps3_private *pd,
+	const char* func, int line)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pd->bmp_lock, flags);
+	_dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line);
+	spin_unlock_irqrestore(&pd->bmp_lock, flags);
+}
+#else
+static void dump_bmp(struct ps3_private *pd) {}	/* no-op without DEBUG */
+#endif /* defined(DEBUG) */
+
+static int ps3_host_map(struct irq_domain *h, unsigned int virq,
+	irq_hw_number_t hwirq)
+{
+	DBG("%s:%d: hwirq %lu, virq %u\n", __func__, __LINE__, hwirq,
+		virq);
+
+	irq_set_chip_and_handler(virq, &ps3_irq_chip, handle_fasteoi_irq);
+
+	return 0;
+}
+
+static int ps3_host_match(struct irq_domain *h, struct device_node *np,
+			  enum irq_domain_bus_token bus_token)
+{
+	/* Match all */
+	return 1;
+}
+
+static const struct irq_domain_ops ps3_host_ops = {
+	.map = ps3_host_map,
+	.match = ps3_host_match,
+};
+
+void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq)
+{
+	struct ps3_private *pd = &per_cpu(ps3_private, cpu);
+
+	set_bit(63 - virq, &pd->ipi_debug_brk_mask);
+
+	DBG("%s:%d: cpu %u, virq %u, mask %lxh\n", __func__, __LINE__,
+		cpu, virq, pd->ipi_debug_brk_mask);
+}
+
+void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq)
+{
+	struct ps3_private *pd = &per_cpu(ps3_private, cpu);
+
+	set_bit(63 - virq, &pd->ipi_mask);
+
+	DBG("%s:%d: cpu %u, virq %u, ipi_mask %lxh\n", __func__, __LINE__,
+		cpu, virq, pd->ipi_mask);
+}
+
+static unsigned int ps3_get_irq(void)
+{
+	struct ps3_private *pd = this_cpu_ptr(&ps3_private);
+	u64 x = (pd->bmp.status & pd->bmp.mask); /* pending and unmasked */
+	unsigned int plug;
+
+	/* check for ipi break first to stop this cpu ASAP */
+
+	if (x & pd->ipi_debug_brk_mask)
+		x &= pd->ipi_debug_brk_mask;
+	/* plug n is bit 63 - n, so cntlzd yields the lowest set plug number */
+	asm volatile("cntlzd %0,%1" : "=r" (plug) : "r" (x));
+	plug &= 0x3f;	/* cntlzd gives 64 for x == 0; fold that to 0 */
+
+	if (unlikely(!plug)) {
+		DBG("%s:%d: no plug found: thread_id %llu\n", __func__,
+			__LINE__, pd->thread_id);
+		dump_bmp(&per_cpu(ps3_private, 0));
+		dump_bmp(&per_cpu(ps3_private, 1));
+		return 0;
+	}
+
+#if defined(DEBUG)
+	if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
+		dump_bmp(&per_cpu(ps3_private, 0));
+		dump_bmp(&per_cpu(ps3_private, 1));
+		BUG();
+	}
+#endif
+
+	/* IPIs are EOIed here. */
+
+	if (test_bit(63 - plug, &pd->ipi_mask))
+		lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, plug);
+
+	return plug;
+}
+
+void __init ps3_init_IRQ(void)
+{
+	int result;
+	unsigned cpu;
+	struct irq_domain *host;
+
+	host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL);
+	irq_set_default_host(host);
+
+	for_each_possible_cpu(cpu) {
+		struct ps3_private *pd = &per_cpu(ps3_private, cpu);
+
+		lv1_get_logical_ppe_id(&pd->ppe_id);
+		pd->thread_id = get_hard_smp_processor_id(cpu);
+		spin_lock_init(&pd->bmp_lock);
+
+		DBG("%s:%d: ppe_id %llu, thread_id %llu, bmp %lxh\n",
+			__func__, __LINE__, pd->ppe_id, pd->thread_id,
+			ps3_mm_phys_to_lpar(__pa(&pd->bmp)));
+
+		result = lv1_configure_irq_state_bitmap(pd->ppe_id,
+			pd->thread_id, ps3_mm_phys_to_lpar(__pa(&pd->bmp)));
+
+		if (result)
+			FAIL("%s:%d: lv1_configure_irq_state_bitmap failed:"
+				" %s\n", __func__, __LINE__,
+				ps3_result(result));
+	}
+
+	ppc_md.get_irq = ps3_get_irq;
+}
+
+void ps3_shutdown_IRQ(int cpu)
+{
+	int result;
+	u64 ppe_id;
+	u64 thread_id = get_hard_smp_processor_id(cpu);
+
+	lv1_get_logical_ppe_id(&ppe_id);
+	result = lv1_configure_irq_state_bitmap(ppe_id, thread_id, 0);
+
+	DBG("%s:%d: lv1_configure_irq_state_bitmap (%llu:%llu/%d) %s\n", __func__,
+		__LINE__, ppe_id, thread_id, cpu, ps3_result(result));
+}
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
new file mode 100644
index 0000000000..1326de55fd
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -0,0 +1,1254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 address space management.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+
+#include <asm/cell-regs.h>
+#include <asm/firmware.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/setup.h>
+
+#include "platform.h"
+
+#if defined(DEBUG)
+#define DBG udbg_printf
+#else
+#define DBG pr_devel
+#endif
+
+enum {
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+	USE_DYNAMIC_DMA = 1,
+#else
+	USE_DYNAMIC_DMA = 0,
+#endif
+};
+
+enum {
+	PAGE_SHIFT_4K = 12U,
+	PAGE_SHIFT_64K = 16U,
+	PAGE_SHIFT_16M = 24U,
+};
+
+static unsigned long __init make_page_sizes(unsigned long a, unsigned long b)
+{
+	return (a << 56) | (b << 48);
+}
+
+enum {
+	ALLOCATE_MEMORY_TRY_ALT_UNIT = 0X04,
+	ALLOCATE_MEMORY_ADDR_ZERO = 0X08,
+};
+
+/* valid htab sizes are {18,19,20} = 256K, 512K, 1M */
+
+enum {
+	HTAB_SIZE_MAX = 20U, /* HV limit of 1MB */
+	HTAB_SIZE_MIN = 18U, /* CPU limit of 256KB */
+};
+
+/*============================================================================*/
+/* virtual address space routines                                             */
+/*============================================================================*/
+
+/**
+ * struct mem_region - memory region structure
+ * @base: base address
+ * @size: size in bytes
+ * @offset: difference between base and rm.size
+ * @destroy: flag if region should be destroyed upon shutdown
+ */
+
+struct mem_region {
+	u64 base;
+	u64 size;
+	unsigned long offset;
+	int destroy;
+};
+
+/**
+ * struct map - address space state variables holder
+ * @total: total memory available as reported by HV
+ * @vas_id: HV virtual address space id
+ * @htab_size: htab size in bytes
+ *
+ * The HV virtual address space (vas) allows for hotplug memory regions.
+ * Memory regions can be created and destroyed in the vas at runtime.
+ * @rm: real mode (bootmem) region
+ * @r1: highmem region(s)
+ *
+ * ps3 addresses
+ * virt_addr: a cpu 'translated' effective address
+ * phys_addr: an address in what Linux thinks is the physical address space
+ * lpar_addr: an address in the HV virtual address space
+ * bus_addr: an io controller 'translated' address on a device bus
+ */
+
+struct map {
+	u64 total;
+	u64 vas_id;
+	u64 htab_size;
+	struct mem_region rm;
+	struct mem_region r1;
+};
+
+#define debug_dump_map(x) _debug_dump_map(x, __func__, __LINE__)
+static void __maybe_unused _debug_dump_map(const struct map *m,
+	const char *func, int line)
+{
+	DBG("%s:%d: map.total     = %llxh\n", func, line, m->total);
+	DBG("%s:%d: map.rm.size   = %llxh\n", func, line, m->rm.size);
+	DBG("%s:%d: map.vas_id    = %llu\n", func, line, m->vas_id);
+	DBG("%s:%d: map.htab_size = %llxh\n", func, line, m->htab_size);
+	DBG("%s:%d: map.r1.base   = %llxh\n", func, line, m->r1.base);
+	DBG("%s:%d: map.r1.offset = %lxh\n", func, line, m->r1.offset);
+	DBG("%s:%d: map.r1.size   = %llxh\n", func, line, m->r1.size);
+}
+
+static struct map map;
+
+/**
+ * ps3_mm_phys_to_lpar - translate a linux physical address to lpar address
+ * @phys_addr: linux physical address
+ */
+
+unsigned long ps3_mm_phys_to_lpar(unsigned long phys_addr)
+{
+	BUG_ON(is_kernel_addr(phys_addr));
+	return (phys_addr < map.rm.size || phys_addr >= map.total)
+		? phys_addr : phys_addr + map.r1.offset;
+}
+
+EXPORT_SYMBOL(ps3_mm_phys_to_lpar);
+
+/**
+ * ps3_mm_vas_create - create the virtual address space
+ */
+
+void __init ps3_mm_vas_create(unsigned long* htab_size)
+{
+	int result;
+	u64 start_address;
+	u64 size;
+	u64 access_right;
+	u64 max_page_size;
+	u64 flags;
+
+	result = lv1_query_logical_partition_address_region_info(0,
+		&start_address, &size, &access_right, &max_page_size,
+		&flags);
+
+	if (result) {
+		DBG("%s:%d: lv1_query_logical_partition_address_region_info "
+			"failed: %s\n", __func__, __LINE__,
+			ps3_result(result));
+		goto fail;
+	}
+
+	if (max_page_size < PAGE_SHIFT_16M) {
+		DBG("%s:%d: bad max_page_size %llxh\n", __func__, __LINE__,
+			max_page_size);
+		goto fail;
+	}
+
+	BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE > HTAB_SIZE_MAX);
+	BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE < HTAB_SIZE_MIN);
+
+	result = lv1_construct_virtual_address_space(CONFIG_PS3_HTAB_SIZE,
+			2, make_page_sizes(PAGE_SHIFT_16M, PAGE_SHIFT_64K),
+			&map.vas_id, &map.htab_size);
+
+	if (result) {
+		DBG("%s:%d: lv1_construct_virtual_address_space failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto fail;
+	}
+
+	result = lv1_select_virtual_address_space(map.vas_id);
+
+	if (result) {
+		DBG("%s:%d: lv1_select_virtual_address_space failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto fail;
+	}
+
+	*htab_size = map.htab_size;
+
+	debug_dump_map(&map);
+
+	return;
+
+fail:
+	panic("ps3_mm_vas_create failed");
+}
+
+/**
+ * ps3_mm_vas_destroy - destroy the virtual address space
+ *
+ * called during kexec sequence with MMU off.
+ */
+
+notrace void ps3_mm_vas_destroy(void)
+{
+	int result;
+
+	if (map.vas_id) {
+		result = lv1_select_virtual_address_space(0);
+		result += lv1_destruct_virtual_address_space(map.vas_id);
+
+		if (result) {
+			lv1_panic(0);
+		}
+
+		map.vas_id = 0;
+	}
+}
+
+static int __init ps3_mm_get_repository_highmem(struct mem_region *r)
+{
+	int result;
+
+	/* Assume a single highmem region. */
+
+	result = ps3_repository_read_highmem_info(0, &r->base, &r->size);
+
+	if (result)
+		goto zero_region;
+
+	if (!r->base || !r->size) {
+		result = -1;
+		goto zero_region;
+	}
+
+	r->offset = r->base - map.rm.size;
+
+	DBG("%s:%d: Found high region in repository: %llxh %llxh\n",
+	    __func__, __LINE__, r->base, r->size);
+
+	return 0;
+
+zero_region:
+	DBG("%s:%d: No high region in repository.\n", __func__, __LINE__);
+
+	r->size = r->base = r->offset = 0;
+	return result;
+}
+
+static int ps3_mm_set_repository_highmem(const struct mem_region *r)
+{
+	/* Assume a single highmem region. */
+
+	return r ? ps3_repository_write_highmem_info(0, r->base, r->size) :
+		ps3_repository_write_highmem_info(0, 0, 0);
+}
+
+/**
+ * ps3_mm_region_create - create a memory region in the vas
+ * @r: pointer to a struct mem_region to accept initialized values
+ * @size: requested region size
+ *
+ * This implementation creates the region with the vas large page size.
+ * @size is rounded down to a multiple of the vas large page size.
+ */
+
+static int ps3_mm_region_create(struct mem_region *r, unsigned long size)
+{
+	int result;
+	u64 muid;
+
+	r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M);
+
+	DBG("%s:%d requested  %lxh\n", __func__, __LINE__, size);
+	DBG("%s:%d actual     %llxh\n", __func__, __LINE__, r->size);
+	DBG("%s:%d difference %llxh (%lluMB)\n", __func__, __LINE__,
+		size - r->size, (size - r->size) / 1024 / 1024);
+
+	if (r->size == 0) {
+		DBG("%s:%d: size == 0\n", __func__, __LINE__);
+		result = -1;
+		goto zero_region;
+	}
+
+	result = lv1_allocate_memory(r->size, PAGE_SHIFT_16M, 0,
+		ALLOCATE_MEMORY_TRY_ALT_UNIT, &r->base, &muid);
+	/* NOTE(review): base < rm.size with result == 0 still returns 0. */
+	if (result || r->base < map.rm.size) {
+		DBG("%s:%d: lv1_allocate_memory failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto zero_region;
+	}
+
+	r->destroy = 1;
+	r->offset = r->base - map.rm.size;
+	return result;
+
+zero_region:
+	r->size = r->base = r->offset = 0;
+	return result;
+}
+
+/**
+ * ps3_mm_region_destroy - destroy a memory region
+ * @r: pointer to struct mem_region
+ */
+
+static void ps3_mm_region_destroy(struct mem_region *r)
+{
+	int result;
+
+	if (!r->destroy) {
+		return;
+	}
+
+	if (r->base) {
+		result = lv1_release_memory(r->base);
+
+		if (result) {
+			lv1_panic(0);
+		}
+
+		r->size = r->base = r->offset = 0;
+		map.total = map.rm.size;
+	}
+
+	ps3_mm_set_repository_highmem(NULL);
+}
+
+/*============================================================================*/
+/* dma routines                                                               */
+/*============================================================================*/
+
+/**
+ * dma_sb_lpar_to_bus - Translate an lpar address to ioc mapped bus address.
+ * @r: pointer to dma region structure
+ * @lpar_addr: HV lpar address
+ */
+
+static unsigned long dma_sb_lpar_to_bus(struct ps3_dma_region *r,
+	unsigned long lpar_addr)
+{
+	if (lpar_addr >= map.rm.size)
+		lpar_addr -= map.r1.offset;
+	BUG_ON(lpar_addr < r->offset);
+	BUG_ON(lpar_addr >= r->offset + r->len);
+	return r->bus_addr + lpar_addr - r->offset;
+}
+
+#define dma_dump_region(_a) _dma_dump_region(_a, __func__, __LINE__)
+static void  __maybe_unused _dma_dump_region(const struct ps3_dma_region *r,
+	const char *func, int line)
+{
+	DBG("%s:%d: dev        %llu:%llu\n", func, line, r->dev->bus_id,
+		r->dev->dev_id);
+	DBG("%s:%d: page_size  %u\n", func, line, r->page_size);
+	DBG("%s:%d: bus_addr   %lxh\n", func, line, r->bus_addr);
+	DBG("%s:%d: len        %lxh\n", func, line, r->len);
+	DBG("%s:%d: offset     %lxh\n", func, line, r->offset);
+}
+
+/**
+ * struct dma_chunk - A chunk of dma pages mapped by the io controller.
+ * @region: The dma region that owns this chunk.
+ * @lpar_addr: Starting lpar address of the area to map.
+ * @bus_addr: Starting ioc bus address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @link: A struct list_head used with struct ps3_dma_region.chunk_list, the
+ * list of all chunks owned by the region.
+ *
+ * This implementation uses a very simple dma page manager
+ * based on the dma_chunk structure.  This scheme assumes
+ * that all drivers use very well behaved dma ops.
+ */
+
+struct dma_chunk {
+	struct ps3_dma_region *region;
+	unsigned long lpar_addr;
+	unsigned long bus_addr;
+	unsigned long len;
+	struct list_head link;
+	unsigned int usage_count;
+};
+
+#define dma_dump_chunk(_a) _dma_dump_chunk(_a, __func__, __LINE__)
+static void _dma_dump_chunk (const struct dma_chunk* c, const char* func,
+	int line)
+{
+	DBG("%s:%d: r.dev        %llu:%llu\n", func, line,
+		c->region->dev->bus_id, c->region->dev->dev_id);
+	DBG("%s:%d: r.bus_addr   %lxh\n", func, line, c->region->bus_addr);
+	DBG("%s:%d: r.page_size  %u\n", func, line, c->region->page_size);
+	DBG("%s:%d: r.len        %lxh\n", func, line, c->region->len);
+	DBG("%s:%d: r.offset     %lxh\n", func, line, c->region->offset);
+	DBG("%s:%d: c.lpar_addr  %lxh\n", func, line, c->lpar_addr);
+	DBG("%s:%d: c.bus_addr   %lxh\n", func, line, c->bus_addr);
+	DBG("%s:%d: c.len        %lxh\n", func, line, c->len);
+}
+
+static struct dma_chunk * dma_find_chunk(struct ps3_dma_region *r,
+	unsigned long bus_addr, unsigned long len)
+{
+	struct dma_chunk *c;
+	unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size);
+	unsigned long aligned_len = ALIGN(len+bus_addr-aligned_bus,
+					      1 << r->page_size);
+
+	list_for_each_entry(c, &r->chunk_list.head, link) {
+		/* intersection */
+		if (aligned_bus >= c->bus_addr &&
+		    aligned_bus + aligned_len <= c->bus_addr + c->len)
+			return c;
+
+		/* below */
+		if (aligned_bus + aligned_len <= c->bus_addr)
+			continue;
+
+		/* above */
+		if (aligned_bus >= c->bus_addr + c->len)
+			continue;
+
+		/* we don't handle the multi-chunk case for now */
+		dma_dump_chunk(c);
+		BUG();
+	}
+	return NULL;
+}
+
+static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r,
+	unsigned long lpar_addr, unsigned long len)
+{
+	struct dma_chunk *c;
+	unsigned long aligned_lpar = ALIGN_DOWN(lpar_addr, 1 << r->page_size);
+	unsigned long aligned_len = ALIGN(len + lpar_addr - aligned_lpar,
+					      1 << r->page_size);
+
+	list_for_each_entry(c, &r->chunk_list.head, link) {
+		/* request starts inside this chunk: it must also end inside it */
+		if (c->lpar_addr <= aligned_lpar &&
+		    aligned_lpar < c->lpar_addr + c->len) {
+			if (aligned_lpar + aligned_len <= c->lpar_addr + c->len)
+				return c;
+			else {
+				dma_dump_chunk(c);
+				BUG();
+			}
+		}
+		/* below (an overlap that starts below the chunk is not detected) */
+		if (aligned_lpar + aligned_len <= c->lpar_addr) {
+			continue;
+		}
+		/* above; both tests are no-ops, the loop continues either way */
+		if (c->lpar_addr + c->len <= aligned_lpar) {
+			continue;
+		}
+	}
+	return NULL;
+}
+
+static int dma_sb_free_chunk(struct dma_chunk *c)
+{
+	int result = 0;
+
+	if (c->bus_addr) {
+		result = lv1_unmap_device_dma_region(c->region->dev->bus_id,
+			c->region->dev->dev_id, c->bus_addr, c->len);
+		BUG_ON(result);
+	}
+
+	kfree(c);
+	return result;
+}
+
+static int dma_ioc0_free_chunk(struct dma_chunk *c)
+{
+	int result = 0;
+	int iopage;
+	unsigned long offset;
+	struct ps3_dma_region *r = c->region;
+
+	DBG("%s:start\n", __func__);
+	for (iopage = 0; iopage < (c->len >> r->page_size); iopage++) {
+		offset = (1 << r->page_size) * iopage;
+		/* put INVALID entry */
+		result = lv1_put_iopte(0,
+				       c->bus_addr + offset,
+				       c->lpar_addr + offset,
+				       r->ioid,
+				       0);
+		DBG("%s: bus=%#lx, lpar=%#lx, ioid=%d\n", __func__,
+		    c->bus_addr + offset,
+		    c->lpar_addr + offset,
+		    r->ioid);
+
+		if (result) {
+			DBG("%s:%d: lv1_put_iopte failed: %s\n", __func__,
+			    __LINE__, ps3_result(result));
+		}
+	}
+	kfree(c);
+	DBG("%s:end\n", __func__);
+	return result;
+}
+
+/**
+ * dma_sb_map_pages - Maps dma pages into the io controller bus address space.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @phys_addr: Starting physical address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @c_out: A pointer to receive an allocated struct dma_chunk for this area.
+ *
+ * This is the lowest level dma mapping routine, and is the one that will
+ * make the HV call to add the pages into the io controller address space.
+ */
+
+static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
+	    unsigned long len, struct dma_chunk **c_out, u64 iopte_flag)
+{
+	int result;
+	struct dma_chunk *c;
+
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
+	if (!c) {
+		result = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	c->region = r;
+	c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+	c->bus_addr = dma_sb_lpar_to_bus(r, c->lpar_addr);
+	c->len = len;
+
+	BUG_ON(iopte_flag != 0xf800000000000000UL);
+	result = lv1_map_device_dma_region(c->region->dev->bus_id,
+					   c->region->dev->dev_id, c->lpar_addr,
+					   c->bus_addr, c->len, iopte_flag);
+	if (result) {
+		DBG("%s:%d: lv1_map_device_dma_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto fail_map;
+	}
+
+	list_add(&c->link, &r->chunk_list.head);
+
+	*c_out = c;
+	return 0;
+
+fail_map:
+	kfree(c);
+fail_alloc:
+	*c_out = NULL;
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * dma_ioc0_map_pages - Allocate a dma_chunk for @len bytes at @phys_addr and
+ * install one iopte per io page using @iopte_flag.  Returns 0 with the new
+ * chunk in *@c_out, or an error code with *@c_out set to NULL.
+ */
+static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
+	unsigned long len, struct dma_chunk **c_out, u64 iopte_flag)
+{
+	int result;
+	struct dma_chunk *c, *last;
+	int iopage, pages;
+	unsigned long offset;
+
+	DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__,
+	    phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
+	if (!c) {
+		result = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	c->region = r;
+	c->len = len;
+	c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+	/* allocate IO address */
+	if (list_empty(&r->chunk_list.head)) {
+		c->bus_addr = r->bus_addr;	/* first one */
+	} else {
+		/* derive from last bus addr*/
+		last  = list_entry(r->chunk_list.head.next,
+				   struct dma_chunk, link);
+		c->bus_addr = last->bus_addr + last->len;
+		DBG("%s: last bus=%#lx, len=%#lx\n", __func__,
+		    last->bus_addr, last->len);
+	}
+
+	/* FIXME: check whether length exceeds region size */
+
+	/* build ioptes for the area */
+	pages = len >> r->page_size;
+	DBG("%s: pgsize=%#x len=%#lx pages=%#x iopteflag=%#llx\n", __func__,
+	    r->page_size, r->len, pages, iopte_flag);
+	for (iopage = 0; iopage < pages; iopage++) {
+		offset = (1 << r->page_size) * iopage;
+		result = lv1_put_iopte(0, c->bus_addr + offset,
+				       c->lpar_addr + offset, r->ioid,
+				       iopte_flag);
+		if (result) {
+			pr_warn("%s:%d: lv1_put_iopte failed: %s\n",
+				__func__, __LINE__, ps3_result(result));
+			goto fail_map;
+		}
+		DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__,
+		    iopage, c->bus_addr + offset, c->lpar_addr + offset,
+		    r->ioid);
+	}
+
+	/* be sure that last allocated one is inserted at head */
+	list_add(&c->link, &r->chunk_list.head);
+
+	*c_out = c;
+	DBG("%s: end\n", __func__);
+	return 0;
+
+fail_map:
+	/* invalidate only the ioptes installed before the failing page */
+	for (iopage--; 0 <= iopage; iopage--) {
+		offset = (1 << r->page_size) * iopage;
+		lv1_put_iopte(0, c->bus_addr + offset,
+			      c->lpar_addr + offset, r->ioid, 0);
+	}
+	kfree(c);
+fail_alloc:
+	*c_out = NULL;
+	return result;
+}
+
+/**
+ * dma_sb_region_create - Create a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This is the lowest level dma region create routine, and is the one that
+ * will make the HV call to create the region.
+ */
+
+static int dma_sb_region_create(struct ps3_dma_region *r)
+{
+	int result;
+	u64 bus_addr;
+
+	DBG(" -> %s:%d:\n", __func__, __LINE__);
+
+	BUG_ON(!r);
+
+	if (!r->dev->bus_id) {
+		pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__,
+			r->dev->bus_id, r->dev->dev_id);
+		return 0;
+	}
+
+	DBG("%s:%u: len = 0x%lx, page_size = %u, offset = 0x%lx\n", __func__,
+	    __LINE__, r->len, r->page_size, r->offset);
+
+	BUG_ON(!r->len);
+	BUG_ON(!r->page_size);
+	BUG_ON(!r->region_ops);
+
+	INIT_LIST_HEAD(&r->chunk_list.head);
+	spin_lock_init(&r->chunk_list.lock);
+
+	result = lv1_allocate_device_dma_region(r->dev->bus_id, r->dev->dev_id,
+		roundup_pow_of_two(r->len), r->page_size, r->region_type,
+		&bus_addr);
+	r->bus_addr = bus_addr;
+
+	if (result) {
+		DBG("%s:%d: lv1_allocate_device_dma_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		r->len = r->bus_addr = 0;
+	}
+
+	return result;
+}
+
+static int dma_ioc0_region_create(struct ps3_dma_region *r)
+{
+	int result;
+	u64 bus_addr;
+
+	INIT_LIST_HEAD(&r->chunk_list.head);
+	spin_lock_init(&r->chunk_list.lock);
+
+	result = lv1_allocate_io_segment(0,
+					 r->len,
+					 r->page_size,
+					 &bus_addr);
+	r->bus_addr = bus_addr;
+	if (result) {
+		DBG("%s:%d: lv1_allocate_io_segment failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		r->len = r->bus_addr = 0;
+	}
+	DBG("%s: len=%#lx, pg=%d, bus=%#lx\n", __func__,
+	    r->len, r->page_size, r->bus_addr);
+	return result;
+}
+
+/**
+ * dma_sb_region_free - Free a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This is the lowest level dma region free routine, and is the one that
+ * will make the HV call to free the region.
+ */
+
+static int dma_sb_region_free(struct ps3_dma_region *r)
+{
+	int result;
+	struct dma_chunk *c;
+	struct dma_chunk *tmp;
+
+	BUG_ON(!r);
+
+	if (!r->dev->bus_id) {
+		pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__,
+			r->dev->bus_id, r->dev->dev_id);
+		return 0;
+	}
+
+	list_for_each_entry_safe(c, tmp, &r->chunk_list.head, link) {
+		list_del(&c->link);
+		dma_sb_free_chunk(c);
+	}
+
+	result = lv1_free_device_dma_region(r->dev->bus_id, r->dev->dev_id,
+		r->bus_addr);
+
+	if (result)
+		DBG("%s:%d: lv1_free_device_dma_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	r->bus_addr = 0;
+
+	return result;
+}
+
+/* dma_ioc0_region_free - Free all chunks of @r and release its io segment. */
+static int dma_ioc0_region_free(struct ps3_dma_region *r)
+{
+	int result;
+	struct dma_chunk *c, *n;
+
+	DBG("%s: start\n", __func__);
+	/* _safe iteration: each chunk is unlinked and freed while walking */
+	list_for_each_entry_safe(c, n, &r->chunk_list.head, link) {
+		list_del(&c->link);
+		dma_ioc0_free_chunk(c);
+	}
+
+	result = lv1_release_io_segment(0, r->bus_addr);
+	if (result)
+		DBG("%s:%d: lv1_release_io_segment failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	r->bus_addr = 0;
+	DBG("%s: end\n", __func__);
+	return result;
+}
+
+/**
+ * dma_sb_map_area - Map an area of memory into a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ *
+ * This is the common dma mapping routine.
+ */
+
+static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
+	   unsigned long len, dma_addr_t *bus_addr,
+	   u64 iopte_flag)
+{
+	int result;
+	unsigned long flags;
+	struct dma_chunk *c;
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+		: virt_addr;
+	unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size);
+	unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys,
+					      1 << r->page_size);
+	*bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
+
+	if (!USE_DYNAMIC_DMA) {
+		unsigned long lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+		DBG(" -> %s:%d\n", __func__, __LINE__);
+		DBG("%s:%d virt_addr %lxh\n", __func__, __LINE__,
+			virt_addr);
+		DBG("%s:%d phys_addr %lxh\n", __func__, __LINE__,
+			phys_addr);
+		DBG("%s:%d lpar_addr %lxh\n", __func__, __LINE__,
+			lpar_addr);
+		DBG("%s:%d len       %lxh\n", __func__, __LINE__, len);
+		DBG("%s:%d bus_addr  %llxh (%lxh)\n", __func__, __LINE__,
+		*bus_addr, len);
+	}
+
+	spin_lock_irqsave(&r->chunk_list.lock, flags);
+	c = dma_find_chunk(r, *bus_addr, len);
+
+	if (c) {
+		DBG("%s:%d: reusing mapped chunk\n", __func__, __LINE__);
+		dma_dump_chunk(c);
+		c->usage_count++;	/* share the existing mapping */
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return 0;
+	}
+
+	result = dma_sb_map_pages(r, aligned_phys, aligned_len, &c, iopte_flag);
+
+	if (result) {
+		*bus_addr = 0;
+		DBG("%s:%d: dma_sb_map_pages failed (%d)\n",
+			__func__, __LINE__, result);
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return result;
+	}
+
+	c->usage_count = 1;	/* first user of the newly mapped chunk */
+
+	spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+	return result;
+}
+
+static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
+	     unsigned long len, dma_addr_t *bus_addr,
+	     u64 iopte_flag)
+{
+	int result;
+	unsigned long flags;
+	struct dma_chunk *c;
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+		: virt_addr;
+	unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size);
+	unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys,
+					      1 << r->page_size);
+
+	DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__,
+	    virt_addr, len);
+	DBG(KERN_ERR "%s: ph=%#lx a_ph=%#lx a_l=%#lx\n", __func__,
+	    phys_addr, aligned_phys, aligned_len);
+
+	spin_lock_irqsave(&r->chunk_list.lock, flags);
+	c = dma_find_chunk_lpar(r, ps3_mm_phys_to_lpar(phys_addr), len);
+
+	if (c) {
+		/* FIXME */
+		BUG();
+		*bus_addr = c->bus_addr + phys_addr - aligned_phys;
+		c->usage_count++;
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return 0;
+	}
+
+	result = dma_ioc0_map_pages(r, aligned_phys, aligned_len, &c,
+				    iopte_flag);
+
+	if (result) {
+		*bus_addr = 0;
+		DBG("%s:%d: dma_ioc0_map_pages failed (%d)\n",
+			__func__, __LINE__, result);
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return result;
+	}
+	*bus_addr = c->bus_addr + phys_addr - aligned_phys;
+	DBG("%s: va=%#lx pa=%#lx a_pa=%#lx bus=%#llx\n", __func__,
+	    virt_addr, phys_addr, aligned_phys, *bus_addr);
+	c->usage_count = 1;
+
+	spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+	return result;
+}
+
+/**
+ * dma_sb_unmap_area - Unmap an area of memory from a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @bus_addr: The starting ioc bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * This is the common dma unmap routine.
+ */
+
+static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr,
+	unsigned long len)
+{
+	unsigned long flags;
+	struct dma_chunk *c;
+
+	spin_lock_irqsave(&r->chunk_list.lock, flags);
+	c = dma_find_chunk(r, bus_addr, len);
+
+	if (!c) {
+		unsigned long aligned_bus = ALIGN_DOWN(bus_addr,
+			1 << r->page_size);
+		unsigned long aligned_len = ALIGN(len + bus_addr
+			- aligned_bus, 1 << r->page_size);
+		DBG("%s:%d: not found: bus_addr %llxh\n",
+			__func__, __LINE__, bus_addr);
+		DBG("%s:%d: not found: len %lxh\n",
+			__func__, __LINE__, len);
+		DBG("%s:%d: not found: aligned_bus %lxh\n",
+			__func__, __LINE__, aligned_bus);
+		DBG("%s:%d: not found: aligned_len %lxh\n",
+			__func__, __LINE__, aligned_len);
+		BUG();
+	}
+
+	c->usage_count--;
+
+	if (!c->usage_count) {
+		list_del(&c->link);
+		dma_sb_free_chunk(c);
+	}
+
+	spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+	return 0;
+}
+
+static int dma_ioc0_unmap_area(struct ps3_dma_region *r,
+			dma_addr_t bus_addr, unsigned long len)
+{
+	unsigned long flags;
+	struct dma_chunk *c;
+
+	DBG("%s: start a=%#llx l=%#lx\n", __func__, bus_addr, len);
+	spin_lock_irqsave(&r->chunk_list.lock, flags);
+	c = dma_find_chunk(r, bus_addr, len);
+
+	if (!c) {
+		unsigned long aligned_bus = ALIGN_DOWN(bus_addr,
+							1 << r->page_size);
+		unsigned long aligned_len = ALIGN(len + bus_addr
+						      - aligned_bus,
+						      1 << r->page_size);
+		DBG("%s:%d: not found: bus_addr %llxh\n",
+		    __func__, __LINE__, bus_addr);
+		DBG("%s:%d: not found: len %lxh\n",
+		    __func__, __LINE__, len);
+		DBG("%s:%d: not found: aligned_bus %lxh\n",
+		    __func__, __LINE__, aligned_bus);
+		DBG("%s:%d: not found: aligned_len %lxh\n",
+		    __func__, __LINE__, aligned_len);
+		BUG();
+	}
+
+	c->usage_count--;
+
+	if (!c->usage_count) {
+		list_del(&c->link);
+		dma_ioc0_free_chunk(c);
+	}
+
+	spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+	DBG("%s: end\n", __func__);
+	return 0;
+}
+
+/**
+ * dma_sb_region_create_linear - Setup a linear dma mapping for a device.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This routine creates an HV dma region for the device and maps all available
+ * ram into the io controller bus address space.
+ */
+
+static int dma_sb_region_create_linear(struct ps3_dma_region *r)
+{
+	int result;
+	unsigned long virt_addr, len;
+	dma_addr_t tmp;
+
+	if (r->len > 16*1024*1024) {	/* FIXME: need proper fix */
+		/* force 16M dma pages for linear mapping */
+		if (r->page_size != PS3_DMA_16M) {
+			pr_info("%s:%d: forcing 16M pages for linear map\n",
+				__func__, __LINE__);
+			r->page_size = PS3_DMA_16M;
+			r->len = ALIGN(r->len, 1 << r->page_size);
+		}
+	}
+
+	result = dma_sb_region_create(r);
+	BUG_ON(result);
+
+	if (r->offset < map.rm.size) {
+		/* Map (part of) 1st RAM chunk */
+		virt_addr = map.rm.base + r->offset;
+		len = map.rm.size - r->offset;
+		if (len > r->len)
+			len = r->len;
+		result = dma_sb_map_area(r, virt_addr, len, &tmp,
+			CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW |
+			CBE_IOPTE_M);
+		BUG_ON(result);
+	}
+
+	if (r->offset + r->len > map.rm.size) {
+		/* Map (part of) 2nd RAM chunk */
+		virt_addr = map.rm.size;
+		len = r->len;
+		if (r->offset >= map.rm.size)
+			virt_addr += r->offset - map.rm.size;
+		else
+			len -= map.rm.size - r->offset;
+		result = dma_sb_map_area(r, virt_addr, len, &tmp,
+			CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW |
+			CBE_IOPTE_M);
+		BUG_ON(result);
+	}
+
+	return result;
+}
+
+/**
+ * dma_sb_region_free_linear - Free a linear dma mapping for a device.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This routine will unmap all mapped areas and free the HV dma region.
+ */
+
+static int dma_sb_region_free_linear(struct ps3_dma_region *r)
+{
+	int result;
+	dma_addr_t bus_addr;
+	unsigned long len, lpar_addr;
+
+	if (r->offset < map.rm.size) {
+		/* Unmap (part of) 1st RAM chunk */
+		lpar_addr = map.rm.base + r->offset;
+		len = map.rm.size - r->offset;
+		if (len > r->len)
+			len = r->len;
+		bus_addr = dma_sb_lpar_to_bus(r, lpar_addr);
+		result = dma_sb_unmap_area(r, bus_addr, len);
+		BUG_ON(result);
+	}
+
+	if (r->offset + r->len > map.rm.size) {
+		/* Unmap (part of) 2nd RAM chunk */
+		lpar_addr = map.r1.base;
+		len = r->len;
+		if (r->offset >= map.rm.size)
+			lpar_addr += r->offset - map.rm.size;
+		else
+			len -= map.rm.size - r->offset;
+		bus_addr = dma_sb_lpar_to_bus(r, lpar_addr);
+		result = dma_sb_unmap_area(r, bus_addr, len);
+		BUG_ON(result);
+	}
+
+	result = dma_sb_region_free(r);
+	BUG_ON(result);
+
+	return result;
+}
+
+/**
+ * dma_sb_map_area_linear - Map an area of memory into a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ *
+ * This routine just returns the corresponding bus address.  Actual mapping
+ * occurs in dma_sb_region_create_linear().
+ */
+
+static int dma_sb_map_area_linear(struct ps3_dma_region *r,
+	unsigned long virt_addr, unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+		: virt_addr;
+	*bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
+	return 0;
+}
+
+/**
+ * dma_sb_unmap_area_linear - Unmap an area of memory from a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @bus_addr: The starting ioc bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * This routine does nothing.  Unmapping occurs in dma_sb_region_free_linear().
+ */
+
+static int dma_sb_unmap_area_linear(struct ps3_dma_region *r,
+	dma_addr_t bus_addr, unsigned long len)
+{
+	return 0;
+}
+
+static const struct ps3_dma_region_ops ps3_dma_sb_region_ops =  {
+	.create = dma_sb_region_create,
+	.free = dma_sb_region_free,
+	.map = dma_sb_map_area,
+	.unmap = dma_sb_unmap_area
+};
+
+static const struct ps3_dma_region_ops ps3_dma_sb_region_linear_ops = {
+	.create = dma_sb_region_create_linear,
+	.free = dma_sb_region_free_linear,
+	.map = dma_sb_map_area_linear,
+	.unmap = dma_sb_unmap_area_linear
+};
+
+static const struct ps3_dma_region_ops ps3_dma_ioc0_region_ops = {
+	.create = dma_ioc0_region_create,
+	.free = dma_ioc0_region_free,
+	.map = dma_ioc0_map_area,
+	.unmap = dma_ioc0_unmap_area
+};
+
+int ps3_dma_region_init(struct ps3_system_bus_device *dev,
+	struct ps3_dma_region *r, enum ps3_dma_page_size page_size,
+	enum ps3_dma_region_type region_type, void *addr, unsigned long len)
+{
+	unsigned long lpar_addr;
+	int result;
+
+	lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0;
+
+	r->dev = dev;
+	r->page_size = page_size;
+	r->region_type = region_type;
+	r->offset = lpar_addr;
+	if (r->offset >= map.rm.size)
+		r->offset -= map.r1.offset;
+	r->len = len ? len : ALIGN(map.total, 1 << r->page_size);
+
+	dev->core.dma_mask = &r->dma_mask;
+
+	result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32));
+
+	if (result < 0) {
+		dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n",
+			__func__, __LINE__, result);
+		return result;
+	}
+
+	switch (dev->dev_type) {
+	case PS3_DEVICE_TYPE_SB:
+		r->region_ops =  (USE_DYNAMIC_DMA)
+			? &ps3_dma_sb_region_ops
+			: &ps3_dma_sb_region_linear_ops;
+		break;
+	case PS3_DEVICE_TYPE_IOC0:
+		r->region_ops = &ps3_dma_ioc0_region_ops;
+		break;
+	default:
+		BUG();
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ps3_dma_region_init);
+
+int ps3_dma_region_create(struct ps3_dma_region *r)
+{
+	BUG_ON(!r);
+	BUG_ON(!r->region_ops);
+	BUG_ON(!r->region_ops->create);
+	return r->region_ops->create(r);
+}
+EXPORT_SYMBOL(ps3_dma_region_create);
+
+int ps3_dma_region_free(struct ps3_dma_region *r)
+{
+	BUG_ON(!r);
+	BUG_ON(!r->region_ops);
+	BUG_ON(!r->region_ops->free);
+	return r->region_ops->free(r);
+}
+EXPORT_SYMBOL(ps3_dma_region_free);
+
+int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr,
+	unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	return r->region_ops->map(r, virt_addr, len, bus_addr, iopte_flag);
+}
+
+int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr,
+	unsigned long len)
+{
+	return r->region_ops->unmap(r, bus_addr, len);
+}
+
+/*============================================================================*/
+/* system startup routines                                                    */
+/*============================================================================*/
+
+/**
+ * ps3_mm_init - initialize the address space state variables
+ */
+
+void __init ps3_mm_init(void)
+{
+	int result;
+
+	DBG(" -> %s:%d\n", __func__, __LINE__);
+
+	result = ps3_repository_read_mm_info(&map.rm.base, &map.rm.size,
+		&map.total);
+
+	if (result)
+		panic("ps3_repository_read_mm_info() failed");
+
+	map.rm.offset = map.rm.base;
+	map.vas_id = map.htab_size = 0;
+
+	/* this implementation assumes map.rm.base is zero */
+
+	BUG_ON(map.rm.base);
+	BUG_ON(!map.rm.size);
+
+	/* Check if we got the highmem region from an earlier boot step */
+
+	if (ps3_mm_get_repository_highmem(&map.r1)) {
+		result = ps3_mm_region_create(&map.r1, map.total - map.rm.size);
+
+		if (!result)
+			ps3_mm_set_repository_highmem(&map.r1);
+	}
+
+	/* correct map.total for the real total amount of memory we use */
+	map.total = map.rm.size + map.r1.size;
+
+	if (!map.r1.size) {
+		DBG("%s:%d: No highmem region found\n", __func__, __LINE__);
+	} else {
+		DBG("%s:%d: Adding highmem region: %llxh %llxh\n",
+			__func__, __LINE__, map.rm.size,
+			map.total - map.rm.size);
+		memblock_add(map.rm.size, map.total - map.rm.size);
+	}
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_mm_shutdown - final cleanup of address space
+ *
+ * called during kexec sequence with MMU off.
+ */
+
+notrace void ps3_mm_shutdown(void)
+{
+	ps3_mm_region_destroy(&map.r1);
+}
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
new file mode 100644
index 0000000000..b384cd2d6b
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 flash memory os area.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include "platform.h"
+
+enum {
+	OS_AREA_SEGMENT_SIZE = 0X200,
+};
+
+enum os_area_ldr_format {
+	HEADER_LDR_FORMAT_RAW = 0,
+	HEADER_LDR_FORMAT_GZIP = 1,
+};
+
+#define OS_AREA_HEADER_MAGIC_NUM "cell_ext_os_area"
+
+/**
+ * struct os_area_header - os area header segment.
+ * @magic_num: Always 'cell_ext_os_area'.
+ * @hdr_version: Header format version number.
+ * @db_area_offset: Starting segment number of other os database area.
+ * @ldr_area_offset: Starting segment number of bootloader image area.
+ * @ldr_format: HEADER_LDR_FORMAT flag.
+ * @ldr_size: Size of bootloader image in bytes.
+ *
+ * Note that the docs refer to area offsets.  These are offsets in units of
+ * segments from the start of the os area (top of the header).  These are
+ * better thought of as segment numbers.  The os area of the os area is
+ * reserved for the os image.
+ */
+
+struct os_area_header {
+	u8 magic_num[16];
+	u32 hdr_version;
+	u32 db_area_offset;
+	u32 ldr_area_offset;
+	u32 _reserved_1;
+	u32 ldr_format;
+	u32 ldr_size;
+	u32 _reserved_2[6];
+};
+
+enum os_area_boot_flag {
+	PARAM_BOOT_FLAG_GAME_OS = 0,
+	PARAM_BOOT_FLAG_OTHER_OS = 1,
+};
+
+enum os_area_ctrl_button {
+	PARAM_CTRL_BUTTON_O_IS_YES = 0,
+	PARAM_CTRL_BUTTON_X_IS_YES = 1,
+};
+
+/**
+ * struct os_area_params - os area params segment.
+ * @boot_flag: User preference of operating system, PARAM_BOOT_FLAG flag.
+ * @num_params: Number of params in this (params) segment.
+ * @rtc_diff: Difference in seconds between 1970 and the ps3 rtc value.
+ * @av_multi_out: User preference of AV output, PARAM_AV_MULTI_OUT flag.
+ * @ctrl_button: User preference of controller button config, PARAM_CTRL_BUTTON
+ *	flag.
+ * @static_ip_addr: User preference of static IP address.
+ * @network_mask: User preference of static network mask.
+ * @default_gateway: User preference of static default gateway.
+ * @dns_primary: User preference of static primary dns server.
+ * @dns_secondary: User preference of static secondary dns server.
+ *
+ * The ps3 rtc maintains a read-only value that approximates seconds since
+ * 2000-01-01 00:00:00 UTC.
+ *
+ * User preference of zero for static_ip_addr means use dhcp.
+ */
+
+struct os_area_params {
+	u32 boot_flag;
+	u32 _reserved_1[3];
+	u32 num_params;
+	u32 _reserved_2[3];
+	/* param 0 */
+	s64 rtc_diff;
+	u8 av_multi_out;
+	u8 ctrl_button;
+	u8 _reserved_3[6];
+	/* param 1 */
+	u8 static_ip_addr[4];
+	u8 network_mask[4];
+	u8 default_gateway[4];
+	u8 _reserved_4[4];
+	/* param 2 */
+	u8 dns_primary[4];
+	u8 dns_secondary[4];
+	u8 _reserved_5[8];
+};
+
+#define OS_AREA_DB_MAGIC_NUM "-db-"
+
+/**
+ * struct os_area_db - Shared flash memory database.
+ * @magic_num: Always '-db-'.
+ * @version: os_area_db format version number.
+ * @index_64: byte offset of the database id index for 64 bit variables.
+ * @count_64: number of usable 64 bit index entries
+ * @index_32: byte offset of the database id index for 32 bit variables.
+ * @count_32: number of usable 32 bit index entries
+ * @index_16: byte offset of the database id index for 16 bit variables.
+ * @count_16: number of usable 16 bit index entries
+ *
+ * Flash rom storage for exclusive use by guests running in the other os lpar.
+ * The current system configuration allocates 1K (two segments) for other os
+ * use.
+ */
+
+struct os_area_db {
+	u8 magic_num[4];
+	u16 version;
+	u16 _reserved_1;
+	u16 index_64;
+	u16 count_64;
+	u16 index_32;
+	u16 count_32;
+	u16 index_16;
+	u16 count_16;
+	u32 _reserved_2;
+	u8 _db_data[1000];
+};
+
+/**
+ * enum os_area_db_owner - Data owners.
+ */
+
+enum os_area_db_owner {
+	OS_AREA_DB_OWNER_ANY = -1,
+	OS_AREA_DB_OWNER_NONE = 0,
+	OS_AREA_DB_OWNER_PROTOTYPE = 1,
+	OS_AREA_DB_OWNER_LINUX = 2,
+	OS_AREA_DB_OWNER_PETITBOOT = 3,
+	OS_AREA_DB_OWNER_MAX = 32,
+};
+
+enum os_area_db_key {
+	OS_AREA_DB_KEY_ANY = -1,
+	OS_AREA_DB_KEY_NONE = 0,
+	OS_AREA_DB_KEY_RTC_DIFF = 1,
+	OS_AREA_DB_KEY_VIDEO_MODE = 2,
+	OS_AREA_DB_KEY_MAX = 8,
+};
+
+struct os_area_db_id {
+	int owner;
+	int key;
+};
+
+static const struct os_area_db_id os_area_db_id_empty = {
+	.owner = OS_AREA_DB_OWNER_NONE,
+	.key = OS_AREA_DB_KEY_NONE
+};
+
+static const struct os_area_db_id os_area_db_id_any = {
+	.owner = OS_AREA_DB_OWNER_ANY,
+	.key = OS_AREA_DB_KEY_ANY
+};
+
+static const struct os_area_db_id os_area_db_id_rtc_diff = {
+	.owner = OS_AREA_DB_OWNER_LINUX,
+	.key = OS_AREA_DB_KEY_RTC_DIFF
+};
+
+#define SECONDS_FROM_1970_TO_2000 946684800LL
+
+/**
+ * struct saved_params - Static working copies of data from the PS3 'os area'.
+ *
+ * The order of preference we use for the rtc_diff source:
+ *  1) The database value.
+ *  2) The game os value.
+ *  3) The number of seconds from 1970 to 2000.
+ */
+
+static struct saved_params {
+	unsigned int valid;
+	s64 rtc_diff;
+	unsigned int av_multi_out;
+} saved_params;
+
+static struct property property_rtc_diff = {
+	.name = "linux,rtc_diff",
+	.length = sizeof(saved_params.rtc_diff),
+	.value = &saved_params.rtc_diff,
+};
+
+static struct property property_av_multi_out = {
+	.name = "linux,av_multi_out",
+	.length = sizeof(saved_params.av_multi_out),
+	.value = &saved_params.av_multi_out,
+};
+
+
+static DEFINE_MUTEX(os_area_flash_mutex);
+
+static const struct ps3_os_area_flash_ops *os_area_flash_ops;
+
+void ps3_os_area_flash_register(const struct ps3_os_area_flash_ops *ops)
+{
+	mutex_lock(&os_area_flash_mutex);
+	os_area_flash_ops = ops;
+	mutex_unlock(&os_area_flash_mutex);
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_flash_register);
+
+static ssize_t os_area_flash_read(void *buf, size_t count, loff_t pos)
+{
+	ssize_t res = -ENODEV;
+
+	mutex_lock(&os_area_flash_mutex);
+	if (os_area_flash_ops)
+		res = os_area_flash_ops->read(buf, count, pos);
+	mutex_unlock(&os_area_flash_mutex);
+
+	return res;
+}
+
+static ssize_t os_area_flash_write(const void *buf, size_t count, loff_t pos)
+{
+	ssize_t res = -ENODEV;
+
+	mutex_lock(&os_area_flash_mutex);
+	if (os_area_flash_ops)
+		res = os_area_flash_ops->write(buf, count, pos);
+	mutex_unlock(&os_area_flash_mutex);
+
+	return res;
+}
+
+
+/**
+ * os_area_set_property - Add or overwrite a saved_params value to the device tree.
+ * @node: Device tree node that receives the property.
+ * @prop: Property to add; an existing property with the same name is removed.
+ */
+
+static void os_area_set_property(struct device_node *node,
+	struct property *prop)
+{
+	int result;
+	struct property *tmp = of_find_property(node, prop->name, NULL);
+
+	if (tmp) {
+		pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name);
+		of_remove_property(node, tmp);
+	}
+
+	result = of_add_property(node, prop);
+
+	if (result)
+		pr_debug("%s:%d of_add_property failed: %d\n", __func__,
+			__LINE__, result);
+}
+
+/**
+ * os_area_get_property - Get a saved_params value from the device tree.
+ *
+ */
+
+static void __init os_area_get_property(struct device_node *node,
+	struct property *prop)
+{
+	const struct property *tmp = of_find_property(node, prop->name, NULL);
+
+	if (tmp) {
+		BUG_ON(prop->length != tmp->length);
+		memcpy(prop->value, tmp->value, prop->length);
+	} else
+		pr_debug("%s:%d not found %s\n", __func__, __LINE__,
+			prop->name);
+}
+
+/* dump_field - Copy @field into @s as a printable, NUL terminated string. */
+static void dump_field(char *s, const u8 *field, int size_of_field)
+{
+	int i;
+
+	/* Not DEBUG-only: dynamic debug can enable the pr_debug() users of @s. */
+	for (i = 0; i < size_of_field; i++)
+		s[i] = isprint(field[i]) ? field[i] : '.';
+	s[i] = 0;	/* @s must hold size_of_field + 1 bytes */
+}
+
+#define dump_header(_a) _dump_header(_a, __func__, __LINE__)
+static void _dump_header(const struct os_area_header *h, const char *func,
+	int line)
+{
+	char str[sizeof(h->magic_num) + 1];
+
+	dump_field(str, h->magic_num, sizeof(h->magic_num));
+	pr_debug("%s:%d: h.magic_num:       '%s'\n", func, line,
+		str);
+	pr_debug("%s:%d: h.hdr_version:     %u\n", func, line,
+		h->hdr_version);
+	pr_debug("%s:%d: h.db_area_offset:  %u\n", func, line,
+		h->db_area_offset);
+	pr_debug("%s:%d: h.ldr_area_offset: %u\n", func, line,
+		h->ldr_area_offset);
+	pr_debug("%s:%d: h.ldr_format:      %u\n", func, line,
+		h->ldr_format);
+	pr_debug("%s:%d: h.ldr_size:        %xh\n", func, line,
+		h->ldr_size);
+}
+
+#define dump_params(_a) _dump_params(_a, __func__, __LINE__)
+static void _dump_params(const struct os_area_params *p, const char *func,
+	int line)
+{
+	pr_debug("%s:%d: p.boot_flag:       %u\n", func, line, p->boot_flag);
+	pr_debug("%s:%d: p.num_params:      %u\n", func, line, p->num_params);
+	pr_debug("%s:%d: p.rtc_diff         %lld\n", func, line, p->rtc_diff);
+	pr_debug("%s:%d: p.av_multi_out     %u\n", func, line, p->av_multi_out);
+	pr_debug("%s:%d: p.ctrl_button:     %u\n", func, line, p->ctrl_button);
+	pr_debug("%s:%d: p.static_ip_addr:  %u.%u.%u.%u\n", func, line,
+		p->static_ip_addr[0], p->static_ip_addr[1],
+		p->static_ip_addr[2], p->static_ip_addr[3]);
+	pr_debug("%s:%d: p.network_mask:    %u.%u.%u.%u\n", func, line,
+		p->network_mask[0], p->network_mask[1],
+		p->network_mask[2], p->network_mask[3]);
+	pr_debug("%s:%d: p.default_gateway: %u.%u.%u.%u\n", func, line,
+		p->default_gateway[0], p->default_gateway[1],
+		p->default_gateway[2], p->default_gateway[3]);
+	pr_debug("%s:%d: p.dns_primary:     %u.%u.%u.%u\n", func, line,
+		p->dns_primary[0], p->dns_primary[1],
+		p->dns_primary[2], p->dns_primary[3]);
+	pr_debug("%s:%d: p.dns_secondary:   %u.%u.%u.%u\n", func, line,
+		p->dns_secondary[0], p->dns_secondary[1],
+		p->dns_secondary[2], p->dns_secondary[3]);
+}
+
+static int verify_header(const struct os_area_header *header)
+{
+	if (memcmp(header->magic_num, OS_AREA_HEADER_MAGIC_NUM,
+		sizeof(header->magic_num))) {
+		pr_debug("%s:%d magic_num failed\n", __func__, __LINE__);
+		return -1;
+	}
+
+	if (header->hdr_version < 1) {
+		pr_debug("%s:%d hdr_version failed\n", __func__, __LINE__);
+		return -1;
+	}
+
+	if (header->db_area_offset > header->ldr_area_offset) {
+		pr_debug("%s:%d offsets failed\n", __func__, __LINE__);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int db_verify(const struct os_area_db *db)
+{
+	if (memcmp(db->magic_num, OS_AREA_DB_MAGIC_NUM,
+		sizeof(db->magic_num))) {
+		pr_debug("%s:%d magic_num failed\n", __func__, __LINE__);
+		return -EINVAL;
+	}
+
+	if (db->version != 1) {
+		pr_debug("%s:%d version failed\n", __func__, __LINE__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct db_index {	/* one index slot; os_area_db_init() requires size 1 */
+	uint8_t owner:5;	/* compared with os_area_db_id.owner */
+	uint8_t key:3;		/* compared with os_area_db_id.key */
+};
+
+struct db_iterator {
+	const struct os_area_db *db;
+	struct os_area_db_id match_id;
+	struct db_index *idx;
+	struct db_index *last_idx;
+	union {
+		uint64_t *value_64;
+		uint32_t *value_32;
+		uint16_t *value_16;
+	};
+};
+
+static unsigned int db_align_up(unsigned int val, unsigned int size)
+{
+	return (val + (size - 1)) & (~(size - 1)); /* size: power of 2 */
+}
+
+/**
+ * db_for_each_64 - Iterator for 64 bit entries.
+ * @i: Iterator state; set @i->db to NULL before the first call.
+ *
+ * A NULL @match_id, OS_AREA_DB_OWNER_ANY or OS_AREA_DB_KEY_ANY matches all.
+ * Returns 1 with @i positioned on the next match, 0 when the end is reached.
+ */
+static int db_for_each_64(const struct os_area_db *db,
+	const struct os_area_db_id *match_id, struct db_iterator *i)
+{
+next:
+	if (!i->db) {
+		i->db = db;
+		i->match_id = match_id ? *match_id : os_area_db_id_any;
+		i->idx = (void *)db + db->index_64;
+		i->last_idx = i->idx + db->count_64;
+		i->value_64 = (void *)db + db->index_64
+			+ db_align_up(db->count_64, 8); /* after padded index */
+	} else {
+		i->idx++;
+		i->value_64++;
+	}
+
+	if (i->idx >= i->last_idx) {
+		pr_debug("%s:%d: reached end\n", __func__, __LINE__);
+		return 0;
+	}
+
+	if (i->match_id.owner != OS_AREA_DB_OWNER_ANY
+		&& i->match_id.owner != (int)i->idx->owner)
+		goto next;
+	if (i->match_id.key != OS_AREA_DB_KEY_ANY
+		&& i->match_id.key != (int)i->idx->key)
+		goto next;
+
+	return 1;
+}
+
+static int db_delete_64(struct os_area_db *db, const struct os_area_db_id *id)
+{
+	struct db_iterator i;
+
+	for (i.db = NULL; db_for_each_64(db, id, &i); ) {
+
+		pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__,
+			i.idx->owner, i.idx->key,
+			(unsigned long long)*i.value_64);
+
+		i.idx->owner = 0;
+		i.idx->key = 0;
+		*i.value_64 = 0;
+	}
+	return 0;
+}
+
+static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id,
+	uint64_t value)
+{
+	struct db_iterator i;
+
+	pr_debug("%s:%d: (%d:%d) <= %llxh\n", __func__, __LINE__,
+		id->owner, id->key, (unsigned long long)value);
+
+	if (!id->owner || id->owner == OS_AREA_DB_OWNER_ANY
+		|| id->key == OS_AREA_DB_KEY_ANY) {
+		pr_debug("%s:%d: bad id: (%d:%d)\n", __func__,
+			__LINE__, id->owner, id->key);
+		return -1;
+	}
+
+	db_delete_64(db, id);
+
+	i.db = NULL;
+	if (db_for_each_64(db, &os_area_db_id_empty, &i)) {
+
+		pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__,
+			i.idx->owner, i.idx->key,
+			(unsigned long long)*i.value_64);
+
+		i.idx->owner = id->owner;
+		i.idx->key = id->key;
+		*i.value_64 = value;
+
+		pr_debug("%s:%d: set (%d:%d) <= %llxh\n", __func__, __LINE__,
+			i.idx->owner, i.idx->key,
+			(unsigned long long)*i.value_64);
+		return 0;
+	}
+	pr_debug("%s:%d: database full.\n",
+		__func__, __LINE__);
+	return -1;
+}
+
+static int __init db_get_64(const struct os_area_db *db,
+	const struct os_area_db_id *id, uint64_t *value)
+{
+	struct db_iterator i;
+
+	i.db = NULL;
+	if (db_for_each_64(db, id, &i)) {
+		*value = *i.value_64;
+		pr_debug("%s:%d: found %lld\n", __func__, __LINE__,
+				(long long int)*i.value_64);
+		return 0;
+	}
+	pr_debug("%s:%d: not found\n", __func__, __LINE__);
+	return -1;
+}
+
+static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff)
+{
+	return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff);
+}
+
+#define dump_db(a) _dump_db(a, __func__, __LINE__)
+static void _dump_db(const struct os_area_db *db, const char *func,
+	int line)
+{
+	char str[sizeof(db->magic_num) + 1];
+
+	dump_field(str, db->magic_num, sizeof(db->magic_num));
+	pr_debug("%s:%d: db.magic_num:      '%s'\n", func, line,
+		str);
+	pr_debug("%s:%d: db.version:         %u\n", func, line,
+		db->version);
+	pr_debug("%s:%d: db.index_64:        %u\n", func, line,
+		db->index_64);
+	pr_debug("%s:%d: db.count_64:        %u\n", func, line,
+		db->count_64);
+	pr_debug("%s:%d: db.index_32:        %u\n", func, line,
+		db->index_32);
+	pr_debug("%s:%d: db.count_32:        %u\n", func, line,
+		db->count_32);
+	pr_debug("%s:%d: db.index_16:        %u\n", func, line,
+		db->index_16);
+	pr_debug("%s:%d: db.count_16:        %u\n", func, line,
+		db->count_16);
+}
+
+static void os_area_db_init(struct os_area_db *db)
+{
+	enum {
+		HEADER_SIZE = offsetof(struct os_area_db, _db_data),
+		INDEX_64_COUNT = 64,
+		VALUES_64_COUNT = 57,
+		INDEX_32_COUNT = 64,
+		VALUES_32_COUNT = 57,
+		INDEX_16_COUNT = 64,
+		VALUES_16_COUNT = 57,
+	};
+
+	memset(db, 0, sizeof(struct os_area_db));
+
+	memcpy(db->magic_num, OS_AREA_DB_MAGIC_NUM, sizeof(db->magic_num));
+	db->version = 1;
+	db->index_64 = HEADER_SIZE;
+	db->count_64 = VALUES_64_COUNT;
+	db->index_32 = HEADER_SIZE
+			+ INDEX_64_COUNT * sizeof(struct db_index)
+			+ VALUES_64_COUNT * sizeof(u64);
+	db->count_32 = VALUES_32_COUNT;
+	db->index_16 = HEADER_SIZE
+			+ INDEX_64_COUNT * sizeof(struct db_index)
+			+ VALUES_64_COUNT * sizeof(u64)
+			+ INDEX_32_COUNT * sizeof(struct db_index)
+			+ VALUES_32_COUNT * sizeof(u32);
+	db->count_16 = VALUES_16_COUNT;
+
+	/* Rules to check db layout. */
+
+	BUILD_BUG_ON(sizeof(struct db_index) != 1);
+	BUILD_BUG_ON(sizeof(struct os_area_db) != 2 * OS_AREA_SEGMENT_SIZE);
+	BUILD_BUG_ON(INDEX_64_COUNT & 0x7);
+	BUILD_BUG_ON(VALUES_64_COUNT > INDEX_64_COUNT);
+	BUILD_BUG_ON(INDEX_32_COUNT & 0x7);
+	BUILD_BUG_ON(VALUES_32_COUNT > INDEX_32_COUNT);
+	BUILD_BUG_ON(INDEX_16_COUNT & 0x7);
+	BUILD_BUG_ON(VALUES_16_COUNT > INDEX_16_COUNT);
+	BUILD_BUG_ON(HEADER_SIZE
+			+ INDEX_64_COUNT * sizeof(struct db_index)
+			+ VALUES_64_COUNT * sizeof(u64)
+			+ INDEX_32_COUNT * sizeof(struct db_index)
+			+ VALUES_32_COUNT * sizeof(u32)
+			+ INDEX_16_COUNT * sizeof(struct db_index)
+			+ VALUES_16_COUNT * sizeof(u16)
+			> sizeof(struct os_area_db));
+}
+
+/**
+ * update_flash_db - Helper for os_area_queue_work_handler.
+ *
+ * Stores saved_params.rtc_diff in the flash db; returns 0 or a negative errno.
+ */
+static int update_flash_db(void)
+{
+	const unsigned int buf_len = 8 * OS_AREA_SEGMENT_SIZE;
+	struct os_area_header *header;
+	ssize_t count;
+	int error;
+	loff_t pos;
+	struct os_area_db *db;
+
+	/* Read in header and db from flash. */
+
+	header = kmalloc(buf_len, GFP_KERNEL);
+	if (!header)
+		return -ENOMEM;
+
+	count = os_area_flash_read(header, buf_len, 0);
+	if (count < 0) {
+		pr_debug("%s: os_area_flash_read failed %zd\n", __func__,
+			 count);
+		error = count;
+		goto fail;
+	}
+
+	pos = header->db_area_offset * OS_AREA_SEGMENT_SIZE;
+	if (count < OS_AREA_SEGMENT_SIZE || verify_header(header) ||
+	    count < pos + (loff_t)sizeof(struct os_area_db)) {
+		pr_debug("%s: verify_header failed\n", __func__);
+		dump_header(header);
+		error = -EINVAL;
+		goto fail;
+	}
+
+	/* The whole db lies inside the data read, but may still be invalid. */
+
+	db = (void *)header + pos;
+
+	error = 0; /* A bad db is reformatted below, which is not a failure. */
+	if (db_verify(db)) {
+		pr_notice("%s: Verify of flash database failed, formatting.\n",
+			  __func__);
+		dump_db(db);
+		os_area_db_init(db);
+	}
+
+	/* Now got good db data. */
+
+	db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff);
+
+	count = os_area_flash_write(db, sizeof(struct os_area_db), pos);
+	if (count < 0 || count < sizeof(struct os_area_db)) {
+		pr_debug("%s: os_area_flash_write failed %zd\n", __func__,
+			 count);
+		error = count < 0 ? count : -EIO;
+	}
+
+fail:
+	kfree(header);
+	return error;
+}
+
+/**
+ * os_area_queue_work_handler - Asynchronous write handler.
+ *
+ * An asynchronous write for flash memory and the device tree.  Do not
+ * call directly, use os_area_queue_work().
+ */
+
+static void os_area_queue_work_handler(struct work_struct *work)
+{
+	struct device_node *node;
+	int error;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	node = of_find_node_by_path("/");
+	if (node) {
+		os_area_set_property(node, &property_rtc_diff);
+		of_node_put(node);
+	} else
+		pr_debug("%s:%d of_find_node_by_path failed\n",
+			__func__, __LINE__);
+
+	error = update_flash_db();
+	if (error)
+		pr_warn("%s: Could not update FLASH ROM\n", __func__);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+}
+
+static void os_area_queue_work(void)
+{
+	static DECLARE_WORK(q, os_area_queue_work_handler);
+
+	wmb(); /* presumably publishes saved_params first - confirm */
+	schedule_work(&q);
+}
+
+/**
+ * ps3_os_area_save_params - Copy data from os area mirror to @saved_params.
+ *
+ * For the convenience of the guest the HV makes a copy of the os area in
+ * flash to a high address in the boot memory region and then puts that RAM
+ * address and the byte count into the repository for retrieval by the guest.
+ * We copy the data we want into a static variable and allow the memory setup
+ * by the HV to be claimed by the memblock manager.
+ *
+ * The os area mirror will not be available to a second stage kernel, and
+ * the header verify will fail.  In this case, the saved_params values will
+ * be set from flash memory or the passed in device tree in ps3_os_area_init().
+ */
+
+void __init ps3_os_area_save_params(void)
+{
+	int result;
+	u64 lpar_addr;
+	unsigned int size;
+	struct os_area_header *header;
+	struct os_area_params *params;
+	struct os_area_db *db;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	result = ps3_repository_read_boot_dat_info(&lpar_addr, &size);
+
+	if (result) {
+		pr_debug("%s:%d ps3_repository_read_boot_dat_info failed\n",
+			__func__, __LINE__);
+		return;
+	}
+
+	header = (struct os_area_header *)__va(lpar_addr);
+	params = (struct os_area_params *)__va(lpar_addr
+		+ OS_AREA_SEGMENT_SIZE);
+
+	result = verify_header(header);
+
+	if (result) {
+		/* Second stage kernels exit here. */
+		pr_debug("%s:%d verify_header failed\n", __func__, __LINE__);
+		dump_header(header);
+		return;
+	}
+
+	db = (struct os_area_db *)__va(lpar_addr
+		+ header->db_area_offset * OS_AREA_SEGMENT_SIZE);
+
+	dump_header(header);
+	dump_params(params);
+	dump_db(db);
+
+	result = db_verify(db) || db_get_rtc_diff(db, &saved_params.rtc_diff);
+	if (result)
+		saved_params.rtc_diff = params->rtc_diff ? params->rtc_diff
+			: SECONDS_FROM_1970_TO_2000;
+	saved_params.av_multi_out = params->av_multi_out;
+	saved_params.valid = 1;
+
+	memset(header, 0, sizeof(*header));
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_os_area_init - Setup os area device tree properties as needed.
+ */
+
+void __init ps3_os_area_init(void)
+{
+	struct device_node *node;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	node = of_find_node_by_path("/");
+
+	if (!saved_params.valid && node) {
+		/* Second stage kernels should have a dt entry. */
+		os_area_get_property(node, &property_rtc_diff);
+		os_area_get_property(node, &property_av_multi_out);
+	}
+	/* Use the default when rtc_diff is still zero. */
+	if (!saved_params.rtc_diff)
+		saved_params.rtc_diff = SECONDS_FROM_1970_TO_2000;
+
+	if (node) {
+		os_area_set_property(node, &property_rtc_diff);
+		os_area_set_property(node, &property_av_multi_out);
+		of_node_put(node);
+	} else
+		pr_debug("%s:%d of_find_node_by_path failed\n",
+			__func__, __LINE__);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_os_area_get_rtc_diff - Returns the rtc diff value.
+ */
+
+u64 ps3_os_area_get_rtc_diff(void)
+{
+	return saved_params.rtc_diff;
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_get_rtc_diff);
+
+/**
+ * ps3_os_area_set_rtc_diff - Set the rtc diff value.
+ *
+ * An asynchronous write is needed to support writing updates from
+ * the timer interrupt context.
+ */
+
+void ps3_os_area_set_rtc_diff(u64 rtc_diff)
+{
+	if (saved_params.rtc_diff != rtc_diff) {
+		saved_params.rtc_diff = rtc_diff;
+		os_area_queue_work();
+	}
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_set_rtc_diff);
+
+/**
+ * ps3_os_area_get_av_multi_out - Returns the default video mode.
+ */
+
+enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void)
+{
+	return saved_params.av_multi_out;
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_get_av_multi_out);
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
new file mode 100644
index 0000000000..6beecdb0d5
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  PS3 platform declarations.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#if !defined(_PS3_PLATFORM_H)
+#define _PS3_PLATFORM_H
+
+#include <linux/rtc.h>
+#include <scsi/scsi.h>
+
+#include <asm/ps3.h>
+
+/* htab */
+
+void __init ps3_hpte_init(unsigned long htab_size);
+void __init ps3_map_htab(void);
+
+/* mm */
+
+void __init ps3_mm_init(void);
+void __init ps3_mm_vas_create(unsigned long* htab_size);
+void ps3_mm_vas_destroy(void);
+void ps3_mm_shutdown(void);
+
+/* irq */
+
+void ps3_init_IRQ(void);
+void ps3_shutdown_IRQ(int cpu);
+void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq);
+void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq);
+
+/* smp */
+
+void __init smp_init_ps3(void);
+#ifdef CONFIG_SMP
+void ps3_smp_cleanup_cpu(int cpu);
+#else
+static inline void ps3_smp_cleanup_cpu(int cpu) { }
+#endif
+
+/* time */
+
+void __init ps3_calibrate_decr(void);
+time64_t __init ps3_get_boot_time(void);
+void ps3_get_rtc_time(struct rtc_time *time);
+int ps3_set_rtc_time(struct rtc_time *time);
+
+/* os area */
+
+void __init ps3_os_area_save_params(void);
+void __init ps3_os_area_init(void);
+
+/* spu */
+
+#if defined(CONFIG_SPU_BASE)
+void ps3_spu_set_platform (void);
+#else
+static inline void ps3_spu_set_platform (void) {}
+#endif
+
+/* repository bus info */
+
+enum ps3_bus_type {
+	PS3_BUS_TYPE_SB = 4,
+	PS3_BUS_TYPE_STORAGE = 5,
+};
+
+enum ps3_dev_type {
+	PS3_DEV_TYPE_STOR_DISK = TYPE_DISK,	/* 0 */
+	PS3_DEV_TYPE_SB_GELIC = 3,
+	PS3_DEV_TYPE_SB_USB = 4,
+	PS3_DEV_TYPE_STOR_ROM = TYPE_ROM,	/* 5 */
+	PS3_DEV_TYPE_SB_GPIO = 6,
+	PS3_DEV_TYPE_STOR_FLASH = TYPE_RBC,	/* 14 */
+};
+
+int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str,
+	u64 *value);
+int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id);
+int ps3_repository_read_bus_type(unsigned int bus_index,
+	enum ps3_bus_type *bus_type);
+int ps3_repository_read_bus_num_dev(unsigned int bus_index,
+	unsigned int *num_dev);
+
+/* repository bus device info */
+
+enum ps3_interrupt_type {
+	PS3_INTERRUPT_TYPE_EVENT_PORT = 2,
+	PS3_INTERRUPT_TYPE_SB_OHCI = 3,
+	PS3_INTERRUPT_TYPE_SB_EHCI = 4,
+	PS3_INTERRUPT_TYPE_OTHER = 5,
+};
+
+enum ps3_reg_type {
+	PS3_REG_TYPE_SB_OHCI = 3,
+	PS3_REG_TYPE_SB_EHCI = 4,
+	PS3_REG_TYPE_SB_GPIO = 5,
+};
+
+int ps3_repository_read_dev_str(unsigned int bus_index,
+	unsigned int dev_index, const char *dev_str, u64 *value);
+int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index,
+	u64 *dev_id);
+int ps3_repository_read_dev_type(unsigned int bus_index,
+	unsigned int dev_index, enum ps3_dev_type *dev_type);
+int ps3_repository_read_dev_intr(unsigned int bus_index,
+	unsigned int dev_index, unsigned int intr_index,
+	enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id);
+int ps3_repository_read_dev_reg_type(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index,
+	enum ps3_reg_type *reg_type);
+int ps3_repository_read_dev_reg_addr(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index, u64 *bus_addr,
+	u64 *len);
+int ps3_repository_read_dev_reg(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index,
+	enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len);
+
+/* repository bus enumerators */
+
+struct ps3_repository_device {
+	unsigned int bus_index;
+	unsigned int dev_index;
+	enum ps3_bus_type bus_type;
+	enum ps3_dev_type dev_type;
+	u64 bus_id;
+	u64 dev_id;
+};
+
+int ps3_repository_find_device(struct ps3_repository_device *repo);
+int ps3_repository_find_device_by_id(struct ps3_repository_device *repo,
+				     u64 bus_id, u64 dev_id);
+int __init ps3_repository_find_devices(enum ps3_bus_type bus_type,
+	int (*callback)(const struct ps3_repository_device *repo));
+int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
+	unsigned int *bus_index);
+int ps3_repository_find_interrupt(const struct ps3_repository_device *repo,
+	enum ps3_interrupt_type intr_type, unsigned int *interrupt_id);
+int ps3_repository_find_reg(const struct ps3_repository_device *repo,
+	enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len);
+
+/* repository block device info */
+
+int ps3_repository_read_stor_dev_port(unsigned int bus_index,
+	unsigned int dev_index, u64 *port);
+int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index,
+	unsigned int dev_index, u64 *blk_size);
+int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index,
+	unsigned int dev_index, u64 *num_blocks);
+int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index,
+	unsigned int dev_index, unsigned int *num_regions);
+int ps3_repository_read_stor_dev_region_id(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index,
+	unsigned int *region_id);
+int ps3_repository_read_stor_dev_region_size(unsigned int bus_index,
+	unsigned int dev_index,	unsigned int region_index, u64 *region_size);
+int ps3_repository_read_stor_dev_region_start(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index, u64 *region_start);
+int ps3_repository_read_stor_dev_info(unsigned int bus_index,
+	unsigned int dev_index, u64 *port, u64 *blk_size,
+	u64 *num_blocks, unsigned int *num_regions);
+int ps3_repository_read_stor_dev_region(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index,
+	unsigned int *region_id, u64 *region_start, u64 *region_size);
+
+/* repository logical pu and memory info */
+
+int ps3_repository_read_num_pu(u64 *num_pu);
+int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id);
+int ps3_repository_read_rm_base(unsigned int ppe_id, u64 *rm_base);
+int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size);
+int ps3_repository_read_region_total(u64 *region_total);
+int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size,
+	u64 *region_total);
+int ps3_repository_read_highmem_region_count(unsigned int *region_count);
+int ps3_repository_read_highmem_base(unsigned int region_index,
+	u64 *highmem_base);
+int ps3_repository_read_highmem_size(unsigned int region_index,
+	u64 *highmem_size);
+int ps3_repository_read_highmem_info(unsigned int region_index,
+	u64 *highmem_base, u64 *highmem_size);
+
+#if defined (CONFIG_PS3_REPOSITORY_WRITE)
+int ps3_repository_write_highmem_region_count(unsigned int region_count);
+int ps3_repository_write_highmem_base(unsigned int region_index,
+	u64 highmem_base);
+int ps3_repository_write_highmem_size(unsigned int region_index,
+	u64 highmem_size);
+int ps3_repository_write_highmem_info(unsigned int region_index,
+	u64 highmem_base, u64 highmem_size);
+int ps3_repository_delete_highmem_info(unsigned int region_index);
+#else
+static inline int ps3_repository_write_highmem_region_count(
+	unsigned int region_count) {return 0;}
+static inline int ps3_repository_write_highmem_base(unsigned int region_index,
+	u64 highmem_base) {return 0;}
+static inline int ps3_repository_write_highmem_size(unsigned int region_index,
+	u64 highmem_size) {return 0;}
+static inline int ps3_repository_write_highmem_info(unsigned int region_index,
+	u64 highmem_base, u64 highmem_size) {return 0;}
+static inline int ps3_repository_delete_highmem_info(unsigned int region_index)
+	{return 0;}
+#endif
+
+/* repository pme info */
+
+int ps3_repository_read_num_be(unsigned int *num_be);
+int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id);
+int ps3_repository_read_be_id(u64 node_id, u64 *be_id);
+int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq);
+int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq);
+
+/* repository performance monitor info */
+
+int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar,
+	u64 *rights);
+
+/* repository 'Other OS' area */
+
+int ps3_repository_read_boot_dat_addr(u64 *lpar_addr);
+int ps3_repository_read_boot_dat_size(unsigned int *size);
+int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size);
+
+/* repository spu info */
+
+/**
+ * enum ps3_spu_resource_type - Type of spu resource.
+ * @PS3_SPU_RESOURCE_TYPE_SHARED: Logical spu is shared with other partitions.
+ * @PS3_SPU_RESOURCE_TYPE_EXCLUSIVE: Logical spu is not shared with other
+ *	partitions.
+ *
+ * Returned by ps3_repository_read_spu_resource_id().
+ */
+enum ps3_spu_resource_type {
+	PS3_SPU_RESOURCE_TYPE_SHARED = 0,
+	PS3_SPU_RESOURCE_TYPE_EXCLUSIVE = 0x8000000000000000UL,
+};
+
+int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved);
+int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id);
+int ps3_repository_read_spu_resource_id(unsigned int res_index,
+	enum ps3_spu_resource_type* resource_type, unsigned int *resource_id);
+
+/* repository vuart info */
+
+int __init ps3_repository_read_vuart_av_port(unsigned int *port);
+int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port);
+
+#endif
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
new file mode 100644
index 0000000000..1abe33fbe5
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -0,0 +1,1380 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 repository routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <asm/lv1call.h>
+
+#include "platform.h"
+
+enum ps3_vendor_id {
+	PS3_VENDOR_ID_NONE = 0,
+	PS3_VENDOR_ID_SONY = 0x8000000000000000UL,
+};
+
+enum ps3_lpar_id {
+	PS3_LPAR_ID_CURRENT = 0,
+	PS3_LPAR_ID_PME = 1,
+};
+
+#define dump_field(_a, _b) _dump_field(_a, _b, __func__, __LINE__)
+static void _dump_field(const char *hdr, u64 n, const char *func, int line)
+{
+#if defined(DEBUG)
+	char s[16];
+	const char *const in = (const char *)&n;
+	unsigned int i;
+
+	for (i = 0; i < 8; i++)
+		s[i] = (in[i] <= 126 && in[i] >= 32) ? in[i] : '.';
+	s[i] = 0;
+
+	pr_devel("%s:%d: %s%016llx : %s\n", func, line, hdr, n, s);
+#endif
+}
+
+#define dump_node_name(_a, _b, _c, _d, _e) \
+	_dump_node_name(_a, _b, _c, _d, _e, __func__, __LINE__)
+static void _dump_node_name(unsigned int lpar_id, u64 n1, u64 n2, u64 n3,
+	u64 n4, const char *func, int line)
+{
+	pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id);
+	_dump_field("n1: ", n1, func, line);
+	_dump_field("n2: ", n2, func, line);
+	_dump_field("n3: ", n3, func, line);
+	_dump_field("n4: ", n4, func, line);
+}
+
+#define dump_node(_a, _b, _c, _d, _e, _f, _g) \
+	_dump_node(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__)
+static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4,
+	u64 v1, u64 v2, const char *func, int line)
+{
+	pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id);
+	_dump_field("n1: ", n1, func, line);
+	_dump_field("n2: ", n2, func, line);
+	_dump_field("n3: ", n3, func, line);
+	_dump_field("n4: ", n4, func, line);
+	pr_devel("%s:%d: v1: %016llx\n", func, line, v1);
+	pr_devel("%s:%d: v2: %016llx\n", func, line, v2);
+}
+
+/**
+ * make_first_field - Make the first field of a repository node name.
+ * @text: Text portion of the field.
+ * @index: Numeric index portion of the field.  Use zero for 'don't care'.
+ *
+ * This routine sets the vendor id to zero (non-vendor specific).
+ * Returns field value.
+ */
+
+static u64 make_first_field(const char *text, u64 index)
+{
+	u64 n = 0;
+
+	memcpy((char *)&n, text, strnlen(text, sizeof(n)));
+	return PS3_VENDOR_ID_NONE + (n >> 32) + index;
+}
+
+/**
+ * make_field - Make subsequent fields of a repository node name.
+ * @text: Text portion of the field.  Use "" for 'don't care'.
+ * @index: Numeric index portion of the field.  Use zero for 'don't care'.
+ *
+ * Returns field value.
+ */
+
+static u64 make_field(const char *text, u64 index)
+{
+	u64 n = 0;
+
+	memcpy((char *)&n, text, strnlen(text, sizeof(n)));
+	return n + index;
+}
+
+/**
+ * read_node - Read a repository node from raw fields.
+ * @lpar_id: Partition to read; PS3_LPAR_ID_CURRENT means the current lpar.
+ * @n1: First field of node name.
+ * @n2: Second field of node name.  Use zero for 'don't care'.
+ * @n3: Third field of node name.  Use zero for 'don't care'.
+ * @n4: Fourth field of node name.  Use zero for 'don't care'.
+ * @_v1: First repository value (high word).  Optional, may be NULL.
+ * @_v2: Second repository value (low word).  Optional, may be NULL.
+ * Return: 0 on success, -ENOENT if lv1_read_repository_node() fails.
+ */
+static int read_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4,
+	u64 *_v1, u64 *_v2)
+{
+	int result;
+	u64 v1;
+	u64 v2;
+
+	if (lpar_id == PS3_LPAR_ID_CURRENT) {
+		u64 id;
+		lv1_get_logical_partition_id(&id);
+		lpar_id = id;
+	}
+
+	result = lv1_read_repository_node(lpar_id, n1, n2, n3, n4, &v1,
+		&v2);
+
+	if (result) {
+		pr_warn("%s:%d: lv1_read_repository_node failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		dump_node_name(lpar_id, n1, n2, n3, n4);
+		return -ENOENT;
+	}
+
+	dump_node(lpar_id, n1, n2, n3, n4, v1, v2);
+
+	if (_v1)
+		*_v1 = v1;
+	if (_v2)
+		*_v2 = v2;
+
+	if (v1 && !_v1)
+		pr_devel("%s:%d: warning: discarding non-zero v1: %016llx\n",
+			__func__, __LINE__, v1);
+	if (v2 && !_v2)
+		pr_devel("%s:%d: warning: discarding non-zero v2: %016llx\n",
+			__func__, __LINE__, v2);
+
+	return 0;
+}
+
+int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str,
+	u64 *value)
+{
+	return read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field(bus_str, 0),
+		0, 0,
+		value, NULL);
+}
+
+int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id)
+{
+	return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index),
+			 make_field("id", 0), 0, 0, bus_id, NULL);
+}
+
+int ps3_repository_read_bus_type(unsigned int bus_index,
+	enum ps3_bus_type *bus_type)
+{
+	int result;
+	u64 v1 = 0;
+
+	result = read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("type", 0),
+		0, 0,
+		&v1, NULL);
+	*bus_type = v1;
+	return result;
+}
+
+int ps3_repository_read_bus_num_dev(unsigned int bus_index,
+	unsigned int *num_dev)
+{
+	int result;
+	u64 v1 = 0;
+
+	result = read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("num_dev", 0),
+		0, 0,
+		&v1, NULL);
+	*num_dev = v1;
+	return result;
+}
+
+int ps3_repository_read_dev_str(unsigned int bus_index,
+	unsigned int dev_index, const char *dev_str, u64 *value)
+{
+	return read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("dev", dev_index),
+		make_field(dev_str, 0),
+		0,
+		value, NULL);
+}
+
+int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index,
+	u64 *dev_id)
+{
+	return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index),
+			 make_field("dev", dev_index), make_field("id", 0), 0,
+			 dev_id, NULL);
+}
+
+int ps3_repository_read_dev_type(unsigned int bus_index,
+	unsigned int dev_index, enum ps3_dev_type *dev_type)
+{
+	int result;
+	u64 v1 = 0;
+
+	result = read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("dev", dev_index),
+		make_field("type", 0),
+		0,
+		&v1, NULL);
+	*dev_type = v1;
+	return result;
+}
+
+int ps3_repository_read_dev_intr(unsigned int bus_index,
+	unsigned int dev_index, unsigned int intr_index,
+	enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id)
+{
+	int result;
+	u64 v1 = 0;
+	u64 v2 = 0;
+
+	result = read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("dev", dev_index),
+		make_field("intr", intr_index),
+		0,
+		&v1, &v2);
+	*intr_type = v1;
+	*interrupt_id = v2;
+	return result;
+}
+
+int ps3_repository_read_dev_reg_type(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index,
+	enum ps3_reg_type *reg_type)
+{
+	int result;
+	u64 v1 = 0;
+
+	result = read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("dev", dev_index),
+		make_field("reg", reg_index),
+		make_field("type", 0),
+		&v1, NULL);
+	*reg_type = v1;
+	return result;
+}
+
+int ps3_repository_read_dev_reg_addr(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index, u64 *bus_addr, u64 *len)
+{
+	return read_node(PS3_LPAR_ID_PME,
+		make_first_field("bus", bus_index),
+		make_field("dev", dev_index),
+		make_field("reg", reg_index),
+		make_field("data", 0),
+		bus_addr, len);
+}
+
+int ps3_repository_read_dev_reg(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index,
+	enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len)
+{
+	int result = ps3_repository_read_dev_reg_type(bus_index, dev_index,
+		reg_index, reg_type);
+	return result ? result
+		: ps3_repository_read_dev_reg_addr(bus_index, dev_index,
+		reg_index, bus_addr, len);
+}
+
+
+
+int ps3_repository_find_device(struct ps3_repository_device *repo)
+{
+	int result;
+	struct ps3_repository_device tmp = *repo;
+	unsigned int num_dev;
+
+	BUG_ON(repo->bus_index > 10);
+	BUG_ON(repo->dev_index > 10);
+
+	result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev);
+
+	if (result) {
+		pr_devel("%s:%d read_bus_num_dev failed\n", __func__, __LINE__);
+		return result;
+	}
+
+	pr_devel("%s:%d: bus_type %u, bus_index %u, bus_id %llu, num_dev %u\n",
+		__func__, __LINE__, tmp.bus_type, tmp.bus_index, tmp.bus_id,
+		num_dev);
+
+	if (tmp.dev_index >= num_dev) {
+		pr_devel("%s:%d: no device found\n", __func__, __LINE__);
+		return -ENODEV;
+	}
+
+	result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index,
+		&tmp.dev_type);
+
+	if (result) {
+		pr_devel("%s:%d read_dev_type failed\n", __func__, __LINE__);
+		return result;
+	}
+
+	result = ps3_repository_read_dev_id(tmp.bus_index, tmp.dev_index,
+		&tmp.dev_id);
+
+	if (result) {
+		pr_devel("%s:%d ps3_repository_read_dev_id failed\n", __func__,
+		__LINE__);
+		return result;
+	}
+
+	pr_devel("%s:%d: found: dev_type %u, dev_index %u, dev_id %llu\n",
+		__func__, __LINE__, tmp.dev_type, tmp.dev_index, tmp.dev_id);
+
+	*repo = tmp;
+	return 0;
+}
+
+int ps3_repository_find_device_by_id(struct ps3_repository_device *repo,
+				     u64 bus_id, u64 dev_id)
+{
+	int result = -ENODEV;
+	struct ps3_repository_device tmp;
+	unsigned int num_dev;
+
+	pr_devel(" -> %s:%u: find device by id %llu:%llu\n", __func__, __LINE__,
+		 bus_id, dev_id);
+
+	for (tmp.bus_index = 0; tmp.bus_index < 10; tmp.bus_index++) {
+		result = ps3_repository_read_bus_id(tmp.bus_index,
+						    &tmp.bus_id);
+		if (result) {
+			pr_devel("%s:%u read_bus_id(%u) failed\n", __func__,
+				 __LINE__, tmp.bus_index);
+			return result;
+		}
+
+		if (tmp.bus_id == bus_id)
+			goto found_bus;
+
+		pr_devel("%s:%u: skip, bus_id %llu\n", __func__, __LINE__,
+			 tmp.bus_id);
+	}
+	pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__);
+	return -ENODEV; /* result is 0 here: every read_bus_id succeeded */
+
+found_bus:
+	result = ps3_repository_read_bus_type(tmp.bus_index, &tmp.bus_type);
+	if (result) {
+		pr_devel("%s:%u read_bus_type(%u) failed\n", __func__,
+			 __LINE__, tmp.bus_index);
+		return result;
+	}
+
+	result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev);
+	if (result) {
+		pr_devel("%s:%u read_bus_num_dev failed\n", __func__,
+			 __LINE__);
+		return result;
+	}
+
+	for (tmp.dev_index = 0; tmp.dev_index < num_dev; tmp.dev_index++) {
+		result = ps3_repository_read_dev_id(tmp.bus_index,
+						    tmp.dev_index,
+						    &tmp.dev_id);
+		if (result) {
+			pr_devel("%s:%u read_dev_id(%u:%u) failed\n", __func__,
+				 __LINE__, tmp.bus_index, tmp.dev_index);
+			return result;
+		}
+
+		if (tmp.dev_id == dev_id)
+			goto found_dev;
+
+		pr_devel("%s:%u: skip, dev_id %llu\n", __func__, __LINE__,
+			 tmp.dev_id);
+	}
+	pr_devel(" <- %s:%u: dev not found\n", __func__, __LINE__);
+	return -ENODEV; /* result is 0 here, and *repo was never written */
+
+found_dev:
+	result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index,
+					      &tmp.dev_type);
+	if (result) {
+		pr_devel("%s:%u read_dev_type failed\n", __func__, __LINE__);
+		return result;
+	}
+
+	pr_devel(" <- %s:%u: found: type (%u:%u) index (%u:%u) id (%llu:%llu)\n",
+		 __func__, __LINE__, tmp.bus_type, tmp.dev_type, tmp.bus_index,
+		 tmp.dev_index, tmp.bus_id, tmp.dev_id);
+	*repo = tmp;
+	return 0;
+}
+
+int __init ps3_repository_find_devices(enum ps3_bus_type bus_type,
+	int (*callback)(const struct ps3_repository_device *repo))
+{
+	struct ps3_repository_device repo;
+	int rc;
+
+	pr_devel(" -> %s:%d: find bus_type %u\n", __func__, __LINE__, bus_type);
+
+	repo.bus_type = bus_type;
+	rc = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index);
+	if (rc) {
+		pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__);
+		return rc;
+	}
+
+	rc = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id);
+	if (rc) {
+		pr_devel("%s:%d read_bus_id(%u) failed\n", __func__, __LINE__,
+			 repo.bus_index);
+		return rc;
+	}
+
+	/* Pass each device found to callback; a callback error aborts. */
+	for (repo.dev_index = 0; ; repo.dev_index++) {
+		rc = ps3_repository_find_device(&repo);
+		if (rc) {
+			if (rc == -ENODEV)	/* ends the scan, not an error */
+				rc = 0;
+			break;
+		}
+
+		rc = callback(&repo);
+		if (rc) {
+			pr_devel("%s:%d: abort at callback\n", __func__, __LINE__);
+			break;
+		}
+	}
+
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return rc;
+}
+
+int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
+	unsigned int *bus_index)
+{
+	unsigned int idx;
+	int rc;
+
+	/* Scan bus slots [from, 10); *bus_index is UINT_MAX unless one matches. */
+	*bus_index = UINT_MAX;
+	for (idx = from; idx < 10; idx++) {
+		enum ps3_bus_type found;
+
+		rc = ps3_repository_read_bus_type(idx, &found);
+		if (rc) {
+			pr_devel("%s:%d read_bus_type failed\n", __func__, __LINE__);
+			return rc;
+		}
+		if (found == bus_type) {
+			*bus_index = idx;
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+int ps3_repository_find_interrupt(const struct ps3_repository_device *repo,
+	enum ps3_interrupt_type intr_type, unsigned int *interrupt_id)
+{
+	unsigned int slot;
+
+	pr_devel("%s:%d: find intr_type %u\n", __func__, __LINE__, intr_type);
+
+	/* UINT_MAX is what the caller sees unless a matching slot turns up. */
+	*interrupt_id = UINT_MAX;
+
+	/* Probe resource slots 0..9 of the device for the wanted type. */
+	for (slot = 0; slot < 10; slot++) {
+		enum ps3_interrupt_type type;
+		unsigned int id;
+		int rc;
+
+		rc = ps3_repository_read_dev_intr(repo->bus_index,
+			repo->dev_index, slot, &type, &id);
+
+		if (rc) {
+			pr_devel("%s:%d read_dev_intr failed\n",
+				__func__, __LINE__);
+			return rc;
+		}
+
+		if (type != intr_type)
+			continue;
+
+		*interrupt_id = id;
+		pr_devel("%s:%d: found intr_type %u at res_index %u\n",
+			__func__, __LINE__, intr_type, slot);
+		return 0;
+	}
+
+	/* Every slot was readable, yet none had the requested type. */
+	return -ENODEV;
+}
+
+int ps3_repository_find_reg(const struct ps3_repository_device *repo,
+	enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len)
+{
+	unsigned int slot;
+
+	pr_devel("%s:%d: find reg_type %u\n", __func__, __LINE__, reg_type);
+
+	/* Both outputs stay 0 unless a matching slot turns up. */
+	*bus_addr = *len = 0;
+
+	/* Probe resource slots 0..9 of the device for the wanted type. */
+	for (slot = 0; slot < 10; slot++) {
+		enum ps3_reg_type type;
+		u64 addr;
+		u64 size;
+		int rc;
+
+		rc = ps3_repository_read_dev_reg(repo->bus_index,
+			repo->dev_index, slot, &type, &addr, &size);
+
+		if (rc) {
+			pr_devel("%s:%d read_dev_reg failed\n",
+				__func__, __LINE__);
+			return rc;
+		}
+
+		if (type != reg_type)
+			continue;
+
+		*bus_addr = addr;
+		*len = size;
+		pr_devel("%s:%d: found reg_type %u at res_index %u\n",
+			__func__, __LINE__, reg_type, slot);
+		return 0;
+	}
+
+	/* Every slot was readable, yet none had the requested type. */
+	return -ENODEV;
+}
+
+int ps3_repository_read_stor_dev_port(unsigned int bus_index,
+	unsigned int dev_index, u64 *port)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("port", 0);	/* value -> *port */
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, port, NULL);
+}
+
+int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index,
+	unsigned int dev_index, u64 *blk_size)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("blk_size", 0);	/* value -> *blk_size */
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, blk_size, NULL);
+}
+
+int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index,
+	unsigned int dev_index, u64 *num_blocks)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("n_blocks", 0);	/* value -> *num_blocks */
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, num_blocks, NULL);
+}
+
+int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index,
+	unsigned int dev_index, unsigned int *num_regions)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("n_regs", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, &raw, NULL);
+	/* Stored unconditionally, narrowed from u64 to unsigned int. */
+	*num_regions = raw;
+	return rc;
+}
+
+int ps3_repository_read_stor_dev_region_id(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index,
+	unsigned int *region_id)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("region", region_index);
+	const u64 n4 = make_field("id", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, &raw, NULL);
+	/* Stored unconditionally, narrowed from u64 to unsigned int. */
+	*region_id = raw;
+	return rc;
+}
+
+int ps3_repository_read_stor_dev_region_size(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index, u64 *region_size)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("region", region_index);
+	const u64 n4 = make_field("size", 0);	/* value -> *region_size */
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, region_size, NULL);
+}
+
+int ps3_repository_read_stor_dev_region_start(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index, u64 *region_start)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("region", region_index);
+	const u64 n4 = make_field("start", 0);	/* value -> *region_start */
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, region_start, NULL);
+}
+
+int ps3_repository_read_stor_dev_info(unsigned int bus_index,
+	unsigned int dev_index, u64 *port, u64 *blk_size,
+	u64 *num_blocks, unsigned int *num_regions)
+{
+	int rc;
+
+	/*
+	 * Fetch port, block size, block count and region count in that
+	 * order.  The first failing read ends the sequence, so the later
+	 * helpers are not called and their outputs are not written.
+	 */
+	rc = ps3_repository_read_stor_dev_port(bus_index, dev_index, port);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_blk_size(bus_index,
+			dev_index, blk_size);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_num_blocks(bus_index,
+			dev_index, num_blocks);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_num_regions(bus_index,
+			dev_index, num_regions);
+
+	return rc;
+}
+
+int ps3_repository_read_stor_dev_region(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index,
+	unsigned int *region_id, u64 *region_start, u64 *region_size)
+{
+	int rc;
+
+	/*
+	 * Read id, start and size of the region, in that order; the first
+	 * failure is returned and the remaining reads are skipped.
+	 */
+	rc = ps3_repository_read_stor_dev_region_id(bus_index, dev_index,
+		region_index, region_id);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_region_start(bus_index,
+			dev_index, region_index, region_start);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_region_size(bus_index,
+			dev_index, region_index, region_size);
+	return rc;
+}
+
+/**
+ * ps3_repository_read_num_pu - Number of logical PU processors for this lpar.
+ * @num_pu: Receives the count; zeroed before the repository is read.
+ */
+
+int ps3_repository_read_num_pu(u64 *num_pu)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("pun", 0);
+
+	*num_pu = 0;
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, num_pu, NULL);
+}
+
+/**
+ * ps3_repository_read_pu_id - Read the logical PU id.
+ * @pu_index: Zero based index.
+ * @pu_id: The logical PU id.
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("pu", pu_index);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, pu_id, NULL);
+}
+
+int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("pu", 0);
+	const u64 n4 = make_field("rm_size", 0);
+
+	/* ppe_id is the raw third path field; it is not run through make_field(). */
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, ppe_id, n4, rm_size, NULL);
+}
+
+int ps3_repository_read_region_total(u64 *region_total)
+{
+	/* Path fields "bi" and "rgntotal", read with PS3_LPAR_ID_CURRENT. */
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("rgntotal", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, region_total, NULL);
+}
+
+/**
+ * ps3_repository_read_mm_info - Read mm info for single pu system.
+ * @rm_base: Real mode memory base address; always set to 0 here.
+ * @rm_size: Real mode memory size.
+ * @region_total: Maximum memory region size.
+ */
+
+int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size, u64 *region_total)
+{
+	u64 ppe_id;
+	int rc;
+
+	/* The lv1 call's own status is not examined; only ppe_id is used. */
+	lv1_get_logical_ppe_id(&ppe_id);
+	*rm_base = 0;
+	rc = ps3_repository_read_rm_size(ppe_id, rm_size);
+	return rc ?: ps3_repository_read_region_total(region_total);
+}
+
+/**
+ * ps3_repository_read_highmem_region_count - Read the number of highmem regions
+ * @region_count: Receives the count, narrowed from the u64 node value.
+ *
+ * Bootloaders must arrange the repository nodes such that regions are indexed
+ * with a region_index from 0 to region_count-1.
+ */
+
+int ps3_repository_read_highmem_region_count(unsigned int *region_count)
+{
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", 0);
+	const u64 n3 = make_field("count", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0, &raw, NULL);
+	/* Stored whatever rc is. */
+	*region_count = raw;
+	return rc;
+}
+
+
+int ps3_repository_read_highmem_base(unsigned int region_index,
+	u64 *highmem_base)
+{
+	/* Path fields: "highmem", "region" (indexed by region_index), "base". */
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("base", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0, highmem_base, NULL);
+}
+
+int ps3_repository_read_highmem_size(unsigned int region_index,
+	u64 *highmem_size)
+{
+	/* Path fields: "highmem", "region" (indexed by region_index), "size". */
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("size", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0, highmem_size, NULL);
+}
+
+/**
+ * ps3_repository_read_highmem_info - Read high memory region info
+ * @region_index: Region index, {0,..,region_count-1}.
+ * @highmem_base: High memory base address; zeroed before the read.
+ * @highmem_size: High memory size.
+ *
+ * Bootloaders that preallocate highmem regions must place the
+ * region info into the repository at these well known nodes.
+ */
+
+int ps3_repository_read_highmem_info(unsigned int region_index,
+	u64 *highmem_base, u64 *highmem_size)
+{
+	int rc;
+
+	*highmem_base = 0;
+	rc = ps3_repository_read_highmem_base(region_index, highmem_base);
+	/* The size is only read once the base has been read successfully. */
+	return rc ?: ps3_repository_read_highmem_size(region_index, highmem_size);
+}
+
+/**
+ * ps3_repository_read_num_spu_reserved - Number of physical spus reserved.
+ * @num_spu_reserved: Number of physical spus.
+ *
+ * The value is stored to @num_spu_reserved even when the read fails.
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("spun", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, &raw, NULL);
+	*num_spu_reserved = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_num_spu_resource_id - Number of spu resource reservations.
+ * @num_resource_id: Number of spu resource ids.
+ *
+ * The value is stored to @num_resource_id even when the read fails.
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("spursvn", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, &raw, NULL);
+	*num_resource_id = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_spu_resource_id - spu resource reservation id value.
+ * @res_index: Resource reservation index.
+ * @resource_type: Resource reservation type.
+ * @resource_id: Resource reservation id.
+ *
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_spu_resource_id(unsigned int res_index,
+	enum ps3_spu_resource_type *resource_type, unsigned int *resource_id)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("spursv", 0);
+	u64 raw_type = 0;
+	u64 raw_id = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, res_index, 0,
+		       &raw_type, &raw_id);
+	*resource_type = raw_type;
+	*resource_id = raw_id;
+	return rc;
+}
+
+static int ps3_repository_read_boot_dat_address(u64 *address)
+{
+	/* Path fields: "bi", "boot_dat", "address"; first value -> *address. */
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("boot_dat", 0);
+	const u64 n3 = make_field("address", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0, address, NULL);
+}
+
+int ps3_repository_read_boot_dat_size(unsigned int *size)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("boot_dat", 0);
+	const u64 n3 = make_field("size", 0);
+	u64 raw = 0;
+	int rc;
+
+	/* Path fields: "bi", "boot_dat", "size". */
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0, &raw, NULL);
+	/* Stored unconditionally, narrowed from u64 to unsigned int. */
+	*size = raw;
+	return rc;
+}
+
+int __init ps3_repository_read_vuart_av_port(unsigned int *port)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("vir_uart", 0);
+	const u64 n3 = make_field("port", 0);
+	const u64 n4 = make_field("avset", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, n4, &raw, NULL);
+	/* Stored unconditionally, narrowed from u64 to unsigned int. */
+	*port = raw;
+	return rc;
+}
+
+int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("vir_uart", 0);
+	const u64 n3 = make_field("port", 0);
+	const u64 n4 = make_field("sysmgr", 0);
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, n4, &raw, NULL);
+	/* Stored unconditionally, narrowed from u64 to unsigned int. */
+	*port = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area.
+ * @lpar_addr: lpar address of cell_ext_os_area
+ * @size: size of cell_ext_os_area
+ */
+
+int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size)
+{
+	int rc;
+
+	/* *size reads as 0 when the address lookup fails. */
+	*size = 0;
+	rc = ps3_repository_read_boot_dat_address(lpar_addr);
+	return rc ?: ps3_repository_read_boot_dat_size(size);
+}
+
+/**
+ * ps3_repository_read_num_be - Number of physical BE processors in the system.
+ * @num_be: Receives the count, narrowed from the u64 node value.
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_num_be(unsigned int *num_be)
+{
+	u64 raw = 0;
+	int rc;
+
+	rc = read_node(PS3_LPAR_ID_PME,
+		make_first_field("ben", 0),
+		0, 0, 0,	/* remaining path fields are 0 */
+		&raw, NULL);
+	*num_be = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_be_node_id - Read the physical BE processor node id.
+ * @be_index: Zero based index.
+ * @node_id: The BE processor node id.
+ *
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id)
+{
+	const u64 be = make_first_field("be", be_index);
+
+	/* Only the first path field is set; the other three are 0. */
+	return read_node(PS3_LPAR_ID_PME, be, 0, 0, 0, node_id, NULL);
+}
+
+/**
+ * ps3_repository_read_be_id - Read the physical BE processor id.
+ * @node_id: The BE processor node id.
+ * @be_id: The BE processor id.
+ *
+ * Return: the result of read_node().
+ */
+
+int ps3_repository_read_be_id(u64 node_id, u64 *be_id)
+{
+	const u64 be = make_first_field("be", 0);
+
+	/* node_id itself is the second path field. */
+	return read_node(PS3_LPAR_ID_PME, be, node_id, 0, 0, be_id, NULL);
+}
+
+int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq)
+{
+	const u64 be = make_first_field("be", 0);
+	const u64 clock = make_field("clock", 0);
+
+	/* Path: "be", the raw node_id, then "clock"; value -> *tb_freq. */
+	return read_node(PS3_LPAR_ID_PME, be, node_id, clock, 0,
+			 tb_freq, NULL);
+}
+
+int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq)
+{
+	u64 node_id;
+	int rc;
+
+	/* *tb_freq reads as 0 when the node id lookup fails. */
+	*tb_freq = 0;
+	rc = ps3_repository_read_be_node_id(be_index, &node_id);
+	return rc ?: ps3_repository_read_tb_freq(node_id, tb_freq);
+}
+
+int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar,
+	u64 *rights)
+{
+	u64 node_id;
+	int rc;
+
+	/* Both outputs read as 0 when the node id lookup fails. */
+	*lpar = 0;
+	*rights = 0;
+	rc = ps3_repository_read_be_node_id(be_index, &node_id);
+	if (rc)
+		return rc;
+
+	return read_node(PS3_LPAR_ID_PME, make_first_field("be", 0), node_id,
+			 make_field("lpm", 0), make_field("priv", 0),
+			 lpar, rights);
+}
+
+#if defined(CONFIG_PS3_REPOSITORY_WRITE)
+
+static int create_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2)
+{
+	int lv1_rc;
+
+	/* Create repository node n1..n4 holding the values v1 and v2. */
+	dump_node(0, n1, n2, n3, n4, v1, v2);
+
+	lv1_rc = lv1_create_repository_node(n1, n2, n3, n4, v1, v2);
+	if (!lv1_rc)
+		return 0;
+
+	/* Whatever the lv1 status was, callers only ever see -ENOENT. */
+	pr_devel("%s:%d: lv1_create_repository_node failed: %s\n",
+		__func__, __LINE__, ps3_result(lv1_rc));
+	return -ENOENT;
+}
+
+static int delete_node(u64 n1, u64 n2, u64 n3, u64 n4)
+{
+	int lv1_rc;
+
+	/* Delete repository node n1..n4. */
+	dump_node(0, n1, n2, n3, n4, 0, 0);
+
+	lv1_rc = lv1_delete_repository_node(n1, n2, n3, n4);
+	if (!lv1_rc)
+		return 0;
+
+	/* Whatever the lv1 status was, callers only ever see -ENOENT. */
+	pr_devel("%s:%d: lv1_delete_repository_node failed: %s\n",
+		__func__, __LINE__, ps3_result(lv1_rc));
+	return -ENOENT;
+}
+
+static int write_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2)
+{
+	int lv1_rc;
+
+	/*
+	 * Try to create the node with the values first; only when that
+	 * fails is the lv1 write call used on the node instead.
+	 */
+	if (create_node(n1, n2, n3, n4, v1, v2) == 0)
+		return 0;
+
+	lv1_rc = lv1_write_repository_node(n1, n2, n3, n4, v1, v2);
+	if (!lv1_rc)
+		return 0;
+
+	pr_devel("%s:%d: lv1_write_repository_node failed: %s\n",
+		__func__, __LINE__, ps3_result(lv1_rc));
+	return -ENOENT;
+}
+
+int ps3_repository_write_highmem_region_count(unsigned int region_count)
+{
+	/*
+	 * The count becomes the first value of the node named by the
+	 * "highmem", "region" and "count" fields; the second value is 0.
+	 */
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", 0);
+	const u64 n3 = make_field("count", 0);
+	const u64 count = region_count;
+
+	return write_node(n1, n2, n3, 0, count, 0);
+}
+
+int ps3_repository_write_highmem_base(unsigned int region_index,
+	u64 highmem_base)
+{
+	/* Path fields: "highmem", "region" (indexed by region_index), "base". */
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("base", 0);
+
+	return write_node(n1, n2, n3, 0, highmem_base, 0);
+}
+
+int ps3_repository_write_highmem_size(unsigned int region_index,
+	u64 highmem_size)
+{
+	/* Path fields: "highmem", "region" (indexed by region_index), "size". */
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("size", 0);
+
+	return write_node(n1, n2, n3, 0, highmem_size, 0);
+}
+
+int ps3_repository_write_highmem_info(unsigned int region_index,
+	u64 highmem_base, u64 highmem_size)
+{
+	int rc;
+
+	/* The size node is written only after the base node succeeded. */
+	rc = ps3_repository_write_highmem_base(region_index, highmem_base);
+	return rc ?: ps3_repository_write_highmem_size(region_index, highmem_size);
+}
+
+static int ps3_repository_delete_highmem_base(unsigned int region_index)
+{
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("base", 0);	/* node to remove */
+
+	return delete_node(n1, n2, n3, 0);
+}
+
+static int ps3_repository_delete_highmem_size(unsigned int region_index)
+{
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("size", 0);	/* node to remove */
+
+	return delete_node(n1, n2, n3, 0);
+}
+
+int ps3_repository_delete_highmem_info(unsigned int region_index)
+{
+	int base_rc, size_rc;
+
+	/* Both deletions are attempted even if the first one fails. */
+	base_rc = ps3_repository_delete_highmem_base(region_index);
+	size_rc = ps3_repository_delete_highmem_size(region_index);
+	return (base_rc + size_rc) ? -1 : 0;
+}
+
+#endif /* defined(CONFIG_PS3_REPOSITORY_WRITE) */
+
+#if defined(DEBUG)
+
+int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
+{
+	int result = 0;
+	unsigned int res_index;
+
+	pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__,
+		repo->bus_index, repo->dev_index);
+	/* Log the interrupt resources in slots 0..9 until a read fails. */
+	for (res_index = 0; res_index < 10; res_index++) {
+		enum ps3_interrupt_type intr_type;
+		unsigned int interrupt_id;
+
+		result = ps3_repository_read_dev_intr(repo->bus_index,
+			repo->dev_index, res_index, &intr_type, &interrupt_id);
+		/* NOTE(review): confirm the helper can return LV1_NO_ENTRY. */
+		if (result) {
+			if (result !=  LV1_NO_ENTRY)
+				pr_devel("%s:%d ps3_repository_read_dev_intr"
+					" (%u:%u) failed\n", __func__, __LINE__,
+					repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		pr_devel("%s:%d (%u:%u) intr_type %u, interrupt_id %u\n",
+			__func__, __LINE__, repo->bus_index, repo->dev_index,
+			intr_type, interrupt_id);
+	}
+	/* Same walk over the register resources; result is overwritten. */
+	for (res_index = 0; res_index < 10; res_index++) {
+		enum ps3_reg_type reg_type;
+		u64 bus_addr;
+		u64 len;
+
+		result = ps3_repository_read_dev_reg(repo->bus_index,
+			repo->dev_index, res_index, &reg_type, &bus_addr, &len);
+
+		if (result) {
+			if (result !=  LV1_NO_ENTRY)
+				pr_devel("%s:%d ps3_repository_read_dev_reg"
+					" (%u:%u) failed\n", __func__, __LINE__,
+					repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		pr_devel("%s:%d (%u:%u) reg_type %u, bus_addr %llxh, len %llxh\n",
+			__func__, __LINE__, repo->bus_index, repo->dev_index,
+			reg_type, bus_addr, len);
+	}
+
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return result;	/* status of the last register read attempted */
+}
+
+static int __init dump_stor_dev_info(struct ps3_repository_device *repo)
+{
+	int result = 0;
+	unsigned int num_regions, region_index;
+	u64 port, blk_size, num_blocks;
+
+	pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__,
+		repo->bus_index, repo->dev_index);
+	/* Log the storage device summary, then each of its regions. */
+	result = ps3_repository_read_stor_dev_info(repo->bus_index,
+		repo->dev_index, &port, &blk_size, &num_blocks, &num_regions);
+	if (result) {
+		pr_devel("%s:%d ps3_repository_read_stor_dev_info"
+			" (%u:%u) failed\n", __func__, __LINE__,
+			repo->bus_index, repo->dev_index);
+		goto out;
+	}
+
+	pr_devel("%s:%d  (%u:%u): port %llu, blk_size %llu, num_blocks "
+		 "%llu, num_regions %u\n",
+		 __func__, __LINE__, repo->bus_index, repo->dev_index,
+		port, blk_size, num_blocks, num_regions);
+	/* Stop at the first region that cannot be read. */
+	for (region_index = 0; region_index < num_regions; region_index++) {
+		unsigned int region_id;
+		u64 region_start, region_size;
+
+		result = ps3_repository_read_stor_dev_region(repo->bus_index,
+			repo->dev_index, region_index, &region_id,
+			&region_start, &region_size);
+		if (result) {
+			 pr_devel("%s:%d ps3_repository_read_stor_dev_region"
+				  " (%u:%u) failed\n", __func__, __LINE__,
+				  repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		pr_devel("%s:%d (%u:%u) region_id %u, start %lxh, size %lxh\n",
+			__func__, __LINE__, repo->bus_index, repo->dev_index,
+			region_id, (unsigned long)region_start,
+			(unsigned long)region_size);
+	}
+
+out:
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return result;	/* 0, or the status of the read that failed */
+}
+
+static int __init dump_device_info(struct ps3_repository_device *repo,
+	unsigned int num_dev)
+{
+	int result = 0;
+
+	pr_devel(" -> %s:%d: bus_%u\n", __func__, __LINE__, repo->bus_index);
+	/* Dump devices 0..num_dev-1 of the bus; repo's dev_* fields are overwritten. */
+	for (repo->dev_index = 0; repo->dev_index < num_dev;
+		repo->dev_index++) {
+
+		result = ps3_repository_read_dev_type(repo->bus_index,
+			repo->dev_index, &repo->dev_type);
+		/* A dev_type failure ends the whole walk ... */
+		if (result) {
+			pr_devel("%s:%d ps3_repository_read_dev_type"
+				" (%u:%u) failed\n", __func__, __LINE__,
+				repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		result = ps3_repository_read_dev_id(repo->bus_index,
+			repo->dev_index, &repo->dev_id);
+		/* ... whereas a dev_id failure only skips this device. */
+		if (result) {
+			pr_devel("%s:%d ps3_repository_read_dev_id"
+				" (%u:%u) failed\n", __func__, __LINE__,
+				repo->bus_index, repo->dev_index);
+			continue;
+		}
+
+		pr_devel("%s:%d  (%u:%u): dev_type %u, dev_id %lu\n", __func__,
+			__LINE__, repo->bus_index, repo->dev_index,
+			repo->dev_type, (unsigned long)repo->dev_id);
+		/* The results of the two nested dumps are not checked. */
+		ps3_repository_dump_resource_info(repo);
+
+		if (repo->bus_type == PS3_BUS_TYPE_STORAGE)
+			dump_stor_dev_info(repo);
+	}
+
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return result;	/* status of the last dev_type/dev_id read */
+}
+
+int __init ps3_repository_dump_bus_info(void)
+{
+	int result = 0;
+	struct ps3_repository_device repo;
+
+	pr_devel(" -> %s:%d\n", __func__, __LINE__);
+
+	memset(&repo, 0, sizeof(repo));
+	/* Walk bus slots 0..9, logging each bus and then its devices. */
+	for (repo.bus_index = 0; repo.bus_index < 10; repo.bus_index++) {
+		unsigned int num_dev;
+
+		result = ps3_repository_read_bus_type(repo.bus_index,
+			&repo.bus_type);
+		/* A bus_type read failure ends the walk ... */
+		if (result) {
+			pr_devel("%s:%d read_bus_type(%u) failed\n",
+				__func__, __LINE__, repo.bus_index);
+			break;
+		}
+
+		result = ps3_repository_read_bus_id(repo.bus_index,
+			&repo.bus_id);
+		/* ... while bus_id and num_dev failures skip to the next bus. */
+		if (result) {
+			pr_devel("%s:%d read_bus_id(%u) failed\n",
+				__func__, __LINE__, repo.bus_index);
+			continue;
+		}
+
+		if (repo.bus_index != repo.bus_id)
+			pr_devel("%s:%d bus_index != bus_id\n",
+				__func__, __LINE__);
+
+		result = ps3_repository_read_bus_num_dev(repo.bus_index,
+			&num_dev);
+
+		if (result) {
+			pr_devel("%s:%d read_bus_num_dev(%u) failed\n",
+				__func__, __LINE__, repo.bus_index);
+			continue;
+		}
+
+		pr_devel("%s:%d bus_%u: bus_type %u, bus_id %lu, num_dev %u\n",
+			__func__, __LINE__, repo.bus_index, repo.bus_type,
+			(unsigned long)repo.bus_id, num_dev);
+
+		dump_device_info(&repo, num_dev);
+	}
+
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return result;	/* status of the last bus-level read; device dumps are ignored */
+}
+
+#endif /* defined(DEBUG) */
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
new file mode 100644
index 0000000000..5144f11359
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 platform setup routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/time.h>
+#include <asm/iommu.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/ps3gpu.h>
+
+#include "platform.h"
+
+#if defined(DEBUG)
+#define DBG udbg_printf
+#else
+#define DBG pr_debug
+#endif
+
+/* mutex synchronizing GPU accesses and video mode changes */
+DEFINE_MUTEX(ps3_gpu_mutex);
+EXPORT_SYMBOL_GPL(ps3_gpu_mutex);
+
+static union ps3_firmware_version ps3_firmware_version;	/* .raw is filled in by ps3_setup_arch() */
+static char ps3_firmware_version_str[16];	/* "major.minor.rev" text built by ps3_setup_arch() */
+
+void ps3_get_firmware_version(union ps3_firmware_version *v)
+{
+	*v = ps3_firmware_version;	/* hand the caller a copy of the cached value */
+}
+EXPORT_SYMBOL_GPL(ps3_get_firmware_version);
+
+int ps3_compare_firmware_version(u16 major, u16 minor, u16 rev)
+{
+	union ps3_firmware_version want;
+
+	want.pad = 0;
+	want.major = major;
+	want.minor = minor;
+	want.rev = rev;
+	/* 1, 0 or -1: running raw version is above, equal to or below want. */
+	return ps3_firmware_version.raw == want.raw ? 0 :
+	       ps3_firmware_version.raw > want.raw ? 1 : -1;
+}
+EXPORT_SYMBOL_GPL(ps3_compare_firmware_version);
+
+static void ps3_power_save(void)
+{
+	/*
+	 * lv1_pause() puts the PPE thread into inactive state until an
+	 * irq on an unmasked plug exists. MSR[EE] has no effect.
+	 * flags: 0 = wake on DEC interrupt, 1 = ignore DEC interrupt.
+	 */
+
+	lv1_pause(0);	/* 0: a DEC interrupt may wake the thread */
+}
+
+static void __noreturn ps3_restart(char *cmd)
+{
+	DBG("%s:%d cmd '%s'\n", __func__, __LINE__, cmd);
+	/* cmd is only logged above; it is not handed to the system manager. */
+	smp_send_stop();
+	ps3_sys_manager_restart(); /* never returns */
+}
+
+static void ps3_power_off(void)
+{
+	DBG("%s:%d\n", __func__, __LINE__);
+	/* Installed as pm_power_off by ps3_probe(). */
+	smp_send_stop();
+	ps3_sys_manager_power_off(); /* never returns */
+}
+
+static void __noreturn ps3_halt(void)
+{
+	DBG("%s:%d\n", __func__, __LINE__);
+	/* Machine .halt hook: same sequence as restart, ending in the halt call. */
+	smp_send_stop();
+	ps3_sys_manager_halt(); /* never returns */
+}
+
+static void ps3_panic(char *str)
+{
+	DBG("%s:%d %s\n", __func__, __LINE__, str);
+
+	smp_send_stop();
+	printk("\n");
+	printk("   System does not reboot automatically.\n");
+	printk("   Please press POWER button.\n");
+	printk("\n");
+	panic_flush_kmsg_end();
+	/* Never return: pause forever with flag 1 (ignore DEC interrupts). */
+	for (;;)
+		lv1_pause(1);
+}
+
+#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
+    defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
+static void __init prealloc(struct ps3_prealloc *p)
+{
+	/* Size 0 (e.g. after "ps3flash=off") means there is nothing to reserve. */
+	if (p->size == 0)
+		return;
+
+	p->address = memblock_alloc(p->size, p->align);
+	if (p->address == NULL)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, p->size, p->align);
+
+	printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address);
+}
+#endif
+
+#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE)
+struct ps3_prealloc ps3fb_videomemory = {
+	.name = "ps3fb videomemory",
+	.size = CONFIG_FB_PS3_DEFAULT_SIZE_M*1024*1024,	/* default; "ps3fb=" overrides it */
+	.align = 1024*1024		/* the GPU requires 1 MiB alignment */
+};
+EXPORT_SYMBOL_GPL(ps3fb_videomemory);
+#define prealloc_ps3fb_videomemory()	prealloc(&ps3fb_videomemory)
+
+static int __init early_parse_ps3fb(char *p)
+{
+	if (p == NULL)
+		return 1;
+
+	/* Size parsed by memparse(), rounded up to the buffer's alignment. */
+	ps3fb_videomemory.size = ALIGN(memparse(p, &p), ps3fb_videomemory.align);
+	return 0;
+}
+early_param("ps3fb", early_parse_ps3fb);
+#else
+#define prealloc_ps3fb_videomemory()	do { } while (0)
+#endif
+
+#if defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
+struct ps3_prealloc ps3flash_bounce_buffer = {
+	.name = "ps3flash bounce buffer",
+	.size = 256*1024,	/* set to 0 by "ps3flash=off", which makes prealloc() a no-op */
+	.align = 256*1024
+};
+EXPORT_SYMBOL_GPL(ps3flash_bounce_buffer);
+#define prealloc_ps3flash_bounce_buffer()	prealloc(&ps3flash_bounce_buffer)
+
+static int __init early_parse_ps3flash(char *p)
+{
+	if (p == NULL)
+		return 1;
+
+	/* Only "off" is recognised; any other value leaves the size alone. */
+	if (strcmp(p, "off") == 0)
+		ps3flash_bounce_buffer.size = 0;
+	return 0;
+}
+early_param("ps3flash", early_parse_ps3flash);
+#else
+#define prealloc_ps3flash_bounce_buffer()	do { } while (0)
+#endif
+
+static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+	/* hypervisor only allows us to set BTI, Kernel and user */
+	const unsigned long allowed = DABRX_BTI | DABRX_KERNEL | DABRX_USER;
+
+	/* Have to set at least one bit in the DABRX */
+	if (!dabr && !dabrx)
+		dabrx = DABRX_USER;
+	return lv1_set_dabr(dabr, dabrx & allowed) ? -1 : 0;
+}
+
+static ssize_t ps3_fw_version_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s", ps3_firmware_version_str); /* bounded to one page */
+}
+
+static int __init ps3_setup_sysfs(void)
+{
+	static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO,
+		ps3_fw_version_show, NULL);
+	static struct kobject *kobj;
+	int result;
+
+	/* Expose the firmware version as "ps3/fw-version" under firmware_kobj. */
+	kobj = kobject_create_and_add("ps3", firmware_kobj);
+	if (!kobj) {
+		pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__,
+			__LINE__);
+		return -ENOMEM;
+	}
+
+	result = sysfs_create_file(kobj, &attr.attr);
+	if (result) {
+		pr_warn("%s:%d: sysfs_create_file failed.\n", __func__,
+			__LINE__);
+		kobject_put(kobj);
+		/* Hand back the real cause instead of a blanket -ENOMEM. */
+		return result;
+	}
+
+	return 0;
+}
+core_initcall(ps3_setup_sysfs);
+
+static void __init ps3_setup_arch(void)
+{
+	u64 tmp;
+
+	DBG(" -> %s:%d\n", __func__, __LINE__);
+	/* Cache the firmware version; tmp receives a second value that is not used. */
+	lv1_get_version_info(&ps3_firmware_version.raw, &tmp);
+	/* Text form, later served through ps3_fw_version_show(). */
+	snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str),
+		"%u.%u.%u", ps3_firmware_version.major,
+		ps3_firmware_version.minor, ps3_firmware_version.rev);
+
+	printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str);
+
+	ps3_spu_set_platform();
+
+#ifdef CONFIG_SMP
+	smp_init_ps3();
+#endif
+	/* Each of these expands to nothing when its driver is not configured. */
+	prealloc_ps3fb_videomemory();
+	prealloc_ps3flash_bounce_buffer();
+
+	ppc_md.power_save = ps3_power_save;
+	ps3_os_area_init();
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+
+static void __init ps3_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");	/* a NULL s prints as "" */
+}
+
+void __init ps3_early_mm_init(void)
+{
+	unsigned long htab_size;	/* passed by pointer to ps3_mm_vas_create(), then to ps3_hpte_init() */
+
+	ps3_mm_init();
+	ps3_mm_vas_create(&htab_size);
+	ps3_hpte_init(htab_size);
+}
+
+static int __init ps3_probe(void)
+{
+	DBG(" -> %s:%d\n", __func__, __LINE__);
+
+	ps3_os_area_save_params();
+	/* Route pm_power_off to the PS3 power-off handler in this file. */
+	pm_power_off = ps3_power_off;
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+	return 1;	/* NOTE(review): presumably "probe matched" -- confirm against define_machine users */
+}
+
+#if defined(CONFIG_KEXEC_CORE)
+static void ps3_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	int cpu = smp_processor_id();
+
+	DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu);
+	/* Neither argument is used; cleanup is done for the calling CPU only. */
+	ps3_smp_cleanup_cpu(cpu);
+	ps3_shutdown_IRQ(cpu);
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+#endif
+
+define_machine(ps3) {	/* machine description: wires up the PS3 platform callbacks */
+	.name				= "PS3",
+	.compatible			= "sony,ps3",
+	.probe				= ps3_probe,
+	.setup_arch			= ps3_setup_arch,
+	.init_IRQ			= ps3_init_IRQ,
+	.panic				= ps3_panic,
+	.get_boot_time			= ps3_get_boot_time,
+	.set_dabr			= ps3_set_dabr,
+	.calibrate_decr			= ps3_calibrate_decr,
+	.progress			= ps3_progress,
+	.restart			= ps3_restart,
+	.halt				= ps3_halt,
+#if defined(CONFIG_KEXEC_CORE)
+	.kexec_cpu_down			= ps3_kexec_cpu_down,	/* only built with kexec support */
+#endif
+};
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
new file mode 100644
index 0000000000..8529575600
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 SMP routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+
+#include "platform.h"
+
+#if defined(DEBUG)
+#define DBG udbg_printf
+#else
+#define DBG pr_debug
+#endif
+
+/**
+  * ps3_ipi_virqs - a per cpu array of virqs for ipi use
+  */
+
+#define MSG_COUNT 4
+static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
+
+static void ps3_smp_message_pass(int cpu, int msg)
+{
+	int result;
+	unsigned int virq;
+
+	/* msg is signed and indexes ps3_ipi_virqs[]: reject both ends. */
+	if (msg < 0 || msg >= MSG_COUNT) {
+		DBG("%s:%d: bad msg: %d\n", __func__, __LINE__, msg);
+		return;
+	}
+	virq = per_cpu(ps3_ipi_virqs, cpu)[msg];
+	result = ps3_send_event_locally(virq);
+
+	if (result)
+		DBG("%s:%d: ps3_send_event_locally(%d, %d) failed"
+			" (%d)\n", __func__, __LINE__, cpu, msg, result);
+}
+
+static void __init ps3_smp_probe(void)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < 2; cpu++) {
+		int result;
+		unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu);
+		int i;
+
+		DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu);
+		/*
+		 * Each pass sets up MSG_COUNT ipi virqs for one cpu.
+		 *
+		 * Check assumptions on ps3_ipi_virqs[] indexing: the loop
+		 * below uses the PPC_MSG_* value as the array index.  If a
+		 * check fails, a different mapping needs to be set up.
+		 */
+		BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION    != 0);
+		BUILD_BUG_ON(PPC_MSG_RESCHEDULE       != 1);
+		BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST   != 2);
+		BUILD_BUG_ON(PPC_MSG_NMI_IPI          != 3);
+
+		for (i = 0; i < MSG_COUNT; i++) {
+			result = ps3_event_receive_port_setup(cpu, &virqs[i]);
+
+			if (result)
+				continue;
+
+			DBG("%s:%d: (%d, %d) => virq %u\n",
+				__func__, __LINE__, cpu, i, virqs[i]);
+
+			result = smp_request_message_ipi(virqs[i], i);
+			/* On failure forget the virq, else register it. */
+			if (result)
+				virqs[i] = 0;
+			else
+				ps3_register_ipi_irq(cpu, virqs[i]);
+		}
+
+		ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_NMI_IPI]);
+
+		DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu);
+	}
+}
+
+void ps3_smp_cleanup_cpu(int cpu)
+{
+	unsigned int *ipi_virqs = per_cpu(ps3_ipi_virqs, cpu);
+	unsigned int *slot;
+
+	DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu);
+
+	/* Can't call free_irq from interrupt context: destroy ports only. */
+	for (slot = ipi_virqs; slot < ipi_virqs + MSG_COUNT; slot++) {
+		ps3_event_receive_port_destroy(*slot);
+		*slot = 0;
+	}
+
+	DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu);
+}
+
+static struct smp_ops_t ps3_smp_ops = {
+	.probe		= ps3_smp_probe,
+	.message_pass	= ps3_smp_message_pass,
+	.kick_cpu	= smp_generic_kick_cpu,
+};
+
+void __init smp_init_ps3(void)
+{
+	DBG(" -> %s\n", __func__);
+	smp_ops = &ps3_smp_ops;
+	DBG(" <- %s\n", __func__);
+}
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
new file mode 100644
index 0000000000..4a2520ec6d
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 Platform spu routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mmzone.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+#include "../cell/spufs/spufs.h"
+#include "platform.h"
+
+/* spu_management_ops */
+
+/**
+ * enum spe_type - Type of spe to create.
+ * @SPE_TYPE_LOGICAL: Standard logical spe.
+ *
+ * For use with lv1_construct_logical_spe().  The current HV does not support
+ * any types other than those listed.
+ */
+
+enum spe_type {
+	SPE_TYPE_LOGICAL = 0,
+};
+
+/**
+ * struct spe_shadow - logical spe shadow register area.
+ *
+ * Read-only shadow of spe registers.
+ */
+
+struct spe_shadow {
+	u8 padding_0140[0x0140];
+	u64 int_status_class0_RW;       /* 0x0140 */
+	u64 int_status_class1_RW;       /* 0x0148 */
+	u64 int_status_class2_RW;       /* 0x0150 */
+	u8 padding_0158[0x0610-0x0158];
+	u64 mfc_dsisr_RW;               /* 0x0610 */
+	u8 padding_0618[0x0620-0x0618];
+	u64 mfc_dar_RW;                 /* 0x0620 */
+	u8 padding_0628[0x0800-0x0628];
+	u64 mfc_dsipr_R;                /* 0x0800 */
+	u8 padding_0808[0x0810-0x0808];
+	u64 mfc_lscrr_R;                /* 0x0810 */
+	u8 padding_0818[0x0c00-0x0818];
+	u64 mfc_cer_R;                  /* 0x0c00 */
+	u8 padding_0c08[0x0f00-0x0c08];
+	u64 spe_execution_status;       /* 0x0f00 */
+	u8 padding_0f08[0x1000-0x0f08];
+};
+
+/**
+ * enum spe_ex_state - Logical spe execution state.
+ * @SPE_EX_STATE_UNEXECUTABLE: Uninitialized.
+ * @SPE_EX_STATE_EXECUTABLE: Enabled, not ready.
+ * @SPE_EX_STATE_EXECUTED: Ready for use.
+ *
+ * The execution state (status) of the logical spe as reported in
+ * struct spe_shadow:spe_execution_status.
+ */
+
+enum spe_ex_state {
+	SPE_EX_STATE_UNEXECUTABLE = 0,
+	SPE_EX_STATE_EXECUTABLE = 2,
+	SPE_EX_STATE_EXECUTED = 3,
+};
+
+/**
+ * struct priv1_cache - Cached values of priv1 registers.
+ * @masks: Array of cached spe interrupt masks, indexed by class.
+ * @sr1: Cached mfc_sr1 register.
+ * @tclass_id: Cached mfc_tclass_id register.
+ */
+
+struct priv1_cache {
+	u64 masks[3];
+	u64 sr1;
+	u64 tclass_id;
+};
+
+/**
+ * struct spu_pdata - Platform state variables.
+ * @spe_id: HV spe id returned by lv1_construct_logical_spe().
+ * @resource_id: HV spe resource id returned by
+ * 	ps3_repository_read_spe_resource_id().
+ * @priv2_addr: lpar address of spe priv2 area returned by
+ * 	lv1_construct_logical_spe().
+ * @shadow_addr: lpar address of spe register shadow area returned by
+ * 	lv1_construct_logical_spe().
+ * @shadow: Virtual (ioremap) address of spe register shadow area.
+ * @cache: Cached values of priv1 registers.
+ */
+
+struct spu_pdata {
+	u64 spe_id;
+	u64 resource_id;
+	u64 priv2_addr;
+	u64 shadow_addr;
+	struct spe_shadow __iomem *shadow;
+	struct priv1_cache cache;
+};
+
+static struct spu_pdata *spu_pdata(struct spu *spu)
+{
+	return spu->pdata;
+}
+
+#define dump_areas(_a, _b, _c, _d, _e) \
+	_dump_areas(_a, _b, _c, _d, _e, __func__, __LINE__)
+static void _dump_areas(unsigned int spe_id, unsigned long priv2,
+	unsigned long problem, unsigned long ls, unsigned long shadow,
+	const char* func, int line)
+{
+	pr_debug("%s:%d: spe_id:  %xh (%u)\n", func, line, spe_id, spe_id);
+	pr_debug("%s:%d: priv2:   %lxh\n", func, line, priv2);
+	pr_debug("%s:%d: problem: %lxh\n", func, line, problem);
+	pr_debug("%s:%d: ls:      %lxh\n", func, line, ls);
+	pr_debug("%s:%d: shadow:  %lxh\n", func, line, shadow);
+}
+
+u64 ps3_get_spe_id(void *arg)
+{
+	return spu_pdata(arg)->spe_id;
+}
+EXPORT_SYMBOL_GPL(ps3_get_spe_id);
+
+static unsigned long __init get_vas_id(void)
+{
+	u64 id;
+
+	lv1_get_logical_ppe_id(&id);
+	lv1_get_virtual_address_space_id_of_ppe(&id);
+
+	return id;
+}
+
+static int __init construct_spu(struct spu *spu)
+{
+	int result;
+	u64 unused;
+	u64 problem_phys;
+	u64 local_store_phys;
+
+	/* Ask the HV for a logical spe; outputs go to pdata and locals. */
+	result = lv1_construct_logical_spe(PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT,
+		PAGE_SHIFT, PAGE_SHIFT, get_vas_id(), SPE_TYPE_LOGICAL,
+		&spu_pdata(spu)->priv2_addr, &problem_phys,
+		&local_store_phys, &unused,
+		&spu_pdata(spu)->shadow_addr,
+		&spu_pdata(spu)->spe_id);
+	if (result) {
+		pr_debug("%s:%d: lv1_construct_logical_spe failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;
+	}
+	/* Only publish the addresses once the HV call has succeeded. */
+	spu->problem_phys = problem_phys;
+	spu->local_store_phys = local_store_phys;
+	return 0;
+}
+
+static void spu_unmap(struct spu *spu)
+{
+	iounmap(spu->priv2);
+	iounmap(spu->problem);
+	iounmap((__force u8 __iomem *)spu->local_store);
+	iounmap(spu_pdata(spu)->shadow);
+}
+
+/**
+ * setup_areas - Map the spu regions into the address space.
+ *
+ * The current HV requires the spu shadow regs to be mapped with the
+ * PTE page protection bits set as read-only.
+ */
+
+static int __init setup_areas(struct spu *spu)
+{
+	struct table {char* name; unsigned long addr; unsigned long size;};
+	unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
+
+	spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr,
+					      sizeof(struct spe_shadow), shadow_flags);
+	if (!spu_pdata(spu)->shadow) {
+		pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE);
+
+	if (!spu->local_store) {
+		pr_debug("%s:%d: ioremap local_store failed\n",
+			__func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	spu->problem = ioremap(spu->problem_phys,
+		sizeof(struct spu_problem));
+
+	if (!spu->problem) {
+		pr_debug("%s:%d: ioremap problem failed\n", __func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	spu->priv2 = ioremap(spu_pdata(spu)->priv2_addr,
+		sizeof(struct spu_priv2));
+
+	if (!spu->priv2) {
+		pr_debug("%s:%d: ioremap priv2 failed\n", __func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	dump_areas(spu_pdata(spu)->spe_id, spu_pdata(spu)->priv2_addr,
+		spu->problem_phys, spu->local_store_phys,
+		spu_pdata(spu)->shadow_addr);
+	dump_areas(spu_pdata(spu)->spe_id, (unsigned long)spu->priv2,
+		(unsigned long)spu->problem, (unsigned long)spu->local_store,
+		(unsigned long)spu_pdata(spu)->shadow);
+
+	return 0;
+
+fail_ioremap:
+	spu_unmap(spu);
+
+	return -ENOMEM;
+}
+
+static int __init setup_interrupts(struct spu *spu)
+{
+	int result = 0;
+	int idx;
+
+	/*
+	 * Set up one irq for each of the three spu->irqs[] slots.  The
+	 * slot index is also the third argument of ps3_spe_irq_setup().
+	 * Stop at the first failure.
+	 */
+	for (idx = 0; idx < 3; idx++) {
+		result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY,
+			spu_pdata(spu)->spe_id, idx, &spu->irqs[idx]);
+
+		if (result)
+			break;
+	}
+
+	if (!result)
+		return result;
+
+	/*
+	 * Slot idx failed: destroy the irqs set up before it, newest
+	 * first, then clear all three slots.
+	 */
+	while (idx-- > 0)
+		ps3_spe_irq_destroy(spu->irqs[idx]);
+
+	spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0;
+
+	return result;
+}
+
+static int __init enable_spu(struct spu *spu)
+{
+	int result;
+
+	result = lv1_enable_logical_spe(spu_pdata(spu)->spe_id,
+		spu_pdata(spu)->resource_id);
+
+	if (result) {
+		pr_debug("%s:%d: lv1_enable_logical_spe failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto fail_enable;
+	}
+
+	result = setup_areas(spu);
+
+	if (result)
+		goto fail_areas;
+
+	result = setup_interrupts(spu);
+
+	if (result)
+		goto fail_interrupts;
+
+	return 0;
+
+fail_interrupts:
+	spu_unmap(spu);
+fail_areas:
+	lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0);
+fail_enable:
+	return result;
+}
+
+static int ps3_destroy_spu(struct spu *spu)
+{
+	int result;
+
+	pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number);
+
+	result = lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0);
+	BUG_ON(result);
+
+	ps3_spe_irq_destroy(spu->irqs[2]);
+	ps3_spe_irq_destroy(spu->irqs[1]);
+	ps3_spe_irq_destroy(spu->irqs[0]);
+
+	spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0;
+
+	spu_unmap(spu);
+
+	result = lv1_destruct_logical_spe(spu_pdata(spu)->spe_id);
+	BUG_ON(result);
+
+	kfree(spu->pdata);
+	spu->pdata = NULL;
+
+	return 0;
+}
+
+static int __init ps3_create_spu(struct spu *spu, void *data)
+{
+	int result;
+
+	pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number);
+
+	spu->pdata = kzalloc(sizeof(struct spu_pdata), GFP_KERNEL);
+	if (!spu->pdata) {
+		result = -ENOMEM;
+		goto fail_malloc;
+	}
+
+	/* data carries the spe resource id as an integer, not a pointer. */
+	spu_pdata(spu)->resource_id = (unsigned long)data;
+
+	/* Init cached reg values to HV defaults. */
+	spu_pdata(spu)->cache.sr1 = 0x33;
+
+	result = construct_spu(spu);
+	if (result)
+		goto fail_construct;
+
+	/* For now, just go ahead and enable it. */
+	result = enable_spu(spu);
+	if (result)
+		goto fail_enable;
+
+	/* Make sure the spu is in SPE_EX_STATE_EXECUTED. */
+	/* need something better here!!! */
+	while (in_be64(&spu_pdata(spu)->shadow->spe_execution_status)
+		!= SPE_EX_STATE_EXECUTED)
+		(void)0;
+
+	return result;
+
+	/*
+	 * enable_spu() unwinds itself on error and a failed construct_spu()
+	 * leaves no spe, so the full ps3_destroy_spu() must not be run here.
+	 */
+fail_enable:
+	lv1_destruct_logical_spe(spu_pdata(spu)->spe_id);
+fail_construct:
+	kfree(spu->pdata);
+	spu->pdata = NULL;
+fail_malloc:
+	return result;
+}
+
+static int __init ps3_enumerate_spus(int (*fn)(void *data))
+{
+	int result;
+	unsigned int num_resource_id;
+	unsigned int i;
+
+	result = ps3_repository_read_num_spu_resource_id(&num_resource_id);
+	if (result)
+		goto fail;	/* num_resource_id cannot be trusted */
+
+	pr_debug("%s:%d: num_resource_id %u\n", __func__, __LINE__,
+		num_resource_id);
+
+	/*
+	 * For now, just create logical spus equal to the number
+	 * of physical spus reserved for the partition.
+	 */
+	for (i = 0; i < num_resource_id; i++) {
+		enum ps3_spu_resource_type resource_type;
+		unsigned int resource_id;
+
+		result = ps3_repository_read_spu_resource_id(i,
+			&resource_type, &resource_id);
+		if (result)
+			goto fail;
+
+		/* Only exclusive resources are handed to fn. */
+		if (resource_type != PS3_SPU_RESOURCE_TYPE_EXCLUSIVE)
+			continue;
+
+		result = fn((void*)(unsigned long)resource_id);
+		if (result)
+			goto fail;
+	}
+
+	return num_resource_id;
+
+fail:
+	printk(KERN_WARNING "%s:%d: Error initializing spus\n",
+		__func__, __LINE__);
+	return result;
+}
+
+static int ps3_init_affinity(void)
+{
+	return 0;
+}
+
+/**
+ * ps3_enable_spu - Enable SPU run control.
+ *
+ * An outstanding enhancement for the PS3 would be to add a guard to check
+ * for incorrect access to the spu problem state when the spu context is
+ * disabled.  This check could be implemented with a flag added to the spu
+ * context that would inhibit mapping problem state pages, and a routine
+ * to unmap spu problem state pages.  When the spu is enabled with
+ * ps3_enable_spu() the flag would be set allowing pages to be mapped,
+ * and when the spu is disabled with ps3_disable_spu() the flag would be
+ * cleared and the mapped problem state pages would be unmapped.
+ */
+
+static void ps3_enable_spu(struct spu_context *ctx)
+{
+}
+
+static void ps3_disable_spu(struct spu_context *ctx)
+{
+	ctx->ops->runcntl_stop(ctx);
+}
+
+static const struct spu_management_ops spu_management_ps3_ops = {
+	.enumerate_spus = ps3_enumerate_spus,
+	.create_spu = ps3_create_spu,
+	.destroy_spu = ps3_destroy_spu,
+	.enable_spu = ps3_enable_spu,
+	.disable_spu = ps3_disable_spu,
+	.init_affinity = ps3_init_affinity,
+};
+
+/* spu_priv1_ops */
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	/* are these serialized by caller??? */
+	old_mask = spu_int_mask_get(spu, class);
+	spu_int_mask_set(spu, class, old_mask & mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = spu_int_mask_get(spu, class);
+	spu_int_mask_set(spu, class, old_mask | mask);
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	spu_pdata(spu)->cache.masks[class] = mask;
+	lv1_set_spe_interrupt_mask(spu_pdata(spu)->spe_id, class,
+		spu_pdata(spu)->cache.masks[class]);
+}
+
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	return spu_pdata(spu)->cache.masks[class];
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	/* Note that MFC_DSISR will be cleared when class1[MF] is set. */
+
+	lv1_clear_spe_interrupt_status(spu_pdata(spu)->spe_id, class,
+		stat, 0);
+}
+
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	u64 stat;
+
+	lv1_get_spe_interrupt_status(spu_pdata(spu)->spe_id, class, &stat);
+	return stat;
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	/* No support. */
+}
+
+static u64 mfc_dar_get(struct spu *spu)
+{
+	return in_be64(&spu_pdata(spu)->shadow->mfc_dar_RW);
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	/* Nothing to do, cleared in int_stat_clear(). */
+}
+
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	return in_be64(&spu_pdata(spu)->shadow->mfc_dsisr_RW);
+}
+
+static void mfc_sdr_setup(struct spu *spu)
+{
+	/* Nothing to do. */
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	/*
+	 * Every bit outside LOCAL_STORAGE_DECODE and PROBLEM_STATE must keep
+	 * its cached value; an attempt to change one of them is a bug.
+	 */
+	static const u64 fixed_bits = ~(MFC_STATE1_LOCAL_STORAGE_DECODE_MASK
+		| MFC_STATE1_PROBLEM_STATE_MASK);
+	struct spu_pdata *pdata = spu_pdata(spu);
+
+	BUG_ON((sr1 ^ pdata->cache.sr1) & fixed_bits);
+	pdata->cache.sr1 = sr1;
+	lv1_set_spe_privilege_state_area_1_register(pdata->spe_id,
+		offsetof(struct spu_priv1, mfc_sr1_RW), sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	return spu_pdata(spu)->cache.sr1;
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	spu_pdata(spu)->cache.tclass_id = tclass_id;
+	lv1_set_spe_privilege_state_area_1_register(
+		spu_pdata(spu)->spe_id,
+		offsetof(struct spu_priv1, mfc_tclass_id_RW),
+		spu_pdata(spu)->cache.tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	return spu_pdata(spu)->cache.tclass_id;
+}
+
+static void tlb_invalidate(struct spu *spu)
+{
+	/* Nothing to do. */
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	/* No support. */
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	return 0; /* No support. */
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	/* No support. */
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	return 0; /* No support. */
+}
+
+static const struct spu_priv1_ops spu_priv1_ps3_ops = {
+	.int_mask_and = int_mask_and,
+	.int_mask_or = int_mask_or,
+	.int_mask_set = int_mask_set,
+	.int_mask_get = int_mask_get,
+	.int_stat_clear = int_stat_clear,
+	.int_stat_get = int_stat_get,
+	.cpu_affinity_set = cpu_affinity_set,
+	.mfc_dar_get = mfc_dar_get,
+	.mfc_dsisr_set = mfc_dsisr_set,
+	.mfc_dsisr_get = mfc_dsisr_get,
+	.mfc_sdr_setup = mfc_sdr_setup,
+	.mfc_sr1_set = mfc_sr1_set,
+	.mfc_sr1_get = mfc_sr1_get,
+	.mfc_tclass_id_set = mfc_tclass_id_set,
+	.mfc_tclass_id_get = mfc_tclass_id_get,
+	.tlb_invalidate = tlb_invalidate,
+	.resource_allocation_groupID_set = resource_allocation_groupID_set,
+	.resource_allocation_groupID_get = resource_allocation_groupID_get,
+	.resource_allocation_enable_set = resource_allocation_enable_set,
+	.resource_allocation_enable_get = resource_allocation_enable_get,
+};
+
+void ps3_spu_set_platform(void)
+{
+	spu_priv1_ops = &spu_priv1_ps3_ops;
+	spu_management_ops = &spu_management_ps3_ops;
+}
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
new file mode 100644
index 0000000000..d6b5f5ecd5
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 system bus driver.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/dma-map-ops.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "platform.h"
+
+static struct device ps3_system_bus = {
+	.init_name = "ps3_system",
+};
+
+/* FIXME: need device usage counters! */
+static struct {
+	struct mutex mutex;
+	int sb_11; /* usb 0 */
+	int sb_12; /* usb 0 */
+	int gpu;
+} usage_hack;
+
+static int ps3_is_device(struct ps3_system_bus_device *dev, u64 bus_id,
+			 u64 dev_id)
+{
+	return dev->bus_id == bus_id && dev->dev_id == dev_id;
+}
+
+static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev)
+{
+	int result;
+	int *users = NULL;	/* usage counter, if the device has one */
+
+	BUG_ON(!dev->bus_id);
+	mutex_lock(&usage_hack.mutex);
+
+	/* Opens of devices 1.1 and 1.2 are counted in usage_hack. */
+	if (ps3_is_device(dev, 1, 1))
+		users = &usage_hack.sb_11;
+	else if (ps3_is_device(dev, 1, 2))
+		users = &usage_hack.sb_12;
+
+	/* Only the first user of a counted device calls into the HV. */
+	if (users && ++(*users) > 1) {
+		result = 0;
+		goto done;
+	}
+
+	result = lv1_open_device(dev->bus_id, dev->dev_id, 0);
+
+	if (result) {
+		pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n",
+			__func__, __LINE__, dev->match_id, dev->match_sub_id,
+			dev_name(&dev->core), ps3_result(result));
+		/* Not open: undo the count so the next caller retries. */
+		if (users)
+			(*users)--;
+		result = -EPERM;
+	}
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+static int ps3_close_hv_device_sb(struct ps3_system_bus_device *dev)
+{
+	int *users = NULL;
+	int result = 0;
+
+	BUG_ON(!dev->bus_id);
+	mutex_lock(&usage_hack.mutex);
+
+	/* Closes of devices 1.1 and 1.2 are counted in usage_hack. */
+	if (ps3_is_device(dev, 1, 1))
+		users = &usage_hack.sb_11;
+	else if (ps3_is_device(dev, 1, 2))
+		users = &usage_hack.sb_12;
+
+	/*
+	 * Drop one reference on a counted device.  The HV close below
+	 * is only issued once that count reaches zero; a device without
+	 * a counter is always closed.  A skipped close returns 0.
+	 */
+	if (users && --(*users) != 0)
+		goto done;
+
+	result = lv1_close_device(dev->bus_id, dev->dev_id);
+	BUG_ON(result);
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev)
+{
+	int result;
+
+	mutex_lock(&usage_hack.mutex);
+	/* Only the first user calls lv1_gpu_open(); later opens return 0. */
+	usage_hack.gpu++;
+	if (usage_hack.gpu > 1) {
+		result = 0;
+		goto done;
+	}
+
+	result = lv1_gpu_open(0);
+	if (result) {
+		pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__,
+			__LINE__, ps3_result(result));
+		usage_hack.gpu--;	/* not open: undo the count */
+		result = -EPERM;
+	}
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+static int ps3_close_hv_device_gpu(struct ps3_system_bus_device *dev)
+{
+	int result;
+
+	mutex_lock(&usage_hack.mutex);
+
+	usage_hack.gpu--;
+	if (usage_hack.gpu) {
+		result = 0;
+		goto done;
+	}
+
+	result = lv1_gpu_close();
+	BUG_ON(result);
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+int ps3_open_hv_device(struct ps3_system_bus_device *dev)
+{
+	BUG_ON(!dev);
+	pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id);
+
+	switch (dev->match_id) {
+	case PS3_MATCH_ID_EHCI:
+	case PS3_MATCH_ID_OHCI:
+	case PS3_MATCH_ID_GELIC:
+	case PS3_MATCH_ID_STOR_DISK:
+	case PS3_MATCH_ID_STOR_ROM:
+	case PS3_MATCH_ID_STOR_FLASH:
+		return ps3_open_hv_device_sb(dev);
+
+	case PS3_MATCH_ID_SOUND:
+	case PS3_MATCH_ID_GPU:
+		return ps3_open_hv_device_gpu(dev);
+
+	case PS3_MATCH_ID_AV_SETTINGS:
+	case PS3_MATCH_ID_SYSTEM_MANAGER:
+		pr_debug("%s:%d: unsupported match_id: %u\n", __func__,
+			__LINE__, dev->match_id);
+		pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__,
+			dev->bus_id);
+		BUG();
+		return -EINVAL;
+
+	default:
+		break;
+	}
+
+	pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__,
+		dev->match_id);
+	BUG();
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(ps3_open_hv_device);
+
+int ps3_close_hv_device(struct ps3_system_bus_device *dev)
+{
+	BUG_ON(!dev);
+	pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id);
+
+	switch (dev->match_id) {
+	case PS3_MATCH_ID_EHCI:
+	case PS3_MATCH_ID_OHCI:
+	case PS3_MATCH_ID_GELIC:
+	case PS3_MATCH_ID_STOR_DISK:
+	case PS3_MATCH_ID_STOR_ROM:
+	case PS3_MATCH_ID_STOR_FLASH:
+		return ps3_close_hv_device_sb(dev);
+
+	case PS3_MATCH_ID_SOUND:
+	case PS3_MATCH_ID_GPU:
+		return ps3_close_hv_device_gpu(dev);
+
+	case PS3_MATCH_ID_AV_SETTINGS:
+	case PS3_MATCH_ID_SYSTEM_MANAGER:
+		pr_debug("%s:%d: unsupported match_id: %u\n", __func__,
+			__LINE__, dev->match_id);
+		pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__,
+			dev->bus_id);
+		BUG();
+		return -EINVAL;
+
+	default:
+		break;
+	}
+
+	pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__,
+		dev->match_id);
+	BUG();
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(ps3_close_hv_device);
+
+#define dump_mmio_region(_a) _dump_mmio_region(_a, __func__, __LINE__)
+static void _dump_mmio_region(const struct ps3_mmio_region* r,
+	const char* func, int line)
+{
+	pr_debug("%s:%d: dev       %llu:%llu\n", func, line, r->dev->bus_id,
+		r->dev->dev_id);
+	pr_debug("%s:%d: bus_addr  %lxh\n", func, line, r->bus_addr);
+	pr_debug("%s:%d: len       %lxh\n", func, line, r->len);
+	pr_debug("%s:%d: lpar_addr %lxh\n", func, line, r->lpar_addr);
+}
+
+static int ps3_sb_mmio_region_create(struct ps3_mmio_region *r)
+{
+	int result;
+	u64 lpar_addr;
+
+	result = lv1_map_device_mmio_region(r->dev->bus_id, r->dev->dev_id,
+		r->bus_addr, r->len, r->page_size, &lpar_addr);
+	r->lpar_addr = lpar_addr;
+
+	if (result) {
+		pr_debug("%s:%d: lv1_map_device_mmio_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		r->lpar_addr = 0;
+	}
+
+	dump_mmio_region(r);
+	return result;
+}
+
+static int ps3_ioc0_mmio_region_create(struct ps3_mmio_region *r)
+{
+	/* device specific; do nothing currently */
+	return 0;
+}
+
+int ps3_mmio_region_create(struct ps3_mmio_region *r)
+{
+	return r->mmio_ops->create(r);
+}
+EXPORT_SYMBOL_GPL(ps3_mmio_region_create);
+
+static int ps3_sb_free_mmio_region(struct ps3_mmio_region *r)
+{
+	int result;
+
+	dump_mmio_region(r);
+	result = lv1_unmap_device_mmio_region(r->dev->bus_id, r->dev->dev_id,
+		r->lpar_addr);
+
+	if (result)
+		pr_debug("%s:%d: lv1_unmap_device_mmio_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	r->lpar_addr = 0;
+	return result;
+}
+
+static int ps3_ioc0_free_mmio_region(struct ps3_mmio_region *r)
+{
+	/* device specific; do nothing currently */
+	return 0;
+}
+
+
+int ps3_free_mmio_region(struct ps3_mmio_region *r)
+{
+	return r->mmio_ops->free(r);
+}
+
+EXPORT_SYMBOL_GPL(ps3_free_mmio_region);
+
+static const struct ps3_mmio_region_ops ps3_mmio_sb_region_ops = {
+	.create = ps3_sb_mmio_region_create,
+	.free = ps3_sb_free_mmio_region
+};
+
+static const struct ps3_mmio_region_ops ps3_mmio_ioc0_region_ops = {
+	.create = ps3_ioc0_mmio_region_create,
+	.free = ps3_ioc0_free_mmio_region
+};
+
+int ps3_mmio_region_init(struct ps3_system_bus_device *dev,
+	struct ps3_mmio_region *r, unsigned long bus_addr, unsigned long len,
+	enum ps3_mmio_page_size page_size)
+{
+	r->dev = dev;
+	r->bus_addr = bus_addr;
+	r->len = len;
+	r->page_size = page_size;
+	switch (dev->dev_type) {
+	case PS3_DEVICE_TYPE_SB:
+		r->mmio_ops = &ps3_mmio_sb_region_ops;
+		break;
+	case PS3_DEVICE_TYPE_IOC0:
+		r->mmio_ops = &ps3_mmio_ioc0_region_ops;
+		break;
+	default:
+		BUG();
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ps3_mmio_region_init);
+
+static int ps3_system_bus_match(struct device *_dev,
+	struct device_driver *_drv)
+{
+	int result;
+	struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv);
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+	if (!dev->match_sub_id)
+		result = dev->match_id == drv->match_id;
+	else
+		result = dev->match_sub_id == drv->match_sub_id &&
+			dev->match_id == drv->match_id;
+
+	if (result)
+		pr_info("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): match\n",
+			__func__, __LINE__,
+			dev->match_id, dev->match_sub_id, dev_name(&dev->core),
+			drv->match_id, drv->match_sub_id, drv->core.name);
+	else
+		pr_debug("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): miss\n",
+			__func__, __LINE__,
+			dev->match_id, dev->match_sub_id, dev_name(&dev->core),
+			drv->match_id, drv->match_sub_id, drv->core.name);
+
+	return result;
+}
+
+static int ps3_system_bus_probe(struct device *_dev)
+{
+	int result = 0;
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct ps3_system_bus_driver *drv;
+
+	BUG_ON(!dev);
+	dev_dbg(_dev, "%s:%d\n", __func__, __LINE__);
+
+	drv = ps3_system_bus_dev_to_system_bus_drv(dev);
+	BUG_ON(!drv);
+
+	if (drv->probe)
+		result = drv->probe(dev);
+	else
+		pr_debug("%s:%d: %s no probe method\n", __func__, __LINE__,
+			dev_name(&dev->core));
+
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core));
+	return result;
+}
+
+static void ps3_system_bus_remove(struct device *_dev)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct ps3_system_bus_driver *drv;
+
+	BUG_ON(!dev);
+	dev_dbg(_dev, "%s:%d\n", __func__, __LINE__);
+
+	drv = ps3_system_bus_dev_to_system_bus_drv(dev);
+	BUG_ON(!drv);
+
+	if (drv->remove)
+		drv->remove(dev);
+	else
+		dev_dbg(&dev->core, "%s:%d %s: no remove method\n",
+			__func__, __LINE__, drv->core.name);
+
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core));
+}
+
+static void ps3_system_bus_shutdown(struct device *_dev)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct ps3_system_bus_driver *drv;
+
+	BUG_ON(!dev);
+
+	dev_dbg(&dev->core, " -> %s:%d: match_id %d\n", __func__, __LINE__,
+		dev->match_id);
+
+	if (!dev->core.driver) {
+		dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__,
+			__LINE__);
+		return;
+	}
+
+	drv = ps3_system_bus_dev_to_system_bus_drv(dev);
+
+	BUG_ON(!drv);
+
+	dev_dbg(&dev->core, "%s:%d: %s -> %s\n", __func__, __LINE__,
+		dev_name(&dev->core), drv->core.name);
+
+	if (drv->shutdown)
+		drv->shutdown(dev);
+	else if (drv->remove) {
+		dev_dbg(&dev->core, "%s:%d %s: no shutdown, calling remove\n",
+			__func__, __LINE__, drv->core.name);
+		drv->remove(dev);
+	} else {
+		dev_dbg(&dev->core, "%s:%d %s: no shutdown method\n",
+			__func__, __LINE__, drv->core.name);
+		BUG();
+	}
+
+	dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__);
+}
+
+static int ps3_system_bus_uevent(const struct device *_dev, struct kobj_uevent_env *env)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+	if (add_uevent_var(env, "MODALIAS=ps3:%d:%d", dev->match_id,
+			   dev->match_sub_id))
+		return -ENOMEM;
+	return 0;
+}
+
+static ssize_t modalias_show(struct device *_dev, struct device_attribute *a,
+	char *buf)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+	/* sysfs_emit() bounds the output to the PAGE_SIZE sysfs buffer. */
+	return sysfs_emit(buf, "ps3:%d:%d\n", dev->match_id,
+			  dev->match_sub_id);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *ps3_system_bus_dev_attrs[] = {
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(ps3_system_bus_dev);
+
+static struct bus_type ps3_system_bus_type = {
+	.name = "ps3_system_bus",
+	.match = ps3_system_bus_match,
+	.uevent = ps3_system_bus_uevent,
+	.probe = ps3_system_bus_probe,
+	.remove = ps3_system_bus_remove,
+	.shutdown = ps3_system_bus_shutdown,
+	.dev_groups = ps3_system_bus_dev_groups,
+};
+
+static int __init ps3_system_bus_init(void)
+{
+	int result;
+
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	mutex_init(&usage_hack.mutex);
+
+	result = device_register(&ps3_system_bus);
+	BUG_ON(result);
+
+	result = bus_register(&ps3_system_bus_type);
+	BUG_ON(result);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+core_initcall(ps3_system_bus_init);
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void * ps3_alloc_coherent(struct device *_dev, size_t size,
+				 dma_addr_t *dma_handle, gfp_t flag,
+				 unsigned long attrs)
+{
+	int result;
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	unsigned long virt_addr;
+
+	flag &= ~(__GFP_DMA | __GFP_HIGHMEM);
+	flag |= __GFP_ZERO;
+
+	virt_addr = __get_free_pages(flag, get_order(size));
+
+	if (!virt_addr) {
+		pr_debug("%s:%d: get_free_pages failed\n", __func__, __LINE__);
+		goto clean_none;
+	}
+
+	result = ps3_dma_map(dev->d_region, virt_addr, size, dma_handle,
+			     CBE_IOPTE_PP_W | CBE_IOPTE_PP_R |
+			     CBE_IOPTE_SO_RW | CBE_IOPTE_M);
+	/* On failure BUG_ON() fires: a string literal is never NULL. */
+	if (result) {
+		pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+			__func__, __LINE__, result);
+		BUG_ON("check region type");
+		goto clean_alloc;
+	}
+
+	return (void*)virt_addr;
+	/* Failure: undo the allocation and clear the caller's handle. */
+clean_alloc:
+	free_pages(virt_addr, get_order(size));
+clean_none:
+	*dma_handle = 0;
+	return NULL;
+}
+
+static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr,
+			      dma_addr_t dma_handle, unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+	ps3_dma_unmap(dev->d_region, dma_handle, size);
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
+/* Creates TCEs for a user provided buffer.  The user buffer must be
+ * contiguous real kernel storage (not vmalloc).  The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ * Returns DMA_MAPPING_ERROR if ps3_dma_map() fails.
+ */
+static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction,
+	unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	int result;
+	dma_addr_t bus_addr;
+	void *ptr = page_address(page) + offset;
+
+	result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
+			     &bus_addr,
+			     CBE_IOPTE_PP_R | CBE_IOPTE_PP_W |
+			     CBE_IOPTE_SO_RW | CBE_IOPTE_M);
+	if (result) {
+		pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+			__func__, __LINE__, result);
+		return DMA_MAPPING_ERROR;
+	}
+
+	return bus_addr;
+}
+
+/* Maps a page for an ioc0 device; IOPTE access bits follow the direction. */
+static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction,
+	unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	int result;
+	dma_addr_t bus_addr;
+	u64 iopte_flag;
+	void *ptr = page_address(page) + offset;
+
+	iopte_flag = CBE_IOPTE_M;
+	switch (direction) {
+	case DMA_BIDIRECTIONAL:
+		iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
+		break;
+	case DMA_TO_DEVICE:
+		iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_SO_R;
+		break;
+	case DMA_FROM_DEVICE:
+		iopte_flag |= CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
+		break;
+	default:
+		/* no other direction is expected here */
+		BUG();
+	}
+	result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
+			     &bus_addr, iopte_flag);
+	if (result) {
+		pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+			__func__, __LINE__, result);
+		return DMA_MAPPING_ERROR;	/* bus_addr may be unset */
+	}
+	return bus_addr;
+}
+
+static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr,
+	size_t size, enum dma_data_direction direction, unsigned long attrs)
+{
+	struct ps3_system_bus_device *sbdev = ps3_dev_to_system_bus_dev(_dev);
+	const int rc = ps3_dma_unmap(sbdev->d_region, dma_addr, size);
+
+	/* An unmap failure cannot be passed back to the caller (void
+	 * return), so it is only logged. */
+	if (rc) {
+		pr_debug("%s:%d: ps3_dma_unmap failed (%d)\n",
+			__func__, __LINE__, rc);
+	}
+}
+
+static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
+	int nents, enum dma_data_direction direction, unsigned long attrs)
+{
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+	BUG_ON("do");	/* NOTE(review): reads like a "to do" placeholder */
+	return -EPERM;
+#else
+	struct ps3_system_bus_device *sbdev = ps3_dev_to_system_bus_dev(_dev);
+	struct scatterlist *entry;
+	int idx, rc;
+
+	/* Map every entry on its own; the first failure aborts the walk. */
+	for_each_sg(sgl, entry, nents, idx) {
+		rc = ps3_dma_map(sbdev->d_region, sg_phys(entry),
+				 entry->length, &entry->dma_address, 0);
+		if (rc) {
+			pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+				__func__, __LINE__, rc);
+			return -EINVAL;
+		}
+
+		entry->dma_length = entry->length;
+	}
+
+	return nents;
+#endif
+}
+
+/* Scatterlist mapping is not provided for ioc0: any call hits BUG(). */
+static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg,
+			   int nents, enum dma_data_direction direction,
+			   unsigned long attrs)
+{
+	BUG();
+	return -EINVAL;
+}
+
+static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction, unsigned long attrs)
+{
+#ifdef CONFIG_PS3_DYNAMIC_DMA
+	BUG_ON("do");	/* NOTE(review): reads like a "to do" placeholder */
+#endif
+}
+
+/* Counterpart of ps3_ioc0_map_sg(): any call hits BUG(). */
+static void ps3_ioc0_unmap_sg(struct device *_dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction, unsigned long attrs)
+{
+	BUG();
+}
+
+static int ps3_dma_supported(struct device *_dev, u64 mask)
+{
+	return DMA_BIT_MASK(32) <= mask;	/* needs at least a 32-bit mask */
+}
+
+static const struct dma_map_ops ps3_sb_dma_ops = {
+	.dma_supported = ps3_dma_supported,
+	.alloc = ps3_alloc_coherent,
+	.free = ps3_free_coherent,
+	.map_page = ps3_sb_map_page,		/* sb-specific */
+	.unmap_page = ps3_unmap_page,
+	.map_sg = ps3_sb_map_sg,		/* sb-specific */
+	.unmap_sg = ps3_sb_unmap_sg,		/* sb-specific */
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
+	.alloc_pages = dma_common_alloc_pages,
+	.free_pages = dma_common_free_pages,
+};
+
+static const struct dma_map_ops ps3_ioc0_dma_ops = {
+	.dma_supported = ps3_dma_supported,
+	.alloc = ps3_alloc_coherent,
+	.free = ps3_free_coherent,
+	.map_page = ps3_ioc0_map_page,		/* ioc0-specific */
+	.unmap_page = ps3_unmap_page,
+	.map_sg = ps3_ioc0_map_sg,		/* ioc0-specific */
+	.unmap_sg = ps3_ioc0_unmap_sg,		/* ioc0-specific */
+	.mmap = dma_common_mmap,
+	.get_sgtable = dma_common_get_sgtable,
+	.alloc_pages = dma_common_alloc_pages,
+	.free_pages = dma_common_free_pages,
+};
+
+/**
+ * ps3_system_bus_release_device - free a device (its struct device release hook)
+ */
+
+static void ps3_system_bus_release_device(struct device *_dev)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	kfree(dev);
+}
+
+/**
+ * ps3_system_bus_device_register - add a device to the system bus
+ * @dev: device to add; its dev_type selects the name prefix and dma_ops
+ *
+ * The dev object must be allocated dynamically by the caller.  The system
+ * bus takes ownership of it and frees it in ps3_system_bus_release_device().
+ * Returns the result of device_register().
+ */
+int ps3_system_bus_device_register(struct ps3_system_bus_device *dev)
+{
+	int result;
+	static unsigned int dev_ioc0_count;
+	static unsigned int dev_sb_count;
+	static unsigned int dev_vuart_count;
+	static unsigned int dev_lpm_count;
+
+	if (!dev->core.parent)
+		dev->core.parent = &ps3_system_bus;
+	dev->core.bus = &ps3_system_bus_type;
+	dev->core.release = ps3_system_bus_release_device;
+
+	switch (dev->dev_type) {
+	case PS3_DEVICE_TYPE_IOC0:
+		dev->core.dma_ops = &ps3_ioc0_dma_ops;
+		dev_set_name(&dev->core, "ioc0_%02x", ++dev_ioc0_count);
+		break;
+	case PS3_DEVICE_TYPE_SB:
+		dev->core.dma_ops = &ps3_sb_dma_ops;
+		dev_set_name(&dev->core, "sb_%02x", ++dev_sb_count);
+
+		break;
+	case PS3_DEVICE_TYPE_VUART:	/* no dma_ops are installed here */
+		dev_set_name(&dev->core, "vuart_%02x", ++dev_vuart_count);
+		break;
+	case PS3_DEVICE_TYPE_LPM:	/* no dma_ops are installed here */
+		dev_set_name(&dev->core, "lpm_%02x", ++dev_lpm_count);
+		break;
+	default:
+		BUG();
+	}
+
+	dev->core.of_node = NULL;
+	set_dev_node(&dev->core, 0);
+
+	pr_debug("%s:%d add %s\n", __func__, __LINE__, dev_name(&dev->core));
+
+	result = device_register(&dev->core);
+	return result;
+}
+
+EXPORT_SYMBOL_GPL(ps3_system_bus_device_register);
+
+int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv)
+{
+	int rc;
+
+	pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, drv->core.name);
+
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	/* Attach the driver to the PS3 system bus, then register it. */
+	drv->core.bus = &ps3_system_bus_type;
+	rc = driver_register(&drv->core);
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, drv->core.name);
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(ps3_system_bus_driver_register);
+
+void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv)
+{
+	pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, drv->core.name);
+	driver_unregister(&drv->core);	/* the only real work; rest is tracing */
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, drv->core.name);
+}
+
+EXPORT_SYMBOL_GPL(ps3_system_bus_driver_unregister);
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
new file mode 100644
index 0000000000..c9bfc113a9
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  PS3 time and rtc routines.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+#include "platform.h"
+
+void __init ps3_calibrate_decr(void)
+{
+	u64 tb_freq;
+	int rc = ps3_repository_read_be_tb_freq(0, &tb_freq);
+
+	/* A failed repository read is treated as fatal. */
+	BUG_ON(rc);
+
+	ppc_tb_freq = tb_freq;
+	ppc_proc_freq = ppc_tb_freq * 40;	/* set to 40 x the timebase */
+}
+
+static u64 read_rtc(void)
+{
+	u64 rtc_val, tb_val;
+	int rc;
+
+	/* lv1_get_rtc() also fills in tb_val; only rtc_val is used here. */
+	rc = lv1_get_rtc(&rtc_val, &tb_val);
+	BUG_ON(rc);
+
+	return rtc_val;
+}
+
+time64_t __init ps3_get_boot_time(void)
+{
+	return read_rtc() + ps3_os_area_get_rtc_diff();	/* rtc value + stored diff */
+}
+
+static int __init ps3_rtc_init(void)
+{
+	struct platform_device *rtc_dev;
+
+	/* Register "rtc-ps3" only when the PS3 LV1 firmware feature is set. */
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	rtc_dev = platform_device_register_simple("rtc-ps3", -1, NULL, 0);
+	return IS_ERR(rtc_dev) ? PTR_ERR(rtc_dev) : 0;
+}
+device_initcall(ps3_rtc_init);
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
new file mode 100644
index 0000000000..4ebf2ef284
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PSERIES
+	depends on PPC64 && PPC_BOOK3S
+	bool "IBM pSeries & new (POWER5-based) iSeries"
+	select HAVE_PCSPKR_PLATFORM
+	select MPIC
+	select OF_DYNAMIC
+	select FORCE_PCI
+	select PCI_MSI
+	select GENERIC_ALLOCATOR
+	select PPC_XICS
+	select PPC_XIVE_SPAPR
+	select PPC_ICP_NATIVE
+	select PPC_ICP_HV
+	select PPC_ICS_RTAS
+	select PPC_I8259
+	select PPC_RTAS
+	select PPC_RTAS_DAEMON
+	select RTAS_ERROR_LOGGING
+	select PPC_UDBG_16550
+	select PPC_DOORBELL
+	select HOTPLUG_CPU
+	select FORCE_SMP
+	select SWIOTLB
+	select ARCH_SUPPORTS_PER_VMA_LOCK
+	default y
+
+config PARAVIRT
+	bool
+
+config PARAVIRT_SPINLOCKS
+	bool
+
+config PARAVIRT_TIME_ACCOUNTING
+	select PARAVIRT
+	bool
+
+config PPC_SPLPAR
+	bool "Support for shared-processor logical partitions"
+	depends on PPC_PSERIES
+	select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+	select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
+	default y
+	help
+	  Enabling this option will make the kernel run more efficiently
+	  on logically-partitioned pSeries systems which use shared
+	  processors, that is, which share physical processors between
+	  two or more partitions.
+
+	  Say Y if you are unsure.
+
+config DTL
+	bool "Dispatch Trace Log"
+	depends on PPC_SPLPAR && DEBUG_FS
+	help
+	  SPLPAR machines can log hypervisor preempt & dispatch events to a
+	  kernel buffer. Saying Y here will enable logging these events,
+	  which are accessible through a debugfs file.
+
+	  Say N if you are unsure.
+
+config PSERIES_ENERGY
+	tristate "pSeries energy management capabilities driver"
+	depends on PPC_PSERIES
+	default y
+	help
+	  Provides interface to platform energy management capabilities
+	  on supported PSERIES platforms.
+	  Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
+	  and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
+
+config IO_EVENT_IRQ
+	bool "IO Event Interrupt support"
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option, if you want to enable support for IO Event
+	  interrupts. IO event interrupt is a mechanism provided by RTAS
+	  to return information about hardware error and non-error events
+	  which may need OS attention. RTAS returns events for multiple
+	  event types and scopes. Device drivers can register their handlers
+	  to receive events.
+
+	  This option will only enable the IO event platform code. You
+	  will still need to enable or compile the actual drivers
+	  that use this infrastructure to handle IO event interrupts.
+
+	  Say Y if you are unsure.
+
+config LPARCFG
+	bool "LPAR Configuration Data"
+	depends on PPC_PSERIES
+	help
+	  Provide system capacity information via human readable
+	  <key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
+
+config PPC_PSERIES_DEBUG
+	depends on PPC_PSERIES && PPC_EARLY_DEBUG
+	bool "Enable extra debug logging in platforms/pseries"
+	default y
+	help
+	  Say Y here if you want the pseries core to produce a bunch of
+	  debug messages to the system log. Select this if you are having a
+	  problem with the pseries core and want to see more of what is
+	  going on. This does not enable debugging in lpar.c, which must
+	  be manually done due to its verbosity.
+
+config PPC_SMLPAR
+	bool "Support for shared-memory logical partitions"
+	depends on PPC_PSERIES
+	select LPARCFG
+	help
+	  Select this option to enable shared memory partition support.
+	  With this option a system running in an LPAR can be given more
+	  memory than physically available and will allow firmware to
+	  balance memory across many LPARs.
+
+config CMM
+	tristate "Collaborative memory management"
+	depends on PPC_SMLPAR
+	select MEMORY_BALLOON
+	default y
+	help
+	  Select this option, if you want to enable the kernel interface
+	  to reduce the memory size of the system. This is accomplished
+	  by allocating pages of memory and putting them "on hold". This only
+	  makes sense for a system running in an LPAR where the unused pages
+	  will be reused for other LPARs. The interface allows firmware to
+	  balance memory across many LPARs.
+
+config HV_PERF_CTRS
+	bool "Hypervisor supplied PMU events (24x7 & GPCI)"
+	default y
+	depends on PERF_EVENTS && PPC_PSERIES
+	help
+	  Enable access to hypervisor supplied counters in perf. Currently,
+	  this enables code that uses the hcall GetPerfCounterInfo and 24x7
+	  interfaces to retrieve counters. GPCI exists on Power 6 and later
+	  systems. 24x7 is available on Power 8 and later systems.
+
+	  If unsure, select Y.
+
+config IBMVIO
+	depends on PPC_PSERIES
+	bool
+	default y
+
+config IBMEBUS
+	depends on PPC_PSERIES && !CPU_LITTLE_ENDIAN
+	bool "Support for GX bus based adapters"
+	help
+	  Bus device driver for GX bus based adapters.
+
+config PSERIES_PLPKS
+	depends on PPC_PSERIES
+	select NLS
+	bool
+	# PowerVM provides an isolated Platform Keystore (PKS) storage
+	# allocation for each LPAR with individually managed access
+	# controls to store sensitive information securely. It can be
+	# used to store asymmetric public keys or secrets as required
+	# by different usecases.
+	#
+	# This option is selected by in-kernel consumers that require
+	# access to the PKS.
+
+config PAPR_SCM
+	depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM
+	tristate "Support for the PAPR Storage Class Memory interface"
+	help
+	  Enable access to hypervisor provided storage class memory.
+
+config PPC_SVM
+	bool "Secure virtual machine (SVM) support for POWER"
+	depends on PPC_PSERIES
+	select SWIOTLB
+	select ARCH_HAS_MEM_ENCRYPT
+	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+	select ARCH_HAS_CC_PLATFORM
+	help
+	 There are certain POWER platforms which support secure guests using
+	 the Protected Execution Facility, with the help of an Ultravisor
+	 executing below the hypervisor layer. This enables support for
+	 those guests.
+
+	 If unsure, say "N".
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
new file mode 100644
index 0000000000..53c3b91af2
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-$(CONFIG_PPC64)			:= $(NO_MINIMAL_TOC)
+ccflags-$(CONFIG_PPC_PSERIES_DEBUG)	+= -DDEBUG
+
+obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
+			   of_helpers.o rtas-work-area.o papr-sysparm.o \
+			   setup.o iommu.o event_sources.o ras.o \
+			   firmware.o power.o dlpar.o mobility.o rng.o \
+			   pci.o pci_dlpar.o eeh_pseries.o msi.o \
+			   papr_platform_attributes.o dtl.o
+obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_KEXEC_CORE)	+= kexec.o
+obj-$(CONFIG_PSERIES_ENERGY)	+= pseries_energy.o
+
+obj-$(CONFIG_HOTPLUG_CPU)	+= hotplug-cpu.o
+obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o pmem.o
+
+obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
+obj-$(CONFIG_HVCS)		+= hvcserver.o
+obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
+obj-$(CONFIG_CMM)		+= cmm.o
+obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
+obj-$(CONFIG_LPARCFG)		+= lparcfg.o
+obj-$(CONFIG_IBMVIO)		+= vio.o
+obj-$(CONFIG_IBMEBUS)		+= ibmebus.o
+obj-$(CONFIG_PAPR_SCM)		+= papr_scm.o
+obj-$(CONFIG_PPC_SPLPAR)	+= vphn.o
+obj-$(CONFIG_PPC_SVM)		+= svm.o
+obj-$(CONFIG_FA_DUMP)		+= rtas-fadump.o
+obj-$(CONFIG_PSERIES_PLPKS)	+= plpks.o
+obj-$(CONFIG_PPC_SECURE_BOOT)	+= plpks-secvar.o
+obj-$(CONFIG_SUSPEND)		+= suspend.o
+obj-$(CONFIG_PPC_VAS)		+= vas.o vas-sysfs.o
+
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM)	+= cc_platform.o
+
+# nothing that operates in real mode is safe for KASAN
+KASAN_SANITIZE_ras.o := n
+KASAN_SANITIZE_kexec.o := n
diff --git a/arch/powerpc/platforms/pseries/cc_platform.c b/arch/powerpc/platforms/pseries/cc_platform.c
new file mode 100644
index 0000000000..e8021af83a
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cc_platform.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#include <linux/export.h>
+#include <linux/cc_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/svm.h>
+
+/*
+ * Report whether a confidential-computing attribute is active: memory
+ * encryption follows is_secure_guest(), every other attribute is false.
+ */
+bool cc_platform_has(enum cc_attr attr)
+{
+	if (attr != CC_ATTR_MEM_ENCRYPT)
+		return false;
+	return is_secure_guest();
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 0000000000..5f4037c1d7
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Collaborative memory management interface.
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Author(s): Brian King (brking@linux.vnet.ibm.com),
+ */
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/oom.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/stringify.h>
+#include <linux/swap.h>
+#include <linux/device.h>
+#include <linux/balloon_compaction.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <linux/memory.h>
+#include <asm/plpar_wrappers.h>
+
+#include "pseries.h"
+
+#define CMM_DRIVER_VERSION	"1.0.0"
+#define CMM_DEFAULT_DELAY	1
+#define CMM_HOTPLUG_DELAY	5
+#define CMM_DEBUG			0
+#define CMM_DISABLE		0
+#define CMM_OOM_KB		1024
+#define CMM_MIN_MEM_MB		256
+#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
+#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
+
+#define CMM_MEM_HOTPLUG_PRI	1
+
+static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
+static unsigned int oom_kb = CMM_OOM_KB;
+static unsigned int cmm_debug = CMM_DEBUG;
+static unsigned int cmm_disabled = CMM_DISABLE;
+static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
+static struct device cmm_dev;
+
+MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CMM_DRIVER_VERSION);
+
+module_param_named(delay, delay, uint, 0644);
+MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
+		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
+MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
+		 "before loaning resumes. "
+		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
+module_param_named(oom_kb, oom_kb, uint, 0644);
+MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
+		 "[Default=" __stringify(CMM_OOM_KB) "]");
+module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
+MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
+		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
+module_param_named(debug, cmm_debug, uint, 0644);
+MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
+		 "[Default=" __stringify(CMM_DEBUG) "]");
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
+/* Log at KERN_INFO when the "debug" parameter is set; safe inside if/else. */
+#define cmm_dbg(...) do { if (cmm_debug) printk(KERN_INFO "cmm: "__VA_ARGS__); } while (0)
+
+static atomic_long_t loaned_pages;
+static unsigned long loaned_pages_target;
+static unsigned long oom_freed_pages;
+
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred; /* protected by the hotplug mutex */
+
+static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
+
+static long plpar_page_set_loaned(struct page *page)
+{
+	const unsigned long vpa = page_to_phys(page);
+	unsigned long cmo_page_sz = cmo_get_page_size();
+	long rc = 0;
+	int i;
+
+	if (unlikely(simulate))
+		return 0;
+	/* Loan the page one cmo_page_sz chunk at a time; stop on first error. */
+	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
+		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
+	/* On error, walk back and set the chunks already loaned to active. */
+	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
+		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
+				   vpa + i - cmo_page_sz, 0);
+
+	return rc;
+}
+
+static long plpar_page_set_active(struct page *page)
+{
+	const unsigned long vpa = page_to_phys(page);
+	unsigned long cmo_page_sz = cmo_get_page_size();
+	long rc = 0;
+	int i;
+
+	if (unlikely(simulate))
+		return 0;
+	/* Activate the page one cmo_page_sz chunk at a time; stop on error. */
+	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
+		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
+	/* On error, walk back and set the chunks already activated to loaned. */
+	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
+		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
+				   vpa + i - cmo_page_sz, 0);
+
+	return rc;
+}
+
+/**
+ * cmm_alloc_pages - Allocate pages and mark them as loaned
+ * @nr:	number of pages to allocate
+ *
+ * Return value:
+ * 	number of pages requested to be allocated which were not
+ **/
+static long cmm_alloc_pages(long nr)
+{
+	struct page *pg;
+	long rc;
+
+	cmm_dbg("Begin request for %ld pages\n", nr);
+
+	for (; nr; nr--) {
+		int hotplug_seen;
+
+		/* Exit if a hotplug operation is in progress (the mutex is
+		 * held) or has occurred (the flag is set). */
+		if (!mutex_trylock(&hotplug_mutex))
+			break;
+		hotplug_seen = hotplug_occurred;
+		mutex_unlock(&hotplug_mutex);
+		if (hotplug_seen)
+			break;
+
+		pg = balloon_page_alloc();
+		if (!pg)
+			break;
+		rc = plpar_page_set_loaned(pg);
+		if (rc) {
+			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
+			__free_page(pg);
+			break;
+		}
+
+		/* Loaned successfully: queue it in the balloon and account it. */
+		balloon_page_enqueue(&b_dev_info, pg);
+		atomic_long_inc(&loaned_pages);
+		adjust_managed_page_count(pg, -1);
+	}
+
+	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+	return nr;
+}
+
+/**
+ * cmm_free_pages - Free pages and mark them as active
+ * @nr:	number of pages to free
+ *
+ * Return value:
+ * 	number of pages requested to be freed which were not
+ **/
+static long cmm_free_pages(long nr)
+{
+	struct page *pg;
+
+	cmm_dbg("Begin free of %ld pages.\n", nr);
+	/* Hand pages back until the request is met or the balloon is empty. */
+	for (; nr; nr--) {
+		pg = balloon_page_dequeue(&b_dev_info);
+		if (!pg)
+			break;
+		plpar_page_set_active(pg);	/* result is not checked */
+		adjust_managed_page_count(pg, 1);
+		__free_page(pg);
+		atomic_long_dec(&loaned_pages);
+	}
+	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+	return nr;
+}
+
+/**
+ * cmm_oom_notify - OOM notifier
+ * @self:	notifier block struct
+ * @dummy:	not used
+ * @parm:	returned - number of pages freed
+ *
+ * Return: NOTIFY_OK
+ **/
+static int cmm_oom_notify(struct notifier_block *self,
+			  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	const long requested = KB2PAGES(oom_kb);	/* read the tunable once */
+	long released;
+
+	cmm_dbg("OOM processing started\n");
+	released = requested - cmm_free_pages(requested);
+	loaned_pages_target = atomic_long_read(&loaned_pages);
+	*freed += released;
+	oom_freed_pages += released;
+	cmm_dbg("OOM processing complete\n");
+	return NOTIFY_OK;
+}
+
+/**
+ * cmm_get_mpp - Read memory performance parameters
+ *
+ * Makes hcall to query the current page loan request from the hypervisor
+ * and stores the resulting, clamped target in loaned_pages_target.
+ * Return value:
+ * 	nothing
+ **/
+static void cmm_get_mpp(void)
+{
+	const long __loaned_pages = atomic_long_read(&loaned_pages);
+	const long total_pages = totalram_pages() + __loaned_pages;
+	int rc;
+	struct hvcall_mpp_data mpp_data;
+	signed long active_pages_target, page_loan_request, target;
+	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
+
+	if (likely(!simulate)) {
+		rc = h_get_mpp(&mpp_data);
+		if (rc != H_SUCCESS)
+			return;
+		page_loan_request = div_s64((s64)mpp_data.loan_request,
+					    PAGE_SIZE);
+		target = page_loan_request + __loaned_pages;
+	} else {
+		target = KB2PAGES(simulate_loan_target_kb);
+		page_loan_request = target - __loaned_pages;
+	}
+	/* Loan nothing on a negative target or when below the memory floor. */
+	if (target < 0 || total_pages < min_mem_pages)
+		target = 0;
+	/* Subtract the pages counted in oom_freed_pages from the target. */
+	if (target > oom_freed_pages)
+		target -= oom_freed_pages;
+	else
+		target = 0;
+
+	active_pages_target = total_pages - target;
+	/* Keep at least min_mem_pages outside the loan. */
+	if (min_mem_pages > active_pages_target)
+		target = total_pages - min_mem_pages;
+
+	if (target < 0)
+		target = 0;
+
+	loaned_pages_target = target;
+
+	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
+		page_loan_request, __loaned_pages, loaned_pages_target,
+		oom_freed_pages, totalram_pages());
+}
+
+static struct notifier_block cmm_oom_nb = {
+	.notifier_call = cmm_oom_notify
+};
+
+/**
+ * cmm_thread - CMM task thread
+ * @dummy:	not used
+ * Sleeps "delay" seconds per pass, then loans or frees pages toward the target.
+ * Return value:
+ * 	0
+ **/
+static int cmm_thread(void *dummy)
+{
+	unsigned long timeleft;
+	long __loaned_pages;
+
+	while (1) {
+		timeleft = msleep_interruptible(delay * 1000);
+		/* Leave on kthread_stop() or when the sleep was cut short. */
+		if (kthread_should_stop() || timeleft)
+			break;
+
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				hotplug_occurred = 0;
+				mutex_unlock(&hotplug_mutex);
+				cmm_dbg("Hotplug operation has occurred, "
+						"loaning activity suspended "
+						"for %d seconds.\n",
+						hotplug_delay);
+				timeleft = msleep_interruptible(hotplug_delay *
+						1000);
+				if (kthread_should_stop() || timeleft)
+					break;
+				continue;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			cmm_dbg("Hotplug operation in progress, activity "
+					"suspended\n");
+			continue;
+		}
+
+		cmm_get_mpp();
+		/* Grow or shrink the balloon toward loaned_pages_target. */
+		__loaned_pages = atomic_long_read(&loaned_pages);
+		if (loaned_pages_target > __loaned_pages) {
+			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+				loaned_pages_target = __loaned_pages;
+		} else if (loaned_pages_target < __loaned_pages)
+			cmm_free_pages(__loaned_pages - loaned_pages_target);
+	}
+	return 0;
+}
+
+#define CMM_SHOW(name, format, args...)			\
+	static ssize_t show_##name(struct device *dev,	\
+				   struct device_attribute *attr,	\
+				   char *buf)			\
+	{							\
+		return sprintf(buf, format, ##args);		\
+	}							\
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
+CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
+
+static ssize_t show_oom_pages(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
+}
+
+static ssize_t store_oom_pages(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	/* Only a privileged write of 0 is accepted; it resets the counter. */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (simple_strtoul(buf, NULL, 10) != 0)
+		return -EBADMSG;
+
+	oom_freed_pages = 0;
+	return count;
+}
+
+static DEVICE_ATTR(oom_freed_kb, 0644,
+		   show_oom_pages, store_oom_pages);
+
+static struct device_attribute *cmm_attrs[] = {
+	&dev_attr_loaned_kb,
+	&dev_attr_loaned_target_kb,
+	&dev_attr_oom_freed_kb,
+};
+
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+			 simulate_loan_target_kb);
+
+static struct bus_type cmm_subsys = {
+	.name = "cmm",
+	.dev_name = "cmm",
+};
+
+static void cmm_release_device(struct device *dev)
+{
+}
+
+/**
+ * cmm_sysfs_register - Register with sysfs
+ * @dev:	device to register on the cmm bus
+ *
+ * Return: 0 on success / other on failure
+ **/
+static int cmm_sysfs_register(struct device *dev)
+{
+	int i, rc;
+
+	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
+		return rc;
+
+	dev->id = 0;
+	dev->bus = &cmm_subsys;
+	dev->release = cmm_release_device;
+
+	if ((rc = device_register(dev))) {
+		put_device(dev);	/* failed register still holds a ref */
+		goto subsys_unregister;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
+		if ((rc = device_create_file(dev, cmm_attrs[i])))
+			goto fail;
+	}
+
+	if (!simulate)
+		return 0;
+	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+	if (!rc)
+		return 0;
+fail:
+	while (--i >= 0)
+		device_remove_file(dev, cmm_attrs[i]);
+	device_unregister(dev);
+subsys_unregister:
+	bus_unregister(&cmm_subsys);
+	return rc;
+}
+
+/**
+ * cmm_unregister_sysfs - Unregister from sysfs
+ * @dev:	device whose attributes, registration and bus are torn down
+ **/
+static void cmm_unregister_sysfs(struct device *dev)
+{
+	struct device_attribute **attr = cmm_attrs;
+
+	while (attr < cmm_attrs + ARRAY_SIZE(cmm_attrs))
+		device_remove_file(dev, *attr++);
+	device_unregister(dev);
+	bus_unregister(&cmm_subsys);
+}
+
+/**
+ * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
+ *
+ **/
+static int cmm_reboot_notifier(struct notifier_block *nb,
+			       unsigned long action, void *unused)
+{
+	if (action == SYS_RESTART) {
+		if (!IS_ERR_OR_NULL(cmm_thread_ptr))	/* ERR_PTR if a start failed */
+			kthread_stop(cmm_thread_ptr);
+		cmm_thread_ptr = NULL;
+		cmm_free_pages(atomic_long_read(&loaned_pages));
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cmm_reboot_nb = {
+	.notifier_call = cmm_reboot_notifier,
+};
+
+/**
+ * cmm_memory_cb - Handle memory hotplug notifier calls
+ * @self:	notifier block struct
+ * @action:	action to take
+ * @arg:	struct memory_notify data for handler
+ *
+ * Return value:
+ *	NOTIFY_OK, unconditionally
+ *
+ **/
+static int cmm_memory_cb(struct notifier_block *self,
+			unsigned long action, void *arg)
+{
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		mutex_lock(&hotplug_mutex);	/* released on (CANCEL_)OFFLINE */
+		hotplug_occurred = 1;
+		break;
+	case MEM_OFFLINE:
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&hotplug_mutex);
+		cmm_dbg("Memory offline operation complete.\n");
+		break;
+	case MEM_GOING_ONLINE:
+	case MEM_ONLINE:
+	case MEM_CANCEL_ONLINE:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cmm_mem_nb = {
+	.notifier_call = cmm_memory_cb,
+	.priority = CMM_MEM_HOTPLUG_PRI
+};
+
+#ifdef CONFIG_BALLOON_COMPACTION
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+			   struct page *newpage, struct page *page,
+			   enum migrate_mode mode)
+{
+	unsigned long flags;
+
+	/*
+	 * Migration callback (b_dev_info.migratepage): newpage takes over
+	 * from page in the balloon.  Loan/"inflate" the newpage first.
+	 * We might race against the cmm_thread who might discover after our
+	 * loan request that another page is to be unloaned. However, once
+	 * the cmm_thread runs again later, this error will automatically
+	 * be corrected.
+	 */
+	if (plpar_page_set_loaned(newpage)) {
+		/* Unlikely, but possible. Tell the caller not to retry now. */
+		pr_err_ratelimited("%s: Cannot set page to loaned.\n", __func__);
+		return -EBUSY;
+	}
+
+	/* balloon page list reference */
+	get_page(newpage);
+
+	/*
+	 * When we migrate a page to a different zone, we have to fixup the
+	 * count of both involved zones as we adjusted the managed page count
+	 * when inflating.
+	 */
+	if (page_zone(page) != page_zone(newpage)) {
+		adjust_managed_page_count(page, 1);
+		adjust_managed_page_count(newpage, -1);
+	}
+
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	balloon_page_insert(b_dev_info, newpage);
+	balloon_page_delete(page);
+	b_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+	/*
+	 * activate/"deflate" the old page. We ignore any errors just like the
+	 * other callers.
+	 */
+	plpar_page_set_active(page);
+
+	/* balloon page list reference */
+	put_page(page);
+
+	return MIGRATEPAGE_SUCCESS;
+}
+
+#endif /* CONFIG_BALLOON_COMPACTION */
+
+/* b_dev_info feeds balloon_page_enqueue()/dequeue() in every config. */
+static void cmm_balloon_compaction_init(void)
+{
+	balloon_devinfo_init(&b_dev_info);
+#ifdef CONFIG_BALLOON_COMPACTION
+	b_dev_info.migratepage = cmm_migratepage;
+#endif
+}
+
+/**
+ * cmm_init - Module initialization
+ *
+ * Return: 0 on success / other on failure
+ **/
+static int cmm_init(void)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
+		return -EOPNOTSUPP;
+
+	cmm_balloon_compaction_init();
+
+	rc = register_oom_notifier(&cmm_oom_nb);
+	if (rc < 0)
+		goto out_balloon_compaction;
+
+	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
+		goto out_oom_notifier;
+
+	if ((rc = cmm_sysfs_register(&cmm_dev)))
+		goto out_reboot_notifier;
+
+	rc = register_memory_notifier(&cmm_mem_nb);
+	if (rc)
+		goto out_unregister_notifier;
+
+	if (cmm_disabled)
+		return 0;
+
+	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+	if (IS_ERR(cmm_thread_ptr)) {
+		rc = PTR_ERR(cmm_thread_ptr);
+		cmm_thread_ptr = NULL;	/* don't leave an ERR_PTR behind */
+		goto out_unregister_notifier;
+	}
+
+	return 0;
+out_unregister_notifier:
+	unregister_memory_notifier(&cmm_mem_nb);
+	cmm_unregister_sysfs(&cmm_dev);
+out_reboot_notifier:
+	unregister_reboot_notifier(&cmm_reboot_nb);
+out_oom_notifier:
+	unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+	return rc;
+}
+
+/**
+ * cmm_exit - Module exit
+ *
+ * Return value:
+ * 	nothing
+ **/
+static void cmm_exit(void)
+{
+	if (!IS_ERR_OR_NULL(cmm_thread_ptr))	/* ERR_PTR if a start failed */
+		kthread_stop(cmm_thread_ptr);
+	unregister_oom_notifier(&cmm_oom_nb);
+	unregister_reboot_notifier(&cmm_reboot_nb);
+	unregister_memory_notifier(&cmm_mem_nb);
+	cmm_free_pages(atomic_long_read(&loaned_pages));
+	cmm_unregister_sysfs(&cmm_dev);
+}
+
+/**
+ * cmm_set_disable - Disable/Enable CMM
+ *
+ * Return: 0 on success / other on failure
+ **/
+static int cmm_set_disable(const char *val, const struct kernel_param *kp)
+{
+	int disable = simple_strtoul(val, NULL, 10);
+	struct task_struct *task;
+
+	if (disable != 0 && disable != 1)
+		return -EINVAL;
+
+	if (disable && !cmm_disabled) {
+		if (!IS_ERR_OR_NULL(cmm_thread_ptr))
+			kthread_stop(cmm_thread_ptr);
+		cmm_thread_ptr = NULL;
+		cmm_free_pages(atomic_long_read(&loaned_pages));
+	} else if (!disable && cmm_disabled) {
+		task = kthread_run(cmm_thread, NULL, "cmmthread");
+		if (IS_ERR(task))
+			return PTR_ERR(task);	/* cmm_thread_ptr is left untouched */
+		cmm_thread_ptr = task;
+	}
+	cmm_disabled = disable;
+	return 0;
+}
+
+module_param_call(disable, cmm_set_disable, param_get_uint,
+		  &cmm_disabled, 0644);
+MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
+		 "[Default=" __stringify(CMM_DISABLE) "]");
+
+module_init(cmm_init);
+module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
new file mode 100644
index 0000000000..47f8eabd1b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -0,0 +1,583 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support for dynamic reconfiguration for PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms.
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ */
+
+#define pr_fmt(fmt)	"dlpar: " fmt
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include "of_helpers.h"
+#include "pseries.h"
+
+#include <asm/machdep.h>
+#include <linux/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/rtas-work-area.h>
+
+/* Ordered workqueue on which queued hotplug events are processed */
+static struct workqueue_struct *pseries_hp_wq;
+
+/* One queued hotplug request: the work item plus the error log it handles */
+struct pseries_hp_work {
+	struct work_struct work;
+	struct pseries_hp_errorlog *errlog;	/* freed by pseries_hp_work_fn() */
+};
+
+/*
+ * Header at the start of the RTAS work area used for the
+ * configure-connector call. All fields are big-endian. The *_offset
+ * fields are byte offsets from the start of this structure; prop_length
+ * is the byte length of the property value.
+ */
+struct cc_workarea {
+	__be32	drc_index;
+	__be32	zero;
+	__be32	name_offset;
+	__be32	prop_length;
+	__be32	prop_offset;
+};
+
+/*
+ * Release a property together with the name and value buffers it owns.
+ * @prop must not be NULL.
+ */
+void dlpar_free_cc_property(struct property *prop)
+{
+	kfree(prop->value);
+	kfree(prop->name);
+	kfree(prop);
+}
+
+/*
+ * Build a struct property from the name/value pair described by the
+ * configure-connector work area. Both name and value are copied, so the
+ * result does not reference @ccwa. Returns NULL on allocation failure.
+ */
+static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
+{
+	char *base = (char *)ccwa;
+	struct property *prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+
+	if (!prop)
+		return NULL;
+
+	prop->name = kstrdup(base + be32_to_cpu(ccwa->name_offset), GFP_KERNEL);
+	if (!prop->name)
+		goto err_free;
+
+	prop->length = be32_to_cpu(ccwa->prop_length);
+	prop->value = kmemdup(base + be32_to_cpu(ccwa->prop_offset),
+			      prop->length, GFP_KERNEL);
+	if (!prop->value)
+		goto err_free;
+
+	return prop;
+
+err_free:
+	dlpar_free_cc_property(prop);
+	return NULL;
+}
+
+/*
+ * Allocate a dynamic device_node named after the string found at
+ * name_offset in the work area. Returns NULL on allocation failure.
+ */
+static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
+{
+	const char *node_name = (const char *)ccwa +
+				be32_to_cpu(ccwa->name_offset);
+	struct device_node *dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+
+	if (!dn)
+		return NULL;
+
+	dn->full_name = kstrdup(node_name, GFP_KERNEL);
+	if (dn->full_name) {
+		of_node_set_flag(dn, OF_DYNAMIC);
+		of_node_init(dn);
+		return dn;
+	}
+
+	kfree(dn);
+	return NULL;
+}
+
+/* Free a single node: its whole property list, its name and the node. */
+static void dlpar_free_one_cc_node(struct device_node *dn)
+{
+	struct property *cur, *next;
+
+	for (cur = dn->properties; cur; cur = next) {
+		next = cur->next;
+		dlpar_free_cc_property(cur);
+	}
+	dn->properties = NULL;
+
+	kfree(dn->full_name);
+	kfree(dn);
+}
+
+/*
+ * Recursively free @dn, everything below it (child links) and everything
+ * after it (sibling links). Children are released before siblings, the
+ * node itself last.
+ */
+void dlpar_free_cc_nodes(struct device_node *dn)
+{
+	struct device_node *child = dn->child;
+	struct device_node *sibling = dn->sibling;
+
+	if (child)
+		dlpar_free_cc_nodes(child);
+	if (sibling)
+		dlpar_free_cc_nodes(sibling);
+
+	dlpar_free_one_cc_node(dn);
+}
+
+/*
+ * Status values of the configure-connector RTAS call as handled by
+ * dlpar_configure_connector(): each non-zero NEXT_ and PREV_ value tells the
+ * caller what the work area currently describes, COMPLETE ends the walk.
+ */
+#define COMPLETE	0
+#define NEXT_SIBLING    1
+#define NEXT_CHILD      2
+#define NEXT_PROPERTY   3
+#define PREV_PARENT     4
+#define MORE_MEMORY     5
+#define ERR_CFG_USE     -9003
+
+/*
+ * dlpar_configure_connector - build a device-node subtree from firmware
+ * @drc_index: DRC index, already big-endian, stored verbatim in the work area
+ * @parent: node recorded as the parent of the first node that is created
+ *
+ * Repeatedly issues the configure-connector RTAS call on a 4K work area
+ * and interprets each returned status to add a sibling, a child or a
+ * property, or to step back to the parent, until COMPLETE is returned.
+ *
+ * Returns the first node created (NULL if none was created), or NULL if
+ * the RTAS service is unknown or any step failed; on failure every node
+ * built so far is freed. The returned nodes are not attached to the
+ * device tree by this function.
+ */
+struct device_node *dlpar_configure_connector(__be32 drc_index,
+					      struct device_node *parent)
+{
+	struct device_node *dn;
+	struct device_node *first_dn = NULL;
+	struct device_node *last_dn = NULL;
+	struct property *property;
+	struct property *last_property = NULL;
+	struct cc_workarea *ccwa;
+	struct rtas_work_area *work_area;
+	char *data_buf;
+	int cc_token;
+	int rc = -1;
+
+	cc_token = rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR);
+	if (cc_token == RTAS_UNKNOWN_SERVICE)
+		return NULL;
+
+	work_area = rtas_work_area_alloc(SZ_4K);
+	data_buf = rtas_work_area_raw_buf(work_area);
+
+	/* the work area starts with the cc_workarea header */
+	ccwa = (struct cc_workarea *)&data_buf[0];
+	ccwa->drc_index = drc_index;
+	ccwa->zero = 0;
+
+	do {
+		/* retry while firmware reports a busy/extended-delay status */
+		do {
+			rc = rtas_call(cc_token, 2, 1, NULL,
+				       rtas_work_area_phys(work_area), NULL);
+		} while (rtas_busy_delay(rc));
+
+		switch (rc) {
+		case COMPLETE:
+			break;
+
+		case NEXT_SIBLING:
+			dn = dlpar_parse_cc_node(ccwa);
+			if (!dn)
+				goto cc_error;
+
+			/*
+			 * NOTE(review): last_dn is dereferenced unchecked, so
+			 * this relies on a node having been created before the
+			 * first NEXT_SIBLING - confirm against the firmware
+			 * call sequence.
+			 */
+			dn->parent = last_dn->parent;
+			last_dn->sibling = dn;
+			last_dn = dn;
+			break;
+
+		case NEXT_CHILD:
+			dn = dlpar_parse_cc_node(ccwa);
+			if (!dn)
+				goto cc_error;
+
+			if (!first_dn) {
+				/* very first node: hangs off the caller's parent */
+				dn->parent = parent;
+				first_dn = dn;
+			} else {
+				dn->parent = last_dn;
+				if (last_dn)
+					last_dn->child = dn;
+			}
+
+			last_dn = dn;
+			break;
+
+		case NEXT_PROPERTY:
+			property = dlpar_parse_cc_property(ccwa);
+			if (!property)
+				goto cc_error;
+
+			/*
+			 * First property of the current node starts its list,
+			 * later ones are chained after the previously parsed
+			 * property. NOTE(review): like NEXT_SIBLING this
+			 * assumes last_dn is non-NULL.
+			 */
+			if (!last_dn->properties)
+				last_dn->properties = property;
+			else
+				last_property->next = property;
+
+			last_property = property;
+			break;
+
+		case PREV_PARENT:
+			last_dn = last_dn->parent;
+			break;
+
+		case MORE_MEMORY:
+		case ERR_CFG_USE:
+		default:
+			printk(KERN_ERR "Unexpected Error (%d) "
+			       "returned from configure-connector\n", rc);
+			goto cc_error;
+		}
+	} while (rc);
+
+	/* reached both on success (rc == 0) and on every failure (rc != 0) */
+cc_error:
+	rtas_work_area_free(work_area);
+
+	if (rc) {
+		if (first_dn)
+			dlpar_free_cc_nodes(first_dn);
+
+		return NULL;
+	}
+
+	return first_dn;
+}
+
+/*
+ * Set @parent as the parent of @dn and attach @dn to the device tree.
+ * Returns 0 on success or the of_attach_node() error, which is also logged.
+ */
+int dlpar_attach_node(struct device_node *dn, struct device_node *parent)
+{
+	int err;
+
+	dn->parent = parent;
+	err = of_attach_node(dn);
+	if (!err)
+		return 0;
+
+	printk(KERN_ERR "Failed to add device node %pOF\n", dn);
+	return err;
+}
+
+/*
+ * Detach @dn from the device tree after first detaching every child
+ * below it. Results of the recursive child detaches are not checked.
+ * On success one reference on @dn is dropped with of_node_put(); if
+ * of_detach_node() fails its error is returned and no reference is dropped.
+ */
+int dlpar_detach_node(struct device_node *dn)
+{
+	struct device_node *child;
+	int rc;
+
+	child = of_get_next_child(dn, NULL);
+	while (child) {
+		dlpar_detach_node(child);
+		/* the just-detached child is still used as the iteration cursor */
+		child = of_get_next_child(dn, child);
+	}
+
+	rc = of_detach_node(dn);
+	if (rc)
+		return rc;
+
+	of_node_put(dn);
+
+	return 0;
+}
+
+/*
+ * Tokens and values used by the DRC helpers below.
+ * DR_ENTITY_SENSE is passed to rtas_get_sensor(); DR_ENTITY_* are the
+ * sensor states compared against. ALLOCATION_STATE and ISOLATION_STATE
+ * are passed to rtas_set_indicator() together with the ALLOC_* and
+ * ISOLATE/UNISOLATE values respectively.
+ */
+#define DR_ENTITY_SENSE		9003
+#define DR_ENTITY_PRESENT	1
+#define DR_ENTITY_UNUSABLE	2
+#define ALLOCATION_STATE	9003
+#define ALLOC_UNUSABLE		0
+#define ALLOC_USABLE		1
+#define ISOLATION_STATE		9001
+#define ISOLATE			0
+#define UNISOLATE		1
+
+/*
+ * Take a DRC that is currently sensed as unusable: mark it usable, then
+ * unisolate it. If unisolation fails the allocation state is put back to
+ * unusable. Returns 0 on success, -1 if the sensor read fails or reports
+ * another state, otherwise the failing rtas_set_indicator() result.
+ */
+int dlpar_acquire_drc(u32 drc_index)
+{
+	int state, err;
+
+	if (rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &state))
+		return -1;
+	if (state != DR_ENTITY_UNUSABLE)
+		return -1;
+
+	err = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+	if (err)
+		return err;
+
+	err = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+	if (!err)
+		return 0;
+
+	/* undo the allocation change made above */
+	rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+	return err;
+}
+
+/*
+ * Give back a DRC that is currently sensed as present: isolate it, then
+ * mark it unusable. If the allocation change fails the DRC is unisolated
+ * again. Returns 0 on success, -1 if the sensor read fails or reports
+ * another state, otherwise the failing rtas_set_indicator() result.
+ */
+int dlpar_release_drc(u32 drc_index)
+{
+	int state, err;
+
+	if (rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &state))
+		return -1;
+	if (state != DR_ENTITY_PRESENT)
+		return -1;
+
+	err = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+	if (err)
+		return err;
+
+	err = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+	if (!err)
+		return 0;
+
+	/* undo the isolation made above */
+	rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+	return err;
+}
+
+/*
+ * Unisolate a DRC that is currently sensed as present. Returns -1 if the
+ * sensor read fails or reports another state, 0 otherwise. The result of
+ * the indicator call itself is not checked.
+ */
+int dlpar_unisolate_drc(u32 drc_index)
+{
+	int state;
+
+	if (rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &state) ||
+	    state != DR_ENTITY_PRESENT)
+		return -1;
+
+	rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+	return 0;
+}
+
+/*
+ * Convert the DRC identifier of @hp_elog from big-endian to CPU byte
+ * order in place, then dispatch to the handler for its resource type.
+ * Returns the handler's result, or -EINVAL for an unknown resource.
+ */
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+{
+	/* pseries error logs are in BE format, convert to cpu type */
+	if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+		hp_elog->_drc_u.drc_count =
+				be32_to_cpu(hp_elog->_drc_u.drc_count);
+	} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+		hp_elog->_drc_u.drc_index =
+				be32_to_cpu(hp_elog->_drc_u.drc_index);
+	} else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+		hp_elog->_drc_u.ic.count =
+				be32_to_cpu(hp_elog->_drc_u.ic.count);
+		hp_elog->_drc_u.ic.index =
+				be32_to_cpu(hp_elog->_drc_u.ic.index);
+	}
+
+	switch (hp_elog->resource) {
+	case PSERIES_HP_ELOG_RESOURCE_MEM:
+		return dlpar_memory(hp_elog);
+	case PSERIES_HP_ELOG_RESOURCE_CPU:
+		return dlpar_cpu(hp_elog);
+	case PSERIES_HP_ELOG_RESOURCE_PMEM:
+		return dlpar_hp_pmem(hp_elog);
+	default:
+		break;
+	}
+
+	pr_warn_ratelimited("Invalid resource (%d) specified\n",
+			    hp_elog->resource);
+	return -EINVAL;
+}
+
+/*
+ * Workqueue callback: handle the error log carried by the work item, then
+ * free both the log copy and the work item.
+ */
+static void pseries_hp_work_fn(struct work_struct *work)
+{
+	struct pseries_hp_work *hp_work =
+			container_of(work, struct pseries_hp_work, work);
+
+	handle_dlpar_errorlog(hp_work->errlog);
+
+	kfree(hp_work->errlog);
+	/*
+	 * Free the allocation itself, not the embedded member: freeing
+	 * @work is only correct while it sits at offset 0 of the struct.
+	 */
+	kfree(hp_work);
+}
+
+/*
+ * Queue a copy of @hp_errlog for processing on the hotplug workqueue.
+ * Uses GFP_ATOMIC allocations; if either allocation fails the event is
+ * dropped without notice to the caller. The copy and the work item are
+ * released by pseries_hp_work_fn().
+ */
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
+{
+	struct pseries_hp_work *work;
+	struct pseries_hp_errorlog *hp_errlog_copy;
+
+	hp_errlog_copy = kmemdup(hp_errlog, sizeof(*hp_errlog), GFP_ATOMIC);
+	if (!hp_errlog_copy)
+		return;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		kfree(hp_errlog_copy);
+		return;
+	}
+
+	/* address the member explicitly instead of casting the container */
+	INIT_WORK(&work->work, pseries_hp_work_fn);
+	work->errlog = hp_errlog_copy;
+	queue_work(pseries_hp_wq, &work->work);
+}
+
+/*
+ * Consume the next space-separated token of *@cmd and translate it into
+ * hp_elog->resource ("memory" or "cpu"). Returns 0 or -EINVAL.
+ */
+static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+	char *token = strsep(cmd, " ");
+
+	if (!token)
+		return -EINVAL;
+
+	if (sysfs_streq(token, "memory")) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+		return 0;
+	}
+
+	if (sysfs_streq(token, "cpu")) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU;
+		return 0;
+	}
+
+	pr_err("Invalid resource specified.\n");
+	return -EINVAL;
+}
+
+/*
+ * Consume the next space-separated token of *@cmd and translate it into
+ * hp_elog->action ("add" or "remove"). Returns 0 or -EINVAL.
+ */
+static int dlpar_parse_action(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+	char *token = strsep(cmd, " ");
+
+	if (!token)
+		return -EINVAL;
+
+	if (sysfs_streq(token, "add")) {
+		hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD;
+		return 0;
+	}
+
+	if (sysfs_streq(token, "remove")) {
+		hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE;
+		return 0;
+	}
+
+	pr_err("Invalid action specified.\n");
+	return -EINVAL;
+}
+
+/*
+ * Consume the next space-separated token of *@cmd and parse it as a u32
+ * (base auto-detected). @missing and @invalid are the complete log lines
+ * emitted when the token is absent or malformed. Returns 0 or -EINVAL.
+ */
+static int dlpar_parse_u32_arg(char **cmd, u32 *val, const char *missing,
+			       const char *invalid)
+{
+	char *arg = strsep(cmd, " ");
+
+	if (!arg) {
+		pr_err("%s", missing);
+		return -EINVAL;
+	}
+
+	if (kstrtou32(arg, 0, val)) {
+		pr_err("%s", invalid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the "<id_type> <id>" tail of a DLPAR request:
+ *   indexed-count <count> <index>
+ *   index <index>
+ *   count <count>
+ * hp_elog->id_type is set as soon as the keyword is recognised; the
+ * numeric values are stored big-endian in hp_elog->_drc_u only when all
+ * of them parsed. Returns 0 or -EINVAL.
+ */
+static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+	char *arg;
+	u32 count, index;
+
+	arg = strsep(cmd, " ");
+	if (!arg)
+		return -EINVAL;
+
+	if (sysfs_streq(arg, "indexed-count")) {
+		hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_IC;
+
+		if (dlpar_parse_u32_arg(cmd, &count,
+					"No DRC count specified.\n",
+					"Invalid DRC count specified.\n"))
+			return -EINVAL;
+
+		if (dlpar_parse_u32_arg(cmd, &index,
+					"No DRC Index specified.\n",
+					"Invalid DRC Index specified.\n"))
+			return -EINVAL;
+
+		hp_elog->_drc_u.ic.count = cpu_to_be32(count);
+		hp_elog->_drc_u.ic.index = cpu_to_be32(index);
+	} else if (sysfs_streq(arg, "index")) {
+		hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+
+		if (dlpar_parse_u32_arg(cmd, &index,
+					"No DRC Index specified.\n",
+					"Invalid DRC Index specified.\n"))
+			return -EINVAL;
+
+		hp_elog->_drc_u.drc_index = cpu_to_be32(index);
+	} else if (sysfs_streq(arg, "count")) {
+		hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT;
+
+		if (dlpar_parse_u32_arg(cmd, &count,
+					"No DRC count specified.\n",
+					"Invalid DRC count specified.\n"))
+			return -EINVAL;
+
+		hp_elog->_drc_u.drc_count = cpu_to_be32(count);
+	} else {
+		pr_err("Invalid id_type specified.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * sysfs store handler: parse a request of the form
+ *	<resource> <action> <id_type> <id>
+ * from a private copy of @buf and hand it to handle_dlpar_errorlog().
+ * Returns @count on success, -ENOMEM if the copy cannot be allocated, or
+ * the first parse/handler error (which is also logged).
+ */
+static ssize_t dlpar_store(const struct class *class, const struct class_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct pseries_hp_errorlog hp_elog;
+	char *argbuf, *cursor;
+	int rc;
+
+	argbuf = kstrdup(buf, GFP_KERNEL);
+	if (!argbuf)
+		return -ENOMEM;
+
+	/* strsep() advances cursor; argbuf keeps the pointer to free */
+	cursor = argbuf;
+
+	rc = dlpar_parse_resource(&cursor, &hp_elog);
+	if (!rc)
+		rc = dlpar_parse_action(&cursor, &hp_elog);
+	if (!rc)
+		rc = dlpar_parse_id_type(&cursor, &hp_elog);
+	if (!rc)
+		rc = handle_dlpar_errorlog(&hp_elog);
+
+	kfree(argbuf);
+
+	if (!rc)
+		return count;
+
+	pr_err("Could not handle DLPAR request \"%s\"\n", buf);
+	return rc;
+}
+
+/*
+ * sysfs show handler: report the resource types accepted by dlpar_store()
+ * as the fixed line "memory,cpu". sysfs_emit() is used instead of a raw
+ * sprintf() so the write is bounded to the sysfs buffer.
+ */
+static ssize_t dlpar_show(const struct class *class, const struct class_attribute *attr,
+			  char *buf)
+{
+	return sysfs_emit(buf, "%s\n", "memory,cpu");
+}
+
+static CLASS_ATTR_RW(dlpar);
+
+/*
+ * Create the ordered hotplug workqueue unless it already exists.
+ * Returns 0 on success (or if already created), -ENOMEM otherwise.
+ */
+int __init dlpar_workqueue_init(void)
+{
+	if (!pseries_hp_wq) {
+		pseries_hp_wq = alloc_ordered_workqueue("pseries hotplug workqueue", 0);
+		if (!pseries_hp_wq)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Make sure the hotplug workqueue exists, then expose the "dlpar"
+ * attribute under kernel_kobj. Returns 0 or a negative error.
+ */
+static int __init dlpar_sysfs_init(void)
+{
+	int err = dlpar_workqueue_init();
+
+	if (err)
+		return err;
+
+	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
+}
+machine_device_initcall(pseries, dlpar_sysfs_init);
+
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
new file mode 100644
index 0000000000..3f1cdccebc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtual Processor Dispatch Trace Log
+ *
+ * (C) Copyright IBM Corporation 2009
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ */
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/smp.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <asm/firmware.h>
+#include <asm/dtl.h>
+#include <asm/lppaca.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/machdep.h>
+
+#ifdef CONFIG_DTL
+/* Per-cpu state behind one debugfs "cpu-<n>" dispatch trace log file */
+struct dtl {
+	struct dtl_entry	*buf;		/* log buffer; NULL while not enabled */
+	int			cpu;		/* cpu this log belongs to */
+	int			buf_entries;	/* number of entries in buf */
+	u64			last_idx;	/* next index dtl_file_read() hands out */
+	spinlock_t		lock;		/* held around buf/last_idx updates */
+};
+static DEFINE_PER_CPU(struct dtl, cpu_dtl);
+
+static u8 dtl_event_mask = DTL_LOG_ALL;
+
+
+/*
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
+ *
+ * Declared u32 because its address is handed to debugfs_create_u32().
+ */
+static u32 dtl_buf_entries = N_DISPATCH_LOG;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log.  If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
+/* Per-cpu ring that consume_dtle() copies dispatch trace entries into */
+struct dtl_ring {
+	u64	write_index;		/* count of entries published so far */
+	struct dtl_entry *write_ptr;	/* next slot to fill; NULL disables logging */
+	struct dtl_entry *buf;		/* first slot of the ring */
+	struct dtl_entry *buf_end;	/* one past the last slot */
+};
+
+static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);
+
+static atomic_t dtl_count;
+
+/*
+ * The cpu accounting code controls the DTL ring buffer, and we get
+ * given entries as they are processed.
+ *
+ * @dtle: entry being processed by the accounting code
+ * @index: running index of that entry
+ *
+ * Copies the entry into this cpu's dtl_ring and publishes it by bumping
+ * write_index. Does nothing while the ring's write_ptr is NULL.
+ */
+static void consume_dtle(struct dtl_entry *dtle, u64 index)
+{
+	struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings);
+	struct dtl_entry *wp = dtlr->write_ptr;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+
+	if (!wp)
+		return;
+
+	/* copy first, validate afterwards: see the overflow check below */
+	*wp = *dtle;
+	barrier();
+
+	/* check for hypervisor ring buffer overflow, ignore this entry if so */
+	if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx))
+		return;
+
+	/* advance to the next slot, wrapping at the end of the ring */
+	++wp;
+	if (wp == dtlr->buf_end)
+		wp = dtlr->buf;
+	dtlr->write_ptr = wp;
+
+	/* incrementing write_index makes the new entry visible */
+	smp_wmb();
+	++dtlr->write_index;
+}
+
+/*
+ * Start logging for @dtl (CONFIG_VIRT_CPU_ACCOUNTING_NATIVE variant):
+ * point the cpu's dtl_ring at dtl->buf, OR dtl_event_mask into the
+ * lppaca enable mask and install consume_dtle() as the consumer.
+ * Always returns 0.
+ */
+static int dtl_start(struct dtl *dtl)
+{
+	struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);
+
+	dtlr->buf = dtl->buf;
+	dtlr->buf_end = dtl->buf + dtl->buf_entries;
+	dtlr->write_index = 0;
+
+	/* setting write_ptr enables logging into our buffer */
+	smp_wmb();
+	dtlr->write_ptr = dtl->buf;
+
+	/* enable event logging */
+	lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;
+
+	/* dtl_count tracks how many logs currently rely on the consumer */
+	dtl_consumer = consume_dtle;
+	atomic_inc(&dtl_count);
+	return 0;
+}
+
+/*
+ * Stop logging for @dtl (CONFIG_VIRT_CPU_ACCOUNTING_NATIVE variant):
+ * clear the ring's write pointer and buffer, reset the lppaca enable mask
+ * to DTL_LOG_PREEMPT and remove the consumer once the last user is gone.
+ */
+static void dtl_stop(struct dtl *dtl)
+{
+	struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);
+
+	/* a NULL write_ptr makes consume_dtle() return early */
+	dtlr->write_ptr = NULL;
+	smp_wmb();
+
+	dtlr->buf = NULL;
+
+	/* restore dtl_enable_mask */
+	lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
+
+	if (atomic_dec_and_test(&dtl_count))
+		dtl_consumer = NULL;
+}
+
+/* Number of entries published so far into the ring of @dtl's cpu. */
+static u64 dtl_current_index(struct dtl *dtl)
+{
+	struct dtl_ring *ring = &per_cpu(dtl_rings, dtl->cpu);
+
+	return ring->write_index;
+}
+
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/*
+ * Start logging for @dtl (variant without CONFIG_VIRT_CPU_ACCOUNTING_NATIVE):
+ * register dtl->buf with the hypervisor for the cpu, reset the lppaca
+ * index and set the enable mask to dtl_event_mask.
+ * Returns 0 on success or -EIO if registration fails.
+ */
+static int dtl_start(struct dtl *dtl)
+{
+	unsigned long addr;
+	int ret, hwcpu;
+
+	/* Register our dtl buffer with the hypervisor. The HV expects the
+	 * buffer size to be passed in the second word of the buffer */
+	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
+
+	hwcpu = get_hard_smp_processor_id(dtl->cpu);
+	addr = __pa(dtl->buf);
+	ret = register_dtl(hwcpu, addr);
+	if (ret) {
+		printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) "
+		       "failed with %d\n", __func__, dtl->cpu, hwcpu, ret);
+		return -EIO;
+	}
+
+	/* set our initial buffer indices */
+	lppaca_of(dtl->cpu).dtl_idx = 0;
+
+	/* ensure that our updates to the lppaca fields have occurred before
+	 * we actually enable the logging */
+	smp_wmb();
+
+	/* enable event logging */
+	lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask;
+
+	return 0;
+}
+
+/*
+ * Stop logging for @dtl (variant without CONFIG_VIRT_CPU_ACCOUNTING_NATIVE):
+ * clear the lppaca enable mask first, then unregister the buffer from the
+ * hypervisor.
+ */
+static void dtl_stop(struct dtl *dtl)
+{
+	int hwcpu = get_hard_smp_processor_id(dtl->cpu);
+
+	lppaca_of(dtl->cpu).dtl_enable_mask = 0x0;
+
+	unregister_dtl(hwcpu);
+}
+
+/* Current log index of @dtl's cpu, read from its lppaca in CPU byte order. */
+static u64 dtl_current_index(struct dtl *dtl)
+{
+	u64 idx = be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
+
+	return idx;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/*
+ * Allocate a log buffer for @dtl and start logging into it.
+ *
+ * Returns 0 on success, -ENOMEM if dtl_cache is missing or the allocation
+ * fails, -EBUSY if the log already has a buffer or dtl_access_lock cannot
+ * be read-locked, or the dtl_start() error. On success the read lock on
+ * dtl_access_lock stays held; dtl_disable() releases it.
+ */
+static int dtl_enable(struct dtl *dtl)
+{
+	long int n_entries;
+	long int rc;
+	struct dtl_entry *buf = NULL;
+
+	if (!dtl_cache)
+		return -ENOMEM;
+
+	/* only allow one reader */
+	if (dtl->buf)
+		return -EBUSY;
+
+	/* ensure there are no other conflicting dtl users */
+	if (!read_trylock(&dtl_access_lock))
+		return -EBUSY;
+
+	n_entries = dtl_buf_entries;
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
+	if (!buf) {
+		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
+				__func__, dtl->cpu);
+		read_unlock(&dtl_access_lock);
+		return -ENOMEM;
+	}
+
+	/* re-check under the lock: the unlocked test above can race */
+	spin_lock(&dtl->lock);
+	rc = -EBUSY;
+	if (!dtl->buf) {
+		/* store the original allocation size for use during read */
+		dtl->buf_entries = n_entries;
+		dtl->buf = buf;
+		dtl->last_idx = 0;
+		rc = dtl_start(dtl);
+		if (rc)
+			dtl->buf = NULL;
+	}
+	spin_unlock(&dtl->lock);
+
+	/* any failure: give back the access lock and the unused buffer */
+	if (rc) {
+		read_unlock(&dtl_access_lock);
+		kmem_cache_free(dtl_cache, buf);
+	}
+
+	return rc;
+}
+
+/*
+ * Stop logging for @dtl, free its buffer and drop the read lock on
+ * dtl_access_lock that a successful dtl_enable() left held.
+ */
+static void dtl_disable(struct dtl *dtl)
+{
+	spin_lock(&dtl->lock);
+	dtl_stop(dtl);
+	kmem_cache_free(dtl_cache, dtl->buf);
+	dtl->buf = NULL;
+	dtl->buf_entries = 0;
+	spin_unlock(&dtl->lock);
+	read_unlock(&dtl_access_lock);
+}
+
+/* file interface */
+
+/*
+ * open(): enable the log stored in the inode's private data and, on
+ * success, remember it in the file for later reads.
+ */
+static int dtl_file_open(struct inode *inode, struct file *filp)
+{
+	struct dtl *dtl = inode->i_private;
+	int err = dtl_enable(dtl);
+
+	if (!err)
+		filp->private_data = dtl;
+
+	return err;
+}
+
+/* release(): disable the log stored in the inode's private data. */
+static int dtl_file_release(struct inode *inode, struct file *filp)
+{
+	dtl_disable(inode->i_private);
+
+	return 0;
+}
+
+/*
+ * read(): copy not-yet-read log entries to user space.
+ *
+ * @len must be a multiple of sizeof(struct dtl_entry), otherwise -EINVAL
+ * is returned. Returns the number of bytes copied (0 if nothing new is
+ * available) or -EFAULT if the copy to user space fails. @pos is not used.
+ */
+static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len,
+		loff_t *pos)
+{
+	long int rc, n_read, n_req, read_size;
+	struct dtl *dtl;
+	u64 cur_idx, last_idx, i;
+
+	if ((len % sizeof(struct dtl_entry)) != 0)
+		return -EINVAL;
+
+	dtl = filp->private_data;
+
+	/* requested number of entries to read */
+	n_req = len / sizeof(struct dtl_entry);
+
+	/* actual number of entries read */
+	n_read = 0;
+
+	spin_lock(&dtl->lock);
+
+	cur_idx = dtl_current_index(dtl);
+	last_idx = dtl->last_idx;
+
+	/* the writer lapped us: skip ahead to the oldest entries still kept */
+	if (last_idx + dtl->buf_entries <= cur_idx)
+		last_idx = cur_idx - dtl->buf_entries + 1;
+
+	/* never hand out more than what has been written */
+	if (last_idx + n_req > cur_idx)
+		n_req = cur_idx - last_idx;
+
+	/* the read position advances here, before the copy below is attempted */
+	if (n_req > 0)
+		dtl->last_idx = last_idx + n_req;
+
+	spin_unlock(&dtl->lock);
+
+	if (n_req <= 0)
+		return 0;
+
+	/* slot in the ring that holds entry last_idx */
+	i = last_idx % dtl->buf_entries;
+
+	/* read the tail of the buffer if we've wrapped */
+	if (i + n_req > dtl->buf_entries) {
+		read_size = dtl->buf_entries - i;
+
+		rc = copy_to_user(buf, &dtl->buf[i],
+				read_size * sizeof(struct dtl_entry));
+		if (rc)
+			return -EFAULT;
+
+		i = 0;
+		n_req -= read_size;
+		n_read += read_size;
+		buf += read_size * sizeof(struct dtl_entry);
+	}
+
+	/* .. and now the head */
+	rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
+	if (rc)
+		return -EFAULT;
+
+	n_read += n_req;
+
+	return n_read * sizeof(struct dtl_entry);
+}
+
+/* File operations of the per-cpu debugfs files created by dtl_setup_file() */
+static const struct file_operations dtl_fops = {
+	.open		= dtl_file_open,
+	.release	= dtl_file_release,
+	.read		= dtl_file_read,
+	.llseek		= no_llseek,
+};
+
+static struct dentry *dtl_dir;
+
+/* Create the debugfs file "cpu-<n>" for @dtl below dtl_dir. */
+static void dtl_setup_file(struct dtl *dtl)
+{
+	char name[10];
+
+	/* bounded: an over-long cpu number is truncated, never overruns name[] */
+	snprintf(name, sizeof(name), "cpu-%d", dtl->cpu);
+
+	debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
+}
+
+/*
+ * Create the "dtl" debugfs directory with its two tunables and one log
+ * file per possible cpu. Returns -ENODEV without the SPLPAR firmware
+ * feature, 0 otherwise.
+ */
+static int dtl_init(void)
+{
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return -ENODEV;
+
+	/* common debugfs directory and the shared settings */
+	dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);
+	debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
+	debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
+
+	/* per-cpu log state and its debugfs file */
+	for_each_possible_cpu(cpu) {
+		struct dtl *dtl = &per_cpu(cpu_dtl, cpu);
+
+		dtl->cpu = cpu;
+		spin_lock_init(&dtl->lock);
+		dtl_setup_file(dtl);
+	}
+
+	return 0;
+}
+machine_arch_initcall(pseries, dtl_init);
+#endif /* CONFIG_DTL */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
+ *
+ * @stop_tb: entries with a timebase later than this are left for a later scan
+ *
+ * Walks the entries between the paca's read index and the lppaca's
+ * dtl_idx, summing enqueue_to_dispatch_time and ready_to_enqueue_time of
+ * each, and stores the new read position back in the paca.
+ * Returns the accumulated sum (0 if there is no log or nothing new).
+ */
+static notrace u64 scan_dispatch_log(u64 stop_tb)
+{
+	u64 i = local_paca->dtl_ridx;
+	struct dtl_entry *dtl = local_paca->dtl_curr;
+	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+	u64 tb_delta;
+	u64 stolen = 0;
+	u64 dtb;
+
+	if (!dtl)
+		return 0;
+
+	if (i == be64_to_cpu(vpa->dtl_idx))
+		return 0;
+	while (i < be64_to_cpu(vpa->dtl_idx)) {
+		/* read the entry first, then verify it was not overwritten */
+		dtb = be64_to_cpu(dtl->timebase);
+		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+			be32_to_cpu(dtl->ready_to_enqueue_time);
+		barrier();
+		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+			/* buffer has overflowed */
+			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+			continue;
+		}
+		if (dtb > stop_tb)
+			break;
+#ifdef CONFIG_DTL
+		if (dtl_consumer)
+			dtl_consumer(dtl, i);
+#endif
+		stolen += tb_delta;
+		++i;
+		++dtl;
+		/* wrap around at the end of the log buffer */
+		if (dtl == dtl_end)
+			dtl = local_paca->dispatch_log;
+	}
+	local_paca->dtl_ridx = i;
+	local_paca->dtl_curr = dtl;
+	return stolen;
+}
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ *
+ * The log is scanned twice, first up to starttime_user and then up to
+ * starttime; the first amount is taken off stime, the second off utime,
+ * and both are added to steal_time.
+ */
+void notrace pseries_accumulate_stolen_time(void)
+{
+	struct cpu_accounting_data *acct = &local_paca->accounting;
+	u64 stolen_sys = scan_dispatch_log(acct->starttime_user);
+	u64 stolen_user = scan_dispatch_log(acct->starttime);
+
+	acct->stime -= stolen_sys;
+	acct->utime -= stolen_user;
+	acct->steal_time += stolen_user + stolen_sys;
+}
+
+/*
+ * Return the stolen time logged up to @stop_tb, or 0 when the SPLPAR
+ * firmware feature is absent or the log has no unread entries.
+ */
+u64 pseries_calculate_stolen_time(u64 stop_tb)
+{
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	if (get_paca()->dtl_ridx == be64_to_cpu(get_lppaca()->dtl_idx))
+		return 0;
+
+	return scan_dispatch_log(stop_tb);
+}
+
+#endif
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
new file mode 100644
index 0000000000..def184da51
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -0,0 +1,887 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * The file intends to implement the platform dependent EEH operations on pseries.
+ * Actually, the pseries platform is built based on RTAS heavily. That means the
+ * pseries platform dependent EEH operations will be built on RTAS calls. The functions
+ * are derived from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has
+ * been done.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011.
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/crash_dump.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+static int ibm_get_config_addr_info;
+static int ibm_get_config_addr_info2;
+static int ibm_configure_pe;
+
+static void pseries_eeh_init_edev(struct pci_dn *pdn);
+
+/*
+ * Set up EEH state for a newly added PCI device: initialise its eeh_dev
+ * and then probe it. Does nothing when EEH is force-disabled.
+ *
+ * For SR-IOV virtual functions the ids are copied from the pci_dev into
+ * the pci_dn beforehand, and afterwards the eeh_dev is moved into the
+ * physical function's PE.
+ */
+static void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+
+	if (eeh_has_flag(EEH_FORCE_DISABLED))
+		return;
+
+	dev_dbg(&pdev->dev, "EEH: Setting up device\n");
+#ifdef CONFIG_PCI_IOV
+	/* NOTE(review): pdn is dereferenced without a NULL check here */
+	if (pdev->is_virtfn) {
+		pdn->device_id  =  pdev->device;
+		pdn->vendor_id  =  pdev->vendor;
+		pdn->class_code =  pdev->class;
+		/*
+		 * Last allow unfreeze return code used for retrieval
+		 * by user space in eeh-sysfs to show the last command
+		 * completion from platform.
+		 */
+		pdn->last_allow_rc =  0;
+	}
+#endif
+	pseries_eeh_init_edev(pdn);
+#ifdef CONFIG_PCI_IOV
+	if (pdev->is_virtfn) {
+		/*
+		 * FIXME: This really should be handled by choosing the right
+		 *        parent PE in pseries_eeh_init_edev().
+		 */
+		struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe;
+		struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+		/* PE address built from bus number and devfn of the VF */
+		edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
+		eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */
+		eeh_pe_tree_insert(edev, physfn_pe);   /* Add as VF PE type */
+	}
+#endif
+	eeh_probe_device(pdev);
+}
+
+
+/**
+ * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device
+ * @pdn: pci_dn of the input device
+ *
+ * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo,
+ * pe_config_addr) as a handle to a given PE. This function finds the
+ * pe_config_addr based on the device's config addr.
+ *
+ * Keep in mind that the pe_config_addr *might* be numerically identical to the
+ * device's config addr, but the two are conceptually distinct.
+ *
+ * Returns the pe_config_addr, or a negative error code: -ENOENT if the
+ * device is not part of a PE or neither RTAS call is available, -ENXIO if
+ * the address lookup itself fails.
+ */
+static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
+{
+	int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	struct pci_controller *phb = pdn->phb;
+	int ret, rets[3];
+
+	/* prefer the newer call when firmware provides it */
+	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+		/*
+		 * First of all, use function 1 to determine if this device is
+		 * part of a PE or not. rets[0] being zero indicates it's not.
+		 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), 1);
+		if (ret || (rets[0] == 0))
+			return -ENOENT;
+
+		/* Retrieve the associated PE config address with function 0 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+				__func__, phb->global_number, config_addr);
+			return -ENXIO;
+		}
+
+		return rets[0];
+	}
+
+	/* fall back to the older call: address lookup only, no membership test */
+	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+				__func__, phb->global_number, config_addr);
+			return -ENXIO;
+		}
+
+		return rets[0];
+	}
+
+	/*
+	 * PAPR does describe a process for finding the pe_config_addr that was
+	 * used before the ibm,get-config-addr-info calls were added. However,
+	 * I haven't found *any* systems that don't have that RTAS call
+	 * implemented. If you happen to find one that needs the old DT based
+	 * process, patches are welcome!
+	 */
+	return -ENOENT;
+}
+
+/**
+ * pseries_eeh_phb_reset - Reset the specified PHB
+ * @phb: PCI controller
+ * @config_addr: the associated config address
+ * @option: reset option
+ *
+ * Reset the specified PHB/PE. If a fundamental reset is requested and the
+ * RTAS call answers -8, a hot reset is issued instead. The function then
+ * sleeps for the hold time (fundamental/hot reset) or the settle time
+ * (any other option), regardless of whether the call succeeded.
+ *
+ * Returns the status of the last RTAS call made.
+ */
+static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
+{
+	int ret;
+
+	/* Reset PE through RTAS call */
+	ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			config_addr, BUID_HI(phb->buid),
+			BUID_LO(phb->buid), option);
+
+	/* If fundamental-reset not supported, try hot-reset */
+	if (option == EEH_RESET_FUNDAMENTAL && ret == -8) {
+		option = EEH_RESET_HOT;
+		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), option);
+	}
+
+	/* We need reset hold or settlement delay */
+	if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT)
+		msleep(EEH_PE_RST_HOLD_TIME);
+	else
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
+ * @phb: PCI controller
+ * @config_addr: the associated config address
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the malfunctioning PE can be recovered
+ * again.
+ *
+ * The RTAS call is retried while it reports a busy status, for at most
+ * about 0.2s in total. Returns 0 on success, -EINVAL if RTAS reported a
+ * parameter error (-3), and -EIO for any other failure or on timeout.
+ */
+static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr)
+{
+	int ret;
+	/* Waiting 0.2s maximum before skipping configuration */
+	int max_wait = 200;
+
+	while (max_wait > 0) {
+		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid));
+
+		if (!ret)
+			return ret;
+		/* negative status is a hard error, positive means "retry later" */
+		if (ret < 0)
+			break;
+
+		/*
+		 * If RTAS returns a delay value that's above 100ms, cut it
+		 * down to 100ms in case firmware made a mistake.  For more
+		 * on how these delay values work see rtas_busy_delay_time
+		 */
+		if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
+		    ret <= RTAS_EXTENDED_DELAY_MAX)
+			ret = RTAS_EXTENDED_DELAY_MIN+2;
+
+		/* charge the upcoming delay against the remaining budget */
+		max_wait -= rtas_busy_delay_time(ret);
+
+		if (max_wait < 0)
+			break;
+
+		rtas_busy_delay(ret);
+	}
+
+	pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
+		__func__, phb->global_number, config_addr, ret);
+	/* PAPR defines -3 as "Parameter Error" for this function: */
+	if (ret == -3)
+		return -EINVAL;
+	else
+		return -EIO;
+}
+
+/*
+ * Buffer for reporting slot-error-detail rtas calls. Its here
+ * in BSS, and not dynamically alloced, so that it ends up in
+ * RMO where RTAS can access it.
+ */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
+/*
+ * Return the config-space offset of the capability list pointer
+ * (PCI_CAPABILITY_LIST) if the device advertises a capability list.
+ * Returns 0 if it does not, if @pdn is NULL or if the status register
+ * cannot be read.
+ */
+static int pseries_eeh_cap_start(struct pci_dn *pdn)
+{
+	u32 status = 0;
+
+	if (!pdn)
+		return 0;
+
+	/* only trust status when the config read actually succeeded */
+	if (rtas_read_config(pdn, PCI_STATUS, 2, &status) != PCIBIOS_SUCCESSFUL)
+		return 0;
+
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	return PCI_CAPABILITY_LIST;
+}
+
+
+/*
+ * Walk the PCI capability list of @pdn looking for capability id @cap.
+ * Returns the config-space offset of the capability, or 0 if it is not
+ * found, the device has no capability list or a config read fails.
+ */
+static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+	int pos = pseries_eeh_cap_start(pdn);
+	int cnt = 48;	/* Maximal number of capabilities */
+	u32 val;
+
+	if (!pos)
+		return 0;
+
+	while (cnt--) {
+		/* pos holds the offset of a "next capability" pointer byte */
+		if (rtas_read_config(pdn, pos, 1, &val) != PCIBIOS_SUCCESSFUL)
+			break;
+		pos = val;
+		if (pos < 0x40)
+			break;
+		pos &= ~3;
+
+		if (rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &val) !=
+		    PCIBIOS_SUCCESSFUL)
+			break;
+		if (val == 0xff)
+			break;
+		if (val == cap)
+			return pos;
+		pos += PCI_CAP_LIST_NEXT;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the PCIe extended capability list of @pdn, which starts at config
+ * offset 256, looking for extended capability id @cap. Returns its offset,
+ * or 0 if it is not found, the device has no eeh_dev or PCIe capability,
+ * or a config read fails.
+ */
+static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int pos = 256;
+	int ttl;
+	u32 header;
+
+	if (!edev || !edev->pcie_cap)
+		return 0;
+
+	if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL ||
+	    !header)
+		return 0;
+
+	/* at most one iteration per possible 8-byte slot of extended space */
+	for (ttl = (4096 - 256) / 8; ttl > 0; ttl--) {
+		if (pos && PCI_EXT_CAP_ID(header) == cap)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < 256)
+			return 0;
+
+		if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+			return 0;
+	}
+
+	return 0;
+}
+
+/**
+ * pseries_eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * PEs form a hierarchy. Starting from the physical function's pci_dn (for
+ * a device that has one recorded) or from the parent pci_dn otherwise,
+ * walk upwards and return the PE of the first ancestor that has one.
+ * Returns NULL if an ancestor without an eeh_dev is reached or the walk
+ * runs out of parents.
+ */
+static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+	struct eeh_dev *ancestor;
+
+	/*
+	 * An indirect parent may already have a PE while the direct
+	 * parent does not yet, hence the walk below.
+	 */
+	if (edev->physfn)
+		pdn = pci_get_pdn(edev->physfn);
+	else if (pdn)
+		pdn = pdn->parent;
+
+	for (; pdn; pdn = pdn->parent) {
+		ancestor = pdn_to_eeh_dev(pdn);
+
+		/* We're poking out of PCI territory */
+		if (!ancestor)
+			return NULL;
+
+		if (ancestor->pe)
+			return ancestor->pe;
+	}
+
+	return NULL;
+}
+
+/**
+ * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn
+ *
+ * @pdn: PCI device node
+ *
+ * When we discover a new PCI device via the device-tree we create a
+ * corresponding pci_dn and we allocate, but don't initialise, an eeh_dev.
+ * This function takes care of the initialisation and inserts the eeh_dev
+ * into the correct eeh_pe. If no eeh_pe exists we'll allocate one.
+ */
+static void pseries_eeh_init_edev(struct pci_dn *pdn)
+{
+	struct eeh_pe pe, *parent;
+	struct eeh_dev *edev;
+	u32 pcie_flags = 0;	/* stays 0 if the config read bails out early */
+	int ret;
+
+	if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)))
+		return;
+
+	/*
+	 * Find the eeh_dev for this pdn. The storage for the eeh_dev was
+	 * allocated at the same time as the pci_dn.
+	 *
+	 * XXX: We should probably re-visit that.
+	 */
+	edev = pdn_to_eeh_dev(pdn);
+	if (!edev)
+		return;
+
+	/*
+	 * If ->pe is set then we've already probed this device. We hit
+	 * this path when a pci_dev is removed and rescanned while recovering
+	 * a PE (i.e. for devices where the driver doesn't support error
+	 * recovery).
+	 */
+	if (edev->pe)
+		return;
+
+	/* Check class/vendor/device IDs */
+	if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code)
+		return;
+
+	/* Skip for PCI-ISA bridge */
+	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return;
+
+	eeh_edev_dbg(edev, "Probing device\n");
+
+	/*
+	 * Update the capability offsets and mode of the eeh device.
+	 * The mode needs to correctly reflect whether the device is
+	 * a bridge, a root port or a PCIe switch downstream port.
+	 */
+	edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+	edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+	edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+	edev->mode &= 0xFFFFFF00;
+	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		edev->mode |= EEH_DEV_BRIDGE;
+		if (edev->pcie_cap) {
+			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
+	}
+
+	/* first up, find the pe_config_addr for the PE containing the device */
+	ret = pseries_eeh_get_pe_config_addr(pdn);
+	if (ret < 0) {
+		eeh_edev_dbg(edev, "Unable to find pe_config_addr\n");
+		goto err;
+	}
+
+	/* Try enable EEH on the fake PE */
+	memset(&pe, 0, sizeof(struct eeh_pe));
+	pe.phb = pdn->phb;
+	pe.addr = ret;
+
+	eeh_edev_dbg(edev, "Enabling EEH on device\n");
+	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
+	if (ret) {
+		eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
+		goto err;
+	}
+
+	edev->pe_config_addr = pe.addr;
+
+	eeh_add_flag(EEH_ENABLED);
+
+	parent = pseries_eeh_pe_get_parent(edev);
+	eeh_pe_tree_insert(edev, parent);
+	eeh_save_bars(edev);
+	eeh_edev_dbg(edev, "EEH enabled for device\n");
+
+	return;
+
+err:
+	eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret);
+}
+
+static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev)
+{
+	struct pci_dn *node = pci_get_pdn_by_devfn(pdev->bus, pdev->devfn);
+	struct eeh_dev *dev;
+
+	/* No pci_dn behind this pci_dev: nothing to hand back. */
+	if (!node)
+		return NULL;
+
+	/*
+	 * pseries_eeh_init_edev() inserts every eeh_dev it manages to set
+	 * up into a PE, so an eeh_dev without one was never configured.
+	 */
+	dev = pdn_to_eeh_dev(node);
+	if (dev && dev->pe)
+		return dev;
+
+	return NULL;
+}
+
+/**
+ * pseries_eeh_init_edev_recursive - Enable EEH for the indicated device
+ * @pdn: PCI device node
+ *
+ * Perform EEH initialization for @pdn and for every pci_dn below it.
+ * This routine must be used for PCI devices that were added after
+ * system boot (e.g. hotplug, dlpar). A NULL @pdn is ignored.
+ */
+void pseries_eeh_init_edev_recursive(struct pci_dn *pdn)
+{
+	struct pci_dn *n;
+
+	if (!pdn)
+		return;
+	/* Children are initialised first ... */
+	list_for_each_entry(n, &pdn->child_list, list)
+		pseries_eeh_init_edev_recursive(n);
+	/* ... and @pdn itself last. */
+	pseries_eeh_init_edev(pdn);
+}
+EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive);
+
+/**
+ * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * Issue an EEH control operation for @pe through the ibm_set_eeh_option
+ * RTAS token. Enable, disable, thaw-MMIO and thaw-DMA are passed to RTAS;
+ * freezing a PE is accepted but does nothing; anything else is -EINVAL.
+ */
+static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	/*
+	 * Freezing a PE is not supported here. The request is
+	 * accepted and reported as successful, and RTAS is not
+	 * called at all.
+	 */
+	if (option == EEH_OPT_FREEZE_PE)
+		return 0;
+
+	/* Only the four operations below are handed to firmware. */
+	if (option != EEH_OPT_DISABLE &&
+	    option != EEH_OPT_ENABLE &&
+	    option != EEH_OPT_THAW_MMIO &&
+	    option != EEH_OPT_THAW_DMA) {
+		pr_err("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
+
+	/*
+	 * The PE is identified to RTAS by its address together with
+	 * the high and low halves of the owning PHB's BUID. The RTAS
+	 * status is handed back to the caller unchanged.
+	 */
+	return rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+			 pe->addr,
+			 BUID_HI(pe->phb->buid),
+			 BUID_LO(pe->phb->buid),
+			 option);
+}
+
+/**
+ * pseries_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @delay: suggested time to wait if state is unavailable
+ *
+ * Retrieve the state of the specified PE through RTAS and translate it
+ * into EEH_STATE_* flags. Two RTAS calls exist for this purpose:
+ * ibm_read_slot_reset_state2 is used when its token is known, otherwise
+ * the older ibm_read_slot_reset_state; with neither available the result
+ * is EEH_STATE_NOT_SUPPORT. A non-zero RTAS status is returned as is.
+ * When the PE is reported as unavailable with a non-zero wait time, that
+ * time is stored in @delay (if @delay is not NULL).
+ */
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	int ret;
+	int rets[4];
+	int result;
+
+	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
+				pe->addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid));
+	} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
+		/* The old call has no wait-time output: fake "none given" */
+		rets[2] = 0;
+		ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
+				pe->addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid));
+	} else {
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	if (ret)
+		return ret;
+
+	/* Parse the result out */
+	if (!rets[1])
+		return EEH_STATE_NOT_SUPPORT;
+
+	switch(rets[0]) {
+	case 0:
+		result = EEH_STATE_MMIO_ACTIVE |
+			 EEH_STATE_DMA_ACTIVE;
+		break;
+	case 1:
+		result = EEH_STATE_RESET_ACTIVE |
+			 EEH_STATE_MMIO_ACTIVE  |
+			 EEH_STATE_DMA_ACTIVE;
+		break;
+	case 2:
+		result = 0;
+		break;
+	case 4:
+		result = EEH_STATE_MMIO_ENABLED;
+		break;
+	case 5:
+		if (rets[2]) {
+			if (delay)
+				*delay = rets[2];
+			result = EEH_STATE_UNAVAILABLE;
+		} else {
+			result = EEH_STATE_NOT_SUPPORT;
+		}
+		break;
+	default:
+		result = EEH_STATE_NOT_SUPPORT;
+	}
+
+	return result;
+}
+
+/**
+ * pseries_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Forwards @pe's PHB and address to pseries_eeh_phb_reset() with @option.
+ */
+static int pseries_eeh_reset(struct eeh_pe *pe, int option)
+{
+	return pseries_eeh_phb_reset(pe->phb, pe->addr, option);
+}
+
+/**
+ * pseries_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error log for the PE with the
+ * ibm_slot_error_detail RTAS call, using the shared slot_errbuf under
+ * slot_errbuf_lock, and pass it to log_error() when the call succeeds.
+ */
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&slot_errbuf_lock, flags);
+	memset(slot_errbuf, 0, eeh_error_buf_size);
+	/* Both buffers are handed to RTAS by physical address. */
+	ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr,
+			BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
+			virt_to_phys(drv_log), len,
+			virt_to_phys(slot_errbuf), eeh_error_buf_size,
+			severity);
+	if (!ret)
+		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ */
+static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr); /* result passed through */
+}
+
+/**
+ * pseries_eeh_read_config - Read PCI config space
+ * @edev: EEH device handle
+ * @where: PCI config space offset
+ * @size: size to read
+ * @val: return value
+ *
+ * Read config space from the specified device via rtas_read_config()
+ */
+static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	return rtas_read_config(pdn, where, size, val);
+}
+
+/**
+ * pseries_eeh_write_config - Write PCI config space
+ * @edev: EEH device handle
+ * @where: PCI config space offset
+ * @size: size to write
+ * @val: value to be written
+ *
+ * Write config space to the specified device via rtas_write_config()
+ */
+static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	return rtas_write_config(pdn, where, size, val);
+}
+
+#ifdef CONFIG_PCI_IOV
+static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs)
+{
+	int rc;
+	int ibm_allow_unfreeze = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE);
+	unsigned long buid, addr;
+	/* @vf_pe_array must hold RTAS_DATA_BUF_SIZE bytes; all of it is copied. */
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	buid = pdn->phb->buid;
+	spin_lock(&rtas_data_buf_lock);
+	memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE);
+	rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL,
+		       addr,
+		       BUID_HI(buid),
+		       BUID_LO(buid),
+		       rtas_data_buf, cur_vfs * sizeof(u16)); /* NOTE(review): buffer not virt_to_phys()'d, unlike get_log - confirm */
+	spin_unlock(&rtas_data_buf_lock);
+	if (rc)
+		pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n",
+			__func__,
+			pdn->phb->global_number, addr, rc);
+	return rc;
+}
+
+static int pseries_call_allow_unfreeze(struct eeh_dev *edev)
+{
+	int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num;
+	struct pci_dn *pdn, *tmp, *parent, *physfn_pdn;
+	u16 *vf_pe_array;
+	/* Scratch list of big-endian PE numbers, sized like rtas_data_buf. */
+	vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!vf_pe_array)
+		return -ENOMEM;
+	if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) {
+		if (edev->pdev->is_physfn) {	/* PF: send the PE numbers of all its VFs */
+			cur_vfs = pci_num_vf(edev->pdev);
+			pdn = eeh_dev_to_pdn(edev);
+			parent = pdn->parent;
+			for (vf_index = 0; vf_index < cur_vfs; vf_index++)
+				vf_pe_array[vf_index] =
+					cpu_to_be16(pdn->pe_num_map[vf_index]);
+			rc = pseries_send_allow_unfreeze(pdn, vf_pe_array,
+							 cur_vfs);
+			pdn->last_allow_rc = rc;
+			for (vf_index = 0; vf_index < cur_vfs; vf_index++) {	/* record rc on each VF's pci_dn */
+				list_for_each_entry_safe(pdn, tmp,
+							 &parent->child_list,
+							 list) {
+					bus = pci_iov_virtfn_bus(edev->pdev,
+								 vf_index);
+					devfn = pci_iov_virtfn_devfn(edev->pdev,
+								     vf_index);
+					if (pdn->busno != bus ||
+					    pdn->devfn != devfn)
+						continue;
+					pdn->last_allow_rc = rc;
+				}
+			}
+		} else {	/* not a PF: send only this device's PE number */
+			pdn = pci_get_pdn(edev->pdev);
+			physfn_pdn = pci_get_pdn(edev->physfn);
+
+			vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index];
+			vf_pe_array[0] = cpu_to_be16(vf_pe_num);
+			rc = pseries_send_allow_unfreeze(physfn_pdn,
+							 vf_pe_array, 1);
+			pdn->last_allow_rc = rc;
+		}
+	}
+
+	kfree(vf_pe_array);
+	return rc;
+}
+
+static int pseries_notify_resume(struct eeh_dev *edev)
+{
+	if (!edev)
+		return -EEXIST;
+
+	if (rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE) == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	/* Devices that are neither a physfn nor a virtfn need no RTAS call. */
+	if (!edev->pdev->is_physfn && !edev->pdev->is_virtfn)
+		return 0;
+	return pseries_call_allow_unfreeze(edev);
+}
+#endif
+
+static struct eeh_ops pseries_eeh_ops = {	/* registered via eeh_init() in eeh_pseries_init() */
+	.name			= "pseries",
+	.probe			= pseries_eeh_probe,
+	.set_option		= pseries_eeh_set_option,
+	.get_state		= pseries_eeh_get_state,
+	.reset			= pseries_eeh_reset,
+	.get_log		= pseries_eeh_get_log,
+	.configure_bridge       = pseries_eeh_configure_bridge,
+	.err_inject		= NULL, /* no hook provided here */
+	.read_config		= pseries_eeh_read_config,
+	.write_config		= pseries_eeh_write_config,
+	.next_error		= NULL, /* no hook provided here */
+	.restore_config		= NULL, /* NB: configure_bridge() does this */
+#ifdef CONFIG_PCI_IOV
+	.notify_resume		= pseries_notify_resume
+#endif
+};
+
+/**
+ * eeh_pseries_init - Register platform dependent EEH operations
+ *
+ * Look up the EEH RTAS tokens, reset the PHBs in kdump/reset_devices boots
+ * and register pseries_eeh_ops. Returns -EINVAL if a required token is missing.
+ */
+static int __init eeh_pseries_init(void)
+{
+	struct pci_controller *phb;
+	struct pci_dn *pdn;
+	int ret, config_addr;
+
+	/* figure out EEH RTAS function call tokens */
+	ibm_set_eeh_option		= rtas_function_token(RTAS_FN_IBM_SET_EEH_OPTION);
+	ibm_set_slot_reset		= rtas_function_token(RTAS_FN_IBM_SET_SLOT_RESET);
+	ibm_read_slot_reset_state2	= rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE2);
+	ibm_read_slot_reset_state	= rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE);
+	ibm_slot_error_detail		= rtas_function_token(RTAS_FN_IBM_SLOT_ERROR_DETAIL);
+	ibm_get_config_addr_info2	= rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2);
+	ibm_get_config_addr_info	= rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO);
+	ibm_configure_pe		= rtas_function_token(RTAS_FN_IBM_CONFIGURE_PE);
+
+	/*
+	 * ibm,configure-pe and ibm,configure-bridge have the same semantics,
+	 * however ibm,configure-pe can be faster.  If we can't find
+	 * ibm,configure-pe then fall back to using ibm,configure-bridge.
+	 */
+	if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
+		ibm_configure_pe	= rtas_function_token(RTAS_FN_IBM_CONFIGURE_BRIDGE);
+
+	/*
+	 * Necessary sanity check. We needn't check "get-config-addr-info"
+	 * and its variant since old firmware probably supports addresses
+	 * of the domain/bus/slot/function form for EEH RTAS operations.
+	 */
+	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE		||
+	    ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE		||
+	    (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+	     ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE)	||
+	    ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE	||
+	    ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+		pr_info("EEH functionality not supported\n");
+		return -EINVAL;
+	}
+
+	/* Initialize error log size */
+	eeh_error_buf_size = rtas_get_error_log_max();
+
+	/* Set EEH probe mode */
+	eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+
+	/* Set EEH machine dependent code */
+	ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+
+	if (is_kdump_kernel() || reset_devices) {
+		pr_info("Issue PHB reset ...\n");
+		list_for_each_entry(phb, &hose_list, list_node) {
+			// Skip if the slot is empty
+			if (list_empty(&PCI_DN(phb->dn)->child_list))
+				continue;
+
+			pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+			config_addr = pseries_eeh_get_pe_config_addr(pdn);
+
+			/* invalid PE config addr */
+			if (config_addr < 0)
+				continue;
+
+			pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+			pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+			pseries_eeh_phb_configure_bridge(phb, config_addr);
+		}
+	}
+
+	ret = eeh_init(&pseries_eeh_ops);
+	if (!ret)
+		pr_info("EEH: pSeries platform initialized\n");
+	else
+		pr_info("EEH: pSeries platform initialization failure (%d)\n",
+			ret);
+	return ret;
+}
+machine_arch_initcall(pseries, eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
new file mode 100644
index 0000000000..623dfe0d8e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen IBM Corporation
+ */
+
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+
+#include "pseries.h"
+
+void __init request_event_sources_irqs(struct device_node *np,
+				irq_handler_t handler,
+				const char *name)
+{
+	int idx, irq, err;
+
+	/* Try interrupt indices 0..15 of @np, in order. */
+	for (idx = 0; idx < 16; idx++) {
+		irq = of_irq_get(np, idx);
+		if (irq < 0)	/* lookup failed: stop scanning */
+			return;
+		if (WARN(!irq, "event-sources: Unable to allocate interrupt number for %pOF\n", np))
+			continue;
+
+		err = request_irq(irq, handler, 0, name, NULL);
+		if (WARN(err, "event-sources: Unable to request interrupt %d for %pOF\n",
+			 irq, np))
+			return;
+	}
+}
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
new file mode 100644
index 0000000000..18447e5fa1
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  pSeries firmware setup code.
+ *
+ *  Portions from arch/powerpc/platforms/pseries/setup.c:
+ *   Copyright (C) 1995  Linus Torvalds
+ *   Adapted from 'alpha' version by Gary Thomas
+ *   Modified by Cort Dougan (cort@cs.nmt.edu)
+ *   Modified by PPC64 Team, IBM Corp
+ *
+ *  Portions from arch/powerpc/kernel/firmware.c
+ *   Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *   Modifications for ppc64:
+ *    Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *    Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ *  Copyright 2006 IBM Corporation.
+ */
+
+
+#include <linux/of_fdt.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/svm.h>
+
+#include "pseries.h"
+
+struct hypertas_fw_feature {
+	unsigned long val;	/* FW_FEATURE_* bit(s) set on a match */
+	const char *name;	/* hypertas name; a trailing '*' means prefix match */
+};
+
+/*
+ * The names in this table match names in rtas/ibm,hypertas-functions.  If the
+ * entry ends in a '*', only the part up to the '*' is matched.  Otherwise the
+ * entire string must match.
+ */
+static __initdata struct hypertas_fw_feature
+hypertas_fw_features_table[] = {
+	{FW_FEATURE_PFT,		"hcall-pft"},
+	{FW_FEATURE_TCE,		"hcall-tce"},
+	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
+	{FW_FEATURE_DABR,		"hcall-dabr"},
+	{FW_FEATURE_COPY,		"hcall-copy"},
+	{FW_FEATURE_ASR,		"hcall-asr"},
+	{FW_FEATURE_DEBUG,		"hcall-debug"},
+	{FW_FEATURE_PERF,		"hcall-perf"},
+	{FW_FEATURE_DUMP,		"hcall-dump"},
+	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
+	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
+	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
+	{FW_FEATURE_CRQ,		"hcall-crq"},
+	{FW_FEATURE_VIO,		"hcall-vio"},
+	{FW_FEATURE_RDMA,		"hcall-rdma"},
+	{FW_FEATURE_LLAN,		"hcall-lLAN"},
+	{FW_FEATURE_BULK_REMOVE,	"hcall-bulk"},
+	{FW_FEATURE_XDABR,		"hcall-xdabr"},
+	{FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE,
+					"hcall-multi-tce"},
+	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
+	{FW_FEATURE_VPHN,		"hcall-vphn"},
+	{FW_FEATURE_SET_MODE,		"hcall-set-mode"},
+	{FW_FEATURE_BEST_ENERGY,	"hcall-best-energy-1*"},
+	{FW_FEATURE_HPT_RESIZE,		"hcall-hpt-resize"},
+	{FW_FEATURE_BLOCK_REMOVE,	"hcall-block-remove"},
+	{FW_FEATURE_PAPR_SCM,		"hcall-scm"},
+	{FW_FEATURE_RPT_INVALIDATE,	"hcall-rpt-invalidate"},
+	{FW_FEATURE_ENERGY_SCALE_INFO,	"hcall-energy-scale-info"},
+	{FW_FEATURE_WATCHDOG,		"hcall-watchdog"},
+	{FW_FEATURE_PLPKS,		"hcall-pks"},
+};
+
+/*
+ * Build the firmware features bitmask from device-tree/ibm,hypertas-functions.
+ * Ultimately this functionality may be moved into prom.c prom_init().
+ */
+static void __init fw_hypertas_feature_init(const char *hypertas,
+					    unsigned long len)
+{
+	const char *end = hypertas + len;
+	const char *fn;
+	int idx;
+
+	pr_debug(" -> fw_hypertas_feature_init()\n");
+
+	for (fn = hypertas; fn < end; fn += strlen(fn) + 1) {
+		for (idx = 0; idx < ARRAY_SIZE(hypertas_fw_features_table); idx++) {
+			const struct hypertas_fw_feature *ent =
+				&hypertas_fw_features_table[idx];
+			size_t n = strlen(ent->name);
+			int hit;
+
+			/* A trailing '*' in the table makes it a prefix match. */
+			if (n && ent->name[n - 1] == '*')
+				hit = !strncmp(ent->name, fn, n - 1);
+			else
+				hit = !strcmp(ent->name, fn);
+			if (!hit)
+				continue;
+
+			/* First matching entry wins for this name. */
+			powerpc_firmware_features |= ent->val;
+			break;
+		}
+	}
+
+	/* Whatever was advertised, secure guests never keep PUT_TCE_IND. */
+	if (is_secure_guest() &&
+	    (powerpc_firmware_features & FW_FEATURE_PUT_TCE_IND)) {
+		powerpc_firmware_features &= ~FW_FEATURE_PUT_TCE_IND;
+		pr_debug("SVM: disabling PUT_TCE_IND firmware feature\n");
+	}
+
+	pr_debug(" <- fw_vec5_feature_init()\n");
+}
+
+struct vec5_fw_feature {
+	unsigned long	val;		/* FW_FEATURE_* bit set when the option is present */
+	unsigned int	feature;	/* OV5_* value, split by OV5_INDX()/OV5_FEAT() */
+};
+/* Maps ibm,architecture-vec-5 option bits to firmware feature flags. */
+static __initdata struct vec5_fw_feature
+vec5_fw_features_table[] = {
+	{FW_FEATURE_FORM1_AFFINITY,	OV5_FORM1_AFFINITY},
+	{FW_FEATURE_PRRN,		OV5_PRRN},
+	{FW_FEATURE_DRMEM_V2,		OV5_DRMEM_V2},
+	{FW_FEATURE_DRC_INFO,		OV5_DRC_INFO},
+	{FW_FEATURE_FORM2_AFFINITY,	OV5_FORM2_AFFINITY},
+};
+
+static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
+{
+	const struct vec5_fw_feature *ent = vec5_fw_features_table;
+	const struct vec5_fw_feature *stop = ent + ARRAY_SIZE(vec5_fw_features_table);
+
+	pr_debug(" -> fw_vec5_feature_init()\n");
+
+	/* Set each table feature whose option bit is present in @vec5. */
+	for (; ent < stop; ent++) {
+		unsigned int byte = OV5_INDX(ent->feature);
+		unsigned int mask = OV5_FEAT(ent->feature);
+
+		/* Bytes beyond @len are treated as not advertised. */
+		if (byte < len && (vec5[byte] & mask))
+			powerpc_firmware_features |= ent->val;
+	}
+	pr_debug(" <- fw_vec5_feature_init()\n");
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init probe_fw_features(unsigned long node, const char *uname, int
+				    depth, void *data)
+{
+	const char *prop;
+	int len;
+	static int hypertas_found;	/* static: remembered across invocations */
+	static int vec5_found;		/* static: remembered across invocations */
+
+	if (depth != 1)	/* only nodes reported at depth 1 are examined */
+		return 0;
+
+	if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) {
+		prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions",
+					   &len);
+		if (prop) {
+			powerpc_firmware_features |= FW_FEATURE_LPAR;
+			fw_hypertas_feature_init(prop, len);
+		}
+
+		hypertas_found = 1;
+	}
+
+	if (!strcmp(uname, "chosen")) {
+		prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5",
+					   &len);
+		if (prop)
+			fw_vec5_feature_init(prop, len);
+
+		vec5_found = 1;
+	}
+	/* Non-zero once both nodes were seen, with or without their property. */
+	return hypertas_found && vec5_found;
+}
+
+void __init pseries_probe_fw_features(void)
+{
+	of_scan_flat_dt(probe_fw_features, NULL);	/* callback takes no private data */
+}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
new file mode 100644
index 0000000000..e62835a12d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -0,0 +1,901 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pseries CPU Hotplug infrastructure.
+ *
+ * Split out from arch/powerpc/platforms/pseries/setup.c
+ *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
+ *
+ * Peter Bergner, IBM	March 2001.
+ * Copyright (C) 2001 IBM.
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ * Plus various changes from other IBM teams...
+ *
+ * Copyright (C) 2006 Michael Ellerman, IBM Corporation
+ */
+
+#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/sched.h>	/* for idle_task_exit */
+#include <linux/sched/hotplug.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
+
+#include "pseries.h"
+
+/* This version can't take the spinlock, because it never returns */
+static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
+
+/*
+ * Record the CPU ids used on each node.
+ * Protected by cpu_add_remove_lock.
+ */
+static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
+
+static void rtas_stop_self(void)
+{
+	static struct rtas_args args;
+
+	local_irq_disable();
+
+	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
+	/* Uses the unlocked RTAS entry point; returning from it is a panic. */
+	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
+
+	panic("Alas, I survived.\n");
+}
+
+static void pseries_cpu_offline_self(void)
+{
+	unsigned int hwcpu = hard_smp_processor_id();
+
+	local_irq_disable();
+	idle_task_exit();
+	if (xive_enabled())
+		xive_teardown_cpu();
+	else
+		xics_teardown_cpu();
+	/* Drop this hardware CPU's SLB shadow and VPA registrations, then stop. */
+	unregister_slb_shadow(hwcpu);
+	unregister_vpa(hwcpu);
+	rtas_stop_self();
+
+	/* Should never get here... */
+	BUG();
+	for(;;);
+}
+
+static int pseries_cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+
+	set_cpu_online(cpu, false);
+	vdso_data->processorCount--;
+
+	/* This was the boot CPU: pick any CPU that is still online instead. */
+	if (cpu == boot_cpuid)
+		boot_cpuid = cpumask_any(cpu_online_mask);
+
+	/* FIXME: abstract this to not be platform specific later on */
+	if (xive_enabled())
+		xive_smp_disable_cpu();
+	else
+		xics_migrate_irqs_away();
+
+	cleanup_cpu_mmu_context();
+
+	return 0;
+}
+
+/*
+ * pseries_cpu_die: Wait for the cpu to die.
+ * @cpu: logical processor id of the CPU whose death we're awaiting.
+ *
+ * This function is called from the context of the thread which is performing
+ * the cpu-offline. It polls until the cpu in question is reported as stopped
+ * (or as having hit a hardware error), warning every 120 seconds but never
+ * giving up, so that the cpu-offline thread can send CPU_DEAD notifications.
+ *
+ * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
+ * self-destruct.
+ */
+static void pseries_cpu_die(unsigned int cpu)
+{
+	int cpu_status = 1;
+	unsigned int pcpu = get_hard_smp_processor_id(cpu);
+	unsigned long timeout = jiffies + msecs_to_jiffies(120000);
+
+	while (true) {
+		cpu_status = smp_query_cpu_stopped(pcpu);
+		if (cpu_status == QCSS_STOPPED ||
+		    cpu_status == QCSS_HARDWARE_ERROR)
+			break;
+
+		if (time_after(jiffies, timeout)) {
+			pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
+				cpu, pcpu);
+			timeout = jiffies + msecs_to_jiffies(120000);	/* re-arm the warning */
+		}
+
+		cond_resched();
+	}
+
+	if (cpu_status == QCSS_HARDWARE_ERROR) {
+		pr_warn("CPU %i (hwid %i) reported error while dying\n",
+			cpu, pcpu);
+	}
+
+	paca_ptrs[cpu]->cpu_start = 0;
+}
+
+/**
+ * find_cpu_id_range - find a linear range of @nthreads free CPU ids.
+ * @nthreads : the number of threads (cpu ids)
+ * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
+ *                  node can be picked.
+ * @cpu_mask: the returned CPU mask.
+ *
+ * Returns 0 on success, -ENOMEM or -ENOSPC on failure.
+ */
+static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
+			     cpumask_var_t *cpu_mask)
+{
+	cpumask_var_t candidate_mask;
+	unsigned int cpu, node;
+	int rc = -ENOSPC;
+
+	if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_clear(*cpu_mask);
+	for (cpu = 0; cpu < nthreads; cpu++)
+		cpumask_set_cpu(cpu, *cpu_mask);
+
+	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+
+	/* Get a bitmap of unoccupied slots. */
+	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
+
+	if (assigned_node != NUMA_NO_NODE) {
+		/*
+		 * Remove free ids previously assigned on the other nodes. We
+		 * can walk only online nodes because once a node has come
+		 * online it is never taken offline again.
+		 */
+		for_each_online_node(node) {
+			if (node == assigned_node)
+				continue;
+			cpumask_andnot(candidate_mask, candidate_mask,
+				       node_recorded_ids_map[node]);
+		}
+	}
+
+	if (cpumask_empty(candidate_mask))
+		goto out;
+	/* Slide the nthreads-wide window up in nthreads steps until it fits. */
+	while (!cpumask_empty(*cpu_mask)) {
+		if (cpumask_subset(*cpu_mask, candidate_mask))
+			/* Found a range where we can insert the new cpu(s) */
+			break;
+		cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
+	}
+
+	if (!cpumask_empty(*cpu_mask))
+		rc = 0;
+
+out:
+	free_cpumask_var(candidate_mask);
+	return rc;
+}
+
+/*
+ * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
+ * here is that a cpu device node may represent multiple logical cpus
+ * in the SMT case.  We must honor the assumption in other code that
+ * the logical ids for sibling SMT threads x and y are adjacent, such
+ * that x^1 == y and y^1 == x.
+ */
+static int pseries_add_processor(struct device_node *np)
+{
+	int len, nthreads, node, cpu, assigned_node;
+	int rc = 0;
+	cpumask_var_t cpu_mask;
+	const __be32 *intserv;
+
+	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return 0;
+
+	nthreads = len / sizeof(u32);
+
+	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	/*
+	 * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
+	 * node id the added CPU belongs to.
+	 */
+	node = of_node_to_nid(np);
+	if (node < 0 || !node_possible(node))
+		node = first_online_node;
+
+	BUG_ON(node == NUMA_NO_NODE);
+	assigned_node = node;
+
+	cpu_maps_update_begin();
+
+	rc = find_cpu_id_range(nthreads, node, &cpu_mask);
+	if (rc && nr_node_ids > 1) {
+		/*
+		 * Try again, considering the free CPU ids from the other nodes.
+		 */
+		node = NUMA_NO_NODE;
+		rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
+	}
+
+	if (rc) {
+		pr_err("Cannot add cpu %pOF; this system configuration"
+		       " supports %d logical cpus.\n", np, num_possible_cpus());
+		goto out;
+	}
+
+	for_each_cpu(cpu, cpu_mask) {
+		BUG_ON(cpu_present(cpu));
+		set_cpu_present(cpu, true);
+		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
+	}
+
+	/* Record the newly used CPU ids for the associated node. */
+	cpumask_or(node_recorded_ids_map[assigned_node],
+		   node_recorded_ids_map[assigned_node], cpu_mask);
+
+	/*
+	 * If node is set to NUMA_NO_NODE, CPU ids have been reused from
+	 * another node, remove them from its mask.
+	 */
+	if (node == NUMA_NO_NODE) {
+		cpu = cpumask_first(cpu_mask);
+		pr_warn("Reusing free CPU ids %d-%d from another node\n",
+			cpu, cpu + nthreads - 1);
+		for_each_online_node(node) {
+			if (node == assigned_node)
+				continue;
+			cpumask_andnot(node_recorded_ids_map[node],
+				       node_recorded_ids_map[node],
+				       cpu_mask);
+		}
+	}
+
+out:
+	cpu_maps_update_done();
+	free_cpumask_var(cpu_mask);
+	return rc;
+}
+
+/*
+ * Update the present map for a cpu node which is going away, and set
+ * the hard id in the paca(s) to -1 to be consistent with boot time
+ * convention for non-present cpus.
+ */
+static void pseries_remove_processor(struct device_node *np)
+{
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return;
+
+	nthreads = len / sizeof(u32);
+	/* Match each hardware thread id of the node against the present CPUs. */
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+			BUG_ON(cpu_online(cpu));
+			set_cpu_present(cpu, false);
+			set_hard_smp_processor_id(cpu, -1);
+			update_numa_cpu_lookup_table(cpu, -1);
+			break;
+		}
+		if (cpu >= nr_cpu_ids)	/* loop ran to completion: no match */
+			printk(KERN_WARNING "Could not find cpu to remove "
+			       "with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+}
+
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+	/* Offline every online CPU whose hardware id is listed in @dn. */
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			if (!cpu_online(cpu))
+				break;
+
+			/*
+			 * device_offline() will return -EBUSY (via cpu_down()) if there
+			 * is only one CPU left. Check it here to fail earlier and with a
+			 * more informative error message, while also retaining the
+			 * cpu_add_remove_lock to be sure that no CPUs are being
+			 * online/offlined during this check.
+			 */
+			if (num_online_cpus() == 1) {
+				pr_warn("Unable to remove last online CPU %pOFn\n", dn);
+				rc = -EBUSY;
+				goto out_unlock;
+			}
+
+			cpu_maps_update_done();
+			rc = device_offline(get_cpu_device(cpu));
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
+			break;
+		}
+		if (cpu >= nr_cpu_ids) {	/* iterator ran off the end: no match */
+			pr_warn("Could not find cpu to offline with physical id 0x%x\n",
+				thread);
+		}
+	}
+out_unlock:
+	cpu_maps_update_done();
+
+out:
+	return rc;
+}
+
+/*
+ * Online the threads of @dn that SMT policy permits; offline on failure.
+ */
+static int dlpar_online_cpu(struct device_node *dn)
+{
+	int len, nthreads, i, rc = 0;
+	const __be32 *intserv;
+	unsigned int cpu;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			if (!topology_is_primary_thread(cpu)) {
+				if (cpu_smt_control != CPU_SMT_ENABLED)
+					break;
+				if (!topology_smt_thread_allowed(cpu))
+					break;
+			}
+
+			cpu_maps_update_done();
+			find_and_update_cpu_nid(cpu);
+			rc = device_online(get_cpu_device(cpu));
+			if (rc) {
+				dlpar_offline_cpu(dn);
+				goto out;
+			}
+			cpu_maps_update_begin();
+			break;
+		}
+		if (cpu >= nr_cpu_ids)	/* no present cpu matched */
+			printk(KERN_WARNING "Could not find cpu to online "
+			       "with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+}
+
+/*
+ * Report whether any child of @parent already carries @drc_index in its
+ * "ibm,my-drc-index" property.
+ */
+static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
+{
+	struct device_node *cpu_dn;
+	u32 child_index;
+
+	for_each_child_of_node(parent, cpu_dn) {
+		/* Children without the property cannot match. */
+		if (of_property_read_u32(cpu_dn, "ibm,my-drc-index",
+					 &child_index))
+			continue;
+
+		if (child_index != drc_index)
+			continue;
+
+		/* Put the matching child before leaving the loop early. */
+		of_node_put(cpu_dn);
+		return true;
+	}
+
+	return false;
+}
+
+/* True if a CPU record of @parent's "ibm,drc-info" contains @drc_index. */
+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+	struct of_drc_info rec;
+	struct property *prop;
+	const __be32 *cursor;
+	int nr_records, r, e;
+	u32 candidate;
+
+	prop = of_find_property(parent, "ibm,drc-info", NULL);
+	if (!prop)
+		return false;
+
+	/* First value of ibm,drc-info is number of drc-info records */
+	cursor = of_prop_next_u32(prop, NULL, &nr_records);
+	if (!cursor)
+		return false;
+	cursor++;
+
+	for (r = 0; r < nr_records; r++) {
+		if (of_read_drc_info_cell(&prop, &cursor, &rec))
+			return false;
+
+		/* Stop at the first record whose type does not start "CPU". */
+		if (strncmp(rec.drc_type, "CPU", 3))
+			break;
+
+		if (drc_index > rec.last_drc_index)
+			continue;
+
+		candidate = rec.drc_index_start;
+		for (e = 0; e < rec.num_sequential_elems; e++) {
+			if (candidate == drc_index)
+				return true;
+
+			candidate += rec.sequential_inc;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Check that @drc_index is listed under @parent, using "ibm,drc-info"
+ * when that property is present and "ibm,drc-indexes" otherwise.
+ */
+static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
+{
+	u32 entry;
+	int pos;
+
+	/* Prefer the ibm,drc-info description when the node has one. */
+	if (of_property_present(parent, "ibm,drc-info"))
+		return drc_info_valid_index(parent, drc_index);
+
+	/*
+	 * The ibm,drc-indexes array starts with its number of entries,
+	 * followed by the drc values, so the scan begins at position 1
+	 * and runs until a read fails.
+	 */
+	for (pos = 1; ; pos++) {
+		/* Any read failure (e.g. past the last entry) ends the scan. */
+		if (of_property_read_u32_index(parent, "ibm,drc-indexes",
+					       pos, &entry))
+			return false;
+
+		if (entry == drc_index)
+			return true;
+	}
+}
+
+/* Attach @dn and all of its siblings to the tree with one changeset. */
+static int pseries_cpuhp_attach_nodes(struct device_node *dn)
+{
+	struct of_changeset cs;
+	int ret;
+
+	of_changeset_init(&cs);
+	/* @dn is unattached but may have siblings; walk them by hand. */
+	while (dn) {
+		ret = of_changeset_attach_node(&cs, dn);
+		if (ret)
+			goto out;
+		dn = dn->sibling;
+	}
+
+	ret = of_changeset_apply(&cs);
+out:
+	of_changeset_destroy(&cs);
+	return ret;
+}
+/* Add the cpu with DRC index @drc_index under /cpus; returns 0 or -errno. */
+static ssize_t dlpar_cpu_add(u32 drc_index)
+{
+	struct device_node *dn, *parent;
+	int rc, saved_rc;
+
+	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent) {
+		pr_warn("Failed to find CPU root node \"/cpus\"\n");
+		return -ENODEV;
+	}
+
+	if (dlpar_cpu_exists(parent, drc_index)) {
+		of_node_put(parent);
+		pr_warn("CPU with drc index %x already exists\n", drc_index);
+		return -EINVAL;
+	}
+
+	if (!valid_cpu_drc_index(parent, drc_index)) {
+		of_node_put(parent);
+		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
+		return -EINVAL;
+	}
+
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc) {
+		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
+			rc, drc_index);
+		of_node_put(parent);
+		return -EINVAL;	/* rc itself is only logged */
+	}
+
+	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
+	if (!dn) {
+		pr_warn("Failed call to configure-connector, drc index: %x\n",
+			drc_index);
+		dlpar_release_drc(drc_index);
+		of_node_put(parent);
+		return -EINVAL;
+	}
+
+	rc = pseries_cpuhp_attach_nodes(dn);
+
+	/* Regardless we are done with parent now */
+	of_node_put(parent);
+
+	if (rc) {
+		saved_rc = rc;
+		pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
+			dn, rc, drc_index);
+		/* The new nodes are freed only if the DRC release succeeded. */
+		rc = dlpar_release_drc(drc_index);
+		if (!rc)
+			dlpar_free_cc_nodes(dn);
+
+		return saved_rc;
+	}
+
+	update_numa_distance(dn);
+
+	rc = dlpar_online_cpu(dn);
+	if (rc) {
+		saved_rc = rc;
+		pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
+			dn, rc, drc_index);
+		/* Unwind: detach the node, then release the DRC if that worked. */
+		rc = dlpar_detach_node(dn);
+		if (!rc)
+			dlpar_release_drc(drc_index);
+
+		return saved_rc;
+	}
+
+	pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
+		 drc_index);
+	return rc;
+}
+
+/* Count the cpu and cache nodes whose next-level cache is @cachedn. */
+static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn)
+{
+	struct device_node *user, *next;
+	unsigned int users = 0;
+
+	WARN_ON(!of_node_is_type(cachedn, "cache"));
+
+	/* Only the pointer value of @next is compared, so put it right away. */
+	for_each_of_cpu_node(user) {
+		next = of_find_next_cache_node(user);
+		of_node_put(next);
+		users += (next == cachedn);
+	}
+
+	for_each_node_by_type(user, "cache") {
+		next = of_find_next_cache_node(user);
+		of_node_put(next);
+		users += (next == cachedn);
+	}
+
+	return users;
+}
+
+/* Detach @cpudn and the caches it chains to, up to the first shared one. */
+static int pseries_cpuhp_detach_nodes(struct device_node *cpudn)
+{
+	struct device_node *cache = cpudn;
+	struct of_changeset cs;
+	bool shared;
+	int ret;
+
+	of_changeset_init(&cs);
+	ret = of_changeset_detach_node(&cs, cpudn);
+	if (ret)
+		goto out;
+
+	while ((cache = of_find_next_cache_node(cache)) != NULL) {
+		shared = pseries_cpuhp_cache_use_count(cache) > 1;
+		if (!shared)
+			ret = of_changeset_detach_node(&cs, cache);
+		of_node_put(cache);
+		if (shared)
+			break;
+		if (ret)
+			goto out;
+	}
+
+	ret = of_changeset_apply(&cs);
+out:
+	of_changeset_destroy(&cs);
+	return ret;
+}
+
+/* Offline @dn's cpu, release its DRC and detach its nodes; 0 or -errno. */
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+{
+	int rc;
+
+	pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
+		 dn, drc_index);
+
+	rc = dlpar_offline_cpu(dn);
+	if (rc) {
+		pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
+		return -EINVAL;
+	}
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc) {
+		pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
+			drc_index, dn, rc);
+		/* The DRC is still ours, so put the cpu back online. */
+		dlpar_online_cpu(dn);
+		return rc;
+	}
+
+	rc = pseries_cpuhp_detach_nodes(dn);
+	if (rc) {
+		pr_warn("Failed to detach CPU %pOFn, rc: %d\n", dn, rc);
+
+		/* Roll back: take the DRC again and bring the cpu back up. */
+		if (!dlpar_acquire_drc(drc_index))
+			dlpar_online_cpu(dn);
+
+		return rc;
+	}
+
+	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
+	return 0;
+}
+
+/* Look up the "cpu" node with @drc_index; NULL if none.  Caller puts it. */
+static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
+{
+	struct device_node *cpu_dn;
+	u32 node_index;
+
+	for_each_node_by_type(cpu_dn, "cpu") {
+		/* Nodes lacking ibm,my-drc-index are skipped. */
+		if (of_property_read_u32(cpu_dn, "ibm,my-drc-index",
+					 &node_index))
+			continue;
+		if (node_index == drc_index)
+			break;
+	}
+
+	return cpu_dn;
+}
+
+/* Remove the cpu identified by @drc_index; -ENODEV if no node has it. */
+static int dlpar_cpu_remove_by_index(u32 drc_index)
+{
+	struct device_node *cpu_dn = cpu_drc_index_to_dn(drc_index);
+	int ret = -ENODEV;
+
+	if (cpu_dn) {
+		ret = dlpar_cpu_remove(cpu_dn, drc_index);
+		of_node_put(cpu_dn);
+	} else {
+		pr_warn("Cannot find CPU (drc index %x) to remove\n",
+			drc_index);
+	}
+
+	return ret;
+}
+
+/*
+ * Entry point for CPU hotplug requests.  Under the device hotplug lock,
+ * add or remove the cpu whose DRC index is carried in @hp_elog.  Requests
+ * that do not identify the cpu by DRC index, and unknown actions, fail
+ * with -EINVAL.
+ */
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	const bool by_index = hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX;
+	u32 drc_index = hp_elog->_drc_u.drc_index;
+	int rc = -EINVAL;
+
+	lock_device_hotplug();
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		if (!by_index)
+			break;
+		rc = dlpar_cpu_remove_by_index(drc_index);
+		/*
+		 * Setting the isolation state of an UNISOLATED/CONFIGURED
+		 * device to UNISOLATE is a no-op, but the hypervisor can
+		 * use it as a hint that the CPU removal failed.
+		 */
+		if (rc)
+			dlpar_unisolate_drc(drc_index);
+		break;
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		if (by_index)
+			rc = dlpar_cpu_add(drc_index);
+		break;
+	default:
+		pr_err("Invalid action (%d) specified\n", hp_elog->action);
+		break;
+	}
+
+	unlock_device_hotplug();
+	return rc;
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+/* cpu_probe hook: parse a DRC index from @buf and add that cpu. */
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	u32 drc_index;
+	int rc;
+
+	if (kstrtou32(buf, 0, &drc_index))
+		return -EINVAL;
+
+	rc = dlpar_cpu_add(drc_index);
+
+	return rc ? rc : count;
+}
+
+/*
+ * cpu_release hook: @buf is the device-tree path of the cpu node to
+ * remove.  Returns @count on success or a negative errno.
+ */
+static ssize_t dlpar_cpu_release(const char *buf, size_t count)
+{
+	struct device_node *cpu_dn;
+	u32 drc_index;
+	int rc = -EINVAL;
+
+	cpu_dn = of_find_node_by_path(buf);
+	if (!cpu_dn)
+		return -EINVAL;
+
+	/* A node without ibm,my-drc-index is rejected with -EINVAL. */
+	if (!of_property_read_u32(cpu_dn, "ibm,my-drc-index", &drc_index))
+		rc = dlpar_cpu_remove(cpu_dn, drc_index);
+
+	of_node_put(cpu_dn);
+	return rc ? rc : count;
+}
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+/*
+ * OF reconfig notifier for cpu nodes; actions other than node attach
+ * and detach are ignored.
+ */
+static int pseries_smp_notifier(struct notifier_block *nb,
+				unsigned long action, void *data)
+{
+	struct of_reconfig_data *reconfig = data;
+	int rc = 0;
+
+	if (action == OF_RECONFIG_ATTACH_NODE)
+		rc = pseries_add_processor(reconfig->dn);
+	else if (action == OF_RECONFIG_DETACH_NODE)
+		pseries_remove_processor(reconfig->dn);
+	return notifier_from_errno(rc);
+}
+
+static struct notifier_block pseries_smp_nb = {
+	.notifier_call = pseries_smp_notifier,
+};
+
+/* Hook up cpu hotplug smp_ops if firmware has the needed RTAS calls. */
+void __init pseries_cpu_hotplug_init(void)
+{
+	int query_token;
+
+	rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF);
+	query_token = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
+
+	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
+	    query_token == RTAS_UNKNOWN_SERVICE) {
+		printk(KERN_INFO "CPU Hotplug not supported by firmware - disabling.\n");
+		return;
+	}
+
+	smp_ops->cpu_offline_self = pseries_cpu_offline_self;
+	smp_ops->cpu_disable = pseries_cpu_disable;
+	smp_ops->cpu_die = pseries_cpu_die;
+}
+
+/* Set up cpu DLPAR: probe/release hooks, per-node id masks, notifier. */
+static int __init pseries_dlpar_init(void)
+{
+	unsigned int nid;
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+	/* Processors can be added/removed only on LPAR */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	for_each_node(nid) {
+		if (!alloc_cpumask_var_node(&node_recorded_ids_map[nid],
+					    GFP_KERNEL, nid))
+			return -ENOMEM;
+		/* Record ids of CPU added at boot time */
+		cpumask_copy(node_recorded_ids_map[nid], cpumask_of_node(nid));
+	}
+
+	of_reconfig_notifier_register(&pseries_smp_nb);
+
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_dlpar_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
new file mode 100644
index 0000000000..4adca5b61d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pseries Memory Hotplug infrastructure.
+ *
+ * Copyright (C) 2008 Badari Pulavarty, IBM Corporation
+ */
+
+#define pr_fmt(fmt)	"pseries-hotplug-mem: " fmt
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/memblock.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <linux/slab.h>
+
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/sparsemem.h>
+#include <asm/fadump.h>
+#include <asm/drmem.h>
+#include "pseries.h"
+
+static void dlpar_free_property(struct property *prop)
+{
+	kfree(prop->name);	/* name and value are separate allocations */
+	kfree(prop->value);
+	kfree(prop);		/* finally the property struct itself */
+}
+
+/* Clone @prop into a zeroed @prop_size byte value; NULL on alloc failure. */
+static struct property *dlpar_clone_property(struct property *prop,
+					     u32 prop_size)
+{
+	struct property *clone = kzalloc(sizeof(*clone), GFP_KERNEL);
+
+	if (!clone)
+		return NULL;
+
+	clone->name = kstrdup(prop->name, GFP_KERNEL);
+	clone->value = kzalloc(prop_size, GFP_KERNEL);
+	if (!clone->name || !clone->value) {
+		dlpar_free_property(clone);
+		return NULL;
+	}
+
+	/* NOTE(review): assumes prop_size >= prop->length - confirm callers. */
+	memcpy(clone->value, prop->value, prop->length);
+	clone->length = prop_size;
+	of_property_set_flag(clone, OF_DYNAMIC);
+	return clone;
+}
+
+/*
+ * Find the associativity lookup array matching @lmb_assoc, appending one
+ * to the property when none does.  Returns false if that update fails.
+ */
+static bool find_aa_index(struct device_node *dr_node,
+			 struct property *ala_prop,
+			 const u32 *lmb_assoc, u32 *aa_index)
+{
+	u32 *assoc_arrays, new_prop_size;
+	struct property *new_prop;
+	int aa_arrays, aa_array_entries, aa_array_sz;
+	int i, index;
+
+	/*
+	 * The ibm,associativity-lookup-arrays property is defined to be
+	 * a 32-bit value specifying the number of associativity arrays
+	 * followed by a 32-bit value specifying the number of entries per
+	 * array, followed by the associativity arrays.
+	 */
+	assoc_arrays = ala_prop->value;
+
+	aa_arrays = be32_to_cpu(assoc_arrays[0]);
+	aa_array_entries = be32_to_cpu(assoc_arrays[1]);
+	aa_array_sz = aa_array_entries * sizeof(u32);
+
+	for (i = 0; i < aa_arrays; i++) {
+		index = (i * aa_array_entries) + 2;
+		if (!memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz)) {
+			*aa_index = i;
+			return true;
+		}
+	}
+
+	new_prop_size = ala_prop->length + aa_array_sz;
+	new_prop = dlpar_clone_property(ala_prop, new_prop_size);
+	if (!new_prop)
+		return false;
+
+	assoc_arrays = new_prop->value;
+	/* increment the number of entries in the lookup array */
+	assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
+	/* copy the new associativity into the lookup array */
+	index = aa_arrays * aa_array_entries + 2;
+	memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
+
+	/* A rejected update leaves new_prop unattached: free it and fail. */
+	if (of_update_property(dr_node, new_prop)) {
+		dlpar_free_property(new_prop);
+		return false;
+	}
+
+	/* The new array was appended, so its index is the old array count. */
+	*aa_index = aa_arrays;
+	return true;
+}
+
+/* Look up (or create) @lmb's associativity lookup index; 0 or -errno. */
+static int update_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+	struct device_node *parent, *lmb_node, *dr_node;
+	struct property *ala_prop;
+	const u32 *lmb_assoc;
+	u32 aa_index;
+	bool found;
+
+	parent = of_find_node_by_path("/");
+	if (!parent)
+		return -ENODEV;
+
+	lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index),
+					     parent);
+	of_node_put(parent);
+	if (!lmb_node)
+		return -EINVAL;
+
+	lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL);
+	if (!lmb_assoc) {
+		dlpar_free_cc_nodes(lmb_node);
+		return -ENODEV;
+	}
+
+	update_numa_distance(lmb_node);
+
+	dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (!dr_node) {
+		dlpar_free_cc_nodes(lmb_node);
+		return -ENODEV;
+	}
+
+	ala_prop = of_find_property(dr_node, "ibm,associativity-lookup-arrays",
+				    NULL);
+	if (!ala_prop) {
+		of_node_put(dr_node);
+		dlpar_free_cc_nodes(lmb_node);
+		return -ENODEV;
+	}
+
+	found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index);
+	of_node_put(dr_node);
+	dlpar_free_cc_nodes(lmb_node);
+
+	if (!found) {
+		pr_err("Could not find LMB associativity\n");
+		return -ENODEV;
+	}
+
+	lmb->aa_index = aa_index;
+	return 0;
+}
+
+/*
+ * Return the memory block for the section holding @lmb's base address.
+ * Callers check for NULL and drop the block with put_device().
+ */
+static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
+{
+	unsigned long pfn = PFN_DOWN(lmb->base_addr);
+
+	return find_memory_block(pfn_to_section_nr(pfn));
+}
+
+/* Map @n_lmbs LMBs from @drc_index to the range [*start_lmb, *end_lmb). */
+static int get_lmb_range(u32 drc_index, int n_lmbs,
+			 struct drmem_lmb **start_lmb,
+			 struct drmem_lmb **end_lmb)
+{
+	struct drmem_lmb *lmb, *limit, *start = NULL;
+
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index == drc_index) {
+			start = lmb;
+			break;
+		}
+	}
+	if (!start)
+		return -EINVAL;
+
+	/*
+	 * Compare counts, not pointers: forming &start[n_lmbs] for a bogus
+	 * (negative or oversized) n_lmbs would already be out of bounds.
+	 */
+	limit = &drmem_info->lmbs[drmem_info->n_lmbs];
+	if (n_lmbs < 0 || n_lmbs > limit - start)
+		return -EINVAL;
+
+	*start_lmb = start;
+	*end_lmb = &start[n_lmbs];
+	return 0;
+}
+
+/*
+ * Bring @lmb's memory block online or offline, as asked by @online.
+ */
+static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online)
+{
+	struct memory_block *blk = lmb_to_memblock(lmb);
+	struct device *dev;
+	int ret = 0;
+
+	if (!blk)
+		return -EINVAL;
+
+	dev = &blk->dev;
+	/* Nothing to do when the device state already matches the request. */
+	if (online == dev->offline)
+		ret = online ? device_online(dev) : device_offline(dev);
+
+	put_device(dev);
+	return ret;
+}
+
+static int dlpar_online_lmb(struct drmem_lmb *lmb)
+{
+	return dlpar_change_lmb_state(lmb, true);	/* true: online */
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static int dlpar_offline_lmb(struct drmem_lmb *lmb)
+{
+	return dlpar_change_lmb_state(lmb, false);	/* false: offline */
+}
+
+/*
+ * Hot-remove the memory block at @base section by section, then drop
+ * [@base, @base + @memblock_size) from memblock.  Always returns 0.
+ */
+static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
+{
+	unsigned long start_pfn = base >> PAGE_SHIFT;
+	unsigned long addr = base;
+	int sections_per_block, i;
+
+	lock_device_hotplug();
+
+	if (!pfn_valid(start_pfn))
+		goto out;
+
+	sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
+	/* Walk a cursor; @base must stay intact for memblock_remove() below. */
+	for (i = 0; i < sections_per_block; i++, addr += MIN_MEMORY_BLOCK_SIZE)
+		__remove_memory(addr, MIN_MEMORY_BLOCK_SIZE);
+
+out:
+	/* Update memory regions for memory remove */
+	memblock_remove(base, memblock_size);
+	unlock_device_hotplug();
+	return 0;
+}
+
+/*
+ * If @np is a "memory" node, hot-remove the range it describes.
+ */
+static int pseries_remove_mem_node(struct device_node *np)
+{
+	struct resource range;
+	int rc;
+
+	/* Check to see if we are actually removing memory */
+	if (!of_node_is_type(np, "memory"))
+		return 0;
+
+	/* Find the base address and size of the memblock */
+	rc = of_address_to_resource(np, 0, &range);
+	if (rc)
+		return rc;
+
+	/* The helper's result is not propagated; report success. */
+	pseries_remove_memblock(range.start, resource_size(&range));
+	return 0;
+}
+
+/* Cheap pre-check: can @lmb be considered for hot-remove at all? */
+static bool lmb_is_removable(struct drmem_lmb *lmb)
+{
+	/* Only assigned, non-reserved LMBs are candidates. */
+	if (lmb->flags & DRCONF_MEM_RESERVED)
+		return false;
+	if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+		return false;
+
+#ifdef CONFIG_FA_DUMP
+	/* Keep fadump boot memory and memory reserved for the old kernel. */
+	if (is_fadump_memory_area(lmb->base_addr, memory_block_size_bytes()))
+		return false;
+#endif
+	/* device_offline() will determine if we can actually remove this lmb */
+	return true;
+}
+
+static int dlpar_add_lmb(struct drmem_lmb *);
+
+/* Offline and hot-remove @lmb, then mark it unassigned; 0 or -errno. */
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
+{
+	struct memory_block *blk;
+	int rc;
+
+	if (!lmb_is_removable(lmb))
+		return -EINVAL;
+
+	/* Hold the memory block device across the offline + remove steps. */
+	blk = lmb_to_memblock(lmb);
+	if (!blk)
+		return -EINVAL;
+
+	rc = dlpar_offline_lmb(lmb);
+	if (!rc)
+		__remove_memory(lmb->base_addr, memory_block_size);
+	put_device(&blk->dev);
+	if (rc)
+		return rc;
+
+	/* Update memory regions for memory remove */
+	memblock_remove(lmb->base_addr, memory_block_size);
+
+	invalidate_lmb_associativity_index(lmb);
+	lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+
+	return 0;
+}
+
+/* Hot-remove @lmbs_to_remove LMBs, re-adding them all if too few succeed. */
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
+{
+	struct drmem_lmb *lmb;
+	int lmbs_reserved = 0, lmbs_available = 0, rc;
+
+	pr_info("Attempting to hot-remove %u LMB(s)\n", lmbs_to_remove);
+
+	if (lmbs_to_remove == 0)
+		return -EINVAL;
+
+	/* Validate that there are enough LMBs to satisfy the request */
+	for_each_drmem_lmb(lmb) {
+		if (lmb_is_removable(lmb))
+			lmbs_available++;
+
+		if (lmbs_available == lmbs_to_remove)
+			break;
+	}
+
+	if (lmbs_available < lmbs_to_remove) {
+		pr_info("Not enough LMBs available (%d of %u) to satisfy request\n",
+			lmbs_available, lmbs_to_remove);
+		return -EINVAL;
+	}
+
+	/* Walk the LMBs in order, skipping any that refuse to be removed. */
+	for_each_drmem_lmb(lmb) {
+		rc = dlpar_remove_lmb(lmb);
+		if (rc)
+			continue;
+
+		/* Mark this lmb so we can add it later if all of the
+		 * requested LMBs cannot be removed.
+		 */
+		drmem_mark_lmb_reserved(lmb);
+
+		lmbs_reserved++;
+		if (lmbs_reserved == lmbs_to_remove)
+			break;
+	}
+
+	if (lmbs_reserved != lmbs_to_remove) {
+		pr_err("Memory hot-remove failed, adding LMB's back\n");
+
+		for_each_drmem_lmb(lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			rc = dlpar_add_lmb(lmb);
+			if (rc)
+				pr_err("Failed to add LMB back, drc index %x\n",
+				       lmb->drc_index);
+
+			drmem_remove_lmb_reservation(lmb);
+
+			lmbs_reserved--;
+			if (lmbs_reserved == 0)
+				break;
+		}
+
+		rc = -EINVAL;
+	} else {
+		for_each_drmem_lmb(lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			dlpar_release_drc(lmb->drc_index);
+			pr_info("Memory at %llx was hot-removed\n",
+				lmb->base_addr);
+
+			drmem_remove_lmb_reservation(lmb);
+
+			lmbs_reserved--;
+			if (lmbs_reserved == 0)
+				break;
+		}
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/* Hot-remove the single LMB whose DRC index is @drc_index. */
+static int dlpar_memory_remove_by_index(u32 drc_index)
+{
+	struct drmem_lmb *lmb, *target = NULL;
+	int rc;
+
+	pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index);
+
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index == drc_index) {
+			target = lmb;
+			break;
+		}
+	}
+
+	if (!target) {
+		pr_debug("Failed to look up LMB for drc index %x\n", drc_index);
+		return -EINVAL;
+	}
+
+	rc = dlpar_remove_lmb(target);
+	if (rc) {
+		pr_debug("Failed to hot-remove memory at %llx\n",
+			 target->base_addr);
+		return rc;
+	}
+
+	/* The LMB was removed; release its DRC as well. */
+	dlpar_release_drc(target->drc_index);
+	pr_debug("Memory at %llx was hot-removed\n", target->base_addr);
+	return 0;
+}
+/* Hot-remove the @lmbs_to_remove LMBs starting at @drc_index, all or none. */
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
+{
+	struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+	int rc;
+
+	pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
+		lmbs_to_remove, drc_index);
+
+	if (lmbs_to_remove == 0)
+		return -EINVAL;
+
+	rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb);
+	if (rc)
+		return -EINVAL;
+
+	/*
+	 * Validate that all LMBs in range are not reserved. Note that it
+	 * is ok if they are !ASSIGNED since our goal here is to remove the
+	 * LMB range, regardless of whether some LMBs were already removed
+	 * by any other reason.
+	 *
+	 * This is a contrast to what is done in remove_by_count() where we
+	 * check for both RESERVED and !ASSIGNED (via lmb_is_removable()),
+	 * because we want to remove a fixed amount of LMBs in that function.
+	 */
+	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+		if (lmb->flags & DRCONF_MEM_RESERVED) {
+			pr_err("Memory at %llx (drc index %x) is reserved\n",
+				lmb->base_addr, lmb->drc_index);
+			return -EINVAL;
+		}
+	}
+
+	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+		/*
+		 * dlpar_remove_lmb() will error out if the LMB is already
+		 * !ASSIGNED, but this case is a no-op for us.
+		 */
+		if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+			continue;
+
+		rc = dlpar_remove_lmb(lmb);
+		if (rc)
+			break;
+
+		drmem_mark_lmb_reserved(lmb);
+	}
+
+	if (rc) {
+		pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n");
+
+		/* Re-add only the LMBs this call removed (marked reserved). */
+		for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			/*
+			 * Setting the isolation state of an UNISOLATED/CONFIGURED
+			 * device to UNISOLATE is a no-op, but the hypervisor can
+			 * use it as a hint that the LMB removal failed.
+			 */
+			dlpar_unisolate_drc(lmb->drc_index);
+
+			rc = dlpar_add_lmb(lmb);
+			if (rc)
+				pr_err("Failed to add LMB, drc index %x\n",
+				       lmb->drc_index);
+
+			drmem_remove_lmb_reservation(lmb);
+		}
+		rc = -EINVAL;
+	} else {
+		for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			dlpar_release_drc(lmb->drc_index);
+			pr_info("Memory at %llx (drc index %x) was hot-removed\n",
+				lmb->base_addr, lmb->drc_index);
+
+			drmem_remove_lmb_reservation(lmb);
+		}
+	}
+
+	return rc;
+}
+
+#else /* !CONFIG_MEMORY_HOTREMOVE: the remove entry points become stubs */
+static inline int pseries_remove_memblock(unsigned long base,
+					  unsigned long memblock_size)
+{
+	return -EOPNOTSUPP;
+}
+static inline int pseries_remove_mem_node(struct device_node *np)
+{
+	return 0;	/* unlike the other stubs, this one reports success */
+}
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
+{
+	return -EOPNOTSUPP;
+}
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
+{
+	return -EOPNOTSUPP;
+}
+static int dlpar_memory_remove_by_index(u32 drc_index)
+{
+	return -EOPNOTSUPP;
+}
+
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+/* Add @lmb's memory to the kernel and online it; returns 0 or -errno. */
+static int dlpar_add_lmb(struct drmem_lmb *lmb)
+{
+	unsigned long block_sz;
+	int nid, rc;
+
+	if (lmb->flags & DRCONF_MEM_ASSIGNED)
+		return -EINVAL;
+
+	rc = update_lmb_associativity_index(lmb);
+	if (rc) {
+		/* NOTE(review): the add_by_* callers release the DRC again. */
+		dlpar_release_drc(lmb->drc_index);
+		return rc;
+	}
+	block_sz = memory_block_size_bytes();
+
+	/* Find the node id for this LMB.  Fake one if necessary. */
+	nid = of_drconf_to_nid_single(lmb);
+	if (nid < 0 || !node_possible(nid))
+		nid = first_online_node;
+
+	/* Add the memory */
+	rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
+	if (rc) {
+		invalidate_lmb_associativity_index(lmb);
+		return rc;
+	}
+
+	rc = dlpar_online_lmb(lmb);
+	if (rc) {
+		__remove_memory(lmb->base_addr, block_sz);
+		invalidate_lmb_associativity_index(lmb);
+	} else {
+		lmb->flags |= DRCONF_MEM_ASSIGNED;
+	}
+
+	return rc;
+}
+
+/* Hot-add @lmbs_to_add LMBs, removing them all again if too few succeed. */
+static int dlpar_memory_add_by_count(u32 lmbs_to_add)
+{
+	struct drmem_lmb *lmb;
+	int lmbs_available = 0, lmbs_reserved = 0, rc;
+
+	pr_info("Attempting to hot-add %u LMB(s)\n", lmbs_to_add);
+
+	if (lmbs_to_add == 0)
+		return -EINVAL;
+
+	/* Validate that there are enough LMBs to satisfy the request */
+	for_each_drmem_lmb(lmb) {
+		if (lmb->flags & DRCONF_MEM_RESERVED)
+			continue;
+
+		if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+			lmbs_available++;
+
+		if (lmbs_available == lmbs_to_add)
+			break;
+	}
+
+	if (lmbs_available < lmbs_to_add)
+		return -EINVAL;
+
+	/* Acquire and add unassigned LMBs until the request is met. */
+	for_each_drmem_lmb(lmb) {
+		if (lmb->flags & DRCONF_MEM_ASSIGNED)
+			continue;
+
+		rc = dlpar_acquire_drc(lmb->drc_index);
+		if (rc)
+			continue;
+
+		rc = dlpar_add_lmb(lmb);
+		if (rc) {
+			dlpar_release_drc(lmb->drc_index);
+			continue;
+		}
+
+		/* Mark this lmb so we can remove it later if all of the
+		 * requested LMBs cannot be added.
+		 */
+		drmem_mark_lmb_reserved(lmb);
+		lmbs_reserved++;
+		if (lmbs_reserved == lmbs_to_add)
+			break;
+	}
+
+	if (lmbs_reserved != lmbs_to_add) {
+		pr_err("Memory hot-add failed, removing any added LMBs\n");
+
+		for_each_drmem_lmb(lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			rc = dlpar_remove_lmb(lmb);
+			if (rc)
+				pr_err("Failed to remove LMB, drc index %x\n",
+				       lmb->drc_index);
+			else
+				dlpar_release_drc(lmb->drc_index);
+
+			drmem_remove_lmb_reservation(lmb);
+			lmbs_reserved--;
+
+			if (lmbs_reserved == 0)
+				break;
+		}
+		rc = -EINVAL;
+	} else {
+		for_each_drmem_lmb(lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			pr_debug("Memory at %llx (drc index %x) was hot-added\n",
+				 lmb->base_addr, lmb->drc_index);
+			drmem_remove_lmb_reservation(lmb);
+			lmbs_reserved--;
+
+			if (lmbs_reserved == 0)
+				break;
+		}
+		rc = 0;
+	}
+
+	return rc;
+}
+/* Hot-add the single LMB whose DRC index is @drc_index; 0 or -errno. */
+static int dlpar_memory_add_by_index(u32 drc_index)
+{
+	struct drmem_lmb *lmb;
+	int rc, lmb_found;
+
+	pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
+
+	lmb_found = 0;
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index == drc_index) {
+			lmb_found = 1;
+			rc = dlpar_acquire_drc(lmb->drc_index);
+			if (!rc) {
+				rc = dlpar_add_lmb(lmb);
+				if (rc)
+					dlpar_release_drc(lmb->drc_index);
+			}
+
+			break;
+		}
+	}
+
+	if (!lmb_found)
+		rc = -EINVAL;	/* rc was never assigned in the loop */
+
+	if (rc)
+		pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
+	else
+		pr_info("Memory at %llx (drc index %x) was hot-added\n",
+			lmb->base_addr, drc_index);
+
+	return rc;
+}
+/* Hot-add the @lmbs_to_add LMBs starting at @drc_index, all or none. */
+static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
+{
+	struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+	int rc;
+
+	pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
+		lmbs_to_add, drc_index);
+
+	if (lmbs_to_add == 0)
+		return -EINVAL;
+
+	rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb);
+	if (rc)
+		return -EINVAL;
+
+	/* Validate that the LMBs in this range are not reserved */
+	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+		/* Fail immediately if the whole range can't be hot-added */
+		if (lmb->flags & DRCONF_MEM_RESERVED) {
+			pr_err("Memory at %llx (drc index %x) is reserved\n",
+					lmb->base_addr, lmb->drc_index);
+			return -EINVAL;
+		}
+	}
+
+	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+		if (lmb->flags & DRCONF_MEM_ASSIGNED)
+			continue;
+
+		rc = dlpar_acquire_drc(lmb->drc_index);
+		if (rc)
+			break;
+
+		rc = dlpar_add_lmb(lmb);
+		if (rc) {
+			dlpar_release_drc(lmb->drc_index);
+			break;
+		}
+		/* Remember which LMBs this call added, for rollback/reporting. */
+		drmem_mark_lmb_reserved(lmb);
+	}
+
+	if (rc) {
+		pr_err("Memory indexed-count-add failed, removing any added LMBs\n");
+
+		for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			rc = dlpar_remove_lmb(lmb);
+			if (rc)
+				pr_err("Failed to remove LMB, drc index %x\n",
+				       lmb->drc_index);
+			else
+				dlpar_release_drc(lmb->drc_index);
+
+			drmem_remove_lmb_reservation(lmb);
+		}
+		rc = -EINVAL;
+	} else {
+		for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+			if (!drmem_lmb_reserved(lmb))
+				continue;
+
+			pr_info("Memory at %llx (drc index %x) was hot-added\n",
+				lmb->base_addr, lmb->drc_index);
+			drmem_remove_lmb_reservation(lmb);
+		}
+	}
+
+	return rc;
+}
+/* Dispatch a memory hotplug request by action and id type; 0 or -errno. */
+int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
+{
+	u32 count, drc_index;
+	int rc;
+
+	lock_device_hotplug();
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		switch (hp_elog->id_type) {
+		case PSERIES_HP_ELOG_ID_DRC_COUNT:
+			count = hp_elog->_drc_u.drc_count;
+			rc = dlpar_memory_add_by_count(count);
+			break;
+		case PSERIES_HP_ELOG_ID_DRC_INDEX:
+			drc_index = hp_elog->_drc_u.drc_index;
+			rc = dlpar_memory_add_by_index(drc_index);
+			break;
+		case PSERIES_HP_ELOG_ID_DRC_IC:
+			count = hp_elog->_drc_u.ic.count;
+			drc_index = hp_elog->_drc_u.ic.index;
+			rc = dlpar_memory_add_by_ic(count, drc_index);
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+
+		break;
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		switch (hp_elog->id_type) {
+		case PSERIES_HP_ELOG_ID_DRC_COUNT:
+			count = hp_elog->_drc_u.drc_count;
+			rc = dlpar_memory_remove_by_count(count);
+			break;
+		case PSERIES_HP_ELOG_ID_DRC_INDEX:
+			drc_index = hp_elog->_drc_u.drc_index;
+			rc = dlpar_memory_remove_by_index(drc_index);
+			break;
+		case PSERIES_HP_ELOG_ID_DRC_IC:
+			count = hp_elog->_drc_u.ic.count;
+			drc_index = hp_elog->_drc_u.ic.index;
+			rc = dlpar_memory_remove_by_ic(count, drc_index);
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+
+		break;
+	default:
+		pr_err("Invalid action (%d) specified\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+
+	if (!rc)
+		rc = drmem_update_dt();	/* its result becomes the return value */
+
+	unlock_device_hotplug();
+	return rc;
+}
+
+/*
+ * If @np is a "memory" node, add the range given by its first address
+ * entry to memblock.  Nodes of any other type are ignored (returns 0).
+ */
+static int pseries_add_mem_node(struct device_node *np)
+{
+	struct resource range;
+	int rc;
+
+	/* Check to see if we are actually adding memory */
+	if (!of_node_is_type(np, "memory"))
+		return 0;
+
+	/* Find the base and size of the memblock */
+	rc = of_address_to_resource(np, 0, &range);
+	if (rc)
+		return rc;
+
+	/* Update memory region to represent the memory add */
+	if (memblock_add(range.start, resource_size(&range)) < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* OF reconfig notifier for memory nodes and the drmem property. */
+static int pseries_memory_notifier(struct notifier_block *nb,
+				   unsigned long action, void *data)
+{
+	struct of_reconfig_data *reconfig = data;
+	int rc = 0;
+
+	if (action == OF_RECONFIG_ATTACH_NODE) {
+		rc = pseries_add_mem_node(reconfig->dn);
+	} else if (action == OF_RECONFIG_DETACH_NODE) {
+		rc = pseries_remove_mem_node(reconfig->dn);
+	} else if (action == OF_RECONFIG_UPDATE_PROPERTY) {
+		/* Only updates to the dynamic-reconfiguration node matter. */
+		if (!strcmp(reconfig->dn->name,
+			    "ibm,dynamic-reconfiguration-memory"))
+			drmem_update_lmbs(reconfig->prop);
+	}
+
+	return notifier_from_errno(rc);
+}
+
+static struct notifier_block pseries_mem_nb = {
+	.notifier_call = pseries_memory_notifier,
+};
+/* Device initcall: register the memory reconfig notifier on LPAR. */
+static int __init pseries_memory_hotplug_init(void)
+{
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		of_reconfig_notifier_register(&pseries_mem_nb);
+
+	return 0;
+}
+machine_device_initcall(pseries, pseries_memory_hotplug_init);
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
new file mode 100644
index 0000000000..2b0cac6fb6
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains the generic code to perform a call to the
+ * pSeries LPAR hypervisor.
+ */
+#include <linux/jump_label.h>
+#include <asm/hvcall.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
+
+	.section	".text"
+	
+#ifdef CONFIG_TRACEPOINTS
+
+#ifndef CONFIG_JUMP_LABEL
+	.data
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.8byte	0
+
+	.section	".text"
+#endif
+
+/*
+ * precall must preserve all registers: r3-r10 are parked in the caller's
+ * STK_PARAM() slots (available even on ELFv2 because these are all variadic
+ * functions) and LR at 16(r1).  A minimal frame is then pushed, r4 is pointed
+ * at the STK_PARAM(FIRST_REG) slot, the C hook is called and r3-r10 reloaded.
+ */
+#define HCALL_INST_PRECALL(FIRST_REG)				\
+	mflr	r0;						\
+	std	r3,STK_PARAM(R3)(r1);				\
+	std	r4,STK_PARAM(R4)(r1);				\
+	std	r5,STK_PARAM(R5)(r1);				\
+	std	r6,STK_PARAM(R6)(r1);				\
+	std	r7,STK_PARAM(R7)(r1);				\
+	std	r8,STK_PARAM(R8)(r1);				\
+	std	r9,STK_PARAM(R9)(r1);				\
+	std	r10,STK_PARAM(R10)(r1);				\
+	std	r0,16(r1);					\
+	addi	r4,r1,STK_PARAM(FIRST_REG);			\
+	stdu	r1,-STACK_FRAME_MIN_SIZE(r1);			\
+	bl	CFUNC(__trace_hcall_entry);			\
+	ld	r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1);	\
+	ld	r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1);	\
+	ld	r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1);	\
+	ld	r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1);	\
+	ld	r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1);	\
+	ld	r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1);	\
+	ld	r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1);	\
+	ld	r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1)
+
+/*
+ * postcall runs right before function return, so volatile registers are fair
+ * game.  It calls __trace_hcall_exit(saved r3, status, r5 as set by the two
+ * wrappers below), pops the precall frame, restores LR and returns status in r3.
+ */
+#define __HCALL_INST_POSTCALL					\
+	ld	r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1);	\
+	std	r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1);	\
+	mr	r4,r3;						\
+	mr	r3,r0;						\
+	bl	CFUNC(__trace_hcall_exit);			\
+	ld	r0,STACK_FRAME_MIN_SIZE+16(r1);			\
+	addi	r1,r1,STACK_FRAME_MIN_SIZE;			\
+	ld	r3,STK_PARAM(R3)(r1);				\
+	mtlr	r0
+
+#define HCALL_INST_POSTCALL_NORETS				\
+	li	r5,0;						\
+	__HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG)				\
+	mr	r5,BUFREG;					\
+	__HCALL_INST_POSTCALL
+
+#ifdef CONFIG_JUMP_LABEL
+#define HCALL_BRANCH(LABEL)					\
+	ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
+#else
+
+/*
+ * Branch to LABEL if hcall_tracepoint_refcount is non-zero. Early init (eg
+ * populating the MMU hashtable) skips this via an unconditional cpu feature.
+ */
+#define HCALL_BRANCH(LABEL)					\
+BEGIN_FTR_SECTION;						\
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ;		\
+	ld	r12,0(r12);					\
+	cmpdi	r12,0;						\
+	bne-	LABEL;						\
+1:
+#endif
+
+#else
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
+#define HCALL_BRANCH(LABEL)
+#endif
+
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+	HMT_MEDIUM
+	/* No HCALL_BRANCH here: this variant never diverts to a traced path. */
+	mfcr	r0			/* stash CR at 8(r1) across the hcall */
+	stw	r0,8(r1)
+	HVSC				/* invoke the hypervisor */
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* put the saved CR back */
+	mtcrf	0xff,r0
+	blr				/* return r3 = status */
+
+_GLOBAL_TOC(plpar_hcall_norets)
+	HMT_MEDIUM
+
+	mfcr	r0			/* stash CR at 8(r1) across the hcall */
+	stw	r0,8(r1)
+	HCALL_BRANCH(plpar_hcall_norets_trace)	/* traced path, if enabled */
+	HVSC				/* invoke the hypervisor */
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* put the saved CR back */
+	mtcrf	0xff,r0
+	blr				/* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall_norets_trace:
+	HCALL_INST_PRECALL(R4)		/* args pointer = the R4 param slot */
+	HVSC
+	HCALL_INST_POSTCALL_NORETS	/* exit hook is called with r5 = 0 */
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* CR was stored by plpar_hcall_norets */
+	mtcrf	0xff,r0
+	blr
+#endif
+
+_GLOBAL_TOC(plpar_hcall)
+	HMT_MEDIUM
+
+	mfcr	r0			/* stash CR at 8(r1) across the hcall */
+	stw	r0,8(r1)
+
+	HCALL_BRANCH(plpar_hcall_trace)
+
+	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5			/* slide r5-r10 down into r4-r9 */
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	ld	r12,STK_PARAM(R4)(r1)	/* r12 = ret buffer */
+	std	r4,  0(r12)		/* copy r4-r7 out to the buffer */
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* put the saved CR back */
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall_trace:
+	HCALL_INST_PRECALL(R5)		/* args pointer = the R5 param slot */
+
+	mr	r4,r5			/* slide r5-r10 down into r4-r9 */
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+
+	HVSC
+
+	ld	r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)	/* ret buffer */
+	std	r4,0(r12)		/* copy r4-r7 out to the buffer */
+	std	r5,8(r12)
+	std	r6,16(r12)
+	std	r7,24(r12)
+
+	HCALL_INST_POSTCALL(r12)	/* exit hook gets the buffer in r5 */
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* CR was stored by plpar_hcall */
+	mtcrf	0xff,r0
+
+	blr
+#endif
+
+/*
+ * plpar_hcall_raw can be called in real mode. kexec/kdump need some
+ * hypervisor calls to be executed in real mode. So plpar_hcall_raw
+ * does not access the per cpu hypervisor call statistics variables,
+ * since these variables may not be present in the RMO region.
+ */
+_GLOBAL(plpar_hcall_raw)
+	HMT_MEDIUM
+
+	mfcr	r0			/* stash CR at 8(r1) across the hcall */
+	stw	r0,8(r1)
+
+	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5			/* slide r5-r10 down into r4-r9 */
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+
+	HVSC				/* invoke the hypervisor */
+
+	ld	r12,STK_PARAM(R4)(r1)	/* r12 = ret buffer */
+	std	r4,  0(r12)		/* copy r4-r7 out to the buffer */
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* put the saved CR back */
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+_GLOBAL_TOC(plpar_hcall9)
+	HMT_MEDIUM
+
+	mfcr	r0			/* stash CR at 8(r1) across the hcall */
+	stw	r0,8(r1)
+
+	HCALL_BRANCH(plpar_hcall9_trace)
+
+	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5			/* slide r5-r10 down into r4-r9 */
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+	ld	r10,STK_PARAM(R11)(r1)	 /* put arg7 in R10 */
+	ld	r11,STK_PARAM(R12)(r1)	 /* put arg8 in R11 */
+	ld	r12,STK_PARAM(R13)(r1)    /* put arg9 in R12 */
+
+	HVSC				/* invoke the hypervisor */
+
+	mr	r0,r12			/* keep r12's value; r12 becomes the buffer pointer */
+	ld	r12,STK_PARAM(R4)(r1)	/* r12 = ret buffer */
+	std	r4,  0(r12)		/* copy r4-r11 and the old r12 out */
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+	std	r8, 32(r12)
+	std	r9, 40(r12)
+	std	r10,48(r12)
+	std	r11,56(r12)
+	std	r0, 64(r12)
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* put the saved CR back */
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall9_trace:
+	HCALL_INST_PRECALL(R5)		/* args pointer = the R5 param slot */
+
+	mr	r4,r5			/* slide r5-r10 down into r4-r9 */
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+	ld	r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1)	/* last three args */
+	ld	r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1)
+	ld	r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1)
+
+	HVSC
+
+	mr	r0,r12			/* keep r12's value; r12 becomes the buffer pointer */
+	ld	r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)	/* ret buffer */
+	std	r4,0(r12)		/* copy r4-r11 and the old r12 out */
+	std	r5,8(r12)
+	std	r6,16(r12)
+	std	r7,24(r12)
+	std	r8,32(r12)
+	std	r9,40(r12)
+	std	r10,48(r12)
+	std	r11,56(r12)
+	std	r0,64(r12)
+
+	HCALL_INST_POSTCALL(r12)	/* exit hook gets the buffer in r5 */
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* CR was stored by plpar_hcall9 */
+	mtcrf	0xff,r0
+
+	blr
+#endif
+
+/* See plpar_hcall_raw to see why this is needed */
+_GLOBAL(plpar_hcall9_raw)
+	HMT_MEDIUM
+
+	mfcr	r0			/* stash CR at 8(r1) across the hcall */
+	stw	r0,8(r1)
+
+	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5			/* slide r5-r10 down into r4-r9 */
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+	ld	r10,STK_PARAM(R11)(r1)	 /* put arg7 in R10 */
+	ld	r11,STK_PARAM(R12)(r1)	 /* put arg8 in R11 */
+	ld	r12,STK_PARAM(R13)(r1)    /* put arg9 in R12 */
+
+	HVSC				/* invoke the hypervisor */
+
+	mr	r0,r12			/* keep r12's value; r12 becomes the buffer pointer */
+	ld	r12,STK_PARAM(R4)(r1)	/* r12 = ret buffer */
+	std	r4,  0(r12)		/* copy r4-r11 and the old r12 out */
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+	std	r8, 32(r12)
+	std	r9, 40(r12)
+	std	r10,48(r12)
+	std	r11,56(r12)
+	std	r0, 64(r12)
+
+	li	r4,0			/* zero the byte at PACASRR_VALID(r13) */
+	stb	r4,PACASRR_VALID(r13)
+
+	lwz	r0,8(r1)		/* put the saved CR back */
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
new file mode 100644
index 0000000000..3a50612a78
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+#include <asm/trace.h>
+#include <asm/machdep.h>
+
+/* For hcall instrumentation. One structure per-hcall, per-CPU */
+struct hcall_stats {
+	unsigned long	num_calls;	/* number of calls (on this CPU) */
+	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
+	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+	unsigned long	tb_start;	/* mftb() sampled by probe_hcall_entry() */
+	unsigned long	purr_start;	/* PURR sampled by probe_hcall_entry() */
+};
+#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
+
+static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	/* The cursor handed to hc_show() is *pos + 1; NULL ends the walk. */
+	if ((int)*pos >= (HCALL_STAT_ARRAY_SIZE-1))
+		return NULL;
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	/* Advance the position, then let hc_start() do the bounds check. */
+	*pos += 1;
+	return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{	/* nothing to undo: hc_start() acquires nothing */
+}
+
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long idx = (unsigned long)p;
+	struct hcall_stats *stat = (struct hcall_stats *)m->private + idx;
+
+	/* Slots that never saw a call produce no output line. */
+	if (!stat->num_calls)
+		return 0;
+
+	/* First column is the slot index times four; PURR only if present. */
+	if (cpu_has_feature(CPU_FTR_PURR))
+		seq_printf(m, "%lu %lu %lu %lu\n", idx << 2,
+			   stat->num_calls, stat->tb_total, stat->purr_total);
+	else
+		seq_printf(m, "%lu %lu %lu\n", idx << 2,
+			   stat->num_calls, stat->tb_total);
+
+	return 0;
+}
+
+static const struct seq_operations hcall_inst_sops = {
+        .start = hc_start,	/* cursor is the array index + 1 */
+        .next  = hc_next,
+        .stop  = hc_stop,	/* empty */
+        .show  = hc_show	/* one line per slot with num_calls != 0 */
+};
+
+DEFINE_SEQ_ATTRIBUTE(hcall_inst);	/* provides hcall_inst_fops used below */
+
+#define	HCALL_ROOT_DIR		"hcall_inst"
+#define CPU_NAME_BUF_SIZE	32
+
+
+static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long *args)
+{
+	struct hcall_stats *slot;
+
+	/* Opcodes within range get their start stamps recorded; others don't. */
+	if (opcode <= MAX_HCALL_OPCODE) {
+		slot = this_cpu_ptr(&hcall_stats[opcode / 4]);
+		slot->tb_start = mftb();
+		slot->purr_start = mfspr(SPRN_PURR);
+	}
+}
+
+static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
+			     unsigned long *retbuf)
+{
+	struct hcall_stats *slot;
+
+	/* Count the call and add the timebase/PURR deltas since the entry probe. */
+	if (opcode <= MAX_HCALL_OPCODE) {
+		slot = this_cpu_ptr(&hcall_stats[opcode / 4]);
+		slot->num_calls++;
+		slot->tb_total += mftb() - slot->tb_start;
+		slot->purr_total += mfspr(SPRN_PURR) - slot->purr_start;
+	}
+}
+
+static int __init hcall_inst_init(void)
+{
+	char name[CPU_NAME_BUF_SIZE];
+	struct dentry *root;
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	/* Both probes are needed; a lone entry probe is rolled back. */
+	if (register_trace_hcall_entry(probe_hcall_entry, NULL))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit, NULL)) {
+		unregister_trace_hcall_entry(probe_hcall_entry, NULL);
+		return -EINVAL;
+	}
+
+	/* One read-only debugfs file per possible CPU under HCALL_ROOT_DIR. */
+	root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+	for_each_possible_cpu(cpu) {
+		snprintf(name, sizeof(name), "cpu%d", cpu);
+		debugfs_create_file(name, 0444, root,
+				    per_cpu(hcall_stats, cpu), &hcall_inst_fops);
+	}
+
+	return 0;
+}
+machine_device_initcall(pseries, hcall_inst_init);
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
new file mode 100644
index 0000000000..1ac52963e0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * hvconsole.c
+ * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Additional Author(s):
+ *  Ryan S. Arnold <rsa@us.ibm.com>
+ *
+ * LPAR console support.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <asm/hvcall.h>
+#include <asm/hvconsole.h>
+#include <asm/plpar_wrappers.h>
+
+/**
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
+ *	data.
+ * @buf: The character buffer into which to put the character data fetched from
+ *	firmware.
+ * @count: not used; two longs are always stored into @buf.
+ */
+int hvc_get_chars(uint32_t vtermno, char *buf, int count)
+{
+	unsigned long regs[PLPAR_HCALL_BUFSIZE];
+	unsigned long *out = (unsigned long *)buf;
+	long status = plpar_hcall(H_GET_TERM_CHAR, regs, vtermno);
+
+	/*
+	 * regs[1] and regs[2] carry the data; both words (2 * sizeof(long)
+	 * bytes) are stored into @buf whatever the status or @count.
+	 */
+	out[0] = be64_to_cpu(regs[1]);
+	out[1] = be64_to_cpu(regs[2]);
+
+	return status == H_SUCCESS ? regs[0] : 0;
+}
+
+EXPORT_SYMBOL(hvc_get_chars);
+
+
+/**
+ * hvc_put_chars: send characters to firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which the data
+ *	originated.
+ * @buf: The character buffer that contains the character data to send to
+ *	firmware. Must be at least 16 bytes, even if count is less than 16.
+ * @count: Send this number of characters.
+ */
+int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	unsigned long *words = (unsigned long *) buf;
+
+	/* hcall will ret H_PARAMETER if 'count' exceeds firmware max.*/
+	if (count > MAX_VIO_PUT_CHARS)
+		count = MAX_VIO_PUT_CHARS;
+
+	/* Both words of @buf are always passed, whatever @count says. */
+	switch (plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count,
+				   cpu_to_be64(words[0]), cpu_to_be64(words[1]))) {
+	case H_SUCCESS:
+		return count;
+	case H_BUSY:
+		return -EAGAIN;
+	default:
+		return -EIO;
+	}
+}
+
+EXPORT_SYMBOL(hvc_put_chars);
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
new file mode 100644
index 0000000000..d48c9c7ce1
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * hvcserver.c
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/hvcall.h>
+#include <asm/hvcserver.h>
+#include <asm/io.h>
+
+#define HVCS_ARCH_VERSION "1.0.0"
+
+MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
+MODULE_DESCRIPTION("IBM hvcs ppc64 API");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(HVCS_ARCH_VERSION);
+
+/*
+ * Convert arch specific return codes into relevant errnos.  The hvcs
+ * functions aren't performance sensitive, so this conversion isn't an
+ * issue.
+ */
+static int hvcs_convert(long to_convert)
+{
+	switch (to_convert) {
+	case H_SUCCESS:
+		return 0;
+	case H_PARAMETER:
+		return -EINVAL;
+	case H_HARDWARE:
+		return -EIO;
+	case H_BUSY:
+	case H_LONG_BUSY_ORDER_1_MSEC:
+	case H_LONG_BUSY_ORDER_10_MSEC:
+	case H_LONG_BUSY_ORDER_100_MSEC:
+	case H_LONG_BUSY_ORDER_1_SEC:
+	case H_LONG_BUSY_ORDER_10_SEC:
+	case H_LONG_BUSY_ORDER_100_SEC:
+		/* plain busy and every long-busy hint collapse to -EBUSY */
+		return -EBUSY;
+	default:	/* H_FUNCTION and anything not listed above */
+		return -EPERM;
+	}
+}
+
+/**
+ * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
+ * @head: list_head pointer for an allocated list of partner info structs to
+ *	free.
+ *
+ * This function is used to free the partner info list that was returned by
+ * calling hvcs_get_partner_info().
+ */
+int hvcs_free_partner_info(struct list_head *head)
+{
+	struct hvcs_partner_info *entry, *tmp;
+
+	if (!head)
+		return -EINVAL;
+
+	/*
+	 * Unlink and free every entry; the _safe iterator is required
+	 * because the current entry is freed inside the loop body.
+	 */
+	list_for_each_entry_safe(entry, tmp, head, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(hvcs_free_partner_info);
+
+/* Helper function for hvcs_get_partner_info */
+static int hvcs_next_partner(uint32_t unit_address,
+		unsigned long last_p_partition_ID,
+		unsigned long last_p_unit_address, unsigned long *pi_buff)
+{
+	/* The buffer is handed to the hcall by physical address. */
+	phys_addr_t buf_pa = virt_to_phys(pi_buff);
+	long hrc = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address,
+				      last_p_partition_ID,
+				      last_p_unit_address, buf_pa);
+	return hvcs_convert(hrc);
+}
+
+/**
+ * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
+ * @unit_address: The unit_address of the vty-server adapter for which this
+ *	function is fetching partner info.
+ * @head: An initialized list_head pointer to an empty list to use to return the
+ *	list of partner info fetched from the hypervisor to the caller.
+ * @pi_buff: A page sized buffer pre-allocated prior to calling this function
+ *	that is to be used by firmware as an iterator to keep track
+ *	of the partner info retrieval.
+ *
+ * This function returns zero on success, or if there is no partner info.
+ *
+ * The pi_buff is pre-allocated prior to calling this function because this
+ * function may be called with a spin_lock held and kmalloc of a page is not
+ * recommended as GFP_ATOMIC.
+ *
+ * The first long of this buffer is used to store a partner unit address.  The
+ * second long is used to store a partner partition ID and starting at
+ * pi_buff[2] is the 79 character Converged Location Code (diff size than the
+ * unsigned longs, hence the casting mumbo jumbo you see later).
+ *
+ * Invocation of this function should always be followed by an invocation of
+ * hvcs_free_partner_info() using a pointer to the SAME list head instance
+ * that was passed as a parameter to this function.
+ */
+int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
+		unsigned long *pi_buff)
+{
+	/*
+	 * The hcall interface works in longs, although the values that
+	 * end up in struct hvcs_partner_info are only 32 bits wide.
+	 */
+	unsigned long prev_partition;
+	unsigned long prev_unit;
+	struct hvcs_partner_info *pi;
+	int rc;
+
+	/* invalid parameters */
+	if (!head || !pi_buff)
+		return -EINVAL;
+
+	memset(pi_buff, 0x00, PAGE_SIZE);
+	prev_partition = prev_unit = ~0UL;
+	INIT_LIST_HEAD(head);
+
+	/*
+	 * Each pass feeds the previous partner's IDs back to firmware
+	 * (all-ones on the first pass) to fetch the next partner.  The
+	 * loop is only left through one of the returns or the break.
+	 */
+	for (;;) {
+		rc = hvcs_next_partner(unit_address, prev_partition,
+				       prev_unit, pi_buff);
+		if (rc) {
+			/*
+			 * A failure after at least one partner was
+			 * collected still counts as success.
+			 */
+			if (!list_empty(head))
+				return 0;
+			return rc;
+		}
+
+		/* The two leading buffer words are read as big-endian. */
+		prev_partition = be64_to_cpu(pi_buff[0]);
+		prev_unit = be64_to_cpu(pi_buff[1]);
+
+		/* Both IDs all-ones: there are no further partners. */
+		if (prev_partition == ~0UL && prev_unit == ~0UL)
+			break;
+
+		/*
+		 * This is a very small struct and will be freed soon in
+		 * hvcs_free_partner_info().
+		 */
+		pi = kmalloc(sizeof(*pi), GFP_ATOMIC);
+		if (!pi) {
+			printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
+				" allocate partner info struct.\n");
+			hvcs_free_partner_info(head);
+			return -ENOMEM;
+		}
+
+		pi->unit_address = (unsigned int)prev_unit;
+		pi->partition_ID = (unsigned int)prev_partition;
+
+		/* The location code starts at the third long of the buffer. */
+		strscpy(pi->location_code, (char *)&pi_buff[2],
+			sizeof(pi->location_code));
+
+		/* From here on the entry is owned by the caller's list. */
+		list_add_tail(&pi->node, head);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hvcs_get_partner_info);
+
+/**
+ * hvcs_register_connection - establish a connection between this vty-server and
+ *	a vty.
+ * @unit_address: The unit address of the vty-server adapter that is to be
+ *	establish a connection.
+ * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
+ * @p_unit_address: The unit address of the vty adapter to which the vty-server
+ *	is to be connected.
+ *
+ * If this function is called once and -EINVAL is returned it may
+ * indicate that the partner info needs to be refreshed for the
+ * target unit address at which point the caller must invoke
+ * hvcs_get_partner_info() and then call this function again.  If,
+ * for a second time, -EINVAL is returned then it indicates that
+ * there is probably already a partner connection registered to a
+ * different vty-server adapter.  It is also possible that a second
+ * -EINVAL may indicate that one of the parms is not valid, for
+ * instance if the link was removed between the vty-server adapter
+ * and the vty adapter that you are trying to open.  Don't shoot the
+ * messenger.  Firmware implemented it this way.
+ */
+int hvcs_register_connection(uint32_t unit_address,
+		uint32_t p_partition_ID, uint32_t p_unit_address)
+{
+	/* Map the firmware status of H_REGISTER_VTERM onto an errno. */
+	long hrc = plpar_hcall_norets(H_REGISTER_VTERM, unit_address,
+				      p_partition_ID, p_unit_address);
+	return hvcs_convert(hrc);
+}
+EXPORT_SYMBOL(hvcs_register_connection);
+
+/**
+ * hvcs_free_connection - free the connection between a vty-server and vty
+ * @unit_address: The unit address of the vty-server that is to have its
+ *	connection severed.
+ *
+ * This function is used to free the partner connection between a vty-server
+ * adapter and a vty adapter.
+ *
+ * If -EBUSY is returned continue to call this function until 0 is returned.
+ */
+int hvcs_free_connection(uint32_t unit_address)
+{
+	/* H_FREE_VTERM status, translated to an errno by hvcs_convert(). */
+	long hrc = plpar_hcall_norets(H_FREE_VTERM, unit_address);
+	return hvcs_convert(hrc);
+}
+EXPORT_SYMBOL(hvcs_free_connection);
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
new file mode 100644
index 0000000000..998e3aff24
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -0,0 +1,479 @@
+/*
+ * IBM PowerPC IBM eBus Infrastructure Support.
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *  Joachim Fenkes <fenkes@de.ibm.com>
+ *  Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/console.h>
+#include <linux/kobject.h>
+#include <linux/dma-map-ops.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <asm/ibmebus.h>
+#include <asm/machdep.h>
+
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+	.init_name = "ibmebus",
+};
+
+struct bus_type ibmebus_bus_type;
+
+/* These devices will automatically be added to the bus during init */
+static const struct of_device_id ibmebus_matches[] __initconst = {
+	{ .compatible = "IBM,lhca" },
+	{ .compatible = "IBM,lhea" },
+	{},
+};
+
+static void *ibmebus_alloc_coherent(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flag,
+				    unsigned long attrs)
+{
+	void *buf = kmalloc(size, flag);
+
+	/*
+	 * The DMA handle is just the kernel virtual address of the
+	 * buffer, so a failed allocation yields a zero handle.
+	 */
+	*dma_handle = (dma_addr_t)buf;
+	return buf;
+}
+
+static void ibmebus_free_coherent(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle,
+				  unsigned long attrs)
+{
+	/* Only @vaddr is used; every other argument is ignored. */
+	kfree(vaddr);
+}
+
+static dma_addr_t ibmebus_map_page(struct device *dev, struct page *page,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction direction,
+				   unsigned long attrs)
+{
+	/* No translation: the "DMA address" is the kernel virtual address. */
+	void *vaddr = page_address(page) + offset;
+	return (dma_addr_t)vaddr;
+}
+
+static void ibmebus_unmap_page(struct device *dev,
+			       dma_addr_t dma_addr,
+			       size_t size,
+			       enum dma_data_direction direction,
+			       unsigned long attrs)
+{
+	/* ibmebus_map_page() set nothing up, so there is nothing to undo. */
+}
+
+static int ibmebus_map_sg(struct device *dev, struct scatterlist *sgl,
+			  int nents, enum dma_data_direction direction,
+			  unsigned long attrs)
+{
+	struct scatterlist *ent;
+	int idx;
+
+	/* Every entry maps 1:1 onto its own kernel virtual address. */
+	for_each_sg(sgl, ent, nents, idx) {
+		ent->dma_length = ent->length;
+		ent->dma_address = (dma_addr_t) sg_virt(ent);
+	}
+
+	return nents;
+}
+
+static void ibmebus_unmap_sg(struct device *dev,
+			     struct scatterlist *sg,
+			     int nents, enum dma_data_direction direction,
+			     unsigned long attrs)
+{
+	/* ibmebus_map_sg() allocated nothing, so nothing is released here. */
+}
+
+static int ibmebus_dma_supported(struct device *dev, u64 mask)
+{
+	return mask == DMA_BIT_MASK(64);	/* only the full 64-bit mask */
+}
+
+static u64 ibmebus_dma_get_required_mask(struct device *dev)
+{
+	return DMA_BIT_MASK(64);	/* same for every device */
+}
+
+static const struct dma_map_ops ibmebus_dma_ops = {
+	.alloc              = ibmebus_alloc_coherent,	/* kmalloc() backed */
+	.free               = ibmebus_free_coherent,
+	.map_sg             = ibmebus_map_sg,
+	.unmap_sg           = ibmebus_unmap_sg,		/* empty */
+	.dma_supported      = ibmebus_dma_supported,
+	.get_required_mask  = ibmebus_dma_get_required_mask,
+	.map_page           = ibmebus_map_page,
+	.unmap_page         = ibmebus_unmap_page,	/* empty */
+};
+
+static int ibmebus_match_path(struct device *dev, const void *data)
+{
+	struct device_node *target = of_find_node_by_path(data);
+
+	/* Only the pointer value is compared, so drop the reference first. */
+	of_node_put(target);
+
+	return to_platform_device(dev)->dev.of_node == target;
+}
+
+static int ibmebus_match_node(struct device *dev, const void *data)
+{
+	return to_platform_device(dev)->dev.of_node == data;	/* @data: node pointer */
+}
+
+static int ibmebus_create_device(struct device_node *dn)
+{
+	struct platform_device *pdev;
+	int rc;
+
+	pdev = of_device_alloc(dn, NULL, &ibmebus_bus_device);
+	if (!pdev)
+		return -ENOMEM;
+
+	/* Attach the eBus type and DMA ops before the device goes live. */
+	pdev->dev.dma_ops = &ibmebus_dma_ops;
+	pdev->dev.bus = &ibmebus_bus_type;
+	rc = of_device_add(pdev);
+	if (rc)		/* not added: release the allocation */
+		platform_device_put(pdev);
+	return rc;
+}
+
+static int ibmebus_create_devices(const struct of_device_id *matches)
+{
+	struct device_node *root, *child;
+	struct device *dev;
+	int ret = 0;
+
+	/* Walk the root's direct children; stop at the first creation error. */
+	root = of_find_node_by_path("/");
+	for_each_child_of_node(root, child) {
+		if (!of_match_node(matches, child))
+			continue;
+		/* Skip nodes that already have a device on the bus. */
+		dev = bus_find_device(&ibmebus_bus_type, NULL, child,
+				      ibmebus_match_node);
+		if (dev) {
+			put_device(dev);
+			continue;
+		}
+
+		ret = ibmebus_create_device(child);
+		if (ret) {
+			printk(KERN_ERR "%s: failed to create device (%i)\n",
+			       __func__, ret);
+			of_node_put(child);	/* leaving the iterator early */
+			break;
+		}
+	}
+
+	of_node_put(root);
+	return ret;	/* 0, or the first ibmebus_create_device() error */
+}
+
+int ibmebus_register_driver(struct platform_driver *drv)
+{
+	/* Add devices the driver matches that ibmebus lacks (result ignored) */
+	ibmebus_create_devices(drv->driver.of_match_table);
+
+	drv->driver.bus = &ibmebus_bus_type;
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(ibmebus_register_driver);
+
+void ibmebus_unregister_driver(struct platform_driver *drv)
+{
+	driver_unregister(&drv->driver);	/* no devices are removed here */
+}
+EXPORT_SYMBOL(ibmebus_unregister_driver);
+
+int ibmebus_request_irq(u32 ist, irq_handler_t handler,
+			unsigned long irq_flags, const char *devname,
+			void *dev_id)
+{
+	unsigned int virq = irq_create_mapping(NULL, ist);
+
+	/* A zero irq number means @ist could not be mapped. */
+	if (virq)
+		return request_irq(virq, handler, irq_flags, devname, dev_id);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ibmebus_request_irq);
+
+void ibmebus_free_irq(u32 ist, void *dev_id)
+{
+	unsigned int virq = irq_find_mapping(NULL, ist);
+	/* Release the handler first, then tear the mapping down. */
+	free_irq(virq, dev_id);
+	irq_dispose_mapping(virq);
+}
+EXPORT_SYMBOL(ibmebus_free_irq);
+
+static char *ibmebus_chomp(const char *in, size_t count)
+{
+	char *out = kmalloc(count + 1, GFP_KERNEL);	/* caller must kfree() */
+
+	if (!out)
+		return NULL;
+
+	/* Duplicate @in as a C string, then drop one trailing newline. */
+	memcpy(out, in, count);
+	out[count] = '\0';
+	if (count && out[count - 1] == '\n')	/* count == 0: no out[-1] */
+		out[count - 1] = '\0';
+	return out;
+}
+
+static ssize_t probe_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+	struct device_node *node;
+	struct device *found;
+	ssize_t err = 0;
+	char *path;
+
+	path = ibmebus_chomp(buf, count);
+	if (!path)
+		return -ENOMEM;
+
+	/* A path that already has a device on the bus is refused. */
+	found = bus_find_device(&ibmebus_bus_type, NULL, path,
+				ibmebus_match_path);
+	if (found) {
+		put_device(found);
+		printk(KERN_WARNING "%s: %s has already been probed\n",
+		       __func__, path);
+		err = -EEXIST;
+		goto out;
+	}
+
+	node = of_find_node_by_path(path);
+	if (!node) {
+		printk(KERN_WARNING "%s: no such device node: %s\n",
+		       __func__, path);
+		err = -ENODEV;
+		goto out;
+	}
+	err = ibmebus_create_device(node);
+	of_node_put(node);
+
+out:
+	kfree(path);
+	return err ? err : (ssize_t)count;
+}
+static BUS_ATTR_WO(probe);
+
+static ssize_t remove_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+	struct device *found;
+	ssize_t ret = count;
+	char *path;
+
+	path = ibmebus_chomp(buf, count);
+	if (!path)
+		return -ENOMEM;
+
+	found = bus_find_device(&ibmebus_bus_type, NULL, path,
+				ibmebus_match_path);
+	if (found) {
+		/* Unregister the device, then drop the lookup's reference. */
+		of_device_unregister(to_platform_device(found));
+		put_device(found);
+	} else {
+		printk(KERN_WARNING "%s: %s not on the bus\n",
+		       __func__, path);
+		ret = -ENODEV;
+	}
+	kfree(path);
+	return ret;
+}
+static BUS_ATTR_WO(remove);
+
+static struct attribute *ibmbus_bus_attrs[] = {
+	&bus_attr_probe.attr,
+	&bus_attr_remove.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(ibmbus_bus);
+
+static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
+{
+	const struct of_device_id *table = drv->of_match_table;
+
+	/* A driver without an OF match table never matches. */
+	if (table && of_match_device(table, dev))
+		return 1;
+	return 0;
+}
+
+static int ibmebus_bus_device_probe(struct device *dev)
+{
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+	struct platform_device *pdev = to_platform_device(dev);
+	int rc = -ENODEV;
+
+	if (!pdrv->probe)
+		return rc;
+
+	/*
+	 * Hold a reference across the probe and keep it only when the
+	 * probe succeeded; a driver that does not match leaves rc at
+	 * -ENODEV, which drops the reference again.
+	 */
+	get_device(dev);
+	if (of_driver_match_device(dev, dev->driver))
+		rc = pdrv->probe(pdev);
+	if (rc)
+		put_device(dev);
+	return rc;
+}
+
+static void ibmebus_bus_device_remove(struct device *dev)
+{
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+
+	/* pdrv is only dereferenced once dev->driver is known non-NULL. */
+	if (dev->driver && pdrv->remove)
+		pdrv->remove(to_platform_device(dev));
+}
+
+static void ibmebus_bus_device_shutdown(struct device *dev)
+{
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+
+	/* pdrv is only dereferenced once dev->driver is known non-NULL. */
+	if (dev->driver && pdrv->shutdown)
+		pdrv->shutdown(to_platform_device(dev));
+}
+
+/*
+ * ibmebus_bus_device_attrs
+ */
+static ssize_t devspec_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct platform_device *ofdev = to_platform_device(dev);
+
+	/* Full device-tree path; sysfs_emit() bounds the write to one page. */
+	return sysfs_emit(buf, "%pOF\n", ofdev->dev.of_node);
+}
+static DEVICE_ATTR_RO(devspec);
+
+static ssize_t name_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct platform_device *ofdev = to_platform_device(dev);
+
+	/* Node name only; sysfs_emit() bounds the write to one page. */
+	return sysfs_emit(buf, "%pOFn\n", ofdev->dev.of_node);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t modalias_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return of_device_modalias(dev, buf, PAGE_SIZE);	/* limit: PAGE_SIZE */
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *ibmebus_bus_device_attrs[] = {
+	&dev_attr_devspec.attr,
+	&dev_attr_name.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(ibmebus_bus_device);
+
+static int ibmebus_bus_modalias(const struct device *dev, struct kobj_uevent_env *env)
+{
+	return of_device_uevent_modalias(dev, env);	/* installed as .uevent */
+}
+
+struct bus_type ibmebus_bus_type = {
+	.name      = "ibmebus",
+	.uevent    = ibmebus_bus_modalias,
+	.bus_groups = ibmbus_bus_groups,	/* "probe" and "remove" files */
+	.match     = ibmebus_bus_bus_match,
+	.probe     = ibmebus_bus_device_probe,
+	.remove    = ibmebus_bus_device_remove,
+	.shutdown  = ibmebus_bus_device_shutdown,
+	.dev_groups = ibmebus_bus_device_groups,	/* devspec, name, modalias */
+};
+EXPORT_SYMBOL(ibmebus_bus_type);
+
+static int __init ibmebus_bus_init(void)
+{
+	int rc;
+
+	printk(KERN_INFO "IBM eBus Device Driver\n");
+
+	rc = bus_register(&ibmebus_bus_type);
+	if (rc) {
+		printk(KERN_ERR "%s: failed to register IBM eBus.\n",
+		       __func__);
+		return rc;
+	}
+
+	rc = device_register(&ibmebus_bus_device);
+	if (rc) {
+		printk(KERN_WARNING "%s: device_register returned %i\n",
+		       __func__, rc);
+		/* A failed device_register() is undone with put_device(). */
+		put_device(&ibmebus_bus_device);
+		goto out_bus;
+	}
+
+	rc = ibmebus_create_devices(ibmebus_matches);
+	if (!rc)
+		return 0;
+
+	/* The parent device is fully registered here, so unregister it. */
+	device_unregister(&ibmebus_bus_device);
+out_bus:
+	bus_unregister(&ibmebus_bus_type);
+	return rc;
+}
+machine_postcore_initcall(pseries, ibmebus_bus_init);
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 0000000000..f411d4fe7b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * 	p = (struct pseries_io_event_sect_data *) data;
+ * 	if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * 		:
+ * 		:
+ * 	return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * 	.notifier_call = event_handler,
+ * }
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+/* Notifier chain that ioei_interrupt() calls for every IO event it finds. */
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list);
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+/* RTAS token for check-exception; looked up once in ioei_init(). */
+static int ioei_check_exception_token;
+
+/* Buffer whose physical address is passed to check-exception for the log. */
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+
+/**
+ * ioei_find_event - Find the data portion of an IO Event section in an event log.
+ * @elog: RTAS error/event log.
+ *
+ * Return:
+ * 	pointer to a valid IO event section data. NULL if not found.
+ */
+static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
+{
+	struct pseries_errorlog *sect;
+
+	/* We should only ever get called for io-event interrupts, but if
+	 * we do get called for another type then something went wrong so
+	 * make some noise about it.
+	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+	 * No need to check event log version.
+	 */
+	if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) {
+		printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d\n",
+			    rtas_error_type(elog));
+		return NULL;
+	}
+
+	sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+	if (unlikely(!sect)) {
+		printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+			    "log does not contain an IO Event section. "
+			    "Could be a bug in system firmware!\n");
+		return NULL;
+	}
+	return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clear that
+ *   error or event so it is reported once.
+ * - Each interrupt returns one event. If a platform chooses to report
+ *   multiple events through a single interrupt, it must ensure that the
+ *   interrupt remains asserted until check-exception has been used to
+ *   process all out-standing events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ *   sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ *   event type, and sub-type. There is no easy way to pre-sort clients
+ *   by scope or event type alone. For example, Torrent ISR route change
+ *   event is reported with scope 0x00 (Not Applicable) rather than
+ *   0x3B (Torrent-hub). It is better to let the clients identify
+ *   who owns the event.
+ */
+
+/*
+ * IO event interrupt handler. Keeps calling check-exception until it returns
+ * non-zero; each log that contains an IO event section is passed to the
+ * pseries_ioei_notifier_list chain. Logs without one are skipped.
+ */
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+	struct rtas_error_log *elog = (struct rtas_error_log *)ioei_rtas_buf;
+	struct pseries_io_event *io_event;
+
+	while (rtas_call(ioei_check_exception_token, 6, 1, NULL,
+			 RTAS_VECTOR_EXTERNAL_INTERRUPT,
+			 virq_to_hw(irq),
+			 RTAS_IO_EVENTS, 1 /* Time Critical */,
+			 __pa(ioei_rtas_buf),
+			 RTAS_DATA_BUF_SIZE) == 0) {
+		io_event = ioei_find_event(elog);
+		if (io_event)
+			atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+						   0, io_event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the check-exception RTAS token and, when the device tree has an
+ * /event-sources/ibm,io-events node, request its interrupts with
+ * ioei_interrupt() as the handler. Returns -ENODEV if either is missing.
+ */
+static int __init ioei_init(void)
+{
+	struct device_node *ioei_node;
+
+	ioei_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION);
+	if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	ioei_node = of_find_node_by_path("/event-sources/ibm,io-events");
+	if (!ioei_node)
+		return -ENODEV;
+
+	request_event_sources_irqs(ioei_node, ioei_interrupt, "IO_EVENT");
+	pr_info("IBM I/O event interrupts enabled\n");
+	of_node_put(ioei_node);
+
+	return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
new file mode 100644
index 0000000000..496e16c588
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -0,0 +1,1742 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup:
+ *
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
+ *
+ * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/tce.h>
+#include <asm/ppc-pci.h>
+#include <asm/udbg.h>
+#include <asm/mmzone.h>
+#include <asm/plpar_wrappers.h>
+
+#include "pseries.h"
+
+/*
+ * Indexes into the u32 array read from the "ibm,ddw-applicable" property;
+ * the entries are used as RTAS call tokens.
+ */
+enum {
+	DDW_QUERY_PE_DMA_WIN  = 0,
+	DDW_CREATE_PE_DMA_WIN = 1,
+	DDW_REMOVE_PE_DMA_WIN = 2,
+
+	DDW_APPLICABLE_SIZE	/* number of cells read from the property */
+};
+
+/* Indexes into the cells of the "ibm,ddw-extensions" property. */
+enum {
+	DDW_EXT_SIZE = 0,	/* cell ddw_read_ext() reads as the extension count */
+	DDW_EXT_RESET_DMA_WIN = 1,
+	DDW_EXT_QUERY_OUT_SIZE = 2
+};
+
+/*
+ * Allocate a zeroed iommu_table on memory node @node and initialise its
+ * group list head and reference count. Returns NULL if allocation fails.
+ */
+static struct iommu_table *iommu_pseries_alloc_table(int node)
+{
+	struct iommu_table *table;
+
+	table = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
+	if (table) {
+		INIT_LIST_HEAD_RCU(&table->it_group_list);
+		kref_init(&table->it_kref);
+	}
+
+	return table;
+}
+
+/*
+ * Allocate an iommu_table_group on memory node @node together with its
+ * table at index 0. Returns NULL (with nothing left allocated) if either
+ * allocation fails.
+ */
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+	struct iommu_table_group *grp;
+
+	grp = kzalloc_node(sizeof(*grp), GFP_KERNEL, node);
+	if (!grp)
+		return NULL;
+
+#ifdef CONFIG_IOMMU_API
+	grp->ops = &spapr_tce_table_group_ops;
+	grp->pgsizes = SZ_4K;
+#endif
+
+	grp->tables[0] = iommu_pseries_alloc_table(node);
+	if (!grp->tables[0]) {
+		/* no table could be allocated: give the group back too */
+		kfree(grp);
+		return NULL;
+	}
+
+	return grp;
+}
+
+/*
+ * Release @table_group: drop the iommu group reference (if any), put the
+ * tables in slots 0 and 1 that are present, then free the group itself.
+ * A NULL @table_group is ignored. @node_name is not used.
+ */
+static void iommu_pseries_free_group(struct iommu_table_group *table_group,
+		const char *node_name)
+{
+	int slot;
+
+	if (!table_group)
+		return;
+
+#ifdef CONFIG_IOMMU_API
+	if (table_group->group) {
+		iommu_group_put(table_group->group);
+		/* the put is expected to have cleared ->group */
+		BUG_ON(table_group->group);
+	}
+#endif
+
+	/* Default DMA window table is at index 0, while DDW at 1. SR-IOV
+	 * adapters only have table on index 1.
+	 */
+	for (slot = 0; slot <= 1; slot++) {
+		if (table_group->tables[slot])
+			iommu_tce_table_put(table_group->tables[slot]);
+	}
+
+	kfree(table_group);
+}
+
+/*
+ * Write @npages TCEs directly into the in-memory table at tbl->it_base,
+ * starting at entry @index and mapping memory from kernel address @uaddr.
+ * Read permission is always set; write permission unless @direction is
+ * DMA_TO_DEVICE. @attrs is unused. Always returns 0.
+ */
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
+			      long npages, unsigned long uaddr,
+			      enum dma_data_direction direction,
+			      unsigned long attrs)
+{
+	const unsigned long shift = tbl->it_page_shift;
+	const unsigned long step = IOMMU_PAGE_SIZE(tbl);
+	__be64 *entry = ((__be64 *)tbl->it_base) + index;
+	u64 perm = TCE_PCI_READ; // Read allowed
+	u64 rpn;
+
+	if (direction != DMA_TO_DEVICE)
+		perm |= TCE_PCI_WRITE;
+
+	for (; npages--; entry++, uaddr += step) {
+		/* can't move this out since we might cross MEMBLOCK boundary */
+		rpn = __pa(uaddr) >> shift;
+		*entry = cpu_to_be64(perm | rpn << shift);
+	}
+
+	return 0;
+}
+
+
+/* Zero @npages entries of the in-memory TCE table, starting at @index. */
+static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
+{
+	__be64 *entry = ((__be64 *)tbl->it_base) + index;
+
+	for (; npages--; entry++)
+		*entry = 0;
+}
+
+/* Return entry @index of the in-memory TCE table, converted to CPU order. */
+static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
+{
+	const __be64 *table = (__be64 *)tbl->it_base;
+
+	return be64_to_cpu(table[index]);
+}
+
+static void tce_free_pSeriesLP(unsigned long liobn, long, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+/*
+ * Map @npages pages, starting at kernel address @uaddr, into the TCE table
+ * identified by @liobn, beginning at entry @tcenum. One plpar_tce_put()
+ * hypercall is issued per entry. @tceshift is the IOMMU page shift. @attrs
+ * is unused.
+ *
+ * Returns 0, or H_NOT_ENOUGH_RESOURCES (cast to int) after the entries
+ * written so far have been cleared again. Any other hypercall failure is only
+ * logged (rate-limited); the loop continues and 0 is still returned.
+ */
+static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+				long npages, unsigned long uaddr,
+				enum dma_data_direction direction,
+				unsigned long attrs)
+{
+	u64 rc = 0;
+	u64 proto_tce, tce;
+	u64 rpn;
+	int ret = 0;
+	long tcenum_start = tcenum, npages_start = npages;
+
+	rpn = __pa(uaddr) >> tceshift;
+	proto_tce = TCE_PCI_READ;
+	if (direction != DMA_TO_DEVICE)
+		proto_tce |= TCE_PCI_WRITE;
+
+	while (npages--) {
+		tce = proto_tce | rpn << tceshift;
+		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
+
+		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+			ret = (int)rc;
+			/*
+			 * npages was already decremented for this entry, so
+			 * npages_start - (npages + 1) is the number of entries
+			 * written before the one that failed.
+			 */
+			tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
+			                   (npages_start - (npages + 1)));
+			break;
+		}
+
+		if (rc && printk_ratelimit()) {
+			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
+			printk("\tindex   = 0x%llx\n", (u64)liobn);
+			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
+			printk("\ttce val = 0x%llx\n", tce );
+			dump_stack();
+		}
+
+		tcenum++;
+		rpn++;
+	}
+	return ret;
+}
+
+static DEFINE_PER_CPU(__be64 *, tce_page);
+
+/*
+ * Map @npages pages starting at kernel address @uaddr into @tbl, beginning
+ * at entry @tcenum, using plpar_tce_put_indirect() with a per-CPU staging
+ * page so that up to one page of TCEs is installed per hypercall.
+ *
+ * Falls back to tce_build_pSeriesLP() for a single page, when the firmware
+ * lacks FW_FEATURE_PUT_TCE_IND, or when the staging page cannot be allocated.
+ *
+ * Returns 0, or H_NOT_ENOUGH_RESOURCES (cast to int) after the batches that
+ * completed have been cleared again. Any other hypercall failure ends the
+ * loop and is logged (rate-limited), but 0 is still returned.
+ */
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+				     long npages, unsigned long uaddr,
+				     enum dma_data_direction direction,
+				     unsigned long attrs)
+{
+	u64 rc = 0;
+	u64 proto_tce;
+	__be64 *tcep;
+	u64 rpn;
+	long l, limit;
+	long tcenum_start = tcenum, npages_start = npages;
+	int ret = 0;
+	unsigned long flags;
+	const unsigned long tceshift = tbl->it_page_shift;
+
+	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+		return tce_build_pSeriesLP(tbl->it_index, tcenum,
+					   tceshift, npages, uaddr,
+		                           direction, attrs);
+	}
+
+	local_irq_save(flags);	/* to protect tcep and the page behind it */
+
+	tcep = __this_cpu_read(tce_page);
+
+	/* This is safe to do since interrupts are off when we're called
+	 * from iommu_alloc{,_sg}()
+	 */
+	if (!tcep) {
+		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+		/* If allocation fails, fall back to the loop implementation */
+		if (!tcep) {
+			local_irq_restore(flags);
+			return tce_build_pSeriesLP(tbl->it_index, tcenum,
+					tceshift,
+					npages, uaddr, direction, attrs);
+		}
+		/* keep the page for later calls on this CPU */
+		__this_cpu_write(tce_page, tcep);
+	}
+
+	rpn = __pa(uaddr) >> tceshift;
+	proto_tce = TCE_PCI_READ;
+	if (direction != DMA_TO_DEVICE)
+		proto_tce |= TCE_PCI_WRITE;
+
+	/* We can map max one pageful of TCEs at a time */
+	do {
+		/*
+		 * Set up the page with TCE data, looping through and setting
+		 * the values.
+		 */
+		limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE);
+
+		for (l = 0; l < limit; l++) {
+			tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
+			rpn++;
+		}
+
+		rc = plpar_tce_put_indirect((u64)tbl->it_index,
+					    (u64)tcenum << tceshift,
+					    (u64)__pa(tcep),
+					    limit);
+
+		npages -= limit;
+		tcenum += limit;
+	} while (npages > 0 && !rc);
+
+	local_irq_restore(flags);
+
+	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+		ret = (int)rc;
+		/*
+		 * npages was already reduced by the failed batch; adding limit
+		 * back gives the count before that batch, so the difference
+		 * from npages_start is the number of entries in the batches
+		 * that completed.
+		 */
+		tce_freemulti_pSeriesLP(tbl, tcenum_start,
+		                        (npages_start - (npages + limit)));
+		return ret;
+	}
+
+	if (rc && printk_ratelimit()) {
+		printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
+		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
+		printk("\tnpages  = 0x%llx\n", (u64)npages);
+		printk("\ttce[0] val = 0x%llx\n", tcep[0]);
+		dump_stack();
+	}
+	return ret;
+}
+
+/*
+ * Clear @npages TCEs of the table identified by @liobn, starting at entry
+ * @tcenum, with one plpar_tce_put() hypercall per entry. Failures are logged
+ * (rate-limited) and the loop continues with the next entry.
+ */
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+			       long npages)
+{
+	u64 rc;
+
+	for (; npages--; tcenum++) {
+		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);
+		if (!rc || !printk_ratelimit())
+			continue;
+
+		printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
+		printk("\tindex   = 0x%llx\n", (u64)liobn);
+		printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
+		dump_stack();
+	}
+}
+
+
+/*
+ * Clear @npages TCEs of @tbl starting at entry @tcenum. With
+ * FW_FEATURE_STUFF_TCE the entries are cleared in batches of up to 512 via
+ * plpar_tce_stuff(); otherwise the per-entry tce_free_pSeriesLP() is used.
+ * The batched loop stops at the first failing hypercall, which is logged
+ * (rate-limited).
+ */
+static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
+{
+	u64 rc;
+	long rpages = npages;
+	unsigned long limit;
+
+	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
+		return tce_free_pSeriesLP(tbl->it_index, tcenum,
+					  tbl->it_page_shift, npages);
+
+	do {
+		/* at most 512 entries per plpar_tce_stuff() call */
+		limit = min_t(unsigned long, rpages, 512);
+
+		rc = plpar_tce_stuff((u64)tbl->it_index,
+				     (u64)tcenum << tbl->it_page_shift, 0, limit);
+
+		rpages -= limit;
+		tcenum += limit;
+	} while (rpages > 0 && !rc);
+
+	if (rc && printk_ratelimit()) {
+		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
+		printk("\trc      = %lld\n", rc);
+		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
+		printk("\tnpages  = 0x%llx\n", (u64)npages);
+		dump_stack();
+	}
+}
+
+/*
+ * Read entry @tcenum of @tbl through the plpar_tce_get() hypercall and
+ * return the value it stored. A failing hypercall is logged (rate-limited);
+ * the value left in tce_ret is returned regardless.
+ */
+static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
+{
+	u64 rc;
+	unsigned long tce_ret;
+
+	rc = plpar_tce_get((u64)tbl->it_index,
+			   (u64)tcenum << tbl->it_page_shift, &tce_ret);
+
+	if (rc && printk_ratelimit()) {
+		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
+		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
+		printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
+		dump_stack();
+	}
+
+	return tce_ret;
+}
+
+/*
+ * Layout of a dynamic DMA window device tree property value. All fields are
+ * big-endian; this is compatible with cells for the device tree property.
+ */
+struct dynamic_dma_window_prop {
+	__be32	liobn;		/* tce table number */
+	__be64	dma_base;	/* address hi,lo */
+	__be32	tce_shift;	/* ilog2(tce_page_size) */
+	__be32	window_shift;	/* ilog2(tce_window_size) */
+};
+
+/* One known dynamic DMA window; entries are linked on dma_win_list. */
+struct dma_win {
+	struct device_node *device;	/* node the window belongs to */
+	const struct dynamic_dma_window_prop *prop;	/* window parameters */
+	bool    direct;		/* reported by find_existing_ddw() as direct mapping */
+	struct list_head list;	/* link in dma_win_list */
+};
+
+/* Dynamic DMA Window support */
+
+/* Values query_ddw() unpacks from the ibm,query-pe-dma-windows RTAS output. */
+struct ddw_query_response {
+	u32 windows_available;
+	u64 largest_available_block;
+	u32 page_size;
+	u32 migration_capable;
+};
+
+/*
+ * NOTE(review): presumably the output of the create-window RTAS call (liobn
+ * plus the window address as hi/lo halves); the user is outside this view.
+ */
+struct ddw_create_response {
+	u32 liobn;
+	u32 addr_hi;
+	u32 addr_lo;
+};
+
+static LIST_HEAD(dma_win_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(dma_win_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(dma_win_init_mutex);
+
+/*
+ * Clear the TCEs that cover the page frames [@start_pfn, @start_pfn +
+ * @num_pfn) in the dynamic DMA window described by @arg (a struct
+ * dynamic_dma_window_prop). Entries are cleared in batches of up to 512 with
+ * plpar_tce_stuff(). Returns the result of the last hypercall issued; the
+ * loop stops at the first failure.
+ */
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+					unsigned long num_pfn, const void *arg)
+{
+	const struct dynamic_dma_window_prop *maprange = arg;
+	int rc;
+	u64 tce_size, num_tce, dma_offset, next;
+	u32 tce_shift;
+	long limit;
+
+	tce_shift = be32_to_cpu(maprange->tce_shift);
+	tce_size = 1ULL << tce_shift;
+	next = start_pfn << PAGE_SHIFT;
+	num_tce = num_pfn << PAGE_SHIFT;
+
+	/* round back to the beginning of the tce page size */
+	num_tce += next & (tce_size - 1);
+	next &= ~(tce_size - 1);
+
+	/* convert to number of tces, rounding up to a whole tce page */
+	num_tce |= tce_size - 1;
+	num_tce >>= tce_shift;
+
+	do {
+		/*
+		 * Clear up to 512 entries per hypercall, starting at the
+		 * window address that corresponds to 'next'.
+		 */
+		limit = min_t(long, num_tce, 512);
+		dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+		rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
+					     dma_offset,
+					     0, limit);
+		next += limit * tce_size;
+		num_tce -= limit;
+	} while (num_tce > 0 && !rc);
+
+	return rc;
+}
+
+/*
+ * Install read/write TCEs for the page frames [@start_pfn, @start_pfn +
+ * @num_pfn) in the dynamic DMA window described by @arg (a struct
+ * dynamic_dma_window_prop), so that window address dma_base + physical
+ * address maps to that physical address.
+ *
+ * Without FW_FEATURE_PUT_TCE_IND this is done entry by entry through
+ * tce_build_pSeriesLP(); otherwise up to one page of TCEs is staged in the
+ * per-CPU tce_page and installed per plpar_tce_put_indirect() call.
+ *
+ * Returns 0 on success, -ENOMEM if the staging page cannot be allocated, or
+ * the failing hypercall's return code.
+ *
+ * NOTE(review): interrupts are disabled and re-enabled unconditionally, so
+ * this looks like it expects to be called with interrupts enabled - confirm
+ * against callers.
+ */
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+					unsigned long num_pfn, const void *arg)
+{
+	const struct dynamic_dma_window_prop *maprange = arg;
+	u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+	__be64 *tcep;
+	u32 tce_shift;
+	u64 rc = 0;
+	long l, limit;
+
+	if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
+		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
+				be64_to_cpu(maprange->dma_base);
+		unsigned long tcenum = dmastart >> tceshift;
+		unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
+		void *uaddr = __va(start_pfn << PAGE_SHIFT);
+
+		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
+				tcenum, tceshift, npages, (unsigned long) uaddr,
+				DMA_BIDIRECTIONAL, 0);
+	}
+
+	local_irq_disable();	/* to protect tcep and the page behind it */
+	tcep = __this_cpu_read(tce_page);
+
+	if (!tcep) {
+		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+		if (!tcep) {
+			local_irq_enable();
+			return -ENOMEM;
+		}
+		/* keep the page for later calls on this CPU */
+		__this_cpu_write(tce_page, tcep);
+	}
+
+	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+	liobn = (u64)be32_to_cpu(maprange->liobn);
+	tce_shift = be32_to_cpu(maprange->tce_shift);
+	tce_size = 1ULL << tce_shift;
+	next = start_pfn << PAGE_SHIFT;
+	num_tce = num_pfn << PAGE_SHIFT;
+
+	/* round back to the beginning of the tce page size */
+	num_tce += next & (tce_size - 1);
+	next &= ~(tce_size - 1);
+
+	/* convert to number of tces, rounding up to a whole tce page */
+	num_tce |= tce_size - 1;
+	num_tce >>= tce_shift;
+
+	/* We can map max one pageful of TCEs at a time */
+	do {
+		/*
+		 * Set up the page with TCE data, looping through and setting
+		 * the values.
+		 */
+		limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE);
+		/* window address of the first entry in this batch */
+		dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+		for (l = 0; l < limit; l++) {
+			tcep[l] = cpu_to_be64(proto_tce | next);
+			next += tce_size;
+		}
+
+		rc = plpar_tce_put_indirect(liobn,
+					    dma_offset,
+					    (u64)__pa(tcep),
+					    limit);
+
+		num_tce -= limit;
+	} while (num_tce > 0 && !rc);
+
+	/* error cleanup: caller will clear whole range */
+
+	local_irq_enable();
+	return rc;
+}
+
+/*
+ * Adapter with a non-const @arg: forwards unchanged to
+ * tce_setrange_multi_pSeriesLP(). Presumably exists to match a range-walk
+ * callback prototype - the user is outside this view.
+ */
+static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
+		unsigned long num_pfn, void *arg)
+{
+	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
+}
+
+/*
+ * Fill in the fields of @tbl shared by all pSeries table flavours.
+ * @win_addr and @window_size are byte values and are stored in units of
+ * IOMMU pages (shifted right by @page_shift). The block size is fixed at 16
+ * and the type at TCE_PCI.
+ */
+static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno,
+					unsigned long liobn, unsigned long win_addr,
+					unsigned long window_size, unsigned long page_shift,
+					void *base, struct iommu_table_ops *table_ops)
+{
+	/* identity of the table */
+	tbl->it_busno = busno;
+	tbl->it_index = liobn;
+	tbl->it_type = TCE_PCI;
+	tbl->it_ops = table_ops;
+
+	/* geometry of the window, in IOMMU pages */
+	tbl->it_page_shift = page_shift;
+	tbl->it_offset = win_addr >> page_shift;
+	tbl->it_size = window_size >> page_shift;
+	tbl->it_blocksize = 16;
+
+	/* backing storage of the TCEs, if any */
+	tbl->it_base = (unsigned long)base;
+}
+
+struct iommu_table_ops iommu_table_pseries_ops;
+
+/*
+ * Set up @tbl for the next DMA window of @phb: the window starts at
+ * phb->dma_window_base_cur and is phb->dma_window_size bytes long, with the
+ * TCE storage taken from the PHB node's "linux,tce-base" / "linux,tce-size"
+ * properties. Advances phb->dma_window_base_cur past the window.
+ *
+ * Panics if the window would extend beyond 2GB. If either property is
+ * missing, an error is logged and @tbl is left untouched. @dn is only used
+ * in that error message.
+ */
+static void iommu_table_setparms(struct pci_controller *phb,
+				 struct device_node *dn,
+				 struct iommu_table *tbl)
+{
+	struct device_node *node;
+	const unsigned long *basep;
+	const u32 *sizep;
+
+	/* Test if we are going over 2GB of DMA space */
+	if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) {
+		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+	}
+
+	node = phb->dn;
+	basep = of_get_property(node, "linux,tce-base", NULL);
+	sizep = of_get_property(node, "linux,tce-size", NULL);
+	if (basep == NULL || sizep == NULL) {
+		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
+				"missing tce entries !\n", dn);
+		return;
+	}
+
+	iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur,
+				    phb->dma_window_size, IOMMU_PAGE_SHIFT_4K,
+				    __va(*basep), &iommu_table_pseries_ops);
+
+	/* the table contents are left as they are in a kdump kernel */
+	if (!is_kdump_kernel())
+		memset((void *)tbl->it_base, 0, *sizep);
+
+	phb->dma_window_base_cur += phb->dma_window_size;
+}
+
+struct iommu_table_ops iommu_table_lpar_multi_ops;
+
+/*
+ * iommu_table_setparms_lpar
+ *
+ * Function: On pSeries LPAR systems, fill in TCE table info from the
+ * @dma_window property of @dn: the parsed liobn, offset and size go into
+ * @tbl, and the offset and size are recorded as the 32-bit window of
+ * @table_group.
+ */
+static void iommu_table_setparms_lpar(struct pci_controller *phb,
+				      struct device_node *dn,
+				      struct iommu_table *tbl,
+				      struct iommu_table_group *table_group,
+				      const __be32 *dma_window)
+{
+	unsigned long liobn, win_start, win_size;
+
+	of_parse_dma_window(dn, dma_window, &liobn, &win_start, &win_size);
+
+	iommu_table_setparms_common(tbl, phb->bus->number, liobn,
+				    win_start, win_size,
+				    IOMMU_PAGE_SHIFT_4K, NULL,
+				    &iommu_table_lpar_multi_ops);
+
+	table_group->tce32_start = win_start;
+	table_group->tce32_size = win_size;
+}
+
+/* Table ops that read and write the in-memory TCE table directly. */
+struct iommu_table_ops iommu_table_pseries_ops = {
+	.set = tce_build_pSeries,
+	.clear = tce_free_pSeries,
+	.get = tce_get_pseries
+};
+
+/*
+ * Bus-level DMA setup for non-LPAR pSeries. Only acts on root buses (those
+ * without a bridge device).
+ *
+ * Without an ISA node below this PHB, the per-child window size is the
+ * largest power of two such that all direct children fit in 2GB, and the
+ * window base starts at 0. With an ISA node, a 128MB table starting at
+ * 128MB is set up for the PHB itself, and the per-child window size is
+ * chosen so that all children fit in the remaining 1.75GB.
+ *
+ * Panics if the PHB's table group cannot be allocated or its table cannot
+ * be initialised.
+ */
+static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
+{
+	struct device_node *dn;
+	struct iommu_table *tbl;
+	struct device_node *isa_dn, *isa_dn_orig;
+	struct device_node *tmp;
+	struct pci_dn *pci;
+	int children;
+
+	dn = pci_bus_to_OF_node(bus);
+
+	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);
+
+	if (bus->self) {
+		/* This is not a root bus, any setup will be done for the
+		 * device-side of the bridge in iommu_dev_setup_pSeries().
+		 */
+		return;
+	}
+	pci = PCI_DN(dn);
+
+	/* Check if the ISA bus on the system is under
+	 * this PHB.
+	 */
+	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
+
+	/* walk up from the ISA node; isa_dn ends up non-NULL only if it hits dn */
+	while (isa_dn && isa_dn != dn)
+		isa_dn = isa_dn->parent;
+
+	of_node_put(isa_dn_orig);
+
+	/* Count number of direct PCI children of the PHB. */
+	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
+		children++;
+
+	pr_debug("Children: %d\n", children);
+
+	/* Calculate amount of DMA window per slot. Each window must be
+	 * a power of two (due to pci_alloc_consistent requirements).
+	 *
+	 * Keep 256MB aside for PHBs with ISA.
+	 */
+
+	if (!isa_dn) {
+		/* No ISA/IDE - just set window size and return */
+		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
+
+		while (pci->phb->dma_window_size * children > 0x80000000ul)
+			pci->phb->dma_window_size >>= 1;
+		pr_debug("No ISA/IDE, window size is 0x%llx\n",
+			 pci->phb->dma_window_size);
+		pci->phb->dma_window_base_cur = 0;
+
+		return;
+	}
+
+	/* If we have ISA, then we probably have an IDE
+	 * controller too. Allocate a 128MB table but
+	 * skip the first 128MB to avoid stepping on ISA
+	 * space.
+	 */
+	pci->phb->dma_window_size = 0x8000000ul;
+	pci->phb->dma_window_base_cur = 0x8000000ul;
+
+	pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+	/* fail loudly instead of dereferencing a NULL group just below */
+	if (!pci->table_group)
+		panic("Failed to allocate iommu table group");
+	tbl = pci->table_group->tables[0];
+
+	iommu_table_setparms(pci->phb, dn, tbl);
+
+	if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	/* Divide the rest (1.75GB) among the children */
+	pci->phb->dma_window_size = 0x80000000ul;
+	while (pci->phb->dma_window_size * children > 0x70000000ul)
+		pci->phb->dma_window_size >>= 1;
+
+	pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Replace entry @index of @tbl with *@tce combined with the permission bits
+ * for *@direction, and hand the previous entry back to the caller.
+ *
+ * The old value is read with plpar_tce_get() and the new one written with
+ * plpar_tce_put(), both under tbl->large_pool.lock with interrupts saved.
+ * On success *@direction is set to the old entry's direction and *@tce to
+ * the old entry with the read/write permission bits masked off. On failure
+ * the hypercall's return code is returned and both outputs are left as
+ * passed in.
+ */
+static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
+				long *tce, enum dma_data_direction *direction)
+{
+	long rc;
+	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
+	unsigned long flags, oldtce = 0;
+	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+	unsigned long newtce = *tce | proto_tce;
+
+	spin_lock_irqsave(&tbl->large_pool.lock, flags);
+
+	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
+	/* only write the new entry if the old one could be read */
+	if (!rc)
+		rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);
+
+	if (!rc) {
+		*direction = iommu_tce_direction(oldtce);
+		*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+	}
+
+	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+
+	return rc;
+}
+#endif
+
+/*
+ * Table ops for LPAR, where TCEs are managed through hypercalls; set and
+ * clear use the batched variants. The exchange op is only available with
+ * CONFIG_IOMMU_API.
+ */
+struct iommu_table_ops iommu_table_lpar_multi_ops = {
+	.set = tce_buildmulti_pSeriesLP,
+#ifdef CONFIG_IOMMU_API
+	.xchg_no_kill = tce_exchange_pseries,
+#endif
+	.clear = tce_freemulti_pSeriesLP,
+	.get = tce_get_pSeriesLP
+};
+
+/*
+ * Find nearest ibm,dma-window (default DMA window) or direct DMA window or
+ * dynamic 64bit DMA window, walking up the device tree.
+ *
+ * The walk starts at @dn and stops at the first node that is NULL or has no
+ * PCI_DN. Returns the node carrying one of the properties, or NULL if none
+ * was found. *@dma_window (when @dma_window is non-NULL) is written only
+ * when the match is an "ibm,dma-window" property; for the other two
+ * properties it is left as the caller initialised it.
+ */
+static struct device_node *pci_dma_find(struct device_node *dn,
+					const __be32 **dma_window)
+{
+	const __be32 *dw = NULL;
+
+	for ( ; dn && PCI_DN(dn); dn = dn->parent) {
+		dw = of_get_property(dn, "ibm,dma-window", NULL);
+		if (dw) {
+			if (dma_window)
+				*dma_window = dw;
+			return dn;
+		}
+		dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+		if (dw)
+			return dn;
+		dw = of_get_property(dn, DMA64_PROPNAME, NULL);
+		if (dw)
+			return dn;
+	}
+
+	return NULL;
+}
+
+/*
+ * Bus-level DMA setup for pSeries LPAR. Finds the nearest node at or above
+ * the bus node that carries a DMA window property and, if that node has no
+ * table group yet, allocates one and registers it as an iommu group. The
+ * table is initialised only when the node has an "ibm,dma-window" property.
+ *
+ * Does nothing when no node up the tree carries any DMA window property.
+ * Panics if the table cannot be initialised.
+ */
+static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+{
+	struct iommu_table *tbl;
+	struct device_node *dn, *pdn;
+	struct pci_dn *ppci;
+	const __be32 *dma_window = NULL;
+
+	dn = pci_bus_to_OF_node(bus);
+
+	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
+		 dn);
+
+	pdn = pci_dma_find(dn, &dma_window);
+
+	if (dma_window == NULL)
+		pr_debug("  no ibm,dma-window property !\n");
+
+	/*
+	 * pci_dma_find() returns NULL when no node up the tree carries a DMA
+	 * window property. There is nothing to set up in that case, and
+	 * PCI_DN() must not be applied to a NULL node.
+	 */
+	if (!pdn)
+		return;
+
+	ppci = PCI_DN(pdn);
+
+	pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
+		 pdn, ppci->table_group);
+
+	if (!ppci->table_group) {
+		ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
+		tbl = ppci->table_group->tables[0];
+		if (dma_window) {
+			iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
+						  ppci->table_group, dma_window);
+
+			if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+				panic("Failed to initialize iommu table");
+		}
+		iommu_register_group(ppci->table_group,
+				pci_domain_nr(bus), 0);
+		pr_debug("  created table: %p\n", ppci->table_group);
+	}
+}
+
+
+/*
+ * Device-level DMA setup for non-LPAR pSeries. A device directly on a root
+ * bus gets its own table group and table, carved out of the PHB's window
+ * space by iommu_table_setparms(). A device further down uses the table of
+ * the nearest ancestor node that has a table group; if there is none, a
+ * warning is logged and the device gets no table.
+ *
+ * NOTE(review): the result of iommu_pseries_alloc_group() is dereferenced
+ * without a NULL check.
+ */
+static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
+{
+	struct device_node *dn;
+	struct iommu_table *tbl;
+
+	pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
+
+	dn = dev->dev.of_node;
+
+	/* If we're the direct child of a root bus, then we need to allocate
+	 * an iommu table ourselves. The bus setup code should have setup
+	 * the window sizes already.
+	 */
+	if (!dev->bus->self) {
+		struct pci_controller *phb = PCI_DN(dn)->phb;
+
+		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
+		PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
+		tbl = PCI_DN(dn)->table_group->tables[0];
+		iommu_table_setparms(phb, dn, tbl);
+
+		if (!iommu_init_table(tbl, phb->node, 0, 0))
+			panic("Failed to initialize iommu table");
+
+		set_iommu_table_base(&dev->dev, tbl);
+		return;
+	}
+
+	/* If this device is further down the bus tree, search upwards until
+	 * an already allocated iommu table is found and use that.
+	 */
+
+	while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
+		dn = dn->parent;
+
+	/* the walk stopped either on a node with a table group or off the tree */
+	if (dn && PCI_DN(dn))
+		set_iommu_table_base(&dev->dev,
+				PCI_DN(dn)->table_group->tables[0]);
+	else
+		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
+		       pci_name(dev));
+}
+
+/* Set to 1 by the "disable_ddw" early parameter. */
+static int __read_mostly disable_ddw;
+
+/*
+ * Handler for the "disable_ddw" early parameter: sets disable_ddw and logs
+ * the fact. @str is ignored.
+ */
+static int __init disable_ddw_setup(char *str)
+{
+	disable_ddw = 1;
+	printk(KERN_INFO "ppc iommu: disabling ddw.\n");
+
+	return 0;
+}
+
+early_param("disable_ddw", disable_ddw_setup);
+
+/*
+ * Clear every TCE in the window described by @dwp and log the outcome
+ * against @np (warning on failure, debug message on success).
+ */
+static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp)
+{
+	/* the window spans 2^window_shift bytes; express that in pages */
+	unsigned long nr_pfns = 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT);
+	int rc = tce_clearrange_multi_pSeriesLP(0, nr_pfns, dwp);
+
+	if (rc) {
+		pr_warn("%pOF failed to clear tces in window.\n",
+			np);
+		return;
+	}
+
+	pr_debug("%pOF successfully cleared tces in window.\n",
+		 np);
+}
+
+/*
+ * Ask RTAS to remove the DMA window @liobn, using the remove token from
+ * @ddw_avail, and log the outcome against @np.
+ *
+ * Call only if DMA window is clean.
+ */
+static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn)
+{
+	int ret;
+
+	ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
+	if (ret)
+		pr_warn("%pOF: failed to remove DMA window: rtas returned "
+			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
+			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+	else
+		pr_debug("%pOF: successfully removed DMA window: rtas returned "
+			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
+			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+}
+
+/*
+ * Tear down the window described by property @win of @np: clear all its
+ * TCEs first, then ask RTAS to remove the window. The caller must ensure
+ * that win->value is at least a struct dynamic_dma_window_prop.
+ */
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+			      struct property *win)
+{
+	struct dynamic_dma_window_prop *dwp;
+	u64 liobn;
+
+	dwp = win->value;
+	liobn = (u64)be32_to_cpu(dwp->liobn);
+
+	clean_dma_window(np, dwp);
+	__remove_dma_window(np, ddw_avail, liobn);
+}
+
+/*
+ * Remove the dynamic DMA window recorded in property @win_name of @np and,
+ * if @remove_prop is set, the property itself.
+ *
+ * Returns -EINVAL if @np has no such property and 0 in every other case:
+ * when "ibm,ddw-applicable" cannot be read nothing is done, and a failure to
+ * remove the property is only logged.
+ */
+static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name)
+{
+	struct property *win;
+	u32 ddw_avail[DDW_APPLICABLE_SIZE];
+	int ret = 0;
+
+	win = of_find_property(np, win_name, NULL);
+	if (!win)
+		return -EINVAL;
+
+	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
+	if (ret)
+		return 0;
+
+
+	/* a property too short to describe a window is not torn down */
+	if (win->length >= sizeof(struct dynamic_dma_window_prop))
+		remove_dma_window(np, ddw_avail, win);
+
+	if (!remove_prop)
+		return 0;
+
+	ret = of_remove_property(np, win);
+	if (ret)
+		pr_warn("%pOF: failed to remove DMA window property: %d\n",
+			np, ret);
+	return 0;
+}
+
+/*
+ * Look up @pdn in dma_win_list. On a hit, copy the window's DMA base
+ * address, window shift and direct-mapping flag to the output parameters
+ * and return true; otherwise return false and leave the outputs untouched.
+ */
+static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift,
+			      bool *direct_mapping)
+{
+	const struct dynamic_dma_window_prop *dma64;
+	struct dma_win *window;
+	bool found = false;
+
+	spin_lock(&dma_win_list_lock);
+	/* check if we already created a window and dupe that config if so */
+	list_for_each_entry(window, &dma_win_list, list) {
+		if (window->device != pdn)
+			continue;
+
+		dma64 = window->prop;
+		*dma_addr = be64_to_cpu(dma64->dma_base);
+		*window_shift = be32_to_cpu(dma64->window_shift);
+		*direct_mapping = window->direct;
+		found = true;
+		break;
+	}
+	spin_unlock(&dma_win_list_lock);
+
+	return found;
+}
+
+/*
+ * Allocate a dma_win entry for @pdn that refers to @dma64, with the direct
+ * flag cleared. The entry is not added to any list. Returns NULL if the
+ * allocation fails.
+ */
+static struct dma_win *ddw_list_new_entry(struct device_node *pdn,
+					  const struct dynamic_dma_window_prop *dma64)
+{
+	struct dma_win *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (entry) {
+		entry->device = pdn;
+		entry->prop = dma64;
+		entry->direct = false;
+	}
+
+	return entry;
+}
+
+/*
+ * Walk every device tree node that has property @name. A property that is
+ * too short to hold a struct dynamic_dma_window_prop is removed through
+ * remove_ddw(); otherwise a dma_win entry is created for the node and added
+ * to dma_win_list. The walk stops early if an entry cannot be allocated.
+ */
+static void find_existing_ddw_windows_named(const char *name)
+{
+	int len;
+	struct device_node *pdn;
+	struct dma_win *window;
+	const struct dynamic_dma_window_prop *dma64;
+
+	for_each_node_with_property(pdn, name) {
+		dma64 = of_get_property(pdn, name, &len);
+		if (!dma64 || len < sizeof(*dma64)) {
+			remove_ddw(pdn, true, name);
+			continue;
+		}
+
+		window = ddw_list_new_entry(pdn, dma64);
+		if (!window) {
+			/* leaving the iterator early: drop its node reference */
+			of_node_put(pdn);
+			break;
+		}
+
+		spin_lock(&dma_win_list_lock);
+		list_add(&window->list, &dma_win_list);
+		spin_unlock(&dma_win_list_lock);
+	}
+}
+
+/*
+ * Arch initcall for pseries machines: on LPAR firmware, record the windows
+ * already described by DIRECT64_PROPNAME and DMA64_PROPNAME properties in
+ * the device tree. Always returns 0.
+ */
+static int find_existing_ddw_windows(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	find_existing_ddw_windows_named(DIRECT64_PROPNAME);
+	find_existing_ddw_windows_named(DMA64_PROPNAME);
+
+	return 0;
+}
+machine_arch_initcall(pseries, find_existing_ddw_windows);
+
+/**
+ * ddw_read_ext - Get the value of a DDW extension
+ * @np:		device node from which the extension value is to be read.
+ * @extnum:	index number of the extension.
+ * @value:	pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and get the value
+ * on index 'extnum'.
+ * It can be used only to check if a property exists, passing value == NULL.
+ *
+ * Returns:
+ *	0 if extension successfully read
+ *	-EINVAL if the "ibm,ddw-extensions" does not exist,
+ *	-ENODATA if "ibm,ddw-extensions" does not have a value, and
+ *	-EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+			       u32 *value)
+{
+	static const char propname[] = "ibm,ddw-extensions";
+	u32 count;
+	int ret;
+
+	/* the cell at index DDW_EXT_SIZE is treated as the extension count */
+	ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+	if (ret)
+		return ret;
+
+	if (count < extnum)
+		return -EOVERFLOW;
+
+	/* caller only wants to know whether the extension exists */
+	if (!value)
+		value = &count;
+
+	return of_property_read_u32_index(np, propname, extnum, value);
+}
+
+/*
+ * Issue the ibm,query-pe-dma-window RTAS call for @dev's PE and decode the
+ * answer into @query.
+ *
+ * @dev:	PCI device whose config address and PHB BUID identify the PE.
+ * @ddw_avail:	RTAS tokens read from "ibm,ddw-applicable".
+ * @query:	filled with windows available, largest block, page-size mask
+ *		and migration-capable mask.
+ * @parent:	node whose "ibm,ddw-extensions" selects the output layout.
+ *
+ * Returns the rtas_call() status. @query is written (and logged) even when
+ * the call fails, so the output buffer is zeroed up front to keep stale
+ * stack contents out of @query and the kernel log.
+ */
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+		     struct ddw_query_response *query,
+		     struct device_node *parent)
+{
+	struct device_node *dn;
+	struct pci_dn *pdn;
+	u32 cfg_addr, ext_query;
+	u32 query_out[5] = { 0 };
+	u64 buid;
+	int ret, out_sz;
+
+	/*
+	 * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
+	 * output parameters ibm,query-pe-dma-windows will have, ranging from
+	 * 5 to 6.
+	 */
+	ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+	if (!ret && ext_query == 1)
+		out_sz = 6;
+	else
+		out_sz = 5;
+
+	/*
+	 * Get the config address and phb buid of the PE window.
+	 * Rely on eeh to retrieve this for us.
+	 * Retrieve them from the pci device, not the node with the
+	 * dma-window property
+	 */
+	dn = pci_device_to_OF_node(dev);
+	pdn = PCI_DN(dn);
+	buid = pdn->phb->buid;
+	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
+
+	ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
+			cfg_addr, BUID_HI(buid), BUID_LO(buid));
+
+	switch (out_sz) {
+	case 5:
+		query->windows_available = query_out[0];
+		query->largest_available_block = query_out[1];
+		query->page_size = query_out[2];
+		query->migration_capable = query_out[3];
+		break;
+	case 6:
+		/* the largest block is reported as two 32-bit halves (hi, lo) */
+		query->windows_available = query_out[0];
+		query->largest_available_block = ((u64)query_out[1] << 32) |
+						 query_out[2];
+		query->page_size = query_out[3];
+		query->migration_capable = query_out[4];
+		break;
+	}
+
+	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n",
+		 ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+		 BUID_LO(buid), ret, query->largest_available_block,
+		 query->page_size, query->windows_available);
+
+	return ret;
+}
+
+/*
+ * Issue the ibm,create-pe-dma-window RTAS call for @dev's PE.
+ *
+ * @dev:		PCI device whose config address and PHB BUID identify the PE.
+ * @ddw_avail:		RTAS tokens read from "ibm,ddw-applicable".
+ * @create:		receives the RTAS outputs (liobn, addr_hi, addr_lo).
+ * @page_shift:		passed to firmware as the page-size argument.
+ * @window_shift:	passed to firmware as the window-size argument.
+ *
+ * The call is repeated for as long as rtas_busy_delay() asks for a retry.
+ * Returns the final rtas_call() status; the result is logged either way.
+ */
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+			struct ddw_create_response *create, int page_shift,
+			int window_shift)
+{
+	struct device_node *dn;
+	struct pci_dn *pdn;
+	u32 cfg_addr;
+	u64 buid;
+	int ret;
+
+	/*
+	 * Get the config address and phb buid of the PE window.
+	 * Rely on eeh to retrieve this for us.
+	 * Retrieve them from the pci device, not the node with the
+	 * dma-window property
+	 */
+	dn = pci_device_to_OF_node(dev);
+	pdn = PCI_DN(dn);
+	buid = pdn->phb->buid;
+	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
+
+	do {
+		/* extra outputs are LIOBN and dma-addr (hi, lo) */
+		ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+				(u32 *)create, cfg_addr, BUID_HI(buid),
+				BUID_LO(buid), page_shift, window_shift);
+	} while (rtas_busy_delay(ret));
+	dev_info(&dev->dev,
+		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
+		"(liobn = 0x%x starting addr = %x %x)\n",
+		 ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+		 BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+		 create->addr_hi, create->addr_lo);
+
+	return ret;
+}
+
+/*
+ * One entry per PE node for which enable_ddw() has already failed, so that
+ * later calls for the same node bail out early instead of retrying.
+ */
+struct failed_ddw_pdn {
+	struct device_node *pdn;	/* node on which DDW setup failed */
+	struct list_head list;		/* link in failed_ddw_pdn_list */
+};
+
+/* Walked and extended by enable_ddw() while it holds dma_win_init_mutex */
+static LIST_HEAD(failed_ddw_pdn_list);
+
+/*
+ * Upper bound of the memory a DDW window may need to cover: the larger of
+ * memory_hotplug_max() and the end (exclusive) of the first address range
+ * of every "memory" type device node.
+ */
+static phys_addr_t ddw_memory_hotplug_max(void)
+{
+	resource_size_t limit = memory_hotplug_max();
+	struct device_node *np;
+	struct resource range;
+
+	for_each_node_by_type(np, "memory") {
+		/* nodes without a translatable address range are ignored */
+		if (!of_address_to_resource(np, 0, &range))
+			limit = max_t(resource_size_t, limit, range.end + 1);
+	}
+
+	return limit;
+}
+
+/*
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
+ * ibm,ddw-extensions, which carries the rtas token for
+ * ibm,reset-pe-dma-windows.
+ * That rtas-call can be used to restore the default DMA window for the device.
+ *
+ * @dev:	PCI device whose config address and PHB BUID identify the PE.
+ * @par_dn:	node from which the reset token is read.
+ *
+ * Silently does nothing when the token cannot be read; a failing RTAS call
+ * is only logged.
+ */
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+	int ret;
+	u32 cfg_addr, reset_dma_win;
+	u64 buid;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+	if (ret)
+		return;
+
+	dn = pci_device_to_OF_node(dev);
+	pdn = PCI_DN(dn);
+	buid = pdn->phb->buid;
+	cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
+			BUID_LO(buid));
+	if (ret)
+		dev_info(&dev->dev,
+			 "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d\n",
+			 reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+			 ret);
+}
+
+/*
+ * Translate the "IO Page Sizes" bit field returned by ibm,query-pe-dma-window
+ * into the shift of the largest page size it advertises.
+ *
+ * Returns 0 when none of the known page-size bits is set.
+ */
+static int iommu_get_page_shift(u32 query_page_size)
+{
+	/*
+	 * Supported IO page-sizes according to LoPAR, indexed by bit position
+	 * counted from the least significant bit (LoPAR's "bit 31" is 4k,
+	 * "bit 30" is 64k, and so on). Note that 2M is out of order, so the
+	 * table is not sorted and every set bit has to be examined.
+	 */
+	const int shift[] = {
+		__builtin_ctzll(SZ_4K),   __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
+		__builtin_ctzll(SZ_32M),  __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
+		__builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M)
+	};
+	unsigned int bit;
+	int best = 0;
+
+	/* Larger pages map more memory with the same amount of TCEs */
+	for (bit = 0; bit < ARRAY_SIZE(shift); bit++) {
+		if (!(query_page_size & (1 << bit)))
+			continue;
+
+		if (shift[bit] > best)
+			best = shift[bit];
+	}
+
+	return best;
+}
+
+/*
+ * Build a device-tree property named @propname whose value is a
+ * struct dynamic_dma_window_prop describing the window (all fields stored
+ * big-endian).
+ *
+ * The property, its name and its value are separate allocations owned by
+ * the caller. Returns NULL if any of them cannot be allocated.
+ */
+static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr,
+					    u32 page_shift, u32 window_shift)
+{
+	struct dynamic_dma_window_prop *payload;
+	struct property *prop;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	prop->name = kstrdup(propname, GFP_KERNEL);
+	payload = kzalloc(sizeof(*payload), GFP_KERNEL);
+	prop->value = payload;
+	prop->length = sizeof(*payload);
+	if (!prop->name || !payload)
+		goto err_free;
+
+	payload->liobn = cpu_to_be32(liobn);
+	payload->dma_base = cpu_to_be64(dma_addr);
+	payload->tce_shift = cpu_to_be32(page_shift);
+	payload->window_shift = cpu_to_be32(window_shift);
+
+	return prop;
+
+err_free:
+	/* kfree(NULL) is a no-op, so whichever allocation failed is harmless */
+	kfree(prop->name);
+	kfree(payload);
+	kfree(prop);
+	return NULL;
+}
+
+/*
+ * If the PE supports dynamic dma windows, and there is space for a table
+ * that can map all pages in a linear offset, then setup such a table,
+ * and record the dma-offset in the struct device.
+ *
+ * dev: the pci device we are checking
+ * pdn: the parent pe node with the ibm,dma_window property
+ * Future: also check if we can remap the base window for our base page size
+ *
+ * On failure the node is remembered in failed_ddw_pdn_list so the setup is
+ * not attempted again, and the default window is restored through
+ * reset_dma_window() if it had been removed.
+ *
+ * returns true if can map all pages (direct mapping), false otherwise.
+ */
+static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+{
+	int len = 0, ret;
+	int max_ram_len = order_base_2(ddw_memory_hotplug_max());
+	struct ddw_query_response query;
+	struct ddw_create_response create;
+	int page_shift;
+	u64 win_addr;
+	const char *win_name;
+	struct device_node *dn;
+	u32 ddw_avail[DDW_APPLICABLE_SIZE];
+	struct dma_win *window;
+	struct property *win64;
+	struct failed_ddw_pdn *fpdn;
+	bool default_win_removed = false, direct_mapping = false;
+	bool pmem_present;
+	struct pci_dn *pci = PCI_DN(pdn);
+	struct property *default_win = NULL;
+
+	dn = of_find_node_by_type(NULL, "ibm,pmemory");
+	pmem_present = dn != NULL;
+	of_node_put(dn);
+
+	mutex_lock(&dma_win_init_mutex);
+
+	if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping))
+		goto out_unlock;
+
+	/*
+	 * If we already went through this for a previous function of
+	 * the same device and failed, we don't want to muck with the
+	 * DMA window again, as it will race with in-flight operations
+	 * and can lead to EEHs. The above mutex protects access to the
+	 * list.
+	 */
+	list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
+		if (fpdn->pdn == pdn)
+			goto out_unlock;
+	}
+
+	/*
+	 * the ibm,ddw-applicable property holds the tokens for:
+	 * ibm,query-pe-dma-window
+	 * ibm,create-pe-dma-window
+	 * ibm,remove-pe-dma-window
+	 * for the given node in that order.
+	 * the property is actually in the parent, not the PE
+	 */
+	ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
+					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
+	if (ret)
+		goto out_failed;
+
+	/*
+	 * Query if there is a second window of size to map the
+	 * whole partition.  Query returns number of windows, largest
+	 * block assigned to PE (partition endpoint), and two bitmasks
+	 * of page sizes: supported and supported for migrate-dma.
+	 */
+	dn = pci_device_to_OF_node(dev);
+	ret = query_ddw(dev, ddw_avail, &query, pdn);
+	if (ret != 0)
+		goto out_failed;
+
+	/*
+	 * If there is no window available, remove the default DMA window,
+	 * if it's present. This will make all the resources available to the
+	 * new DDW window.
+	 * If anything fails after this, we need to restore it, so also check
+	 * for extensions presence.
+	 */
+	if (query.windows_available == 0) {
+		int reset_win_ext;
+
+		/* DDW + IOMMU on single window may fail if there is any allocation */
+		if (iommu_table_in_use(pci->table_group->tables[0])) {
+			dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
+			goto out_failed;
+		}
+
+		default_win = of_find_property(pdn, "ibm,dma-window", NULL);
+		if (!default_win)
+			goto out_failed;
+
+		reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
+		if (reset_win_ext)
+			goto out_failed;
+
+		remove_dma_window(pdn, ddw_avail, default_win);
+		default_win_removed = true;
+
+		/* Query again, to check if the window is available */
+		ret = query_ddw(dev, ddw_avail, &query, pdn);
+		if (ret != 0)
+			goto out_failed;
+
+		if (query.windows_available == 0) {
+			/* no windows are available for this device. */
+			dev_dbg(&dev->dev, "no free dynamic windows\n");
+			goto out_failed;
+		}
+	}
+
+	page_shift = iommu_get_page_shift(query.page_size);
+	if (!page_shift) {
+		dev_dbg(&dev->dev, "no supported page size in mask %x\n",
+			query.page_size);
+		goto out_failed;
+	}
+
+
+	/*
+	 * The "ibm,pmemory" can appear anywhere in the address space.
+	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
+	 * for the upper limit and fallback to max RAM otherwise but this
+	 * disables device::dma_ops_bypass.
+	 */
+	len = max_ram_len;
+	if (pmem_present) {
+		if (query.largest_available_block >=
+		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
+			len = MAX_PHYSMEM_BITS;
+		else
+			dev_info(&dev->dev, "Skipping ibm,pmemory\n");
+	}
+
+	/* check if the available block * number of ptes will map everything */
+	if (query.largest_available_block < (1ULL << (len - page_shift))) {
+		dev_dbg(&dev->dev,
+			"can't map partition max 0x%llx with %llu %llu-sized pages\n",
+			1ULL << len,
+			query.largest_available_block,
+			1ULL << page_shift);
+
+		/* shrink the window to what firmware can actually provide */
+		len = order_base_2(query.largest_available_block << page_shift);
+		win_name = DMA64_PROPNAME;
+	} else {
+		direct_mapping = !default_win_removed ||
+			(len == MAX_PHYSMEM_BITS) ||
+			(!pmem_present && (len == max_ram_len));
+		win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
+	}
+
+	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
+	if (ret != 0)
+		goto out_failed;
+
+	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
+		  create.liobn, dn);
+
+	win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
+	win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len);
+
+	if (!win64) {
+		dev_info(&dev->dev,
+			 "couldn't allocate property, property name, or value\n");
+		goto out_remove_win;
+	}
+
+	ret = of_add_property(pdn, win64);
+	if (ret) {
+		dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d\n",
+			pdn, ret);
+		goto out_free_prop;
+	}
+
+	window = ddw_list_new_entry(pdn, win64->value);
+	if (!window)
+		goto out_del_prop;
+
+	if (direct_mapping) {
+		window->direct = true;
+
+		/* DDW maps the whole partition, so enable direct DMA mapping */
+		ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+					    win64->value, tce_setrange_multi_pSeriesLP_walk);
+		if (ret) {
+			dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
+				 dn, ret);
+
+			/* Make sure to clean DDW if any TCE was set*/
+			clean_dma_window(pdn, win64->value);
+			goto out_del_list;
+		}
+	} else {
+		struct iommu_table *newtbl;
+		int i;
+		unsigned long start = 0, end = 0;
+
+		window->direct = false;
+
+		for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
+			const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
+
+			/* Look for MMIO32 */
+			if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) {
+				start = pci->phb->mem_resources[i].start;
+				end = pci->phb->mem_resources[i].end;
+				break;
+			}
+		}
+
+		/* New table for using DDW instead of the default DMA window */
+		newtbl = iommu_pseries_alloc_table(pci->phb->node);
+		if (!newtbl) {
+			dev_dbg(&dev->dev, "couldn't create new IOMMU table\n");
+			goto out_del_list;
+		}
+
+		iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
+					    1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
+		iommu_init_table(newtbl, pci->phb->node, start, end);
+
+		pci->table_group->tables[1] = newtbl;
+
+		set_iommu_table_base(&dev->dev, newtbl);
+	}
+
+	if (default_win_removed) {
+		iommu_tce_table_put(pci->table_group->tables[0]);
+		pci->table_group->tables[0] = NULL;
+
+		/* default_win is valid here because default_win_removed == true */
+		of_remove_property(pdn, default_win);
+		dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn);
+	}
+
+	spin_lock(&dma_win_list_lock);
+	list_add(&window->list, &dma_win_list);
+	spin_unlock(&dma_win_list_lock);
+
+	dev->dev.archdata.dma_offset = win_addr;
+	goto out_unlock;
+
+	/* Error unwinding: each label undoes one setup step, then falls through */
+out_del_list:
+	kfree(window);
+
+out_del_prop:
+	of_remove_property(pdn, win64);
+
+out_free_prop:
+	kfree(win64->name);
+	kfree(win64->value);
+	kfree(win64);
+
+out_remove_win:
+	/* DDW is clean, so it's ok to call this directly. */
+	__remove_dma_window(pdn, ddw_avail, create.liobn);
+
+out_failed:
+	if (default_win_removed)
+		reset_dma_window(dev, pdn);
+
+	/* remember the failure; if this allocation fails we simply may retry */
+	fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
+	if (!fpdn)
+		goto out_unlock;
+	fpdn->pdn = pdn;
+	list_add(&fpdn->list, &failed_ddw_pdn_list);
+
+out_unlock:
+	mutex_unlock(&dma_win_init_mutex);
+
+	/*
+	 * If we have persistent memory and the window size is only as big
+	 * as RAM, then we failed to create a window to cover persistent
+	 * memory and need to set the DMA limit.
+	 */
+	if (pmem_present && direct_mapping && len == max_ram_len)
+		dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
+
+	return direct_mapping;
+}
+
+/*
+ * Per-device DMA setup on LPAR: find the node that owns the DMA window for
+ * @dev, create its IOMMU table group on first use, and attach @dev to it.
+ *
+ * Logs a warning and leaves @dev untouched when no DMA window is found or
+ * when the table group cannot be allocated.
+ */
+static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
+{
+	struct device_node *pdn, *dn;
+	struct iommu_table *tbl;
+	const __be32 *dma_window = NULL;
+	struct pci_dn *pci;
+
+	pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
+
+	/* dev setup for LPAR is a little tricky, since the device tree might
+	 * contain the dma-window properties per-device and not necessarily
+	 * for the bus. So we need to search upwards in the tree until we
+	 * either hit a dma-window property, OR find a parent with a table
+	 * already allocated.
+	 */
+	dn = pci_device_to_OF_node(dev);
+	pr_debug("  node is %pOF\n", dn);
+
+	pdn = pci_dma_find(dn, &dma_window);
+	if (!pdn || !PCI_DN(pdn)) {
+		printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
+		       "no DMA window found for pci dev=%s dn=%pOF\n",
+				 pci_name(dev), dn);
+		return;
+	}
+	pr_debug("  parent is %pOF\n", pdn);
+
+	pci = PCI_DN(pdn);
+	if (!pci->table_group) {
+		pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+		/* the group is allocated memory; don't dereference a failure */
+		if (!pci->table_group) {
+			printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
+			       "couldn't allocate IOMMU table group for pci dev=%s dn=%pOF\n",
+					 pci_name(dev), pdn);
+			return;
+		}
+		tbl = pci->table_group->tables[0];
+		iommu_table_setparms_lpar(pci->phb, pdn, tbl,
+				pci->table_group, dma_window);
+
+		iommu_init_table(tbl, pci->phb->node, 0, 0);
+		iommu_register_group(pci->table_group,
+				pci_domain_nr(pci->phb->bus), 0);
+		pr_debug("  created table: %p\n", pci->table_group);
+	} else {
+		pr_debug("  found DMA window, table: %p\n", pci->table_group);
+	}
+
+	set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
+	iommu_add_device(pci->table_group, &dev->dev);
+}
+
+/*
+ * Decide whether @pdev may bypass the IOMMU: only when a full 64-bit DMA
+ * mask is requested and enable_ddw() manages to set up a direct mapping
+ * on the node owning the device's DMA window.
+ */
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	struct device_node *pdn;
+
+	/* only attempt to use a new window if 64-bit DMA is requested */
+	if (dma_mask < DMA_BIT_MASK(64))
+		return false;
+
+	dev_dbg(&pdev->dev, "node is %pOF\n", dn);
+
+	/*
+	 * the device tree might contain the dma-window properties
+	 * per-device and not necessarily for the bus. So we need to
+	 * search upwards in the tree until we either hit a dma-window
+	 * property, OR find a parent with a table already allocated.
+	 */
+	pdn = pci_dma_find(dn, NULL);
+	if (!pdn || !PCI_DN(pdn))
+		return false;
+
+	return enable_ddw(pdev, pdn);
+}
+
+/*
+ * Memory hotplug notifier: keep every direct-mapped DMA window in step
+ * with memory going online (map the range) or going away (unmap it).
+ *
+ * Returns NOTIFY_BAD if any window reported an error, except for
+ * MEM_CANCEL_ONLINE where errors are ignored; NOTIFY_OK otherwise.
+ */
+static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
+		void *data)
+{
+	struct memory_notify *mn = data;
+	struct dma_win *win;
+	int err = 0;
+
+	switch (action) {
+	case MEM_GOING_ONLINE:
+		spin_lock(&dma_win_list_lock);
+		list_for_each_entry(win, &dma_win_list, list) {
+			if (win->direct)
+				err |= tce_setrange_multi_pSeriesLP(mn->start_pfn,
+						mn->nr_pages, win->prop);
+			/* XXX log error */
+		}
+		spin_unlock(&dma_win_list_lock);
+		break;
+	case MEM_CANCEL_ONLINE:
+	case MEM_OFFLINE:
+		spin_lock(&dma_win_list_lock);
+		list_for_each_entry(win, &dma_win_list, list) {
+			if (win->direct)
+				err |= tce_clearrange_multi_pSeriesLP(mn->start_pfn,
+						mn->nr_pages, win->prop);
+			/* XXX log error */
+		}
+		spin_unlock(&dma_win_list_lock);
+		break;
+	default:
+		break;
+	}
+
+	if (!err || action == MEM_CANCEL_ONLINE)
+		return NOTIFY_OK;
+
+	return NOTIFY_BAD;
+}
+
+/* Memory hotplug notifier block, registered in iommu_init_early_pSeries() */
+static struct notifier_block iommu_mem_nb = {
+	.notifier_call = iommu_mem_notifier,
+};
+
+/*
+ * Device-tree reconfiguration notifier. When a node is detached, tear down
+ * its dynamic DMA window, free its IOMMU table group and drop its entry
+ * from dma_win_list.
+ *
+ * Returns NOTIFY_OK for OF_RECONFIG_DETACH_NODE and NOTIFY_DONE for every
+ * other action.
+ */
+static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	struct device_node *np = rd->dn;
+	struct pci_dn *pci = PCI_DN(np);
+	struct dma_win *win;
+
+	if (action != OF_RECONFIG_DETACH_NODE)
+		return NOTIFY_DONE;
+
+	/*
+	 * Removing the property will invoke the reconfig
+	 * notifier again, which causes dead-lock on the
+	 * read-write semaphore of the notifier chain. So
+	 * we have to remove the property when releasing
+	 * the device node.
+	 */
+	if (remove_ddw(np, false, DIRECT64_PROPNAME))
+		remove_ddw(np, false, DMA64_PROPNAME);
+
+	if (pci && pci->table_group)
+		iommu_pseries_free_group(pci->table_group,
+				np->full_name);
+
+	spin_lock(&dma_win_list_lock);
+	list_for_each_entry(win, &dma_win_list, list) {
+		if (win->device != np)
+			continue;
+
+		/* deleting while iterating is fine: the walk ends right here */
+		list_del(&win->list);
+		kfree(win);
+		break;
+	}
+	spin_unlock(&dma_win_list_lock);
+
+	return NOTIFY_OK;
+}
+
+/* OF reconfig notifier block, registered in iommu_init_early_pSeries() */
+static struct notifier_block iommu_reconfig_nb = {
+	.notifier_call = iommu_reconfig_notifier,
+};
+
+/*
+ * Called very early: pick the bus/device DMA setup hooks that match the
+ * firmware (LPAR or not), register the reconfig and memory notifiers and
+ * install the IOMMU DMA ops. Does nothing at all when the chosen node
+ * carries "linux,iommu-off".
+ */
+void __init iommu_init_early_pSeries(void)
+{
+	if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
+		return;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
+		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
+	} else {
+		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
+		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
+		/* the bypass hook is only offered when DDW is not disabled */
+		if (!disable_ddw)
+			pseries_pci_controller_ops.iommu_bypass_supported =
+				iommu_bypass_supported_pSeriesLP;
+	}
+
+	of_reconfig_notifier_register(&iommu_reconfig_nb);
+	register_memory_notifier(&iommu_mem_nb);
+
+	set_pci_dma_ops(&dma_iommu_ops);
+}
+
+/*
+ * Handler for the "multitce=" kernel parameter. "multitce=off" on an LPAR
+ * that has either multi-TCE firmware feature clears both feature bits.
+ * Always returns 1.
+ */
+static int __init disable_multitce(char *str)
+{
+	if (strcmp(str, "off") != 0)
+		return 1;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 1;
+
+	if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND) &&
+	    !firmware_has_feature(FW_FEATURE_STUFF_TCE))
+		return 1;
+
+	printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
+	powerpc_firmware_features &=
+		~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
+
+	return 1;
+}
+
+__setup("multitce=", disable_multitce);
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+/*
+ * Look up the IOMMU group of @pdev through the node that owns its DMA
+ * window.
+ *
+ * Returns the group with a reference taken, or ERR_PTR(-ENODEV) when the
+ * node, its table group or the group itself is missing.
+ */
+struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
+					     struct pci_dev *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct device_node *pdn = pci_dma_find(dn, NULL);
+	struct pci_dn *pci;
+
+	if (!pdn || !PCI_DN(pdn))
+		return ERR_PTR(-ENODEV);
+
+	pci = PCI_DN(pdn);
+	if (!pci->table_group || !pci->table_group->group)
+		return ERR_PTR(-ENODEV);
+
+	return iommu_group_ref_get(pci->table_group->group);
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
new file mode 100644
index 0000000000..096d09ed89
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright 2006 Michael Ellerman, IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/firmware.h>
+#include <asm/kexec.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/smp.h>
+#include <asm/plpar_wrappers.h>
+
+#include "pseries.h"
+
+/*
+ * Per-cpu teardown before kexec: on a regular (non-crash) shutdown under
+ * SPLPAR, unregister the DTL (if enabled), the SLB shadow buffer and the
+ * VPA for this cpu, then tear down its interrupt controller state.
+ *
+ * @crash_shutdown: non-zero when called on the crash path.
+ * @secondary: non-zero for secondary cpus.
+ */
+void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	/*
+	 * Don't risk a hypervisor call if we're crashing
+	 * XXX: Why? The hypervisor is not crashing. It might be better
+	 * to at least attempt unregister to avoid the hypervisor stepping
+	 * on our memory.
+	 */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
+		int cpu = smp_processor_id();
+		int hwcpu = hard_smp_processor_id();
+		int rc;
+
+		if (get_lppaca()->dtl_enable_mask) {
+			rc = unregister_dtl(hwcpu);
+			if (rc)
+				pr_err("WARNING: DTL deregistration for cpu %d (hw %d) failed with %d\n",
+				       cpu, hwcpu, rc);
+		}
+
+		rc = unregister_slb_shadow(hwcpu);
+		if (rc)
+			pr_err("WARNING: SLB shadow buffer deregistration for cpu %d (hw %d) failed with %d\n",
+			       cpu, hwcpu, rc);
+
+		rc = unregister_vpa(hwcpu);
+		if (rc)
+			pr_err("WARNING: VPA deregistration for cpu %d (hw %d) failed with %d\n",
+			       cpu, hwcpu, rc);
+	}
+
+	if (!xive_enabled()) {
+		xics_kexec_teardown_cpu(secondary);
+		return;
+	}
+
+	xive_teardown_cpu();
+
+	/* the global XIVE shutdown is left to the non-secondary cpu */
+	if (!secondary)
+		xive_shutdown();
+}
+
+/*
+ * pseries kexec entry: when firmware offers the SET_MODE feature, call
+ * pseries_disable_reloc_on_exc() first, then hand over to the generic
+ * default_machine_kexec().
+ */
+void pseries_machine_kexec(struct kimage *image)
+{
+	const bool has_set_mode = firmware_has_feature(FW_FEATURE_SET_MODE);
+
+	if (has_set_mode)
+		pseries_disable_reloc_on_exc();
+
+	default_machine_kexec(image);
+}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
new file mode 100644
index 0000000000..d4d6de0628
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -0,0 +1,2026 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pSeries_lpar.c
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * pSeries LPAR support.
+ */
+
+/* Enables debugging of low-level hash table routines - careful! */
+#undef DEBUG
+#define pr_fmt(fmt) "lpar: " fmt
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/jump_label.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/workqueue.h>
+#include <linux/proc_fs.h>
+#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <asm/mmu_context.h>
+#include <asm/iommu.h>
+#include <asm/tlb.h>
+#include <asm/cputable.h>
+#include <asm/papr-sysparm.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/trace.h>
+#include <asm/firmware.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
+#include <asm/fadump.h>
+#include <asm/dtl.h>
+#include <asm/vphn.h>
+
+#include "pseries.h"
+
+/* Flag bits for H_BULK_REMOVE */
+#define HBR_REQUEST	0x4000000000000000UL
+#define HBR_RESPONSE	0x8000000000000000UL
+#define HBR_END		0xc000000000000000UL
+#define HBR_AVPN	0x0200000000000000UL
+#define HBR_ANDCOND	0x0100000000000000UL
+
+
+/* in hvCall.S */
+EXPORT_SYMBOL(plpar_hcall);
+EXPORT_SYMBOL(plpar_hcall9);
+EXPORT_SYMBOL(plpar_hcall_norets);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * H_BLOCK_REMOVE supported block size for this page size in a segment whose
+ * base page size is that page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+#endif
+
+/*
+ * Due to the involved complexity, and that the current hypervisor is only
+ * returning this value or 0, we limit our support of the H_BLOCK_REMOVE
+ * buffer to a block size of 8.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
+/*
+ * Mask written to each cpu's lppaca dtl_enable_mask by register_dtl_buffer();
+ * a zero mask makes that function skip registration entirely.
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static u8 dtl_mask = DTL_LOG_PREEMPT;
+#else
+static u8 dtl_mask;
+#endif
+
+/*
+ * Allocate a dispatch trace log buffer for every possible cpu that does
+ * not have one yet and initialise that cpu's paca bookkeeping for it.
+ *
+ * @time_limit: optional jiffies deadline; once it has passed the loop
+ *		calls cond_resched() and moves the deadline one second on.
+ *
+ * Stops at the first allocation failure; cpus handled before that keep
+ * their buffers.
+ */
+void alloc_dtl_buffers(unsigned long *time_limit)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct paca_struct *paca = paca_ptrs[cpu];
+		struct dtl_entry *buf;
+
+		if (paca->dispatch_log)
+			continue;
+
+		buf = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
+		if (!buf) {
+			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
+				cpu);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+			pr_warn("Stolen time statistics will be unreliable\n");
+#endif
+			break;
+		}
+
+		paca->dtl_ridx = 0;
+		paca->dispatch_log = buf;
+		paca->dispatch_log_end = buf + N_DISPATCH_LOG;
+		paca->dtl_curr = buf;
+
+		if (time_limit && time_after(jiffies, *time_limit)) {
+			cond_resched();
+			*time_limit = jiffies + HZ;
+		}
+	}
+}
+
+/*
+ * Register @cpu's dispatch trace log with the hypervisor and enable
+ * logging according to dtl_mask.
+ *
+ * Does nothing when the cpu has no log buffer or dtl_mask is zero. A
+ * failed registration is logged, and the enable mask is still written.
+ */
+void register_dtl_buffer(int cpu)
+{
+	int hwcpu = get_hard_smp_processor_id(cpu);
+	struct paca_struct *paca = paca_ptrs[cpu];
+	struct dtl_entry *log = paca->dispatch_log;
+	long rc;
+
+	if (!log || !dtl_mask)
+		return;
+
+	/* restart reading from the beginning of the buffer */
+	paca->dtl_ridx = 0;
+	paca->dtl_curr = log;
+	lppaca_of(cpu).dtl_idx = 0;
+
+	/* hypervisor reads buffer length from this field */
+	log->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
+	rc = register_dtl(hwcpu, __pa(log));
+	if (rc)
+		pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n",
+		       cpu, hwcpu, rc);
+
+	lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+/* Per-cpu delayed work item that drains the dispatch trace log */
+struct dtl_worker {
+	struct delayed_work work;	/* handler: process_dtl_buffer() */
+	int cpu;			/* cpu the worker is meant to run on */
+};
+
+/* Per-vcpu dispatch counters, maintained by update_vcpu_disp_stat() */
+struct vcpu_dispatch_data {
+	/* cpu of the previous dispatch; -1 means no dispatch recorded yet */
+	int last_disp_cpu;
+
+	/* dispatches counted (the very first one only seeds last_disp_cpu) */
+	int total_disp;
+
+	/* placement relative to the previous dispatch */
+	int same_cpu_disp;	/* same cpu, or same first thread sibling */
+	int same_chip_disp;	/* relative distance 0 */
+	int diff_chip_disp;	/* relative distance 1 */
+	int far_chip_disp;	/* relative distance 2 */
+
+	/* placement relative to this vcpu's own associativity */
+	int numa_home_disp;	/* distance 0 */
+	int numa_remote_disp;	/* distance 1 */
+	int numa_far_disp;	/* distance 2 */
+};
+
+/*
+ * This represents the number of cpus in the hypervisor. Since there is no
+ * architected way to discover the number of processors in the host, we
+ * provision for dealing with NR_CPUS. This is currently 2048 by default, and
+ * is sufficient for our purposes. This will need to be tweaked if
+ * CONFIG_NR_CPUS is changed.
+ */
+#define NR_CPUS_H	NR_CPUS
+
+DEFINE_RWLOCK(dtl_access_lock);
+/* Per-cpu dispatch counters, updated by update_vcpu_disp_stat() */
+static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data);
+/* Per-cpu index of the next DTL entry to consume in process_dtl_buffer() */
+static DEFINE_PER_CPU(u64, dtl_entry_ridx);
+static DEFINE_PER_CPU(struct dtl_worker, dtl_workers);
+static enum cpuhp_state dtl_worker_state;
+static DEFINE_MUTEX(dtl_enable_mutex);
+static int vcpudispatch_stats_on __read_mostly;
+static int vcpudispatch_stats_freq = 50;
+/* Associativity caches, allocated by init_cpu_associativity() */
+static __be32 *vcpu_associativity, *pcpu_associativity;
+
+
+/*
+ * Release the dispatch trace log buffer of every possible cpu and reset
+ * the paca fields that pointed into it.
+ *
+ * @time_limit: optional jiffies deadline; once it has passed the loop
+ *		calls cond_resched() and moves the deadline one second on.
+ *
+ * Compiled to a no-op when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is set, in
+ * which case the buffers are kept.
+ */
+static void free_dtl_buffers(unsigned long *time_limit)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	int cpu;
+	struct paca_struct *pp;
+
+	for_each_possible_cpu(cpu) {
+		pp = paca_ptrs[cpu];
+		if (!pp->dispatch_log)
+			continue;
+		kmem_cache_free(dtl_cache, pp->dispatch_log);
+		pp->dtl_ridx = 0;
+		/* these three are pointers: clear them with NULL, not 0 */
+		pp->dispatch_log = NULL;
+		pp->dispatch_log_end = NULL;
+		pp->dtl_curr = NULL;
+
+		if (time_limit && time_after(jiffies, *time_limit)) {
+			cond_resched();
+			*time_limit = jiffies + HZ;
+		}
+	}
+#endif
+}
+
+/*
+ * Allocate the zeroed associativity caches: one VPHN_ASSOC_BUFSIZE-sized
+ * slot per core, for virtual cpus (sized by the possible cpus) and for
+ * physical cpus (sized by NR_CPUS_H).
+ *
+ * Returns 0 on success. On failure returns -ENOMEM with both buffers
+ * released and both pointers reset to NULL, so nothing is leaked and a
+ * later destroy_cpu_associativity() remains harmless.
+ */
+static int init_cpu_associativity(void)
+{
+	vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core,
+			VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+	pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core,
+			VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+
+	if (!vcpu_associativity || !pcpu_associativity) {
+		pr_err("error allocating memory for associativity information\n");
+		/* kfree(NULL) is a no-op, so this covers a partial failure */
+		kfree(vcpu_associativity);
+		kfree(pcpu_associativity);
+		vcpu_associativity = NULL;
+		pcpu_associativity = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Free both associativity caches and reset the pointers, so that calling
+ * this again (or after a failed init) is harmless.
+ */
+static void destroy_cpu_associativity(void)
+{
+	kfree(vcpu_associativity);
+	kfree(pcpu_associativity);
+	vcpu_associativity = NULL;
+	pcpu_associativity = NULL;
+}
+
+/*
+ * Return the associativity slot of @cpu's core inside the cache
+ * @cpu_assoc, filling it through hcall_vphn() (with @flag) when its first
+ * word is still zero.
+ *
+ * Returns NULL when the hcall reports an error.
+ */
+static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag)
+{
+	int core = (int)(cpu / threads_per_core);
+	__be32 *slot = &cpu_assoc[core * VPHN_ASSOC_BUFSIZE];
+	int rc;
+
+	/* a non-zero first word means the slot was already populated */
+	if (slot[0])
+		return slot;
+
+	rc = hcall_vphn(cpu, flag, &slot[0]);
+
+	return rc ? NULL : slot;
+}
+
+static __be32 *get_pcpu_associativity(int cpu)
+{
+	return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU);
+}
+
+static __be32 *get_vcpu_associativity(int cpu)
+{
+	return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU);
+}
+
+/*
+ * Distance between the physical cpus of two consecutive dispatches.
+ *
+ * Returns the value of cpu_relative_distance(), -EINVAL if either cpu
+ * number is NR_CPUS_H or above, or -EIO if an associativity lookup fails.
+ */
+static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
+{
+	__be32 *prev_assoc, *cur_assoc;
+
+	if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H)
+		return -EINVAL;
+
+	/* look both up before checking, as either call may fill the cache */
+	prev_assoc = get_pcpu_associativity(last_disp_cpu);
+	cur_assoc = get_pcpu_associativity(cur_disp_cpu);
+
+	if (prev_assoc && cur_assoc)
+		return cpu_relative_distance(prev_assoc, cur_assoc);
+
+	return -EIO;
+}
+
+/*
+ * Distance between the physical cpu @disp_cpu and the associativity of
+ * the vcpu this code is running on.
+ *
+ * Returns the value of cpu_relative_distance(), -EINVAL if @disp_cpu is
+ * NR_CPUS_H or above, or -EIO if an associativity lookup fails.
+ */
+static int cpu_home_node_dispatch_distance(int disp_cpu)
+{
+	int vcpu_id = smp_processor_id();
+	__be32 *phys_assoc, *virt_assoc;
+
+	if (disp_cpu >= NR_CPUS_H) {
+		pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n",
+						disp_cpu, NR_CPUS_H);
+		return -EINVAL;
+	}
+
+	phys_assoc = get_pcpu_associativity(disp_cpu);
+	virt_assoc = get_vcpu_associativity(vcpu_id);
+
+	if (phys_assoc && virt_assoc)
+		return cpu_relative_distance(phys_assoc, virt_assoc);
+
+	return -EIO;
+}
+
+/*
+ * Account one dispatch of the current vcpu onto physical cpu @disp_cpu.
+ *
+ * The first dispatch seen only records the cpu. Every later one bumps the
+ * total and is classified twice: against the previous dispatch cpu (same
+ * cpu/core, or distance 0/1/2) and against this vcpu's own associativity
+ * (distance 0/1/2). Lookup errors and unexpected distances are only
+ * reported through rate-limited debug messages.
+ */
+static void update_vcpu_disp_stat(int disp_cpu)
+{
+	struct vcpu_dispatch_data *stats = this_cpu_ptr(&vcpu_disp_data);
+	int dist;
+
+	if (stats->last_disp_cpu == -1) {
+		stats->last_disp_cpu = disp_cpu;
+		return;
+	}
+
+	stats->total_disp++;
+
+	if (stats->last_disp_cpu == disp_cpu ||
+	    cpu_first_thread_sibling(stats->last_disp_cpu) ==
+	    cpu_first_thread_sibling(disp_cpu)) {
+		stats->same_cpu_disp++;
+	} else {
+		dist = cpu_relative_dispatch_distance(stats->last_disp_cpu,
+						      disp_cpu);
+		if (dist < 0) {
+			pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+					smp_processor_id());
+		} else if (dist == 0) {
+			stats->same_chip_disp++;
+		} else if (dist == 1) {
+			stats->diff_chip_disp++;
+		} else if (dist == 2) {
+			stats->far_chip_disp++;
+		} else {
+			pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n",
+					 smp_processor_id(),
+					 stats->last_disp_cpu,
+					 disp_cpu,
+					 dist);
+		}
+	}
+
+	dist = cpu_home_node_dispatch_distance(disp_cpu);
+	if (dist < 0) {
+		pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+				smp_processor_id());
+	} else if (dist == 0) {
+		stats->numa_home_disp++;
+	} else if (dist == 1) {
+		stats->numa_remote_disp++;
+	} else if (dist == 2) {
+		stats->numa_far_disp++;
+	} else {
+		pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n",
+					 smp_processor_id(),
+					 disp_cpu,
+					 dist);
+	}
+
+	stats->last_disp_cpu = disp_cpu;
+}
+
+static void process_dtl_buffer(struct work_struct *work)
+{	/* Delayed-work handler: consume new dispatch log entries, then re-arm itself */
+	struct dtl_entry dtle;
+	u64 i = __this_cpu_read(dtl_entry_ridx);	/* index of the next entry to read */
+	struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+	struct dtl_worker *d = container_of(work, struct dtl_worker, work.work);
+
+	if (!local_paca->dispatch_log)
+		return;	/* no log buffer: stop without rescheduling */
+
+	/* if we have been migrated away, we cancel ourself */
+	if (d->cpu != smp_processor_id()) {
+		pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n",
+						smp_processor_id());
+		return;
+	}
+
+	if (i == be64_to_cpu(vpa->dtl_idx))
+		goto out;	/* nothing new since the last run: just reschedule */
+
+	while (i < be64_to_cpu(vpa->dtl_idx)) {
+		dtle = *dtl;	/* copy the entry before re-checking dtl_idx for overflow */
+		barrier();
+		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+			/* buffer has overflowed */
+			pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n",
+				d->cpu,
+				be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i);
+			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+			continue;	/* the copied entry is discarded; restart from the new index */
+		}
+		update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id));
+		++i;
+		++dtl;
+		if (dtl == dtl_end)
+			dtl = local_paca->dispatch_log;	/* wrap to the start of the buffer */
+	}
+
+	__this_cpu_write(dtl_entry_ridx, i);	/* remember where to resume */
+
+out:
+	schedule_delayed_work_on(d->cpu, to_delayed_work(work),
+					HZ / vcpudispatch_stats_freq);
+}
+
+static int dtl_worker_online(unsigned int cpu)
+{	/* Online callback given to cpuhp_setup_state(): start the DTL worker of @cpu */
+	struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+	memset(d, 0, sizeof(*d));
+	INIT_DELAYED_WORK(&d->work, process_dtl_buffer);
+	d->cpu = cpu;	/* process_dtl_buffer() compares this with the cpu it runs on */
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	per_cpu(dtl_entry_ridx, cpu) = 0;
+	register_dtl_buffer(cpu);
+#else
+	per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx);
+#endif
+
+	schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq);
+	return 0;
+}
+
+static int dtl_worker_offline(unsigned int cpu)
+{	/* Teardown callback given to cpuhp_setup_state(): stop the DTL worker of @cpu */
+	struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+	cancel_delayed_work_sync(&d->work);	/* cancel this cpu's delayed work */
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	unregister_dtl(get_hard_smp_processor_id(cpu));
+#endif
+
+	return 0;
+}
+
+static void set_global_dtl_mask(u8 mask)
+{	/* Store @mask in dtl_mask and in the lppaca of every present cpu */
+	int cpu;
+
+	dtl_mask = mask;
+	for_each_present_cpu(cpu)
+		lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+static void reset_global_dtl_mask(void)
+{	/* Put dtl_mask and every present cpu's lppaca mask back to the build-time default */
+	int cpu;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	dtl_mask = DTL_LOG_PREEMPT;
+#else
+	dtl_mask = 0;
+#endif
+	for_each_present_cpu(cpu)
+		lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+static int dtl_worker_enable(unsigned long *time_limit)
+{	/* Returns 0, -EBUSY or -EINVAL; on success dtl_access_lock stays write-locked */
+	int rc = 0, state;
+
+	if (!write_trylock(&dtl_access_lock)) {
+		rc = -EBUSY;	/* the lock is already taken */
+		goto out;
+	}
+
+	set_global_dtl_mask(DTL_LOG_ALL);
+
+	/* Setup dtl buffers and register those */
+	alloc_dtl_buffers(time_limit);
+
+	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
+					dtl_worker_online, dtl_worker_offline);
+	if (state < 0) {	/* undo everything done above, including the lock */
+		pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
+		free_dtl_buffers(time_limit);
+		reset_global_dtl_mask();
+		write_unlock(&dtl_access_lock);
+		rc = -EINVAL;
+		goto out;
+	}
+	dtl_worker_state = state;	/* later passed to cpuhp_remove_state() */
+
+out:
+	return rc;
+}
+
+static void dtl_worker_disable(unsigned long *time_limit)
+{	/* Undo dtl_worker_enable(): remove hotplug state, free buffers, reset mask, unlock */
+	cpuhp_remove_state(dtl_worker_state);
+	free_dtl_buffers(time_limit);
+	reset_global_dtl_mask();
+	write_unlock(&dtl_access_lock);	/* pairs with the write_trylock() in dtl_worker_enable() */
+}
+
+static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
+		size_t count, loff_t *ppos)
+{	/* proc write handler: "0" disables, "1" enables the dispatch statistics */
+	unsigned long time_limit = jiffies + HZ;	/* handed to the enable/disable helpers */
+	struct vcpu_dispatch_data *disp;
+	int rc, cmd, cpu;
+	char buf[16];
+
+	if (count > 15)
+		return -EINVAL;	/* input plus terminating NUL must fit in buf[] */
+
+	if (copy_from_user(buf, p, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+	rc = kstrtoint(buf, 0, &cmd);
+	if (rc || cmd < 0 || cmd > 1) {
+		pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n");
+		return rc ? rc : -EINVAL;
+	}
+
+	mutex_lock(&dtl_enable_mutex);
+
+	if ((cmd == 0 && !vcpudispatch_stats_on) ||
+			(cmd == 1 && vcpudispatch_stats_on))
+		goto out;	/* already in the requested state; rc is 0 so count is returned */
+
+	if (cmd) {
+		rc = init_cpu_associativity();
+		if (rc) {
+			destroy_cpu_associativity();
+			goto out;
+		}
+
+		for_each_possible_cpu(cpu) {
+			disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+			memset(disp, 0, sizeof(*disp));
+			disp->last_disp_cpu = -1;	/* marks "no dispatch seen yet" */
+		}
+
+		rc = dtl_worker_enable(&time_limit);
+		if (rc) {
+			destroy_cpu_associativity();
+			goto out;
+		}
+	} else {
+		dtl_worker_disable(&time_limit);
+		destroy_cpu_associativity();
+	}
+
+	vcpudispatch_stats_on = cmd;	/* only reached when the state change succeeded */
+
+out:
+	mutex_unlock(&dtl_enable_mutex);
+	if (rc)
+		return rc;
+	return count;
+}
+
+static int vcpudispatch_stats_display(struct seq_file *p, void *v)
+{	/* seq_file show: "off", or one line of eight counters per online cpu */
+	int cpu;
+	struct vcpu_dispatch_data *disp;
+
+	if (!vcpudispatch_stats_on) {
+		seq_puts(p, "off\n");
+		return 0;
+	}
+
+	for_each_online_cpu(cpu) {
+		disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+		seq_printf(p, "cpu%d", cpu);
+		seq_put_decimal_ull(p, " ", disp->total_disp);
+		seq_put_decimal_ull(p, " ", disp->same_cpu_disp);
+		seq_put_decimal_ull(p, " ", disp->same_chip_disp);
+		seq_put_decimal_ull(p, " ", disp->diff_chip_disp);
+		seq_put_decimal_ull(p, " ", disp->far_chip_disp);
+		seq_put_decimal_ull(p, " ", disp->numa_home_disp);
+		seq_put_decimal_ull(p, " ", disp->numa_remote_disp);
+		seq_put_decimal_ull(p, " ", disp->numa_far_disp);
+		seq_puts(p, "\n");
+	}
+
+	return 0;
+}
+
+static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
+{	/* proc open handler: bind vcpudispatch_stats_display() via single_open() */
+	return single_open(file, vcpudispatch_stats_display, NULL);
+}
+
+static const struct proc_ops vcpudispatch_stats_proc_ops = {	/* "powerpc/vcpudispatch_stats" */
+	.proc_open	= vcpudispatch_stats_open,
+	.proc_read	= seq_read,
+	.proc_write	= vcpudispatch_stats_write,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+static ssize_t vcpudispatch_stats_freq_write(struct file *file,
+		const char __user *p, size_t count, loff_t *ppos)
+{	/* proc write handler: set vcpudispatch_stats_freq to a value in [1, HZ] */
+	int rc, freq;
+	char buf[16];
+
+	if (count > 15)
+		return -EINVAL;	/* input plus terminating NUL must fit in buf[] */
+
+	if (copy_from_user(buf, p, count))
+		return -EFAULT;
+
+	buf[count] = 0;
+	rc = kstrtoint(buf, 0, &freq);
+	if (rc || freq < 1 || freq > HZ) {
+		pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n",
+				HZ);
+		return rc ? rc : -EINVAL;
+	}
+
+	vcpudispatch_stats_freq = freq;	/* divisor of HZ in the worker's reschedule delay */
+
+	return count;
+}
+
+static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v)
+{	/* seq_file show: print the current vcpudispatch_stats_freq */
+	seq_printf(p, "%d\n", vcpudispatch_stats_freq);
+	return 0;
+}
+
+static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
+{	/* proc open handler: bind vcpudispatch_stats_freq_display() via single_open() */
+	return single_open(file, vcpudispatch_stats_freq_display, NULL);
+}
+
+static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {	/* "powerpc/vcpudispatch_stats_freq" */
+	.proc_open	= vcpudispatch_stats_freq_open,
+	.proc_read	= seq_read,
+	.proc_write	= vcpudispatch_stats_freq_write,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+static int __init vcpudispatch_stats_procfs_init(void)
+{	/* Create the two proc entries; always returns 0, failures are only logged */
+	if (!lppaca_shared_proc())
+		return 0;	/* entries are only created when lppaca_shared_proc() is true */
+
+	if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
+					&vcpudispatch_stats_proc_ops))
+		pr_err("vcpudispatch_stats: error creating procfs file\n");
+	else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL,
+					&vcpudispatch_stats_freq_proc_ops))
+		pr_err("vcpudispatch_stats_freq: error creating procfs file\n");
+
+	return 0;
+}
+
+machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+u64 pseries_paravirt_steal_clock(int cpu)
+{	/* Sum of enqueue_dispatch_tb and ready_enqueue_tb from @cpu's lppaca */
+	struct lppaca *lppaca = &lppaca_of(cpu);
+
+	return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+		be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+}
+#endif
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+void vpa_init(int cpu)
+{	/* Register @cpu's VPA and, where applicable, its SLB shadow and DTL buffers */
+	int hwcpu = get_hard_smp_processor_id(cpu);
+	unsigned long addr;
+	long ret;
+
+	/*
+	 * The spec says it "may be problematic" if CPU x registers the VPA of
+	 * CPU y. We should never do that, but wail if we ever do.
+	 */
+	WARN_ON(cpu != smp_processor_id());
+
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		lppaca_of(cpu).vmxregs_in_use = 1;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lppaca_of(cpu).ebb_regs_in_use = 1;
+
+	addr = __pa(&lppaca_of(cpu));	/* registration takes the physical address */
+	ret = register_vpa(hwcpu, addr);
+
+	if (ret) {
+		pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
+		       "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
+		return;	/* the registrations below are skipped when the VPA failed */
+	}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/*
+	 * PAPR says this feature is SLB-Buffer but firmware never
+	 * reports that.  All SPLPAR support SLB shadow buffer.
+	 */
+	if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
+		ret = register_slb_shadow(hwcpu, addr);
+		if (ret)
+			pr_err("WARNING: SLB shadow buffer registration for "
+			       "cpu %d (hw %d) of area %lx failed with %ld\n",
+			       cpu, hwcpu, addr, ret);
+	}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+	/*
+	 * Register dispatch trace log, if one has been allocated.
+	 */
+	register_dtl_buffer(cpu);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+static int __init pseries_lpar_register_process_table(unsigned long base,
+			unsigned long page_size, unsigned long table_size)
+{	/* Issue H_REGISTER_PROC_TBL, retrying while long-busy; BUG()s on failure */
+	long rc;
+	unsigned long flags = 0;
+
+	if (table_size)
+		flags |= PROC_TABLE_NEW;	/* only set for a non-zero table size */
+	if (radix_enabled()) {
+		flags |= PROC_TABLE_RADIX;
+		if (mmu_has_feature(MMU_FTR_GTSE))
+			flags |= PROC_TABLE_GTSE;
+	} else
+		flags |= PROC_TABLE_HPT_SLB;
+	for (;;) {
+		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+					page_size, table_size);
+		if (!H_IS_LONG_BUSY(rc))
+			break;
+		mdelay(get_longbusy_msecs(rc));	/* wait the delay derived from rc, then retry */
+	}
+	if (rc != H_SUCCESS) {
+		pr_err("Failed to register process table (rc=%ld)\n", rc);
+		BUG();
+	}
+	return rc;
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+
+static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
+				     unsigned long vpn, unsigned long pa,
+				     unsigned long rflags, unsigned long vflags,
+				     int psize, int apsize, int ssize)
+{	/* Returns -1 if the group is full, -2 on other failure, else slot bits (see end) */
+	unsigned long lpar_rc;
+	unsigned long flags;
+	unsigned long slot;
+	unsigned long hpte_v, hpte_r;
+
+	if (!(vflags & HPTE_V_BOLTED))
+		pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
+			 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
+			 hpte_group, vpn,  pa, rflags, vflags, psize);
+
+	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
+	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+
+	if (!(vflags & HPTE_V_BOLTED))
+		pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+	/* Now fill in the actual HPTE */
+	/* Set CEC cookie to 0         */
+	/* Zero page = 0               */
+	/* I-cache Invalidate = 0      */
+	/* I-cache synchronize = 0     */
+	/* Exact = 0                   */
+	flags = 0;
+
+	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+		flags |= H_COALESCE_CAND;
+
+	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
+	if (unlikely(lpar_rc == H_PTEG_FULL)) {
+		pr_devel("Hash table group is full\n");
+		return -1;
+	}
+
+	/*
+	 * Since we try and ioremap PHBs we don't own, the pte insert
+	 * will fail. However we must catch the failure in hash_page
+	 * or we will loop forever, so return -2 in this case.
+	 */
+	if (unlikely(lpar_rc != H_SUCCESS)) {
+		pr_err("Failed hash pte insert with error %ld\n", lpar_rc);
+		return -2;
+	}
+	if (!(vflags & HPTE_V_BOLTED))
+		pr_devel(" -> slot: %lu\n", slot & 7);
+
+	/* Because of iSeries, we have to pass down the secondary
+	 * bucket bit here as well
+	 */
+	return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);	/* bits 0-2 slot, bit 3 secondary */
+}
+
+static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
+
+static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
+{	/* Remove one non-bolted HPTE from @hpte_group; returns -1 if none was removed */
+	unsigned long slot_offset;
+	unsigned long lpar_rc;
+	int i;
+	unsigned long dummy1, dummy2;
+
+	/* pick a random slot to start at */
+	slot_offset = mftb() & 0x7;
+
+	for (i = 0; i < HPTES_PER_GROUP; i++) {
+
+		/* don't remove a bolted entry */
+		lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
+					   HPTE_V_BOLTED, &dummy1, &dummy2);
+		if (lpar_rc == H_SUCCESS)
+			return i;	/* number of failed attempts before this success */
+
+		/*
+		 * The test for adjunct partition is performed before the
+		 * ANDCOND test.  H_RESOURCE may be returned, so we need to
+		 * check for that as well.
+		 */
+		BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE);
+
+		slot_offset++;
+		slot_offset &= 0x7;	/* wrap around within the group */
+	}
+
+	return -1;
+}
+
+/* Called during kexec sequence with MMU off */
+static notrace void manual_hpte_clear_all(void)
+{	/* Walk the whole hash page table and remove valid entries one by one */
+	unsigned long size_bytes = 1UL << ppc64_pft_size;
+	unsigned long hpte_count = size_bytes >> 4;	/* table size in bytes divided by 16 */
+	struct {
+		unsigned long pteh;
+		unsigned long ptel;
+	} ptes[4];
+	long lpar_rc;
+	unsigned long i, j;
+
+	/* Read in batches of 4,
+	 * invalidate only valid entries not in the VRMA
+	 * hpte_count will be a multiple of 4
+	 */
+	for (i = 0; i < hpte_count; i += 4) {
+		lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
+		if (lpar_rc != H_SUCCESS) {
+			pr_info("Failed to read hash page table at %ld err %ld\n",
+				i, lpar_rc);
+			continue;	/* skip this batch of 4 and carry on */
+		}
+		for (j = 0; j < 4; j++){
+			if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
+				HPTE_V_VRMA_MASK)
+				continue;
+			if (ptes[j].pteh & HPTE_V_VALID)
+				plpar_pte_remove_raw(0, i + j, 0,
+					&(ptes[j].pteh), &(ptes[j].ptel));
+		}
+	}
+}
+
+/* Issue H_CLEAR_HPT until done; called during kexec sequence with MMU off */
+static notrace int hcall_hpte_clear_all(void)
+{
+	int hrc = plpar_hcall_norets(H_CLEAR_HPT);
+
+	/* H_CONTINUE: the hcall has to be issued again */
+	while (hrc == H_CONTINUE)
+		hrc = plpar_hcall_norets(H_CLEAR_HPT);
+
+	return hrc;
+}
+
+/* Called during kexec sequence with MMU off */
+static notrace void pseries_hpte_clear_all(void)
+{	/* Clear the HPT with the hcall, falling back to the manual walk on failure */
+	int rc;
+
+	rc = hcall_hpte_clear_all();
+	if (rc != H_SUCCESS)
+		manual_hpte_clear_all();
+
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * Reset exceptions to big endian.
+	 *
+	 * FIXME this is a hack for kexec, we need to reset the exception
+	 * endian before starting the new kernel and this is a convenient place
+	 * to do it.
+	 *
+	 * This is also called on boot when a fadump happens. In that case we
+	 * must not change the exception endian mode.
+	 */
+	if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active())
+		pseries_big_endian_exceptions();
+#endif
+}
+
+/*
+ * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up.  So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero.  For now I am paranoid.
+ */
+static long pSeries_lpar_hpte_updatepp(unsigned long slot,
+				       unsigned long newpp,
+				       unsigned long vpn,
+				       int psize, int apsize,
+				       int ssize, unsigned long inv_flags)
+{	/* Returns 0 on success, -1 if the entry was not found; BUG()s otherwise */
+	unsigned long lpar_rc;
+	unsigned long flags;
+	unsigned long want_v;
+
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+	flags = (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO)) | H_AVPN;
+	flags |= (newpp & HPTE_R_KEY_HI) >> 48;
+	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+		/* Move pp0 into bit 8 (IBM 55) */
+		flags |= (newpp & HPTE_R_PP0) >> 55;
+
+	pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+		 want_v, slot, flags, psize);
+
+	lpar_rc = plpar_pte_protect(flags, slot, want_v);
+
+	if (lpar_rc == H_NOT_FOUND) {
+		pr_devel("not found !\n");
+		return -1;
+	}
+
+	pr_devel("ok\n");
+
+	BUG_ON(lpar_rc != H_SUCCESS);
+
+	return 0;
+}
+
+static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group)
+{	/* Index within the group of the valid HPTE matching @want_v, or -1 */
+	long lpar_rc;
+	unsigned long i, j;
+	struct {
+		unsigned long pteh;
+		unsigned long ptel;
+	} ptes[4];
+
+	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {	/* 4 entries per read */
+
+		lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
+		if (lpar_rc != H_SUCCESS) {
+			pr_info("Failed to read hash page table at %ld err %ld\n",
+				hpte_group, lpar_rc);
+			continue;	/* skip this batch of 4 */
+		}
+
+		for (j = 0; j < 4; j++) {
+			if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
+			    (ptes[j].pteh & HPTE_V_VALID))
+				return i + j;
+		}
+	}
+
+	return -1;
+}
+
+static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
+{	/* Global slot number of the HPTE for @vpn, or -1 if in neither hash group */
+	long slot;
+	unsigned long hash;
+	unsigned long want_v;
+	unsigned long hpte_group;
+
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+	/*
+	 * We try to keep bolted entries always in primary hash
+	 * But in some case we can find them in secondary too.
+	 */
+	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+	if (slot < 0) {
+		/* Try in secondary */
+		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+		if (slot < 0)
+			return -1;
+	}
+	return hpte_group + slot;	/* group base plus index within the group */
+}
+
+static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
+					     unsigned long ea,
+					     int psize, int ssize)
+{	/* Change the protection of the HPTE mapping kernel address @ea; BUG()s on failure */
+	unsigned long vpn;
+	unsigned long lpar_rc, slot, vsid, flags;
+
+	vsid = get_kernel_vsid(ea, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
+
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
+	BUG_ON(slot == -1);	/* the find helper returns -1 when there is no entry */
+
+	flags = newpp & (HPTE_R_PP | HPTE_R_N);
+	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+		/* Move pp0 into bit 8 (IBM 55) */
+		flags |= (newpp & HPTE_R_PP0) >> 55;
+
+	flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO);
+
+	lpar_rc = plpar_pte_protect(flags, slot, 0);	/* H_AVPN is not set here, avpn passed as 0 */
+
+	BUG_ON(lpar_rc != H_SUCCESS);
+}
+
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
+					 int psize, int apsize,
+					 int ssize, int local)
+{	/* Remove the HPTE at @slot if it matches @vpn; an entry not found is not an error */
+	unsigned long want_v;
+	unsigned long lpar_rc;
+	unsigned long dummy1, dummy2;
+
+	pr_devel("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		 slot, vpn, psize, local);
+
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
+	lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
+	if (lpar_rc == H_NOT_FOUND)
+		return;
+
+	BUG_ON(lpar_rc != H_SUCCESS);
+}
+
+
+/*
+ * As defined in the PAPR's section 14.5.4.1.8
+ * The control mask doesn't include the returned reference and change bit from
+ * the processed PTE.
+ */
+#define HBLKR_AVPN		0x0100000000000000UL
+#define HBLKR_CTRL_MASK		0xf800000000000000UL
+#define HBLKR_CTRL_SUCCESS	0x8000000000000000UL
+#define HBLKR_CTRL_ERRNOTFOUND	0x8800000000000000UL
+#define HBLKR_CTRL_ERRBUSY	0xa000000000000000UL
+
+/*
+ * Return true if we support this block size for the specified segment
+ * base page size and actual page size.
+ *
+ * Currently, only a block size of 8 is supported.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+	return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE);
+}
+
+/*
+ * H_BLOCK_REMOVE caller.
+ * @idx should point to the latest @param entry set with a PTEX.
+ * If a PTE cannot be processed because another CPU has already locked that
+ * group, those entries are put back in @param starting at index 1.
+ * If entries have to be retried and @retry_busy is set to true, these entries
+ * are retried until success. If @retry_busy is set to false, the returned
+ * value is the number of entries yet to process.
+ */
+static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
+				       bool retry_busy)
+{
+	unsigned long i, rc, new_idx;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	if (idx < 2) {	/* param[0] is the AVA, so at least one PTEX is needed */
+		pr_warn("Unexpected empty call to H_BLOCK_REMOVE\n");
+		return 0;
+	}
+again:
+	new_idx = 0;
+	if (idx > PLPAR_HCALL9_BUFSIZE) {
+		pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE\n", idx);
+		idx = PLPAR_HCALL9_BUFSIZE;
+	} else if (idx < PLPAR_HCALL9_BUFSIZE)
+		param[idx] = HBR_END;	/* terminate a partially filled list */
+
+	rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
+			  param[0], /* AVA */
+			  param[1],  param[2],  param[3],  param[4], /* TS0-7 */
+			  param[5],  param[6],  param[7],  param[8]);
+	if (rc == H_SUCCESS)
+		return 0;
+
+	BUG_ON(rc != H_PARTIAL);
+
+	/* Check that the unprocessed entries were 'not found' or 'busy' */
+	for (i = 0; i < idx-1; i++) {
+		unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
+
+		if (ctrl == HBLKR_CTRL_ERRBUSY) {
+			param[++new_idx] = param[i+1];	/* compact busy entries from index 1 */
+			continue;
+		}
+
+		BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
+		       && ctrl != HBLKR_CTRL_ERRNOTFOUND);
+	}
+
+	/*
+	 * If there were entries found busy, retry these entries if requested,
+	 * or if all the entries have to be retried.
+	 */
+	if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
+		idx = new_idx + 1;
+		goto again;
+	}
+
+	return new_idx;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
+ * to make sure that we avoid bouncing the hypervisor tlbie lock.
+ */
+#define PPC64_HUGE_HPTE_BATCH 12
+
+static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
+				      int count, int psize, int ssize)
+{	/* Remove @count HPTEs with H_BLOCK_REMOVE, grouping them per 8-page block */
+	unsigned long param[PLPAR_HCALL9_BUFSIZE];
+	unsigned long shift, current_vpgb, vpgb;
+	int i, pix = 0;
+
+	shift = mmu_psize_defs[psize].shift;
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * Shifting 3 bits more on the right to get an
+		 * 8-page aligned virtual address.
+		 */
+		vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
+		if (!pix || vpgb != current_vpgb) {
+			/*
+			 * Need to start a new 8 pages block, flush
+			 * the current one if needed.
+			 */
+			if (pix)
+				(void)call_block_remove(pix, param, true);
+			current_vpgb = vpgb;
+			param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
+			pix = 1;
+		}
+
+		param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
+		if (pix == PLPAR_HCALL9_BUFSIZE) {	/* parameter list is full */
+			pix = call_block_remove(pix, param, false);
+			/*
+			 * pix = 0 means that all the entries were
+			 * removed, we can start a new block.
+			 * Otherwise, this means that there are entries
+			 * to retry, and pix points to latest one, so
+			 * we should increment it and try to continue
+			 * the same block.
+			 */
+			if (pix)
+				pix++;
+		}
+	}
+	if (pix)
+		(void)call_block_remove(pix, param, true);	/* flush what is left */
+}
+
+static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
+				     int count, int psize, int ssize)
+{	/* Remove @count HPTEs with H_BULK_REMOVE, or one by one without that feature */
+	unsigned long param[PLPAR_HCALL9_BUFSIZE];
+	int i = 0, pix = 0, rc;
+
+	for (i = 0; i < count; i++) {
+
+		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
+						     ssize, 0);
+		} else {
+			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
+			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
+			pix += 2;	/* each entry takes two parameter words */
+			if (pix == 8) {	/* four entries queued: issue the hcall */
+				rc = plpar_hcall9(H_BULK_REMOVE, param,
+						  param[0], param[1], param[2],
+						  param[3], param[4], param[5],
+						  param[6], param[7]);
+				BUG_ON(rc != H_SUCCESS);
+				pix = 0;
+			}
+		}
+	}
+	if (pix) {
+		param[pix] = HBR_END;	/* terminate the partially filled list */
+		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+				  param[2], param[3], param[4], param[5],
+				  param[6], param[7]);
+		BUG_ON(rc != H_SUCCESS);
+	}
+}
+
+static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+						      unsigned long *vpn,
+						      int count, int psize,
+						      int ssize)
+{	/* Invalidate one batch, under pSeries_lpar_tlbie_lock unless tlbie is lockless */
+	unsigned long flags = 0;
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+	if (lock_tlbie)
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+	/* Assuming THP size is 16M */
+	if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
+		hugepage_block_invalidate(slot, vpn, count, psize, ssize);
+	else
+		hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
+
+	if (lock_tlbie)
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize, int local)
+{	/* Invalidate every valid HPTE recorded in @hpte_slot_array, in batches */
+	int i, index = 0;
+	unsigned long s_addr = addr;
+	unsigned int max_hpte_count, valid;
+	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long shift, hidx, vpn = 0, hash, slot;
+
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = 1U << (PMD_SHIFT - shift);
+
+	for (i = 0; i < max_hpte_count; i++) {
+		valid = hpte_valid(hpte_slot_array, i);
+		if (!valid)
+			continue;
+		hidx =  hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		slot_array[index] = slot;
+		vpn_array[index] = vpn;
+		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
+			/*
+			 * Now do a bulk invalidate
+			 */
+			__pSeries_lpar_hugepage_invalidate(slot_array,
+							   vpn_array,
+							   PPC64_HUGE_HPTE_BATCH,
+							   psize, ssize);
+			index = 0;
+		} else
+			index++;
+	}
+	if (index)	/* flush the last, partially filled batch */
+		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
+						   index, psize, ssize);
+}
+#else
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize, int local)
+{	/* Stub built without CONFIG_TRANSPARENT_HUGEPAGE: only warns */
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
+
+static int pSeries_lpar_hpte_removebolted(unsigned long ea,
+					  int psize, int ssize)
+{	/* Remove the HPTE mapping kernel address @ea; -ENOENT if there is none */
+	unsigned long vpn;
+	unsigned long slot, vsid;
+
+	vsid = get_kernel_vsid(ea, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
+
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
+	if (slot == -1)
+		return -ENOENT;
+
+	/*
+	 * lpar doesn't use the passed actual page size
+	 */
+	pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
+	return 0;
+}
+
+
+static inline unsigned long compute_slot(real_pte_t pte,
+					 unsigned long vpn,
+					 unsigned long index,
+					 unsigned long shift,
+					 int ssize)
+{	/* Global HPT slot of subpage @index of @pte: group base + index in group */
+	unsigned long bucket = hpt_hash(vpn, shift, ssize);
+	unsigned long hint = __rpte_to_hidx(pte, index);
+
+	/* Entries flagged as secondary are looked up with the inverted hash */
+	if (hint & _PTEIDX_SECONDARY)
+		bucket = ~bucket;
+
+	return (bucket & htab_hash_mask) * HPTES_PER_GROUP +
+	       (hint & _PTEIDX_GROUP_IX);
+}
+
+/*
+ * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
+ * "all within the same naturally aligned 8 page virtual address block".
+ */
+static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
+			    unsigned long *param)
+{
+	unsigned long vpn;
+	unsigned long i, pix = 0;
+	unsigned long index, shift, slot, current_vpgb, vpgb;
+	real_pte_t pte;
+	int psize, ssize;
+
+	psize = batch->psize;
+	ssize = batch->ssize;
+
+	for (i = 0; i < number; i++) {
+		vpn = batch->vpn[i];
+		pte = batch->pte[i];
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			/*
+			 * Shifting 3 bits more on the right to get an
+			 * 8-page aligned virtual address.
+			 */
+			vpgb = (vpn >> (shift - VPN_SHIFT + 3));
+			if (!pix || vpgb != current_vpgb) {
+				/*
+				 * Need to start a new 8 pages block, flush
+				 * the current one if needed.
+				 */
+				if (pix)
+					(void)call_block_remove(pix, param,
+								true);
+				current_vpgb = vpgb;
+				param[0] = hpte_encode_avpn(vpn, psize,
+							    ssize);
+				pix = 1;
+			}
+
+			slot = compute_slot(pte, vpn, index, shift, ssize);
+			param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
+
+			if (pix == PLPAR_HCALL9_BUFSIZE) {	/* parameter list is full */
+				pix = call_block_remove(pix, param, false);
+				/*
+				 * pix = 0 means that all the entries were
+				 * removed, we can start a new block.
+				 * Otherwise, this means that there are entries
+				 * to retry, and pix points to latest one, so
+				 * we should increment it and try to continue
+				 * the same block.
+				 */
+				if (pix)
+					pix++;
+			}
+		} pte_iterate_hashed_end();
+	}
+
+	if (pix)
+		(void)call_block_remove(pix, param, true);	/* flush what is left */
+}
+
+/*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each couple segment base page size, actual page size.
+ *
+ * ibm,get-system-parameter returns a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ *          TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ *          MSB=0 means 4k segment base page size and actual page size
+ *          MSB=1 the penc value in mmu_psize_def ]
+ * ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+					unsigned int block_size)
+{	/* Record @block_size for (bpsize, psize) only if larger than the stored value */
+	if (block_size > hblkrm_size[bpsize][psize])
+		hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the Encoded segment base page size and actual page size.
+ * PAPR specifies:
+ *   - bit 7 is the L bit
+ *   - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page size
+ * otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK		0x80
+#define HBLKRM_PENC_MASK	0x3f
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+					      unsigned int block_size)
+{	/* Decode the encoded page-size byte @lp and record @block_size for that pair */
+	unsigned int bpsize, psize;
+
+	/* First, check the L bit, if not set, this means 4K */
+	if ((lp & HBLKRM_L_MASK) == 0) {
+		set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+		return;
+	}
+
+	lp &= HBLKRM_PENC_MASK;	/* keep only the penc value */
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+		struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+			if (def->penc[psize] == lp) {
+				set_hblkrm_bloc_size(bpsize, psize, block_size);
+				return;	/* only the first matching pair is updated */
+			}
+		}
+	}
+}
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH	128
+
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{	/* Fill hblkrm_size[][] from the TLB block invalidate system parameter */
+	static struct papr_sysparm_buf buf __initdata;
+	int len, idx, bpsize;
+
+	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+		return;
+
+	if (papr_sysparm_get(PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS, &buf))
+		return;
+
+	len = be16_to_cpu(buf.len);
+	if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+		pr_warn("%s too large returned buffer %d\n", __func__, len);
+		return;
+	}
+
+	idx = 0;
+	while (idx < len) {	/* one iteration per block invalidate specifier */
+		u8 block_shift = buf.val[idx++];
+		u32 block_size;
+		unsigned int npsize;
+
+		if (!block_shift)
+			break;	/* a zero byte ends the list */
+
+		block_size = 1U << block_shift;	/* unsigned, so a shift of 31 cannot overflow int */
+
+		for (npsize = buf.val[idx++];
+		     npsize > 0 && idx < len; npsize--)
+			check_lp_set_hblkrm((unsigned int)buf.val[idx++],
+					    block_size);
+	}
+
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+			if (hblkrm_size[bpsize][idx])
+				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d\n",
+					bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
+/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
+{	/* Invalidate the first @number entries of this cpu's ppc64_tlb_batch */
+	unsigned long vpn;
+	unsigned long i, pix, rc;
+	unsigned long flags = 0;
+	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+	unsigned long param[PLPAR_HCALL9_BUFSIZE];
+	unsigned long index, shift, slot;
+	real_pte_t pte;
+	int psize, ssize;
+
+	if (lock_tlbie)
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+	if (is_supported_hlbkrm(batch->psize, batch->psize)) {
+		do_block_remove(number, batch, param);	/* H_BLOCK_REMOVE path */
+		goto out;
+	}
+
+	psize = batch->psize;
+	ssize = batch->ssize;
+	pix = 0;
+	for (i = 0; i < number; i++) {
+		vpn = batch->vpn[i];
+		pte = batch->pte[i];
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			slot = compute_slot(pte, vpn, index, shift, ssize);
+			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+				/*
+				 * lpar doesn't use the passed actual page size
+				 */
+				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
+							     0, ssize, local);
+			} else {
+				param[pix] = HBR_REQUEST | HBR_AVPN | slot;
+				param[pix+1] = hpte_encode_avpn(vpn, psize,
+								ssize);
+				pix += 2;	/* each entry takes two parameter words */
+				if (pix == 8) {	/* four entries queued: issue the hcall */
+					rc = plpar_hcall9(H_BULK_REMOVE, param,
+						param[0], param[1], param[2],
+						param[3], param[4], param[5],
+						param[6], param[7]);
+					BUG_ON(rc != H_SUCCESS);
+					pix = 0;
+				}
+			}
+		} pte_iterate_hashed_end();
+	}
+	if (pix) {
+		param[pix] = HBR_END;	/* terminate the partially filled list */
+		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+				  param[2], param[3], param[4], param[5],
+				  param[6], param[7]);
+		BUG_ON(rc != H_SUCCESS);
+	}
+
+out:
+	if (lock_tlbie)
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+/*
+ * Handler for the "bulk_remove=" kernel command line option.
+ *
+ * "bulk_remove=off" clears FW_FEATURE_BULK_REMOVE from
+ * powerpc_firmware_features when the firmware advertised it; any other value
+ * is accepted and ignored. Always returns 1.
+ */
+static int __init disable_bulk_remove(char *str)
+{
+	if (strcmp(str, "off") == 0 &&
+	    firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+		/* Newline-terminated like the other log messages in this file. */
+		pr_info("Disabling BULK_REMOVE firmware feature\n");
+		powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
+	}
+	return 1;
+}
+
+__setup("bulk_remove=", disable_bulk_remove);
+
+#define HPT_RESIZE_TIMEOUT	10000 /* ms */
+
+/*
+ * Argument block handed to pseries_lpar_resize_hpt_commit() through
+ * stop_machine_cpuslocked().
+ */
+struct hpt_resize_state {
+	unsigned long shift;	/* requested HPT size, as a shift */
+	int commit_rc;		/* status returned by plpar_resize_hpt_commit() */
+};
+
+/*
+ * Commit a prepared HPT resize. @data points to a struct hpt_resize_state;
+ * the hcall status is stored in its commit_rc field.
+ *
+ * Returns 0 after updating the HTAB size globals, or -EIO when the
+ * hypervisor did not report H_SUCCESS.
+ */
+static int pseries_lpar_resize_hpt_commit(void *data)
+{
+	struct hpt_resize_state *resize = data;
+	int hrc;
+
+	hrc = plpar_resize_hpt_commit(0, resize->shift);
+	resize->commit_rc = hrc;
+
+	if (hrc == H_SUCCESS) {
+		/* Hypervisor has transitioned the HTAB, update our globals */
+		ppc64_pft_size = resize->shift;
+		htab_size_bytes = 1UL << ppc64_pft_size;
+		htab_hash_mask = (htab_size_bytes >> 7) - 1;
+		return 0;
+	}
+
+	return -EIO;
+}
+
+/*
+ * Resize the hash page table to 2^@shift bytes.
+ *
+ * Must be called in process context (the function may sleep while the
+ * hypervisor reports long-busy). The caller must hold the cpus_lock, as
+ * the commit step runs through stop_machine_cpuslocked().
+ *
+ * Returns 0 on success, or:
+ *   -ENODEV     firmware lacks FW_FEATURE_HPT_RESIZE
+ *   -ETIMEDOUT  prepare stayed long-busy for more than HPT_RESIZE_TIMEOUT ms
+ *   -EINVAL     prepare returned H_PARAMETER
+ *   -EPERM      prepare returned H_RESOURCE
+ *   -ENOSPC     commit returned H_PTEG_FULL
+ *   -EIO        any other prepare or commit failure
+ */
+static int pseries_lpar_resize_hpt(unsigned long shift)
+{
+	struct hpt_resize_state state = {
+		.shift = shift,
+		.commit_rc = H_FUNCTION,
+	};
+	unsigned int delay, total_delay = 0;
+	int rc;
+	ktime_t t0, t1, t2;
+
+	might_sleep();
+
+	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		return -ENODEV;
+
+	pr_info("Attempting to resize HPT to shift %lu\n", shift);
+
+	t0 = ktime_get();
+
+	/* Poll the prepare hcall, sleeping for the delay the hypervisor hints. */
+	rc = plpar_resize_hpt_prepare(0, shift);
+	while (H_IS_LONG_BUSY(rc)) {
+		delay = get_longbusy_msecs(rc);
+		total_delay += delay;
+		if (total_delay > HPT_RESIZE_TIMEOUT) {
+			/* prepare with shift==0 cancels an in-progress resize */
+			rc = plpar_resize_hpt_prepare(0, 0);
+			if (rc != H_SUCCESS)
+				pr_warn("Unexpected error %d cancelling timed out HPT resize\n",
+				       rc);
+			return -ETIMEDOUT;
+		}
+		msleep(delay);
+		rc = plpar_resize_hpt_prepare(0, shift);
+	}
+
+	switch (rc) {
+	case H_SUCCESS:
+		/* Continue on */
+		break;
+
+	case H_PARAMETER:
+		pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
+		return -EINVAL;
+	case H_RESOURCE:
+		pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
+		return -EPERM;
+	default:
+		pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
+		return -EIO;
+	}
+
+	t1 = ktime_get();
+
+	/* The commit callback records the raw hcall status in state.commit_rc. */
+	rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
+				     &state, NULL);
+
+	t2 = ktime_get();
+
+	if (rc != 0) {
+		switch (state.commit_rc) {
+		case H_PTEG_FULL:
+			return -ENOSPC;
+
+		default:
+			pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+				state.commit_rc);
+			return -EIO;
+		};
+	}
+
+	/* Report prepare time (t0..t1) and commit time (t1..t2). */
+	pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+		shift, (long long) ktime_ms_delta(t1, t0),
+		(long long) ktime_ms_delta(t2, t1));
+
+	return 0;
+}
+
+/*
+ * Install the pSeries LPAR implementations of the hash MMU operations.
+ * resize_hpt is only provided when the firmware has FW_FEATURE_HPT_RESIZE.
+ */
+void __init hpte_init_pseries(void)
+{
+	mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
+	mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp;
+	mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
+	mmu_hash_ops.hpte_insert = pSeries_lpar_hpte_insert;
+	mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove;
+	mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted;
+	mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
+	mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
+	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+
+	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	/*
+	 * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+	 * to inform the hypervisor that we wish to use the HPT.
+	 */
+	pseries_lpar_register_process_table(0, 0, 0);
+}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#ifdef CONFIG_PPC_RADIX_MMU
+/*
+ * Announce radix mode and register the process table with the hypervisor,
+ * passing its physical address and a size of PRTB_SIZE_SHIFT - 12.
+ */
+void __init radix_init_pseries(void)
+{
+	unsigned long prtb_size = PRTB_SIZE_SHIFT - 12;
+
+	pr_info("Using radix MMU under hypervisor\n");
+
+	pseries_lpar_register_process_table(__pa(process_tb), 0, prtb_size);
+}
+#endif
+
+#ifdef CONFIG_PPC_SMLPAR
+#define CMO_FREE_HINT_DEFAULT 1
+static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;
+
+/*
+ * Handler for the "cmo_free_hint=" kernel command line option.
+ *
+ * "no"/"off" (case-insensitive) disable free page hinting and return 1.
+ * Every other value enables it; the return value is 1 for "yes"/"on" and 0
+ * for anything else.
+ */
+static int __init cmo_free_hint(char *str)
+{
+	char *arg = strstrip(str);
+
+	if (!strcasecmp(arg, "no") || !strcasecmp(arg, "off")) {
+		pr_info("%s: CMO free page hinting is not active.\n", __func__);
+		cmo_free_hint_flag = 0;
+		return 1;
+	}
+
+	cmo_free_hint_flag = 1;
+	pr_info("%s: CMO free page hinting is active.\n", __func__);
+
+	if (!strcasecmp(arg, "yes") || !strcasecmp(arg, "on"))
+		return 1;
+	else
+		return 0;
+}
+
+__setup("cmo_free_hint=", cmo_free_hint);
+
+/*
+ * Issue H_PAGE_INIT with @state for every CMO-page-sized piece of the
+ * 2^@order kernel pages starting at @page.
+ */
+static void pSeries_set_page_state(struct page *page, int order,
+				   unsigned long state)
+{
+	unsigned long cmo_page_sz = cmo_get_page_size();
+	unsigned long addr = __pa((unsigned long)page_address(page));
+	int nr_pages = 1 << order;
+	int i, j;
+
+	for (i = 0; i < nr_pages; i++) {
+		/* Walk one kernel page in CMO page size steps. */
+		for (j = 0; j < PAGE_SIZE; j += cmo_page_sz)
+			plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0);
+
+		addr += PAGE_SIZE;
+	}
+}
+
+/*
+ * Hint to the hypervisor that freed pages are unused. Skipped under radix,
+ * when hinting was disabled via cmo_free_hint_flag, or when the firmware
+ * lacks FW_FEATURE_CMO.
+ */
+void arch_free_page(struct page *page, int order)
+{
+	if (radix_enabled() || !cmo_free_hint_flag ||
+	    !firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
+}
+EXPORT_SYMBOL(arch_free_page);
+
+#endif /* CONFIG_PPC_SMLPAR */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_JUMP_LABEL
+/* Static key incremented/decremented as hcall tracepoints are (un)registered. */
+struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
+
+/* Tracepoint registration hook: bump the static key. Always returns 0. */
+int hcall_tracepoint_regfunc(void)
+{
+	static_key_slow_inc(&hcall_tracepoint_key);
+	return 0;
+}
+
+/* Tracepoint unregistration hook: drop the static key reference. */
+void hcall_tracepoint_unregfunc(void)
+{
+	static_key_slow_dec(&hcall_tracepoint_key);
+}
+#else
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+/*
+ * Tracepoint registration hook: plain (non-atomic) increment of the
+ * refcount; see the note above about the mutex. Always returns 0.
+ */
+int hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+	return 0;
+}
+
+/* Tracepoint unregistration hook: plain decrement of the refcount. */
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+#endif
+
+/*
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this is better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
+ */
+static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
+
+
+/*
+ * Emit the hcall_entry tracepoint for @opcode with arguments @args.
+ *
+ * Runs with interrupts disabled and uses the per-CPU hcall_trace_depth
+ * counter as a recursion guard: if it is already non-zero, warn once and
+ * skip tracing.
+ *
+ * On the traced path this function calls preempt_disable() and does not
+ * re-enable preemption itself; the matching preempt_enable() is in
+ * __trace_hcall_exit().
+ */
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	local_irq_save(flags);
+
+	depth = this_cpu_ptr(&hcall_trace_depth);
+
+	/* Already inside hcall tracing on this CPU: do not recurse. */
+	if (WARN_ON_ONCE(*depth))
+		goto out;
+
+	(*depth)++;
+	preempt_disable();
+	trace_hcall_entry(opcode, args);
+	(*depth)--;
+
+out:
+	local_irq_restore(flags);
+}
+
+/*
+ * Emit the hcall_exit tracepoint for @opcode with status @retval and
+ * return buffer @retbuf.
+ *
+ * Mirrors __trace_hcall_entry(): interrupts are disabled for the duration
+ * and the per-CPU hcall_trace_depth counter guards against recursion. On
+ * the traced path it calls preempt_enable(), pairing with the
+ * preempt_disable() done in __trace_hcall_entry().
+ */
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	local_irq_save(flags);
+
+	depth = this_cpu_ptr(&hcall_trace_depth);
+
+	if (*depth) /* Don't warn again on the way out */
+		goto out;
+
+	(*depth)++;
+	trace_hcall_exit(opcode, retval, retbuf);
+	preempt_enable();
+	(*depth)--;
+
+out:
+	local_irq_restore(flags);
+}
+#endif
+
+/**
+ * h_get_mpp - fetch memory entitlement data with the H_GET_MPP hcall
+ * @mpp_data: filled in from the hcall return buffer
+ *
+ * H_GET_MPP hcall returns info in 7 parms
+ *
+ * @mpp_data is written regardless of the hcall status. The return buffer
+ * is zero-initialised, as in h_get_mpp_x(), so a failed hcall yields zeroes
+ * rather than stale stack contents.
+ *
+ * Return: the status returned by plpar_hcall9().
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+	mpp_data->entitled_mem = retbuf[0];
+	mpp_data->mapped_mem = retbuf[1];
+
+	/* retbuf[2]: group number in bits 16-31, pool number in bits 0-15 */
+	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	mpp_data->pool_num = retbuf[2] & 0xffff;
+
+	/* retbuf[3]: two weight bytes on top, 48-bit entitlement below */
+	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL;
+
+	mpp_data->pool_size = retbuf[4];
+	mpp_data->loan_request = retbuf[5];
+	mpp_data->backing_mem = retbuf[6];
+
+	return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+/*
+ * Issue H_GET_MPP_X and copy the first four return words into @mpp_x_data.
+ * The structure is filled in whatever status the hcall returns; that status
+ * is the return value.
+ */
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+	unsigned long out[PLPAR_HCALL9_BUFSIZE] = { 0 };
+	int status = plpar_hcall9(H_GET_MPP_X, out);
+
+	mpp_x_data->coalesced_bytes      = out[0];
+	mpp_x_data->pool_coalesced_bytes = out[1];
+	mpp_x_data->pool_purr_cycles     = out[2];
+	mpp_x_data->pool_spurr_cycles    = out[3];
+
+	return status;
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * Recover the proto-VSID that scrambles to @vsid for segment size @ssize.
+ *
+ * Returns 0 when @vsid is not below the VSID modulus for the configured
+ * virtual address width.
+ *
+ * NOTE(review): @vsid is used as a divisor below, so callers must not pass
+ * 0; the only caller visible here passes VRMA_VSID.
+ */
+static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize)
+{
+	unsigned long protovsid;
+	unsigned long va_bits = VA_BITS;
+	unsigned long modinv, vsid_modulus;
+	unsigned long max_mod_inv, tmp_modinv;
+
+	/* Without 68-bit VA support a 65-bit VA width is assumed. */
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;
+
+	/* Pick the multiplicative inverse and modulus for the segment size. */
+	if (ssize == MMU_SEGSIZE_256M) {
+		modinv = VSID_MULINV_256M;
+		vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1);
+	} else {
+		modinv = VSID_MULINV_1T;
+		vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1);
+	}
+
+	/*
+	 * vsid outside our range.
+	 */
+	if (vsid >= vsid_modulus)
+		return 0;
+
+	/*
+	 * If modinv is the modular multiplicative inverse of (x % vsid_modulus)
+	 * and vsid = (protovsid * x) % vsid_modulus, then we say:
+	 *   protovsid = (vsid * modinv) % vsid_modulus
+	 */
+
+	/* Check if (vsid * modinv) overflow (63 bits) */
+	max_mod_inv = 0x7fffffffffffffffull / vsid;
+	if (modinv < max_mod_inv)
+		return (vsid * modinv) % vsid_modulus;
+
+	/*
+	 * Split modinv = tmp_modinv * max_mod_inv + remainder and reduce
+	 * modulo vsid_modulus between the partial products.
+	 */
+	tmp_modinv = modinv/max_mod_inv;
+	modinv %= max_mod_inv;
+
+	protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus;
+	protovsid = (protovsid + vsid * modinv) % vsid_modulus;
+
+	return protovsid;
+}
+
+/*
+ * Reserve context ids which map to reserved virtual addresses. For now
+ * we only reserve the context id which maps to the VRMA VSID. We ignore
+ * the addresses in "ibm,adjunct-virtual-addresses" because we don't
+ * enable adjunct support via the "ibm,client-architecture-support"
+ * interface.
+ *
+ * Always returns 0.
+ */
+static int __init reserve_vrma_context_id(void)
+{
+	unsigned long vrma_protovsid = vsid_unscramble(VRMA_VSID,
+						       MMU_SEGSIZE_1T);
+
+	hash__reserve_context_id(vrma_protovsid >> ESID_BITS_1T);
+
+	return 0;
+}
+machine_device_initcall(pseries, reserve_vrma_context_id);
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs file interface for vpa data: read handler that exposes the raw
+ * struct lppaca of the CPU whose number is stored in filp->private_data.
+ */
+static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
+			      loff_t *pos)
+{
+	int cpu_nr = (long)filp->private_data;
+	struct lppaca *vpa = &lppaca_of(cpu_nr);
+	size_t vpa_size = sizeof(struct lppaca);
+
+	return simple_read_from_buffer(buf, len, pos, vpa, vpa_size);
+}
+
+/* File operations for the per-CPU debugfs "vpa/cpu-N" files (read-only data). */
+static const struct file_operations vpa_fops = {
+	.open		= simple_open,
+	.read		= vpa_file_read,	/* dumps the CPU's struct lppaca */
+	.llseek		= default_llseek,
+};
+
+/*
+ * Create the debugfs directory "vpa" under arch_debugfs_dir with one
+ * read-only (0400) file "cpu-N" per possible CPU. The CPU number is stored
+ * as the file's private data for vpa_file_read().
+ *
+ * Does nothing without FW_FEATURE_SPLPAR. Always returns 0.
+ */
+static int __init vpa_debugfs_init(void)
+{
+	char name[16];
+	long i;
+	struct dentry *vpa_dir;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir);
+
+	/* set up the per-cpu vpa file */
+	for_each_possible_cpu(i) {
+		/* Bounded formatting: never write past the end of name[]. */
+		snprintf(name, sizeof(name), "cpu-%ld", i);
+		debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
+	}
+
+	return 0;
+}
+machine_arch_initcall(pseries, vpa_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
new file mode 100644
index 0000000000..1c151d77e7
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ *    Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ *    SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ *    seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ *    Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <asm/papr-sysparm.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+#include <asm/vdso_datapage.h>
+#include <asm/vio.h>
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+#include <asm/drmem.h>
+
+#include "pseries.h"
+#include "vas.h"	/* pseries_vas_dlpar_cpu() */
+
+/*
+ * This isn't a module but we expose that to userspace
+ * via /proc so leave the definitions here
+ */
+#define MODULE_VERS "1.9"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/*
+ * Per-CPU callback: add this processor's PURR register value to the
+ * atomic64_t accumulator passed in @arg. The sum across all processors is
+ * used to further calculate usage values by different applications.
+ */
+static void cpu_get_purr(void *arg)
+{
+	atomic64_add(mfspr(SPRN_PURR), (atomic64_t *)arg);
+}
+
+/* Return the sum of the PURR values collected by running cpu_get_purr() on each CPU. */
+static unsigned long get_purr(void)
+{
+	atomic64_t total = ATOMIC64_INIT(0);
+
+	/* Last argument 1: wait until every CPU has added its value. */
+	on_each_cpu(cpu_get_purr, &total, 1);
+
+	return atomic64_read(&total);
+}
+
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+/* Decoded return values of the H_GET_PPP hcall; filled in by h_get_ppp(). */
+struct hvcall_ppp_data {
+	u64	entitlement;			/* return word 0 */
+	u64	unallocated_entitlement;	/* return word 1 */
+	u16	group_num;			/* word 2, bits 16-31 */
+	u16	pool_num;			/* word 2, bits 0-15 */
+	u8	capped;				/* word 3, bit 48 */
+	u8	weight;				/* word 3, bits 40-47 */
+	u8	unallocated_weight;		/* word 3, bits 32-39 */
+	u16	active_procs_in_pool;		/* word 3, bits 16-31 */
+	u16	active_system_procs;		/* word 3, bits 0-15 */
+	u16	phys_platform_procs;		/* word 4, bits 48-63 */
+	u32	max_proc_cap_avail;		/* word 4, bits 24-47 */
+	u32	entitled_proc_cap_avail;	/* word 4, bits 0-23 */
+};
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ *  entitled_capacity,unallocated_capacity,
+ *  aggregation, resource_capability).
+ *
+ *  R4 = Entitled Processor Capacity Percentage.
+ *  R5 = Unallocated Processor Capacity Percentage.
+ *  R6 (AABBCCDDEEFFGGHH).
+ *      XXXX - reserved (0)
+ *          XXXX - reserved (0)
+ *              XXXX - Group Number
+ *                  XXXX - Pool Number.
+ *  R7 (IIJJKKLLMMNNOOPP).
+ *      XX - reserved. (0)
+ *        XX - bit 0-6 reserved (0).   bit 7 is Capped indicator.
+ *          XX - variable processor Capacity Weight
+ *            XX - Unallocated Variable Processor Capacity Weight.
+ *              XXXX - Active processors in Physical Processor Pool.
+ *                  XXXX  - Processors active on platform.
+ *  R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1
+ *	XXXX - Physical platform procs allocated to virtualization.
+ *	    XXXXXX - Max procs capacity % available to the partitions pool.
+ *	          XXXXXX - Entitled procs capacity % available to the
+ *			   partitions pool.
+ */
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
+{
+	unsigned long rc;
+	/*
+	 * Zero the return buffer: *ppp_data is filled in below whatever the
+	 * hcall status is, so a failed hcall must not expose stale stack
+	 * contents.
+	 */
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_PPP, retbuf);
+
+	ppp_data->entitlement = retbuf[0];
+	ppp_data->unallocated_entitlement = retbuf[1];
+
+	/* Shift counts are written as <bytes> * 8 to mirror the layout above. */
+	ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	ppp_data->pool_num = retbuf[2] & 0xffff;
+
+	ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+	ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+	ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+	ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+	ppp_data->active_system_procs = retbuf[3] & 0xffff;
+
+	ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8;
+	ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff;
+	ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff;
+
+	/* The hcall status is returned to the caller (0 means success). */
+	return rc;
+}
+
+/*
+ * Print "partition_affinity_score=" for the local LPAR, obtained through
+ * H_GET_PERF_COUNTER_INFO. Nothing is printed when the request buffer
+ * cannot be allocated or the hcall fails.
+ */
+static void show_gpci_data(struct seq_file *m)
+{
+	struct hv_gpci_request_buffer *req;
+	long hret;
+
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return;
+
+	/*
+	 * Show the local LPAR's affinity score.
+	 *
+	 * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall.
+	 * The score is at byte 0xB in the output buffer.
+	 */
+	memset(&req->params, 0, sizeof(req->params));
+	req->params.counter_request = cpu_to_be32(0xB1);
+	req->params.starting_index = cpu_to_be32(-1);	/* local LPAR */
+	req->params.counter_info_version_in = 0x5;	/* v5+ for score */
+
+	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(req),
+				  sizeof(*req));
+	if (hret == H_SUCCESS) {
+		unsigned int affinity_score = req->bytes[0xB];
+
+		seq_printf(m, "partition_affinity_score=%u\n", affinity_score);
+	} else {
+		pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
+			 hret, be32_to_cpu(req->params.detail_rc));
+	}
+
+	kfree(req);
+}
+
+/*
+ * Issue the H_PIC hcall and hand back its first two return words.
+ *
+ * @pool_idle_time: receives return word 0
+ * @num_procs:      receives return word 1
+ *
+ * Both outputs are written whatever the hcall status is. The return buffer
+ * is zero-initialised so that a caller which ignores the status (as
+ * parse_ppp_data() does) sees zeroes rather than stale stack contents
+ * when the hcall fails.
+ *
+ * Returns the hcall status.
+ */
+static unsigned h_pic(unsigned long *pool_idle_time,
+		      unsigned long *num_procs)
+{
+	unsigned long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall(H_PIC, retbuf);
+
+	*pool_idle_time = retbuf[0];
+	*num_procs = retbuf[1];
+
+	return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Emit the "key=value" lines derived from h_get_ppp() and, for shared
+ * processor partitions, h_pic(). Prints nothing when h_get_ppp() fails.
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+	struct hvcall_ppp_data ppp;
+	struct device_node *root;
+	const __be32 *perf_level;
+
+	if (h_get_ppp(&ppp))
+		return;
+
+	seq_printf(m, "partition_entitled_capacity=%lld\n", ppp.entitlement);
+	seq_printf(m, "group=%d\n", ppp.group_num);
+	seq_printf(m, "system_active_processors=%d\n",
+		   ppp.active_system_procs);
+
+	/* pool related entries are appropriate for shared configs */
+	if (lppaca_shared_proc()) {
+		unsigned long idle_time, nr_pool_procs;
+
+		seq_printf(m, "pool=%d\n", ppp.pool_num);
+
+		/* report pool_capacity in percentage */
+		seq_printf(m, "pool_capacity=%d\n",
+			   ppp.active_procs_in_pool * 100);
+
+		h_pic(&idle_time, &nr_pool_procs);
+		seq_printf(m, "pool_idle_time=%ld\n", idle_time);
+		seq_printf(m, "pool_num_procs=%ld\n", nr_pool_procs);
+	}
+
+	seq_printf(m, "unallocated_capacity_weight=%d\n",
+		   ppp.unallocated_weight);
+	seq_printf(m, "capacity_weight=%d\n", ppp.weight);
+	seq_printf(m, "capped=%d\n", ppp.capped);
+	seq_printf(m, "unallocated_capacity=%lld\n",
+		   ppp.unallocated_entitlement);
+
+	/*
+	 * The last bits of information returned from h_get_ppp are only
+	 * valid if the ibm,partition-performance-parameters-level
+	 * property is >= 1.
+	 */
+	root = of_find_node_by_path("/");
+	if (!root)
+		return;
+
+	perf_level = of_get_property(root,
+				     "ibm,partition-performance-parameters-level",
+				     NULL);
+	if (perf_level && be32_to_cpup(perf_level) >= 1) {
+		seq_printf(m,
+			   "physical_procs_allocated_to_virtualization=%d\n",
+			   ppp.phys_platform_procs);
+		seq_printf(m, "max_proc_capacity_available=%d\n",
+			   ppp.max_proc_cap_avail);
+		seq_printf(m, "entitled_proc_capacity_available=%d\n",
+			   ppp.entitled_proc_cap_avail);
+	}
+
+	of_node_put(root);
+}
+
+/*
+ * parse_mpp_data
+ * Emit the "key=value" lines derived from h_get_mpp(). Prints nothing when
+ * the hcall fails; the mapped memory and pool size lines are omitted when
+ * their value is -1.
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+	struct hvcall_mpp_data mpp;
+
+	if (h_get_mpp(&mpp))
+		return;
+
+	seq_printf(m, "entitled_memory=%ld\n", mpp.entitled_mem);
+
+	if (mpp.mapped_mem != -1)
+		seq_printf(m, "mapped_entitled_memory=%ld\n", mpp.mapped_mem);
+
+	seq_printf(m, "entitled_memory_group_number=%d\n", mpp.group_num);
+	seq_printf(m, "entitled_memory_pool_number=%d\n", mpp.pool_num);
+
+	seq_printf(m, "entitled_memory_weight=%d\n", mpp.mem_weight);
+	seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+		   mpp.unallocated_mem_weight);
+	seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+		   mpp.unallocated_entitlement);
+
+	if (mpp.pool_size != -1)
+		seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+			   mpp.pool_size);
+
+	seq_printf(m, "entitled_memory_loan_request=%ld\n", mpp.loan_request);
+
+	seq_printf(m, "backing_memory=%ld bytes\n", mpp.backing_mem);
+}
+
+/*
+ * parse_mpp_x_data
+ * Emit the "key=value" lines derived from h_get_mpp_x(). Requires
+ * FW_FEATURE_XCMO; the pool lines are only printed for non-zero values.
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+	struct hvcall_mpp_x_data xcmo;
+
+	if (!firmware_has_feature(FW_FEATURE_XCMO) || h_get_mpp_x(&xcmo))
+		return;
+
+	seq_printf(m, "coalesced_bytes=%ld\n", xcmo.coalesced_bytes);
+
+	if (xcmo.pool_coalesced_bytes)
+		seq_printf(m, "pool_coalesced_bytes=%ld\n",
+			   xcmo.pool_coalesced_bytes);
+
+	if (xcmo.pool_purr_cycles)
+		seq_printf(m, "coalesce_pool_purr=%ld\n",
+			   xcmo.pool_purr_cycles);
+
+	if (xcmo.pool_spurr_cycles)
+		seq_printf(m, "coalesce_pool_spurr=%ld\n",
+			   xcmo.pool_spurr_cycles);
+}
+
+/*
+ * Print "partition_name=" using the LPAR name system parameter
+ * (RTAS ibm,get-system-parameter).
+ *
+ * The name read through this call is updated if changes are made by the end
+ * user on the hypervisor side.
+ *
+ * Some hypervisors (like Qemu) may not provide this value. In that case, and
+ * when the buffer cannot be allocated, a non-zero value is returned and
+ * nothing is printed. Returns 0 on success.
+ */
+static int read_rtas_lpar_name(struct seq_file *m)
+{
+	struct papr_sysparm_buf *sysparm = papr_sysparm_buf_alloc();
+	int ret;
+
+	if (!sysparm)
+		return -ENOMEM;
+
+	ret = papr_sysparm_get(PAPR_SYSPARM_LPAR_NAME, sysparm);
+	if (ret == 0)
+		seq_printf(m, "partition_name=%s\n", sysparm->val);
+
+	papr_sysparm_buf_free(sysparm);
+
+	return ret;
+}
+
+/*
+ * Print "partition_name=" from the root node's "ibm,partition-name"
+ * device tree property.
+ *
+ * The value read in the DT is not updated if the end-user is touching the LPAR
+ * name on the hypervisor side.
+ *
+ * Returns 0 on success, -ENOENT when the property cannot be read.
+ */
+static int read_dt_lpar_name(struct seq_file *m)
+{
+	const char *lpar_name;
+	int err = of_property_read_string(of_root, "ibm,partition-name",
+					  &lpar_name);
+
+	if (err)
+		return -ENOENT;
+
+	seq_printf(m, "partition_name=%s\n", lpar_name);
+
+	return 0;
+}
+
+/*
+ * Print the partition name, preferring the RTAS system parameter and
+ * falling back to the device tree. Logs an error (once) when both fail.
+ */
+static void read_lpar_name(struct seq_file *m)
+{
+	/* The device tree is only consulted when the RTAS lookup fails. */
+	if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
+		pr_err_once("Error can't get the LPAR name\n");
+}
+
+/* Size in bytes of the scratch buffer holding one "keyword=value" token. */
+#define SPLPAR_MAXLENGTH (1026 * sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call.  Replace keyword strings as
+ * necessary.
+ *
+ * The parameter value is a comma separated list of "keyword=value" tokens.
+ * Each token is copied into a scratch buffer and printed on its own line;
+ * the keywords "MaxEntCap" and "MaxPlatProcs" are replaced by
+ * "partition_max_entitled_capacity" and "system_potential_processors".
+ *
+ * Parsing stops if a token would not fit in the scratch buffer, so the
+ * buffer is never written past its end.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+	struct papr_sysparm_buf *buf;
+	const char *local_buffer;
+	char *workbuffer;
+	int splpar_strlen;
+	int idx, w_idx;
+
+	buf = papr_sysparm_buf_alloc();
+	if (!buf)
+		return;
+
+	if (papr_sysparm_get(PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS, buf))
+		goto out_free;
+
+	workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+	if (!workbuffer)
+		goto out_free;
+
+	splpar_strlen = be16_to_cpu(buf->len);
+	local_buffer = buf->val;
+
+	w_idx = 0;
+	idx = 0;
+	/*
+	 * The last condition keeps room for the terminating NUL written
+	 * below, bounding every store into workbuffer.
+	 */
+	while ((*local_buffer) && (idx < splpar_strlen) &&
+	       (w_idx < SPLPAR_MAXLENGTH - 1)) {
+		workbuffer[w_idx++] = local_buffer[idx++];
+		if ((local_buffer[idx] == ',')
+		    || (local_buffer[idx] == '\0')) {
+			workbuffer[w_idx] = '\0';
+			if (w_idx) {
+				/* avoid the empty string */
+				seq_printf(m, "%s\n", workbuffer);
+			}
+			memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+			idx++;	/* skip the comma */
+			w_idx = 0;
+		} else if (local_buffer[idx] == '=') {
+			/*
+			 * Replace workbuffer contents with different keyword
+			 * strings. This relies on workbuffer being zero
+			 * filled, so the keyword copied so far is a valid
+			 * NUL-terminated string.
+			 */
+			if (0 == strcmp(workbuffer, "MaxEntCap")) {
+				strcpy(workbuffer,
+				       "partition_max_entitled_capacity");
+				w_idx = strlen(workbuffer);
+			}
+			if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+				strcpy(workbuffer,
+				       "system_potential_processors");
+				w_idx = strlen(workbuffer);
+			}
+		}
+	}
+	kfree(workbuffer);
+out_free:
+	papr_sysparm_buf_free(buf);
+}
+
+/*
+ * Return the number of processors in the system.
+ * This function reads through the device tree and counts the nodes of
+ * type "cpu" (the virtual processors); this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+	struct device_node *dn;
+	int nr_cpus = 0;
+
+	for_each_node_by_type(dn, "cpu") {
+#ifdef LPARCFG_DEBUG
+		printk(KERN_ERR "cpus_dn %p\n", dn);
+#endif
+		nr_cpus += 1;
+	}
+
+	return nr_cpus;
+}
+
+/*
+ * Print the CMO (FW_FEATURE_CMO) related lines. "cmo_enabled=" is always
+ * printed; the remaining lines, including the fault counters summed over
+ * every possible CPU's lppaca, only when the feature is present.
+ */
+static void pseries_cmo_data(struct seq_file *m)
+{
+	unsigned long total_faults = 0;
+	unsigned long total_fault_time = 0;
+	int cpu;
+
+	seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO));
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		total_faults += be64_to_cpu(lppaca_of(cpu).cmo_faults);
+		total_fault_time += be64_to_cpu(lppaca_of(cpu).cmo_fault_time);
+	}
+
+	seq_printf(m, "cmo_faults=%lu\n", total_faults);
+	seq_printf(m, "cmo_fault_time_usec=%lu\n",
+		   total_fault_time / tb_ticks_per_usec);
+	seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp());
+	seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp());
+	seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size());
+}
+
+/*
+ * Print "dispatches=" and "dispatch_dispersions=": the lppaca yield_count
+ * and dispersion_count fields summed over every possible CPU.
+ */
+static void splpar_dispatch_data(struct seq_file *m)
+{
+	unsigned long yield_total = 0;
+	unsigned long dispersion_total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		yield_total += be32_to_cpu(lppaca_of(cpu).yield_count);
+		dispersion_total += be32_to_cpu(lppaca_of(cpu).dispersion_count);
+	}
+
+	seq_printf(m, "dispatches=%lu\n", yield_total);
+	seq_printf(m, "dispatch_dispersions=%lu\n", dispersion_total);
+}
+
+/*
+ * Print "power_mode_data=" from the first return word of H_GET_EM_PARMS.
+ * Nothing is printed without FW_FEATURE_LPAR or when the hcall fails.
+ */
+static void parse_em_data(struct seq_file *m)
+{
+	unsigned long em_parms[PLPAR_HCALL_BUFSIZE];
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	if (plpar_hcall(H_GET_EM_PARMS, em_parms) != H_SUCCESS)
+		return;
+
+	seq_printf(m, "power_mode_data=%016lx\n", em_parms[0]);
+}
+
+/*
+ * Print "MaxMem=": the total size of all LMBs described by drmem_info plus
+ * the memory held in hugetlb pages.
+ */
+static void maxmem_data(struct seq_file *m)
+{
+	unsigned long lmb_bytes, hugetlb_bytes;
+
+	lmb_bytes = (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size;
+	hugetlb_bytes = hugetlb_total_pages() * PAGE_SIZE;
+
+	seq_printf(m, "MaxMem=%lu\n", lmb_bytes + hugetlb_bytes);
+}
+
+/*
+ * Emit the pSeries specific part of the lparcfg output to @m.
+ *
+ * With FW_FEATURE_SPLPAR the shared processor data is queried from the
+ * hypervisor; otherwise a fixed set of lines is derived from the processor
+ * counts. The remaining lines are printed in both cases. @v is unused.
+ * Always returns 0.
+ */
+static int pseries_lparcfg_data(struct seq_file *m, void *v)
+{
+	int partition_potential_processors;
+	int partition_active_processors;
+	struct device_node *rtas_node;
+	const __be32 *lrdrp = NULL;
+
+	rtas_node = of_find_node_by_path("/rtas");
+	if (rtas_node)
+		lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+	if (lrdrp == NULL) {
+		partition_potential_processors = vdso_data->processorCount;
+	} else {
+		/*
+		 * Fifth 32-bit cell of the property. NOTE(review): the
+		 * property length is not checked before this read.
+		 */
+		partition_potential_processors = be32_to_cpup(lrdrp + 4);
+	}
+	of_node_put(rtas_node);
+
+	partition_active_processors = lparcfg_count_active_processors();
+
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		/* this call handles the ibm,get-system-parameter contents */
+		read_lpar_name(m);
+		parse_system_parameter_string(m);
+		parse_ppp_data(m);
+		parse_mpp_data(m);
+		parse_mpp_x_data(m);
+		pseries_cmo_data(m);
+		splpar_dispatch_data(m);
+
+		seq_printf(m, "purr=%ld\n", get_purr());
+		seq_printf(m, "tbr=%ld\n", mftb());
+	} else {		/* non SPLPAR case */
+
+		/* Both system_* lines report the potential processor count. */
+		seq_printf(m, "system_active_processors=%d\n",
+			   partition_potential_processors);
+
+		seq_printf(m, "system_potential_processors=%d\n",
+			   partition_potential_processors);
+
+		/* Capacities are processor counts multiplied by 100. */
+		seq_printf(m, "partition_max_entitled_capacity=%d\n",
+			   partition_potential_processors * 100);
+
+		seq_printf(m, "partition_entitled_capacity=%d\n",
+			   partition_active_processors * 100);
+	}
+
+	show_gpci_data(m);
+
+	seq_printf(m, "partition_active_processors=%d\n",
+		   partition_active_processors);
+
+	seq_printf(m, "partition_potential_processors=%d\n",
+		   partition_potential_processors);
+
+	seq_printf(m, "shared_processor_mode=%d\n",
+		   lppaca_shared_proc());
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/* slb_size is only reported when the radix MMU is not in use. */
+	if (!radix_enabled())
+		seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+#endif
+	parse_em_data(m);
+	maxmem_data(m);
+
+	seq_printf(m, "security_flavor=%u\n", pseries_security_flavor);
+
+	return 0;
+}
+
+/*
+ * update_ppp
+ *
+ * Update the processor entitlement or weight of the partition with
+ * H_SET_PPP. The hcall takes both values, so the one that is not being
+ * changed is read back with h_get_ppp() first. When both pointers are
+ * given, @entitlement wins and the current weight is kept.
+ *
+ * Returns the h_get_ppp() status if that fails, -EINVAL when both pointers
+ * are NULL, otherwise the H_SET_PPP status.
+ */
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_ppp_data current_ppp;
+	u64 entitled_req;
+	u8 weight_req;
+	ssize_t rc;
+
+	/* Get our current parameters */
+	rc = h_get_ppp(&current_ppp);
+	if (rc)
+		return rc;
+
+	if (!entitlement && !weight)
+		return -EINVAL;
+
+	if (entitlement) {
+		entitled_req = *entitlement;
+		weight_req = current_ppp.weight;
+	} else {
+		entitled_req = current_ppp.entitlement;
+		weight_req = *weight;
+	}
+
+	pr_debug("%s: current_entitled = %llu, current_weight = %u\n",
+		 __func__, current_ppp.entitlement, current_ppp.weight);
+
+	pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+		 __func__, entitled_req, weight_req);
+
+	rc = plpar_hcall_norets(H_SET_PPP, entitled_req, weight_req);
+
+	return rc;
+}
+
+/*
+ * update_mpp
+ *
+ * Update the memory entitlement or weight for the partition with H_SET_MPP.
+ * Caller must specify either a new entitlement or weight, not both, to be
+ * updated since the h_set_mpp call takes both entitlement and weight as
+ * parameters; the value not being changed is read back with h_get_mpp().
+ *
+ * A new entitlement is first checked with vio_cmo_entitlement_update().
+ *
+ * Returns the first non-zero status of those two calls, -EINVAL when both
+ * pointers are NULL, otherwise the H_SET_MPP status.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_mpp_data current_mpp;
+	u64 entitled_req;
+	u8 weight_req;
+	ssize_t ret;
+
+	if (entitlement) {
+		/*
+		 * Check with vio to ensure the new memory entitlement
+		 * can be handled.
+		 */
+		ret = vio_cmo_entitlement_update(*entitlement);
+		if (ret)
+			return ret;
+	}
+
+	ret = h_get_mpp(&current_mpp);
+	if (ret)
+		return ret;
+
+	if (!entitlement && !weight)
+		return -EINVAL;
+
+	if (entitlement) {
+		entitled_req = *entitlement;
+		weight_req = current_mpp.mem_weight;
+	} else {
+		entitled_req = current_mpp.entitled_mem;
+		weight_req = *weight;
+	}
+
+	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+		 __func__, current_mpp.entitled_mem, current_mpp.mem_weight);
+
+	pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+		 __func__, entitled_req, weight_req);
+
+	ret = plpar_hcall_norets(H_SET_MPP, entitled_req, weight_req);
+
+	return ret;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity).  Format of input is "param_name=value";
+ * anything after value is ignored.  Valid parameters at this time are
+ * "partition_entitled_capacity", "capacity_weight", "entitled_memory" and
+ * "entitled_memory_weight".  The first two are applied with H_SET_PPP
+ * (update_ppp()), the last two with H_SET_MPP (update_mpp()).
+ *
+ * The last byte written is replaced by a NUL terminator, so input is
+ * expected to carry a trailing character (such as a newline) after the
+ * value.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ *
+ * Returns @count on H_SUCCESS or H_CONSTRAINED, -EINVAL for an empty,
+ * oversized or malformed request, -EFAULT when the user buffer cannot be
+ * copied, -EBUSY / -EIO / -EINVAL for H_BUSY / H_HARDWARE / H_PARAMETER,
+ * and the unmodified status for any other failure.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+			     size_t count, loff_t * off)
+{
+	char kbuf[64];
+	char *tmp;
+	u64 new_entitled, *new_entitled_ptr = &new_entitled;
+	u8 new_weight, *new_weight_ptr = &new_weight;
+	ssize_t retval;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return -EINVAL;
+
+	/*
+	 * Reject empty writes as well: with count == 0 the terminator store
+	 * below would land one byte before the start of kbuf.
+	 */
+	if (count == 0 || count > sizeof(kbuf))
+		return -EINVAL;
+
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	kbuf[count - 1] = '\0';
+	tmp = strchr(kbuf, '=');
+	if (!tmp)
+		return -EINVAL;
+
+	/* Split "name=value": kbuf holds the name, tmp points at the value. */
+	*tmp++ = '\0';
+
+	if (!strcmp(kbuf, "partition_entitled_capacity")) {
+		char *endp;
+		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_ppp(new_entitled_ptr, NULL);
+
+		if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+			/*
+			 * The hypervisor assigns VAS resources based
+			 * on entitled capacity for shared mode.
+			 * Reconfig VAS windows based on DLPAR CPU events.
+			 */
+			if (pseries_vas_dlpar_cpu() != 0)
+				retval = H_HARDWARE;
+		}
+	} else if (!strcmp(kbuf, "capacity_weight")) {
+		char *endp;
+		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_ppp(NULL, new_weight_ptr);
+	} else if (!strcmp(kbuf, "entitled_memory")) {
+		char *endp;
+		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_mpp(new_entitled_ptr, NULL);
+	} else if (!strcmp(kbuf, "entitled_memory_weight")) {
+		char *endp;
+		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_mpp(NULL, new_weight_ptr);
+	} else
+		return -EINVAL;
+
+	/* Translate the hcall status into a write() result. */
+	if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+		retval = count;
+	} else if (retval == H_BUSY) {
+		retval = -EBUSY;
+	} else if (retval == H_HARDWARE) {
+		retval = -EIO;
+	} else if (retval == H_PARAMETER) {
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+/*
+ * seq_file show callback: print the module banner, then the serial number,
+ * system type and partition id read from the root device tree node
+ * (empty strings / 0 when a property is missing), followed by the pSeries
+ * specific data. Returns the result of pseries_lparcfg_data().
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+	const char *system_id = "";
+	const char *model = "";
+	unsigned int lp_index = 0;
+	struct device_node *rootdn;
+
+	seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS);
+
+	rootdn = of_find_node_by_path("/");
+	if (rootdn) {
+		const __be32 *partition_no;
+		const char *prop;
+
+		prop = of_get_property(rootdn, "model", NULL);
+		if (prop)
+			model = prop;
+
+		prop = of_get_property(rootdn, "system-id", NULL);
+		if (prop)
+			system_id = prop;
+
+		partition_no = of_get_property(rootdn, "ibm,partition-no",
+					       NULL);
+		if (partition_no)
+			lp_index = be32_to_cpup(partition_no);
+
+		of_node_put(rootdn);
+	}
+
+	seq_printf(m, "serial_number=%s\n", system_id);
+	seq_printf(m, "system_type=%s\n", model);
+	seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+	return pseries_lparcfg_data(m, v);
+}
+
+/* proc open handler: bind the file to lparcfg_data() via single_open(). */
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, lparcfg_data, NULL);
+}
+
+/* Operations for the "powerpc/lparcfg" proc entry created by lparcfg_init(). */
+static const struct proc_ops lparcfg_proc_ops = {
+	.proc_read	= seq_read,
+	.proc_write	= lparcfg_write,	/* "param_name=value" updates */
+	.proc_open	= lparcfg_open,
+	.proc_release	= single_release,	/* pairs with single_open() */
+	.proc_lseek	= seq_lseek,
+};
+
+/*
+ * Create the "powerpc/lparcfg" proc entry: world readable, and owner
+ * writable as well when the firmware has FW_FEATURE_SPLPAR.
+ *
+ * Returns 0 on success, -EIO when the entry cannot be created.
+ */
+static int __init lparcfg_init(void)
+{
+	/* Allow writing if we have FW_FEATURE_SPLPAR */
+	umode_t mode = firmware_has_feature(FW_FEATURE_SPLPAR) ? 0644 : 0444;
+
+	if (proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops))
+		return 0;
+
+	printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
+	return -EIO;
+}
+machine_device_initcall(pseries, lparcfg_init);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
new file mode 100644
index 0000000000..0161226d8f
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support for Partition Mobility/Migration
+ *
+ * Copyright (C) 2010 Nathan Fontenot
+ * Copyright (C) 2010 IBM Corporation
+ */
+
+
+#define pr_fmt(fmt) "mobility: " fmt
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/nmi.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <linux/stop_machine.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/stringify.h>
+
+#include <asm/machdep.h>
+#include <asm/nmi.h>
+#include <asm/rtas.h>
+#include "pseries.h"
+#include "vas.h"	/* vas_migration_handler() */
+#include "../../kernel/cacheinfo.h"
+
+static struct kobject *mobility_kobj;
+
+/*
+ * Header at the start of the buffer exchanged with the
+ * ibm,update-properties RTAS call. All fields are big-endian and the
+ * layout is packed. update_dt_node() fills in @phandle before the first
+ * call and reads @nprops after each call; the property descriptors start
+ * immediately after this header.
+ */
+struct update_props_workarea {
+	__be32 phandle;
+	__be32 state;
+	__be64 reserved;
+	__be32 nprops;
+} __packed;
+
+/*
+ * Each action word returned by ibm,update-nodes carries the action in its
+ * top byte and the number of affected nodes in the low 24 bits.
+ */
+#define NODE_ACTION_MASK	0xff000000
+#define NODE_COUNT_MASK		0x00ffffff
+
+/* Action values, already positioned in the NODE_ACTION_MASK byte */
+#define DELETE_DT_NODE	0x01000000
+#define UPDATE_DT_NODE	0x02000000
+#define ADD_DT_NODE	0x03000000
+
+/*
+ * Scope argument handed to the update RTAS calls. Both values are
+ * parenthesized so they expand safely inside any expression.
+ */
+#define MIGRATION_SCOPE	(1)
+#define PRRN_SCOPE	(-2)
+
+#ifdef CONFIG_PPC_WATCHDOG
+/*
+ * Value that pseries_migrate_partition() passes to
+ * watchdog_hardlockup_set_timeout_pct() before suspending; it resets the
+ * setting to 0 afterwards. A factor of 0 skips the adjustment entirely.
+ */
+static unsigned int nmi_wd_lpm_factor = 200;
+
+#ifdef CONFIG_SYSCTL
+/* Exposes nmi_wd_lpm_factor as a read/write unsigned integer sysctl */
+static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = {
+	{
+		.procname	= "nmi_wd_lpm_factor",
+		.data		= &nmi_wd_lpm_factor,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+	},
+	{}
+};
+
+/*
+ * Register the table above under the "kernel" sysctl directory. The
+ * registration result is not checked; this initcall always returns 0.
+ */
+static int __init register_nmi_wd_lpm_factor_sysctl(void)
+{
+	register_sysctl("kernel", nmi_wd_lpm_factor_ctl_table);
+
+	return 0;
+}
+device_initcall(register_nmi_wd_lpm_factor_sysctl);
+#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_PPC_WATCHDOG */
+
+/*
+ * Issue a two-argument, one-result RTAS call that uses the shared RTAS
+ * data buffer as its work area.
+ *
+ * @buf must be RTAS_DATA_BUF_SIZE bytes: it is copied into rtas_data_buf
+ * before the call and overwritten with the buffer's content afterwards,
+ * all under rtas_data_buf_lock. Returns the rtas_call() status.
+ */
+static int mobility_rtas_call(int token, char *buf, s32 scope)
+{
+	int status;
+
+	spin_lock(&rtas_data_buf_lock);
+	memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
+	status = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
+	memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+	spin_unlock(&rtas_data_buf_lock);
+
+	return status;
+}
+
+/*
+ * Handle a firmware "delete node" directive for @dn.
+ *
+ * Nodes of type ibm,platform-facilities, and nodes whose parent is of
+ * that type, are left in place; everything else is detached from the
+ * tree. Always returns 0.
+ */
+static int delete_dt_node(struct device_node *dn)
+{
+	struct device_node *parent = of_get_parent(dn);
+	bool platfac = of_node_is_type(dn, "ibm,platform-facilities") ||
+		       of_node_is_type(parent, "ibm,platform-facilities");
+
+	of_node_put(parent);
+
+	if (!platfac) {
+		pr_debug("removing node %pOFfp\n", dn);
+		dlpar_detach_node(dn);
+		return 0;
+	}
+
+	/*
+	 * The drivers that bind to nodes in the platform-facilities
+	 * hierarchy don't support node removal, and the removal directive
+	 * from firmware is always followed by an add of an equivalent
+	 * node. The capability (e.g. RNG, encryption, compression)
+	 * represented by the node is never interrupted by the migration.
+	 * So ignore changes to this part of the tree.
+	 */
+	pr_notice("ignoring remove operation for %pOFfp\n", dn);
+	return 0;
+}
+
+/*
+ * Build up (possibly over several calls) a new value for property @name
+ * of @dn and install it once complete.
+ *
+ * @prop:  in/out cursor. NULL on entry starts a new property; non-NULL
+ *         means a partially received property that @value is appended to.
+ *         It is reset to NULL once the property has been handed to
+ *         of_update_property().
+ * @vd:    value descriptor: the length of @value, or its two's complement
+ *         when more data for the same property will follow.
+ * @value: the bytes received in this round.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+static int update_dt_property(struct device_node *dn, struct property **prop,
+			      const char *name, u32 vd, char *value)
+{
+	struct property *p = *prop;
+	bool partial = false;
+
+	/*
+	 * A negative 'vd' value indicates that only part of the new property
+	 * value is contained in the buffer and we need to call
+	 * ibm,update-properties again to get the rest of the value.
+	 *
+	 * A negative value is also the two's complement of the actual length.
+	 */
+	if (vd & 0x80000000) {
+		vd = ~vd + 1;
+		partial = true;
+	}
+
+	if (!p) {
+		/* First (or only) chunk: allocate a fresh property */
+		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+
+		p->name = kstrdup(name, GFP_KERNEL);
+		if (!p->name) {
+			kfree(p);
+			return -ENOMEM;
+		}
+
+		p->length = vd;
+		p->value = kzalloc(p->length, GFP_KERNEL);
+		if (!p->value) {
+			kfree(p->name);
+			kfree(p);
+			return -ENOMEM;
+		}
+
+		memcpy(p->value, value, vd);
+		*prop = p;
+	} else {
+		/* Continuation chunk: grow the value and append */
+		char *grown = kzalloc(p->length + vd, GFP_KERNEL);
+
+		if (!grown)
+			return -ENOMEM;
+
+		memcpy(grown, p->value, p->length);
+		memcpy(grown + p->length, value, vd);
+
+		kfree(p->value);
+		p->value = grown;
+		p->length += vd;
+	}
+
+	/* More chunks to come: keep accumulating in *prop */
+	if (partial)
+		return 0;
+
+	pr_debug("updating node %pOF property %s\n", dn, name);
+	of_update_property(dn, p);
+	*prop = NULL;
+
+	return 0;
+}
+
+/*
+ * Apply the firmware-provided property changes for node @dn.
+ *
+ * Calls ibm,update-properties repeatedly (while it returns 1) with a
+ * work area that starts with a struct update_props_workarea, and walks
+ * the property descriptors that follow it: a NUL-terminated name, a
+ * big-endian 32-bit value descriptor, then the value bytes.
+ *
+ * Returns -EINVAL if the RTAS function is not available, -ENOMEM if the
+ * work buffer cannot be allocated, and 0 otherwise. Note that RTAS and
+ * per-property failures are not propagated to the caller.
+ */
+static int update_dt_node(struct device_node *dn, s32 scope)
+{
+	struct update_props_workarea *upwa;
+	struct property *prop = NULL;
+	int i, rc, rtas_rc;
+	char *prop_data;
+	char *rtas_buf;
+	int update_properties_token;
+	u32 nprops;
+	u32 vd;
+
+	update_properties_token = rtas_function_token(RTAS_FN_IBM_UPDATE_PROPERTIES);
+	if (update_properties_token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!rtas_buf)
+		return -ENOMEM;
+
+	/* The work area header lives at the start of the buffer */
+	upwa = (struct update_props_workarea *)&rtas_buf[0];
+	upwa->phandle = cpu_to_be32(dn->phandle);
+
+	do {
+		rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
+					scope);
+		if (rtas_rc < 0)
+			break;
+
+		/* Descriptors start right after the header */
+		prop_data = rtas_buf + sizeof(*upwa);
+		nprops = be32_to_cpu(upwa->nprops);
+
+		/* On the first call to ibm,update-properties for a node the
+		 * first property value descriptor contains an empty
+		 * property name, the property value length encoded as u32,
+		 * and the property value is the node path being updated.
+		 */
+		if (*prop_data == 0) {
+			prop_data++;
+			vd = be32_to_cpu(*(__be32 *)prop_data);
+			prop_data += vd + sizeof(vd);
+			nprops--;
+		}
+
+		for (i = 0; i < nprops; i++) {
+			char *prop_name;
+
+			prop_name = prop_data;
+			prop_data += strlen(prop_name) + 1;
+			vd = be32_to_cpu(*(__be32 *)prop_data);
+			prop_data += sizeof(vd);
+
+			switch (vd) {
+			case 0x00000000:
+				/* name only property, nothing to do */
+				break;
+
+			case 0x80000000:
+				/* this descriptor value requests removal of the property */
+				of_remove_property(dn, of_find_property(dn,
+							prop_name, NULL));
+				prop = NULL;
+				break;
+
+			default:
+				/*
+				 * 'prop' carries a partially received value
+				 * from one descriptor to the next.
+				 */
+				rc = update_dt_property(dn, &prop, prop_name,
+							vd, prop_data);
+				if (rc) {
+					pr_err("updating %s property failed: %d\n",
+					       prop_name, rc);
+				}
+
+				/* advance past the value bytes */
+				prop_data += vd;
+				break;
+			}
+
+			cond_resched();
+		}
+
+		cond_resched();
+	} while (rtas_rc == 1);
+
+	kfree(rtas_buf);
+	return 0;
+}
+
+/*
+ * Handle a firmware "add node" directive: configure the connector
+ * identified by @drc_index and attach the resulting node under
+ * @parent_dn.
+ *
+ * Returns 0 on success (including the deliberately ignored
+ * platform-facilities case), -ENOENT if the connector could not be
+ * configured, or the error from dlpar_attach_node().
+ */
+static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
+{
+	struct device_node *dn;
+	int rc;
+
+	dn = dlpar_configure_connector(drc_index, parent_dn);
+	if (!dn)
+		return -ENOENT;
+
+	/*
+	 * Since delete_dt_node() ignores this node type, this is the
+	 * necessary counterpart. We also know that a platform-facilities
+	 * node returned from dlpar_configure_connector() has children
+	 * attached, and dlpar_attach_node() only adds the parent, leaking
+	 * the children. So ignore these on the add side for now.
+	 */
+	if (of_node_is_type(dn, "ibm,platform-facilities")) {
+		pr_notice("ignoring add operation for %pOF\n", dn);
+		dlpar_free_cc_nodes(dn);
+		return 0;
+	}
+
+	rc = dlpar_attach_node(dn, parent_dn);
+	if (rc) {
+		/*
+		 * The node is released here, so it must not be touched
+		 * (or reported as added) afterwards.
+		 */
+		dlpar_free_cc_nodes(dn);
+		return rc;
+	}
+
+	pr_debug("added node %pOFfp\n", dn);
+
+	return 0;
+}
+
+/*
+ * Bring the device tree in line with firmware for the given @scope.
+ *
+ * Calls ibm,update-nodes repeatedly (while it returns 1). Each reply
+ * holds, from the fifth 32-bit word on, a list of action words (action
+ * in the top byte, node count in the low 24 bits) each followed by that
+ * many entries. An entry is a phandle; ADD entries additionally carry a
+ * DRC index. The list ends at a word with no action bits set.
+ *
+ * Returns 0 if the RTAS function is unavailable, -ENOMEM if the work
+ * buffer cannot be allocated, otherwise the status of the last RTAS
+ * call. Failures of individual node operations are not propagated.
+ */
+static int pseries_devicetree_update(s32 scope)
+{
+	char *rtas_buf;
+	__be32 *data;
+	int update_nodes_token;
+	int rc;
+
+	update_nodes_token = rtas_function_token(RTAS_FN_IBM_UPDATE_NODES);
+	if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
+		return 0;
+
+	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!rtas_buf)
+		return -ENOMEM;
+
+	do {
+		rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
+		if (rc && rc != 1)
+			break;
+
+		/* Skip the four-word header preceding the action list */
+		data = (__be32 *)rtas_buf + 4;
+		while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
+			int i;
+			u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+			u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
+
+			data++;
+
+			for (i = 0; i < node_count; i++) {
+				struct device_node *np;
+				__be32 phandle = *data++;
+				__be32 drc_index;
+
+				np = of_find_node_by_phandle(be32_to_cpu(phandle));
+				if (!np) {
+					pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n",
+						be32_to_cpu(phandle), action);
+					/*
+					 * ADD entries are two words long.
+					 * Consume the DRC index as well so it
+					 * is not parsed as the next phandle.
+					 */
+					if (action == ADD_DT_NODE)
+						data++;
+					continue;
+				}
+
+				switch (action) {
+				case DELETE_DT_NODE:
+					delete_dt_node(np);
+					break;
+				case UPDATE_DT_NODE:
+					update_dt_node(np, scope);
+					break;
+				case ADD_DT_NODE:
+					drc_index = *data++;
+					add_dt_node(np, drc_index);
+					break;
+				}
+
+				of_node_put(np);
+				cond_resched();
+			}
+		}
+
+		cond_resched();
+	} while (rc == 1);
+
+	kfree(rtas_buf);
+	return rc;
+}
+
+/*
+ * Post-migration housekeeping: activate firmware, refresh the device
+ * tree (with the cacheinfo hierarchy torn down around the update),
+ * then re-evaluate security mitigations and hv-24x7 system information.
+ * A device tree update failure is logged but does not stop the sequence.
+ */
+void post_mobility_fixup(void)
+{
+	int err;
+
+	rtas_activate_firmware();
+
+	/*
+	 * Hold off CPU online/offline transitions for as long as the
+	 * device tree is being rewritten.
+	 */
+	cpus_read_lock();
+
+	/*
+	 * The destination firmware commonly replaces cache nodes, so drop
+	 * every reference the cacheinfo hierarchy holds before updating
+	 * the tree, and rebuild it afterwards.
+	 */
+	cacheinfo_teardown();
+
+	err = pseries_devicetree_update(MIGRATION_SCOPE);
+	if (err)
+		pr_err("device tree update failed: %d\n", err);
+
+	cacheinfo_rebuild();
+
+	cpus_read_unlock();
+
+	/* The L1 flush type may differ on the destination */
+	pseries_setup_security_mitigations();
+
+	/* hv-24x7 caches system information; refresh it */
+	read_24x7_sys_info();
+}
+
+/*
+ * Query the state of the VASI stream @handle via H_VASI_STATE.
+ *
+ * On success stores the state in *@res and returns 0. Otherwise *@res is
+ * left untouched and the result is -EINVAL (H_PARAMETER), -EOPNOTSUPP
+ * (H_FUNCTION) or -EIO (anything else, which is also logged).
+ */
+static int poll_vasi_state(u64 handle, unsigned long *res)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle);
+
+	if (hvrc == H_SUCCESS) {
+		*res = retbuf[0];
+		return 0;
+	}
+
+	if (hvrc == H_PARAMETER)
+		return -EINVAL;
+
+	if (hvrc == H_FUNCTION)
+		return -EOPNOTSUPP;
+
+	/* H_HARDWARE and any status we do not know about */
+	pr_err("unexpected H_VASI_STATE result %ld\n", hvrc);
+	return -EIO;
+}
+
+/*
+ * Block until the VASI stream @handle moves from H_VASI_ENABLED to
+ * H_VASI_SUSPENDING, polling once per second.
+ *
+ * Returns 0 once the stream is suspending, or if H_VASI_STATE is not
+ * implemented; -EIO for any other state; otherwise the polling error.
+ */
+static int wait_for_vasi_session_suspending(u64 handle)
+{
+	unsigned long state;
+	int ret;
+
+	for (;;) {
+		ret = poll_vasi_state(handle, &state);
+
+		/* 'state' is only meaningful when the poll succeeded */
+		if (ret != 0 || state == H_VASI_SUSPENDING)
+			break;
+
+		if (state != H_VASI_ENABLED) {
+			pr_err("unexpected H_VASI_STATE result %lu\n", state);
+			ret = -EIO;
+			break;
+		}
+
+		ssleep(1);
+	}
+
+	/*
+	 * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or
+	 * ibm,suspend-me are also unimplemented, we'll recover then.
+	 */
+	return ret == -EOPNOTSUPP ? 0 : ret;
+}
+
+/*
+ * Block until the VASI stream @handle moves from H_VASI_RESUMED to
+ * H_VASI_COMPLETED, polling every 500 ms. Gives up (with a log message)
+ * on a polling error or an unexpected state.
+ */
+static void wait_for_vasi_session_completed(u64 handle)
+{
+	unsigned long state = 0;
+	int ret;
+
+	pr_info("waiting for memory transfer to complete...\n");
+
+	for (;;) {
+		ret = poll_vasi_state(handle, &state);
+
+		/*
+		 * If the memory transfer is already complete and the
+		 * migration has been cleaned up by the hypervisor,
+		 * H_PARAMETER is returned, which poll_vasi_state()
+		 * translates to -EINVAL.
+		 */
+		if (ret == -EINVAL || (!ret && state == H_VASI_COMPLETED)) {
+			pr_info("memory transfer completed.\n");
+			return;
+		}
+
+		if (ret) {
+			pr_err("H_VASI_STATE return error (%d)\n", ret);
+			return;
+		}
+
+		if (state != H_VASI_RESUMED) {
+			pr_err("unexpected H_VASI_STATE result %lu\n", state);
+			return;
+		}
+
+		msleep(500);
+	}
+}
+
+/* Send H_PROD to logical CPU @target_cpu; log (rate-limited) on failure. */
+static void prod_single(unsigned int target_cpu)
+{
+	int hwid = get_hard_smp_processor_id(target_cpu);
+	long hvrc = plpar_hcall_norets(H_PROD, hwid);
+
+	if (hvrc != H_SUCCESS)
+		pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
+				   target_cpu, hwid, hvrc);
+}
+
+/* Send H_PROD to every online CPU except the one running this code. */
+static void prod_others(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+
+		prod_single(cpu);
+	}
+}
+
+/*
+ * With the hash MMU configured, shrink the SLB size to SLB_MIN_SIZE and
+ * return the size that was in effect before. Without it this is a no-op
+ * that returns 0.
+ */
+static u16 clamp_slb_size(void)
+{
+	u16 old_size = 0;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	old_size = mmu_slb_size;
+	slb_set_size(SLB_MIN_SIZE);
+#endif
+
+	return old_size;
+}
+
+/*
+ * Invoke ibm,suspend-me on the calling CPU.
+ *
+ * Returns the result of rtas_ibm_suspend_me(); on failure the RTAS
+ * status is logged and the previous SLB size is restored.
+ */
+static int do_suspend(void)
+{
+	u16 slb_size_before;
+	int status;
+	int rc;
+
+	pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
+
+	/*
+	 * The destination processor model may have fewer SLB entries
+	 * than the source. Drop mmu_slb_size to a safe minimum before
+	 * suspending to minimize the chance of programming entries that
+	 * do not exist on the destination. On success the OF reconfig
+	 * path updates it from the new device tree after resuming; on
+	 * failure it is restored below.
+	 */
+	slb_size_before = clamp_slb_size();
+
+	rc = rtas_ibm_suspend_me(&status);
+	if (rc == 0)
+		return 0;
+
+	pr_err("ibm,suspend-me error: %d\n", status);
+	slb_set_size(slb_size_before);
+
+	return rc;
+}
+
+/**
+ * struct pseries_suspend_info - State shared between CPUs for join/suspend.
+ * @counter: Threads are to increment this upon resuming from suspend
+ *           or if an error is received from H_JOIN. The thread which performs
+ *           the first increment (i.e. sets it to 1) is responsible for
+ *           waking the other threads.
+ * @done: False if join/suspend is in progress. True if the operation is
+ *        complete (successful or not).
+ *
+ * pseries_suspend() creates a fresh instance on its stack for every
+ * attempt and passes it to do_join() through stop_machine().
+ */
+struct pseries_suspend_info {
+	atomic_t counter;
+	bool done;
+};
+
+/*
+ * stop_machine() callback run on each participating CPU.
+ *
+ * Every CPU issues H_JOIN. The one that gets H_CONTINUE performs the
+ * suspend; the others return from H_JOIN with H_SUCCESS once prodded.
+ * The first CPU to come out (after the suspend or on an H_JOIN error)
+ * marks the operation done and prods all the others.
+ *
+ * @arg: the shared struct pseries_suspend_info.
+ *
+ * Returns the do_suspend() result on the suspending CPU, 0 on CPUs woken
+ * after completion, and -EIO on an H_JOIN failure.
+ */
+static int do_join(void *arg)
+{
+	struct pseries_suspend_info *info = arg;
+	atomic_t *counter = &info->counter;
+	long hvrc;
+	int ret;
+
+retry:
+	/* Must ensure MSR.EE off for H_JOIN. */
+	hard_irq_disable();
+	hvrc = plpar_hcall_norets(H_JOIN);
+
+	switch (hvrc) {
+	case H_CONTINUE:
+		/*
+		 * All other CPUs are offline or in H_JOIN. This CPU
+		 * attempts the suspend.
+		 */
+		ret = do_suspend();
+		break;
+	case H_SUCCESS:
+		/*
+		 * The suspend is complete and this cpu has received a
+		 * prod, or we've received a stray prod from unrelated
+		 * code (e.g. paravirt spinlocks) and we need to join
+		 * again.
+		 *
+		 * This barrier orders the return from H_JOIN above vs
+		 * the load of info->done. It pairs with the barrier
+		 * in the wakeup/prod path below.
+		 */
+		smp_mb();
+		if (READ_ONCE(info->done) == false) {
+			/* Terminate the message so it cannot be merged with later output */
+			pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying\n",
+					    smp_processor_id());
+			goto retry;
+		}
+		ret = 0;
+		break;
+	case H_BAD_MODE:
+	case H_HARDWARE:
+	default:
+		ret = -EIO;
+		pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
+				   hvrc, smp_processor_id());
+		break;
+	}
+
+	if (atomic_inc_return(counter) == 1) {
+		pr_info("CPU %u waking all threads\n", smp_processor_id());
+		WRITE_ONCE(info->done, true);
+		/*
+		 * This barrier orders the store to info->done vs subsequent
+		 * H_PRODs to wake the other CPUs. It pairs with the barrier
+		 * in the H_SUCCESS case above.
+		 */
+		smp_mb();
+		prod_others();
+	}
+	/*
+	 * Execution may have been suspended for several seconds, so reset
+	 * the watchdogs. touch_nmi_watchdog() also touches the soft lockup
+	 * watchdog.
+	 */
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+
+	return ret;
+}
+
+/*
+ * Abort reason code byte 0. We use only the 'Migrating partition' value.
+ * pseries_cancel_migration() places it in bits 31:24 of the reason code
+ * passed to H_VASI_SIGNAL.
+ */
+enum vasi_aborting_entity {
+	ORCHESTRATOR        = 1,
+	VSP_SOURCE          = 2,
+	PARTITION_FIRMWARE  = 3,
+	PLATFORM_FIRMWARE   = 4,
+	VSP_TARGET          = 5,
+	MIGRATING_PARTITION = 6,
+};
+
+/*
+ * Tell the hypervisor to cancel the migration of stream @handle.
+ *
+ * The reason code names this partition as the aborting entity (top byte)
+ * and carries the low 24 bits of |@err| as detail. A failing
+ * H_VASI_SIGNAL is only logged.
+ */
+static void pseries_cancel_migration(u64 handle, int err)
+{
+	u8 entity = MIGRATING_PARTITION;
+	u32 detail = abs(err) & 0xffffff;
+	u32 reason_code = (entity << 24) | detail;
+	long hvrc;
+
+	hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
+				  H_VASI_SIGNAL_CANCEL, reason_code);
+	if (hvrc != 0)
+		pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
+}
+
+/*
+ * Suspend the partition, retrying a limited number of times.
+ *
+ * Each attempt runs do_join() on all online CPUs via stop_machine().
+ * After a failed attempt the VASI stream @handle is polled: the suspend
+ * is retried (with a delay that grows tenfold each time, starting at
+ * 1 ms) only while the stream is still in the Suspending state or the
+ * state cannot be queried because H_VASI_STATE is unsupported.
+ *
+ * Returns 0 on success or the error from the last attempt.
+ */
+static int pseries_suspend(u64 handle)
+{
+	const unsigned int max_attempts = 5;
+	unsigned int retry_interval_ms = 1;
+	unsigned int attempt = 1;
+	int ret;
+
+	while (true) {
+		struct pseries_suspend_info info;
+		unsigned long vasi_state;
+		int vasi_err;
+
+		/* Fresh shared state for every attempt */
+		info = (struct pseries_suspend_info) {
+			.counter = ATOMIC_INIT(0),
+			.done = false,
+		};
+
+		ret = stop_machine(do_join, &info, cpu_online_mask);
+		if (ret == 0)
+			break;
+		/*
+		 * Encountered an error. If the VASI stream is still
+		 * in Suspending state, it's likely a transient
+		 * condition related to some device in the partition
+		 * and we can retry in the hope that the cause has
+		 * cleared after some delay.
+		 *
+		 * A better design would allow drivers etc to prepare
+		 * for the suspend and avoid conditions which prevent
+		 * the suspend from succeeding. For now, we have this
+		 * mitigation.
+		 */
+		pr_notice("Partition suspend attempt %u of %u error: %d\n",
+			  attempt, max_attempts, ret);
+
+		if (attempt == max_attempts)
+			break;
+
+		vasi_err = poll_vasi_state(handle, &vasi_state);
+		if (vasi_err == 0) {
+			if (vasi_state != H_VASI_SUSPENDING) {
+				pr_notice("VASI state %lu after failed suspend\n",
+					  vasi_state);
+				break;
+			}
+		} else if (vasi_err != -EOPNOTSUPP) {
+			/* Terminate the message so it cannot be merged with later output */
+			pr_err("VASI state poll error: %d\n", vasi_err);
+			break;
+		}
+
+		pr_notice("Will retry partition suspend after %u ms\n",
+			  retry_interval_ms);
+
+		msleep(retry_interval_ms);
+		retry_interval_ms *= 10;
+		attempt++;
+	}
+
+	return ret;
+}
+
+/*
+ * Drive a partition migration for VASI stream @handle.
+ *
+ * Closes the VAS windows, waits for the stream to reach the Suspending
+ * state, suspends the partition, and on success performs the
+ * post-migration fixups and waits for the memory transfer to finish. A
+ * failed suspend cancels the migration. The VAS windows are reopened in
+ * every case.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int pseries_migrate_partition(u64 handle)
+{
+	unsigned int wd_factor = 0;
+	int rc;
+
+#ifdef CONFIG_PPC_WATCHDOG
+	wd_factor = nmi_wd_lpm_factor;
+#endif
+	/*
+	 * When the migration is initiated, the hypervisor changes VAS
+	 * mappings to prepare before the OS gets the notification and
+	 * closes all VAS windows. NX generates continuous faults during
+	 * this time and user space cannot tell these faults apart from
+	 * the migration event. Shrink that window by closing the VAS
+	 * windows right at the start.
+	 */
+	vas_migration_handler(VAS_SUSPEND);
+
+	rc = wait_for_vasi_session_suspending(handle);
+	if (rc == 0) {
+		if (wd_factor)
+			watchdog_hardlockup_set_timeout_pct(wd_factor);
+
+		rc = pseries_suspend(handle);
+		if (rc) {
+			pseries_cancel_migration(handle, rc);
+		} else {
+			post_mobility_fixup();
+			/*
+			 * Wait for the memory transfer to complete so that
+			 * the user space process only returns from the
+			 * syscall afterwards. This lets the user hooks run
+			 * at the right time.
+			 */
+			wait_for_vasi_session_completed(handle);
+		}
+
+		if (wd_factor)
+			watchdog_hardlockup_set_timeout_pct(0);
+	}
+
+	vas_migration_handler(VAS_RESUME);
+
+	return rc;
+}
+
+/*
+ * Non-static entry point that forwards to pseries_migrate_partition() for
+ * VASI stream @handle and returns its result. Judging by the name it is
+ * the hook for ibm,suspend-me requests arriving through the rtas syscall;
+ * the caller is outside this file.
+ */
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+	return pseries_migrate_partition(handle);
+}
+
+/*
+ * sysfs store handler for the "migration" attribute.
+ *
+ * Parses @buf as a u64 stream id (base auto-detected) and migrates the
+ * partition. Returns @count on success, otherwise the parse or
+ * migration error.
+ */
+static ssize_t migration_store(const struct class *class,
+			       const struct class_attribute *attr, const char *buf,
+			       size_t count)
+{
+	u64 streamid;
+	int err;
+
+	err = kstrtou64(buf, 0, &streamid);
+	if (!err)
+		err = pseries_migrate_partition(streamid);
+
+	if (err)
+		return err;
+
+	return count;
+}
+
+/*
+ * Used by drmgr to determine the kernel behavior of the migration interface.
+ *
+ * Version 1: Performs all PAPR requirements for migration including
+ *	firmware activation and device tree update.
+ */
+#define MIGRATION_API_VERSION	1
+
+/*
+ * Attributes created by mobility_sysfs_init(): a write-only "migration"
+ * file (backed by migration_store(), per the CLASS_ATTR_WO naming
+ * convention) and a read-only "api_version" string holding
+ * MIGRATION_API_VERSION.
+ */
+static CLASS_ATTR_WO(migration);
+static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
+
+/*
+ * Create the "mobility" kobject under kernel_kobj together with its
+ * "migration" and "api_version" files.
+ *
+ * Returns -ENOMEM if the kobject cannot be created. Failing to create
+ * either file is only logged; the function then still returns 0.
+ */
+static int __init mobility_sysfs_init(void)
+{
+	int err;
+
+	mobility_kobj = kobject_create_and_add("mobility", kernel_kobj);
+	if (mobility_kobj == NULL)
+		return -ENOMEM;
+
+	err = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
+	if (err != 0)
+		pr_err("unable to create migration sysfs file (%d)\n", err);
+
+	err = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
+	if (err != 0)
+		pr_err("unable to create api_version sysfs file (%d)\n", err);
+
+	return 0;
+}
+machine_device_initcall(pseries, mobility_sysfs_init);
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
new file mode 100644
index 0000000000..423ee1d5bd
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006 Jake Moilanen <moilanen@austin.ibm.com>, IBM Corp.
+ * Copyright 2006-2007 Michael Ellerman, IBM Corp.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+
+#include <asm/rtas.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/machdep.h>
+#include <asm/xive.h>
+
+#include "pseries.h"
+
+/*
+ * RTAS tokens used by rtas_query_irq_number() and rtas_change_msi()
+ * respectively. They are assigned outside the code visible here.
+ */
+static int query_token, change_token;
+
+/* Function codes passed as the 'func' argument of rtas_change_msi() */
+#define RTAS_QUERY_FN		0
+#define RTAS_CHANGE_FN		1
+#define RTAS_RESET_FN		2
+#define RTAS_CHANGE_MSI_FN	3
+#define RTAS_CHANGE_MSIX_FN	4
+#define RTAS_CHANGE_32MSI_FN	5
+
+/* RTAS Helpers */
+
+/*
+ * Ask firmware to change the number of MSIs assigned to the device
+ * described by @pdn.
+ *
+ * @func:     one of the RTAS_*_FN function codes.
+ * @num_irqs: number of interrupts requested (0 disables).
+ *
+ * The call is repeated while RTAS reports busy, feeding back the
+ * sequence number it returned. Returns the number of interrupts firmware
+ * reports on success, or a negative value on failure.
+ */
+static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs)
+{
+	u32 addr, seq_num, rtas_ret[3];
+	unsigned long buid;
+	bool explicit_fn;
+	int nret;
+	int rc;
+
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	buid = pdn->phb->buid;
+
+	/* The explicit MSI/MSI-X/32-bit MSI functions return one more value */
+	explicit_fn = func == RTAS_CHANGE_MSI_FN ||
+		      func == RTAS_CHANGE_MSIX_FN ||
+		      func == RTAS_CHANGE_32MSI_FN;
+	nret = explicit_fn ? 4 : 3;
+
+	seq_num = 1;
+	do {
+		rc = rtas_call(change_token, 6, nret, rtas_ret, addr,
+				BUID_HI(buid), BUID_LO(buid),
+				func, num_irqs, seq_num);
+
+		seq_num = rtas_ret[1];
+	} while (rtas_busy_delay(rc));
+
+	/*
+	 * On success report how many irqs were allocated; otherwise make
+	 * sure the caller sees a negative error code.
+	 */
+	if (rc == 0)
+		rc = rtas_ret[0];
+	else if (rc > 0)
+		rc = -rc;
+
+	pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n",
+		 func, num_irqs, rtas_ret[0], rc);
+
+	return rc;
+}
+
+/*
+ * Disable all MSIs (and MSI-Xs) of @pdev by requesting zero interrupts,
+ * first through the explicit interface and, if that does not return 0,
+ * through the old one. Does nothing when the device has no pci_dn.
+ */
+static void rtas_disable_msi(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+
+	if (!pdn)
+		return;
+
+	/* Disabling MSI with the explicit interface also disables MSI-X */
+	if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) == 0)
+		return;
+
+	/* That may have failed because the explicit interface is absent */
+	if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0)
+		pr_debug("rtas_msi: Setting MSIs to 0 failed!\n");
+}
+
+/*
+ * Query firmware for the interrupt source number assigned to MSI
+ * @offset of the device described by @pdn, retrying while RTAS is busy.
+ *
+ * Returns the first value reported by firmware on success, or the
+ * non-zero RTAS status on failure.
+ */
+static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
+{
+	unsigned long buid = pdn->phb->buid;
+	u32 addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	u32 rtas_ret[2];
+	int rc;
+
+	do {
+		rc = rtas_call(query_token, 4, 3, rtas_ret, addr,
+			       BUID_HI(buid), BUID_LO(buid), offset);
+	} while (rtas_busy_delay(rc));
+
+	if (!rc)
+		return rtas_ret[0];
+
+	pr_debug("rtas_msi: error (%d) querying source number\n", rc);
+	return rc;
+}
+
+/*
+ * Compare a request for @nvec vectors against the count in the device
+ * tree property @prop_name of @pdev's node.
+ *
+ * Returns 0 if the property allows at least @nvec, the (positive)
+ * property value if it is smaller, -ENOSPC if it is zero, and -ENOENT
+ * if the property does not exist.
+ */
+static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const __be32 *prop;
+	u32 req_msi;
+
+	prop = of_get_property(dn, prop_name, NULL);
+	if (!prop) {
+		pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn);
+		return -ENOENT;
+	}
+
+	req_msi = be32_to_cpup(prop);
+	if (req_msi >= nvec)
+		return 0;
+
+	pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec);
+
+	/* Be paranoid */
+	if (req_msi == 0)
+		return -ENOSPC;
+
+	return req_msi;
+}
+
+/* check_req() against the "ibm,req#msi" property (plain MSI). */
+static int check_req_msi(struct pci_dev *pdev, int nvec)
+{
+	return check_req(pdev, nvec, "ibm,req#msi");
+}
+
+/* check_req() against the "ibm,req#msi-x" property (MSI-X). */
+static int check_req_msix(struct pci_dev *pdev, int nvec)
+{
+	return check_req(pdev, nvec, "ibm,req#msi-x");
+}
+
+/* Quota calculation */
+
+/*
+ * Walk from @node towards the root looking for the first node carrying
+ * an "ibm,pe-total-#msi" property.
+ *
+ * On a hit, stores the property value in *@total and returns that node
+ * with a reference held, which the caller must drop. Returns NULL (and
+ * leaves *@total alone) if no ancestor has the property.
+ */
+static struct device_node *__find_pe_total_msi(struct device_node *node, int *total)
+{
+	struct device_node *dn;
+
+	for (dn = of_node_get(node); dn; dn = of_get_next_parent(dn)) {
+		const __be32 *prop;
+
+		prop = of_get_property(dn, "ibm,pe-total-#msi", NULL);
+		if (!prop)
+			continue;
+
+		pr_debug("rtas_msi: found prop on dn %pOF\n",
+			dn);
+		*total = be32_to_cpup(prop);
+		return dn;
+	}
+
+	return NULL;
+}
+
+/* __find_pe_total_msi() starting at the device tree node of @dev. */
+static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+{
+	return __find_pe_total_msi(pci_device_to_OF_node(dev), total);
+}
+
+/*
+ * Fallback PE lookup for firmware without "ibm,pe-total-#msi".
+ *
+ * Locates the first device of @dev's EEH PE (or @dev's own EEH device
+ * when it has no PE) and returns the parent of that device's node with
+ * a reference held, setting *@total to a hardcoded 8. Returns NULL,
+ * leaving *@total alone, if any step of the lookup fails.
+ */
+static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
+{
+	struct device_node *dn;
+	struct eeh_dev *edev;
+
+	/* Found our PE and assume 8 at that point. */
+
+	dn = pci_device_to_OF_node(dev);
+	if (!dn)
+		return NULL;
+
+	/* Get the top level device in the PE */
+	edev = pdn_to_eeh_dev(PCI_DN(dn));
+	/* No EEH device behind this node: report "not found", do not oops */
+	if (!edev)
+		return NULL;
+	if (edev->pe)
+		edev = list_first_entry(&edev->pe->edevs, struct eeh_dev,
+					entry);
+	dn = pci_device_to_OF_node(edev->pdev);
+	if (!dn)
+		return NULL;
+
+	/* We actually want the parent */
+	dn = of_get_parent(dn);
+	if (!dn)
+		return NULL;
+
+	/* Hardcode of 8 for old firmwares */
+	*total = 8;
+	pr_debug("rtas_msi: using PE dn %pOF\n", dn);
+
+	return dn;
+}
+
+/* Scratch state for the per-PE MSI quota calculation in msi_quota_for_device() */
+struct msi_counts {
+	struct device_node *requestor;	/* node of the device asking for MSIs */
+	int num_devices;		/* non-bridge devices found under the PE */
+	int request;			/* number of MSIs the requestor wants */
+	int quota;			/* per-device share of the PE total */
+	int spare;			/* MSIs left unused by under-quota devices */
+	int over_quota;			/* number of devices wanting more than quota */
+};
+
+/*
+ * Device tree traversal callback: bump counts->num_devices for every
+ * node whose "class-code" (taken as 0 when absent) is not a PCI-to-PCI
+ * bridge. @data is a struct msi_counts. Always returns NULL.
+ */
+static void *count_non_bridge_devices(struct device_node *dn, void *data)
+{
+	struct msi_counts *counts = data;
+	const __be32 *prop;
+	u32 class_code = 0;
+
+	pr_debug("rtas_msi: counting %pOF\n", dn);
+
+	prop = of_get_property(dn, "class-code", NULL);
+	if (prop)
+		class_code = be32_to_cpup(prop);
+
+	if ((class_code >> 8) != PCI_CLASS_BRIDGE_PCI)
+		counts->num_devices++;
+
+	return NULL;
+}
+
+/*
+ * Device tree traversal callback: work out how many MSIs node @dn wants
+ * and account for it against counts->quota, adding any shortfall to
+ * counts->spare or counting the node as over quota. @data is a struct
+ * msi_counts. Always returns NULL.
+ */
+static void *count_spare_msis(struct device_node *dn, void *data)
+{
+	struct msi_counts *counts = data;
+	int req;
+
+	if (dn != counts->requestor) {
+		const __be32 *prop;
+
+		/*
+		 * We don't know whether a driver will go for MSI or MSI-X,
+		 * so punt and assume the larger of the two.
+		 */
+		req = 0;
+		prop = of_get_property(dn, "ibm,req#msi", NULL);
+		if (prop)
+			req = be32_to_cpup(prop);
+
+		prop = of_get_property(dn, "ibm,req#msi-x", NULL);
+		if (prop)
+			req = max(req, (int)be32_to_cpup(prop));
+	} else {
+		/* For the requestor the actual request is known */
+		req = counts->request;
+	}
+
+	if (req < counts->quota)
+		counts->spare += counts->quota - req;
+	else if (req > counts->quota)
+		counts->over_quota++;
+
+	return NULL;
+}
+
+/*
+ * Work out how many of the @request MSIs @dev may actually have, given
+ * the total available to its PE and the needs of its sibling devices.
+ *
+ * Returns @request unchanged when it fits the per-device quota, or when
+ * the PE or its devices cannot be determined; otherwise @request
+ * clamped to the quota after spare MSIs have been redistributed.
+ */
+static int msi_quota_for_device(struct pci_dev *dev, int request)
+{
+	struct msi_counts counts;
+	struct device_node *pe;
+	int pe_total;
+
+	pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev),
+		  request);
+
+	pe = find_pe_total_msi(dev, &pe_total);
+	if (!pe)
+		pe = find_pe_dn(dev, &pe_total);
+
+	if (!pe) {
+		pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev));
+		goto done;
+	}
+
+	pr_debug("rtas_msi: found PE %pOF\n", pe);
+
+	memset(&counts, 0, sizeof(struct msi_counts));
+
+	/* How many devices sit below this PE? */
+	pci_traverse_device_nodes(pe, count_non_bridge_devices, &counts);
+
+	if (counts.num_devices == 0) {
+		pr_err("rtas_msi: found 0 devices under PE for %s\n",
+			pci_name(dev));
+		goto done;
+	}
+
+	/* An even share is enough: nothing more to calculate */
+	counts.quota = pe_total / counts.num_devices;
+	if (request <= counts.quota)
+		goto done;
+
+	/* Otherwise see what the other devices leave unused */
+	counts.requestor = pci_device_to_OF_node(dev);
+	counts.request = request;
+	pci_traverse_device_nodes(pe, count_spare_msis, &counts);
+
+	/*
+	 * If the total does not divide evenly among the devices, the
+	 * remainder is spare as well.
+	 */
+	counts.spare += pe_total % counts.num_devices;
+
+	/* Share the spare MSIs among the over-quota requestors */
+	if (counts.over_quota)
+		counts.quota += counts.spare / counts.over_quota;
+
+	/* Finally clamp the request to the possibly adjusted quota */
+	request = min(counts.quota, request);
+
+	pr_debug("rtas_msi: request clamped to quota %d\n", request);
+done:
+	of_node_put(pe);
+
+	return request;
+}
+
+/*
+ * Rewrite the MSI address of @pdev so that it fits in 32 bits: the low
+ * address dword is rebuilt from the current high dword and the high
+ * dword is then cleared. Called by rtas_prepare_msi_irqs() when the
+ * 32-bit MSI RTAS function failed on a Gen2-speed bus.
+ */
+static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
+{
+	u32 addr_hi, addr_lo;
+
+	/*
+	 * We should only get in here for IODA1 configs. This is based on the
+	 * fact that we using RTAS for MSIs, we don't have the 32 bit MSI RTAS
+	 * support, and we are in a PCIe Gen2 slot.
+	 */
+	dev_info(&pdev->dev,
+		 "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n");
+	pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi);
+	/* take the high dword shifted right by 16, place it at bit 4 below 0xffff0000 */
+	addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4);
+	pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo);
+	pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0);
+}
+
+/*
+ * Ask firmware to set up @nvec_in MSI or MSI-X vectors for @pdev.
+ *
+ * @type: PCI_CAP_ID_MSIX or PCI_CAP_ID_MSI.
+ * @arg:  not used by this function.
+ *
+ * Returns 0 when firmware granted exactly the number asked for. A
+ * positive return is the smaller number of vectors that the device tree
+ * request property or the PE quota would allow. Otherwise the value
+ * reported by check_req() or rtas_change_msi() is returned as is.
+ */
+static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
+				 msi_alloc_info_t *arg)
+{
+	struct pci_dn *pdn;
+	int quota, rc;
+	int nvec = nvec_in;
+	int use_32bit_msi_hack = 0;
+
+	/* Does the device tree allow this many vectors at all? */
+	if (type == PCI_CAP_ID_MSIX)
+		rc = check_req_msix(pdev, nvec);
+	else
+		rc = check_req_msi(pdev, nvec);
+
+	if (rc)
+		return rc;
+
+	quota = msi_quota_for_device(pdev, nvec);
+
+	if (quota && quota < nvec)
+		return quota;
+
+	/*
+	 * Firmware currently refuse any non power of two allocation
+	 * so we round up if the quota will allow it.
+	 */
+	if (type == PCI_CAP_ID_MSIX) {
+		int m = roundup_pow_of_two(nvec);
+		quota = msi_quota_for_device(pdev, m);
+
+		if (quota >= m)
+			nvec = m;
+	}
+
+	pdn = pci_get_pdn(pdev);
+
+	/*
+	 * Try the new more explicit firmware interface, if that fails fall
+	 * back to the old interface. The old interface is known to never
+	 * return MSI-Xs.
+	 */
+again:
+	if (type == PCI_CAP_ID_MSI) {
+		if (pdev->no_64bit_msi) {
+			rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
+			if (rc < 0) {
+				/*
+				 * We only want to run the 32 bit MSI hack below if
+				 * the max bus speed is Gen2 speed
+				 */
+				if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT)
+					return rc;
+
+				use_32bit_msi_hack = 1;
+			}
+		} else
+			rc = -1;	/* force the RTAS_CHANGE_MSI_FN attempt below */
+
+		if (rc < 0)
+			rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
+
+		if (rc < 0) {
+			pr_debug("rtas_msi: trying the old firmware call.\n");
+			rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec);
+		}
+
+		if (use_32bit_msi_hack && rc > 0)
+			rtas_hack_32bit_msi_gen2(pdev);
+	} else
+		rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
+
+	if (rc != nvec) {
+		/* If we rounded the count up, retry once with the original one */
+		if (nvec != nvec_in) {
+			nvec = nvec_in;
+			goto again;
+		}
+		pr_debug("rtas_msi: rtas_change_msi() failed\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * msi_domain_ops prepare hook: pick MSI-X or MSI depending on whether
+ * MSI-X is enabled on the PCI device behind @dev, then let
+ * rtas_prepare_msi_irqs() negotiate @nvec vectors with firmware.
+ */
+static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev,
+				   int nvec, msi_alloc_info_t *arg)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int type = PCI_CAP_ID_MSI;
+
+	if (pdev->msix_enabled)
+		type = PCI_CAP_ID_MSIX;
+
+	return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
+}
+
+/*
+ * ->msi_free() is called before irq_domain_free_irqs_top() when the
+ * handler data is still available. Use that to clear the XIVE
+ * controller data.
+ *
+ * Does nothing unless xive_enabled(); @domain and @info are unused.
+ */
+static void pseries_msi_ops_msi_free(struct irq_domain *domain,
+				     struct msi_domain_info *info,
+				     unsigned int irq)
+{
+	if (xive_enabled())
+		xive_irq_free_data(irq);
+}
+
+/*
+ * RTAS can not disable one MSI at a time. It's all or nothing. Do it
+ * at the end after all IRQs have been freed.
+ *
+ * Warns once and bails out if @dev is not a PCI device.
+ */
+static void pseries_msi_post_free(struct irq_domain *domain, struct device *dev)
+{
+	if (WARN_ON_ONCE(!dev_is_pci(dev)))
+		return;
+
+	rtas_disable_msi(to_pci_dev(dev));
+}
+
+/* MSI domain callbacks; referenced by pseries_msi_domain_info below */
+static struct msi_domain_ops pseries_pci_msi_domain_ops = {
+	.msi_prepare	= pseries_msi_ops_prepare,
+	.msi_free	= pseries_msi_ops_msi_free,
+	.msi_post_free	= pseries_msi_post_free,
+};
+
+/*
+ * irq_shutdown hook: forward the shutdown to the parent irq_data's chip
+ * if that chip implements irq_shutdown.
+ */
+static void pseries_msi_shutdown(struct irq_data *d)
+{
+	struct irq_data *parent = d->parent_data;
+
+	if (parent->chip->irq_shutdown)
+		parent->chip->irq_shutdown(parent);
+}
+
+/* irq_mask hook: mask at the PCI MSI level first, then at the parent chip. */
+static void pseries_msi_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+/* irq_unmask hook: unmask at the PCI MSI level first, then at the parent chip. */
+static void pseries_msi_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+/*
+ * irq_write_msi_msg hook: only cache @msg in the MSI descriptor; nothing
+ * is written to the device (see the comment in the body for why).
+ */
+static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *entry = irq_data_get_msi_desc(data);
+
+	/*
+	 * Do not update the MSIx vector table. It's not strictly necessary
+	 * because the table is initialized by the underlying hypervisor, PowerVM
+	 * or QEMU/KVM. However, if the MSIx vector entry is cleared, any further
+	 * activation will fail. This can happen in some drivers (eg. IPR) which
+	 * deactivate an IRQ used for testing MSI support.
+	 */
+	entry->msg = *msg;
+}
+
+/* irq_chip referenced by pseries_msi_domain_info for the PCI MSI domain. */
+static struct irq_chip pseries_pci_msi_irq_chip = {
+	.name		= "pSeries-PCI-MSI",
+	.irq_shutdown	= pseries_msi_shutdown,
+	.irq_mask	= pseries_msi_mask,
+	.irq_unmask	= pseries_msi_unmask,
+	.irq_eoi	= irq_chip_eoi_parent,
+	.irq_write_msi_msg	= pseries_msi_write_msg,
+};
+
+
+/*
+ * Set MSI_FLAG_MSIX_CONTIGUOUS as there is no way to express to
+ * firmware to request a discontiguous or non-zero based range of
+ * MSI-X entries. Core code will reject such setup attempts.
+ *
+ * The domain is wired to pseries_pci_msi_domain_ops and
+ * pseries_pci_msi_irq_chip.
+ */
+static struct msi_domain_info pseries_msi_domain_info = {
+	.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_MULTI_PCI_MSI  | MSI_FLAG_PCI_MSIX |
+		  MSI_FLAG_MSIX_CONTIGUOUS),
+	.ops   = &pseries_pci_msi_domain_ops,
+	.chip  = &pseries_pci_msi_irq_chip,
+};
+
+/* Fill @msg by reading the message back through __pci_read_msi_msg(). */
+static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	__pci_read_msi_msg(desc, msg);
+}
+
+/*
+ * irq_chip installed on each interrupt by pseries_irq_domain_alloc().
+ * Mask, unmask, EOI and affinity are delegated to the parent chip.
+ */
+static struct irq_chip pseries_msi_irq_chip = {
+	.name			= "pSeries-MSI",
+	.irq_shutdown		= pseries_msi_shutdown,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_compose_msi_msg	= pseries_msi_compose_msg,
+};
+
+/*
+ * Allocate one interrupt in the parent domain for @hwirq, described as
+ * a two-cell fwspec (hwirq, IRQ_TYPE_EDGE_RISING).
+ *
+ * Returns 0 on success or the error from irq_domain_alloc_irqs_parent().
+ */
+static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq,
+					   irq_hw_number_t hwirq)
+{
+	struct irq_fwspec parent_fwspec = {
+		.fwnode		= domain->parent->fwnode,
+		.param_count	= 2,
+		.param		= { hwirq, IRQ_TYPE_EDGE_RISING },
+	};
+
+	return irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+}
+
+/*
+ * ->alloc() for the pSeries-MSI hierarchy domain.
+ *
+ * Queries RTAS for the hardware interrupt number backing the MSI
+ * descriptor in @arg, then allocates @nr_irqs consecutive interrupts in
+ * the parent domain and binds each one to pseries_msi_irq_chip.
+ *
+ * Returns 0 on success. On failure every parent interrupt allocated so
+ * far is released and the error is returned.
+ */
+static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs, void *arg)
+{
+	struct pci_controller *phb = domain->host_data;
+	msi_alloc_info_t *info = arg;
+	struct msi_desc *desc = info->desc;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+	int hwirq;
+	int i, ret;
+
+	hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index);
+	if (hwirq < 0) {
+		dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
+		return hwirq;
+	}
+
+	dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+		phb->dn, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i);
+		if (ret)
+			goto out;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &pseries_msi_irq_chip, domain->host_data);
+	}
+
+	return 0;
+
+out:
+	/* TODO: handle RTAS cleanup in ->msi_finish() ? */
+	/*
+	 * Exactly "i" parent interrupts (indices 0 .. i - 1) were allocated
+	 * before the failure. Freeing "i - 1" would leak one of them and
+	 * would wrap around to a huge count when the very first one failed.
+	 */
+	irq_domain_free_irqs_parent(domain, virq, i);
+	return ret;
+}
+
+/*
+ * ->free() for the pSeries-MSI hierarchy domain. Only emits a debug
+ * trace; no resources are released here.
+ */
+static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct pci_controller *phb = irq_data_get_irq_chip_data(d);
+
+	pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
+
+	/* XIVE domain data is cleared through ->msi_free() */
+}
+
+/* Operations of the per-PHB device domain created in __pseries_msi_allocate_domains(). */
+static const struct irq_domain_ops pseries_irq_domain_ops = {
+	.alloc  = pseries_irq_domain_alloc,
+	.free   = pseries_irq_domain_free,
+};
+
+/*
+ * Create the per-PHB interrupt domains: a named fwnode, a hierarchy
+ * domain of @count interrupts below the default host, and the PCI MSI
+ * domain stacked on top of it.
+ *
+ * Returns 0 on success, -ENOMEM on any failure. On failure everything
+ * created so far is torn down in reverse order of creation (the domain
+ * is removed before the fwnode it references is freed) and the @phb
+ * pointers are reset to NULL, so that a later call to
+ * pseries_msi_free_domains() does not release them a second time.
+ */
+static int __pseries_msi_allocate_domains(struct pci_controller *phb,
+					  unsigned int count)
+{
+	struct irq_domain *parent = irq_get_default_host();
+
+	phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI",
+						       phb->global_number);
+	if (!phb->fwnode)
+		return -ENOMEM;
+
+	phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+						      phb->fwnode,
+						      &pseries_irq_domain_ops, phb);
+	if (!phb->dev_domain) {
+		pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		goto err_free_fwnode;
+	}
+
+	phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn),
+						    &pseries_msi_domain_info,
+						    phb->dev_domain);
+	if (!phb->msi_domain) {
+		pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		goto err_remove_dev_domain;
+	}
+
+	return 0;
+
+err_remove_dev_domain:
+	irq_domain_remove(phb->dev_domain);
+	phb->dev_domain = NULL;
+err_free_fwnode:
+	irq_domain_free_fwnode(phb->fwnode);
+	phb->fwnode = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Size and create the MSI domains of @phb. The interrupt count comes
+ * from __find_pe_total_msi(); -ENOSPC is returned when that lookup
+ * reports nothing, otherwise the result of
+ * __pseries_msi_allocate_domains().
+ */
+int pseries_msi_allocate_domains(struct pci_controller *phb)
+{
+	int count;
+
+	if (__find_pe_total_msi(phb->dn, &count))
+		return __pseries_msi_allocate_domains(phb, count);
+
+	pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n",
+	       phb->dn, phb->global_number);
+	return -ENOSPC;
+}
+
+/*
+ * Release the interrupt domains of @phb: the MSI domain first, then the
+ * device domain, then the fwnode. Each step is skipped when the
+ * corresponding pointer is NULL.
+ */
+void pseries_msi_free_domains(struct pci_controller *phb)
+{
+	if (phb->msi_domain)
+		irq_domain_remove(phb->msi_domain);
+	if (phb->dev_domain)
+		irq_domain_remove(phb->dev_domain);
+	if (phb->fwnode)
+		irq_domain_free_fwnode(phb->fwnode);
+}
+
+/*
+ * pci_irq_fixup hook: when a device has an LSI and also advertises
+ * req#msi/x, disable the MSIs that are already configured on it.
+ */
+static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
+{
+	if (!pdev->irq) {
+		/* No LSI -> leave MSIs (if any) configured */
+		dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n");
+	} else if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) {
+		/* No MSI -> MSIs can't have been assigned by fw, leave LSI */
+		dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n");
+	} else {
+		dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n");
+		rtas_disable_msi(pdev);
+	}
+}
+
+/*
+ * Look up the RTAS tokens needed for MSI support and, when both exist,
+ * install rtas_msi_pci_irq_fixup() as the platform pci_irq_fixup hook.
+ * Returns -1 when either token is missing, 0 otherwise.
+ */
+static int rtas_msi_init(void)
+{
+	bool have_tokens;
+
+	query_token  = rtas_function_token(RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER);
+	change_token = rtas_function_token(RTAS_FN_IBM_CHANGE_MSI);
+
+	have_tokens = query_token != RTAS_UNKNOWN_SERVICE &&
+		      change_token != RTAS_UNKNOWN_SERVICE;
+	if (!have_tokens) {
+		pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n");
+		return -1;
+	}
+
+	pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
+
+	/* Warn if another hook is already installed; it is replaced below. */
+	WARN_ON(ppc_md.pci_irq_fixup);
+	ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
+
+	return 0;
+}
+machine_arch_initcall(pseries, rtas_msi_init);
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
new file mode 100644
index 0000000000..8130c37962
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  c 2001 PPC 64 Team, IBM Corp
+ *
+ * /dev/nvram driver for PPC64
+ */
+
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <asm/nvram.h>
+#include <asm/rtas.h>
+#include <asm/machdep.h>
+
+/* Max bytes to read/write in one go */
+#define NVRW_CNT 0x20
+
+static unsigned int nvram_size;		/* from the nvram node's "#bytes" property */
+static int nvram_fetch, nvram_store;	/* RTAS tokens, set in pSeries_nvram_init() */
+static char nvram_buf[NVRW_CNT];	/* assume this is in the first 4GB */
+static DEFINE_SPINLOCK(nvram_lock);	/* held around every use of nvram_buf */
+
+/* See clobbering_unread_rtas_event() */
+#define NVRAM_RTAS_READ_TIMEOUT 5		/* seconds */
+static time64_t last_unread_rtas_event;		/* timestamp */
+
+#ifdef CONFIG_PSTORE
+time64_t last_rtas_event;	/* updated by nvram_write_error_log() */
+#endif
+
+/*
+ * Read up to @count bytes of NVRAM starting at *@index into @buf, in
+ * chunks of at most NVRW_CNT bytes bounced through nvram_buf.
+ *
+ * Returns the number of bytes read and advances *@index by that amount,
+ * 0 when *@index is at or past the end, -ENODEV when NVRAM or the fetch
+ * token is unavailable, or -EIO when an RTAS call fails or transfers a
+ * different length than requested (*@index is then left untouched).
+ */
+static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags, chunk;
+	unsigned int pos;
+	char *dst = buf;
+	int done;
+
+	if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	if (*index >= nvram_size)
+		return 0;
+
+	/* Clamp the request to what is left in NVRAM. */
+	pos = *index;
+	if (pos + count > nvram_size)
+		count = nvram_size - pos;
+
+	spin_lock_irqsave(&nvram_lock, flags);
+
+	while (count != 0) {
+		chunk = count > NVRW_CNT ? NVRW_CNT : count;
+
+		if (rtas_call(nvram_fetch, 3, 2, &done, pos, __pa(nvram_buf),
+			      chunk) != 0 || chunk != done) {
+			spin_unlock_irqrestore(&nvram_lock, flags);
+			return -EIO;
+		}
+
+		memcpy(dst, nvram_buf, chunk);
+
+		dst += chunk;
+		pos += chunk;
+		count -= chunk;
+	}
+
+	spin_unlock_irqrestore(&nvram_lock, flags);
+
+	*index = pos;
+	return dst - buf;
+}
+
+/*
+ * Write up to @count bytes from @buf to NVRAM starting at *@index, in
+ * chunks of at most NVRW_CNT bytes bounced through nvram_buf.
+ *
+ * Returns the number of bytes written and advances *@index by that
+ * amount, 0 when *@index is at or past the end, -ENODEV when NVRAM or
+ * the store token is unavailable, or -EIO when an RTAS call fails or
+ * transfers a different length than requested (*@index is then left
+ * untouched).
+ */
+static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags, chunk;
+	unsigned int pos;
+	const char *src = buf;
+	int done;
+
+	if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	if (*index >= nvram_size)
+		return 0;
+
+	/* Clamp the request to what is left in NVRAM. */
+	pos = *index;
+	if (pos + count > nvram_size)
+		count = nvram_size - pos;
+
+	spin_lock_irqsave(&nvram_lock, flags);
+
+	while (count != 0) {
+		chunk = count > NVRW_CNT ? NVRW_CNT : count;
+
+		memcpy(nvram_buf, src, chunk);
+
+		if (rtas_call(nvram_store, 3, 2, &done, pos, __pa(nvram_buf),
+			      chunk) != 0 || chunk != done) {
+			spin_unlock_irqrestore(&nvram_lock, flags);
+			return -EIO;
+		}
+
+		src += chunk;
+		pos += chunk;
+		count -= chunk;
+	}
+
+	spin_unlock_irqrestore(&nvram_lock, flags);
+
+	*index = pos;
+	return src - buf;
+}
+
+/*
+ * Report the NVRAM size in bytes, or -ENODEV when no size is known.
+ *
+ * The error is returned from its own statement rather than from a
+ * conditional expression: mixing the unsigned nvram_size with -ENODEV
+ * in "?:" converts the error to unsigned int, which then reaches the
+ * caller as a large positive ssize_t instead of a negative errno.
+ */
+static ssize_t pSeries_nvram_get_size(void)
+{
+	if (!nvram_size)
+		return -ENODEV;
+
+	return nvram_size;
+}
+
+/* nvram_write_error_log
+ *
+ * Buffer an error log into the rtas_log_partition of nvram so that the
+ * failure information is still there to decode later. A successful
+ * write also refreshes the "last RTAS event" timestamps.
+ */
+int nvram_write_error_log(char * buff, int length,
+                          unsigned int err_type, unsigned int error_log_cnt)
+{
+	int rc;
+
+	rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
+				      err_type, error_log_cnt);
+	if (rc)
+		return rc;
+
+	last_unread_rtas_event = ktime_get_real_seconds();
+#ifdef CONFIG_PSTORE
+	last_rtas_event = ktime_get_real_seconds();
+#endif
+
+	return 0;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ *
+ * Thin wrapper: forwards to nvram_read_partition() on rtas_log_partition
+ * and returns its result unchanged.
+ */
+int nvram_read_error_log(char *buff, int length,
+			unsigned int *err_type, unsigned int *error_log_cnt)
+{
+	return nvram_read_partition(&rtas_log_partition, buff, length,
+						err_type, error_log_cnt);
+}
+
+/* Nothing is actually zeroed here: the event_logged word at the start
+ * of the RTAS log partition is set to ERR_FLAG_ALREADY_LOGGED to record
+ * that the event is safely in syslog.
+ *
+ * Returns 0 on success, -1 when the partition has no index, or the
+ * (non-positive) result of the failed nvram write.
+ */
+int nvram_clear_error_log(void)
+{
+	int clear_word = ERR_FLAG_ALREADY_LOGGED;
+	loff_t tmp_index;
+	int rc;
+
+	if (rtas_log_partition.index == -1)
+		return -1;
+
+	/* Write through a scratch offset so the partition index stays put. */
+	tmp_index = rtas_log_partition.index;
+	rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
+	if (rc > 0) {
+		last_unread_rtas_event = 0;
+		return 0;
+	}
+
+	printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+	return rc;
+}
+
+/*
+ * Would logging an oops/panic report overwrite an RTAS event that
+ * rtas_errd has not yet had a chance to read and process? That can only
+ * be the case when ibm,rtas-log doubles as the oops partition. Returns 1
+ * if so, else 0.
+ *
+ * An event that rtas_errd has not picked up within
+ * NVRAM_RTAS_READ_TIMEOUT seconds is assumed to never be read.
+ */
+int clobbering_unread_rtas_event(void)
+{
+	if (oops_log_partition.index != rtas_log_partition.index)
+		return 0;
+
+	if (!last_unread_rtas_event)
+		return 0;
+
+	return ktime_get_real_seconds() - last_unread_rtas_event <=
+						NVRAM_RTAS_READ_TIMEOUT;
+}
+
+/*
+ * Scan nvram, set up the RTAS log partition, then set up the oops
+ * partition, telling it whether the RTAS partition was initialized.
+ * Always returns 0.
+ */
+static int __init pseries_nvram_init_log_partitions(void)
+{
+	bool rtas_part_ok;
+
+	/* Scan nvram for partitions */
+	nvram_scan_partitions();
+
+	rtas_part_ok = nvram_init_os_partition(&rtas_log_partition) == 0;
+	nvram_init_oops_partition(rtas_part_ok);
+
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
+
+/*
+ * Discover the nvram device-tree node, record its size and the RTAS
+ * fetch/store tokens, and install the pSeries nvram accessors in ppc_md.
+ *
+ * Returns 0 on success, -ENODEV when there is no nvram node, or -EIO
+ * when its "#bytes" property is missing or has an unexpected length.
+ */
+int __init pSeries_nvram_init(void)
+{
+	struct device_node *nvram;
+	const __be32 *nbytes_p;
+	unsigned int proplen;
+	int ret = 0;
+
+	nvram = of_find_node_by_type(NULL, "nvram");
+	if (!nvram)
+		return -ENODEV;
+
+	nbytes_p = of_get_property(nvram, "#bytes", &proplen);
+	if (!nbytes_p || proplen != sizeof(unsigned int)) {
+		ret = -EIO;
+		goto out_put;
+	}
+
+	nvram_size = be32_to_cpup(nbytes_p);
+
+	nvram_fetch = rtas_function_token(RTAS_FN_NVRAM_FETCH);
+	nvram_store = rtas_function_token(RTAS_FN_NVRAM_STORE);
+	printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
+
+	ppc_md.nvram_read	= pSeries_nvram_read;
+	ppc_md.nvram_write	= pSeries_nvram_write;
+	ppc_md.nvram_size	= pSeries_nvram_get_size;
+
+out_put:
+	of_node_put(nvram);
+	return ret;
+}
+
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
new file mode 100644
index 0000000000..23241c71ef
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <asm/prom.h>
+
+#include "of_helpers.h"
+
+/**
+ * pseries_of_derive_parent - basically like dirname(1)
+ * @path:  the full_name of a node to be added to the tree
+ *
+ * Returns the node which should be the parent of the node
+ * described by path.  E.g., for path = "/foo/bar", returns
+ * the node with full_name = "/foo".
+ *
+ * A path with no parent component (e.g. "/foo" or "foo") resolves to the
+ * root node. Returns ERR_PTR(-EINVAL) when @path is "/" or no parent node
+ * is found, and ERR_PTR(-ENOMEM) when the parent path cannot be
+ * allocated.
+ */
+struct device_node *pseries_of_derive_parent(const char *path)
+{
+	struct device_node *parent;
+	char *parent_path = NULL;
+	const char *base;
+
+	/* reject if path is "/" */
+	if (!strcmp(path, "/"))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Everything before the '/' that precedes the last component is the
+	 * parent path; we do not want that trailing '/' character. Comparing
+	 * offsets avoids forming a pointer in front of @path when it holds
+	 * no '/' at all.
+	 */
+	base = kbasename(path);
+	if (base - path > 1) {
+		parent_path = kstrndup(path, base - path - 1, GFP_KERNEL);
+		if (!parent_path)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	parent = of_find_node_by_path(parent_path ? parent_path : "/");
+
+	/*
+	 * Free by ownership, not by content: a duplicated path can itself
+	 * be "/" (e.g. for "//foo") and must still be released.
+	 */
+	kfree(parent_path);
+
+	return parent ? parent : ERR_PTR(-EINVAL);
+}
+
+
+/* Helper Routines to convert between drc_index to cpu numbers
+ *
+ * of_read_drc_info_cell() decodes one entry of the property *prop,
+ * starting at *curval, into @data. An entry is read as two strings
+ * followed by five 32-bit big-endian cells. On success *curval is
+ * advanced past the entry and 0 is returned; -EINVAL is returned when
+ * @data is NULL or the property ends early. The two string fields of
+ * @data point into the property data; they are not copied.
+ */
+
+int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
+			struct of_drc_info *data)
+{
+	const char *p = (char *)(*curval);
+	const __be32 *p2;
+
+	if (!data)
+		return -EINVAL;
+
+	/* Get drc-type:encode-string */
+	data->drc_type = (char *)p;
+	p = of_prop_next_string(*prop, p);
+	if (!p)
+		return -EINVAL;
+
+	/* Get drc-name-prefix:encode-string */
+	data->drc_name_prefix = (char *)p;
+	p = of_prop_next_string(*prop, p);
+	if (!p)
+		return -EINVAL;
+
+	/* Get drc-index-start:encode-int */
+	p2 = (const __be32 *)p;
+	data->drc_index_start = be32_to_cpu(*p2);
+
+	/* Get drc-name-suffix-start:encode-int */
+	p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
+	if (!p2)
+		return -EINVAL;
+
+	/* Get number-sequential-elements:encode-int */
+	p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
+	if (!p2)
+		return -EINVAL;
+
+	/* Get sequential-increment:encode-int */
+	p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
+	if (!p2)
+		return -EINVAL;
+
+	/* Get drc-power-domain:encode-int */
+	p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
+	if (!p2)
+		return -EINVAL;
+
+	/* Should now know end of current entry */
+	(*curval) = (void *)(++p2);
+	/* Index of the last element: start + (count - 1) * increment. */
+	data->last_drc_index = data->drc_index_start +
+		((data->num_sequential_elems - 1) * data->sequential_inc);
+
+	return 0;
+}
+EXPORT_SYMBOL(of_read_drc_info_cell);
diff --git a/arch/powerpc/platforms/pseries/of_helpers.h b/arch/powerpc/platforms/pseries/of_helpers.h
new file mode 100644
index 0000000000..decad6553d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/of_helpers.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PSERIES_OF_HELPERS_H
+#define _PSERIES_OF_HELPERS_H
+
+#include <linux/of.h>
+
+/* Parent node for a full path, dirname(1)-style; ERR_PTR() on failure. */
+struct device_node *pseries_of_derive_parent(const char *path);
+
+#endif /* _PSERIES_OF_HELPERS_H */
diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c
new file mode 100644
index 0000000000..fedc61599e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt)	"papr-sysparm: " fmt
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <asm/rtas.h>
+#include <asm/papr-sysparm.h>
+#include <asm/rtas-work-area.h>
+
+/*
+ * Allocate a zero-filled system parameter buffer with GFP_KERNEL.
+ * Returns NULL on allocation failure; release the buffer with
+ * papr_sysparm_buf_free().
+ */
+struct papr_sysparm_buf *papr_sysparm_buf_alloc(void)
+{
+	return kzalloc(sizeof(struct papr_sysparm_buf), GFP_KERNEL);
+}
+
+/* Release a buffer obtained from papr_sysparm_buf_alloc(). */
+void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
+{
+	kfree(buf);
+}
+
+/**
+ * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
+ * @param: PAPR system parameter token as described in
+ *         7.3.16 "System Parameters Option".
+ * @buf: A &struct papr_sysparm_buf as returned from papr_sysparm_buf_alloc().
+ *
+ * Query the specified parameter and, if it is available, store the
+ * result in @buf: a be16 length header followed by the value, which may
+ * be a string or binary data. See &struct papr_sysparm_buf.
+ *
+ * The caller-supplied buffer is copied unmodified into the work area
+ * before ibm,get-system-parameter is called, because for at least one
+ * parameter (60, OS Service Entitlement Status) the results depend on
+ * the incoming contents of the work area.
+ *
+ * A defined parameter may not be implemented on a given system, and
+ * some implemented parameters may not be available to all partitions
+ * on a system. A parameter's disposition may change at any time due
+ * to system configuration changes or partition migration.
+ *
+ * Context: This function may sleep.
+ *
+ * Return: 0 on success, -errno otherwise. @buf is unmodified on error.
+ */
+
+int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf)
+{
+	const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER);
+	struct rtas_work_area *wa;
+	s32 rc;
+	int ret;
+
+	might_sleep();
+
+	if (WARN_ON(!buf))
+		return -EFAULT;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	wa = rtas_work_area_alloc(sizeof(*buf));
+
+	memcpy(rtas_work_area_raw_buf(wa), buf, sizeof(*buf));
+
+	/* Reissue the call for as long as rtas_busy_delay() asks for it. */
+	do {
+		rc = rtas_call(token, 3, 1, NULL, param.token,
+			       rtas_work_area_phys(wa),
+			       rtas_work_area_size(wa));
+	} while (rtas_busy_delay(rc));
+
+	switch (rc) {
+	case 0:
+		memcpy(buf, rtas_work_area_raw_buf(wa), sizeof(*buf));
+		ret = 0;
+		break;
+	case -1: /* Hardware/platform error */
+		ret = -EIO;
+		break;
+	case -3: /* parameter not implemented */
+		ret = -EOPNOTSUPP;
+		break;
+	case -9002: /* this partition not authorized to retrieve this parameter */
+		ret = -EPERM;
+		break;
+	case -9999: /* "parameter error" e.g. the buffer is too small */
+		ret = -EINVAL;
+		break;
+	default:
+		pr_err("unexpected ibm,get-system-parameter result %d\n", rc);
+		ret = -EIO;
+		break;
+	}
+
+	rtas_work_area_free(wa);
+
+	return ret;
+}
+
+/*
+ * papr_sysparm_set() - Update the value of a PAPR system parameter.
+ * @param: PAPR system parameter token.
+ * @buf: Buffer holding the new value; it is copied into an RTAS work
+ *       area and is not modified.
+ *
+ * Calls might_sleep(), so it must not be used from atomic context.
+ *
+ * Return: 0 on success; -EFAULT for a NULL @buf, -ENOENT when
+ * ibm,set-system-parameter is unavailable, -EOPNOTSUPP, -EPERM or
+ * -EINVAL for the matching firmware statuses, and -EIO for a platform
+ * error or any unexpected status.
+ */
+int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf)
+{
+	const s32 token = rtas_function_token(RTAS_FN_IBM_SET_SYSTEM_PARAMETER);
+	struct rtas_work_area *wa;
+	s32 rc;
+	int ret;
+
+	might_sleep();
+
+	if (WARN_ON(!buf))
+		return -EFAULT;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	wa = rtas_work_area_alloc(sizeof(*buf));
+
+	memcpy(rtas_work_area_raw_buf(wa), buf, sizeof(*buf));
+
+	/* Reissue the call for as long as rtas_busy_delay() asks for it. */
+	do {
+		rc = rtas_call(token, 2, 1, NULL, param.token,
+			       rtas_work_area_phys(wa));
+	} while (rtas_busy_delay(rc));
+
+	switch (rc) {
+	case 0:
+		ret = 0;
+		break;
+	case -1: /* Hardware/platform error */
+		ret = -EIO;
+		break;
+	case -3: /* parameter not supported */
+		ret = -EOPNOTSUPP;
+		break;
+	case -9002: /* this partition not authorized to modify this parameter */
+		ret = -EPERM;
+		break;
+	case -9999: /* "parameter error" e.g. invalid input data */
+		ret = -EINVAL;
+		break;
+	default:
+		pr_err("unexpected ibm,set-system-parameter result %d\n", rc);
+		ret = -EIO;
+		break;
+	}
+
+	rtas_work_area_free(wa);
+
+	return ret;
+}
diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
new file mode 100644
index 0000000000..526c621b09
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Platform energy and frequency attributes driver
+ *
+ * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a
+ * directory structure containing files in keyword - value pairs that specify
+ * energy and frequency configuration of the system.
+ *
+ * The format of exposing the sysfs information is as follows:
+ * /sys/firmware/papr/energy_scale_info/
+ *  |-- <id>/
+ *    |-- desc
+ *    |-- value
+ *    |-- value_desc (if exists)
+ *  |-- <id>/
+ *    |-- desc
+ *    |-- value
+ *    |-- value_desc (if exists)
+ *
+ * Copyright 2022 IBM Corp.
+ */
+
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "pseries.h"
+
+/*
+ * Flag attributes to fetch either all or one attribute from the HCALL
+ * flag = BE(0) => fetch all attributes with firstAttributeId = 0
+ * flag = BE(1) => fetch a single attribute with firstAttributeId = id
+ */
+#define ESI_FLAGS_ALL		0
+#define ESI_FLAGS_SINGLE	(1ull << 63)
+
+#define KOBJ_MAX_ATTRS		3
+
+#define ESI_HDR_SIZE		sizeof(struct h_energy_scale_info_hdr)
+#define ESI_ATTR_SIZE		sizeof(struct energy_scale_attribute)
+#define CURR_MAX_ESI_ATTRS	8
+
+/* One attribute record as read from the H_GET_ENERGY_SCALE_INFO output buffer. */
+struct energy_scale_attribute {
+	__be64 id;
+	__be64 val;
+	u8 desc[64];
+	u8 value_desc[64];
+} __packed;
+
+/*
+ * Header at the start of the hcall output buffer. array_offset is the
+ * byte offset, from the start of the buffer, of the attribute array.
+ */
+struct h_energy_scale_info_hdr {
+	__be64 num_attrs;
+	__be64 array_offset;
+	u8 data_header_version;
+} __packed;
+
+/* A sysfs attribute together with the attribute id it reports on. */
+struct papr_attr {
+	u64 id;
+	struct kobj_attribute kobj_attr;
+};
+
+/* sysfs group for one attribute id, holding up to KOBJ_MAX_ATTRS files. */
+struct papr_group {
+	struct attribute_group pg;
+	struct papr_attr pgattrs[KOBJ_MAX_ATTRS];
+};
+
+/* Array of groups allocated in papr_init(), one per reported attribute. */
+static struct papr_group *papr_groups;
+/* /sys/firmware/papr */
+static struct kobject *papr_kobj;
+/* /sys/firmware/papr/energy_scale_info */
+static struct kobject *esi_kobj;
+
+/*
+ * Fetch a single energy scale attribute by @id into @esi.
+ *
+ * Energy modes can change dynamically hence making a new hcall each time the
+ * information needs to be retrieved
+ *
+ * Returns 0 on success, -ENOMEM when the buffer cannot be allocated or
+ * grown, and -EIO when the hcall fails or reports more data than fits in
+ * the buffer. The buffer is released on every path.
+ */
+static int papr_get_attr(u64 id, struct energy_scale_attribute *esi)
+{
+	int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+	int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+	struct energy_scale_attribute *curr_esi;
+	struct h_energy_scale_info_hdr *hdr;
+	char *buf;
+
+	buf = kmalloc(esi_buf_size, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+retry:
+	ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE,
+				 id, virt_to_phys(buf),
+				 esi_buf_size);
+
+	/*
+	 * If the hcall fails with not enough memory for either the
+	 * header or data, attempt to allocate more
+	 */
+	if (ret == H_PARTIAL || ret == H_P4) {
+		char *temp_buf;
+
+		/* Make room for four more attribute records and try again. */
+		max_esi_attrs += 4;
+		esi_buf_size = ESI_HDR_SIZE + (max_esi_attrs * ESI_ATTR_SIZE);
+
+		temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL);
+		if (!temp_buf) {
+			/* krealloc() left the old buffer intact; free it. */
+			ret = -ENOMEM;
+			goto out_buf;
+		}
+		buf = temp_buf;
+
+		goto retry;
+	}
+
+	if (ret != H_SUCCESS) {
+		pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO\n");
+		ret = -EIO;
+		goto out_buf;
+	}
+
+	hdr = (struct h_energy_scale_info_hdr *) buf;
+	curr_esi = (struct energy_scale_attribute *)
+		(buf + be64_to_cpu(hdr->array_offset));
+
+	/* Refuse to read an attribute array that overruns the buffer. */
+	if (esi_buf_size <
+	    be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs)
+	    * sizeof(struct energy_scale_attribute))) {
+		ret = -EIO;
+		goto out_buf;
+	}
+
+	*esi = *curr_esi;
+
+out_buf:
+	kfree(buf);
+
+	return ret;
+}
+
+/*
+ * sysfs show handler for "desc": re-query the attribute and emit its
+ * description string.
+ */
+static ssize_t desc_show(struct kobject *kobj,
+			  struct kobj_attribute *kobj_attr,
+			  char *buf)
+{
+	struct energy_scale_attribute esi;
+	struct papr_attr *pattr;
+	int rc;
+
+	pattr = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	rc = papr_get_attr(pattr->id, &esi);
+	if (rc != 0)
+		return rc;
+
+	return sysfs_emit(buf, "%s\n", esi.desc);
+}
+
+/*
+ * sysfs show handler for "value": re-query the attribute and emit its
+ * numeric value, converted from big-endian.
+ */
+static ssize_t val_show(struct kobject *kobj,
+			 struct kobj_attribute *kobj_attr,
+			 char *buf)
+{
+	struct energy_scale_attribute esi;
+	struct papr_attr *pattr;
+	int rc;
+
+	pattr = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	rc = papr_get_attr(pattr->id, &esi);
+	if (rc != 0)
+		return rc;
+
+	return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val));
+}
+
+/*
+ * sysfs show handler for "value_desc": re-query the attribute and emit
+ * the string form of its value.
+ */
+static ssize_t val_desc_show(struct kobject *kobj,
+			      struct kobj_attribute *kobj_attr,
+			      char *buf)
+{
+	struct energy_scale_attribute esi;
+	struct papr_attr *pattr;
+	int rc;
+
+	pattr = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	rc = papr_get_attr(pattr->id, &esi);
+	if (rc != 0)
+		return rc;
+
+	return sysfs_emit(buf, "%s\n", esi.value_desc);
+}
+
+/*
+ * File name and show handler for each per-attribute sysfs file; the
+ * table index matches the slot used in papr_group.pgattrs[].
+ */
+static struct papr_ops_info {
+	const char *attr_name;
+	ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr,
+			char *buf);
+} ops_info[KOBJ_MAX_ATTRS] = {
+	{ "desc", desc_show },
+	{ "value", val_show },
+	{ "value_desc", val_desc_show },
+};
+
+/*
+ * Initialise @attr as a read-only (0444) sysfs attribute for attribute
+ * @id, taking its name and show handler from ops_info[@index].
+ */
+static void add_attr(u64 id, int index, struct papr_attr *attr)
+{
+	const struct papr_ops_info *ops = &ops_info[index];
+	struct kobj_attribute *kattr = &attr->kobj_attr;
+
+	attr->id = id;
+	sysfs_attr_init(&kattr->attr);
+	kattr->attr.name = ops->attr_name;
+	kattr->attr.mode = 0444;
+	kattr->show = ops->show;
+}
+
+/*
+ * Populate the attribute slots of @pg for attribute @id and register the
+ * group under esi_kobj. The "value_desc" file is left out unless
+ * @show_val_desc is set. Returns the result of sysfs_create_group().
+ */
+static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc)
+{
+	int slot;
+
+	for (slot = 0; slot < KOBJ_MAX_ATTRS; slot++) {
+		bool is_val_desc = !strcmp(ops_info[slot].attr_name, "value_desc");
+
+		if (is_val_desc && !show_val_desc)
+			continue;
+
+		add_attr(id, slot, &pg->pgattrs[slot]);
+		pg->pg.attrs[slot] = &pg->pgattrs[slot].kobj_attr.attr;
+	}
+
+	return sysfs_create_group(esi_kobj, &pg->pg);
+}
+
+
+/*
+ * Query all energy scale attributes once and create
+ * /sys/firmware/papr/energy_scale_info/<id>/ with one sysfs group per
+ * attribute.
+ *
+ * Returns 0 on success, -ENXIO when the firmware features are absent,
+ * -EIO when the hcall fails or its result overruns the buffer, -ENOMEM
+ * on allocation failure, or the error from sysfs group creation.
+ * Everything allocated here is released on failure.
+ */
+static int __init papr_init(void)
+{
+	int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+	int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+	struct h_energy_scale_info_hdr *esi_hdr;
+	struct energy_scale_attribute *esi_attrs;
+	uint64_t num_attrs;
+	char *esi_buf;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR) ||
+	    !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) {
+		return -ENXIO;
+	}
+
+	esi_buf = kmalloc(esi_buf_size, GFP_KERNEL);
+	if (esi_buf == NULL)
+		return -ENOMEM;
+	/*
+	 * hcall(
+	 * uint64 H_GET_ENERGY_SCALE_INFO,  // Get energy scale info
+	 * uint64 flags,            // Per the flag request
+	 * uint64 firstAttributeId, // The attribute id
+	 * uint64 bufferAddress,    // Guest physical address of the output buffer
+	 * uint64 bufferSize);      // The size in bytes of the output buffer
+	 */
+retry:
+
+	ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0,
+				 virt_to_phys(esi_buf), esi_buf_size);
+
+	/*
+	 * If the hcall fails with not enough memory for either the
+	 * header or data, attempt to allocate more
+	 */
+	if (ret == H_PARTIAL || ret == H_P4) {
+		char *temp_esi_buf;
+
+		/* Make room for four more attribute records and try again. */
+		max_esi_attrs += 4;
+		esi_buf_size = ESI_HDR_SIZE + (max_esi_attrs * ESI_ATTR_SIZE);
+
+		temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL);
+		if (!temp_esi_buf) {
+			/* krealloc() left the old buffer intact; free it. */
+			ret = -ENOMEM;
+			goto out_free_esi_buf;
+		}
+		esi_buf = temp_esi_buf;
+
+		goto retry;
+	}
+
+	if (ret != H_SUCCESS) {
+		pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret);
+		ret = -EIO;
+		goto out_free_esi_buf;
+	}
+
+	esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf;
+	num_attrs = be64_to_cpu(esi_hdr->num_attrs);
+	esi_attrs = (struct energy_scale_attribute *)
+		    (esi_buf + be64_to_cpu(esi_hdr->array_offset));
+
+	/* Refuse to walk an attribute array that overruns the buffer. */
+	if (esi_buf_size <
+	    be64_to_cpu(esi_hdr->array_offset) +
+	    (num_attrs * sizeof(struct energy_scale_attribute))) {
+		ret = -EIO;
+		goto out_free_esi_buf;
+	}
+
+	/* Every failure from here until group registration is an allocation failure. */
+	ret = -ENOMEM;
+
+	papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL);
+	if (!papr_groups)
+		goto out_free_esi_buf;
+
+	papr_kobj = kobject_create_and_add("papr", firmware_kobj);
+	if (!papr_kobj) {
+		pr_warn("kobject_create_and_add papr failed\n");
+		goto out_papr_groups;
+	}
+
+	esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj);
+	if (!esi_kobj) {
+		pr_warn("kobject_create_and_add energy_scale_info failed\n");
+		goto out_kobj;
+	}
+
+	/* Allocate the groups before registering */
+	for (idx = 0; idx < num_attrs; idx++) {
+		papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1,
+					    sizeof(*papr_groups[idx].pg.attrs),
+					    GFP_KERNEL);
+		if (!papr_groups[idx].pg.attrs)
+			goto out_pgattrs;
+
+		papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld",
+					     be64_to_cpu(esi_attrs[idx].id));
+		if (papr_groups[idx].pg.name == NULL)
+			goto out_pgattrs;
+	}
+
+	for (idx = 0; idx < num_attrs; idx++) {
+		bool show_val_desc = true;
+
+		/* Do not add the value desc attr if it does not exist */
+		if (strnlen(esi_attrs[idx].value_desc,
+			    sizeof(esi_attrs[idx].value_desc)) == 0)
+			show_val_desc = false;
+
+		ret = add_attr_group(be64_to_cpu(esi_attrs[idx].id),
+				     &papr_groups[idx],
+				     show_val_desc);
+		if (ret) {
+			pr_warn("Failed to create papr attribute group %s\n",
+				papr_groups[idx].pg.name);
+			goto out_pgattrs;
+		}
+	}
+
+	kfree(esi_buf);
+	return 0;
+out_pgattrs:
+	/*
+	 * papr_groups came from kcalloc(), so entries that were never
+	 * filled in hold NULL and kfree() ignores them. Walking the whole
+	 * array also releases a partially set up entry.
+	 */
+	for (i = 0; i < num_attrs; i++) {
+		kfree(papr_groups[i].pg.attrs);
+		kfree(papr_groups[i].pg.name);
+	}
+	kobject_put(esi_kobj);
+out_kobj:
+	kobject_put(papr_kobj);
+out_papr_groups:
+	kfree(papr_groups);
+out_free_esi_buf:
+	kfree(esi_buf);
+
+	return ret;
+}
+
+machine_device_initcall(pseries, papr_init);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
new file mode 100644
index 0000000000..1a53e048ce
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -0,0 +1,1581 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt)	"papr-scm: " fmt
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/ndctl.h>
+#include <linux/sched.h>
+#include <linux/libnvdimm.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/seq_buf.h>
+#include <linux/nd.h>
+
+#include <asm/plpar_wrappers.h>
+#include <asm/papr_pdsm.h>
+#include <asm/mce.h>
+#include <asm/unaligned.h>
+#include <linux/perf_event.h>
+
+#define BIND_ANY_ADDR (~0ul)
+
+#define PAPR_SCM_DIMM_CMD_MASK \
+	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
+	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
+	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
+	 (1ul << ND_CMD_CALL))
+
+/* DIMM health bitmap indicators */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED                   (1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_PMEM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_PMEM_SHUTDOWN_CLEAN            (1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_PMEM_EMPTY                     (1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_PMEM_HEALTH_CRITICAL           (1ULL << (63 - 4))
+/* SCM device will be garded off next IPL due to failure */
+#define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_PMEM_ENCRYPTED                 (1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_PMEM_SCRUBBED_AND_LOCKED       (1ULL << (63 - 9))
+
+/* Bits status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |		\
+				PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* Bits status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
+
+/* Bits status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
+				    PAPR_PMEM_HEALTH_FATAL |	\
+				    PAPR_PMEM_HEALTH_UNHEALTHY)
+
+#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
+#define PAPR_SCM_PERF_STATS_VERSION 0x1
+
+/*
+ * Struct holding a single performance metric: an 8-byte stat identifier
+ * followed by its big-endian 64-bit value.
+ */
+struct papr_scm_perf_stat {
+	u8 stat_id[8];
+	__be64 stat_val;
+} __packed;
+
+/*
+ * Struct exchanged between kernel and PHYP for fetching drc perf stats:
+ * a fixed header followed by a variable number of stat entries.
+ */
+struct papr_scm_perf_stats {
+	/* Filled with PAPR_SCM_PERF_STATS_EYECATCHER before the hcall */
+	u8 eye_catcher[8];
+	/* Should be PAPR_SCM_PERF_STATS_VERSION */
+	__be32 stats_version;
+	/* Number of stats following */
+	__be32 num_statistics;
+	/* zero or more performance metrics */
+	struct papr_scm_perf_stat scm_statistic[];
+} __packed;
+
+/* private struct associated with each region */
+struct papr_scm_priv {
+	struct platform_device *pdev;
+	struct device_node *dn;
+	/* DRC index handed to the H_SCM_* hcalls */
+	uint32_t drc_index;
+	/* Block count and per-block size; used when binding/validating a bind */
+	uint64_t blocks;
+	uint64_t block_size;
+	/* Upper limit for config-data offsets; reported as the config size */
+	int metadata_size;
+	bool is_volatile;
+	bool hcall_flush_required;
+
+	/* Address the drc is bound to, as returned by the bind/query hcalls */
+	uint64_t bound_addr;
+
+	struct nvdimm_bus_descriptor bus_desc;
+	struct nvdimm_bus *bus;
+	struct nvdimm *nvdimm;
+	struct resource res;
+	struct nd_region *region;
+	struct nd_interleave_set nd_set;
+	struct list_head region_list;
+
+	/* Protect dimm health data from concurrent read/writes */
+	struct mutex health_mutex;
+
+	/* Last time the health information of the dimm was updated */
+	unsigned long lasthealth_jiffies;
+
+	/* Health information for the dimm */
+	u64 health_bitmap;
+
+	/* Holds the last known dirty shutdown counter value */
+	u64 dirty_shutdown_counter;
+
+	/*
+	 * length of the stat buffer as expected by phyp; zero means that
+	 * performance stats are treated as unavailable.
+	 */
+	size_t stat_buffer_len;
+
+	/* The bits which are forced on in health_bitmap after each query */
+	u64 health_bitmap_inject_mask;
+};
+
+/*
+ * Issue H_SCM_FLUSH for the drc backing @nd_region, re-issuing the hcall with
+ * the returned continuation token for as long as the hypervisor reports busy.
+ *
+ * Returns 0 on success and -EIO when the hcall ends with any other status.
+ */
+static int papr_scm_pmem_flush(struct nd_region *nd_region,
+			       struct bio *bio __maybe_unused)
+{
+	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
+	unsigned long token = 0;
+	long rc;
+
+	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
+
+	for (;;) {
+		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
+		token = ret_buf[0];
+
+		/* Long-busy: sleep for the hinted time, then retry */
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			continue;
+		}
+
+		if (rc != H_BUSY)
+			break;
+
+		/* Plain busy: give other tasks a chance before retrying */
+		cond_resched();
+	}
+
+	if (!rc) {
+		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
+		return 0;
+	}
+
+	dev_err(&p->pdev->dev, "flush error: %ld", rc);
+	return -EIO;
+}
+
+static LIST_HEAD(papr_nd_regions);
+static DEFINE_MUTEX(papr_ndr_lock);
+
+/*
+ * Bind all blocks of the drc via H_SCM_BIND_MEM and record the resulting
+ * address in p->bound_addr.
+ *
+ * Returns 0 on success, otherwise the (truncated) hcall status.
+ */
+static int drc_pmem_bind(struct papr_scm_priv *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	uint64_t first_addr = 0;
+	uint64_t token = 0;
+	int64_t rc;
+
+	/*
+	 * When the hypervisor cannot map all the requested memory in a single
+	 * hcall it returns H_BUSY and we call again with the token until
+	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
+	 * leaves the system in an undefined state, so we wait.
+	 */
+	for (;;) {
+		rc = plpar_hcall(H_SCM_BIND_MEM, retbuf, p->drc_index, 0,
+				p->blocks, BIND_ANY_ADDR, token);
+		token = retbuf[0];
+
+		/* Latch the first non-zero address reported by the hcall */
+		if (first_addr == 0)
+			first_addr = retbuf[1];
+
+		cond_resched();
+
+		if (rc != H_BUSY)
+			break;
+	}
+
+	if (rc != 0)
+		return rc;
+
+	p->bound_addr = first_addr;
+	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
+		p->drc_index, (unsigned long)first_addr);
+	return 0;
+}
+
+/*
+ * Unbind every SCM resource associated with the drc. The hcall is re-issued
+ * with the returned continuation token while the hypervisor reports busy.
+ * A failure is only logged; nothing is returned to the caller.
+ */
+static void drc_pmem_unbind(struct papr_scm_priv *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	uint64_t token = 0;
+	int64_t rc;
+
+	dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
+
+	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
+	for (;;) {
+		/* Unbind of all SCM resources associated with drcIndex */
+		rc = plpar_hcall(H_SCM_UNBIND_ALL, retbuf, H_UNBIND_SCOPE_DRC,
+				 p->drc_index, token);
+		token = retbuf[0];
+
+		/* Long-busy: sleep for the hinted time, then retry */
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			continue;
+		}
+
+		if (rc != H_BUSY)
+			break;
+
+		cond_resched();
+	}
+
+	if (!rc) {
+		dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
+			p->drc_index);
+		return;
+	}
+
+	dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+}
+
+/*
+ * Try to reuse an existing binding of the drc: query the address of the
+ * first and of the last block and accept the binding only when the distance
+ * between them matches a fully bound, contiguous region. In every other case
+ * fall back to an unbind followed by a fresh bind.
+ *
+ * Returns 0 when an existing binding was adopted, otherwise whatever
+ * drc_pmem_bind() returns.
+ */
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	unsigned long first_addr;
+	unsigned long last_addr;
+
+	if (plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, retbuf,
+			p->drc_index, 0))
+		goto rebind;
+	first_addr = retbuf[0];
+
+	/* Make sure the full region is bound. */
+	if (plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, retbuf,
+			p->drc_index, p->blocks - 1))
+		goto rebind;
+	last_addr = retbuf[0];
+
+	if ((last_addr - first_addr) == ((p->blocks - 1) * p->block_size)) {
+		p->bound_addr = first_addr;
+		dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
+			p->drc_index, first_addr);
+		return 0;
+	}
+
+rebind:
+	dev_info(&p->pdev->dev,
+		 "Failed to query, trying an unbind followed by bind");
+	drc_pmem_unbind(p);
+	return drc_pmem_bind(p);
+}
+
+/*
+ * Query the dimm performance stats from PHYP via H_SCM_PERFORMANCE_STATS.
+ *
+ * - If buff_stats == NULL no buffer is passed to PHYP and, on success, the
+ *   return value is the buffer size in bytes reported by the hcall.
+ * - If buff_stats != NULL and num_stats == 0 the request uses the prefetched
+ *   p->stat_buffer_len as buffer size; the caller's buffer is expected to be
+ *   at least that large.
+ * - If buff_stats != NULL and num_stats > 0 the buffer size is the header
+ *   plus num_stats entries, and the caller has pre-filled the stat-ids.
+ *
+ * With a non-zero request size the function returns 0 on success. Errors:
+ * -ENOENT (H_PARTIAL, unknown stat-id), -EPERM (H_AUTHORITY), -EOPNOTSUPP
+ * (H_UNSUPPORTED) and -EIO for any other hcall failure.
+ */
+static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
+				    struct papr_scm_perf_stats *buff_stats,
+				    unsigned int num_stats)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	size_t size = 0;
+	s64 rc;
+
+	/* Without an out buffer the size stays 0 (size-query mode) */
+	if (buff_stats) {
+		/* Fill in the request header */
+		memcpy(buff_stats->eye_catcher,
+		       PAPR_SCM_PERF_STATS_EYECATCHER, 8);
+		buff_stats->stats_version =
+			cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION);
+		buff_stats->num_statistics =
+			cpu_to_be32(num_stats);
+
+		/*
+		 * Derive the buffer size from num_stats when given, else
+		 * fall back to the prefetched max buffer length.
+		 */
+		size = num_stats ?
+			sizeof(struct papr_scm_perf_stats) +
+			num_stats * sizeof(struct papr_scm_perf_stat) :
+			p->stat_buffer_len;
+	}
+
+	/* Do the HCALL asking PHYP for info */
+	rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index,
+			 buff_stats ? virt_to_phys(buff_stats) : 0,
+			 size);
+
+	switch (rc) {
+	case H_SUCCESS:
+		break;
+	case H_PARTIAL:
+		/* The error was due to an unknown stat-id */
+		dev_err(&p->pdev->dev,
+			"Unknown performance stats, Err:0x%016lX\n", ret[0]);
+		return -ENOENT;
+	case H_AUTHORITY:
+		dev_info(&p->pdev->dev,
+			 "Permission denied while accessing performance stats");
+		return -EPERM;
+	case H_UNSUPPORTED:
+		dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
+		return -EOPNOTSUPP;
+	default:
+		dev_err(&p->pdev->dev,
+			"Failed to query performance stats, Err:%lld\n", rc);
+		return -EIO;
+	}
+
+	if (!size) {
+		/* Handle case where stat buffer size was requested */
+		dev_dbg(&p->pdev->dev,
+			"Performance stats size %ld\n", ret[0]);
+		return ret[0];
+	}
+
+	/* Successfully fetched the requested stats from phyp */
+	dev_dbg(&p->pdev->dev,
+		"Performance stats returned %d stats\n",
+		be32_to_cpu(buff_stats->num_statistics));
+	return 0;
+}
+
+#ifdef CONFIG_PERF_EVENTS
+#define to_nvdimm_pmu(_pmu)	container_of(_pmu, struct nvdimm_pmu, pmu)
+
+/*
+ * Maps a perf event code (event->attr.config) to the stat-id string sent to
+ * PHYP. Every string is exactly 8 characters (space padded) because it is
+ * copied verbatim into the 8-byte stat_id field. Index 0 is intentionally
+ * left unset; the event handlers reject config == 0 as invalid.
+ */
+static const char * const nvdimm_events_map[] = {
+	[1] = "CtlResCt",
+	[2] = "CtlResTm",
+	[3] = "PonSecs ",
+	[4] = "MemLife ",
+	[5] = "CritRscU",
+	[6] = "HostLCnt",
+	[7] = "HostSCnt",
+	[8] = "HostSDur",
+	[9] = "HostLDur",
+	[10] = "MedRCnt ",
+	[11] = "MedWCnt ",
+	[12] = "MedRDur ",
+	[13] = "MedWDur ",
+	[14] = "CchRHCnt",
+	[15] = "CchWHCnt",
+	[16] = "FastWCnt",
+};
+
+/*
+ * Fetch the current value of the single performance stat selected by
+ * event->attr.config and store it in *count.
+ *
+ * Returns 0 on success, -EINVAL for an invalid event code or missing driver
+ * data, -ENOMEM on allocation failure, or the negative error reported by
+ * drc_pmem_query_stats().
+ */
+static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
+{
+	struct papr_scm_priv *p = dev_get_drvdata(dev);
+	struct papr_scm_perf_stats *req;
+	struct papr_scm_perf_stat *entry;
+	int rc;
+
+	/* Invalid eventcode */
+	if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
+		return -EINVAL;
+
+	if (!p)
+		return -EINVAL;
+
+	/* Request buffer large enough for the header plus a single stat */
+	req = kzalloc(sizeof(struct papr_scm_perf_stats) +
+		      sizeof(struct papr_scm_perf_stat), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	entry = &req->scm_statistic[0];
+	memcpy(&entry->stat_id,
+	       nvdimm_events_map[event->attr.config],
+	       sizeof(entry->stat_id));
+	entry->stat_val = 0;
+
+	rc = drc_pmem_query_stats(p, req, 1);
+	if (rc >= 0) {
+		*count = be64_to_cpu(entry->stat_val);
+		rc = 0;
+	}
+
+	kfree(req);
+	return rc;
+}
+
+/*
+ * perf event_init callback: accept only counting events that belong to this
+ * PMU and carry a known event code.
+ *
+ * Returns 0 when the event is acceptable, -ENOENT when the event targets a
+ * different PMU type, -EOPNOTSUPP for sampling or branch-stack events and
+ * -EINVAL for a missing PMU, missing driver data or an invalid event code.
+ */
+static int papr_scm_pmu_event_init(struct perf_event *event)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	struct papr_scm_priv *p;
+
+	if (!nd_pmu)
+		return -EINVAL;
+
+	/* test the event attr type for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* it does not support event sampling mode */
+	if (is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	p = dev_get_drvdata(nd_pmu->dev);
+	if (!p)
+		return -EINVAL;
+
+	/*
+	 * Invalid eventcode. Bound the code by the event map itself (same
+	 * check as papr_scm_pmu_get_value()) so the two cannot drift apart
+	 * when events are added to or removed from the map.
+	 */
+	if (event->attr.config == 0 ||
+	    event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * perf add callback. When the event is started right away (PERF_EF_START)
+ * the current stat value is sampled and stored as the baseline in
+ * event->hw.prev_count. Returns 0 on success or the error from sampling.
+ */
+static int papr_scm_pmu_add(struct perf_event *event, int flags)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	u64 baseline;
+	int err;
+
+	if (!nd_pmu)
+		return -EINVAL;
+
+	/* Nothing to sample unless the event starts counting immediately */
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	err = papr_scm_pmu_get_value(event, nd_pmu->dev, &baseline);
+	if (err)
+		return err;
+
+	local64_set(&event->hw.prev_count, baseline);
+	return 0;
+}
+
+/*
+ * perf read callback: sample the stat, swap it in as the new baseline and
+ * add the difference to the previous baseline to event->count. If sampling
+ * fails the event is left untouched.
+ */
+static void papr_scm_pmu_read(struct perf_event *event)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	u64 current_val, last_val;
+
+	if (!nd_pmu)
+		return;
+
+	if (papr_scm_pmu_get_value(event, nd_pmu->dev, &current_val))
+		return;
+
+	last_val = local64_xchg(&event->hw.prev_count, current_val);
+	local64_add(current_val - last_val, &event->count);
+}
+
+/*
+ * perf del callback: take one last reading so the event count includes
+ * everything accumulated since the previous read. @flags is unused.
+ */
+static void papr_scm_pmu_del(struct perf_event *event, int flags)
+{
+	papr_scm_pmu_read(event);
+}
+
+/*
+ * Allocate and register an nvdimm PMU for the dimm. Registration is skipped
+ * (with an informational message) when the allocation fails, when no stat
+ * buffer length is known, or when register_nvdimm_pmu() fails. On success
+ * the PMU is remembered in pdev->archdata.priv.
+ */
+static void papr_scm_pmu_register(struct papr_scm_priv *p)
+{
+	struct nvdimm_pmu *nd_pmu;
+	struct pmu *pmu;
+	int rc, nodeid;
+
+	nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
+	if (!nd_pmu) {
+		rc = -ENOMEM;
+		goto pmu_err_print;
+	}
+
+	/* Without a stat buffer length there are no stats to expose */
+	if (!p->stat_buffer_len) {
+		rc = -ENOENT;
+		goto pmu_check_events_err;
+	}
+
+	pmu = &nd_pmu->pmu;
+	pmu->task_ctx_nr = perf_invalid_context;
+	pmu->name = nvdimm_name(p->nvdimm);
+	pmu->event_init = papr_scm_pmu_event_init;
+	pmu->read = papr_scm_pmu_read;
+	pmu->add = papr_scm_pmu_add;
+	pmu->del = papr_scm_pmu_del;
+	pmu->capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+			    PERF_PMU_CAP_NO_EXCLUDE;
+
+	/* Use the cpus of the online node nearest to the device */
+	nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
+	nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);
+
+	rc = register_nvdimm_pmu(nd_pmu, p->pdev);
+	if (rc)
+		goto pmu_check_events_err;
+
+	/*
+	 * Set archdata.priv value to nvdimm_pmu structure, to handle the
+	 * unregistering of pmu device.
+	 */
+	p->pdev->archdata.priv = nd_pmu;
+	return;
+
+pmu_check_events_err:
+	kfree(nd_pmu);
+pmu_err_print:
+	dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
+}
+
+#else
+/* No-op stand-in used when CONFIG_PERF_EVENTS is not enabled */
+static void papr_scm_pmu_register(struct papr_scm_priv *p) { }
+#endif
+
+/*
+ * Issue the H_SCM_HEALTH hcall and refresh the health information held in
+ * papr_scm_priv. An unimplemented hcall (H_FUNCTION) is treated as an empty
+ * health bitmap. Any bits set in health_bitmap_inject_mask are forced on in
+ * the stored bitmap.
+ *
+ * Returns 0 on success and -ENXIO when the hcall fails.
+ */
+static int __drc_pmem_query_health(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	u64 health = 0;
+	long rc;
+
+	/* issue the hcall */
+	rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
+	switch (rc) {
+	case H_SUCCESS:
+		/* Only keep the bits the hypervisor marks as valid */
+		health = ret[0] & ret[1];
+		break;
+	case H_FUNCTION:
+		dev_info_once(&p->pdev->dev,
+			      "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
+		break;
+	default:
+		dev_err(&p->pdev->dev,
+			"Failed to query health information, Err:%ld\n", rc);
+		return -ENXIO;
+	}
+
+	p->lasthealth_jiffies = jiffies;
+
+	/* Allow injecting specific health bits via inject mask. */
+	health |= p->health_bitmap_inject_mask;
+	WRITE_ONCE(p->health_bitmap, health);
+
+	dev_dbg(&p->pdev->dev,
+		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
+		ret[0], ret[1]);
+
+	return 0;
+}
+
+/* Min interval in seconds for assuming stable dimm health */
+#define MIN_HEALTH_QUERY_INTERVAL 60
+
+/*
+ * Refresh the dimm health info unless the cached copy is younger than
+ * MIN_HEALTH_QUERY_INTERVAL seconds. Returns 0 when the cache is still
+ * considered valid or was refreshed, otherwise the error from taking the
+ * mutex or from __drc_pmem_query_health().
+ */
+static int drc_pmem_query_health(struct papr_scm_priv *p)
+{
+	unsigned long expires;
+	int rc;
+
+	/* Protect concurrent modifications to papr_scm_priv */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Point in time after which the cached health data is stale */
+	expires = p->lasthealth_jiffies +
+		msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000);
+
+	/* rc is already 0 here, so a still-valid cache just falls through */
+	if (time_after(jiffies, expires))
+		rc = __drc_pmem_query_health(p);
+
+	mutex_unlock(&p->health_mutex);
+	return rc;
+}
+
+/*
+ * Read hdr->in_length bytes of dimm metadata starting at hdr->in_offset into
+ * hdr->out_buf. The data is fetched with H_SCM_READ_METADATA in chunks of
+ * 8, 4, 2 or 1 bytes, always using the largest chunk that still fits.
+ *
+ * Returns 0 on success, -EINVAL when the requested range lies outside the
+ * metadata area or the hcall fails, and -ENODEV when the hcall reports
+ * H_PARAMETER (bad DRC index).
+ */
+static int papr_scm_meta_get(struct papr_scm_priv *p,
+			     struct nd_cmd_get_config_data_hdr *hdr)
+{
+	unsigned long data[PLPAR_HCALL_BUFSIZE];
+	unsigned long offset, data_offset;
+	int len, read;
+	int64_t ret;
+
+	/*
+	 * Do the range check in 64 bits so that a large in_offset cannot
+	 * wrap the sum around and slip past the metadata_size limit.
+	 */
+	if (((u64)hdr->in_offset + hdr->in_length) > p->metadata_size)
+		return -EINVAL;
+
+	for (len = hdr->in_length; len; len -= read) {
+
+		/* Bytes already transferred == position within out_buf */
+		data_offset = hdr->in_length - len;
+		offset = hdr->in_offset + data_offset;
+
+		if (len >= 8)
+			read = 8;
+		else if (len >= 4)
+			read = 4;
+		else if (len >= 2)
+			read = 2;
+		else
+			read = 1;
+
+		ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
+				  offset, read);
+
+		if (ret == H_PARAMETER) /* bad DRC index */
+			return -ENODEV;
+		if (ret)
+			return -EINVAL; /* other invalid parameter */
+
+		switch (read) {
+		case 8:
+			*(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]);
+			break;
+		case 4:
+			*(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff);
+			break;
+
+		case 2:
+			*(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff);
+			break;
+
+		case 1:
+			*(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff);
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Write hdr->in_length bytes from hdr->in_buf to the dimm metadata starting
+ * at hdr->in_offset. The data is sent with H_SCM_WRITE_METADATA in chunks of
+ * 8, 4, 2 or 1 bytes, always using the largest chunk that still fits.
+ *
+ * Returns 0 on success, -EINVAL when the requested range lies outside the
+ * metadata area or the hcall fails, and -ENODEV when the hcall reports
+ * H_PARAMETER (bad DRC index).
+ */
+static int papr_scm_meta_set(struct papr_scm_priv *p,
+			     struct nd_cmd_set_config_hdr *hdr)
+{
+	unsigned long offset, data_offset;
+	int len, wrote;
+	unsigned long data;
+	__be64 data_be;
+	int64_t ret;
+
+	/*
+	 * Do the range check in 64 bits so that a large in_offset cannot
+	 * wrap the sum around and slip past the metadata_size limit.
+	 */
+	if (((u64)hdr->in_offset + hdr->in_length) > p->metadata_size)
+		return -EINVAL;
+
+	for (len = hdr->in_length; len; len -= wrote) {
+
+		/* Bytes already transferred == position within in_buf */
+		data_offset = hdr->in_length - len;
+		offset = hdr->in_offset + data_offset;
+
+		if (len >= 8) {
+			data = *(uint64_t *)(hdr->in_buf + data_offset);
+			data_be = cpu_to_be64(data);
+			wrote = 8;
+		} else if (len >= 4) {
+			data = *(uint32_t *)(hdr->in_buf + data_offset);
+			data &= 0xffffffff;
+			data_be = cpu_to_be32(data);
+			wrote = 4;
+		} else if (len >= 2) {
+			data = *(uint16_t *)(hdr->in_buf + data_offset);
+			data &= 0xffff;
+			data_be = cpu_to_be16(data);
+			wrote = 2;
+		} else {
+			data_be = *(uint8_t *)(hdr->in_buf + data_offset);
+			data_be &= 0xff;
+			wrote = 1;
+		}
+
+		ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index,
+					 offset, data_be, wrote);
+		if (ret == H_PARAMETER) /* bad DRC index */
+			return -ENODEV;
+		if (ret)
+			return -EINVAL; /* other invalid parameter */
+	}
+
+	return 0;
+}
+
+/*
+ * Sanity-check the arguments of a dimm-control request. Returns 0 when the
+ * request may be processed further and -EINVAL otherwise. Only the envelope
+ * is checked here; PDSM payloads are validated later in
+ * papr_scm_service_pdsm.
+ */
+static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+			unsigned int buf_len)
+{
+	unsigned long supported_cmds = PAPR_SCM_DIMM_CMD_MASK;
+	struct nd_cmd_pkg *pkg;
+	struct papr_scm_priv *p;
+	enum papr_pdsm pdsm;
+
+	/* Only dimm-specific calls are supported atm */
+	if (!nvdimm)
+		return -EINVAL;
+
+	/* get the provider data from struct nvdimm */
+	p = nvdimm_provider_data(nvdimm);
+
+	if (!test_bit(cmd, &supported_cmds)) {
+		dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd);
+		return -EINVAL;
+	}
+
+	/* Everything except CMD_CALL needs no further checks here */
+	if (cmd != ND_CMD_CALL)
+		return 0;
+
+	/* Verify the envelope and its size */
+	if (!buf ||
+	    buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) {
+		dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n",
+			buf_len);
+		return -EINVAL;
+	}
+
+	/* Verify that the nd_cmd_pkg.nd_family is correct */
+	pkg = (struct nd_cmd_pkg *)buf;
+	if (pkg->nd_family != NVDIMM_FAMILY_PAPR) {
+		dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n",
+			pkg->nd_family);
+		return -EINVAL;
+	}
+
+	/* Verify if the pdsm command is valid */
+	pdsm = (enum papr_pdsm)pkg->nd_command;
+	if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n",
+			pdsm);
+		return -EINVAL;
+	}
+
+	/* Have enough space to hold returned 'nd_pkg_pdsm' header */
+	if (pkg->nd_size_out < ND_PDSM_HDR_SIZE) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n",
+			pdsm);
+		return -EINVAL;
+	}
+
+	/* Let the command be further processed */
+	return 0;
+}
+
+/*
+ * Fetch the "MemLife " performance stat and publish it as the dimm fuel
+ * gauge in the health payload.
+ *
+ * Returns 0 without touching the payload when performance stats are not
+ * available, sizeof(struct nd_papr_pdsm_health) once the gauge has been
+ * filled in, or a negative error code.
+ */
+static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p,
+				union nd_pdsm_payload *payload)
+{
+	struct papr_scm_perf_stats *req;
+	struct papr_scm_perf_stat *entry;
+	u64 gauge;
+	int rc;
+
+	/* Silently fail if fetching performance metrics isn't  supported */
+	if (!p->stat_buffer_len)
+		return 0;
+
+	/* Request buffer large enough for the header plus a single stat */
+	req = kzalloc(sizeof(struct papr_scm_perf_stats) +
+		      sizeof(struct papr_scm_perf_stat), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	entry = &req->scm_statistic[0];
+	memcpy(&entry->stat_id, "MemLife ", sizeof(entry->stat_id));
+	entry->stat_val = 0;
+
+	/* Fetch the fuel gauge and populate it in payload */
+	rc = drc_pmem_query_stats(p, req, 1);
+	if (rc >= 0) {
+		gauge = be64_to_cpu(entry->stat_val);
+		dev_dbg(&p->pdev->dev,
+			"Fetched fuel-gauge %llu", gauge);
+		payload->health.extension_flags |=
+			PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+		payload->health.dimm_fuel_gauge = gauge;
+
+		rc = sizeof(struct nd_papr_pdsm_health);
+	} else {
+		dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc);
+	}
+
+	kfree(req);
+	return rc;
+}
+
+/*
+ * Add the dirty-shutdown-counter value to the pdsm health payload and flag
+ * it as valid. Always returns sizeof(struct nd_papr_pdsm_health).
+ */
+static int papr_pdsm_dsc(struct papr_scm_priv *p,
+			 union nd_pdsm_payload *payload)
+{
+	payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
+	payload->health.dimm_dsc = p->dirty_shutdown_counter;
+
+	return sizeof(struct nd_papr_pdsm_health);
+}
+
+/*
+ * Service PAPR_PDSM_HEALTH: query fresh dimm health from the hypervisor and
+ * translate the health bitmap into the nd_papr_pdsm_health payload. The fuel
+ * gauge and dirty-shutdown-counter extensions are added on a best-effort
+ * basis.
+ *
+ * Returns sizeof(struct nd_papr_pdsm_health) on success or a negative error
+ * when the mutex could not be taken or the health query failed.
+ */
+static int papr_pdsm_health(struct papr_scm_priv *p,
+			    union nd_pdsm_payload *payload)
+{
+	u64 bitmap;
+	int rc;
+
+	/* Ensure dimm health mutex is taken preventing concurrent access */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Always fetch upto date dimm health data ignoring cached values */
+	rc = __drc_pmem_query_health(p);
+	if (rc) {
+		mutex_unlock(&p->health_mutex);
+		return rc;
+	}
+
+	/* Stable while the mutex is held, so read it just once */
+	bitmap = p->health_bitmap;
+
+	/* update health struct with various flags derived from health bitmap */
+	payload->health = (struct nd_papr_pdsm_health) {
+		.extension_flags = 0,
+		.dimm_unarmed = !!(bitmap & PAPR_PMEM_UNARMED_MASK),
+		.dimm_bad_shutdown = !!(bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK),
+		.dimm_bad_restore = !!(bitmap & PAPR_PMEM_BAD_RESTORE_MASK),
+		.dimm_scrubbed = !!(bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED),
+		.dimm_locked = !!(bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED),
+		.dimm_encrypted = !!(bitmap & PAPR_PMEM_ENCRYPTED),
+		.dimm_health = PAPR_PDSM_DIMM_HEALTHY,
+	};
+
+	/* Most severe condition wins: fatal, then critical, then unhealthy */
+	if (bitmap & PAPR_PMEM_HEALTH_FATAL)
+		payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL;
+	else if (bitmap & PAPR_PMEM_HEALTH_CRITICAL)
+		payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL;
+	else if (bitmap & PAPR_PMEM_HEALTH_UNHEALTHY)
+		payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
+
+	/* struct populated hence can release the mutex now */
+	mutex_unlock(&p->health_mutex);
+
+	/* Populate the fuel gauge meter in the payload */
+	papr_pdsm_fuel_gauge(p, payload);
+	/* Populate the dirty-shutdown-counter field */
+	papr_pdsm_dsc(p, payload);
+
+	return sizeof(struct nd_papr_pdsm_health);
+}
+
+/*
+ * Service PAPR_PDSM_SMART_INJECT: update health_bitmap_inject_mask according
+ * to the flags in the request. For each flag that is set, the matching
+ * '*_enable' field decides whether the health bit is injected or cleared.
+ * The cached health data is invalidated so that the next query picks up the
+ * new mask, and the set of flags that was understood is reported back in
+ * payload->smart_inject.flags.
+ *
+ * Returns the number of payload bytes written, i.e.
+ * sizeof(struct nd_papr_pdsm_smart_inject), or a negative error when the
+ * health mutex could not be taken.
+ */
+static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
+				  union nd_pdsm_payload *payload)
+{
+	int rc;
+	u32 supported_flags = 0;
+	u64 inject_mask = 0, clear_mask = 0;
+	u64 mask;
+
+	/* Check for individual smart error flags and update inject/clear masks */
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+		supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+		if (payload->smart_inject.fatal_enable)
+			inject_mask |= PAPR_PMEM_HEALTH_FATAL;
+		else
+			clear_mask |= PAPR_PMEM_HEALTH_FATAL;
+	}
+
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+		supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+		if (payload->smart_inject.unsafe_shutdown_enable)
+			inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		else
+			clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+	}
+
+	dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n",
+		inject_mask, clear_mask);
+
+	/* Prevent concurrent access to dimm health bitmap related members */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Use inject/clear masks to set health_bitmap_inject_mask */
+	mask = READ_ONCE(p->health_bitmap_inject_mask);
+	mask = (mask & ~clear_mask) | inject_mask;
+	WRITE_ONCE(p->health_bitmap_inject_mask, mask);
+
+	/* Invalidate cached health bitmap */
+	p->lasthealth_jiffies = 0;
+
+	mutex_unlock(&p->health_mutex);
+
+	/* Return the supported flags back to userspace */
+	payload->smart_inject.flags = supported_flags;
+
+	/*
+	 * Report the size of the payload this command actually writes; it
+	 * matches the size_out declared for PAPR_PDSM_SMART_INJECT.
+	 */
+	return sizeof(struct nd_papr_pdsm_smart_inject);
+}
+
+/*
+ * 'struct pdsm_cmd_desc'
+ * Describes one supported PDSM: the expected length of its in/out payloads
+ * and the function that services it.
+ *
+ * size_in	: Size of input payload if any in the PDSM request. A value
+ *		  of 0 disables the input size check.
+ * size_out	: Size of output payload if any in the PDSM request. A value
+ *		  of 0 disables the output size check.
+ * service	: Service function for the PDSM request. A NULL pointer marks
+ *		  the PDSM as unsupported. Return semantics:
+ *		  rc < 0 : Error servicing PDSM and rc indicates the error.
+ *		  rc >=0 : Serviced successfully and 'rc' indicates the number
+ *			of bytes written to payload.
+ */
+struct pdsm_cmd_desc {
+	u32 size_in;
+	u32 size_out;
+	int (*service)(struct papr_scm_priv *dimm,
+		       union nd_pdsm_payload *payload);
+};
+
+/*
+ * Holds all supported PDSMs' command descriptors, indexed by enum papr_pdsm.
+ * The PAPR_PDSM_MIN and PAPR_PDSM_MAX slots are placeholders with no service
+ * function.
+ */
+static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
+	[PAPR_PDSM_MIN] = {
+		.size_in = 0,
+		.size_out = 0,
+		.service = NULL,
+	},
+	/* New PDSM command descriptors to be added below */
+
+	[PAPR_PDSM_HEALTH] = {
+		.size_in = 0,
+		.size_out = sizeof(struct nd_papr_pdsm_health),
+		.service = papr_pdsm_health,
+	},
+
+	[PAPR_PDSM_SMART_INJECT] = {
+		.size_in = sizeof(struct nd_papr_pdsm_smart_inject),
+		.size_out = sizeof(struct nd_papr_pdsm_smart_inject),
+		.service = papr_pdsm_smart_inject,
+	},
+	/* Empty */
+	[PAPR_PDSM_MAX] = {
+		.size_in = 0,
+		.size_out = 0,
+		.service = NULL,
+	},
+};
+
+/*
+ * Given a valid pdsm cmd return its command descriptor else return NULL.
+ *
+ * Both bounds must hold for the index to be inside the descriptor table, so
+ * the two comparisons are combined with '&&'; with '||' the test is true for
+ * every value and an out-of-range cmd would index past the table.
+ */
+static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd)
+{
+	if (cmd >= 0 && cmd < ARRAY_SIZE(__pdsm_cmd_descriptors))
+		return &__pdsm_cmd_descriptors[cmd];
+
+	return NULL;
+}
+
+/*
+ * For a given pdsm request call an appropriate service function.
+ *
+ * The package is rejected with -EINVAL when no descriptor exists for the
+ * command, when a reserved header field is non-zero, or when the in/out
+ * sizes do not match what the descriptor expects. Otherwise the service
+ * function is called; its result is stored in the PDSM header's cmd_status
+ * and pkg->nd_fw_size is set to the header size plus the number of payload
+ * bytes written. A descriptor without a service function yields -ENOENT.
+ *
+ * Returns the resulting cmd_status (0 on success, negative on error).
+ */
+static int papr_scm_service_pdsm(struct papr_scm_priv *p,
+				 struct nd_cmd_pkg *pkg)
+{
+	/* Get the PDSM header and PDSM command */
+	struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload;
+	enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command;
+	const struct pdsm_cmd_desc *pdsc;
+	int rc;
+
+	/* Fetch corresponding pdsm descriptor for validation and servicing */
+	pdsc = pdsm_cmd_desc(pdsm);
+
+	/* Validate pdsm descriptor: never dereference a missing one */
+	if (!pdsc) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", pdsm);
+		return -EINVAL;
+	}
+
+	/* Ensure that reserved fields are 0 */
+	if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n",
+			pdsm);
+		return -EINVAL;
+	}
+
+	/* If pdsm expects some input, then ensure that the size_in matches */
+	if (pdsc->size_in &&
+	    pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n",
+			pdsm, pkg->nd_size_in);
+		return -EINVAL;
+	}
+
+	/* If pdsm wants to return data, then ensure that  size_out matches */
+	if (pdsc->size_out &&
+	    pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n",
+			pdsm, pkg->nd_size_out);
+		return -EINVAL;
+	}
+
+	/* Service the pdsm */
+	if (pdsc->service) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm);
+
+		rc = pdsc->service(p, &pdsm_pkg->payload);
+
+		if (rc < 0) {
+			/* error encountered while servicing pdsm */
+			pdsm_pkg->cmd_status = rc;
+			pkg->nd_fw_size = ND_PDSM_HDR_SIZE;
+		} else {
+			/* pdsm serviced and 'rc' bytes written to payload */
+			pdsm_pkg->cmd_status = 0;
+			pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc;
+		}
+	} else {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n",
+			pdsm);
+		pdsm_pkg->cmd_status = -ENOENT;
+		pkg->nd_fw_size = ND_PDSM_HDR_SIZE;
+	}
+
+	return pdsm_pkg->cmd_status;
+}
+
+/*
+ * Dimm control entry point: validate the request with is_cmd_valid() and
+ * dispatch it to the matching handler. The handler's status is reported
+ * through *cmd_rc (when the caller passes one), while the function's own
+ * return value is 0 once a command has been dispatched, or a negative error
+ * when the request was rejected up front.
+ */
+static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
+			  struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+			  unsigned int buf_len, int *cmd_rc)
+{
+	struct papr_scm_priv *p;
+	int rc;
+
+	rc = is_cmd_valid(nvdimm, cmd, buf, buf_len);
+	if (rc) {
+		pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc);
+		return rc;
+	}
+
+	/* Use a local variable in case cmd_rc pointer is NULL */
+	if (!cmd_rc)
+		cmd_rc = &rc;
+
+	p = nvdimm_provider_data(nvdimm);
+
+	switch (cmd) {
+	case ND_CMD_GET_CONFIG_SIZE: {
+		struct nd_cmd_get_config_size *size_hdr = buf;
+
+		size_hdr->status = 0;
+		size_hdr->max_xfer = 8;
+		size_hdr->config_size = p->metadata_size;
+		*cmd_rc = 0;
+		break;
+	}
+
+	case ND_CMD_GET_CONFIG_DATA:
+		*cmd_rc = papr_scm_meta_get(p, buf);
+		break;
+
+	case ND_CMD_SET_CONFIG_DATA:
+		*cmd_rc = papr_scm_meta_set(p, buf);
+		break;
+
+	case ND_CMD_CALL:
+		*cmd_rc = papr_scm_service_pdsm(p, (struct nd_cmd_pkg *)buf);
+		break;
+
+	default:
+		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
+		return -EINVAL;
+	}
+
+	dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
+
+	return 0;
+}
+
+/*
+ * sysfs show handler for 'health_bitmap_inject': prints the current
+ * health_bitmap_inject_mask in hex. Uses sysfs_emit() like the other show
+ * handlers so the output is bounded to the sysfs page buffer.
+ */
+static ssize_t health_bitmap_inject_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+	return sysfs_emit(buf, "%#llx\n",
+			  READ_ONCE(p->health_bitmap_inject_mask));
+}
+
+static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject);
+
+/*
+ * sysfs show handler for 'perf_stats': asks PHYP for all dimm performance
+ * stats and prints one "<stat-id> = 0x<value>" line per stat.
+ *
+ * Returns the number of bytes written to @buf, -ENOENT when no stat buffer
+ * length is known, -ENOMEM on allocation failure, or the error reported by
+ * drc_pmem_query_stats().
+ */
+static ssize_t perf_stats_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct papr_scm_priv *p = nvdimm_provider_data(to_nvdimm(dev));
+	struct papr_scm_perf_stats *stats;
+	struct papr_scm_perf_stat *entry;
+	struct seq_buf out;
+	ssize_t rc;
+	int index;
+
+	if (!p->stat_buffer_len)
+		return -ENOENT;
+
+	/* Allocate the buffer for phyp where stats are written */
+	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	/* Ask phyp to return all dimm perf stats */
+	rc = drc_pmem_query_stats(p, stats, 0);
+	if (!rc) {
+		/*
+		 * Walk the returned entries and print each stat with its
+		 * value. stat_id is an 8-byte char string without a
+		 * terminator, hence the "%.8s" format.
+		 */
+		seq_buf_init(&out, buf, PAGE_SIZE);
+		entry = stats->scm_statistic;
+		for (index = 0;
+		     index < be32_to_cpu(stats->num_statistics);
+		     index++, entry++)
+			seq_buf_printf(&out, "%.8s = 0x%016llX\n",
+				       entry->stat_id,
+				       be64_to_cpu(entry->stat_val));
+
+		rc = (ssize_t)seq_buf_used(&out);
+	}
+
+	kfree(stats);
+	return rc;
+}
+static DEVICE_ATTR_ADMIN_RO(perf_stats);
+
+/* sysfs show: refresh DIMM health, then list the set health flags by name. */
+static ssize_t flags_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct papr_scm_priv *p = nvdimm_provider_data(to_nvdimm(dev));
+	struct seq_buf out;
+	u64 bits;
+	int err = drc_pmem_query_health(p);
+
+	if (err)
+		return err;
+
+	/* Snapshot the bitmap once so every mask test sees the same value */
+	bits = READ_ONCE(p->health_bitmap);
+	seq_buf_init(&out, buf, PAGE_SIZE);
+
+	if (bits & PAPR_PMEM_UNARMED_MASK)
+		seq_buf_printf(&out, "not_armed ");
+
+	if (bits & PAPR_PMEM_BAD_SHUTDOWN_MASK)
+		seq_buf_printf(&out, "flush_fail ");
+
+	if (bits & PAPR_PMEM_BAD_RESTORE_MASK)
+		seq_buf_printf(&out, "restore_fail ");
+
+	if (bits & PAPR_PMEM_ENCRYPTED)
+		seq_buf_printf(&out, "encrypted ");
+
+	if (bits & PAPR_PMEM_SMART_EVENT_MASK)
+		seq_buf_printf(&out, "smart_notify ");
+
+	if (bits & PAPR_PMEM_SCRUBBED_AND_LOCKED)
+		seq_buf_printf(&out, "scrubbed locked ");
+
+	/* Terminate the line only when at least one flag was printed */
+	if (seq_buf_used(&out) != 0)
+		seq_buf_printf(&out, "\n");
+
+	return seq_buf_used(&out);
+}
+DEVICE_ATTR_RO(flags);
+
+/* sysfs show: print the dirty-shutdown counter held in the private data. */
+static ssize_t dirty_shutdown_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct papr_scm_priv *priv = nvdimm_provider_data(to_nvdimm(dev));
+
+	return sysfs_emit(buf, "%llu\n", priv->dirty_shutdown_counter);
+}
+DEVICE_ATTR_RO(dirty_shutdown);
+
+/* is_visible callback: hide perf_stats when no stats buffer length is known. */
+static umode_t papr_nd_attribute_visible(struct kobject *kobj,
+					 struct attribute *attr, int n)
+{
+	struct nvdimm *dimm = to_nvdimm(kobj_to_dev(kobj));
+	struct papr_scm_priv *priv = nvdimm_provider_data(dimm);
+
+	/* perf_stats is the only attribute whose visibility is conditional */
+	if (!priv->stat_buffer_len && attr == &dev_attr_perf_stats.attr)
+		return 0;
+
+	return attr->mode;
+}
+
+/* papr_scm specific dimm attributes, exposed through papr_nd_attribute_group */
+static struct attribute *papr_nd_attributes[] = {
+	&dev_attr_flags.attr,
+	&dev_attr_perf_stats.attr,	/* hidden by is_visible when stat_buffer_len == 0 */
+	&dev_attr_dirty_shutdown.attr,
+	&dev_attr_health_bitmap_inject.attr,
+	NULL,
+};
+
+static const struct attribute_group papr_nd_attribute_group = {
+	.name = "papr",
+	.is_visible = papr_nd_attribute_visible,
+	.attrs = papr_nd_attributes,
+};
+
+static const struct attribute_group *papr_nd_attr_groups[] = {
+	&papr_nd_attribute_group,	/* passed to nvdimm_create() */
+	NULL,
+};
+
+/* Register the nvdimm bus, the DIMM object and its region for this device. */
+static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
+{
+	struct device *dev = &p->pdev->dev;
+	struct nd_mapping_desc mapping;
+	struct nd_region_desc ndr_desc;
+	unsigned long dimm_flags = 0;
+	int target_nid, online_nid;
+
+	p->bus_desc.ndctl = papr_scm_ndctl;
+	p->bus_desc.module = THIS_MODULE;
+	p->bus_desc.of_node = p->pdev->dev.of_node;
+	p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+
+	/* Set the dimm command family mask to accept PDSMs */
+	set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
+	if (!p->bus_desc.provider_name)
+		return -ENOMEM;
+
+	p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
+	if (!p->bus) {
+		dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+		kfree(p->bus_desc.provider_name);
+		return -ENXIO;
+	}
+
+	set_bit(NDD_LABELING, &dimm_flags);
+
+	/*
+	 * Check if the nvdimm is unarmed. No locking needed as we are still
+	 * initializing. Ignore error encountered if any.
+	 */
+	__drc_pmem_query_health(p);
+
+	if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
+		set_bit(NDD_UNARMED, &dimm_flags);
+
+	p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
+				  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+	if (!p->nvdimm) {
+		dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
+		goto err;
+	}
+
+	if (nvdimm_bus_check_dimm_count(p->bus, 1))
+		goto err;
+
+	/* now add the region */
+
+	memset(&mapping, 0, sizeof(mapping));
+	mapping.nvdimm = p->nvdimm;
+	mapping.start = 0;
+	mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
+
+	memset(&ndr_desc, 0, sizeof(ndr_desc));
+	target_nid = dev_to_node(&p->pdev->dev);
+	online_nid = numa_map_to_online_node(target_nid);
+	ndr_desc.numa_node = online_nid;
+	ndr_desc.target_node = target_nid;
+	ndr_desc.res = &p->res;
+	ndr_desc.of_node = p->dn;
+	ndr_desc.provider_data = p;
+	ndr_desc.mapping = &mapping;
+	ndr_desc.num_mappings = 1;
+	ndr_desc.nd_set = &p->nd_set;
+
+	if (p->hcall_flush_required) {
+		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+		ndr_desc.flush = papr_scm_pmem_flush;
+	}
+
+	if (p->is_volatile)
+		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
+	else {
+		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
+		p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+	}
+	if (!p->region) {
+		dev_err(dev, "Error registering region %pR from %pOF\n",
+				ndr_desc.res, p->dn);
+		goto err;
+	}
+	if (target_nid != online_nid)
+		dev_info(dev, "Region registered with target node %d and online node %d\n",
+			 target_nid, online_nid);
+
+	mutex_lock(&papr_ndr_lock);
+	list_add_tail(&p->region_list, &papr_nd_regions);
+	mutex_unlock(&papr_ndr_lock);
+
+	return 0;
+
+err:	nvdimm_bus_unregister(p->bus);
+	kfree(p->bus_desc.provider_name);
+	return -ENXIO;
+}
+
+/* Mark the cache line containing phys_addr as bad and notify the region. */
+static void papr_scm_add_badblock(struct nd_region *region,
+				  struct nvdimm_bus *bus, u64 phys_addr)
+{
+	const u64 line_start = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+
+	if (nvdimm_bus_add_badrange(bus, line_start, L1_CACHE_BYTES) != 0) {
+		pr_err("Bad block registration for 0x%llx failed\n", phys_addr);
+		return;
+	}
+
+	pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n",
+		 line_start, line_start + L1_CACHE_BYTES);
+	nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON);
+}
+
+/* MCE notifier: record a bad block when a UE event hits a registered region. */
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+			 void *data)
+{
+	struct machine_check_event *evt = data;
+	struct papr_scm_priv *p;
+	u64 phys_addr;
+	bool found = false;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE)
+		return NOTIFY_DONE;
+	if (list_empty(&papr_nd_regions))	/* tested without papr_ndr_lock held */
+		return NOTIFY_DONE;
+
+	/*
+	 * The physical address obtained here is PAGE_SIZE aligned, so get the
+	 * exact address from the effective address
+	 */
+	phys_addr = evt->u.ue_error.physical_address +
+			(evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+	if (!evt->u.ue_error.physical_address_provided ||
+	    !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+		return NOTIFY_DONE;
+
+	/* mce notifier is called from a process context, so mutex is safe */
+	mutex_lock(&papr_ndr_lock);
+	list_for_each_entry(p, &papr_nd_regions, region_list) {
+		if (phys_addr >= p->res.start && phys_addr <= p->res.end) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found)	/* p is only a valid entry when the loop broke out early */
+		papr_scm_add_badblock(p->region, p->bus, phys_addr);
+
+	mutex_unlock(&papr_ndr_lock);
+
+	return found ? NOTIFY_OK : NOTIFY_DONE;
+}
+
+static struct notifier_block mce_ue_nb = {
+	.notifier_call = handle_mce_ue	/* registered in papr_scm_init() */
+};
+
+/* Probe: read the DT properties, bind the DRC memory and register with libnvdimm. */
+static int papr_scm_probe(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	u32 drc_index, metadata_size = 0;
+	u64 blocks, block_size;
+	struct papr_scm_priv *p;
+	u8 uuid_raw[UUID_SIZE];
+	const char *uuid_str;
+	ssize_t stat_size;
+	uuid_t uuid;
+	int rc;
+
+	/* check we have all the required DT properties */
+	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
+		dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_u64(dn, "ibm,block-size", &block_size)) {
+		dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) {
+		dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) {
+		dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn);
+		return -ENODEV;
+	}
+
+	/*
+	 * open firmware platform device create won't update the NUMA
+	 * distance table. For PAPR SCM devices we use numa_map_to_online_node()
+	 * to find the nearest online NUMA node and that requires correct
+	 * distance table information.
+	 */
+	update_numa_distance(dn);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	mutex_init(&p->health_mutex);
+
+	/* optional DT property: metadata_size keeps its 0 default when absent */
+	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
+
+	p->dn = dn;
+	p->drc_index = drc_index;
+	p->block_size = block_size;
+	p->blocks = blocks;
+	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
+
+	if (of_property_read_u64(dn, "ibm,persistence-failed-count",
+				 &p->dirty_shutdown_counter))
+		p->dirty_shutdown_counter = 0;
+
+	/* We just need to ensure that set cookies are unique across */
+	uuid_parse(uuid_str, &uuid);
+
+	/*
+	 * The cookie1 and cookie2 are not really little endian.
+	 * We store a raw buffer representation of the
+	 * uuid string so that we can compare this with the label
+	 * area cookie irrespective of the endian configuration
+	 * with which the kernel is built.
+	 *
+	 * Historically we stored the cookie in the below format.
+	 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
+	 *	cookie1 was 0xfd423b0b671b5172
+	 *	cookie2 was 0xaabce8cae35b1d8d
+	 */
+	export_uuid(uuid_raw, &uuid);
+	p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
+	p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
+
+	/* might be zero */
+	p->metadata_size = metadata_size;
+	p->pdev = pdev;
+
+	/* request the hypervisor to bind this region to somewhere in memory */
+	rc = drc_pmem_bind(p);
+
+	/* If phyp says drc memory still bound then force unbound and retry */
+	if (rc == H_OVERLAP)
+		rc = drc_pmem_query_n_bind(p);
+
+	if (rc != H_SUCCESS) {
+		dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+		rc = -ENXIO;
+		goto err;
+	}
+
+	/* setup the resource for the newly bound range */
+	p->res.start = p->bound_addr;
+	p->res.end   = p->bound_addr + p->blocks * p->block_size - 1;
+	p->res.name  = pdev->name;
+	p->res.flags = IORESOURCE_MEM;
+
+	/* Try retrieving the stat buffer and see if its supported */
+	stat_size = drc_pmem_query_stats(p, NULL, 0);
+	if (stat_size > 0) {
+		p->stat_buffer_len = stat_size;
+		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+			p->stat_buffer_len);
+	}
+
+	rc = papr_scm_nvdimm_init(p);
+	if (rc)
+		goto err2;
+
+	platform_set_drvdata(pdev, p);
+	papr_scm_pmu_register(p);
+
+	return 0;
+
+err2:	drc_pmem_unbind(p);
+err:	kfree(p);
+	return rc;
+}
+
+/* Platform remove: unlist the region, drop the bus, unbind and free state. */
+static int papr_scm_remove(struct platform_device *pdev)
+{
+	struct papr_scm_priv *priv = platform_get_drvdata(pdev);
+
+	mutex_lock(&papr_ndr_lock);
+	list_del(&priv->region_list);
+	mutex_unlock(&papr_ndr_lock);
+
+	nvdimm_bus_unregister(priv->bus);
+	drc_pmem_unbind(priv);
+
+	if (pdev->archdata.priv) {
+		unregister_nvdimm_pmu(pdev->archdata.priv);
+		pdev->archdata.priv = NULL;
+	}
+	kfree(priv->bus_desc.provider_name);
+	kfree(priv);
+	return 0;
+}
+
+static const struct of_device_id papr_scm_match[] = {
+	{ .compatible = "ibm,pmemory" },
+	{ .compatible = "ibm,pmemory-v2" },
+	{ },	/* empty terminating entry */
+};
+
+static struct platform_driver papr_scm_driver = {
+	.probe = papr_scm_probe,
+	.remove = papr_scm_remove,
+	.driver = {
+		.name = "papr_scm",
+		.of_match_table = papr_scm_match,	/* compatibles listed above */
+	},
+};
+
+/* Module init: register the platform driver, then hook the MCE notifier. */
+static int __init papr_scm_init(void)
+{
+	int err = platform_driver_register(&papr_scm_driver);
+
+	if (err)
+		return err;
+	mce_register_notifier(&mce_ue_nb);
+	return 0;
+}
+module_init(papr_scm_init);
+
+static void __exit papr_scm_exit(void)
+{
+	mce_unregister_notifier(&mce_ue_nb);	/* reverse of papr_scm_init() order */
+	platform_driver_unregister(&papr_scm_driver);
+}
+module_exit(papr_scm_exit);
+
+MODULE_DEVICE_TABLE(of, papr_scm_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
new file mode 100644
index 0000000000..1772ae3d19
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * pSeries specific routines for PCI.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci.h>
+#include "pseries.h"
+
+#if 0 /* compiled out: nothing up to the matching #endif is built */
+void pcibios_name_device(struct pci_dev *dev)
+{
+	struct device_node *dn;
+
+	/*
+	 * Add IBM loc code (slot) as a prefix to the device names for service
+	 */
+	dn = pci_device_to_OF_node(dev);
+	if (dn) {
+		const char *loc_code = of_get_property(dn, "ibm,loc-code",
+				NULL);
+		if (loc_code) {
+			int loc_len = strlen(loc_code);
+			if (loc_len < sizeof(dev->dev.name)) {
+				memmove(dev->dev.name+loc_len+1, dev->dev.name, /* shift old name right */
+					sizeof(dev->dev.name)-loc_len-1);
+				memcpy(dev->dev.name, loc_code, loc_len);
+				dev->dev.name[loc_len] = ' ';	/* separator after the loc code */
+				dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
+			}
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
+#endif
+
+#ifdef CONFIG_PCI_IOV
+#define MAX_VFS_FOR_MAP_PE 256	/* VF-count limit tested in pseries_pci_sriov_enable() */
+struct pe_map_bar_entry {	/* one entry per VF, indexed by VF number */
+	__be64     bar;       /* Input:  Virtual Function BAR */
+	__be16     rid;       /* Input:  Virtual Function Router ID */
+	__be16     pe_num;    /* Output: Virtual Function PE Number */
+	__be32     reserved;  /* Reserved Space */
+};
+
+/* Pass vf_pe_array through the RTAS map-PE call; results are copied back in place. */
+static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs,
+			       struct pe_map_bar_entry *vf_pe_array)
+{
+	struct pci_dn *pdn;
+	int rc;
+	unsigned long buid, addr;
+	int ibm_map_pes = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER);
+
+	if (ibm_map_pes == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	pdn = pci_get_pdn(pdev);
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	buid = pdn->phb->buid;
+	spin_lock(&rtas_data_buf_lock);
+	memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE); /* full buffer, not num_vfs entries */
+	rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr,
+		       BUID_HI(buid), BUID_LO(buid),
+		       rtas_data_buf,
+		       num_vfs * sizeof(struct pe_map_bar_entry));
+	memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE); /* copied back even when rc != 0 */
+	spin_unlock(&rtas_data_buf_lock);
+
+	if (rc)
+		dev_err(&pdev->dev,
+			"%s: Failed to associate pes PE#%lx, rc=%x\n",
+			__func__,  addr, rc);
+
+	return rc;
+}
+
+/* Store the big-endian PE number returned for one VF in the PF's pe_num_map. */
+static void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+
+	pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num);
+
+	dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n",
+		pci_domain_nr(pdev->bus), pdev->bus->number,
+		PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+		PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)),
+		pdn->pe_num_map[vf_index]);
+}
+
+/* Build one map entry per VF, send them to firmware and record the PE numbers. */
+static int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pci_dn *pdn;
+	int i, rc, vf_index;
+	struct pe_map_bar_entry *vf_pe_array;
+	struct resource *res;
+	u64 size;
+
+	vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!vf_pe_array)
+		return -ENOMEM;
+
+	pdn = pci_get_pdn(pdev);
+	/* create firmware structure to associate pes */
+	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+		pdn->pe_num_map[vf_index] = IODA_INVALID_PE;
+		/* NOTE(review): every parented BAR rewrites the same entry, last one wins - confirm */
+		for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+			res = &pdev->resource[i + PCI_IOV_RESOURCES];
+			if (!res->parent)
+				continue;
+			size = pcibios_iov_resource_alignment(pdev, i +
+					PCI_IOV_RESOURCES);
+			vf_pe_array[vf_index].bar =
+				cpu_to_be64(res->start + size * vf_index);
+			vf_pe_array[vf_index].rid =
+				cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index)
+					    << 8) | pci_iov_virtfn_devfn(pdev,
+					    vf_index));
+			vf_pe_array[vf_index].pe_num = cpu_to_be16(IODA_INVALID_PE);
+		}
+	}
+
+	rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array);
+	/* Only zero is success */
+	if (!rc)
+		for (vf_index = 0; vf_index < num_vfs; vf_index++)
+			pseries_set_pe_num(pdev, vf_index,
+					   vf_pe_array[vf_index].pe_num);
+
+	kfree(vf_pe_array);
+	return rc;
+}
+
+/* Validate num_vfs, allocate pe_num_map and associate a PE with each VF. */
+static int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const __be32 *max_vfs;
+	int rc, max_config_vfs;
+	struct pci_dn *pdn;
+
+	max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL);
+	if (!max_vfs)
+		return -EINVAL;
+
+	/* First integer stores max config; num_vfs must respect both limits */
+	max_config_vfs = of_read_number(&max_vfs[0], 1);
+	if (max_config_vfs < num_vfs || num_vfs > MAX_VFS_FOR_MAP_PE) {
+		dev_err(&pdev->dev,
+			"Num VFs %x > %x Configurable VFs\n",
+			num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ?
+			MAX_VFS_FOR_MAP_PE : max_config_vfs);
+		return -EINVAL;
+	}
+
+	pdn = pci_get_pdn(pdev);
+	pdn->pe_num_map = kmalloc_array(num_vfs,
+					sizeof(*pdn->pe_num_map),
+					GFP_KERNEL);
+	if (!pdn->pe_num_map)
+		return -ENOMEM;
+
+	rc = pseries_associate_pes(pdev, num_vfs);
+	if (rc) {
+		/* Anything other than zero is failure */
+		dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc);
+		kfree(pdn->pe_num_map);
+		pdn->pe_num_map = NULL;	/* don't leave a dangling pointer */
+		return rc;
+	}
+
+	pci_vf_drivers_autoprobe(pdev, false);
+
+	return 0;
+}
+
+/* pcibios hook: allocate the VF PCI data, then run the pseries enable path. */
+static int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	add_sriov_vf_pdns(pdev);	/* not undone here if the enable below fails */
+	return pseries_pci_sriov_enable(pdev, num_vfs);
+}
+
+/* pcibios hook: free the PE map and VF PCI data, then re-enable autoprobe. */
+static int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	struct pci_dn *pf_pdn = pci_get_pdn(pdev);
+
+	/* Drop the VF-index to PE-number table first */
+	kfree(pf_pdn->pe_num_map);
+	/* Then release the per-VF PCI data */
+	remove_sriov_vf_pdns(pdev);
+	pci_vf_drivers_autoprobe(pdev, true);
+	return 0;
+}
+#endif
+
+/* Reserve the legacy PIC, DMA and timer port ranges when isa_io_base is set. */
+static void __init pSeries_request_regions(void)
+{
+	if (isa_io_base == 0)
+		return;
+	request_region(0x20, 0x20, "pic1");
+	request_region(0xa0, 0x20, "pic2");
+	request_region(0x00, 0x20, "dma1");
+	request_region(0x40, 0x20, "timer");
+	request_region(0x80, 0x10, "dma page reg");
+	request_region(0xc0, 0x20, "dma2");
+}
+
+/* Late PCI setup: reserve legacy ports, report EEH state, install SR-IOV hooks. */
+void __init pSeries_final_fixup(void)
+{
+	pSeries_request_regions();
+	eeh_show_enabled();
+
+#ifdef CONFIG_PCI_IOV
+	ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
+	ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
+#endif /* CONFIG_PCI_IOV */
+}
+
+/*
+ * Assume the winbond 82c105 is the IDE controller on a
+ * p610/p615/p630. We should probably be more careful in case
+ * someone tries to plug in a similar adapter.
+ */
+static void fixup_winbond_82c105(struct pci_dev *dev)
+{
+	/* Only the 2nd function of the chip (bus 0, devfn 0x81) gets zapped */
+	const bool second_fn = dev->bus->number == 0 && dev->devfn == 0x81;
+	struct resource *res;
+	unsigned int ctrl;
+
+	if (!machine_is(pseries))
+		return;
+
+	printk("Using INTC for W82c105 IDE controller.\n");
+	/* Enable LEGIRQ to use INTC instead of ISA interrupts */
+	pci_read_config_dword(dev, 0x40, &ctrl);
+	pci_write_config_dword(dev, 0x40, ctrl | (1<<11));
+
+	pci_dev_for_each_resource(dev, res) {
+		if (second_fn && (res->flags & IORESOURCE_IO))
+			res->flags &= ~IORESOURCE_IO;
+		if (res->start == 0 && res->end) {
+			res->flags = 0;
+			res->end = 0;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+			 fixup_winbond_82c105);
+
+/* Translate one "ibm,pcie-link-speed-stats" cell into a pci_bus_speed value. */
+static enum pci_bus_speed prop_to_pci_speed(u32 prop)
+{
+	if (prop == 0x01)
+		return PCIE_SPEED_2_5GT;
+	if (prop == 0x02)
+		return PCIE_SPEED_5_0GT;
+	if (prop == 0x04)
+		return PCIE_SPEED_8_0GT;
+	if (prop == 0x08)
+		return PCIE_SPEED_16_0GT;
+	if (prop == 0x10)
+		return PCIE_SPEED_32_0GT;
+
+	/* Any other value is reported as an unknown speed */
+	pr_debug("Unexpected PCI link speed property value\n");
+	return PCI_SPEED_UNKNOWN;
+}
+
+/* Set the bridge release callback and fill bus speeds from the device tree. */
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct device_node *dn, *pdn;
+	struct pci_bus *bus;
+	u32 pcie_link_speed_stats[2];
+	int rc;
+
+	bus = bridge->bus;
+	/* Rely on the pcibios_free_controller_deferred() callback. */
+	pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
+					(void *) pci_bus_to_host(bus));
+
+	dn = pcibios_get_phb_of_node(bus);
+	if (!dn)
+		return 0;
+
+	/* Walk from the PHB node up through its parents until the property is found */
+	for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
+		rc = of_property_read_u32_array(pdn,
+				"ibm,pcie-link-speed-stats",
+				&pcie_link_speed_stats[0], 2);
+		if (!rc)
+			break;
+	}
+	of_node_put(pdn);	/* pdn is NULL here when no node had the property */
+
+	if (rc) {
+		pr_debug("no ibm,pcie-link-speed-stats property\n");
+		return 0;
+	}
+
+	bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]);	/* first cell */
+	bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]);	/* second cell */
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
new file mode 100644
index 0000000000..4ba8245681
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
+ * for RPA-compliant PPC64 platform.
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+#include "pseries.h"
+
+/* Allocate, set up and scan a hotplugged PHB; returns NULL if allocation fails. */
+struct pci_controller *init_phb_dynamic(struct device_node *dn)
+{
+	struct pci_controller *phb;
+
+	pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn);
+
+	phb = pcibios_alloc_controller(dn);
+	if (!phb)
+		return NULL;
+	rtas_setup_phb(phb);
+	pci_process_bridge_OF_ranges(phb, dn, 0);
+	phb->controller_ops = pseries_pci_controller_ops;
+
+	pci_devs_phb_init_dynamic(phb);
+	pseries_msi_allocate_domains(phb);
+
+	/* Create EEH devices for the PHB */
+	eeh_phb_pe_create(phb);
+
+	if (dn->child)	/* only when the PHB node has child nodes */
+		pseries_eeh_init_edev_recursive(PCI_DN(dn));
+
+	pcibios_scan_phb(phb);
+	pcibios_finish_adding_to_bus(phb->bus);
+
+	return phb;
+}
+EXPORT_SYMBOL_GPL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs; returns 0 or a negative errno */
+int remove_phb_dynamic(struct pci_controller *phb)
+{
+	struct pci_bus *b = phb->bus;
+	struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge);
+	struct resource *res;
+	int rc, i;
+
+	pr_debug("PCI: Removing PHB %04x:%02x...\n",
+		 pci_domain_nr(b), b->number);
+
+	/* We cannot remove a root bus that has children */
+	if (!(list_empty(&b->children) && list_empty(&b->devices)))
+		return -EBUSY;
+
+	/* We -know- there aren't any child devices anymore at this stage
+	 * and thus, we can safely unmap the IO space as it's not in use
+	 */
+	res = &phb->io_resource;
+	if (res->flags & IORESOURCE_IO) {
+		rc = pcibios_unmap_io_space(b);
+		if (rc) {
+			printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+			       __func__, b->name);
+			return -EIO;	/* negative errno, like the -EBUSY path above */
+		}
+	}
+
+	pseries_msi_free_domains(phb);
+
+	/* Keep a reference so phb isn't freed yet */
+	get_device(&host_bridge->dev);
+
+	/* Remove the PCI bus and unregister the bridge device from sysfs */
+	phb->bus = NULL;
+	pci_remove_bus(b);
+	host_bridge->bus = NULL;
+	device_unregister(&host_bridge->dev);
+
+	/* Now release the IO resource */
+	if (res->flags & IORESOURCE_IO)
+		release_resource(res);
+
+	/* Release memory resources */
+	for (i = 0; i < 3; ++i) {
+		res = &phb->mem_resources[i];
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+		release_resource(res);
+	}
+
+	/*
+	 * The pci_controller data structure is freed by
+	 * the pcibios_free_controller_deferred() callback;
+	 * see pseries_root_bridge_prepare().
+	 */
+	put_device(&host_bridge->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(remove_phb_dynamic);
diff --git a/arch/powerpc/platforms/pseries/plpks-secvar.c b/arch/powerpc/platforms/pseries/plpks-secvar.c
new file mode 100644
index 0000000000..257fd1f8bc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/plpks-secvar.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+// Secure variable implementation using the PowerVM LPAR Platform KeyStore (PLPKS)
+//
+// Copyright 2022, 2023 IBM Corporation
+// Authors: Russell Currey
+//          Andrew Donnellan
+//          Nayna Jain
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kobject.h>
+#include <linux/nls.h>
+#include <asm/machdep.h>
+#include <asm/secvar.h>
+#include <asm/plpks.h>
+
+// Config attributes for sysfs: defines name##_show(), which emits func() via fmt, plus a read-only attr_##name
+#define PLPKS_CONFIG_ATTR(name, fmt, func)			\
+	static ssize_t name##_show(struct kobject *kobj,	\
+				   struct kobj_attribute *attr,	\
+				   char *buf)			\
+	{							\
+		return sysfs_emit(buf, fmt, func());		\
+	}							\
+	static struct kobj_attribute attr_##name = __ATTR_RO(name)
+
+PLPKS_CONFIG_ATTR(version, "%u\n", plpks_get_version);
+PLPKS_CONFIG_ATTR(max_object_size, "%u\n", plpks_get_maxobjectsize);
+PLPKS_CONFIG_ATTR(total_size, "%u\n", plpks_get_totalsize);
+PLPKS_CONFIG_ATTR(used_space, "%u\n", plpks_get_usedspace);
+PLPKS_CONFIG_ATTR(supported_policies, "%08x\n", plpks_get_supportedpolicies);
+PLPKS_CONFIG_ATTR(signed_update_algorithms, "%016llx\n", plpks_get_signedupdatealgorithms);
+
+static const struct attribute *config_attrs[] = {	// handed out via plpks_secvar_ops.config_attrs
+	&attr_version.attr,
+	&attr_max_object_size.attr,
+	&attr_total_size.attr,
+	&attr_used_space.attr,
+	&attr_supported_policies.attr,
+	&attr_signed_update_algorithms.attr,
+	NULL,
+};
+
+// Signed-update always; db, dbx, grubdb, grubdbx and sbat are also world-readable.
+static u32 get_policy(const char *name)
+{
+	u32 policy = PLPKS_SIGNEDUPDATE;
+
+	if (!strcmp(name, "db") || !strcmp(name, "dbx") ||
+	    !strcmp(name, "grubdb") || !strcmp(name, "grubdbx") ||
+	    !strcmp(name, "sbat"))
+		policy |= PLPKS_WORLDREADABLE;
+	return policy;
+}
+
+static const char * const plpks_var_names[] = {	// handed out via plpks_secvar_ops.var_names
+	"PK",
+	"KEK",
+	"db",
+	"dbx",
+	"grubdb",
+	"grubdbx",
+	"sbat",
+	"moduledb",
+	"trustedcadb",
+	NULL,	// list is NULL-terminated
+};
+
+// Read an OS-owned variable; returns 0, -ENOENT, -ENOMEM, or -EIO for any other failure.
+static int plpks_get_variable(const char *key, u64 key_len, u8 *data,
+			      u64 *data_size)
+{
+	struct plpks_var var = {0};
+	int rc = 0;
+
+	// We subtract 1 from key_len because we don't need to include the
+	// null terminator at the end of the string
+	var.name = kcalloc(key_len - 1, sizeof(wchar_t), GFP_KERNEL);
+	if (!var.name)
+		return -ENOMEM;
+	rc = utf8s_to_utf16s(key, key_len - 1, UTF16_LITTLE_ENDIAN, (wchar_t *)var.name,
+			     key_len - 1);
+	if (rc < 0)
+		goto err;
+	var.namelen = rc * 2;	// rc is taken as a count of 2-byte units
+
+	var.os = PLPKS_VAR_LINUX;
+	if (data) {	// with a NULL data pointer, var.data and var.datalen stay zeroed
+		var.data = data;
+		var.datalen = *data_size;
+	}
+	rc = plpks_read_os_var(&var);
+	if (rc)
+		goto err;
+
+	*data_size = var.datalen;
+
+err:
+	kfree(var.name);
+	if (rc && rc != -ENOENT) {
+		pr_err("Failed to read variable '%s': %d\n", key, rc);
+		// Return -EIO since userspace probably doesn't care about the
+		// specific error
+		rc = -EIO;
+	}
+	return rc;
+}
+
+// Write a signed update: data is 8 bytes of big-endian flags followed by the payload.
+static int plpks_set_variable(const char *key, u64 key_len, u8 *data,
+			      u64 data_size)
+{
+	struct plpks_var var = {0};
+	int rc = 0;
+	u64 flags;
+
+	// Secure variables need to be prefixed with 8 bytes of flags.
+	// We only want to perform the write if we have at least one byte of data.
+	if (data_size <= sizeof(flags))
+		return -EINVAL;
+
+	// We subtract 1 from key_len because we don't need to include the
+	// null terminator at the end of the string
+	var.name = kcalloc(key_len - 1, sizeof(wchar_t), GFP_KERNEL);
+	if (!var.name)
+		return -ENOMEM;
+	rc = utf8s_to_utf16s(key, key_len - 1, UTF16_LITTLE_ENDIAN, (wchar_t *)var.name,
+			     key_len - 1);
+	if (rc < 0)
+		goto err;
+	var.namelen = rc * 2;	// rc is taken as a count of 2-byte units
+	// Flags are contained in the first 8 bytes of the buffer, and are always big-endian
+	flags = be64_to_cpup((__be64 *)data);
+
+	var.datalen = data_size - sizeof(flags);
+	var.data = data + sizeof(flags);	// payload starts right after the flags
+	var.os = PLPKS_VAR_LINUX;
+	var.policy = get_policy(key);
+
+	// Unlike in the read case, the plpks error code can be useful to
+	// userspace on write, so we return it rather than just -EIO
+	rc = plpks_signed_update_var(&var, flags);
+
+err:
+	kfree(var.name);
+	return rc;
+}
+
+// PLPKS dynamic secure boot doesn't give us a format string in the same way OPAL does.
+// Instead, report the format using the SB_VERSION variable in the keystore.
+// The string is made up by us, and takes the form "ibm,plpks-sb-v<n>" (or "ibm,plpks-sb-unknown"
+// if the SB_VERSION variable doesn't exist). Hypervisor defines the SB_VERSION variable as a
+// "1 byte unsigned integer value".
+static ssize_t plpks_secvar_format(char *buf, size_t bufsize)
+{
+	u8 sb_version;
+	ssize_t rc;
+	// Request descriptor for the one-byte SB_VERSION variable.
+	// Only the signed variables have null bytes in their names, this one doesn't
+	struct plpks_var var = {
+		.component = NULL,
+		.name = "SB_VERSION",
+		.namelen = strlen("SB_VERSION"),
+		.datalen = 1,
+		.data = &sb_version,
+	};
+
+	// Unlike the other vars, SB_VERSION is owned by firmware instead of the OS
+	rc = plpks_read_fw_var(&var);
+
+	// Success: the single data byte read back is the version number
+	if (rc == 0)
+		return snprintf(buf, bufsize, "ibm,plpks-sb-v%hhu", sb_version);
+
+	// No SB_VERSION variable in the keystore: report an unknown format
+	if (rc == -ENOENT)
+		return snprintf(buf, bufsize, "ibm,plpks-sb-unknown");
+
+	// Any other failure is logged and collapsed to -EIO
+	pr_err("Error %ld reading SB_VERSION from firmware\n", rc);
+	return -EIO;
+}
+
+static int plpks_max_size(u64 *max_size)
+{
+	const u64 flags_prefix_len = sizeof(u64);
+
+	// Writes carry the signed update flags in their first 8 bytes, so the
+	// user-visible limit is the hypervisor's max object size plus that prefix.
+	*max_size = (u64)plpks_get_maxobjectsize() + flags_prefix_len;
+	return 0;
+}
+
+
+static const struct secvar_operations plpks_secvar_ops = {	// installed by plpks_secvar_init()
+	.get = plpks_get_variable,
+	.set = plpks_set_variable,
+	.format = plpks_secvar_format,
+	.max_size = plpks_max_size,
+	.config_attrs = config_attrs,
+	.var_names = plpks_var_names,
+};
+
+// Initcall: install the PLPKS secvar operations when the keystore is available.
+static int plpks_secvar_init(void)
+{
+	if (plpks_is_available())
+		return set_secvar_ops(&plpks_secvar_ops);
+	return -ENODEV;
+}
+machine_device_initcall(pseries, plpks_secvar_init);
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
new file mode 100644
index 0000000000..2d40304eb6
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * POWER LPAR Platform KeyStore(PLPKS)
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Nayna Jain <nayna@linux.ibm.com>
+ *
+ * Provides access to variables stored in Power LPAR Platform KeyStore(PLPKS).
+ */
+
+#define pr_fmt(fmt) "plpks: " fmt
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/plpks.h>
+#include <asm/firmware.h>
+
+static u8 *ospassword;		// OS password: generated at init or recovered from the FDT
+static u16 ospasswordlength;	// number of valid bytes in ospassword
+
+// Retrieved with H_PKS_GET_CONFIG
+static u8 version;
+static u16 objoverhead;
+static u16 maxpwsize;
+static u16 maxobjsize;
+static s16 maxobjlabelsize;	// note: signed here, returned as u16 by its accessor
+static u32 totalsize;
+static u32 usedspace;		// re-read on every plpks_get_usedspace() call
+static u32 supportedpolicies;
+static u32 maxlargeobjectsize;
+static u64 signedupdatealgorithms;
+
+struct plpks_auth {
+	u8 version;		// set to 1 by construct_auth()
+	u8 consumer;		// which consumer the request is made for
+	__be64 rsvd0;
+	__be32 rsvd1;
+	__be16 passwordlength;	// big-endian length of password[]
+	u8 password[];		// only filled in for the OS consumer
+} __packed __aligned(16);
+
+struct label_attr {
+	u8 prefix[8];		// component string, when one is given
+	u8 version;		// PLPKS_LABEL_VERSION
+	u8 os;			// caller supplied OS identifier
+	u8 length;		// PLPKS_MAX_LABEL_ATTR_SIZE
+	u8 reserved[5];
+};
+
+struct label {
+	struct label_attr attr;
+	u8 name[PLPKS_MAX_NAME_SIZE];
+	size_t size;		// attribute size plus the used part of name[]
+};
+
+// Translate a hypervisor (H_*) status into a negative Linux errno (0 on
+// H_SUCCESS).  Any status without an explicit mapping becomes -EINVAL.
+static int pseries_status_to_err(int rc)
+{
+	int linux_err;
+
+	switch (rc) {
+	case H_SUCCESS:
+		linux_err = 0;
+		break;
+
+	case H_FUNCTION:
+		linux_err = -ENXIO;
+		break;
+
+	case H_NOT_FOUND:
+		linux_err = -ENOENT;
+		break;
+
+	// Plain busy and every long-busy hint collapse to the same errno
+	case H_BUSY:
+	case H_LONG_BUSY_ORDER_1_MSEC:
+	case H_LONG_BUSY_ORDER_10_MSEC:
+	case H_LONG_BUSY_ORDER_100_MSEC:
+	case H_LONG_BUSY_ORDER_1_SEC:
+	case H_LONG_BUSY_ORDER_10_SEC:
+	case H_LONG_BUSY_ORDER_100_SEC:
+		linux_err = -EBUSY;
+		break;
+
+	case H_AUTHORITY:
+		linux_err = -EPERM;
+		break;
+
+	case H_NO_MEM:
+		linux_err = -ENOMEM;
+		break;
+
+	case H_TOO_BIG:
+		linux_err = -EFBIG;
+		break;
+
+	case H_RESOURCE:
+	case H_IN_USE:
+		linux_err = -EEXIST;
+		break;
+
+	case H_STATE:
+	case H_R_STATE:
+	case H_ABORTED:
+		linux_err = -EIO;
+		break;
+
+	// H_PARAMETER, H_P2..H_P6 and anything unrecognised
+	default:
+		linux_err = -EINVAL;
+		break;
+	}
+
+	pr_debug("Converted hypervisor code %d to Linux %d\n", rc, linux_err);
+
+	return linux_err;
+}
+
+// Ask the hypervisor to generate the OS consumer password and keep a private
+// copy in ospassword/ospasswordlength.  Returns 0 on success (including when
+// a password is already in place) or a negative errno.
+static int plpks_gen_password(void)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	u8 *password, consumer = PLPKS_OS_OWNER;
+	int rc;
+
+	// If we booted from kexec, we could be reusing an existing password already
+	if (ospassword) {
+		pr_debug("Password of length %u already in use\n", ospasswordlength);
+		return 0;
+	}
+
+	// The password must not cross a page boundary, so we align to the next power of 2
+	password = kzalloc(roundup_pow_of_two(maxpwsize), GFP_KERNEL);
+	if (!password)
+		return -ENOMEM;
+
+	rc = plpar_hcall(H_PKS_GEN_PASSWORD, retbuf, consumer, 0,
+			 virt_to_phys(password), maxpwsize);
+
+	if (!rc) {
+		ospassword = kmemdup(password, maxpwsize, GFP_KERNEL);
+		if (!ospassword) {
+			kfree_sensitive(password);
+			return -ENOMEM;
+		}
+		// Only publish the length once the copy it describes exists
+		ospasswordlength = maxpwsize;
+	} else if (rc == H_IN_USE) {
+		pr_warn("Password already set - authenticated operations will fail\n");
+		rc = 0;
+	}
+
+	// The bounce buffer holds the secret: scrub it before freeing
+	kfree_sensitive(password);
+
+	return pseries_status_to_err(rc);
+}
+
+static struct plpks_auth *construct_auth(u8 consumer)
+{
+	struct plpks_auth *auth;
+
+	if (consumer > PLPKS_OS_OWNER)
+		return ERR_PTR(-EINVAL);
+
+	// The auth structure must be 16 byte aligned and must stay within a
+	// single page, so the allocation is rounded up to a power of 2.
+	auth = kzalloc(roundup_pow_of_two(struct_size(auth, password, maxpwsize)), GFP_KERNEL);
+	if (!auth)
+		return ERR_PTR(-ENOMEM);
+
+	auth->version = 1;
+	auth->consumer = consumer;
+
+	// Firmware and bootloader consumers carry no password: their password
+	// fields stay zeroed.  Everything else gets the cached OS password.
+	if (consumer != PLPKS_FW_OWNER && consumer != PLPKS_BOOTLOADER_OWNER) {
+		auth->passwordlength = cpu_to_be16(ospasswordlength);
+		memcpy(auth->password, ospassword, ospasswordlength);
+	}
+
+	return auth;
+}
+
+/*
+ * Label is combination of label attributes + name.
+ * Label attributes are used internally by kernel and not exposed to the user.
+ */
+static struct label *construct_label(char *component, u8 varos, u8 *name,
+				     u16 namelen)
+{
+	size_t prefix_len = 0;
+	struct label *lbl;
+
+	if (!name || namelen > PLPKS_MAX_NAME_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	// A NULL component is legal (signed updates use it): no prefix then
+	if (component)
+		prefix_len = strlen(component);
+	if (prefix_len > sizeof(lbl->attr.prefix))
+		return ERR_PTR(-EINVAL);
+
+	// Rounding the size up to a power of 2 keeps the label from crossing a
+	// page boundary.
+	lbl = kzalloc(roundup_pow_of_two(sizeof(*lbl)), GFP_KERNEL);
+	if (!lbl)
+		return ERR_PTR(-ENOMEM);
+
+	lbl->attr.version = PLPKS_LABEL_VERSION;
+	lbl->attr.os = varos;
+	lbl->attr.length = PLPKS_MAX_LABEL_ATTR_SIZE;
+	if (prefix_len)
+		memcpy(lbl->attr.prefix, component, prefix_len);
+	memcpy(lbl->name, name, namelen);
+
+	// Only the attributes plus the used part of the name count as the label
+	lbl->size = sizeof(lbl->attr) + namelen;
+
+	return lbl;
+}
+
+// Fetch the keystore configuration with H_PKS_GET_CONFIG, cache every field
+// in the file-scope variables and sanity check the values against the limits
+// the spec requires.  Returns 0 on success or a negative errno.
+static int _plpks_get_config(void)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct config {
+		u8 version;
+		u8 flags;
+		__be16 rsvd0;
+		__be16 objoverhead;
+		__be16 maxpwsize;
+		__be16 maxobjlabelsize;
+		__be16 maxobjsize;
+		__be32 totalsize;
+		__be32 usedspace;
+		__be32 supportedpolicies;
+		__be32 maxlargeobjectsize;
+		__be64 signedupdatealgorithms;
+		u8 rsvd1[476];
+	} __packed * config;
+	int rc;
+
+	// Config struct must not cross a page boundary. So long as the struct
+	// size is a power of 2, this should be fine as alignment is guaranteed
+	config = kzalloc(sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(config),
+			 sizeof(*config));
+	if (rc != H_SUCCESS) {
+		rc = pseries_status_to_err(rc);
+		goto out;
+	}
+
+	// Multi-byte fields arrive big-endian; keep CPU-endian copies
+	version = config->version;
+	objoverhead = be16_to_cpu(config->objoverhead);
+	maxpwsize = be16_to_cpu(config->maxpwsize);
+	maxobjsize = be16_to_cpu(config->maxobjsize);
+	maxobjlabelsize = be16_to_cpu(config->maxobjlabelsize);
+	totalsize = be32_to_cpu(config->totalsize);
+	usedspace = be32_to_cpu(config->usedspace);
+	supportedpolicies = be32_to_cpu(config->supportedpolicies);
+	maxlargeobjectsize = be32_to_cpu(config->maxlargeobjectsize);
+	signedupdatealgorithms = be64_to_cpu(config->signedupdatealgorithms);
+
+	// Validate that the numbers we get back match the requirements of the
+	// spec.  Every failed check below reports -EIO.
+	rc = -EIO;
+
+	if (maxpwsize < 32) {
+		pr_err("Invalid Max Password Size received from hypervisor (%d < 32)\n", maxpwsize);
+		goto out;
+	}
+
+	if (maxobjlabelsize < 255) {
+		pr_err("Invalid Max Object Label Size received from hypervisor (%d < 255)\n",
+		       maxobjlabelsize);
+		goto out;
+	}
+
+	if (totalsize < 4096) {
+		pr_err("Invalid Total Size received from hypervisor (%d < 4096)\n", totalsize);
+		goto out;
+	}
+
+	if (version >= 3 && maxlargeobjectsize >= 65536 && maxobjsize != 0xFFFF) {
+		pr_err("Invalid Max Object Size (0x%x != 0xFFFF)\n", maxobjsize);
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+	kfree(config);
+	return rc;
+}
+
+u8 plpks_get_version(void)
+{
+	return version;		// value cached by the last _plpks_get_config()
+}
+
+u16 plpks_get_objoverhead(void)
+{
+	return objoverhead;	// cached, not re-read from the hypervisor
+}
+
+u16 plpks_get_maxpwsize(void)
+{
+	return maxpwsize;	// cached, not re-read from the hypervisor
+}
+
+u16 plpks_get_maxobjectsize(void)
+{
+	return maxobjsize;	// cached, not re-read from the hypervisor
+}
+
+u16 plpks_get_maxobjectlabelsize(void)
+{
+	return maxobjlabelsize;	// stored as s16, converted to u16 on return
+}
+
+u32 plpks_get_totalsize(void)
+{
+	return totalsize;	// cached, not re-read from the hypervisor
+}
+
+u32 plpks_get_usedspace(void)
+{
+	// usedspace changes as objects are updated, so refresh the cached
+	// configuration on every call instead of trusting the last value.
+	int rc = _plpks_get_config();
+
+	if (!rc)
+		return usedspace;
+	pr_err("Couldn't get config, rc: %d\n", rc);
+	return 0;
+}
+
+u32 plpks_get_supportedpolicies(void)
+{
+	return supportedpolicies;	// value cached by the last _plpks_get_config()
+}
+
+u32 plpks_get_maxlargeobjectsize(void)
+{
+	return maxlargeobjectsize;	// cached, not re-read from the hypervisor
+}
+
+u64 plpks_get_signedupdatealgorithms(void)
+{
+	return signedupdatealgorithms;	// cached, not re-read from the hypervisor
+}
+
+u16 plpks_get_passwordlen(void)
+{
+	return ospasswordlength;	// length of the OS password held in ospassword
+}
+
+// Report whether the keystore can be used.  Two things are required: the
+// firmware must advertise FW_FEATURE_PLPKS, and a configuration query must
+// succeed.
+bool plpks_is_available(void)
+{
+	bool has_feature = firmware_has_feature(FW_FEATURE_PLPKS);
+
+	if (!has_feature)
+		return false;
+
+	// Note: the query also refreshes the cached configuration values
+	return _plpks_get_config() == 0;
+}
+
+// Poll the hypervisor until it confirms that the object named by @label has
+// been flushed.  Returns 0 once confirmed, -ETIMEDOUT when PLPKS_MAX_TIMEOUT
+// worth of sleeps passes without confirmation, or a negative errno if the
+// hcall itself fails.
+static int plpks_confirm_object_flushed(struct label *label,
+					struct plpks_auth *auth)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	u64 waited = 0;
+	u8 status;
+	int rc;
+
+	do {
+		rc = plpar_hcall(H_PKS_CONFIRM_OBJECT_FLUSHED, retbuf,
+				 virt_to_phys(auth), virt_to_phys(label),
+				 label->size);
+
+		status = retbuf[0];
+
+		// H_NOT_FOUND together with status 1 is treated as success
+		if (rc == H_NOT_FOUND && status == 1)
+			rc = 0;
+		else if (rc)
+			return pseries_status_to_err(rc);
+
+		// Status 1 is the confirmation we are waiting for
+		if (status == 1)
+			return pseries_status_to_err(rc);
+
+		// Not confirmed yet: back off briefly and ask again
+		usleep_range(PLPKS_FLUSH_SLEEP,
+			     PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE);
+		waited += PLPKS_FLUSH_SLEEP;
+	} while (waited < PLPKS_MAX_TIMEOUT);
+
+	return -ETIMEDOUT;
+}
+
+// Signed update of @var via H_PKS_SIGNED_UPDATE, retried while the hypervisor
+// is busy (bounded by PLPKS_MAX_TIMEOUT).  @var needs data, the SIGNEDUPDATE
+// policy bit and a NULL component.  Returns 0 or a negative errno.
+int plpks_signed_update_var(struct plpks_var *var, u64 flags)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	u64 continuetoken = 0, timeout = 0;
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc, hrc;
+
+	if (!var->data || var->datalen <= 0 || var->namelen > PLPKS_MAX_NAME_SIZE)
+		return -EINVAL;
+
+	// Signed updates need the policy bit set and the component to be NULL.
+	if (!(var->policy & PLPKS_SIGNEDUPDATE) || var->component)
+		return -EINVAL;
+
+	auth = construct_auth(PLPKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(var->component, var->os, var->name, var->namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	do {
+		hrc = plpar_hcall9(H_PKS_SIGNED_UPDATE, retbuf,
+				   virt_to_phys(auth), virt_to_phys(label),
+				   label->size, var->policy, flags,
+				   virt_to_phys(var->data), var->datalen,
+				   continuetoken);
+		continuetoken = retbuf[0];
+		rc = pseries_status_to_err(hrc);
+		if (rc == -EBUSY) {
+			int delay_ms = get_longbusy_msecs(hrc);
+
+			// This path may sleep, so yield the CPU instead of spinning
+			msleep(delay_ms);
+			timeout += delay_ms;
+		}
+	} while (rc == -EBUSY && timeout < PLPKS_MAX_TIMEOUT);
+
+	if (!rc)
+		rc = plpks_confirm_object_flushed(label, auth);
+
+	kfree(label);
+out:
+	kfree(auth);
+
+	return rc;
+}
+
+// Write @var as an OS-owned, non-signed-update object and wait for the flush
+// to be confirmed.  Returns 0 on success or a negative errno.
+int plpks_write_var(struct plpks_var var)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc;
+
+	if (!var.component || !var.data || var.datalen <= 0 ||
+	    var.namelen > PLPKS_MAX_NAME_SIZE || var.datalen > PLPKS_MAX_DATA_SIZE ||
+	    (var.policy & PLPKS_SIGNEDUPDATE))
+		return -EINVAL;
+
+	auth = construct_auth(PLPKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(var.component, var.os, var.name, var.namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	rc = plpar_hcall(H_PKS_WRITE_OBJECT, retbuf, virt_to_phys(auth),
+			 virt_to_phys(label), label->size, var.policy,
+			 virt_to_phys(var.data), var.datalen);
+	// Convert the hcall status first: the flush helper already returns a
+	// Linux errno, which must not be run through the converter again.
+	rc = pseries_status_to_err(rc);
+	if (!rc)
+		rc = plpks_confirm_object_flushed(label, auth);
+
+	kfree(label);
+out:
+	kfree(auth);
+	return rc;
+}
+
+// Remove the OS-owned object @vname; returns 0 or a negative errno.
+int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc;
+
+	if (vname.namelen > PLPKS_MAX_NAME_SIZE)
+		return -EINVAL;
+
+	auth = construct_auth(PLPKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(component, varos, vname.name, vname.namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	rc = plpar_hcall(H_PKS_REMOVE_OBJECT, retbuf, virt_to_phys(auth),
+			 virt_to_phys(label), label->size);
+	// Only the hcall status needs converting; the flush helper returns an errno
+	rc = pseries_status_to_err(rc);
+	if (!rc)
+		rc = plpks_confirm_object_flushed(label, auth);
+
+	kfree(label);
+out:
+	kfree(auth);
+	return rc;
+}
+
+// Read an object on behalf of @consumer.  OS-owned objects are addressed by a
+// full label, firmware/bootloader objects by their bare name.  On success
+// var->policy is set and var->datalen is limited to the length read back.
+static int plpks_read_var(u8 consumer, struct plpks_var *var)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label = NULL;
+	u8 *output;
+	int rc;
+
+	if (var->namelen > PLPKS_MAX_NAME_SIZE)
+		return -EINVAL;
+
+	auth = construct_auth(consumer);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	if (consumer == PLPKS_OS_OWNER) {
+		label = construct_label(var->component, var->os, var->name,
+					var->namelen);
+		if (IS_ERR(label)) {
+			rc = PTR_ERR(label);
+			goto out_free_auth;
+		}
+	}
+
+	output = kzalloc(maxobjsize, GFP_KERNEL);
+	if (!output) {
+		rc = -ENOMEM;
+		goto out_free_label;
+	}
+
+	if (consumer == PLPKS_OS_OWNER)
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(label), label->size, virt_to_phys(output),
+				 maxobjsize);
+	else
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(var->name), var->namelen, virt_to_phys(output),
+				 maxobjsize);
+
+	if (rc != H_SUCCESS) {
+		rc = pseries_status_to_err(rc);
+		goto out_free_output;
+	}
+
+	// retbuf[0] is used as the object's length: it is reported when there
+	// is no buffer, and the copy below never exceeds it.
+	if (!var->data || var->datalen > retbuf[0])
+		var->datalen = retbuf[0];
+	var->policy = retbuf[1];
+	if (var->data)
+		memcpy(var->data, output, var->datalen);
+
+	rc = 0;
+out_free_output:
+	kfree(output);
+out_free_label:
+	kfree(label);
+out_free_auth:
+	kfree(auth);
+	return rc;
+}
+
+int plpks_read_os_var(struct plpks_var *var)
+{
+	return plpks_read_var(PLPKS_OS_OWNER, var);	// OS consumer: looked up by label
+}
+
+int plpks_read_fw_var(struct plpks_var *var)
+{
+	return plpks_read_var(PLPKS_FW_OWNER, var);	// firmware consumer: looked up by name
+}
+
+int plpks_read_bootloader_var(struct plpks_var *var)
+{
+	return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var);	// bootloader consumer: by name
+}
+
+// Store the OS password in the "ibm,plpks-pw" property of /chosen in @fdt.
+int plpks_populate_fdt(void *fdt)
+{
+	int node = fdt_path_offset(fdt, "/chosen");
+
+	if (node >= 0)
+		return fdt_setprop(fdt, node, "ibm,plpks-pw", ospassword,
+				   ospasswordlength);
+
+	pr_err("Can't find chosen node: %s\n", fdt_strerror(node));
+	return node;
+}
+
+// Once a password is registered with the hypervisor it cannot be cleared without
+// rebooting the LPAR, so to keep using the PLPKS across kexec boots we need to
+// recover the previous password from the FDT.
+//
+// There are a few challenges here.  We don't want the password to be visible to
+// users, so we need to clear it from the FDT.  This has to be done in early boot.
+// Clearing it from the FDT would make the FDT's checksum invalid, so we have to
+// manually cause the checksum to be recalculated.
+void __init plpks_early_init_devtree(void)
+{
+	void *fdt = initial_boot_params;
+	int chosen_node = fdt_path_offset(fdt, "/chosen");
+	const u8 *password;
+	int len;
+
+	if (chosen_node < 0)
+		return;
+
+	password = fdt_getprop(fdt, chosen_node, "ibm,plpks-pw", &len);
+	if (len <= 0) {
+		pr_debug("Couldn't find ibm,plpks-pw node.\n");
+		return;
+	}
+	// Keep a private copy of the password; the property itself is wiped below
+	ospassword = memblock_alloc_raw(len, SMP_CACHE_BYTES);
+	if (!ospassword) {
+		pr_err("Error allocating memory for password.\n");
+		goto out;
+	}
+
+	memcpy(ospassword, password, len);
+	ospasswordlength = (u16)len;
+	// The property is wiped even if the copy above could not be allocated
+out:
+	fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw");
+	// Since we've cleared the password, we must update the FDT checksum
+	early_init_dt_verify(fdt);
+}
+
+// Initcall: read the keystore configuration, then set up the OS password.
+static __init int pseries_plpks_init(void)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_PLPKS))
+		return -ENODEV;
+
+	rc = _plpks_get_config();
+	if (rc) {
+		pr_err("POWER LPAR Platform KeyStore is not supported or enabled\n");
+		return rc;
+	}
+
+	rc = plpks_gen_password();
+	if (rc) {
+		pr_err("Failed setting POWER LPAR Platform KeyStore Password\n");
+		return rc;
+	}
+	pr_info("POWER LPAR Platform KeyStore initialized successfully\n");
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_plpks_init);
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
new file mode 100644
index 0000000000..3c290b9ed0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Handles hot and cold plug of persistent memory regions on pseries.
+ */
+
+#define pr_fmt(fmt)     "pseries-pmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/sched.h>	/* for idle_task_exit */
+#include <linux/sched/hotplug.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
+
+#include "pseries.h"
+
+static struct device_node *pmem_node;
+
+/* Hot-add the pmem region @drc_index: acquire the DRC, build its device-tree
+ * node with configure-connector and attach that node under pmem_node. */
+static ssize_t pmem_drc_add_node(u32 drc_index)
+{
+	struct device_node *dn;
+	int rc;
+
+	pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index);
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc) {
+		pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n",
+			rc, drc_index);
+		return -EINVAL;
+	}
+
+	dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node);
+	if (!dn) {
+		pr_err("configure-connector failed for drc %x\n", drc_index);
+		dlpar_release_drc(drc_index);
+		return -EINVAL;
+	}
+
+	/* NB: The of reconfig notifier creates platform device from the node */
+	rc = dlpar_attach_node(dn, pmem_node);
+	if (rc) {
+		pr_err("Failed to attach node %pOF, rc: %d, drc index: %x\n",
+			dn, rc, drc_index);
+		/* NOTE(review): the new nodes are freed only when releasing the
+		 * DRC fails - confirm that this is the intended condition. */
+		if (dlpar_release_drc(drc_index))
+			dlpar_free_cc_nodes(dn);
+		return rc;
+	}
+
+	pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index);
+	return 0;
+}
+
+/* Hot-remove the child of pmem_node whose "ibm,my-drc-index" is @drc_index.
+ * Returns 0, -ENODEV if no child matches, or the detach/release error. */
+static ssize_t pmem_drc_remove_node(u32 drc_index)
+{
+	struct device_node *dn;
+	uint32_t index;
+	int rc;
+
+	for_each_child_of_node(pmem_node, dn) {
+		if (of_property_read_u32(dn, "ibm,my-drc-index", &index))
+			continue;
+		if (index == drc_index)
+			break;
+	}
+
+	if (!dn) {
+		pr_err("Attempting to remove unused DRC index %x\n", drc_index);
+		return -ENODEV;
+	}
+
+	pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index);
+	/* NB: tears down the ibm,pmemory device as a side-effect */
+	rc = dlpar_detach_node(dn);
+	if (rc)
+		return rc;
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc) {
+		pr_err("Failed to release drc (%x) for PMEM %pOFn, rc: %d\n",
+			drc_index, dn, rc);
+		dlpar_attach_node(dn, pmem_node);
+		return rc;
+	}
+
+	pr_info("Successfully removed PMEM with drc index: %x\n", drc_index);
+	return 0;
+}
+
+/* Handle a pmem hotplug event: add or remove the region named by DRC index. */
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+	u32 drc_index = hp_elog->_drc_u.drc_index;
+	int rc;
+
+	/* slim chance, but we might get a hotplug event while booting */
+	if (!pmem_node)
+		pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+	if (!pmem_node) {
+		pr_err("Hotplug event for a pmem device, but none exists\n");
+		return -ENODEV;
+	}
+
+	if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
+		pr_err("Unsupported hotplug event type %d\n", hp_elog->id_type);
+		return -EINVAL;
+	}
+
+	lock_device_hotplug();
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		rc = pmem_drc_add_node(drc_index);
+		break;
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		rc = pmem_drc_remove_node(drc_index);
+		break;
+	default:
+		pr_err("Unsupported hotplug action (%d)\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+	unlock_device_hotplug();
+	return rc;
+}
+
+static const struct of_device_id drc_pmem_match[] = {	/* used by pseries_pmem_init() */
+	{ .type = "ibm,persistent-memory", },
+	{}	/* empty terminating entry */
+};
+
+/* Initcall: locate the persistent-memory node and probe its children. */
+static int pseries_pmem_init(void)
+{
+	struct device_node *np;
+
+	/* Only supported on POWER8 and above. */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 0;
+
+	np = of_find_node_by_type(NULL, "ibm,persistent-memory");
+	pmem_node = np;
+
+	/*
+	 * The generic OF bus probe/populate handles creating platform devices
+	 * from the child (ibm,pmemory) nodes. The generic code registers an of
+	 * reconfig notifier to handle the hot-add/remove cases too.
+	 */
+	if (np)
+		of_platform_bus_probe(np, drc_pmem_match, NULL);
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_pmem_init);
diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c
new file mode 100644
index 0000000000..3676cb2977
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/power.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Interface for power-management for ppc64 compliant platform
+ *
+ *  Manish Ahuja <mahuja@us.ibm.com>
+ *
+ *  Feb 2007
+ *
+ *  Copyright (C) 2007 IBM Corporation.
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/machdep.h>
+
+#include "pseries.h"
+
+unsigned long rtas_poweron_auto; /* default and normal state is 0 */
+
+/* sysfs show handler: report the current auto power-on flag (0 or 1). */
+static ssize_t auto_poweron_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lu\n", rtas_poweron_auto);
+}
+
+/* sysfs store handler: the value parsed from @buf must be 0 or 1, anything
+ * else is rejected with -EINVAL.  Returns @n when the flag was updated. */
+static ssize_t auto_poweron_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t n)
+{
+	unsigned long value;
+
+	if (sscanf(buf, "%lu", &value) != 1 || value > 1)
+		return -EINVAL;
+
+	rtas_poweron_auto = value;
+	return n;
+}
+
+static struct kobj_attribute auto_poweron_attr =	/* "auto_poweron" file, mode 0644 */
+	__ATTR(auto_poweron, 0644, auto_poweron_show, auto_poweron_store);
+
+#ifndef CONFIG_PM
+/* No CONFIG_PM: power_kobj is defined here and created by pm_init() below */
+struct kobject *power_kobj;
+
+static struct attribute *g[] = {
+        &auto_poweron_attr.attr,
+        NULL,
+};
+static const struct attribute_group attr_group = {
+        .attrs = g,
+};
+
+static int __init pm_init(void)
+{
+	power_kobj = kobject_create_and_add("power", NULL);
+	if (!power_kobj)
+		return -ENOMEM;
+	return sysfs_create_group(power_kobj, &attr_group);
+}
+machine_core_initcall(pseries, pm_init);
+#else /* CONFIG_PM: power_kobj is not defined here, only the file is added */
+static int __init apo_pm_init(void)
+{
+	return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr));
+}
+machine_device_initcall(pseries, apo_pm_init);
+#endif /* !CONFIG_PM */
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
new file mode 100644
index 0000000000..8376f03f93
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2006 IBM Corporation.
+ */
+
+#ifndef _PSERIES_PSERIES_H
+#define _PSERIES_PSERIES_H
+
+#include <linux/interrupt.h>
+#include <asm/rtas.h>
+
+struct device_node;
+
+void __init request_event_sources_irqs(struct device_node *np,
+				       irq_handler_t handler, const char *name);
+
+#include <linux/of.h>
+
+struct pt_regs;
+
+extern int pSeries_system_reset_exception(struct pt_regs *regs);
+extern int pSeries_machine_check_exception(struct pt_regs *regs);
+extern long pseries_machine_check_realmode(struct pt_regs *regs);
+void pSeries_machine_check_log_err(void);
+
+#ifdef CONFIG_SMP
+extern void smp_init_pseries(void);
+
+/* Get state of physical CPU from query_cpu_stopped */
+int smp_query_cpu_stopped(unsigned int pcpu);
+#define QCSS_STOPPED 0
+#define QCSS_STOPPING 1
+#define QCSS_NOT_STOPPED 2
+#define QCSS_HARDWARE_ERROR -1
+#define QCSS_HARDWARE_BUSY -2
+#else /* !CONFIG_SMP */
+static inline void smp_init_pseries(void) { }	/* no-op stub */
+#endif /* CONFIG_SMP */
+
+extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
+void pseries_machine_kexec(struct kimage *image);
+
+extern void pSeries_final_fixup(void);
+
+/* Poweron flag used for enabling auto ups restart */
+extern unsigned long rtas_poweron_auto;
+
+/* Dynamic logical Partitioning/Mobility */
+extern void dlpar_free_cc_nodes(struct device_node *);
+extern void dlpar_free_cc_property(struct property *);
+extern struct device_node *dlpar_configure_connector(__be32,
+						struct device_node *);
+extern int dlpar_attach_node(struct device_node *, struct device_node *);
+extern int dlpar_detach_node(struct device_node *);
+extern int dlpar_acquire_drc(u32 drc_index);
+extern int dlpar_release_drc(u32 drc_index);
+extern int dlpar_unisolate_drc(u32 drc_index);
+
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
+#else /* !CONFIG_MEMORY_HOTPLUG: both requests are refused */
+static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
+static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_HOTPLUG_CPU
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
+void pseries_cpu_hotplug_init(void);
+#else /* !CONFIG_HOTPLUG_CPU: requests are refused, init is a no-op */
+static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
+static inline void pseries_cpu_hotplug_init(void) { }
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/* PCI root bridge prepare function override for pseries */
+struct pci_host_bridge;
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
+
+extern struct pci_controller_ops pseries_pci_controller_ops;
+int pseries_msi_allocate_domains(struct pci_controller *phb);
+void pseries_msi_free_domains(struct pci_controller *phb);
+
+extern int CMO_PrPSP;
+extern int CMO_SecPSP;
+extern unsigned long CMO_PageSize;
+/* Inline read accessors for the three CMO_* globals declared above */
+static inline int cmo_get_primary_psp(void)
+{
+	return CMO_PrPSP;
+}
+
+static inline int cmo_get_secondary_psp(void)
+{
+	return CMO_SecPSP;
+}
+
+static inline unsigned long cmo_get_page_size(void)
+{
+	return CMO_PageSize;
+}
+
+int dlpar_workqueue_init(void);
+
+extern u32 pseries_security_flavor;
+void pseries_setup_security_mitigations(void);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void pseries_lpar_read_hblkrm_characteristics(void);
+#else /* !CONFIG_PPC_64S_HASH_MMU: no-op stub */
+static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+void pseries_rng_init(void);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
+					     struct pci_dev *pdev);
+#endif
+
+#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
new file mode 100644
index 0000000000..2c661b7982
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * POWER platform energy management driver
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This pseries platform device driver provides access to
+ * platform energy management capabilities.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "pseries_energy"
+
+/* Driver flags */
+
+static int sysfs_entries;
+
+/* Helper routines */
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+/* Map logical @cpu to the DRC index of its core via the /cpus node; 0 on failure. */
+static u32 cpu_to_drc_index(int cpu)
+{
+	struct device_node *dn = NULL;
+	struct property *info;
+	int thread_index;
+	int rc = 1;
+	u32 ret = 0;
+
+	dn = of_find_node_by_path("/cpus");
+	if (dn == NULL)
+		goto err;
+
+	/* Convert logical cpu number to core number */
+	thread_index = cpu_core_index_of_thread(cpu);
+
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info) {
+		struct of_drc_info drc;
+		int j;
+		u32 num_set_entries;
+		const __be32 *value;
+
+		value = of_prop_next_u32(info, NULL, &num_set_entries);
+		if (!value)
+			goto err_of_node_put;
+		value++;
+
+		for (j = 0; j < num_set_entries; j++) {
+			of_read_drc_info_cell(&info, &value, &drc);
+			/* Leave through the label that drops the /cpus reference */
+			if (strncmp(drc.drc_type, "CPU", 3))
+				goto err_of_node_put;
+
+			if (thread_index < drc.last_drc_index)
+				break;
+		}
+
+		ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+	} else {
+		u32 nr_drc_indexes, thread_drc_index;
+
+		/*
+		 * The first element of ibm,drc-indexes array is the
+		 * number of drc_indexes returned in the list.  Hence
+		 * thread_index+1 will get the drc_index corresponding
+		 * to core number thread_index.
+		 */
+		rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+						0, &nr_drc_indexes);
+		if (rc)
+			goto err_of_node_put;
+
+		WARN_ON_ONCE(thread_index > nr_drc_indexes);
+		rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+						thread_index + 1,
+						&thread_drc_index);
+		if (rc)
+			goto err_of_node_put;
+
+		ret = thread_drc_index;
+	}
+
+	rc = 0;
+
+err_of_node_put:
+	of_node_put(dn);
+err:
+	if (rc)
+		printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu);
+	return ret;
+}
+
+/* Map @drc_index to the first logical CPU of the core it names; 0 on failure. */
+static int drc_index_to_cpu(u32 drc_index)
+{
+	struct device_node *dn = NULL;
+	struct property *info;
+	const int *indexes;
+	int thread_index = 0, cpu = 0;
+	int rc = 1;
+
+	dn = of_find_node_by_path("/cpus");
+	if (dn == NULL)
+		goto err;
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info) {
+		struct of_drc_info drc;
+		int j;
+		u32 num_set_entries;
+		const __be32 *value;
+
+		value = of_prop_next_u32(info, NULL, &num_set_entries);
+		if (!value)
+			goto err_of_node_put;
+		value++;
+
+		for (j = 0; j < num_set_entries; j++) {
+			of_read_drc_info_cell(&info, &value, &drc);
+			/* Leave through the label that drops the /cpus reference */
+			if (strncmp(drc.drc_type, "CPU", 3))
+				goto err_of_node_put;
+
+			if (drc_index > drc.last_drc_index) {
+				cpu += drc.num_sequential_elems;
+				continue;
+			}
+			cpu += ((drc_index - drc.drc_index_start) /
+				drc.sequential_inc);
+
+			thread_index = cpu_first_thread_of_core(cpu);
+			rc = 0;
+			break;
+		}
+	} else {
+		unsigned long int i;
+
+		indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+		if (indexes == NULL)
+			goto err_of_node_put;
+		/*
+		 * First element in the array is the number of drc_indexes
+		 * returned.  Search through the list to find the matching
+		 * drc_index and get the core number
+		 */
+		for (i = 0; i < indexes[0]; i++) {
+			if (indexes[i + 1] == drc_index)
+				break;
+		}
+		/* Convert core number to logical cpu number */
+		thread_index = cpu_first_thread_of_core(i);
+		rc = 0;
+	}
+
+err_of_node_put:
+	of_node_put(dn);
+err:
+	if (rc)
+		printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
+	return thread_index;
+}
+
+/*
+ * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
+ * preferred logical cpus to activate or deactivate for optimized
+ * energy consumption.
+ */
+
+#define FLAGS_MODE1	0x004E200000080E01UL
+#define FLAGS_MODE2	0x004E200000080401UL
+#define FLAGS_ACTIVATE  0x100
+
+/* List, comma separated, the CPUs the hypervisor hints should be activated
+ * (@activate set) or deactivated (@activate clear); returns bytes written. */
+static ssize_t get_best_energy_list(char *page, int activate)
+{
+	int rc, cnt, i, cpu;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	unsigned long flags = FLAGS_MODE1;
+	u32 *buf_page;
+	int len = 0;
+
+	buf_page = (u32 *) get_zeroed_page(GFP_KERNEL);
+	if (!buf_page)
+		return -ENOMEM;
+
+	if (activate)
+		flags |= FLAGS_ACTIVATE;
+	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page),
+				0, 0, 0, 0, 0, 0);
+	if (rc != H_SUCCESS) {
+		free_page((unsigned long) buf_page);
+		return -EINVAL;
+	}
+
+	cnt = retbuf[0];
+	for (i = 0; i < cnt; i++) {
+		cpu = drc_index_to_cpu(buf_page[2*i+1]);
+		if ((cpu_online(cpu) && !activate) ||
+		    (!cpu_online(cpu) && activate))
+			len += sysfs_emit_at(page, len, "%d,", cpu);
+	}
+	if (len) { /* Something to show */
+		len--; /* Suppress last comma */
+		len += sysfs_emit_at(page, len, "\n");
+	}
+
+	free_page((unsigned long) buf_page);
+	return len;
+}
+
+/* Emit the per-cpu activate/deactivate hint value for @dev's CPU into @page. */
+static ssize_t get_best_energy_data(struct device *dev,
+					char *page, int activate)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	unsigned long flags = FLAGS_MODE2;
+
+	if (activate)
+		flags |= FLAGS_ACTIVATE;
+
+	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags,
+				cpu_to_drc_index(dev->id),
+				0, 0, 0, 0, 0, 0, 0);
+	if (rc != H_SUCCESS)
+		return -EINVAL;
+
+	/* The value shown is the upper 32 bits of the second return word */
+	return sysfs_emit(page, "%lu\n", retbuf[1] >> 32);
+}
+
+/* Wrapper functions: sysfs show callbacks; the last argument selects activate (1) or deactivate (0) */
+
+static ssize_t cpu_activate_hint_list_show(struct device *dev,
+			struct device_attribute *attr, char *page)
+{
+	return get_best_energy_list(page, 1);
+}
+
+static ssize_t cpu_deactivate_hint_list_show(struct device *dev,
+			struct device_attribute *attr, char *page)
+{
+	return get_best_energy_list(page, 0);
+}
+
+static ssize_t percpu_activate_hint_show(struct device *dev,
+			struct device_attribute *attr, char *page)
+{
+	return get_best_energy_data(dev, page, 1);
+}
+
+static ssize_t percpu_deactivate_hint_show(struct device *dev,
+			struct device_attribute *attr, char *page)
+{
+	return get_best_energy_data(dev, page, 0);
+}
+
+/*
+ * Sysfs interface (every file is read-only, mode 0444, with no store hook):
+ * /sys/devices/system/cpu/pseries_activate_hint_list
+ * /sys/devices/system/cpu/pseries_deactivate_hint_list
+ *	Comma separated list of cpus to activate or deactivate
+ * /sys/devices/system/cpu/cpuN/pseries_activate_hint
+ * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint
+ *	Per-cpu value of the hint
+ */
+
+static struct device_attribute attr_cpu_activate_hint_list =
+		__ATTR(pseries_activate_hint_list, 0444,
+		cpu_activate_hint_list_show, NULL);
+
+static struct device_attribute attr_cpu_deactivate_hint_list =
+		__ATTR(pseries_deactivate_hint_list, 0444,
+		cpu_deactivate_hint_list_show, NULL);
+
+static struct device_attribute attr_percpu_activate_hint =
+		__ATTR(pseries_activate_hint, 0444,
+		percpu_activate_hint_show, NULL);
+
+static struct device_attribute attr_percpu_deactivate_hint =
+		__ATTR(pseries_deactivate_hint, 0444,
+		percpu_deactivate_hint_show, NULL);
+
+/* Module init: create the list files on the cpu root and two files per cpu. */
+static int __init pseries_energy_init(void)
+{
+	int cpu, err;
+	struct device *cpu_dev, *dev_root;
+
+	if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY))
+		return 0; /* H_BEST_ENERGY hcall not supported */
+
+	/* Create the sysfs files */
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		err = device_create_file(dev_root, &attr_cpu_activate_hint_list);
+		if (!err)
+			err = device_create_file(dev_root, &attr_cpu_deactivate_hint_list);
+		put_device(dev_root);
+		if (err)
+			return err;
+	}
+
+	for_each_possible_cpu(cpu) {
+		cpu_dev = get_cpu_device(cpu);
+		err = device_create_file(cpu_dev,
+				&attr_percpu_activate_hint);
+		if (err)
+			break;
+		err = device_create_file(cpu_dev,
+				&attr_percpu_deactivate_hint);
+		if (err)
+			break;
+	}
+	/* NOTE(review): files created before a failure are not removed here, and
+	 * sysfs_entries stays 0, so pseries_energy_cleanup() skips them too. */
+	if (err)
+		return err;
+	sysfs_entries = 1; /* Remove entries on cleanup */
+	return 0;
+}
+
+/* Module exit: remove every sysfs file pseries_energy_init() created. */
+static void __exit pseries_energy_cleanup(void)
+{
+	int cpu;
+	struct device *cpu_dev, *dev_root;
+
+	if (!sysfs_entries)
+		return;
+
+	/* Remove the sysfs files */
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		device_remove_file(dev_root, &attr_cpu_activate_hint_list);
+		device_remove_file(dev_root, &attr_cpu_deactivate_hint_list);
+		put_device(dev_root);
+	}
+
+	/* device_remove_file() mirrors device_create_file() and tolerates NULL */
+	for_each_possible_cpu(cpu) {
+		cpu_dev = get_cpu_device(cpu);
+		device_remove_file(cpu_dev, &attr_percpu_activate_hint);
+		device_remove_file(cpu_dev, &attr_percpu_deactivate_hint);
+	}
+}
+
+module_init(pseries_energy_init);
+module_exit(pseries_energy_cleanup);
+MODULE_DESCRIPTION("Driver for pSeries platform energy management");
+MODULE_AUTHOR("Vaidyanathan Srinivasan");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
new file mode 100644
index 0000000000..adafd593d9
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen IBM Corporation
+ */
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/fs.h>
+#include <linux/reboot.h>
+#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/mce.h>
+
+#include "pseries.h"
+
+static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(ras_log_buf_lock);
+
+static int ras_check_exception_token;
+
+#define EPOW_SENSOR_TOKEN	9
+#define EPOW_SENSOR_INDEX	0
+
+/* EPOW events counter variable */
+static int num_epow_events;
+
+static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
+static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
+static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
+
+/* RTAS pseries MCE errorlog section. */
+struct pseries_mc_errorlog {
+	__be32	fru_id;
+	__be32	proc_id;
+	u8	error_type;	/* one of the MC_ERROR_TYPE_* values */
+	/*
+	 * sub_err_type (1 byte). Bit fields depend on error_type
+	 *
+	 *   MSB0
+	 *   |
+	 *   V
+	 *   01234567
+	 *   XXXXXXXX
+	 *
+	 * For error_type == MC_ERROR_TYPE_UE
+	 *   XXXXXXXX
+	 *   X		1: Permanent or Transient UE.
+	 *    X		1: Effective address provided.
+	 *     X	1: Logical address provided.
+	 *      XX	2: Reserved.
+	 *        XXX	3: Type of UE error.
+	 *
+	 * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
+	 *   XXXXXXXX
+	 *   X		1: Effective address provided.
+	 *    XXXXX	5: Reserved.
+	 *         XX	2: Type of SLB/ERAT/TLB error.
+	 *
+	 * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
+	 *   XXXXXXXX
+	 *   X		1: Error causing address provided.
+	 *    XXX	3: Type of error.
+	 *       XXXX	4: Reserved.
+	 */
+	u8	sub_err_type;	/* decoded by rtas_mc_error_sub_type() */
+	u8	reserved_1[6];
+	__be64	effective_address;	/* used only if an "effective address provided" bit is set */
+	__be64	logical_address;	/* used only if UE_LOGICAL_ADDR_PROVIDED is set */
+} __packed;
+
+/* RTAS pseries MCE error types */
+#define MC_ERROR_TYPE_UE		0x00
+#define MC_ERROR_TYPE_SLB		0x01
+#define MC_ERROR_TYPE_ERAT		0x02
+#define MC_ERROR_TYPE_UNKNOWN		0x03
+#define MC_ERROR_TYPE_TLB		0x04
+#define MC_ERROR_TYPE_D_CACHE		0x05
+#define MC_ERROR_TYPE_I_CACHE		0x07
+#define MC_ERROR_TYPE_CTRL_MEM_ACCESS	0x08
+
+/* RTAS pseries MCE error sub types */
+#define MC_ERROR_UE_INDETERMINATE		0
+#define MC_ERROR_UE_IFETCH			1
+#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
+#define MC_ERROR_UE_LOAD_STORE			3
+#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4
+
+#define UE_EFFECTIVE_ADDR_PROVIDED		0x40
+#define UE_LOGICAL_ADDR_PROVIDED		0x20
+#define MC_EFFECTIVE_ADDR_PROVIDED		0x80
+
+#define MC_ERROR_SLB_PARITY		0
+#define MC_ERROR_SLB_MULTIHIT		1
+#define MC_ERROR_SLB_INDETERMINATE	2
+
+#define MC_ERROR_ERAT_PARITY		1
+#define MC_ERROR_ERAT_MULTIHIT		2
+#define MC_ERROR_ERAT_INDETERMINATE	3
+
+#define MC_ERROR_TLB_PARITY		1
+#define MC_ERROR_TLB_MULTIHIT		2
+#define MC_ERROR_TLB_INDETERMINATE	3
+
+#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK	0
+#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS	1
+
+static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
+{
+	const u8 raw = mlog->sub_err_type;	/* packed field, layout depends on type */
+	const u8 type = mlog->error_type;
+
+	if (type == MC_ERROR_TYPE_UE)
+		return raw & 0x07;		/* low three bits */
+	if (type == MC_ERROR_TYPE_SLB || type == MC_ERROR_TYPE_ERAT ||
+	    type == MC_ERROR_TYPE_TLB)
+		return raw & 0x03;		/* low two bits */
+	if (type == MC_ERROR_TYPE_CTRL_MEM_ACCESS)
+		return (raw & 0x70) >> 4;	/* three bits below the top bit */
+
+	return 0;	/* every other error type reports sub-type 0 */
+}
+
+/*
+ * Hotplug interrupts are requested at late-initcall time because handling
+ * them may touch other devices or systems (e.g. hugepages) that have not
+ * been initialized at the subsys stage.
+ */
+static int __init init_ras_hotplug_IRQ(void)
+{
+	struct device_node *hp_node;
+
+	hp_node = of_find_node_by_path("/event-sources/hot-plug-events");
+	if (!hp_node)
+		return 0;
+
+	/* Request the interrupts only if dlpar_workqueue_init() returned 0 */
+	if (!dlpar_workqueue_init())
+		request_event_sources_irqs(hp_node, ras_hotplug_interrupt,
+					   "RAS_HOTPLUG");
+	of_node_put(hp_node);
+	return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
+/*
+ * Look up the check-exception RTAS token and request the interrupts that
+ * report hardware errors and power system (EPOW) events.
+ */
+static int __init init_ras_IRQ(void)
+{
+	struct device_node *src;
+
+	ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION);
+
+	/* Internal Errors */
+	src = of_find_node_by_path("/event-sources/internal-errors");
+	if (src) {
+		request_event_sources_irqs(src, ras_error_interrupt,
+					   "RAS_ERROR");
+		of_node_put(src);
+	}
+
+	/* EPOW Events */
+	src = of_find_node_by_path("/event-sources/epow-events");
+	if (src) {
+		request_event_sources_irqs(src, ras_epow_interrupt, "RAS_EPOW");
+		of_node_put(src);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(pseries, init_ras_IRQ);
+
+#define EPOW_SHUTDOWN_NORMAL				1
+#define EPOW_SHUTDOWN_ON_UPS				2
+#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
+#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4
+
+static void handle_system_shutdown(char event_modifier)
+{
+	switch (event_modifier) {
+	case EPOW_SHUTDOWN_NORMAL:
+		pr_emerg("Power off requested\n");
+		break;
+
+	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
+		pr_emerg("Loss of system critical functions detected. Check"
+			 " RTAS error log for details\n");
+		break;
+
+	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
+		pr_emerg("High ambient temperature detected. Check RTAS"
+			 " error log for details\n");
+		break;
+
+	case EPOW_SHUTDOWN_ON_UPS:	/* report only, no poweroff */
+		pr_emerg("Loss of system power detected. System is running on"
+			 " UPS/battery. Check RTAS error log for details\n");
+		return;
+
+	default:
+		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
+			event_modifier);
+		return;
+	}
+	/* Every modifier that breaks out of the switch ends in a poweroff */
+	orderly_poweroff(true);
+}
+
+struct epow_errorlog {
+	unsigned char sensor_value;	/* low 4 bits: EPOW_* action code */
+	unsigned char event_modifier;	/* low 4 bits: EPOW_SHUTDOWN_* modifier */
+	unsigned char extended_modifier;
+	unsigned char reserved;
+	unsigned char platform_reason;
+};
+
+#define EPOW_RESET			0
+#define EPOW_WARN_COOLING		1
+#define EPOW_WARN_POWER			2
+#define EPOW_SYSTEM_SHUTDOWN		3
+#define EPOW_SYSTEM_HALT		4
+#define EPOW_MAIN_ENCLOSURE		5
+#define EPOW_POWER_OFF			7
+
+static void rtas_parse_epow_errlog(struct rtas_error_log *log)
+{
+	struct pseries_errorlog *pseries_log;
+	struct epow_errorlog *epow_log;
+	char action_code;
+	char modifier;
+	/* Act on the EPOW section of @log; a log without one is ignored */
+	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
+	if (pseries_log == NULL)
+		return;
+
+	epow_log = (struct epow_errorlog *)pseries_log->data;
+	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
+	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */
+
+	switch (action_code) {
+	case EPOW_RESET:
+		if (num_epow_events) {	/* only while events are outstanding */
+			pr_info("Non critical power/cooling issue cleared\n");
+			num_epow_events--;
+		}
+		break;
+
+	case EPOW_WARN_COOLING:
+		pr_info("Non-critical cooling issue detected. Check RTAS error"
+			" log for details\n");
+		break;
+
+	case EPOW_WARN_POWER:
+		pr_info("Non-critical power issue detected. Check RTAS error"
+			" log for details\n");
+		break;
+
+	case EPOW_SYSTEM_SHUTDOWN:
+		handle_system_shutdown(modifier);	/* action depends on the modifier */
+		break;
+
+	case EPOW_SYSTEM_HALT:
+		pr_emerg("Critical power/cooling issue detected. Check RTAS"
+			 " error log for details. Powering off.\n");
+		orderly_poweroff(true);
+		break;
+
+	case EPOW_MAIN_ENCLOSURE:
+	case EPOW_POWER_OFF:
+		pr_emerg("System about to lose power. Check RTAS error log "
+			 " for details. Powering off immediately.\n");
+		emergency_sync();	/* sync first, then power off directly */
+		kernel_power_off();
+		break;
+
+	default:
+		pr_err("Unknown power/cooling event (action code  = %d)\n",
+			action_code);
+	}
+
+	/* Count every event other than a reset, unknown action codes included */
+	if (action_code != EPOW_RESET)
+		num_epow_events++;
+}
+
+static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
+{
+	struct pseries_errorlog *pseries_log;
+	struct pseries_hp_errorlog *hp_elog = NULL;
+
+	spin_lock(&ras_log_buf_lock);	/* ras_log_buf is shared by the RAS handlers */
+
+	rtas_call(ras_check_exception_token, 6, 1, NULL,
+		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
+		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
+		  rtas_get_error_log_max());
+
+	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
+					   PSERIES_ELOG_SECT_ID_HOTPLUG);
+	if (pseries_log)	/* the log may carry no hotplug section */
+		hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
+	/*
+	 * PCI hotplug is not supported on pseries: anything that is not a
+	 * MEM/CPU/PMEM event, or has no hotplug section, goes to rtas_errd.
+	 */
+	if (hp_elog && (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
+			hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
+			hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM))
+		queue_hotplug_event(hp_elog);
+	else
+		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
+
+	spin_unlock(&ras_log_buf_lock);
+	return IRQ_HANDLED;
+}
+
+/* Handle environmental and power warning (EPOW) interrupts. */
+static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
+{
+	int state = 0;	/* defined, non-critical value if the sensor read sets nothing */
+	int critical;
+
+	rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
+
+	if (state > 3)
+		critical = 1;		/* Time Critical */
+	else
+		critical = 0;
+	/* ras_log_buf is shared: fetch, log and parse the event under the lock */
+	spin_lock(&ras_log_buf_lock);
+
+	rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
+		  virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
+		  rtas_get_error_log_max());
+
+	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
+
+	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
+
+	spin_unlock(&ras_log_buf_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Handle hardware error interrupts.
+ *
+ * RTAS check-exception is called to collect data on the exception.  If
+ * the error is deemed recoverable, we log a warning and return.
+ * For nonrecoverable errors, an error is logged and we stop all processing
+ * as quickly as possible in order to prevent propagation of the failure.
+ */
+static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
+{
+	struct rtas_error_log *rtas_elog;
+	int status;
+	int fatal;
+
+	spin_lock(&ras_log_buf_lock);	/* protects the shared ras_log_buf */
+
+	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
+			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
+			   virq_to_hw(irq),
+			   RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
+			   __pa(&ras_log_buf),
+				rtas_get_error_log_max());
+
+	rtas_elog = (struct rtas_error_log *)ras_log_buf;
+	/* Fatal only if the call succeeded and severity is at least ERROR_SYNC */
+	if (status == 0 &&
+	    rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
+		fatal = 1;
+	else
+		fatal = 0;
+
+	/* format and print the extended information */
+	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
+
+	if (fatal) {
+		pr_emerg("Fatal hardware error detected. Check RTAS error"
+			 " log for details. Powering off immediately\n");
+		emergency_sync();
+		kernel_power_off();
+	} else {
+		pr_err("Recoverable hardware error detected\n");
+	}
+
+	spin_unlock(&ras_log_buf_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Some versions of FWNMI place the buffer inside the 4kB page starting at
+ * 0x7000, others inside the RTAS area; both ranges are accepted. The buffer
+ * is at least 16 bytes long. Note that A is evaluated more than once.
+ */
+#define VALID_FWNMI_BUFFER(A) \
+	((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
+	(((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
+
+static inline struct rtas_error_log *fwnmi_get_errlog(void)
+{
+	return (struct rtas_error_log *)local_paca->mce_data_buf;	/* buffer filled by fwnmi_get_errinfo() */
+}
+
+static __be64 *fwnmi_get_savep(struct pt_regs *regs)
+{
+	unsigned long real_addr;
+
+	/* Mask top two bits */
+	real_addr = regs->gpr[3] & ~(0x3UL << 62);
+	if (VALID_FWNMI_BUFFER(real_addr))
+		return __va(real_addr);
+
+	/* r3 is outside both ranges VALID_FWNMI_BUFFER() accepts */
+	printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+	return NULL;
+}
+
+/*
+ * Get the error information for errors coming through the
+ * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
+ * the actual r3 if possible, and a ptr to the error log entry
+ * will be returned if found (NULL if r3 is not a valid save area).
+ *
+ * Use one buffer mce_data_buf per cpu to store RTAS error.
+ *
+ * The mce_data_buf does not have any locks or protection around it,
+ * if a second machine check comes in, or a system reset is done
+ * before we have logged the error, then we will get corruption in the
+ * error log.  This is preferable over holding off on calling
+ * ibm,nmi-interlock which would result in us checkstopping if a
+ * second machine check did come in.
+ */
+static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
+{
+	struct rtas_error_log *h;
+	__be64 *savep;
+
+	savep = fwnmi_get_savep(regs);
+	if (!savep)
+		return NULL;
+
+	regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
+
+	h = (struct rtas_error_log *)&savep[1];	/* the log follows the saved r3 */
+	/* Use the per cpu buffer from paca to store rtas error log */
+	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
+	if (!rtas_error_extended(h)) {
+		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));	/* 8 bytes only */
+	} else {
+		int len, error_log_length;
+
+		error_log_length = 8 + rtas_error_extended_log_length(h);
+		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);	/* never overrun the buffer */
+		memcpy(local_paca->mce_data_buf, h, len);
+	}
+
+	return (struct rtas_error_log *)local_paca->mce_data_buf;
+}
+
+/* Call this when done with the data returned by fwnmi_get_errinfo().
+ * It will release the saved data area for other CPUs in the
+ * partition to receive FWNMI errors.
+ */
+static void fwnmi_release_errinfo(void)
+{
+	struct rtas_args rtas_args;
+	int ret;
+
+	/*
+	 * On pseries, the machine check stack is limited to under 4GB, so
+	 * args can be on-stack.
+	 */
+	rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
+	ret = be32_to_cpu(rtas_args.rets[0]);	/* status word of the call */
+	if (ret != 0)
+		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
+}
+
+int pSeries_system_reset_exception(struct pt_regs *regs)
+{
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
+	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
+	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
+	 * so clear it. It will be missing MSR_RI so we won't try to recover.
+	 */
+	if ((be64_to_cpu(regs->msr) &
+			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
+			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
+		regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
+		regs_set_return_msr(regs, 0);
+	}
+#endif
+
+	if (fwnmi_active) {
+		__be64 *savep;
+
+		/*
+		 * Firmware (PowerVM and KVM) saves r3 to a save area like
+		 * machine check, which is not exactly what PAPR (2.9)
+		 * suggests but there is no way to detect otherwise, so this
+		 * is the interface now.
+		 *
+		 * System resets do not save any error log or require an
+		 * "ibm,nmi-interlock" rtas call to release.
+		 */
+
+		savep = fwnmi_get_savep(regs);	/* NULL if r3 fails validation */
+		if (savep)
+			regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
+	}
+
+	if (smp_handle_nmi_ipi(regs))
+		return 1;	/* smp_handle_nmi_ipi() took care of it */
+
+	return 0; /* need to perform reset */
+}
+
+static int mce_handle_err_realmode(int disposition, u8 error_type)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (disposition == RTAS_DISP_NOT_RECOVERED) {
+		switch (error_type) {
+		case	MC_ERROR_TYPE_ERAT:
+			flush_erat();	/* flushing counts as a full recovery */
+			disposition = RTAS_DISP_FULLY_RECOVERED;
+			break;
+		case	MC_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
+			/*
+			 * Store the old slb content in paca before flushing.
+			 * Print this when we go to virtual mode.
+			 * There are chances that we may hit MCE again if there
+			 * is a parity error on the SLB entry we trying to read
+			 * for saving. Hence limit the slb saving to single
+			 * level of recursion.
+			 */
+			if (local_paca->in_mce == 1)
+				slb_save_contents(local_paca->mce_faulty_slbs);
+			flush_and_reload_slb();
+			disposition = RTAS_DISP_FULLY_RECOVERED;
+#endif
+			break;
+		default:
+			break;	/* other error types keep the RTAS disposition */
+		}
+	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
+		/* Platform corrected itself but could be degraded */
+		pr_err("MCE: limited recovery, system may be degraded\n");
+		disposition = RTAS_DISP_FULLY_RECOVERED;
+	}
+#endif
+	return disposition;	/* unchanged when CONFIG_PPC_BOOK3S_64 is off */
+}
+
+static int mce_handle_err_virtmode(struct pt_regs *regs,
+				   struct rtas_error_log *errp,
+				   struct pseries_mc_errorlog *mce_log,
+				   int disposition)
+{
+	struct mce_error_info mce_err = { 0 };
+	int initiator = rtas_error_initiator(errp);
+	int severity = rtas_error_severity(errp);
+	unsigned long eaddr = 0, paddr = 0;
+	u8 error_type, err_sub_type;
+
+	if (!mce_log)
+		goto out;	/* no MCE section: save an event with the zeroed defaults */
+
+	error_type = mce_log->error_type;
+	err_sub_type = rtas_mc_error_sub_type(mce_log);
+	/* Map the RTAS initiator onto the generic MCE initiator */
+	if (initiator == RTAS_INITIATOR_UNKNOWN)
+		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+	else if (initiator == RTAS_INITIATOR_CPU)
+		mce_err.initiator = MCE_INITIATOR_CPU;
+	else if (initiator == RTAS_INITIATOR_PCI)
+		mce_err.initiator = MCE_INITIATOR_PCI;
+	else if (initiator == RTAS_INITIATOR_ISA)
+		mce_err.initiator = MCE_INITIATOR_ISA;
+	else if (initiator == RTAS_INITIATOR_MEMORY)
+		mce_err.initiator = MCE_INITIATOR_MEMORY;
+	else if (initiator == RTAS_INITIATOR_POWERMGM)
+		mce_err.initiator = MCE_INITIATOR_POWERMGM;
+	else
+		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+	/* Map the RTAS severity onto the generic MCE severity */
+	if (severity == RTAS_SEVERITY_NO_ERROR)
+		mce_err.severity = MCE_SEV_NO_ERROR;
+	else if (severity == RTAS_SEVERITY_EVENT)
+		mce_err.severity = MCE_SEV_WARNING;
+	else if (severity == RTAS_SEVERITY_WARNING)
+		mce_err.severity = MCE_SEV_WARNING;
+	else if (severity == RTAS_SEVERITY_ERROR_SYNC)
+		mce_err.severity = MCE_SEV_SEVERE;
+	else if (severity == RTAS_SEVERITY_ERROR)
+		mce_err.severity = MCE_SEV_SEVERE;
+	else
+		mce_err.severity = MCE_SEV_FATAL;
+
+	if (severity <= RTAS_SEVERITY_ERROR_SYNC)
+		mce_err.sync_error = true;
+	else
+		mce_err.sync_error = false;
+	/* Defaults, refined per error type below */
+	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+	mce_err.error_class = MCE_ECLASS_UNKNOWN;
+
+	switch (error_type) {
+	case MC_ERROR_TYPE_UE:
+		mce_err.error_type = MCE_ERROR_TYPE_UE;
+		mce_common_process_ue(regs, &mce_err);
+		if (mce_err.ignore_event)
+			disposition = RTAS_DISP_FULLY_RECOVERED;
+		switch (err_sub_type) {
+		case MC_ERROR_UE_IFETCH:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
+			break;
+		case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+			break;
+		case MC_ERROR_UE_LOAD_STORE:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+			break;
+		case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+			break;
+		case MC_ERROR_UE_INDETERMINATE:
+		default:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		/* Prefer the logged logical address, else derive one from eaddr */
+		if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+			paddr = be64_to_cpu(mce_log->logical_address);
+		} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+			unsigned long pfn;
+
+			pfn = addr_to_pfn(regs, eaddr);
+			if (pfn != ULONG_MAX)	/* ULONG_MAX is treated as "no pfn" */
+				paddr = pfn << PAGE_SHIFT;
+		}
+
+		break;
+	case MC_ERROR_TYPE_SLB:
+		mce_err.error_type = MCE_ERROR_TYPE_SLB;
+		switch (err_sub_type) {
+		case MC_ERROR_SLB_PARITY:
+			mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
+			break;
+		case MC_ERROR_SLB_MULTIHIT:
+			mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+			break;
+		case MC_ERROR_SLB_INDETERMINATE:
+		default:
+			mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_ERAT:
+		mce_err.error_type = MCE_ERROR_TYPE_ERAT;
+		switch (err_sub_type) {
+		case MC_ERROR_ERAT_PARITY:
+			mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
+			break;
+		case MC_ERROR_ERAT_MULTIHIT:
+			mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+			break;
+		case MC_ERROR_ERAT_INDETERMINATE:
+		default:
+			mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_TLB:
+		mce_err.error_type = MCE_ERROR_TYPE_TLB;
+		switch (err_sub_type) {
+		case MC_ERROR_TLB_PARITY:
+			mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
+			break;
+		case MC_ERROR_TLB_MULTIHIT:
+			mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+			break;
+		case MC_ERROR_TLB_INDETERMINATE:
+		default:
+			mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_D_CACHE:
+		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+		break;
+	case MC_ERROR_TYPE_I_CACHE:
+		mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
+		break;
+	case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+		mce_err.error_type = MCE_ERROR_TYPE_RA;
+		switch (err_sub_type) {	/* other sub-types leave the zeroed value */
+		case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
+			mce_err.u.ra_error_type =
+				MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+			break;
+		case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
+			mce_err.u.ra_error_type =
+				MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_UNKNOWN:
+	default:
+		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+		break;
+	}
+out:
+	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
+		       &mce_err, regs->nip, eaddr, paddr);
+	return disposition;	/* possibly upgraded by the UE ignore_event check */
+}
+
+static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
+{
+	struct pseries_mc_errorlog *mce_log = NULL;
+	struct pseries_errorlog *section = NULL;
+	int disposition = rtas_error_disposition(errp);
+
+	/* The MCE section is only looked for in an extended log. */
+	if (rtas_error_extended(errp))
+		section = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+
+	if (section) {
+		mce_log = (struct pseries_mc_errorlog *)section->data;
+		disposition = mce_handle_err_realmode(disposition,
+						      mce_log->error_type);
+	}
+
+	/*
+	 * This step always runs. mce_log is still NULL when no MCE section
+	 * was found, and mce_handle_err_virtmode() then saves an event
+	 * without any type-specific details.
+	 */
+	return mce_handle_err_virtmode(regs, errp, mce_log, disposition);
+}
+
+/*
+ * Pass the RTAS error log saved for a machine check to log_error().
+ */
+void pSeries_machine_check_log_err(void)
+{
+	/* fwnmi_get_errlog() returns the mce_data_buf held in the paca */
+	char *raw_log = (char *)fwnmi_get_errlog();
+
+	log_error(raw_log, ERR_TYPE_RTAS_LOG, 0);
+}
+
+/*
+ * See if we can recover from a machine check exception.
+ * This is only called on power4 (or above) and only via
+ * the Firmware Non-Maskable Interrupts (fwnmi) handler
+ * which provides the error analysis for us.
+ *
+ * Return 1 if corrected (or delivered a signal).
+ * Return 0 if there is nothing we can do.
+ */
+static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
+{
+	int recovered = 0;
+
+	if (regs_is_unrecoverable(regs)) {
+		/* If MSR_RI isn't set, we cannot recover */
+		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
+		recovered = 0;
+	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+		/* Platform corrected itself */
+		recovered = 1;
+	} else if (evt->severity == MCE_SEV_FATAL) {
+		/* Fatal machine check */
+		pr_err("Machine check interrupt is fatal\n");
+		recovered = 0;
+	}
+
+	if (!recovered && evt->sync_error) {
+		/*
+		 * Try to kill processes if we get a synchronous machine check
+		 * (e.g., one caused by execution of this instruction). This
+		 * will devolve into a panic if we try to kill init or are in
+		 * an interrupt etc.
+		 *
+		 * TODO: Queue up this address for hwpoisoning later.
+		 * TODO: This is not quite right for d-side machine
+		 *       checks ->nip is not necessarily the important
+		 *       address.
+		 */
+		if ((user_mode(regs))) {
+			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+			recovered = 1;	/* the signal counts as handled */
+		} else if (die_will_crash()) {
+			/*
+			 * die() would kill the kernel, so better to go via
+			 * the platform reboot code that will log the
+			 * machine check.
+			 */
+			recovered = 0;
+		} else {
+			die_mce("Machine check", regs, SIGBUS);
+			recovered = 1;
+		}
+	}
+
+	return recovered;
+}
+
+/*
+ * Handle a machine check.
+ *
+ * On Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) should be
+ * present; the handler that ran before us has then queued an event that
+ * says whether the error was recovered (never true if RI=0). Earlier
+ * hardware raised these exceptions asynchronously, so they can't be
+ * located and can't be recovered. Returns 1 if recover_mce() reports
+ * recovery, else 0 (also when no event is queued or its version is unknown).
+ */
+int pSeries_machine_check_exception(struct pt_regs *regs)
+{
+	struct machine_check_event event;
+	int handled = 0;
+
+	if (!get_mce_event(&event, MCE_EVENT_RELEASE))
+		return handled;
+
+	if (event.version == MCE_V1) {
+		/* Print things out */
+		machine_check_print_event_info(&event, user_mode(regs), false);
+		if (recover_mce(regs, &event))
+			handled = 1;
+	} else {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       event.version);
+	}
+
+	return handled;
+}
+
+long pseries_machine_check_realmode(struct pt_regs *regs)
+{
+	struct rtas_error_log *errp;
+	int disposition = RTAS_DISP_NOT_RECOVERED;
+
+	if (fwnmi_active) {
+		errp = fwnmi_get_errinfo(regs);
+		/*
+		 * fwnmi_get_errinfo() returns NULL when r3 does not point at
+		 * a valid save area. There is then no log to analyse, so it
+		 * must not be dereferenced; the event stays "not recovered".
+		 */
+		if (errp)
+			disposition = mce_handle_error(regs, errp);
+		fwnmi_release_errinfo();
+
+		if (disposition == RTAS_DISP_FULLY_RECOVERED)
+			return 1;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
new file mode 100644
index 0000000000..599bd2c785
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ *
+ * Copyright (C) 2005 Nathan Lynch
+ * Copyright (C) 2005 IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <linux/uaccess.h>
+#include <asm/mmu.h>
+
+#include "of_helpers.h"
+
+static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
+{
+	struct device_node *np;
+	int err = -ENOMEM;
+
+	np = kzalloc(sizeof(*np), GFP_KERNEL);
+	if (!np)
+		goto out_err;
+
+	np->full_name = kstrdup(kbasename(path), GFP_KERNEL);
+	if (!np->full_name)
+		goto out_err;
+
+	np->properties = proplist;	/* on failure the caller still owns proplist */
+	of_node_set_flag(np, OF_DYNAMIC);
+	of_node_init(np);
+
+	np->parent = pseries_of_derive_parent(path);
+	if (IS_ERR(np->parent)) {
+		err = PTR_ERR(np->parent);
+		np->parent = NULL;	/* out_err must not of_node_put() an ERR_PTR */
+		goto out_err;
+	}
+
+	err = of_attach_node(np);
+	if (err) {
+		printk(KERN_ERR "Failed to add device node %s\n", path);
+		goto out_err;
+	}
+
+	of_node_put(np->parent);
+	return 0;
+
+out_err:
+	if (np) {
+		of_node_put(np->parent);	/* NULL unless a parent was found */
+		kfree(np->full_name);
+		kfree(np);
+	}
+	return err;
+}
+
+static int pSeries_reconfig_remove_node(struct device_node *np)
+{
+	struct device_node *first_child;
+	struct device_node *up = of_get_parent(np);
+	int rc = 0;
+
+	if (!up)
+		return -EINVAL;	/* a node without a parent is not removed */
+	first_child = of_get_next_child(np, NULL);
+	if (first_child) {
+		of_node_put(first_child);
+		rc = -EBUSY;	/* a node that has children is left attached */
+	} else {
+		of_detach_node(np);
+	}
+	of_node_put(up);
+	return rc;
+}
+
+/*
+ * /proc/powerpc/ofdt - yucky binary interface for adding and removing
+ * OF device nodes.  Should be deprecated as soon as we get an
+ * in-kernel wrapper for the RTAS ibm,configure-connector call.
+ */
+
+static void release_prop_list(const struct property *prop)
+{
+	/* Walk the ->next chain, freeing each property with its name and value. */
+	while (prop) {
+		struct property *following = prop->next;
+		kfree(prop->name);
+		kfree(prop->value);
+		kfree(prop);
+		prop = following;
+	}
+}
+
+/**
+ * parse_next_property - process the next property from raw input buffer
+ * @buf: input buffer, must be nul-terminated
+ * @end: end of the input buffer + 1, for validation
+ * @name: return value; set to property name in buf
+ * @length: return value; set to length of value
+ * @value: return value; set to the property value in buf
+ *
+ * Note that the caller must make copies of the name and value returned,
+ * this function does no allocation or copying of the data.  Return value
+ * is set to the next name in buf, or NULL on error.
+ */
+static char * parse_next_property(char *buf, char *end, char **name, int *length,
+				  unsigned char **value)
+{
+	char *tmp;
+
+	*name = buf;
+
+	tmp = strchr(buf, ' ');
+	if (!tmp) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	*tmp = '\0';	/* terminate the name in place */
+
+	if (++tmp >= end) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+
+	/* now we're on the length */
+	/* it is stored in an int: reject any value that wraps negative */
+	*length = simple_strtoul(tmp, &tmp, 10);
+	if (*length < 0) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	if (*tmp != ' ' || ++tmp >= end) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+
+	/* now we're on the value */
+	*value = tmp;
+	tmp += *length;
+	if (tmp > end) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	else if (tmp < end && *tmp != ' ' && *tmp != '\0') {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	tmp++;	/* step over the separator that follows the value */
+
+	/* and now we should be on the next name, or the end */
+	return tmp;
+}
+
+static struct property *new_property(const char *name, const int length,
+				     const unsigned char *value, struct property *last)
+{
+	struct property *prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	char *copy;
+
+	if (!prop)
+		return NULL;
+
+	prop->name = kstrdup(name, GFP_KERNEL);
+	copy = prop->name ? kmalloc(length + 1, GFP_KERNEL) : NULL;
+	if (!copy) {
+		kfree(prop->name);
+		kfree(prop);
+		return NULL;
+	}
+
+	/* Keep a private copy of the value bytes with a trailing 0 added. */
+	memcpy(copy, value, length);
+	copy[length] = 0;
+	prop->value = copy;
+	prop->length = length;
+	prop->next = last;	/* the new property goes in front of @last */
+	return prop;
+}
+
+static int do_add_node(char *buf, size_t bufsize)
+{
+	char *path, *end, *name;
+	struct device_node *np;
+	struct property *prop = NULL;
+	unsigned char* value;
+	int length, rv = 0;
+
+	end = buf + bufsize;
+	path = buf;	/* input is "<path> <name> <length> <value> ..." */
+	buf = strchr(buf, ' ');
+	if (!buf)
+		return -EINVAL;
+	*buf = '\0';
+	buf++;
+
+	if ((np = of_find_node_by_path(path))) {
+		of_node_put(np);
+		return -EINVAL;	/* the node already exists */
+	}
+
+	/* Build the property list; each new property is linked in front of the last */
+	while (buf < end &&
+	       (buf = parse_next_property(buf, end, &name, &length, &value))) {
+		struct property *last = prop;
+
+		prop = new_property(name, length, value, last);
+		if (!prop) {
+			rv = -ENOMEM;
+			prop = last;	/* so that the list built so far gets freed */
+			goto out;
+		}
+	}
+	if (!buf) {	/* parse_next_property() failed */
+		rv = -EINVAL;
+		goto out;
+	}
+
+	rv = pSeries_reconfig_add_node(path, prop);
+
+out:
+	if (rv)
+		release_prop_list(prop);
+	return rv;
+}
+
+static int do_remove_node(char *buf)
+{
+	struct device_node *victim = of_find_node_by_path(buf);
+	int status;
+
+	/* -ENODEV when the path does not name an existing node */
+	status = victim ? pSeries_reconfig_remove_node(victim) : -ENODEV;
+
+	of_node_put(victim);
+	return status;
+}
+
+static char *parse_node(char *buf, size_t bufsize, struct device_node **npp)
+{
+	char *handle_str;
+	phandle handle;
+	*npp = NULL;
+	/* Split "<phandle> <rest>"; @bufsize is not used by this function */
+	handle_str = buf;
+
+	buf = strchr(buf, ' ');
+	if (!buf)
+		return NULL;	/* *npp stays NULL, which callers test */
+	*buf = '\0';
+	buf++;
+
+	handle = simple_strtoul(handle_str, NULL, 0);	/* base 0: prefix selects the radix */
+
+	*npp = of_find_node_by_phandle(handle);	/* NULL if no node has this phandle */
+	return buf;	/* first character after the phandle */
+}
+
+static int do_add_property(char *buf, size_t bufsize)
+{
+	struct property *prop = NULL;
+	struct device_node *np;
+	unsigned char *value;
+	char *name, *end;
+	int length;
+	end = buf + bufsize;	/* taken before parse_node() advances buf */
+	buf = parse_node(buf, bufsize, &np);
+	/* NOTE(review): np from parse_node() is never of_node_put() here - confirm */
+	if (!np)
+		return -ENODEV;
+
+	if (parse_next_property(buf, end, &name, &length, &value) == NULL)
+		return -EINVAL;
+
+	prop = new_property(name, length, value, NULL);
+	if (!prop)
+		return -ENOMEM;
+
+	of_add_property(np, prop);	/* NOTE(review): return value is ignored */
+
+	return 0;
+}
+
+static int do_remove_property(char *buf, size_t bufsize)
+{
+	struct device_node *np;
+	char *tmp;
+	buf = parse_node(buf, bufsize, &np);	/* buf now starts at the property name */
+	/* NOTE(review): np from parse_node() is never of_node_put() here - confirm */
+	if (!np)
+		return -ENODEV;
+
+	tmp = strchr(buf,' ');
+	if (tmp)
+		*tmp = '\0';	/* ignore anything after the name */
+
+	if (strlen(buf) == 0)
+		return -EINVAL;
+
+	return of_remove_property(np, of_find_property(np, buf, NULL));
+}
+
+static int do_update_property(char *buf, size_t bufsize)
+{
+	struct device_node *np;
+	unsigned char *value;
+	char *name, *end, *next_prop;
+	int length;
+	struct property *newprop;
+	end = buf + bufsize;	/* take the end before parse_node() advances buf */
+	buf = parse_node(buf, bufsize, &np);
+	/* Input is "<phandle> <name> <length> <value>": update that property */
+	if (!np)
+		return -ENODEV;
+
+	next_prop = parse_next_property(buf, end, &name, &length, &value);
+	if (!next_prop)
+		return -EINVAL;
+
+	if (!strlen(name))
+		return -ENODEV;
+
+	newprop = new_property(name, length, value, NULL);
+	if (!newprop)
+		return -ENOMEM;
+
+	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
+		slb_set_size(*(int *)value);	/* NOTE(review): reads an int whatever length is */
+
+	return of_update_property(np, newprop);
+}
+
+/**
+ * ofdt_write - perform operations on the Open Firmware device tree
+ *
+ * @file: not used
+ * @buf: command and arguments
+ * @count: size of the command buffer
+ * @off: not used
+ *
+ * The first word of @buf selects the operation: add_node, remove_node,
+ * add_property, remove_property or update_property; the rest is passed to
+ * the matching handler. Returns @count on success or a negative errno.
+ */
+static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
+			  loff_t *off)
+{
+	int rv;
+	char *kbuf;
+	char *tmp;
+
+	rv = security_locked_down(LOCKDOWN_DEVICE_TREE);
+	if (rv)
+		return rv;
+
+	kbuf = memdup_user_nul(buf, count);	/* NUL-terminated kernel copy */
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	tmp = strchr(kbuf, ' ');
+	if (!tmp) {
+		rv = -EINVAL;
+		goto out;
+	}
+	*tmp = '\0';	/* kbuf is now just the command word */
+	tmp++;
+	/* Handlers get the arguments and the number of bytes left in kbuf */
+	if (!strcmp(kbuf, "add_node"))
+		rv = do_add_node(tmp, count - (tmp - kbuf));
+	else if (!strcmp(kbuf, "remove_node"))
+		rv = do_remove_node(tmp);
+	else if (!strcmp(kbuf, "add_property"))
+		rv = do_add_property(tmp, count - (tmp - kbuf));
+	else if (!strcmp(kbuf, "remove_property"))
+		rv = do_remove_property(tmp, count - (tmp - kbuf));
+	else if (!strcmp(kbuf, "update_property"))
+		rv = do_update_property(tmp, count - (tmp - kbuf));
+	else
+		rv = -EINVAL;
+out:
+	kfree(kbuf);
+	return rv ? rv : count;
+}
+
+static const struct proc_ops ofdt_proc_ops = {
+	.proc_write	= ofdt_write,	/* only a write handler is provided */
+	.proc_lseek	= noop_llseek,
+};
+
+/* create /proc/powerpc/ofdt write-only by root */
+static int proc_ppc64_create_ofdt(void)
+{
+	struct proc_dir_entry *ent;
+
+	ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops);
+	if (ent)
+		proc_set_size(ent, 0);
+
+	return 0;	/* a failed proc_create() is not reported as an error */
+}
+machine_device_initcall(pseries, proc_ppc64_create_ofdt);
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
new file mode 100644
index 0000000000..6ddfdeaace
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"pseries-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <asm/archrandom.h>
+#include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
+#include "pseries.h"
+
+
+/*
+ * Issue the H_RANDOM hcall. On H_SUCCESS the first word of the return
+ * buffer is stored in *v and 1 is returned; otherwise *v is left alone and
+ * 0 is returned.
+ */
+static int pseries_get_random_long(unsigned long *v)
+{
+	unsigned long hret[PLPAR_HCALL_BUFSIZE];
+
+	if (plpar_hcall(H_RANDOM, hret) != H_SUCCESS)
+		return 0;
+
+	*v = hret[0];
+	return 1;
+}
+
+/*
+ * Install pseries_get_random_long() as ppc_md.get_random_seed when the
+ * device tree has a node compatible with "ibm,random".
+ */
+void __init pseries_rng_init(void)
+{
+	struct device_node *rng_node;
+
+	rng_node = of_find_compatible_node(NULL, NULL, "ibm,random");
+	if (rng_node) {
+		ppc_md.get_random_seed = pseries_get_random_long;
+		/* The node was only needed as a presence check. */
+		of_node_put(rng_node);
+	}
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
new file mode 100644
index 0000000000..b5853e9fcc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "rtas fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/page.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
+
+#include "rtas-fadump.h"
+
+static struct rtas_fadump_mem_struct fdm;
+static const struct rtas_fadump_mem_struct *fdm_active;
+
+/*
+ * Refresh the addresses in @fadump_conf that depend on where the boot
+ * memory (RMR) section of @fdm is to be copied: the destination itself and
+ * the fadump header address, which lies boot_memory_size bytes after it.
+ */
+static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
+				      const struct rtas_fadump_mem_struct *fdm)
+{
+	const struct rtas_fadump_section *rmr = &fdm->rmr_region;
+
+	fadump_conf->boot_mem_dest_addr = be64_to_cpu(rmr->destination_address);
+	fadump_conf->fadumphdr_addr = fadump_conf->boot_mem_dest_addr +
+				      fadump_conf->boot_memory_size;
+}
+
+/*
+ * Called in the capture kernel to recover, from the memory structure @fdm
+ * handed back by firmware, the configuration that the first kernel set up:
+ * the single boot memory region, its size/top, and the start of the
+ * reserved dump area.
+ */
+static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
+				   const struct rtas_fadump_mem_struct *fdm)
+{
+	const struct rtas_fadump_section *rmr = &fdm->rmr_region;
+
+	/* This platform describes boot memory with exactly one region. */
+	fadump_conf->boot_mem_regs_cnt = 1;
+	fadump_conf->boot_mem_addr[0] = be64_to_cpu(rmr->source_address);
+	fadump_conf->boot_mem_sz[0] = be64_to_cpu(rmr->source_len);
+
+	fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0];
+	fadump_conf->boot_mem_top = fadump_conf->boot_memory_size;
+
+	/*
+	 * The CPU state data destination is the start of the reserved dump
+	 * area (permanent reservation), needed for re-registering FADump
+	 * after dump capture.
+	 */
+	fadump_conf->reserve_dump_area_start =
+		be64_to_cpu(fdm->cpu_state_data.destination_address);
+
+	rtas_fadump_update_config(fadump_conf, fdm);
+}
+
+/*
+ * Fill in the request side of one dump section descriptor. The source
+ * address is always 0; fields not written here keep whatever the caller
+ * left in them.
+ */
+static void rtas_fadump_fill_section(struct rtas_fadump_section *sec,
+				     u16 data_type, u64 len, u64 dest)
+{
+	sec->request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+	sec->source_data_type = cpu_to_be16(data_type);
+	sec->source_address = 0;
+	sec->source_len = cpu_to_be64(len);
+	sec->destination_address = cpu_to_be64(dest);
+}
+
+/*
+ * Build the registration structure 'fdm' from @fadump_conf: a header
+ * followed by the CPU state, HPTE and boot memory (RMR) sections laid out
+ * back to back starting at the page-masked reserve_dump_area_start.
+ *
+ * Returns the address just past the boot memory destination area.
+ */
+static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+	struct rtas_fadump_section_header *hdr = &fdm.header;
+	u64 next = fadump_conf->reserve_dump_area_start;
+
+	memset(&fdm, 0, sizeof(fdm));
+	next &= PAGE_MASK;
+
+	hdr->dump_format_version = cpu_to_be32(0x00000001);
+	hdr->dump_num_sections = cpu_to_be16(3);
+	hdr->dump_status_flag = 0;
+	hdr->offset_first_dump_section =
+		cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct,
+					  cpu_state_data));
+
+	/* The disk dump option is not used, so its fields are all 0. */
+	hdr->dd_block_size = 0;
+	hdr->dd_block_offset = 0;
+	hdr->dd_num_blocks = 0;
+	hdr->dd_offset_disk_path = 0;
+
+	/* set 0 to disable an automatic dump-reboot. */
+	hdr->max_time_auto = 0;
+
+	/* Kernel dump sections: cpu state data first ... */
+	rtas_fadump_fill_section(&fdm.cpu_state_data,
+				 RTAS_FADUMP_CPU_STATE_DATA,
+				 fadump_conf->cpu_state_data_size, next);
+	next += fadump_conf->cpu_state_data_size;
+
+	/* ... then the hpte region ... */
+	rtas_fadump_fill_section(&fdm.hpte_region, RTAS_FADUMP_HPTE_REGION,
+				 fadump_conf->hpte_region_size, next);
+	next += fadump_conf->hpte_region_size;
+
+	/*
+	 * ... and finally the RMA region. Its destination is aligned to a
+	 * page boundary so that the area can be mmap read in the vmcore.
+	 */
+	next = PAGE_ALIGN(next);
+	rtas_fadump_fill_section(&fdm.rmr_region, RTAS_FADUMP_REAL_MODE_REGION,
+				 fadump_conf->boot_memory_size, next);
+	next += fadump_conf->boot_memory_size;
+
+	rtas_fadump_update_config(fadump_conf, &fdm);
+
+	return next;
+}
+
+static u64 rtas_fadump_get_bootmem_min(void)
+{
+	return RTAS_FADUMP_MIN_BOOT_MEM;	/* (1UL << 28) + (1UL << 26), see rtas-fadump.h */
+}
+
+/*
+ * Register 'fdm' with firmware through the ibm,configure-kernel-dump RTAS
+ * call, repeating the call for as long as rtas_busy_delay_time() asks for
+ * a delay.
+ *
+ * Returns 0 on success, -EINVAL for RTAS status -3, -EEXIST for -9 (the
+ * dump is then also marked registered) and -EIO for anything else.
+ */
+static int rtas_fadump_register(struct fw_dump *fadump_conf)
+{
+	unsigned int delay_ms;
+	int rc;
+
+	/* TODO: Add upper time limit for the delay */
+	for (;;) {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_REGISTER, &fdm,
+			       sizeof(struct rtas_fadump_mem_struct));
+
+		delay_ms = rtas_busy_delay_time(rc);
+		if (!delay_ms)
+			break;
+		mdelay(delay_ms);
+	}
+
+	if (rc == 0) {
+		pr_info("Registration is successful!\n");
+		fadump_conf->dump_registered = 1;
+		return 0;
+	}
+
+	if (rc == -9) {
+		pr_err("Already registered!\n");
+		fadump_conf->dump_registered = 1;
+		return -EEXIST;
+	}
+
+	if (rc == -3) {
+		/* Give a more specific hint when a memory layout check fails. */
+		if (!is_fadump_boot_mem_contiguous())
+			pr_err("Can't have holes in boot memory area.\n");
+		else if (!is_fadump_reserved_mem_contiguous())
+			pr_err("Can't have holes in reserved memory area.\n");
+
+		pr_err("Failed to register. Parameter Error(%d).\n", rc);
+		return -EINVAL;
+	}
+
+	if (rc == -1)
+		pr_err("Failed to register. Hardware Error(%d).\n", rc);
+	else
+		pr_err("Failed to register. Unknown Error(%d).\n", rc);
+
+	return -EIO;
+}
+
+/*
+ * Ask firmware to drop the registration described by 'fdm', repeating the
+ * RTAS call for as long as rtas_busy_delay_time() asks for a delay.
+ *
+ * Returns 0 and clears dump_registered on success, -EIO on any non-zero
+ * final RTAS status.
+ */
+static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
+{
+	unsigned int delay_ms;
+	int rc;
+
+	/* TODO: Add upper time limit for the delay */
+	for (;;) {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_UNREGISTER, &fdm,
+			       sizeof(struct rtas_fadump_mem_struct));
+
+		delay_ms = rtas_busy_delay_time(rc);
+		if (!delay_ms)
+			break;
+		mdelay(delay_ms);
+	}
+
+	if (rc != 0) {
+		pr_err("Failed to un-register - unexpected error(%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_registered = 0;
+	return 0;
+}
+
+/*
+ * Tell firmware that the dump described by 'fdm_active' is no longer
+ * needed, repeating the RTAS call for as long as rtas_busy_delay_time()
+ * asks for a delay.
+ *
+ * Returns 0 on success, after clearing dump_active and forgetting
+ * fdm_active; -EIO on any non-zero final RTAS status.
+ */
+static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+	unsigned int delay_ms;
+	int rc;
+
+	/* TODO: Add upper time limit for the delay */
+	for (;;) {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_INVALIDATE, fdm_active,
+			       sizeof(struct rtas_fadump_mem_struct));
+
+		delay_ms = rtas_busy_delay_time(rc);
+		if (!delay_ms)
+			break;
+		mdelay(delay_ms);
+	}
+
+	if (rc != 0) {
+		pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
+		return -EIO;
+	}
+
+	fdm_active = NULL;
+	fadump_conf->dump_active = 0;
+	return 0;
+}
+
+/* Selects the top three bytes of a register id, where "GPR" is expected. */
+#define RTAS_FADUMP_GPR_MASK		0xffffff0000000000
+
+/*
+ * Decode a register id of the form "GPRnn".
+ *
+ * Returns the decimal value of the two characters following "GPR", -1 if
+ * @id does not start with "GPR" or the value exceeds 31, and -EINVAL if
+ * the two characters do not parse as a decimal integer.
+ */
+static inline int rtas_fadump_gpr_index(u64 id)
+{
+	char digits[3];
+	int idx = -1;
+
+	if ((id & RTAS_FADUMP_GPR_MASK) != fadump_str_to_u64("GPR"))
+		return idx;
+
+	/* Drop the "GPR" prefix and shift the two digit bytes down. */
+	id = (id & ~RTAS_FADUMP_GPR_MASK) >> 24;
+	digits[0] = (id >> 8) & 0xff;
+	digits[1] = id & 0xff;
+	digits[2] = '\0';
+
+	if (kstrtoint(digits, 10, &idx))
+		idx = -EINVAL;
+	if (idx > 31)
+		idx = -1;
+
+	return idx;
+}
+
+/*
+ * Store @reg_val into the pt_regs field named by @reg_id. GPR ids are
+ * resolved through rtas_fadump_gpr_index(); ids that match none of the
+ * names handled here are ignored.
+ */
+static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+	const unsigned long val = (unsigned long)reg_val;
+	int gpr = rtas_fadump_gpr_index(reg_id);
+
+	if (gpr >= 0) {
+		regs->gpr[gpr] = val;
+		return;
+	}
+
+	if (reg_id == fadump_str_to_u64("NIA"))
+		regs->nip = val;
+	else if (reg_id == fadump_str_to_u64("MSR"))
+		regs->msr = val;
+	else if (reg_id == fadump_str_to_u64("CTR"))
+		regs->ctr = val;
+	else if (reg_id == fadump_str_to_u64("LR"))
+		regs->link = val;
+	else if (reg_id == fadump_str_to_u64("XER"))
+		regs->xer = val;
+	else if (reg_id == fadump_str_to_u64("CR"))
+		regs->ccr = val;
+	else if (reg_id == fadump_str_to_u64("DAR"))
+		regs->dar = val;
+	else if (reg_id == fadump_str_to_u64("DSISR"))
+		regs->dsisr = val;
+}
+
+/*
+ * Zero @regs, then copy every register entry up to (not including) the
+ * "CPUEND" marker into it. Returns the entry that follows "CPUEND".
+ */
+static struct rtas_fadump_reg_entry* __init
+rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
+		      struct pt_regs *regs)
+{
+	memset(regs, 0, sizeof(*regs));
+
+	for (; be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND");
+	     reg_entry++)
+		rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+				       be64_to_cpu(reg_entry->reg_value));
+
+	/* Step over the "CPUEND" entry itself. */
+	return reg_entry + 1;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ *
+ * Returns 0 on success, -ENOENT when the "REGSAVE" magic or a "CPUSTRT"
+ * marker is missing, or the non-zero result of
+ * fadump_setup_cpu_notes_buf(). The notes buffer is freed again when a
+ * "CPUSTRT" marker is missing.
+ */
+static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+	struct rtas_fadump_reg_save_area_header *reg_header;
+	struct fadump_crash_info_header *fdh = NULL;
+	struct rtas_fadump_reg_entry *reg_entry;
+	u32 num_cpus, *note_buf;
+	int i, rc = 0, cpu = 0;
+	struct pt_regs regs;
+	unsigned long addr;
+	void *vaddr;
+
+	addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+	vaddr = __va(addr);
+
+	reg_header = vaddr;
+	if (be64_to_cpu(reg_header->magic_number) !=
+	    fadump_str_to_u64("REGSAVE")) {
+		pr_err("Unable to read register save area.\n");
+		return -ENOENT;
+	}
+
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
+	pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
+
+	vaddr += be32_to_cpu(reg_header->num_cpu_offset);	/* offset counts from the header start */
+	num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
+	pr_debug("NumCpus     : %u\n", num_cpus);
+	vaddr += sizeof(u32);	/* register entries follow the 32-bit count */
+	reg_entry = (struct rtas_fadump_reg_entry *)vaddr;
+
+	rc = fadump_setup_cpu_notes_buf(num_cpus);
+	if (rc != 0)
+		return rc;
+
+	note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+
+	if (fadump_conf->fadumphdr_addr)
+		fdh = __va(fadump_conf->fadumphdr_addr);
+
+	for (i = 0; i < num_cpus; i++) {
+		if (be64_to_cpu(reg_entry->reg_id) !=
+		    fadump_str_to_u64("CPUSTRT")) {
+			pr_err("Unable to read CPU state data\n");
+			rc = -ENOENT;
+			goto error_out;
+		}
+		/* Lower 4 bytes of reg_value contains logical cpu id */
+		cpu = (be64_to_cpu(reg_entry->reg_value) &
+		       RTAS_FADUMP_CPU_ID_MASK);
+		if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_mask)) {	/* not in the header's cpu_mask: no note */
+			RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+			continue;
+		}
+		pr_debug("Reading register data for cpu %d...\n", cpu);
+		if (fdh && fdh->crashing_cpu == cpu) {
+			regs = fdh->regs;	/* crashing cpu: use the header's copy, skip firmware data */
+			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+			RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+		} else {
+			reg_entry++;	/* step past the "CPUSTRT" entry */
+			reg_entry = rtas_fadump_read_regs(reg_entry, &regs);
+			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+		}
+	}
+	final_note(note_buf);
+
+	if (fdh) {
+		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+			 fdh->elfcorehdr_addr);
+		fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+	}
+	return 0;
+
+error_out:
+	fadump_free_cpu_notes_buf();	/* undo fadump_setup_cpu_notes_buf() */
+	return rc;
+
+}
+
+/*
+ * Validate the dump data stored by firmware and, if it is usable, build
+ * the CPU notes and publish the elfcore header address so the dump can be
+ * exported through '/proc/vmcore'.
+ *
+ * Returns 0 on success, -EINVAL when there is no active dump, no fadump
+ * header address, or the dump/crash info header fails validation, or the
+ * error from rtas_fadump_build_cpu_notes().
+ */
+static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
+{
+	const struct rtas_fadump_section *cpu_sec, *rmr_sec;
+	struct fadump_crash_info_header *fdh;
+	int rc;
+
+	if (!fdm_active || !fadump_conf->fadumphdr_addr)
+		return -EINVAL;
+
+	cpu_sec = &fdm_active->cpu_state_data;
+	rmr_sec = &fdm_active->rmr_region;
+
+	/* Check if the dump data is valid. */
+	if (be16_to_cpu(fdm_active->header.dump_status_flag) ==
+	    RTAS_FADUMP_ERROR_FLAG ||
+	    cpu_sec->error_flags != 0 || rmr_sec->error_flags != 0) {
+		pr_err("Dump taken by platform is not valid\n");
+		return -EINVAL;
+	}
+
+	/* Boot memory must be fully dumped and some CPU state must exist. */
+	if (rmr_sec->bytes_dumped != rmr_sec->source_len ||
+	    !cpu_sec->bytes_dumped) {
+		pr_err("Dump taken by platform is incomplete\n");
+		return -EINVAL;
+	}
+
+	/* Validate the fadump crash info header */
+	fdh = __va(fadump_conf->fadumphdr_addr);
+	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+	rc = rtas_fadump_build_cpu_notes(fadump_conf);
+	if (rc)
+		return rc;
+
+	/*
+	 * Validation is complete and the elfcore header is ready. Setting
+	 * elfcorehdr_addr lets the vmcore module export the elfcore header
+	 * through '/proc/vmcore'.
+	 */
+	elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+	return 0;
+}
+
+/*
+ * Print the CPU state, HPTE and boot memory sections to @m. The active
+ * dump structure is shown when there is one, the registration structure
+ * 'fdm' otherwise.
+ */
+static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
+				    struct seq_file *m)
+{
+	const struct rtas_fadump_mem_struct *fdm_ptr = fdm_active ? fdm_active : &fdm;
+	const struct rtas_fadump_section *sec;
+	u64 dest, len;
+
+	sec = &fdm_ptr->cpu_state_data;
+	dest = be64_to_cpu(sec->destination_address);
+	len = be64_to_cpu(sec->source_len);
+	seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+		   dest, dest + len - 1, len, be64_to_cpu(sec->bytes_dumped));
+
+	sec = &fdm_ptr->hpte_region;
+	dest = be64_to_cpu(sec->destination_address);
+	len = be64_to_cpu(sec->source_len);
+	seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+		   dest, dest + len - 1, len, be64_to_cpu(sec->bytes_dumped));
+
+	sec = &fdm_ptr->rmr_region;
+	seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+		   be64_to_cpu(sec->source_address),
+		   be64_to_cpu(sec->destination_address));
+	seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+		   be64_to_cpu(sec->source_len),
+		   be64_to_cpu(sec->bytes_dumped));
+
+	/* Dump is active. Show preserved area start address. */
+	if (!fdm_active)
+		return;
+
+	seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
+		   fadump_conf->boot_mem_top);
+}
+
+static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
+				const char *msg)
+{
+	/*
+	 * Call ibm,os-term rtas call to trigger firmware assisted dump.
+	 * @fdh is not used here; only @msg is forwarded. The cast is
+	 * presumably needed because rtas_os_term() takes a non-const
+	 * char * -- confirm against its prototype.
+	 */
+	rtas_os_term((char *)msg);
+}
+
+static struct fadump_ops rtas_fadump_ops = {	/* installed as fadump_conf->ops by rtas_fadump_dt_scan() */
+	.fadump_init_mem_struct		= rtas_fadump_init_mem_struct,
+	.fadump_get_bootmem_min		= rtas_fadump_get_bootmem_min,
+	.fadump_register		= rtas_fadump_register,
+	.fadump_unregister		= rtas_fadump_unregister,
+	.fadump_invalidate		= rtas_fadump_invalidate,
+	.fadump_process			= rtas_fadump_process,
+	.fadump_region_show		= rtas_fadump_region_show,
+	.fadump_trigger			= rtas_fadump_trigger,
+};
+
+/*
+ * Probe the flat device tree node @node for firmware-assisted dump
+ * support and fill in @fadump_conf accordingly: the RTAS token, the ops
+ * table, whether a dump is waiting, and the sizes firmware needs for the
+ * CPU state and HPTE sections. Does nothing if the
+ * "ibm,configure-kernel-dump" property is absent.
+ */
+void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+	const __be32 *token, *cells;
+	int idx, len, nr_entries;
+
+	/*
+	 * Check if Firmware Assisted dump is supported. if yes, check
+	 * if dump has been initiated on last reboot.
+	 */
+	token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+	if (!token)
+		return;
+
+	fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+	fadump_conf->ops = &rtas_fadump_ops;
+	fadump_conf->fadump_supported = 1;
+
+	/* Firmware supports 64-bit value for size, align it to pagesize. */
+	fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE);
+
+	/*
+	 * The 'ibm,kernel-dump' rtas node is present only if there is
+	 * dump data waiting for us.
+	 */
+	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+	if (fdm_active) {
+		pr_info("Firmware-assisted dump is active.\n");
+		fadump_conf->dump_active = 1;
+		rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
+	}
+
+	/*
+	 * Get the sizes required to store dump data for the firmware provided
+	 * dump sections. Each supported section type is described by three
+	 * 32-bit cells: the section ID followed by two cells that give the
+	 * size of the section in bytes.
+	 */
+	cells = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+				    &len);
+	if (!cells)
+		return;
+
+	nr_entries = len / (3 * sizeof(u32));
+
+	for (idx = 0; idx < nr_entries; idx++, cells += 3) {
+		u32 type = (u32)of_read_number(cells, 1);
+
+		if (type == RTAS_FADUMP_CPU_STATE_DATA)
+			fadump_conf->cpu_state_data_size =
+					of_read_ulong(&cells[1], 2);
+		else if (type == RTAS_FADUMP_HPTE_REGION)
+			fadump_conf->hpte_region_size =
+					of_read_ulong(&cells[1], 2);
+	}
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
new file mode 100644
index 0000000000..fd59bd7ca9
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _PSERIES_RTAS_FADUMP_H
+#define _PSERIES_RTAS_FADUMP_H
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully. When kdump infrastructure is
+ * configured to save vmcore over network, we run into OOM issue while
+ * loading modules related to network setup. Hence we need additional 64M
+ * of memory to avoid OOM issue.
+ *
+ * The value below is therefore 256MB + 64MB.
+ */
+#define RTAS_FADUMP_MIN_BOOT_MEM	((0x1UL << 28) + (0x1UL << 26))
+
+/* Firmware provided dump sections (values of source_data_type) */
+#define RTAS_FADUMP_CPU_STATE_DATA	0x0001
+#define RTAS_FADUMP_HPTE_REGION		0x0002
+#define RTAS_FADUMP_REAL_MODE_REGION	0x0011
+
+/* Dump request flag (stored in each section's request_flag) */
+#define RTAS_FADUMP_REQUEST_FLAG	0x00000001
+
+/* Dump status flag (compared against header.dump_status_flag) */
+#define RTAS_FADUMP_ERROR_FLAG		0x2000
+
+/* Kernel Dump section info; all fields are big-endian. */
+struct rtas_fadump_section {
+	__be32	request_flag;		/* RTAS_FADUMP_REQUEST_FLAG when registering */
+	__be16	source_data_type;	/* one of the RTAS_FADUMP_* section ids */
+	__be16	error_flags;		/* non-zero makes rtas_fadump_process() reject the dump */
+	__be64	source_address;
+	__be64	source_len;
+	__be64	bytes_dumped;		/* checked against source_len for the RMR section */
+	__be64	destination_address;	/* where the section's dump data is placed */
+};
+
+/* ibm,configure-kernel-dump header; all fields are big-endian. */
+struct rtas_fadump_section_header {
+	__be32	dump_format_version;	/* registered as 1 */
+	__be16	dump_num_sections;	/* registered as 3 */
+	__be16	dump_status_flag;	/* RTAS_FADUMP_ERROR_FLAG marks an invalid dump */
+	__be32	offset_first_dump_section;	/* offsetof(struct rtas_fadump_mem_struct, cpu_state_data) */
+
+	/* Fields for disk dump option; all registered as 0 (option unused). */
+	__be32	dd_block_size;
+	__be64	dd_block_offset;
+	__be64	dd_num_blocks;
+	__be32	dd_offset_disk_path;
+
+	/*
+	 * Maximum time allowed to prevent an automatic dump-reboot.
+	 * Registered as 0, which disables an automatic dump-reboot.
+	 */
+	__be32	max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ *
+ * The header's offset_first_dump_section points at cpu_state_data, so the
+ * three sections must stay directly after the header in this order.
+ */
+struct rtas_fadump_mem_struct {
+	struct rtas_fadump_section_header	header;
+
+	/* Kernel dump sections */
+	struct rtas_fadump_section		cpu_state_data;
+	struct rtas_fadump_section		hpte_region;
+
+	/*
+	 * TODO: Extend multiple boot memory regions support in the kernel
+	 *       for this platform.
+	 */
+	struct rtas_fadump_section		rmr_region;
+};
+
+/*
+ * The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. Each list of registers for a CPU starts with "CPUSTRT"
+ * and ends with "CPUEND".
+ */
+
+/* Register save area header; found at the CPU state data destination. */
+struct rtas_fadump_reg_save_area_header {
+	__be64		magic_number;	/* "REGSAVE" */
+	__be32		version;
+	__be32		num_cpu_offset;	/* byte offset from this header to the 32-bit CPU count */
+};
+
+/* Register entry: an 8-byte ASCII identifier and an 8-byte value. */
+struct rtas_fadump_reg_entry {
+	__be64		reg_id;		/* e.g. "CPUSTRT", "GPRnn", "NIA", "CPUEND" */
+	__be64		reg_value;	/* for "CPUSTRT": lower 4 bytes hold the logical cpu id */
+};
+
+/* Utility macros */
+
+/*
+ * Advance @reg_entry, which must be a modifiable lvalue of type
+ * struct rtas_fadump_reg_entry *, to the entry that follows the next
+ * "CPUEND" marker. The argument is evaluated more than once, so it must
+ * not have side effects; it is parenthesized so that any lvalue
+ * expression expands correctly.
+ */
+#define RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry)				\
+({									\
+	while (be64_to_cpu((reg_entry)->reg_id) !=			\
+	       fadump_str_to_u64("CPUEND"))				\
+		(reg_entry)++;						\
+	(reg_entry)++;							\
+})
+
+/* Selects the lower 4 bytes of a "CPUSTRT" value: the logical cpu id. */
+#define RTAS_FADUMP_CPU_ID_MASK			((1UL << 32) - 1)
+
+#endif /* _PSERIES_RTAS_FADUMP_H */
diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c
new file mode 100644
index 0000000000..b37d52f403
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-work-area.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt)	"rtas-work-area: " fmt
+
+#include <linux/genalloc.h>
+#include <linux/log2.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mempool.h>
+#include <linux/minmax.h>
+#include <linux/mutex.h>
+#include <linux/numa.h>
+#include <linux/sizes.h>
+#include <linux/wait.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+
+enum {
+	/*
+	 * Ensure the pool is page-aligned.
+	 */
+	RTAS_WORK_AREA_ARENA_ALIGN = PAGE_SIZE,
+	/*
+	 * Don't let a single allocation claim the whole arena.
+	 */
+	RTAS_WORK_AREA_ARENA_SZ = RTAS_WORK_AREA_MAX_ALLOC_SZ * 2,
+	/*
+	 * The smallest known work area size is for ibm,get-vpd's
+	 * location code argument, which is limited to 79 characters
+	 * plus 1 nul terminator.
+	 *
+	 * PAPR+ 7.3.20 ibm,get-vpd RTAS Call
+	 * PAPR+ 12.3.2.4 Converged Location Code Rules - Length Restrictions
+	 *
+	 * Rounded up to a power of two (128); its log2 is the order
+	 * passed to gen_pool_create() in rtas_work_area_allocator_init().
+	 */
+	RTAS_WORK_AREA_MIN_ALLOC_SZ = roundup_pow_of_two(80),
+};
+
+static struct {
+	struct gen_pool *gen_pool;	/* set by rtas_work_area_allocator_init() */
+	char *arena;			/* set by rtas_work_area_reserve_arena(); may stay NULL */
+	struct mutex mutex; /* serializes allocations */
+	struct wait_queue_head wqh;	/* allocators wait here; woken on every free */
+	mempool_t descriptor_pool;	/* backs struct rtas_work_area descriptors */
+	bool available;			/* false: alloc/free use the early work area */
+} rwa_state = {
+	.mutex = __MUTEX_INITIALIZER(rwa_state.mutex),
+	.wqh = __WAIT_QUEUE_HEAD_INITIALIZER(rwa_state.wqh),
+};
+
+/*
+ * A single work area buffer and descriptor to serve requests early in
+ * boot before the allocator is fully initialized. We know 4KB is the
+ * most any boot time user needs (they all call ibm,get-system-parameter).
+ *
+ * early_work_area_in_use tracks whether the buffer is handed out; misuse
+ * is only reported with WARN_ON(), not prevented.
+ */
+static bool early_work_area_in_use __initdata;
+static char early_work_area_buf[SZ_4K] __initdata __aligned(SZ_4K);
+static struct rtas_work_area early_work_area __initdata = {
+	.buf = early_work_area_buf,
+	.size = sizeof(early_work_area_buf),
+};
+
+
+static struct rtas_work_area * __init rtas_work_area_alloc_early(size_t size)
+{
+	WARN_ON(size > early_work_area.size);	/* request larger than the 4K early buffer */
+	WARN_ON(early_work_area_in_use);	/* previous early allocation not freed yet */
+	early_work_area_in_use = true;
+	memset(early_work_area.buf, 0, early_work_area.size);	/* always hand out a zeroed buffer */
+	return &early_work_area;	/* returned even if a warning fired above */
+}
+
+static void __init rtas_work_area_free_early(struct rtas_work_area *work_area)
+{
+	WARN_ON(work_area != &early_work_area);	/* only the early descriptor may be freed here */
+	WARN_ON(!early_work_area_in_use);	/* free without a matching early alloc */
+	early_work_area_in_use = false;
+}
+
+/*
+ * Allocate a work area of @size bytes. May sleep. Before the allocator is
+ * marked available the single early work area is returned instead;
+ * afterwards the call waits until the gen_pool can satisfy the request.
+ */
+struct rtas_work_area * __ref __rtas_work_area_alloc(size_t size)
+{
+	struct rtas_work_area *desc;
+	unsigned long buf_addr;
+
+	might_sleep();
+
+	/*
+	 * The rtas_work_area_alloc() wrapper enforces this at build
+	 * time. Requests that exceed the arena size will block
+	 * indefinitely.
+	 */
+	WARN_ON(size > RTAS_WORK_AREA_MAX_ALLOC_SZ);
+
+	/* Allocator not set up yet: fall back to the boot-time work area. */
+	if (!rwa_state.available)
+		return rtas_work_area_alloc_early(size);
+
+	/*
+	 * Holding the mutex while waiting queues allocations, which gives
+	 * FCFS behavior and keeps a high rate of smaller requests from
+	 * starving larger ones.
+	 */
+	mutex_lock(&rwa_state.mutex);
+	wait_event(rwa_state.wqh,
+		   (buf_addr = gen_pool_alloc(rwa_state.gen_pool, size)) != 0);
+	mutex_unlock(&rwa_state.mutex);
+
+	desc = mempool_alloc(&rwa_state.descriptor_pool, GFP_KERNEL);
+	desc->size = size;
+	desc->buf = (char *)buf_addr;
+
+	return desc;
+}
+
+/*
+ * Release @area. Before the allocator is marked available this releases
+ * the early work area; afterwards the buffer goes back to the gen_pool,
+ * the descriptor to its mempool, and waiting allocators are woken.
+ */
+void __ref rtas_work_area_free(struct rtas_work_area *area)
+{
+	if (rwa_state.available) {
+		gen_pool_free(rwa_state.gen_pool, (unsigned long)area->buf,
+			      area->size);
+		mempool_free(area, &rwa_state.descriptor_pool);
+		wake_up(&rwa_state.wqh);
+	} else {
+		rtas_work_area_free_early(area);
+	}
+}
+
+/*
+ * Initialization of the work area allocator happens in two parts. To
+ * reliably reserve an arena that satisfies RTAS addressing
+ * requirements, we must perform a memblock allocation early,
+ * immediately after RTAS instantiation. Then we have to wait until
+ * the slab allocator is up before setting up the descriptor mempool
+ * and adding the arena to a gen_pool.
+ *
+ * This is the second part. Returns 0 on success, -ENOMEM when no arena
+ * was reserved or the gen_pool cannot be created, or the error from
+ * gen_pool_add() / mempool_init_kmalloc_pool(). The allocator is marked
+ * available only on success.
+ */
+static __init int rtas_work_area_allocator_init(void)
+{
+	const unsigned int order = ilog2(RTAS_WORK_AREA_MIN_ALLOC_SZ);
+	const int nid = NUMA_NO_NODE;
+	phys_addr_t pa_start, pa_end;
+	struct gen_pool *pool;
+	int err;
+
+	err = -ENOMEM;
+	if (!rwa_state.arena)
+		goto err_out;
+
+	/*
+	 * Translate the arena address only once it is known to be non-NULL;
+	 * __pa() of a NULL pointer is meaningless and may trip virtual
+	 * address debugging checks.
+	 */
+	pa_start = __pa(rwa_state.arena);
+	pa_end = pa_start + RTAS_WORK_AREA_ARENA_SZ - 1;
+
+	pool = gen_pool_create(order, nid);
+	if (!pool)
+		goto err_out;
+	/*
+	 * All RTAS functions that consume work areas are OK with
+	 * natural alignment, when they have alignment requirements at
+	 * all.
+	 */
+	gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);
+
+	err = gen_pool_add(pool, (unsigned long)rwa_state.arena,
+			   RTAS_WORK_AREA_ARENA_SZ, nid);
+	if (err)
+		goto err_destroy;
+
+	err = mempool_init_kmalloc_pool(&rwa_state.descriptor_pool, 1,
+					sizeof(struct rtas_work_area));
+	if (err)
+		goto err_destroy;
+
+	/* Publish the pool before flipping callers over to it. */
+	rwa_state.gen_pool = pool;
+	rwa_state.available = true;
+
+	pr_debug("arena [%pa-%pa] (%uK), min/max alloc sizes %u/%u\n",
+		 &pa_start, &pa_end,
+		 RTAS_WORK_AREA_ARENA_SZ / SZ_1K,
+		 RTAS_WORK_AREA_MIN_ALLOC_SZ,
+		 RTAS_WORK_AREA_MAX_ALLOC_SZ);
+
+	return 0;
+
+err_destroy:
+	gen_pool_destroy(pool);
+err_out:
+	return err;
+}
+machine_arch_initcall(pseries, rtas_work_area_allocator_init);
+
+/**
+ * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas.
+ * @limit: Upper address bound handed to memblock_alloc_try_nid().
+ *
+ * Stores the result in rwa_state.arena; nothing is reserved unless one of
+ * the RTAS functions checked below is implemented.
+ */
+void __init rtas_work_area_reserve_arena(const phys_addr_t limit)
+{
+	const phys_addr_t min = MEMBLOCK_LOW_LIMIT;
+	const phys_addr_t size = RTAS_WORK_AREA_ARENA_SZ;
+	const phys_addr_t align = RTAS_WORK_AREA_ARENA_ALIGN;
+	const int nid = NUMA_NO_NODE;
+
+	/*
+	 * Too early for a machine_is(pseries) check. But PAPR
+	 * effectively mandates that ibm,get-system-parameter is
+	 * present:
+	 *
+	 * R1–7.3.16–1. All platforms must support the System
+	 * Parameters option.
+	 *
+	 * So set up the arena if we find that, with a fallback to
+	 * ibm,configure-connector, just in case.
+	 */
+	if (!rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER) &&
+	    !rtas_function_implemented(RTAS_FN_IBM_CONFIGURE_CONNECTOR))
+		return;
+
+	rwa_state.arena = memblock_alloc_try_nid(size, align, min, limit, nid);
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
new file mode 100644
index 0000000000..ecea85c74c
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -0,0 +1,1162 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  64-bit pSeries and RS/6000 setup code.
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified by PPC64 Team, IBM Corp
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/major.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/adb.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/memblock.h>
+#include <linux/swiotlb.h>
+#include <linux/seq_buf.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/pmc.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/papr-sysparm.h>
+#include <asm/ppc-pci.h>
+#include <asm/i8259.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
+#include <asm/isa-bridge.h>
+#include <asm/security_features.h>
+#include <asm/asm-const.h>
+#include <asm/idle.h>
+#include <asm/swiotlb.h>
+#include <asm/svm.h>
+#include <asm/dtl.h>
+#include <asm/hvconsole.h>
+#include <asm/setup.h>
+
+#include "pseries.h"
+
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL(shared_processor);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static bool steal_acc = true;	/* cleared by the "no-steal-acc" early parameter */
+static int __init parse_no_stealacc(char *arg)
+{
+	steal_acc = false;	/* @arg is not looked at */
+	return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+#endif
+
+int CMO_PrPSP = -1;
+int CMO_SecPSP = -1;
+unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
+EXPORT_SYMBOL(CMO_PageSize);
+
+int fwnmi_active;  /* TRUE if an FWNMI handler is present */
+int ibm_nmi_interlock_token;
+u32 pseries_security_flavor;
+
+/* ppc_md.show_cpuinfo hook: model ("" if the DT has no "model") and MMU mode. */
+static void pSeries_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	const char *model = NULL;
+
+	if (root)
+		model = of_get_property(root, "model", NULL);
+	seq_printf(m, "machine\t\t: CHRP %s\n", model ? model : "");
+	of_node_put(root);
+	if (radix_enabled())
+		seq_puts(m, "MMU\t\t: Radix\n");
+	else
+		seq_puts(m, "MMU\t\t: Hash\n");
+}
+
+/* Register the FWNMI system reset and machine check entry points with RTAS
+ * when firmware offers ibm,nmi-register, then set up per-cpu MCE buffers.
+ */
+static void __init fwnmi_init(void)
+{
+	unsigned long system_reset_addr, machine_check_addr;
+	u8 *mce_data_buf;
+	unsigned int i;
+	int nr_cpus = num_possible_cpus();
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	struct slb_entry *slb_ptr;
+	size_t size;
+#endif
+	int ibm_nmi_register_token;
+
+	ibm_nmi_register_token = rtas_function_token(RTAS_FN_IBM_NMI_REGISTER);
+	if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE)
+		return;
+
+	ibm_nmi_interlock_token = rtas_function_token(RTAS_FN_IBM_NMI_INTERLOCK);
+	if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE))
+		return;
+
+	/* Even when the kernel is not linked at zero, firmware is handed the
+	 * low addresses; a trampoline there gets us to the real code. */
+	system_reset_addr  = __pa(system_reset_fwnmi) - PHYSICAL_START;
+	machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;
+
+	if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL,
+			   system_reset_addr, machine_check_addr))
+		fwnmi_active = 1;
+
+	/*
+	 * One RTAS_ERROR_LOG_MAX sized slot per possible cpu for the rtas
+	 * errorlog. The real mode mce handler uses it, hence it must sit
+	 * below the RMA. Allocated whatever the registration call returned.
+	 */
+	mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus,
+					RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT,
+					ppc64_rma_size, NUMA_NO_NODE);
+	if (!mce_data_buf)
+		panic("Failed to allocate %d bytes below %pa for MCE buffer\n",
+		      RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size);
+
+	for_each_possible_cpu(i) {
+		paca_ptrs[i]->mce_data_buf = mce_data_buf +
+						(RTAS_ERROR_LOG_MAX * i);
+	}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (!radix_enabled()) {
+		/* Per cpu area (mmu_slb_size entries each) to save old slb contents during MCE */
+		size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+		slb_ptr = memblock_alloc_try_nid_raw(size,
+				sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
+				ppc64_rma_size, NUMA_NO_NODE);
+		if (!slb_ptr)
+			panic("Failed to allocate %zu bytes below %pa for slb area\n",
+			      size, &ppc64_rma_size);
+
+		for_each_possible_cpu(i)
+			paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+	}
+#endif
+}
+
+/*
+ * Register the "pseries-wdt" platform device if firmware has H_WATCHDOG.
+ */
+static __init int pseries_wdt_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_WATCHDOG))
+		return 0;
+	platform_device_register_simple("pseries-wdt", 0, NULL, 0);
+	return 0;
+}
+machine_subsys_initcall(pseries, pseries_wdt_init);
+
+static void pseries_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	const unsigned int pending = i8259_irq();
+
+	if (pending != 0)	/* zero: nothing to hand on */
+		generic_handle_irq(pending);
+	/* the cascade interrupt itself is always EOI'd on its own chip */
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/* Locate the "chrp,iic" 8259, map its cascade interrupt and chain it to i8259. */
+static void __init pseries_setup_i8259_cascade(void)
+{
+	struct device_node *np, *old, *found = NULL;
+	unsigned int cascade;
+	const u32 *addrp;
+	unsigned long intack = 0;
+	int naddr;
+
+	for_each_node_by_type(np, "interrupt-controller") {
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			found = np;	/* keeps the reference held at the break */
+			break;
+		}
+	}
+	if (found == NULL) {
+		printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
+		return;
+	}
+
+	cascade = irq_of_parse_and_map(found, 0);
+	if (!cascade) {
+		printk(KERN_ERR "pic: failed to map cascade interrupt\n");
+		of_node_put(found);	/* do not leak the node on failure */
+		return;
+	}
+	pr_debug("pic: cascade mapped to irq %d\n", cascade);
+
+	/* Walk up to the root; every "pci" ancestor may supply an intack address */
+	for (old = of_node_get(found); old != NULL ; old = np) {
+		np = of_get_parent(old);
+		of_node_put(old);
+		if (np == NULL || !of_node_name_eq(np, "pci"))
+			continue;	/* a NULL parent ends the loop via old = np */
+		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
+		if (addrp == NULL)
+			continue;
+		naddr = of_n_addr_cells(np);
+		intack = addrp[naddr-1];
+		if (naddr > 1)
+			intack |= ((unsigned long)addrp[naddr-2]) << 32;
+	}
+	if (intack)
+		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
+	i8259_init(found, intack);
+	of_node_put(found);
+	irq_set_chained_handler(cascade, pseries_8259_cascade);
+}
+
+static void __init pseries_init_irq(void)
+{
+	/* Prefer XIVE; only when it is not set up fall back to XICS + 8259 */
+	if (xive_spapr_init())
+		return;
+	xics_init();
+	pseries_setup_i8259_cascade();
+}
+
+static void pseries_lpar_enable_pmcs(void)
+{
+	/* H_PERFMON gets a "set" and a "reset" argument; only bit 63 is set */
+	const unsigned long set_bits = 1UL << 63;
+	const unsigned long reset_bits = 0;
+
+	plpar_hcall_norets(H_PERFMON, set_bits, reset_bits);
+}
+
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	struct device_node *node = rd->dn;
+	struct device_node *parent;
+	struct pci_dn *pdn;
+
+	if (action == OF_RECONFIG_ATTACH_NODE) {
+		/* New node: add its info under the parent's PHB, if the parent has a pci_dn */
+		parent = of_get_parent(node);
+		pdn = parent ? PCI_DN(parent) : NULL;
+		if (pdn)
+			pci_add_device_node_info(pdn->phb, node);
+		of_node_put(parent);
+		return NOTIFY_OK;
+	}
+
+	if (action == OF_RECONFIG_DETACH_NODE) {
+		/* Departing node: unlink its pci_dn from the list it is on */
+		pdn = PCI_DN(node);
+		if (pdn)
+			list_del(&pdn->list);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;	/* not an event this notifier acts on */
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+	.notifier_call = pci_dn_reconfig_notifier,
+};
+
+struct kmem_cache *dtl_cache;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Allocate the dispatch trace log buffers and register the boot
+ * cpu's buffer with the hypervisor; the log is used for computing
+ * stolen time. A no-op without FW_FEATURE_SPLPAR or without the
+ * "dtl" cache. Always returns 0.
+ */
+static int alloc_dispatch_logs(void)
+{
+	const bool usable = firmware_has_feature(FW_FEATURE_SPLPAR) && dtl_cache;
+
+	if (usable) {
+		alloc_dtl_buffers(0);
+
+		/* The current cpu is the boot cpu at this point */
+		register_dtl_buffer(smp_processor_id());
+	}
+
+	return 0;
+}
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+/* Stub: nothing is allocated in this configuration. */
+static inline int alloc_dispatch_logs(void)
+{
+	return 0;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/* Early initcall: create the "dtl" slab cache, then allocate the logs. */
+static int alloc_dispatch_log_kmem_cache(void)
+{
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+				      DISPATCH_LOG_BYTES, 0,
+				      get_dtl_cache_ctor());
+	if (dtl_cache)
+		return alloc_dispatch_logs();
+
+	/* Not treated as fatal: warn and report success to the initcall */
+	pr_warn("Failed to create dispatch trace log buffer cache\n");
+	pr_warn("Stolen time statistics will be unreliable\n");
+	return 0;
+}
+machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
+
+DEFINE_PER_CPU(u64, idle_spurr_cycles);
+DEFINE_PER_CPU(u64, idle_entry_purr_snap);
+DEFINE_PER_CPU(u64, idle_entry_spurr_snap);
+static void pseries_lpar_idle(void)
+{
+	/*
+	 * Default LPAR idle handler (installed as ppc_md.power_save): go to low
+	 * thread priority and possibly low power mode by ceding to the hypervisor
+	 */
+
+	if (!prep_irq_for_idle())
+		return;
+
+	/* Indicate to hypervisor that we are idle. */
+	pseries_idle_prolog();
+
+	/*
+	 * Yield the processor to the hypervisor. Control comes back
+	 * when an external interrupt occurs (these are driven before
+	 * we return here) or when another processor prods us. On
+	 * return, external interrupts are enabled.
+	 *
+	 * The prolog/epilog pair brackets the cede.
+	 */
+	cede_processor();
+
+	pseries_idle_epilog();
+}
+
+static bool pseries_reloc_on_exception_enabled;	/* updated by the enable/disable helpers in this file */
+
+bool pseries_reloc_on_exception(void)
+{
+	return pseries_reloc_on_exception_enabled;	/* cached state, no hypercall is made */
+}
+EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);
+
+/*
+ * Enable relocation during exceptions (partition wide). The hypervisor
+ * may answer "long busy": retry, but give up once the accumulated delay
+ * would exceed one second. If that ever proves too short, the rest could
+ * be done from a kernel thread later in boot. Returns true on success.
+ */
+bool pseries_enable_reloc_on_exc(void)
+{
+	unsigned int waited_ms = 0;
+
+	for (;;) {
+		long rc = enable_reloc_on_exceptions();
+
+		if (H_IS_LONG_BUSY(rc)) {
+			unsigned int step_ms = get_longbusy_msecs(rc);
+
+			waited_ms += step_ms;
+			if (waited_ms > 1000) {
+				pr_warn("Warning: Giving up waiting to enable "
+					"relocation on exceptions (%u msec)!\n",
+					waited_ms);
+				return false;
+			}
+			mdelay(step_ms);
+			continue;
+		}
+		if (rc == H_P2) {
+			pr_info("Relocation on exceptions not"
+				" supported\n");
+			return false;
+		}
+		if (rc != H_SUCCESS) {
+			pr_warn("Unable to enable relocation"
+				" on exceptions: %ld\n", rc);
+			return false;
+		}
+		pseries_reloc_on_exception_enabled = true;
+		return true;
+	}
+}
+EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
+
+/* Turn relocation-on-exceptions back off, waiting out "long busy" replies. */
+void pseries_disable_reloc_on_exc(void)
+{
+	long rc;
+
+	for (rc = disable_reloc_on_exceptions(); H_IS_LONG_BUSY(rc);
+	     rc = disable_reloc_on_exceptions())
+		mdelay(get_longbusy_msecs(rc));
+
+	if (rc != H_SUCCESS) {
+		pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
+			rc);
+		return;
+	}
+	pseries_reloc_on_exception_enabled = false;
+}
+EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
+
+#ifdef __LITTLE_ENDIAN__
+void pseries_big_endian_exceptions(void)
+{
+	long rc;
+
+	for (rc = enable_big_endian_exceptions(); H_IS_LONG_BUSY(rc);
+	     rc = enable_big_endian_exceptions())
+		mdelay(get_longbusy_msecs(rc));
+
+	/*
+	 * Anything but success is fatal, unless a kdump is in progress.
+	 *
+	 * This runs very late in kexec, so panic() is unlikely to get
+	 * anything out to the user; it does at least stop us from
+	 * continuing and creating an even harder to debug situation.
+	 *
+	 * Known problem: when kdump'ing with cpus offline, the call
+	 * above fails. Rather than panicking again, carry on and hope
+	 * the kdump kernel is little endian as well, which it usually
+	 * is.
+	 */
+	if (rc == 0 || kdump_in_progress())
+		return;
+
+	panic("Could not enable big endian exceptions");
+}
+
+void __init pseries_little_endian_exceptions(void)
+{
+	long rc;
+
+	/* Retry for as long as the hypervisor answers "long busy" */
+	for (rc = enable_little_endian_exceptions(); H_IS_LONG_BUSY(rc);
+	     rc = enable_little_endian_exceptions())
+		mdelay(get_longbusy_msecs(rc));
+
+	if (!rc)
+		return;
+
+	ppc_md.progress("H_SET_MODE LE exception fail", 0);
+	panic("Could not enable little endian exceptions");
+}
+#endif
+
+static void __init pSeries_discover_phbs(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *child;
+
+	/* Root children of type "pci" or "pciex" are set up as host bridges */
+	for_each_child_of_node(root, child) {
+		struct pci_controller *hose;
+		bool is_phb = of_node_is_type(child, "pci") ||
+			      of_node_is_type(child, "pciex");
+
+		if (!is_phb)
+			continue;
+
+		hose = pcibios_alloc_controller(child);
+		if (!hose)
+			continue;
+		rtas_setup_phb(hose);
+		pci_process_bridge_OF_ranges(hose, child, 0);
+		isa_bridge_find_early(hose);
+		hose->controller_ops = pseries_pci_controller_ops;
+
+		/* build the pci_dn entries for the DT nodes below this PHB */
+		pci_devs_phb_init_dynamic(hose);
+
+		pseries_msi_allocate_domains(hose);
+	}
+
+	of_node_put(root);
+
+	/* "chosen" properties may set PCI_PROBE_ONLY / PCI_REASSIGN_ALL_BUS */
+	of_pci_check_probe_only();
+}
+
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+	/*
+	 * Translate the character/behaviour bits in @result into SEC_FTR_*
+	 * flags. The features in this first group are off by default, so
+	 * they are set only where firmware reports them as *enabled*.
+	 */
+	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
+		security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+	if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
+		security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);
+
+	if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
+		security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
+	if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
+		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+	/*
+	 * The remaining features are on by default, so they are cleared
+	 * where firmware reports them as *disabled*. The security flavor
+	 * is recorded too: 0 without FAVOUR_SECURITY, 1 with
+	 * FAVOUR_SECURITY_H (only possible if FAVOUR_SECURITY is set), else 2.
+	 */
+	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) {
+		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+		pseries_security_flavor = 0;
+	} else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
+		pseries_security_flavor = 1;
+	else
+		pseries_security_flavor = 2;
+
+	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+	if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+	if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
+		security_ftr_clear(SEC_FTR_STF_BARRIER);
+
+	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
+void pseries_setup_security_mitigations(void)
+{
+	enum l1d_flush_type flush_types = L1D_FLUSH_FALLBACK;
+	struct h_cpu_char_result cpu_char;
+	bool on;
+	long rc;
+
+	/*
+	 * Begin from the defaults that init_cpu_char_feature_flags()
+	 * assumes. That lets it redo anything that changed across a
+	 * migration, and leaves sane values if the hypercall fails and
+	 * it never runs.
+	 */
+	powerpc_security_features = SEC_FTR_DEFAULT;
+
+	rc = plpar_get_cpu_characteristics(&cpu_char);
+	if (rc == H_SUCCESS)
+		init_cpu_char_feature_flags(&cpu_char);
+
+	/*
+	 * As the guest this one does not apply to us; clearing it keeps
+	 * the handling elsewhere simple.
+	 */
+	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+	/* The fallback flush type is always included; add the reported ones */
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+		flush_types |= L1D_FLUSH_MTTRIG;
+
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+		flush_types |= L1D_FLUSH_ORI;
+
+	on = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+	     security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
+
+	setup_rfi_flush(flush_types, on);
+	setup_count_cache_flush();
+
+	on = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+	     security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+	setup_entry_flush(on);
+
+	on = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+	     security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+	setup_uaccess_flush(on);
+
+	setup_stf_barrier();
+}
+
+#ifdef CONFIG_PCI_IOV
+enum rtas_iov_fw_value_map {
+	NUM_RES_PROPERTY  = 0, /* Cell holding the number of entries */
+	LOW_INT           = 1, /* Offset in an entry of the cell given to pci_parse_of_flags() */
+	START_OF_ENTRIES  = 2, /* Index of the first entry */
+	APERTURE_PROPERTY = 2, /* Offset in an entry of the 2-cell Aperture Size */
+	WDW_SIZE_PROPERTY = 4, /* Offset in an entry of the 2-cell Window Size */
+	NEXT_ENTRY        = 7  /* Stride, in cells, from one entry to the next */
+};
+
+enum get_iov_fw_value_index {
+	BAR_ADDRS     = 1,    /* Select the entry's 2-cell BAR address */
+	APERTURE_SIZE = 2,    /* Select the entry's Aperture Size */
+	WDW_SIZE      = 3     /* Select the entry's Window Size */
+};
+
+/* Fetch one field of BAR entry @resno from "ibm,open-sriov-vf-bar-info".
+ * Returns 0 when the property is missing or @resno is out of range. */
+static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
+						enum get_iov_fw_value_index value)
+{
+	const __be32 *indexes;
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	/* resource_size_t, not int: an int would truncate the two-cell values */
+	resource_size_t ret = 0;
+	int i, num_res;
+
+	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+	if (!indexes)
+		return 0;
+
+	/* The first cell is the entry count; reject indices outside of it */
+	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+	if (resno < 0 || resno >= num_res)
+		return 0; /* or an error */
+
+	i = START_OF_ENTRIES + NEXT_ENTRY * resno;
+	switch (value) {
+	case BAR_ADDRS:
+		ret = of_read_number(&indexes[i], 2);
+		break;
+	case APERTURE_SIZE:
+		ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+		break;
+	case WDW_SIZE:
+		ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+		break;
+	}
+
+	return ret;
+}
+
+/* Fill in the IOV BAR resources of @dev from the firmware address and aperture. */
+static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
+{
+	int entry = START_OF_ENTRIES;
+	int bar, count;
+
+	count = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+	count = min_t(int, count, PCI_SRIOV_NUM_BARS);
+	for (bar = 0; bar < count; bar++) {
+		struct resource *res = &dev->resource[bar + PCI_IOV_RESOURCES];
+		resource_size_t base = of_read_number(&indexes[entry], 2);
+		resource_size_t size = of_read_number(&indexes[entry + APERTURE_PROPERTY], 2);
+
+		res->flags = pci_parse_of_flags(of_read_number(&indexes[entry + LOW_INT], 1), 0);
+		res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
+		res->name = pci_name(dev);
+		res->start = base;
+		res->end = base + size - 1;
+		entry += NEXT_ENTRY;
+	}
+}
+
+static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
+{
+	struct resource *res, *root, *conflict;
+	resource_size_t base, size;
+	int i, r, num_res;
+
+	/*
+	 * The first element of the array is the number of BARs that
+	 * follow. For each one, take the firmware address and window
+	 * size and request that range under iomem_resource.
+	 */
+	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
+	     i += NEXT_ENTRY, r++) {
+		res = &dev->resource[r + PCI_IOV_RESOURCES];
+		base = of_read_number(&indexes[i], 2);
+		size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+		res->name = pci_name(dev);
+		res->start = base;
+		res->end = base + size - 1;
+		root = &iomem_resource;
+		dev_dbg(&dev->dev,
+			"pSeries IOV BAR %d: trying firmware assignment %pR\n",
+			 r + PCI_IOV_RESOURCES, res);
+		conflict = request_resource_conflict(root, res);
+		if (conflict) {
+			dev_info(&dev->dev,
+				 "BAR %d: %pR conflicts with %s %pR\n",
+				 r + PCI_IOV_RESOURCES, res,
+				 conflict->name, conflict);
+			res->flags |= IORESOURCE_UNSET;	/* mark the BAR as not assigned */
+		}
+	}
+}
+
+static void pseries_disable_sriov_resources(struct pci_dev *pdev)
+{
+	int bar = PCI_SRIOV_NUM_BARS;
+
+	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
+	while (bar-- > 0)	/* zero the flags of every IOV BAR resource */
+		pdev->resource[bar + PCI_IOV_RESOURCES].flags = 0;
+}
+
+/* Set up the IOV BARs of @pdev from the firmware property, or disable them. */
+static void pseries_pci_fixup_resources(struct pci_dev *pdev)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const int *bar_info = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+
+	if (!bar_info) {	/* firmware without open sriov: don't configure */
+		pseries_disable_sriov_resources(pdev);
+		return;
+	}
+	of_pci_set_vf_bar_size(pdev, bar_info);
+}
+
+/* Physical functions only: request the firmware-assigned IOV BARs of @pdev. */
+static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const int *bar_info;
+
+	if (!pdev->is_physfn)
+		return;
+	bar_info = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+	if (bar_info)
+		of_pci_parse_iov_addrs(pdev, bar_info);
+	else
+		pseries_disable_sriov_resources(pdev);
+}
+
+static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
+							  int resno)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const bool open_sriov = of_get_property(dn, "ibm,is-open-sriov-pf", NULL) != NULL;
+
+	/* No open sriov in firmware: report the regular alignment */
+	if (!open_sriov)
+		return pci_iov_resource_size(pdev, resno);
+
+	/* Anything but a physical function reports 0 */
+	if (!pdev->is_physfn)
+		return 0;
+
+	resno -= PCI_IOV_RESOURCES;	/* index relative to the first IOV BAR */
+	return pseries_get_iov_fw_value(pdev, resno, APERTURE_SIZE);
+}
+#endif
+
+static void __init pSeries_setup_arch(void)
+{
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
+	/* SMP setup; NOTE(review): PIC selection itself is in pseries_init_irq() */
+	smp_init_pseries();
+
+	// Setup CPU hotplug callbacks
+	pseries_cpu_hotplug_init();
+
+	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
+		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+			panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");
+
+
+	/* openpic global configuration register (64-bit format). */
+	/* openpic Interrupt Source Unit pointer (64-bit format). */
+	/* python0 facility area (mmio) (64-bit format) REAL address. */
+
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	fwnmi_init();
+
+	pseries_setup_security_mitigations();
+	if (!radix_enabled())
+		pseries_lpar_read_hblkrm_characteristics();
+
+	/* By default, only probe PCI (can be overridden by rtas_pci) */
+	pci_add_flags(PCI_PROBE_ONLY);
+
+	/* Set up the PCI config tokens and watch device tree reconfiguration */
+	init_pci_config_tokens();
+	of_reconfig_notifier_register(&pci_dn_reconfig_nb);
+
+	pSeries_nvram_init();
+
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+		vpa_init(boot_cpuid);
+
+		if (lppaca_shared_proc()) {
+			static_branch_enable(&shared_processor);
+			pv_spinlocks_init();
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+			static_key_slow_inc(&paravirt_steal_enabled);
+			if (steal_acc)	/* false after "no-steal-acc" on the command line */
+				static_key_slow_inc(&paravirt_steal_rq_enabled);
+#endif
+		}
+
+		ppc_md.power_save = pseries_lpar_idle;
+		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+#ifdef CONFIG_PCI_IOV
+		ppc_md.pcibios_fixup_resources =
+			pseries_pci_fixup_resources;
+		ppc_md.pcibios_fixup_sriov =
+			pseries_pci_fixup_iov_resources;
+		ppc_md.pcibios_iov_resource_alignment =
+			pseries_pci_iov_resource_alignment;
+#endif
+	} else {
+		/* No special idle routine: ppc_md.power_save is left untouched */
+		ppc_md.enable_pmcs = power4_enable_pmcs;
+	}
+
+	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+	pseries_rng_init();
+}
+
+static void pseries_panic(char *str)
+{
+	panic_flush_kmsg_end();
+	rtas_os_term(str);	/* @str is passed on to rtas_os_term() unchanged */
+}
+
+static int __init pSeries_init_panel(void)
+{
+	/* Send an endian-specific banner, then the kernel version, to ppc_md.progress(). */
+#ifdef __BIG_ENDIAN__
+	ppc_md.progress("Linux ppc64\n", 0);
+#else
+	ppc_md.progress("Linux ppc64le\n", 0);
+#endif
+	ppc_md.progress(init_utsname()->version, 0);
+
+	return 0;
+}
+machine_arch_initcall(pseries, pSeries_init_panel);
+
+static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+	return plpar_hcall_norets(H_SET_DABR, dabr);	/* @dabrx is not passed to H_SET_DABR */
+}
+
+static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
+{
+	const unsigned long allowed = DABRX_KERNEL | DABRX_USER;
+
+	/* PAPR wants at least one DABRX bit set; use the user bit if all is zero */
+	if (!dabr && !dabrx)
+		dabrx = DABRX_USER;
+	/* PAPR says only the kernel and user bits may be set */
+	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx & allowed);
+}
+
+static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
+{
+	/* PAPR says we can't set HYP */
+	const unsigned long guest_dawrx = dawrx & ~DAWRX_HYP;
+
+	/* @nr == 0 selects watchpoint 0, any other value watchpoint 1 */
+	if (nr != 0)
+		return plpar_set_watchpoint1(dawr, guest_dawrx);
+	return plpar_set_watchpoint0(dawr, guest_dawrx);
+}
+
+#define CMO_CHARACTERISTICS_TOKEN 44
+#define CMO_MAXLENGTH 1026
+
+void pSeries_coalesce_init(void)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO) || h_get_mpp_x(&mpp_x_data))
+		powerpc_firmware_features &= ~FW_FEATURE_XCMO;	/* no CMO, or h_get_mpp_x() failed */
+	else
+		powerpc_firmware_features |= FW_FEATURE_XCMO;
+}
+
+/**
+ * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
+ * handle that here. (Stolen from parse_system_parameter_string)
+ */
+static void __init pSeries_cmo_feature_init(void)
+{
+	static struct papr_sysparm_buf buf __initdata;
+	static_assert(sizeof(buf.val) >= CMO_MAXLENGTH);
+	char *ptr, *key, *value, *end;
+	int page_order = IOMMU_PAGE_SHIFT_4K;
+
+	pr_debug(" -> fw_cmo_feature_init()\n");
+
+	if (papr_sysparm_get(PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS, &buf)) {
+		pr_debug("CMO not available\n");
+		pr_debug(" <- fw_cmo_feature_init()\n");
+		return;
+	}
+
+	end = &buf.val[CMO_MAXLENGTH];
+	ptr = &buf.val[0];
+	key = value = ptr;
+
+	while (ptr < end) {		/* bound checked before the byte is read */
+		const bool last = (ptr[0] == '\0');
+
+		if (last && key == ptr)
+			break;	/* nothing pending: empty string or trailing ',' */
+		if (ptr[0] == '=') {
+			/* Split key from value: '=' becomes the key's terminator */
+			ptr[0] = '\0';
+			value = ptr + 1;
+		} else if (last || ptr[0] == ',') {
+			/* Terminate the pair; the final one ends at the NUL */
+			ptr[0] = '\0';
+			if (key == value) {
+				pr_debug("Malformed key/value pair\n");
+				break;	/* never found a '=' */
+			}
+			if (0 == strcmp(key, "CMOPageSize"))
+				page_order = simple_strtol(value, NULL, 10);
+			else if (0 == strcmp(key, "PrPSP"))
+				CMO_PrPSP = simple_strtol(value, NULL, 10);
+			else if (0 == strcmp(key, "SecPSP"))
+				CMO_SecPSP = simple_strtol(value, NULL, 10);
+			if (last)
+				break;
+			value = key = ptr + 1;
+		}
+		ptr++;
+	}
+
+	if (page_order < 0 || page_order >= BITS_PER_LONG)
+		page_order = IOMMU_PAGE_SHIFT_4K;	/* unusable shift from firmware */
+	CMO_PageSize = 1UL << page_order;	/* firmware gives log2 of the size */
+	pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);
+
+	if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
+		pr_info("CMO enabled\n");
+		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
+			 CMO_SecPSP);
+		powerpc_firmware_features |= FW_FEATURE_CMO;
+		pSeries_coalesce_init();
+	} else
+		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
+			 CMO_SecPSP);
+	pr_debug(" <- fw_cmo_feature_init()\n");
+}
+
+/* Append the /openprom model and the hypervisor kind to ppc_hw_desc. */
+static void __init pseries_add_hw_description(void)
+{
+	struct device_node *node;
+	const char *str;
+
+	node = of_find_node_by_path("/openprom");
+	if (node) {
+		if (!of_property_read_string(node, "model", &str))
+			seq_buf_printf(&ppc_hw_desc, "of:%s ", str);
+		of_node_put(node);
+	}
+
+	node = of_find_node_by_path("/hypervisor");
+	if (node) {
+		/* With a /hypervisor node present the root properties are not consulted */
+		if (!of_property_read_string(node, "compatible", &str))
+			seq_buf_printf(&ppc_hw_desc, "hv:%s ", str);
+		of_node_put(node);
+		return;
+	}
+
+	if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
+	    of_property_read_bool(of_root, "ibm,fw-net-version"))
+		seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
+}
+
+/*
+ * Early init from pSeries_probe(). Relocation is on; don't touch unbolted pages
+ */
+static void __init pseries_init(void)
+{
+	pr_debug(" -> pseries_init()\n");
+
+	pseries_add_hw_description();
+
+#ifdef CONFIG_HVC_CONSOLE
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		hvc_vio_init_early();
+#endif
+	if (firmware_has_feature(FW_FEATURE_XDABR))	/* XDABR is tried before plain DABR */
+		ppc_md.set_dabr = pseries_set_xdabr;
+	else if (firmware_has_feature(FW_FEATURE_DABR))
+		ppc_md.set_dabr = pseries_set_dabr;
+
+	if (firmware_has_feature(FW_FEATURE_SET_MODE))
+		ppc_md.set_dawr = pseries_set_dawr;
+
+	pSeries_cmo_feature_init();
+	iommu_init_early_pSeries();
+
+	pr_debug(" <- pseries_init()\n");
+}
+
+/**
+ * pseries_power_off - ask firmware to power the system off.
+ *
+ * Normally the power-off rtas token is called. The ibm,power-off-ups
+ * token is used instead when it is present and requested, as in the
+ * case of a power failure. After power-off, power on is only possible
+ * with a power button press; ibm,power-off-ups allows auto poweron
+ * once power is restored.
+ */
+static void pseries_power_off(void)
+{
+	const int ups_token = rtas_function_token(RTAS_FN_IBM_POWER_OFF_UPS);
+	int rc;
+
+	if (rtas_flash_term_hook)
+		rtas_flash_term_hook(SYS_POWER_OFF);
+
+	if (rtas_poweron_auto != 0 && ups_token != RTAS_UNKNOWN_SERVICE) {
+		rc = rtas_call(ups_token, 0, 1, NULL);
+		printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
+	} else {
+		rc = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1);
+		printk(KERN_INFO "RTAS power-off returned %d\n", rc);
+	}
+	/* the RTAS call came back (result logged above): spin */
+	for (;;);
+}
+
+static int __init pSeries_probe(void)
+{
+	const bool chrp = of_node_is_type(of_root, "chrp");
+
+	/*
+	 * Cell blades firmware claims to be chrp while it's not; avoid those too.
+	 */
+	if (!chrp || of_machine_is_compatible("IBM,CPBW-1.0") ||
+	    of_machine_is_compatible("IBM,CBEA"))
+		return 0;
+
+	/* power off is routed through pseries_power_off() from here on */
+	pm_power_off = pseries_power_off;
+
+	pr_debug("Machine is%s LPAR !\n",
+	         (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
+
+	pseries_init();
+
+	return 1;
+}
+
+static int pSeries_pci_probe_mode(struct pci_bus *bus)
+{
+	/* With FW_FEATURE_LPAR probe from the device tree, else probe normally */
+	return firmware_has_feature(FW_FEATURE_LPAR) ? PCI_PROBE_DEVTREE :
+						       PCI_PROBE_NORMAL;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long pseries_memory_block_size(void)
+{
+	return memory_block_size;	/* variable defined outside this file */
+}
+#endif
+
+struct pci_controller_ops pseries_pci_controller_ops = {
+	.probe_mode		= pSeries_pci_probe_mode,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	.device_group		= pSeries_pci_device_group,
+#endif
+};
+
+define_machine(pseries) {
+	.name			= "pSeries",
+	.probe			= pSeries_probe,
+	.setup_arch		= pSeries_setup_arch,
+	.init_IRQ		= pseries_init_irq,
+	.show_cpuinfo		= pSeries_show_cpuinfo,
+	.log_error		= pSeries_log_error,
+	.discover_phbs		= pSeries_discover_phbs,
+	.pcibios_fixup		= pSeries_final_fixup,
+	.restart		= rtas_restart,
+	.halt			= rtas_halt,
+	.panic			= pseries_panic,
+	.get_boot_time		= rtas_get_boot_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.progress		= rtas_progress,
+	.system_reset_exception = pSeries_system_reset_exception,
+	.machine_check_early	= pseries_machine_check_realmode,
+	.machine_check_exception = pSeries_machine_check_exception,
+	.machine_check_log_err	= pSeries_machine_check_log_err,
+#ifdef CONFIG_KEXEC_CORE
+	.machine_kexec          = pseries_machine_kexec,
+	.kexec_cpu_down         = pseries_kexec_cpu_down,
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+	.memory_block_size	= pseries_memory_block_size,
+#endif
+};
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
new file mode 100644
index 0000000000..c597711ef2
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for pSeries machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
+#include <asm/cputhreads.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/dbell.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/code-patching.h>
+#include <asm/svm.h>
+#include <asm/kvm_guest.h>
+
+#include "pseries.h"
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ *
+ * This mask records those cpus.  It is populated in smp_init_pseries()
+ * (and in smp_startup_cpu() for cpus found already running), tested in
+ * smp_startup_cpu() and cleared per cpu in smp_setup_cpu().
+ */
+static cpumask_var_t of_spin_mask;
+
+/*
+ * Ask RTAS (query-cpu-stopped-state) about the state of cpu @pcpu.
+ *
+ * Returns QCSS_HARDWARE_ERROR when firmware lacks the service, the RTAS
+ * status when the call fails, and otherwise the cpu state reported by
+ * firmware.  Return codes are #defined in plpar_wrappers.h.
+ */
+int smp_query_cpu_stopped(unsigned int pcpu)
+{
+	int token = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
+	int state;
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE) {
+		printk_once(KERN_INFO
+			"Firmware doesn't support query-cpu-stopped-state\n");
+		return QCSS_HARDWARE_ERROR;
+	}
+
+	rc = rtas_call(token, 1, 2, &state, pcpu);
+	if (rc == 0)
+		return state;
+
+	printk(KERN_ERR "RTAS query-cpu-stopped-state failed: %i\n", rc);
+	return rc;
+}
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ * @lcpu: logical number of the cpu to start
+ *
+ * Nothing needs doing for a cpu that is already recorded in
+ * of_spin_mask, that firmware reports as not stopped, or when RTAS has
+ * no start-cpu service (the cpu is then presumed to be spinning
+ * already).  Otherwise RTAS start-cpu is called with the physical
+ * address of generic_secondary_smp_init as the entry point.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+	unsigned long entry;
+	unsigned int hwid;
+	int token;
+	int rc;
+
+	/* Already started by OF and sitting in spin loop */
+	if (cpumask_test_cpu(lcpu, of_spin_mask))
+		return 1;
+
+	hwid = get_hard_smp_processor_id(lcpu);
+
+	/* For kexec: the cpu may already have been taken out of firmware */
+	if (smp_query_cpu_stopped(hwid) == QCSS_NOT_STOPPED) {
+		cpumask_set_cpu(lcpu, of_spin_mask);
+		return 1;
+	}
+
+	/* Without a start-cpu token, presume the cpu is already spinning */
+	token = rtas_function_token(RTAS_FN_START_CPU);
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	entry = __pa(ppc_function_entry(generic_secondary_smp_init));
+	rc = rtas_call(token, 3, 1, NULL, hwid, entry, hwid);
+	if (rc == 0)
+		return 1;
+
+	printk(KERN_ERR "start-cpu failed: %i\n", rc);
+	return 0;
+}
+
+/*
+ * smp_ops .setup_cpu hook: set up the interrupt controller for @cpu
+ * (XIVE always, XICS only for non-boot cpus), initialise the VPA when
+ * the firmware has the SPLPAR feature, and drop @cpu from of_spin_mask.
+ */
+static void smp_setup_cpu(int cpu)
+{
+	if (xive_enabled()) {
+		xive_smp_setup_cpu();
+	} else {
+		if (cpu != boot_cpuid)
+			xics_setup_cpu();
+	}
+
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
+		vpa_init(cpu);
+
+	cpumask_clear_cpu(cpu, of_spin_mask);
+}
+
+/*
+ * smp_ops .kick_cpu hook.
+ *
+ * Returns -EINVAL for a cpu number outside [0, nr_cpu_ids), -ENOENT
+ * when smp_startup_cpu() fails, and 0 once the cpu has been released.
+ */
+static int smp_pSeries_kick_cpu(int nr)
+{
+	if (!(nr >= 0 && nr < nr_cpu_ids))
+		return -EINVAL;
+
+	if (smp_startup_cpu(nr) == 0)
+		return -ENOENT;
+
+	/*
+	 * The processor is spinning until its paca cpu_start field
+	 * becomes non-zero; setting it lets the processor continue on
+	 * to secondary_start.
+	 */
+	paca_ptrs[nr]->cpu_start = 1;
+
+	return 0;
+}
+
+/*
+ * smp_ops .prepare_cpu hook: only XIVE needs per-cpu preparation;
+ * with any other interrupt controller this is a no-op returning 0.
+ */
+static int pseries_smp_prepare_cpu(int cpu)
+{
+	return xive_enabled() ? xive_smp_prepare_cpu(cpu) : 0;
+}
+
+/*
+ * IPI trigger installed by the interrupt controller (xics or xive).
+ * Saved by pSeries_smp_probe() before it substitutes the doorbell
+ * wrapper below.
+ */
+static void (*ic_cause_ipi)(int cpu) __ro_after_init;
+
+/*
+ * Use a msgsndp doorbell if the target is a sibling, otherwise fall
+ * back to the interrupt controller.
+ */
+static void dbell_or_ic_cause_ipi(int cpu)
+{
+	if (!doorbell_try_core_ipi(cpu))
+		ic_cause_ipi(cpu);
+}
+
+/*
+ * smp_ops .cause_nmi_ipi hook: signal a system reset to @cpu, or to
+ * every other cpu when @cpu is NMI_IPI_ALL_OTHERS.
+ *
+ * Returns 1 when the hcall reports H_SUCCESS and 0 otherwise; any other
+ * negative @cpu triggers a one-time warning and also returns 0.
+ */
+static int pseries_cause_nmi_ipi(int cpu)
+{
+	int target;
+
+	if (cpu == NMI_IPI_ALL_OTHERS) {
+		target = H_SIGNAL_SYS_RESET_ALL_OTHERS;
+	} else if (cpu >= 0) {
+		target = get_hard_smp_processor_id(cpu);
+	} else {
+		WARN_ONCE(true, "incorrect cpu parameter %d", cpu);
+		return 0;
+	}
+
+	return plpar_signal_sys_reset(target) == H_SUCCESS ? 1 : 0;
+}
+
+/*
+ * smp_ops .probe hook: probe the interrupt controller, then decide
+ * whether IPIs between SMT siblings should go through msgsndp
+ * doorbells instead of the interrupt controller.
+ */
+static __init void pSeries_smp_probe(void)
+{
+	if (xive_enabled())
+		xive_smp_probe();
+	else
+		xics_smp_probe();
+
+	/*
+	 * Without the doorbell facility the interrupt controller must be
+	 * used for IPIs, and doorbells can only be used for IPIs between
+	 * SMT siblings.
+	 */
+	if (!cpu_has_feature(CPU_FTR_DBELL) || !cpu_has_feature(CPU_FTR_SMT))
+		return;
+
+	check_kvm_guest();
+
+	/*
+	 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
+	 * faults to the hypervisor which then reads the instruction
+	 * from guest memory, which tends to be slower than using XIVE.
+	 *
+	 * XICS hcalls aren't as fast, so msgsndp can be used there (which
+	 * also helps exercise KVM emulation), however KVM can't emulate
+	 * it for secure guests because it can't read the instruction out
+	 * of their memory.
+	 */
+	if (is_kvm_guest() && (xive_enabled() || is_secure_guest()))
+		return;
+
+	/*
+	 * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are
+	 * gang scheduled on the same physical core, so doorbells are always
+	 * faster than the interrupt controller, and they can be used by
+	 * secure guests.
+	 */
+	ic_cause_ipi = smp_ops->cause_ipi;
+	smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
+}
+
+/*
+ * SMP callbacks for pseries, installed as smp_ops by smp_init_pseries().
+ * .cause_ipi starts out NULL and may later be redirected to
+ * dbell_or_ic_cause_ipi() by pSeries_smp_probe().
+ */
+static struct smp_ops_t pseries_smp_ops = {
+	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
+	.cause_ipi	= NULL,	/* Filled at runtime by pSeries_smp_probe() */
+	.cause_nmi_ipi	= pseries_cause_nmi_ipi,
+	.probe		= pSeries_smp_probe,
+	.prepare_cpu	= pseries_smp_prepare_cpu,
+	.kick_cpu	= smp_pSeries_kick_cpu,
+	.setup_cpu	= smp_setup_cpu,
+	.cpu_bootable	= smp_generic_cpu_bootable,
+};
+
+/*
+ * Called very early: install the pseries SMP operations and work out
+ * which cpus are still spinning in the Open Firmware hold loop.
+ */
+void __init smp_init_pseries(void)
+{
+	int cpu;
+
+	pr_debug(" -> smp_init_pSeries()\n");
+	smp_ops = &pseries_smp_ops;
+
+	alloc_bootmem_cpumask_var(&of_spin_mask);
+
+	/*
+	 * If RTAS supports query-cpu-stopped-state we know prom_init will
+	 * not have started any secondary, so the mask stays empty.
+	 * Otherwise mark the threads that are still spinning in hold
+	 * loops: thread 0 of every core with SMT, every present cpu
+	 * without it, and never the boot cpu.
+	 */
+	if (rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE) == RTAS_UNKNOWN_SERVICE) {
+		if (!cpu_has_feature(CPU_FTR_SMT)) {
+			cpumask_copy(of_spin_mask, cpu_present_mask);
+		} else {
+			for_each_present_cpu(cpu) {
+				if (cpu_thread_in_core(cpu) != 0)
+					continue;
+				cpumask_set_cpu(cpu, of_spin_mask);
+			}
+		}
+
+		cpumask_clear_cpu(boot_cpuid, of_spin_mask);
+	}
+
+	pr_debug(" <- smp_init_pSeries()\n");
+}
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
new file mode 100644
index 0000000000..5c43435472
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+  * Copyright (C) 2010 Brian King IBM Corporation
+  */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/suspend.h>
+#include <linux/stat.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/rtas.h>
+#include <asm/topology.h>
+
+/*
+ * Device handed to pseries_suspend_sysfs_register() by the initcall,
+ * which fills in its id and bus fields.
+ */
+static struct device suspend_dev;
+
+/**
+ * pseries_suspend_begin - First phase of hibernation
+ * @stream_id:		stream ID passed to the H_VASI_STATE hcall
+ *
+ * Check to ensure we are in a valid state to hibernate
+ *
+ * Return value:
+ * 	0 when the VASI state is H_VASI_SUSPENDING, -EAGAIN while it is
+ * 	still H_VASI_ENABLED, -EIO for any other state, or the hcall
+ * 	status if H_VASI_STATE itself fails
+ **/
+static int pseries_suspend_begin(u64 stream_id)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long state;
+	long rc;
+
+	/* Make sure the state is valid */
+	rc = plpar_hcall(H_VASI_STATE, retbuf, stream_id);
+	if (rc) {
+		pr_err("pseries_suspend_begin: vasi_state returned %ld\n",rc);
+		return rc;
+	}
+
+	state = retbuf[0];
+	if (state == H_VASI_ENABLED)
+		return -EAGAIN;
+	if (state == H_VASI_SUSPENDING)
+		return 0;
+
+	pr_err("pseries_suspend_begin: vasi_state returned state %ld\n",
+	       state);
+	return -EIO;
+}
+
+/**
+ * pseries_suspend_enter - Final phase of hibernation
+ * @state:		requested suspend state (not used by this function)
+ *
+ * Calls rtas_ibm_suspend_me() with a NULL argument and passes its
+ * result back to the caller.
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int pseries_suspend_enter(suspend_state_t state)
+{
+	return rtas_ibm_suspend_me(NULL);
+}
+
+/**
+ * store_hibernate - Initiate partition hibernation
+ * @dev:		subsys root device
+ * @attr:		device attribute struct
+ * @buf:		buffer holding the stream ID as a hexadecimal string
+ * @count:		buffer size
+ *
+ * Write the stream ID received from the HMC to this file
+ * to trigger hibernating the partition.  The caller needs
+ * CAP_SYS_ADMIN.
+ *
+ * Return value:
+ * 	@count on success / negative errno or hcall status on failure
+ **/
+static ssize_t store_hibernate(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	u64 stream_id;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	stream_id = simple_strtoul(buf, NULL, 16);
+
+	/* Poll once per second until the state is no longer "enabled" */
+	for (;;) {
+		rc = pseries_suspend_begin(stream_id);
+		if (rc != -EAGAIN)
+			break;
+		ssleep(1);
+	}
+
+	if (rc)
+		return rc;
+
+	rc = pm_suspend(PM_SUSPEND_MEM);
+	if (rc)
+		return rc;
+
+	rc = count;
+	post_mobility_fixup();
+
+	return rc;
+}
+
+/* Values reported through the "hibernate" attribute, see show_hibernate() */
+#define USER_DT_UPDATE	0
+#define KERN_DT_UPDATE	1
+
+/**
+ * show_hibernate - Report device tree update responsibility
+ * @dev:		subsys root device
+ * @attr:		device attribute struct
+ * @buf:		buffer
+ *
+ * Report whether a device tree update is performed by the kernel after a
+ * resume, or if drmgr must coordinate the update from user space.  The
+ * text written to @buf is "0" if drmgr is to initiate the update and "1"
+ * otherwise; this implementation always reports KERN_DT_UPDATE.
+ *
+ * Return value:
+ *	number of bytes written to @buf
+ **/
+static ssize_t show_hibernate(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	/* sysfs_emit() is the bounded helper meant for sysfs show callbacks */
+	return sysfs_emit(buf, "%d\n", KERN_DT_UPDATE);
+}
+
+/* "hibernate" attribute: world-readable, writable by owner (mode 0644) */
+static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate);
+
+/* Subsystem named "power" that hosts the hibernate attribute */
+static struct bus_type suspend_subsys = {
+	.name = "power",
+	.dev_name = "power",
+};
+
+/* Suspend operations registered by pseries_suspend_init() */
+static const struct platform_suspend_ops pseries_suspend_ops = {
+	.valid		= suspend_valid_only_mem,
+	.enter		= pseries_suspend_enter,
+};
+
+/**
+ * pseries_suspend_sysfs_register - Register with sysfs
+ * @dev:		device whose id and bus are set up for the subsystem
+ *
+ * Registers the suspend subsystem and creates the hibernate attribute
+ * on its root device.  The subsystem is unregistered again if the
+ * attribute cannot be created.
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int pseries_suspend_sysfs_register(struct device *dev)
+{
+	struct device *root;
+	int rc;
+
+	rc = subsys_system_register(&suspend_subsys, NULL);
+	if (rc)
+		return rc;
+
+	dev->id = 0;
+	dev->bus = &suspend_subsys;
+
+	root = bus_get_dev_root(&suspend_subsys);
+	if (!root)
+		return 0;
+
+	rc = device_create_file(root, &dev_attr_hibernate);
+	put_device(root);
+	if (!rc)
+		return 0;
+
+	bus_unregister(&suspend_subsys);
+	return rc;
+}
+
+/**
+ * pseries_suspend_init - initcall for pSeries suspend
+ *
+ * Does nothing unless the firmware reports the LPAR feature.  The
+ * suspend operations are only installed once sysfs registration has
+ * succeeded.
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int __init pseries_suspend_init(void)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	rc = pseries_suspend_sysfs_register(&suspend_dev);
+	if (!rc)
+		suspend_set_ops(&pseries_suspend_ops);
+
+	return rc;
+}
+machine_device_initcall(pseries, pseries_suspend_init);
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
new file mode 100644
index 0000000000..3b4045d508
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Secure VM platform
+ *
+ * Copyright 2018 IBM Corporation
+ * Author: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/cc_platform.h>
+#include <asm/machdep.h>
+#include <asm/svm.h>
+#include <asm/swiotlb.h>
+#include <asm/ultravisor.h>
+#include <asm/dtl.h>
+
+/*
+ * Early initcall for pseries.  Does nothing unless running as a secure
+ * guest; in that case the SWIOTLB buffer is kept, its use is forced for
+ * all DMA, and its memory attributes are updated.  Always returns 0.
+ */
+static int __init init_svm(void)
+{
+	if (!is_secure_guest())
+		return 0;
+
+	/* Don't release the SWIOTLB buffer. */
+	ppc_swiotlb_enable = 1;
+
+	/*
+	 * Since the guest memory is inaccessible to the host, devices always
+	 * need to use the SWIOTLB buffer for DMA even if dma_capable() says
+	 * otherwise.
+	 */
+	ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE;
+
+	/* Share the SWIOTLB buffer with the host. */
+	swiotlb_update_mem_attributes();
+
+	return 0;
+}
+machine_early_initcall(pseries, init_svm);
+
+/*
+ * Unshare @numpages pages starting at the page-aligned virtual address
+ * @addr.  A no-op returning 0 when the platform does not have
+ * CC_ATTR_MEM_ENCRYPT; -EINVAL when @addr is not page aligned.
+ */
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+		if (!PAGE_ALIGNED(addr))
+			return -EINVAL;
+
+		uv_unshare_page(PHYS_PFN(__pa(addr)), numpages);
+	}
+
+	return 0;
+}
+
+/*
+ * Share @numpages pages starting at the page-aligned virtual address
+ * @addr.  A no-op returning 0 when the platform does not have
+ * CC_ATTR_MEM_ENCRYPT; -EINVAL when @addr is not page aligned.
+ */
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+		if (!PAGE_ALIGNED(addr))
+			return -EINVAL;
+
+		uv_share_page(PHYS_PFN(__pa(addr)), numpages);
+	}
+
+	return 0;
+}
+
+/*
+ * There's one dispatch log per CPU, so this is the number of pages
+ * needed to hold DISPATCH_LOG_BYTES for each of CONFIG_NR_CPUS cpus.
+ */
+#define NR_DTL_PAGE (DISPATCH_LOG_BYTES * CONFIG_NR_CPUS / PAGE_SIZE)
+
+/* Pages already passed to uv_share_page() by dtl_cache_ctor() */
+static struct page *dtl_page_store[NR_DTL_PAGE];
+/* Number of valid entries at the start of dtl_page_store[] */
+static long dtl_nr_pages;
+
+/* Linear search of dtl_page_store[]: has @page been recorded already? */
+static bool is_dtl_page_shared(struct page *page)
+{
+	long idx = 0;
+
+	while (idx < dtl_nr_pages) {
+		if (dtl_page_store[idx] == page)
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Constructor callback for a dispatch log buffer at @addr (presumably
+ * used as a slab constructor; confirm against the caller).
+ *
+ * The page backing @addr is passed to uv_share_page() the first time it
+ * is seen and recorded in dtl_page_store[] so later objects on the same
+ * page do not share it again.
+ */
+void dtl_cache_ctor(void *addr)
+{
+	unsigned long pfn = PHYS_PFN(__pa(addr));
+	struct page *page = pfn_to_page(pfn);
+
+	if (is_dtl_page_shared(page))
+		return;
+
+	/*
+	 * Check for room before storing: writing the entry first and
+	 * warning afterwards could write past the end of the array.  If
+	 * the table is unexpectedly full, warn and leave the page
+	 * unrecorded, but still share it below.
+	 */
+	if (!WARN_ON(dtl_nr_pages >= NR_DTL_PAGE))
+		dtl_page_store[dtl_nr_pages++] = page;
+
+	uv_share_page(pfn, 1);
+}
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
new file mode 100644
index 0000000000..f9f682724e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2022-23 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "vas.h"
+
+#ifdef CONFIG_SYSFS
+/* "vas0" directory kobject, created by sysfs_pseries_vas_init() */
+static struct kobject *pseries_vas_kobj;
+/* "gzip" directory kobject below vas0; parent of the capability dirs */
+static struct kobject *gzip_caps_kobj;
+
+/*
+ * One sysfs capability directory: the embedded kobject plus the
+ * capabilities it exposes.  Freed by vas_type_release().
+ */
+struct vas_caps_entry {
+	struct kobject kobj;
+	struct vas_cop_feat_caps *caps;
+};
+
+/* Map an embedded kobject back to its struct vas_caps_entry */
+#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
+
+/*
+ * This function is used to get the notification from the drmgr when
+ * QoS credits are changed.
+ *
+ * @buf is parsed as a u16 credit count.  Returns @count on success and
+ * -EINVAL if parsing or the reconfiguration fails.
+ */
+static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps,
+						const char *buf, size_t count)
+{
+	u16 creds;
+
+	if (kstrtou16(buf, 0, &creds))
+		return -EINVAL;
+
+	/*
+	 * The user space interface from the management console
+	 * notifies OS with the new QoS credits and then the
+	 * hypervisor. So OS has to use this new credits value
+	 * and reconfigure VAS windows (close or reopen depends
+	 * on the credits available) instead of depending on VAS
+	 * QoS capabilities from the hypervisor.
+	 */
+	if (vas_reconfig_capabilties(caps->win_type, creds))
+		return -EINVAL;
+
+	pr_info("Set QoS total credits %u\n", creds);
+
+	return count;
+}
+
+/*
+ * Generate <_name>_show(), which prints the atomic counter caps->_name
+ * as a decimal number followed by a newline.
+ */
+#define sysfs_caps_entry_read(_name)					\
+static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) 	\
+{									\
+	return sprintf(buf, "%d\n", atomic_read(&caps->_name));	\
+}
+
+/*
+ * Attribute wrapper whose show/store callbacks receive the capability
+ * structure rather than the kobject; dispatched by vas_type_show() and
+ * vas_type_store().
+ */
+struct vas_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct vas_cop_feat_caps *, char *);
+	ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t);
+};
+
+/* Define a read-only (0444) attribute <_name>_attribute and its show() */
+#define VAS_ATTR_RO(_name)	\
+	sysfs_caps_entry_read(_name);		\
+	static struct vas_sysfs_entry _name##_attribute = __ATTR(_name,	\
+				0444, _name##_show, NULL);
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities
+ *	This directory contains the following VAS GZIP capabilities
+ *	for the default credit type.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_total_credits
+ *	Total number of default credits assigned to the LPAR which
+ *	can be changed with DLPAR operation.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_used_credits
+ *	Number of credits used by the user space. One credit will
+ *	be assigned for each window open.
+ *
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities
+ *	This directory contains the following VAS GZIP capabilities
+ *	for the Quality of Service (QoS) credit type.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_total_credits
+ *	Total number of QoS credits assigned to the LPAR. The user
+ *	has to define this value using HMC interface. It can be
+ *	changed dynamically by the user.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_used_credits
+ *	Number of credits used by the user space.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/update_total_credits
+ *	Update total QoS credits dynamically
+ */
+
+/* Read-only counters shared by both capability directories */
+VAS_ATTR_RO(nr_total_credits);
+VAS_ATTR_RO(nr_used_credits);
+
+/* Write-only (0200) trigger, present only in the QoS directory */
+static struct vas_sysfs_entry update_total_credits_attribute =
+	__ATTR(update_total_credits, 0200, NULL, update_total_credits_store);
+
+/* Attributes of the default_capabilities directory */
+static struct attribute *vas_def_capab_attrs[] = {
+	&nr_total_credits_attribute.attr,
+	&nr_used_credits_attribute.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vas_def_capab);
+
+/* Attributes of the qos_capabilities directory */
+static struct attribute *vas_qos_capab_attrs[] = {
+	&nr_total_credits_attribute.attr,
+	&nr_used_credits_attribute.attr,
+	&update_total_credits_attribute.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vas_qos_capab);
+
+/*
+ * sysfs_ops .show: forward the read to the attribute's own show()
+ * callback with the capabilities of the owning directory.  Returns
+ * -EIO when the attribute has no show() callback.
+ */
+static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct vas_sysfs_entry *entry =
+		container_of(attr, struct vas_sysfs_entry, attr);
+	struct vas_caps_entry *centry = to_caps_entry(kobj);
+
+	if (entry->show)
+		return entry->show(centry->caps, buf);
+
+	return -EIO;
+}
+
+/*
+ * sysfs_ops .store: forward the write to the attribute's own store()
+ * callback with the capabilities of the owning directory.  Returns
+ * -EIO when the attribute has no store() callback.
+ */
+static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct vas_sysfs_entry *entry =
+		container_of(attr, struct vas_sysfs_entry, attr);
+	struct vas_caps_entry *centry = to_caps_entry(kobj);
+
+	if (entry->store)
+		return entry->store(centry->caps, buf, count);
+
+	return -EIO;
+}
+
+/* kobj_type .release: free the vas_caps_entry that embeds @kobj */
+static void vas_type_release(struct kobject *kobj)
+{
+	kfree(to_caps_entry(kobj));
+}
+
+/* Common show/store dispatchers for both capability directories */
+static const struct sysfs_ops vas_sysfs_ops = {
+	.show	=	vas_type_show,
+	.store	=	vas_type_store,
+};
+
+/* kobject type of the default_capabilities directory */
+static struct kobj_type vas_def_attr_type = {
+		.release	=	vas_type_release,
+		.sysfs_ops      =       &vas_sysfs_ops,
+		.default_groups	=	vas_def_capab_groups,
+};
+
+/* kobject type of the qos_capabilities directory */
+static struct kobj_type vas_qos_attr_type = {
+		.release	=	vas_type_release,
+		.sysfs_ops	=	&vas_sysfs_ops,
+		.default_groups	=	vas_qos_capab_groups,
+};
+
+/*
+ * Pick the sysfs directory name for @centry from its capability
+ * descriptor.  For a known descriptor the embedded kobject is
+ * initialised with the matching type and *@kobj is set to the gzip
+ * directory; for anything else "Unknown" is returned and neither the
+ * kobject nor *@kobj is touched.
+ */
+static char *vas_caps_kobj_name(struct vas_caps_entry *centry,
+				struct kobject **kobj)
+{
+	struct kobj_type *ktype;
+	char *dirname;
+
+	if (centry->caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) {
+		ktype = &vas_qos_attr_type;
+		dirname = "qos_capabilities";
+	} else if (centry->caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) {
+		ktype = &vas_def_attr_type;
+		dirname = "default_capabilities";
+	} else {
+		return "Unknown";
+	}
+
+	kobject_init(&centry->kobj, ktype);
+	*kobj = gzip_caps_kobj;
+
+	return dirname;
+}
+
+/*
+ * Add feature specific capability dir entry.
+ * Ex: VDefGzip or VQosGzip
+ *
+ * Returns 0 on success, and also when there is no parent directory to
+ * attach the entry to (nothing is exported in that case).  Returns
+ * -ENOMEM if the entry cannot be allocated, or the kobject_add() error.
+ */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+	struct vas_caps_entry *centry;
+	struct kobject *kobj = NULL;
+	int ret;
+	char *name;
+
+	centry = kzalloc(sizeof(*centry), GFP_KERNEL);
+	if (!centry)
+		return -ENOMEM;
+
+	centry->caps = caps;
+	name  = vas_caps_kobj_name(centry, &kobj);
+
+	if (!kobj) {
+		/*
+		 * No parent: either the descriptor is unknown (kobject
+		 * never initialised) or the gzip directory does not
+		 * exist (kobject initialised but not added).  Release
+		 * the entry instead of leaking it; an initialised
+		 * kobject must go through kobject_put() so that
+		 * vas_type_release() frees it.
+		 */
+		if (centry->kobj.state_initialized)
+			kobject_put(&centry->kobj);
+		else
+			kfree(centry);
+		return 0;
+	}
+
+	ret = kobject_add(&centry->kobj, kobj, "%s", name);
+	if (ret) {
+		pr_err("VAS: sysfs kobject add / event failed %d\n",
+				ret);
+		/* Drops the last reference, which frees centry */
+		kobject_put(&centry->kobj);
+	}
+
+	return ret;
+}
+
+/*
+ * Misc device "vas" with a dynamically assigned minor; its device
+ * kobject is the parent of the "vas0" sysfs directory.
+ */
+static struct miscdevice vas_miscdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "vas",
+};
+
+/*
+ * Add VAS and VasCaps (overall capabilities) dir entries.
+ *
+ * Registers the "vas" misc device, creates "vas0" below it and, when
+ * @vas_caps advertises a GZIP feature, a "gzip" directory below that.
+ * Everything created so far is torn down again on failure.
+ */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+	int ret = misc_register(&vas_miscdev);
+
+	if (ret < 0) {
+		pr_err("%s: register vas misc device failed\n", __func__);
+		return ret;
+	}
+
+	/*
+	 * The hypervisor does not expose multiple VAS instances, but can
+	 * see multiple VAS instances on PowerNV. So create 'vas0' directory
+	 * on pseries.
+	 */
+	pseries_vas_kobj = kobject_create_and_add("vas0",
+					&vas_miscdev.this_device->kobj);
+	if (!pseries_vas_kobj) {
+		misc_deregister(&vas_miscdev);
+		pr_err("Failed to create VAS sysfs entry\n");
+		return -ENOMEM;
+	}
+
+	/* Without a GZIP feature there is nothing more to create */
+	if (!(vas_caps->feat_type &
+	      (VAS_GZIP_QOS_FEAT_BIT | VAS_GZIP_DEF_FEAT_BIT)))
+		return 0;
+
+	gzip_caps_kobj = kobject_create_and_add("gzip", pseries_vas_kobj);
+	if (gzip_caps_kobj)
+		return 0;
+
+	pr_err("Failed to create VAS GZIP capability entry\n");
+	kobject_put(pseries_vas_kobj);
+	misc_deregister(&vas_miscdev);
+	return -ENOMEM;
+}
+
+#else
+/* Stub used without CONFIG_SYSFS: nothing to export, report success */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+	return 0;
+}
+
+/* Stub used without CONFIG_SYSFS: nothing to set up, report success */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644
index 0000000000..71d52a670d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -0,0 +1,1121 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+#include <asm/vphn.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+/* Window address value that h_allocate_vas_window() treats as "no copy/paste" */
+#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
+/* Domain value (-1) that lets the hypervisor select the VAS instance */
+#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS		1
+
+/* NOTE(review): presumably the overall capabilities from the hypervisor - confirm */
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+static struct hv_vas_cop_feat_caps hv_cop_caps;
+
+/* Per feature type capabilities, indexed by VAS_GZIP_*_FEAT_TYPE */
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+/* Protects window allocate / deallocate hcalls and IRQ setup / free */
+static DEFINE_MUTEX(vas_pseries_mutex);
+/* While set, vas_allocate_window() refuses new windows with -EBUSY */
+static bool migration_in_progress;
+
+/*
+ * Common back-off for busy hcall results.  A "long busy" status sleeps
+ * for the interval it encodes and is folded into H_BUSY; a plain H_BUSY
+ * just yields the cpu.  Any other status is returned untouched, so
+ * callers can simply loop while the result is H_BUSY.
+ */
+static long hcall_return_busy_check(long rc)
+{
+	/* Check if we are stalled for some time */
+	if (H_IS_LONG_BUSY(rc)) {
+		msleep(get_longbusy_msecs(rc));
+		return H_BUSY;
+	}
+
+	if (rc == H_BUSY)
+		cond_resched();
+
+	return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ *
+ * @win:	window to fill in from the hcall results
+ * @domain:	six domain values passed through to the hypervisor
+ * @wintype:	window type
+ * @credits:	number of credits requested for the window
+ *
+ * The hcall is retried for as long as the hypervisor reports busy.  On
+ * success the window ID, window address and the completion and fault
+ * interrupt numbers are stored in @win.
+ *
+ * Returns 0 on success, -ENOTSUPP if the hypervisor returned an invalid
+ * window address, and -EIO for any other hcall failure.
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+				     u8 wintype, u16 credits)
+{
+	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	long rc;
+
+	do {
+		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+				  credits, domain[0], domain[1], domain[2],
+				  domain[3], domain[4], domain[5]);
+
+		rc = hcall_return_busy_check(rc);
+	} while (rc == H_BUSY);
+
+	if (rc == H_SUCCESS) {
+		/*
+		 * Validate the address the hypervisor just returned
+		 * (retbuf[1]), not win->win_addr, which has not been
+		 * assigned yet at this point.
+		 * NOTE(review): the window allocated by the hypervisor is
+		 * not released on this path - confirm whether a
+		 * deallocate hcall is wanted here.
+		 */
+		if ((u64)retbuf[1] == VAS_INVALID_WIN_ADDRESS) {
+			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+			return -ENOTSUPP;
+		}
+		win->vas_win.winid = retbuf[0];
+		win->win_addr = retbuf[1];
+		win->complete_irq = retbuf[2];
+		win->fault_irq = retbuf[3];
+		return 0;
+	}
+
+	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+		rc, wintype, credits);
+
+	return -EIO;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ *
+ * Retries while the hypervisor reports busy.  Returns 0 on success and
+ * -EIO (after logging the hcall status) otherwise.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+	long rc;
+
+	do {
+		rc = hcall_return_busy_check(
+			plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid));
+	} while (rc == H_BUSY);
+
+	if (rc != H_SUCCESS) {
+		pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+			rc, winid);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ *
+ * Retries while the hypervisor reports busy.  Returns 0 on success and
+ * -EIO (after logging the hcall status) otherwise.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+	long rc;
+
+	/*
+	 * AMR value is not supported in Linux VAS implementation.
+	 * The hypervisor ignores it if 0 is passed.
+	 */
+	do {
+		rc = hcall_return_busy_check(
+			plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+					   win->vas_win.winid, win->pid, 0,
+					   VAS_MOD_WIN_FLAGS, 0));
+	} while (rc == H_BUSY);
+
+	if (rc != H_SUCCESS) {
+		pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
+				rc, win->vas_win.winid, win->pid);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ *		capabilities which provides all feature(s) that are
+ *		available. Then query the hypervisor to get the
+ *		corresponding capabilities for the specific feature.
+ *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ *			and VAS GZIP Default capabilities.
+ *			H_QUERY_NX_CAPABILITIES provides NX GZIP
+ *			capabilities.
+ * @result: Return buffer to save capabilities.
+ *
+ * Returns 0 on success and -EIO on any failure.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+	const char *hcall_name;
+	long rc = plpar_hcall_norets(hcall, query_type, result);
+
+	if (rc == H_SUCCESS)
+		return 0;
+
+	/* H_FUNCTION means HV does not support VAS so don't print an error */
+	if (rc == H_FUNCTION)
+		return -EIO;
+
+	if (hcall == H_QUERY_VAS_CAPABILITIES)
+		hcall_name = "H_QUERY_VAS_CAPABILITIES";
+	else
+		hcall_name = "H_QUERY_NX_CAPABILITIES";
+
+	pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
+		hcall_name, rc, query_type, result);
+
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ *
+ * @winid identifies the window and @buffer is the address the CRB is
+ * written to.  Returns 0 on success and -EIO (after logging the hcall
+ * status) otherwise.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+	long rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+	if (rc != H_SUCCESS) {
+		pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+			rc, winid, buffer);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
+ *
+ * Runs as the threaded half of the window's fault IRQ and drains the
+ * pending_faults counter bumped by pseries_vas_irq_handler().
+ */
+static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+	struct pseries_vas_window *win = data;
+	struct coprocessor_request_block crb;
+
+	while (atomic_read(&win->pending_faults)) {
+		int err = h_get_nx_fault(win->vas_win.winid,
+					 (u64)virt_to_phys(&crb));
+
+		if (err == 0) {
+			vas_dump_crb(&crb);
+			vas_update_csb(&crb, &win->vas_win.task_ref);
+		}
+
+		/* One pending fault is consumed even if the hcall failed */
+		atomic_dec(&win->pending_faults);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * irq_default_primary_handler() can be used only with IRQF_ONESHOT
+ * which disables IRQ before executing the thread handler and enables
+ * it after. But this disabling interrupt sets the VAS IRQ OFF
+ * state in the hypervisor. If the NX generates fault interrupt
+ * during this window, the hypervisor will not deliver this
+ * interrupt to the LPAR. So use VAS specific IRQ handler instead
+ * of calling the default primary handler.
+ */
+static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
+{
+	struct pseries_vas_window *win = data;
+
+	/*
+	 * Count the fault; the thread handler will process this
+	 * interrupt too if it is already running.
+	 */
+	atomic_inc(&win->pending_faults);
+
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ *
+ * @txwin:	window to allocate and set up
+ * @domain:	domain values passed on to the allocate window hcall
+ * @wintype:	window type passed on to the allocate window hcall
+ *
+ * Returns 0 on success.  On failure everything done so far is undone in
+ * reverse order (name freed, IRQ mapping disposed, window deallocated)
+ * and a negative error code is returned.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+				 u64 *domain, u8 wintype)
+{
+	int rc;
+
+	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+	if (rc)
+		return rc;
+	/*
+	 * On PowerVM, the hypervisor setup and forwards the fault
+	 * interrupt per window. So the IRQ setup and fault handling
+	 * will be done for each open window separately.
+	 */
+	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+	if (!txwin->fault_virq) {
+		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+		rc = -EINVAL;
+		goto out_win;
+	}
+
+	/* The name is passed to request_threaded_irq() below */
+	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+				txwin->vas_win.winid);
+	if (!txwin->name) {
+		rc = -ENOMEM;
+		goto out_irq;
+	}
+
+	/* No IRQF_ONESHOT: see the comment above pseries_vas_irq_handler() */
+	rc = request_threaded_irq(txwin->fault_virq,
+				  pseries_vas_irq_handler,
+				  pseries_vas_fault_thread_fn, 0,
+				  txwin->name, txwin);
+	if (rc) {
+		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+		       txwin->vas_win.winid, txwin->fault_virq, rc);
+		goto out_free;
+	}
+
+	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+	return 0;
+	/* Error unwind: each label undoes one earlier step */
+out_free:
+	kfree(txwin->name);
+out_irq:
+	irq_dispose_mapping(txwin->fault_virq);
+out_win:
+	h_deallocate_vas_window(txwin->vas_win.winid);
+	return rc;
+}
+
+/*
+ * Undo the IRQ part of allocate_setup_window(): free the fault IRQ,
+ * free the name that was passed to request_threaded_irq(), then dispose
+ * of the IRQ mapping.  The window itself is not deallocated here.
+ */
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+	free_irq(txwin->fault_virq, txwin);
+	kfree(txwin->name);
+	irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+					      enum vas_cop_type cop_type)
+{
+	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+	struct vas_cop_feat_caps *cop_feat_caps;
+	struct vas_caps *caps;
+	struct pseries_vas_window *txwin;
+	int rc;
+
+	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+	if (!txwin)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * A VAS window can have many credits which means that many
+	 * requests can be issued simultaneously. But the hypervisor
+	 * restricts one credit per window.
+	 * The hypervisor introduces 2 different types of credits:
+	 * Default credit type (Uses normal priority FIFO):
+	 *	A limited number of credits are assigned to partitions
+	 *	based on processor entitlement. But these credits may be
+	 *	over-committed on a system depends on whether the CPUs
+	 *	are in shared or dedicated modes - that is, more requests
+	 *	may be issued across the system than NX can service at
+	 *	once which can result in paste command failure (RMA_busy).
+	 *	Then the process has to resend requests or fall-back to
+	 *	SW compression.
+	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
+	 *	To avoid NX HW contention, the system admins can assign
+	 *	QoS credits for each LPAR so that this partition is
+	 *	guaranteed access to NX resources. These credits are
+	 *	assigned to partitions via the HMC.
+	 *	Refer PAPR for more information.
+	 *
+	 * Allocate window with QoS credits if user requested. Otherwise
+	 * default credits are used.
+	 */
+	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+	else
+		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+	cop_feat_caps = &caps->caps;
+
+	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
+			atomic_read(&cop_feat_caps->nr_total_credits)) {
+		pr_err_ratelimited("Credits are not available to allocate window\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (vas_id == -1) {
+		/*
+		 * The user space is requesting to allocate a window on
+		 * a VAS instance where the process is executing.
+		 * On PowerVM, domain values are passed to the hypervisor
+		 * to select VAS instance. Useful if the process is
+		 * affinity to NUMA node.
+		 * The hypervisor selects VAS instance if
+		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+		 * The h_allocate_vas_window hcall is defined to take a
+		 * domain values as specified by h_home_node_associativity,
+		 * So no unpacking needs to be done.
+		 */
+		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+				  VPHN_FLAG_VCPU, hard_smp_processor_id());
+		if (rc != H_SUCCESS) {
+			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+			goto out;
+		}
+	}
+
+	txwin->pid = mfspr(SPRN_PID);
+
+	/*
+	 * Allocate / Deallocate window hcalls and setup / free IRQs
+	 * have to be protected with mutex.
+	 * Open VAS window: Allocate window hcall and setup IRQ
+	 * Close VAS window: Deallocate window hcall and free IRQ
+	 *	The hypervisor waits until all NX requests are
+	 *	completed before closing the window. So expects OS
+	 *	to handle NX faults, means IRQ can be freed only
+	 *	after the deallocate window hcall is returned.
+	 * So once the window is closed with deallocate hcall before
+	 * the IRQ is freed, it can be assigned to new allocate
+	 * hcall with the same fault IRQ by the hypervisor. It can
+	 * result in setup IRQ fail for the new window since the
+	 * same fault IRQ is not freed by the OS before.
+	 */
+	mutex_lock(&vas_pseries_mutex);
+	if (migration_in_progress) {
+		rc = -EBUSY;
+	} else {
+		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+				   cop_feat_caps->win_type);
+		if (!rc)
+			caps->nr_open_wins_progress++;
+	}
+
+	mutex_unlock(&vas_pseries_mutex);
+	if (rc)
+		goto out;
+
+	/*
+	 * Modify window and it is ready to use.
+	 */
+	rc = h_modify_vas_window(txwin);
+	if (!rc)
+		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+	if (rc)
+		goto out_free;
+
+	txwin->win_type = cop_feat_caps->win_type;
+
+	/*
+	 * The migration SUSPEND thread sets migration_in_progress and
+	 * closes all open windows from the list. But the window is
+	 * added to the list after open and modify HCALLs. So possible
+	 * that migration_in_progress is set before modify HCALL which
+	 * may cause some windows are still open when the hypervisor
+	 * initiates the migration.
+	 * So checks the migration_in_progress flag again and close all
+	 * open windows.
+	 *
+	 * Possible to lose the acquired credit with DLPAR core
+	 * removal after the window is opened. So if there are any
+	 * closed windows (means with lost credits), do not give new
+	 * window to user space. New windows will be opened only
+	 * after the existing windows are reopened when credits are
+	 * available.
+	 */
+	mutex_lock(&vas_pseries_mutex);
+	if (!caps->nr_close_wins && !migration_in_progress) {
+		list_add(&txwin->win_list, &caps->list);
+		caps->nr_open_windows++;
+		caps->nr_open_wins_progress--;
+		mutex_unlock(&vas_pseries_mutex);
+		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+		return &txwin->vas_win;
+	}
+	mutex_unlock(&vas_pseries_mutex);
+
+	put_vas_user_win_ref(&txwin->vas_win.task_ref);
+	rc = -EBUSY;
+	pr_err_ratelimited("No credit is available to allocate window\n");
+
+out_free:
+	/*
+	 * Window is not operational. Free IRQ before closing
+	 * window so that do not have to hold mutex.
+	 */
+	free_irq_setup(txwin);
+	h_deallocate_vas_window(txwin->vas_win.winid);
+	/*
+	 * Hold mutex and reduce nr_open_wins_progress counter.
+	 */
+	mutex_lock(&vas_pseries_mutex);
+	caps->nr_open_wins_progress--;
+	mutex_unlock(&vas_pseries_mutex);
+out:
+	atomic_dec(&cop_feat_caps->nr_used_credits);
+	kfree(txwin);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Return the paste address stored in the pseries window that embeds
+ * @vwin.
+ */
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+	return container_of(vwin, struct pseries_vas_window,
+			    vas_win)->win_addr;
+}
+
+/*
+ * Close @win in the hypervisor and, only when that succeeded, release
+ * its IRQ setup.
+ *
+ * The hypervisor closes the window only after all requests, faults
+ * included, are processed, i.e. after all credits are returned. For a
+ * fault request the credit is returned once the OS has issued the
+ * H_GET_NX_FAULT hcall, so the IRQ must stay in place until the
+ * H_DEALLOCATE_VAS_WINDOW hcall has completed.
+ *
+ * Returns 0 on success, otherwise the h_deallocate_vas_window() error.
+ */
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+	int ret = h_deallocate_vas_window(win->vas_win.winid);
+
+	if (ret)
+		return ret;
+
+	free_irq_setup(win);
+	return 0;
+}
+
+/*
+ * Close a window on behalf of user space (the close_win operation).
+ *
+ * Returns -EINVAL for a NULL window or an out-of-range window type,
+ * the deallocate_free_window() error if the hypervisor close failed
+ * (the window is then kept), and 0 once the window is unlinked and
+ * freed.
+ */
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+	struct pseries_vas_window *txwin;
+	struct vas_caps *vcaps;
+	int ret;
+
+	if (!vwin)
+		return -EINVAL;
+
+	txwin = container_of(vwin, struct pseries_vas_window, vas_win);
+
+	/* Should not happen */
+	if (txwin->win_type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Window (%u): Invalid window type %u\n",
+				vwin->winid, txwin->win_type);
+		return -EINVAL;
+	}
+
+	vcaps = &vascaps[txwin->win_type];
+
+	mutex_lock(&vas_pseries_mutex);
+	if (vwin->status & (VAS_WIN_NO_CRED_CLOSE | VAS_WIN_MIGRATE_CLOSE)) {
+		/*
+		 * The window was already closed in the hypervisor when
+		 * the credit was lost or for migration. Only the list
+		 * entry, the task references and the window struct
+		 * remain to be dropped.
+		 */
+		vcaps->nr_close_wins--;
+	} else {
+		ret = deallocate_free_window(txwin);
+		if (ret) {
+			mutex_unlock(&vas_pseries_mutex);
+			return ret;
+		}
+	}
+
+	list_del(&txwin->win_list);
+	atomic_dec(&vcaps->caps.nr_used_credits);
+	vcaps->nr_open_windows--;
+	mutex_unlock(&vas_pseries_mutex);
+
+	mm_context_remove_vas_window(vwin->task_ref.mm);
+	put_vas_user_win_ref(&vwin->task_ref);
+
+	kfree(txwin);
+	return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+	.open_win	= vas_allocate_window,	/* Open and configure window */
+	.paste_addr	= vas_paste_address,	/* Paste address for copy/paste */
+	.close_win	= vas_deallocate_window, /* Close window and free it */
+};
+
+/*
+ * Register the pseries window operations for @cop_type. Only the
+ * nx-gzip coprocessor type is supported for now, but this API can be
+ * extended to other coprocessor types later.
+ *
+ * Returns -ENOTSUPP when the copy/paste feature is not available,
+ * otherwise the result of vas_register_coproc_api().
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+			     const char *name)
+{
+	if (copypaste_feat)
+		return vas_register_coproc_api(mod, cop_type, name,
+					       &vops_pseries);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+	/* Counterpart of the vas_register_coproc_api() call made at register time */
+	vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Query the hypervisor for the capabilities of feature @feat and cache
+ * them in vascaps[@type]. Right now the GZIP default and GZIP QoS
+ * capabilities are supported.
+ *
+ * @hv_caps is the buffer whose physical address is passed to the
+ * hypervisor; its multi-byte fields are converted from big endian.
+ *
+ * Returns 0 on success (copypaste_feat is set in that case), the
+ * h_query_vas_capabilities() or sysfs_add_vas_caps() error, -ENOTSUPP
+ * when user space copy/paste is not offered, or -EINVAL for an unknown
+ * window type or a per-window credit limit below DEF_WIN_CREDS.
+ */
+static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+				struct hv_vas_cop_feat_caps *hv_caps)
+{
+	struct vas_caps *entry = &vascaps[type];
+	struct vas_cop_feat_caps *fcaps = &entry->caps;
+	int ret;
+
+	/* Start from a zeroed slot with an empty window list */
+	memset(entry, 0, sizeof(*entry));
+	INIT_LIST_HEAD(&entry->list);
+	entry->feat = feat;
+
+	ret = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+					  (u64)virt_to_phys(hv_caps));
+	if (ret)
+		return ret;
+
+	fcaps->user_mode = hv_caps->user_mode;
+	if ((fcaps->user_mode & VAS_COPY_PASTE_USER_MODE) == 0) {
+		pr_err("User space COPY/PASTE is not supported\n");
+		return -ENOTSUPP;
+	}
+
+	fcaps->descriptor = be64_to_cpu(hv_caps->descriptor);
+	fcaps->win_type = hv_caps->win_type;
+	if (fcaps->win_type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Unsupported window type %u\n", fcaps->win_type);
+		return -EINVAL;
+	}
+
+	fcaps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+	fcaps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+	atomic_set(&fcaps->nr_total_credits,
+		   be16_to_cpu(hv_caps->target_lpar_creds));
+
+	if (feat == VAS_GZIP_DEF_FEAT) {
+		fcaps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+		/* A default window needs DEF_WIN_CREDS credits */
+		if (DEF_WIN_CREDS > fcaps->max_win_creds) {
+			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+			       DEF_WIN_CREDS, fcaps->max_win_creds);
+			return -EINVAL;
+		}
+	}
+
+	ret = sysfs_add_vas_caps(fcaps);
+	if (ret)
+		return ret;
+
+	copypaste_feat = true;
+	return 0;
+}
+
+/*
+ * VAS windows can be closed due to lost credits when the core is
+ * removed. So reopen them if credits are available due to DLPAR
+ * core add and set the window active status. When NX sees the page
+ * fault on the unmapped paste address, the kernel handles the fault
+ * by setting the remapping to new paste address if the window is
+ * active.
+ *
+ * @vcaps:   feature entry whose window list is walked
+ * @creds:   credits that became available; only used to work out how
+ *           many closed windows must stay closed when @migrate is false
+ * @migrate: true to reopen windows closed with VAS_WIN_MIGRATE_CLOSE,
+ *           false to reopen windows closed with VAS_WIN_NO_CRED_CLOSE
+ *
+ * Both callers in this file hold vas_pseries_mutex around the call.
+ *
+ * Returns 0 on success (or when no window is closed), otherwise the
+ * error from allocate_setup_window() or h_modify_vas_window(). On
+ * error the walk stops and the remaining windows are left untouched.
+ */
+static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
+				 bool migrate)
+{
+	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+	struct vas_cop_feat_caps *caps = &vcaps->caps;
+	struct pseries_vas_window *win = NULL, *tmp;
+	int rc, mv_ents = 0;
+	int flag;
+
+	/*
+	 * Nothing to do if there are no closed windows.
+	 */
+	if (!vcaps->nr_close_wins)
+		return 0;
+
+	/*
+	 * For the core removal, the hypervisor reduces the credits
+	 * assigned to the LPAR and the kernel closes VAS windows
+	 * in the hypervisor depending on the reduced credits. The kernel
+	 * uses LIFO (the last windows that are opened will be closed
+	 * first) and expects to open in the same order when credits
+	 * are available.
+	 * For example, 40 windows are closed when the LPAR lost 2 cores
+	 * (dedicated). If 1 core is added, this LPAR can have 20 more
+	 * credits. It means the kernel can reopen 20 windows. So skip
+	 * the first 20 entries in the list and reopen the next 20 windows.
+	 * For partition migration, reopen all windows that are closed
+	 * during resume.
+	 *
+	 * The first loop below does no work on the windows; it only
+	 * advances 'win' past the mv_ents entries that stay closed so
+	 * that the second loop can continue from that position.
+	 */
+	if ((vcaps->nr_close_wins > creds) && !migrate)
+		mv_ents = vcaps->nr_close_wins - creds;
+
+	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
+		if (!mv_ents)
+			break;
+
+		mv_ents--;
+	}
+
+	/*
+	 * Open windows if they are closed only with migration or
+	 * DLPAR (lost credit) before.
+	 */
+	if (migrate)
+		flag = VAS_WIN_MIGRATE_CLOSE;
+	else
+		flag = VAS_WIN_NO_CRED_CLOSE;
+
+	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
+		/*
+		 * This window is closed with both DLPAR and migration
+		 * events. Only clear the flag of the current event here;
+		 * the window is reopened with the last event.
+		 * The user space is not suspended with the current
+		 * migration notifier. So the user space can issue DLPAR
+		 * CPU hotplug while migration in progress. In this case
+		 * this window will be opened with the last event.
+		 */
+		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+			win->vas_win.status &= ~flag;
+			continue;
+		}
+
+		/*
+		 * Nothing to do on this window if it is not closed
+		 * with this flag
+		 */
+		if (!(win->vas_win.status & flag))
+			continue;
+
+		rc = allocate_setup_window(win, (u64 *)&domain[0],
+					   caps->win_type);
+		if (rc)
+			return rc;
+
+		rc = h_modify_vas_window(win);
+		if (rc)
+			goto out;
+
+		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
+		/*
+		 * Set window status to active
+		 */
+		win->vas_win.status &= ~flag;
+		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
+		win->win_type = caps->win_type;
+		/* Stop as soon as the last closed window is reopened */
+		if (!--vcaps->nr_close_wins)
+			break;
+	}
+
+	return 0;
+out:
+	/*
+	 * Window modify HCALL failed. So close the window in the
+	 * hypervisor again (free its IRQ setup first) and return the
+	 * error. The close flag of this window is left set.
+	 */
+	free_irq_setup(win);
+	h_deallocate_vas_window(win->vas_win.winid);
+	return rc;
+}
+
+/*
+ * The hypervisor reduces the available credits if the LPAR lost core. It
+ * means the excessive windows should not be active and the user space
+ * should not be using these windows to send compression requests to NX.
+ * So the kernel closes the excessive windows and unmaps the paste address
+ * such that the user space receives paste instruction failure. Then it is
+ * up to the user space to fall back to SW compression and manage with the
+ * existing windows.
+ *
+ * @vcap:         feature entry whose window list is walked
+ * @excess_creds: number of active windows to close; not used to stop
+ *                the walk when @migrate is true
+ * @migrate:      true to close every active window for partition
+ *                migration (VAS_WIN_MIGRATE_CLOSE), false for a DLPAR
+ *                credit loss (VAS_WIN_NO_CRED_CLOSE)
+ *
+ * Both callers in this file hold vas_pseries_mutex around the call.
+ *
+ * Returns 0, or the deallocate_free_window() error when a DLPAR close
+ * fails. Close failures during migration are ignored.
+ */
+static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
+									bool migrate)
+{
+	struct pseries_vas_window *win, *tmp;
+	struct vas_user_win_ref *task_ref;
+	struct vm_area_struct *vma;
+	int rc = 0, flag;
+
+	if (migrate)
+		flag = VAS_WIN_MIGRATE_CLOSE;
+	else
+		flag = VAS_WIN_NO_CRED_CLOSE;
+
+	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
+		/*
+		 * This window is already closed due to lost credit
+		 * or for migration before. Only record the current
+		 * event in its status and go for the next window.
+		 * For migration, nothing to do since this window
+		 * closed for DLPAR and will be reopened even on
+		 * the destination system with other DLPAR operation.
+		 */
+		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
+			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
+			win->vas_win.status |= flag;
+			continue;
+		}
+
+		task_ref = &win->vas_win.task_ref;
+		/*
+		 * VAS mmap (coproc_mmap()) and its fault handler
+		 * (vas_mmap_fault()) are called after holding mmap lock.
+		 * So hold mmap mutex after mmap_lock to avoid deadlock.
+		 */
+		mmap_write_lock(task_ref->mm);
+		mutex_lock(&task_ref->mmap_mutex);
+		vma = task_ref->vma;
+		/*
+		 * Number of available credits are reduced, so select
+		 * and close windows.
+		 */
+		win->vas_win.status |= flag;
+
+		/*
+		 * vma is set in the original mapping. But this mapping
+		 * is done with mmap() after the window is opened with ioctl.
+		 * So we may not see the original mapping if the core remove
+		 * is done before the original mmap() and after the ioctl.
+		 */
+		if (vma)
+			zap_vma_pages(vma);
+
+		mutex_unlock(&task_ref->mmap_mutex);
+		mmap_write_unlock(task_ref->mm);
+		/*
+		 * Close VAS window in the hypervisor, but do not
+		 * free vas_window struct since it may be reused
+		 * when the credit is available later (DLPAR with
+		 * adding cores). This struct will be used
+		 * later when the process issues close(FD).
+		 */
+		rc = deallocate_free_window(win);
+		/*
+		 * This failure is from the hypervisor.
+		 * No way to stop migration for these failures.
+		 * So ignore error and continue closing other windows.
+		 * For DLPAR the error is returned to the caller; note
+		 * that the close flag set above stays in the status.
+		 */
+		if (rc && !migrate)
+			return rc;
+
+		vcap->nr_close_wins++;
+
+		/*
+		 * For migration, do not depend on lpar_creds in case if
+		 * mismatch with the hypervisor value (should not happen).
+		 * So close all active windows in the list; they will be
+		 * reopened based on the new lpar_creds on the
+		 * destination system during resume.
+		 */
+		if (!migrate && !--excess_creds)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a new total credit count to the feature @type after a core
+ * add/removal changed the configuration, and reopen or close windows
+ * to match the credits that are now available.
+ *
+ * Returns -EINVAL for an invalid @type, otherwise the result of the
+ * window reconfiguration (0 when nothing had to change).
+ */
+int vas_reconfig_capabilties(u8 type, int new_nr_creds)
+{
+	struct vas_caps *vcaps;
+	atomic_t *total;
+	int prev_creds, active, ret = 0;
+
+	if (type >= VAS_MAX_FEAT_TYPE) {
+		pr_err("Invalid credit type %d\n", type);
+		return -EINVAL;
+	}
+
+	vcaps = &vascaps[type];
+	total = &vcaps->caps.nr_total_credits;
+
+	/*
+	 * Windows may have to be closed / reopened below. Hold the
+	 * vas_pseries_mutex so that the user space can not open new
+	 * windows meanwhile.
+	 */
+	mutex_lock(&vas_pseries_mutex);
+
+	prev_creds = atomic_read(total);
+	atomic_set(total, new_nr_creds);
+
+	if (new_nr_creds > prev_creds) {
+		/*
+		 * More credits than before: reopen windows that were
+		 * closed by a previous DLPAR core removal.
+		 */
+		ret = reconfig_open_windows(vcaps, new_nr_creds - prev_creds,
+					    false);
+	} else {
+		/*
+		 * On pseries each window uses 1 credit. Close the
+		 * excessive windows if more are active than the LPAR
+		 * has credits now.
+		 */
+		active = vcaps->nr_open_windows - vcaps->nr_close_wins;
+		if (active > new_nr_creds)
+			ret = reconfig_close_windows(vcaps,
+						     active - new_nr_creds,
+						     false);
+	}
+
+	mutex_unlock(&vas_pseries_mutex);
+	return ret;
+}
+
+/*
+ * Re-read the default GZIP capabilities from the hypervisor and
+ * reconfigure the windows for the reported target credits.
+ *
+ * Returns 0 when NX-GZIP is not enabled or on success, otherwise the
+ * error from the capabilities hcall or from the reconfiguration.
+ */
+int pseries_vas_dlpar_cpu(void)
+{
+	int ret;
+
+	/*
+	 * NX-GZIP is not enabled. Nothing to do for DLPAR event
+	 */
+	if (!copypaste_feat)
+		return 0;
+
+	ret = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+				       vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+				       (u64)virt_to_phys(&hv_cop_caps));
+	if (ret == 0)
+		ret = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
+				be16_to_cpu(hv_cop_caps.target_lpar_creds));
+
+	if (ret != 0)
+		pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+	return ret;
+}
+
+/*
+ * Total number of default credits available (target_credits)
+ * in LPAR depends on number of cores configured. It varies based on
+ * whether processors are in shared mode or dedicated mode.
+ * Get the notifier when CPU configuration is changed with DLPAR
+ * operation so that get the new target_credits (vas default capabilities)
+ * and then update the existing windows usage if needed.
+ *
+ * @nb:     notifier block (unused)
+ * @action: OF reconfig action; only node attach / detach are handled
+ * @data:   struct of_reconfig_data describing the changed node
+ *
+ * Returns NOTIFY_OK when there is nothing to do or the update
+ * succeeded, otherwise the pseries_vas_dlpar_cpu() error encoded with
+ * notifier_from_errno().
+ */
+static int pseries_vas_notifier(struct notifier_block *nb,
+				unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	struct device_node *dn = rd->dn;
+	const __be32 *intserv = NULL;
+	int len;
+
+	/*
+	 * For shared CPU partition, the hypervisor assigns total credits
+	 * based on entitled core capacity. So updating VAS windows will
+	 * be called from lparcfg_write().
+	 */
+	if (is_shared_processor())
+		return NOTIFY_OK;
+
+	if ((action == OF_RECONFIG_ATTACH_NODE) ||
+		(action == OF_RECONFIG_DETACH_NODE))
+		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+					  &len);
+	/*
+	 * Processor config is not changed
+	 */
+	if (!intserv)
+		return NOTIFY_OK;
+
+	/*
+	 * A notifier callback has to return NOTIFY_* codes. Encode the
+	 * errno so that the notifier chain does not misinterpret the
+	 * bits of a raw negative value.
+	 */
+	return notifier_from_errno(pseries_vas_dlpar_cpu());
+}
+
+static struct notifier_block pseries_vas_nb = {
+	.notifier_call = pseries_vas_notifier,	/* OF reconfig (DLPAR) events */
+};
+
+/*
+ * For LPM, all windows have to be closed on the source partition
+ * before migration and reopen them on the destination partition
+ * after migration. So closing windows during suspend and
+ * reopen them during resume.
+ *
+ * @action: VAS_SUSPEND or VAS_RESUME
+ *
+ * Every feature type in vascaps[] is handled in turn. For
+ * VAS_SUSPEND failures are only logged, since migration can not be
+ * stopped. For VAS_RESUME the first failure (capabilities hcall or
+ * window reopen) ends the loop and is returned. Any other action
+ * returns -EINVAL.
+ *
+ * NOTE(review): migration_in_progress is written here without holding
+ * vas_pseries_mutex, and any action other than VAS_SUSPEND (including
+ * an invalid one) clears it before the action is validated - confirm
+ * this is intended.
+ */
+int vas_migration_handler(int action)
+{
+	struct vas_cop_feat_caps *caps;
+	int old_nr_creds, new_nr_creds = 0;
+	struct vas_caps *vcaps;
+	int i, rc = 0;
+
+	pr_info("VAS migration event %d\n", action);
+
+	/*
+	 * NX-GZIP is not enabled. Nothing to do for migration.
+	 */
+	if (!copypaste_feat)
+		return rc;
+
+	if (action == VAS_SUSPEND)
+		migration_in_progress = true;
+	else
+		migration_in_progress = false;
+
+	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
+		vcaps = &vascaps[i];
+		caps = &vcaps->caps;
+		old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+					      vcaps->feat,
+					      (u64)virt_to_phys(&hv_cop_caps));
+		if (!rc) {
+			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+			/*
+			 * Should not happen. But in case it does, print
+			 * messages, close all windows in the list during
+			 * suspend and reopen windows based on new
+			 * lpar_creds on the destination system.
+			 */
+			if (old_nr_creds != new_nr_creds) {
+				pr_err("Target credits mismatch with the hypervisor\n");
+				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
+					action, old_nr_creds, new_nr_creds);
+				pr_err("Used creds: %d, Active creds: %d\n",
+					atomic_read(&caps->nr_used_credits),
+					vcaps->nr_open_windows - vcaps->nr_close_wins);
+			}
+		} else {
+			pr_err("state(%d): Get VAS capabilities failed with %d\n",
+				action, rc);
+			/*
+			 * We can not stop migration with the current lpm
+			 * implementation. So continue closing all windows in
+			 * the list (during suspend) and return without
+			 * opening windows (during resume) if VAS capabilities
+			 * HCALL failed.
+			 */
+			if (action == VAS_RESUME)
+				goto out;
+		}
+
+		switch (action) {
+		case VAS_SUSPEND:
+			mutex_lock(&vas_pseries_mutex);
+			rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
+							true);
+			/*
+			 * Windows are included in the list after successful
+			 * open. So wait for closing these in-progress open
+			 * windows in vas_allocate_window() which will be
+			 * done if the migration_in_progress is set.
+			 * The mutex is dropped while sleeping so that those
+			 * opens can make progress.
+			 */
+			while (vcaps->nr_open_wins_progress) {
+				mutex_unlock(&vas_pseries_mutex);
+				msleep(10);
+				mutex_lock(&vas_pseries_mutex);
+			}
+			mutex_unlock(&vas_pseries_mutex);
+			break;
+		case VAS_RESUME:
+			mutex_lock(&vas_pseries_mutex);
+			atomic_set(&caps->nr_total_credits, new_nr_creds);
+			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
+			mutex_unlock(&vas_pseries_mutex);
+			break;
+		default:
+			/* should not happen */
+			pr_err("Invalid migration action %d\n", action);
+			rc = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Ignore errors during suspend and return for resume.
+		 */
+		if (rc && (action == VAS_RESUME))
+			goto out;
+	}
+
+	pr_info("VAS migration event (%d) successful\n", action);
+
+out:
+	return rc;
+}
+
+/*
+ * Discover the VAS capabilities offered by the hypervisor at boot,
+ * set up the sysfs entries and enable the copy/paste feature when the
+ * GZIP capabilities could be read.
+ *
+ * Returns 0 on success (also when no GZIP feature is advertised),
+ * -ENOTSUPP without radix page tables, -ENOMEM if the query buffer
+ * can not be allocated, otherwise the error from the capabilities
+ * hcall, the sysfs setup or get_vas_capabilities().
+ */
+static int __init pseries_vas_init(void)
+{
+	struct hv_vas_all_caps *hv_caps;
+	int rc = 0;
+
+	/*
+	 * Linux supports user space COPY/PASTE only with Radix
+	 */
+	if (!radix_enabled()) {
+		copypaste_feat = false;
+		pr_err("API is supported only with radix page tables\n");
+		return -ENOTSUPP;
+	}
+
+	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+	if (!hv_caps)
+		return -ENOMEM;
+	/*
+	 * Get VAS overall capabilities by passing 0 to feature type.
+	 */
+	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+					  (u64)virt_to_phys(hv_caps));
+	if (rc)
+		goto out;
+
+	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+	/*
+	 * Do not continue if the VAS sysfs setup failed: the per feature
+	 * capabilities read below are added to sysfs as well, and a
+	 * failure there already aborts the initialization.
+	 */
+	rc = sysfs_pseries_vas_init(&caps_all);
+	if (rc)
+		goto out;
+
+	/*
+	 * QOS capabilities available
+	 */
+	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
+
+		if (rc)
+			goto out;
+	}
+	/*
+	 * Default capabilities available
+	 */
+	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
+		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
+
+	if (!rc && copypaste_feat) {
+		/* Track DLPAR core add / remove to follow credit changes */
+		if (firmware_has_feature(FW_FEATURE_LPAR))
+			of_reconfig_notifier_register(&pseries_vas_nb);
+
+		pr_info("GZIP feature is available\n");
+	} else {
+		/*
+		 * Should not happen, but only when get default
+		 * capabilities HCALL failed. So disable copy paste
+		 * feature.
+		 */
+		copypaste_feat = false;
+	}
+
+out:
+	kfree(hv_caps);
+	return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
new file mode 100644
index 0000000000..45567cd131
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <asm/vas.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * VAS window modify flags
+ */
+#define VAS_MOD_WIN_CLOSE	PPC_BIT(0)
+#define VAS_MOD_WIN_JOBS_KILL	PPC_BIT(1)
+#define VAS_MOD_WIN_DR		PPC_BIT(3)
+#define VAS_MOD_WIN_PR		PPC_BIT(4)
+#define VAS_MOD_WIN_SF		PPC_BIT(5)
+#define VAS_MOD_WIN_TA		PPC_BIT(6)
+#define VAS_MOD_WIN_FLAGS	(VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \
+				VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
+
+#define VAS_WIN_ACTIVE		0x0
+#define VAS_WIN_CLOSED		0x1
+#define VAS_WIN_INACTIVE	0x2	/* Inactive due to HW failure */
+/* Process of being modified, deallocated, or quiesced */
+#define VAS_WIN_MOD_IN_PROCESS	0x3
+
+#define VAS_COPY_PASTE_USER_MODE	0x00000001
+#define VAS_COP_OP_USER_MODE		0x00000010
+
+#define VAS_GZIP_QOS_CAPABILITIES	0x56516F73477A6970
+#define VAS_GZIP_DEFAULT_CAPABILITIES	0x56446566477A6970
+
+enum vas_migrate_action {
+	VAS_SUSPEND,	/* Before migration: close all open windows */
+	VAS_RESUME,	/* After migration: reopen the closed windows */
+};
+
+/*
+ * Co-processor feature - GZIP QoS windows or GZIP default windows.
+ * The values are used as index into the per-feature capabilities
+ * array, and window types are validated against VAS_MAX_FEAT_TYPE.
+ */
+enum vas_cop_feat_type {
+	VAS_GZIP_QOS_FEAT_TYPE,
+	VAS_GZIP_DEF_FEAT_TYPE,
+	VAS_MAX_FEAT_TYPE,	/* Number of feature types, not a type itself */
+};
+
+/*
+ * Use to get feature specific capabilities from the
+ * hypervisor. The buffer is handed over by physical address and
+ * its multi-byte fields are big endian; they are converted when
+ * copied into struct vas_cop_feat_caps.
+ */
+struct hv_vas_cop_feat_caps {
+	__be64	descriptor;
+	u8	win_type;		/* Default or QoS type */
+	u8	user_mode;		/* VAS_*_USER_MODE bits */
+	__be16	max_lpar_creds;
+	__be16	max_win_creds;
+	union {
+		__be16	reserved;
+		__be16	def_lpar_creds; /* Used for default capabilities */
+	};
+	__be16	target_lpar_creds;	/* Source of nr_total_credits */
+} __packed __aligned(0x1000);
+
+/*
+ * Feature specific (QoS or default) capabilities, kept in CPU
+ * endianness.
+ */
+struct vas_cop_feat_caps {
+	u64		descriptor;
+	u8		win_type;	/* Default or QoS type */
+	u8		user_mode;	/* User mode copy/paste or COP HCALL */
+	u16		max_lpar_creds;	/* Max credits available in LPAR */
+	/* Max credits can be assigned per window */
+	u16		max_win_creds;
+	union {
+		u16	reserved;	/* Used for QoS credit type */
+		u16	def_lpar_creds; /* Used for default credit type */
+	};
+	/* Total LPAR available credits. Can be different from max LPAR */
+	/* credits due to DLPAR operation */
+	atomic_t	nr_total_credits;	/* Total credits assigned to LPAR */
+	/* Incremented when a window is opened, decremented on close */
+	atomic_t	nr_used_credits;	/* Used credits so far */
+};
+
+/*
+ * Feature (QoS or Default) specific to store capabilities and
+ * the list of open windows.
+ */
+struct vas_caps {
+	struct vas_cop_feat_caps caps;
+	struct list_head list;	/* List of open windows (win_list entries) */
+	int nr_open_wins_progress;	/* Number of open windows in */
+					/* progress. Used in migration */
+	int nr_close_wins;	/* windows closed in the hypervisor (DLPAR or migration) */
+	int nr_open_windows;	/* Number of successful open windows */
+	u8 feat;		/* Feature type */
+};
+
+/*
+ * To get window information from the hypervisor.
+ * Multi-byte fields are big endian as returned by the hypervisor.
+ */
+struct hv_vas_win_lpar {
+	__be16	version;
+	u8	win_type;
+	u8	status;		/* presumably one of the VAS_WIN_* states - confirm */
+	__be16	credits;	/* No of credits assigned to this window */
+	__be16	reserved;
+	__be32	pid;		/* LPAR Process ID */
+	__be32	tid;		/* LPAR Thread ID */
+	__be64	win_addr;	/* Paste address */
+	__be32	interrupt;	/* Interrupt when NX request completes */
+	__be32	fault;		/* Interrupt when NX sees fault */
+	/* Associativity Domain Identifiers as returned in */
+	/* H_HOME_NODE_ASSOCIATIVITY */
+	__be64	domain[6];
+	__be64	win_util;	/* Number of bytes processed */
+} __packed __aligned(0x1000);
+
+struct pseries_vas_window {
+	struct vas_window vas_win;	/* Generic window; container_of() anchor */
+	u64 win_addr;		/* Physical paste address */
+	u8 win_type;		/* QoS or Default window */
+	u32 complete_irq;	/* Completion interrupt */
+	u32 fault_irq;		/* Fault interrupt */
+	u64 domain[6];		/* Associativity domain Ids */
+				/* this window is allocated */
+	u64 util;		/* presumably window utilization - confirm */
+	u32 pid;		/* PID associated with this window */
+
+	/* Entry in the per-feature list of open windows; used for LPM and DLPAR */
+	struct list_head win_list;
+	u64 flags;
+	char *name;
+	int fault_virq;		/* presumably Linux virq for fault_irq - confirm */
+	atomic_t pending_faults; /* Number of pending faults */
+};
+
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int vas_reconfig_capabilties(u8 type, int new_nr_creds);
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
+
+#ifdef CONFIG_PPC_VAS
+int vas_migration_handler(int action);
+int pseries_vas_dlpar_cpu(void);
+#else	/* !CONFIG_PPC_VAS: no-op stubs that report success */
+static inline int vas_migration_handler(int action)
+{
+	return 0;
+}
+static inline int pseries_vas_dlpar_cpu(void)
+{
+	return 0;
+}
+#endif	/* CONFIG_PPC_VAS */
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
new file mode 100644
index 0000000000..2dc9cbc4bc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -0,0 +1,1729 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IBM PowerPC Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2003,2008 IBM Corp.
+ *     Dave Engebretsen engebret@us.ibm.com
+ *     Santiago Leon santil@us.ibm.com
+ *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
+ *     Robert Jennings <rcjenn@us.ibm.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/stat.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-map-ops.h>
+#include <linux/kobject.h>
+#include <linux/kexec.h>
+#include <linux/of_irq.h>
+
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/vio.h>
+#include <asm/prom.h>
+#include <asm/firmware.h>
+#include <asm/tce.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+
+static struct vio_dev vio_bus_device  = { /* fake "parent" device */
+	.name = "vio",
+	.type = "",			/* empty type string */
+	.dev.init_name = "vio",
+	.dev.bus = &vio_bus_type,	/* vio_bus_type is declared outside this block */
+};
+
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * struct vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool, in bytes
+ */
+struct vio_cmo_pool {
+	size_t size;
+	size_t free;
+};
+
+/* How many ms to delay queued balance work; NOTE(review): the workqueue API takes jiffies, so convert */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size (131072 bytes = 128 KiB) */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * struct vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
+ *
+ * @viodev: struct vio_dev pointer
+ * @list: linkage on vio_cmo.device_list, the list of tracked devices
+ */
+struct vio_cmo_dev_entry {
+	struct vio_dev *viodev;
+	struct list_head list;
+};
+
+/**
+ * struct vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: delayed work that runs vio_cmo_balance() to balance entitlement
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark of @curr (IO data usage)
+ */
+static struct vio_cmo {
+	spinlock_t lock;
+	struct delayed_work balance_q;
+	struct list_head device_list;
+	size_t entitled;
+	struct vio_cmo_pool reserve;
+	struct vio_cmo_pool excess;
+	size_t spare;
+	size_t min;
+	size_t desired;
+	size_t curr;
+	size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_num_OF_devs - Count the OF devices that have DMA windows
+ *
+ * Returns: number of children of the "vdevice" node that carry an
+ * ibm,my-dma-window property, or 0 if there is no such node.
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+	struct device_node *vdevice, *child;
+	int ndevs = 0;
+
+	vdevice = of_find_node_by_name(NULL, "vdevice");
+	if (!vdevice)
+		goto put_node;
+
+	/* One device per child that has an ibm,my-dma-window property. */
+	for_each_child_of_node(vdevice, child) {
+		if (!of_find_property(child, "ibm,my-dma-window", NULL))
+			continue;
+		ndevs++;
+	}
+
+put_node:
+	/* of_node_put() is also called when the lookup found nothing. */
+	of_node_put(vdevice);
+	return ndevs;
+}
+
+/**
+ * vio_cmo_alloc - charge an IO memory allocation to a CMO-enabled device
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * The request is met first from entitlement the device holds but has
+ * not allocated, and then from the excess pool shared by all devices.
+ * The excess pool may only be used while the spare pool, which services
+ * hotplug, holds at least %VIO_CMO_MIN_ENT.
+ *
+ * Return codes:
+ *  0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+	size_t from_reserve = 0, from_excess = 0;
+	unsigned long irqflags;
+	int rc = -ENOMEM;
+
+	spin_lock_irqsave(&vio_cmo.lock, irqflags);
+
+	/* Entitlement the device holds but has not yet allocated */
+	if (viodev->cmo.entitled > viodev->cmo.allocated)
+		from_reserve = viodev->cmo.entitled - viodev->cmo.allocated;
+
+	/* The excess pool is off limits until the spare is fulfilled */
+	if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+		from_excess = vio_cmo.excess.free;
+
+	if (from_reserve + from_excess < size)
+		goto unlock;
+
+	/* Account the allocation and track the high water mark */
+	vio_cmo.curr += size;
+	if (vio_cmo.curr > vio_cmo.high)
+		vio_cmo.high = vio_cmo.curr;
+	viodev->cmo.allocated += size;
+	/* Only the part the reserve cannot cover comes out of excess */
+	vio_cmo.excess.free -= size - min(from_reserve, size);
+	rc = 0;
+unlock:
+	spin_unlock_irqrestore(&vio_cmo.lock, irqflags);
+	return rc;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+	unsigned long flags;
+	size_t spare_needed = 0;
+	size_t excess_freed = 0;
+	size_t reserve_freed = size;
+	size_t tmp;
+	int balance = 0;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	vio_cmo.curr -= size;
+
+	/* Amount of memory freed from the excess pool */
+	if (viodev->cmo.allocated > viodev->cmo.entitled) {
+		excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+		                                   viodev->cmo.entitled));
+		reserve_freed -= excess_freed;
+	}
+
+	/* Remove allocation from device */
+	viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+	/* Spare is a subset of the reserve pool, replenish it first. */
+	spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+	/*
+	 * Replenish the spare in the reserve pool from the excess pool.
+	 * This moves entitlement into the reserve pool.
+	 */
+	if (spare_needed && excess_freed) {
+		tmp = min(excess_freed, spare_needed);
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+		vio_cmo.spare += tmp;
+		excess_freed -= tmp;
+		spare_needed -= tmp;
+		balance = 1;
+	}
+
+	/*
+	 * Replenish the spare in the reserve pool from the reserve pool.
+	 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+	 * if needed, and gives it to the spare pool. The amount of used
+	 * memory in this pool does not change.
+	 */
+	if (spare_needed && reserve_freed) {
+		tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
+		vio_cmo.spare += tmp;
+		viodev->cmo.entitled -= tmp;
+		reserve_freed -= tmp;
+		spare_needed -= tmp;
+		balance = 1;
+	}
+
+	/*
+	 * Increase the reserve pool until the desired allocation is met.
+	 * Move an allocation freed from the excess pool into the reserve
+	 * pool and schedule a balance operation.
+	 */
+	if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+		tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+		excess_freed -= tmp;
+		balance = 1;
+	}
+
+	/* Return memory from the excess pool to that pool */
+	if (excess_freed)
+		vio_cmo.excess.free += excess_freed;
+
+	/* VIO_CMO_BALANCE_DELAY is in ms; the workqueue delay is in jiffies */
+	if (balance)
+		schedule_delayed_work(&vio_cmo.balance_q,
+				      msecs_to_jiffies(VIO_CMO_BALANCE_DELAY));
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * An increase refills the spare pool first and gives the rest to the excess
+ * pool; the system entitlement becomes @new_entitlement.  A decrease, if
+ * possible, comes from the excess pool and unused device entitlement.
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+	struct vio_dev *viodev;
+	struct vio_cmo_dev_entry *dev_ent;
+	unsigned long flags;
+	size_t avail, delta, tmp;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Entitlement increases */
+	if (new_entitlement > vio_cmo.entitled) {
+		delta = new_entitlement - vio_cmo.entitled;
+
+		/* Fulfill spare allocation */
+		if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+			tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+			vio_cmo.spare += tmp;
+			vio_cmo.reserve.size += tmp;
+			delta -= tmp;
+		}
+
+		/* Record the whole increase; only the remainder is excess */
+		vio_cmo.entitled = new_entitlement;
+		vio_cmo.excess.size += delta;
+		vio_cmo.excess.free += delta;
+
+		goto out;
+	}
+
+	/* Entitlement decreases */
+	delta = vio_cmo.entitled - new_entitlement;
+	avail = vio_cmo.excess.free;
+
+	/*
+	 * Need to check how much unused entitlement each device can
+	 * sacrifice to fulfill entitlement change.
+	 */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		if (avail >= delta)
+			break;
+
+		viodev = dev_ent->viodev;
+		if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+		    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+				avail += viodev->cmo.entitled -
+				         max_t(size_t, viodev->cmo.allocated,
+				               VIO_CMO_MIN_ENT);
+	}
+
+	if (delta <= avail) {
+		vio_cmo.entitled -= delta;
+
+		/* Take entitlement from the excess pool first */
+		tmp = min(vio_cmo.excess.free, delta);
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.excess.free -= tmp;
+		delta -= tmp;
+
+		/*
+		 * Remove all but VIO_CMO_MIN_ENT bytes from devices
+		 * until entitlement change is served
+		 */
+		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+			if (!delta)
+				break;
+
+			viodev = dev_ent->viodev;
+			tmp = 0;
+			if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+			    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+				tmp = viodev->cmo.entitled -
+				      max_t(size_t, viodev->cmo.allocated,
+				            VIO_CMO_MIN_ENT);
+			viodev->cmo.entitled -= min(tmp, delta);
+			delta -= min(tmp, delta);
+		}
+	} else {
+		spin_unlock_irqrestore(&vio_cmo.lock, flags);
+		return -ENOMEM;
+	}
+
+out:
+	schedule_delayed_work(&vio_cmo.balance_q, 0);
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver.  The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+	struct vio_cmo *cmo;
+	struct vio_dev *viodev;
+	struct vio_cmo_dev_entry *dev_ent;
+	unsigned long flags;
+	size_t avail = 0, level, chunk, need;
+	int devcount = 0, fulfilled;
+
+	/* NOTE(review): cmo-> and vio_cmo. are mixed below; presumably the same object */
+	cmo = container_of(work, struct vio_cmo, balance_q.work);
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Calculate minimum entitlement and fulfill spare */
+	cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+	BUG_ON(cmo->min > cmo->entitled);	/* per-device minimums must fit */
+	cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+	cmo->min += cmo->spare;
+	cmo->desired = cmo->min;
+
+	/*
+	 * Determine how much entitlement is available and reset device
+	 * entitlements
+	 */
+	avail = cmo->entitled - cmo->spare;
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		viodev = dev_ent->viodev;
+		devcount++;
+		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+		cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+		avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+	}
+
+	/*
+	 * Having provided each device with the minimum entitlement, loop
+	 * over the devices portioning out the remaining entitlement
+	 * until there is nothing left.
+	 */
+	level = VIO_CMO_MIN_ENT;	/* entitlement level handed out so far */
+	while (avail) {
+		fulfilled = 0;
+		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+			viodev = dev_ent->viodev;
+
+			if (viodev->cmo.desired <= level) {
+				fulfilled++;
+				continue;
+			}
+
+			/*
+			 * Give the device up to VIO_CMO_BALANCE_CHUNK
+			 * bytes of entitlement, but do not exceed the
+			 * desired level of entitlement for the device.
+			 */
+			chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+			chunk = min(chunk, (viodev->cmo.desired -
+			                    viodev->cmo.entitled));
+			viodev->cmo.entitled += chunk;
+
+			/*
+			 * If the memory for this entitlement increase was
+			 * already allocated to the device it does not come
+			 * from the available pool being portioned out.
+			 */
+			need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+			       max(viodev->cmo.allocated, level);
+			avail -= need;
+
+		}
+		if (fulfilled == devcount)	/* every device is at its desired level */
+			break;
+		level += VIO_CMO_BALANCE_CHUNK;
+	}
+
+	/* Calculate new reserve and excess pool sizes */
+	cmo->reserve.size = cmo->min;
+	cmo->excess.free = 0;
+	cmo->excess.size = 0;
+	need = 0;	/* reused: total allocation above device entitlement */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		viodev = dev_ent->viodev;
+		/* Calculated reserve size above the minimum entitlement */
+		if (viodev->cmo.entitled)
+			cmo->reserve.size += (viodev->cmo.entitled -
+			                      VIO_CMO_MIN_ENT);
+		/* Calculated used excess entitlement */
+		if (viodev->cmo.allocated > viodev->cmo.entitled)
+			need += viodev->cmo.allocated - viodev->cmo.entitled;
+	}
+	cmo->excess.size = cmo->entitled - cmo->reserve.size;
+	cmo->excess.free = cmo->excess.size - need;
+
+	cancel_delayed_work(to_delayed_work(work)); /* presumably drops a re-queue made meanwhile - confirm */
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+					  dma_addr_t *dma_handle, gfp_t flag,
+					  unsigned long attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	size_t charge = roundup(size, PAGE_SIZE);
+	void *cpu_addr;
+
+	/* Charge the page-rounded size to the device's entitlement first. */
+	if (vio_cmo_alloc(viodev, charge))
+		goto failed;
+
+	cpu_addr = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+					dma_handle, dev->coherent_dma_mask,
+					flag, dev_to_node(dev));
+	if (likely(cpu_addr))
+		return cpu_addr;
+	vio_cmo_dealloc(viodev, charge);	/* allocation failed: undo the charge */
+failed:
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return NULL;
+}
+
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+					void *vaddr, dma_addr_t dma_handle,
+					unsigned long attrs)
+{
+	/* Release the buffer first, then credit the page-rounded size */
+	/* back to the device's CMO accounting. */
+	iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+	vio_cmo_dealloc(to_vio_dev(dev), roundup(size, PAGE_SIZE));
+}
+
+static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
+                                         unsigned long offset, size_t size,
+                                         enum dma_data_direction direction,
+                                         unsigned long attrs)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	struct vio_dev *viodev = to_vio_dev(dev);
+	size_t charge = roundup(size, IOMMU_PAGE_SIZE(tbl));
+	dma_addr_t handle;
+
+	if (vio_cmo_alloc(viodev, charge))
+		goto out_fail;
+
+	handle = iommu_map_page(dev, tbl, page, offset, size, dma_get_mask(dev),
+				direction, attrs);
+	if (likely(handle != DMA_MAPPING_ERROR))
+		return handle;
+	/* Mapping failed: hand the entitlement charge back. */
+	vio_cmo_dealloc(viodev, charge);
+out_fail:
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return DMA_MAPPING_ERROR;
+}
+
+static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+				     size_t size,
+				     enum dma_data_direction direction,
+				     unsigned long attrs)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	size_t charge = roundup(size, IOMMU_PAGE_SIZE(tbl)); /* IOMMU-page rounded */
+
+	iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
+	vio_cmo_dealloc(to_vio_dev(dev), charge);
+}
+
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+                                int nelems, enum dma_data_direction direction,
+                                unsigned long attrs)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct scatterlist *sg;
+	size_t charge = 0;
+	int ret, i;
+
+	/* Charge each entry's length rounded up to the IOMMU page size. */
+	for_each_sg(sglist, sg, nelems, i)
+		charge += roundup(sg->length, IOMMU_PAGE_SIZE(tbl));
+
+	ret = vio_cmo_alloc(viodev, charge);
+	if (ret)
+		goto out_fail;
+	ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev),
+			       direction, attrs);
+	if (unlikely(!ret)) {
+		vio_cmo_dealloc(viodev, charge);
+		goto out_fail;
+	}
+	/* Give back whatever the mapped entries' dma_length did not use. */
+	for_each_sg(sglist, sg, ret, i)
+		charge -= roundup(sg->dma_length, IOMMU_PAGE_SIZE(tbl));
+	if (charge)
+		vio_cmo_dealloc(viodev, charge);
+	return ret;
+out_fail:
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return ret;
+}
+
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+		struct scatterlist *sglist, int nelems,
+		enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	struct scatterlist *sg;
+	size_t charge = 0;
+	int i;
+
+	/* Total the IOMMU-page-rounded dma_length of every entry first. */
+	for_each_sg(sglist, sg, nelems, i)
+		charge += roundup(sg->dma_length, IOMMU_PAGE_SIZE(tbl));
+
+	ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
+	vio_cmo_dealloc(to_vio_dev(dev), charge);
+}
+
+static const struct dma_map_ops vio_dma_mapping_ops = { /* installed by vio_cmo_set_dma_ops() */
+	.alloc             = vio_dma_iommu_alloc_coherent,	/* CMO-accounted */
+	.free              = vio_dma_iommu_free_coherent,	/* CMO-accounted */
+	.map_sg            = vio_dma_iommu_map_sg,		/* CMO-accounted */
+	.unmap_sg          = vio_dma_iommu_unmap_sg,		/* CMO-accounted */
+	.map_page          = vio_dma_iommu_map_page,		/* CMO-accounted */
+	.unmap_page        = vio_dma_iommu_unmap_page,		/* CMO-accounted */
+	.dma_supported     = dma_iommu_dma_supported,	/* the rest are not defined in this block */
+	.get_required_mask = dma_iommu_get_required_mask,
+	.mmap		   = dma_common_mmap,
+	.get_sgtable	   = dma_common_get_sgtable,
+	.alloc_pages	   = dma_common_alloc_pages,
+	.free_pages	   = dma_common_free_pages,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @desired: new desired entitlement level in bytes
+ *
+ * Lets a device, or a sysfs write, change the entitlement it desires at
+ * runtime.  Values below %VIO_CMO_MIN_ENT are raised to it, and a balance
+ * of system resources is queued to run afterwards.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+	struct vio_cmo_dev_entry *entry;
+	unsigned long irqflags;
+	bool tracked = false;
+	size_t old, surplus;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	spin_lock_irqsave(&vio_cmo.lock, irqflags);
+
+	/* A device never desires less than the minimum entitlement. */
+	desired = max_t(size_t, desired, VIO_CMO_MIN_ENT);
+
+	/*
+	 * Only devices on the device list are changed.  A device that is
+	 * not on the list has no driver loaded and can not receive
+	 * entitlement.
+	 */
+	list_for_each_entry(entry, &vio_cmo.device_list, list) {
+		if (entry->viodev != viodev)
+			continue;
+		tracked = true;
+		break;
+	}
+	if (!tracked)
+		goto unlock;
+
+	old = viodev->cmo.desired;
+	viodev->cmo.desired = desired;
+
+	if (desired >= old) {
+		/* Growth only bumps the bus total ahead of the balance. */
+		vio_cmo.desired += desired - old;
+	} else {
+		vio_cmo.desired -= old - desired;
+
+		/*
+		 * Entitlement above the new desired level leaves the
+		 * reserve pool for the excess pool; the part of it that
+		 * is not currently allocated becomes free excess.
+		 */
+		if (viodev->cmo.entitled > desired) {
+			surplus = viodev->cmo.entitled - desired;
+			vio_cmo.reserve.size -= surplus;
+			vio_cmo.excess.size += surplus;
+			if (viodev->cmo.allocated < viodev->cmo.entitled)
+				vio_cmo.excess.free += viodev->cmo.entitled -
+					max(viodev->cmo.allocated, desired);
+			viodev->cmo.entitled = desired;
+		}
+	}
+
+	schedule_delayed_work(&vio_cmo.balance_q, 0);
+unlock:
+	spin_unlock_irqrestore(&vio_cmo.lock, irqflags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and leaving
+ * the rest to be taken care of by a later balance operation.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ *          -ENOMEM when entitlement is not available for device or
+ *          device entry.
+ * On -ENOMEM the device is not left on the CMO device list.
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *dev_ent = NULL;
+	struct device *dev = &viodev->dev;
+	struct iommu_table *tbl;
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	unsigned long flags;
+	size_t size;
+	bool dma_capable = false;
+
+	tbl = get_iommu_table_base(dev);
+
+	/* A device requires entitlement if it has a DMA window property */
+	switch (viodev->family) {
+	case VDEVICE:
+		if (of_get_property(viodev->dev.of_node,
+					"ibm,my-dma-window", NULL))
+			dma_capable = true;
+		break;
+	case PFO:
+		dma_capable = false;
+		break;
+	default:
+		dev_warn(dev, "unknown device family: %d\n", viodev->family);
+		BUG();
+		break;
+	}
+
+	/* Configure entitlement for the device. */
+	if (dma_capable) {
+		/* Check that the driver is CMO enabled and get desired DMA */
+		if (!viodrv->get_desired_dma) {
+			dev_err(dev, "%s: device driver does not support CMO\n",
+			        __func__);
+			return -EINVAL;
+		}
+
+		viodev->cmo.desired =
+			IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
+		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+			viodev->cmo.desired = VIO_CMO_MIN_ENT;
+		size = VIO_CMO_MIN_ENT;
+
+		dev_ent = kmalloc(sizeof(*dev_ent), GFP_KERNEL);
+		if (!dev_ent)
+			return -ENOMEM;
+
+		dev_ent->viodev = viodev;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+		list_add(&dev_ent->list, &vio_cmo.device_list);
+	} else {
+		viodev->cmo.desired = 0;
+		size = 0;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+	}
+
+	/*
+	 * If the needs for vio_cmo.min have not changed since they
+	 * were last set, the number of devices in the OF tree has
+	 * been constant and the IO memory for this is already in
+	 * the reserve pool.
+	 */
+	if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+	                    VIO_CMO_MIN_ENT)) {
+		/* Updated desired entitlement if device requires it */
+		if (size)
+			vio_cmo.desired += (viodev->cmo.desired -
+		                        VIO_CMO_MIN_ENT);
+	} else {
+		size_t tmp;
+
+		tmp = vio_cmo.spare + vio_cmo.excess.free;
+		if (tmp < size) {
+			dev_err(dev, "%s: insufficient free entitlement to add device. Need %zu, have %zu\n",
+				__func__, size, tmp);
+			/* Undo the list insertion so a failed probe leaves no entry */
+			list_del(&dev_ent->list); /* size != 0 implies it was added above */
+			spin_unlock_irqrestore(&vio_cmo.lock, flags);
+			kfree(dev_ent);
+			return -ENOMEM;
+		}
+
+		/* Use excess pool first to fulfill request */
+		tmp = min(size, vio_cmo.excess.free);
+		vio_cmo.excess.free -= tmp;
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+
+		/* Use spare if excess pool was insufficient */
+		vio_cmo.spare -= size - tmp;
+
+		/* Update bus accounting */
+		vio_cmo.min += size;
+		vio_cmo.desired += viodev->cmo.desired;
+	}
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Handle CMO specific bus removal activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Take the device off the CMO device list.  It keeps the minimum
+ * entitlement reserved for as long as it is in the system; everything
+ * above that minimum is handed back, first to the spare and then to
+ * the excess pool.
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *entry;
+	unsigned long irqflags;
+	size_t refill;
+
+	spin_lock_irqsave(&vio_cmo.lock, irqflags);
+
+	/* No IO memory may still be charged to the device at this point. */
+	if (viodev->cmo.allocated) {
+		dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+		        "allocated after remove operation.\n",
+		        __func__, viodev->cmo.allocated);
+		BUG();
+	}
+
+	/* Drop the device's entry from the list of CMO-enabled devices. */
+	list_for_each_entry(entry, &vio_cmo.device_list, list) {
+		if (entry->viodev != viodev)
+			continue;
+		list_del(&entry->list);
+		kfree(entry);
+		break;
+	}
+
+	/*
+	 * A device that holds no entitlement needs no further processing;
+	 * otherwise its entitlement goes back to the pools.
+	 */
+	if (!viodev->cmo.entitled)
+		goto unlock;
+
+	/*
+	 * The device has not yet left the OF tree, so its minimum
+	 * entitlement stays in vio_cmo.min and vio_cmo.desired; only the
+	 * part above the minimum is taken off the bus total.
+	 */
+	vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+	/*
+	 * From here on cmo.entitled holds just the amount above the
+	 * minimum; the minimum itself stays in reserve for as long as a
+	 * later balance operation still finds the device in the OF tree.
+	 */
+	viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+	/* Top the spare up first from the freed reserve. */
+	if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+		refill = min(viodev->cmo.entitled,
+			     (VIO_CMO_MIN_ENT - vio_cmo.spare));
+		viodev->cmo.entitled -= refill;
+		vio_cmo.spare += refill;
+	}
+
+	/* Whatever is left moves from the reserve to the excess pool. */
+	vio_cmo.reserve.size -= viodev->cmo.entitled;
+	vio_cmo.excess.size += viodev->cmo.entitled;
+	vio_cmo.excess.free += viodev->cmo.entitled;
+
+	/*
+	 * Until the device is removed it keeps the minimum entitlement,
+	 * which guarantees that a module unload/load will succeed.
+	 */
+	viodev->cmo.desired = VIO_CMO_MIN_ENT;
+	viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+	atomic_set(&viodev->cmo.allocs_failed, 0);
+
+unlock:
+	spin_unlock_irqrestore(&vio_cmo.lock, irqflags);
+}
+
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+	set_dma_ops(&viodev->dev, &vio_dma_mapping_ops); /* use the CMO-accounting DMA ops */
+}
+
+/**
+ * vio_cmo_bus_init - CMO entitlement initialization at bus init time
+ *
+ * Reset the bus accounting and size the reserve and excess pools from
+ * the system entitlement and from the number of devices in the OF tree
+ * that need entitlement in the reserve pool.
+ */
+static void vio_cmo_bus_init(void)
+{
+	struct hvcall_mpp_data mpp;
+	int rc;
+
+	memset(&vio_cmo, 0, sizeof(vio_cmo));
+	INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+	INIT_LIST_HEAD(&vio_cmo.device_list);
+	spin_lock_init(&vio_cmo.lock);
+
+	/*
+	 * Get the current system entitlement.  A failure leaves the
+	 * entitlement at 0, which trips the panic() in the reservation
+	 * check below.
+	 */
+	rc = h_get_mpp(&mpp);
+
+	if (rc == H_SUCCESS) {
+		vio_cmo.entitled = mpp.entitled_mem;
+	} else {
+		printk(KERN_ERR "%s: unable to determine system IO "
+		       "entitlement. (%d)\n", __func__, rc);
+		vio_cmo.entitled = 0;
+	}
+
+	/* The reserve starts as the spare plus the minimum per OF device */
+	vio_cmo.spare = VIO_CMO_MIN_ENT;
+	vio_cmo.reserve.size = vio_cmo.spare;
+	vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+	                         VIO_CMO_MIN_ENT);
+	if (vio_cmo.reserve.size > vio_cmo.entitled) {
+		printk(KERN_ERR "%s: insufficient system entitlement\n",
+		       __func__);
+		panic("%s: Insufficient system entitlement", __func__);
+	}
+
+	/* Everything beyond the reserve is the (initially free) excess pool */
+	vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+	vio_cmo.excess.free = vio_cmo.excess.size;
+	vio_cmo.desired = vio_cmo.reserve.size;
+	vio_cmo.min = vio_cmo.reserve.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+/* viodev_cmo_rd_attr(x) generates cmo_x_show(), which emits cmo.x as "%lu\n" */
+#define viodev_cmo_rd_attr(name)                                        \
+static ssize_t cmo_##name##_show(struct device *dev,                    \
+                                        struct device_attribute *attr,  \
+                                         char *buf)                     \
+{                                                                       \
+	return sysfs_emit(buf, "%lu\n", to_vio_dev(dev)->cmo.name);     \
+}
+
+static ssize_t cmo_allocs_failed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct vio_dev *viodev = to_vio_dev(dev); /* sysfs show: failed CMO allocations */
+	return sysfs_emit(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
+}
+
+static ssize_t cmo_allocs_failed_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	/* Any write resets the counter; the written text is not parsed. */
+	atomic_set(&to_vio_dev(dev)->cmo.allocs_failed, 0);
+	return count;
+}
+
+static ssize_t cmo_desired_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	/* kstrtoul() stores through an unsigned long *, so match that type. */
+	unsigned long new_desired;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &new_desired);
+	if (ret)
+		return ret;	/* a parse error is returned to the writer */
+
+	vio_cmo_set_dev_desired(to_vio_dev(dev), new_desired);
+	return count;
+}
+
+viodev_cmo_rd_attr(desired);	/* cmo_desired_show() */
+viodev_cmo_rd_attr(entitled);	/* cmo_entitled_show() */
+viodev_cmo_rd_attr(allocated);	/* cmo_allocated_show() */
+/* Declared here without a body; the definitions are not in this part of the file. */
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf);
+/* Attribute objects referenced by vio_cmo_dev_attrs[] before any initializer is seen. */
+static struct device_attribute dev_attr_name;
+static struct device_attribute dev_attr_devspec;
+static struct device_attribute dev_attr_modalias;
+/* CMO attributes: entitled/allocated are read-only, desired/allocs_failed read-write. */
+static DEVICE_ATTR_RO(cmo_entitled);
+static DEVICE_ATTR_RO(cmo_allocated);
+static DEVICE_ATTR_RW(cmo_desired);
+static DEVICE_ATTR_RW(cmo_allocs_failed);
+
+static struct attribute *vio_cmo_dev_attrs[] = { /* NULL-terminated */
+	&dev_attr_name.attr,
+	&dev_attr_devspec.attr,
+	&dev_attr_modalias.attr,
+	&dev_attr_cmo_entitled.attr,
+	&dev_attr_cmo_allocated.attr,
+	&dev_attr_cmo_desired.attr,
+	&dev_attr_cmo_allocs_failed.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vio_cmo_dev); /* vio_cmo_dev_groups, installed by vio_cmo_sysfs_init() */
+
+/* sysfs bus functions and data structures for CMO */
+/* viobus_cmo_rd_attr(x): read-only bus attribute cmo_x emitting vio_cmo.x */
+#define viobus_cmo_rd_attr(name)                                        \
+static ssize_t cmo_bus_##name##_show(const struct bus_type *bt, char *buf)    \
+{                                                                       \
+	return sysfs_emit(buf, "%lu\n", vio_cmo.name);                  \
+}                                                                       \
+static struct bus_attribute bus_attr_cmo_bus_##name =			\
+	__ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
+
+#define viobus_cmo_pool_rd_attr(name, var) /* RO bus attr for vio_cmo.name.var */ \
+static ssize_t                                                          \
+cmo_##name##_##var##_show(const struct bus_type *bt, char *buf)         \
+{                                                                       \
+	return sysfs_emit(buf, "%lu\n", vio_cmo.name.var);              \
+}                                                                       \
+static BUS_ATTR_RO(cmo_##name##_##var)
+
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+
+static ssize_t cmo_high_show(const struct bus_type *bt, char *buf)
+{
+	return sprintf(buf, "%lu\n", vio_cmo.high);
+}
+
+static ssize_t cmo_high_store(const struct bus_type *bt, const char *buf,
+			      size_t count)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	vio_cmo.high = vio_cmo.curr;
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+
+	return count;
+}
+static BUS_ATTR_RW(cmo_high);
+
+static struct attribute *vio_bus_attrs[] = {
+	&bus_attr_cmo_bus_entitled.attr,
+	&bus_attr_cmo_bus_spare.attr,
+	&bus_attr_cmo_bus_min.attr,
+	&bus_attr_cmo_bus_desired.attr,
+	&bus_attr_cmo_bus_curr.attr,
+	&bus_attr_cmo_high.attr,
+	&bus_attr_cmo_reserve_size.attr,
+	&bus_attr_cmo_excess_size.attr,
+	&bus_attr_cmo_excess_free.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vio_bus);
+
+static void __init vio_cmo_sysfs_init(void)
+{
+	vio_bus_type.dev_groups = vio_cmo_dev_groups;
+	vio_bus_type.bus_groups = vio_bus_groups;
+}
+#else /* CONFIG_PPC_SMLPAR */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init(void) {}
+static void __init vio_cmo_sysfs_init(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
+
+/*
+ * Platform Facilities Option (PFO) support
+ */
+
+/**
+ * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
+ *
+ * @vdev: Pointer to a struct vio_dev for device
+ * @op: Pointer to a struct vio_pfo_op for the operation parameters
+ *
+ * Calls the hypervisor to synchronously perform the PFO operation
+ * described in @op.  In the case of a busy response from the hypervisor,
+ * the operation will be re-submitted indefinitely unless a non-zero timeout
+ * (in milliseconds) is specified or an error occurs. The timeout limits when
+ * to stop re-submitting an operation, the total time can be exceeded if an
+ * operation is in progress.
+ *
+ * On return, op->hcall_err holds the return value of the last H_COP hcall,
+ * whether or not it succeeded, so callers can inspect the raw hypervisor
+ * status in addition to the errno-style result.
+ *
+ * Returns:
+ *	0 on success,
+ *	-E2BIG if the h_call can not be performed synchronously,
+ *	-EBUSY if a timeout is specified and has elapsed,
+ *	-EACCES if the memory area for data/status has been rescinded,
+ *	-EPERM if a hardware fault has been indicated, or
+ *	-EINVAL for any other h_call failure (e.g. an invalid parameter)
+ */
+int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
+{
+	struct device *dev = &vdev->dev;
+	unsigned long deadline = 0;
+	long hret = 0;
+	int ret = 0;
+
+	if (op->timeout)
+		deadline = jiffies + msecs_to_jiffies(op->timeout);
+
+	while (true) {
+		hret = plpar_hcall_norets(H_COP, op->flags, vdev->resource_id,
+					  op->in, op->inlen, op->out,
+					  op->outlen, op->csbcpb);
+
+		/* Stop on success, hard error, or once the deadline passed */
+		if (hret == H_SUCCESS ||
+		    (hret != H_NOT_ENOUGH_RESOURCES &&
+		     hret != H_BUSY && hret != H_RESOURCE) ||
+		    (op->timeout && time_after(jiffies, deadline)))
+			break;
+
+		dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
+	}
+
+	switch (hret) {
+	case H_SUCCESS:
+		ret = 0;
+		break;
+	case H_OP_MODE:
+	case H_TOO_BIG:
+		ret = -E2BIG;
+		break;
+	case H_RESCINDED:
+		ret = -EACCES;
+		break;
+	case H_HARDWARE:
+		ret = -EPERM;
+		break;
+	case H_NOT_ENOUGH_RESOURCES:
+	case H_RESOURCE:
+	case H_BUSY:
+		ret = -EBUSY;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
+				__func__, ret, hret);
+
+	op->hcall_err = hret;
+	return ret;
+}
+EXPORT_SYMBOL(vio_h_cop_sync);
+
+static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
+{
+	const __be32 *dma_window;
+	struct iommu_table *tbl;
+	unsigned long offset, size;
+
+	dma_window = of_get_property(dev->dev.of_node,
+				  "ibm,my-dma-window", NULL);
+	if (!dma_window)
+		return NULL;
+
+	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+	if (tbl == NULL)
+		return NULL;
+
+	kref_init(&tbl->it_kref);
+
+	of_parse_dma_window(dev->dev.of_node, dma_window,
+			    &tbl->it_index, &offset, &size);
+
+	/* TCE table size - measured in tce entries */
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	tbl->it_size = size >> tbl->it_page_shift;
+	/* offset for VIO should always be 0 */
+	tbl->it_offset = offset >> tbl->it_page_shift;
+	tbl->it_busno = 0;
+	tbl->it_type = TCE_VB;
+	tbl->it_blocksize = 16;
+
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		tbl->it_ops = &iommu_table_lpar_multi_ops;
+	else
+		tbl->it_ops = &iommu_table_pseries_ops;
+
+	return iommu_init_table(tbl, -1, 0, 0);
+}
+
+/**
+ * vio_match_device - look up the id-table entry matching a VIO device
+ * @ids:	id table to search; ended by an entry with an empty type
+ * @dev:	the VIO device to match against
+ *
+ * An entry matches when its type string is a prefix of the device's type
+ * and of_device_is_compatible() accepts the entry's compat string.
+ * Returns the first matching vio_device_id, or NULL if none matches.
+ */
+static const struct vio_device_id *vio_match_device(
+		const struct vio_device_id *ids, const struct vio_dev *dev)
+{
+	const struct vio_device_id *id;
+
+	for (id = ids; id->type[0] != '\0'; id++) {
+		if (strncmp(dev->type, id->type, strlen(id->type)) != 0)
+			continue;
+		if (of_device_is_compatible(dev->dev.of_node, id->compat))
+			return id;
+	}
+	return NULL;
+}
+
+/*
+ * Bus probe callback: translate the generic struct device into its
+ * struct vio_dev and hand it to the bound driver's probe routine.
+ * Returns -ENODEV when the driver has no probe hook or no id matches.
+ */
+static int vio_bus_probe(struct device *dev)
+{
+	struct vio_driver *drv = to_vio_driver(dev->driver);
+	struct vio_dev *vdev = to_vio_dev(dev);
+	const struct vio_device_id *match;
+	int rc;
+
+	if (!drv->probe)
+		return -ENODEV;
+
+	match = vio_match_device(drv->id_table, vdev);
+	if (!match)
+		return -ENODEV;
+
+	memset(&vdev->cmo, 0, sizeof(vdev->cmo));
+	if (firmware_has_feature(FW_FEATURE_CMO)) {
+		rc = vio_cmo_bus_probe(vdev);
+		if (rc)
+			return rc;
+	}
+	rc = drv->probe(vdev, match);
+	if (rc && firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_remove(vdev);
+	return rc;
+}
+
+/* convert from struct device to struct vio_dev and pass to driver. */
+static void vio_bus_remove(struct device *dev)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	struct device *devptr;
+
+	/*
+	 * Hold a reference to the device while the remove function runs,
+	 * to allow for the CMO accounting cleanup that follows it.
+	 */
+	devptr = get_device(dev);
+
+	if (viodrv->remove)
+		viodrv->remove(viodev);
+
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_remove(viodev);
+
+	put_device(devptr);
+}
+
+/* Bus shutdown hook: use the driver's shutdown, else remove under kexec. */
+static void vio_bus_shutdown(struct device *dev)
+{
+	struct vio_driver *drv;
+
+	if (!dev->driver)
+		return;
+	drv = to_vio_driver(dev->driver);
+	if (drv->shutdown)
+		drv->shutdown(to_vio_dev(dev));
+	else if (kexec_in_progress)
+		vio_bus_remove(dev);
+}
+
+/**
+ * __vio_register_driver - Register a new vio driver
+ * @viodrv:	The vio_driver structure to be registered.
+ * @owner:	Module stored in the embedded struct driver's owner field.
+ * @mod_name:	Name stored in the embedded struct driver's mod_name field.
+ */
+int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
+			  const char *mod_name)
+{
+	// vio_bus_type is only initialised for pseries
+	if (!machine_is(pseries))
+		return -ENODEV;
+
+	pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
+	/* fill in 'struct driver' fields */
+	viodrv->driver.name = viodrv->name;
+	viodrv->driver.pm = viodrv->pm;
+	viodrv->driver.bus = &vio_bus_type;
+	viodrv->driver.owner = owner;
+	viodrv->driver.mod_name = mod_name;
+	return driver_register(&viodrv->driver);
+}
+EXPORT_SYMBOL(__vio_register_driver);
+
+/**
+ * vio_unregister_driver - Remove registration of vio driver.
+ * @viodrv:	The vio_driver struct to be removed from registration
+ */
+void vio_unregister_driver(struct vio_driver *viodrv)
+{
+	driver_unregister(&viodrv->driver);
+}
+EXPORT_SYMBOL(vio_unregister_driver);
+
+/* vio_dev refcount hit 0 */
+static void vio_dev_release(struct device *dev)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+
+	if (tbl)
+		iommu_tce_table_put(tbl);
+	of_node_put(dev->of_node);
+	kfree(to_vio_dev(dev));
+}
+
+/**
+ * vio_register_device_node: - Register a new vio device.
+ * @of_node:	The OF node for this device.
+ *
+ * Creates and initializes a vio_dev structure from the data in
+ * of_node and adds it to the list of virtual devices.
+ * Returns a pointer to the created vio_dev or NULL if node has
+ * NULL device_type or compatible fields.
+ */
+struct vio_dev *vio_register_device_node(struct device_node *of_node)
+{
+	struct vio_dev *viodev;
+	struct device_node *parent_node;
+	const __be32 *prop;
+	enum vio_dev_family family;
+
+	/*
+	 * Determine if this node is under the /vdevice node or under the
+	 * /ibm,platform-facilities node.  This decides the device's family.
+	 */
+	parent_node = of_get_parent(of_node);
+	if (parent_node) {
+		if (of_node_is_type(parent_node, "ibm,platform-facilities"))
+			family = PFO;
+		else if (of_node_is_type(parent_node, "vdevice"))
+			family = VDEVICE;
+		else {
+			pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
+					__func__,
+					parent_node,
+					of_node);
+			of_node_put(parent_node);
+			return NULL;
+		}
+		of_node_put(parent_node);
+	} else {
+		pr_warn("%s: could not determine the parent of node %pOFn.\n",
+				__func__, of_node);
+		return NULL;
+	}
+
+	if (family == PFO) {
+		if (of_property_read_bool(of_node, "interrupt-controller")) {
+			pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
+					__func__, of_node);
+			return NULL;
+		}
+	}
+
+	/* allocate a vio_dev for this node */
+	viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
+	if (viodev == NULL) {
+		pr_warn("%s: allocation failure for VIO device.\n", __func__);
+		return NULL;
+	}
+
+	/* we need the 'device_type' property, in order to match with drivers */
+	viodev->family = family;
+	if (viodev->family == VDEVICE) {
+		unsigned int unit_address;
+
+		viodev->type = of_node_get_device_type(of_node);
+		if (!viodev->type) {
+			pr_warn("%s: node %pOFn is missing the 'device_type' "
+					"property.\n", __func__, of_node);
+			goto out;
+		}
+
+		prop = of_get_property(of_node, "reg", NULL);
+		if (prop == NULL) {
+			pr_warn("%s: node %pOFn missing 'reg'\n",
+					__func__, of_node);
+			goto out;
+		}
+		unit_address = of_read_number(prop, 1);
+		dev_set_name(&viodev->dev, "%x", unit_address);
+		viodev->irq = irq_of_parse_and_map(of_node, 0);
+		viodev->unit_address = unit_address;
+	} else {
+		/* PFO devices need their resource_id for submitting COP_OPs
+		 * This is an optional field for devices, but is required when
+		 * performing synchronous ops */
+		prop = of_get_property(of_node, "ibm,resource-id", NULL);
+		if (prop != NULL)
+			viodev->resource_id = of_read_number(prop, 1);
+
+		dev_set_name(&viodev->dev, "%pOFn", of_node);
+		viodev->type = dev_name(&viodev->dev);
+		viodev->irq = 0;
+	}
+
+	viodev->name = of_node->name;
+	viodev->dev.of_node = of_node_get(of_node);
+
+	set_dev_node(&viodev->dev, of_node_to_nid(of_node));
+
+	/* init generic 'struct device' fields: */
+	viodev->dev.parent = &vio_bus_device.dev;
+	viodev->dev.bus = &vio_bus_type;
+	viodev->dev.release = vio_dev_release;
+
+	if (of_property_present(viodev->dev.of_node, "ibm,my-dma-window")) {
+		if (firmware_has_feature(FW_FEATURE_CMO))
+			vio_cmo_set_dma_ops(viodev);
+		else
+			set_dma_ops(&viodev->dev, &dma_iommu_ops);
+
+		set_iommu_table_base(&viodev->dev,
+				     vio_build_iommu_table(viodev));
+
+		/* needed to ensure proper operation of coherent allocations
+		 * later, in case driver doesn't set it explicitly */
+		viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+		viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
+	}
+
+	/* register with generic device framework */
+	if (device_register(&viodev->dev)) {
+		printk(KERN_ERR "%s: failed to register device %s\n",
+				__func__, dev_name(&viodev->dev));
+		put_device(&viodev->dev);
+		return NULL;
+	}
+
+	return viodev;
+
+out:	/* Use this exit point for any return prior to device_register */
+	kfree(viodev);
+
+	return NULL;
+}
+EXPORT_SYMBOL(vio_register_device_node);
+
+/*
+ * vio_bus_scan_register_devices - Scan OF and register each child device
+ * @root_name: OF node name for the root of the subtree to search.
+ *		A NULL name is ignored.
+ *
+ * Look up the node called @root_name and call vio_register_device_node()
+ * for every child directly beneath it.
+ */
+static void __init vio_bus_scan_register_devices(char *root_name)
+{
+	struct device_node *root;
+	struct device_node *child;
+
+	if (!root_name)
+		return;
+
+	root = of_find_node_by_name(NULL, root_name);
+	if (!root)
+		return;
+
+	/*
+	 * Create a struct vio_dev for each virtual device in the device
+	 * tree. Drivers will associate with them later.
+	 */
+	for (child = of_get_next_child(root, NULL); child;
+	     child = of_get_next_child(root, child))
+		vio_register_device_node(child);
+
+	of_node_put(root);
+}
+
+/**
+ * vio_bus_init: - Initialize the virtual IO bus
+ */
+static int __init vio_bus_init(void)
+{
+	int err;
+
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_sysfs_init();
+
+	err = bus_register(&vio_bus_type);
+	if (err) {
+		printk(KERN_ERR "failed to register VIO bus\n");
+		return err;
+	}
+
+	/*
+	 * The fake parent of all vio devices, just to give us
+	 * a nice directory
+	 */
+	err = device_register(&vio_bus_device.dev);
+	if (err) {
+		printk(KERN_WARNING "%s: device_register returned %i\n",
+				__func__, err);
+		return err;
+	}
+
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_init();
+
+	return 0;
+}
+machine_postcore_initcall(pseries, vio_bus_init);
+
+static int __init vio_device_init(void)
+{
+	vio_bus_scan_register_devices("vdevice");
+	vio_bus_scan_register_devices("ibm,platform-facilities");
+
+	return 0;
+}
+machine_device_initcall(pseries, vio_device_init);
+
+static ssize_t name_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t devspec_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct device_node *of_node = dev->of_node;
+
+	return sprintf(buf, "%pOF\n", of_node);
+}
+static DEVICE_ATTR_RO(devspec);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+	struct device_node *dn;
+	const char *cp;
+
+	dn = dev->of_node;
+	if (!dn) {
+		strcpy(buf, "\n");
+		return strlen(buf);
+	}
+	cp = of_get_property(dn, "compatible", NULL);
+	if (!cp) {
+		strcpy(buf, "\n");
+		return strlen(buf);
+	}
+
+	return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *vio_dev_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_devspec.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vio_dev);
+
+void vio_unregister_device(struct vio_dev *viodev)
+{
+	device_unregister(&viodev->dev);
+	if (viodev->family == VDEVICE)
+		irq_dispose_mapping(viodev->irq);
+}
+EXPORT_SYMBOL(vio_unregister_device);
+
+static int vio_bus_match(struct device *dev, struct device_driver *drv)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+	struct vio_driver *vio_drv = to_vio_driver(drv);
+	const struct vio_device_id *ids = vio_drv->id_table;
+
+	return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
+}
+
+static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+	const struct device_node *dn;
+	const char *cp;
+
+	dn = dev->of_node;
+	if (!dn)
+		return -ENODEV;
+	cp = of_get_property(dn, "compatible", NULL);
+	if (!cp)
+		return -ENODEV;
+
+	add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
+	return 0;
+}
+
+struct bus_type vio_bus_type = {
+	.name = "vio",
+	.dev_groups = vio_dev_groups,
+	.uevent = vio_hotplug,
+	.match = vio_bus_match,
+	.probe = vio_bus_probe,
+	.remove = vio_bus_remove,
+	.shutdown = vio_bus_shutdown,
+};
+
+/**
+ * vio_get_attribute: - get attribute for virtual device
+ * @vdev:	The vio device to get property.
+ * @which:	The property/attribute to be extracted.
+ * @length:	Pointer to length of returned data size (unused if NULL).
+ *
+ * Calls prom.c's of_get_property() to return the value of the
+ * attribute specified by @which
+*/
+const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
+{
+	return of_get_property(vdev->dev.of_node, which, length);
+}
+EXPORT_SYMBOL(vio_get_attribute);
+
+/* vio_find_name() - internal because only vio.c knows how we formatted the
+ * kobject name
+ */
+static struct vio_dev *vio_find_name(const char *name)
+{
+	struct device *found;
+
+	found = bus_find_device_by_name(&vio_bus_type, NULL, name);
+	if (!found)
+		return NULL;
+
+	return to_vio_dev(found);
+}
+
+/**
+ * vio_find_node - find an already-registered vio_dev
+ * @vnode: device_node of the virtual device we're looking for
+ *
+ * Takes a reference to the embedded struct device which needs to be dropped
+ * after use.
+ */
+struct vio_dev *vio_find_node(struct device_node *vnode)
+{
+	char kobj_name[20];
+	struct device_node *vnode_parent;
+
+	vnode_parent = of_get_parent(vnode);
+	if (!vnode_parent)
+		return NULL;
+
+	/* construct the kobject name from the device node */
+	if (of_node_is_type(vnode_parent, "vdevice")) {
+		const __be32 *prop;
+		
+		prop = of_get_property(vnode, "reg", NULL);
+		if (!prop)
+			goto out;
+		snprintf(kobj_name, sizeof(kobj_name), "%x",
+			 (uint32_t)of_read_number(prop, 1));
+	} else if (of_node_is_type(vnode_parent, "ibm,platform-facilities"))
+		snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
+	else
+		goto out;
+
+	of_node_put(vnode_parent);
+	return vio_find_name(kobj_name);
+out:
+	of_node_put(vnode_parent);
+	return NULL;
+}
+EXPORT_SYMBOL(vio_find_node);
+
+int vio_enable_interrupts(struct vio_dev *dev)
+{
+	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
+	if (rc != H_SUCCESS)
+		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_enable_interrupts);
+
+int vio_disable_interrupts(struct vio_dev *dev)
+{
+	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
+	if (rc != H_SUCCESS)
+		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_disable_interrupts);
+
+static int __init vio_init(void)
+{
+	dma_debug_add_bus(&vio_bus_type);
+	return 0;
+}
+machine_fs_initcall(pseries, vio_init);
diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c
new file mode 100644
index 0000000000..3f85ece3c8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vphn.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/byteorder.h>
+#include <asm/vphn.h>
+
+/*
+ * The associativity domain numbers are returned from the hypervisor as a
+ * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the
+ * special value of "all ones" (aka. 0xffff) and its size may not exceed 48
+ * bytes.
+ *
+ *    --- 16-bit fields -->
+ *  _________________________
+ *  |  0  |  1  |  2  |  3  |   be_packed[0]
+ *  ------+-----+-----+------
+ *  _________________________
+ *  |  4  |  5  |  6  |  7  |   be_packed[1]
+ *  -------------------------
+ *            ...
+ *  _________________________
+ *  | 20  | 21  | 22  | 23  |   be_packed[5]
+ *  -------------------------
+ *
+ * Convert to the sequence they would appear in the ibm,associativity property.
+ */
+static int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
+{
+	__be64 be_packed[VPHN_REGISTER_COUNT];
+	int i, nr_assoc_doms = 0;
+	const __be16 *field = (const __be16 *) be_packed;
+	u16 last = 0;
+	bool is_32bit = false;
+
+#define VPHN_FIELD_UNUSED	(0xffff)
+#define VPHN_FIELD_MSB		(0x8000)
+#define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)
+
+	/* Let's fix the values returned by plpar_hcall9() */
+	for (i = 0; i < VPHN_REGISTER_COUNT; i++)
+		be_packed[i] = cpu_to_be64(packed[i]);
+
+	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
+		u16 new = be16_to_cpup(field++);
+
+		if (is_32bit) {
+			/*
+			 * Let's concatenate the 16 bits of this field to the
+			 * 15 lower bits of the previous field
+			 */
+			unpacked[++nr_assoc_doms] =
+				cpu_to_be32(last << 16 | new);
+			is_32bit = false;
+		} else if (new == VPHN_FIELD_UNUSED)
+			/* This is the list terminator */
+			break;
+		else if (new & VPHN_FIELD_MSB) {
+			/* Data is in the lower 15 bits of this field */
+			unpacked[++nr_assoc_doms] =
+				cpu_to_be32(new & VPHN_FIELD_MASK);
+		} else {
+			/*
+			 * Data is in the lower 15 bits of this field
+			 * concatenated with the next 16 bit field
+			 */
+			last = new;
+			is_32bit = true;
+		}
+	}
+
+	/* The first cell contains the length of the property */
+	unpacked[0] = cpu_to_be32(nr_assoc_doms);
+
+	return nr_assoc_doms;
+}
+
+/* NOTE: This file is included by a selftest and built in userspace. */
+#ifdef __KERNEL__
+#include <asm/hvcall.h>
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity)
+{
+	long rc;
+	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+
+	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu);
+	if (rc == H_SUCCESS)
+		vphn_unpack_associativity(retbuf, associativity);
+
+	return rc;
+}
+#endif
diff --git a/arch/powerpc/purgatory/.gitignore b/arch/powerpc/purgatory/.gitignore
new file mode 100644
index 0000000000..5e40575c1f
--- /dev/null
+++ b/arch/powerpc/purgatory/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+purgatory.ro
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
new file mode 100644
index 0000000000..78473d69cd
--- /dev/null
+++ b/arch/powerpc/purgatory/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+targets += trampoline_$(BITS).o purgatory.ro
+
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
+
+$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE
+		$(call if_changed,ld)
+
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro
+
+obj-y	+= kexec-purgatory.o
diff --git a/arch/powerpc/purgatory/kexec-purgatory.S b/arch/powerpc/purgatory/kexec-purgatory.S
new file mode 100644
index 0000000000..f494fd5a05
--- /dev/null
+++ b/arch/powerpc/purgatory/kexec-purgatory.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+	.section .rodata, "a"
+
+	.align	8
+kexec_purgatory:
+	.globl	kexec_purgatory
+	.incbin	"arch/powerpc/purgatory/purgatory.ro"
+.Lkexec_purgatory_end:
+
+	.align	8
+kexec_purgatory_size:
+	.globl	kexec_purgatory_size
+	.quad	.Lkexec_purgatory_end - kexec_purgatory
diff --git a/arch/powerpc/purgatory/trampoline_64.S b/arch/powerpc/purgatory/trampoline_64.S
new file mode 100644
index 0000000000..b35837c138
--- /dev/null
+++ b/arch/powerpc/purgatory/trampoline_64.S
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * kexec trampoline
+ *
+ * Based on code taken from kexec-tools and kexec-lite.
+ *
+ * Copyright (C) 2004 - 2005, Milton D Miller II, IBM Corporation
+ * Copyright (C) 2006, Mohan Kumar M, IBM Corporation
+ * Copyright (C) 2013, Anton Blanchard, IBM Corporation
+ */
+
+#include <asm/asm-compat.h>
+#include <asm/crashdump-ppc64.h>
+
+	.balign 256
+	.globl purgatory_start
+purgatory_start:
+	b	master
+
+	/* ABI: possible run_at_load flag at 0x5c */
+	.org purgatory_start + 0x5c
+	.globl run_at_load
+run_at_load:
+	.long 0
+	.size run_at_load, . - run_at_load
+
+	/* ABI: slaves start at 0x60 with r3=phys */
+	.org purgatory_start + 0x60
+slave:
+	b .
+	/* ABI: end of copied region */
+	.org purgatory_start + 0x100
+	.size purgatory_start, . - purgatory_start
+
+/*
+ * The above 0x100 bytes at purgatory_start are replaced with the
+ * code from the kernel (or next stage) by setup_purgatory().
+ */
+
+master:
+	or	%r1,%r1,%r1	/* low priority to let other threads catchup */
+	isync
+	mr	%r17,%r3	/* save cpu id to r17 */
+	mr	%r15,%r4	/* save physical address in reg15 */
+
+	/* Work out where we're running */
+	bcl	20, 31, 0f
+0:	mflr	%r18
+
+	/*
+	 * Copy BACKUP_SRC_SIZE bytes from BACKUP_SRC_START to
+	 * backup_start 8 bytes at a time.
+	 *
+	 * Use r3 = dest, r4 = src, r5 = size, r6 = count
+	 */
+	ld	%r3, (backup_start - 0b)(%r18)
+	cmpdi	%cr0, %r3, 0
+	beq	.Lskip_copy	/* skip if there is no backup region */
+	lis	%r5, BACKUP_SRC_SIZE@h
+	ori	%r5, %r5, BACKUP_SRC_SIZE@l
+	cmpdi	%cr0, %r5, 0
+	beq	.Lskip_copy	/* skip if copy size is zero */
+	lis	%r4, BACKUP_SRC_START@h
+	ori	%r4, %r4, BACKUP_SRC_START@l
+	li	%r6, 0
+.Lcopy_loop:
+	ldx	%r0, %r6, %r4
+	stdx	%r0, %r6, %r3
+	addi	%r6, %r6, 8
+	cmpld	%cr0, %r6, %r5
+	blt	.Lcopy_loop
+
+.Lskip_copy:
+	or	%r3,%r3,%r3	/* ok now to high priority, lets boot */
+	lis	%r6,0x1
+	mtctr	%r6		/* delay a bit for slaves to catch up */
+	bdnz	.		/* before we overwrite 0-100 again */
+
+	/* load device-tree address */
+	ld	%r3, (dt_offset - 0b)(%r18)
+	mr	%r16,%r3	/* save dt address in reg16 */
+	li	%r4,20
+	LWZX_BE	%r6,%r3,%r4	/* fetch __be32 version number at byte 20 */
+	cmpwi	%cr0,%r6,2	/* v2 or later? */
+	blt	1f
+	li	%r4,28
+	STWX_BE	%r17,%r3,%r4	/* Store my cpu as __be32 at byte 28 */
+1:
+	/* Load opal base and entry values in r8 & r9 respectively */
+	ld	%r8,(opal_base - 0b)(%r18)
+	ld	%r9,(opal_entry - 0b)(%r18)
+
+	/* load the kernel address */
+	ld	%r4,(kernel - 0b)(%r18)
+
+	/* load the run_at_load flag */
+	/* possibly patched by kexec */
+	ld	%r6,(run_at_load - 0b)(%r18)
+	/* and patch it into the kernel */
+	stw	%r6,(0x5c)(%r4)
+
+	mr	%r3,%r16	/* restore dt address */
+
+	li	%r5,0		/* r5 will be 0 for kernel */
+
+	mfmsr	%r11
+	andi.	%r10,%r11,1	/* test MSR_LE */
+	bne	.Little_endian
+
+	mtctr	%r4		/* prepare branch to */
+	bctr			/* start kernel */
+
+.Little_endian:
+	mtsrr0	%r4		/* prepare branch to */
+
+	clrrdi	%r11,%r11,1	/* clear MSR_LE */
+	mtsrr1	%r11
+
+	rfid			/* update MSR and start kernel */
+
+	.balign 8
+	.globl kernel
+kernel:
+	.8byte  0x0
+	.size kernel, . - kernel
+
+	.balign 8
+	.globl dt_offset
+dt_offset:
+	.8byte  0x0
+	.size dt_offset, . - dt_offset
+
+	.balign 8
+	.globl backup_start
+backup_start:
+	.8byte  0x0
+	.size backup_start, . - backup_start
+
+	.balign 8
+	.globl opal_base
+opal_base:
+	.8byte  0x0
+	.size opal_base, . - opal_base
+
+	.balign 8
+	.globl opal_entry
+opal_entry:
+	.8byte  0x0
+	.size opal_entry, . - opal_entry
+
+	.data
+	.balign 8
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
+	.skip	32
+	.size purgatory_sha256_digest, . - purgatory_sha256_digest
+
+	.balign 8
+.globl purgatory_sha_regions
+purgatory_sha_regions:
+	.skip	8 * 2 * 16
+	.size purgatory_sha_regions, . - purgatory_sha_regions
diff --git a/arch/powerpc/sysdev/6xx-suspend.S b/arch/powerpc/sysdev/6xx-suspend.S
new file mode 100644
index 0000000000..e882524fff
--- /dev/null
+++ b/arch/powerpc/sysdev/6xx-suspend.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Enter and leave sleep state on chips with 6xx-style HID0
+ * power management bits, which don't leave sleep state via reset.
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2006-2007 Freescale Semiconductor, Inc.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+_GLOBAL(mpc6xx_enter_standby)
+	mflr	r4
+
+	mfspr	r5, SPRN_HID0
+	rlwinm	r5, r5, 0, ~(HID0_DOZE | HID0_NAP)
+	oris	r5, r5, HID0_SLEEP@h
+	mtspr	SPRN_HID0, r5
+	isync
+
+	lis	r5, ret_from_standby@h
+	ori	r5, r5, ret_from_standby@l
+	mtlr	r5
+
+	lwz	r6, TI_LOCAL_FLAGS(r2)
+	ori	r6, r6, _TLF_SLEEPING
+	stw	r6, TI_LOCAL_FLAGS(r2)
+
+	mfmsr	r5
+	ori	r5, r5, MSR_EE
+	oris	r5, r5, MSR_POW@h
+	sync
+	mtmsr	r5
+	isync
+
+1:	b	1b
+
+ret_from_standby:
+	mfspr	r5, SPRN_HID0
+	rlwinm	r5, r5, 0, ~HID0_SLEEP
+	mtspr	SPRN_HID0, r5
+
+	mtlr	r4
+	blr
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
new file mode 100644
index 0000000000..5aa92ff362
--- /dev/null
+++ b/arch/powerpc/sysdev/Kconfig
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.rst.
+#
+
+config PPC4xx_PCI_EXPRESS
+	bool
+	depends on PCI && 4xx
+
+config PPC4xx_HSTA_MSI
+	bool
+	depends on PCI_MSI
+	depends on PCI && 4xx
+
+config PPC_MSI_BITMAP
+	bool
+	depends on PCI_MSI
+	default y if MPIC
+	default y if FSL_PCI
+	default y if PPC_POWERNV
+
+source "arch/powerpc/sysdev/xics/Kconfig"
+source "arch/powerpc/sysdev/xive/Kconfig"
+
+config GE_FPGA
+	bool
+
+config FSL_CORENET_RCPM
+	bool
+	help
+	  This option enables support for RCPM (Run Control/Power Management).
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
new file mode 100644
index 0000000000..9cb1d02951
--- /dev/null
+++ b/arch/powerpc/sysdev/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-$(CONFIG_PPC64)		:= $(NO_MINIMAL_TOC)
+
+mpic-msi-obj-$(CONFIG_PCI_MSI)	+= mpic_msi.o mpic_u3msi.o
+obj-$(CONFIG_MPIC)		+= mpic.o $(mpic-msi-obj-y)
+obj-$(CONFIG_MPIC_TIMER)        += mpic_timer.o
+obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP)	+= fsl_mpic_timer_wakeup.o
+mpic-msgr-obj-$(CONFIG_MPIC_MSGR)	+= mpic_msgr.o
+obj-$(CONFIG_MPIC)		+= mpic.o $(mpic-msi-obj-y) $(mpic-msgr-obj-y)
+obj-$(CONFIG_PPC_EPAPR_HV_PIC)	+= ehv_pic.o
+fsl-msi-obj-$(CONFIG_PCI_MSI)	+= fsl_msi.o
+obj-$(CONFIG_PPC_MSI_BITMAP)	+= msi_bitmap.o
+
+obj-$(CONFIG_PPC_MPC106)	+= grackle.o
+obj-$(CONFIG_PPC_DCR_NATIVE)	+= dcr-low.o
+obj-$(CONFIG_PPC_PMI)		+= pmi.o
+obj-$(CONFIG_U3_DART)		+= dart_iommu.o
+obj-$(CONFIG_MMIO_NVRAM)	+= mmio_nvram.o
+obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o fsl_mpic_err.o
+obj-$(CONFIG_FSL_PCI)		+= fsl_pci.o $(fsl-msi-obj-y)
+obj-$(CONFIG_FSL_PMC)		+= fsl_pmc.o
+obj-$(CONFIG_FSL_CORENET_RCPM)	+= fsl_rcpm.o
+obj-$(CONFIG_FSL_LBC)		+= fsl_lbc.o
+obj-$(CONFIG_FSL_GTM)		+= fsl_gtm.o
+obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
+obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
+obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc_cmos_setup.o
+
+obj-$(CONFIG_PPC_INDIRECT_PCI)	+= indirect_pci.o
+obj-$(CONFIG_PPC_I8259)		+= i8259.o
+obj-$(CONFIG_IPIC)		+= ipic.o
+obj-$(CONFIG_OF_RTC)		+= of_rtc.o
+
+obj-$(CONFIG_CPM)		+= cpm_common.o
+obj-$(CONFIG_CPM2)		+= cpm2.o cpm2_pic.o cpm_gpio.o
+obj-$(CONFIG_8xx_GPIO)		+= cpm_gpio.o
+obj-$(CONFIG_QUICC_ENGINE)	+= cpm_common.o
+obj-$(CONFIG_PPC_DCR)		+= dcr.o
+
+obj-$(CONFIG_PPC_MPC512x)	+= mpc5xxx_clocks.o
+obj-$(CONFIG_PPC_MPC52xx)	+= mpc5xxx_clocks.o
+
+ifdef CONFIG_SUSPEND
+obj-$(CONFIG_PPC_BOOK3S_32)	+= 6xx-suspend.o
+endif
+
+obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS)	+= udbg_memcons.o
+
+obj-$(CONFIG_PPC_XICS)		+= xics/
+obj-$(CONFIG_PPC_XIVE)		+= xive/
+
+obj-$(CONFIG_GE_FPGA)		+= ge/
diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c
new file mode 100644
index 0000000000..14cc5ea936
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm2.c
@@ -0,0 +1,344 @@
+/*
+ * General Purpose functions for the global management of the
+ * 8260 Communication Processor Module.
+ * Copyright (c) 1999-2001 Dan Malek <dan@embeddedalley.com>
+ * Copyright (c) 2000 MontaVista Software, Inc (source@mvista.com)
+ *	2.3.99 Updates
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ * 	Merged to arch/powerpc from arch/ppc/syslib/cpm2_common.c
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/*
+ *
+ * In addition to the individual control of the communication
+ * channels, there are a few functions that globally affect the
+ * communication processor.
+ *
+ * Buffer descriptors must be allocated from the dual ported memory
+ * space.  The allocator for that is here.  When the communication
+ * process is reset, we reclaim the memory available.  There is
+ * currently no deallocator for this memory.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/cpm2.h>
+#include <asm/rheap.h>
+
+#include <sysdev/fsl_soc.h>
+
+cpm_cpm2_t __iomem *cpmp; /* Pointer to comm processor space */
+
+/* We allocate this here because it is used almost exclusively for
+ * the communication processor devices.
+ */
+cpm2_map_t __iomem *cpm2_immr;
+EXPORT_SYMBOL(cpm2_immr);
+
+#define CPM_MAP_SIZE	(0x40000)	/* 256k - the PQ3 reserve this amount
+					   of space for CPM as it is larger
+					   than on PQ2 */
+
+void __init cpm2_reset(void)
+{
+#ifdef CONFIG_PPC_85xx
+	cpm2_immr = ioremap(get_immrbase() + 0x80000, CPM_MAP_SIZE);
+#else
+	cpm2_immr = ioremap(get_immrbase(), CPM_MAP_SIZE);
+#endif
+	/* NOTE(review): the ioremap() result is used below without a NULL check. */
+	/* Tell everyone where the comm processor resides: cpmp points at the
+	 * im_cpm member of the block mapped above. */
+	cpmp = &cpm2_immr->im_cpm;
+
+#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
+	/* Reset the CPM; compiled out when CONFIG_PPC_EARLY_DEBUG_CPM is set.
+	 */
+	cpm_command(CPM_CR_RST, 0);
+#endif
+}
+
+static DEFINE_SPINLOCK(cmd_lock);
+
+#define MAX_CR_CMD_LOOPS        10000
+
+int cpm_command(u32 command, u8 opcode)
+{
+	unsigned long irq_state;
+	int tries, status = -EIO;
+
+	spin_lock_irqsave(&cmd_lock, irq_state);
+	/* Post the command, then poll until CPM_CR_FLG reads back clear. */
+	out_be32(&cpmp->cp_cpcr, command | opcode | CPM_CR_FLG);
+	for (tries = 0; tries < MAX_CR_CMD_LOOPS; tries++) {
+		if (!(in_be32(&cpmp->cp_cpcr) & CPM_CR_FLG)) {
+			status = 0;
+			break;
+		}
+	}
+	if (status)
+		printk(KERN_ERR "%s(): Not able to issue CPM command\n", __func__);
+	spin_unlock_irqrestore(&cmd_lock, irq_state);
+	return status;
+}
+EXPORT_SYMBOL(cpm_command);
+
+/* Set a baud rate generator.  This needs lots of work.  There are
+ * eight BRGs, which can be connected to the CPM channels or output
+ * as clocks.  The BRGs are in two different block of internal
+ * memory mapped space.
+ * The baud rate clock is the system clock divided by something.
+ * It was set up long ago during the initial boot phase and is
+ * given to us.
+ * Baud rate clocks are zero-based in the driver code (as that maps
+ * to port numbers).  Documentation uses 1-based numbering.
+ */
+void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src)
+{
+	u32 __iomem *brgc;
+	u32 cfg;
+
+	/*
+	 * BRGs 0-3 are indexed from im_brgc1 and BRGs 4 and up from im_brgc5.
+	 */
+	if (brg >= 4)
+		brgc = &cpm2_immr->im_brgc5 + (brg - 4);
+	else
+		brgc = &cpm2_immr->im_brgc1 + brg;
+
+	/* Divider field is (2 * clk / rate - 1) with bit 0 cleared, then the
+	 * enable bit and the caller's source-select bits are OR-ed in. */
+	cfg = (((clk * 2 / rate) - 1) & ~1) | CPM_BRG_EN | src;
+	if (div16)
+		cfg |= CPM_BRG_DIV16;
+
+	out_be32(brgc, cfg);
+}
+EXPORT_SYMBOL(__cpm2_setbrg);
+
+int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode)
+{
+	int shift;
+	int i, bits = 0;
+	u32 __iomem *reg;
+	u32 mask = 7;
+	/* Route @clock to @target's mux field(s) selected by @mode.
+	 * Each clk_map row is {target, clock source, 3-bit field value}. */
+	u8 clk_map[][3] = {
+		{CPM_CLK_FCC1, CPM_BRG5, 0},
+		{CPM_CLK_FCC1, CPM_BRG6, 1},
+		{CPM_CLK_FCC1, CPM_BRG7, 2},
+		{CPM_CLK_FCC1, CPM_BRG8, 3},
+		{CPM_CLK_FCC1, CPM_CLK9, 4},
+		{CPM_CLK_FCC1, CPM_CLK10, 5},
+		{CPM_CLK_FCC1, CPM_CLK11, 6},
+		{CPM_CLK_FCC1, CPM_CLK12, 7},
+		{CPM_CLK_FCC2, CPM_BRG5, 0},
+		{CPM_CLK_FCC2, CPM_BRG6, 1},
+		{CPM_CLK_FCC2, CPM_BRG7, 2},
+		{CPM_CLK_FCC2, CPM_BRG8, 3},
+		{CPM_CLK_FCC2, CPM_CLK13, 4},
+		{CPM_CLK_FCC2, CPM_CLK14, 5},
+		{CPM_CLK_FCC2, CPM_CLK15, 6},
+		{CPM_CLK_FCC2, CPM_CLK16, 7},
+		{CPM_CLK_FCC3, CPM_BRG5, 0},
+		{CPM_CLK_FCC3, CPM_BRG6, 1},
+		{CPM_CLK_FCC3, CPM_BRG7, 2},
+		{CPM_CLK_FCC3, CPM_BRG8, 3},
+		{CPM_CLK_FCC3, CPM_CLK13, 4},
+		{CPM_CLK_FCC3, CPM_CLK14, 5},
+		{CPM_CLK_FCC3, CPM_CLK15, 6},
+		{CPM_CLK_FCC3, CPM_CLK16, 7},
+		{CPM_CLK_SCC1, CPM_BRG1, 0},
+		{CPM_CLK_SCC1, CPM_BRG2, 1},
+		{CPM_CLK_SCC1, CPM_BRG3, 2},
+		{CPM_CLK_SCC1, CPM_BRG4, 3},
+		{CPM_CLK_SCC1, CPM_CLK11, 4},
+		{CPM_CLK_SCC1, CPM_CLK12, 5},
+		{CPM_CLK_SCC1, CPM_CLK3, 6},
+		{CPM_CLK_SCC1, CPM_CLK4, 7},
+		{CPM_CLK_SCC2, CPM_BRG1, 0},
+		{CPM_CLK_SCC2, CPM_BRG2, 1},
+		{CPM_CLK_SCC2, CPM_BRG3, 2},
+		{CPM_CLK_SCC2, CPM_BRG4, 3},
+		{CPM_CLK_SCC2, CPM_CLK11, 4},
+		{CPM_CLK_SCC2, CPM_CLK12, 5},
+		{CPM_CLK_SCC2, CPM_CLK3, 6},
+		{CPM_CLK_SCC2, CPM_CLK4, 7},
+		{CPM_CLK_SCC3, CPM_BRG1, 0},
+		{CPM_CLK_SCC3, CPM_BRG2, 1},
+		{CPM_CLK_SCC3, CPM_BRG3, 2},
+		{CPM_CLK_SCC3, CPM_BRG4, 3},
+		{CPM_CLK_SCC3, CPM_CLK5, 4},
+		{CPM_CLK_SCC3, CPM_CLK6, 5},
+		{CPM_CLK_SCC3, CPM_CLK7, 6},
+		{CPM_CLK_SCC3, CPM_CLK8, 7},
+		{CPM_CLK_SCC4, CPM_BRG1, 0},
+		{CPM_CLK_SCC4, CPM_BRG2, 1},
+		{CPM_CLK_SCC4, CPM_BRG3, 2},
+		{CPM_CLK_SCC4, CPM_BRG4, 3},
+		{CPM_CLK_SCC4, CPM_CLK5, 4},
+		{CPM_CLK_SCC4, CPM_CLK6, 5},
+		{CPM_CLK_SCC4, CPM_CLK7, 6},
+		{CPM_CLK_SCC4, CPM_CLK8, 7},
+	};
+	/* Locate the mux register and the bit offset of @target's field. */
+	switch (target) {
+	case CPM_CLK_SCC1:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 24;
+		break;
+	case CPM_CLK_SCC2:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 16;
+		break;
+	case CPM_CLK_SCC3:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 8;
+		break;
+	case CPM_CLK_SCC4:
+		reg = &cpm2_immr->im_cpmux.cmx_scr;
+		shift = 0;
+		break;
+	case CPM_CLK_FCC1:
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
+		shift = 24;
+		break;
+	case CPM_CLK_FCC2:
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
+		shift = 16;
+		break;
+	case CPM_CLK_FCC3:
+		reg = &cpm2_immr->im_cpmux.cmx_fcr;
+		shift = 8;
+		break;
+	default:
+		printk(KERN_ERR "cpm2_clock_setup: invalid clock target\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(clk_map); i++) {
+		if (clk_map[i][0] == target && clk_map[i][1] == clock) {
+			bits = clk_map[i][2];
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(clk_map))
+		return -EINVAL;	/* unknown pair: leave the mux register alone */
+
+	bits <<= shift;
+	mask <<= shift;
+	/* RTX programs both 3-bit fields, RX only the one 3 bits higher. */
+	if (mode == CPM_CLK_RTX) {
+		bits |= bits << 3;
+		mask |= mask << 3;
+	} else if (mode == CPM_CLK_RX) {
+		bits <<= 3;
+		mask <<= 3;
+	}
+
+	out_be32(reg, (in_be32(reg) & ~mask) | bits);
+
+	return 0;
+}
+
+int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock)
+{
+	int shift;
+	int i, bits = 0;
+	u8 __iomem *reg;
+	u8 mask = 3;
+	/* Route @clock to @target's 2-bit field in the cmx_smr mux register.
+	 * Each clk_map row is {target, clock source, 2-bit field value}. */
+	u8 clk_map[][3] = {
+		{CPM_CLK_SMC1, CPM_BRG1, 0},
+		{CPM_CLK_SMC1, CPM_BRG7, 1},
+		{CPM_CLK_SMC1, CPM_CLK7, 2},
+		{CPM_CLK_SMC1, CPM_CLK9, 3},
+		{CPM_CLK_SMC2, CPM_BRG2, 0},
+		{CPM_CLK_SMC2, CPM_BRG8, 1},
+		{CPM_CLK_SMC2, CPM_CLK4, 2},
+		{CPM_CLK_SMC2, CPM_CLK15, 3},
+	};
+	/* Both SMCs share cmx_smr; only the field offset differs. */
+	switch (target) {
+	case CPM_CLK_SMC1:
+		reg = &cpm2_immr->im_cpmux.cmx_smr;
+		mask = 3;
+		shift = 4;
+		break;
+	case CPM_CLK_SMC2:
+		reg = &cpm2_immr->im_cpmux.cmx_smr;
+		mask = 3;
+		shift = 0;
+		break;
+	default:
+		printk(KERN_ERR "cpm2_smc_clock_setup: invalid clock target\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(clk_map); i++) {
+		if (clk_map[i][0] == target && clk_map[i][1] == clock) {
+			bits = clk_map[i][2];
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(clk_map))
+		return -EINVAL;	/* unknown pair: leave the mux register alone */
+
+	bits <<= shift;
+	mask <<= shift;
+
+	out_8(reg, (in_8(reg) & ~mask) | bits);
+
+	return 0;
+}
+
+struct cpm2_ioports {
+	u32 dir, par, sor, odr, dat;	/* cpm2_set_pin() programs dir, par, sor and odr */
+	u32 res[3];	/* presumably reserved space up to the next port - TODO confirm */
+};
+
+void __init cpm2_set_pin(int port, int pin, int flags)
+{
+	struct cpm2_ioports __iomem *iop =
+		(struct cpm2_ioports __iomem *)&cpm2_immr->im_ioport;
+	/* Pin 0 is bit 31; an unsigned mask avoids shifting into int's sign bit. */
+	u32 mask = 1U << (31 - pin);
+
+	if (flags & CPM_PIN_OUTPUT)
+		setbits32(&iop[port].dir, mask);
+	else
+		clrbits32(&iop[port].dir, mask);
+
+	if (!(flags & CPM_PIN_GPIO))
+		setbits32(&iop[port].par, mask);
+	else
+		clrbits32(&iop[port].par, mask);
+
+	if (flags & CPM_PIN_SECONDARY)
+		setbits32(&iop[port].sor, mask);
+	else
+		clrbits32(&iop[port].sor, mask);
+
+	if (flags & CPM_PIN_OPENDRAIN)
+		setbits32(&iop[port].odr, mask);
+	else
+		clrbits32(&iop[port].odr, mask);
+}
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
new file mode 100644
index 0000000000..e14493685f
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -0,0 +1,267 @@
+/*
+ * Platform information definitions.
+ *
+ * Copied from arch/ppc/syslib/cpm2_pic.c with minor subsequent updates
+ * to make in work in arch/powerpc/. Original (c) belongs to Dan Malek.
+ *
+ * Author:  Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * 1999-2001 (c) Dan Malek <dan@embeddedalley.com>
+ * 2006 (c) MontaVista Software, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/* The CPM2 internal interrupt controller.  It is usually
+ * the only interrupt controller.
+ * There are two 32-bit registers (high/low) for up to 64
+ * possible interrupts.
+ *
+ * Now, the fun starts.....Interrupt Numbers DO NOT MAP
+ * in a simple arithmetic fashion to mask or pending registers.
+ * That is, interrupt 4 does not map to bit position 4.
+ * We create two tables, indexed by vector number, to indicate
+ * which register to use and which bit in the register to use.
+ */
+
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/immap_cpm2.h>
+#include <asm/io.h>
+
+#include "cpm2_pic.h"
+
+/* External IRQS */
+#define CPM2_IRQ_EXT1		19
+#define CPM2_IRQ_EXT7		25
+
+/* Port C IRQS */
+#define CPM2_IRQ_PORTC15	48
+#define CPM2_IRQ_PORTC0		63
+
+static intctl_cpm2_t __iomem *cpm2_intctl;
+
+static struct irq_domain *cpm2_pic_host;
+static unsigned long ppc_cached_irq_mask[2]; /* 2 32-bit registers */
+
+static const u_char irq_to_siureg[] = {	/* per-irq word offset added to &ic_simrh / &ic_sipnrh */
+	1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Bit numbers do not match the docs: they are precomputed so that the mask
+ * for a given irq is (1 << irq_to_siubit[irq]) within its irq_to_siureg word. */
+static const u_char irq_to_siubit[] = {
+	 0, 15, 14, 13, 12, 11, 10,  9,
+	 8,  7,  6,  5,  4,  3,  2,  1,
+	 2,  1,  0, 14, 13, 12, 11, 10,
+	 9,  8,  7,  6,  5,  4,  3,  0,
+	31, 30, 29, 28, 27, 26, 25, 24,
+	23, 22, 21, 20, 19, 18, 17, 16,
+	16, 17, 18, 19, 20, 21, 22, 23,
+	24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+static void cpm2_mask_irq(struct irq_data *d)
+{
+	unsigned int irq_nr = irqd_to_hwirq(d);
+	int bit = irq_to_siubit[irq_nr];
+	int word = irq_to_siureg[irq_nr];
+
+	/* Clear the source's bit in the cached mask and write the word back;
+	 * 1UL keeps the shift unsigned because bit can be 31. */
+	ppc_cached_irq_mask[word] &= ~(1UL << bit);
+	out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]);
+}
+
+static void cpm2_unmask_irq(struct irq_data *d)
+{
+	unsigned int irq_nr = irqd_to_hwirq(d);
+	int bit = irq_to_siubit[irq_nr];
+	int word = irq_to_siureg[irq_nr];
+
+	/* Set the source's bit in the cached mask and write the word back;
+	 * 1UL keeps the shift unsigned because bit can be 31. */
+	ppc_cached_irq_mask[word] |= 1UL << bit;
+	out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]);
+}
+
+static void cpm2_ack(struct irq_data *d)
+{
+	unsigned int irq_nr = irqd_to_hwirq(d);
+	int bit = irq_to_siubit[irq_nr];
+	int word = irq_to_siureg[irq_nr];
+
+	/* Write the source's bit to its pending word (offset from ic_sipnrh);
+	 * 1U keeps the shift unsigned because bit can be 31. */
+	out_be32(&cpm2_intctl->ic_sipnrh + word, 1U << bit);
+}
+
+static void cpm2_end_irq(struct irq_data *d)
+{
+	unsigned int irq_nr = irqd_to_hwirq(d);
+	int bit = irq_to_siubit[irq_nr];
+	int word = irq_to_siureg[irq_nr];
+
+	/* Set the source's bit in the cached mask and write the word back;
+	 * 1UL keeps the shift unsigned because bit can be 31. */
+	ppc_cached_irq_mask[word] |= 1UL << bit;
+	out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]);
+
+	/*
+	 * Work around large numbers of spurious IRQs on PowerPC 82xx
+	 * systems.
+	 */
+	mb();
+}
+
+static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int vold, vnew, edibit;
+
+	/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
+	 * IRQ_TYPE_EDGE_BOTH (default).  All others are IRQ_TYPE_EDGE_FALLING
+	 * or IRQ_TYPE_LEVEL_LOW (default)
+	 */
+	if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0) {
+		if (flow_type == IRQ_TYPE_NONE)
+			flow_type = IRQ_TYPE_EDGE_BOTH;
+
+		if (flow_type != IRQ_TYPE_EDGE_BOTH &&
+		    flow_type != IRQ_TYPE_EDGE_FALLING)
+			goto err_sense;
+	} else {
+		if (flow_type == IRQ_TYPE_NONE)
+			flow_type = IRQ_TYPE_LEVEL_LOW;
+
+		if (flow_type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH))
+			goto err_sense;
+	}
+
+	irqd_set_trigger_type(d, flow_type);
+	if (flow_type & IRQ_TYPE_LEVEL_LOW)
+		irq_set_handler_locked(d, handle_level_irq);
+	else
+		irq_set_handler_locked(d, handle_edge_irq);
+
+	/* internal IRQ senses are LEVEL_LOW
+	 * EXT IRQ and Port C IRQ senses are programmable
+	 */
+	if (src >= CPM2_IRQ_EXT1 && src <= CPM2_IRQ_EXT7) {
+		edibit = 14 - (src - CPM2_IRQ_EXT1);
+	} else if (src >= CPM2_IRQ_PORTC15 && src <= CPM2_IRQ_PORTC0) {
+		edibit = 31 - (CPM2_IRQ_PORTC0 - src);
+	} else {
+		return (flow_type & IRQ_TYPE_LEVEL_LOW) ?
+			IRQ_SET_MASK_OK_NOCOPY : -EINVAL;
+	}
+
+	vold = in_be32(&cpm2_intctl->ic_siexr);
+	/* Bit set = falling edge, clear otherwise; 1U since edibit can be 31. */
+	if ((flow_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_FALLING)
+		vnew = vold | (1U << edibit);
+	else
+		vnew = vold & ~(1U << edibit);
+
+	if (vold != vnew)
+		out_be32(&cpm2_intctl->ic_siexr, vnew);
+	return IRQ_SET_MASK_OK_NOCOPY;
+
+err_sense:
+	pr_err("CPM2 PIC: sense type 0x%x not supported\n", flow_type);
+	return -EINVAL;
+}
+
+static struct irq_chip cpm2_pic = {
+	.name = "CPM2 SIU",
+	.irq_mask = cpm2_mask_irq,
+	.irq_unmask = cpm2_unmask_irq,
+	.irq_ack = cpm2_ack,
+	.irq_eoi = cpm2_end_irq,	/* sets the source's mask bit again, then mb() */
+	.irq_set_type = cpm2_set_irq_type,
+	.flags = IRQCHIP_EOI_IF_HANDLED,
+};
+
+unsigned int cpm2_get_irq(void)
+{
+	unsigned long sivec;
+	int hwirq;
+
+	/* The interrupt code is whatever remains of SIVEC after >> 26. */
+	sivec = in_be32(&cpm2_intctl->ic_sivec);
+	hwirq = sivec >> 26;
+
+	/* Code 0 is reported as -1, converted to the unsigned return type. */
+	if (!hwirq)
+		return -1;
+	return irq_linear_revmap(cpm2_pic_host, hwirq);
+}
+
+static int cpm2_pic_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	pr_debug("cpm2_pic_host_map(%d, 0x%lx)\n", virq, hw);
+	/* Every mapping starts out flagged IRQ_LEVEL with the level flow handler. */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &cpm2_pic, handle_level_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops cpm2_pic_host_ops = {
+	.map = cpm2_pic_host_map,	/* attaches cpm2_pic and handle_level_irq to the virq */
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+void cpm2_pic_init(struct device_node *node)
+{
+	int i;
+	/* i only receives the dummy SIVEC read below; its value is never used. */
+	cpm2_intctl = &cpm2_immr->im_intctl;
+
+	/* Clear the CPM IRQ controller, in case it has any bits set
+	 * from the bootloader
+	 */
+
+	/* Mask out everything (matches the zero-initialised ppc_cached_irq_mask) */
+
+	out_be32(&cpm2_intctl->ic_simrh, 0x00000000);
+	out_be32(&cpm2_intctl->ic_simrl, 0x00000000);
+
+	wmb();
+
+	/* Ack everything by writing all-ones to both pending registers */
+	out_be32(&cpm2_intctl->ic_sipnrh, 0xffffffff);
+	out_be32(&cpm2_intctl->ic_sipnrl, 0xffffffff);
+	wmb();
+
+	/* Dummy read of the vector */
+	i = in_be32(&cpm2_intctl->ic_sivec);
+	rmb();
+
+	/* Initialize the default interrupt mapping priorities,
+	 * in case the boot rom changed something on us.
+	 */
+	out_be16(&cpm2_intctl->ic_sicr, 0);
+	out_be32(&cpm2_intctl->ic_scprrh, 0x05309770);
+	out_be32(&cpm2_intctl->ic_scprrl, 0x05309770);
+
+	/* Create a linear irq domain with room for 64 hardware irqs */
+	cpm2_pic_host = irq_domain_add_linear(node, 64, &cpm2_pic_host_ops, NULL);
+	if (cpm2_pic_host == NULL) {
+		printk(KERN_ERR "CPM2 PIC: failed to allocate irq host!\n");
+		return;
+	}
+}
diff --git a/arch/powerpc/sysdev/cpm2_pic.h b/arch/powerpc/sysdev/cpm2_pic.h
new file mode 100644
index 0000000000..a8a26951f3
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm2_pic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_KERNEL_CPM2_H
+#define _PPC_KERNEL_CPM2_H
+
+extern unsigned int cpm2_get_irq(void);
+
+extern void cpm2_pic_init(struct device_node *);
+
+#endif /* _PPC_KERNEL_CPM2_H */
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
new file mode 100644
index 0000000000..47db732981
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Common CPM code
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright 2007-2008,2010 Freescale Semiconductor, Inc.
+ *
+ * Some parts derived from commproc.c/cpm2_common.c, which is:
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ * Copyright (c) 1999-2001 Dan Malek <dan@embeddedalley.com>
+ * Copyright (c) 2000 MontaVista Software, Inc (source@mvista.com)
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ */
+
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include <asm/udbg.h>
+#include <asm/io.h>
+#include <asm/cpm.h>
+#include <asm/fixmap.h>
+#include <soc/fsl/qe/qe.h>
+
+#include <mm/mmu_decl.h>
+
+#if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO)
+#include <linux/gpio/legacy-of-mm-gpiochip.h>
+#endif
+
+static int __init cpm_init(void)
+{
+	struct device_node *cpm_node;
+
+	/* Either CPM generation will do; the node is only a presence check. */
+	cpm_node = of_find_compatible_node(NULL, NULL, "fsl,cpm1") ?:
+		   of_find_compatible_node(NULL, NULL, "fsl,cpm2");
+	if (!cpm_node)
+		return -ENODEV;
+	cpm_muram_init();
+	of_node_put(cpm_node);
+	return 0;
+}
+subsys_initcall(cpm_init);
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+static u32 __iomem *cpm_udbg_txdesc;
+static u8 __iomem *cpm_udbg_txbuf;
+
+static void udbg_putc_cpm(char c)
+{
+	if (c == '\n')
+		udbg_putc_cpm('\r');
+	/* Spin while bit 31 of descriptor word 0 is set (presumably "still owned by the CPM" - confirm). */
+	while (in_be32(&cpm_udbg_txdesc[0]) & 0x80000000)
+		;
+	/* Store the byte, then rewrite word 0 (presumably ready + wrap, length 1 - confirm). */
+	out_8(cpm_udbg_txbuf, c);
+	out_be32(&cpm_udbg_txdesc[0], 0xa0000001);
+}
+
+void __init udbg_init_cpm(void)
+{
+#ifdef CONFIG_PPC_8xx
+	mmu_mapin_immr();
+	/* Rebase the descriptor and its buffer from PHYS_IMMR_BASE to VIRT_IMMR_BASE. */
+	cpm_udbg_txdesc = (u32 __iomem __force *)
+			  (CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE +
+			   VIRT_IMMR_BASE);
+	cpm_udbg_txbuf = (u8 __iomem __force *)
+			 (in_be32(&cpm_udbg_txdesc[1]) - PHYS_IMMR_BASE +
+			  VIRT_IMMR_BASE);
+#else /* !CONFIG_PPC_8xx: the configured address and word 1 are used as-is */
+	cpm_udbg_txdesc = (u32 __iomem __force *)
+			  CONFIG_PPC_EARLY_DEBUG_CPM_ADDR;
+	cpm_udbg_txbuf = (u8 __iomem __force *)in_be32(&cpm_udbg_txdesc[1]);
+#endif
+	/* NOTE(review): word 1 of the descriptor was already read above, before this NULL test. */
+	if (cpm_udbg_txdesc) {
+#ifdef CONFIG_CPM2
+		setbat(1, 0xf0000000, 0xf0000000, 1024*1024, PAGE_KERNEL_NCG);
+#endif
+		udbg_putc = udbg_putc_cpm;
+	}
+}
+#endif
+
+#if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO)
+
+struct cpm2_ioports {
+	u32 dir, par, sor, odr, dat;	/* the GPIO code in this file only touches dir and dat */
+	u32 res[3];	/* presumably reserved space - TODO confirm against the register map */
+};
+
+struct cpm2_gpio32_chip {
+	struct of_mm_gpio_chip mm_gc;
+	spinlock_t lock;	/* held around dir and dat register updates */
+
+	/* shadow of the dat register: seeded by save_regs, rewritten on every set */
+	u32 cpdata;
+};
+
+static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+	struct cpm2_gpio32_chip *cpm2_gc =
+		container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	/* Seed the shadow word from the current contents of the dat register. */
+	cpm2_gc->cpdata = in_be32(&iop->dat);
+}
+
+static int cpm2_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	/* Line 0 is bit 31; 1U keeps that shift out of the int sign bit. */
+	u32 pin_mask = 1U << (31 - gpio);
+
+	/* Report the line state read from the dat register as 0 or 1. */
+	return !!(in_be32(&iop->dat) & pin_mask);
+}
+
+static void __cpm2_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
+	int value)
+{
+	struct cpm2_ioports __iomem *ports = mm_gc->regs;
+	struct cpm2_gpio32_chip *chip = gpiochip_get_data(&mm_gc->gc);
+	u32 shadow = chip->cpdata & ~pin_mask;
+
+	/* Rebuild the shadow word, then push the whole word to dat. */
+	if (value)
+		shadow |= pin_mask;
+	chip->cpdata = shadow;
+	out_be32(&ports->dat, shadow);
+}
+
+static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
+	unsigned long flags;
+	u32 pin_mask = 1U << (31 - gpio);	/* unsigned: gpio 0 is bit 31 */
+
+	spin_lock_irqsave(&cpm2_gc->lock, flags);
+	/* The shadow word is read-modify-written, so updates are serialised. */
+	__cpm2_gpio32_set(mm_gc, pin_mask, value);
+
+	spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+}
+
+static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	unsigned long flags;
+	u32 pin_mask = 1U << (31 - gpio);	/* unsigned: gpio 0 is bit 31 */
+
+	spin_lock_irqsave(&cpm2_gc->lock, flags);
+	/* Set the pin's dir bit first, then write the requested level. */
+	setbits32(&iop->dir, pin_mask);
+	__cpm2_gpio32_set(mm_gc, pin_mask, val);
+
+	spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+
+	return 0;
+}
+
+static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc);
+	struct cpm2_ioports __iomem *iop = mm_gc->regs;
+	unsigned long flags;
+	u32 pin_mask = 1U << (31 - gpio);	/* unsigned: gpio 0 is bit 31 */
+
+	spin_lock_irqsave(&cpm2_gc->lock, flags);
+	/* Clear the pin's dir bit (the bit cpm2_gpio32_dir_out() sets). */
+	clrbits32(&iop->dir, pin_mask);
+
+	spin_unlock_irqrestore(&cpm2_gc->lock, flags);
+
+	return 0;
+}
+
+int cpm2_gpiochip_add32(struct device *dev)
+{
+	struct cpm2_gpio32_chip *cpm2_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct gpio_chip *gc;
+	int ret;
+
+	cpm2_gc = kzalloc(sizeof(*cpm2_gc), GFP_KERNEL);
+	if (!cpm2_gc)
+		return -ENOMEM;
+	spin_lock_init(&cpm2_gc->lock);
+
+	mm_gc = &cpm2_gc->mm_gc;
+	gc = &mm_gc->gc;
+	mm_gc->save_regs = cpm2_gpio32_save_regs;
+	gc->ngpio = 32;
+	gc->direction_input = cpm2_gpio32_dir_in;
+	gc->direction_output = cpm2_gpio32_dir_out;
+	gc->get = cpm2_gpio32_get;
+	gc->set = cpm2_gpio32_set;
+	gc->parent = dev;
+	gc->owner = THIS_MODULE;
+	/* On failure the chip was not registered, so free it instead of leaking. */
+	ret = of_mm_gpiochip_add_data(dev->of_node, mm_gc, cpm2_gc);
+	if (ret)
+		kfree(cpm2_gc);
+	return ret;
+}
+#endif /* CONFIG_CPM2 || CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c
new file mode 100644
index 0000000000..40f57111e9
--- /dev/null
+++ b/arch/powerpc/sysdev/cpm_gpio.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common CPM GPIO wrapper for the CPM GPIO ports
+ *
+ * Author: Christophe Leroy <christophe.leroy@c-s.fr>
+ *
+ * Copyright 2017 CS Systemes d'Information.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <asm/cpm.h>
+#ifdef CONFIG_8xx_GPIO
+#include <asm/cpm1.h>
+#endif
+
+static int cpm_gpio_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	int (*gp_add)(struct device *dev) = of_device_get_match_data(dev);
+	/* The matched entry's .data is the bank-specific gpiochip add function. */
+	if (!gp_add)
+		return -ENODEV;
+
+	return gp_add(dev);
+}
+
+static const struct of_device_id cpm_gpio_match[] = {
+#ifdef CONFIG_8xx_GPIO
+	{
+		.compatible = "fsl,cpm1-pario-bank-a",
+		.data = cpm1_gpiochip_add16,
+	},
+	{
+		.compatible = "fsl,cpm1-pario-bank-b",
+		.data = cpm1_gpiochip_add32,
+	},
+	{
+		.compatible = "fsl,cpm1-pario-bank-c",
+		.data = cpm1_gpiochip_add16,
+	},
+	{
+		.compatible = "fsl,cpm1-pario-bank-d",
+		.data = cpm1_gpiochip_add16,
+	},
+	/* Port E uses CPM2 layout */
+	{
+		.compatible = "fsl,cpm1-pario-bank-e",
+		.data = cpm2_gpiochip_add32,
+	},
+#endif
+	{
+		.compatible = "fsl,cpm2-pario-bank",
+		.data = cpm2_gpiochip_add32,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, cpm_gpio_match);
+
+static struct platform_driver cpm_gpio_driver = {
+	.probe		= cpm_gpio_probe,
+	.driver		= {
+		.name	= "cpm-gpio",
+		.of_match_table	= cpm_gpio_match,
+	},
+};
+
+static int __init cpm_gpio_init(void)
+{
+	return platform_driver_register(&cpm_gpio_driver);
+}
+arch_initcall(cpm_gpio_init);
+
+MODULE_AUTHOR("Christophe Leroy <christophe.leroy@c-s.fr>");
+MODULE_DESCRIPTION("Driver for CPM GPIO");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cpm-gpio");
diff --git a/arch/powerpc/sysdev/dart.h b/arch/powerpc/sysdev/dart.h
new file mode 100644
index 0000000000..7f5df03d33
--- /dev/null
+++ b/arch/powerpc/sysdev/dart.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ */
+
+#ifndef _POWERPC_SYSDEV_DART_H
+#define _POWERPC_SYSDEV_DART_H
+
+
+/* Offset from base to control register */
+#define DART_CNTL	0
+
+/* Offset from base to exception register */
+#define DART_EXCP_U3	0x10
+/* Offset from base to TLB tag registers */
+#define DART_TAGS_U3	0x1000
+
+/* U4 registers */
+#define DART_BASE_U4	0x10
+#define DART_SIZE_U4	0x20
+#define DART_EXCP_U4	0x30
+#define DART_TAGS_U4	0x1000
+
+/* Control Register fields */
+
+/* U3 registers */
+#define DART_CNTL_U3_BASE_MASK	0xfffff
+#define DART_CNTL_U3_BASE_SHIFT	12
+#define DART_CNTL_U3_FLUSHTLB	0x400
+#define DART_CNTL_U3_ENABLE	0x200
+#define DART_CNTL_U3_SIZE_MASK	0x1ff
+#define DART_CNTL_U3_SIZE_SHIFT	0
+
+/* U4 registers */
+#define DART_BASE_U4_BASE_MASK	0xffffff
+#define DART_BASE_U4_BASE_SHIFT	0
+#define DART_CNTL_U4_ENABLE	0x80000000
+#define DART_CNTL_U4_IONE	0x40000000
+#define DART_CNTL_U4_FLUSHTLB	0x20000000
+#define DART_CNTL_U4_IDLE	0x10000000
+#define DART_CNTL_U4_PAR_EN	0x08000000
+#define DART_CNTL_U4_IONE_MASK	0x07ffffff
+#define DART_SIZE_U4_SIZE_MASK	0x1fff
+#define DART_SIZE_U4_SIZE_SHIFT	0
+
+#define DART_REG(r)	(dart + ((r) >> 2))	/* r >> 2 is the index added to 'dart', which must be in scope */
+#define DART_IN(r)	(in_be32(DART_REG(r)))	/* big-endian 32-bit read of register r */
+#define DART_OUT(r,v)	(out_be32(DART_REG(r), (v)))	/* big-endian 32-bit write of v to register r */
+
+
+/* size of table in pages */
+
+
+/* DART table fields */
+
+#define DARTMAP_VALID   0x80000000
+#define DARTMAP_RPNMASK 0x00ffffff
+
+
+#define DART_PAGE_SHIFT		12
+#define DART_PAGE_SIZE		(1 << DART_PAGE_SHIFT)
+
+
+#endif /* _POWERPC_SYSDEV_DART_H */
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
new file mode 100644
index 0000000000..98096bbfd6
--- /dev/null
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/sysdev/dart_iommu.c
+ *
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
+ *                    IBM Corporation
+ *
+ * Based on pSeries_iommu.c:
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ *
+ * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/suspend.h>
+#include <linux/memblock.h>
+#include <linux/gfp.h>
+#include <linux/kmemleak.h>
+#include <linux/of_address.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/ppc-pci.h>
+
+#include "dart.h"
+
+/* DART table address and size */
+static u32 *dart_tablebase;
+static unsigned long dart_tablesize;
+
+/* Mapped base address for the dart */
+static unsigned int __iomem *dart;
+
+/* Dummy val that entries are set to when unused */
+static unsigned int dart_emptyval;
+
+static struct iommu_table iommu_table_dart;
+static int iommu_table_dart_inited;
+static int dart_dirty;
+static int dart_is_u4;
+
+#define DART_U4_BYPASS_BASE	0x8000000000ull
+
+#define DBG(...)
+
+static DEFINE_SPINLOCK(invalidate_lock);
+
+static inline void dart_tlb_invalidate_all(void)
+{
+	unsigned long l = 0;
+	unsigned int reg, inv_bit;
+	unsigned long limit;
+	unsigned long flags;
+
+	spin_lock_irqsave(&invalidate_lock, flags);
+
+	DBG("dart: flush\n");
+
+	/* To invalidate the DART, set the FLUSHTLB bit (U3 or U4 variant) in
+	 * the control register and wait for it to read back clear.
+	 *
+	 * Gotcha: Sometimes, the DART won't detect that the bit gets
+	 * set. If so, clear it and set it again, polling twice as long.
+	 */
+
+	limit = 0;
+	/* Attempt k polls at most 1 << k times; once k == 4 fails we panic. */
+	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
+retry:
+	l = 0;
+	reg = DART_IN(DART_CNTL);
+	reg |= inv_bit;
+	DART_OUT(DART_CNTL, reg);
+
+	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
+		l++;
+	if (l == (1L << limit)) {
+		if (limit < 4) {
+			limit++;
+			reg = DART_IN(DART_CNTL);
+			reg &= ~inv_bit;
+			DART_OUT(DART_CNTL, reg);
+			goto retry;
+		} else
+			panic("DART: TLB did not flush after waiting a long "
+			      "time. Buggy U3 ?");
+	}
+
+	spin_unlock_irqrestore(&invalidate_lock, flags);
+}
+
+static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
+{
+	unsigned int reg;
+	unsigned int l, limit;
+	unsigned long flags;
+
+	spin_lock_irqsave(&invalidate_lock, flags);
+	/* Write ENABLE | IONE plus bus_rpn masked to the IONE field (U4 bit layout). */
+	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
+		(bus_rpn & DART_CNTL_U4_IONE_MASK);
+	DART_OUT(DART_CNTL, reg);
+	/* Poll for IONE to clear in rounds of 1 << limit reads; the write is not repeated. */
+	limit = 0;
+wait_more:
+	l = 0;
+	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
+		rmb();
+		l++;
+	}
+
+	if (l == (1L << limit)) {
+		if (limit < 4) {
+			limit++;
+			goto wait_more;
+		} else
+			panic("DART: TLB did not flush after waiting a long "
+			      "time. Buggy U4 ?");
+	}
+
+	spin_unlock_irqrestore(&invalidate_lock, flags);
+}
+
+static void dart_cache_sync(unsigned int *base, unsigned int count)
+{
+	/*
+	 * We add 1 to the number of entries to flush, following a
+	 * comment in Darwin indicating that the memory controller
+	 * can prefetch unmapped memory under some circumstances.
+	 */
+	unsigned long start = (unsigned long)base;
+	unsigned long end = start + (count + 1) * sizeof(unsigned int);
+	unsigned int tmp;
+
+	/* Perform a standard cache flush over [start, end) */
+	flush_dcache_range(start, end);
+
+	/*
+	 * Perform the sequence described in the CPC925 manual to ensure all
+	 * the data gets to a point the cache incoherent DART hardware will
+	 * see. The dcbf and the dummy lwz both use the address in 'end'.
+	 */
+	asm volatile(" sync;"
+		     " isync;"
+		     " dcbf 0,%1;"
+		     " sync;"
+		     " isync;"
+		     " lwz %0,0(%1);"
+		     " isync" : "=r" (tmp) : "r" (end) : "memory");
+}
+
+static void dart_flush(struct iommu_table *tbl)
+{
+	mb();
+	if (dart_dirty) {	/* set by dart_build() when the DART is not a U4 */
+		dart_tlb_invalidate_all();
+		dart_dirty = 0;
+	}
+}
+
+static int dart_build(struct iommu_table *tbl, long index,
+		       long npages, unsigned long uaddr,
+		       enum dma_data_direction direction,
+		       unsigned long attrs)
+{
+	unsigned int *first = ((unsigned int *)tbl->it_base) + index;
+	unsigned int *entry = first;
+	unsigned int rpn;
+	long left = npages;
+
+	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
+
+	/*
+	 * One table entry per DART page: the valid bit plus the page's
+	 * physical page number, with uaddr advancing a DART page at a time.
+	 */
+	while (left--) {
+		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;
+		*entry++ = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);
+		uaddr += DART_PAGE_SIZE;
+	}
+	dart_cache_sync(first, npages);
+
+	/* U4 invalidates each new entry now; others defer to dart_flush(). */
+	if (!dart_is_u4) {
+		dart_dirty = 1;
+		return 0;
+	}
+
+	rpn = index;
+	while (npages--)
+		dart_tlb_invalidate_one(rpn++);
+
+	return 0;
+}
+
+
+static void dart_free(struct iommu_table *tbl, long index, long npages)
+{
+	unsigned int *first = ((unsigned int *)tbl->it_base) + index;
+	unsigned int *entry = first;
+	long left = npages;
+
+	/*
+	 * No TLB invalidate here: stale translations only mean that a buggy
+	 * driver's late DMA goes uncaught. Entries are set back to the
+	 * dummy value (dart_emptyval) rather than being zeroed.
+	 */
+	DBG("dart: free at: %lx, %lx\n", index, npages);
+
+	while (left--)
+		*entry++ = dart_emptyval;
+
+	/* Push the rewritten entries out to memory for the DART. */
+	dart_cache_sync(first, npages);
+}
+
+static void __init allocate_dart(void)
+{
+	unsigned long tmp;
+
+	/* 512 pages (2MB) is max DART tablesize. */
+	dart_tablesize = 1UL << 21;
+
+	/*
+	 * 16MB (1 << 24) alignment. We allocate a full 16MB chunk since we
+	 * will blow up an entire large page anyway in the kernel mapping.
+	 */
+	dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
+					MEMBLOCK_LOW_LIMIT, SZ_2G,
+					NUMA_NO_NODE);
+	if (!dart_tablebase)
+		panic("Failed to allocate 16MB below 2GB for DART table\n");
+
+	/* There is no point scanning the DART space for leaks */
+	kmemleak_no_scan((void *)dart_tablebase);
+
+	/* Allocate a spare page to map all invalid DART pages. We need to do
+	 * that to work around what looks like a problem with the HT bridge
+	 * prefetching into invalid pages and corrupting data.
+	 */
+	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+	if (!tmp)
+		panic("DART: table allocation failed\n");
+	/* Unused entries get the valid bit plus the spare page's page number. */
+	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
+					 DARTMAP_RPNMASK);
+
+	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
+}
+
+/*
+ * dart_init - map the DART registers, build an empty table and enable it
+ * @dart_node: device-tree node describing the DART
+ *
+ * Returns -ENODEV when the IOMMU is switched off, or when it was not forced
+ * on and RAM ends at or below 1GB; returns 0 once the DART is enabled.
+ * Panics if the register resource cannot be found or mapped.
+ */
+static int __init dart_init(struct device_node *dart_node)
+{
+	unsigned int i;
+	unsigned long base, size;
+	struct resource r;
+
+	/* IOMMU disabled by the user ? bail out */
+	if (iommu_is_off)
+		return -ENODEV;
+
+	/*
+	 * Only use the DART if the machine has more than 1GB of RAM
+	 * or if requested with iommu=on on cmdline.
+	 *
+	 * 1GB of RAM is picked as limit because some default devices
+	 * (i.e. Airport Extreme) have 30 bit address range limits.
+	 */
+
+	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
+		return -ENODEV;
+
+	/* Get DART registers */
+	if (of_address_to_resource(dart_node, 0, &r))
+		panic("DART: can't get register base ! ");
+
+	/* Map in DART registers */
+	dart = ioremap(r.start, resource_size(&r));
+	if (dart == NULL)
+		panic("DART: Cannot map registers!");
+
+	/* Allocate the DART and dummy page */
+	allocate_dart();
+
+	/* Fill initial table: every 4-byte entry points at the spare page */
+	for (i = 0; i < dart_tablesize/4; i++)
+		dart_tablebase[i] = dart_emptyval;
+
+	/* Push to memory */
+	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
+
+	/* Initialize DART with table base and enable it. */
+	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
+	size = dart_tablesize >> DART_PAGE_SHIFT;
+	if (dart_is_u4) {
+		/* U4: base, size and enable live in separate registers. */
+		size &= DART_SIZE_U4_SIZE_MASK;
+		DART_OUT(DART_BASE_U4, base);
+		DART_OUT(DART_SIZE_U4, size);
+		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
+	} else {
+		/* U3: base, size and enable are packed into DART_CNTL. */
+		size &= DART_CNTL_U3_SIZE_MASK;
+		DART_OUT(DART_CNTL,
+			 DART_CNTL_U3_ENABLE |
+			 (base << DART_CNTL_U3_BASE_SHIFT) |
+			 (size << DART_CNTL_U3_SIZE_SHIFT));
+	}
+
+	/* Invalidate DART to get rid of possible stale TLBs */
+	dart_tlb_invalidate_all();
+
+	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
+	       dart_is_u4 ? "U4" : "U3");
+
+	return 0;
+}
+
+/* Table callbacks handed to the common IOMMU code for the DART table. */
+static struct iommu_table_ops iommu_dart_ops = {
+	.flush	= dart_flush,
+	.clear	= dart_free,
+	.set	= dart_build,
+};
+
+/*
+ * Describe the DART table to the common IOMMU layer and register it.
+ * Panics if iommu_init_table() fails.
+ */
+static void iommu_table_dart_setup(void)
+{
+	struct iommu_table *tbl = &iommu_table_dart;
+
+	/* Where the table lives and which callbacks drive it */
+	tbl->it_base = (unsigned long)dart_tablebase;
+	tbl->it_ops = &iommu_dart_ops;
+
+	/* Geometry: it_size is in number of entries */
+	tbl->it_size = dart_tablesize / sizeof(u32);
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	tbl->it_blocksize = 1;
+
+	tbl->it_busno = 0;
+	tbl->it_offset = 0;
+	tbl->it_index = 0;
+
+	/* Initialize the common IOMMU code */
+	if (!iommu_init_table(tbl, -1, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	/* Reserve the last page of the DART to avoid possible prefetch
+	 * past the DART mapped area
+	 */
+	set_bit(tbl->it_size - 1, tbl->it_map);
+}
+
+/* Bus setup hook: initialise the shared DART table on first use only. */
+static void pci_dma_bus_setup_dart(struct pci_bus *bus)
+{
+	if (iommu_table_dart_inited)
+		return;
+
+	iommu_table_dart_inited = 1;
+	iommu_table_dart_setup();
+}
+
+/*
+ * Walk from @dev's device-tree node up through its parents and report
+ * whether any of them is compatible with "U4-pcie" or "u4-pcie".
+ */
+static bool dart_device_on_pcie(struct device *dev)
+{
+	struct device_node *node;
+
+	for (node = of_node_get(dev->of_node); node;
+	     node = of_get_next_parent(node)) {
+		if (!of_device_is_compatible(node, "U4-pcie") &&
+		    !of_device_is_compatible(node, "u4-pcie"))
+			continue;
+
+		/* Found it: drop the reference we are still holding. */
+		of_node_put(node);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Per-device DMA setup: devices behind the U4 PCIe bridge get the bypass
+ * window as their DMA offset; every device is attached to the DART table.
+ */
+static void pci_dma_dev_setup_dart(struct pci_dev *dev)
+{
+	struct device *d = &dev->dev;
+
+	if (dart_is_u4 && dart_device_on_pcie(d))
+		d->archdata.dma_offset = DART_U4_BYPASS_BASE;
+
+	set_iommu_table_base(d, &iommu_table_dart);
+}
+
+/*
+ * Bypass is offered only on U4, only for devices under the U4 PCIe bridge,
+ * and only when the device's DMA mask covers at least 40 bits.
+ */
+static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
+{
+	if (!dart_is_u4)
+		return false;
+
+	if (!dart_device_on_pcie(&dev->dev))
+		return false;
+
+	return mask >= DMA_BIT_MASK(40);
+}
+
+/*
+ * Locate a U3 or U4 DART in the device tree, bring it up and, on success,
+ * install the DART hooks into @controller_ops and switch PCI DMA to the
+ * IOMMU ops.  Leaves everything untouched when no DART is found or
+ * dart_init() declines.
+ */
+void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
+{
+	struct device_node *dart_np;
+
+	/* Find the DART in the device-tree */
+	dart_np = of_find_compatible_node(NULL, "dart", "u3-dart");
+	if (!dart_np) {
+		dart_np = of_find_compatible_node(NULL, "dart", "u4-dart");
+		if (!dart_np)
+			return;	/* use default direct_dma_ops */
+		dart_is_u4 = 1;
+	}
+
+	/* Initialize the DART HW */
+	if (dart_init(dart_np) != 0)
+		goto out_put;
+
+	/*
+	 * U4 supports a DART bypass, we use it for 64-bit capable devices to
+	 * improve performance.  However, that only works for devices connected
+	 * to the U4 own PCIe interface, not bridged through hypertransport.
+	 * We need the device to support at least 40 bits of addresses.
+	 */
+	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
+	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
+	controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;
+
+	/* Setup pci_dma ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+
+out_put:
+	of_node_put(dart_np);
+}
+
+#ifdef CONFIG_PM
+/* Restore hook: re-sync the whole DART table, then invalidate the TLB. */
+static void iommu_dart_restore(void)
+{
+	const unsigned long nr_entries = dart_tablesize / sizeof(u32);
+
+	dart_cache_sync(dart_tablebase, nr_entries);
+	dart_tlb_invalidate_all();
+}
+
+/* Late initcall: hook up the restore callback only if a DART table exists. */
+static int __init iommu_init_late_dart(void)
+{
+	if (dart_tablebase)
+		ppc_md.iommu_restore = iommu_dart_restore;
+
+	return 0;
+}
+
+late_initcall(iommu_init_late_dart);
+#endif /* CONFIG_PM */
diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S
new file mode 100644
index 0000000000..e8401b205d
--- /dev/null
+++ b/arch/powerpc/sysdev/dcr-low.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * "Indirect" DCR access
+ *
+ * Copyright (c) 2004 Eugene Surovegin <ebs@ebshome.net>
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+#include <asm/bug.h>
+
+/*
+ * DCR_ACCESS_PROLOG(table) - dispatch to the per-DCR stub for the number in r3.
+ *
+ * r3 is compared (unsigned) against 1024.  It is then reduced to
+ * (r3 & 0x3ff) << 4, i.e. a 16-byte stride, and added to the address of
+ * @table, which is built in r5.  If the comparison found r3 >= 1024 the
+ * code branches to the trap at 1:, which is recorded as a BUG entry;
+ * otherwise it jumps to the computed address through CTR and the stub's
+ * own blr returns to the caller.
+ */
+#define DCR_ACCESS_PROLOG(table) \
+	cmplwi	cr0,r3,1024;	 \
+	rlwinm  r3,r3,4,18,27;   \
+	lis     r5,table@h;      \
+	ori     r5,r5,table@l;   \
+	add     r3,r3,r5;        \
+	bge-	1f;		 \
+	mtctr   r3;              \
+	bctr;			 \
+1:	trap;			 \
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;	\
+	blr
+
+/*
+ * __mfdcr: r3 holds the DCR number on entry; the selected stub in
+ * __mfdcr_table executes "mfdcr r3,<n>" so the value comes back in r3.
+ */
+_GLOBAL(__mfdcr)
+	DCR_ACCESS_PROLOG(__mfdcr_table)
+EXPORT_SYMBOL(__mfdcr)
+
+/*
+ * __mtdcr: r3 holds the DCR number and r4 the value on entry; the selected
+ * stub in __mtdcr_table executes "mtdcr <n>,r4".
+ */
+_GLOBAL(__mtdcr)
+	DCR_ACCESS_PROLOG(__mtdcr_table)
+EXPORT_SYMBOL(__mtdcr)
+
+/*
+ * Interleaved stub table.  Every DCR number owns four instructions:
+ *	mfdcr r3,<n>; blr; mtdcr <n>,r4; blr
+ * With 4-byte instructions that is the 16-byte stride computed by
+ * DCR_ACCESS_PROLOG.  __mfdcr_table labels the mfdcr of entry 0 and
+ * __mtdcr_table labels the mtdcr of entry 0, two instructions further on,
+ * so the same stride serves both tables.  Entry 0 is written out by hand;
+ * the .rept block emits entries 1 through 1023.
+ */
+__mfdcr_table:
+	mfdcr  r3,0; blr
+__mtdcr_table:
+	mtdcr  0,r4; blr
+
+dcr     = 1
+        .rept   1023
+	mfdcr   r3,dcr; blr
+	mtdcr   dcr,r4; blr
+	dcr     = dcr + 1
+	.endr
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
new file mode 100644
index 0000000000..70ce66eadf
--- /dev/null
+++ b/arch/powerpc/sysdev/dcr.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
+ *                    <benh@kernel.crashing.org>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/of_address.h>
+#include <asm/dcr.h>
+
+#ifdef CONFIG_PPC_DCR_MMIO
+/*
+ * Starting at @node, follow "dcr-parent" phandles (or, where a node has no
+ * such property, the ordinary tree parent) until a node carrying the
+ * "dcr-controller" property is reached.
+ *
+ * Returns that node with a reference held, or NULL if the walk runs out.
+ */
+static struct device_node *find_dcr_parent(struct device_node *node)
+{
+	struct device_node *cur = of_node_get(node);
+
+	while (cur && !of_property_read_bool(cur, "dcr-controller")) {
+		const u32 *phandle = of_get_property(cur, "dcr-parent", NULL);
+		struct device_node *next;
+
+		if (phandle)
+			next = of_find_node_by_phandle(*phandle);
+		else
+			next = of_get_parent(cur);
+
+		/* Drop the node we are leaving only after fetching the next. */
+		of_node_put(cur);
+		cur = next;
+	}
+
+	return cur;
+}
+#endif
+
+#if defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO)
+
+/*
+ * Report whether @host holds a usable mapping by delegating to the native
+ * or MMIO check according to host.type; any other type is not OK.
+ */
+bool dcr_map_ok_generic(dcr_host_t host)
+{
+	if (host.type == DCR_HOST_NATIVE)
+		return dcr_map_ok_native(host.host.native);
+
+	if (host.type == DCR_HOST_MMIO)
+		return dcr_map_ok_mmio(host.host.mmio);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(dcr_map_ok_generic);
+
+/*
+ * dcr_map_generic - map a DCR range using the access method of its controller
+ * @dev:   device node owning the DCRs
+ * @dcr_n: first DCR number
+ * @dcr_c: number of DCRs
+ *
+ * Looks up the DCR controller for @dev and reads its "dcr-access-method"
+ * property to choose between the native and MMIO back-ends.
+ *
+ * Returns a host whose type is DCR_HOST_INVALID when no controller is
+ * found, when the property is missing, or when its value is neither
+ * "native" nor "mmio".
+ */
+dcr_host_t dcr_map_generic(struct device_node *dev,
+			   unsigned int dcr_n,
+			   unsigned int dcr_c)
+{
+	dcr_host_t host;
+	struct device_node *dp;
+	const char *prop;
+
+	host.type = DCR_HOST_INVALID;
+
+	dp = find_dcr_parent(dev);
+	if (dp == NULL)
+		return host;
+
+	prop = of_get_property(dp, "dcr-access-method", NULL);
+
+	pr_debug("dcr_map_generic(dcr-access-method = %s)\n", prop);
+
+	/* The property may be absent; never hand a NULL to strcmp(). */
+	if (prop == NULL)
+		goto out;
+
+	if (!strcmp(prop, "native")) {
+		host.type = DCR_HOST_NATIVE;
+		host.host.native = dcr_map_native(dev, dcr_n, dcr_c);
+	} else if (!strcmp(prop, "mmio")) {
+		host.type = DCR_HOST_MMIO;
+		host.host.mmio = dcr_map_mmio(dev, dcr_n, dcr_c);
+	}
+
+out:
+	of_node_put(dp);
+	return host;
+}
+EXPORT_SYMBOL_GPL(dcr_map_generic);
+
+/*
+ * Undo dcr_map_generic() through the back-end selected by host.type.
+ * Warns when called with any other host type.
+ */
+void dcr_unmap_generic(dcr_host_t host, unsigned int dcr_c)
+{
+	if (host.type == DCR_HOST_NATIVE) {
+		dcr_unmap_native(host.host.native, dcr_c);
+		return;
+	}
+
+	if (host.type == DCR_HOST_MMIO) {
+		dcr_unmap_mmio(host.host.mmio, dcr_c);
+		return;
+	}
+
+	/* host.type == DCR_HOST_INVALID */
+	WARN_ON(true);
+}
+EXPORT_SYMBOL_GPL(dcr_unmap_generic);
+
+/*
+ * Read DCR @dcr_n through the back-end selected by host.type.
+ * Warns and returns 0 for any other host type.
+ */
+u32 dcr_read_generic(dcr_host_t host, unsigned int dcr_n)
+{
+	if (host.type == DCR_HOST_NATIVE)
+		return dcr_read_native(host.host.native, dcr_n);
+
+	if (host.type == DCR_HOST_MMIO)
+		return dcr_read_mmio(host.host.mmio, dcr_n);
+
+	/* host.type == DCR_HOST_INVALID */
+	WARN_ON(true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dcr_read_generic);
+
+/*
+ * Write @value to DCR @dcr_n through the back-end selected by host.type.
+ * Warns when called with any other host type.
+ */
+void dcr_write_generic(dcr_host_t host, unsigned int dcr_n, u32 value)
+{
+	if (host.type == DCR_HOST_NATIVE) {
+		dcr_write_native(host.host.native, dcr_n, value);
+		return;
+	}
+
+	if (host.type == DCR_HOST_MMIO) {
+		dcr_write_mmio(host.host.mmio, dcr_n, value);
+		return;
+	}
+
+	/* host.type == DCR_HOST_INVALID */
+	WARN_ON(true);
+}
+EXPORT_SYMBOL_GPL(dcr_write_generic);
+
+#endif /* defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) */
+
+/*
+ * Return the first cell of the @index'th two-cell pair in @np's "dcr-reg"
+ * property.  Returns 0 when the property is missing, its byte length is
+ * odd, or @index lies beyond the last complete 8-byte pair.
+ */
+unsigned int dcr_resource_start(const struct device_node *np,
+				unsigned int index)
+{
+	unsigned int ds;
+	const u32 *dr = of_get_property(np, "dcr-reg", &ds);
+
+	if (!dr)
+		return 0;
+	if (ds & 1)
+		return 0;
+	if (index >= ds / 8)
+		return 0;
+
+	return dr[2 * index];
+}
+EXPORT_SYMBOL_GPL(dcr_resource_start);
+
+/*
+ * Return the second cell of the @index'th two-cell pair in @np's "dcr-reg"
+ * property.  Returns 0 when the property is missing, its byte length is
+ * odd, or @index lies beyond the last complete 8-byte pair.
+ */
+unsigned int dcr_resource_len(const struct device_node *np, unsigned int index)
+{
+	unsigned int ds;
+	const u32 *dr = of_get_property(np, "dcr-reg", &ds);
+
+	if (!dr)
+		return 0;
+	if (ds & 1)
+		return 0;
+	if (index >= ds / 8)
+		return 0;
+
+	return dr[2 * index + 1];
+}
+EXPORT_SYMBOL_GPL(dcr_resource_len);
+
+#ifdef CONFIG_PPC_DCR_MMIO
+
+/*
+ * Translate DCR number @dcr_n of @dev into an address, using the
+ * "dcr-mmio-range" (or, failing that, "dcr-mmio-space") property of the
+ * DCR controller and a per-DCR stride.
+ *
+ * @out_stride, when non-NULL, receives the stride, but only if one of the
+ * two range properties was found.
+ *
+ * Returns OF_BAD_ADDR when no controller or range property exists or the
+ * translation fails.
+ */
+static u64 of_translate_dcr_address(struct device_node *dev,
+				    unsigned int dcr_n,
+				    unsigned int *out_stride)
+{
+	struct device_node *ctrl;
+	const u32 *prop;
+	unsigned int stride;
+	u64 addr = OF_BAD_ADDR;
+
+	ctrl = find_dcr_parent(dev);
+	if (!ctrl)
+		return OF_BAD_ADDR;
+
+	/* Stride is not properly defined yet, default to 0x10 for Axon */
+	prop = of_get_property(ctrl, "dcr-mmio-stride", NULL);
+	stride = prop ? *prop : 0x10;
+
+	/* XXX FIXME: Which property name is to use of the 2 following ? */
+	prop = of_get_property(ctrl, "dcr-mmio-range", NULL);
+	if (!prop)
+		prop = of_get_property(ctrl, "dcr-mmio-space", NULL);
+
+	if (prop) {
+		/* Maybe could do some better range checking here */
+		addr = of_translate_address(ctrl, prop);
+		if (addr != OF_BAD_ADDR)
+			addr += (u64)(stride) * (u64)dcr_n;
+		if (out_stride)
+			*out_stride = stride;
+	}
+
+	of_node_put(ctrl);
+	return addr;
+}
+
+/*
+ * dcr_map_mmio - ioremap @dcr_c DCRs of @dev starting at DCR number @dcr_n
+ *
+ * The returned token is biased downwards by dcr_n * stride, and the
+ * original @dcr_n is kept in .base so dcr_unmap_mmio() can undo the bias.
+ * On translation or ioremap failure the returned token is NULL.
+ */
+dcr_host_mmio_t dcr_map_mmio(struct device_node *dev,
+			     unsigned int dcr_n,
+			     unsigned int dcr_c)
+{
+	dcr_host_mmio_t ret = { .base = dcr_n, .stride = 0, .token = NULL };
+	unsigned int map_len, bias;
+	u64 addr;
+
+	pr_debug("dcr_map(%pOF, 0x%x, 0x%x)\n",
+		 dev, dcr_n, dcr_c);
+
+	addr = of_translate_dcr_address(dev, dcr_n, &ret.stride);
+	pr_debug("translates to addr: 0x%llx, stride: 0x%x\n",
+		 (unsigned long long) addr, ret.stride);
+	if (addr == OF_BAD_ADDR)
+		return ret;
+
+	map_len = dcr_c * ret.stride;
+	bias = dcr_n * ret.stride;
+
+	pr_debug("mapping 0x%x bytes\n", map_len);
+	ret.token = ioremap(addr, map_len);
+	if (ret.token == NULL)
+		return ret;
+
+	pr_debug("mapped at 0x%p -> base is 0x%p\n",
+		 ret.token, ret.token - bias);
+	ret.token -= bias;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dcr_map_mmio);
+
+/*
+ * Release a mapping made by dcr_map_mmio().  The bias applied at map time
+ * (base * stride) is added back before iounmap().  A NULL token is a no-op.
+ * @dcr_c is unused.
+ */
+void dcr_unmap_mmio(dcr_host_mmio_t host, unsigned int dcr_c)
+{
+	if (host.token == NULL)
+		return;
+
+	iounmap(host.token + host.base * host.stride);
+}
+EXPORT_SYMBOL_GPL(dcr_unmap_mmio);
+
+#endif /* defined(CONFIG_PPC_DCR_MMIO) */
+
+#ifdef CONFIG_PPC_DCR_NATIVE
+/*
+ * Spinlock defined and exported for CONFIG_PPC_DCR_NATIVE builds; it is not
+ * taken anywhere in this file.
+ * NOTE(review): presumably serialises indirect DCR accesses - confirm
+ * against its users.
+ */
+DEFINE_SPINLOCK(dcr_ind_lock);
+EXPORT_SYMBOL_GPL(dcr_ind_lock);
+#endif	/* defined(CONFIG_PPC_DCR_NATIVE) */
+
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
new file mode 100644
index 0000000000..040827671d
--- /dev/null
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -0,0 +1,295 @@
+/*
+ *  Driver for ePAPR Embedded Hypervisor PIC
+ *
+ *  Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ *  Author: Ashish Kalra <ashish.kalra@freescale.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/ehv_pic.h>
+#include <asm/fsl_hcalls.h>
+
+/* The single PIC instance; set by ehv_pic_init(), read by ehv_pic_get_irq() */
+static struct ehv_pic *global_ehv_pic;
+/* Taken around the get-config/set-config hypercall pairs below */
+static DEFINE_SPINLOCK(ehv_pic_lock);
+
+/* Second interrupt-specifier cell recorded per hwirq by ehv_pic_host_xlate() */
+static u32 hwirq_intspec[NR_EHV_PIC_INTS];
+/* Mapping of the "fsl,hv-mpic-per-cpu" node, NULL when absent or unmapped */
+static u32 __iomem *mpic_percpu_base_vaddr;
+
+/* Specifier flag selecting the direct-EOI irq_chip in ehv_pic_host_map() */
+#define IRQ_TYPE_MPIC_DIRECT 4
+/* Byte offset of the EOI register inside the per-cpu mapping */
+#define MPIC_EOI  0x00B0
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+/* irq_chip.irq_unmask: ask the hypervisor to clear the mask for this source */
+static void ehv_pic_unmask_irq(struct irq_data *d)
+{
+	unsigned int hwirq = virq_to_hw(d->irq);
+
+	ev_int_set_mask(hwirq, 0);
+}
+
+/* irq_chip.irq_mask: ask the hypervisor to set the mask for this source */
+static void ehv_pic_mask_irq(struct irq_data *d)
+{
+	unsigned int hwirq = virq_to_hw(d->irq);
+
+	ev_int_set_mask(hwirq, 1);
+}
+
+/* irq_chip.irq_eoi: signal end-of-interrupt through the hypervisor call */
+static void ehv_pic_end_irq(struct irq_data *d)
+{
+	unsigned int hwirq = virq_to_hw(d->irq);
+
+	ev_int_eoi(hwirq);
+}
+
+/*
+ * irq_chip.irq_eoi for direct-EOI sources: write 0 to the EOI register in
+ * the per-cpu MPIC mapping instead of making a hypercall.  MPIC_EOI is a
+ * byte offset, hence the division by 4 on the u32 pointer.
+ */
+static void ehv_pic_direct_end_irq(struct irq_data *d)
+{
+	out_be32(mpic_percpu_base_vaddr + MPIC_EOI / 4, 0);
+}
+
+/*
+ * irq_chip.irq_set_affinity: re-route the source to a CPU chosen from
+ * @dest, keeping the config and priority the hypervisor currently reports.
+ * @force is ignored.  Always returns IRQ_SET_MASK_OK.
+ */
+static int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
+			 bool force)
+{
+	unsigned int hwirq = virq_to_hw(d->irq);
+	unsigned int cur_config, cur_prio, cur_cpu;
+	int target = irq_choose_cpu(dest);
+	unsigned long irqflags;
+
+	/* Read-modify-write of the source config must not interleave. */
+	spin_lock_irqsave(&ehv_pic_lock, irqflags);
+	ev_int_get_config(hwirq, &cur_config, &cur_prio, &cur_cpu);
+	ev_int_set_config(hwirq, cur_config, cur_prio, target);
+	spin_unlock_irqrestore(&ehv_pic_lock, irqflags);
+
+	return IRQ_SET_MASK_OK;
+}
+
+/*
+ * Convert a Linux IRQ_TYPE_* sense value into the PIC's sense and polarity
+ * bits.  EDGE_BOTH is treated like EDGE_FALLING; anything unrecognised is
+ * treated like LEVEL_LOW.
+ */
+static unsigned int ehv_pic_type_to_vecpri(unsigned int type)
+{
+	unsigned int sense, polarity;
+
+	switch (type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		sense = EHV_PIC_INFO(VECPRI_SENSE_EDGE);
+		polarity = EHV_PIC_INFO(VECPRI_POLARITY_POSITIVE);
+		break;
+
+	case IRQ_TYPE_EDGE_FALLING:
+	case IRQ_TYPE_EDGE_BOTH:
+		sense = EHV_PIC_INFO(VECPRI_SENSE_EDGE);
+		polarity = EHV_PIC_INFO(VECPRI_POLARITY_NEGATIVE);
+		break;
+
+	case IRQ_TYPE_LEVEL_HIGH:
+		sense = EHV_PIC_INFO(VECPRI_SENSE_LEVEL);
+		polarity = EHV_PIC_INFO(VECPRI_POLARITY_POSITIVE);
+		break;
+
+	case IRQ_TYPE_LEVEL_LOW:
+	default:
+		sense = EHV_PIC_INFO(VECPRI_SENSE_LEVEL);
+		polarity = EHV_PIC_INFO(VECPRI_POLARITY_NEGATIVE);
+		break;
+	}
+
+	return sense | polarity;
+}
+
+/*
+ * irq_chip.irq_set_type: program the sense and polarity for a source.
+ *
+ * IRQ_TYPE_NONE is treated as IRQ_TYPE_LEVEL_LOW.  The trigger type is
+ * stored in the irq_data here, which is why IRQ_SET_MASK_OK_NOCOPY is
+ * returned.  The priority is forced to 8 and the destination CPU reported
+ * by the hypervisor is preserved.
+ */
+static int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	unsigned int src = virq_to_hw(d->irq);
+	unsigned int vecpri, vold, vnew, prio, cpu_dest;
+	unsigned long flags;
+
+	if (flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_LEVEL_LOW;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	vecpri = ehv_pic_type_to_vecpri(flow_type);
+
+	spin_lock_irqsave(&ehv_pic_lock, flags);
+	ev_int_get_config(src, &vold, &prio, &cpu_dest);
+	vnew = vold & ~(EHV_PIC_INFO(VECPRI_POLARITY_MASK) |
+			EHV_PIC_INFO(VECPRI_SENSE_MASK));
+	vnew |= vecpri;
+
+	/*
+	 * TODO : Add specific interface call for platform to set
+	 * individual interrupt priorities.
+	 * platform currently using static/default priority for all ints
+	 */
+
+	prio = 8;
+
+	/*
+	 * NOTE(review): vnew (old config with sense/polarity replaced) is
+	 * computed above but never used; only the bare vecpri bits are sent,
+	 * so any other bits in the old config are dropped.  Confirm whether
+	 * vnew was meant to be passed here.
+	 */
+	ev_int_set_config(src, vecpri, prio, cpu_dest);
+
+	spin_unlock_irqrestore(&ehv_pic_lock, flags);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/* Default chip: every operation, including EOI, goes through a hypercall. */
+static struct irq_chip ehv_pic_irq_chip = {
+	.irq_set_type	= ehv_pic_set_irq_type,
+	.irq_eoi	= ehv_pic_end_irq,
+	.irq_unmask	= ehv_pic_unmask_irq,
+	.irq_mask	= ehv_pic_mask_irq,
+};
+
+/* Same as the default chip, except EOI is a direct per-cpu register write. */
+static struct irq_chip ehv_pic_direct_eoi_irq_chip = {
+	.irq_set_type	= ehv_pic_set_irq_type,
+	.irq_eoi	= ehv_pic_direct_end_irq,
+	.irq_unmask	= ehv_pic_unmask_irq,
+	.irq_mask	= ehv_pic_mask_irq,
+};
+
+/*
+ * Fetch the pending interrupt and translate it to a Linux irq number.
+ * Returns 0 when the PIC reports 0xFFFF, i.e. nothing is pending.
+ * BUGs if called before ehv_pic_init() has set up the PIC.
+ */
+unsigned int ehv_pic_get_irq(void)
+{
+	int vec;
+
+	BUG_ON(global_ehv_pic == NULL);
+
+	if (!global_ehv_pic->coreint_flag)
+		ev_int_iack(0, &vec); /* legacy mode */
+	else
+		vec = mfspr(SPRN_EPR); /* if core int mode */
+
+	/* 0xFFFF --> no irq is pending */
+	if (vec == 0xFFFF)
+		return 0;
+
+	/*
+	 * this will also setup revmap[] in the slow path for the first
+	 * time, next calls will always use fast path by indexing revmap
+	 */
+	return irq_linear_revmap(global_ehv_pic->irqhost, vec);
+}
+
+/*
+ * irq_domain_ops.match: the domain matches @node exactly, or matches
+ * anything when the domain itself has no device-tree node.
+ */
+static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node,
+			      enum irq_domain_bus_token bus_token)
+{
+	struct device_node *domain_np = irq_domain_get_of_node(h);
+
+	if (domain_np == NULL)
+		return 1;
+
+	return domain_np == node;
+}
+
+/*
+ * irq_domain_ops.map: attach an irq_chip and flow handler to @virq.
+ *
+ * The direct-EOI chip is chosen when the per-cpu MPIC registers are mapped
+ * and the specifier recorded for @hw carries IRQ_TYPE_MPIC_DIRECT;
+ * otherwise the PIC's own chip is used.  Always returns 0.
+ */
+static int ehv_pic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ehv_pic *ehv_pic = h->host_data;
+	struct irq_chip *chip = &ehv_pic->hc_irq;	/* Default chip */
+
+	if (mpic_percpu_base_vaddr &&
+	    (hwirq_intspec[hw] & IRQ_TYPE_MPIC_DIRECT))
+		chip = &ehv_pic_direct_eoi_irq_chip;
+
+	irq_set_chip_data(virq, chip);
+	/*
+	 * using handle_fasteoi_irq as our irq handler, this will
+	 * only call the eoi callback and suitable for the MPIC
+	 * controller which set ISR/IPR automatically and clear the
+	 * highest priority active interrupt in ISR/IPR when we do
+	 * a specific eoi
+	 */
+	irq_set_chip_and_handler(virq, chip, handle_fasteoi_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+/*
+ * irq_domain_ops.xlate: decode a device-tree interrupt specifier.
+ *
+ * intspec[0] is the hardware irq number.  When a second cell is present it
+ * is recorded in hwirq_intspec[] for ehv_pic_host_map(), and its sense
+ * value (with IRQ_TYPE_MPIC_DIRECT masked off) is converted to a Linux
+ * IRQ_TYPE_*.
+ *
+ * Returns 0 on success, or -EINVAL when the second cell is present and
+ * either the hwirq does not fit hwirq_intspec[] or the sense value is
+ * outside the translation table.  Both values come straight from the
+ * device tree and were previously used as array indices unchecked.
+ */
+static int ehv_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/*
+	 * interrupt sense values coming from the guest device tree
+	 * interrupt specifiers can have four possible sense and
+	 * level encoding information and they need to
+	 * be translated between firmware type & linux type.
+	 */
+
+	static const unsigned char map_of_senses_to_linux_irqtype[4] = {
+		IRQ_TYPE_EDGE_FALLING,
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+	};
+
+	if (intsize > 1) {
+		u32 sense = intspec[1] & ~IRQ_TYPE_MPIC_DIRECT;
+
+		/* Reject specifiers that would index past either array. */
+		if (intspec[0] >= ARRAY_SIZE(hwirq_intspec) ||
+		    sense >= ARRAY_SIZE(map_of_senses_to_linux_irqtype))
+			return -EINVAL;
+
+		*out_hwirq = intspec[0];
+		hwirq_intspec[intspec[0]] = intspec[1];
+		*out_flags = map_of_senses_to_linux_irqtype[sense];
+	} else {
+		*out_hwirq = intspec[0];
+		*out_flags = IRQ_TYPE_NONE;
+	}
+
+	return 0;
+}
+
+/* irq_domain callbacks registered by ehv_pic_init(). */
+static const struct irq_domain_ops ehv_pic_host_ops = {
+	.xlate	= ehv_pic_host_xlate,
+	.map	= ehv_pic_host_map,
+	.match	= ehv_pic_host_match,
+};
+
+/*
+ * ehv_pic_init - find the "epapr,hv-pic" node and register its irq domain
+ *
+ * Allocates the ehv_pic, creates a linear irq domain of NR_EHV_PIC_INTS
+ * entries, optionally maps the "fsl,hv-mpic-per-cpu" registers used for
+ * direct EOI, and installs the domain as the default host.  Returns
+ * silently, with nothing registered, if the node is missing or an
+ * allocation fails.
+ *
+ * The reference taken on the PIC node by of_find_compatible_node() is
+ * dropped on every path once this function has finished with it; the irq
+ * domain keeps its own reference to the node.
+ */
+void __init ehv_pic_init(void)
+{
+	struct device_node *np, *np2;
+	struct ehv_pic *ehv_pic;
+
+	np = of_find_compatible_node(NULL, NULL, "epapr,hv-pic");
+	if (!np) {
+		pr_err("ehv_pic_init: could not find epapr,hv-pic node\n");
+		return;
+	}
+
+	ehv_pic = kzalloc(sizeof(struct ehv_pic), GFP_KERNEL);
+	if (!ehv_pic)
+		goto out_put;
+
+	ehv_pic->irqhost = irq_domain_add_linear(np, NR_EHV_PIC_INTS,
+						 &ehv_pic_host_ops, ehv_pic);
+	if (!ehv_pic->irqhost) {
+		kfree(ehv_pic);
+		goto out_put;
+	}
+
+	np2 = of_find_compatible_node(NULL, NULL, "fsl,hv-mpic-per-cpu");
+	if (np2) {
+		mpic_percpu_base_vaddr = of_iomap(np2, 0);
+		if (!mpic_percpu_base_vaddr)
+			pr_err("ehv_pic_init: of_iomap failed\n");
+
+		of_node_put(np2);
+	}
+
+	/* Start from the default chip and add the affinity callback. */
+	ehv_pic->hc_irq = ehv_pic_irq_chip;
+	ehv_pic->hc_irq.irq_set_affinity = ehv_pic_set_affinity;
+	ehv_pic->coreint_flag = of_property_read_bool(np, "has-external-proxy");
+
+	global_ehv_pic = ehv_pic;
+	irq_set_default_host(global_ehv_pic->irqhost);
+
+out_put:
+	of_node_put(np);
+}
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
new file mode 100644
index 0000000000..39186ad6b3
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale General-purpose Timers Module
+ *
+ * Copyright (c) Freescale Semiconductor, Inc. 2006.
+ *               Shlomi Gridish <gridish@freescale.com>
+ *               Jerry Huang <Chang-Ming.Huang@freescale.com>
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *               Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <asm/fsl_gtm.h>
+
+/*
+ * Bits in the shared global config register for timer index x: an odd
+ * index selects bit 5 (STP) / bit 4 (RST), an even index selects
+ * bit 1 (STP) / bit 0 (RST).
+ */
+#define GTCFR_STP(x)		((x) & 1 ? 1 << 5 : 1 << 1)
+#define GTCFR_RST(x)		((x) & 1 ? 1 << 4 : 1 << 0)
+
+/* Mode register fields; ICLK is a 2-bit field at bits 2:1, SPS starts at bit 8 */
+#define GTMDR_ICLK_MASK		(3 << 1)
+#define GTMDR_ICLK_ICAS		(0 << 1)
+#define GTMDR_ICLK_ICLK		(1 << 1)
+#define GTMDR_ICLK_SLGO		(2 << 1)
+#define GTMDR_FRR		(1 << 3)
+#define GTMDR_ORI		(1 << 4)
+#define GTMDR_SPS(x)		((x) << 8)
+
+/*
+ * Register layout of one GTM block (four 16-bit timers).  The structure is
+ * packed and accessed through an __iomem pointer, so field order and sizes
+ * must not change.  Timers 1/2 share gtcfr1 and timers 3/4 share gtcfr2.
+ */
+struct gtm_timers_regs {
+	u8	gtcfr1;		/* Timer 1, Timer 2 global config register */
+	u8	res0[0x3];
+	u8	gtcfr2;		/* Timer 3, timer 4 global config register */
+	u8	res1[0xB];
+	__be16	gtmdr1;		/* Timer 1 mode register */
+	__be16	gtmdr2;		/* Timer 2 mode register */
+	__be16	gtrfr1;		/* Timer 1 reference register */
+	__be16	gtrfr2;		/* Timer 2 reference register */
+	__be16	gtcpr1;		/* Timer 1 capture register */
+	__be16	gtcpr2;		/* Timer 2 capture register */
+	__be16	gtcnr1;		/* Timer 1 counter */
+	__be16	gtcnr2;		/* Timer 2 counter */
+	__be16	gtmdr3;		/* Timer 3 mode register */
+	__be16	gtmdr4;		/* Timer 4 mode register */
+	__be16	gtrfr3;		/* Timer 3 reference register */
+	__be16	gtrfr4;		/* Timer 4 reference register */
+	__be16	gtcpr3;		/* Timer 3 capture register */
+	__be16	gtcpr4;		/* Timer 4 capture register */
+	__be16	gtcnr3;		/* Timer 3 counter */
+	__be16	gtcnr4;		/* Timer 4 counter */
+	__be16	gtevr1;		/* Timer 1 event register */
+	__be16	gtevr2;		/* Timer 2 event register */
+	__be16	gtevr3;		/* Timer 3 event register */
+	__be16	gtevr4;		/* Timer 4 event register */
+	__be16	gtpsr1;		/* Timer 1 prescale register */
+	__be16	gtpsr2;		/* Timer 2 prescale register */
+	__be16	gtpsr3;		/* Timer 3 prescale register */
+	__be16	gtpsr4;		/* Timer 4 prescale register */
+	u8 res2[0x40];
+} __attribute__ ((packed));
+
+/* Software state for one GTM block discovered by fsl_gtm_init(). */
+struct gtm {
+	unsigned int clock;			/* "clock-frequency" property value */
+	struct gtm_timers_regs __iomem *regs;	/* mapped register block */
+	struct gtm_timer timers[4];		/* per-timer state and shortcuts */
+	spinlock_t lock;			/* guards ->requested and register updates */
+	struct list_head list_node;		/* link in the gtms list */
+};
+
+/* All GTM blocks registered by fsl_gtm_init(). */
+static LIST_HEAD(gtms);
+
+/**
+ * gtm_get_timer16 - request GTM timer to use it with the rest of GTM API
+ * Context:	non-IRQ
+ *
+ * This function reserves the first free timer found on any registered GTM
+ * for later use. It returns the gtm_timer structure to use with the rest of
+ * the GTM API; use timer->irq to manage the timer interrupt.
+ *
+ * Returns ERR_PTR(-EBUSY) when GTMs exist but every timer is taken, and
+ * ERR_PTR(-ENODEV) when no GTM is registered.
+ */
+struct gtm_timer *gtm_get_timer16(void)
+{
+	struct gtm *gtm;
+
+	list_for_each_entry(gtm, &gtms, list_node) {
+		struct gtm_timer *found = NULL;
+		int i;
+
+		spin_lock_irq(&gtm->lock);
+		for (i = 0; i < ARRAY_SIZE(gtm->timers); i++) {
+			if (gtm->timers[i].requested)
+				continue;
+
+			gtm->timers[i].requested = true;
+			found = &gtm->timers[i];
+			break;
+		}
+		spin_unlock_irq(&gtm->lock);
+
+		if (found)
+			return found;
+	}
+
+	return ERR_PTR(list_empty(&gtms) ? -ENODEV : -EBUSY);
+}
+EXPORT_SYMBOL(gtm_get_timer16);
+
+/**
+ * gtm_get_specific_timer16 - request specific GTM timer
+ * @gtm:	specific GTM, pass here GTM's device_node->data
+ * @timer:	specific timer number, Timer1 is 0.
+ * Context:	non-IRQ
+ *
+ * This function reserves the given GTM timer for later use. It returns the
+ * gtm_timer structure to use with the rest of the GTM API; use timer->irq
+ * to manage the timer interrupt.
+ *
+ * Returns ERR_PTR(-EINVAL) for a timer number above 3 and ERR_PTR(-EBUSY)
+ * when the timer is already reserved.
+ */
+struct gtm_timer *gtm_get_specific_timer16(struct gtm *gtm,
+					   unsigned int timer)
+{
+	struct gtm_timer *ret = ERR_PTR(-EBUSY);
+
+	if (timer > 3)
+		return ERR_PTR(-EINVAL);
+
+	spin_lock_irq(&gtm->lock);
+	if (!gtm->timers[timer].requested) {
+		ret = &gtm->timers[timer];
+		ret->requested = true;
+	}
+	spin_unlock_irq(&gtm->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(gtm_get_specific_timer16);
+
+/**
+ * gtm_put_timer16 - release 16 bits GTM timer
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * Context:	any
+ *
+ * This function stops the GTM timer and releases it so others may request
+ * it.
+ */
+void gtm_put_timer16(struct gtm_timer *tmr)
+{
+	unsigned long flags;
+
+	gtm_stop_timer16(tmr);
+
+	/*
+	 * Save and restore the interrupt state: this may be called from any
+	 * context, and a plain spin_unlock_irq() would re-enable interrupts
+	 * for a caller that had them disabled.
+	 */
+	spin_lock_irqsave(&tmr->gtm->lock, flags);
+	tmr->requested = false;
+	spin_unlock_irqrestore(&tmr->gtm->lock, flags);
+}
+EXPORT_SYMBOL(gtm_put_timer16);
+
+/*
+ * This is back-end for the exported functions, it's used to reset single
+ * timer in reference mode.
+ *
+ * @tmr:		timer to program
+ * @frequency:		desired count rate in Hz; gtm->clock is divided by it
+ * @reference_value:	value written to the reference register
+ * @free_run:		when true, GTMDR_FRR is set in the mode register
+ *
+ * Returns 0 on success, or -EINVAL when the required divider exceeds what
+ * the prescalers can provide.
+ *
+ * NOTE(review): @frequency is used as a divisor without a zero check; the
+ * two callers in this file never pass 0.
+ */
+static int gtm_set_ref_timer16(struct gtm_timer *tmr, int frequency,
+			       int reference_value, bool free_run)
+{
+	struct gtm *gtm = tmr->gtm;
+	int num = tmr - &gtm->timers[0];
+	unsigned int prescaler;
+	u8 iclk = GTMDR_ICLK_ICLK;
+	u8 psr;
+	u8 sps;
+	unsigned long flags;
+	int max_prescaler = 256 * 256 * 16;
+
+	/* CPM2 doesn't have primary prescaler */
+	if (!tmr->gtpsr)
+		max_prescaler /= 256;
+
+	prescaler = gtm->clock / frequency;
+	/*
+	 * We have two 8 bit prescalers -- primary and secondary (psr, sps),
+	 * plus "slow go" mode (clk / 16). So, total prescale value is
+	 * 16 * (psr + 1) * (sps + 1). Though, for CPM2 GTMs we losing psr.
+	 */
+	if (prescaler > max_prescaler)
+		return -EINVAL;
+
+	/* Too large for the 8-bit prescalers alone: use "slow go" (/16). */
+	if (prescaler > max_prescaler / 16) {
+		iclk = GTMDR_ICLK_SLGO;
+		prescaler /= 16;
+	}
+
+	/*
+	 * NOTE(review): if gtm->clock < frequency, prescaler is 0 here and
+	 * sps wraps to 255 - confirm whether that case should be rejected.
+	 */
+	if (prescaler <= 256) {
+		psr = 0;
+		sps = prescaler - 1;
+	} else {
+		psr = 256 - 1;
+		sps = prescaler / 256 - 1;
+	}
+
+	spin_lock_irqsave(&gtm->lock, flags);
+
+	/*
+	 * Properly reset timers: stop, reset, set up prescalers, reference
+	 * value and clear event register.
+	 *
+	 * NOTE(review): the clear mask below is the complement of this
+	 * timer's STP|RST bits, so (assuming clrsetbits_8 clears then sets)
+	 * every other bit of the config register is cleared, including the
+	 * bits of the timer that shares it - confirm this is intended.
+	 */
+	clrsetbits_8(tmr->gtcfr, ~(GTCFR_STP(num) | GTCFR_RST(num)),
+				 GTCFR_STP(num) | GTCFR_RST(num));
+
+	setbits8(tmr->gtcfr, GTCFR_STP(num));
+
+	if (tmr->gtpsr)
+		out_be16(tmr->gtpsr, psr);
+	clrsetbits_be16(tmr->gtmdr, 0xFFFF, iclk | GTMDR_SPS(sps) |
+			GTMDR_ORI | (free_run ? GTMDR_FRR : 0));
+	out_be16(tmr->gtcnr, 0);
+	out_be16(tmr->gtrfr, reference_value);
+	out_be16(tmr->gtevr, 0xFFFF);
+
+	/* Let it be. */
+	clrbits8(tmr->gtcfr, GTCFR_STP(num));
+
+	spin_unlock_irqrestore(&gtm->lock, flags);
+
+	return 0;
+}
+
+/**
+ * gtm_set_timer16 - (re)set 16 bit timer with arbitrary precision
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * @usec:	timer interval in microseconds
+ * @reload:	if set, the timer will reset upon expiry rather than
+ *         	continue running free.
+ * Context:	any
+ *
+ * This function (re)sets the GTM timer so that it counts up to the requested
+ * interval value, and fires the interrupt when the value is reached. This
+ * function will reduce the precision of the timer as needed in order for the
+ * requested timeout to fit in a 16-bit register.
+ *
+ * Returns -EINVAL when @usec is so large that the scaled-down frequency
+ * would be zero, otherwise the result of programming the timer.
+ */
+int gtm_set_timer16(struct gtm_timer *tmr, unsigned long usec, bool reload)
+{
+	/* quite obvious, frequency which is enough for µSec precision */
+	int freq = 1000000;
+	unsigned int bit;
+
+	bit = fls_long(usec);
+	if (bit > 15) {
+		unsigned int shift = bit - 15;
+
+		/*
+		 * With a 64-bit long the shift can reach the width of freq,
+		 * which is undefined in C.  The mathematical result is zero
+		 * anyway, so fail the same way the zero check below does.
+		 */
+		if (shift >= 8 * sizeof(freq))
+			return -EINVAL;
+
+		freq >>= shift;
+		usec >>= shift;
+	}
+
+	if (!freq)
+		return -EINVAL;
+
+	return gtm_set_ref_timer16(tmr, freq, usec, reload);
+}
+EXPORT_SYMBOL(gtm_set_timer16);
+
+/**
+ * gtm_set_exact_timer16 - (re)set 16 bits timer
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * @usec:	timer interval in microseconds
+ * @reload:	if set, the timer will reset upon expiry rather than
+ *         	continue running free.
+ * Context:	any
+ *
+ * This function (re)sets the GTM timer so that it counts up to the requested
+ * interval value, and fires the interrupt when the value is reached. If the
+ * reload flag was set, the timer will also reset itself upon reaching the
+ * reference value, otherwise it continues to increment.
+ *
+ * The _exact_ bit in the function name states that this function will not
+ * crop precision of the "usec" argument, thus usec is limited to 16 bits
+ * (single timer width).
+ */
+int gtm_set_exact_timer16(struct gtm_timer *tmr, u16 usec, bool reload)
+{
+	/*
+	 * 1 MHz gives exactly one count per microsecond.
+	 *
+	 * We can lower the frequency (and probably power consumption) by
+	 * dividing both frequency and usec by 2 until there is no remainder.
+	 * But we won't bother with this unless savings are measured, so just
+	 * run the timer as is.
+	 */
+	const int one_mhz = 1000000;
+
+	return gtm_set_ref_timer16(tmr, one_mhz, usec, reload);
+}
+EXPORT_SYMBOL(gtm_set_exact_timer16);
+
+/**
+ * gtm_stop_timer16 - stop single timer
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * Context:	any
+ *
+ * This function sets the timer's stop bit and writes 0xFFFF to its event
+ * register.
+ */
+void gtm_stop_timer16(struct gtm_timer *tmr)
+{
+	struct gtm *owner = tmr->gtm;
+	int idx = tmr - &owner->timers[0];
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&owner->lock, irqflags);
+
+	setbits8(tmr->gtcfr, GTCFR_STP(idx));
+	out_be16(tmr->gtevr, 0xFFFF);
+
+	spin_unlock_irqrestore(&owner->lock, irqflags);
+}
+EXPORT_SYMBOL(gtm_stop_timer16);
+
+/**
+ * gtm_ack_timer16 - acknowledge timer event (free-run timers only)
+ * @tmr:	pointer to the gtm_timer structure obtained from gtm_get_timer
+ * @events:	events mask to ack
+ * Context:	any
+ *
+ * This function is used to acknowledge a timer interrupt event by writing
+ * @events to the timer's event register; use it inside the interrupt
+ * handler.
+ */
+void gtm_ack_timer16(struct gtm_timer *tmr, u16 events)
+{
+	out_be16(tmr->gtevr, events);
+}
+EXPORT_SYMBOL(gtm_ack_timer16);
+
+/*
+ * Fill in the per-timer register pointers so the rest of the driver can be
+ * written independently of the timer number.  The prescale register
+ * pointers are left unset for "fsl,cpm2-gtm" nodes.
+ */
+static void __init gtm_set_shortcuts(struct device_node *np,
+				     struct gtm_timer *timers,
+				     struct gtm_timers_regs __iomem *regs)
+{
+	/*
+	 * Yeah, I don't like this either, but timers' registers a bit messed,
+	 * so we have to provide shortcuts to write timer independent code.
+	 * Alternative option is to create gt*() accessors, but that will be
+	 * even uglier and cryptic.
+	 */
+
+	/* Global config: timers 1/2 share gtcfr1, timers 3/4 share gtcfr2 */
+	timers[0].gtcfr = &regs->gtcfr1;
+	timers[1].gtcfr = &regs->gtcfr1;
+	timers[2].gtcfr = &regs->gtcfr2;
+	timers[3].gtcfr = &regs->gtcfr2;
+
+	/* Mode registers */
+	timers[0].gtmdr = &regs->gtmdr1;
+	timers[1].gtmdr = &regs->gtmdr2;
+	timers[2].gtmdr = &regs->gtmdr3;
+	timers[3].gtmdr = &regs->gtmdr4;
+
+	/* Counters */
+	timers[0].gtcnr = &regs->gtcnr1;
+	timers[1].gtcnr = &regs->gtcnr2;
+	timers[2].gtcnr = &regs->gtcnr3;
+	timers[3].gtcnr = &regs->gtcnr4;
+
+	/* Reference registers */
+	timers[0].gtrfr = &regs->gtrfr1;
+	timers[1].gtrfr = &regs->gtrfr2;
+	timers[2].gtrfr = &regs->gtrfr3;
+	timers[3].gtrfr = &regs->gtrfr4;
+
+	/* Event registers */
+	timers[0].gtevr = &regs->gtevr1;
+	timers[1].gtevr = &regs->gtevr2;
+	timers[2].gtevr = &regs->gtevr3;
+	timers[3].gtevr = &regs->gtevr4;
+
+	/* CPM2 doesn't have primary prescaler */
+	if (of_device_is_compatible(np, "fsl,cpm2-gtm"))
+		return;
+
+	timers[0].gtpsr = &regs->gtpsr1;
+	timers[1].gtpsr = &regs->gtpsr2;
+	timers[2].gtpsr = &regs->gtpsr3;
+	timers[3].gtpsr = &regs->gtpsr4;
+}
+
+/*
+ * Arch initcall: register every "fsl,gtm" compatible node as a GTM.
+ *
+ * For each node a struct gtm is allocated, its clock and four timer
+ * interrupts are read from the device tree, the registers are mapped and
+ * the GTM is added to the gtms list.  A node that fails any step is logged
+ * and skipped.  Always returns 0.
+ */
+static int __init fsl_gtm_init(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "fsl,gtm") {
+		const u32 *clock;
+		struct gtm *gtm;
+		int size;
+		int i;
+
+		gtm = kzalloc(sizeof(*gtm), GFP_KERNEL);
+		if (gtm == NULL) {
+			pr_err("%pOF: unable to allocate memory\n",
+				np);
+			continue;
+		}
+
+		spin_lock_init(&gtm->lock);
+
+		clock = of_get_property(np, "clock-frequency", &size);
+		if (clock == NULL || size != sizeof(*clock)) {
+			pr_err("%pOF: no clock-frequency\n", np);
+			goto err;
+		}
+		gtm->clock = *clock;
+
+		/* One interrupt per timer, in timer order. */
+		for (i = 0; i < ARRAY_SIZE(gtm->timers); i++) {
+			struct gtm_timer *t = &gtm->timers[i];
+			unsigned int irq = irq_of_parse_and_map(np, i);
+
+			if (!irq) {
+				pr_err("%pOF: not enough interrupts specified\n",
+				       np);
+				goto err;
+			}
+			t->irq = irq;
+			t->gtm = gtm;
+		}
+
+		gtm->regs = of_iomap(np, 0);
+		if (gtm->regs == NULL) {
+			pr_err("%pOF: unable to iomap registers\n",
+			       np);
+			goto err;
+		}
+
+		gtm_set_shortcuts(np, gtm->timers, gtm->regs);
+		list_add(&gtm->list_node, &gtms);
+
+		/* We don't want to lose the node and its ->data */
+		np->data = gtm;
+		of_node_get(np);
+
+		continue;
+err:
+		kfree(gtm);
+	}
+	return 0;
+}
+arch_initcall(fsl_gtm_init);
diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
new file mode 100644
index 0000000000..217cea1509
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_lbc.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale LBC and UPM routines.
+ *
+ * Copyright © 2007-2008  MontaVista Software, Inc.
+ * Copyright © 2010 Freescale Semiconductor
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Author: Jack Lan <Jack.Lan@freescale.com>
+ * Author: Roy Zang <tie-fei.zang@freescale.com>
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
+#include <linux/syscore_ops.h>
+#include <asm/fsl_lbc.h>
+
+static DEFINE_SPINLOCK(fsl_lbc_lock);
+struct fsl_lbc_ctrl *fsl_lbc_ctrl_dev;
+EXPORT_SYMBOL(fsl_lbc_ctrl_dev);
+
+/**
+ * fsl_lbc_addr - convert the base address
+ * @addr_base:	base address of the memory bank
+ *
+ * Returns @addr_base in the layout used by the BR register. On an eLBC
+ * ("fsl,elbc") the address masked with 0xffff8000 is returned as is;
+ * otherwise bits 33:32 of a 34bit local bus physical address are folded
+ * into bits 14:13 of the 32bit value (Example: MPC8641).
+ */
+u32 fsl_lbc_addr(phys_addr_t addr_base)
+{
+	u32 addr = addr_base & 0xffff8000;
+
+	/* Only non-eLBC controllers carry the two extra address bits */
+	if (!of_device_is_compatible(fsl_lbc_ctrl_dev->dev->of_node, "fsl,elbc"))
+		addr |= (addr_base & 0x300000000ull) >> 19;
+
+	return addr;
+}
+EXPORT_SYMBOL(fsl_lbc_addr);
+
+/**
+ * fsl_lbc_find - find Localbus bank
+ * @addr_base:	base address of the memory bank
+ *
+ * This function walks the LBC banks, comparing the "Base address" field of
+ * each valid BR register (masked by the matching OR register) with
+ * fsl_lbc_addr(@addr_base). On a match it returns the bank number (starting
+ * with 0); otherwise -ENOENT, or -ENODEV when no controller is mapped.
+ */
+int fsl_lbc_find(phys_addr_t addr_base)
+{
+	int i;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
+		return -ENODEV;
+
+	lbc = fsl_lbc_ctrl_dev->regs;
+	for (i = 0; i < ARRAY_SIZE(lbc->bank); i++) {
+		u32 br = in_be32(&lbc->bank[i].br);
+		u32 or = in_be32(&lbc->bank[i].or);
+
+		if (br & BR_V && (br & or & BR_BA) == fsl_lbc_addr(addr_base))
+			return i;
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(fsl_lbc_find);
+
+/**
+ * fsl_upm_find - find pre-programmed UPM via base address
+ * @addr_base:	base address of the memory bank controlled by the UPM
+ * @upm:	pointer to the allocated fsl_upm structure
+ *
+ * This function fills @upm->mxmr and @upm->width from the bank's BR register
+ * so the structure can be used with the rest of the UPM API. Returns 0 on
+ * success, the fsl_lbc_find() error, or -EINVAL for a non-UPM bank/port size.
+ */
+int fsl_upm_find(phys_addr_t addr_base, struct fsl_upm *upm)
+{
+	int bank;
+	u32 br;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	bank = fsl_lbc_find(addr_base);
+	if (bank < 0)
+		return bank;
+
+	if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
+		return -ENODEV;
+
+	lbc = fsl_lbc_ctrl_dev->regs;
+	br = in_be32(&lbc->bank[bank].br);
+
+	switch (br & BR_MSEL) {	/* pick the matching UPM mode register */
+	case BR_MS_UPMA:
+		upm->mxmr = &lbc->mamr;
+		break;
+	case BR_MS_UPMB:
+		upm->mxmr = &lbc->mbmr;
+		break;
+	case BR_MS_UPMC:
+		upm->mxmr = &lbc->mcmr;
+		break;
+	default:
+		return -EINVAL;	/* bank is not driven by UPMA/B/C */
+	}
+
+	switch (br & BR_PS) {	/* port size, stored in bits */
+	case BR_PS_8:
+		upm->width = 8;
+		break;
+	case BR_PS_16:
+		upm->width = 16;
+		break;
+	case BR_PS_32:
+		upm->width = 32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(fsl_upm_find);
+
+/**
+ * fsl_upm_run_pattern - actually run an UPM pattern
+ * @upm:	pointer to the fsl_upm structure obtained via fsl_upm_find
+ * @io_base:	remapped pointer to where memory access should happen
+ * @mar:	MAR register content during pattern execution
+ *
+ * Writes @mar, then issues a dummy write of @upm->width bits to @io_base so
+ * that the UPM pattern runs; mar usage depends on the AMX bits in UPM RAM.
+ * Returns 0, -ENODEV without a controller, or -EINVAL for a bad width.
+ */
+int fsl_upm_run_pattern(struct fsl_upm *upm, void __iomem *io_base, u32 mar)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
+		return -ENODEV;
+
+	spin_lock_irqsave(&fsl_lbc_lock, flags);	/* MAR + dummy write as one unit */
+
+	out_be32(&fsl_lbc_ctrl_dev->regs->mar, mar);
+
+	switch (upm->width) {
+	case 8:
+		out_8(io_base, 0x0);
+		break;
+	case 16:
+		out_be16(io_base, 0x0);
+		break;
+	case 32:
+		out_be32(io_base, 0x0);
+		break;
+	default:
+		ret = -EINVAL;	/* MAR has already been written at this point */
+		break;
+	}
+
+	spin_unlock_irqrestore(&fsl_lbc_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(fsl_upm_run_pattern);
+
+static int fsl_lbc_ctrl_init(struct fsl_lbc_ctrl *ctrl,
+			     struct device_node *node)
+{
+	struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
+
+	/* clear event registers */
+	setbits32(&lbc->ltesr, LTESR_CLEAR);
+	out_be32(&lbc->lteatr, 0);
+	out_be32(&lbc->ltear, 0);
+	out_be32(&lbc->lteccr, LTECCR_CLEAR);
+	out_be32(&lbc->ltedr, LTEDR_ENABLE);
+
+	/* Set the monitor timeout value to the maximum for erratum A001 */
+	if (of_device_is_compatible(node, "fsl,elbc"))
+		clrsetbits_be32(&lbc->lbcr, LBCR_BMT, LBCR_BMTPS);
+
+	return 0;	/* no failure path today; the caller still checks */
+}
+
+/*
+ * NOTE: This interrupt is used to report localbus events of various kinds,
+ * such as transaction errors on the chipselects.
+ */
+
+static irqreturn_t fsl_lbc_ctrl_irq(int irqno, void *data)
+{
+	struct fsl_lbc_ctrl *ctrl = data;
+	struct fsl_lbc_regs __iomem *lbc = ctrl->regs;
+	u32 status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_lbc_lock, flags);
+	status = in_be32(&lbc->ltesr);
+	if (!status) {
+		spin_unlock_irqrestore(&fsl_lbc_lock, flags);
+		return IRQ_NONE;	/* no event latched in LTESR */
+	}
+
+	out_be32(&lbc->ltesr, LTESR_CLEAR);
+	out_be32(&lbc->lteatr, 0);
+	out_be32(&lbc->ltear, 0);
+	ctrl->irq_status = status;	/* presumably read by irq_wait waiters - confirm */
+
+	if (status & LTESR_BM)
+		dev_err(ctrl->dev, "Local bus monitor time-out: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_WP)
+		dev_err(ctrl->dev, "Write protect error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_ATMW)
+		dev_err(ctrl->dev, "Atomic write error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_ATMR)
+		dev_err(ctrl->dev, "Atomic read error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_CS)
+		dev_err(ctrl->dev, "Chip select error: "
+			"LTESR 0x%08X\n", status);
+	if (status & LTESR_FCT) {
+		dev_err(ctrl->dev, "FCM command time-out: "
+			"LTESR 0x%08X\n", status);
+		smp_wmb();	/* order the irq_status store before the wake-up */
+		wake_up(&ctrl->irq_wait);
+	}
+	if (status & LTESR_PAR) {
+		dev_err(ctrl->dev, "Parity or Uncorrectable ECC error: "
+			"LTESR 0x%08X\n", status);
+		smp_wmb();
+		wake_up(&ctrl->irq_wait);
+	}
+	if (status & LTESR_CC) {	/* wakes waiters without logging an error */
+		smp_wmb();
+		wake_up(&ctrl->irq_wait);
+	}
+	if (status & ~LTESR_MASK)
+		dev_err(ctrl->dev, "Unknown error: "
+			"LTESR 0x%08X\n", status);
+	spin_unlock_irqrestore(&fsl_lbc_lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * fsl_lbc_ctrl_probe
+ *
+ * Called by the device layer when it finds a device matching one our
+ * driver can handle. Allocates the controller-wide resources only (state,
+ * register mapping, IRQs); the resources for the NAND banks themselves
+ * are allocated in the chip probe function. Returns 0 or a negative
+ * errno; on failure the register mapping and state are released again.
+ */
+
+static int fsl_lbc_ctrl_probe(struct platform_device *dev)
+{
+	int ret;
+
+	if (!dev->dev.of_node) {
+		dev_err(&dev->dev, "Device OF-Node is NULL\n");
+		return -EFAULT;
+	}
+
+	fsl_lbc_ctrl_dev = kzalloc(sizeof(*fsl_lbc_ctrl_dev), GFP_KERNEL);
+	if (!fsl_lbc_ctrl_dev)
+		return -ENOMEM;
+
+	dev_set_drvdata(&dev->dev, fsl_lbc_ctrl_dev);
+
+	spin_lock_init(&fsl_lbc_ctrl_dev->lock);
+	init_waitqueue_head(&fsl_lbc_ctrl_dev->irq_wait);
+
+	fsl_lbc_ctrl_dev->regs = of_iomap(dev->dev.of_node, 0);
+	if (!fsl_lbc_ctrl_dev->regs) {
+		dev_err(&dev->dev, "failed to get memory region\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	fsl_lbc_ctrl_dev->irq[0] = irq_of_parse_and_map(dev->dev.of_node, 0);
+	if (!fsl_lbc_ctrl_dev->irq[0]) {
+		dev_err(&dev->dev, "failed to get irq resource\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	fsl_lbc_ctrl_dev->dev = &dev->dev;
+
+	ret = fsl_lbc_ctrl_init(fsl_lbc_ctrl_dev, dev->dev.of_node);
+	if (ret < 0)
+		goto err;
+
+	ret = request_irq(fsl_lbc_ctrl_dev->irq[0], fsl_lbc_ctrl_irq, 0,
+				"fsl-lbc", fsl_lbc_ctrl_dev);
+	if (ret != 0) {
+		dev_err(&dev->dev, "failed to install irq (%d)\n",
+			fsl_lbc_ctrl_dev->irq[0]);
+		/* keep request_irq()'s errno; an IRQ number is not an error code */
+		goto err;
+	}
+
+	fsl_lbc_ctrl_dev->irq[1] = irq_of_parse_and_map(dev->dev.of_node, 1);
+	if (fsl_lbc_ctrl_dev->irq[1]) {
+		ret = request_irq(fsl_lbc_ctrl_dev->irq[1], fsl_lbc_ctrl_irq,
+				IRQF_SHARED, "fsl-lbc-err", fsl_lbc_ctrl_dev);
+		if (ret) {
+			dev_err(&dev->dev, "failed to install irq (%d)\n",
+					fsl_lbc_ctrl_dev->irq[1]);
+			/* the second (error) IRQ is optional, but must work if present */
+			goto err1;
+		}
+	}
+
+	/* Enable interrupts for any detected events */
+	out_be32(&fsl_lbc_ctrl_dev->regs->lteir, LTEIR_ENABLE);
+
+	return 0;
+
+err1:
+	free_irq(fsl_lbc_ctrl_dev->irq[0], fsl_lbc_ctrl_dev);
+err:
+	iounmap(fsl_lbc_ctrl_dev->regs);
+	kfree(fsl_lbc_ctrl_dev);
+	fsl_lbc_ctrl_dev = NULL;
+	return ret;
+}
+
+#ifdef CONFIG_SUSPEND
+
+/* save lbc registers; called from syscore suspend, so must not sleep */
+static int fsl_lbc_syscore_suspend(void)
+{
+	struct fsl_lbc_ctrl *ctrl;
+	struct fsl_lbc_regs __iomem *lbc;
+
+	ctrl = fsl_lbc_ctrl_dev;
+	if (!ctrl)
+		goto out;	/* no controller probed: nothing to save */
+
+	lbc = ctrl->regs;
+	if (!lbc)
+		goto out;
+
+	ctrl->saved_regs = kmalloc(sizeof(struct fsl_lbc_regs), GFP_ATOMIC);
+	if (!ctrl->saved_regs)
+		return -ENOMEM;	/* the only failure reported to the caller */
+
+	_memcpy_fromio(ctrl->saved_regs, lbc, sizeof(struct fsl_lbc_regs));
+
+out:
+	return 0;
+}
+
+/*
+ * Syscore resume hook: write the register image captured by
+ * fsl_lbc_syscore_suspend() back to the controller, then free it.
+ * A missing controller, mapping or image makes this a no-op.
+ */
+static void fsl_lbc_syscore_resume(void)
+{
+	struct fsl_lbc_ctrl *ctrl = fsl_lbc_ctrl_dev;
+	struct fsl_lbc_regs __iomem *regs;
+
+	if (!ctrl)
+		return;
+
+	regs = ctrl->regs;
+	if (!regs)
+		return;
+
+	if (!ctrl->saved_regs)
+		return;
+
+	_memcpy_toio(regs, ctrl->saved_regs, sizeof(struct fsl_lbc_regs));
+	kfree(ctrl->saved_regs);
+	ctrl->saved_regs = NULL;
+}
+#endif /* CONFIG_SUSPEND */
+
+static const struct of_device_id fsl_lbc_match[] = {
+	{ .compatible = "fsl,elbc", },	/* also selects eLBC handling elsewhere in this file */
+	{ .compatible = "fsl,pq3-localbus", },
+	{ .compatible = "fsl,pq2-localbus", },
+	{ .compatible = "fsl,pq2pro-localbus", },
+	{},	/* sentinel */
+};
+
+#ifdef CONFIG_SUSPEND
+static struct syscore_ops lbc_syscore_pm_ops = {
+	.suspend = fsl_lbc_syscore_suspend,	/* snapshots the register block */
+	.resume = fsl_lbc_syscore_resume,	/* writes the snapshot back and frees it */
+};
+#endif
+
+static struct platform_driver fsl_lbc_ctrl_driver = {
+	.driver = {
+		.name = "fsl-lbc",
+		.of_match_table = fsl_lbc_match,
+	},
+	.probe = fsl_lbc_ctrl_probe,	/* no .remove callback is provided */
+};
+
+static int __init fsl_lbc_init(void)
+{
+#ifdef CONFIG_SUSPEND
+	register_syscore_ops(&lbc_syscore_pm_ops);	/* hooks tolerate a missing controller */
+#endif
+	return platform_driver_register(&fsl_lbc_ctrl_driver);
+}
+subsys_initcall(fsl_lbc_init);
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
new file mode 100644
index 0000000000..df06bb6b83
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ */
+
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpic.h>
+
+#include "mpic.h"
+
+#define MPIC_ERR_INT_BASE	0x3900
+#define MPIC_ERR_INT_EISR	0x0000
+#define MPIC_ERR_INT_EIMR	0x0010
+
+static inline u32 mpic_fsl_err_read(u32 __iomem *base, unsigned int err_reg)
+{
+	return in_be32(base + (err_reg >> 2));	/* byte offset -> u32 index */
+}
+
+static inline void mpic_fsl_err_write(u32 __iomem *base, u32 value)
+{
+	out_be32(base + (MPIC_ERR_INT_EIMR >> 2), value);	/* always targets EIMR */
+}
+
+static void fsl_mpic_mask_err(struct irq_data *d)
+{
+	u32 eimr;
+	struct mpic *mpic = irq_data_get_irq_chip_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->err_int_vecs[0];
+
+	eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
+	eimr |= 1U << (31 - src);	/* source 0 is the MSB; a set bit masks it */
+	mpic_fsl_err_write(mpic->err_regs, eimr);
+}
+
+static void fsl_mpic_unmask_err(struct irq_data *d)
+{
+	u32 eimr;
+	struct mpic *mpic = irq_data_get_irq_chip_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->err_int_vecs[0];
+
+	eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
+	eimr &= ~(1U << (31 - src));	/* source 0 is the MSB; a clear bit unmasks it */
+	mpic_fsl_err_write(mpic->err_regs, eimr);
+}
+
+static struct irq_chip fsl_mpic_err_chip = {
+	.irq_disable	= fsl_mpic_mask_err,	/* disable is implemented as mask */
+	.irq_mask	= fsl_mpic_mask_err,
+	.irq_unmask	= fsl_mpic_unmask_err,
+};
+
+int __init mpic_setup_error_int(struct mpic *mpic, int intvec)
+{
+	int i;
+
+	mpic->err_regs = ioremap(mpic->paddr + MPIC_ERR_INT_BASE, 0x1000);
+	if (!mpic->err_regs) {
+		pr_err("could not map mpic error registers\n");
+		return -ENOMEM;
+	}
+	mpic->hc_err = fsl_mpic_err_chip;	/* per-mpic copy so the name can differ */
+	mpic->hc_err.name = mpic->name;
+	mpic->flags |= MPIC_FSL_HAS_EIMR;
+	/* allocate interrupt vectors for error interrupts */
+	for (i = MPIC_MAX_ERR - 1; i >= 0; i--)
+		mpic->err_int_vecs[i] = intvec--;	/* last slot gets intvec, counting down */
+
+	return 0;
+}
+
+int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t  hw)
+{
+	if ((mpic->flags & MPIC_FSL_HAS_EIMR) &&
+	    (hw >= mpic->err_int_vecs[0] &&
+	     hw <= mpic->err_int_vecs[MPIC_MAX_ERR - 1])) {
+		WARN_ON(mpic->flags & MPIC_SECONDARY);
+
+		pr_debug("mpic: mapping as Error Interrupt\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_err,
+					 handle_level_irq);
+		return 1;	/* hw was in the error-vector range and is now set up */
+	}
+
+	return 0;	/* not an error interrupt: nothing was changed */
+}
+
+static irqreturn_t fsl_error_int_handler(int irq, void *data)
+{
+	struct mpic *mpic = (struct mpic *) data;
+	u32 eisr, eimr;
+	int errint;
+
+	eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR);
+	eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR);
+
+	if (!(eisr & ~eimr))
+		return IRQ_NONE;	/* nothing pending that is unmasked */
+
+	while (eisr) {	/* NOTE(review): pending-but-masked bits are dispatched too */
+		int ret;
+		errint = __builtin_clz(eisr);	/* MSB-first source number */
+		ret = generic_handle_domain_irq(mpic->irqhost,
+						mpic->err_int_vecs[errint]);
+		if (WARN_ON(ret)) {
+			eimr |= 1U << (31 - errint);	/* dispatch failed: mask the source */
+			mpic_fsl_err_write(mpic->err_regs, eimr);
+		}
+		eisr &= ~(1U << (31 - errint));	/* unsigned: errint 0 hits bit 31 */
+	}
+
+	return IRQ_HANDLED;
+}
+
+void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
+{
+	unsigned int virq;
+	int ret;
+
+	virq = irq_create_mapping(mpic->irqhost, irqnum);
+	if (!virq) {
+		pr_err("Error interrupt setup failed\n");
+		return;
+	}
+
+	/* Mask all error interrupts */
+	mpic_fsl_err_write(mpic->err_regs, ~0);
+
+	ret = request_irq(virq, fsl_error_int_handler, IRQF_NO_THREAD,
+		    "mpic-error-int", mpic);
+	if (ret)
+		pr_err("Failed to register error interrupt handler\n");	/* logged only */
+}
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
new file mode 100644
index 0000000000..ce6c739c51
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPIC timer wakeup driver
+ *
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+
+#include <asm/mpic_timer.h>
+#include <asm/mpic.h>
+
+struct fsl_mpic_timer_wakeup {
+	struct mpic_timer *timer;	/* armed wakeup timer, NULL when none */
+	struct work_struct free_work;	/* scheduled from the timer IRQ to free ->timer */
+};
+
+static struct fsl_mpic_timer_wakeup *fsl_wakeup;
+static DEFINE_MUTEX(sysfs_lock);
+
+static void fsl_free_resource(struct work_struct *ws)
+{
+	struct fsl_mpic_timer_wakeup *w =
+		container_of(ws, struct fsl_mpic_timer_wakeup, free_work);
+	struct mpic_timer *expired;
+
+	mutex_lock(&sysfs_lock);	/* work item queued by the timer IRQ */
+	expired = w->timer;
+	if (expired) {
+		disable_irq_wake(expired->irq);
+		mpic_free_timer(expired);
+		w->timer = NULL;
+	}
+	mutex_unlock(&sysfs_lock);
+}
+
+static irqreturn_t fsl_mpic_timer_irq(int irq, void *dev_id)
+{
+	struct fsl_mpic_timer_wakeup *wakeup = dev_id;
+
+	schedule_work(&wakeup->free_work);	/* the teardown takes a mutex, so defer it */
+
+	return wakeup->timer ? IRQ_HANDLED : IRQ_NONE;	/* NOTE(review): read without sysfs_lock */
+}
+
+static ssize_t fsl_timer_wakeup_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	time64_t interval = 0;	/* reported as 0 while no timer is armed */
+
+	mutex_lock(&sysfs_lock);
+	if (fsl_wakeup->timer) {
+		mpic_get_remain_time(fsl_wakeup->timer, &interval);
+		interval++;	/* shown as the remaining time plus one */
+	}
+	mutex_unlock(&sysfs_lock);
+
+	return sysfs_emit(buf, "%lld\n", interval);	/* bounded to the sysfs page */
+}
+
+static ssize_t fsl_timer_wakeup_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf,
+				size_t count)
+{
+	time64_t interval;
+	int ret;
+
+	if (kstrtoll(buf, 0, &interval))
+		return -EINVAL;
+
+	mutex_lock(&sysfs_lock);
+
+	if (fsl_wakeup->timer) {	/* drop any timer that is already armed */
+		disable_irq_wake(fsl_wakeup->timer->irq);
+		mpic_free_timer(fsl_wakeup->timer);
+		fsl_wakeup->timer = NULL;
+	}
+
+	if (!interval) {	/* writing 0 only cancels */
+		mutex_unlock(&sysfs_lock);
+		return count;
+	}
+
+	fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
+						fsl_wakeup, interval);
+	if (!fsl_wakeup->timer) {
+		mutex_unlock(&sysfs_lock);
+		return -EINVAL;
+	}
+
+	ret = enable_irq_wake(fsl_wakeup->timer->irq);
+	if (ret) {
+		mpic_free_timer(fsl_wakeup->timer);
+		fsl_wakeup->timer = NULL;
+		mutex_unlock(&sysfs_lock);
+
+		return ret;
+	}
+
+	mpic_start_timer(fsl_wakeup->timer);	/* started only once wake is enabled */
+
+	mutex_unlock(&sysfs_lock);
+
+	return count;
+}
+
+static struct device_attribute mpic_attributes = __ATTR(timer_wakeup, 0644,
+			fsl_timer_wakeup_show, fsl_timer_wakeup_store);
+
+static int __init fsl_wakeup_sys_init(void)
+{
+	struct device *dev_root;
+	int ret = -EINVAL;	/* result when the mpic subsystem has no root device */
+
+	fsl_wakeup = kzalloc(sizeof(struct fsl_mpic_timer_wakeup), GFP_KERNEL);
+	if (!fsl_wakeup)
+		return -ENOMEM;
+
+	INIT_WORK(&fsl_wakeup->free_work, fsl_free_resource);
+	dev_root = bus_get_dev_root(&mpic_subsys);
+	if (dev_root) {
+		ret = device_create_file(dev_root, &mpic_attributes);
+		put_device(dev_root);
+	}
+	if (ret) {	/* any failure, including a missing dev_root */
+		kfree(fsl_wakeup);
+		fsl_wakeup = NULL;
+	}
+	return ret;
+}
+
+static void __exit fsl_wakeup_sys_exit(void)
+{
+	struct device *dev_root;
+
+	dev_root = bus_get_dev_root(&mpic_subsys);
+	if (dev_root) {
+		device_remove_file(dev_root, &mpic_attributes);
+		put_device(dev_root);
+	}
+
+	mutex_lock(&sysfs_lock);
+	if (fsl_wakeup->timer) {
+		disable_irq_wake(fsl_wakeup->timer->irq);
+		mpic_free_timer(fsl_wakeup->timer);
+		fsl_wakeup->timer = NULL;
+	}
+	mutex_unlock(&sysfs_lock);
+	/* free_work takes sysfs_lock, so wait for it only after unlocking */
+	cancel_work_sync(&fsl_wakeup->free_work);
+	kfree(fsl_wakeup);
+}
+
+module_init(fsl_wakeup_sys_init);
+module_exit(fsl_wakeup_sys_exit);
+
+MODULE_DESCRIPTION("Freescale MPIC global timer wakeup driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Wang Dongsheng <dongsheng.wang@freescale.com>");
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
new file mode 100644
index 0000000000..57978a44d5
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -0,0 +1,616 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2007-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Tony Li <tony.li@freescale.com>
+ *	   Jason Jin <Jason.jin@freescale.com>
+ *
+ * The hwirq alloc and free code reuse from sysdev/mpic_msi.c
+ */
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/seq_file.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/mpic.h>
+#include <asm/fsl_hcalls.h>
+
+#include "fsl_msi.h"
+#include "fsl_pci.h"
+
+#define MSIIR_OFFSET_MASK	0xfffff
+#define MSIIR_IBS_SHIFT		0
+#define MSIIR_SRS_SHIFT		5
+#define MSIIR1_IBS_SHIFT	4
+#define MSIIR1_SRS_SHIFT	0
+#define MSI_SRS_MASK		0xf
+#define MSI_IBS_MASK		0x1f
+
+#define msi_hwirq(msi, msir_index, intr_index) \
+		((msir_index) << (msi)->srs_shift | \
+		 ((intr_index) << (msi)->ibs_shift))
+
+static LIST_HEAD(msi_head);
+
+struct fsl_msi_feature {
+	u32 fsl_pic_ip;	/* one of the FSL_PIC_IP_* controller types */
+	u32 msiir_offset; /* Offset of MSIIR, relative to start of MSIR bank */
+};
+
+struct fsl_msi_cascade_data {
+	struct fsl_msi *msi_data;	/* owning MSI bank */
+	int index;	/* MSIR register index served by this cascade */
+	int virq;	/* Linux IRQ number of the cascade interrupt */
+};
+
+static inline u32 fsl_msi_read(u32 __iomem *base, unsigned int reg)
+{
+	return in_be32(base + (reg >> 2));	/* byte offset -> u32 index */
+}
+
+/*
+ * Nothing to do here: the MSIR register has already been read once in the
+ * cascade interrupt handler, so this MSI interrupt has been acked.
+ */
+static void fsl_msi_end_irq(struct irq_data *d)
+{
+}
+
+static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p)
+{
+	struct fsl_msi *msi_data = irqd->domain->host_data;
+	int srs = (irqd_to_hwirq(irqd) >> msi_data->srs_shift) & MSI_SRS_MASK;
+	int cascade_virq = 0, i;
+
+	/* cascade_array[] is filled by irq_index: match ->index, not [srs] */
+	for (i = 0; i < NR_MSI_REG_MAX; i++)
+		if (msi_data->cascade_array[i] && msi_data->cascade_array[i]->index == srs)
+			cascade_virq = msi_data->cascade_array[i]->virq;
+	seq_printf(p, " fsl-msi-%d", cascade_virq);
+}
+
+static struct irq_chip fsl_msi_chip = {
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+	.irq_ack	= fsl_msi_end_irq,	/* empty callback */
+	.irq_print_chip = fsl_msi_print_chip,
+};
+
+/*
+ * irq_domain .map callback: set up @virq as an edge interrupt on
+ * fsl_msi_chip, with the owning MSI bank (h->host_data) as chip data.
+ */
+static int fsl_msi_host_map(struct irq_domain *h, unsigned int virq,
+				irq_hw_number_t hw)
+{
+	irq_set_status_flags(virq, IRQ_TYPE_EDGE_FALLING);
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_chip_and_handler(virq, &fsl_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops fsl_msi_host_ops = {
+	.map = fsl_msi_host_map,	/* the only callback this domain provides */
+};
+
+static int fsl_msi_init_allocator(struct fsl_msi *msi_data)
+{
+	int rc, hwirq;
+
+	rc = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS_MAX,
+			      irq_domain_get_of_node(msi_data->irqhost));
+	if (rc)
+		return rc;	/* the allocator's error is passed through */
+
+	/*
+	 * Reserve all the hwirqs
+	 * The available hwirqs will be released in fsl_msi_setup_hwirq()
+	 */
+	for (hwirq = 0; hwirq < NR_MSI_IRQS_MAX; hwirq++)
+		msi_bitmap_reserve_hwirq(&msi_data->bitmap, hwirq);
+
+	return 0;
+}
+
+static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct fsl_msi *msi_data;
+	irq_hw_number_t hwirq;
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		hwirq = virq_to_hw(entry->irq);	/* read before the mapping is disposed */
+		msi_data = irq_get_chip_data(entry->irq);
+		irq_set_msi_desc(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
+		msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);	/* back to its bank */
+	}
+}
+
+static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
+				struct msi_msg *msg,
+				struct fsl_msi *fsl_msi_data)
+{
+	struct fsl_msi *msi_data = fsl_msi_data;
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	u64 address; /* Physical address of the MSIIR */
+	int len;
+	const __be64 *reg;
+
+	/* If the msi-address-64 property exists, then use it */
+	reg = of_get_property(hose->dn, "msi-address-64", &len);
+	if (reg && (len == sizeof(u64)))
+		address = be64_to_cpup(reg);
+	else
+		address = fsl_pci_immrbar_base(hose) + msi_data->msiir_offset;
+
+	msg->address_lo = lower_32_bits(address);
+	msg->address_hi = upper_32_bits(address);
+
+	/*
+	 * MPIC version 2.0 has erratum PIC1. It causes
+	 * that neither MSI nor MSI-X can work fine.
+	 * This is a workaround to allow MSI-X to function
+	 * properly. It only works for MSI-X, we prevent
+	 * MSI on buggy chips in fsl_setup_msi_irqs().
+	 */
+	if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
+		msg->data = __swab32(hwirq);
+	else
+		msg->data = hwirq;	/* the hwirq already encodes SRS and IBS */
+
+	pr_debug("%s: allocated srs: %d, ibs: %d\n", __func__,
+		 (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK,
+		 (hwirq >> msi_data->ibs_shift) & MSI_IBS_MASK);
+}
+
+static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct device_node *np;
+	phandle phandle = 0;	/* 0 means: any MSI bank may be used */
+	int rc, hwirq = -ENOMEM;
+	unsigned int virq;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	struct fsl_msi *msi_data;
+
+	if (type == PCI_CAP_ID_MSI) {
+		/*
+		 * MPIC version 2.0 has erratum PIC1. For now MSI
+		 * could not work. So check to prevent MSI from
+		 * being used on the board with this erratum.
+		 */
+		list_for_each_entry(msi_data, &msi_head, list)
+			if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
+				return -EINVAL;
+	}
+
+	/*
+	 * If the PCI node has an fsl,msi property, then we need to use it
+	 * to find the specific MSI.
+	 */
+	np = of_parse_phandle(hose->dn, "fsl,msi", 0);
+	if (np) {
+		if (of_device_is_compatible(np, "fsl,mpic-msi") ||
+		    of_device_is_compatible(np, "fsl,vmpic-msi") ||
+		    of_device_is_compatible(np, "fsl,vmpic-msi-v4.3"))
+			phandle = np->phandle;
+		else {
+			dev_err(&pdev->dev,
+				"node %pOF has an invalid fsl,msi phandle %u\n",
+				hose->dn, np->phandle);
+			of_node_put(np);
+			return -EINVAL;
+		}
+		of_node_put(np);
+	}
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+		/*
+		 * Loop over all the MSI devices until we find one that has an
+		 * available interrupt.
+		 */
+		list_for_each_entry(msi_data, &msi_head, list) {
+			/*
+			 * If the PCI node has an fsl,msi property, then we
+			 * restrict our search to the corresponding MSI node.
+			 * The simplest way is to skip over MSI nodes with the
+			 * wrong phandle. Under the Freescale hypervisor, this
+			 * has the additional benefit of skipping over MSI
+			 * nodes that are not mapped in the PAMU.
+			 */
+			if (phandle && (phandle != msi_data->phandle))
+				continue;
+
+			hwirq = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
+			if (hwirq >= 0)
+				break;	/* msi_data now names the bank that owns hwirq */
+		}
+
+		if (hwirq < 0) {	/* NOTE(review): hwirq is not reset per descriptor */
+			rc = hwirq;
+			dev_err(&pdev->dev, "could not allocate MSI interrupt\n");
+			goto out_free;
+		}
+
+		virq = irq_create_mapping(msi_data->irqhost, hwirq);
+
+		if (!virq) {
+			dev_err(&pdev->dev, "fail mapping hwirq %i\n", hwirq);
+			msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);
+			rc = -ENOSPC;
+			goto out_free;
+		}
+		/* chip_data is msi_data via host->hostdata in host->map() */
+		irq_set_msi_desc(virq, entry);
+
+		fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data);
+		pci_write_msi_msg(virq, &msg);
+	}
+	return 0;
+
+out_free:
+	/* free by the caller of this function */
+	return rc;
+}
+
+static irqreturn_t fsl_msi_cascade(int irq, void *data)
+{
+	struct fsl_msi *msi_data;
+	int msir_index = -1;
+	u32 msir_value = 0;
+	u32 intr_index;
+	u32 have_shift = 0;	/* bits already shifted out of msir_value */
+	struct fsl_msi_cascade_data *cascade_data = data;
+	irqreturn_t ret = IRQ_NONE;
+
+	msi_data = cascade_data->msi_data;
+	msir_index = cascade_data->index;
+
+	/* Read the pending-MSI bitmap of this MSIR via the PIC-specific path */
+	switch (msi_data->feature & FSL_PIC_IP_MASK) {
+	case FSL_PIC_IP_MPIC:
+		msir_value = fsl_msi_read(msi_data->msi_regs,
+			msir_index * 0x10);
+		break;
+	case FSL_PIC_IP_IPIC:
+		msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
+		break;
+#ifdef CONFIG_EPAPR_PARAVIRT
+	case FSL_PIC_IP_VMPIC: {
+		unsigned int ret;
+		ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
+		if (ret) {
+			pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
+			       "irq %u (ret=%u)\n", irq, ret);
+			msir_value = 0;
+		}
+		break;
+	}
+#endif
+	}
+
+	while (msir_value) {	/* dispatch each set bit, lowest first */
+		int err;
+		intr_index = ffs(msir_value) - 1;
+
+		err = generic_handle_domain_irq(msi_data->irqhost,
+				msi_hwirq(msi_data, msir_index,
+					  intr_index + have_shift));
+		if (!err)
+			ret = IRQ_HANDLED;
+
+		have_shift += intr_index + 1;
+		msir_value = (msir_value >> intr_index) >> 1;	/* a single 32-bit shift is undefined */
+	}
+
+	return ret;	/* IRQ_NONE unless at least one MSI was dispatched */
+}
+
+static int fsl_of_msi_remove(struct platform_device *ofdev)
+{
+	struct fsl_msi *msi = platform_get_drvdata(ofdev);
+	int virq, i;
+
+	if (msi->list.prev != NULL)	/* non-NULL only once probe added it to msi_head */
+		list_del(&msi->list);
+	for (i = 0; i < NR_MSI_REG_MAX; i++) {
+		if (msi->cascade_array[i]) {
+			virq = msi->cascade_array[i]->virq;
+
+			BUG_ON(!virq);
+
+			free_irq(virq, msi->cascade_array[i]);
+			kfree(msi->cascade_array[i]);
+			irq_dispose_mapping(virq);
+		}
+	}
+	if (msi->bitmap.bitmap)
+		msi_bitmap_free(&msi->bitmap);
+	if ((msi->feature & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC)
+		iounmap(msi->msi_regs);	/* VMPIC banks were never ioremap()ed by probe */
+	kfree(msi);	/* NOTE(review): msi->irqhost is not removed here */
+
+	return 0;
+}
+
+static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
+
+static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
+			       int offset, int irq_index)
+{
+	struct fsl_msi_cascade_data *cascade_data = NULL;
+	int virt_msir, i, ret;
+
+	virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index);
+	if (!virt_msir) {
+		dev_err(&dev->dev, "%s: Cannot translate IRQ index %d\n",
+			__func__, irq_index);
+		return 0;	/* not fatal: this MSIR's hwirqs simply stay reserved */
+	}
+
+	cascade_data = kzalloc(sizeof(struct fsl_msi_cascade_data), GFP_KERNEL);
+	if (!cascade_data) {
+		dev_err(&dev->dev, "No memory for MSI cascade data\n");
+		return -ENOMEM;
+	}
+	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+			      &fsl_msi_irq_request_class);
+	cascade_data->index = offset;
+	cascade_data->msi_data = msi;
+	cascade_data->virq = virt_msir;
+
+	ret = request_irq(virt_msir, fsl_msi_cascade, IRQF_NO_THREAD,
+			  "fsl-msi-cascade", cascade_data);
+	if (ret) {
+		dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n",
+			virt_msir, ret);
+		kfree(cascade_data);	/* never published, so remove() cannot free_irq() it */
+		return ret;
+	}
+	msi->cascade_array[irq_index] = cascade_data;	/* publish only a requested IRQ */
+
+	/* Release the hwirqs corresponding to this MSI register */
+	for (i = 0; i < IRQS_PER_MSI_REG; i++)
+		msi_bitmap_free_hwirqs(&msi->bitmap,
+				       msi_hwirq(msi, offset, i), 1);
+	return 0;
+}
+
+static const struct of_device_id fsl_of_msi_ids[];
+static int fsl_of_msi_probe(struct platform_device *dev)
+{
+	const struct of_device_id *match;
+	struct fsl_msi *msi;
+	struct resource res, msiir;
+	int err, i, j, irq_index, count;
+	const u32 *p;
+	const struct fsl_msi_feature *features;
+	int len;
+	u32 offset;
+	struct pci_controller *phb;
+
+	match = of_match_device(fsl_of_msi_ids, &dev->dev);
+	if (!match)
+		return -EINVAL;
+	features = match->data;
+
+	printk(KERN_DEBUG "Setting up Freescale MSI support\n");
+
+	msi = kzalloc(sizeof(struct fsl_msi), GFP_KERNEL);
+	if (!msi) {
+		dev_err(&dev->dev, "No memory for MSI structure\n");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(dev, msi);	/* error_out relies on this via fsl_of_msi_remove() */
+
+	msi->irqhost = irq_domain_add_linear(dev->dev.of_node,
+				      NR_MSI_IRQS_MAX, &fsl_msi_host_ops, msi);
+
+	if (msi->irqhost == NULL) {
+		dev_err(&dev->dev, "No memory for MSI irqhost\n");
+		err = -ENOMEM;
+		goto error_out;
+	}
+
+	/*
+	 * Under the Freescale hypervisor, the msi nodes don't have a 'reg'
+	 * property.  Instead, we use hypercalls to access the MSI.
+	 */
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) {
+		err = of_address_to_resource(dev->dev.of_node, 0, &res);
+		if (err) {
+			dev_err(&dev->dev, "invalid resource for node %pOF\n",
+				dev->dev.of_node);
+			goto error_out;
+		}
+
+		msi->msi_regs = ioremap(res.start, resource_size(&res));
+		if (!msi->msi_regs) {
+			err = -ENOMEM;
+			dev_err(&dev->dev, "could not map node %pOF\n",
+				dev->dev.of_node);
+			goto error_out;
+		}
+
+		/*
+		 * First read the MSIIR/MSIIR1 offset from dts
+		 * (the second 'reg' entry). On failure use the
+		 * hardcoded MSIIR offset of this PIC type, taken
+		 * relative to the start of the MSIR bank.
+		 */
+		if (of_address_to_resource(dev->dev.of_node, 1, &msiir))
+			msi->msiir_offset = features->msiir_offset +
+					    (res.start & MSIIR_OFFSET_MASK);
+		else
+			msi->msiir_offset = msiir.start & MSIIR_OFFSET_MASK;
+	}
+
+	msi->feature = features->fsl_pic_ip;
+
+	/* For erratum PIC1 on MPIC version 2.0*/
+	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) == FSL_PIC_IP_MPIC
+			&& (fsl_mpic_primary_get_version() == 0x0200))
+		msi->feature |= MSI_HW_ERRATA_ENDIAN;
+
+	/*
+	 * Remember the phandle, so that we can match with any PCI nodes
+	 * that have an "fsl,msi" property.
+	 */
+	msi->phandle = dev->dev.of_node->phandle;
+
+	err = fsl_msi_init_allocator(msi);
+	if (err) {
+		dev_err(&dev->dev, "Error allocating MSI bitmap\n");
+		goto error_out;
+	}
+
+	p = of_get_property(dev->dev.of_node, "msi-available-ranges", &len);
+
+	if (of_device_is_compatible(dev->dev.of_node, "fsl,mpic-msi-v4.3") ||
+	    of_device_is_compatible(dev->dev.of_node, "fsl,vmpic-msi-v4.3")) {
+		msi->srs_shift = MSIIR1_SRS_SHIFT;
+		msi->ibs_shift = MSIIR1_IBS_SHIFT;
+		if (p)
+			dev_warn(&dev->dev, "%s: does not support msi-available-ranges property\n",
+				__func__);
+
+		for (irq_index = 0; irq_index < NR_MSI_REG_MSIIR1;
+		     irq_index++) {
+			err = fsl_msi_setup_hwirq(msi, dev,
+						  irq_index, irq_index);
+			if (err)
+				goto error_out;
+		}
+	} else {
+		static const u32 all_avail[] =
+			{ 0, NR_MSI_REG_MSIIR * IRQS_PER_MSI_REG };
+
+		msi->srs_shift = MSIIR_SRS_SHIFT;
+		msi->ibs_shift = MSIIR_IBS_SHIFT;
+
+		if (p && len % (2 * sizeof(u32)) != 0) {
+			dev_err(&dev->dev, "%s: Malformed msi-available-ranges property\n",
+				__func__);
+			err = -EINVAL;
+			goto error_out;
+		}
+
+		if (!p) {	/* no property: every MSIIR register is available */
+			p = all_avail;
+			len = sizeof(all_avail);
+		}
+
+		for (irq_index = 0, i = 0; i < len / (2 * sizeof(u32)); i++) {
+			if (p[i * 2] % IRQS_PER_MSI_REG ||
+			    p[i * 2 + 1] % IRQS_PER_MSI_REG) {
+				pr_warn("%s: %pOF: msi available range of %u at %u is not IRQ-aligned\n",
+				       __func__, dev->dev.of_node,
+				       p[i * 2 + 1], p[i * 2]);
+				err = -EINVAL;
+				goto error_out;
+			}
+
+			offset = p[i * 2] / IRQS_PER_MSI_REG;	/* first MSIR of the range */
+			count = p[i * 2 + 1] / IRQS_PER_MSI_REG;	/* MSIRs in the range */
+
+			for (j = 0; j < count; j++, irq_index++) {
+				err = fsl_msi_setup_hwirq(msi, dev, offset + j,
+							  irq_index);
+				if (err)
+					goto error_out;
+			}
+		}
+	}
+
+	list_add_tail(&msi->list, &msi_head);
+
+	/*
+	 * Apply the MSI ops to all the controllers.
+	 * It doesn't hurt to reassign the same ops,
+	 * but bail out if we find another MSI driver.
+	 */
+	list_for_each_entry(phb, &hose_list, list_node) {
+		if (!phb->controller_ops.setup_msi_irqs) {
+			phb->controller_ops.setup_msi_irqs = fsl_setup_msi_irqs;
+			phb->controller_ops.teardown_msi_irqs = fsl_teardown_msi_irqs;
+		} else if (phb->controller_ops.setup_msi_irqs != fsl_setup_msi_irqs) {
+			dev_err(&dev->dev, "Different MSI driver already installed!\n");
+			err = -ENODEV;
+			goto error_out;
+		}
+	}
+	return 0;
+error_out:
+	fsl_of_msi_remove(dev);	/* undoes whatever was set up so far and frees msi */
+	return err;
+}
+
+static const struct fsl_msi_feature mpic_msi_feature = {
+	.fsl_pic_ip = FSL_PIC_IP_MPIC,
+	.msiir_offset = 0x140,	/* fallback when the node has no second 'reg' entry */
+};
+
+static const struct fsl_msi_feature ipic_msi_feature = {
+	.fsl_pic_ip = FSL_PIC_IP_IPIC,
+	.msiir_offset = 0x38,	/* fallback when the node has no second 'reg' entry */
+};
+
+static const struct fsl_msi_feature vmpic_msi_feature = {
+	.fsl_pic_ip = FSL_PIC_IP_VMPIC,
+	.msiir_offset = 0,	/* unused: probe skips the register mapping for VMPIC */
+};
+
+static const struct of_device_id fsl_of_msi_ids[] = {
+	{
+		.compatible = "fsl,mpic-msi",
+		.data = &mpic_msi_feature,
+	},
+	{
+		.compatible = "fsl,mpic-msi-v4.3",
+		.data = &mpic_msi_feature,
+	},
+	{
+		.compatible = "fsl,ipic-msi",
+		.data = &ipic_msi_feature,
+	},
+#ifdef CONFIG_EPAPR_PARAVIRT
+	{
+		.compatible = "fsl,vmpic-msi",
+		.data = &vmpic_msi_feature,
+	},
+	{
+		.compatible = "fsl,vmpic-msi-v4.3",
+		.data = &vmpic_msi_feature,
+	},
+#endif
+	{}	/* sentinel */
+};
+
+static struct platform_driver fsl_of_msi_driver = {
+	.driver = {
+		.name = "fsl-msi",
+		.of_match_table = fsl_of_msi_ids,
+	},
+	.probe = fsl_of_msi_probe,
+	.remove = fsl_of_msi_remove,	/* also called by probe on its error path */
+};
+
+static __init int fsl_of_msi_init(void)
+{
+	return platform_driver_register(&fsl_of_msi_driver);	/* result is returned unchanged */
+}
+
+subsys_initcall(fsl_of_msi_init);
diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
new file mode 100644
index 0000000000..e2a1bfc7c2
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_msi.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2007-2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Tony Li <tony.li@freescale.com>
+ *	   Jason Jin <Jason.jin@freescale.com>
+ */
+#ifndef _POWERPC_SYSDEV_FSL_MSI_H
+#define _POWERPC_SYSDEV_FSL_MSI_H
+
+#include <linux/of.h>
+#include <asm/msi_bitmap.h>
+/* Sizing limits for the MSI register banks. */
+#define NR_MSI_REG_MSIIR	8  /* MSIIR can index 8 MSI registers */
+#define NR_MSI_REG_MSIIR1	16 /* MSIIR1 can index 16 MSI registers */
+#define NR_MSI_REG_MAX		NR_MSI_REG_MSIIR1	/* the larger of the two */
+#define IRQS_PER_MSI_REG	32
+#define NR_MSI_IRQS_MAX	(NR_MSI_REG_MAX * IRQS_PER_MSI_REG)	/* 16 * 32 = 512 */
+/* PIC type codes; every value fits inside FSL_PIC_IP_MASK (low four bits). */
+#define FSL_PIC_IP_MASK   0x0000000F
+#define FSL_PIC_IP_MPIC   0x00000001
+#define FSL_PIC_IP_IPIC   0x00000002
+#define FSL_PIC_IP_VMPIC  0x00000003
+/* Errata flag bit; it lies outside FSL_PIC_IP_MASK. */
+#define MSI_HW_ERRATA_ENDIAN 0x00000010
+
+struct fsl_msi_cascade_data;
+/* State kept for one MSI bank; banks are chained through the list member. */
+struct fsl_msi {
+	struct irq_domain *irqhost;
+
+	unsigned long cascade_irq;
+
+	u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */
+	u32 ibs_shift; /* Shift of interrupt bit select */
+	u32 srs_shift; /* Shift of the shared interrupt register select */
+	void __iomem *msi_regs;
+	u32 feature;	/* presumably FSL_PIC_IP_* plus errata flag bits - TODO confirm */
+	struct fsl_msi_cascade_data *cascade_array[NR_MSI_REG_MAX];	/* NR_MSI_REG_MAX (16) slots */
+
+	struct msi_bitmap bitmap;
+
+	struct list_head list;          /* support multiple MSI banks */
+
+	phandle phandle;
+};
+
+#endif /* _POWERPC_SYSDEV_FSL_MSI_H */
+
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
new file mode 100644
index 0000000000..3868483fbe
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -0,0 +1,1367 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC83xx/85xx/86xx PCI/PCIE support routing.
+ *
+ * Copyright 2007-2012 Freescale Semiconductor, Inc.
+ * Copyright 2008-2009 MontaVista Software, Inc.
+ *
+ * Initial author: Xianghua Xiao <x.xiao@freescale.com>
+ * Recode: ZHANG WEI <wei.zhang@freescale.com>
+ * Rewrite the routing for Freescale PCI and PCI Express
+ * 	Roy Zang <tie-fei.zang@freescale.com>
+ * MPC83xx PCI-Express support:
+ * 	Tony Li <tony.li@freescale.com>
+ * 	Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/fsl/edac.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/memblock.h>
+#include <linux/log2.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
+#include <linux/uaccess.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/machdep.h>
+#include <asm/mpc85xx.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include <asm/swiotlb.h>
+#include <asm/setup.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+/* Set by quirk_fsl_pcie_early() and mpc83xx_add_bridge() respectively. */
+static int fsl_pcie_bus_fixup, is_mpc83xx_pci;
+/* Early fixup: give Freescale PCIe host-mode devices the PCI bridge class. */
+static void quirk_fsl_pcie_early(struct pci_dev *dev)
+{
+	u8 header;
+
+	/* Only PCIe devices are of interest here. */
+	if (pci_is_pcie(dev)) {
+		pci_read_config_byte(dev, PCI_HEADER_TYPE, &header);
+
+		/* Only act when the header type says bridge (host mode). */
+		if ((header & 0x7f) == PCI_HEADER_TYPE_BRIDGE) {
+			/* Present the device with the normal bridge class. */
+			dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+			/* Flag read back by fsl_pcibios_fixup_bus(). */
+			fsl_pcie_bus_fixup = 1;
+		}
+	}
+}
+/* Defined further down; needed here to recognise the FSL read accessor. */
+static int fsl_indirect_read_config(struct pci_bus *, unsigned int,
+				    int, int, u32 *);
+
+/* Sample the PCIe link state; returns 1 when judged down, 0 otherwise. */
+static int fsl_pcie_check_link(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *regs;
+	u32 ltssm = 0;
+
+	if (!(hose->indirect_type & PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK)) {
+		/* for PCIe IP rev 3.0 or greater use CSR0 for link state */
+		regs = hose->private_data;
+		ltssm = in_be32(&regs->pex_csr0) & PEX_CSR0_LTSSM_MASK;
+		ltssm >>= PEX_CSR0_LTSSM_SHIFT;
+		return ltssm != PEX_CSR0_LTSSM_L0;
+	}
+
+	/* Read PCIE_LTSSM from config space with whichever accessor fits. */
+	if (hose->ops->read != fsl_indirect_read_config)
+		early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &ltssm);
+	else
+		__indirect_read_config(hose, hose->first_busno, 0,
+				       PCIE_LTSSM, 4, &ltssm);
+
+	return ltssm < PCIE_LTSSM_L0;
+}
+/* Config read that refreshes the NO_PCIE_LINK flag before each access. */
+static int fsl_indirect_read_config(struct pci_bus *bus, unsigned int devfn,
+				    int offset, int len, u32 *val)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+
+	if (!fsl_pcie_check_link(phb))
+		phb->indirect_type &= ~PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+	else
+		phb->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+
+	return indirect_read_config(bus, devfn, offset, len, val);
+}
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+/* PCIe config accessors: link-checking read, plain indirect write. */
+static struct pci_ops fsl_indirect_pcie_ops =
+{
+	.read = fsl_indirect_read_config,
+	.write = indirect_write_config,
+};
+/* Base of the second (high) inbound window; written by setup_pci_atmu(). */
+static u64 pci64_dma_offset;
+/* swiotlb glue; without CONFIG_SWIOTLB only the empty stub is built. */
+#ifdef CONFIG_SWIOTLB
+static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	/* Limit device DMA to the last byte of the hose's DMA window. */
+	pdev->dev.bus_dma_limit =
+		hose->dma_window_base_cur + hose->dma_window_size - 1;
+}
+/* Install the per-device hook above, but only when swiotlb is enabled. */
+static void setup_swiotlb_ops(struct pci_controller *hose)
+{
+	if (ppc_swiotlb_enable)
+		hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
+}
+#else
+static inline void setup_swiotlb_ops(struct pci_controller *hose) {}
+#endif
+/* dma_set_mask hook: point wide-mask PCI devices at the 64-bit window. */
+static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	/* Smallest mask that still covers the whole large inbound mapping. */
+	const u64 needed = pci64_dma_offset * 2 - 1;
+
+	if (!dev_is_pci(dev) || dma_mask < needed)
+		return;
+	/* Device can address any RAM through the large inbound mapping. */
+	dev->bus_dma_limit = 0;
+	dev->archdata.dma_offset = pci64_dma_offset;
+}
+/* Split one MEM resource into power-of-two outbound windows from @index. */
+static int setup_one_atmu(struct ccsr_pci __iomem *pci,
+	unsigned int index, const struct resource *res,
+	resource_size_t offset)
+{
+	resource_size_t remaining = resource_size(res);
+	resource_size_t cpu_addr = res->start;
+	resource_size_t bus_addr = res->start - offset;
+	u32 attr = 0x80044000; /* enable & mem R/W */
+	unsigned int used = 0;	/* windows programmed so far; also the result */
+
+	pr_debug("PCI MEM resource start 0x%016llx, size 0x%016llx.\n",
+		(u64)res->start, (u64)remaining);
+	if (res->flags & IORESOURCE_PREFETCH)
+		attr |= 0x10000000; /* enable relaxed ordering */
+
+	while (remaining > 0) {
+		unsigned int win = index + used;
+		unsigned int bits = min_t(u32, ilog2(remaining),
+					__ffs(bus_addr | cpu_addr));
+		resource_size_t chunk;
+
+		if (win >= 5)
+			return -1;	/* no outbound window left */
+		chunk = (resource_size_t)1U << bits;
+		out_be32(&pci->pow[win].potar, bus_addr >> 12);
+		out_be32(&pci->pow[win].potear, (u64)bus_addr >> 44);
+		out_be32(&pci->pow[win].powbar, cpu_addr >> 12);
+		out_be32(&pci->pow[win].powar, attr | (bits - 1));
+		bus_addr += chunk;
+		cpu_addr += chunk;
+		remaining -= chunk;
+		used++;
+	}
+	return used;
+}
+/* True when the first "memory" node carries "linux,usable-memory". */
+static bool is_kdump(void)
+{
+	struct device_node *mem_np = of_find_node_by_type(NULL, "memory");
+	bool usable = false;
+
+	/* A device tree without a memory node is unexpected: warn once. */
+	if (WARN_ON_ONCE(!mem_np))
+		return usable;
+
+	usable = of_property_read_bool(mem_np, "linux,usable-memory");
+
+	/* Drop the reference taken by of_find_node_by_type(). */
+	of_node_put(mem_np);
+
+	return usable;
+}
+
+/* atmu setup for fsl pci/pcie controller: outbound windows, then inbound */
+static void setup_pci_atmu(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	int i, j, n, mem_log, win_idx = 3, start_idx = 1, end_idx = 4;
+	u64 mem, sz, paddr_hi = 0;
+	u64 offset = 0, paddr_lo = ULLONG_MAX;
+	u32 pcicsrbar = 0, pcicsrbar_sz;
+	u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
+			PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
+	const u64 *reg;
+	int len;
+	bool setup_inbound;
+
+	/*
+	 * If this is kdump, we don't want to trigger a bunch of PCI
+	 * errors by closing the window on in-flight DMA.
+	 *
+	 * We still run most of the function's logic so that things like
+	 * hose->dma_window_size still get set.
+	 */
+	setup_inbound = !is_kdump();
+
+	if (of_device_is_compatible(hose->dn, "fsl,bsc9132-pcie")) {
+		/*
+		 * BSC9132 Rev1.0 has an issue where all the PEX inbound
+		 * windows have implemented the default target value as 0xf
+		 * for CCSR space.In all Freescale legacy devices the target
+		 * of 0xf is reserved for local memory space. 9132 Rev1.0
+		 * now has local memory space mapped to target 0x0 instead of
+		 * 0xf. Hence adding a workaround to remove the target 0xf
+		 * defined for memory space from Inbound window attributes.
+		 */
+		piwar &= ~PIWAR_TGI_LOCAL;
+	}
+
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		if (in_be32(&pci->block_rev1) >= PCIE_IP_REV_2_2) {
+			win_idx = 2;
+			start_idx = 0;
+			end_idx = 3;
+		}
+	}
+
+	/* Disable all windows (except powar0 since it's ignored) */
+	for(i = 1; i < 5; i++)
+		out_be32(&pci->pow[i].powar, 0);
+
+	if (setup_inbound) {
+		for (i = start_idx; i < end_idx; i++)
+			out_be32(&pci->piw[i].piwar, 0);
+	}
+
+	/* Setup outbound MEM window */
+	for(i = 0, j = 1; i < 3; i++) {
+		if (!(hose->mem_resources[i].flags & IORESOURCE_MEM))
+			continue;
+
+		paddr_lo = min(paddr_lo, (u64)hose->mem_resources[i].start);
+		paddr_hi = max(paddr_hi, (u64)hose->mem_resources[i].end);
+
+		/* We assume all memory resources have the same offset */
+		offset = hose->mem_offset[i];
+		n = setup_one_atmu(pci, j, &hose->mem_resources[i], offset);
+		/* n < 0: setup_one_atmu() ran out of windows for this resource */
+		if (n < 0 || j >= 5) {
+			pr_err("Ran out of outbound PCI ATMUs for resource %d!\n", i);
+			hose->mem_resources[i].flags |= IORESOURCE_DISABLED;
+		} else
+			j += n;
+	}
+
+	/* Setup outbound IO window */
+	if (hose->io_resource.flags & IORESOURCE_IO) {
+		if (j >= 5) {
+			pr_err("Ran out of outbound PCI ATMUs for IO resource\n");
+		} else {
+			pr_debug("PCI IO resource start 0x%016llx, size 0x%016llx, "
+				 "phy base 0x%016llx.\n",
+				 (u64)hose->io_resource.start,
+				 (u64)resource_size(&hose->io_resource),
+				 (u64)hose->io_base_phys);
+			out_be32(&pci->pow[j].potar, (hose->io_resource.start >> 12));
+			out_be32(&pci->pow[j].potear, 0);
+			out_be32(&pci->pow[j].powbar, (hose->io_base_phys >> 12));
+			/* Enable, IO R/W */
+			out_be32(&pci->pow[j].powar, 0x80088000
+				| (ilog2(hose->io_resource.end
+				- hose->io_resource.start + 1) - 1));
+		}
+	}
+
+	/* convert to pci address space */
+	paddr_hi -= offset;
+	paddr_lo -= offset;
+
+	if (paddr_hi == paddr_lo) {
+		pr_err("%pOF: No outbound window space\n", hose->dn);
+		return;
+	}
+
+	if (paddr_lo == 0) {
+		pr_err("%pOF: No space for inbound window\n", hose->dn);
+		return;
+	}
+
+	/* setup PCSRBAR/PEXCSRBAR */
+	early_write_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_0, 0xffffffff);
+	early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_0, &pcicsrbar_sz);
+	pcicsrbar_sz = ~pcicsrbar_sz + 1;	/* size derived from the all-ones readback */
+	/* CSR BAR goes just under 4G if clear of outbound space, else under paddr_lo */
+	if (paddr_hi < (0x100000000ull - pcicsrbar_sz) ||
+		(paddr_lo > 0x100000000ull))
+		pcicsrbar = 0x100000000ull - pcicsrbar_sz;
+	else
+		pcicsrbar = (paddr_lo - pcicsrbar_sz) & -pcicsrbar_sz;
+	early_write_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_0, pcicsrbar);
+
+	paddr_lo = min(paddr_lo, (u64)pcicsrbar);
+
+	pr_info("%pOF: PCICSRBAR @ 0x%x\n", hose->dn, pcicsrbar);
+
+	/* Setup inbound mem window */
+	mem = memblock_end_of_DRAM();
+	pr_info("%s: end of DRAM %llx\n", __func__, mem);
+
+	/*
+	 * The msi-address-64 property, if it exists, indicates the physical
+	 * address of the MSIIR register.  Normally, this register is located
+	 * inside CCSR, so the ATMU that covers all of CCSR is used. But if
+	 * this property exists, then we normally need to create a new ATMU
+	 * for it.  For now, however, we cheat.  The only entity that creates
+	 * this property is the Freescale hypervisor, and the address is
+	 * specified in the partition configuration.  Typically, the address
+	 * is located in the page immediately after the end of DDR.  If so, we
+	 * can avoid allocating a new ATMU by extending the DDR ATMU by one
+	 * page.
+	 */
+	reg = of_get_property(hose->dn, "msi-address-64", &len);
+	if (reg && (len == sizeof(u64))) {
+		u64 address = be64_to_cpup(reg);
+
+		if ((address >= mem) && (address < (mem + PAGE_SIZE))) {
+			pr_info("%pOF: extending DDR ATMU to cover MSIIR", hose->dn);
+			mem += PAGE_SIZE;
+		} else {
+			/* TODO: Create a new ATMU for MSIIR */
+			pr_warn("%pOF: msi-address-64 address of %llx is "
+				"unsupported\n", hose->dn, address);
+		}
+	}
+	/* First inbound window is capped by DRAM end and lowest outbound address */
+	sz = min(mem, paddr_lo);
+	mem_log = ilog2(sz);
+
+	/* PCIe can overmap inbound & outbound since RX & TX are separated */
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		/* Size window to exact size if power-of-two or one size up */
+		if ((1ull << mem_log) != mem) {
+			mem_log++;
+			if ((1ull << mem_log) > mem)
+				pr_info("%pOF: Setting PCI inbound window "
+					"greater than memory size\n", hose->dn);
+		}
+
+		piwar |= ((mem_log - 1) & PIWAR_SZ_MASK);
+
+		if (setup_inbound) {
+			/* Setup inbound memory window */
+			out_be32(&pci->piw[win_idx].pitar,  0x00000000);
+			out_be32(&pci->piw[win_idx].piwbar, 0x00000000);
+			out_be32(&pci->piw[win_idx].piwar,  piwar);
+		}
+
+		win_idx--;
+		hose->dma_window_base_cur = 0x00000000;
+		hose->dma_window_size = (resource_size_t)sz;
+
+		/*
+		 * if we have >4G of memory setup second PCI inbound window to
+		 * let devices that are 64-bit address capable to work w/o
+		 * SWIOTLB and access the full range of memory
+		 */
+		if (sz != mem) {
+			mem_log = ilog2(mem);
+
+			/* Size window up if we dont fit in exact power-of-2 */
+			if ((1ull << mem_log) != mem)
+				mem_log++;
+
+			piwar = (piwar & ~PIWAR_SZ_MASK) | (mem_log - 1);
+			pci64_dma_offset = 1ULL << mem_log;
+
+			if (setup_inbound) {
+				/* Setup inbound memory window */
+				out_be32(&pci->piw[win_idx].pitar,  0x00000000);
+				out_be32(&pci->piw[win_idx].piwbear,
+						pci64_dma_offset >> 44);
+				out_be32(&pci->piw[win_idx].piwbar,
+						pci64_dma_offset >> 12);
+				out_be32(&pci->piw[win_idx].piwar,  piwar);
+			}
+
+			/*
+			 * install our own dma_set_mask handler to fixup dma_ops
+			 * and dma_offset
+			 */
+			ppc_md.dma_set_mask = fsl_pci_dma_set_mask;
+
+			pr_info("%pOF: Setup 64-bit PCI DMA window\n", hose->dn);
+		}
+	} else {
+		u64 paddr = 0;
+
+		if (setup_inbound) {
+			/* Setup inbound memory window */
+			out_be32(&pci->piw[win_idx].pitar,  paddr >> 12);
+			out_be32(&pci->piw[win_idx].piwbar, paddr >> 12);
+			out_be32(&pci->piw[win_idx].piwar,
+				 (piwar | (mem_log - 1)));
+		}
+
+		win_idx--;
+		paddr += 1ull << mem_log;
+		sz -= 1ull << mem_log;
+
+		if (sz) {
+			mem_log = ilog2(sz);
+			piwar |= (mem_log - 1);
+
+			if (setup_inbound) {
+				out_be32(&pci->piw[win_idx].pitar,
+					 paddr >> 12);
+				out_be32(&pci->piw[win_idx].piwbar,
+					 paddr >> 12);
+				out_be32(&pci->piw[win_idx].piwar, piwar);
+			}
+
+			win_idx--;
+			paddr += 1ull << mem_log;
+		}
+
+		hose->dma_window_base_cur = 0x00000000;
+		hose->dma_window_size = (resource_size_t)paddr;
+	}
+	/* Inbound window smaller than DRAM: some memory needs bounce buffers */
+	if (hose->dma_window_size < mem) {
+#ifdef CONFIG_SWIOTLB
+		ppc_swiotlb_enable = 1;
+#else
+		pr_err("%pOF: ERROR: Memory size exceeds PCI ATMU ability to "
+			"map - enable CONFIG_SWIOTLB to avoid dma errors.\n",
+			 hose->dn);
+#endif
+		/* adjusting outbound windows could reclaim space in mem map */
+		if (paddr_hi < 0xffffffffull)
+			pr_warn("%pOF: WARNING: Outbound window cfg leaves "
+				"gaps in memory map. Adjusting the memory map "
+				"could reduce unnecessary bounce buffering.\n",
+				hose->dn);
+
+		pr_info("%pOF: DMA window size is 0x%llx\n", hose->dn,
+			(u64)hose->dma_window_size);
+	}
+}
+/* Enable SERR/master/MEM/IO on the host bridge, then PCI-X or latency setup. */
+static void setup_pci_cmd(struct pci_controller *hose)
+{
+	const u16 enable = PCI_COMMAND_SERR | PCI_COMMAND_MASTER |
+			   PCI_COMMAND_MEMORY | PCI_COMMAND_IO;
+	u16 reg;
+	int pcix;
+
+	early_read_config_word(hose, 0, 0, PCI_COMMAND, &reg);
+	early_write_config_word(hose, 0, 0, PCI_COMMAND, reg | enable);
+
+	pcix = early_find_capability(hose, 0, 0, PCI_CAP_ID_PCIX);
+	if (!pcix) {
+		/* No PCI-X capability: program the latency timer instead. */
+		early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0x80);
+		return;
+	}
+	reg = PCI_X_CMD_MAX_SPLIT | PCI_X_CMD_MAX_READ |
+	      PCI_X_CMD_ERO | PCI_X_CMD_DPERR_E;
+	early_write_config_word(hose, 0, 0, pcix + PCI_X_CMD, reg);
+}
+/* Replace a root-complex bridge's bogus windows with the PHB's resources. */
+void fsl_pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+	int idx, pcie_cap = 0, link_down;
+
+	if (fsl_pcie_bus_fixup)
+		pcie_cap = early_find_capability(phb, 0, 0, PCI_CAP_ID_EXP);
+	link_down = !!(phb->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK);
+
+	/* Only the bus directly below the host bus is touched. */
+	if (bus->parent != phb->bus || !(pcie_cap || link_down))
+		return;
+
+	/*
+	 * The root complex bridge comes up with bogus resources, so the
+	 * PHB ones are copied in: slot 0 takes the IO window, slots 1-3
+	 * take mem_resources[0..2], any further slot is zeroed.  With the
+	 * current generic PCI code, the PHB bus no longer has
+	 * bus->resource[0..4] set, which makes this a bit more tricky.
+	 */
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCE_NUM; ++idx) {
+		struct resource *dst = bus->resource[idx];
+		const struct resource *src = NULL;
+
+		if (!dst)
+			continue;
+		if (idx == 0)
+			src = &phb->io_resource;
+		else if (idx < 4)
+			src = &phb->mem_resources[idx - 1];
+
+		dst->start = src ? src->start : 0;
+		dst->end   = src ? src->end   : 0;
+		dst->flags = src ? src->flags : 0;
+	}
+}
+/* Set up one FSL PCI/PCIe host bridge; returns 0 or a negative errno. */
+static int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+{
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc;
+	const int *bus_range;
+	u8 hdr_type, progif;
+	u32 class_code;
+	struct device_node *dev;
+	struct ccsr_pci __iomem *pci;
+	u16 temp;
+	u32 svr = mfspr(SPRN_SVR);
+
+	dev = pdev->dev.of_node;
+
+	if (!of_device_is_available(dev)) {
+		pr_warn("%pOF: disabled\n", dev);
+		return -ENODEV;
+	}
+
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
+
+	/* Fetch host bridge registers address */
+	if (of_address_to_resource(dev, 0, &rsrc)) {
+		printk(KERN_WARNING "Can't get pci register base!\n");
+		return -ENOMEM;
+	}
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		pr_warn("Can't get bus-range for %pOF, assume bus 0\n", dev);
+		bus_range = NULL;	/* a short property must never be indexed */
+	}
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	/* set platform device as the parent */
+	hose->parent = &pdev->dev;
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	pr_debug("PCI memory map start 0x%016llx, size 0x%016llx\n",
+		 (u64)rsrc.start, (u64)resource_size(&rsrc));
+
+	pci = hose->private_data = ioremap(rsrc.start, resource_size(&rsrc));
+	if (!hose->private_data)
+		goto no_bridge;
+	/* Config address/data registers sit at offsets 0 and 4 of the block. */
+	setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4,
+			   PPC_INDIRECT_TYPE_BIG_ENDIAN);
+
+	if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0)
+		hose->indirect_type |= PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK;
+
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		/* use fsl_indirect_read_config for PCIe */
+		hose->ops = &fsl_indirect_pcie_ops;
+		/* For PCIE read HEADER_TYPE to identify controller mode */
+		early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type);
+		if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+			goto no_bridge;
+	} else {
+		/* For PCI read PROG to identify controller mode */
+		early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif);
+		if ((progif & 1) &&
+		    !of_property_read_bool(dev, "fsl,pci-agent-force-enum"))
+			goto no_bridge;
+	}
+
+	setup_pci_cmd(hose);
+
+	/* check PCI express link status */
+	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
+		hose->indirect_type |= PPC_INDIRECT_TYPE_EXT_REG |
+			PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS;
+		if (fsl_pcie_check_link(hose))
+			hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+		/* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */
+		if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) {
+			early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code);
+			class_code &= 0xff;
+			class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
+			early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code);
+		}
+	} else {
+		/*
+		 * Set PBFR(PCI Bus Function Register)[10] = 1 to
+		 * disable the combining of crossing cacheline
+		 * boundary requests into one burst transaction.
+		 * PCI-X operation is not affected.
+		 * Fix erratum PCI 5 on MPC8548
+		 */
+#define PCI_BUS_FUNCTION 0x44
+#define PCI_BUS_FUNCTION_MDS 0x400	/* Master disable streaming */
+		if (((SVR_SOC_VER(svr) == SVR_8543) ||
+		     (SVR_SOC_VER(svr) == SVR_8545) ||
+		     (SVR_SOC_VER(svr) == SVR_8547) ||
+		     (SVR_SOC_VER(svr) == SVR_8548)) &&
+		    !early_find_capability(hose, 0, 0, PCI_CAP_ID_PCIX)) {
+			early_read_config_word(hose, 0, 0,
+					PCI_BUS_FUNCTION, &temp);
+			temp |= PCI_BUS_FUNCTION_MDS;
+			early_write_config_word(hose, 0, 0,
+					PCI_BUS_FUNCTION, temp);
+		}
+	}
+
+	printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx. "
+		"Firmware bus number: %d->%d\n",
+		(unsigned long long)rsrc.start, hose->first_busno,
+		hose->last_busno);
+
+	pr_debug(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+		hose, hose->cfg_addr, hose->cfg_data);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, is_primary);
+
+	/* Setup PEX window registers */
+	setup_pci_atmu(hose);
+
+	/* Set up controller operations */
+	setup_swiotlb_ops(hose);
+
+	return 0;
+	/* Not usable as a host bridge: undo the mappings, free the controller. */
+no_bridge:
+	iounmap(hose->private_data);
+	/* unmap cfg_data & cfg_addr separately if not on same page */
+	if (((unsigned long)hose->cfg_data & PAGE_MASK) !=
+	    ((unsigned long)hose->cfg_addr & PAGE_MASK))
+		iounmap(hose->cfg_data);
+	iounmap(hose->cfg_addr);
+	pcibios_free_controller(hose);
+	return -ENODEV;
+}
+#endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */
+/* Attach quirk_fsl_pcie_early to every Freescale-vendor device ID. */
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID,
+			quirk_fsl_pcie_early);
+
+#if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x)
+struct mpc83xx_pcie_priv {
+	void __iomem *cfg_type0;	/* mapping of the controller register block */
+	void __iomem *cfg_type1;	/* 4K mapping at the outbound window 0 BAR */
+	u32 dev_base;	/* last bus/devfn value written to PEX_OUTWIN0_TAL */
+};
+/* One root-complex inbound window, as walked by fsl_pci_immrbar_base(). */
+struct pex_inbound_window {
+	u32 ar;		/* PEX_RCIWARn_EN bit set when the window is enabled */
+	u32 tar;	/* compared against get_immrbase() */
+	u32 barl;	/* low 32 bits of the window BAR */
+	u32 barh;	/* high 32 bits of the window BAR */
+};
+
+/*
+ * With the convention of u-boot, the PCIE outbound window 0 serves
+ * as configuration transactions outbound.
+ */
+#define PEX_OUTWIN0_BAR		0xCA4
+#define PEX_OUTWIN0_TAL		0xCA8
+#define PEX_OUTWIN0_TAH		0xCAC
+#define PEX_RC_INWIN_BASE	0xE60
+#define PEX_RCIWARn_EN		0x1
+/* Config access filter: a nonzero result means "treat device as absent". */
+static int mpc83xx_pcie_exclude_device(struct pci_bus *bus, unsigned int devfn)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+	bool near_root;
+
+	if (phb->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Workaround for the HW bug: for Type 0 configure transactions the
+	 * PCI-E controller does not check the device number bits and just
+	 * assumes that the device number bits are 0.
+	 */
+	near_root = bus->number == phb->first_busno ||
+		    bus->primary == phb->first_busno;
+	if (near_root && (devfn & 0xf8))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Finally consult the platform's exclude hook, if one is set. */
+	if (ppc_md.pci_exclude_device &&
+	    ppc_md.pci_exclude_device(phb, bus->number, devfn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+/* map_bus hook: return the MMIO address for a config access, or NULL. */
+static void __iomem *mpc83xx_pcie_remap_cfg(struct pci_bus *bus,
+					    unsigned int devfn, int offset)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+	struct mpc83xx_pcie_priv *priv = phb->dn->data;
+	u32 target = bus->number << 24 | devfn << 16;
+
+	/* Devices rejected by the exclude logic get no mapping. */
+	if (mpc83xx_pcie_exclude_device(bus, devfn))
+		return NULL;
+
+	/* Only the low 12 bits are used as the register offset. */
+	offset &= 0xfff;
+
+	/* Type 0 */
+	if (bus->number == phb->first_busno)
+		return priv->cfg_type0 + offset;
+
+	/* Type 1: retarget outbound window 0 unless it already points here. */
+	if (priv->dev_base != target) {
+		out_le32(priv->cfg_type0 + PEX_OUTWIN0_TAL, target);
+		/* Remember the target so a repeat access skips the write. */
+		priv->dev_base = target;
+	}
+
+	return priv->cfg_type1 + offset;
+}
+/* Config write that zeroes the low byte of PCI_PRIMARY_BUS on the root bus. */
+static int mpc83xx_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 val)
+{
+	struct pci_controller *phb = pci_bus_to_host(bus);
+	bool root_bus = bus->number == phb->first_busno;
+
+	/* PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS */
+	if (root_bus && offset == PCI_PRIMARY_BUS)
+		val &= 0xffffff00;
+	return pci_generic_config_write(bus, devfn, offset, len, val);
+}
+/* Config ops installed by mpc83xx_pcie_setup(); reads use the generic helper. */
+static struct pci_ops mpc83xx_pcie_ops = {
+	.map_bus = mpc83xx_pcie_remap_cfg,
+	.read = pci_generic_config_read,
+	.write = mpc83xx_pcie_write_config,
+};
+/* Map the MPC83xx PCIe config windows and install the PCIe config ops. */
+static int __init mpc83xx_pcie_setup(struct pci_controller *hose,
+				     struct resource *reg)
+{
+	struct mpc83xx_pcie_priv *priv;
+	u32 outwin_bar;
+	int err;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	err = -ENOMEM;
+	priv->cfg_type0 = ioremap(reg->start, resource_size(reg));
+	if (!priv->cfg_type0)
+		goto free_priv;
+
+	/* A zero outbound window 0 BAR means PCI-E isn't configured. */
+	err = -ENODEV;
+	outwin_bar = in_le32(priv->cfg_type0 + PEX_OUTWIN0_BAR);
+	if (!outwin_bar)
+		goto unmap_type0;
+
+	err = -ENOMEM;
+	priv->cfg_type1 = ioremap(outwin_bar, 0x1000);
+	if (!priv->cfg_type1)
+		goto unmap_type0;
+
+	WARN_ON(hose->dn->data);
+	hose->dn->data = priv;
+	hose->ops = &mpc83xx_pcie_ops;
+	hose->indirect_type |= PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK;
+
+	out_le32(priv->cfg_type0 + PEX_OUTWIN0_TAH, 0);
+	out_le32(priv->cfg_type0 + PEX_OUTWIN0_TAL, 0);
+
+	if (fsl_pcie_check_link(hose))
+		hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
+
+	return 0;
+unmap_type0:
+	iounmap(priv->cfg_type0);
+free_priv:
+	kfree(priv);
+	return err;
+}
+/* Register one MPC83xx PCI/PCIe host bridge; 0 or a negative errno. */
+int __init mpc83xx_add_bridge(struct device_node *dev)
+{
+	int ret;
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc_reg;
+	struct resource rsrc_cfg;
+	const int *bus_range;
+	int primary;
+
+	is_mpc83xx_pci = 1;	/* consulted by fsl_pci_immrbar_base() */
+
+	if (!of_device_is_available(dev)) {
+		pr_warn("%pOF: disabled by the firmware.\n",
+			dev);
+		return -ENODEV;
+	}
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
+
+	/* Fetch host bridge registers address */
+	if (of_address_to_resource(dev, 0, &rsrc_reg)) {
+		printk(KERN_WARNING "Can't get pci register base!\n");
+		return -ENOMEM;
+	}
+
+	memset(&rsrc_cfg, 0, sizeof(rsrc_cfg));
+
+	if (of_address_to_resource(dev, 1, &rsrc_cfg)) {
+		printk(KERN_WARNING
+			"No pci config register base in dev tree, "
+			"using default\n");
+		/*
+		 * MPC83xx supports up to two host controllers
+		 * 	one at 0x8500 has config space registers at 0x8300
+		 * 	one at 0x8600 has config space registers at 0x8380
+		 */
+		if ((rsrc_reg.start & 0xfffff) == 0x8500)
+			rsrc_cfg.start = (rsrc_reg.start & 0xfff00000) + 0x8300;
+		else if ((rsrc_reg.start & 0xfffff) == 0x8600)
+			rsrc_cfg.start = (rsrc_reg.start & 0xfff00000) + 0x8380;
+	}
+	/*
+	 * Controller at offset 0x8500 is primary
+	 */
+	if ((rsrc_reg.start & 0xfffff) == 0x8500)
+		primary = 1;
+	else
+		primary = 0;
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		pr_warn("Can't get bus-range for %pOF, assume bus 0\n", dev);
+		bus_range = NULL;	/* a short property must never be indexed */
+	}
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	if (of_device_is_compatible(dev, "fsl,mpc8314-pcie")) {
+		ret = mpc83xx_pcie_setup(hose, &rsrc_reg);
+		if (ret)
+			goto err0;
+	} else {
+		setup_indirect_pci(hose, rsrc_cfg.start,
+				   rsrc_cfg.start + 4, 0);
+	}
+
+	printk(KERN_INFO "Found FSL PCI host bridge at 0x%016llx. "
+	       "Firmware bus number: %d->%d\n",
+	       (unsigned long long)rsrc_reg.start, hose->first_busno,
+	       hose->last_busno);
+
+	pr_debug(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+	    hose, hose->cfg_addr, hose->cfg_data);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	return 0;
+err0:
+	pcibios_free_controller(hose);
+	return ret;
+}
+#endif /* CONFIG_PPC_83xx */
+/* Return the PCI-side base of the IMMR/CSR register window, or 0 if unknown. */
+u64 fsl_pci_immrbar_base(struct pci_controller *hose)
+{
+#ifdef CONFIG_PPC_83xx
+	if (is_mpc83xx_pci) {
+		struct mpc83xx_pcie_priv *pcie = hose->dn->data;	/* NOTE(review): only set by mpc83xx_pcie_setup() here - confirm non-NULL for all callers */
+		struct pex_inbound_window *in;
+		int i;
+
+		/* Walk the Root Complex Inbound windows to match IMMR base */
+		in = pcie->cfg_type0 + PEX_RC_INWIN_BASE;
+		for (i = 0; i < 4; i++) {
+			/* not enabled, skip */
+			if (!(in_le32(&in[i].ar) & PEX_RCIWARn_EN))
+				continue;
+			/* target matches the IMMR base: return this window's 64-bit BAR */
+			if (get_immrbase() == in_le32(&in[i].tar))
+				return (u64)in_le32(&in[i].barh) << 32 |
+					    in_le32(&in[i].barl);
+		}
+
+		printk(KERN_WARNING "could not find PCI BAR matching IMMR\n");
+	}
+#endif
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+	if (!is_mpc83xx_pci) {
+		u32 base;
+
+		pci_bus_read_config_dword(hose->bus,
+			PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base);
+
+		/*
+		 * For PEXCSRBAR, bit 3-0 indicate prefetchable and
+		 * address type. So when getting base address, these
+		 * bits should be masked
+		 */
+		base &= PCI_BASE_ADDRESS_MEM_MASK;
+
+		return base;
+	}
+#endif
+	/* reached when no window matched or neither branch above returned */
+	return 0;
+}
+/* Emulate a load: fill rd with all-ones; returns 1 if handled, else 0. */
+#ifdef CONFIG_PPC_E500
+static int mcheck_handle_load(struct pt_regs *regs, u32 inst)
+{
+	unsigned int rd, ra, rb, d;
+
+	rd = get_rt(inst);
+	ra = get_ra(inst);
+	rb = get_rb(inst);
+	d = get_d(inst);
+
+	switch (get_op(inst)) {
+	case 31:	/* indexed forms, told apart by the extended opcode */
+		switch (get_xop(inst)) {
+		case OP_31_XOP_LWZX:
+		case OP_31_XOP_LWBRX:
+			regs->gpr[rd] = 0xffffffff;
+			break;
+
+		case OP_31_XOP_LWZUX:
+			regs->gpr[rd] = 0xffffffff;
+			regs->gpr[ra] += regs->gpr[rb];	/* update form: ra += rb */
+			break;
+
+		case OP_31_XOP_LBZX:
+			regs->gpr[rd] = 0xff;
+			break;
+
+		case OP_31_XOP_LBZUX:
+			regs->gpr[rd] = 0xff;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		case OP_31_XOP_LHZX:
+		case OP_31_XOP_LHBRX:
+			regs->gpr[rd] = 0xffff;
+			break;
+
+		case OP_31_XOP_LHZUX:
+			regs->gpr[rd] = 0xffff;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		case OP_31_XOP_LHAX:
+			regs->gpr[rd] = ~0UL;	/* every register bit set */
+			break;
+
+		case OP_31_XOP_LHAUX:
+			regs->gpr[rd] = ~0UL;
+			regs->gpr[ra] += regs->gpr[rb];
+			break;
+
+		default:
+			return 0;	/* extended opcode not emulated here */
+		}
+		break;
+
+	case OP_LWZ:
+		regs->gpr[rd] = 0xffffffff;
+		break;
+
+	case OP_LWZU:
+		regs->gpr[rd] = 0xffffffff;
+		regs->gpr[ra] += (s16)d;	/* update form: ra += sign-extended d */
+		break;
+
+	case OP_LBZ:
+		regs->gpr[rd] = 0xff;
+		break;
+
+	case OP_LBZU:
+		regs->gpr[rd] = 0xff;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	case OP_LHZ:
+		regs->gpr[rd] = 0xffff;
+		break;
+
+	case OP_LHZU:
+		regs->gpr[rd] = 0xffff;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	case OP_LHA:
+		regs->gpr[rd] = ~0UL;
+		break;
+
+	case OP_LHAU:
+		regs->gpr[rd] = ~0UL;
+		regs->gpr[ra] += (s16)d;
+		break;
+
+	default:
+		return 0;	/* primary opcode not emulated here */
+	}
+
+	return 1;	/* registers were updated */
+}
+/* Nonzero when @addr lies in a MEM window of a controller with EXT_REG set. */
+static int is_in_pci_mem_space(phys_addr_t addr)
+{
+	struct pci_controller *phb;
+	int win;
+
+	list_for_each_entry(phb, &hose_list, list_node) {
+		if (!(phb->indirect_type & PPC_INDIRECT_TYPE_EXT_REG))
+			continue;
+		for (win = 0; win < 3; win++) {
+			const struct resource *r = &phb->mem_resources[win];
+
+			if (!(r->flags & IORESOURCE_MEM))
+				continue;
+			if (addr >= r->start && addr <= r->end)
+				return 1;
+		}
+	}
+	return 0;
+}
+/* Machine-check hook: emulate a load whose address is in PCI MEM space. */
+int fsl_pci_mcheck_exception(struct pt_regs *regs)
+{
+	phys_addr_t fault = 0;
+	u32 insn;
+	int err;
+
+	/* Let KVM/QEMU deal with the exception */
+	if (regs->msr & MSR_GS)
+		return 0;
+
+#ifdef CONFIG_PHYS_64BIT
+	fault = mfspr(SPRN_MCARU);
+	fault <<= 32;
+#endif
+	fault += mfspr(SPRN_MCAR);
+	if (!is_in_pci_mem_space(fault))
+		return 0;
+
+	/* Fetch the instruction at nip with the matching _nofault helper. */
+	if (!user_mode(regs))
+		err = get_kernel_nofault(insn, (void *)regs->nip);
+	else
+		err = copy_from_user_nofault(&insn,
+				(void __user *)regs->nip, sizeof(insn));
+	if (err || !mcheck_handle_load(regs, insn))
+		return 0;
+
+	/* Load was emulated: step past the 4-byte instruction. */
+	regs_add_return_ip(regs, 4);
+	return 1;
+}
+#endif
+/* Compatible strings matched as FSL PCI host bridges in BookE/86xx builds. */
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+static const struct of_device_id pci_ids[] = {
+	{ .compatible = "fsl,mpc8540-pci", },
+	{ .compatible = "fsl,mpc8548-pcie", },
+	{ .compatible = "fsl,mpc8610-pci", },
+	{ .compatible = "fsl,mpc8641-pcie", },
+	{ .compatible = "fsl,qoriq-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.1", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v3.0", },
+
+	/*
+	 * The following entries are for compatibility with older device
+	 * trees.
+	 */
+	{ .compatible = "fsl,p1022-pcie", },
+	{ .compatible = "fsl,p4080-pcie", },
+
+	{},	/* empty entry terminates the table */
+};
+/* Primary host bridge node; left alone by fsl_pci_assign_primary() if set. */
+struct device_node *fsl_pci_primary;
+
+/* Choose fsl_pci_primary: bridge above an ISA node, else "pci0", else any. */
+void __init fsl_pci_assign_primary(void)
+{
+	struct device_node *np;
+
+	/* Callers can specify the primary bus using other means. */
+	if (fsl_pci_primary)
+		return;
+
+	/* If a PCI host bridge contains an ISA node, it's primary. */
+	np = of_find_node_by_type(NULL, "isa");
+	while ((fsl_pci_primary = of_get_parent(np))) {
+		of_node_put(np);
+		np = fsl_pci_primary;
+		if (of_match_node(pci_ids, np) && of_device_is_available(np))
+			return;
+	}
+	/* The walk ended without a match: drop the reference still held on np. */
+	of_node_put(np);
+
+	/*
+	 * Otherwise try the bridge aliased "pci0" (first PCI host bridge);
+	 * on a match keep the reference, since the pointer is stored.
+	 */
+	np = of_find_node_by_path("pci0");
+	if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) {
+		fsl_pci_primary = np;
+		return;
+	}
+	of_node_put(np);
+
+	/*
+	 * If there's no PCI host bridge with ISA, arbitrarily
+	 * designate one as primary.  This can go away once
+	 * various bugs with primary-less systems are fixed.
+	 */
+	for_each_matching_node(np, pci_ids) {
+		if (of_device_is_available(np)) {
+			fsl_pci_primary = np;
+			return;
+		}
+	}
+}
+
+#ifdef CONFIG_PM_SLEEP
+/* PME irq handler: write back any bits read from pex_pme_mes_dr. */
+static irqreturn_t fsl_pci_pme_handle(int irq, void *dev_id)
+{
+	struct pci_controller *phb = dev_id;
+	struct ccsr_pci __iomem *regs = phb->private_data;
+	u32 pending = in_be32(&regs->pex_pme_mes_dr);
+
+	if (pending) {
+		/* Something was latched: write the same bits back. */
+		out_be32(&regs->pex_pme_mes_dr, pending);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static int fsl_pci_pme_probe(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci;
+	struct pci_dev *dev;
+	int pme_irq;
+	int res;
+	u16 pms;
+
+	/* Get hose's pci_dev */
+	dev = list_first_entry(&hose->bus->devices, typeof(*dev), bus_list);
+
+	/* PME Disable */
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pms);
+	pms &= ~PCI_PM_CTRL_PME_ENABLE;
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pms);
+
+	pme_irq = irq_of_parse_and_map(hose->dn, 0);
+	if (!pme_irq) {
+		dev_err(&dev->dev, "Failed to map PME interrupt.\n");
+
+		return -ENXIO;
+	}
+
+	res = devm_request_irq(hose->parent, pme_irq,
+			fsl_pci_pme_handle,
+			IRQF_SHARED,
+			"[PCI] PME", hose);
+	if (res < 0) {
+		dev_err(&dev->dev, "Unable to request irq %d for PME\n", pme_irq);
+		irq_dispose_mapping(pme_irq);
+
+		return -ENODEV;
+	}
+
+	pci = hose->private_data;
+
+	/* Enable PTOD, ENL23D & EXL23D */
+	clrbits32(&pci->pex_pme_mes_disr,
+		  PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
+
+	out_be32(&pci->pex_pme_mes_ier, 0);
+	setbits32(&pci->pex_pme_mes_ier,
+		  PME_DISR_EN_PTOD | PME_DISR_EN_ENL23D | PME_DISR_EN_EXL23D);
+
+	/* PME Enable */
+	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pms);
+	pms |= PCI_PM_CTRL_PME_ENABLE;
+	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pms);
+
+	return 0;
+}
+
+static void send_pme_turnoff_message(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	u32 dr;
+	int i;
+
+	/* Send PME_Turn_Off Message Request */
+	setbits32(&pci->pex_pmcr, PEX_PMCR_PTOMR);
+
+	/* Wait for turn-off done: poll up to 150 x 1 ms, then write the bits back */
+	for (i = 0; i < 150; i++) {
+		dr = in_be32(&pci->pex_pme_mes_dr);
+		if (dr) {
+			out_be32(&pci->pex_pme_mes_dr, dr);
+			break;
+		}
+
+		udelay(1000);
+	}
+}
+
+static void fsl_pci_syscore_do_suspend(struct pci_controller *hose)
+{
+	send_pme_turnoff_message(hose);
+}
+
+static int fsl_pci_syscore_suspend(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		fsl_pci_syscore_do_suspend(hose);
+
+	return 0;
+}
+
+static void fsl_pci_syscore_do_resume(struct pci_controller *hose)
+{
+	struct ccsr_pci __iomem *pci = hose->private_data;
+	u32 dr;
+	int i;
+
+	/* Send Exit L2 State Message */
+	setbits32(&pci->pex_pmcr, PEX_PMCR_EXL2S);
+
+	/* Wait exit done */
+	for (i = 0; i < 150; i++) {
+		dr = in_be32(&pci->pex_pme_mes_dr);
+		if (dr) {
+			out_be32(&pci->pex_pme_mes_dr, dr);
+			break;
+		}
+
+		udelay(1000);
+	}
+
+	setup_pci_atmu(hose);
+}
+
+static void fsl_pci_syscore_resume(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		fsl_pci_syscore_do_resume(hose);
+}
+
+static struct syscore_ops pci_syscore_pm_ops = {
+	.suspend = fsl_pci_syscore_suspend,
+	.resume = fsl_pci_syscore_resume,
+};
+#endif
+
+void fsl_pcibios_fixup_phb(struct pci_controller *phb)
+{
+#ifdef CONFIG_PM_SLEEP
+	fsl_pci_pme_probe(phb);
+#endif
+}
+
+static int add_err_dev(struct platform_device *pdev)
+{
+	struct platform_device *errdev;
+	struct mpc85xx_edac_pci_plat_data pd = {
+		.of_node = pdev->dev.of_node
+	};
+
+	errdev = platform_device_register_resndata(&pdev->dev,
+						   "mpc85xx-pci-edac",
+						   PLATFORM_DEVID_AUTO,
+						   pdev->resource,
+						   pdev->num_resources,
+						   &pd, sizeof(pd));
+
+	return PTR_ERR_OR_ZERO(errdev);
+}
+
+static int fsl_pci_probe(struct platform_device *pdev)
+{
+	struct device_node *node;
+	int ret;
+
+	node = pdev->dev.of_node;
+	ret = fsl_add_bridge(pdev, fsl_pci_primary == node);
+	if (ret)
+		return ret;
+
+	ret = add_err_dev(pdev);
+	if (ret)
+		dev_err(&pdev->dev, "couldn't register error device: %d\n",
+			ret);
+
+	return 0;
+}
+
+static struct platform_driver fsl_pci_driver = {
+	.driver = {
+		.name = "fsl-pci",
+		.of_match_table = pci_ids,
+	},
+	.probe = fsl_pci_probe,
+	.driver_managed_dma = true,
+};
+
+static int __init fsl_pci_init(void)
+{
+#ifdef CONFIG_PM_SLEEP
+	register_syscore_ops(&pci_syscore_pm_ops);
+#endif
+	return platform_driver_register(&fsl_pci_driver);
+}
+arch_initcall(fsl_pci_init);
+#endif
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
new file mode 100644
index 0000000000..3bc4ab9d83
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * MPC85xx/86xx PCI Express structure define
+ *
+ * Copyright 2007,2011 Freescale Semiconductor, Inc
+ */
+
+#ifdef __KERNEL__
+#ifndef __POWERPC_FSL_PCI_H
+#define __POWERPC_FSL_PCI_H
+
+struct platform_device;
+
+
+/* FSL PCI controller BRR1 register */
+#define PCI_FSL_BRR1      0xbf8
+#define PCI_FSL_BRR1_VER 0xffff
+
+#define PCIE_LTSSM	0x0404		/* PCIE Link Training and Status */
+#define PCIE_LTSSM_L0	0x16		/* L0 state */
+#define PCIE_FSL_CSR_CLASSCODE	0x474	/* FSL GPEX CSR */
+#define PCIE_IP_REV_2_2		0x02080202 /* PCIE IP block version Rev2.2 */
+#define PCIE_IP_REV_3_0		0x02080300 /* PCIE IP block version Rev3.0 */
+#define PIWAR_EN		0x80000000	/* Enable */
+#define PIWAR_PF		0x20000000	/* prefetch */
+#define PIWAR_TGI_LOCAL		0x00f00000	/* target - local memory */
+#define PIWAR_READ_SNOOP	0x00050000
+#define PIWAR_WRITE_SNOOP	0x00005000
+#define PIWAR_SZ_MASK          0x0000003f
+
+#define PEX_PMCR_PTOMR		0x1
+#define PEX_PMCR_EXL2S		0x2
+
+#define PME_DISR_EN_PTOD	0x00008000
+#define PME_DISR_EN_ENL23D	0x00002000
+#define PME_DISR_EN_EXL23D	0x00001000
+
+/* PCI/PCI Express outbound window reg */
+struct pci_outbound_window_regs {
+	__be32	potar;	/* 0x.0 - Outbound translation address register */
+	__be32	potear;	/* 0x.4 - Outbound translation extended address register */
+	__be32	powbar;	/* 0x.8 - Outbound window base address register */
+	u8	res1[4];
+	__be32	powar;	/* 0x.10 - Outbound window attributes register */
+	u8	res2[12];
+};
+
+/* PCI/PCI Express inbound window reg */
+struct pci_inbound_window_regs {
+	__be32	pitar;	/* 0x.0 - Inbound translation address register */
+	u8	res1[4];
+	__be32	piwbar;	/* 0x.8 - Inbound window base address register */
+	__be32	piwbear;	/* 0x.c - Inbound window base extended address register */
+	__be32	piwar;	/* 0x.10 - Inbound window attributes register */
+	u8	res2[12];
+};
+
+/* PCI/PCI Express IO block registers for 85xx/86xx */
+struct ccsr_pci {
+	__be32	config_addr;		/* 0x.000 - PCI/PCIE Configuration Address Register */
+	__be32	config_data;		/* 0x.004 - PCI/PCIE Configuration Data Register */
+	__be32	int_ack;		/* 0x.008 - PCI Interrupt Acknowledge Register */
+	__be32	pex_otb_cpl_tor;	/* 0x.00c - PCIE Outbound completion timeout register */
+	__be32	pex_conf_tor;		/* 0x.010 - PCIE configuration timeout register */
+	__be32	pex_config;		/* 0x.014 - PCIE CONFIG Register */
+	__be32	pex_int_status;		/* 0x.018 - PCIE interrupt status */
+	u8	res2[4];
+	__be32	pex_pme_mes_dr;		/* 0x.020 - PCIE PME and message detect register */
+	__be32	pex_pme_mes_disr;	/* 0x.024 - PCIE PME and message disable register */
+	__be32	pex_pme_mes_ier;	/* 0x.028 - PCIE PME and message interrupt enable register */
+	__be32	pex_pmcr;		/* 0x.02c - PCIE power management command register */
+	u8	res3[3016];
+	__be32	block_rev1;	/* 0x.bf8 - PCIE Block Revision register 1 */
+	__be32	block_rev2;	/* 0x.bfc - PCIE Block Revision register 2 */
+
+/* PCI/PCI Express outbound window 0-4
+ * Window 0 is the default window and is the only window enabled upon reset.
+ * The default outbound register set is used when a transaction misses
+ * in all of the other outbound windows.
+ */
+	struct pci_outbound_window_regs pow[5];
+	u8	res14[96];
+	struct pci_inbound_window_regs	pmit;	/* 0xd00 - 0xd9c Inbound MSI */
+	u8	res6[96];
+/* PCI/PCI Express inbound window 3-0
+ * inbound window 1 supports only a 32-bit base address and does not
+ * define an inbound window base extended address register.
+ */
+	struct pci_inbound_window_regs piw[4];
+
+	__be32	pex_err_dr;		/* 0x.e00 - PCI/PCIE error detect register */
+	u8	res21[4];
+	__be32	pex_err_en;		/* 0x.e08 - PCI/PCIE error interrupt enable register */
+	u8	res22[4];
+	__be32	pex_err_disr;		/* 0x.e10 - PCI/PCIE error disable register */
+	u8	res23[12];
+	__be32	pex_err_cap_stat;	/* 0x.e20 - PCI/PCIE error capture status register */
+	u8	res24[4];
+	__be32	pex_err_cap_r0;		/* 0x.e28 - PCIE error capture register 0 */
+	__be32	pex_err_cap_r1;		/* 0x.e2c - PCIE error capture register 1 */
+	__be32	pex_err_cap_r2;		/* 0x.e30 - PCIE error capture register 2 */
+	__be32	pex_err_cap_r3;		/* 0x.e34 - PCIE error capture register 3 */
+	u8	res_e38[200];
+	__be32	pdb_stat;		/* 0x.f00 - PCIE Debug Status */
+	u8	res_f04[16];
+	__be32	pex_csr0;		/* 0x.f14 - PEX Control/Status register 0*/
+#define PEX_CSR0_LTSSM_MASK	0xFC
+#define PEX_CSR0_LTSSM_SHIFT	2
+#define PEX_CSR0_LTSSM_L0	0x11
+	__be32	pex_csr1;		/* 0x.f18 - PEX Control/Status register 1*/
+	u8	res_f1c[228];
+
+};
+
+extern void fsl_pcibios_fixup_bus(struct pci_bus *bus);
+extern void fsl_pcibios_fixup_phb(struct pci_controller *phb);
+extern int mpc83xx_add_bridge(struct device_node *dev);
+u64 fsl_pci_immrbar_base(struct pci_controller *hose);
+
+extern struct device_node *fsl_pci_primary;
+
+#ifdef CONFIG_PCI
+void __init fsl_pci_assign_primary(void);
+#else
+static inline void fsl_pci_assign_primary(void) {}
+#endif
+
+#ifdef CONFIG_FSL_PCI
+extern int fsl_pci_mcheck_exception(struct pt_regs *);
+#else
+static inline int fsl_pci_mcheck_exception(struct pt_regs *regs) {return 0; }
+#endif
+
+#endif /* __POWERPC_FSL_PCI_H */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c
new file mode 100644
index 0000000000..9f6dd11c13
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pmc.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Suspend/resume support
+ *
+ * Copyright 2009  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+struct pmc_regs {
+	__be32 devdisr;
+	__be32 devdisr2;
+	__be32 :32;
+	__be32 :32;
+	__be32 pmcsr;
+#define PMCSR_SLP	(1 << 17)
+};
+
+static struct device *pmc_dev;
+static struct pmc_regs __iomem *pmc_regs;
+
+static int pmc_suspend_enter(suspend_state_t state)
+{
+	int ret;
+
+	setbits32(&pmc_regs->pmcsr, PMCSR_SLP);
+	/* At this point, the CPU is asleep. */
+
+	/* Upon resume, wait for SLP bit to be clear. */
+	ret = spin_event_timeout((in_be32(&pmc_regs->pmcsr) & PMCSR_SLP) == 0,
+				 10000, 10) ? 0 : -ETIMEDOUT;
+	if (ret)
+		dev_err(pmc_dev, "tired waiting for SLP bit to clear\n");
+	return ret;
+}
+
+/* .valid hook: 1 only for PM_SUSPEND_STANDBY, 0 for any other state. */
+static int pmc_suspend_valid(suspend_state_t state)
+{
+	/* The comparison itself yields the 1/0 result. */
+	return state == PM_SUSPEND_STANDBY;
+}
+
+static const struct platform_suspend_ops pmc_suspend_ops = {
+	.valid = pmc_suspend_valid,
+	.enter = pmc_suspend_enter,
+};
+
+static int pmc_probe(struct platform_device *ofdev)
+{
+	pmc_regs = of_iomap(ofdev->dev.of_node, 0);
+	if (!pmc_regs)
+		return -ENOMEM;
+
+	pmc_dev = &ofdev->dev;
+	suspend_set_ops(&pmc_suspend_ops);
+	return 0;
+}
+
+static const struct of_device_id pmc_ids[] = {
+	{ .compatible = "fsl,mpc8548-pmc", },
+	{ .compatible = "fsl,mpc8641d-pmc", },
+	{ },
+};
+
+static struct platform_driver pmc_driver = {
+	.driver = {
+		.name = "fsl-pmc",
+		.of_match_table = pmc_ids,
+	},
+	.probe = pmc_probe,
+};
+
+builtin_platform_driver(pmc_driver);
diff --git a/arch/powerpc/sysdev/fsl_rcpm.c b/arch/powerpc/sysdev/fsl_rcpm.c
new file mode 100644
index 0000000000..aacd0be613
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rcpm.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RCPM(Run Control/Power Management) support
+ *
+ * Copyright 2012-2015 Freescale Semiconductor Inc.
+ *
+ * Author: Chenhui Zhao <chenhui.zhao@freescale.com>
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/export.h>
+
+#include <asm/io.h>
+#include <linux/fsl/guts.h>
+#include <asm/cputhreads.h>
+#include <asm/fsl_pm.h>
+#include <asm/smp.h>
+
+static struct ccsr_rcpm_v1 __iomem *rcpm_v1_regs;
+static struct ccsr_rcpm_v2 __iomem *rcpm_v2_regs;
+static unsigned int fsl_supported_pm_modes;
+
+static void rcpm_v1_irq_mask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	setbits32(&rcpm_v1_regs->cpmimr, mask);
+	setbits32(&rcpm_v1_regs->cpmcimr, mask);
+	setbits32(&rcpm_v1_regs->cpmmcmr, mask);
+	setbits32(&rcpm_v1_regs->cpmnmimr, mask);
+}
+
+static void rcpm_v2_irq_mask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	setbits32(&rcpm_v2_regs->tpmimr0, mask);
+	setbits32(&rcpm_v2_regs->tpmcimr0, mask);
+	setbits32(&rcpm_v2_regs->tpmmcmr0, mask);
+	setbits32(&rcpm_v2_regs->tpmnmimr0, mask);
+}
+
+static void rcpm_v1_irq_unmask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	clrbits32(&rcpm_v1_regs->cpmimr, mask);
+	clrbits32(&rcpm_v1_regs->cpmcimr, mask);
+	clrbits32(&rcpm_v1_regs->cpmmcmr, mask);
+	clrbits32(&rcpm_v1_regs->cpmnmimr, mask);
+}
+
+static void rcpm_v2_irq_unmask(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	clrbits32(&rcpm_v2_regs->tpmimr0, mask);
+	clrbits32(&rcpm_v2_regs->tpmcimr0, mask);
+	clrbits32(&rcpm_v2_regs->tpmmcmr0, mask);
+	clrbits32(&rcpm_v2_regs->tpmnmimr0, mask);
+}
+
+static void rcpm_v1_set_ip_power(bool enable, u32 mask)
+{
+	if (enable)
+		setbits32(&rcpm_v1_regs->ippdexpcr, mask);
+	else
+		clrbits32(&rcpm_v1_regs->ippdexpcr, mask);
+}
+
+static void rcpm_v2_set_ip_power(bool enable, u32 mask)
+{
+	if (enable)
+		setbits32(&rcpm_v2_regs->ippdexpcr[0], mask);
+	else
+		clrbits32(&rcpm_v2_regs->ippdexpcr[0], mask);
+}
+
+static void rcpm_v1_cpu_enter_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	switch (state) {
+	case E500_PM_PH10:
+		setbits32(&rcpm_v1_regs->cdozcr, mask);
+		break;
+	case E500_PM_PH15:
+		setbits32(&rcpm_v1_regs->cnapcr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+		break;
+	}
+}
+
+static void rcpm_v2_cpu_enter_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	u32 mask = 1 << cpu_core_index_of_thread(cpu);
+
+	switch (state) {
+	case E500_PM_PH10:
+		/* one bit corresponds to one thread for PH10 of 6500 */
+		setbits32(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu);
+		break;
+	case E500_PM_PH15:
+		setbits32(&rcpm_v2_regs->pcph15setr, mask);
+		break;
+	case E500_PM_PH20:
+		setbits32(&rcpm_v2_regs->pcph20setr, mask);
+		break;
+	case E500_PM_PH30:
+		setbits32(&rcpm_v2_regs->pcph30setr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+	}
+}
+
+static void rcpm_v1_cpu_die(int cpu)
+{
+	rcpm_v1_cpu_enter_state(cpu, E500_PM_PH15);
+}
+
+#ifdef CONFIG_PPC64
+static void qoriq_disable_thread(int cpu)
+{
+	int thread = cpu_thread_in_core(cpu);
+
+	book3e_stop_thread(thread);
+}
+#endif
+
+static void rcpm_v2_cpu_die(int cpu)
+{
+#ifdef CONFIG_PPC64
+	int primary;
+
+	if (threads_per_core == 2) {
+		primary = cpu_first_thread_sibling(cpu);
+		if (cpu_is_offline(primary) && cpu_is_offline(primary + 1)) {
+			/* if both threads are offline, put the cpu in PH20 */
+			rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20);
+		} else {
+			/* if only one thread is offline, disable the thread */
+			qoriq_disable_thread(cpu);
+		}
+	}
+#endif
+
+	if (threads_per_core == 1)
+		rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20);
+}
+
+static void rcpm_v1_cpu_exit_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	unsigned int mask = 1 << hw_cpu;
+
+	switch (state) {
+	case E500_PM_PH10:
+		clrbits32(&rcpm_v1_regs->cdozcr, mask);
+		break;
+	case E500_PM_PH15:
+		clrbits32(&rcpm_v1_regs->cnapcr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+		break;
+	}
+}
+
+static void rcpm_v1_cpu_up_prepare(int cpu)
+{
+	rcpm_v1_cpu_exit_state(cpu, E500_PM_PH15);
+	rcpm_v1_irq_unmask(cpu);
+}
+
+static void rcpm_v2_cpu_exit_state(int cpu, int state)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	u32 mask = 1 << cpu_core_index_of_thread(cpu);
+
+	switch (state) {
+	case E500_PM_PH10:
+		setbits32(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu);
+		break;
+	case E500_PM_PH15:
+		setbits32(&rcpm_v2_regs->pcph15clrr, mask);
+		break;
+	case E500_PM_PH20:
+		setbits32(&rcpm_v2_regs->pcph20clrr, mask);
+		break;
+	case E500_PM_PH30:
+		setbits32(&rcpm_v2_regs->pcph30clrr, mask);
+		break;
+	default:
+		pr_warn("Unknown cpu PM state (%d)\n", state);
+	}
+}
+
+static void rcpm_v2_cpu_up_prepare(int cpu)
+{
+	rcpm_v2_cpu_exit_state(cpu, E500_PM_PH20);
+	rcpm_v2_irq_unmask(cpu);
+}
+
+/* Enter platform PM @state (PLAT_PM_SLEEP only); 0, -ETIMEDOUT or -EINVAL. */
+static int rcpm_v1_plat_enter_state(int state)
+{
+	u32 __iomem *pmcsr_reg = &rcpm_v1_regs->powmgtcsr;
+	int ret = 0;
+
+	switch (state) {
+	case PLAT_PM_SLEEP:
+		setbits32(pmcsr_reg, RCPM_POWMGTCSR_SLP);
+
+		/* Upon resume, wait for RCPM_POWMGTCSR_SLP bit to be clear. */
+		if (!spin_event_timeout(
+		  !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_SLP), 10000, 10)) {
+			pr_err("timeout waiting for SLP bit to be cleared\n");
+			ret = -ETIMEDOUT;
+		}
+		break;
+	default:
+		/* Terminate the message so it is not merged with the next log line. */
+		pr_warn("Unknown platform PM state (%d)\n", state);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int rcpm_v2_plat_enter_state(int state)
+{
+	u32 *pmcsr_reg = &rcpm_v2_regs->powmgtcsr;
+	int ret = 0;
+	int result;
+
+	switch (state) {
+	case PLAT_PM_LPM20:
+		/* clear previous LPM20 status */
+		setbits32(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST);
+		/* enter LPM20 status */
+		setbits32(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ);
+
+		/* At this point, the device is in LPM20 status. */
+
+		/* resume ... */
+		result = spin_event_timeout(
+		  !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_LPM20_ST), 10000, 10);
+		if (!result) {
+			pr_err("timeout waiting for LPM20 bit to be cleared\n");
+			ret = -ETIMEDOUT;
+		}
+		break;
+	default:
+		pr_warn("Unknown platform PM state (%d)\n", state);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int rcpm_v1_plat_enter_sleep(void)
+{
+	return rcpm_v1_plat_enter_state(PLAT_PM_SLEEP);
+}
+
+static int rcpm_v2_plat_enter_sleep(void)
+{
+	return rcpm_v2_plat_enter_state(PLAT_PM_LPM20);
+}
+
+static void rcpm_common_freeze_time_base(u32 *tben_reg, int freeze)
+{
+	static u32 mask;
+
+	if (freeze) {
+		mask = in_be32(tben_reg);
+		clrbits32(tben_reg, mask);
+	} else {
+		setbits32(tben_reg, mask);
+	}
+
+	/* read back to push the previous write */
+	in_be32(tben_reg);
+}
+
+static void rcpm_v1_freeze_time_base(bool freeze)
+{
+	rcpm_common_freeze_time_base(&rcpm_v1_regs->ctbenr, freeze);
+}
+
+static void rcpm_v2_freeze_time_base(bool freeze)
+{
+	rcpm_common_freeze_time_base(&rcpm_v2_regs->pctbenr, freeze);
+}
+
+static unsigned int rcpm_get_pm_modes(void)
+{
+	return fsl_supported_pm_modes;
+}
+
+static const struct fsl_pm_ops qoriq_rcpm_v1_ops = {
+	.irq_mask = rcpm_v1_irq_mask,
+	.irq_unmask = rcpm_v1_irq_unmask,
+	.cpu_enter_state = rcpm_v1_cpu_enter_state,
+	.cpu_exit_state = rcpm_v1_cpu_exit_state,
+	.cpu_up_prepare = rcpm_v1_cpu_up_prepare,
+	.cpu_die = rcpm_v1_cpu_die,
+	.plat_enter_sleep = rcpm_v1_plat_enter_sleep,
+	.set_ip_power = rcpm_v1_set_ip_power,
+	.freeze_time_base = rcpm_v1_freeze_time_base,
+	.get_pm_modes = rcpm_get_pm_modes,
+};
+
+static const struct fsl_pm_ops qoriq_rcpm_v2_ops = {
+	.irq_mask = rcpm_v2_irq_mask,
+	.irq_unmask = rcpm_v2_irq_unmask,
+	.cpu_enter_state = rcpm_v2_cpu_enter_state,
+	.cpu_exit_state = rcpm_v2_cpu_exit_state,
+	.cpu_up_prepare = rcpm_v2_cpu_up_prepare,
+	.cpu_die = rcpm_v2_cpu_die,
+	.plat_enter_sleep = rcpm_v2_plat_enter_sleep,
+	.set_ip_power = rcpm_v2_set_ip_power,
+	.freeze_time_base = rcpm_v2_freeze_time_base,
+	.get_pm_modes = rcpm_get_pm_modes,
+};
+
+static const struct of_device_id rcpm_matches[] = {
+	{
+		.compatible = "fsl,qoriq-rcpm-1.0",
+		.data = &qoriq_rcpm_v1_ops,
+	},
+	{
+		.compatible = "fsl,qoriq-rcpm-2.0",
+		.data = &qoriq_rcpm_v2_ops,
+	},
+	{
+		.compatible = "fsl,qoriq-rcpm-2.1",
+		.data = &qoriq_rcpm_v2_ops,
+	},
+	{},
+};
+
+int __init fsl_rcpm_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *match;
+	void __iomem *base;
+
+	np = of_find_matching_node_and_match(NULL, rcpm_matches, &match);
+	if (!np)
+		return 0;
+
+	base = of_iomap(np, 0);
+	of_node_put(np);
+	if (!base) {
+		pr_err("of_iomap() error.\n");
+		return -ENOMEM;
+	}
+
+	rcpm_v1_regs = base;
+	rcpm_v2_regs = base;
+
+	/* support sleep by default */
+	fsl_supported_pm_modes = FSL_PM_SLEEP;
+
+	qoriq_pm_ops = match->data;
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
new file mode 100644
index 0000000000..f9b214b299
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale MPC85xx/MPC86xx RapidIO support
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
+ * Copyright (C) 2007, 2008, 2010, 2011 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#include <linux/init.h>
+#include <linux/extable.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/rio.h>
+
+#include "fsl_rio.h"
+
+#undef DEBUG_PW	/* Port-Write debugging */
+
+#define RIO_PORT1_EDCSR		0x0640
+#define RIO_PORT2_EDCSR		0x0680
+#define RIO_PORT1_IECSR		0x10130
+#define RIO_PORT2_IECSR		0x101B0
+
+#define RIO_GCCSR		0x13c
+#define RIO_ESCSR		0x158
+#define ESCSR_CLEAR		0x07120204
+#define RIO_PORT2_ESCSR		0x178
+#define RIO_CCSR		0x15c
+#define RIO_LTLEDCSR_IER	0x80000000
+#define RIO_LTLEDCSR_PRT	0x01000000
+#define IECSR_CLEAR		0x80000000
+#define RIO_ISR_AACR		0x10120
+#define RIO_ISR_AACR_AA		0x1	/* Accept All ID */
+
+#define RIWTAR_TRAD_VAL_SHIFT	12
+#define RIWTAR_TRAD_MASK	0x00FFFFFF
+#define RIWBAR_BADD_VAL_SHIFT	12
+#define RIWBAR_BADD_MASK	0x003FFFFF
+#define RIWAR_ENABLE		0x80000000
+#define RIWAR_TGINT_LOCAL	0x00F00000
+#define RIWAR_RDTYP_NO_SNOOP	0x00040000
+#define RIWAR_RDTYP_SNOOP	0x00050000
+#define RIWAR_WRTYP_NO_SNOOP	0x00004000
+#define RIWAR_WRTYP_SNOOP	0x00005000
+#define RIWAR_WRTYP_ALLOC	0x00006000
+#define RIWAR_SIZE_MASK		0x0000003F
+
+static DEFINE_SPINLOCK(fsl_rio_config_lock);
+
+#define ___fsl_read_rio_config(x, addr, err, op, barrier)	\
+	__asm__ __volatile__(				\
+		"1:	"op" %1,0(%2)\n"		\
+		"	"barrier"\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %1,-1\n"			\
+		"	li %0,%3\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r" (err), "=r" (x)			\
+		: "b" (addr), "i" (-EFAULT), "0" (err))
+
+#ifdef CONFIG_BOOKE
+#define __fsl_read_rio_config(x, addr, err, op)	\
+	___fsl_read_rio_config(x, addr, err, op, "mbar")
+#else
+#define __fsl_read_rio_config(x, addr, err, op)	\
+	___fsl_read_rio_config(x, addr, err, op, "eieio")
+#endif
+
+void __iomem *rio_regs_win;
+void __iomem *rmu_regs_win;
+resource_size_t rio_law_start;
+
+struct fsl_rio_dbell *dbell;
+struct fsl_rio_pw *pw;
+
+#ifdef CONFIG_PPC_E500
+int fsl_rio_mcheck_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *entry;
+	unsigned long reason;
+
+	if (!rio_regs_win)
+		return 0;
+
+	reason = in_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR));
+	if (reason & (RIO_LTLEDCSR_IER | RIO_LTLEDCSR_PRT)) {
+		/* Check if we are prepared to handle this fault */
+		entry = search_exception_tables(regs->nip);
+		if (entry) {
+			pr_debug("RIO: %s - MC Exception handled\n",
+				 __func__);
+			out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR),
+				 0);
+			regs_set_recoverable(regs);
+			regs_set_return_ip(regs, extable_fixup(entry));
+			return 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_rio_mcheck_exception);
+#endif
+
+/**
+ * fsl_local_config_read - Generate a MPC85xx local config space read
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be read into
+ *
+ * Generates a MPC85xx local configuration space read. Always returns %0;
+ * the access is a single 32-bit register read regardless of @len.
+ */
+static int fsl_local_config_read(struct rio_mport *mport,
+				int index, u32 offset, int len, u32 *data)
+{
+	struct rio_priv *priv = mport->priv;
+	pr_debug("fsl_local_config_read: index %d offset %8.8x\n", index,
+		 offset);
+	*data = in_be32(priv->regs_win + offset);
+
+	return 0;
+}
+
+/**
+ * fsl_local_config_write - Generate a MPC85xx local config space write
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @data: Value to be written
+ *
+ * Generates a MPC85xx local configuration space write. Always returns %0;
+ * the access is a single 32-bit register write regardless of @len.
+ */
+static int fsl_local_config_write(struct rio_mport *mport,
+				int index, u32 offset, int len, u32 data)
+{
+	struct rio_priv *priv = mport->priv;
+	pr_debug
+		("fsl_local_config_write: index %d offset %8.8x data %8.8x\n",
+		index, offset, data);
+	out_be32(priv->regs_win + offset, data);
+
+	return 0;
+}
+
+/**
+ * fsl_rio_config_read - Generate a MPC85xx read maintenance transaction
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Location to be read into
+ *
+ * Generates a MPC85xx read maintenance transaction. Returns %0 on success,
+ * %-EINVAL on a bad offset/length or %-EFAULT if the access faults.
+ */
+static int
+fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid,
+			u8 hopcount, u32 offset, int len, u32 *val)
+{
+	struct rio_priv *priv = mport->priv;
+	unsigned long flags;
+	u8 *data;
+	u32 rval;
+	int err = 0;	/* signed: the fault fixup stores -EFAULT here */
+
+	pr_debug
+		("fsl_rio_config_read:"
+		" index %d destid %d hopcount %d offset %8.8x len %d\n",
+		index, destid, hopcount, offset, len);
+
+	/* 16MB maintenance window; only aligned accesses are allowed */
+	if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len))
+		return -EINVAL;
+
+	spin_lock_irqsave(&fsl_rio_config_lock, flags);
+
+	out_be32(&priv->maint_atmu_regs->rowtar,
+		 (destid << 22) | (hopcount << 12) | (offset >> 12));
+	out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10));
+
+	data = (u8 *) priv->maint_win + (offset & (RIO_MAINT_WIN_SIZE - 1));
+	switch (len) {
+	case 1:
+		__fsl_read_rio_config(rval, data, err, "lbz");
+		break;
+	case 2:
+		__fsl_read_rio_config(rval, data, err, "lhz");
+		break;
+	case 4:
+		__fsl_read_rio_config(rval, data, err, "lwz");
+		break;
+	default:
+		spin_unlock_irqrestore(&fsl_rio_config_lock, flags);
+		return -EINVAL;
+	}
+
+	if (err) {
+		pr_debug("RIO: cfg_read error %d for %x:%x:%x\n",
+			 err, destid, hopcount, offset);
+	}
+
+	spin_unlock_irqrestore(&fsl_rio_config_lock, flags);
+	*val = rval;
+
+	return err;
+}
+
+/**
+ * fsl_rio_config_write - Generate a MPC85xx write maintenance transaction
+ * @mport: RapidIO master port info
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of transaction
+ * @hopcount: Number of hops to target device
+ * @offset: Offset into configuration space
+ * @len: Length (in bytes) of the maintenance transaction
+ * @val: Value to be written
+ *
+ * Generates an MPC85xx write maintenance transaction. Returns %0 on
+ * success or %-EINVAL on failure.
+ */
+static int
+fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid,
+			u8 hopcount, u32 offset, int len, u32 val)
+{
+	struct rio_priv *priv = mport->priv;
+	unsigned long flags;
+	u8 *data;
+	int ret = 0;
+
+	pr_debug
+		("fsl_rio_config_write:"
+		" index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n",
+		index, destid, hopcount, offset, len, val);
+
+	/* 16MB maintenance windows possible */
+	/* allow only aligned access to maintenance registers */
+	if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len))
+		return -EINVAL;
+
+	spin_lock_irqsave(&fsl_rio_config_lock, flags);
+
+	out_be32(&priv->maint_atmu_regs->rowtar,
+		 (destid << 22) | (hopcount << 12) | (offset >> 12));
+	out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10));
+
+	data = (u8 *) priv->maint_win + (offset & (RIO_MAINT_WIN_SIZE - 1));
+	switch (len) {
+	case 1:
+		out_8((u8 *) data, val);
+		break;
+	case 2:
+		out_be16((u16 *) data, val);
+		break;
+	case 4:
+		out_be32((u32 *) data, val);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&fsl_rio_config_lock, flags);
+
+	return ret;
+}
+
+static void fsl_rio_inbound_mem_init(struct rio_priv *priv)
+{
+	int i;
+
+	/* close inbound windows */
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++)
+		out_be32(&priv->inb_atmu_regs[i].riwar, 0);
+}
+
+/* Map inbound RapidIO window [rstart, rstart+size) to @lstart; 0 or -errno. */
+static int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
+			   u64 rstart, u64 size, u32 flags)
+{
+	struct rio_priv *priv = mport->priv;
+	u64 base_size, win_start, win_end;
+	unsigned int base_size_log;
+	u32 riwar;
+	int i;
+
+	/* size must be a non-zero power of two, at most 16GB (2^34) */
+	if (!size || (size & (size - 1)) != 0 || size > 0x400000000ULL)
+		return -EINVAL;
+
+	base_size_log = ilog2(size);
+	/* 64-bit shift: a window above 2GB does not fit an int/u32 */
+	base_size = 1ULL << base_size_log;
+
+	/* check if addresses are aligned with the window size */
+	if ((lstart | rstart) & (base_size - 1))
+		return -EINVAL;
+
+	/* check for conflicting ranges */
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++) {
+		riwar = in_be32(&priv->inb_atmu_regs[i].riwar);
+		if ((riwar & RIWAR_ENABLE) == 0)
+			continue;
+		win_start = ((u64)(in_be32(&priv->inb_atmu_regs[i].riwbar) & RIWBAR_BADD_MASK))
+			<< RIWBAR_BADD_VAL_SHIFT;
+		win_end = win_start + ((1ULL << ((riwar & RIWAR_SIZE_MASK) + 1)) - 1);
+		if (rstart < win_end && (rstart + size) > win_start)
+			return -EINVAL;
+	}
+
+	/* find unused atmu */
+	for (i = 0; i < RIO_INB_ATMU_COUNT; i++) {
+		riwar = in_be32(&priv->inb_atmu_regs[i].riwar);
+		if ((riwar & RIWAR_ENABLE) == 0)
+			break;
+	}
+	if (i >= RIO_INB_ATMU_COUNT)
+		return -ENOMEM;
+
+	out_be32(&priv->inb_atmu_regs[i].riwtar, lstart >> RIWTAR_TRAD_VAL_SHIFT);
+	out_be32(&priv->inb_atmu_regs[i].riwbar, rstart >> RIWBAR_BADD_VAL_SHIFT);
+	out_be32(&priv->inb_atmu_regs[i].riwar, RIWAR_ENABLE | RIWAR_TGINT_LOCAL |
+		RIWAR_RDTYP_SNOOP | RIWAR_WRTYP_SNOOP | (base_size_log - 1));
+
+	return 0;
+}
+
+/*
+ * Disable the first enabled inbound ATMU window whose translation target
+ * matches @lstart. Does nothing when no such window exists.
+ */
+static void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart)
+{
+	struct rio_priv *priv = mport->priv;
+	u32 wanted_trad = lstart >> RIWTAR_TRAD_VAL_SHIFT;
+	int idx;
+
+	for (idx = 0; idx < RIO_INB_ATMU_COUNT; idx++) {
+		u32 attr = in_be32(&priv->inb_atmu_regs[idx].riwar);
+		u32 trad;
+
+		/* windows that are already closed are not candidates */
+		if (!(attr & RIWAR_ENABLE))
+			continue;
+
+		trad = in_be32(&priv->inb_atmu_regs[idx].riwtar) &
+			RIWTAR_TRAD_MASK;
+		if (trad != wanted_trad)
+			continue;
+
+		out_be32(&priv->inb_atmu_regs[idx].riwar, attr & ~RIWAR_ENABLE);
+		return;
+	}
+}
+
+/*
+ * Clear the logged error state for one port. @offset selects the port:
+ * 0 clears the port 1 registers, any other value the port 2 registers.
+ */
+void fsl_rio_port_error_handler(int offset)
+{
+	/*XXX: Error recovery is not implemented, we just clear errors */
+	out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0);
+
+	if (offset != 0) {
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
+		return;
+	}
+
+	out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
+	out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
+	out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
+}
+
+/*
+ * Log what the port control register value @ccsr says about the link:
+ * port width and training status for a serial phy (bit 0 set), or the
+ * 8-bit mode indications for a parallel phy.
+ */
+static inline void fsl_rio_info(struct device *dev, u32 ccsr)
+{
+	const char *label;
+	u32 field;
+
+	if (!(ccsr & 1)) {
+		/* Parallel phy */
+		if (!(ccsr & 0x80000000))
+			dev_info(dev, "Output port operating in 8-bit mode\n");
+		if (!(ccsr & 0x08000000))
+			dev_info(dev, "Input port operating in 8-bit mode\n");
+		return;
+	}
+
+	/* Serial phy: top two bits carry the hardware port width */
+	field = ccsr >> 30;
+	if (field == 0)
+		label = "1";
+	else if (field == 1)
+		label = "4";
+	else
+		label = "Unknown";
+	dev_info(dev, "Hardware port width: %s\n", label);
+
+	/* Bits 29..27 carry the training result */
+	field = (ccsr >> 27) & 7;
+	if (field == 0)
+		label = "Single-lane 0";
+	else if (field == 1)
+		label = "Single-lane 2";
+	else if (field == 2)
+		label = "Four-lane";
+	else
+		label = "Unknown";
+	dev_info(dev, "Training connection status: %s\n", label);
+}
+
+/**
+ * fsl_rio_setup - Setup Freescale PowerPC RapidIO interface
+ * @dev: platform_device pointer
+ *
+ * Initializes MPC85xx RapidIO hardware interface, configures
+ * master port with system-specific info, and registers the
+ * master port with the RapidIO subsystem.
+ *
+ * The function maps the RapidIO and RMU register windows, builds the
+ * shared rio_ops table, sets up the global doorbell and port-write unit
+ * descriptors, and then walks the child nodes of the device node, creating
+ * one master port per child. Children that fail are skipped; the call only
+ * fails as a whole when no port could be registered.
+ *
+ * Returns %0 when at least one port was registered, otherwise a negative
+ * errno. The results of fsl_rio_doorbell_init() and
+ * fsl_rio_port_write_init() are not checked.
+ */
+static int fsl_rio_setup(struct platform_device *dev)
+{
+	struct rio_ops *ops;
+	struct rio_mport *port;
+	struct rio_priv *priv;
+	int rc = 0;
+	const u32 *port_index;
+	u32 active_ports = 0;
+	struct device_node *np, *rmu_node;
+	u32 ccsr;
+	u64 range_start;
+	u32 i;
+	/* static: the count persists across calls to this function */
+	static int tmp;
+	struct device_node *rmu_np[MAX_MSG_UNIT_NUM] = {NULL};
+
+	if (!dev->dev.of_node) {
+		dev_err(&dev->dev, "Device OF-Node is NULL");
+		return -ENODEV;
+	}
+
+	rio_regs_win = of_iomap(dev->dev.of_node, 0);
+	if (!rio_regs_win) {
+		dev_err(&dev->dev, "Unable to map rio register window\n");
+		rc = -ENOMEM;
+		goto err_rio_regs;
+	}
+
+	/* One ops table is shared by every port created below */
+	ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
+	if (!ops) {
+		rc = -ENOMEM;
+		goto err_ops;
+	}
+	ops->lcread = fsl_local_config_read;
+	ops->lcwrite = fsl_local_config_write;
+	ops->cread = fsl_rio_config_read;
+	ops->cwrite = fsl_rio_config_write;
+	ops->dsend = fsl_rio_doorbell_send;
+	ops->pwenable = fsl_rio_pw_enable;
+	ops->open_outb_mbox = fsl_open_outb_mbox;
+	ops->open_inb_mbox = fsl_open_inb_mbox;
+	ops->close_outb_mbox = fsl_close_outb_mbox;
+	ops->close_inb_mbox = fsl_close_inb_mbox;
+	ops->add_outb_message = fsl_add_outb_message;
+	ops->add_inb_buffer = fsl_add_inb_buffer;
+	ops->get_inb_message = fsl_get_inb_message;
+	ops->map_inb = fsl_map_inb_mem;
+	ops->unmap_inb = fsl_unmap_inb_mem;
+
+	/* The RMU register window is found through a phandle on our node */
+	rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0);
+	if (!rmu_node) {
+		dev_err(&dev->dev, "No valid fsl,srio-rmu-handle property\n");
+		rc = -ENOENT;
+		goto err_rmu;
+	}
+	rmu_regs_win = of_iomap(rmu_node, 0);
+
+	of_node_put(rmu_node);
+	if (!rmu_regs_win) {
+		dev_err(&dev->dev, "Unable to map rmu register window\n");
+		rc = -ENOMEM;
+		goto err_rmu;
+	}
+	/*
+	 * Collect the message unit nodes, indexed in discovery order.
+	 * NOTE(review): tmp is never compared against MAX_MSG_UNIT_NUM, so
+	 * more matching nodes than that (or a second call, since tmp is
+	 * static) would write past rmu_np[]. Also confirm whether the
+	 * iterator still holds a reference on the nodes stored here.
+	 */
+	for_each_compatible_node(np, NULL, "fsl,srio-msg-unit") {
+		rmu_np[tmp] = np;
+		tmp++;
+	}
+
+	/* set up doorbell node */
+	np = of_find_compatible_node(NULL, NULL, "fsl,srio-dbell-unit");
+	if (!np) {
+		dev_err(&dev->dev, "No fsl,srio-dbell-unit node\n");
+		rc = -ENODEV;
+		goto err_dbell;
+	}
+	dbell = kzalloc(sizeof(struct fsl_rio_dbell), GFP_KERNEL);
+	if (!(dbell)) {
+		dev_err(&dev->dev, "Can't alloc memory for 'fsl_rio_dbell'\n");
+		rc = -ENOMEM;
+		goto err_dbell;
+	}
+	dbell->dev = &dev->dev;
+	dbell->bellirq = irq_of_parse_and_map(np, 1);
+	dev_info(&dev->dev, "bellirq: %d\n", dbell->bellirq);
+
+	/* The unit's 'reg' start is used as an offset into the RMU window */
+	if (of_property_read_reg(np, 0, &range_start, NULL)) {
+		pr_err("%pOF: unable to find 'reg' property\n",
+			np);
+		rc = -ENOMEM;
+		goto err_pw;
+	}
+	dbell->dbell_regs = (struct rio_dbell_regs *)(rmu_regs_win +
+				(u32)range_start);
+
+	/* set up port write node */
+	np = of_find_compatible_node(NULL, NULL, "fsl,srio-port-write-unit");
+	if (!np) {
+		dev_err(&dev->dev, "No fsl,srio-port-write-unit node\n");
+		rc = -ENODEV;
+		goto err_pw;
+	}
+	pw = kzalloc(sizeof(struct fsl_rio_pw), GFP_KERNEL);
+	if (!(pw)) {
+		dev_err(&dev->dev, "Can't alloc memory for 'fsl_rio_pw'\n");
+		rc = -ENOMEM;
+		goto err_pw;
+	}
+	pw->dev = &dev->dev;
+	pw->pwirq = irq_of_parse_and_map(np, 0);
+	dev_info(&dev->dev, "pwirq: %d\n", pw->pwirq);
+	if (of_property_read_reg(np, 0, &range_start, NULL)) {
+		pr_err("%pOF: unable to find 'reg' property\n",
+			np);
+		rc = -ENOMEM;
+		goto err;
+	}
+	pw->pw_regs = (struct rio_pw_regs *)(rmu_regs_win + (u32)range_start);
+
+	/*
+	 * set up ports node: one master port per child node. Any failure
+	 * inside the loop only skips that child.
+	 */
+	for_each_child_of_node(dev->dev.of_node, np) {
+		struct resource res;
+
+		port_index = of_get_property(np, "cell-index", NULL);
+		if (!port_index) {
+			dev_err(&dev->dev, "Can't get %pOF property 'cell-index'\n",
+					np);
+			continue;
+		}
+
+		if (of_range_to_resource(np, 0, &res)) {
+			dev_err(&dev->dev, "Can't get %pOF property 'ranges'\n",
+					np);
+			continue;
+		}
+
+		dev_info(&dev->dev, "%pOF: LAW %pR\n",
+				np, &res);
+
+		port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
+		if (!port)
+			continue;
+
+		rc = rio_mport_initialize(port);
+		if (rc) {
+			kfree(port);
+			continue;
+		}
+
+		/*
+		 * cell-index is 1-based; i is the 0-based port number.
+		 * NOTE(review): i is not range-checked before it indexes
+		 * rmu_np[] (MAX_MSG_UNIT_NUM entries) and the mport[] arrays
+		 * (MAX_PORT_NUM entries) further down.
+		 */
+		i = *port_index - 1;
+		port->index = (unsigned char)i;
+
+		priv = kzalloc(sizeof(struct rio_priv), GFP_KERNEL);
+		if (!priv) {
+			dev_err(&dev->dev, "Can't alloc memory for 'priv'\n");
+			kfree(port);
+			continue;
+		}
+
+		INIT_LIST_HEAD(&port->dbells);
+		port->iores = res;	/* struct copy */
+		port->iores.name = "rio_io_win";
+
+		if (request_resource(&iomem_resource, &port->iores) < 0) {
+			dev_err(&dev->dev, "RIO: Error requesting master port region"
+				" 0x%016llx-0x%016llx\n",
+				(u64)port->iores.start, (u64)port->iores.end);
+				kfree(priv);
+				kfree(port);
+				continue;
+		}
+		sprintf(port->name, "RIO mport %d", i);
+
+		priv->dev = &dev->dev;
+		port->dev.parent = &dev->dev;
+		port->ops = ops;
+		port->priv = priv;
+		port->phys_efptr = 0x100;
+		port->phys_rmap = 1;
+		priv->regs_win = rio_regs_win;
+
+		/* Per-port control/status registers are 0x20 apart */
+		ccsr = in_be32(priv->regs_win + RIO_CCSR + i*0x20);
+
+		/* Checking the port training status */
+		if (in_be32((priv->regs_win + RIO_ESCSR + i*0x20)) & 1) {
+			dev_err(&dev->dev, "Port %d is not ready. "
+			"Try to restart connection...\n", i);
+			/* Disable ports */
+			out_be32(priv->regs_win
+				+ RIO_CCSR + i*0x20, 0);
+			/* Set 1x lane */
+			setbits32(priv->regs_win
+				+ RIO_CCSR + i*0x20, 0x02000000);
+			/* Enable ports */
+			setbits32(priv->regs_win
+				+ RIO_CCSR + i*0x20, 0x00600000);
+			msleep(100);
+			if (in_be32((priv->regs_win
+					+ RIO_ESCSR + i*0x20)) & 1) {
+				dev_err(&dev->dev,
+					"Port %d restart failed.\n", i);
+				release_resource(&port->iores);
+				kfree(priv);
+				kfree(port);
+				continue;
+			}
+			dev_info(&dev->dev, "Port %d restart success!\n", i);
+		}
+		/* Reports the ccsr value sampled before any restart attempt */
+		fsl_rio_info(&dev->dev, ccsr);
+
+		port->sys_size = (in_be32((priv->regs_win + RIO_PEF_CAR))
+					& RIO_PEF_CTLS) >> 4;
+		dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n",
+				port->sys_size ? 65536 : 256);
+
+		if (port->host_deviceid >= 0)
+			out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST |
+				RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED);
+		else
+			out_be32(priv->regs_win + RIO_GCCSR,
+				RIO_PORT_GEN_MASTER);
+
+		priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win
+			+ ((i == 0) ? RIO_ATMU_REGS_PORT1_OFFSET :
+			RIO_ATMU_REGS_PORT2_OFFSET));
+
+		/* The maintenance window is the ATMU entry after the first */
+		priv->maint_atmu_regs = priv->atmu_regs + 1;
+		priv->inb_atmu_regs = (struct rio_inb_atmu_regs __iomem *)
+			(priv->regs_win +
+			((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET :
+			RIO_INB_ATMU_REGS_PORT2_OFFSET));
+
+		/* Set to receive packets with any dest ID */
+		out_be32((priv->regs_win + RIO_ISR_AACR + i*0x80),
+			 RIO_ISR_AACR_AA);
+
+		/* Configure maintenance transaction window */
+		out_be32(&priv->maint_atmu_regs->rowbar,
+			port->iores.start >> 12);
+		out_be32(&priv->maint_atmu_regs->rowar,
+			 0x80077000 | (ilog2(RIO_MAINT_WIN_SIZE) - 1));
+
+		/* NOTE(review): the ioremap() result is not checked for NULL */
+		priv->maint_win = ioremap(port->iores.start,
+				RIO_MAINT_WIN_SIZE);
+
+		/*
+		 * NOTE(review): range_start still holds the value read from
+		 * the port-write unit's 'reg' above; nothing in this loop
+		 * updates it. Confirm this is the intended LAW start.
+		 */
+		rio_law_start = range_start;
+
+		fsl_rio_setup_rmu(port, rmu_np[i]);
+		fsl_rio_inbound_mem_init(priv);
+
+		dbell->mport[i] = port;
+		pw->mport[i] = port;
+
+		/*
+		 * NOTE(review): on registration failure the port is freed but
+		 * dbell->mport[i] and pw->mport[i] keep pointing at it.
+		 */
+		if (rio_register_mport(port)) {
+			release_resource(&port->iores);
+			kfree(priv);
+			kfree(port);
+			continue;
+		}
+		active_ports++;
+	}
+
+	if (!active_ports) {
+		rc = -ENOLINK;
+		goto err;
+	}
+
+	fsl_rio_doorbell_init(dbell);
+	fsl_rio_port_write_init(pw);
+
+	return 0;
+	/* Unwind in reverse order of acquisition; each label falls through */
+err:
+	kfree(pw);
+	pw = NULL;
+err_pw:
+	kfree(dbell);
+	dbell = NULL;
+err_dbell:
+	iounmap(rmu_regs_win);
+	rmu_regs_win = NULL;
+err_rmu:
+	kfree(ops);
+err_ops:
+	iounmap(rio_regs_win);
+	rio_regs_win = NULL;
+err_rio_regs:
+	return rc;
+}
+
+/*
+ * Platform probe callback for a RapidIO peer-to-peer network node: logs
+ * the device-tree node being brought up and hands over to fsl_rio_setup().
+ */
+static int fsl_of_rio_rpn_probe(struct platform_device *dev)
+{
+	struct device_node *node = dev->dev.of_node;
+
+	printk(KERN_INFO "Setting up RapidIO peer-to-peer network %pOF\n",
+			node);
+
+	return fsl_rio_setup(dev);
+}
+
+/* Device-tree match table: binds to nodes compatible with "fsl,srio" */
+static const struct of_device_id fsl_of_rio_rpn_ids[] = {
+	{
+		.compatible = "fsl,srio",
+	},
+	{},	/* sentinel */
+};
+
+/* Platform driver descriptor; only a probe callback is provided */
+static struct platform_driver fsl_of_rio_rpn_driver = {
+	.driver = {
+		.name = "fsl-of-rio",
+		.of_match_table = fsl_of_rio_rpn_ids,
+	},
+	.probe = fsl_of_rio_rpn_probe,
+};
+
+/* Register the platform driver; returns platform_driver_register()'s result */
+static __init int fsl_of_rio_rpn_init(void)
+{
+	return platform_driver_register(&fsl_of_rio_rpn_driver);
+}
+
+/* Run the registration at subsys initcall level */
+subsys_initcall(fsl_of_rio_rpn_init);
diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h
new file mode 100644
index 0000000000..c526b7237a
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rio.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Freescale MPC85xx/MPC86xx RapidIO support
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
+ * Copyright (C) 2007, 2008, 2010, 2011 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
+ * Lian Minghuan-B31939 <Minghuan.Lian@freescale.com>
+ * Liu Gang <Gang.Liu@freescale.com>
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#ifndef __FSL_RIO_H
+#define __FSL_RIO_H
+
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/kfifo.h>
+
+/* Register window of a master port, fetched through its struct rio_priv */
+#define RIO_REGS_WIN(mport)	(((struct rio_priv *)(mport->priv))->regs_win)
+
+/* Size of the maintenance transaction window (4MB) */
+#define RIO_MAINT_WIN_SIZE	0x400000
+/* Offset of LTLEDCSR in the RapidIO register window; written 0 to clear errors */
+#define RIO_LTLEDCSR		0x0608
+
+/* Outbound window attribute (ROWAR) bits used for the doorbell window */
+#define DOORBELL_ROWAR_EN	0x80000000
+#define DOORBELL_ROWAR_TFLOWLV	0x08000000 /* highest priority level */
+#define DOORBELL_ROWAR_PCI	0x02000000 /* PCI window */
+#define DOORBELL_ROWAR_NREAD	0x00040000 /* NREAD */
+#define DOORBELL_ROWAR_MAINTRD	0x00070000  /* maintenance read */
+#define DOORBELL_ROWAR_RES	0x00002000 /* wrtyp: reserved */
+#define DOORBELL_ROWAR_MAINTWD	0x00007000
+#define DOORBELL_ROWAR_SIZE	0x0000000b /* window size is 4k */
+
+/* Offsets of the register groups inside the RapidIO register window */
+#define RIO_ATMU_REGS_PORT1_OFFSET	0x10c00
+#define RIO_ATMU_REGS_PORT2_OFFSET	0x10e00
+#define RIO_S_DBELL_REGS_OFFSET	0x13400
+#define RIO_S_PW_REGS_OFFSET	0x134e0
+#define RIO_ATMU_REGS_DBELL_OFFSET	0x10C40
+#define RIO_INB_ATMU_REGS_PORT1_OFFSET 0x10d60
+#define RIO_INB_ATMU_REGS_PORT2_OFFSET 0x10f60
+
+/* Capacity of the message-unit node array built during setup */
+#define MAX_MSG_UNIT_NUM	2
+/* Capacity of the mport[] arrays in fsl_rio_dbell and fsl_rio_pw */
+#define MAX_PORT_NUM		4
+/* Number of inbound ATMU windows scanned per port */
+#define RIO_INB_ATMU_COUNT	4
+
+/*
+ * Register layout of one outbound ATMU window (eight 32-bit words).
+ * The maintenance window uses rowtar/rowtear to select the target device
+ * and rowbar/rowar for the window base and attributes.
+ */
+struct rio_atmu_regs {
+	 u32 rowtar;
+	 u32 rowtear;
+	 u32 rowbar;
+	 u32 pad1;
+	 u32 rowar;
+	 u32 pad2[3];
+};
+
+/*
+ * Register layout of one inbound ATMU window (eight 32-bit words).
+ * riwtar holds the shifted local target address, riwbar the shifted
+ * RapidIO base address, riwar the attributes (enable bit, size field).
+ */
+struct rio_inb_atmu_regs {
+	u32 riwtar;
+	u32 pad1;
+	u32 riwbar;
+	u32 pad2;
+	u32 riwar;
+	u32 pad3[3];
+};
+
+/* Inbound doorbell queue buffer: CPU pointer and matching DMA address */
+struct rio_dbell_ring {
+	void *virt;
+	dma_addr_t phys;
+};
+
+/*
+ * Port-write receive buffer (CPU pointer and DMA address) together with
+ * counters for messages queued, transaction errors and discarded messages.
+ */
+struct rio_port_write_msg {
+	 void *virt;
+	 dma_addr_t phys;
+	 u32 msg_count;
+	 u32 err_count;
+	 u32 discard_count;
+};
+
+/* State of the doorbell unit, shared by all master ports */
+struct fsl_rio_dbell {
+	struct rio_mport *mport[MAX_PORT_NUM];	/* ports, by 0-based index */
+	struct device *dev;
+	struct rio_dbell_regs __iomem *dbell_regs;
+	struct rio_dbell_ring dbell_ring;
+	int bellirq;	/* IRQ mapped from the doorbell unit node */
+};
+
+/* State of the port-write unit, shared by all master ports */
+struct fsl_rio_pw {
+	struct rio_mport *mport[MAX_PORT_NUM];	/* ports, by 0-based index */
+	struct device *dev;
+	struct rio_pw_regs __iomem *pw_regs;
+	struct rio_port_write_msg port_write_msg;
+	int pwirq;	/* IRQ mapped from the port-write unit node */
+	struct work_struct pw_work;	/* deferred processing of pw_fifo */
+	struct kfifo pw_fifo;	/* messages copied out by the IRQ handler */
+	spinlock_t pw_fifo_lock;	/* taken when draining pw_fifo */
+};
+
+/* Per-port private data, reached through rio_mport->priv */
+struct rio_priv {
+	struct device *dev;
+	void __iomem *regs_win;	/* RapidIO register window */
+	struct rio_atmu_regs __iomem *atmu_regs;	/* port's outbound ATMU block */
+	struct rio_atmu_regs __iomem *maint_atmu_regs;	/* maintenance window entry */
+	struct rio_inb_atmu_regs __iomem *inb_atmu_regs;	/* inbound windows */
+	void __iomem *maint_win;	/* mapping of the maintenance window */
+	void *rmm_handle; /* RapidIO message manager(unit) Handle */
+};
+
+/* Mapped RapidIO and RMU register windows */
+extern void __iomem *rio_regs_win;
+extern void __iomem *rmu_regs_win;
+
+extern resource_size_t rio_law_start;
+
+/* Single doorbell and port-write unit descriptors */
+extern struct fsl_rio_dbell *dbell;
+extern struct fsl_rio_pw *pw;
+
+/* Unit setup and error handling */
+extern int fsl_rio_setup_rmu(struct rio_mport *mport,
+	struct device_node *node);
+extern int fsl_rio_port_write_init(struct fsl_rio_pw *pw);
+extern int fsl_rio_pw_enable(struct rio_mport *mport, int enable);
+extern void fsl_rio_port_error_handler(int offset);
+extern int fsl_rio_doorbell_init(struct fsl_rio_dbell *dbell);
+
+/* Doorbell and mailbox operations installed in struct rio_ops */
+extern int fsl_rio_doorbell_send(struct rio_mport *mport,
+				int index, u16 destid, u16 data);
+extern int fsl_add_outb_message(struct rio_mport *mport,
+	struct rio_dev *rdev,
+	int mbox, void *buffer, size_t len);
+extern int fsl_open_outb_mbox(struct rio_mport *mport,
+	void *dev_id, int mbox, int entries);
+extern void fsl_close_outb_mbox(struct rio_mport *mport, int mbox);
+extern int fsl_open_inb_mbox(struct rio_mport *mport,
+	void *dev_id, int mbox, int entries);
+extern void fsl_close_inb_mbox(struct rio_mport *mport, int mbox);
+extern int fsl_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf);
+extern void *fsl_get_inb_message(struct rio_mport *mport, int mbox);
+
+#endif
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
new file mode 100644
index 0000000000..f956591cb6
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -0,0 +1,1107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale MPC85xx/MPC86xx RapidIO RMU support
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
+ * Copyright (C) 2007, 2008, 2010, 2011 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
+ * Lian Minghuan-B31939 <Minghuan.Lian@freescale.com>
+ * Liu Gang <Gang.Liu@freescale.com>
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ */
+
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#include "fsl_rio.h"
+
+/* Message unit handle stored in the port's private data */
+#define GET_RMM_HANDLE(mport) \
+		(((struct rio_priv *)(mport->priv))->rmm_handle)
+
+/* RapidIO definition irq, which read from OF-tree */
+/* PW/BELL take the unit descriptor; TX/RX take a master port */
+#define IRQ_RIO_PW(m)		(((struct fsl_rio_pw *)(m))->pwirq)
+#define IRQ_RIO_BELL(m) (((struct fsl_rio_dbell *)(m))->bellirq)
+#define IRQ_RIO_TX(m) (((struct fsl_rmu *)(GET_RMM_HANDLE(m)))->txirq)
+#define IRQ_RIO_RX(m) (((struct fsl_rmu *)(GET_RMM_HANDLE(m)))->rxirq)
+
+/* Bounds on the number of entries in the message rings */
+#define RIO_MIN_TX_RING_SIZE	2
+#define RIO_MAX_TX_RING_SIZE	2048
+#define RIO_MIN_RX_RING_SIZE	2
+#define RIO_MAX_RX_RING_SIZE	2048
+
+/* Port-write mode register (pwmr) bits; PWE enables reception, CQ clears the queue */
+#define RIO_IPWMR_SEN		0x00100000
+#define RIO_IPWMR_QFIE		0x00000100
+#define RIO_IPWMR_EIE		0x00000020
+#define RIO_IPWMR_CQ		0x00000002
+#define RIO_IPWMR_PWE		0x00000001
+
+/* Port-write status register (pwsr) bits */
+#define RIO_IPWSR_QF		0x00100000
+#define RIO_IPWSR_TE		0x00000080
+#define RIO_IPWSR_QFI		0x00000010
+#define RIO_IPWSR_PWD		0x00000008
+#define RIO_IPWSR_PWB		0x00000004
+
+/* Offset of EPWISR in the RapidIO register window */
+#define RIO_EPWISR		0x10010
+/* EPWISR Error match value */
+#define RIO_EPWISR_PINT1	0x80000000
+#define RIO_EPWISR_PINT2	0x40000000
+#define RIO_EPWISR_MU		0x00000002
+#define RIO_EPWISR_PW		0x00000001
+
+/* Values written to the status registers to clear logged errors */
+#define IPWSR_CLEAR		0x98
+#define OMSR_CLEAR		0x1cb3
+#define IMSR_CLEAR		0x491
+#define IDSR_CLEAR		0x91
+#define ODSR_CLEAR		0x1c00
+#define LTLEECSR_ENABLE_ALL	0xFFC000FC
+#define RIO_LTLEECSR		0x060c
+
+/* Offsets of the message status registers in the RMU register window */
+#define RIO_IM0SR		0x64
+#define RIO_IM1SR		0x164
+#define RIO_OM0SR		0x4
+#define RIO_OM1SR		0x104
+
+#define RIO_DBELL_WIN_SIZE	0x1000
+
+/* Outbound message mode (omr) and status (osr) bits */
+#define RIO_MSG_OMR_MUI		0x00000002
+#define RIO_MSG_OSR_TE		0x00000080
+#define RIO_MSG_OSR_QOI		0x00000020
+#define RIO_MSG_OSR_QFI		0x00000010
+#define RIO_MSG_OSR_MUB		0x00000004
+#define RIO_MSG_OSR_EOMI	0x00000002
+#define RIO_MSG_OSR_QEI		0x00000001
+
+/* Inbound message mode (imr) and status (isr) bits */
+#define RIO_MSG_IMR_MI		0x00000002
+#define RIO_MSG_ISR_TE		0x00000080
+#define RIO_MSG_ISR_QFI		0x00000010
+#define RIO_MSG_ISR_DIQI	0x00000001
+
+/* Size in bytes of one TX descriptor and of one message buffer */
+#define RIO_MSG_DESC_SIZE	32
+#define RIO_MSG_BUFFER_SIZE	4096
+
+/* Inbound doorbell mode (dmr) and status (dsr) bits */
+#define DOORBELL_DMR_DI		0x00000002
+#define DOORBELL_DSR_TE		0x00000080
+#define DOORBELL_DSR_QFI	0x00000010
+#define DOORBELL_DSR_DIQI	0x00000001
+
+/* Size in bytes of one doorbell queue entry */
+#define DOORBELL_MESSAGE_SIZE	0x08
+
+/* Serializes the outbound doorbell register sequence */
+static DEFINE_SPINLOCK(fsl_rio_doorbell_lock);
+
+/*
+ * Register layout of one message unit. The interrupt handlers use osr/isr
+ * as the outbound/inbound status registers (bits are acknowledged by
+ * writing them back) and odqdpar to locate the current TX descriptor.
+ */
+struct rio_msg_regs {
+	u32 omr;
+	u32 osr;
+	u32 pad1;
+	u32 odqdpar;
+	u32 pad2;
+	u32 osar;
+	u32 odpr;
+	u32 odatr;
+	u32 odcr;
+	u32 pad3;
+	u32 odqepar;
+	u32 pad4[13];
+	u32 imr;
+	u32 isr;
+	u32 pad5;
+	u32 ifqdpar;
+	u32 pad6;
+	u32 ifqepar;
+};
+
+/*
+ * Register layout of the doorbell unit. The od* registers are programmed
+ * when sending a doorbell; dmr/dsr/dqdpar are used by the inbound
+ * doorbell interrupt handler.
+ */
+struct rio_dbell_regs {
+	u32 odmr;
+	u32 odsr;
+	u32 pad1[4];
+	u32 oddpr;
+	u32 oddatr;
+	u32 pad2[3];
+	u32 odretcr;
+	u32 pad3[12];
+	u32 dmr;
+	u32 dsr;
+	u32 pad4;
+	u32 dqdpar;
+	u32 pad5;
+	u32 dqepar;
+};
+
+/*
+ * Register layout of the port-write unit: mode (pwmr), status (pwsr) and
+ * the queue base address pair (epwqbar, pwqbar).
+ */
+struct rio_pw_regs {
+	u32 pwmr;
+	u32 pwsr;
+	u32 epwqbar;
+	u32 pwqbar;
+};
+
+
+/* One outbound message descriptor: eight 32-bit words (RIO_MSG_DESC_SIZE bytes) */
+struct rio_tx_desc {
+	u32 pad1;
+	u32 saddr;
+	u32 dport;
+	u32 dattr;
+	u32 pad2;
+	u32 pad3;
+	u32 dwcnt;
+	u32 pad4;
+};
+
+/* Outbound message ring: descriptor area, per-slot buffers and ring state */
+struct rio_msg_tx_ring {
+	void *virt;	/* descriptor ring, CPU pointer */
+	dma_addr_t phys;	/* descriptor ring, DMA address */
+	void *virt_buffer[RIO_MAX_TX_RING_SIZE];
+	dma_addr_t phys_buffer[RIO_MAX_TX_RING_SIZE];
+	int tx_slot;	/* index of the descriptor used for the next message */
+	int size;
+	void *dev_id;	/* passed back to the outbound mailbox callback */
+};
+
+/* Inbound message ring: queue area, per-slot buffer pointers and ring state */
+struct rio_msg_rx_ring {
+	void *virt;
+	dma_addr_t phys;
+	void *virt_buffer[RIO_MAX_RX_RING_SIZE];
+	int rx_slot;
+	int size;
+	void *dev_id;	/* passed back to the inbound mailbox callback */
+};
+
+/* Per-port message unit state, stored as rio_priv->rmm_handle */
+struct fsl_rmu {
+	struct rio_msg_regs __iomem *msg_regs;
+	struct rio_msg_tx_ring msg_tx_ring;
+	struct rio_msg_rx_ring msg_rx_ring;
+	int txirq;
+	int rxirq;
+};
+
+/*
+ * One inbound doorbell queue entry (DOORBELL_MESSAGE_SIZE bytes). The
+ * handler passes sid, tid and info to the registered doorbell callback.
+ */
+struct rio_dbell_msg {
+	u16 pad1;
+	u16 tid;
+	u16 sid;
+	u16 info;
+};
+
+/**
+ * fsl_rio_tx_handler - MPC85xx outbound message interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: The master port the interrupt was registered for
+ *
+ * Reads the outbound status register once. A transmission error or a
+ * queue overflow is logged and acknowledged, and nothing else is done.
+ * Otherwise an end-of-message event invokes the registered outbound
+ * mailbox callback with the completed slot and is then acknowledged.
+ * Always returns %IRQ_HANDLED.
+ */
+static irqreturn_t
+fsl_rio_tx_handler(int irq, void *dev_instance)
+{
+	struct rio_mport *port = dev_instance;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(port);
+	int status = in_be32(&rmu->msg_regs->osr);
+
+	if (status & RIO_MSG_OSR_TE) {
+		pr_info("RIO: outbound message transmission error\n");
+		out_be32(&rmu->msg_regs->osr, RIO_MSG_OSR_TE);
+		return IRQ_HANDLED;
+	}
+
+	if (status & RIO_MSG_OSR_QOI) {
+		pr_info("RIO: outbound message queue overflow\n");
+		out_be32(&rmu->msg_regs->osr, RIO_MSG_OSR_QOI);
+		return IRQ_HANDLED;
+	}
+
+	if (!(status & RIO_MSG_OSR_EOMI))
+		return IRQ_HANDLED;
+
+	{
+		/* Slot = descriptor pointer's distance from the ring base / 32 */
+		u32 dqp = in_be32(&rmu->msg_regs->odqdpar);
+		int slot = (dqp - rmu->msg_tx_ring.phys) >> 5;
+
+		if (port->outb_msg[0].mcback != NULL)
+			port->outb_msg[0].mcback(port, rmu->msg_tx_ring.dev_id,
+						 -1, slot);
+	}
+
+	/* Ack the end-of-message interrupt */
+	out_be32(&rmu->msg_regs->osr, RIO_MSG_OSR_EOMI);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * fsl_rio_rx_handler - MPC85xx inbound message interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: The master port the interrupt was registered for
+ *
+ * Reads the inbound status register once. A reception error is logged
+ * and acknowledged, and nothing else is done. Otherwise a queueing
+ * event invokes the registered inbound mailbox callback and is then
+ * acknowledged. Always returns %IRQ_HANDLED.
+ */
+static irqreturn_t
+fsl_rio_rx_handler(int irq, void *dev_instance)
+{
+	struct rio_mport *port = dev_instance;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(port);
+	int status = in_be32(&rmu->msg_regs->isr);
+
+	if (status & RIO_MSG_ISR_TE) {
+		pr_info("RIO: inbound message reception error\n");
+		out_be32(&rmu->msg_regs->isr, RIO_MSG_ISR_TE);
+		return IRQ_HANDLED;
+	}
+
+	/* XXX Need to check/dispatch until queue empty */
+	if (!(status & RIO_MSG_ISR_DIQI))
+		return IRQ_HANDLED;
+
+	/*
+	 * Messages for any mailbox/letter can arrive here, so the callback
+	 * is given an unknown/invalid (-1) mailbox number and slot.
+	 */
+	if (port->inb_msg[0].mcback != NULL)
+		port->inb_msg[0].mcback(port, rmu->msg_rx_ring.dev_id, -1, -1);
+
+	/* Ack the queueing interrupt */
+	out_be32(&rmu->msg_regs->isr, RIO_MSG_ISR_DIQI);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * fsl_rio_dbell_handler - MPC85xx doorbell interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to the doorbell unit state (struct fsl_rio_dbell)
+ *
+ * Handles doorbell interrupts. For a queued doorbell, walks the doorbell
+ * lists of every registered master port, looking for an entry whose
+ * resource range contains the received info value, and calls that entry's
+ * inbound callback. The first matching entry with a callback wins.
+ * A doorbell nobody claims is reported as spurious. Always returns
+ * %IRQ_HANDLED.
+ */
+static irqreturn_t
+fsl_rio_dbell_handler(int irq, void *dev_instance)
+{
+	int dsr;
+	struct fsl_rio_dbell *fsl_dbell = (struct fsl_rio_dbell *)dev_instance;
+	int i;
+
+	dsr = in_be32(&fsl_dbell->dbell_regs->dsr);
+
+	if (dsr & DOORBELL_DSR_TE) {
+		pr_info("RIO: doorbell reception error\n");
+		out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_TE);
+		goto out;
+	}
+
+	if (dsr & DOORBELL_DSR_QFI) {
+		pr_info("RIO: doorbell queue full\n");
+		out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_QFI);
+	}
+
+	/* XXX Need to check/dispatch until queue empty */
+	if (dsr & DOORBELL_DSR_DIQI) {
+		/* Current entry: ring base plus the dequeue pointer's low bits */
+		struct rio_dbell_msg *dmsg =
+			fsl_dbell->dbell_ring.virt +
+			(in_be32(&fsl_dbell->dbell_regs->dqdpar) & 0xfff);
+		int found = 0;
+
+		pr_debug
+			("RIO: processing doorbell,"
+			" sid %2.2x tid %2.2x info %4.4x\n",
+			dmsg->sid, dmsg->tid, dmsg->info);
+
+		for (i = 0; i < MAX_PORT_NUM; i++) {
+			/*
+			 * The match is tracked per port. After an exhausted
+			 * list_for_each_entry() the cursor is not a valid
+			 * entry, so it must never be used outside the loop.
+			 */
+			struct rio_dbell *match = NULL;
+			struct rio_dbell *cur;
+
+			if (!fsl_dbell->mport[i])
+				continue;
+
+			list_for_each_entry(cur,
+				&fsl_dbell->mport[i]->dbells, node) {
+				if ((cur->res->start <= dmsg->info) &&
+				    (cur->res->end >= dmsg->info)) {
+					match = cur;
+					break;
+				}
+			}
+			if (!match)
+				continue;
+
+			found = 1;
+			if (match->dinb) {
+				match->dinb(fsl_dbell->mport[i],
+					match->dev_id, dmsg->sid,
+					dmsg->tid,
+					dmsg->info);
+				break;
+			}
+		}
+
+		if (!found) {
+			pr_debug
+				("RIO: spurious doorbell,"
+				" sid %2.2x tid %2.2x info %4.4x\n",
+				dmsg->sid, dmsg->tid,
+				dmsg->info);
+		}
+		/* Set the DI bit in the mode register, then ack the interrupt */
+		setbits32(&fsl_dbell->dbell_regs->dmr, DOORBELL_DMR_DI);
+		out_be32(&fsl_dbell->dbell_regs->dsr, DOORBELL_DSR_DIQI);
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+
+/*
+ * Clear the logged error state of the message unit: the logical/transport
+ * layer error register, the inbound/outbound message status registers, the
+ * doorbell status registers and the port-write status register.
+ */
+static void msg_unit_error_handler(void)
+{
+	static const struct {
+		unsigned int reg;
+		u32 clear;
+	} msg_status[] = {
+		{ RIO_IM0SR, IMSR_CLEAR },
+		{ RIO_IM1SR, IMSR_CLEAR },
+		{ RIO_OM0SR, OMSR_CLEAR },
+		{ RIO_OM1SR, OMSR_CLEAR },
+	};
+	unsigned int n;
+
+	/*XXX: Error recovery is not implemented, we just clear errors */
+	out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0);
+
+	for (n = 0; n < ARRAY_SIZE(msg_status); n++)
+		out_be32((u32 *)(rmu_regs_win + msg_status[n].reg),
+			 msg_status[n].clear);
+
+	out_be32(&dbell->dbell_regs->odsr, ODSR_CLEAR);
+	out_be32(&dbell->dbell_regs->dsr, IDSR_CLEAR);
+
+	out_be32(&pw->pw_regs->pwsr, IPWSR_CLEAR);
+}
+
+/**
+ * fsl_rio_port_write_handler - MPC85xx port write interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to the port-write unit state (struct fsl_rio_pw)
+ *
+ * Handles port write interrupts. A received port-write message is copied
+ * from the hardware buffer into the software FIFO (or counted as
+ * discarded when the FIFO is full) and the work item is scheduled to
+ * process it. Transaction errors and hardware discards are counted and
+ * acknowledged. Independently of port-write reception, port 1, port 2
+ * and message unit error indications in EPWISR are passed to their error
+ * handlers. Always returns %IRQ_HANDLED.
+ */
+static irqreturn_t
+fsl_rio_port_write_handler(int irq, void *dev_instance)
+{
+	u32 ipwmr, ipwsr;
+	struct fsl_rio_pw *pw = (struct fsl_rio_pw *)dev_instance;
+	u32 epwisr, tmp;
+
+	/* EPWISR tells which source raised this interrupt */
+	epwisr = in_be32(rio_regs_win + RIO_EPWISR);
+	if (!(epwisr & RIO_EPWISR_PW))
+		goto pw_done;
+
+	ipwmr = in_be32(&pw->pw_regs->pwmr);
+	ipwsr = in_be32(&pw->pw_regs->pwsr);
+
+#ifdef DEBUG_PW
+	pr_debug("PW Int->IPWMR: 0x%08x IPWSR: 0x%08x (", ipwmr, ipwsr);
+	if (ipwsr & RIO_IPWSR_QF)
+		pr_debug(" QF");
+	if (ipwsr & RIO_IPWSR_TE)
+		pr_debug(" TE");
+	if (ipwsr & RIO_IPWSR_QFI)
+		pr_debug(" QFI");
+	if (ipwsr & RIO_IPWSR_PWD)
+		pr_debug(" PWD");
+	if (ipwsr & RIO_IPWSR_PWB)
+		pr_debug(" PWB");
+	pr_debug(" )\n");
+#endif
+	/* Schedule deferred processing if PW was received */
+	if (ipwsr & RIO_IPWSR_QFI) {
+		/* Save PW message (if there is room in FIFO),
+		 * otherwise discard it.
+		 */
+		if (kfifo_avail(&pw->pw_fifo) >= RIO_PW_MSG_SIZE) {
+			pw->port_write_msg.msg_count++;
+			kfifo_in(&pw->pw_fifo, pw->port_write_msg.virt,
+				 RIO_PW_MSG_SIZE);
+		} else {
+			pw->port_write_msg.discard_count++;
+			pr_debug("RIO: ISR Discarded Port-Write Msg(s) (%d)\n",
+				 pw->port_write_msg.discard_count);
+		}
+		/* Clear interrupt and issue Clear Queue command. This allows
+		 * another port-write to be received.
+		 */
+		out_be32(&pw->pw_regs->pwsr,	RIO_IPWSR_QFI);
+		out_be32(&pw->pw_regs->pwmr, ipwmr | RIO_IPWMR_CQ);
+
+		/* The work is scheduled even when the message was discarded */
+		schedule_work(&pw->pw_work);
+	}
+
+	if ((ipwmr & RIO_IPWMR_EIE) && (ipwsr & RIO_IPWSR_TE)) {
+		pw->port_write_msg.err_count++;
+		pr_debug("RIO: Port-Write Transaction Err (%d)\n",
+			 pw->port_write_msg.err_count);
+		/* Clear Transaction Error: port-write controller should be
+		 * disabled when clearing this error
+		 */
+		out_be32(&pw->pw_regs->pwmr, ipwmr & ~RIO_IPWMR_PWE);
+		out_be32(&pw->pw_regs->pwsr,	RIO_IPWSR_TE);
+		/* Restore the mode register value sampled on entry */
+		out_be32(&pw->pw_regs->pwmr, ipwmr);
+	}
+
+	if (ipwsr & RIO_IPWSR_PWD) {
+		pw->port_write_msg.discard_count++;
+		pr_debug("RIO: Port Discarded Port-Write Msg(s) (%d)\n",
+			 pw->port_write_msg.discard_count);
+		out_be32(&pw->pw_regs->pwsr, RIO_IPWSR_PWD);
+	}
+
+pw_done:
+	/* Error sources are handled whether or not a port-write was pending */
+	if (epwisr & RIO_EPWISR_PINT1) {
+		tmp = in_be32(rio_regs_win + RIO_LTLEDCSR);
+		pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp);
+		fsl_rio_port_error_handler(0);
+	}
+
+	if (epwisr & RIO_EPWISR_PINT2) {
+		tmp = in_be32(rio_regs_win + RIO_LTLEDCSR);
+		pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp);
+		fsl_rio_port_error_handler(1);
+	}
+
+	if (epwisr & RIO_EPWISR_MU) {
+		tmp = in_be32(rio_regs_win + RIO_LTLEDCSR);
+		pr_debug("RIO_LTLEDCSR = 0x%x\n", tmp);
+		msg_unit_error_handler();
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Work handler: drains the port-write FIFO one message at a time and hands
+ * each message to the RapidIO core for every registered master port.
+ */
+static void fsl_pw_dpc(struct work_struct *work)
+{
+	struct fsl_rio_pw *pw = container_of(work, struct fsl_rio_pw, pw_work);
+	union rio_pw_msg msg_buffer;
+	int port;
+
+	for (;;) {
+		/* A zero return means the FIFO is empty */
+		if (!kfifo_out_spinlocked(&pw->pw_fifo,
+					  (unsigned char *)&msg_buffer,
+					  RIO_PW_MSG_SIZE, &pw->pw_fifo_lock))
+			break;
+#ifdef DEBUG_PW
+		{
+		u32 word;
+		pr_debug("%s : Port-Write Message:", __func__);
+		for (word = 0; word < RIO_PW_MSG_SIZE/sizeof(u32); word++) {
+			if ((word%4) == 0)
+				pr_debug("\n0x%02x: 0x%08x", word*4,
+					 msg_buffer.raw[word]);
+			else
+				pr_debug(" 0x%08x", msg_buffer.raw[word]);
+		}
+		pr_debug("\n");
+		}
+#endif
+		/* Pass the port-write message to RIO core for processing */
+		for (port = 0; port < MAX_PORT_NUM; port++) {
+			if (!pw->mport[port])
+				continue;
+			rio_inb_pwrite_handler(pw->mport[port], &msg_buffer);
+		}
+	}
+}
+
+/**
+ * fsl_rio_pw_enable - enable/disable port-write message reception
+ * @mport: Master port implementing the port write unit (not used; the
+ *         single port-write unit descriptor is used instead)
+ * @enable:    1=enable; 0=disable port-write message handling
+ *
+ * Sets or clears the PWE bit in the port-write mode register.
+ * Always returns %0.
+ */
+int fsl_rio_pw_enable(struct rio_mport *mport, int enable)
+{
+	u32 mode = in_be32(&pw->pw_regs->pwmr);
+
+	mode = enable ? (mode | RIO_IPWMR_PWE) : (mode & ~RIO_IPWMR_PWE);
+	out_be32(&pw->pw_regs->pwmr, mode);
+
+	return 0;
+}
+
+/**
+ * fsl_rio_port_write_init - MPC85xx port write interface init
+ * @pw: Port-write unit state (registers, IRQ number, device) to set up
+ *
+ * Initializes port write unit hardware, its DMA receive buffer and
+ * the software FIFO and work item used for deferred processing, then
+ * hooks up the interrupt handler. Called from fsl_rio_setup().
+ * Returns %0 on success, %-ENOMEM if the DMA buffer or FIFO cannot be
+ * allocated, or the negative error returned by request_irq().
+ */
+
+int fsl_rio_port_write_init(struct fsl_rio_pw *pw)
+{
+	int rc = 0;
+
+	/* Following configurations require a disabled port write controller */
+	out_be32(&pw->pw_regs->pwmr,
+		 in_be32(&pw->pw_regs->pwmr) & ~RIO_IPWMR_PWE);
+
+	/* Initialize port write */
+	pw->port_write_msg.virt = dma_alloc_coherent(pw->dev,
+					RIO_PW_MSG_SIZE,
+					&pw->port_write_msg.phys, GFP_KERNEL);
+	if (!pw->port_write_msg.virt) {
+		pr_err("RIO: unable allocate port write queue\n");
+		return -ENOMEM;
+	}
+
+	pw->port_write_msg.msg_count = 0;
+	pw->port_write_msg.err_count = 0;
+	pw->port_write_msg.discard_count = 0;
+
+	/*
+	 * The interrupt handler uses the work item and the FIFO, and the
+	 * IRQ is shared, so the handler may run as soon as it is requested.
+	 * Everything it touches is set up first.
+	 */
+	INIT_WORK(&pw->pw_work, fsl_pw_dpc);
+	spin_lock_init(&pw->pw_fifo_lock);
+	if (kfifo_alloc(&pw->pw_fifo, RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+		pr_err("FIFO allocation failed\n");
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	/* Point dequeue/enqueue pointers at first entry */
+	out_be32(&pw->pw_regs->epwqbar, 0);
+	out_be32(&pw->pw_regs->pwqbar, (u32) pw->port_write_msg.phys);
+
+	pr_debug("EIPWQBAR: 0x%08x IPWQBAR: 0x%08x\n",
+		 in_be32(&pw->pw_regs->epwqbar),
+		 in_be32(&pw->pw_regs->pwqbar));
+
+	/* Clear interrupt status IPWSR */
+	out_be32(&pw->pw_regs->pwsr,
+		 (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD));
+
+	/* Configure port write controller for snooping enable all reporting,
+	   clear queue full */
+	out_be32(&pw->pw_regs->pwmr,
+		 RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ);
+
+	/* Hook up port-write handler */
+	rc = request_irq(IRQ_RIO_PW(pw), fsl_rio_port_write_handler,
+			IRQF_SHARED, "port-write", (void *)pw);
+	if (rc < 0) {
+		pr_err("MPC85xx RIO: unable to request port-write irq\n");
+		goto err_out_fifo;
+	}
+	/* Enable Error Interrupt */
+	out_be32((u32 *)(rio_regs_win + RIO_LTLEECSR), LTLEECSR_ENABLE_ALL);
+
+	pr_debug("IPWMR: 0x%08x IPWSR: 0x%08x\n",
+		 in_be32(&pw->pw_regs->pwmr),
+		 in_be32(&pw->pw_regs->pwsr));
+
+	return rc;
+
+err_out_fifo:
+	kfifo_free(&pw->pw_fifo);
+err_out:
+	dma_free_coherent(pw->dev, RIO_PW_MSG_SIZE,
+		pw->port_write_msg.virt,
+		pw->port_write_msg.phys);
+	return rc;
+}
+
+/**
+ * fsl_rio_doorbell_send - Send a MPC85xx doorbell message
+ * @mport: RapidIO master port info (not referenced by this implementation)
+ * @index: ID of RapidIO interface
+ * @destid: Destination ID of target device
+ * @data: 16-bit info field of RapidIO doorbell message
+ *
+ * Programs the outbound doorbell registers of the dbell unit (defined
+ * outside this function) while holding fsl_rio_doorbell_lock. There is
+ * no failure path in this implementation; it always returns %0.
+ */
+int fsl_rio_doorbell_send(struct rio_mport *mport,
+				int index, u16 destid, u16 data)
+{
+	unsigned long flags;
+
+	pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n",
+		 index, destid, data);
+
+	/* The register sequence below must not interleave with another send */
+	spin_lock_irqsave(&fsl_rio_doorbell_lock, flags);
+
+	/* On the serial RapidIO silicon, such as MPC8548 and MPC8641,
+	 * the operations below are mandatory.
+	 */
+	out_be32(&dbell->dbell_regs->odmr, 0x00000000);
+	out_be32(&dbell->dbell_regs->odretcr, 0x00000004);
+	/* Destination ID goes in the upper half of the destination register */
+	out_be32(&dbell->dbell_regs->oddpr, destid << 16);
+	/* Interface index at bit 20 and above, doorbell info in the low bits */
+	out_be32(&dbell->dbell_regs->oddatr, (index << 20) | data);
+	/* NOTE(review): presumably bit 0 of ODMR starts the transfer - confirm */
+	out_be32(&dbell->dbell_regs->odmr, 0x00000001);
+
+	spin_unlock_irqrestore(&fsl_rio_doorbell_lock, flags);
+
+	return 0;
+}
+
+/**
+ * fsl_add_outb_message - Add message to the MPC85xx outbound message queue
+ * @mport: Master port with outbound message queue
+ * @rdev: Target of outbound message
+ * @mbox: Outbound mailbox
+ * @buffer: Message to add to outbound queue
+ * @len: Length of message
+ *
+ * Copies the @buffer message into the DMA buffer of the current TX slot,
+ * fills in that slot's descriptor and advances the slot index (wrapping
+ * at the ring size). Returns %0 on success or %-EINVAL when @len is
+ * smaller than 8 or larger than %RIO_MAX_MSG_SIZE.
+ */
+int
+fsl_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
+			void *buffer, size_t len)
+{
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+	u32 omr;
+	/* Descriptor belonging to the slot that will be filled next */
+	struct rio_tx_desc *desc = (struct rio_tx_desc *)rmu->msg_tx_ring.virt
+					+ rmu->msg_tx_ring.tx_slot;
+	int ret = 0;
+
+	pr_debug("RIO: fsl_add_outb_message(): destid %4.4x mbox %d buffer " \
+		 "%p len %8.8zx\n", rdev->destid, mbox, buffer, len);
+	if ((len < 8) || (len > RIO_MAX_MSG_SIZE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Copy and clear rest of buffer */
+	memcpy(rmu->msg_tx_ring.virt_buffer[rmu->msg_tx_ring.tx_slot], buffer,
+			len);
+	/*
+	 * NOTE(review): the tail is only zeroed when len is more than four
+	 * bytes short of the maximum - confirm this threshold is intended.
+	 */
+	if (len < (RIO_MAX_MSG_SIZE - 4))
+		memset(rmu->msg_tx_ring.virt_buffer[rmu->msg_tx_ring.tx_slot]
+				+ len, 0, RIO_MAX_MSG_SIZE - len);
+
+	/* Set mbox field for message, and set destid */
+	desc->dport = (rdev->destid << 16) | (mbox & 0x3);
+
+	/* Enable EOMI interrupt and priority */
+	desc->dattr = 0x28000000 | ((mport->index) << 20);
+
+	/*
+	 * Set transfer size aligned to next power of 2 (in double words).
+	 * NOTE(review): the value is derived directly from the byte length
+	 * @len - confirm the unit expected by the hardware.
+	 */
+	desc->dwcnt = is_power_of_2(len) ? len : 1 << get_bitmask_order(len);
+
+	/* Set snooping and source buffer address */
+	desc->saddr = 0x00000004
+		| rmu->msg_tx_ring.phys_buffer[rmu->msg_tx_ring.tx_slot];
+
+	/* Increment enqueue pointer */
+	omr = in_be32(&rmu->msg_regs->omr);
+	out_be32(&rmu->msg_regs->omr, omr | RIO_MSG_OMR_MUI);
+
+	/* Go to next descriptor */
+	if (++rmu->msg_tx_ring.tx_slot == rmu->msg_tx_ring.size)
+		rmu->msg_tx_ring.tx_slot = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * fsl_open_outb_mbox - Initialize MPC85xx outbound mailbox
+ * @mport: Master port implementing the outbound message unit
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the outbound mailbox ring
+ *
+ * Initializes buffer ring, request the outbound message interrupt,
+ * and enables the outbound message unit. Returns %0 on success and
+ * %-EINVAL or %-ENOMEM on failure (or the error from request_irq()).
+ * On failure every buffer allocated by this call is released again.
+ */
+int
+fsl_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
+{
+	int i, rc = 0;
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	if ((entries < RIO_MIN_TX_RING_SIZE) ||
+		(entries > RIO_MAX_TX_RING_SIZE) || (!is_power_of_2(entries))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Initialize shadow copy ring */
+	rmu->msg_tx_ring.dev_id = dev_id;
+	rmu->msg_tx_ring.size = entries;
+
+	/* One DMA-coherent message buffer per ring slot */
+	for (i = 0; i < rmu->msg_tx_ring.size; i++) {
+		rmu->msg_tx_ring.virt_buffer[i] =
+			dma_alloc_coherent(priv->dev, RIO_MSG_BUFFER_SIZE,
+				&rmu->msg_tx_ring.phys_buffer[i], GFP_KERNEL);
+		if (!rmu->msg_tx_ring.virt_buffer[i]) {
+			rc = -ENOMEM;
+			/* i is the number of slots allocated so far */
+			goto out_buf;
+		}
+	}
+
+	/* Initialize outbound message descriptor ring */
+	rmu->msg_tx_ring.virt = dma_alloc_coherent(priv->dev,
+						   rmu->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+						   &rmu->msg_tx_ring.phys,
+						   GFP_KERNEL);
+	if (!rmu->msg_tx_ring.virt) {
+		rc = -ENOMEM;
+		goto out_dma;
+	}
+	rmu->msg_tx_ring.tx_slot = 0;
+
+	/* Point dequeue/enqueue pointers at first entry in ring */
+	out_be32(&rmu->msg_regs->odqdpar, rmu->msg_tx_ring.phys);
+	out_be32(&rmu->msg_regs->odqepar, rmu->msg_tx_ring.phys);
+
+	/* Configure for snooping */
+	out_be32(&rmu->msg_regs->osar, 0x00000004);
+
+	/* Clear interrupt status */
+	out_be32(&rmu->msg_regs->osr, 0x000000b3);
+
+	/* Hook up outbound message handler */
+	rc = request_irq(IRQ_RIO_TX(mport), fsl_rio_tx_handler, 0,
+			 "msg_tx", (void *)mport);
+	if (rc < 0)
+		goto out_irq;
+
+	/*
+	 * Configure outbound message unit
+	 *      Snooping
+	 *      Interrupts (all enabled, except QEIE)
+	 *      Chaining mode
+	 *      Disable
+	 */
+	out_be32(&rmu->msg_regs->omr, 0x00100220);
+
+	/* Set number of entries */
+	out_be32(&rmu->msg_regs->omr,
+		 in_be32(&rmu->msg_regs->omr) |
+		 ((get_bitmask_order(entries) - 2) << 12));
+
+	/* Now enable the unit */
+	out_be32(&rmu->msg_regs->omr, in_be32(&rmu->msg_regs->omr) | 0x1);
+
+out:
+	return rc;
+
+out_irq:
+	dma_free_coherent(priv->dev,
+		rmu->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+		rmu->msg_tx_ring.virt, rmu->msg_tx_ring.phys);
+
+out_dma:
+	/* Every slot was allocated when we get here */
+	i = rmu->msg_tx_ring.size;
+
+out_buf:
+	/*
+	 * Free only slots [0, i) that this call allocated. Entries beyond
+	 * that may still hold pointers from an earlier open and must not be
+	 * freed again; clear what we free so no stale pointer is left.
+	 */
+	while (i--) {
+		dma_free_coherent(priv->dev, RIO_MSG_BUFFER_SIZE,
+				  rmu->msg_tx_ring.virt_buffer[i],
+				  rmu->msg_tx_ring.phys_buffer[i]);
+		rmu->msg_tx_ring.virt_buffer[i] = NULL;
+	}
+
+	return rc;
+}
+
+/**
+ * fsl_close_outb_mbox - Shut down MPC85xx outbound mailbox
+ * @mport: Master port implementing the outbound message unit
+ * @mbox: Mailbox to close
+ *
+ * Disables the outbound message unit, frees the descriptor ring and the
+ * per-slot message buffers, and frees the outbound message interrupt.
+ */
+void fsl_close_outb_mbox(struct rio_mport *mport, int mbox)
+{
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+	int i;
+
+	/* Disable outbound message unit */
+	out_be32(&rmu->msg_regs->omr, 0);
+
+	/* Free descriptor ring */
+	dma_free_coherent(priv->dev,
+	rmu->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+	rmu->msg_tx_ring.virt, rmu->msg_tx_ring.phys);
+
+	/*
+	 * Free the message buffers that fsl_open_outb_mbox() allocated for
+	 * each slot; without this they are leaked on every open/close cycle.
+	 */
+	for (i = 0; i < rmu->msg_tx_ring.size; i++) {
+		if (!rmu->msg_tx_ring.virt_buffer[i])
+			continue;
+		dma_free_coherent(priv->dev, RIO_MSG_BUFFER_SIZE,
+				  rmu->msg_tx_ring.virt_buffer[i],
+				  rmu->msg_tx_ring.phys_buffer[i]);
+		rmu->msg_tx_ring.virt_buffer[i] = NULL;
+	}
+
+	/* Free interrupt */
+	free_irq(IRQ_RIO_TX(mport), (void *)mport);
+}
+
+/**
+ * fsl_open_inb_mbox - Initialize MPC85xx inbound mailbox
+ * @mport: Master port implementing the inbound message unit
+ * @dev_id: Device specific pointer to pass on event
+ * @mbox: Mailbox to open
+ * @entries: Number of entries in the inbound mailbox ring
+ *
+ * Initializes buffer ring, request the inbound message interrupt,
+ * and enables the inbound message unit. Returns %0 on success
+ * and %-EINVAL or %-ENOMEM on failure; an error from request_irq()
+ * is returned unchanged.
+ */
+int
+fsl_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
+{
+	int i, rc = 0;
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	/* Ring size must be a power of two within the supported range */
+	if ((entries < RIO_MIN_RX_RING_SIZE) ||
+		(entries > RIO_MAX_RX_RING_SIZE) || (!is_power_of_2(entries))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Initialize client buffer ring */
+	rmu->msg_rx_ring.dev_id = dev_id;
+	rmu->msg_rx_ring.size = entries;
+	rmu->msg_rx_ring.rx_slot = 0;
+	/* No client buffers are attached yet; see fsl_add_inb_buffer() */
+	for (i = 0; i < rmu->msg_rx_ring.size; i++)
+		rmu->msg_rx_ring.virt_buffer[i] = NULL;
+
+	/* Initialize inbound message ring */
+	rmu->msg_rx_ring.virt = dma_alloc_coherent(priv->dev,
+				rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+				&rmu->msg_rx_ring.phys, GFP_KERNEL);
+	if (!rmu->msg_rx_ring.virt) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Point dequeue/enqueue pointers at first entry in ring */
+	out_be32(&rmu->msg_regs->ifqdpar, (u32) rmu->msg_rx_ring.phys);
+	out_be32(&rmu->msg_regs->ifqepar, (u32) rmu->msg_rx_ring.phys);
+
+	/* Clear interrupt status */
+	out_be32(&rmu->msg_regs->isr, 0x00000091);
+
+	/* Hook up inbound message handler */
+	rc = request_irq(IRQ_RIO_RX(mport), fsl_rio_rx_handler, 0,
+			 "msg_rx", (void *)mport);
+	if (rc < 0) {
+		/* Undo the ring allocation; nothing else was acquired */
+		dma_free_coherent(priv->dev,
+			rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+			rmu->msg_rx_ring.virt, rmu->msg_rx_ring.phys);
+		goto out;
+	}
+
+	/*
+	 * Configure inbound message unit:
+	 *      Snooping
+	 *      4KB max message size
+	 *      Unmask all interrupt sources
+	 *      Disable
+	 */
+	out_be32(&rmu->msg_regs->imr, 0x001b0060);
+
+	/* Set number of queue entries */
+	setbits32(&rmu->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12);
+
+	/* Now enable the unit */
+	setbits32(&rmu->msg_regs->imr, 0x1);
+
+out:
+	return rc;
+}
+
+/**
+ * fsl_close_inb_mbox - Shut down MPC85xx inbound mailbox
+ * @mport: Master port implementing the inbound message unit
+ * @mbox: Mailbox to close
+ *
+ * Disables the inbound message unit, frees the inbound message ring, and
+ * frees the inbound message interrupt. Client buffers handed in through
+ * fsl_add_inb_buffer() are not touched here.
+ */
+void fsl_close_inb_mbox(struct rio_mport *mport, int mbox)
+{
+	struct rio_priv *priv = mport->priv;
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	/* Disable inbound message unit */
+	out_be32(&rmu->msg_regs->imr, 0);
+
+	/* Free ring */
+	dma_free_coherent(priv->dev, rmu->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+	rmu->msg_rx_ring.virt, rmu->msg_rx_ring.phys);
+
+	/* Free interrupt */
+	free_irq(IRQ_RIO_RX(mport), (void *)mport);
+}
+
+/**
+ * fsl_add_inb_buffer - Add buffer to the MPC85xx inbound message queue
+ * @mport: Master port implementing the inbound message unit
+ * @mbox: Inbound mailbox number
+ * @buf: Buffer to add to inbound queue
+ *
+ * Attaches @buf to the current receive slot and moves on to the next
+ * slot, wrapping at the end of the ring. Returns %0 on success or
+ * %-EINVAL if the current slot already holds a buffer.
+ */
+int fsl_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
+{
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+
+	pr_debug("RIO: fsl_add_inb_buffer(), msg_rx_ring.rx_slot %d\n",
+		 rmu->msg_rx_ring.rx_slot);
+
+	/* A non-NULL entry means the slot has not been consumed yet */
+	if (rmu->msg_rx_ring.virt_buffer[rmu->msg_rx_ring.rx_slot] != NULL) {
+		printk(KERN_ERR
+			"RIO: error adding inbound buffer %d, buffer exists\n",
+			rmu->msg_rx_ring.rx_slot);
+		return -EINVAL;
+	}
+
+	rmu->msg_rx_ring.virt_buffer[rmu->msg_rx_ring.rx_slot] = buf;
+
+	/* Advance to the next slot, wrapping around at the ring size */
+	rmu->msg_rx_ring.rx_slot++;
+	if (rmu->msg_rx_ring.rx_slot == rmu->msg_rx_ring.size)
+		rmu->msg_rx_ring.rx_slot = 0;
+
+	return 0;
+}
+
+/**
+ * fsl_get_inb_message - Fetch inbound message from the MPC85xx message unit
+ * @mport: Master port implementing the inbound message unit
+ * @mbox: Inbound mailbox number
+ *
+ * Gets the next available inbound message from the inbound message queue.
+ * The message is copied into the client buffer attached to the matching
+ * ring slot and that buffer is returned. NULL is returned when the queue
+ * is empty or when no client buffer is attached to the slot.
+ */
+void *fsl_get_inb_message(struct rio_mport *mport, int mbox)
+{
+	struct fsl_rmu *rmu = GET_RMM_HANDLE(mport);
+	u32 phys_buf;
+	void *virt_buf;
+	void *buf = NULL;
+	int buf_idx;
+
+	/* Bus address of the frame at the dequeue pointer */
+	phys_buf = in_be32(&rmu->msg_regs->ifqdpar);
+
+	/* If no more messages, then bail out */
+	if (phys_buf == in_be32(&rmu->msg_regs->ifqepar))
+		goto out2;
+
+	/* Translate the bus address into a ring offset and slot index */
+	virt_buf = rmu->msg_rx_ring.virt + (phys_buf
+						- rmu->msg_rx_ring.phys);
+	buf_idx = (phys_buf - rmu->msg_rx_ring.phys) / RIO_MAX_MSG_SIZE;
+	buf = rmu->msg_rx_ring.virt_buffer[buf_idx];
+
+	if (!buf) {
+		printk(KERN_ERR
+			"RIO: inbound message copy failed, no buffers\n");
+		/* The MI bit is still set below, without copying the frame */
+		goto out1;
+	}
+
+	/* Copy max message size, caller is expected to allocate that big */
+	memcpy(buf, virt_buf, RIO_MAX_MSG_SIZE);
+
+	/* Clear the available buffer */
+	rmu->msg_rx_ring.virt_buffer[buf_idx] = NULL;
+
+out1:
+	/* NOTE(review): presumably MI advances the dequeue pointer - confirm */
+	setbits32(&rmu->msg_regs->imr, RIO_MSG_IMR_MI);
+
+out2:
+	return buf;
+}
+
+/**
+ * fsl_rio_doorbell_init - MPC85xx doorbell interface init
+ * @dbell: Doorbell unit state holding the register block and ring
+ *
+ * Initializes doorbell unit hardware and inbound DMA buffer
+ * ring. Called from fsl_rio_setup(). Returns %0 on success,
+ * %-ENOMEM if the ring cannot be allocated, or the error returned
+ * by request_irq().
+ */
+int fsl_rio_doorbell_init(struct fsl_rio_dbell *dbell)
+{
+	int rc = 0;
+
+	/* Initialize inbound doorbells */
+	dbell->dbell_ring.virt = dma_alloc_coherent(dbell->dev, 512 *
+		DOORBELL_MESSAGE_SIZE, &dbell->dbell_ring.phys, GFP_KERNEL);
+	if (!dbell->dbell_ring.virt) {
+		printk(KERN_ERR "RIO: unable allocate inbound doorbell ring\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Point dequeue/enqueue pointers at first entry in ring */
+	out_be32(&dbell->dbell_regs->dqdpar, (u32) dbell->dbell_ring.phys);
+	out_be32(&dbell->dbell_regs->dqepar, (u32) dbell->dbell_ring.phys);
+
+	/* Clear interrupt status */
+	out_be32(&dbell->dbell_regs->dsr, 0x00000091);
+
+	/* Hook up doorbell handler */
+	rc = request_irq(IRQ_RIO_BELL(dbell), fsl_rio_dbell_handler, 0,
+			 "dbell_rx", (void *)dbell);
+	if (rc < 0) {
+		/* Release the ring again; it is the only resource held */
+		dma_free_coherent(dbell->dev, 512 * DOORBELL_MESSAGE_SIZE,
+			 dbell->dbell_ring.virt, dbell->dbell_ring.phys);
+		printk(KERN_ERR
+			"MPC85xx RIO: unable to request inbound doorbell irq\n");
+		goto out;
+	}
+
+	/* Configure doorbells for snooping, 512 entries, and enable */
+	out_be32(&dbell->dbell_regs->dmr, 0x00108161);
+
+out:
+	return rc;
+}
+
+/**
+ * fsl_rio_setup_rmu - Allocate and attach message unit state to a port
+ * @mport: Master port to attach to; must have its private data set
+ * @node: Device-tree node of the message unit
+ *
+ * Allocates a struct fsl_rmu, derives its register pointer from the first
+ * 'reg' entry of @node used as an offset into rmu_regs_win, maps the TX
+ * and RX interrupts (interrupts 0 and 1 of @node) and stores the result
+ * in the port's rmm_handle. Also initializes the doorbell and mailbox
+ * resource ranges of @mport.
+ *
+ * Returns %0 on success, %-EINVAL if @mport, its private data or @node is
+ * missing, and %-ENOMEM if the allocation fails or 'reg' cannot be read.
+ */
+int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
+{
+	struct rio_priv *priv;
+	struct fsl_rmu *rmu;
+	u64 msg_start;
+
+	if (!mport || !mport->priv)
+		return -EINVAL;
+
+	priv = mport->priv;
+
+	if (!node) {
+		dev_warn(priv->dev, "Can't get %pOF property 'fsl,rmu'\n",
+			priv->dev->of_node);
+		return -EINVAL;
+	}
+
+	rmu = kzalloc(sizeof(struct fsl_rmu), GFP_KERNEL);
+	if (!rmu)
+		return -ENOMEM;
+
+	if (of_property_read_reg(node, 0, &msg_start, NULL)) {
+		pr_err("%pOF: unable to find 'reg' property of message-unit\n",
+			node);
+		kfree(rmu);
+		/* NOTE(review): -ENOMEM for a missing property - confirm */
+		return -ENOMEM;
+	}
+	/* Only the low 32 bits of the 'reg' address are used as the offset */
+	rmu->msg_regs = (struct rio_msg_regs *)
+			(rmu_regs_win + (u32)msg_start);
+
+	/* NOTE(review): the mapping results are not checked for failure */
+	rmu->txirq = irq_of_parse_and_map(node, 0);
+	rmu->rxirq = irq_of_parse_and_map(node, 1);
+	printk(KERN_INFO "%pOF: txirq: %d, rxirq %d\n",
+		node, rmu->txirq, rmu->rxirq);
+
+	priv->rmm_handle = rmu;
+
+	rio_init_dbell_res(&mport->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
+	rio_init_mbox_res(&mport->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
+	rio_init_mbox_res(&mport->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0);
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
new file mode 100644
index 0000000000..3949ceb79e
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * FSL SoC setup code
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/spi/spi.h>
+#include <linux/fsl_devices.h>
+#include <linux/reboot.h>
+
+#include <linux/atomic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <sysdev/fsl_soc.h>
+#include <mm/mmu_decl.h>
+#include <asm/cpm2.h>
+#include <asm/fsl_hcalls.h>	/* For the Freescale hypervisor */
+
+/* Cached result of get_immrbase(); -1 means "not looked up yet" */
+static phys_addr_t immrbase = -1;
+
+/*
+ * Return the start address of range 0 of the first device-tree node of
+ * type "soc". The value is cached after the first successful lookup;
+ * -1 is returned while no such node or range can be found.
+ */
+phys_addr_t get_immrbase(void)
+{
+	struct device_node *soc;
+	struct resource res;
+
+	if (immrbase != -1)
+		return immrbase;
+
+	soc = of_find_node_by_type(NULL, "soc");
+	if (!soc)
+		return immrbase;
+
+	if (of_range_to_resource(soc, 0, &res) == 0)
+		immrbase = res.start;
+
+	/* Drop the reference taken by of_find_node_by_type() */
+	of_node_put(soc);
+
+	return immrbase;
+}
+
+EXPORT_SYMBOL(get_immrbase);
+
+/*
+ * Return the frequency recorded in the "soc" device-tree node: the
+ * "clock-frequency" property, or "bus-frequency" when the former is
+ * absent or zero. The result is cached; (u32)-1 means unknown.
+ */
+u32 fsl_get_sys_freq(void)
+{
+	static u32 sysfreq = -1;
+	struct device_node *soc;
+
+	/* Already resolved on an earlier call */
+	if (sysfreq != -1)
+		return sysfreq;
+
+	soc = of_find_node_by_type(NULL, "soc");
+	if (soc == NULL)
+		return -1;
+
+	of_property_read_u32(soc, "clock-frequency", &sysfreq);
+
+	/* Fall back to the bus frequency if nothing usable was read */
+	if (!sysfreq || sysfreq == -1)
+		of_property_read_u32(soc, "bus-frequency", &sysfreq);
+
+	of_node_put(soc);
+
+	return sysfreq;
+}
+EXPORT_SYMBOL(fsl_get_sys_freq);
+
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
+
+/*
+ * Return the baud-rate generator frequency from the device tree.
+ *
+ * A "fsl,cpm-brg" node supplies it through "clock-frequency". Otherwise
+ * the legacy "cpm"/"qe" nodes are consulted: "brg-frequency" is used if
+ * present and non-zero, else half of "bus-frequency". The result is
+ * cached; (u32)-1 means unknown.
+ */
+u32 get_brgfreq(void)
+{
+	static u32 brgfreq = -1;
+	struct device_node *node;
+
+	if (brgfreq != -1)
+		return brgfreq;
+
+	/* Current binding */
+	node = of_find_compatible_node(NULL, NULL, "fsl,cpm-brg");
+	if (node) {
+		of_property_read_u32(node, "clock-frequency", &brgfreq);
+		of_node_put(node);
+		return brgfreq;
+	}
+
+	/* Legacy device binding -- will go away when no users are left. */
+	node = of_find_node_by_type(NULL, "cpm");
+	if (!node)
+		node = of_find_compatible_node(NULL, NULL, "fsl,qe");
+	if (!node)
+		node = of_find_node_by_type(NULL, "qe");
+	if (!node)
+		return brgfreq;
+
+	of_property_read_u32(node, "brg-frequency", &brgfreq);
+	if (!brgfreq || brgfreq == -1) {
+		/* Derive the BRG clock as half of the bus frequency */
+		if (of_property_read_u32(node, "bus-frequency", &brgfreq) == 0)
+			brgfreq /= 2;
+	}
+	of_node_put(node);
+
+	return brgfreq;
+}
+
+EXPORT_SYMBOL(get_brgfreq);
+
+/*
+ * Return the "current-speed" property of the first device-tree node of
+ * type "serial". The result is cached; (u32)-1 means unknown.
+ */
+u32 get_baudrate(void)
+{
+	static u32 fs_baudrate = -1;
+	struct device_node *node;
+
+	if (fs_baudrate != -1)
+		return fs_baudrate;
+
+	node = of_find_node_by_type(NULL, "serial");
+	if (!node)
+		return fs_baudrate;
+
+	of_property_read_u32(node, "current-speed", &fs_baudrate);
+	of_node_put(node);
+
+	return fs_baudrate;
+}
+
+EXPORT_SYMBOL(get_baudrate);
+#endif /* CONFIG_CPM || CONFIG_QUICC_ENGINE */
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+/* Mapped reset control register; set up by setup_rstcr() */
+static __be32 __iomem *rstcr;
+
+/*
+ * Restart notifier callback: disables local interrupts and writes the
+ * HRESET_REQ value to the reset control register. @this, @mode and @cmd
+ * are not used. Returns NOTIFY_DONE.
+ */
+static int fsl_rstcr_restart(struct notifier_block *this,
+			     unsigned long mode, void *cmd)
+{
+	local_irq_disable();
+	/* set reset control register */
+	out_be32(rstcr, 0x2);	/* HRESET_REQ */
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Locate the first "global-utilities" node carrying "fsl,has-rstcr",
+ * map its registers and register fsl_rstcr_restart() as restart handler.
+ * Always returns 0; a mapping failure is only logged.
+ */
+static int __init setup_rstcr(void)
+{
+	struct device_node *np;
+
+	static struct notifier_block restart_handler = {
+		.notifier_call = fsl_rstcr_restart,
+		.priority = 128,
+	};
+
+	for_each_node_by_name(np, "global-utilities") {
+		void __iomem *base;
+
+		if (!of_property_read_bool(np, "fsl,has-rstcr"))
+			continue;
+
+		/*
+		 * Check the mapping itself: testing the pointer after the
+		 * 0xb0 offset has been added can never detect a failure.
+		 */
+		base = of_iomap(np, 0);
+		if (!base) {
+			printk (KERN_ERR "Error: reset control "
+					"register not mapped!\n");
+		} else {
+			/* The reset control register sits at offset 0xb0 */
+			rstcr = base + 0xb0;
+			register_restart_handler(&restart_handler);
+		}
+		break;
+	}
+
+	/* np still holds a reference after the break; NULL otherwise */
+	of_node_put(np);
+
+	return 0;
+}
+
+arch_initcall(setup_rstcr);
+
+#endif
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+/*
+ * Single exported instance of the DIU callback table; only defined when
+ * the Freescale DIU framebuffer is built in or as a module.
+ */
+struct platform_diu_data_ops diu_ops;
+EXPORT_SYMBOL(diu_ops);
+#endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+/*
+ * Restart the current partition
+ *
+ * Meant to be installed as the ppc_md.restart hook when running as a
+ * guest of the Freescale hypervisor. Asks the hypervisor to restart this
+ * partition and then spins; it never returns. @cmd is ignored.
+ */
+void __noreturn fsl_hv_restart(char *cmd)
+{
+	pr_info("hv restart\n");
+	fh_partition_restart(-1);
+
+	/* Never hand control back to the caller */
+	for (;;)
+		;
+}
+
+/*
+ * Halt the current partition
+ *
+ * Meant to be installed as the pm_power_off and ppc_md.halt hooks when
+ * running as a guest of the Freescale hypervisor. Asks the hypervisor to
+ * stop this partition and then spins; it never returns.
+ */
+void __noreturn fsl_hv_halt(void)
+{
+	pr_info("hv exit\n");
+	fh_partition_stop(-1);
+
+	/* Never hand control back to the caller */
+	for (;;)
+		;
+}
+#endif
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
new file mode 100644
index 0000000000..db11b06eb3
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PPC_FSL_SOC_H
+#define __PPC_FSL_SOC_H
+#ifdef __KERNEL__
+
+#include <asm/mmu.h>
+
+struct spi_device;
+
+/* Base address of the SoC register space, or -1 if it cannot be found */
+extern phys_addr_t get_immrbase(void);
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
+/* Device-tree lookups implemented in fsl_soc.c */
+extern u32 get_brgfreq(void);
+extern u32 get_baudrate(void);
+#else
+/* Without CPM/QE support both helpers report "unknown" as (u32)-1 */
+static inline u32 get_brgfreq(void) { return -1; }
+static inline u32 get_baudrate(void) { return -1; }
+#endif
+extern u32 fsl_get_sys_freq(void);
+
+struct spi_board_info;
+struct device_node;
+
+/*
+ * The different ports that the DIU can be connected to. Used as the port
+ * argument of the struct platform_diu_data_ops callbacks.
+ */
+enum fsl_diu_monitor_port {
+	FSL_DIU_PORT_DVI,	/* DVI */
+	FSL_DIU_PORT_LVDS,	/* Single-link LVDS */
+	FSL_DIU_PORT_DLVDS	/* Dual-link LVDS */
+};
+
+/*
+ * Table of board-specific DIU callbacks. A single instance, diu_ops, is
+ * declared in this header. The per-member notes are derived from the
+ * names and signatures only - confirm against the implementing platform
+ * code.
+ */
+struct platform_diu_data_ops {
+	/* presumably maps a port and bits-per-pixel to a pixel format word */
+	u32 (*get_pixel_format)(enum fsl_diu_monitor_port port,
+		unsigned int bpp);
+	/* takes the port and the base of a gamma table */
+	void (*set_gamma_table)(enum fsl_diu_monitor_port port,
+		char *gamma_table_base);
+	/* selects the monitor port */
+	void (*set_monitor_port)(enum fsl_diu_monitor_port port);
+	/* takes a pixel clock value; units not visible here */
+	void (*set_pixel_clock)(unsigned int pixclock);
+	/* takes a requested port and returns a port */
+	enum fsl_diu_monitor_port (*valid_monitor_port)
+		(enum fsl_diu_monitor_port port);
+	/* presumably frees memory reserved at boot - TODO confirm */
+	void (*release_bootmem)(void);
+};
+
+/* Defined in fsl_soc.c when the DIU framebuffer driver is enabled */
+extern struct platform_diu_data_ops diu_ops;
+
+/* Freescale hypervisor partition restart/halt hooks; neither returns */
+void __noreturn fsl_hv_restart(char *cmd);
+void __noreturn fsl_hv_halt(void);
+
+#endif
+#endif
diff --git a/arch/powerpc/sysdev/ge/Makefile b/arch/powerpc/sysdev/ge/Makefile
new file mode 100644
index 0000000000..a63fdb3790
--- /dev/null
+++ b/arch/powerpc/sysdev/ge/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_GE_FPGA)		+= ge_pic.o
diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c
new file mode 100644
index 0000000000..a6c424680c
--- /dev/null
+++ b/arch/powerpc/sysdev/ge/ge_pic.c
@@ -0,0 +1,253 @@
+/*
+ * Interrupt handling for GE FPGA based PIC
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * 2008 (c) GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/spinlock.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "ge_pic.h"
+
+/*
+ * Debug output switch: DEBUG is defined and immediately undefined, so by
+ * default DBG() expands to nothing. Remove the #undef line to get
+ * KERN_DEBUG messages prefixed with "gef_pic: ".
+ */
+#define DEBUG
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_DEBUG "gef_pic: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+/* Number of interrupt inputs handled by the controller */
+#define GEF_PIC_NUM_IRQS	32
+
+/* Interrupt Controller Interface Registers */
+#define GEF_PIC_INTR_STATUS	0x0000
+
+/*
+ * Per-CPU register offsets, 4 bytes apart. The argument is parenthesised
+ * so that an expression such as "n + 1" expands correctly.
+ */
+#define GEF_PIC_INTR_MASK(cpu)	(0x0010 + (0x4 * (cpu)))
+#define GEF_PIC_CPU0_INTR_MASK	GEF_PIC_INTR_MASK(0)
+#define GEF_PIC_CPU1_INTR_MASK	GEF_PIC_INTR_MASK(1)
+
+#define GEF_PIC_MCP_MASK(cpu)	(0x0018 + (0x4 * (cpu)))
+#define GEF_PIC_CPU0_MCP_MASK	GEF_PIC_MCP_MASK(0)
+#define GEF_PIC_CPU1_MCP_MASK	GEF_PIC_MCP_MASK(1)
+
+
+/* Serializes read-modify-write access to the mask registers */
+static DEFINE_RAW_SPINLOCK(gef_pic_lock);
+
+/* Mapped register block; set by gef_pic_init() */
+static void __iomem *gef_pic_irq_reg_base;
+/* Linear irq domain covering the GEF_PIC_NUM_IRQS inputs */
+static struct irq_domain *gef_pic_irq_host;
+/* Linux irq number of the parent interrupt this PIC cascades into */
+static int gef_pic_cascade_irq;
+
+/*
+ * Interrupt Controller Handling
+ *
+ * The interrupt controller handles interrupts for most on board interrupts,
+ * apart from PCI interrupts. For example on SBC610:
+ *
+ * 17:31 RO Reserved
+ * 16    RO PCI Express Doorbell 3 Status
+ * 15    RO PCI Express Doorbell 2 Status
+ * 14    RO PCI Express Doorbell 1 Status
+ * 13    RO PCI Express Doorbell 0 Status
+ * 12    RO Real Time Clock Interrupt Status
+ * 11    RO Temperature Interrupt Status
+ * 10    RO Temperature Critical Interrupt Status
+ * 9     RO Ethernet PHY1 Interrupt Status
+ * 8     RO Ethernet PHY3 Interrupt Status
+ * 7     RO PEX8548 Interrupt Status
+ * 6     RO Reserved
+ * 5     RO Watchdog 0 Interrupt Status
+ * 4     RO Watchdog 1 Interrupt Status
+ * 3     RO AXIS Message FIFO A Interrupt Status
+ * 2     RO AXIS Message FIFO B Interrupt Status
+ * 1     RO AXIS Message FIFO C Interrupt Status
+ * 0     RO AXIS Message FIFO D Interrupt Status
+ *
+ * Interrupts can be forwarded to one of two output lines. Nothing
+ * clever is done, so if the masks are incorrectly set, a single input
+ * interrupt could generate interrupts on both output lines!
+ *
+ * The dual lines are there to allow the chained interrupts to be easily
+ * passed into two different cores. We currently do not use this functionality
+ * in this driver.
+ *
+ * Controller can also be configured to generate Machine checks (MCP), again on
+ * two lines, to be attached to two different cores. It is suggested that these
+ * should be masked out.
+ */
+
+/*
+ * Chained handler installed on the parent (cascade) interrupt. Looks up
+ * the pending PIC interrupt, dispatches it if there is one, and then
+ * issues an EOI through the parent interrupt's chip.
+ */
+static void gef_pic_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq;
+
+	/*
+	 * See if we actually have an interrupt, call generic handling code if
+	 * we do.
+	 */
+	cascade_irq = gef_pic_get_irq();
+
+	/* gef_pic_get_irq() returns 0 when nothing is pending */
+	if (cascade_irq)
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Mask an interrupt by clearing its bit in the CPU0 interrupt mask
+ * register (a cleared bit means masked). The read-modify-write is done
+ * under gef_pic_lock.
+ */
+static void gef_pic_mask(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int hwirq = irqd_to_hwirq(d);
+	u32 mask;
+
+	raw_spin_lock_irqsave(&gef_pic_lock, flags);
+	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
+	/* Unsigned constant: shifting a signed 1 by 31 is undefined */
+	mask &= ~(1U << hwirq);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0), mask);
+	raw_spin_unlock_irqrestore(&gef_pic_lock, flags);
+}
+
+/* Mask-and-acknowledge callback; this implementation only masks */
+static void gef_pic_mask_ack(struct irq_data *d)
+{
+	/* Don't think we actually have to do anything to ack an interrupt,
+	 * we just need to clear down the devices interrupt and it will go away
+	 */
+	gef_pic_mask(d);
+}
+
+/*
+ * Unmask an interrupt by setting its bit in the CPU0 interrupt mask
+ * register. The read-modify-write is done under gef_pic_lock.
+ */
+static void gef_pic_unmask(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int hwirq = irqd_to_hwirq(d);
+	u32 mask;
+
+	raw_spin_lock_irqsave(&gef_pic_lock, flags);
+	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
+	/* Unsigned constant: shifting a signed 1 by 31 is undefined */
+	mask |= (1U << hwirq);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0), mask);
+	raw_spin_unlock_irqrestore(&gef_pic_lock, flags);
+}
+
+/* irq_chip attached to every interrupt mapped by gef_pic_host_map() */
+static struct irq_chip gef_pic_chip = {
+	.name		= "gefp",
+	.irq_mask	= gef_pic_mask,
+	.irq_mask_ack	= gef_pic_mask_ack,
+	.irq_unmask	= gef_pic_unmask,
+};
+
+
+/* When an interrupt is being configured, this call allows some flexibility
+ * in deciding which irq_chip structure is used.
+ *
+ * Here every virq gets gef_pic_chip with the level-type flow handler;
+ * @h and @hwirq are not consulted. Always returns 0.
+ */
+static int gef_pic_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hwirq)
+{
+	/* All interrupts are LEVEL sensitive */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &gef_pic_chip, handle_level_irq);
+
+	return 0;
+}
+
+/*
+ * Translate a device-tree interrupt specifier: the first cell is the
+ * hardware interrupt number, the optional second cell the type flags.
+ * With a single cell the type defaults to level-high. Always returns 0.
+ */
+static int gef_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	*out_hwirq = intspec[0];
+	*out_flags = (intsize > 1) ? intspec[1] : IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+/* Domain callbacks passed to irq_domain_add_linear() in gef_pic_init() */
+static const struct irq_domain_ops gef_pic_host_ops = {
+	.map	= gef_pic_host_map,
+	.xlate	= gef_pic_host_xlate,
+};
+
+
+/*
+ * Initialisation of PIC, this should be called in BSP
+ *
+ * Maps the controller registers from @np, masks every interrupt and
+ * machine-check source for both CPUs, creates the irq domain and chains
+ * the controller onto its parent interrupt. On any failure the function
+ * returns early, leaving the cascade handler uninstalled.
+ */
+void __init gef_pic_init(struct device_node *np)
+{
+	unsigned long flags;
+
+	/* Map the devices registers into memory */
+	gef_pic_irq_reg_base = of_iomap(np, 0);
+	if (!gef_pic_irq_reg_base) {
+		/* Without a mapping the register writes below would fault */
+		printk(KERN_ERR "SBC610: failed to map PIC registers\n");
+		return;
+	}
+
+	raw_spin_lock_irqsave(&gef_pic_lock, flags);
+
+	/* Initialise everything as masked. */
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU0_INTR_MASK, 0);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU1_INTR_MASK, 0);
+
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU0_MCP_MASK, 0);
+	out_be32(gef_pic_irq_reg_base + GEF_PIC_CPU1_MCP_MASK, 0);
+
+	raw_spin_unlock_irqrestore(&gef_pic_lock, flags);
+
+	/* Map controller */
+	gef_pic_cascade_irq = irq_of_parse_and_map(np, 0);
+	if (!gef_pic_cascade_irq) {
+		printk(KERN_ERR "SBC610: failed to map cascade interrupt\n");
+		return;
+	}
+
+	/* Setup an irq_domain structure */
+	gef_pic_irq_host = irq_domain_add_linear(np, GEF_PIC_NUM_IRQS,
+					  &gef_pic_host_ops, NULL);
+	if (gef_pic_irq_host == NULL)
+		return;
+
+	/* Chain with parent controller */
+	irq_set_chained_handler(gef_pic_cascade_irq, gef_pic_cascade);
+}
+
+/*
+ * This is called when we receive an interrupt which apparently comes from
+ * this chip - check, returning the Linux irq of the highest-numbered
+ * pending and unmasked input, or 0 if there is none.
+ */
+unsigned int gef_pic_get_irq(void)
+{
+	u32 cause, mask, active;
+	unsigned int virq = 0;
+	int hwirq;
+
+	cause = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_STATUS);
+
+	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
+
+	/* Only sources that are both asserted and unmasked count */
+	active = cause & mask;
+
+	if (active) {
+		/* Scan downwards; active != 0 guarantees a hit */
+		for (hwirq = GEF_PIC_NUM_IRQS - 1; hwirq > -1; hwirq--) {
+			/* Unsigned constant: 1 << 31 on an int is undefined */
+			if (active & (1U << hwirq))
+				break;
+		}
+		virq = irq_linear_revmap(gef_pic_irq_host,
+			(irq_hw_number_t)hwirq);
+	}
+
+	return virq;
+}
+
diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h
new file mode 100644
index 0000000000..923dedba3c
--- /dev/null
+++ b/arch/powerpc/sysdev/ge/ge_pic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GEF_PIC_H__
+#define __GEF_PIC_H__
+
+/* Forward declaration so this header does not depend on include order */
+struct device_node;
+
+/* Returns the Linux irq of the pending PIC interrupt, or 0 if none */
+unsigned int gef_pic_get_irq(void);
+/* Sets up the PIC described by the given device-tree node */
+void gef_pic_init(struct device_node *);
+
+#endif /* __GEF_PIC_H__ */
+
diff --git a/arch/powerpc/sysdev/grackle.c b/arch/powerpc/sysdev/grackle.c
new file mode 100644
index 0000000000..fd2f94a884
--- /dev/null
+++ b/arch/powerpc/sysdev/grackle.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Functions for setting up and using a MPC106 northbridge
+ * Extracted from arch/powerpc/platforms/powermac/pci.c.
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org)
+ * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/grackle.h>
+
+/*
+ * Build the value written to the bridge's config address register:
+ * constant 0x80, bus number from bit 8, device from bit 16 and the
+ * 4-byte-aligned register offset from bit 24.
+ */
+#define GRACKLE_CFA(b, d, o)	(0x80 | ((b) << 8) | ((d) << 16) \
+				 | (((o) & ~3) << 24))
+
+/* Bits in the register at config offset 0xa8 (presumably PICR1) */
+#define GRACKLE_PICR1_STG		0x00000040
+#define GRACKLE_PICR1_LOOPSNOOP		0x00000010
+
+/*
+ * Set or clear the STG bit in the bridge register at config offset 0xa8.
+ *
+ * N.B. this is called before bridges is initialized, so we can't
+ * use grackle_pcibios_{read,write}_config_dword; the indirect config
+ * address/data registers are driven directly instead.
+ */
+static inline void grackle_set_stg(struct pci_controller* bp, int enable)
+{
+	unsigned int picr1;
+
+	/* Read the current register value */
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	picr1 = in_le32(bp->cfg_data);
+
+	if (enable)
+		picr1 |= GRACKLE_PICR1_STG;
+	else
+		picr1 &= ~GRACKLE_PICR1_STG;
+
+	/* Write it back, then read once more */
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	out_le32(bp->cfg_data, picr1);
+	(void)in_le32(bp->cfg_data);
+}
+
+/*
+ * Set or clear the LOOPSNOOP bit in the bridge register at config offset
+ * 0xa8, using the indirect config address/data registers directly.
+ */
+static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable)
+{
+	unsigned int picr1;
+
+	/* Read the current register value */
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	picr1 = in_le32(bp->cfg_data);
+
+	if (enable)
+		picr1 |= GRACKLE_PICR1_LOOPSNOOP;
+	else
+		picr1 &= ~GRACKLE_PICR1_LOOPSNOOP;
+
+	/* Write it back, then read once more */
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	out_le32(bp->cfg_data, picr1);
+	(void)in_le32(bp->cfg_data);
+}
+
+/*
+ * Set up indirect PCI config access for a Grackle host bridge and apply
+ * machine-specific settings: bus renumbering on "PowerMac1,1" and loop
+ * snooping on "AAPL,PowerBook1998".
+ */
+void __init setup_grackle(struct pci_controller *hose)
+{
+	/* Fixed config address/data register locations for this bridge */
+	setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0);
+	if (of_machine_is_compatible("PowerMac1,1"))
+		pci_add_flags(PCI_REASSIGN_ALL_BUS);
+	if (of_machine_is_compatible("AAPL,PowerBook1998"))
+		grackle_set_loop_snoop(hose, 1);
+#if 0	/* Disabled for now, HW problems ??? */
+	grackle_set_stg(hose, 1);
+#endif
+}
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
new file mode 100644
index 0000000000..06e391485d
--- /dev/null
+++ b/arch/powerpc/sysdev/i8259.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * i8259 interrupt controller driver.
+ */
+#undef DEBUG
+
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+
+static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */
+
+static unsigned char cached_8259[2] = { 0xff, 0xff };
+#define cached_A1 (cached_8259[0])
+#define cached_21 (cached_8259[1])
+
+static DEFINE_RAW_SPINLOCK(i8259_lock);
+
+static struct irq_domain *i8259_host;
+
+/*
+ * Acknowledge the IRQ using either the PCI host bridge's interrupt
+ * acknowledge feature or poll.  How i8259_init() is called determines
+ * which is used.  It should be noted that polling is broken on some
+ * IBM and Motorola PReP boxes so we must use the int-ack feature on them.
+ * Returns the irq number, or 0 for a spurious irq 7 or a 0xff int-ack read.
+ */
+unsigned int i8259_irq(void)
+{
+	int irq;
+	int lock = 0;	/* set once i8259_lock is taken on the poll path */
+
+	/* Either int-ack or poll for the IRQ */
+	if (pci_intack)
+		irq = readb(pci_intack);
+	else {
+		raw_spin_lock(&i8259_lock);
+		lock = 1;
+
+		/* Perform an interrupt acknowledge cycle on controller 1. */
+		outb(0x0C, 0x20);		/* prepare for poll */
+		irq = inb(0x20) & 7;
+		if (irq == 2 ) {
+			/*
+			 * Interrupt is cascaded so perform interrupt
+			 * acknowledge on controller 2.
+			 */
+			outb(0x0C, 0xA0);	/* prepare for poll */
+			irq = (inb(0xA0) & 7) + 8;
+		}
+	}
+
+	if (irq == 7) {
+		/*
+		 * This may be a spurious interrupt.  Read the interrupt
+		 * status register (ISR). If the most significant bit is not
+		 * set then there is no valid interrupt.  On the int-ack path
+		 * this relies on the ISR selection done in i8259_init().
+		 */
+		if (!pci_intack)
+			outb(0x0B, 0x20);	/* ISR register */
+		if(~inb(0x20) & 0x80)
+			irq = 0;
+	} else if (irq == 0xff)
+		irq = 0;
+
+	if (lock)
+		raw_spin_unlock(&i8259_lock);
+	return irq;
+}
+
+static void i8259_mask_and_ack_irq(struct irq_data *d)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&i8259_lock, flags);
+	if (d->irq > 7) {	/* irq 8-15: controller 2, ports 0xA0/0xA1 */
+		cached_A1 |= 1 << (d->irq-8);
+		inb(0xA1); 	/* dummy read before the mask write */
+		outb(cached_A1, 0xA1);
+		outb(0x20, 0xA0);	/* Non-specific EOI to controller 2 */
+		outb(0x20, 0x20);	/* Non-specific EOI to cascade */
+	} else {	/* irq 0-7: controller 1, ports 0x20/0x21 */
+		cached_21 |= 1 << d->irq;
+		inb(0x21); 	/* dummy read before the mask write */
+		outb(cached_21, 0x21);
+		outb(0x20, 0x20);	/* Non-specific EOI */
+	}
+	raw_spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static void i8259_set_irq_mask(int irq_nr)	/* irq_nr is not used */
+{
+	outb(cached_A1,0xA1);	/* both cached masks are always rewritten */
+	outb(cached_21,0x21);
+}
+
+static void i8259_mask_irq(struct irq_data *d)
+{
+	unsigned long irqflags;
+
+	pr_debug("i8259_mask_irq(%d)\n", d->irq);
+
+	raw_spin_lock_irqsave(&i8259_lock, irqflags);
+	if (d->irq >= 8)	/* second controller */
+		cached_A1 |= 1 << (d->irq - 8);
+	else			/* first controller */
+		cached_21 |= 1 << d->irq;
+	i8259_set_irq_mask(d->irq);
+	raw_spin_unlock_irqrestore(&i8259_lock, irqflags);
+}
+
+static void i8259_unmask_irq(struct irq_data *d)
+{
+	unsigned long irqflags;
+
+	pr_debug("i8259_unmask_irq(%d)\n", d->irq);
+
+	raw_spin_lock_irqsave(&i8259_lock, irqflags);
+	if (d->irq >= 8)	/* second controller */
+		cached_A1 &= ~(1 << (d->irq - 8));
+	else			/* first controller */
+		cached_21 &= ~(1 << d->irq);
+	i8259_set_irq_mask(d->irq);
+	raw_spin_unlock_irqrestore(&i8259_lock, irqflags);
+}
+
+static struct irq_chip i8259_pic = {
+	.name		= "i8259",
+	.irq_mask	= i8259_mask_irq,
+	.irq_disable	= i8259_mask_irq,
+	.irq_unmask	= i8259_unmask_irq,
+	.irq_mask_ack	= i8259_mask_and_ack_irq,
+};
+
+static struct resource pic1_iores = {
+	.name = "8259 (master)",
+	.start = 0x20,
+	.end = 0x21,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static struct resource pic2_iores = {
+	.name = "8259 (slave)",
+	.start = 0xa0,
+	.end = 0xa1,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static struct resource pic_edgectrl_iores = {
+	.name = "8259 edge control",
+	.start = 0x4d0,
+	.end = 0x4d1,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static int i8259_host_match(struct irq_domain *h, struct device_node *node,
+			    enum irq_domain_bus_token bus_token)
+{
+	struct device_node *np = irq_domain_get_of_node(h);
+	return !np || np == node;	/* a domain without a node matches any node */
+}
+
+static int i8259_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	pr_debug("i8259_host_map(%d, 0x%lx)\n", virq, hw);
+
+	/* hw irq 2 is the internal cascade: flag it IRQ_NOREQUEST */
+	if (hw == 2)
+		irq_set_status_flags(virq, IRQ_NOREQUEST);
+
+	/* We use the level handler only for now, we might want to
+	 * be more cautious here but that works for now
+	 */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &i8259_pic, handle_level_irq);
+	return 0;	/* always succeeds */
+}
+
+static int i8259_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	static const unsigned char map_isa_senses[] = {	/* indexed by cell 1 */
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+		IRQ_TYPE_EDGE_RISING,
+	};
+
+	*out_hwirq = intspec[0];	/* cell 0 is the hardware irq number */
+	if (intsize > 1 && intspec[1] < ARRAY_SIZE(map_isa_senses))
+		*out_flags = map_isa_senses[intspec[1]];
+	else
+		*out_flags = IRQ_TYPE_NONE;	/* missing or out-of-range sense */
+
+	return 0;
+}
+
+static const struct irq_domain_ops i8259_host_ops = {
+	.match = i8259_host_match,
+	.map = i8259_host_map,
+	.xlate = i8259_host_xlate,
+};
+
+struct irq_domain *__init i8259_get_host(void)
+{
+	return i8259_host;	/* NULL until i8259_init() has created the domain */
+}
+
+/**
+ * i8259_init - Initialize the legacy controller
+ * @node: device node of the legacy PIC (can be NULL, but then, it will match
+ *        all interrupts, so beware)
+ * @intack_addr: PCI interrupt acknowledge (real) address which will return
+ *               the active irq from the 8259; 0 makes i8259_irq() poll
+ */
+void i8259_init(struct device_node *node, unsigned long intack_addr)
+{
+	unsigned long flags;
+
+	/* initialize the controller */
+	raw_spin_lock_irqsave(&i8259_lock, flags);
+
+	/* Mask all first */
+	outb(0xff, 0xA1);
+	outb(0xff, 0x21);
+
+	/* init master interrupt controller */
+	outb(0x11, 0x20); /* Start init sequence */
+	outb(0x00, 0x21); /* Vector base */
+	outb(0x04, 0x21); /* Cascade (slave) on IRQ2 */
+	outb(0x01, 0x21); /* Select 8086 mode */
+
+	/* init slave interrupt controller */
+	outb(0x11, 0xA0); /* Start init sequence */
+	outb(0x08, 0xA1); /* Vector base */
+	outb(0x02, 0xA1); /* Slave identity: cascaded on master IRQ2 */
+	outb(0x01, 0xA1); /* Select 8086 mode */
+
+	/* That thing is slow */
+	udelay(100);
+
+	/* always read ISR */
+	outb(0x0B, 0x20);
+	outb(0x0B, 0xA0);
+
+	/* Unmask the internal cascade */
+	cached_21 &= ~(1 << 2);
+
+	/* Set interrupt masks */
+	outb(cached_A1, 0xA1);
+	outb(cached_21, 0x21);
+
+	raw_spin_unlock_irqrestore(&i8259_lock, flags);
+
+	/* create a legacy host */
+	i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					   &i8259_host_ops, NULL);
+	if (i8259_host == NULL) {
+		printk(KERN_ERR "i8259: failed to allocate irq host !\n");
+		return;
+	}
+
+	/* reserve our resources; the return values are not checked */
+	/* XXX should we continue doing that ? it seems to cause problems
+	 * with further requesting of PCI IO resources for that range...
+	 * need to look into it.
+	 */
+	request_resource(&ioport_resource, &pic1_iores);
+	request_resource(&ioport_resource, &pic2_iores);
+	request_resource(&ioport_resource, &pic_edgectrl_iores);
+
+	if (intack_addr != 0)	/* otherwise pci_intack stays NULL */
+		pci_intack = ioremap(intack_addr, 1);
+
+	printk(KERN_INFO "i8259 legacy interrupt controller initialized\n");
+}
diff --git a/arch/powerpc/sysdev/indirect_pci.c b/arch/powerpc/sysdev/indirect_pci.c
new file mode 100644
index 0000000000..1aacb403a0
--- /dev/null
+++ b/arch/powerpc/sysdev/indirect_pci.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for indirect PCI bridges.
+ *
+ * Copyright (C) 1998 Gabriel Paubert.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+int __indirect_read_config(struct pci_controller *hose,
+			   unsigned char bus_number, unsigned int devfn,
+			   int offset, int len, u32 *val)
+{
+	volatile void __iomem *cfg_data;
+	u8 cfg_type = 0;	/* ORed into the address word below */
+	u32 bus_no, reg;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) {
+		if (bus_number != hose->first_busno)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		if (devfn != 0)	/* only devfn 0 on the first bus is reachable */
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (ppc_md.pci_exclude_device)	/* optional platform veto */
+		if (ppc_md.pci_exclude_device(hose, bus_number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_SET_CFG_TYPE)
+		if (bus_number != hose->first_busno)
+			cfg_type = 1;
+
+	bus_no = (bus_number == hose->first_busno) ?
+			hose->self_busno : bus_number;	/* first bus uses self_busno */
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_EXT_REG)
+		reg = ((offset & 0xf00) << 16) | (offset & 0xfc);	/* bits 8-11 go to 24-27 */
+	else
+		reg = offset & 0xfc;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_BIG_ENDIAN)
+		out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+	else
+		out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	cfg_data = hose->cfg_data + (offset & 3);	/* low offset bits pick the byte */
+	switch (len) {
+	case 1:
+		*val = in_8(cfg_data);
+		break;
+	case 2:
+		*val = in_le16(cfg_data);
+		break;
+	default:	/* len == 4, per the note above */
+		*val = in_le32(cfg_data);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+int indirect_read_config(struct pci_bus *bus, unsigned int devfn,
+			 int offset, int len, u32 *val)
+{
+	/* pci_ops .read hook: resolve the bus to its host controller and
+	 * forward the request to __indirect_read_config(). */
+	return __indirect_read_config(pci_bus_to_host(bus), bus->number,
+				      devfn, offset, len, val);
+}
+
+/* pci_ops .write hook: write len bytes of val to config space at offset. */
+int indirect_write_config(struct pci_bus *bus, unsigned int devfn,
+			  int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	volatile void __iomem *cfg_data;
+	u8 cfg_type = 0;	/* ORed into the address word below */
+	u32 bus_no, reg;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) {
+		if (bus->number != hose->first_busno)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+		if (devfn != 0)	/* only devfn 0 on the first bus is reachable */
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (ppc_md.pci_exclude_device)	/* optional platform veto */
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_SET_CFG_TYPE)
+		if (bus->number != hose->first_busno)
+			cfg_type = 1;
+
+	bus_no = (bus->number == hose->first_busno) ?
+			hose->self_busno : bus->number;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_EXT_REG)
+		reg = ((offset & 0xf00) << 16) | (offset & 0xfc);
+	else
+		reg = offset & 0xfc;
+
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_BIG_ENDIAN)
+		out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+	else
+		out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) |
+			 (devfn << 8) | reg | cfg_type));
+
+	/* suppress setting of PCI_PRIMARY_BUS: clear the low byte of val */
+	if (hose->indirect_type & PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS)
+		if ((offset == PCI_PRIMARY_BUS) &&
+		    (bus->number == hose->first_busno))
+			val &= 0xffffff00;
+
+	/* Workaround for PCI_28 Errata in 440EPx/GRx */
+	if ((hose->indirect_type & PPC_INDIRECT_TYPE_BROKEN_MRM) &&
+	    offset == PCI_CACHE_LINE_SIZE)
+		val = 0;
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	cfg_data = hose->cfg_data + (offset & 3);
+	switch (len) {
+	case 1:
+		out_8(cfg_data, val);
+		break;
+	case 2:
+		out_le16(cfg_data, val);
+		break;
+	default:	/* len == 4, per the note above */
+		out_le32(cfg_data, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops indirect_pci_ops =
+{
+	.read = indirect_read_config,
+	.write = indirect_write_config,
+};
+
+void setup_indirect_pci(struct pci_controller *hose, resource_size_t cfg_addr,
+			resource_size_t cfg_data, u32 flags)
+{
+	const resource_size_t addr_page = cfg_addr & PAGE_MASK;
+	const resource_size_t data_page = cfg_data & PAGE_MASK;
+	void __iomem *map = ioremap(addr_page, PAGE_SIZE);
+
+	hose->cfg_addr = map + (cfg_addr & ~PAGE_MASK);
+	if (data_page != addr_page)	/* data register sits in another page */
+		map = ioremap(data_page, PAGE_SIZE);
+	hose->cfg_data = map + (cfg_data & ~PAGE_MASK);
+	hose->ops = &indirect_pci_ops;
+	hose->indirect_type = flags;
+}
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
new file mode 100644
index 0000000000..5f69e2d50f
--- /dev/null
+++ b/arch/powerpc/sysdev/ipic.c
@@ -0,0 +1,889 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/sysdev/ipic.c
+ *
+ * IPIC routines implementations.
+ *
+ * Copyright 2005 Freescale Semiconductor, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/syscore_ops.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/fsl_devices.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/ipic.h>
+
+#include "ipic.h"
+
+static struct ipic * primary_ipic;
+static struct irq_chip ipic_level_irq_chip, ipic_edge_irq_chip;
+static DEFINE_RAW_SPINLOCK(ipic_lock);
+
+static struct ipic_info ipic_info[] = {
+	[1] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 16,
+		.prio_mask = 0,
+	},
+	[2] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 17,
+		.prio_mask = 1,
+	},
+	[3] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 18,
+		.prio_mask = 2,
+	},
+	[4] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 19,
+		.prio_mask = 3,
+	},
+	[5] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 20,
+		.prio_mask = 4,
+	},
+	[6] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 21,
+		.prio_mask = 5,
+	},
+	[7] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 22,
+		.prio_mask = 6,
+	},
+	[8] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_C,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 23,
+		.prio_mask = 7,
+	},
+	[9] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 24,
+		.prio_mask = 0,
+	},
+	[10] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 25,
+		.prio_mask = 1,
+	},
+	[11] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 26,
+		.prio_mask = 2,
+	},
+	[12] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 27,
+		.prio_mask = 3,
+	},
+	[13] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 28,
+		.prio_mask = 4,
+	},
+	[14] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 29,
+		.prio_mask = 5,
+	},
+	[15] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 30,
+		.prio_mask = 6,
+	},
+	[16] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_D,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 31,
+		.prio_mask = 7,
+	},
+	[17] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 1,
+		.prio_mask = 5,
+	},
+	[18] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 2,
+		.prio_mask = 6,
+	},
+	[19] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 3,
+		.prio_mask = 7,
+	},
+	[20] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 4,
+		.prio_mask = 4,
+	},
+	[21] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 5,
+		.prio_mask = 5,
+	},
+	[22] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 6,
+		.prio_mask = 6,
+	},
+	[23] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SEFCR,
+		.bit	= 7,
+		.prio_mask = 7,
+	},
+	[32] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 0,
+		.prio_mask = 0,
+	},
+	[33] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 1,
+		.prio_mask = 1,
+	},
+	[34] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 2,
+		.prio_mask = 2,
+	},
+	[35] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 3,
+		.prio_mask = 3,
+	},
+	[36] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 4,
+		.prio_mask = 4,
+	},
+	[37] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 5,
+		.prio_mask = 5,
+	},
+	[38] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 6,
+		.prio_mask = 6,
+	},
+	[39] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_A,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 7,
+		.prio_mask = 7,
+	},
+	[40] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 8,
+		.prio_mask = 0,
+	},
+	[41] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 9,
+		.prio_mask = 1,
+	},
+	[42] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 10,
+		.prio_mask = 2,
+	},
+	[43] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 11,
+		.prio_mask = 3,
+	},
+	[44] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 12,
+		.prio_mask = 4,
+	},
+	[45] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 13,
+		.prio_mask = 5,
+	},
+	[46] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 14,
+		.prio_mask = 6,
+	},
+	[47] = {
+		.mask	= IPIC_SIMSR_H,
+		.prio	= IPIC_SIPRR_B,
+		.force	= IPIC_SIFCR_H,
+		.bit	= 15,
+		.prio_mask = 7,
+	},
+	[48] = {
+		.ack	= IPIC_SEPNR,
+		.mask	= IPIC_SEMSR,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SEFCR,
+		.bit	= 0,
+		.prio_mask = 4,
+	},
+	[64] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 0,
+		.prio_mask = 0,
+	},
+	[65] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 1,
+		.prio_mask = 1,
+	},
+	[66] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 2,
+		.prio_mask = 2,
+	},
+	[67] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_A,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 3,
+		.prio_mask = 3,
+	},
+	[68] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 4,
+		.prio_mask = 0,
+	},
+	[69] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 5,
+		.prio_mask = 1,
+	},
+	[70] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 6,
+		.prio_mask = 2,
+	},
+	[71] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= IPIC_SMPRR_B,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 7,
+		.prio_mask = 3,
+	},
+	[72] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 8,
+	},
+	[73] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 9,
+	},
+	[74] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 10,
+	},
+	[75] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 11,
+	},
+	[76] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 12,
+	},
+	[77] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 13,
+	},
+	[78] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 14,
+	},
+	[79] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 15,
+	},
+	[80] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 16,
+	},
+	[81] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 17,
+	},
+	[82] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 18,
+	},
+	[83] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 19,
+	},
+	[84] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 20,
+	},
+	[85] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 21,
+	},
+	[86] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 22,
+	},
+	[87] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 23,
+	},
+	[88] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 24,
+	},
+	[89] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 25,
+	},
+	[90] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 26,
+	},
+	[91] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 27,
+	},
+	[94] = {
+		.mask	= IPIC_SIMSR_L,
+		.prio	= 0,
+		.force	= IPIC_SIFCR_L,
+		.bit	= 30,
+	},
+};
+
+static inline u32 ipic_read(volatile u32 __iomem *base, unsigned int reg)
+{
+	return in_be32(&base[reg >> 2]);	/* reg is a byte offset from base */
+}
+
+static inline void ipic_write(volatile u32 __iomem *base, unsigned int reg, u32 value)
+{
+	out_be32(&base[reg >> 2], value);	/* reg is a byte offset from base */
+}
+
+static inline struct ipic * ipic_from_irq(unsigned int virq)
+{
+	return primary_ipic;	/* virq is ignored; the primary IPIC is always used */
+}
+
+static void ipic_unmask_irq(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+	/* Set the source's bit in its mask register; 1U keeps bit 0 defined */
+	temp = ipic_read(ipic->regs, ipic_info[src].mask);
+	temp |= (1U << (31 - ipic_info[src].bit));
+	ipic_write(ipic->regs, ipic_info[src].mask, temp);
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static void ipic_mask_irq(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+	/* Clear the source's bit in its mask register; 1U keeps bit 0 defined */
+	temp = ipic_read(ipic->regs, ipic_info[src].mask);
+	temp &= ~(1U << (31 - ipic_info[src].bit));
+	ipic_write(ipic->regs, ipic_info[src].mask, temp);
+
+	/* mb() can't guarantee that masking is finished.  But it does finish
+	 * for nearly all cases. */
+	mb();
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static void ipic_ack_irq(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+	/* Write the source's bit to its ack register; 1U keeps bit 0 defined */
+	temp = 1U << (31 - ipic_info[src].bit);
+	ipic_write(ipic->regs, ipic_info[src].ack, temp);
+
+	/* mb() can't guarantee that ack is finished.  But it does finish
+	 * for nearly all cases. */
+	mb();
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static void ipic_mask_irq_and_ack(struct irq_data *d)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 temp;
+
+	raw_spin_lock_irqsave(&ipic_lock, flags);
+	/* Mask first: clear the source's bit (1U keeps bit 0 defined) */
+	temp = ipic_read(ipic->regs, ipic_info[src].mask);
+	temp &= ~(1U << (31 - ipic_info[src].bit));
+	ipic_write(ipic->regs, ipic_info[src].mask, temp);
+	/* Then acknowledge it by writing its bit to the ack register */
+	temp = 1U << (31 - ipic_info[src].bit);
+	ipic_write(ipic->regs, ipic_info[src].ack, temp);
+
+	/* mb() can't guarantee that ack is finished.  But it does finish
+	 * for nearly all cases. */
+	mb();
+
+	raw_spin_unlock_irqrestore(&ipic_lock, flags);
+}
+
+static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct ipic *ipic = ipic_from_irq(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int vold, vnew, edibit;
+
+	if (flow_type == IRQ_TYPE_NONE)	/* default sense is level-low */
+		flow_type = IRQ_TYPE_LEVEL_LOW;
+
+	/* ipic supports only low assertion and high-to-low change senses
+	 */
+	if (!(flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING))) {
+		printk(KERN_ERR "ipic: sense type 0x%x not supported\n",
+			flow_type);
+		return -EINVAL;
+	}
+	/* edge mode needs an ack register, which only some sources have */
+	if ((flow_type & IRQ_TYPE_EDGE_FALLING) && !ipic_info[src].ack) {
+		printk(KERN_ERR "ipic: edge sense not supported on internal "
+				"interrupts\n");
+		return -EINVAL;
+
+	}
+
+	irqd_set_trigger_type(d, flow_type);
+	if (flow_type & IRQ_TYPE_LEVEL_LOW)  {
+		irq_set_handler_locked(d, handle_level_irq);
+		d->chip = &ipic_level_irq_chip;
+	} else {
+		irq_set_handler_locked(d, handle_edge_irq);
+		d->chip = &ipic_edge_irq_chip;
+	}
+
+	/* only EXT IRQ senses are programmable on ipic
+	 * internal IRQ senses are LEVEL_LOW; edibit is the SECNR bit to change
+	 */
+	if (src == IPIC_IRQ_EXT0)
+		edibit = 15;
+	else
+		if (src >= IPIC_IRQ_EXT1 && src <= IPIC_IRQ_EXT7)
+			edibit = (14 - (src - IPIC_IRQ_EXT1));
+		else	/* not an external source: nothing to program */
+			return (flow_type & IRQ_TYPE_LEVEL_LOW) ? 0 : -EINVAL;
+
+	vold = ipic_read(ipic->regs, IPIC_SECNR);
+	if ((flow_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_FALLING) {
+		vnew = vold | (1 << edibit);
+	} else {
+		vnew = vold & ~(1 << edibit);
+	}
+	if (vold != vnew)	/* skip the register write when nothing changes */
+		ipic_write(ipic->regs, IPIC_SECNR, vnew);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/* level interrupts and edge interrupts have different ack operations */
+static struct irq_chip ipic_level_irq_chip = {
+	.name		= "IPIC",
+	.irq_unmask	= ipic_unmask_irq,
+	.irq_mask	= ipic_mask_irq,
+	.irq_mask_ack	= ipic_mask_irq,
+	.irq_set_type	= ipic_set_irq_type,
+};
+
+static struct irq_chip ipic_edge_irq_chip = {
+	.name		= "IPIC",
+	.irq_unmask	= ipic_unmask_irq,
+	.irq_mask	= ipic_mask_irq,
+	.irq_mask_ack	= ipic_mask_irq_and_ack,
+	.irq_ack	= ipic_ack_irq,
+	.irq_set_type	= ipic_set_irq_type,
+};
+
+static int ipic_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	struct device_node *np = irq_domain_get_of_node(h);
+	/* A NULL ipic node matches everything, otherwise require equality */
+	return !np || np == node;
+}
+
+static int ipic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ipic *ipic = h->host_data;	/* passed in by ipic_init() */
+
+	irq_set_chip_data(virq, ipic);
+	irq_set_chip_and_handler(virq, &ipic_level_irq_chip, handle_level_irq);
+
+	/* Set default irq type (ipic_set_irq_type() maps NONE to level-low) */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static const struct irq_domain_ops ipic_host_ops = {
+	.match	= ipic_host_match,
+	.map	= ipic_host_map,
+	.xlate	= irq_domain_xlate_onetwocell,
+};
+
+/* Set up the primary IPIC described by @node: map its registers, program the
+ * modes selected by @flags and mask all SIMSR sources.  NULL on failure. */
+struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
+{
+	struct ipic	*ipic;
+	struct resource res;
+	u32 temp = 0;
+
+	if (of_address_to_resource(node, 0, &res))
+		return NULL;
+
+	ipic = kzalloc(sizeof(*ipic), GFP_KERNEL);
+	if (ipic == NULL)
+		return NULL;
+
+	ipic->irqhost = irq_domain_add_linear(node, NR_IPIC_INTS,
+					      &ipic_host_ops, ipic);
+	if (ipic->irqhost == NULL)
+		goto err_free;
+
+	ipic->regs = ioremap(res.start, resource_size(&res));
+	if (ipic->regs == NULL)	/* every access below dereferences regs */
+		goto err_domain;
+
+	/* init hw */
+	ipic_write(ipic->regs, IPIC_SICNR, 0x0);
+
+	/* default priority scheme is grouped. If spread mode is required
+	 * configure SICFR accordingly */
+	if (flags & IPIC_SPREADMODE_GRP_A)
+		temp |= SICFR_IPSA;
+	if (flags & IPIC_SPREADMODE_GRP_B)
+		temp |= SICFR_IPSB;
+	if (flags & IPIC_SPREADMODE_GRP_C)
+		temp |= SICFR_IPSC;
+	if (flags & IPIC_SPREADMODE_GRP_D)
+		temp |= SICFR_IPSD;
+	if (flags & IPIC_SPREADMODE_MIX_A)
+		temp |= SICFR_MPSA;
+	if (flags & IPIC_SPREADMODE_MIX_B)
+		temp |= SICFR_MPSB;
+	ipic_write(ipic->regs, IPIC_SICFR, temp);
+
+	/* handle MCP route */
+	ipic_write(ipic->regs, IPIC_SERCR,
+		   (flags & IPIC_DISABLE_MCP_OUT) ? SERCR_MCPR : 0);
+
+	/* handle routing of IRQ0 to MCP */
+	temp = ipic_read(ipic->regs, IPIC_SEMSR);
+	if (flags & IPIC_IRQ0_MCP)
+		temp |= SEMSR_SIRQ0;
+	else
+		temp &= ~SEMSR_SIRQ0;
+	ipic_write(ipic->regs, IPIC_SEMSR, temp);
+
+	primary_ipic = ipic;
+	irq_set_default_host(primary_ipic->irqhost);
+
+	ipic_write(ipic->regs, IPIC_SIMSR_H, 0);
+	ipic_write(ipic->regs, IPIC_SIMSR_L, 0);
+
+	printk ("IPIC (%d IRQ sources) at %p\n", NR_IPIC_INTS, ipic->regs);
+	return ipic;
+
+err_domain:
+	irq_domain_remove(ipic->irqhost);
+err_free:
+	kfree(ipic);
+	return NULL;
+}
+
+void __init ipic_set_default_priority(void)	/* primary_ipic must be set */
+{
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_B, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_C, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SIPRR_D, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SMPRR_A, IPIC_PRIORITY_DEFAULT);
+	ipic_write(primary_ipic->regs, IPIC_SMPRR_B, IPIC_PRIORITY_DEFAULT);
+}
+
+u32 ipic_get_mcp_status(void)
+{
+	return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0;	/* 0 if no IPIC */
+}
+
+void ipic_clear_mcp_status(u32 mask)
+{
+	ipic_write(primary_ipic->regs, IPIC_SERSR, mask);	/* NOTE(review): no NULL check on primary_ipic, unlike ipic_get_mcp_status() */
+}
+
+/*
+ * Return the Linux irq mapped to the pending vector, or 0 if no
+ * interrupt is pending.
+ */
+unsigned int ipic_get_irq(void)
+{
+	int vector;
+
+	BUG_ON(primary_ipic == NULL);
+
+#define IPIC_SIVCR_VECTOR_MASK	0x7f
+	vector = ipic_read(primary_ipic->regs, IPIC_SIVCR) & IPIC_SIVCR_VECTOR_MASK;
+	/* vector 0 --> no irq is pending */
+	return vector ? irq_linear_revmap(primary_ipic->irqhost, vector) : 0;
+}
+
+#ifdef CONFIG_SUSPEND
+static struct {
+	u32 sicfr;
+	u32 siprr[2];
+	u32 simsr[2];
+	u32 sicnr;
+	u32 smprr[2];
+	u32 semsr;
+	u32 secnr;
+	u32 sermr;
+	u32 sercr;
+} ipic_saved_state;
+
+static int ipic_suspend(void)	/* syscore .suspend: snapshot the registers */
+{
+	struct ipic *ipic = primary_ipic;
+
+	ipic_saved_state.sicfr = ipic_read(ipic->regs, IPIC_SICFR);
+	ipic_saved_state.siprr[0] = ipic_read(ipic->regs, IPIC_SIPRR_A);
+	ipic_saved_state.siprr[1] = ipic_read(ipic->regs, IPIC_SIPRR_D);	/* only A and D are saved */
+	ipic_saved_state.simsr[0] = ipic_read(ipic->regs, IPIC_SIMSR_H);
+	ipic_saved_state.simsr[1] = ipic_read(ipic->regs, IPIC_SIMSR_L);
+	ipic_saved_state.sicnr = ipic_read(ipic->regs, IPIC_SICNR);
+	ipic_saved_state.smprr[0] = ipic_read(ipic->regs, IPIC_SMPRR_A);
+	ipic_saved_state.smprr[1] = ipic_read(ipic->regs, IPIC_SMPRR_B);
+	ipic_saved_state.semsr = ipic_read(ipic->regs, IPIC_SEMSR);
+	ipic_saved_state.secnr = ipic_read(ipic->regs, IPIC_SECNR);
+	ipic_saved_state.sermr = ipic_read(ipic->regs, IPIC_SERMR);
+	ipic_saved_state.sercr = ipic_read(ipic->regs, IPIC_SERCR);
+
+	if (fsl_deep_sleep()) {
+		/* In deep sleep, make sure there can be no
+		 * pending interrupts, as this can cause
+		 * problems on 831x.
+		 */
+		ipic_write(ipic->regs, IPIC_SIMSR_H, 0);
+		ipic_write(ipic->regs, IPIC_SIMSR_L, 0);
+		ipic_write(ipic->regs, IPIC_SEMSR, 0);
+		ipic_write(ipic->regs, IPIC_SERMR, 0);
+	}
+
+	return 0;	/* always succeeds */
+}
+
+static void ipic_resume(void)	/* syscore .resume: write back what ipic_suspend() saved */
+{
+	struct ipic *ipic = primary_ipic;
+
+	ipic_write(ipic->regs, IPIC_SICFR, ipic_saved_state.sicfr);
+	ipic_write(ipic->regs, IPIC_SIPRR_A, ipic_saved_state.siprr[0]);
+	ipic_write(ipic->regs, IPIC_SIPRR_D, ipic_saved_state.siprr[1]);
+	ipic_write(ipic->regs, IPIC_SIMSR_H, ipic_saved_state.simsr[0]);
+	ipic_write(ipic->regs, IPIC_SIMSR_L, ipic_saved_state.simsr[1]);
+	ipic_write(ipic->regs, IPIC_SICNR, ipic_saved_state.sicnr);
+	ipic_write(ipic->regs, IPIC_SMPRR_A, ipic_saved_state.smprr[0]);
+	ipic_write(ipic->regs, IPIC_SMPRR_B, ipic_saved_state.smprr[1]);
+	ipic_write(ipic->regs, IPIC_SEMSR, ipic_saved_state.semsr);
+	ipic_write(ipic->regs, IPIC_SECNR, ipic_saved_state.secnr);
+	ipic_write(ipic->regs, IPIC_SERMR, ipic_saved_state.sermr);
+	ipic_write(ipic->regs, IPIC_SERCR, ipic_saved_state.sercr);
+}
+#else
+#define ipic_suspend NULL
+#define ipic_resume NULL
+#endif
+
+static struct syscore_ops ipic_syscore_ops = {
+	.suspend = ipic_suspend,
+	.resume = ipic_resume,
+};
+
+static int __init init_ipic_syscore(void)
+{
+	/* Nothing to save or restore unless an IPIC was set up and mapped */
+	if (primary_ipic == NULL || primary_ipic->regs == NULL)
+		return -ENODEV;
+
+	printk(KERN_DEBUG "Registering ipic system core operations\n");
+	register_syscore_ops(&ipic_syscore_ops);
+	return 0;
+}
+
+subsys_initcall(init_ipic_syscore);
diff --git a/arch/powerpc/sysdev/ipic.h b/arch/powerpc/sysdev/ipic.h
new file mode 100644
index 0000000000..45ab614068
--- /dev/null
+++ b/arch/powerpc/sysdev/ipic.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * IPIC private definitions and structure.
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2005 Freescale Semiconductor, Inc
+ */
+#ifndef __IPIC_H__
+#define __IPIC_H__
+
+#include <asm/ipic.h>
+
+#define NR_IPIC_INTS 128
+
+/* External IRQS */
+#define IPIC_IRQ_EXT0 48
+#define IPIC_IRQ_EXT1 17
+#define IPIC_IRQ_EXT7 23
+
+/* Default Priority Registers */
+#define IPIC_PRIORITY_DEFAULT 0x05309770
+
+/* System Global Interrupt Configuration Register */
+#define	SICFR_IPSA	0x00010000
+#define	SICFR_IPSB	0x00020000
+#define	SICFR_IPSC	0x00040000
+#define	SICFR_IPSD	0x00080000
+#define	SICFR_MPSA	0x00200000
+#define	SICFR_MPSB	0x00400000
+
+/* System External Interrupt Mask Register */
+#define	SEMSR_SIRQ0	0x00008000
+
+/* System Error Control Register */
+#define SERCR_MCPR	0x00000001
+
+struct ipic {
+	volatile u32 __iomem	*regs;	/* register base handed to ipic_read()/ipic_write() */
+
+	/* The remapper (irq_domain) for this IPIC */
+	struct irq_domain		*irqhost;
+};
+
+struct ipic_info {
+	u8	ack;		/* pending register offset from base, for irqs
+				   that support the ack operation */
+	u8	mask;		/* mask register offset from base */
+	u8	prio;		/* priority register offset from base */
+	u8	force;		/* force register offset from base */
+	u8	bit;		/* register bit position (as per doc), counted
+				   from the MSB: bit mask = 1 << (31 - bit) */
+	u8	prio_mask;	/* priority mask value */
+};
+
+#endif /* __IPIC_H__ */
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
new file mode 100644
index 0000000000..eb48210ef9
--- /dev/null
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * memory mapped NVRAM
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Authors : Utz Bacher <utz.bacher@de.ibm.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+
+static void __iomem *mmio_nvram_start;
+static long mmio_nvram_len;
+static DEFINE_SPINLOCK(mmio_nvram_lock);
+
+/* Copy up to @count bytes at NVRAM offset *@index into @buf, clamped to the
+ * device size; returns bytes copied (0 if *@index is out of range). */
+static ssize_t mmio_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags;
+
+	/* Negative offsets are rejected too: they would index before the mapping. */
+	if (*index < 0 || *index >= mmio_nvram_len)
+		return 0;
+	if (count > mmio_nvram_len - *index)
+		count = mmio_nvram_len - *index;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+	memcpy_fromio(buf, mmio_nvram_start + *index, count);
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+	*index += count;
+	return count;
+}
+
+/* Read one byte at NVRAM offset @addr; offsets outside the device (including
+ * negative ones) read back as 0xff. */
+static unsigned char mmio_nvram_read_val(int addr)
+{
+	unsigned long flags;
+	unsigned char val;
+
+	if (addr < 0 || addr >= mmio_nvram_len)
+		return 0xff;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+	val = ioread8(mmio_nvram_start + addr);
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+
+	return val;
+}
+
+/* Copy up to @count bytes from @buf to NVRAM offset *@index, clamped to the
+ * device size; returns bytes written (0 if *@index is out of range). */
+static ssize_t mmio_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned long flags;
+
+	/* Negative offsets are rejected too: they would index before the mapping. */
+	if (*index < 0 || *index >= mmio_nvram_len)
+		return 0;
+	if (count > mmio_nvram_len - *index)
+		count = mmio_nvram_len - *index;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+	memcpy_toio(mmio_nvram_start + *index, buf, count);
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+	*index += count;
+	return count;
+}
+
+/* Store @val at NVRAM byte offset @addr; out-of-range offsets are ignored. */
+static void mmio_nvram_write_val(int addr, unsigned char val)
+{
+	unsigned long flags;
+
+	if (addr < 0 || addr >= mmio_nvram_len)
+		return;
+
+	spin_lock_irqsave(&mmio_nvram_lock, flags);
+	iowrite8(val, mmio_nvram_start + addr);
+	spin_unlock_irqrestore(&mmio_nvram_lock, flags);
+}
+
+static ssize_t mmio_nvram_get_size(void)
+{
+	return mmio_nvram_len;	/* byte length of the mapped resource, set in mmio_nvram_init() */
+}
+
+/* Map the device-tree "nvram" node and install the mmio_nvram_* accessors in ppc_md.
+ * Returns 0, or -ENODEV, -EIO, -ENOMEM or the of_address_to_resource() error. */
+int __init mmio_nvram_init(void)
+{
+	struct device_node *nvram_node;
+	resource_size_t nvram_addr;	/* same width as r.start, so no truncation */
+	struct resource r;
+	int ret;
+
+	nvram_node = of_find_node_by_type(NULL, "nvram");
+	if (!nvram_node)
+		nvram_node = of_find_compatible_node(NULL, NULL, "nvram");
+	if (!nvram_node) {
+		printk(KERN_WARNING "nvram: no node found in device-tree\n");
+		return -ENODEV;
+	}
+
+	ret = of_address_to_resource(nvram_node, 0, &r);
+	if (ret) {
+		printk(KERN_WARNING "nvram: failed to get address (err %d)\n", ret);
+		goto out;
+	}
+	nvram_addr = r.start;
+	mmio_nvram_len = resource_size(&r);
+	if (!mmio_nvram_len || !nvram_addr) {
+		printk(KERN_WARNING "nvram: address or length is 0\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	mmio_nvram_start = ioremap(nvram_addr, mmio_nvram_len);
+	if (!mmio_nvram_start) {
+		printk(KERN_WARNING "nvram: failed to ioremap\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	printk(KERN_INFO "mmio NVRAM, %ldk at %pa mapped to %p\n",
+	       mmio_nvram_len >> 10, &nvram_addr, mmio_nvram_start);
+	ppc_md.nvram_read_val	= mmio_nvram_read_val;
+	ppc_md.nvram_write_val	= mmio_nvram_write_val;
+	ppc_md.nvram_read	= mmio_nvram_read;
+	ppc_md.nvram_write	= mmio_nvram_write;
+	ppc_md.nvram_size	= mmio_nvram_get_size;
+
+out:
+	of_node_put(nvram_node);
+	return ret;
+}
diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c
new file mode 100644
index 0000000000..58cee28e23
--- /dev/null
+++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/property.h>
+
+#include <asm/mpc5xxx.h>
+
+/**
+ * mpc5xxx_fwnode_get_bus_frequency - Look up the bus frequency for a firmware node
+ * @fwnode:	firmware node the search starts from
+ *
+ * Reads "bus-frequency" from @fwnode itself, then from each parent node in
+ * turn. Returns the bus frequency (IPS on MPC512x, IPB on MPC52xx), or 0 if
+ * no node provides it.
+ */
+unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *ancestor;
+	u32 freq;
+
+	if (fwnode_property_read_u32(fwnode, "bus-frequency", &freq) == 0)
+		return freq;
+
+	fwnode_for_each_parent_node(fwnode, ancestor) {
+		if (fwnode_property_read_u32(ancestor, "bus-frequency", &freq) != 0)
+			continue;
+
+		/* returning from inside the walk: put the node we stopped on */
+		fwnode_handle_put(ancestor);
+		return freq;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mpc5xxx_fwnode_get_bus_frequency);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
new file mode 100644
index 0000000000..ba287abcb0
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic.c
@@ -0,0 +1,2019 @@
+/*
+ *  arch/powerpc/sysdev/mpic.c
+ *
+ *  Driver for interrupt controllers following the OpenPIC standard, the
+ *  common implementation being IBM's MPIC. This driver also can deal
+ *  with various broken implementations of this HW.
+ *
+ *  Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *  Copyright 2010-2012 Freescale Semiconductor, Inc.
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive
+ *  for more details.
+ */
+
+#undef DEBUG
+#undef DEBUG_IPI
+#undef DEBUG_IRQ
+#undef DEBUG_LOW
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+#include <linux/ratelimit.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/ptrace.h>
+#include <asm/signal.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+
+#include "mpic.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct bus_type mpic_subsys = {	/* non-static: exported via EXPORT_SYMBOL_GPL just below */
+	.name = "mpic",
+	.dev_name = "mpic",
+};
+EXPORT_SYMBOL_GPL(mpic_subsys);
+
+static struct mpic *mpics;
+static struct mpic *mpic_primary;
+static DEFINE_RAW_SPINLOCK(mpic_lock);
+
+#ifdef CONFIG_PPC32	/* XXX for now */
+#ifdef CONFIG_IRQ_ALL_CPUS
+#define distribute_irqs	(1)
+#else
+#define distribute_irqs	(0)
+#endif
+#endif
+
+#ifdef CONFIG_MPIC_WEIRD
+static u32 mpic_infos[][MPIC_IDX_END] = {	/* positional rows: slot MPIC_IDX_<name> is what MPIC_INFO(<name>) indexes */
+	[0] = {	/* Original OpenPIC compatible MPIC */
+		MPIC_GREG_BASE,
+		MPIC_GREG_FEATURE_0,
+		MPIC_GREG_GLOBAL_CONF_0,
+		MPIC_GREG_VENDOR_ID,
+		MPIC_GREG_IPI_VECTOR_PRI_0,
+		MPIC_GREG_IPI_STRIDE,
+		MPIC_GREG_SPURIOUS,
+		MPIC_GREG_TIMER_FREQ,
+		/* timer registers */
+		MPIC_TIMER_BASE,
+		MPIC_TIMER_STRIDE,
+		MPIC_TIMER_CURRENT_CNT,
+		MPIC_TIMER_BASE_CNT,
+		MPIC_TIMER_VECTOR_PRI,
+		MPIC_TIMER_DESTINATION,
+		/* per-CPU registers */
+		MPIC_CPU_BASE,
+		MPIC_CPU_STRIDE,
+		MPIC_CPU_IPI_DISPATCH_0,
+		MPIC_CPU_IPI_DISPATCH_STRIDE,
+		MPIC_CPU_CURRENT_TASK_PRI,
+		MPIC_CPU_WHOAMI,
+		MPIC_CPU_INTACK,
+		MPIC_CPU_EOI,
+		MPIC_CPU_MCACK,
+		/* interrupt source registers and vector/priority field values */
+		MPIC_IRQ_BASE,
+		MPIC_IRQ_STRIDE,
+		MPIC_IRQ_VECTOR_PRI,
+		MPIC_VECPRI_VECTOR_MASK,
+		MPIC_VECPRI_POLARITY_POSITIVE,
+		MPIC_VECPRI_POLARITY_NEGATIVE,
+		MPIC_VECPRI_SENSE_LEVEL,
+		MPIC_VECPRI_SENSE_EDGE,
+		MPIC_VECPRI_POLARITY_MASK,
+		MPIC_VECPRI_SENSE_MASK,
+		MPIC_IRQ_DESTINATION
+	},
+	[1] = {	/* Tsi108/109 PIC */
+		TSI108_GREG_BASE,
+		TSI108_GREG_FEATURE_0,
+		TSI108_GREG_GLOBAL_CONF_0,
+		TSI108_GREG_VENDOR_ID,
+		TSI108_GREG_IPI_VECTOR_PRI_0,
+		TSI108_GREG_IPI_STRIDE,
+		TSI108_GREG_SPURIOUS,
+		TSI108_GREG_TIMER_FREQ,
+		/* timer registers */
+		TSI108_TIMER_BASE,
+		TSI108_TIMER_STRIDE,
+		TSI108_TIMER_CURRENT_CNT,
+		TSI108_TIMER_BASE_CNT,
+		TSI108_TIMER_VECTOR_PRI,
+		TSI108_TIMER_DESTINATION,
+		/* per-CPU registers */
+		TSI108_CPU_BASE,
+		TSI108_CPU_STRIDE,
+		TSI108_CPU_IPI_DISPATCH_0,
+		TSI108_CPU_IPI_DISPATCH_STRIDE,
+		TSI108_CPU_CURRENT_TASK_PRI,
+		TSI108_CPU_WHOAMI,
+		TSI108_CPU_INTACK,
+		TSI108_CPU_EOI,
+		TSI108_CPU_MCACK,
+		/* interrupt source registers and vector/priority field values */
+		TSI108_IRQ_BASE,
+		TSI108_IRQ_STRIDE,
+		TSI108_IRQ_VECTOR_PRI,
+		TSI108_VECPRI_VECTOR_MASK,
+		TSI108_VECPRI_POLARITY_POSITIVE,
+		TSI108_VECPRI_POLARITY_NEGATIVE,
+		TSI108_VECPRI_SENSE_LEVEL,
+		TSI108_VECPRI_SENSE_EDGE,
+		TSI108_VECPRI_POLARITY_MASK,
+		TSI108_VECPRI_SENSE_MASK,
+		TSI108_IRQ_DESTINATION
+	},
+};
+
+#define MPIC_INFO(name) mpic->hw_set[MPIC_IDX_##name]
+
+#else /* CONFIG_MPIC_WEIRD */
+
+#define MPIC_INFO(name) MPIC_##name
+
+#endif /* CONFIG_MPIC_WEIRD */
+
+static inline unsigned int mpic_processor_id(struct mpic *mpic)
+{
+	/* A controller flagged MPIC_SECONDARY always reports CPU 0 ... */
+	if (mpic->flags & MPIC_SECONDARY)
+		return 0;
+
+	/* ... any other one reports the hard id of the running CPU. */
+	return hard_smp_processor_id();
+}
+
+/*
+ * Register accessor functions
+ */
+
+
+/* Read register @reg of bank @rb using the access method selected by @type. */
+static inline u32 _mpic_read(enum mpic_reg_type type, struct mpic_reg_bank *rb,
+			     unsigned int reg)
+{
+	switch (type) {
+	case mpic_access_mmio_be:
+		return in_be32(rb->base + (reg >> 2));
+#ifdef CONFIG_PPC_DCR
+	case mpic_access_dcr:
+		return dcr_read(rb->dhost, reg);
+#endif
+	case mpic_access_mmio_le:
+	default:
+		return in_le32(rb->base + (reg >> 2));
+	}
+}
+
+/* Write @value to register @reg of bank @rb using the access method in @type. */
+static inline void _mpic_write(enum mpic_reg_type type, struct mpic_reg_bank *rb,
+			       unsigned int reg, u32 value)
+{
+	switch (type) {
+	case mpic_access_mmio_be:
+		out_be32(rb->base + (reg >> 2), value);
+		return;
+#ifdef CONFIG_PPC_DCR
+	case mpic_access_dcr:
+		dcr_write(rb->dhost, reg, value);
+		return;
+#endif
+	case mpic_access_mmio_le:
+	default:
+		out_le32(rb->base + (reg >> 2), value);
+		return;
+	}
+}
+
+static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi)
+{
+	unsigned int offset = ipi * MPIC_INFO(GREG_IPI_STRIDE) + MPIC_INFO(GREG_IPI_VECTOR_PRI_0);
+	enum mpic_reg_type type = mpic->reg_type;
+
+	/* MPIC_BROKEN_IPI: a little-endian MPIC whose IPI registers are read big-endian */
+	if (type == mpic_access_mmio_le && (mpic->flags & MPIC_BROKEN_IPI))
+		type = mpic_access_mmio_be;
+	return _mpic_read(type, &mpic->gregs, offset);
+}
+
+static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value)
+{
+	/* IPI n's vector/priority register is n strides past IPI 0's */
+	unsigned int offset = ipi * MPIC_INFO(GREG_IPI_STRIDE) + MPIC_INFO(GREG_IPI_VECTOR_PRI_0);
+
+	_mpic_write(mpic->reg_type, &mpic->gregs, offset, value);
+}
+
+static inline unsigned int mpic_tm_offset(struct mpic *mpic, unsigned int tm)
+{
+	/* timers come in groups of four: tm / 4 picks the group, tm % 4 the timer */
+	return (tm / 4) * MPIC_TIMER_GROUP_STRIDE + (tm % 4) * MPIC_INFO(TIMER_STRIDE);
+}
+
+static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm)
+{
+	unsigned int vecpri_reg = MPIC_INFO(TIMER_VECTOR_PRI) + mpic_tm_offset(mpic, tm);
+
+	/* read timer @tm's vector/priority register from the timer bank */
+	return _mpic_read(mpic->reg_type, &mpic->tmregs, vecpri_reg);
+}
+
+static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value)
+{
+	unsigned int vecpri_reg = MPIC_INFO(TIMER_VECTOR_PRI) + mpic_tm_offset(mpic, tm);
+
+	/* write timer @tm's vector/priority register in the timer bank */
+	_mpic_write(mpic->reg_type, &mpic->tmregs, vecpri_reg, value);
+}
+
+static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
+{
+	/* use the register bank of the CPU chosen by mpic_processor_id() */
+	struct mpic_reg_bank *bank = &mpic->cpuregs[mpic_processor_id(mpic)];
+	return _mpic_read(mpic->reg_type, bank, reg);
+}
+
+static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value)
+{
+	/* use the register bank of the CPU chosen by mpic_processor_id() */
+	struct mpic_reg_bank *bank = &mpic->cpuregs[mpic_processor_id(mpic)];
+	_mpic_write(mpic->reg_type, bank, reg, value);
+}
+
+static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg)
+{
+	struct mpic_reg_bank *isu = &mpic->isus[src_no >> mpic->isu_shift];
+	unsigned int off = reg + (src_no & mpic->isu_mask) * MPIC_INFO(IRQ_STRIDE);
+	unsigned int val = _mpic_read(mpic->reg_type, isu, off);
+
+#ifdef CONFIG_MPIC_BROKEN_REGREAD
+	/* only MASK/ACTIVITY are taken from hardware; the rest comes from the shadow */
+	if (reg == 0) {
+		val &= MPIC_VECPRI_MASK | MPIC_VECPRI_ACTIVITY;
+		val |= mpic->isu_reg0_shadow[src_no];
+	}
+#endif
+	return val;
+}
+
+static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
+				   unsigned int reg, u32 value)
+{
+	struct mpic_reg_bank *isu = &mpic->isus[src_no >> mpic->isu_shift];
+	unsigned int off = reg + (src_no & mpic->isu_mask) * MPIC_INFO(IRQ_STRIDE);
+
+	_mpic_write(mpic->reg_type, isu, off, value);
+
+#ifdef CONFIG_MPIC_BROKEN_REGREAD
+	/* shadow everything but MASK/ACTIVITY of register 0 for _mpic_irq_read() */
+	if (reg != 0)
+		return;
+	mpic->isu_reg0_shadow[src_no] = value & ~(MPIC_VECPRI_MASK | MPIC_VECPRI_ACTIVITY);
+#endif
+}
+
+#define mpic_read(b,r)		_mpic_read(mpic->reg_type,&(b),(r))
+#define mpic_write(b,r,v)	_mpic_write(mpic->reg_type,&(b),(r),(v))
+#define mpic_ipi_read(i)	_mpic_ipi_read(mpic,(i))
+#define mpic_ipi_write(i,v)	_mpic_ipi_write(mpic,(i),(v))
+#define mpic_tm_read(i)		_mpic_tm_read(mpic,(i))
+#define mpic_tm_write(i,v)	_mpic_tm_write(mpic,(i),(v))
+#define mpic_cpu_read(i)	_mpic_cpu_read(mpic,(i))
+#define mpic_cpu_write(i,v)	_mpic_cpu_write(mpic,(i),(v))
+#define mpic_irq_read(s,r)	_mpic_irq_read(mpic,(s),(r))
+#define mpic_irq_write(s,r,v)	_mpic_irq_write(mpic,(s),(r),(v))
+
+
+/*
+ * Low level utility functions
+ */
+
+
+/* Map @size bytes at @phys_addr + @offset into @rb; a failed ioremap() is fatal. */
+static void _mpic_map_mmio(struct mpic *mpic, phys_addr_t phys_addr,
+			   struct mpic_reg_bank *rb, unsigned int offset, unsigned int size)
+{
+	rb->base = ioremap(phys_addr + offset, size);
+	BUG_ON(!rb->base);
+}
+
+#ifdef CONFIG_PPC_DCR
+/* DCR variant of the mapping helper: the base is the node's DCR resource 0. */
+static void _mpic_map_dcr(struct mpic *mpic, struct mpic_reg_bank *rb,
+			  unsigned int offset, unsigned int size)
+{
+	rb->dhost = dcr_map(mpic->node, dcr_resource_start(mpic->node, 0) + offset, size);
+	BUG_ON(!DCR_MAP_OK(rb->dhost));
+}
+
+/* Map a register bank through DCR or MMIO, depending on MPIC_USES_DCR. */
+static inline void mpic_map(struct mpic *mpic, phys_addr_t phys_addr,
+			    struct mpic_reg_bank *rb, unsigned int offset, unsigned int size)
+{
+	if (!(mpic->flags & MPIC_USES_DCR))
+		_mpic_map_mmio(mpic, phys_addr, rb, offset, size);
+	else
+		_mpic_map_dcr(mpic, rb, offset, size);
+}
+#else /* CONFIG_PPC_DCR */
+#define mpic_map(m,p,b,o,s)	_mpic_map_mmio(m,p,b,o,s)
+#endif /* !CONFIG_PPC_DCR */
+
+
+
+/* Probe for MPICs whose IPI registers read back with flipped endianness: write
+ * the mask bit to IPI 0's vector/priority register and compare the readback.
+ */
+static void __init mpic_test_broken_ipi(struct mpic *mpic)
+{
+	u32 readback;
+
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0), MPIC_VECPRI_MASK);
+	readback = mpic_read(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0));
+	if (readback != le32_to_cpu(MPIC_VECPRI_MASK))
+		return;
+
+	printk(KERN_INFO "mpic: Detected reversed IPI registers\n");
+	mpic->flags |= MPIC_BROKEN_IPI;
+}
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+
+/* Report whether @source has an HT fixup recorded (used on broken U3s to force
+ * the edge setting on the MPIC and do the ack workaround).
+ */
+static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source)
+{
+	/* the fixup table, when allocated, has entries for sources 0..127 only */
+	return source < 128 && mpic->fixups &&
+	       mpic->fixups[source].base != NULL;
+}
+
+
+static inline void mpic_ht_end_irq(struct mpic *mpic, unsigned int source)
+{
+	struct mpic_irq_fixup *fx = &mpic->fixups[source];
+
+	if (!fx->applebase) {
+		/* byte at base + 2 is written first, then the saved data word at base + 4 */
+		raw_spin_lock(&mpic->fixup_lock);
+		writeb(0x11 + 2 * fx->index, fx->base + 2);
+		writel(fx->data, fx->base + 4);
+		raw_spin_unlock(&mpic->fixup_lock);
+		return;
+	}
+	/* Apple variant: one bit per index, 32 indexes per 4-byte register */
+	writel(1U << (fx->index & 0x1f), fx->applebase + ((fx->index >> 3) & ~3));
+}
+
+static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source,
+				      bool level)
+{
+	struct mpic_irq_fixup *fx = &mpic->fixups[source];
+	unsigned long irqflags;
+	u32 cfg;
+
+	if (!fx->base)
+		return;
+
+	DBG("startup_ht_interrupt(0x%x) index: %d\n", source, fx->index);
+
+	/* Enable and configure: clear bits 0x23, then set 0x22 for level sources */
+	raw_spin_lock_irqsave(&mpic->fixup_lock, irqflags);
+	writeb(0x10 + 2 * fx->index, fx->base + 2);
+	cfg = readl(fx->base + 4) & ~0x23U;
+	if (level)
+		cfg |= 0x22;
+	writel(cfg, fx->base + 4);
+	raw_spin_unlock_irqrestore(&mpic->fixup_lock, irqflags);
+
+#ifdef CONFIG_PM
+	/* The saved copy keeps the lowest bit inverted relative to the HW:
+	 * set if this fixup was enabled, clear otherwise.
+	 */
+	mpic->save_data[source].fixup_data = cfg | 1;
+#endif
+}
+
+static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source)
+{
+	struct mpic_irq_fixup *fx = &mpic->fixups[source];
+	unsigned long irqflags;
+	u32 cfg;
+
+	if (!fx->base)
+		return;
+
+	DBG("shutdown_ht_interrupt(0x%x)\n", source);
+
+	/* Disable: set the lowest bit of the entry's configuration word */
+	raw_spin_lock_irqsave(&mpic->fixup_lock, irqflags);
+	writeb(0x10 + 2 * fx->index, fx->base + 2);
+	cfg = readl(fx->base + 4) | 1;
+	writel(cfg, fx->base + 4);
+	raw_spin_unlock_irqrestore(&mpic->fixup_lock, irqflags);
+
+#ifdef CONFIG_PM
+	/* The saved copy keeps the lowest bit inverted relative to the HW:
+	 * set if this fixup was enabled, clear otherwise.
+	 */
+	mpic->save_data[source].fixup_data = cfg & ~1;
+#endif
+}
+
+#ifdef CONFIG_PCI_MSI
+static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
+				    unsigned int devfn)
+{
+	u8 __iomem *cap;
+	u8 pos, flags;
+	u64 addr = 0;
+
+	/* Walk the capability list looking for the HT MSI-mapping capability. */
+	for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0;
+	     pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) {
+		if (readb(devbase + pos + PCI_CAP_LIST_ID) != PCI_CAP_ID_HT)
+			continue;
+		if ((readb(devbase + pos + 3) & HT_5BIT_CAP_MASK) == HT_CAPTYPE_MSI_MAPPING)
+			break;
+	}
+	if (!pos)
+		return;
+
+	cap = devbase + pos;
+	flags = readb(cap + HT_MSI_FLAGS);
+
+	/* The mapping address is only read back when it is not flagged as fixed. */
+	if (!(flags & HT_MSI_FLAGS_FIXED)) {
+		addr = readl(cap + HT_MSI_ADDR_LO) & HT_MSI_ADDR_LO_MASK;
+		addr |= (u64)readl(cap + HT_MSI_ADDR_HI) << 32;
+	}
+
+	printk(KERN_DEBUG "mpic:   - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
+		PCI_SLOT(devfn), PCI_FUNC(devfn),
+		flags & HT_MSI_FLAGS_ENABLE ? "enabled" : "disabled", addr);
+
+	/* Turn the mapping on if it is not enabled yet. */
+	if (!(flags & HT_MSI_FLAGS_ENABLE))
+		writeb(flags | HT_MSI_FLAGS_ENABLE, cap + HT_MSI_FLAGS);
+}
+#else
+static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
+				    unsigned int devfn)
+{
+	/* Nothing to scan for when CONFIG_PCI_MSI is not set. */
+}
+#endif
+
+/* Record an HT fixup for each interrupt of the device's HT IRQ capability, if it
+ * has one.  Every interrupt is left masked; sources >= 128 get no fixup. */
+static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase,
+				    unsigned int devfn, u32 vdid)
+{
+	int i, irq, n;
+	u8 __iomem *base;
+	u32 tmp;
+	u8 pos;
+
+	for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0;
+	     pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) {
+		if (readb(devbase + pos + PCI_CAP_LIST_ID) != PCI_CAP_ID_HT)
+			continue;
+		if ((readb(devbase + pos + 3) & HT_5BIT_CAP_MASK) == HT_CAPTYPE_IRQ)
+			break;
+	}
+	if (pos == 0)
+		return;
+
+	base = devbase + pos;
+	writeb(0x01, base + 2);
+	n = (readl(base + 4) >> 16) & 0xff;
+	printk(KERN_INFO "mpic:   - HT:%02x.%x [0x%02x] vendor %04x device %04x has %d irqs\n",
+	       devfn >> 3, devfn & 0x7, pos, vdid & 0xffff, vdid >> 16, n + 1);
+
+	for (i = 0; i <= n; i++) {
+		writeb(0x10 + 2 * i, base + 2);
+		tmp = readl(base + 4);
+		irq = (tmp >> 16) & 0xff;
+		DBG("HT PIC index 0x%x, irq 0x%x, tmp: %08x\n", i, irq, tmp);
+		/* mask it, will be unmasked later */
+		tmp |= 0x1;
+		writel(tmp, base + 4);
+		/* irq is an 8-bit field but mpic->fixups only has 128 entries */
+		if (irq >= 128) {
+			printk(KERN_WARNING "mpic: HT irq 0x%x out of range\n", irq);
+			continue;
+		}
+		mpic->fixups[irq].index = i;
+		mpic->fixups[irq].base = base;
+		/* Apple HT PIC has a non-standard way of doing EOIs */
+		mpic->fixups[irq].applebase = (vdid & 0xffff) == 0x106b ? devbase + 0x60 : NULL;
+		writeb(0x11 + 2 * i, base + 2);
+		mpic->fixups[irq].data = readl(base + 4) | 0x80000000;
+	}
+}
+
+
+static void __init mpic_scan_ht_pics(struct mpic *mpic)
+{
+	unsigned int devfn;
+	u8 __iomem *cfgspace;
+
+	printk(KERN_INFO "mpic: Setting up HT PICs workarounds for U3/U4\n");
+
+	/* Allocate the fixups array: 128 zeroed entries, indexed by source number */
+	mpic->fixups = kcalloc(128, sizeof(*mpic->fixups), GFP_KERNEL);
+	BUG_ON(mpic->fixups == NULL);
+
+	/* Init the spinlock that serialises accesses to the HT fixup registers */
+	raw_spin_lock_init(&mpic->fixup_lock);
+
+	/* Map U3 config space. We assume all IO-APICs are on the primary bus so
+	 * 64kB is enough. The mapping is kept: fixups store pointers into it.
+	 */
+	cfgspace = ioremap(0xf2000000, 0x10000);
+	BUG_ON(cfgspace == NULL);
+
+	/* Now we scan all slots. We do a very quick scan, we read the header
+	 * type, vendor ID and device ID only, that's plenty enough
+	 */
+	for (devfn = 0; devfn < 0x100; devfn++) {
+		u8 __iomem *devbase = cfgspace + (devfn << 8);
+		u8 hdr_type = readb(devbase + PCI_HEADER_TYPE);
+		u32 l = readl(devbase + PCI_VENDOR_ID);
+		u16 s;
+
+		DBG("devfn %x, l: %x\n", devfn, l);
+
+		/* If no device (all-ones or all-zeroes ID halves), skip */
+		if (l == 0xffffffff || l == 0x00000000 ||
+		    l == 0x0000ffff || l == 0xffff0000)
+			goto next;
+		/* Check if it supports capability lists */
+		s = readw(devbase + PCI_STATUS);
+		if (!(s & PCI_STATUS_CAP_LIST))
+			goto next;
+
+		mpic_scan_ht_pic(mpic, devbase, devfn, l);
+		mpic_scan_ht_msi(mpic, devbase, devfn);
+
+	next:
+		/* at function 0 with header-type bit 0x80 clear, skip functions 1-7 */
+		if (PCI_FUNC(devfn) == 0 && (hdr_type & 0x80) == 0)
+			devfn += 7;
+	}
+}
+
+#else /* CONFIG_MPIC_U3_HT_IRQS */
+
+static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source)
+{
+	return 0;	/* stub for builds without CONFIG_MPIC_U3_HT_IRQS: never an HT source */
+}
+
+static void __init mpic_scan_ht_pics(struct mpic *mpic)
+{	/* stub for builds without CONFIG_MPIC_U3_HT_IRQS: nothing to scan */
+}
+
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+/* Return the chip data (mpic) of linux interrupt @irq, or NULL for legacy irqs */
+static struct mpic *mpic_find(unsigned int irq)
+{
+	if (irq >= NR_IRQS_LEGACY)
+		return irq_get_chip_data(irq);
+
+	return NULL;
+}
+
+/* Determine if @src lies within the IPI vector range ipi_vecs[0]..ipi_vecs[3] */
+static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int src)
+{
+	return mpic->ipi_vecs[0] <= src && src <= mpic->ipi_vecs[3];
+}
+
+/* Determine if @src lies within the timer vector range timer_vecs[0]..timer_vecs[7] */
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int src)
+{
+	return mpic->timer_vecs[0] <= src && src <= mpic->timer_vecs[7];
+}
+
+/* Convert a mask of logical cpu numbers into a mask of hard (physical) cpu ids. */
+static inline u32 mpic_physmask(u32 cpumask)
+{
+	u32 phys = 0;
+	int cpu;
+
+	for (cpu = 0; cpu < min(32, NR_CPUS) && cpu_possible(cpu); cpu++)
+		phys |= ((cpumask >> cpu) & 1) << get_hard_smp_processor_id(cpu);
+	return phys;
+}
+
+#ifdef CONFIG_SMP
+/* Get the mpic structure from an IPI's irq data (it is stored as the chip data) */
+static inline struct mpic * mpic_from_ipi(struct irq_data *d)
+{
+	return irq_data_get_irq_chip_data(d);
+}
+#endif
+
+/* Get the mpic structure from the linux irq number (it is stored as the chip data) */
+static inline struct mpic * mpic_from_irq(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+/* Get the mpic structure from the irq data (it is stored as the chip data) */
+static inline struct mpic * mpic_from_irq_data(struct irq_data *d)
+{
+	return irq_data_get_irq_chip_data(d);
+}
+
+/* Send an EOI: write 0 to the EOI register in the per-CPU register bank */
+static inline void mpic_eoi(struct mpic *mpic)
+{
+	mpic_cpu_write(MPIC_INFO(CPU_EOI), 0);
+}
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+
+void mpic_unmask_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int tries = 100000;
+	u32 vecpri;
+
+	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src);
+
+	vecpri = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+	mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri & ~MPIC_VECPRI_MASK);
+
+	/* poll, a bounded number of times, until the mask bit reads back clear */
+	while (tries--) {
+		if (!(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK))
+			return;
+	}
+
+	printk(KERN_ERR "%s: timeout on hwirq %u\n", __func__, src);
+}
+
+void mpic_mask_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int tries = 100000;
+	u32 vecpri;
+
+	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src);
+
+	vecpri = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+	mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri | MPIC_VECPRI_MASK);
+
+	/* poll, a bounded number of times, until the mask bit reads back set,
+	 * i.e. until the mask has reached the controller */
+	while (tries--) {
+		if (mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK)
+			return;
+	}
+
+	printk(KERN_ERR "%s: timeout on hwirq %u\n", __func__, src);
+}
+
+void mpic_end_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+
+#ifdef DEBUG_IRQ
+	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
+#endif
+	/* We always EOI on end_irq(), even for edge interrupts: that should
+	 * only lower the priority, and the MPIC should have properly latched
+	 * any further edge interrupt that came in anyway.
+	 */
+
+	mpic_eoi(mpic);
+}
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+
+static void mpic_unmask_ht_irq(struct irq_data *d)
+{
+	/* Unmask at the MPIC first ... */
+	mpic_unmask_irq(d);
+
+	/* ... then, for level-type sources only, do the HT end-of-interrupt. */
+	if (!irqd_is_level_type(d))
+		return;
+	mpic_ht_end_irq(mpic_from_irq_data(d), irqd_to_hwirq(d));
+}
+
+static unsigned int mpic_startup_ht_irq(struct irq_data *d)
+{
+	/* Startup for HT-sourced interrupts: unmask at the MPIC, then enable
+	 * and configure the HT side. Always returns 0.
+	 */
+	mpic_unmask_irq(d);
+	mpic_startup_ht_interrupt(mpic_from_irq_data(d), irqd_to_hwirq(d),
+				  irqd_is_level_type(d));
+	return 0;
+}
+
+/* Shutdown for HT-sourced interrupts: disable the HT side first, then mask the
+ * source at the MPIC. */
+static void mpic_shutdown_ht_irq(struct irq_data *d)
+{
+	mpic_shutdown_ht_interrupt(mpic_from_irq_data(d), irqd_to_hwirq(d));
+
+	mpic_mask_irq(d);
+}
+
+static void mpic_end_ht_irq(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+#ifdef DEBUG_IRQ
+	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
+#endif
+	/* Level-type sources get the HT end-of-interrupt first. The MPIC EOI
+	 * is then always sent, even for edge interrupts: that should only
+	 * lower the priority, the MPIC having latched any further edge anyway.
+	 */
+
+	if (irqd_is_level_type(d))
+		mpic_ht_end_irq(mpic, src);
+	mpic_eoi(mpic);
+}
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+#ifdef CONFIG_SMP
+
+static void mpic_unmask_ipi(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_ipi(d);
+	unsigned int ipi = virq_to_hw(d->irq) - mpic->ipi_vecs[0];	/* relative to first IPI vector */
+
+	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, ipi);
+	mpic_ipi_write(ipi, mpic_ipi_read(ipi) & ~MPIC_VECPRI_MASK);
+}
+
+static void mpic_mask_ipi(struct irq_data *d)
+{
+	/* Deliberately empty: NEVER disable an IPI... that's just plain wrong! */
+}
+
+static void mpic_end_ipi(struct irq_data *d)
+{
+	/*
+	 * IPIs are marked IRQ_PER_CPU. A side effect is that the
+	 * IRQ_PENDING/IRQ_INPROGRESS logic does not apply to them,
+	 * so the EOI is sent late, from here, to avoid re-entering.
+	 * The mpic is the chip data attached to the IPI.
+	 */
+	mpic_eoi(mpic_from_ipi(d));
+}
+
+#endif /* CONFIG_SMP */
+
+static void mpic_unmask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int tm = virq_to_hw(d->irq) - mpic->timer_vecs[0];	/* relative to first timer vector */
+
+	DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, d->irq, tm);
+	mpic_tm_write(tm, mpic_tm_read(tm) & ~MPIC_VECPRI_MASK);
+	mpic_tm_read(tm);	/* read back; NOTE(review): presumably flushes the write - confirm */
+}
+
+static void mpic_mask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int tm = virq_to_hw(d->irq) - mpic->timer_vecs[0];	/* relative to first timer vector */
+
+	mpic_tm_write(tm, mpic_tm_read(tm) | MPIC_VECPRI_MASK);
+	mpic_tm_read(tm);	/* read back; NOTE(review): presumably flushes the write - confirm */
+}
+
+/* Program the destination register of @d's source: one CPU when the MPIC is
+ * MPIC_SINGLE_DEST_CPU, else the online CPUs of @cpumask's low 32 bits. */
+int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
+		      bool force)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
+		int cpuid = irq_choose_cpu(cpumask);
+
+		/* 1U: shifting a signed 1 into bit 31 would be undefined */
+		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1U << cpuid);
+	} else {
+		u32 mask = cpumask_bits(cpumask)[0] & cpumask_bits(cpu_online_mask)[0];
+
+		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(mask));
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
+{
+	unsigned int sense = MPIC_INFO(VECPRI_SENSE_LEVEL);	/* fallback: level ... */
+	unsigned int polarity = MPIC_INFO(VECPRI_POLARITY_NEGATIVE); /* ... and low */
+
+	switch (type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		sense = MPIC_INFO(VECPRI_SENSE_EDGE);
+		polarity = MPIC_INFO(VECPRI_POLARITY_POSITIVE);
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+	case IRQ_TYPE_EDGE_BOTH:
+		sense = MPIC_INFO(VECPRI_SENSE_EDGE);
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		polarity = MPIC_INFO(VECPRI_POLARITY_POSITIVE);
+		break;
+	}
+	return sense | polarity;
+}
+
+int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned int vecpri, vold, vnew;
+
+	DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
+	    mpic, d->irq, src, flow_type);
+
+	if (src >= mpic->num_sources)
+		return -EINVAL;
+
+	vold = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+
+	/* We don't support "none" type: treat it as a request for the default */
+	if (flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_DEFAULT;
+
+	/* Default: derive the type from the sense/polarity bits read from HW */
+	if (flow_type == IRQ_TYPE_DEFAULT) {
+		int vold_ps;
+
+		vold_ps = vold & (MPIC_INFO(VECPRI_POLARITY_MASK) |
+				  MPIC_INFO(VECPRI_SENSE_MASK));
+
+		if (vold_ps == (MPIC_INFO(VECPRI_SENSE_EDGE) |
+				MPIC_INFO(VECPRI_POLARITY_POSITIVE)))
+			flow_type = IRQ_TYPE_EDGE_RISING;
+		else if	(vold_ps == (MPIC_INFO(VECPRI_SENSE_EDGE) |
+				     MPIC_INFO(VECPRI_POLARITY_NEGATIVE)))
+			flow_type = IRQ_TYPE_EDGE_FALLING;
+		else if (vold_ps == (MPIC_INFO(VECPRI_SENSE_LEVEL) |
+				     MPIC_INFO(VECPRI_POLARITY_POSITIVE)))
+			flow_type = IRQ_TYPE_LEVEL_HIGH;
+		else if (vold_ps == (MPIC_INFO(VECPRI_SENSE_LEVEL) |
+				     MPIC_INFO(VECPRI_POLARITY_NEGATIVE)))
+			flow_type = IRQ_TYPE_LEVEL_LOW;
+		else
+			WARN_ONCE(1, "mpic: unknown IRQ type %d\n", vold);
+	}
+
+	/* Apply to irq desc */
+	irqd_set_trigger_type(d, flow_type);
+
+	/* Apply to HW; HT-sourced interrupts are always set to positive edge */
+	if (mpic_is_ht_interrupt(mpic, src))
+		vecpri = MPIC_VECPRI_POLARITY_POSITIVE |
+			MPIC_VECPRI_SENSE_EDGE;
+	else
+		vecpri = mpic_type_to_vecpri(mpic, flow_type);
+
+	vnew = vold & ~(MPIC_INFO(VECPRI_POLARITY_MASK) |
+			MPIC_INFO(VECPRI_SENSE_MASK));
+	vnew |= vecpri;
+	if (vold != vnew)	/* skip the register write when nothing changes */
+		mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vnew);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+void mpic_set_vector(unsigned int virq, unsigned int vector)
+{
+	struct mpic *mpic = mpic_from_irq(virq);
+	unsigned int src = virq_to_hw(virq);
+	unsigned int vecpri;
+
+	DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n",
+	    mpic, virq, src, vector);
+
+	if (src >= mpic->num_sources)
+		return;
+
+	/* replace only the vector field, keeping the other bits of the register */
+	vecpri = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+	vecpri &= ~MPIC_INFO(VECPRI_VECTOR_MASK);
+	mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri | vector);
+}
+
+static void mpic_set_destination(unsigned int virq, unsigned int cpuid)
+{
+	struct mpic *mpic = mpic_from_irq(virq);
+	unsigned int src = virq_to_hw(virq);
+
+	DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n",
+	    mpic, virq, src, cpuid);
+
+	if (src >= mpic->num_sources)
+		return;
+	/* single-CPU destination mask; 1U keeps the shift defined for cpuid 31 */
+	mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1U << cpuid);
+}
+
+static struct irq_chip mpic_irq_chip = {	/* copied to mpic->hc_irq */
+	.irq_mask	= mpic_mask_irq,
+	.irq_unmask	= mpic_unmask_irq,
+	.irq_eoi	= mpic_end_irq,
+	.irq_set_type	= mpic_set_irq_type,
+};
+/* IPI chip template; mpic_alloc() copies it into mpic->hc_ipi */
+#ifdef CONFIG_SMP
+static const struct irq_chip mpic_ipi_chip = {
+	.irq_mask	= mpic_mask_ipi,
+	.irq_unmask	= mpic_unmask_ipi,
+	.irq_eoi	= mpic_end_ipi,
+};
+#endif /* CONFIG_SMP */
+/* Timer chip template (hc_tm); not const since mpic_alloc() may OR in flags */
+static struct irq_chip mpic_tm_chip = {
+	.irq_mask	= mpic_mask_tm,
+	.irq_unmask	= mpic_unmask_tm,
+	.irq_eoi	= mpic_end_irq,
+};
+/* Chip template for HT interrupts; mpic_alloc() copies it into hc_ht_irq */
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+static const struct irq_chip mpic_irq_ht_chip = {
+	.irq_startup	= mpic_startup_ht_irq,
+	.irq_shutdown	= mpic_shutdown_ht_irq,
+	.irq_mask	= mpic_mask_irq,
+	.irq_unmask	= mpic_unmask_ht_irq,
+	.irq_eoi	= mpic_end_ht_irq,
+	.irq_set_type	= mpic_set_irq_type,
+};
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+
+static int mpic_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	/* A domain without an OF node matches anything, else exact match */
+	struct device_node *np = irq_domain_get_of_node(h);
+	return !np || np == node;
+}
+
+static int mpic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct mpic *mpic = h->host_data;
+	struct irq_chip *chip;
+	/* Pick chip and flow handler for 'virq' from the hwirq range of 'hw'. */
+	DBG("mpic: map virq %d, hwirq 0x%lx\n", virq, hw);
+	/* The spurious vector and firmware-protected sources cannot be mapped */
+	if (hw == mpic->spurious_vec)
+		return -EINVAL;
+	if (mpic->protected && test_bit(hw, mpic->protected)) {
+		pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n",
+			(unsigned int)hw);
+		return -EPERM;
+	}
+	/* NB: with CONFIG_SMP the branch below is an 'else' of the test above */
+#ifdef CONFIG_SMP
+	else if (hw >= mpic->ipi_vecs[0]) {
+		WARN_ON(mpic->flags & MPIC_SECONDARY);
+
+		DBG("mpic: mapping as IPI\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_ipi,
+					 handle_percpu_irq);
+		return 0;
+	}
+#endif /* CONFIG_SMP */
+	/* Vectors in the reserved timer range use the timer chip */
+	if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) {
+		WARN_ON(mpic->flags & MPIC_SECONDARY);
+
+		DBG("mpic: mapping as timer\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_tm,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+	/* A nonzero result is treated as "mapped as an error interrupt" */
+	if (mpic_map_error_int(mpic, virq, hw))
+		return 0;
+
+	if (hw >= mpic->num_sources) {
+		pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n",
+			(unsigned int)hw);
+		return -EINVAL;
+	}
+
+	mpic_msi_reserve_hwirq(mpic, hw);
+
+	/* Default chip */
+	chip = &mpic->hc_irq;
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	/* Check for HT interrupts, override vecpri */
+	if (mpic_is_ht_interrupt(mpic, hw))
+		chip = &mpic->hc_ht_irq;
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+	DBG("mpic: mapping to irq chip @%p\n", chip);
+
+	irq_set_chip_data(virq, mpic);
+	irq_set_chip_and_handler(virq, chip, handle_fasteoi_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_DEFAULT);
+
+	/* If the MPIC was reset, then all vectors have already been
+	 * initialized.  Otherwise, a per source lazy initialization
+	 * is done here.
+	 */
+	if (!mpic_is_ipi(mpic, hw) && (mpic->flags & MPIC_NO_RESET)) {
+		int cpu;
+
+		preempt_disable();
+		cpu = mpic_processor_id(mpic);
+		preempt_enable();
+
+		mpic_set_vector(virq, hw);
+		mpic_set_destination(virq, cpu);
+		mpic_irq_set_priority(virq, 8);
+	}
+
+	return 0;
+}
+
+static int mpic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	struct mpic *mpic = h->host_data;
+	static unsigned char map_mpic_senses[4] = {
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+	};
+	/* Unless overridden below, cell 0 is the hardware source number */
+	*out_hwirq = intspec[0];
+	if (intsize >= 4 && (mpic->flags & MPIC_FSL)) {
+		/*
+		 * Freescale MPIC with extended intspec:
+		 * First two cells are as usual.  Third specifies
+		 * an "interrupt type".  Fourth is type-specific data.
+		 *
+		 * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+		 */
+		switch (intspec[2]) {
+		case 0:
+			break;
+		case 1:
+			if (!(mpic->flags & MPIC_FSL_HAS_EIMR))
+				break;
+
+			if (intspec[3] >= ARRAY_SIZE(mpic->err_int_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->err_int_vecs[intspec[3]];
+
+			break;
+		case 2:
+			if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->ipi_vecs[intspec[0]];
+			break;
+		case 3:
+			if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->timer_vecs[intspec[0]];
+			break;
+		default:
+			pr_debug("%s: unknown irq type %u\n",
+				 __func__, intspec[2]);
+			return -EINVAL;
+		}
+		/* In this format the sense is the low two bits of cell 1 */
+		*out_flags = map_mpic_senses[intspec[1] & 3];
+	} else if (intsize > 1) {
+		u32 mask = 0x3;
+
+		/* Apple invented a new race of encoding on machines with
+		 * an HT APIC. They encode, among others, the index within
+		 * the HT APIC. We don't care about it here since thankfully,
+		 * it appears that they have the APIC already properly
+		 * configured, and thus our current fixup code that reads the
+		 * APIC config works fine. However, we still need to mask out
+		 * bits in the specifier to make sure we only get bit 0 which
+		 * is the level/edge bit (the only sense bit exposed by Apple),
+		 * as their bit 1 means something else.
+		 */
+		if (machine_is(powermac))
+			mask = 0x1;
+		*out_flags = map_mpic_senses[intspec[1] & mask];
+	} else
+		*out_flags = IRQ_TYPE_NONE;
+	/* NOTE(review): intspec[1] is printed even for one-cell specifiers */
+	DBG("mpic: xlate (%d cells: 0x%08x 0x%08x) to line 0x%lx sense 0x%x\n",
+	    intsize, intspec[0], intspec[1], *out_hwirq, *out_flags);
+
+	return 0;
+}
+
+/* Chained flow handler used when this MPIC hangs off another controller */
+static void mpic_cascade(struct irq_desc *desc)
+{
+	struct mpic *mpic = irq_desc_get_handler_data(desc);
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int child_irq;
+
+	BUG_ON(!(mpic->flags & MPIC_SECONDARY));
+
+	/* Dispatch at most one pending interrupt, then EOI the parent. */
+	child_irq = mpic_get_one_irq(mpic);
+	if (child_irq != 0)
+		generic_handle_irq(child_irq);
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+static const struct irq_domain_ops mpic_host_ops = {
+	.xlate	= mpic_host_xlate,
+	.map	= mpic_host_map,
+	.match	= mpic_host_match,
+};
+
+static u32 fsl_mpic_get_version(struct mpic *mpic)
+{
+	u32 ver = 0;
+
+	/* BRR1 is only read when MPIC_FSL is set; otherwise report 0. */
+	if (mpic->flags & MPIC_FSL) {
+		ver = _mpic_read(mpic->reg_type, &mpic->thiscpuregs,
+				 MPIC_FSL_BRR1);
+		ver &= MPIC_FSL_BRR1_VER;
+	}
+	return ver;
+}
+
+/*
+ * Exported functions
+ */
+
+u32 fsl_mpic_primary_get_version(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	/* Without a primary MPIC there is no version to report: 0. */
+	if (!mpic)
+		return 0;
+	return fsl_mpic_get_version(mpic);
+}
+
+struct mpic * __init mpic_alloc(struct device_node *node,
+				phys_addr_t phys_addr,
+				unsigned int flags,
+				unsigned int isu_size,
+				unsigned int irq_count,
+				const char *name)
+{
+	int i, psize, intvec_top;
+	struct mpic *mpic;
+	u32 greg_feature;
+	const char *vers;
+	const u32 *psrc;
+	u32 last_irq;
+	u32 fsl_version = 0;
+	/* Allocate and set up the state of one MPIC; returns NULL on failure */
+	/* Default MPIC search parameters */
+	static const struct of_device_id __initconst mpic_device_id[] = {
+		{ .type	      = "open-pic", },
+		{ .compatible = "open-pic", },
+		{},
+	};
+
+	/*
+	 * If we were not passed a device-tree node, then perform the default
+	 * search for a standardized OpenPIC.
+	 */
+	if (node) {
+		node = of_node_get(node);
+	} else {
+		node = of_find_matching_node(NULL, mpic_device_id);
+		if (!node)
+			return NULL;
+	}
+
+	/* Pick the physical address from the device tree if unspecified */
+	if (!phys_addr) {
+		/* Check if it is DCR-based */
+		if (of_property_read_bool(node, "dcr-reg")) {
+			flags |= MPIC_USES_DCR;
+		} else {
+			struct resource r;
+			if (of_address_to_resource(node, 0, &r))
+				goto err_of_node_put;
+			phys_addr = r.start;
+		}
+	}
+
+	/* Read extra device-tree properties into the flags variable */
+	if (of_property_read_bool(node, "big-endian"))
+		flags |= MPIC_BIG_ENDIAN;
+	if (of_property_read_bool(node, "pic-no-reset"))
+		flags |= MPIC_NO_RESET;
+	if (of_property_read_bool(node, "single-cpu-affinity"))
+		flags |= MPIC_SINGLE_DEST_CPU;
+	if (of_device_is_compatible(node, "fsl,mpic")) {
+		flags |= MPIC_FSL | MPIC_LARGE_VECTORS;
+		mpic_irq_chip.flags |= IRQCHIP_SKIP_SET_WAKE;
+		mpic_tm_chip.flags |= IRQCHIP_SKIP_SET_WAKE;
+	}
+
+	mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL);
+	if (mpic == NULL)
+		goto err_of_node_put;
+
+	mpic->name = name;
+	mpic->node = node;
+	mpic->paddr = phys_addr;
+	mpic->flags = flags;
+
+	mpic->hc_irq = mpic_irq_chip;
+	mpic->hc_irq.name = name;
+	if (!(mpic->flags & MPIC_SECONDARY))
+		mpic->hc_irq.irq_set_affinity = mpic_set_affinity;
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	mpic->hc_ht_irq = mpic_irq_ht_chip;
+	mpic->hc_ht_irq.name = name;
+	if (!(mpic->flags & MPIC_SECONDARY))
+		mpic->hc_ht_irq.irq_set_affinity = mpic_set_affinity;
+#endif /* CONFIG_MPIC_U3_HT_IRQS */
+
+#ifdef CONFIG_SMP
+	mpic->hc_ipi = mpic_ipi_chip;
+	mpic->hc_ipi.name = name;
+#endif /* CONFIG_SMP */
+
+	mpic->hc_tm = mpic_tm_chip;
+	mpic->hc_tm.name = name;
+
+	mpic->num_sources = 0; /* so far */
+
+	if (mpic->flags & MPIC_LARGE_VECTORS)
+		intvec_top = 2047;
+	else
+		intvec_top = 255;
+	/* The top 13 vectors are reserved: 8 timers, 4 IPIs and the spurious one */
+	mpic->timer_vecs[0] = intvec_top - 12;
+	mpic->timer_vecs[1] = intvec_top - 11;
+	mpic->timer_vecs[2] = intvec_top - 10;
+	mpic->timer_vecs[3] = intvec_top - 9;
+	mpic->timer_vecs[4] = intvec_top - 8;
+	mpic->timer_vecs[5] = intvec_top - 7;
+	mpic->timer_vecs[6] = intvec_top - 6;
+	mpic->timer_vecs[7] = intvec_top - 5;
+	mpic->ipi_vecs[0]   = intvec_top - 4;
+	mpic->ipi_vecs[1]   = intvec_top - 3;
+	mpic->ipi_vecs[2]   = intvec_top - 2;
+	mpic->ipi_vecs[3]   = intvec_top - 1;
+	mpic->spurious_vec  = intvec_top;
+
+	/* Look for protected sources */
+	psrc = of_get_property(mpic->node, "protected-sources", &psize);
+	if (psrc) {
+		/* Allocate a bitmap with one bit per interrupt */
+		mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL);
+		BUG_ON(mpic->protected == NULL);
+		for (i = 0; i < psize/sizeof(u32); i++) {
+			if (psrc[i] > intvec_top)
+				continue;
+			__set_bit(psrc[i], mpic->protected);
+		}
+	}
+
+#ifdef CONFIG_MPIC_WEIRD
+	mpic->hw_set = mpic_infos[MPIC_GET_REGSET(mpic->flags)];
+#endif
+
+	/* default register type */
+	if (mpic->flags & MPIC_BIG_ENDIAN)
+		mpic->reg_type = mpic_access_mmio_be;
+	else
+		mpic->reg_type = mpic_access_mmio_le;
+
+	/*
+	 * An MPIC with a "dcr-reg" property must be accessed that way, but
+	 * only if the kernel includes DCR support.
+	 */
+#ifdef CONFIG_PPC_DCR
+	if (mpic->flags & MPIC_USES_DCR)
+		mpic->reg_type = mpic_access_dcr;
+#else
+	BUG_ON(mpic->flags & MPIC_USES_DCR);
+#endif
+
+	/* Map the global registers */
+	mpic_map(mpic, mpic->paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000);
+	mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000);
+
+	if (mpic->flags & MPIC_FSL) {
+		int ret;
+
+		/*
+		 * Yes, Freescale really did put global registers in the
+		 * magic per-cpu area -- and they don't even show up in the
+		 * non-magic per-cpu copies that this driver normally uses.
+		 */
+		mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs,
+			 MPIC_CPU_THISBASE, 0x1000);
+
+		fsl_version = fsl_mpic_get_version(mpic);
+
+		/* Error interrupt mask register (EIMR) is required for
+		 * handling individual device error interrupts. EIMR
+		 * was added in MPIC version 4.1.
+		 *
+		 * Here we reserve vector numbers for the error interrupts.
+		 * They are taken from the global vector number space, just
+		 * like the IPI and timer vectors: the first free vector is
+		 * intvec_top - 13, where 13 is the number of vectors used
+		 * by the timers (8), IPIs (4) and the spurious vector (1).
+		 *
+		 * NOTE(review): the failure path below returns without
+		 * releasing 'mpic', the node reference or the mappings.
+		 */
+		if (fsl_version >= 0x401) {
+			ret = mpic_setup_error_int(mpic, intvec_top - 13);
+			if (ret)
+				return NULL;
+		}
+
+	}
+
+	/*
+	 * EPR is only available starting with v4.0.  To support
+	 * platforms that don't know the MPIC version at compile-time,
+	 * such as qemu-e500, turn off coreint if this MPIC doesn't
+	 * support it.  Note that we never enable it if it wasn't
+	 * requested in the first place.
+	 *
+	 * This is done outside the MPIC_FSL check, so that we
+	 * also disable coreint if the MPIC node doesn't have
+	 * an "fsl,mpic" compatible at all.  This will be the case
+	 * with device trees generated by older versions of QEMU.
+	 * fsl_version will be zero if MPIC_FSL is not set.
+	 */
+	if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT))
+		ppc_md.get_irq = mpic_get_irq;
+
+	/* Reset */
+
+	/* When using a device-node, reset requests are only honored if the MPIC
+	 * is allowed to reset.
+	 */
+	if (!(mpic->flags & MPIC_NO_RESET)) {
+		printk(KERN_DEBUG "mpic: Resetting\n");
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_RESET);
+		while( mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+		       & MPIC_GREG_GCONF_RESET)
+			mb();
+	}
+
+	/* CoreInt */
+	if (mpic->flags & MPIC_ENABLE_COREINT)
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_COREINT);
+
+	if (mpic->flags & MPIC_ENABLE_MCK)
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_MCK);
+
+	/*
+	 * The MPIC driver will crash if there are more cores than we
+	 * can initialize, so we may as well catch that problem here.
+	 */
+	BUG_ON(num_possible_cpus() > MPIC_MAX_CPUS);
+
+	/* Map the per-CPU registers */
+	for_each_possible_cpu(i) {
+		unsigned int cpu = get_hard_smp_processor_id(i);
+
+		mpic_map(mpic, mpic->paddr, &mpic->cpuregs[cpu],
+			 MPIC_INFO(CPU_BASE) + cpu * MPIC_INFO(CPU_STRIDE),
+			 0x1000);
+	}
+
+	/*
+	 * Read feature register.  For non-ISU MPICs, num sources as well. On
+	 * ISU MPICs, sources are counted as ISUs are added
+	 */
+	greg_feature = mpic_read(mpic->gregs, MPIC_INFO(GREG_FEATURE_0));
+
+	/*
+	 * By default, the last source number comes from the MPIC, but the
+	 * device-tree and board support code can override it on buggy hw.
+	 * If we get passed an isu_size (multi-isu MPIC) then we use that
+	 * as a default instead of the value read from the HW.
+	 */
+	last_irq = (greg_feature & MPIC_GREG_FEATURE_LAST_SRC_MASK)
+				>> MPIC_GREG_FEATURE_LAST_SRC_SHIFT;
+	if (isu_size)
+		last_irq = isu_size  * MPIC_MAX_ISU - 1;
+	of_property_read_u32(mpic->node, "last-interrupt-source", &last_irq);
+	if (irq_count)
+		last_irq = irq_count - 1;
+
+	/* Initialize main ISU if none provided */
+	if (!isu_size) {
+		isu_size = last_irq + 1;
+		mpic->num_sources = isu_size;
+		mpic_map(mpic, mpic->paddr, &mpic->isus[0],
+				MPIC_INFO(IRQ_BASE),
+				MPIC_INFO(IRQ_STRIDE) * isu_size);
+	}
+
+	mpic->isu_size = isu_size;
+	mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
+	mpic->isu_mask = (1 << mpic->isu_shift) - 1;
+
+	mpic->irqhost = irq_domain_add_linear(mpic->node,
+				       intvec_top,
+				       &mpic_host_ops, mpic);
+
+	/*
+	 * FIXME: The code leaks the MPIC object and mappings here; this
+	 * is very unlikely to fail but it ought to be fixed anyways.
+	 */
+	if (mpic->irqhost == NULL)
+		return NULL;
+
+	/* Display version */
+	switch (greg_feature & MPIC_GREG_FEATURE_VERSION_MASK) {
+	case 1:
+		vers = "1.0";
+		break;
+	case 2:
+		vers = "1.2";
+		break;
+	case 3:
+		vers = "1.3";
+		break;
+	default:
+		vers = "<unknown>";
+		break;
+	}
+	printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %llx,"
+	       " max %d CPUs\n",
+	       name, vers, (unsigned long long)mpic->paddr, num_possible_cpus());
+	printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n",
+	       mpic->isu_size, mpic->isu_shift, mpic->isu_mask);
+
+	mpic->next = mpics;
+	mpics = mpic;
+
+	if (!(mpic->flags & MPIC_SECONDARY)) {
+		mpic_primary = mpic;
+		irq_set_default_host(mpic->irqhost);
+	}
+
+	return mpic;
+
+err_of_node_put:
+	of_node_put(node);
+	return NULL;
+}
+
+void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
+			    phys_addr_t paddr)
+{
+	/* First source number and one-past-last source served by this ISU */
+	unsigned int isu_first = isu_num * mpic->isu_size;
+	unsigned int isu_end = isu_first + mpic->isu_size;
+
+	BUG_ON(isu_num >= MPIC_MAX_ISU);
+	mpic_map(mpic, paddr, &mpic->isus[isu_num], 0,
+		 MPIC_INFO(IRQ_STRIDE) * mpic->isu_size);
+	/* Grow the source count if this ISU extends past the current end. */
+	if (isu_end > mpic->num_sources)
+		mpic->num_sources = isu_end;
+}
+
+void __init mpic_init(struct mpic *mpic)
+{
+	int i, cpu;
+	int num_timers = 4;
+	/* Put timers, IPIs and (unless MPIC_NO_RESET) sources in a known state */
+	BUG_ON(mpic->num_sources == 0);
+
+	printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources);
+
+	/* Set current processor priority to max */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
+
+	if (mpic->flags & MPIC_FSL) {
+		u32 version = fsl_mpic_get_version(mpic);
+
+		/*
+		 * Timer group B is present at the latest in MPIC 3.1 (e.g.
+		 * mpc8536).  It is not present in MPIC 2.0 (e.g. mpc8544).
+		 * I don't know about the status of intermediate versions (or
+		 * whether they even exist).
+		 */
+		if (version >= 0x0301)
+			num_timers = 8;
+	}
+
+	/* Initialize timers to our reserved vectors and mask them for now */
+	for (i = 0; i < num_timers; i++) {
+		unsigned int offset = mpic_tm_offset(mpic, i);
+
+		mpic_write(mpic->tmregs,
+			   offset + MPIC_INFO(TIMER_DESTINATION),
+			   1 << hard_smp_processor_id());
+		mpic_write(mpic->tmregs,
+			   offset + MPIC_INFO(TIMER_VECTOR_PRI),
+			   MPIC_VECPRI_MASK |
+			   (9 << MPIC_VECPRI_PRIORITY_SHIFT) |
+			   (mpic->timer_vecs[0] + i));
+	}
+
+	/* Initialize IPIs to our reserved vectors and mark them disabled for now */
+	mpic_test_broken_ipi(mpic);
+	for (i = 0; i < 4; i++) {
+		mpic_ipi_write(i,
+			       MPIC_VECPRI_MASK |
+			       (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
+			       (mpic->ipi_vecs[0] + i));
+	}
+
+	/* Do the HT PIC fixups on U3 broken mpic */
+	DBG("MPIC flags: %x\n", mpic->flags);
+	if ((mpic->flags & MPIC_U3_HT_IRQS) && !(mpic->flags & MPIC_SECONDARY)) {
+		mpic_scan_ht_pics(mpic);
+		mpic_u3msi_init(mpic);
+	}
+
+	mpic_pasemi_msi_init(mpic);
+
+	cpu = mpic_processor_id(mpic);
+	/* With MPIC_NO_RESET, sources are instead set up in mpic_host_map() */
+	if (!(mpic->flags & MPIC_NO_RESET)) {
+		for (i = 0; i < mpic->num_sources; i++) {
+			/* start with vector = source number, and masked */
+			u32 vecpri = MPIC_VECPRI_MASK | i |
+				(8 << MPIC_VECPRI_PRIORITY_SHIFT);
+
+			/* check if protected */
+			if (mpic->protected && test_bit(i, mpic->protected))
+				continue;
+			/* init hw */
+			mpic_irq_write(i, MPIC_INFO(IRQ_VECTOR_PRI), vecpri);
+			mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION), 1 << cpu);
+		}
+	}
+
+	/* Init spurious vector */
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_SPURIOUS), mpic->spurious_vec);
+
+	/* Disable 8259 passthrough, if supported */
+	if (!(mpic->flags & MPIC_NO_PTHROU_DIS))
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			   mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			   | MPIC_GREG_GCONF_8259_PTHROU_DIS);
+
+	if (mpic->flags & MPIC_NO_BIAS)
+		mpic_write(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0),
+			mpic_read(mpic->gregs, MPIC_INFO(GREG_GLOBAL_CONF_0))
+			| MPIC_GREG_GCONF_NO_BIAS);
+
+	/* Set current processor priority to 0 */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0);
+
+#ifdef CONFIG_PM
+	/* allocate memory to save mpic state */
+	mpic->save_data = kmalloc_array(mpic->num_sources,
+				        sizeof(*mpic->save_data),
+				        GFP_KERNEL);
+	BUG_ON(mpic->save_data == NULL);
+#endif
+
+	/* Check if this MPIC is chained from a parent interrupt controller */
+	if (mpic->flags & MPIC_SECONDARY) {
+		int virq = irq_of_parse_and_map(mpic->node, 0);
+		if (virq) {
+			printk(KERN_INFO "%pOF: hooking up to IRQ %d\n",
+					mpic->node, virq);
+			irq_set_handler_data(virq, mpic);
+			irq_set_chained_handler(virq, &mpic_cascade);
+		}
+	}
+
+	/* FSL mpic error interrupt initialization */
+	if (mpic->flags & MPIC_FSL_HAS_EIMR)
+		mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
+}
+
+void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
+{
+	struct mpic *mpic = mpic_find(irq);
+	unsigned int src = virq_to_hw(irq);
+	unsigned long flags;
+	unsigned int idx;
+	u32 reg;
+
+	if (!mpic)
+		return;
+	/* Only the priority field of the matching VECPRI register changes. */
+	pri <<= MPIC_VECPRI_PRIORITY_SHIFT;
+
+	raw_spin_lock_irqsave(&mpic_lock, flags);
+	if (mpic_is_ipi(mpic, src)) {
+		idx = src - mpic->ipi_vecs[0];
+		reg = mpic_ipi_read(idx) & ~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_ipi_write(idx, reg | pri);
+	} else if (mpic_is_tm(mpic, src)) {
+		idx = src - mpic->timer_vecs[0];
+		reg = mpic_tm_read(idx) & ~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_tm_write(idx, reg | pri);
+	} else {
+		reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+		reg &= ~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), reg | pri);
+	}
+	raw_spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+void mpic_setup_this_cpu(void)
+{
+#ifdef CONFIG_SMP
+	struct mpic *mpic = mpic_primary;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned long flags;
+	unsigned int src;
+	u32 dest;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+
+	raw_spin_lock_irqsave(&mpic_lock, flags);
+	/* When interrupts are distributed, add this CPU to the destination
+	 * mask of every source, unless MPIC_SINGLE_DEST_CPU is set.
+	 */
+	if (distribute_irqs && !(mpic->flags & MPIC_SINGLE_DEST_CPU)) {
+		for (src = 0; src < mpic->num_sources; src++) {
+			dest = mpic_irq_read(src, MPIC_INFO(IRQ_DESTINATION));
+			mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
+				       dest | msk);
+		}
+	}
+
+	/* Drop the current processor priority to 0. */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0);
+
+	raw_spin_unlock_irqrestore(&mpic_lock, flags);
+#endif /* CONFIG_SMP */
+}
+
+int mpic_cpu_get_priority(void)
+{
+	/* Read the current-task-priority register via the primary MPIC. */
+	struct mpic *mpic = mpic_primary;
+	int prio = mpic_cpu_read(MPIC_INFO(CPU_CURRENT_TASK_PRI)); return prio;
+}
+
+void mpic_cpu_set_priority(int prio)
+{
+	struct mpic *mpic = mpic_primary;
+	/* Only the bits covered by MPIC_CPU_TASKPRI_MASK are written. */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI),
+		       prio & MPIC_CPU_TASKPRI_MASK);
+}
+
+void mpic_teardown_this_cpu(int secondary)
+{
+	struct mpic *mpic = mpic_primary;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned long flags;
+	unsigned int src;
+	u32 dest;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+	raw_spin_lock_irqsave(&mpic_lock, flags);
+
+	/* Remove this CPU from the destination mask of every source. */
+	for (src = 0; src < mpic->num_sources; src++) {
+		dest = mpic_irq_read(src, MPIC_INFO(IRQ_DESTINATION));
+		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), dest & ~msk);
+	}
+	/* Raise the current processor priority to the maximum (0xf). */
+	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
+	/* Not every platform resets the MPIC on boot, so EOI the IPI here;
+	 * otherwise new interrupts would not get delivered.
+	 */
+	mpic_eoi(mpic);
+	raw_spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+
+static unsigned int _mpic_get_one_irq(struct mpic *mpic, int reg)
+{
+	u32 src = mpic_cpu_read(reg) & MPIC_INFO(VECPRI_VECTOR_MASK);
+
+#ifdef DEBUG_LOW
+	DBG("%s: get_one_irq(reg 0x%x): %d\n", mpic->name, reg, src);
+#endif
+	/* Spurious vector: report "no irq"; EOI only with MPIC_SPV_EOI */
+	if (unlikely(src == mpic->spurious_vec)) {
+		if (mpic->flags & MPIC_SPV_EOI)
+			mpic_eoi(mpic);
+		return 0;
+	}
+	/* Protected source: warn (rate limited), EOI it and report "no irq" */
+	if (unlikely(mpic->protected && test_bit(src, mpic->protected))) {
+		printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n",
+				   mpic->name, (int)src);
+		mpic_eoi(mpic);
+		return 0;
+	}
+	return irq_linear_revmap(mpic->irqhost, src);
+}
+
+unsigned int mpic_get_one_irq(struct mpic *mpic)
+{	/* Fetch one interrupt of 'mpic' through the CPU_INTACK register */
+	return _mpic_get_one_irq(mpic, MPIC_INFO(CPU_INTACK));
+}
+
+unsigned int mpic_get_irq(void)
+{
+	/* Fetch the next interrupt from the primary MPIC, which must exist. */
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(!mpic);
+	return mpic_get_one_irq(mpic);
+}
+
+unsigned int mpic_get_coreint_irq(void)
+{
+#ifdef CONFIG_BOOKE
+	struct mpic *mpic = mpic_primary;
+	u32 src;
+	/* Here the vector is read from the EPR special purpose register */
+	BUG_ON(mpic == NULL);
+
+	src = mfspr(SPRN_EPR);
+	/* NOTE(review): unlike _mpic_get_one_irq(), no EOI for protected sources */
+	if (unlikely(src == mpic->spurious_vec)) {
+		if (mpic->flags & MPIC_SPV_EOI)
+			mpic_eoi(mpic);
+		return 0;
+	}
+	if (unlikely(mpic->protected && test_bit(src, mpic->protected))) {
+		printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n",
+				   mpic->name, (int)src);
+		return 0;
+	}
+
+	return irq_linear_revmap(mpic->irqhost, src);
+#else
+	return 0;
+#endif
+}
+
+unsigned int mpic_get_mcirq(void)
+{
+	/* Like mpic_get_irq(), but fetched through the CPU_MCACK register. */
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(!mpic);
+	return _mpic_get_one_irq(mpic, MPIC_INFO(CPU_MCACK));
+}
+
+#ifdef CONFIG_SMP
+void __init mpic_request_ipis(void)
+{
+	struct mpic *mpic = mpic_primary;
+	unsigned int vipi;
+	int msg;
+
+	BUG_ON(mpic == NULL);
+	printk(KERN_INFO "mpic: requesting IPIs...\n");
+	/* Map each of the four IPI vectors and request it for its message. */
+	for (msg = 0; msg < 4; msg++) {
+		vipi = irq_create_mapping(mpic->irqhost,
+					  mpic->ipi_vecs[0] + msg);
+		if (vipi)
+			smp_request_message_ipi(vipi, msg);
+		else
+			printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[msg]);
+	}
+}
+
+void smp_mpic_message_pass(int cpu, int msg)
+{
+	struct mpic *mpic = mpic_primary;
+	unsigned int dispatch_reg;
+	u32 physmask;
+
+	BUG_ON(mpic == NULL);
+
+	/* Only messages 0..3 translate to one of the IPI dispatch registers */
+	if ((unsigned int)msg > 3) {
+		printk("SMP %d: smp_message_pass: unknown msg %d\n",
+		       smp_processor_id(), msg);
+		return;
+	}
+
+#ifdef DEBUG_IPI
+	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg);
+#endif
+	dispatch_reg = MPIC_INFO(CPU_IPI_DISPATCH_0) +
+		       msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE);
+	physmask = 1 << get_hard_smp_processor_id(cpu);
+	mpic_cpu_write(dispatch_reg, physmask);
+}
+
+void __init smp_mpic_probe(void)
+{
+	int nr_cpus;
+
+	DBG("smp_mpic_probe()...\n");
+	nr_cpus = num_possible_cpus();
+	DBG("nr_cpus: %d\n", nr_cpus);
+
+	/* IPIs are only requested when more than one CPU is possible. */
+	if (nr_cpus <= 1)
+		return;
+	mpic_request_ipis();
+}
+
+void smp_mpic_setup_cpu(int cpu)
+{	/* 'cpu' is not used; this just calls mpic_setup_this_cpu() */
+	mpic_setup_this_cpu();
+}
+
+void mpic_reset_core(int cpu)
+{
+	struct mpic *mpic = mpic_primary;
+	u32 pir;
+	int cpuid = get_hard_smp_processor_id(cpu);
+	int i;
+	/* NOTE(review): mpic_primary is dereferenced without a NULL check */
+	/* Set target bit for core reset */
+	pir = mpic_read(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT));
+	pir |= (1 << cpuid);
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT), pir);
+	mpic_read(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT));
+
+	/* Restore target bit after reset complete */
+	pir &= ~(1 << cpuid);
+	mpic_write(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT), pir);
+	mpic_read(mpic->gregs, MPIC_INFO(GREG_PROCESSOR_INIT));
+
+	/* Perform 15 EOI on each reset core to clear pending interrupts.
+	 * This is required for FSL CoreNet based devices */
+	if (mpic->flags & MPIC_FSL) {
+		for (i = 0; i < 15; i++) {
+			_mpic_write(mpic->reg_type, &mpic->cpuregs[cpuid],
+				      MPIC_CPU_EOI, 0);
+		}
+	}
+}
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PM
+static void mpic_suspend_one(struct mpic *mpic)
+{
+	int src;
+	/* Snapshot VECPRI and DESTINATION of every source into save_data. */
+	for (src = 0; src < mpic->num_sources; src++) {
+		u32 vecprio = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI));
+		u32 dest = mpic_irq_read(src, MPIC_INFO(IRQ_DESTINATION));
+		mpic->save_data[src].vecprio = vecprio;
+		mpic->save_data[src].dest = dest;
+	}
+}
+
+static int mpic_suspend(void)
+{
+	struct mpic *mpic;
+
+	/* Walk the global 'mpics' list and save the state of every MPIC. */
+	for (mpic = mpics; mpic; mpic = mpic->next)
+		mpic_suspend_one(mpic);
+
+	/* Always reports success. */
+	return 0;
+}
+
+static void mpic_resume_one(struct mpic *mpic)
+{
+	int i;
+	/* Write the saved VECPRI/DESTINATION values back to every source */
+	for (i = 0; i < mpic->num_sources; i++) {
+		mpic_irq_write(i, MPIC_INFO(IRQ_VECTOR_PRI),
+			       mpic->save_data[i].vecprio);
+		mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION),
+			       mpic->save_data[i].dest);
+	/* Sources with an HT fixup also get their saved fixup_data restored */
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+	if (mpic->fixups) {
+		struct mpic_irq_fixup *fixup = &mpic->fixups[i];
+
+		if (fixup->base) {
+			/* we use the lowest bit in an inverted meaning */
+			if ((mpic->save_data[i].fixup_data & 1) == 0)
+				continue;
+
+			/* Enable and configure */
+			writeb(0x10 + 2 * fixup->index, fixup->base + 2);
+
+			writel(mpic->save_data[i].fixup_data & ~1,
+			       fixup->base + 4);
+		}
+	}
+#endif
+	} /* end for loop */
+}
+
+static void mpic_resume(void)
+{
+	struct mpic *mpic;
+
+	/* Restore every MPIC on the global 'mpics' list, starting from
+	 * the head of the list. */
+	for (mpic = mpics; mpic; mpic = mpic->next)
+		mpic_resume_one(mpic);
+}
+
+static struct syscore_ops mpic_syscore_ops = {
+	.suspend = mpic_suspend,
+	.resume = mpic_resume,
+};
+
+static int mpic_init_sys(void)
+{
+	int rc;
+
+	register_syscore_ops(&mpic_syscore_ops);
+	rc = subsys_system_register(&mpic_subsys, NULL);
+	if (!rc)
+		return 0;
+
+	/* Undo the syscore registration if the subsystem cannot be added. */
+	unregister_syscore_ops(&mpic_syscore_ops);
+	pr_err("mpic: Failed to register subsystem!\n");
+	return rc;
+}
+
+device_initcall(mpic_init_sys);
+#endif
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
new file mode 100644
index 0000000000..bb460ff57a
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _POWERPC_SYSDEV_MPIC_H
+#define _POWERPC_SYSDEV_MPIC_H
+
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+/* MSI hooks; without CONFIG_PCI_MSI the two used by mpic.c become stubs */
+#ifdef CONFIG_PCI_MSI
+extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq);
+int __init mpic_msi_init_allocator(struct mpic *mpic);
+int __init mpic_u3msi_init(struct mpic *mpic);
+#else
+static inline void mpic_msi_reserve_hwirq(struct mpic *mpic,
+					  irq_hw_number_t hwirq)
+{
+	return;
+}
+
+static inline int mpic_u3msi_init(struct mpic *mpic)
+{
+	return -1;
+}
+#endif
+/* PA Semi MSI setup; the stub returns -1 when it is not configured */
+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI)
+int __init mpic_pasemi_msi_init(struct mpic *mpic);
+#else
+static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; }
+#endif
+/* Core MPIC helpers made available through this header */
+extern int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern void mpic_set_vector(unsigned int virq, unsigned int vector);
+extern int mpic_set_affinity(struct irq_data *d,
+			     const struct cpumask *cpumask, bool force);
+extern void mpic_reset_core(int cpu);
+/* FSL error interrupts; without CONFIG_FSL_SOC the stubs return 0 / -1 */
+#ifdef CONFIG_FSL_SOC
+extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t  hw);
+void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum);
+int __init mpic_setup_error_int(struct mpic *mpic, int intvec);
+#else
+static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t  hw)
+{
+	return 0;
+}
+
+
+static inline void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
+{
+	return;
+}
+
+static inline int mpic_setup_error_int(struct mpic *mpic, int intvec)
+{
+	return -1;
+}
+#endif
+
+#endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
new file mode 100644
index 0000000000..7b449cc51a
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2011-2012, Meador Inge, Mentor Graphics Corporation.
+ *
+ * Some ideas based on un-pushed work done by Vivek Mahajan, Jason Jin, and
+ * Mingkai Hu from Freescale Semiconductor, Inc.
+ */
+
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/mpic_msgr.h>
+
+#define MPIC_MSGR_REGISTERS_PER_BLOCK	4
+#define MPIC_MSGR_STRIDE		0x10	/* added per register to the block base */
+#define MPIC_MSGR_MER_OFFSET		(0x100 / sizeof(u32))	/* 0x100 bytes, counted in u32s */
+#define MSGR_INUSE			0	/* the two values stored in ->in_use */
+#define MSGR_FREE			1
+
+static struct mpic_msgr **mpic_msgrs;	/* one slot per register, filled in by probe */
+static unsigned int mpic_msgr_count;	/* number of slots allocated for mpic_msgrs */
+static DEFINE_RAW_SPINLOCK(msgrs_lock);	/* held by mpic_msgr_get() while claiming */
+
+static inline void _mpic_msgr_mer_write(struct mpic_msgr *msgr, u32 value)
+{
+	out_be32(msgr->mer, value);	/* store @value through the msgr->mer pointer */
+}
+
+static inline u32 _mpic_msgr_mer_read(struct mpic_msgr *msgr)
+{
+	return in_be32(msgr->mer);	/* current value behind the msgr->mer pointer */
+}
+
+static inline void _mpic_msgr_disable(struct mpic_msgr *msgr)
+{
+	const u32 enable_bit = 1 << msgr->num;	/* this register's bit in MER */
+
+	_mpic_msgr_mer_write(msgr, _mpic_msgr_mer_read(msgr) & ~enable_bit);
+}
+
+struct mpic_msgr *mpic_msgr_get(unsigned int reg_num)
+{
+	unsigned long flags;
+	struct mpic_msgr *msgr;
+
+	/* ERR_PTR(-ENODEV): no such register was probed; -EBUSY: already claimed. */
+	if (reg_num >= mpic_msgr_count)
+		return ERR_PTR(-ENODEV);
+
+	raw_spin_lock_irqsave(&msgrs_lock, flags);
+	msgr = mpic_msgrs[reg_num];
+	if (!msgr || msgr->in_use != MSGR_FREE)
+		msgr = ERR_PTR(msgr ? -EBUSY : -ENODEV);
+	else
+		msgr->in_use = MSGR_INUSE;
+	raw_spin_unlock_irqrestore(&msgrs_lock, flags);
+
+	return msgr;
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_get);
+
+void mpic_msgr_put(struct mpic_msgr *msgr)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&msgrs_lock, flags);
+	mpic_msgr_disable(msgr);	/* quiesce (under msgr->lock) before it can be re-claimed */
+	msgr->in_use = MSGR_FREE;	/* released under the lock mpic_msgr_get() claims with */
+	raw_spin_unlock_irqrestore(&msgrs_lock, flags);
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_put);
+
+void mpic_msgr_enable(struct mpic_msgr *msgr)
+{
+	unsigned long irq_state;
+
+	/* Read-modify-write of the enable register; msgr->lock serializes it
+	 * against mpic_msgr_disable() on the same register. */
+	raw_spin_lock_irqsave(&msgr->lock, irq_state);
+	_mpic_msgr_mer_write(msgr, _mpic_msgr_mer_read(msgr) | (1 << msgr->num));
+	raw_spin_unlock_irqrestore(&msgr->lock, irq_state);
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_enable);
+
+void mpic_msgr_disable(struct mpic_msgr *msgr)
+{
+	unsigned long irq_state;
+
+	raw_spin_lock_irqsave(&msgr->lock, irq_state);
+	_mpic_msgr_mer_write(msgr, _mpic_msgr_mer_read(msgr) & ~(1 << msgr->num));
+	raw_spin_unlock_irqrestore(&msgr->lock, irq_state);
+}
+EXPORT_SYMBOL_GPL(mpic_msgr_disable);
+
+/* The following three functions are used to compute the order and number of
+ * the message register blocks.  They are clearly very inefficient.  However,
+ * they are called *only* a few times during device initialization.
+ */
+static unsigned int mpic_msgr_number_of_blocks(void)
+{
+	struct device_node *aliases = of_find_node_by_name(NULL, "aliases");
+	unsigned int nblocks = 0;
+	char alias[32];
+
+	/* No "aliases" node means there are no ordered blocks at all. */
+	if (!aliases)
+		return 0;
+
+	/* Try "mpic-msgr-block0", "mpic-msgr-block1", ... and stop at the
+	 * first name that is not a property of the aliases node.
+	 */
+	for (;; nblocks++) {
+		snprintf(alias, sizeof(alias), "mpic-msgr-block%d", nblocks);
+		if (!of_property_present(aliases, alias))
+			break;
+	}
+	of_node_put(aliases);
+
+	return nblocks;
+}
+
+/* Each aliased block provides MPIC_MSGR_REGISTERS_PER_BLOCK registers. */
+static unsigned int mpic_msgr_number_of_registers(void)
+{
+	return MPIC_MSGR_REGISTERS_PER_BLOCK * mpic_msgr_number_of_blocks();
+}
+
+static int mpic_msgr_block_number(struct device_node *node)
+{
+	struct device_node *aliases, *tn;
+	unsigned int index, number_of_blocks;
+	struct property *prop;
+	char buf[64];
+
+	number_of_blocks = mpic_msgr_number_of_blocks();
+	aliases = of_find_node_by_name(NULL, "aliases");
+	if (!aliases)
+		return -1;
+
+	/* Block number = index N of the "mpic-msgr-blockN" alias naming @node. */
+	for (index = 0; index < number_of_blocks; ++index) {
+		snprintf(buf, sizeof(buf), "mpic-msgr-block%d", index);
+		prop = of_find_property(aliases, buf, NULL);
+		/* Tolerate an alias that disappeared after being counted. */
+		tn = prop ? of_find_node_by_path(prop->value) : NULL;
+		if (node == tn) {
+			of_node_put(tn);
+			break;
+		}
+		of_node_put(tn);
+	}
+	of_node_put(aliases);
+
+	return index == number_of_blocks ? -1 : index;
+}
+
+/* The probe function for a single message register block.
+ */
+static int mpic_msgr_probe(struct platform_device *dev)
+{
+	void __iomem *msgr_block_addr;
+	int block_number, ret;
+	struct resource rsrc;
+	unsigned int i, irq_index, reg_number;
+	struct device_node *np = dev->dev.of_node;
+	unsigned int receive_mask;
+	const unsigned int *prop;
+
+	if (!np) {
+		dev_err(&dev->dev, "Device OF-Node is NULL\n");
+		return -EFAULT;
+	}
+
+	/* Allocate the message register array upon the first device registered. */
+	if (!mpic_msgrs) {
+		mpic_msgr_count = mpic_msgr_number_of_registers();
+		dev_info(&dev->dev, "Found %d message registers\n", mpic_msgr_count);
+		mpic_msgrs = kcalloc(mpic_msgr_count, sizeof(*mpic_msgrs), GFP_KERNEL);
+		if (!mpic_msgrs) {
+			dev_err(&dev->dev, "No memory for message register blocks\n");
+			mpic_msgr_count = 0;	/* nothing for mpic_msgr_get() to index */
+			return -ENOMEM;
+		}
+	}
+	dev_info(&dev->dev, "Of-device full name %pOF\n", np);
+
+	/* IO map the message register block; an untranslatable "reg" fails too. */
+	msgr_block_addr = of_address_to_resource(np, 0, &rsrc) ? NULL :
+		devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc));
+	if (!msgr_block_addr) {
+		dev_err(&dev->dev, "Failed to iomap MPIC message registers\n");
+		return -EFAULT;
+	}
+
+	/* Ensure the block has a defined order. */
+	block_number = mpic_msgr_block_number(np);
+	if (block_number < 0) {
+		dev_err(&dev->dev, "Failed to find message register block alias\n");
+		return -ENODEV;
+	}
+	dev_info(&dev->dev, "Setting up message register block %d\n", block_number);
+
+	/* Grab the receive mask which specifies what registers can receive interrupts. */
+	prop = of_get_property(np, "mpic-msgr-receive-mask", NULL);
+	receive_mask = (prop) ? *prop : 0xF;
+
+	/* Build up the appropriate message register data structures. */
+	for (i = 0, irq_index = 0; i < MPIC_MSGR_REGISTERS_PER_BLOCK; ++i) {
+		struct mpic_msgr *msgr;
+
+		msgr = kzalloc(sizeof(struct mpic_msgr), GFP_KERNEL);
+		if (!msgr) {
+			dev_err(&dev->dev, "No memory for message register\n");
+			ret = -ENOMEM;
+			goto err_unwind;
+		}
+
+		reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i;
+		msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE;
+		msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET;
+		msgr->in_use = MSGR_FREE;
+		msgr->num = i;
+		raw_spin_lock_init(&msgr->lock);
+
+		/* msgr->irq stays 0 (kzalloc) for registers not in the mask. */
+		if (receive_mask & (1 << i)) {
+			msgr->irq = irq_of_parse_and_map(np, irq_index);
+			if (!msgr->irq) {
+				dev_err(&dev->dev, "Missing interrupt specifier\n");
+				kfree(msgr);
+				ret = -EFAULT;
+				goto err_unwind;
+			}
+			irq_index += 1;
+		}
+
+		mpic_msgrs[reg_number] = msgr;
+		mpic_msgr_disable(msgr);
+		dev_info(&dev->dev, "Register %d initialized: irq %d\n",
+				reg_number, msgr->irq);
+	}
+
+	return 0;
+
+err_unwind:
+	/* Drop the registers of this block that were already published: the
+	 * devm-managed mapping they point into is released when probe fails.
+	 */
+	while (i--) {
+		reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i;
+		irq_dispose_mapping(mpic_msgrs[reg_number]->irq);
+		kfree(mpic_msgrs[reg_number]);
+		mpic_msgrs[reg_number] = NULL;
+	}
+	return ret;
+}
+
+static const struct of_device_id mpic_msgr_ids[] = {
+	{
+		.compatible = "fsl,mpic-v3.1-msgr",
+		.data = NULL,	/* no per-match data */
+	},
+	{}	/* empty terminating entry */
+};
+
+static struct platform_driver mpic_msgr_driver = {
+	.driver = {
+		.name = "mpic-msgr",
+		.of_match_table = mpic_msgr_ids,
+	},
+	.probe = mpic_msgr_probe,	/* no .remove callback is provided */
+};
+
+static __init int mpic_msgr_init(void)
+{
+	return platform_driver_register(&mpic_msgr_driver);	/* result is the initcall status */
+}
+subsys_initcall(mpic_msgr_init);
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
new file mode 100644
index 0000000000..34246c8e01
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/bitmap.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include <sysdev/mpic.h>
+
+void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq)
+{
+	struct msi_bitmap *bmp = &mpic->msi_bitmap;
+
+	/* Called even when no allocator was set up: then there is no bitmap. */
+	if (bmp->bitmap)
+		msi_bitmap_reserve_hwirq(bmp, hwirq);
+}
+
+#ifdef CONFIG_MPIC_U3_HT_IRQS
+static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
+{
+	irq_hw_number_t hwirq;
+	const struct irq_domain_ops *ops = mpic->irqhost->ops;
+	struct device_node *np = NULL;
+	unsigned int flags;	/* type reported by xlate(); not used here */
+	int index, i;
+	struct of_phandle_args oirq;
+
+	pr_debug("mpic: found U3, guessing msi allocator setup\n");
+
+	/* Reserve source numbers we know are reserved in the HW.
+	 * This is a bit of a mix of U3 and U4 reserves but that's going
+	 * to work fine, we have plenty enough numbers left so let's just
+	 * mark anything we don't like reserved.
+	 */
+	for (i = 0;   i < 8;   i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	for (i = 42;  i < 46;  i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	for (i = 100; i < 105; i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	for (i = 124; i < mpic->num_sources; i++)
+		msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, i);
+
+	/* Also reserve every source that an interrupt specifier in the DT uses. */
+	while ((np = of_find_all_nodes(np))) {
+		pr_debug("mpic: mapping hwirqs for %pOF\n", np);
+
+		index = 0;
+		while (of_irq_parse_one(np, index++, &oirq) == 0) {
+			if (ops->xlate(mpic->irqhost, NULL, oirq.args,
+				       oirq.args_count, &hwirq, &flags))
+				continue;	/* translation failed: hwirq may be unset */
+			msi_bitmap_reserve_hwirq(&mpic->msi_bitmap, hwirq);
+		}
+	}
+
+	return 0;
+}
+#else
+static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
+{
+	return -1;
+}
+#endif
+
+int __init mpic_msi_init_allocator(struct mpic *mpic)
+{
+	int rc;
+
+	rc = msi_bitmap_alloc(&mpic->msi_bitmap, mpic->num_sources,
+			      irq_domain_get_of_node(mpic->irqhost));
+	if (rc)
+		return rc;
+
+	rc = msi_bitmap_reserve_dt_hwirqs(&mpic->msi_bitmap);
+	if (rc > 0) {	/* > 0: per its kernel-doc, no property was found in the DT */
+		if (mpic->flags & MPIC_U3_HT_IRQS)
+			rc = mpic_msi_reserve_u3_hwirqs(mpic);
+
+		if (rc) {	/* still > 0 without the U3 flag, or the U3 guess failed */
+			msi_bitmap_free(&mpic->msi_bitmap);
+			return rc;
+		}
+	}
+
+	return 0;	/* NOTE(review): a negative rc from the DT step also ends up here - confirm */
+}
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
new file mode 100644
index 0000000000..7166e2e0ba
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPIC timer driver
+ *
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ * Author: Dongsheng Wang <Dongsheng.Wang@freescale.com>
+ *	   Li Yang <leoli@freescale.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/syscore_ops.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/io.h>
+
+#include <asm/mpic_timer.h>
+
+#define FSL_GLOBAL_TIMER		0x1
+
+/* Clock Ratio
+ * Divide by 64 0x00000300
+ * Divide by 32 0x00000200
+ * Divide by 16 0x00000100
+ * Divide by  8 0x00000000 (Hardware default div)
+ */
+#define MPIC_TIMER_TCR_CLKDIV		0x00000300
+
+#define MPIC_TIMER_TCR_ROVR_OFFSET	24
+
+#define TIMER_STOP			0x80000000	/* OR-ed into gtbcr values */
+#define GTCCR_TOG			0x80000000	/* masked off gtccr read-backs */
+#define TIMERS_PER_GROUP		4
+#define MAX_TICKS			(~0U >> 1)	/* 31 bits: bit 31 is TIMER_STOP */
+#define MAX_TICKS_CASCADE		(~0U)
+#define TIMER_OFFSET(num)		(1 << (TIMERS_PER_GROUP - 1 - (num)))	/* idle-mask bit of timer num */
+
+struct timer_regs {
+	u32	gtccr;		/* read back as the remaining count */
+	u32	res0[3];
+	u32	gtbcr;		/* loaded with the tick count, plus TIMER_STOP */
+	u32	res1[3];
+	u32	gtvpr;
+	u32	res2[3];
+	u32	gtdr;
+	u32	res3[3];	/* with the padding, one timer spans 16 u32s */
+};
+
+struct cascade_priv {
+	u32 tcr_value;			/* TCR register: CASC & ROVR value */
+	unsigned int cascade_map;	/* idle-mask bits of both timers in the pair */
+	unsigned int timer_num;		/* cascade control timer; its partner is timer_num - 1 */
+};
+
+struct timer_group_priv {
+	struct timer_regs __iomem	*regs;		/* indexed by timer number */
+	struct mpic_timer		timer[TIMERS_PER_GROUP];
+	struct list_head		node;		/* link in timer_group_list */
+	unsigned int			timerfreq;	/* divisor/multiplier between time and ticks */
+	unsigned int			idle;		/* TIMER_OFFSET(n) set while timer n is free */
+	unsigned int			flags;		/* FSL_GLOBAL_TIMER */
+	spinlock_t			lock;		/* taken around updates of idle */
+	void __iomem			*group_tcr;	/* mapped only for FSL_GLOBAL_TIMER groups */
+};
+
+static struct cascade_priv cascade_timer[] = {
+	/* cascade timer 0 and 1 */
+	{ .tcr_value = 0x1, .cascade_map = 0xc, .timer_num = 0x1 },
+	/* cascade timer 1 and 2 */
+	{ .tcr_value = 0x2, .cascade_map = 0x6, .timer_num = 0x2 },
+	/* cascade timer 2 and 3 */
+	{ .tcr_value = 0x4, .cascade_map = 0x3, .timer_num = 0x3 }
+};
+
+static LIST_HEAD(timer_group_list);
+
+static void convert_ticks_to_time(struct timer_group_priv *priv,
+		const u64 ticks, time64_t *time)
+{
+	*time = (u64)div_u64(ticks, priv->timerfreq);	/* integer division, remainder dropped */
+}
+
+/* the time set by the user is converted to "ticks" */
+static int convert_time_to_ticks(struct timer_group_priv *priv,
+		time64_t time, u64 *ticks)
+{
+	const u64 freq = priv->timerfreq;
+	/* largest time whose tick count still fits in a u64 */
+	const u64 limit = div_u64(ULLONG_MAX, priv->timerfreq);
+
+	if (time <= limit) {
+		*ticks = (u64)time * freq;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* detect whether there is a cascade timer available */
+static struct mpic_timer *detect_idle_cascade_timer(
+					struct timer_group_priv *priv)
+{
+	struct mpic_timer *found = NULL;
+	unsigned long flags;
+	unsigned int i;
+
+	/* Try the pairs in table order and claim the first one whose two
+	 * timers are both still marked idle.  The lock is taken and dropped
+	 * once per candidate pair.
+	 */
+	for (i = 0; !found && i < ARRAY_SIZE(cascade_timer); i++) {
+		struct cascade_priv *pair = &cascade_timer[i];
+
+		spin_lock_irqsave(&priv->lock, flags);
+		if ((pair->cascade_map & priv->idle) == pair->cascade_map) {
+			/* The handle is kept on the pair's control timer. */
+			found = &priv->timer[pair->timer_num];
+			found->cascade_handle = pair;
+
+			/* set timer busy */
+			priv->idle &= ~pair->cascade_map;
+		}
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
+
+	/* NULL when no pair is completely idle. */
+	return found;
+}
+
+static int set_cascade_timer(struct timer_group_priv *priv, u64 ticks,
+		unsigned int num)
+{
+	struct cascade_priv *casc_priv;
+	u32 tcr;
+	u32 tmp_ticks;
+	u32 rem_ticks;
+
+	/* set group tcr reg for cascade; -EINVAL if timer num holds no cascade handle */
+	casc_priv = priv->timer[num].cascade_handle;
+	if (!casc_priv)
+		return -EINVAL;
+
+	tcr = casc_priv->tcr_value |
+		(casc_priv->tcr_value << MPIC_TIMER_TCR_ROVR_OFFSET);
+	setbits32(priv->group_tcr, tcr);
+
+	tmp_ticks = div_u64_rem(ticks, MAX_TICKS_CASCADE, &rem_ticks);	/* quotient / remainder split */
+
+	out_be32(&priv->regs[num].gtccr, 0);
+	out_be32(&priv->regs[num].gtbcr, tmp_ticks | TIMER_STOP);	/* quotient, loaded stopped */
+
+	out_be32(&priv->regs[num - 1].gtccr, 0);
+	out_be32(&priv->regs[num - 1].gtbcr, rem_ticks);	/* remainder goes to timer num - 1 */
+
+	return 0;
+}
+
+static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
+					u64 ticks)
+{
+	/* Two cascade timers: Support the maximum time */
+	const u64 limit = (u64)MAX_TICKS * (u64)MAX_TICKS_CASCADE;
+	struct mpic_timer *pair_timer;
+
+	if (ticks > limit)
+		return NULL;
+
+	/* Claim an idle pair; there may be none left in this group. */
+	pair_timer = detect_idle_cascade_timer(priv);
+	if (!pair_timer)
+		return NULL;
+
+	/* Program the pair with the requested tick count.
+	 * NOTE(review): if this fails, the pair claimed above is not handed
+	 * back to the idle pool.
+	 */
+	if (set_cascade_timer(priv, ticks, pair_timer->num) < 0)
+		return NULL;
+
+	return pair_timer;
+}
+
+static struct mpic_timer *get_timer(time64_t time)
+{
+	struct timer_group_priv *priv;
+	struct mpic_timer *timer;
+
+	u64 ticks;
+	unsigned int num;
+	unsigned int i;
+	unsigned long flags;
+	int ret;
+
+	list_for_each_entry(priv, &timer_group_list, node) {
+		ret = convert_time_to_ticks(priv, time, &ticks);
+		if (ret < 0)
+			return NULL;
+
+		if (ticks > MAX_TICKS) {	/* too many ticks for a single timer */
+			if (!(priv->flags & FSL_GLOBAL_TIMER))
+				return NULL;	/* NOTE(review): also gives up on the remaining groups */
+
+			timer = get_cascade_timer(priv, ticks);
+			if (!timer)
+				continue;	/* try the next group */
+
+			return timer;
+		}
+
+		for (i = 0; i < TIMERS_PER_GROUP; i++) {
+			/* one timer: Reverse allocation (idle bit i is timer num) */
+			num = TIMERS_PER_GROUP - 1 - i;
+			spin_lock_irqsave(&priv->lock, flags);
+			if (priv->idle & (1 << i)) {
+				/* set timer busy */
+				priv->idle &= ~(1 << i);
+				/* set ticks & stop timer */
+				out_be32(&priv->regs[num].gtbcr,
+					ticks | TIMER_STOP);
+				out_be32(&priv->regs[num].gtccr, 0);
+				priv->timer[num].cascade_handle = NULL;
+				spin_unlock_irqrestore(&priv->lock, flags);
+				return &priv->timer[num];
+			}
+			spin_unlock_irqrestore(&priv->lock, flags);
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * mpic_start_timer - start hardware timer
+ * @handle: the timer to be started.
+ *
+ * Clears TIMER_STOP in the timer's base count register.  The handler given
+ * to mpic_request_timer() is then called from the timer's interrupt.
+ */
+void mpic_start_timer(struct mpic_timer *handle)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+
+	clrbits32(&priv->regs[handle->num].gtbcr, TIMER_STOP);
+}
+EXPORT_SYMBOL(mpic_start_timer);
+
+/**
+ * mpic_stop_timer - stop hardware timer
+ * @handle: the timer to be stopped
+ *
+ * The timer periodically generates an interrupt. Unless user stops the timer.
+ */
+void mpic_stop_timer(struct mpic_timer *handle)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+	struct timer_regs __iomem *regs = &priv->regs[handle->num];
+
+	/* Set the stop bit in the timer's base count register. */
+	setbits32(&regs->gtbcr, TIMER_STOP);
+
+	/* Zero the current count of the timer itself ... */
+	out_be32(&regs->gtccr, 0);
+
+	/* ... and of the timer below it when the two are cascaded. */
+	if (priv->timer[handle->num].cascade_handle)
+		out_be32(&priv->regs[handle->num - 1].gtccr, 0);
+}
+EXPORT_SYMBOL(mpic_stop_timer);
+
+/**
+ * mpic_get_remain_time - get timer time
+ * @handle: the timer to be selected.
+ * @time: time for timer
+ *
+ * Query timer remaining time.
+ */
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+	struct timer_regs __iomem *regs = &priv->regs[handle->num];
+	u32 count;
+	u64 ticks;
+
+	/* Current count of the timer itself, with the toggle bit removed. */
+	count = in_be32(&regs->gtccr);
+	count &= ~GTCCR_TOG;
+	ticks = count;
+
+	if (priv->timer[handle->num].cascade_handle) {
+		/* In a cascade that count is in units of MAX_TICKS_CASCADE
+		 * ticks; the lower timer's count is added unmasked.
+		 */
+		ticks *= (u64)MAX_TICKS_CASCADE;
+		ticks += in_be32(&priv->regs[handle->num - 1].gtccr);
+	}
+
+	convert_ticks_to_time(priv, ticks, time);
+}
+EXPORT_SYMBOL(mpic_get_remain_time);
+
+/* Hand @handle (and, for a cascade, its partner) back to the idle pool. */
+static void mpic_release_timer(struct mpic_timer *handle)
+{
+	struct timer_group_priv *priv = container_of(handle,
+			struct timer_group_priv, timer[handle->num]);
+	struct cascade_priv *casc_priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	casc_priv = handle->cascade_handle;
+	if (casc_priv) {
+		u32 tcr;
+		tcr = casc_priv->tcr_value | (casc_priv->tcr_value <<
+					MPIC_TIMER_TCR_ROVR_OFFSET);
+		clrbits32(priv->group_tcr, tcr);
+		priv->idle |= casc_priv->cascade_map;
+		handle->cascade_handle = NULL;
+	} else {
+		priv->idle |= TIMER_OFFSET(handle->num);
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/**
+ * mpic_free_timer - free hardware timer
+ * @handle: the timer to be removed.
+ *
+ * Free the timer.
+ *
+ * Note: can not be used in interrupt context.
+ */
+void mpic_free_timer(struct mpic_timer *handle)
+{
+	mpic_stop_timer(handle);
+	free_irq(handle->irq, handle->dev);
+	mpic_release_timer(handle);
+}
+EXPORT_SYMBOL(mpic_free_timer);
+
+/**
+ * mpic_request_timer - get a hardware timer
+ * @fn: interrupt handler function
+ * @dev: cookie handed to request_irq() for @fn
+ * @time: time for timer
+ *
+ * This executes the "request_irq", returning NULL
+ * else "handle" on success.  On failure the timer that was
+ * picked is stopped and returned to the idle pool.
+ */
+struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
+				      time64_t time)
+{
+	struct mpic_timer *allocated_timer;
+
+	if (time < 0 || list_empty(&timer_group_list))
+		return NULL;
+
+	allocated_timer = get_timer(time);
+	if (!allocated_timer)
+		return NULL;
+
+	/* Store the cookie that mpic_free_timer() later gives to free_irq(). */
+	allocated_timer->dev = dev;
+	if (request_irq(allocated_timer->irq, fn,
+			IRQF_TRIGGER_LOW, "global-timer", dev)) {
+		/* No handler was installed, so free_irq() must not be called. */
+		mpic_stop_timer(allocated_timer);
+		mpic_release_timer(allocated_timer);
+		return NULL;
+	}
+
+	return allocated_timer;
+}
+EXPORT_SYMBOL(mpic_request_timer);
+
+static int __init timer_group_get_freq(struct device_node *np,
+			struct timer_group_priv *priv)
+{
+	const bool global = priv->flags & FSL_GLOBAL_TIMER;
+	struct device_node *mpic;
+
+	/* A global timer group takes its clock from the "fsl,mpic" node. */
+	if (global) {
+		mpic = of_find_compatible_node(NULL, NULL, "fsl,mpic");
+		if (mpic) {
+			of_property_read_u32(mpic, "clock-frequency",
+					&priv->timerfreq);
+			of_node_put(mpic);
+		}
+	}
+
+	/* timerfreq is unsigned, so this rejects only a frequency of 0. */
+	if (priv->timerfreq <= 0)
+		return -EINVAL;
+
+	/* Account for the divider that MPIC_TIMER_TCR_CLKDIV selects. */
+	if (global)
+		priv->timerfreq /= (1 << (MPIC_TIMER_TCR_CLKDIV >> 8)) * 8;
+
+	return 0;
+}
+
+static int __init timer_group_get_irq(struct device_node *np,
+		struct timer_group_priv *priv)
+{
+	const u32 all_timer[] = { 0, TIMERS_PER_GROUP };
+	const u32 *p;
+	u32 offset, count;
+	unsigned int i, j;
+	unsigned int irq, irq_index = 0;
+	int len;
+
+	p = of_get_property(np, "fsl,available-ranges", &len);
+	if (p && len % (2 * sizeof(u32)) != 0) {
+		pr_err("%pOF: malformed available-ranges property.\n", np);
+		return -EINVAL;
+	}
+
+	if (!p) {
+		p = all_timer;
+		len = sizeof(all_timer);
+	}
+
+	len /= 2 * sizeof(u32);
+
+	for (i = 0; i < len; i++) {
+		offset = p[i * 2];
+		count = p[i * 2 + 1];
+		/* A range must stay inside the group: it indexes priv->timer[]. */
+		if (offset >= TIMERS_PER_GROUP || count > TIMERS_PER_GROUP - offset) {
+			pr_err("%pOF: available-ranges exceeds the timer group.\n", np);
+			return -EINVAL;
+		}
+		for (j = 0; j < count; j++) {
+			irq = irq_of_parse_and_map(np, irq_index);
+			if (!irq) {
+				pr_err("%pOF: irq parse and map failed.\n", np);
+				return -EINVAL;
+			}
+			/* Set timer idle */
+			priv->idle |= TIMER_OFFSET((offset + j));
+			priv->timer[offset + j].irq = irq;
+			priv->timer[offset + j].num = offset + j;
+			irq_index++;
+		}
+	}
+
+	return 0;
+}
+
+static void __init timer_group_init(struct device_node *np)
+{
+	struct timer_group_priv *priv;
+	unsigned int i = 0;	/* index of the next "reg" entry to map */
+	int ret;
+
+	priv = kzalloc(sizeof(struct timer_group_priv), GFP_KERNEL);
+	if (!priv) {
+		pr_err("%pOF: cannot allocate memory for group.\n", np);
+		return;
+	}
+
+	if (of_device_is_compatible(np, "fsl,mpic-global-timer"))
+		priv->flags |= FSL_GLOBAL_TIMER;
+
+	priv->regs = of_iomap(np, i++);
+	if (!priv->regs) {
+		pr_err("%pOF: cannot ioremap timer register address.\n", np);
+		goto out;
+	}
+
+	if (priv->flags & FSL_GLOBAL_TIMER) {	/* only global groups map a second region */
+		priv->group_tcr = of_iomap(np, i++);
+		if (!priv->group_tcr) {
+			pr_err("%pOF: cannot ioremap tcr address.\n", np);
+			goto out;
+		}
+	}
+
+	ret = timer_group_get_freq(np, priv);
+	if (ret < 0) {
+		pr_err("%pOF: cannot get timer frequency.\n", np);
+		goto out;
+	}
+
+	ret = timer_group_get_irq(np, priv);
+	if (ret < 0) {
+		pr_err("%pOF: cannot get timer irqs.\n", np);
+		goto out;	/* NOTE(review): irqs already mapped are not disposed here */
+	}
+
+	spin_lock_init(&priv->lock);
+
+	/* Init FSL timer hardware */
+	if (priv->flags & FSL_GLOBAL_TIMER)
+		setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
+
+	list_add_tail(&priv->node, &timer_group_list);	/* from here on the group can be allocated from */
+
+	return;
+
+out:	/* priv came from kzalloc, so pointers that were never mapped are NULL */
+	if (priv->regs)
+		iounmap(priv->regs);
+
+	if (priv->group_tcr)
+		iounmap(priv->group_tcr);
+
+	kfree(priv);
+}
+
+static void mpic_timer_resume(void)
+{
+	struct timer_group_priv *priv;
+
+	list_for_each_entry(priv, &timer_group_list, node) {
+		/* Init FSL timer hardware: set the clock divider bits again */
+		if (priv->flags & FSL_GLOBAL_TIMER)
+			setbits32(priv->group_tcr, MPIC_TIMER_TCR_CLKDIV);
+	}
+}
+
+static const struct of_device_id mpic_timer_ids[] = {
+	{ .compatible = "fsl,mpic-global-timer", },
+	{},	/* empty terminating entry */
+};
+
+static struct syscore_ops mpic_timer_syscore_ops = {
+	.resume = mpic_timer_resume,	/* only a resume hook is set */
+};
+
+static int __init mpic_timer_init(void)
+{
+	struct device_node *np = NULL;
+
+	for_each_matching_node(np, mpic_timer_ids)
+		timer_group_init(np);
+
+	/* Without any usable group there is nothing to restore on resume. */
+	if (list_empty(&timer_group_list))
+		return -ENODEV;
+
+	register_syscore_ops(&mpic_timer_syscore_ops);
+	return 0;
+}
+subsys_initcall(mpic_timer_init);
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
new file mode 100644
index 0000000000..492cb03c0b
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006, Segher Boessenkool, IBM Corporation.
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include "mpic.h"
+
+/* A bit ugly, can we get this from the pci_dev somehow? */
+static struct mpic *msi_mpic;
+
+static void mpic_u3msi_mask_irq(struct irq_data *data)
+{
+	pci_msi_mask_irq(data);	/* PCI MSI side first ... */
+	mpic_mask_irq(data);	/* ... then the MPIC source */
+}
+
+static void mpic_u3msi_unmask_irq(struct irq_data *data)
+{
+	mpic_unmask_irq(data);	/* reverse order of mpic_u3msi_mask_irq(): MPIC first ... */
+	pci_msi_unmask_irq(data);	/* ... then the PCI MSI side */
+}
+
+static struct irq_chip mpic_u3msi_chip = {
+	.irq_shutdown		= mpic_u3msi_mask_irq,	/* shutdown uses the mask handler */
+	.irq_mask		= mpic_u3msi_mask_irq,
+	.irq_unmask		= mpic_u3msi_unmask_irq,
+	.irq_eoi		= mpic_end_irq,	/* eoi/type/affinity use the mpic_* handlers directly */
+	.irq_set_type		= mpic_set_irq_type,
+	.irq_set_affinity	= mpic_set_affinity,
+	.name			= "MPIC-U3MSI",
+};
+
+static u64 read_ht_magic_addr(struct pci_dev *pdev, unsigned int pos)
+{
+	u32 lo, hi;
+	u8 flags;
+
+	/* A mapping flagged as fixed always yields HT_MSI_FIXED_ADDR. */
+	pci_read_config_byte(pdev, pos + HT_MSI_FLAGS, &flags);
+	if (flags & HT_MSI_FLAGS_FIXED)
+		return HT_MSI_FIXED_ADDR;
+
+	/* Otherwise the address is split over two config dwords; only the
+	 * bits in HT_MSI_ADDR_LO_MASK of the low one belong to it.
+	 */
+	pci_read_config_dword(pdev, pos + HT_MSI_ADDR_LO, &lo);
+	pci_read_config_dword(pdev, pos + HT_MSI_ADDR_HI, &hi);
+
+	return ((u64)hi << 32) | (lo & HT_MSI_ADDR_LO_MASK);
+}
+
+static u64 find_ht_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
+{
+	struct pci_bus *bus = pdev->bus;
+	unsigned int cap;
+
+	while (bus && bus->self) {	/* walk up; first bridge with the capability wins */
+		cap = pci_find_ht_capability(bus->self, HT_CAPTYPE_MSI_MAPPING);
+		if (cap)
+			return read_ht_magic_addr(bus->self, cap);
+		bus = bus->parent;
+	}
+	return 0;	/* no HT MSI mapping capability found; hwirq is not used */
+}
+
+static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+	/* U4 PCIe MSIs need to write to the special register in
+	 * the bridge that generates interrupts. There should be
+	 * theoretically a register at 0xf8005000 where you just write
+	 * the MSI number and that triggers the right interrupt, but
+	 * unfortunately, this is busted in HW, the bridge endian swaps
+	 * the value and hits the wrong nibble in the register.
+	 *
+	 * So instead we use another register set which is used normally
+	 * for converting HT interrupts to MPIC interrupts, which decodes
+	 * the interrupt number as part of the low address bits
+	 *
+	 * This will not work if we ever use more than one legacy MSI in
+	 * a block but we never do. For one MSI or multiple MSI-X where
+	 * each interrupt address can be specified separately, it works
+	 * just fine.
+	 */
+	if (of_device_is_compatible(hose->dn, "u4-pcie") ||
+	    of_device_is_compatible(hose->dn, "U4-pcie"))
+		return 0xf8004000 | (hwirq << 4);	/* hwirq is encoded from address bit 4 upwards */
+
+	return 0;	/* host bridge matches neither spelling */
+}
+
+static void u3msi_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	irq_hw_number_t hwirq;
+
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		hwirq = virq_to_hw(entry->irq);	/* looked up before the mapping is disposed */
+		irq_set_msi_desc(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
+		msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1);	/* one hwirq per descriptor */
+	}
+}
+
+static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	unsigned int virq;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	u64 addr;
+	int hwirq;
+
+	if (type == PCI_CAP_ID_MSIX)
+		pr_debug("u3msi: MSI-X untested, trying anyway.\n");
+
+	/* If we can't find a magic address then MSI ain't gonna work */
+	if (find_ht_magic_addr(pdev, 0) == 0 &&
+	    find_u4_magic_addr(pdev, 0) == 0) {
+		pr_debug("u3msi: no magic address found for %s\n",
+			 pci_name(pdev));
+		return -ENXIO;
+	}
+
+	/* Give every not-yet-associated descriptor its own hwirq and virq. */
+	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+		hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, 1);
+		if (hwirq < 0) {
+			pr_debug("u3msi: failed allocating hwirq\n");
+			return hwirq;
+		}
+
+		/* The HT mapping, when present, takes precedence over the U4 one. */
+		addr = find_ht_magic_addr(pdev, hwirq);
+		if (addr == 0)
+			addr = find_u4_magic_addr(pdev, hwirq);
+		msg.address_lo = addr & 0xFFFFFFFF;
+		msg.address_hi = addr >> 32;
+
+		virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
+		if (!virq) {
+			pr_debug("u3msi: failed mapping hwirq 0x%x\n", hwirq);
+			msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1);
+			return -ENOSPC;
+		}
+
+		irq_set_msi_desc(virq, entry);
+		irq_set_chip(virq, &mpic_u3msi_chip);
+		irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
+
+		pr_debug("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n",
+			  virq, hwirq, (unsigned long)addr);
+
+		/* The message data is the hwirq itself.  hwirq is allocated
+		 * afresh for the next descriptor, so it is not advanced here. */
+		msg.data = hwirq;
+		pci_write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+int __init mpic_u3msi_init(struct mpic *mpic)
+{
+	int rc;
+	struct pci_controller *phb;
+
+	rc = mpic_msi_init_allocator(mpic);
+	if (rc) {
+		pr_debug("u3msi: Error allocating bitmap!\n");
+		return rc;
+	}
+
+	pr_debug("u3msi: Registering MPIC U3 MSI callbacks.\n");
+
+	BUG_ON(msi_mpic);	/* a second registration is treated as fatal */
+	msi_mpic = mpic;
+
+	list_for_each_entry(phb, &hose_list, list_node) {
+		WARN_ON(phb->controller_ops.setup_msi_irqs);	/* warns, then overrides an existing hook */
+		phb->controller_ops.setup_msi_irqs = u3msi_setup_msi_irqs;
+		phb->controller_ops.teardown_msi_irqs = u3msi_teardown_msi_irqs;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
new file mode 100644
index 0000000000..0b6e37f3ff
--- /dev/null
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006-2008, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/kmemleak.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <asm/msi_bitmap.h>
+#include <asm/setup.h>
+
+/*
+ * Allocate @num consecutive hwirqs from @bmp.  The range is aligned to
+ * @num rounded up to the next power of two.
+ *
+ * Returns the first hwirq of the range, or -ENOMEM if no range fits.
+ */
+int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num)
+{
+	int align_mask = (1 << get_count_order(num)) - 1;
+	unsigned long irqflags;
+	int start;
+
+	spin_lock_irqsave(&bmp->lock, irqflags);
+
+	start = bitmap_find_next_zero_area(bmp->bitmap, bmp->irq_count, 0,
+					   num, align_mask);
+	if (start > bmp->irq_count) {
+		/* No suitably aligned free range is left. */
+		spin_unlock_irqrestore(&bmp->lock, irqflags);
+		return -ENOMEM;
+	}
+
+	bitmap_set(bmp->bitmap, start, num);
+	spin_unlock_irqrestore(&bmp->lock, irqflags);
+
+	pr_debug("msi_bitmap: allocated 0x%x at offset 0x%x\n", num, start);
+
+	return start;
+}
+EXPORT_SYMBOL(msi_bitmap_alloc_hwirqs);
+
+/*
+ * Return @num hwirqs starting at @offset to @bmp by clearing their bits
+ * under the bitmap lock.
+ */
+void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset,
+			    unsigned int num)
+{
+	unsigned long irqflags;
+
+	pr_debug("msi_bitmap: freeing 0x%x at offset 0x%x\n",
+		 num, offset);
+
+	spin_lock_irqsave(&bmp->lock, irqflags);
+
+	bitmap_clear(bmp->bitmap, offset, num);
+
+	spin_unlock_irqrestore(&bmp->lock, irqflags);
+}
+EXPORT_SYMBOL(msi_bitmap_free_hwirqs);
+
+/*
+ * Mark the single hwirq @hwirq as in use in @bmp so that the allocator
+ * never hands it out.
+ */
+void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq)
+{
+	unsigned long irqflags;
+
+	pr_debug("msi_bitmap: reserving hwirq 0x%x\n", hwirq);
+
+	spin_lock_irqsave(&bmp->lock, irqflags);
+
+	/* Order 0 is a region of exactly one bit. */
+	bitmap_allocate_region(bmp->bitmap, hwirq, 0);
+
+	spin_unlock_irqrestore(&bmp->lock, irqflags);
+}
+
+/**
+ * msi_bitmap_reserve_dt_hwirqs - Reserve irqs specified in the device tree.
+ * @bmp: pointer to the MSI bitmap.
+ *
+ * Looks in the device tree for an "msi-available-ranges" property on the
+ * bitmap's node.  If found, every irq is first marked as reserved and then
+ * only the irqs listed in the property are released for MSI use.
+ *
+ * Returns 0 for success, < 0 if there was an error, and > 0 if no property
+ * was found in the device tree.
+ **/
+int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp)
+{
+	int i, j, len;
+	const u32 *p;
+
+	if (!bmp->of_node)
+		return 1;
+
+	p = of_get_property(bmp->of_node, "msi-available-ranges", &len);
+	if (!p) {
+		pr_debug("msi_bitmap: no msi-available-ranges property " \
+			 "found on %pOF\n", bmp->of_node);
+		return 1;
+	}
+
+	/* The property must be a whole number of <start count> pairs. */
+	if (len % (2 * sizeof(u32)) != 0) {
+		printk(KERN_WARNING "msi_bitmap: Malformed msi-available-ranges"
+		       " property on %pOF\n", bmp->of_node);
+		return -EINVAL;
+	}
+
+	spin_lock(&bmp->lock);
+
+	/*
+	 * Start with everything reserved.  This is done under the lock so
+	 * that every modification of the bitmap is serialised.
+	 */
+	bitmap_allocate_region(bmp->bitmap, 0, get_count_order(bmp->irq_count));
+
+	/* Format is: (<u32 start> <u32 count>)+ */
+	len /= 2 * sizeof(u32);
+	for (i = 0; i < len; i++, p += 2) {
+		for (j = 0; j < *(p + 1); j++)
+			bitmap_release_region(bmp->bitmap, *p + j, 0);
+	}
+
+	spin_unlock(&bmp->lock);
+
+	return 0;
+}
+
+/*
+ * Initialise @bmp to track @irq_count hwirqs, all initially free.
+ *
+ * The bitmap storage comes from the slab allocator when it is available and
+ * from memblock otherwise.  A reference is taken on @of_node.
+ *
+ * Returns 0 on success, -EINVAL if @irq_count is zero and -ENOMEM if the
+ * slab allocation fails.  A failed memblock allocation panics.
+ */
+int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
+		     struct device_node *of_node)
+{
+	int nbytes;
+
+	if (irq_count == 0)
+		return -EINVAL;
+
+	nbytes = BITS_TO_LONGS(irq_count) * sizeof(long);
+	pr_debug("msi_bitmap: allocator bitmap size is 0x%x bytes\n", nbytes);
+
+	bmp->bitmap_from_slab = slab_is_available();
+	if (!bmp->bitmap_from_slab) {
+		bmp->bitmap = memblock_alloc(nbytes, SMP_CACHE_BYTES);
+		if (!bmp->bitmap)
+			panic("%s: Failed to allocate %u bytes\n", __func__,
+			      nbytes);
+		/* the bitmap won't be freed from memblock allocator */
+		kmemleak_not_leak(bmp->bitmap);
+	} else {
+		bmp->bitmap = kzalloc(nbytes, GFP_KERNEL);
+		if (!bmp->bitmap) {
+			pr_debug("msi_bitmap: ENOMEM allocating allocator bitmap!\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* We zalloc'ed the bitmap, so all irqs are free by default */
+	spin_lock_init(&bmp->lock);
+	bmp->of_node = of_node_get(of_node);
+	bmp->irq_count = irq_count;
+
+	return 0;
+}
+
+/*
+ * Release what msi_bitmap_alloc() acquired.  Storage that did not come from
+ * the slab allocator is not freed.  bmp->bitmap is left NULL so callers can
+ * use it as a "not allocated" marker.
+ */
+void msi_bitmap_free(struct msi_bitmap *bmp)
+{
+	if (bmp->bitmap_from_slab)
+		kfree(bmp->bitmap);
+	bmp->bitmap = NULL;
+
+	of_node_put(bmp->of_node);
+}
+
+#ifdef CONFIG_MSI_BITMAP_SELFTEST
+
+/*
+ * Self-test of the allocator without a device tree node: allocation of the
+ * bitmap, fill/exhaust behaviour, free-then-reallocate, and natural
+ * alignment of multi-irq allocations.  Failures are reported via WARN_ON().
+ */
+static void __init test_basics(void)
+{
+	struct msi_bitmap bmp;
+	int rc, i, size = 512;
+
+	/* Can't allocate a bitmap of 0 irqs */
+	WARN_ON(msi_bitmap_alloc(&bmp, 0, NULL) == 0);
+
+	/* of_node may be NULL */
+	WARN_ON(msi_bitmap_alloc(&bmp, size, NULL));
+
+	/* Should all be free by default */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
+
+	/* With no node, there's no msi-available-ranges, so expect > 0 */
+	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
+
+	/* Should all still be free */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
+
+	/* Check we can fill it up and then no more */
+	for (i = 0; i < size; i++)
+		WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
+
+	WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
+
+	/* Should all be allocated */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, size, 0) >= 0);
+
+	/* And if we free one we can then allocate another */
+	msi_bitmap_free_hwirqs(&bmp, size / 2, 1);
+	WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) != size / 2);
+
+	/* Free most of them for the alignment tests */
+	msi_bitmap_free_hwirqs(&bmp, 3, size - 3);
+
+	/*
+	 * Check we get a naturally aligned offset.  Warn if the allocation
+	 * failed OR came back misaligned; a successful allocation has
+	 * rc >= 0, so combining the two conditions with && could never
+	 * detect a misaligned result.
+	 */
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 2);
+	WARN_ON(rc < 0 || rc % 2 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 4);
+	WARN_ON(rc < 0 || rc % 4 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 8);
+	WARN_ON(rc < 0 || rc % 8 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 9);
+	WARN_ON(rc < 0 || rc % 16 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 3);
+	WARN_ON(rc < 0 || rc % 4 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 7);
+	WARN_ON(rc < 0 || rc % 8 != 0);
+	rc = msi_bitmap_alloc_hwirqs(&bmp, 121);
+	WARN_ON(rc < 0 || rc % 128 != 0);
+
+	msi_bitmap_free(&bmp);
+
+	/* Clients may WARN_ON bitmap == NULL for "not-allocated" */
+	WARN_ON(bmp.bitmap != NULL);
+}
+
+/*
+ * Self-test of msi_bitmap_reserve_dt_hwirqs() against a hand-built, on-stack
+ * device node: first without, then with a fake "msi-available-ranges"
+ * property.  Failures are reported via WARN_ON().
+ */
+static void __init test_of_node(void)
+{
+	/* <start count> pairs handed to the code under test. */
+	u32 prop_data[] = { 10, 10, 25, 3, 40, 1, 100, 100, 200, 20 };
+	/* Bits expected to remain set (reserved) once the ranges are released. */
+	const char *expected_str = "0-9,20-24,28-39,41-99,220-255";
+	char *prop_name = "msi-available-ranges";
+	char *node_name = "/fakenode";
+	struct device_node of_node;
+	struct property prop;
+	struct msi_bitmap bmp;
+#define SIZE_EXPECTED 256
+	DECLARE_BITMAP(expected, SIZE_EXPECTED);
+
+	/* There should really be a struct device_node allocator */
+	memset(&of_node, 0, sizeof(of_node));
+	of_node_init(&of_node);
+	of_node.full_name = node_name;
+
+	WARN_ON(msi_bitmap_alloc(&bmp, SIZE_EXPECTED, &of_node));
+
+	/* No msi-available-ranges, so expect > 0 */
+	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
+
+	/* Should all still be free */
+	WARN_ON(bitmap_find_free_region(bmp.bitmap, SIZE_EXPECTED,
+					get_count_order(SIZE_EXPECTED)));
+	bitmap_release_region(bmp.bitmap, 0, get_count_order(SIZE_EXPECTED));
+
+	/* Now create a fake msi-available-ranges property */
+
+	/* There should really .. oh whatever */
+	memset(&prop, 0, sizeof(prop));
+	prop.name = prop_name;
+	prop.value = &prop_data;
+	prop.length = sizeof(prop_data);
+
+	of_node.properties = &prop;
+
+	/* msi-available-ranges, so expect == 0 */
+	WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp));
+
+	/* Check we got the expected result */
+	WARN_ON(bitmap_parselist(expected_str, expected, SIZE_EXPECTED));
+	WARN_ON(!bitmap_equal(expected, bmp.bitmap, SIZE_EXPECTED));
+
+	msi_bitmap_free(&bmp);
+	/* msi_bitmap_free() has already set bmp.bitmap to NULL at this point. */
+	kfree(bmp.bitmap);
+}
+
+/*
+ * Late initcall that runs the MSI bitmap self-tests.  Always returns 0;
+ * test failures show up as WARN_ON() splats.
+ */
+static int __init msi_bitmap_selftest(void)
+{
+	printk(KERN_DEBUG "Running MSI bitmap self-tests ...\n");
+
+	/* Allocator basics first, then the device tree reservation path. */
+	test_basics();
+	test_of_node();
+
+	return 0;
+}
+late_initcall(msi_bitmap_selftest);
+#endif /* CONFIG_MSI_BITMAP_SELFTEST */
diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c
new file mode 100644
index 0000000000..2211937d37
--- /dev/null
+++ b/arch/powerpc/sysdev/of_rtc.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Instantiate mmio-mapped RTC chips based on device tree information
+ *
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/prom.h>
+
+/*
+ * Maps a device tree "compatible" string to the name of the platform device
+ * that of_instantiate_rtc() registers for each matching node.
+ */
+static __initdata struct {
+	const char *compatible;
+	char *plat_name;
+} of_rtc_table[] = {
+	{ "ds1743-nvram", "rtc-ds1742" },
+};
+
+/*
+ * Register a platform device for every device tree node that matches an
+ * entry in of_rtc_table, using the node's first address range as the
+ * device's only resource.  Nodes whose address cannot be translated are
+ * reported and skipped.
+ */
+void __init of_instantiate_rtc(void)
+{
+	struct device_node *node;
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(of_rtc_table); i++) {
+		char *plat_name = of_rtc_table[i].plat_name;
+
+		for_each_compatible_node(node, NULL,
+					 of_rtc_table[i].compatible) {
+			/*
+			 * platform_device_register_simple() duplicates the
+			 * resource array, so a stack copy is sufficient and
+			 * nothing is leaked on either the error or the
+			 * success path.
+			 */
+			struct resource res;
+
+			err = of_address_to_resource(node, 0, &res);
+			if (err) {
+				printk(KERN_ERR "OF RTC: Error "
+				       "translating resources for %pOF\n",
+				       node);
+				continue;
+			}
+
+			printk(KERN_INFO "OF_RTC: %pOF is a %s @ 0x%llx-0x%llx\n",
+			       node, plat_name,
+			       (unsigned long long)res.start,
+			       (unsigned long long)res.end);
+			platform_device_register_simple(plat_name, -1, &res, 1);
+		}
+	}
+}
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
new file mode 100644
index 0000000000..fcf8d15162
--- /dev/null
+++ b/arch/powerpc/sysdev/pmi.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pmi driver
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * PMI (Platform Management Interrupt) is a way to communicate
+ * with the BMC (Baseboard Management Controller) via interrupts.
+ * Unlike IPMI it is bidirectional and has a low latency.
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/workqueue.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/pmi.h>
+
+/* Driver state for the single PMI device handled by this file. */
+struct pmi_data {
+	struct list_head	handler;	/* registered struct pmi_handler nodes */
+	spinlock_t		handler_spinlock;	/* held while walking/changing 'handler' */
+	spinlock_t		pmi_spinlock;	/* held around PMI register accesses */
+	struct mutex		msg_mutex;	/* held for the whole of pmi_send_message() */
+	pmi_message_t		msg;		/* last message sent or received */
+	struct completion	*completion;	/* non-NULL while a sender awaits an ACK */
+	struct platform_device	*dev;
+	int			irq;
+	u8 __iomem		*pmi_reg;	/* mapped PMI register block */
+	struct work_struct	work;		/* runs pmi_notify_handlers() */
+};
+
+/* Allocated in pmi_of_probe(); NULL while no device is bound. */
+static struct pmi_data *data;
+
+/*
+ * PMI interrupt handler.
+ *
+ * Reads the incoming message under pmi_spinlock.  An ACK completes the
+ * pending pmi_send_message(); any other message is passed to the registered
+ * handlers via the work item.  An ACK with no sender waiting, or a non-ACK
+ * while a sender is waiting, is logged and dropped.  Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t pmi_irq_handler(int irq, void *dev_id)
+{
+	bool bad_msg = false;
+	u8 msg_type;
+
+	spin_lock(&data->pmi_spinlock);
+
+	msg_type = ioread8(data->pmi_reg + PMI_READ_TYPE);
+	pr_debug("pmi: got message of type %d\n", msg_type);
+
+	if ((msg_type & PMI_ACK) && !data->completion) {
+		printk(KERN_WARNING "pmi: got unexpected ACK message.\n");
+		bad_msg = true;
+	} else if (data->completion && !(msg_type & PMI_ACK)) {
+		printk(KERN_WARNING "pmi: expected ACK, but got %d\n", msg_type);
+		bad_msg = true;
+	} else {
+		/* Consistent with the sender state: latch the payload. */
+		data->msg.type = msg_type;
+		data->msg.data0 = ioread8(data->pmi_reg + PMI_READ_DATA0);
+		data->msg.data1 = ioread8(data->pmi_reg + PMI_READ_DATA1);
+		data->msg.data2 = ioread8(data->pmi_reg + PMI_READ_DATA2);
+	}
+
+	spin_unlock(&data->pmi_spinlock);
+
+	if (bad_msg)
+		return IRQ_HANDLED;
+
+	if (data->msg.type & PMI_ACK)
+		complete(data->completion);
+	else
+		schedule_work(&data->work);
+
+	return IRQ_HANDLED;
+}
+
+
+/* Device tree nodes this driver binds to; the empty entry ends the table. */
+static const struct of_device_id pmi_match[] = {
+	{ .type = "ibm,pmi", .name = "ibm,pmi" },
+	{ .type = "ibm,pmi" },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, pmi_match);
+
+/*
+ * Work function scheduled by the interrupt handler: pass the most recently
+ * received message to every registered handler whose type matches it.
+ */
+static void pmi_notify_handlers(struct work_struct *work)
+{
+	struct pmi_handler *cur;
+
+	spin_lock(&data->handler_spinlock);
+
+	list_for_each_entry(cur, &data->handler, node) {
+		pr_debug("pmi: notifying handler %p\n", cur);
+		if (cur->type != data->msg.type)
+			continue;
+		cur->handle_pmi_message(data->msg);
+	}
+
+	spin_unlock(&data->handler_spinlock);
+}
+
+/*
+ * Bind the driver to the PMI device node: allocate the driver state, map
+ * the register block and request the interrupt.
+ *
+ * Only one device is supported; a second probe returns -EBUSY.  Returns 0
+ * on success or a negative errno, in which case all resources acquired so
+ * far are released and the driver is left unbound.
+ */
+static int pmi_of_probe(struct platform_device *dev)
+{
+	struct device_node *np = dev->dev.of_node;
+	int rc;
+
+	if (data) {
+		printk(KERN_ERR "pmi: driver has already been initialized.\n");
+		rc = -EBUSY;
+		goto out;
+	}
+
+	data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
+	if (!data) {
+		printk(KERN_ERR "pmi: could not allocate memory.\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	data->pmi_reg = of_iomap(np, 0);
+	if (!data->pmi_reg) {
+		printk(KERN_ERR "pmi: invalid register address.\n");
+		rc = -EFAULT;
+		goto error_cleanup_data;
+	}
+
+	INIT_LIST_HEAD(&data->handler);
+
+	mutex_init(&data->msg_mutex);
+	spin_lock_init(&data->pmi_spinlock);
+	spin_lock_init(&data->handler_spinlock);
+
+	INIT_WORK(&data->work, pmi_notify_handlers);
+
+	data->dev = dev;
+
+	data->irq = irq_of_parse_and_map(np, 0);
+	if (!data->irq) {
+		printk(KERN_ERR "pmi: invalid interrupt.\n");
+		rc = -EFAULT;
+		goto error_cleanup_iomap;
+	}
+
+	rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
+	if (rc) {
+		printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
+				data->irq, rc);
+		goto error_cleanup_iomap;
+	}
+
+	printk(KERN_INFO "pmi: found pmi device at addr %p.\n", data->pmi_reg);
+
+	goto out;
+
+error_cleanup_iomap:
+	iounmap(data->pmi_reg);
+
+error_cleanup_data:
+	kfree(data);
+	/*
+	 * Clear the global pointer: the rest of the driver tests 'data' for
+	 * NULL to decide whether a device is bound, so leaving it dangling
+	 * would make later probes fail with -EBUSY and let the exported
+	 * entry points dereference freed memory.
+	 */
+	data = NULL;
+
+out:
+	return rc;
+}
+
+/*
+ * Unbind the driver: release the interrupt and register mapping, unlink
+ * every registered handler and free the driver state.  Always returns 0.
+ */
+static int pmi_of_remove(struct platform_device *dev)
+{
+	struct pmi_handler *cur, *next;
+
+	free_irq(data->irq, NULL);
+	iounmap(data->pmi_reg);
+
+	/* Unlink the handlers; the handler objects themselves are not freed here. */
+	spin_lock(&data->handler_spinlock);
+	list_for_each_entry_safe(cur, next, &data->handler, node)
+		list_del(&cur->node);
+	spin_unlock(&data->handler_spinlock);
+
+	kfree(data);
+	data = NULL;
+
+	return 0;
+}
+
+/* Platform driver glue; matched against device tree nodes via pmi_match. */
+static struct platform_driver pmi_of_platform_driver = {
+	.probe		= pmi_of_probe,
+	.remove		= pmi_of_remove,
+	.driver = {
+		.name = "pmi",
+		.of_match_table = pmi_match,
+	},
+};
+module_platform_driver(pmi_of_platform_driver);
+
+/*
+ * Send @msg to the PMI device and wait (interruptibly, for at most
+ * PMI_TIMEOUT) for the interrupt handler to signal the ACK.
+ *
+ * Senders are serialised by msg_mutex.  Returns -ENODEV when no device is
+ * bound and 0 otherwise; the result of the wait is not inspected, so a
+ * timeout or signal is not reported to the caller.
+ */
+int pmi_send_message(pmi_message_t msg)
+{
+	unsigned long flags;
+	DECLARE_COMPLETION_ONSTACK(completion);
+
+	if (!data)
+		return -ENODEV;
+
+	mutex_lock(&data->msg_mutex);
+
+	data->msg = msg;
+	pr_debug("pmi_send_message: msg is %08x\n", *(u32*)&msg);
+
+	/* Must be set before the write below so the IRQ handler expects an ACK. */
+	data->completion = &completion;
+
+	spin_lock_irqsave(&data->pmi_spinlock, flags);
+	iowrite8(msg.data0, data->pmi_reg + PMI_WRITE_DATA0);
+	iowrite8(msg.data1, data->pmi_reg + PMI_WRITE_DATA1);
+	iowrite8(msg.data2, data->pmi_reg + PMI_WRITE_DATA2);
+	/* NOTE(review): the type register is written last, presumably to trigger the send - confirm. */
+	iowrite8(msg.type, data->pmi_reg + PMI_WRITE_TYPE);
+	spin_unlock_irqrestore(&data->pmi_spinlock, flags);
+
+	pr_debug("pmi_send_message: wait for completion\n");
+
+	wait_for_completion_interruptible_timeout(data->completion,
+						  PMI_TIMEOUT);
+
+	/* The completion lives on this stack frame; drop the reference before returning. */
+	data->completion = NULL;
+
+	mutex_unlock(&data->msg_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmi_send_message);
+
+/*
+ * Add @handler to the end of the list of handlers notified about incoming
+ * PMI messages.  Returns 0, or -ENODEV when no device is bound.
+ */
+int pmi_register_handler(struct pmi_handler *handler)
+{
+	if (data == NULL)
+		return -ENODEV;
+
+	spin_lock(&data->handler_spinlock);
+
+	list_add_tail(&handler->node, &data->handler);
+
+	spin_unlock(&data->handler_spinlock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmi_register_handler);
+
+/*
+ * Remove @handler from the list of PMI message handlers.  Does nothing when
+ * no device is bound.
+ */
+void pmi_unregister_handler(struct pmi_handler *handler)
+{
+	if (data == NULL)
+		return;
+
+	pr_debug("pmi: unregistering handler %p\n", handler);
+
+	spin_lock(&data->handler_spinlock);
+
+	list_del(&handler->node);
+
+	spin_unlock(&data->handler_spinlock);
+}
+EXPORT_SYMBOL_GPL(pmi_unregister_handler);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+MODULE_DESCRIPTION("IBM Platform Management Interrupt driver");
diff --git a/arch/powerpc/sysdev/rtc_cmos_setup.c b/arch/powerpc/sysdev/rtc_cmos_setup.c
new file mode 100644
index 0000000000..47cc87bd6a
--- /dev/null
+++ b/arch/powerpc/sysdev/rtc_cmos_setup.c
@@ -0,0 +1,69 @@
+/*
+ * Setup code for PC-style Real-Time Clock.
+ *
+ * Author: Wade Farnsworth <wfarnsworth@mvista.com>
+ *
+ * 2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mc146818rtc.h>
+#include <linux/of_address.h>
+
+
+/*
+ * Register the "rtc_cmos" platform device if the device tree describes a
+ * PC-style RTC ("pnpPNP,b00") at the address hardcoded in RTC_PORT(0).
+ *
+ * An IRQ resource (fixed at 8) is added only when an i8259-style interrupt
+ * controller node ("chrp,iic" or "pnpPNP,000") is present.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int  __init add_rtc(void)
+{
+	struct resource res[2];
+	struct platform_device *pdev;
+	struct device_node *node;
+	unsigned int nres = 1;
+	int err;
+
+	memset(&res, 0, sizeof(res));
+
+	node = of_find_compatible_node(NULL, NULL, "pnpPNP,b00");
+	if (!node)
+		return -ENODEV;
+
+	err = of_address_to_resource(node, 0, &res[0]);
+	of_node_put(node);
+	if (err)
+		return err;
+
+	/*
+	 * RTC_PORT(x) is hardcoded in asm/mc146818rtc.h, so the address
+	 * given by the device node has to agree with it.
+	 */
+	if (res[0].start != RTC_PORT(0))
+		return -EINVAL;
+
+	node = of_find_compatible_node(NULL, NULL, "chrp,iic");
+	if (!node)
+		node = of_find_compatible_node(NULL, NULL, "pnpPNP,000");
+
+	if (node) {
+		of_node_put(node);
+		/*
+		 * Use a fixed interrupt value of 8: on PPC this RTC hangs
+		 * off an i8259, which we ensure has interrupt numbers 0..15.
+		 */
+		res[1].start = 8;
+		res[1].end = 8;
+		res[1].flags = IORESOURCE_IRQ;
+		nres++;
+	}
+
+	pdev = platform_device_register_simple("rtc_cmos", -1, res, nres);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
+fs_initcall(add_rtc);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
new file mode 100644
index 0000000000..db520c40cb
--- /dev/null
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * tsi108/109 device setup code
+ *
+ * Maintained by Roy Zang < tie-fei.zang@freescale.com >
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_net.h>
+#include <asm/tsi108.h>
+
+#include <linux/atomic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <mm/mmu_decl.h>
+
+/* Debug output is compiled out unless DEBUG is defined here instead. */
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+/* Cached physical CSR base; -1 means "not looked up yet" (see get_csrbase()). */
+static phys_addr_t tsi108_csr_base = -1;
+
+/*
+ * Return the physical base address of the TSI108 CSR block.
+ *
+ * The address is taken from the first address range of the "tsi-bridge"
+ * device tree node and cached.  Returns (phys_addr_t)-1 when the node is
+ * missing or its address cannot be translated; in that case nothing is
+ * cached and the lookup is retried on the next call.
+ */
+phys_addr_t get_csrbase(void)
+{
+	struct device_node *tsi;
+
+	if (tsi108_csr_base != -1)
+		return tsi108_csr_base;
+
+	tsi = of_find_node_by_type(NULL, "tsi-bridge");
+	if (tsi) {
+		struct resource res;
+
+		/* Only trust res when the translation actually succeeded. */
+		if (of_address_to_resource(tsi, 0, &res) == 0)
+			tsi108_csr_base = res.start;
+		of_node_put(tsi);
+	}
+	return tsi108_csr_base;
+}
+EXPORT_SYMBOL(get_csrbase);
+
+/*
+ * Map 0x10000 bytes at the CSR base and return the mapping as a u32.
+ * Every call creates a new ioremap() mapping; the result of get_csrbase()
+ * is not checked before it is mapped.
+ */
+u32 get_vir_csrbase(void)
+{
+	return (u32) (ioremap(get_csrbase(), 0x10000));
+}
+EXPORT_SYMBOL(get_vir_csrbase);
+
+/*
+ * Register a "tsi-ethernet" platform device for every "tsi108-ethernet"
+ * network node in the device tree and attach its hw_info platform data
+ * (register base, PHY register base, PHY id, IRQ, MAC address).
+ *
+ * Returns 0 on success.  On the first failure the device being set up is
+ * unregistered (if it was registered), the node reference is dropped and
+ * a negative errno is returned; devices registered for earlier nodes are
+ * left in place.
+ */
+static int __init tsi108_eth_of_init(void)
+{
+	struct device_node *np;
+	unsigned int i = 0;
+	struct platform_device *tsi_eth_dev;
+	struct resource res;
+	int ret;
+
+	for_each_compatible_node(np, "network", "tsi108-ethernet") {
+		struct resource r[2];
+		struct device_node *phy, *mdio;
+		hw_info tsi_eth_data;
+		const unsigned int *phy_id;
+		const phandle *ph;
+
+		memset(r, 0, sizeof(r));
+		memset(&tsi_eth_data, 0, sizeof(tsi_eth_data));
+
+		ret = of_address_to_resource(np, 0, &r[0]);
+		DBG("%s: name:start->end = %s:%pR\n",
+		    __func__, r[0].name, &r[0]);
+		if (ret)
+			goto err;
+
+		r[1].name = "tx";
+		r[1].start = irq_of_parse_and_map(np, 0);
+		r[1].end = irq_of_parse_and_map(np, 0);
+		r[1].flags = IORESOURCE_IRQ;
+		DBG("%s: name:start->end = %s:%pR\n",
+			__func__, r[1].name, &r[1]);
+
+		/* Only the memory resource r[0] is handed to the device. */
+		tsi_eth_dev =
+		    platform_device_register_simple("tsi-ethernet", i++, &r[0],
+						    1);
+
+		if (IS_ERR(tsi_eth_dev)) {
+			ret = PTR_ERR(tsi_eth_dev);
+			goto err;
+		}
+
+		of_get_mac_address(np, tsi_eth_data.mac_addr);
+
+		/* A missing property must not be dereferenced. */
+		ph = of_get_property(np, "mdio-handle", NULL);
+		if (!ph) {
+			ret = -ENODEV;
+			goto unreg;
+		}
+		mdio = of_find_node_by_phandle(*ph);
+		ret = of_address_to_resource(mdio, 0, &res);
+		of_node_put(mdio);
+		if (ret)
+			goto unreg;
+
+		ph = of_get_property(np, "phy-handle", NULL);
+		phy = ph ? of_find_node_by_phandle(*ph) : NULL;
+
+		if (phy == NULL) {
+			ret = -ENODEV;
+			goto unreg;
+		}
+
+		phy_id = of_get_property(phy, "reg", NULL);
+		if (!phy_id) {
+			of_node_put(phy);
+			ret = -ENODEV;
+			goto unreg;
+		}
+
+		tsi_eth_data.regs = r[0].start;
+		tsi_eth_data.phyregs = res.start;
+		tsi_eth_data.phy = *phy_id;
+		tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0);
+
+		/* Some boards with the TSI108 bridge (e.g. Holly)
+		 * have a miswiring of the ethernet PHYs which
+		 * requires a workaround.  The special
+		 * "txc-rxc-delay-disable" property enables this
+		 * workaround.  FIXME: Need to port the tsi108_eth
+		 * driver itself to phylib and use a non-misleading
+		 * name for the workaround flag - it's not actually to
+		 * do with the model of PHY in use */
+		if (of_property_read_bool(phy, "txc-rxc-delay-disable"))
+			tsi_eth_data.phy_type = TSI108_PHY_BCM54XX;
+		of_node_put(phy);
+
+		ret =
+		    platform_device_add_data(tsi_eth_dev, &tsi_eth_data,
+					     sizeof(hw_info));
+		if (ret)
+			goto unreg;
+	}
+	return 0;
+unreg:
+	platform_device_unregister(tsi_eth_dev);
+err:
+	/* Leaving the iterator early: drop the reference it holds on np. */
+	of_node_put(np);
+	return ret;
+}
+
+arch_initcall(tsi108_eth_of_init);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
new file mode 100644
index 0000000000..0e42f7bad7
--- /dev/null
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common routines for Tundra Semiconductor TSI108 host bridge.
+ *
+ * 2004-2005 (c) Tundra Semiconductor Corp.
+ * Author: Alex Bounine (alexandreb@tundra.com)
+ * Author: Roy Zang (tie-fei.zang@freescale.com)
+ * 	   Add pci interrupt router host
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/tsi108.h>
+#include <asm/tsi108_pci.h>
+#include <asm/tsi108_irq.h>
+
+/* Debug output is compiled out unless DEBUG is defined here instead. */
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/*
+ * Build the address of a PCI config dword inside the mapped config window:
+ * bus in bits 16 and up, devfn in bits 8-15, dword-aligned offset in the
+ * low byte.  Every argument is parenthesised so that expressions can be
+ * passed safely.
+ */
+#define tsi_mk_config_addr(bus, devfunc, offset) \
+	((((bus)<<16) | ((devfunc)<<8) | ((offset) & 0xfc)) + tsi108_pci_cfg_base)
+
+/* Config window mapping and its physical address, set by tsi108_setup_pci(). */
+u32 tsi108_pci_cfg_base;
+static u32 tsi108_pci_cfg_phys;
+u32 tsi108_csr_vir_base;
+/* IRQ domain for the cascaded PCI INTx interrupts. */
+static struct irq_domain *pci_irq_host;
+
+extern u32 get_vir_csrbase(void);
+extern u32 tsi108_read_reg(u32 reg_offset);
+extern void tsi108_write_reg(u32 reg_offset, u32 val);
+
+/*
+ * pci_ops write accessor: write @len bytes of @val to config offset @offset
+ * of device @devfunc on @bus through the memory-mapped config window.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the platform's pci_exclude_device
+ * hook rejects the device, PCIBIOS_SUCCESSFUL otherwise.
+ */
+int
+tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfunc,
+			   int offset, int len, u32 val)
+{
+	volatile unsigned char *cfg_addr;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfunc))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* The macro masks the offset to a dword; OR the low two bits back in. */
+	cfg_addr = (unsigned char *)(tsi_mk_config_addr(bus->number,
+							devfunc, offset) |
+							(offset & 0x03));
+
+#ifdef DEBUG
+	printk("PCI CFG write : ");
+	printk("%d:0x%x:0x%x ", bus->number, devfunc, offset);
+	printk("%d ADDR=0x%08x ", len, (uint) cfg_addr);
+	printk("data = 0x%08x\n", val);
+#endif
+
+	/* Any length other than 1 or 2 is treated as a 32-bit access. */
+	switch (len) {
+	case 1:
+		out_8((u8 *) cfg_addr, val);
+		break;
+	case 2:
+		out_le16((u16 *) cfg_addr, val);
+		break;
+	default:
+		out_le32((u32 *) cfg_addr, val);
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Quietly clear the PB and PCI error flags that get set as a result of
+ * PCI/X configuration read requests.
+ *
+ * @pci_cfg_base is compared against the top byte of the logged error
+ * address to decide whether the PCI/X status register is cleared as well.
+ */
+void tsi108_clear_pci_error(u32 pci_cfg_base)
+{
+	u32 pb_errcs, pb_aerr, pci_csr;
+
+	/* Read the PB error log registers. */
+	pb_errcs = tsi108_read_reg(TSI108_PB_OFFSET + TSI108_PB_ERRCS);
+	pb_aerr = tsi108_read_reg(TSI108_PB_OFFSET + TSI108_PB_AERR);
+
+	/* Nothing logged, nothing to clear. */
+	if (!(pb_errcs & TSI108_PB_ERRCS_ES))
+		return;
+
+	/* Clear the error flag. */
+	tsi108_write_reg(TSI108_PB_OFFSET + TSI108_PB_ERRCS,
+			 TSI108_PB_ERRCS_ES);
+
+	/* Clear the read error reported in PB_ISR. */
+	tsi108_write_reg(TSI108_PB_OFFSET + TSI108_PB_ISR,
+			 TSI108_PB_ISR_PBS_RD_ERR);
+
+	/* Clear PCI/X bus cfg errors if the error hit the config window. */
+	if ((pb_aerr & 0xFF000000) != pci_cfg_base)
+		return;
+
+	/* Writing the status value back clears the bits that were set. */
+	pci_csr = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_CSR);
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_CSR, pci_csr);
+}
+
+/*
+ * Load from @addr into @x using the load instruction named by @op.  An
+ * exception table entry redirects a fault to the fixup code, which sets
+ * @x to -1 and resumes after the access.
+ */
+#define __tsi108_read_pci_config(x, addr, op)		\
+	__asm__ __volatile__(				\
+		"	"op" %0,0,%1\n"		\
+		"1:	eieio\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li %0,-1\n"			\
+		"	b 2b\n"				\
+		".previous\n"				\
+		EX_TABLE(1b, 3b)			\
+		: "=r"(x) : "r"(addr))
+
+/*
+ * pci_ops read accessor: read @len bytes from config offset @offset of
+ * device @devfn on @bus through the memory-mapped config window and store
+ * the result in *@val.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the platform's pci_exclude_device
+ * hook rejects the device, PCIBIOS_SUCCESSFUL otherwise.
+ */
+int
+tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+			  int len, u32 * val)
+{
+	volatile unsigned char *cfg_addr;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	u32 temp;
+
+	if (ppc_md.pci_exclude_device)
+		if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* The macro masks the offset to a dword; OR the low two bits back in. */
+	cfg_addr = (unsigned char *)(tsi_mk_config_addr(bus->number,
+							devfn,
+							offset) | (offset &
+								   0x03));
+
+	/* Any length other than 1 or 2 is treated as a 32-bit access. */
+	switch (len) {
+	case 1:
+		__tsi108_read_pci_config(temp, cfg_addr, "lbzx");
+		break;
+	case 2:
+		__tsi108_read_pci_config(temp, cfg_addr, "lhbrx");
+		break;
+	default:
+		__tsi108_read_pci_config(temp, cfg_addr, "lwbrx");
+		break;
+	}
+
+	*val = temp;
+
+#ifdef DEBUG
+	if ((0xFFFFFFFF != temp) && (0xFFFF != temp) && (0xFF != temp)) {
+		printk("PCI CFG read : ");
+		printk("%d:0x%x:0x%x ", bus->number, devfn, offset);
+		printk("%d ADDR=0x%08x ", len, (uint) cfg_addr);
+		printk("data = 0x%x\n", *val);
+	}
+#endif
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* Clear logged PB/PCI errors, using the config window recorded by tsi108_setup_pci(). */
+void tsi108_clear_pci_cfg_error(void)
+{
+	tsi108_clear_pci_error(tsi108_pci_cfg_phys);
+}
+
+/* Config space accessors installed on the host bridge by tsi108_setup_pci(). */
+static struct pci_ops tsi108_direct_pci_ops = {
+	.read = tsi108_direct_read_config,
+	.write = tsi108_direct_write_config,
+};
+
+/*
+ * Set up the TSI108 PCI host bridge described by @dev: map the config
+ * window at @cfg_phys, allocate the pci_controller, install the config
+ * accessors and process the bridge's "ranges" property.
+ *
+ * Returns 0 on success or -ENOMEM if the controller cannot be allocated.
+ */
+int __init tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary)
+{
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc;
+	const int *bus_range;
+	int has_address = 0;
+
+	/* PCI Config mapping */
+	tsi108_pci_cfg_base = (u32)ioremap(cfg_phys, TSI108_PCI_CFG_SIZE);
+	tsi108_pci_cfg_phys = cfg_phys;
+	DBG("TSI_PCI: %s tsi108_pci_cfg_base=0x%x\n", __func__,
+	    tsi108_pci_cfg_base);
+
+	/* Fetch host bridge registers address */
+	/* NOTE(review): has_address is not read again in this function, yet rsrc.start is printed below - confirm rsrc is valid on failure. */
+	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+	}
+
+	hose = pcibios_alloc_controller(dev);
+
+	if (!hose) {
+		printk("PCI Host bridge init failed\n");
+		return -ENOMEM;
+	}
+
+	/* NOTE(review): a too-short bus-range only triggers the warning above; the values are still read here. */
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	(hose)->ops = &tsi108_direct_pci_ops;
+
+	pr_info("Found tsi108 PCI host bridge at 0x%pa. Firmware bus number: %d->%d\n",
+		&rsrc.start, hose->first_busno, hose->last_busno);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+	return 0;
+}
+
+/*
+ * Low level utility functions
+ */
+
+/*
+ * Mask PCI interrupt line @irq (numbered from IRQ_PCI_INTAD_BASE) with a
+ * read-modify-write of the IRP_CFG_CTL register.
+ */
+static void tsi108_pci_int_mask(u_int irq)
+{
+	u_int irp_cfg;
+	int int_line = (irq - IRQ_PCI_INTAD_BASE);
+
+	irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+	mb();
+	irp_cfg |= (1 << int_line);	/* INTx_DIR = output */
+	irp_cfg &= ~(3 << (8 + (int_line * 2)));	/* INTx_TYPE = unused */
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg);
+	mb();
+	/* The value read back is not used; presumably this flushes the write - TODO confirm. */
+	irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+}
+
+/*
+ * Unmask PCI interrupt line @irq (numbered from IRQ_PCI_INTAD_BASE): the
+ * inverse of tsi108_pci_int_mask() on the IRP_CFG_CTL register.
+ */
+static void tsi108_pci_int_unmask(u_int irq)
+{
+	u_int irp_cfg;
+	int int_line = (irq - IRQ_PCI_INTAD_BASE);
+
+	irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+	mb();
+	/* Clear the direction bit and set both type bits for this line. */
+	irp_cfg &= ~(1 << int_line);
+	irp_cfg |= (3 << (8 + (int_line * 2)));
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg);
+	mb();
+}
+
+/*
+ * Initial programming of the PCI block's interrupt registers: write
+ * 0x0000ff00 to IRP_CFG_CTL and enable the P_INT source in IRP_ENABLE.
+ */
+static void __init init_pci_source(void)
+{
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL,
+			0x0000ff00);
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE,
+			TSI108_PCI_IRP_ENABLE_P_INT);
+	mb();
+}
+
+/*
+ * Work out which PCI INTx line raised the cascaded interrupt.
+ *
+ * When P_INT is pending, the four INTA#-INTD# status bits are scanned
+ * starting from the position kept in the static 'mask' counter, which
+ * persists across calls, and interrupts from the PCI block are disabled.
+ *
+ * Returns IRQ_PCI_INTA + line for the line found.  'irq' starts at -1 and
+ * is returned unchanged (converted to unsigned int) when no line is found.
+ */
+static inline unsigned int get_pci_source(void)
+{
+	u_int temp = 0;
+	int irq = -1;
+	int i;
+	u_int pci_irp_stat;
+	static int mask = 0;
+
+	/* Read PCI/X block interrupt status register */
+	pci_irp_stat = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_STAT);
+	mb();
+
+	if (pci_irp_stat & TSI108_PCI_IRP_STAT_P_INT) {
+		/* Process Interrupt from PCI bus INTA# - INTD# lines */
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET +
+				    TSI108_PCI_IRP_INTAD) & 0xf;
+		mb();
+		/* At most four probes; the next call resumes after the line found. */
+		for (i = 0; i < 4; i++, mask++) {
+			if (temp & (1 << mask % 4)) {
+				irq = IRQ_PCI_INTA + mask % 4;
+				mask++;
+				break;
+			}
+		}
+
+		/* Disable interrupts from PCI block */
+		temp = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+		tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE,
+				temp & ~TSI108_PCI_IRP_ENABLE_P_INT);
+		mb();
+		(void)tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+		mb();
+	}
+#ifdef DEBUG
+	else {
+		printk("TSI108_PIC: error in TSI108_PCI_IRP_STAT\n");
+		pci_irp_stat =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_STAT);
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_INTAD);
+		mb();
+		printk(">> stat=0x%08x intad=0x%08x ", pci_irp_stat, temp);
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL);
+		mb();
+		printk("cfg_ctl=0x%08x ", temp);
+		temp =
+		    tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+		mb();
+		printk("irp_enable=0x%08x\n", temp);
+	}
+#endif	/* end of DEBUG */
+
+	return irq;
+}
+
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+/*
+ * irq_chip unmask callback: unmask the individual INTx line, then enable
+ * interrupts from the PCI block again.
+ */
+static void tsi108_pci_irq_unmask(struct irq_data *d)
+{
+	u32 enable;
+
+	tsi108_pci_int_unmask(d->irq);
+
+	/* Enable interrupts from PCI block */
+	enable = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE);
+	tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE,
+			 enable | TSI108_PCI_IRP_ENABLE_P_INT);
+	mb();
+}
+
+/* irq_chip mask callback: mask the INTx line behind @d. */
+static void tsi108_pci_irq_mask(struct irq_data *d)
+{
+	tsi108_pci_int_mask(d->irq);
+}
+
+/* irq_chip ack callback: implemented as masking the INTx line behind @d. */
+static void tsi108_pci_irq_ack(struct irq_data *d)
+{
+	tsi108_pci_int_mask(d->irq);
+}
+
+/*
+ * Interrupt controller descriptor for cascaded PCI interrupt controller.
+ */
+
+/* irq_chip attached to the PCI INTx interrupts in pci_irq_host_map(). */
+static struct irq_chip tsi108_pci_irq = {
+	.name = "tsi108_PCI_int",
+	.irq_mask = tsi108_pci_irq_mask,
+	.irq_ack = tsi108_pci_irq_ack,
+	.irq_unmask = tsi108_pci_irq_unmask,
+};
+
+/*
+ * irq_domain xlate callback: the first interrupt specifier cell is the
+ * hwirq and every interrupt is reported as level-high.  @intsize is not
+ * checked.  Always returns 0.
+ */
+static int pci_irq_host_xlate(struct irq_domain *h, struct device_node *ct,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+	return 0;
+}
+
+/*
+ * irq_domain map callback.  For virq 1..4 the corresponding PCI INTx irq
+ * (virq + IRQ_PCI_INTAD_BASE - 1) is flagged as level triggered and given
+ * the tsi108 PCI irq_chip; any other virq is left untouched.  Always
+ * returns 0.
+ */
+static int pci_irq_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	unsigned int pci_irq;
+
+	DBG("%s(%d, 0x%lx)\n", __func__, virq, hw);
+
+	if (virq < 1 || virq > 4)
+		return 0;
+
+	pci_irq = virq + IRQ_PCI_INTAD_BASE - 1;
+	irq_set_status_flags(pci_irq, IRQ_LEVEL);
+	irq_set_chip(pci_irq, &tsi108_pci_irq);
+
+	return 0;
+}
+
+/* Domain callbacks passed to irq_domain_add_legacy() in tsi108_pci_int_init(). */
+static const struct irq_domain_ops pci_irq_domain_ops = {
+	.map = pci_irq_host_map,
+	.xlate = pci_irq_host_xlate,
+};
+
+/*
+ * Exported functions
+ */
+
+/*
+ * The Tsi108 PCI interrupts initialization routine.
+ *
+ * The INTA# - INTD# interrupts on the PCI bus are reported by the PCI block
+ * to the MPIC using single interrupt source (IRQ_TSI108_PCI). Therefore the
+ * PCI block has to be treated as a cascaded interrupt controller connected
+ * to the MPIC.
+ */
+
+/*
+ * Create the legacy irq domain for the cascaded PCI interrupts of @node
+ * and program the PCI block's interrupt registers.  If the domain cannot
+ * be allocated an error is logged and the hardware is left untouched.
+ */
+void __init tsi108_pci_int_init(struct device_node *node)
+{
+	DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
+
+	pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					     &pci_irq_domain_ops, NULL);
+	if (pci_irq_host) {
+		init_pci_source();
+		return;
+	}
+
+	printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
+}
+
+/*
+ * Chained handler for the PCI block's interrupt: dispatch the INTx irq
+ * reported by get_pci_source(), then EOI the parent interrupt.
+ */
+void tsi108_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq = get_pci_source();
+
+	/* NOTE(review): get_pci_source() yields (unsigned)-1, not 0, when no line is found, so this test still passes - confirm intent. */
+	if (cascade_irq)
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c
new file mode 100644
index 0000000000..5020044400
--- /dev/null
+++ b/arch/powerpc/sysdev/udbg_memcons.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * A udbg backend which logs messages and reads input from in memory
+ * buffers.
+ *
+ * The console output can be read from memcons_output which is a
+ * circular buffer whose next write position is stored in memcons.output_pos.
+ *
+ * Input may be passed by writing into the memcons_input buffer when it is
+ * empty. The input buffer is empty when both input_pos == input_start and
+ * *input_start == '\0'.
+ *
+ * Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp
+ * Copyright (C) 2013 Alistair Popple, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <asm/barrier.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/udbg.h>
+
+struct memcons {
+	char *output_start;
+	char *output_pos;
+	char *output_end;
+	char *input_start;
+	char *input_pos;
+	char *input_end;
+};
+
+static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE];
+static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE];
+
+struct memcons memcons = {
+	.output_start = memcons_output,
+	.output_pos = memcons_output,
+	.output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE],
+	.input_start = memcons_input,
+	.input_pos = memcons_input,
+	.input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE],
+};
+
+void memcons_putc(char c)
+{
+	char *new_output_pos;
+	/* Store the byte; wmb() is issued before output_pos is advanced. */
+	*memcons.output_pos = c;
+	wmb();
+	new_output_pos = memcons.output_pos + 1;
+	if (new_output_pos >= memcons.output_end)
+		new_output_pos = memcons.output_start;
+	/* Publish the next write position (wrapped back to output_start). */
+	memcons.output_pos = new_output_pos;
+}
+
+int memcons_getc_poll(void)
+{
+	char c;
+	char *new_input_pos;
+	/* A non-NUL byte at input_pos means a character is waiting. */
+	if (*memcons.input_pos) {
+		c = *memcons.input_pos;
+		/* Next slot; rewind to input_start at the end or on a NUL. */
+		new_input_pos = memcons.input_pos + 1;
+		if (new_input_pos >= memcons.input_end)
+			new_input_pos = memcons.input_start;
+		else if (*new_input_pos == '\0')
+			new_input_pos = memcons.input_start;
+		/* Clear the consumed slot before publishing the new position. */
+		*memcons.input_pos = '\0';
+		wmb();
+		memcons.input_pos = new_input_pos;
+		return c;
+	}
+	/* Nothing pending. */
+	return -1;
+}
+
+/*
+ * Blocking counterpart of memcons_getc_poll(): keep polling until a value
+ * other than -1 comes back, calling cpu_relax() after every empty poll,
+ * and hand that value to the caller.
+ */
+int memcons_getc(void)
+{
+	int ch;
+
+	while ((ch = memcons_getc_poll()) == -1)
+		cpu_relax();
+
+	return ch;
+}
+
+void __init udbg_init_memcons(void)
+{
+	udbg_putc = memcons_putc;
+	udbg_getc = memcons_getc;
+	udbg_getc_poll = memcons_getc_poll;
+}
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 0000000000..063d919589
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_XICS
+	def_bool n
+	select PPC_SMP_MUXED_IPI
+	select HARDIRQS_SW_RESEND
+
+config PPC_ICP_NATIVE
+	def_bool n
+
+config PPC_ICP_HV
+	def_bool n
+
+config PPC_ICS_RTAS
+	def_bool n
+
+config PPC_ICS_NATIVE
+	def_bool n
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 0000000000..747063927c
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y				+= xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE)	+= icp-native.o
+obj-$(CONFIG_PPC_ICP_HV)	+= icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS)	+= ics-rtas.o
+obj-$(CONFIG_PPC_ICS_NATIVE)	+= ics-native.o
+obj-$(CONFIG_PPC_POWERNV)	+= ics-opal.o icp-opal.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 0000000000..cf8db19a4f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2011 IBM Corporation.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+	unsigned int ret = XICS_IRQ_SPURIOUS;
+
+	rc = plpar_hcall(H_XIRR, retbuf, cppr);
+	if (rc == H_SUCCESS) {
+		ret = (unsigned int)retbuf[0];
+	} else {
+		pr_err("%s: bad return code xirr cppr=0x%x returned %ld\n",
+			__func__, cppr, rc);
+		WARN_ON_ONCE(1);
+	}
+
+	return ret;
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+	long rc = plpar_hcall_norets(H_CPPR, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n",
+			__func__, value, rc);
+		WARN_ON_ONCE(1);
+	}
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+	long rc = plpar_hcall_norets(H_EOI, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n",
+			__func__, value, rc);
+		WARN_ON_ONCE(1);
+		icp_hv_set_cppr(value >> 24);
+	}
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+	int hw_cpu = get_hard_smp_processor_id(n_cpu);
+	long rc;
+
+	/* Make sure all previous accesses are ordered before IPI sending */
+	mb();
+	rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
+	if (rc != H_SUCCESS) {
+		pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x "
+			"returned %ld\n", __func__, n_cpu, hw_cpu, value, rc);
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+	/* We take the ipi irq but never return, so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+	/* EOI value: priority 0x00 in the top byte, XICS_IPI as the source. */
+	icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+	unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_hv_set_xirr(xirr);
+
+	return 0;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_hv_set_cppr(cppr);
+	iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_hv_cause_ipi(int cpu)
+{
+	icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_hv_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+	.get_irq	= icp_hv_get_irq,
+	.eoi		= icp_hv_eoi,
+	.set_priority	= icp_hv_set_cpu_priority,
+	.teardown_cpu	= icp_hv_teardown_cpu,
+	.flush_ipi	= icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_hv_ipi_action,
+	.cause_ipi	= icp_hv_cause_ipi,
+#endif
+};
+
+int __init icp_hv_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+	if (!np)
+		np = of_find_node_by_type(NULL,
+				    "PowerPC-External-Interrupt-Presentation");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_hv_ops;
+
+	of_node_put(np);
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 0000000000..f6ec6dba92
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2011 IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
+
+struct icp_ipl {
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr_poll;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr;
+	u32 dummy;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} qirr;
+	u32 link_a;
+	u32 link_b;
+	u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+	int cpu = smp_processor_id();
+	unsigned int xirr;
+
+	/* Handle an interrupt latched by KVM first */
+	xirr = kvmppc_get_xics_latch();
+	if (xirr)
+		return xirr;
+	/* Nothing latched: read the XIRR register mapped for this CPU. */
+	return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+	int cpu = smp_processor_id();
+
+	out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+	int cpu = smp_processor_id();
+
+	out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+	out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_native_set_cppr(cppr);
+	iosync();
+}
+
+void icp_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+	/* We take the ipi irq but never return, so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+	/* EOI value: priority 0x00 in the top byte, XICS_IPI as the source. */
+	icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_native_set_xirr(xirr);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_native_cause_ipi(int cpu)
+{
+	kvmppc_set_host_ipi(cpu);
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void icp_native_cause_ipi_rm(int cpu)
+{
+	/*
+	 * Currently not used to send IPIs to another CPU
+	 * on the same core. Only caller is KVM real mode.
+	 * Need the physical address of the XICS to be
+	 * previously saved in kvm_hstate in the paca.
+	 */
+	void __iomem *xics_phys;
+
+	/*
+	 * Just like the cause_ipi functions, it is required to
+	 * include a full barrier before causing the IPI.
+	 */
+	xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
+	mb();
+	__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
+}
+#endif
+
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_native_flush_interrupt(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return;
+	if (vec == XICS_IPI) {
+		/* Clear pending IPI */
+		int cpu = smp_processor_id();
+		kvmppc_clear_host_ipi(cpu);
+		icp_native_set_qirr(cpu, 0xff);
+	} else {
+		pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
+		       vec);
+		xics_mask_unknown_vec(vec);
+	}
+	/* EOI the interrupt */
+	icp_native_set_xirr(xirr);
+}
+
+void xics_wake_cpu(int cpu)
+{
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+EXPORT_SYMBOL_GPL(xics_wake_cpu);
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	kvmppc_clear_host_ipi(cpu);
+	icp_native_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+/* Reserve and ioremap the ICP MMIO window of the CPU with hard id @hw_id. */
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+					 unsigned long size)
+{
+	char *rname;
+	int i, cpu = -1;
+
+	/* This may look gross but it's good enough for now, we don't quite
+	 * have a hard -> linux processor id matching.
+	 */
+	for_each_possible_cpu(i) {
+		if (!cpu_present(i))
+			continue;
+		if (hw_id == get_hard_smp_processor_id(i)) {
+			cpu = i;
+			break;
+		}
+	}
+	/* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+	 * with way more entries in there than you have CPUs
+	 */
+	if (cpu == -1)
+		return 0;
+
+	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+			  cpu, hw_id);
+	if (!rname)	/* don't hand a NULL name to request_mem_region() */
+		return -ENOMEM;
+	if (!request_mem_region(addr, size, rname)) {
+		pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
+			cpu, hw_id);
+		return -EBUSY;
+	}
+	icp_native_regs[cpu] = ioremap(addr, size);
+	kvmppc_set_xics_phys(cpu, addr);
+	if (!icp_native_regs[cpu]) {
+		pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n",
+			cpu, hw_id, addr);
+		release_mem_region(addr, size);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Map every address resource of @np as one interrupt server. *@indx is the
+ * running server number: reloaded from "ibm,interrupt-server-ranges" when
+ * that property exists, incremented per resource. Returns 0, or -1 on error.
+ */
+static int __init icp_native_init_one_node(struct device_node *np,
+					   unsigned int *indx)
+{
+	unsigned int ilen;
+	const __be32 *ireg;
+	int i, num_reg, num_servers = 0;
+
+	/* This code makes the theoretically broken assumption that the
+	 * interrupt server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+	/* Does that ever happen? We'll know soon enough... but even good old
+	 * f80 does have that property ..
+	 */
+	WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+	if (ireg) {
+		*indx = of_read_number(ireg, 1);
+		if (ilen >= 2*sizeof(u32))
+			num_servers = of_read_number(ireg + 1, 1);
+	}
+
+	num_reg = of_address_count(np);
+	if (num_servers && (num_servers != num_reg)) {
+		pr_err("icp_native: ICP reg len (%d) != num servers (%d)\n",
+		       num_reg, num_servers);
+		return -1;
+	}
+
+	for (i = 0; i < num_reg; i++) {
+		struct resource r;
+		int err;
+
+		err = of_address_to_resource(np, i, &r);
+		if (err) {
+			pr_err("icp_native: Could not translate ICP MMIO"
+			       " for interrupt server 0x%x (%d)\n", *indx, err);
+			return -1;
+		}
+
+		if (icp_native_map_one_cpu(*indx, r.start, resource_size(&r)))
+			return -1;
+
+		(*indx)++;
+	}
+	return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+	.get_irq	= icp_native_get_irq,
+	.eoi		= icp_native_eoi,
+	.set_priority	= icp_native_set_cpu_priority,
+	.teardown_cpu	= icp_native_teardown_cpu,
+	.flush_ipi	= icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_native_ipi_action,
+	.cause_ipi	= icp_native_cause_ipi,
+#endif
+};
+
+/*
+ * Probe native ICPs: first all "ibm,ppc-xicp" compatible nodes, then, only
+ * if none of them initialised, nodes of device type
+ * "PowerPC-External-Interrupt-Presentation". Installs icp_native_ops when
+ * at least one node succeeded; returns -ENODEV otherwise.
+ */
+int __init icp_native_init(void)
+{
+	struct device_node *np;
+	u32 indx = 0;
+	int found = 0;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc-xicp")
+		found |= (icp_native_init_one_node(np, &indx) == 0);
+	if (!found) {
+		for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation")
+			found |= (icp_native_init_one_node(np, &indx) == 0);
+	}
+
+	if (!found)
+		return -ENODEV;
+	icp_ops = &icp_native_ops;
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
new file mode 100644
index 0000000000..4dae624b9f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016 IBM Corporation.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/kvm_ppc.h>
+
+static void icp_opal_teardown_cpu(void)
+{
+	int hw_cpu = hard_smp_processor_id();
+
+	/* Clear any pending IPI */
+	opal_int_set_mfrr(hw_cpu, 0xff);
+}
+
+static void icp_opal_flush_ipi(void)
+{
+	/*
+	 * We take the ipi irq but never return, so we need to EOI the IPI,
+	 * but want to leave our priority 0.
+	 *
+	 * Should we check all the other interrupts too?
+	 * Should we be flagging idle loop instead?
+	 * Or creating some task to be scheduled?
+	 */
+	if (opal_int_eoi((0x00 << 24) | XICS_IPI) > 0)
+		force_external_irq_replay();
+}
+
+static unsigned int icp_opal_get_xirr(void)
+{
+	unsigned int kvm_xirr;
+	__be32 hw_xirr;
+	int64_t rc;
+
+	/* Handle an interrupt latched by KVM first */
+	kvm_xirr = kvmppc_get_xics_latch();
+	if (kvm_xirr)
+		return kvm_xirr;
+
+	/* Then ask OPAL */
+	rc = opal_int_get_xirr(&hw_xirr, false);
+	if (rc < 0)
+		return 0;
+	return be32_to_cpu(hw_xirr);
+}
+
+static unsigned int icp_opal_get_irq(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+	unsigned int irq;
+
+	xirr = icp_opal_get_xirr();
+	vec = xirr & 0x00ffffff;
+	if (vec == XICS_IRQ_SPURIOUS)
+		return 0;
+
+	irq = irq_find_mapping(xics_host, vec);
+	if (likely(irq)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	if (opal_int_eoi(xirr) > 0)
+		force_external_irq_replay();
+
+	return 0;
+}
+
+static void icp_opal_set_cpu_priority(unsigned char cppr)
+{
+	/*
+	 * Here be dragons. The caller has asked to allow only IPI's and not
+	 * external interrupts. But OPAL XIVE doesn't support that. So instead
+	 * of allowing no interrupts allow all. That's still not right, but
+	 * currently the only caller who does this is xics_migrate_irqs_away()
+	 * and it works in that case.
+	 */
+	if (cppr >= DEFAULT_PRIORITY)
+		cppr = LOWEST_PRIORITY;
+
+	xics_set_base_cppr(cppr);
+	opal_int_set_cppr(cppr);
+	iosync();
+}
+
+static void icp_opal_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int64_t rc;
+
+	iosync();
+	rc = opal_int_eoi((xics_pop_cppr() << 24) | hw_irq);
+
+	/*
+	 * EOI tells us whether there are more interrupts to fetch.
+	 *
+	 * Some HW implementations might not be able to send us another
+	 * external interrupt in that case, so we force a replay.
+	 */
+	if (rc > 0)
+		force_external_irq_replay();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_opal_cause_ipi(int cpu)
+{
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	kvmppc_clear_host_ipi(cpu);
+	opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+
+	return smp_ipi_demux();
+}
+
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_opal_flush_interrupt(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+
+	do {
+		xirr = icp_opal_get_xirr();
+		vec = xirr & 0x00ffffff;
+		if (vec == XICS_IRQ_SPURIOUS)
+			break;
+		if (vec == XICS_IPI) {
+			/* Clear pending IPI */
+			int cpu = smp_processor_id();
+			kvmppc_clear_host_ipi(cpu);
+			opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+		} else {
+			pr_err("XICS: hw interrupt 0x%x to offline cpu, "
+			       "disabling\n", vec);
+			xics_mask_unknown_vec(vec);
+		}
+
+		/* EOI the interrupt */
+	} while (opal_int_eoi(xirr) > 0);
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_opal_ops = {
+	.get_irq	= icp_opal_get_irq,
+	.eoi		= icp_opal_eoi,
+	.set_priority	= icp_opal_set_cpu_priority,
+	.teardown_cpu	= icp_opal_teardown_cpu,
+	.flush_ipi	= icp_opal_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_opal_ipi_action,
+	.cause_ipi	= icp_opal_cause_ipi,
+#endif
+};
+
+int __init icp_opal_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_opal_ops;
+	/* Purely informational, so log it at an explicit KERN_INFO level. */
+	pr_info("XICS: Using OPAL ICP fallbacks\n");
+	/* The node was only probed for presence; drop the reference. */
+	of_node_put(np);
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c
new file mode 100644
index 0000000000..112c8a1e81
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-native.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ICS backend for natively managed (memory-mapped) interrupt sources.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+//#define DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+#include <linux/list.h>
+
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+struct ics_native {
+	struct ics		ics;
+	struct device_node	*node;
+	void __iomem    	*base;
+	u32             	ibase;
+	u32             	icount;
+};
+#define to_ics_native(_ics)     container_of(_ics, struct ics_native, ics)
+
+static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec)
+{
+	return in->base + 0x800 + ((vec - in->ibase) << 2);
+}
+
+static void ics_native_unmask_irq(struct irq_data *d)
+{
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	struct ics *ics = irq_data_get_irq_chip_data(d);
+	struct ics_native *in = to_ics_native(ics);
+	unsigned int server;
+
+	pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec);
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+	out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY);
+}
+
+static unsigned int ics_native_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (irq_data_get_msi_desc(d))
+		pci_msi_unmask_irq(d);
+#endif
+
+	/* unmask it */
+	ics_native_unmask_irq(d);
+	return 0;
+}
+
+static void ics_native_do_mask(struct ics_native *in, unsigned int vec)
+{
+	out_be32(ics_native_xive(in, vec), 0xff);
+}
+
+static void ics_native_mask_irq(struct irq_data *d)
+{
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	struct ics *ics = irq_data_get_irq_chip_data(d);
+	struct ics_native *in = to_ics_native(ics);
+
+	pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec);
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return;
+	ics_native_do_mask(in, vec);
+}
+
+static int ics_native_set_affinity(struct irq_data *d,
+				   const struct cpumask *cpumask,
+				   bool force)
+{
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+	struct ics *ics = irq_data_get_irq_chip_data(d);
+	struct ics_native *in = to_ics_native(ics);
+	int server;
+	u32 xive;
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return -EINVAL;
+
+	server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (server == -1) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+
+	xive = in_be32(ics_native_xive(in, vec));
+	xive = (xive & 0xff) | (server << 8);
+	out_be32(ics_native_xive(in, vec), xive);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_native_irq_chip = {
+	.name = "ICS",
+	.irq_startup		= ics_native_startup,
+	.irq_mask		= ics_native_mask_irq,
+	.irq_unmask		= ics_native_unmask_irq,
+	.irq_eoi		= NULL, /* Patched at init time */
+	.irq_set_affinity 	= ics_native_set_affinity,
+	.irq_set_type		= xics_set_irq_type,
+	.irq_retrigger		= xics_retrigger,
+};
+
+static int ics_native_check(struct ics *ics, unsigned int hw_irq)
+{
+	struct ics_native *in = to_ics_native(ics);
+
+	pr_devel("%s: hw_irq=0x%x\n", __func__, hw_irq);
+
+	if (hw_irq < in->ibase || hw_irq >= (in->ibase + in->icount))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void ics_native_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	struct ics_native *in = to_ics_native(ics);
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return;
+
+	ics_native_do_mask(in, vec);
+}
+
+static long ics_native_get_server(struct ics *ics, unsigned long vec)
+{
+	struct ics_native *in = to_ics_native(ics);
+	u32 xive;
+
+	if (vec < in->ibase || vec >= (in->ibase + in->icount))
+		return -EINVAL;
+
+	xive = in_be32(ics_native_xive(in, vec));
+	return (xive >> 8) & 0xfff;
+}
+
+static int ics_native_host_match(struct ics *ics, struct device_node *node)
+{
+	struct ics_native *in = to_ics_native(ics);
+
+	return in->node == node;
+}
+
+static struct ics ics_native_template = {
+	.check		= ics_native_check,
+	.mask_unknown	= ics_native_mask_unknown,
+	.get_server	= ics_native_get_server,
+	.host_match	= ics_native_host_match,
+	.chip = &ics_native_irq_chip,
+};
+
+/* Allocate, map and register one native ICS described by node @np.
+ * Returns 0 on success or a negative errno after undoing partial setup.
+ */
+static int __init ics_native_add_one(struct device_node *np)
+{
+	struct ics_native *ics;
+	u32 ranges[2];
+	int rc, count;
+
+	ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL);
+	if (!ics)
+		return -ENOMEM;
+	ics->node = of_node_get(np);
+	memcpy(&ics->ics, &ics_native_template, sizeof(struct ics));
+	ics->base = of_iomap(np, 0);
+	if (!ics->base) {
+		pr_err("Failed to map %pOFP\n", np);
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	count = of_property_count_u32_elems(np, "interrupt-ranges");
+	if (count < 2 || count & 1) {
+		pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+		rc = -EINVAL;
+		goto fail_unmap;
+	}
+	if (count > 2)
+		pr_warn("ICS %pOFP has %d ranges, only one supported\n",
+			np, count >> 1);
+	rc = of_property_read_u32_array(np, "interrupt-ranges", ranges, 2);
+	if (rc) {
+		pr_err("Failed to read interrupt-ranges of %pOFP\n", np);
+		goto fail_unmap;
+	}
+	ics->ibase = ranges[0];
+	ics->icount = ranges[1];
+	pr_info("ICS native initialized for sources %d..%d\n",
+		ics->ibase, ics->ibase + ics->icount - 1);
+
+	/* Register ourselves */
+	xics_register_ics(&ics->ics);
+	return 0;
+fail_unmap:
+	iounmap(ics->base);	/* mapped above; don't leak it on late failure */
+fail:
+	of_node_put(ics->node);
+	kfree(ics);
+	return rc;
+}
+
+int __init ics_native_init(void)
+{
+	struct device_node *ics;
+	bool found_one = false;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_native_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Find native ICS in the device-tree */
+	for_each_compatible_node(ics, NULL, "openpower,xics-sources") {
+		if (ics_native_add_one(ics) == 0)
+			found_one = true;
+	}
+
+	if (found_one)
+		pr_info("ICS native backend registered\n");
+
+	return found_one ? 0 : -ENODEV;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
new file mode 100644
index 0000000000..5fe73dabab
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ICS backend for OPAL managed interrupts.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
+static int ics_opal_mangle_server(int server)
+{
+	/* No link for now */
+	return server << 2;
+}
+
+static int ics_opal_unmangle_server(int server)
+{
+	/* No link for now */
+	return server >> 2;
+}
+
+static void ics_opal_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int64_t rc;
+	int server;
+
+	pr_devel("ics-hal: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+	server = ics_opal_mangle_server(server);
+
+	rc = opal_set_xive(hw_irq, server, DEFAULT_PRIORITY);
+	if (rc != OPAL_SUCCESS)
+		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
+		       " error %lld\n",
+		       __func__, d->irq, hw_irq, server, rc);
+}
+
+static unsigned int ics_opal_startup(struct irq_data *d)
+{
+	ics_opal_unmask_irq(d);
+	return 0;
+}
+
+static void ics_opal_mask_real_irq(unsigned int hw_irq)
+{
+	int server = ics_opal_mangle_server(xics_default_server);
+	int64_t rc;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	rc = opal_set_xive(hw_irq, server, 0xff);
+	if (rc != OPAL_SUCCESS)
+		pr_err("%s: opal_set_xive(0xff) irq=%u returned %lld\n",
+		       __func__, hw_irq, rc);
+}
+
+static void ics_opal_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("ics-hal: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_opal_mask_real_irq(hw_irq);
+}
+
+static int ics_opal_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	__be16 oserver;
+	int16_t server;
+	int8_t priority;
+	int64_t rc;
+	int wanted_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	rc = opal_get_xive(hw_irq, &oserver, &priority);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("%s: opal_get_xive(irq=%d [hw 0x%x]) error %lld\n",
+		       __func__, d->irq, hw_irq, rc);
+		return -1;
+	}
+
+	wanted_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (wanted_server < 0) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+	server = ics_opal_mangle_server(wanted_server);
+
+	pr_debug("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n",
+		 d->irq, hw_irq, wanted_server, server);
+
+	rc = opal_set_xive(hw_irq, server, priority);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("%s: opal_set_xive(irq=%d [hw 0x%x] server=%x)"
+		       " error %lld\n",
+		       __func__, d->irq, hw_irq, server, rc);
+		return -1;
+	}
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_opal_irq_chip = {
+	.name = "OPAL ICS",
+	.irq_startup = ics_opal_startup,
+	.irq_mask = ics_opal_mask_irq,
+	.irq_unmask = ics_opal_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_opal_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
+};
+
+static int ics_opal_host_match(struct ics *ics, struct device_node *node)
+{
+	return 1;
+}
+
+static int ics_opal_check(struct ics *ics, unsigned int hw_irq)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(hw_irq, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return -ENXIO;
+
+	return 0;
+}
+
+static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(vec, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return;
+
+	ics_opal_mask_real_irq(vec);
+}
+
+static long ics_opal_get_server(struct ics *ics, unsigned long vec)
+{
+	int64_t rc;
+	__be16 server;
+	int8_t priority;
+
+	/* Check if HAL knows about this interrupt */
+	rc = opal_get_xive(vec, &server, &priority);
+	if (rc != OPAL_SUCCESS)
+		return -1;
+	return ics_opal_unmangle_server(be16_to_cpu(server));
+}
+
+/* Only one global & state struct ics */
+static struct ics ics_hal = {
+	.check		= ics_opal_check,
+	.mask_unknown	= ics_opal_mask_unknown,
+	.get_server	= ics_opal_get_server,
+	.host_match	= ics_opal_host_match,
+	.chip		= &ics_opal_irq_chip,
+};
+
+int __init ics_opal_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_opal_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_hal);
+
+	pr_info("ICS OPAL backend registered\n");
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 0000000000..b772a833d9
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens, looked up by ics_rtas_init() */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int server;
+	int rc;
+
+	pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	/* Nothing to do for the IPI or the spurious vector */
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
+
+	/* Point the source at its server, at the default priority */
+	rc = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+		       DEFAULT_PRIORITY);
+	if (rc) {
+		pr_err("%s: ibm_set_xive irq %u server %x returned %d\n",
+		       __func__, hw_irq, server, rc);
+		return;
+	}
+
+	/* Now unmask the interrupt (often a no-op) */
+	rc = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	if (rc) {
+		pr_err("%s: ibm_int_on irq=%u returned %d\n",
+		       __func__, hw_irq, rc);
+	}
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+	/* Starting an interrupt only requires unmasking it */
+	ics_rtas_unmask_irq(d);
+	return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+	int rc;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	/* Turn the source off first; stop here if RTAS reports an error */
+	rc = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	if (rc) {
+		pr_err("%s: ibm_int_off irq=%u returned %d\n",
+		       __func__, hw_irq, rc);
+		return;
+	}
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	rc = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+		       xics_default_server, 0xff);
+	if (rc) {
+		pr_err("%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+		       __func__, hw_irq, rc);
+	}
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	/* Everything except the IPI and the spurious vector gets masked */
+	if (hw_irq != XICS_IPI && hw_irq != XICS_IRQ_SPURIOUS)
+		ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int xive[2];	/* [0] = server, [1] = priority */
+	int server;
+	int rc;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	/* Fetch the current setting so its priority can be carried over */
+	rc = rtas_call(ibm_get_xive, 1, 3, xive, hw_irq);
+	if (rc) {
+		pr_err("%s: ibm,get-xive irq=%u returns %d\n",
+		       __func__, hw_irq, rc);
+		return -1;
+	}
+
+	/* Strict check requested: -1 comes back if no cpu in the mask fits */
+	server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (server == -1) {
+		pr_warn("%s: No online cpus in the mask %*pb for irq %d\n",
+			__func__, cpumask_pr_args(cpumask), d->irq);
+		return -1;
+	}
+
+	pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq,
+		 hw_irq, server);
+
+	/* Retarget the source, reusing the priority read back above */
+	rc = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, xive[1]);
+	if (rc) {
+		pr_err("%s: ibm,set-xive irq=%u returns %d\n",
+		       __func__, hw_irq, rc);
+		return -1;
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+	.name = "XICS",
+	.irq_startup = ics_rtas_startup,
+	.irq_mask = ics_rtas_mask_irq,
+	.irq_unmask = ics_rtas_unmask_irq,
+	.irq_eoi = NULL, /* Set to icp_ops->eoi by ics_rtas_init() */
+	.irq_set_affinity = ics_rtas_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
+};
+
+static int ics_rtas_check(struct ics *ics, unsigned int hw_irq)
+{
+	int xive[2];
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if RTAS knows about this interrupt; only the call status
+	 * matters, the values read back are discarded
+	 */
+	if (rtas_call(ibm_get_xive, 1, 3, xive, hw_irq))
+		return -ENXIO;
+
+	return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	ics_rtas_mask_real_irq(vec); /* no RTAS lookup first */
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+	int xive[2];
+
+	/* The first word read back by the get-xive call is the server */
+	if (rtas_call(ibm_get_xive, 1, 3, xive, vec))
+		return -1;
+	return xive[0];
+}
+
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... so every node is matched here, except
+	 * the legacy 8259, which is compatible with "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
+/* The single, global RTAS ICS: it holds only callbacks and the irq chip */
+static struct ics ics_rtas = {
+	.check		= ics_rtas_check,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
+	.chip = &ics_rtas_irq_chip,
+};
+
+int __init ics_rtas_init(void)
+{
+	ibm_get_xive = rtas_function_token(RTAS_FN_IBM_GET_XIVE);
+	ibm_set_xive = rtas_function_token(RTAS_FN_IBM_SET_XIVE);
+	ibm_int_on = rtas_function_token(RTAS_FN_IBM_INT_ON);
+	ibm_int_off = rtas_function_token(RTAS_FN_IBM_INT_OFF);
+
+	/* The RTAS "ICS" is only enabled when RTAS provides both the
+	 * get-xive and the set-xive service
+	 */
+	if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+	    ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	/* The chip's EOI hook is NULL in the static initializer; point
+	 * it at the EOI of whichever ICP backend is in use
+	 */
+	ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register this backend as the XICS source controller */
+	xics_register_ics(&ics_rtas);
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 0000000000..d3a4156e87
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2011 IBM Corporation.
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops	*icp_ops;
+
+unsigned int xics_default_server		= 0xff;
+unsigned int xics_default_distrib_server	= 0;
+unsigned int xics_interrupt_server_size		= 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_domain *xics_host;
+
+static struct ics *xics_ics;
+
+void xics_update_irq_servers(void)
+{
+	int i, j;
+	struct device_node *np;
+	u32 ilen;
+	const __be32 *ireg;
+	u32 hcpuid;
+
+	/* Find the server numbers for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hcpuid = get_hard_smp_processor_id(boot_cpuid);
+	xics_default_server = xics_default_distrib_server = hcpuid;
+
+	pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
+
+	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+	if (!ireg) {
+		of_node_put(np);
+		return;
+	}
+
+	i = ilen / sizeof(int);
+
+	/* "ibm,ppc-interrupt-gserver#s" is walked as (server, gserver) pairs.
+	 * The gserver paired with the boot cpu's hard id becomes the default
+	 * distribution server. Only complete pairs are examined, so an odd
+	 * cell count can never cause a read past the end of the property.
+	 */
+	for (j = 0; j + 1 < i; j += 2) {
+		if (be32_to_cpu(ireg[j]) == hcpuid) {
+			xics_default_distrib_server = be32_to_cpu(ireg[j+1]);
+			break;
+		}
+	}
+	pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+		 xics_default_distrib_server);
+	of_node_put(np);
+}
+
+/* Join/leave (per "join") the global interrupt queue of "gserver". RTAS
+ * setups only for now (no-op otherwise); to be sorted out for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+	int index;
+	int status;
+
+	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+		return;
+
+	index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif /* CONFIG_PPC_RTAS */
+}
+
+void xics_setup_cpu(void)
+{
+	icp_ops->set_priority(LOWEST_PRIORITY);
+
+	xics_set_cpu_giq(xics_default_distrib_server, 1); /* join the GIQ */
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
+
+	/* Without a registered ICS there is nothing to mask it with */
+	if (!WARN_ON(!xics_ics))
+		xics_ics->mask_unknown(xics_ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+static void __init xics_request_ipi(void)
+{
+	unsigned int virq = irq_create_mapping(xics_host, XICS_IPI);
+	int rc;
+
+	BUG_ON(!virq);
+
+	/* IPIs are marked IRQF_PERCPU. The handler was set in map. */
+	rc = request_irq(virq, icp_ops->ipi_action,
+			 IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD,
+			 "IPI", NULL);
+	BUG_ON(rc);
+}
+
+void __init xics_smp_probe(void)
+{
+	/* Map and request the IPI interrupt */
+	xics_request_ipi();
+
+	/* Setup cause_ipi callback based on which ICP is used */
+	smp_ops->cause_ipi = icp_ops->cause_ipi;
+}
+
+#endif /* CONFIG_SMP */
+
+noinstr void xics_teardown_cpu(void)
+{
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	this_cpu_ptr(&xics_cppr)->index = 0;
+
+	/* Then set priority 0 and let the ICP backend tear itself down */
+	icp_ops->set_priority(0);
+	icp_ops->teardown_cpu();
+}
+
+noinstr void xics_kexec_teardown_cpu(int secondary)
+{
+	xics_teardown_cpu();
+
+	icp_ops->flush_ipi();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so only secondary cpus leave it; the master cpu stays in.
+	 */
+	if (secondary)
+		xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Move interrupts served by this cpu elsewhere. Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+	unsigned int irq, virq;
+	struct irq_desc *desc;
+
+	pr_debug("%s: CPU %u\n", __func__, cpu);
+
+	/* If we used to be the default server, move to the new "boot_cpuid" */
+	if (hw_cpu == xics_default_server)
+		xics_update_irq_servers();
+
+	/* Reject any interrupt that was queued to us... */
+	icp_ops->set_priority(0);
+
+	/* Remove ourselves from the global interrupt queue */
+	xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+	for_each_irq_desc(virq, desc) {
+		struct irq_chip *chip;
+		long server;
+		unsigned long flags;
+		struct irq_data *irqd;
+
+		/* We can't set affinity on ISA interrupts */
+		if (virq < NR_IRQS_LEGACY)
+			continue;
+		/* Only interrupts with an action installed need migrating */
+		if (!desc->action)
+			continue;
+		/* We need a mapping in the XICS IRQ domain */
+		irqd = irq_domain_get_irq_data(xics_host, virq);
+		if (!irqd)
+			continue;
+		irq = irqd_to_hwirq(irqd);
+		/* We need to get IPIs still. */
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+			continue;
+		chip = irq_desc_get_chip(desc);
+		if (!chip || !chip->irq_set_affinity)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		/* Ask the ICS which server this interrupt is routed to */
+		server = xics_ics->get_server(xics_ics, irq);
+		if (server < 0) {
+			pr_err("%s: Can't find server for irq %d/%x\n",
+			       __func__, virq, irq);
+			goto unlock;
+		}
+
+		/* We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (server != hw_cpu)
+			goto unlock;
+
+		/* This is expected during cpu offline. */
+		if (cpu_online(cpu))
+			pr_warn("IRQ %u affinity broken off cpu %u\n",
+				virq, cpu);
+
+		/* Reset affinity to all cpus, after dropping the desc lock */
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		irq_set_affinity(virq, cpu_all_mask);
+		continue;
+unlock:
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	/* Allow "sufficient" time to drop any inflight IRQ's */
+	mdelay(5);
+
+	/*
+	 * Allow IPIs again. This is done at the very end, after migrating all
+	 * interrupts, the expectation is that we'll only get woken up by an IPI
+	 * interrupt beyond this point, but leave externals masked just to be
+	 * safe. If we're using icp-opal this may actually allow all
+	 * interrupts anyway, but that should be OK.
+	 */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			unsigned int strict_check)
+{
+	/* Without distribution everything goes to the default server */
+	if (!distribute_irqs)
+		return xics_default_server;
+
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+		int cpu = cpumask_first_and(cpu_online_mask, cpumask);
+
+		if (cpu < nr_cpu_ids)
+			return get_hard_smp_processor_id(cpu);
+
+		if (strict_check)
+			return -1;
+	}
+
+	/*
+	 * Workaround issue with some versions of JS20 firmware that
+	 * deliver interrupts to cpus which haven't been started. This
+	 * happens when using the maxcpus= boot option.
+	 */
+	if (!cpumask_equal(cpu_online_mask, cpu_present_mask))
+		return xics_default_server;
+
+	return xics_default_distrib_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_domain *h, struct device_node *node,
+			   enum irq_domain_bus_token bus_token)
+{
+	if (WARN_ON(!xics_ics))
+		return 0;
+	return !!xics_ics->host_match(xics_ics, node); /* normalise to 0/1 */
+}
+
+/* Empty mask/unmask callbacks used by the IPI chip below */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+	.name = "XICS",
+	.irq_eoi = NULL, /* Set to icp_ops->eoi by xics_init() */
+	.irq_mask = xics_ipi_mask,
+	.irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_domain *domain, unsigned int virq,
+			 irq_hw_number_t hwirq)
+{
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hwirq);
+
+	/*
+	 * Mark interrupts as edge sensitive by default so that resend
+	 * actually works. The device-tree parsing will turn the LSIs
+	 * back to level.
+	 */
+	irq_clear_status_flags(virq, IRQ_LEVEL);
+
+	/* IPIs get their own chip and never go through the ICS */
+	if (hwirq == XICS_IPI) {
+		irq_set_chip_and_handler(virq, &xics_ipi_chip,
+					 handle_percpu_irq);
+		return 0;
+	}
+
+	/*
+	 * Everything else needs a registered ICS (warn if there is none)
+	 * whose check() callback accepts this hardware interrupt number.
+	 */
+	if (WARN_ON(!xics_ics) || xics_ics->check(xics_ics, hwirq))
+		return -EINVAL;
+
+	/* The ICS supplies the chip and is itself used as the chip data */
+	irq_domain_set_info(domain, virq, hwirq, xics_ics->chip,
+			    xics_ics, handle_fasteoi_irq, NULL, NULL);
+	return 0;
+}
+
+static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	/* The first cell is the hardware interrupt number */
+	*out_hwirq = intspec[0];
+
+	/*
+	 * If intsize is at least 2, we look for the type in the second cell,
+	 * we assume the LSB indicates a level interrupt. With a single cell
+	 * there is no type to look at and level is used.
+	 */
+	if (intsize > 1 && !(intspec[1] & 1)) {
+		*out_flags = IRQ_TYPE_EDGE_RISING;
+		return 0;
+	}
+
+	*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	/*
+	 * We only support edge-rising and level-low. This has really no
+	 * effect other than setting the corresponding descriptor bits, but
+	 * those will in turn affect the resend function when re-enabling an
+	 * edge interrupt.
+	 * Set the default to edge as explained in map().
+	 */
+	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_EDGE_RISING;
+
+	if (flow_type != IRQ_TYPE_EDGE_RISING &&
+	    flow_type != IRQ_TYPE_LEVEL_LOW)
+		return -EINVAL;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+	/*
+	 * We need to push a dummy CPPR when retriggering, since the subsequent
+	 * EOI will try to pop it. Passing 0 works, as the function hard codes
+	 * the priority value anyway.
+	 */
+	xics_push_cppr(0);
+
+	/* Returning 0 tells the core to do a soft retrigger */
+	return 0;
+}
+
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+static int xics_host_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
+				      unsigned long *hwirq, unsigned int *type)
+{	/* Adapter: feed the fwspec's node and cells to xics_host_xlate() */
+	return xics_host_xlate(d, to_of_node(fwspec->fwnode), fwspec->param,
+			       fwspec->param_count, hwirq, type);
+}
+
+static int xics_host_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	int i, rc;
+
+	/* Same guard as xics_host_map(): no ICS means no chip to install */
+	if (WARN_ON(!xics_ics))
+		return -EINVAL;
+	rc = xics_host_domain_translate(domain, fwspec, &hwirq, &type);
+	if (rc)
+		return rc;
+	pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i, xics_ics->chip,
+				    xics_ics, handle_fasteoi_irq, NULL, NULL);
+	return 0;
+}
+
+static void xics_host_domain_free(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs)
+{	/* Only traces the request; nothing else is done here */
+	pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+}
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+static const struct irq_domain_ops xics_host_ops = {
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	.alloc	= xics_host_domain_alloc,
+	.free	= xics_host_domain_free,
+	.translate = xics_host_domain_translate,
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+	.match = xics_host_match,
+	.map = xics_host_map,
+	.xlate = xics_host_xlate,
+};
+
+static int __init xics_allocate_domain(void)
+{
+	struct fwnode_handle *fwnode = irq_domain_alloc_named_fwnode("XICS");
+
+	if (!fwnode)
+		return -ENOMEM;
+
+	xics_host = irq_domain_create_tree(fwnode, &xics_host_ops, NULL);
+	if (xics_host) {
+		irq_set_default_host(xics_host);
+		return 0;
+	}
+
+	/* Domain creation failed: release the fwnode allocated above */
+	irq_domain_free_fwnode(fwnode);
+	return -ENOMEM;
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+	if (WARN_ONCE(xics_ics, "XICS: Source Controller is already defined !\n"))
+		return;
+	xics_ics = ics; /* first registration wins; later ones only warn */
+}
+
+static void __init xics_get_server_size(void)
+{
+	const __be32 *prop;
+	struct device_node *ics_np;
+
+	/* The interrupt server size comes from the first ICS node found,
+	 * if there is one; otherwise the current value is left alone
+	 */
+	ics_np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+	if (!ics_np)
+		return;
+
+	prop = of_get_property(ics_np, "ibm,interrupt-server#-size", NULL);
+	if (prop)
+		xics_interrupt_server_size = be32_to_cpu(*prop);
+
+	of_node_put(ics_np);
+}
+
+void __init xics_init(void)
+{
+	int rc = -1;
+
+	/* First locate ICP: hypervisor on LPAR, else native, else OPAL */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rc = icp_hv_init();
+	if (rc < 0) {
+		rc = icp_native_init();
+		if (rc == -ENODEV)
+			rc = icp_opal_init();
+	}
+	if (rc < 0) {
+		pr_warn("XICS: Cannot find a Presentation Controller !\n");
+		return;
+	}
+
+	/* Copy get_irq callback over to ppc_md */
+	ppc_md.get_irq = icp_ops->get_irq;
+
+	/* Patch up IPI chip EOI */
+	xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+	/* Now locate ICS: RTAS, then OPAL, then native; only warn if none */
+	rc = ics_rtas_init();
+	if (rc < 0)
+		rc = ics_opal_init();
+	if (rc < 0)
+		rc = ics_native_init();
+	if (rc < 0)
+		pr_warn("XICS: Cannot find a Source Controller !\n");
+
+	/* Initialize common bits */
+	xics_get_server_size();
+	xics_update_irq_servers();
+	rc = xics_allocate_domain();
+	if (rc < 0)
+		pr_err("XICS: Failed to create IRQ domain\n");
+	xics_setup_cpu();
+}
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
new file mode 100644
index 0000000000..785c292d10
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_XIVE
+	bool
+	select PPC_SMP_MUXED_IPI
+	select HARDIRQS_SW_RESEND
+
+config PPC_XIVE_NATIVE
+	bool
+	select PPC_XIVE
+	depends on PPC_POWERNV
+
+config PPC_XIVE_SPAPR
+	bool
+	select PPC_XIVE
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
new file mode 100644
index 0000000000..e510888389
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y				+= common.o
+obj-$(CONFIG_PPC_XIVE_NATIVE)	+= native.o
+obj-$(CONFIG_PPC_XIVE_SPAPR)	+= spapr.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
new file mode 100644
index 0000000000..a289cb97c1
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -0,0 +1,1864 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/xmon.h>
+
+#include "xive-internal.h"
+
+#undef DEBUG_FLUSH
+#undef DEBUG_ALL
+
+#ifdef DEBUG_ALL
+#define DBG_VERBOSE(fmt, ...)	pr_devel("cpu %d - " fmt, \
+					 smp_processor_id(), ## __VA_ARGS__)
+#else
+#define DBG_VERBOSE(fmt...)	do { } while(0)
+#endif /* DEBUG_ALL */
+
+bool __xive_enabled;
+EXPORT_SYMBOL_GPL(__xive_enabled);
+bool xive_cmdline_disabled;
+
+/* We use only one priority for now */
+static u8 xive_irq_priority;
+
+/* TIMA exported to KVM */
+void __iomem *xive_tima;
+EXPORT_SYMBOL_GPL(xive_tima);
+u32 xive_tima_offset;
+
+/* Backend ops */
+static const struct xive_ops *xive_ops;
+
+/* Our global interrupt domain */
+static struct irq_domain *xive_irq_domain;
+
+#ifdef CONFIG_SMP
+/* The IPIs use the same logical irq number when on the same chip */
+static struct xive_ipi_desc {
+	unsigned int irq;
+	char name[16];
+	atomic_t started;
+} *xive_ipis;
+
+/*
+ * IPI irq of a cpu's node. Use early_cpu_to_node() for hot-plugged CPUs
+ */
+static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
+{
+	return xive_ipis[early_cpu_to_node(cpu)].irq;
+}
+#endif
+
+/* Xive state for each CPU */
+static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
+
+/* An invalid CPU target */
+#define XIVE_INVALID_TARGET	(-1)
+
+/*
+ * Global toggle to switch on/off StoreEOI
+ */
+static bool xive_store_eoi = true;
+
+static bool xive_is_store_eoi(struct xive_irq_data *xd)
+{
+	return xive_store_eoi && (xd->flags & XIVE_IRQ_FLAG_STORE_EOI);
+}
+
+/*
+ * Read the next entry in a queue, return its content if it's valid
+ * or 0 if there is no new entry.
+ *
+ * The queue pointer is moved forward unless "just_peek" is set
+ */
+static u32 xive_read_eq(struct xive_q *q, bool just_peek)
+{
+	u32 entry;
+
+	/* No queue page, no entries */
+	if (!q->qpage)
+		return 0;
+
+	entry = be32_to_cpup(q->qpage + q->idx);
+
+	/* Check valid bit (31) vs current toggle polarity */
+	if ((entry >> 31) == q->toggle)
+		return 0;
+
+	/* Mask out the valid bit (31) */
+	entry &= 0x7fffffff;
+	if (just_peek)
+		return entry;
+	/* Consume: advance, flipping the toggle when wrapping to slot 0 */
+	q->idx = (q->idx + 1) & q->msk;
+	if (q->idx == 0)
+		q->toggle ^= 1;
+	return entry;
+}
+
+/*
+ * Scans all the queues that may have interrupts in them
+ * (based on "pending_prio") in priority order until an
+ * interrupt is found or all the queues are empty.
+ *
+ * Then updates the CPPR (Current Processor Priority
+ * Register) based on the most favored interrupt found
+ * (0xff if none) and return what was found (0 if none).
+ *
+ * If just_peek is set, return the most favored pending
+ * interrupt if any but don't update the queue pointers.
+ *
+ * Note: This function can operate generically on any number
+ * of queues (up to 8). The current implementation of the XIVE
+ * driver only uses a single queue however.
+ *
+ * Note2: This will also "flush" "the pending_count" of a queue
+ * into the "count" when that queue is observed to be empty.
+ * This is used to keep track of the amount of interrupts
+ * targeting a queue. When an interrupt is moved away from
+ * a queue, we only decrement that queue count once the queue
+ * has been observed empty to avoid races.
+ */
+static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
+{
+	u32 irq = 0;
+	u8 prio = 0;
+
+	/* Find highest pending priority */
+	while (xc->pending_prio != 0) {
+		struct xive_q *q;
+
+		prio = ffs(xc->pending_prio) - 1;
+		DBG_VERBOSE("scan_irq: trying prio %d\n", prio);
+
+		/* Try to fetch */
+		irq = xive_read_eq(&xc->queue[prio], just_peek);
+
+		/* Found something ? That's it */
+		if (irq) {
+			if (just_peek || irq_to_desc(irq))
+				break;
+			/*
+			 * We should never get here; if we do then we must
+			 * have failed to synchronize the interrupt properly
+			 * when shutting it down. pr_fmt adds the "xive: " tag.
+			 */
+			pr_crit("got interrupt %u without descriptor, dropping\n",
+				irq);
+			WARN_ON(1);
+			continue;
+		}
+
+		/* Queue is empty at this priority: clear its pending bit */
+		xc->pending_prio &= ~(1 << prio);
+
+		/*
+		 * Check if the queue count needs adjusting due to
+		 * interrupts being moved away. See description of
+		 * xive_dec_target_count()
+		 */
+		q = &xc->queue[prio];
+		if (atomic_read(&q->pending_count)) {
+			int p = atomic_xchg(&q->pending_count, 0);
+			if (p) {
+				WARN_ON(p > atomic_read(&q->count));
+				atomic_sub(p, &q->count);
+			}
+		}
+	}
+
+	/* If nothing was found, set CPPR to 0xff */
+	if (irq == 0)
+		prio = 0xff;
+
+	/* Update HW CPPR to match if necessary */
+	if (prio != xc->cppr) {
+		DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
+		xc->cppr = prio;
+		out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
+	}
+
+	return irq;
+}
+
+/*
+ * Perform one of the magic ESB loads described in xive-regs.h
+ * and return the low byte of the value that was read
+ */
+static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
+{
+	u64 raw;
+
+	if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd))
+		offset |= XIVE_ESB_LD_ST_MO;
+
+	if (!(xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) || !xive_ops->esb_rw)
+		raw = in_be64(xd->eoi_mmio + offset);
+	else
+		raw = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
+
+	return (u8)raw;
+}
+
+static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
+{
+	if (!(xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) || !xive_ops->esb_rw)
+		out_be64(xd->eoi_mmio + offset, data);
+	else
+		xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
+}
+
+#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS)
+static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size)
+{
+	u64 val = xive_esb_read(xd, XIVE_ESB_GET); /* tested for P and Q below */
+
+	snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx",
+		 xive_is_store_eoi(xd) ? 'S' : ' ',
+		 xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
+		 xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
+		 val & XIVE_ESB_VAL_P ? 'P' : '-',
+		 val & XIVE_ESB_VAL_Q ? 'Q' : '-',
+		 xd->trig_page, xd->eoi_page);
+}
+#endif /* CONFIG_XMON || CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_XMON
+static notrace void xive_dump_eq(const char *name, struct xive_q *q)
+{
+	u32 first, second, next;
+
+	if (!q->qpage)
+		return;
+	/* Show the entry at the current index and the one after it */
+	next = (q->idx + 1) & q->msk;
+	first = be32_to_cpup(q->qpage + q->idx);
+	second = be32_to_cpup(q->qpage + next);
+	xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
+		     q->idx, q->toggle, first, second);
+}
+
+notrace void xmon_xive_do_dump(int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	xmon_printf("CPU %d:", cpu);
+	if (xc) { /* a cpu without XIVE state only gets the header */
+		xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
+
+#ifdef CONFIG_SMP
+		{
+			char buffer[128];
+
+			xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
+			xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer);
+		}
+#endif /* CONFIG_SMP */
+		xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
+	}
+	xmon_printf("\n");
+}
+
+static struct irq_data *xive_get_irq_data(u32 hw_irq)
+{
+	unsigned int virq = irq_find_mapping(xive_irq_domain, hw_irq);
+
+	return virq ? irq_get_irq_data(virq) : NULL; /* 0: not mapped */
+}
+
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
+{
+	int rc;
+	u32 target;
+	u8 prio;
+	u32 lirq;
+
+	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
+	if (rc) {
+		xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
+		return rc;
+	}
+
+	xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
+		    hw_irq, target, prio, lirq);
+
+	if (!d) /* no irq_data supplied: look it up from the HW number */
+		d = xive_get_irq_data(hw_irq);
+
+	if (d) { /* the source state is only shown when irq_data exists */
+		char buffer[128];
+
+		xive_irq_data_dump(irq_data_get_irq_handler_data(d),
+				   buffer, sizeof(buffer));
+		xmon_printf("%s", buffer);
+	}
+
+	xmon_printf("\n");
+	return 0;
+}
+
+void xmon_xive_get_irq_all(void)
+{
+	struct irq_desc *desc;
+	unsigned int virq;
+
+	for_each_irq_desc(virq, desc) {
+		struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, virq);
+
+		if (d)	/* only irqs with data in the XIVE domain are dumped */
+			xmon_xive_get_irq_config(irqd_to_hwirq(d), d);
+	}
+}
+
+#endif /* CONFIG_XMON */
+
+static unsigned int xive_get_irq(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	u32 irq;
+
+	/*
+	 * This can be called either as a result of a HW interrupt or
+	 * as a "replay" because EOI decided there was still something
+	 * in one of the queues.
+	 *
+	 * First we perform an ACK cycle in order to update our mask
+	 * of pending priorities. This will also have the effect of
+	 * updating the CPPR to the most favored pending interrupts.
+	 *
+	 * In the future, if we have a way to differentiate a first
+	 * entry (on HW interrupt) from a replay triggered by EOI,
+	 * we could skip this on replays unless soft-mask tells us
+	 * that a new HW interrupt occurred.
+	 */
+	xive_ops->update_pending(xc);
+
+	DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);
+
+	/* Scan our queue(s) for interrupts, consuming what is found */
+	irq = xive_scan_interrupts(xc, false);
+
+	DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
+	    irq, xc->pending_prio);
+
+	/* Return pending interrupt if any; XIVE_BAD_IRQ is reported as 0 */
+	if (irq == XIVE_BAD_IRQ)
+		return 0;
+	return irq;
+}
+
+/*
+ * After EOI'ing an interrupt, we need to re-check the queue
+ * to see if another interrupt is pending since multiple
+ * interrupts can coalesce into a single notification to the
+ * CPU.
+ *
+ * If we find that there is indeed more in there, we call
+ * force_external_irq_replay() to make Linux synthesize an
+ * external interrupt on the next call to local_irq_restore().
+ */
+static void xive_do_queue_eoi(struct xive_cpu *xc)
+{
+	if (xive_scan_interrupts(xc, true) == 0) /* peek only */
+		return;
+	DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
+	force_external_irq_replay();
+}
+
+/*
+ * EOI an interrupt at the source. There are several methods
+ * to do this depending on the HW version and source type
+ */
+static void xive_do_source_eoi(struct xive_irq_data *xd)
+{
+	u8 eoi_val;
+
+	xd->stale_p = false;
+
+	/* If the XIVE supports the new "store EOI" facility, use it */
+	if (xive_is_store_eoi(xd)) {
+		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
+		return;
+	}
+
+	/*
+	 * For LSIs, we use the "EOI cycle" special load rather than
+	 * PQ bits, as they are automatically re-triggered in HW when
+	 * still pending.
+	 */
+	if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
+		return;
+	}
+
+	/*
+	 * Otherwise, we use the special MMIO that does a clear of
+	 * both P and Q and returns the old Q. This allows us to then
+	 * do a re-trigger if Q was set rather than synthesizing an
+	 * interrupt in software
+	 */
+	eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+	DBG_VERBOSE("eoi_val=%x\n", eoi_val);
+
+	/* Re-trigger if Q was set and the source has a trigger page */
+	if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
+		out_be64(xd->trig_mmio, 0);
+}
+
+/* irq_chip eoi callback, called with irq descriptor lock held */
+static void xive_irq_eoi(struct irq_data *d)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
+		    d->irq, irqd_to_hwirq(d), xc->pending_prio);
+
+	/*
+	 * A source that has been disabled, that is passed-through to a
+	 * KVM guest or that is flagged NO_EOI is not EOI'ed here: its P
+	 * state is only marked stale. Any other source gets its EOI.
+	 */
+	if (irqd_irq_disabled(d) || irqd_is_forwarded_to_vcpu(d) ||
+	    (xd->flags & XIVE_IRQ_FLAG_NO_EOI))
+		xd->stale_p = true;
+	else
+		xive_do_source_eoi(xd);
+
+	/* The interrupt no longer occupies a slot on the target queue */
+	xd->saved_p = false;
+
+	/* Look for more work in the queue */
+	xive_do_queue_eoi(xc);
+}
+
+/*
+ * Helper used to mask and unmask an interrupt source.
+ *
+ * @xd:   XIVE data of the source to operate on
+ * @mask: true to mask the source, false to unmask it
+ *
+ * Masking issues the SET_PQ_01 ESB load and records in saved_p whether
+ * P was set (unless the P state is known to be stale). Unmasking issues
+ * SET_PQ_10 when saved_p is set and SET_PQ_00 otherwise.
+ */
+static void xive_do_source_set_mask(struct xive_irq_data *xd,
+				    bool mask)
+{
+	u64 val;
+
+	pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un");
+
+	/*
+	 * If the interrupt had P set, it may be in a queue.
+	 *
+	 * We need to make sure we don't re-enable it until it
+	 * has been fetched from that queue and EOId. We keep
+	 * a copy of that P state and use it to restore the
+	 * ESB accordingly on unmask.
+	 */
+	if (mask) {
+		/* The load returns the previous PQ value */
+		val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+		if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
+			xd->saved_p = true;
+		xd->stale_p = false;
+	} else if (xd->saved_p) {
+		/* P was set when we masked: restore it, leave Q clear */
+		xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+		xd->saved_p = false;
+	} else {
+		/* Nothing was saved: fully re-enable the source */
+		xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+		xd->stale_p = false;
+	}
+}
+
+/*
+ * Try to choose "cpu" as a new interrupt target. The queue accounting
+ * of that target is incremented unless its queue is already full.
+ *
+ * Returns true if the interrupt was accounted on the target's queue.
+ */
+static bool xive_try_pick_target(int cpu)
+{
+	struct xive_q *q = &per_cpu(xive_cpu, cpu)->queue[xive_irq_priority];
+	int limit;
+
+	/*
+	 * Max number of interrupts accounted in that queue.
+	 *
+	 * We leave a gap of 1 just in case...
+	 */
+	limit = (q->msk + 1) - 1;
+
+	return atomic_add_unless(&q->count, 1, limit) != 0;
+}
+
+/*
+ * Un-account an interrupt for a target CPU. We don't directly
+ * decrement q->count since the interrupt might still be present
+ * in the queue.
+ *
+ * Instead increment a separate counter "pending_count" which
+ * will be subtracted from "count" later when that CPU observes
+ * the queue to be empty.
+ *
+ * @cpu: logical CPU number of the target being given up. A negative
+ *       value or a CPU without XIVE per-CPU data triggers a warning
+ *       and is otherwise ignored.
+ */
+static void xive_dec_target_count(int cpu)
+{
+	struct xive_cpu *xc;
+	struct xive_q *q;
+
+	/*
+	 * Validate "cpu" before it is used to index the per-CPU area,
+	 * and only derive the queue pointer from a non-NULL xc.
+	 */
+	xc = cpu < 0 ? NULL : per_cpu(xive_cpu, cpu);
+	if (WARN_ON(!xc)) {
+		pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
+		return;
+	}
+	q = &xc->queue[xive_irq_priority];
+
+	/*
+	 * We increment the "pending count" which will be used
+	 * to decrement the target queue count whenever it's next
+	 * processed and found empty. This ensures that we don't
+	 * decrement while we still have the interrupt there
+	 * occupying a slot.
+	 */
+	atomic_inc(&q->pending_count);
+}
+
+/*
+ * Find a tentative CPU target in a CPU mask.
+ *
+ * @mask: candidate CPUs
+ * @fuzz: value used to spread the starting point across the mask
+ *
+ * Starting from the (fuzz % weight)'th CPU of the mask, walk the mask
+ * with wrap-around and return the first online CPU whose queue accepts
+ * one more interrupt. Returns -1 when the mask is empty or when no CPU
+ * of the mask can be picked.
+ */
+static int xive_find_target_in_mask(const struct cpumask *mask,
+				    unsigned int fuzz)
+{
+	int cpu, first, num, i;
+
+	/* Pick up a starting point CPU in the mask based on fuzz */
+	num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
+
+	/*
+	 * An empty mask has no possible target, and "fuzz % num"
+	 * below would divide by zero.
+	 */
+	if (num <= 0)
+		return -1;
+	first = fuzz % num;
+
+	/* Locate it */
+	cpu = cpumask_first(mask);
+	for (i = 0; i < first && cpu < nr_cpu_ids; i++)
+		cpu = cpumask_next(cpu, mask);
+
+	/* Sanity check */
+	if (WARN_ON(cpu >= nr_cpu_ids))
+		cpu = cpumask_first(cpu_online_mask);
+
+	/* Remember first one to handle wrap-around */
+	first = cpu;
+
+	/*
+	 * Now go through the entire mask until we find a valid
+	 * target.
+	 */
+	do {
+		/*
+		 * We re-check online as the fallback case passes us
+		 * an untested affinity mask
+		 */
+		if (cpu_online(cpu) && xive_try_pick_target(cpu))
+			return cpu;
+		cpu = cpumask_next(cpu, mask);
+		/* Wrap around */
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_first(mask);
+	} while (cpu != first);
+
+	return -1;
+}
+
+/*
+ * Pick a target CPU for an interrupt. This is done at
+ * startup or if the affinity is changed in a way that
+ * invalidates the current target.
+ *
+ * @d:        interrupt to pick a target for
+ * @affinity: CPUs the interrupt is allowed to target
+ *
+ * Returns the chosen CPU, or the negative value returned by
+ * xive_find_target_in_mask() when no CPU could be picked.
+ *
+ * NOTE(review): "fuzz" is a function-local static counter that is
+ * updated without any locking visible in this function.
+ */
+static int xive_pick_irq_target(struct irq_data *d,
+				const struct cpumask *affinity)
+{
+	static unsigned int fuzz;
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	cpumask_var_t mask;
+	int cpu = -1;
+
+	/*
+	 * If we have chip IDs, first we try to build a mask of
+	 * CPUs matching the source chip and find a target in there
+	 */
+	if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
+		zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
+		/* Build a mask of matching chip IDs */
+		for_each_cpu_and(cpu, affinity, cpu_online_mask) {
+			struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+			if (xc->chip_id == xd->src_chip)
+				cpumask_set_cpu(cpu, mask);
+		}
+		/* Try to find a target */
+		if (cpumask_empty(mask))
+			cpu = -1;
+		else
+			cpu = xive_find_target_in_mask(mask, fuzz++);
+		free_cpumask_var(mask);
+		if (cpu >= 0)
+			return cpu;
+		/* Step fuzz back before falling through to the affinity mask */
+		fuzz--;
+	}
+
+	/* No chip IDs, fallback to using the affinity mask */
+	return xive_find_target_in_mask(affinity, fuzz++);
+}
+
+/*
+ * irq_chip startup callback: pick a target CPU for the interrupt,
+ * route the HW interrupt to it using the Linux IRQ number as logical
+ * number, then unmask the ESB.
+ *
+ * Returns 0 on success.
+ * NOTE(review): the return type is unsigned int while the error paths
+ * return -ENXIO or the rc of configure_irq(); confirm how callers
+ * interpret the value.
+ */
+static unsigned int xive_irq_startup(struct irq_data *d)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int target, rc;
+
+	/* Start from a clean P state */
+	xd->saved_p = false;
+	xd->stale_p = false;
+
+	pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
+
+	/* Pick a target */
+	target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
+	if (target == XIVE_INVALID_TARGET) {
+		/* Try again breaking affinity */
+		target = xive_pick_irq_target(d, cpu_online_mask);
+		if (target == XIVE_INVALID_TARGET)
+			return -ENXIO;
+		pr_warn("irq %d started with broken affinity\n", d->irq);
+	}
+
+	/* Sanity check */
+	if (WARN_ON(target == XIVE_INVALID_TARGET ||
+		    target >= nr_cpu_ids))
+		target = smp_processor_id();
+
+	xd->target = target;
+
+	/*
+	 * Configure the logical number to be the Linux IRQ number
+	 * and set the target queue
+	 */
+	rc = xive_ops->configure_irq(hw_irq,
+				     get_hard_smp_processor_id(target),
+				     xive_irq_priority, d->irq);
+	if (rc)
+		return rc;
+
+	/* Unmask the ESB */
+	xive_do_source_set_mask(xd, false);
+
+	return 0;
+}
+
+/*
+ * irq_chip shutdown callback, called with irq descriptor lock held.
+ *
+ * Masks the source, deconfigures its routing and gives up the
+ * accounting held on the target CPU.
+ */
+static void xive_irq_shutdown(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+	pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
+
+	/* An interrupt without a target has nothing to shut down */
+	if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
+		return;
+
+	/* First mask at the source ... */
+	xive_do_source_set_mask(xd, true);
+
+	/*
+	 * ... then mask in HW in the IVT/EAS, with the logical number
+	 * set to the "bad" IRQ number
+	 */
+	xive_ops->configure_irq(hw_irq, get_hard_smp_processor_id(xd->target),
+				0xff, XIVE_BAD_IRQ);
+
+	/* Finally release the target */
+	xive_dec_target_count(xd->target);
+	xd->target = XIVE_INVALID_TARGET;
+}
+
+/* irq_chip unmask callback: unmask the interrupt at the source */
+static void xive_irq_unmask(struct irq_data *d)
+{
+	struct xive_irq_data *src = irq_data_get_irq_handler_data(d);
+
+	pr_debug("%s: irq %d data @%p\n", __func__, d->irq, src);
+	xive_do_source_set_mask(src, false);
+}
+
+/* irq_chip mask callback: mask the interrupt at the source */
+static void xive_irq_mask(struct irq_data *d)
+{
+	struct xive_irq_data *src = irq_data_get_irq_handler_data(d);
+
+	pr_debug("%s: irq %d data @%p\n", __func__, d->irq, src);
+	xive_do_source_set_mask(src, true);
+}
+
+/*
+ * irq_chip set_affinity callback.
+ *
+ * @d:       interrupt being moved
+ * @cpumask: requested affinity
+ * @force:   unused here
+ *
+ * Keeps the current target when it is online and part of @cpumask,
+ * otherwise picks a new target in @cpumask and re-routes the interrupt
+ * to it (unless it is currently passed-through to a KVM guest).
+ *
+ * Returns IRQ_SET_MASK_OK on success, -EINVAL when @cpumask contains no
+ * online CPU, -ENXIO when no target could be picked, or the error
+ * returned by the backend configure_irq().
+ */
+static int xive_irq_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	u32 target, old_target;
+	int rc = 0;
+
+	pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq);
+
+	/* Is this valid ? */
+	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
+		return -EINVAL;
+
+	/*
+	 * If existing target is already in the new mask, and is
+	 * online then do nothing.
+	 */
+	if (xd->target != XIVE_INVALID_TARGET &&
+	    cpu_online(xd->target) &&
+	    cpumask_test_cpu(xd->target, cpumask))
+		return IRQ_SET_MASK_OK;
+
+	/* Pick a new target */
+	target = xive_pick_irq_target(d, cpumask);
+
+	/* No target found */
+	if (target == XIVE_INVALID_TARGET)
+		return -ENXIO;
+
+	/* Sanity check */
+	if (WARN_ON(target >= nr_cpu_ids))
+		target = smp_processor_id();
+
+	old_target = xd->target;
+
+	/*
+	 * Only configure the irq if it's not currently passed-through to
+	 * a KVM guest
+	 */
+	if (!irqd_is_forwarded_to_vcpu(d))
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(target),
+					     xive_irq_priority, d->irq);
+	if (rc < 0) {
+		pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
+		/*
+		 * Picking the new target accounted the interrupt on its
+		 * queue. We stay on the old target, so give that slot
+		 * back or it would be lost for good.
+		 */
+		xive_dec_target_count(target);
+		return rc;
+	}
+
+	pr_debug("  target: 0x%x\n", target);
+	xd->target = target;
+
+	/* Give up previous target */
+	if (old_target != XIVE_INVALID_TARGET)
+		xive_dec_target_count(old_target);
+
+	return IRQ_SET_MASK_OK;
+}
+
+/*
+ * irq_chip set_type callback.
+ *
+ * Only rising-edge and level-low are accepted; DEFAULT and NONE are
+ * turned into rising-edge. Returns IRQ_SET_MASK_OK_NOCOPY on success
+ * and -EINVAL for any other flow type.
+ */
+static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	bool linux_lsi, fw_lsi;
+
+	/*
+	 * We only support these. This has really no effect other than
+	 * setting the corresponding descriptor bits mind you but those
+	 * will in turn affect the resend function when re-enabling an
+	 * edge interrupt.
+	 */
+	switch (flow_type) {
+	case IRQ_TYPE_DEFAULT:
+	case IRQ_TYPE_NONE:
+		/* Set the default to edge as explained in map() */
+		flow_type = IRQ_TYPE_EDGE_RISING;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+	case IRQ_TYPE_LEVEL_LOW:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	irqd_set_trigger_type(d, flow_type);
+
+	/*
+	 * Double check it matches what the FW thinks
+	 *
+	 * NOTE: We don't know yet if the PAPR interface will provide
+	 * the LSI vs MSI information apart from the device-tree so
+	 * this check might have to move into an optional backend call
+	 * that is specific to the native backend
+	 */
+	linux_lsi = flow_type == IRQ_TYPE_LEVEL_LOW;
+	fw_lsi = !!(xd->flags & XIVE_IRQ_FLAG_LSI);
+	if (linux_lsi != fw_lsi) {
+		pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
+			d->irq, (u32)irqd_to_hwirq(d),
+			linux_lsi ? "Level" : "Edge",
+			fw_lsi ? "Level" : "Edge");
+	}
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/*
+ * irq_chip retrigger callback.
+ *
+ * Returns 1 once the retrigger has been issued, 0 (with a warning)
+ * when called for an LSI.
+ */
+static int xive_irq_retrigger(struct irq_data *d)
+{
+	struct xive_irq_data *src = irq_data_get_irq_handler_data(d);
+
+	/* This should be only for MSIs */
+	if (WARN_ON(src->flags & XIVE_IRQ_FLAG_LSI))
+		return 0;
+
+	/* A retrigger is done by setting the PQ bits to 11 ... */
+	xive_esb_read(src, XIVE_ESB_SET_PQ_11);
+	/* ... and then performing an EOI */
+	xive_do_source_eoi(src);
+
+	return 1;
+}
+
+/*
+ * irq_chip set_vcpu_affinity callback, used to hand an interrupt over
+ * to a KVM guest (state != NULL) or to take it back (state == NULL).
+ *
+ * Caller holds the irq descriptor lock, so this won't be called
+ * concurrently with xive_get_irqchip_state on the same interrupt.
+ *
+ * Returns 0 on success, or the error returned by configure_irq() when
+ * re-routing the interrupt to the host fails.
+ */
+static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int rc;
+	u8 pq;
+
+	/*
+	 * This is called by KVM with state non-NULL for enabling
+	 * pass-through or NULL for disabling it
+	 */
+	if (state) {
+		irqd_set_forwarded_to_vcpu(d);
+
+		/* Set it to PQ=10 state to prevent further sends */
+		pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+		if (!xd->stale_p) {
+			xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
+			xd->stale_p = !xd->saved_p;
+		}
+
+		/* No target ? nothing to do */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			/*
+			 * An untargeted interrupt should have been
+			 * also masked at the source
+			 */
+			WARN_ON(xd->saved_p);
+
+			return 0;
+		}
+
+		/*
+		 * If P was set, adjust state to PQ=11 to indicate
+		 * that a resend is needed for the interrupt to reach
+		 * the guest. Also remember the value of P.
+		 *
+		 * This also tells us that it's in flight to a host queue
+		 * or has already been fetched but hasn't been EOIed yet
+		 * by the host. Thus it's potentially using up a host
+		 * queue slot. This is important to know because as long
+		 * as this is the case, we must not hard-unmask it when
+		 * "returning" that interrupt to the host.
+		 *
+		 * This saved_p is cleared by the host EOI, when we know
+		 * for sure the queue slot is no longer in use.
+		 */
+		if (xd->saved_p) {
+			xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
+
+			/*
+			 * Sync the XIVE source HW to ensure the interrupt
+			 * has gone through the EAS before we change its
+			 * target to the guest. That should guarantee us
+			 * that we *will* eventually get an EOI for it on
+			 * the host. Otherwise there would be a small window
+			 * for P to be seen here but the interrupt going
+			 * to the guest queue.
+			 */
+			if (xive_ops->sync_source)
+				xive_ops->sync_source(hw_irq);
+		}
+	} else {
+		irqd_clr_forwarded_to_vcpu(d);
+
+		/* No host target ? hard mask and return */
+		if (xd->target == XIVE_INVALID_TARGET) {
+			xive_do_source_set_mask(xd, true);
+			return 0;
+		}
+
+		/*
+		 * Sync the XIVE source HW to ensure the interrupt
+		 * has gone through the EAS before we change its
+		 * target to the host.
+		 */
+		if (xive_ops->sync_source)
+			xive_ops->sync_source(hw_irq);
+
+		/*
+		 * By convention we are called with the interrupt in
+		 * a PQ=10 or PQ=11 state, ie, it won't fire and will
+		 * have latched in Q whether there's a pending HW
+		 * interrupt or not.
+		 *
+		 * First reconfigure the target.
+		 */
+		rc = xive_ops->configure_irq(hw_irq,
+					     get_hard_smp_processor_id(xd->target),
+					     xive_irq_priority, d->irq);
+		if (rc)
+			return rc;
+
+		/*
+		 * Then if saved_p is not set, effectively re-enable the
+		 * interrupt with an EOI. If it is set, we know there is
+		 * still a message in a host queue somewhere that will be
+		 * EOId eventually.
+		 *
+		 * Note: We don't check irqd_irq_disabled(). Effectively,
+		 * we *will* let the irq get through even if masked if the
+		 * HW is still firing it in order to deal with the whole
+		 * saved_p business properly. If the interrupt triggers
+		 * while masked, the generic code will re-mask it anyway.
+		 */
+		if (!xd->saved_p)
+			xive_do_source_eoi(xd);
+
+	}
+	return 0;
+}
+
+/*
+ * irq_chip get_irqchip_state callback, called with irq descriptor
+ * lock held.
+ *
+ * Only IRQCHIP_STATE_ACTIVE is supported; any other request returns
+ * -EINVAL. On success *state is filled in and 0 is returned.
+ */
+static int xive_get_irqchip_state(struct irq_data *data,
+				  enum irqchip_irq_state which, bool *state)
+{
+	struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+	u8 pq;
+
+	if (which != IRQCHIP_STATE_ACTIVE)
+		return -EINVAL;
+
+	pq = xive_esb_read(xd, XIVE_ESB_GET);
+
+	/*
+	 * The esb value being all 1's means we couldn't get
+	 * the PQ state of the interrupt through mmio. It may
+	 * happen, for example when querying a PHB interrupt
+	 * while the PHB is in an error state. We consider the
+	 * interrupt to be inactive in that case, as we do when
+	 * its P state is stale.
+	 */
+	if (pq == XIVE_ESB_INVALID || xd->stale_p)
+		*state = false;
+	else if (xd->saved_p)
+		*state = true;
+	else
+		*state = (pq & XIVE_ESB_VAL_P) && !irqd_irq_disabled(data);
+
+	return 0;
+}
+
+/* irq_chip used for the interrupts of the XIVE IRQ domain */
+static struct irq_chip xive_irq_chip = {
+	.name = "XIVE-IRQ",
+	.irq_startup = xive_irq_startup,
+	.irq_shutdown = xive_irq_shutdown,
+	.irq_eoi = xive_irq_eoi,
+	.irq_mask = xive_irq_mask,
+	.irq_unmask = xive_irq_unmask,
+	.irq_set_affinity = xive_irq_set_affinity,
+	.irq_set_type = xive_irq_set_type,
+	.irq_retrigger = xive_irq_retrigger,
+	.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
+	.irq_get_irqchip_state = xive_get_irqchip_state,
+};
+
+/* Returns true when @chip is the XIVE interrupt chip (xive_irq_chip) */
+bool is_xive_irq(struct irq_chip *chip)
+{
+	return chip == &xive_irq_chip;
+}
+EXPORT_SYMBOL_GPL(is_xive_irq);
+
+/*
+ * Unmap the EOI and trigger MMIO mappings of an interrupt and clear
+ * both pointers. A mapping shared by both is only unmapped once.
+ */
+void xive_cleanup_irq_data(struct xive_irq_data *xd)
+{
+	pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq);
+
+	/* Shared mapping: it gets unmapped through eoi_mmio below */
+	if (xd->trig_mmio == xd->eoi_mmio)
+		xd->trig_mmio = NULL;
+
+	if (xd->eoi_mmio) {
+		iounmap(xd->eoi_mmio);
+		xd->eoi_mmio = NULL;
+	}
+
+	if (xd->trig_mmio) {
+		iounmap(xd->trig_mmio);
+		xd->trig_mmio = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
+
+/*
+ * Allocate the XIVE data of interrupt @hw, have the backend populate
+ * it and attach it to @virq as handler data.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * returned by the backend populate_irq_data().
+ */
+static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
+{
+	struct xive_irq_data *xd = kzalloc(sizeof(*xd), GFP_KERNEL);
+	int rc;
+
+	if (!xd)
+		return -ENOMEM;
+
+	rc = xive_ops->populate_irq_data(hw, xd);
+	if (rc) {
+		kfree(xd);
+		return rc;
+	}
+
+	/* Not routed to any CPU yet */
+	xd->target = XIVE_INVALID_TARGET;
+	irq_set_handler_data(virq, xd);
+
+	/*
+	 * Turn OFF by default the interrupt being mapped. A side
+	 * effect of this access is the mapping of the ESB page of
+	 * the interrupt in the Linux address space. This prevents
+	 * page fault issues in the crash handler which masks all
+	 * interrupts.
+	 */
+	xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+
+	return 0;
+}
+
+/*
+ * Detach the XIVE data from @virq, release its mappings and free it.
+ * Does nothing when @virq has no handler data.
+ */
+void xive_irq_free_data(unsigned int virq)
+{
+	struct xive_irq_data *xd = irq_get_handler_data(virq);
+
+	if (xd) {
+		irq_set_handler_data(virq, NULL);
+		xive_cleanup_irq_data(xd);
+		kfree(xd);
+	}
+}
+EXPORT_SYMBOL_GPL(xive_irq_free_data);
+
+#ifdef CONFIG_SMP
+
+/*
+ * Send an IPI to @cpu by writing to the trigger page of its IPI
+ * source. Warns and does nothing if that page is not mapped.
+ */
+static void xive_cause_ipi(int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+	struct xive_irq_data *xd = &xc->ipi_data;
+
+	DBG_VERBOSE("IPI CPU %d -> %d (HW IRQ 0x%x)\n",
+		    smp_processor_id(), cpu, xc->hw_ipi);
+
+	if (!WARN_ON(!xd->trig_mmio))
+		out_be64(xd->trig_mmio, 0);
+}
+
+/* Handler of the IPI interrupt: hands over to smp_ipi_demux() */
+static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
+{
+	return smp_ipi_demux();
+}
+
+/*
+ * irq_chip eoi callback of the IPI chip: EOI the IPI source of the
+ * current CPU, then re-check its queue.
+ */
+static void xive_ipi_eoi(struct irq_data *d)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+	/* Handle possible race with unplug and drop stale IPIs */
+	if (xc) {
+		DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
+			    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
+
+		xive_do_source_eoi(&xc->ipi_data);
+		xive_do_queue_eoi(xc);
+	}
+}
+
+/* Empty mask/unmask callback of the IPI chip */
+static void xive_ipi_do_nothing(struct irq_data *d)
+{
+	/*
+	 * Nothing to do, we never mask/unmask IPIs, but the callback
+	 * has to exist for the struct irq_chip.
+	 */
+}
+
+/* irq_chip used for IPIs: only EOI does real work */
+static struct irq_chip xive_ipi_chip = {
+	.name = "XIVE-IPI",
+	.irq_eoi = xive_ipi_eoi,
+	.irq_mask = xive_ipi_do_nothing,
+	.irq_unmask = xive_ipi_do_nothing,
+};
+
+/*
+ * IPIs are marked per-cpu. We use separate HW interrupts under the
+ * hood but associated with the same "linux" interrupt
+ *
+ * Argument passed to the IPI domain alloc callback.
+ */
+struct xive_ipi_alloc_info {
+	/* First HW interrupt number to assign to the allocated range */
+	irq_hw_number_t hwirq;
+};
+
+/*
+ * alloc callback of the IPI domain: bind @nr_irqs consecutive virqs,
+ * starting at @virq, to consecutive HW numbers starting at the hwirq
+ * given in @arg (a struct xive_ipi_alloc_info). Always returns 0.
+ */
+static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				     unsigned int nr_irqs, void *arg)
+{
+	struct xive_ipi_alloc_info *info = arg;
+	unsigned int n;
+
+	for (n = 0; n < nr_irqs; n++)
+		irq_domain_set_info(domain, virq + n, info->hwirq + n,
+				    &xive_ipi_chip, domain->host_data,
+				    handle_percpu_irq, NULL, NULL);
+
+	return 0;
+}
+
+/* Operations of the IPI domain: only allocation is provided */
+static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
+	.alloc  = xive_ipi_irq_domain_alloc,
+};
+
+/*
+ * Create the IPI domain and allocate one IPI interrupt per node.
+ *
+ * Returns 0 on success or a negative errno. On failure the global
+ * xive_ipis array is freed and reset to NULL so that no dangling
+ * pointer is left behind.
+ */
+static int __init xive_init_ipis(void)
+{
+	struct fwnode_handle *fwnode;
+	struct irq_domain *ipi_domain;
+	unsigned int node;
+	int ret = -ENOMEM;
+
+	fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI");
+	if (!fwnode)
+		goto out;
+
+	ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids,
+					      &xive_ipi_irq_domain_ops, NULL);
+	if (!ipi_domain)
+		goto out_free_fwnode;
+
+	xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
+	if (!xive_ipis)
+		goto out_free_domain;
+
+	for_each_node(node) {
+		struct xive_ipi_desc *xid = &xive_ipis[node];
+		struct xive_ipi_alloc_info info = { node };
+
+		/*
+		 * Map one IPI interrupt per node for all cpus of that node.
+		 * Since the HW interrupt number doesn't have any meaning,
+		 * simply use the node number.
+		 */
+		ret = irq_domain_alloc_irqs(ipi_domain, 1, node, &info);
+		if (ret < 0)
+			goto out_free_xive_ipis;
+		xid->irq = ret;
+
+		snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
+	}
+
+	/* "ret" holds the last allocated virq here, which is not a status */
+	return 0;
+
+out_free_xive_ipis:
+	kfree(xive_ipis);
+	/* Don't leave a dangling pointer in the global */
+	xive_ipis = NULL;
+out_free_domain:
+	irq_domain_remove(ipi_domain);
+out_free_fwnode:
+	irq_domain_free_fwnode(fwnode);
+out:
+	return ret;
+}
+
+/*
+ * Request the IPI interrupt of the node @cpu belongs to. Only the
+ * first caller for a given node issues the request_irq(); later
+ * callers return 0 right away.
+ *
+ * Returns 0 or the error returned by request_irq() (after a WARN).
+ */
+static int xive_request_ipi(unsigned int cpu)
+{
+	struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
+	int ret;
+
+	/* Already requested for this node */
+	if (atomic_inc_return(&xid->started) > 1)
+		return 0;
+
+	ret = request_irq(xid->irq, xive_muxed_ipi_action,
+			  IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD,
+			  xid->name, NULL);
+
+	WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
+	return ret;
+}
+
+/*
+ * Set up the IPI of @cpu: request the Linux interrupt, get a HW IPI
+ * from the backend, populate its data, route it to @cpu and unmask it.
+ *
+ * Returns 0 on success (or if the IPI is already set up), -EIO when a
+ * backend operation fails.
+ */
+static int xive_setup_cpu_ipi(unsigned int cpu)
+{
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+	struct xive_cpu *xc;
+	int rc;
+
+	pr_debug("Setting up IPI for CPU %d\n", cpu);
+
+	xc = per_cpu(xive_cpu, cpu);
+
+	/* Check if we are already setup */
+	if (xc->hw_ipi != XIVE_BAD_IRQ)
+		return 0;
+
+	/* Register the IPI */
+	/* NOTE(review): the return value of xive_request_ipi() is not checked */
+	xive_request_ipi(cpu);
+
+	/* Grab an IPI from the backend, this will populate xc->hw_ipi */
+	if (xive_ops->get_ipi(cpu, xc))
+		return -EIO;
+
+	/*
+	 * Populate the IRQ data in the xive_cpu structure and
+	 * configure the HW / enable the IPIs.
+	 */
+	rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
+	if (rc) {
+		pr_err("Failed to populate IPI data on CPU %d\n", cpu);
+		return -EIO;
+	}
+	rc = xive_ops->configure_irq(xc->hw_ipi,
+				     get_hard_smp_processor_id(cpu),
+				     xive_irq_priority, xive_ipi_irq);
+	if (rc) {
+		pr_err("Failed to map IPI CPU %d\n", cpu);
+		return -EIO;
+	}
+	pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu,
+		 xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
+
+	/* Unmask it */
+	xive_do_source_set_mask(&xc->ipi_data, false);
+
+	return 0;
+}
+
+/*
+ * Mask and deconfigure the IPI of @cpu and hand it back to the
+ * backend. Does nothing if the IPI was already cleaned up.
+ *
+ * NOTE(review): the deconfigure step uses hard_smp_processor_id(),
+ * i.e. the CPU running this code, not @cpu; the caller visible in this
+ * file (xive_teardown_cpu) passes the current CPU.
+ */
+noinstr static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+
+	/* Disable the IPI and free the IRQ data */
+
+	/* Already cleaned up ? */
+	if (xc->hw_ipi == XIVE_BAD_IRQ)
+		return;
+
+	/* TODO: clear IPI mapping */
+
+	/* Mask the IPI */
+	xive_do_source_set_mask(&xc->ipi_data, true);
+
+	/*
+	 * Note: We don't call xive_cleanup_irq_data() to free
+	 * the mappings as this is called from an IPI on kexec
+	 * which is not a safe environment to call iounmap()
+	 */
+
+	/* Deconfigure/mask in the backend */
+	xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
+				0xff, xive_ipi_irq);
+
+	/* Free the IPIs in the backend */
+	xive_ops->put_ipi(cpu, xc);
+}
+
+/*
+ * Install the XIVE IPI trigger in smp_ops, create the IPI interrupts
+ * and set up the IPI of the boot CPU.
+ *
+ * NOTE(review): the return values of xive_init_ipis() and
+ * xive_setup_cpu_ipi() are not checked here.
+ */
+void __init xive_smp_probe(void)
+{
+	smp_ops->cause_ipi = xive_cause_ipi;
+
+	/* Register the IPI */
+	xive_init_ipis();
+
+	/* Allocate and setup IPI for the boot CPU */
+	xive_setup_cpu_ipi(smp_processor_id());
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * map callback of the XIVE domain: allocate the XIVE data of @hw and
+ * install the XIVE chip with the fasteoi flow handler on @virq.
+ *
+ * Returns 0 on success or the error of xive_irq_alloc_data().
+ */
+static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
+			       irq_hw_number_t hw)
+{
+	int rc;
+
+	/*
+	 * Interrupts are edge sensitive by default so that resend
+	 * actually works. This gets fixed up later on if needed.
+	 */
+	irq_clear_status_flags(virq, IRQ_LEVEL);
+
+	rc = xive_irq_alloc_data(virq, hw);
+	if (!rc)
+		irq_set_chip_and_handler(virq, &xive_irq_chip,
+					 handle_fasteoi_irq);
+
+	return rc;
+}
+
+/* unmap callback of the XIVE domain: release the XIVE data of @virq */
+static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
+{
+	xive_irq_free_data(virq);
+}
+
+/*
+ * xlate callback of the XIVE domain: the first cell of the specifier
+ * is the HW interrupt number, the optional second cell carries the
+ * trigger type. Always returns 0.
+ */
+static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	*out_hwirq = intspec[0];
+
+	/*
+	 * If intsize is at least 2, the type is taken from the second
+	 * cell, where we assume the LSB indicates a level interrupt.
+	 * Without a second cell the interrupt is treated as level.
+	 */
+	if (intsize > 1 && !(intspec[1] & 1))
+		*out_flags = IRQ_TYPE_EDGE_RISING;
+	else
+		*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+/* match callback of the XIVE domain: delegated to the backend match() */
+static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
+				 enum irq_domain_bus_token bus_token)
+{
+	return xive_ops->match(node);
+}
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+/* Names printed in debugfs, indexed by the low two bits of the ESB value */
+static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" };
+
+/* Flag bits of struct xive_irq_data and the names printed for them in debugfs */
+static const struct {
+	u64  mask;
+	char *name;
+} xive_irq_flags[] = {
+	{ XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" },
+	{ XIVE_IRQ_FLAG_LSI,       "LSI"       },
+	{ XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" },
+	{ XIVE_IRQ_FLAG_NO_EOI,    "NO_EOI"    },
+};
+
+/*
+ * debug_show callback of the XIVE domain: dump the XIVE state of one
+ * interrupt (ESB state, P bookkeeping, target, chip, pages and flags).
+ *
+ * @m:    seq_file to print to
+ * @d:    the domain (unused)
+ * @irqd: interrupt to describe; nothing is printed when NULL or when
+ *        the interrupt does not use the XIVE chip
+ * @ind:  indentation of the output
+ */
+static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d,
+				       struct irq_data *irqd, int ind)
+{
+	struct xive_irq_data *xd;
+	u64 val;
+	int i;
+
+	/* No IRQ domain level information. To be done */
+	if (!irqd)
+		return;
+
+	if (!is_xive_irq(irq_data_get_irq_chip(irqd)))
+		return;
+
+	seq_printf(m, "%*sXIVE:\n", ind, "");
+	ind++;
+
+	xd = irq_data_get_irq_handler_data(irqd);
+	if (!xd) {
+		seq_printf(m, "%*snot assigned\n", ind, "");
+		return;
+	}
+
+	/* Read the current ESB state without modifying it */
+	val = xive_esb_read(xd, XIVE_ESB_GET);
+	seq_printf(m, "%*sESB:      %s\n", ind, "", esb_names[val & 0x3]);
+	seq_printf(m, "%*sPstate:   %s %s\n", ind, "", xd->stale_p ? "stale" : "",
+		   xd->saved_p ? "saved" : "");
+	seq_printf(m, "%*sTarget:   %d\n", ind, "", xd->target);
+	seq_printf(m, "%*sChip:     %d\n", ind, "", xd->src_chip);
+	seq_printf(m, "%*sTrigger:  0x%016llx\n", ind, "", xd->trig_page);
+	seq_printf(m, "%*sEOI:      0x%016llx\n", ind, "", xd->eoi_page);
+	seq_printf(m, "%*sFlags:    0x%llx\n", ind, "", xd->flags);
+	/* One line per flag that is set */
+	for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) {
+		if (xd->flags & xive_irq_flags[i].mask)
+			seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name);
+	}
+}
+#endif
+
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+/*
+ * translate callback of the XIVE domain: decode @fwspec with
+ * xive_irq_domain_xlate() into a HW interrupt number and a type.
+ */
+static int xive_irq_domain_translate(struct irq_domain *d,
+				     struct irq_fwspec *fwspec,
+				     unsigned long *hwirq,
+				     unsigned int *type)
+{
+	return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode),
+				     fwspec->param, fwspec->param_count,
+				     hwirq, type);
+}
+
+/*
+ * alloc callback of the XIVE domain.
+ *
+ * @domain:  the XIVE domain
+ * @virq:    first Linux interrupt number of the range
+ * @nr_irqs: number of interrupts in the range
+ * @arg:     struct irq_fwspec describing the first HW interrupt
+ *
+ * Each virq of the range gets its XIVE data allocated and is bound to
+ * the matching HW number with the XIVE chip and the fasteoi flow
+ * handler.
+ *
+ * Returns 0 on success. On failure the XIVE data already allocated for
+ * earlier virqs of the range is released and the error is returned.
+ */
+static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	int i, rc;
+
+	rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type);
+	if (rc)
+		return rc;
+
+	pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		/* TODO: call xive_irq_domain_map() */
+
+		/*
+		 * Mark interrupts as edge sensitive by default so that resend
+		 * actually works. Will fix that up below if needed.
+		 *
+		 * This has to be done on every virq of the range, not just
+		 * on the first one.
+		 */
+		irq_clear_status_flags(virq + i, IRQ_LEVEL);
+
+		/* allocates and sets handler data */
+		rc = xive_irq_alloc_data(virq + i, hwirq + i);
+		if (rc)
+			goto out_free;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &xive_irq_chip, domain->host_data);
+		irq_set_handler(virq + i, handle_fasteoi_irq);
+	}
+
+	return 0;
+
+out_free:
+	/* Release what was set up for the virqs before the failing one */
+	while (--i >= 0)
+		xive_irq_free_data(virq + i);
+	return rc;
+}
+
+/*
+ * free callback of the XIVE domain: release the XIVE data of the
+ * @nr_irqs interrupts starting at @virq.
+ */
+static void xive_irq_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	unsigned int n;
+
+	pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
+
+	for (n = 0; n < nr_irqs; n++)
+		xive_irq_free_data(virq + n);
+}
+#endif
+
+/*
+ * Operations of the XIVE domain. The alloc/free/translate callbacks
+ * are only built with CONFIG_IRQ_DOMAIN_HIERARCHY, debug_show only
+ * with CONFIG_GENERIC_IRQ_DEBUGFS.
+ */
+static const struct irq_domain_ops xive_irq_domain_ops = {
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	.alloc	= xive_irq_domain_alloc,
+	.free	= xive_irq_domain_free,
+	.translate = xive_irq_domain_translate,
+#endif
+	.match = xive_irq_domain_match,
+	.map = xive_irq_domain_map,
+	.unmap = xive_irq_domain_unmap,
+	.xlate = xive_irq_domain_xlate,
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	.debug_show = xive_irq_domain_debug_show,
+#endif
+};
+
+/*
+ * Create the XIVE IRQ domain for @np and make it the default host.
+ * Warns and leaves the default host untouched if the domain cannot
+ * be created.
+ */
+static void __init xive_init_host(struct device_node *np)
+{
+	xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
+
+	if (!WARN_ON(!xive_irq_domain))
+		irq_set_default_host(xive_irq_domain);
+}
+
+/* Have the backend clean up the queue of @cpu, if it has a queue page */
+static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
+{
+	if (!xc->queue[xive_irq_priority].qpage)
+		return;
+
+	xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
+}
+
+/*
+ * Have the backend set up the queue of @cpu unless it already has a
+ * queue page. Returns 0 or the error of the backend setup_queue().
+ */
+static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Already set up */
+	if (xc->queue[xive_irq_priority].qpage)
+		return 0;
+
+	/* We setup 1 queue for now with a 64k page */
+	return xive_ops->setup_queue(cpu, xc, xive_irq_priority);
+}
+
+/*
+ * Allocate the XIVE per-CPU structure of @cpu if it does not exist
+ * yet, then make sure its queue is set up.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error
+ * of xive_setup_cpu_queues().
+ */
+static int xive_prepare_cpu(unsigned int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	if (xc)
+		goto setup_queues;
+
+	xc = kzalloc_node(sizeof(*xc), GFP_KERNEL, cpu_to_node(cpu));
+	if (!xc)
+		return -ENOMEM;
+
+	/* No IPI and no chip known yet */
+	xc->hw_ipi = XIVE_BAD_IRQ;
+	xc->chip_id = XIVE_INVALID_CHIP_ID;
+
+	/* Optional backend hook */
+	if (xive_ops->prepare_cpu)
+		xive_ops->prepare_cpu(cpu, xc);
+
+	per_cpu(xive_cpu, cpu) = xc;
+
+setup_queues:
+	/* Setup EQs if not already */
+	return xive_setup_cpu_queues(cpu, xc);
+}
+
+/*
+ * Get the current CPU ready to receive interrupts: run the optional
+ * backend setup_cpu() hook, then open up the CPPR.
+ */
+static void xive_setup_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+
+	/* The backend might have additional things to do */
+	if (xive_ops->setup_cpu)
+		xive_ops->setup_cpu(smp_processor_id(), xc);
+
+	/* Set CPPR to 0xff to enable flow of interrupts */
+	xc->cppr = 0xff;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
+}
+
+#ifdef CONFIG_SMP
+/* Set up XIVE on the current CPU, unless it is the boot CPU */
+void xive_smp_setup_cpu(void)
+{
+	pr_debug("SMP setup CPU %d\n", smp_processor_id());
+
+	/* This will have already been done on the boot CPU */
+	if (smp_processor_id() == boot_cpuid)
+		return;
+
+	xive_setup_cpu();
+}
+
+/*
+ * Prepare @cpu: allocate its per-CPU data and queues, then allocate
+ * and set up its IPI. Returns 0 or the first error encountered.
+ */
+int xive_smp_prepare_cpu(unsigned int cpu)
+{
+	int rc = xive_prepare_cpu(cpu);
+
+	return rc ? rc : xive_setup_cpu_ipi(cpu);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Drain the queue of a CPU and re-send every XIVE interrupt found in
+ * it so that it reaches its new destination.
+ *
+ * @cpu: CPU whose queue is flushed (only used for the debug message)
+ * @xc:  XIVE per-CPU structure of that CPU
+ *
+ * Expects local interrupts to be disabled.
+ * NOTE(review): the result of irq_to_desc() is used without a NULL
+ * check; confirm an entry found in the queue always has a descriptor.
+ */
+static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
+{
+	u32 irq;
+
+	/* We assume local irqs are disabled */
+	WARN_ON(!irqs_disabled());
+
+	/* Check what's already in the CPU queue */
+	while ((irq = xive_scan_interrupts(xc, false)) != 0) {
+		/*
+		 * We need to re-route that interrupt to its new destination.
+		 * First get and lock the descriptor
+		 */
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *d = irq_desc_get_irq_data(desc);
+		struct xive_irq_data *xd;
+
+		/*
+		 * Ignore anything that isn't a XIVE irq and ignore
+		 * IPIs, so can just be dropped.
+		 */
+		if (d->domain != xive_irq_domain)
+			continue;
+
+		/*
+		 * The IRQ should have already been re-routed, it's just a
+		 * stale entry in the old queue, so re-trigger it in order
+		 * to make it reach its new destination.
+		 */
+#ifdef DEBUG_FLUSH
+		pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
+			cpu, irq);
+#endif
+		raw_spin_lock(&desc->lock);
+		xd = irq_desc_get_handler_data(desc);
+
+		/*
+		 * Clear saved_p to indicate that it's no longer pending
+		 */
+		xd->saved_p = false;
+
+		/*
+		 * For LSIs, we EOI, this will cause a resend if it's
+		 * still asserted. Otherwise do an MSI retrigger.
+		 */
+		if (xd->flags & XIVE_IRQ_FLAG_LSI)
+			xive_do_source_eoi(xd);
+		else
+			xive_irq_retrigger(d);
+
+		raw_spin_unlock(&desc->lock);
+	}
+}
+
+/*
+ * Take the current CPU out of interrupt delivery: migrate its
+ * interrupts away, then flush what is left in its queue with the CPPR
+ * closed, and finally re-open the CPPR.
+ */
+void xive_smp_disable_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	unsigned int cpu = smp_processor_id();
+
+	/* Migrate interrupts away from the CPU */
+	irq_migrate_all_off_this_cpu();
+
+	/* Set CPPR to 0 to disable flow of interrupts */
+	xc->cppr = 0;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+	/* Flush everything still in the queue */
+	xive_flush_cpu_queue(cpu, xc);
+
+	/* Re-enable CPPR  */
+	xc->cppr = 0xff;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
+}
+
+/*
+ * Called if an interrupt occurs while the CPU is hot unplugged:
+ * flush the queue of the current CPU.
+ */
+void xive_flush_interrupt(void)
+{
+	xive_flush_cpu_queue(smp_processor_id(), __this_cpu_read(xive_cpu));
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Tear down XIVE on the current CPU: close the CPPR, run the optional
+ * backend teardown_cpu() hook, release the IPI (SMP only) and clean up
+ * the queues.
+ */
+noinstr void xive_teardown_cpu(void)
+{
+	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+	unsigned int cpu = smp_processor_id();
+
+	/* Set CPPR to 0 to disable flow of interrupts */
+	xc->cppr = 0;
+	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+	if (xive_ops->teardown_cpu)
+		xive_ops->teardown_cpu(cpu, xc);
+
+#ifdef CONFIG_SMP
+	/* Get rid of IPI */
+	xive_cleanup_cpu_ipi(cpu, xc);
+#endif
+
+	/* Disable and free the queues */
+	xive_cleanup_cpu_queues(cpu, xc);
+}
+
+/* Forward the shutdown request to the active backend (hook is mandatory). */
+void xive_shutdown(void)
+{
+	xive_ops->shutdown();
+}
+
+/*
+ * Initialize the XIVE core with a backend.
+ *
+ * @np:       device node handed to xive_init_host()
+ * @ops:      backend operations, stored in xive_ops
+ * @area:     mapped thread management area, stored in xive_tima
+ * @offset:   offset added to xive_tima for the CPPR accesses
+ * @max_prio: priority used for all interrupts
+ *
+ * Installs xive_get_irq() as ppc_md.get_irq, marks XIVE as enabled and sets
+ * up the boot CPU. As written, this always returns true.
+ */
+bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops,
+			   void __iomem *area, u32 offset, u8 max_prio)
+{
+	xive_tima = area;
+	xive_tima_offset = offset;
+	xive_ops = ops;
+	xive_irq_priority = max_prio;
+
+	ppc_md.get_irq = xive_get_irq;
+	__xive_enabled = true;
+
+	pr_debug("Initializing host..\n");
+	xive_init_host(np);
+
+	pr_debug("Initializing boot CPU..\n");
+
+	/* Allocate per-CPU data and queues */
+	xive_prepare_cpu(smp_processor_id());
+
+	/* Get ready for interrupts */
+	xive_setup_cpu();
+
+	pr_info("Interrupt handling initialized with %s backend\n",
+		xive_ops->name);
+	pr_info("Using priority %d for all interrupts\n", max_prio);
+
+	return true;
+}
+
+/*
+ * Allocate a zeroed event queue of (1 << queue_shift) bytes on the memory
+ * node of @cpu.
+ *
+ * Returns the kernel address of the queue, or ERR_PTR(-ENOMEM) when the
+ * page allocation fails.
+ */
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
+{
+	unsigned int order = xive_alloc_order(queue_shift);
+	struct page *pg;
+	__be32 *queue;
+
+	pg = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, order);
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
+
+	queue = (__be32 *)page_address(pg);
+	memset(queue, 0, 1 << queue_shift);
+	return queue;
+}
+
+/*
+ * "xive=off" on the kernel command line: record that XIVE must not be
+ * used. Returns 1 to tell the __setup() machinery the option was handled.
+ */
+static int __init xive_off(char *arg)
+{
+	xive_cmdline_disabled = true;
+	return 1;
+}
+__setup("xive=off", xive_off);
+
+/*
+ * "xive.store-eoi=" command line handler. Only a value starting with
+ * "off" has an effect: it clears xive_store_eoi. The option is always
+ * reported as handled.
+ */
+static int __init xive_store_eoi_cmdline(char *arg)
+{
+	if (arg && strncmp(arg, "off", 3) == 0) {
+		pr_info("StoreEOI disabled on kernel command line\n");
+		xive_store_eoi = false;
+	}
+
+	return 1;
+}
+__setup("xive.store-eoi=", xive_store_eoi_cmdline);
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Emit one line describing @cpu: its pending priorities and cached CPPR
+ * and, on SMP builds, its IPI. CPUs without XIVE per-CPU data only get
+ * the "CPU n: " prefix.
+ */
+static void xive_debug_show_ipi(struct seq_file *m, int cpu)
+{
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	seq_printf(m, "CPU %d: ", cpu);
+	if (!xc)
+		goto eol;
+
+	seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
+
+#ifdef CONFIG_SMP
+	{
+		char ipi_desc[128];
+
+		xive_irq_data_dump(&xc->ipi_data, ipi_desc, sizeof(ipi_desc));
+		seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, ipi_desc);
+	}
+#endif
+
+eol:
+	seq_puts(m, "\n");
+}
+
+/*
+ * Emit one line describing the interrupt behind @d: the configuration
+ * reported by the backend followed by the dump of its XIVE irq data.
+ * When the backend cannot return a configuration, only the error is shown.
+ */
+static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	char desc[128];
+	u32 target, lirq;
+	u8 prio;
+	int rc;
+
+	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
+	if (rc != 0) {
+		seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
+		return;
+	}
+
+	seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
+		   hw_irq, target, prio, lirq);
+
+	xive_irq_data_dump(irq_data_get_irq_handler_data(d), desc, sizeof(desc));
+	seq_puts(m, desc);
+	seq_puts(m, "\n");
+}
+
+/*
+ * debugfs show routine: walk all irq descriptors and describe those that
+ * have irq data in the XIVE domain.
+ */
+static int xive_irq_debug_show(struct seq_file *m, void *private)
+{
+	struct irq_desc *desc;
+	unsigned int virq;
+
+	for_each_irq_desc(virq, desc) {
+		struct irq_data *data;
+
+		data = irq_domain_get_irq_data(xive_irq_domain, virq);
+		if (!data)
+			continue;
+
+		xive_debug_show_irq(m, data);
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_irq_debug);
+
+/*
+ * debugfs show routine: let the backend print its own state first (the
+ * hook is optional), then describe every online CPU.
+ */
+static int xive_ipi_debug_show(struct seq_file *m, void *private)
+{
+	int cpu;
+
+	if (xive_ops->debug_show)
+		xive_ops->debug_show(m, private);
+
+	for_each_online_cpu(cpu)
+		xive_debug_show_ipi(m, cpu);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug);
+
+/*
+ * Dump one event queue: a header with the current index and toggle, then,
+ * if a queue page is installed, all msk + 1 entries as hex words, eight per
+ * line, each line prefixed with the index of its first entry.
+ */
+static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio)
+{
+	int i;
+
+	seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle);
+	if (q->qpage) {
+		for (i = 0; i < q->msk + 1; i++) {
+			/* Start of a row: print the entry index */
+			if (!(i % 8))
+				seq_printf(m, "%05d ", i);
+			/* Entries are big-endian; end the row after 8 words */
+			seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i),
+				   (i + 1) % 8 ? " " : "\n");
+		}
+	}
+	seq_puts(m, "\n");
+}
+
+/*
+ * debugfs show routine for one CPU: the CPU number is carried in the
+ * seq_file private pointer. Dumps the queue of the priority used for all
+ * interrupts, if that CPU has XIVE per-CPU data.
+ */
+static int xive_eq_debug_show(struct seq_file *m, void *private)
+{
+	int cpu = (long)m->private;
+	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
+
+	if (!xc)
+		return 0;
+
+	xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority],
+			       xive_irq_priority);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(xive_eq_debug);
+
+/*
+ * Create the "xive" debugfs directory under arch_debugfs_dir with:
+ *   ipis, interrupts  - read-only dumps
+ *   eqs/cpuN          - one event queue dump per possible CPU
+ *   store-eoi         - read/write view of xive_store_eoi
+ * and let the backend add its own entries (optional hook).
+ */
+static void xive_core_debugfs_create(void)
+{
+	struct dentry *xive_dir;
+	struct dentry *xive_eq_dir;
+	long cpu;
+	char name[16];
+
+	xive_dir = debugfs_create_dir("xive", arch_debugfs_dir);
+	if (IS_ERR(xive_dir))
+		return;
+
+	debugfs_create_file("ipis", 0400, xive_dir,
+			    NULL, &xive_ipi_debug_fops);
+	debugfs_create_file("interrupts", 0400, xive_dir,
+			    NULL, &xive_irq_debug_fops);
+	xive_eq_dir = debugfs_create_dir("eqs", xive_dir);
+	for_each_possible_cpu(cpu) {
+		snprintf(name, sizeof(name), "cpu%ld", cpu);
+		/* The CPU number travels in the file's private pointer */
+		debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu,
+				    &xive_eq_debug_fops);
+	}
+	debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi);
+
+	if (xive_ops->debug_create)
+		xive_ops->debug_create(xive_dir);
+}
+#else
+static inline void xive_core_debugfs_create(void) { }
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Initcall body: populate debugfs when XIVE is in use and debugfs support
+ * is built in. Always returns 0.
+ */
+int xive_core_debug_init(void)
+{
+	if (!xive_enabled() || !IS_ENABLED(CONFIG_DEBUG_FS))
+		return 0;
+
+	xive_core_debugfs_create();
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
new file mode 100644
index 0000000000..f1c0fa6ece
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+#include <linux/kmemleak.h>
+
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/opal.h>
+#include <asm/kvm_ppc.h>
+
+#include "xive-internal.h"
+
+
+/* Provisioning page size, read from "ibm,xive-provision-page-size" */
+static u32 xive_provision_size;
+/* Chip ids read from "ibm,xive-provision-chips", and how many there are */
+static u32 *xive_provision_chips;
+static u32 xive_provision_chip_count;
+/* Event queue size shift picked from "ibm,xive-eq-sizes" */
+static u32 xive_queue_shift;
+/* Base of the pool VP block; XIVE_INVALID_VP until one is allocated */
+static u32 xive_pool_vps = XIVE_INVALID_VP;
+/* Slab cache the provisioning pages are allocated from */
+static struct kmem_cache *xive_provision_cache;
+/* Set when the device tree has "single-escalation-support" */
+static bool xive_has_single_esc;
+/* Set when the device tree has "vp-save-restore"; also exposed in debugfs */
+bool xive_has_save_restore;
+
+/*
+ * Fill @data with what OPAL knows about @hw_irq and map its ESB pages.
+ *
+ * @data is cleared first. The EOI page is always mapped; the trigger page
+ * is mapped only when OPAL reports one, and shares the EOI mapping when
+ * both are the same page.
+ *
+ * Returns 0 on success, -EINVAL if OPAL cannot describe the interrupt and
+ * -ENOMEM if a mapping fails. On failure no mapping is left behind.
+ */
+int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+	__be64 flags, eoi_page, trig_page;
+	__be32 esb_shift, src_chip;
+	u64 opal_flags;
+	s64 rc;
+
+	memset(data, 0, sizeof(*data));
+
+	rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
+				    &esb_shift, &src_chip);
+	if (rc) {
+		pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
+		       hw_irq, rc);
+		return -EINVAL;
+	}
+
+	/* Both OPAL StoreEOI flavours map to the same Linux flag */
+	opal_flags = be64_to_cpu(flags);
+	if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
+		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2)
+		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (opal_flags & OPAL_XIVE_IRQ_LSI)
+		data->flags |= XIVE_IRQ_FLAG_LSI;
+	data->eoi_page = be64_to_cpu(eoi_page);
+	data->trig_page = be64_to_cpu(trig_page);
+	data->esb_shift = be32_to_cpu(esb_shift);
+	data->src_chip = be32_to_cpu(src_chip);
+
+	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+	if (!data->eoi_mmio) {
+		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+
+	data->hw_irq = hw_irq;
+
+	/* No trigger page: nothing more to map */
+	if (!data->trig_page)
+		return 0;
+	/* Trigger and EOI share a page: reuse the mapping */
+	if (data->trig_page == data->eoi_page) {
+		data->trig_mmio = data->eoi_mmio;
+		return 0;
+	}
+
+	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+	if (!data->trig_mmio) {
+		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+		/* Do not leak the EOI mapping established above */
+		iounmap(data->eoi_mmio);
+		data->eoi_mmio = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
+
+/*
+ * Set the target, priority and logical number of @hw_irq through OPAL,
+ * sleeping and retrying for as long as OPAL reports busy.
+ *
+ * Returns 0 on success, -ENXIO on any OPAL error.
+ */
+int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+	s64 rc;
+
+	while ((rc = opal_xive_set_irq_config(hw_irq, target, prio,
+					      sw_irq)) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	if (rc == 0)
+		return 0;
+	return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(xive_native_configure_irq);
+
+/*
+ * Read back the configuration of @hw_irq from OPAL.
+ *
+ * @target and @sw_irq receive the CPU-endian VP and logical irq number;
+ * @prio is filled in directly by OPAL.
+ *
+ * Returns 0 on success, -ENXIO on any OPAL error. On error @target and
+ * @sw_irq are left untouched rather than being loaded from buffers OPAL
+ * never wrote.
+ */
+static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+				      u32 *sw_irq)
+{
+	s64 rc;
+	__be64 vp;
+	__be32 lirq;
+
+	rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
+	if (rc)
+		return -ENXIO;
+
+	*target = be64_to_cpu(vp);
+	*sw_irq = be32_to_cpu(lirq);
+
+	return 0;
+}
+
+/* pr_err() wrapper that prefixes the message with the VP id in hex */
+#define vp_err(vp, fmt, ...) pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__)
+
+/*
+ * Configure (or reconfigure) the event queue of priority @prio on VP
+ * @vp_id. This can be called multiple times to change a queue configuration.
+ *
+ * @q:            software queue state, reset by this call
+ * @qpage:        queue memory; must be non-NULL when @order is non-zero
+ * @order:        log2 of the queue size in bytes, 0 for no queue page
+ * @can_escalate: also record the escalation irq and enable escalation
+ *
+ * Returns 0 on success, -EINVAL for a missing queue page and -EIO when
+ * OPAL fails. q->qpage is only updated on success.
+ */
+int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+				__be32 *qpage, u32 order, bool can_escalate)
+{
+	s64 rc = 0;
+	__be64 qeoi_page_be;
+	__be32 esc_irq_be;
+	u64 flags, qpage_phys;
+
+	/* If there's an actual queue page, OPAL needs its physical address */
+	if (order) {
+		if (WARN_ON(!qpage))
+			return -EINVAL;
+		qpage_phys = __pa(qpage);
+	} else
+		qpage_phys = 0;
+
+	/* Initialize the rest of the fields (entries are 4 bytes each) */
+	q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+	q->idx = 0;
+	q->toggle = 0;
+
+	rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
+				      &qeoi_page_be,
+				      &esc_irq_be,
+				      NULL);
+	if (rc) {
+		vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc);
+		rc = -EIO;
+		goto fail;
+	}
+	q->eoi_phys = be64_to_cpu(qeoi_page_be);
+
+	/* Default flags */
+	flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
+
+	/* Escalation needed ? */
+	if (can_escalate) {
+		q->esc_irq = be32_to_cpu(esc_irq_be);
+		flags |= OPAL_XIVE_EQ_ESCALATE;
+	}
+
+	/* Configure and enable the queue in HW, retrying while OPAL is busy */
+	for (;;) {
+		rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc) {
+		vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc);
+		rc = -EIO;
+	} else {
+		/*
+		 * KVM code requires all of the above to be visible before
+		 * q->qpage is set due to how it manages IPI EOIs
+		 */
+		wmb();
+		q->qpage = qpage;
+	}
+fail:
+	return rc;
+}
+EXPORT_SYMBOL_GPL(xive_native_configure_queue);
+
+/*
+ * Disable the queue of priority @prio on VP @vp_id in HW by resetting its
+ * OPAL queue info, retrying while OPAL is busy. Failures are only logged.
+ */
+static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
+{
+	s64 rc;
+
+	while ((rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0)) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	if (!rc)
+		return;
+
+	vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc);
+}
+
+/* Exported wrapper around __xive_native_disable_queue() */
+void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
+{
+	__xive_native_disable_queue(vp_id, q, prio);
+}
+EXPORT_SYMBOL_GPL(xive_native_disable_queue);
+
+/*
+ * Backend setup_queue hook: allocate a queue of the default size for
+ * priority @prio of @cpu and configure it in HW, without escalation.
+ *
+ * Returns 0 on success or a negative errno. When the configuration fails
+ * the queue page is released again, since q->qpage was never set and
+ * xive_native_cleanup_queue() therefore has no way to find it.
+ */
+static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	__be32 *qpage;
+	int rc;
+
+	qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	if (IS_ERR(qpage))
+		return PTR_ERR(qpage);
+
+	rc = xive_native_configure_queue(get_hard_smp_processor_id(cpu),
+					 q, prio, qpage, xive_queue_shift, false);
+	if (rc)
+		free_pages((unsigned long)qpage,
+			   xive_alloc_order(xive_queue_shift));
+
+	return rc;
+}
+
+/*
+ * Backend cleanup_queue hook: disable the queue of priority @prio of @cpu
+ * in HW, free its pages and clear q->qpage.
+ */
+static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	unsigned int alloc_order;
+
+	/*
+	 * We use the variant with no iounmap as this is called on exec
+	 * from an IPI and iounmap isn't safe
+	 */
+	__xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
+	/* Same order computation as at allocation time */
+	alloc_order = xive_alloc_order(xive_queue_shift);
+	free_pages((unsigned long)q->qpage, alloc_order);
+	q->qpage = NULL;
+}
+
+/* Backend match hook: true for nodes compatible with "ibm,opal-xive-vc" */
+static bool xive_native_match(struct device_node *node)
+{
+	return of_device_is_compatible(node, "ibm,opal-xive-vc");
+}
+
+/*
+ * Allocate an interrupt on @chip_id. Returns the raw OPAL result, except
+ * that 0xffffffff is translated to OPAL_RESOURCE.
+ */
+static s64 opal_xive_allocate_irq(u32 chip_id)
+{
+	s64 irq = opal_xive_allocate_irq_raw(chip_id);
+
+	/*
+	 * Old versions of skiboot can incorrectly return 0xffffffff to
+	 * indicate no space, fix it up here.
+	 */
+	if (irq == 0xffffffff)
+		return OPAL_RESOURCE;
+
+	return irq;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Backend get_ipi hook: allocate a HW interrupt on the chip of @cpu and
+ * record it as the CPU's IPI. Retries while OPAL is busy.
+ *
+ * Returns 0 on success, -ENXIO when the allocation fails.
+ */
+static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 irq;
+
+	/* Allocate an IPI and populate info about it */
+	while ((irq = opal_xive_allocate_irq(xc->chip_id)) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	if (irq < 0) {
+		pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+		return -ENXIO;
+	}
+
+	xc->hw_ipi = irq;
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Allocate a HW interrupt on @chip_id, retrying while OPAL is busy.
+ *
+ * Returns the interrupt number, or 0 when OPAL reports an error.
+ */
+u32 xive_native_alloc_irq_on_chip(u32 chip_id)
+{
+	s64 irq;
+
+	while ((irq = opal_xive_allocate_irq(chip_id)) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	return irq < 0 ? 0 : irq;
+}
+EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip);
+
+/*
+ * Give HW interrupt @irq back to OPAL, retrying while OPAL is busy. Any
+ * other OPAL result ends the loop and is not reported.
+ */
+void xive_native_free_irq(u32 irq)
+{
+	while (opal_xive_free_irq(irq) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+}
+EXPORT_SYMBOL_GPL(xive_native_free_irq);
+
+#ifdef CONFIG_SMP
+/*
+ * Backend put_ipi hook: free the CPU's IPI, if it has one, and mark it as
+ * XIVE_BAD_IRQ. Retries while OPAL is busy; other errors are ignored.
+ */
+static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Nothing to free */
+	if (xc->hw_ipi == XIVE_BAD_IRQ)
+		return;
+
+	/* Free the IPI */
+	while (opal_xive_free_irq(xc->hw_ipi) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	xc->hw_ipi = XIVE_BAD_IRQ;
+}
+#endif /* CONFIG_SMP */
+
+/* Backend shutdown hook; the result of the OPAL reset is not checked. */
+static void xive_native_shutdown(void)
+{
+	/* Switch the XIVE to emulation mode */
+	opal_xive_reset(OPAL_XIVE_MODE_EMU);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread, thus
+ * grabbing the pending active priorities and updating
+ * the CPPR to the most favored one.
+ *
+ * Updates xc->pending_prio and xc->cppr for a physical thread interrupt;
+ * the other interrupt sources only produce an error message.
+ */
+static void xive_native_update_pending(struct xive_cpu *xc)
+{
+	u8 he, cppr;
+	u16 ack;
+
+	/* Perform the "acknowledge hypervisor to register" cycle */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/*
+	 * Grab the CPPR and the "HE" field which indicates the source
+	 * of the hypervisor interrupt (if any)
+	 */
+	cppr = ack & 0xff;
+	/* HE is the top two bits of the upper byte */
+	he = (ack >> 8) >> 6;
+	switch(he) {
+	case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
+		break;
+	case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
+		if (cppr == 0xff)
+			return;
+		/* Mark the priority pending */
+		xc->pending_prio |= 1 << cppr;
+
+		/*
+		 * A new interrupt should never have a CPPR less favored
+		 * than our current one.
+		 */
+		if (cppr >= xc->cppr)
+			pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+			       smp_processor_id(), cppr, xc->cppr);
+
+		/* Update our idea of what the CPPR is */
+		xc->cppr = cppr;
+		break;
+	case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
+	case TM_QW3_NSR_HE_LSI:  /* Legacy FW LSI (unused) */
+		pr_err("CPU %d got unexpected interrupt type HE=%d\n",
+		       smp_processor_id(), he);
+		return;
+	}
+}
+
+/* Backend prepare_cpu hook: record which chip @cpu belongs to */
+static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	xc->chip_id = cpu_to_chip_id(cpu);
+}
+
+/*
+ * Backend setup_cpu hook: enable the pool VP reserved for @cpu and push
+ * it on the thread management area. Does nothing when no pool VP block
+ * was allocated; OPAL failures are logged and abort the setup.
+ */
+static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 rc;
+	u32 vp;
+	__be64 vp_cam_be;
+	u64 vp_cam;
+
+	if (xive_pool_vps == XIVE_INVALID_VP)
+		return;
+
+	/* Check if pool VP already active, if it is, pull it */
+	if (in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2) & TM_QW2W2_VP)
+		in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
+
+	/* Enable the pool VP (one VP per CPU, indexed from the block base) */
+	vp = xive_pool_vps + cpu;
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+	if (rc) {
+		pr_err("Failed to enable pool VP on CPU %d\n", cpu);
+		return;
+	}
+
+	/* Grab its CAM value */
+	rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
+	if (rc) {
+		pr_err("Failed to get pool VP info CPU %d\n", cpu);
+		return;
+	}
+	vp_cam = be64_to_cpu(vp_cam_be);
+
+	/* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
+	out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
+	out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2, TM_QW2W2_VP | vp_cam);
+}
+
+/*
+ * Backend teardown_cpu hook: pull the pool VP from the CPU and disable it
+ * in OPAL. Does nothing when no pool VP block was allocated. An OPAL
+ * error other than busy is silently ignored.
+ */
+static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	s64 rc;
+	u32 vp;
+
+	if (xive_pool_vps == XIVE_INVALID_VP)
+		return;
+
+	/* Pull the pool VP from the CPU */
+	in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
+
+	/* Disable it */
+	vp = xive_pool_vps + cpu;
+	for (;;) {
+		rc = opal_xive_set_vp_info(vp, 0, 0);
+		if (rc != OPAL_BUSY)
+			break;
+		msleep(OPAL_BUSY_DELAY_MS);
+	}
+}
+
+/* Ask OPAL for an XIVE_SYNC_EAS sync on @hw_irq; the result is ignored */
+void xive_native_sync_source(u32 hw_irq)
+{
+	opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_source);
+
+/* Ask OPAL for an XIVE_SYNC_QUEUE sync on @hw_irq; the result is ignored */
+void xive_native_sync_queue(u32 hw_irq)
+{
+	opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_queue);
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Backend debug_create hook: expose xive_has_save_restore as a read/write
+ * "save-restore" boolean in @xive_dir. Always returns 0.
+ */
+static int xive_native_debug_create(struct dentry *xive_dir)
+{
+	debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore);
+	return 0;
+}
+#endif
+
+/*
+ * Backend operations of the "native" (OPAL) flavour, handed to
+ * xive_core_init() by xive_native_init(). The IPI hooks only exist on SMP
+ * builds and the debugfs hook only with CONFIG_DEBUG_FS.
+ */
+static const struct xive_ops xive_native_ops = {
+	.populate_irq_data	= xive_native_populate_irq_data,
+	.configure_irq		= xive_native_configure_irq,
+	.get_irq_config		= xive_native_get_irq_config,
+	.setup_queue		= xive_native_setup_queue,
+	.cleanup_queue		= xive_native_cleanup_queue,
+	.match			= xive_native_match,
+	.shutdown		= xive_native_shutdown,
+	.update_pending		= xive_native_update_pending,
+	.prepare_cpu		= xive_native_prepare_cpu,
+	.setup_cpu		= xive_native_setup_cpu,
+	.teardown_cpu		= xive_native_teardown_cpu,
+	.sync_source		= xive_native_sync_source,
+#ifdef CONFIG_SMP
+	.get_ipi		= xive_native_get_ipi,
+	.put_ipi		= xive_native_put_ipi,
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_DEBUG_FS
+	.debug_create		= xive_native_debug_create,
+#endif /* CONFIG_DEBUG_FS */
+	.name			= "native",
+};
+
+/*
+ * Read the provisioning description from @np: the page size, the list of
+ * chips that need provisioning pages, and create the slab cache those
+ * pages are allocated from.
+ *
+ * Returns true on success, including when the page size property is
+ * absent or the chip list is empty, and false on any error.
+ *
+ * NOTE(review): the error paths after xive_provision_chip_count has been
+ * set leave it non-zero (and leave the chips array allocated) while the
+ * cache may not exist; xive_native_init() ignores the return value, so
+ * xive_native_provision_pages() could later run against a NULL cache.
+ */
+static bool __init xive_parse_provisioning(struct device_node *np)
+{
+	int rc;
+
+	/* No page size property: nothing to provision */
+	if (of_property_read_u32(np, "ibm,xive-provision-page-size",
+				 &xive_provision_size) < 0)
+		return true;
+	rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
+	if (rc < 0) {
+		pr_err("Error %d getting provision chips array\n", rc);
+		return false;
+	}
+	xive_provision_chip_count = rc;
+	if (rc == 0)
+		return true;
+
+	/* One 4-byte cell per chip (kcalloc arguments are in size, n order) */
+	xive_provision_chips = kcalloc(4, xive_provision_chip_count,
+				       GFP_KERNEL);
+	if (WARN_ON(!xive_provision_chips))
+		return false;
+
+	rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
+					xive_provision_chips,
+					xive_provision_chip_count);
+	if (rc < 0) {
+		pr_err("Error %d reading provision chips array\n", rc);
+		return false;
+	}
+
+	/* Objects are aligned on their own size */
+	xive_provision_cache = kmem_cache_create("xive-provision",
+						 xive_provision_size,
+						 xive_provision_size,
+						 0, NULL);
+	if (!xive_provision_cache) {
+		pr_err("Failed to allocate provision cache\n");
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Allocate a block of pool VPs covering nr_cpu_ids CPUs and remember its
+ * base in xive_pool_vps. A failure is reported but is not fatal.
+ */
+static void __init xive_native_setup_pools(void)
+{
+	/* Allocate a pool big enough */
+	pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids);
+
+	xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
+	if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
+		pr_err("Failed to allocate pool VP, KVM might not function\n");
+
+	/* Printed on failure too, with the invalid VP value */
+	pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n",
+		 xive_pool_vps, nr_cpu_ids);
+}
+
+/* Return the event queue size shift selected by xive_native_init() */
+u32 xive_native_default_eq_shift(void)
+{
+	return xive_queue_shift;
+}
+EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
+
+/* Start address of the OS window resource, set by xive_native_init() */
+unsigned long xive_tima_os;
+EXPORT_SYMBOL_GPL(xive_tima_os);
+
+/*
+ * Probe and initialize the native (OPAL) XIVE backend.
+ *
+ * Finds the "ibm,opal-xive-pe" node, maps the HV thread management area,
+ * reads the number of priorities, the queue size and the optional
+ * features from the device tree, switches OPAL to exploitation mode,
+ * allocates the pool VPs and starts the XIVE core.
+ *
+ * Returns true when XIVE is up, false when it is disabled on the command
+ * line, not described in the device tree, or initialization failed.
+ *
+ * NOTE(review): the error paths taken after the ioremap() do not unmap
+ * tima; by then the pointer may already have been handed to
+ * kvmppc_set_xive_tima() for every possible CPU.
+ */
+bool __init xive_native_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	void __iomem *tima;
+	struct property *prop;
+	u8 max_prio = 7;
+	const __be32 *p;
+	u32 val, cpu;
+	s64 rc;
+
+	if (xive_cmdline_disabled)
+		return false;
+
+	pr_devel("xive_native_init()\n");
+	np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
+	if (!np) {
+		pr_devel("not found !\n");
+		return false;
+	}
+	pr_devel("Found %pOF\n", np);
+
+	/* Resource 1 is HV window */
+	if (of_address_to_resource(np, 1, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+	tima = ioremap(r.start, resource_size(&r));
+	if (!tima) {
+		pr_err("Failed to map thread mgmnt area\n");
+		goto err_put;
+	}
+
+	/* Read number of priorities (default above is 7 when absent) */
+	if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
+		max_prio = val - 1;
+
+	/*
+	 * Iterate the EQ sizes and pick one: the one matching PAGE_SHIFT if
+	 * listed, otherwise the last one in the property.
+	 */
+	of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
+		xive_queue_shift = val;
+		if (val == PAGE_SHIFT)
+			break;
+	}
+
+	/* Do we support single escalation */
+	xive_has_single_esc = of_property_read_bool(np, "single-escalation-support");
+
+	xive_has_save_restore = of_property_read_bool(np, "vp-save-restore");
+
+	/* Configure Thread Management areas for KVM */
+	for_each_possible_cpu(cpu)
+		kvmppc_set_xive_tima(cpu, r.start, tima);
+
+	/* Resource 2 is OS window */
+	if (of_address_to_resource(np, 2, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+
+	xive_tima_os = r.start;
+
+	/* Grab size of provisioning pages (result is not checked) */
+	xive_parse_provisioning(np);
+
+	/* Switch the XIVE to exploitation mode */
+	rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
+	if (rc) {
+		pr_err("Switch to exploitation mode failed with error %lld\n", rc);
+		goto err_put;
+	}
+
+	/* Setup some dummy HV pool VPs */
+	xive_native_setup_pools();
+
+	/* Initialize XIVE core with our backend */
+	if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS,
+			    max_prio)) {
+		/* Go back to emulation mode on failure */
+		opal_xive_reset(OPAL_XIVE_MODE_EMU);
+		goto err_put;
+	}
+	of_node_put(np);
+	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+	return true;
+
+err_put:
+	of_node_put(np);
+	return false;
+}
+
+/*
+ * Donate one provisioning page to OPAL for each chip listed in
+ * xive_provision_chips.
+ *
+ * Returns false as soon as a page cannot be allocated, true otherwise.
+ * The result of the donation itself is not checked.
+ */
+static bool xive_native_provision_pages(void)
+{
+	u32 i;
+	void *p;
+
+	for (i = 0; i < xive_provision_chip_count; i++) {
+		u32 chip = xive_provision_chips[i];
+
+		/*
+		 * XXX TODO: Try to make the allocation local to the node where
+		 * the chip resides.
+		 */
+		p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
+		if (!p) {
+			pr_err("Failed to allocate provisioning page\n");
+			return false;
+		}
+		/* The page is handed over for good: hide it from kmemleak */
+		kmemleak_ignore(p);
+		opal_xive_donate_page(chip, __pa(p));
+	}
+	return true;
+}
+
+/*
+ * Allocate a block of VPs large enough for @max_vcpus.
+ *
+ * The block size is the smallest power of two that holds @max_vcpus.
+ * The OPAL call is retried while OPAL is busy and after provisioning
+ * pages each time OPAL asks for them.
+ *
+ * Returns the base VP id of the block, or XIVE_INVALID_VP on failure.
+ *
+ * NOTE(review): @max_vcpus == 0 would make order wrap around (fls(0) is
+ * presumably 0) - confirm callers never pass 0.
+ */
+u32 xive_native_alloc_vp_block(u32 max_vcpus)
+{
+	s64 rc;
+	u32 order;
+
+	/* Round up to the next power of two */
+	order = fls(max_vcpus) - 1;
+	if (max_vcpus > (1 << order))
+		order++;
+
+	pr_debug("VP block alloc, for max VCPUs %d use order %d\n",
+		 max_vcpus, order);
+
+	for (;;) {
+		rc = opal_xive_alloc_vp_block(order);
+		switch (rc) {
+		case OPAL_BUSY:
+			msleep(OPAL_BUSY_DELAY_MS);
+			break;
+		case OPAL_XIVE_PROVISIONING:
+			/* OPAL needs more memory: donate pages and retry */
+			if (!xive_native_provision_pages())
+				return XIVE_INVALID_VP;
+			break;
+		default:
+			if (rc < 0) {
+				pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
+				       order, rc);
+				return XIVE_INVALID_VP;
+			}
+			/* Non-negative results are the base VP id */
+			return rc;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
+
+/*
+ * Return the VP block starting at @vp_base to OPAL. XIVE_INVALID_VP is
+ * accepted and ignored; an OPAL error only produces a warning.
+ */
+void xive_native_free_vp_block(u32 vp_base)
+{
+	s64 rc;
+
+	if (vp_base != XIVE_INVALID_VP) {
+		rc = opal_xive_free_vp_block(vp_base);
+		if (rc < 0)
+			pr_warn("OPAL error %lld freeing VP block\n", rc);
+	}
+}
+EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
+
+/*
+ * Enable VP @vp_id, optionally in single escalation mode, retrying while
+ * OPAL is busy.
+ *
+ * Returns 0 on success, -EIO on any OPAL error.
+ */
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
+{
+	u64 vp_flags;
+	s64 rc;
+
+	vp_flags = OPAL_XIVE_VP_ENABLED;
+	if (single_escalation)
+		vp_flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
+
+	while ((rc = opal_xive_set_vp_info(vp_id, vp_flags, 0)) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	if (!rc)
+		return 0;
+
+	vp_err(vp_id, "Failed to enable VP : %lld\n", rc);
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(xive_native_enable_vp);
+
+/*
+ * Disable VP @vp_id by clearing its flags, retrying while OPAL is busy.
+ *
+ * Returns 0 on success, -EIO on any OPAL error.
+ */
+int xive_native_disable_vp(u32 vp_id)
+{
+	s64 rc;
+
+	while ((rc = opal_xive_set_vp_info(vp_id, 0, 0)) == OPAL_BUSY)
+		msleep(OPAL_BUSY_DELAY_MS);
+
+	if (!rc)
+		return 0;
+
+	vp_err(vp_id, "Failed to disable VP : %lld\n", rc);
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(xive_native_disable_vp);
+
+/*
+ * Query OPAL for the CAM value and chip id of VP @vp_id.
+ *
+ * @out_cam_id receives the low 32 bits of the CAM value and @out_chip_id
+ * the chip id; both pointers must be valid. Returns 0 on success, -EIO on
+ * any OPAL error (outputs are then left untouched).
+ */
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
+{
+	__be64 vp_cam_be;
+	__be32 vp_chip_id_be;
+	s64 rc;
+
+	rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
+	if (rc) {
+		vp_err(vp_id, "Failed to get VP info : %lld\n", rc);
+		return -EIO;
+	}
+	*out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
+	*out_chip_id = be32_to_cpu(vp_chip_id_be);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+/* True when the device tree advertised "single-escalation-support" */
+bool xive_native_has_single_escalation(void)
+{
+	return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
+
+/*
+ * Current value of xive_has_save_restore: initialized from the
+ * "vp-save-restore" device tree property and writable through debugfs.
+ */
+bool xive_native_has_save_restore(void)
+{
+	return xive_has_save_restore;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_save_restore);
+
+/*
+ * Read the OPAL description of the queue of priority @prio on VP @vp_id.
+ *
+ * Every out_* pointer is optional: pass NULL for values that are not
+ * needed. Values are converted to CPU endianness.
+ *
+ * Returns 0 on success, -EIO on any OPAL error (no output is written).
+ */
+int xive_native_get_queue_info(u32 vp_id, u32 prio,
+			       u64 *out_qpage,
+			       u64 *out_qsize,
+			       u64 *out_qeoi_page,
+			       u32 *out_escalate_irq,
+			       u64 *out_qflags)
+{
+	__be64 qpage;
+	__be64 qsize;
+	__be64 qeoi_page;
+	__be32 escalate_irq;
+	__be64 qflags;
+	s64 rc;
+
+	/* Always fetch everything, then hand out what the caller asked for */
+	rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
+				      &qeoi_page, &escalate_irq, &qflags);
+	if (rc) {
+		vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc);
+		return -EIO;
+	}
+
+	if (out_qpage)
+		*out_qpage = be64_to_cpu(qpage);
+	if (out_qsize)
+		*out_qsize = be64_to_cpu(qsize);
+	if (out_qeoi_page)
+		*out_qeoi_page = be64_to_cpu(qeoi_page);
+	if (out_escalate_irq)
+		*out_escalate_irq = be32_to_cpu(escalate_irq);
+	if (out_qflags)
+		*out_qflags = be64_to_cpu(qflags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
+
+/*
+ * Read the toggle bit and index of the queue of priority @prio on VP
+ * @vp_id. @qtoggle and @qindex are optional (NULL to skip).
+ *
+ * Returns 0 on success, -EIO on any OPAL error.
+ */
+int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
+{
+	__be32 opal_qtoggle;
+	__be32 opal_qindex;
+	s64 rc;
+
+	rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
+				       &opal_qindex);
+	if (rc) {
+		vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc);
+		return -EIO;
+	}
+
+	if (qtoggle)
+		*qtoggle = be32_to_cpu(opal_qtoggle);
+	if (qindex)
+		*qindex = be32_to_cpu(opal_qindex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
+
+/*
+ * Set the toggle bit and index of the queue of priority @prio on VP
+ * @vp_id.
+ *
+ * Returns 0 on success, -EIO on any OPAL error.
+ */
+int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
+{
+	s64 rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
+
+	if (!rc)
+		return 0;
+
+	vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc);
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+
+/*
+ * True when the firmware provides both the get and the set queue state
+ * OPAL calls.
+ */
+bool xive_native_has_queue_state_support(void)
+{
+	if (!opal_check_token(OPAL_XIVE_GET_QUEUE_STATE))
+		return false;
+
+	return opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
+}
+EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
+
+/*
+ * Read the state word of VP @vp_id from OPAL. @out_state is optional
+ * (NULL to only check that the call succeeds).
+ *
+ * Returns 0 on success, -EIO on any OPAL error.
+ */
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
+{
+	__be64 state;
+	s64 rc;
+
+	rc = opal_xive_get_vp_state(vp_id, &state);
+	if (rc) {
+		vp_err(vp_id, "failed to get vp state : %lld\n", rc);
+		return -EIO;
+	}
+
+	if (out_state)
+		*out_state = be64_to_cpu(state);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
+
+machine_arch_initcall(powernv, xive_core_debug_init);
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
new file mode 100644
index 0000000000..e454192643
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -0,0 +1,892 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/libfdt.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/hvcall.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+#include "xive-internal.h"
+
+static u32 xive_queue_shift;
+
+struct xive_irq_bitmap {
+	unsigned long		*bitmap;
+	unsigned int		base;
+	unsigned int		count;
+	spinlock_t		lock;
+	struct list_head	list;
+};
+
+static LIST_HEAD(xive_irq_bitmaps);
+
+static int __init xive_irq_bitmap_add(int base, int count)
+{
+	struct xive_irq_bitmap *xibm;
+
+	xibm = kzalloc(sizeof(*xibm), GFP_KERNEL);
+	if (!xibm)
+		return -ENOMEM;
+	/* Track the range [base, base + count); zeroed bits mean "free" */
+	spin_lock_init(&xibm->lock);
+	xibm->base = base;
+	xibm->count = count;
+	xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL);
+	if (!xibm->bitmap) {
+		kfree(xibm);
+		return -ENOMEM;
+	}
+	list_add(&xibm->list, &xive_irq_bitmaps);
+	/* Kernel log messages are expected to end with a newline */
+	pr_info("Using IRQ range [%x-%x]\n", xibm->base,
+		xibm->base + xibm->count - 1);
+	return 0;
+}
+
+static void xive_irq_bitmap_remove_all(void)
+{
+	struct xive_irq_bitmap *xibm, *tmp;
+
+	list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) {
+		list_del(&xibm->list);
+		bitmap_free(xibm->bitmap);
+		kfree(xibm);
+	}
+}
+
+/* Claim the lowest free bit of @xibm; the visible caller holds xibm->lock. */
+static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
+{
+	unsigned int slot = find_first_zero_bit(xibm->bitmap, xibm->count);
+
+	/* A result equal to the bitmap size means no zero bit was found */
+	if (slot == xibm->count)
+		return -ENOMEM;
+
+	/* Mark it used and turn the bit index into an IRQ of this range */
+	set_bit(slot, xibm->bitmap);
+
+	return xibm->base + slot;
+}
+
+static int xive_irq_bitmap_alloc(void)
+{
+	struct xive_irq_bitmap *xibm;
+	unsigned long flags;
+	int irq = -ENOENT;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		spin_lock_irqsave(&xibm->lock, flags);
+		irq = __xive_irq_bitmap_alloc(xibm);
+		spin_unlock_irqrestore(&xibm->lock, flags);
+		if (irq >= 0)
+			break;
+	}
+	return irq;
+}
+
+static void xive_irq_bitmap_free(int irq)
+{
+	unsigned long flags;
+	struct xive_irq_bitmap *xibm;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		if ((irq >= xibm->base) && (irq < xibm->base + xibm->count)) {
+			spin_lock_irqsave(&xibm->lock, flags);
+			clear_bit(irq - xibm->base, xibm->bitmap);
+			spin_unlock_irqrestore(&xibm->lock, flags);
+			break;
+		}
+	}
+}
+
+
+/* Based on the similar routines in RTAS */
+static unsigned int plpar_busy_delay_time(long rc)
+{
+	/* Long-busy statuses: the delay is derived from the status itself */
+	if (H_IS_LONG_BUSY(rc))
+		return get_longbusy_msecs(rc);
+
+	/* Plain busy: 10ms seems appropriate for XIVE hcalls */
+	if (rc == H_BUSY)
+		return 10;
+
+	return 0;	/* not a busy status: nothing to wait for */
+}
+
+static unsigned int plpar_busy_delay(long rc)
+{
+	unsigned int ms = plpar_busy_delay_time(rc);
+
+	/* Busy status: wait here; the non-zero return asks the caller to retry */
+	if (ms)
+		mdelay(ms);
+
+	return ms;
+}
+
+/*
+ * Note: this call has a partition wide scope and can take a while to
+ * complete. If it returns H_LONG_BUSY_* it should be retried
+ * periodically.
+ */
+static long plpar_int_reset(unsigned long flags)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_INT_RESET, flags);
+	} while (plpar_busy_delay(rc));
+
+	if (rc)
+		pr_err("H_INT_RESET failed %ld\n", rc);
+
+	return rc;
+}
+
+static long plpar_int_get_source_info(unsigned long flags,
+				      unsigned long lisn,
+				      unsigned long *src_flags,
+				      unsigned long *eoi_page,
+				      unsigned long *trig_page,
+				      unsigned long *esb_shift)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	do {
+		rc = plpar_hcall(H_INT_GET_SOURCE_INFO, retbuf, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc);
+		return rc;
+	}
+
+	*src_flags = retbuf[0];
+	*eoi_page  = retbuf[1];
+	*trig_page = retbuf[2];
+	*esb_shift = retbuf[3];
+
+	pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n",
+		 lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]);
+
+	return 0;
+}
+
+#define XIVE_SRC_SET_EISN (1ull << (63 - 62))
+#define XIVE_SRC_MASK     (1ull << (63 - 63)) /* unused */
+
+static long plpar_int_set_source_config(unsigned long flags,
+					unsigned long lisn,
+					unsigned long target,
+					unsigned long prio,
+					unsigned long sw_irq)
+{
+	long rc;
+
+
+	pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n",
+		 flags, lisn, target, prio, sw_irq);
+
+
+	do {
+		rc = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+					target, prio, sw_irq);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n",
+		       lisn, target, prio, rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static long plpar_int_get_source_config(unsigned long flags,
+					unsigned long lisn,
+					unsigned long *target,
+					unsigned long *prio,
+					unsigned long *sw_irq)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn);
+
+	do {
+		/* Inputs: flags and lisn only; outputs come back in retbuf */
+		rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n",
+		       lisn, rc);
+		return rc;
+	}
+	/* Success: only now are the caller's out-parameters written */
+	*target = retbuf[0];
+	*prio   = retbuf[1];
+	*sw_irq = retbuf[2];
+
+	pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n",
+		 retbuf[0], retbuf[1], retbuf[2]);
+
+	return 0;
+}
+
+static long plpar_int_get_queue_info(unsigned long flags,
+				     unsigned long target,
+				     unsigned long priority,
+				     unsigned long *esn_page,
+				     unsigned long *esn_size)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	do {
+		rc = plpar_hcall(H_INT_GET_QUEUE_INFO, retbuf, flags, target,
+				 priority);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
+		       target, priority, rc);
+		return rc;
+	}
+
+	*esn_page = retbuf[0];
+	*esn_size = retbuf[1];
+
+	pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n",
+		 target, priority, retbuf[0], retbuf[1]);
+
+	return 0;
+}
+
+#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63))
+
+static long plpar_int_set_queue_config(unsigned long flags,
+				       unsigned long target,
+				       unsigned long priority,
+				       unsigned long qpage,
+				       unsigned long qsize)
+{
+	long rc;
+
+	pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n",
+		 flags,  target, priority, qpage, qsize);
+
+	do {
+		rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+					priority, qpage, qsize);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n",
+		       target, priority, qpage, rc);
+		return  rc;
+	}
+
+	return 0;
+}
+
+static long plpar_int_sync(unsigned long flags, unsigned long lisn)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc);
+		return  rc;
+	}
+
+	return 0;
+}
+
+#define XIVE_ESB_FLAG_STORE (1ull << (63 - 63))
+
+static long plpar_int_esb(unsigned long flags,
+			  unsigned long lisn,
+			  unsigned long offset,
+			  unsigned long in_data,
+			  unsigned long *out_data)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n",
+		 flags,  lisn, offset, in_data);
+
+	do {
+		rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset,
+				 in_data);
+	} while (plpar_busy_delay(rc));
+
+	if (rc) {
+		pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n",
+		       lisn, offset, rc);
+		return  rc;
+	}
+
+	*out_data = retbuf[0];
+
+	return 0;
+}
+
+/* ESB access through H_INT_ESB: all ones on failure, 0 after a store. */
+static u64 xive_spapr_esb_rw(u32 lisn, u32 offset, u64 data, bool write)
+{
+	unsigned long hflags = write ? XIVE_ESB_FLAG_STORE : 0;
+	unsigned long loaded;
+
+	if (plpar_int_esb(hflags, lisn, offset, data, &loaded))
+		return -1;
+	if (write)
+		return 0;
+	return loaded;
+}
+
+#define XIVE_SRC_H_INT_ESB     (1ull << (63 - 60))
+#define XIVE_SRC_LSI           (1ull << (63 - 61))
+#define XIVE_SRC_TRIGGER       (1ull << (63 - 62))
+#define XIVE_SRC_STORE_EOI     (1ull << (63 - 63))
+
+static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+	long rc;
+	unsigned long flags;
+	unsigned long eoi_page;
+	unsigned long trig_page;
+	unsigned long esb_shift;
+
+	memset(data, 0, sizeof(*data));
+
+	rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page,
+				       &esb_shift);
+	if (rc)
+		return  -EINVAL;
+
+	if (flags & XIVE_SRC_H_INT_ESB)
+		data->flags  |= XIVE_IRQ_FLAG_H_INT_ESB;
+	if (flags & XIVE_SRC_STORE_EOI)
+		data->flags  |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (flags & XIVE_SRC_LSI)
+		data->flags  |= XIVE_IRQ_FLAG_LSI;
+	data->eoi_page  = eoi_page;
+	data->esb_shift = esb_shift;
+	data->trig_page = trig_page;
+
+	data->hw_irq = hw_irq;
+
+	/*
+	 * No chip-id for the sPAPR backend. This has an impact on how we
+	 * pick a target. See xive_pick_irq_target().
+	 */
+	data->src_chip = XIVE_INVALID_CHIP_ID;
+
+	/*
+	 * When the H_INT_ESB flag is set, the H_INT_ESB hcall should
+	 * be used for interrupt management. Skip the remapping of the
+	 * ESB pages which are not available.
+	 */
+	if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB)
+		return 0;
+
+	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+	if (!data->eoi_mmio) {
+		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+
+	/* Full function page supports trigger */
+	if (flags & XIVE_SRC_TRIGGER) {
+		data->trig_mmio = data->eoi_mmio;
+		return 0;
+	}
+
+	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+	if (!data->trig_mmio) {
+		iounmap(data->eoi_mmio);
+		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+	/* Any hcall failure is reported to the caller as -ENXIO */
+	if (plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target,
+					prio, sw_irq))
+		return -ENXIO;
+
+	return 0;
+}
+
+static int xive_spapr_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+				     u32 *sw_irq)
+{
+	unsigned long h_target, h_prio, h_sw_irq;
+	long rc;
+
+	rc = plpar_int_get_source_config(0, hw_irq, &h_target, &h_prio,
+					 &h_sw_irq);
+	/* The h_* locals are only filled in on success: don't copy junk out */
+	if (rc)
+		return -ENXIO;
+
+	*target = h_target;
+	*prio = h_prio;
+	*sw_irq = h_sw_irq;
+	return 0;
+}
+
+/* This can be called multiple times to change a queue configuration */
+static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
+				   __be32 *qpage, u32 order)
+{
+	s64 rc = 0;
+	unsigned long esn_page;
+	unsigned long esn_size;
+	u64 flags, qpage_phys;
+
+	/* If there's an actual queue page, get its physical address */
+	if (order) {
+		if (WARN_ON(!qpage))
+			return -EINVAL;
+		qpage_phys = __pa(qpage);
+	} else {
+		qpage_phys = 0;
+	}
+
+	/* Initialize the rest of the fields */
+	q->msk = order ? ((1u << (order - 2)) - 1) : 0;
+	q->idx = 0;
+	q->toggle = 0;
+
+	rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
+	if (rc) {
+		pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+		       target, prio);
+		rc = -EIO;
+		goto fail;
+	}
+
+	/* TODO: add support for the notification page */
+	q->eoi_phys = esn_page;
+
+	/* Default is to always notify */
+	flags = XIVE_EQ_ALWAYS_NOTIFY;
+
+	/* Configure and enable the queue in HW */
+	rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
+	if (rc) {
+		pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+		       target, prio);
+		rc = -EIO;
+	} else {
+		q->qpage = qpage;
+		if (is_secure_guest())
+			uv_share_page(PHYS_PFN(qpage_phys),
+					1 << xive_alloc_order(order));
+	}
+fail:
+	return rc;
+}
+
+static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	__be32 *qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	int rc = IS_ERR(qpage) ? PTR_ERR(qpage) : 0;
+
+	if (!rc)
+		rc = xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+				&xc->queue[prio], prio, qpage, xive_queue_shift);
+	if (rc && !IS_ERR(qpage)) /* q->qpage was never set: free or leak */
+		free_pages((unsigned long)qpage, xive_alloc_order(xive_queue_shift));
+	return rc;
+}
+
+static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	unsigned int alloc_order;
+	long rc;
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
+	if (rc)
+		pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+		       hw_cpu, prio);
+
+	alloc_order = xive_alloc_order(xive_queue_shift);
+	if (is_secure_guest())
+		uv_unshare_page(PHYS_PFN(__pa(q->qpage)), 1 << alloc_order);
+	free_pages((unsigned long)q->qpage, alloc_order);
+	q->qpage = NULL;
+}
+
+static bool xive_spapr_match(struct device_node *node)
+{
+	/* Ignore cascaded controllers for the moment */
+	return true;
+}
+
+#ifdef CONFIG_SMP
+static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	int irq = xive_irq_bitmap_alloc();
+
+	if (irq < 0) {
+		pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+		return -ENXIO;
+	}
+
+	xc->hw_ipi = irq;
+	return 0;
+}
+
+static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	if (xc->hw_ipi == XIVE_BAD_IRQ)
+		return;
+
+	xive_irq_bitmap_free(xc->hw_ipi);
+	xc->hw_ipi = XIVE_BAD_IRQ;
+}
+#endif /* CONFIG_SMP */
+
+static void xive_spapr_shutdown(void)
+{
+	plpar_int_reset(0);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread. Grab the pending
+ * active priorities and update the CPPR to the most favored one.
+ */
+static void xive_spapr_update_pending(struct xive_cpu *xc)
+{
+	u8 nsr, cppr;
+	u16 ack;
+
+	/*
+	 * Perform the "Acknowledge O/S to Register" cycle.
+	 *
+	 * Let's speedup the access to the TIMA using the raw I/O
+	 * accessor as we don't need the synchronisation routine of
+	 * the higher level ones
+	 */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/*
+	 * Grab the CPPR and the "NSR" field which indicates the source
+	 * of the interrupt (if any)
+	 */
+	cppr = ack & 0xff;
+	nsr = ack >> 8;
+
+	if (nsr & TM_QW1_NSR_EO) {
+		if (cppr == 0xff)
+			return;
+		/* Mark the priority pending */
+		xc->pending_prio |= 1 << cppr;
+
+		/*
+		 * A new interrupt should never have a CPPR less favored
+		 * than our current one.
+		 */
+		if (cppr >= xc->cppr)
+			pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+			       smp_processor_id(), cppr, xc->cppr);
+
+		/* Update our idea of what the CPPR is */
+		xc->cppr = cppr;
+	}
+}
+
+static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Only some debug on the TIMA settings */
+	pr_debug("(HW value: %08x %08x %08x)\n",
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD0),
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD1),
+		 in_be32(xive_tima + TM_QW1_OS + TM_WORD2));
+}
+
+static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+	/* Nothing to do */;
+}
+
+static void xive_spapr_sync_source(u32 hw_irq)
+{
+	/* Specs are unclear on what this is doing */
+	plpar_int_sync(0, hw_irq);
+}
+
+static int xive_spapr_debug_show(struct seq_file *m, void *private)
+{
+	struct xive_irq_bitmap *xibm;
+	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+	if (!buf)
+		return -ENOMEM;
+
+	list_for_each_entry(xibm, &xive_irq_bitmaps, list) {
+		memset(buf, 0, PAGE_SIZE);
+		bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count);
+		seq_printf(m, "bitmap #%d: %s", xibm->count, buf);
+	}
+	kfree(buf);
+
+	return 0;
+}
+
+static const struct xive_ops xive_spapr_ops = {
+	.populate_irq_data	= xive_spapr_populate_irq_data,
+	.configure_irq		= xive_spapr_configure_irq,
+	.get_irq_config		= xive_spapr_get_irq_config,
+	.setup_queue		= xive_spapr_setup_queue,
+	.cleanup_queue		= xive_spapr_cleanup_queue,
+	.match			= xive_spapr_match,
+	.shutdown		= xive_spapr_shutdown,
+	.update_pending		= xive_spapr_update_pending,
+	.setup_cpu		= xive_spapr_setup_cpu,
+	.teardown_cpu		= xive_spapr_teardown_cpu,
+	.sync_source		= xive_spapr_sync_source,
+	.esb_rw			= xive_spapr_esb_rw,
+	.debug_show		= xive_spapr_debug_show, /* not SMP-only */
+#ifdef CONFIG_SMP
+	.get_ipi		= xive_spapr_get_ipi,
+	.put_ipi		= xive_spapr_put_ipi,
+#endif /* CONFIG_SMP */
+	.name			= "spapr",
+};
+
+/*
+ * get max priority from "/ibm,plat-res-int-priorities"
+ */
+static bool __init xive_get_max_prio(u8 *max_prio)
+{
+	struct device_node *rootdn;
+	const __be32 *reg;
+	u32 len;
+	int prio, found;
+
+	rootdn = of_find_node_by_path("/");
+	if (!rootdn) {
+		pr_err("not root node found !\n");
+		return false;
+	}
+
+	reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
+	of_node_put(rootdn);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	/* HW supports priorities in the range [0-7] and 0xFF is a
+	 * wildcard priority used to mask. We scan the ranges reserved
+	 * by the hypervisor to find the lowest priority we can use.
+	 */
+	found = 0xFF;
+	for (prio = 0; prio < 8; prio++) {
+		int reserved = 0;
+		int i;
+
+		for (i = 0; i < len / (2 * sizeof(u32)); i++) {
+			int base  = be32_to_cpu(reg[2 * i]);
+			int range = be32_to_cpu(reg[2 * i + 1]);
+
+			if (prio >= base && prio < base + range)
+				reserved++;
+		}
+
+		if (!reserved)
+			found = prio;
+	}
+
+	if (found == 0xFF) {
+		pr_err("no valid priority found in 'ibm,plat-res-int-priorities'\n");
+		return false;
+	}
+
+	*max_prio = found;
+	return true;
+}
+
+static const u8 *__init get_vec5_feature(unsigned int index)
+{
+	unsigned long root, chosen;
+	int size;
+	const u8 *vec5;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return NULL;
+
+	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+	if (!vec5)
+		return NULL;
+
+	if (size <= index)
+		return NULL;
+
+	return vec5 + index;
+}
+
+static bool __init xive_spapr_disabled(void)
+{
+	const u8 *vec5_xive;
+
+	vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
+	if (vec5_xive) {
+		u8 val;
+
+		val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
+		switch (val) {
+		case OV5_FEAT(OV5_XIVE_EITHER):
+		case OV5_FEAT(OV5_XIVE_LEGACY):
+			break;
+		case OV5_FEAT(OV5_XIVE_EXPLOIT):
+			/* Hypervisor only supports XIVE */
+			if (xive_cmdline_disabled)
+				pr_warn("WARNING: Ignoring cmdline option xive=off\n");
+			return false;
+		default:
+			pr_warn("%s: Unknown xive support option: 0x%x\n",
+				__func__, val);
+			break;
+		}
+	}
+
+	return xive_cmdline_disabled;
+}
+
+bool __init xive_spapr_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	void __iomem *tima;
+	struct property *prop;
+	u8 max_prio;
+	u32 val;
+	u32 len;
+	const __be32 *reg;
+	int i, err;
+
+	if (xive_spapr_disabled())
+		return false;
+
+	pr_devel("%s()\n", __func__);
+	np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe");
+	if (!np) {
+		pr_devel("not found !\n");
+		return false;
+	}
+	pr_devel("Found %s\n", np->full_name);
+
+	/* Resource 1 is the OS ring TIMA */
+	if (of_address_to_resource(np, 1, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+	tima = ioremap(r.start, resource_size(&r));
+	if (!tima) {
+		pr_err("Failed to map thread mgmnt area\n");
+		goto err_put;
+	}
+
+	if (!xive_get_max_prio(&max_prio))
+		goto err_unmap;
+
+	/* Feed the IRQ number allocator with the ranges given in the DT */
+	reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
+		goto err_unmap;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
+		goto err_unmap;
+	}
+
+	for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) {
+		err = xive_irq_bitmap_add(be32_to_cpu(reg[0]),
+					  be32_to_cpu(reg[1]));
+		if (err < 0)
+			goto err_mem_free;
+	}
+
+	/* Iterate the EQ sizes and pick one */
+	of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
+		xive_queue_shift = val;
+		if (val == PAGE_SHIFT)
+			break;
+	}
+
+	/* Initialize XIVE core with our backend */
+	if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio))
+		goto err_mem_free;
+
+	of_node_put(np);
+	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+	return true;
+
+err_mem_free:
+	xive_irq_bitmap_remove_all();
+err_unmap:
+	iounmap(tima);
+err_put:
+	of_node_put(np);
+	return false;
+}
+
+machine_arch_initcall(pseries, xive_core_debug_init);
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
new file mode 100644
index 0000000000..fe6d95d54a
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+#ifndef __XIVE_INTERNAL_H
+#define __XIVE_INTERNAL_H
+
+/*
+ * A "disabled" interrupt should never fire, to catch problems
+ * we set its logical number to this
+ */
+#define XIVE_BAD_IRQ		0x7fffffff
+#define XIVE_MAX_IRQ		(XIVE_BAD_IRQ - 1)
+
+/* Each CPU carries one of these with various per-CPU state */
+struct xive_cpu {
+#ifdef CONFIG_SMP
+	/* HW irq number and data of IPI */
+	u32 hw_ipi;
+	struct xive_irq_data ipi_data;
+#endif /* CONFIG_SMP */
+
+	int chip_id;
+
+	/* Queue datas. Only one is populated */
+#define XIVE_MAX_QUEUES	8
+	struct xive_q queue[XIVE_MAX_QUEUES];
+
+	/*
+	 * Pending mask. Each bit corresponds to a priority that
+	 * potentially has pending interrupts.
+	 */
+	u8 pending_prio;
+
+	/* Cache of HW CPPR */
+	u8 cppr;
+};
+
+/* Backend ops */
+struct xive_ops {
+	int	(*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
+	int 	(*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+	int	(*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio,
+				  u32 *sw_irq);
+	int	(*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
+	void	(*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
+	void	(*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc);
+	void	(*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
+	void	(*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc);
+	bool	(*match)(struct device_node *np);
+	void	(*shutdown)(void);
+
+	void	(*update_pending)(struct xive_cpu *xc);
+	void	(*sync_source)(u32 hw_irq);
+	u64	(*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write);
+#ifdef CONFIG_SMP
+	int	(*get_ipi)(unsigned int cpu, struct xive_cpu *xc);
+	void	(*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
+#endif
+	int	(*debug_show)(struct seq_file *m, void *private);
+	int	(*debug_create)(struct dentry *xive_dir);
+	const char *name;
+};
+
+bool xive_core_init(struct device_node *np, const struct xive_ops *ops,
+		    void __iomem *area, u32 offset, u8 max_prio);
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift);
+int xive_core_debug_init(void);
+
+static inline u32 xive_alloc_order(u32 queue_shift)
+{
+	return (queue_shift > PAGE_SHIFT) ? (queue_shift - PAGE_SHIFT) : 0;
+}
+
+extern bool xive_cmdline_disabled;
+extern bool xive_has_save_restore;
+
+#endif /*  __XIVE_INTERNAL_H */
diff --git a/arch/powerpc/tools/checkpatch.sh b/arch/powerpc/tools/checkpatch.sh
new file mode 100755
index 0000000000..91c04802ec
--- /dev/null
+++ b/arch/powerpc/tools/checkpatch.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright 2018, Michael Ellerman, IBM Corporation.
+#
+# Wrapper around checkpatch that uses our preferred settings
+
+script_base=$(realpath "$(dirname "$0")")
+# Quoted "$@" forwards each argument intact, even if it contains spaces
+exec "$script_base/../../../scripts/checkpatch.pl" \
+	--subjective \
+	--no-summary \
+	--show-types \
+	--ignore ARCH_INCLUDE_LINUX \
+	--ignore BIT_MACRO \
+	--ignore COMPARISON_TO_NULL \
+	--ignore EMAIL_SUBJECT \
+	--ignore FILE_PATH_CHANGES \
+	--ignore GLOBAL_INITIALISERS \
+	--ignore LINE_SPACING \
+	--ignore MULTIPLE_ASSIGNMENTS \
+	--ignore DT_SPLIT_BINDING_PATCH \
+	"$@"
diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
new file mode 100755
index 0000000000..0670690350
--- /dev/null
+++ b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -o pipefail
+
+# To debug, uncomment the following line
+# set -x
+
+# Output from -fpatchable-function-entry can only vary on ppc64 elfv2, so this
+# should not be invoked for other targets. Therefore we can pass in -m64 and
+# -mabi explicitly, to take care of toolchains defaulting to other targets.
+
+# Test whether the compile option -fpatchable-function-entry exists and
+# generates appropriate code
+echo "int func() { return 0; }" | \
+    $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \
+    grep -q "__patchable_function_entries"
+
+# Test whether nops are generated after the local entry point
+echo "int x; int func() { return x; }" | \
+    $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \
+    awk 'BEGIN { RS = ";" } /\.localentry.*nop.*\n[[:space:]]*nop/ { print $0 }' | \
+    grep -q "func:"
+
+exit 0
diff --git a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
new file mode 100755
index 0000000000..a31a56016c
--- /dev/null
+++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -o pipefail
+
+# To debug, uncomment the following line
+# set -x
+
+# -mprofile-kernel is only supported on 64-bit, so this should not be invoked
+# for 32-bit. We pass in -m64 explicitly, and -mbig-endian and -mlittle-endian
+# are passed in from Kconfig, which takes care of toolchains defaulting to
+# other targets.
+
+# Test whether the compile option -mprofile-kernel exists and generates
+# profiling code (ie. a call to _mcount()).
+echo "int func() { return 0; }" | \
+    $* -m64 -S -x c -O2 -p -mprofile-kernel - -o - \
+    2> /dev/null | grep -q "_mcount"
+
+# Test whether the notrace attribute correctly suppresses calls to _mcount().
+
+echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \
+    $* -m64 -S -x c -O2 -p -mprofile-kernel - -o - \
+    2> /dev/null | grep -q "_mcount" && \
+    exit 1
+
+exit 0
diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh
new file mode 100644
index 0000000000..689907cda9
--- /dev/null
+++ b/arch/powerpc/tools/head_check.sh
@@ -0,0 +1,80 @@
+# Copyright © 2016 IBM Corporation
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks the head of a vmlinux for linker stubs that
+# break our placement of fixed-location code for 64-bit.
+
+# based on relocs_check.pl
+# Copyright © 2009 IBM Corporation
+
+# NOTE!
+#
+# If the build dies here, it's likely code in head_64.S/exception-64*.S or
+# nearby, is branching to labels it can't reach directly, which results in the
+# linker inserting branch stubs. This can move code around in ways that break
+# the fixed section calculations (head-64.h). To debug this, disassemble the
+# vmlinux and look for branch stubs (long_branch, plt_branch, etc.) in the
+# fixed section region (0 - 0x8000ish). Check what code is calling those stubs,
+# and perhaps change so a direct branch can reach.
+#
+# A ".linker_stub_catch" section is used to catch some stubs generated by
+# early .text code, which tend to get placed at the start of the section.
+# If there are too many such stubs, they can overflow this section. Expanding
+# it may help (or reducing the number of stub branches).
+#
+# Linker stubs use the TOC pointer, so even if fixed section code could
+# tolerate them being inserted into head code, they can't be allowed in low
+# level entry code (boot, interrupt vectors, etc) until r2 is set up. This
+# could cause the kernel to die in early boot.
+
+# Allow for verbose output
+if [ "$V" = "1" ]; then
+	set -x
+fi
+
+if [ $# -lt 2 ]; then
+	echo "$0 [path to nm] [path to vmlinux]" 1>&2
+	exit 1
+fi
+
+# Have Kbuild supply the path to nm so we handle cross compilation.
+nm="$1"
+vmlinux="$2"
+
+# gcc-4.6-era toolchains make _stext an A (absolute) symbol rather than T
+$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt
+
+
+vma=$(grep -e " [TA] _stext$" .tmp_symbols.txt | cut -d' ' -f1)
+
+expected_start_head_addr="$vma"
+
+start_head_addr=$(grep " t start_first_256B$" .tmp_symbols.txt | cut -d' ' -f1)
+
+if [ "$start_head_addr" != "$expected_start_head_addr" ]; then
+	echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 1>&2
+	echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2
+	echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2
+
+	exit 1
+fi
+
+top_vma=$(echo "$vma" | cut -d'0' -f1)
+
+expected_start_text_addr=$(grep " a text_start$" .tmp_symbols.txt | cut -d' ' -f1 | sed "s/^0/$top_vma/")
+
+start_text_addr=$(grep " t start_text$" .tmp_symbols.txt | cut -d' ' -f1)
+
+if [ "$start_text_addr" != "$expected_start_text_addr" ]; then
+	echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 1>&2
+	echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2
+	echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2
+
+	exit 1
+fi
+
+rm -f .tmp_symbols.txt
diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh
new file mode 100755
index 0000000000..6b350e7501
--- /dev/null
+++ b/arch/powerpc/tools/relocs_check.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# Copyright © 2015 IBM Corporation
+
+
+# This script checks the relocations of a vmlinux for "suspicious"
+# relocations.
+
+# based on relocs_check.pl
+# Copyright © 2009 IBM Corporation
+
+if [ $# -lt 3 ]; then
+	echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2
+	exit 1
+fi
+
+bad_relocs=$(
+${srctree}/scripts/relocs_check.sh "$@" |
+	# These relocations are okay
+	# On PPC64:
+	#	R_PPC64_RELATIVE, R_PPC64_NONE
+	# On PPC:
+	#	R_PPC_RELATIVE, R_PPC_ADDR16_HI,
+	#	R_PPC_ADDR16_HA,R_PPC_ADDR16_LO,
+	#	R_PPC_NONE
+	grep -F -w -v 'R_PPC64_RELATIVE
+R_PPC64_NONE
+R_PPC64_UADDR64
+R_PPC_ADDR16_LO
+R_PPC_ADDR16_HI
+R_PPC_ADDR16_HA
+R_PPC_RELATIVE
+R_PPC_NONE'
+)
+
+if [ -z "$bad_relocs" ]; then
+	exit 0
+fi
+
+num_bad=$(echo "$bad_relocs" | wc -l)
+echo "WARNING: $num_bad bad relocations"
+echo "$bad_relocs"
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
new file mode 100755
index 0000000000..8301efee1e
--- /dev/null
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright © 2016,2020 IBM Corporation
+#
+# This script checks the unrelocated code of a vmlinux for "suspicious"
+# branches to relocated code (head_64.S code).
+
+# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
+objdump="$1"
+nm="$2"
+vmlinux="$3"
+
+# Disassembly covers [kstart, __end_interrupts).
+kstart=0xc000000000000000
+
+# Address of __end_interrupts as printed by nm.  When the symbol is not
+# present sed prints nothing, leaving a bare "0x", and there is nothing
+# to check.
+end_intr=0x$($nm -p "$vmlinux" |
+	sed -E -n '/\s+[[:alpha:]]\s+__end_interrupts\s*$/{s///p;q}')
+if [ "$end_intr" = "0x" ]; then
+	exit 0
+fi
+
+# we know that there is a correct branch to
+# __start_initialization_multiplatform, so find its address
+# so we can exclude it.
+sim=0x$($nm -p "$vmlinux" |
+	sed -E -n '/\s+[[:alpha:]]\s+__start_initialization_multiplatform\s*$/{s///p;q}')
+
+# Reduce every branch instruction in the range to "from:branch:to:sym"
+# for the loop below.
+$objdump -D --no-show-raw-insn --start-address="$kstart" --stop-address="$end_intr" "$vmlinux" |
+sed -E -n '
+# match lines that start with a kernel address
+/^c[0-9a-f]*:\s*b/ {
+	# drop branches via ctr or lr
+	/\<b.?.?(ct|l)r/d
+	# cope with some differences between Clang and GNU objdumps
+	s/\<bt.?\s*[[:digit:]]+,/beq/
+	s/\<bf.?\s*[[:digit:]]+,/bne/
+	# tidy up
+	s/\s0x/ /
+	s/://
+	# format for the loop below
+	s/^(\S+)\s+(\S+)\s+(\S+)\s*(\S*).*$/\1:\2:\3:\4/
+	# strip out condition registers
+	s/:cr[0-7],/:/
+	p
+}' | {
+
+all_good=true
+while IFS=: read -r from branch to sym; do
+	# Normalise the target to an absolute 0x... address:
+	#   c...   already an absolute kernel address
+	#   .+N    offset from the branch itself; N is printed unsigned, so
+	#          fold it back to a signed value first (wraps at 0x4000000
+	#          for plain "b", at 0x10000 for every other branch)
+	case "$to" in
+	c*)	to="0x$to"
+		;;
+	.+*)
+		to=${to#.+}
+		if [ "$branch" = 'b' ]; then
+			if (( to >= 0x2000000 )); then
+				to=$(( to - 0x4000000 ))
+			fi
+		elif (( to >= 0x8000 )); then
+			to=$(( to - 0x10000 ))
+		fi
+		printf -v to '0x%x' $(( "0x$from" + to ))
+		;;
+	*)	printf 'Unknown branch format\n'
+		;;
+	esac
+	# The branch to __start_initialization_multiplatform is expected.
+	if [ "$to" = "$sim" ]; then
+		continue
+	fi
+	# Anything landing beyond __end_interrupts is reported; the heading
+	# is printed once, before the first offender.
+	if (( to > end_intr )); then
+		if $all_good; then
+			printf '%s\n' 'WARNING: Unrelocated relative branches'
+			all_good=false
+		fi
+		printf '%s %s-> %s %s\n' "$from" "$branch" "$to" "$sym"
+	fi
+done
+
+# Runs "true" or "false": the exit status reflects whether any
+# suspicious branch was reported.
+$all_good
+
+}
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
new file mode 100644
index 0000000000..682c7c0a6f
--- /dev/null
+++ b/arch/powerpc/xmon/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for xmon
+
+# Set every per-directory instrumentation knob (gcov, kcov, UBSAN, KASAN,
+# KCSAN) to "n" for the objects built here.
+GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+# Disable ftrace for the entire directory
+ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
+
+# On CONFIG_PPC64 builds compile with $(NO_MINIMAL_TOC), which is defined
+# outside this Makefile.
+ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+
+# Clang stores addresses on the stack causing the frame size to blow
+# out. See https://github.com/ClangBuiltLinux/linux/issues/252
+ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096
+
+# Objects that are always part of xmon.
+obj-y			+= xmon.o nonstdio.o spr_access.o xmon_bpts.o
+
+# The disassembler is optional; the SPU disassembler additionally
+# requires CONFIG_SPU_BASE.
+ifdef CONFIG_XMON_DISASSEMBLY
+obj-y			+= ppc-dis.o ppc-opc.o
+obj-$(CONFIG_SPU_BASE)	+= spu-dis.o spu-opc.o
+endif
diff --git a/arch/powerpc/xmon/ansidecl.h b/arch/powerpc/xmon/ansidecl.h
new file mode 100644
index 0000000000..17a85a52b6
--- /dev/null
+++ b/arch/powerpc/xmon/ansidecl.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* ANSI and traditional C compatibility macros
+   Copyright 1991, 1992 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+ */
+
+/* ANSI and traditional C compatibility macros
+
+   ANSI C is assumed if __STDC__ is #defined.
+
+   Macro	ANSI C definition	Traditional C definition
+   -----	---- - ----------	----------- - ----------
+   PTR		`void *'		`char *'
+   LONG_DOUBLE	`long double'		`double'
+   VOLATILE	`volatile'		`'
+   SIGNED	`signed'		`'
+   PTRCONST	`void *const'		`char *'
+   ANSI_PROTOTYPES  1			not defined
+
+   CONST is also defined, but is obsolete.  Just use const.
+
+   DEFUN (name, arglist, args)
+
+	Defines function NAME.
+
+	ARGLIST lists the arguments, separated by commas and enclosed in
+	parentheses.  ARGLIST becomes the argument list in traditional C.
+
+	ARGS lists the arguments with their types.  It becomes a prototype in
+	ANSI C, and the type declarations in traditional C.  Arguments should
+	be separated with `AND'.  For functions with a variable number of
+	arguments, the last thing listed should be `DOTS'.
+
+   DEFUN_VOID (name)
+
+	Defines a function NAME, which takes no arguments.
+
+   obsolete --     EXFUN (name, (prototype))	-- obsolete.
+
+	Replaced by PARAMS.  Do not use; will disappear someday soon.
+	Was used in external function declarations.
+	In ANSI C it is `NAME PROTOTYPE' (so PROTOTYPE should be enclosed in
+	parentheses).  In traditional C it is `NAME()'.
+	For a function that takes no arguments, PROTOTYPE should be `(void)'.
+
+    PARAMS ((args))
+
+	We could use the EXFUN macro to handle prototype declarations, but
+	the name is misleading and the result is ugly.  So we just define a
+	simple macro to handle the parameter lists, as in:
+
+	      static int foo PARAMS ((int, char));
+
+	This produces:  `static int foo();' or `static int foo (int, char);'
+
+	EXFUN would have done it like this:
+
+	      static int EXFUN (foo, (int, char));
+
+	but the function is not external...and it's hard to visually parse
+	the function name out of the mess.   EXFUN should be considered
+	obsolete; new code should be written to use PARAMS.
+
+    For example:
+	extern int printf PARAMS ((CONST char *format DOTS));
+	int DEFUN(fprintf, (stream, format),
+		  FILE *stream AND CONST char *format DOTS) { ... }
+	void DEFUN_VOID(abort) { ... }
+*/
+
+/* Include guard.  */
+#ifndef	_ANSIDECL_H
+
+#define	_ANSIDECL_H	1
+
+
+/* Every source file includes this file,
+   so they will all get the switch for lint.  */
+/* LINTLIBRARY */
+
+
+/* Select the ANSI definitions when the compiler announces __STDC__ or is
+   one of the listed environments; otherwise use the traditional ones.  */
+#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(WIN32)
+/* All known AIX compilers implement these things (but don't always
+   define __STDC__).  The RISC/OS MIPS compiler defines these things
+   in SVR4 mode, but does not define __STDC__.  */
+
+/* Generic pointer types and the widest floating type.  */
+#define	PTR		void *
+#define	PTRCONST	void *CONST
+#define	LONG_DOUBLE	long double
+
+/* Argument separators and qualifiers: in ANSI mode they expand to the
+   real keywords/punctuation.  */
+#define	AND		,
+#define	NOARGS		void
+#define	CONST		const
+#define	VOLATILE	volatile
+#define	SIGNED		signed
+#define	DOTS		, ...
+
+/* Declaration/definition helpers: the prototype is kept.  */
+#define	EXFUN(name, proto)		name proto
+#define	DEFUN(name, arglist, args)	name(args)
+#define	DEFUN_VOID(name)		name(void)
+
+#define PROTO(type, name, arglist)	type name arglist
+#define PARAMS(paramlist)		paramlist
+#define ANSI_PROTOTYPES			1
+
+#else	/* Not ANSI C.  */
+
+/* Traditional C: char * stands in for void *, double for long double.  */
+#define	PTR		char *
+#define	PTRCONST	PTR
+#define	LONG_DOUBLE	double
+
+/* Qualifiers expand to nothing and AND becomes the `;' that separates
+   old-style parameter declarations.  */
+#define	AND		;
+#define	NOARGS
+#define	CONST
+#ifndef const /* some systems define it in header files for non-ansi mode */
+#define	const
+#endif
+#define	VOLATILE
+#define	SIGNED
+#define	DOTS
+
+/* Declaration/definition helpers: prototypes are dropped, leaving `()'.  */
+#define	EXFUN(name, proto)		name()
+#define	DEFUN(name, arglist, args)	name arglist args;
+#define	DEFUN_VOID(name)		name()
+#define PROTO(type, name, arglist) type name ()
+#define PARAMS(paramlist)		()
+
+#endif	/* ANSI C.  */
+
+#endif	/* ansidecl.h	*/
diff --git a/arch/powerpc/xmon/dis-asm.h b/arch/powerpc/xmon/dis-asm.h
new file mode 100644
index 0000000000..c4c982d640
--- /dev/null
+++ b/arch/powerpc/xmon/dis-asm.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_XMON_DIS_ASM_H
+#define _POWERPC_XMON_DIS_ASM_H
+/*
+ * Copyright (C) 2006 Michael Ellerman, IBM Corporation.
+ */
+
+/* Only declared here; the definition is provided elsewhere. */
+extern void print_address (unsigned long memaddr);
+
+/*
+ * With CONFIG_XMON_DISASSEMBLY the real disassemblers are declared.
+ * Without it, the inline stubs below just print the instruction word as
+ * hex (at least 8 digits) and return 0.
+ *
+ * NOTE: the stubs call printf() but this header includes nothing; the
+ * includer must already have a printf in scope (nonstdio.h maps it to
+ * xmon_printf).
+ */
+#ifdef CONFIG_XMON_DISASSEMBLY
+extern int print_insn_powerpc(unsigned long insn, unsigned long memaddr);
+extern int print_insn_spu(unsigned long insn, unsigned long memaddr);
+#else
+/* Stub: print insn as raw hex; memaddr is unused. */
+static inline int print_insn_powerpc(unsigned long insn, unsigned long memaddr)
+{
+	printf("%.8lx", insn);
+	return 0;
+}
+
+/* Stub: print insn as raw hex; memaddr is unused. */
+static inline int print_insn_spu(unsigned long insn, unsigned long memaddr)
+{
+	printf("%.8lx", insn);
+	return 0;
+}
+#endif
+
+#endif /* _POWERPC_XMON_DIS_ASM_H */
diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
new file mode 100644
index 0000000000..9b0d85bff0
--- /dev/null
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ */
+#include <linux/string.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include "nonstdio.h"
+
+/*
+ * Output pagination state:
+ *   paginating        - pagination is currently switched on
+ *   paginate_skipping - output is being discarded while paginating
+ *   paginate_lpp      - lines per page; 0 means never prompt
+ *   paginate_pos      - newlines written since the last prompt
+ */
+static bool paginating;
+static bool paginate_skipping;
+static unsigned long paginate_lpp; /* Lines Per Page */
+static unsigned long paginate_pos;
+
+/* Switch pagination on, starting with an empty page and no skipping. */
+void xmon_start_pagination(void)
+{
+	paginate_pos = 0;
+	paginate_skipping = false;
+	paginating = true;
+}
+
+/* Switch pagination off; the rest of the state is left as it is. */
+void xmon_end_pagination(void)
+{
+	paginating = false;
+}
+
+/* Set how many lines make up one page. */
+void xmon_set_pagination_lpp(unsigned long lpp)
+{
+	paginate_lpp = lpp;
+}
+
+/*
+ * Read one character through the udbg_getc hook.
+ * Returns -1 when no hook is installed.
+ */
+static int xmon_readchar(void)
+{
+	return udbg_getc ? udbg_getc() : -1;
+}
+
+/*
+ * Write nb bytes starting at ptr through udbg_write(), paginating when
+ * enabled.
+ *
+ * While pagination is on and a lines-per-page value is set, output is
+ * written one line at a time.  After each full page a prompt is shown
+ * and one key is read: 'a' switches pagination off, 'q' discards the
+ * rest of the paginated output, anything else continues with the next
+ * page.
+ *
+ * Returns 0 if nb <= 0, nb if the output is being discarded, otherwise
+ * the sum of the udbg_write() return values for the data written.
+ */
+static int xmon_write(const char *ptr, int nb)
+{
+	int rv = 0;
+	const char *p = ptr, *q;
+	const char msg[] = "[Hit a key (a:all, q:truncate, any:next page)]";
+
+	if (nb <= 0)
+		return rv;
+
+	if (paginating && paginate_skipping)
+		return nb;
+
+	if (paginate_lpp) {
+		/*
+		 * Look for newlines only inside the nb bytes we were given.
+		 * The buffer need not be NUL-terminated (xmon_putchar()
+		 * passes the address of a single char), so an unbounded
+		 * strchr() could read past it and leave the remaining
+		 * length below negative.
+		 */
+		while (paginating &&
+		       (q = memchr(p, '\n', nb - (p - ptr)))) {
+			rv += udbg_write(p, q - p + 1);
+			p = q + 1;
+			paginate_pos++;
+
+			if (paginate_pos >= paginate_lpp) {
+				udbg_write(msg, strlen(msg));
+
+				switch (xmon_readchar()) {
+				case 'a':
+					paginating = false;
+					break;
+				case 'q':
+					paginate_skipping = true;
+					break;
+				default:
+					/* nothing */
+					break;
+				}
+
+				paginate_pos = 0;
+				udbg_write("\r\n", 2);
+
+				if (paginate_skipping)
+					return nb;
+			}
+		}
+	}
+
+	/* Whatever is left after the last complete line (or everything). */
+	return rv + udbg_write(p, nb - (p - ptr));
+}
+
+/*
+ * Write a single character, sending '\r' ahead of every '\n'.
+ * Returns c when the character was written, -1 otherwise.
+ */
+int xmon_putchar(int c)
+{
+	char byte = c;
+
+	if (c == '\n')
+		xmon_putchar('\r');
+
+	if (xmon_write(&byte, 1) != 1)
+		return -1;
+	return c;
+}
+
+/* Input line buffer, read cursor, and count of characters not yet consumed. */
+static char line[256];
+static char *lineptr;
+static int lineleft;
+
+/*
+ * Return the next character of console input, one edited line at a time.
+ *
+ * When the buffer is empty a whole line is read first, with echo and
+ * simple editing (backspace/DEL erases one character, Ctrl-U erases the
+ * line).  Returns -1 when no character is available.
+ */
+static int xmon_getchar(void)
+{
+	int c;
+
+	if (lineleft == 0) {
+		lineptr = line;
+		for (;;) {
+			c = xmon_readchar();
+			/* -1 from xmon_readchar() or Ctrl-D (4): stop without adding a newline */
+			if (c == -1 || c == 4)
+				break;
+			/* CR or LF ends the line; it is always stored as '\n' */
+			if (c == '\r' || c == '\n') {
+				*lineptr++ = '\n';
+				xmon_putchar('\n');
+				break;
+			}
+			switch (c) {
+			/* DEL or backspace: rub out the last character, if any */
+			case 0177:
+			case '\b':
+				if (lineptr > line) {
+					xmon_putchar('\b');
+					xmon_putchar(' ');
+					xmon_putchar('\b');
+					--lineptr;
+				}
+				break;
+			/* Ctrl-U: rub out everything typed so far */
+			case 'U' & 0x1F:
+				while (lineptr > line) {
+					xmon_putchar('\b');
+					xmon_putchar(' ');
+					xmon_putchar('\b');
+					--lineptr;
+				}
+				break;
+			default:
+				/* keep the last slot free for the terminating '\n'; ring the bell when full */
+				if (lineptr >= &line[sizeof(line) - 1])
+					xmon_putchar('\a');
+				else {
+					xmon_putchar(c);
+					*lineptr++ = c;
+				}
+			}
+		}
+		/* switch from filling the buffer to draining it */
+		lineleft = lineptr - line;
+		lineptr = line;
+	}
+	/* nothing was read at all */
+	if (lineleft == 0)
+		return -1;
+	--lineleft;
+	return *lineptr++;
+}
+
+/*
+ * Read one line of input into str, storing at most nb - 1 characters
+ * followed by a terminating NUL.  The newline, when read, is kept.
+ *
+ * Returns str, or NULL if input ended before any character was stored.
+ */
+char *xmon_gets(char *str, int nb)
+{
+	char *const limit = str + nb - 1;
+	char *out = str;
+
+	while (out < limit) {
+		int ch = xmon_getchar();
+
+		if (ch == -1) {
+			if (out == str)
+				return NULL;
+			break;
+		}
+
+		*out++ = ch;
+		if (ch == '\n')
+			break;
+	}
+
+	*out = 0;
+	return str;
+}
+
+/*
+ * printf-style output for xmon.
+ *
+ * The text is formatted into a static 1024-byte buffer (longer output is
+ * truncated) and handed to xmon_write().  If xmon_write() reports that
+ * nothing was written, the text is passed to pr_cont() instead.
+ */
+void xmon_printf(const char *format, ...)
+{
+	va_list args;
+	static char xmon_outbuf[1024];
+	int rc, n;
+
+	va_start(args, format);
+	n = vsnprintf(xmon_outbuf, sizeof(xmon_outbuf), format, args);
+	va_end(args);
+
+	/*
+	 * vsnprintf() returns the length the output would have had without
+	 * truncation.  Clamp it to what is really in the buffer so that
+	 * xmon_write() never reads past the end of xmon_outbuf.
+	 */
+	if (n >= (int)sizeof(xmon_outbuf))
+		n = sizeof(xmon_outbuf) - 1;
+
+	rc = xmon_write(xmon_outbuf, n);
+
+	if (n && rc == 0) {
+		/* No udbg hooks, fallback to printk() - dangerous */
+		pr_cont("%s", xmon_outbuf);
+	}
+}
+
+/*
+ * Write a NUL-terminated string through xmon_write().  Nothing is
+ * appended and the result of the write is ignored.
+ */
+void xmon_puts(const char *str)
+{
+	int len = strlen(str);
+
+	xmon_write(str, len);
+}
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
new file mode 100644
index 0000000000..e8deac6c84
--- /dev/null
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* End-of-input marker for code that expects a stdio-style EOF. */
+#define EOF	(-1)
+
+/* Interface implemented in nonstdio.c: pagination control and console I/O. */
+extern void xmon_set_pagination_lpp(unsigned long lpp);
+extern void xmon_start_pagination(void);
+extern void xmon_end_pagination(void);
+extern int xmon_putchar(int c);
+extern void xmon_puts(const char *);
+extern char *xmon_gets(char *, int);
+extern __printf(1, 2) void xmon_printf(const char *fmt, ...);
+
+/* Code written against printf()/putchar() gets the xmon versions instead. */
+#define printf	xmon_printf
+#define putchar	xmon_putchar
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
new file mode 100644
index 0000000000..75fa98221d
--- /dev/null
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* ppc-dis.c -- Disassemble PowerPC instructions
+   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Written by Ian Lance Taylor, Cygnus Support
+
+This file is part of GDB, GAS, and the GNU binutils.
+
+ */
+
+#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
+#include "nonstdio.h"
+#include "ansidecl.h"
+#include "ppc.h"
+#include "dis-asm.h"
+
+/* This file provides several disassembler functions, all of which use
+   the disassembler interface defined in dis-asm.h.  Several functions
+   are provided because this file handles disassembly for the PowerPC
+   in both big and little endian mode and also for the POWER (RS/6000)
+   chip.  */
+
+/* Return the value of OPERAND as encoded in INSN.  An operand with an
+   extract hook is decoded by that hook; otherwise the field is shifted
+   and masked out of INSN and, for PPC_OPERAND_SIGNED operands,
+   sign-extended.  */
+
+static long
+operand_value_powerpc (const struct powerpc_operand *operand,
+		       unsigned long insn, ppc_cpu_t dialect)
+{
+  unsigned long sign;
+  long value;
+  int invalid;
+
+  /* A dedicated extractor takes precedence over shift-and-mask.  */
+  if (operand->extract)
+    return (*operand->extract) (insn, dialect, &invalid);
+
+  /* A negative shift means the field has to be moved left.  */
+  value = (operand->shift >= 0
+	   ? insn >> operand->shift
+	   : insn << -operand->shift) & operand->bitm;
+
+  if ((operand->flags & PPC_OPERAND_SIGNED) == 0)
+    return value;
+
+  /* BITM is always some number of zeros followed by some number of
+     ones, followed by some number of zeros.  Fill in the trailing
+     zeros (sign & -sign is the rightmost 1 bit), then keep only the
+     topmost 1 bit: that is the sign bit of the field.  */
+  sign = operand->bitm;
+  sign |= (sign & -sign) - 1;
+  sign &= ~(sign >> 1);
+
+  return (value ^ sign) - sign;
+}
+
+/* Decide whether the optional operand(s) starting at OPINDEX can be
+   left out of the printed instruction.  Returns 1 if they can, and 0
+   as soon as a remaining operand is flagged PPC_OPERAND_NEXT or is an
+   optional operand whose value differs from its default.  */
+
+static int
+skip_optional_operands (const unsigned char *opindex,
+			unsigned long insn, ppc_cpu_t dialect)
+{
+  while (*opindex != 0)
+    {
+      const struct powerpc_operand *op = &powerpc_operands[*opindex++];
+
+      if ((op->flags & PPC_OPERAND_NEXT) != 0)
+	return 0;
+
+      if ((op->flags & PPC_OPERAND_OPTIONAL) != 0
+	  && (operand_value_powerpc (op, insn, dialect)
+	      != ppc_optional_operand_value (op)))
+	return 0;
+    }
+
+  return 1;
+}
+
+/* Return the first entry of the opcode table that matches INSN for
+   machine DIALECT, or NULL if there is none.  A DIALECT of -1 is
+   special: it matches all machine opcode variations.  */
+
+static const struct powerpc_opcode *
+lookup_powerpc (unsigned long insn, ppc_cpu_t dialect)
+{
+  const struct powerpc_opcode *const table_end
+    = powerpc_opcodes + powerpc_num_opcodes;
+  const struct powerpc_opcode *candidate;
+
+  for (candidate = powerpc_opcodes; candidate < table_end; ++candidate)
+    {
+      const unsigned char *idx;
+      int invalid = 0;
+
+      /* The fixed bits of the encoding have to match.  */
+      if ((insn & candidate->mask) != candidate->opcode)
+	continue;
+
+      /* Unless every dialect is accepted, the entry must be flagged
+	 for this dialect and must not be deprecated for it.  */
+      if (dialect != (ppc_cpu_t) -1
+	  && ((candidate->flags & dialect) == 0
+	      || (candidate->deprecated & dialect) != 0))
+	continue;
+
+      /* Let every operand with an extract hook check validity.  */
+      for (idx = candidate->operands; *idx != 0; idx++)
+	{
+	  const struct powerpc_operand *op = powerpc_operands + *idx;
+
+	  if (op->extract)
+	    (*op->extract) (insn, dialect, &invalid);
+	}
+
+      if (!invalid)
+	return candidate;
+    }
+
+  return NULL;
+}
+
+/* Print a PowerPC or POWER instruction.
+
+   INSN is the instruction word and MEMADDR the address it came from
+   (added to relative operands before they are printed).  The
+   mnemonic and operands are written with printf; an instruction that
+   matches no opcode table entry is printed as ".long 0x...".
+
+   Always returns 4 (presumably the instruction length in bytes).  */
+
+int print_insn_powerpc (unsigned long insn, unsigned long memaddr)
+{
+  const struct powerpc_opcode *opcode;
+  ppc_cpu_t dialect;
+
+  /* Start from a common baseline and widen the accepted dialect
+     according to the CPU feature tests below.  */
+  dialect = PPC_OPCODE_PPC | PPC_OPCODE_COMMON
+            | PPC_OPCODE_64 | PPC_OPCODE_POWER4 | PPC_OPCODE_ALTIVEC;
+
+  if (cpu_has_feature(CPU_FTRS_POWER5))
+    dialect |= PPC_OPCODE_POWER5;
+
+  if (cpu_has_feature(CPU_FTRS_CELL))
+    dialect |= (PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC);
+
+  if (cpu_has_feature(CPU_FTRS_POWER6))
+    dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC);
+
+  if (cpu_has_feature(CPU_FTRS_POWER7))
+    dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7
+                | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX);
+
+  if (cpu_has_feature(CPU_FTRS_POWER8))
+    dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7
+		| PPC_OPCODE_POWER8 | PPC_OPCODE_HTM
+		| PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX);
+
+  if (cpu_has_feature(CPU_FTRS_POWER9))
+    dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7
+		| PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 | PPC_OPCODE_HTM
+		| PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2
+		| PPC_OPCODE_VSX | PPC_OPCODE_VSX3);
+
+  /* Look the instruction up for this dialect first; if that fails and
+     PPC_OPCODE_ANY is set, retry accepting every dialect.  */
+  opcode = lookup_powerpc (insn, dialect);
+  if (opcode == NULL && (dialect & PPC_OPCODE_ANY) != 0)
+    opcode = lookup_powerpc (insn, (ppc_cpu_t) -1);
+
+  if (opcode != NULL)
+    {
+      const unsigned char *opindex;
+      const struct powerpc_operand *operand;
+      int need_comma;
+      int need_paren;
+      int skip_optional;
+
+      /* Pad the mnemonic only when operands follow.  */
+      if (opcode->operands[0] != 0)
+	printf("%-7s ", opcode->name);
+      else
+	printf("%s", opcode->name);
+
+      /* Now extract and print the operands.  */
+      need_comma = 0;
+      need_paren = 0;
+      skip_optional = -1;
+      for (opindex = opcode->operands; *opindex != 0; opindex++)
+	{
+	  long value;
+
+	  operand = powerpc_operands + *opindex;
+
+	  /* Operands that are marked FAKE are simply ignored.  We
+	     already made sure that the extract function considered
+	     the instruction to be valid.  */
+	  if ((operand->flags & PPC_OPERAND_FAKE) != 0)
+	    continue;
+
+	  /* If all of the optional operands have the value zero,
+	     then don't print any of them.  The decision is made once,
+	     at the first optional operand (skip_optional < 0).  */
+	  if ((operand->flags & PPC_OPERAND_OPTIONAL) != 0)
+	    {
+	      if (skip_optional < 0)
+		skip_optional = skip_optional_operands (opindex, insn,
+							dialect);
+	      if (skip_optional)
+		continue;
+	    }
+
+	  value = operand_value_powerpc (operand, insn, dialect);
+
+	  if (need_comma)
+	    {
+	      printf(",");
+	      need_comma = 0;
+	    }
+
+	  /* Print the operand as directed by the flags.  */
+	  if ((operand->flags & PPC_OPERAND_GPR) != 0
+	      || ((operand->flags & PPC_OPERAND_GPR_0) != 0 && value != 0))
+	    printf("r%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_FPR) != 0)
+	    printf("f%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_VR) != 0)
+	    printf("v%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_VSR) != 0)
+	    printf("vs%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_RELATIVE) != 0)
+	    print_address(memaddr + value);
+	  else if ((operand->flags & PPC_OPERAND_ABSOLUTE) != 0)
+	    print_address(value & 0xffffffff);
+	  else if ((operand->flags & PPC_OPERAND_FSL) != 0)
+	    printf("fsl%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_FCR) != 0)
+	    printf("fcr%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_UDI) != 0)
+	    printf("%ld", value);
+	  else if ((operand->flags & PPC_OPERAND_CR_REG) != 0
+		   && (((dialect & PPC_OPCODE_PPC) != 0)
+		       || ((dialect & PPC_OPCODE_VLE) != 0)))
+	    printf("cr%ld", value);
+	  else if (((operand->flags & PPC_OPERAND_CR_BIT) != 0)
+		   && (((dialect & PPC_OPCODE_PPC) != 0)
+		       || ((dialect & PPC_OPCODE_VLE) != 0)))
+	    {
+	      static const char *cbnames[4] = { "lt", "gt", "eq", "so" };
+	      int cr;
+	      int cc;
+
+	      /* The low two bits select the bit name, the rest the
+		 CR field; field 0 is left implicit.  */
+	      cr = value >> 2;
+	      if (cr != 0)
+		printf("4*cr%d+", cr);
+	      cc = value & 3;
+	      printf("%s", cbnames[cc]);
+	    }
+	  else
+	    printf("%d", (int) value);
+
+	  if (need_paren)
+	    {
+	      printf(")");
+	      need_paren = 0;
+	    }
+
+	  /* An operand flagged PARENS opens a parenthesis that is
+	     closed after the next operand, instead of a comma.  */
+	  if ((operand->flags & PPC_OPERAND_PARENS) == 0)
+	    need_comma = 1;
+	  else
+	    {
+	      printf("(");
+	      need_paren = 1;
+	    }
+	}
+
+      /* We have found and printed an instruction.  */
+      return 4;
+    }
+
+  /* We could not find a match.  */
+  printf(".long 0x%lx", insn);
+
+  return 4;
+}
diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c
new file mode 100644
index 0000000000..0774d71145
--- /dev/null
+++ b/arch/powerpc/xmon/ppc-opc.c
@@ -0,0 +1,7280 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* ppc-opc.c -- PowerPC opcode list
+   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Written by Ian Lance Taylor, Cygnus Support
+
+   This file is part of GDB, GAS, and the GNU binutils.
+
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include "nonstdio.h"
+#include "ppc.h"
+
+#define ATTRIBUTE_UNUSED	/* Expands to nothing (presumably a stub for the binutils macro; confirm).  */
+#define _(x)	x	/* Expands to X unchanged (presumably a stub for message translation; confirm).  */
+
+/* This file holds the PowerPC opcode table.  The opcode table
+   includes almost all of the extended instruction mnemonics.  This
+   permits the disassembler to use them, and simplifies the assembler
+   logic, at the cost of increasing the table size.  The table is
+   strictly constant data, so the compiler should be able to put it in
+   the .text section.
+
+   This file also holds the operand table.  All knowledge about
+   inserting operands into instructions and vice-versa is kept in this
+   file.  */
+
+/* Local insertion and extraction functions: the insert/extract callbacks named in powerpc_operands[].  */
+
+static unsigned long insert_arx (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_arx (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_ary (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_ary (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_bat (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_bat (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_bba (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_bba (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_bdm (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_bdm (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_bdp (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_bdp (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_bo (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_bo (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_boe (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_boe (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_esync (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_dcmxs (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_dcmxs (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_dxd (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_dxd (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_dxdn (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_dxdn (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_fxm (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_fxm (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_li20 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_li20 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_ls (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_mbe (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_mbe (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_mb6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_mb6 (unsigned long, ppc_cpu_t, int *);
+static long extract_nb (unsigned long, ppc_cpu_t, int *); /* Used by both NB and NBI.  */
+static unsigned long insert_nbi (unsigned long, long, ppc_cpu_t, const char **); /* Paired with extract_nb.  */
+static unsigned long insert_nsi (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_nsi (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_oimm (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_oimm (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_ral (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_ram (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_raq (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_ras (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_rbs (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_rbs (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_rbx (unsigned long, long, ppc_cpu_t, const char **); /* Insert only.  */
+static unsigned long insert_rx (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_rx (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_ry (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_ry (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_sh6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_sh6 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_sci8 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_sci8 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_sci8n (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_sci8n (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_sd4h (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_sd4h (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_sd4w (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_sd4w (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_spr (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_spr (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_sprg (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_sprg (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_tbr (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_tbr (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_xt6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_xt6 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_xtq6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_xtq6 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_xa6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_xa6 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_xb6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_xb6 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_xb6s (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_xb6s (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_xc6 (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_xc6 (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_dm (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_dm (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_vlesi (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_vlesi (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_vlensi (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_vlensi (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_vleui (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_vleui (unsigned long, ppc_cpu_t, int *);
+static unsigned long insert_vleil (unsigned long, long, ppc_cpu_t, const char **);
+static long extract_vleil (unsigned long, ppc_cpu_t, int *);
+
+/* The operands table.
+
+   The fields are bitm, shift, insert, extract, flags.
+
+   We used to put parens around the various additions, like the one
+   for BA just below.  However, that caused trouble with feeble
+   compilers with a limit on depth of a parenthesized expression, like
+   (reportedly) the compiler in Microsoft Developer Studio 5.  So we
+   omit the parens, since the macros are never used in a context where
+   the addition will be ambiguous.  */
+
+const struct powerpc_operand powerpc_operands[] =
+{
+  /* The zero index is used to indicate the end of the list of
+     operands.  */
+#define UNUSED 0
+  { 0, 0, NULL, NULL, 0 },
+
+  /* The BA field in an XL form instruction.  */
+#define BA UNUSED + 1
+  /* The BI field in a B form or XL form instruction.  */
+#define BI BA
+#define BI_MASK (0x1f << 16)
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_CR_BIT },
+
+  /* The BA field in an XL form instruction when it must be the same
+     as the BT field in the same instruction.  */
+#define BAT BA + 1
+  { 0x1f, 16, insert_bat, extract_bat, PPC_OPERAND_FAKE },
+
+  /* The BB field in an XL form instruction.  */
+#define BB BAT + 1
+#define BB_MASK (0x1f << 11)
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_CR_BIT },
+
+  /* The BB field in an XL form instruction when it must be the same
+     as the BA field in the same instruction.  */
+#define BBA BB + 1
+  /* The VB field in a VX form instruction when it must be the same
+     as the VA field in the same instruction.  */
+#define VBA BBA
+  { 0x1f, 11, insert_bba, extract_bba, PPC_OPERAND_FAKE },
+
+  /* The BD field in a B form instruction.  The lower two bits are
+     forced to zero.  */
+#define BD BBA + 1
+  { 0xfffc, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The BD field in a B form instruction when absolute addressing is
+     used.  */
+#define BDA BD + 1
+  { 0xfffc, 0, NULL, NULL, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED },
+
+  /* The BD field in a B form instruction when the - modifier is used.
+     This sets the y bit of the BO field appropriately.  */
+#define BDM BDA + 1
+  { 0xfffc, 0, insert_bdm, extract_bdm,
+    PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The BD field in a B form instruction when the - modifier is used
+     and absolute address is used.  */
+#define BDMA BDM + 1
+  { 0xfffc, 0, insert_bdm, extract_bdm,
+    PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED },
+
+  /* The BD field in a B form instruction when the + modifier is used.
+     This sets the y bit of the BO field appropriately.  */
+#define BDP BDMA + 1
+  { 0xfffc, 0, insert_bdp, extract_bdp,
+    PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The BD field in a B form instruction when the + modifier is used
+     and absolute addressing is used.  */
+#define BDPA BDP + 1
+  { 0xfffc, 0, insert_bdp, extract_bdp,
+    PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED },
+
+  /* The BF field in an X or XL form instruction.  */
+#define BF BDPA + 1
+  /* The CRFD field in an X form instruction.  */
+#define CRFD BF
+  /* The CRD field in an XL form instruction.  */
+#define CRD BF
+  { 0x7, 23, NULL, NULL, PPC_OPERAND_CR_REG },
+
+  /* The BF field in an X or XL form instruction.  */
+#define BFF BF + 1
+  { 0x7, 23, NULL, NULL, 0 },
+
+  /* An optional BF field.  This is used for comparison instructions,
+     in which an omitted BF field is taken as zero.  */
+#define OBF BFF + 1
+  { 0x7, 23, NULL, NULL, PPC_OPERAND_CR_REG | PPC_OPERAND_OPTIONAL },
+
+  /* The BFA field in an X or XL form instruction.  */
+#define BFA OBF + 1
+  { 0x7, 18, NULL, NULL, PPC_OPERAND_CR_REG },
+
+  /* The BO field in a B form instruction.  Certain values are
+     illegal.  */
+#define BO BFA + 1
+#define BO_MASK (0x1f << 21)
+  { 0x1f, 21, insert_bo, extract_bo, 0 },
+
+  /* The BO field in a B form instruction when the + or - modifier is
+     used.  This is like the BO field, but it must be even.  */
+#define BOE BO + 1
+  { 0x1e, 21, insert_boe, extract_boe, 0 },
+
+  /* The RM field in an X form instruction.  */
+#define RM BOE + 1
+  { 0x3, 11, NULL, NULL, 0 },
+
+#define BH RM + 1
+  { 0x3, 11, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The BT field in an X or XL form instruction.  */
+#define BT BH + 1
+  { 0x1f, 21, NULL, NULL, PPC_OPERAND_CR_BIT },
+
+  /* The BI16 field in a BD8 form instruction.  */
+#define BI16 BT + 1
+  { 0x3, 8, NULL, NULL, PPC_OPERAND_CR_BIT },
+
+  /* The BI32 field in a BD15 form instruction.  */
+#define BI32 BI16 + 1
+  { 0xf, 16, NULL, NULL, PPC_OPERAND_CR_BIT },
+
+  /* The BO32 field in a BD15 form instruction.  */
+#define BO32 BI32 + 1
+  { 0x3, 20, NULL, NULL, 0 },
+
+  /* The B8 field in a BD8 form instruction.  */
+#define B8 BO32 + 1
+  { 0x1fe, -1, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The B15 field in a BD15 form instruction.  The lowest bit is
+     forced to zero.  */
+#define B15 B8 + 1
+  { 0xfffe, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The B24 field in a BD24 form instruction.  The lowest bit is
+     forced to zero.  */
+#define B24 B15 + 1
+  { 0x1fffffe, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The condition register number portion of the BI field in a B form
+     or XL form instruction.  This is used for the extended
+     conditional branch mnemonics, which set the lower two bits of the
+     BI field.  This field is optional.  */
+#define CR B24 + 1
+  { 0x7, 18, NULL, NULL, PPC_OPERAND_CR_REG | PPC_OPERAND_OPTIONAL },
+
+  /* The CRB field in an X form instruction.  */
+#define CRB CR + 1
+  /* The MB field in an M form instruction.  */
+#define MB CRB
+#define MB_MASK (0x1f << 6)
+  { 0x1f, 6, NULL, NULL, 0 },
+
+  /* The CRD32 field in an XL form instruction.  */
+#define CRD32 CRB + 1
+  { 0x3, 21, NULL, NULL, PPC_OPERAND_CR_REG },
+
+  /* The CRFS field in an X form instruction.  */
+#define CRFS CRD32 + 1
+  { 0x7, 0, NULL, NULL, PPC_OPERAND_CR_REG },
+
+#define CRS CRFS + 1
+  { 0x3, 18, NULL, NULL, PPC_OPERAND_CR_REG | PPC_OPERAND_OPTIONAL },
+
+  /* The CT field in an X form instruction.  */
+#define CT CRS + 1
+  /* The MO field in an mbar instruction.  */
+#define MO CT
+  { 0x1f, 21, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The D field in a D form instruction.  This is a displacement off
+     a register, and implies that the next operand is a register in
+     parentheses.  */
+#define D CT + 1
+  { 0xffff, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED },
+
+  /* The D8 field in a D form instruction.  This is a displacement off
+     a register, and implies that the next operand is a register in
+     parentheses.  */
+#define D8 D + 1
+  { 0xff, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED },
+
+  /* The DCMX field in an X form instruction.  */
+#define DCMX D8 + 1
+  { 0x7f, 16, NULL, NULL, 0 },
+
+  /* The split DCMX field in an X form instruction.  */
+#define DCMXS DCMX + 1
+  { 0x7f, PPC_OPSHIFT_INV, insert_dcmxs, extract_dcmxs, 0 },
+
+  /* The DQ field in a DQ form instruction.  This is like D, but the
+     lower four bits are forced to zero. */
+#define DQ DCMXS + 1
+  { 0xfff0, 0, NULL, NULL,
+    PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DQ },
+
+  /* The DS field in a DS form instruction.  This is like D, but the
+     lower two bits are forced to zero.  */
+#define DS DQ + 1
+  { 0xfffc, 0, NULL, NULL,
+    PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DS },
+
+  /* The DUIS or BHRBE fields in an XFX form instruction, 10 bits
+     unsigned immediate.  */
+#define DUIS DS + 1
+#define BHRBE DUIS
+  { 0x3ff, 11, NULL, NULL, 0 },
+
+  /* The split D field in a DX form instruction.  */
+#define DXD DUIS + 1
+  { 0xffff, PPC_OPSHIFT_INV, insert_dxd, extract_dxd,
+    PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT},
+
+  /* The split ND field in a DX form instruction.
+     This is the same as the DX field, only negated.  */
+#define NDXD DXD + 1
+  { 0xffff, PPC_OPSHIFT_INV, insert_dxdn, extract_dxdn,
+    PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT},
+
+  /* The E field in a wrteei instruction.  */
+  /* And the W bit in the pair singles instructions.  */
+  /* And the ST field in a VX form instruction.  */
+#define E NDXD + 1
+#define PSW E
+#define ST E
+  { 0x1, 15, NULL, NULL, 0 },
+
+  /* The FL1 field in a POWER SC form instruction.  */
+#define FL1 E + 1
+  /* The U field in an X form instruction.  */
+#define U FL1
+  { 0xf, 12, NULL, NULL, 0 },
+
+  /* The FL2 field in a POWER SC form instruction.  */
+#define FL2 FL1 + 1
+  { 0x7, 2, NULL, NULL, 0 },
+
+  /* The FLM field in an XFL form instruction.  */
+#define FLM FL2 + 1
+  { 0xff, 17, NULL, NULL, 0 },
+
+  /* The FRA field in an X or A form instruction.  */
+#define FRA FLM + 1
+#define FRA_MASK (0x1f << 16)
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FRAp field of DFP instructions.  */
+#define FRAp FRA + 1
+  { 0x1e, 16, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FRB field in an X or A form instruction.  */
+#define FRB FRAp + 1
+#define FRB_MASK (0x1f << 11)
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FRBp field of DFP instructions.  */
+#define FRBp FRB + 1
+  { 0x1e, 11, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FRC field in an A form instruction.  */
+#define FRC FRBp + 1
+#define FRC_MASK (0x1f << 6)
+  { 0x1f, 6, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FRS field in an X form instruction or the FRT field in a D, X
+     or A form instruction.  */
+#define FRS FRC + 1
+#define FRT FRS
+  { 0x1f, 21, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FRSp field of stfdp or the FRTp field of lfdp and DFP
+     instructions.  */
+#define FRSp FRS + 1
+#define FRTp FRSp
+  { 0x1e, 21, NULL, NULL, PPC_OPERAND_FPR },
+
+  /* The FXM field in an XFX instruction.  */
+#define FXM FRSp + 1
+  { 0xff, 12, insert_fxm, extract_fxm, 0 },
+
+  /* Power4 version for mfcr.  */
+#define FXM4 FXM + 1
+  { 0xff, 12, insert_fxm, extract_fxm,
+    PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE},
+  /* If the FXM4 operand is omitted, use the sentinel value -1.  */
+  { -1, -1, NULL, NULL, 0},
+
+  /* The IMM20 field in an LI instruction.  */
+#define IMM20 FXM4 + 2
+  { 0xfffff, PPC_OPSHIFT_INV, insert_li20, extract_li20, PPC_OPERAND_SIGNED},
+
+  /* The L field in a D or X form instruction.  */
+#define L IMM20 + 1
+  { 0x1, 21, NULL, NULL, 0 },
+
+  /* The optional L field in tlbie and tlbiel instructions.  */
+#define LOPT L + 1
+  /* The R field in a HTM X form instruction.  */
+#define HTM_R LOPT
+  { 0x1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The optional (for 32-bit) L field in cmp[l][i] instructions.  */
+#define L32OPT LOPT + 1
+  { 0x1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL32 },
+
+  /* The L field in dcbf instruction.  */
+#define L2OPT L32OPT + 1
+  { 0x3, 21, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The LEV field in a POWER SVC form instruction.  */
+#define SVC_LEV L2OPT + 1
+  { 0x7f, 5, NULL, NULL, 0 },
+
+  /* The LEV field in an SC form instruction.  */
+#define LEV SVC_LEV + 1
+  { 0x7f, 5, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The LI field in an I form instruction.  The lower two bits are
+     forced to zero.  */
+#define LI LEV + 1
+  { 0x3fffffc, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED },
+
+  /* The LI field in an I form instruction when used as an absolute
+     address.  */
+#define LIA LI + 1
+  { 0x3fffffc, 0, NULL, NULL, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED },
+
+  /* The LS or WC field in an X (sync or wait) form instruction.  */
+#define LS LIA + 1
+#define WC LS
+  { 0x3, 21, insert_ls, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The ME field in an M form instruction.  */
+#define ME LS + 1
+#define ME_MASK (0x1f << 1)
+  { 0x1f, 1, NULL, NULL, 0 },
+
+  /* The MB and ME fields in an M form instruction expressed as a single
+     operand which is a bitmask indicating which bits to select.  This
+     is a two operand form using PPC_OPERAND_NEXT.  See the
+     description in opcode/ppc.h for what this means.  */
+#define MBE ME + 1
+  { 0x1f, 6, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT },
+  { -1, 0, insert_mbe, extract_mbe, 0 },
+
+  /* The MB or ME field in an MD or MDS form instruction.  The high
+     bit is wrapped to the low end.  */
+#define MB6 MBE + 2
+#define ME6 MB6
+#define MB6_MASK (0x3f << 5)
+  { 0x3f, 5, insert_mb6, extract_mb6, 0 },
+
+  /* The NB field in an X form instruction.  The value 32 is stored as
+     0.  */
+#define NB MB6 + 1
+  { 0x1f, 11, NULL, extract_nb, PPC_OPERAND_PLUS1 },
+
+  /* The NBI field in an lswi instruction, which has special value
+     restrictions.  The value 32 is stored as 0.  */
+#define NBI NB + 1
+  { 0x1f, 11, insert_nbi, extract_nb, PPC_OPERAND_PLUS1 },
+
+  /* The NSI field in a D form instruction.  This is the same as the
+     SI field, only negated.  */
+#define NSI NBI + 1
+  { 0xffff, 0, insert_nsi, extract_nsi,
+    PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED },
+
+  /* The NSI field in a D form instruction when we accept a wide range
+     of positive values.  */
+#define NSISIGNOPT NSI + 1
+  { 0xffff, 0, insert_nsi, extract_nsi,
+    PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT },
+
+  /* The RA field in an D, DS, DQ, X, XO, M, or MDS form instruction.  */
+#define RA NSISIGNOPT + 1
+#define RA_MASK (0x1f << 16)
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR },
+
+  /* As above, but 0 in the RA field means zero, not r0.  */
+#define RA0 RA + 1
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR_0 },
+
+  /* The RA field in the DQ form lq or an lswx instruction, which have special
+     value restrictions.  */
+#define RAQ RA0 + 1
+#define RAX RAQ
+  { 0x1f, 16, insert_raq, NULL, PPC_OPERAND_GPR_0 },
+
+  /* The RA field in a D or X form instruction which is an updating
+     load, which means that the RA field may not be zero and may not
+     equal the RT field.  */
+#define RAL RAQ + 1
+  { 0x1f, 16, insert_ral, NULL, PPC_OPERAND_GPR_0 },
+
+  /* The RA field in an lmw instruction, which has special value
+     restrictions.  */
+#define RAM RAL + 1
+  { 0x1f, 16, insert_ram, NULL, PPC_OPERAND_GPR_0 },
+
+  /* The RA field in a D or X form instruction which is an updating
+     store or an updating floating point load, which means that the RA
+     field may not be zero.  */
+#define RAS RAM + 1
+  { 0x1f, 16, insert_ras, NULL, PPC_OPERAND_GPR_0 },
+
+  /* The RA field of the tlbwe, dccci and iccci instructions,
+     which are optional.  */
+#define RAOPT RAS + 1
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL },
+
+  /* The RB field in an X, XO, M, or MDS form instruction.  */
+#define RB RAOPT + 1
+#define RB_MASK (0x1f << 11)
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_GPR },
+
+  /* The RB field in an X form instruction when it must be the same as
+     the RS field in the instruction.  This is used for extended
+     mnemonics like mr.  */
+#define RBS RB + 1
+  { 0x1f, 11, insert_rbs, extract_rbs, PPC_OPERAND_FAKE },
+
+  /* The RB field in an lswx instruction, which has special value
+     restrictions.  */
+#define RBX RBS + 1
+  { 0x1f, 11, insert_rbx, NULL, PPC_OPERAND_GPR },
+
+  /* The RB field of the dccci and iccci instructions, which are optional.  */
+#define RBOPT RBX + 1
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL },
+
+  /* The RC register field in an maddld, maddhd or maddhdu instruction.  */
+#define RC RBOPT + 1
+  { 0x1f, 6, NULL, NULL, PPC_OPERAND_GPR },
+
+  /* The RS field in a D, DS, X, XFX, XS, M, MD or MDS form
+     instruction or the RT field in a D, DS, X, XFX or XO form
+     instruction.  */
+#define RS RC + 1
+#define RT RS
+#define RT_MASK (0x1f << 21)
+#define RD RS
+  { 0x1f, 21, NULL, NULL, PPC_OPERAND_GPR },
+
+  /* The RS and RT fields of the DS form stq and DQ form lq instructions,
+     which have special value restrictions.  */
+#define RSQ RS + 1
+#define RTQ RSQ
+  { 0x1e, 21, NULL, NULL, PPC_OPERAND_GPR },
+
+  /* The RS field of the tlbwe instruction, which is optional.  */
+#define RSO RSQ + 1
+#define RTO RSO
+  { 0x1f, 21, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL },
+
+  /* The RX field of the SE_RR form instruction.  */
+#define RX RSO + 1
+  { 0x1f, PPC_OPSHIFT_INV, insert_rx, extract_rx, PPC_OPERAND_GPR },
+
+  /* The ARX field of the SE_RR form instruction.  */
+#define ARX RX + 1
+  { 0x1f, PPC_OPSHIFT_INV, insert_arx, extract_arx, PPC_OPERAND_GPR },
+
+  /* The RY field of the SE_RR form instruction.  */
+#define RY ARX + 1
+#define RZ RY
+  { 0x1f, PPC_OPSHIFT_INV, insert_ry, extract_ry, PPC_OPERAND_GPR },
+
+  /* The ARY field of the SE_RR form instruction.  */
+#define ARY RY + 1
+  { 0x1f, PPC_OPSHIFT_INV, insert_ary, extract_ary, PPC_OPERAND_GPR },
+
+  /* The SCLSCI8 field in a D form instruction.  */
+#define SCLSCI8 ARY + 1
+  { 0xffffffff, PPC_OPSHIFT_INV, insert_sci8, extract_sci8, 0 },
+
+  /* The SCLSCI8N field in a D form instruction.  This is the same as the
+     SCLSCI8 field, only negated.  */
+#define SCLSCI8N SCLSCI8 + 1
+  { 0xffffffff, PPC_OPSHIFT_INV, insert_sci8n, extract_sci8n,
+    PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED },
+
+  /* The SD field of the SD4 form instruction.  */
+#define SE_SD SCLSCI8N + 1
+  { 0xf, 8, NULL, NULL, PPC_OPERAND_PARENS },
+
+  /* The SD field of the SD4 form instruction, for halfword.  */
+#define SE_SDH SE_SD + 1
+  { 0x1e, PPC_OPSHIFT_INV, insert_sd4h, extract_sd4h, PPC_OPERAND_PARENS },
+
+  /* The SD field of the SD4 form instruction, for word.  */
+#define SE_SDW SE_SDH + 1
+  { 0x3c, PPC_OPSHIFT_INV, insert_sd4w, extract_sd4w, PPC_OPERAND_PARENS },
+
+  /* The SH field in an X or M form instruction.  */
+#define SH SE_SDW + 1
+#define SH_MASK (0x1f << 11)
+  /* The other UIMM field in a EVX form instruction.  */
+#define EVUIMM SH
+  /* The FC field in an atomic X form instruction.  */
+#define FC SH
+  { 0x1f, 11, NULL, NULL, 0 },
+
+  /* The SI field in a HTM X form instruction.  */
+#define HTM_SI SH + 1
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_SIGNED },
+
+  /* The SH field in an MD form instruction.  This is split.  */
+#define SH6 HTM_SI + 1
+#define SH6_MASK ((0x1f << 11) | (1 << 1))
+  { 0x3f, PPC_OPSHIFT_INV, insert_sh6, extract_sh6, 0 },
+
+  /* The SH field of the tlbwe instruction, which is optional.  */
+#define SHO SH6 + 1
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The SI field in a D form instruction.  */
+#define SI SHO + 1
+  { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNED },
+
+  /* The SI field in a D form instruction when we accept a wide range
+     of positive values.  */
+#define SISIGNOPT SI + 1
+  { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT },
+
+  /* The SI8 field in a D form instruction.  */
+#define SI8 SISIGNOPT + 1
+  { 0xff, 0, NULL, NULL, PPC_OPERAND_SIGNED },
+
+  /* The SPR field in an XFX form instruction.  This is flipped--the
+     lower 5 bits are stored in the upper 5 and vice-versa.  */
+#define SPR SI8 + 1
+#define PMR SPR
+#define TMR SPR
+#define SPR_MASK (0x3ff << 11)
+  { 0x3ff, 11, insert_spr, extract_spr, 0 },
+
+  /* The BAT index number in an XFX form m[ft]ibat[lu] instruction.  */
+#define SPRBAT SPR + 1
+#define SPRBAT_MASK (0x3 << 17)
+  { 0x3, 17, NULL, NULL, 0 },
+
+  /* The SPRG register number in an XFX form m[ft]sprg instruction.  */
+#define SPRG SPRBAT + 1
+  { 0x1f, 16, insert_sprg, extract_sprg, 0 },
+
+  /* The SR field in an X form instruction.  */
+#define SR SPRG + 1
+  /* The 4-bit UIMM field in a VX form instruction.  */
+#define UIMM4 SR
+  { 0xf, 16, NULL, NULL, 0 },
+
+  /* The STRM field in an X AltiVec form instruction.  */
+#define STRM SR + 1
+  /* The T field in a tlbilx form instruction.  */
+#define T STRM
+  /* The L field in wclr instructions.  */
+#define L2 STRM
+  { 0x3, 21, NULL, NULL, 0 },
+
+  /* The ESYNC field in an X (sync) form instruction.  */
+#define ESYNC STRM + 1
+  { 0xf, 16, insert_esync, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The SV field in a POWER SC form instruction.  */
+#define SV ESYNC + 1
+  { 0x3fff, 2, NULL, NULL, 0 },
+
+  /* The TBR field in an XFX form instruction.  This is like the SPR
+     field, but it is optional.  */
+#define TBR SV + 1
+  { 0x3ff, 11, insert_tbr, extract_tbr,
+    PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE},
+  /* If the TBR operand is omitted, use the value 268.  */
+  { -1, 268, NULL, NULL, 0},
+
+  /* The TO field in a D or X form instruction.  */
+#define TO TBR + 2
+#define DUI TO
+#define TO_MASK (0x1f << 21)
+  { 0x1f, 21, NULL, NULL, 0 },
+
+  /* The UI field in a D form instruction.  */
+#define UI TO + 1
+  { 0xffff, 0, NULL, NULL, 0 },
+
+#define UISIGNOPT UI + 1
+  { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNOPT },
+
+  /* The IMM field in an SE_IM5 instruction.  */
+#define UI5 UISIGNOPT + 1
+  { 0x1f, 4, NULL, NULL, 0 },
+
+  /* The OIMM field in an SE_OIM5 instruction.  */
+#define OIMM5 UI5 + 1
+  { 0x1f, PPC_OPSHIFT_INV, insert_oimm, extract_oimm, PPC_OPERAND_PLUS1 },
+
+  /* The UI7 field in an SE_LI instruction.  */
+#define UI7 OIMM5 + 1
+  { 0x7f, 4, NULL, NULL, 0 },
+
+  /* The VA field in a VA, VX or VXR form instruction.  */
+#define VA UI7 + 1
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_VR },
+
+  /* The VB field in a VA, VX or VXR form instruction.  */
+#define VB VA + 1
+  { 0x1f, 11, NULL, NULL, PPC_OPERAND_VR },
+
+  /* The VC field in a VA form instruction.  */
+#define VC VB + 1
+  { 0x1f, 6, NULL, NULL, PPC_OPERAND_VR },
+
+  /* The VD or VS field in a VA, VX, VXR or X form instruction.  */
+#define VD VC + 1
+#define VS VD
+  { 0x1f, 21, NULL, NULL, PPC_OPERAND_VR },
+
+  /* The SIMM field in a VX form instruction, and TE in Z form.  */
+#define SIMM VD + 1
+#define TE SIMM
+  { 0x1f, 16, NULL, NULL, PPC_OPERAND_SIGNED},
+
+  /* The UIMM field in a VX form instruction.  */
+#define UIMM SIMM + 1
+#define DCTL UIMM
+  { 0x1f, 16, NULL, NULL, 0 },
+
+  /* The 3-bit UIMM field in a VX form instruction.  */
+#define UIMM3 UIMM + 1
+  { 0x7, 16, NULL, NULL, 0 },
+
+  /* The 6-bit UIM field in a X form instruction.  */
+#define UIM6 UIMM3 + 1
+  { 0x3f, 16, NULL, NULL, 0 },
+
+  /* The SIX field in a VX form instruction.  */
+#define SIX UIM6 + 1
+  { 0xf, 11, NULL, NULL, 0 },
+
+  /* The PS field in a VX form instruction.  */
+#define PS SIX + 1
+  { 0x1, 9, NULL, NULL, 0 },
+
+  /* The SHB field in a VA form instruction.  */
+#define SHB PS + 1
+  { 0xf, 6, NULL, NULL, 0 },
+
+  /* The other UIMM field in a half word EVX form instruction.  */
+#define EVUIMM_2 SHB + 1
+  { 0x3e, 10, NULL, NULL, PPC_OPERAND_PARENS },
+
+  /* The other UIMM field in a word EVX form instruction.  */
+#define EVUIMM_4 EVUIMM_2 + 1
+  { 0x7c, 9, NULL, NULL, PPC_OPERAND_PARENS },
+
+  /* The other UIMM field in a double EVX form instruction.  */
+#define EVUIMM_8 EVUIMM_4 + 1
+  { 0xf8, 8, NULL, NULL, PPC_OPERAND_PARENS },
+
+  /* The WS or DRM field in an X form instruction.  */
+#define WS EVUIMM_8 + 1
+#define DRM WS
+  { 0x7, 11, NULL, NULL, 0 },
+
+  /* PowerPC paired singles extensions.  */
+  /* W bit in the pair singles instructions for x type instructions.  */
+#define PSWM WS + 1
+  /* The BO16 field in a BD8 form instruction.  */
+#define BO16 PSWM
+  {  0x1, 10, 0, 0, 0 },
+
+  /* IDX bits for quantization in the pair singles instructions.  */
+#define PSQ PSWM + 1
+  {  0x7, 12, 0, 0, 0 },
+
+  /* IDX bits for quantization in the pair singles x-type instructions.  */
+#define PSQM PSQ + 1
+  {  0x7, 7, 0, 0, 0 },
+
+  /* Smaller D field for quantization in the pair singles instructions.  */
+#define PSD PSQM + 1
+  {  0xfff, 0, 0, 0,  PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED },
+
+  /* The L field in an mtmsrd or A form instruction or R or W in an X form.  */
+#define A_L PSD + 1
+#define W A_L
+#define X_R A_L
+  { 0x1, 16, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The RMC or CY field in a Z23 form instruction.  */
+#define RMC A_L + 1
+#define CY RMC
+  { 0x3, 9, NULL, NULL, 0 },
+
+#define R RMC + 1
+  { 0x1, 16, NULL, NULL, 0 },
+
+#define RIC R + 1
+  { 0x3, 18, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+#define PRS RIC + 1
+  { 0x1, 17, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+#define SP PRS + 1
+  { 0x3, 19, NULL, NULL, 0 },
+
+#define S SP + 1
+  { 0x1, 20, NULL, NULL, 0 },
+
+  /* The S field in a XL form instruction.  */
+#define SXL S + 1
+  { 0x1, 11, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE},
+  /* If the SXL operand is omitted, use the value 1.  */
+  { -1, 1, NULL, NULL, 0},
+
+  /* SH field starting at bit position 16.  */
+#define SH16 SXL + 2
+  /* The DCM and DGM fields in a Z form instruction.  */
+#define DCM SH16
+#define DGM DCM
+  { 0x3f, 10, NULL, NULL, 0 },
+
+  /* The EH field in larx instruction.  */
+#define EH SH16 + 1
+  { 0x1, 0, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The L field in an mtfsf or XFL form instruction.  */
+  /* The A field in a HTM X form instruction.  */
+#define XFL_L EH + 1
+#define HTM_A XFL_L
+  { 0x1, 25, NULL, NULL, PPC_OPERAND_OPTIONAL},
+
+  /* Xilinx APU related masks and macros */
+#define FCRT XFL_L + 1
+#define FCRT_MASK (0x1f << 21)
+  { 0x1f, 21, 0, 0, PPC_OPERAND_FCR },
+
+  /* Xilinx FSL related masks and macros */
+#define FSL FCRT + 1
+#define FSL_MASK (0x1f << 11)
+  { 0x1f, 11, 0, 0, PPC_OPERAND_FSL },
+
+  /* Xilinx UDI related masks and macros */
+#define URT FSL + 1
+  { 0x1f, 21, 0, 0, PPC_OPERAND_UDI },
+
+#define URA URT + 1
+  { 0x1f, 16, 0, 0, PPC_OPERAND_UDI },
+
+#define URB URA + 1
+  { 0x1f, 11, 0, 0, PPC_OPERAND_UDI },
+
+#define URC URB + 1
+  { 0x1f, 6, 0, 0, PPC_OPERAND_UDI },
+
+  /* The VLESIMM field in a D form instruction.  */
+#define VLESIMM URC + 1
+  { 0xffff, PPC_OPSHIFT_INV, insert_vlesi, extract_vlesi,
+    PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT },
+
+  /* The VLENSIMM field in a D form instruction.  */
+#define VLENSIMM VLESIMM + 1
+  { 0xffff, PPC_OPSHIFT_INV, insert_vlensi, extract_vlensi,
+    PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT },
+
+  /* The VLEUIMM field in a D form instruction.  */
+#define VLEUIMM VLENSIMM + 1
+  { 0xffff, PPC_OPSHIFT_INV, insert_vleui, extract_vleui, 0 },
+
+  /* The VLEUIMML field in a D form instruction.  */
+#define VLEUIMML VLEUIMM + 1
+  { 0xffff, PPC_OPSHIFT_INV, insert_vleil, extract_vleil, 0 },
+
+  /* The XT and XS fields in an XX1 or XX3 form instruction.  This is split.  */
+#define XS6 VLEUIMML + 1
+#define XT6 XS6
+  { 0x3f, PPC_OPSHIFT_INV, insert_xt6, extract_xt6, PPC_OPERAND_VSR },
+
+  /* The XT and XS fields in a DQ form VSX instruction.  This is split.  */
+#define XSQ6 XT6 + 1
+#define XTQ6 XSQ6
+  { 0x3f, PPC_OPSHIFT_INV, insert_xtq6, extract_xtq6, PPC_OPERAND_VSR },
+
+  /* The XA field in an XX3 form instruction.  This is split.  */
+#define XA6 XTQ6 + 1
+  { 0x3f, PPC_OPSHIFT_INV, insert_xa6, extract_xa6, PPC_OPERAND_VSR },
+
+  /* The XB field in an XX2 or XX3 form instruction.  This is split.  */
+#define XB6 XA6 + 1
+  { 0x3f, PPC_OPSHIFT_INV, insert_xb6, extract_xb6, PPC_OPERAND_VSR },
+
+  /* The XB field in an XX3 form instruction when it must be the same as
+     the XA field in the instruction.  This is used in extended mnemonics
+     like xvmovdp.  This is split.  */
+#define XB6S XB6 + 1
+  { 0x3f, PPC_OPSHIFT_INV, insert_xb6s, extract_xb6s, PPC_OPERAND_FAKE },
+
+  /* The XC field in an XX4 form instruction.  This is split.  */
+#define XC6 XB6S + 1
+  { 0x3f, PPC_OPSHIFT_INV, insert_xc6, extract_xc6, PPC_OPERAND_VSR },
+
+  /* The DM or SHW field in an XX3 form instruction.  */
+#define DM XC6 + 1
+#define SHW DM
+  { 0x3, 8, NULL, NULL, 0 },
+
+  /* The DM field in an extended mnemonic XX3 form instruction.  */
+#define DMEX DM + 1
+  { 0x3, 8, insert_dm, extract_dm, 0 },
+
+  /* The UIM field in an XX2 form instruction.  */
+#define UIM DMEX + 1
+  /* The 2-bit UIMM field in a VX form instruction.  */
+#define UIMM2 UIM
+  /* The 2-bit L field in a darn instruction.  */
+#define LRAND UIM
+  { 0x3, 16, NULL, NULL, 0 },
+
+#define ERAT_T UIM + 1
+  { 0x7, 21, NULL, NULL, 0 },
+
+#define IH ERAT_T + 1
+  { 0x7, 21, NULL, NULL, PPC_OPERAND_OPTIONAL },
+
+  /* The 8-bit IMM8 field in a XX1 form instruction.  */
+#define IMM8 IH + 1
+  { 0xff, 11, NULL, NULL, PPC_OPERAND_SIGNOPT },
+};
+
+/* Number of entries in the powerpc_operands table.  */
+const unsigned int num_powerpc_operands =
+  sizeof (powerpc_operands) / sizeof (powerpc_operands[0]);
+
+/* The functions used to insert and extract complicated operands.  */
+
+/* The ARX, ARY, RX and RY operands are alternate encodings of GPRs.  */
+
+/* Store VALUE (8..23) as VALUE - 8 in the low four bits of INSN.
+   Any other VALUE sets *ERRMSG and yields 0.  */
+static unsigned long
+insert_arx (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  if (value < 8 || value >= 24)
+    {
+      *errmsg = _("invalid register");
+      return 0;
+    }
+
+  return insn | ((value - 8) & 0xf);
+}
+
+/* Return the low four bits of INSN plus 8.  */
+static long
+extract_arx (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long field = insn & 0xf;
+
+  return field + 8;
+}
+
+/* Store VALUE (8..23) as VALUE - 8 in bits 4..7 of INSN.  Any other
+   VALUE sets *ERRMSG and yields 0.  */
+static unsigned long
+insert_ary (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  if (value < 8 || value >= 24)
+    {
+      *errmsg = _("invalid register");
+      return 0;
+    }
+
+  return insn | (((value - 8) & 0xf) << 4);
+}
+
+/* Return bits 4..7 of INSN plus 8.  */
+static long
+extract_ary (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long field = (insn >> 4) & 0xf;
+
+  return field + 8;
+}
+
+/* OR the RX encoding of VALUE into INSN: 0..7 are stored unchanged
+   and 24..31 are stored as VALUE - 16.  Any other VALUE sets *ERRMSG
+   and yields 0.  */
+static unsigned long
+insert_rx (unsigned long insn,
+	   long value,
+	   ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	   const char **errmsg)
+{
+  long field;
+
+  if (value >= 0 && value < 8)
+    field = value;
+  else if (value >= 24 && value <= 31)
+    field = value - 16;
+  else
+    {
+      *errmsg = _("invalid register");
+      return 0;
+    }
+
+  return insn | field;
+}
+
+/* Decode the low four bits of INSN: 0..7 are returned unchanged,
+   8..15 are returned as 24..31.  */
+static long
+extract_rx (unsigned long insn,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    int *invalid ATTRIBUTE_UNUSED)
+{
+  int field = insn & 0xf;
+
+  /* The masked field can never be negative, so only the upper bound
+     needs checking.  */
+  return field < 8 ? field : field + 16;
+}
+
+/* OR the RY encoding of VALUE into bits 4 and up of INSN: 0..7 are
+   stored unchanged and 24..31 are stored as VALUE - 16.  Any other
+   VALUE sets *ERRMSG and yields 0.  */
+static unsigned long
+insert_ry (unsigned long insn,
+	   long value,
+	   ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	   const char **errmsg)
+{
+  long field;
+
+  if (value >= 0 && value < 8)
+    field = value;
+  else if (value >= 24 && value <= 31)
+    field = value - 16;
+  else
+    {
+      *errmsg = _("invalid register");
+      return 0;
+    }
+
+  return insn | (field << 4);
+}
+
+/* Decode bits 4..7 of INSN: 0..7 are returned unchanged, 8..15 are
+   returned as 24..31.  */
+static long
+extract_ry (unsigned long insn,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    int *invalid ATTRIBUTE_UNUSED)
+{
+  int field = (insn >> 4) & 0xf;
+
+  /* The masked field can never be negative, so only the upper bound
+     needs checking.  */
+  return field < 8 ? field : field + 16;
+}
+
+/* The BA field in an XL form instruction when it must be the same as
+   the BT field in the same instruction.  This operand is marked FAKE.
+   The insertion function just copies the BT field into the BA field,
+   and the extraction function just checks that the fields are the
+   same.  */
+
+/* Copy the five bits at 21..25 of INSN into bits 16..20.  VALUE is
+   ignored.  */
+static unsigned long
+insert_bat (unsigned long insn,
+	    long value ATTRIBUTE_UNUSED,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long bt = (insn >> 21) & 0x1f;
+
+  return insn | (bt << 16);
+}
+
+/* Flag INSN as invalid unless bits 21..25 equal bits 16..20.  Always
+   returns 0.  */
+static long
+extract_bat (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  unsigned long bt = (insn >> 21) & 0x1f;
+  unsigned long ba = (insn >> 16) & 0x1f;
+
+  if (bt != ba)
+    *invalid = 1;
+  return 0;
+}
+
+/* The BB field in an XL form instruction when it must be the same as
+   the BA field in the same instruction.  This operand is marked FAKE.
+   The insertion function just copies the BA field into the BB field,
+   and the extraction function just checks that the fields are the
+   same.  */
+
+/* Copy the five bits at 16..20 of INSN into bits 11..15.  VALUE is
+   ignored.  */
+static unsigned long
+insert_bba (unsigned long insn,
+	    long value ATTRIBUTE_UNUSED,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long ba = (insn >> 16) & 0x1f;
+
+  return insn | (ba << 11);
+}
+
+/* Flag INSN as invalid unless bits 16..20 equal bits 11..15.  Always
+   returns 0.  */
+static long
+extract_bba (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  unsigned long ba = (insn >> 16) & 0x1f;
+  unsigned long bb = (insn >> 11) & 0x1f;
+
+  if (ba != bb)
+    *invalid = 1;
+  return 0;
+}
+
+/* The BD field in a B form instruction when the - modifier is used.
+   This modifier means that the branch is not expected to be taken.
+   For chips built to versions of the architecture prior to version 2
+   (ie. not Power4 compatible), we set the y bit of the BO field to 1
+   if the offset is negative.  When extracting, we require that the y
+   bit be 1 and that the offset be positive, since if the y bit is 0
+   we just want to print the normal form of the instruction.
+   Power4 compatible targets use two bits, "a", and "t", instead of
+   the "y" bit.  "at" == 00 => no hint, "at" == 01 => unpredictable,
+   "at" == 10 => not taken, "at" == 11 => taken.  The "t" bit is 00001
+   in BO field, the "a" bit is 00010 for branch on CR(BI) and 01000
+   for branch on CTR.  We only handle the taken/not-taken hint here.
+   Note that we don't relax the conditions tested here when
+   disassembling with -Many because insns using extract_bdm and
+   extract_bdp always occur in pairs.  One or the other will always
+   be valid.  */
+
+/* Dialect bits tested by the branch hint and BO helpers below: when
+   none of them is set the single "y" bit encoding is used, otherwise
+   the two-bit "at" encoding is used.  */
+#define ISA_V2 (PPC_OPCODE_POWER4 | PPC_OPCODE_E500MC | PPC_OPCODE_TITAN)
+
+/* Insert the branch displacement VALUE together with the "not taken"
+   hint.  Without ISA_V2 the y bit (bit 21) is set when VALUE has bit
+   15 set.  With ISA_V2 the hint bits are ORed into the BO field
+   according to which of the two BO patterns INSN already holds.  */
+static unsigned long
+insert_bdm (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  if ((dialect & ISA_V2) != 0)
+    {
+      unsigned long bo_kind = insn & (0x14 << 21);
+
+      if (bo_kind == (0x04 << 21))
+	insn |= 0x02 << 21;
+      else if (bo_kind == (0x10 << 21))
+	insn |= 0x08 << 21;
+    }
+  else if ((value & 0x8000) != 0)
+    insn |= 1 << 21;
+
+  return insn | (value & 0xfffc);
+}
+
+/* Extract the sign-extended 16-bit branch displacement, flagging
+   INSN as invalid unless its hint bits match what insert_bdm would
+   have produced for DIALECT.  */
+static long
+extract_bdm (unsigned long insn,
+	     ppc_cpu_t dialect,
+	     int *invalid)
+{
+  if ((dialect & ISA_V2) != 0)
+    {
+      int cr_form = (insn & (0x17 << 21)) == (0x06 << 21);
+      int ctr_form = (insn & (0x1d << 21)) == (0x18 << 21);
+
+      if (!cr_form && !ctr_form)
+	*invalid = 1;
+    }
+  else
+    {
+      int y_clear = (insn & (1 << 21)) == 0;
+      int sign_clear = (insn & (1 << 15)) == 0;
+
+      /* The y bit must be set exactly when the displacement sign bit
+	 is set.  */
+      if (y_clear != sign_clear)
+	*invalid = 1;
+    }
+
+  return ((insn & 0xfffc) ^ 0x8000) - 0x8000;
+}
+
+/* The BD field in a B form instruction when the + modifier is used.
+   This is like BDM, above, except that the branch is expected to be
+   taken.  */
+
+/* Insert the branch displacement VALUE together with the "taken"
+   hint.  Without ISA_V2 the y bit (bit 21) is set when VALUE has bit
+   15 clear.  With ISA_V2 the hint bits are ORed into the BO field
+   according to which of the two BO patterns INSN already holds.  */
+static unsigned long
+insert_bdp (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  if ((dialect & ISA_V2) != 0)
+    {
+      unsigned long bo_kind = insn & (0x14 << 21);
+
+      if (bo_kind == (0x04 << 21))
+	insn |= 0x03 << 21;
+      else if (bo_kind == (0x10 << 21))
+	insn |= 0x09 << 21;
+    }
+  else if ((value & 0x8000) == 0)
+    insn |= 1 << 21;
+
+  return insn | (value & 0xfffc);
+}
+
+/* Extract the sign-extended 16-bit branch displacement, flagging
+   INSN as invalid unless its hint bits match what insert_bdp would
+   have produced for DIALECT.  */
+static long
+extract_bdp (unsigned long insn,
+	     ppc_cpu_t dialect,
+	     int *invalid)
+{
+  if ((dialect & ISA_V2) != 0)
+    {
+      int cr_form = (insn & (0x17 << 21)) == (0x07 << 21);
+      int ctr_form = (insn & (0x1d << 21)) == (0x19 << 21);
+
+      if (!cr_form && !ctr_form)
+	*invalid = 1;
+    }
+  else
+    {
+      int y_clear = (insn & (1 << 21)) == 0;
+      int sign_clear = (insn & (1 << 15)) == 0;
+
+      /* The y bit must be set exactly when the displacement sign bit
+	 is clear.  */
+      if (y_clear == sign_clear)
+	*invalid = 1;
+    }
+
+  return ((insn & 0xfffc) ^ 0x8000) - 0x8000;
+}
+
+/* Return nonzero if VALUE is a legal BO encoding under the "y" bit
+   scheme.  Certain encodings have bits that are required to be zero.
+   These are (z must be zero, y may be anything):
+	 0000y
+	 0001y
+	 001zy
+	 0100y
+	 0101y
+	 011zy
+	 1z00y
+	 1z01y
+	 1z1zz  */
+static inline int
+valid_bo_pre_v2 (long value)
+{
+  switch (value & 0x14)
+    {
+    case 0x00:
+      return 1;
+    case 0x04:
+      return (value & 0x2) == 0;
+    case 0x10:
+      return (value & 0x8) == 0;
+    default:
+      return value == 0x14;
+    }
+}
+
+/* Return nonzero if VALUE is a legal BO encoding under the "at" bit
+   scheme.  Certain encodings have bits that are required to be zero.
+   These are (z must be zero, a & t may be anything):
+	 0000z
+	 0001z
+	 001at
+	 0100z
+	 0101z
+	 011at
+	 1a00t
+	 1a01t
+	 1z1zz  */
+static inline int
+valid_bo_post_v2 (long value)
+{
+  long ctl = value & 0x14;
+
+  if (ctl == 0x14)
+    return value == 0x14;
+  if (ctl == 0)
+    return (value & 0x1) == 0;
+  return 1;
+}
+
+/* Check for legal values of a BO field.  */
+
+/* Return nonzero if VALUE is a legal BO field for DIALECT.  EXTRACT
+   is nonzero when called while decoding an instruction.  */
+static int
+valid_bo (long value, ppc_cpu_t dialect, int extract)
+{
+  /* When disassembling with -Many, accept either encoding on the
+     second pass through opcodes.  */
+  if (extract && dialect == ~(ppc_cpu_t) PPC_OPCODE_ANY)
+    return valid_bo_pre_v2 (value) || valid_bo_post_v2 (value);
+
+  return ((dialect & ISA_V2) == 0
+	  ? valid_bo_pre_v2 (value)
+	  : valid_bo_post_v2 (value));
+}
+
+/* The BO field in a B form instruction.  Warn about attempts to set
+   the field to an illegal value.  */
+
+/* OR the five-bit BO VALUE into bits 21..25 of INSN.  *ERRMSG is set
+   when VALUE is not a legal BO for DIALECT, or when INSN has primary
+   opcode 19 with bit 0x400 set and VALUE has bit 4 clear; the field
+   is inserted regardless.  */
+static unsigned long
+insert_bo (unsigned long insn,
+	   long value,
+	   ppc_cpu_t dialect,
+	   const char **errmsg)
+{
+  if (valid_bo (value, dialect, 0))
+    {
+      if (PPC_OP (insn) == 19 && (insn & 0x400) != 0 && (value & 4) == 0)
+	*errmsg = _("invalid counter access");
+    }
+  else
+    *errmsg = _("invalid conditional option");
+
+  return insn | ((value & 0x1f) << 21);
+}
+
+/* Return bits 21..25 of INSN, flagging the instruction as invalid
+   when that is not a legal BO for DIALECT.  */
+static long
+extract_bo (unsigned long insn,
+	    ppc_cpu_t dialect,
+	    int *invalid)
+{
+  long bo = (insn >> 21) & 0x1f;
+
+  if (!valid_bo (bo, dialect, 1))
+    *invalid = 1;
+  return bo;
+}
+
+/* The BO field in a B form instruction when the + or - modifier is
+   used.  This is like the BO field, but it must be even.  When
+   extracting it, we force it to be even.  */
+
+static unsigned long
+insert_boe (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect,
+	    const char **errmsg)
+{
+  if (!valid_bo (value, dialect, 0))
+    *errmsg = _("invalid conditional option");
+  else if (PPC_OP (insn) == 19 && (insn & 0x400) && ! (value & 4))
+    *errmsg = _("invalid counter access");
+  else if ((value & 1) != 0)
+    *errmsg = _("attempt to set y bit when using + or - modifier");
+
+  return insn | ((value & 0x1f) << 21);
+}
+
+/* Return bits 21..25 of INSN with the low bit cleared, flagging the
+   instruction as invalid when the unmodified field is not a legal BO
+   for DIALECT.  */
+static long
+extract_boe (unsigned long insn,
+	     ppc_cpu_t dialect,
+	     int *invalid)
+{
+  long bo = (insn >> 21) & 0x1f;
+
+  if (!valid_bo (bo, dialect, 1))
+    *invalid = 1;
+  return bo & 0x1e;
+}
+
+/* The DCMX field in a X form instruction when the field is split
+   into separate DC, DM and DX fields.  */
+
+/* Scatter the seven-bit VALUE into INSN: bits 0..4 go to bits 16..20,
+   bit 5 goes to bit 2 and bit 6 stays at bit 6.  */
+static unsigned long
+insert_dcmxs (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long low5 = (value & 0x1f) << 16;
+  long bit5 = (value & 0x20) >> 3;
+  long bit6 = value & 0x40;
+
+  return insn | low5 | bit5 | bit6;
+}
+
+/* Gather the seven-bit value from INSN: bit 6 stays put, bit 2
+   becomes bit 5 and bits 16..20 become bits 0..4.  */
+static long
+extract_dcmxs (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long bit6 = insn & 0x40;
+  unsigned long bit5 = (insn << 3) & 0x20;
+  unsigned long low5 = (insn >> 16) & 0x1f;
+
+  return bit6 | bit5 | low5;
+}
+
+/* The D field in a DX form instruction when the field is split
+   into separate D0, D1 and D2 fields.  */
+
+/* Scatter the 16-bit VALUE into INSN: bits 1..5 go to bits 16..20
+   while bit 0 and bits 6..15 keep their positions.  */
+static unsigned long
+insert_dxd (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long in_place = value & 0xffc1;
+  long moved = (value & 0x3e) << 15;
+
+  return insn | in_place | moved;
+}
+
+/* Gather the 16-bit value scattered by insert_dxd and sign-extend it
+   from bit 15.  */
+static long
+extract_dxd (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long in_place = insn & 0xffc1;
+  unsigned long moved = (insn >> 15) & 0x3e;
+  unsigned long raw = in_place | moved;
+
+  return (raw ^ 0x8000) - 0x8000;
+}
+
+/* Insert the negation of VALUE using insert_dxd.  */
+static unsigned long
+insert_dxdn (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long negated = -value;
+
+  return insert_dxd (insn, negated, dialect, errmsg);
+}
+
+/* Return the negation of what extract_dxd yields for INSN.  */
+static long
+extract_dxdn (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  long stored = extract_dxd (insn, dialect, invalid);
+
+  return -stored;
+}
+
+/* FXM mask in mfcr and mtcrf instructions.  */
+
+/* OR the eight-bit FXM mask VALUE into bits 12..19 of INSN, possibly
+   also setting bit 20 to select the one-bit form of the
+   instruction.  */
+static unsigned long
+insert_fxm (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect,
+	    const char **errmsg)
+{
+  int one_bit_form = (insn & (1 << 20)) != 0;
+  int is_mfcr = (insn & (0x3ff << 1)) == 19 << 1;
+
+  if (one_bit_form)
+    {
+      /* If we're handling the mfocrf and mtocrf insns ensure that
+	 exactly one bit of the mask field is set.  */
+      if (value == 0 || (value & -value) != value)
+	{
+	  *errmsg = _("invalid mask field");
+	  value = 0;
+	}
+    }
+  else if (value > 0
+	   && (value & -value) == value
+	   && ((dialect & PPC_OPCODE_POWER4) != 0
+	       || ((dialect & PPC_OPCODE_ANY) != 0 && is_mfcr)))
+    {
+      /* If only one bit of the FXM field is set, we can use the new
+	 form of the instruction, which is faster.  Unlike the Power4
+	 branch hint encoding, this is not backward compatible.  Do not
+	 generate the new form unless -mpower4 has been given, or -many
+	 and the two operand form of mfcr was used.  */
+      insn |= 1 << 20;
+    }
+  else if (is_mfcr)
+    {
+      /* Any other value on mfcr is an error, except -1, which means
+	 the one operand form of mfcr was used.  */
+      if (value != -1)
+	*errmsg = _("invalid mfcr mask");
+      value = 0;
+    }
+
+  return insn | ((value & 0xff) << 12);
+}
+
+/* Return the FXM mask held in bits 12..19 of INSN.  The old form of
+   mfcr with a zero mask yields -1; inconsistent encodings are flagged
+   through INVALID.  */
+static long
+extract_fxm (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  long fxm = (insn >> 12) & 0xff;
+
+  /* Is this a Power4 insn?  */
+  if ((insn & (1 << 20)) != 0)
+    {
+      /* Exactly one bit of the mask should be set.  */
+      if (fxm == 0 || (fxm & -fxm) != fxm)
+	*invalid = 1;
+      return fxm;
+    }
+
+  /* Nothing more to check unless this is mfcr.  */
+  if ((insn & (0x3ff << 1)) != 19 << 1)
+    return fxm;
+
+  /* The non-power4 form of mfcr must have a zero mask.  */
+  if (fxm != 0)
+    {
+      *invalid = 1;
+      return fxm;
+    }
+  return -1;
+}
+
+/* Scatter the 20-bit VALUE into INSN: bits 16..19 go to bits 11..14,
+   bits 11..15 go to bits 16..20 and bits 0..10 keep their
+   positions.  */
+static unsigned long
+insert_li20 (unsigned long insn,
+	     long value,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long top4 = (value & 0xf0000) >> 5;
+  long mid5 = (value & 0x0f800) << 5;
+  long low11 = value & 0x7ff;
+
+  return insn | top4 | mid5 | low11;
+}
+
+/* Gather the 20-bit value scattered by insert_li20 and sign-extend it
+   from bit 19.  */
+static long
+extract_li20 (unsigned long insn,
+	      ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	      int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long raw = ((((insn >> 11) & 0xf) << 16)
+		       | (((insn >> 17) & 0xf) << 12)
+		       | (((insn >> 16) & 0x1) << 11)
+		       | (insn & 0x7ff));
+
+  /* Sign-extend with the xor/subtract idiom rather than ORing in
+     0xfff00000: that constant only fills bits 20..31, so it would
+     leave the result positive when long is wider than 32 bits.  */
+  return (raw ^ 0x80000) - 0x80000;
+}
+
+/* The 2-bit L field in a SYNC or WC field in a WAIT instruction.
+   For SYNC, some L values are reserved:
+     * Value 3 is reserved on newer server cpus.
+     * Values 2 and 3 are reserved on all other cpus.  */
+
+/* OR the two-bit VALUE into bits 21..22 of INSN.  When the extended
+   opcode in bits 1..10 is 598 (sync), VALUE may be at most 2 if
+   DIALECT includes PPC_OPCODE_POWER4 and at most 1 otherwise; a
+   larger VALUE sets *ERRMSG and leaves INSN unchanged.  */
+static unsigned long
+insert_ls (unsigned long insn,
+	   long value,
+	   ppc_cpu_t dialect,
+	   const char **errmsg)
+{
+  int is_sync = ((insn >> 1) & 0x3ff) == 598;
+
+  if (is_sync)
+    {
+      long limit = (dialect & PPC_OPCODE_POWER4) ? 2 : 1;
+
+      if (value > limit)
+	{
+	  *errmsg = _("illegal L operand value");
+	  return insn;
+	}
+    }
+
+  return insn | ((value & 0x3) << 21);
+}
+
+/* The 4-bit E field in a sync instruction that accepts 2 operands.
+   If ESYNC is non-zero, then the L field must be either 0 or 1 and
+   the complement of ESYNC-bit2.  */
+
+/* OR the four-bit VALUE into bits 16..19 of INSN after checking it
+   against the L field already present in bits 21..22.  A zero VALUE
+   leaves INSN unchanged and only range-checks L for DIALECT.  */
+static unsigned long
+insert_esync (unsigned long insn,
+	      long value,
+	      ppc_cpu_t dialect,
+	      const char **errmsg)
+{
+  unsigned long ls = (insn >> 21) & 0x03;
+
+  if (value == 0)
+    {
+      int bad_e6500 = (dialect & PPC_OPCODE_E6500) != 0 && ls > 1;
+      int bad_power9 = (dialect & PPC_OPCODE_POWER9) != 0 && ls > 2;
+
+      if (bad_e6500 || bad_power9)
+	*errmsg = _("illegal L operand value");
+      return insn;
+    }
+
+  /* L must be 0 or 1 and must differ from bit 1 of VALUE.  */
+  if (ls > 1
+      || (unsigned long) ((value >> 1) & 0x1) == ls)
+    *errmsg = _("incompatible L operand value");
+
+  return insn | ((value & 0xf) << 16);
+}
+
+/* The MB and ME fields in an M form instruction expressed as a single
+   operand which is itself a bitmask.  The extraction function always
+   marks it as invalid, since we never want to recognize an
+   instruction which uses a field of this type.  */
+
+/* Convert the bitmask VALUE into MB and ME fields and OR them into
+   bits 6..10 and 1..5 of INSN.  Only the low 32 bits of VALUE are
+   scanned.  A zero mask, or one whose set bits do not form a single
+   (possibly wrapping) run, sets *ERRMSG.  */
+static unsigned long
+insert_mbe (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  unsigned long uval, mask;
+  int mb, me, mx, count, last;
+
+  uval = value;
+
+  if (uval == 0)
+    {
+      *errmsg = _("illegal bitmask");
+      return insn;
+    }
+
+  mb = 0;
+  me = 32;
+  /* Seed LAST from the least significant bit so that a run of ones
+     wrapping from bit 0 around to bit 31 is seen as continuous.  */
+  last = (uval & 1) != 0;
+  count = 0;
+
+  /* mb: location of last 0->1 transition */
+  /* me: location of last 1->0 transition */
+  /* count: # transitions */
+
+  /* Walk from the most significant of the 32 bits downwards.  The
+     starting mask is built from an unsigned constant because
+     1L << 31 overflows when long is 32 bits wide.  */
+  for (mx = 0, mask = 1UL << 31; mx < 32; ++mx, mask >>= 1)
+    {
+      if ((uval & mask) && !last)
+	{
+	  ++count;
+	  mb = mx;
+	  last = 1;
+	}
+      else if (!(uval & mask) && last)
+	{
+	  ++count;
+	  me = mx;
+	  last = 0;
+	}
+    }
+  if (me == 0)
+    me = 32;
+
+  /* Valid masks have exactly two transitions, or none at all with
+     every scanned bit set.  */
+  if (count != 2 && (count != 0 || ! last))
+    *errmsg = _("illegal bitmask");
+
+  return insn | (mb << 6) | ((me - 1) << 1);
+}
+
+/* Rebuild the bitmask described by the MB (bits 6..10) and ME (bits
+   1..5) fields of INSN.  *INVALID is always set, so an operand of
+   this type never matches when decoding.  */
+static long
+extract_mbe (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  /* Build the mask in an unsigned type: 1L << 31 (needed when MB is
+     0) overflows when long is 32 bits wide.  */
+  unsigned long bits;
+  int mb, me;
+  int i;
+
+  *invalid = 1;
+
+  mb = (insn >> 6) & 0x1f;
+  me = (insn >> 1) & 0x1f;
+  if (mb <= me)
+    {
+      /* Contiguous run: set bits MB..ME, numbered from the top of the
+	 32-bit word.  */
+      bits = 0;
+      for (i = mb; i <= me; i++)
+	bits |= 1UL << (31 - i);
+    }
+  else
+    {
+      /* Wrapping run: start from all ones and clear the gap between
+	 ME and MB.  The gap is empty when MB == ME + 1.  */
+      bits = ~0UL;
+      for (i = me + 1; i < mb; i++)
+	bits &= ~(1UL << (31 - i));
+    }
+  return (long) bits;
+}
+
+/* The MB or ME field in an MD or MDS form instruction.  The high bit
+   is wrapped to the low end.  */
+
+/* OR the six-bit VALUE into INSN: bits 0..4 go to bits 6..10 and bit
+   5 stays at bit 5.  */
+static unsigned long
+insert_mb6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long low5 = (value & 0x1f) << 6;
+  long high = value & 0x20;
+
+  return insn | low5 | high;
+}
+
+/* Return the six-bit value formed from bits 6..10 of INSN (low five
+   bits) and bit 5 of INSN (high bit).  */
+static long
+extract_mb6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long low5 = (insn >> 6) & 0x1f;
+  unsigned long high = insn & 0x20;
+
+  return low5 | high;
+}
+
+/* The NB field in an X form instruction.  The value 32 is stored as
+   0.  */
+
+/* Return bits 11..15 of INSN, mapping a stored 0 to 32.  */
+static long
+extract_nb (unsigned long insn,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    int *invalid ATTRIBUTE_UNUSED)
+{
+  long nb = (insn >> 11) & 0x1f;
+
+  return nb != 0 ? nb : 32;
+}
+
+/* The NB field in an lswi instruction, which has special value
+   restrictions.  The value 32 is stored as 0.  */
+
+/* OR the low five bits of VALUE into bits 11..15 of INSN (32 is
+   stored as 0).  *ERRMSG is set when RT plus the number of words
+   needed for VALUE bytes extends past RA, allowing for wrap-around
+   when RT is above RA.  */
+static unsigned long
+insert_nbi (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long rtvalue = (insn & RT_MASK) >> 21;
+  long ravalue = (insn & RA_MASK) >> 16;
+  long nwords, limit;
+
+  if (value == 0)
+    value = 32;
+
+  /* Bytes rounded up to whole words.  */
+  nwords = (value + 3) / 4;
+  limit = rtvalue > ravalue ? ravalue + 32 : ravalue;
+  if (rtvalue + nwords > limit)
+    *errmsg = _("address register in load range");
+
+  return insn | ((value & 0x1f) << 11);
+}
+
+/* The NSI field in a D form instruction.  This is the same as the SI
+   field, only negated.  The extraction function always marks it as
+   invalid, since we never want to recognize an instruction which uses
+   a field of this type.  */
+
+/* OR the low 16 bits of -VALUE into INSN.  */
+static unsigned long
+insert_nsi (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long negated16 = -value & 0xffff;
+
+  return insn | negated16;
+}
+
+/* Return the negation of the sign-extended low 16 bits of INSN.
+   *INVALID is always set.  */
+static long
+extract_nsi (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  unsigned long low16 = insn & 0xffff;
+
+  *invalid = 1;
+  return -((low16 ^ 0x8000) - 0x8000);
+}
+
+/* The RA field in a D or X form instruction which is an updating
+   load, which means that the RA field may not be zero and may not
+   equal the RT field.  */
+
+/* OR the low five bits of VALUE into bits 16..20 of INSN.  *ERRMSG
+   is set when VALUE is zero or equals the register number already in
+   bits 21..25; the field is inserted regardless.  */
+static unsigned long
+insert_ral (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  /* The message is wrapped in _() like every other diagnostic in
+     this file, including the identical one in insert_ras.  */
+  if (value == 0
+      || (unsigned long) value == ((insn >> 21) & 0x1f))
+    *errmsg = _("invalid register operand when updating");
+  return insn | ((value & 0x1f) << 16);
+}
+
+/* The RA field in an lmw instruction, which has special value
+   restrictions.  */
+
+/* OR the low five bits of VALUE into bits 16..20 of INSN.  *ERRMSG
+   is set when VALUE, taken as unsigned, is not below the register
+   number already in bits 21..25.  */
+static unsigned long
+insert_ram (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  unsigned long first_loaded = (insn >> 21) & 0x1f;
+
+  if ((unsigned long) value >= first_loaded)
+    *errmsg = _("index register in load range");
+
+  return insn | ((value & 0x1f) << 16);
+}
+
+/* The RA field in the DQ form lq or an lswx instruction, which have special
+   value restrictions.  */
+
+/* OR the low five bits of VALUE into bits 16..20 of INSN.  *ERRMSG
+   is set when VALUE equals the RT field already in INSN.  */
+static unsigned long
+insert_raq (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  long target = (insn & RT_MASK) >> 21;
+
+  if (target == value)
+    *errmsg = _("source and target register operands must be different");
+
+  return insn | ((value & 0x1f) << 16);
+}
+
+/* The RA field in a D or X form instruction which is an updating
+   store or an updating floating point load, which means that the RA
+   field may not be zero.  */
+
+/* OR the low five bits of VALUE into bits 16..20 of INSN.  *ERRMSG
+   is set when VALUE is zero.  */
+static unsigned long
+insert_ras (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  long field = (value & 0x1f) << 16;
+
+  if (value == 0)
+    *errmsg = _("invalid register operand when updating");
+
+  return insn | field;
+}
+
+/* The RB field in an X form instruction when it must be the same as
+   the RS field in the instruction.  This is used for extended
+   mnemonics like mr.  This operand is marked FAKE.  The insertion
+   function just copies the RS field into the RB field, and the
+   extraction function just checks that the fields are the same.  */
+
+/* Copy the five bits at 21..25 of INSN into bits 11..15.  VALUE is
+   ignored.  */
+static unsigned long
+insert_rbs (unsigned long insn,
+	    long value ATTRIBUTE_UNUSED,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long rs = (insn >> 21) & 0x1f;
+
+  return insn | (rs << 11);
+}
+
+/* Flag INSN as invalid unless bits 21..25 equal bits 11..15.  Always
+   returns 0.  */
+static long
+extract_rbs (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  unsigned long rs = (insn >> 21) & 0x1f;
+  unsigned long rb = (insn >> 11) & 0x1f;
+
+  if (rs != rb)
+    *invalid = 1;
+  return 0;
+}
+
+/* The RB field in an lswx instruction, which has special value
+   restrictions.  */
+
+/* OR the low five bits of VALUE into bits 11..15 of INSN.  *ERRMSG
+   is set when VALUE equals the RT field already in INSN.  */
+static unsigned long
+insert_rbx (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  long target = (insn & RT_MASK) >> 21;
+
+  if (target == value)
+    *errmsg = _("source and target register operands must be different");
+
+  return insn | ((value & 0x1f) << 11);
+}
+
+/* The SCI8 field is made up of SCL and {U,N}I8 fields.  */
+/* Encode VALUE as one byte placed at one of four byte positions of a
+   32-bit word, with the other three bytes either all zeros or all
+   ones.  The byte goes in bits 0..7 of INSN, the position in bits
+   8..9 and the fill flag in bit 10 (0x400).  *ERRMSG is set when
+   VALUE has no such form.  */
+static unsigned long
+insert_sci8 (unsigned long insn,
+	     long value,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     const char **errmsg)
+{
+  unsigned int fill_scale = 0;
+  unsigned long ui8 = value;
+  int scale;
+
+  /* Try each byte position from the lowest up, preferring the
+     zero-filled form over the one-filled form at each position.  */
+  for (scale = 0; scale < 4; scale++)
+    {
+      unsigned long others = 0xffffffff & ~(0xffUL << (8 * scale));
+      unsigned long rest = ui8 & others;
+
+      if (rest == 0)
+	{
+	  fill_scale = scale << 8;
+	  break;
+	}
+      if (rest == others)
+	{
+	  fill_scale = 0x400 | (scale << 8);
+	  break;
+	}
+    }
+
+  if (scale < 4)
+    ui8 >>= 8 * scale;
+  else
+    {
+      *errmsg = _("illegal immediate value");
+      ui8 = 0;
+    }
+
+  return insn | fill_scale | (ui8 & 0xff);
+}
+
+/* Rebuild the immediate from INSN: the byte in bits 0..7 is shifted
+   left by eight times the position held in bits 8..9, and when bit
+   10 (0x400) is set every other bit of the result is set.  */
+static long
+extract_sci8 (unsigned long insn,
+	      ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	      int *invalid ATTRIBUTE_UNUSED)
+{
+  /* (insn & 0x300) >> 5 is the two-bit position times eight.  */
+  int shift = (insn & 0x300) >> 5;
+  long result = (insn & 0xff) << shift;
+
+  if ((insn & 0x400) != 0)
+    result |= ~((long) 0xff << shift);
+
+  return result;
+}
+
+/* Insert the negation of VALUE using insert_sci8.  */
+static unsigned long
+insert_sci8n (unsigned long insn,
+	      long value,
+	      ppc_cpu_t dialect,
+	      const char **errmsg)
+{
+  long negated = -value;
+
+  return insert_sci8 (insn, negated, dialect, errmsg);
+}
+
+/* Return the negation of what extract_sci8 yields for INSN.  */
+static long
+extract_sci8n (unsigned long insn,
+	       ppc_cpu_t dialect,
+	       int *invalid)
+{
+  long stored = extract_sci8 (insn, dialect, invalid);
+
+  return -stored;
+}
+
+/* OR bits 1..4 of VALUE into bits 8..11 of INSN; bit 0 of VALUE is
+   dropped.  */
+static unsigned long
+insert_sd4h (unsigned long insn,
+	     long value,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long field = (value & 0x1e) << 7;
+
+  return insn | field;
+}
+
+/* Return bits 8..11 of INSN multiplied by two.  */
+static long
+extract_sd4h (unsigned long insn,
+	      ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	      int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long field = (insn >> 8) & 0xf;
+
+  return field << 1;
+}
+
+/* OR bits 2..5 of VALUE into bits 8..11 of INSN; the low two bits of
+   VALUE are dropped.  */
+static unsigned long
+insert_sd4w (unsigned long insn,
+	     long value,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long field = (value & 0x3c) << 6;
+
+  return insn | field;
+}
+
+/* Return bits 8..11 of INSN multiplied by four.  */
+static long
+extract_sd4w (unsigned long insn,
+	      ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	      int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long field = (insn >> 8) & 0xf;
+
+  return field << 2;
+}
+
+/* OR VALUE - 1, truncated to five bits, into bits 4..8 of INSN.  */
+static unsigned long
+insert_oimm (unsigned long insn,
+	     long value,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long biased = (value - 1) & 0x1f;
+
+  return insn | (biased << 4);
+}
+
+/* Return bits 4..8 of INSN plus one.  */
+static long
+extract_oimm (unsigned long insn,
+	      ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	      int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long biased = (insn >> 4) & 0x1f;
+
+  return biased + 1;
+}
+
+/* The SH field in an MD form instruction.  This is split.  */
+
+/* OR the six-bit VALUE into INSN as a split field.  For primary
+   opcode 4 (the rldixor instructions) the low five bits go to bits
+   6..10 and bit 5 goes to bit 0; otherwise the low five bits go to
+   bits 11..15 and bit 5 goes to bit 1.  */
+static unsigned long
+insert_sh6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  int is_op4 = PPC_OP (insn) == 4;
+  int low_pos = is_op4 ? 6 : 11;
+  int high_down = is_op4 ? 5 : 4;
+
+  return insn | ((value & 0x1f) << low_pos) | ((value & 0x20) >> high_down);
+}
+
+/* Reassemble the six-bit split field from INSN.  For primary opcode 4
+   (the rldixor instructions) the low five bits come from bits 6..10
+   and bit 5 from bit 0; otherwise the low five bits come from bits
+   11..15 and bit 5 from bit 1.  */
+static long
+extract_sh6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  int is_op4 = PPC_OP (insn) == 4;
+  int low_pos = is_op4 ? 6 : 11;
+  int high_up = is_op4 ? 5 : 4;
+
+  return ((insn >> low_pos) & 0x1f) | ((insn << high_up) & 0x20);
+}
+
+/* The SPR field in an XFX form instruction.  This is flipped--the
+   lower 5 bits are stored in the upper 5 and vice versa.  */
+
+/* OR the ten-bit VALUE into INSN with its halves swapped: bits 0..4
+   go to bits 16..20 and bits 5..9 go to bits 11..15.  */
+static unsigned long
+insert_spr (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long low_half = (value & 0x1f) << 16;
+  long high_half = (value & 0x3e0) << 6;
+
+  return insn | low_half | high_half;
+}
+
+/* Return the ten-bit value whose low half is bits 16..20 of INSN and
+   whose high half is bits 11..15.  */
+static long
+extract_spr (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long low_half = (insn >> 16) & 0x1f;
+  unsigned long high_half = (insn >> 6) & 0x3e0;
+
+  return low_half | high_half;
+}
+
+/* Some dialects have 8 SPRG registers instead of the standard 4.
+   insert_sprg and extract_sprg accept SPRG numbers 4..7 only when
+   the dialect has one of these bits set.  */
+#define ALLOW8_SPRG (PPC_OPCODE_BOOKE | PPC_OPCODE_405)
+
+/* OR the SPRG number VALUE into bits 16..20 of INSN.  *ERRMSG is set
+   when VALUE is above 7, or above 3 for a dialect without
+   ALLOW8_SPRG.  */
+static unsigned long
+insert_sprg (unsigned long insn,
+	     long value,
+	     ppc_cpu_t dialect,
+	     const char **errmsg)
+{
+  long highest = (dialect & ALLOW8_SPRG) != 0 ? 7 : 3;
+
+  if (value > highest)
+    *errmsg = _("invalid sprg number");
+
+  /* If this is mfsprg4..7 then use spr 260..263 which can be read in
+     user mode.  Anything else must use spr 272..279.  */
+  if ((insn & 0x100) != 0 || value <= 3)
+    value |= 0x10;
+
+  return insn | ((value & 0x17) << 16);
+}
+
+/* Return the low three bits of the field in bits 16..20 of INSN,
+   flagging encodings that are not acceptable for DIALECT.  */
+static long
+extract_sprg (unsigned long insn,
+	      ppc_cpu_t dialect,
+	      int *invalid)
+{
+  unsigned long val = (insn >> 16) & 0x1f;
+  /* Unsigned, so fields below 0x10 wrap to a huge offset.  */
+  unsigned long offset = val - 0x10;
+
+  /* mfsprg can use 260..263 and 272..279.  mtsprg only uses spr 272..279
+     If not BOOKE, 405 or VLE, then both use only 272..275.  */
+  if (offset > 3 && (dialect & ALLOW8_SPRG) == 0)
+    *invalid = 1;
+  else if (offset > 7 && (insn & 0x100) != 0)
+    *invalid = 1;
+  else if (val <= 3)
+    *invalid = 1;
+  else if ((val & 8) != 0)
+    *invalid = 1;
+
+  return val & 7;
+}
+
+/* The TBR field in an XFX instruction.  This is just like SPR, but it
+   is optional.  */
+
+/* OR VALUE into INSN with the same half-swapped layout as insert_spr.
+   *ERRMSG is set unless VALUE is 268 or 269; the field is inserted
+   regardless.  */
+static unsigned long
+insert_tbr (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg)
+{
+  long low_half = (value & 0x1f) << 16;
+  long high_half = (value & 0x3e0) << 6;
+
+  if (value != 268 && value != 269)
+    *errmsg = _("invalid tbr number");
+
+  return insn | low_half | high_half;
+}
+
+/* Return the half-swapped ten-bit field from INSN, flagging the
+   instruction as invalid unless the value is 268 or 269.  */
+static long
+extract_tbr (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  long tbr = ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0);
+
+  if (tbr != 268 && tbr != 269)
+    *invalid = 1;
+
+  return tbr;
+}
+
+/* The XT and XS fields in an XX1 or XX3 form instruction.  This is split.  */
+
+/* OR the six-bit VALUE into INSN: bits 0..4 go to bits 21..25 and
+   bit 5 goes to bit 0.  */
+static unsigned long
+insert_xt6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long low5 = (value & 0x1f) << 21;
+  long high = (value & 0x20) >> 5;
+
+  return insn | low5 | high;
+}
+
+/* Rebuild the 6-bit XT/XS value: bit 0 of INSN supplies the sixth
+   bit and bits 21..25 supply the low five.  */
+
+static long
+extract_xt6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long high1 = (insn & 0x1) << 5;
+  unsigned long low5 = (insn >> 21) & 0x1f;
+
+  return high1 | low5;
+}
+
+/* The XT and XS fields in a DQ form VSX instruction.  The 6-bit value
+   is split: its low five bits are stored at bit 21 and its sixth bit
+   is stored in bit 3.  */
+
+static unsigned long
+insert_xtq6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long low5 = (value & 0x1f) << 21;
+  unsigned long high1 = (value & 0x20) >> 2;
+
+  return insn | low5 | high1;
+}
+
+/* Rebuild the 6-bit DQ form XT/XS value: bit 3 of INSN supplies the
+   sixth bit and bits 21..25 supply the low five.  */
+
+static long
+extract_xtq6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long high1 = (insn & 0x8) << 2;
+  unsigned long low5 = (insn >> 21) & 0x1f;
+
+  return high1 | low5;
+}
+
+/* The XA field in an XX3 form instruction.  The 6-bit value is split:
+   its low five bits are stored at bit 16 and its sixth bit is stored
+   in bit 2.  */
+
+static unsigned long
+insert_xa6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long low5 = (value & 0x1f) << 16;
+  unsigned long high1 = (value & 0x20) >> 3;
+
+  return insn | low5 | high1;
+}
+
+/* Rebuild the 6-bit XA value: bit 2 of INSN supplies the sixth bit
+   and bits 16..20 supply the low five.  */
+
+static long
+extract_xa6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long high1 = (insn & 0x4) << 3;
+  unsigned long low5 = (insn >> 16) & 0x1f;
+
+  return high1 | low5;
+}
+
+/* The XB field in an XX3 form instruction.  The 6-bit value is split:
+   its low five bits are stored at bit 11 and its sixth bit is stored
+   in bit 1.  */
+
+static unsigned long
+insert_xb6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long low5 = (value & 0x1f) << 11;
+  unsigned long high1 = (value & 0x20) >> 4;
+
+  return insn | low5 | high1;
+}
+
+/* Rebuild the 6-bit XB value: bit 1 of INSN supplies the sixth bit
+   and bits 11..15 supply the low five.  */
+
+static long
+extract_xb6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long high1 = (insn & 0x2) << 4;
+  unsigned long low5 = (insn >> 11) & 0x1f;
+
+  return high1 | low5;
+}
+
+/* The XB field in an XX3 form instruction when it must be the same as
+   the XA field in the instruction.  This is used for extended
+   mnemonics like xvmovdp.  This operand is marked FAKE.  VALUE is
+   ignored: the insertion function duplicates the XA bits already in
+   INSN (bits 16..20 and bit 2) into the XB position (bits 11..15 and
+   bit 1).  */
+
+static unsigned long
+insert_xb6s (unsigned long insn,
+	    long value ATTRIBUTE_UNUSED,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long xa_low5 = (insn >> 16) & 0x1f;
+  unsigned long xa_high1 = (insn >> 2) & 0x1;
+
+  return insn | (xa_low5 << 11) | (xa_high1 << 1);
+}
+
+/* Check that the XB bits of INSN repeat its XA bits, setting *INVALID
+   when they differ.  The operand itself carries no value, so 0 is
+   always returned.  */
+
+static long
+extract_xb6s (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid)
+{
+  unsigned long xa_low5 = (insn >> 16) & 0x1f;
+  unsigned long xb_low5 = (insn >> 11) & 0x1f;
+  unsigned long xa_high1 = (insn >> 2) & 0x1;
+  unsigned long xb_high1 = (insn >> 1) & 0x1;
+
+  if (!(xa_low5 == xb_low5 && xa_high1 == xb_high1))
+    *invalid = 1;
+  return 0;
+}
+
+/* The XC field in an XX4 form instruction.  The 6-bit value is split:
+   its low five bits are stored at bit 6 and its sixth bit is stored
+   in bit 3.  */
+
+static unsigned long
+insert_xc6 (unsigned long insn,
+	    long value,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long low5 = (value & 0x1f) << 6;
+  unsigned long high1 = (value & 0x20) >> 2;
+
+  return insn | low5 | high1;
+}
+
+/* Rebuild the 6-bit XC value: bit 3 of INSN supplies the sixth bit
+   and bits 6..10 supply the low five.  */
+
+static long
+extract_xc6 (unsigned long insn,
+	     ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	     int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long high1 = (insn & 0x8) << 2;
+  unsigned long low5 = (insn >> 6) & 0x1f;
+
+  return high1 | low5;
+}
+
+/* Insert a DM operand.  Only 0 and 1 are accepted; any other VALUE
+   stores "invalid constant" in *ERRMSG.  A nonzero VALUE sets both
+   bits 8 and 9 of the instruction, zero sets neither.  */
+
+static unsigned long
+insert_dm (unsigned long insn,
+	   long value,
+	   ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	   const char **errmsg)
+{
+  unsigned long dm_bits = 0;
+
+  if (value < 0 || value > 1)
+    *errmsg = _("invalid constant");
+
+  if (value != 0)
+    dm_bits = 3UL << 8;
+
+  return insn | dm_bits;
+}
+
+/* Extract a DM operand from bits 8..9 of INSN.  Both bits clear
+   yields 0 and both bits set yields 1; a mixed pattern still yields 1
+   but sets *INVALID.  */
+
+static long
+extract_dm (unsigned long insn,
+	    ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+	    int *invalid)
+{
+  switch ((insn >> 8) & 3)
+    {
+    case 0:
+      return 0;
+    case 3:
+      return 1;
+    default:
+      *invalid = 1;
+      return 1;
+    }
+}
+
+/* The VLESIMM field in an I16A form instruction.  The 16-bit value is
+   split: bits 11..15 of VALUE are stored at bits 21..25 of the
+   instruction and the low eleven bits stay at bits 0..10.  */
+
+static unsigned long
+insert_vlesi (unsigned long insn,
+            long value,
+            ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+            const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long upper5 = (value & 0xf800) << 10;
+  unsigned long lower11 = value & 0x7ff;
+
+  return insn | upper5 | lower11;
+}
+
+/* Reassemble the split 16-bit VLESIMM field and sign-extend it from
+   bit 15.  */
+
+static long
+extract_vlesi (unsigned long insn,
+             ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+             int *invalid ATTRIBUTE_UNUSED)
+{
+  long imm = ((insn >> 10) & 0xf800) | (insn & 0x7ff);
+
+  /* Bit 15 is the sign bit of the 16-bit immediate.  */
+  if ((imm & 0x8000) != 0)
+    imm -= 0x10000;
+  return imm;
+}
+
+/* Like insert_vlesi, but the negation of VALUE is what gets stored in
+   the split 16-bit field.  */
+
+static unsigned long
+insert_vlensi (unsigned long insn,
+            long value,
+            ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+            const char **errmsg ATTRIBUTE_UNUSED)
+{
+  long negated = -value;
+  unsigned long upper5 = (negated & 0xf800) << 10;
+  unsigned long lower11 = negated & 0x7ff;
+
+  return insn | upper5 | lower11;
+}
+
+/* Reassemble the split 16-bit field written by insert_vlensi,
+   sign-extend it from bit 15 and return its negation.  *INVALID is
+   always set, so that an entry using this operand is never chosen
+   when disassembling.  */
+
+static long
+extract_vlensi (unsigned long insn,
+             ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+             int *invalid)
+{
+  long value = ((insn >> 10) & 0xf800) | (insn & 0x7ff);
+  value = (value ^ 0x8000) - 0x8000;
+  /* Don't use for disassembly.  */
+  *invalid = 1;
+  return -value;
+}
+
+/* The VLEUIMM field in an I16A form instruction.  The 16-bit value is
+   split: bits 11..15 of VALUE are stored at bits 21..25 of the
+   instruction and the low eleven bits stay at bits 0..10.  */
+
+static unsigned long
+insert_vleui (unsigned long insn,
+            long value,
+            ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+            const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long upper5 = (value & 0xf800) << 10;
+  unsigned long lower11 = value & 0x7ff;
+
+  return insn | upper5 | lower11;
+}
+
+/* Reassemble the split 16-bit VLEUIMM field; the result is not
+   sign-extended.  */
+
+static long
+extract_vleui (unsigned long insn,
+             ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+             int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long upper5 = (insn >> 10) & 0xf800;
+  unsigned long lower11 = insn & 0x7ff;
+
+  return upper5 | lower11;
+}
+
+/* The VLEUIMML field in an I16L form instruction.  The 16-bit value is
+   split: bits 11..15 of VALUE are stored at bits 16..20 of the
+   instruction and the low eleven bits stay at bits 0..10.  */
+
+static unsigned long
+insert_vleil (unsigned long insn,
+            long value,
+            ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+            const char **errmsg ATTRIBUTE_UNUSED)
+{
+  unsigned long upper5 = (value & 0xf800) << 5;
+  unsigned long lower11 = value & 0x7ff;
+
+  return insn | upper5 | lower11;
+}
+
+/* Reassemble the split 16-bit VLEUIMML field; the result is not
+   sign-extended.  */
+
+static long
+extract_vleil (unsigned long insn,
+             ppc_cpu_t dialect ATTRIBUTE_UNUSED,
+             int *invalid ATTRIBUTE_UNUSED)
+{
+  unsigned long upper5 = (insn >> 5) & 0xf800;
+  unsigned long lower11 = insn & 0x7ff;
+
+  return upper5 | lower11;
+}
+
+
+/* Macros used to form opcodes.  Bit numbers in the comments below
+   count from the least significant bit, which is bit 0.  */
+
+/* The main opcode: the low 6 bits of X, shifted into bits 26..31.  */
+#define OP(x) ((((unsigned long)(x)) & 0x3f) << 26)
+#define OP_MASK OP (0x3f)
+
+/* The main opcode combined with a trap code in the TO field of a D
+   form instruction.  Used for extended mnemonics for the trap
+   instructions.  TO is masked to 5 bits and placed at bit 21.  */
+#define OPTO(x,to) (OP (x) | ((((unsigned long)(to)) & 0x1f) << 21))
+#define OPTO_MASK (OP_MASK | TO_MASK)
+
+/* The main opcode combined with a comparison size bit in the L field
+   of a D form or X form instruction.  Used for extended mnemonics for
+   the comparison instructions.  L is a single bit placed at bit 21.  */
+#define OPL(x,l) (OP (x) | ((((unsigned long)(l)) & 1) << 21))
+#define OPL_MASK OPL (0x3f,1)
+
+/* The main opcode combined with an update code in D form instruction.
+   Used for extended mnemonics for VLE memory instructions.  VUP is
+   masked to 8 bits and placed at bit 8.  */
+#define OPVUP(x,vup) (OP (x) | ((((unsigned long)(vup)) & 0xff) << 8))
+#define OPVUP_MASK OPVUP (0x3f,  0xff)
+
+/* The main opcode combined with an update code and the RT fields specified in
+   D form instruction.  Used for VLE volatile context save/restore
+   instructions.  RT is masked to 5 bits and placed at bit 21.  */
+#define OPVUPRT(x,vup,rt) (OPVUP (x, vup) | ((((unsigned long)(rt)) & 0x1f) << 21))
+#define OPVUPRT_MASK OPVUPRT (0x3f, 0xff, 0x1f)
+
+/* An A form instruction: a 5-bit extended opcode XOP at bit 1 and the
+   RC bit in bit 0.  */
+#define A(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1) | (((unsigned long)(rc)) & 1))
+#define A_MASK A (0x3f, 0x1f, 1)
+
+/* An A_MASK with the FRB field fixed.  */
+#define AFRB_MASK (A_MASK | FRB_MASK)
+
+/* An A_MASK with the FRC field fixed.  */
+#define AFRC_MASK (A_MASK | FRC_MASK)
+
+/* An A_MASK with the FRA and FRC fields fixed.  */
+#define AFRAFRC_MASK (A_MASK | FRA_MASK | FRC_MASK)
+
+/* An AFRAFRC_MASK, but with L bit clear.  The bit removed from the
+   mask is bit 16.  */
+#define AFRALFRC_MASK (AFRAFRC_MASK & ~((unsigned long) 1 << 16))
+
+/* A B form instruction: AA in bit 1 and LK in bit 0.  */
+#define B(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 1) << 1) | ((lk) & 1))
+#define B_MASK B (0x3f, 1, 1)
+
+/* A BD8 form instruction.  This is a 16-bit instruction, so the 6-bit
+   opcode is placed at bit 10 instead of going through OP.  */
+#define BD8(op, aa, lk) (((((unsigned long)(op)) & 0x3f) << 10) | (((aa) & 1) << 9) | (((lk) & 1) << 8))
+#define BD8_MASK BD8 (0x3f, 1, 1)
+
+/* Another BD8 form instruction.  This is a 16-bit instruction.  Only
+   a 5-bit opcode at bit 11 is encoded.  */
+#define BD8IO(op) ((((unsigned long)(op)) & 0x1f) << 11)
+#define BD8IO_MASK BD8IO (0x1f)
+
+/* A BD8 form instruction for simplified mnemonics.  BO and BI are not
+   masked here, so out-of-range arguments would spill into other
+   bits.  */
+#define EBD8IO(op, bo, bi) (BD8IO ((op)) | ((bo) << 10) | ((bi) << 8))
+/* A mask that excludes BO32 and BI32.  */
+#define EBD8IO1_MASK 0xf800
+/* A mask that includes BO32 and excludes BI32.  */
+#define EBD8IO2_MASK 0xfc00
+/* A mask that includes BO32 and BI32.  */
+#define EBD8IO3_MASK 0xff00
+
+/* A BD15 form instruction: a 4-bit AA argument at bit 22 and LK in
+   bit 0.  */
+#define BD15(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 0xf) << 22) | ((lk) & 1))
+#define BD15_MASK BD15 (0x3f, 0xf, 1)
+
+/* A BD15 form instruction for extended conditional branch mnemonics.
+   AA is a 4-bit field at bit 22, BO a 2-bit field at bit 20 and LK is
+   bit 0.  The expansion is parenthesized as a whole so it stays one
+   operand wherever it is used, and the opcode goes through OP () so
+   the shift to bit 26 is done on an unsigned long, as in BD15.  */
+#define EBD15(op, aa, bo, lk) \
+  (OP (op) | (((aa) & 0xf) << 22) | (((bo) & 0x3) << 20) | ((lk) & 1))
+#define EBD15_MASK 0xfff00001
+
+/* A BD15 form instruction for extended conditional branch mnemonics with BI.
+   Same as EBD15 plus a 2-bit BI field at bit 16.  */
+#define EBD15BI(op, aa, bo, bi, lk) \
+  (OP (op) \
+   | (((aa) & 0xf) << 22) \
+   | (((bo) & 0x3) << 20) \
+   | (((bi) & 0x3) << 16) \
+   | ((lk) & 1))
+#define EBD15BI_MASK  0xfff30001
+
+/* A BD24 form instruction: AA in bit 25 and LK in bit 0.  */
+#define BD24(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 1) << 25) | ((lk) & 1))
+#define BD24_MASK BD24 (0x3f, 1, 1)
+
+/* A B form instruction setting the BO field (5 bits at bit 21).  */
+#define BBO(op, bo, aa, lk) (B ((op), (aa), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21))
+#define BBO_MASK BBO (0x3f, 0x1f, 1, 1)
+
+/* A BBO_MASK with the y bit of the BO field removed.  This permits
+   matching a conditional branch regardless of the setting of the y
+   bit.  Similarly for the 'at' bits used for power4 branch hints.
+   Y_MASK is bit 21, AT1_MASK is bits 21..22 and AT2_MASK is bits 21
+   and 24.  */
+#define Y_MASK	 (((unsigned long) 1) << 21)
+#define AT1_MASK (((unsigned long) 3) << 21)
+#define AT2_MASK (((unsigned long) 9) << 21)
+#define BBOY_MASK  (BBO_MASK &~ Y_MASK)
+#define BBOAT_MASK (BBO_MASK &~ AT1_MASK)
+
+/* A B form instruction setting the BO field and the condition bits of
+   the BI field.  CB is masked to 2 bits and placed at bit 16.  */
+#define BBOCB(op, bo, cb, aa, lk) \
+  (BBO ((op), (bo), (aa), (lk)) | ((((unsigned long)(cb)) & 0x3) << 16))
+#define BBOCB_MASK BBOCB (0x3f, 0x1f, 0x3, 1, 1)
+
+/* A BBOCB_MASK with the y bit of the BO field removed.  The AT
+   variants remove the AT1_MASK or AT2_MASK bits instead.  */
+#define BBOYCB_MASK (BBOCB_MASK &~ Y_MASK)
+#define BBOATCB_MASK (BBOCB_MASK &~ AT1_MASK)
+#define BBOAT2CB_MASK (BBOCB_MASK &~ AT2_MASK)
+
+/* A BBOYCB_MASK in which the BI field is fixed.  Note that
+   BBOATBI_MASK is built from BBOAT2CB_MASK, not BBOATCB_MASK.  */
+#define BBOYBI_MASK (BBOYCB_MASK | BI_MASK)
+#define BBOATBI_MASK (BBOAT2CB_MASK | BI_MASK)
+
+/* A VLE C form instruction.  C_LK keeps a 15-bit opcode at bit 1 plus
+   the LK bit; C keeps a 16-bit opcode.  Neither goes through OP.  */
+#define C_LK(x, lk) (((((unsigned long)(x)) & 0x7fff) << 1) | ((lk) & 1))
+#define C_LK_MASK C_LK(0x7fff, 1)
+#define C(x) ((((unsigned long)(x)) & 0xffff))
+#define C_MASK C(0xffff)
+
+/* A Context form instruction: a 3-bit extended opcode in bits 0..2.  */
+#define CTX(op, xop)   (OP (op) | (((unsigned long)(xop)) & 0x7))
+#define CTX_MASK CTX(0x3f, 0x7)
+
+/* A User Context form instruction: a 5-bit extended opcode in bits
+   0..4.  */
+#define UCTX(op, xop)  (OP (op) | (((unsigned long)(xop)) & 0x1f))
+#define UCTX_MASK UCTX(0x3f, 0x1f)
+
+/* The main opcode mask with the RA field clear.  RA_MASK is part of
+   this mask, so the RA bits take part in matching.  */
+#define DRA_MASK (OP_MASK | RA_MASK)
+
+/* A DQ form VSX instruction: a 3-bit extended opcode in bits 0..2.  */
+#define DQX(op, xop) (OP (op) | ((xop) & 0x7))
+#define DQX_MASK DQX (0x3f, 7)
+
+/* A DS form instruction: a 2-bit extended opcode in bits 0..1.  */
+#define DSO(op, xop) (OP (op) | ((xop) & 0x3))
+#define DS_MASK DSO (0x3f, 3)
+
+/* A DX form instruction: a 5-bit extended opcode at bit 1.  */
+#define DX(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1))
+#define DX_MASK DX (0x3f, 0x1f)
+
+/* An EVSEL form instruction: an 8-bit extended opcode at bit 3.  */
+#define EVSEL(op, xop) (OP (op) | (((unsigned long)(xop)) & 0xff) << 3)
+#define EVSEL_MASK EVSEL(0x3f, 0xff)
+
+/* An IA16 form instruction: a 5-bit extended opcode at bit 11.  */
+#define IA16(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f) << 11)
+#define IA16_MASK IA16(0x3f, 0x1f)
+
+/* An I16A form instruction.  The expansion is the same as IA16.  */
+#define I16A(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f) << 11)
+#define I16A_MASK I16A(0x3f, 0x1f)
+
+/* An I16L form instruction.  The expansion is the same as IA16.  */
+#define I16L(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f) << 11)
+#define I16L_MASK I16L(0x3f, 0x1f)
+
+/* An IM7 form instruction: only a 5-bit opcode at bit 11.  */
+#define IM7(op) ((((unsigned long)(op)) & 0x1f) << 11)
+#define IM7_MASK IM7(0x1f)
+
+/* An M form instruction: the main opcode plus the RC bit in bit 0.  */
+#define M(op, rc) (OP (op) | ((rc) & 1))
+#define M_MASK M (0x3f, 1)
+
+/* An LI20 form instruction: a 1-bit extended opcode in bit 15.  */
+#define LI20(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1) << 15)
+#define LI20_MASK LI20(0x3f, 0x1)
+
+/* An M form instruction with the ME field specified (5 bits at
+   bit 1).  */
+#define MME(op, me, rc) (M ((op), (rc)) | ((((unsigned long)(me)) & 0x1f) << 1))
+
+/* An M_MASK with the MB and ME fields fixed.  */
+#define MMBME_MASK (M_MASK | MB_MASK | ME_MASK)
+
+/* An M_MASK with the SH and ME fields fixed.  */
+#define MSHME_MASK (M_MASK | SH_MASK | ME_MASK)
+
+/* An MD form instruction: a 3-bit extended opcode at bit 2 and the RC
+   bit in bit 0.  */
+#define MD(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x7) << 2) | ((rc) & 1))
+#define MD_MASK MD (0x3f, 0x7, 1)
+
+/* An MD_MASK with the MB field fixed.  */
+#define MDMB_MASK (MD_MASK | MB6_MASK)
+
+/* An MD_MASK with the SH field fixed.  */
+#define MDSH_MASK (MD_MASK | SH6_MASK)
+
+/* An MDS form instruction: a 4-bit extended opcode at bit 1 and the
+   RC bit in bit 0.  */
+#define MDS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0xf) << 1) | ((rc) & 1))
+#define MDS_MASK MDS (0x3f, 0xf, 1)
+
+/* An MDS_MASK with the MB field fixed.  */
+#define MDSMB_MASK (MDS_MASK | MB6_MASK)
+
+/* An SC form instruction.  SC_MASK is written out by hand: besides the
+   opcode, bit 1 and bit 0 it also covers the ten bits 16..25.  */
+#define SC(op, sa, lk) (OP (op) | ((((unsigned long)(sa)) & 1) << 1) | ((lk) & 1))
+#define SC_MASK (OP_MASK | (((unsigned long)0x3ff) << 16) | (((unsigned long)1) << 1) | 1)
+
+/* An SCI8 form instruction: a 5-bit extended opcode at bit 11.  */
+#define SCI8(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 11))
+#define SCI8_MASK SCI8(0x3f, 0x1f)
+
+/* An SCI8 form instruction with an additional 3-bit FOP field at
+   bit 23.  */
+#define SCI8BF(op, fop, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 11) | (((fop) & 7) << 23))
+#define SCI8BF_MASK SCI8BF(0x3f, 7, 0x1f)
+
+/* An SD4 form instruction.  This is a 16-bit instruction.  */
+#define SD4(op) ((((unsigned long)(op)) & 0xf) << 12)
+#define SD4_MASK SD4(0xf)
+
+/* An SE_IM5 form instruction.  This is a 16-bit instruction.  */
+#define SE_IM5(op, xop) (((((unsigned long)(op)) & 0x3f) << 10) | (((xop) & 0x1) << 9))
+#define SE_IM5_MASK SE_IM5(0x3f, 1)
+
+/* An SE_R form instruction.  This is a 16-bit instruction.  */
+#define SE_R(op, xop) (((((unsigned long)(op)) & 0x3f) << 10) | (((xop) & 0x3f) << 4))
+#define SE_R_MASK SE_R(0x3f, 0x3f)
+
+/* An SE_RR form instruction.  This is a 16-bit instruction.  */
+#define SE_RR(op, xop) (((((unsigned long)(op)) & 0x3f) << 10) | (((xop) & 0x3) << 8))
+#define SE_RR_MASK SE_RR(0x3f, 3)
+
+/* A VX form instruction: an 11-bit extended opcode in bits 0..10.  */
+#define VX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7ff))
+
+/* The mask for a VX form instruction.  */
+#define VX_MASK	VX(0x3f, 0x7ff)
+
+/* A VX_MASK with the VA field fixed.  */
+#define VXVA_MASK (VX_MASK | (0x1f << 16))
+
+/* A VX_MASK with the VB field fixed.  */
+#define VXVB_MASK (VX_MASK | (0x1f << 11))
+
+/* A VX_MASK with the VA and VB fields fixed.  */
+#define VXVAVB_MASK (VX_MASK | (0x1f << 16) | (0x1f << 11))
+
+/* A VX_MASK with the VD and VA fields fixed.  */
+#define VXVDVA_MASK (VX_MASK | (0x1f << 21) | (0x1f << 16))
+
+/* A VX_MASK with a UIMM4 field.  Bit 20 is added to the mask.  */
+#define VXUIMM4_MASK (VX_MASK | (0x1 << 20))
+
+/* A VX_MASK with a UIMM3 field.  Bits 19..20 are added to the mask.  */
+#define VXUIMM3_MASK (VX_MASK | (0x3 << 19))
+
+/* A VX_MASK with a UIMM2 field.  Bits 18..20 are added to the mask.  */
+#define VXUIMM2_MASK (VX_MASK | (0x7 << 18))
+
+/* A VX_MASK with a PS field.  Bit 9 is removed from the mask.  */
+#define VXPS_MASK (VX_MASK & ~(0x1 << 9))
+
+/* A VX_MASK with the VA field fixed with a PS field.  */
+#define VXVAPS_MASK ((VX_MASK | (0x1f << 16)) & ~(0x1 << 9))
+
+/* A VA form instruction: a 6-bit extended opcode in bits 0..5.  */
+#define VXA(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x03f))
+
+/* The mask for a VA form instruction.  */
+#define VXA_MASK VXA(0x3f, 0x3f)
+
+/* A VXA_MASK with a SHB field.  Bit 10 is added to the mask.  */
+#define VXASHB_MASK (VXA_MASK | (1 << 10))
+
+/* A VXR form instruction: a 10-bit extended opcode in bits 0..9 and
+   the RC bit in bit 10.  */
+#define VXR(op, xop, rc) (OP (op) | (((rc) & 1) << 10) | (((unsigned long)(xop)) & 0x3ff))
+
+/* The mask for a VXR form instruction.  */
+#define VXR_MASK VXR(0x3f, 0x3ff, 1)
+
+/* A VX form instruction with a VA tertiary opcode (5 bits at
+   bit 16).  */
+#define VXVA(op, xop, vaop) (VX(op,xop) | (((vaop) & 0x1f) << 16))
+
+#define VXASH(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1))
+#define VXASH_MASK VXASH (0x3f, 0x1f)
+
+/* An X form instruction: a 10-bit extended opcode at bit 1.  */
+#define X(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1))
+
+/* A X form instruction for Quad-Precision FP Instructions.  VAOP is a
+   5-bit field at bit 16.  */
+#define XVA(op, xop, vaop) (X(op,xop) | (((vaop) & 0x1f) << 16))
+
+/* An EX form instruction: an 11-bit extended opcode in bits 0..10.  */
+#define EX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7ff))
+
+/* The mask for an EX form instruction.  */
+#define EX_MASK EX (0x3f, 0x7ff)
+
+/* An XX2 form instruction: a 9-bit extended opcode at bit 2.  */
+#define XX2(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 2))
+
+/* A XX2 form instruction with the VA bits specified.  */
+#define XX2VA(op, xop, vaop) (XX2(op,xop) | (((vaop) & 0x1f) << 16))
+
+/* An XX3 form instruction: an 8-bit extended opcode at bit 3.  */
+#define XX3(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0xff) << 3))
+
+/* An XX3 form instruction with the RC bit specified.  The extended
+   opcode is 7 bits here and RC is stored in bit 10.  */
+#define XX3RC(op, xop, rc) (OP (op) | (((rc) & 1) << 10) | ((((unsigned long)(xop)) & 0x7f) << 3))
+
+/* An XX4 form instruction: a 2-bit extended opcode at bit 4.  */
+#define XX4(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3) << 4))
+
+/* A Z form instruction: a 9-bit extended opcode at bit 1.  */
+#define Z(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1))
+
+/* An X form instruction with the RC bit specified.  */
+#define XRC(op, xop, rc) (X ((op), (xop)) | ((rc) & 1))
+
+/* A X form instruction for Quad-Precision FP Instructions with RC bit.  */
+#define XVARC(op, xop, vaop, rc) (XVA ((op), (xop), (vaop)) | ((rc) & 1))
+
+/* An X form instruction with the RA bits specified as two ops: MOP0
+   is 2 bits at bit 19 and MOP1 is 3 bits at bit 16.  */
+#define XMMF(op, xop, mop0, mop1) (X ((op), (xop)) | ((mop0) & 3) << 19 | ((mop1) & 7) << 16)
+
+/* A Z form instruction with the RC bit specified.  */
+#define ZRC(op, xop, rc) (Z ((op), (xop)) | ((rc) & 1))
+
+/* The mask for an X form instruction.  It includes the RC bit.  */
+#define X_MASK XRC (0x3f, 0x3ff, 1)
+
+/* The mask for an X form instruction with the BF bits specified.
+   Bits 21..22 are added to X_MASK.  */
+#define XBF_MASK (X_MASK | (3 << 21))
+
+/* An X form wait instruction with everything filled in except the WC field.  */
+#define XWC_MASK (XRC (0x3f, 0x3ff, 1) | (7 << 23) | RA_MASK | RB_MASK)
+
+/* The mask for an XX1 form instruction.  Unlike X_MASK it leaves
+   bit 0 out.  */
+#define XX1_MASK X (0x3f, 0x3ff)
+
+/* An XX1_MASK with the RB field fixed.  */
+#define XX1RB_MASK (XX1_MASK | RB_MASK)
+
+/* The mask for an XX2 form instruction.  Bits 16..20 are included.  */
+#define XX2_MASK (XX2 (0x3f, 0x1ff) | (0x1f << 16))
+
+/* The mask for an XX2 form instruction with the UIM bits specified.
+   Of bits 16..20 only bits 18..20 are included.  */
+#define XX2UIM_MASK (XX2 (0x3f, 0x1ff) | (7 << 18))
+
+/* The mask for an XX2 form instruction with the 4 UIM bits specified.
+   Of bits 16..20 only bit 20 is included.  */
+#define XX2UIM4_MASK (XX2 (0x3f, 0x1ff) | (1 << 20))
+
+/* The mask for an XX2 form instruction with the BF bits specified.  */
+#define XX2BF_MASK (XX2_MASK | (3 << 21) | (1))
+
+/* The mask for an XX2 form instruction with the BF and DCMX bits specified.  */
+#define XX2BFD_MASK (XX2 (0x3f, 0x1ff) | 1)
+
+/* The mask for an XX2 form instruction with a split DCMX bits specified.
+   0x1ee leaves two bits of the 9-bit extended opcode out of the mask.  */
+#define XX2DCMXS_MASK XX2 (0x3f, 0x1ee)
+
+/* The mask for an XX3 form instruction.  */
+#define XX3_MASK XX3 (0x3f, 0xff)
+
+/* The mask for an XX3 form instruction with the BF bits specified.  */
+#define XX3BF_MASK (XX3 (0x3f, 0xff) | (3 << 21) | (1))
+
+/* The mask for an XX3 form instruction with the DM or SHW bits specified.
+   Only the low five bits of the extended opcode are matched, plus
+   bit 10.  */
+#define XX3DM_MASK (XX3 (0x3f, 0x1f) | (1 << 10))
+#define XX3SHW_MASK XX3DM_MASK
+
+/* The mask for an XX4 form instruction.  */
+#define XX4_MASK XX4 (0x3f, 0x3)
+
+/* XWC_MASK, the X form wait mask, is defined once above, right after
+   XBF_MASK.  The identical second definition that used to sit here
+   was redundant and has been dropped.  */
+
+/* The mask for an XMMF form instruction.  It includes bit 0.  */
+#define XMMF_MASK (XMMF (0x3f, 0x3ff, 3, 7) | (1))
+
+/* The mask for a Z form instruction.  Z2_MASK matches only the low
+   eight bits of the extended opcode.  */
+#define Z_MASK ZRC (0x3f, 0x1ff, 1)
+#define Z2_MASK ZRC (0x3f, 0xff, 1)
+
+/* An X_MASK with the RA/VA field fixed.  */
+#define XRA_MASK (X_MASK | RA_MASK)
+#define XVA_MASK XRA_MASK
+
+/* An XRA_MASK with the A_L/W field clear.  The bit removed from the
+   mask is bit 16.  */
+#define XWRA_MASK (XRA_MASK & ~((unsigned long) 1 << 16))
+#define XRLA_MASK XWRA_MASK
+
+/* An X_MASK with the RB field fixed.  */
+#define XRB_MASK (X_MASK | RB_MASK)
+
+/* An X_MASK with the RT field fixed.  */
+#define XRT_MASK (X_MASK | RT_MASK)
+
+/* An XRT_MASK mask with the L bits clear.  The bits removed from the
+   mask are bits 21..22.  */
+#define XLRT_MASK (XRT_MASK & ~((unsigned long) 0x3 << 21))
+
+/* An X_MASK with the RA and RB fields fixed.  */
+#define XRARB_MASK (X_MASK | RA_MASK | RB_MASK)
+
+/* An XBF_MASK with the RA and RB fields fixed.  */
+#define XBFRARB_MASK (XBF_MASK | RA_MASK | RB_MASK)
+
+/* An XRARB_MASK, but with the L bit clear.  The bit removed from the
+   mask is bit 16.  */
+#define XRLARB_MASK (XRARB_MASK & ~((unsigned long) 1 << 16))
+
+/* An XRARB_MASK, but with the L bits in a darn instruction clear.
+   The bits removed from the mask are bits 16..17.  */
+#define XLRAND_MASK (XRARB_MASK & ~((unsigned long) 3 << 16))
+
+/* An X_MASK with the RT and RA fields fixed.  */
+#define XRTRA_MASK (X_MASK | RT_MASK | RA_MASK)
+
+/* An X_MASK with the RT and RB fields fixed.  */
+#define XRTRB_MASK (X_MASK | RT_MASK | RB_MASK)
+
+/* An XRTRA_MASK, but with L bit clear.  The bit removed from the mask
+   is bit 21.  */
+#define XRTLRA_MASK (XRTRA_MASK & ~((unsigned long) 1 << 21))
+
+/* An X_MASK with the RT, RA and RB fields fixed.  */
+#define XRTRARB_MASK (X_MASK | RT_MASK | RA_MASK | RB_MASK)
+
+/* An XRTRARB_MASK, but with L bit clear.  The bit removed from the
+   mask is bit 21.  */
+#define XRTLRARB_MASK (XRTRARB_MASK & ~((unsigned long) 1 << 21))
+
+/* An XRTRARB_MASK, but with A bit clear.  The bit removed from the
+   mask is bit 25.  */
+#define XRTARARB_MASK (XRTRARB_MASK & ~((unsigned long) 1 << 25))
+
+/* An XRTRARB_MASK, but with BF bits clear.  The bits removed from the
+   mask are bits 23..25.  */
+#define XRTBFRARB_MASK (XRTRARB_MASK & ~((unsigned long) 7 << 23))
+
+/* An X form instruction with the L bit specified (bit 21).  */
+#define XOPL(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 1) << 21))
+
+/* An X form instruction with the L bits specified (bits 21..22).  */
+#define XOPL2(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 3) << 21))
+
+/* An X form instruction with the L bit and RC bit specified.  */
+#define XRCL(op, xop, l, rc) (XRC ((op), (xop), (rc)) | ((((unsigned long)(l)) & 1) << 21))
+
+/* An X form instruction with RT fields specified (5 bits at
+   bit 21).  */
+#define XRT(op, xop, rt) (X ((op), (xop)) \
+        | ((((unsigned long)(rt)) & 0x1f) << 21))
+
+/* An X form instruction with RT and RA fields specified (5 bits each,
+   at bit 21 and bit 16).  */
+#define XRTRA(op, xop, rt, ra) (X ((op), (xop)) \
+        | ((((unsigned long)(rt)) & 0x1f) << 21) \
+        | ((((unsigned long)(ra)) & 0x1f) << 16))
+
+/* The mask for an X form comparison instruction.  Bit 22 is added to
+   X_MASK.  */
+#define XCMP_MASK (X_MASK | (((unsigned long)1) << 22))
+
+/* The mask for an X form comparison instruction with the L field
+   fixed.  Bit 21 is added to XCMP_MASK.  */
+#define XCMPL_MASK (XCMP_MASK | (((unsigned long)1) << 21))
+
+/* An X form trap instruction with the TO field specified.  */
+#define XTO(op, xop, to) (X ((op), (xop)) | ((((unsigned long)(to)) & 0x1f) << 21))
+#define XTO_MASK (X_MASK | TO_MASK)
+
+/* An X form tlb instruction with the SH field specified.  */
+#define XTLB(op, xop, sh) (X ((op), (xop)) | ((((unsigned long)(sh)) & 0x1f) << 11))
+#define XTLB_MASK (X_MASK | SH_MASK)
+
+/* An X form sync instruction.  L is a 2-bit field at bit 21.  */
+#define XSYNC(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 3) << 21))
+
+/* An X form sync instruction with everything filled in except the LS field.
+   The constant has every bit set except bits 21..22.  */
+#define XSYNC_MASK (0xff9fffff)
+
+/* An X form sync instruction with everything filled in except the L and E fields.
+   The constant has every bit set except bits 21..22 and 16..19.  */
+#define XSYNCLE_MASK (0xff90ffff)
+
+/* An X_MASK, but with the EH bit clear.  The bit removed from the
+   mask is bit 0.  */
+#define XEH_MASK (X_MASK & ~((unsigned long )1))
+
+/* An X form AltiVec dss instruction.  A is a single bit at bit 25.  */
+#define XDSS(op, xop, a) (X ((op), (xop)) | ((((unsigned long)(a)) & 1) << 25))
+#define XDSS_MASK XDSS(0x3f, 0x3ff, 1)
+
+/* An XFL form instruction: a 10-bit extended opcode at bit 1 and the
+   RC bit in bit 0.  */
+#define XFL(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1) | (((unsigned long)(rc)) & 1))
+#define XFL_MASK XFL (0x3f, 0x3ff, 1)
+
+/* An X form isel instruction: only a 5-bit extended opcode at
+   bit 1.  */
+#define XISEL(op, xop)	(OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1))
+#define XISEL_MASK	XISEL(0x3f, 0x1f)
+
+/* An XL form instruction with the LK field set to 0.  */
+#define XL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1))
+
+/* An XL form instruction which uses the LK field.  */
+#define XLLK(op, xop, lk) (XL ((op), (xop)) | ((lk) & 1))
+
+/* The mask for an XL form instruction.  It includes the LK bit.  */
+#define XL_MASK XLLK (0x3f, 0x3ff, 1)
+
+/* An XL_MASK with the RT, RA and RB fields fixed, but S bit clear.
+   The bit removed from the mask is bit 11.  */
+#define XLS_MASK ((XL_MASK | RT_MASK | RA_MASK | RB_MASK) & ~(1 << 11))
+
+/* An XL form instruction which explicitly sets the BO field.  */
+#define XLO(op, bo, xop, lk) \
+  (XLLK ((op), (xop), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21))
+#define XLO_MASK (XL_MASK | BO_MASK)
+
+/* An XL form instruction which explicitly sets the y bit of the BO
+   field.  */
+#define XLYLK(op, xop, y, lk) (XLLK ((op), (xop), (lk)) | ((((unsigned long)(y)) & 1) << 21))
+#define XLYLK_MASK (XL_MASK | Y_MASK)
+
+/* An XL form instruction which sets the BO field and the condition
+   bits of the BI field.  CB is masked to 2 bits and placed at
+   bit 16.  */
+#define XLOCB(op, bo, cb, xop, lk) \
+  (XLO ((op), (bo), (xop), (lk)) | ((((unsigned long)(cb)) & 3) << 16))
+#define XLOCB_MASK XLOCB (0x3f, 0x1f, 0x3, 0x3ff, 1)
+
+/* An XL_MASK or XLYLK_MASK or XLOCB_MASK with the BB field fixed.  */
+#define XLBB_MASK (XL_MASK | BB_MASK)
+#define XLYBB_MASK (XLYLK_MASK | BB_MASK)
+#define XLBOCBBB_MASK (XLOCB_MASK | BB_MASK)
+
+/* A mask for branch instructions using the BH field.  Bits 13..15 are
+   added to XL_MASK.  */
+#define XLBH_MASK (XL_MASK | (0x1c << 11))
+
+/* An XL_MASK with the BO and BB fields fixed.  */
+#define XLBOBB_MASK (XL_MASK | BO_MASK | BB_MASK)
+
+/* An XL_MASK with the BO, BI and BB fields fixed.  */
+#define XLBOBIBB_MASK (XL_MASK | BO_MASK | BI_MASK | BB_MASK)
+
+/* An X form mbar instruction with MO field.  Only one bit of MO is
+   kept, in bit 21.  */
+#define XMBAR(op, xop, mo) (X ((op), (xop)) | ((((unsigned long)(mo)) & 1) << 21))
+
+/* An XO form instruction: a 9-bit extended opcode at bit 1, the OE
+   bit in bit 10 and the RC bit in bit 0.  */
+#define XO(op, xop, oe, rc) \
+  (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1) | ((((unsigned long)(oe)) & 1) << 10) | (((unsigned long)(rc)) & 1))
+#define XO_MASK XO (0x3f, 0x1ff, 1, 1)
+
+/* An XO_MASK with the RB field fixed.  */
+#define XORB_MASK (XO_MASK | RB_MASK)
+
+/* An XOPS form instruction for paired singles.  */
+#define XOPS(op, xop, rc) \
+  (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1) | (((unsigned long)(rc)) & 1))
+#define XOPS_MASK XOPS (0x3f, 0x3ff, 1)
+
+
+/* An XS form instruction: a 9-bit extended opcode at bit 2 and the RC
+   bit in bit 0.  */
+#define XS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 2) | (((unsigned long)(rc)) & 1))
+#define XS_MASK XS (0x3f, 0x1ff, 1)
+
+/* A mask for the FXM version of an XFX form instruction.  Bits 11 and
+   20 are added to X_MASK.  */
+#define XFXFXM_MASK (X_MASK | (1 << 11) | (1 << 20))
+
+/* An XFX form instruction with the FXM field filled in.  FXM is 8
+   bits at bit 12; P4 is shifted to bit 20 without being masked.  */
+#define XFXM(op, xop, fxm, p4) \
+  (X ((op), (xop)) | ((((unsigned long)(fxm)) & 0xff) << 12) \
+   | ((unsigned long)(p4) << 20))
+
+/* An XFX form instruction with the SPR field filled in.  The two
+   5-bit halves of SPR are stored swapped: the low half at bit 16 and
+   the high half at bit 11.  */
+#define XSPR(op, xop, spr) \
+  (X ((op), (xop)) | ((((unsigned long)(spr)) & 0x1f) << 16) | ((((unsigned long)(spr)) & 0x3e0) << 6))
+#define XSPR_MASK (X_MASK | SPR_MASK)
+
+/* An XFX form instruction with the SPR field filled in except for the
+   SPRBAT field.  */
+#define XSPRBAT_MASK (XSPR_MASK &~ SPRBAT_MASK)
+
+/* An XFX form instruction with the SPR field filled in except for the
+   SPRG field.  The bits removed from the mask are bits 16..20.  */
+#define XSPRG_MASK (XSPR_MASK & ~(0x1f << 16))
+
+/* An X form instruction with everything filled in except the E field.
+   The constant has every bit set except bit 15.  */
+#define XE_MASK (0xffff7fff)
+
+/* An X form user context instruction.  */
+#define XUC(op, xop)  (OP (op) | (((unsigned long)(xop)) & 0x1f))
+#define XUC_MASK      XUC(0x3f, 0x1f)
+
+/* An XW form instruction: a 6-bit extended opcode at bit 1 and the RC
+   bit in bit 0.  */
+#define XW(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x3f) << 1) | ((rc) & 1))
+/* The mask for a G form instruction. rc not supported at present.
+   The RC bit is therefore left out of the mask.  */
+#define XW_MASK XW (0x3f, 0x3f, 0)
+
+/* An APU form instruction.  */
+#define APU(op, xop, rc) (OP (op) | (((unsigned long)(xop)) & 0x3ff) << 1 | ((rc) & 1))
+
+/* The mask for an APU form instruction.  */
+#define APU_MASK APU (0x3f, 0x3ff, 1)
+#define APU_RT_MASK (APU_MASK | RT_MASK)
+#define APU_RA_MASK (APU_MASK | RA_MASK)
+
+/* The BO encodings used in extended conditional branch mnemonics.
+   These are 5-bit values; the names that end in P differ from their
+   base name only in the lowest bit.  */
+#define BODNZF	(0x0)
+#define BODNZFP	(0x1)
+#define BODZF	(0x2)
+#define BODZFP	(0x3)
+#define BODNZT	(0x8)
+#define BODNZTP	(0x9)
+#define BODZT	(0xa)
+#define BODZTP	(0xb)
+
+#define BOF	(0x4)
+#define BOFP	(0x5)
+#define BOFM4	(0x6)
+#define BOFP4	(0x7)
+#define BOT	(0xc)
+#define BOTP	(0xd)
+#define BOTM4	(0xe)
+#define BOTP4	(0xf)
+
+#define BODNZ	(0x10)
+#define BODNZP	(0x11)
+#define BODZ	(0x12)
+#define BODZP	(0x13)
+#define BODNZM4 (0x18)
+#define BODNZP4 (0x19)
+#define BODZM4	(0x1a)
+#define BODZP4	(0x1b)
+
+#define BOU	(0x14)
+
+/* The BO16 encodings used in extended VLE conditional branch mnemonics.  */
+#define BO16F   (0x0)
+#define BO16T   (0x1)
+
+/* The BO32 encodings used in extended VLE conditional branch mnemonics.  */
+#define BO32F   (0x0)
+#define BO32T   (0x1)
+#define BO32DNZ (0x2)
+#define BO32DZ  (0x3)
+
+/* The BI condition bit encodings used in extended conditional branch
+   mnemonics.  */
+#define CBLT	(0)
+#define CBGT	(1)
+#define CBEQ	(2)
+#define CBSO	(3)
+
+/* The TO encodings used in extended trap mnemonics.  Several names
+   are synonyms for the same value: TOLGE/TOLNL, TOLLE/TOLNG,
+   TOGE/TONL and TOLE/TONG.  */
+#define TOLGT	(0x1)
+#define TOLLT	(0x2)
+#define TOEQ	(0x4)
+#define TOLGE	(0x5)
+#define TOLNL	(0x5)
+#define TOLLE	(0x6)
+#define TOLNG	(0x6)
+#define TOGT	(0x8)
+#define TOGE	(0xc)
+#define TONL	(0xc)
+#define TOLT	(0x10)
+#define TOLE	(0x14)
+#define TONG	(0x14)
+#define TONE	(0x18)
+#define TOU	(0x1f)
+
+/* Smaller names for the flags so each entry in the opcodes table will
+   fit on a single line.  Every name that stands for several flags
+   ORed together expands to a parenthesized expression, so it stays a
+   single operand whatever operator it is combined with.  */
+#undef	PPC
+#define PPC	PPC_OPCODE_PPC
+#define PPCCOM	(PPC_OPCODE_PPC | PPC_OPCODE_COMMON)
+#define POWER4	PPC_OPCODE_POWER4
+#define POWER5	PPC_OPCODE_POWER5
+#define POWER6	PPC_OPCODE_POWER6
+#define POWER7	PPC_OPCODE_POWER7
+#define POWER8	PPC_OPCODE_POWER8
+#define POWER9	PPC_OPCODE_POWER9
+#define CELL	PPC_OPCODE_CELL
+#define PPC64	(PPC_OPCODE_64 | PPC_OPCODE_64_BRIDGE)
+#define NON32	(PPC_OPCODE_64 | PPC_OPCODE_POWER4	\
+		 | PPC_OPCODE_EFS | PPC_OPCODE_E500MC | PPC_OPCODE_TITAN)
+#define PPC403	PPC_OPCODE_403
+#define PPC405	PPC_OPCODE_405
+#define PPC440	PPC_OPCODE_440
+#define PPC464	PPC440
+#define PPC476	PPC_OPCODE_476
+#define PPC750	PPC_OPCODE_750
+#define PPC7450 PPC_OPCODE_7450
+#define PPC860	PPC_OPCODE_860
+#define PPCPS	PPC_OPCODE_PPCPS
+#define PPCVEC	PPC_OPCODE_ALTIVEC
+#define PPCVEC2	PPC_OPCODE_ALTIVEC2
+/* PPCVEC3 and PPCVSX2 are aliases of the previous level's flag.  */
+#define PPCVEC3	PPC_OPCODE_ALTIVEC2
+#define PPCVSX	PPC_OPCODE_VSX
+#define PPCVSX2	PPC_OPCODE_VSX
+#define PPCVSX3	PPC_OPCODE_VSX3
+#define POWER	PPC_OPCODE_POWER
+#define POWER2	(PPC_OPCODE_POWER | PPC_OPCODE_POWER2)
+#define PWR2COM (PPC_OPCODE_POWER | PPC_OPCODE_POWER2 | PPC_OPCODE_COMMON)
+#define PPCPWR2 (PPC_OPCODE_PPC | PPC_OPCODE_POWER | PPC_OPCODE_POWER2 | PPC_OPCODE_COMMON)
+#define COM	(PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON)
+#define M601	(PPC_OPCODE_POWER | PPC_OPCODE_601)
+#define PWRCOM	(PPC_OPCODE_POWER | PPC_OPCODE_601 | PPC_OPCODE_COMMON)
+#define MFDEC1	PPC_OPCODE_POWER
+#define MFDEC2	(PPC_OPCODE_PPC | PPC_OPCODE_601 | PPC_OPCODE_BOOKE | PPC_OPCODE_TITAN)
+#define BOOKE	PPC_OPCODE_BOOKE
+#define NO371	(PPC_OPCODE_BOOKE | PPC_OPCODE_PPCPS | PPC_OPCODE_EFS)
+#define PPCE300 PPC_OPCODE_E300
+#define PPCSPE	PPC_OPCODE_SPE
+#define PPCISEL PPC_OPCODE_ISEL
+#define PPCEFS	PPC_OPCODE_EFS
+#define PPCBRLK PPC_OPCODE_BRLOCK
+#define PPCPMR	PPC_OPCODE_PMR
+#define PPCTMR  PPC_OPCODE_TMR
+#define PPCCHLK PPC_OPCODE_CACHELCK
+#define PPCRFMCI	PPC_OPCODE_RFMCI
+#define E500MC  PPC_OPCODE_E500MC
+#define PPCA2	PPC_OPCODE_A2
+#define TITAN   PPC_OPCODE_TITAN
+#define MULHW   (PPC_OPCODE_405 | PPC_OPCODE_440 | TITAN)
+#define E500	PPC_OPCODE_E500
+#define E6500	PPC_OPCODE_E6500
+#define PPCVLE  PPC_OPCODE_VLE
+#define PPCHTM  PPC_OPCODE_HTM
+#define E200Z4  PPC_OPCODE_E200Z4
+/* The list of embedded processors that use the embedded operand ordering
+   for the 3 operand dcbt and dcbtst instructions.  */
+#define DCBT_EO	(PPC_OPCODE_E500 | PPC_OPCODE_E500MC | PPC_OPCODE_476 \
+		 | PPC_OPCODE_A2)
+
+
+
+/* The opcode table.
+
+   The format of the opcode table is:
+
+   NAME		OPCODE		MASK	     FLAGS	ANTI		{OPERANDS}
+
+   NAME is the name of the instruction.
+   OPCODE is the instruction opcode.
+   MASK is the opcode mask; this is used to tell the disassembler
+     which bits in the actual opcode must match OPCODE.
+   FLAGS are flags indicating which processors support the instruction.
+   ANTI indicates which processors don't support the instruction.
+   OPERANDS is the list of operands.
+
+   The disassembler reads the table in order and prints the first
+   instruction which matches, so this table is sorted to put more
+   specific instructions before more general instructions.
+
+   This table must be sorted by major opcode.  Please try to keep it
+   vaguely sorted within major opcode too, except of course where
+   constrained otherwise by disassembler operation.  */
+
+const struct powerpc_opcode powerpc_opcodes[] = {
+{"attn",	X(0,256),	X_MASK,	  POWER4|PPCA2,	PPC476|PPCVLE,	{0}},
+{"tdlgti",	OPTO(2,TOLGT),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdllti",	OPTO(2,TOLLT),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdeqi",	OPTO(2,TOEQ),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdlgei",	OPTO(2,TOLGE),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdlnli",	OPTO(2,TOLNL),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdllei",	OPTO(2,TOLLE),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdlngi",	OPTO(2,TOLNG),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdgti",	OPTO(2,TOGT),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdgei",	OPTO(2,TOGE),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdnli",	OPTO(2,TONL),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdlti",	OPTO(2,TOLT),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdlei",	OPTO(2,TOLE),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdngi",	OPTO(2,TONG),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdnei",	OPTO(2,TONE),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdui",	OPTO(2,TOU),	OPTO_MASK,   PPC64,	PPCVLE,		{RA, SI}},
+{"tdi",		OP(2),		OP_MASK,     PPC64,	PPCVLE,		{TO, RA, SI}},
+
+{"twlgti",	OPTO(3,TOLGT),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tlgti",	OPTO(3,TOLGT),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twllti",	OPTO(3,TOLLT),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tllti",	OPTO(3,TOLLT),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"tweqi",	OPTO(3,TOEQ),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"teqi",	OPTO(3,TOEQ),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twlgei",	OPTO(3,TOLGE),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tlgei",	OPTO(3,TOLGE),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twlnli",	OPTO(3,TOLNL),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tlnli",	OPTO(3,TOLNL),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twllei",	OPTO(3,TOLLE),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tllei",	OPTO(3,TOLLE),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twlngi",	OPTO(3,TOLNG),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tlngi",	OPTO(3,TOLNG),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twgti",	OPTO(3,TOGT),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tgti",	OPTO(3,TOGT),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twgei",	OPTO(3,TOGE),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tgei",	OPTO(3,TOGE),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twnli",	OPTO(3,TONL),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tnli",	OPTO(3,TONL),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twlti",	OPTO(3,TOLT),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tlti",	OPTO(3,TOLT),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twlei",	OPTO(3,TOLE),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tlei",	OPTO(3,TOLE),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twngi",	OPTO(3,TONG),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tngi",	OPTO(3,TONG),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twnei",	OPTO(3,TONE),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tnei",	OPTO(3,TONE),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twui",	OPTO(3,TOU),	OPTO_MASK,   PPCCOM,	PPCVLE,		{RA, SI}},
+{"tui",		OPTO(3,TOU),	OPTO_MASK,   PWRCOM,	PPCVLE,		{RA, SI}},
+{"twi",		OP(3),		OP_MASK,     PPCCOM,	PPCVLE,		{TO, RA, SI}},
+{"ti",		OP(3),		OP_MASK,     PWRCOM,	PPCVLE,		{TO, RA, SI}},
+
+{"ps_cmpu0",	X  (4,	 0),	XBF_MASK,    PPCPS,	0,		{BF, FRA, FRB}},
+{"vaddubm",	VX (4,	 0),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmul10cuq",	VX (4,	 1),	VXVB_MASK,   PPCVEC3,	0,		{VD, VA}},
+{"vmaxub",	VX (4,	 2),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrlb",	VX (4,	 4),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpequb",	VXR(4,	 6,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpneb",	VXR(4,	 7,0),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vmuloub",	VX (4,	 8),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vaddfp",	VX (4,	10),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"psq_lx",	XW (4,	 6,0),	XW_MASK,     PPCPS,	0,		{FRT,RA,RB,PSWM,PSQM}},
+{"vmrghb",	VX (4,	12),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"psq_stx",	XW (4,	 7,0),	XW_MASK,     PPCPS,	0,		{FRS,RA,RB,PSWM,PSQM}},
+{"vpkuhum",	VX (4,	14),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"mulhhwu",	XRC(4,	 8,0),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"mulhhwu.",	XRC(4,	 8,1),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"ps_sum0",	A  (4,	10,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_sum0.",	A  (4,	10,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_sum1",	A  (4,	11,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_sum1.",	A  (4,	11,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_muls0",	A  (4,	12,0),	AFRB_MASK,   PPCPS,	0,		{FRT, FRA, FRC}},
+{"machhwu",	XO (4,	12,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_muls0.",	A  (4,	12,1),	AFRB_MASK,   PPCPS,	0,		{FRT, FRA, FRC}},
+{"machhwu.",	XO (4,	12,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_muls1",	A  (4,	13,0),	AFRB_MASK,   PPCPS,	0,		{FRT, FRA, FRC}},
+{"ps_muls1.",	A  (4,	13,1),	AFRB_MASK,   PPCPS,	0,		{FRT, FRA, FRC}},
+{"ps_madds0",	A  (4,	14,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_madds0.",	A  (4,	14,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_madds1",	A  (4,	15,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_madds1.",	A  (4,	15,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vmhaddshs",	VXA(4,	32),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"vmhraddshs",	VXA(4,	33),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"vmladduhm",	VXA(4,	34),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"vmsumudm",	VXA(4,	35),	VXA_MASK,    PPCVEC3,	0,		{VD, VA, VB, VC}},
+{"ps_div",	A  (4,	18,0),	AFRC_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vmsumubm",	VXA(4,	36),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"ps_div.",	A  (4,	18,1),	AFRC_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vmsummbm",	VXA(4,	37),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"vmsumuhm",	VXA(4,	38),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"vmsumuhs",	VXA(4,	39),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"ps_sub",	A  (4,	20,0),	AFRC_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vmsumshm",	VXA(4,	40),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"ps_sub.",	A  (4,	20,1),	AFRC_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vmsumshs",	VXA(4,	41),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"ps_add",	A  (4,	21,0),	AFRC_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vsel",	VXA(4,	42),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"ps_add.",	A  (4,	21,1),	AFRC_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vperm",	VXA(4,	43),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VB, VC}},
+{"vsldoi",	VXA(4,	44),	VXASHB_MASK, PPCVEC,	0,		{VD, VA, VB, SHB}},
+{"vpermxor",	VXA(4,	45),	VXA_MASK,    PPCVEC2,	0,		{VD, VA, VB, VC}},
+{"ps_sel",	A  (4,	23,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vmaddfp",	VXA(4,	46),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VC, VB}},
+{"ps_sel.",	A  (4,	23,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vnmsubfp",	VXA(4,	47),	VXA_MASK,    PPCVEC,	0,		{VD, VA, VC, VB}},
+{"ps_res",	A  (4,	24,0), AFRAFRC_MASK, PPCPS,	0,		{FRT, FRB}},
+{"maddhd",	VXA(4,	48),	VXA_MASK,    POWER9,	0,		{RT, RA, RB, RC}},
+{"ps_res.",	A  (4,	24,1), AFRAFRC_MASK, PPCPS,	0,		{FRT, FRB}},
+{"maddhdu",	VXA(4,	49),	VXA_MASK,    POWER9,	0,		{RT, RA, RB, RC}},
+{"ps_mul",	A  (4,	25,0),	AFRB_MASK,   PPCPS,	0,		{FRT, FRA, FRC}},
+{"ps_mul.",	A  (4,	25,1),	AFRB_MASK,   PPCPS,	0,		{FRT, FRA, FRC}},
+{"maddld",	VXA(4,	51),	VXA_MASK,    POWER9,	0,		{RT, RA, RB, RC}},
+{"ps_rsqrte",	A  (4,	26,0), AFRAFRC_MASK, PPCPS,	0,		{FRT, FRB}},
+{"ps_rsqrte.",	A  (4,	26,1), AFRAFRC_MASK, PPCPS,	0,		{FRT, FRB}},
+{"ps_msub",	A  (4,	28,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_msub.",	A  (4,	28,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_madd",	A  (4,	29,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"ps_madd.",	A  (4,	29,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vpermr",	VXA(4,	59),	VXA_MASK,    PPCVEC3,	0,		{VD, VA, VB, VC}},
+{"ps_nmsub",	A  (4,	30,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vaddeuqm",	VXA(4,	60),	VXA_MASK,    PPCVEC2,	0,		{VD, VA, VB, VC}},
+{"ps_nmsub.",	A  (4,	30,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vaddecuq",	VXA(4,	61),	VXA_MASK,    PPCVEC2,	0,		{VD, VA, VB, VC}},
+{"ps_nmadd",	A  (4,	31,0),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vsubeuqm",	VXA(4,	62),	VXA_MASK,    PPCVEC2,	0,		{VD, VA, VB, VC}},
+{"ps_nmadd.",	A  (4,	31,1),	A_MASK,	     PPCPS,	0,		{FRT, FRA, FRC, FRB}},
+{"vsubecuq",	VXA(4,	63),	VXA_MASK,    PPCVEC2,	0,		{VD, VA, VB, VC}},
+{"ps_cmpo0",	X  (4,	32),	XBF_MASK,    PPCPS,	0,		{BF, FRA, FRB}},
+{"vadduhm",	VX (4,	64),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmul10ecuq",	VX (4,	65),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vmaxuh",	VX (4,	66),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrlh",	VX (4,	68),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpequh",	VXR(4,	70,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpneh",	VXR(4,	71,0),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vmulouh",	VX (4,	72),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vsubfp",	VX (4,	74),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"psq_lux",	XW (4,	38,0),	XW_MASK,     PPCPS,	0,		{FRT,RA,RB,PSWM,PSQM}},
+{"vmrghh",	VX (4,	76),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"psq_stux",	XW (4,	39,0),	XW_MASK,     PPCPS,	0,		{FRS,RA,RB,PSWM,PSQM}},
+{"vpkuwum",	VX (4,	78),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"ps_neg",	XRC(4,	40,0),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"mulhhw",	XRC(4,	40,0),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"ps_neg.",	XRC(4,	40,1),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"mulhhw.",	XRC(4,	40,1),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"machhw",	XO (4,	44,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"machhw.",	XO (4,	44,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhw",	XO (4,	46,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhw.",	XO (4,	46,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_cmpu1",	X  (4,	64),	XBF_MASK,    PPCPS,	0,		{BF, FRA, FRB}},
+{"vadduwm",	VX (4,	128),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmaxuw",	VX (4,	130),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrlw",	VX (4,	132),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrlwmi",	VX (4,	133),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vcmpequw",	VXR(4,	134,0), VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpnew",	VXR(4,	135,0),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vmulouw",	VX (4,	136),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vmuluwm",	VX (4,	137),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vmrghw",	VX (4,	140),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vpkuhus",	VX (4,	142),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"ps_mr",	XRC(4,	72,0),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"ps_mr.",	XRC(4,	72,1),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"machhwsu",	XO (4,	76,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"machhwsu.",	XO (4,	76,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_cmpo1",	X  (4,	96),	XBF_MASK,    PPCPS,	0,		{BF, FRA, FRB}},
+{"vaddudm",	VX (4, 192),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vmaxud",	VX (4, 194),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vrld",	VX (4, 196),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vrldmi",	VX (4, 197),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vcmpeqfp",	VXR(4, 198,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpequd",	VXR(4, 199,0),	VXR_MASK,    PPCVEC2,	0,		{VD, VA, VB}},
+{"vpkuwus",	VX (4, 206),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"machhws",	XO (4, 108,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"machhws.",	XO (4, 108,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhws",	XO (4, 110,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhws.",	XO (4, 110,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vadduqm",	VX (4, 256),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vmaxsb",	VX (4, 258),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vslb",	VX (4, 260),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpnezb",	VXR(4, 263,0),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vmulosb",	VX (4, 264),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrefp",	VX (4, 266),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"vmrglb",	VX (4, 268),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vpkshus",	VX (4, 270),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"ps_nabs",	XRC(4, 136,0),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"mulchwu",	XRC(4, 136,0),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"ps_nabs.",	XRC(4, 136,1),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"mulchwu.",	XRC(4, 136,1),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"macchwu",	XO (4, 140,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"macchwu.",	XO (4, 140,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vaddcuq",	VX (4, 320),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vmaxsh",	VX (4, 322),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vslh",	VX (4, 324),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpnezh",	VXR(4, 327,0),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vmulosh",	VX (4, 328),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrsqrtefp",	VX (4, 330),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"vmrglh",	VX (4, 332),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vpkswus",	VX (4, 334),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"mulchw",	XRC(4, 168,0),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"mulchw.",	XRC(4, 168,1),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"macchw",	XO (4, 172,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"macchw.",	XO (4, 172,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmacchw",	XO (4, 174,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmacchw.",	XO (4, 174,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vaddcuw",	VX (4, 384),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmaxsw",	VX (4, 386),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vslw",	VX (4, 388),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrlwnm",	VX (4, 389),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vcmpnezw",	VXR(4, 391,0),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vmulosw",	VX (4, 392),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vexptefp",	VX (4, 394),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"vmrglw",	VX (4, 396),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vpkshss",	VX (4, 398),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"macchwsu",	XO (4, 204,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"macchwsu.",	XO (4, 204,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vmaxsd",	VX (4, 450),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vsl",		VX (4, 452),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrldnm",	VX (4, 453),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vcmpgefp",	VXR(4, 454,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vlogefp",	VX (4, 458),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"vpkswss",	VX (4, 462),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"macchws",	XO (4, 236,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"macchws.",	XO (4, 236,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmacchws",	XO (4, 238,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmacchws.",	XO (4, 238,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evaddw",	VX (4, 512),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vaddubs",	VX (4, 512),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmul10uq",	VX (4, 513),	VXVB_MASK,   PPCVEC3,	0,		{VD, VA}},
+{"evaddiw",	VX (4, 514),	VX_MASK,     PPCSPE,	0,		{RS, RB, UIMM}},
+{"vminub",	VX (4, 514),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evsubfw",	VX (4, 516),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evsubw",	VX (4, 516),	VX_MASK,     PPCSPE,	0,		{RS, RB, RA}},
+{"vsrb",	VX (4, 516),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evsubifw",	VX (4, 518),	VX_MASK,     PPCSPE,	0,		{RS, UIMM, RB}},
+{"evsubiw",	VX (4, 518),	VX_MASK,     PPCSPE,	0,		{RS, RB, UIMM}},
+{"vcmpgtub",	VXR(4, 518,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"evabs",	VX (4, 520),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vmuleub",	VX (4, 520),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evneg",	VX (4, 521),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evextsb",	VX (4, 522),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vrfin",	VX (4, 522),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"evextsh",	VX (4, 523),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evrndw",	VX (4, 524),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vspltb",	VX (4, 524),   VXUIMM4_MASK, PPCVEC,	0,		{VD, VB, UIMM4}},
+{"vextractub",	VX (4, 525),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"evcntlzw",	VX (4, 525),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evcntlsw",	VX (4, 526),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vupkhsb",	VX (4, 526),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"brinc",	VX (4, 527),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"ps_abs",	XRC(4, 264,0),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"ps_abs.",	XRC(4, 264,1),	XRA_MASK,    PPCPS,	0,		{FRT, FRB}},
+{"evand",	VX (4, 529),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evandc",	VX (4, 530),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evxor",	VX (4, 534),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmr",	VX (4, 535),	VX_MASK,     PPCSPE,	0,		{RS, RA, BBA}},
+{"evor",	VX (4, 535),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evnor",	VX (4, 536),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evnot",	VX (4, 536),	VX_MASK,     PPCSPE,	0,		{RS, RA, BBA}},
+{"get",		APU(4, 268,0),	APU_RA_MASK, PPC405,	0,		{RT, FSL}},
+{"eveqv",	VX (4, 537),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evorc",	VX (4, 539),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evnand",	VX (4, 542),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evsrwu",	VX (4, 544),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evsrws",	VX (4, 545),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evsrwiu",	VX (4, 546),	VX_MASK,     PPCSPE,	0,		{RS, RA, EVUIMM}},
+{"evsrwis",	VX (4, 547),	VX_MASK,     PPCSPE,	0,		{RS, RA, EVUIMM}},
+{"evslw",	VX (4, 548),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evslwi",	VX (4, 550),	VX_MASK,     PPCSPE,	0,		{RS, RA, EVUIMM}},
+{"evrlw",	VX (4, 552),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evsplati",	VX (4, 553),	VX_MASK,     PPCSPE,	0,		{RS, SIMM}},
+{"evrlwi",	VX (4, 554),	VX_MASK,     PPCSPE,	0,		{RS, RA, EVUIMM}},
+{"evsplatfi",	VX (4, 555),	VX_MASK,     PPCSPE,	0,		{RS, SIMM}},
+{"evmergehi",	VX (4, 556),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmergelo",	VX (4, 557),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmergehilo",	VX (4, 558),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmergelohi",	VX (4, 559),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evcmpgtu",	VX (4, 560),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evcmpgts",	VX (4, 561),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evcmpltu",	VX (4, 562),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evcmplts",	VX (4, 563),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evcmpeq",	VX (4, 564),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"cget",	APU(4, 284,0),	APU_RA_MASK, PPC405,	0,		{RT, FSL}},
+{"vadduhs",	VX (4, 576),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmul10euq",	VX (4, 577),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vminuh",	VX (4, 578),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vsrh",	VX (4, 580),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpgtuh",	VXR(4, 582,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vmuleuh",	VX (4, 584),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vrfiz",	VX (4, 586),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"vsplth",	VX (4, 588),   VXUIMM3_MASK, PPCVEC,	0,		{VD, VB, UIMM3}},
+{"vextractuh",	VX (4, 589),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"vupkhsh",	VX (4, 590),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"nget",	APU(4, 300,0),	APU_RA_MASK, PPC405,	0,		{RT, FSL}},
+{"evsel",	EVSEL(4,79),	EVSEL_MASK,  PPCSPE,	0,		{RS, RA, RB, CRFS}},
+{"ncget",	APU(4, 316,0),	APU_RA_MASK, PPC405,	0,		{RT, FSL}},
+{"evfsadd",	VX (4, 640),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vadduws",	VX (4, 640),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evfssub",	VX (4, 641),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vminuw",	VX (4, 642),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evfsabs",	VX (4, 644),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vsrw",	VX (4, 644),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evfsnabs",	VX (4, 645),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evfsneg",	VX (4, 646),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vcmpgtuw",	VXR(4, 646,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vmuleuw",	VX (4, 648),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evfsmul",	VX (4, 648),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evfsdiv",	VX (4, 649),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vrfip",	VX (4, 650),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"evfscmpgt",	VX (4, 652),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"vspltw",	VX (4, 652),   VXUIMM2_MASK, PPCVEC,	0,		{VD, VB, UIMM2}},
+{"vextractuw",	VX (4, 653),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"evfscmplt",	VX (4, 653),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evfscmpeq",	VX (4, 654),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"vupklsb",	VX (4, 654),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"evfscfui",	VX (4, 656),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfscfsi",	VX (4, 657),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfscfuf",	VX (4, 658),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfscfsf",	VX (4, 659),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfsctui",	VX (4, 660),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfsctsi",	VX (4, 661),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfsctuf",	VX (4, 662),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfsctsf",	VX (4, 663),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfsctuiz",	VX (4, 664),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"put",		APU(4, 332,0),	APU_RT_MASK, PPC405,	0,		{RA, FSL}},
+{"evfsctsiz",	VX (4, 666),	VX_MASK,     PPCSPE,	0,		{RS, RB}},
+{"evfststgt",	VX (4, 668),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evfststlt",	VX (4, 669),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"evfststeq",	VX (4, 670),	VX_MASK,     PPCSPE,	0,		{CRFD, RA, RB}},
+{"cput",	APU(4, 348,0),	APU_RT_MASK, PPC405,	0,		{RA, FSL}},
+{"efsadd",	VX (4, 704),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"efssub",	VX (4, 705),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"vminud",	VX (4, 706),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"efsabs",	VX (4, 708),	VX_MASK,     PPCEFS,	0,		{RS, RA}},
+{"vsr",		VX (4, 708),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"efsnabs",	VX (4, 709),	VX_MASK,     PPCEFS,	0,		{RS, RA}},
+{"efsneg",	VX (4, 710),	VX_MASK,     PPCEFS,	0,		{RS, RA}},
+{"vcmpgtfp",	VXR(4, 710,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpgtud",	VXR(4, 711,0),	VXR_MASK,    PPCVEC2,	0,		{VD, VA, VB}},
+{"efsmul",	VX (4, 712),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"efsdiv",	VX (4, 713),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"vrfim",	VX (4, 714),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"efscmpgt",	VX (4, 716),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"vextractd",	VX (4, 717),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"efscmplt",	VX (4, 717),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efscmpeq",	VX (4, 718),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"vupklsh",	VX (4, 718),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"efscfd",	VX (4, 719),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efscfui",	VX (4, 720),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efscfsi",	VX (4, 721),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efscfuf",	VX (4, 722),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efscfsf",	VX (4, 723),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efsctui",	VX (4, 724),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efsctsi",	VX (4, 725),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efsctuf",	VX (4, 726),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efsctsf",	VX (4, 727),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efsctuiz",	VX (4, 728),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"nput",	APU(4, 364,0),	APU_RT_MASK, PPC405,	0,		{RA, FSL}},
+{"efsctsiz",	VX (4, 730),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efststgt",	VX (4, 732),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efststlt",	VX (4, 733),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efststeq",	VX (4, 734),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efdadd",	VX (4, 736),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"efdsub",	VX (4, 737),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"efdcfuid",	VX (4, 738),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdcfsid",	VX (4, 739),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdabs",	VX (4, 740),	VX_MASK,     PPCEFS,	0,		{RS, RA}},
+{"efdnabs",	VX (4, 741),	VX_MASK,     PPCEFS,	0,		{RS, RA}},
+{"efdneg",	VX (4, 742),	VX_MASK,     PPCEFS,	0,		{RS, RA}},
+{"efdmul",	VX (4, 744),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"efddiv",	VX (4, 745),	VX_MASK,     PPCEFS,	0,		{RS, RA, RB}},
+{"efdctuidz",	VX (4, 746),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdctsidz",	VX (4, 747),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdcmpgt",	VX (4, 748),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efdcmplt",	VX (4, 749),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efdcmpeq",	VX (4, 750),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efdcfs",	VX (4, 751),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdcfui",	VX (4, 752),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdcfsi",	VX (4, 753),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdcfuf",	VX (4, 754),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdcfsf",	VX (4, 755),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdctui",	VX (4, 756),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdctsi",	VX (4, 757),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdctuf",	VX (4, 758),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdctsf",	VX (4, 759),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdctuiz",	VX (4, 760),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"ncput",	APU(4, 380,0),	APU_RT_MASK, PPC405,	0,		{RA, FSL}},
+{"efdctsiz",	VX (4, 762),	VX_MASK,     PPCEFS,	0,		{RS, RB}},
+{"efdtstgt",	VX (4, 764),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efdtstlt",	VX (4, 765),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"efdtsteq",	VX (4, 766),	VX_MASK,     PPCEFS,	0,		{CRFD, RA, RB}},
+{"evlddx",	VX (4, 768),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vaddsbs",	VX (4, 768),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evldd",	VX (4, 769),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_8, RA}},
+{"evldwx",	VX (4, 770),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vminsb",	VX (4, 770),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evldw",	VX (4, 771),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_8, RA}},
+{"evldhx",	VX (4, 772),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsrab",	VX (4, 772),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evldh",	VX (4, 773),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_8, RA}},
+{"vcmpgtsb",	VXR(4, 774,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"evlhhesplatx",VX (4, 776),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vmulesb",	VX (4, 776),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evlhhesplat",	VX (4, 777),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_2, RA}},
+{"vcfux",	VX (4, 778),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vcuxwfp",	VX (4, 778),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"evlhhousplatx",VX(4, 780),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vspltisb",	VX (4, 780),	VXVB_MASK,   PPCVEC,	0,		{VD, SIMM}},
+{"vinsertb",	VX (4, 781),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"evlhhousplat",VX (4, 781),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_2, RA}},
+{"evlhhossplatx",VX(4, 782),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vpkpx",	VX (4, 782),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evlhhossplat",VX (4, 783),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_2, RA}},
+{"mullhwu",	XRC(4, 392,0),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"evlwhex",	VX (4, 784),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"mullhwu.",	XRC(4, 392,1),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"evlwhe",	VX (4, 785),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evlwhoux",	VX (4, 788),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evlwhou",	VX (4, 789),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evlwhosx",	VX (4, 790),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evlwhos",	VX (4, 791),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"maclhwu",	XO (4, 396,0,0),XO_MASK,     MULHW,	0,		{RT, RA, RB}},
+{"evlwwsplatx",	VX (4, 792),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"maclhwu.",	XO (4, 396,0,1),XO_MASK,     MULHW,	0,		{RT, RA, RB}},
+{"evlwwsplat",	VX (4, 793),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evlwhsplatx",	VX (4, 796),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evlwhsplat",	VX (4, 797),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evstddx",	VX (4, 800),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstdd",	VX (4, 801),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_8, RA}},
+{"evstdwx",	VX (4, 802),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstdw",	VX (4, 803),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_8, RA}},
+{"evstdhx",	VX (4, 804),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstdh",	VX (4, 805),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_8, RA}},
+{"evstwhex",	VX (4, 816),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstwhe",	VX (4, 817),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evstwhox",	VX (4, 820),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstwho",	VX (4, 821),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evstwwex",	VX (4, 824),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstwwe",	VX (4, 825),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"evstwwox",	VX (4, 828),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evstwwo",	VX (4, 829),	VX_MASK,     PPCSPE,	0,		{RS, EVUIMM_4, RA}},
+{"vaddshs",	VX (4, 832),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"bcdcpsgn.",	VX (4, 833),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vminsh",	VX (4, 834),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vsrah",	VX (4, 836),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpgtsh",	VXR(4, 838,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vmulesh",	VX (4, 840),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcfsx",	VX (4, 842),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vcsxwfp",	VX (4, 842),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vspltish",	VX (4, 844),	VXVB_MASK,   PPCVEC,	0,		{VD, SIMM}},
+{"vinserth",	VX (4, 845),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"vupkhpx",	VX (4, 846),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"mullhw",	XRC(4, 424,0),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"mullhw.",	XRC(4, 424,1),	X_MASK,	     MULHW,	0,		{RT, RA, RB}},
+{"maclhw",	XO (4, 428,0,0),XO_MASK,     MULHW,	0,		{RT, RA, RB}},
+{"maclhw.",	XO (4, 428,0,1),XO_MASK,     MULHW,	0,		{RT, RA, RB}},
+{"nmaclhw",	XO (4, 430,0,0),XO_MASK,     MULHW,	0,		{RT, RA, RB}},
+{"nmaclhw.",	XO (4, 430,0,1),XO_MASK,     MULHW,	0,		{RT, RA, RB}},
+{"vaddsws",	VX (4, 896),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vminsw",	VX (4, 898),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vsraw",	VX (4, 900),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpgtsw",	VXR(4, 902,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vmulesw",	VX (4, 904),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vctuxs",	VX (4, 906),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vcfpuxws",	VX (4, 906),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vspltisw",	VX (4, 908),	VXVB_MASK,   PPCVEC,	0,		{VD, SIMM}},
+{"vinsertw",	VX (4, 909),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"maclhwsu",	XO (4, 460,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"maclhwsu.",	XO (4, 460,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vminsd",	VX (4, 962),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vsrad",	VX (4, 964),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vcmpbfp",	VXR(4, 966,0),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpgtsd",	VXR(4, 967,0),	VXR_MASK,    PPCVEC2,	0,		{VD, VA, VB}},
+{"vctsxs",	VX (4, 970),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vcfpsxws",	VX (4, 970),	VX_MASK,     PPCVEC,	0,		{VD, VB, UIMM}},
+{"vinsertd",	VX (4, 973),   VXUIMM4_MASK, PPCVEC3,	0,		{VD, VB, UIMM4}},
+{"vupklpx",	VX (4, 974),	VXVA_MASK,   PPCVEC,	0,		{VD, VB}},
+{"maclhws",	XO (4, 492,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"maclhws.",	XO (4, 492,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmaclhws",	XO (4, 494,0,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmaclhws.",	XO (4, 494,0,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vsububm",	VX (4,1024),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"bcdadd.",	VX (4,1025),	VXPS_MASK,   PPCVEC2,	0,		{VD, VA, VB, PS}},
+{"vavgub",	VX (4,1026),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vabsdub",	VX (4,1027),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmhessf",	VX (4,1027),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vand",	VX (4,1028),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpequb.",	VXR(4,	 6,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpneb.",	VXR(4,	 7,1),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"udi0fcm.",	APU(4, 515,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi0fcm",	APU(4, 515,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"evmhossf",	VX (4,1031),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vpmsumb",	VX (4,1032),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmheumi",	VX (4,1032),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhesmi",	VX (4,1033),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vmaxfp",	VX (4,1034),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmhesmf",	VX (4,1035),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhoumi",	VX (4,1036),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vslo",	VX (4,1036),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmhosmi",	VX (4,1037),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmf",	VX (4,1039),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"machhwuo",	XO (4,	12,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"machhwuo.",	XO (4,	12,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_merge00",	XOPS(4,528,0),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"ps_merge00.",	XOPS(4,528,1),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"evmhessfa",	VX (4,1059),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhossfa",	VX (4,1063),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmheumia",	VX (4,1064),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhesmia",	VX (4,1065),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhesmfa",	VX (4,1067),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhoumia",	VX (4,1068),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmia",	VX (4,1069),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmfa",	VX (4,1071),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsubuhm",	VX (4,1088),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"bcdsub.",	VX (4,1089),	VXPS_MASK,   PPCVEC2,	0,		{VD, VA, VB, PS}},
+{"vavguh",	VX (4,1090),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vabsduh",	VX (4,1091),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vandc",	VX (4,1092),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpequh.",	VXR(4,	70,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi1fcm.",	APU(4, 547,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi1fcm",	APU(4, 547,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"vcmpneh.",	VXR(4,	71,1),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"evmwhssf",	VX (4,1095),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vpmsumh",	VX (4,1096),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwlumi",	VX (4,1096),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vminfp",	VX (4,1098),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmwhumi",	VX (4,1100),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsro",	VX (4,1100),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmwhsmi",	VX (4,1101),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vpkudum",	VX (4,1102),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwhsmf",	VX (4,1103),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwssf",	VX (4,1107),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"machhwo",	XO (4,	44,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmwumi",	VX (4,1112),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"machhwo.",	XO (4,	44,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmwsmi",	VX (4,1113),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwsmf",	VX (4,1115),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"nmachhwo",	XO (4,	46,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhwo.",	XO (4,	46,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_merge01",	XOPS(4,560,0),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"ps_merge01.",	XOPS(4,560,1),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"evmwhssfa",	VX (4,1127),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwlumia",	VX (4,1128),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwhumia",	VX (4,1132),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwhsmia",	VX (4,1133),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwhsmfa",	VX (4,1135),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwssfa",	VX (4,1139),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwumia",	VX (4,1144),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwsmia",	VX (4,1145),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwsmfa",	VX (4,1147),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsubuwm",	VX (4,1152),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"bcdus.",	VX (4,1153),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vavguw",	VX (4,1154),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vabsduw",	VX (4,1155),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vmr",		VX (4,1156),	VX_MASK,     PPCVEC,	0,		{VD, VA, VBA}},
+{"vor",		VX (4,1156),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vcmpnew.",	VXR(4, 135,1),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vpmsumw",	VX (4,1160),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vcmpequw.",	VXR(4, 134,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi2fcm.",	APU(4, 579,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi2fcm",	APU(4, 579,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"machhwsuo",	XO (4,	76,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"machhwsuo.",	XO (4,	76,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_merge10",	XOPS(4,592,0),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"ps_merge10.",	XOPS(4,592,1),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vsubudm",	VX (4,1216),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evaddusiaaw",	VX (4,1216),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"bcds.",	VX (4,1217),	VXPS_MASK,   PPCVEC3,	0,		{VD, VA, VB, PS}},
+{"evaddssiaaw",	VX (4,1217),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evsubfusiaaw",VX (4,1218),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evsubfssiaaw",VX (4,1219),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evmra",	VX (4,1220),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vxor",	VX (4,1220),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evdivws",	VX (4,1222),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vcmpeqfp.",	VXR(4, 198,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi3fcm.",	APU(4, 611,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"vcmpequd.",	VXR(4, 199,1),	VXR_MASK,    PPCVEC2,	0,		{VD, VA, VB}},
+{"udi3fcm",	APU(4, 611,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"evdivwu",	VX (4,1223),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vpmsumd",	VX (4,1224),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evaddumiaaw",	VX (4,1224),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evaddsmiaaw",	VX (4,1225),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evsubfumiaaw",VX (4,1226),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"evsubfsmiaaw",VX (4,1227),	VX_MASK,     PPCSPE,	0,		{RS, RA}},
+{"vpkudus",	VX (4,1230),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"machhwso",	XO (4, 108,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"machhwso.",	XO (4, 108,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhwso",	XO (4, 110,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmachhwso.",	XO (4, 110,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"ps_merge11",	XOPS(4,624,0),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"ps_merge11.",	XOPS(4,624,1),	XOPS_MASK,   PPCPS,	0,		{FRT, FRA, FRB}},
+{"vsubuqm",	VX (4,1280),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmheusiaaw",	VX (4,1280),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"bcdtrunc.",	VX (4,1281),	VXPS_MASK,   PPCVEC3,	0,		{VD, VA, VB, PS}},
+{"evmhessiaaw",	VX (4,1281),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vavgsb",	VX (4,1282),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmhessfaaw",	VX (4,1283),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhousiaaw",	VX (4,1284),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vnot",	VX (4,1284),	VX_MASK,     PPCVEC,	0,		{VD, VA, VBA}},
+{"vnor",	VX (4,1284),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmhossiaaw",	VX (4,1285),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"udi4fcm.",	APU(4, 643,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi4fcm",	APU(4, 643,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"vcmpnezb.",	VXR(4, 263,1),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"evmhossfaaw",	VX (4,1287),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmheumiaaw",	VX (4,1288),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vcipher",	VX (4,1288),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vcipherlast",	VX (4,1289),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmhesmiaaw",	VX (4,1289),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhesmfaaw",	VX (4,1291),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vgbbd",	VX (4,1292),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"evmhoumiaaw",	VX (4,1292),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmiaaw",	VX (4,1293),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmfaaw",	VX (4,1295),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"macchwuo",	XO (4, 140,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"macchwuo.",	XO (4, 140,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmhegumiaa",	VX (4,1320),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhegsmiaa",	VX (4,1321),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhegsmfaa",	VX (4,1323),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhogumiaa",	VX (4,1324),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhogsmiaa",	VX (4,1325),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhogsmfaa",	VX (4,1327),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsubcuq",	VX (4,1344),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwlusiaaw",	VX (4,1344),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"bcdutrunc.",	VX (4,1345),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"evmwlssiaaw",	VX (4,1345),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vavgsh",	VX (4,1346),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vorc",	VX (4,1348),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"udi5fcm.",	APU(4, 675,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi5fcm",	APU(4, 675,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"vcmpnezh.",	VXR(4, 327,1),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"vncipher",	VX (4,1352),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwlumiaaw",	VX (4,1352),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vncipherlast",VX (4,1353),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwlsmiaaw",	VX (4,1353),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vbpermq",	VX (4,1356),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vpksdus",	VX (4,1358),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwssfaa",	VX (4,1363),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"macchwo",	XO (4, 172,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmwumiaa",	VX (4,1368),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"macchwo.",	XO (4, 172,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmwsmiaa",	VX (4,1369),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwsmfaa",	VX (4,1371),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"nmacchwo",	XO (4, 174,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmacchwo.",	XO (4, 174,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmheusianw",	VX (4,1408),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsubcuw",	VX (4,1408),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmhessianw",	VX (4,1409),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"bcdctsq.",	VXVA(4,1409,0),	VXVA_MASK,   PPCVEC3,	0,		{VD, VB}},
+{"bcdcfsq.",	VXVA(4,1409,2),	VXVAPS_MASK, PPCVEC3,	0,		{VD, VB, PS}},
+{"bcdctz.",	VXVA(4,1409,4),	VXVAPS_MASK, PPCVEC3,	0,		{VD, VB, PS}},
+{"bcdctn.",	VXVA(4,1409,5),	VXVA_MASK,   PPCVEC3,	0,		{VD, VB}},
+{"bcdcfz.",	VXVA(4,1409,6),	VXVAPS_MASK, PPCVEC3,	0,		{VD, VB, PS}},
+{"bcdcfn.",	VXVA(4,1409,7),	VXVAPS_MASK, PPCVEC3,	0,		{VD, VB, PS}},
+{"bcdsetsgn.",	VXVA(4,1409,31), VXVAPS_MASK, PPCVEC3,	0,		{VD, VB, PS}},
+{"vavgsw",	VX (4,1410),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"evmhessfanw",	VX (4,1411),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vnand",	VX (4,1412),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmhousianw",	VX (4,1412),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhossianw",	VX (4,1413),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"udi6fcm.",	APU(4, 707,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi6fcm",	APU(4, 707,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"vcmpnezw.",	VXR(4, 391,1),	VXR_MASK,    PPCVEC3,	0,		{VD, VA, VB}},
+{"evmhossfanw",	VX (4,1415),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmheumianw",	VX (4,1416),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhesmianw",	VX (4,1417),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhesmfanw",	VX (4,1419),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhoumianw",	VX (4,1420),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmianw",	VX (4,1421),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhosmfanw",	VX (4,1423),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"macchwsuo",	XO (4, 204,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"macchwsuo.",	XO (4, 204,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmhegumian",	VX (4,1448),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhegsmian",	VX (4,1449),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhegsmfan",	VX (4,1451),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhogumian",	VX (4,1452),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhogsmian",	VX (4,1453),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmhogsmfan",	VX (4,1455),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwlusianw",	VX (4,1472),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"bcdsr.",	VX (4,1473),	VXPS_MASK,   PPCVEC3,	0,		{VD, VA, VB, PS}},
+{"evmwlssianw",	VX (4,1473),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vsld",	VX (4,1476),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vcmpgefp.",	VXR(4, 454,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi7fcm.",	APU(4, 739,0),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"udi7fcm",	APU(4, 739,1),	APU_MASK, PPC405|PPC440, PPC476,	{URT, URA, URB}},
+{"vsbox",	VX (4,1480),	VXVB_MASK,   PPCVEC2,	0,		{VD, VA}},
+{"evmwlumianw",	VX (4,1480),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwlsmianw",	VX (4,1481),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"vbpermd",	VX (4,1484),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vpksdss",	VX (4,1486),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"evmwssfan",	VX (4,1491),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"macchwso",	XO (4, 236,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmwumian",	VX (4,1496),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"macchwso.",	XO (4, 236,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"evmwsmian",	VX (4,1497),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"evmwsmfan",	VX (4,1499),	VX_MASK,     PPCSPE,	0,		{RS, RA, RB}},
+{"nmacchwso",	XO (4, 238,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmacchwso.",	XO (4, 238,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vsububs",	VX (4,1536),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vclzlsbb",	VXVA(4,1538,0), VXVA_MASK,   PPCVEC3,	0,		{RT, VB}},
+{"vctzlsbb",	VXVA(4,1538,1), VXVA_MASK,   PPCVEC3,	0,		{RT, VB}},
+{"vnegw",	VXVA(4,1538,6), VXVA_MASK,   PPCVEC3,	0,		{VD, VB}},
+{"vnegd",	VXVA(4,1538,7), VXVA_MASK,   PPCVEC3,	0,		{VD, VB}},
+{"vprtybw",	VXVA(4,1538,8), VXVA_MASK,   PPCVEC3,	0,		{VD, VB}},
+{"vprtybd",	VXVA(4,1538,9), VXVA_MASK,   PPCVEC3,	0,		{VD, VB}},
+{"vprtybq",	VXVA(4,1538,10), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vextsb2w",	VXVA(4,1538,16), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vextsh2w",	VXVA(4,1538,17), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vextsb2d",	VXVA(4,1538,24), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vextsh2d",	VXVA(4,1538,25), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vextsw2d",	VXVA(4,1538,26), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vctzb",	VXVA(4,1538,28), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vctzh",	VXVA(4,1538,29), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vctzw",	VXVA(4,1538,30), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"vctzd",	VXVA(4,1538,31), VXVA_MASK,  PPCVEC3,	0,		{VD, VB}},
+{"mfvscr",	VX (4,1540),	VXVAVB_MASK, PPCVEC,	0,		{VD}},
+{"vcmpgtub.",	VXR(4, 518,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi8fcm.",	APU(4, 771,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"udi8fcm",	APU(4, 771,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vsum4ubs",	VX (4,1544),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vextublx",	VX (4,1549),	VX_MASK,     PPCVEC3,	0,		{RT, RA, VB}},
+{"vsubuhs",	VX (4,1600),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"mtvscr",	VX (4,1604),	VXVDVA_MASK, PPCVEC,	0,		{VB}},
+{"vcmpgtuh.",	VXR(4, 582,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vsum4shs",	VX (4,1608),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"udi9fcm.",	APU(4, 804,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"udi9fcm",	APU(4, 804,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vextuhlx",	VX (4,1613),	VX_MASK,     PPCVEC3,	0,		{RT, RA, VB}},
+{"vupkhsw",	VX (4,1614),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vsubuws",	VX (4,1664),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vshasigmaw",	VX (4,1666),	VX_MASK,     PPCVEC2,	0,		{VD, VA, ST, SIX}},
+{"veqv",	VX (4,1668),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vcmpgtuw.",	VXR(4, 646,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi10fcm.",	APU(4, 835,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"udi10fcm",	APU(4, 835,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vsum2sws",	VX (4,1672),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmrgow",	VX (4,1676),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vextuwlx",	VX (4,1677),	VX_MASK,     PPCVEC3,	0,		{RT, RA, VB}},
+{"vshasigmad",	VX (4,1730),	VX_MASK,     PPCVEC2,	0,		{VD, VA, ST, SIX}},
+{"vsrd",	VX (4,1732),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vcmpgtfp.",	VXR(4, 710,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi11fcm.",	APU(4, 867,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vcmpgtud.",	VXR(4, 711,1),	VXR_MASK,    PPCVEC2,	0,		{VD, VA, VB}},
+{"udi11fcm",	APU(4, 867,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vupklsw",	VX (4,1742),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vsubsbs",	VX (4,1792),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vclzb",	VX (4,1794),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vpopcntb",	VX (4,1795),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vsrv",	VX (4,1796),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vcmpgtsb.",	VXR(4, 774,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi12fcm.",	APU(4, 899,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"udi12fcm",	APU(4, 899,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vsum4sbs",	VX (4,1800),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vextubrx",	VX (4,1805),	VX_MASK,     PPCVEC3,	0,		{RT, RA, VB}},
+{"maclhwuo",	XO (4, 396,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"maclhwuo.",	XO (4, 396,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vsubshs",	VX (4,1856),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vclzh",	VX (4,1858),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vpopcnth",	VX (4,1859),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vslv",	VX (4,1860),	VX_MASK,     PPCVEC3,	0,		{VD, VA, VB}},
+{"vcmpgtsh.",	VXR(4, 838,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"vextuhrx",	VX (4,1869),	VX_MASK,     PPCVEC3,	0,		{RT, RA, VB}},
+{"udi13fcm.",	APU(4, 931,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"udi13fcm",	APU(4, 931,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"maclhwo",	XO (4, 428,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"maclhwo.",	XO (4, 428,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmaclhwo",	XO (4, 430,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmaclhwo.",	XO (4, 430,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vsubsws",	VX (4,1920),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vclzw",	VX (4,1922),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vpopcntw",	VX (4,1923),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vcmpgtsw.",	VXR(4, 902,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi14fcm.",	APU(4, 963,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"udi14fcm",	APU(4, 963,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vsumsws",	VX (4,1928),	VX_MASK,     PPCVEC,	0,		{VD, VA, VB}},
+{"vmrgew",	VX (4,1932),	VX_MASK,     PPCVEC2,	0,		{VD, VA, VB}},
+{"vextuwrx",	VX (4,1933),	VX_MASK,     PPCVEC3,	0,		{RT, RA, VB}},
+{"maclhwsuo",	XO (4, 460,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"maclhwsuo.",	XO (4, 460,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"vclzd",	VX (4,1986),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vpopcntd",	VX (4,1987),	VXVA_MASK,   PPCVEC2,	0,		{VD, VB}},
+{"vcmpbfp.",	VXR(4, 966,1),	VXR_MASK,    PPCVEC,	0,		{VD, VA, VB}},
+{"udi15fcm.",	APU(4, 995,0),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"vcmpgtsd.",	VXR(4, 967,1),	VXR_MASK,    PPCVEC2,	0,		{VD, VA, VB}},
+{"udi15fcm",	APU(4, 995,1),	APU_MASK,    PPC440,	PPC476,		{URT, URA, URB}},
+{"maclhwso",	XO (4, 492,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"maclhwso.",	XO (4, 492,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmaclhwso",	XO (4, 494,1,0), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"nmaclhwso.",	XO (4, 494,1,1), XO_MASK,    MULHW,	0,		{RT, RA, RB}},
+{"dcbz_l",	X  (4,1014),	XRT_MASK,    PPCPS,	0,		{RA, RB}},
+
+{"mulli",	OP(7),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA, SI}},
+{"muli",	OP(7),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, RA, SI}},
+
+{"subfic",	OP(8),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA, SI}},
+{"sfi",		OP(8),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, RA, SI}},
+
+{"dozi",	OP(9),		OP_MASK,     M601,	PPCVLE,		{RT, RA, SI}},
+
+{"cmplwi",	OPL(10,0),	OPL_MASK,    PPCCOM,	PPCVLE,		{OBF, RA, UISIGNOPT}},
+{"cmpldi",	OPL(10,1),	OPL_MASK,    PPC64,	PPCVLE,		{OBF, RA, UISIGNOPT}},
+{"cmpli",	OP(10),		OP_MASK,     PPC,	PPCVLE,		{BF, L32OPT, RA, UISIGNOPT}},
+{"cmpli",	OP(10),		OP_MASK,     PWRCOM,	PPC|PPCVLE,	{BF, RA, UISIGNOPT}},
+
+{"cmpwi",	OPL(11,0),	OPL_MASK,    PPCCOM,	PPCVLE,		{OBF, RA, SI}},
+{"cmpdi",	OPL(11,1),	OPL_MASK,    PPC64,	PPCVLE,		{OBF, RA, SI}},
+{"cmpi",	OP(11),		OP_MASK,     PPC,	PPCVLE,		{BF, L32OPT, RA, SI}},
+{"cmpi",	OP(11),		OP_MASK,     PWRCOM,	PPC|PPCVLE,	{BF, RA, SI}},
+
+{"addic",	OP(12),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA, SI}},
+{"ai",		OP(12),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, RA, SI}},
+{"subic",	OP(12),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA, NSI}},
+
+{"addic.",	OP(13),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA, SI}},
+{"ai.",		OP(13),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, RA, SI}},
+{"subic.",	OP(13),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA, NSI}},
+
+{"li",		OP(14),		DRA_MASK,    PPCCOM,	PPCVLE,		{RT, SI}},
+{"lil",		OP(14),		DRA_MASK,    PWRCOM,	PPCVLE,		{RT, SI}},
+{"addi",	OP(14),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA0, SI}},
+{"cal",		OP(14),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, D, RA0}},
+{"subi",	OP(14),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA0, NSI}},
+{"la",		OP(14),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, D, RA0}},
+
+{"lis",		OP(15),		DRA_MASK,    PPCCOM,	PPCVLE,		{RT, SISIGNOPT}},
+{"liu",		OP(15),		DRA_MASK,    PWRCOM,	PPCVLE,		{RT, SISIGNOPT}},
+{"addis",	OP(15),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA0, SISIGNOPT}},
+{"cau",		OP(15),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, RA0, SISIGNOPT}},
+{"subis",	OP(15),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, RA0, NSISIGNOPT}},
+
+{"bdnz-",    BBO(16,BODNZ,0,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDM}},
+{"bdnz+",    BBO(16,BODNZ,0,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDP}},
+{"bdnz",     BBO(16,BODNZ,0,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BD}},
+{"bdn",	     BBO(16,BODNZ,0,0),		BBOATBI_MASK,  PWRCOM,	 PPCVLE,	{BD}},
+{"bdnzl-",   BBO(16,BODNZ,0,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDM}},
+{"bdnzl+",   BBO(16,BODNZ,0,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDP}},
+{"bdnzl",    BBO(16,BODNZ,0,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BD}},
+{"bdnl",     BBO(16,BODNZ,0,1),		BBOATBI_MASK,  PWRCOM,	 PPCVLE,	{BD}},
+{"bdnza-",   BBO(16,BODNZ,1,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDMA}},
+{"bdnza+",   BBO(16,BODNZ,1,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDPA}},
+{"bdnza",    BBO(16,BODNZ,1,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDA}},
+{"bdna",     BBO(16,BODNZ,1,0),		BBOATBI_MASK,  PWRCOM,	 PPCVLE,	{BDA}},
+{"bdnzla-",  BBO(16,BODNZ,1,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDMA}},
+{"bdnzla+",  BBO(16,BODNZ,1,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDPA}},
+{"bdnzla",   BBO(16,BODNZ,1,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDA}},
+{"bdnla",    BBO(16,BODNZ,1,1),		BBOATBI_MASK,  PWRCOM,	 PPCVLE,	{BDA}},
+{"bdz-",     BBO(16,BODZ,0,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDM}},
+{"bdz+",     BBO(16,BODZ,0,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDP}},
+{"bdz",	     BBO(16,BODZ,0,0),		BBOATBI_MASK,  COM,	 PPCVLE,	{BD}},
+{"bdzl-",    BBO(16,BODZ,0,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDM}},
+{"bdzl+",    BBO(16,BODZ,0,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDP}},
+{"bdzl",     BBO(16,BODZ,0,1),		BBOATBI_MASK,  COM,	 PPCVLE,	{BD}},
+{"bdza-",    BBO(16,BODZ,1,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDMA}},
+{"bdza+",    BBO(16,BODZ,1,0),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDPA}},
+{"bdza",     BBO(16,BODZ,1,0),		BBOATBI_MASK,  COM,	 PPCVLE,	{BDA}},
+{"bdzla-",   BBO(16,BODZ,1,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDMA}},
+{"bdzla+",   BBO(16,BODZ,1,1),		BBOATBI_MASK,  PPCCOM,	 PPCVLE,	{BDPA}},
+{"bdzla",    BBO(16,BODZ,1,1),		BBOATBI_MASK,  COM,	 PPCVLE,	{BDA}},
+
+{"bge-",     BBOCB(16,BOF,CBLT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bge+",     BBOCB(16,BOF,CBLT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bge",	     BBOCB(16,BOF,CBLT,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bnl-",     BBOCB(16,BOF,CBLT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bnl+",     BBOCB(16,BOF,CBLT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bnl",	     BBOCB(16,BOF,CBLT,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bgel-",    BBOCB(16,BOF,CBLT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bgel+",    BBOCB(16,BOF,CBLT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bgel",     BBOCB(16,BOF,CBLT,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bnll-",    BBOCB(16,BOF,CBLT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bnll+",    BBOCB(16,BOF,CBLT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bnll",     BBOCB(16,BOF,CBLT,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bgea-",    BBOCB(16,BOF,CBLT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bgea+",    BBOCB(16,BOF,CBLT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bgea",     BBOCB(16,BOF,CBLT,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bnla-",    BBOCB(16,BOF,CBLT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnla+",    BBOCB(16,BOF,CBLT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnla",     BBOCB(16,BOF,CBLT,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bgela-",   BBOCB(16,BOF,CBLT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bgela+",   BBOCB(16,BOF,CBLT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bgela",    BBOCB(16,BOF,CBLT,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bnlla-",   BBOCB(16,BOF,CBLT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnlla+",   BBOCB(16,BOF,CBLT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnlla",    BBOCB(16,BOF,CBLT,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"ble-",     BBOCB(16,BOF,CBGT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"ble+",     BBOCB(16,BOF,CBGT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"ble",	     BBOCB(16,BOF,CBGT,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bng-",     BBOCB(16,BOF,CBGT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bng+",     BBOCB(16,BOF,CBGT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bng",	     BBOCB(16,BOF,CBGT,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"blel-",    BBOCB(16,BOF,CBGT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"blel+",    BBOCB(16,BOF,CBGT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"blel",     BBOCB(16,BOF,CBGT,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bngl-",    BBOCB(16,BOF,CBGT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bngl+",    BBOCB(16,BOF,CBGT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bngl",     BBOCB(16,BOF,CBGT,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"blea-",    BBOCB(16,BOF,CBGT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"blea+",    BBOCB(16,BOF,CBGT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"blea",     BBOCB(16,BOF,CBGT,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bnga-",    BBOCB(16,BOF,CBGT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnga+",    BBOCB(16,BOF,CBGT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnga",     BBOCB(16,BOF,CBGT,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"blela-",   BBOCB(16,BOF,CBGT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"blela+",   BBOCB(16,BOF,CBGT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"blela",    BBOCB(16,BOF,CBGT,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bngla-",   BBOCB(16,BOF,CBGT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bngla+",   BBOCB(16,BOF,CBGT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bngla",    BBOCB(16,BOF,CBGT,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bne-",     BBOCB(16,BOF,CBEQ,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bne+",     BBOCB(16,BOF,CBEQ,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bne",	     BBOCB(16,BOF,CBEQ,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bnel-",    BBOCB(16,BOF,CBEQ,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bnel+",    BBOCB(16,BOF,CBEQ,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bnel",     BBOCB(16,BOF,CBEQ,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bnea-",    BBOCB(16,BOF,CBEQ,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnea+",    BBOCB(16,BOF,CBEQ,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnea",     BBOCB(16,BOF,CBEQ,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bnela-",   BBOCB(16,BOF,CBEQ,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnela+",   BBOCB(16,BOF,CBEQ,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnela",    BBOCB(16,BOF,CBEQ,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bns-",     BBOCB(16,BOF,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bns+",     BBOCB(16,BOF,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bns",	     BBOCB(16,BOF,CBSO,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bnu-",     BBOCB(16,BOF,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bnu+",     BBOCB(16,BOF,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bnu",	     BBOCB(16,BOF,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BD}},
+{"bnsl-",    BBOCB(16,BOF,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bnsl+",    BBOCB(16,BOF,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bnsl",     BBOCB(16,BOF,CBSO,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bnul-",    BBOCB(16,BOF,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bnul+",    BBOCB(16,BOF,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bnul",     BBOCB(16,BOF,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BD}},
+{"bnsa-",    BBOCB(16,BOF,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnsa+",    BBOCB(16,BOF,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnsa",     BBOCB(16,BOF,CBSO,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bnua-",    BBOCB(16,BOF,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnua+",    BBOCB(16,BOF,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnua",     BBOCB(16,BOF,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDA}},
+{"bnsla-",   BBOCB(16,BOF,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnsla+",   BBOCB(16,BOF,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnsla",    BBOCB(16,BOF,CBSO,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bnula-",   BBOCB(16,BOF,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bnula+",   BBOCB(16,BOF,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bnula",    BBOCB(16,BOF,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDA}},
+
+{"blt-",     BBOCB(16,BOT,CBLT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"blt+",     BBOCB(16,BOT,CBLT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"blt",	     BBOCB(16,BOT,CBLT,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bltl-",    BBOCB(16,BOT,CBLT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bltl+",    BBOCB(16,BOT,CBLT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bltl",     BBOCB(16,BOT,CBLT,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"blta-",    BBOCB(16,BOT,CBLT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"blta+",    BBOCB(16,BOT,CBLT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"blta",     BBOCB(16,BOT,CBLT,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bltla-",   BBOCB(16,BOT,CBLT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bltla+",   BBOCB(16,BOT,CBLT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bltla",    BBOCB(16,BOT,CBLT,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bgt-",     BBOCB(16,BOT,CBGT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bgt+",     BBOCB(16,BOT,CBGT,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bgt",	     BBOCB(16,BOT,CBGT,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bgtl-",    BBOCB(16,BOT,CBGT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bgtl+",    BBOCB(16,BOT,CBGT,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bgtl",     BBOCB(16,BOT,CBGT,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bgta-",    BBOCB(16,BOT,CBGT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bgta+",    BBOCB(16,BOT,CBGT,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bgta",     BBOCB(16,BOT,CBGT,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bgtla-",   BBOCB(16,BOT,CBGT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bgtla+",   BBOCB(16,BOT,CBGT,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bgtla",    BBOCB(16,BOT,CBGT,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"beq-",     BBOCB(16,BOT,CBEQ,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"beq+",     BBOCB(16,BOT,CBEQ,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"beq",	     BBOCB(16,BOT,CBEQ,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"beql-",    BBOCB(16,BOT,CBEQ,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"beql+",    BBOCB(16,BOT,CBEQ,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"beql",     BBOCB(16,BOT,CBEQ,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"beqa-",    BBOCB(16,BOT,CBEQ,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"beqa+",    BBOCB(16,BOT,CBEQ,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"beqa",     BBOCB(16,BOT,CBEQ,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"beqla-",   BBOCB(16,BOT,CBEQ,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"beqla+",   BBOCB(16,BOT,CBEQ,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"beqla",    BBOCB(16,BOT,CBEQ,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bso-",     BBOCB(16,BOT,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bso+",     BBOCB(16,BOT,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bso",	     BBOCB(16,BOT,CBSO,0,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bun-",     BBOCB(16,BOT,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bun+",     BBOCB(16,BOT,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bun",	     BBOCB(16,BOT,CBSO,0,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BD}},
+{"bsol-",    BBOCB(16,BOT,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bsol+",    BBOCB(16,BOT,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bsol",     BBOCB(16,BOT,CBSO,0,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BD}},
+{"bunl-",    BBOCB(16,BOT,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDM}},
+{"bunl+",    BBOCB(16,BOT,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDP}},
+{"bunl",     BBOCB(16,BOT,CBSO,0,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BD}},
+{"bsoa-",    BBOCB(16,BOT,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bsoa+",    BBOCB(16,BOT,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bsoa",     BBOCB(16,BOT,CBSO,1,0),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"buna-",    BBOCB(16,BOT,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"buna+",    BBOCB(16,BOT,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"buna",     BBOCB(16,BOT,CBSO,1,0),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDA}},
+{"bsola-",   BBOCB(16,BOT,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bsola+",   BBOCB(16,BOT,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bsola",    BBOCB(16,BOT,CBSO,1,1),	BBOATCB_MASK,  COM,	 PPCVLE,	{CR, BDA}},
+{"bunla-",   BBOCB(16,BOT,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDMA}},
+{"bunla+",   BBOCB(16,BOT,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDPA}},
+{"bunla",    BBOCB(16,BOT,CBSO,1,1),	BBOATCB_MASK,  PPCCOM,	 PPCVLE,	{CR, BDA}},
+
+{"bdnzf-",   BBO(16,BODNZF,0,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdnzf+",   BBO(16,BODNZF,0,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdnzf",    BBO(16,BODNZF,0,0),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdnzfl-",  BBO(16,BODNZF,0,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdnzfl+",  BBO(16,BODNZF,0,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdnzfl",   BBO(16,BODNZF,0,1),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdnzfa-",  BBO(16,BODNZF,1,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdnzfa+",  BBO(16,BODNZF,1,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdnzfa",   BBO(16,BODNZF,1,0),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bdnzfla-", BBO(16,BODNZF,1,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdnzfla+", BBO(16,BODNZF,1,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdnzfla",  BBO(16,BODNZF,1,1),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bdzf-",    BBO(16,BODZF,0,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdzf+",    BBO(16,BODZF,0,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdzf",     BBO(16,BODZF,0,0),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdzfl-",   BBO(16,BODZF,0,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdzfl+",   BBO(16,BODZF,0,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdzfl",    BBO(16,BODZF,0,1),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdzfa-",   BBO(16,BODZF,1,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdzfa+",   BBO(16,BODZF,1,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdzfa",    BBO(16,BODZF,1,0),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bdzfla-",  BBO(16,BODZF,1,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdzfla+",  BBO(16,BODZF,1,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdzfla",   BBO(16,BODZF,1,1),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+
+{"bf-",	     BBO(16,BOF,0,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDM}},
+{"bf+",	     BBO(16,BOF,0,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDP}},
+{"bf",	     BBO(16,BOF,0,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bbf",	     BBO(16,BOF,0,0),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BD}},
+{"bfl-",     BBO(16,BOF,0,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDM}},
+{"bfl+",     BBO(16,BOF,0,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDP}},
+{"bfl",	     BBO(16,BOF,0,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bbfl",     BBO(16,BOF,0,1),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BD}},
+{"bfa-",     BBO(16,BOF,1,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDMA}},
+{"bfa+",     BBO(16,BOF,1,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDPA}},
+{"bfa",	     BBO(16,BOF,1,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bbfa",     BBO(16,BOF,1,0),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BDA}},
+{"bfla-",    BBO(16,BOF,1,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDMA}},
+{"bfla+",    BBO(16,BOF,1,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDPA}},
+{"bfla",     BBO(16,BOF,1,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bbfla",    BBO(16,BOF,1,1),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BDA}},
+
+{"bdnzt-",   BBO(16,BODNZT,0,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdnzt+",   BBO(16,BODNZT,0,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdnzt",    BBO(16,BODNZT,0,0),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdnztl-",  BBO(16,BODNZT,0,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdnztl+",  BBO(16,BODNZT,0,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdnztl",   BBO(16,BODNZT,0,1),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdnzta-",  BBO(16,BODNZT,1,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdnzta+",  BBO(16,BODNZT,1,0),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdnzta",   BBO(16,BODNZT,1,0),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bdnztla-", BBO(16,BODNZT,1,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdnztla+", BBO(16,BODNZT,1,1),	BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdnztla",  BBO(16,BODNZT,1,1),	BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bdzt-",    BBO(16,BODZT,0,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdzt+",    BBO(16,BODZT,0,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdzt",     BBO(16,BODZT,0,0),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdztl-",   BBO(16,BODZT,0,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDM}},
+{"bdztl+",   BBO(16,BODZT,0,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDP}},
+{"bdztl",    BBO(16,BODZT,0,1),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bdzta-",   BBO(16,BODZT,1,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdzta+",   BBO(16,BODZT,1,0),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdzta",    BBO(16,BODZT,1,0),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bdztla-",  BBO(16,BODZT,1,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDMA}},
+{"bdztla+",  BBO(16,BODZT,1,1),		BBOY_MASK,     PPCCOM,	 ISA_V2|PPCVLE,	{BI, BDPA}},
+{"bdztla",   BBO(16,BODZT,1,1),		BBOY_MASK,     PPCCOM,	 PPCVLE,	{BI, BDA}},
+
+{"bt-",	     BBO(16,BOT,0,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDM}},
+{"bt+",	     BBO(16,BOT,0,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDP}},
+{"bt",	     BBO(16,BOT,0,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bbt",	     BBO(16,BOT,0,0),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BD}},
+{"btl-",     BBO(16,BOT,0,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDM}},
+{"btl+",     BBO(16,BOT,0,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDP}},
+{"btl",	     BBO(16,BOT,0,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BD}},
+{"bbtl",     BBO(16,BOT,0,1),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BD}},
+{"bta-",     BBO(16,BOT,1,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDMA}},
+{"bta+",     BBO(16,BOT,1,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDPA}},
+{"bta",	     BBO(16,BOT,1,0),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bbta",     BBO(16,BOT,1,0),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BDA}},
+{"btla-",    BBO(16,BOT,1,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDMA}},
+{"btla+",    BBO(16,BOT,1,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDPA}},
+{"btla",     BBO(16,BOT,1,1),		BBOAT_MASK,    PPCCOM,	 PPCVLE,	{BI, BDA}},
+{"bbtla",    BBO(16,BOT,1,1),		BBOAT_MASK,    PWRCOM,	 PPCVLE,	{BI, BDA}},
+
+{"bc-",		B(16,0,0),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDM}},
+{"bc+",		B(16,0,0),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDP}},
+{"bc",		B(16,0,0),	B_MASK,	     COM,	PPCVLE,		{BO, BI, BD}},
+{"bcl-",	B(16,0,1),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDM}},
+{"bcl+",	B(16,0,1),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDP}},
+{"bcl",		B(16,0,1),	B_MASK,	     COM,	PPCVLE,		{BO, BI, BD}},
+{"bca-",	B(16,1,0),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDMA}},
+{"bca+",	B(16,1,0),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDPA}},
+{"bca",		B(16,1,0),	B_MASK,	     COM,	PPCVLE,		{BO, BI, BDA}},
+{"bcla-",	B(16,1,1),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDMA}},
+{"bcla+",	B(16,1,1),	B_MASK,	     PPCCOM,	PPCVLE,		{BOE, BI, BDPA}},
+{"bcla",	B(16,1,1),	B_MASK,	     COM,	PPCVLE,		{BO, BI, BDA}},
+
+{"svc",		SC(17,0,0),	SC_MASK,     POWER,	PPCVLE,		{SVC_LEV, FL1, FL2}},
+{"svcl",	SC(17,0,1),	SC_MASK,     POWER,	PPCVLE,		{SVC_LEV, FL1, FL2}},
+{"sc",		SC(17,1,0),	SC_MASK,     PPC,	PPCVLE,		{LEV}},
+{"svca",	SC(17,1,0),	SC_MASK,     PWRCOM,	PPCVLE,		{SV}},
+{"svcla",	SC(17,1,1),	SC_MASK,     POWER,	PPCVLE,		{SV}},
+
+{"b",		B(18,0,0),	B_MASK,	     COM,	PPCVLE,		{LI}},
+{"bl",		B(18,0,1),	B_MASK,	     COM,	PPCVLE,		{LI}},
+{"ba",		B(18,1,0),	B_MASK,	     COM,	PPCVLE,		{LIA}},
+{"bla",		B(18,1,1),	B_MASK,	     COM,	PPCVLE,		{LIA}},
+
+{"mcrf",     XL(19,0), XLBB_MASK|(3<<21)|(3<<16), COM,	PPCVLE,		{BF, BFA}},
+
+{"addpcis",  DX(19,2),		DX_MASK,     POWER9,	PPCVLE,		{RT, DXD}},
+{"subpcis",  DX(19,2),		DX_MASK,     POWER9,	PPCVLE,		{RT, NDXD}},
+
+{"bdnzlr",   XLO(19,BODNZ,16,0),	XLBOBIBB_MASK, PPCCOM,	 PPCVLE,	{0}},
+{"bdnzlr-",  XLO(19,BODNZ,16,0),	XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdnzlrl",  XLO(19,BODNZ,16,1),	XLBOBIBB_MASK, PPCCOM,	 PPCVLE,	{0}},
+{"bdnzlrl-", XLO(19,BODNZ,16,1),	XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdnzlr+",  XLO(19,BODNZP,16,0),	XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdnzlrl+", XLO(19,BODNZP,16,1),	XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdzlr",    XLO(19,BODZ,16,0),		XLBOBIBB_MASK, PPCCOM,	 PPCVLE,	{0}},
+{"bdzlr-",   XLO(19,BODZ,16,0),		XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdzlrl",   XLO(19,BODZ,16,1),		XLBOBIBB_MASK, PPCCOM,	 PPCVLE,	{0}},
+{"bdzlrl-",  XLO(19,BODZ,16,1),		XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdzlr+",   XLO(19,BODZP,16,0),	XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"bdzlrl+",  XLO(19,BODZP,16,1),	XLBOBIBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{0}},
+{"blr",	     XLO(19,BOU,16,0),		XLBOBIBB_MASK, PPCCOM,	 PPCVLE,	{0}},
+{"br",	     XLO(19,BOU,16,0),		XLBOBIBB_MASK, PWRCOM,	 PPCVLE,	{0}},
+{"blrl",     XLO(19,BOU,16,1),		XLBOBIBB_MASK, PPCCOM,	 PPCVLE,	{0}},
+{"brl",	     XLO(19,BOU,16,1),		XLBOBIBB_MASK, PWRCOM,	 PPCVLE,	{0}},
+{"bdnzlr-",  XLO(19,BODNZM4,16,0),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdnzlrl-", XLO(19,BODNZM4,16,1),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdnzlr+",  XLO(19,BODNZP4,16,0),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdnzlrl+", XLO(19,BODNZP4,16,1),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdzlr-",   XLO(19,BODZM4,16,0),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdzlrl-",  XLO(19,BODZM4,16,1),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdzlr+",   XLO(19,BODZP4,16,0),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+{"bdzlrl+",  XLO(19,BODZP4,16,1),	XLBOBIBB_MASK, ISA_V2,	 PPCVLE,	{0}},
+
+{"bgelr",    XLOCB(19,BOF,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgelr-",   XLOCB(19,BOF,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bger",     XLOCB(19,BOF,CBLT,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnllr",    XLOCB(19,BOF,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnllr-",   XLOCB(19,BOF,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnlr",     XLOCB(19,BOF,CBLT,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bgelrl",   XLOCB(19,BOF,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgelrl-",  XLOCB(19,BOF,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgerl",    XLOCB(19,BOF,CBLT,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnllrl",   XLOCB(19,BOF,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnllrl-",  XLOCB(19,BOF,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnlrl",    XLOCB(19,BOF,CBLT,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"blelr",    XLOCB(19,BOF,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"blelr-",   XLOCB(19,BOF,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bler",     XLOCB(19,BOF,CBGT,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnglr",    XLOCB(19,BOF,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnglr-",   XLOCB(19,BOF,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bngr",     XLOCB(19,BOF,CBGT,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"blelrl",   XLOCB(19,BOF,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"blelrl-",  XLOCB(19,BOF,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blerl",    XLOCB(19,BOF,CBGT,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnglrl",   XLOCB(19,BOF,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnglrl-",  XLOCB(19,BOF,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bngrl",    XLOCB(19,BOF,CBGT,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnelr",    XLOCB(19,BOF,CBEQ,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnelr-",   XLOCB(19,BOF,CBEQ,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bner",     XLOCB(19,BOF,CBEQ,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnelrl",   XLOCB(19,BOF,CBEQ,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnelrl-",  XLOCB(19,BOF,CBEQ,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnerl",    XLOCB(19,BOF,CBEQ,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnslr",    XLOCB(19,BOF,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnslr-",   XLOCB(19,BOF,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnsr",     XLOCB(19,BOF,CBSO,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnulr",    XLOCB(19,BOF,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnulr-",   XLOCB(19,BOF,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnslrl",   XLOCB(19,BOF,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnslrl-",  XLOCB(19,BOF,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnsrl",    XLOCB(19,BOF,CBSO,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bnulrl",   XLOCB(19,BOF,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnulrl-",  XLOCB(19,BOF,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgelr+",   XLOCB(19,BOFP,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnllr+",   XLOCB(19,BOFP,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgelrl+",  XLOCB(19,BOFP,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnllrl+",  XLOCB(19,BOFP,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blelr+",   XLOCB(19,BOFP,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnglr+",   XLOCB(19,BOFP,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blelrl+",  XLOCB(19,BOFP,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnglrl+",  XLOCB(19,BOFP,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnelr+",   XLOCB(19,BOFP,CBEQ,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnelrl+",  XLOCB(19,BOFP,CBEQ,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnslr+",   XLOCB(19,BOFP,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnulr+",   XLOCB(19,BOFP,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnslrl+",  XLOCB(19,BOFP,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnulrl+",  XLOCB(19,BOFP,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgelr-",   XLOCB(19,BOFM4,CBLT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnllr-",   XLOCB(19,BOFM4,CBLT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgelrl-",  XLOCB(19,BOFM4,CBLT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnllrl-",  XLOCB(19,BOFM4,CBLT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blelr-",   XLOCB(19,BOFM4,CBGT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnglr-",   XLOCB(19,BOFM4,CBGT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blelrl-",  XLOCB(19,BOFM4,CBGT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnglrl-",  XLOCB(19,BOFM4,CBGT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnelr-",   XLOCB(19,BOFM4,CBEQ,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnelrl-",  XLOCB(19,BOFM4,CBEQ,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnslr-",   XLOCB(19,BOFM4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnulr-",   XLOCB(19,BOFM4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnslrl-",  XLOCB(19,BOFM4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnulrl-",  XLOCB(19,BOFM4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgelr+",   XLOCB(19,BOFP4,CBLT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnllr+",   XLOCB(19,BOFP4,CBLT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgelrl+",  XLOCB(19,BOFP4,CBLT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnllrl+",  XLOCB(19,BOFP4,CBLT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blelr+",   XLOCB(19,BOFP4,CBGT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnglr+",   XLOCB(19,BOFP4,CBGT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blelrl+",  XLOCB(19,BOFP4,CBGT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnglrl+",  XLOCB(19,BOFP4,CBGT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnelr+",   XLOCB(19,BOFP4,CBEQ,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnelrl+",  XLOCB(19,BOFP4,CBEQ,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnslr+",   XLOCB(19,BOFP4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnulr+",   XLOCB(19,BOFP4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnslrl+",  XLOCB(19,BOFP4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnulrl+",  XLOCB(19,BOFP4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltlr",    XLOCB(19,BOT,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bltlr-",   XLOCB(19,BOT,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltr",     XLOCB(19,BOT,CBLT,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bltlrl",   XLOCB(19,BOT,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bltlrl-",  XLOCB(19,BOT,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltrl",    XLOCB(19,BOT,CBLT,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bgtlr",    XLOCB(19,BOT,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgtlr-",   XLOCB(19,BOT,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtr",     XLOCB(19,BOT,CBGT,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bgtlrl",   XLOCB(19,BOT,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgtlrl-",  XLOCB(19,BOT,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtrl",    XLOCB(19,BOT,CBGT,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"beqlr",    XLOCB(19,BOT,CBEQ,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"beqlr-",   XLOCB(19,BOT,CBEQ,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqr",     XLOCB(19,BOT,CBEQ,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"beqlrl",   XLOCB(19,BOT,CBEQ,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"beqlrl-",  XLOCB(19,BOT,CBEQ,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqrl",    XLOCB(19,BOT,CBEQ,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bsolr",    XLOCB(19,BOT,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bsolr-",   XLOCB(19,BOT,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsor",     XLOCB(19,BOT,CBSO,16,0),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bunlr",    XLOCB(19,BOT,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bunlr-",   XLOCB(19,BOT,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsolrl",   XLOCB(19,BOT,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bsolrl-",  XLOCB(19,BOT,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsorl",    XLOCB(19,BOT,CBSO,16,1),	XLBOCBBB_MASK, PWRCOM,	 PPCVLE,	{CR}},
+{"bunlrl",   XLOCB(19,BOT,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bunlrl-",  XLOCB(19,BOT,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltlr+",   XLOCB(19,BOTP,CBLT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltlrl+",  XLOCB(19,BOTP,CBLT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtlr+",   XLOCB(19,BOTP,CBGT,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtlrl+",  XLOCB(19,BOTP,CBGT,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqlr+",   XLOCB(19,BOTP,CBEQ,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqlrl+",  XLOCB(19,BOTP,CBEQ,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsolr+",   XLOCB(19,BOTP,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bunlr+",   XLOCB(19,BOTP,CBSO,16,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsolrl+",  XLOCB(19,BOTP,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bunlrl+",  XLOCB(19,BOTP,CBSO,16,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltlr-",   XLOCB(19,BOTM4,CBLT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltlrl-",  XLOCB(19,BOTM4,CBLT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtlr-",   XLOCB(19,BOTM4,CBGT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtlrl-",  XLOCB(19,BOTM4,CBGT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqlr-",   XLOCB(19,BOTM4,CBEQ,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqlrl-",  XLOCB(19,BOTM4,CBEQ,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsolr-",   XLOCB(19,BOTM4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunlr-",   XLOCB(19,BOTM4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsolrl-",  XLOCB(19,BOTM4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunlrl-",  XLOCB(19,BOTM4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltlr+",   XLOCB(19,BOTP4,CBLT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltlrl+",  XLOCB(19,BOTP4,CBLT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtlr+",   XLOCB(19,BOTP4,CBGT,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtlrl+",  XLOCB(19,BOTP4,CBGT,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqlr+",   XLOCB(19,BOTP4,CBEQ,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqlrl+",  XLOCB(19,BOTP4,CBEQ,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsolr+",   XLOCB(19,BOTP4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunlr+",   XLOCB(19,BOTP4,CBSO,16,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsolrl+",  XLOCB(19,BOTP4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunlrl+",  XLOCB(19,BOTP4,CBSO,16,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+
+{"bdnzflr",  XLO(19,BODNZF,16,0),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdnzflr-", XLO(19,BODNZF,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdnzflrl", XLO(19,BODNZF,16,1),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdnzflrl-",XLO(19,BODNZF,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdnzflr+", XLO(19,BODNZFP,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdnzflrl+",XLO(19,BODNZFP,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdzflr",   XLO(19,BODZF,16,0),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdzflr-",  XLO(19,BODZF,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdzflrl",  XLO(19,BODZF,16,1),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdzflrl-", XLO(19,BODZF,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdzflr+",  XLO(19,BODZFP,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdzflrl+", XLO(19,BODZFP,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bflr",     XLO(19,BOF,16,0),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bflr-",    XLO(19,BOF,16,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bbfr",     XLO(19,BOF,16,0),		XLBOBB_MASK,   PWRCOM,	 PPCVLE,	{BI}},
+{"bflrl",    XLO(19,BOF,16,1),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bflrl-",   XLO(19,BOF,16,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bbfrl",    XLO(19,BOF,16,1),		XLBOBB_MASK,   PWRCOM,	 PPCVLE,	{BI}},
+{"bflr+",    XLO(19,BOFP,16,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bflrl+",   XLO(19,BOFP,16,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bflr-",    XLO(19,BOFM4,16,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bflrl-",   XLO(19,BOFM4,16,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bflr+",    XLO(19,BOFP4,16,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bflrl+",   XLO(19,BOFP4,16,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bdnztlr",  XLO(19,BODNZT,16,0),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdnztlr-", XLO(19,BODNZT,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdnztlrl", XLO(19,BODNZT,16,1),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdnztlrl-", XLO(19,BODNZT,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdnztlr+", XLO(19,BODNZTP,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdnztlrl+", XLO(19,BODNZTP,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdztlr",   XLO(19,BODZT,16,0),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdztlr-",  XLO(19,BODZT,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdztlrl",  XLO(19,BODZT,16,1),	XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bdztlrl-", XLO(19,BODZT,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdztlr+",  XLO(19,BODZTP,16,0),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bdztlrl+", XLO(19,BODZTP,16,1),	XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btlr",     XLO(19,BOT,16,0),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"btlr-",    XLO(19,BOT,16,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bbtr",     XLO(19,BOT,16,0),		XLBOBB_MASK,   PWRCOM,	 PPCVLE,	{BI}},
+{"btlrl",    XLO(19,BOT,16,1),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"btlrl-",   XLO(19,BOT,16,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bbtrl",    XLO(19,BOT,16,1),		XLBOBB_MASK,   PWRCOM,	 PPCVLE,	{BI}},
+{"btlr+",    XLO(19,BOTP,16,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btlrl+",   XLO(19,BOTP,16,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btlr-",    XLO(19,BOTM4,16,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btlrl-",   XLO(19,BOTM4,16,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btlr+",    XLO(19,BOTP4,16,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btlrl+",   XLO(19,BOTP4,16,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+
+{"bclr-",    XLYLK(19,16,0,0),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bclrl-",   XLYLK(19,16,0,1),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bclr+",    XLYLK(19,16,1,0),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bclrl+",   XLYLK(19,16,1,1),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bclr",     XLLK(19,16,0),		XLBH_MASK,     PPCCOM,	 PPCVLE,	{BO, BI, BH}},
+{"bcr",	     XLLK(19,16,0),		XLBB_MASK,     PWRCOM,	 PPCVLE,	{BO, BI}},
+{"bclrl",    XLLK(19,16,1),		XLBH_MASK,     PPCCOM,	 PPCVLE,	{BO, BI, BH}},
+{"bcrl",     XLLK(19,16,1),		XLBB_MASK,     PWRCOM,	 PPCVLE,	{BO, BI}},
+
+{"rfid",	XL(19,18),	0xffffffff,  PPC64,	PPCVLE,	{0}},
+
+{"crnot",	XL(19,33),	XL_MASK,     PPCCOM,	PPCVLE,		{BT, BA, BBA}},
+{"crnor",	XL(19,33),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+{"rfmci",	X(19,38),    0xffffffff, PPCRFMCI|PPCA2|PPC476, PPCVLE,	{0}},
+
+{"rfdi",	XL(19,39),	0xffffffff,  E500MC,	PPCVLE,		{0}},
+{"rfi",		XL(19,50),	0xffffffff,  COM,	PPCVLE,		{0}},
+{"rfci",	XL(19,51), 0xffffffff, PPC403|BOOKE|PPCE300|PPCA2|PPC476, PPCVLE, {0}},
+
+{"rfsvc",	XL(19,82),	0xffffffff,  POWER,	PPCVLE,		{0}},
+
+{"rfgi",	XL(19,102),   0xffffffff, E500MC|PPCA2,	PPCVLE,		{0}},
+
+{"crandc",	XL(19,129),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"rfebb",	XL(19,146),	XLS_MASK,    POWER8,	PPCVLE,		{SXL}},
+
+{"isync",	XL(19,150),	0xffffffff,  PPCCOM,	PPCVLE,		{0}},
+{"ics",		XL(19,150),	0xffffffff,  PWRCOM,	PPCVLE,		{0}},
+
+{"crclr",	XL(19,193),	XL_MASK,     PPCCOM,	PPCVLE,		{BT, BAT, BBA}},
+{"crxor",	XL(19,193),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"dnh",		X(19,198),	X_MASK,	     E500MC,	PPCVLE,		{DUI, DUIS}},
+
+{"crnand",	XL(19,225),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"crand",	XL(19,257),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"hrfid",	XL(19,274),    0xffffffff, POWER5|CELL, PPC476|PPCVLE,	{0}},
+
+{"crset",	XL(19,289),	XL_MASK,     PPCCOM,	PPCVLE,		{BT, BAT, BBA}},
+{"creqv",	XL(19,289),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"urfid",	XL(19,306),	0xffffffff,  POWER9,	PPCVLE,		{0}},
+{"stop",	XL(19,370),	0xffffffff,  POWER9,	PPCVLE,		{0}},
+
+{"doze",	XL(19,402),	0xffffffff,  POWER6,	POWER9|PPCVLE,	{0}},
+
+{"crorc",	XL(19,417),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"nap",		XL(19,434),	0xffffffff,  POWER6,	POWER9|PPCVLE,	{0}},
+
+{"crmove",	XL(19,449),	XL_MASK,     PPCCOM,	PPCVLE,		{BT, BA, BBA}},
+{"cror",	XL(19,449),	XL_MASK,     COM,	PPCVLE,		{BT, BA, BB}},
+
+{"sleep",	XL(19,466),	0xffffffff,  POWER6,	POWER9|PPCVLE,	{0}},
+{"rvwinkle",	XL(19,498),	0xffffffff,  POWER6,	POWER9|PPCVLE,	{0}},
+
+{"bctr",    XLO(19,BOU,528,0),		XLBOBIBB_MASK, COM,	 PPCVLE,	{0}},
+{"bctrl",   XLO(19,BOU,528,1),		XLBOBIBB_MASK, COM,	 PPCVLE,	{0}},
+
+{"bgectr",  XLOCB(19,BOF,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgectr-", XLOCB(19,BOF,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnlctr",  XLOCB(19,BOF,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnlctr-", XLOCB(19,BOF,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgectrl", XLOCB(19,BOF,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgectrl-",XLOCB(19,BOF,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnlctrl", XLOCB(19,BOF,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnlctrl-",XLOCB(19,BOF,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blectr",  XLOCB(19,BOF,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"blectr-", XLOCB(19,BOF,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bngctr",  XLOCB(19,BOF,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bngctr-", XLOCB(19,BOF,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blectrl", XLOCB(19,BOF,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"blectrl-",XLOCB(19,BOF,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bngctrl", XLOCB(19,BOF,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bngctrl-",XLOCB(19,BOF,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnectr",  XLOCB(19,BOF,CBEQ,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnectr-", XLOCB(19,BOF,CBEQ,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnectrl", XLOCB(19,BOF,CBEQ,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnectrl-",XLOCB(19,BOF,CBEQ,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnsctr",  XLOCB(19,BOF,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnsctr-", XLOCB(19,BOF,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnuctr",  XLOCB(19,BOF,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnuctr-", XLOCB(19,BOF,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnsctrl", XLOCB(19,BOF,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnsctrl-",XLOCB(19,BOF,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnuctrl", XLOCB(19,BOF,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bnuctrl-",XLOCB(19,BOF,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgectr+", XLOCB(19,BOFP,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnlctr+", XLOCB(19,BOFP,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgectrl+",XLOCB(19,BOFP,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnlctrl+",XLOCB(19,BOFP,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blectr+", XLOCB(19,BOFP,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bngctr+", XLOCB(19,BOFP,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"blectrl+",XLOCB(19,BOFP,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bngctrl+",XLOCB(19,BOFP,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnectr+", XLOCB(19,BOFP,CBEQ,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnectrl+",XLOCB(19,BOFP,CBEQ,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnsctr+", XLOCB(19,BOFP,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnuctr+", XLOCB(19,BOFP,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnsctrl+",XLOCB(19,BOFP,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bnuctrl+",XLOCB(19,BOFP,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgectr-", XLOCB(19,BOFM4,CBLT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnlctr-", XLOCB(19,BOFM4,CBLT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgectrl-",XLOCB(19,BOFM4,CBLT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnlctrl-",XLOCB(19,BOFM4,CBLT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blectr-", XLOCB(19,BOFM4,CBGT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bngctr-", XLOCB(19,BOFM4,CBGT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blectrl-",XLOCB(19,BOFM4,CBGT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bngctrl-",XLOCB(19,BOFM4,CBGT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnectr-", XLOCB(19,BOFM4,CBEQ,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnectrl-",XLOCB(19,BOFM4,CBEQ,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnsctr-", XLOCB(19,BOFM4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnuctr-", XLOCB(19,BOFM4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnsctrl-",XLOCB(19,BOFM4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnuctrl-",XLOCB(19,BOFM4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgectr+", XLOCB(19,BOFP4,CBLT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnlctr+", XLOCB(19,BOFP4,CBLT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgectrl+",XLOCB(19,BOFP4,CBLT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnlctrl+",XLOCB(19,BOFP4,CBLT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blectr+", XLOCB(19,BOFP4,CBGT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bngctr+", XLOCB(19,BOFP4,CBGT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"blectrl+",XLOCB(19,BOFP4,CBGT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bngctrl+",XLOCB(19,BOFP4,CBGT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnectr+", XLOCB(19,BOFP4,CBEQ,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnectrl+",XLOCB(19,BOFP4,CBEQ,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnsctr+", XLOCB(19,BOFP4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnuctr+", XLOCB(19,BOFP4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnsctrl+",XLOCB(19,BOFP4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bnuctrl+",XLOCB(19,BOFP4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltctr",  XLOCB(19,BOT,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bltctr-", XLOCB(19,BOT,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltctrl", XLOCB(19,BOT,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bltctrl-",XLOCB(19,BOT,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtctr",  XLOCB(19,BOT,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgtctr-", XLOCB(19,BOT,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtctrl", XLOCB(19,BOT,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bgtctrl-",XLOCB(19,BOT,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqctr",  XLOCB(19,BOT,CBEQ,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"beqctr-", XLOCB(19,BOT,CBEQ,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqctrl", XLOCB(19,BOT,CBEQ,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"beqctrl-",XLOCB(19,BOT,CBEQ,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsoctr",  XLOCB(19,BOT,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bsoctr-", XLOCB(19,BOT,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bunctr",  XLOCB(19,BOT,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bunctr-", XLOCB(19,BOT,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsoctrl", XLOCB(19,BOT,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bsoctrl-",XLOCB(19,BOT,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bunctrl", XLOCB(19,BOT,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 PPCVLE,	{CR}},
+{"bunctrl-",XLOCB(19,BOT,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltctr+", XLOCB(19,BOTP,CBLT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltctrl+",XLOCB(19,BOTP,CBLT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtctr+", XLOCB(19,BOTP,CBGT,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bgtctrl+",XLOCB(19,BOTP,CBGT,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqctr+", XLOCB(19,BOTP,CBEQ,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"beqctrl+",XLOCB(19,BOTP,CBEQ,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsoctr+", XLOCB(19,BOTP,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bunctr+", XLOCB(19,BOTP,CBSO,528,0),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bsoctrl+",XLOCB(19,BOTP,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bunctrl+",XLOCB(19,BOTP,CBSO,528,1),	XLBOCBBB_MASK, PPCCOM,	 ISA_V2|PPCVLE,	{CR}},
+{"bltctr-", XLOCB(19,BOTM4,CBLT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltctrl-",XLOCB(19,BOTM4,CBLT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtctr-", XLOCB(19,BOTM4,CBGT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtctrl-",XLOCB(19,BOTM4,CBGT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqctr-", XLOCB(19,BOTM4,CBEQ,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqctrl-",XLOCB(19,BOTM4,CBEQ,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsoctr-", XLOCB(19,BOTM4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunctr-", XLOCB(19,BOTM4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsoctrl-",XLOCB(19,BOTM4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunctrl-",XLOCB(19,BOTM4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltctr+", XLOCB(19,BOTP4,CBLT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bltctrl+",XLOCB(19,BOTP4,CBLT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtctr+", XLOCB(19,BOTP4,CBGT,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bgtctrl+",XLOCB(19,BOTP4,CBGT,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqctr+", XLOCB(19,BOTP4,CBEQ,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"beqctrl+",XLOCB(19,BOTP4,CBEQ,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsoctr+", XLOCB(19,BOTP4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunctr+", XLOCB(19,BOTP4,CBSO,528,0),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bsoctrl+",XLOCB(19,BOTP4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+{"bunctrl+",XLOCB(19,BOTP4,CBSO,528,1),	XLBOCBBB_MASK, ISA_V2,	 PPCVLE,	{CR}},
+
+{"bfctr",   XLO(19,BOF,528,0),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bfctr-",  XLO(19,BOF,528,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bfctrl",  XLO(19,BOF,528,1),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"bfctrl-", XLO(19,BOF,528,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bfctr+",  XLO(19,BOFP,528,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bfctrl+", XLO(19,BOFP,528,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"bfctr-",  XLO(19,BOFM4,528,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bfctrl-", XLO(19,BOFM4,528,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bfctr+",  XLO(19,BOFP4,528,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"bfctrl+", XLO(19,BOFP4,528,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btctr",   XLO(19,BOT,528,0),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"btctr-",  XLO(19,BOT,528,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btctrl",  XLO(19,BOT,528,1),		XLBOBB_MASK,   PPCCOM,	 PPCVLE,	{BI}},
+{"btctrl-", XLO(19,BOT,528,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btctr+",  XLO(19,BOTP,528,0),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btctrl+", XLO(19,BOTP,528,1),		XLBOBB_MASK,   PPCCOM,	 ISA_V2|PPCVLE,	{BI}},
+{"btctr-",  XLO(19,BOTM4,528,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btctrl-", XLO(19,BOTM4,528,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btctr+",  XLO(19,BOTP4,528,0),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+{"btctrl+", XLO(19,BOTP4,528,1),	XLBOBB_MASK,   ISA_V2,	 PPCVLE,	{BI}},
+
+{"bcctr-",  XLYLK(19,528,0,0),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bcctrl-", XLYLK(19,528,0,1),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bcctr+",  XLYLK(19,528,1,0),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bcctrl+", XLYLK(19,528,1,1),		XLYBB_MASK,    PPCCOM,	 PPCVLE,	{BOE, BI}},
+{"bcctr",   XLLK(19,528,0),		XLBH_MASK,     PPCCOM,	 PPCVLE,	{BO, BI, BH}},
+{"bcc",	    XLLK(19,528,0),		XLBB_MASK,     PWRCOM,	 PPCVLE,	{BO, BI}},
+{"bcctrl",  XLLK(19,528,1),		XLBH_MASK,     PPCCOM,	 PPCVLE,	{BO, BI, BH}},
+{"bccl",    XLLK(19,528,1),		XLBB_MASK,     PWRCOM,	 PPCVLE,	{BO, BI}},
+
+{"bctar-",  XLYLK(19,560,0,0),		XLYBB_MASK,    POWER8,	 PPCVLE,	{BOE, BI}},
+{"bctarl-", XLYLK(19,560,0,1),		XLYBB_MASK,    POWER8,	 PPCVLE,	{BOE, BI}},
+{"bctar+",  XLYLK(19,560,1,0),		XLYBB_MASK,    POWER8,	 PPCVLE,	{BOE, BI}},
+{"bctarl+", XLYLK(19,560,1,1),		XLYBB_MASK,    POWER8,	 PPCVLE,	{BOE, BI}},
+{"bctar",   XLLK(19,560,0),		XLBH_MASK,     POWER8,	 PPCVLE,	{BO, BI, BH}},
+{"bctarl",  XLLK(19,560,1),		XLBH_MASK,     POWER8,	 PPCVLE,	{BO, BI, BH}},
+
+{"rlwimi",	M(20,0),	M_MASK,	     PPCCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+{"rlimi",	M(20,0),	M_MASK,	     PWRCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+
+{"rlwimi.",	M(20,1),	M_MASK,	     PPCCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+{"rlimi.",	M(20,1),	M_MASK,	     PWRCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+
+{"rotlwi",	MME(21,31,0),	MMBME_MASK,  PPCCOM,	PPCVLE,		{RA, RS, SH}},
+{"clrlwi",	MME(21,31,0),	MSHME_MASK,  PPCCOM,	PPCVLE,		{RA, RS, MB}},
+{"rlwinm",	M(21,0),	M_MASK,	     PPCCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+{"rlinm",	M(21,0),	M_MASK,	     PWRCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+{"rotlwi.",	MME(21,31,1),	MMBME_MASK,  PPCCOM,	PPCVLE,		{RA, RS, SH}},
+{"clrlwi.",	MME(21,31,1),	MSHME_MASK,  PPCCOM,	PPCVLE,		{RA, RS, MB}},
+{"rlwinm.",	M(21,1),	M_MASK,	     PPCCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+{"rlinm.",	M(21,1),	M_MASK,	     PWRCOM,	PPCVLE,		{RA, RS, SH, MBE, ME}},
+
+{"rlmi",	M(22,0),	M_MASK,	     M601,	PPCVLE,		{RA, RS, RB, MBE, ME}},
+{"rlmi.",	M(22,1),	M_MASK,	     M601,	PPCVLE,		{RA, RS, RB, MBE, ME}},
+
+{"rotlw",	MME(23,31,0),	MMBME_MASK,  PPCCOM,	PPCVLE,		{RA, RS, RB}},
+{"rlwnm",	M(23,0),	M_MASK,	     PPCCOM,	PPCVLE,		{RA, RS, RB, MBE, ME}},
+{"rlnm",	M(23,0),	M_MASK,	     PWRCOM,	PPCVLE,		{RA, RS, RB, MBE, ME}},
+{"rotlw.",	MME(23,31,1),	MMBME_MASK,  PPCCOM,	PPCVLE,		{RA, RS, RB}},
+{"rlwnm.",	M(23,1),	M_MASK,	     PPCCOM,	PPCVLE,		{RA, RS, RB, MBE, ME}},
+{"rlnm.",	M(23,1),	M_MASK,	     PWRCOM,	PPCVLE,		{RA, RS, RB, MBE, ME}},
+
+{"nop",		OP(24),		0xffffffff,  PPCCOM,	PPCVLE,		{0}},
+{"ori",		OP(24),		OP_MASK,     PPCCOM,	PPCVLE,		{RA, RS, UI}},
+{"oril",	OP(24),		OP_MASK,     PWRCOM,	PPCVLE,		{RA, RS, UI}},
+
+{"oris",	OP(25),		OP_MASK,     PPCCOM,	PPCVLE,		{RA, RS, UI}},
+{"oriu",	OP(25),		OP_MASK,     PWRCOM,	PPCVLE,		{RA, RS, UI}},
+
+{"xnop",	OP(26),		0xffffffff,  PPCCOM,	PPCVLE,		{0}},
+{"xori",	OP(26),		OP_MASK,     PPCCOM,	PPCVLE,		{RA, RS, UI}},
+{"xoril",	OP(26),		OP_MASK,     PWRCOM,	PPCVLE,		{RA, RS, UI}},
+
+{"xoris",	OP(27),		OP_MASK,     PPCCOM,	PPCVLE,		{RA, RS, UI}},
+{"xoriu",	OP(27),		OP_MASK,     PWRCOM,	PPCVLE,		{RA, RS, UI}},
+
+{"andi.",	OP(28),		OP_MASK,     PPCCOM,	PPCVLE,		{RA, RS, UI}},
+{"andil.",	OP(28),		OP_MASK,     PWRCOM,	PPCVLE,		{RA, RS, UI}},
+
+{"andis.",	OP(29),		OP_MASK,     PPCCOM,	PPCVLE,		{RA, RS, UI}},
+{"andiu.",	OP(29),		OP_MASK,     PWRCOM,	PPCVLE,		{RA, RS, UI}},
+
+{"rotldi",	MD(30,0,0),	MDMB_MASK,   PPC64,	PPCVLE,		{RA, RS, SH6}},
+{"clrldi",	MD(30,0,0),	MDSH_MASK,   PPC64,	PPCVLE,		{RA, RS, MB6}},
+{"rldicl",	MD(30,0,0),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, MB6}},
+{"rotldi.",	MD(30,0,1),	MDMB_MASK,   PPC64,	PPCVLE,		{RA, RS, SH6}},
+{"clrldi.",	MD(30,0,1),	MDSH_MASK,   PPC64,	PPCVLE,		{RA, RS, MB6}},
+{"rldicl.",	MD(30,0,1),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, MB6}},
+
+{"rldicr",	MD(30,1,0),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, ME6}},
+{"rldicr.",	MD(30,1,1),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, ME6}},
+
+{"rldic",	MD(30,2,0),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, MB6}},
+{"rldic.",	MD(30,2,1),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, MB6}},
+
+{"rldimi",	MD(30,3,0),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, MB6}},
+{"rldimi.",	MD(30,3,1),	MD_MASK,     PPC64,	PPCVLE,		{RA, RS, SH6, MB6}},
+
+{"rotld",	MDS(30,8,0),	MDSMB_MASK,  PPC64,	PPCVLE,		{RA, RS, RB}},
+{"rldcl",	MDS(30,8,0),	MDS_MASK,    PPC64,	PPCVLE,		{RA, RS, RB, MB6}},
+{"rotld.",	MDS(30,8,1),	MDSMB_MASK,  PPC64,	PPCVLE,		{RA, RS, RB}},
+{"rldcl.",	MDS(30,8,1),	MDS_MASK,    PPC64,	PPCVLE,		{RA, RS, RB, MB6}},
+
+{"rldcr",	MDS(30,9,0),	MDS_MASK,    PPC64,	PPCVLE,		{RA, RS, RB, ME6}},
+{"rldcr.",	MDS(30,9,1),	MDS_MASK,    PPC64,	PPCVLE,		{RA, RS, RB, ME6}},
+
+{"cmpw",	XOPL(31,0,0),	XCMPL_MASK,  PPCCOM,	0,		{OBF, RA, RB}},
+{"cmpd",	XOPL(31,0,1),	XCMPL_MASK,  PPC64,	0,		{OBF, RA, RB}},
+{"cmp",		X(31,0),	XCMP_MASK,   PPC,	0,		{BF, L32OPT, RA, RB}},
+{"cmp",		X(31,0),	XCMPL_MASK,  PWRCOM,	PPC,		{BF, RA, RB}},
+
+{"twlgt",	XTO(31,4,TOLGT), XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tlgt",	XTO(31,4,TOLGT), XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twllt",	XTO(31,4,TOLLT), XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tllt",	XTO(31,4,TOLLT), XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"tweq",	XTO(31,4,TOEQ),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"teq",		XTO(31,4,TOEQ),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twlge",	XTO(31,4,TOLGE), XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tlge",	XTO(31,4,TOLGE), XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twlnl",	XTO(31,4,TOLNL), XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tlnl",	XTO(31,4,TOLNL), XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twlle",	XTO(31,4,TOLLE), XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tlle",	XTO(31,4,TOLLE), XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twlng",	XTO(31,4,TOLNG), XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tlng",	XTO(31,4,TOLNG), XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twgt",	XTO(31,4,TOGT),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tgt",		XTO(31,4,TOGT),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twge",	XTO(31,4,TOGE),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tge",		XTO(31,4,TOGE),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twnl",	XTO(31,4,TONL),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tnl",		XTO(31,4,TONL),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twlt",	XTO(31,4,TOLT),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tlt",		XTO(31,4,TOLT),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twle",	XTO(31,4,TOLE),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tle",		XTO(31,4,TOLE),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twng",	XTO(31,4,TONG),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tng",		XTO(31,4,TONG),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"twne",	XTO(31,4,TONE),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tne",		XTO(31,4,TONE),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"trap",	XTO(31,4,TOU),	 0xffffffff, PPCCOM,	0,		{0}},
+{"twu",		XTO(31,4,TOU),	 XTO_MASK,   PPCCOM,	0,		{RA, RB}},
+{"tu",		XTO(31,4,TOU),	 XTO_MASK,   PWRCOM,	0,		{RA, RB}},
+{"tw",		X(31,4),	 X_MASK,     PPCCOM,	0,		{TO, RA, RB}},
+{"t",		X(31,4),	 X_MASK,     PWRCOM,	0,		{TO, RA, RB}},
+
+{"lvsl",	X(31,6),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+{"lvebx",	X(31,7),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+{"lbfcmx",	APU(31,7,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfc",	XO(31,8,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sf",		XO(31,8,0,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"subc",	XO(31,8,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RB, RA}},
+{"subfc.",	XO(31,8,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sf.",		XO(31,8,0,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"subc.",	XO(31,8,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RB, RA}},
+
+{"mulhdu",	XO(31,9,0,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"mulhdu.",	XO(31,9,0,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"addc",	XO(31,10,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"a",		XO(31,10,0,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"addc.",	XO(31,10,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"a.",		XO(31,10,0,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"mulhwu",	XO(31,11,0,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"mulhwu.",	XO(31,11,0,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+
+{"lxsiwzx",	X(31,12),	XX1_MASK,    PPCVSX2,	0,		{XT6, RA0, RB}},
+
+{"isellt",	X(31,15),	X_MASK,	     PPCISEL,	0,		{RT, RA0, RB}},
+
+{"tlbilxlpid",	XTO(31,18,0),	XTO_MASK, E500MC|PPCA2,	0,		{0}},
+{"tlbilxpid",	XTO(31,18,1),	XTO_MASK, E500MC|PPCA2,	0,		{0}},
+{"tlbilxva",	XTO(31,18,3),	XTO_MASK, E500MC|PPCA2,	0,		{RA0, RB}},
+{"tlbilx",	X(31,18),	X_MASK,	  E500MC|PPCA2,	0,		{T, RA0, RB}},
+
+{"mfcr",	XFXM(31,19,0,0), XFXFXM_MASK, COM,	0,		{RT, FXM4}},
+{"mfocrf",	XFXM(31,19,0,1), XFXFXM_MASK, COM,	0,		{RT, FXM}},
+
+{"lwarx",	X(31,20),	XEH_MASK,    PPC,	0,		{RT, RA0, RB, EH}},
+
+{"ldx",		X(31,21),	X_MASK,	     PPC64,	0,		{RT, RA0, RB}},
+
+{"icbt",	X(31,22),  X_MASK, BOOKE|PPCE300|PPCA2|PPC476, 0,	{CT, RA0, RB}},
+
+{"lwzx",	X(31,23),	X_MASK,	     PPCCOM,	0,		{RT, RA0, RB}},
+{"lx",		X(31,23),	X_MASK,	     PWRCOM,	0,		{RT, RA, RB}},
+
+{"slw",		XRC(31,24,0),	X_MASK,	     PPCCOM,	0,		{RA, RS, RB}},
+{"sl",		XRC(31,24,0),	X_MASK,	     PWRCOM,	0,		{RA, RS, RB}},
+{"slw.",	XRC(31,24,1),	X_MASK,	     PPCCOM,	0,		{RA, RS, RB}},
+{"sl.",		XRC(31,24,1),	X_MASK,	     PWRCOM,	0,		{RA, RS, RB}},
+
+{"cntlzw",	XRC(31,26,0),	XRB_MASK,    PPCCOM,	0,		{RA, RS}},
+{"cntlz",	XRC(31,26,0),	XRB_MASK,    PWRCOM,	0,		{RA, RS}},
+{"cntlzw.",	XRC(31,26,1),	XRB_MASK,    PPCCOM,	0,		{RA, RS}},
+{"cntlz.",	XRC(31,26,1),	XRB_MASK,    PWRCOM,	0,		{RA, RS}},
+
+{"sld",		XRC(31,27,0),	X_MASK,	     PPC64,	0,		{RA, RS, RB}},
+{"sld.",	XRC(31,27,1),	X_MASK,	     PPC64,	0,		{RA, RS, RB}},
+
+{"and",		XRC(31,28,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"and.",	XRC(31,28,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"maskg",	XRC(31,29,0),	X_MASK,	     M601,	PPCA2,		{RA, RS, RB}},
+{"maskg.",	XRC(31,29,1),	X_MASK,	     M601,	PPCA2,		{RA, RS, RB}},
+
+{"ldepx",	X(31,29),	X_MASK,	  E500MC|PPCA2, 0,		{RT, RA0, RB}},
+
+{"waitasec",	X(31,30),      XRTRARB_MASK, POWER8,	POWER9,		{0}},
+{"wait",	X(31,30),	XWC_MASK,    POWER9,	0,		{WC}},
+
+{"lwepx",	X(31,31),	X_MASK,	  E500MC|PPCA2, 0,		{RT, RA0, RB}},
+
+{"cmplw",	XOPL(31,32,0),	XCMPL_MASK,  PPCCOM,	0,		{OBF, RA, RB}},
+{"cmpld",	XOPL(31,32,1),	XCMPL_MASK,  PPC64,	0,		{OBF, RA, RB}},
+{"cmpl",	X(31,32),	XCMP_MASK,   PPC,	0,		{BF, L32OPT, RA, RB}},
+{"cmpl",	X(31,32),	XCMPL_MASK,  PWRCOM,	PPC,		{BF, RA, RB}},
+
+{"lvsr",	X(31,38),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+{"lvehx",	X(31,39),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+{"lhfcmx",	APU(31,39,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"mviwsplt",	X(31,46),	X_MASK,	     PPCVEC2,	0,		{VD, RA, RB}},
+
+{"iselgt",	X(31,47),	X_MASK,	     PPCISEL,	0,		{RT, RA0, RB}},
+
+{"lvewx",	X(31,71),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+
+{"addg6s",	XO(31,74,0,0),	XO_MASK,     POWER6,	0,		{RT, RA, RB}},
+
+{"lxsiwax",	X(31,76),	XX1_MASK,    PPCVSX2,	0,		{XT6, RA0, RB}},
+
+{"iseleq",	X(31,79),	X_MASK,	     PPCISEL,	0,		{RT, RA0, RB}},
+
+{"isel",	XISEL(31,15), XISEL_MASK, PPCISEL|TITAN, 0,		{RT, RA0, RB, CRB}},
+
+{"subf",	XO(31,40,0,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"sub",		XO(31,40,0,0),	XO_MASK,     PPC,	0,		{RT, RB, RA}},
+{"subf.",	XO(31,40,0,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"sub.",	XO(31,40,0,1),	XO_MASK,     PPC,	0,		{RT, RB, RA}},
+
+{"mfvsrd",	X(31,51),	XX1RB_MASK,   PPCVSX2,	0,		{RA, XS6}},
+{"mffprd",	X(31,51),	XX1RB_MASK|1, PPCVSX2,	0,		{RA, FRS}},
+{"mfvrd",	X(31,51)|1,	XX1RB_MASK|1, PPCVSX2,	0,		{RA, VS}},
+{"eratilx",	X(31,51),	X_MASK,	     PPCA2,	0,		{ERAT_T, RA, RB}},
+
+{"lbarx",	X(31,52),	XEH_MASK, POWER8|E6500, 0,		{RT, RA0, RB, EH}},
+
+{"ldux",	X(31,53),	X_MASK,	     PPC64,	0,		{RT, RAL, RB}},
+
+{"dcbst",	X(31,54),	XRT_MASK,    PPC,	0,		{RA0, RB}},
+
+{"lwzux",	X(31,55),	X_MASK,	     PPCCOM,	0,		{RT, RAL, RB}},
+{"lux",		X(31,55),	X_MASK,	     PWRCOM,	0,		{RT, RA, RB}},
+
+{"cntlzd",	XRC(31,58,0),	XRB_MASK,    PPC64,	0,		{RA, RS}},
+{"cntlzd.",	XRC(31,58,1),	XRB_MASK,    PPC64,	0,		{RA, RS}},
+
+{"andc",	XRC(31,60,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"andc.",	XRC(31,60,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"waitrsv",	X(31,62)|(1<<21), 0xffffffff, E500MC|PPCA2, 0,		{0}},
+{"waitimpl",	X(31,62)|(2<<21), 0xffffffff, E500MC|PPCA2, 0,		{0}},
+{"wait",	X(31,62),	XWC_MASK,    E500MC|PPCA2, 0,		{WC}},
+
+{"dcbstep",	XRT(31,63,0),	XRT_MASK,    E500MC|PPCA2, 0,		{RA0, RB}},
+
+{"tdlgt",	XTO(31,68,TOLGT), XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdllt",	XTO(31,68,TOLLT), XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdeq",	XTO(31,68,TOEQ),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdlge",	XTO(31,68,TOLGE), XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdlnl",	XTO(31,68,TOLNL), XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdlle",	XTO(31,68,TOLLE), XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdlng",	XTO(31,68,TOLNG), XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdgt",	XTO(31,68,TOGT),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdge",	XTO(31,68,TOGE),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdnl",	XTO(31,68,TONL),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdlt",	XTO(31,68,TOLT),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdle",	XTO(31,68,TOLE),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdng",	XTO(31,68,TONG),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdne",	XTO(31,68,TONE),  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"tdu",		XTO(31,68,TOU),	  XTO_MASK,  PPC64,	0,		{RA, RB}},
+{"td",		X(31,68),	X_MASK,	     PPC64,	0,		{TO, RA, RB}},
+
+{"lwfcmx",	APU(31,71,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+{"mulhd",	XO(31,73,0,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"mulhd.",	XO(31,73,0,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"mulhw",	XO(31,75,0,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"mulhw.",	XO(31,75,0,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+
+{"dlmzb",	XRC(31,78,0), X_MASK, PPC403|PPC440|TITAN, 0,		{RA, RS, RB}},
+{"dlmzb.",	XRC(31,78,1), X_MASK, PPC403|PPC440|TITAN, 0,		{RA, RS, RB}},
+
+{"mtsrd",	X(31,82),  XRB_MASK|(1<<20), PPC64,	0,		{SR, RS}},
+
+{"mfmsr",	X(31,83),	XRARB_MASK,  COM,	0,		{RT}},
+
+{"ldarx",	X(31,84),	XEH_MASK,    PPC64,	0,		{RT, RA0, RB, EH}},
+
+{"dcbfl",	XOPL(31,86,1),	XRT_MASK,    POWER5,	PPC476,		{RA0, RB}},
+{"dcbf",	X(31,86),	XLRT_MASK,   PPC,	0,		{RA0, RB, L2OPT}},
+
+{"lbzx",	X(31,87),	X_MASK,	     COM,	0,		{RT, RA0, RB}},
+
+{"lbepx",	X(31,95),	X_MASK,	  E500MC|PPCA2, 0,		{RT, RA0, RB}},
+
+{"dni",		XRC(31,97,1),	XRB_MASK,    E6500,	0,		{DUI, DCTL}},
+
+{"lvx",		X(31,103),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+{"lqfcmx",	APU(31,103,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"neg",		XO(31,104,0,0),	XORB_MASK,   COM,	0,		{RT, RA}},
+{"neg.",	XO(31,104,0,1),	XORB_MASK,   COM,	0,		{RT, RA}},
+
+{"mul",		XO(31,107,0,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"mul.",	XO(31,107,0,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"mvidsplt",	X(31,110),	X_MASK,	     PPCVEC2,	0,		{VD, RA, RB}},
+
+{"mtsrdin",	X(31,114),	XRA_MASK,    PPC64,	0,		{RS, RB}},
+
+{"mffprwz",	X(31,115),	XX1RB_MASK|1, PPCVSX2,	0,		{RA, FRS}},
+{"mfvrwz",	X(31,115)|1,	XX1RB_MASK|1, PPCVSX2,	0,		{RA, VS}},
+{"mfvsrwz",	X(31,115),	XX1RB_MASK,   PPCVSX2,	0,		{RA, XS6}},
+
+{"lharx",	X(31,116),	XEH_MASK, POWER8|E6500, 0,		{RT, RA0, RB, EH}},
+
+{"clf",		X(31,118),	XTO_MASK,    POWER,	0,		{RA, RB}},
+
+{"lbzux",	X(31,119),	X_MASK,	     COM,	0,		{RT, RAL, RB}},
+
+{"popcntb",	X(31,122),	XRB_MASK,    POWER5,	0,		{RA, RS}},
+
+{"not",		XRC(31,124,0),	X_MASK,	     COM,	0,		{RA, RS, RBS}},
+{"nor",		XRC(31,124,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"not.",	XRC(31,124,1),	X_MASK,	     COM,	0,		{RA, RS, RBS}},
+{"nor.",	XRC(31,124,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"dcbfep",	XRT(31,127,0),	XRT_MASK, E500MC|PPCA2, 0,		{RA0, RB}},
+
+{"setb",	X(31,128),	XRB_MASK|(3<<16), POWER9, 0,		{RT, BFA}},
+
+{"wrtee",	X(31,131), XRARB_MASK, PPC403|BOOKE|PPCA2|PPC476, 0,	{RS}},
+
+{"dcbtstls",	X(31,134),	X_MASK, PPCCHLK|PPC476|TITAN, 0,	{CT, RA0, RB}},
+
+{"stvebx",	X(31,135),	X_MASK,	     PPCVEC,	0,		{VS, RA0, RB}},
+{"stbfcmx",	APU(31,135,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfe",	XO(31,136,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sfe",		XO(31,136,0,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"subfe.",	XO(31,136,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sfe.",	XO(31,136,0,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"adde",	XO(31,138,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"ae",		XO(31,138,0,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"adde.",	XO(31,138,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"ae.",		XO(31,138,0,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"stxsiwx",	X(31,140),	XX1_MASK,    PPCVSX2,	0,		{XS6, RA0, RB}},
+
+{"msgsndp",	XRTRA(31,142,0,0), XRTRA_MASK, POWER8,	0,		{RB}},
+{"dcbtstlse",	X(31,142),	X_MASK,	     PPCCHLK,	E500MC,		{CT, RA0, RB}},
+
+{"mtcr",	XFXM(31,144,0xff,0), XRARB_MASK, COM,	0,		{RS}},
+{"mtcrf",	XFXM(31,144,0,0), XFXFXM_MASK, COM,	0,		{FXM, RS}},
+{"mtocrf",	XFXM(31,144,0,1), XFXFXM_MASK, COM,	0,		{FXM, RS}},
+
+{"mtmsr",	X(31,146),	XRLARB_MASK, COM,	0,		{RS, A_L}},
+
+{"mtsle",	X(31,147),    XRTLRARB_MASK, POWER8,	0,		{L}},
+
+{"eratsx",	XRC(31,147,0),	X_MASK,	     PPCA2,	0,		{RT, RA0, RB}},
+{"eratsx.",	XRC(31,147,1),	X_MASK,	     PPCA2,	0,		{RT, RA0, RB}},
+
+{"stdx",	X(31,149),	X_MASK,	     PPC64,	0,		{RS, RA0, RB}},
+
+{"stwcx.",	XRC(31,150,1),	X_MASK,	     PPC,	0,		{RS, RA0, RB}},
+
+{"stwx",	X(31,151),	X_MASK,	     PPCCOM,	0,		{RS, RA0, RB}},
+{"stx",		X(31,151),	X_MASK,	     PWRCOM,	0,		{RS, RA, RB}},
+
+{"slq",		XRC(31,152,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"slq.",	XRC(31,152,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"sle",		XRC(31,153,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"sle.",	XRC(31,153,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"prtyw",	X(31,154),    XRB_MASK, POWER6|PPCA2|PPC476, 0,		{RA, RS}},
+
+{"stdepx",	X(31,157),	X_MASK,	  E500MC|PPCA2, 0,		{RS, RA0, RB}},
+
+{"stwepx",	X(31,159),	X_MASK,	  E500MC|PPCA2, 0,		{RS, RA0, RB}},
+
+{"wrteei",	X(31,163), XE_MASK, PPC403|BOOKE|PPCA2|PPC476, 0,	{E}},
+
+{"dcbtls",	X(31,166),	X_MASK,	 PPCCHLK|PPC476|TITAN, 0,	{CT, RA0, RB}},
+
+{"stvehx",	X(31,167),	X_MASK,	     PPCVEC,	0,		{VS, RA0, RB}},
+{"sthfcmx",	APU(31,167,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"addex",	ZRC(31,170,0),	Z2_MASK,     POWER9,	0,		{RT, RA, RB, CY}},
+
+{"msgclrp",	XRTRA(31,174,0,0), XRTRA_MASK, POWER8,	0,		{RB}},
+{"dcbtlse",	X(31,174),	X_MASK,	     PPCCHLK,	E500MC,		{CT, RA0, RB}},
+
+{"mtmsrd",	X(31,178),	XRLARB_MASK, PPC64,	0,		{RS, A_L}},
+
+{"mtvsrd",	X(31,179),	XX1RB_MASK,   PPCVSX2,	0,		{XT6, RA}},
+{"mtfprd",	X(31,179),	XX1RB_MASK|1, PPCVSX2,	0,		{FRT, RA}},
+{"mtvrd",	X(31,179)|1,	XX1RB_MASK|1, PPCVSX2,	0,		{VD, RA}},
+{"eratre",	X(31,179),	X_MASK,	     PPCA2,	0,		{RT, RA, WS}},
+
+{"stdux",	X(31,181),	X_MASK,	     PPC64,	0,		{RS, RAS, RB}},
+
+{"stqcx.",	XRC(31,182,1),	X_MASK,	     POWER8,	0,		{RSQ, RA0, RB}},
+{"wchkall",	X(31,182),	X_MASK,	     PPCA2,	0,		{OBF}},
+
+{"stwux",	X(31,183),	X_MASK,	     PPCCOM,	0,		{RS, RAS, RB}},
+{"stux",	X(31,183),	X_MASK,	     PWRCOM,	0,		{RS, RA0, RB}},
+
+{"sliq",	XRC(31,184,0),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+{"sliq.",	XRC(31,184,1),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+
+{"prtyd",	X(31,186),	XRB_MASK, POWER6|PPCA2,	0,		{RA, RS}},
+
+{"cmprb",	X(31,192),	XCMP_MASK,   POWER9,	0,		{BF, L, RA, RB}},
+
+{"icblq.",	XRC(31,198,1),	X_MASK,	     E6500,	0,		{CT, RA0, RB}},
+
+{"stvewx",	X(31,199),	X_MASK,	     PPCVEC,	0,		{VS, RA0, RB}},
+{"stwfcmx",	APU(31,199,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfze",	XO(31,200,0,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfze",	XO(31,200,0,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"subfze.",	XO(31,200,0,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfze.",	XO(31,200,0,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"addze",	XO(31,202,0,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"aze",		XO(31,202,0,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"addze.",	XO(31,202,0,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"aze.",	XO(31,202,0,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"msgsnd",	XRTRA(31,206,0,0), XRTRA_MASK, E500MC|PPCA2|POWER8, 0,	{RB}},
+
+{"mtsr",	X(31,210), XRB_MASK|(1<<20), COM,	NON32,		{SR, RS}},
+
+{"mtfprwa",	X(31,211),	XX1RB_MASK|1, PPCVSX2,	0,		{FRT, RA}},
+{"mtvrwa",	X(31,211)|1,	XX1RB_MASK|1, PPCVSX2,	0,		{VD, RA}},
+{"mtvsrwa",	X(31,211),	XX1RB_MASK,   PPCVSX2,	0,		{XT6, RA}},
+{"eratwe",	X(31,211),	X_MASK,	     PPCA2,	0,		{RS, RA, WS}},
+
+{"ldawx.",	XRC(31,212,1),	X_MASK,	     PPCA2,	0,		{RT, RA0, RB}},
+
+{"stdcx.",	XRC(31,214,1),	X_MASK,	     PPC64,	0,		{RS, RA0, RB}},
+
+{"stbx",	X(31,215),	X_MASK,	     COM,	0,		{RS, RA0, RB}},
+
+{"sllq",	XRC(31,216,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"sllq.",	XRC(31,216,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"sleq",	XRC(31,217,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"sleq.",	XRC(31,217,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"stbepx",	X(31,223),	X_MASK,	  E500MC|PPCA2, 0,		{RS, RA0, RB}},
+
+{"cmpeqb",	X(31,224),	XCMPL_MASK,  POWER9,	0,		{BF, RA, RB}},
+
+{"icblc",	X(31,230),	X_MASK,	PPCCHLK|PPC476|TITAN, 0,	{CT, RA0, RB}},
+
+{"stvx",	X(31,231),	X_MASK,	     PPCVEC,	0,		{VS, RA0, RB}},
+{"stqfcmx",	APU(31,231,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfme",	XO(31,232,0,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfme",	XO(31,232,0,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"subfme.",	XO(31,232,0,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfme.",	XO(31,232,0,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"mulld",	XO(31,233,0,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"mulld.",	XO(31,233,0,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"addme",	XO(31,234,0,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"ame",		XO(31,234,0,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"addme.",	XO(31,234,0,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"ame.",	XO(31,234,0,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"mullw",	XO(31,235,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"muls",	XO(31,235,0,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"mullw.",	XO(31,235,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"muls.",	XO(31,235,0,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"icblce",	X(31,238),	X_MASK,	     PPCCHLK,	E500MC|PPCA2,	{CT, RA, RB}},
+{"msgclr",	XRTRA(31,238,0,0), XRTRA_MASK, E500MC|PPCA2|POWER8, 0,	{RB}},
+{"mtsrin",	X(31,242),	XRA_MASK,    PPC,	NON32,		{RS, RB}},
+{"mtsri",	X(31,242),	XRA_MASK,    POWER,	NON32,		{RS, RB}},
+
+{"mtfprwz",	X(31,243),	XX1RB_MASK|1, PPCVSX2,	0,		{FRT, RA}},
+{"mtvrwz",	X(31,243)|1,	XX1RB_MASK|1, PPCVSX2,	0,		{VD, RA}},
+{"mtvsrwz",	X(31,243),	XX1RB_MASK,   PPCVSX2,	0,		{XT6, RA}},
+
+{"dcbtstt",	XRT(31,246,0x10), XRT_MASK,  POWER7,	0,		{RA0, RB}},
+{"dcbtst",	X(31,246),	X_MASK,	     POWER4,	DCBT_EO,	{RA0, RB, CT}},
+{"dcbtst",	X(31,246),	X_MASK,	     DCBT_EO,	0,		{CT, RA0, RB}},
+{"dcbtst",	X(31,246),	X_MASK,	     PPC,	POWER4|DCBT_EO,	{RA0, RB}},
+
+{"stbux",	X(31,247),	X_MASK,	     COM,	0,		{RS, RAS, RB}},
+
+{"slliq",	XRC(31,248,0),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+{"slliq.",	XRC(31,248,1),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+
+{"bpermd",	X(31,252),	X_MASK,	  POWER7|PPCA2,	0,		{RA, RS, RB}},
+
+{"dcbtstep",	XRT(31,255,0),	X_MASK,	  E500MC|PPCA2, 0,		{RT, RA0, RB}},
+
+{"mfdcrx",	X(31,259),	X_MASK, BOOKE|PPCA2|PPC476, TITAN,	{RS, RA}},
+{"mfdcrx.",	XRC(31,259,1),	X_MASK,	     PPCA2,	0,		{RS, RA}},
+
+{"lvexbx",	X(31,261),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"icbt",	X(31,262),	XRT_MASK,    PPC403,	0,		{RA, RB}},
+
+{"lvepxl",	X(31,263),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"ldfcmx",	APU(31,263,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+{"doz",		XO(31,264,0,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"doz.",	XO(31,264,0,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"modud",	X(31,265),	X_MASK,	     POWER9,	0,		{RT, RA, RB}},
+
+{"add",		XO(31,266,0,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"cax",		XO(31,266,0,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"add.",	XO(31,266,0,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"cax.",	XO(31,266,0,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"moduw",	X(31,267),	X_MASK,	     POWER9,	0,		{RT, RA, RB}},
+
+{"lxvx",	X(31,268),	XX1_MASK|1<<6, PPCVSX3,	0,		{XT6, RA0, RB}},
+{"lxvl",	X(31,269),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"ehpriv",	X(31,270),	0xffffffff,  E500MC|PPCA2, 0,		{0}},
+
+{"tlbiel",	X(31,274),	X_MASK|1<<20,POWER9,	PPC476,		{RB, RSO, RIC, PRS, X_R}},
+{"tlbiel",	X(31,274),	XRTLRA_MASK, POWER4,	POWER9|PPC476,	{RB, LOPT}},
+
+{"mfapidi",	X(31,275),	X_MASK,	     BOOKE,	E500|TITAN,	{RT, RA}},
+
+{"lqarx",	X(31,276),	XEH_MASK,    POWER8,	0,		{RTQ, RAX, RBX, EH}},
+
+{"lscbx",	XRC(31,277,0),	X_MASK,	     M601,	0,		{RT, RA, RB}},
+{"lscbx.",	XRC(31,277,1),	X_MASK,	     M601,	0,		{RT, RA, RB}},
+
+{"dcbtt",	XRT(31,278,0x10), XRT_MASK,  POWER7,	0,		{RA0, RB}},
+{"dcbt",	X(31,278),	X_MASK,	     POWER4,	DCBT_EO,	{RA0, RB, CT}},
+{"dcbt",	X(31,278),	X_MASK,	     DCBT_EO,	0,		{CT, RA0, RB}},
+{"dcbt",	X(31,278),	X_MASK,	     PPC,	POWER4|DCBT_EO,	{RA0, RB}},
+
+{"lhzx",	X(31,279),	X_MASK,	     COM,	0,		{RT, RA0, RB}},
+
+{"cdtbcd",	X(31,282),	XRB_MASK,    POWER6,	0,		{RA, RS}},
+
+{"eqv",		XRC(31,284,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"eqv.",	XRC(31,284,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"lhepx",	X(31,287),	X_MASK,	  E500MC|PPCA2, 0,		{RT, RA0, RB}},
+
+{"mfdcrux",	X(31,291),	X_MASK,	     PPC464,	0,		{RS, RA}},
+
+{"lvexhx",	X(31,293),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+{"lvepx",	X(31,295),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"lxvll",	X(31,301),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"mfbhrbe",	X(31,302),	X_MASK,	     POWER8,	0,		{RT, BHRBE}},
+
+{"tlbie",	X(31,306),	X_MASK|1<<20,POWER9,	TITAN,		{RB, RS, RIC, PRS, X_R}},
+{"tlbie",	X(31,306),	XRA_MASK,    POWER7,	POWER9|TITAN,	{RB, RS}},
+{"tlbie",	X(31,306),	XRTLRA_MASK, PPC,    E500|POWER7|TITAN,	{RB, LOPT}},
+{"tlbi",	X(31,306),	XRT_MASK,    POWER,	0,		{RA0, RB}},
+
+{"mfvsrld",	X(31,307),	XX1RB_MASK,  PPCVSX3,	0,		{RA, XS6}},
+
+{"ldmx",	X(31,309),	X_MASK,	     POWER9,	0,		{RT, RA0, RB}},
+
+{"eciwx",	X(31,310),	X_MASK,	     PPC,	E500|TITAN,	{RT, RA0, RB}},
+
+{"lhzux",	X(31,311),	X_MASK,	     COM,	0,		{RT, RAL, RB}},
+
+{"cbcdtd",	X(31,314),	XRB_MASK,    POWER6,	0,		{RA, RS}},
+
+{"xor",		XRC(31,316,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"xor.",	XRC(31,316,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"dcbtep",	XRT(31,319,0),	X_MASK,	  E500MC|PPCA2, 0,		{RT, RA0, RB}},
+
+{"mfexisr",	XSPR(31,323, 64), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfexier",	XSPR(31,323, 66), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr0",	XSPR(31,323,128), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr1",	XSPR(31,323,129), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr2",	XSPR(31,323,130), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr3",	XSPR(31,323,131), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr4",	XSPR(31,323,132), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr5",	XSPR(31,323,133), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr6",	XSPR(31,323,134), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbr7",	XSPR(31,323,135), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbear",	XSPR(31,323,144), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfbesr",	XSPR(31,323,145), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfiocr",	XSPR(31,323,160), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacr0",	XSPR(31,323,192), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmact0",	XSPR(31,323,193), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmada0",	XSPR(31,323,194), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmasa0",	XSPR(31,323,195), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacc0",	XSPR(31,323,196), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacr1",	XSPR(31,323,200), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmact1",	XSPR(31,323,201), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmada1",	XSPR(31,323,202), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmasa1",	XSPR(31,323,203), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacc1",	XSPR(31,323,204), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacr2",	XSPR(31,323,208), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmact2",	XSPR(31,323,209), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmada2",	XSPR(31,323,210), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmasa2",	XSPR(31,323,211), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacc2",	XSPR(31,323,212), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacr3",	XSPR(31,323,216), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmact3",	XSPR(31,323,217), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmada3",	XSPR(31,323,218), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmasa3",	XSPR(31,323,219), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmacc3",	XSPR(31,323,220), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdmasr",	XSPR(31,323,224), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdcr",	X(31,323), X_MASK, PPC403|BOOKE|PPCA2|PPC476, E500|TITAN, {RT, SPR}},
+{"mfdcr.",	XRC(31,323,1),	X_MASK,	     PPCA2,	0,		{RT, SPR}},
+
+{"lvexwx",	X(31,325),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"dcread",	X(31,326),	X_MASK,	  PPC476|TITAN,	0,		{RT, RA0, RB}},
+
+{"div",		XO(31,331,0,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"div.",	XO(31,331,0,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"lxvdsx",	X(31,332),	XX1_MASK,    PPCVSX,	0,		{XT6, RA0, RB}},
+
+{"mfpmr",	X(31,334),	X_MASK, PPCPMR|PPCE300, 0,		{RT, PMR}},
+{"mftmr",	X(31,366),	X_MASK,	PPCTMR|E6500,	0,		{RT, TMR}},
+
+{"slbsync",	X(31,338),	0xffffffff,  POWER9,	0,		{0}},
+
+{"mfmq",	XSPR(31,339,  0), XSPR_MASK, M601,	0,		{RT}},
+{"mfxer",	XSPR(31,339,  1), XSPR_MASK, COM,	0,		{RT}},
+{"mfrtcu",	XSPR(31,339,  4), XSPR_MASK, COM,	TITAN,		{RT}},
+{"mfrtcl",	XSPR(31,339,  5), XSPR_MASK, COM,	TITAN,		{RT}},
+{"mfdec",	XSPR(31,339,  6), XSPR_MASK, MFDEC1,	0,		{RT}},
+{"mflr",	XSPR(31,339,  8), XSPR_MASK, COM,	0,		{RT}},
+{"mfctr",	XSPR(31,339,  9), XSPR_MASK, COM,	0,		{RT}},
+{"mfdscr",	XSPR(31,339, 17), XSPR_MASK, POWER6,	0,		{RT}},
+{"mftid",	XSPR(31,339, 17), XSPR_MASK, POWER,	0,		{RT}},
+{"mfdsisr",	XSPR(31,339, 18), XSPR_MASK, COM,	TITAN,		{RT}},
+{"mfdar",	XSPR(31,339, 19), XSPR_MASK, COM,	TITAN,		{RT}},
+{"mfdec",	XSPR(31,339, 22), XSPR_MASK, MFDEC2,	MFDEC1,		{RT}},
+{"mfsdr0",	XSPR(31,339, 24), XSPR_MASK, POWER,	0,		{RT}},
+{"mfsdr1",	XSPR(31,339, 25), XSPR_MASK, COM,	TITAN,		{RT}},
+{"mfsrr0",	XSPR(31,339, 26), XSPR_MASK, COM,	0,		{RT}},
+{"mfsrr1",	XSPR(31,339, 27), XSPR_MASK, COM,	0,		{RT}},
+{"mfcfar",	XSPR(31,339, 28), XSPR_MASK, POWER6,	0,		{RT}},
+{"mfpid",	XSPR(31,339, 48), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfcsrr0",	XSPR(31,339, 58), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfcsrr1",	XSPR(31,339, 59), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdear",	XSPR(31,339, 61), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfesr",	XSPR(31,339, 62), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivpr",	XSPR(31,339, 63), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfctrl",	XSPR(31,339,136), XSPR_MASK, POWER4,	0,		{RT}},
+{"mfcmpa",	XSPR(31,339,144), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmpb",	XSPR(31,339,145), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmpc",	XSPR(31,339,146), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmpd",	XSPR(31,339,147), XSPR_MASK, PPC860,	0,		{RT}},
+{"mficr",	XSPR(31,339,148), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfder",	XSPR(31,339,149), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcounta",	XSPR(31,339,150), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcountb",	XSPR(31,339,151), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmpe",	XSPR(31,339,152), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmpf",	XSPR(31,339,153), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmpg",	XSPR(31,339,154), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfcmph",	XSPR(31,339,155), XSPR_MASK, PPC860,	0,		{RT}},
+{"mflctrl1",	XSPR(31,339,156), XSPR_MASK, PPC860,	0,		{RT}},
+{"mflctrl2",	XSPR(31,339,157), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfictrl",	XSPR(31,339,158), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfbar",	XSPR(31,339,159), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfvrsave",	XSPR(31,339,256), XSPR_MASK, PPCVEC,	0,		{RT}},
+{"mfusprg0",	XSPR(31,339,256), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfsprg",	XSPR(31,339,256), XSPRG_MASK, PPC,	0,		{RT, SPRG}},
+{"mfsprg4",	XSPR(31,339,260), XSPR_MASK, PPC405|BOOKE, 0,		{RT}},
+{"mfsprg5",	XSPR(31,339,261), XSPR_MASK, PPC405|BOOKE, 0,		{RT}},
+{"mfsprg6",	XSPR(31,339,262), XSPR_MASK, PPC405|BOOKE, 0,		{RT}},
+{"mfsprg7",	XSPR(31,339,263), XSPR_MASK, PPC405|BOOKE, 0,		{RT}},
+{"mftbu",	XSPR(31,339,269), XSPR_MASK, POWER4|BOOKE, 0,		{RT}},
+{"mftb",	X(31,339),	  X_MASK,    POWER4|BOOKE, 0,		{RT, TBR}},
+{"mftbl",	XSPR(31,339,268), XSPR_MASK, POWER4|BOOKE, 0,		{RT}},
+{"mfsprg0",	XSPR(31,339,272), XSPR_MASK, PPC,	0,		{RT}},
+{"mfsprg1",	XSPR(31,339,273), XSPR_MASK, PPC,	0,		{RT}},
+{"mfsprg2",	XSPR(31,339,274), XSPR_MASK, PPC,	0,		{RT}},
+{"mfsprg3",	XSPR(31,339,275), XSPR_MASK, PPC,	0,		{RT}},
+{"mfasr",	XSPR(31,339,280), XSPR_MASK, PPC64,	0,		{RT}},
+{"mfear",	XSPR(31,339,282), XSPR_MASK, PPC,	TITAN,		{RT}},
+{"mfpir",	XSPR(31,339,286), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfpvr",	XSPR(31,339,287), XSPR_MASK, PPC,	0,		{RT}},
+{"mfdbsr",	XSPR(31,339,304), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdbcr0",	XSPR(31,339,308), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdbcr1",	XSPR(31,339,309), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdbcr2",	XSPR(31,339,310), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfiac1",	XSPR(31,339,312), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfiac2",	XSPR(31,339,313), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfiac3",	XSPR(31,339,314), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfiac4",	XSPR(31,339,315), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdac1",	XSPR(31,339,316), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdac2",	XSPR(31,339,317), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdvc1",	XSPR(31,339,318), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfdvc2",	XSPR(31,339,319), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mftsr",	XSPR(31,339,336), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mftcr",	XSPR(31,339,340), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor0",	XSPR(31,339,400), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor1",	XSPR(31,339,401), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor2",	XSPR(31,339,402), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor3",	XSPR(31,339,403), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor4",	XSPR(31,339,404), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor5",	XSPR(31,339,405), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor6",	XSPR(31,339,406), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor7",	XSPR(31,339,407), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor8",	XSPR(31,339,408), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor9",	XSPR(31,339,409), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor10",	XSPR(31,339,410), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor11",	XSPR(31,339,411), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor12",	XSPR(31,339,412), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor13",	XSPR(31,339,413), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor14",	XSPR(31,339,414), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfivor15",	XSPR(31,339,415), XSPR_MASK, BOOKE,	0,		{RT}},
+{"mfspefscr",	XSPR(31,339,512), XSPR_MASK, PPCSPE,	0,		{RT}},
+{"mfbbear",	XSPR(31,339,513), XSPR_MASK, PPCBRLK,	0,		{RT}},
+{"mfbbtar",	XSPR(31,339,514), XSPR_MASK, PPCBRLK,	0,		{RT}},
+{"mfivor32",	XSPR(31,339,528), XSPR_MASK, PPCSPE,	0,		{RT}},
+{"mfibatu",	XSPR(31,339,528), XSPRBAT_MASK, PPC,	TITAN,		{RT, SPRBAT}},
+{"mfivor33",	XSPR(31,339,529), XSPR_MASK, PPCSPE,	0,		{RT}},
+{"mfibatl",	XSPR(31,339,529), XSPRBAT_MASK, PPC,	TITAN,		{RT, SPRBAT}},
+{"mfivor34",	XSPR(31,339,530), XSPR_MASK, PPCSPE,	0,		{RT}},
+{"mfivor35",	XSPR(31,339,531), XSPR_MASK, PPCPMR,	0,		{RT}},
+{"mfdbatu",	XSPR(31,339,536), XSPRBAT_MASK, PPC,	TITAN,		{RT, SPRBAT}},
+{"mfdbatl",	XSPR(31,339,537), XSPRBAT_MASK, PPC,	TITAN,		{RT, SPRBAT}},
+{"mfic_cst",	XSPR(31,339,560), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfic_adr",	XSPR(31,339,561), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfic_dat",	XSPR(31,339,562), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfdc_cst",	XSPR(31,339,568), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfdc_adr",	XSPR(31,339,569), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfdc_dat",	XSPR(31,339,570), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmcsrr0",	XSPR(31,339,570), XSPR_MASK, PPCRFMCI,	0,		{RT}},
+{"mfmcsrr1",	XSPR(31,339,571), XSPR_MASK, PPCRFMCI,	0,		{RT}},
+{"mfmcsr",	XSPR(31,339,572), XSPR_MASK, PPCRFMCI,	0,		{RT}},
+{"mfmcar",	XSPR(31,339,573), XSPR_MASK, PPCRFMCI,	TITAN,		{RT}},
+{"mfdpdr",	XSPR(31,339,630), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfdpir",	XSPR(31,339,631), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfimmr",	XSPR(31,339,638), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_ctr",	XSPR(31,339,784), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_ap",	XSPR(31,339,786), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_epn",	XSPR(31,339,787), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_twc",	XSPR(31,339,789), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_rpn",	XSPR(31,339,790), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_ctr",	XSPR(31,339,792), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfm_casid",	XSPR(31,339,793), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_ap",	XSPR(31,339,794), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_epn",	XSPR(31,339,795), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_twb",	XSPR(31,339,796), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_twc",	XSPR(31,339,797), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_rpn",	XSPR(31,339,798), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfm_tw",	XSPR(31,339,799), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_dbcam",	XSPR(31,339,816), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_dbram0",	XSPR(31,339,817), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmi_dbram1",	XSPR(31,339,818), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_dbcam",	XSPR(31,339,824), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_dbram0",	XSPR(31,339,825), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfmd_dbram1",	XSPR(31,339,826), XSPR_MASK, PPC860,	0,		{RT}},
+{"mfivndx",	XSPR(31,339,880), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfdvndx",	XSPR(31,339,881), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfivlim",	XSPR(31,339,882), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfdvlim",	XSPR(31,339,883), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfclcsr",	XSPR(31,339,884), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfccr1",	XSPR(31,339,888), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfppr",	XSPR(31,339,896), XSPR_MASK, POWER7,	0,		{RT}},
+{"mfppr32",	XSPR(31,339,898), XSPR_MASK, POWER7,	0,		{RT}},
+{"mfrstcfg",	XSPR(31,339,923), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfdcdbtrl",	XSPR(31,339,924), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfdcdbtrh",	XSPR(31,339,925), XSPR_MASK, TITAN,	0,		{RT}},
+{"mficdbtr",	XSPR(31,339,927), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfummcr0",	XSPR(31,339,936), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfupmc1",	XSPR(31,339,937), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfupmc2",	XSPR(31,339,938), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfusia",	XSPR(31,339,939), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfummcr1",	XSPR(31,339,940), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfupmc3",	XSPR(31,339,941), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfupmc4",	XSPR(31,339,942), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfzpr",	XSPR(31,339,944), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfpid",	XSPR(31,339,945), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfmmucr",	XSPR(31,339,946), XSPR_MASK, TITAN,	0,		{RT}},
+{"mfccr0",	XSPR(31,339,947), XSPR_MASK, PPC405|TITAN, 0,		{RT}},
+{"mfiac3",	XSPR(31,339,948), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfiac4",	XSPR(31,339,949), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfdvc1",	XSPR(31,339,950), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfdvc2",	XSPR(31,339,951), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfmmcr0",	XSPR(31,339,952), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfpmc1",	XSPR(31,339,953), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfsgr",	XSPR(31,339,953), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdcwr",	XSPR(31,339,954), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfpmc2",	XSPR(31,339,954), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfsia",	XSPR(31,339,955), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfsler",	XSPR(31,339,955), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfmmcr1",	XSPR(31,339,956), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfsu0r",	XSPR(31,339,956), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfdbcr1",	XSPR(31,339,957), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfpmc3",	XSPR(31,339,957), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfpmc4",	XSPR(31,339,958), XSPR_MASK, PPC750,	0,		{RT}},
+{"mficdbdr",	XSPR(31,339,979), XSPR_MASK, PPC403|TITAN, 0,		{RT}},
+{"mfesr",	XSPR(31,339,980), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdear",	XSPR(31,339,981), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfevpr",	XSPR(31,339,982), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfcdbcr",	XSPR(31,339,983), XSPR_MASK, PPC403,	0,		{RT}},
+{"mftsr",	XSPR(31,339,984), XSPR_MASK, PPC403,	0,		{RT}},
+{"mftcr",	XSPR(31,339,986), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfpit",	XSPR(31,339,987), XSPR_MASK, PPC403,	0,		{RT}},
+{"mftbhi",	XSPR(31,339,988), XSPR_MASK, PPC403,	0,		{RT}},
+{"mftblo",	XSPR(31,339,989), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfsrr2",	XSPR(31,339,990), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfsrr3",	XSPR(31,339,991), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdbsr",	XSPR(31,339,1008), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdbcr0",	XSPR(31,339,1010), XSPR_MASK, PPC405,	0,		{RT}},
+{"mfdbdr",	XSPR(31,339,1011), XSPR_MASK, TITAN,	0,		{RS}},
+{"mfiac1",	XSPR(31,339,1012), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfiac2",	XSPR(31,339,1013), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdac1",	XSPR(31,339,1014), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfdac2",	XSPR(31,339,1015), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfl2cr",	XSPR(31,339,1017), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfdccr",	XSPR(31,339,1018), XSPR_MASK, PPC403,	0,		{RT}},
+{"mficcr",	XSPR(31,339,1019), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfictc",	XSPR(31,339,1019), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfpbl1",	XSPR(31,339,1020), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfthrm1",	XSPR(31,339,1020), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfpbu1",	XSPR(31,339,1021), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfthrm2",	XSPR(31,339,1021), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfpbl2",	XSPR(31,339,1022), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfthrm3",	XSPR(31,339,1022), XSPR_MASK, PPC750,	0,		{RT}},
+{"mfpbu2",	XSPR(31,339,1023), XSPR_MASK, PPC403,	0,		{RT}},
+{"mfspr",	X(31,339),	X_MASK,	     COM,	0,		{RT, SPR}},
+
+{"lwax",	X(31,341),	X_MASK,	     PPC64,	0,		{RT, RA0, RB}},
+
+{"dst",		XDSS(31,342,0),	XDSS_MASK,   PPCVEC,	0,		{RA, RB, STRM}},
+
+{"lhax",	X(31,343),	X_MASK,	     COM,	0,		{RT, RA0, RB}},
+
+{"lvxl",	X(31,359),	X_MASK,	     PPCVEC,	0,		{VD, RA0, RB}},
+
+{"abs",		XO(31,360,0,0),	XORB_MASK,   M601,	0,		{RT, RA}},
+{"abs.",	XO(31,360,0,1),	XORB_MASK,   M601,	0,		{RT, RA}},
+
+{"divs",	XO(31,363,0,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"divs.",	XO(31,363,0,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"lxvwsx",	X(31,364),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"tlbia",	X(31,370),	0xffffffff,  PPC,	E500|TITAN,	{0}},
+
+{"mftbu",	XSPR(31,371,269), XSPR_MASK, PPC,	NO371|POWER4,	{RT}},
+{"mftb",	X(31,371),	X_MASK,	     PPC,	NO371|POWER4,	{RT, TBR}},
+{"mftbl",	XSPR(31,371,268), XSPR_MASK, PPC,	NO371|POWER4,	{RT}},
+
+{"lwaux",	X(31,373),	X_MASK,	     PPC64,	0,		{RT, RAL, RB}},
+
+{"dstst",	XDSS(31,374,0),	XDSS_MASK,   PPCVEC,	0,		{RA, RB, STRM}},
+
+{"lhaux",	X(31,375),	X_MASK,	     COM,	0,		{RT, RAL, RB}},
+
+{"popcntw",	X(31,378),	XRB_MASK,    POWER7|PPCA2, 0,		{RA, RS}},
+
+{"mtdcrx",	X(31,387),	X_MASK,	     BOOKE|PPCA2|PPC476, TITAN,	{RA, RS}},
+{"mtdcrx.",	XRC(31,387,1),	X_MASK,	     PPCA2,	0,		{RA, RS}},
+
+{"stvexbx",	X(31,389),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"dcblc",	X(31,390),	X_MASK,	 PPCCHLK|PPC476|TITAN, 0,	{CT, RA0, RB}},
+{"stdfcmx",	APU(31,391,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"divdeu",	XO(31,393,0,0),	XO_MASK,     POWER7|PPCA2, 0,		{RT, RA, RB}},
+{"divdeu.",	XO(31,393,0,1),	XO_MASK,     POWER7|PPCA2, 0,		{RT, RA, RB}},
+{"divweu",	XO(31,395,0,0),	XO_MASK,     POWER7|PPCA2, 0,		{RT, RA, RB}},
+{"divweu.",	XO(31,395,0,1),	XO_MASK,     POWER7|PPCA2, 0,		{RT, RA, RB}},
+
+{"stxvx",	X(31,396),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+{"stxvl",	X(31,397),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+
+{"dcblce",	X(31,398),	X_MASK,	     PPCCHLK,	E500MC,		{CT, RA, RB}},
+
+{"slbmte",	X(31,402),	XRA_MASK,    PPC64,	0,		{RS, RB}},
+
+{"mtvsrws",	X(31,403),	XX1RB_MASK,  PPCVSX3,	0,		{XT6, RA}},
+
+{"pbt.",	XRC(31,404,1),	X_MASK,	     POWER8,	0,		{RS, RA0, RB}},
+
+{"icswx",	XRC(31,406,0),	X_MASK,	  POWER7|PPCA2,	0,		{RS, RA, RB}},
+{"icswx.",	XRC(31,406,1),	X_MASK,	  POWER7|PPCA2,	0,		{RS, RA, RB}},
+
+{"sthx",	X(31,407),	X_MASK,	     COM,	0,		{RS, RA0, RB}},
+
+{"orc",		XRC(31,412,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"orc.",	XRC(31,412,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"sthepx",	X(31,415),	X_MASK,	  E500MC|PPCA2, 0,		{RS, RA0, RB}},
+
+{"mtdcrux",	X(31,419),	X_MASK,	     PPC464,	0,		{RA, RS}},
+
+{"stvexhx",	X(31,421),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"dcblq.",	XRC(31,422,1),	X_MASK,	     E6500,	0,		{CT, RA0, RB}},
+
+{"divde",	XO(31,425,0,0),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divde.",	XO(31,425,0,1),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divwe",	XO(31,427,0,0),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divwe.",	XO(31,427,0,1),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+
+{"stxvll",	X(31,429),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+
+{"clrbhrb",	X(31,430),	0xffffffff,  POWER8,	0,		{0}},
+
+{"slbie",	X(31,434),	XRTRA_MASK,  PPC64,	0,		{RB}},
+
+{"mtvsrdd",	X(31,435),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"ecowx",	X(31,438),	X_MASK,	     PPC,	E500|TITAN,	{RT, RA0, RB}},
+
+{"sthux",	X(31,439),	X_MASK,	     COM,	0,		{RS, RAS, RB}},
+
+{"mdors",	0x7f9ce378,	0xffffffff,  E500MC,	0,		{0}},
+
+{"miso",	0x7f5ad378,	0xffffffff,  E6500,	0,		{0}},
+
+/* The "yield", "mdoio" and "mdoom" instructions are extended mnemonics for
+   "or rX,rX,rX", with rX being r27, r29 and r30 respectively.	*/
+{"yield",	0x7f7bdb78,	0xffffffff,  POWER7,	0,		{0}},
+{"mdoio",	0x7fbdeb78,	0xffffffff,  POWER7,	0,		{0}},
+{"mdoom",	0x7fdef378,	0xffffffff,  POWER7,	0,		{0}},
+{"mr",		XRC(31,444,0),	X_MASK,	     COM,	0,		{RA, RS, RBS}},
+{"or",		XRC(31,444,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"mr.",		XRC(31,444,1),	X_MASK,	     COM,	0,		{RA, RS, RBS}},
+{"or.",		XRC(31,444,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"mtexisr",	XSPR(31,451, 64), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtexier",	XSPR(31,451, 66), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr0",	XSPR(31,451,128), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr1",	XSPR(31,451,129), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr2",	XSPR(31,451,130), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr3",	XSPR(31,451,131), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr4",	XSPR(31,451,132), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr5",	XSPR(31,451,133), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr6",	XSPR(31,451,134), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbr7",	XSPR(31,451,135), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbear",	XSPR(31,451,144), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtbesr",	XSPR(31,451,145), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtiocr",	XSPR(31,451,160), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacr0",	XSPR(31,451,192), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmact0",	XSPR(31,451,193), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmada0",	XSPR(31,451,194), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmasa0",	XSPR(31,451,195), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacc0",	XSPR(31,451,196), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacr1",	XSPR(31,451,200), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmact1",	XSPR(31,451,201), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmada1",	XSPR(31,451,202), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmasa1",	XSPR(31,451,203), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacc1",	XSPR(31,451,204), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacr2",	XSPR(31,451,208), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmact2",	XSPR(31,451,209), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmada2",	XSPR(31,451,210), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmasa2",	XSPR(31,451,211), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacc2",	XSPR(31,451,212), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacr3",	XSPR(31,451,216), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmact3",	XSPR(31,451,217), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmada3",	XSPR(31,451,218), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmasa3",	XSPR(31,451,219), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmacc3",	XSPR(31,451,220), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdmasr",	XSPR(31,451,224), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdcr",	X(31,451), X_MASK, PPC403|BOOKE|PPCA2|PPC476, E500|TITAN, {SPR, RS}},
+{"mtdcr.",	XRC(31,451,1), X_MASK,	     PPCA2,	0,		{SPR, RS}},
+
+{"stvexwx",	X(31,453),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"dccci",	X(31,454), XRT_MASK, PPC403|PPC440|TITAN|PPCA2, 0,	{RAOPT, RBOPT}},
+{"dci",		X(31,454),	XRARB_MASK, PPCA2|PPC476, 0,		{CT}},
+
+{"divdu",	XO(31,457,0,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"divdu.",	XO(31,457,0,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"divwu",	XO(31,459,0,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"divwu.",	XO(31,459,0,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+
+{"mtpmr",	X(31,462),	X_MASK, PPCPMR|PPCE300, 0,		{PMR, RS}},
+{"mttmr",	X(31,494),	X_MASK,	PPCTMR|E6500,	0,		{TMR, RS}},
+
+{"slbieg",	X(31,466),	XRA_MASK,    POWER9,	0,		{RS, RB}},
+
+{"mtmq",	XSPR(31,467,  0), XSPR_MASK, M601,	0,		{RS}},
+{"mtxer",	XSPR(31,467,  1), XSPR_MASK, COM,	0,		{RS}},
+{"mtlr",	XSPR(31,467,  8), XSPR_MASK, COM,	0,		{RS}},
+{"mtctr",	XSPR(31,467,  9), XSPR_MASK, COM,	0,		{RS}},
+{"mtdscr",	XSPR(31,467, 17), XSPR_MASK, POWER6,	0,		{RS}},
+{"mttid",	XSPR(31,467, 17), XSPR_MASK, POWER,	0,		{RS}},
+{"mtdsisr",	XSPR(31,467, 18), XSPR_MASK, COM,	TITAN,		{RS}},
+{"mtdar",	XSPR(31,467, 19), XSPR_MASK, COM,	TITAN,		{RS}},
+{"mtrtcu",	XSPR(31,467, 20), XSPR_MASK, COM,	TITAN,		{RS}},
+{"mtrtcl",	XSPR(31,467, 21), XSPR_MASK, COM,	TITAN,		{RS}},
+{"mtdec",	XSPR(31,467, 22), XSPR_MASK, COM,	0,		{RS}},
+{"mtsdr0",	XSPR(31,467, 24), XSPR_MASK, POWER,	0,		{RS}},
+{"mtsdr1",	XSPR(31,467, 25), XSPR_MASK, COM,	TITAN,		{RS}},
+{"mtsrr0",	XSPR(31,467, 26), XSPR_MASK, COM,	0,		{RS}},
+{"mtsrr1",	XSPR(31,467, 27), XSPR_MASK, COM,	0,		{RS}},
+{"mtcfar",	XSPR(31,467, 28), XSPR_MASK, POWER6,	0,		{RS}},
+{"mtpid",	XSPR(31,467, 48), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdecar",	XSPR(31,467, 54), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtcsrr0",	XSPR(31,467, 58), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtcsrr1",	XSPR(31,467, 59), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdear",	XSPR(31,467, 61), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtesr",	XSPR(31,467, 62), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivpr",	XSPR(31,467, 63), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtcmpa",	XSPR(31,467,144), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcmpb",	XSPR(31,467,145), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcmpc",	XSPR(31,467,146), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcmpd",	XSPR(31,467,147), XSPR_MASK, PPC860,	0,		{RS}},
+{"mticr",	XSPR(31,467,148), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtder",	XSPR(31,467,149), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcounta",	XSPR(31,467,150), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcountb",	XSPR(31,467,151), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtctrl",	XSPR(31,467,152), XSPR_MASK, POWER4,	0,		{RS}},
+{"mtcmpe",	XSPR(31,467,152), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcmpf",	XSPR(31,467,153), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcmpg",	XSPR(31,467,154), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtcmph",	XSPR(31,467,155), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtlctrl1",	XSPR(31,467,156), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtlctrl2",	XSPR(31,467,157), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtictrl",	XSPR(31,467,158), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtbar",	XSPR(31,467,159), XSPR_MASK, PPC860,	0,		{RS}},
+{"mtvrsave",	XSPR(31,467,256), XSPR_MASK, PPCVEC,	0,		{RS}},
+{"mtusprg0",	XSPR(31,467,256), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtsprg",	XSPR(31,467,256), XSPRG_MASK, PPC,	0,		{SPRG, RS}},
+{"mtsprg0",	XSPR(31,467,272), XSPR_MASK, PPC,	0,		{RS}},
+{"mtsprg1",	XSPR(31,467,273), XSPR_MASK, PPC,	0,		{RS}},
+{"mtsprg2",	XSPR(31,467,274), XSPR_MASK, PPC,	0,		{RS}},
+{"mtsprg3",	XSPR(31,467,275), XSPR_MASK, PPC,	0,		{RS}},
+{"mtsprg4",	XSPR(31,467,276), XSPR_MASK, PPC405|BOOKE, 0,		{RS}},
+{"mtsprg5",	XSPR(31,467,277), XSPR_MASK, PPC405|BOOKE, 0,		{RS}},
+{"mtsprg6",	XSPR(31,467,278), XSPR_MASK, PPC405|BOOKE, 0,		{RS}},
+{"mtsprg7",	XSPR(31,467,279), XSPR_MASK, PPC405|BOOKE, 0,		{RS}},
+{"mtasr",	XSPR(31,467,280), XSPR_MASK, PPC64,	0,		{RS}},
+{"mtear",	XSPR(31,467,282), XSPR_MASK, PPC,	TITAN,		{RS}},
+{"mttbl",	XSPR(31,467,284), XSPR_MASK, PPC,	0,		{RS}},
+{"mttbu",	XSPR(31,467,285), XSPR_MASK, PPC,	0,		{RS}},
+{"mtdbsr",	XSPR(31,467,304), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdbcr0",	XSPR(31,467,308), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdbcr1",	XSPR(31,467,309), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdbcr2",	XSPR(31,467,310), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtiac1",	XSPR(31,467,312), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtiac2",	XSPR(31,467,313), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtiac3",	XSPR(31,467,314), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtiac4",	XSPR(31,467,315), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdac1",	XSPR(31,467,316), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdac2",	XSPR(31,467,317), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdvc1",	XSPR(31,467,318), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtdvc2",	XSPR(31,467,319), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mttsr",	XSPR(31,467,336), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mttcr",	XSPR(31,467,340), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor0",	XSPR(31,467,400), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor1",	XSPR(31,467,401), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor2",	XSPR(31,467,402), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor3",	XSPR(31,467,403), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor4",	XSPR(31,467,404), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor5",	XSPR(31,467,405), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor6",	XSPR(31,467,406), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor7",	XSPR(31,467,407), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor8",	XSPR(31,467,408), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor9",	XSPR(31,467,409), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor10",	XSPR(31,467,410), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor11",	XSPR(31,467,411), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor12",	XSPR(31,467,412), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor13",	XSPR(31,467,413), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor14",	XSPR(31,467,414), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtivor15",	XSPR(31,467,415), XSPR_MASK, BOOKE,	0,		{RS}},
+{"mtspefscr",	XSPR(31,467,512), XSPR_MASK, PPCSPE,	0,		{RS}},
+{"mtbbear",	XSPR(31,467,513), XSPR_MASK, PPCBRLK,	0,		{RS}},
+{"mtbbtar",	XSPR(31,467,514), XSPR_MASK, PPCBRLK,	0,		{RS}},
+{"mtivor32",	XSPR(31,467,528), XSPR_MASK, PPCSPE,	0,		{RS}},
+{"mtibatu",	XSPR(31,467,528), XSPRBAT_MASK, PPC,	TITAN,		{SPRBAT, RS}},
+{"mtivor33",	XSPR(31,467,529), XSPR_MASK, PPCSPE,	0,		{RS}},
+{"mtibatl",	XSPR(31,467,529), XSPRBAT_MASK, PPC,	TITAN,		{SPRBAT, RS}},
+{"mtivor34",	XSPR(31,467,530), XSPR_MASK, PPCSPE,	0,		{RS}},
+{"mtivor35",	XSPR(31,467,531), XSPR_MASK, PPCPMR,	0,		{RS}},
+{"mtdbatu",	XSPR(31,467,536), XSPRBAT_MASK, PPC,	TITAN,		{SPRBAT, RS}},
+{"mtdbatl",	XSPR(31,467,537), XSPRBAT_MASK, PPC,	TITAN,		{SPRBAT, RS}},
+{"mtmcsrr0",	XSPR(31,467,570), XSPR_MASK, PPCRFMCI,	0,		{RS}},
+{"mtmcsrr1",	XSPR(31,467,571), XSPR_MASK, PPCRFMCI,	0,		{RS}},
+{"mtmcsr",	XSPR(31,467,572), XSPR_MASK, PPCRFMCI,	0,		{RS}},
+{"mtivndx",	XSPR(31,467,880), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtdvndx",	XSPR(31,467,881), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtivlim",	XSPR(31,467,882), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtdvlim",	XSPR(31,467,883), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtclcsr",	XSPR(31,467,884), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtccr1",	XSPR(31,467,888), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtppr",	XSPR(31,467,896), XSPR_MASK, POWER7,	0,		{RS}},
+{"mtppr32",	XSPR(31,467,898), XSPR_MASK, POWER7,	0,		{RS}},
+{"mtummcr0",	XSPR(31,467,936), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtupmc1",	XSPR(31,467,937), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtupmc2",	XSPR(31,467,938), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtusia",	XSPR(31,467,939), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtummcr1",	XSPR(31,467,940), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtupmc3",	XSPR(31,467,941), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtupmc4",	XSPR(31,467,942), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtzpr",	XSPR(31,467,944), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtpid",	XSPR(31,467,945), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtrmmucr",	XSPR(31,467,946), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtccr0",	XSPR(31,467,947), XSPR_MASK, PPC405|TITAN, 0,		{RS}},
+{"mtiac3",	XSPR(31,467,948), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtiac4",	XSPR(31,467,949), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtdvc1",	XSPR(31,467,950), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtdvc2",	XSPR(31,467,951), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtmmcr0",	XSPR(31,467,952), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtpmc1",	XSPR(31,467,953), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtsgr",	XSPR(31,467,953), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdcwr",	XSPR(31,467,954), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtpmc2",	XSPR(31,467,954), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtsia",	XSPR(31,467,955), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtsler",	XSPR(31,467,955), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtmmcr1",	XSPR(31,467,956), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtsu0r",	XSPR(31,467,956), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtdbcr1",	XSPR(31,467,957), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtpmc3",	XSPR(31,467,957), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtpmc4",	XSPR(31,467,958), XSPR_MASK, PPC750,	0,		{RS}},
+{"mticdbdr",	XSPR(31,467,979), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtesr",	XSPR(31,467,980), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdear",	XSPR(31,467,981), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtevpr",	XSPR(31,467,982), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtcdbcr",	XSPR(31,467,983), XSPR_MASK, PPC403,	0,		{RS}},
+{"mttsr",	XSPR(31,467,984), XSPR_MASK, PPC403,	0,		{RS}},
+{"mttcr",	XSPR(31,467,986), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtpit",	XSPR(31,467,987), XSPR_MASK, PPC403,	0,		{RS}},
+{"mttbhi",	XSPR(31,467,988), XSPR_MASK, PPC403,	0,		{RS}},
+{"mttblo",	XSPR(31,467,989), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtsrr2",	XSPR(31,467,990), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtsrr3",	XSPR(31,467,991), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdbsr",	XSPR(31,467,1008), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdbdr",	XSPR(31,467,1011), XSPR_MASK, TITAN,	0,		{RS}},
+{"mtdbcr0",	XSPR(31,467,1010), XSPR_MASK, PPC405,	0,		{RS}},
+{"mtiac1",	XSPR(31,467,1012), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtiac2",	XSPR(31,467,1013), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdac1",	XSPR(31,467,1014), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtdac2",	XSPR(31,467,1015), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtl2cr",	XSPR(31,467,1017), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtdccr",	XSPR(31,467,1018), XSPR_MASK, PPC403,	0,		{RS}},
+{"mticcr",	XSPR(31,467,1019), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtictc",	XSPR(31,467,1019), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtpbl1",	XSPR(31,467,1020), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtthrm1",	XSPR(31,467,1020), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtpbu1",	XSPR(31,467,1021), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtthrm2",	XSPR(31,467,1021), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtpbl2",	XSPR(31,467,1022), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtthrm3",	XSPR(31,467,1022), XSPR_MASK, PPC750,	0,		{RS}},
+{"mtpbu2",	XSPR(31,467,1023), XSPR_MASK, PPC403,	0,		{RS}},
+{"mtspr",	X(31,467),	X_MASK,	     COM,	0,		{SPR, RS}},
+
+{"dcbi",	X(31,470),	XRT_MASK,    PPC,	0,		{RA0, RB}},
+
+{"nand",	XRC(31,476,0),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+{"nand.",	XRC(31,476,1),	X_MASK,	     COM,	0,		{RA, RS, RB}},
+
+{"dsn",		X(31,483),	XRT_MASK,    E500MC,	0,		{RA, RB}},
+
+{"dcread",	X(31,486),	X_MASK,	 PPC403|PPC440, PPCA2|PPC476,	{RT, RA0, RB}},
+
+{"icbtls",	X(31,486),	X_MASK,	 PPCCHLK|PPC476|TITAN, 0,	{CT, RA0, RB}},
+
+{"stvxl",	X(31,487),	X_MASK,	     PPCVEC,	0,		{VS, RA0, RB}},
+
+{"nabs",	XO(31,488,0,0),	XORB_MASK,   M601,	0,		{RT, RA}},
+{"nabs.",	XO(31,488,0,1),	XORB_MASK,   M601,	0,		{RT, RA}},
+
+{"divd",	XO(31,489,0,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"divd.",	XO(31,489,0,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"divw",	XO(31,491,0,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"divw.",	XO(31,491,0,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+
+{"icbtlse",	X(31,494),	X_MASK,	     PPCCHLK,	E500MC,		{CT, RA, RB}},
+
+{"slbia",	X(31,498),	0xff1fffff,  POWER6,	0,		{IH}},
+{"slbia",	X(31,498),	0xffffffff,  PPC64,	POWER6,		{0}},
+
+{"cli",		X(31,502),	XRB_MASK,    POWER,	0,		{RT, RA}},
+
+{"popcntd",	X(31,506),	XRB_MASK, POWER7|PPCA2,	0,		{RA, RS}},
+
+{"cmpb",	X(31,508),	X_MASK, POWER6|PPCA2|PPC476, 0,		{RA, RS, RB}},
+
+{"mcrxr",	X(31,512),	XBFRARB_MASK, COM,	POWER7,		{BF}},
+
+{"lbdcbx",	X(31,514),	X_MASK,      E200Z4,	0,		{RT, RA, RB}},
+{"lbdx",	X(31,515),	X_MASK,	     E500MC,	0,		{RT, RA, RB}},
+
+{"bblels",	X(31,518),	X_MASK,	     PPCBRLK,	0,		{0}},
+
+{"lvlx",	X(31,519),	X_MASK,	     CELL,	0,		{VD, RA0, RB}},
+{"lbfcmux",	APU(31,519,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfco",	XO(31,8,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sfo",		XO(31,8,1,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"subco",	XO(31,8,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RB, RA}},
+{"subfco.",	XO(31,8,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sfo.",	XO(31,8,1,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"subco.",	XO(31,8,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RB, RA}},
+
+{"addco",	XO(31,10,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"ao",		XO(31,10,1,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"addco.",	XO(31,10,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"ao.",		XO(31,10,1,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"lxsspx",	X(31,524),	XX1_MASK,    PPCVSX2,	0,		{XT6, RA0, RB}},
+
+{"clcs",	X(31,531),	XRB_MASK,    M601,	0,		{RT, RA}},
+
+{"ldbrx",	X(31,532),	X_MASK, CELL|POWER7|PPCA2, 0,		{RT, RA0, RB}},
+
+{"lswx",	X(31,533),	X_MASK,	     PPCCOM,	E500|E500MC,	{RT, RAX, RBX}},
+{"lsx",		X(31,533),	X_MASK,	     PWRCOM,	0,		{RT, RA, RB}},
+
+{"lwbrx",	X(31,534),	X_MASK,	     PPCCOM,	0,		{RT, RA0, RB}},
+{"lbrx",	X(31,534),	X_MASK,	     PWRCOM,	0,		{RT, RA, RB}},
+
+{"lfsx",	X(31,535),	X_MASK,	     COM,	PPCEFS,		{FRT, RA0, RB}},
+
+{"srw",		XRC(31,536,0),	X_MASK,	     PPCCOM,	0,		{RA, RS, RB}},
+{"sr",		XRC(31,536,0),	X_MASK,	     PWRCOM,	0,		{RA, RS, RB}},
+{"srw.",	XRC(31,536,1),	X_MASK,	     PPCCOM,	0,		{RA, RS, RB}},
+{"sr.",		XRC(31,536,1),	X_MASK,	     PWRCOM,	0,		{RA, RS, RB}},
+
+{"rrib",	XRC(31,537,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"rrib.",	XRC(31,537,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"cnttzw",	XRC(31,538,0),	XRB_MASK,    POWER9,	0,		{RA, RS}},
+{"cnttzw.",	XRC(31,538,1),	XRB_MASK,    POWER9,	0,		{RA, RS}},
+
+{"srd",		XRC(31,539,0),	X_MASK,	     PPC64,	0,		{RA, RS, RB}},
+{"srd.",	XRC(31,539,1),	X_MASK,	     PPC64,	0,		{RA, RS, RB}},
+
+{"maskir",	XRC(31,541,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"maskir.",	XRC(31,541,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"lhdcbx",	X(31,546),	X_MASK,      E200Z4,	0,		{RT, RA, RB}},
+{"lhdx",	X(31,547),	X_MASK,	     E500MC,	0,		{RT, RA, RB}},
+
+{"lvtrx",	X(31,549),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"bbelr",	X(31,550),	X_MASK,	     PPCBRLK,	0,		{0}},
+
+{"lvrx",	X(31,551),	X_MASK,	     CELL,	0,		{VD, RA0, RB}},
+{"lhfcmux",	APU(31,551,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfo",	XO(31,40,1,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"subo",	XO(31,40,1,0),	XO_MASK,     PPC,	0,		{RT, RB, RA}},
+{"subfo.",	XO(31,40,1,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"subo.",	XO(31,40,1,1),	XO_MASK,     PPC,	0,		{RT, RB, RA}},
+
+{"tlbsync",	X(31,566),	0xffffffff,  PPC,	0,		{0}},
+
+{"lfsux",	X(31,567),	X_MASK,	     COM,	PPCEFS,		{FRT, RAS, RB}},
+
+{"cnttzd",	XRC(31,570,0),	XRB_MASK,    POWER9,	0,		{RA, RS}},
+{"cnttzd.",	XRC(31,570,1),	XRB_MASK,    POWER9,	0,		{RA, RS}},
+
+{"mcrxrx",	X(31,576),     XBFRARB_MASK, POWER9,	0,		{BF}},
+
+{"lwdcbx",	X(31,578),	X_MASK,      E200Z4,	0,		{RT, RA, RB}},
+{"lwdx",	X(31,579),	X_MASK,	     E500MC,	0,		{RT, RA, RB}},
+
+{"lvtlx",	X(31,581),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"lwat",	X(31,582),	X_MASK,	     POWER9,	0,		{RT, RA0, FC}},
+
+{"lwfcmux",	APU(31,583,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"lxsdx",	X(31,588),	XX1_MASK,    PPCVSX,	0,		{XT6, RA0, RB}},
+
+{"mfsr",	X(31,595), XRB_MASK|(1<<20), COM,	NON32,		{RT, SR}},
+
+{"lswi",	X(31,597),	X_MASK,	     PPCCOM,	E500|E500MC,	{RT, RAX, NBI}},
+{"lsi",		X(31,597),	X_MASK,	     PWRCOM,	0,		{RT, RA0, NB}},
+
+{"hwsync",	XSYNC(31,598,0), 0xffffffff, POWER4,	BOOKE|PPC476,	{0}},
+{"lwsync",	XSYNC(31,598,1), 0xffffffff, PPC,	E500,		{0}},
+{"ptesync",	XSYNC(31,598,2), 0xffffffff, PPC64,	0,		{0}},
+{"sync",	X(31,598),     XSYNCLE_MASK, E6500,	0,		{LS, ESYNC}},
+{"sync",	X(31,598),     XSYNC_MASK,   PPCCOM,	BOOKE|PPC476,	{LS}},
+{"msync",	X(31,598),     0xffffffff, BOOKE|PPCA2|PPC476, 0,	{0}},
+{"sync",	X(31,598),     0xffffffff,   BOOKE|PPC476, E6500,	{0}},
+{"lwsync",	X(31,598),     0xffffffff,   E500,	0,		{0}},
+{"dcs",		X(31,598),     0xffffffff,   PWRCOM,	0,		{0}},
+
+{"lfdx",	X(31,599),	X_MASK,	     COM,	PPCEFS,		{FRT, RA0, RB}},
+
+{"mffgpr",	XRC(31,607,0),	XRA_MASK,    POWER6,	POWER7,		{FRT, RB}},
+{"lfdepx",	X(31,607),	X_MASK,	  E500MC|PPCA2, 0,		{FRT, RA0, RB}},
+
+{"lddx",	X(31,611),	X_MASK,	     E500MC,	0,		{RT, RA, RB}},
+
+{"lvswx",	X(31,613),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"ldat",	X(31,614),	X_MASK,	     POWER9,	0,		{RT, RA0, FC}},
+
+{"lqfcmux",	APU(31,615,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"nego",	XO(31,104,1,0),	XORB_MASK,   COM,	0,		{RT, RA}},
+{"nego.",	XO(31,104,1,1),	XORB_MASK,   COM,	0,		{RT, RA}},
+
+{"mulo",	XO(31,107,1,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"mulo.",	XO(31,107,1,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"mfsri",	X(31,627),	X_MASK,	     M601,	0,		{RT, RA, RB}},
+
+{"dclst",	X(31,630),	XRB_MASK,    M601,	0,		{RS, RA}},
+
+{"lfdux",	X(31,631),	X_MASK,	     COM,	PPCEFS,		{FRT, RAS, RB}},
+
+{"stbdcbx",	X(31,642),	X_MASK,      E200Z4,	0,		{RS, RA, RB}},
+{"stbdx",	X(31,643),	X_MASK,	     E500MC,	0,		{RS, RA, RB}},
+
+{"stvlx",	X(31,647),	X_MASK,	     CELL,	0,		{VS, RA0, RB}},
+{"stbfcmux",	APU(31,647,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"stxsspx",	X(31,652),	XX1_MASK,    PPCVSX2,	0,		{XS6, RA0, RB}},
+
+{"tbegin.",	XRC(31,654,1), XRTLRARB_MASK, PPCHTM,	0,		{HTM_R}},
+
+{"subfeo",	XO(31,136,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sfeo",	XO(31,136,1,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"subfeo.",	XO(31,136,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"sfeo.",	XO(31,136,1,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"addeo",	XO(31,138,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"aeo",		XO(31,138,1,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"addeo.",	XO(31,138,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"aeo.",	XO(31,138,1,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"mfsrin",	X(31,659),	XRA_MASK,    PPC,	NON32,		{RT, RB}},
+
+{"stdbrx",	X(31,660),	X_MASK, CELL|POWER7|PPCA2, 0,		{RS, RA0, RB}},
+
+{"stswx",	X(31,661),	X_MASK,	     PPCCOM,	E500|E500MC,	{RS, RA0, RB}},
+{"stsx",	X(31,661),	X_MASK,	     PWRCOM,	0,		{RS, RA0, RB}},
+
+{"stwbrx",	X(31,662),	X_MASK,	     PPCCOM,	0,		{RS, RA0, RB}},
+{"stbrx",	X(31,662),	X_MASK,	     PWRCOM,	0,		{RS, RA0, RB}},
+
+{"stfsx",	X(31,663),	X_MASK,	     COM,	PPCEFS,		{FRS, RA0, RB}},
+
+{"srq",		XRC(31,664,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"srq.",	XRC(31,664,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"sre",		XRC(31,665,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"sre.",	XRC(31,665,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"sthdcbx",	X(31,674),	X_MASK,      E200Z4,	0,		{RS, RA, RB}},
+{"sthdx",	X(31,675),	X_MASK,	     E500MC,	0,		{RS, RA, RB}},
+
+{"stvfrx",	X(31,677),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"stvrx",	X(31,679),	X_MASK,	     CELL,	0,		{VS, RA0, RB}},
+{"sthfcmux",	APU(31,679,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"tendall.",	XRC(31,686,1)|(1<<25), XRTRARB_MASK, PPCHTM, 0,		{0}},
+{"tend.",	XRC(31,686,1), XRTARARB_MASK, PPCHTM,	0,		{HTM_A}},
+
+{"stbcx.",	XRC(31,694,1),	X_MASK,	  POWER8|E6500, 0,		{RS, RA0, RB}},
+
+{"stfsux",	X(31,695),	X_MASK,	     COM,	PPCEFS,		{FRS, RAS, RB}},
+
+{"sriq",	XRC(31,696,0),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+{"sriq.",	XRC(31,696,1),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+
+{"stwdcbx",	X(31,706),	X_MASK,	     E200Z4,	0,		{RS, RA, RB}},
+{"stwdx",	X(31,707),	X_MASK,	     E500MC,	0,		{RS, RA, RB}},
+
+{"stvflx",	X(31,709),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"stwat",	X(31,710),	X_MASK,	     POWER9,	0,		{RS, RA0, FC}},
+
+{"stwfcmux",	APU(31,711,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"stxsdx",	X(31,716),	XX1_MASK,    PPCVSX,	0,		{XS6, RA0, RB}},
+
+{"tcheck",	X(31,718),   XRTBFRARB_MASK, PPCHTM,	0,		{BF}},
+
+{"subfzeo",	XO(31,200,1,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfzeo",	XO(31,200,1,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"subfzeo.",	XO(31,200,1,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfzeo.",	XO(31,200,1,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"addzeo",	XO(31,202,1,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"azeo",	XO(31,202,1,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"addzeo.",	XO(31,202,1,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"azeo.",	XO(31,202,1,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"stswi",	X(31,725),	X_MASK,	     PPCCOM,	E500|E500MC,	{RS, RA0, NB}},
+{"stsi",	X(31,725),	X_MASK,	     PWRCOM,	0,		{RS, RA0, NB}},
+
+{"sthcx.",	XRC(31,726,1),	X_MASK,	  POWER8|E6500, 0,		{RS, RA0, RB}},
+
+{"stfdx",	X(31,727),	X_MASK,	     COM,	PPCEFS,		{FRS, RA0, RB}},
+
+{"srlq",	XRC(31,728,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"srlq.",	XRC(31,728,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"sreq",	XRC(31,729,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"sreq.",	XRC(31,729,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"mftgpr",	XRC(31,735,0),	XRA_MASK,    POWER6,	POWER7,		{RT, FRB}},
+{"stfdepx",	X(31,735),	X_MASK,	  E500MC|PPCA2, 0,		{FRS, RA0, RB}},
+
+{"stddx",	X(31,739),	X_MASK,	     E500MC,	0,		{RS, RA, RB}},
+
+{"stvswx",	X(31,741),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"stdat",	X(31,742),	X_MASK,	     POWER9,	0,		{RS, RA0, FC}},
+
+{"stqfcmux",	APU(31,743,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"subfmeo",	XO(31,232,1,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfmeo",	XO(31,232,1,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"subfmeo.",	XO(31,232,1,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"sfmeo.",	XO(31,232,1,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"mulldo",	XO(31,233,1,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"mulldo.",	XO(31,233,1,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"addmeo",	XO(31,234,1,0),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"ameo",	XO(31,234,1,0),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+{"addmeo.",	XO(31,234,1,1),	XORB_MASK,   PPCCOM,	0,		{RT, RA}},
+{"ameo.",	XO(31,234,1,1),	XORB_MASK,   PWRCOM,	0,		{RT, RA}},
+
+{"mullwo",	XO(31,235,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"mulso",	XO(31,235,1,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"mullwo.",	XO(31,235,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"mulso.",	XO(31,235,1,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"tsuspend.",	XRCL(31,750,0,1), XRTRARB_MASK,PPCHTM,	0,		{0}},
+{"tresume.",	XRCL(31,750,1,1), XRTRARB_MASK,PPCHTM,	0,		{0}},
+{"tsr.",	XRC(31,750,1),	  XRTLRARB_MASK,PPCHTM,	0,		{L}},
+
+{"darn",	X(31,755),	XLRAND_MASK, POWER9,	0,		{RT, LRAND}},
+
+{"dcba",	X(31,758), XRT_MASK, PPC405|PPC7450|BOOKE|PPCA2|PPC476, 0, {RA0, RB}},
+{"dcbal",	XOPL(31,758,1), XRT_MASK,    E500MC,	0,		{RA0, RB}},
+
+{"stfdux",	X(31,759),	X_MASK,	     COM,	PPCEFS,		{FRS, RAS, RB}},
+
+{"srliq",	XRC(31,760,0),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+{"srliq.",	XRC(31,760,1),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+
+{"lvsm",	X(31,773),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"copy",	XOPL(31,774,1),	XRT_MASK,    POWER9,	0,		{RA0, RB}},
+
+{"stvepxl",	X(31,775),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+{"lvlxl",	X(31,775),	X_MASK,	     CELL,	0,		{VD, RA0, RB}},
+{"ldfcmux",	APU(31,775,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"dozo",	XO(31,264,1,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"dozo.",	XO(31,264,1,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"addo",	XO(31,266,1,0),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"caxo",	XO(31,266,1,0),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+{"addo.",	XO(31,266,1,1),	XO_MASK,     PPCCOM,	0,		{RT, RA, RB}},
+{"caxo.",	XO(31,266,1,1),	XO_MASK,     PWRCOM,	0,		{RT, RA, RB}},
+
+{"modsd",	X(31,777),	X_MASK,	     POWER9,	0,		{RT, RA, RB}},
+{"modsw",	X(31,779),	X_MASK,	     POWER9,	0,		{RT, RA, RB}},
+
+{"lxvw4x",	X(31,780),	XX1_MASK,    PPCVSX,	0,		{XT6, RA0, RB}},
+{"lxsibzx",	X(31,781),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"tabortwc.",	XRC(31,782,1),	X_MASK,	     PPCHTM,	0,		{TO, RA, RB}},
+
+{"tlbivax",	X(31,786),	XRT_MASK, BOOKE|PPCA2|PPC476, 0,	{RA0, RB}},
+
+{"lwzcix",	X(31,789),	X_MASK,	     POWER6,	0,		{RT, RA0, RB}},
+
+{"lhbrx",	X(31,790),	X_MASK,	     COM,	0,		{RT, RA0, RB}},
+
+{"lfdpx",	X(31,791),	X_MASK,	     POWER6,	POWER7,		{FRTp, RA0, RB}},
+{"lfqx",	X(31,791),	X_MASK,	     POWER2,	0,		{FRT, RA, RB}},
+
+{"sraw",	XRC(31,792,0),	X_MASK,	     PPCCOM,	0,		{RA, RS, RB}},
+{"sra",		XRC(31,792,0),	X_MASK,	     PWRCOM,	0,		{RA, RS, RB}},
+{"sraw.",	XRC(31,792,1),	X_MASK,	     PPCCOM,	0,		{RA, RS, RB}},
+{"sra.",	XRC(31,792,1),	X_MASK,	     PWRCOM,	0,		{RA, RS, RB}},
+
+{"srad",	XRC(31,794,0),	X_MASK,	     PPC64,	0,		{RA, RS, RB}},
+{"srad.",	XRC(31,794,1),	X_MASK,	     PPC64,	0,		{RA, RS, RB}},
+
+{"lfddx",	X(31,803),	X_MASK,	     E500MC,	0,		{FRT, RA, RB}},
+
+{"lvtrxl",	X(31,805),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+{"stvepx",	X(31,807),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+{"lvrxl",	X(31,807),	X_MASK,	     CELL,	0,		{VD, RA0, RB}},
+
+{"lxvh8x",	X(31,812),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+{"lxsihzx",	X(31,813),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"tabortdc.",	XRC(31,814,1),	X_MASK,	     PPCHTM,	0,		{TO, RA, RB}},
+
+{"rac",		X(31,818),	X_MASK,	     M601,	0,		{RT, RA, RB}},
+
+{"erativax",	X(31,819),	X_MASK,	     PPCA2,	0,		{RS, RA0, RB}},
+
+{"lhzcix",	X(31,821),	X_MASK,	     POWER6,	0,		{RT, RA0, RB}},
+
+{"dss",		XDSS(31,822,0),	XDSS_MASK,   PPCVEC,	0,		{STRM}},
+
+{"lfqux",	X(31,823),	X_MASK,	     POWER2,	0,		{FRT, RA, RB}},
+
+{"srawi",	XRC(31,824,0),	X_MASK,	     PPCCOM,	0,		{RA, RS, SH}},
+{"srai",	XRC(31,824,0),	X_MASK,	     PWRCOM,	0,		{RA, RS, SH}},
+{"srawi.",	XRC(31,824,1),	X_MASK,	     PPCCOM,	0,		{RA, RS, SH}},
+{"srai.",	XRC(31,824,1),	X_MASK,	     PWRCOM,	0,		{RA, RS, SH}},
+
+{"sradi",	XS(31,413,0),	XS_MASK,     PPC64,	0,		{RA, RS, SH6}},
+{"sradi.",	XS(31,413,1),	XS_MASK,     PPC64,	0,		{RA, RS, SH6}},
+
+{"lvtlxl",	X(31,837),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"cpabort",	X(31,838),	XRTRARB_MASK,POWER9,	0,		{0}},
+
+{"divo",	XO(31,331,1,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"divo.",	XO(31,331,1,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"lxvd2x",	X(31,844),	XX1_MASK,    PPCVSX,	0,		{XT6, RA0, RB}},
+{"lxvx",	X(31,844),	XX1_MASK,    POWER8,	POWER9|PPCVSX3,	{XT6, RA0, RB}},
+
+{"tabortwci.",	XRC(31,846,1),	X_MASK,	     PPCHTM,	0,		{TO, RA, HTM_SI}},
+
+{"tlbsrx.",	XRC(31,850,1),	XRT_MASK,    PPCA2,	0,		{RA0, RB}},
+
+{"slbiag",	X(31,850),	XRARB_MASK,  POWER9,	0,		{RS}},
+{"slbmfev",	X(31,851),	XRLA_MASK,   POWER9,	0,		{RT, RB, A_L}},
+{"slbmfev",	X(31,851),	XRA_MASK,    PPC64,	POWER9,		{RT, RB}},
+
+{"lbzcix",	X(31,853),	X_MASK,	     POWER6,	0,		{RT, RA0, RB}},
+
+{"eieio",	X(31,854),	0xffffffff,  PPC,   BOOKE|PPCA2|PPC476,	{0}},
+{"mbar",	X(31,854),	X_MASK,	   BOOKE|PPCA2|PPC476, 0,	{MO}},
+{"eieio",	XMBAR(31,854,1),0xffffffff,  E500,	0,		{0}},
+{"eieio",	X(31,854),	0xffffffff, PPCA2|PPC476, 0,		{0}},
+
+{"lfiwax",	X(31,855),	X_MASK, POWER6|PPCA2|PPC476, 0,		{FRT, RA0, RB}},
+
+{"lvswxl",	X(31,869),	X_MASK,	     PPCVEC2,	0,		{VD, RA0, RB}},
+
+{"abso",	XO(31,360,1,0),	XORB_MASK,   M601,	0,		{RT, RA}},
+{"abso.",	XO(31,360,1,1),	XORB_MASK,   M601,	0,		{RT, RA}},
+
+{"divso",	XO(31,363,1,0),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+{"divso.",	XO(31,363,1,1),	XO_MASK,     M601,	0,		{RT, RA, RB}},
+
+{"lxvb16x",	X(31,876),	XX1_MASK,    PPCVSX3,	0,		{XT6, RA0, RB}},
+
+{"tabortdci.",	XRC(31,878,1),	X_MASK,	     PPCHTM,	0,		{TO, RA, HTM_SI}},
+
+{"rmieg",	X(31,882),	XRTRA_MASK,  POWER9,	0,		{RB}},
+
+{"ldcix",	X(31,885),	X_MASK,	     POWER6,	0,		{RT, RA0, RB}},
+
+{"msgsync",	X(31,886),	0xffffffff,  POWER9,	0,		{0}},
+
+{"lfiwzx",	X(31,887),	X_MASK,	  POWER7|PPCA2,	0,		{FRT, RA0, RB}},
+
+{"extswsli",	XS(31,445,0),	XS_MASK,     POWER9,	0,		{RA, RS, SH6}},
+{"extswsli.",	XS(31,445,1),	XS_MASK,     POWER9,	0,		{RA, RS, SH6}},
+
+{"paste.",	XRCL(31,902,1,1),XRT_MASK,   POWER9,	0,		{RA0, RB}},
+
+{"stvlxl",	X(31,903),	X_MASK,	     CELL,	0,		{VS, RA0, RB}},
+{"stdfcmux",	APU(31,903,0),	APU_MASK,    PPC405,	0,		{FCRT, RA, RB}},
+
+{"divdeuo",	XO(31,393,1,0),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divdeuo.",	XO(31,393,1,1),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divweuo",	XO(31,395,1,0),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divweuo.",	XO(31,395,1,1),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+
+{"stxvw4x",	X(31,908),	XX1_MASK,    PPCVSX,	0,		{XS6, RA0, RB}},
+{"stxsibx",	X(31,909),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+
+{"tabort.",	XRC(31,910,1),	XRTRB_MASK,  PPCHTM,	0,		{RA}},
+
+{"tlbsx",	XRC(31,914,0),	X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0,	{RTO, RA0, RB}},
+{"tlbsx.",	XRC(31,914,1),	X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0,	{RTO, RA0, RB}},
+
+{"slbmfee",	X(31,915),	XRLA_MASK,   POWER9,	0,		{RT, RB, A_L}},
+{"slbmfee",	X(31,915),	XRA_MASK,    PPC64,	POWER9,		{RT, RB}},
+
+{"stwcix",	X(31,917),	X_MASK,	     POWER6,	0,		{RS, RA0, RB}},
+
+{"sthbrx",	X(31,918),	X_MASK,	     COM,	0,		{RS, RA0, RB}},
+
+{"stfdpx",	X(31,919),	X_MASK,	     POWER6,	POWER7,		{FRSp, RA0, RB}},
+{"stfqx",	X(31,919),	X_MASK,	     POWER2,	0,		{FRS, RA0, RB}},
+
+{"sraq",	XRC(31,920,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"sraq.",	XRC(31,920,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"srea",	XRC(31,921,0),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+{"srea.",	XRC(31,921,1),	X_MASK,	     M601,	0,		{RA, RS, RB}},
+
+{"extsh",	XRC(31,922,0),	XRB_MASK,    PPCCOM,	0,		{RA, RS}},
+{"exts",	XRC(31,922,0),	XRB_MASK,    PWRCOM,	0,		{RA, RS}},
+{"extsh.",	XRC(31,922,1),	XRB_MASK,    PPCCOM,	0,		{RA, RS}},
+{"exts.",	XRC(31,922,1),	XRB_MASK,    PWRCOM,	0,		{RA, RS}},
+
+{"stfddx",	X(31,931),	X_MASK,	     E500MC,	0,		{FRS, RA, RB}},
+
+{"stvfrxl",	X(31,933),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"wclrone",	XOPL2(31,934,2),XRT_MASK,    PPCA2,	0,		{RA0, RB}},
+{"wclrall",	X(31,934),	XRARB_MASK,  PPCA2,	0,		{L2}},
+{"wclr",	X(31,934),	X_MASK,	     PPCA2,	0,		{L2, RA0, RB}},
+
+{"stvrxl",	X(31,935),	X_MASK,	     CELL,	0,		{VS, RA0, RB}},
+
+{"divdeo",	XO(31,425,1,0),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divdeo.",	XO(31,425,1,1),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divweo",	XO(31,427,1,0),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+{"divweo.",	XO(31,427,1,1),	XO_MASK,  POWER7|PPCA2,	0,		{RT, RA, RB}},
+
+{"stxvh8x",	X(31,940),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+{"stxsihx",	X(31,941),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+
+{"treclaim.",	XRC(31,942,1),	XRTRB_MASK,  PPCHTM,	0,		{RA}},
+
+{"tlbrehi",	XTLB(31,946,0),	XTLB_MASK,   PPC403,	PPCA2,		{RT, RA}},
+{"tlbrelo",	XTLB(31,946,1),	XTLB_MASK,   PPC403,	PPCA2,		{RT, RA}},
+{"tlbre",	X(31,946),  X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0,	{RSO, RAOPT, SHO}},
+
+{"sthcix",	X(31,949),	X_MASK,	     POWER6,	0,		{RS, RA0, RB}},
+
+{"icswepx",	XRC(31,950,0),	X_MASK,	     PPCA2,	0,		{RS, RA, RB}},
+{"icswepx.",	XRC(31,950,1),	X_MASK,	     PPCA2,	0,		{RS, RA, RB}},
+
+{"stfqux",	X(31,951),	X_MASK,	     POWER2,	0,		{FRS, RA, RB}},
+
+{"sraiq",	XRC(31,952,0),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+{"sraiq.",	XRC(31,952,1),	X_MASK,	     M601,	0,		{RA, RS, SH}},
+
+{"extsb",	XRC(31,954,0),	XRB_MASK,    PPC,	0,		{RA, RS}},
+{"extsb.",	XRC(31,954,1),	XRB_MASK,    PPC,	0,		{RA, RS}},
+
+{"stvflxl",	X(31,965),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"iccci",	X(31,966), XRT_MASK, PPC403|PPC440|TITAN|PPCA2, 0,	 {RAOPT, RBOPT}},
+{"ici",		X(31,966),	XRARB_MASK,  PPCA2|PPC476, 0,		{CT}},
+
+{"divduo",	XO(31,457,1,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"divduo.",	XO(31,457,1,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"divwuo",	XO(31,459,1,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"divwuo.",	XO(31,459,1,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+
+{"stxvd2x",	X(31,972),	XX1_MASK,    PPCVSX,	0,		{XS6, RA0, RB}},
+{"stxvx",	X(31,972),	XX1_MASK,    POWER8,	POWER9|PPCVSX3,	{XS6, RA0, RB}},
+
+{"tlbld",	X(31,978),	XRTRA_MASK,  PPC, PPC403|BOOKE|PPCA2|PPC476, {RB}},
+{"tlbwehi",	XTLB(31,978,0),	XTLB_MASK,   PPC403,	0,		{RT, RA}},
+{"tlbwelo",	XTLB(31,978,1),	XTLB_MASK,   PPC403,	0,		{RT, RA}},
+{"tlbwe",	X(31,978),  X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0,	{RSO, RAOPT, SHO}},
+
+{"slbfee.",	XRC(31,979,1),	XRA_MASK,    POWER6,	0,		{RT, RB}},
+
+{"stbcix",	X(31,981),	X_MASK,	     POWER6,	0,		{RS, RA0, RB}},
+
+{"icbi",	X(31,982),	XRT_MASK,    PPC,	0,		{RA0, RB}},
+
+{"stfiwx",	X(31,983),	X_MASK,	     PPC,	PPCEFS,		{FRS, RA0, RB}},
+
+{"extsw",	XRC(31,986,0),	XRB_MASK,    PPC64,	0,		{RA, RS}},
+{"extsw.",	XRC(31,986,1),	XRB_MASK,    PPC64,	0,		{RA, RS}},
+
+{"icbiep",	XRT(31,991,0),	XRT_MASK,    E500MC|PPCA2, 0,		{RA0, RB}},
+
+{"stvswxl",	X(31,997),	X_MASK,	     PPCVEC2,	0,		{VS, RA0, RB}},
+
+{"icread",	X(31,998),     XRT_MASK, PPC403|PPC440|PPC476|TITAN, 0,	{RA0, RB}},
+
+{"nabso",	XO(31,488,1,0),	XORB_MASK,   M601,	0,		{RT, RA}},
+{"nabso.",	XO(31,488,1,1),	XORB_MASK,   M601,	0,		{RT, RA}},
+
+{"divdo",	XO(31,489,1,0),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+{"divdo.",	XO(31,489,1,1),	XO_MASK,     PPC64,	0,		{RT, RA, RB}},
+
+{"divwo",	XO(31,491,1,0),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+{"divwo.",	XO(31,491,1,1),	XO_MASK,     PPC,	0,		{RT, RA, RB}},
+
+{"stxvb16x",	X(31,1004),	XX1_MASK,    PPCVSX3,	0,		{XS6, RA0, RB}},
+
+{"trechkpt.",	XRC(31,1006,1),	XRTRARB_MASK,PPCHTM,	0,		{0}},
+
+{"tlbli",	X(31,1010),	XRTRA_MASK,  PPC,	TITAN,		{RB}},
+
+{"stdcix",	X(31,1013),	X_MASK,	     POWER6,	0,		{RS, RA0, RB}},
+
+{"dcbz",	X(31,1014),	XRT_MASK,    PPC,	0,		{RA0, RB}},
+{"dclz",	X(31,1014),	XRT_MASK,    PPC,	0,		{RA0, RB}},
+
+{"dcbzep",	XRT(31,1023,0),	XRT_MASK,    E500MC|PPCA2, 0,		{RA0, RB}},
+
+{"dcbzl",	XOPL(31,1014,1), XRT_MASK,   POWER4|E500MC, PPC476,	{RA0, RB}},
+
+{"cctpl",	0x7c210b78,	0xffffffff,  CELL,	0,		{0}},
+{"cctpm",	0x7c421378,	0xffffffff,  CELL,	0,		{0}},
+{"cctph",	0x7c631b78,	0xffffffff,  CELL,	0,		{0}},
+
+{"dstt",	XDSS(31,342,1),	XDSS_MASK,   PPCVEC,	0,		{RA, RB, STRM}},
+{"dststt",	XDSS(31,374,1),	XDSS_MASK,   PPCVEC,	0,		{RA, RB, STRM}},
+{"dssall",	XDSS(31,822,1),	XDSS_MASK,   PPCVEC,	0,		{0}},
+
+{"db8cyc",	0x7f9ce378,	0xffffffff,  CELL,	0,		{0}},
+{"db10cyc",	0x7fbdeb78,	0xffffffff,  CELL,	0,		{0}},
+{"db12cyc",	0x7fdef378,	0xffffffff,  CELL,	0,		{0}},
+{"db16cyc",	0x7ffffb78,	0xffffffff,  CELL,	0,		{0}},
+
+{"lwz",		OP(32),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, D, RA0}},
+{"l",		OP(32),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, D, RA0}},
+
+{"lwzu",	OP(33),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, D, RAL}},
+{"lu",		OP(33),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, D, RA0}},
+
+{"lbz",		OP(34),		OP_MASK,     COM,	PPCVLE,		{RT, D, RA0}},
+
+{"lbzu",	OP(35),		OP_MASK,     COM,	PPCVLE,		{RT, D, RAL}},
+
+{"stw",		OP(36),		OP_MASK,     PPCCOM,	PPCVLE,		{RS, D, RA0}},
+{"st",		OP(36),		OP_MASK,     PWRCOM,	PPCVLE,		{RS, D, RA0}},
+
+{"stwu",	OP(37),		OP_MASK,     PPCCOM,	PPCVLE,		{RS, D, RAS}},
+{"stu",		OP(37),		OP_MASK,     PWRCOM,	PPCVLE,		{RS, D, RA0}},
+
+{"stb",		OP(38),		OP_MASK,     COM,	PPCVLE,		{RS, D, RA0}},
+
+{"stbu",	OP(39),		OP_MASK,     COM,	PPCVLE,		{RS, D, RAS}},
+
+{"lhz",		OP(40),		OP_MASK,     COM,	PPCVLE,		{RT, D, RA0}},
+
+{"lhzu",	OP(41),		OP_MASK,     COM,	PPCVLE,		{RT, D, RAL}},
+
+{"lha",		OP(42),		OP_MASK,     COM,	PPCVLE,		{RT, D, RA0}},
+
+{"lhau",	OP(43),		OP_MASK,     COM,	PPCVLE,		{RT, D, RAL}},
+
+{"sth",		OP(44),		OP_MASK,     COM,	PPCVLE,		{RS, D, RA0}},
+
+{"sthu",	OP(45),		OP_MASK,     COM,	PPCVLE,		{RS, D, RAS}},
+
+{"lmw",		OP(46),		OP_MASK,     PPCCOM,	PPCVLE,		{RT, D, RAM}},
+{"lm",		OP(46),		OP_MASK,     PWRCOM,	PPCVLE,		{RT, D, RA0}},
+
+{"stmw",	OP(47),		OP_MASK,     PPCCOM,	PPCVLE,		{RS, D, RA0}},
+{"stm",		OP(47),		OP_MASK,     PWRCOM,	PPCVLE,		{RS, D, RA0}},
+
+{"lfs",		OP(48),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRT, D, RA0}},
+
+{"lfsu",	OP(49),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRT, D, RAS}},
+
+{"lfd",		OP(50),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRT, D, RA0}},
+
+{"lfdu",	OP(51),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRT, D, RAS}},
+
+{"stfs",	OP(52),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRS, D, RA0}},
+
+{"stfsu",	OP(53),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRS, D, RAS}},
+
+{"stfd",	OP(54),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRS, D, RA0}},
+
+{"stfdu",	OP(55),		OP_MASK,     COM,	PPCEFS|PPCVLE,	{FRS, D, RAS}},
+
+{"lq",		OP(56),		OP_MASK,     POWER4,	PPC476|PPCVLE,	{RTQ, DQ, RAQ}},
+{"psq_l",	OP(56),		OP_MASK,     PPCPS,	PPCVLE,		{FRT,PSD,RA,PSW,PSQ}},
+{"lfq",		OP(56),		OP_MASK,     POWER2,	PPCVLE,		{FRT, D, RA0}},
+
+{"lxsd",	DSO(57,2),	DS_MASK,     PPCVSX3,	PPCVLE,		{VD, DS, RA0}},
+{"lxssp",	DSO(57,3),	DS_MASK,     PPCVSX3,	PPCVLE,		{VD, DS, RA0}},
+{"lfdp",	OP(57),		OP_MASK,     POWER6,	POWER7|PPCVLE,	{FRTp, DS, RA0}},
+{"psq_lu",	OP(57),		OP_MASK,     PPCPS,	PPCVLE,		{FRT,PSD,RA,PSW,PSQ}},
+{"lfqu",	OP(57),		OP_MASK,     POWER2,	PPCVLE,		{FRT, D, RA0}},
+
+{"ld",		DSO(58,0),	DS_MASK,     PPC64,	PPCVLE,		{RT, DS, RA0}},
+{"ldu",		DSO(58,1),	DS_MASK,     PPC64,	PPCVLE,		{RT, DS, RAL}},
+{"lwa",		DSO(58,2),	DS_MASK,     PPC64,	PPCVLE,		{RT, DS, RA0}},
+
+{"dadd",	XRC(59,2,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+{"dadd.",	XRC(59,2,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"dqua",	ZRC(59,3,0),	Z2_MASK,     POWER6,	PPCVLE,		{FRT,FRA,FRB,RMC}},
+{"dqua.",	ZRC(59,3,1),	Z2_MASK,     POWER6,	PPCVLE,		{FRT,FRA,FRB,RMC}},
+
+{"fdivs",	A(59,18,0),	AFRC_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fdivs.",	A(59,18,1),	AFRC_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+
+{"fsubs",	A(59,20,0),	AFRC_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fsubs.",	A(59,20,1),	AFRC_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+
+{"fadds",	A(59,21,0),	AFRC_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fadds.",	A(59,21,1),	AFRC_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+
+{"fsqrts",	A(59,22,0),    AFRAFRC_MASK, PPC,	TITAN|PPCVLE,	{FRT, FRB}},
+{"fsqrts.",	A(59,22,1),    AFRAFRC_MASK, PPC,	TITAN|PPCVLE,	{FRT, FRB}},
+
+{"fres",	A(59,24,0),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"fres",	A(59,24,0),   AFRALFRC_MASK, PPC,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+{"fres.",	A(59,24,1),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"fres.",	A(59,24,1),   AFRALFRC_MASK, PPC,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+
+{"fmuls",	A(59,25,0),	AFRB_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC}},
+{"fmuls.",	A(59,25,1),	AFRB_MASK,   PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC}},
+
+{"frsqrtes",	A(59,26,0),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"frsqrtes",	A(59,26,0),   AFRALFRC_MASK, POWER5,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+{"frsqrtes.",	A(59,26,1),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"frsqrtes.",	A(59,26,1),   AFRALFRC_MASK, POWER5,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+
+{"fmsubs",	A(59,28,0),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fmsubs.",	A(59,28,1),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+
+{"fmadds",	A(59,29,0),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fmadds.",	A(59,29,1),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+
+{"fnmsubs",	A(59,30,0),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fnmsubs.",	A(59,30,1),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+
+{"fnmadds",	A(59,31,0),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fnmadds.",	A(59,31,1),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+
+{"dmul",	XRC(59,34,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+{"dmul.",	XRC(59,34,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"drrnd",	ZRC(59,35,0),	Z2_MASK,     POWER6,	PPCVLE,		{FRT, FRA, FRB, RMC}},
+{"drrnd.",	ZRC(59,35,1),	Z2_MASK,     POWER6,	PPCVLE,		{FRT, FRA, FRB, RMC}},
+
+{"dscli",	ZRC(59,66,0),	Z_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, SH16}},
+{"dscli.",	ZRC(59,66,1),	Z_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, SH16}},
+
+{"dquai",	ZRC(59,67,0),	Z2_MASK,     POWER6,	PPCVLE,		{TE, FRT,FRB,RMC}},
+{"dquai.",	ZRC(59,67,1),	Z2_MASK,     POWER6,	PPCVLE,		{TE, FRT,FRB,RMC}},
+
+{"dscri",	ZRC(59,98,0),	Z_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, SH16}},
+{"dscri.",	ZRC(59,98,1),	Z_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, SH16}},
+
+{"drintx",	ZRC(59,99,0),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRT, FRB, RMC}},
+{"drintx.",	ZRC(59,99,1),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRT, FRB, RMC}},
+
+{"dcmpo",	X(59,130),	X_MASK,	     POWER6,	PPCVLE,		{BF,  FRA, FRB}},
+
+{"dtstex",	X(59,162),	X_MASK,	     POWER6,	PPCVLE,		{BF,  FRA, FRB}},
+{"dtstdc",	Z(59,194),	Z_MASK,	     POWER6,	PPCVLE,		{BF,  FRA, DCM}},
+{"dtstdg",	Z(59,226),	Z_MASK,	     POWER6,	PPCVLE,		{BF,  FRA, DGM}},
+
+{"drintn",	ZRC(59,227,0),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRT, FRB, RMC}},
+{"drintn.",	ZRC(59,227,1),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRT, FRB, RMC}},
+
+{"dctdp",	XRC(59,258,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+{"dctdp.",	XRC(59,258,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+
+{"dctfix",	XRC(59,290,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+{"dctfix.",	XRC(59,290,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+
+{"ddedpd",	XRC(59,322,0),	X_MASK,	     POWER6,	PPCVLE,		{SP, FRT, FRB}},
+{"ddedpd.",	XRC(59,322,1),	X_MASK,	     POWER6,	PPCVLE,		{SP, FRT, FRB}},
+
+{"dxex",	XRC(59,354,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+{"dxex.",	XRC(59,354,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+
+{"dsub",	XRC(59,514,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+{"dsub.",	XRC(59,514,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"ddiv",	XRC(59,546,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+{"ddiv.",	XRC(59,546,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"dcmpu",	X(59,642),	X_MASK,	     POWER6,	PPCVLE,		{BF,  FRA, FRB}},
+
+{"dtstsf",	X(59,674),	X_MASK,	     POWER6,	PPCVLE,		{BF,  FRA, FRB}},
+{"dtstsfi",	X(59,675),	X_MASK|1<<22,POWER9,	PPCVLE,		{BF, UIM6, FRB}},
+
+{"drsp",	XRC(59,770,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+{"drsp.",	XRC(59,770,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRB}},
+
+{"dcffix",	XRC(59,802,0), X_MASK|FRA_MASK, POWER7,	PPCVLE,		{FRT, FRB}},
+{"dcffix.",	XRC(59,802,1), X_MASK|FRA_MASK, POWER7,	PPCVLE,		{FRT, FRB}},
+
+{"denbcd",	XRC(59,834,0),	X_MASK,	     POWER6,	PPCVLE,		{S, FRT, FRB}},
+{"denbcd.",	XRC(59,834,1),	X_MASK,	     POWER6,	PPCVLE,		{S, FRT, FRB}},
+
+{"fcfids",	XRC(59,846,0),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+{"fcfids.",	XRC(59,846,1),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+
+{"diex",	XRC(59,866,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+{"diex.",	XRC(59,866,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"fcfidus",	XRC(59,974,0),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+{"fcfidus.",	XRC(59,974,1),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+
+{"xsaddsp",	XX3(60,0),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmaddasp",	XX3(60,1),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxsldwi",	XX3(60,2),	XX3SHW_MASK, PPCVSX,	PPCVLE,		{XT6, XA6, XB6, SHW}},
+{"xscmpeqdp",	XX3(60,3),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsrsqrtesp",	XX2(60,10),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xssqrtsp",	XX2(60,11),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xxsel",	XX4(60,3),	XX4_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6, XC6}},
+{"xssubsp",	XX3(60,8),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmaddmsp",	XX3(60,9),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxspltd",	XX3(60,10),	XX3DM_MASK,  PPCVSX,	PPCVLE,		{XT6, XA6, XB6S, DMEX}},
+{"xxmrghd",	XX3(60,10),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxswapd",	XX3(60,10)|(2<<8), XX3_MASK, PPCVSX,	PPCVLE,		{XT6, XA6, XB6S}},
+{"xxmrgld",	XX3(60,10)|(3<<8), XX3_MASK, PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxpermdi",	XX3(60,10),	XX3DM_MASK,  PPCVSX,	PPCVLE,		{XT6, XA6, XB6, DM}},
+{"xscmpgtdp",	XX3(60,11),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsresp",	XX2(60,26),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xsmulsp",	XX3(60,16),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmsubasp",	XX3(60,17),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxmrghw",	XX3(60,18),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscmpgedp",	XX3(60,19),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsdivsp",	XX3(60,24),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmsubmsp",	XX3(60,25),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxperm",	XX3(60,26),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsadddp",	XX3(60,32),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmaddadp",	XX3(60,33),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscmpudp",	XX3(60,35),	XX3BF_MASK,  PPCVSX,	PPCVLE,		{BF, XA6, XB6}},
+{"xscvdpuxws",	XX2(60,72),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsrdpi",	XX2(60,73),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsrsqrtedp",	XX2(60,74),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xssqrtdp",	XX2(60,75),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xssubdp",	XX3(60,40),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmaddmdp",	XX3(60,41),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscmpodp",	XX3(60,43),	XX3BF_MASK,  PPCVSX,	PPCVLE,		{BF, XA6, XB6}},
+{"xscvdpsxws",	XX2(60,88),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsrdpiz",	XX2(60,89),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsredp",	XX2(60,90),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsmuldp",	XX3(60,48),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmsubadp",	XX3(60,49),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxmrglw",	XX3(60,50),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsrdpip",	XX2(60,105),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xstsqrtdp",	XX2(60,106),	XX2BF_MASK,  PPCVSX,	PPCVLE,		{BF, XB6}},
+{"xsrdpic",	XX2(60,107),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsdivdp",	XX3(60,56),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsmsubmdp",	XX3(60,57),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxpermr",	XX3(60,58),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscmpexpdp",	XX3(60,59),	XX3BF_MASK,  PPCVSX3,	PPCVLE,		{BF, XA6, XB6}},
+{"xsrdpim",	XX2(60,121),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xstdivdp",	XX3(60,61),	XX3BF_MASK,  PPCVSX,	PPCVLE,		{BF, XA6, XB6}},
+{"xvaddsp",	XX3(60,64),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmaddasp",	XX3(60,65),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpeqsp",	XX3RC(60,67,0),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpeqsp.",	XX3RC(60,67,1),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvspuxws",	XX2(60,136),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrspi",	XX2(60,137),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrsqrtesp",	XX2(60,138),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvsqrtsp",	XX2(60,139),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvsubsp",	XX3(60,72),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmaddmsp",	XX3(60,73),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgtsp",	XX3RC(60,75,0),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgtsp.",	XX3RC(60,75,1),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvspsxws",	XX2(60,152),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrspiz",	XX2(60,153),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvresp",	XX2(60,154),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvmulsp",	XX3(60,80),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmsubasp",	XX3(60,81),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxspltw",	XX2(60,164),	XX2UIM_MASK, PPCVSX,	PPCVLE,		{XT6, XB6, UIM}},
+{"xxextractuw",	XX2(60,165),   XX2UIM4_MASK, PPCVSX3,	PPCVLE,		{XT6, XB6, UIMM4}},
+{"xvcmpgesp",	XX3RC(60,83,0),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgesp.",	XX3RC(60,83,1),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvuxwsp",	XX2(60,168),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrspip",	XX2(60,169),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvtsqrtsp",	XX2(60,170),	XX2BF_MASK,  PPCVSX,	PPCVLE,		{BF, XB6}},
+{"xvrspic",	XX2(60,171),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvdivsp",	XX3(60,88),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmsubmsp",	XX3(60,89),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxspltib",	X(60,360),   XX1_MASK|3<<19, PPCVSX3,	PPCVLE,		{XT6, IMM8}},
+{"xxinsertw",	XX2(60,181),   XX2UIM4_MASK, PPCVSX3,	PPCVLE,		{XT6, XB6, UIMM4}},
+{"xvcvsxwsp",	XX2(60,184),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrspim",	XX2(60,185),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvtdivsp",	XX3(60,93),	XX3BF_MASK,  PPCVSX,	PPCVLE,		{BF, XA6, XB6}},
+{"xvadddp",	XX3(60,96),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmaddadp",	XX3(60,97),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpeqdp",	XX3RC(60,99,0),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpeqdp.",	XX3RC(60,99,1),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvdpuxws",	XX2(60,200),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrdpi",	XX2(60,201),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrsqrtedp",	XX2(60,202),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvsqrtdp",	XX2(60,203),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvsubdp",	XX3(60,104),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmaddmdp",	XX3(60,105),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgtdp",	XX3RC(60,107,0), XX3_MASK,   PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgtdp.",	XX3RC(60,107,1), XX3_MASK,   PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvdpsxws",	XX2(60,216),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrdpiz",	XX2(60,217),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvredp",	XX2(60,218),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvmuldp",	XX3(60,112),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmsubadp",	XX3(60,113),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgedp",	XX3RC(60,115,0), XX3_MASK,   PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcmpgedp.",	XX3RC(60,115,1), XX3_MASK,   PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvuxwdp",	XX2(60,232),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrdpip",	XX2(60,233),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvtsqrtdp",	XX2(60,234),	XX2BF_MASK,  PPCVSX,	PPCVLE,		{BF, XB6}},
+{"xvrdpic",	XX2(60,235),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvdivdp",	XX3(60,120),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvmsubmdp",	XX3(60,121),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvsxwdp",	XX2(60,248),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvrdpim",	XX2(60,249),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvtdivdp",	XX3(60,125),	XX3BF_MASK,  PPCVSX,	PPCVLE,		{BF, XA6, XB6}},
+{"xsmaxcdp",	XX3(60,128),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmaddasp",	XX3(60,129),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxland",	XX3(60,130),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvdpsp",	XX2(60,265),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xscvdpspn",	XX2(60,267),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xsmincdp",	XX3(60,136),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmaddmsp",	XX3(60,137),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxlandc",	XX3(60,138),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsrsp",	XX2(60,281),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xsmaxjdp",	XX3(60,144),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmsubasp",	XX3(60,145),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxlor",	XX3(60,146),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvuxdsp",	XX2(60,296),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xststdcsp",	XX2(60,298),	XX2BFD_MASK, PPCVSX3,	PPCVLE,		{BF, XB6, DCMX}},
+{"xsminjdp",	XX3(60,152),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmsubmsp",	XX3(60,153),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxlxor",	XX3(60,154),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvsxdsp",	XX2(60,312),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xsmaxdp",	XX3(60,160),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmaddadp",	XX3(60,161),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxlnor",	XX3(60,162),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvdpuxds",	XX2(60,328),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xscvspdp",	XX2(60,329),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xscvspdpn",	XX2(60,331),	XX2_MASK,    PPCVSX2,	PPCVLE,		{XT6, XB6}},
+{"xsmindp",	XX3(60,168),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmaddmdp",	XX3(60,169),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxlorc",	XX3(60,170),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvdpsxds",	XX2(60,344),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsabsdp",	XX2(60,345),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsxexpdp",	XX2VA(60,347,0),XX2_MASK|1,  PPCVSX3,	PPCVLE,		{RT, XB6}},
+{"xsxsigdp",	XX2VA(60,347,1),XX2_MASK|1,  PPCVSX3,	PPCVLE,		{RT, XB6}},
+{"xscvhpdp",	XX2VA(60,347,16),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xscvdphp",	XX2VA(60,347,17),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xscpsgndp",	XX3(60,176),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xsnmsubadp",	XX3(60,177),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxlnand",	XX3(60,178),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvuxddp",	XX2(60,360),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsnabsdp",	XX2(60,361),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xststdcdp",	XX2(60,362),	XX2BFD_MASK, PPCVSX3,	PPCVLE,		{BF, XB6, DCMX}},
+{"xsnmsubmdp",	XX3(60,185),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xxleqv",	XX3(60,186),	XX3_MASK,    PPCVSX2,	PPCVLE,		{XT6, XA6, XB6}},
+{"xscvsxddp",	XX2(60,376),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsnegdp",	XX2(60,377),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvmaxsp",	XX3(60,192),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmaddasp",	XX3(60,193),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvspuxds",	XX2(60,392),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvcvdpsp",	XX2(60,393),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvminsp",	XX3(60,200),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmaddmsp",	XX3(60,201),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvspsxds",	XX2(60,408),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvabssp",	XX2(60,409),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvmovsp",	XX3(60,208),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6S}},
+{"xvcpsgnsp",	XX3(60,208),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmsubasp",	XX3(60,209),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvuxdsp",	XX2(60,424),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvnabssp",	XX2(60,425),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvtstdcsp",	XX2(60,426),  XX2DCMXS_MASK, PPCVSX3,	PPCVLE,		{XT6, XB6, DCMXS}},
+{"xviexpsp",	XX3(60,216),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmsubmsp",	XX3(60,217),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvsxdsp",	XX2(60,440),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvnegsp",	XX2(60,441),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvmaxdp",	XX3(60,224),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmaddadp",	XX3(60,225),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvdpuxds",	XX2(60,456),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvcvspdp",	XX2(60,457),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xsiexpdp",	X(60,918),	XX1_MASK,    PPCVSX3,	PPCVLE,		{XT6, RA, RB}},
+{"xvmindp",	XX3(60,232),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmaddmdp",	XX3(60,233),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvdpsxds",	XX2(60,472),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvabsdp",	XX2(60,473),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvxexpdp",	XX2VA(60,475,0),XX2_MASK,    PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xvxsigdp",	XX2VA(60,475,1),XX2_MASK,    PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xxbrh",	XX2VA(60,475,7),XX2_MASK,    PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xvxexpsp",	XX2VA(60,475,8),XX2_MASK,    PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xvxsigsp",	XX2VA(60,475,9),XX2_MASK,    PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xxbrw",	XX2VA(60,475,15),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xxbrd",	XX2VA(60,475,23),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xvcvhpsp",	XX2VA(60,475,24),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xvcvsphp",	XX2VA(60,475,25),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xxbrq",	XX2VA(60,475,31),XX2_MASK,   PPCVSX3,	PPCVLE,		{XT6, XB6}},
+{"xvmovdp",	XX3(60,240),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6S}},
+{"xvcpsgndp",	XX3(60,240),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmsubadp",	XX3(60,241),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvuxddp",	XX2(60,488),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvnabsdp",	XX2(60,489),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvtstdcdp",	XX2(60,490),  XX2DCMXS_MASK, PPCVSX3,	PPCVLE,		{XT6, XB6, DCMXS}},
+{"xviexpdp",	XX3(60,248),	XX3_MASK,    PPCVSX3,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvnmsubmdp",	XX3(60,249),	XX3_MASK,    PPCVSX,	PPCVLE,		{XT6, XA6, XB6}},
+{"xvcvsxddp",	XX2(60,504),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+{"xvnegdp",	XX2(60,505),	XX2_MASK,    PPCVSX,	PPCVLE,		{XT6, XB6}},
+
+{"psq_st",	OP(60),		OP_MASK,     PPCPS,	PPCVLE,		{FRS,PSD,RA,PSW,PSQ}},
+{"stfq",	OP(60),		OP_MASK,     POWER2,	PPCVLE,		{FRS, D, RA}},
+
+{"lxv",		DQX(61,1),	DQX_MASK,    PPCVSX3,	PPCVLE,		{XTQ6, DQ, RA0}},
+{"stxv",	DQX(61,5),	DQX_MASK,    PPCVSX3,	PPCVLE,		{XSQ6, DQ, RA0}},
+{"stxsd",	DSO(61,2),	DS_MASK,     PPCVSX3,	PPCVLE,		{VS, DS, RA0}},
+{"stxssp",	DSO(61,3),	DS_MASK,     PPCVSX3,	PPCVLE,		{VS, DS, RA0}},
+{"stfdp",	OP(61),		OP_MASK,     POWER6,	POWER7|PPCVLE,	{FRSp, DS, RA0}},
+{"psq_stu",	OP(61),		OP_MASK,     PPCPS,	PPCVLE,		{FRS,PSD,RA,PSW,PSQ}},
+{"stfqu",	OP(61),		OP_MASK,     POWER2,	PPCVLE,		{FRS, D, RA}},
+
+{"std",		DSO(62,0),	DS_MASK,     PPC64,	PPCVLE,		{RS, DS, RA0}},
+{"stdu",	DSO(62,1),	DS_MASK,     PPC64,	PPCVLE,		{RS, DS, RAS}},
+{"stq",		DSO(62,2),	DS_MASK,     POWER4,	PPC476|PPCVLE,	{RSQ, DS, RA0}},
+
+{"fcmpu",	X(63,0),	XBF_MASK,    COM,	PPCEFS|PPCVLE,	{BF, FRA, FRB}},
+
+{"daddq",	XRC(63,2,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+{"daddq.",	XRC(63,2,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+
+{"dquaq",	ZRC(63,3,0),	Z2_MASK,     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp, RMC}},
+{"dquaq.",	ZRC(63,3,1),	Z2_MASK,     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp, RMC}},
+
+{"xsaddqp",	XRC(63,4,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsaddqpo",	XRC(63,4,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"xsrqpi",	ZRC(63,5,0),	Z2_MASK,     PPCVSX3,	PPCVLE,		{R, VD, VB, RMC}},
+{"xsrqpix",	ZRC(63,5,1),	Z2_MASK,     PPCVSX3,	PPCVLE,		{R, VD, VB, RMC}},
+
+{"fcpsgn",	XRC(63,8,0),	X_MASK, POWER6|PPCA2|PPC476, PPCVLE,	{FRT, FRA, FRB}},
+{"fcpsgn.",	XRC(63,8,1),	X_MASK, POWER6|PPCA2|PPC476, PPCVLE,	{FRT, FRA, FRB}},
+
+{"frsp",	XRC(63,12,0),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"frsp.",	XRC(63,12,1),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+
+{"fctiw",	XRC(63,14,0),	XRA_MASK,    PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fcir",	XRC(63,14,0),	XRA_MASK,    PWR2COM,	PPCVLE,		{FRT, FRB}},
+{"fctiw.",	XRC(63,14,1),	XRA_MASK,    PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fcir.",	XRC(63,14,1),	XRA_MASK,    PWR2COM,	PPCVLE,		{FRT, FRB}},
+
+{"fctiwz",	XRC(63,15,0),	XRA_MASK,    PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fcirz",	XRC(63,15,0),	XRA_MASK,    PWR2COM,	PPCVLE,		{FRT, FRB}},
+{"fctiwz.",	XRC(63,15,1),	XRA_MASK,    PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fcirz.",	XRC(63,15,1),	XRA_MASK,    PWR2COM,	PPCVLE,		{FRT, FRB}},
+
+{"fdiv",	A(63,18,0),	AFRC_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fd",		A(63,18,0),	AFRC_MASK,   PWRCOM,	PPCVLE,		{FRT, FRA, FRB}},
+{"fdiv.",	A(63,18,1),	AFRC_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fd.",		A(63,18,1),	AFRC_MASK,   PWRCOM,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"fsub",	A(63,20,0),	AFRC_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fs",		A(63,20,0),	AFRC_MASK,   PWRCOM,	PPCVLE,		{FRT, FRA, FRB}},
+{"fsub.",	A(63,20,1),	AFRC_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fs.",		A(63,20,1),	AFRC_MASK,   PWRCOM,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"fadd",	A(63,21,0),	AFRC_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fa",		A(63,21,0),	AFRC_MASK,   PWRCOM,	PPCVLE,		{FRT, FRA, FRB}},
+{"fadd.",	A(63,21,1),	AFRC_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRB}},
+{"fa.",		A(63,21,1),	AFRC_MASK,   PWRCOM,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"fsqrt",	A(63,22,0),    AFRAFRC_MASK, PPCPWR2,	TITAN|PPCVLE,	{FRT, FRB}},
+{"fsqrt.",	A(63,22,1),    AFRAFRC_MASK, PPCPWR2,	TITAN|PPCVLE,	{FRT, FRB}},
+
+{"fsel",	A(63,23,0),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fsel.",	A(63,23,1),	A_MASK,	     PPC,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+
+{"fre",		A(63,24,0),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"fre",		A(63,24,0),   AFRALFRC_MASK, POWER5,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+{"fre.",	A(63,24,1),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"fre.",	A(63,24,1),   AFRALFRC_MASK, POWER5,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+
+{"fmul",	A(63,25,0),	AFRB_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC}},
+{"fm",		A(63,25,0),	AFRB_MASK,   PWRCOM,	PPCVLE|PPCVLE,	{FRT, FRA, FRC}},
+{"fmul.",	A(63,25,1),	AFRB_MASK,   PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC}},
+{"fm.",		A(63,25,1),	AFRB_MASK,   PWRCOM,	PPCVLE|PPCVLE,	{FRT, FRA, FRC}},
+
+{"frsqrte",	A(63,26,0),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"frsqrte",	A(63,26,0),   AFRALFRC_MASK, PPC,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+{"frsqrte.",	A(63,26,1),   AFRAFRC_MASK,  POWER7,	PPCVLE,		{FRT, FRB}},
+{"frsqrte.",	A(63,26,1),   AFRALFRC_MASK, PPC,	POWER7|PPCVLE,	{FRT, FRB, A_L}},
+
+{"fmsub",	A(63,28,0),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fms",		A(63,28,0),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+{"fmsub.",	A(63,28,1),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fms.",	A(63,28,1),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+
+{"fmadd",	A(63,29,0),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fma",		A(63,29,0),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+{"fmadd.",	A(63,29,1),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fma.",	A(63,29,1),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+
+{"fnmsub",	A(63,30,0),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fnms",	A(63,30,0),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+{"fnmsub.",	A(63,30,1),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fnms.",	A(63,30,1),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+
+{"fnmadd",	A(63,31,0),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fnma",	A(63,31,0),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+{"fnmadd.",	A(63,31,1),	A_MASK,	     PPCCOM,	PPCEFS|PPCVLE,	{FRT, FRA, FRC, FRB}},
+{"fnma.",	A(63,31,1),	A_MASK,	     PWRCOM,	PPCVLE,		{FRT, FRA, FRC, FRB}},
+
+{"fcmpo",	X(63,32),	XBF_MASK,    COM,	PPCEFS|PPCVLE,	{BF, FRA, FRB}},
+
+{"dmulq",	XRC(63,34,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+{"dmulq.",	XRC(63,34,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+
+{"drrndq",	ZRC(63,35,0),	Z2_MASK,     POWER6,	PPCVLE,		{FRTp, FRA, FRBp, RMC}},
+{"drrndq.",	ZRC(63,35,1),	Z2_MASK,     POWER6,	PPCVLE,		{FRTp, FRA, FRBp, RMC}},
+
+{"xsmulqp",	XRC(63,36,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsmulqpo",	XRC(63,36,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"xsrqpxp",	Z(63,37),	Z2_MASK,     PPCVSX3,	PPCVLE,		{R, VD, VB, RMC}},
+
+{"mtfsb1",	XRC(63,38,0),	XRARB_MASK,  COM,	PPCVLE,		{BT}},
+{"mtfsb1.",	XRC(63,38,1),	XRARB_MASK,  COM,	PPCVLE,		{BT}},
+
+{"fneg",	XRC(63,40,0),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fneg.",	XRC(63,40,1),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+
+{"mcrfs",      X(63,64), XRB_MASK|(3<<21)|(3<<16), COM,	PPCVLE,		{BF, BFA}},
+
+{"dscliq",	ZRC(63,66,0),	Z_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, SH16}},
+{"dscliq.",	ZRC(63,66,1),	Z_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, SH16}},
+
+{"dquaiq",	ZRC(63,67,0),	Z2_MASK,     POWER6,	PPCVLE,		{TE, FRTp, FRBp, RMC}},
+{"dquaiq.",	ZRC(63,67,1),	Z2_MASK,     POWER6,	PPCVLE,		{TE, FRTp, FRBp, RMC}},
+
+{"mtfsb0",	XRC(63,70,0),	XRARB_MASK,  COM,	PPCVLE,		{BT}},
+{"mtfsb0.",	XRC(63,70,1),	XRARB_MASK,  COM,	PPCVLE,		{BT}},
+
+{"fmr",		XRC(63,72,0),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fmr.",	XRC(63,72,1),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+
+{"dscriq",	ZRC(63,98,0),	Z_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, SH16}},
+{"dscriq.",	ZRC(63,98,1),	Z_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, SH16}},
+
+{"drintxq",	ZRC(63,99,0),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRTp, FRBp, RMC}},
+{"drintxq.",	ZRC(63,99,1),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRTp, FRBp, RMC}},
+
+{"xscpsgnqp",	X(63,100),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"ftdiv",	X(63,128),	XBF_MASK,    POWER7,	PPCVLE,		{BF, FRA, FRB}},
+
+{"dcmpoq",	X(63,130),	X_MASK,	     POWER6,	PPCVLE,		{BF, FRAp, FRBp}},
+
+{"xscmpoqp",	X(63,132),	XBF_MASK,    PPCVSX3,	PPCVLE,		{BF, VA, VB}},
+
+{"mtfsfi",  XRC(63,134,0), XWRA_MASK|(3<<21)|(1<<11), POWER6|PPCA2|PPC476, PPCVLE, {BFF, U, W}},
+{"mtfsfi",  XRC(63,134,0), XRA_MASK|(3<<21)|(1<<11), COM, POWER6|PPCA2|PPC476|PPCVLE, {BFF, U}},
+{"mtfsfi.", XRC(63,134,1), XWRA_MASK|(3<<21)|(1<<11), POWER6|PPCA2|PPC476, PPCVLE, {BFF, U, W}},
+{"mtfsfi.", XRC(63,134,1), XRA_MASK|(3<<21)|(1<<11), COM, POWER6|PPCA2|PPC476|PPCVLE, {BFF, U}},
+
+{"fnabs",	XRC(63,136,0),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fnabs.",	XRC(63,136,1),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+
+{"fctiwu",	XRC(63,142,0),	XRA_MASK,    POWER7,	PPCVLE,		{FRT, FRB}},
+{"fctiwu.",	XRC(63,142,1),	XRA_MASK,    POWER7,	PPCVLE,		{FRT, FRB}},
+{"fctiwuz",	XRC(63,143,0),	XRA_MASK,    POWER7,	PPCVLE,		{FRT, FRB}},
+{"fctiwuz.",	XRC(63,143,1),	XRA_MASK,    POWER7,	PPCVLE,		{FRT, FRB}},
+
+{"ftsqrt",	X(63,160),	XBF_MASK|FRA_MASK, POWER7, PPCVLE,	{BF, FRB}},
+
+{"dtstexq",	X(63,162),	X_MASK,	     POWER6,	PPCVLE,		{BF, FRAp, FRBp}},
+
+{"xscmpexpqp",	X(63,164),	XBF_MASK,    PPCVSX3,	PPCVLE,		{BF, VA, VB}},
+
+{"dtstdcq",	Z(63,194),	Z_MASK,	     POWER6,	PPCVLE,		{BF, FRAp, DCM}},
+{"dtstdgq",	Z(63,226),	Z_MASK,	     POWER6,	PPCVLE,		{BF, FRAp, DGM}},
+
+{"drintnq",	ZRC(63,227,0),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRTp, FRBp, RMC}},
+{"drintnq.",	ZRC(63,227,1),	Z2_MASK,     POWER6,	PPCVLE,		{R, FRTp, FRBp, RMC}},
+
+{"dctqpq",	XRC(63,258,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRB}},
+{"dctqpq.",	XRC(63,258,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRB}},
+
+{"fabs",	XRC(63,264,0),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+{"fabs.",	XRC(63,264,1),	XRA_MASK,    COM,	PPCEFS|PPCVLE,	{FRT, FRB}},
+
+{"dctfixq",	XRC(63,290,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRBp}},
+{"dctfixq.",	XRC(63,290,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRBp}},
+
+{"ddedpdq",	XRC(63,322,0),	X_MASK,	     POWER6,	PPCVLE,		{SP, FRTp, FRBp}},
+{"ddedpdq.",	XRC(63,322,1),	X_MASK,	     POWER6,	PPCVLE,		{SP, FRTp, FRBp}},
+
+{"dxexq",	XRC(63,354,0),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRBp}},
+{"dxexq.",	XRC(63,354,1),	X_MASK,	     POWER6,	PPCVLE,		{FRT, FRBp}},
+
+{"xsmaddqp",	XRC(63,388,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsmaddqpo",	XRC(63,388,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"frin",	XRC(63,392,0),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+{"frin.",	XRC(63,392,1),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+
+{"xsmsubqp",	XRC(63,420,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsmsubqpo",	XRC(63,420,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"friz",	XRC(63,424,0),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+{"friz.",	XRC(63,424,1),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+
+{"xsnmaddqp",	XRC(63,452,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsnmaddqpo",	XRC(63,452,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"frip",	XRC(63,456,0),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+{"frip.",	XRC(63,456,1),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+
+{"xsnmsubqp",	XRC(63,484,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsnmsubqpo",	XRC(63,484,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"frim",	XRC(63,488,0),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+{"frim.",	XRC(63,488,1),	XRA_MASK,    POWER5,	PPCVLE,		{FRT, FRB}},
+
+{"dsubq",	XRC(63,514,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+{"dsubq.",	XRC(63,514,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+
+{"xssubqp",	XRC(63,516,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xssubqpo",	XRC(63,516,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"ddivq",	XRC(63,546,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+{"ddivq.",	XRC(63,546,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRAp, FRBp}},
+
+{"xsdivqp",	XRC(63,548,0),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+{"xsdivqpo",	XRC(63,548,1),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"mffs",	XRC(63,583,0),	XRARB_MASK,  COM,	PPCEFS|PPCVLE,	{FRT}},
+{"mffs.",	XRC(63,583,1),	XRARB_MASK,  COM,	PPCEFS|PPCVLE,	{FRT}},
+
+{"mffsce",	XMMF(63,583,0,1), XMMF_MASK|RB_MASK, POWER9, PPCVLE,	{FRT}},
+{"mffscdrn",	XMMF(63,583,2,4), XMMF_MASK,         POWER9, PPCVLE,	{FRT, FRB}},
+{"mffscdrni",	XMMF(63,583,2,5), XMMF_MASK|(3<<14), POWER9, PPCVLE,	{FRT, DRM}},
+{"mffscrn",	XMMF(63,583,2,6), XMMF_MASK,         POWER9, PPCVLE,	{FRT, FRB}},
+{"mffscrni",	XMMF(63,583,2,7), XMMF_MASK|(7<<13), POWER9, PPCVLE,	{FRT, RM}},
+{"mffsl",	XMMF(63,583,3,0), XMMF_MASK|RB_MASK, POWER9, PPCVLE,	{FRT}},
+
+{"dcmpuq",	X(63,642),	X_MASK,	     POWER6,	PPCVLE,		{BF, FRAp, FRBp}},
+
+{"xscmpuqp",	X(63,644),	XBF_MASK,    PPCVSX3,	PPCVLE,		{BF, VA, VB}},
+
+{"dtstsfq",	X(63,674),	X_MASK,	     POWER6,	PPCVLE,		{BF, FRA, FRBp}},
+{"dtstsfiq",	X(63,675),	X_MASK|1<<22,POWER9,	PPCVLE,		{BF, UIM6, FRBp}},
+
+{"xststdcqp",	X(63,708),	X_MASK,	     PPCVSX3,	PPCVLE,		{BF, VB, DCMX}},
+
+{"mtfsf",	XFL(63,711,0),	XFL_MASK, POWER6|PPCA2|PPC476, PPCVLE,	{FLM, FRB, XFL_L, W}},
+{"mtfsf",	XFL(63,711,0),	XFL_MASK,    COM, POWER6|PPCA2|PPC476|PPCEFS|PPCVLE, {FLM, FRB}},
+{"mtfsf.",	XFL(63,711,1),	XFL_MASK, POWER6|PPCA2|PPC476, PPCVLE,	{FLM, FRB, XFL_L, W}},
+{"mtfsf.",	XFL(63,711,1),	XFL_MASK,    COM, POWER6|PPCA2|PPC476|PPCEFS|PPCVLE, {FLM, FRB}},
+
+{"drdpq",	XRC(63,770,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRBp}},
+{"drdpq.",	XRC(63,770,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRBp}},
+
+{"dcffixq",	XRC(63,802,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRB}},
+{"dcffixq.",	XRC(63,802,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRB}},
+
+{"xsabsqp",	XVA(63,804,0),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xsxexpqp",	XVA(63,804,2),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xsnabsqp",	XVA(63,804,8),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xsnegqp",	XVA(63,804,16),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xsxsigqp",	XVA(63,804,18),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xssqrtqp",	XVARC(63,804,27,0), XVA_MASK, PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xssqrtqpo",	XVARC(63,804,27,1), XVA_MASK, PPCVSX3,	PPCVLE,		{VD, VB}},
+
+{"fctid",	XRC(63,814,0),	XRA_MASK,    PPC64,	PPCVLE,		{FRT, FRB}},
+{"fctid",	XRC(63,814,0),	XRA_MASK,    PPC476,	PPCVLE,		{FRT, FRB}},
+{"fctid.",	XRC(63,814,1),	XRA_MASK,    PPC64,	PPCVLE,		{FRT, FRB}},
+{"fctid.",	XRC(63,814,1),	XRA_MASK,    PPC476,	PPCVLE,		{FRT, FRB}},
+
+{"fctidz",	XRC(63,815,0),	XRA_MASK,    PPC64,	PPCVLE,		{FRT, FRB}},
+{"fctidz",	XRC(63,815,0),	XRA_MASK,    PPC476,	PPCVLE,		{FRT, FRB}},
+{"fctidz.",	XRC(63,815,1),	XRA_MASK,    PPC64,	PPCVLE,		{FRT, FRB}},
+{"fctidz.",	XRC(63,815,1),	XRA_MASK,    PPC476,	PPCVLE,		{FRT, FRB}},
+
+{"denbcdq",	XRC(63,834,0),	X_MASK,	     POWER6,	PPCVLE,		{S, FRTp, FRBp}},
+{"denbcdq.",	XRC(63,834,1),	X_MASK,	     POWER6,	PPCVLE,		{S, FRTp, FRBp}},
+
+{"xscvqpuwz",	XVA(63,836,1),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvudqp",	XVA(63,836,2),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvqpswz",	XVA(63,836,9),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvsdqp",	XVA(63,836,10),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvqpudz",	XVA(63,836,17),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvqpdp",	XVARC(63,836,20,0), XVA_MASK, PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvqpdpo",	XVARC(63,836,20,1), XVA_MASK, PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvdpqp",	XVA(63,836,22),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+{"xscvqpsdz",	XVA(63,836,25),	XVA_MASK,    PPCVSX3,	PPCVLE,		{VD, VB}},
+
+{"fmrgow",	X(63,838),	X_MASK,	     PPCVSX2,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"fcfid",	XRC(63,846,0),	XRA_MASK,    PPC64,	PPCVLE,		{FRT, FRB}},
+{"fcfid",	XRC(63,846,0),	XRA_MASK,    PPC476,	PPCVLE,		{FRT, FRB}},
+{"fcfid.",	XRC(63,846,1),	XRA_MASK,    PPC64,	PPCVLE,		{FRT, FRB}},
+{"fcfid.",	XRC(63,846,1),	XRA_MASK,    PPC476,	PPCVLE,		{FRT, FRB}},
+
+{"diexq",	XRC(63,866,0),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRA, FRBp}},
+{"diexq.",	XRC(63,866,1),	X_MASK,	     POWER6,	PPCVLE,		{FRTp, FRA, FRBp}},
+
+{"xsiexpqp",	X(63,868),	X_MASK,	     PPCVSX3,	PPCVLE,		{VD, VA, VB}},
+
+{"fctidu",	XRC(63,942,0),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+{"fctidu.",	XRC(63,942,1),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+
+{"fctiduz",	XRC(63,943,0),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+{"fctiduz.",	XRC(63,943,1),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+
+{"fmrgew",	X(63,966),	X_MASK,	     PPCVSX2,	PPCVLE,		{FRT, FRA, FRB}},
+
+{"fcfidu",	XRC(63,974,0),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+{"fcfidu.",	XRC(63,974,1),	XRA_MASK, POWER7|PPCA2,	PPCVLE,		{FRT, FRB}},
+};
+
+const int powerpc_num_opcodes =	/* Number of entries in powerpc_opcodes.  */
+  (int) (sizeof powerpc_opcodes / sizeof *powerpc_opcodes);
+
+/* The VLE opcode table, in the same format as the main opcode table: name,
+   opcode, mask, flags for CPUs that support the instruction, flags for CPUs
+   that no longer do, and operands ({0}, not the non-ISO {}, when none).  */
+
+const struct powerpc_opcode vle_opcodes[] = {
+{"se_illegal",	C(0),		C_MASK,		PPCVLE,	0,		{0}},
+{"se_isync",	C(1),		C_MASK,		PPCVLE,	0,		{0}},
+{"se_sc",	C(2),		C_MASK,		PPCVLE,	0,		{0}},
+{"se_blr",	C_LK(2,0),	C_LK_MASK,	PPCVLE,	0,		{0}},
+{"se_blrl",	C_LK(2,1),	C_LK_MASK,	PPCVLE,	0,		{0}},
+{"se_bctr",	C_LK(3,0),	C_LK_MASK,	PPCVLE,	0,		{0}},
+{"se_bctrl",	C_LK(3,1),	C_LK_MASK,	PPCVLE,	0,		{0}},
+{"se_rfi",	C(8),		C_MASK,		PPCVLE,	0,		{0}},
+{"se_rfci",	C(9),		C_MASK,		PPCVLE,	0,		{0}},
+{"se_rfdi",	C(10),		C_MASK,		PPCVLE,	0,		{0}},
+{"se_rfmci",	C(11),		C_MASK, PPCRFMCI|PPCVLE, 0,		{0}},
+{"se_not",	SE_R(0,2),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_neg",	SE_R(0,3),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_mflr",	SE_R(0,8),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_mtlr",	SE_R(0,9),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_mfctr",	SE_R(0,10),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_mtctr",	SE_R(0,11),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_extzb",	SE_R(0,12),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_extsb",	SE_R(0,13),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_extzh",	SE_R(0,14),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_extsh",	SE_R(0,15),	SE_R_MASK,	PPCVLE,	0,		{RX}},
+{"se_mr",	SE_RR(0,1),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_mtar",	SE_RR(0,2),	SE_RR_MASK,	PPCVLE,	0,		{ARX, RY}},
+{"se_mfar",	SE_RR(0,3),	SE_RR_MASK,	PPCVLE,	0,		{RX, ARY}},
+{"se_add",	SE_RR(1,0),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_mullw",	SE_RR(1,1),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_sub",	SE_RR(1,2),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_subf",	SE_RR(1,3),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_cmp",	SE_RR(3,0),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_cmpl",	SE_RR(3,1),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_cmph",	SE_RR(3,2),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_cmphl",	SE_RR(3,3),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+
+{"e_cmpi",	SCI8BF(6,0,21),	SCI8BF_MASK,	PPCVLE,	0,		{CRD32, RA, SCLSCI8}},
+{"e_cmpwi",	SCI8BF(6,0,21),	SCI8BF_MASK,	PPCVLE,	0,		{CRD32, RA, SCLSCI8}},
+{"e_cmpli",	SCI8BF(6,1,21),	SCI8BF_MASK,	PPCVLE,	0,		{CRD32, RA, SCLSCI8}},
+{"e_cmplwi",	SCI8BF(6,1,21),	SCI8BF_MASK,	PPCVLE,	0,		{CRD32, RA, SCLSCI8}},
+{"e_addi",	SCI8(6,16),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_subi",	SCI8(6,16),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8N}},
+{"e_addi.",	SCI8(6,17),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_addic",	SCI8(6,18),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_subic",	SCI8(6,18),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8N}},
+{"e_addic.",	SCI8(6,19),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_subic.",	SCI8(6,19),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8N}},
+{"e_mulli",	SCI8(6,20),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_subfic",	SCI8(6,22),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_subfic.",	SCI8(6,23),	SCI8_MASK,	PPCVLE,	0,		{RT, RA, SCLSCI8}},
+{"e_andi",	SCI8(6,24),	SCI8_MASK,	PPCVLE,	0,		{RA, RS, SCLSCI8}},
+{"e_andi.",	SCI8(6,25),	SCI8_MASK,	PPCVLE,	0,		{RA, RS, SCLSCI8}},
+{"e_nop",	SCI8(6,26),	0xffffffff,	PPCVLE,	0,		{0}},
+{"e_ori",	SCI8(6,26),	SCI8_MASK,	PPCVLE,	0,		{RA, RS, SCLSCI8}},
+{"e_ori.",	SCI8(6,27),	SCI8_MASK,	PPCVLE,	0,		{RA, RS, SCLSCI8}},
+{"e_xori",	SCI8(6,28),	SCI8_MASK,	PPCVLE,	0,		{RA, RS, SCLSCI8}},
+{"e_xori.",	SCI8(6,29),	SCI8_MASK,	PPCVLE,	0,		{RA, RS, SCLSCI8}},
+{"e_lbzu",	OPVUP(6,0),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_lhau",	OPVUP(6,3),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_lhzu",	OPVUP(6,1),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_lmw",	OPVUP(6,8),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_lwzu",	OPVUP(6,2),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_stbu",	OPVUP(6,4),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_sthu",	OPVUP(6,5),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_stwu",	OPVUP(6,6),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_stmw",	OPVUP(6,9),	OPVUP_MASK,	PPCVLE,	0,		{RT, D8, RA0}},
+{"e_ldmvgprw",	OPVUPRT(6,16,0),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_stmvgprw",	OPVUPRT(6,17,0),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_ldmvsprw",	OPVUPRT(6,16,1),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_stmvsprw",	OPVUPRT(6,17,1),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_ldmvsrrw",	OPVUPRT(6,16,4),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_stmvsrrw",	OPVUPRT(6,17,4),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_ldmvcsrrw",	OPVUPRT(6,16,5),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_stmvcsrrw",	OPVUPRT(6,17,5),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_ldmvdsrrw",	OPVUPRT(6,16,6),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_stmvdsrrw",	OPVUPRT(6,17,6),OPVUPRT_MASK,	PPCVLE,	0,		{D8, RA0}},
+{"e_add16i",	OP(7),		OP_MASK,	PPCVLE,	0,		{RT, RA, SI}},
+{"e_la",	OP(7),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+{"e_sub16i",	OP(7),		OP_MASK,	PPCVLE,	0,		{RT, RA, NSI}},
+
+{"se_addi",	SE_IM5(8,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, OIMM5}},
+{"se_cmpli",	SE_IM5(8,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, OIMM5}},
+{"se_subi",	SE_IM5(9,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, OIMM5}},
+{"se_subi.",	SE_IM5(9,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, OIMM5}},
+{"se_cmpi",	SE_IM5(10,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_bmaski",	SE_IM5(11,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_andi",	SE_IM5(11,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+
+{"e_lbz",	OP(12),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+{"e_stb",	OP(13),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+{"e_lha",	OP(14),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+
+{"se_srw",	SE_RR(16,0),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_sraw",	SE_RR(16,1),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_slw",	SE_RR(16,2),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_nop",	SE_RR(17,0),	0xffff,		PPCVLE,	0,		{0}},
+{"se_or",	SE_RR(17,0),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_andc",	SE_RR(17,1),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_and",	SE_RR(17,2),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_and.",	SE_RR(17,3),	SE_RR_MASK,	PPCVLE,	0,		{RX, RY}},
+{"se_li",	IM7(9),		IM7_MASK,	PPCVLE,	0,		{RX, UI7}},
+
+{"e_lwz",	OP(20),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+{"e_stw",	OP(21),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+{"e_lhz",	OP(22),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+{"e_sth",	OP(23),		OP_MASK,	PPCVLE,	0,		{RT, D, RA0}},
+
+{"se_bclri",	SE_IM5(24,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_bgeni",	SE_IM5(24,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_bseti",	SE_IM5(25,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_btsti",	SE_IM5(25,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_srwi",	SE_IM5(26,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_srawi",	SE_IM5(26,1),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+{"se_slwi",	SE_IM5(27,0),	SE_IM5_MASK,	PPCVLE,	0,		{RX, UI5}},
+
+{"e_lis",	I16L(28,28),	I16L_MASK,	PPCVLE,	0,		{RD, VLEUIMML}},
+{"e_and2is.",	I16L(28,29),	I16L_MASK,	PPCVLE,	0,		{RD, VLEUIMML}},
+{"e_or2is",	I16L(28,26),	I16L_MASK,	PPCVLE,	0,		{RD, VLEUIMML}},
+{"e_and2i.",	I16L(28,25),	I16L_MASK,	PPCVLE,	0,		{RD, VLEUIMML}},
+{"e_or2i",	I16L(28,24),	I16L_MASK,	PPCVLE,	0,		{RD, VLEUIMML}},
+{"e_cmphl16i",	IA16(28,23),	IA16_MASK,	PPCVLE,	0,		{RA, VLEUIMM}},
+{"e_cmph16i",	IA16(28,22),	IA16_MASK,	PPCVLE,	0,		{RA, VLESIMM}},
+{"e_cmpl16i",	I16A(28,21),	I16A_MASK,	PPCVLE,	0,		{RA, VLEUIMM}},
+{"e_mull2i",	I16A(28,20),	I16A_MASK,	PPCVLE,	0,		{RA, VLESIMM}},
+{"e_cmp16i",	IA16(28,19),	IA16_MASK,	PPCVLE,	0,		{RA, VLESIMM}},
+{"e_sub2is",	I16A(28,18),	I16A_MASK,	PPCVLE,	0,		{RA, VLENSIMM}},
+{"e_add2is",	I16A(28,18),	I16A_MASK,	PPCVLE,	0,		{RA, VLESIMM}},
+{"e_sub2i.",	I16A(28,17),	I16A_MASK,	PPCVLE,	0,		{RA, VLENSIMM}},
+{"e_add2i.",	I16A(28,17),	I16A_MASK,	PPCVLE,	0,		{RA, VLESIMM}},
+{"e_li",	LI20(28,0),	LI20_MASK,	PPCVLE,	0,		{RT, IMM20}},
+{"e_rlwimi",	M(29,0),	M_MASK,		PPCVLE,	0,		{RA, RS, SH, MB, ME}},
+{"e_rlwinm",	M(29,1),	M_MASK,		PPCVLE,	0,		{RA, RT, SH, MBE, ME}},
+{"e_b",		BD24(30,0,0),	BD24_MASK,	PPCVLE,	0,		{B24}},
+{"e_bl",	BD24(30,0,1),	BD24_MASK,	PPCVLE,	0,		{B24}},
+{"e_bdnz",	EBD15(30,8,BO32DNZ,0),	EBD15_MASK, PPCVLE, 0,		{B15}},
+{"e_bdnzl",	EBD15(30,8,BO32DNZ,1),	EBD15_MASK, PPCVLE, 0,		{B15}},
+{"e_bdz",	EBD15(30,8,BO32DZ,0),	EBD15_MASK, PPCVLE, 0,		{B15}},
+{"e_bdzl",	EBD15(30,8,BO32DZ,1),	EBD15_MASK, PPCVLE, 0,		{B15}},
+{"e_bge",	EBD15BI(30,8,BO32F,CBLT,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bgel",	EBD15BI(30,8,BO32F,CBLT,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bnl",	EBD15BI(30,8,BO32F,CBLT,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bnll",	EBD15BI(30,8,BO32F,CBLT,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_blt",	EBD15BI(30,8,BO32T,CBLT,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bltl",	EBD15BI(30,8,BO32T,CBLT,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bgt",	EBD15BI(30,8,BO32T,CBGT,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bgtl",	EBD15BI(30,8,BO32T,CBGT,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_ble",	EBD15BI(30,8,BO32F,CBGT,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_blel",	EBD15BI(30,8,BO32F,CBGT,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bng",	EBD15BI(30,8,BO32F,CBGT,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bngl",	EBD15BI(30,8,BO32F,CBGT,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bne",	EBD15BI(30,8,BO32F,CBEQ,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bnel",	EBD15BI(30,8,BO32F,CBEQ,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_beq",	EBD15BI(30,8,BO32T,CBEQ,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_beql",	EBD15BI(30,8,BO32T,CBEQ,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bso",	EBD15BI(30,8,BO32T,CBSO,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bsol",	EBD15BI(30,8,BO32T,CBSO,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bun",	EBD15BI(30,8,BO32T,CBSO,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bunl",	EBD15BI(30,8,BO32T,CBSO,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bns",	EBD15BI(30,8,BO32F,CBSO,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bnsl",	EBD15BI(30,8,BO32F,CBSO,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bnu",	EBD15BI(30,8,BO32F,CBSO,0), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bnul",	EBD15BI(30,8,BO32F,CBSO,1), EBD15BI_MASK, PPCVLE, 0,	{CRS,B15}},
+{"e_bc",	BD15(30,8,0),	BD15_MASK,	PPCVLE,	0,		{BO32, BI32, B15}},
+{"e_bcl",	BD15(30,8,1),	BD15_MASK,	PPCVLE,	0,		{BO32, BI32, B15}},
+
+{"e_bf",	EBD15(30,8,BO32F,0), EBD15_MASK, PPCVLE, 0,		{BI32,B15}},
+{"e_bfl",	EBD15(30,8,BO32F,1), EBD15_MASK, PPCVLE, 0,		{BI32,B15}},
+{"e_bt",	EBD15(30,8,BO32T,0), EBD15_MASK, PPCVLE, 0,		{BI32,B15}},
+{"e_btl",	EBD15(30,8,BO32T,1), EBD15_MASK, PPCVLE, 0,		{BI32,B15}},
+
+{"e_cmph",	X(31,14),	X_MASK,		PPCVLE,	0,		{CRD, RA, RB}},
+{"e_cmphl",	X(31,46),	X_MASK,		PPCVLE,	0,		{CRD, RA, RB}},
+{"e_crandc",	XL(31,129),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+{"e_crnand",	XL(31,225),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+{"e_crnot",	XL(31,33),	XL_MASK,	PPCVLE,	0,		{BT, BA, BBA}},
+{"e_crnor",	XL(31,33),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+{"e_crclr",	XL(31,193),	XL_MASK,	PPCVLE,	0,		{BT, BAT, BBA}},
+{"e_crxor",	XL(31,193),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+{"e_mcrf",	XL(31,16),	XL_MASK,	PPCVLE,	0,		{CRD, CR}},
+{"e_slwi",	EX(31,112),	EX_MASK,	PPCVLE,	0,		{RA, RS, SH}},
+{"e_slwi.",	EX(31,113),	EX_MASK,	PPCVLE,	0,		{RA, RS, SH}},
+
+{"e_crand",	XL(31,257),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+
+{"e_rlw",	EX(31,560),	EX_MASK,	PPCVLE,	0,		{RA, RS, RB}},
+{"e_rlw.",	EX(31,561),	EX_MASK,	PPCVLE,	0,		{RA, RS, RB}},
+
+{"e_crset",	XL(31,289),	XL_MASK,	PPCVLE,	0,		{BT, BAT, BBA}},
+{"e_creqv",	XL(31,289),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+
+{"e_rlwi",	EX(31,624),	EX_MASK,	PPCVLE,	0,		{RA, RS, SH}},
+{"e_rlwi.",	EX(31,625),	EX_MASK,	PPCVLE,	0,		{RA, RS, SH}},
+
+{"e_crorc",	XL(31,417),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+
+{"e_crmove",	XL(31,449),	XL_MASK,	PPCVLE,	0,		{BT, BA, BBA}},
+{"e_cror",	XL(31,449),	XL_MASK,	PPCVLE,	0,		{BT, BA, BB}},
+
+{"mtmas1",	XSPR(31,467,625), XSPR_MASK,	PPCVLE,	0,		{RS}},
+
+{"e_srwi",	EX(31,1136),	EX_MASK,	PPCVLE,	0,		{RA, RS, SH}},
+{"e_srwi.",	EX(31,1137),	EX_MASK,	PPCVLE,	0,		{RA, RS, SH}},
+
+{"se_lbz",	SD4(8),		SD4_MASK,	PPCVLE,	0,		{RZ, SE_SD, RX}},
+
+{"se_stb",	SD4(9),		SD4_MASK,	PPCVLE,	0,		{RZ, SE_SD, RX}},
+
+{"se_lhz",	SD4(10),	SD4_MASK,	PPCVLE,	0,		{RZ, SE_SDH, RX}},
+
+{"se_sth",	SD4(11),	SD4_MASK,	PPCVLE,	0,		{RZ, SE_SDH, RX}},
+
+{"se_lwz",	SD4(12),	SD4_MASK,	PPCVLE,	0,		{RZ, SE_SDW, RX}},
+
+{"se_stw",	SD4(13),	SD4_MASK,	PPCVLE,	0,		{RZ, SE_SDW, RX}},
+
+{"se_bge",	EBD8IO(28,0,0),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bnl",	EBD8IO(28,0,0),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_ble",	EBD8IO(28,0,1),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bng",	EBD8IO(28,0,1),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bne",	EBD8IO(28,0,2),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bns",	EBD8IO(28,0,3),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bnu",	EBD8IO(28,0,3),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bf",	EBD8IO(28,0,0),	EBD8IO2_MASK,	PPCVLE,	0,		{BI16, B8}},
+{"se_blt",	EBD8IO(28,1,0),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bgt",	EBD8IO(28,1,1),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_beq",	EBD8IO(28,1,2),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bso",	EBD8IO(28,1,3),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bun",	EBD8IO(28,1,3),	EBD8IO3_MASK,	PPCVLE,	0,		{B8}},
+{"se_bt",	EBD8IO(28,1,0),	EBD8IO2_MASK,	PPCVLE,	0,		{BI16, B8}},
+{"se_bc",	BD8IO(28),	BD8IO_MASK,	PPCVLE,	0,		{BO16, BI16, B8}},
+{"se_b",	BD8(58,0,0),	BD8_MASK,	PPCVLE,	0,		{B8}},
+{"se_bl",	BD8(58,0,1),	BD8_MASK,	PPCVLE,	0,		{B8}},
+};
+
+const int vle_num_opcodes =	/* Number of entries in vle_opcodes.  */
+  (int) (sizeof vle_opcodes / sizeof *vle_opcodes);
+
+/* The macro table.  This is only used by the assembler.  */
+
+/* The expressions of the form (-x ! 31) & (x | 31) have the value 0
+   when x=0; 32-x when x is between 1 and 31; are negative if x is
+   negative; and are 32 or more otherwise.  This is what you want
+   when, for instance, you are emulating a right shift by a
+   rotate-left-and-mask, because the underlying instructions support
+   shifts of size 0 but not shifts of size 32.  By comparison, when
+   extracting x bits from some word you want to use just 32-x, because
+   the underlying instructions don't support extracting 0 bits but do
+   support extracting the whole word (32 bits in this case).  */
+
+const struct powerpc_macro powerpc_macros[] = { /* name, operand count (apparently), CPU flags, expansion; %N is operand N */
+{"extldi",   4,	PPC64,	"rldicr %0,%1,%3,(%2)-1"},
+{"extldi.",  4,	PPC64,	"rldicr. %0,%1,%3,(%2)-1"},
+{"extrdi",   4,	PPC64,	"rldicl %0,%1,((%2)+(%3))&((%2)+(%3)<>64),64-(%2)"},
+{"extrdi.",  4,	PPC64,	"rldicl. %0,%1,((%2)+(%3))&((%2)+(%3)<>64),64-(%2)"},
+{"insrdi",   4,	PPC64,	"rldimi %0,%1,64-((%2)+(%3)),%3"},
+{"insrdi.",  4,	PPC64,	"rldimi. %0,%1,64-((%2)+(%3)),%3"},
+{"rotrdi",   3,	PPC64,	"rldicl %0,%1,(-(%2)!63)&((%2)|63),0"},
+{"rotrdi.",  3,	PPC64,	"rldicl. %0,%1,(-(%2)!63)&((%2)|63),0"},
+{"sldi",     3,	PPC64,	"rldicr %0,%1,%2,63-(%2)"},
+{"sldi.",    3,	PPC64,	"rldicr. %0,%1,%2,63-(%2)"},
+{"srdi",     3,	PPC64,	"rldicl %0,%1,(-(%2)!63)&((%2)|63),%2"},
+{"srdi.",    3,	PPC64,	"rldicl. %0,%1,(-(%2)!63)&((%2)|63),%2"},
+{"clrrdi",   3,	PPC64,	"rldicr %0,%1,0,63-(%2)"},
+{"clrrdi.",  3,	PPC64,	"rldicr. %0,%1,0,63-(%2)"},
+{"clrlsldi", 4,	PPC64,	"rldic %0,%1,%3,(%2)-(%3)"},
+{"clrlsldi.",4,	PPC64,	"rldic. %0,%1,%3,(%2)-(%3)"},
+/* PPCCOM macros expanding to rlwinm/rlwimi; sli/sri are the PWRCOM spellings expanding to rlinm.  */
+{"extlwi",   4,	PPCCOM,	"rlwinm %0,%1,%3,0,(%2)-1"},
+{"extlwi.",  4,	PPCCOM,	"rlwinm. %0,%1,%3,0,(%2)-1"},
+{"extrwi",   4,	PPCCOM,	"rlwinm %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31"},
+{"extrwi.",  4,	PPCCOM,	"rlwinm. %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31"},
+{"inslwi",   4,	PPCCOM,	"rlwimi %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"},
+{"inslwi.",  4,	PPCCOM,	"rlwimi. %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"},
+{"insrwi",   4,	PPCCOM,	"rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"},
+{"insrwi.",  4,	PPCCOM,	"rlwimi. %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"},
+{"rotrwi",   3,	PPCCOM,	"rlwinm %0,%1,(-(%2)!31)&((%2)|31),0,31"},
+{"rotrwi.",  3,	PPCCOM,	"rlwinm. %0,%1,(-(%2)!31)&((%2)|31),0,31"},
+{"slwi",     3,	PPCCOM,	"rlwinm %0,%1,%2,0,31-(%2)"},
+{"sli",      3,	PWRCOM,	"rlinm %0,%1,%2,0,31-(%2)"},
+{"slwi.",    3,	PPCCOM,	"rlwinm. %0,%1,%2,0,31-(%2)"},
+{"sli.",     3,	PWRCOM,	"rlinm. %0,%1,%2,0,31-(%2)"},
+{"srwi",     3,	PPCCOM,	"rlwinm %0,%1,(-(%2)!31)&((%2)|31),%2,31"},
+{"sri",      3,	PWRCOM,	"rlinm %0,%1,(-(%2)!31)&((%2)|31),%2,31"},
+{"srwi.",    3,	PPCCOM,	"rlwinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31"},
+{"sri.",     3,	PWRCOM,	"rlinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31"},
+{"clrrwi",   3,	PPCCOM,	"rlwinm %0,%1,0,0,31-(%2)"},
+{"clrrwi.",  3,	PPCCOM,	"rlwinm. %0,%1,0,0,31-(%2)"},
+{"clrlslwi", 4,	PPCCOM,	"rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"},
+{"clrlslwi.",4, PPCCOM,	"rlwinm. %0,%1,%3,(%2)-(%3),31-(%3)"},
+/* PPCVLE macros, expanding to e_rlwinm/e_rlwimi.  */
+{"e_extlwi", 4,	PPCVLE, "e_rlwinm %0,%1,%3,0,(%2)-1"},
+{"e_extrwi", 4,	PPCVLE, "e_rlwinm %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31"},
+{"e_inslwi", 4,	PPCVLE, "e_rlwimi %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"},
+{"e_insrwi", 4,	PPCVLE, "e_rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"},
+{"e_rotlwi", 3,	PPCVLE, "e_rlwinm %0,%1,%2,0,31"},
+{"e_rotrwi", 3,	PPCVLE, "e_rlwinm %0,%1,(-(%2)!31)&((%2)|31),0,31"},
+{"e_slwi",   3,	PPCVLE, "e_rlwinm %0,%1,%2,0,31-(%2)"},
+{"e_srwi",   3,	PPCVLE, "e_rlwinm %0,%1,(-(%2)!31)&((%2)|31),%2,31"},
+{"e_clrlwi", 3,	PPCVLE, "e_rlwinm %0,%1,0,%2,31"},
+{"e_clrrwi", 3,	PPCVLE, "e_rlwinm %0,%1,0,0,31-(%2)"},
+{"e_clrlslwi",4, PPCVLE, "e_rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"},
+};
+
+const int powerpc_num_macros =	/* Number of entries in powerpc_macros.  */
+  (int) (sizeof powerpc_macros / sizeof *powerpc_macros);
diff --git a/arch/powerpc/xmon/ppc.h b/arch/powerpc/xmon/ppc.h
new file mode 100644
index 0000000000..1d98b8dd13
--- /dev/null
+++ b/arch/powerpc/xmon/ppc.h
@@ -0,0 +1,450 @@
+/* ppc.h -- Header file for PowerPC opcode table
+   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Written by Ian Lance Taylor, Cygnus Support
+
+This file is part of GDB, GAS, and the GNU binutils.
+
+GDB, GAS, and the GNU binutils are free software; you can redistribute
+them and/or modify them under the terms of the GNU General Public
+License as published by the Free Software Foundation; either version
+1, or (at your option) any later version.
+
+GDB, GAS, and the GNU binutils are distributed in the hope that they
+will be useful, but WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this file; see the file COPYING.  If not, write to the Free
+Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#ifndef PPC_H
+#define PPC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t ppc_cpu_t;
+
+/* The opcode table is an array of struct powerpc_opcode.  */
+
+struct powerpc_opcode
+{
+  /* The opcode name.  */
+  const char *name;
+
+  /* The opcode itself.  Those bits which will be filled in with
+     operands are zeroes.  */
+  unsigned long opcode;
+
+  /* The opcode mask.  This is used by the disassembler.  This is a
+     mask containing ones indicating those bits which must match the
+     opcode field, and zeroes indicating those bits which need not
+     match (and are presumably filled in by operands).  */
+  unsigned long mask;
+
+  /* One bit flags for the opcode.  These are used to indicate which
+     specific processors support the instructions.  The defined values
+     are the PPC_OPCODE_* constants listed below.  */
+  ppc_cpu_t flags;
+
+  /* One bit flags for the opcode.  These are used to indicate which
+     specific processors no longer support the instructions.  The defined
+     values are the same PPC_OPCODE_* constants listed below.  */
+  ppc_cpu_t deprecated;
+
+  /* An array of operand codes.  Each code is an index into the
+     powerpc_operands table.  They appear in the order which the operands
+     must appear in assembly code, and are terminated by a zero.  */
+  unsigned char operands[8];
+};
+
+/* The table itself is sorted by major opcode number, and is otherwise
+   in the order in which the disassembler should consider
+   instructions.  */
+extern const struct powerpc_opcode powerpc_opcodes[];
+extern const int powerpc_num_opcodes;
+extern const struct powerpc_opcode vle_opcodes[];
+extern const int vle_num_opcodes;
+
+/* Values defined for the flags field of a struct powerpc_opcode.  */
+
+/* Opcode is defined for the PowerPC architecture.  */
+#define PPC_OPCODE_PPC			 1
+
+/* Opcode is defined for the POWER (RS/6000) architecture.  */
+#define PPC_OPCODE_POWER		 2
+
+/* Opcode is defined for the POWER2 (Rios 2) architecture.  */
+#define PPC_OPCODE_POWER2		 4
+
+/* Opcode is supported by the Motorola PowerPC 601 processor.  The 601
+   is assumed to support all PowerPC (PPC_OPCODE_PPC) instructions,
+   but it also supports many additional POWER instructions.  */
+#define PPC_OPCODE_601			 8
+
+/* Opcode is supported in both the Power and PowerPC architectures
+   (ie, compiler's -mcpu=common or assembler's -mcom).  More than just
+   the intersection of PPC_OPCODE_PPC with the union of PPC_OPCODE_POWER
+   and PPC_OPCODE_POWER2 because many instructions changed mnemonics
+   between POWER and POWERPC.  */
+#define PPC_OPCODE_COMMON	      0x10
+
+/* Opcode is supported for any Power or PowerPC platform (this is
+   for the assembler's -many option, and it eliminates duplicates).  */
+#define PPC_OPCODE_ANY		      0x20
+
+/* Opcode is only defined on 64 bit architectures.  */
+#define PPC_OPCODE_64		      0x40
+
+/* Opcode is supported as part of the 64-bit bridge.  */
+#define PPC_OPCODE_64_BRIDGE	      0x80
+
+/* Opcode is supported by Altivec Vector Unit */
+#define PPC_OPCODE_ALTIVEC	     0x100
+
+/* Opcode is supported by PowerPC 403 processor.  */
+#define PPC_OPCODE_403		     0x200
+
+/* Opcode is supported by PowerPC BookE processor.  */
+#define PPC_OPCODE_BOOKE	     0x400
+
+/* Opcode is supported by PowerPC 440 processor.  */
+#define PPC_OPCODE_440		     0x800
+
+/* Opcode is only supported by Power4 architecture.  */
+#define PPC_OPCODE_POWER4	    0x1000
+
+/* Opcode is only supported by Power7 architecture.  */
+#define PPC_OPCODE_POWER7	    0x2000
+
+/* Opcode is only supported by e500x2 Core.  */
+#define PPC_OPCODE_SPE		    0x4000
+
+/* Opcode is supported by e500x2 Integer select APU.  */
+#define PPC_OPCODE_ISEL		    0x8000
+
+/* Opcode is an e500 SPE floating point instruction.  */
+#define PPC_OPCODE_EFS		   0x10000
+
+/* Opcode is supported by branch locking APU.  */
+#define PPC_OPCODE_BRLOCK	   0x20000
+
+/* Opcode is supported by performance monitor APU.  */
+#define PPC_OPCODE_PMR		   0x40000
+
+/* Opcode is supported by cache locking APU.  */
+#define PPC_OPCODE_CACHELCK	   0x80000
+
+/* Opcode is supported by machine check APU.  */
+#define PPC_OPCODE_RFMCI	  0x100000
+
+/* Opcode is only supported by Power5 architecture.  */
+#define PPC_OPCODE_POWER5	  0x200000
+
+/* Opcode is supported by PowerPC e300 family.  */
+#define PPC_OPCODE_E300           0x400000
+
+/* Opcode is only supported by Power6 architecture.  */
+#define PPC_OPCODE_POWER6	  0x800000
+
+/* Opcode is only supported by PowerPC Cell family.  */
+#define PPC_OPCODE_CELL		 0x1000000
+
+/* Opcode is supported by CPUs with paired singles support.  */
+#define PPC_OPCODE_PPCPS	 0x2000000
+
+/* Opcode is supported by Power E500MC */
+#define PPC_OPCODE_E500MC        0x4000000
+
+/* Opcode is supported by PowerPC 405 processor.  */
+#define PPC_OPCODE_405		 0x8000000
+
+/* Opcode is supported by Vector-Scalar (VSX) Unit */
+#define PPC_OPCODE_VSX		0x10000000
+
+/* Opcode is supported by A2.  */
+#define PPC_OPCODE_A2	 	0x20000000
+
+/* Opcode is supported by PowerPC 476 processor.  */
+#define PPC_OPCODE_476		0x40000000
+
+/* Opcode is supported by AppliedMicro Titan core */
+#define PPC_OPCODE_TITAN        0x80000000
+
+/* Opcode which is supported by the e500 family */
+#define PPC_OPCODE_E500	       0x100000000ull
+
+/* Opcode is supported by Extended Altivec Vector Unit */
+#define PPC_OPCODE_ALTIVEC2    0x200000000ull
+
+/* Opcode is supported by Power E6500 */
+#define PPC_OPCODE_E6500       0x400000000ull
+
+/* Opcode is supported by Thread management APU */
+#define PPC_OPCODE_TMR         0x800000000ull
+
+/* Opcode which is supported by the VLE extension.  */
+#define PPC_OPCODE_VLE	      0x1000000000ull
+
+/* Opcode is only supported by Power8 architecture.  */
+#define PPC_OPCODE_POWER8     0x2000000000ull
+
+/* Opcode which is supported by the Hardware Transactional Memory extension.  */
+/* Currently, this is the same as the POWER8 mask.  If another cpu comes out
+   that isn't a superset of POWER8, we can define this to its own mask.  */
+#define PPC_OPCODE_HTM        PPC_OPCODE_POWER8
+
+/* Opcode is supported by ppc750cl.  */
+#define PPC_OPCODE_750	      0x4000000000ull
+
+/* Opcode is supported by ppc7450.  */
+#define PPC_OPCODE_7450	      0x8000000000ull
+
+/* Opcode is supported by ppc821/850/860.  */
+#define PPC_OPCODE_860	      0x10000000000ull
+
+/* Opcode is only supported by Power9 architecture.  */
+#define PPC_OPCODE_POWER9     0x20000000000ull
+
+/* Opcode is supported by Vector-Scalar (VSX) Unit from ISA 2.08.  */
+#define PPC_OPCODE_VSX3       0x40000000000ull
+
+/* Opcode is supported by e200z4.  */
+#define PPC_OPCODE_E200Z4     0x80000000000ull
+
+/* A macro to extract the major opcode from an instruction.  */
+#define PPC_OP(i) (((i) >> 26) & 0x3f)
+
+/* A macro to determine if the instruction is a 2-byte VLE insn.  */
+#define PPC_OP_SE_VLE(m) ((m) <= 0xffff)
+
+/* A macro to extract the major opcode from a VLE instruction.  */
+#define VLE_OP(i,m) (((i) >> ((m) <= 0xffff ? 10 : 26)) & 0x3f)
+
+/* A macro to convert a VLE opcode to a VLE opcode segment.  */
+#define VLE_OP_TO_SEG(i) ((i) >> 1)
+
+/* The operands table is an array of struct powerpc_operand.  */
+
+struct powerpc_operand
+{
+  /* A bitmask of bits in the operand.  */
+  unsigned int bitm;
+
+  /* The shift operation to be applied to the operand.  No shift
+     is made if this is zero.  For positive values, the operand
+     is shifted left by SHIFT.  For negative values, the operand
+     is shifted right by -SHIFT.  Use PPC_OPSHIFT_INV to indicate
+     that BITM and SHIFT cannot be used to determine where the
+     operand goes in the insn.  */
+  int shift;
+
+  /* Insertion function.  This is used by the assembler.  To insert an
+     operand value into an instruction, check this field.
+
+     If it is NULL, execute
+	 if (o->shift >= 0)
+	   i |= (op & o->bitm) << o->shift;
+	 else
+	   i |= (op & o->bitm) >> -o->shift;
+     (i is the instruction which we are filling in, o is a pointer to
+     this structure, and op is the operand value).
+
+     If this field is not NULL, then simply call it with the
+     instruction and the operand value.  It will return the new value
+     of the instruction.  If the ERRMSG argument is not NULL, then if
+     the operand value is illegal, *ERRMSG will be set to a warning
+     string (the operand will be inserted in any case).  If the
+     operand value is legal, *ERRMSG will be unchanged (most operands
+     can accept any value).  */
+  unsigned long (*insert)
+    (unsigned long instruction, long op, ppc_cpu_t dialect, const char **errmsg);
+
+  /* Extraction function.  This is used by the disassembler.  To
+     extract this operand type from an instruction, check this field.
+
+     If it is NULL, compute
+	 if (o->shift >= 0)
+	   op = (i >> o->shift) & o->bitm;
+	 else
+	   op = (i << -o->shift) & o->bitm;
+	 if ((o->flags & PPC_OPERAND_SIGNED) != 0)
+	   sign_extend (op);
+     (i is the instruction, o is a pointer to this structure, and op
+     is the result).
+
+     If this field is not NULL, then simply call it with the
+     instruction value.  It will return the value of the operand.  If
+     the INVALID argument is not NULL, *INVALID will be set to
+     non-zero if this operand type can not actually be extracted from
+     this instruction (i.e., the instruction does not match).  If the
+     operand is valid, *INVALID will not be changed.  */
+  long (*extract) (unsigned long instruction, ppc_cpu_t dialect, int *invalid);
+
+  /* One bit syntax flags (the PPC_OPERAND_* values defined below).  */
+  unsigned long flags;
+};
+
+/* Elements in the table are retrieved by indexing with values from
+   the operands field of the powerpc_opcodes table.  */
+
+extern const struct powerpc_operand powerpc_operands[];
+extern const unsigned int num_powerpc_operands;
+
+/* Use with the shift field of a struct powerpc_operand to indicate
+     that BITM and SHIFT cannot be used to determine where the operand
+     goes in the insn.  */
+#define PPC_OPSHIFT_INV (-1U << 31)
+
+/* Values defined for the flags field of a struct powerpc_operand.  */
+
+/* This operand takes signed values.  */
+#define PPC_OPERAND_SIGNED (0x1)
+
+/* This operand takes signed values, but also accepts a full positive
+   range of values when running in 32 bit mode.  That is, if bits is
+   16, it takes any value from -0x8000 to 0xffff.  In 64 bit mode,
+   this flag is ignored.  */
+#define PPC_OPERAND_SIGNOPT (0x2)
+
+/* This operand does not actually exist in the assembler input.  This
+   is used to support extended mnemonics such as mr, for which two
+   operands fields are identical.  The assembler should call the
+   insert function with any op value.  The disassembler should call
+   the extract function, ignore the return value, and check the value
+   placed in the invalid argument.  */
+#define PPC_OPERAND_FAKE (0x4)
+
+/* The next operand should be wrapped in parentheses rather than
+   separated from this one by a comma.  This is used for the load and
+   store instructions which want their operands to look like
+       reg,displacement(reg)
+   */
+#define PPC_OPERAND_PARENS (0x8)
+
+/* This operand may use the symbolic names for the CR fields, which
+   are
+       lt  0	gt  1	eq  2	so  3	un  3
+       cr0 0	cr1 1	cr2 2	cr3 3
+       cr4 4	cr5 5	cr6 6	cr7 7
+   These may be combined arithmetically, as in cr2*4+gt.  These are
+   only supported on the PowerPC, not the POWER.  */
+#define PPC_OPERAND_CR_BIT (0x10)
+
+/* This operand names a register.  The disassembler uses this to print
+   register names with a leading 'r'.  */
+#define PPC_OPERAND_GPR (0x20)
+
+/* Like PPC_OPERAND_GPR, but don't print a leading 'r' for r0.  */
+#define PPC_OPERAND_GPR_0 (0x40)
+
+/* This operand names a floating point register.  The disassembler
+   prints these with a leading 'f'.  */
+#define PPC_OPERAND_FPR (0x80)
+
+/* This operand is a relative branch displacement.  The disassembler
+   prints these symbolically if possible.  */
+#define PPC_OPERAND_RELATIVE (0x100)
+
+/* This operand is an absolute branch address.  The disassembler
+   prints these symbolically if possible.  */
+#define PPC_OPERAND_ABSOLUTE (0x200)
+
+/* This operand is optional, and is zero if omitted.  This is used for
+   example, in the optional BF field in the comparison instructions.  The
+   assembler must count the number of operands remaining on the line,
+   and the number of operands remaining for the opcode, and decide
+   whether this operand is present or not.  The disassembler should
+   print this operand out only if it is not zero.  */
+#define PPC_OPERAND_OPTIONAL (0x400)
+
+/* This flag is only used with PPC_OPERAND_OPTIONAL.  If this operand
+   is omitted, then for the next operand use this operand value plus
+   1, ignoring the next operand field for the opcode.  This wretched
+   hack is needed because the Power rotate instructions can take
+   either 4 or 5 operands.  The disassembler should print this operand
+   out regardless of the PPC_OPERAND_OPTIONAL field.  */
+#define PPC_OPERAND_NEXT (0x800)
+
+/* This operand should be regarded as a negative number for the
+   purposes of overflow checking (i.e., the normal most negative
+   number is disallowed and one more than the normal most positive
+   number is allowed).  This flag will only be set for a signed
+   operand.  */
+#define PPC_OPERAND_NEGATIVE (0x1000)
+
+/* This operand names a vector unit register.  The disassembler
+   prints these with a leading 'v'.  */
+#define PPC_OPERAND_VR (0x2000)
+
+/* This operand is for the DS field in a DS form instruction.  */
+#define PPC_OPERAND_DS (0x4000)
+
+/* This operand is for the DQ field in a DQ form instruction.  */
+#define PPC_OPERAND_DQ (0x8000)
+
+/* Valid range of operand is 0..n rather than 0..n-1.  */
+#define PPC_OPERAND_PLUS1 (0x10000)
+
+/* Xilinx APU and FSL related operands */
+#define PPC_OPERAND_FSL (0x20000)
+#define PPC_OPERAND_FCR (0x40000)
+#define PPC_OPERAND_UDI (0x80000)
+
+/* This operand names a vector-scalar unit register.  The disassembler
+   prints these with a leading 'vs'.  */
+#define PPC_OPERAND_VSR (0x100000)
+
+/* This is a CR FIELD that does not use symbolic names.  */
+#define PPC_OPERAND_CR_REG (0x200000)
+
+/* This flag is only used with PPC_OPERAND_OPTIONAL.  If this operand
+   is omitted, then the value it should use for the operand is stored
+   in the SHIFT field of the immediately following operand field.  */
+#define PPC_OPERAND_OPTIONAL_VALUE (0x400000)
+
+/* This flag is only used with PPC_OPERAND_OPTIONAL.  The operand is
+   only optional when generating 32-bit code.  */
+#define PPC_OPERAND_OPTIONAL32 (0x800000)
+
+/* The POWER and PowerPC assemblers use a few macros.  We keep them
+   with the operands table for simplicity.  The macro table is an
+   array of struct powerpc_macro.  */
+
+struct powerpc_macro
+{
+  /* The macro name.  */
+  const char *name;
+
+  /* The number of operands the macro takes.  */
+  unsigned int operands;
+
+  /* One bit flags for the opcode.  These are used to indicate which
+     specific processors support the instructions.  The values are the
+     same as those for the struct powerpc_opcode flags field.  */
+  ppc_cpu_t flags;
+
+  /* A format string to turn the macro into a normal instruction.
+     Each %N in the string is replaced with operand number N (zero
+     based), e.g. "rlwinm %0,%1,%2,0,31-(%2)" for the 3-operand slwi.  */
+  const char *format;
+};
+
+extern const struct powerpc_macro powerpc_macros[];
+extern const int powerpc_num_macros;
+
+/* Return the value to use for OPERAND when it was omitted: the SHIFT field
+   of the next table entry if PPC_OPERAND_OPTIONAL_VALUE is set, else 0.  */
+static inline long
+ppc_optional_operand_value (const struct powerpc_operand *operand)
+{
+  return (operand->flags & PPC_OPERAND_OPTIONAL_VALUE) ? operand[1].shift : 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PPC_H */
diff --git a/arch/powerpc/xmon/spr_access.S b/arch/powerpc/xmon/spr_access.S
new file mode 100644
index 0000000000..c308ddf268
--- /dev/null
+++ b/arch/powerpc/xmon/spr_access.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+
+/* unsigned long xmon_mfspr(sprn, default_value): dispatch via .Lmfspr_table */
+_GLOBAL(xmon_mfspr)
+	LOAD_REG_ADDR(r5, .Lmfspr_table)
+	b	xmon_mxspr
+
+/* void xmon_mtspr(sprn, new_value): dispatch via .Lmtspr_table */
+_GLOBAL(xmon_mtspr)
+	LOAD_REG_ADDR(r5, .Lmtspr_table)
+	b	xmon_mxspr
+
+/*
+ * r3 = sprn
+ * r4 = default or new value
+ * r5 = table base
+ */
+xmon_mxspr:
+	/*
+	 * Each table entry is two instructions (8 bytes), so the offset is:
+	 *  i = (sprn & 0x3ff) * 8
+	 * or using rlwinm:
+	 *  i = (sprn << 3) & (0x3ff << 3)
+	 */
+	rlwinm	r3, r3, 3, 0x3ff << 3
+	add	r5, r5, r3
+	mtctr	r5
+	mr	r3, r4 /* put default_value in r3 for mfspr */
+	bctr	/* jump to the selected entry; its blr returns to our caller */
+
+.Lmfspr_table:	/* 1024 entries, one per sprn 0..1023: mfspr r3, sprn ; blr */
+	spr = 0
+	.rept	1024
+	mfspr	r3, spr
+	blr
+	spr = spr + 1
+	.endr
+
+.Lmtspr_table:	/* 1024 entries, one per sprn 0..1023: mtspr sprn, r4 ; blr */
+	spr = 0
+	.rept	1024
+	mtspr	spr, r4
+	blr
+	spr = spr + 1
+	.endr
diff --git a/arch/powerpc/xmon/spu-dis.c b/arch/powerpc/xmon/spu-dis.c
new file mode 100644
index 0000000000..4b0a4e640f
--- /dev/null
+++ b/arch/powerpc/xmon/spu-dis.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Disassemble SPU instructions
+
+   Copyright 2006 Free Software Foundation, Inc.
+
+   This file is part of GDB, GAS, and the GNU binutils.
+
+ */
+
+#include <linux/string.h>
+#include "nonstdio.h"
+#include "ansidecl.h"
+#include "spu.h"
+#include "dis-asm.h"
+
+/* This file provides a disassembler function which uses
+   the disassembler interface defined in dis-asm.h.   */
+
+extern const struct spu_opcode spu_opcodes[];
+extern const int spu_num_opcodes;
+
+#define SPU_DISASM_TBL_SIZE (1 << 11)
+static const struct spu_opcode *spu_disassemble_table[SPU_DISASM_TBL_SIZE];
+
+/* Point each spu_disassemble_table slot at the first spu_opcodes entry with
+   that opcode (later duplicates are usually just alternate mnemonics).  */
+static void
+init_spu_disassemble (void)
+{
+  int i;
+
+  for (i = 0; i < spu_num_opcodes; i++)
+    {
+      /* Unsigned: a huge or negative opcode is skipped, never indexed.  */
+      unsigned int slot = spu_opcodes[i].opcode;
+
+      if (slot < SPU_DISASM_TBL_SIZE && spu_disassemble_table[slot] == NULL)
+	spu_disassemble_table[slot] = &spu_opcodes[i];
+    }
+}
+
+/* Find the opcode-table entry for INSN from its 11 most significant bits,
+   trying each opcode width from narrowest to widest; a narrower match only
+   counts if the entry has the matching format.  NULL if nothing matches.  */
+static const struct spu_opcode *
+get_index_for_opcode (unsigned int insn)
+{
+  const unsigned int top11 = insn >> (32 - 11);
+  const struct spu_opcode *hit;
+
+  /* Build the table on first use; slot 0 (opcode 0, currently NOP) is
+     assumed to be populated once that has been done.  */
+  if (spu_disassemble_table[0] == NULL)
+    init_spu_disassemble ();
+
+  hit = spu_disassemble_table[top11 & 0x780];	/* 4-bit opcode */
+  if (hit != NULL && hit->insn_type == RRR)
+    return hit;
+
+  hit = spu_disassemble_table[top11 & 0x7f0];	/* 7-bit opcode */
+  if (hit != NULL && (hit->insn_type == RI18 || hit->insn_type == LBT))
+    return hit;
+
+  hit = spu_disassemble_table[top11 & 0x7f8];	/* 8-bit opcode */
+  if (hit != NULL && hit->insn_type == RI10)
+    return hit;
+
+  hit = spu_disassemble_table[top11 & 0x7fc];	/* 9-bit opcode */
+  if (hit != NULL && hit->insn_type == RI16)
+    return hit;
+
+  hit = spu_disassemble_table[top11 & 0x7fe];	/* 10-bit opcode */
+  if (hit != NULL && hit->insn_type == RI8)
+    return hit;
+
+  /* Full 11-bit opcode: any format is accepted; the slot is NULL if unused.  */
+  return spu_disassemble_table[top11 & 0x7ff];
+}
+
+/* Print the SPU instruction word INSN, located at address MEMADDR.
+   Operand text is produced with printf and print_address; a word that
+   matches no opcode is shown as ".long 0x...".  Always returns 4.  */
+
+int
+print_insn_spu (unsigned long insn, unsigned long memaddr)
+{
+  const struct spu_opcode *index = get_index_for_opcode (insn);
+  enum spu_insns tag;
+  int hex_value = 0;
+  int paren = 0;
+  int value;
+  int i;
+
+  if (index == NULL)
+    {
+      printf(".long 0x%lx", insn);
+      return 4;
+    }
+
+  tag = (enum spu_insns)(index - spu_opcodes);
+  printf("%s", index->mnemonic);
+
+  /* For these instructions, bits of the 7-bit field starting at bit 14
+     select suffix letters that are appended to the mnemonic.  */
+  if (tag == M_BI || tag == M_BISL || tag == M_IRET || tag == M_BISLED
+      || tag == M_BIHNZ || tag == M_BIHZ || tag == M_BINZ || tag == M_BIZ
+      || tag == M_SYNC || tag == M_HBR)
+    {
+      int fb = (insn >> (32-18)) & 0x7f;
+      if (fb & 0x40)
+	printf("%s", tag == M_SYNC ? "c" : "p");
+      if (fb & 0x20)
+	printf("d");
+      if (fb & 0x10)
+	printf("e");
+    }
+
+  if (index->arg[0] != 0)
+    printf("\t");
+
+  /* arg[0] holds the operand count, arg[1..arg[0]] the operand kinds.  */
+  for (i = 1; i <= index->arg[0]; i++)
+    {
+      int arg = index->arg[i];
+
+      /* A_P opens a parenthesis: neither it nor the operand after it gets
+	 a leading comma, and that operand is followed by the closing one.  */
+      if (arg != A_P && !paren && i > 1)
+	printf(",");
+
+      switch (arg)
+	{
+	case A_T:
+	  printf("$%lu", DECODE_INSN_RT (insn));
+	  break;
+	case A_A:
+	  printf("$%lu", DECODE_INSN_RA (insn));
+	  break;
+	case A_B:
+	  printf("$%lu", DECODE_INSN_RB (insn));
+	  break;
+	case A_C:
+	  printf("$%lu", DECODE_INSN_RC (insn));
+	  break;
+	case A_S:
+	  printf("$sp%lu", DECODE_INSN_RA (insn));
+	  break;
+	case A_H:
+	  printf("$ch%lu", DECODE_INSN_RA (insn));
+	  break;
+	case A_P:
+	  paren++;
+	  printf("(");
+	  break;
+	case A_U7A:
+	  printf("%lu", 173 - DECODE_INSN_U8 (insn));
+	  break;
+	case A_U7B:
+	  printf("%lu", 155 - DECODE_INSN_U8 (insn));
+	  break;
+	case A_S3:
+	case A_S6:
+	case A_S7:
+	case A_S7N:
+	case A_U3:
+	case A_U5:
+	case A_U6:
+	case A_U7:
+	  hex_value = DECODE_INSN_I7 (insn);
+	  printf("%d", hex_value);
+	  break;
+	case A_S11:
+	  print_address(memaddr + DECODE_INSN_I9a (insn) * 4);
+	  break;
+	case A_S11I:
+	  print_address(memaddr + DECODE_INSN_I9b (insn) * 4);
+	  break;
+	case A_S10:
+	case A_S10B:
+	  hex_value = DECODE_INSN_I10 (insn);
+	  printf("%d", hex_value);
+	  break;
+	case A_S14:
+	  hex_value = DECODE_INSN_I10 (insn) * 16;
+	  printf("%d", hex_value);
+	  break;
+	case A_S16:
+	  hex_value = DECODE_INSN_I16 (insn);
+	  printf("%d", hex_value);
+	  break;
+	case A_X16:
+	  hex_value = DECODE_INSN_U16 (insn);
+	  printf("%u", hex_value);
+	  break;
+	case A_R18:
+	  value = DECODE_INSN_I16 (insn) * 4;
+	  if (value == 0)
+	    printf("%d", value);
+	  else
+	    {
+	      hex_value = memaddr + value;
+	      print_address(hex_value & 0x3ffff);
+	    }
+	  break;
+	case A_S18:
+	  value = DECODE_INSN_U16 (insn) * 4;
+	  if (value == 0)
+	    printf("%d", value);
+	  else
+	    print_address(value);
+	  break;
+	case A_U18:
+	  /* Always printed numerically: the old test "value == 0 || 1"
+	     was constant-true, so its print_address arm never ran.  */
+	  value = DECODE_INSN_U18 (insn);
+	  hex_value = value;
+	  printf("%u", value);
+	  break;
+	case A_U14:
+	  hex_value = DECODE_INSN_U14 (insn);
+	  printf("%u", hex_value);
+	  break;
+	}
+
+      if (arg != A_P && paren)
+	{
+	  printf(")");
+	  paren--;
+	}
+    }
+
+  /* Repeat the last recorded immediate in hex when it exceeds 16.  */
+  if (hex_value > 16)
+    printf("\t# %x", hex_value);
+
+  return 4;
+}
diff --git a/arch/powerpc/xmon/spu-insns.h b/arch/powerpc/xmon/spu-insns.h
new file mode 100644
index 0000000000..7e1126a199
--- /dev/null
+++ b/arch/powerpc/xmon/spu-insns.h
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPU ELF support for BFD.
+
+   Copyright 2006 Free Software Foundation, Inc.
+
+   This file is part of BFD, the Binary File Descriptor library.
+
+ */
+
+/* SPU Opcode Table
+
+-=-=-= FORMAT =-=-=-
+                                             		                                             
+       +----+-------+-------+-------+-------+  		       +------------+-------+-------+-------+
+RRR    | op |  RC   |  RB   |  RA   |  RT   |		RI7    | op         |  I7   |  RA   |  RT   |
+       +----+-------+-------+-------+-------+		       +------------+-------+-------+-------+
+        0  3       1       1       2       3		        0          1       1       2       3 
+                   0       7       4       1		                   0       7       4       1 
+
+       +-----------+--------+-------+-------+		       +---------+----------+-------+-------+
+RI8    | op        |   I8   |  RA   |  RT   |		RI10   | op      |   I10    |  RA   |  RT   |
+       +-----------+--------+-------+-------+		       +---------+----------+-------+-------+
+        0         9        1       2       3		        0       7          1       2       3 
+                           7       4       1		                           7       4       1 
+
+       +----------+-----------------+-------+		       +--------+-------------------+-------+
+RI16   | op       |       I16       |  RT   |		RI18   | op     |       I18         |  RT   |
+       +----------+-----------------+-------+		       +--------+-------------------+-------+
+        0        8                 2       3		        0      6                   2       3 
+                                   4       1		                                   4       1 
+
+       +------------+-------+-------+-------+		       +-------+--+-----------------+-------+
+RR     | op         |  RB   |  RA   |  RT   |		LBT    | op    |RO|       I16       |  RO   |
+       +------------+-------+-------+-------+		       +-------+--+-----------------+-------+
+        0          1       1       2       3		        0     6  8                 2       3 
+                   0       7       4       1		                                   4       1 
+
+							       +------------+----+--+-------+-------+
+							LBTI   | op         | // |RO|  RA   |  RO   |
+							       +------------+----+--+-------+-------+
+							        0          1    1  1       2       3
+							                   0    5  7       4       1
+
+-=-=-= OPCODE =-=-=-
+
+The OPCODE field specifies the most significant 11 bits of the instruction. Some formats have fewer than 11 opcode bits; in that
+case, the bits outside the op field are defined as 0s. For example, the opcode of the fma instruction (RRR format) is defined as 0x700:
+since 0x700 -> 11'b11100000000, the opcode is 4'b1110 and the other 7 bits are defined as 7'b0000000.
+
+-=-=-= ASM_FORMAT =-=-=-
+
+RRR category						RI7 category                               
+	ASM_RRR		mnemonic RC, RA, RB, RT		        ASM_RI4         mnemonic RT, RA, I4
+							        ASM_RI7         mnemonic RT, RA, I7
+
+RI8 category						RI10 category                               
+	ASM_RUI8	mnemonic RT, RA, UI8		        ASM_AI10        mnemonic RA, I10    
+							        ASM_RI10        mnemonic RT, RA, R10
+							        ASM_RI10IDX     mnemonic RT, I10(RA)
+
+RI16 category						RI18 category                           
+	ASM_I16W	mnemonic I16W			        ASM_RI18        mnemonic RT, I18
+	ASM_RI16	mnemonic RT, I16
+	ASM_RI16W	mnemonic RT, I16W
+
+RR category						LBT category                                    
+	ASM_MFSPR	mnemonic RT, SA			        ASM_LBT         mnemonic brinst, brtarg 
+	ASM_MTSPR	mnemonic SA, RT			                                                
+	ASM_NOOP	mnemonic			LBTI category                                   
+	ASM_RA		mnemonic RA			        ASM_LBTI        mnemonic brinst, RA     
+	ASM_RAB		mnemonic RA, RB
+	ASM_RDCH	mnemonic RT, CA
+	ASM_RR		mnemonic RT, RA, RB
+	ASM_RT		mnemonic RT
+	ASM_RTA		mnemonic RT, RA
+	ASM_WRCH	mnemonic CA, RT
+
+Note that RRR instructions have the names for RC and RT reversed from
+what's in the ISA, in order to put RT in the same position it appears
+for other formats.
+
+-=-=-= DEPENDENCY =-=-=-
+
+The DEPENDENCY field consists of 5 digits. It records which registers are used as sources and which are used as targets.
+The first (most significant) digit is always 0. It is followed by the RC, RB, RA and RT digits.
+If the digit is 0, this means the corresponding register is not used in the instruction.
+If the digit is 1, this means the corresponding register is used as a source in the instruction.
+If the digit is 2, this means the corresponding register is used as a target in the instruction.
+If the digit is 3, this means the corresponding register is used as both source and target in the instruction.
+For example, the dfma instruction has 00113 as the DEPENDENCY field. This means RC is not used in this operation, RB and RA are
+used as sources, and RT is both a source and the target.
+
+-=-=-= PIPE =-=-=-
+
+This field shows which execution pipe is used for the instruction
+
+pipe0 execution pipelines:
+	FP6	SP floating pipeline
+	FP7	integer operations executed in SP floating pipeline
+	FPD	DP floating pipeline
+	FX2	FXU pipeline
+	FX3	Rotate/Shift pipeline
+	FXB	Byte pipeline
+	NOP	No pipeline
+
+pipe1 execution pipelines:
+	BR	Branch pipeline
+	LNOP	No pipeline
+	LS	Load/Store pipeline
+	SHUF	Shuffle pipeline
+	SPR	SPR/CH pipeline
+
+*/
+
+#define _A0() {0}		/* ASM_FORMAT builders: {argument count, arguments...}; here none */
+#define _A1(a) {1,a}		/* one argument */
+#define _A2(a,b) {2,a,b}	/* two arguments, in assembly order */
+#define _A3(a,b,c) {3,a,b,c}	/* three arguments, in assembly order */
+#define _A4(a,b,c,d) {4,a,b,c,d}	/* four arguments; all five builders are #undef'd at the end of this file */
+
+/*    TAG		FORMAT	OPCODE	MNEMONIC	ASM_FORMAT	DEPENDENCY	PIPE	COMMENT				*/
+/*									0[RC][RB][RA][RT]					*/
+/*									1:src, 2:target						*/
+
+APUOP(M_BR,		RI16,	0x190,	"br",		_A1(A_R18),	00000,	BR)	/* BRel          IP<-IP+I16 */
+APUOP(M_BRSL,		RI16,	0x198,	"brsl",		_A2(A_T,A_R18),	00002,	BR)	/* BRelSetLink   RT,IP<-IP,IP+I16 */
+APUOP(M_BRA,		RI16,	0x180,	"bra",		_A1(A_S18),	00000,	BR)	/* BRAbs         IP<-I16 */
+APUOP(M_BRASL,		RI16,	0x188,	"brasl",	_A2(A_T,A_S18),	00002,	BR)	/* BRAbsSetLink  RT,IP<-IP,I16 */
+APUOP(M_FSMBI,		RI16,	0x194,	"fsmbi",	_A2(A_T,A_X16),	00002,	SHUF)	/* FormSelMask%I RT<-fsm(I16) */
+APUOP(M_LQA,		RI16,	0x184,	"lqa",		_A2(A_T,A_S18),	00002,	LS)	/* LoadQAbs      RT<-M[I16] */
+APUOP(M_LQR,		RI16,	0x19C,	"lqr",		_A2(A_T,A_R18),	00002,	LS)	/* LoadQRel      RT<-M[IP+I16] */
+APUOP(M_STOP,		RR,	0x000,	"stop",		_A0(),		00000,	BR)	/* STOP          stop */
+APUOP(M_STOP2,		RR,	0x000,	"stop",		_A1(A_U14),	00000,	BR)	/* STOP          stop */
+APUOP(M_STOPD,		RR,	0x140,	"stopd",	_A3(A_T,A_A,A_B),         00111,	BR)	/* STOPD         stop (with register dependencies) */
+APUOP(M_LNOP,		RR,	0x001,	"lnop",		_A0(),		00000,	LNOP)	/* LNOP          no_operation */
+APUOP(M_SYNC,		RR,	0x002,	"sync",		_A0(),		00000,	BR)	/* SYNC          flush_pipe */
+APUOP(M_DSYNC,		RR,	0x003,	"dsync",	_A0(),		00000,	BR)	/* DSYNC         flush_store_queue */
+APUOP(M_MFSPR,		RR,	0x00c,	"mfspr",	_A2(A_T,A_S),	00002,	SPR)	/* MFSPR         RT<-SA */
+APUOP(M_RDCH,		RR,	0x00d,	"rdch",		_A2(A_T,A_H),	00002,	SPR)	/* ReaDCHannel   RT<-CA:data */
+APUOP(M_RCHCNT,		RR,	0x00f,	"rchcnt",	_A2(A_T,A_H),	00002,	SPR)	/* ReaDCHanCouNT RT<-CA:count */
+APUOP(M_HBRA,		LBT,	0x080,	"hbra",		_A2(A_S11,A_S18),	00000,	LS)	/* HBRA          BTB[B9]<-M[I16] */
+APUOP(M_HBRR,		LBT,	0x090,	"hbrr",		_A2(A_S11,A_R18),	00000,	LS)	/* HBRR          BTB[B9]<-M[IP+I16] */
+APUOP(M_BRZ,		RI16,	0x100,	"brz",		_A2(A_T,A_R18),	00001,	BR)	/* BRZ           IP<-IP+I16_if(RT) */
+APUOP(M_BRNZ,		RI16,	0x108,	"brnz",		_A2(A_T,A_R18),	00001,	BR)	/* BRNZ          IP<-IP+I16_if(RT) */
+APUOP(M_BRHZ,		RI16,	0x110,	"brhz",		_A2(A_T,A_R18),	00001,	BR)	/* BRHZ          IP<-IP+I16_if(RT) */
+APUOP(M_BRHNZ,		RI16,	0x118,	"brhnz",	_A2(A_T,A_R18),	00001,	BR)	/* BRHNZ         IP<-IP+I16_if(RT) */
+APUOP(M_STQA,		RI16,	0x104,	"stqa",		_A2(A_T,A_S18),	00001,	LS)	/* SToreQAbs     M[I16]<-RT */
+APUOP(M_STQR,		RI16,	0x11C,	"stqr",		_A2(A_T,A_R18),	00001,	LS)	/* SToreQRel     M[IP+I16]<-RT */
+APUOP(M_MTSPR,		RR,	0x10c,	"mtspr",	_A2(A_S,A_T),	00001,	SPR)	/* MTSPR         SA<-RT */
+APUOP(M_WRCH,		RR,	0x10d,	"wrch",		_A2(A_H,A_T),	00001,	SPR)	/* ChanWRite     CA<-RT */
+APUOP(M_LQD,		RI10,	0x1a0,	"lqd",		_A4(A_T,A_S14,A_P,A_A),	00012,	LS)	/* LoadQDisp     RT<-M[Ra+I10] */
+APUOP(M_BI,		RR,	0x1a8,	"bi",		_A1(A_A),		00010,	BR)	/* BI            IP<-RA */
+APUOP(M_BISL,		RR,	0x1a9,	"bisl",		_A2(A_T,A_A),	00012,	BR)	/* BISL          RT,IP<-IP,RA */
+APUOP(M_IRET,  		RR,	0x1aa,	"iret",	        _A1(A_A), 	00010,	BR)	/* IRET          IP<-SRR0 */
+APUOP(M_IRET2, 		RR,	0x1aa,	"iret",	        _A0(),	 	00010,	BR)	/* IRET          IP<-SRR0 */
+APUOP(M_BISLED,		RR,	0x1ab,	"bisled",	_A2(A_T,A_A),	00012,	BR)	/* BISLED        RT,IP<-IP,RA_if(ext) */
+APUOP(M_HBR,		LBTI,	0x1ac,	"hbr",		_A2(A_S11I,A_A),	00010,	LS)	/* HBR           BTB[B9]<-M[Ra] */
+APUOP(M_FREST,		RR,	0x1b8,	"frest",	_A2(A_T,A_A),	00012,	SHUF)	/* FREST         RT<-recip(RA) */
+APUOP(M_FRSQEST,	RR,	0x1b9,	"frsqest",	_A2(A_T,A_A),	00012,	SHUF)	/* FRSQEST       RT<-rsqrt(RA) */
+APUOP(M_FSM,		RR,	0x1b4,	"fsm",		_A2(A_T,A_A),	00012,	SHUF)	/* FormSelMask%  RT<-expand(Ra) */
+APUOP(M_FSMH,		RR,	0x1b5,	"fsmh",		_A2(A_T,A_A),	00012,	SHUF)	/* FormSelMask%  RT<-expand(Ra) */
+APUOP(M_FSMB,		RR,	0x1b6,	"fsmb",		_A2(A_T,A_A),	00012,	SHUF)	/* FormSelMask%  RT<-expand(Ra) */
+APUOP(M_GB,		RR,	0x1b0,	"gb",		_A2(A_T,A_A),	00012,	SHUF)	/* GatherBits%   RT<-gather(RA) */
+APUOP(M_GBH,		RR,	0x1b1,	"gbh",		_A2(A_T,A_A),	00012,	SHUF)	/* GatherBits%   RT<-gather(RA) */
+APUOP(M_GBB,		RR,	0x1b2,	"gbb",		_A2(A_T,A_A),	00012,	SHUF)	/* GatherBits%   RT<-gather(RA) */
+APUOP(M_CBD,		RI7,	0x1f4,	"cbd",		_A4(A_T,A_U7,A_P,A_A),	00012,	SHUF)	/* genCtl%%insD  RT<-sta(Ra+I4,siz) */
+APUOP(M_CHD,		RI7,	0x1f5,	"chd",		_A4(A_T,A_U7,A_P,A_A),	00012,	SHUF)	/* genCtl%%insD  RT<-sta(Ra+I4,siz) */
+APUOP(M_CWD,		RI7,	0x1f6,	"cwd",		_A4(A_T,A_U7,A_P,A_A),	00012,	SHUF)	/* genCtl%%insD  RT<-sta(Ra+I4,siz) */
+APUOP(M_CDD,		RI7,	0x1f7,	"cdd",		_A4(A_T,A_U7,A_P,A_A),	00012,	SHUF)	/* genCtl%%insD  RT<-sta(Ra+I4,siz) */
+APUOP(M_ROTQBII,	RI7,	0x1f8,	"rotqbii",	_A3(A_T,A_A,A_U3),	00012,	SHUF)	/* ROTQBII       RT<-RA<<<I7 */
+APUOP(M_ROTQBYI,	RI7,	0x1fc,	"rotqbyi",	_A3(A_T,A_A,A_S7N),	00012,	SHUF)	/* ROTQBYI       RT<-RA<<<(I7*8) */
+APUOP(M_ROTQMBII,	RI7,	0x1f9,	"rotqmbii",	_A3(A_T,A_A,A_S3),	00012,	SHUF)	/* ROTQMBII      RT<-RA<<I7 */
+APUOP(M_ROTQMBYI,	RI7,	0x1fd,	"rotqmbyi",	_A3(A_T,A_A,A_S6),	00012,	SHUF)	/* ROTQMBYI      RT<-RA<<I7 */
+APUOP(M_SHLQBII,	RI7,	0x1fb,	"shlqbii",	_A3(A_T,A_A,A_U3),	00012,	SHUF)	/* SHLQBII       RT<-RA<<I7 */
+APUOP(M_SHLQBYI,	RI7,	0x1ff,	"shlqbyi",	_A3(A_T,A_A,A_U5),	00012,	SHUF)	/* SHLQBYI       RT<-RA<<I7 */
+APUOP(M_STQD,		RI10,	0x120,	"stqd",		_A4(A_T,A_S14,A_P,A_A),	00011,	LS)	/* SToreQDisp    M[Ra+I10]<-RT */
+APUOP(M_BIHNZ,		RR,	0x12b,	"bihnz",	_A2(A_T,A_A),	00011,	BR)	/* BIHNZ         IP<-RA_if(RT) */
+APUOP(M_BIHZ,		RR,	0x12a,	"bihz",		_A2(A_T,A_A),	00011,	BR)	/* BIHZ          IP<-RA_if(RT) */
+APUOP(M_BINZ,		RR,	0x129,	"binz",		_A2(A_T,A_A),	00011,	BR)	/* BINZ          IP<-RA_if(RT) */
+APUOP(M_BIZ,		RR,	0x128,	"biz",		_A2(A_T,A_A),	00011,	BR)	/* BIZ           IP<-RA_if(RT) */
+APUOP(M_CBX,		RR,	0x1d4,	"cbx",		_A3(A_T,A_A,A_B),		00112,	SHUF)	/* genCtl%%insX  RT<-sta(Ra+Rb,siz) */
+APUOP(M_CHX,		RR,	0x1d5,	"chx",		_A3(A_T,A_A,A_B),		00112,	SHUF)	/* genCtl%%insX  RT<-sta(Ra+Rb,siz) */
+APUOP(M_CWX,		RR,	0x1d6,	"cwx",		_A3(A_T,A_A,A_B),		00112,	SHUF)	/* genCtl%%insX  RT<-sta(Ra+Rb,siz) */
+APUOP(M_CDX,		RR,	0x1d7,	"cdx",		_A3(A_T,A_A,A_B),		00112,	SHUF)	/* genCtl%%insX  RT<-sta(Ra+Rb,siz) */
+APUOP(M_LQX,		RR,	0x1c4,	"lqx",		_A3(A_T,A_A,A_B),		00112,	LS)	/* LoadQindeX    RT<-M[Ra+Rb] */
+APUOP(M_ROTQBI,		RR,	0x1d8,	"rotqbi",	_A3(A_T,A_A,A_B),		00112,	SHUF)	/* ROTQBI        RT<-RA<<<Rb */
+APUOP(M_ROTQMBI,	RR,	0x1d9,	"rotqmbi",	_A3(A_T,A_A,A_B),		00112,	SHUF)	/* ROTQMBI       RT<-RA<<Rb */
+APUOP(M_SHLQBI,		RR,	0x1db,	"shlqbi",	_A3(A_T,A_A,A_B),		00112,	SHUF)	/* SHLQBI        RT<-RA<<Rb */
+APUOP(M_ROTQBY,		RR,	0x1dc,	"rotqby",	_A3(A_T,A_A,A_B),		00112,		SHUF)	/* ROTQBY        RT<-RA<<<(Rb*8) */
+APUOP(M_ROTQMBY,	RR,	0x1dd,	"rotqmby",	_A3(A_T,A_A,A_B),		00112,		SHUF)	/* ROTQMBY       RT<-RA<<Rb */
+APUOP(M_SHLQBY,		RR,	0x1df,	"shlqby",	_A3(A_T,A_A,A_B),		00112,	SHUF)	/* SHLQBY        RT<-RA<<Rb */
+APUOP(M_ROTQBYBI,	RR,	0x1cc,	"rotqbybi",	_A3(A_T,A_A,A_B),		00112,		SHUF)	/* ROTQBYBI      RT<-RA<<Rb */
+APUOP(M_ROTQMBYBI,	RR,	0x1cd,	"rotqmbybi",	_A3(A_T,A_A,A_B),		00112,		SHUF)	/* ROTQMBYBI     RT<-RA<<Rb */
+APUOP(M_SHLQBYBI,	RR,	0x1cf,	"shlqbybi",	_A3(A_T,A_A,A_B),		00112,	SHUF)	/* SHLQBYBI      RT<-RA<<Rb */
+APUOP(M_STQX,		RR,	0x144,	"stqx",		_A3(A_T,A_A,A_B),		00111,	LS)	/* SToreQindeX   M[Ra+Rb]<-RT */
+APUOP(M_SHUFB,		RRR,	0x580,	"shufb",	_A4(A_C,A_A,A_B,A_T),	02111,	SHUF)	/* SHUFfleBytes  RC<-f(RA,RB,RT) */
+APUOP(M_IL,		RI16,	0x204,	"il",		_A2(A_T,A_S16),	00002,	FX2)	/* ImmLoad       RT<-sxt(I16) */
+APUOP(M_ILH,		RI16,	0x20c,	"ilh",		_A2(A_T,A_X16),	00002,	FX2)	/* ImmLoadH      RT<-I16 */
+APUOP(M_ILHU,		RI16,	0x208,	"ilhu",		_A2(A_T,A_X16),	00002,	FX2)	/* ImmLoadHUpper RT<-I16<<16 */
+APUOP(M_ILA,		RI18,	0x210,	"ila",		_A2(A_T,A_U18),	00002,	FX2)	/* ImmLoadAddr   RT<-zxt(I18) */
+APUOP(M_NOP,		RR,	0x201,	"nop",		_A1(A_T),		00000,	NOP)	/* XNOP          no_operation */
+APUOP(M_NOP2,		RR,	0x201,	"nop",		_A0(),		00000,	NOP)	/* XNOP          no_operation */
+APUOP(M_IOHL,		RI16,	0x304,	"iohl",		_A2(A_T,A_X16),	00003,	FX2)	/* AddImmeXt     RT<-RT+sxt(I16) */
+APUOP(M_ANDBI,		RI10,	0x0b0,	"andbi",	_A3(A_T,A_A,A_S10B),	00012,	FX2)	/* AND%I         RT<-RA&I10 */
+APUOP(M_ANDHI,		RI10,	0x0a8,	"andhi",	_A3(A_T,A_A,A_S10),	00012,	FX2)	/* AND%I         RT<-RA&I10 */
+APUOP(M_ANDI,		RI10,	0x0a0,	"andi",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* AND%I         RT<-RA&I10 */
+APUOP(M_ORBI,		RI10,	0x030,	"orbi",		_A3(A_T,A_A,A_S10B),	00012,	FX2)	/* OR%I          RT<-RA|I10 */
+APUOP(M_ORHI,		RI10,	0x028,	"orhi",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* OR%I          RT<-RA|I10 */
+APUOP(M_ORI,		RI10,	0x020,	"ori",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* OR%I          RT<-RA|I10 */
+APUOP(M_ORX,		RR,	0x1f0,	"orx",		_A2(A_T,A_A),		00012,	BR)	/* ORX           RT<-RA.w0|RA.w1|RA.w2|RA.w3 */
+APUOP(M_XORBI,		RI10,	0x230,	"xorbi",	_A3(A_T,A_A,A_S10B),	00012,	FX2)	/* XOR%I         RT<-RA^I10 */
+APUOP(M_XORHI,		RI10,	0x228,	"xorhi",	_A3(A_T,A_A,A_S10),	00012,	FX2)	/* XOR%I         RT<-RA^I10 */
+APUOP(M_XORI,		RI10,	0x220,	"xori",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* XOR%I         RT<-RA^I10 */
+APUOP(M_AHI,		RI10,	0x0e8,	"ahi",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* Add%Immed     RT<-RA+I10 */
+APUOP(M_AI,		RI10,	0x0e0,	"ai",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* Add%Immed     RT<-RA+I10 */
+APUOP(M_SFHI,		RI10,	0x068,	"sfhi",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* SubFrom%Imm   RT<-I10-RA */
+APUOP(M_SFI,		RI10,	0x060,	"sfi",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* SubFrom%Imm   RT<-I10-RA */
+APUOP(M_CGTBI,		RI10,	0x270,	"cgtbi",	_A3(A_T,A_A,A_S10B),	00012,	FX2)	/* CGT%I         RT<-(RA>I10) */
+APUOP(M_CGTHI,		RI10,	0x268,	"cgthi",	_A3(A_T,A_A,A_S10),	00012,	FX2)	/* CGT%I         RT<-(RA>I10) */
+APUOP(M_CGTI,		RI10,	0x260,	"cgti",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* CGT%I         RT<-(RA>I10) */
+APUOP(M_CLGTBI,		RI10,	0x2f0,	"clgtbi",	_A3(A_T,A_A,A_S10B),	00012,	FX2)	/* CLGT%I        RT<-(RA>I10) */
+APUOP(M_CLGTHI,		RI10,	0x2e8,	"clgthi",	_A3(A_T,A_A,A_S10),	00012,	FX2)	/* CLGT%I        RT<-(RA>I10) */
+APUOP(M_CLGTI,		RI10,	0x2e0,	"clgti",	_A3(A_T,A_A,A_S10),	00012,	FX2)	/* CLGT%I        RT<-(RA>I10) */
+APUOP(M_CEQBI,		RI10,	0x3f0,	"ceqbi",	_A3(A_T,A_A,A_S10B),	00012,	FX2)	/* CEQ%I         RT<-(RA=I10) */
+APUOP(M_CEQHI,		RI10,	0x3e8,	"ceqhi",	_A3(A_T,A_A,A_S10),	00012,	FX2)	/* CEQ%I         RT<-(RA=I10) */
+APUOP(M_CEQI,		RI10,	0x3e0,	"ceqi",		_A3(A_T,A_A,A_S10),	00012,	FX2)	/* CEQ%I         RT<-(RA=I10) */
+APUOP(M_HGTI,		RI10,	0x278,	"hgti",		_A3(A_T,A_A,A_S10),	00010,	FX2)	/* HaltGTI       halt_if(RA>I10) */
+APUOP(M_HGTI2,		RI10,	0x278,	"hgti",		_A2(A_A,A_S10),	00010,	FX2)	/* HaltGTI       halt_if(RA>I10) */
+APUOP(M_HLGTI,		RI10,	0x2f8,	"hlgti",	_A3(A_T,A_A,A_S10),	00010,	FX2)	/* HaltLGTI      halt_if(RA>I10) */
+APUOP(M_HLGTI2,		RI10,	0x2f8,	"hlgti",	_A2(A_A,A_S10),	00010,	FX2)	/* HaltLGTI      halt_if(RA>I10) */
+APUOP(M_HEQI,		RI10,	0x3f8,	"heqi",		_A3(A_T,A_A,A_S10),	00010,	FX2)	/* HaltEQImm     halt_if(RA=I10) */
+APUOP(M_HEQI2,		RI10,	0x3f8,	"heqi",		_A2(A_A,A_S10),	00010,	FX2)	/* HaltEQImm     halt_if(RA=I10) */
+APUOP(M_MPYI,		RI10,	0x3a0,	"mpyi",		_A3(A_T,A_A,A_S10),	00012,	FP7)	/* MPYI          RT<-RA*I10 */
+APUOP(M_MPYUI,		RI10,	0x3a8,	"mpyui",	_A3(A_T,A_A,A_S10),	00012,	FP7)	/* MPYUI         RT<-RA*I10 */
+APUOP(M_CFLTS,		RI8,	0x3b0,	"cflts",	_A3(A_T,A_A,A_U7A),	00012,	FP7)	/* CFLTS         RT<-int(RA,I8) */
+APUOP(M_CFLTU,		RI8,	0x3b2,	"cfltu",	_A3(A_T,A_A,A_U7A),	00012,	FP7)	/* CFLTU         RT<-int(RA,I8) */
+APUOP(M_CSFLT,		RI8,	0x3b4,	"csflt",	_A3(A_T,A_A,A_U7B),	00012,	FP7)	/* CSFLT         RT<-flt(RA,I8) */
+APUOP(M_CUFLT,		RI8,	0x3b6,	"cuflt",	_A3(A_T,A_A,A_U7B),	00012,	FP7)	/* CUFLT         RT<-flt(RA,I8) */
+APUOP(M_FESD,		RR,	0x3b8,	"fesd",		_A2(A_T,A_A),	00012,	FPD)	/* FESD          RT<-double(RA) */
+APUOP(M_FRDS,		RR,	0x3b9,	"frds",		_A2(A_T,A_A),	00012,	FPD)	/* FRDS          RT<-single(RA) */
+APUOP(M_FSCRRD,		RR,	0x398,	"fscrrd",	_A1(A_T),		00002,	FPD)	/* FSCRRD        RT<-FP_status */
+APUOP(M_FSCRWR,		RR,	0x3ba,	"fscrwr",	_A2(A_T,A_A),	00010,	FP7)	/* FSCRWR        FP_status<-RA */
+APUOP(M_FSCRWR2,	RR,	0x3ba,	"fscrwr",	_A1(A_A),		00010,	FP7)	/* FSCRWR        FP_status<-RA */
+APUOP(M_CLZ,		RR,	0x2a5,	"clz",		_A2(A_T,A_A),	00012,	FX2)	/* CLZ           RT<-clz(RA) */
+APUOP(M_CNTB,		RR,	0x2b4,	"cntb",		_A2(A_T,A_A),	00012,	FXB)	/* CNT           RT<-pop(RA) */
+APUOP(M_XSBH,		RR,	0x2b6,	"xsbh",		_A2(A_T,A_A),	00012,	FX2)	/* eXtSignBtoH   RT<-sign_ext(RA) */
+APUOP(M_XSHW,		RR,	0x2ae,	"xshw",		_A2(A_T,A_A),	00012,	FX2)	/* eXtSignHtoW   RT<-sign_ext(RA) */
+APUOP(M_XSWD,		RR,	0x2a6,	"xswd",		_A2(A_T,A_A),	00012,	FX2)	/* eXtSignWtoD   RT<-sign_ext(RA) */
+APUOP(M_ROTI,		RI7,	0x078,	"roti",		_A3(A_T,A_A,A_S7N),	00012,	FX3)	/* ROT%I         RT<-RA<<<I7 */
+APUOP(M_ROTMI,		RI7,	0x079,	"rotmi",	_A3(A_T,A_A,A_S7),	00012,	FX3)	/* ROT%MI        RT<-RA<<I7 */
+APUOP(M_ROTMAI,		RI7,	0x07a,	"rotmai",	_A3(A_T,A_A,A_S7),	00012,	FX3)	/* ROTMA%I       RT<-RA<<I7 */
+APUOP(M_SHLI,		RI7,	0x07b,	"shli",		_A3(A_T,A_A,A_U6),	00012,	FX3)	/* SHL%I         RT<-RA<<I7 */
+APUOP(M_ROTHI,		RI7,	0x07c,	"rothi",	_A3(A_T,A_A,A_S7N),	00012,	FX3)	/* ROT%I         RT<-RA<<<I7 */
+APUOP(M_ROTHMI,		RI7,	0x07d,	"rothmi",	_A3(A_T,A_A,A_S6),	00012,	FX3)	/* ROT%MI        RT<-RA<<I7 */
+APUOP(M_ROTMAHI,	RI7,	0x07e,	"rotmahi",	_A3(A_T,A_A,A_S6),	00012,	FX3)	/* ROTMA%I       RT<-RA<<I7 */
+APUOP(M_SHLHI,		RI7,	0x07f,	"shlhi",	_A3(A_T,A_A,A_U5),	00012,	FX3)	/* SHL%I         RT<-RA<<I7 */
+APUOP(M_A,		RR,	0x0c0,	"a",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* Add%          RT<-RA+RB */
+APUOP(M_AH,		RR,	0x0c8,	"ah",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* Add%          RT<-RA+RB */
+APUOP(M_SF,		RR,	0x040,	"sf",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* SubFrom%      RT<-RB-RA */
+APUOP(M_SFH,		RR,	0x048,	"sfh",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* SubFrom%      RT<-RB-RA */
+APUOP(M_CGT,		RR,	0x240,	"cgt",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CGT%          RT<-(RA>RB) */
+APUOP(M_CGTB,		RR,	0x250,	"cgtb",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CGT%          RT<-(RA>RB) */
+APUOP(M_CGTH,		RR,	0x248,	"cgth",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CGT%          RT<-(RA>RB) */
+APUOP(M_CLGT,		RR,	0x2c0,	"clgt",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CLGT%         RT<-(RA>RB) */
+APUOP(M_CLGTB,		RR,	0x2d0,	"clgtb",	_A3(A_T,A_A,A_B),		00112,	FX2)	/* CLGT%         RT<-(RA>RB) */
+APUOP(M_CLGTH,		RR,	0x2c8,	"clgth",	_A3(A_T,A_A,A_B),		00112,	FX2)	/* CLGT%         RT<-(RA>RB) */
+APUOP(M_CEQ,		RR,	0x3c0,	"ceq",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CEQ%          RT<-(RA=RB) */
+APUOP(M_CEQB,		RR,	0x3d0,	"ceqb",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CEQ%          RT<-(RA=RB) */
+APUOP(M_CEQH,		RR,	0x3c8,	"ceqh",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* CEQ%          RT<-(RA=RB) */
+APUOP(M_HGT,		RR,	0x258,	"hgt",		_A3(A_T,A_A,A_B),		00110,	FX2)	/* HaltGT        halt_if(RA>RB) */
+APUOP(M_HGT2,		RR,	0x258,	"hgt",		_A2(A_A,A_B),	00110,	FX2)	/* HaltGT        halt_if(RA>RB) */
+APUOP(M_HLGT,		RR,	0x2d8,	"hlgt",		_A3(A_T,A_A,A_B),		00110,	FX2)	/* HaltLGT       halt_if(RA>RB) */
+APUOP(M_HLGT2,		RR,	0x2d8,	"hlgt",		_A2(A_A,A_B),	00110,	FX2)	/* HaltLGT       halt_if(RA>RB) */
+APUOP(M_HEQ,		RR,	0x3d8,	"heq",		_A3(A_T,A_A,A_B),		00110,	FX2)	/* HaltEQ        halt_if(RA=RB) */
+APUOP(M_HEQ2,		RR,	0x3d8,	"heq",		_A2(A_A,A_B),	00110,	FX2)	/* HaltEQ        halt_if(RA=RB) */
+APUOP(M_FCEQ,		RR,	0x3c2,	"fceq",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* FCEQ          RT<-(RA=RB) */
+APUOP(M_FCMEQ,		RR,	0x3ca,	"fcmeq",	_A3(A_T,A_A,A_B),		00112,	FX2)	/* FCMEQ         RT<-(|RA|=|RB|) */
+APUOP(M_FCGT,		RR,	0x2c2,	"fcgt",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* FCGT          RT<-(RA<RB) */
+APUOP(M_FCMGT,		RR,	0x2ca,	"fcmgt",	_A3(A_T,A_A,A_B),		00112,	FX2)	/* FCMGT         RT<-(|RA|<|RB|) */
+APUOP(M_AND,		RR,	0x0c1,	"and",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* AND           RT<-RA&RB */
+APUOP(M_NAND,		RR,	0x0c9,	"nand",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* NAND          RT<-!(RA&RB) */
+APUOP(M_OR,		RR,	0x041,	"or",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* OR            RT<-RA|RB */
+APUOP(M_NOR,		RR,	0x049,	"nor",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* NOR           RT<-!(RA|RB) */
+APUOP(M_XOR,		RR,	0x241,	"xor",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* XOR           RT<-RA^RB */
+APUOP(M_EQV,		RR,	0x249,	"eqv",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* EQuiValent    RT<-!(RA^RB) */
+APUOP(M_ANDC,		RR,	0x2c1,	"andc",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* ANDComplement RT<-RA&!RB */
+APUOP(M_ORC,		RR,	0x2c9,	"orc",		_A3(A_T,A_A,A_B),		00112,	FX2)	/* ORComplement  RT<-RA|!RB */
+APUOP(M_ABSDB,		RR,	0x053,	"absdb",	_A3(A_T,A_A,A_B),		00112,	FXB)	/* ABSoluteDiff  RT<-|RA-RB| */
+APUOP(M_AVGB,		RR,	0x0d3,	"avgb",		_A3(A_T,A_A,A_B),		00112,	FXB)	/* AVG%          RT<-(RA+RB+1)/2 */
+APUOP(M_SUMB,		RR,	0x253,	"sumb",		_A3(A_T,A_A,A_B),		00112,	FXB)	/* SUM%          RT<-f(RA,RB) */
+APUOP(M_DFA,		RR,	0x2cc,	"dfa",		_A3(A_T,A_A,A_B),		00112,	FPD)	/* DFAdd         RT<-RA+RB */
+APUOP(M_DFM,		RR,	0x2ce,	"dfm",		_A3(A_T,A_A,A_B),		00112,	FPD)	/* DFMul         RT<-RA*RB */
+APUOP(M_DFS,		RR,	0x2cd,	"dfs",		_A3(A_T,A_A,A_B),		00112,	FPD)	/* DFSub         RT<-RA-RB */
+APUOP(M_FA,		RR,	0x2c4,	"fa",		_A3(A_T,A_A,A_B),		00112,	FP6)	/* FAdd          RT<-RA+RB */
+APUOP(M_FM,		RR,	0x2c6,	"fm",		_A3(A_T,A_A,A_B),		00112,	FP6)	/* FMul          RT<-RA*RB */
+APUOP(M_FS,		RR,	0x2c5,	"fs",		_A3(A_T,A_A,A_B),		00112,	FP6)	/* FSub          RT<-RA-RB */
+APUOP(M_MPY,		RR,	0x3c4,	"mpy",		_A3(A_T,A_A,A_B),		00112,	FP7)	/* MPY           RT<-RA*RB */
+APUOP(M_MPYH,		RR,	0x3c5,	"mpyh",		_A3(A_T,A_A,A_B),		00112,	FP7)	/* MPYH          RT<-(RAh*RB)<<16 */
+APUOP(M_MPYHH,		RR,	0x3c6,	"mpyhh",	_A3(A_T,A_A,A_B),		00112,	FP7)	/* MPYHH         RT<-RAh*RBh */
+APUOP(M_MPYHHU,		RR,	0x3ce,	"mpyhhu",	_A3(A_T,A_A,A_B),		00112,	FP7)	/* MPYHHU        RT<-RAh*RBh */
+APUOP(M_MPYS,		RR,	0x3c7,	"mpys",		_A3(A_T,A_A,A_B),		00112,	FP7)	/* MPYS          RT<-(RA*RB)>>16 */
+APUOP(M_MPYU,		RR,	0x3cc,	"mpyu",		_A3(A_T,A_A,A_B),		00112,	FP7)	/* MPYU          RT<-RA*RB */
+APUOP(M_FI,		RR,	0x3d4,	"fi",		_A3(A_T,A_A,A_B),		00112,	FP7)	/* FInterpolate  RT<-f(RA,RB) */
+APUOP(M_ROT,		RR,	0x058,	"rot",		_A3(A_T,A_A,A_B),		00112,	FX3)	/* ROT%          RT<-RA<<<RB */
+APUOP(M_ROTM,		RR,	0x059,	"rotm",		_A3(A_T,A_A,A_B),		00112,	FX3)	/* ROT%M         RT<-RA<<Rb */
+APUOP(M_ROTMA,		RR,	0x05a,	"rotma",	_A3(A_T,A_A,A_B),		00112,	FX3)	/* ROTMA%        RT<-RA<<Rb */
+APUOP(M_SHL,		RR,	0x05b,	"shl",		_A3(A_T,A_A,A_B),		00112,	FX3)	/* SHL%          RT<-RA<<Rb */
+APUOP(M_ROTH,		RR,	0x05c,	"roth",		_A3(A_T,A_A,A_B),		00112,	FX3)	/* ROT%          RT<-RA<<<RB */
+APUOP(M_ROTHM,		RR,	0x05d,	"rothm",	_A3(A_T,A_A,A_B),		00112,	FX3)	/* ROT%M         RT<-RA<<Rb */
+APUOP(M_ROTMAH,		RR,	0x05e,	"rotmah",	_A3(A_T,A_A,A_B),		00112,	FX3)	/* ROTMA%        RT<-RA<<Rb */
+APUOP(M_SHLH,		RR,	0x05f,	"shlh",		_A3(A_T,A_A,A_B),		00112,	FX3)	/* SHL%          RT<-RA<<Rb */
+APUOP(M_MPYHHA,		RR,	0x346,	"mpyhha",	_A3(A_T,A_A,A_B),		00113,	FP7)	/* MPYHHA        RT<-RAh*RBh+RT */
+APUOP(M_MPYHHAU,	RR,	0x34e,	"mpyhhau",	_A3(A_T,A_A,A_B),		00113,	FP7)	/* MPYHHAU       RT<-RAh*RBh+RT */
+APUOP(M_DFMA,		RR,	0x35c,	"dfma",		_A3(A_T,A_A,A_B),		00113,	FPD)	/* DFMAdd        RT<-RT+RA*RB */
+APUOP(M_DFMS,		RR,	0x35d,	"dfms",		_A3(A_T,A_A,A_B),		00113,	FPD)	/* DFMSub        RT<-RA*RB-RT */
+APUOP(M_DFNMS,		RR,	0x35e,	"dfnms",	_A3(A_T,A_A,A_B),		00113,	FPD)	/* DFNMSub       RT<-RT-RA*RB */
+APUOP(M_DFNMA,		RR,	0x35f,	"dfnma",	_A3(A_T,A_A,A_B),		00113,	FPD)	/* DFNMAdd       RT<-(-RT)-RA*RB */
+APUOP(M_FMA,		RRR,	0x700,	"fma",		_A4(A_C,A_A,A_B,A_T),	02111,	FP6)	/* FMAdd         RC<-RT+RA*RB */
+APUOP(M_FMS,		RRR,	0x780,	"fms",		_A4(A_C,A_A,A_B,A_T),	02111,	FP6)	/* FMSub         RC<-RA*RB-RT */
+APUOP(M_FNMS,		RRR,	0x680,	"fnms",		_A4(A_C,A_A,A_B,A_T),	02111,	FP6)	/* FNMSub        RC<-RT-RA*RB */
+APUOP(M_MPYA,		RRR,	0x600,	"mpya",		_A4(A_C,A_A,A_B,A_T),	02111,	FP7)	/* MPYA          RC<-RA*RB+RT */
+APUOP(M_SELB,		RRR,	0x400,	"selb",		_A4(A_C,A_A,A_B,A_T),	02111,	FX2)	/* SELectBits    RC<-RA&RT|RB&!RT */
+/* for system function call, this uses op-code of mtspr */
+APUOP(M_SYSCALL,	RI7,    0x10c,	"syscall",      _A3(A_T,A_A,A_S7N),	00002,	SPR)        /* System Call */
+/*
+pseudo instruction:
+system call
+value of I9	operation
+0	halt
+1		rt[0] = open(MEM[ra[0]],	ra[1])
+2		rt[0] = close(ra[0])
+3		rt[0] = read(ra[0],	MEM[ra[1]],	ra[2])
+4		rt[0] = write(ra[0],	MEM[ra[1]],	ra[2])
+5		printf(MEM[ra[0]],	ra[1],	ra[2],	ra[3])
+42		rt[0] = clock()
+52		rt[0] = lseek(ra0,	ra1,	ra2)
+
+*/
+
+
+/* new multiprecision add/sub; the 00113 rows read RT (carry/borrow in) as well as writing it */
+APUOP(M_ADDX,		RR,	0x340,	"addx",		_A3(A_T,A_A,A_B),		00113,		FX2)	/* Add_eXtended  RT<-RA+RB+RT */
+APUOP(M_CG,		RR,	0x0c2,	"cg",		_A3(A_T,A_A,A_B),		00112,		FX2)	/* CarryGenerate RT<-cout(RA+RB) */
+APUOP(M_CGX,		RR,	0x342,	"cgx",		_A3(A_T,A_A,A_B),		00113,		FX2)	/* CarryGen_eXtd RT<-cout(RA+RB+RT) */
+APUOP(M_SFX,		RR,	0x341,	"sfx",		_A3(A_T,A_A,A_B),		00113,		FX2)	/* SubFrom_eXtd  RT<-RB-RA, borrow-in from RT */
+APUOP(M_BG,		RR,	0x042,	"bg",		_A3(A_T,A_A,A_B),		00112,		FX2)	/* BorrowGen     RT<-borrow(RB-RA) */
+APUOP(M_BGX,		RR,	0x343,	"bgx",		_A3(A_T,A_A,A_B),		00113,		FX2)	/* BorrowGen_eXt RT<-borrow(RB-RA, borrow-in from RT) */
+
+/*
+
+The following ops are a subset of above except with feature bits set.
+Feature bits are bits 11-17 of the instruction:
+
+  11 - C & P feature bit
+  12 - disable interrupts
+  13 - enable interrupts
+
+*/
+APUOPFB(M_BID,		RR,	0x1a8,	0x20,	"bid",		_A1(A_A),		00010,	BR)	/* BI            IP<-RA */
+APUOPFB(M_BIE,		RR,	0x1a8,	0x10,	"bie",		_A1(A_A),		00010,	BR)	/* BI            IP<-RA */
+APUOPFB(M_BISLD,	RR,	0x1a9,	0x20,	"bisld",	_A2(A_T,A_A),	00012,	BR)	/* BISL          RT,IP<-IP,RA */
+APUOPFB(M_BISLE,	RR,	0x1a9,	0x10,	"bisle",	_A2(A_T,A_A),	00012,	BR)	/* BISL          RT,IP<-IP,RA */
+APUOPFB(M_IRETD,  	RR,	0x1aa,	0x20,	"iretd",	_A1(A_A), 	00010,	BR)	/* IRET          IP<-SRR0 */
+APUOPFB(M_IRETD2,  	RR,	0x1aa,	0x20,	"iretd",	_A0(),	 	00010,	BR)	/* IRET          IP<-SRR0 */
+APUOPFB(M_IRETE,  	RR,	0x1aa,	0x10,	"irete",	_A1(A_A), 	00010,	BR)	/* IRET          IP<-SRR0 */
+APUOPFB(M_IRETE2,  	RR,	0x1aa,	0x10,	"irete",	_A0(),	 	00010,	BR)	/* IRET          IP<-SRR0 */
+APUOPFB(M_BISLEDD,	RR,	0x1ab,	0x20,	"bisledd",	_A2(A_T,A_A),	00012,	BR)	/* BISLED        RT,IP<-IP,RA_if(ext) */
+APUOPFB(M_BISLEDE,	RR,	0x1ab,	0x10,	"bislede",	_A2(A_T,A_A),	00012,	BR)	/* BISLED        RT,IP<-IP,RA_if(ext) */
+APUOPFB(M_BIHNZD,	RR,	0x12b,	0x20,	"bihnzd",	_A2(A_T,A_A),	00011,	BR)	/* BIHNZ         IP<-RA_if(RT) */
+APUOPFB(M_BIHNZE,	RR,	0x12b,	0x10,	"bihnze",	_A2(A_T,A_A),	00011,	BR)	/* BIHNZ         IP<-RA_if(RT) */
+APUOPFB(M_BIHZD,	RR,	0x12a,	0x20,	"bihzd",	_A2(A_T,A_A),	00011,	BR)	/* BIHZ          IP<-RA_if(RT) */
+APUOPFB(M_BIHZE,	RR,	0x12a,	0x10,	"bihze",	_A2(A_T,A_A),	00011,	BR)	/* BIHZ          IP<-RA_if(RT) */
+APUOPFB(M_BINZD,	RR,	0x129,	0x20,	"binzd",	_A2(A_T,A_A),	00011,	BR)	/* BINZ          IP<-RA_if(RT) */
+APUOPFB(M_BINZE,	RR,	0x129,	0x10,	"binze",	_A2(A_T,A_A),	00011,	BR)	/* BINZ          IP<-RA_if(RT) */
+APUOPFB(M_BIZD,		RR,	0x128,	0x20,	"bizd",		_A2(A_T,A_A),	00011,	BR)	/* BIZ           IP<-RA_if(RT) */
+APUOPFB(M_BIZE,		RR,	0x128,	0x10,	"bize",		_A2(A_T,A_A),	00011,	BR)	/* BIZ           IP<-RA_if(RT) */
+APUOPFB(M_SYNCC,	RR,	0x002,	0x40,	"syncc",	_A0(),		00000,	BR)	/* SYNCC          flush_pipe */
+APUOPFB(M_HBRP,		LBTI,	0x1ac,	0x40,	"hbrp",		_A0(),	        00010,	LS)	/* HBR           BTB[B9]<-M[Ra] */
+
+/* Synonyms required by the AS manual. */
+APUOP(M_LR,		RI10,	0x020,	"lr",		_A2(A_T,A_A),	00012,	FX2)	/* OR%I          RT<-RA|I10 */
+APUOP(M_BIHT,		RR,	0x12b,	"biht", 	_A2(A_T,A_A),	00011,	BR)	/* BIHNZ         IP<-RA_if(RT) */
+APUOP(M_BIHF,		RR,	0x12a,	"bihf",		_A2(A_T,A_A),	00011,	BR)	/* BIHZ          IP<-RA_if(RT) */
+APUOP(M_BIT,		RR,	0x129,	"bit",		_A2(A_T,A_A),	00011,	BR)	/* BINZ          IP<-RA_if(RT) */
+APUOP(M_BIF,		RR,	0x128,	"bif",		_A2(A_T,A_A),	00011,	BR)	/* BIZ           IP<-RA_if(RT) */
+APUOPFB(M_BIHTD,	RR,	0x12b,	0x20,	"bihtd",	_A2(A_T,A_A),	00011,	BR)	/* BIHNF         IP<-RA_if(RT) */
+APUOPFB(M_BIHTE,	RR,	0x12b,	0x10,	"bihte",	_A2(A_T,A_A),	00011,	BR)	/* BIHNF         IP<-RA_if(RT) */
+APUOPFB(M_BIHFD,	RR,	0x12a,	0x20,	"bihfd",	_A2(A_T,A_A),	00011,	BR)	/* BIHZ          IP<-RA_if(RT) */
+APUOPFB(M_BIHFE,	RR,	0x12a,	0x10,	"bihfe",	_A2(A_T,A_A),	00011,	BR)	/* BIHZ          IP<-RA_if(RT) */
+APUOPFB(M_BITD, 	RR,	0x129,	0x20,	"bitd", 	_A2(A_T,A_A),	00011,	BR)	/* BINF          IP<-RA_if(RT) */
+APUOPFB(M_BITE, 	RR,	0x129,	0x10,	"bite", 	_A2(A_T,A_A),	00011,	BR)	/* BINF          IP<-RA_if(RT) */
+APUOPFB(M_BIFD,		RR,	0x128,	0x20,	"bifd",		_A2(A_T,A_A),	00011,	BR)	/* BIZ           IP<-RA_if(RT) */
+APUOPFB(M_BIFE,		RR,	0x128,	0x10,	"bife",		_A2(A_T,A_A),	00011,	BR)	/* BIZ           IP<-RA_if(RT) */
+
+#undef _A0
+#undef _A1
+#undef _A2
+#undef _A3
+#undef _A4
diff --git a/arch/powerpc/xmon/spu-opc.c b/arch/powerpc/xmon/spu-opc.c
new file mode 100644
index 0000000000..6d8197cc54
--- /dev/null
+++ b/arch/powerpc/xmon/spu-opc.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* SPU opcode list
+
+   Copyright 2006 Free Software Foundation, Inc.
+
+   This file is part of GDB, GAS, and the GNU binutils.
+
+ */
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include "spu.h"
+
+/* This file holds the Spu opcode table */
+
+
+/*
+   Example contents of spu-insns.h
+      id_tag	mode	mode	type	opcode	mnemonic	asmtype	    dependency		FPU	L/S?	branch?	instruction   
+                QUAD	WORD                                               (0,RC,RB,RA,RT)    latency  			              		
+   APUOP(M_LQD,	1,	0,	RI9,	0x1f8,	"lqd",		ASM_RI9IDX,	00012,		FXU,	1,	0)	Load Quadword d-form 
+ */
+
+const struct spu_opcode spu_opcodes[] = {
+#define APUOP(TAG,MACFORMAT,OPCODE,MNEMONIC,ASMFORMAT,DEP,PIPE) \
+	{ MACFORMAT, OPCODE, MNEMONIC, ASMFORMAT },
+#define APUOPFB(TAG,MACFORMAT,OPCODE,FB,MNEMONIC,ASMFORMAT,DEP,PIPE) \
+	{ MACFORMAT, OPCODE, MNEMONIC, ASMFORMAT },
+#include "spu-insns.h"
+#undef APUOP
+#undef APUOPFB
+};
+
+const int spu_num_opcodes = ARRAY_SIZE(spu_opcodes);
diff --git a/arch/powerpc/xmon/spu.h b/arch/powerpc/xmon/spu.h
new file mode 100644
index 0000000000..2d13b1a5fa
--- /dev/null
+++ b/arch/powerpc/xmon/spu.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPU ELF support for BFD.
+
+   Copyright 2006 Free Software Foundation, Inc.
+
+   This file is part of GDB, GAS, and the GNU binutils.
+
+ */
+
+
+/* These two enums are from rel_apu/common/spu_asm_format.h */
+/* definition of instruction format */
+typedef enum {	/* instruction encoding format; this is the type of spu_opcode.insn_type */
+  RRR,
+  RI18,
+  RI16,
+  RI10,
+  RI8,
+  RI7,
+  RR,
+  LBT,
+  LBTI,
+  IDATA,
+  UNKNOWN_IFORMAT
+} spu_iformat;
+
+/* These values describe assembly instruction arguments.  They indicate
+ * how to encode, range checking and which relocation to use. */
+typedef enum {
+  A_T,  /* register at pos 0 */
+  A_A,  /* register at pos 7 */
+  A_B,  /* register at pos 14 */
+  A_C,  /* register at pos 21 */
+  A_S,  /* special purpose register at pos 7 */
+  A_H,  /* channel register at pos 7 */
+  A_P,  /* parenthesis, this has to separate regs from immediates */
+  A_S3,
+  A_S6,
+  A_S7N,
+  A_S7,
+  A_U7A,
+  A_U7B,
+  A_S10B,
+  A_S10,
+  A_S11,
+  A_S11I,
+  A_S14,
+  A_S16,
+  A_S18,
+  A_R18,
+  A_U3,
+  A_U5,
+  A_U6,
+  A_U7,
+  A_U14,
+  A_X16,
+  A_U18,
+  A_MAX	/* last enumerator, so its value is the number of argument kinds above */
+} spu_aformat;
+
+enum spu_insns {	/* one tag per spu-insns.h entry, in the order the entries appear in that file */
+#define APUOP(TAG,MACFORMAT,OPCODE,MNEMONIC,ASMFORMAT,DEP,PIPE) \
+	TAG,
+#define APUOPFB(TAG,MACFORMAT,OPCODE,FB,MNEMONIC,ASMFORMAT,DEP,PIPE) \
+	TAG,
+#include "spu-insns.h"
+#undef APUOP
+#undef APUOPFB
+        M_SPU_MAX	/* follows the last tag, so it equals the number of entries */
+};
+
+struct spu_opcode
+{
+   spu_iformat insn_type;	/* MACFORMAT column of the spu-insns.h entry */
+   unsigned int opcode;	/* OPCODE column */
+   char *mnemonic;	/* MNEMONIC column, e.g. "bihfd" */
+   int arg[5];	/* ASMFORMAT column; presumably a count followed by spu_aformat codes -- confirm against the _A0.._A4 macros */
+};
+
+#define SIGNED_EXTRACT(insn,size,pos) (((int)((insn) << (32-(size)-(pos)))) >> (32-(size)))	/* sign-extended field of 'size' bits at bit 'pos' */
+#define UNSIGNED_EXTRACT(insn,size,pos) (((insn) >> (pos)) & ((1 << (size))-1))	/* zero-extended field of 'size' bits at bit 'pos' */
+/* 7-bit register fields at bit positions 0 (RT), 7 (RA), 14 (RB) and 21 (RC); every macro argument is parenthesized so expressions can be passed safely. */
+#define DECODE_INSN_RT(insn) ((insn) & 0x7f)
+#define DECODE_INSN_RA(insn) (((insn) >> 7) & 0x7f)
+#define DECODE_INSN_RB(insn) (((insn) >> 14) & 0x7f)
+#define DECODE_INSN_RC(insn) (((insn) >> 21) & 0x7f)
+
+#define DECODE_INSN_I10(insn) SIGNED_EXTRACT(insn,10,14)
+#define DECODE_INSN_U10(insn) UNSIGNED_EXTRACT(insn,10,14)
+
+/* For branching, immediate loads, hbr and  lqa/stqa. */
+#define DECODE_INSN_I16(insn) SIGNED_EXTRACT(insn,16,7)
+#define DECODE_INSN_U16(insn) UNSIGNED_EXTRACT(insn,16,7)
+
+/* for stop */
+#define DECODE_INSN_U14(insn) UNSIGNED_EXTRACT(insn,14,0)
+
+/* For ila */
+#define DECODE_INSN_I18(insn) SIGNED_EXTRACT(insn,18,7)
+#define DECODE_INSN_U18(insn) UNSIGNED_EXTRACT(insn,18,7)
+
+/* For rotate and shift and generate control mask */
+#define DECODE_INSN_I7(insn) SIGNED_EXTRACT(insn,7,14)
+#define DECODE_INSN_U7(insn) UNSIGNED_EXTRACT(insn,7,14)
+
+/* For float <-> int conversion */
+#define DECODE_INSN_I8(insn)  SIGNED_EXTRACT(insn,8,14)
+#define DECODE_INSN_U8(insn) UNSIGNED_EXTRACT(insn,8,14)
+
+/* For hbr  */
+#define DECODE_INSN_I9a(insn) ((SIGNED_EXTRACT(insn,2,23) << 7) | UNSIGNED_EXTRACT(insn,7,0))
+#define DECODE_INSN_I9b(insn) ((SIGNED_EXTRACT(insn,2,14) << 7) | UNSIGNED_EXTRACT(insn,7,0))
+#define DECODE_INSN_U9a(insn) ((UNSIGNED_EXTRACT(insn,2,23) << 7) | UNSIGNED_EXTRACT(insn,7,0))
+#define DECODE_INSN_U9b(insn) ((UNSIGNED_EXTRACT(insn,2,14) << 7) | UNSIGNED_EXTRACT(insn,7,0))
+
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
new file mode 100644
index 0000000000..b3b94cd377
--- /dev/null
+++ b/arch/powerpc/xmon/xmon.c
@@ -0,0 +1,4371 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Routines providing a simple monitor for use on the PowerMac.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ * Copyright (C) 2006 Michael Ellerman, IBM Corp
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/sched/signal.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/kallsyms.h>
+#include <linux/kmsg_dump.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/sysrq.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+#include <linux/ctype.h>
+#include <linux/highmem.h>
+#include <linux/security.h>
+#include <linux/debugfs.h>
+
+#include <asm/ptrace.h>
+#include <asm/smp.h>
+#include <asm/string.h>
+#include <asm/machdep.h>
+#include <asm/xmon.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/cputable.h>
+#include <asm/rtas.h>
+#include <asm/sstep.h>
+#include <asm/irq_regs.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/setjmp.h>
+#include <asm/reg.h>
+#include <asm/debug.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/xive.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+#include <asm/code-patching.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+#include <asm/interrupt.h>
+
+#ifdef CONFIG_PPC64
+#include <asm/hvcall.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#endif
+
+#include "nonstdio.h"
+#include "dis-asm.h"
+#include "xmon_bpts.h"
+
+#ifdef CONFIG_SMP
+static cpumask_t cpus_in_xmon = CPU_MASK_NONE;	/* CPUs currently inside xmon_core() */
+static unsigned long xmon_taken = 1;	/* bit 0: the CPU named by xmon_owner has taken control */
+static int xmon_owner;	/* CPU that runs (or is being handed) the command loop */
+static int xmon_gate;	/* set once the first CPU in has finished its setup; cleared on exit */
+static int xmon_batch;	/* command character being run on every CPU, 0 when none */
+static unsigned long xmon_batch_start_cpu;
+static cpumask_t xmon_batch_cpus = CPU_MASK_NONE;	/* CPUs that still have to run the batch command */
+#else
+#define xmon_owner 0
+#endif /* CONFIG_SMP */
+
+static unsigned long in_xmon __read_mostly = 0;	/* non-zero while the monitor is active (bit 0 is the SMP entry latch) */
+static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT);	/* debugger hooks enabled; see force_enable_xmon() */
+static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE);	/* read-only mode: commands such as 'mz' and 'p' are refused */
+
+static unsigned long adrs;
+static int size = 1;
+#define MAX_DUMP (64 * 1024)
+static unsigned long ndump = 64;
+#define MAX_IDUMP (MAX_DUMP >> 2)
+static unsigned long nidump = 16;
+static unsigned long ncsum = 4096;
+static int termch;
+static char tmpstr[KSYM_NAME_LEN];
+static int tracing_enabled;	/* tracing_is_on() as sampled on entry; used to turn tracing back on for 'x'/'X' */
+
+static long bus_error_jmp[JMP_BUF_LEN];	/* setjmp context of the command currently guarding against faults */
+static int catch_memory_errors;	/* when set, xmon_fault_handler() passes faults taken in xmon to handle_fault() */
+static int catch_spr_faults;	/* when set, an exception on a CPU already in xmon longjmps to bus_error_jmp */
+static long *xmon_fault_jmp[NR_CPUS];	/* per-CPU pointer to that CPU's recurse_jmp in xmon_core() */
+
+/* Breakpoint stuff */
+struct bpt {
+	unsigned long	address;	/* address the breakpoint is planted at */
+	u32		*instr;	/* this entry's slot in bpt_table: the original instruction followed by a trap */
+	atomic_t	ref_count;	/* raised when a CPU is sent to execute out of instr[], dropped when it traps back */
+	int		enabled;	/* BP_* bits below; 0 means not enabled */
+	unsigned long	pad;
+};
+
+/* Bits in bpt.enabled */
+#define BP_CIABR	1
+#define BP_TRAP		2
+#define BP_DABR		4
+
+static struct bpt bpts[NBPTS];
+static struct bpt dabr[HBP_NUM_MAX];	/* data breakpoints, indexed by watchpoint slot */
+static struct bpt *iabr;	/* entry whose address is loaded into CIABR, or NULL */
+static unsigned int bpinstr = PPC_RAW_TRAP();	/* trap instruction written over breakpointed code */
+
+#define BP_NUM(bp)	((bp) - bpts + 1)	/* 1-based breakpoint number used in messages */
+
+/* Prototypes */
+static int cmds(struct pt_regs *);
+static int mread(unsigned long, void *, int);
+static int mwrite(unsigned long, void *, int);
+static int mread_instr(unsigned long, ppc_inst_t *);
+static int handle_fault(struct pt_regs *);
+static void byterev(unsigned char *, int);
+static void memex(void);
+static int bsesc(void);
+static void dump(void);
+static void show_pte(unsigned long);
+static void prdump(unsigned long, long);
+static int ppc_inst_dump(unsigned long, long, int);
+static void dump_log_buf(void);
+
+#ifdef CONFIG_SMP
+static int xmon_switch_cpu(unsigned long);
+static int xmon_batch_next_cpu(void);
+static int batch_cmds(struct pt_regs *);
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+static void dump_opal_msglog(void);
+#else
+static inline void dump_opal_msglog(void)	/* stub used when CONFIG_PPC_POWERNV is not set */
+{
+	printf("Machine is not running OPAL firmware.\n");
+}
+#endif
+
+static void backtrace(struct pt_regs *);
+static void excprint(struct pt_regs *);
+static void prregs(struct pt_regs *);
+static void memops(int);
+static void memlocate(void);
+static void memzcan(void);
+static void memdiffs(unsigned char *, unsigned char *, unsigned, unsigned);
+int skipbl(void);
+int scanhex(unsigned long *valp);
+static void scannl(void);
+static int hexdigit(int);
+void getstring(char *, int);
+static void flush_input(void);
+static int inchar(void);
+static void take_input(char *);
+static int  read_spr(int, unsigned long *);
+static void write_spr(int, unsigned long);
+static void super_regs(void);
+static void remove_bpts(void);
+static void insert_bpts(void);
+static void remove_cpu_bpts(void);
+static void insert_cpu_bpts(void);
+static struct bpt *at_breakpoint(unsigned long pc);
+static struct bpt *in_breakpoint_table(unsigned long pc, unsigned long *offp);
+static int  do_step(struct pt_regs *);
+static void bpt_cmds(void);
+static void cacheflush(void);
+static int  cpu_cmd(void);
+static void csum(void);
+static void bootcmds(void);
+static void proccall(void);
+static void show_tasks(void);
+void dump_segments(void);
+static void symbol_lookup(void);
+static void xmon_show_stack(unsigned long sp, unsigned long lr,
+			    unsigned long pc);
+static void xmon_print_symbol(unsigned long address, const char *mid,
+			      const char *after);
+static const char *getvecname(unsigned long vec);
+
+static int do_spu_cmd(void);
+
+#ifdef CONFIG_44x
+static void dump_tlb_44x(void);
+#endif
+#ifdef CONFIG_PPC_BOOK3E_64
+static void dump_tlb_book3e(void);
+#endif
+
+static void clear_all_bpt(void);
+
+#ifdef CONFIG_PPC64
+#define REG		"%.16lx"	/* 16 hex digits on 64-bit builds */
+#else
+#define REG		"%.8lx"	/* 8 hex digits otherwise */
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define GETWORD(v)	(((v)[3] << 24) + ((v)[2] << 16) + ((v)[1] << 8) + (v)[0])	/* v[0] is the least significant byte */
+#else
+#define GETWORD(v)	(((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])	/* v[0] is the most significant byte */
+#endif
+
+static const char *xmon_ro_msg = "Operation disabled: xmon in read-only mode\n";
+
+static char *help_string = "\
+Commands:\n\
+  b	show breakpoints\n\
+  bd	set data breakpoint\n\
+  bi	set instruction breakpoint\n\
+  bc	clear breakpoint\n"
+#ifdef CONFIG_SMP
+  "\
+  c	print cpus stopped in xmon\n\
+  c#	try to switch to cpu number h (in hex)\n\
+  c# $	run command '$' (one of 'r','S' or 't') on all cpus in xmon\n"
+#endif
+  "\
+  C	checksum\n\
+  d	dump bytes\n\
+  d1	dump 1 byte values\n\
+  d2	dump 2 byte values\n\
+  d4	dump 4 byte values\n\
+  d8	dump 8 byte values\n\
+  di	dump instructions\n\
+  df	dump float values\n\
+  dd	dump double values\n\
+  dl    dump the kernel log buffer\n"
+#ifdef CONFIG_PPC_POWERNV
+  "\
+  do    dump the OPAL message log\n"
+#endif
+#ifdef CONFIG_PPC64
+  "\
+  dp[#]	dump paca for current cpu, or cpu #\n\
+  dpa	dump paca for all possible cpus\n"
+#endif
+  "\
+  dr	dump stream of raw bytes\n\
+  dv	dump virtual address translation \n\
+  dt	dump the tracing buffers (uses printk)\n\
+  dtc	dump the tracing buffers for current CPU (uses printk)\n\
+"
+#ifdef CONFIG_PPC_POWERNV
+"  dx#   dump xive on CPU #\n\
+  dxi#  dump xive irq state #\n\
+  dxa   dump xive on all CPUs\n"
+#endif
+"  e	print exception information\n\
+  f	flush cache\n\
+  la	lookup symbol+offset of specified address\n\
+  ls	lookup address of specified symbol\n\
+  lp s [#]	lookup address of percpu symbol s for current cpu, or cpu #\n\
+  m	examine/change memory\n\
+  mm	move a block of memory\n\
+  ms	set a block of memory\n\
+  md	compare two blocks of memory\n\
+  ml	locate a block of memory\n\
+  mz	zero a block of memory\n\
+  mi	show information about memory allocation\n\
+  p 	call a procedure\n\
+  P 	list processes/tasks\n\
+  r	print registers\n\
+  s	single step\n"
+#ifdef CONFIG_SPU_BASE
+"  ss	stop execution on all spus\n\
+  sr	restore execution on stopped spus\n\
+  sf  #	dump spu fields for spu # (in hex)\n\
+  sd  #	dump spu local store for spu # (in hex)\n\
+  sdi #	disassemble spu local store for spu # (in hex)\n"
+#endif
+"  S	print special registers\n\
+  Sa    print all SPRs\n\
+  Sr #	read SPR #\n\
+  Sw #v write v to SPR #\n\
+  t	print backtrace\n\
+  x	exit monitor and recover\n\
+  X	exit monitor and don't recover\n"
+#if defined(CONFIG_PPC_BOOK3S_64)
+"  u	dump segment table or SLB\n"
+#elif defined(CONFIG_PPC_BOOK3S_32)
+"  u	dump segment registers\n"
+#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E_64)
+"  u	dump TLB\n"
+#endif
+"  U	show uptime information\n"
+"  ?	help\n"
+"  # n	limit output to n lines per page (for dp, dpa, dl)\n"
+"  zr	reboot\n"
+"  zh	halt\n"
+"  zp	power off\n";	/* 'zp' is dispatched by bootcmds() and was missing from the help */
+
+#ifdef CONFIG_SECURITY
+static bool xmon_is_locked_down(void)	/* true once LOCKDOWN_XMON_RW is in force; may also switch xmon to read-only */
+{
+	static bool lockdown;	/* latched: once true the RW level is not queried again */
+
+	if (!lockdown) {
+		lockdown = !!security_locked_down(LOCKDOWN_XMON_RW);
+		if (lockdown) {
+			printf("xmon: Disabled due to kernel lockdown\n");
+			xmon_is_ro = true;
+		}
+	}
+
+	if (!xmon_is_ro) {	/* still writable: check the LOCKDOWN_XMON_WR level as well */
+		xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR);
+		if (xmon_is_ro)
+			printf("xmon: Read-only due to kernel lockdown\n");
+	}
+
+	return lockdown;
+}
+#else /* CONFIG_SECURITY */
+static inline bool xmon_is_locked_down(void)
+{
+	return false;	/* this stub never reports a lockdown */
+}
+#endif
+
+static struct pt_regs *xmon_regs;
+
+static inline void sync(void)	/* issue the "sync" and "isync" instructions back to back */
+{
+	asm volatile("sync; isync");
+}
+
+static inline void cflush(void *p)	/* run dcbf then icbi on the address in p */
+{
+	asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p));
+}
+
+static inline void cinval(void *p)	/* run dcbi then icbi on the address in p */
+{
+	asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p));
+}
+
+/**
+ * write_ciabr() - write the CIABR SPR
+ * @ciabr:	The value to write.
+ *
+ * This function writes a value to the CIABR register either directly
+ * through the mtspr instruction if the kernel is in HV privilege mode, or
+ * by calling a hypervisor function to achieve the same in case the kernel
+ * is in supervisor privilege mode.
+ */
+static void write_ciabr(unsigned long ciabr)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))	/* nothing is written on CPUs without this feature */
+		return;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		mtspr(SPRN_CIABR, ciabr);
+		return;
+	}
+	plpar_set_ciabr(ciabr);
+}
+
+/**
+ * set_ciabr() - set the CIABR
+ * @addr:	The breakpoint address to program.
+ *
+ * Replaces the privilege bits of @addr with CIABR_PRIV_HYPER when
+ * CPU_FTR_HVMODE is present, or CIABR_PRIV_SUPER otherwise, and hands
+ * the result to write_ciabr().
+ */
+static void set_ciabr(unsigned long addr)
+{
+	unsigned long priv = CIABR_PRIV_SUPER;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		priv = CIABR_PRIV_HYPER;
+
+	write_ciabr((addr & ~CIABR_PRIV) | priv);
+}
+
+/*
+ * Disable surveillance (the service processor watchdog function)
+ * while we are in xmon.
+ * XXX we should re-enable it when we leave. :)
+ */
+#define SURVEILLANCE_TOKEN	9000
+
+static inline void disable_surveillance(void)	/* no-op unless CONFIG_PPC_PSERIES is set */
+{
+#ifdef CONFIG_PPC_PSERIES
+	/* Since this can't be a module, args should end up below 4GB. */
+	static struct rtas_args args;
+	const s32 token = rtas_function_token(RTAS_FN_SET_INDICATOR);
+
+	/*
+	 * At this point we have got all the cpus we can into
+	 * xmon, so there is hopefully no other cpu calling RTAS
+	 * at the moment, even though we don't take rtas.lock.
+	 * If we did try to take rtas.lock there would be a
+	 * real possibility of deadlock.
+	 */
+	if (token == RTAS_UNKNOWN_SERVICE)	/* the set-indicator call is not available */
+		return;
+
+	rtas_call_unlocked(&args, token, 3, 1, NULL,
+			   SURVEILLANCE_TOKEN, 0, 0);	/* three inputs: SURVEILLANCE_TOKEN, 0, 0 */
+
+#endif /* CONFIG_PPC_PSERIES */
+}
+
+#ifdef CONFIG_SMP
+static int xmon_speaker;
+
+static void get_output_lock(void)	/* claim xmon_speaker for this CPU, stealing it after about a second */
+{
+	int me = smp_processor_id() + 0x100;	/* offset keeps CPU 0's id distinct from the "unowned" value 0 */
+	int last_speaker = 0, prev;
+	long timeout;
+
+	if (xmon_speaker == me)	/* already held by this CPU */
+		return;
+
+	for (;;) {
+		last_speaker = cmpxchg(&xmon_speaker, 0, me);
+		if (last_speaker == 0)	/* it was free and is now ours */
+			return;
+
+		/*
+		 * Wait a full second for the lock, we might be on a slow
+		 * console, but check every 100us.
+		 */
+		timeout = 10000;
+		while (xmon_speaker == last_speaker) {
+			if (--timeout > 0) {
+				udelay(100);
+				continue;
+			}
+
+			/* hostile takeover */
+			prev = cmpxchg(&xmon_speaker, last_speaker, me);
+			if (prev == last_speaker)
+				return;
+			break;	/* the holder changed under us: start over */
+		}
+	}
+}
+
+static void release_output_lock(void)	/* mark the output lock as unowned; does not check who held it */
+{
+	xmon_speaker = 0;
+}
+
+int cpus_are_in_xmon(void)
+{
+	return cpumask_empty(&cpus_in_xmon) ? 0 : 1;	/* 1 while any CPU is recorded in cpus_in_xmon */
+}
+
+static bool wait_for_other_cpus(int ncpus)
+{
+	unsigned long tries = 20000;	/* 20000 polls, 100us apart: 2s in total */
+
+	/* Poll until at least ncpus CPUs have checked in, or we give up. */
+	while (tries--) {
+		if (cpumask_weight(&cpus_in_xmon) >= ncpus)
+			return true;
+		udelay(100);
+		barrier();
+	}
+
+	return false;
+}
+#else /* CONFIG_SMP */
+static inline void get_output_lock(void) {}
+static inline void release_output_lock(void) {}
+#endif
+
+static void xmon_touch_watchdogs(void)	/* called just before xmon_core() returns */
+{
+	touch_softlockup_watchdog_sync();
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+}
+
+static int xmon_core(struct pt_regs *regs, volatile int fromipi)	/* returns 0 only when left via 'X' or EOF */
+{
+	volatile int cmd = 0;
+	struct bpt *volatile bp;
+	long recurse_jmp[JMP_BUF_LEN];	/* longjmp target for exceptions taken while in xmon */
+	bool locked_down;
+	unsigned long offset;
+	unsigned long flags;
+#ifdef CONFIG_SMP
+	int cpu;
+	int secondary;	/* non-zero while this CPU does not run the command loop */
+#endif
+
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	locked_down = xmon_is_locked_down();
+
+	if (!fromipi) {
+		tracing_enabled = tracing_is_on();	/* remembered so 'x'/'X' can turn tracing back on */
+		tracing_off();
+	}
+
+	bp = in_breakpoint_table(regs->nip, &offset);	/* did we trap while executing out of a bpt_table slot? */
+	if (bp != NULL) {
+		regs_set_return_ip(regs, bp->address + offset);	/* report the matching original address instead */
+		atomic_dec(&bp->ref_count);
+	}
+
+	remove_cpu_bpts();
+
+#ifdef CONFIG_SMP
+	cpu = smp_processor_id();
+	if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {	/* re-entry: this CPU is already in xmon */
+		/*
+		 * We catch SPR read/write faults here because the 0x700, 0xf60
+		 * etc. handlers don't call debugger_fault_handler().
+		 */
+		if (catch_spr_faults)
+			longjmp(bus_error_jmp, 1);
+		get_output_lock();
+		excprint(regs);
+		printf("cpu 0x%x: Exception %lx %s in xmon, "
+		       "returning to main loop\n",
+		       cpu, regs->trap, getvecname(TRAP(regs)));
+		release_output_lock();
+		longjmp(xmon_fault_jmp[cpu], 1);	/* resume at the setjmp(recurse_jmp) of the outer invocation */
+	}
+
+	if (setjmp(recurse_jmp) != 0) {	/* reached via the longjmp above, from a nested exception */
+		if (!in_xmon || !xmon_gate) {
+			get_output_lock();
+			printf("xmon: WARNING: bad recursive fault "
+			       "on cpu 0x%x\n", cpu);
+			release_output_lock();
+			goto waiting;
+		}
+		secondary = !(xmon_taken && cpu == xmon_owner);	/* recompute our role after the longjmp */
+		goto cmdloop;
+	}
+
+	xmon_fault_jmp[cpu] = recurse_jmp;
+
+	bp = NULL;
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
+		bp = at_breakpoint(regs->nip);
+	if (bp || regs_is_unrecoverable(regs))
+		fromipi = 0;	/* handle this like a direct entry even if it arrived as an IPI */
+
+	if (!fromipi) {
+		get_output_lock();
+		if (!locked_down)
+			excprint(regs);
+		if (bp) {
+			printf("cpu 0x%x stopped at breakpoint 0x%tx (",
+			       cpu, BP_NUM(bp));
+			xmon_print_symbol(regs->nip, " ", ")\n");
+		}
+		if (regs_is_unrecoverable(regs))
+			printf("WARNING: exception is not recoverable, "
+			       "can't continue\n");
+		release_output_lock();
+	}
+
+	cpumask_set_cpu(cpu, &cpus_in_xmon);
+
+ waiting:
+	secondary = 1;
+	spin_begin();
+	while (secondary && !xmon_gate) {
+		if (in_xmon == 0) {
+			if (fromipi) {
+				spin_end();
+				goto leave;
+			}
+			secondary = test_and_set_bit(0, &in_xmon);	/* old bit clear means we are the first CPU in */
+		}
+		spin_cpu_relax();
+		touch_nmi_watchdog();
+	}
+	spin_end();
+
+	if (!secondary && !xmon_gate) {
+		/* we are the first cpu to come in */
+		/* interrupt other cpu(s) */
+		int ncpus = num_online_cpus();
+
+		xmon_owner = cpu;
+		mb();
+		if (ncpus > 1) {
+			/*
+			 * A system reset (trap == 0x100) can be triggered on
+			 * all CPUs, so when we come in via 0x100 try waiting
+			 * for the other CPUs to come in before we send the
+			 * debugger break (IPI). This is similar to
+			 * crash_kexec_secondary().
+			 */
+			if (TRAP(regs) !=  INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus))
+				smp_send_debugger_break();
+
+			wait_for_other_cpus(ncpus);
+		}
+		remove_bpts();
+		disable_surveillance();
+
+		if (!locked_down) {
+			/* for breakpoint or single step, print curr insn */
+			if (bp || TRAP(regs) == INTERRUPT_TRACE)
+				ppc_inst_dump(regs->nip, 1, 0);
+			printf("enter ? for help\n");
+		}
+
+		mb();
+		xmon_gate = 1;	/* lets the CPUs spinning in the loop above proceed to cmdloop */
+		barrier();
+		touch_nmi_watchdog();
+	}
+
+ cmdloop:
+	while (in_xmon) {
+		if (secondary) {
+			spin_begin();
+			if (cpu == xmon_owner) {	/* ownership is being offered to this CPU */
+				if (!test_and_set_bit(0, &xmon_taken)) {
+					secondary = 0;
+					spin_end();
+					continue;
+				}
+				/* missed it */
+				while (cpu == xmon_owner)
+					spin_cpu_relax();
+			}
+			spin_cpu_relax();
+			touch_nmi_watchdog();
+		} else {
+			cmd = 1;
+#ifdef CONFIG_SMP
+			if (xmon_batch)
+				cmd = batch_cmds(regs);
+#endif
+			if (!locked_down && cmd)
+				cmd = cmds(regs);
+			if (locked_down || cmd != 0) {
+				/* exiting xmon */
+				insert_bpts();
+				xmon_gate = 0;
+				wmb();
+				in_xmon = 0;	/* ends the cmdloop on every CPU */
+				break;
+			}
+			/* have switched to some other cpu */
+			secondary = 1;
+		}
+	}
+ leave:
+	cpumask_clear_cpu(cpu, &cpus_in_xmon);
+	xmon_fault_jmp[cpu] = NULL;
+#else
+	/* UP is simple... */
+	if (in_xmon) {
+		printf("Exception %lx %s in xmon, returning to main loop\n",
+		       regs->trap, getvecname(TRAP(regs)));
+		longjmp(xmon_fault_jmp[0], 1);
+	}
+	if (setjmp(recurse_jmp) == 0) {
+		xmon_fault_jmp[0] = recurse_jmp;
+		in_xmon = 1;
+
+		excprint(regs);
+		bp = at_breakpoint(regs->nip);
+		if (bp) {
+			printf("Stopped at breakpoint %tx (", BP_NUM(bp));
+			xmon_print_symbol(regs->nip, " ", ")\n");
+		}
+		if (regs_is_unrecoverable(regs))
+			printf("WARNING: exception is not recoverable, "
+			       "can't continue\n");
+		remove_bpts();
+		disable_surveillance();
+		if (!locked_down) {
+			/* for breakpoint or single step, print current insn */
+			if (bp || TRAP(regs) == INTERRUPT_TRACE)
+				ppc_inst_dump(regs->nip, 1, 0);
+			printf("enter ? for help\n");
+		}
+	}
+
+	if (!locked_down)
+		cmd = cmds(regs);
+
+	insert_bpts();
+	in_xmon = 0;
+#endif
+
+#ifdef CONFIG_BOOKE
+	if (regs->msr & MSR_DE) {
+		bp = at_breakpoint(regs->nip);
+		if (bp != NULL) {
+			regs_set_return_ip(regs, (unsigned long) &bp->instr[0]);	/* resume in the breakpoint's bpt_table slot */
+			atomic_inc(&bp->ref_count);
+		}
+	}
+#else
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
+		bp = at_breakpoint(regs->nip);
+		if (bp != NULL) {
+			int stepped = emulate_step(regs, ppc_inst_read(bp->instr));
+			if (stepped == 0) {	/* not emulated: execute the copy in the bpt_table slot instead */
+				regs_set_return_ip(regs, (unsigned long) &bp->instr[0]);
+				atomic_inc(&bp->ref_count);
+			} else if (stepped < 0) {
+				printf("Couldn't single-step %s instruction\n",
+				    IS_RFID(ppc_inst_read(bp->instr))? "rfid": "mtmsrd");
+			}
+		}
+	}
+#endif
+	if (locked_down)
+		clear_all_bpt();
+	else
+		insert_cpu_bpts();
+
+	xmon_touch_watchdogs();
+	local_irq_restore(flags);
+
+	return cmd != 'X' && cmd != EOF;
+}
+
+/* Enter xmon; with a NULL excp the current register state is captured. */
+int xmon(struct pt_regs *excp)
+{
+	struct pt_regs here;	/* only filled in when the caller passes NULL */
+
+	if (excp)
+		return xmon_core(excp, 0);
+
+	ppc_save_regs(&here);
+	return xmon_core(&here, 0);
+}
+EXPORT_SYMBOL(xmon);
+
+irqreturn_t xmon_irq(int irq, void *d)	/* interrupt handler that drops into xmon; always returns IRQ_HANDLED */
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	printf("Keyboard interrupt\n");
+	xmon(get_irq_regs());	/* enter the monitor with the registers returned by get_irq_regs() */
+	local_irq_restore(flags);
+	return IRQ_HANDLED;
+}
+
+static int xmon_bpt(struct pt_regs *regs)
+{
+	const unsigned long kmode = MSR_IR | MSR_64BIT;
+	unsigned long slot_off;
+	struct bpt *hit;
+
+	/* Only 64-bit, non-user mode with instruction relocation on is ours. */
+	if ((regs->msr & (kmode | MSR_PR)) != kmode)
+		return 0;
+
+	/* Trap that follows the copied instruction in a bpt_table slot? */
+	hit = in_breakpoint_table(regs->nip, &slot_off);
+	if (hit && (slot_off == 4 || slot_off == 8)) {
+		regs_set_return_ip(regs, hit->address + slot_off);
+		atomic_dec(&hit->ref_count);
+		return 1;
+	}
+
+	/* Otherwise claim it only if an enabled breakpoint sits at nip. */
+	if (!at_breakpoint(regs->nip))
+		return 0;
+
+	xmon_core(regs, 0);
+	return 1;
+}
+
+static int xmon_sstep(struct pt_regs *regs)	/* returns 1 after entering xmon, 0 if the event is left alone */
+{
+	if (user_mode(regs))	/* events from user mode are not handled here */
+		return 0;
+	xmon_core(regs, 0);
+	return 1;
+}
+
+static int xmon_break_match(struct pt_regs *regs)
+{
+	int slot;
+
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
+		return 0;
+
+	/* Enter xmon if any data breakpoint slot is enabled. */
+	for (slot = 0; slot < nr_wp_slots(); slot++) {
+		if (!dabr[slot].enabled)
+			continue;
+		xmon_core(regs, 0);
+		return 1;
+	}
+	return 0;
+}
+
+static int xmon_iabr_match(struct pt_regs *regs)
+{
+	const unsigned long kmode = MSR_IR | MSR_64BIT;
+
+	if ((regs->msr & (kmode | MSR_PR)) != kmode || !iabr)	/* wrong mode, or no instruction breakpoint set */
+		return 0;
+	xmon_core(regs, 0);
+	return 1;
+}
+
+static int xmon_ipi(struct pt_regs *regs)	/* always returns 0 */
+{
+#ifdef CONFIG_SMP
+	if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon))	/* monitor active and this CPU not yet in it */
+		xmon_core(regs, 1);
+#endif
+	return 0;
+}
+
+static int xmon_fault_handler(struct pt_regs *regs)
+{
+	unsigned long slot_off;
+	struct bpt *bp;
+
+	if (in_xmon && catch_memory_errors)
+		handle_fault(regs);	/* doesn't return */
+
+	/* A fault inside a bpt_table slot is charged to the original address. */
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
+		return 0;
+	bp = in_breakpoint_table(regs->nip, &slot_off);
+	if (bp) {
+		regs_set_return_ip(regs, bp->address + slot_off);
+		atomic_dec(&bp->ref_count);
+	}
+	return 0;
+}
+
+/* Turn the xmon debugger hooks on if they are currently off. */
+static inline void force_enable_xmon(void)
+{
+	if (xmon_on)
+		return;
+
+	printf("xmon: Enabling debugger hooks\n");
+	xmon_on = 1;
+}
+
+static struct bpt *at_breakpoint(unsigned long pc)
+{
+	struct bpt *volatile cur;
+
+	/* Linear scan of bpts[] for an enabled entry planted at pc. */
+	for (cur = bpts; cur < &bpts[NBPTS]; ++cur) {
+		if (cur->enabled && cur->address == pc)
+			return cur;
+	}
+	return NULL;
+}
+
+/* Map an address inside bpt_table back to its breakpoint and slot offset. */
+static struct bpt *in_breakpoint_table(unsigned long nip, unsigned long *offp)
+{
+	const unsigned long delta = nip - (unsigned long)bpt_table;
+
+	/* An address below the table wraps to a huge unsigned delta. */
+	if (delta >= sizeof(bpt_table))
+		return NULL;
+
+	*offp = delta & (BPT_SIZE - 1);	/* written even when NULL is returned below */
+	return (delta & 3) ? NULL : bpts + (delta / BPT_SIZE);
+}
+
+static struct bpt *new_breakpoint(unsigned long a)
+{
+	struct bpt *slot;
+
+	a &= ~3UL;	/* breakpoints are kept on word-aligned addresses */
+	slot = at_breakpoint(a);
+	if (slot)	/* an enabled breakpoint already exists there: reuse it */
+		return slot;
+
+	for (slot = bpts; slot < &bpts[NBPTS]; ++slot) {
+		if (slot->enabled || atomic_read(&slot->ref_count) != 0)
+			continue;	/* in use, or a CPU is still running out of its slot */
+		slot->address = a;
+		slot->instr = (void *)(bpt_table + ((slot - bpts) * BPT_WORDS));
+		return slot;
+	}
+
+	printf("Sorry, no free breakpoints.  Please clear one first.\n");
+	return NULL;
+}
+
+static void insert_bpts(void)	/* (re)plant every enabled BP_TRAP/BP_CIABR breakpoint; entries that fail a check are disabled */
+{
+	int i;
+	ppc_inst_t instr, instr2;
+	struct bpt *bp, *bp2;
+
+	bp = bpts;
+	for (i = 0; i < NBPTS; ++i, ++bp) {
+		if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0)
+			continue;
+		if (!mread_instr(bp->address, &instr)) {
+			printf("Couldn't read instruction at %lx, "
+			       "disabling breakpoint there\n", bp->address);
+			bp->enabled = 0;
+			continue;
+		}
+		if (!can_single_step(ppc_inst_val(instr))) {
+			printf("Breakpoint at %lx is on an instruction that can't be single stepped, disabling it\n",
+					bp->address);
+			bp->enabled = 0;
+			continue;
+		}
+		/*
+		 * Check the address is not a suffix by looking for a prefix in
+		 * front of it.
+		 */
+		if (mread_instr(bp->address - 4, &instr2) == 8) {	/* 8 is taken here to mean a prefixed instruction was read */
+			printf("Breakpoint at %lx is on the second word of a prefixed instruction, disabling it\n",
+			       bp->address);
+			bp->enabled = 0;
+			continue;
+		}
+		/*
+		 * We might still be a suffix - if the prefix has already been
+		 * replaced by a breakpoint we won't catch it with the above
+		 * test.
+		 */
+		bp2 = at_breakpoint(bp->address - 4);
+		if (bp2 && ppc_inst_prefixed(ppc_inst_read(bp2->instr))) {
+			printf("Breakpoint at %lx is on the second word of a prefixed instruction, disabling it\n",
+			       bp->address);
+			bp->enabled = 0;
+			continue;
+		}
+
+		patch_instruction(bp->instr, instr);	/* slot: copy of the original instruction ... */
+		patch_instruction(ppc_inst_next(bp->instr, bp->instr),
+				  ppc_inst(bpinstr));	/* ... followed by a trap */
+		if (bp->enabled & BP_CIABR)	/* CIABR breakpoints do not patch the code itself */
+			continue;
+		if (patch_instruction((u32 *)bp->address,
+				      ppc_inst(bpinstr)) != 0) {
+			printf("Couldn't write instruction at %lx, "
+			       "disabling breakpoint there\n", bp->address);
+			bp->enabled &= ~BP_TRAP;
+			continue;
+		}
+	}
+}
+
+static void insert_cpu_bpts(void)	/* program the data breakpoints and CIABR from dabr[] and iabr */
+{
+	int i;
+	struct arch_hw_breakpoint brk;
+
+	for (i = 0; i < nr_wp_slots(); i++) {
+		if (dabr[i].enabled) {
+			brk.address = dabr[i].address;
+			brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+			brk.len = 8;
+			brk.hw_len = 8;
+			__set_breakpoint(i, &brk);	/* slot i of dabr[] goes to hardware slot i */
+		}
+	}
+
+	if (iabr)
+		set_ciabr(iabr->address);
+}
+
+static void remove_bpts(void)	/* put the original instruction back at every trap-only breakpoint */
+{
+	int i;
+	struct bpt *bp;
+	ppc_inst_t instr;
+
+	bp = bpts;
+	for (i = 0; i < NBPTS; ++i, ++bp) {
+		if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP)	/* skip entries that are not trap-only */
+			continue;
+		if (mread_instr(bp->address, &instr)
+		    && ppc_inst_equal(instr, ppc_inst(bpinstr))
+		    && patch_instruction(
+			(u32 *)bp->address, ppc_inst_read(bp->instr)) != 0)	/* restore only where our trap is still in place */
+			printf("Couldn't remove breakpoint at %lx\n",
+			       bp->address);
+	}
+}
+
+static void remove_cpu_bpts(void)	/* called on every xmon_core() entry */
+{
+	hw_breakpoint_disable();
+	write_ciabr(0);
+}
+
+/* 'U' command: print the boot-relative uptime.  Based on uptime_proc_show(). */
+static void
+show_uptime(void)
+{
+	struct timespec64 uptime;
+
+	if (setjmp(bus_error_jmp) == 0) {	/* a non-zero return means we came back here via longjmp */
+		catch_memory_errors = 1;
+		sync();
+
+		ktime_get_coarse_boottime_ts64(&uptime);
+		printf("Uptime: %lu.%.2lu seconds\n", (unsigned long)uptime.tv_sec,
+			((unsigned long)uptime.tv_nsec / (NSEC_PER_SEC/100)));	/* fraction in hundredths of a second */
+
+		sync();
+		__delay(200);	/* the stray line-continuation backslash that followed this call is removed */
+	}
+	catch_memory_errors = 0;
+}
+
+/* '#' command: set the pagination limit; an unparsable number selects 0. */
+static void set_lpp_cmd(void)
+{
+	unsigned long lpp;
+	const int ok = scanhex(&lpp);
+
+	if (!ok)
+		printf("Invalid number.\n");
+	xmon_set_pagination_lpp(ok ? lpp : 0);
+}
+/* Command interpreting routine: returns the exiting command ('x', 'X', 's' or EOF), or 0 after a CPU switch */
+static char *last_cmd;
+
+static int
+cmds(struct pt_regs *excp)
+{
+	int cmd = 0;
+
+	last_cmd = NULL;
+	xmon_regs = excp;
+
+	xmon_show_stack(excp->gpr[1], excp->link, excp->nip);
+
+	for(;;) {
+#ifdef CONFIG_SMP
+		printf("%x:", smp_processor_id());
+#endif /* CONFIG_SMP */
+		printf("mon> ");
+		flush_input();
+		termch = 0;
+		cmd = skipbl();
+		if( cmd == '\n' ) {	/* empty line: replay last_cmd if one is set */
+			if (last_cmd == NULL)
+				continue;
+			take_input(last_cmd);
+			last_cmd = NULL;
+			cmd = inchar();
+		}
+		switch (cmd) {
+		case 'm':
+			cmd = inchar();
+			switch (cmd) {
+			case 'm':
+			case 's':
+			case 'd':
+				memops(cmd);
+				break;
+			case 'l':
+				memlocate();
+				break;
+			case 'z':
+				if (xmon_is_ro) {
+					printf("%s", xmon_ro_msg);	/* never pass a non-literal as the format */
+					break;
+				}
+				memzcan();
+				break;
+			case 'i':
+				show_mem();
+				break;
+			default:
+				termch = cmd;	/* push the character back for memex() */
+				memex();
+			}
+			break;
+		case 'd':
+			dump();
+			break;
+		case 'l':
+			symbol_lookup();
+			break;
+		case 'r':
+			prregs(excp);	/* print regs */
+			break;
+		case 'e':
+			excprint(excp);
+			break;
+		case 'S':
+			super_regs();
+			break;
+		case 't':
+			backtrace(excp);
+			break;
+		case 'f':
+			cacheflush();
+			break;
+		case 's':
+			if (do_spu_cmd() == 0)
+				break;
+			if (do_step(excp))	/* non-zero: leave xmon so the step can run */
+				return cmd;
+			break;
+		case 'x':
+		case 'X':
+			if (tracing_enabled)
+				tracing_on();
+			return cmd;
+		case EOF:
+			printf(" <no input ...>\n");
+			mdelay(2000);
+			return cmd;
+		case '?':
+			xmon_puts(help_string);
+			break;
+		case '#':
+			set_lpp_cmd();
+			break;
+		case 'b':
+			bpt_cmds();
+			break;
+		case 'C':
+			csum();
+			break;
+		case 'c':
+			if (cpu_cmd())	/* non-zero: control moved to another CPU */
+				return 0;
+			break;
+		case 'z':
+			bootcmds();
+			break;
+		case 'p':
+			if (xmon_is_ro) {
+				printf("%s", xmon_ro_msg);	/* never pass a non-literal as the format */
+				break;
+			}
+			proccall();
+			break;
+		case 'P':
+			show_tasks();
+			break;
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_64S_HASH_MMU)
+		case 'u':
+			dump_segments();
+			break;
+#elif defined(CONFIG_44x)
+		case 'u':
+			dump_tlb_44x();
+			break;
+#elif defined(CONFIG_PPC_BOOK3E_64)
+		case 'u':
+			dump_tlb_book3e();
+			break;
+#endif
+		case 'U':
+			show_uptime();
+			break;
+		default:
+			printf("Unrecognized command: ");
+			do {	/* echo the rest of the line, escaping characters outside '!'..'~' */
+				if (' ' < cmd && cmd <= '~')
+					putchar(cmd);
+				else
+					printf("\\x%x", cmd);
+				cmd = inchar();
+			} while (cmd != '\n');
+			printf(" (type ? for help)\n");
+			break;
+		}
+	}
+}
+
+#ifdef CONFIG_BOOKE
+static int do_step(struct pt_regs *regs)	/* BookE variant: always returns 1 */
+{
+	regs_set_return_msr(regs, regs->msr | MSR_DE);
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+	return 1;
+}
+#else
+/*
+ * Step a single instruction.
+ * Some instructions we emulate, others we execute with MSR_SE set.
+ */
+static int do_step(struct pt_regs *regs)	/* returns 1 when the caller must leave xmon for the step to happen, 0 otherwise */
+{
+	ppc_inst_t instr;
+	int stepped;
+
+	force_enable_xmon();
+	/* check we are in 64-bit kernel mode, translation enabled */
+	if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
+		if (mread_instr(regs->nip, &instr)) {
+			stepped = emulate_step(regs, instr);
+			if (stepped < 0) {
+				printf("Couldn't single-step %s instruction\n",
+				       (IS_RFID(instr)? "rfid": "mtmsrd"));
+				return 0;
+			}
+			if (stepped > 0) {	/* emulated in place: report the new location and stay in xmon */
+				set_trap(regs, 0xd00);
+				printf("stepped to ");
+				xmon_print_symbol(regs->nip, " ", "\n");
+				ppc_inst_dump(regs->nip, 1, 0);
+				return 0;
+			}
+		}
+	}
+	regs_set_return_msr(regs, regs->msr | MSR_SE);	/* not emulated: set MSR_SE for the resumed context */
+	return 1;
+}
+#endif
+
+/* Handle the 'z' commands: zr restarts, zh halts, zp powers off. */
+static void bootcmds(void)
+{
+	char tmp[64];
+	const int cmd = inchar();
+
+	if (cmd == 'h') {
+		ppc_md.halt();
+	} else if (cmd == 'p') {
+		do_kernel_power_off();
+	} else if (cmd == 'r') {
+		getstring(tmp, sizeof(tmp));	/* rest of the line is passed to the restart hook */
+		ppc_md.restart(tmp);
+	}
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Hand control of xmon over to @cpu.
+ *
+ * Clears xmon_taken, names @cpu as xmon_owner and then spins (bounded by a
+ * countdown of 10000000 iterations) until xmon_taken becomes non-zero.
+ *
+ * Returns 1 if xmon_taken was set before, or right at, the end of the
+ * countdown; returns 0 if the countdown expired, in which case this cpu
+ * makes itself the owner again and prints a message.
+ */
+static int xmon_switch_cpu(unsigned long cpu)
+{
+	int timeout;
+
+	xmon_taken = 0;
+	mb();
+	xmon_owner = cpu;
+	timeout = 10000000;
+	while (!xmon_taken) {
+		if (--timeout == 0) {
+			/* bit already set: xmon_taken changed at the last moment */
+			if (test_and_set_bit(0, &xmon_taken))
+				break;
+			/* take control back */
+			mb();
+			xmon_owner = smp_processor_id();
+			printf("cpu 0x%lx didn't take control\n", cpu);
+			return 0;
+		}
+		barrier();
+	}
+	return 1;
+}
+
+/*
+ * Move a batch command on to the next cpu left in xmon_batch_cpus.
+ *
+ * Returns 0 when another cpu accepted control, or 1 once no candidate is
+ * left, in which case batch mode is switched off and a prompt is printed.
+ */
+static int xmon_batch_next_cpu(void)
+{
+	unsigned long next;
+
+	for (;;) {
+		if (cpumask_empty(&xmon_batch_cpus))
+			break;
+
+		next = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus,
+					 xmon_batch_start_cpu, true);
+		if (next >= nr_cpu_ids)
+			break;
+
+		/* remember where the walk began */
+		if (xmon_batch_start_cpu == -1)
+			xmon_batch_start_cpu = next;
+
+		if (xmon_switch_cpu(next))
+			return 0;
+
+		/* that cpu did not take control; drop it and try another */
+		cpumask_clear_cpu(next, &xmon_batch_cpus);
+	}
+
+	xmon_batch = 0;
+	printf("%x:mon> \n", smp_processor_id());
+	return 1;
+}
+
+/*
+ * Run the pending batch command (held in xmon_batch) on this cpu as if it
+ * had been typed at the prompt, then pass the batch on to the next cpu.
+ * Returns whatever xmon_batch_next_cpu() returns.
+ */
+static int batch_cmds(struct pt_regs *excp)
+{
+	int batch = xmon_batch;
+
+	/* simulate command entry */
+	termch = '\n';
+	last_cmd = NULL;
+	xmon_regs = excp;
+
+	/* echo a prompt followed by the command being replayed */
+	printf("%x:", smp_processor_id());
+	printf("mon> ");
+	printf("%c\n", (char)batch);
+
+	if (batch == 'r')
+		prregs(excp);	/* print regs */
+	else if (batch == 'S')
+		super_regs();
+	else if (batch == 't')
+		backtrace(excp);
+
+	/* this cpu is done; take it out of the set still to be visited */
+	cpumask_clear_cpu(smp_processor_id(), &xmon_batch_cpus);
+
+	return xmon_batch_next_cpu();
+}
+
+/*
+ * 'c' command.
+ *
+ *   c          list the cpus currently stopped in xmon
+ *   c <cpu>    switch to <cpu> (hex); if it is not in xmon, say so and, on
+ *              64-bit, show a backtrace from its paca's saved_r1
+ *   c#<cmd>    run batch command 'r', 'S' or 't' on every cpu in xmon
+ *
+ * Returns 1 when control was handed to another cpu (the command loop then
+ * returns), 0 otherwise.
+ */
+static int cpu_cmd(void)
+{
+	unsigned long cpu, first_cpu, last_cpu;
+
+	cpu = skipbl();
+	if (cpu == '#') {
+		xmon_batch = skipbl();
+		if (xmon_batch) {
+			switch (xmon_batch) {
+			case 'r':
+			case 'S':
+			case 't':
+				cpumask_copy(&xmon_batch_cpus, &cpus_in_xmon);
+				if (cpumask_weight(&xmon_batch_cpus) <= 1) {
+					printf("There are no other cpus in xmon\n");
+					break;
+				}
+				xmon_batch_start_cpu = -1;
+				if (!xmon_batch_next_cpu())
+					return 1;
+				break;
+			default:
+				printf("c# only supports 'r', 'S' and 't' commands\n");
+			}
+			xmon_batch = 0;
+			return 0;
+		}
+	}
+	/* push the character back so scanhex() sees it */
+	termch = cpu;
+
+	if (!scanhex(&cpu)) {
+		/* print cpus waiting or in xmon, collapsing runs into ranges */
+		printf("cpus stopped:");
+		last_cpu = first_cpu = NR_CPUS;
+		for_each_possible_cpu(cpu) {
+			if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
+				if (cpu == last_cpu + 1) {
+					last_cpu = cpu;
+				} else {
+					if (last_cpu != first_cpu)
+						printf("-0x%lx", last_cpu);
+					last_cpu = first_cpu = cpu;
+					printf(" 0x%lx", cpu);
+				}
+			}
+		}
+		if (last_cpu != first_cpu)
+			printf("-0x%lx", last_cpu);
+		printf("\n");
+		return 0;
+	}
+
+	/*
+	 * The number came straight from the keyboard: reject anything out of
+	 * range before it is used to index cpus_in_xmon and paca_ptrs[].
+	 */
+	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+		printf("invalid cpu 0x%lx\n", cpu);
+		return 0;
+	}
+
+	/* try to switch to cpu specified */
+	if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
+		printf("cpu 0x%lx isn't in xmon\n", cpu);
+#ifdef CONFIG_PPC64
+		printf("backtrace of paca[0x%lx].saved_r1 (possibly stale):\n", cpu);
+		xmon_show_stack(paca_ptrs[cpu]->saved_r1, 0, 0);
+#endif
+		return 0;
+	}
+
+	return xmon_switch_cpu(cpu);
+}
+#else
+/* Without CONFIG_SMP the 'c' command does nothing and reports "no switch". */
+static int cpu_cmd(void)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Byte-indexed lookup table for the 16-bit checksum computed by csum().
+ * NOTE(review): the values match the FCS-16 table published in RFC 1662
+ * (PPP in HDLC-like framing) -- confirm before relying on that.
+ */
+static unsigned short fcstab[256] = {
+	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+/* Fold byte c into running checksum fcs: one table lookup per byte. */
+#define FCS(fcs, c)	(((fcs) >> 8) ^ fcstab[((fcs) ^ (c)) & 0xff])
+
+/*
+ * 'C' command: checksum a range of memory.
+ *
+ * Reads a start address and a byte count (both hex; nothing is done unless
+ * both scanhex() calls succeed), folds each byte into a 16-bit checksum
+ * seeded with 0xffff, and prints the result.  If a byte cannot be read the
+ * walk stops there, the failing address is reported, and the checksum of
+ * the bytes read so far is printed.
+ */
+static void
+csum(void)
+{
+	/*
+	 * Same width as ncsum: with a narrower counter a count above
+	 * UINT_MAX would wrap the index and the loop would never end.
+	 */
+	unsigned long i;
+	unsigned short fcs;
+	unsigned char v;
+
+	if (!scanhex(&adrs))
+		return;
+	if (!scanhex(&ncsum))
+		return;
+	fcs = 0xffff;
+	for (i = 0; i < ncsum; ++i) {
+		if (mread(adrs+i, &v, 1) == 0) {
+			printf("csum stopped at "REG"\n", adrs+i);
+			break;
+		}
+		fcs = FCS(fcs, v);
+	}
+	printf("%x\n", fcs);
+}
+
+/*
+ * Decide whether a breakpoint may be placed at @addr (rounded down to a
+ * multiple of 4).  The address must be a kernel address, the instruction
+ * there must be readable, and it must be one that can be single stepped.
+ * Prints the reason and returns 0 when the location is rejected, else 1.
+ */
+static long check_bp_loc(unsigned long addr)
+{
+	unsigned long where = addr & ~3;
+	ppc_inst_t insn;
+
+	if (is_kernel_addr(where)) {
+		if (mread_instr(where, &insn)) {
+			if (can_single_step(ppc_inst_val(insn)))
+				return 1;
+			printf("Breakpoints may not be placed on instructions that can't be single stepped\n");
+		} else {
+			printf("Can't read instruction at address %lx\n", where);
+		}
+	} else {
+		printf("Breakpoints may only be placed at kernel addresses\n");
+	}
+	return 0;
+}
+
+/*
+ * Return the index of the first dabr[] slot that is not enabled, or print
+ * a message and return -1 when all nr_wp_slots() slots are in use.
+ */
+static int find_free_data_bpt(void)
+{
+	int slot = 0;
+
+	while (slot < nr_wp_slots()) {
+		if (!dabr[slot].enabled)
+			return slot;
+		slot++;
+	}
+
+	printf("Couldn't find free breakpoint register\n");
+	return -1;
+}
+
+/*
+ * List every enabled data breakpoint: its address followed by "r" and/or
+ * "w" according to bits 1 and 2 of its enabled field.
+ */
+static void print_data_bpts(void)
+{
+	int slot;
+
+	for (slot = 0; slot < nr_wp_slots(); slot++) {
+		if (dabr[slot].enabled) {
+			printf("   data   "REG"  [", dabr[slot].address);
+			if (dabr[slot].enabled & 1)
+				printf("r");
+			if (dabr[slot].enabled & 2)
+				printf("w");
+			printf("]\n");
+		}
+	}
+}
+
+/*
+ * Text printed by "b?".  Kept in step with what bpt_cmds() actually parses:
+ * no count argument is read for any form, and "bd" takes an optional 'r' or
+ * 'w' right after the 'd' to restrict the breakpoint to reads or writes.
+ */
+static char *breakpoint_help_string =
+    "Breakpoint command usage:\n"
+    "b                show breakpoints\n"
+    "b <addr>         set breakpoint at given instr addr\n"
+    "bc               clear all breakpoints\n"
+    "bc <n/addr>      clear breakpoint number n or at addr\n"
+    "bi <addr>        set hardware instr breakpoint (POWER8 only)\n"
+    "bd[r|w] <addr>   set hardware data breakpoint (r = reads, w = writes, default both)\n"
+    "";
+
+/*
+ * 'b' command family: list, set and clear breakpoints.
+ *
+ * The character after 'b' selects the action: 'd' sets a hardware data
+ * breakpoint, 'i' a hardware instruction breakpoint, 'c' clears one or all
+ * breakpoints.  Anything else is pushed back and handled as "b?" (help),
+ * "b" (list all breakpoints) or "b <addr>" (set a trap breakpoint).
+ * The setting forms are refused when xmon is read-only.
+ */
+static void
+bpt_cmds(void)
+{
+	int cmd;
+	unsigned long a;
+	int i;
+	struct bpt *bp;
+
+	cmd = inchar();
+
+	switch (cmd) {
+	case 'd': {	/* bd - hardware data breakpoint */
+		static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
+		int mode;
+		if (xmon_is_ro) {
+			printf(xmon_ro_msg);
+			break;
+		}
+		if (!ppc_breakpoint_available()) {
+			printf("Hardware data breakpoint not supported on this cpu\n");
+			break;
+		}
+		i = find_free_data_bpt();
+		if (i < 0)
+			break;
+		/* 7 sets both the "r" and "w" bits; 'r'/'w' narrow it to one */
+		mode = 7;
+		cmd = inchar();
+		if (cmd == 'r')
+			mode = 5;
+		else if (cmd == 'w')
+			mode = 6;
+		else
+			termch = cmd;
+		dabr[i].address = 0;
+		dabr[i].enabled = 0;
+		if (scanhex(&dabr[i].address)) {
+			if (!is_kernel_addr(dabr[i].address)) {
+				printf(badaddr);
+				break;
+			}
+			dabr[i].address &= ~HW_BRK_TYPE_DABR;
+			dabr[i].enabled = mode | BP_DABR;
+		}
+
+		force_enable_xmon();
+		break;
+	}
+
+	case 'i':	/* bi - hardware instr breakpoint */
+		if (xmon_is_ro) {
+			printf(xmon_ro_msg);
+			break;
+		}
+		if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
+			printf("Hardware instruction breakpoint "
+			       "not supported on this cpu\n");
+			break;
+		}
+		/* only one is tracked (iabr): drop the previous one first */
+		if (iabr) {
+			iabr->enabled &= ~BP_CIABR;
+			iabr = NULL;
+		}
+		if (!scanhex(&a))
+			break;
+		if (!check_bp_loc(a))
+			break;
+		bp = new_breakpoint(a);
+		if (bp != NULL) {
+			bp->enabled |= BP_CIABR;
+			iabr = bp;
+			force_enable_xmon();
+		}
+		break;
+
+	case 'c':	/* bc - clear one or all breakpoints */
+		if (!scanhex(&a)) {
+			/* clear all breakpoints */
+			for (i = 0; i < NBPTS; ++i)
+				bpts[i].enabled = 0;
+			iabr = NULL;
+			for (i = 0; i < nr_wp_slots(); i++)
+				dabr[i].enabled = 0;
+
+			printf("All breakpoints cleared\n");
+			break;
+		}
+
+		if (a <= NBPTS && a >= 1) {
+			/* assume a breakpoint number */
+			bp = &bpts[a-1];	/* bp nums are 1 based */
+		} else {
+			/* assume a breakpoint address */
+			bp = at_breakpoint(a);
+			if (bp == NULL) {
+				printf("No breakpoint at %lx\n", a);
+				break;
+			}
+		}
+
+		printf("Cleared breakpoint %tx (", BP_NUM(bp));
+		xmon_print_symbol(bp->address, " ", ")\n");
+		bp->enabled = 0;
+		break;
+
+	default:
+		/* not a sub-command letter: push it back and look again */
+		termch = cmd;
+		cmd = skipbl();
+		if (cmd == '?') {
+			printf(breakpoint_help_string);
+			break;
+		}
+		termch = cmd;
+
+		if (xmon_is_ro || !scanhex(&a)) {
+			/* print all breakpoints */
+			printf("   type            address\n");
+			print_data_bpts();
+			for (bp = bpts; bp < &bpts[NBPTS]; ++bp) {
+				if (!bp->enabled)
+					continue;
+				printf("%tx %s   ", BP_NUM(bp),
+				    (bp->enabled & BP_CIABR) ? "inst": "trap");
+				xmon_print_symbol(bp->address, "  ", "\n");
+			}
+			break;
+		}
+
+		if (!check_bp_loc(a))
+			break;
+		bp = new_breakpoint(a);
+		if (bp != NULL) {
+			bp->enabled |= BP_TRAP;
+			force_enable_xmon();
+		}
+		break;
+	}
+}
+
+/*
+ * Very cheap human name for vector lookup.
+ *
+ * Maps an interrupt vector number to a short parenthesised description.
+ * Vectors 0x380 and 0x480 are named differently depending on
+ * radix_enabled().  Unknown vectors yield the empty string.
+ */
+static
+const char *getvecname(unsigned long vec)
+{
+	switch (vec) {
+	case 0x100:
+		return "(System Reset)";
+	case 0x200:
+		return "(Machine Check)";
+	case 0x300:
+		return "(Data Access)";
+	case 0x380:
+		return radix_enabled() ? "(Data Access Out of Range)"
+				       : "(Data SLB Access)";
+	case 0x400:
+		return "(Instruction Access)";
+	case 0x480:
+		return radix_enabled() ? "(Instruction Access Out of Range)"
+				       : "(Instruction SLB Access)";
+	case 0x500:
+		return "(Hardware Interrupt)";
+	case 0x600:
+		return "(Alignment)";
+	case 0x700:
+		return "(Program Check)";
+	case 0x800:
+		return "(FPU Unavailable)";
+	case 0x900:
+		return "(Decrementer)";
+	case 0x980:
+		return "(Hypervisor Decrementer)";
+	case 0xa00:
+		return "(Doorbell)";
+	case 0xc00:
+		return "(System Call)";
+	case 0xd00:
+		return "(Single Step)";
+	case 0xe40:
+		return "(Emulation Assist)";
+	case 0xe60:
+		return "(HMI)";
+	case 0xe80:
+		return "(Hypervisor Doorbell)";
+	case 0xf00:
+		return "(Performance Monitor)";
+	case 0xf20:
+		return "(Altivec Unavailable)";
+	case 0x1300:
+		return "(Instruction Breakpoint)";
+	case 0x1500:
+		return "(Denormalisation)";
+	case 0x1700:
+		return "(Altivec Assist)";
+	case 0x3000:
+		return "(System Call Vectored)";
+	default:
+		return "";
+	}
+}
+
+/*
+ * Look up the function containing @pc via kallsyms and report its extent.
+ *
+ * On return *@startp and *@endp hold the start and end address of that
+ * function.  Both are 0 if @pc is 0, if no symbol was found, or if the
+ * lookup faulted (the lookup runs under the bus_error_jmp guard).
+ */
+static void get_function_bounds(unsigned long pc, unsigned long *startp,
+				unsigned long *endp)
+{
+	unsigned long size, offset;
+	const char *name;
+
+	*startp = *endp = 0;
+	if (pc == 0)
+		return;
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+		name = kallsyms_lookup(pc, &size, &offset, NULL, tmpstr);
+		if (name != NULL) {
+			/* offset is pc's distance from the symbol start */
+			*startp = pc - offset;
+			*endp = pc - offset + size;
+		}
+		sync();
+	}
+	catch_memory_errors = 0;
+}
+
+/* Byte offset of the saved-LR slot within a stack frame. */
+#define LRSAVE_OFFSET		(STACK_FRAME_LR_SAVE * sizeof(unsigned long))
+
+/*
+ * Print a backtrace starting at stack pointer @sp.
+ *
+ * @lr and @pc, when not both zero, are the link register and program
+ * counter that go with the first frame.  They are used once, to decide
+ * whether the LR value and the saved LR of that frame are worth printing,
+ * and are then cleared.  They are reloaded from the saved registers whenever
+ * an exception frame (one carrying STACK_FRAME_REGS_MARKER) is found.
+ *
+ * At most 64 frames are printed.  The walk stops early when the stack
+ * pointer is not a kernel address, when a frame cannot be read, or when the
+ * back chain is 0.
+ */
+static void xmon_show_stack(unsigned long sp, unsigned long lr,
+			    unsigned long pc)
+{
+	int max_to_print = 64;
+	unsigned long ip;
+	unsigned long newsp;
+	unsigned long marker;
+	struct pt_regs regs;
+
+	while (max_to_print--) {
+		if (!is_kernel_addr(sp)) {
+			if (sp != 0)
+				printf("SP (%lx) is in userspace\n", sp);
+			break;
+		}
+
+		/* ip: saved LR of this frame; newsp: back chain to the caller */
+		if (!mread(sp + LRSAVE_OFFSET, &ip, sizeof(unsigned long))
+		    || !mread(sp, &newsp, sizeof(unsigned long))) {
+			printf("Couldn't read stack frame at %lx\n", sp);
+			break;
+		}
+
+		/*
+		 * For the first stack frame, try to work out if
+		 * LR and/or the saved LR value in the bottommost
+		 * stack frame are valid.
+		 */
+		if ((pc | lr) != 0) {
+			unsigned long fnstart, fnend;
+			unsigned long nextip;
+			int printip = 1;
+
+			get_function_bounds(pc, &fnstart, &fnend);
+			/* nextip stays 0 if the next frame is not read */
+			nextip = 0;
+			if (newsp > sp)
+				mread(newsp + LRSAVE_OFFSET, &nextip,
+				      sizeof(unsigned long));
+			if (lr == ip) {
+				if (!is_kernel_addr(lr)
+				    || (fnstart <= lr && lr < fnend))
+					printip = 0;
+			} else if (lr == nextip) {
+				printip = 0;
+			} else if (is_kernel_addr(lr)
+				   && !(fnstart <= lr && lr < fnend)) {
+				printf("[link register   ] ");
+				xmon_print_symbol(lr, " ", "\n");
+			}
+			if (printip) {
+				printf("["REG"] ", sp);
+				xmon_print_symbol(ip, " ", " (unreliable)\n");
+			}
+			pc = lr = 0;
+
+		} else {
+			printf("["REG"] ", sp);
+			xmon_print_symbol(ip, " ", "\n");
+		}
+
+		/* Look for "regs" marker to see if this is
+		   an exception frame. */
+		if (mread(sp + STACK_INT_FRAME_MARKER, &marker, sizeof(unsigned long))
+		    && marker == STACK_FRAME_REGS_MARKER) {
+			if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) {
+				printf("Couldn't read registers at %lx\n",
+				       sp + STACK_INT_FRAME_REGS);
+				break;
+			}
+			printf("--- Exception: %lx %s at ", regs.trap,
+			       getvecname(TRAP(&regs)));
+			/* the next frame gets the first-frame treatment again */
+			pc = regs.nip;
+			lr = regs.link;
+			xmon_print_symbol(pc, " ", "\n");
+		}
+
+		if (newsp == 0)
+			break;
+
+		sp = newsp;
+	}
+}
+
+/*
+ * 't' command: show a stack trace.  With a hex argument the trace starts at
+ * that stack pointer with no LR/PC hints; without one it starts from the
+ * r1, link and nip values saved in @excp.  The rest of the input line is
+ * then skipped.
+ */
+static void backtrace(struct pt_regs *excp)
+{
+	unsigned long sp;
+	unsigned long lr = 0;
+	unsigned long pc = 0;
+
+	if (!scanhex(&sp)) {
+		sp = excp->gpr[1];
+		lr = excp->link;
+		pc = excp->nip;
+	}
+
+	xmon_show_stack(sp, lr, pc);
+	scannl();
+}
+
+/*
+ * If the trap described by @regs was taken in kernel mode at a kernel
+ * address that find_bug() knows and that is not a warning, print a
+ * "kernel BUG at ..." line for it.  Does nothing without CONFIG_BUG.
+ */
+static void print_bug_trap(struct pt_regs *regs)
+{
+#ifdef CONFIG_BUG
+	const struct bug_entry *entry;
+	unsigned long trap_addr = regs->nip;	/* address of trap instruction */
+
+	/* not in kernel, or not at a kernel address */
+	if ((regs->msr & MSR_PR) || !is_kernel_addr(trap_addr))
+		return;
+
+	entry = find_bug(trap_addr);
+	if (!entry || is_warning_bug(entry))
+		return;
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+	printf("kernel BUG at %s:%u!\n",
+	       (char *)entry + entry->file_disp, entry->line);
+#else
+	printf("kernel BUG at %px!\n", (void *)entry + entry->bug_addr_disp);
+#endif
+#endif /* CONFIG_BUG */
+}
+
+/*
+ * Print a summary of the exception described by @fp: vector, pc, lr, sp,
+ * msr, dar/dsisr for the data-side and machine-check vectors, the current
+ * task, the paca state on 64-bit, any BUG() information for program
+ * checks, and finally the kernel banner.
+ */
+static void excprint(struct pt_regs *fp)
+{
+	unsigned long trap;
+
+#ifdef CONFIG_SMP
+	printf("cpu 0x%x: ", smp_processor_id());
+#endif /* CONFIG_SMP */
+
+	trap = TRAP(fp);
+	printf("Vector: %lx %s at [%px]\n", fp->trap, getvecname(trap), fp);
+	printf("    pc: ");
+	xmon_print_symbol(fp->nip, ": ", "\n");
+
+	printf("    lr: ");
+	xmon_print_symbol(fp->link, ": ", "\n");
+
+	printf("    sp: %lx\n", fp->gpr[1]);
+	printf("   msr: %lx\n", fp->msr);
+
+	if (trap == INTERRUPT_DATA_STORAGE ||
+	    trap == INTERRUPT_DATA_SEGMENT ||
+	    trap == INTERRUPT_ALIGNMENT ||
+	    trap == INTERRUPT_MACHINE_CHECK) {
+		printf("   dar: %lx\n", fp->dar);
+		if (trap != INTERRUPT_DATA_SEGMENT)
+			printf(" dsisr: %lx\n", fp->dsisr);
+	}
+
+	printf("  current = 0x%px\n", current);
+#ifdef CONFIG_PPC64
+	printf("  paca    = 0x%px\t irqmask: 0x%02x\t irq_happened: 0x%02x\n",
+	       local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
+#endif
+	if (current) {
+		printf("    pid   = %d, comm = %s\n",
+		       current->pid, current->comm);
+	}
+
+	if (trap == INTERRUPT_PROGRAM)
+		print_bug_trap(fp);
+
+	/*
+	 * The banner is built elsewhere; print it as data so that a '%' in
+	 * it can never be taken for a conversion specification.
+	 */
+	printf("%s", linux_banner);
+}
+
+/*
+ * 'r' command: print a register set.
+ *
+ * With a hex argument, that address is read as a struct pt_regs (under the
+ * bus_error_jmp guard; an error is reported and nothing printed if the read
+ * faults) and printed instead of @fp.  Prints the 32 GPRs, pc, cfar (when
+ * the trap is not a syscall and the cpu has CPU_FTR_CFAR), lr, msr, cr,
+ * ctr, xer and trap, plus dar/dsisr for the data storage, data segment and
+ * alignment vectors.
+ */
+static void prregs(struct pt_regs *fp)
+{
+	int n, trap;
+	unsigned long base;
+	struct pt_regs regs;
+
+	if (scanhex(&base)) {
+		if (setjmp(bus_error_jmp) == 0) {
+			catch_memory_errors = 1;
+			sync();
+			regs = *(struct pt_regs *)base;
+			sync();
+			__delay(200);
+		} else {
+			catch_memory_errors = 0;
+			printf("*** Error reading registers from "REG"\n",
+			       base);
+			return;
+		}
+		catch_memory_errors = 0;
+		fp = &regs;
+	}
+
+/* GPRs printed per output line */
+#ifdef CONFIG_PPC64
+#define R_PER_LINE 2
+#else
+#define R_PER_LINE 4
+#endif
+
+	for (n = 0; n < 32; ++n) {
+		printf("R%.2d = "REG"%s", n, fp->gpr[n],
+			(n % R_PER_LINE) == R_PER_LINE - 1 ? "\n" : "   ");
+	}
+
+	printf("pc  = ");
+	xmon_print_symbol(fp->nip, " ", "\n");
+	if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) {
+		/* the value shown as cfar is taken from the orig_gpr3 slot */
+		printf("cfar= ");
+		xmon_print_symbol(fp->orig_gpr3, " ", "\n");
+	}
+	printf("lr  = ");
+	xmon_print_symbol(fp->link, " ", "\n");
+	printf("msr = "REG"   cr  = %.8lx\n", fp->msr, fp->ccr);
+	printf("ctr = "REG"   xer = "REG"   trap = %4lx\n",
+	       fp->ctr, fp->xer, fp->trap);
+	trap = TRAP(fp);
+	if (trap == INTERRUPT_DATA_STORAGE ||
+	    trap == INTERRUPT_DATA_SEGMENT ||
+	    trap == INTERRUPT_ALIGNMENT)
+		printf("dar = "REG"   dsisr = %.8lx\n", fp->dar, fp->dsisr);
+}
+
+/*
+ * Cache flush/invalidate command.
+ *
+ * Reads an optional 'i', an address (stored in adrs) and a byte count
+ * (default 1).  The count is rounded up to whole L1 cache lines and
+ * cflush() is applied to each line; cinval() is used instead when 'i' was
+ * given and CONFIG_PPC_BOOK3S_64 is not enabled.  The loop runs under the
+ * bus_error_jmp guard.
+ */
+static void cacheflush(void)
+{
+	int cmd;
+	unsigned long nflush;
+
+	cmd = inchar();
+	if (cmd != 'i')
+		termch = cmd;	/* not ours: push it back for scanhex() */
+	scanhex((void *)&adrs);
+	if (termch != '\n')
+		termch = 0;
+	nflush = 1;
+	scanhex(&nflush);
+	/* bytes -> number of cache lines, rounding up */
+	nflush = (nflush + L1_CACHE_BYTES - 1) / L1_CACHE_BYTES;
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+
+		if (cmd != 'i' || IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+			for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
+				cflush((void *) adrs);
+		} else {
+			for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
+				cinval((void *) adrs);
+		}
+		sync();
+		/* wait a little while to see if we get a machine check */
+		__delay(200);
+	}
+	catch_memory_errors = 0;
+}
+
+extern unsigned long xmon_mfspr(int spr, unsigned long default_value);
+extern void xmon_mtspr(int spr, unsigned long value);
+
+/*
+ * Read SPR number @n with faults caught.
+ *
+ * The value already in *@vp is handed to xmon_mfspr() as its default value.
+ * On success the result is stored in *@vp and 1 is returned; if the read
+ * faults, *@vp is left untouched and 0 is returned.
+ */
+static int
+read_spr(int n, unsigned long *vp)
+{
+	unsigned long ret = -1UL;
+	int ok = 0;
+
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_spr_faults = 1;
+		sync();
+
+		ret = xmon_mfspr(n, *vp);
+
+		sync();
+		*vp = ret;
+		ok = 1;
+	}
+	catch_spr_faults = 0;
+
+	return ok;
+}
+
+/*
+ * Write @val to SPR number @n with faults caught.  Refused with a message
+ * when xmon is read-only; a fault during the write is reported but not
+ * propagated.
+ */
+static void
+write_spr(int n, unsigned long val)
+{
+	if (xmon_is_ro) {
+		printf(xmon_ro_msg);
+		return;
+	}
+
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_spr_faults = 1;
+		sync();
+
+		xmon_mtspr(n, val);
+
+		sync();
+	} else {
+		printf("SPR 0x%03x (%4d) Faulted during write\n", n, n);
+	}
+	catch_spr_faults = 0;
+}
+
+/*
+ * Print the SPRs gated on CPU_FTR_ARCH_206.  The second group is only read
+ * when MSR_HV is set.  Empty without CONFIG_PPC64.
+ */
+static void dump_206_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/* Actually some of these pre-date 2.06, but whatever */
+
+		printf("srr0   = %.16lx  srr1  = %.16lx dsisr  = %.8lx\n",
+			mfspr(SPRN_SRR0), mfspr(SPRN_SRR1), mfspr(SPRN_DSISR));
+		printf("dscr   = %.16lx  ppr   = %.16lx pir    = %.8lx\n",
+			mfspr(SPRN_DSCR), mfspr(SPRN_PPR), mfspr(SPRN_PIR));
+		printf("amr    = %.16lx  uamor = %.16lx\n",
+			mfspr(SPRN_AMR), mfspr(SPRN_UAMOR));
+
+		if (mfmsr() & MSR_HV) {
+			printf("sdr1   = %.16lx  hdar  = %.16lx hdsisr = %.8lx\n",
+				mfspr(SPRN_SDR1), mfspr(SPRN_HDAR),
+				mfspr(SPRN_HDSISR));
+			printf("hsrr0  = %.16lx hsrr1  = %.16lx hdec   = %.16lx\n",
+				mfspr(SPRN_HSRR0), mfspr(SPRN_HSRR1),
+				mfspr(SPRN_HDEC));
+			printf("lpcr   = %.16lx  pcr   = %.16lx lpidr  = %.8lx\n",
+				mfspr(SPRN_LPCR), mfspr(SPRN_PCR),
+				mfspr(SPRN_LPID));
+			printf("hsprg0 = %.16lx hsprg1 = %.16lx amor   = %.16lx\n",
+				mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1),
+				mfspr(SPRN_AMOR));
+			printf("dabr   = %.16lx dabrx  = %.16lx\n",
+				mfspr(SPRN_DABR), mfspr(SPRN_DABRX));
+		}
+	}
+#endif
+}
+
+/*
+ * Print the SPRs gated on CPU_FTR_ARCH_207S.  The TM registers are only
+ * read when MSR_TM is set and the last group only when MSR_HV is set.
+ * Empty without CONFIG_PPC64.
+ */
+static void dump_207_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	unsigned long cur_msr;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return;
+
+	printf("dpdes  = %.16lx  tir   = %.16lx cir    = %.8lx\n",
+		mfspr(SPRN_DPDES), mfspr(SPRN_TIR), mfspr(SPRN_CIR));
+
+	printf("fscr   = %.16lx  tar   = %.16lx pspb   = %.8lx\n",
+		mfspr(SPRN_FSCR), mfspr(SPRN_TAR), mfspr(SPRN_PSPB));
+
+	cur_msr = mfmsr();
+
+	/* Only if TM has been enabled in the kernel */
+	if (cur_msr & MSR_TM)
+		printf("tfhar  = %.16lx  tfiar = %.16lx texasr = %.16lx\n",
+			mfspr(SPRN_TFHAR), mfspr(SPRN_TFIAR),
+			mfspr(SPRN_TEXASR));
+
+	printf("mmcr0  = %.16lx  mmcr1 = %.16lx mmcr2  = %.16lx\n",
+		mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCR2));
+	printf("pmc1   = %.8lx pmc2 = %.8lx  pmc3 = %.8lx  pmc4   = %.8lx\n",
+		mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
+		mfspr(SPRN_PMC3), mfspr(SPRN_PMC4));
+	printf("mmcra  = %.16lx   siar = %.16lx pmc5   = %.8lx\n",
+		mfspr(SPRN_MMCRA), mfspr(SPRN_SIAR), mfspr(SPRN_PMC5));
+	printf("sdar   = %.16lx   sier = %.16lx pmc6   = %.8lx\n",
+		mfspr(SPRN_SDAR), mfspr(SPRN_SIER), mfspr(SPRN_PMC6));
+	printf("ebbhr  = %.16lx  ebbrr = %.16lx bescr  = %.16lx\n",
+		mfspr(SPRN_EBBHR), mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
+	printf("iamr   = %.16lx\n", mfspr(SPRN_IAMR));
+
+	if (cur_msr & MSR_HV) {
+		printf("hfscr  = %.16lx  dhdes = %.16lx rpr    = %.16lx\n",
+			mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR));
+		printf("dawr0  = %.16lx dawrx0 = %.16lx\n",
+		       mfspr(SPRN_DAWR0), mfspr(SPRN_DAWRX0));
+		if (nr_wp_slots() > 1)
+			printf("dawr1  = %.16lx dawrx1 = %.16lx\n",
+			       mfspr(SPRN_DAWR1), mfspr(SPRN_DAWRX1));
+		printf("ciabr  = %.16lx\n", mfspr(SPRN_CIABR));
+	}
+#endif
+}
+
+/*
+ * Print the SPRs gated on CPU_FTR_ARCH_300.  tidr is shown only with
+ * CPU_FTR_P9_TIDR; psscr is read through SPRN_PSSCR when MSR_HV is set and
+ * through SPRN_PSSCR_PR otherwise; ptcr and asdr are shown only when MSR_HV
+ * is set.  Empty without CONFIG_PPC64.
+ */
+static void dump_300_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	unsigned long psscr;
+	bool hv;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	hv = mfmsr() & MSR_HV;
+
+	if (!cpu_has_feature(CPU_FTR_P9_TIDR))
+		printf("pidr   = %.16lx\n",
+			mfspr(SPRN_PID));
+	else
+		printf("pidr   = %.16lx  tidr  = %.16lx\n",
+			mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+
+	if (hv)
+		psscr = mfspr(SPRN_PSSCR);
+	else
+		psscr = mfspr(SPRN_PSSCR_PR);
+	printf("psscr  = %.16lx\n", psscr);
+
+	if (hv)
+		printf("ptcr   = %.16lx  asdr  = %.16lx\n",
+			mfspr(SPRN_PTCR), mfspr(SPRN_ASDR));
+#endif
+}
+
+/*
+ * Print mmcr3, sier2 and sier3 on cpus with CPU_FTR_ARCH_31.
+ * Empty without CONFIG_PPC64.
+ */
+static void dump_310_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		printf("mmcr3  = %.16lx, sier2  = %.16lx, sier3  = %.16lx\n",
+			mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2),
+			mfspr(SPRN_SIER3));
+#endif
+}
+
+/*
+ * Read SPR @spr and print its value.
+ *
+ * A read that leaves the caller-supplied default untouched is retried with
+ * a second, different default.  If that one comes back unchanged as well
+ * the SPR is taken to be unimplemented, which is reported only when
+ * @show_unimplemented is set.  A faulting read is always reported.
+ */
+static void dump_one_spr(int spr, bool show_unimplemented)
+{
+	unsigned long val = 0xdeadbeef;
+
+	if (!read_spr(spr, &val))
+		goto faulted;
+
+	if (val == 0xdeadbeef) {
+		/* Looks like read was a nop, confirm */
+		val = 0x0badcafe;
+		if (!read_spr(spr, &val))
+			goto faulted;
+
+		if (val == 0x0badcafe) {
+			if (show_unimplemented)
+				printf("SPR 0x%03x (%4d) Unimplemented\n", spr, spr);
+			return;
+		}
+	}
+
+	printf("SPR 0x%03x (%4d) = 0x%lx\n", spr, spr, val);
+	return;
+
+faulted:
+	printf("SPR 0x%03x (%4d) Faulted during read\n", spr, spr);
+}
+
+/*
+ * 'S' command: special purpose registers.
+ *
+ *   S          print msr, pvr, dec, sp, toc, sprg0-3, dar and the
+ *              feature-gated SPR groups
+ *   Sw n v     write v to SPR n (n is remembered between calls), then show it
+ *   Sr n       show SPR n
+ *   Sa         show every SPR from 1 to 1023 that is implemented
+ *
+ * Every form except the bare one skips the rest of the input line.
+ */
+static void super_regs(void)
+{
+	static unsigned long regno;
+	int spr;
+	int sub = skipbl();
+
+	if (sub == '\n') {
+		unsigned long sp, toc;
+
+		/* r1 and r2 */
+		asm("mr %0,1" : "=r" (sp) :);
+		asm("mr %0,2" : "=r" (toc) :);
+
+		printf("msr    = "REG"  sprg0 = "REG"\n",
+		       mfmsr(), mfspr(SPRN_SPRG0));
+		printf("pvr    = "REG"  sprg1 = "REG"\n",
+		       mfspr(SPRN_PVR), mfspr(SPRN_SPRG1));
+		printf("dec    = "REG"  sprg2 = "REG"\n",
+		       mfspr(SPRN_DEC), mfspr(SPRN_SPRG2));
+		printf("sp     = "REG"  sprg3 = "REG"\n", sp, mfspr(SPRN_SPRG3));
+		printf("toc    = "REG"  dar   = "REG"\n", toc, mfspr(SPRN_DAR));
+
+		dump_206_sprs();
+		dump_207_sprs();
+		dump_300_sprs();
+		dump_310_sprs();
+
+		return;
+	}
+
+	if (sub == 'w') {
+		unsigned long val = 0;
+
+		scanhex(&regno);
+		/* current contents are the default if no new value is typed */
+		read_spr(regno, &val);
+		scanhex(&val);
+		write_spr(regno, val);
+		dump_one_spr(regno, true);
+	} else if (sub == 'r') {
+		scanhex(&regno);
+		dump_one_spr(regno, true);
+	} else if (sub == 'a') {
+		/* dump ALL SPRs */
+		for (spr = 1; spr < 1024; ++spr)
+			dump_one_spr(spr, false);
+	}
+
+	scannl();
+}
+
+/*
+ * Stuff for reading and writing memory safely
+ */
+/*
+ * Copy @size bytes from address @adrs into @buf, surviving faults.
+ *
+ * Sizes 2, 4 and 8 are done as one access of that width; any other size is
+ * copied a byte at a time.  Returns @size on success.  If the access
+ * faults, returns the number of bytes the byte loop had copied, which is 0
+ * for the single-access sizes.
+ */
+static int
+mread(unsigned long adrs, void *buf, int size)
+{
+	/* volatile: must keep its value across the longjmp() taken on a fault */
+	volatile int n;
+	char *p, *q;
+
+	n = 0;
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+		p = (char *)adrs;
+		q = (char *)buf;
+		switch (size) {
+		case 2:
+			*(u16 *)q = *(u16 *)p;
+			break;
+		case 4:
+			*(u32 *)q = *(u32 *)p;
+			break;
+		case 8:
+			*(u64 *)q = *(u64 *)p;
+			break;
+		default:
+			for( ; n < size; ++n) {
+				*q++ = *p++;
+				sync();
+			}
+		}
+		sync();
+		/* wait a little while to see if we get a machine check */
+		__delay(200);
+		n = size;
+	}
+	catch_memory_errors = 0;
+	return n;
+}
+
+/*
+ * Copy @size bytes from @buf to address @adrs, surviving faults.
+ *
+ * Refused (returns 0 after printing a message) when xmon is read-only.
+ * Sizes 2, 4 and 8 are done as one access of that width; any other size is
+ * written a byte at a time.  Returns @size on success.  If the access
+ * faults, the failing address is reported and the number of bytes the byte
+ * loop had written is returned (0 for the single-access sizes).
+ */
+static int
+mwrite(unsigned long adrs, void *buf, int size)
+{
+	/* volatile: must keep its value across the longjmp() taken on a fault */
+	volatile int n;
+	char *p, *q;
+
+	n = 0;
+
+	if (xmon_is_ro) {
+		printf(xmon_ro_msg);
+		return n;
+	}
+
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+		p = (char *) adrs;
+		q = (char *) buf;
+		switch (size) {
+		case 2:
+			*(u16 *)p = *(u16 *)q;
+			break;
+		case 4:
+			*(u32 *)p = *(u32 *)q;
+			break;
+		case 8:
+			*(u64 *)p = *(u64 *)q;
+			break;
+		default:
+			for ( ; n < size; ++n) {
+				*p++ = *q++;
+				sync();
+			}
+		}
+		sync();
+		/* wait a little while to see if we get a machine check */
+		__delay(200);
+		n = size;
+	} else {
+		printf("*** Error writing address "REG"\n", adrs + n);
+	}
+	catch_memory_errors = 0;
+	return n;
+}
+
+/*
+ * Read the instruction at @adrs into *@instr, surviving faults.
+ * Returns the instruction's length as given by ppc_inst_len(), or 0 if the
+ * read faulted.
+ */
+static int
+mread_instr(unsigned long adrs, ppc_inst_t *instr)
+{
+	/* volatile: must keep its value across the longjmp() taken on a fault */
+	volatile int n;
+
+	n = 0;
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+		*instr = ppc_inst_read((u32 *)adrs);
+		sync();
+		/* wait a little while to see if we get a machine check */
+		__delay(200);
+		n = ppc_inst_len(*instr);
+	}
+	catch_memory_errors = 0;
+	return n;
+}
+
+static int fault_type;
+static int fault_except;
+static char *fault_chars[] = { "--", "**", "##" };
+
+/*
+ * Record what kind of fault was taken and unwind to the setjmp() point in
+ * bus_error_jmp.  fault_except gets the trap vector; fault_type becomes 0
+ * for vector 0x200, 1 for 0x300/0x380 and 2 for anything else (it indexes
+ * fault_chars[]).  Control does not come back from the longjmp().
+ */
+static int handle_fault(struct pt_regs *regs)
+{
+	unsigned long vec = TRAP(regs);
+
+	fault_except = vec;
+
+	if (vec == 0x200)
+		fault_type = 0;
+	else if (vec == 0x300 || vec == 0x380)
+		fault_type = 1;
+	else
+		fault_type = 2;
+
+	longjmp(bus_error_jmp, 1);
+
+	return 0;
+}
+
+#define SWAP(a, b, t)	((t) = (a), (a) = (b), (b) = (t))
+
+/*
+ * Reverse the byte order of the @size-byte item at @val in place.
+ * Only sizes 2, 4 and 8 are handled; any other size leaves @val untouched.
+ */
+static void
+byterev(unsigned char *val, int size)
+{
+	int lo, hi;
+	int t;
+
+	if (size != 2 && size != 4 && size != 8)
+		return;
+
+	/* swap the outermost pair and work inwards */
+	for (lo = 0, hi = size - 1; lo < hi; ++lo, --hi)
+		SWAP(val[lo], val[hi], t);
+}
+
+static int brev;
+static int mnoread;
+
+/* Usage text printed by memex() when the 'm' command is followed by '?'. */
+static char *memex_help_string =
+    "Memory examine command usage:\n"
+    "m [addr] [flags] examine/change memory\n"
+    "  addr is optional.  will start where left off.\n"
+    "  flags may include chars from this set:\n"
+    "    b   modify by bytes (default)\n"
+    "    w   modify by words (2 byte)\n"
+    "    l   modify by longs (4 byte)\n"
+    "    d   modify by doubleword (8 byte)\n"
+    "    r   toggle reverse byte order mode\n"
+    "    n   do not read memory (for i/o spaces)\n"
+    "    .   ok to read (default)\n"
+    "NOTE: flags are saved as defaults\n"
+    "";
+
+/*
+ * Help for the sub-commands accepted inside memex()'s examine loop, printed
+ * for '?'.  Kept in step with the switch in memex(): ',' increments the
+ * address ('\'' starts a string), '.' clears the no-read flag, 'n' sets it
+ * (it does not toggle), and both 'b' and 'v' select byte mode.
+ */
+static char *memex_subcmd_help_string =
+    "Memory examine subcommands:\n"
+    "  hexval   write this val to current location\n"
+    "  'string' write chars from string to this location\n"
+    "  ,        increment address\n"
+    "  ^        decrement address\n"
+    "  /        increment addr by 0x10.  //=0x100, ///=0x1000, etc\n"
+    "  \\        decrement addr by 0x10.  \\\\=0x100, \\\\\\=0x1000, etc\n"
+    "  .        clear no-read flag\n"
+    "  ;        stay at this addr\n"
+    "  b, v     change to byte mode\n"
+    "  w        change to word (2 byte) mode\n"
+    "  l        change to long (4 byte) mode\n"
+    "  u        change to doubleword (8 byte) mode\n"
+    "  m addr   change current addr\n"
+    "  n        set no-read flag\n"
+    "  r        toggle byte reverse flag\n"
+    "  < count  back up count bytes\n"
+    "  > count  skip forward count bytes\n"
+    "  ?        print this help\n"
+    "  x        exit this mode\n"
+    "";
+
+/*
+ * 'm' command: interactively examine and modify memory.
+ *
+ * Parses an optional address and flag characters ("m?" prints usage), then
+ * loops: show the item at adrs (unless no-read mode is on), read a line of
+ * sub-commands, apply them, and advance adrs by the increment they left
+ * behind.  The item size and the byte-reverse and no-read flags are kept in
+ * variables that outlive the call.  Returns when 'x' or EOF is read.
+ */
+static void
+memex(void)
+{
+	int cmd, inc, i, nslash;
+	unsigned long n;
+	unsigned char val[16];
+
+	scanhex((void *)&adrs);
+	cmd = skipbl();
+	if (cmd == '?') {
+		printf(memex_help_string);
+		return;
+	} else {
+		termch = cmd;
+	}
+	last_cmd = "m\n";
+	/* flag characters on the command line */
+	while ((cmd = skipbl()) != '\n') {
+		switch( cmd ){
+		case 'b':	size = 1;	break;
+		case 'w':	size = 2;	break;
+		case 'l':	size = 4;	break;
+		case 'd':	size = 8;	break;
+		case 'r': 	brev = !brev;	break;
+		case 'n':	mnoread = 1;	break;
+		case '.':	mnoread = 0;	break;
+		}
+	}
+	if( size <= 0 )
+		size = 1;
+	else if( size > 8 )
+		size = 8;
+	for(;;){
+		if (!mnoread)
+			n = mread(adrs, val, size);
+		printf(REG"%c", adrs, brev? 'r': ' ');
+		if (!mnoread) {
+			if (brev)
+				byterev(val, size);
+			putchar(' ');
+			/* bytes that were read, then fault markers for the rest */
+			for (i = 0; i < n; ++i)
+				printf("%.2x", val[i]);
+			for (; i < size; ++i)
+				printf("%s", fault_chars[fault_type]);
+		}
+		putchar(' ');
+		inc = size;
+		nslash = 0;
+		/* sub-commands on this input line */
+		for(;;){
+			if( scanhex(&n) ){
+				/* a hex value: store it at the current address */
+				for (i = 0; i < size; ++i)
+					val[i] = n >> (i * 8);
+				if (!brev)
+					byterev(val, size);
+				mwrite(adrs, val, size);
+				inc = size;
+			}
+			cmd = skipbl();
+			if (cmd == '\n')
+				break;
+			inc = 0;
+			switch (cmd) {
+			case '\'':
+				/* quoted string: one item is written per character */
+				for(;;){
+					n = inchar();
+					if( n == '\\' )
+						n = bsesc();
+					else if( n == '\'' )
+						break;
+					for (i = 0; i < size; ++i)
+						val[i] = n >> (i * 8);
+					if (!brev)
+						byterev(val, size);
+					mwrite(adrs, val, size);
+					adrs += size;
+				}
+				adrs -= size;
+				inc = size;
+				break;
+			case ',':
+				adrs += size;
+				break;
+			case '.':
+				mnoread = 0;
+				break;
+			case ';':
+				break;
+			case 'x':
+			case EOF:
+				scannl();
+				return;
+			case 'b':
+			case 'v':
+				size = 1;
+				break;
+			case 'w':
+				size = 2;
+				break;
+			case 'l':
+				size = 4;
+				break;
+			case 'u':
+				size = 8;
+				break;
+			case '^':
+				adrs -= size;
+				break;
+			case '/':
+				/* each further '/' replaces the last step by one 16x larger */
+				if (nslash > 0)
+					adrs -= 1 << nslash;
+				else
+					nslash = 0;
+				nslash += 4;
+				adrs += 1 << nslash;
+				break;
+			case '\\':
+				/* same as '/', in the other direction */
+				if (nslash < 0)
+					adrs += 1 << -nslash;
+				else
+					nslash = 0;
+				nslash -= 4;
+				adrs -= 1 << -nslash;
+				break;
+			case 'm':
+				scanhex((void *)&adrs);
+				break;
+			case 'n':
+				mnoread = 1;
+				break;
+			case 'r':
+				brev = !brev;
+				break;
+			case '<':
+				n = size;
+				scanhex(&n);
+				adrs -= n;
+				break;
+			case '>':
+				n = size;
+				scanhex(&n);
+				adrs += n;
+				break;
+			case '?':
+				printf(memex_subcmd_help_string);
+				break;
+			}
+		}
+		adrs += inc;
+	}
+}
+
+/*
+ * Read the character following a backslash and translate the escapes
+ * n, r, b and t to the control characters they stand for.  Any other
+ * character is returned unchanged.
+ */
+static int
+bsesc(void)
+{
+	const int ch = inchar();
+
+	if (ch == 'n')
+		return '\n';
+	if (ch == 'r')
+		return '\r';
+	if (ch == 'b')
+		return '\b';
+	if (ch == 't')
+		return '\t';
+	return ch;
+}
+
+/*
+ * Print @ndump bytes starting at @adrs as one unbroken line of hex digits.
+ * Memory is read in chunks of up to 16 bytes; bytes of a chunk that could
+ * not be read are shown as fault markers and the dump stops after that
+ * chunk.
+ */
+static void xmon_rawdump (unsigned long adrs, long ndump)
+{
+	unsigned char temp[16];
+	long left = ndump;
+
+	while (left > 0) {
+		long want = left < 16 ? left : 16;
+		long got = mread(adrs, temp, want);
+		long i = 0;
+
+		adrs += got;
+
+		while (i < want && i < got) {
+			printf("%.2x", temp[i]);
+			i++;
+		}
+		while (i < want) {
+			printf("%s", fault_chars[fault_type]);
+			i++;
+		}
+
+		left -= want;
+		if (got < want)
+			break;
+	}
+	printf("\n");
+}
+
+/*
+ * Dump the ftrace buffer: DUMP_ORIG when the next input character is 'c',
+ * DUMP_ALL for anything else.
+ */
+static void dump_tracing(void)
+{
+	const int sub = inchar();
+
+	ftrace_dump(sub == 'c' ? DUMP_ORIG : DUMP_ALL);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Print the contents of one CPU's paca_struct.
+ *
+ * @cpu: index into paca_ptrs[].
+ *
+ * Every field is printed as " name = value (offset)" by the local DUMP()
+ * macro.  The whole dump runs with catch_memory_errors set; if setjmp()
+ * returns non-zero the dump is abandoned with an error message and fault
+ * catching is switched off again before returning.
+ */
+static void dump_one_paca(int cpu)
+{
+	struct paca_struct *p;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	int i = 0;
+#endif
+
+	if (setjmp(bus_error_jmp) != 0) {
+		/*
+		 * Do not leave fault catching armed: bus_error_jmp must not
+		 * be used again once this function has returned.
+		 */
+		catch_memory_errors = 0;
+		printf("*** Error dumping paca for cpu 0x%x!\n", cpu);
+		return;
+	}
+
+	catch_memory_errors = 1;
+	sync();
+
+	p = paca_ptrs[cpu];
+
+	printf("paca for cpu 0x%x @ %px:\n", cpu, p);
+
+	printf(" %-*s = %s\n", 25, "possible", cpu_possible(cpu) ? "yes" : "no");
+	printf(" %-*s = %s\n", 25, "present", cpu_present(cpu) ? "yes" : "no");
+	printf(" %-*s = %s\n", 25, "online", cpu_online(cpu) ? "yes" : "no");
+
+/*
+ * Print one paca field: name left-justified in 25 columns, the value with
+ * the caller's format in an 18 column field, then the field's offset
+ * within struct paca_struct.
+ */
+#define DUMP(paca, name, format)				\
+	printf(" %-*s = "format"\t(0x%lx)\n", 25, #name, 18, paca->name, \
+		offsetof(struct paca_struct, name));
+
+	DUMP(p, lock_token, "%#-*x");
+	DUMP(p, paca_index, "%#-*x");
+#ifndef CONFIG_PPC_KERNEL_PCREL
+	DUMP(p, kernel_toc, "%#-*llx");
+#endif
+	DUMP(p, kernelbase, "%#-*llx");
+	DUMP(p, kernel_msr, "%#-*llx");
+	DUMP(p, emergency_sp, "%-*px");
+#ifdef CONFIG_PPC_BOOK3S_64
+	DUMP(p, nmi_emergency_sp, "%-*px");
+	DUMP(p, mc_emergency_sp, "%-*px");
+	DUMP(p, in_nmi, "%#-*x");
+	DUMP(p, in_mce, "%#-*x");
+	DUMP(p, hmi_event_available, "%#-*x");
+#endif
+	DUMP(p, data_offset, "%#-*llx");
+	DUMP(p, hw_cpu_id, "%#-*x");
+	DUMP(p, cpu_start, "%#-*x");
+	DUMP(p, kexec_state, "%#-*x");
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (!early_radix_enabled()) {
+		/* Only non-empty bolted SLB shadow entries are printed. */
+		for (i = 0; i < SLB_NUM_BOLTED; i++) {
+			u64 esid, vsid;
+
+			if (!p->slb_shadow_ptr)
+				continue;
+
+			esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid);
+			vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
+
+			if (esid || vsid) {
+				printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
+				       22, "slb_shadow", i, esid, vsid);
+			}
+		}
+		DUMP(p, vmalloc_sllp, "%#-*x");
+		DUMP(p, stab_rr, "%#-*x");
+		DUMP(p, slb_used_bitmap, "%#-*x");
+		DUMP(p, slb_kern_bitmap, "%#-*x");
+
+		if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+			DUMP(p, slb_cache_ptr, "%#-*x");
+			for (i = 0; i < SLB_CACHE_ENTRIES; i++)
+				printf(" %-*s[%d] = 0x%016x\n",
+				       22, "slb_cache", i, p->slb_cache[i]);
+		}
+	}
+#endif
+
+	DUMP(p, rfi_flush_fallback_area, "%-*px");
+#endif
+	DUMP(p, dscr_default, "%#-*llx");
+#ifdef CONFIG_PPC_BOOK3E_64
+	DUMP(p, pgd, "%-*px");
+	DUMP(p, kernel_pgd, "%-*px");
+	DUMP(p, tcd_ptr, "%-*px");
+	DUMP(p, mc_kstack, "%-*px");
+	DUMP(p, crit_kstack, "%-*px");
+	DUMP(p, dbg_kstack, "%-*px");
+#endif
+	DUMP(p, __current, "%-*px");
+	DUMP(p, kstack, "%#-*llx");
+	/* kstack rounded down to a THREAD_SIZE boundary */
+	printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
+#ifdef CONFIG_STACKPROTECTOR
+	DUMP(p, canary, "%#-*lx");
+#endif
+	DUMP(p, saved_r1, "%#-*llx");
+#ifdef CONFIG_PPC_BOOK3E_64
+	DUMP(p, trap_save, "%#-*x");
+#endif
+	DUMP(p, irq_soft_mask, "%#-*x");
+	DUMP(p, irq_happened, "%#-*x");
+#ifdef CONFIG_MMIOWB
+	DUMP(p, mmiowb_state.nesting_count, "%#-*x");
+	DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x");
+#endif
+	DUMP(p, irq_work_pending, "%#-*x");
+	DUMP(p, sprg_vdso, "%#-*llx");
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	DUMP(p, tm_scratch, "%#-*llx");
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+	DUMP(p, idle_state, "%#-*lx");
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+		DUMP(p, thread_idle_state, "%#-*x");
+		DUMP(p, subcore_sibling_mask, "%#-*x");
+	} else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+		DUMP(p, requested_psscr, "%#-*llx");
+		DUMP(p, dont_stop.counter, "%#-*x");
+#endif
+	}
+#endif
+
+	DUMP(p, accounting.utime, "%#-*lx");
+	DUMP(p, accounting.stime, "%#-*lx");
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	DUMP(p, accounting.utime_scaled, "%#-*lx");
+#endif
+	DUMP(p, accounting.starttime, "%#-*lx");
+	DUMP(p, accounting.starttime_user, "%#-*lx");
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	DUMP(p, accounting.startspurr, "%#-*lx");
+	DUMP(p, accounting.utime_sspurr, "%#-*lx");
+#endif
+	DUMP(p, accounting.steal_time, "%#-*lx");
+#undef DUMP
+
+	catch_memory_errors = 0;
+	sync();
+}
+
+/*
+ * Dump the paca of every possible CPU.  If the possible-CPU count is
+ * zero, point the user at the per-CPU form of the command instead.
+ */
+static void dump_all_pacas(void)
+{
+	int cpu;
+
+	if (!num_possible_cpus()) {
+		printf("No possible cpus, use 'dp #' to dump individual cpus\n");
+		return;
+	}
+
+	for_each_possible_cpu(cpu) {
+		dump_one_paca(cpu);
+	}
+}
+
+/*
+ * Parse the argument of the paca dump command: 'a' dumps every possible
+ * CPU, a hex number dumps that CPU, and no argument dumps xmon_owner.
+ */
+static void dump_pacas(void)
+{
+	unsigned long cpu_arg;
+	int ch = inchar();
+
+	if (ch == 'a') {
+		dump_all_pacas();
+		return;
+	}
+
+	/* Not 'a': hand the character back so scanhex() sees it. */
+	termch = ch;
+
+	if (!scanhex(&cpu_arg)) {
+		dump_one_paca(xmon_owner);
+		return;
+	}
+	dump_one_paca(cpu_arg);
+}
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+/*
+ * Dump the XIVE state of one CPU.
+ *
+ * @cpu: logical CPU number; its hardware id is looked up with
+ *       get_hard_smp_processor_id() before fault catching is armed.
+ *
+ * When CPU_FTR_HVMODE is set, opal_xive_dump() is called for each of the
+ * XIVE_DUMP_* selectors first.  xmon_xive_do_dump() then runs with
+ * catch_memory_errors set so that a fault ends in an error message.
+ */
+static void dump_one_xive(int cpu)
+{
+	unsigned int hwid = get_hard_smp_processor_id(cpu);
+	bool hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+	if (hv) {
+		opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
+		opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
+		opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
+		opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
+		opal_xive_dump(XIVE_DUMP_VP, hwid);
+		opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+	}
+
+	/* Non-zero means a fault longjmp'd back here during the dump. */
+	if (setjmp(bus_error_jmp) != 0) {
+		catch_memory_errors = 0;
+		printf("*** Error dumping xive on cpu %d\n", cpu);
+		return;
+	}
+
+	catch_memory_errors = 1;
+	sync();
+	xmon_xive_do_dump(cpu);
+	sync();
+	/* short delay before fault catching is switched off again */
+	__delay(200);
+	catch_memory_errors = 0;
+}
+
+/*
+ * Dump the XIVE state of every online CPU.  If no CPU is online, point
+ * the user at the per-CPU form of the command instead.
+ */
+static void dump_all_xives(void)
+{
+	int cpu;
+
+	if (num_online_cpus() == 0) {
+		/* The test and the loop below are about online CPUs. */
+		printf("No online cpus, use 'dx #' to dump individual cpus\n");
+		return;
+	}
+
+	for_each_online_cpu(cpu)
+		dump_one_xive(cpu);
+}
+
+/*
+ * Parse the argument of the XIVE dump command.
+ *
+ *   a        dump every online CPU
+ *   i [num]  show the config of interrupt <num>, or of all interrupts
+ *   [num]    dump CPU <num>, or xmon_owner when no number is given
+ *
+ * Nothing is dumped when xive_enabled() reports that XIVE is off.
+ */
+static void dump_xives(void)
+{
+	unsigned long arg;
+	int ch;
+
+	if (!xive_enabled()) {
+		printf("Xive disabled on this system\n");
+		return;
+	}
+
+	ch = inchar();
+	switch (ch) {
+	case 'a':
+		dump_all_xives();
+		return;
+	case 'i':
+		if (scanhex(&arg))
+			xmon_xive_get_irq_config(arg, NULL);
+		else
+			xmon_xive_get_irq_all();
+		return;
+	}
+
+	/* Neither 'a' nor 'i': hand the character back to scanhex(). */
+	termch = ch;
+
+	if (!scanhex(&arg)) {
+		dump_one_xive(xmon_owner);
+		return;
+	}
+	dump_one_xive(arg);
+}
+#endif /* CONFIG_PPC_POWERNV */
+
+/*
+ * Hex dump memory in rows of 16 bytes, grouping each row into units of
+ * @size bytes (1, 2, 4 or 8; any other size prints zeroes), followed by
+ * the row's printable-character rendering between '|' marks.
+ *
+ * @addr:  first address to dump
+ * @count: number of bytes, rounded up here to a multiple of 16
+ * @size:  width in bytes of each printed unit
+ *
+ * The dump stops with a message at the first row that cannot be read in
+ * full.
+ */
+static void dump_by_size(unsigned long addr, long count, int size)
+{
+	unsigned char row[16];
+	int off, pos;
+
+	count = ALIGN(count, 16);
+
+	for (off = 0; off < count; off += 16, addr += 16) {
+		printf(REG, addr);
+
+		if (mread(addr, row, 16) != 16) {
+			printf("\nFaulted reading %d bytes from 0x"REG"\n", 16, addr);
+			return;
+		}
+
+		for (pos = 0; pos < 16; pos += size) {
+			u64 unit;
+
+			putchar(' ');
+			if (size == 1)
+				unit = row[pos];
+			else if (size == 2)
+				unit = *(u16 *)&row[pos];
+			else if (size == 4)
+				unit = *(u32 *)&row[pos];
+			else if (size == 8)
+				unit = *(u64 *)&row[pos];
+			else
+				unit = 0;
+
+			printf("%0*llx", size * 2, unit);
+		}
+
+		printf("  |");
+		for (pos = 0; pos < 16; ++pos) {
+			u64 ch = row[pos];
+
+			if (ch < ' ' || ch > '~')
+				putchar('.');
+			else
+				putchar(ch);
+		}
+		printf("|\n");
+	}
+}
+
+/*
+ * Top-level handler for the dump command family.  The character after
+ * the command letter selects what is dumped:
+ *
+ *   p  pacas (CONFIG_PPC64)            x  XIVE state (CONFIG_PPC_POWERNV)
+ *   t  ftrace buffer                   i  disassembly
+ *   l  kernel log buffer               o  OPAL message log
+ *   v  page table entry for adrs       r  raw hex bytes
+ *   1/2/4/8  hex dump in units of that many bytes
+ *   anything else: plain hex + ASCII dump via prdump()
+ *
+ * For the memory dumps, adrs is advanced past what was shown and last_cmd
+ * is set so that the same kind of dump can be repeated.
+ */
+static void
+dump(void)
+{
+	/* template for last_cmd; the '?' is overwritten with the unit size */
+	static char last[] = { "d?\n" };
+	int c;
+
+	c = inchar();
+
+#ifdef CONFIG_PPC64
+	if (c == 'p') {
+		xmon_start_pagination();
+		dump_pacas();
+		xmon_end_pagination();
+		return;
+	}
+#endif
+#ifdef CONFIG_PPC_POWERNV
+	if (c == 'x') {
+		xmon_start_pagination();
+		dump_xives();
+		xmon_end_pagination();
+		return;
+	}
+#endif
+
+	if (c == 't') {
+		dump_tracing();
+		return;
+	}
+
+	/* a bare newline is put back so scanhex() below sees end of line */
+	if (c == '\n')
+		termch = c;
+
+	/* optional start address; adrs keeps its old value if none is given */
+	scanhex((void *)&adrs);
+	if (termch != '\n')
+		termch = 0;
+	if (c == 'i') {
+		/* instruction count: 0 means 16, capped at MAX_IDUMP */
+		scanhex(&nidump);
+		if (nidump == 0)
+			nidump = 16;
+		else if (nidump > MAX_IDUMP)
+			nidump = MAX_IDUMP;
+		adrs += ppc_inst_dump(adrs, nidump, 1);
+		last_cmd = "di\n";
+	} else if (c == 'l') {
+		dump_log_buf();
+	} else if (c == 'o') {
+		dump_opal_msglog();
+	} else if (c == 'v') {
+		/* dump virtual to physical translation */
+		show_pte(adrs);
+	} else if (c == 'r') {
+		/* byte count: 0 means 64; no upper limit is applied here */
+		scanhex(&ndump);
+		if (ndump == 0)
+			ndump = 64;
+		xmon_rawdump(adrs, ndump);
+		adrs += ndump;
+		last_cmd = "dr\n";
+	} else {
+		/* byte count: 0 means 64, capped at MAX_DUMP */
+		scanhex(&ndump);
+		if (ndump == 0)
+			ndump = 64;
+		else if (ndump > MAX_DUMP)
+			ndump = MAX_DUMP;
+
+		switch (c) {
+		case '8':
+		case '4':
+		case '2':
+		case '1':
+			ndump = ALIGN(ndump, 16);
+			dump_by_size(adrs, ndump, c - '0');
+			last[1] = c;
+			last_cmd = last;
+			break;
+		default:
+			prdump(adrs, ndump);
+			last_cmd = "d\n";
+		}
+
+		adrs += ndump;
+	}
+}
+
+/*
+ * Hex + ASCII dump of ndump bytes starting at adrs, 16 bytes per row.
+ *
+ * Each row shows the address, the bytes in hex with a space after every
+ * sizeof(long) bytes, and the printable rendering between '|' marks.
+ * Bytes that mread() could not fetch are shown with the fault_chars[]
+ * string for the current fault_type, and the dump ends after the first
+ * short read.  A final partial row is padded so the columns line up.
+ */
+static void
+prdump(unsigned long adrs, long ndump)
+{
+	long n, m, c, r, nr;
+	unsigned char temp[16];
+
+	for (n = ndump; n > 0;) {
+		printf(REG, adrs);
+		putchar(' ');
+		/* r: bytes wanted for this row, nr: bytes actually read */
+		r = n < 16? n: 16;
+		nr = mread(adrs, temp, r);
+		adrs += nr;
+		for (m = 0; m < r; ++m) {
+			if ((m & (sizeof(long) - 1)) == 0 && m > 0)
+				putchar(' ');
+			if (m < nr)
+				printf("%.2x", temp[m]);
+			else
+				printf("%s", fault_chars[fault_type]);
+		}
+		/* pad the hex column of a short row out to 16 bytes */
+		for (; m < 16; ++m) {
+			if ((m & (sizeof(long) - 1)) == 0)
+				putchar(' ');
+			printf("  ");
+		}
+		printf("  |");
+		for (m = 0; m < r; ++m) {
+			if (m < nr) {
+				c = temp[m];
+				/* non-printable bytes are shown as '.' */
+				putchar(' ' <= c && c <= '~'? c: '.');
+			} else
+				putchar(' ');
+		}
+		n -= r;
+		for (; m < 16; ++m)
+			putchar(' ');
+		printf("|\n");
+		if (nr < r)
+			break;
+	}
+}
+
+/* Callback that prints the disassembly of one instruction at addr. */
+typedef int (*instruction_dump_func)(unsigned long inst, unsigned long addr);
+
+/*
+ * Disassemble up to count instructions starting at adr.
+ *
+ * @adr:       address of the first instruction
+ * @count:     maximum number of instructions to process
+ * @praddr:    when non-zero, prefix each line with the address and the
+ *             raw instruction, and print a fault marker line if an
+ *             instruction cannot be read
+ * @dump_func: called to print each instruction
+ *
+ * A run of instructions equal to the previously printed one is collapsed
+ * into a single " ..." line.  Returns the number of bytes stepped over.
+ */
+static int
+generic_inst_dump(unsigned long adr, long count, int praddr,
+			instruction_dump_func dump_func)
+{
+	int nr, dotted;
+	unsigned long first_adr;
+	ppc_inst_t inst, last_inst = ppc_inst(0);
+
+	dotted = 0;
+	for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) {
+		nr = mread_instr(adr, &inst);
+		/* nothing could be read: report it if asked to and stop */
+		if (nr == 0) {
+			if (praddr) {
+				const char *x = fault_chars[fault_type];
+				printf(REG"  %s%s%s%s\n", adr, x, x, x, x);
+			}
+			break;
+		}
+		/* same as the last printed instruction: print " ..." once */
+		if (adr > first_adr && ppc_inst_equal(inst, last_inst)) {
+			if (!dotted) {
+				printf(" ...\n");
+				dotted = 1;
+			}
+			continue;
+		}
+		dotted = 0;
+		last_inst = inst;
+		if (praddr)
+			printf(REG"  %08lx", adr, ppc_inst_as_ulong(inst));
+		printf("\t");
+		/* prefixed instructions are handed over as one unsigned long */
+		if (!ppc_inst_prefixed(inst))
+			dump_func(ppc_inst_val(inst), adr);
+		else
+			dump_func(ppc_inst_as_ulong(inst), adr);
+		printf("\n");
+	}
+	return adr - first_adr;
+}
+
+/*
+ * Disassemble count instructions at adr with print_insn_powerpc() as the
+ * per-instruction printer.  Returns what generic_inst_dump() returns.
+ */
+static int
+ppc_inst_dump(unsigned long adr, long count, int praddr)
+{
+	return generic_inst_dump(adr, count, praddr, print_insn_powerpc);
+}
+
+/*
+ * Print addr through xmon_print_symbol(), with "\t# " placed before the
+ * symbol text and nothing after it.
+ */
+void
+print_address(unsigned long addr)
+{
+	xmon_print_symbol(addr, "\t# ", "");
+}
+
+/*
+ * Print the kernel log buffer line by line, with output pagination.
+ *
+ * The walk runs with catch_memory_errors set.  If a fault longjmps back
+ * to the setjmp() below, fault catching and pagination are both switched
+ * off again before the error is reported, so that neither stays active
+ * after this function has returned.
+ */
+static void
+dump_log_buf(void)
+{
+	struct kmsg_dump_iter iter;
+	static unsigned char buf[1024];
+	size_t len;
+
+	if (setjmp(bus_error_jmp) != 0) {
+		catch_memory_errors = 0;
+		xmon_end_pagination();
+		printf("Error dumping printk buffer!\n");
+		return;
+	}
+
+	catch_memory_errors = 1;
+	sync();
+
+	kmsg_dump_rewind(&iter);
+	xmon_start_pagination();
+	while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) {
+		/* terminate the line at the length that was reported */
+		buf[len] = '\0';
+		printf("%s", buf);
+	}
+	xmon_end_pagination();
+
+	sync();
+	/* wait a little while to see if we get a machine check */
+	__delay(200);
+	catch_memory_errors = 0;
+}
+
+#ifdef CONFIG_PPC_POWERNV
+/*
+ * Print the OPAL firmware message log, with output pagination.
+ *
+ * The log is copied out in pieces of up to sizeof(buf) - 1 bytes with
+ * opal_msglog_copy(); a zero return ends the dump and a negative return
+ * is reported as an error.  The copy runs with catch_memory_errors set.
+ * If a fault longjmps back to the setjmp() below, fault catching and
+ * pagination are both switched off again before the error is reported.
+ */
+static void dump_opal_msglog(void)
+{
+	unsigned char buf[128];
+	ssize_t res;
+	/* volatile: changed between setjmp() and a possible longjmp() */
+	volatile loff_t pos = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+		printf("Machine is not running OPAL firmware.\n");
+		return;
+	}
+
+	if (setjmp(bus_error_jmp) != 0) {
+		catch_memory_errors = 0;
+		xmon_end_pagination();
+		printf("Error dumping OPAL msglog!\n");
+		return;
+	}
+
+	catch_memory_errors = 1;
+	sync();
+
+	xmon_start_pagination();
+	while ((res = opal_msglog_copy(buf, pos, sizeof(buf) - 1))) {
+		if (res < 0) {
+			printf("Error dumping OPAL msglog! Error: %zd\n", res);
+			break;
+		}
+		/* one byte of buf is kept free for this terminator */
+		buf[res] = '\0';
+		printf("%s", buf);
+		pos += res;
+	}
+	xmon_end_pagination();
+
+	sync();
+	/* wait a little while to see if we get a machine check */
+	__delay(200);
+	catch_memory_errors = 0;
+}
+#endif
+
+/*
+ * Memory operations - move, set, print differences
+ */
+/* These keep their values between commands, so arguments may be omitted. */
+static unsigned long mdest;		/* destination address */
+static unsigned long msrc;		/* source address */
+static unsigned long mval;		/* byte value to set memory to */
+static unsigned long mcount;		/* # bytes to affect */
+static unsigned long mdiffs;		/* max # differences to print */
+
+/*
+ * Run one of the memory operation commands.
+ *
+ * @cmd: 'm' moves mcount bytes from msrc to mdest, 's' sets mcount bytes
+ *       at mdest to mval, 'd' compares mcount bytes at mdest and msrc and
+ *       prints at most mdiffs differences.
+ *
+ * The arguments are scanned as hex in the order destination, source (or
+ * value for 's'), count and, for 'd', the difference limit.  'm' and 's'
+ * only print xmon_ro_msg when xmon_is_ro is set.
+ */
+static void
+memops(int cmd)
+{
+	scanhex((void *)&mdest);
+	if( termch != '\n' )
+		termch = 0;
+	/* second argument is the fill value for 's', the source otherwise */
+	scanhex((void *)(cmd == 's'? &mval: &msrc));
+	if( termch != '\n' )
+		termch = 0;
+	scanhex((void *)&mcount);
+	switch( cmd ){
+	case 'm':
+		if (xmon_is_ro) {
+			printf(xmon_ro_msg);
+			break;
+		}
+		memmove((void *)mdest, (void *)msrc, mcount);
+		break;
+	case 's':
+		if (xmon_is_ro) {
+			printf(xmon_ro_msg);
+			break;
+		}
+		memset((void *)mdest, mval, mcount);
+		break;
+	case 'd':
+		if( termch != '\n' )
+			termch = 0;
+		scanhex((void *)&mdiffs);
+		memdiffs((unsigned char *)mdest, (unsigned char *)msrc, mcount, mdiffs);
+		break;
+	}
+}
+
+/*
+ * Compare nb bytes at p1 and p2 and print the differing bytes.
+ *
+ * @p1, @p2: the two regions to compare
+ * @nb:      number of bytes to compare
+ * @maxpr:   at most this many differences are printed individually
+ *
+ * Each printed line shows both addresses and both byte values.  If there
+ * were more differences than maxpr, the total count is printed at the end.
+ */
+static void
+memdiffs(unsigned char *p1, unsigned char *p2, unsigned nb, unsigned maxpr)
+{
+	unsigned n, prt;
+
+	prt = 0;
+	for (n = nb; n > 0; --n, ++p1, ++p2) {
+		if (*p1 == *p2)
+			continue;
+		if (++prt <= maxpr)
+			printf("%px %.2x # %px %.2x\n", p1, *p1, p2, *p2);
+	}
+	/* prt is unsigned, so print it as such */
+	if (prt > maxpr)
+		printf("Total of %u differences\n", prt);
+}
+
+/*
+ * mend must be an unsigned long: scanhex() stores an unsigned long through
+ * the pointer it is given, and addresses do not fit in 32 bits on 64-bit
+ * kernels.  mask stays 32 bits wide because only 32-bit words are compared.
+ */
+static unsigned long mend;
+static unsigned mask;
+
+/*
+ * Search memory for a 32-bit word.
+ *
+ * Arguments, all optional hex numbers: start address (mdest), end address
+ * (mend), value (mval) and mask.  Giving a value resets the mask to all
+ * ones before the optional mask argument is read.  Memory is stepped
+ * through 4 bytes at a time from mdest up to, but not including, mend, and
+ * every readable word that equals mval in the bits selected by mask is
+ * printed.  The search stops after 10 matches.
+ */
+static void
+memlocate(void)
+{
+	unsigned long a, scanned;
+	unsigned n, word;
+	unsigned char val[4];
+
+	last_cmd = "ml";
+	scanhex((void *)&mdest);
+	if (termch != '\n') {
+		termch = 0;
+		scanhex(&mend);
+		if (termch != '\n') {
+			termch = 0;
+			scanhex((void *)&mval);
+			mask = ~0;
+			if (termch != '\n') termch = 0;
+			/*
+			 * Scan into a full-width temporary; scanning straight
+			 * into the 32-bit mask would write past its end.
+			 */
+			scanned = mask;
+			scanhex(&scanned);
+			mask = scanned;
+		}
+	}
+	n = 0;
+	for (a = mdest; a < mend; a += 4) {
+		if (mread(a, val, 4) != 4)
+			continue;
+		word = GETWORD(val);
+		if (((word ^ mval) & mask) != 0)
+			continue;
+		printf("%.16lx:  %.16x\n", a, word);
+		if (++n >= 10)
+			break;
+	}
+}
+
+static unsigned long mskip = 0x1000;	/* stride between probes */
+static unsigned long mlim = 0xffffffff;	/* probing stops at this address */
+
+/*
+ * Probe memory at regular intervals and print the readable ranges.
+ *
+ * Arguments, all optional hex numbers: start address (mdest), stride
+ * (mskip) and limit (mlim).  One byte is read at every probed address;
+ * "start .. end" is printed for each run of addresses that could be read.
+ *
+ * The cursor is an unsigned long like the values it is compared against.
+ * A narrower cursor would wrap on its own and the wrap-around test at the
+ * bottom of the loop, which is evaluated in unsigned long, would not see
+ * it.  A zero stride is rejected because the loop could never advance.
+ */
+static void
+memzcan(void)
+{
+	unsigned char v;
+	unsigned long a;
+	int ok, ook;
+
+	scanhex(&mdest);
+	if (termch != '\n') termch = 0;
+	scanhex(&mskip);
+	if (termch != '\n') termch = 0;
+	scanhex(&mlim);
+	if (mskip == 0) {
+		printf("mz: stride must be non-zero\n");
+		return;
+	}
+	/* ook: whether the previous probe succeeded */
+	ook = 0;
+	for (a = mdest; a < mlim; a += mskip) {
+		ok = mread(a, &v, 1);
+		if (ok && !ook) {
+			printf("%.8lx .. ", a);
+		} else if (!ok && ook)
+			printf("%.8lx\n", a - mskip);
+		ook = ok;
+		/* stop before the cursor wraps around the address space */
+		if (a + mskip < a)
+			break;
+	}
+	if (ook)
+		printf("%.8lx\n", a - mskip);
+}
+
+/*
+ * Print one line describing tsk: its address, thread.ksp, thread.regs,
+ * pid, parent pid, a one-letter state code, its CPU and its comm name.
+ */
+static void show_task(struct task_struct *volatile tsk)
+{
+	unsigned int p_state = READ_ONCE(tsk->__state);
+	char state;
+
+	/*
+	 * Cloned from kdb_task_state_char(), which is not entirely
+	 * appropriate for calling from xmon. This could be moved
+	 * to a common, generic, routine used by both.
+	 */
+	/* the first matching test wins; '?' if none matches */
+	state = (p_state == TASK_RUNNING) ? 'R' :
+		(p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+		(p_state & TASK_STOPPED) ? 'T' :
+		(p_state & TASK_TRACED) ? 'C' :
+		(tsk->exit_state & EXIT_ZOMBIE) ? 'Z' :
+		(tsk->exit_state & EXIT_DEAD) ? 'E' :
+		(p_state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+
+	printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk,
+		tsk->thread.ksp, tsk->thread.regs,
+		tsk->pid, rcu_dereference(tsk->parent)->pid,
+		state, task_cpu(tsk),
+		tsk->comm);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Print a page table entry: where it lives, its raw value, the physical
+ * address bits selected by PTE_RPN_MASK and a list of flag names.
+ *
+ * @ptep: address of the entry, printed only
+ * @pte:  raw value of the entry
+ */
+static void format_pte(void *ptep, unsigned long pte)
+{
+	pte_t entry = __pte(pte);
+	const char *accessed = "";
+	const char *dirty = "";
+	const char *readable = "";
+	const char *writable = "";
+	const char *executable = "";
+
+	printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
+	printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
+
+	if (pte_young(entry))
+		accessed = "Accessed ";
+	if (pte_dirty(entry))
+		dirty = "Dirty ";
+	if (pte_read(entry))
+		readable = "Read ";
+	if (pte_write(entry))
+		writable = "Write ";
+	if (pte_exec(entry))
+		executable = "Exec ";
+
+	printf("Flags = %s%s%s%s%s\n",
+	       accessed, dirty, readable, writable, executable);
+}
+
+/*
+ * Walk the page tables for addr and print each level that is found.
+ *
+ * @addr: virtual address to translate
+ *
+ * An optional hex argument gives a task_struct address; its active_mm is
+ * walked.  Without one, init_mm is walked.  The walk stops at the first
+ * level that is empty or a leaf; a leaf, or the final PTE, is printed with
+ * format_pte().
+ *
+ * Everything that dereferences user-supplied pointers, including the read
+ * of tsk->active_mm, runs with catch_memory_errors set, and every way out
+ * of the function switches fault catching off again.
+ */
+static void show_pte(unsigned long addr)
+{
+	unsigned long tskv = 0;
+	struct task_struct *volatile tsk = NULL;
+	struct mm_struct *volatile mm;
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	if (scanhex(&tskv))
+		tsk = (struct task_struct *)tskv;
+
+	if (setjmp(bus_error_jmp) != 0) {
+		catch_memory_errors = 0;
+		printf("*** Error dumping pte for task %px\n", tsk);
+		return;
+	}
+
+	catch_memory_errors = 1;
+	sync();
+
+	/* tsk comes from the command line, so it may not be readable */
+	if (tsk == NULL)
+		mm = &init_mm;
+	else
+		mm = tsk->active_mm;
+
+	if (mm == &init_mm)
+		pgdp = pgd_offset_k(addr);
+	else
+		pgdp = pgd_offset(mm, addr);
+
+	p4dp = p4d_offset(pgdp, addr);
+
+	if (p4d_none(*p4dp)) {
+		printf("No valid P4D\n");
+		goto out;
+	}
+
+	if (p4d_is_leaf(*p4dp)) {
+		format_pte(p4dp, p4d_val(*p4dp));
+		goto out;
+	}
+
+	printf("p4dp @ 0x%px = 0x%016lx\n", p4dp, p4d_val(*p4dp));
+
+	pudp = pud_offset(p4dp, addr);
+
+	if (pud_none(*pudp)) {
+		printf("No valid PUD\n");
+		goto out;
+	}
+
+	if (pud_is_leaf(*pudp)) {
+		format_pte(pudp, pud_val(*pudp));
+		goto out;
+	}
+
+	printf("pudp @ 0x%px = 0x%016lx\n", pudp, pud_val(*pudp));
+
+	pmdp = pmd_offset(pudp, addr);
+
+	if (pmd_none(*pmdp)) {
+		printf("No valid PMD\n");
+		goto out;
+	}
+
+	if (pmd_is_leaf(*pmdp)) {
+		format_pte(pmdp, pmd_val(*pmdp));
+		goto out;
+	}
+	printf("pmdp @ 0x%px = 0x%016lx\n", pmdp, pmd_val(*pmdp));
+
+	ptep = pte_offset_map(pmdp, addr);
+	if (!ptep || pte_none(*ptep)) {
+		if (ptep)
+			pte_unmap(ptep);
+		printf("no valid PTE\n");
+		goto out;
+	}
+
+	format_pte(ptep, pte_val(*ptep));
+	pte_unmap(ptep);
+
+out:
+	sync();
+	/* short delay before fault catching is switched off again */
+	__delay(200);
+	catch_memory_errors = 0;
+}
+#else
+/* Without CONFIG_PPC_BOOK3S_64 this only prints a notice; addr is unused. */
+static void show_pte(unsigned long addr)
+{
+	printf("show_pte not yet implemented\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * Print a table of tasks, one show_task() line each.
+ *
+ * An optional hex argument gives the address of a single task_struct to
+ * show; without it every process visited by for_each_process() is shown.
+ * The task structures are read with catch_memory_errors set, and a fault
+ * ends the listing with an error naming the task being shown.
+ */
+static void show_tasks(void)
+{
+	unsigned long tskv;
+	/* volatile: read in the error path after a possible longjmp() */
+	struct task_struct *volatile tsk = NULL;
+
+	printf("     task_struct     ->thread.ksp    ->thread.regs    PID   PPID S  P CMD\n");
+
+	if (scanhex(&tskv))
+		tsk = (struct task_struct *)tskv;
+
+	if (setjmp(bus_error_jmp) != 0) {
+		catch_memory_errors = 0;
+		printf("*** Error dumping task %px\n", tsk);
+		return;
+	}
+
+	catch_memory_errors = 1;
+	sync();
+
+	if (tsk)
+		show_task(tsk);
+	else
+		for_each_process(tsk)
+			show_task(tsk);
+
+	sync();
+	/* short delay before fault catching is switched off again */
+	__delay(200);
+	catch_memory_errors = 0;
+}
+
+/*
+ * Call a function at an address typed by the user.
+ *
+ * The first hex argument is the function address (stored in adrs); up to
+ * eight more hex arguments are passed to it, with missing ones passed as
+ * zero.  The call runs with catch_memory_errors set: on a normal return
+ * the return value is printed, otherwise the exception number recorded
+ * in fault_except is.
+ */
+static void proccall(void)
+{
+	typedef unsigned long (*callfunc_t)(unsigned long, unsigned long,
+			unsigned long, unsigned long, unsigned long,
+			unsigned long, unsigned long, unsigned long);
+	unsigned long args[8] = { 0 };
+	unsigned long ret;
+	callfunc_t func;
+	int i;
+
+	if (!scanhex(&adrs))
+		return;
+	if (termch != '\n')
+		termch = 0;
+
+	/* stop at the first missing argument or at the end of the line */
+	i = 0;
+	while (i < 8 && scanhex(&args[i]) && termch != '\n') {
+		termch = 0;
+		++i;
+	}
+
+	func = (callfunc_t) adrs;
+	ret = 0;
+	if (setjmp(bus_error_jmp) != 0) {
+		printf("*** %x exception occurred\n", fault_except);
+	} else {
+		catch_memory_errors = 1;
+		sync();
+		ret = func(args[0], args[1], args[2], args[3],
+			   args[4], args[5], args[6], args[7]);
+		sync();
+		printf("return value is 0x%lx\n", ret);
+	}
+	catch_memory_errors = 0;
+}
+
+/* Input scanning routines */
+/*
+ * Return the next input character that is not a space or a tab.  A
+ * character saved in termch is consumed first, and termch is cleared.
+ */
+int
+skipbl(void)
+{
+	int ch;
+
+	if (termch == 0) {
+		ch = inchar();
+	} else {
+		ch = termch;
+		termch = 0;
+	}
+
+	for (; ch == ' ' || ch == '\t'; ch = inchar())
+		;
+
+	return ch;
+}
+
+/*
+ * Register names accepted after '%' by scanhex().  The position of a name
+ * in this table is the index scanhex() uses when it reads xmon_regs as an
+ * array of unsigned long, so the order here matters.
+ */
+#define N_PTREGS	44
+static const char *regnames[N_PTREGS] = {
+	"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+	"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+	"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+	"pc", "msr", "or3", "ctr", "lr", "xer", "ccr",
+#ifdef CONFIG_PPC64
+	"softe",
+#else
+	"mq",
+#endif
+	"trap", "dar", "dsisr", "res"
+};
+
+/*
+ * Read a number from the input into *vp.
+ *
+ * Three forms are accepted after leading blanks:
+ *   %name    the value of the named register, taken from xmon_regs
+ *   $symbol  the address kallsyms_lookup_name() gives for the symbol
+ *   hex      a hexadecimal number with an optional "0x" prefix
+ *
+ * Returns 1 if a value was stored and 0 if not.  The character that ended
+ * the scan is normally left in termch for the next reader.  Note that for
+ * an unknown $symbol *vp is set to 0 even though 0 is returned.
+ */
+int
+scanhex(unsigned long *vp)
+{
+	int c, d;
+	unsigned long v;
+
+	c = skipbl();
+	if (c == '%') {
+		/* parse register name */
+		char regname[8];
+		int i;
+
+		/* collect up to 7 alphanumeric characters */
+		for (i = 0; i < sizeof(regname) - 1; ++i) {
+			c = inchar();
+			if (!isalnum(c)) {
+				termch = c;
+				break;
+			}
+			regname[i] = c;
+		}
+		regname[i] = 0;
+		i = match_string(regnames, N_PTREGS, regname);
+		if (i < 0) {
+			printf("invalid register name '%%%s'\n", regname);
+			return 0;
+		}
+		if (xmon_regs == NULL) {
+			printf("regs not available\n");
+			return 0;
+		}
+		/* xmon_regs is read as an array indexed like regnames[] */
+		*vp = ((unsigned long *)xmon_regs)[i];
+		return 1;
+	}
+
+	/* skip leading "0x" if any */
+
+	if (c == '0') {
+		c = inchar();
+		if (c == 'x') {
+			c = inchar();
+		} else {
+			d = hexdigit(c);
+			/* a lone "0" followed by a non-digit is the value 0 */
+			if (d == EOF) {
+				termch = c;
+				*vp = 0;
+				return 1;
+			}
+		}
+	} else if (c == '$') {
+		int i;
+		/* collect up to 63 characters of symbol name into tmpstr */
+		for (i=0; i<63; i++) {
+			c = inchar();
+			if (isspace(c) || c == '\0') {
+				termch = c;
+				break;
+			}
+			tmpstr[i] = c;
+		}
+		tmpstr[i++] = 0;
+		*vp = 0;
+		/* the lookup runs with fault catching armed */
+		if (setjmp(bus_error_jmp) == 0) {
+			catch_memory_errors = 1;
+			sync();
+			*vp = kallsyms_lookup_name(tmpstr);
+			sync();
+		}
+		catch_memory_errors = 0;
+		if (!(*vp)) {
+			printf("unknown symbol '%s'\n", tmpstr);
+			return 0;
+		}
+		return 1;
+	}
+
+	/* no hex digit at all: leave *vp alone and report failure */
+	d = hexdigit(c);
+	if (d == EOF) {
+		termch = c;
+		return 0;
+	}
+	v = 0;
+	do {
+		v = (v << 4) + d;
+		c = inchar();
+		d = hexdigit(c);
+	} while (d != EOF);
+	termch = c;
+	*vp = v;
+	return 1;
+}
+
+/*
+ * Discard the rest of the current input line.
+ *
+ * Scanning starts with the character saved in termch, which is cleared,
+ * and stops at the first newline.  It also stops when inchar() reports
+ * EOF: no newline will follow in that case, and waiting for one would
+ * never end if input stays unavailable.
+ */
+static void
+scannl(void)
+{
+	int c;
+
+	c = termch;
+	termch = 0;
+	while (c != '\n' && c != EOF)
+		c = inchar();
+}
+
+/*
+ * Convert one hexadecimal digit character, in either case, to its value
+ * 0..15.  Returns EOF for any character that is not a hex digit.
+ */
+static int hexdigit(int c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return 10 + (c - 'a');
+	if (c >= 'A' && c <= 'F')
+		return 10 + (c - 'A');
+	return EOF;
+}
+
+/*
+ * Read one blank-delimited word from the input into s.
+ *
+ * @s:    destination buffer
+ * @size: size of the buffer; at most size - 1 characters are stored and
+ *        the result is always NUL-terminated
+ *
+ * Leading blanks are skipped.  An empty line gives an empty string.
+ * Characters beyond the buffer size are read and dropped.  The space,
+ * tab or newline that ended the word is left in termch.
+ */
+void
+getstring(char *s, int size)
+{
+	int c;
+
+	c = skipbl();
+	if (c == '\n') {
+		*s = 0;
+		return;
+	}
+
+	do {
+		/* store only while room is left for the terminator */
+		if( size > 1 ){
+			*s++ = c;
+			--size;
+		}
+		c = inchar();
+	} while( c != ' ' && c != '\t' && c != '\n' );
+	termch = c;
+	*s = 0;
+}
+
+static char line[256];		/* buffer that inchar() reads lines into */
+static char *lineptr;		/* next unread character, or NULL */
+
+/* Forget any buffered input; the next inchar() will read a new line. */
+static void
+flush_input(void)
+{
+	lineptr = NULL;
+}
+
+/*
+ * Return the next character of input.  When nothing is buffered, a new
+ * line is fetched into line[] with xmon_gets(); EOF is returned if that
+ * fails.
+ */
+static int
+inchar(void)
+{
+	/* fast path: something is still buffered */
+	if (lineptr != NULL && *lineptr != 0)
+		return *lineptr++;
+
+	if (xmon_gets(line, sizeof(line)) == NULL) {
+		lineptr = NULL;
+		return EOF;
+	}
+
+	lineptr = line;
+	return *lineptr++;
+}
+
+/*
+ * Make inchar() read from str next, ahead of any new line of input.
+ * Only the pointer is stored, so str must stay valid until it is used up.
+ */
+static void
+take_input(char *str)
+{
+	lineptr = str;
+}
+
+
+/*
+ * Symbol lookup commands.  The next input character selects the form:
+ *
+ *   a <addr>        print the symbol for a hex address
+ *   s <name>        print the address of a symbol
+ *   p <name> [cpu]  print the address of a per-cpu symbol for the given
+ *                   cpu, or for the current one
+ *
+ * Calls to kallsyms_lookup_name() run with catch_memory_errors set.
+ */
+static void
+symbol_lookup(void)
+{
+	int type = inchar();
+	unsigned long addr, cpu;
+	void __percpu *ptr = NULL;
+	static char tmp[64];
+
+	switch (type) {
+	case 'a':
+		if (scanhex(&addr))
+			xmon_print_symbol(addr, ": ", "\n");
+		termch = 0;
+		break;
+	case 's':
+		getstring(tmp, 64);
+		if (setjmp(bus_error_jmp) == 0) {
+			catch_memory_errors = 1;
+			sync();
+			addr = kallsyms_lookup_name(tmp);
+			if (addr)
+				printf("%s: %lx\n", tmp, addr);
+			else
+				printf("Symbol '%s' not found.\n", tmp);
+			sync();
+		}
+		catch_memory_errors = 0;
+		termch = 0;
+		break;
+	case 'p':
+		getstring(tmp, 64);
+		if (setjmp(bus_error_jmp) == 0) {
+			catch_memory_errors = 1;
+			sync();
+			ptr = (void __percpu *)kallsyms_lookup_name(tmp);
+			sync();
+		}
+
+		/* accept only symbols inside the per-cpu section */
+		if (ptr &&
+		    ptr >= (void __percpu *)__per_cpu_start &&
+		    ptr < (void __percpu *)__per_cpu_end)
+		{
+			/* no cpu given, or out of range: use this cpu */
+			if (scanhex(&cpu) && cpu < num_possible_cpus()) {
+				addr = (unsigned long)per_cpu_ptr(ptr, cpu);
+			} else {
+				cpu = raw_smp_processor_id();
+				addr = (unsigned long)this_cpu_ptr(ptr);
+			}
+
+			printf("%s for cpu 0x%lx: %lx\n", tmp, cpu, addr);
+		} else {
+			printf("Percpu symbol '%s' not found.\n", tmp);
+		}
+
+		catch_memory_errors = 0;
+		termch = 0;
+		break;
+	}
+}
+
+
+/*
+ * Print an address in numeric and symbolic form (if possible)
+ *
+ * @address: the address to print
+ * @mid:     printed between the number and the symbol, only when a
+ *           symbol was found
+ * @after:   always printed last
+ *
+ * The symbolic form is "name+offset/size", followed by " [module]" when
+ * kallsyms_lookup() reports a module name.  The lookup runs with
+ * catch_memory_errors set; if it faults, only the number is printed.
+ */
+static void xmon_print_symbol(unsigned long address, const char *mid,
+			      const char *after)
+{
+	char *modname;
+	/* volatile: tested after a possible longjmp() back to setjmp() */
+	const char *volatile name = NULL;
+	unsigned long offset, size;
+
+	printf(REG, address);
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+		name = kallsyms_lookup(address, &size, &offset, &modname,
+				       tmpstr);
+		sync();
+		/* wait a little while to see if we get a machine check */
+		__delay(200);
+	}
+
+	catch_memory_errors = 0;
+
+	if (name) {
+		printf("%s%s+%#lx/%#lx", mid, name, offset, size);
+		if (modname)
+			printf(" [%s]", modname);
+	}
+	printf("%s", after);
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * Print the SLB of the current CPU (hash MMU build).
+ *
+ * Each of the mmu_slb_size entries is read with slbmfee/slbmfev.  Entries
+ * whose two words are both zero are skipped.  For entries with SLB_ESID_V
+ * set, the ESID, VSID and LLP fields are also decoded, using the 1T or
+ * 256M layout according to SLB_VSID_B_1T.
+ */
+void dump_segments(void)
+{
+	int i;
+	unsigned long esid,vsid;
+	unsigned long llp;
+
+	printf("SLB contents of cpu 0x%x\n", smp_processor_id());
+
+	for (i = 0; i < mmu_slb_size; i++) {
+		asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
+		asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
+
+		if (!esid && !vsid)
+			continue;
+
+		printf("%02d %016lx %016lx", i, esid, vsid);
+
+		/* SLB_ESID_V clear: raw words only, no decoding */
+		if (!(esid & SLB_ESID_V)) {
+			printf("\n");
+			continue;
+		}
+
+		llp = vsid & SLB_VSID_LLP;
+		if (vsid & SLB_VSID_B_1T) {
+			printf("  1T  ESID=%9lx  VSID=%13lx LLP:%3lx \n",
+				GET_ESID_1T(esid),
+				(vsid & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T,
+				llp);
+		} else {
+			printf(" 256M ESID=%9lx  VSID=%13lx LLP:%3lx \n",
+				GET_ESID(esid),
+				(vsid & ~SLB_VSID_B) >> SLB_VSID_SHIFT,
+				llp);
+		}
+	}
+}
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+/*
+ * Print the 16 segment registers on one line (Book3S 32 build).  Register
+ * i is read with mfsr(i << 28).
+ */
+void dump_segments(void)
+{
+	int i;
+
+	printf("sr0-15 =");
+	for (i = 0; i < 16; ++i)
+		printf(" %x", mfsr(i << 28));
+	printf("\n");
+}
+#endif
+
+#ifdef CONFIG_44x
+/*
+ * Print all PPC44x_TLB_SIZE TLB entries.  The three words of each entry
+ * are read with tlbre and printed raw; entries with PPC44x_TLB_VALID set
+ * in word 0 are also decoded into the effective page number, the real
+ * page number and the W/I/M/G/E attribute bits (upper case when set).
+ */
+static void dump_tlb_44x(void)
+{
+	int i;
+
+	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+		unsigned long w0,w1,w2;
+		asm volatile("tlbre  %0,%1,0" : "=r" (w0) : "r" (i));
+		asm volatile("tlbre  %0,%1,1" : "=r" (w1) : "r" (i));
+		asm volatile("tlbre  %0,%1,2" : "=r" (w2) : "r" (i));
+		printf("[%02x] %08lx %08lx %08lx ", i, w0, w1, w2);
+		if (w0 & PPC44x_TLB_VALID) {
+			printf("V %08lx -> %01lx%08lx %c%c%c%c%c",
+			       w0 & PPC44x_TLB_EPN_MASK,
+			       w1 & PPC44x_TLB_ERPN_MASK,
+			       w1 & PPC44x_TLB_RPN_MASK,
+			       (w2 & PPC44x_TLB_W) ? 'W' : 'w',
+			       (w2 & PPC44x_TLB_I) ? 'I' : 'i',
+			       (w2 & PPC44x_TLB_M) ? 'M' : 'm',
+			       (w2 & PPC44x_TLB_G) ? 'G' : 'g',
+			       (w2 & PPC44x_TLB_E) ? 'E' : 'e');
+		}
+		printf("\n");
+	}
+}
+#endif /* CONFIG_44x */
+
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * Print the contents of every TLB array of a Book3E MMU.
+ *
+ * The number of TLB arrays and the PID, LPID and real address widths are
+ * taken from MMUCFG and printed first.  Each array's entry count and
+ * associativity come from its TLBnCFG register.  Every entry is then read
+ * with tlbre through the MAS registers, and the valid ones are printed
+ * with their effective address, TID, page size, attribute bits and real
+ * address, followed by either the sub-page size (indirect entries) or the
+ * user/supervisor permission bits.
+ */
+static void dump_tlb_book3e(void)
+{
+	u32 mmucfg;
+	u64 ramask;
+	int i, tlb, ntlbs, pidsz, lpidsz, rasz;
+	int mmu_version;
+	/* indexed by a 5-bit page size field, hence 32 names */
+	static const char *pgsz_names[] = {
+		"  1K",
+		"  2K",
+		"  4K",
+		"  8K",
+		" 16K",
+		" 32K",
+		" 64K",
+		"128K",
+		"256K",
+		"512K",
+		"  1M",
+		"  2M",
+		"  4M",
+		"  8M",
+		" 16M",
+		" 32M",
+		" 64M",
+		"128M",
+		"256M",
+		"512M",
+		"  1G",
+		"  2G",
+		"  4G",
+		"  8G",
+		" 16G",
+		" 32G",
+		" 64G",
+		"128G",
+		"256G",
+		"512G",
+		"  1T",
+		"  2T",
+	};
+
+	/* Gather some infos about the MMU */
+	mmucfg = mfspr(SPRN_MMUCFG);
+	mmu_version = (mmucfg & 3) + 1;
+	ntlbs = ((mmucfg >> 2) & 3) + 1;
+	pidsz = ((mmucfg >> 6) & 0x1f) + 1;
+	lpidsz = (mmucfg >> 24) & 0xf;
+	rasz = (mmucfg >> 16) & 0x7f;
+	printf("Book3E MMU MAV=%d.0,%d TLBs,%d-bit PID,%d-bit LPID,%d-bit RA\n",
+	       mmu_version, ntlbs, pidsz, lpidsz, rasz);
+	/*
+	 * rasz is a 7-bit field and may be 64 or more; shifting a 64-bit
+	 * value by that much is undefined, so use an all-ones mask then.
+	 */
+	ramask = rasz >= 64 ? ~0ull : (1ull << rasz) - 1;
+
+	for (tlb = 0; tlb < ntlbs; tlb++) {
+		u32 tlbcfg;
+		int nent, assoc, new_cc = 1;
+		printf("TLB %d:\n------\n", tlb);
+		switch(tlb) {
+		case 0:
+			tlbcfg = mfspr(SPRN_TLB0CFG);
+			break;
+		case 1:
+			tlbcfg = mfspr(SPRN_TLB1CFG);
+			break;
+		case 2:
+			tlbcfg = mfspr(SPRN_TLB2CFG);
+			break;
+		case 3:
+			tlbcfg = mfspr(SPRN_TLB3CFG);
+			break;
+		default:
+			printf("Unsupported TLB number !\n");
+			continue;
+		}
+		nent = tlbcfg & 0xfff;
+		assoc = (tlbcfg >> 24) & 0xff;
+		for (i = 0; i < nent; i++) {
+			u32 mas0 = MAS0_TLBSEL(tlb);
+			u32 mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K);
+			u64 mas2 = 0;
+			u64 mas7_mas3;
+			int esel = i, cc = i;
+
+			/*
+			 * With a non-zero associativity, entry i is split
+			 * into a set number (cc) and a way (esel).
+			 */
+			if (assoc != 0) {
+				cc = i / assoc;
+				esel = i % assoc;
+				mas2 = cc * 0x1000;
+			}
+
+			mas0 |= MAS0_ESEL(esel);
+			mtspr(SPRN_MAS0, mas0);
+			mtspr(SPRN_MAS1, mas1);
+			mtspr(SPRN_MAS2, mas2);
+			asm volatile("tlbre  0,0,0" : : : "memory");
+			mas1 = mfspr(SPRN_MAS1);
+			mas2 = mfspr(SPRN_MAS2);
+			mas7_mas3 = mfspr(SPRN_MAS7_MAS3);
+			/* first way of a set: print the set number again */
+			if (assoc && (i % assoc) == 0)
+				new_cc = 1;
+			if (!(mas1 & MAS1_VALID))
+				continue;
+			if (assoc == 0)
+				printf("%04x- ", i);
+			else if (new_cc)
+				printf("%04x-%c", cc, 'A' + esel);
+			else
+				printf("    |%c", 'A' + esel);
+			new_cc = 0;
+			printf(" %016llx %04x %s %c%c AS%c",
+			       mas2 & ~0x3ffull,
+			       (mas1 >> 16) & 0x3fff,
+			       pgsz_names[(mas1 >> 7) & 0x1f],
+			       mas1 & MAS1_IND ? 'I' : ' ',
+			       mas1 & MAS1_IPROT ? 'P' : ' ',
+			       mas1 & MAS1_TS ? '1' : '0');
+			printf(" %c%c%c%c%c%c%c",
+			       mas2 & MAS2_X0 ? 'a' : ' ',
+			       mas2 & MAS2_X1 ? 'v' : ' ',
+			       mas2 & MAS2_W  ? 'w' : ' ',
+			       mas2 & MAS2_I  ? 'i' : ' ',
+			       mas2 & MAS2_M  ? 'm' : ' ',
+			       mas2 & MAS2_G  ? 'g' : ' ',
+			       mas2 & MAS2_E  ? 'e' : ' ');
+			printf(" %016llx", mas7_mas3 & ramask & ~0x7ffull);
+			if (mas1 & MAS1_IND)
+				printf(" %s\n",
+				       pgsz_names[(mas7_mas3 >> 1) & 0x1f]);
+			else
+				printf(" U%c%c%c S%c%c%c\n",
+				       mas7_mas3 & MAS3_UX ? 'x' : ' ',
+				       mas7_mas3 & MAS3_UW ? 'w' : ' ',
+				       mas7_mas3 & MAS3_UR ? 'r' : ' ',
+				       mas7_mas3 & MAS3_SX ? 'x' : ' ',
+				       mas7_mas3 & MAS3_SW ? 'w' : ' ',
+				       mas7_mas3 & MAS3_SR ? 'r' : ' ');
+		}
+	}
+}
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+/*
+ * Install or remove xmon's handlers in the __debugger* hook pointers.
+ *
+ * @enable: non-zero points every hook at the matching xmon routine,
+ *          zero sets every hook to NULL.
+ */
+static void xmon_init(int enable)
+{
+	if (!enable) {
+		__debugger = NULL;
+		__debugger_ipi = NULL;
+		__debugger_bpt = NULL;
+		__debugger_sstep = NULL;
+		__debugger_iabr_match = NULL;
+		__debugger_break_match = NULL;
+		__debugger_fault_handler = NULL;
+		return;
+	}
+
+	__debugger = xmon;
+	__debugger_ipi = xmon_ipi;
+	__debugger_bpt = xmon_bpt;
+	__debugger_sstep = xmon_sstep;
+	__debugger_iabr_match = xmon_iabr_match;
+	__debugger_break_match = xmon_break_match;
+	__debugger_fault_handler = xmon_fault_handler;
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+/*
+ * SysRq handler that enters xmon.
+ *
+ * If xmon_is_locked_down() says so, all breakpoints are cleared and the
+ * hooks removed instead.  Otherwise the hooks are installed for the
+ * duration of the debugger() call and removed again afterwards unless
+ * xmon_on is set.  The key argument is not used.
+ */
+static void sysrq_handle_xmon(u8 key)
+{
+	if (xmon_is_locked_down()) {
+		clear_all_bpt();
+		xmon_init(0);
+		return;
+	}
+	/* ensure xmon is enabled */
+	xmon_init(1);
+	debugger(get_irq_regs());
+	if (!xmon_on)
+		xmon_init(0);
+}
+
+/* SysRq operation descriptor whose handler is sysrq_handle_xmon(). */
+static const struct sysrq_key_op sysrq_xmon_op = {
+	.handler =	sysrq_handle_xmon,
+	.help_msg =	"xmon(x)",
+	.action_msg =	"Entering xmon",
+};
+
+/* Register sysrq_xmon_op for the 'x' SysRq key.  Always returns 0. */
+static int __init setup_xmon_sysrq(void)
+{
+	register_sysrq_key('x', &sysrq_xmon_op);
+	return 0;
+}
+device_initcall(setup_xmon_sysrq);
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+/*
+ * Remove every breakpoint xmon knows about: the installed ones are
+ * removed, all NBPTS breakpoint slots are marked disabled, iabr is
+ * cleared and every dabr[] slot is marked disabled.
+ */
+static void clear_all_bpt(void)
+{
+	int slot;
+
+	/* clear/unpatch all breakpoints */
+	remove_bpts();
+	remove_cpu_bpts();
+
+	/* Disable all breakpoints */
+	slot = 0;
+	while (slot < NBPTS) {
+		bpts[slot].enabled = 0;
+		++slot;
+	}
+
+	/* Clear any data or iabr breakpoints */
+	iabr = NULL;
+	slot = 0;
+	while (slot < nr_wp_slots()) {
+		dabr[slot].enabled = 0;
+		slot++;
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Write handler of the "xmon" debugfs file.
+ *
+ * @data: unused
+ * @val:  zero disables xmon, any other value enables it
+ *
+ * Disabling also clears all breakpoints and prints a notice under the
+ * output lock.  Always returns 0.
+ */
+static int xmon_dbgfs_set(void *data, u64 val)
+{
+	xmon_on = !!val;
+	xmon_init(xmon_on);
+
+	/* make sure all breakpoints removed when disabling */
+	if (!xmon_on) {
+		clear_all_bpt();
+		get_output_lock();
+		printf("xmon: All breakpoints cleared\n");
+		release_output_lock();
+	}
+
+	return 0;
+}
+
+/*
+ * Read handler of the "xmon" debugfs file: stores xmon_on in *val.  The
+ * data argument is unused.  Always returns 0.
+ */
+static int xmon_dbgfs_get(void *data, u64 *val)
+{
+	*val = xmon_on;
+	return 0;
+}
+
+/* File operations for the "xmon" debugfs file; values use "%llu\n". */
+DEFINE_SIMPLE_ATTRIBUTE(xmon_dbgfs_ops, xmon_dbgfs_get,
+			xmon_dbgfs_set, "%llu\n");
+
+/*
+ * Create the "xmon" file, mode 0600, in arch_debugfs_dir.  Always
+ * returns 0.
+ */
+static int __init setup_xmon_dbgfs(void)
+{
+	debugfs_create_file("xmon", 0600, arch_debugfs_dir, NULL,
+			    &xmon_dbgfs_ops);
+	return 0;
+}
+device_initcall(setup_xmon_dbgfs);
+#endif /* CONFIG_DEBUG_FS */
+
+/* Set when xmon should be entered from xmon_setup(). */
+static int xmon_early __initdata;
+
+/*
+ * Parse the "xmon" early parameter.
+ *
+ * @p: the parameter value, or NULL when "xmon" was given without one
+ *
+ * Values are matched by prefix:
+ *   (none), early  enable xmon and enter it from xmon_setup()
+ *   on             enable xmon
+ *   rw             enable xmon and clear xmon_is_ro
+ *   ro             enable xmon and set xmon_is_ro
+ *   off            clear xmon_on
+ *
+ * When xmon_is_locked_down() is true the value is ignored and xmon is
+ * disabled.  Returns 0 for a recognised value and 1 otherwise.
+ */
+static int __init early_parse_xmon(char *p)
+{
+	if (xmon_is_locked_down()) {
+		xmon_init(0);
+		xmon_early = 0;
+		xmon_on = 0;
+	} else if (!p || strncmp(p, "early", 5) == 0) {
+		/* just "xmon" is equivalent to "xmon=early" */
+		xmon_init(1);
+		xmon_early = 1;
+		xmon_on = 1;
+	} else if (strncmp(p, "on", 2) == 0) {
+		xmon_init(1);
+		xmon_on = 1;
+	} else if (strncmp(p, "rw", 2) == 0) {
+		xmon_init(1);
+		xmon_on = 1;
+		xmon_is_ro = false;
+	} else if (strncmp(p, "ro", 2) == 0) {
+		xmon_init(1);
+		xmon_on = 1;
+		xmon_is_ro = true;
+	} else if (strncmp(p, "off", 3) == 0)
+		xmon_on = 0;
+	else
+		return 1;
+
+	return 0;
+}
+early_param("xmon", early_parse_xmon);
+
+/*
+ * Boot-time setup: install the xmon hooks if xmon_on is set, and enter
+ * the debugger with no register state if xmon_early is set.
+ */
+void __init xmon_setup(void)
+{
+	if (xmon_on)
+		xmon_init(1);
+	if (xmon_early)
+		debugger(NULL);
+}
+
+#ifdef CONFIG_SPU_BASE
+
+/* Per-SPU bookkeeping kept by xmon, indexed by spu->number. */
+struct spu_info {
+	struct spu *spu;		/* NULL if no SPU is registered here */
+	u64 saved_mfc_sr1_RW;		/* value saved by stop_spus() */
+	u32 saved_spu_runcntl_RW;	/* value saved by stop_spus() */
+	unsigned long dump_addr;	/* initialised to spu->local_store */
+	u8 stopped_ok;			/* set once stop_spus() succeeded */
+};
+
+#define XMON_NUM_SPUS	16	/* Enough for current hardware */
+
+static struct spu_info spu_info[XMON_NUM_SPUS];
+
+/* Record every SPU on @list in spu_info[], using spu->number as the index. */
+void __init xmon_register_spus(struct list_head *list)
+{
+	struct spu_info *slot;
+	struct spu *spu;
+
+	list_for_each_entry(spu, list, full_list) {
+		if (WARN_ON(spu->number >= XMON_NUM_SPUS))
+			continue;
+
+		slot = &spu_info[spu->number];
+		slot->spu = spu;
+		slot->stopped_ok = 0;
+		slot->dump_addr = (unsigned long)spu->local_store;
+	}
+}
+
+static void stop_spus(void)	/* reached from do_spu_cmd() for 's' */
+{
+	struct spu *spu;
+	volatile int i;	/* presumably volatile so it survives a longjmp - confirm */
+	u64 tmp;
+
+	for (i = 0; i < XMON_NUM_SPUS; i++) {
+		if (!spu_info[i].spu)	/* nothing registered in this slot */
+			continue;
+
+		if (setjmp(bus_error_jmp) == 0) {
+			catch_memory_errors = 1;
+			sync();
+
+			spu = spu_info[i].spu;
+			/* save both registers; restart_spus() writes them back */
+			spu_info[i].saved_spu_runcntl_RW =
+				in_be32(&spu->problem->spu_runcntl_RW);
+
+			tmp = spu_mfc_sr1_get(spu);
+			spu_info[i].saved_mfc_sr1_RW = tmp;
+			/* clear the master run control bit(s) and write SR1 back */
+			tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+			spu_mfc_sr1_set(spu, tmp);
+
+			sync();
+			__delay(200);
+			/* reached only if nothing above jumped back to setjmp() */
+			spu_info[i].stopped_ok = 1;
+
+			printf("Stopped spu %.2d (was %s)\n", i,
+					spu_info[i].saved_spu_runcntl_RW ?
+					"running" : "stopped");
+		} else {	/* setjmp() returned non-zero, i.e. via bus_error_jmp */
+			catch_memory_errors = 0;
+			printf("*** Error stopping spu %.2d\n", i);
+		}
+		catch_memory_errors = 0;
+	}
+}
+
+static void restart_spus(void)	/* reached from do_spu_cmd() for 'r' */
+{
+	struct spu *spu;
+	volatile int i;	/* presumably volatile so it survives a longjmp - confirm */
+
+	for (i = 0; i < XMON_NUM_SPUS; i++) {
+		if (!spu_info[i].spu)	/* nothing registered in this slot */
+			continue;
+
+		if (!spu_info[i].stopped_ok) {
+			printf("*** Error, spu %d was not successfully stopped"
+					", not restarting\n", i);
+			continue;
+		}
+		/* NOTE(review): stopped_ok is never cleared after a restart - intended? */
+		if (setjmp(bus_error_jmp) == 0) {
+			catch_memory_errors = 1;
+			sync();
+			/* put back what stop_spus() saved: SR1 first, then run control */
+			spu = spu_info[i].spu;
+			spu_mfc_sr1_set(spu, spu_info[i].saved_mfc_sr1_RW);
+			out_be32(&spu->problem->spu_runcntl_RW,
+					spu_info[i].saved_spu_runcntl_RW);
+
+			sync();
+			__delay(200);
+
+			printf("Restarted spu %.2d\n", i);
+		} else {	/* setjmp() returned non-zero, i.e. via bus_error_jmp */
+			catch_memory_errors = 0;
+			printf("*** Error restarting spu %.2d\n", i);
+		}
+		catch_memory_errors = 0;
+	}
+}
+
+#define DUMP_WIDTH	23	/* left-justified width of the field-name column */
+#define DUMP_VALUE(format, field, value)				\
+do {	/* the value argument is evaluated inside the guarded region */	\
+	if (setjmp(bus_error_jmp) == 0) {				\
+		catch_memory_errors = 1;				\
+		sync();							\
+		printf("  %-*s = "format"\n", DUMP_WIDTH,		\
+				#field, value);				\
+		sync();							\
+		__delay(200);						\
+	} else {	/* setjmp() returned non-zero */		\
+		catch_memory_errors = 0;				\
+		printf("  %-*s = *** Error reading field.\n",		\
+					DUMP_WIDTH, #field);		\
+	}								\
+	catch_memory_errors = 0;					\
+} while (0)
+/* DUMP_VALUE() of obj->field, labelled with the stringified field name */
+#define DUMP_FIELD(obj, format, field)	\
+	DUMP_VALUE(format, field, obj->field)
+
+static void dump_spu_fields(struct spu *spu)	/* 'f' case of do_spu_cmd() */
+{
+	printf("Dumping spu fields at address %p:\n", spu);
+	/* one guarded line per field: a failed read prints an error line instead */
+	DUMP_FIELD(spu, "0x%x", number);
+	DUMP_FIELD(spu, "%s", name);
+	DUMP_FIELD(spu, "0x%lx", local_store_phys);
+	DUMP_FIELD(spu, "0x%p", local_store);
+	DUMP_FIELD(spu, "0x%lx", ls_size);
+	DUMP_FIELD(spu, "0x%x", node);
+	DUMP_FIELD(spu, "0x%lx", flags);
+	DUMP_FIELD(spu, "%llu", class_0_pending);
+	DUMP_FIELD(spu, "0x%llx", class_0_dar);
+	DUMP_FIELD(spu, "0x%llx", class_1_dar);
+	DUMP_FIELD(spu, "0x%llx", class_1_dsisr);
+	DUMP_FIELD(spu, "0x%x", irqs[0]);
+	DUMP_FIELD(spu, "0x%x", irqs[1]);
+	DUMP_FIELD(spu, "0x%x", irqs[2]);
+	DUMP_FIELD(spu, "0x%x", slb_replace);
+	DUMP_FIELD(spu, "%d", pid);
+	DUMP_FIELD(spu, "0x%p", mm);
+	DUMP_FIELD(spu, "0x%p", ctx);
+	DUMP_FIELD(spu, "0x%p", rq);
+	DUMP_FIELD(spu, "0x%llx", timestamp);
+	DUMP_FIELD(spu, "0x%lx", problem_phys);
+	DUMP_FIELD(spu, "0x%p", problem);
+	DUMP_VALUE("0x%x", problem->spu_runcntl_RW,
+			in_be32(&spu->problem->spu_runcntl_RW));
+	DUMP_VALUE("0x%x", problem->spu_status_R,
+			in_be32(&spu->problem->spu_status_R));
+	DUMP_VALUE("0x%x", problem->spu_npc_RW,
+			in_be32(&spu->problem->spu_npc_RW));
+	DUMP_FIELD(spu, "0x%p", priv2);
+	DUMP_FIELD(spu, "0x%p", pdata);
+}
+
+static int spu_inst_dump(unsigned long adr, long count, int praddr)
+{
+	/* same arguments, with print_insn_spu as the per-instruction printer */
+	return generic_inst_dump(adr, count, praddr, print_insn_spu);
+}
+
+/* Dump SPU @num's local store from a typed offset, or from the saved dump_addr. */
+static void dump_spu_ls(unsigned long num, int subcmd)
+{
+	unsigned long offset, addr, ls_addr;
+
+	if (setjmp(bus_error_jmp) == 0) {
+		catch_memory_errors = 1;
+		sync();
+		ls_addr = (unsigned long)spu_info[num].spu->local_store;
+		sync();
+		__delay(200);
+	} else {
+		catch_memory_errors = 0;
+		printf("*** Error: accessing spu info for spu %lu\n", num);
+		return;
+	}
+	catch_memory_errors = 0;
+
+	if (scanhex(&offset))
+		addr = ls_addr + offset;
+	else
+		addr = spu_info[num].dump_addr;
+
+	/* unsigned distance from the base also rejects a wrapped ls_addr + offset */
+	if (addr - ls_addr >= LS_SIZE) {
+		printf("*** Error: address outside of local store\n");
+		return;
+	}
+
+	/* 'i' goes through spu_inst_dump(); anything else hex-dumps 64 bytes */
+	if (subcmd == 'i') {
+		addr += spu_inst_dump(addr, 16, 1);
+		last_cmd = "sdi\n";
+	} else {
+		prdump(addr, 64);
+		addr += 64;
+		last_cmd = "sd\n";
+	}
+
+	spu_info[num].dump_addr = addr;
+}
+
+/*
+ * SPU sub-commands: 's' stop, 'r' restart, 'd' dump local store, 'f' dump
+ * fields.  Returns -1 for any other character, 0 otherwise.
+ */
+static int do_spu_cmd(void)
+{
+	static unsigned long num = 0;	/* static, so it persists between calls */
+	int subcmd = 0;
+	int cmd = inchar();
+
+	if (cmd == 's') {
+		stop_spus();
+		return 0;
+	}
+	if (cmd == 'r') {
+		restart_spus();
+		return 0;
+	}
+	if (cmd != 'd' && cmd != 'f')
+		return -1;
+
+	if (cmd == 'd') {
+		subcmd = inchar();
+		/* presumably hands the character back to scanhex() - confirm */
+		if (isxdigit(subcmd) || subcmd == '\n')
+			termch = subcmd;
+	}
+
+	/* both 'd' and 'f' need an SPU number that names a registered SPU */
+	scanhex(&num);
+	if (num >= XMON_NUM_SPUS || !spu_info[num].spu) {
+		printf("*** Error: invalid spu number\n");
+		return 0;
+	}
+
+	if (cmd == 'f')
+		dump_spu_fields(spu_info[num].spu);
+	else
+		dump_spu_ls(num, subcmd);
+	return 0;
+}
+#else /* ! CONFIG_SPU_BASE */
+static int do_spu_cmd(void)
+{
+	return -1;	/* same result the SPU build gives for an unknown sub-command */
+}
+#endif /* CONFIG_SPU_BASE */
diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
new file mode 100644
index 0000000000..69726814cd
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.S
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+#include "xmon_bpts.h"
+
+/* Prefixed instructions cannot cross 64-byte boundaries, so align the table */
+.align 6
+.global bpt_table
+bpt_table:
+	.space NBPTS * BPT_SIZE		/* NBPTS entries of BPT_SIZE bytes each */
diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
new file mode 100644
index 0000000000..377068f52e
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef XMON_BPTS_H
+#define XMON_BPTS_H
+
+#define NBPTS	256	/* number of entries in bpt_table */
+#ifndef __ASSEMBLY__
+#include <asm/inst.h>
+#define BPT_SIZE	(sizeof(ppc_inst_t) * 2)	/* bytes per entry: two ppc_inst_t */
+#define BPT_WORDS	(BPT_SIZE / sizeof(ppc_inst_t))	/* ppc_inst_t units per entry */
+/* NOTE(review): xmon_bpts.S also uses BPT_SIZE; presumably from asm-offsets.h */
+extern unsigned int bpt_table[NBPTS * BPT_WORDS];
+#endif /* __ASSEMBLY__ */
+
+#endif /* XMON_BPTS_H */